From cf099d11218cb6f6c5cce947d6738e347f07fb12 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Sun, 20 Feb 2011 12:57:14 +0000
Subject: Vendor import of llvm trunk r126079:
 http://llvm.org/svn/llvm-project/llvm/trunk@126079

---
 .gitignore                                         |    37 +
 CMakeLists.txt                                     |   301 +-
 CREDITS.TXT                                        |     4 +-
 Makefile                                           |    19 +-
 Makefile.config.in                                 |    15 +-
 Makefile.rules                                     |   138 +-
 ModuleInfo.txt                                     |     2 +-
 README.txt                                         |     2 +-
 autoconf/configure.ac                              |   198 +-
 bindings/ada/analysis/llvm_analysis-binding.ads    |    32 -
 bindings/ada/analysis/llvm_analysis.ads            |    30 -
 bindings/ada/analysis/llvm_analysis_wrap.cxx       |   369 -
 bindings/ada/bitreader/llvm_bit_reader-binding.ads |    52 -
 bindings/ada/bitreader/llvm_bit_reader.ads         |     6 -
 bindings/ada/bitreader/llvm_bitreader_wrap.cxx     |   423 -
 bindings/ada/bitwriter/llvm_bit_writer-binding.ads |    28 -
 bindings/ada/bitwriter/llvm_bit_writer.ads         |     6 -
 bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx     |   335 -
 .../llvm_execution_engine-binding.ads              |   192 -
 .../ada/executionengine/llvm_execution_engine.ads  |    90 -
 .../executionengine/llvm_executionengine_wrap.cxx  |   924 -
 bindings/ada/llvm.gpr                              |    34 -
 bindings/ada/llvm/llvm-binding.ads                 |  1974 --
 bindings/ada/llvm/llvm.ads                         |   497 -
 .../ada/llvm/llvm_link_time_optimizer-binding.ads  |   207 -
 bindings/ada/llvm/llvm_link_time_optimizer.ads     |   184 -
 bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx  |   923 -
 bindings/ada/llvm/llvm_wrap.cxx                    |  8817 ---------
 bindings/ada/target/llvm_target-binding.ads        |   138 -
 bindings/ada/target/llvm_target.ads                |    72 -
 bindings/ada/target/llvm_target_wrap.cxx           |   720 -
 .../ada/transforms/llvm_transforms-binding.ads     |   206 -
 bindings/ada/transforms/llvm_transforms.ads        |     6 -
 bindings/ada/transforms/llvm_transforms_wrap.cxx   |   828 -
 bindings/ocaml/Makefile.ocaml                      |     9 +-
 bindings/ocaml/bitreader/llvm_bitreader.mli        |     8 +-
 .../ocaml/executionengine/llvm_executionengine.mli |    68 +-
 bindings/ocaml/llvm/llvm.mli                       |   962 +-
 bindings/ocaml/llvm/llvm_ocaml.c                   |     5 +
 .../ocaml/transforms/scalar/llvm_scalar_opts.ml    |     3 -
 .../ocaml/transforms/scalar/llvm_scalar_opts.mli   |     5 -
 .../ocaml/transforms/scalar/scalar_opts_ocaml.c    |     6 -
 cmake/config-ix.cmake                              |   162 +-
 cmake/modules/AddLLVM.cmake                        |    34 +-
 cmake/modules/AddLLVMDefinitions.cmake             |    24 +-
 cmake/modules/CMakeLists.txt                       |    21 +
 cmake/modules/CrossCompileLLVM.cmake               |    52 +-
 cmake/modules/GetTargetTriple.cmake                |     6 +-
 cmake/modules/HandleLLVMOptions.cmake              |   161 +
 cmake/modules/LLVM.cmake                           |    11 +
 cmake/modules/LLVMConfig.cmake                     |    76 +-
 cmake/modules/LLVMLibDeps.cmake                    |   121 +-
 cmake/modules/LLVMParseArguments.cmake             |    80 +
 cmake/modules/LLVMProcessSources.cmake             |    38 +-
 cmake/modules/TableGen.cmake                       |    26 +-
 cmake/modules/VersionFromVCS.cmake                 |    31 +-
 configure                                          |  3439 +++-
 docs/AliasAnalysis.html                            |    81 +-
 docs/BitCodeFormat.html                            |     9 +-
 docs/CMake.html                                    |    53 +-
 docs/CodeGenerator.html                            |   724 +-
 docs/CodingStandards.html                          |   538 +-
 docs/CommandGuide/FileCheck.pod                    |     4 +-
 docs/CommandGuide/index.html                       |     2 +-
 docs/CommandGuide/llc.pod                          |     8 +
 docs/CommandGuide/lli.pod                          |     9 +-
 docs/DeveloperPolicy.html                          |    85 +-
 docs/ExceptionHandling.html                        |    22 +-
 docs/GetElementPtr.html                            |    30 +-
 docs/GettingStarted.html                           |    74 +-
 docs/GettingStartedVS.html                         |   322 +-
 docs/GoldPlugin.html                               |     2 +-
 docs/LangRef.html                                  |   260 +-
 docs/LinkTimeOptimization.html                     |     4 +-
 docs/Makefile                                      |    15 +-
 docs/MakefileGuide.html                            |     8 +-
 docs/Passes.html                                   |    10 +-
 docs/ProgrammersManual.html                        |    28 +-
 docs/ReleaseNotes.html                             |    26 +-
 docs/SourceLevelDebugging.html                     |    54 +-
 docs/TableGenFundamentals.html                     |    24 +-
 docs/TestingGuide.html                             |    69 +-
 docs/UsingLibraries.html                           |     8 +-
 docs/WritingAnLLVMBackend.html                     |     4 +-
 docs/WritingAnLLVMPass.html                        |   130 +-
 docs/tutorial/LangImpl3.html                       |     6 +-
 docs/tutorial/LangImpl4.html                       |     7 +-
 docs/tutorial/LangImpl5.html                       |     5 +-
 docs/tutorial/LangImpl6.html                       |     5 +-
 docs/tutorial/LangImpl7.html                       |     5 +-
 docs/tutorial/OCamlLangImpl7.html                  |     4 +-
 docs/tutorial/OCamlLangImpl8.html                  |   365 +
 docs/tutorial/index.html                           |     2 +-
 examples/CMakeLists.txt                            |     3 -
 examples/ExceptionDemo/CMakeLists.txt              |     1 +
 examples/ExceptionDemo/ExceptionDemo.cpp           |     3 +
 examples/Kaleidoscope/Chapter4/toy.cpp             |     3 +
 examples/Kaleidoscope/Chapter5/toy.cpp             |     3 +
 examples/Kaleidoscope/Chapter6/toy.cpp             |     3 +
 examples/Kaleidoscope/Chapter7/CMakeLists.txt      |     1 +
 examples/Kaleidoscope/Chapter7/toy.cpp             |     3 +
 examples/Makefile                                  |     3 +-
 examples/ModuleMaker/README.txt                    |     2 +-
 examples/OCaml-Kaleidoscope/Chapter6/Makefile      |     9 +
 examples/OCaml-Kaleidoscope/Chapter7/Makefile      |     9 +
 include/llvm-c/Core.h                              |    24 +-
 include/llvm-c/EnhancedDisassembly.h               |     2 +-
 include/llvm-c/Initialization.h                    |    40 +
 include/llvm-c/LinkTimeOptimizer.h                 |     2 +-
 include/llvm-c/Transforms/Scalar.h                 |     3 -
 include/llvm-c/lto.h                               |    50 +-
 include/llvm/ADT/APFloat.h                         |     7 +
 include/llvm/ADT/APInt.h                           |   142 +-
 include/llvm/ADT/APSInt.h                          |    18 +-
 include/llvm/ADT/ArrayRef.h                        |   121 +
 include/llvm/ADT/BitVector.h                       |    36 +-
 include/llvm/ADT/DenseMap.h                        |     3 +-
 include/llvm/ADT/DenseMapInfo.h                    |    14 +
 include/llvm/ADT/DenseSet.h                        |     3 +
 include/llvm/ADT/EquivalenceClasses.h              |     2 +-
 include/llvm/ADT/FoldingSet.h                      |     2 +-
 include/llvm/ADT/ImmutableIntervalMap.h            |    54 +-
 include/llvm/ADT/ImmutableList.h                   |    14 +-
 include/llvm/ADT/ImmutableMap.h                    |    53 +-
 include/llvm/ADT/ImmutableSet.h                    |   553 +-
 include/llvm/ADT/InMemoryStruct.h                  |    77 +
 include/llvm/ADT/IndexedMap.h                      |    14 +-
 include/llvm/ADT/IntEqClasses.h                    |    88 +
 include/llvm/ADT/IntervalMap.h                     |  2139 ++
 include/llvm/ADT/Optional.h                        |    54 +
 include/llvm/ADT/PointerIntPair.h                  |     7 +
 include/llvm/ADT/PointerUnion.h                    |    12 +
 include/llvm/ADT/PostOrderIterator.h               |     6 +-
 include/llvm/ADT/SCCIterator.h                     |     2 +-
 include/llvm/ADT/ScopedHashTable.h                 |   103 +-
 include/llvm/ADT/SetVector.h                       |     4 +-
 include/llvm/ADT/SmallBitVector.h                  |     7 +
 include/llvm/ADT/SmallPtrSet.h                     |     5 +-
 include/llvm/ADT/SmallString.h                     |    12 +-
 include/llvm/ADT/SmallVector.h                     |    26 +-
 include/llvm/ADT/SparseBitVector.h                 |     2 +-
 include/llvm/ADT/Statistic.h                       |     2 +-
 include/llvm/ADT/StringExtras.h                    |     9 +-
 include/llvm/ADT/StringMap.h                       |    21 +-
 include/llvm/ADT/StringRef.h                       |    25 +-
 include/llvm/ADT/Triple.h                          |    77 +-
 include/llvm/ADT/Twine.h                           |    30 +-
 include/llvm/ADT/ValueMap.h                        |     2 +-
 include/llvm/ADT/ilist.h                           |     1 +
 include/llvm/Analysis/AliasAnalysis.h              |   368 +-
 include/llvm/Analysis/AliasSetTracker.h            |    60 +-
 include/llvm/Analysis/CallGraph.h                  |    13 +-
 include/llvm/Analysis/CodeMetrics.h                |    31 +-
 include/llvm/Analysis/ConstantFolding.h            |    13 +-
 include/llvm/Analysis/DIBuilder.h                  |   459 +
 include/llvm/Analysis/DOTGraphTraitsPass.h         |     2 +-
 include/llvm/Analysis/DebugInfo.h                  |   196 +-
 include/llvm/Analysis/DominanceFrontier.h          |   189 +
 include/llvm/Analysis/DominatorInternals.h         |   189 +-
 include/llvm/Analysis/Dominators.h                 |   265 +-
 include/llvm/Analysis/FindUsedTypes.h              |     4 +-
 include/llvm/Analysis/InlineCost.h                 |    32 +-
 include/llvm/Analysis/InstructionSimplify.h        |   111 +-
 include/llvm/Analysis/IntervalPartition.h          |     4 +-
 include/llvm/Analysis/LazyValueInfo.h              |     4 +-
 include/llvm/Analysis/LibCallAliasAnalysis.h       |    10 +-
 include/llvm/Analysis/LibCallSemantics.h           |     2 +-
 include/llvm/Analysis/LoopDependenceAnalysis.h     |     4 +-
 include/llvm/Analysis/LoopInfo.h                   |    43 +-
 include/llvm/Analysis/MemoryBuiltins.h             |     4 +
 include/llvm/Analysis/MemoryDependenceAnalysis.h   |   109 +-
 include/llvm/Analysis/Passes.h                     |    35 +-
 include/llvm/Analysis/PathNumbering.h              |   304 +
 include/llvm/Analysis/PathProfileInfo.h            |   113 +
 include/llvm/Analysis/PointerTracking.h            |   132 -
 include/llvm/Analysis/PostDominators.h             |     7 +-
 include/llvm/Analysis/ProfileInfoTypes.h           |    33 +-
 include/llvm/Analysis/RegionInfo.h                 |    57 +-
 include/llvm/Analysis/RegionPass.h                 |   126 +
 include/llvm/Analysis/ScalarEvolution.h            |   146 +-
 include/llvm/Analysis/ScalarEvolutionExpander.h    |     6 +
 include/llvm/Analysis/ScalarEvolutionExpressions.h |   138 +-
 include/llvm/Analysis/ValueTracking.h              |    52 +-
 include/llvm/Attributes.h                          |     7 +-
 include/llvm/BasicBlock.h                          |    32 +-
 include/llvm/Bitcode/Archive.h                     |    13 +-
 include/llvm/Bitcode/BitCodes.h                    |     2 +-
 include/llvm/Bitcode/LLVMBitCodes.h                |    12 +-
 include/llvm/Bitcode/ReaderWriter.h                |     9 +
 include/llvm/CallingConv.h                         |    27 +-
 include/llvm/CodeGen/Analysis.h                    |    11 +-
 include/llvm/CodeGen/AsmPrinter.h                  |     7 +-
 include/llvm/CodeGen/BinaryObject.h                |     2 +-
 include/llvm/CodeGen/CalcSpillWeights.h            |    24 +-
 include/llvm/CodeGen/CallingConvLower.h            |    42 +-
 include/llvm/CodeGen/EdgeBundles.h                 |    61 +
 include/llvm/CodeGen/FastISel.h                    |    15 +-
 include/llvm/CodeGen/FunctionLoweringInfo.h        |     7 +-
 include/llvm/CodeGen/GCMetadata.h                  |     9 +-
 include/llvm/CodeGen/ISDOpcodes.h                  |    52 +-
 include/llvm/CodeGen/IntrinsicLowering.h           |     5 +
 include/llvm/CodeGen/JITCodeEmitter.h              |     2 +-
 include/llvm/CodeGen/LatencyPriorityQueue.h        |    22 +-
 include/llvm/CodeGen/LinkAllCodegenComponents.h    |     4 +-
 include/llvm/CodeGen/LiveInterval.h                |   193 +-
 include/llvm/CodeGen/LiveIntervalAnalysis.h        |    45 +-
 include/llvm/CodeGen/LiveStackAnalysis.h           |    18 +-
 include/llvm/CodeGen/LiveVariables.h               |    11 +-
 include/llvm/CodeGen/MachORelocation.h             |     2 +-
 include/llvm/CodeGen/MachineBasicBlock.h           |    19 +-
 include/llvm/CodeGen/MachineCodeEmitter.h          |     2 +-
 include/llvm/CodeGen/MachineCodeInfo.h             |     2 +-
 include/llvm/CodeGen/MachineDominators.h           |     2 +-
 include/llvm/CodeGen/MachineFrameInfo.h            |    14 +-
 include/llvm/CodeGen/MachineFunction.h             |    26 +-
 include/llvm/CodeGen/MachineFunctionAnalysis.h     |     4 +
 include/llvm/CodeGen/MachineInstr.h                |    36 +-
 include/llvm/CodeGen/MachineInstrBuilder.h         |    21 +
 include/llvm/CodeGen/MachineLocation.h             |     7 +-
 include/llvm/CodeGen/MachineLoopInfo.h             |     4 +-
 include/llvm/CodeGen/MachineLoopRanges.h           |   112 +
 include/llvm/CodeGen/MachineMemOperand.h           |    72 +-
 include/llvm/CodeGen/MachineModuleInfo.h           |    73 +-
 include/llvm/CodeGen/MachineOperand.h              |   134 +-
 include/llvm/CodeGen/MachineRegisterInfo.h         |    58 +-
 include/llvm/CodeGen/MachineRelocation.h           |     2 +-
 include/llvm/CodeGen/PBQP/Graph.h                  |   425 +
 include/llvm/CodeGen/PBQP/HeuristicBase.h          |   246 +
 include/llvm/CodeGen/PBQP/HeuristicSolver.h        |   616 +
 include/llvm/CodeGen/PBQP/Heuristics/Briggs.h      |   464 +
 include/llvm/CodeGen/PBQP/Math.h                   |   288 +
 include/llvm/CodeGen/PBQP/Solution.h               |    94 +
 include/llvm/CodeGen/Passes.h                      |    35 +-
 include/llvm/CodeGen/PostRAHazardRecognizer.h      |    94 -
 include/llvm/CodeGen/ProcessImplicitDefs.h         |     4 +-
 include/llvm/CodeGen/RegAllocPBQP.h                |   167 +
 include/llvm/CodeGen/RegisterCoalescer.h           |     2 +-
 include/llvm/CodeGen/ScheduleDAG.h                 |    85 +-
 include/llvm/CodeGen/ScheduleHazardRecognizer.h    |    28 +-
 include/llvm/CodeGen/ScoreboardHazardRecognizer.h  |   129 +
 include/llvm/CodeGen/SelectionDAG.h                |   106 +-
 include/llvm/CodeGen/SelectionDAGISel.h            |   102 +-
 include/llvm/CodeGen/SelectionDAGNodes.h           |    37 +-
 include/llvm/CodeGen/SlotIndexes.h                 |   118 +-
 .../llvm/CodeGen/TargetLoweringObjectFileImpl.h    |     6 +
 include/llvm/CodeGen/ValueTypes.h                  |    59 +-
 include/llvm/CodeGen/ValueTypes.td                 |    11 +-
 include/llvm/CompilerDriver/CompilationGraph.h     |     2 +-
 include/llvm/CompilerDriver/Tool.h                 |     4 +-
 include/llvm/Config/config.h.cmake                 |   340 +-
 include/llvm/Config/config.h.in                    |    76 +-
 include/llvm/Config/llvm-config.h.cmake            |     6 +-
 include/llvm/Constant.h                            |    13 +-
 include/llvm/Constants.h                           |   130 +-
 include/llvm/DerivedTypes.h                        |     3 +-
 include/llvm/ExecutionEngine/ExecutionEngine.h     |   164 +-
 include/llvm/ExecutionEngine/GenericValue.h        |     2 +-
 include/llvm/ExecutionEngine/JITEventListener.h    |    52 +-
 include/llvm/ExecutionEngine/JITMemoryManager.h    |    12 +-
 include/llvm/ExecutionEngine/MCJIT.h               |    38 +
 include/llvm/Function.h                            |     2 +-
 include/llvm/GlobalAlias.h                         |     3 +-
 include/llvm/GlobalValue.h                         |    17 +-
 include/llvm/GlobalVariable.h                      |    34 +-
 include/llvm/InitializePasses.h                    |   235 +
 include/llvm/InlineAsm.h                           |    59 +-
 include/llvm/InstrTypes.h                          |   193 +-
 include/llvm/Instruction.h                         |     7 +-
 include/llvm/Instructions.h                        |   422 +-
 include/llvm/IntrinsicInst.h                       |    29 +-
 include/llvm/Intrinsics.td                         |     8 +-
 include/llvm/IntrinsicsARM.td                      |     6 +
 include/llvm/IntrinsicsX86.td                      |   314 +-
 include/llvm/IntrinsicsXCore.td                    |    22 +
 include/llvm/LLVMContext.h                         |    39 +-
 include/llvm/LinkAllPasses.h                       |    18 +-
 include/llvm/LinkAllVMCore.h                       |    17 +-
 include/llvm/MC/EDInstInfo.h                       |     2 +-
 include/llvm/MC/ELFObjectWriter.h                  |    46 -
 include/llvm/MC/MCAsmInfo.h                        |   102 +-
 include/llvm/MC/MCAsmLayout.h                      |    43 +-
 include/llvm/MC/MCAssembler.h                      |   189 +-
 include/llvm/MC/MCCodeEmitter.h                    |    38 -
 include/llvm/MC/MCContext.h                        |    82 +-
 include/llvm/MC/MCDirectives.h                     |     9 +-
 include/llvm/MC/MCDisassembler.h                   |     2 +-
 include/llvm/MC/MCDwarf.h                          |   133 +-
 include/llvm/MC/MCELFObjectWriter.h                |    47 +
 include/llvm/MC/MCELFSymbolFlags.h                 |    11 +-
 include/llvm/MC/MCExpr.h                           |    44 +-
 include/llvm/MC/MCFixup.h                          |    16 +-
 include/llvm/MC/MCFixupKindInfo.h                  |    43 +
 include/llvm/MC/MCInst.h                           |    54 +-
 include/llvm/MC/MCInstPrinter.h                    |     8 +-
 include/llvm/MC/MCMachOSymbolFlags.h               |     4 +-
 include/llvm/MC/MCMachObjectWriter.h               |    65 +
 include/llvm/MC/MCObjectStreamer.h                 |    18 +-
 include/llvm/MC/MCObjectWriter.h                   |    35 +-
 include/llvm/MC/MCParser/AsmLexer.h                |    16 +-
 include/llvm/MC/MCParser/MCAsmLexer.h              |    33 +-
 include/llvm/MC/MCParser/MCAsmParser.h             |     6 +-
 include/llvm/MC/MCParser/MCParsedAsmOperand.h      |     4 +-
 include/llvm/MC/MCSection.h                        |    11 +-
 include/llvm/MC/MCSectionCOFF.h                    |     8 +-
 include/llvm/MC/MCSectionELF.h                     |   151 +-
 include/llvm/MC/MCSectionMachO.h                   |    22 +-
 include/llvm/MC/MCStreamer.h                       |   232 +-
 include/llvm/MC/MCSymbol.h                         |    19 +-
 include/llvm/MC/MCValue.h                          |     2 +-
 include/llvm/MC/MachObjectWriter.h                 |    44 -
 include/llvm/Metadata.h                            |     3 -
 include/llvm/Module.h                              |     2 +-
 include/llvm/Object/MachOFormat.h                  |   367 +
 include/llvm/Object/MachOObject.h                  |   180 +
 include/llvm/Object/ObjectFile.h                   |   262 +
 include/llvm/OperandTraits.h                       |    28 +-
 include/llvm/Operator.h                            |   159 +-
 include/llvm/Pass.h                                |     4 +-
 include/llvm/PassManagers.h                        |    11 +-
 include/llvm/PassRegistry.h                        |    75 +-
 include/llvm/PassSupport.h                         |   105 +-
 include/llvm/Support/AIXDataTypesFix.h             |    25 +
 include/llvm/Support/AlignOf.h                     |     6 +-
 include/llvm/Support/Allocator.h                   |    10 +-
 include/llvm/Support/Atomic.h                      |    39 +
 include/llvm/Support/COFF.h                        |    12 +-
 include/llvm/Support/CallSite.h                    |    19 +-
 include/llvm/Support/Casting.h                     |     4 +-
 include/llvm/Support/Compiler.h                    |    86 +-
 include/llvm/Support/ConstantFolder.h              |    72 +-
 include/llvm/Support/ConstantRange.h               |    17 +-
 include/llvm/Support/CrashRecoveryContext.h        |     8 +
 include/llvm/Support/DataTypes.h.cmake             |   189 +
 include/llvm/Support/DataTypes.h.in                |   111 +
 include/llvm/Support/Disassembler.h                |    35 +
 include/llvm/Support/Dwarf.h                       |     7 +-
 include/llvm/Support/DynamicLibrary.h              |    86 +
 include/llvm/Support/DynamicLinker.h               |    40 -
 include/llvm/Support/ELF.h                         |   299 +-
 include/llvm/Support/Endian.h                      |   213 +
 include/llvm/Support/Errno.h                       |    34 +
 include/llvm/Support/ErrorHandling.h               |    13 +-
 include/llvm/Support/FEnv.h                        |    56 +
 include/llvm/Support/FileSystem.h                  |   690 +
 include/llvm/Support/FileUtilities.h               |     2 +-
 include/llvm/Support/GraphWriter.h                 |    45 +-
 include/llvm/Support/Host.h                        |    66 +
 include/llvm/Support/IRBuilder.h                   |   479 +-
 include/llvm/Support/IRReader.h                    |    24 +-
 include/llvm/Support/IncludeFile.h                 |    79 +
 include/llvm/Support/LICENSE.TXT                   |     6 +
 include/llvm/Support/MachO.h                       |    38 +-
 include/llvm/Support/ManagedStatic.h               |    10 +-
 include/llvm/Support/MathExtras.h                  |    48 +-
 include/llvm/Support/Memory.h                      |    96 +
 include/llvm/Support/MemoryBuffer.h                |    58 +-
 include/llvm/Support/MemoryObject.h                |     2 +-
 include/llvm/Support/Mutex.h                       |   154 +
 include/llvm/Support/MutexGuard.h                  |     2 +-
 include/llvm/Support/NoFolder.h                    |   138 +-
 include/llvm/Support/Path.h                        |    16 +
 include/llvm/Support/PathV1.h                      |   755 +
 include/llvm/Support/PathV2.h                      |   347 +
 include/llvm/Support/PatternMatch.h                |   339 +-
 include/llvm/Support/PointerLikeTypeTraits.h       |     2 +-
 include/llvm/Support/Process.h                     |   146 +
 include/llvm/Support/Program.h                     |   157 +
 include/llvm/Support/RWMutex.h                     |   173 +
 include/llvm/Support/Signals.h                     |    59 +
 include/llvm/Support/Solaris.h                     |    40 +
 include/llvm/Support/SourceMgr.h                   |    15 +-
 include/llvm/Support/StableBasicBlockNumbering.h   |    59 -
 include/llvm/Support/StandardPasses.h              |    75 +-
 include/llvm/Support/SwapByteOrder.h               |   101 +
 include/llvm/Support/SystemUtils.h                 |    13 +-
 include/llvm/Support/TargetFolder.h                |    68 +-
 include/llvm/Support/ThreadLocal.h                 |    54 +
 include/llvm/Support/Threading.h                   |    59 +
 include/llvm/Support/TimeValue.h                   |   382 +
 include/llvm/Support/Timer.h                       |     2 +-
 include/llvm/Support/ToolOutputFile.h              |    62 +
 include/llvm/Support/TypeBuilder.h                 |     6 +
 include/llvm/Support/Valgrind.h                    |    32 +
 include/llvm/Support/raw_ostream.h                 |    66 +-
 include/llvm/Support/system_error.h                |   910 +
 include/llvm/System/AIXDataTypesFix.h              |    25 -
 include/llvm/System/Alarm.h                        |    51 -
 include/llvm/System/Atomic.h                       |    39 -
 include/llvm/System/DataTypes.h.cmake              |   189 -
 include/llvm/System/DataTypes.h.in                 |   111 -
 include/llvm/System/Disassembler.h                 |    35 -
 include/llvm/System/DynamicLibrary.h               |    86 -
 include/llvm/System/Errno.h                        |    34 -
 include/llvm/System/Host.h                         |    66 -
 include/llvm/System/IncludeFile.h                  |    79 -
 include/llvm/System/LICENSE.TXT                    |     6 -
 include/llvm/System/Memory.h                       |    96 -
 include/llvm/System/Mutex.h                        |   154 -
 include/llvm/System/Path.h                         |   716 -
 include/llvm/System/Process.h                      |   146 -
 include/llvm/System/Program.h                      |   155 -
 include/llvm/System/RWMutex.h                      |   173 -
 include/llvm/System/Signals.h                      |    59 -
 include/llvm/System/Solaris.h                      |    40 -
 include/llvm/System/ThreadLocal.h                  |    54 -
 include/llvm/System/Threading.h                    |    45 -
 include/llvm/System/TimeValue.h                    |   382 -
 include/llvm/System/Valgrind.h                     |    32 -
 include/llvm/Target/Mangler.h                      |     7 -
 include/llvm/Target/SubtargetFeature.h             |     2 +-
 include/llvm/Target/Target.td                      |   129 +-
 include/llvm/Target/TargetAsmBackend.h             |    67 +-
 include/llvm/Target/TargetAsmInfo.h                |    75 +
 include/llvm/Target/TargetAsmParser.h              |    16 +-
 include/llvm/Target/TargetCallingConv.h            |    14 +-
 include/llvm/Target/TargetData.h                   |     3 +-
 include/llvm/Target/TargetELFWriterInfo.h          |     3 +-
 include/llvm/Target/TargetFrameInfo.h              |    97 -
 include/llvm/Target/TargetFrameLowering.h          |   196 +
 include/llvm/Target/TargetInstrDesc.h              |    13 +-
 include/llvm/Target/TargetInstrInfo.h              |   187 +-
 include/llvm/Target/TargetInstrItineraries.h       |    88 +-
 include/llvm/Target/TargetJITInfo.h                |     2 +-
 include/llvm/Target/TargetLibraryInfo.h            |    66 +
 include/llvm/Target/TargetLowering.h               |   171 +-
 include/llvm/Target/TargetLoweringObjectFile.h     |    15 +-
 include/llvm/Target/TargetMachine.h                |    56 +-
 include/llvm/Target/TargetRegisterInfo.h           |   214 +-
 include/llvm/Target/TargetRegistry.h               |    66 +-
 include/llvm/Target/TargetSchedule.td              |    43 +-
 include/llvm/Target/TargetSelectionDAG.td          |   122 +-
 include/llvm/Target/TargetSelectionDAGInfo.h       |    10 +-
 include/llvm/Transforms/IPO.h                      |     8 +-
 include/llvm/Transforms/Instrumentation.h          |     3 +
 include/llvm/Transforms/RSProfiling.h              |    42 -
 include/llvm/Transforms/Scalar.h                   |    30 +-
 include/llvm/Transforms/Utils/AddrModeMatcher.h    |     8 +-
 include/llvm/Transforms/Utils/BasicBlockUtils.h    |    26 +-
 include/llvm/Transforms/Utils/BuildLibCalls.h      |    14 -
 include/llvm/Transforms/Utils/Cloning.h            |    15 +-
 include/llvm/Transforms/Utils/Local.h              |    16 +
 include/llvm/Transforms/Utils/PromoteMemToReg.h    |     3 +-
 include/llvm/Transforms/Utils/SSAUpdater.h         |    49 +
 .../llvm/Transforms/Utils/UnifyFunctionExitNodes.h |     4 +-
 include/llvm/Transforms/Utils/ValueMapper.h        |    25 +-
 include/llvm/Type.h                                |    26 +-
 include/llvm/TypeSymbolTable.h                     |     3 +-
 include/llvm/Use.h                                 |    37 +-
 include/llvm/User.h                                |    41 +-
 include/llvm/Value.h                               |    18 +-
 include/llvm/ValueSymbolTable.h                    |     2 +-
 lib/Analysis/AliasAnalysis.cpp                     |   187 +-
 lib/Analysis/AliasAnalysisCounter.cpp              |    45 +-
 lib/Analysis/AliasAnalysisEvaluator.cpp            |    36 +-
 lib/Analysis/AliasDebugger.cpp                     |    29 +-
 lib/Analysis/AliasSetTracker.cpp                   |   105 +-
 lib/Analysis/Analysis.cpp                          |    67 +-
 lib/Analysis/BasicAliasAnalysis.cpp                |   459 +-
 lib/Analysis/CFGPrinter.cpp                        |    27 +-
 lib/Analysis/CMakeLists.txt                        |    11 +-
 lib/Analysis/CaptureTracking.cpp                   |     3 +
 lib/Analysis/ConstantFolding.cpp                   |   298 +-
 lib/Analysis/DIBuilder.cpp                         |   801 +
 lib/Analysis/DbgInfoPrinter.cpp                    |   129 +-
 lib/Analysis/DebugInfo.cpp                         |   244 +-
 lib/Analysis/DomPrinter.cpp                        |    50 +-
 lib/Analysis/DominanceFrontier.cpp                 |   137 +
 lib/Analysis/IPA/CMakeLists.txt                    |     1 +
 lib/Analysis/IPA/CallGraph.cpp                     |    24 +-
 lib/Analysis/IPA/CallGraphSCCPass.cpp              |     1 -
 lib/Analysis/IPA/FindUsedTypes.cpp                 |     2 +-
 lib/Analysis/IPA/GlobalsModRef.cpp                 |    77 +-
 lib/Analysis/IPA/IPA.cpp                           |    29 +
 lib/Analysis/IVUsers.cpp                           |    12 +-
 lib/Analysis/InlineCost.cpp                        |   486 +-
 lib/Analysis/InstCount.cpp                         |     6 +-
 lib/Analysis/InstructionSimplify.cpp               |  1904 +-
 lib/Analysis/IntervalPartition.cpp                 |     2 +-
 lib/Analysis/LazyValueInfo.cpp                     |   844 +-
 lib/Analysis/LibCallAliasAnalysis.cpp              |    20 +-
 lib/Analysis/Lint.cpp                              |   110 +-
 lib/Analysis/LiveValues.cpp                        |    15 +-
 lib/Analysis/Loads.cpp                             |     4 +-
 lib/Analysis/LoopDependenceAnalysis.cpp            |    17 +-
 lib/Analysis/LoopInfo.cpp                          |    22 +-
 lib/Analysis/LoopPass.cpp                          |     1 -
 lib/Analysis/MemDepPrinter.cpp                     |   167 +
 lib/Analysis/MemoryDependenceAnalysis.cpp          |   373 +-
 lib/Analysis/ModuleDebugInfoPrinter.cpp            |     6 +-
 lib/Analysis/NoAliasAnalysis.cpp                   |    88 +
 lib/Analysis/PHITransAddr.cpp                      |   154 +-
 lib/Analysis/PathNumbering.cpp                     |   525 +
 lib/Analysis/PathProfileInfo.cpp                   |   434 +
 lib/Analysis/PathProfileVerifier.cpp               |   207 +
 lib/Analysis/PointerTracking.cpp                   |   316 -
 lib/Analysis/PostDominators.cpp                    |    10 +-
 lib/Analysis/ProfileEstimatorPass.cpp              |    11 +-
 lib/Analysis/ProfileInfo.cpp                       |    17 +-
 lib/Analysis/ProfileInfoLoaderPass.cpp             |     3 +-
 lib/Analysis/ProfileVerifierPass.cpp               |    11 +-
 lib/Analysis/RegionInfo.cpp                        |   168 +-
 lib/Analysis/RegionPass.cpp                        |   275 +
 lib/Analysis/RegionPrinter.cpp                     |    36 +-
 lib/Analysis/ScalarEvolution.cpp                   |  1170 +-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp      |    41 +-
 lib/Analysis/ScalarEvolutionExpander.cpp           |    51 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp            |   232 +-
 lib/Analysis/ValueTracking.cpp                     |   369 +-
 lib/Archive/Archive.cpp                            |    54 +-
 lib/Archive/ArchiveInternals.h                     |     2 +-
 lib/Archive/ArchiveWriter.cpp                      |   117 +-
 lib/AsmParser/LLLexer.cpp                          |    21 +-
 lib/AsmParser/LLLexer.h                            |     4 +-
 lib/AsmParser/LLParser.cpp                         |   151 +-
 lib/AsmParser/LLParser.h                           |     8 +-
 lib/AsmParser/LLToken.h                            |     3 +
 lib/AsmParser/Parser.cpp                           |    10 +-
 lib/Bitcode/CMakeLists.txt                         |     2 +
 lib/Bitcode/Reader/BitcodeReader.cpp               |   181 +-
 lib/Bitcode/Reader/BitcodeReader.h                 |     5 +
 lib/Bitcode/Writer/BitcodeWriter.cpp               |    32 +-
 lib/CMakeLists.txt                                 |    14 +
 lib/CodeGen/AggressiveAntiDepBreaker.cpp           |    45 +-
 lib/CodeGen/AllocationOrder.cpp                    |    68 +
 lib/CodeGen/AllocationOrder.h                      |    54 +
 lib/CodeGen/Analysis.cpp                           |    30 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp              |    64 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp         |    61 +-
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp     |   122 +-
 lib/CodeGen/AsmPrinter/CMakeLists.txt              |     3 +-
 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp       |   138 +
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp              |   681 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.h                |   139 +-
 lib/CodeGen/AsmPrinter/DwarfException.cpp          |   338 +-
 lib/CodeGen/AsmPrinter/DwarfException.h            |   155 +-
 lib/CodeGen/AsmPrinter/DwarfTableException.cpp     |   349 +
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp          |     1 +
 lib/CodeGen/CMakeLists.txt                         |    17 +-
 lib/CodeGen/CalcSpillWeights.cpp                   |    15 +-
 lib/CodeGen/CallingConvLower.cpp                   |    40 +-
 lib/CodeGen/CodeGen.cpp                            |    61 +
 lib/CodeGen/CriticalAntiDepBreaker.cpp             |    99 +-
 lib/CodeGen/CriticalAntiDepBreaker.h               |    12 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp         |    16 +-
 lib/CodeGen/DwarfEHPrepare.cpp                     |    30 +-
 lib/CodeGen/ELF.h                                  |     2 +-
 lib/CodeGen/ELFWriter.cpp                          |    17 +-
 lib/CodeGen/EdgeBundles.cpp                        |    86 +
 lib/CodeGen/ExpandISelPseudos.cpp                  |    82 +
 lib/CodeGen/GCMetadata.cpp                         |     7 +-
 lib/CodeGen/GCStrategy.cpp                         |    44 +-
 lib/CodeGen/IfConversion.cpp                       |   247 +-
 lib/CodeGen/InlineSpiller.cpp                      |   287 +-
 lib/CodeGen/IntrinsicLowering.cpp                  |    32 +-
 lib/CodeGen/LLVMTargetMachine.cpp                  |    43 +-
 lib/CodeGen/LatencyPriorityQueue.cpp               |    26 +-
 lib/CodeGen/LiveDebugVariables.cpp                 |   711 +
 lib/CodeGen/LiveDebugVariables.h                   |    63 +
 lib/CodeGen/LiveInterval.cpp                       |   312 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp               |   357 +-
 lib/CodeGen/LiveIntervalUnion.cpp                  |   315 +
 lib/CodeGen/LiveIntervalUnion.h                    |   258 +
 lib/CodeGen/LiveRangeEdit.cpp                      |   129 +
 lib/CodeGen/LiveRangeEdit.h                        |   135 +
 lib/CodeGen/LiveStackAnalysis.cpp                  |    20 +-
 lib/CodeGen/LiveVariables.cpp                      |    42 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp           |    29 +-
 lib/CodeGen/MachineBasicBlock.cpp                  |    79 +-
 lib/CodeGen/MachineCSE.cpp                         |   162 +-
 lib/CodeGen/MachineDominators.cpp                  |     3 +-
 lib/CodeGen/MachineFunction.cpp                    |    69 +-
 lib/CodeGen/MachineFunctionAnalysis.cpp            |    12 +-
 lib/CodeGen/MachineInstr.cpp                       |   171 +-
 lib/CodeGen/MachineLICM.cpp                        |   506 +-
 lib/CodeGen/MachineLoopInfo.cpp                    |     7 +-
 lib/CodeGen/MachineLoopRanges.cpp                  |   116 +
 lib/CodeGen/MachineModuleInfo.cpp                  |    76 +-
 lib/CodeGen/MachineRegisterInfo.cpp                |    64 +-
 lib/CodeGen/MachineSink.cpp                        |   312 +-
 lib/CodeGen/MachineVerifier.cpp                    |   377 +-
 lib/CodeGen/OptimizePHIs.cpp                       |     6 +-
 lib/CodeGen/PBQP/Graph.h                           |   425 -
 lib/CodeGen/PBQP/HeuristicBase.h                   |   246 -
 lib/CodeGen/PBQP/HeuristicSolver.h                 |   616 -
 lib/CodeGen/PBQP/Heuristics/Briggs.h               |   460 -
 lib/CodeGen/PBQP/Math.h                            |   288 -
 lib/CodeGen/PBQP/Solution.h                        |    89 -
 lib/CodeGen/PHIElimination.cpp                     |   143 +-
 lib/CodeGen/PHIElimination.h                       |   115 -
 lib/CodeGen/PHIEliminationUtils.cpp                |    61 +
 lib/CodeGen/PHIEliminationUtils.h                  |    25 +
 lib/CodeGen/PeepholeOptimizer.cpp                  |   131 +-
 lib/CodeGen/PostRAHazardRecognizer.cpp             |   180 -
 lib/CodeGen/PostRASchedulerList.cpp                |    74 +-
 lib/CodeGen/PreAllocSplitting.cpp                  |    43 +-
 lib/CodeGen/ProcessImplicitDefs.cpp                |     7 +-
 lib/CodeGen/PrologEpilogInserter.cpp               |    62 +-
 lib/CodeGen/PrologEpilogInserter.h                 |     4 +-
 lib/CodeGen/PseudoSourceValue.cpp                  |     2 +-
 lib/CodeGen/RegAllocBase.h                         |   181 +
 lib/CodeGen/RegAllocBasic.cpp                      |   523 +
 lib/CodeGen/RegAllocFast.cpp                       |    68 +-
 lib/CodeGen/RegAllocGreedy.cpp                     |  1285 ++
 lib/CodeGen/RegAllocLinearScan.cpp                 |   109 +-
 lib/CodeGen/RegAllocPBQP.cpp                       |   994 +-
 lib/CodeGen/RegisterCoalescer.cpp                  |     3 +-
 lib/CodeGen/RenderMachineFunction.cpp              |    14 +-
 lib/CodeGen/RenderMachineFunction.h                |     4 +-
 lib/CodeGen/ScheduleDAG.cpp                        |    15 +-
 lib/CodeGen/ScheduleDAGEmit.cpp                    |     2 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp                  |   137 +-
 lib/CodeGen/ScheduleDAGInstrs.h                    |    10 +
 lib/CodeGen/ScoreboardHazardRecognizer.cpp         |   243 +
 lib/CodeGen/SelectionDAG/CMakeLists.txt            |     2 -
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp           |  1211 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp              |    84 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  |     1 -
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp          |   141 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp           |  1056 +-
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp    |    33 +-
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |   426 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp         |    49 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h           |    34 +-
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  |    66 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp     |     4 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp   |   325 +-
 lib/CodeGen/SelectionDAG/SDNodeDbgValue.h          |     2 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp       |    12 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp       |    51 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp     |  1969 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp    |   333 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h      |    35 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp          |   684 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |   850 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h     |    14 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp      |   791 +-
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp   |    16 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp        |   669 +-
 lib/CodeGen/ShrinkWrapping.cpp                     |     4 +-
 lib/CodeGen/SimpleRegisterCoalescing.cpp           |   247 +-
 lib/CodeGen/SimpleRegisterCoalescing.h             |    13 +-
 lib/CodeGen/SjLjEHPrepare.cpp                      |   446 +-
 lib/CodeGen/SlotIndexes.cpp                        |    33 +-
 lib/CodeGen/SpillPlacement.cpp                     |   330 +
 lib/CodeGen/SpillPlacement.h                       |   108 +
 lib/CodeGen/Spiller.cpp                            |   316 +-
 lib/CodeGen/Spiller.h                              |    12 +-
 lib/CodeGen/SplitKit.cpp                           |  1491 +-
 lib/CodeGen/SplitKit.h                             |   419 +-
 lib/CodeGen/Splitter.cpp                           |    32 +-
 lib/CodeGen/Splitter.h                             |     4 +-
 lib/CodeGen/StackProtector.cpp                     |    28 +-
 lib/CodeGen/StackSlotColoring.cpp                  |    30 +-
 lib/CodeGen/StrongPHIElimination.cpp               |  1694 +-
 lib/CodeGen/TailDuplication.cpp                    |    21 +-
 lib/CodeGen/TargetInstrInfoImpl.cpp                |    50 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp       |   253 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp          |    54 +-
 lib/CodeGen/UnreachableBlockElim.cpp               |    15 +-
 lib/CodeGen/VirtRegMap.cpp                         |   165 +-
 lib/CodeGen/VirtRegMap.h                           |    33 +-
 lib/CodeGen/VirtRegRewriter.cpp                    |   896 +-
 lib/CompilerDriver/Action.cpp                      |    26 +-
 lib/CompilerDriver/CMakeLists.txt                  |     2 +-
 lib/CompilerDriver/CompilationGraph.cpp            |    66 +-
 lib/CompilerDriver/Main.cpp                        |     9 +-
 lib/CompilerDriver/Makefile                        |     2 +-
 lib/CompilerDriver/Tool.cpp                        |     6 +-
 lib/ExecutionEngine/CMakeLists.txt                 |     4 +
 lib/ExecutionEngine/ExecutionEngine.cpp            |   378 +-
 lib/ExecutionEngine/Interpreter/CMakeLists.txt     |    12 +
 lib/ExecutionEngine/Interpreter/Execution.cpp      |     6 +-
 .../Interpreter/ExternalFunctions.cpp              |     4 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h      |     2 +-
 lib/ExecutionEngine/JIT/Intercept.cpp              |     2 +-
 lib/ExecutionEngine/JIT/JIT.cpp                    |    23 +-
 lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp     |     4 +-
 lib/ExecutionEngine/JIT/JITDebugRegisterer.h       |     2 +-
 lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp        |    11 +-
 lib/ExecutionEngine/JIT/JITDwarfEmitter.h          |     2 +
 lib/ExecutionEngine/JIT/JITEmitter.cpp             |     4 +-
 lib/ExecutionEngine/JIT/JITMemoryManager.cpp       |     2 +-
 .../JIT/OProfileJITEventListener.cpp               |     2 +-
 lib/ExecutionEngine/JIT/TargetSelect.cpp           |     2 +-
 lib/ExecutionEngine/MCJIT/CMakeLists.txt           |     4 +
 lib/ExecutionEngine/MCJIT/MCJIT.cpp                |    92 +
 lib/ExecutionEngine/MCJIT/MCJIT.h                  |    68 +
 lib/ExecutionEngine/MCJIT/Makefile                 |    13 +
 lib/ExecutionEngine/MCJIT/TargetSelect.cpp         |    91 +
 lib/ExecutionEngine/Makefile                       |     2 +-
 lib/Linker/LinkItems.cpp                           |    13 +-
 lib/Linker/LinkModules.cpp                         |   100 +-
 lib/Linker/Linker.cpp                              |    18 +-
 lib/MC/CMakeLists.txt                              |     6 +
 lib/MC/ELFObjectWriter.cpp                         |  1858 +-
 lib/MC/MCAsmInfo.cpp                               |     7 +-
 lib/MC/MCAsmInfoDarwin.cpp                         |     9 +-
 lib/MC/MCAsmStreamer.cpp                           |   382 +-
 lib/MC/MCAssembler.cpp                             |   794 +-
 lib/MC/MCCodeEmitter.cpp                           |    12 -
 lib/MC/MCContext.cpp                               |   117 +-
 lib/MC/MCDisassembler/EDDisassembler.cpp           |     2 +-
 lib/MC/MCDisassembler/EDDisassembler.h             |     8 +-
 lib/MC/MCDisassembler/EDInst.cpp                   |     2 +
 lib/MC/MCDisassembler/EDInst.h                     |     2 +-
 lib/MC/MCDisassembler/EDOperand.cpp                |    23 +-
 lib/MC/MCDisassembler/EDOperand.h                  |     2 +-
 lib/MC/MCDisassembler/EDToken.h                    |     2 +-
 lib/MC/MCDwarf.cpp                                 |   793 +
 lib/MC/MCELFObjectTargetWriter.cpp                 |    23 +
 lib/MC/MCELFStreamer.cpp                           |   350 +-
 lib/MC/MCExpr.cpp                                  |   284 +-
 lib/MC/MCLoggingStreamer.cpp                       |    60 +-
 lib/MC/MCMachOStreamer.cpp                         |   355 +-
 lib/MC/MCMachObjectTargetWriter.cpp                |    22 +
 lib/MC/MCNullStreamer.cpp                          |    31 +-
 lib/MC/MCObjectStreamer.cpp                        |   187 +-
 lib/MC/MCObjectWriter.cpp                          |    65 +
 lib/MC/MCParser/AsmLexer.cpp                       |   182 +-
 lib/MC/MCParser/AsmParser.cpp                      |   889 +-
 lib/MC/MCParser/CMakeLists.txt                     |     1 +
 lib/MC/MCParser/COFFAsmParser.cpp                  |   144 +
 lib/MC/MCParser/DarwinAsmParser.cpp                |     2 +-
 lib/MC/MCParser/ELFAsmParser.cpp                   |   460 +-
 lib/MC/MCPureStreamer.cpp                          |   234 +
 lib/MC/MCSectionCOFF.cpp                           |     8 +
 lib/MC/MCSectionELF.cpp                            |   121 +-
 lib/MC/MCSectionMachO.cpp                          |    62 +-
 lib/MC/MCStreamer.cpp                              |   218 +-
 lib/MC/MCSymbol.cpp                                |    13 +
 lib/MC/MachObjectWriter.cpp                        |   935 +-
 lib/MC/TargetAsmBackend.cpp                        |    25 +-
 lib/MC/WinCOFFObjectWriter.cpp                     |   371 +-
 lib/MC/WinCOFFStreamer.cpp                         |   180 +-
 lib/Makefile                                       |     2 +-
 lib/Object/CMakeLists.txt                          |     6 +
 lib/Object/COFFObjectFile.cpp                      |   375 +
 lib/Object/ELFObjectFile.cpp                       |   686 +
 lib/Object/MachOObject.cpp                         |   342 +
 lib/Object/Makefile                                |    14 +
 lib/Object/ObjectFile.cpp                          |    71 +
 lib/Support/APFloat.cpp                            |    28 +-
 lib/Support/APInt.cpp                              |   234 +-
 lib/Support/Allocator.cpp                          |    10 +-
 lib/Support/Atomic.cpp                             |   112 +
 lib/Support/CMakeLists.txt                         |    58 +-
 lib/Support/CommandLine.cpp                        |    84 +-
 lib/Support/ConstantRange.cpp                      |    94 +-
 lib/Support/CrashRecoveryContext.cpp               |    30 +-
 lib/Support/Debug.cpp                              |     2 +-
 lib/Support/Disassembler.cpp                       |    75 +
 lib/Support/Dwarf.cpp                              |     4 +
 lib/Support/DynamicLibrary.cpp                     |   170 +
 lib/Support/Errno.cpp                              |    74 +
 lib/Support/ErrorHandling.cpp                      |    11 +-
 lib/Support/FileUtilities.cpp                      |    26 +-
 lib/Support/FoldingSet.cpp                         |    29 +-
 lib/Support/FormattedStream.cpp                    |     1 +
 lib/Support/GraphWriter.cpp                        |    48 +-
 lib/Support/Host.cpp                               |   307 +
 lib/Support/IncludeFile.cpp                        |    20 +
 lib/Support/IntEqClasses.cpp                       |    70 +
 lib/Support/IntervalMap.cpp                        |   161 +
 lib/Support/Makefile                               |     5 +
 lib/Support/ManagedStatic.cpp                      |     2 +-
 lib/Support/Memory.cpp                             |    74 +
 lib/Support/MemoryBuffer.cpp                       |   113 +-
 lib/Support/Mutex.cpp                              |   157 +
 lib/Support/Path.cpp                               |   283 +
 lib/Support/PathV2.cpp                             |   774 +
 lib/Support/PluginLoader.cpp                       |     4 +-
 lib/Support/PrettyStackTrace.cpp                   |    17 +-
 lib/Support/Process.cpp                            |    33 +
 lib/Support/Program.cpp                            |    56 +
 lib/Support/README.txt.system                      |    43 +
 lib/Support/RWMutex.cpp                            |   157 +
 lib/Support/SearchForAddressOfSpecialSymbol.cpp    |    73 +
 lib/Support/Signals.cpp                            |    34 +
 lib/Support/SourceMgr.cpp                          |    31 +-
 lib/Support/Statistic.cpp                          |     2 +-
 lib/Support/StringMap.cpp                          |     2 +-
 lib/Support/StringRef.cpp                          |    73 +-
 lib/Support/SystemUtils.cpp                        |    40 +-
 lib/Support/TargetRegistry.cpp                     |     2 +-
 lib/Support/ThreadLocal.cpp                        |    84 +
 lib/Support/Threading.cpp                          |   116 +
 lib/Support/TimeValue.cpp                          |    57 +
 lib/Support/Timer.cpp                              |     4 +-
 lib/Support/ToolOutputFile.cpp                     |    43 +
 lib/Support/Triple.cpp                             |   106 +-
 lib/Support/Twine.cpp                              |    34 +-
 lib/Support/Unix/Host.inc                          |    97 +
 lib/Support/Unix/Memory.inc                        |   151 +
 lib/Support/Unix/Mutex.inc                         |    43 +
 lib/Support/Unix/Path.inc                          |   887 +
 lib/Support/Unix/PathV2.inc                        |   507 +
 lib/Support/Unix/Process.inc                       |   295 +
 lib/Support/Unix/Program.inc                       |   424 +
 lib/Support/Unix/README.txt                        |    16 +
 lib/Support/Unix/RWMutex.inc                       |    43 +
 lib/Support/Unix/Signals.inc                       |   303 +
 lib/Support/Unix/ThreadLocal.inc                   |    26 +
 lib/Support/Unix/TimeValue.inc                     |    56 +
 lib/Support/Unix/Unix.h                            |    87 +
 lib/Support/Unix/system_error.inc                  |    34 +
 lib/Support/Valgrind.cpp                           |    54 +
 lib/Support/Windows/DynamicLibrary.inc             |   166 +
 lib/Support/Windows/Host.inc                       |    23 +
 lib/Support/Windows/Memory.inc                     |    73 +
 lib/Support/Windows/Mutex.inc                      |    58 +
 lib/Support/Windows/Path.inc                       |   921 +
 lib/Support/Windows/PathV2.inc                     |   750 +
 lib/Support/Windows/Process.inc                    |   222 +
 lib/Support/Windows/Program.inc                    |   403 +
 lib/Support/Windows/RWMutex.inc                    |    58 +
 lib/Support/Windows/Signals.inc                    |   328 +
 lib/Support/Windows/ThreadLocal.inc                |    54 +
 lib/Support/Windows/TimeValue.inc                  |    51 +
 lib/Support/Windows/Windows.h                      |   120 +
 lib/Support/Windows/explicit_symbols.inc           |    66 +
 lib/Support/Windows/system_error.inc               |   142 +
 lib/Support/raw_ostream.cpp                        |   103 +-
 lib/Support/regexec.c                              |     5 +-
 lib/Support/system_error.cpp                       |   130 +
 lib/System/Alarm.cpp                               |    33 -
 lib/System/Atomic.cpp                              |   112 -
 lib/System/CMakeLists.txt                          |    48 -
 lib/System/Disassembler.cpp                        |    75 -
 lib/System/DynamicLibrary.cpp                      |   161 -
 lib/System/Errno.cpp                               |    74 -
 lib/System/Host.cpp                                |   305 -
 lib/System/IncludeFile.cpp                         |    20 -
 lib/System/Makefile                                |    25 -
 lib/System/Memory.cpp                              |    74 -
 lib/System/Mutex.cpp                               |   157 -
 lib/System/Path.cpp                                |   264 -
 lib/System/Process.cpp                             |    33 -
 lib/System/Program.cpp                             |    56 -
 lib/System/README.txt                              |    43 -
 lib/System/RWMutex.cpp                             |   157 -
 lib/System/SearchForAddressOfSpecialSymbol.cpp     |    64 -
 lib/System/Signals.cpp                             |    34 -
 lib/System/ThreadLocal.cpp                         |    85 -
 lib/System/Threading.cpp                           |    64 -
 lib/System/TimeValue.cpp                           |    58 -
 lib/System/Unix/Alarm.inc                          |    72 -
 lib/System/Unix/Host.inc                           |    96 -
 lib/System/Unix/Memory.inc                         |   151 -
 lib/System/Unix/Mutex.inc                          |    43 -
 lib/System/Unix/Path.inc                           |   923 -
 lib/System/Unix/Process.inc                        |   295 -
 lib/System/Unix/Program.inc                        |   402 -
 lib/System/Unix/README.txt                         |    16 -
 lib/System/Unix/RWMutex.inc                        |    43 -
 lib/System/Unix/Signals.inc                        |   299 -
 lib/System/Unix/ThreadLocal.inc                    |    26 -
 lib/System/Unix/TimeValue.inc                      |    56 -
 lib/System/Unix/Unix.h                             |    87 -
 lib/System/Valgrind.cpp                            |    54 -
 lib/System/Win32/Alarm.inc                         |    43 -
 lib/System/Win32/DynamicLibrary.inc                |   200 -
 lib/System/Win32/Host.inc                          |    23 -
 lib/System/Win32/Memory.inc                        |    73 -
 lib/System/Win32/Mutex.inc                         |    58 -
 lib/System/Win32/Path.inc                          |   872 -
 lib/System/Win32/Process.inc                       |   221 -
 lib/System/Win32/Program.inc                       |   409 -
 lib/System/Win32/RWMutex.inc                       |    58 -
 lib/System/Win32/Signals.inc                       |   332 -
 lib/System/Win32/ThreadLocal.inc                   |    53 -
 lib/System/Win32/TimeValue.inc                     |    51 -
 lib/System/Win32/Win32.h                           |    57 -
 lib/Target/ARM/ARM.h                               |   114 +-
 lib/Target/ARM/ARM.td                              |    58 +-
 lib/Target/ARM/ARMAddressingModes.h                |    12 +
 lib/Target/ARM/ARMAsmBackend.cpp                   |   512 +
 lib/Target/ARM/ARMAsmPrinter.cpp                   |  2225 ++-
 lib/Target/ARM/ARMAsmPrinter.h                     |   112 +
 lib/Target/ARM/ARMBaseInfo.h                       |   249 +
 lib/Target/ARM/ARMBaseInstrInfo.cpp                |  1305 +-
 lib/Target/ARM/ARMBaseInstrInfo.h                  |   171 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp             |   948 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h               |    65 +-
 lib/Target/ARM/ARMBuildAttrs.h                     |    73 +-
 lib/Target/ARM/ARMCallingConv.h                    |   160 +
 lib/Target/ARM/ARMCallingConv.td                   |    29 +
 lib/Target/ARM/ARMCodeEmitter.cpp                  |   368 +-
 lib/Target/ARM/ARMConstantIslandPass.cpp           |    27 +-
 lib/Target/ARM/ARMConstantPoolValue.cpp            |    26 +-
 lib/Target/ARM/ARMConstantPoolValue.h              |    43 +-
 lib/Target/ARM/ARMELFWriterInfo.cpp                |    83 +
 lib/Target/ARM/ARMELFWriterInfo.h                  |    58 +
 lib/Target/ARM/ARMExpandPseudoInsts.cpp            |  1227 +-
 lib/Target/ARM/ARMFastISel.cpp                     |  1670 +-
 lib/Target/ARM/ARMFixupKinds.h                     |    97 +
 lib/Target/ARM/ARMFrameInfo.h                      |    32 -
 lib/Target/ARM/ARMFrameLowering.cpp                |  1021 +
 lib/Target/ARM/ARMFrameLowering.h                  |    74 +
 lib/Target/ARM/ARMGlobalMerge.cpp                  |    69 +-
 lib/Target/ARM/ARMHazardRecognizer.cpp             |   121 +
 lib/Target/ARM/ARMHazardRecognizer.h               |    54 +
 lib/Target/ARM/ARMISelDAGToDAG.cpp                 |  1823 +-
 lib/Target/ARM/ARMISelLowering.cpp                 |  2278 ++-
 lib/Target/ARM/ARMISelLowering.h                   |    88 +-
 lib/Target/ARM/ARMInstrFormats.td                  |  1191 +-
 lib/Target/ARM/ARMInstrInfo.cpp                    |    33 +-
 lib/Target/ARM/ARMInstrInfo.h                      |     5 -
 lib/Target/ARM/ARMInstrInfo.td                     |  3554 ++--
 lib/Target/ARM/ARMInstrNEON.td                     |  3650 ++--
 lib/Target/ARM/ARMInstrThumb.td                    |  1661 +-
 lib/Target/ARM/ARMInstrThumb2.td                   |  2725 ++-
 lib/Target/ARM/ARMInstrVFP.td                      |  1146 +-
 lib/Target/ARM/ARMJITInfo.cpp                      |    13 +-
 lib/Target/ARM/ARMJITInfo.h                        |     2 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp           |   519 +-
 lib/Target/ARM/ARMMCCodeEmitter.cpp                |  1230 ++
 lib/Target/ARM/ARMMCExpr.cpp                       |    73 +
 lib/Target/ARM/ARMMCExpr.h                         |    73 +
 lib/Target/ARM/ARMMCInstLower.cpp                  |   147 +-
 lib/Target/ARM/ARMMCInstLower.h                    |    56 -
 lib/Target/ARM/ARMMachineFunctionInfo.h            |    60 +-
 lib/Target/ARM/ARMPerfectShuffle.h                 | 13122 ++++++-------
 lib/Target/ARM/ARMRegisterInfo.cpp                 |     1 -
 lib/Target/ARM/ARMRegisterInfo.td                  |    90 +-
 lib/Target/ARM/ARMSchedule.td                      |   140 +-
 lib/Target/ARM/ARMScheduleA8.td                    |   862 +-
 lib/Target/ARM/ARMScheduleA9.td                    |  1799 +-
 lib/Target/ARM/ARMScheduleV6.td                    |   130 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp             |    16 +-
 lib/Target/ARM/ARMSelectionDAGInfo.h               |     6 +-
 lib/Target/ARM/ARMSubtarget.cpp                    |   119 +-
 lib/Target/ARM/ARMSubtarget.h                      |    48 +-
 lib/Target/ARM/ARMTargetMachine.cpp                |    62 +-
 lib/Target/ARM/ARMTargetMachine.h                  |    36 +-
 lib/Target/ARM/ARMTargetObjectFile.cpp             |    19 +-
 lib/Target/ARM/ARMTargetObjectFile.h               |    11 +-
 lib/Target/ARM/AsmParser/ARMAsmLexer.cpp           |   192 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp          |  1530 +-
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp       |   800 -
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.h         |   118 -
 lib/Target/ARM/AsmPrinter/CMakeLists.txt           |     6 -
 lib/Target/ARM/AsmPrinter/Makefile                 |    15 -
 lib/Target/ARM/CMakeLists.txt                      |    25 +-
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp    |    49 +-
 .../ARM/Disassembler/ARMDisassemblerCore.cpp       |   259 +-
 lib/Target/ARM/Disassembler/CMakeLists.txt         |    14 +
 .../ARM/Disassembler/ThumbDisassemblerCore.h       |   298 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp      |   711 +
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h        |   111 +
 lib/Target/ARM/InstPrinter/CMakeLists.txt          |     6 +
 lib/Target/ARM/InstPrinter/Makefile                |    15 +
 lib/Target/ARM/MLxExpansionPass.cpp                |   321 +
 lib/Target/ARM/Makefile                            |     4 +-
 lib/Target/ARM/NEONPreAllocPass.cpp                |   406 -
 lib/Target/ARM/README-Thumb.txt                    |    21 +-
 lib/Target/ARM/Thumb1FrameLowering.cpp             |   352 +
 lib/Target/ARM/Thumb1FrameLowering.h               |    52 +
 lib/Target/ARM/Thumb1InstrInfo.cpp                 |    84 +-
 lib/Target/ARM/Thumb1InstrInfo.h                   |    17 +-
 lib/Target/ARM/Thumb1RegisterInfo.cpp              |   332 +-
 lib/Target/ARM/Thumb1RegisterInfo.h                |     5 -
 lib/Target/ARM/Thumb2HazardRecognizer.cpp          |    53 -
 lib/Target/ARM/Thumb2HazardRecognizer.h            |    40 -
 lib/Target/ARM/Thumb2InstrInfo.cpp                 |    44 +-
 lib/Target/ARM/Thumb2InstrInfo.h                   |     8 -
 lib/Target/ARM/Thumb2RegisterInfo.cpp              |     1 -
 lib/Target/ARM/Thumb2SizeReduction.cpp             |   133 +-
 lib/Target/Alpha/Alpha.h                           |     7 +
 lib/Target/Alpha/AlphaAsmPrinter.cpp               |   166 +
 lib/Target/Alpha/AlphaCodeEmitter.cpp              |   222 -
 lib/Target/Alpha/AlphaFrameLowering.cpp            |   143 +
 lib/Target/Alpha/AlphaFrameLowering.h              |    43 +
 lib/Target/Alpha/AlphaISelDAGToDAG.cpp             |    19 +-
 lib/Target/Alpha/AlphaISelLowering.cpp             |    87 +-
 lib/Target/Alpha/AlphaISelLowering.h               |     5 +
 lib/Target/Alpha/AlphaInstrInfo.td                 |     6 +-
 lib/Target/Alpha/AlphaJITInfo.cpp                  |   310 -
 lib/Target/Alpha/AlphaJITInfo.h                    |    53 -
 lib/Target/Alpha/AlphaRegisterInfo.cpp             |   152 +-
 lib/Target/Alpha/AlphaRegisterInfo.h               |    10 -
 lib/Target/Alpha/AlphaSchedule.td                  |     4 +-
 lib/Target/Alpha/AlphaTargetMachine.cpp            |    10 +-
 lib/Target/Alpha/AlphaTargetMachine.h              |    20 +-
 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp    |   166 -
 lib/Target/Alpha/AsmPrinter/CMakeLists.txt         |     6 -
 lib/Target/Alpha/AsmPrinter/Makefile               |    15 -
 lib/Target/Alpha/CMakeLists.txt                    |     7 +-
 lib/Target/Alpha/Makefile                          |     4 +-
 .../Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp     |   156 -
 lib/Target/Blackfin/AsmPrinter/CMakeLists.txt      |     6 -
 lib/Target/Blackfin/AsmPrinter/Makefile            |    16 -
 lib/Target/Blackfin/BlackfinAsmPrinter.cpp         |   156 +
 lib/Target/Blackfin/BlackfinFrameLowering.cpp      |   124 +
 lib/Target/Blackfin/BlackfinFrameLowering.h        |    46 +
 lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp       |     6 +-
 lib/Target/Blackfin/BlackfinISelLowering.cpp       |    61 +-
 lib/Target/Blackfin/BlackfinISelLowering.h         |     6 +
 lib/Target/Blackfin/BlackfinInstrInfo.td           |     8 +-
 lib/Target/Blackfin/BlackfinRegisterInfo.cpp       |   106 +-
 lib/Target/Blackfin/BlackfinRegisterInfo.h         |     8 -
 lib/Target/Blackfin/BlackfinRegisterInfo.td        |    20 +-
 lib/Target/Blackfin/BlackfinTargetMachine.cpp      |     2 +-
 lib/Target/Blackfin/BlackfinTargetMachine.h        |    17 +-
 lib/Target/Blackfin/CMakeLists.txt                 |     4 +
 lib/Target/Blackfin/Makefile                       |     2 +-
 lib/Target/CBackend/CBackend.cpp                   |   337 +-
 lib/Target/CBackend/CMakeLists.txt                 |     2 +
 lib/Target/CMakeLists.txt                          |    44 +-
 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt       |     9 -
 lib/Target/CellSPU/AsmPrinter/Makefile             |    17 -
 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp    |   364 -
 lib/Target/CellSPU/CMakeLists.txt                  |     6 +-
 lib/Target/CellSPU/Makefile                        |     2 +-
 lib/Target/CellSPU/README.txt                      |     2 +-
 lib/Target/CellSPU/SPU.h                           |     1 +
 lib/Target/CellSPU/SPU64InstrInfo.td               |    79 +-
 lib/Target/CellSPU/SPUAsmPrinter.cpp               |   327 +
 lib/Target/CellSPU/SPUFrameInfo.cpp                |    29 -
 lib/Target/CellSPU/SPUFrameInfo.h                  |    75 -
 lib/Target/CellSPU/SPUFrameLowering.cpp            |   276 +
 lib/Target/CellSPU/SPUFrameLowering.h              |    94 +
 lib/Target/CellSPU/SPUHazardRecognizers.cpp        |     4 +-
 lib/Target/CellSPU/SPUHazardRecognizers.h          |     4 +-
 lib/Target/CellSPU/SPUISelDAGToDAG.cpp             |   223 +-
 lib/Target/CellSPU/SPUISelLowering.cpp             |   787 +-
 lib/Target/CellSPU/SPUISelLowering.h               |    23 +-
 lib/Target/CellSPU/SPUInstrInfo.cpp                |    15 +-
 lib/Target/CellSPU/SPUInstrInfo.h                  |     4 +
 lib/Target/CellSPU/SPUInstrInfo.td                 |   396 +-
 lib/Target/CellSPU/SPUMCAsmInfo.cpp                |     3 +-
 lib/Target/CellSPU/SPUNodes.td                     |    18 +-
 lib/Target/CellSPU/SPUNopFiller.cpp                |   153 +
 lib/Target/CellSPU/SPUOperands.td                  |    18 +-
 lib/Target/CellSPU/SPURegisterInfo.cpp             |   264 +-
 lib/Target/CellSPU/SPURegisterInfo.h               |    16 +-
 lib/Target/CellSPU/SPUSchedule.td                  |     8 +-
 lib/Target/CellSPU/SPUSubtarget.cpp                |    21 +
 lib/Target/CellSPU/SPUSubtarget.h                  |     6 +-
 lib/Target/CellSPU/SPUTargetMachine.cpp            |    13 +-
 lib/Target/CellSPU/SPUTargetMachine.h              |    15 +-
 lib/Target/CppBackend/CMakeLists.txt               |     2 +
 lib/Target/CppBackend/CPPBackend.cpp               |    37 +-
 lib/Target/MBlaze/AsmParser/CMakeLists.txt         |     8 +
 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp     |   127 +
 lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp    |   568 +
 lib/Target/MBlaze/AsmParser/Makefile               |    15 +
 lib/Target/MBlaze/AsmPrinter/CMakeLists.txt        |     9 -
 lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp  |   295 -
 lib/Target/MBlaze/AsmPrinter/Makefile              |    17 -
 lib/Target/MBlaze/CMakeLists.txt                   |    14 +-
 lib/Target/MBlaze/Disassembler/CMakeLists.txt      |    16 +
 .../MBlaze/Disassembler/MBlazeDisassembler.cpp     |   647 +
 .../MBlaze/Disassembler/MBlazeDisassembler.h       |    55 +
 lib/Target/MBlaze/Disassembler/Makefile            |    16 +
 lib/Target/MBlaze/InstPrinter/CMakeLists.txt       |     8 +
 .../MBlaze/InstPrinter/MBlazeInstPrinter.cpp       |    69 +
 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h  |    43 +
 lib/Target/MBlaze/InstPrinter/Makefile             |    16 +
 lib/Target/MBlaze/MBlaze.h                         |     8 +
 lib/Target/MBlaze/MBlaze.td                        |    41 +-
 lib/Target/MBlaze/MBlazeAsmBackend.cpp             |   163 +
 lib/Target/MBlaze/MBlazeAsmPrinter.cpp             |   335 +
 lib/Target/MBlaze/MBlazeCallingConv.td             |    14 +-
 lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp        |   191 +-
 lib/Target/MBlaze/MBlazeELFWriterInfo.cpp          |   111 +
 lib/Target/MBlaze/MBlazeELFWriterInfo.h            |    58 +
 lib/Target/MBlaze/MBlazeFrameLowering.cpp          |   450 +
 lib/Target/MBlaze/MBlazeFrameLowering.h            |    53 +
 lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp           |    87 +-
 lib/Target/MBlaze/MBlazeISelLowering.cpp           |   720 +-
 lib/Target/MBlaze/MBlazeISelLowering.h             |    46 +-
 lib/Target/MBlaze/MBlazeInstrFPU.td                |   253 +-
 lib/Target/MBlaze/MBlazeInstrFSL.td                |   326 +-
 lib/Target/MBlaze/MBlazeInstrFormats.td            |   272 +-
 lib/Target/MBlaze/MBlazeInstrInfo.cpp              |   179 +-
 lib/Target/MBlaze/MBlazeInstrInfo.h                |   166 +-
 lib/Target/MBlaze/MBlazeInstrInfo.td               |   927 +-
 lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp          |     6 +-
 lib/Target/MBlaze/MBlazeIntrinsics.td              |     6 +-
 lib/Target/MBlaze/MBlazeMCAsmInfo.cpp              |     9 +-
 lib/Target/MBlaze/MBlazeMCAsmInfo.h                |     4 +-
 lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp          |   223 +
 lib/Target/MBlaze/MBlazeMCInstLower.cpp            |   166 +
 lib/Target/MBlaze/MBlazeMCInstLower.h              |    50 +
 lib/Target/MBlaze/MBlazeMachineFunction.h          |    86 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.cpp           |   343 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.h             |    20 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.td            |   140 +-
 lib/Target/MBlaze/MBlazeRelocations.h              |    47 +
 lib/Target/MBlaze/MBlazeSchedule.td                |     4 +-
 lib/Target/MBlaze/MBlazeTargetMachine.cpp          |    66 +-
 lib/Target/MBlaze/MBlazeTargetMachine.h            |    33 +-
 lib/Target/MBlaze/MBlazeTargetObjectFile.cpp       |     9 +-
 lib/Target/MBlaze/MBlazeTargetObjectFile.h         |     7 +-
 lib/Target/MBlaze/Makefile                         |    12 +-
 lib/Target/MBlaze/TODO                             |    26 +
 lib/Target/MBlaze/TargetInfo/CMakeLists.txt        |     3 +-
 lib/Target/MSP430/AsmPrinter/CMakeLists.txt        |     8 -
 lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp  |   179 -
 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp |   116 -
 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h   |    43 -
 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp |   150 -
 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h   |    50 -
 lib/Target/MSP430/AsmPrinter/Makefile              |    15 -
 lib/Target/MSP430/CMakeLists.txt                   |     6 +-
 lib/Target/MSP430/InstPrinter/CMakeLists.txt       |     6 +
 .../MSP430/InstPrinter/MSP430InstPrinter.cpp       |   113 +
 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h  |    43 +
 lib/Target/MSP430/InstPrinter/Makefile             |    15 +
 lib/Target/MSP430/MSP430.td                        |     1 +
 lib/Target/MSP430/MSP430AsmPrinter.cpp             |   179 +
 lib/Target/MSP430/MSP430FrameLowering.cpp          |   223 +
 lib/Target/MSP430/MSP430FrameLowering.h            |    53 +
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp           |    17 +-
 lib/Target/MSP430/MSP430ISelLowering.cpp           |    22 +-
 lib/Target/MSP430/MSP430InstrInfo.cpp              |    52 +-
 lib/Target/MSP430/MSP430InstrInfo.h                |     9 -
 lib/Target/MSP430/MSP430InstrInfo.td               |    16 +-
 lib/Target/MSP430/MSP430MCInstLower.cpp            |   150 +
 lib/Target/MSP430/MSP430MCInstLower.h              |    50 +
 lib/Target/MSP430/MSP430RegisterInfo.cpp           |   170 +-
 lib/Target/MSP430/MSP430RegisterInfo.h             |     6 -
 lib/Target/MSP430/MSP430RegisterInfo.td            |     8 +-
 lib/Target/MSP430/MSP430TargetMachine.cpp          |    14 +-
 lib/Target/MSP430/MSP430TargetMachine.h            |    12 +-
 lib/Target/MSP430/Makefile                         |     2 +-
 lib/Target/MSP430/TargetInfo/CMakeLists.txt        |     2 +-
 lib/Target/Mangler.cpp                             |    10 -
 lib/Target/Mips/AsmPrinter/CMakeLists.txt          |     9 -
 lib/Target/Mips/AsmPrinter/Makefile                |    17 -
 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp      |   386 -
 lib/Target/Mips/CMakeLists.txt                     |     4 +-
 lib/Target/Mips/Makefile                           |     2 +-
 lib/Target/Mips/Mips.td                            |    30 +-
 lib/Target/Mips/MipsAsmPrinter.cpp                 |   393 +
 lib/Target/Mips/MipsDelaySlotFiller.cpp            |    13 +-
 lib/Target/Mips/MipsFrameLowering.cpp              |   314 +
 lib/Target/Mips/MipsFrameLowering.h                |    48 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp               |    28 +-
 lib/Target/Mips/MipsISelLowering.cpp               |   620 +-
 lib/Target/Mips/MipsISelLowering.h                 |    18 +-
 lib/Target/Mips/MipsInstrFPU.td                    |     2 +-
 lib/Target/Mips/MipsInstrInfo.td                   |   355 +-
 lib/Target/Mips/MipsMachineFunction.h              |    34 +-
 lib/Target/Mips/MipsRegisterInfo.cpp               |   287 +-
 lib/Target/Mips/MipsRegisterInfo.h                 |     5 -
 lib/Target/Mips/MipsSchedule.td                    |     2 +-
 lib/Target/Mips/MipsSubtarget.h                    |     4 +-
 lib/Target/Mips/MipsTargetMachine.cpp              |    20 +-
 lib/Target/Mips/MipsTargetMachine.h                |    21 +-
 lib/Target/Mips/MipsTargetObjectFile.cpp           |    29 +-
 lib/Target/PIC16/AsmPrinter/CMakeLists.txt         |     9 -
 lib/Target/PIC16/AsmPrinter/Makefile               |    15 -
 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp    |   512 -
 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h      |    88 -
 lib/Target/PIC16/CMakeLists.txt                    |    26 -
 lib/Target/PIC16/Makefile                          |    24 -
 lib/Target/PIC16/PIC16.h                           |   134 -
 lib/Target/PIC16/PIC16.td                          |    40 -
 lib/Target/PIC16/PIC16ABINames.h                   |   399 -
 lib/Target/PIC16/PIC16DebugInfo.cpp                |   490 -
 lib/Target/PIC16/PIC16DebugInfo.h                  |   156 -
 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp             |    50 -
 lib/Target/PIC16/PIC16ISelDAGToDAG.h               |    60 -
 lib/Target/PIC16/PIC16ISelLowering.cpp             |  2000 --
 lib/Target/PIC16/PIC16ISelLowering.h               |   253 -
 lib/Target/PIC16/PIC16InstrFormats.td              |   117 -
 lib/Target/PIC16/PIC16InstrInfo.cpp                |   224 -
 lib/Target/PIC16/PIC16InstrInfo.h                  |    76 -
 lib/Target/PIC16/PIC16InstrInfo.td                 |   540 -
 lib/Target/PIC16/PIC16MCAsmInfo.cpp                |    59 -
 lib/Target/PIC16/PIC16MCAsmInfo.h                  |    35 -
 lib/Target/PIC16/PIC16MachineFunctionInfo.h        |    52 -
 lib/Target/PIC16/PIC16MemSelOpt.cpp                |   254 -
 lib/Target/PIC16/PIC16Passes/Makefile              |    15 -
 lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp       |   299 -
 lib/Target/PIC16/PIC16Passes/PIC16Cloner.h         |    83 -
 lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp      |   182 -
 lib/Target/PIC16/PIC16Passes/PIC16Overlay.h        |    60 -
 lib/Target/PIC16/PIC16RegisterInfo.cpp             |    84 -
 lib/Target/PIC16/PIC16RegisterInfo.h               |    64 -
 lib/Target/PIC16/PIC16RegisterInfo.td              |    33 -
 lib/Target/PIC16/PIC16Section.cpp                  |   104 -
 lib/Target/PIC16/PIC16Section.h                    |    99 -
 lib/Target/PIC16/PIC16SelectionDAGInfo.cpp         |    23 -
 lib/Target/PIC16/PIC16SelectionDAGInfo.h           |    31 -
 lib/Target/PIC16/PIC16Subtarget.cpp                |    27 -
 lib/Target/PIC16/PIC16Subtarget.h                  |    44 -
 lib/Target/PIC16/PIC16TargetMachine.cpp            |    55 -
 lib/Target/PIC16/PIC16TargetMachine.h              |    70 -
 lib/Target/PIC16/PIC16TargetObjectFile.cpp         |   384 -
 lib/Target/PIC16/PIC16TargetObjectFile.h           |   168 -
 lib/Target/PIC16/TargetInfo/CMakeLists.txt         |     7 -
 lib/Target/PIC16/TargetInfo/Makefile               |    15 -
 lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp    |    22 -
 lib/Target/PTX/CMakeLists.txt                      |    26 +
 lib/Target/PTX/Makefile                            |    26 +
 lib/Target/PTX/PTX.h                               |    49 +
 lib/Target/PTX/PTX.td                              |    54 +
 lib/Target/PTX/PTXAsmPrinter.cpp                   |   347 +
 lib/Target/PTX/PTXFrameLowering.cpp                |    24 +
 lib/Target/PTX/PTXFrameLowering.h                  |    43 +
 lib/Target/PTX/PTXISelDAGToDAG.cpp                 |   151 +
 lib/Target/PTX/PTXISelLowering.cpp                 |   210 +
 lib/Target/PTX/PTXISelLowering.h                   |    67 +
 lib/Target/PTX/PTXInstrFormats.td                  |    24 +
 lib/Target/PTX/PTXInstrInfo.cpp                    |    87 +
 lib/Target/PTX/PTXInstrInfo.h                      |    75 +
 lib/Target/PTX/PTXInstrInfo.td                     |   257 +
 lib/Target/PTX/PTXMCAsmInfo.cpp                    |    30 +
 lib/Target/PTX/PTXMCAsmInfo.h                      |    28 +
 lib/Target/PTX/PTXMCAsmStreamer.cpp                |   542 +
 lib/Target/PTX/PTXMFInfoExtract.cpp                |    96 +
 lib/Target/PTX/PTXMachineFunctionInfo.h            |    79 +
 lib/Target/PTX/PTXRegisterInfo.cpp                 |    19 +
 lib/Target/PTX/PTXRegisterInfo.h                   |    63 +
 lib/Target/PTX/PTXRegisterInfo.td                  |   102 +
 lib/Target/PTX/PTXSubtarget.cpp                    |    23 +
 lib/Target/PTX/PTXSubtarget.h                      |    32 +
 lib/Target/PTX/PTXTargetMachine.cpp                |    60 +
 lib/Target/PTX/PTXTargetMachine.h                  |    60 +
 lib/Target/PTX/TargetInfo/CMakeLists.txt           |     7 +
 lib/Target/PTX/TargetInfo/Makefile                 |    15 +
 lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp        |    21 +
 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt       |     6 -
 lib/Target/PowerPC/AsmPrinter/Makefile             |    15 -
 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp    |   922 -
 lib/Target/PowerPC/CMakeLists.txt                  |     9 +-
 lib/Target/PowerPC/InstPrinter/CMakeLists.txt      |     6 +
 lib/Target/PowerPC/InstPrinter/Makefile            |    16 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp  |   292 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h    |    69 +
 lib/Target/PowerPC/Makefile                        |     5 +-
 lib/Target/PowerPC/PPC.h                           |    62 +-
 lib/Target/PowerPC/PPC.td                          |     6 +
 lib/Target/PowerPC/PPCAsmBackend.cpp               |   119 +
 lib/Target/PowerPC/PPCAsmPrinter.cpp               |   696 +
 lib/Target/PowerPC/PPCCodeEmitter.cpp              |   253 +-
 lib/Target/PowerPC/PPCFixupKinds.h                 |    45 +
 lib/Target/PowerPC/PPCFrameInfo.h                  |   300 -
 lib/Target/PowerPC/PPCFrameLowering.cpp            |   971 +
 lib/Target/PowerPC/PPCFrameLowering.h              |   322 +
 lib/Target/PowerPC/PPCHazardRecognizers.cpp        |    56 +-
 lib/Target/PowerPC/PPCHazardRecognizers.h          |    20 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp             |   210 +-
 lib/Target/PowerPC/PPCISelLowering.cpp             |   731 +-
 lib/Target/PowerPC/PPCISelLowering.h               |     7 +-
 lib/Target/PowerPC/PPCInstr64Bit.td                |    57 +-
 lib/Target/PowerPC/PPCInstrFormats.td              |    39 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp                |    81 +-
 lib/Target/PowerPC/PPCInstrInfo.h                  |    26 +-
 lib/Target/PowerPC/PPCInstrInfo.td                 |   177 +-
 lib/Target/PowerPC/PPCJITInfo.cpp                  |     2 +-
 lib/Target/PowerPC/PPCMCAsmInfo.cpp                |     5 +-
 lib/Target/PowerPC/PPCMCCodeEmitter.cpp            |   195 +
 lib/Target/PowerPC/PPCMCInstLower.cpp              |   172 +
 lib/Target/PowerPC/PPCRegisterInfo.cpp             |   975 +-
 lib/Target/PowerPC/PPCRegisterInfo.h               |    19 -
 lib/Target/PowerPC/PPCRegisterInfo.td              |    13 +-
 lib/Target/PowerPC/PPCScheduleG3.td                |     2 +-
 lib/Target/PowerPC/PPCScheduleG4.td                |     2 +-
 lib/Target/PowerPC/PPCScheduleG4Plus.td            |     2 +-
 lib/Target/PowerPC/PPCScheduleG5.td                |     2 +-
 lib/Target/PowerPC/PPCSubtarget.cpp                |     2 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp            |    31 +-
 lib/Target/PowerPC/PPCTargetMachine.h              |    18 +-
 lib/Target/PowerPC/README.txt                      |    29 +-
 lib/Target/README.txt                              |   979 +-
 lib/Target/Sparc/AsmPrinter/CMakeLists.txt         |     6 -
 lib/Target/Sparc/AsmPrinter/Makefile               |    15 -
 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp    |   249 -
 lib/Target/Sparc/CMakeLists.txt                    |     4 +-
 lib/Target/Sparc/DelaySlotFiller.cpp               |   230 +-
 lib/Target/Sparc/Makefile                          |     2 +-
 lib/Target/Sparc/SparcAsmPrinter.cpp               |   251 +
 lib/Target/Sparc/SparcCallingConv.td               |    10 +-
 lib/Target/Sparc/SparcFrameLowering.cpp            |    80 +
 lib/Target/Sparc/SparcFrameLowering.h              |    41 +
 lib/Target/Sparc/SparcISelDAGToDAG.cpp             |    18 +-
 lib/Target/Sparc/SparcISelLowering.cpp             |   721 +-
 lib/Target/Sparc/SparcISelLowering.h               |     3 +-
 lib/Target/Sparc/SparcInstrInfo.cpp                |   195 +-
 lib/Target/Sparc/SparcInstrInfo.h                  |    11 +-
 lib/Target/Sparc/SparcInstrInfo.td                 |   221 +-
 lib/Target/Sparc/SparcMachineFunctionInfo.h        |    11 +-
 lib/Target/Sparc/SparcRegisterInfo.cpp             |    53 -
 lib/Target/Sparc/SparcRegisterInfo.h               |     9 +-
 lib/Target/Sparc/SparcRegisterInfo.td              |     3 +
 lib/Target/Sparc/SparcTargetMachine.cpp            |     6 +-
 lib/Target/Sparc/SparcTargetMachine.h              |    15 +-
 lib/Target/SubtargetFeature.cpp                    |     3 +-
 lib/Target/SystemZ/AsmPrinter/CMakeLists.txt       |     6 -
 lib/Target/SystemZ/AsmPrinter/Makefile             |    15 -
 .../SystemZ/AsmPrinter/SystemZAsmPrinter.cpp       |   217 -
 lib/Target/SystemZ/CMakeLists.txt                  |     4 +-
 lib/Target/SystemZ/Makefile                        |     2 +-
 lib/Target/SystemZ/SystemZAsmPrinter.cpp           |   223 +
 lib/Target/SystemZ/SystemZFrameLowering.cpp        |   386 +
 lib/Target/SystemZ/SystemZFrameLowering.h          |    57 +
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp         |    31 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp         |    18 +-
 lib/Target/SystemZ/SystemZInstrBuilder.h           |     6 +-
 lib/Target/SystemZ/SystemZInstrInfo.cpp            |   150 -
 lib/Target/SystemZ/SystemZInstrInfo.h              |    10 -
 lib/Target/SystemZ/SystemZInstrInfo.td             |    56 +-
 lib/Target/SystemZ/SystemZMCAsmInfo.cpp            |     5 +-
 lib/Target/SystemZ/SystemZOperands.td              |    15 +
 lib/Target/SystemZ/SystemZRegisterInfo.cpp         |   214 +-
 lib/Target/SystemZ/SystemZRegisterInfo.h           |    12 -
 lib/Target/SystemZ/SystemZRegisterInfo.td          |    48 +-
 lib/Target/SystemZ/SystemZTargetMachine.cpp        |     2 +-
 lib/Target/SystemZ/SystemZTargetMachine.h          |    12 +-
 lib/Target/Target.cpp                              |    15 +-
 lib/Target/TargetAsmInfo.cpp                       |    27 +
 lib/Target/TargetData.cpp                          |    58 +-
 lib/Target/TargetELFWriterInfo.cpp                 |     5 +-
 lib/Target/TargetFrameInfo.cpp                     |    19 -
 lib/Target/TargetFrameLowering.cpp                 |    53 +
 lib/Target/TargetInstrInfo.cpp                     |    93 +-
 lib/Target/TargetLibraryInfo.cpp                   |    55 +
 lib/Target/TargetLoweringObjectFile.cpp            |     8 +-
 lib/Target/TargetMachine.cpp                       |     4 +-
 lib/Target/TargetRegisterInfo.cpp                  |    43 +-
 lib/Target/X86/AsmParser/X86AsmLexer.cpp           |     9 +-
 lib/Target/X86/AsmParser/X86AsmParser.cpp          |   437 +-
 lib/Target/X86/AsmPrinter/CMakeLists.txt           |     8 -
 lib/Target/X86/AsmPrinter/Makefile                 |    15 -
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp    |   129 -
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h      |    81 -
 lib/Target/X86/AsmPrinter/X86InstComments.cpp      |   232 -
 lib/Target/X86/AsmPrinter/X86InstComments.h        |    25 -
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp  |   140 -
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h    |    95 -
 lib/Target/X86/CMakeLists.txt                      |    14 +-
 lib/Target/X86/Disassembler/CMakeLists.txt         |     2 +-
 lib/Target/X86/Disassembler/X86Disassembler.cpp    |    15 +-
 lib/Target/X86/Disassembler/X86Disassembler.h      |     2 +-
 .../X86/Disassembler/X86DisassemblerDecoder.c      |    31 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h      |     4 +-
 .../Disassembler/X86DisassemblerDecoderCommon.h    |     3 +-
 lib/Target/X86/InstPrinter/CMakeLists.txt          |     8 +
 lib/Target/X86/InstPrinter/Makefile                |    15 +
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp   |   127 +
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h     |    81 +
 lib/Target/X86/InstPrinter/X86InstComments.cpp     |   232 +
 lib/Target/X86/InstPrinter/X86InstComments.h       |    25 +
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp |   139 +
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h   |    95 +
 lib/Target/X86/Makefile                            |     2 +-
 lib/Target/X86/README-SSE.txt                      |    50 +-
 lib/Target/X86/README-X86-64.txt                   |    44 -
 lib/Target/X86/README.txt                          |   335 +-
 lib/Target/X86/Utils/CMakeLists.txt                |     6 +
 lib/Target/X86/Utils/Makefile                      |    15 +
 lib/Target/X86/Utils/X86ShuffleDecode.cpp          |   148 +
 lib/Target/X86/Utils/X86ShuffleDecode.h            |    69 +
 lib/Target/X86/X86.h                               |    10 +
 lib/Target/X86/X86.td                              |    28 +-
 lib/Target/X86/X86AsmBackend.cpp                   |   270 +-
 lib/Target/X86/X86AsmPrinter.cpp                   |    97 +-
 lib/Target/X86/X86AsmPrinter.h                     |     2 -
 lib/Target/X86/X86CallingConv.td                   |    67 +-
 lib/Target/X86/X86CodeEmitter.cpp                  |    21 +-
 lib/Target/X86/X86ELFWriterInfo.cpp                |    55 +-
 lib/Target/X86/X86ELFWriterInfo.h                  |    19 +-
 lib/Target/X86/X86FastISel.cpp                     |   300 +-
 lib/Target/X86/X86FixupKinds.h                     |    16 +-
 lib/Target/X86/X86FloatingPoint.cpp                |   129 +-
 lib/Target/X86/X86FrameLowering.cpp                |   994 +
 lib/Target/X86/X86FrameLowering.h                  |    65 +
 lib/Target/X86/X86ISelDAGToDAG.cpp                 |   200 +-
 lib/Target/X86/X86ISelLowering.cpp                 |  3194 ++-
 lib/Target/X86/X86ISelLowering.h                   |   243 +-
 lib/Target/X86/X86Instr3DNow.td                    |    77 +
 lib/Target/X86/X86Instr64bit.td                    |  2250 ---
 lib/Target/X86/X86InstrArithmetic.td               |  1125 ++
 lib/Target/X86/X86InstrBuilder.h                   |    37 +-
 lib/Target/X86/X86InstrCMovSetCC.td                |   104 +
 lib/Target/X86/X86InstrCompiler.td                 |  1626 ++
 lib/Target/X86/X86InstrControl.td                  |   294 +
 lib/Target/X86/X86InstrExtension.td                |   172 +
 lib/Target/X86/X86InstrFPStack.td                  |    82 +-
 lib/Target/X86/X86InstrFormats.td                  |    24 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td            |   107 +-
 lib/Target/X86/X86InstrInfo.cpp                    |   448 +-
 lib/Target/X86/X86InstrInfo.h                      |    84 +-
 lib/Target/X86/X86InstrInfo.td                     |  4842 +----
 lib/Target/X86/X86InstrMMX.td                      |   607 +-
 lib/Target/X86/X86InstrSSE.td                      |   571 +-
 lib/Target/X86/X86InstrShiftRotate.td              |   746 +
 lib/Target/X86/X86InstrSystem.td                   |   390 +
 lib/Target/X86/X86InstrVMX.td                      |    54 +
 lib/Target/X86/X86JITInfo.cpp                      |    16 +-
 lib/Target/X86/X86MCAsmInfo.cpp                    |    15 +-
 lib/Target/X86/X86MCCodeEmitter.cpp                |   149 +-
 lib/Target/X86/X86MCInstLower.cpp                  |   117 +-
 lib/Target/X86/X86MCInstLower.h                    |     2 -
 lib/Target/X86/X86MachObjectWriter.cpp             |    32 +
 lib/Target/X86/X86RegisterInfo.cpp                 |   955 +-
 lib/Target/X86/X86RegisterInfo.h                   |    17 +-
 lib/Target/X86/X86RegisterInfo.td                  |   100 +-
 lib/Target/X86/X86SelectionDAGInfo.cpp             |    52 +-
 lib/Target/X86/X86SelectionDAGInfo.h               |     9 +-
 lib/Target/X86/X86ShuffleDecode.h                  |   155 -
 lib/Target/X86/X86Subtarget.cpp                    |    18 +-
 lib/Target/X86/X86Subtarget.h                      |    36 +-
 lib/Target/X86/X86TargetMachine.cpp                |    55 +-
 lib/Target/X86/X86TargetMachine.h                  |    75 +-
 lib/Target/XCore/AsmPrinter/CMakeLists.txt         |     6 -
 lib/Target/XCore/AsmPrinter/Makefile               |    16 -
 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp    |   280 -
 lib/Target/XCore/CMakeLists.txt                    |     5 +-
 lib/Target/XCore/Makefile                          |     2 +-
 lib/Target/XCore/TargetInfo/CMakeLists.txt         |     2 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp               |   280 +
 lib/Target/XCore/XCoreCallingConv.td               |     3 +
 lib/Target/XCore/XCoreFrameInfo.cpp                |    27 -
 lib/Target/XCore/XCoreFrameInfo.h                  |    34 -
 lib/Target/XCore/XCoreFrameLowering.cpp            |   387 +
 lib/Target/XCore/XCoreFrameLowering.h              |    59 +
 lib/Target/XCore/XCoreISelDAGToDAG.cpp             |    21 +-
 lib/Target/XCore/XCoreISelLowering.cpp             |   172 +-
 lib/Target/XCore/XCoreISelLowering.h               |     1 +
 lib/Target/XCore/XCoreInstrInfo.cpp                |    66 +-
 lib/Target/XCore/XCoreInstrInfo.h                  |     9 -
 lib/Target/XCore/XCoreInstrInfo.td                 |    76 +-
 lib/Target/XCore/XCoreRegisterInfo.cpp             |   284 +-
 lib/Target/XCore/XCoreRegisterInfo.h               |    11 -
 lib/Target/XCore/XCoreRegisterInfo.td              |     4 +-
 lib/Target/XCore/XCoreTargetMachine.cpp            |     2 +-
 lib/Target/XCore/XCoreTargetMachine.h              |     8 +-
 lib/Target/XCore/XCoreTargetObjectFile.cpp         |    49 +-
 lib/Transforms/CMakeLists.txt                      |     6 +
 lib/Transforms/Hello/Hello.cpp                     |     7 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp           |   117 +-
 lib/Transforms/IPO/CMakeLists.txt                  |     3 -
 lib/Transforms/IPO/ConstantMerge.cpp               |    86 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp     |    76 +-
 lib/Transforms/IPO/DeadTypeElimination.cpp         |     9 +-
 lib/Transforms/IPO/ExtractGV.cpp                   |    30 +-
 lib/Transforms/IPO/FunctionAttrs.cpp               |   141 +-
 lib/Transforms/IPO/GlobalDCE.cpp                   |     6 +-
 lib/Transforms/IPO/GlobalOpt.cpp                   |   830 +-
 lib/Transforms/IPO/IPConstantPropagation.cpp       |     6 +-
 lib/Transforms/IPO/IPO.cpp                         |    38 +-
 lib/Transforms/IPO/InlineAlways.cpp                |    11 +-
 lib/Transforms/IPO/InlineSimple.cpp                |    15 +-
 lib/Transforms/IPO/Inliner.cpp                     |    56 +-
 lib/Transforms/IPO/Internalize.cpp                 |     4 +-
 lib/Transforms/IPO/LoopExtractor.cpp               |    17 +-
 lib/Transforms/IPO/LowerSetJmp.cpp                 |     6 +-
 lib/Transforms/IPO/MergeFunctions.cpp              |   646 +-
 lib/Transforms/IPO/PartialInlining.cpp             |     8 +-
 lib/Transforms/IPO/PartialSpecialization.cpp       |   216 -
 lib/Transforms/IPO/PruneEH.cpp                     |    11 +-
 lib/Transforms/IPO/StripDeadPrototypes.cpp         |     6 +-
 lib/Transforms/IPO/StripSymbols.cpp                |    24 +-
 lib/Transforms/IPO/StructRetPromotion.cpp          |    11 +-
 lib/Transforms/InstCombine/CMakeLists.txt          |     2 -
 lib/Transforms/InstCombine/InstCombine.h           |    28 +-
 lib/Transforms/InstCombine/InstCombineAddSub.cpp   |   350 +-
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp |   597 +-
 lib/Transforms/InstCombine/InstCombineCalls.cpp    |   288 +-
 lib/Transforms/InstCombine/InstCombineCasts.cpp    |    35 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp |   772 +-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp     |    11 +-
 .../InstCombine/InstCombineMulDivRem.cpp           |   315 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp      |    79 +-
 lib/Transforms/InstCombine/InstCombineSelect.cpp   |   294 +-
 lib/Transforms/InstCombine/InstCombineShifts.cpp   |   116 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp    |   100 +-
 .../InstCombine/InstCombineVectorOps.cpp           |   272 +-
 .../InstCombine/InstructionCombining.cpp           |   604 +-
 lib/Transforms/Instrumentation/CMakeLists.txt      |     2 +
 lib/Transforms/Instrumentation/EdgeProfiling.cpp   |     9 +-
 lib/Transforms/Instrumentation/Instrumentation.cpp |    32 +
 .../Instrumentation/OptimalEdgeProfiling.cpp       |    17 +-
 lib/Transforms/Instrumentation/PathProfiling.cpp   |  1423 ++
 lib/Transforms/Instrumentation/ProfilingUtils.cpp  |    22 +-
 lib/Transforms/Instrumentation/ProfilingUtils.h    |     7 +-
 lib/Transforms/Scalar/ADCE.cpp                     |     6 +-
 lib/Transforms/Scalar/BasicBlockPlacement.cpp      |    11 +-
 lib/Transforms/Scalar/CMakeLists.txt               |     6 +-
 lib/Transforms/Scalar/CodeGenPrepare.cpp           |   369 +-
 lib/Transforms/Scalar/ConstantProp.cpp             |     6 +-
 .../Scalar/CorrelatedValuePropagation.cpp          |    86 +-
 lib/Transforms/Scalar/DCE.cpp                      |    12 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp     |   847 +-
 lib/Transforms/Scalar/EarlyCSE.cpp                 |   470 +
 lib/Transforms/Scalar/GEPSplitter.cpp              |     6 +-
 lib/Transforms/Scalar/GVN.cpp                      |   813 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp           |    49 +-
 lib/Transforms/Scalar/JumpThreading.cpp            |   998 +-
 lib/Transforms/Scalar/LICM.cpp                     |   324 +-
 lib/Transforms/Scalar/LoopDeletion.cpp             |    26 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |   594 +
 lib/Transforms/Scalar/LoopIndexSplit.cpp           |  1270 --
 lib/Transforms/Scalar/LoopInstSimplify.cpp         |   170 +
 lib/Transforms/Scalar/LoopRotation.cpp             |   491 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       |   200 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp           |    57 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp             |    60 +-
 lib/Transforms/Scalar/LowerAtomic.cpp              |   146 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp          |   729 +-
 lib/Transforms/Scalar/Reassociate.cpp              |    38 +-
 lib/Transforms/Scalar/Reg2Mem.cpp                  |    12 +-
 lib/Transforms/Scalar/SCCP.cpp                     |    39 +-
 lib/Transforms/Scalar/Scalar.cpp                   |    55 +-
 lib/Transforms/Scalar/ScalarReplAggregates.cpp     |  1155 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp          |     6 +-
 lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp |     9 +-
 lib/Transforms/Scalar/SimplifyLibCalls.cpp         |   342 +-
 lib/Transforms/Scalar/Sink.cpp                     |    20 +-
 lib/Transforms/Scalar/TailDuplication.cpp          |    18 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp |   139 +-
 lib/Transforms/Utils/AddrModeMatcher.cpp           |    28 +-
 lib/Transforms/Utils/BasicBlockUtils.cpp           |   157 +-
 lib/Transforms/Utils/BreakCriticalEdges.cpp        |    56 +-
 lib/Transforms/Utils/BuildLibCalls.cpp             |    54 +-
 lib/Transforms/Utils/CMakeLists.txt                |     3 +-
 lib/Transforms/Utils/CloneFunction.cpp             |    40 +-
 lib/Transforms/Utils/CloneLoop.cpp                 |    45 +-
 lib/Transforms/Utils/CloneModule.cpp               |     8 +-
 lib/Transforms/Utils/CodeExtractor.cpp             |     4 +-
 lib/Transforms/Utils/DemoteRegToStack.cpp          |     2 +-
 lib/Transforms/Utils/InlineFunction.cpp            |   173 +-
 lib/Transforms/Utils/InstructionNamer.cpp          |     9 +-
 lib/Transforms/Utils/LCSSA.cpp                     |    12 +-
 lib/Transforms/Utils/Local.cpp                     |   148 +-
 lib/Transforms/Utils/LoopSimplify.cpp              |    69 +-
 lib/Transforms/Utils/LoopUnroll.cpp                |    38 +-
 lib/Transforms/Utils/LowerInvoke.cpp               |    27 +-
 lib/Transforms/Utils/LowerSwitch.cpp               |     9 +-
 lib/Transforms/Utils/Mem2Reg.cpp                   |    17 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp   |   209 +-
 lib/Transforms/Utils/SSAUpdater.cpp                |   171 +-
 lib/Transforms/Utils/SimplifyCFG.cpp               |  2003 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp      |    94 +
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp    |     2 +-
 lib/Transforms/Utils/Utils.cpp                     |    37 +
 lib/Transforms/Utils/ValueMapper.cpp               |   178 +-
 lib/VMCore/AsmWriter.cpp                           |    40 +-
 lib/VMCore/Attributes.cpp                          |    33 +-
 lib/VMCore/AutoUpgrade.cpp                         |   569 +-
 lib/VMCore/BasicBlock.cpp                          |     9 +-
 lib/VMCore/CMakeLists.txt                          |     3 +
 lib/VMCore/ConstantFold.cpp                        |   159 +-
 lib/VMCore/ConstantFold.h                          |     2 +
 lib/VMCore/Constants.cpp                           |   382 +-
 lib/VMCore/ConstantsContext.h                      |    30 +-
 lib/VMCore/Core.cpp                                |   234 +-
 lib/VMCore/Dominators.cpp                          |   275 +-
 lib/VMCore/Function.cpp                            |    21 +-
 lib/VMCore/Globals.cpp                             |    49 -
 lib/VMCore/IRBuilder.cpp                           |    81 +
 lib/VMCore/InlineAsm.cpp                           |    79 +-
 lib/VMCore/Instruction.cpp                         |    36 +-
 lib/VMCore/Instructions.cpp                        |   212 +-
 lib/VMCore/LLVMContext.cpp                         |    42 +-
 lib/VMCore/LLVMContextImpl.cpp                     |    13 +-
 lib/VMCore/LLVMContextImpl.h                       |     8 +-
 lib/VMCore/LeakDetector.cpp                        |     4 +-
 lib/VMCore/Metadata.cpp                            |    13 +-
 lib/VMCore/Module.cpp                              |     2 +
 lib/VMCore/Pass.cpp                                |     1 -
 lib/VMCore/PassManager.cpp                         |   128 +-
 lib/VMCore/PassRegistry.cpp                        |   173 +-
 lib/VMCore/PrintModulePass.cpp                     |     4 +-
 lib/VMCore/Type.cpp                                |    30 +-
 lib/VMCore/TypesContext.h                          |     2 +-
 lib/VMCore/Use.cpp                                 |   122 +-
 lib/VMCore/User.cpp                                |    81 +
 lib/VMCore/Value.cpp                               |    97 +-
 lib/VMCore/ValueTypes.cpp                          |     5 +-
 lib/VMCore/Verifier.cpp                            |    32 +-
 projects/Makefile                                  |     5 +-
 projects/sample/autoconf/AutoRegen.sh              |     8 +-
 projects/sample/autoconf/configure.ac              |    10 +-
 projects/sample/configure                          |  2371 ++-
 projects/sample/lib/sample/sample.c                |     2 +-
 runtime/libprofile/CommonProfiling.c               |    53 +-
 runtime/libprofile/PathProfiling.c                 |   266 +
 runtime/libprofile/Profiling.h                     |    11 +-
 runtime/libprofile/libprofile.exports              |     3 +
 test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll |     2 +-
 test/Analysis/BasicAA/2003-04-22-GEPProblem.ll     |     2 +-
 test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll    |     2 +-
 test/Analysis/BasicAA/2003-09-19-LocalArgument.ll  |     2 +-
 test/Analysis/BasicAA/2003-11-04-SimpleCases.ll    |     2 +-
 test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll   |     2 +-
 test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll   |     2 +-
 test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll   |     2 +-
 test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll  |     2 +-
 .../BasicAA/2006-03-03-BadArraySubscript.ll        |     2 +-
 .../BasicAA/2006-11-03-BasicAAVectorCrash.ll       |     2 +-
 test/Analysis/BasicAA/2007-11-05-SizeCrash.ll      |     2 +-
 .../BasicAA/2007-12-08-OutOfBoundsCrash.ll         |     2 +-
 test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll   |     2 +-
 test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll     |     2 +-
 test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll   |     2 +-
 .../Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll |     4 +-
 .../BasicAA/2010-09-15-GEP-SignedArithmetic.ll     |    15 +
 test/Analysis/BasicAA/args-rets-allocas-loads.ll   |     2 +-
 test/Analysis/BasicAA/byval.ll                     |     2 +-
 test/Analysis/BasicAA/constant-over-index.ll       |     2 +-
 test/Analysis/BasicAA/empty.ll                     |     2 +-
 test/Analysis/BasicAA/full-store-partial-alias.ll  |    33 +
 test/Analysis/BasicAA/gep-alias.ll                 |     2 +-
 test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll       |     2 +-
 test/Analysis/BasicAA/global-size.ll               |    36 +-
 test/Analysis/BasicAA/modref.ll                    |    10 +-
 test/Analysis/BasicAA/phi-aa.ll                    |     2 +-
 test/Analysis/BasicAA/phi-and-select.ll            |     2 +-
 test/Analysis/BasicAA/unreachable-block.ll         |     2 +-
 test/Analysis/GlobalsModRef/aliastest.ll           |     2 +-
 test/Analysis/GlobalsModRef/chaining-analysis.ll   |     2 +-
 test/Analysis/GlobalsModRef/indirect-global.ll     |     2 +-
 test/Analysis/GlobalsModRef/modreftest.ll          |     2 +-
 test/Analysis/LoopDependenceAnalysis/alias.ll      |     2 +-
 test/Analysis/LoopDependenceAnalysis/siv-strong.ll |     2 +-
 .../LoopDependenceAnalysis/siv-weak-crossing.ll    |     2 +-
 .../LoopDependenceAnalysis/siv-weak-zero.ll        |     2 +-
 test/Analysis/LoopDependenceAnalysis/ziv.ll        |     2 +-
 test/Analysis/PointerTracking/dg.exp               |     3 -
 test/Analysis/PointerTracking/sizes.ll             |    86 -
 test/Analysis/Profiling/profiling-tool-chain.ll    |     4 +-
 .../2010-09-03-RequiredTransitive.ll               |    24 +
 test/Analysis/ScalarEvolution/fold.ll              |    62 +
 test/Analysis/ScalarEvolution/nsw.ll               |    70 +-
 test/Analysis/ScalarEvolution/scev-aa.ll           |     7 +-
 test/Analysis/TypeBasedAliasAnalysis/aliastest.ll  |    62 +
 .../TypeBasedAliasAnalysis/argument-promotion.ll   |    31 +
 test/Analysis/TypeBasedAliasAnalysis/dg.exp        |     3 +
 test/Analysis/TypeBasedAliasAnalysis/dse.ll        |    66 +
 .../TypeBasedAliasAnalysis/functionattrs.ll        |    81 +
 .../gvn-nonlocal-type-mismatch.ll                  |    91 +
 test/Analysis/TypeBasedAliasAnalysis/licm.ll       |    61 +
 test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll  |    23 +
 test/Analysis/TypeBasedAliasAnalysis/precedence.ll |    46 +
 test/Analysis/TypeBasedAliasAnalysis/sink.ll       |    20 +
 test/Archive/GNU.toc                               |     4 -
 test/Archive/MacOSX.toc                            |     5 -
 test/Archive/SVR4.toc                              |     4 -
 test/Archive/extract.ll                            |     8 +-
 test/Archive/toc_GNU.ll                            |     7 +-
 test/Archive/toc_MacOSX.ll                         |     8 +-
 test/Archive/toc_SVR4.ll                           |     7 +-
 test/Archive/toc_xpg4.ll                           |     7 +-
 test/Archive/xpg4.toc                              |     4 -
 test/Assembler/2003-05-21-MalformedShiftCrash.ll   |     2 +-
 test/Assembler/AutoUpgradeIntrinsics.ll            |     2 +-
 test/Assembler/AutoUpgradeMMXIntrinsics.ll         |   223 +
 test/Assembler/extractvalue-invalid-idx.ll         |     8 +
 test/Assembler/flags.ll                            |    64 +
 test/Assembler/insertvalue-invalid-idx.ll          |     7 +
 test/Assembler/unnamed-addr.ll                     |    18 +
 test/Assembler/x86mmx.ll                           |     8 +
 test/Bindings/Ocaml/analysis.ml                    |     1 +
 test/Bindings/Ocaml/bitreader.ml                   |     1 +
 test/Bindings/Ocaml/bitwriter.ml                   |     1 +
 test/Bindings/Ocaml/executionengine.ml             |     1 +
 test/Bindings/Ocaml/ext_exc.ml                     |    17 +
 test/Bindings/Ocaml/scalar_opts.ml                 |     4 +-
 test/Bindings/Ocaml/target.ml                      |     1 +
 test/Bindings/Ocaml/vmcore.ml                      |     1 +
 test/Bitcode/null-type.ll                          |     2 +
 test/Bitcode/null-type.ll.bc                       |   Bin 0 -> 312 bytes
 test/Bitcode/ssse3_palignr.ll.bc                   |   Bin 1280 -> 1504 bytes
 test/BugPoint/crash-narrowfunctiontest.ll          |     5 +-
 test/BugPoint/metadata.ll                          |     4 +-
 test/BugPoint/remove_arguments_test.ll             |     5 +-
 test/CMakeLists.txt                                |    57 +-
 test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll        |     5 +-
 test/CodeGen/ARM/2009-08-21-PostRAKill4.ll         |    26 -
 test/CodeGen/ARM/2009-09-01-PostRAProlog.ll        |   106 -
 test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll         |     2 +-
 test/CodeGen/ARM/2009-11-02-NegativeLane.ll        |     3 +-
 test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll            |     4 +-
 .../CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll |    43 +-
 test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll    |    17 -
 test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll     |    10 -
 .../ARM/2010-06-29-PartialRedefFastAlloc.ll        |     6 +-
 test/CodeGen/ARM/2010-09-21-OptCmpBug.ll           |    84 +
 test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll  |    13 +
 test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll    |    37 +
 test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll           |    31 +
 test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll   |    85 +
 test/CodeGen/ARM/2010-11-29-PrologueBug.ll         |    28 +
 test/CodeGen/ARM/2010-11-30-reloc-movt.ll          |    42 +
 test/CodeGen/ARM/2010-12-07-PEIBug.ll              |    40 +
 test/CodeGen/ARM/2010-12-08-tpsoft.ll              |    52 +
 test/CodeGen/ARM/2010-12-13-reloc-pic.ll           |   100 +
 test/CodeGen/ARM/2010-12-15-elf-lcomm.ll           |    35 +
 test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll |    15 +
 test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll     |   127 +
 test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll     |   128 +
 test/CodeGen/ARM/2011-02-07-AntidepClobber.ll      |    89 +
 test/CodeGen/ARM/align.ll                          |     4 +-
 test/CodeGen/ARM/arguments.ll                      |     4 +-
 test/CodeGen/ARM/arm-and-tst-peephole.ll           |   112 +
 test/CodeGen/ARM/atomic-cmp.ll                     |    17 +
 test/CodeGen/ARM/bfi.ll                            |    32 +-
 test/CodeGen/ARM/bits.ll                           |    17 +-
 test/CodeGen/ARM/bswap-inline-asm.ll               |     9 +
 test/CodeGen/ARM/bx_fold.ll                        |     5 +-
 test/CodeGen/ARM/call-tc.ll                        |    96 +-
 test/CodeGen/ARM/clz.ll                            |     6 +-
 test/CodeGen/ARM/code-placement.ll                 |    54 +-
 test/CodeGen/ARM/constants.ll                      |    13 +-
 test/CodeGen/ARM/crash.ll                          |    29 +
 test/CodeGen/ARM/div.ll                            |     2 +-
 test/CodeGen/ARM/fabss.ll                          |     2 +-
 test/CodeGen/ARM/fadds.ll                          |     2 +-
 test/CodeGen/ARM/fast-isel-crash.ll                |    21 +
 test/CodeGen/ARM/fast-isel-static.ll               |    30 +
 test/CodeGen/ARM/fast-isel.ll                      |    31 +-
 test/CodeGen/ARM/fcopysign.ll                      |    53 +-
 test/CodeGen/ARM/fdivs.ll                          |     2 +-
 test/CodeGen/ARM/fmacs.ll                          |    55 +-
 test/CodeGen/ARM/fmscs.ll                          |    39 +-
 test/CodeGen/ARM/fmuls.ll                          |     2 +-
 test/CodeGen/ARM/fnegs.ll                          |    20 +-
 test/CodeGen/ARM/fnmacs.ll                         |    31 +-
 test/CodeGen/ARM/fnmscs.ll                         |    64 +-
 test/CodeGen/ARM/fp.ll                             |     2 +-
 test/CodeGen/ARM/fpcmp-opt.ll                      |     1 +
 test/CodeGen/ARM/fpcmp_ueq.ll                      |    10 +-
 test/CodeGen/ARM/fpconsts.ll                       |     8 +-
 test/CodeGen/ARM/fpconv.ll                         |     2 +-
 test/CodeGen/ARM/global-merge.ll                   |    23 +
 test/CodeGen/ARM/hello.ll                          |     2 +-
 test/CodeGen/ARM/ifcvt10.ll                        |    43 +
 test/CodeGen/ARM/ifcvt11.ll                        |    59 +
 test/CodeGen/ARM/ifcvt6.ll                         |     7 +-
 test/CodeGen/ARM/ifcvt7.ll                         |    10 +-
 test/CodeGen/ARM/ifcvt8.ll                         |     4 +-
 test/CodeGen/ARM/inlineasm3.ll                     |     4 +-
 test/CodeGen/ARM/ispositive.ll                     |     2 +-
 test/CodeGen/ARM/ldm.ll                            |    11 +-
 test/CodeGen/ARM/ldst-f32-2-i32.ll                 |    28 +
 test/CodeGen/ARM/load-global.ll                    |    50 +
 test/CodeGen/ARM/long.ll                           |     8 +-
 test/CodeGen/ARM/long_shift.ll                     |    10 +-
 test/CodeGen/ARM/lsr-code-insertion.ll             |     2 +-
 test/CodeGen/ARM/lsr-on-unrolled-loops.ll          |    23 +-
 test/CodeGen/ARM/machine-licm.ll                   |    66 +
 test/CodeGen/ARM/mul_const.ll                      |     2 +-
 test/CodeGen/ARM/mult-alt-generic-arm.ll           |   323 +
 test/CodeGen/ARM/neon_div.ll                       |    48 +
 test/CodeGen/ARM/pack.ll                           |    69 +-
 test/CodeGen/ARM/phi.ll                            |    23 +
 test/CodeGen/ARM/prefetch.ll                       |    61 +
 test/CodeGen/ARM/reg_sequence.ll                   |    39 +-
 test/CodeGen/ARM/remat.ll                          |    65 -
 test/CodeGen/ARM/rev.ll                            |    41 +-
 test/CodeGen/ARM/select-imm.ll                     |    58 +-
 test/CodeGen/ARM/select.ll                         |     4 +-
 test/CodeGen/ARM/select_xform.ll                   |    63 +-
 test/CodeGen/ARM/shifter_operand.ll                |    72 +-
 test/CodeGen/ARM/spill-q.ll                        |    36 +-
 test/CodeGen/ARM/stm.ll                            |     5 +-
 test/CodeGen/ARM/str_pre-2.ll                      |     5 +-
 test/CodeGen/ARM/tail-opts.ll                      |     9 +-
 test/CodeGen/ARM/thumb1-varalloc.ll                |    40 +
 test/CodeGen/ARM/umulo-32.ll                       |    14 +
 test/CodeGen/ARM/unaligned_load_store.ll           |     3 +-
 test/CodeGen/ARM/vbits.ll                          |    42 +-
 test/CodeGen/ARM/vceq.ll                           |    11 +
 test/CodeGen/ARM/vcge.ll                           |    41 +
 test/CodeGen/ARM/vcgt.ll                           |    28 +-
 test/CodeGen/ARM/vcombine.ll                       |    38 +-
 test/CodeGen/ARM/vcvt.ll                           |    20 +-
 test/CodeGen/ARM/vdup.ll                           |    18 -
 test/CodeGen/ARM/vector-DAGCombine.ll              |   107 +
 test/CodeGen/ARM/vext.ll                           |    59 +
 test/CodeGen/ARM/vget_lane.ll                      |    37 +-
 test/CodeGen/ARM/vld1.ll                           |    50 +-
 test/CodeGen/ARM/vld2.ll                           |    59 +-
 test/CodeGen/ARM/vld3.ll                           |    48 +-
 test/CodeGen/ARM/vld4.ll                           |    62 +-
 test/CodeGen/ARM/vlddup.ll                         |   212 +
 test/CodeGen/ARM/vldlane.ll                        |   204 +-
 test/CodeGen/ARM/vmov.ll                           |    70 +-
 test/CodeGen/ARM/vmul.ll                           |    72 +
 test/CodeGen/ARM/vrev.ll                           |    18 +
 test/CodeGen/ARM/vst1.ll                           |    41 +-
 test/CodeGen/ARM/vst2.ll                           |    55 +-
 test/CodeGen/ARM/vst3.ll                           |    47 +-
 test/CodeGen/ARM/vst4.ll                           |    58 +-
 test/CodeGen/ARM/vstlane.ll                        |   161 +-
 .../Alpha/2010-04-07-DbgValueOtherTargets.ll       |    43 +-
 .../CellSPU/2010-04-07-DbgValueOtherTargets.ll     |    43 +-
 test/CodeGen/CellSPU/arg_ret.ll                    |     3 +-
 test/CodeGen/CellSPU/div_ops.ll                    |    22 +
 test/CodeGen/CellSPU/fcmp32.ll                     |    25 +-
 test/CodeGen/CellSPU/immed32.ll                    |    15 +-
 test/CodeGen/CellSPU/loads.ll                      |    12 +
 test/CodeGen/CellSPU/rotate_ops.ll                 |    14 +-
 test/CodeGen/CellSPU/sext128.ll                    |    30 +-
 test/CodeGen/CellSPU/shift_ops.ll                  |    18 +-
 test/CodeGen/CellSPU/shuffles.ll                   |    28 +-
 test/CodeGen/CellSPU/stores.ll                     |    22 +
 test/CodeGen/CellSPU/v2f32.ll                      |     3 +-
 test/CodeGen/CellSPU/v2i32.ll                      |    19 +-
 test/CodeGen/Generic/2010-11-04-BigByval.ll        |    11 +
 test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll  |    15 +
 test/CodeGen/Generic/2011-02-12-shuffle.ll         |    32 +
 test/CodeGen/Generic/add-with-overflow-128.ll      |    24 +-
 test/CodeGen/Generic/crash.ll                      |    32 +
 test/CodeGen/Generic/overflow.ll                   |   220 +
 .../MBlaze/2010-04-07-DbgValueOtherTargets.ll      |    43 +-
 test/CodeGen/MBlaze/brind.ll                       |    13 +-
 test/CodeGen/MBlaze/cc.ll                          |    85 +-
 test/CodeGen/MBlaze/fpu.ll                         |    16 +-
 test/CodeGen/MBlaze/imm.ll                         |    24 +-
 test/CodeGen/MBlaze/intr.ll                        |    48 +
 test/CodeGen/MBlaze/jumptable.ll                   |     4 +-
 test/CodeGen/MBlaze/loop.ll                        |     3 +-
 test/CodeGen/MBlaze/mul.ll                         |     6 +-
 test/CodeGen/MBlaze/shift.ll                       |    26 +-
 test/CodeGen/MBlaze/svol.ll                        |    80 +
 .../MSP430/2010-04-07-DbgValueOtherTargets.ll      |    43 +-
 test/CodeGen/MSP430/mult-alt-generic-msp430.ll     |   323 +
 test/CodeGen/Mips/2008-07-15-InternalConstant.ll   |     4 +-
 .../Mips/2010-04-07-DbgValueOtherTargets.ll        |    43 +-
 test/CodeGen/Mips/2010-07-20-Select.ll             |     6 +-
 test/CodeGen/Mips/2010-11-09-CountLeading.ll       |    33 +
 test/CodeGen/Mips/2010-11-09-Mul.ll                |    15 +
 test/CodeGen/Mips/cmov.ll                          |    15 +
 test/CodeGen/Mips/madd-msub.ll                     |    65 +
 test/CodeGen/Mips/o32_cc.ll                        |   325 +
 test/CodeGen/Mips/rotate.ll                        |    40 +
 test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll      |    32 -
 test/CodeGen/PIC16/2009-11-20-NewNode.ll           |    36 -
 test/CodeGen/PIC16/C16-11.ll                       |    40 -
 test/CodeGen/PIC16/C16-15.ll                       |    45 -
 test/CodeGen/PIC16/C16-49.ll                       |    15 -
 test/CodeGen/PIC16/check_inc_files.ll              |     9 -
 test/CodeGen/PIC16/dg.exp                          |     5 -
 test/CodeGen/PIC16/global-in-user-section.ll       |     6 -
 test/CodeGen/PIC16/globals.ll                      |    18 -
 test/CodeGen/PIC16/result_direction.ll             |    13 -
 test/CodeGen/PIC16/sext.ll                         |    11 -
 test/CodeGen/PIC16/test_indf_name.ll               |    12 -
 test/CodeGen/PTX/add.ll                            |    15 +
 test/CodeGen/PTX/dg.exp                            |     5 +
 test/CodeGen/PTX/exit.ll                           |    14 +
 test/CodeGen/PTX/ld.ll                             |    78 +
 test/CodeGen/PTX/mov.ll                            |    13 +
 test/CodeGen/PTX/options.ll                        |     6 +
 test/CodeGen/PTX/ret.ll                            |     7 +
 test/CodeGen/PTX/shl.ll                            |    22 +
 test/CodeGen/PTX/shr.ll                            |    43 +
 test/CodeGen/PTX/st.ll                             |    71 +
 test/CodeGen/PTX/sub.ll                            |    15 +
 test/CodeGen/PowerPC/2007-03-24-cntlzd.ll          |     2 +-
 .../PowerPC/2010-04-07-DbgValueOtherTargets.ll     |    43 +-
 test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll    |    16 +
 test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll    |    22 +
 test/CodeGen/PowerPC/align.ll                      |     4 +
 test/CodeGen/PowerPC/compare-simm.ll               |     2 +-
 test/CodeGen/PowerPC/indirectbr.ll                 |     4 +-
 test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll   |   321 +
 test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll |   321 +
 test/CodeGen/PowerPC/rlwimi2.ll                    |     2 +-
 test/CodeGen/PowerPC/stfiwx.ll                     |     4 +-
 test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll    |   585 -
 test/CodeGen/PowerPC/unsafe-math.ll                |     2 +-
 test/CodeGen/PowerPC/varargs.ll                    |    22 +
 .../SPARC/2010-04-07-DbgValueOtherTargets.ll       |    43 +-
 test/CodeGen/SPARC/2011-01-11-CC.ll                |   105 +
 test/CodeGen/SPARC/2011-01-11-Call.ll              |    13 +
 test/CodeGen/SPARC/2011-01-11-FrameAddr.ll         |    64 +
 test/CodeGen/SPARC/2011-01-19-DelaySlot.ll         |    90 +
 test/CodeGen/SPARC/2011-01-21-ByValArgs.ll         |    18 +
 test/CodeGen/SPARC/2011-01-22-SRet.ll              |    36 +
 test/CodeGen/SPARC/basictest.ll                    |    24 +-
 test/CodeGen/SPARC/mult-alt-generic-sparc.ll       |   323 +
 test/CodeGen/SPARC/xnor.ll                         |    15 -
 .../SystemZ/2010-04-07-DbgValueOtherTargets.ll     |    43 +-
 .../Thumb/2010-04-07-DbgValueOtherTargets.ll       |    43 +-
 test/CodeGen/Thumb/2010-07-15-debugOrdering.ll     |     2 +-
 test/CodeGen/Thumb/2011-EpilogueBug.ll             |    17 +
 test/CodeGen/Thumb/barrier.ll                      |    11 +-
 test/CodeGen/Thumb/dyn-stackalloc.ll               |    23 +-
 test/CodeGen/Thumb/large-stack.ll                  |    14 +-
 test/CodeGen/Thumb/long.ll                         |     2 +-
 test/CodeGen/Thumb/machine-licm.ll                 |    41 -
 test/CodeGen/Thumb/select.ll                       |     2 +-
 test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll         |     9 +-
 test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll      |    26 +
 test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll     |   106 +
 test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll     |    13 +-
 test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll    |     5 +-
 test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll     |     2 +-
 .../CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll |    12 +-
 test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll      |    34 +
 test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll   |    11 +
 test/CodeGen/Thumb2/bfi.ll                         |    11 +
 test/CodeGen/Thumb2/buildvector-crash.ll           |    17 +
 test/CodeGen/Thumb2/cortex-fp.ll                   |     2 +-
 test/CodeGen/Thumb2/cross-rc-coalescing-2.ll       |    10 +-
 test/CodeGen/Thumb2/div.ll                         |     2 +-
 test/CodeGen/Thumb2/large-stack.ll                 |     2 +-
 test/CodeGen/Thumb2/load-global.ll                 |    23 -
 test/CodeGen/Thumb2/machine-licm-vdup.ll           |    38 -
 test/CodeGen/Thumb2/machine-licm.ll                |    62 +-
 test/CodeGen/Thumb2/thumb2-badreg-operands.ll      |    15 -
 test/CodeGen/Thumb2/thumb2-barrier.ll              |    32 +-
 test/CodeGen/Thumb2/thumb2-ifcvt3.ll               |     1 -
 test/CodeGen/Thumb2/thumb2-ldrd.ll                 |     2 +-
 test/CodeGen/Thumb2/thumb2-mov.ll                  |     6 +-
 test/CodeGen/Thumb2/thumb2-mul.ll                  |    18 +
 test/CodeGen/Thumb2/thumb2-select_xform.ll         |     4 +-
 test/CodeGen/Thumb2/thumb2-spill-q.ll              |    36 +-
 test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll           |    41 +-
 test/CodeGen/X86/2007-05-15-maskmovq.ll            |     8 +-
 test/CodeGen/X86/2007-06-15-IntToMMX.ll            |    13 +-
 test/CodeGen/X86/2007-07-03-GR64ToVR64.ll          |    14 +-
 test/CodeGen/X86/2007-10-16-fp80_select.ll         |    19 -
 test/CodeGen/X86/2008-02-18-TailMergingBug.ll      |     2 +-
 test/CodeGen/X86/2008-04-08-CoalescerCrash.ll      |     8 +-
 test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll   |     4 +-
 test/CodeGen/X86/2008-07-19-movups-spills.ll       |     3 +-
 test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll      |     6 +-
 test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll      |    22 +-
 test/CodeGen/X86/2008-09-17-inline-asm-1.ll        |    16 +-
 test/CodeGen/X86/2008-10-27-CoalescerBug.ll        |    10 +-
 test/CodeGen/X86/2008-10-27-StackRealignment.ll    |     4 +-
 test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll |     9 -
 .../X86/2008-11-29-DivideConstant16bitSigned.ll    |     9 -
 test/CodeGen/X86/2009-01-13-DoubleUpdate.ll        |     2 +-
 test/CodeGen/X86/2009-01-27-NullStrings.ll         |     2 +-
 test/CodeGen/X86/2009-02-26-MachineLICMBug.ll      |     2 +-
 test/CodeGen/X86/2009-04-24.ll                     |     3 +-
 test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll |     9 +-
 test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll       |    14 +-
 .../X86/2009-06-05-ScalarToVectorByteMMX.ll        |     2 +-
 test/CodeGen/X86/2009-07-07-SplitICmp.ll           |     2 +-
 .../CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll |     8 +-
 test/CodeGen/X86/2009-08-06-inlineasm.ll           |     6 +-
 test/CodeGen/X86/2009-09-10-SpillComments.ll       |     4 +-
 test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll        |     2 +-
 .../CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll |    42 +-
 test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll         |    64 +-
 .../X86/2010-04-30-LocalAlloc-LandingPad.ll        |     2 +-
 test/CodeGen/X86/2010-05-25-DotDebugLoc.ll         |     7 +-
 test/CodeGen/X86/2010-05-26-DotDebugLoc.ll         |     2 +-
 test/CodeGen/X86/2010-05-28-Crash.ll               |     6 +-
 .../X86/2010-06-25-CoalescerSubRegDefDead.ll       |     8 +-
 test/CodeGen/X86/2010-07-02-asm-alignstack.ll      |     4 +-
 test/CodeGen/X86/2010-09-16-EmptyFilename.ll       |    29 +
 test/CodeGen/X86/2010-09-16-asmcrash.ll            |    56 +
 test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll  |    26 +
 test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll  |    71 +
 test/CodeGen/X86/2010-10-08-cmpxchg8b.ll           |    28 +
 test/CodeGen/X86/2010-11-02-DbgParameter.ll        |    35 +
 test/CodeGen/X86/2010-11-09-MOVLPS.ll              |    66 +
 test/CodeGen/X86/2010-11-18-SelectOfExtload.ll     |    15 +
 test/CodeGen/X86/2010-12-02-MC-Set.ll              |    22 +
 test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll  |    19 +
 test/CodeGen/X86/2011-01-10-DagCombineHang.ll      |    15 +
 test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll |   103 +
 test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll    |    14 +
 test/CodeGen/X86/3addr-or.ll                       |    38 +-
 test/CodeGen/X86/abi-isel.ll                       |   942 +-
 test/CodeGen/X86/add-of-carry.ll                   |    34 +
 test/CodeGen/X86/add.ll                            |    40 +
 test/CodeGen/X86/addr-label-difference.ll          |     2 +-
 test/CodeGen/X86/alldiv-divdi3.ll                  |    17 +
 test/CodeGen/X86/andimm8.ll                        |    19 +
 test/CodeGen/X86/apm.ll                            |    26 +
 test/CodeGen/X86/atomic_op.ll                      |    71 +-
 test/CodeGen/X86/avx-128.ll                        |     2 +-
 test/CodeGen/X86/avx-intrinsics-x86.ll             |    33 +-
 test/CodeGen/X86/avx-intrinsics-x86_64.ll          |     2 +-
 test/CodeGen/X86/bc-extract.ll                     |    27 +
 test/CodeGen/X86/bit-test-shift.ll                 |    13 +
 test/CodeGen/X86/bswap-inline-asm.ll               |    11 +-
 test/CodeGen/X86/byval.ll                          |    11 +-
 test/CodeGen/X86/cmp-test.ll                       |    27 -
 test/CodeGen/X86/cmp.ll                            |    92 +
 test/CodeGen/X86/cmp0.ll                           |    24 -
 test/CodeGen/X86/cmp2.ll                           |    18 -
 test/CodeGen/X86/commute-two-addr.ll               |    48 +-
 test/CodeGen/X86/compare-inf.ll                    |    16 +-
 test/CodeGen/X86/complex-asm.ll                    |    17 +
 test/CodeGen/X86/conditional-indecrement.ll        |    89 +
 test/CodeGen/X86/const-select.ll                   |    22 -
 test/CodeGen/X86/crash.ll                          |    58 +
 test/CodeGen/X86/critical-edge-split-2.ll          |    29 +
 test/CodeGen/X86/critical-edge-split.ll            |    50 -
 test/CodeGen/X86/ctpop-combine.ll                  |    40 +
 test/CodeGen/X86/dagcombine-buildvector.ll         |     2 +-
 test/CodeGen/X86/dbg-live-in-location.ll           |    84 +
 test/CodeGen/X86/dbg-merge-loc-entry.ll            |    71 +
 test/CodeGen/X86/dbg-value-inlined-parameter.ll    |    86 +
 test/CodeGen/X86/dbg-value-location.ll             |    70 +
 test/CodeGen/X86/dbg-value-range.ll                |    56 +
 test/CodeGen/X86/div_const.ll                      |     7 -
 test/CodeGen/X86/divide-by-constant.ll             |    62 +
 test/CodeGen/X86/dll-linkage.ll                    |     2 +-
 test/CodeGen/X86/dollar-name.ll                    |     2 +-
 .../X86/fast-isel-avoid-unnecessary-pic-base.ll    |    23 +
 test/CodeGen/X86/fast-isel-bc.ll                   |    16 +-
 test/CodeGen/X86/fast-isel-gep.ll                  |    17 +
 test/CodeGen/X86/fast-isel-mem.ll                  |    18 +-
 test/CodeGen/X86/fltused.ll                        |    19 +
 test/CodeGen/X86/fp-in-intregs.ll                  |     3 +-
 test/CodeGen/X86/fp-stack-compare.ll               |     3 +-
 test/CodeGen/X86/ghc-cc.ll                         |     4 +-
 test/CodeGen/X86/global-sections.ll                |    18 +-
 test/CodeGen/X86/inline-asm-h.ll                   |    12 +
 test/CodeGen/X86/inline-asm-ptr-cast.ll            |    27 +
 test/CodeGen/X86/insertelement-legalize.ll         |     2 +-
 test/CodeGen/X86/legalize-sub-zero-2.ll            |    41 +
 test/CodeGen/X86/legalize-sub-zero.ll              |    35 +
 test/CodeGen/X86/legalizedag_vec.ll                |     8 +-
 test/CodeGen/X86/licm-symbol.ll                    |     2 +-
 test/CodeGen/X86/loop-blocks.ll                    |    11 +-
 test/CodeGen/X86/lsr-reuse.ll                      |    15 +-
 test/CodeGen/X86/machine-cse.ll                    |    40 +
 test/CodeGen/X86/memcmp.ll                         |    12 +-
 test/CodeGen/X86/memcpy.ll                         |    64 +-
 test/CodeGen/X86/memmove-0.ll                      |     9 -
 test/CodeGen/X86/memmove-1.ll                      |     9 -
 test/CodeGen/X86/memmove-2.ll                      |     9 -
 test/CodeGen/X86/memmove-3.ll                      |     9 -
 test/CodeGen/X86/memset-2.ll                       |    26 +-
 test/CodeGen/X86/memset64-on-x86-32.ll             |     2 +-
 test/CodeGen/X86/mingw-alloca.ll                   |     4 +-
 test/CodeGen/X86/misaligned-memset.ll              |    15 +
 test/CodeGen/X86/mmx-arg-passing.ll                |    19 +-
 test/CodeGen/X86/mmx-arg-passing2.ll               |    14 +-
 test/CodeGen/X86/mmx-arith.ll                      |   380 +-
 test/CodeGen/X86/mmx-bitcast-to-i64.ll             |    37 +-
 test/CodeGen/X86/mmx-builtins.ll                   |  1324 ++
 test/CodeGen/X86/mmx-insert-element.ll             |    10 +-
 test/CodeGen/X86/mmx-pinsrw.ll                     |     2 +-
 test/CodeGen/X86/mmx-punpckhdq.ll                  |    19 +-
 test/CodeGen/X86/mmx-shift.ll                      |    24 +-
 test/CodeGen/X86/mmx-shuffle.ll                    |     6 +-
 test/CodeGen/X86/mmx-vzmovl-2.ll                   |    24 +-
 test/CodeGen/X86/mmx-vzmovl.ll                     |     4 +-
 test/CodeGen/X86/movgs.ll                          |    53 +-
 test/CodeGen/X86/mult-alt-generic-i686.ll          |   321 +
 test/CodeGen/X86/mult-alt-generic-x86_64.ll        |   321 +
 test/CodeGen/X86/mult-alt-x86.ll                   |   358 +
 test/CodeGen/X86/narrow-shl-load.ll                |    83 +
 test/CodeGen/X86/negative-sin.ll                   |     4 +-
 test/CodeGen/X86/non-globl-eh-frame.ll             |    24 +
 test/CodeGen/X86/phi-immediate-factoring.ll        |     2 +-
 test/CodeGen/X86/phys_subreg_coalesce-2.ll         |     2 +-
 test/CodeGen/X86/pic.ll                            |    24 +-
 test/CodeGen/X86/pic_jumptable.ll                  |     2 +-
 test/CodeGen/X86/popcnt.ll                         |    38 +
 test/CodeGen/X86/postra-licm.ll                    |     2 +-
 test/CodeGen/X86/pr2659.ll                         |     7 +-
 test/CodeGen/X86/pr3522.ll                         |     2 +-
 test/CodeGen/X86/pr9127.ll                         |    12 +
 test/CodeGen/X86/prefetch.ll                       |    10 +-
 test/CodeGen/X86/rodata-relocs.ll                  |    16 +-
 test/CodeGen/X86/scalar_widen_div.ll               |     2 +-
 test/CodeGen/X86/select-aggregate.ll               |    15 -
 test/CodeGen/X86/select-zero-one.ll                |    25 -
 test/CodeGen/X86/select.ll                         |   239 +-
 test/CodeGen/X86/sext-select.ll                    |    23 -
 test/CodeGen/X86/shift-folding.ll                  |     6 +-
 test/CodeGen/X86/sibcall-3.ll                      |     2 +-
 test/CodeGen/X86/sibcall-5.ll                      |    31 +
 test/CodeGen/X86/sibcall.ll                        |    30 +-
 test/CodeGen/X86/sink-hoist.ll                     |    29 +-
 test/CodeGen/X86/split-select.ll                   |     7 -
 test/CodeGen/X86/sse-align-11.ll                   |     3 +-
 test/CodeGen/X86/sse2.ll                           |    30 +
 test/CodeGen/X86/sse3.ll                           |    17 +-
 test/CodeGen/X86/sse41.ll                          |     4 +-
 test/CodeGen/X86/stack-align.ll                    |    17 +-
 test/CodeGen/X86/stdcall-notailcall.ll             |    13 +
 test/CodeGen/X86/store-narrow.ll                   |    14 +
 test/CodeGen/X86/store_op_load_fold2.ll            |     2 +-
 test/CodeGen/X86/switch-bt.ll                      |    30 +
 test/CodeGen/X86/switch-or.ll                      |    22 +
 test/CodeGen/X86/tail-opts.ll                      |    23 +-
 test/CodeGen/X86/tailcall-largecode.ll             |     8 +-
 test/CodeGen/X86/tailcall-ri64.ll                  |    24 +
 test/CodeGen/X86/tailcall-stackalign.ll            |     2 +-
 test/CodeGen/X86/tailcallfp2.ll                    |     4 +-
 test/CodeGen/X86/tailcallstack64.ll                |    17 +-
 test/CodeGen/X86/tls-1.ll                          |    19 -
 test/CodeGen/X86/tls-pic.ll                        |    16 +-
 test/CodeGen/X86/tls9.ll                           |     2 +-
 test/CodeGen/X86/tlv-1.ll                          |    35 +
 test/CodeGen/X86/tlv-2.ll                          |    32 +
 test/CodeGen/X86/twoaddr-lea.ll                    |    32 +-
 test/CodeGen/X86/uint64-to-float.ll                |    21 +
 test/CodeGen/X86/umul-with-overflow.ll             |     8 +-
 test/CodeGen/X86/umulo-64.ll                       |    28 +
 test/CodeGen/X86/unaligned-load.ll                 |     2 +-
 test/CodeGen/X86/unknown-location.ll               |    10 +-
 test/CodeGen/X86/vec-sign.ll                       |    30 +
 test/CodeGen/X86/vec-trunc-store.ll                |     2 +-
 test/CodeGen/X86/vec_cast.ll                       |     1 -
 test/CodeGen/X86/vec_compare-2.ll                  |     2 +-
 test/CodeGen/X86/vec_ext_inreg.ll                  |     1 -
 test/CodeGen/X86/vec_insert-5.ll                   |     9 +-
 test/CodeGen/X86/vec_insert-7.ll                   |    15 +-
 test/CodeGen/X86/vec_select.ll                     |    12 -
 test/CodeGen/X86/vec_set-F.ll                      |     6 +-
 test/CodeGen/X86/vec_shuffle-27.ll                 |    29 +-
 test/CodeGen/X86/vec_shuffle-30.ll                 |     2 +-
 test/CodeGen/X86/vec_shuffle-37.ll                 |    10 +
 test/CodeGen/X86/vec_zero_cse.ll                   |     5 +-
 test/CodeGen/X86/visibility.ll                     |    11 +
 test/CodeGen/X86/vshift-1.ll                       |     2 +-
 test/CodeGen/X86/vshift-2.ll                       |     2 +-
 test/CodeGen/X86/vshift-3.ll                       |     2 +-
 test/CodeGen/X86/vshift-4.ll                       |     2 +-
 test/CodeGen/X86/vshift-5.ll                       |     2 +-
 test/CodeGen/X86/vsplit-and.ll                     |     2 +-
 test/CodeGen/X86/widen_arith-1.ll                  |     2 +-
 test/CodeGen/X86/widen_arith-2.ll                  |     2 +-
 test/CodeGen/X86/widen_arith-3.ll                  |     2 +-
 test/CodeGen/X86/widen_arith-4.ll                  |     2 +-
 test/CodeGen/X86/widen_arith-5.ll                  |     2 +-
 test/CodeGen/X86/widen_arith-6.ll                  |     2 +-
 test/CodeGen/X86/widen_cast-1.ll                   |     2 +-
 test/CodeGen/X86/widen_cast-2.ll                   |     2 +-
 test/CodeGen/X86/widen_cast-3.ll                   |     2 +-
 test/CodeGen/X86/widen_cast-4.ll                   |     2 +-
 test/CodeGen/X86/widen_cast-5.ll                   |     2 +-
 test/CodeGen/X86/widen_cast-6.ll                   |     2 +-
 test/CodeGen/X86/widen_conv-1.ll                   |     2 +-
 test/CodeGen/X86/widen_conv-2.ll                   |     2 +-
 test/CodeGen/X86/widen_conv-3.ll                   |     2 +-
 test/CodeGen/X86/widen_conv-4.ll                   |     2 +-
 test/CodeGen/X86/widen_extract-1.ll                |     2 +-
 test/CodeGen/X86/widen_load-1.ll                   |     2 +-
 test/CodeGen/X86/widen_load-2.ll                   |     2 +-
 test/CodeGen/X86/widen_select-1.ll                 |    12 -
 test/CodeGen/X86/widen_shuffle-1.ll                |     2 +-
 test/CodeGen/X86/win64_params.ll                   |    11 +
 test/CodeGen/X86/win64_vararg.ll                   |    20 +
 test/CodeGen/X86/win_chkstk.ll                     |    15 +-
 test/CodeGen/X86/x86-64-extend-shift.ll            |    10 +
 test/CodeGen/X86/x86_64-mul-by-const.ll            |     9 +
 test/CodeGen/X86/zext-extract_subreg.ll            |    60 +
 .../XCore/2010-04-07-DbgValueOtherTargets.ll       |    43 +-
 test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll     |    10 +
 test/CodeGen/XCore/ashr.ll                         |     8 +-
 test/CodeGen/XCore/globals.ll                      |     6 +-
 test/CodeGen/XCore/resources.ll                    |   111 +
 test/CodeGen/XCore/trampoline.ll                   |    37 +
 test/DebugInfo/2009-10-16-Scope.ll                 |     3 +-
 test/DebugInfo/2010-05-10-MultipleCU.ll            |    18 +-
 test/DebugInfo/2010-08-04-StackVariable.ll         |     3 +-
 test/DebugInfo/2010-10-01-crash.ll                 |    21 +
 test/ExecutionEngine/2002-12-16-ArgTest.ll         |     3 +-
 test/ExecutionEngine/2003-01-04-ArgumentBug.ll     |     3 +-
 test/ExecutionEngine/2003-01-04-LoopTest.ll        |     3 +-
 test/ExecutionEngine/2003-01-04-PhiTest.ll         |     3 +-
 test/ExecutionEngine/2003-01-09-SARTest.ll         |     3 +-
 test/ExecutionEngine/2003-01-10-FUCOM.ll           |     3 +-
 test/ExecutionEngine/2003-01-15-AlignmentTest.ll   |     3 +-
 test/ExecutionEngine/2003-05-06-LivenessClobber.ll |     4 +-
 test/ExecutionEngine/2003-05-07-ArgumentTest.ll    |     2 +-
 test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll  |     3 +-
 test/ExecutionEngine/2003-06-04-bzip2-bug.ll       |     3 +-
 test/ExecutionEngine/2003-06-05-PHIBug.ll          |     3 +-
 test/ExecutionEngine/2003-08-15-AllocaAssertion.ll |     3 +-
 test/ExecutionEngine/2003-08-21-EnvironmentTest.ll |     3 +-
 .../2003-08-23-RegisterAllocatePhysReg.ll          |     3 +-
 ...-10-18-PHINode-ConstantExpr-CondCode-Failure.ll |     3 +-
 test/ExecutionEngine/2005-12-02-TailCallBug.ll     |     3 +-
 test/ExecutionEngine/2007-12-10-APIntLoadStore.ll  |     2 +-
 test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll  |     3 +-
 test/ExecutionEngine/2010-01-15-UndefValue.ll      |     3 +-
 test/ExecutionEngine/fpbitcast.ll                  |     3 +-
 test/ExecutionEngine/hello.ll                      |     3 +-
 test/ExecutionEngine/hello2.ll                     |     3 +-
 test/ExecutionEngine/simplesttest.ll               |     3 +-
 test/ExecutionEngine/simpletest.ll                 |     3 +-
 test/ExecutionEngine/stubs.ll                      |     2 +-
 test/ExecutionEngine/test-arith.ll                 |     3 +-
 test/ExecutionEngine/test-branch.ll                |     3 +-
 test/ExecutionEngine/test-call.ll                  |     4 +-
 test/ExecutionEngine/test-cast.ll                  |     3 +-
 test/ExecutionEngine/test-constantexpr.ll          |     3 +-
 test/ExecutionEngine/test-fp.ll                    |     3 +-
 test/ExecutionEngine/test-loadstore.ll             |     3 +-
 test/ExecutionEngine/test-logical.ll               |     3 +-
 test/ExecutionEngine/test-loop.ll                  |     3 +-
 test/ExecutionEngine/test-malloc.ll                |     3 +-
 test/ExecutionEngine/test-phi.ll                   |     3 +-
 test/ExecutionEngine/test-ret.ll                   |     3 +-
 test/ExecutionEngine/test-setcond-fp.ll            |     3 +-
 test/ExecutionEngine/test-setcond-int.ll           |     3 +-
 test/ExecutionEngine/test-shift.ll                 |     3 +-
 test/Feature/load_module.ll                        |     4 +-
 test/FrontendAda/Support/real_cst.ads              |     4 +
 test/FrontendAda/array_constructor.adb             |     2 +-
 test/FrontendAda/array_range_ref.adb               |     2 +-
 test/FrontendAda/array_ref.adb                     |     2 +-
 test/FrontendAda/array_size.adb                    |     2 +-
 test/FrontendAda/asm.adb                           |     2 +-
 test/FrontendAda/debug_var_size.ads                |     2 +-
 test/FrontendAda/element_copy.adb                  |     2 +-
 test/FrontendAda/emit_var.ads                      |     2 +-
 test/FrontendAda/fat_fields.adb                    |     4 +-
 test/FrontendAda/field_order.ads                   |     2 +-
 test/FrontendAda/global_constant.adb               |     2 +-
 test/FrontendAda/init_size.ads                     |     2 +-
 test/FrontendAda/negative_field_offset.adb         |     2 +-
 test/FrontendAda/non_bitfield.ads                  |     2 +-
 test/FrontendAda/non_lvalue.adb                    |     2 +-
 test/FrontendAda/placeholder.adb                   |     2 +-
 test/FrontendAda/real_cst.adb                      |     8 +
 test/FrontendAda/switch.adb                        |     2 +-
 test/FrontendAda/unc_constructor.adb               |     2 +-
 test/FrontendAda/var_offset.adb                    |     2 +-
 test/FrontendAda/var_size.adb                      |     2 +-
 test/FrontendAda/vce.adb                           |     2 +-
 test/FrontendAda/vce_lv.adb                        |     2 +-
 test/FrontendC++/2003-08-20-ExceptionFail.cpp      |    12 -
 test/FrontendC++/2003-08-21-EmptyClass.cpp         |     9 -
 test/FrontendC++/2003-08-24-Cleanup.cpp            |    10 -
 test/FrontendC++/2003-08-27-TypeNamespaces.cpp     |    16 -
 test/FrontendC++/2003-08-28-ForwardType.cpp        |    23 -
 test/FrontendC++/2003-08-28-SaveExprBug.cpp        |    24 -
 test/FrontendC++/2003-08-29-ArgPassingBug.cpp      |    13 -
 test/FrontendC++/2003-08-31-StructLayout.cpp       |    16 -
 test/FrontendC++/2003-09-22-CompositeExprValue.cpp |    11 -
 .../2003-09-29-ArgumentNumberMismatch.cpp          |    17 -
 test/FrontendC++/2003-09-30-CommaExprBug.cpp       |    10 -
 .../FrontendC++/2003-09-30-ForIncrementExprBug.cpp |    10 -
 .../2003-09-30-ForIncrementExprBug2.cpp            |    12 -
 test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp |    12 -
 test/FrontendC++/2003-10-17-BoolBitfields.cpp      |    11 -
 test/FrontendC++/2003-10-21-InnerClass.cpp         |    12 -
 .../2003-10-27-VirtualBaseClassCrash.cpp           |    17 -
 test/FrontendC++/2003-11-04-ArrayConstructors.cpp  |    12 -
 test/FrontendC++/2003-11-04-CatchLabelName.cpp     |    11 -
 test/FrontendC++/2003-11-08-ArrayAddress.cpp       |    10 -
 test/FrontendC++/2003-11-18-EnumArray.cpp          |    14 -
 .../2004-03-09-UnmangledBuiltinMethods.cpp         |     2 +-
 test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp  |     2 +-
 test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp  |     2 +-
 test/FrontendC++/2006-09-27-Debug-Protection.cpp   |     4 +-
 test/FrontendC++/2006-10-30-ClassBitfield.cpp      |     2 +-
 test/FrontendC++/2006-11-20-GlobalSymbols.cpp      |     2 +-
 test/FrontendC++/2006-11-30-ConstantExprCrash.cpp  |     2 +-
 test/FrontendC++/2006-11-30-NoCompileUnit.cpp      |    60 -
 test/FrontendC++/2007-01-02-UnboundedArray.cpp     |     2 +-
 test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp |     2 +-
 test/FrontendC++/2007-01-06-PtrMethodInit.cpp      |     2 +-
 test/FrontendC++/2007-03-27-FunctionVarRename.cpp  |     4 +-
 .../2007-04-11-InlineStorageClassC++.cpp           |    14 +-
 test/FrontendC++/2007-05-03-VectorInit.cpp         |     2 +-
 .../2007-05-16-ReverseBitFieldCrash.cpp            |     2 +-
 test/FrontendC++/2007-05-23-TryFinally.cpp         |     2 +-
 test/FrontendC++/2007-07-29-RestrictPtrArg.cpp     |     2 +-
 test/FrontendC++/2007-07-29-RestrictRefArg.cpp     |     2 +-
 test/FrontendC++/2007-08-01-RestrictMethod.cpp     |     2 +-
 .../2007-09-10-RecursiveTypeResolution.cpp         |     2 +-
 test/FrontendC++/2007-10-01-StructResize.cpp       |     2 +-
 test/FrontendC++/2008-10-29-WrongOffset.cpp        |     2 +-
 test/FrontendC++/2009-02-16-CtorNames-dbg.cpp      |     2 +-
 test/FrontendC++/2009-03-17-dbg.cpp                |     2 +-
 test/FrontendC++/2009-04-21-DtorNames-dbg.cpp      |     2 +-
 test/FrontendC++/2009-04-23-bool2.cpp              |     2 +-
 test/FrontendC++/2009-05-04-PureConstNounwind.cpp  |     2 +-
 test/FrontendC++/2009-06-16-DebugInfoCrash.cpp     |     2 +-
 test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp    |    14 +-
 test/FrontendC++/2009-08-05-ZeroInitWidth.cpp      |     2 +-
 test/FrontendC++/2009-08-11-VectorRetTy.cpp        |     2 +-
 test/FrontendC++/2009-09-04-modify-crash.cpp       |     2 +-
 test/FrontendC++/2009-09-09-packed-layout.cpp      |     2 +-
 test/FrontendC++/2009-10-27-crash.cpp              |     2 +-
 test/FrontendC++/2010-03-22-empty-baseclass.cpp    |     2 +-
 .../FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp |     4 +-
 .../2010-05-11-alwaysinlineinstantiation.cpp       |     4 +-
 test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp    |     2 +-
 test/FrontendC++/2010-06-22-BitfieldInit.cpp       |     2 +-
 test/FrontendC++/2010-06-22-ZeroBitfield.cpp       |     2 +-
 test/FrontendC++/2010-07-19-nowarn.cpp             |     2 +-
 test/FrontendC++/2010-07-23-DeclLoc.cpp            |     2 +-
 test/FrontendC++/member-alignment.cpp              |     2 +-
 test/FrontendC++/ptr-to-method-devirt.cpp          |     4 +-
 test/FrontendC++/varargs.cpp                       |     2 +-
 test/FrontendC++/weak-external.cpp                 |     2 +-
 .../x86-64-abi-sret-vs-2word-struct-param.cpp      |     2 +-
 test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c   |     2 +-
 test/FrontendC/2003-08-17-DeadCodeShortCircuit.c   |     2 +-
 test/FrontendC/2003-11-03-AddrArrayElement.c       |     2 +-
 .../2003-11-08-PointerSubNotGetelementptr.c        |     2 +-
 test/FrontendC/2003-11-13-TypeSafety.c             |     2 +-
 test/FrontendC/2003-12-14-ExternInlineSupport.c    |     2 +-
 test/FrontendC/2004-02-12-LargeAggregateCopy.c     |     2 +-
 .../2004-02-13-BuiltinFrameReturnAddress.c         |     2 +-
 test/FrontendC/2004-02-13-IllegalVararg.c          |     2 +-
 test/FrontendC/2004-02-13-Memset.c                 |     2 +-
 test/FrontendC/2004-02-20-Builtins.c               |     2 +-
 test/FrontendC/2004-03-07-ExternalConstant.c       |     2 +-
 test/FrontendC/2004-06-17-UnorderedCompares.c      |     2 +-
 .../FrontendC/2004-11-27-StaticFunctionRedeclare.c |     4 +-
 test/FrontendC/2005-01-02-PointerDifference.c      |     2 +-
 test/FrontendC/2005-02-27-MarkGlobalConstant.c     |     2 +-
 test/FrontendC/2005-12-04-AttributeUsed.c          |     2 +-
 test/FrontendC/2006-03-03-MissingInitializer.c     |     2 +-
 test/FrontendC/2007-01-06-KNR-Proto.c              |     2 +-
 test/FrontendC/2007-02-04-AddrLValue-2.c           |     2 +-
 test/FrontendC/2007-02-04-AddrLValue.c             |     2 +-
 test/FrontendC/2007-02-04-EmptyStruct.c            |     2 +-
 test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c         |     2 +-
 test/FrontendC/2007-02-05-nested.c                 |     2 +-
 test/FrontendC/2007-02-07-AddrLabel.c              |     2 +-
 test/FrontendC/2007-02-16-VoidPtrDiff.c            |     2 +-
 test/FrontendC/2007-02-16-WritableStrings.c        |     6 +-
 test/FrontendC/2007-02-25-C-DotDotDot.c            |     2 +-
 test/FrontendC/2007-03-01-VarSizeArrayIdx.c        |     2 +-
 test/FrontendC/2007-04-11-InlineAsmStruct.c        |     2 +-
 test/FrontendC/2007-04-11-InlineAsmUnion.c         |     2 +-
 test/FrontendC/2007-04-11-InlineStorageClassC89.c  |    14 +-
 test/FrontendC/2007-04-11-InlineStorageClassC99.c  |    14 +-
 test/FrontendC/2007-04-13-InlineAsmStruct2.c       |     2 +-
 test/FrontendC/2007-04-13-InlineAsmUnion2.c        |     2 +-
 test/FrontendC/2007-04-24-VolatileStructCopy.c     |     2 +-
 test/FrontendC/2007-04-24-bit-not-expr.c           |     2 +-
 test/FrontendC/2007-04-24-str-const.c              |     2 +-
 test/FrontendC/2007-05-07-PaddingElements.c        |     4 +-
 test/FrontendC/2007-05-11-str-const.c              |     2 +-
 test/FrontendC/2007-05-15-PaddingElement.c         |     2 +-
 test/FrontendC/2007-05-16-EmptyStruct.c            |     2 +-
 test/FrontendC/2007-05-29-UnionCopy.c              |     2 +-
 test/FrontendC/2007-06-05-NoInlineAttribute.c      |     2 +-
 test/FrontendC/2007-06-15-AnnotateAttribute.c      |     4 +-
 test/FrontendC/2007-06-18-SextAttrAggregate.c      |     2 +-
 test/FrontendC/2007-07-29-RestrictPtrArg.c         |     2 +-
 test/FrontendC/2007-08-01-LoadStoreAlign.c         |     4 +-
 test/FrontendC/2007-08-21-ComplexCst.c             |     2 +-
 test/FrontendC/2007-09-05-ConstCtor.c              |     2 +-
 test/FrontendC/2007-09-20-GcrootAttribute.c        |     6 +-
 test/FrontendC/2007-10-01-BuildArrayRef.c          |    18 +-
 test/FrontendC/2007-11-07-AlignedMemcpy.c          |     2 +-
 test/FrontendC/2007-11-27-SExtZExt.c               |     2 +-
 test/FrontendC/2008-01-25-ByValReadNone.c          |     4 +-
 test/FrontendC/2008-01-28-PragmaMark.c             |     2 +-
 test/FrontendC/2008-03-03-CtorAttrType.c           |     2 +-
 test/FrontendC/2008-03-05-syncPtr.c                |     2 +-
 test/FrontendC/2008-05-19-AlwaysInline.c           |     4 +-
 test/FrontendC/2008-08-07-AlignPadding1.c          |     2 +-
 test/FrontendC/2008-08-07-AlignPadding2.c          |     2 +-
 test/FrontendC/2008-10-30-ZeroPlacement.c          |     2 +-
 test/FrontendC/2008-11-02-WeakAlias.c              |     2 +-
 test/FrontendC/2008-11-08-InstCombineSelect.c      |     2 +-
 .../2008-11-11-AnnotateStructFieldAttribute.c      |     2 +-
 test/FrontendC/2008-12-23-AsmIntPointerTie.c       |     2 +-
 test/FrontendC/2009-01-05-BlockInlining.c          |     2 +-
 test/FrontendC/2009-03-13-dbg.c                    |     2 +-
 test/FrontendC/2009-05-04-EnumInreg.c              |     2 +-
 test/FrontendC/2010-01-13-MemBarrier.c             |     2 +-
 test/FrontendC/2010-05-18-asmsched.c               |     2 +-
 test/FrontendC/2010-05-26-AsmSideEffect.c          |     2 +-
 test/FrontendC/2010-06-28-nowarn.c                 |     2 +-
 test/FrontendC/2010-07-14-overconservative-align.c |     2 +-
 test/FrontendC/2010-11-16-asmblock.c               |    16 +
 test/FrontendC/2010-12-01-CommonGlobal.c           |     7 +
 test/FrontendC/arrayderef.c                        |    17 +
 test/FrontendC/attribute_constructor.c             |     2 +-
 test/FrontendC/block-copy.c                        |     2 +-
 test/FrontendC/cstring-align.c                     |     2 +-
 test/FrontendC/extern-weak.c                       |     4 +-
 test/FrontendC/func-aligned.c                      |     2 +-
 test/FrontendC/hidden-visibility.c                 |     2 +-
 test/FrontendC/implicit-arg.c                      |     4 +-
 test/FrontendC/libcalls-d.c                        |     8 +-
 test/FrontendC/libcalls-ld.c                       |     8 +-
 test/FrontendC/libcalls.c                          |     8 +-
 test/FrontendC/pr3518.c                            |     2 +-
 test/FrontendC/pr4349.c                            |    10 +-
 test/FrontendC/pr5406.c                            |     2 +-
 test/FrontendC/ptr-rotate.c                        |     4 +-
 test/FrontendC/sret.c                              |     2 +-
 test/FrontendC/sret2.c                             |     2 +-
 test/FrontendC/unaligned-memcpy.c                  |     2 +-
 test/FrontendFortran/2008-11-03-OptionOverride.f90 |     2 +-
 test/FrontendFortran/2009-02-09-FloorDivExpr.f90   |     2 +-
 test/FrontendFortran/cpow.f90                      |     2 +-
 test/FrontendObjC++/2007-10-03-MetadataPointers.mm |     2 +-
 .../FrontendObjC++/2010-08-02-NonPODObjectValue.mm |     2 +-
 test/FrontendObjC++/2010-08-04-Template.mm         |     2 +-
 test/FrontendObjC++/2010-08-06-X.Y-syntax.mm       |     2 +-
 test/FrontendObjC/2007-04-03-ObjcEH.m              |     2 +-
 test/FrontendObjC/2007-05-02-Strong.m              |     2 +-
 test/FrontendObjC/2007-09-25-EH.m                  |     2 +-
 test/FrontendObjC/2007-10-18-ProDescriptor.m       |     2 +-
 test/FrontendObjC/2007-10-23-GC-WriteBarrier.m     |     2 +-
 test/FrontendObjC/2008-10-3-EhValue.m              |     2 +-
 test/FrontendObjC/2008-11-12-Metadata.m            |     2 +-
 test/FrontendObjC/2008-11-25-Blocks.m              |     2 +-
 test/FrontendObjC/2009-02-05-VolatileProp.m        |     2 +-
 test/FrontendObjC/2009-04-14-AsmSection.m          |     2 +-
 test/FrontendObjC/2009-08-05-utf16.m               |     2 +-
 .../FrontendObjC/2010-02-11-fwritable-stringsBug.m |     4 +-
 test/LLVMC/C++/dg.exp                              |     2 +-
 test/LLVMC/C++/just-compile.cpp                    |    10 +
 test/LLVMC/C++/unknown_suffix.unk                  |     9 +
 test/LLVMC/C/emit-llvm-opt.c                       |     9 +
 test/LLVMC/C/emit-llvm.c                           |     3 +
 test/LLVMC/MultipleOutputLanguages.td              |    27 +
 test/LLVMC/OptionPreprocessor.td                   |     8 +-
 test/Linker/PR8300.ll                              |    13 +
 test/Linker/available_externally_a.ll              |     5 +
 test/Linker/available_externally_b.ll              |     4 +
 test/Linker/link-archive.ll                        |     1 +
 test/Linker/linkmdnode.ll                          |     1 +
 test/Linker/linkmdnode2.ll                         |    10 +
 test/Linker/unnamed-addr1-a.ll                     |    27 +
 test/Linker/unnamed-addr1-b.ll                     |    12 +
 test/MC/ARM/arm_fixups.s                           |     7 +
 test/MC/ARM/arm_instructions.s                     |   284 +
 test/MC/ARM/arm_word_directive.s                   |     6 +
 test/MC/ARM/dg.exp                                 |     5 +
 test/MC/ARM/elf-eflags-eabi.s                      |    13 +
 test/MC/ARM/elf-movt.s                             |    39 +
 test/MC/ARM/elf-reloc-01.ll                        |    71 +
 test/MC/ARM/elf-reloc-02.ll                        |    51 +
 test/MC/ARM/elf-reloc-03.ll                        |    98 +
 test/MC/ARM/hilo-16bit-relocations.s               |    20 +
 test/MC/ARM/neon-abs-encoding.s                    |    31 +
 test/MC/ARM/neon-absdiff-encoding.s                |    82 +
 test/MC/ARM/neon-add-encoding.s                    |   137 +
 test/MC/ARM/neon-bitcount-encoding.s               |    31 +
 test/MC/ARM/neon-bitwise-encoding.s                |    47 +
 test/MC/ARM/neon-cmp-encoding.s                    |   115 +
 test/MC/ARM/neon-convert-encoding.s                |    38 +
 test/MC/ARM/neon-dup-encoding.s                    |    27 +
 test/MC/ARM/neon-minmax-encoding.s                 |    58 +
 test/MC/ARM/neon-mov-encoding.s                    |   117 +
 test/MC/ARM/neon-mul-accum-encoding.s              |    67 +
 test/MC/ARM/neon-mul-encoding.s                    |    56 +
 test/MC/ARM/neon-neg-encoding.s                    |    30 +
 test/MC/ARM/neon-pairwise-encoding.s               |    86 +
 test/MC/ARM/neon-reciprocal-encoding.s             |    26 +
 test/MC/ARM/neon-reverse-encoding.s                |    26 +
 test/MC/ARM/neon-satshift-encoding.s               |   150 +
 test/MC/ARM/neon-shift-encoding.s                  |   160 +
 test/MC/ARM/neon-shiftaccum-encoding.s             |    98 +
 test/MC/ARM/neon-shuffle-encoding.s                |    46 +
 test/MC/ARM/neon-sub-encoding.s                    |   108 +
 test/MC/ARM/neon-table-encoding.s                  |    19 +
 test/MC/ARM/neon-vld-encoding.s                    |   110 +
 test/MC/ARM/neon-vst-encoding.s                    |   101 +
 test/MC/ARM/neont2-abs-encoding.s                  |    33 +
 test/MC/ARM/neont2-absdiff-encoding.s              |    86 +
 test/MC/ARM/neont2-add-encoding.s                  |   138 +
 test/MC/ARM/neont2-bitcount-encoding.s             |    34 +
 test/MC/ARM/neont2-bitwise-encoding.s              |    49 +
 test/MC/ARM/neont2-cmp-encoding.s                  |    36 +
 test/MC/ARM/neont2-convert-encoding.s              |    40 +
 test/MC/ARM/neont2-dup-encoding.s                  |    29 +
 test/MC/ARM/neont2-minmax-encoding.s               |    60 +
 test/MC/ARM/neont2-mov-encoding.s                  |   119 +
 test/MC/ARM/neont2-mul-accum-encoding.s            |    69 +
 test/MC/ARM/neont2-mul-encoding.s                  |    58 +
 test/MC/ARM/neont2-neg-encoding.s                  |    32 +
 test/MC/ARM/neont2-pairwise-encoding.s             |    89 +
 test/MC/ARM/neont2-reciprocal-encoding.s           |    28 +
 test/MC/ARM/neont2-reverse-encoding.s              |    26 +
 test/MC/ARM/neont2-satshift-encoding.s             |   152 +
 test/MC/ARM/neont2-shift-encoding.s                |   162 +
 test/MC/ARM/neont2-shiftaccum-encoding.s           |   100 +
 test/MC/ARM/neont2-shuffle-encoding.s              |    48 +
 test/MC/ARM/neont2-sub-encoding.s                  |    46 +
 test/MC/ARM/neont2-table-encoding.s                |    21 +
 test/MC/ARM/neont2-vld-encoding.s                  |   112 +
 test/MC/ARM/neont2-vst-encoding.s                  |   103 +
 test/MC/ARM/prefetch.ll                            |    58 +
 test/MC/ARM/reg-list.s                             |     8 +
 test/MC/ARM/simple-encoding.ll                     |   237 +
 test/MC/ARM/simple-fp-encoding.s                   |   236 +
 test/MC/ARM/thumb.s                                |    70 +
 test/MC/ARM/thumb2.s                               |   286 +
 test/MC/ARM/thumb2_instructions.s                  |    12 +
 test/MC/AsmParser/ARM/arm_instructions.s           |     8 -
 test/MC/AsmParser/ARM/arm_word_directive.s         |     6 -
 test/MC/AsmParser/ARM/dg.exp                       |     5 -
 test/MC/AsmParser/ELF/dg.exp                       |     6 -
 test/MC/AsmParser/ELF/directive_previous.s         |    13 -
 test/MC/AsmParser/ELF/directive_section.s          |    23 -
 test/MC/AsmParser/X86/dg.exp                       |     5 -
 test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s  |    42 -
 test/MC/AsmParser/X86/x86_32-avx-encoding.s        |  3241 ---
 test/MC/AsmParser/X86/x86_32-bit.s                 |  1631 --
 test/MC/AsmParser/X86/x86_32-bit_cat.s             |  7862 --------
 test/MC/AsmParser/X86/x86_32-encoding.s            | 10069 ----------
 test/MC/AsmParser/X86/x86_32-fma3-encoding.s       |   674 -
 test/MC/AsmParser/X86/x86_32-mismatched-add.s      |     8 -
 test/MC/AsmParser/X86/x86_32-new-encoder.s         |   425 -
 test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s  |    42 -
 test/MC/AsmParser/X86/x86_64-avx-encoding.s        |  3318 ----
 test/MC/AsmParser/X86/x86_64-encoding.s            |   142 -
 test/MC/AsmParser/X86/x86_64-fma3-encoding.s       |   674 -
 test/MC/AsmParser/X86/x86_64-imm-widths.s          |   105 -
 test/MC/AsmParser/X86/x86_64-incl_decl.s           |    26 -
 test/MC/AsmParser/X86/x86_64-new-encoder.s         |   159 -
 test/MC/AsmParser/X86/x86_64-operands.s            |    15 -
 test/MC/AsmParser/X86/x86_64-suffix-matching.s     |    10 -
 test/MC/AsmParser/X86/x86_instruction_errors.s     |     5 -
 test/MC/AsmParser/X86/x86_instructions.s           |   175 -
 test/MC/AsmParser/X86/x86_operands.s               |    58 -
 test/MC/AsmParser/X86/x86_word_directive.s         |     6 -
 test/MC/AsmParser/dash-n.s                         |     7 +
 test/MC/AsmParser/directive_abort.s                |     2 +-
 test/MC/AsmParser/directive_ascii.s                |     7 +
 test/MC/AsmParser/directive_loc.s                  |     2 +-
 test/MC/AsmParser/directive_set.s                  |     5 +
 test/MC/AsmParser/directive_values.s               |    20 +
 test/MC/AsmParser/equ.s                            |     9 +
 test/MC/AsmParser/expr_symbol_modifiers.s          |    14 +
 test/MC/AsmParser/exprs.s                          |    77 +-
 test/MC/AsmParser/floating-literals.s              |    35 +
 test/MC/AsmParser/full_line_comment.s              |     8 +
 test/MC/AsmParser/ifdef.s                          |    29 +
 test/MC/AsmParser/ifndef.s                         |    29 +
 test/MC/AsmParser/paren.s                          |     8 +
 test/MC/AsmParser/rename.s                         |    10 +
 test/MC/AsmParser/section.s                        |   107 +
 test/MC/COFF/align-nops.s                          |    50 +
 test/MC/COFF/basic-coff.ll                         |   136 -
 test/MC/COFF/basic-coff.s                          |   133 +
 test/MC/COFF/bss.s                                 |    15 +
 test/MC/COFF/dg.exp                                |     2 +-
 test/MC/COFF/module-asm.ll                         |    26 +
 test/MC/COFF/simple-fixups.s                       |    50 +
 test/MC/COFF/switch-relocations.ll                 |     3 +
 test/MC/COFF/symbol-alias.s                        |    62 +
 test/MC/COFF/symbol-fragment-offset.ll             |   182 -
 test/MC/COFF/symbol-fragment-offset.s              |   187 +
 test/MC/COFF/weak.s                                |    51 +
 test/MC/Disassembler/ARM/arm-tests.txt             |   132 +
 test/MC/Disassembler/ARM/dg.exp                    |     6 +
 test/MC/Disassembler/ARM/neon-tests.txt            |    61 +
 test/MC/Disassembler/ARM/thumb-tests.txt           |   120 +
 test/MC/Disassembler/MBlaze/dg.exp                 |     6 +
 test/MC/Disassembler/MBlaze/mblaze_branch.txt      |   119 +
 test/MC/Disassembler/MBlaze/mblaze_fpu.txt         |    47 +
 test/MC/Disassembler/MBlaze/mblaze_fsl.txt         |   338 +
 test/MC/Disassembler/MBlaze/mblaze_imm.txt         |   121 +
 test/MC/Disassembler/MBlaze/mblaze_memory.txt      |    65 +
 test/MC/Disassembler/MBlaze/mblaze_operands.txt    |   197 +
 test/MC/Disassembler/MBlaze/mblaze_pattern.txt     |    14 +
 test/MC/Disassembler/MBlaze/mblaze_shift.txt       |    29 +
 test/MC/Disassembler/MBlaze/mblaze_special.txt     |   105 +
 test/MC/Disassembler/MBlaze/mblaze_typea.txt       |    74 +
 test/MC/Disassembler/MBlaze/mblaze_typeb.txt       |    56 +
 test/MC/Disassembler/X86/dg.exp                    |     6 +
 test/MC/Disassembler/X86/simple-tests.txt          |    68 +
 test/MC/Disassembler/X86/truncated-input.txt       |     4 +
 test/MC/Disassembler/arm-tests.txt                 |   111 -
 test/MC/Disassembler/dg.exp                        |     6 -
 test/MC/Disassembler/neon-tests.txt                |    51 -
 test/MC/Disassembler/simple-tests.txt              |    62 -
 test/MC/Disassembler/thumb-tests.txt               |   105 -
 test/MC/ELF/abs.s                                  |    16 +
 test/MC/ELF/alias-reloc.s                          |    52 +
 test/MC/ELF/alias.s                                |    85 +
 test/MC/ELF/align-bss.s                            |    17 +
 test/MC/ELF/align-nops.s                           |    40 +
 test/MC/ELF/align-size.s                           |    13 +
 test/MC/ELF/align-text.s                           |    19 +
 test/MC/ELF/align.s                                |    32 +
 test/MC/ELF/bad-section.s                          |     9 +
 test/MC/ELF/basic-elf-32.s                         |    78 +
 test/MC/ELF/basic-elf-64.s                         |    82 +
 test/MC/ELF/call-abs.s                             |    24 +
 test/MC/ELF/cfi-advance-loc2.s                     |    45 +
 test/MC/ELF/cfi-def-cfa-offset.s                   |    46 +
 test/MC/ELF/cfi-def-cfa-register.s                 |    41 +
 test/MC/ELF/cfi-def-cfa.s                          |    42 +
 test/MC/ELF/cfi-offset.s                           |    42 +
 test/MC/ELF/cfi-remember.s                         |    45 +
 test/MC/ELF/cfi-zero-addr-delta.s                  |    48 +
 test/MC/ELF/cfi.s                                  |   674 +
 test/MC/ELF/comdat.s                               |    86 +
 test/MC/ELF/common.s                               |    88 +
 test/MC/ELF/common2.s                              |    21 +
 test/MC/ELF/debug-line.s                           |    22 +
 test/MC/ELF/debug-loc.s                            |    32 +
 test/MC/ELF/dg.exp                                 |     2 +-
 test/MC/ELF/diff.s                                 |    15 +
 test/MC/ELF/diff2.s                                |    13 +
 test/MC/ELF/elf_directive_previous.s               |    13 +
 test/MC/ELF/elf_directive_section.s                |    23 +
 test/MC/ELF/empty-dwarf-lines.s                    |    21 +
 test/MC/ELF/empty.s                                |    70 +
 test/MC/ELF/entsize.ll                             |    44 +
 test/MC/ELF/entsize.s                              |    69 +
 test/MC/ELF/file.s                                 |    23 +
 test/MC/ELF/global-offset.s                        |    18 +
 test/MC/ELF/got.s                                  |    25 +
 test/MC/ELF/ident.s                                |    17 +
 test/MC/ELF/invalid-symver.s                       |     7 +
 test/MC/ELF/leb128.s                               |    19 +
 test/MC/ELF/local-reloc.s                          |    31 +
 test/MC/ELF/merge.s                                |    97 +
 test/MC/ELF/n_bytes.s                              |    20 +
 test/MC/ELF/no-fixup.s                             |    16 +
 test/MC/ELF/noexec.s                               |    24 +
 test/MC/ELF/norelocation.s                         |    18 +
 test/MC/ELF/pic-diff.s                             |    29 +
 test/MC/ELF/plt.s                                  |    14 +
 test/MC/ELF/relax-arith.s                          |    75 +
 test/MC/ELF/relax-crash.s                          |    11 +
 test/MC/ELF/relax.s                                |    27 +
 test/MC/ELF/relocation-386.s                       |   226 +
 test/MC/ELF/relocation.s                           |   114 +
 test/MC/ELF/rename.s                               |    46 +
 test/MC/ELF/section.s                              |   110 +
 test/MC/ELF/set.s                                  |    34 +
 test/MC/ELF/sleb.s                                 |    29 +
 test/MC/ELF/symref.s                               |   165 +
 test/MC/ELF/tls-i386.s                             |    64 +
 test/MC/ELF/tls.s                                  |    48 +
 test/MC/ELF/type.s                                 |    32 +
 test/MC/ELF/uleb.s                                 |    22 +
 test/MC/ELF/undef.s                                |    46 +
 test/MC/ELF/undef2.s                               |    10 +
 test/MC/ELF/weak.s                                 |    30 +
 test/MC/ELF/weakref-plt.s                          |     8 +
 test/MC/ELF/weakref-reloc.s                        |    49 +
 test/MC/ELF/weakref.s                              |   234 +
 test/MC/ELF/zero.s                                 |    16 +
 test/MC/MBlaze/dg.exp                              |     5 +
 test/MC/MBlaze/mblaze_branch.s                     |   197 +
 test/MC/MBlaze/mblaze_fpu.s                        |    77 +
 test/MC/MBlaze/mblaze_fsl.s                        |   568 +
 test/MC/MBlaze/mblaze_imm.s                        |   194 +
 test/MC/MBlaze/mblaze_memory.s                     |   107 +
 test/MC/MBlaze/mblaze_operands.s                   |   328 +
 test/MC/MBlaze/mblaze_pattern.s                    |    22 +
 test/MC/MBlaze/mblaze_shift.s                      |    47 +
 test/MC/MBlaze/mblaze_special.s                    |   167 +
 test/MC/MBlaze/mblaze_typea.s                      |   122 +
 test/MC/MBlaze/mblaze_typeb.s                      |    92 +
 test/MC/MachO/absolutize.s                         |     6 +-
 test/MC/MachO/comm-1.s                             |     2 +-
 test/MC/MachO/darwin-ARM-reloc.s                   |   171 +
 test/MC/MachO/darwin-Thumb-reloc.s                 |   139 +
 test/MC/MachO/darwin-complex-difference.s          |   129 +
 test/MC/MachO/darwin-x86_64-diff-relocs.s          |     2 +-
 test/MC/MachO/darwin-x86_64-reloc-offsets.s        |     6 +-
 test/MC/MachO/darwin-x86_64-reloc.s                |    10 +-
 test/MC/MachO/diff-with-two-sections.s             |    64 +
 test/MC/MachO/direction_labels.s                   |     4 +-
 test/MC/MachO/empty-dwarf-lines.s                  |    25 +
 test/MC/MachO/indirect-symbols.s                   |     6 +-
 test/MC/MachO/jcc.s                                |     4 +-
 test/MC/MachO/lcomm-attributes.s                   |     2 +-
 test/MC/MachO/loc.s                                |    25 +
 test/MC/MachO/pcrel-to-other-section.s             |   107 +
 test/MC/MachO/relax-jumps.s                        |     6 +-
 test/MC/MachO/reloc-pcrel-offset.s                 |     2 +-
 test/MC/MachO/reloc.s                              |    70 +-
 test/MC/MachO/section-align-1.s                    |     2 +-
 test/MC/MachO/section-align-2.s                    |     2 +-
 test/MC/MachO/string-table.s                       |     4 +-
 test/MC/MachO/symbol-diff.s                        |   122 +
 test/MC/MachO/symbol-flags.s                       |    59 +-
 test/MC/MachO/symbol-indirect.s                    |     2 +-
 test/MC/MachO/symbols-1.s                          |     4 +-
 test/MC/MachO/tbss.s                               |     4 +-
 test/MC/MachO/tdata.s                              |     4 +-
 test/MC/MachO/thread_init_func.s                   |     2 +-
 test/MC/MachO/tls.s                                |     8 +-
 test/MC/MachO/tlv-reloc.s                          |     8 +-
 test/MC/MachO/tlv.s                                |     4 +-
 test/MC/MachO/values.s                             |     2 +-
 test/MC/MachO/weakdef.s                            |   141 +
 test/MC/MachO/x86_32-optimal_nop.s                 |     8 +-
 test/MC/MachO/x86_32-symbols.s                     |     2 +-
 test/MC/MachO/x86_64-symbols.s                     |     2 +-
 test/MC/MachO/zerofill-1.s                         |     2 +-
 test/MC/MachO/zerofill-2.s                         |     2 +-
 test/MC/MachO/zerofill-3.s                         |     2 +-
 test/MC/MachO/zerofill-5.s                         |     6 +-
 test/MC/X86/3DNow.s                                |    92 +
 test/MC/X86/dg.exp                                 |     5 +
 test/MC/X86/x86-32-avx.s                           |  3283 ++++
 test/MC/X86/x86-32-coverage.s                      | 19564 +++++++++++++++++++
 test/MC/X86/x86-32-fma3.s                          |   674 +
 test/MC/X86/x86-32.s                               |   810 +
 test/MC/X86/x86-64.s                               |   944 +
 test/MC/X86/x86_64-avx-clmul-encoding.s            |    42 +
 test/MC/X86/x86_64-avx-encoding.s                  |  3318 ++++
 test/MC/X86/x86_64-encoding.s                      |   157 +
 test/MC/X86/x86_64-fma3-encoding.s                 |   674 +
 test/MC/X86/x86_64-imm-widths.s                    |   105 +
 test/MC/X86/x86_directives.s                       |     6 +
 test/MC/X86/x86_errors.s                           |     5 +
 test/MC/X86/x86_operands.s                         |    58 +
 test/Makefile                                      |    11 +-
 .../TestObjectFiles/trivial-object-test.coff-i386  |   Bin 0 -> 346 bytes
 .../trivial-object-test.coff-x86-64                |   Bin 0 -> 347 bytes
 .../TestObjectFiles/trivial-object-test.elf-i386   |   Bin 0 -> 716 bytes
 .../TestObjectFiles/trivial-object-test.elf-x86-64 |   Bin 0 -> 1024 bytes
 .../TestObjectFiles/trivial-object-test.macho-i386 |   Bin 0 -> 552 bytes
 .../trivial-object-test.macho-x86-64               |   Bin 0 -> 552 bytes
 test/Object/dg.exp                                 |     3 +
 test/Object/nm-trivial-object.test-broken          |    19 +
 test/Object/objdump-trivial-object.test-broken     |    54 +
 test/Other/2008-08-14-PassManager.ll               |     5 -
 test/Other/close-stderr.ll                         |     2 +
 test/Other/extract.ll                              |    27 +
 test/Other/lint.ll                                 |     2 +-
 test/Scripts/coff-dump.py                          |  1008 +-
 test/Scripts/coff-dump.py.bat                      |     5 +-
 test/Scripts/common_dump.py                        |    46 +
 test/Scripts/elf-dump                              |   231 +
 test/Scripts/elf-dump.bat                          |     7 +
 test/Scripts/macho-dump                            |   289 -
 test/Scripts/macho-dump.bat                        |     7 -
 test/Scripts/macho-dumpx                           |   294 +
 test/Scripts/macho-dumpx.bat                       |     7 +
 test/TableGen/Dag.td                               |    71 +
 test/TableGen/DagDefSubst.td                       |    16 -
 test/TableGen/DagIntSubst.td                       |    11 -
 test/TableGen/FieldAccess.td                       |     2 +
 test/TableGen/ListManip.td                         |     4 +-
 test/TableGen/Slice.td                             |     8 +-
 test/TableGen/defmclass.td                         |    12 +
 test/TableGen/if.td                                |    34 +-
 test/TableGen/lisp.td                              |     2 +-
 test/TableGen/nameconcat.td                        |    91 -
 test/Transforms/ArgumentPromotion/basictest.ll     |     2 +-
 test/Transforms/ArgumentPromotion/crash.ll         |    21 +
 test/Transforms/CodeGenPrepare/basic.ll            |    29 +
 test/Transforms/ConstProp/basictest.ll             |     9 +
 test/Transforms/ConstProp/bitcast.ll               |    12 +-
 test/Transforms/ConstProp/bitcast2.ll              |     8 -
 test/Transforms/ConstProp/calls.ll                 |    58 +-
 test/Transforms/ConstProp/constant-expr.ll         |    44 +
 test/Transforms/ConstProp/extractvalue.ll          |    68 +
 test/Transforms/ConstProp/insertvalue.ll           |    68 +
 test/Transforms/ConstProp/loads.ll                 |    17 +
 test/Transforms/ConstProp/logicaltest.ll           |     4 +-
 test/Transforms/ConstProp/nottest.ll               |    19 -
 test/Transforms/ConstProp/overflow-ops.ll          |    11 +
 .../ConstantMerge/2011-01-15-EitherOrder.ll        |    18 +
 test/Transforms/ConstantMerge/merge-both.ll        |    26 +
 test/Transforms/ConstantMerge/unnamed-addr.ll      |    40 +
 .../2010-09-26-MergeConstantRange.ll               |    82 +
 .../Transforms/CorrelatedValuePropagation/basic.ll |     3 +-
 .../Transforms/CorrelatedValuePropagation/crash.ll |    37 +
 .../CorrelatedValuePropagation/non-null.ll         |   103 +
 test/Transforms/DeadArgElim/deadexternal.ll        |    28 +-
 .../2004-11-28-LiveStoreDeleted.ll                 |    14 -
 .../2004-12-28-PartialStore.ll                     |    13 -
 .../DeadStoreElimination/2005-11-30-vaarg.ll       |     9 -
 .../DeadStoreElimination/2006-06-27-AST-Remove.ll  |  1113 --
 .../DeadStoreElimination/2008-07-28-load-store.ll  |    15 -
 .../2008-11-28-MemDepUpdate.ll                     |    16 -
 .../2008-11-29-OffEndOfBlock.ll                    |    27 -
 .../DeadStoreElimination/2009-11-10-Trampoline.ll  |    16 -
 .../DeadStoreElimination/PartialStore.ll           |    71 +-
 test/Transforms/DeadStoreElimination/alloca.ll     |     9 -
 test/Transforms/DeadStoreElimination/byval.ll      |    10 -
 .../DeadStoreElimination/const-pointers.ll         |     2 +-
 .../DeadStoreElimination/context-sensitive.ll      |    15 -
 test/Transforms/DeadStoreElimination/crash.ll      |    19 +-
 test/Transforms/DeadStoreElimination/free.ll       |    27 +-
 test/Transforms/DeadStoreElimination/lifetime.ll   |     2 +-
 test/Transforms/DeadStoreElimination/memcpy.ll     |    52 -
 .../DeadStoreElimination/no-targetdata.ll          |     2 +-
 .../DeadStoreElimination/partial-overwrite.ll      |    14 -
 test/Transforms/DeadStoreElimination/simple.ll     |   234 +-
 .../DeadStoreElimination/volatile-load.ll          |     8 -
 test/Transforms/EarlyCSE/basic.ll                  |   121 +
 test/Transforms/EarlyCSE/dg.exp                    |     3 +
 .../FunctionAttrs/2008-09-03-ReadNone.ll           |     2 +-
 .../FunctionAttrs/2008-09-03-ReadOnly.ll           |     2 +-
 .../FunctionAttrs/2008-10-04-LocalMemory.ll        |     2 +-
 .../FunctionAttrs/2008-12-29-Constant.ll           |     2 +-
 .../FunctionAttrs/2010-10-30-volatile.ll           |    10 +
 test/Transforms/GVN/2007-07-25-InfiniteLoop.ll     |     2 +-
 .../Transforms/GVN/2007-07-26-InterlockingLoops.ll |    29 +-
 test/Transforms/GVN/2007-07-31-NoDomInherit.ll     |     2 +-
 test/Transforms/GVN/2007-07-31-RedundantPhi.ll     |     2 +-
 test/Transforms/GVN/2008-07-02-Unreachable.ll      |     2 +-
 test/Transforms/GVN/2010-03-31-RedundantPHIs.ll    |    12 +-
 test/Transforms/GVN/2010-11-13-Simplify.ll         |    15 +
 test/Transforms/GVN/calls-nonlocal.ll              |     2 +-
 test/Transforms/GVN/condprop.ll                    |    35 +-
 test/Transforms/GVN/invariant-simple.ll            |     2 +-
 test/Transforms/GVN/lifetime-simple.ll             |     2 +-
 test/Transforms/GVN/load-constant-mem.ll           |     2 +-
 test/Transforms/GVN/load-pre-licm.ll               |    39 +
 test/Transforms/GVN/lpre-call-wrap-2.ll            |     2 +-
 test/Transforms/GVN/mixed.ll                       |     4 +-
 test/Transforms/GVN/non-local-offset.ll            |    59 +
 test/Transforms/GVN/nonescaping-malloc.ll          |     2 +-
 test/Transforms/GVN/null-aliases-nothing.ll        |     2 +-
 test/Transforms/GVN/phi-translate.ll               |    31 +
 test/Transforms/GVN/pre-load.ll                    |     2 +-
 test/Transforms/GVN/pre-single-pred.ll             |    14 +-
 test/Transforms/GVN/preserve-tbaa.ll               |    28 +
 test/Transforms/GVN/rle-must-alias.ll              |     2 +-
 test/Transforms/GVN/rle-nonlocal.ll                |     2 +-
 test/Transforms/GVN/rle-semidominated.ll           |     2 +-
 test/Transforms/GVN/rle.ll                         |     2 +-
 .../GlobalOpt/2008-04-26-SROA-Global-Align.ll      |     6 +-
 .../GlobalOpt/2009-03-07-PromotePtrToBool.ll       |     2 +-
 .../2009-11-16-MallocSingleStoreToGlobalVar.ll     |     2 +-
 test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll    |    16 +
 test/Transforms/GlobalOpt/crash.ll                 |     9 +
 .../GlobalOpt/ctor-list-opt-constexpr.ll           |    23 +
 test/Transforms/GlobalOpt/ctor-list-opt.ll         |    19 +-
 .../GlobalOpt/globalsra-unknown-index.ll           |     2 +-
 test/Transforms/GlobalOpt/memcpy.ll                |     2 +-
 test/Transforms/GlobalOpt/unnamed-addr.ll          |    54 +
 .../IndVarSimplify/loop-invariant-step.ll          |    33 -
 test/Transforms/Inline/basictest.ll                |     2 +-
 test/Transforms/Inline/byval.ll                    |    82 +-
 test/Transforms/Inline/byval2.ll                   |    28 -
 test/Transforms/Inline/devirtualize-3.ll           |     2 +-
 test/Transforms/Inline/devirtualize.ll             |     2 +-
 test/Transforms/Inline/gvn-inline-iteration.ll     |     2 +-
 .../InstCombine/2003-08-12-AllocaNonNull.ll        |     3 +-
 .../InstCombine/2006-04-28-ShiftShiftLongLong.ll   |     2 +-
 .../InstCombine/2007-03-26-BadShiftMask.ll         |     3 +-
 .../Transforms/InstCombine/2008-11-20-DivMulRem.ll |    43 +-
 .../Transforms/InstCombine/2010-11-01-lshr-mask.ll |    46 +
 .../InstCombine/2010-11-21-SizeZeroTypeGEP.ll      |    17 +
 .../InstCombine/2010-11-23-Distributed.ll          |    23 +
 test/Transforms/InstCombine/2011-02-14-InfLoop.ll  |    19 +
 .../InstCombine/2011-02-16-InsertelementHang.ll    |    11 +
 test/Transforms/InstCombine/add.ll                 |    24 +
 test/Transforms/InstCombine/add2.ll                |    25 +-
 test/Transforms/InstCombine/and2.ll                |    18 +
 test/Transforms/InstCombine/bit-checks.ll          |   348 +-
 test/Transforms/InstCombine/bitcast-store.ll       |    21 +
 test/Transforms/InstCombine/bitcast-vec-uniform.ll |    70 +
 test/Transforms/InstCombine/cast.ll                |    32 +-
 test/Transforms/InstCombine/constant-fold-gep.ll   |    19 +
 test/Transforms/InstCombine/crash.ll               |   118 +
 test/Transforms/InstCombine/div-cmp-overflow.ll    |     8 -
 test/Transforms/InstCombine/exact-sdiv.ll          |    52 -
 test/Transforms/InstCombine/exact.ll               |   154 +
 test/Transforms/InstCombine/extractvalue.ll        |    81 +-
 test/Transforms/InstCombine/fold-calls.ll          |    19 +
 test/Transforms/InstCombine/fold-vector-select.ll  |    13 +
 test/Transforms/InstCombine/icmp.ll                |   223 +
 test/Transforms/InstCombine/intrinsics.ll          |    19 +
 test/Transforms/InstCombine/memcpy.ll              |    19 +-
 test/Transforms/InstCombine/memset2.ll             |    15 +
 test/Transforms/InstCombine/neon-intrinsics.ll     |    25 +
 test/Transforms/InstCombine/nsw.ll                 |    41 +-
 test/Transforms/InstCombine/objsize.ll             |    10 +
 test/Transforms/InstCombine/or-fcmp.ll             |    28 +-
 test/Transforms/InstCombine/or.ll                  |    48 +-
 test/Transforms/InstCombine/overflow.ll            |   133 +
 test/Transforms/InstCombine/phi.ll                 |   125 +-
 test/Transforms/InstCombine/pr8547.ll              |    26 +
 test/Transforms/InstCombine/rem.ll                 |     5 +
 test/Transforms/InstCombine/select-crash.ll        |    20 +
 test/Transforms/InstCombine/select.ll              |   244 +
 test/Transforms/InstCombine/sext.ll                |     2 +-
 test/Transforms/InstCombine/shift.ll               |    50 +-
 test/Transforms/InstCombine/signext.ll             |    12 +-
 test/Transforms/InstCombine/sub.ll                 |    28 +-
 test/Transforms/InstCombine/trunc.ll               |    24 +-
 test/Transforms/InstCombine/vec_demanded_elts-2.ll |    19 -
 test/Transforms/InstCombine/vec_demanded_elts-3.ll |    14 -
 test/Transforms/InstCombine/vec_demanded_elts.ll   |   129 +-
 test/Transforms/InstCombine/vec_sext.ll            |    22 +
 test/Transforms/InstCombine/vec_shuffle.ll         |    23 +
 test/Transforms/InstCombine/vector-casts.ll        |    28 +
 test/Transforms/InstCombine/xor2.ll                |     2 +-
 test/Transforms/InstSimplify/2010-12-20-Boolean.ll |    29 +
 .../InstSimplify/2010-12-20-Distribute.ll          |    62 +
 test/Transforms/InstSimplify/2011-01-14-Thread.ll  |     9 +
 test/Transforms/InstSimplify/2011-02-01-Vector.ll  |     8 +
 test/Transforms/InstSimplify/compare.ll            |   189 +
 test/Transforms/InstSimplify/dg.exp                |     3 +
 test/Transforms/InstSimplify/exact-nsw-nuw.ll      |    44 +
 test/Transforms/InstSimplify/fdiv.ll               |    17 +
 test/Transforms/InstSimplify/reassociate.ll        |   186 +
 test/Transforms/JumpThreading/2010-08-26-and.ll    |     2 +-
 test/Transforms/JumpThreading/and-and-cond.ll      |    10 +-
 test/Transforms/JumpThreading/and-cond.ll          |     9 +-
 test/Transforms/JumpThreading/basic.ll             |     2 +-
 test/Transforms/JumpThreading/crash.ll             |    27 +
 test/Transforms/JumpThreading/degenerate-phi.ll    |    24 +
 test/Transforms/JumpThreading/indirectbr.ll        |    94 +
 test/Transforms/JumpThreading/lvi-load.ll          |     2 +-
 test/Transforms/JumpThreading/select.ll            |   123 +
 test/Transforms/JumpThreading/thread-loads.ll      |     9 +-
 .../LCSSA/2006-06-03-IncorrectIDFPhis.ll           |     4 +-
 .../LICM/2003-02-27-NestedLoopExitBlocks.ll        |     2 +-
 .../LICM/2008-07-22-LoadGlobalConstant.ll          |     2 +-
 test/Transforms/LICM/2009-03-25-AliasSetTracker.ll |    39 -
 test/Transforms/LICM/crash.ll                      |    13 +
 test/Transforms/LICM/scalar_promote.ll             |    32 +-
 test/Transforms/LoopIdiom/basic.ll                 |   349 +
 test/Transforms/LoopIdiom/dg.exp                   |     3 +
 .../LoopIndexSplit/2007-09-21-LoopBound.ll         |    63 -
 .../2007-09-24-UpdateIterationSpace.ll             |    57 -
 .../2007-09-25-UpdateIterationSpace-2.ll           |    60 -
 .../LoopIndexSplit/2008-01-28-IndDecrement.ll      |    46 -
 test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll |    48 -
 .../LoopIndexSplit/2008-02-13-ExitValueNum.ll      |    67 -
 .../LoopIndexSplit/2008-02-13-LoopLatch.ll         |    72 -
 .../LoopIndexSplit/2008-02-13-LoopLatchPHI.ll      |    74 -
 test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll |   464 -
 .../LoopIndexSplit/2008-03-24-ExitPhi.ll           |    69 -
 .../Transforms/LoopIndexSplit/2008-05-19-IndVar.ll |    40 -
 .../LoopIndexSplit/2008-06-03-DomFrontier.ll       |    32 -
 .../LoopIndexSplit/2008-07-08-MisCompilation.ll    |    25 -
 test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll |    78 -
 test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll |    38 -
 test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll |    31 -
 .../LoopIndexSplit/2008-10-10-OneIteration.ll      |    66 -
 test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll  |    69 -
 .../2009-03-02-UpdateIterationSpace-crash.ll       |    64 -
 test/Transforms/LoopIndexSplit/2009-03-30-undef.ll |    24 -
 test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll |    52 -
 test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll |    44 -
 .../Transforms/LoopIndexSplit/Crash2-2007-08-17.ll |    58 -
 .../LoopIndexSplit/ExitCondition-2007-09-10.ll     |    50 -
 .../LoopIndexSplit/OneIterLoop-2007-08-17.ll       |    67 -
 .../LoopIndexSplit/OneIterLoop2-2007-08-17.ll      |    69 -
 .../LoopIndexSplit/OneIterLoop3-2007-08-17.ll      |    34 -
 test/Transforms/LoopIndexSplit/PR3913.ll           |    24 -
 test/Transforms/LoopIndexSplit/PR4174-2.ll         |    38 -
 test/Transforms/LoopIndexSplit/PR4174.ll           |    23 -
 .../LoopIndexSplit/SaveLastValue-2007-08-17.ll     |    52 -
 .../LoopIndexSplit/SplitValue-2007-08-24.ll        |    52 -
 .../LoopIndexSplit/UpperBound-2007-08-24.ll        |    52 -
 test/Transforms/LoopIndexSplit/dg.exp              |     3 -
 .../LoopIndexSplit/non-iv-cmp-operand.ll           |   195 -
 test/Transforms/LoopRotate/LRCrash-1.ll            |    18 -
 test/Transforms/LoopRotate/LRCrash-2.ll            |    24 -
 test/Transforms/LoopRotate/LRCrash-3.ll            |    29 -
 test/Transforms/LoopRotate/LRCrash-4.ll            |    18 -
 test/Transforms/LoopRotate/LRCrash-5.ll            |    26 -
 test/Transforms/LoopRotate/basic.ll                |    35 +
 test/Transforms/LoopRotate/crash.ll                |   139 +
 test/Transforms/LoopRotate/dbgvalue.ll             |    59 +
 test/Transforms/LoopRotate/phi-duplicate.ll        |    19 +-
 .../LoopSimplify/2003-04-25-AssertFail.ll          |     2 +-
 .../2003-05-12-PreheaderExitOfChild.ll             |     2 +-
 .../2004-02-05-DominatorInfoCorruption.ll          |     2 +-
 .../LoopSimplify/2004-03-15-IncorrectDomUpdate.ll  |     2 +-
 .../LoopSimplify/2004-04-01-IncorrectDomUpdate.ll  |     2 +-
 .../2004-04-12-LoopSimplify-SwitchBackedges.ll     |     2 +-
 .../2004-04-13-LoopSimplifyUpdateDomFrontier.ll    |     2 +-
 .../LoopSimplify/2007-10-28-InvokeCrash.ll         |     2 +-
 .../2010-07-15-IncorrectDomFrontierUpdate.ll       |     2 +-
 .../LoopSimplify/2010-12-26-PHIInfiniteLoop.ll     |    43 +
 test/Transforms/LoopSimplify/basictest.ll          |     2 +-
 test/Transforms/LoopSimplify/hardertest.ll         |     2 +-
 .../Transforms/LoopSimplify/indirectbr-backedge.ll |     2 +-
 test/Transforms/LoopSimplify/indirectbr.ll         |     2 +-
 test/Transforms/LoopSimplify/merge-exits.ll        |     2 +-
 test/Transforms/LoopSimplify/phi-node-simplify.ll  |     2 +-
 .../LoopSimplify/unreachable-loop-pred.ll          |     2 +-
 .../LoopStrengthReduce/hoist-parent-preheader.ll   |    32 +
 test/Transforms/LoopStrengthReduce/pr2570.ll       |     2 +-
 .../LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll     |     2 +-
 test/Transforms/LoopUnroll/basic.ll                |    24 +
 test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll   |    28 +
 .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll      |     2 +-
 .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll      |     2 +-
 .../Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll |    17 -
 test/Transforms/MemCpyOpt/align.ll                 |     4 +-
 test/Transforms/MemCpyOpt/crash.ll                 |    19 +-
 test/Transforms/MemCpyOpt/form-memset.ll           |   175 +-
 test/Transforms/MemCpyOpt/form-memset2.ll          |    99 -
 test/Transforms/MemCpyOpt/loadstore-sret.ll        |    25 +
 test/Transforms/MemCpyOpt/memcpy-to-memset.ll      |    19 +
 test/Transforms/MemCpyOpt/memcpy.ll                |    93 +-
 test/Transforms/MemCpyOpt/memmove.ll               |     2 +-
 test/Transforms/MemCpyOpt/smaller.ll               |    28 +
 test/Transforms/MemCpyOpt/sret.ll                  |     6 +-
 .../Transforms/MergeFunc/2011-02-08-RemoveEqual.ll |   276 +
 test/Transforms/MergeFunc/fold-weak.ll             |     4 +
 test/Transforms/MergeFunc/vector.ll                |    76 +
 test/Transforms/PartialSpecialize/dg.exp           |     3 -
 .../PartialSpecialize/two-specializations.ll       |    37 -
 .../Reassociate/2011-01-26-UseAfterFree.ll         |    35 +
 test/Transforms/Reassociate/optional-flags.ll      |    29 +
 .../ScalarRepl/2003-05-30-InvalidIndices.ll        |     8 -
 .../Transforms/ScalarRepl/2003-05-30-MultiLevel.ll |    10 -
 .../ScalarRepl/2005-12-14-UnionPromoteCrash.ll     |    28 -
 .../2006-01-24-IllegalUnionPromoteCrash.ll         |    12 -
 .../ScalarRepl/2006-04-20-PromoteCrash.ll          |    18 -
 .../ScalarRepl/2006-10-23-PointerUnionCrash.ll     |    57 -
 .../Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll |    20 -
 .../ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll    |    44 -
 .../ScalarRepl/2009-01-09-scalarrepl-empty.ll      |    15 -
 .../ScalarRepl/2009-04-21-ZeroLengthMemSet.ll      |    16 -
 test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll   |    12 -
 .../ScalarRepl/2009-06-01-BitcastIntPadding.ll     |    17 -
 test/Transforms/ScalarRepl/2009-08-16-VLA.ll       |    23 -
 test/Transforms/ScalarRepl/basictest.ll            |    23 +-
 test/Transforms/ScalarRepl/copy-aggregate.ll       |    52 +-
 test/Transforms/ScalarRepl/crash.ll                |   260 +
 test/Transforms/ScalarRepl/memcpy-from-global.ll   |    66 +-
 test/Transforms/ScalarRepl/phi-select.ll           |   153 +
 test/Transforms/ScalarRepl/vector_promote.ll       |     6 +-
 .../SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll    |     6 +
 test/Transforms/SimplifyCFG/MagicPointer.ll        |     1 -
 test/Transforms/SimplifyCFG/PhiBlockMerge.ll       |     7 +-
 test/Transforms/SimplifyCFG/PhiEliminate.ll        |    14 -
 test/Transforms/SimplifyCFG/basictest.ll           |    23 +-
 test/Transforms/SimplifyCFG/indirectbr.ll          |   118 +
 test/Transforms/SimplifyCFG/invoke_unwind.ll       |     5 +-
 .../SimplifyCFG/speculate-with-offset.ll           |    94 +
 test/Transforms/SimplifyCFG/switch-to-icmp.ll      |    39 +
 test/Transforms/SimplifyCFG/switch_create.ll       |   436 +-
 .../Transforms/SimplifyCFG/switch_formation.dbg.ll |     8 +-
 test/Transforms/SimplifyCFG/switch_formation.ll    |    30 -
 .../SimplifyLibCalls/2009-02-12-StrTo.ll           |     2 +-
 test/Transforms/SimplifyLibCalls/FPuts.ll          |    29 +
 test/Transforms/SimplifyLibCalls/Printf.ll         |    29 +-
 test/Transforms/SimplifyLibCalls/Puts.ll           |    30 +-
 test/Transforms/SimplifyLibCalls/StrChr.ll         |    28 +-
 test/Transforms/SimplifyLibCalls/StrPBrk.ll        |    25 +
 test/Transforms/SimplifyLibCalls/StrRChr.ll        |    23 +
 test/Transforms/SimplifyLibCalls/StrSpn.ll         |    41 +
 test/Transforms/SimplifyLibCalls/floor.ll          |     2 +
 test/Transforms/Sink/basic.ll                      |     2 +-
 test/Transforms/TailCallElim/dup_tail.ll           |    23 +
 test/Unit/lit.cfg                                  |    12 +-
 test/Unit/lit.site.cfg.in                          |    10 +
 test/lib/llvm.exp                                  |     6 +-
 test/lit.cfg                                       |   109 +-
 test/lit.site.cfg.in                               |    10 +
 test/site.exp.in                                   |     2 +
 tools/CMakeLists.txt                               |    16 +-
 tools/Makefile                                     |    48 +-
 tools/bugpoint-passes/CMakeLists.txt               |     2 +
 tools/bugpoint/BugDriver.cpp                       |     2 +-
 tools/bugpoint/BugDriver.h                         |     3 +-
 tools/bugpoint/CrashDebugger.cpp                   |     8 +-
 tools/bugpoint/ExecutionDriver.cpp                 |    44 +-
 tools/bugpoint/ExtractFunction.cpp                 |    12 +-
 tools/bugpoint/Miscompilation.cpp                  |    12 +-
 tools/bugpoint/OptimizerDriver.cpp                 |    28 +-
 tools/bugpoint/ToolRunner.cpp                      |   206 +-
 tools/bugpoint/ToolRunner.h                        |    13 +-
 tools/bugpoint/bugpoint.cpp                        |    31 +-
 tools/edis/CMakeLists.txt                          |     2 -
 tools/edis/Makefile                                |     4 +-
 tools/gold/Makefile                                |     3 +-
 tools/gold/gold-plugin.cpp                         |   139 +-
 tools/llc/llc.cpp                                  |    22 +-
 tools/lli/CMakeLists.txt                           |     2 +-
 tools/lli/Makefile                                 |     2 +-
 tools/lli/lli.cpp                                  |    40 +-
 tools/llvm-ar/llvm-ar.cpp                          |    78 +-
 tools/llvm-as/llvm-as.cpp                          |     4 +-
 tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp          |    30 +-
 tools/llvm-config/CMakeLists.txt                   |    19 +-
 tools/llvm-config/llvm-config.in.in                |     5 +-
 tools/llvm-diff/llvm-diff.cpp                      |    75 +-
 tools/llvm-dis/llvm-dis.cpp                        |    37 +-
 tools/llvm-extract/llvm-extract.cpp                |    45 +-
 tools/llvm-ld/CMakeLists.txt                       |     2 +
 tools/llvm-ld/Optimize.cpp                         |     2 +-
 tools/llvm-ld/llvm-ld.cpp                          |    62 +-
 tools/llvm-link/llvm-link.cpp                      |     6 +-
 tools/llvm-mc/Disassembler.cpp                     |     3 +-
 tools/llvm-mc/llvm-mc.cpp                          |   193 +-
 tools/llvm-nm/CMakeLists.txt                       |     2 +-
 tools/llvm-nm/Makefile                             |     2 +-
 tools/llvm-nm/llvm-nm.cpp                          |   228 +-
 tools/llvm-objdump/CMakeLists.txt                  |    11 +
 tools/llvm-objdump/Makefile                        |    17 +
 tools/llvm-objdump/llvm-objdump.cpp                |   255 +
 tools/llvm-prof/llvm-prof.cpp                      |    14 +-
 tools/llvm-ranlib/llvm-ranlib.cpp                  |    14 +-
 tools/llvm-shlib/Makefile                          |    15 +-
 tools/llvm-stub/llvm-stub.c                        |    10 +-
 tools/llvmc/doc/LLVMC-Reference.rst                |    71 +-
 tools/llvmc/examples/mcc16/Hooks.cpp               |     2 +-
 tools/llvmc/examples/mcc16/Main.cpp                |     2 +-
 tools/llvmc/src/Base.td.in                         |   227 +-
 tools/llvmc/src/Clang.td                           |     6 +-
 tools/llvmc/src/Hooks.cpp                          |   181 +-
 tools/lto/LTOCodeGenerator.cpp                     |    38 +-
 tools/lto/LTOModule.cpp                            |    83 +-
 tools/lto/LTOModule.h                              |     3 +
 tools/lto/Makefile                                 |     4 +
 tools/lto/lto.cpp                                  |    14 +-
 tools/lto/lto.exports                              |     1 +
 tools/macho-dump/CMakeLists.txt                    |     5 +
 tools/macho-dump/Makefile                          |    23 +
 tools/macho-dump/macho-dump.cpp                    |   391 +
 tools/opt/GraphPrinters.cpp                        |    13 +-
 tools/opt/opt.cpp                                  |   223 +-
 unittests/ADT/APIntTest.cpp                        |     7 +-
 unittests/ADT/BitVectorTest.cpp                    |     7 +
 unittests/ADT/FoldingSet.cpp                       |    39 +
 unittests/ADT/ImmutableSetTest.cpp                 |    48 +-
 unittests/ADT/IntEqClassesTest.cpp                 |   107 +
 unittests/ADT/IntervalMapTest.cpp                  |   716 +
 unittests/ADT/Makefile                             |     2 +-
 unittests/ADT/SmallBitVectorTest.cpp               |     7 +
 unittests/ADT/SmallVectorTest.cpp                  |     2 +-
 unittests/ADT/StringMapTest.cpp                    |     2 +-
 unittests/ADT/StringRefTest.cpp                    |     2 +-
 unittests/ADT/TripleTest.cpp                       |    80 +-
 unittests/ADT/TwineTest.cpp                        |     8 +
 unittests/ADT/ValueMapTest.cpp                     |   294 -
 unittests/CMakeLists.txt                           |   142 +
 unittests/ExecutionEngine/JIT/JITTests.def         |     4 +
 unittests/Makefile.unittest                        |    10 +-
 unittests/Support/ConstantRangeTest.cpp            |    54 +-
 unittests/Support/EndianTest.cpp                   |    72 +
 unittests/Support/Path.cpp                         |   253 +
 unittests/Support/SwapByteOrderTest.cpp            |   128 +
 unittests/Support/System.cpp                       |    16 -
 unittests/Support/TimeValue.cpp                    |    23 +
 unittests/Support/ValueHandleTest.cpp              |     4 +-
 unittests/Transforms/Utils/Local.cpp               |    49 +
 unittests/VMCore/ConstantsTest.cpp                 |     9 +
 unittests/VMCore/InstructionsTest.cpp              |    17 -
 unittests/VMCore/PassManagerTest.cpp               |    37 +-
 unittests/VMCore/ValueMapTest.cpp                  |   294 +
 unittests/VMCore/VerifierTest.cpp                  |    19 +
 utils/CollectDebugInfoUsingLLDB.py                 |   182 +
 utils/CompareDebugInfo.py                          |   182 +
 utils/FileCheck/CMakeLists.txt                     |     2 +-
 utils/FileCheck/FileCheck.cpp                      |    65 +-
 utils/FileCheck/Makefile                           |     6 +-
 utils/FileUpdate/CMakeLists.txt                    |     2 +-
 utils/FileUpdate/FileUpdate.cpp                    |    20 +-
 utils/FileUpdate/Makefile                          |     6 +-
 utils/GenLibDeps.pl                                |     2 -
 utils/GetRepositoryPath                            |    27 +
 utils/GetSourceVersion                             |    20 +-
 utils/KillTheDoctor/CMakeLists.txt                 |     5 +
 utils/KillTheDoctor/KillTheDoctor.cpp              |   596 +
 utils/Makefile                                     |     2 +-
 utils/OldenDataRecover.pl                          |    37 -
 utils/PerfectShuffle/PerfectShuffle.cpp            |     3 +-
 utils/TableGen/ARMDecoderEmitter.cpp               |   313 +-
 utils/TableGen/ARMDecoderEmitter.h                 |     2 +-
 utils/TableGen/AsmMatcherEmitter.cpp               |  2362 ++-
 utils/TableGen/AsmWriterEmitter.cpp                |   122 +-
 utils/TableGen/AsmWriterInst.cpp                   |   112 +-
 utils/TableGen/AsmWriterInst.h                     |    36 +-
 utils/TableGen/CMakeLists.txt                      |    12 +-
 utils/TableGen/CallingConvEmitter.cpp              |    12 +-
 utils/TableGen/ClangASTNodesEmitter.h              |     2 +-
 utils/TableGen/ClangAttrEmitter.cpp                |    94 +-
 utils/TableGen/ClangAttrEmitter.h                  |    13 +
 utils/TableGen/ClangDiagnosticsEmitter.cpp         |    26 +-
 utils/TableGen/ClangSACheckersEmitter.cpp          |   229 +
 utils/TableGen/ClangSACheckersEmitter.h            |    31 +
 utils/TableGen/CodeEmitterGen.cpp                  |   253 +-
 utils/TableGen/CodeEmitterGen.h                    |     8 +-
 utils/TableGen/CodeGenDAGPatterns.cpp              |   796 +-
 utils/TableGen/CodeGenDAGPatterns.h                |   208 +-
 utils/TableGen/CodeGenInstruction.cpp              |   519 +-
 utils/TableGen/CodeGenInstruction.h                |   231 +-
 utils/TableGen/CodeGenRegisters.h                  |     6 +
 utils/TableGen/CodeGenTarget.cpp                   |   114 +-
 utils/TableGen/CodeGenTarget.h                     |    17 +-
 utils/TableGen/DAGISelMatcher.cpp                  |    42 +-
 utils/TableGen/DAGISelMatcher.h                    |   368 +-
 utils/TableGen/DAGISelMatcherEmitter.cpp           |    38 +-
 utils/TableGen/DAGISelMatcherGen.cpp               |   273 +-
 utils/TableGen/DAGISelMatcherOpt.cpp               |    18 +-
 utils/TableGen/DisassemblerEmitter.cpp             |     7 +-
 utils/TableGen/EDEmitter.cpp                       |   288 +-
 utils/TableGen/FastISelEmitter.cpp                 |    70 +-
 utils/TableGen/FixedLenDecoderEmitter.cpp          |  1372 ++
 utils/TableGen/FixedLenDecoderEmitter.h            |    56 +
 utils/TableGen/InstrEnumEmitter.cpp                |     2 +-
 utils/TableGen/InstrInfoEmitter.cpp                |    33 +-
 utils/TableGen/IntrinsicEmitter.cpp                |   204 +-
 utils/TableGen/LLVMCConfigurationEmitter.cpp       |   335 +-
 utils/TableGen/LLVMCConfigurationEmitter.h         |     4 +-
 utils/TableGen/Makefile                            |     2 +-
 utils/TableGen/NeonEmitter.cpp                     |  1059 +-
 utils/TableGen/NeonEmitter.h                       |    78 +-
 utils/TableGen/Record.cpp                          |   200 +-
 utils/TableGen/Record.h                            |    42 +-
 utils/TableGen/RegisterInfoEmitter.cpp             |    24 +-
 utils/TableGen/StringMatcher.cpp                   |   149 +
 utils/TableGen/StringMatcher.h                     |    54 +
 utils/TableGen/SubtargetEmitter.cpp                |   106 +-
 utils/TableGen/SubtargetEmitter.h                  |     7 +-
 utils/TableGen/TGLexer.cpp                         |    68 +-
 utils/TableGen/TGLexer.h                           |    15 +-
 utils/TableGen/TGParser.cpp                        |   268 +-
 utils/TableGen/TGParser.h                          |    12 +-
 utils/TableGen/TableGen.cpp                        |    49 +-
 utils/TableGen/X86DisassemblerTables.cpp           |    16 +-
 utils/TableGen/X86ModRMFilters.h                   |     2 +-
 utils/TableGen/X86RecognizableInstr.cpp            |    33 +-
 utils/TableGen/X86RecognizableInstr.h              |     5 +-
 utils/Target/ARM/analyze-match-table.py            |    61 +
 utils/buildit/build_llvm                           |    14 +-
 utils/emacs/llvm-mode.el                           |    17 +-
 utils/emacs/tablegen-mode.el                       |    14 +-
 utils/findmisopt                                   |     4 +-
 utils/findoptdiff                                  |     6 +-
 utils/fpcmp/Makefile                               |     6 +-
 utils/kate/README                                  |    12 +
 utils/kate/llvm.xml                                |   255 +
 utils/lit/TODO                                     |    10 -
 utils/lit/lit/LitConfig.py                         |    19 +
 utils/lit/lit/LitFormats.py                        |     1 +
 utils/lit/lit/TestFormats.py                       |    23 +-
 utils/lit/lit/TestRunner.py                        |    21 +-
 utils/lit/lit/TestingConfig.py                     |     2 +
 utils/lit/lit/Util.py                              |    18 +-
 utils/lit/lit/__init__.py                          |     6 +-
 utils/lit/lit/lit.py                               |   648 -
 utils/lit/lit/main.py                              |   648 +
 utils/lit/setup.py                                 |    23 +-
 utils/llvm-lit/CMakeLists.txt                      |    12 +
 utils/llvm-lit/llvm-lit.in                         |     6 +
 utils/llvm-native-gcc                              |     4 +-
 utils/llvm-native-gxx                              |     4 +-
 utils/not/CMakeLists.txt                           |     2 +-
 utils/not/Makefile                                 |     6 +-
 utils/not/not.cpp                                  |    16 +-
 utils/profile.pl                                   |     4 +-
 utils/release/test-release.sh                      |   398 +
 utils/test_debuginfo.pl                            |    61 +
 utils/unittest/CMakeLists.txt                      |    41 +
 utils/unittest/UnitTestMain/TestMain.cpp           |    27 +
 utils/unittest/googletest/gtest.cc                 |    35 +-
 .../googletest/include/gtest/internal/gtest-port.h |     3 +-
 utils/valgrind/x86_64-pc-linux-gnu.supp            |     8 +-
 utils/vim/llvm.vim                                 |     3 +-
 utils/vim/vimrc                                    |     9 +-
 3241 files changed, 240491 insertions(+), 160008 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 bindings/ada/analysis/llvm_analysis-binding.ads
 delete mode 100644 bindings/ada/analysis/llvm_analysis.ads
 delete mode 100644 bindings/ada/analysis/llvm_analysis_wrap.cxx
 delete mode 100644 bindings/ada/bitreader/llvm_bit_reader-binding.ads
 delete mode 100644 bindings/ada/bitreader/llvm_bit_reader.ads
 delete mode 100644 bindings/ada/bitreader/llvm_bitreader_wrap.cxx
 delete mode 100644 bindings/ada/bitwriter/llvm_bit_writer-binding.ads
 delete mode 100644 bindings/ada/bitwriter/llvm_bit_writer.ads
 delete mode 100644 bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
 delete mode 100644 bindings/ada/executionengine/llvm_execution_engine-binding.ads
 delete mode 100644 bindings/ada/executionengine/llvm_execution_engine.ads
 delete mode 100644 bindings/ada/executionengine/llvm_executionengine_wrap.cxx
 delete mode 100644 bindings/ada/llvm.gpr
 delete mode 100644 bindings/ada/llvm/llvm-binding.ads
 delete mode 100644 bindings/ada/llvm/llvm.ads
 delete mode 100644 bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
 delete mode 100644 bindings/ada/llvm/llvm_link_time_optimizer.ads
 delete mode 100644 bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
 delete mode 100644 bindings/ada/llvm/llvm_wrap.cxx
 delete mode 100644 bindings/ada/target/llvm_target-binding.ads
 delete mode 100644 bindings/ada/target/llvm_target.ads
 delete mode 100644 bindings/ada/target/llvm_target_wrap.cxx
 delete mode 100644 bindings/ada/transforms/llvm_transforms-binding.ads
 delete mode 100644 bindings/ada/transforms/llvm_transforms.ads
 delete mode 100644 bindings/ada/transforms/llvm_transforms_wrap.cxx
 create mode 100644 cmake/modules/HandleLLVMOptions.cmake
 create mode 100644 cmake/modules/LLVMParseArguments.cmake
 create mode 100644 docs/tutorial/OCamlLangImpl8.html
 create mode 100644 include/llvm-c/Initialization.h
 create mode 100644 include/llvm/ADT/ArrayRef.h
 create mode 100644 include/llvm/ADT/InMemoryStruct.h
 create mode 100644 include/llvm/ADT/IntEqClasses.h
 create mode 100644 include/llvm/ADT/IntervalMap.h
 create mode 100644 include/llvm/Analysis/DIBuilder.h
 create mode 100644 include/llvm/Analysis/DominanceFrontier.h
 create mode 100644 include/llvm/Analysis/PathNumbering.h
 create mode 100644 include/llvm/Analysis/PathProfileInfo.h
 delete mode 100644 include/llvm/Analysis/PointerTracking.h
 create mode 100644 include/llvm/Analysis/RegionPass.h
 create mode 100644 include/llvm/CodeGen/EdgeBundles.h
 create mode 100644 include/llvm/CodeGen/MachineLoopRanges.h
 create mode 100644 include/llvm/CodeGen/PBQP/Graph.h
 create mode 100644 include/llvm/CodeGen/PBQP/HeuristicBase.h
 create mode 100644 include/llvm/CodeGen/PBQP/HeuristicSolver.h
 create mode 100644 include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
 create mode 100644 include/llvm/CodeGen/PBQP/Math.h
 create mode 100644 include/llvm/CodeGen/PBQP/Solution.h
 delete mode 100644 include/llvm/CodeGen/PostRAHazardRecognizer.h
 create mode 100644 include/llvm/CodeGen/RegAllocPBQP.h
 create mode 100644 include/llvm/CodeGen/ScoreboardHazardRecognizer.h
 create mode 100644 include/llvm/ExecutionEngine/MCJIT.h
 create mode 100644 include/llvm/InitializePasses.h
 delete mode 100644 include/llvm/MC/ELFObjectWriter.h
 create mode 100644 include/llvm/MC/MCELFObjectWriter.h
 create mode 100644 include/llvm/MC/MCFixupKindInfo.h
 create mode 100644 include/llvm/MC/MCMachObjectWriter.h
 delete mode 100644 include/llvm/MC/MachObjectWriter.h
 create mode 100644 include/llvm/Object/MachOFormat.h
 create mode 100644 include/llvm/Object/MachOObject.h
 create mode 100644 include/llvm/Object/ObjectFile.h
 create mode 100644 include/llvm/Support/AIXDataTypesFix.h
 create mode 100644 include/llvm/Support/Atomic.h
 create mode 100644 include/llvm/Support/DataTypes.h.cmake
 create mode 100644 include/llvm/Support/DataTypes.h.in
 create mode 100644 include/llvm/Support/Disassembler.h
 create mode 100644 include/llvm/Support/DynamicLibrary.h
 delete mode 100644 include/llvm/Support/DynamicLinker.h
 create mode 100644 include/llvm/Support/Endian.h
 create mode 100644 include/llvm/Support/Errno.h
 create mode 100644 include/llvm/Support/FEnv.h
 create mode 100644 include/llvm/Support/FileSystem.h
 create mode 100644 include/llvm/Support/Host.h
 create mode 100644 include/llvm/Support/IncludeFile.h
 create mode 100644 include/llvm/Support/LICENSE.TXT
 create mode 100644 include/llvm/Support/Memory.h
 create mode 100644 include/llvm/Support/Mutex.h
 create mode 100644 include/llvm/Support/Path.h
 create mode 100644 include/llvm/Support/PathV1.h
 create mode 100644 include/llvm/Support/PathV2.h
 create mode 100644 include/llvm/Support/Process.h
 create mode 100644 include/llvm/Support/Program.h
 create mode 100644 include/llvm/Support/RWMutex.h
 create mode 100644 include/llvm/Support/Signals.h
 create mode 100644 include/llvm/Support/Solaris.h
 delete mode 100644 include/llvm/Support/StableBasicBlockNumbering.h
 create mode 100644 include/llvm/Support/SwapByteOrder.h
 create mode 100644 include/llvm/Support/ThreadLocal.h
 create mode 100644 include/llvm/Support/Threading.h
 create mode 100644 include/llvm/Support/TimeValue.h
 create mode 100644 include/llvm/Support/ToolOutputFile.h
 create mode 100644 include/llvm/Support/Valgrind.h
 create mode 100644 include/llvm/Support/system_error.h
 delete mode 100644 include/llvm/System/AIXDataTypesFix.h
 delete mode 100644 include/llvm/System/Alarm.h
 delete mode 100644 include/llvm/System/Atomic.h
 delete mode 100644 include/llvm/System/DataTypes.h.cmake
 delete mode 100644 include/llvm/System/DataTypes.h.in
 delete mode 100644 include/llvm/System/Disassembler.h
 delete mode 100644 include/llvm/System/DynamicLibrary.h
 delete mode 100644 include/llvm/System/Errno.h
 delete mode 100644 include/llvm/System/Host.h
 delete mode 100644 include/llvm/System/IncludeFile.h
 delete mode 100644 include/llvm/System/LICENSE.TXT
 delete mode 100644 include/llvm/System/Memory.h
 delete mode 100644 include/llvm/System/Mutex.h
 delete mode 100644 include/llvm/System/Path.h
 delete mode 100644 include/llvm/System/Process.h
 delete mode 100644 include/llvm/System/Program.h
 delete mode 100644 include/llvm/System/RWMutex.h
 delete mode 100644 include/llvm/System/Signals.h
 delete mode 100644 include/llvm/System/Solaris.h
 delete mode 100644 include/llvm/System/ThreadLocal.h
 delete mode 100644 include/llvm/System/Threading.h
 delete mode 100644 include/llvm/System/TimeValue.h
 delete mode 100644 include/llvm/System/Valgrind.h
 create mode 100644 include/llvm/Target/TargetAsmInfo.h
 delete mode 100644 include/llvm/Target/TargetFrameInfo.h
 create mode 100644 include/llvm/Target/TargetFrameLowering.h
 create mode 100644 include/llvm/Target/TargetLibraryInfo.h
 delete mode 100644 include/llvm/Transforms/RSProfiling.h
 create mode 100644 lib/Analysis/DIBuilder.cpp
 create mode 100644 lib/Analysis/DominanceFrontier.cpp
 create mode 100644 lib/Analysis/IPA/IPA.cpp
 create mode 100644 lib/Analysis/MemDepPrinter.cpp
 create mode 100644 lib/Analysis/NoAliasAnalysis.cpp
 create mode 100644 lib/Analysis/PathNumbering.cpp
 create mode 100644 lib/Analysis/PathProfileInfo.cpp
 create mode 100644 lib/Analysis/PathProfileVerifier.cpp
 delete mode 100644 lib/Analysis/PointerTracking.cpp
 create mode 100644 lib/Analysis/RegionPass.cpp
 create mode 100644 lib/Bitcode/CMakeLists.txt
 create mode 100644 lib/CMakeLists.txt
 create mode 100644 lib/CodeGen/AllocationOrder.cpp
 create mode 100644 lib/CodeGen/AllocationOrder.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfTableException.cpp
 create mode 100644 lib/CodeGen/CodeGen.cpp
 create mode 100644 lib/CodeGen/EdgeBundles.cpp
 create mode 100644 lib/CodeGen/ExpandISelPseudos.cpp
 create mode 100644 lib/CodeGen/LiveDebugVariables.cpp
 create mode 100644 lib/CodeGen/LiveDebugVariables.h
 create mode 100644 lib/CodeGen/LiveIntervalUnion.cpp
 create mode 100644 lib/CodeGen/LiveIntervalUnion.h
 create mode 100644 lib/CodeGen/LiveRangeEdit.cpp
 create mode 100644 lib/CodeGen/LiveRangeEdit.h
 create mode 100644 lib/CodeGen/MachineLoopRanges.cpp
 delete mode 100644 lib/CodeGen/PBQP/Graph.h
 delete mode 100644 lib/CodeGen/PBQP/HeuristicBase.h
 delete mode 100644 lib/CodeGen/PBQP/HeuristicSolver.h
 delete mode 100644 lib/CodeGen/PBQP/Heuristics/Briggs.h
 delete mode 100644 lib/CodeGen/PBQP/Math.h
 delete mode 100644 lib/CodeGen/PBQP/Solution.h
 delete mode 100644 lib/CodeGen/PHIElimination.h
 create mode 100644 lib/CodeGen/PHIEliminationUtils.cpp
 create mode 100644 lib/CodeGen/PHIEliminationUtils.h
 delete mode 100644 lib/CodeGen/PostRAHazardRecognizer.cpp
 create mode 100644 lib/CodeGen/RegAllocBase.h
 create mode 100644 lib/CodeGen/RegAllocBasic.cpp
 create mode 100644 lib/CodeGen/RegAllocGreedy.cpp
 create mode 100644 lib/CodeGen/ScoreboardHazardRecognizer.cpp
 create mode 100644 lib/CodeGen/SpillPlacement.cpp
 create mode 100644 lib/CodeGen/SpillPlacement.h
 create mode 100644 lib/ExecutionEngine/MCJIT/CMakeLists.txt
 create mode 100644 lib/ExecutionEngine/MCJIT/MCJIT.cpp
 create mode 100644 lib/ExecutionEngine/MCJIT/MCJIT.h
 create mode 100644 lib/ExecutionEngine/MCJIT/Makefile
 create mode 100644 lib/ExecutionEngine/MCJIT/TargetSelect.cpp
 create mode 100644 lib/MC/MCELFObjectTargetWriter.cpp
 create mode 100644 lib/MC/MCMachObjectTargetWriter.cpp
 create mode 100644 lib/MC/MCParser/COFFAsmParser.cpp
 create mode 100644 lib/MC/MCPureStreamer.cpp
 create mode 100644 lib/Object/CMakeLists.txt
 create mode 100644 lib/Object/COFFObjectFile.cpp
 create mode 100644 lib/Object/ELFObjectFile.cpp
 create mode 100644 lib/Object/MachOObject.cpp
 create mode 100644 lib/Object/Makefile
 create mode 100644 lib/Object/ObjectFile.cpp
 create mode 100644 lib/Support/Atomic.cpp
 create mode 100644 lib/Support/Disassembler.cpp
 create mode 100644 lib/Support/DynamicLibrary.cpp
 create mode 100644 lib/Support/Errno.cpp
 create mode 100644 lib/Support/Host.cpp
 create mode 100644 lib/Support/IncludeFile.cpp
 create mode 100644 lib/Support/IntEqClasses.cpp
 create mode 100644 lib/Support/IntervalMap.cpp
 create mode 100644 lib/Support/Memory.cpp
 create mode 100644 lib/Support/Mutex.cpp
 create mode 100644 lib/Support/Path.cpp
 create mode 100644 lib/Support/PathV2.cpp
 create mode 100644 lib/Support/Process.cpp
 create mode 100644 lib/Support/Program.cpp
 create mode 100644 lib/Support/README.txt.system
 create mode 100644 lib/Support/RWMutex.cpp
 create mode 100644 lib/Support/SearchForAddressOfSpecialSymbol.cpp
 create mode 100644 lib/Support/Signals.cpp
 create mode 100644 lib/Support/ThreadLocal.cpp
 create mode 100644 lib/Support/Threading.cpp
 create mode 100644 lib/Support/TimeValue.cpp
 create mode 100644 lib/Support/ToolOutputFile.cpp
 create mode 100644 lib/Support/Unix/Host.inc
 create mode 100644 lib/Support/Unix/Memory.inc
 create mode 100644 lib/Support/Unix/Mutex.inc
 create mode 100644 lib/Support/Unix/Path.inc
 create mode 100644 lib/Support/Unix/PathV2.inc
 create mode 100644 lib/Support/Unix/Process.inc
 create mode 100644 lib/Support/Unix/Program.inc
 create mode 100644 lib/Support/Unix/README.txt
 create mode 100644 lib/Support/Unix/RWMutex.inc
 create mode 100644 lib/Support/Unix/Signals.inc
 create mode 100644 lib/Support/Unix/ThreadLocal.inc
 create mode 100644 lib/Support/Unix/TimeValue.inc
 create mode 100644 lib/Support/Unix/Unix.h
 create mode 100644 lib/Support/Unix/system_error.inc
 create mode 100644 lib/Support/Valgrind.cpp
 create mode 100644 lib/Support/Windows/DynamicLibrary.inc
 create mode 100644 lib/Support/Windows/Host.inc
 create mode 100644 lib/Support/Windows/Memory.inc
 create mode 100644 lib/Support/Windows/Mutex.inc
 create mode 100644 lib/Support/Windows/Path.inc
 create mode 100644 lib/Support/Windows/PathV2.inc
 create mode 100644 lib/Support/Windows/Process.inc
 create mode 100644 lib/Support/Windows/Program.inc
 create mode 100644 lib/Support/Windows/RWMutex.inc
 create mode 100644 lib/Support/Windows/Signals.inc
 create mode 100644 lib/Support/Windows/ThreadLocal.inc
 create mode 100644 lib/Support/Windows/TimeValue.inc
 create mode 100644 lib/Support/Windows/Windows.h
 create mode 100644 lib/Support/Windows/explicit_symbols.inc
 create mode 100644 lib/Support/Windows/system_error.inc
 create mode 100644 lib/Support/system_error.cpp
 delete mode 100644 lib/System/Alarm.cpp
 delete mode 100644 lib/System/Atomic.cpp
 delete mode 100644 lib/System/CMakeLists.txt
 delete mode 100644 lib/System/Disassembler.cpp
 delete mode 100644 lib/System/DynamicLibrary.cpp
 delete mode 100644 lib/System/Errno.cpp
 delete mode 100644 lib/System/Host.cpp
 delete mode 100644 lib/System/IncludeFile.cpp
 delete mode 100644 lib/System/Makefile
 delete mode 100644 lib/System/Memory.cpp
 delete mode 100644 lib/System/Mutex.cpp
 delete mode 100644 lib/System/Path.cpp
 delete mode 100644 lib/System/Process.cpp
 delete mode 100644 lib/System/Program.cpp
 delete mode 100644 lib/System/README.txt
 delete mode 100644 lib/System/RWMutex.cpp
 delete mode 100644 lib/System/SearchForAddressOfSpecialSymbol.cpp
 delete mode 100644 lib/System/Signals.cpp
 delete mode 100644 lib/System/ThreadLocal.cpp
 delete mode 100644 lib/System/Threading.cpp
 delete mode 100644 lib/System/TimeValue.cpp
 delete mode 100644 lib/System/Unix/Alarm.inc
 delete mode 100644 lib/System/Unix/Host.inc
 delete mode 100644 lib/System/Unix/Memory.inc
 delete mode 100644 lib/System/Unix/Mutex.inc
 delete mode 100644 lib/System/Unix/Path.inc
 delete mode 100644 lib/System/Unix/Process.inc
 delete mode 100644 lib/System/Unix/Program.inc
 delete mode 100644 lib/System/Unix/README.txt
 delete mode 100644 lib/System/Unix/RWMutex.inc
 delete mode 100644 lib/System/Unix/Signals.inc
 delete mode 100644 lib/System/Unix/ThreadLocal.inc
 delete mode 100644 lib/System/Unix/TimeValue.inc
 delete mode 100644 lib/System/Unix/Unix.h
 delete mode 100644 lib/System/Valgrind.cpp
 delete mode 100644 lib/System/Win32/Alarm.inc
 delete mode 100644 lib/System/Win32/DynamicLibrary.inc
 delete mode 100644 lib/System/Win32/Host.inc
 delete mode 100644 lib/System/Win32/Memory.inc
 delete mode 100644 lib/System/Win32/Mutex.inc
 delete mode 100644 lib/System/Win32/Path.inc
 delete mode 100644 lib/System/Win32/Process.inc
 delete mode 100644 lib/System/Win32/Program.inc
 delete mode 100644 lib/System/Win32/RWMutex.inc
 delete mode 100644 lib/System/Win32/Signals.inc
 delete mode 100644 lib/System/Win32/ThreadLocal.inc
 delete mode 100644 lib/System/Win32/TimeValue.inc
 delete mode 100644 lib/System/Win32/Win32.h
 create mode 100644 lib/Target/ARM/ARMAsmBackend.cpp
 create mode 100644 lib/Target/ARM/ARMAsmPrinter.h
 create mode 100644 lib/Target/ARM/ARMBaseInfo.h
 create mode 100644 lib/Target/ARM/ARMCallingConv.h
 create mode 100644 lib/Target/ARM/ARMELFWriterInfo.cpp
 create mode 100644 lib/Target/ARM/ARMELFWriterInfo.h
 create mode 100644 lib/Target/ARM/ARMFixupKinds.h
 delete mode 100644 lib/Target/ARM/ARMFrameInfo.h
 create mode 100644 lib/Target/ARM/ARMFrameLowering.cpp
 create mode 100644 lib/Target/ARM/ARMFrameLowering.h
 create mode 100644 lib/Target/ARM/ARMHazardRecognizer.cpp
 create mode 100644 lib/Target/ARM/ARMHazardRecognizer.h
 create mode 100644 lib/Target/ARM/ARMMCCodeEmitter.cpp
 create mode 100644 lib/Target/ARM/ARMMCExpr.cpp
 create mode 100644 lib/Target/ARM/ARMMCExpr.h
 delete mode 100644 lib/Target/ARM/ARMMCInstLower.h
 delete mode 100644 lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
 delete mode 100644 lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
 delete mode 100644 lib/Target/ARM/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/ARM/AsmPrinter/Makefile
 create mode 100644 lib/Target/ARM/Disassembler/CMakeLists.txt
 create mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
 create mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.h
 create mode 100644 lib/Target/ARM/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/ARM/InstPrinter/Makefile
 create mode 100644 lib/Target/ARM/MLxExpansionPass.cpp
 delete mode 100644 lib/Target/ARM/NEONPreAllocPass.cpp
 create mode 100644 lib/Target/ARM/Thumb1FrameLowering.cpp
 create mode 100644 lib/Target/ARM/Thumb1FrameLowering.h
 delete mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.cpp
 delete mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.h
 create mode 100644 lib/Target/Alpha/AlphaAsmPrinter.cpp
 delete mode 100644 lib/Target/Alpha/AlphaCodeEmitter.cpp
 create mode 100644 lib/Target/Alpha/AlphaFrameLowering.cpp
 create mode 100644 lib/Target/Alpha/AlphaFrameLowering.h
 delete mode 100644 lib/Target/Alpha/AlphaJITInfo.cpp
 delete mode 100644 lib/Target/Alpha/AlphaJITInfo.h
 delete mode 100644 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
 delete mode 100644 lib/Target/Alpha/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Alpha/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/Makefile
 create mode 100644 lib/Target/Blackfin/BlackfinAsmPrinter.cpp
 create mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.cpp
 create mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.h
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/Makefile
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
 create mode 100644 lib/Target/CellSPU/SPUAsmPrinter.cpp
 delete mode 100644 lib/Target/CellSPU/SPUFrameInfo.cpp
 delete mode 100644 lib/Target/CellSPU/SPUFrameInfo.h
 create mode 100644 lib/Target/CellSPU/SPUFrameLowering.cpp
 create mode 100644 lib/Target/CellSPU/SPUFrameLowering.h
 create mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/Makefile
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/Makefile
 create mode 100644 lib/Target/MBlaze/Disassembler/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
 create mode 100644 lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
 create mode 100644 lib/Target/MBlaze/Disassembler/Makefile
 create mode 100644 lib/Target/MBlaze/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
 create mode 100644 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
 create mode 100644 lib/Target/MBlaze/InstPrinter/Makefile
 create mode 100644 lib/Target/MBlaze/MBlazeAsmBackend.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeAsmPrinter.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.h
 create mode 100644 lib/Target/MBlaze/MBlazeFrameLowering.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeFrameLowering.h
 create mode 100644 lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeMCInstLower.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeMCInstLower.h
 create mode 100644 lib/Target/MBlaze/MBlazeRelocations.h
 create mode 100644 lib/Target/MBlaze/TODO
 delete mode 100644 lib/Target/MSP430/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
 delete mode 100644 lib/Target/MSP430/AsmPrinter/Makefile
 create mode 100644 lib/Target/MSP430/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
 create mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
 create mode 100644 lib/Target/MSP430/InstPrinter/Makefile
 create mode 100644 lib/Target/MSP430/MSP430AsmPrinter.cpp
 create mode 100644 lib/Target/MSP430/MSP430FrameLowering.cpp
 create mode 100644 lib/Target/MSP430/MSP430FrameLowering.h
 create mode 100644 lib/Target/MSP430/MSP430MCInstLower.cpp
 create mode 100644 lib/Target/MSP430/MSP430MCInstLower.h
 delete mode 100644 lib/Target/Mips/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Mips/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
 create mode 100644 lib/Target/Mips/MipsAsmPrinter.cpp
 create mode 100644 lib/Target/Mips/MipsFrameLowering.cpp
 create mode 100644 lib/Target/Mips/MipsFrameLowering.h
 delete mode 100644 lib/Target/PIC16/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/AsmPrinter/Makefile
 delete mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
 delete mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
 delete mode 100644 lib/Target/PIC16/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/Makefile
 delete mode 100644 lib/Target/PIC16/PIC16.h
 delete mode 100644 lib/Target/PIC16/PIC16.td
 delete mode 100644 lib/Target/PIC16/PIC16ABINames.h
 delete mode 100644 lib/Target/PIC16/PIC16DebugInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16DebugInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
 delete mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.h
 delete mode 100644 lib/Target/PIC16/PIC16ISelLowering.cpp
 delete mode 100644 lib/Target/PIC16/PIC16ISelLowering.h
 delete mode 100644 lib/Target/PIC16/PIC16InstrFormats.td
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.td
 delete mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16MachineFunctionInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16MemSelOpt.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/Makefile
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.td
 delete mode 100644 lib/Target/PIC16/PIC16Section.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Section.h
 delete mode 100644 lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16SelectionDAGInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16Subtarget.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Subtarget.h
 delete mode 100644 lib/Target/PIC16/PIC16TargetMachine.cpp
 delete mode 100644 lib/Target/PIC16/PIC16TargetMachine.h
 delete mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.cpp
 delete mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.h
 delete mode 100644 lib/Target/PIC16/TargetInfo/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/TargetInfo/Makefile
 delete mode 100644 lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
 create mode 100644 lib/Target/PTX/CMakeLists.txt
 create mode 100644 lib/Target/PTX/Makefile
 create mode 100644 lib/Target/PTX/PTX.h
 create mode 100644 lib/Target/PTX/PTX.td
 create mode 100644 lib/Target/PTX/PTXAsmPrinter.cpp
 create mode 100644 lib/Target/PTX/PTXFrameLowering.cpp
 create mode 100644 lib/Target/PTX/PTXFrameLowering.h
 create mode 100644 lib/Target/PTX/PTXISelDAGToDAG.cpp
 create mode 100644 lib/Target/PTX/PTXISelLowering.cpp
 create mode 100644 lib/Target/PTX/PTXISelLowering.h
 create mode 100644 lib/Target/PTX/PTXInstrFormats.td
 create mode 100644 lib/Target/PTX/PTXInstrInfo.cpp
 create mode 100644 lib/Target/PTX/PTXInstrInfo.h
 create mode 100644 lib/Target/PTX/PTXInstrInfo.td
 create mode 100644 lib/Target/PTX/PTXMCAsmInfo.cpp
 create mode 100644 lib/Target/PTX/PTXMCAsmInfo.h
 create mode 100644 lib/Target/PTX/PTXMCAsmStreamer.cpp
 create mode 100644 lib/Target/PTX/PTXMFInfoExtract.cpp
 create mode 100644 lib/Target/PTX/PTXMachineFunctionInfo.h
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.cpp
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.h
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.td
 create mode 100644 lib/Target/PTX/PTXSubtarget.cpp
 create mode 100644 lib/Target/PTX/PTXSubtarget.h
 create mode 100644 lib/Target/PTX/PTXTargetMachine.cpp
 create mode 100644 lib/Target/PTX/PTXTargetMachine.h
 create mode 100644 lib/Target/PTX/TargetInfo/CMakeLists.txt
 create mode 100644 lib/Target/PTX/TargetInfo/Makefile
 create mode 100644 lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/Makefile
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
 create mode 100644 lib/Target/PowerPC/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/PowerPC/InstPrinter/Makefile
 create mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
 create mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
 create mode 100644 lib/Target/PowerPC/PPCAsmBackend.cpp
 create mode 100644 lib/Target/PowerPC/PPCAsmPrinter.cpp
 create mode 100644 lib/Target/PowerPC/PPCFixupKinds.h
 delete mode 100644 lib/Target/PowerPC/PPCFrameInfo.h
 create mode 100644 lib/Target/PowerPC/PPCFrameLowering.cpp
 create mode 100644 lib/Target/PowerPC/PPCFrameLowering.h
 create mode 100644 lib/Target/PowerPC/PPCMCCodeEmitter.cpp
 create mode 100644 lib/Target/PowerPC/PPCMCInstLower.cpp
 delete mode 100644 lib/Target/Sparc/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Sparc/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
 create mode 100644 lib/Target/Sparc/SparcAsmPrinter.cpp
 create mode 100644 lib/Target/Sparc/SparcFrameLowering.cpp
 create mode 100644 lib/Target/Sparc/SparcFrameLowering.h
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/Makefile
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
 create mode 100644 lib/Target/SystemZ/SystemZAsmPrinter.cpp
 create mode 100644 lib/Target/SystemZ/SystemZFrameLowering.cpp
 create mode 100644 lib/Target/SystemZ/SystemZFrameLowering.h
 create mode 100644 lib/Target/TargetAsmInfo.cpp
 delete mode 100644 lib/Target/TargetFrameInfo.cpp
 create mode 100644 lib/Target/TargetFrameLowering.cpp
 create mode 100644 lib/Target/TargetLibraryInfo.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/X86/AsmPrinter/Makefile
 delete mode 100644 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
 delete mode 100644 lib/Target/X86/AsmPrinter/X86InstComments.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86InstComments.h
 delete mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
 create mode 100644 lib/Target/X86/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/X86/InstPrinter/Makefile
 create mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
 create mode 100644 lib/Target/X86/InstPrinter/X86InstComments.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86InstComments.h
 create mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
 create mode 100644 lib/Target/X86/Utils/CMakeLists.txt
 create mode 100644 lib/Target/X86/Utils/Makefile
 create mode 100644 lib/Target/X86/Utils/X86ShuffleDecode.cpp
 create mode 100644 lib/Target/X86/Utils/X86ShuffleDecode.h
 create mode 100644 lib/Target/X86/X86FrameLowering.cpp
 create mode 100644 lib/Target/X86/X86FrameLowering.h
 create mode 100644 lib/Target/X86/X86Instr3DNow.td
 delete mode 100644 lib/Target/X86/X86Instr64bit.td
 create mode 100644 lib/Target/X86/X86InstrArithmetic.td
 create mode 100644 lib/Target/X86/X86InstrCMovSetCC.td
 create mode 100644 lib/Target/X86/X86InstrCompiler.td
 create mode 100644 lib/Target/X86/X86InstrControl.td
 create mode 100644 lib/Target/X86/X86InstrExtension.td
 create mode 100644 lib/Target/X86/X86InstrShiftRotate.td
 create mode 100644 lib/Target/X86/X86InstrSystem.td
 create mode 100644 lib/Target/X86/X86InstrVMX.td
 create mode 100644 lib/Target/X86/X86MachObjectWriter.cpp
 delete mode 100644 lib/Target/X86/X86ShuffleDecode.h
 delete mode 100644 lib/Target/XCore/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/XCore/AsmPrinter/Makefile
 delete mode 100644 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
 create mode 100644 lib/Target/XCore/XCoreAsmPrinter.cpp
 delete mode 100644 lib/Target/XCore/XCoreFrameInfo.cpp
 delete mode 100644 lib/Target/XCore/XCoreFrameInfo.h
 create mode 100644 lib/Target/XCore/XCoreFrameLowering.cpp
 create mode 100644 lib/Target/XCore/XCoreFrameLowering.h
 create mode 100644 lib/Transforms/CMakeLists.txt
 delete mode 100644 lib/Transforms/IPO/PartialSpecialization.cpp
 create mode 100644 lib/Transforms/Instrumentation/Instrumentation.cpp
 create mode 100644 lib/Transforms/Instrumentation/PathProfiling.cpp
 create mode 100644 lib/Transforms/Scalar/EarlyCSE.cpp
 create mode 100644 lib/Transforms/Scalar/LoopIdiomRecognize.cpp
 delete mode 100644 lib/Transforms/Scalar/LoopIndexSplit.cpp
 create mode 100644 lib/Transforms/Scalar/LoopInstSimplify.cpp
 create mode 100644 lib/Transforms/Utils/SimplifyInstructions.cpp
 create mode 100644 lib/Transforms/Utils/Utils.cpp
 create mode 100644 lib/VMCore/User.cpp
 create mode 100644 runtime/libprofile/PathProfiling.c
 create mode 100644 test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
 create mode 100644 test/Analysis/BasicAA/full-store-partial-alias.ll
 delete mode 100644 test/Analysis/PointerTracking/dg.exp
 delete mode 100644 test/Analysis/PointerTracking/sizes.ll
 create mode 100644 test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
 create mode 100644 test/Analysis/ScalarEvolution/fold.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/dg.exp
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/dse.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/licm.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/precedence.ll
 create mode 100644 test/Analysis/TypeBasedAliasAnalysis/sink.ll
 delete mode 100644 test/Archive/GNU.toc
 delete mode 100644 test/Archive/MacOSX.toc
 delete mode 100644 test/Archive/SVR4.toc
 delete mode 100644 test/Archive/xpg4.toc
 create mode 100644 test/Assembler/AutoUpgradeMMXIntrinsics.ll
 create mode 100644 test/Assembler/extractvalue-invalid-idx.ll
 create mode 100644 test/Assembler/insertvalue-invalid-idx.ll
 create mode 100644 test/Assembler/unnamed-addr.ll
 create mode 100644 test/Assembler/x86mmx.ll
 create mode 100644 test/Bindings/Ocaml/ext_exc.ml
 create mode 100644 test/Bitcode/null-type.ll
 create mode 100644 test/Bitcode/null-type.ll.bc
 delete mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
 delete mode 100644 test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
 delete mode 100644 test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
 delete mode 100644 test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
 create mode 100644 test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
 create mode 100644 test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
 create mode 100644 test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
 create mode 100644 test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
 create mode 100644 test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
 create mode 100644 test/CodeGen/ARM/2010-11-29-PrologueBug.ll
 create mode 100644 test/CodeGen/ARM/2010-11-30-reloc-movt.ll
 create mode 100644 test/CodeGen/ARM/2010-12-07-PEIBug.ll
 create mode 100644 test/CodeGen/ARM/2010-12-08-tpsoft.ll
 create mode 100644 test/CodeGen/ARM/2010-12-13-reloc-pic.ll
 create mode 100644 test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
 create mode 100644 test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
 create mode 100644 test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
 create mode 100644 test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
 create mode 100644 test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
 create mode 100644 test/CodeGen/ARM/arm-and-tst-peephole.ll
 create mode 100644 test/CodeGen/ARM/atomic-cmp.ll
 create mode 100644 test/CodeGen/ARM/bswap-inline-asm.ll
 create mode 100644 test/CodeGen/ARM/crash.ll
 create mode 100644 test/CodeGen/ARM/fast-isel-crash.ll
 create mode 100644 test/CodeGen/ARM/fast-isel-static.ll
 create mode 100644 test/CodeGen/ARM/global-merge.ll
 create mode 100644 test/CodeGen/ARM/ifcvt10.ll
 create mode 100644 test/CodeGen/ARM/ifcvt11.ll
 create mode 100644 test/CodeGen/ARM/ldst-f32-2-i32.ll
 create mode 100644 test/CodeGen/ARM/load-global.ll
 create mode 100644 test/CodeGen/ARM/machine-licm.ll
 create mode 100644 test/CodeGen/ARM/mult-alt-generic-arm.ll
 create mode 100644 test/CodeGen/ARM/neon_div.ll
 create mode 100644 test/CodeGen/ARM/phi.ll
 create mode 100644 test/CodeGen/ARM/prefetch.ll
 delete mode 100644 test/CodeGen/ARM/remat.ll
 create mode 100644 test/CodeGen/ARM/thumb1-varalloc.ll
 create mode 100644 test/CodeGen/ARM/umulo-32.ll
 create mode 100644 test/CodeGen/ARM/vector-DAGCombine.ll
 create mode 100644 test/CodeGen/ARM/vlddup.ll
 create mode 100644 test/CodeGen/CellSPU/div_ops.ll
 create mode 100644 test/CodeGen/Generic/2010-11-04-BigByval.ll
 create mode 100644 test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
 create mode 100644 test/CodeGen/Generic/2011-02-12-shuffle.ll
 create mode 100644 test/CodeGen/Generic/overflow.ll
 create mode 100644 test/CodeGen/MBlaze/intr.ll
 create mode 100644 test/CodeGen/MBlaze/svol.ll
 create mode 100644 test/CodeGen/MSP430/mult-alt-generic-msp430.ll
 create mode 100644 test/CodeGen/Mips/2010-11-09-CountLeading.ll
 create mode 100644 test/CodeGen/Mips/2010-11-09-Mul.ll
 create mode 100755 test/CodeGen/Mips/cmov.ll
 create mode 100644 test/CodeGen/Mips/madd-msub.ll
 create mode 100644 test/CodeGen/Mips/o32_cc.ll
 create mode 100644 test/CodeGen/Mips/rotate.ll
 delete mode 100644 test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
 delete mode 100644 test/CodeGen/PIC16/2009-11-20-NewNode.ll
 delete mode 100644 test/CodeGen/PIC16/C16-11.ll
 delete mode 100644 test/CodeGen/PIC16/C16-15.ll
 delete mode 100644 test/CodeGen/PIC16/C16-49.ll
 delete mode 100644 test/CodeGen/PIC16/check_inc_files.ll
 delete mode 100644 test/CodeGen/PIC16/dg.exp
 delete mode 100644 test/CodeGen/PIC16/global-in-user-section.ll
 delete mode 100644 test/CodeGen/PIC16/globals.ll
 delete mode 100644 test/CodeGen/PIC16/result_direction.ll
 delete mode 100644 test/CodeGen/PIC16/sext.ll
 delete mode 100644 test/CodeGen/PIC16/test_indf_name.ll
 create mode 100644 test/CodeGen/PTX/add.ll
 create mode 100644 test/CodeGen/PTX/dg.exp
 create mode 100644 test/CodeGen/PTX/exit.ll
 create mode 100644 test/CodeGen/PTX/ld.ll
 create mode 100644 test/CodeGen/PTX/mov.ll
 create mode 100644 test/CodeGen/PTX/options.ll
 create mode 100644 test/CodeGen/PTX/ret.ll
 create mode 100644 test/CodeGen/PTX/shl.ll
 create mode 100644 test/CodeGen/PTX/shr.ll
 create mode 100644 test/CodeGen/PTX/st.ll
 create mode 100644 test/CodeGen/PTX/sub.ll
 create mode 100644 test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
 create mode 100644 test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
 create mode 100644 test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
 create mode 100644 test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
 delete mode 100644 test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
 create mode 100644 test/CodeGen/PowerPC/varargs.ll
 create mode 100755 test/CodeGen/SPARC/2011-01-11-CC.ll
 create mode 100644 test/CodeGen/SPARC/2011-01-11-Call.ll
 create mode 100644 test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
 create mode 100644 test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
 create mode 100644 test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
 create mode 100644 test/CodeGen/SPARC/2011-01-22-SRet.ll
 create mode 100644 test/CodeGen/SPARC/mult-alt-generic-sparc.ll
 delete mode 100644 test/CodeGen/SPARC/xnor.ll
 create mode 100644 test/CodeGen/Thumb/2011-EpilogueBug.ll
 delete mode 100644 test/CodeGen/Thumb/machine-licm.ll
 create mode 100644 test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
 create mode 100644 test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
 create mode 100644 test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
 create mode 100644 test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
 create mode 100644 test/CodeGen/Thumb2/buildvector-crash.ll
 delete mode 100644 test/CodeGen/Thumb2/load-global.ll
 delete mode 100644 test/CodeGen/Thumb2/machine-licm-vdup.ll
 delete mode 100644 test/CodeGen/Thumb2/thumb2-badreg-operands.ll
 delete mode 100644 test/CodeGen/X86/2007-10-16-fp80_select.ll
 delete mode 100644 test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
 delete mode 100644 test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
 create mode 100644 test/CodeGen/X86/2010-09-16-EmptyFilename.ll
 create mode 100644 test/CodeGen/X86/2010-09-16-asmcrash.ll
 create mode 100644 test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
 create mode 100644 test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
 create mode 100644 test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
 create mode 100644 test/CodeGen/X86/2010-11-02-DbgParameter.ll
 create mode 100644 test/CodeGen/X86/2010-11-09-MOVLPS.ll
 create mode 100644 test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
 create mode 100644 test/CodeGen/X86/2010-12-02-MC-Set.ll
 create mode 100644 test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
 create mode 100644 test/CodeGen/X86/2011-01-10-DagCombineHang.ll
 create mode 100644 test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
 create mode 100644 test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
 create mode 100644 test/CodeGen/X86/add-of-carry.ll
 create mode 100644 test/CodeGen/X86/alldiv-divdi3.ll
 create mode 100644 test/CodeGen/X86/andimm8.ll
 create mode 100644 test/CodeGen/X86/apm.ll
 create mode 100644 test/CodeGen/X86/bc-extract.ll
 create mode 100644 test/CodeGen/X86/bit-test-shift.ll
 delete mode 100644 test/CodeGen/X86/cmp-test.ll
 create mode 100644 test/CodeGen/X86/cmp.ll
 delete mode 100644 test/CodeGen/X86/cmp0.ll
 delete mode 100644 test/CodeGen/X86/cmp2.ll
 create mode 100644 test/CodeGen/X86/complex-asm.ll
 create mode 100644 test/CodeGen/X86/conditional-indecrement.ll
 delete mode 100644 test/CodeGen/X86/const-select.ll
 create mode 100644 test/CodeGen/X86/critical-edge-split-2.ll
 delete mode 100644 test/CodeGen/X86/critical-edge-split.ll
 create mode 100644 test/CodeGen/X86/ctpop-combine.ll
 create mode 100644 test/CodeGen/X86/dbg-live-in-location.ll
 create mode 100644 test/CodeGen/X86/dbg-merge-loc-entry.ll
 create mode 100644 test/CodeGen/X86/dbg-value-inlined-parameter.ll
 create mode 100644 test/CodeGen/X86/dbg-value-location.ll
 create mode 100644 test/CodeGen/X86/dbg-value-range.ll
 delete mode 100644 test/CodeGen/X86/div_const.ll
 create mode 100644 test/CodeGen/X86/divide-by-constant.ll
 create mode 100644 test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
 create mode 100644 test/CodeGen/X86/fltused.ll
 create mode 100644 test/CodeGen/X86/inline-asm-h.ll
 create mode 100644 test/CodeGen/X86/inline-asm-ptr-cast.ll
 create mode 100644 test/CodeGen/X86/legalize-sub-zero-2.ll
 create mode 100644 test/CodeGen/X86/legalize-sub-zero.ll
 delete mode 100644 test/CodeGen/X86/memmove-0.ll
 delete mode 100644 test/CodeGen/X86/memmove-1.ll
 delete mode 100644 test/CodeGen/X86/memmove-2.ll
 delete mode 100644 test/CodeGen/X86/memmove-3.ll
 create mode 100644 test/CodeGen/X86/misaligned-memset.ll
 create mode 100644 test/CodeGen/X86/mmx-builtins.ll
 create mode 100644 test/CodeGen/X86/mult-alt-generic-i686.ll
 create mode 100644 test/CodeGen/X86/mult-alt-generic-x86_64.ll
 create mode 100644 test/CodeGen/X86/mult-alt-x86.ll
 create mode 100644 test/CodeGen/X86/narrow-shl-load.ll
 create mode 100644 test/CodeGen/X86/non-globl-eh-frame.ll
 create mode 100644 test/CodeGen/X86/popcnt.ll
 create mode 100644 test/CodeGen/X86/pr9127.ll
 delete mode 100644 test/CodeGen/X86/select-aggregate.ll
 delete mode 100644 test/CodeGen/X86/select-zero-one.ll
 delete mode 100644 test/CodeGen/X86/sext-select.ll
 create mode 100644 test/CodeGen/X86/sibcall-5.ll
 delete mode 100644 test/CodeGen/X86/split-select.ll
 create mode 100644 test/CodeGen/X86/stdcall-notailcall.ll
 create mode 100644 test/CodeGen/X86/switch-or.ll
 create mode 100644 test/CodeGen/X86/tailcall-ri64.ll
 delete mode 100644 test/CodeGen/X86/tls-1.ll
 create mode 100644 test/CodeGen/X86/tlv-1.ll
 create mode 100644 test/CodeGen/X86/tlv-2.ll
 create mode 100644 test/CodeGen/X86/uint64-to-float.ll
 create mode 100644 test/CodeGen/X86/umulo-64.ll
 create mode 100644 test/CodeGen/X86/vec-sign.ll
 delete mode 100644 test/CodeGen/X86/vec_select.ll
 create mode 100644 test/CodeGen/X86/visibility.ll
 delete mode 100644 test/CodeGen/X86/widen_select-1.ll
 create mode 100644 test/CodeGen/X86/win64_params.ll
 create mode 100644 test/CodeGen/X86/win64_vararg.ll
 create mode 100644 test/CodeGen/X86/x86-64-extend-shift.ll
 create mode 100644 test/CodeGen/X86/x86_64-mul-by-const.ll
 create mode 100644 test/CodeGen/X86/zext-extract_subreg.ll
 create mode 100644 test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
 create mode 100644 test/CodeGen/XCore/resources.ll
 create mode 100644 test/CodeGen/XCore/trampoline.ll
 create mode 100644 test/DebugInfo/2010-10-01-crash.ll
 create mode 100644 test/FrontendAda/Support/real_cst.ads
 create mode 100644 test/FrontendAda/real_cst.adb
 delete mode 100644 test/FrontendC++/2003-08-20-ExceptionFail.cpp
 delete mode 100644 test/FrontendC++/2003-08-21-EmptyClass.cpp
 delete mode 100644 test/FrontendC++/2003-08-24-Cleanup.cpp
 delete mode 100644 test/FrontendC++/2003-08-27-TypeNamespaces.cpp
 delete mode 100644 test/FrontendC++/2003-08-28-ForwardType.cpp
 delete mode 100644 test/FrontendC++/2003-08-28-SaveExprBug.cpp
 delete mode 100644 test/FrontendC++/2003-08-29-ArgPassingBug.cpp
 delete mode 100644 test/FrontendC++/2003-08-31-StructLayout.cpp
 delete mode 100644 test/FrontendC++/2003-09-22-CompositeExprValue.cpp
 delete mode 100644 test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
 delete mode 100644 test/FrontendC++/2003-09-30-CommaExprBug.cpp
 delete mode 100644 test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
 delete mode 100644 test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
 delete mode 100644 test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
 delete mode 100644 test/FrontendC++/2003-10-17-BoolBitfields.cpp
 delete mode 100644 test/FrontendC++/2003-10-21-InnerClass.cpp
 delete mode 100644 test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
 delete mode 100644 test/FrontendC++/2003-11-04-ArrayConstructors.cpp
 delete mode 100644 test/FrontendC++/2003-11-04-CatchLabelName.cpp
 delete mode 100644 test/FrontendC++/2003-11-08-ArrayAddress.cpp
 delete mode 100644 test/FrontendC++/2003-11-18-EnumArray.cpp
 delete mode 100644 test/FrontendC++/2006-11-30-NoCompileUnit.cpp
 create mode 100644 test/FrontendC/2010-11-16-asmblock.c
 create mode 100644 test/FrontendC/2010-12-01-CommonGlobal.c
 create mode 100644 test/FrontendC/arrayderef.c
 create mode 100644 test/LLVMC/C++/just-compile.cpp
 create mode 100644 test/LLVMC/C++/unknown_suffix.unk
 create mode 100644 test/LLVMC/C/emit-llvm-opt.c
 create mode 100644 test/LLVMC/MultipleOutputLanguages.td
 create mode 100644 test/Linker/PR8300.ll
 create mode 100644 test/Linker/available_externally_a.ll
 create mode 100644 test/Linker/available_externally_b.ll
 create mode 100644 test/Linker/unnamed-addr1-a.ll
 create mode 100644 test/Linker/unnamed-addr1-b.ll
 create mode 100644 test/MC/ARM/arm_fixups.s
 create mode 100644 test/MC/ARM/arm_instructions.s
 create mode 100644 test/MC/ARM/arm_word_directive.s
 create mode 100644 test/MC/ARM/dg.exp
 create mode 100644 test/MC/ARM/elf-eflags-eabi.s
 create mode 100644 test/MC/ARM/elf-movt.s
 create mode 100644 test/MC/ARM/elf-reloc-01.ll
 create mode 100644 test/MC/ARM/elf-reloc-02.ll
 create mode 100644 test/MC/ARM/elf-reloc-03.ll
 create mode 100644 test/MC/ARM/hilo-16bit-relocations.s
 create mode 100644 test/MC/ARM/neon-abs-encoding.s
 create mode 100644 test/MC/ARM/neon-absdiff-encoding.s
 create mode 100644 test/MC/ARM/neon-add-encoding.s
 create mode 100644 test/MC/ARM/neon-bitcount-encoding.s
 create mode 100644 test/MC/ARM/neon-bitwise-encoding.s
 create mode 100644 test/MC/ARM/neon-cmp-encoding.s
 create mode 100644 test/MC/ARM/neon-convert-encoding.s
 create mode 100644 test/MC/ARM/neon-dup-encoding.s
 create mode 100644 test/MC/ARM/neon-minmax-encoding.s
 create mode 100644 test/MC/ARM/neon-mov-encoding.s
 create mode 100644 test/MC/ARM/neon-mul-accum-encoding.s
 create mode 100644 test/MC/ARM/neon-mul-encoding.s
 create mode 100644 test/MC/ARM/neon-neg-encoding.s
 create mode 100644 test/MC/ARM/neon-pairwise-encoding.s
 create mode 100644 test/MC/ARM/neon-reciprocal-encoding.s
 create mode 100644 test/MC/ARM/neon-reverse-encoding.s
 create mode 100644 test/MC/ARM/neon-satshift-encoding.s
 create mode 100644 test/MC/ARM/neon-shift-encoding.s
 create mode 100644 test/MC/ARM/neon-shiftaccum-encoding.s
 create mode 100644 test/MC/ARM/neon-shuffle-encoding.s
 create mode 100644 test/MC/ARM/neon-sub-encoding.s
 create mode 100644 test/MC/ARM/neon-table-encoding.s
 create mode 100644 test/MC/ARM/neon-vld-encoding.s
 create mode 100644 test/MC/ARM/neon-vst-encoding.s
 create mode 100644 test/MC/ARM/neont2-abs-encoding.s
 create mode 100644 test/MC/ARM/neont2-absdiff-encoding.s
 create mode 100644 test/MC/ARM/neont2-add-encoding.s
 create mode 100644 test/MC/ARM/neont2-bitcount-encoding.s
 create mode 100644 test/MC/ARM/neont2-bitwise-encoding.s
 create mode 100644 test/MC/ARM/neont2-cmp-encoding.s
 create mode 100644 test/MC/ARM/neont2-convert-encoding.s
 create mode 100644 test/MC/ARM/neont2-dup-encoding.s
 create mode 100644 test/MC/ARM/neont2-minmax-encoding.s
 create mode 100644 test/MC/ARM/neont2-mov-encoding.s
 create mode 100644 test/MC/ARM/neont2-mul-accum-encoding.s
 create mode 100644 test/MC/ARM/neont2-mul-encoding.s
 create mode 100644 test/MC/ARM/neont2-neg-encoding.s
 create mode 100644 test/MC/ARM/neont2-pairwise-encoding.s
 create mode 100644 test/MC/ARM/neont2-reciprocal-encoding.s
 create mode 100644 test/MC/ARM/neont2-reverse-encoding.s
 create mode 100644 test/MC/ARM/neont2-satshift-encoding.s
 create mode 100644 test/MC/ARM/neont2-shift-encoding.s
 create mode 100644 test/MC/ARM/neont2-shiftaccum-encoding.s
 create mode 100644 test/MC/ARM/neont2-shuffle-encoding.s
 create mode 100644 test/MC/ARM/neont2-sub-encoding.s
 create mode 100644 test/MC/ARM/neont2-table-encoding.s
 create mode 100644 test/MC/ARM/neont2-vld-encoding.s
 create mode 100644 test/MC/ARM/neont2-vst-encoding.s
 create mode 100644 test/MC/ARM/prefetch.ll
 create mode 100644 test/MC/ARM/reg-list.s
 create mode 100644 test/MC/ARM/simple-encoding.ll
 create mode 100644 test/MC/ARM/simple-fp-encoding.s
 create mode 100644 test/MC/ARM/thumb.s
 create mode 100644 test/MC/ARM/thumb2.s
 create mode 100644 test/MC/ARM/thumb2_instructions.s
 delete mode 100644 test/MC/AsmParser/ARM/arm_instructions.s
 delete mode 100644 test/MC/AsmParser/ARM/arm_word_directive.s
 delete mode 100644 test/MC/AsmParser/ARM/dg.exp
 delete mode 100644 test/MC/AsmParser/ELF/dg.exp
 delete mode 100644 test/MC/AsmParser/ELF/directive_previous.s
 delete mode 100644 test/MC/AsmParser/ELF/directive_section.s
 delete mode 100644 test/MC/AsmParser/X86/dg.exp
 delete mode 100644 test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-avx-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-bit.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-bit_cat.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-fma3-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-mismatched-add.s
 delete mode 100644 test/MC/AsmParser/X86/x86_32-new-encoder.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-avx-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-fma3-encoding.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-imm-widths.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-incl_decl.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-new-encoder.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-operands.s
 delete mode 100644 test/MC/AsmParser/X86/x86_64-suffix-matching.s
 delete mode 100644 test/MC/AsmParser/X86/x86_instruction_errors.s
 delete mode 100644 test/MC/AsmParser/X86/x86_instructions.s
 delete mode 100644 test/MC/AsmParser/X86/x86_operands.s
 delete mode 100644 test/MC/AsmParser/X86/x86_word_directive.s
 create mode 100644 test/MC/AsmParser/dash-n.s
 create mode 100644 test/MC/AsmParser/equ.s
 create mode 100644 test/MC/AsmParser/expr_symbol_modifiers.s
 create mode 100644 test/MC/AsmParser/floating-literals.s
 create mode 100644 test/MC/AsmParser/full_line_comment.s
 create mode 100644 test/MC/AsmParser/ifdef.s
 create mode 100644 test/MC/AsmParser/ifndef.s
 create mode 100644 test/MC/AsmParser/paren.s
 create mode 100644 test/MC/AsmParser/rename.s
 create mode 100644 test/MC/AsmParser/section.s
 create mode 100644 test/MC/COFF/align-nops.s
 delete mode 100644 test/MC/COFF/basic-coff.ll
 create mode 100644 test/MC/COFF/basic-coff.s
 create mode 100644 test/MC/COFF/bss.s
 create mode 100644 test/MC/COFF/module-asm.ll
 create mode 100644 test/MC/COFF/simple-fixups.s
 create mode 100644 test/MC/COFF/symbol-alias.s
 delete mode 100644 test/MC/COFF/symbol-fragment-offset.ll
 create mode 100644 test/MC/COFF/symbol-fragment-offset.s
 create mode 100644 test/MC/COFF/weak.s
 create mode 100644 test/MC/Disassembler/ARM/arm-tests.txt
 create mode 100644 test/MC/Disassembler/ARM/dg.exp
 create mode 100644 test/MC/Disassembler/ARM/neon-tests.txt
 create mode 100644 test/MC/Disassembler/ARM/thumb-tests.txt
 create mode 100644 test/MC/Disassembler/MBlaze/dg.exp
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_branch.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_fpu.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_fsl.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_imm.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_memory.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_operands.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_pattern.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_shift.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_special.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_typea.txt
 create mode 100644 test/MC/Disassembler/MBlaze/mblaze_typeb.txt
 create mode 100644 test/MC/Disassembler/X86/dg.exp
 create mode 100644 test/MC/Disassembler/X86/simple-tests.txt
 create mode 100644 test/MC/Disassembler/X86/truncated-input.txt
 delete mode 100644 test/MC/Disassembler/arm-tests.txt
 delete mode 100644 test/MC/Disassembler/dg.exp
 delete mode 100644 test/MC/Disassembler/neon-tests.txt
 delete mode 100644 test/MC/Disassembler/simple-tests.txt
 delete mode 100644 test/MC/Disassembler/thumb-tests.txt
 create mode 100644 test/MC/ELF/abs.s
 create mode 100644 test/MC/ELF/alias-reloc.s
 create mode 100644 test/MC/ELF/alias.s
 create mode 100644 test/MC/ELF/align-bss.s
 create mode 100644 test/MC/ELF/align-nops.s
 create mode 100644 test/MC/ELF/align-size.s
 create mode 100644 test/MC/ELF/align-text.s
 create mode 100644 test/MC/ELF/align.s
 create mode 100644 test/MC/ELF/bad-section.s
 create mode 100644 test/MC/ELF/basic-elf-32.s
 create mode 100644 test/MC/ELF/basic-elf-64.s
 create mode 100644 test/MC/ELF/call-abs.s
 create mode 100644 test/MC/ELF/cfi-advance-loc2.s
 create mode 100644 test/MC/ELF/cfi-def-cfa-offset.s
 create mode 100644 test/MC/ELF/cfi-def-cfa-register.s
 create mode 100644 test/MC/ELF/cfi-def-cfa.s
 create mode 100644 test/MC/ELF/cfi-offset.s
 create mode 100644 test/MC/ELF/cfi-remember.s
 create mode 100644 test/MC/ELF/cfi-zero-addr-delta.s
 create mode 100644 test/MC/ELF/cfi.s
 create mode 100644 test/MC/ELF/comdat.s
 create mode 100644 test/MC/ELF/common.s
 create mode 100644 test/MC/ELF/common2.s
 create mode 100644 test/MC/ELF/debug-line.s
 create mode 100644 test/MC/ELF/debug-loc.s
 create mode 100644 test/MC/ELF/diff.s
 create mode 100644 test/MC/ELF/diff2.s
 create mode 100644 test/MC/ELF/elf_directive_previous.s
 create mode 100644 test/MC/ELF/elf_directive_section.s
 create mode 100644 test/MC/ELF/empty-dwarf-lines.s
 create mode 100644 test/MC/ELF/empty.s
 create mode 100644 test/MC/ELF/entsize.ll
 create mode 100644 test/MC/ELF/entsize.s
 create mode 100644 test/MC/ELF/file.s
 create mode 100644 test/MC/ELF/global-offset.s
 create mode 100644 test/MC/ELF/got.s
 create mode 100644 test/MC/ELF/ident.s
 create mode 100644 test/MC/ELF/invalid-symver.s
 create mode 100644 test/MC/ELF/leb128.s
 create mode 100644 test/MC/ELF/local-reloc.s
 create mode 100644 test/MC/ELF/merge.s
 create mode 100644 test/MC/ELF/n_bytes.s
 create mode 100644 test/MC/ELF/no-fixup.s
 create mode 100644 test/MC/ELF/noexec.s
 create mode 100644 test/MC/ELF/norelocation.s
 create mode 100644 test/MC/ELF/pic-diff.s
 create mode 100644 test/MC/ELF/plt.s
 create mode 100644 test/MC/ELF/relax-arith.s
 create mode 100644 test/MC/ELF/relax-crash.s
 create mode 100644 test/MC/ELF/relax.s
 create mode 100644 test/MC/ELF/relocation-386.s
 create mode 100644 test/MC/ELF/relocation.s
 create mode 100644 test/MC/ELF/rename.s
 create mode 100644 test/MC/ELF/section.s
 create mode 100644 test/MC/ELF/set.s
 create mode 100644 test/MC/ELF/sleb.s
 create mode 100644 test/MC/ELF/symref.s
 create mode 100644 test/MC/ELF/tls-i386.s
 create mode 100644 test/MC/ELF/tls.s
 create mode 100644 test/MC/ELF/type.s
 create mode 100644 test/MC/ELF/uleb.s
 create mode 100644 test/MC/ELF/undef.s
 create mode 100644 test/MC/ELF/undef2.s
 create mode 100644 test/MC/ELF/weak.s
 create mode 100644 test/MC/ELF/weakref-plt.s
 create mode 100644 test/MC/ELF/weakref-reloc.s
 create mode 100644 test/MC/ELF/weakref.s
 create mode 100644 test/MC/ELF/zero.s
 create mode 100644 test/MC/MBlaze/dg.exp
 create mode 100644 test/MC/MBlaze/mblaze_branch.s
 create mode 100644 test/MC/MBlaze/mblaze_fpu.s
 create mode 100644 test/MC/MBlaze/mblaze_fsl.s
 create mode 100644 test/MC/MBlaze/mblaze_imm.s
 create mode 100644 test/MC/MBlaze/mblaze_memory.s
 create mode 100644 test/MC/MBlaze/mblaze_operands.s
 create mode 100644 test/MC/MBlaze/mblaze_pattern.s
 create mode 100644 test/MC/MBlaze/mblaze_shift.s
 create mode 100644 test/MC/MBlaze/mblaze_special.s
 create mode 100644 test/MC/MBlaze/mblaze_typea.s
 create mode 100644 test/MC/MBlaze/mblaze_typeb.s
 create mode 100644 test/MC/MachO/darwin-ARM-reloc.s
 create mode 100644 test/MC/MachO/darwin-Thumb-reloc.s
 create mode 100644 test/MC/MachO/darwin-complex-difference.s
 create mode 100644 test/MC/MachO/diff-with-two-sections.s
 create mode 100644 test/MC/MachO/empty-dwarf-lines.s
 create mode 100644 test/MC/MachO/loc.s
 create mode 100644 test/MC/MachO/pcrel-to-other-section.s
 create mode 100644 test/MC/MachO/symbol-diff.s
 create mode 100644 test/MC/MachO/weakdef.s
 create mode 100644 test/MC/X86/3DNow.s
 create mode 100644 test/MC/X86/dg.exp
 create mode 100644 test/MC/X86/x86-32-avx.s
 create mode 100644 test/MC/X86/x86-32-coverage.s
 create mode 100644 test/MC/X86/x86-32-fma3.s
 create mode 100644 test/MC/X86/x86-32.s
 create mode 100644 test/MC/X86/x86-64.s
 create mode 100644 test/MC/X86/x86_64-avx-clmul-encoding.s
 create mode 100644 test/MC/X86/x86_64-avx-encoding.s
 create mode 100644 test/MC/X86/x86_64-encoding.s
 create mode 100644 test/MC/X86/x86_64-fma3-encoding.s
 create mode 100644 test/MC/X86/x86_64-imm-widths.s
 create mode 100644 test/MC/X86/x86_directives.s
 create mode 100644 test/MC/X86/x86_errors.s
 create mode 100644 test/MC/X86/x86_operands.s
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.coff-i386
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.coff-x86-64
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.elf-i386
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.elf-x86-64
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.macho-i386
 create mode 100644 test/Object/TestObjectFiles/trivial-object-test.macho-x86-64
 create mode 100644 test/Object/dg.exp
 create mode 100644 test/Object/nm-trivial-object.test-broken
 create mode 100644 test/Object/objdump-trivial-object.test-broken
 delete mode 100644 test/Other/2008-08-14-PassManager.ll
 create mode 100644 test/Other/extract.ll
 create mode 100644 test/Scripts/common_dump.py
 create mode 100755 test/Scripts/elf-dump
 create mode 100644 test/Scripts/elf-dump.bat
 delete mode 100755 test/Scripts/macho-dump
 delete mode 100644 test/Scripts/macho-dump.bat
 create mode 100755 test/Scripts/macho-dumpx
 create mode 100644 test/Scripts/macho-dumpx.bat
 create mode 100644 test/TableGen/Dag.td
 delete mode 100644 test/TableGen/DagDefSubst.td
 delete mode 100644 test/TableGen/DagIntSubst.td
 delete mode 100644 test/TableGen/nameconcat.td
 create mode 100644 test/Transforms/CodeGenPrepare/basic.ll
 delete mode 100644 test/Transforms/ConstProp/bitcast2.ll
 create mode 100644 test/Transforms/ConstProp/extractvalue.ll
 create mode 100644 test/Transforms/ConstProp/insertvalue.ll
 delete mode 100644 test/Transforms/ConstProp/nottest.ll
 create mode 100644 test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
 create mode 100644 test/Transforms/ConstantMerge/merge-both.ll
 create mode 100644 test/Transforms/ConstantMerge/unnamed-addr.ll
 create mode 100644 test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll
 create mode 100644 test/Transforms/CorrelatedValuePropagation/crash.ll
 create mode 100644 test/Transforms/CorrelatedValuePropagation/non-null.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/alloca.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/byval.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/context-sensitive.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/memcpy.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/partial-overwrite.ll
 delete mode 100644 test/Transforms/DeadStoreElimination/volatile-load.ll
 create mode 100644 test/Transforms/EarlyCSE/basic.ll
 create mode 100644 test/Transforms/EarlyCSE/dg.exp
 create mode 100644 test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
 create mode 100644 test/Transforms/GVN/2010-11-13-Simplify.ll
 create mode 100644 test/Transforms/GVN/load-pre-licm.ll
 create mode 100644 test/Transforms/GVN/non-local-offset.ll
 create mode 100644 test/Transforms/GVN/phi-translate.ll
 create mode 100644 test/Transforms/GVN/preserve-tbaa.ll
 create mode 100644 test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
 create mode 100644 test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
 create mode 100644 test/Transforms/GlobalOpt/unnamed-addr.ll
 delete mode 100644 test/Transforms/IndVarSimplify/loop-invariant-step.ll
 delete mode 100644 test/Transforms/Inline/byval2.ll
 create mode 100644 test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
 create mode 100644 test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
 create mode 100644 test/Transforms/InstCombine/2010-11-23-Distributed.ll
 create mode 100644 test/Transforms/InstCombine/2011-02-14-InfLoop.ll
 create mode 100644 test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
 create mode 100644 test/Transforms/InstCombine/bitcast-store.ll
 create mode 100644 test/Transforms/InstCombine/bitcast-vec-uniform.ll
 delete mode 100644 test/Transforms/InstCombine/div-cmp-overflow.ll
 delete mode 100644 test/Transforms/InstCombine/exact-sdiv.ll
 create mode 100644 test/Transforms/InstCombine/exact.ll
 create mode 100644 test/Transforms/InstCombine/fold-calls.ll
 create mode 100644 test/Transforms/InstCombine/fold-vector-select.ll
 create mode 100644 test/Transforms/InstCombine/memset2.ll
 create mode 100644 test/Transforms/InstCombine/neon-intrinsics.ll
 create mode 100644 test/Transforms/InstCombine/overflow.ll
 create mode 100644 test/Transforms/InstCombine/pr8547.ll
 create mode 100644 test/Transforms/InstCombine/select-crash.ll
 delete mode 100644 test/Transforms/InstCombine/vec_demanded_elts-2.ll
 delete mode 100644 test/Transforms/InstCombine/vec_demanded_elts-3.ll
 create mode 100644 test/Transforms/InstCombine/vec_sext.ll
 create mode 100644 test/Transforms/InstSimplify/2010-12-20-Boolean.ll
 create mode 100644 test/Transforms/InstSimplify/2010-12-20-Distribute.ll
 create mode 100644 test/Transforms/InstSimplify/2011-01-14-Thread.ll
 create mode 100644 test/Transforms/InstSimplify/2011-02-01-Vector.ll
 create mode 100644 test/Transforms/InstSimplify/compare.ll
 create mode 100644 test/Transforms/InstSimplify/dg.exp
 create mode 100644 test/Transforms/InstSimplify/exact-nsw-nuw.ll
 create mode 100644 test/Transforms/InstSimplify/fdiv.ll
 create mode 100644 test/Transforms/InstSimplify/reassociate.ll
 create mode 100644 test/Transforms/JumpThreading/degenerate-phi.ll
 create mode 100644 test/Transforms/JumpThreading/indirectbr.ll
 create mode 100644 test/Transforms/JumpThreading/select.ll
 delete mode 100644 test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
 create mode 100644 test/Transforms/LoopIdiom/basic.ll
 create mode 100644 test/Transforms/LoopIdiom/dg.exp
 delete mode 100644 test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/PR3913.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/PR4174-2.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/PR4174.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
 delete mode 100644 test/Transforms/LoopIndexSplit/dg.exp
 delete mode 100644 test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
 delete mode 100644 test/Transforms/LoopRotate/LRCrash-1.ll
 delete mode 100644 test/Transforms/LoopRotate/LRCrash-2.ll
 delete mode 100644 test/Transforms/LoopRotate/LRCrash-3.ll
 delete mode 100644 test/Transforms/LoopRotate/LRCrash-4.ll
 delete mode 100644 test/Transforms/LoopRotate/LRCrash-5.ll
 create mode 100644 test/Transforms/LoopRotate/basic.ll
 create mode 100644 test/Transforms/LoopRotate/crash.ll
 create mode 100644 test/Transforms/LoopRotate/dbgvalue.ll
 create mode 100644 test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll
 create mode 100644 test/Transforms/LoopUnroll/basic.ll
 create mode 100644 test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
 delete mode 100644 test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
 delete mode 100644 test/Transforms/MemCpyOpt/form-memset2.ll
 create mode 100644 test/Transforms/MemCpyOpt/loadstore-sret.ll
 create mode 100644 test/Transforms/MemCpyOpt/memcpy-to-memset.ll
 create mode 100644 test/Transforms/MemCpyOpt/smaller.ll
 create mode 100644 test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
 create mode 100644 test/Transforms/MergeFunc/vector.ll
 delete mode 100644 test/Transforms/PartialSpecialize/dg.exp
 delete mode 100644 test/Transforms/PartialSpecialize/two-specializations.ll
 create mode 100644 test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
 create mode 100644 test/Transforms/Reassociate/optional-flags.ll
 delete mode 100644 test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
 delete mode 100644 test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
 delete mode 100644 test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
 delete mode 100644 test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
 delete mode 100644 test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
 delete mode 100644 test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
 delete mode 100644 test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
 delete mode 100644 test/Transforms/ScalarRepl/2009-08-16-VLA.ll
 create mode 100644 test/Transforms/ScalarRepl/crash.ll
 create mode 100644 test/Transforms/ScalarRepl/phi-select.ll
 create mode 100644 test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
 create mode 100644 test/Transforms/SimplifyCFG/speculate-with-offset.ll
 create mode 100644 test/Transforms/SimplifyCFG/switch-to-icmp.ll
 delete mode 100644 test/Transforms/SimplifyCFG/switch_formation.ll
 create mode 100644 test/Transforms/SimplifyLibCalls/FPuts.ll
 create mode 100644 test/Transforms/SimplifyLibCalls/StrPBrk.ll
 create mode 100644 test/Transforms/SimplifyLibCalls/StrRChr.ll
 create mode 100644 test/Transforms/SimplifyLibCalls/StrSpn.ll
 create mode 100644 test/Transforms/TailCallElim/dup_tail.ll
 create mode 100644 tools/llvm-objdump/CMakeLists.txt
 create mode 100644 tools/llvm-objdump/Makefile
 create mode 100644 tools/llvm-objdump/llvm-objdump.cpp
 create mode 100644 tools/macho-dump/CMakeLists.txt
 create mode 100644 tools/macho-dump/Makefile
 create mode 100644 tools/macho-dump/macho-dump.cpp
 create mode 100644 unittests/ADT/FoldingSet.cpp
 create mode 100644 unittests/ADT/IntEqClassesTest.cpp
 create mode 100644 unittests/ADT/IntervalMapTest.cpp
 delete mode 100644 unittests/ADT/ValueMapTest.cpp
 create mode 100644 unittests/CMakeLists.txt
 create mode 100644 unittests/ExecutionEngine/JIT/JITTests.def
 create mode 100644 unittests/Support/EndianTest.cpp
 create mode 100644 unittests/Support/Path.cpp
 create mode 100644 unittests/Support/SwapByteOrderTest.cpp
 delete mode 100644 unittests/Support/System.cpp
 create mode 100644 unittests/Support/TimeValue.cpp
 create mode 100644 unittests/Transforms/Utils/Local.cpp
 create mode 100644 unittests/VMCore/ValueMapTest.cpp
 create mode 100755 utils/CollectDebugInfoUsingLLDB.py
 create mode 100755 utils/CompareDebugInfo.py
 create mode 100755 utils/GetRepositoryPath
 create mode 100644 utils/KillTheDoctor/CMakeLists.txt
 create mode 100644 utils/KillTheDoctor/KillTheDoctor.cpp
 delete mode 100644 utils/OldenDataRecover.pl
 create mode 100644 utils/TableGen/ClangSACheckersEmitter.cpp
 create mode 100644 utils/TableGen/ClangSACheckersEmitter.h
 create mode 100644 utils/TableGen/FixedLenDecoderEmitter.cpp
 create mode 100644 utils/TableGen/FixedLenDecoderEmitter.h
 create mode 100644 utils/TableGen/StringMatcher.cpp
 create mode 100644 utils/TableGen/StringMatcher.h
 create mode 100644 utils/Target/ARM/analyze-match-table.py
 create mode 100644 utils/kate/README
 create mode 100644 utils/kate/llvm.xml
 delete mode 100755 utils/lit/lit/lit.py
 create mode 100755 utils/lit/lit/main.py
 create mode 100644 utils/llvm-lit/CMakeLists.txt
 create mode 100755 utils/release/test-release.sh
 create mode 100755 utils/test_debuginfo.pl
 create mode 100644 utils/unittest/CMakeLists.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000000..2e2713a48ae5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,37 @@
+#==============================================================================#
+# This file specifies intentionally untracked files that git should ignore.
+# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html
+#
+# This file is intentionally different from the output of `git svn show-ignore`,
+# as most of those are useless.
+#==============================================================================#
+
+#==============================================================================#
+# File extensions to be ignored anywhere in the tree.
+#==============================================================================#
+# Temp files created by most text editors.
+*~
+# Merge files created by git.
+*.orig
+# Byte compiled python modules.
+*.pyc
+
+#==============================================================================#
+# Explicit files to ignore (only matches one).
+#==============================================================================#
+.gitusers
+cscope.files
+cscope.out
+autoconf/aclocal.m4
+autoconf/autom4te.cache
+
+#==============================================================================#
+# Directories to ignore (do not add trailing '/'s, they skip symlinks).
+#==============================================================================#
+# External projects that are tracked independently.
+projects/*
+!projects/sample
+!projects/CMakeLists.txt
+!projects/Makefile
+# Clang, which is tracked independently.
+tools/clang
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6099d17c56a..0a5d5f39d85a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,9 +10,16 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
   )
 
-set(PACKAGE_VERSION "2.8")
+set(PACKAGE_VERSION "2.9")
+
 include(VersionFromVCS)
-add_version_info_from_vcs(PACKAGE_VERSION)
+
+option(LLVM_APPEND_VC_REV
+  "Append the version control system revision id to LLVM version" OFF)
+
+if( LLVM_APPEND_VC_REV )
+  add_version_info_from_vcs(PACKAGE_VERSION)
+endif()
 
 set(PACKAGE_NAME llvm)
 set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
@@ -64,8 +71,8 @@ set(LLVM_ALL_TARGETS
   Mips
   MBlaze
   MSP430
-  PIC16
   PowerPC
+  PTX
   Sparc
   SystemZ
   X86
@@ -80,36 +87,25 @@ else( MSVC )
     CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
 endif( MSVC )
 
-set(C_INCLUDE_DIRS "" CACHE STRING
-  "Colon separated list of directories clang will search for headers.")
+option(LLVM_ENABLE_CBE_PRINTF_A "Set to ON if CBE is enabled for printf %a output" ON)
+if(LLVM_ENABLE_CBE_PRINTF_A)
+  set(ENABLE_CBE_PRINTF_A 1)
+endif()
+
+option(LLVM_ENABLE_TIMESTAMPS "Enable embedding timestamp information in build" ON)
+if(LLVM_ENABLE_TIMESTAMPS)
+  set(ENABLE_TIMESTAMPS 1)
+endif()
+
+option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF)
+set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so")
+set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h")
 
 set(LLVM_TARGET_ARCH "host"
   CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.")
 
 option(LLVM_ENABLE_THREADS "Use threads if available." ON)
 
-if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
-  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF)
-else()
-  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
-endif()
-
-if( LLVM_ENABLE_ASSERTIONS )
-  # MSVC doesn't like _DEBUG on release builds. See PR 4379.
-  if( NOT MSVC )
-    add_definitions( -D_DEBUG )
-  endif()
-  # On Release builds cmake automatically defines NDEBUG, so we
-  # explicitly undefine it:
-  if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
-    add_definitions( -UNDEBUG )
-  endif()
-else()
-  if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
-    add_definitions( -DNDEBUG )
-  endif()
-endif()
-
 if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
   set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} )
 endif()
@@ -135,65 +131,34 @@ set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
 
 include(AddLLVMDefinitions)
 
-if(WIN32)
-  if(CYGWIN)
-    set(LLVM_ON_WIN32 0)
-    set(LLVM_ON_UNIX 1)
-  else(CYGWIN)
-    set(LLVM_ON_WIN32 1)
-    set(LLVM_ON_UNIX 0)
-  endif(CYGWIN)
-  set(LTDL_SHLIB_EXT ".dll")
-  set(EXEEXT ".exe")
-  # Maximum path length is 160 for non-unicode paths
-  set(MAXPATHLEN 160)
-else(WIN32)
-  if(UNIX)
-    set(LLVM_ON_WIN32 0)
-    set(LLVM_ON_UNIX 1)
-    if(APPLE)
-      set(LTDL_SHLIB_EXT ".dylib")
-    else(APPLE)
-      set(LTDL_SHLIB_EXT ".so")
-    endif(APPLE)
-    set(EXEEXT "")
-    # FIXME: Maximum path length is currently set to 'safe' fixed value
-    set(MAXPATHLEN 2024)
-  else(UNIX)
-    MESSAGE(SEND_ERROR "Unable to determine platform")
-  endif(UNIX)
-endif(WIN32)
+option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
 
 include(config-ix)
 
-option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
+include(HandleLLVMOptions)
 
-set(ENABLE_PIC 0)
-if( LLVM_ENABLE_PIC )
- if( XCODE )
-   # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
-   # know how to disable this, so just force ENABLE_PIC off for now.
-   message(STATUS "Warning: -fPIC not supported with Xcode.")
- else( XCODE )
-   if( SUPPORTS_FPIC_FLAG )
-      message(STATUS "Building with -fPIC")
-      add_llvm_definitions(-fPIC)
-      set(ENABLE_PIC 1)
-   else( SUPPORTS_FPIC_FLAG )
-      message(STATUS "Warning: -fPIC not supported.")
-   endif()
- endif()
+if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF)
+else()
+  option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
 endif()
 
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake
+  ${LLVM_BINARY_DIR}/include/llvm/Config/config.h)
+
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake
+  ${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h)
+
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
+  ${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h)
+
 set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR} )
 set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib )
 set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib )
 
-# set(CMAKE_VERBOSE_MAKEFILE true)
-
-add_llvm_definitions( -D__STDC_LIMIT_MACROS )
-add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
-
 # MSVC has a gazillion warnings with this.
 if( MSVC )
   option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF)
@@ -204,65 +169,34 @@ endif()
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
 
-if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
-  # TODO: support other platforms and toolchains.
-  option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF)
-  if( LLVM_BUILD_32_BITS )
-    message(STATUS "Building 32 bits executables and libraries.")
-    add_llvm_definitions( -m32 )
-    list(APPEND CMAKE_EXE_LINKER_FLAGS -m32)
-    list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32)
-  endif( LLVM_BUILD_32_BITS )
-endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
-
-if( MSVC )
-  include(ChooseMSVCCRT)
-
-  add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS )
-  add_llvm_definitions( -D_SCL_SECURE_NO_WARNINGS -DCRT_NONSTDC_NO_WARNINGS )
-  add_llvm_definitions( -D_SCL_SECURE_NO_DEPRECATE )
-  add_llvm_definitions( -wd4146 -wd4503 -wd4996 -wd4800 -wd4244 -wd4624 )
-  add_llvm_definitions( -wd4355 -wd4715 -wd4180 -wd4345 -wd4224 )
-
-  # Suppress 'new behavior: elements of array 'array' will be default initialized'
-  add_llvm_definitions( -wd4351 )
-
-  # Enable warnings
-  if (LLVM_ENABLE_WARNINGS)
-    add_llvm_definitions( /W4 /Wall )
-    if (LLVM_ENABLE_PEDANTIC)
-      # No MSVC equivalent available
-    endif (LLVM_ENABLE_PEDANTIC)
-  endif (LLVM_ENABLE_WARNINGS)
-  if (LLVM_ENABLE_WERROR)
-    add_llvm_definitions( /WX )
-  endif (LLVM_ENABLE_WERROR)
-elseif( CMAKE_COMPILER_IS_GNUCXX )
-  if (LLVM_ENABLE_WARNINGS)
-    add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings )
-    if (LLVM_ENABLE_PEDANTIC)
-      add_llvm_definitions( -pedantic -Wno-long-long )
-    endif (LLVM_ENABLE_PEDANTIC)
-  endif (LLVM_ENABLE_WARNINGS)
-  if (LLVM_ENABLE_WERROR)
-    add_llvm_definitions( -Werror )
-  endif (LLVM_ENABLE_WERROR)
-endif( MSVC )
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
 include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
 
 if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
-   SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-include llvm/System/Solaris.h")
+   SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-include llvm/Support/Solaris.h")
 endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
 
 include(AddLLVM)
 include(TableGen)
 
-add_subdirectory(lib/Support)
-add_subdirectory(lib/System)
+if( MINGW )
+  get_system_libs(LLVM_SYSTEM_LIBS_LIST)
+  foreach(l ${LLVM_SYSTEM_LIBS_LIST})
+    set(LLVM_SYSTEM_LIBS "${LLVM_SYSTEM_LIBS} -l${l}")
+  endforeach()
+  set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}")
+  set(CMAKE_C_STANDARD_LIBRARIES "${CMAKE_C_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}")
+endif()
+
+if( MINGW )
+  # People report that -O3 is unreliable on MinGW. The traditional
+  # build also uses -O2 for that reason:
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2")
+endif()
 
-# Everything else depends on Support and System:
-set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${LLVM_LIBS} )
+# Put this before tblgen. Else we have a circular dependence.
+add_subdirectory(lib/Support)
 
 set(LLVM_TABLEGEN "tblgen" CACHE
   STRING "Native TableGen executable. Saves building one when cross-compiling.")
@@ -278,87 +212,43 @@ endif( CMAKE_CROSSCOMPILING )
 
 add_subdirectory(include/llvm)
 
-add_subdirectory(lib/VMCore)
-add_subdirectory(lib/CodeGen)
-add_subdirectory(lib/CodeGen/SelectionDAG)
-add_subdirectory(lib/CodeGen/AsmPrinter)
-add_subdirectory(lib/Bitcode/Reader)
-add_subdirectory(lib/Bitcode/Writer)
-add_subdirectory(lib/Transforms/Utils)
-add_subdirectory(lib/Transforms/Instrumentation)
-add_subdirectory(lib/Transforms/InstCombine)
-add_subdirectory(lib/Transforms/Scalar)
-add_subdirectory(lib/Transforms/IPO)
-add_subdirectory(lib/Transforms/Hello)
-add_subdirectory(lib/Linker)
-add_subdirectory(lib/Analysis)
-add_subdirectory(lib/Analysis/IPA)
-add_subdirectory(lib/MC)
-add_subdirectory(lib/MC/MCParser)
-add_subdirectory(lib/MC/MCDisassembler)
-add_subdirectory(test)
+add_subdirectory(lib)
 
 add_subdirectory(utils/FileCheck)
+add_subdirectory(utils/FileUpdate)
 add_subdirectory(utils/count)
 add_subdirectory(utils/not)
-
-set(LLVM_ENUM_ASM_PRINTERS "")
-set(LLVM_ENUM_ASM_PARSERS "")
-set(LLVM_ENUM_DISASSEMBLERS "")
-foreach(t ${LLVM_TARGETS_TO_BUILD})
-  message(STATUS "Targeting ${t}")
-  add_subdirectory(lib/Target/${t})
-  add_subdirectory(lib/Target/${t}/TargetInfo)
-  if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
-    add_subdirectory(lib/Target/${t}/AsmPrinter)
-    set(LLVM_ENUM_ASM_PRINTERS 
-      "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
-  endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
-  if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt )
-    add_subdirectory(lib/Target/${t}/AsmParser)
-    set(LLVM_ENUM_ASM_PARSERS 
-      "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
-  endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt )
-  if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/Disassembler/CMakeLists.txt )
-    add_subdirectory(lib/Target/${t}/Disassembler)
-    set(LLVM_ENUM_DISASSEMBLERS
-      "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n")
-  endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/Disassembler/CMakeLists.txt )
-  set(CURRENT_LLVM_TARGET)
-endforeach(t)
-
-# Produce llvm/Config/AsmPrinters.def
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
-  ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
-  )
-
-# Produce llvm/Config/AsmParsers.def
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
-  ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
-  )
-
-# Produce llvm/Config/Disassemblers.def
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in
-  ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def
-  )
-
-add_subdirectory(lib/ExecutionEngine)
-add_subdirectory(lib/ExecutionEngine/Interpreter)
-add_subdirectory(lib/ExecutionEngine/JIT)
-add_subdirectory(lib/Target)
-add_subdirectory(lib/AsmParser)
-add_subdirectory(lib/Archive)
+add_subdirectory(utils/llvm-lit)
 
 add_subdirectory(projects)
 
-option(LLVM_BUILD_TOOLS "Build LLVM tool programs." ON)
-add_subdirectory(tools)
+option(LLVM_BUILD_TOOLS
+  "Build the LLVM tools. If OFF, just generate build targets." ON)
+option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON)
+if( LLVM_INCLUDE_TOOLS )
+  add_subdirectory(tools)
+endif()
 
-option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." OFF)
-add_subdirectory(examples)
+option(LLVM_BUILD_EXAMPLES
+  "Build the LLVM example programs. If OFF, just generate build targets." OFF)
+option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
+if( LLVM_INCLUDE_EXAMPLES )
+  add_subdirectory(examples)
+endif()
+
+option(LLVM_BUILD_TESTS
+  "Build LLVM unit tests. If OFF, just generate build targes." OFF)
+option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
+if( LLVM_INCLUDE_TESTS )
+  add_subdirectory(test)
+  add_subdirectory(utils/unittest)
+  add_subdirectory(unittests)
+  if (MSVC)
+    # This utility is used to prevent chrashing tests from calling Dr. Watson on
+    # Windows.
+    add_subdirectory(utils/KillTheDoctor)
+  endif()
+endif()
 
 add_subdirectory(cmake/modules)
 
@@ -385,3 +275,18 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
   )
 
 # TODO: make and install documentation.
+
+set(CPACK_PACKAGE_VENDOR "LLVM")
+set(CPACK_PACKAGE_VERSION_MAJOR 2)
+set(CPACK_PACKAGE_VERSION_MINOR 9)
+add_version_info_from_vcs(CPACK_PACKAGE_VERSION_PATCH)
+include(CPack)
+
+# Workaround for MSVS10 to avoid the Dialog Hell
+# FIXME: This could be removed with future version of CMake.
+if(MSVC_VERSION EQUAL 1600)
+  set(LLVM_SLN_FILENAME "${CMAKE_CURRENT_BINARY_DIR}/LLVM.sln")
+  if( EXISTS "${LLVM_SLN_FILENAME}" )
+    file(APPEND "${LLVM_SLN_FILENAME}" "\n# This should be regenerated!\n")
+  endif()
+endif()
diff --git a/CREDITS.TXT b/CREDITS.TXT
index aeecfe2e21e1..ab01dde338a5 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -8,6 +8,7 @@ beautification by scripts.  The fields are: name (N), email (E), web-address
 (W), PGP key ID and fingerprint (P), description (D), and snail-mail address
 (S).
 
+
 N: Vikram Adve
 E: vadve@cs.uiuc.edu
 W: http://www.cs.uiuc.edu/~vadve/
@@ -39,7 +40,7 @@ N: Misha Brukman
 E: brukman+llvm@uiuc.edu
 W: http://misha.brukman.net
 D: Portions of X86 and Sparc JIT compilers, PowerPC backend
-D: Incremental bytecode loader
+D: Incremental bitcode loader
 
 N: Cameron Buschardt
 E: buschard@uiuc.edu
@@ -328,6 +329,7 @@ D: The `paths' pass
 N: Michael J. Spencer
 E: bigcheesegs@gmail.com
 D: Shepherding Windows COFF support into MC.
+D: Lots of Windows stuff.
 
 N: Reid Spencer
 E: rspencer@reidspencer.com
diff --git a/Makefile b/Makefile
index ae650b7f2d93..dbb759dd5fce 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@
 LEVEL := .
 
 # Top-Level LLVM Build Stages:
-#   1. Build lib/System and lib/Support, which are used by utils (tblgen).
+#   1. Build lib/Support, which is used by utils (tblgen).
 #   2. Build utils, which is used by VMCore.
 #   3. Build VMCore, which builds the Intrinsics.inc file used by libs.
 #   4. Build libs, which are needed by llvm-config.
@@ -27,10 +27,10 @@ LEVEL := .
 ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore)  # Normal build (not "Apple-style").
 
 ifeq ($(BUILD_DIRS_ONLY),1)
-  DIRS := lib/System lib/Support utils
+  DIRS := lib/Support utils
   OPTIONAL_DIRS :=
 else
-  DIRS := lib/System lib/Support utils lib/VMCore lib tools/llvm-shlib \
+  DIRS := lib/Support utils lib/VMCore lib tools/llvm-shlib \
           tools/llvm-config tools runtime docs unittests
   OPTIONAL_DIRS := projects bindings
 endif
@@ -47,6 +47,10 @@ ifneq ($(ENABLE_SHARED),1)
   DIRS := $(filter-out tools/llvm-shlib, $(DIRS))
 endif
 
+ifneq ($(ENABLE_DOCS),1)
+  DIRS := $(filter-out docs, $(DIRS))
+endif
+
 ifeq ($(MAKECMDGOALS),libs-only)
   DIRS := $(filter-out tools runtime docs, $(DIRS))
   OPTIONAL_DIRS :=
@@ -95,6 +99,11 @@ ifeq ($(MAKECMDGOALS),install)
   OPTIONAL_DIRS := $(filter bindings, $(OPTIONAL_DIRS))
 endif
 
+# Don't build unittests when ONLY_TOOLS is set.
+ifneq ($(ONLY_TOOLS),)
+  DIRS := $(filter-out unittests, $(DIRS))
+endif
+
 # If we're cross-compiling, build the build-hosted tools first
 ifeq ($(LLVM_CROSS_COMPILING),1)
 all:: cross-compile-build-tools
@@ -150,7 +159,7 @@ dist-hook::
 	$(Echo) Eliminating files constructed by configure
 	$(Verb) $(RM) -f \
 	  $(TopDistDir)/include/llvm/Config/config.h  \
-	  $(TopDistDir)/include/llvm/System/DataTypes.h
+	  $(TopDistDir)/include/llvm/Support/DataTypes.h
 
 clang-only: all
 tools-only: all
@@ -169,7 +178,7 @@ FilesToConfig := \
   include/llvm/Config/AsmPrinters.def \
   include/llvm/Config/AsmParsers.def \
   include/llvm/Config/Disassemblers.def \
-  include/llvm/System/DataTypes.h \
+  include/llvm/Support/DataTypes.h \
   tools/llvmc/src/Base.td
 FilesToConfigPATH  := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig))
 
diff --git a/Makefile.config.in b/Makefile.config.in
index 5ebd80384fb0..5c737580632e 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -164,7 +164,7 @@ CAT        := @CAT@
 DOT        := @DOT@
 DOXYGEN    := @DOXYGEN@
 GROFF      := @GROFF@
-GZIP       := @GZIP@
+GZIPBIN    := @GZIPBIN@
 OCAMLC     := @OCAMLC@
 OCAMLOPT   := @OCAMLOPT@
 OCAMLDEP   := @OCAMLDEP@
@@ -195,6 +195,7 @@ LLVMGXX  := @LLVMGXX@
 LLVMCC1  := @LLVMCC1@
 LLVMCC1PLUS := @LLVMCC1PLUS@
 LLVMGCC_LANGS := @LLVMGCC_LANGS@
+LLVMGCC_DRAGONEGG := @LLVMGCC_DRAGONEGG@
 
 # Information on Clang, if configured.
 CLANGPATH := @CLANGPATH@
@@ -204,6 +205,10 @@ ENABLE_BUILT_CLANG := @ENABLE_BUILT_CLANG@
 # The LLVM capable compiler to use.
 LLVMCC_OPTION := @LLVMCC_OPTION@
 
+# The flag used to emit LLVM IR.
+LLVMCC_EMITIR_FLAG = @LLVMCC_EMITIR_FLAG@
+LLVMCC_DISABLEOPT_FLAGS := @LLVMCC_DISABLEOPT_FLAGS@
+
 # Path to directory where object files should be stored during a build.
 # Set OBJ_ROOT to "." if you do not want to use a separate place for
 # object files.
@@ -259,6 +264,9 @@ OPTIMIZE_OPTION := @OPTIMIZE_OPTION@
 # information to allow gprof to be used to get execution frequencies.
 #ENABLE_PROFILING = 1
 
+# When ENABLE_DOCS is disabled, docs/ will not be built.
+ENABLE_DOCS = @ENABLE_DOCS@
+
 # When ENABLE_DOXYGEN is enabled, the doxygen documentation will be built
 ENABLE_DOXYGEN = @ENABLE_DOXYGEN@
 
@@ -271,6 +279,9 @@ ENABLE_PIC := @ENABLE_PIC@
 # Do we want to build a shared library and link the tools with it?
 ENABLE_SHARED := @ENABLE_SHARED@
 
+# Do we want to link the stdc++ into a shared library? (Cygming)
+ENABLE_EMBED_STDCXX := @ENABLE_EMBED_STDCXX@
+
 # Use -fvisibility-inlines-hidden?
 ENABLE_VISIBILITY_INLINES_HIDDEN := @ENABLE_VISIBILITY_INLINES_HIDDEN@
 
@@ -341,6 +352,8 @@ NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
 # -Wno-variadic-macros
 NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
 
+# Was polly found in tools/polly?
+LLVM_HAS_POLLY = @LLVM_HAS_POLLY@
 # Flags supported by the linker.
 # bfd ld / gold --version-script=file
 HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@
diff --git a/Makefile.rules b/Makefile.rules
index 9cff1053d0d9..363fa9605b5a 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -129,11 +129,8 @@ reconfigure:
 	  $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
 	  $(ConfigStatusScript)
 
-# FIXME: The {PIC16,MSP430}/AsmPrinter line here is a hack to force a reconfigure to pick
-# up AsmPrinter changes. Remove it after a reasonable delay from 2009-08-13.
-
 .PRECIOUS: $(ConfigStatusScript)
-$(ConfigStatusScript): $(ConfigureScript) $(LLVM_SRC_ROOT)/lib/Target/PIC16/AsmPrinter/Makefile $(LLVM_SRC_ROOT)/lib/Target/MSP430/AsmPrinter/Makefile
+$(ConfigStatusScript): $(ConfigureScript)
 	$(Echo) Reconfiguring with $<
 	$(Verb) cd $(PROJ_OBJ_ROOT) && \
 	  if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
@@ -161,9 +158,13 @@ endif
 # If the Makefile in the source tree has been updated, copy it over into the
 # build tree. But, only do this if the source and object makefiles differ
 #------------------------------------------------------------------------
+ifndef PROJ_MAKEFILE
+PROJ_MAKEFILE := $(PROJ_SRC_DIR)/Makefile
+endif
+
 ifneq ($(PROJ_OBJ_DIR),$(PROJ_SRC_DIR))
 
-Makefile: $(PROJ_SRC_DIR)/Makefile $(ExtraMakefiles)
+Makefile: $(PROJ_MAKEFILE) $(ExtraMakefiles)
 	$(Echo) "Updating Makefile"
 	$(Verb) $(MKDIR) $(@D)
 	$(Verb) $(CP) -f $< $@
@@ -171,7 +172,7 @@ Makefile: $(PROJ_SRC_DIR)/Makefile $(ExtraMakefiles)
 # Copy the Makefile.* files unless we're in the root directory which avoids
 # the copying of Makefile.config.in or other things that should be explicitly
 # taken care of.
-$(PROJ_OBJ_DIR)/Makefile% : $(PROJ_SRC_DIR)/Makefile%
+$(PROJ_OBJ_DIR)/Makefile% : $(PROJ_MAKEFILE)%
 	@case '$?' in \
           *Makefile.rules) ;; \
           *.in) ;; \
@@ -204,7 +205,7 @@ ifdef LLVMC_BASED_DRIVER
 TOOLNAME = $(LLVMC_BASED_DRIVER)
 
 LLVMLIBS = CompilerDriver.a
-LINK_COMPONENTS = support system
+LINK_COMPONENTS = support
 
 endif # LLVMC_BASED_DRIVER
 
@@ -300,7 +301,7 @@ ifneq ($(REQUIRES_RTTI), 1)
   CXX.Flags += -fno-rtti
 endif
 
-ifdef ENABLE_COVERAGE
+ifeq ($(ENABLE_COVERAGE),1)
   BuildMode := $(BuildMode)+Coverage
   CXX.Flags += -ftest-coverage -fprofile-arcs
   C.Flags   += -ftest-coverage -fprofile-arcs
@@ -308,17 +309,17 @@ endif
 
 # If DISABLE_ASSERTIONS=1 is specified (make command line or configured),
 # then disable assertions by defining the appropriate preprocessor symbols.
-ifndef DISABLE_ASSERTIONS
+ifeq ($(DISABLE_ASSERTIONS),1)
+  CPP.Defines += -DNDEBUG
+else
   BuildMode := $(BuildMode)+Asserts
   CPP.Defines += -D_DEBUG
-else
-  CPP.Defines += -DNDEBUG
 endif
 
 # If ENABLE_EXPENSIVE_CHECKS=1 is specified (make command line or
 # configured), then enable expensive checks by defining the
 # appropriate preprocessor symbols.
-ifdef ENABLE_EXPENSIVE_CHECKS
+ifeq ($(ENABLE_EXPENSIVE_CHECKS),1)
   BuildMode := $(BuildMode)+Checks
   CPP.Defines += -D_GLIBCXX_DEBUG -DXDEBUG
 endif
@@ -387,12 +388,21 @@ ifeq ($(ENABLE_PIC),0)
   CXX.Flags     += -fPIC
   CPP.BaseFlags += -fPIC
 endif
-endif
 
-ifeq ($(ARCH),Alpha)
   LD.Flags += -Wl,--no-relax
 endif
 
+# GNU ld/PECOFF accepts but ignores them below;
+#   --version-script
+#   --export-dynamic
+#   --rpath
+# FIXME: autoconf should be aware of them.
+ifneq (,$(filter $(HOST_OS),Cygwin MingW))
+  HAVE_LINK_VERSION_SCRIPT := 0
+  RPATH :=
+  RDYNAMIC := -Wl,--export-all-symbols
+endif
+
 #--------------------------------------------------------------------
 # Directory locations
 #--------------------------------------------------------------------
@@ -497,8 +507,8 @@ ifeq ($(HOST_OS),Darwin)
   # Get "4" out of 10.4 for later pieces in the makefile.
   DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/')
 
-  SharedLinkOptions=-Wl,-flat_namespace -Wl,-undefined,suppress \
-                    -dynamiclib
+  LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress
+  SharedLinkOptions := -dynamiclib
   ifneq ($(ARCH),ARM)
     SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
   endif
@@ -516,10 +526,6 @@ ifdef SHARED_LIBRARY
 ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
 ifneq ($(HOST_OS),Darwin)
   LD.Flags += $(RPATH) -Wl,'$$ORIGIN'
-else
-ifneq ($(DARWIN_MAJVERS),4)
-  LD.Flags += $(RPATH) -Wl,$(SharedLibDir)
-endif
 endif
 endif
 endif
@@ -547,15 +553,21 @@ ifndef KEEP_SYMBOLS
   Install.StripFlag += -s
 endif
 
+ifdef TOOL_NO_EXPORTS
+  DynamicFlags :=
+else
+  DynamicFlag := $(RDYNAMIC)
+endif
+
 # Adjust linker flags for building an executable
 ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
 ifneq ($(HOST_OS), Darwin)
 ifdef TOOLNAME
   LD.Flags += $(RPATH) -Wl,'$$ORIGIN/../lib'
   ifdef EXAMPLE_TOOL
-    LD.Flags += $(RPATH) -Wl,$(ExmplDir) $(RDYNAMIC)
+    LD.Flags += $(RPATH) -Wl,$(ExmplDir) $(DynamicFlag)
   else
-    LD.Flags += $(RPATH) -Wl,$(ToolDir) $(RDYNAMIC)
+    LD.Flags += $(RPATH) -Wl,$(ToolDir) $(DynamicFlag)
   endif
 endif
 else
@@ -618,11 +630,11 @@ else
 endif
 
 ifeq ($(HOST_OS),SunOS)
-CPP.BaseFlags += -include llvm/System/Solaris.h
+CPP.BaseFlags += -include llvm/Support/Solaris.h
 endif
 
 ifeq ($(HOST_OS),AuroraUX)
-CPP.BaseFlags += -include llvm/System/Solaris.h
+CPP.BaseFlags += -include llvm/Support/Solaris.h
 endif # !HOST_OS - AuroraUX.
 
 LD.Flags      += -L$(LibDir) -L$(LLVMLibDir)
@@ -828,7 +840,9 @@ $(RecursiveTargets)::
 else
 $(RecursiveTargets)::
 	$(Verb) for dir in $(OPTIONAL_DIRS); do \
-	  ($(MAKE) -C$$dir $@ ) || exit 1; \
+	  if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
+	    ($(MAKE) -C$$dir $@ ) || exit 1; \
+	  fi \
 	done
 endif
 endif
@@ -890,10 +904,13 @@ LLVMUsedLibs    := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(LLVMLIBS)))
 LLVMLibsPaths   := $(addprefix $(LLVMLibDir)/,$(LLVMUsedLibs))
 endif
 
-# Win32.DLL may refer to other components.
-ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# Loadable module for Win32 requires all symbols resolved for linking.
+# Then all symbols in LLVM.dll will be available.
+ifeq ($(ENABLE_SHARED),1)
   ifdef LOADABLE_MODULE
-    LINK_COMPONENTS := all
+    ifneq (,$(filter $(HOST_OS),Cygwin MingW))
+      LINK_COMPONENTS += all
+    endif
   endif
 endif
 
@@ -941,11 +958,6 @@ ifdef EXPORTED_SYMBOL_FILE
 # First, set up the native export file, which may differ from the source
 # export file.
 
-# The option --version-script is not effective on GNU ld win32.
-ifneq (,$(filter $(HOST_OS),Cygwin MingW))
-  HAVE_LINK_VERSION_SCRIPT := 0
-endif
-
 ifeq ($(HOST_OS),Darwin)
 # Darwin convention prefixes symbols with underscores.
 NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed
@@ -1073,12 +1085,14 @@ ifdef LIBRARYNAME
 # Make sure there isn't any extraneous whitespace on the LIBRARYNAME option
 LIBRARYNAME := $(strip $(LIBRARYNAME))
 ifdef LOADABLE_MODULE
-LibName.A  := $(LibDir)/$(LIBRARYNAME).a
-LibName.SO := $(SharedLibDir)/$(LIBRARYNAME)$(SHLIBEXT)
+BaseLibName.A  := $(LIBRARYNAME).a
+BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT)
 else
-LibName.A  := $(LibDir)/lib$(LIBRARYNAME).a
-LibName.SO := $(SharedLibDir)/$(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
+BaseLibName.A  := lib$(LIBRARYNAME).a
+BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
 endif
+LibName.A  := $(LibDir)/$(BaseLibName.A)
+LibName.SO := $(SharedLibDir)/$(BaseLibName.SO)
 LibName.O  := $(LibDir)/$(LIBRARYNAME).o
 LibName.BCA:= $(LibDir)/lib$(LIBRARYNAME).bca
 
@@ -1099,6 +1113,7 @@ endif
 ifdef LINK_LIBS_IN_SHARED
 ifdef LOADABLE_MODULE
 SharedLibKindMessage := "Loadable Module"
+SharedLinkOptions := $(LoadableModuleOptions) $(SharedLinkOptions)
 else
 SharedLibKindMessage := "Shared Library"
 endif
@@ -1131,7 +1146,7 @@ DestSharedLibDir := $(DESTDIR)$(PROJ_bindir)
 else
 DestSharedLibDir := $(DESTDIR)$(PROJ_libdir)
 endif
-DestSharedLib := $(DestSharedLibDir)/$(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
+DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO)
 
 install-local:: $(DestSharedLib)
 
@@ -1323,6 +1338,18 @@ endif
 endif
 endif
 
+#---------------------------------------------------------
+# Tool Order File Support
+#---------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_ORDER_FINE
+
+LD.Flags += -Wl,-order_file,$(TOOL_ORDER_FILE)
+
+endif
+endif
+
 #---------------------------------------------------------
 # Tool Version Info Support
 #---------------------------------------------------------
@@ -1441,27 +1468,27 @@ DEPEND_OPTIONS = -MMD -MP -MF "$(ObjDir)/$*.d.tmp" \
 DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.d.tmp" "$(ObjDir)/$*.d"; \
                   else $(RM) "$(ObjDir)/$*.d.tmp"; exit 1; fi
 
-$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
-$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
 	$(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
-$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
-$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
 	$(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
-$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
 	$(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
@@ -1481,31 +1508,31 @@ BC_DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.bc.d.tmp" "$(ObjDir)/$*.bc.d";
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
-                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
-                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
-                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+			$< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \
 	        $(BC_DEPEND_MOVEFILE)
 
 # Provide alternate rule sets if dependencies are disabled
@@ -1533,23 +1560,23 @@ $(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
 
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
+	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
+	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
-	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
+	$(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
-	$(BCCompile.C) $< -o $@ -S -emit-llvm
+	$(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
 
 $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
-	$(BCCompile.C) $< -o $@ -S -emit-llvm
+	$(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG)
 
 endif
 
@@ -1699,6 +1726,11 @@ $(ObjDir)/%GenAsmMatcher.inc.tmp : %.td $(ObjDir)/.dir
 	$(Echo) "Building $(<F) assembly matcher with tblgen"
 	$(Verb) $(TableGen) -gen-asm-matcher -o $(call SYSPATH, $@) $<
 
+$(TARGET:%=$(ObjDir)/%GenMCCodeEmitter.inc.tmp): \
+$(ObjDir)/%GenMCCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir
+	$(Echo) "Building $(<F) MC code emitter with tblgen"
+	$(Verb) $(TableGen) -gen-emitter -mc-emitter -o $(call SYSPATH, $@) $<
+
 $(TARGET:%=$(ObjDir)/%GenCodeEmitter.inc.tmp): \
 $(ObjDir)/%GenCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir
 	$(Echo) "Building $(<F) code emitter with tblgen"
diff --git a/ModuleInfo.txt b/ModuleInfo.txt
index 5a1d8b85ad0b..40607c71a944 100644
--- a/ModuleInfo.txt
+++ b/ModuleInfo.txt
@@ -1,4 +1,4 @@
-DepModule: 
+DepModule:
 BuildCmd: ./build-for-llvm-top.sh
 CleanCmd: make clean -C ../build.llvm
 InstallCmd: make install -C ../build.llvm
diff --git a/README.txt b/README.txt
index f54f5bf1b372..0dad9f5e0a67 100644
--- a/README.txt
+++ b/README.txt
@@ -1,4 +1,4 @@
-\Low Level Virtual Machine (LLVM)
+Low Level Virtual Machine (LLVM)
 ================================
 
 This directory and its subdirectories contain source code for the Low Level
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 0596dd60d230..9259633de94a 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,7 +31,7 @@ dnl===
 dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[2.8]],[llvmbugs@cs.uiuc.edu])
+AC_INIT([[llvm]],[[2.9svn]],[llvmbugs@cs.uiuc.edu])
 
 dnl Provide a copyright substitution and ensure the copyright notice is included
 dnl in the output of --version option of the generated configure script.
@@ -121,6 +121,26 @@ do
   fi
 done
 
+dnl Disable the build of polly, even if it is checked out into tools/polly.
+AC_ARG_ENABLE(polly,
+              AS_HELP_STRING([--enable-polly],
+                             [Use polly if available (default is YES)]),,
+                             enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_POLLY,[1]) ;;
+  no)  AC_SUBST(ENABLE_POLLY,[0]) ;;
+  default) AC_SUBST(ENABLE_POLLY,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-polly. Use "yes" or "no"]) ;;
+esac
+
+
+dnl Check if polly is checked out into tools/polly and configure it if
+dnl available.
+if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then
+  AC_SUBST(LLVM_HAS_POLLY,1)
+  AC_CONFIG_SUBDIRS([tools/polly])
+fi
+
 dnl===-----------------------------------------------------------------------===
 dnl===
 dnl=== SECTION 2: Architecture, target, and host checks
@@ -299,7 +319,7 @@ AC_SUBST(LINKALL,$llvm_cv_link_all_option)
 AC_SUBST(NOLINKALL,$llvm_cv_no_link_all_option)
 
 dnl Set the "LLVM_ON_*" variables based on llvm_cv_platform_type
-dnl This is used by lib/System to determine the basic kind of implementation
+dnl This is used by lib/Support to determine the basic kind of implementation
 dnl to use.
 case $llvm_cv_platform_type in
   Unix)
@@ -326,12 +346,12 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   alpha*-*)               llvm_cv_target_arch="Alpha" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
   mips-*)                 llvm_cv_target_arch="Mips" ;;
-  pic16-*)                llvm_cv_target_arch="PIC16" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
   msp430-*)               llvm_cv_target_arch="MSP430" ;;
   s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   bfin-*)                 llvm_cv_target_arch="Blackfin" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
+  ptx-*)                  llvm_cv_target_arch="PTX" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac])
 
@@ -391,7 +411,7 @@ dnl===-----------------------------------------------------------------------===
 
 dnl --enable-optimized : check whether they want to do an optimized build:
 AC_ARG_ENABLE(optimized, AS_HELP_STRING(
- --enable-optimized,[Compile with optimizations enabled (default is YES)]),,enableval="yes")
+ --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
 if test ${enableval} = "no" ; then
   AC_SUBST(ENABLE_OPTIMIZED,[[]])
 else
@@ -409,7 +429,7 @@ fi
 
 dnl --enable-assertions : check whether they want to turn on assertions or not:
 AC_ARG_ENABLE(assertions,AS_HELP_STRING(
-  --enable-assertions,[Compile with assertion checks enabled (default is NO)]),, enableval="no")
+  --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
 if test ${enableval} = "yes" ; then
   AC_SUBST(DISABLE_ASSERTIONS,[[]])
 else
@@ -460,19 +480,31 @@ else
     Sparc)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     PowerPC)     AC_SUBST(TARGET_HAS_JIT,1) ;;
     x86_64)      AC_SUBST(TARGET_HAS_JIT,1) ;;
-    Alpha)       AC_SUBST(TARGET_HAS_JIT,1) ;;
+    Alpha)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     ARM)         AC_SUBST(TARGET_HAS_JIT,1) ;;
     Mips)        AC_SUBST(TARGET_HAS_JIT,0) ;;
-    PIC16)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     XCore)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     MSP430)      AC_SUBST(TARGET_HAS_JIT,0) ;;
     SystemZ)     AC_SUBST(TARGET_HAS_JIT,0) ;;
     Blackfin)    AC_SUBST(TARGET_HAS_JIT,0) ;;
     MBlaze)      AC_SUBST(TARGET_HAS_JIT,0) ;;
+    PTX)         AC_SUBST(TARGET_HAS_JIT,0) ;;
     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
   esac
 fi
 
+dnl Allow enablement of building and installing docs
+AC_ARG_ENABLE(docs,
+              AS_HELP_STRING([--enable-docs],
+                             [Build documents (default is YES)]),,
+                             enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_DOCS,[1]) ;;
+  no)  AC_SUBST(ENABLE_DOCS,[0]) ;;
+  default) AC_SUBST(ENABLE_DOCS,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-docs. Use "yes" or "no"]) ;;
+esac
+
 dnl Allow enablement of doxygen generated documentation
 AC_ARG_ENABLE(doxygen,
               AS_HELP_STRING([--enable-doxygen],
@@ -498,6 +530,18 @@ case "$enableval" in
 esac
 AC_DEFINE_UNQUOTED([ENABLE_THREADS],$ENABLE_THREADS,[Define if threads enabled])
 
+dnl Allow disablement of pthread.h
+AC_ARG_ENABLE(pthreads,
+              AS_HELP_STRING([--enable-pthreads],
+                             [Use pthreads if available (default is YES)]),,
+                             enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_PTHREADS,[1]) ;;
+  no)  AC_SUBST(ENABLE_PTHREADS,[0]) ;;
+  default) AC_SUBST(ENABLE_PTHREADS,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-pthreads. Use "yes" or "no"]) ;;
+esac
+
 dnl Allow building without position independent code
 AC_ARG_ENABLE(pic,
   AS_HELP_STRING([--enable-pic],
@@ -524,6 +568,18 @@ case "$enableval" in
   *) AC_MSG_ERROR([Invalid setting for --enable-shared. Use "yes" or "no"]) ;;
 esac
 
+dnl Allow libstdc++ is embedded in LLVM.dll.
+AC_ARG_ENABLE(embed-stdcxx,
+  AS_HELP_STRING([--enable-embed-stdcxx],
+                 [Build a shared library with embedded libstdc++ for Win32 DLL (default is YES)]),,
+                 enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+  no)  AC_SUBST(ENABLE_EMBED_STDCXX,[0]) ;;
+  default) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-embed-stdcxx. Use "yes" or "no"]) ;;
+esac
+
 dnl Enable embedding timestamp information into build.
 AC_ARG_ENABLE(timestamps,
   AS_HELP_STRING([--enable-timestamps],
@@ -542,14 +598,14 @@ dnl Allow specific targets to be specified for building (or not)
 TARGETS_TO_BUILD=""
 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
     [Build specific host targets: all or target1,target2,... Valid targets are:
-     host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu, pic16,
-     xcore, msp430, systemz, blackfin, cbe, and cpp (default=all)]),,
+     host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu,
+     xcore, msp430, systemz, blackfin, ptx, cbe, and cpp (default=all)]),,
     enableval=all)
 if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -560,7 +616,6 @@ case "$enableval" in
         arm)      TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
         mips)     TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
         spu)      TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
-        pic16)    TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
         xcore)    TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
         msp430)   TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
@@ -568,6 +623,7 @@ case "$enableval" in
         cbe)      TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+        ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -578,11 +634,11 @@ case "$enableval" in
             Mips)        TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
             MBlaze)      TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
             CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
-            PIC16)       TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
             XCore)       TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             s390x)       TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             Blackfin)    TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+            PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
             *)       AC_MSG_ERROR([Can not set target to build]) ;;
           esac ;;
         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
@@ -618,7 +674,7 @@ LLVM_ENUM_ASM_PARSERS=""
 LLVM_ENUM_DISASSEMBLERS=""
 for target_to_build in $TARGETS_TO_BUILD; do
   LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
-  if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
+  if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then
     LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
   fi
   if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
@@ -800,6 +856,13 @@ case "$withval" in
   *) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
 esac
 
+AC_ARG_WITH(clang-resource-dir,
+  AS_HELP_STRING([--with-clang-resource-dir],
+    [Relative directory from the Clang binary for resource files]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CLANG_RESOURCE_DIR,"$withval",
+                   [Relative directory for resource files])
+
 AC_ARG_WITH(c-include-dirs,
   AS_HELP_STRING([--with-c-include-dirs],
     [Colon separated list of directories clang will search for headers]),,
@@ -975,6 +1038,16 @@ if test "$DOTTY" != "echo dotty" ; then
   AC_DEFINE_UNQUOTED([LLVM_PATH_DOTTY],"$DOTTY${EXEEXT}",
    [Define to path to dotty program if found or 'echo dotty' otherwise])
 fi
+AC_PATH_PROG(XDOT_PY, [xdot.py], [echo xdot.py])
+if test "$XDOT_PY" != "echo xdot.py" ; then
+  AC_DEFINE([HAVE_XDOT_PY],[1],[Define if the xdot.py program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_XDOT_PY],"$XDOT_PY${EXEEXT}",
+   [Define to path to xdot.py program if found or 'echo xdot.py' otherwise])
+fi
 
 dnl Look for a sufficiently recent version of Perl.
 LLVM_PROG_PERL([5.006])
@@ -1004,7 +1077,7 @@ AC_PATH_PROG(BZIP2, [bzip2])
 AC_PATH_PROG(CAT, [cat])
 AC_PATH_PROG(DOXYGEN, [doxygen])
 AC_PATH_PROG(GROFF, [groff])
-AC_PATH_PROG(GZIP, [gzip])
+AC_PATH_PROG(GZIPBIN, [gzip])
 AC_PATH_PROG(POD2HTML, [pod2html])
 AC_PATH_PROG(POD2MAN, [pod2man])
 AC_PATH_PROG(PDFROFF, [pdfroff])
@@ -1107,10 +1180,10 @@ then
   AC_MSG_ERROR([gcc|icc required but not found])
 fi
 
-dnl Ensure that compilation tools are GCC; we use GCC specific extensions
+dnl Ensure that compilation tools are compatible with GCC extensions
 if test "$GXX" != "yes" && test "$IXX" != "yes"
 then
-  AC_MSG_ERROR([g++|icc required but not found])
+  AC_MSG_ERROR([g++|clang++|icc required but not found])
 fi
 
 dnl Verify that GCC is version 3.0 or higher
@@ -1167,7 +1240,7 @@ AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
 
 dnl pthread locking functions are optional - but llvm will not be thread-safe
 dnl without locks.
-if test "$ENABLE_THREADS" -eq 1 ; then
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
   AC_CHECK_LIB(pthread, pthread_mutex_init)
   AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
                  AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
@@ -1255,10 +1328,11 @@ AC_HEADER_TIME
 AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h])
 AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h])
 AC_CHECK_HEADERS([utime.h windows.h])
-AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h])
+AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
 AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
 AC_CHECK_HEADERS([valgrind/valgrind.h])
-if test "$ENABLE_THREADS" -eq 1 ; then
+AC_CHECK_HEADERS([fenv.h])
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
   AC_CHECK_HEADERS(pthread.h,
                    AC_SUBST(HAVE_PTHREAD, 1),
                    AC_SUBST(HAVE_PTHREAD, 0))
@@ -1271,9 +1345,25 @@ if test "$llvm_cv_enable_libffi" = "yes" ; then
   AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
 fi
 
-dnl Try to find Darwin specific crash reporting library.
+dnl Try to find Darwin specific crash reporting libraries.
 AC_CHECK_HEADERS([CrashReporterClient.h])
 
+dnl Try to find Darwin specific crash reporting global.
+AC_MSG_CHECKING([__crashreporter_info__])
+AC_LINK_IFELSE(
+  AC_LANG_SOURCE(
+    [[extern const char *__crashreporter_info__;
+      int main() {
+        __crashreporter_info__ = "test";
+        return 0;
+      }
+    ]]),
+  AC_MSG_RESULT(yes)
+  AC_DEFINE(HAVE_CRASHREPORTER_INFO, 1, Can use __crashreporter_info__),
+  AC_MSG_RESULT(no)
+  AC_DEFINE(HAVE_CRASHREPORTER_INFO, 0,
+            Define if __crashreporter_info__ exists.))
+
 dnl===-----------------------------------------------------------------------===
 dnl===
 dnl=== SECTION 7: Check for types and structures
@@ -1301,12 +1391,37 @@ AC_CHECK_FUNCS([powf fmodf strtof round ])
 AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
 AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
 AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ])
-AC_CHECK_FUNCS([strerror strerror_r strerror_s setenv ])
+AC_CHECK_FUNCS([strerror strerror_r setenv ])
 AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
-AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp])
+AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev])
 AC_C_PRINTF_A
 AC_FUNC_RAND48
 
+dnl Check the declaration "Secure API" on Windows environments.
+AC_CHECK_DECLS([strerror_s])
+
+dnl Check symbols in libgcc.a for JIT on Mingw.
+if test "$llvm_cv_os_type" = "MingW" ; then
+  AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca]))
+  AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca]))
+  AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk]))
+  AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk]))
+
+  AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3]))
+  AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3]))
+  AC_CHECK_LIB(gcc,__divdi3,AC_DEFINE([HAVE___DIVDI3],[1],[Have host's __divdi3]))
+  AC_CHECK_LIB(gcc,__fixdfdi,AC_DEFINE([HAVE___FIXDFDI],[1],[Have host's __fixdfdi]))
+  AC_CHECK_LIB(gcc,__fixsfdi,AC_DEFINE([HAVE___FIXSFDI],[1],[Have host's __fixsfdi]))
+  AC_CHECK_LIB(gcc,__floatdidf,AC_DEFINE([HAVE___FLOATDIDF],[1],[Have host's __floatdidf]))
+  AC_CHECK_LIB(gcc,__lshrdi3,AC_DEFINE([HAVE___LSHRDI3],[1],[Have host's __lshrdi3]))
+  AC_CHECK_LIB(gcc,__moddi3,AC_DEFINE([HAVE___MODDI3],[1],[Have host's __moddi3]))
+  AC_CHECK_LIB(gcc,__udivdi3,AC_DEFINE([HAVE___UDIVDI3],[1],[Have host's __udivdi3]))
+  AC_CHECK_LIB(gcc,__umoddi3,AC_DEFINE([HAVE___UMODDI3],[1],[Have host's __umoddi3]))
+
+  AC_CHECK_LIB(gcc,__main,AC_DEFINE([HAVE___MAIN],[1],[Have host's __main]))
+  AC_CHECK_LIB(gcc,__cmpdi2,AC_DEFINE([HAVE___CMPDI2],[1],[Have host's __cmpdi2]))
+fi
+
 dnl Check for variations in the Standard C++ library and STL. These macros are
 dnl provided by LLVM in the autoconf/m4 directory.
 AC_FUNC_ISNAN
@@ -1372,12 +1487,36 @@ fi
 dnl Check, whether __dso_handle is present
 AC_CHECK_FUNCS([__dso_handle])
 
+dnl Check wether llvm-gcc is based on dragonegg
+AC_CACHE_CHECK([whether llvm-gcc is dragonegg],[llvm_cv_llvmgcc_dragonegg],
+[llvm_cv_llvmgcc_dragonegg="no"
+if test -n "$LLVMGCC" ; then
+  cp /dev/null conftest.c
+  $LLVMGCC -fplugin-arg-dragonegg-emit-ir -S -o - conftest.c > /dev/null 2>&1
+  if test $? -eq 0 ; then
+    llvm_cv_llvmgcc_dragonegg="yes"
+  fi
+  rm conftest.c
+fi])
+
+dnl Set the flags needed to emit LLVM IR and to disable optimizations
+dnl in llvmgcc
+if test "$llvm_cv_llvmgcc_dragonegg" = "yes" ; then
+  LLVMCC_EMITIR_FLAG="-fplugin-arg-dragonegg-emit-ir"
+  LLVMCC_DISABLEOPT_FLAGS="-fplugin-arg-dragonegg-disable-llvm-optzns"
+else
+  LLVMCC_EMITIR_FLAG="-emit-llvm"
+  LLVMCC_DISABLEOPT_FLAGS="-mllvm -disable-llvm-optzns"
+fi
+
+AC_SUBST(LLVMCC_EMITIR_FLAG)
+
 dnl See if the llvm-gcc executable can compile to LLVM assembly
 AC_CACHE_CHECK([whether llvm-gcc is sane],[llvm_cv_llvmgcc_sanity],
 [llvm_cv_llvmgcc_sanity="no"
-if test -x "$LLVMGCC" ; then
+if test -n "$LLVMGCC" ; then
   cp /dev/null conftest.c
-  "$LLVMGCC" -emit-llvm -S -o - conftest.c | \
+  $LLVMGCC "$LLVMCC_EMITIR_FLAG" -S -o - conftest.c | \
       grep 'target datalayout =' > /dev/null 2>&1
   if test $? -eq 0 ; then
     llvm_cv_llvmgcc_sanity="yes"
@@ -1386,16 +1525,19 @@ if test -x "$LLVMGCC" ; then
 fi])
 
 dnl Since we have a sane llvm-gcc, identify it and its sub-tools
+dnl Furthermore, add some information about the tools
 if test "$llvm_cv_llvmgcc_sanity" = "yes" ; then
   AC_MSG_CHECKING([llvm-gcc component support])
-  llvmcc1path=`"$LLVMGCC" --print-prog-name=cc1`
+  llvmcc1path=`$LLVMGCC --print-prog-name=cc1`
   AC_SUBST(LLVMCC1,$llvmcc1path)
-  llvmcc1pluspath=`"$LLVMGCC" --print-prog-name=cc1plus`
+  llvmcc1pluspath=`$LLVMGCC --print-prog-name=cc1plus`
   AC_SUBST(LLVMCC1PLUS,$llvmcc1pluspath)
   llvmgccdir=`echo "$llvmcc1path" | sed 's,/libexec/.*,,'`
   AC_SUBST(LLVMGCCDIR,$llvmgccdir)
-  llvmgcclangs=[`"$LLVMGCC" -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'`]
+  llvmgcclangs=[`$LLVMGCC -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'`]
   AC_SUBST(LLVMGCC_LANGS,$llvmgcclangs)
+  AC_SUBST(LLVMGCC_DRAGONEGG,$llvm_cv_llvmgcc_dragonegg)
+  AC_SUBST(LLVMCC_DISABLEOPT_FLAGS)
   AC_MSG_RESULT([ok])
 fi
 
@@ -1548,7 +1690,7 @@ AC_CONFIG_FILES([include/llvm/Config/Targets.def])
 AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def])
 AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def])
 AC_CONFIG_FILES([include/llvm/Config/Disassemblers.def])
-AC_CONFIG_HEADERS([include/llvm/System/DataTypes.h])
+AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h])
 
 dnl Configure the makefile's configuration data
 AC_CONFIG_FILES([Makefile.config])
diff --git a/bindings/ada/analysis/llvm_analysis-binding.ads b/bindings/ada/analysis/llvm_analysis-binding.ads
deleted file mode 100644
index c51a50353f11..000000000000
--- a/bindings/ada/analysis/llvm_analysis-binding.ads
+++ /dev/null
@@ -1,32 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-with Interfaces.C.Strings;
-
-
-package LLVM_Analysis.Binding is
-
-   function LLVMVerifyModule
-     (M          : in llvm.LLVMModuleRef;
-      Action     : in LLVM_Analysis.LLVMVerifierFailureAction;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   function LLVMVerifyFunction
-     (Fn     : in llvm.LLVMValueRef;
-      Action : in LLVM_Analysis.LLVMVerifierFailureAction)
-      return   Interfaces.C.int;
-
-   procedure LLVMViewFunctionCFG (Fn : in llvm.LLVMValueRef);
-
-   procedure LLVMViewFunctionCFGOnly (Fn : in llvm.LLVMValueRef);
-
-private
-
-   pragma Import (C, LLVMVerifyModule, "Ada_LLVMVerifyModule");
-   pragma Import (C, LLVMVerifyFunction, "Ada_LLVMVerifyFunction");
-   pragma Import (C, LLVMViewFunctionCFG, "Ada_LLVMViewFunctionCFG");
-   pragma Import (C, LLVMViewFunctionCFGOnly, "Ada_LLVMViewFunctionCFGOnly");
-
-end LLVM_Analysis.Binding;
diff --git a/bindings/ada/analysis/llvm_analysis.ads b/bindings/ada/analysis/llvm_analysis.ads
deleted file mode 100644
index aa7b3f0e2e91..000000000000
--- a/bindings/ada/analysis/llvm_analysis.ads
+++ /dev/null
@@ -1,30 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C;
-
-
-package LLVM_Analysis is
-
-   -- LLVMVerifierFailureAction
-   --
-   type LLVMVerifierFailureAction is (
-      LLVMAbortProcessAction,
-      LLVMPrintMessageAction,
-      LLVMReturnStatusAction);
-
-   for LLVMVerifierFailureAction use
-     (LLVMAbortProcessAction => 0,
-      LLVMPrintMessageAction => 1,
-      LLVMReturnStatusAction => 2);
-
-   pragma Convention (C, LLVMVerifierFailureAction);
-
-   type LLVMVerifierFailureAction_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Analysis.LLVMVerifierFailureAction;
-
-   type LLVMVerifierFailureAction_view is access all
-     LLVM_Analysis.LLVMVerifierFailureAction;
-
-end LLVM_Analysis;
diff --git a/bindings/ada/analysis/llvm_analysis_wrap.cxx b/bindings/ada/analysis/llvm_analysis_wrap.cxx
deleted file mode 100644
index f2a8637343de..000000000000
--- a/bindings/ada/analysis/llvm_analysis_wrap.cxx
+++ /dev/null
@@ -1,369 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Analysis (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Analysis(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/Analysis.h"
-//#include "llvm-c/BitReader.h"
-//#include "llvm-c/BitWriter.h"
-//#include "llvm-c/Core.h"
-//#include "llvm-c/ExecutionEngine.h"
-//#include "llvm-c/LinkTimeOptimizer.h"
-//#include "llvm-c/lto.h"
-//#include "llvm-c/Target.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport int SWIGSTDCALL Ada_LLVMVerifyModule (
-  void * jarg1
-  ,
-  
-  int jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMVerifierFailureAction arg2 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = (LLVMVerifierFailureAction) jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMVerifyModule(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMVerifyFunction (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMVerifierFailureAction arg2 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMVerifierFailureAction) jarg2; 
-  
-  result = (int)LLVMVerifyFunction(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFG (
-  void * jarg1
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  LLVMViewFunctionCFG(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFGOnly (
-  void * jarg1
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  LLVMViewFunctionCFGOnly(arg1);
-  
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/bitreader/llvm_bit_reader-binding.ads b/bindings/ada/bitreader/llvm_bit_reader-binding.ads
deleted file mode 100644
index 4fcdb4a84fcf..000000000000
--- a/bindings/ada/bitreader/llvm_bit_reader-binding.ads
+++ /dev/null
@@ -1,52 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-with Interfaces.C.Strings;
-
-
-package LLVM_bit_Reader.Binding is
-
-   function LLVMParseBitcode
-     (MemBuf     : in llvm.LLVMMemoryBufferRef;
-      OutModule  : access llvm.LLVMModuleRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   function LLVMParseBitcodeInContext
-     (MemBuf     : in llvm.LLVMMemoryBufferRef;
-      ContextRef : in llvm.LLVMContextRef;
-      OutModule  : access llvm.LLVMModuleRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   function LLVMGetBitcodeModuleProvider
-     (MemBuf     : in llvm.LLVMMemoryBufferRef;
-      OutMP      : access llvm.LLVMModuleProviderRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   function LLVMGetBitcodeModuleProviderInContext
-     (MemBuf     : in llvm.LLVMMemoryBufferRef;
-      ContextRef : in llvm.LLVMContextRef;
-      OutMP      : access llvm.LLVMModuleProviderRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-private
-
-   pragma Import (C, LLVMParseBitcode, "Ada_LLVMParseBitcode");
-   pragma Import
-     (C,
-      LLVMParseBitcodeInContext,
-      "Ada_LLVMParseBitcodeInContext");
-   pragma Import
-     (C,
-      LLVMGetBitcodeModuleProvider,
-      "Ada_LLVMGetBitcodeModuleProvider");
-   pragma Import
-     (C,
-      LLVMGetBitcodeModuleProviderInContext,
-      "Ada_LLVMGetBitcodeModuleProviderInContext");
-
-end LLVM_bit_Reader.Binding;
diff --git a/bindings/ada/bitreader/llvm_bit_reader.ads b/bindings/ada/bitreader/llvm_bit_reader.ads
deleted file mode 100644
index 7579dea2819d..000000000000
--- a/bindings/ada/bitreader/llvm_bit_reader.ads
+++ /dev/null
@@ -1,6 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-package LLVM_bit_Reader is
-
-end LLVM_bit_Reader;
diff --git a/bindings/ada/bitreader/llvm_bitreader_wrap.cxx b/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
deleted file mode 100644
index b7ecbed355af..000000000000
--- a/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
+++ /dev/null
@@ -1,423 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Reader (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Reader(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-//#include "llvm-c/Analysis.h"
-#include "llvm-c/BitReader.h"
-//#include "llvm-c/BitWriter.h"
-//#include "llvm-c/Core.h"
-//#include "llvm-c/ExecutionEngine.h"
-//#include "llvm-c/LinkTimeOptimizer.h"
-//#include "llvm-c/lto.h"
-//#include "llvm-c/Target.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport int SWIGSTDCALL Ada_LLVMParseBitcode (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
-  LLVMModuleRef *arg2 = (LLVMModuleRef *) 0 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMMemoryBufferRef)jarg1; 
-  
-  arg2 = (LLVMModuleRef *)jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMParseBitcode(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMParseBitcodeInContext (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  int jresult ;
-  LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
-  LLVMContextRef arg2 = (LLVMContextRef) 0 ;
-  LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ;
-  char **arg4 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMMemoryBufferRef)jarg1; 
-  
-  arg2 = (LLVMContextRef)jarg2; 
-  
-  arg3 = (LLVMModuleRef *)jarg3; 
-  
-  arg4 = (char **)jarg4; 
-  
-  result = (int)LLVMParseBitcodeInContext(arg1,arg2,arg3,arg4);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProvider (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
-  LLVMModuleProviderRef *arg2 = (LLVMModuleProviderRef *) 0 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMMemoryBufferRef)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef *)jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMGetBitcodeModuleProvider(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProviderInContext (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  int jresult ;
-  LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
-  LLVMContextRef arg2 = (LLVMContextRef) 0 ;
-  LLVMModuleProviderRef *arg3 = (LLVMModuleProviderRef *) 0 ;
-  char **arg4 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMMemoryBufferRef)jarg1; 
-  
-  arg2 = (LLVMContextRef)jarg2; 
-  
-  arg3 = (LLVMModuleProviderRef *)jarg3; 
-  
-  arg4 = (char **)jarg4; 
-  
-  result = (int)LLVMGetBitcodeModuleProviderInContext(arg1,arg2,arg3,arg4);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/bitwriter/llvm_bit_writer-binding.ads b/bindings/ada/bitwriter/llvm_bit_writer-binding.ads
deleted file mode 100644
index b5542df0e062..000000000000
--- a/bindings/ada/bitwriter/llvm_bit_writer-binding.ads
+++ /dev/null
@@ -1,28 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-with Interfaces.C.Strings;
-
-
-package LLVM_bit_Writer.Binding is
-
-   function LLVMWriteBitcodeToFileHandle
-     (M      : in llvm.LLVMModuleRef;
-      Handle : in Interfaces.C.int)
-      return   Interfaces.C.int;
-
-   function LLVMWriteBitcodeToFile
-     (M    : in llvm.LLVMModuleRef;
-      Path : in Interfaces.C.Strings.chars_ptr)
-      return Interfaces.C.int;
-
-private
-
-   pragma Import
-     (C,
-      LLVMWriteBitcodeToFileHandle,
-      "Ada_LLVMWriteBitcodeToFileHandle");
-   pragma Import (C, LLVMWriteBitcodeToFile, "Ada_LLVMWriteBitcodeToFile");
-
-end LLVM_bit_Writer.Binding;
diff --git a/bindings/ada/bitwriter/llvm_bit_writer.ads b/bindings/ada/bitwriter/llvm_bit_writer.ads
deleted file mode 100644
index 35b1f38aa996..000000000000
--- a/bindings/ada/bitwriter/llvm_bit_writer.ads
+++ /dev/null
@@ -1,6 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-package LLVM_bit_Writer is
-
-end LLVM_bit_Writer;
diff --git a/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx b/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
deleted file mode 100644
index 4abf44fffd5c..000000000000
--- a/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
+++ /dev/null
@@ -1,335 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Writer (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Writer(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/Analysis.h"
-#include "llvm-c/BitReader.h"
-#include "llvm-c/BitWriter.h"
-#include "llvm-c/Core.h"
-#include "llvm-c/ExecutionEngine.h"
-#include "llvm-c/LinkTimeOptimizer.h"
-#include "llvm-c/lto.h"
-#include "llvm-c/Target.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFileHandle (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  int jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  int arg2 ;
-  int result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  
-  arg2 = (int) jarg2; 
-  
-  
-  result = (int)LLVMWriteBitcodeToFileHandle(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFile (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  int jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  int result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (int)LLVMWriteBitcodeToFile(arg1,(char const *)arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/executionengine/llvm_execution_engine-binding.ads b/bindings/ada/executionengine/llvm_execution_engine-binding.ads
deleted file mode 100644
index a37c462cf324..000000000000
--- a/bindings/ada/executionengine/llvm_execution_engine-binding.ads
+++ /dev/null
@@ -1,192 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-with Interfaces.C.Strings;
-
-
-package LLVM_execution_Engine.Binding is
-
-   procedure LLVMLinkInJIT;
-
-   procedure LLVMLinkInInterpreter;
-
-   function LLVMCreateGenericValueOfInt
-     (Ty       : in llvm.LLVMTypeRef;
-      N        : in Interfaces.C.Extensions.unsigned_long_long;
-      IsSigned : in Interfaces.C.int)
-      return     LLVM_execution_Engine.LLVMGenericValueRef;
-
-   function LLVMCreateGenericValueOfPointer
-     (P    : access Interfaces.C.Extensions.void)
-      return LLVM_execution_Engine.LLVMGenericValueRef;
-
-   function LLVMCreateGenericValueOfFloat
-     (Ty   : in llvm.LLVMTypeRef;
-      N    : in Interfaces.C.double)
-      return LLVM_execution_Engine.LLVMGenericValueRef;
-
-   function LLVMGenericValueIntWidth
-     (GenValRef : in LLVM_execution_Engine.LLVMGenericValueRef)
-      return      Interfaces.C.unsigned;
-
-   function LLVMGenericValueToInt
-     (GenVal   : in LLVM_execution_Engine.LLVMGenericValueRef;
-      IsSigned : in Interfaces.C.int)
-      return     Interfaces.C.Extensions.unsigned_long_long;
-
-   function LLVMGenericValueToPointer
-     (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef)
-      return   access Interfaces.C.Extensions.void;
-
-   function LLVMGenericValueToFloat
-     (TyRef  : in llvm.LLVMTypeRef;
-      GenVal : in LLVM_execution_Engine.LLVMGenericValueRef)
-      return   Interfaces.C.double;
-
-   procedure LLVMDisposeGenericValue
-     (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef);
-
-   function LLVMCreateExecutionEngine
-     (OutEE    : access LLVM_execution_Engine.LLVMExecutionEngineRef;
-      MP       : in llvm.LLVMModuleProviderRef;
-      OutError : access Interfaces.C.Strings.chars_ptr)
-      return     Interfaces.C.int;
-
-   function LLVMCreateInterpreter
-     (OutInterp : access LLVM_execution_Engine.LLVMExecutionEngineRef;
-      MP        : in llvm.LLVMModuleProviderRef;
-      OutError  : access Interfaces.C.Strings.chars_ptr)
-      return      Interfaces.C.int;
-
-   function LLVMCreateJITCompiler
-     (OutJIT   : access LLVM_execution_Engine.LLVMExecutionEngineRef;
-      MP       : in llvm.LLVMModuleProviderRef;
-      OptLevel : in Interfaces.C.unsigned;
-      OutError : access Interfaces.C.Strings.chars_ptr)
-      return     Interfaces.C.int;
-
-   procedure LLVMDisposeExecutionEngine
-     (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
-
-   procedure LLVMRunStaticConstructors
-     (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
-
-   procedure LLVMRunStaticDestructors
-     (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef);
-
-   function LLVMRunFunctionAsMain
-     (EE   : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      F    : in llvm.LLVMValueRef;
-      ArgC : in Interfaces.C.unsigned;
-      ArgV : access Interfaces.C.Strings.chars_ptr;
-      EnvP : access Interfaces.C.Strings.chars_ptr)
-      return Interfaces.C.int;
-
-   function LLVMRunFunction
-     (EE      : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      F       : in llvm.LLVMValueRef;
-      NumArgs : in Interfaces.C.unsigned;
-      Args    : access LLVM_execution_Engine.LLVMGenericValueRef)
-      return    LLVM_execution_Engine.LLVMGenericValueRef;
-
-   procedure LLVMFreeMachineCodeForFunction
-     (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      F  : in llvm.LLVMValueRef);
-
-   procedure LLVMAddModuleProvider
-     (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      MP : in llvm.LLVMModuleProviderRef);
-
-   function LLVMRemoveModuleProvider
-     (EE       : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      MP       : in llvm.LLVMModuleProviderRef;
-      OutMod   : access llvm.LLVMModuleRef;
-      OutError : access Interfaces.C.Strings.chars_ptr)
-      return     Interfaces.C.int;
-
-   function LLVMFindFunction
-     (EE    : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      Name  : in Interfaces.C.Strings.chars_ptr;
-      OutFn : access llvm.LLVMValueRef)
-      return  Interfaces.C.int;
-
-   function LLVMGetExecutionEngineTargetData
-     (EE   : in LLVM_execution_Engine.LLVMExecutionEngineRef)
-      return LLVM_execution_Engine.LLVMTargetDataRef;
-
-   procedure LLVMAddGlobalMapping
-     (EE     : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      Global : in llvm.LLVMValueRef;
-      Addr   : access Interfaces.C.Extensions.void);
-
-   function LLVMGetPointerToGlobal
-     (EE     : in LLVM_execution_Engine.LLVMExecutionEngineRef;
-      Global : in llvm.LLVMValueRef)
-      return   access Interfaces.C.Extensions.void;
-
-private
-
-   pragma Import (C, LLVMLinkInJIT, "Ada_LLVMLinkInJIT");
-   pragma Import (C, LLVMLinkInInterpreter, "Ada_LLVMLinkInInterpreter");
-   pragma Import
-     (C,
-      LLVMCreateGenericValueOfInt,
-      "Ada_LLVMCreateGenericValueOfInt");
-   pragma Import
-     (C,
-      LLVMCreateGenericValueOfPointer,
-      "Ada_LLVMCreateGenericValueOfPointer");
-   pragma Import
-     (C,
-      LLVMCreateGenericValueOfFloat,
-      "Ada_LLVMCreateGenericValueOfFloat");
-   pragma Import
-     (C,
-      LLVMGenericValueIntWidth,
-      "Ada_LLVMGenericValueIntWidth");
-   pragma Import (C, LLVMGenericValueToInt, "Ada_LLVMGenericValueToInt");
-   pragma Import
-     (C,
-      LLVMGenericValueToPointer,
-      "Ada_LLVMGenericValueToPointer");
-   pragma Import (C, LLVMGenericValueToFloat, "Ada_LLVMGenericValueToFloat");
-   pragma Import (C, LLVMDisposeGenericValue, "Ada_LLVMDisposeGenericValue");
-   pragma Import
-     (C,
-      LLVMCreateExecutionEngine,
-      "Ada_LLVMCreateExecutionEngine");
-   pragma Import (C, LLVMCreateInterpreter, "Ada_LLVMCreateInterpreter");
-   pragma Import (C, LLVMCreateJITCompiler, "Ada_LLVMCreateJITCompiler");
-   pragma Import
-     (C,
-      LLVMDisposeExecutionEngine,
-      "Ada_LLVMDisposeExecutionEngine");
-   pragma Import
-     (C,
-      LLVMRunStaticConstructors,
-      "Ada_LLVMRunStaticConstructors");
-   pragma Import
-     (C,
-      LLVMRunStaticDestructors,
-      "Ada_LLVMRunStaticDestructors");
-   pragma Import (C, LLVMRunFunctionAsMain, "Ada_LLVMRunFunctionAsMain");
-   pragma Import (C, LLVMRunFunction, "Ada_LLVMRunFunction");
-   pragma Import
-     (C,
-      LLVMFreeMachineCodeForFunction,
-      "Ada_LLVMFreeMachineCodeForFunction");
-   pragma Import (C, LLVMAddModuleProvider, "Ada_LLVMAddModuleProvider");
-   pragma Import
-     (C,
-      LLVMRemoveModuleProvider,
-      "Ada_LLVMRemoveModuleProvider");
-   pragma Import (C, LLVMFindFunction, "Ada_LLVMFindFunction");
-   pragma Import
-     (C,
-      LLVMGetExecutionEngineTargetData,
-      "Ada_LLVMGetExecutionEngineTargetData");
-   pragma Import (C, LLVMAddGlobalMapping, "Ada_LLVMAddGlobalMapping");
-   pragma Import (C, LLVMGetPointerToGlobal, "Ada_LLVMGetPointerToGlobal");
-
-end LLVM_execution_Engine.Binding;
diff --git a/bindings/ada/executionengine/llvm_execution_engine.ads b/bindings/ada/executionengine/llvm_execution_engine.ads
deleted file mode 100644
index c7669920f7ac..000000000000
--- a/bindings/ada/executionengine/llvm_execution_engine.ads
+++ /dev/null
@@ -1,90 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Extensions;
-
-
-package LLVM_execution_Engine is
-
-   -- LLVMOpaqueGenericValue
-   --
-   type LLVMOpaqueGenericValue is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueGenericValue_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.LLVMOpaqueGenericValue;
-
-   type LLVMOpaqueGenericValue_view is access all
-     LLVM_execution_Engine.LLVMOpaqueGenericValue;
-
-   -- LLVMGenericValueRef
-   --
-   type LLVMGenericValueRef is access all
-     LLVM_execution_Engine.LLVMOpaqueGenericValue;
-
-   type LLVMGenericValueRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.LLVMGenericValueRef;
-
-   type LLVMGenericValueRef_view is access all
-     LLVM_execution_Engine.LLVMGenericValueRef;
-
-   -- LLVMOpaqueExecutionEngine
-   --
-   type LLVMOpaqueExecutionEngine is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueExecutionEngine_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
-
-   type LLVMOpaqueExecutionEngine_view is access all
-     LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
-
-   -- LLVMExecutionEngineRef
-   --
-   type LLVMExecutionEngineRef is access all
-     LLVM_execution_Engine.LLVMOpaqueExecutionEngine;
-
-   type LLVMExecutionEngineRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.LLVMExecutionEngineRef;
-
-   type LLVMExecutionEngineRef_view is access all
-     LLVM_execution_Engine.LLVMExecutionEngineRef;
-
-   -- LLVMTargetDataRef
-   --
-   type LLVMTargetDataRef is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMTargetDataRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.LLVMTargetDataRef;
-
-   type LLVMTargetDataRef_view is access all
-     LLVM_execution_Engine.LLVMTargetDataRef;
-
-   -- GenericValue
-   --
-   type GenericValue is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type GenericValue_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.GenericValue;
-
-   type GenericValue_view is access all LLVM_execution_Engine.GenericValue;
-
-   -- ExecutionEngine
-   --
-   type ExecutionEngine is new Interfaces.C.Extensions.incomplete_class_def;
-
-   type ExecutionEngine_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_execution_Engine.ExecutionEngine;
-
-   type ExecutionEngine_view is access all
-     LLVM_execution_Engine.ExecutionEngine;
-
-
-end LLVM_execution_Engine;
diff --git a/bindings/ada/executionengine/llvm_executionengine_wrap.cxx b/bindings/ada/executionengine/llvm_executionengine_wrap.cxx
deleted file mode 100644
index b63acacb361f..000000000000
--- a/bindings/ada/executionengine/llvm_executionengine_wrap.cxx
+++ /dev/null
@@ -1,924 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_execution_Engine (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_execution_Engine(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/ExecutionEngine.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport void SWIGSTDCALL Ada_LLVMLinkInJIT (
-  )
-{
-  LLVMLinkInJIT();
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMLinkInInterpreter (
-  )
-{
-  LLVMLinkInInterpreter();
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfInt (
-  void * jarg1
-  ,
-  
-  unsigned long long jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned long long arg2 ;
-  int arg3 ;
-  LLVMGenericValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (unsigned long long) jarg2; 
-  
-  
-  
-  arg3 = (int) jarg3; 
-  
-  
-  result = (LLVMGenericValueRef)LLVMCreateGenericValueOfInt(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfPointer (
-  void* jarg1
-  )
-{
-  void * jresult ;
-  void *arg1 = (void *) 0 ;
-  LLVMGenericValueRef result;
-  
-  arg1 = (void *)jarg1; 
-  
-  result = (LLVMGenericValueRef)LLVMCreateGenericValueOfPointer(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfFloat (
-  void * jarg1
-  ,
-  
-  double jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  double arg2 ;
-  LLVMGenericValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (double) jarg2; 
-  
-  
-  result = (LLVMGenericValueRef)LLVMCreateGenericValueOfFloat(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGenericValueIntWidth (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMGenericValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMGenericValueIntWidth(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned long long SWIGSTDCALL Ada_LLVMGenericValueToInt (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  unsigned long long jresult ;
-  LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
-  int arg2 ;
-  unsigned long long result;
-  
-  arg1 = (LLVMGenericValueRef)jarg1; 
-  
-  
-  arg2 = (int) jarg2; 
-  
-  
-  result = (unsigned long long)LLVMGenericValueToInt(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void* SWIGSTDCALL Ada_LLVMGenericValueToPointer (
-  void * jarg1
-  )
-{
-  void* jresult ;
-  LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
-  void *result = 0 ;
-  
-  arg1 = (LLVMGenericValueRef)jarg1; 
-  
-  result = (void *)LLVMGenericValueToPointer(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport double SWIGSTDCALL Ada_LLVMGenericValueToFloat (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  double jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMGenericValueRef arg2 = (LLVMGenericValueRef) 0 ;
-  double result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMGenericValueRef)jarg2; 
-  
-  result = (double)LLVMGenericValueToFloat(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeGenericValue (
-  void * jarg1
-  )
-{
-  LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ;
-  
-  arg1 = (LLVMGenericValueRef)jarg1; 
-  
-  LLVMDisposeGenericValue(arg1);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMCreateExecutionEngine (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
-  LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef *)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef)jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMCreateExecutionEngine(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMCreateInterpreter (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
-  LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef *)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef)jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMCreateInterpreter(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMCreateJITCompiler (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ;
-  LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
-  unsigned int arg3 ;
-  char **arg4 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef *)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  arg4 = (char **)jarg4; 
-  
-  result = (int)LLVMCreateJITCompiler(arg1,arg2,arg3,arg4);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeExecutionEngine (
-  void * jarg1
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  LLVMDisposeExecutionEngine(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRunStaticConstructors (
-  void * jarg1
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  LLVMRunStaticConstructors(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRunStaticDestructors (
-  void * jarg1
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  LLVMRunStaticDestructors(arg1);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMRunFunctionAsMain (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  void * jarg5
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  unsigned int arg3 ;
-  char **arg4 = (char **) 0 ;
-  char **arg5 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  arg4 = (char **)jarg4; 
-  
-  arg5 = (char **)jarg5; 
-  
-  result = (int)LLVMRunFunctionAsMain(arg1,arg2,arg3,(char const *const *)arg4,(char const *const *)arg5);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMRunFunction (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  void * jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  unsigned int arg3 ;
-  LLVMGenericValueRef *arg4 = (LLVMGenericValueRef *) 0 ;
-  LLVMGenericValueRef result;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  arg4 = (LLVMGenericValueRef *)jarg4; 
-  
-  result = (LLVMGenericValueRef)LLVMRunFunction(arg1,arg2,arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMFreeMachineCodeForFunction (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  LLVMFreeMachineCodeForFunction(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddModuleProvider (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef)jarg2; 
-  
-  LLVMAddModuleProvider(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMRemoveModuleProvider (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ;
-  LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ;
-  char **arg4 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMModuleProviderRef)jarg2; 
-  
-  arg3 = (LLVMModuleRef *)jarg3; 
-  
-  arg4 = (char **)jarg4; 
-  
-  result = (int)LLVMRemoveModuleProvider(arg1,arg2,arg3,arg4);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMFindFunction (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
-  int result;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  arg3 = (LLVMValueRef *)jarg3; 
-  
-  result = (int)LLVMFindFunction(arg1,(char const *)arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport LLVMTargetDataRef SWIGSTDCALL Ada_LLVMGetExecutionEngineTargetData (
-  void * jarg1
-  )
-{
-  LLVMTargetDataRef jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMTargetDataRef result;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  result = LLVMGetExecutionEngineTargetData(arg1);
-  
-  jresult = result; 
-  //jresult = new LLVMTargetDataRef ((LLVMTargetDataRef &) result); 
-  
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddGlobalMapping (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void* jarg3
-  )
-{
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  void *arg3 = (void *) 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (void *)jarg3; 
-  
-  LLVMAddGlobalMapping(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport void* SWIGSTDCALL Ada_LLVMGetPointerToGlobal (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void* jresult ;
-  LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  void *result = 0 ;
-  
-  arg1 = (LLVMExecutionEngineRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (void *)LLVMGetPointerToGlobal(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/llvm.gpr b/bindings/ada/llvm.gpr
deleted file mode 100644
index 8e87af4fa12e..000000000000
--- a/bindings/ada/llvm.gpr
+++ /dev/null
@@ -1,34 +0,0 @@
-project LLVM is
-
-   for Languages use ("Ada", "C++");
-   for Source_Dirs use (".", "analysis", "bitreader", "bitwriter", "executionengine", "llvm", "target", "transforms");
-   for Object_Dir use "build";
-   for Exec_Dir use ".";
-   for Library_Name use "llvm_ada";
-   for Library_Dir use "lib";
-   for Library_Ali_Dir use "objects";
-
-   package Naming is
-      for Specification_Suffix ("c++") use ".h";
-      for Implementation_Suffix ("c++") use ".cxx";
-   end Naming;
-
-   package Builder is
-      for Default_Switches ("ada") use ("-g");
-   end Builder;
-
-   package Compiler is
-      for Default_Switches ("ada") use ("-gnato", "-fstack-check", "-g", "-gnata", "-gnat05", "-I/usr/local/include");
-      for Default_Switches ("c++") use ("-D__STDC_LIMIT_MACROS", "-D__STDC_CONSTANT_MACROS", "-I../../include", "-g");
-   end Compiler;
-
-   package Binder is
-      for Default_Switches ("ada") use ("-E");
-   end Binder;
-
-   package Linker is
-      for Default_Switches ("c++") use ("-g");
-   end Linker;
-
-end LLVM;
-
diff --git a/bindings/ada/llvm/llvm-binding.ads b/bindings/ada/llvm/llvm-binding.ads
deleted file mode 100644
index c0e48a1b5bf3..000000000000
--- a/bindings/ada/llvm/llvm-binding.ads
+++ /dev/null
@@ -1,1974 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Strings;
-
-
-package llvm.Binding is
-
-   procedure LLVMDisposeMessage
-     (Message : in Interfaces.C.Strings.chars_ptr);
-
-   function LLVMContextCreate return  llvm.LLVMContextRef;
-
-   function LLVMGetGlobalContext return  llvm.LLVMContextRef;
-
-   procedure LLVMContextDispose (C : in llvm.LLVMContextRef);
-
-   function LLVMModuleCreateWithName
-     (ModuleID : in Interfaces.C.Strings.chars_ptr)
-      return     llvm.LLVMModuleRef;
-
-   function LLVMModuleCreateWithNameInContext
-     (ModuleID : in Interfaces.C.Strings.chars_ptr;
-      C        : in llvm.LLVMContextRef)
-      return     llvm.LLVMModuleRef;
-
-   procedure LLVMDisposeModule (M : in llvm.LLVMModuleRef);
-
-   function LLVMGetDataLayout
-     (M    : in llvm.LLVMModuleRef)
-      return Interfaces.C.Strings.chars_ptr;
-
-   procedure LLVMSetDataLayout
-     (M      : in llvm.LLVMModuleRef;
-      Triple : in Interfaces.C.Strings.chars_ptr);
-
-   function LLVMGetTarget
-     (M    : in llvm.LLVMModuleRef)
-      return Interfaces.C.Strings.chars_ptr;
-
-   procedure LLVMSetTarget
-     (M      : in llvm.LLVMModuleRef;
-      Triple : in Interfaces.C.Strings.chars_ptr);
-
-   function LLVMAddTypeName
-     (M    : in llvm.LLVMModuleRef;
-      Name : in Interfaces.C.Strings.chars_ptr;
-      Ty   : in llvm.LLVMTypeRef)
-      return Interfaces.C.int;
-
-   procedure LLVMDeleteTypeName
-     (M    : in llvm.LLVMModuleRef;
-      Name : in Interfaces.C.Strings.chars_ptr);
-
-   function LLVMGetTypeByName
-     (M    : in llvm.LLVMModuleRef;
-      Name : in Interfaces.C.Strings.chars_ptr)
-      return llvm.LLVMTypeRef;
-
-   procedure LLVMDumpModule (M : in llvm.LLVMModuleRef);
-
-   function LLVMGetTypeKind
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMTypeKind;
-
-   function LLVMInt1Type return  llvm.LLVMTypeRef;
-
-   function LLVMInt8Type return  llvm.LLVMTypeRef;
-
-   function LLVMInt16Type return  llvm.LLVMTypeRef;
-
-   function LLVMInt32Type return  llvm.LLVMTypeRef;
-
-   function LLVMInt64Type return  llvm.LLVMTypeRef;
-
-   function LLVMIntType
-     (NumBits : in Interfaces.C.unsigned)
-      return    llvm.LLVMTypeRef;
-
-   function LLVMGetIntTypeWidth
-     (IntegerTy : in llvm.LLVMTypeRef)
-      return      Interfaces.C.unsigned;
-
-   function LLVMFloatType return  llvm.LLVMTypeRef;
-
-   function LLVMDoubleType return  llvm.LLVMTypeRef;
-
-   function LLVMX86FP80Type return  llvm.LLVMTypeRef;
-
-   function LLVMFP128Type return  llvm.LLVMTypeRef;
-
-   function LLVMPPCFP128Type return  llvm.LLVMTypeRef;
-
-   function LLVMFunctionType
-     (ReturnType : in llvm.LLVMTypeRef;
-      ParamTypes : access llvm.LLVMTypeRef;
-      ParamCount : in Interfaces.C.unsigned;
-      IsVarArg   : in Interfaces.C.int)
-      return       llvm.LLVMTypeRef;
-
-   function LLVMIsFunctionVarArg
-     (FunctionTy : in llvm.LLVMTypeRef)
-      return       Interfaces.C.int;
-
-   function LLVMGetReturnType
-     (FunctionTy : in llvm.LLVMTypeRef)
-      return       llvm.LLVMTypeRef;
-
-   function LLVMCountParamTypes
-     (FunctionTy : in llvm.LLVMTypeRef)
-      return       Interfaces.C.unsigned;
-
-   procedure LLVMGetParamTypes
-     (FunctionTy : in llvm.LLVMTypeRef;
-      Dest       : access llvm.LLVMTypeRef);
-
-   function LLVMStructType
-     (ElementTypes : access llvm.LLVMTypeRef;
-      ElementCount : in Interfaces.C.unsigned;
-      Packed       : in Interfaces.C.int)
-      return         llvm.LLVMTypeRef;
-
-   function LLVMCountStructElementTypes
-     (StructTy : in llvm.LLVMTypeRef)
-      return     Interfaces.C.unsigned;
-
-   procedure LLVMGetStructElementTypes
-     (StructTy : in llvm.LLVMTypeRef;
-      Dest     : access llvm.LLVMTypeRef);
-
-   function LLVMIsPackedStruct
-     (StructTy : in llvm.LLVMTypeRef)
-      return     Interfaces.C.int;
-
-   function LLVMArrayType
-     (ElementType  : in llvm.LLVMTypeRef;
-      ElementCount : in Interfaces.C.unsigned)
-      return         llvm.LLVMTypeRef;
-
-   function LLVMPointerType
-     (ElementType  : in llvm.LLVMTypeRef;
-      AddressSpace : in Interfaces.C.unsigned)
-      return         llvm.LLVMTypeRef;
-
-   function LLVMVectorType
-     (ElementType  : in llvm.LLVMTypeRef;
-      ElementCount : in Interfaces.C.unsigned)
-      return         llvm.LLVMTypeRef;
-
-   function LLVMGetElementType
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMTypeRef;
-
-   function LLVMGetArrayLength
-     (ArrayTy : in llvm.LLVMTypeRef)
-      return    Interfaces.C.unsigned;
-
-   function LLVMGetPointerAddressSpace
-     (PointerTy : in llvm.LLVMTypeRef)
-      return      Interfaces.C.unsigned;
-
-   function LLVMGetVectorSize
-     (VectorTy : in llvm.LLVMTypeRef)
-      return     Interfaces.C.unsigned;
-
-   function LLVMVoidType return  llvm.LLVMTypeRef;
-
-   function LLVMLabelType return  llvm.LLVMTypeRef;
-
-   function LLVMOpaqueType return  llvm.LLVMTypeRef;
-
-   function LLVMCreateTypeHandle
-     (PotentiallyAbstractTy : in llvm.LLVMTypeRef)
-      return                  llvm.LLVMTypeHandleRef;
-
-   procedure LLVMRefineType
-     (AbstractTy : in llvm.LLVMTypeRef;
-      ConcreteTy : in llvm.LLVMTypeRef);
-
-   function LLVMResolveTypeHandle
-     (TypeHandle : in llvm.LLVMTypeHandleRef)
-      return       llvm.LLVMTypeRef;
-
-   procedure LLVMDisposeTypeHandle (TypeHandle : in llvm.LLVMTypeHandleRef);
-
-   function LLVMTypeOf (Val : in llvm.LLVMValueRef) return llvm.LLVMTypeRef;
-
-   function LLVMGetValueName
-     (Val  : in llvm.LLVMValueRef)
-      return Interfaces.C.Strings.chars_ptr;
-
-   procedure LLVMSetValueName
-     (Val  : in llvm.LLVMValueRef;
-      Name : in Interfaces.C.Strings.chars_ptr);
-
-   procedure LLVMDumpValue (Val : in llvm.LLVMValueRef);
-
-   function LLVMIsAArgument
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsABasicBlock
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAInlineAsm
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUser
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstant
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantAggregateZero
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantArray
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantExpr
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantFP
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantInt
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantPointerNull
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantStruct
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAConstantVector
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAGlobalValue
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFunction
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAGlobalAlias
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAGlobalVariable
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUndefValue
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAInstruction
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsABinaryOperator
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsACallInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAIntrinsicInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgInfoIntrinsic
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgDeclareInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgFuncStartInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgRegionEndInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgRegionStartInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsADbgStopPointInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAEHSelectorInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAMemIntrinsic
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAMemCpyInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAMemMoveInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAMemSetInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsACmpInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFCmpInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAICmpInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAExtractElementInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAGetElementPtrInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAInsertElementInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAInsertValueInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAPHINode
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsASelectInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAShuffleVectorInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAStoreInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsATerminatorInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsABranchInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAInvokeInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAReturnInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsASwitchInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUnreachableInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUnwindInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUnaryInstruction
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAAllocationInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAAllocaInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAMallocInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsACastInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsABitCastInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFPExtInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFPToSIInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFPToUIInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFPTruncInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAIntToPtrInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAPtrToIntInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsASExtInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsASIToFPInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsATruncInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAUIToFPInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAZExtInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAExtractValueInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAFreeInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsALoadInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsAVAArgInst
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMConstNull
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMConstAllOnes
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetUndef
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMIsConstant
-     (Val  : in llvm.LLVMValueRef)
-      return Interfaces.C.int;
-
-   function LLVMIsNull (Val : in llvm.LLVMValueRef) return Interfaces.C.int;
-
-   function LLVMIsUndef
-     (Val  : in llvm.LLVMValueRef)
-      return Interfaces.C.int;
-
-   function LLVMConstPointerNull
-     (Ty   : in llvm.LLVMTypeRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMConstInt
-     (IntTy      : in llvm.LLVMTypeRef;
-      N          : in Interfaces.C.Extensions.unsigned_long_long;
-      SignExtend : in Interfaces.C.int)
-      return       llvm.LLVMValueRef;
-
-   function LLVMConstReal
-     (RealTy : in llvm.LLVMTypeRef;
-      N      : in Interfaces.C.double)
-      return   llvm.LLVMValueRef;
-
-   function LLVMConstRealOfString
-     (RealTy : in llvm.LLVMTypeRef;
-      Text   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMConstString
-     (Str               : in Interfaces.C.Strings.chars_ptr;
-      Length            : in Interfaces.C.unsigned;
-      DontNullTerminate : in Interfaces.C.int)
-      return              llvm.LLVMValueRef;
-
-   function LLVMConstArray
-     (ElementTy    : in llvm.LLVMTypeRef;
-      ConstantVals : access llvm.LLVMValueRef;
-      Length       : in Interfaces.C.unsigned)
-      return         llvm.LLVMValueRef;
-
-   function LLVMConstStruct
-     (ConstantVals : access llvm.LLVMValueRef;
-      Count        : in Interfaces.C.unsigned;
-      packed       : in Interfaces.C.int)
-      return         llvm.LLVMValueRef;
-
-   function LLVMConstVector
-     (ScalarConstantVals : access llvm.LLVMValueRef;
-      Size               : in Interfaces.C.unsigned)
-      return               llvm.LLVMValueRef;
-
-   function LLVMSizeOf (Ty : in llvm.LLVMTypeRef) return llvm.LLVMValueRef;
-
-   function LLVMConstNeg
-     (ConstantVal : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstNot
-     (ConstantVal : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstAdd
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSub
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstMul
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstUDiv
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSDiv
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFDiv
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstURem
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSRem
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFRem
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstAnd
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstOr
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstXor
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstICmp
-     (Predicate   : in llvm.LLVMIntPredicate;
-      LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFCmp
-     (Predicate   : in llvm.LLVMRealPredicate;
-      LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstShl
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstLShr
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstAShr
-     (LHSConstant : in llvm.LLVMValueRef;
-      RHSConstant : in llvm.LLVMValueRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstGEP
-     (ConstantVal     : in llvm.LLVMValueRef;
-      ConstantIndices : access llvm.LLVMValueRef;
-      NumIndices      : in Interfaces.C.unsigned)
-      return            llvm.LLVMValueRef;
-
-   function LLVMConstTrunc
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSExt
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstZExt
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFPTrunc
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFPExt
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstUIToFP
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSIToFP
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFPToUI
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstFPToSI
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstPtrToInt
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstIntToPtr
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstBitCast
-     (ConstantVal : in llvm.LLVMValueRef;
-      ToType      : in llvm.LLVMTypeRef)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstSelect
-     (ConstantCondition : in llvm.LLVMValueRef;
-      ConstantIfTrue    : in llvm.LLVMValueRef;
-      ConstantIfFalse   : in llvm.LLVMValueRef)
-      return              llvm.LLVMValueRef;
-
-   function LLVMConstExtractElement
-     (VectorConstant : in llvm.LLVMValueRef;
-      IndexConstant  : in llvm.LLVMValueRef)
-      return           llvm.LLVMValueRef;
-
-   function LLVMConstInsertElement
-     (VectorConstant       : in llvm.LLVMValueRef;
-      ElementValueConstant : in llvm.LLVMValueRef;
-      IndexConstant        : in llvm.LLVMValueRef)
-      return                 llvm.LLVMValueRef;
-
-   function LLVMConstShuffleVector
-     (VectorAConstant : in llvm.LLVMValueRef;
-      VectorBConstant : in llvm.LLVMValueRef;
-      MaskConstant    : in llvm.LLVMValueRef)
-      return            llvm.LLVMValueRef;
-
-   function LLVMConstExtractValue
-     (AggConstant : in llvm.LLVMValueRef;
-      IdxList     : access Interfaces.C.unsigned;
-      NumIdx      : in Interfaces.C.unsigned)
-      return        llvm.LLVMValueRef;
-
-   function LLVMConstInsertValue
-     (AggConstant          : in llvm.LLVMValueRef;
-      ElementValueConstant : in llvm.LLVMValueRef;
-      IdxList              : access Interfaces.C.unsigned;
-      NumIdx               : in Interfaces.C.unsigned)
-      return                 llvm.LLVMValueRef;
-
-   function LLVMConstInlineAsm
-     (Ty             : in llvm.LLVMTypeRef;
-      AsmString      : in Interfaces.C.Strings.chars_ptr;
-      Constraints    : in Interfaces.C.Strings.chars_ptr;
-      HasSideEffects : in Interfaces.C.int)
-      return           llvm.LLVMValueRef;
-
-   function LLVMGetGlobalParent
-     (Global : in llvm.LLVMValueRef)
-      return   llvm.LLVMModuleRef;
-
-   function LLVMIsDeclaration
-     (Global : in llvm.LLVMValueRef)
-      return   Interfaces.C.int;
-
-   function LLVMGetLinkage
-     (Global : in llvm.LLVMValueRef)
-      return   llvm.LLVMLinkage;
-
-   procedure LLVMSetLinkage
-     (Global  : in llvm.LLVMValueRef;
-      Linkage : in llvm.LLVMLinkage);
-
-   function LLVMGetSection
-     (Global : in llvm.LLVMValueRef)
-      return   Interfaces.C.Strings.chars_ptr;
-
-   procedure LLVMSetSection
-     (Global  : in llvm.LLVMValueRef;
-      Section : in Interfaces.C.Strings.chars_ptr);
-
-   function LLVMGetVisibility
-     (Global : in llvm.LLVMValueRef)
-      return   llvm.LLVMVisibility;
-
-   procedure LLVMSetVisibility
-     (Global : in llvm.LLVMValueRef;
-      Viz    : in llvm.LLVMVisibility);
-
-   function LLVMGetAlignment
-     (Global : in llvm.LLVMValueRef)
-      return   Interfaces.C.unsigned;
-
-   procedure LLVMSetAlignment
-     (Global : in llvm.LLVMValueRef;
-      Bytes  : in Interfaces.C.unsigned);
-
-   function LLVMAddGlobal
-     (M    : in llvm.LLVMModuleRef;
-      Ty   : in llvm.LLVMTypeRef;
-      Name : in Interfaces.C.Strings.chars_ptr)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetNamedGlobal
-     (M    : in llvm.LLVMModuleRef;
-      Name : in Interfaces.C.Strings.chars_ptr)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetFirstGlobal
-     (M    : in llvm.LLVMModuleRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetLastGlobal
-     (M    : in llvm.LLVMModuleRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetNextGlobal
-     (GlobalVar : in llvm.LLVMValueRef)
-      return      llvm.LLVMValueRef;
-
-   function LLVMGetPreviousGlobal
-     (GlobalVar : in llvm.LLVMValueRef)
-      return      llvm.LLVMValueRef;
-
-   procedure LLVMDeleteGlobal (GlobalVar : in llvm.LLVMValueRef);
-
-   function LLVMGetInitializer
-     (GlobalVar : in llvm.LLVMValueRef)
-      return      llvm.LLVMValueRef;
-
-   procedure LLVMSetInitializer
-     (GlobalVar   : in llvm.LLVMValueRef;
-      ConstantVal : in llvm.LLVMValueRef);
-
-   function LLVMIsThreadLocal
-     (GlobalVar : in llvm.LLVMValueRef)
-      return      Interfaces.C.int;
-
-   procedure LLVMSetThreadLocal
-     (GlobalVar     : in llvm.LLVMValueRef;
-      IsThreadLocal : in Interfaces.C.int);
-
-   function LLVMIsGlobalConstant
-     (GlobalVar : in llvm.LLVMValueRef)
-      return      Interfaces.C.int;
-
-   procedure LLVMSetGlobalConstant
-     (GlobalVar  : in llvm.LLVMValueRef;
-      IsConstant : in Interfaces.C.int);
-
-   function LLVMAddAlias
-     (M       : in llvm.LLVMModuleRef;
-      Ty      : in llvm.LLVMTypeRef;
-      Aliasee : in llvm.LLVMValueRef;
-      Name    : in Interfaces.C.Strings.chars_ptr)
-      return    llvm.LLVMValueRef;
-
-   function LLVMAddFunction
-     (M          : in llvm.LLVMModuleRef;
-      Name       : in Interfaces.C.Strings.chars_ptr;
-      FunctionTy : in llvm.LLVMTypeRef)
-      return       llvm.LLVMValueRef;
-
-   function LLVMGetNamedFunction
-     (M    : in llvm.LLVMModuleRef;
-      Name : in Interfaces.C.Strings.chars_ptr)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetFirstFunction
-     (M    : in llvm.LLVMModuleRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetLastFunction
-     (M    : in llvm.LLVMModuleRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetNextFunction
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetPreviousFunction
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   procedure LLVMDeleteFunction (Fn : in llvm.LLVMValueRef);
-
-   function LLVMGetIntrinsicID
-     (Fn   : in llvm.LLVMValueRef)
-      return Interfaces.C.unsigned;
-
-   function LLVMGetFunctionCallConv
-     (Fn   : in llvm.LLVMValueRef)
-      return Interfaces.C.unsigned;
-
-   procedure LLVMSetFunctionCallConv
-     (Fn : in llvm.LLVMValueRef;
-      CC : in Interfaces.C.unsigned);
-
-   function LLVMGetGC
-     (Fn   : in llvm.LLVMValueRef)
-      return Interfaces.C.Strings.chars_ptr;
-
-   procedure LLVMSetGC
-     (Fn   : in llvm.LLVMValueRef;
-      Name : in Interfaces.C.Strings.chars_ptr);
-
-   procedure LLVMAddFunctionAttr
-     (Fn : in llvm.LLVMValueRef;
-      PA : in llvm.LLVMAttribute);
-
-   procedure LLVMRemoveFunctionAttr
-     (Fn : in llvm.LLVMValueRef;
-      PA : in llvm.LLVMAttribute);
-
-   function LLVMCountParams
-     (Fn   : in llvm.LLVMValueRef)
-      return Interfaces.C.unsigned;
-
-   procedure LLVMGetParams
-     (Fn     : in llvm.LLVMValueRef;
-      Params : access llvm.LLVMValueRef);
-
-   function LLVMGetParam
-     (Fn    : in llvm.LLVMValueRef;
-      Index : in Interfaces.C.unsigned)
-      return  llvm.LLVMValueRef;
-
-   function LLVMGetParamParent
-     (Inst : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetFirstParam
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetLastParam
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetNextParam
-     (Arg  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetPreviousParam
-     (Arg  : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   procedure LLVMAddAttribute
-     (Arg : in llvm.LLVMValueRef;
-      PA  : in llvm.LLVMAttribute);
-
-   procedure LLVMRemoveAttribute
-     (Arg : in llvm.LLVMValueRef;
-      PA  : in llvm.LLVMAttribute);
-
-   procedure LLVMSetParamAlignment
-     (Arg   : in llvm.LLVMValueRef;
-      align : in Interfaces.C.unsigned);
-
-   function LLVMBasicBlockAsValue
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMValueIsBasicBlock
-     (Val  : in llvm.LLVMValueRef)
-      return Interfaces.C.int;
-
-   function LLVMValueAsBasicBlock
-     (Val  : in llvm.LLVMValueRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetBasicBlockParent
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMCountBasicBlocks
-     (Fn   : in llvm.LLVMValueRef)
-      return Interfaces.C.unsigned;
-
-   procedure LLVMGetBasicBlocks
-     (Fn          : in llvm.LLVMValueRef;
-      BasicBlocks : access llvm.LLVMBasicBlockRef);
-
-   function LLVMGetFirstBasicBlock
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetLastBasicBlock
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetNextBasicBlock
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetPreviousBasicBlock
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetEntryBasicBlock
-     (Fn   : in llvm.LLVMValueRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMAppendBasicBlock
-     (Fn   : in llvm.LLVMValueRef;
-      Name : in Interfaces.C.Strings.chars_ptr)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMInsertBasicBlock
-     (InsertBeforeBB : in llvm.LLVMBasicBlockRef;
-      Name           : in Interfaces.C.Strings.chars_ptr)
-      return           llvm.LLVMBasicBlockRef;
-
-   procedure LLVMDeleteBasicBlock (BB : in llvm.LLVMBasicBlockRef);
-
-   function LLVMGetInstructionParent
-     (Inst : in llvm.LLVMValueRef)
-      return llvm.LLVMBasicBlockRef;
-
-   function LLVMGetFirstInstruction
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetLastInstruction
-     (BB   : in llvm.LLVMBasicBlockRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetNextInstruction
-     (Inst : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   function LLVMGetPreviousInstruction
-     (Inst : in llvm.LLVMValueRef)
-      return llvm.LLVMValueRef;
-
-   procedure LLVMSetInstructionCallConv
-     (Instr : in llvm.LLVMValueRef;
-      CC    : in Interfaces.C.unsigned);
-
-   function LLVMGetInstructionCallConv
-     (Instr : in llvm.LLVMValueRef)
-      return  Interfaces.C.unsigned;
-
-   procedure LLVMAddInstrAttribute
-     (Instr : in llvm.LLVMValueRef;
-      index : in Interfaces.C.unsigned;
-      arg_1 : in llvm.LLVMAttribute);
-
-   procedure LLVMRemoveInstrAttribute
-     (Instr : in llvm.LLVMValueRef;
-      index : in Interfaces.C.unsigned;
-      arg_1 : in llvm.LLVMAttribute);
-
-   procedure LLVMSetInstrParamAlignment
-     (Instr : in llvm.LLVMValueRef;
-      index : in Interfaces.C.unsigned;
-      align : in Interfaces.C.unsigned);
-
-   function LLVMIsTailCall
-     (CallInst : in llvm.LLVMValueRef)
-      return     Interfaces.C.int;
-
-   procedure LLVMSetTailCall
-     (CallInst   : in llvm.LLVMValueRef;
-      IsTailCall : in Interfaces.C.int);
-
-   procedure LLVMAddIncoming
-     (PhiNode        : in llvm.LLVMValueRef;
-      IncomingValues : access llvm.LLVMValueRef;
-      IncomingBlocks : access llvm.LLVMBasicBlockRef;
-      Count          : in Interfaces.C.unsigned);
-
-   function LLVMCountIncoming
-     (PhiNode : in llvm.LLVMValueRef)
-      return    Interfaces.C.unsigned;
-
-   function LLVMGetIncomingValue
-     (PhiNode : in llvm.LLVMValueRef;
-      Index   : in Interfaces.C.unsigned)
-      return    llvm.LLVMValueRef;
-
-   function LLVMGetIncomingBlock
-     (PhiNode : in llvm.LLVMValueRef;
-      Index   : in Interfaces.C.unsigned)
-      return    llvm.LLVMBasicBlockRef;
-
-   function LLVMCreateBuilder return  llvm.LLVMBuilderRef;
-
-   procedure LLVMPositionBuilder
-     (Builder : in llvm.LLVMBuilderRef;
-      Block   : in llvm.LLVMBasicBlockRef;
-      Instr   : in llvm.LLVMValueRef);
-
-   procedure LLVMPositionBuilderBefore
-     (Builder : in llvm.LLVMBuilderRef;
-      Instr   : in llvm.LLVMValueRef);
-
-   procedure LLVMPositionBuilderAtEnd
-     (Builder : in llvm.LLVMBuilderRef;
-      Block   : in llvm.LLVMBasicBlockRef);
-
-   function LLVMGetInsertBlock
-     (Builder : in llvm.LLVMBuilderRef)
-      return    llvm.LLVMBasicBlockRef;
-
-   procedure LLVMClearInsertionPosition (Builder : in llvm.LLVMBuilderRef);
-
-   procedure LLVMInsertIntoBuilder
-     (Builder : in llvm.LLVMBuilderRef;
-      Instr   : in llvm.LLVMValueRef);
-
-   procedure LLVMDisposeBuilder (Builder : in llvm.LLVMBuilderRef);
-
-   function LLVMBuildRetVoid
-     (arg_1 : in llvm.LLVMBuilderRef)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildRet
-     (arg_1 : in llvm.LLVMBuilderRef;
-      V     : in llvm.LLVMValueRef)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildBr
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Dest  : in llvm.LLVMBasicBlockRef)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildCondBr
-     (arg_1    : in llvm.LLVMBuilderRef;
-      the_If   : in llvm.LLVMValueRef;
-      the_Then : in llvm.LLVMBasicBlockRef;
-      the_Else : in llvm.LLVMBasicBlockRef)
-      return     llvm.LLVMValueRef;
-
-   function LLVMBuildSwitch
-     (arg_1    : in llvm.LLVMBuilderRef;
-      V        : in llvm.LLVMValueRef;
-      the_Else : in llvm.LLVMBasicBlockRef;
-      NumCases : in Interfaces.C.unsigned)
-      return     llvm.LLVMValueRef;
-
-   function LLVMBuildInvoke
-     (arg_1    : in llvm.LLVMBuilderRef;
-      Fn       : in llvm.LLVMValueRef;
-      Args     : access llvm.LLVMValueRef;
-      NumArgs  : in Interfaces.C.unsigned;
-      the_Then : in llvm.LLVMBasicBlockRef;
-      Catch    : in llvm.LLVMBasicBlockRef;
-      Name     : in Interfaces.C.Strings.chars_ptr)
-      return     llvm.LLVMValueRef;
-
-   function LLVMBuildUnwind
-     (arg_1 : in llvm.LLVMBuilderRef)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildUnreachable
-     (arg_1 : in llvm.LLVMBuilderRef)
-      return  llvm.LLVMValueRef;
-
-   procedure LLVMAddCase
-     (Switch : in llvm.LLVMValueRef;
-      OnVal  : in llvm.LLVMValueRef;
-      Dest   : in llvm.LLVMBasicBlockRef);
-
-   function LLVMBuildAdd
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildSub
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildMul
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildUDiv
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildSDiv
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildFDiv
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildURem
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildSRem
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildFRem
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildShl
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildLShr
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildAShr
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildAnd
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildOr
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildXor
-     (arg_1 : in llvm.LLVMBuilderRef;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildNeg
-     (arg_1 : in llvm.LLVMBuilderRef;
-      V     : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildNot
-     (arg_1 : in llvm.LLVMBuilderRef;
-      V     : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildMalloc
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildArrayMalloc
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Val   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildAlloca
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildArrayAlloca
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Val   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildFree
-     (arg_1      : in llvm.LLVMBuilderRef;
-      PointerVal : in llvm.LLVMValueRef)
-      return       llvm.LLVMValueRef;
-
-   function LLVMBuildLoad
-     (arg_1      : in llvm.LLVMBuilderRef;
-      PointerVal : in llvm.LLVMValueRef;
-      Name       : in Interfaces.C.Strings.chars_ptr)
-      return       llvm.LLVMValueRef;
-
-   function LLVMBuildStore
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Val   : in llvm.LLVMValueRef;
-      Ptr   : in llvm.LLVMValueRef)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildGEP
-     (B          : in llvm.LLVMBuilderRef;
-      Pointer    : in llvm.LLVMValueRef;
-      Indices    : access llvm.LLVMValueRef;
-      NumIndices : in Interfaces.C.unsigned;
-      Name       : in Interfaces.C.Strings.chars_ptr)
-      return       llvm.LLVMValueRef;
-
-   function LLVMBuildTrunc
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildZExt
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildSExt
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildFPToUI
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildFPToSI
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildUIToFP
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildSIToFP
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildFPTrunc
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildFPExt
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildPtrToInt
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildIntToPtr
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildBitCast
-     (arg_1  : in llvm.LLVMBuilderRef;
-      Val    : in llvm.LLVMValueRef;
-      DestTy : in llvm.LLVMTypeRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildICmp
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Op    : in llvm.LLVMIntPredicate;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildFCmp
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Op    : in llvm.LLVMRealPredicate;
-      LHS   : in llvm.LLVMValueRef;
-      RHS   : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildPhi
-     (arg_1 : in llvm.LLVMBuilderRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildCall
-     (arg_1   : in llvm.LLVMBuilderRef;
-      Fn      : in llvm.LLVMValueRef;
-      Args    : access llvm.LLVMValueRef;
-      NumArgs : in Interfaces.C.unsigned;
-      Name    : in Interfaces.C.Strings.chars_ptr)
-      return    llvm.LLVMValueRef;
-
-   function LLVMBuildSelect
-     (arg_1    : in llvm.LLVMBuilderRef;
-      the_If   : in llvm.LLVMValueRef;
-      the_Then : in llvm.LLVMValueRef;
-      the_Else : in llvm.LLVMValueRef;
-      Name     : in Interfaces.C.Strings.chars_ptr)
-      return     llvm.LLVMValueRef;
-
-   function LLVMBuildVAArg
-     (arg_1 : in llvm.LLVMBuilderRef;
-      List  : in llvm.LLVMValueRef;
-      Ty    : in llvm.LLVMTypeRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildExtractElement
-     (arg_1  : in llvm.LLVMBuilderRef;
-      VecVal : in llvm.LLVMValueRef;
-      Index  : in llvm.LLVMValueRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildInsertElement
-     (arg_1  : in llvm.LLVMBuilderRef;
-      VecVal : in llvm.LLVMValueRef;
-      EltVal : in llvm.LLVMValueRef;
-      Index  : in llvm.LLVMValueRef;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildShuffleVector
-     (arg_1 : in llvm.LLVMBuilderRef;
-      V1    : in llvm.LLVMValueRef;
-      V2    : in llvm.LLVMValueRef;
-      Mask  : in llvm.LLVMValueRef;
-      Name  : in Interfaces.C.Strings.chars_ptr)
-      return  llvm.LLVMValueRef;
-
-   function LLVMBuildExtractValue
-     (arg_1  : in llvm.LLVMBuilderRef;
-      AggVal : in llvm.LLVMValueRef;
-      Index  : in Interfaces.C.unsigned;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMBuildInsertValue
-     (arg_1  : in llvm.LLVMBuilderRef;
-      AggVal : in llvm.LLVMValueRef;
-      EltVal : in llvm.LLVMValueRef;
-      Index  : in Interfaces.C.unsigned;
-      Name   : in Interfaces.C.Strings.chars_ptr)
-      return   llvm.LLVMValueRef;
-
-   function LLVMCreateModuleProviderForExistingModule
-     (M    : in llvm.LLVMModuleRef)
-      return llvm.LLVMModuleProviderRef;
-
-   procedure LLVMDisposeModuleProvider (MP : in llvm.LLVMModuleProviderRef);
-
-   function LLVMCreateMemoryBufferWithContentsOfFile
-     (Path       : in Interfaces.C.Strings.chars_ptr;
-      OutMemBuf  : access llvm.LLVMMemoryBufferRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   function LLVMCreateMemoryBufferWithSTDIN
-     (OutMemBuf  : access llvm.LLVMMemoryBufferRef;
-      OutMessage : access Interfaces.C.Strings.chars_ptr)
-      return       Interfaces.C.int;
-
-   procedure LLVMDisposeMemoryBuffer (MemBuf : in llvm.LLVMMemoryBufferRef);
-
-   function LLVMCreatePassManager return  llvm.LLVMPassManagerRef;
-
-   function LLVMCreateFunctionPassManager
-     (MP   : in llvm.LLVMModuleProviderRef)
-      return llvm.LLVMPassManagerRef;
-
-   function LLVMRunPassManager
-     (PM   : in llvm.LLVMPassManagerRef;
-      M    : in llvm.LLVMModuleRef)
-      return Interfaces.C.int;
-
-   function LLVMInitializeFunctionPassManager
-     (FPM  : in llvm.LLVMPassManagerRef)
-      return Interfaces.C.int;
-
-   function LLVMRunFunctionPassManager
-     (FPM  : in llvm.LLVMPassManagerRef;
-      F    : in llvm.LLVMValueRef)
-      return Interfaces.C.int;
-
-   function LLVMFinalizeFunctionPassManager
-     (FPM  : in llvm.LLVMPassManagerRef)
-      return Interfaces.C.int;
-
-   procedure LLVMDisposePassManager (PM : in llvm.LLVMPassManagerRef);
-
-private
-
-   pragma Import (C, LLVMDisposeMessage, "Ada_LLVMDisposeMessage");
-   pragma Import (C, LLVMContextCreate, "Ada_LLVMContextCreate");
-   pragma Import (C, LLVMGetGlobalContext, "Ada_LLVMGetGlobalContext");
-   pragma Import (C, LLVMContextDispose, "Ada_LLVMContextDispose");
-   pragma Import
-     (C,
-      LLVMModuleCreateWithName,
-      "Ada_LLVMModuleCreateWithName");
-   pragma Import
-     (C,
-      LLVMModuleCreateWithNameInContext,
-      "Ada_LLVMModuleCreateWithNameInContext");
-   pragma Import (C, LLVMDisposeModule, "Ada_LLVMDisposeModule");
-   pragma Import (C, LLVMGetDataLayout, "Ada_LLVMGetDataLayout");
-   pragma Import (C, LLVMSetDataLayout, "Ada_LLVMSetDataLayout");
-   pragma Import (C, LLVMGetTarget, "Ada_LLVMGetTarget");
-   pragma Import (C, LLVMSetTarget, "Ada_LLVMSetTarget");
-   pragma Import (C, LLVMAddTypeName, "Ada_LLVMAddTypeName");
-   pragma Import (C, LLVMDeleteTypeName, "Ada_LLVMDeleteTypeName");
-   pragma Import (C, LLVMGetTypeByName, "Ada_LLVMGetTypeByName");
-   pragma Import (C, LLVMDumpModule, "Ada_LLVMDumpModule");
-   pragma Import (C, LLVMGetTypeKind, "Ada_LLVMGetTypeKind");
-   pragma Import (C, LLVMInt1Type, "Ada_LLVMInt1Type");
-   pragma Import (C, LLVMInt8Type, "Ada_LLVMInt8Type");
-   pragma Import (C, LLVMInt16Type, "Ada_LLVMInt16Type");
-   pragma Import (C, LLVMInt32Type, "Ada_LLVMInt32Type");
-   pragma Import (C, LLVMInt64Type, "Ada_LLVMInt64Type");
-   pragma Import (C, LLVMIntType, "Ada_LLVMIntType");
-   pragma Import (C, LLVMGetIntTypeWidth, "Ada_LLVMGetIntTypeWidth");
-   pragma Import (C, LLVMFloatType, "Ada_LLVMFloatType");
-   pragma Import (C, LLVMDoubleType, "Ada_LLVMDoubleType");
-   pragma Import (C, LLVMX86FP80Type, "Ada_LLVMX86FP80Type");
-   pragma Import (C, LLVMFP128Type, "Ada_LLVMFP128Type");
-   pragma Import (C, LLVMPPCFP128Type, "Ada_LLVMPPCFP128Type");
-   pragma Import (C, LLVMFunctionType, "Ada_LLVMFunctionType");
-   pragma Import (C, LLVMIsFunctionVarArg, "Ada_LLVMIsFunctionVarArg");
-   pragma Import (C, LLVMGetReturnType, "Ada_LLVMGetReturnType");
-   pragma Import (C, LLVMCountParamTypes, "Ada_LLVMCountParamTypes");
-   pragma Import (C, LLVMGetParamTypes, "Ada_LLVMGetParamTypes");
-   pragma Import (C, LLVMStructType, "Ada_LLVMStructType");
-   pragma Import
-     (C,
-      LLVMCountStructElementTypes,
-      "Ada_LLVMCountStructElementTypes");
-   pragma Import
-     (C,
-      LLVMGetStructElementTypes,
-      "Ada_LLVMGetStructElementTypes");
-   pragma Import (C, LLVMIsPackedStruct, "Ada_LLVMIsPackedStruct");
-   pragma Import (C, LLVMArrayType, "Ada_LLVMArrayType");
-   pragma Import (C, LLVMPointerType, "Ada_LLVMPointerType");
-   pragma Import (C, LLVMVectorType, "Ada_LLVMVectorType");
-   pragma Import (C, LLVMGetElementType, "Ada_LLVMGetElementType");
-   pragma Import (C, LLVMGetArrayLength, "Ada_LLVMGetArrayLength");
-   pragma Import
-     (C,
-      LLVMGetPointerAddressSpace,
-      "Ada_LLVMGetPointerAddressSpace");
-   pragma Import (C, LLVMGetVectorSize, "Ada_LLVMGetVectorSize");
-   pragma Import (C, LLVMVoidType, "Ada_LLVMVoidType");
-   pragma Import (C, LLVMLabelType, "Ada_LLVMLabelType");
-   pragma Import (C, LLVMOpaqueType, "Ada_LLVMOpaqueType");
-   pragma Import (C, LLVMCreateTypeHandle, "Ada_LLVMCreateTypeHandle");
-   pragma Import (C, LLVMRefineType, "Ada_LLVMRefineType");
-   pragma Import (C, LLVMResolveTypeHandle, "Ada_LLVMResolveTypeHandle");
-   pragma Import (C, LLVMDisposeTypeHandle, "Ada_LLVMDisposeTypeHandle");
-   pragma Import (C, LLVMTypeOf, "Ada_LLVMTypeOf");
-   pragma Import (C, LLVMGetValueName, "Ada_LLVMGetValueName");
-   pragma Import (C, LLVMSetValueName, "Ada_LLVMSetValueName");
-   pragma Import (C, LLVMDumpValue, "Ada_LLVMDumpValue");
-   pragma Import (C, LLVMIsAArgument, "Ada_LLVMIsAArgument");
-   pragma Import (C, LLVMIsABasicBlock, "Ada_LLVMIsABasicBlock");
-   pragma Import (C, LLVMIsAInlineAsm, "Ada_LLVMIsAInlineAsm");
-   pragma Import (C, LLVMIsAUser, "Ada_LLVMIsAUser");
-   pragma Import (C, LLVMIsAConstant, "Ada_LLVMIsAConstant");
-   pragma Import
-     (C,
-      LLVMIsAConstantAggregateZero,
-      "Ada_LLVMIsAConstantAggregateZero");
-   pragma Import (C, LLVMIsAConstantArray, "Ada_LLVMIsAConstantArray");
-   pragma Import (C, LLVMIsAConstantExpr, "Ada_LLVMIsAConstantExpr");
-   pragma Import (C, LLVMIsAConstantFP, "Ada_LLVMIsAConstantFP");
-   pragma Import (C, LLVMIsAConstantInt, "Ada_LLVMIsAConstantInt");
-   pragma Import
-     (C,
-      LLVMIsAConstantPointerNull,
-      "Ada_LLVMIsAConstantPointerNull");
-   pragma Import (C, LLVMIsAConstantStruct, "Ada_LLVMIsAConstantStruct");
-   pragma Import (C, LLVMIsAConstantVector, "Ada_LLVMIsAConstantVector");
-   pragma Import (C, LLVMIsAGlobalValue, "Ada_LLVMIsAGlobalValue");
-   pragma Import (C, LLVMIsAFunction, "Ada_LLVMIsAFunction");
-   pragma Import (C, LLVMIsAGlobalAlias, "Ada_LLVMIsAGlobalAlias");
-   pragma Import (C, LLVMIsAGlobalVariable, "Ada_LLVMIsAGlobalVariable");
-   pragma Import (C, LLVMIsAUndefValue, "Ada_LLVMIsAUndefValue");
-   pragma Import (C, LLVMIsAInstruction, "Ada_LLVMIsAInstruction");
-   pragma Import (C, LLVMIsABinaryOperator, "Ada_LLVMIsABinaryOperator");
-   pragma Import (C, LLVMIsACallInst, "Ada_LLVMIsACallInst");
-   pragma Import (C, LLVMIsAIntrinsicInst, "Ada_LLVMIsAIntrinsicInst");
-   pragma Import (C, LLVMIsADbgInfoIntrinsic, "Ada_LLVMIsADbgInfoIntrinsic");
-   pragma Import (C, LLVMIsADbgDeclareInst, "Ada_LLVMIsADbgDeclareInst");
-   pragma Import (C, LLVMIsADbgFuncStartInst, "Ada_LLVMIsADbgFuncStartInst");
-   pragma Import (C, LLVMIsADbgRegionEndInst, "Ada_LLVMIsADbgRegionEndInst");
-   pragma Import
-     (C,
-      LLVMIsADbgRegionStartInst,
-      "Ada_LLVMIsADbgRegionStartInst");
-   pragma Import (C, LLVMIsADbgStopPointInst, "Ada_LLVMIsADbgStopPointInst");
-   pragma Import (C, LLVMIsAEHSelectorInst, "Ada_LLVMIsAEHSelectorInst");
-   pragma Import (C, LLVMIsAMemIntrinsic, "Ada_LLVMIsAMemIntrinsic");
-   pragma Import (C, LLVMIsAMemCpyInst, "Ada_LLVMIsAMemCpyInst");
-   pragma Import (C, LLVMIsAMemMoveInst, "Ada_LLVMIsAMemMoveInst");
-   pragma Import (C, LLVMIsAMemSetInst, "Ada_LLVMIsAMemSetInst");
-   pragma Import (C, LLVMIsACmpInst, "Ada_LLVMIsACmpInst");
-   pragma Import (C, LLVMIsAFCmpInst, "Ada_LLVMIsAFCmpInst");
-   pragma Import (C, LLVMIsAICmpInst, "Ada_LLVMIsAICmpInst");
-   pragma Import
-     (C,
-      LLVMIsAExtractElementInst,
-      "Ada_LLVMIsAExtractElementInst");
-   pragma Import
-     (C,
-      LLVMIsAGetElementPtrInst,
-      "Ada_LLVMIsAGetElementPtrInst");
-   pragma Import
-     (C,
-      LLVMIsAInsertElementInst,
-      "Ada_LLVMIsAInsertElementInst");
-   pragma Import (C, LLVMIsAInsertValueInst, "Ada_LLVMIsAInsertValueInst");
-   pragma Import (C, LLVMIsAPHINode, "Ada_LLVMIsAPHINode");
-   pragma Import (C, LLVMIsASelectInst, "Ada_LLVMIsASelectInst");
-   pragma Import
-     (C,
-      LLVMIsAShuffleVectorInst,
-      "Ada_LLVMIsAShuffleVectorInst");
-   pragma Import (C, LLVMIsAStoreInst, "Ada_LLVMIsAStoreInst");
-   pragma Import (C, LLVMIsATerminatorInst, "Ada_LLVMIsATerminatorInst");
-   pragma Import (C, LLVMIsABranchInst, "Ada_LLVMIsABranchInst");
-   pragma Import (C, LLVMIsAInvokeInst, "Ada_LLVMIsAInvokeInst");
-   pragma Import (C, LLVMIsAReturnInst, "Ada_LLVMIsAReturnInst");
-   pragma Import (C, LLVMIsASwitchInst, "Ada_LLVMIsASwitchInst");
-   pragma Import (C, LLVMIsAUnreachableInst, "Ada_LLVMIsAUnreachableInst");
-   pragma Import (C, LLVMIsAUnwindInst, "Ada_LLVMIsAUnwindInst");
-   pragma Import (C, LLVMIsAUnaryInstruction, "Ada_LLVMIsAUnaryInstruction");
-   pragma Import (C, LLVMIsAAllocationInst, "Ada_LLVMIsAAllocationInst");
-   pragma Import (C, LLVMIsAAllocaInst, "Ada_LLVMIsAAllocaInst");
-   pragma Import (C, LLVMIsAMallocInst, "Ada_LLVMIsAMallocInst");
-   pragma Import (C, LLVMIsACastInst, "Ada_LLVMIsACastInst");
-   pragma Import (C, LLVMIsABitCastInst, "Ada_LLVMIsABitCastInst");
-   pragma Import (C, LLVMIsAFPExtInst, "Ada_LLVMIsAFPExtInst");
-   pragma Import (C, LLVMIsAFPToSIInst, "Ada_LLVMIsAFPToSIInst");
-   pragma Import (C, LLVMIsAFPToUIInst, "Ada_LLVMIsAFPToUIInst");
-   pragma Import (C, LLVMIsAFPTruncInst, "Ada_LLVMIsAFPTruncInst");
-   pragma Import (C, LLVMIsAIntToPtrInst, "Ada_LLVMIsAIntToPtrInst");
-   pragma Import (C, LLVMIsAPtrToIntInst, "Ada_LLVMIsAPtrToIntInst");
-   pragma Import (C, LLVMIsASExtInst, "Ada_LLVMIsASExtInst");
-   pragma Import (C, LLVMIsASIToFPInst, "Ada_LLVMIsASIToFPInst");
-   pragma Import (C, LLVMIsATruncInst, "Ada_LLVMIsATruncInst");
-   pragma Import (C, LLVMIsAUIToFPInst, "Ada_LLVMIsAUIToFPInst");
-   pragma Import (C, LLVMIsAZExtInst, "Ada_LLVMIsAZExtInst");
-   pragma Import (C, LLVMIsAExtractValueInst, "Ada_LLVMIsAExtractValueInst");
-   pragma Import (C, LLVMIsAFreeInst, "Ada_LLVMIsAFreeInst");
-   pragma Import (C, LLVMIsALoadInst, "Ada_LLVMIsALoadInst");
-   pragma Import (C, LLVMIsAVAArgInst, "Ada_LLVMIsAVAArgInst");
-   pragma Import (C, LLVMConstNull, "Ada_LLVMConstNull");
-   pragma Import (C, LLVMConstAllOnes, "Ada_LLVMConstAllOnes");
-   pragma Import (C, LLVMGetUndef, "Ada_LLVMGetUndef");
-   pragma Import (C, LLVMIsConstant, "Ada_LLVMIsConstant");
-   pragma Import (C, LLVMIsNull, "Ada_LLVMIsNull");
-   pragma Import (C, LLVMIsUndef, "Ada_LLVMIsUndef");
-   pragma Import (C, LLVMConstPointerNull, "Ada_LLVMConstPointerNull");
-   pragma Import (C, LLVMConstInt, "Ada_LLVMConstInt");
-   pragma Import (C, LLVMConstReal, "Ada_LLVMConstReal");
-   pragma Import (C, LLVMConstRealOfString, "Ada_LLVMConstRealOfString");
-   pragma Import (C, LLVMConstString, "Ada_LLVMConstString");
-   pragma Import (C, LLVMConstArray, "Ada_LLVMConstArray");
-   pragma Import (C, LLVMConstStruct, "Ada_LLVMConstStruct");
-   pragma Import (C, LLVMConstVector, "Ada_LLVMConstVector");
-   pragma Import (C, LLVMSizeOf, "Ada_LLVMSizeOf");
-   pragma Import (C, LLVMConstNeg, "Ada_LLVMConstNeg");
-   pragma Import (C, LLVMConstNot, "Ada_LLVMConstNot");
-   pragma Import (C, LLVMConstAdd, "Ada_LLVMConstAdd");
-   pragma Import (C, LLVMConstSub, "Ada_LLVMConstSub");
-   pragma Import (C, LLVMConstMul, "Ada_LLVMConstMul");
-   pragma Import (C, LLVMConstUDiv, "Ada_LLVMConstUDiv");
-   pragma Import (C, LLVMConstSDiv, "Ada_LLVMConstSDiv");
-   pragma Import (C, LLVMConstFDiv, "Ada_LLVMConstFDiv");
-   pragma Import (C, LLVMConstURem, "Ada_LLVMConstURem");
-   pragma Import (C, LLVMConstSRem, "Ada_LLVMConstSRem");
-   pragma Import (C, LLVMConstFRem, "Ada_LLVMConstFRem");
-   pragma Import (C, LLVMConstAnd, "Ada_LLVMConstAnd");
-   pragma Import (C, LLVMConstOr, "Ada_LLVMConstOr");
-   pragma Import (C, LLVMConstXor, "Ada_LLVMConstXor");
-   pragma Import (C, LLVMConstICmp, "Ada_LLVMConstICmp");
-   pragma Import (C, LLVMConstFCmp, "Ada_LLVMConstFCmp");
-   pragma Import (C, LLVMConstShl, "Ada_LLVMConstShl");
-   pragma Import (C, LLVMConstLShr, "Ada_LLVMConstLShr");
-   pragma Import (C, LLVMConstAShr, "Ada_LLVMConstAShr");
-   pragma Import (C, LLVMConstGEP, "Ada_LLVMConstGEP");
-   pragma Import (C, LLVMConstTrunc, "Ada_LLVMConstTrunc");
-   pragma Import (C, LLVMConstSExt, "Ada_LLVMConstSExt");
-   pragma Import (C, LLVMConstZExt, "Ada_LLVMConstZExt");
-   pragma Import (C, LLVMConstFPTrunc, "Ada_LLVMConstFPTrunc");
-   pragma Import (C, LLVMConstFPExt, "Ada_LLVMConstFPExt");
-   pragma Import (C, LLVMConstUIToFP, "Ada_LLVMConstUIToFP");
-   pragma Import (C, LLVMConstSIToFP, "Ada_LLVMConstSIToFP");
-   pragma Import (C, LLVMConstFPToUI, "Ada_LLVMConstFPToUI");
-   pragma Import (C, LLVMConstFPToSI, "Ada_LLVMConstFPToSI");
-   pragma Import (C, LLVMConstPtrToInt, "Ada_LLVMConstPtrToInt");
-   pragma Import (C, LLVMConstIntToPtr, "Ada_LLVMConstIntToPtr");
-   pragma Import (C, LLVMConstBitCast, "Ada_LLVMConstBitCast");
-   pragma Import (C, LLVMConstSelect, "Ada_LLVMConstSelect");
-   pragma Import (C, LLVMConstExtractElement, "Ada_LLVMConstExtractElement");
-   pragma Import (C, LLVMConstInsertElement, "Ada_LLVMConstInsertElement");
-   pragma Import (C, LLVMConstShuffleVector, "Ada_LLVMConstShuffleVector");
-   pragma Import (C, LLVMConstExtractValue, "Ada_LLVMConstExtractValue");
-   pragma Import (C, LLVMConstInsertValue, "Ada_LLVMConstInsertValue");
-   pragma Import (C, LLVMConstInlineAsm, "Ada_LLVMConstInlineAsm");
-   pragma Import (C, LLVMGetGlobalParent, "Ada_LLVMGetGlobalParent");
-   pragma Import (C, LLVMIsDeclaration, "Ada_LLVMIsDeclaration");
-   pragma Import (C, LLVMGetLinkage, "Ada_LLVMGetLinkage");
-   pragma Import (C, LLVMSetLinkage, "Ada_LLVMSetLinkage");
-   pragma Import (C, LLVMGetSection, "Ada_LLVMGetSection");
-   pragma Import (C, LLVMSetSection, "Ada_LLVMSetSection");
-   pragma Import (C, LLVMGetVisibility, "Ada_LLVMGetVisibility");
-   pragma Import (C, LLVMSetVisibility, "Ada_LLVMSetVisibility");
-   pragma Import (C, LLVMGetAlignment, "Ada_LLVMGetAlignment");
-   pragma Import (C, LLVMSetAlignment, "Ada_LLVMSetAlignment");
-   pragma Import (C, LLVMAddGlobal, "Ada_LLVMAddGlobal");
-   pragma Import (C, LLVMGetNamedGlobal, "Ada_LLVMGetNamedGlobal");
-   pragma Import (C, LLVMGetFirstGlobal, "Ada_LLVMGetFirstGlobal");
-   pragma Import (C, LLVMGetLastGlobal, "Ada_LLVMGetLastGlobal");
-   pragma Import (C, LLVMGetNextGlobal, "Ada_LLVMGetNextGlobal");
-   pragma Import (C, LLVMGetPreviousGlobal, "Ada_LLVMGetPreviousGlobal");
-   pragma Import (C, LLVMDeleteGlobal, "Ada_LLVMDeleteGlobal");
-   pragma Import (C, LLVMGetInitializer, "Ada_LLVMGetInitializer");
-   pragma Import (C, LLVMSetInitializer, "Ada_LLVMSetInitializer");
-   pragma Import (C, LLVMIsThreadLocal, "Ada_LLVMIsThreadLocal");
-   pragma Import (C, LLVMSetThreadLocal, "Ada_LLVMSetThreadLocal");
-   pragma Import (C, LLVMIsGlobalConstant, "Ada_LLVMIsGlobalConstant");
-   pragma Import (C, LLVMSetGlobalConstant, "Ada_LLVMSetGlobalConstant");
-   pragma Import (C, LLVMAddAlias, "Ada_LLVMAddAlias");
-   pragma Import (C, LLVMAddFunction, "Ada_LLVMAddFunction");
-   pragma Import (C, LLVMGetNamedFunction, "Ada_LLVMGetNamedFunction");
-   pragma Import (C, LLVMGetFirstFunction, "Ada_LLVMGetFirstFunction");
-   pragma Import (C, LLVMGetLastFunction, "Ada_LLVMGetLastFunction");
-   pragma Import (C, LLVMGetNextFunction, "Ada_LLVMGetNextFunction");
-   pragma Import (C, LLVMGetPreviousFunction, "Ada_LLVMGetPreviousFunction");
-   pragma Import (C, LLVMDeleteFunction, "Ada_LLVMDeleteFunction");
-   pragma Import (C, LLVMGetIntrinsicID, "Ada_LLVMGetIntrinsicID");
-   pragma Import (C, LLVMGetFunctionCallConv, "Ada_LLVMGetFunctionCallConv");
-   pragma Import (C, LLVMSetFunctionCallConv, "Ada_LLVMSetFunctionCallConv");
-   pragma Import (C, LLVMGetGC, "Ada_LLVMGetGC");
-   pragma Import (C, LLVMSetGC, "Ada_LLVMSetGC");
-   pragma Import (C, LLVMAddFunctionAttr, "Ada_LLVMAddFunctionAttr");
-   pragma Import (C, LLVMRemoveFunctionAttr, "Ada_LLVMRemoveFunctionAttr");
-   pragma Import (C, LLVMCountParams, "Ada_LLVMCountParams");
-   pragma Import (C, LLVMGetParams, "Ada_LLVMGetParams");
-   pragma Import (C, LLVMGetParam, "Ada_LLVMGetParam");
-   pragma Import (C, LLVMGetParamParent, "Ada_LLVMGetParamParent");
-   pragma Import (C, LLVMGetFirstParam, "Ada_LLVMGetFirstParam");
-   pragma Import (C, LLVMGetLastParam, "Ada_LLVMGetLastParam");
-   pragma Import (C, LLVMGetNextParam, "Ada_LLVMGetNextParam");
-   pragma Import (C, LLVMGetPreviousParam, "Ada_LLVMGetPreviousParam");
-   pragma Import (C, LLVMAddAttribute, "Ada_LLVMAddAttribute");
-   pragma Import (C, LLVMRemoveAttribute, "Ada_LLVMRemoveAttribute");
-   pragma Import (C, LLVMSetParamAlignment, "Ada_LLVMSetParamAlignment");
-   pragma Import (C, LLVMBasicBlockAsValue, "Ada_LLVMBasicBlockAsValue");
-   pragma Import (C, LLVMValueIsBasicBlock, "Ada_LLVMValueIsBasicBlock");
-   pragma Import (C, LLVMValueAsBasicBlock, "Ada_LLVMValueAsBasicBlock");
-   pragma Import (C, LLVMGetBasicBlockParent, "Ada_LLVMGetBasicBlockParent");
-   pragma Import (C, LLVMCountBasicBlocks, "Ada_LLVMCountBasicBlocks");
-   pragma Import (C, LLVMGetBasicBlocks, "Ada_LLVMGetBasicBlocks");
-   pragma Import (C, LLVMGetFirstBasicBlock, "Ada_LLVMGetFirstBasicBlock");
-   pragma Import (C, LLVMGetLastBasicBlock, "Ada_LLVMGetLastBasicBlock");
-   pragma Import (C, LLVMGetNextBasicBlock, "Ada_LLVMGetNextBasicBlock");
-   pragma Import
-     (C,
-      LLVMGetPreviousBasicBlock,
-      "Ada_LLVMGetPreviousBasicBlock");
-   pragma Import (C, LLVMGetEntryBasicBlock, "Ada_LLVMGetEntryBasicBlock");
-   pragma Import (C, LLVMAppendBasicBlock, "Ada_LLVMAppendBasicBlock");
-   pragma Import (C, LLVMInsertBasicBlock, "Ada_LLVMInsertBasicBlock");
-   pragma Import (C, LLVMDeleteBasicBlock, "Ada_LLVMDeleteBasicBlock");
-   pragma Import
-     (C,
-      LLVMGetInstructionParent,
-      "Ada_LLVMGetInstructionParent");
-   pragma Import (C, LLVMGetFirstInstruction, "Ada_LLVMGetFirstInstruction");
-   pragma Import (C, LLVMGetLastInstruction, "Ada_LLVMGetLastInstruction");
-   pragma Import (C, LLVMGetNextInstruction, "Ada_LLVMGetNextInstruction");
-   pragma Import
-     (C,
-      LLVMGetPreviousInstruction,
-      "Ada_LLVMGetPreviousInstruction");
-   pragma Import
-     (C,
-      LLVMSetInstructionCallConv,
-      "Ada_LLVMSetInstructionCallConv");
-   pragma Import
-     (C,
-      LLVMGetInstructionCallConv,
-      "Ada_LLVMGetInstructionCallConv");
-   pragma Import (C, LLVMAddInstrAttribute, "Ada_LLVMAddInstrAttribute");
-   pragma Import
-     (C,
-      LLVMRemoveInstrAttribute,
-      "Ada_LLVMRemoveInstrAttribute");
-   pragma Import
-     (C,
-      LLVMSetInstrParamAlignment,
-      "Ada_LLVMSetInstrParamAlignment");
-   pragma Import (C, LLVMIsTailCall, "Ada_LLVMIsTailCall");
-   pragma Import (C, LLVMSetTailCall, "Ada_LLVMSetTailCall");
-   pragma Import (C, LLVMAddIncoming, "Ada_LLVMAddIncoming");
-   pragma Import (C, LLVMCountIncoming, "Ada_LLVMCountIncoming");
-   pragma Import (C, LLVMGetIncomingValue, "Ada_LLVMGetIncomingValue");
-   pragma Import (C, LLVMGetIncomingBlock, "Ada_LLVMGetIncomingBlock");
-   pragma Import (C, LLVMCreateBuilder, "Ada_LLVMCreateBuilder");
-   pragma Import (C, LLVMPositionBuilder, "Ada_LLVMPositionBuilder");
-   pragma Import
-     (C,
-      LLVMPositionBuilderBefore,
-      "Ada_LLVMPositionBuilderBefore");
-   pragma Import
-     (C,
-      LLVMPositionBuilderAtEnd,
-      "Ada_LLVMPositionBuilderAtEnd");
-   pragma Import (C, LLVMGetInsertBlock, "Ada_LLVMGetInsertBlock");
-   pragma Import
-     (C,
-      LLVMClearInsertionPosition,
-      "Ada_LLVMClearInsertionPosition");
-   pragma Import (C, LLVMInsertIntoBuilder, "Ada_LLVMInsertIntoBuilder");
-   pragma Import (C, LLVMDisposeBuilder, "Ada_LLVMDisposeBuilder");
-   pragma Import (C, LLVMBuildRetVoid, "Ada_LLVMBuildRetVoid");
-   pragma Import (C, LLVMBuildRet, "Ada_LLVMBuildRet");
-   pragma Import (C, LLVMBuildBr, "Ada_LLVMBuildBr");
-   pragma Import (C, LLVMBuildCondBr, "Ada_LLVMBuildCondBr");
-   pragma Import (C, LLVMBuildSwitch, "Ada_LLVMBuildSwitch");
-   pragma Import (C, LLVMBuildInvoke, "Ada_LLVMBuildInvoke");
-   pragma Import (C, LLVMBuildUnwind, "Ada_LLVMBuildUnwind");
-   pragma Import (C, LLVMBuildUnreachable, "Ada_LLVMBuildUnreachable");
-   pragma Import (C, LLVMAddCase, "Ada_LLVMAddCase");
-   pragma Import (C, LLVMBuildAdd, "Ada_LLVMBuildAdd");
-   pragma Import (C, LLVMBuildSub, "Ada_LLVMBuildSub");
-   pragma Import (C, LLVMBuildMul, "Ada_LLVMBuildMul");
-   pragma Import (C, LLVMBuildUDiv, "Ada_LLVMBuildUDiv");
-   pragma Import (C, LLVMBuildSDiv, "Ada_LLVMBuildSDiv");
-   pragma Import (C, LLVMBuildFDiv, "Ada_LLVMBuildFDiv");
-   pragma Import (C, LLVMBuildURem, "Ada_LLVMBuildURem");
-   pragma Import (C, LLVMBuildSRem, "Ada_LLVMBuildSRem");
-   pragma Import (C, LLVMBuildFRem, "Ada_LLVMBuildFRem");
-   pragma Import (C, LLVMBuildShl, "Ada_LLVMBuildShl");
-   pragma Import (C, LLVMBuildLShr, "Ada_LLVMBuildLShr");
-   pragma Import (C, LLVMBuildAShr, "Ada_LLVMBuildAShr");
-   pragma Import (C, LLVMBuildAnd, "Ada_LLVMBuildAnd");
-   pragma Import (C, LLVMBuildOr, "Ada_LLVMBuildOr");
-   pragma Import (C, LLVMBuildXor, "Ada_LLVMBuildXor");
-   pragma Import (C, LLVMBuildNeg, "Ada_LLVMBuildNeg");
-   pragma Import (C, LLVMBuildNot, "Ada_LLVMBuildNot");
-   pragma Import (C, LLVMBuildMalloc, "Ada_LLVMBuildMalloc");
-   pragma Import (C, LLVMBuildArrayMalloc, "Ada_LLVMBuildArrayMalloc");
-   pragma Import (C, LLVMBuildAlloca, "Ada_LLVMBuildAlloca");
-   pragma Import (C, LLVMBuildArrayAlloca, "Ada_LLVMBuildArrayAlloca");
-   pragma Import (C, LLVMBuildFree, "Ada_LLVMBuildFree");
-   pragma Import (C, LLVMBuildLoad, "Ada_LLVMBuildLoad");
-   pragma Import (C, LLVMBuildStore, "Ada_LLVMBuildStore");
-   pragma Import (C, LLVMBuildGEP, "Ada_LLVMBuildGEP");
-   pragma Import (C, LLVMBuildTrunc, "Ada_LLVMBuildTrunc");
-   pragma Import (C, LLVMBuildZExt, "Ada_LLVMBuildZExt");
-   pragma Import (C, LLVMBuildSExt, "Ada_LLVMBuildSExt");
-   pragma Import (C, LLVMBuildFPToUI, "Ada_LLVMBuildFPToUI");
-   pragma Import (C, LLVMBuildFPToSI, "Ada_LLVMBuildFPToSI");
-   pragma Import (C, LLVMBuildUIToFP, "Ada_LLVMBuildUIToFP");
-   pragma Import (C, LLVMBuildSIToFP, "Ada_LLVMBuildSIToFP");
-   pragma Import (C, LLVMBuildFPTrunc, "Ada_LLVMBuildFPTrunc");
-   pragma Import (C, LLVMBuildFPExt, "Ada_LLVMBuildFPExt");
-   pragma Import (C, LLVMBuildPtrToInt, "Ada_LLVMBuildPtrToInt");
-   pragma Import (C, LLVMBuildIntToPtr, "Ada_LLVMBuildIntToPtr");
-   pragma Import (C, LLVMBuildBitCast, "Ada_LLVMBuildBitCast");
-   pragma Import (C, LLVMBuildICmp, "Ada_LLVMBuildICmp");
-   pragma Import (C, LLVMBuildFCmp, "Ada_LLVMBuildFCmp");
-   pragma Import (C, LLVMBuildPhi, "Ada_LLVMBuildPhi");
-   pragma Import (C, LLVMBuildCall, "Ada_LLVMBuildCall");
-   pragma Import (C, LLVMBuildSelect, "Ada_LLVMBuildSelect");
-   pragma Import (C, LLVMBuildVAArg, "Ada_LLVMBuildVAArg");
-   pragma Import (C, LLVMBuildExtractElement, "Ada_LLVMBuildExtractElement");
-   pragma Import (C, LLVMBuildInsertElement, "Ada_LLVMBuildInsertElement");
-   pragma Import (C, LLVMBuildShuffleVector, "Ada_LLVMBuildShuffleVector");
-   pragma Import (C, LLVMBuildExtractValue, "Ada_LLVMBuildExtractValue");
-   pragma Import (C, LLVMBuildInsertValue, "Ada_LLVMBuildInsertValue");
-   pragma Import
-     (C,
-      LLVMCreateModuleProviderForExistingModule,
-      "Ada_LLVMCreateModuleProviderForExistingModule");
-   pragma Import
-     (C,
-      LLVMDisposeModuleProvider,
-      "Ada_LLVMDisposeModuleProvider");
-   pragma Import
-     (C,
-      LLVMCreateMemoryBufferWithContentsOfFile,
-      "Ada_LLVMCreateMemoryBufferWithContentsOfFile");
-   pragma Import
-     (C,
-      LLVMCreateMemoryBufferWithSTDIN,
-      "Ada_LLVMCreateMemoryBufferWithSTDIN");
-   pragma Import (C, LLVMDisposeMemoryBuffer, "Ada_LLVMDisposeMemoryBuffer");
-   pragma Import (C, LLVMCreatePassManager, "Ada_LLVMCreatePassManager");
-   pragma Import
-     (C,
-      LLVMCreateFunctionPassManager,
-      "Ada_LLVMCreateFunctionPassManager");
-   pragma Import (C, LLVMRunPassManager, "Ada_LLVMRunPassManager");
-   pragma Import
-     (C,
-      LLVMInitializeFunctionPassManager,
-      "Ada_LLVMInitializeFunctionPassManager");
-   pragma Import
-     (C,
-      LLVMRunFunctionPassManager,
-      "Ada_LLVMRunFunctionPassManager");
-   pragma Import
-     (C,
-      LLVMFinalizeFunctionPassManager,
-      "Ada_LLVMFinalizeFunctionPassManager");
-   pragma Import (C, LLVMDisposePassManager, "Ada_LLVMDisposePassManager");
-
-end llvm.Binding;
diff --git a/bindings/ada/llvm/llvm.ads b/bindings/ada/llvm/llvm.ads
deleted file mode 100644
index 20fc940f8c24..000000000000
--- a/bindings/ada/llvm/llvm.ads
+++ /dev/null
@@ -1,497 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Extensions;
-
-
-package llvm is
-
-   -- LLVMCtxt
-   --
-   type LLVMCtxt is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMCtxt_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCtxt;
-
-   type LLVMCtxt_view is access all llvm.LLVMCtxt;
-
-   -- LLVMContextRef
-   --
-   type LLVMContextRef is access all llvm.LLVMCtxt;
-
-   type LLVMContextRef_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMContextRef;
-
-   type LLVMContextRef_view is access all llvm.LLVMContextRef;
-
-   -- LLVMOpaqueModule
-   --
-   type LLVMOpaqueModule is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueModule_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueModule;
-
-   type LLVMOpaqueModule_view is access all llvm.LLVMOpaqueModule;
-
-   -- LLVMModuleRef
-   --
-   type LLVMModuleRef is access all llvm.LLVMOpaqueModule;
-
-   type LLVMModuleRef_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMModuleRef;
-
-   type LLVMModuleRef_view is access all llvm.LLVMModuleRef;
-
-   -- LLVMOpaqueType
-   --
-   type LLVMOpaqueType is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueType_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueType;
-
-   type LLVMOpaqueType_view is access all llvm.LLVMOpaqueType;
-
-   -- LLVMTypeRef
-   --
-   type LLVMTypeRef is access all llvm.LLVMOpaqueType;
-
-   type LLVMTypeRef_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeRef;
-
-   type LLVMTypeRef_view is access all llvm.LLVMTypeRef;
-
-   -- LLVMOpaqueTypeHandle
-   --
-   type LLVMOpaqueTypeHandle is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueTypeHandle_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaqueTypeHandle;
-
-   type LLVMOpaqueTypeHandle_view is access all llvm.LLVMOpaqueTypeHandle;
-
-   -- LLVMTypeHandleRef
-   --
-   type LLVMTypeHandleRef is access all llvm.LLVMOpaqueTypeHandle;
-
-   type LLVMTypeHandleRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMTypeHandleRef;
-
-   type LLVMTypeHandleRef_view is access all llvm.LLVMTypeHandleRef;
-
-   -- LLVMOpaqueValue
-   --
-   type LLVMOpaqueValue is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueValue_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueValue;
-
-   type LLVMOpaqueValue_view is access all llvm.LLVMOpaqueValue;
-
-   -- LLVMValueRef
-   --
-   type LLVMValueRef is access all llvm.LLVMOpaqueValue;
-
-   type LLVMValueRef_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMValueRef;
-
-   type LLVMValueRef_view is access all llvm.LLVMValueRef;
-
-   -- LLVMOpaqueBasicBlock
-   --
-   type LLVMOpaqueBasicBlock is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueBasicBlock_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaqueBasicBlock;
-
-   type LLVMOpaqueBasicBlock_view is access all llvm.LLVMOpaqueBasicBlock;
-
-   -- LLVMBasicBlockRef
-   --
-   type LLVMBasicBlockRef is access all llvm.LLVMOpaqueBasicBlock;
-
-   type LLVMBasicBlockRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMBasicBlockRef;
-
-   type LLVMBasicBlockRef_view is access all llvm.LLVMBasicBlockRef;
-
-   -- LLVMOpaqueBuilder
-   --
-   type LLVMOpaqueBuilder is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueBuilder_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaqueBuilder;
-
-   type LLVMOpaqueBuilder_view is access all llvm.LLVMOpaqueBuilder;
-
-   -- LLVMBuilderRef
-   --
-   type LLVMBuilderRef is access all llvm.LLVMOpaqueBuilder;
-
-   type LLVMBuilderRef_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMBuilderRef;
-
-   type LLVMBuilderRef_view is access all llvm.LLVMBuilderRef;
-
-   -- LLVMOpaqueModuleProvider
-   --
-   type LLVMOpaqueModuleProvider is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueModuleProvider_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaqueModuleProvider;
-
-   type LLVMOpaqueModuleProvider_view is access all
-     llvm.LLVMOpaqueModuleProvider;
-
-   -- LLVMModuleProviderRef
-   --
-   type LLVMModuleProviderRef is access all llvm.LLVMOpaqueModuleProvider;
-
-   type LLVMModuleProviderRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMModuleProviderRef;
-
-   type LLVMModuleProviderRef_view is access all llvm.LLVMModuleProviderRef;
-
-   -- LLVMOpaqueMemoryBuffer
-   --
-   type LLVMOpaqueMemoryBuffer is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueMemoryBuffer_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaqueMemoryBuffer;
-
-   type LLVMOpaqueMemoryBuffer_view is access all llvm.LLVMOpaqueMemoryBuffer;
-
-   -- LLVMMemoryBufferRef
-   --
-   type LLVMMemoryBufferRef is access all llvm.LLVMOpaqueMemoryBuffer;
-
-   type LLVMMemoryBufferRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMMemoryBufferRef;
-
-   type LLVMMemoryBufferRef_view is access all llvm.LLVMMemoryBufferRef;
-
-   -- LLVMOpaquePassManager
-   --
-   type LLVMOpaquePassManager is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaquePassManager_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMOpaquePassManager;
-
-   type LLVMOpaquePassManager_view is access all llvm.LLVMOpaquePassManager;
-
-   -- LLVMPassManagerRef
-   --
-   type LLVMPassManagerRef is access all llvm.LLVMOpaquePassManager;
-
-   type LLVMPassManagerRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMPassManagerRef;
-
-   type LLVMPassManagerRef_view is access all llvm.LLVMPassManagerRef;
-
-   -- LLVMAttribute
-   --
-   type LLVMAttribute is (
-      LLVMZExtAttribute,
-      LLVMSExtAttribute,
-      LLVMNoReturnAttribute,
-      LLVMInRegAttribute,
-      LLVMStructRetAttribute,
-      LLVMNoUnwindAttribute,
-      LLVMNoAliasAttribute,
-      LLVMByValAttribute,
-      LLVMNestAttribute,
-      LLVMReadNoneAttribute,
-      LLVMReadOnlyAttribute,
-      LLVMNoInlineAttribute,
-      LLVMAlwaysInlineAttribute,
-      LLVMOptimizeForSizeAttribute,
-      LLVMStackProtectAttribute,
-      LLVMStackProtectReqAttribute,
-      LLVMNoCaptureAttribute,
-      LLVMNoRedZoneAttribute,
-      LLVMNoImplicitFloatAttribute,
-      LLVMNakedAttribute);
-
-   for LLVMAttribute use
-     (LLVMZExtAttribute            => 1,
-      LLVMSExtAttribute            => 2,
-      LLVMNoReturnAttribute        => 4,
-      LLVMInRegAttribute           => 8,
-      LLVMStructRetAttribute       => 16,
-      LLVMNoUnwindAttribute        => 32,
-      LLVMNoAliasAttribute         => 64,
-      LLVMByValAttribute           => 128,
-      LLVMNestAttribute            => 256,
-      LLVMReadNoneAttribute        => 512,
-      LLVMReadOnlyAttribute        => 1024,
-      LLVMNoInlineAttribute        => 2048,
-      LLVMAlwaysInlineAttribute    => 4096,
-      LLVMOptimizeForSizeAttribute => 8192,
-      LLVMStackProtectAttribute    => 16384,
-      LLVMStackProtectReqAttribute => 32768,
-      LLVMNoCaptureAttribute       => 2097152,
-      LLVMNoRedZoneAttribute       => 4194304,
-      LLVMNoImplicitFloatAttribute => 8388608,
-      LLVMNakedAttribute           => 16777216);
-
-   pragma Convention (C, LLVMAttribute);
-
-   type LLVMAttribute_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMAttribute;
-
-   type LLVMAttribute_view is access all llvm.LLVMAttribute;
-
-   -- LLVMTypeKind
-   --
-   type LLVMTypeKind is (
-      LLVMVoidTypeKind,
-      LLVMFloatTypeKind,
-      LLVMDoubleTypeKind,
-      LLVMX86_FP80TypeKind,
-      LLVMFP128TypeKind,
-      LLVMPPC_FP128TypeKind,
-      LLVMLabelTypeKind,
-      LLVMIntegerTypeKind,
-      LLVMFunctionTypeKind,
-      LLVMStructTypeKind,
-      LLVMArrayTypeKind,
-      LLVMPointerTypeKind,
-      LLVMOpaqueTypeKind,
-      LLVMVectorTypeKind,
-      LLVMMetadataTypeKind);
-
-   for LLVMTypeKind use
-     (LLVMVoidTypeKind      => 0,
-      LLVMFloatTypeKind     => 1,
-      LLVMDoubleTypeKind    => 2,
-      LLVMX86_FP80TypeKind  => 3,
-      LLVMFP128TypeKind     => 4,
-      LLVMPPC_FP128TypeKind => 5,
-      LLVMLabelTypeKind     => 6,
-      LLVMIntegerTypeKind   => 7,
-      LLVMFunctionTypeKind  => 8,
-      LLVMStructTypeKind    => 9,
-      LLVMArrayTypeKind     => 10,
-      LLVMPointerTypeKind   => 11,
-      LLVMOpaqueTypeKind    => 12,
-      LLVMVectorTypeKind    => 13,
-      LLVMMetadataTypeKind  => 14);
-
-   pragma Convention (C, LLVMTypeKind);
-
-   type LLVMTypeKind_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeKind;
-
-   type LLVMTypeKind_view is access all llvm.LLVMTypeKind;
-
-   -- LLVMLinkage
-   --
-   type LLVMLinkage is (
-      LLVMExternalLinkage,
-      LLVMAvailableExternallyLinkage,
-      LLVMLinkOnceAnyLinkage,
-      LLVMLinkOnceODRLinkage,
-      LLVMWeakAnyLinkage,
-      LLVMWeakODRLinkage,
-      LLVMAppendingLinkage,
-      LLVMInternalLinkage,
-      LLVMPrivateLinkage,
-      LLVMDLLImportLinkage,
-      LLVMDLLExportLinkage,
-      LLVMExternalWeakLinkage,
-      LLVMGhostLinkage,
-      LLVMCommonLinkage,
-      LLVMLinkerPrivateLinkage,
-      LLVMLinkerPrivateWeakLinkage,
-      LinkerPrivateWeakDefAutoLinkage);
-
-   for LLVMLinkage use
-     (LLVMExternalLinkage             => 0,
-      LLVMAvailableExternallyLinkage  => 1,
-      LLVMLinkOnceAnyLinkage          => 2,
-      LLVMLinkOnceODRLinkage          => 3,
-      LLVMWeakAnyLinkage              => 4,
-      LLVMWeakODRLinkage              => 5,
-      LLVMAppendingLinkage            => 6,
-      LLVMInternalLinkage             => 7,
-      LLVMPrivateLinkage              => 8,
-      LLVMDLLImportLinkage            => 9,
-      LLVMDLLExportLinkage            => 10,
-      LLVMExternalWeakLinkage         => 11,
-      LLVMGhostLinkage                => 12,
-      LLVMCommonLinkage               => 13,
-      LLVMLinkerPrivateLinkage        => 14,
-      LLVMLinkerPrivateWeakLinkage    => 15,
-      LinkerPrivateWeakDefAutoLinkage => 16);
-
-   pragma Convention (C, LLVMLinkage);
-
-   type LLVMLinkage_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMLinkage;
-
-   type LLVMLinkage_view is access all llvm.LLVMLinkage;
-
-   -- LLVMVisibility
-   --
-   type LLVMVisibility is (
-      LLVMDefaultVisibility,
-      LLVMHiddenVisibility,
-      LLVMProtectedVisibility);
-
-   for LLVMVisibility use
-     (LLVMDefaultVisibility   => 0,
-      LLVMHiddenVisibility    => 1,
-      LLVMProtectedVisibility => 2);
-
-   pragma Convention (C, LLVMVisibility);
-
-   type LLVMVisibility_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMVisibility;
-
-   type LLVMVisibility_view is access all llvm.LLVMVisibility;
-
-   -- LLVMCallConv
-   --
-   type LLVMCallConv is (
-      LLVMCCallConv,
-      LLVMFastCallConv,
-      LLVMColdCallConv,
-      LLVMX86StdcallCallConv,
-      LLVMX86FastcallCallConv);
-
-   for LLVMCallConv use
-     (LLVMCCallConv           => 0,
-      LLVMFastCallConv        => 8,
-      LLVMColdCallConv        => 9,
-      LLVMX86StdcallCallConv  => 64,
-      LLVMX86FastcallCallConv => 65);
-
-   pragma Convention (C, LLVMCallConv);
-
-   type LLVMCallConv_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCallConv;
-
-   type LLVMCallConv_view is access all llvm.LLVMCallConv;
-
-   -- LLVMIntPredicate
-   --
-   type LLVMIntPredicate is (
-      LLVMIntEQ,
-      LLVMIntNE,
-      LLVMIntUGT,
-      LLVMIntUGE,
-      LLVMIntULT,
-      LLVMIntULE,
-      LLVMIntSGT,
-      LLVMIntSGE,
-      LLVMIntSLT,
-      LLVMIntSLE);
-
-   for LLVMIntPredicate use
-     (LLVMIntEQ  => 32,
-      LLVMIntNE  => 33,
-      LLVMIntUGT => 34,
-      LLVMIntUGE => 35,
-      LLVMIntULT => 36,
-      LLVMIntULE => 37,
-      LLVMIntSGT => 38,
-      LLVMIntSGE => 39,
-      LLVMIntSLT => 40,
-      LLVMIntSLE => 41);
-
-   pragma Convention (C, LLVMIntPredicate);
-
-   type LLVMIntPredicate_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMIntPredicate;
-
-   type LLVMIntPredicate_view is access all llvm.LLVMIntPredicate;
-
-   -- LLVMRealPredicate
-   --
-   type LLVMRealPredicate is (
-      LLVMRealPredicateFalse,
-      LLVMRealOEQ,
-      LLVMRealOGT,
-      LLVMRealOGE,
-      LLVMRealOLT,
-      LLVMRealOLE,
-      LLVMRealONE,
-      LLVMRealORD,
-      LLVMRealUNO,
-      LLVMRealUEQ,
-      LLVMRealUGT,
-      LLVMRealUGE,
-      LLVMRealULT,
-      LLVMRealULE,
-      LLVMRealUNE,
-      LLVMRealPredicateTrue);
-
-   for LLVMRealPredicate use
-     (LLVMRealPredicateFalse => 0,
-      LLVMRealOEQ            => 1,
-      LLVMRealOGT            => 2,
-      LLVMRealOGE            => 3,
-      LLVMRealOLT            => 4,
-      LLVMRealOLE            => 5,
-      LLVMRealONE            => 6,
-      LLVMRealORD            => 7,
-      LLVMRealUNO            => 8,
-      LLVMRealUEQ            => 9,
-      LLVMRealUGT            => 10,
-      LLVMRealUGE            => 11,
-      LLVMRealULT            => 12,
-      LLVMRealULE            => 13,
-      LLVMRealUNE            => 14,
-      LLVMRealPredicateTrue  => 15);
-
-   pragma Convention (C, LLVMRealPredicate);
-
-   type LLVMRealPredicate_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased llvm.LLVMRealPredicate;
-
-   type LLVMRealPredicate_view is access all llvm.LLVMRealPredicate;
-
-   -- ModuleProvider
-   --
-   type ModuleProvider is new Interfaces.C.Extensions.incomplete_class_def;
-
-   type ModuleProvider_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.ModuleProvider;
-
-   type ModuleProvider_view is access all llvm.ModuleProvider;
-
-   -- MemoryBuffer
-   --
-   type MemoryBuffer is new Interfaces.C.Extensions.incomplete_class_def;
-
-   type MemoryBuffer_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.MemoryBuffer;
-
-   type MemoryBuffer_view is access all llvm.MemoryBuffer;
-
-   -- PassManagerBase
-   --
-   type PassManagerBase is new Interfaces.C.Extensions.incomplete_class_def;
-
-   type PassManagerBase_array is
-     array (Interfaces.C.size_t range <>) of aliased llvm.PassManagerBase;
-
-   type PassManagerBase_view is access all llvm.PassManagerBase;
-
-end llvm;
diff --git a/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads b/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
deleted file mode 100644
index 7c0b086b4282..000000000000
--- a/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
+++ /dev/null
@@ -1,207 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Strings;
-
-
-package LLVM_link_time_Optimizer.Binding is
-
-   LTO_H           : constant := 1;
-   LTO_API_VERSION : constant := 3;
-
-   function lto_get_version return  Interfaces.C.Strings.chars_ptr;
-
-   function lto_get_error_message return  Interfaces.C.Strings.chars_ptr;
-
-   function lto_module_is_object_file
-     (path : in Interfaces.C.Strings.chars_ptr)
-      return Interfaces.C.Extensions.bool;
-
-   function lto_module_is_object_file_for_target
-     (path                 : in Interfaces.C.Strings.chars_ptr;
-      target_triple_prefix : in Interfaces.C.Strings.chars_ptr)
-      return                 Interfaces.C.Extensions.bool;
-
-   function lto_module_is_object_file_in_memory
-     (mem    : access Interfaces.C.Extensions.void;
-      length : in Interfaces.C.size_t)
-      return   Interfaces.C.Extensions.bool;
-
-   function lto_module_is_object_file_in_memory_for_target
-     (mem                  : access Interfaces.C.Extensions.void;
-      length               : in Interfaces.C.size_t;
-      target_triple_prefix : in Interfaces.C.Strings.chars_ptr)
-      return                 Interfaces.C.Extensions.bool;
-
-   function lto_module_create
-     (path : in Interfaces.C.Strings.chars_ptr)
-      return LLVM_link_time_Optimizer.lto_module_t;
-
-   function lto_module_create_from_memory
-     (mem    : access Interfaces.C.Extensions.void;
-      length : in Interfaces.C.size_t)
-      return   LLVM_link_time_Optimizer.lto_module_t;
-
-   procedure lto_module_dispose
-     (the_mod : in LLVM_link_time_Optimizer.lto_module_t);
-
-   function lto_module_get_target_triple
-     (the_mod : in LLVM_link_time_Optimizer.lto_module_t)
-      return    Interfaces.C.Strings.chars_ptr;
-
-   function lto_module_get_num_symbols
-     (the_mod : in LLVM_link_time_Optimizer.lto_module_t)
-      return    Interfaces.C.unsigned;
-
-   function lto_module_get_symbol_name
-     (the_mod : in LLVM_link_time_Optimizer.lto_module_t;
-      index   : in Interfaces.C.unsigned)
-      return    Interfaces.C.Strings.chars_ptr;
-
-   function lto_module_get_symbol_attribute
-     (the_mod : in LLVM_link_time_Optimizer.lto_module_t;
-      index   : in Interfaces.C.unsigned)
-      return    LLVM_link_time_Optimizer.lto_symbol_attributes;
-
-   function lto_codegen_create return  LLVM_link_time_Optimizer.lto_code_gen_t;
-
-   procedure lto_codegen_dispose
-     (arg_1 : in LLVM_link_time_Optimizer.lto_code_gen_t);
-
-   function lto_codegen_add_module
-     (cg      : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      the_mod : in LLVM_link_time_Optimizer.lto_module_t)
-      return    Interfaces.C.Extensions.bool;
-
-   function lto_codegen_set_debug_model
-     (cg    : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      arg_1 : in LLVM_link_time_Optimizer.lto_debug_model)
-      return  Interfaces.C.Extensions.bool;
-
-   function lto_codegen_set_pic_model
-     (cg    : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      arg_1 : in LLVM_link_time_Optimizer.lto_codegen_model)
-      return  Interfaces.C.Extensions.bool;
-
-   procedure lto_codegen_set_gcc_path
-     (cg   : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      path : in Interfaces.C.Strings.chars_ptr);
-
-   procedure lto_codegen_set_assembler_path
-     (cg   : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      path : in Interfaces.C.Strings.chars_ptr);
-
-   procedure lto_codegen_add_must_preserve_symbol
-     (cg     : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      symbol : in Interfaces.C.Strings.chars_ptr);
-
-   function lto_codegen_write_merged_modules
-     (cg   : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      path : in Interfaces.C.Strings.chars_ptr)
-      return Interfaces.C.Extensions.bool;
-
-   function lto_codegen_compile
-     (cg     : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      length : access Interfaces.C.size_t)
-      return   access Interfaces.C.Extensions.void;
-
-   procedure lto_codegen_debug_options
-     (cg    : in LLVM_link_time_Optimizer.lto_code_gen_t;
-      arg_1 : in Interfaces.C.Strings.chars_ptr);
-
-   function llvm_create_optimizer return
-     LLVM_link_time_Optimizer.llvm_lto_t;
-
-   procedure llvm_destroy_optimizer
-     (lto : in LLVM_link_time_Optimizer.llvm_lto_t);
-
-   function llvm_read_object_file
-     (lto            : in LLVM_link_time_Optimizer.llvm_lto_t;
-      input_filename : in Interfaces.C.Strings.chars_ptr)
-      return           LLVM_link_time_Optimizer.llvm_lto_status_t;
-
-   function llvm_optimize_modules
-     (lto             : in LLVM_link_time_Optimizer.llvm_lto_t;
-      output_filename : in Interfaces.C.Strings.chars_ptr)
-      return            LLVM_link_time_Optimizer.llvm_lto_status_t;
-
-private
-
-   pragma Import (C, lto_get_version, "Ada_lto_get_version");
-   pragma Import (C, lto_get_error_message, "Ada_lto_get_error_message");
-   pragma Import
-     (C,
-      lto_module_is_object_file,
-      "Ada_lto_module_is_object_file");
-   pragma Import
-     (C,
-      lto_module_is_object_file_for_target,
-      "Ada_lto_module_is_object_file_for_target");
-   pragma Import
-     (C,
-      lto_module_is_object_file_in_memory,
-      "Ada_lto_module_is_object_file_in_memory");
-   pragma Import
-     (C,
-      lto_module_is_object_file_in_memory_for_target,
-      "Ada_lto_module_is_object_file_in_memory_for_target");
-   pragma Import (C, lto_module_create, "Ada_lto_module_create");
-   pragma Import
-     (C,
-      lto_module_create_from_memory,
-      "Ada_lto_module_create_from_memory");
-   pragma Import (C, lto_module_dispose, "Ada_lto_module_dispose");
-   pragma Import
-     (C,
-      lto_module_get_target_triple,
-      "Ada_lto_module_get_target_triple");
-   pragma Import
-     (C,
-      lto_module_get_num_symbols,
-      "Ada_lto_module_get_num_symbols");
-   pragma Import
-     (C,
-      lto_module_get_symbol_name,
-      "Ada_lto_module_get_symbol_name");
-   pragma Import
-     (C,
-      lto_module_get_symbol_attribute,
-      "Ada_lto_module_get_symbol_attribute");
-   pragma Import (C, lto_codegen_create, "Ada_lto_codegen_create");
-   pragma Import (C, lto_codegen_dispose, "Ada_lto_codegen_dispose");
-   pragma Import (C, lto_codegen_add_module, "Ada_lto_codegen_add_module");
-   pragma Import
-     (C,
-      lto_codegen_set_debug_model,
-      "Ada_lto_codegen_set_debug_model");
-   pragma Import
-     (C,
-      lto_codegen_set_pic_model,
-      "Ada_lto_codegen_set_pic_model");
-   pragma Import
-     (C,
-      lto_codegen_set_gcc_path,
-      "Ada_lto_codegen_set_gcc_path");
-   pragma Import
-     (C,
-      lto_codegen_set_assembler_path,
-      "Ada_lto_codegen_set_assembler_path");
-   pragma Import
-     (C,
-      lto_codegen_add_must_preserve_symbol,
-      "Ada_lto_codegen_add_must_preserve_symbol");
-   pragma Import
-     (C,
-      lto_codegen_write_merged_modules,
-      "Ada_lto_codegen_write_merged_modules");
-   pragma Import (C, lto_codegen_compile, "Ada_lto_codegen_compile");
-   pragma Import
-     (C,
-      lto_codegen_debug_options,
-      "Ada_lto_codegen_debug_options");
-   pragma Import (C, llvm_create_optimizer, "Ada_llvm_create_optimizer");
-   pragma Import (C, llvm_destroy_optimizer, "Ada_llvm_destroy_optimizer");
-   pragma Import (C, llvm_read_object_file, "Ada_llvm_read_object_file");
-   pragma Import (C, llvm_optimize_modules, "Ada_llvm_optimize_modules");
-
-end LLVM_link_time_Optimizer.Binding;
diff --git a/bindings/ada/llvm/llvm_link_time_optimizer.ads b/bindings/ada/llvm/llvm_link_time_optimizer.ads
deleted file mode 100644
index c27f7c5893b6..000000000000
--- a/bindings/ada/llvm/llvm_link_time_optimizer.ads
+++ /dev/null
@@ -1,184 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Extensions;
-
-
-package LLVM_link_time_Optimizer is
-
-   -- lto_symbol_attributes
-   --
-   type lto_symbol_attributes is (
-      LTO_SYMBOL_ALIGNMENT_MASK,
-      LTO_SYMBOL_PERMISSIONS_RODATA,
-      LTO_SYMBOL_PERMISSIONS_CODE,
-      LTO_SYMBOL_PERMISSIONS_DATA,
-      LTO_SYMBOL_PERMISSIONS_MASK,
-      LTO_SYMBOL_DEFINITION_REGULAR,
-      LTO_SYMBOL_DEFINITION_TENTATIVE,
-      LTO_SYMBOL_DEFINITION_WEAK,
-      LTO_SYMBOL_DEFINITION_UNDEFINED,
-      LTO_SYMBOL_DEFINITION_WEAKUNDEF,
-      LTO_SYMBOL_DEFINITION_MASK,
-      LTO_SYMBOL_SCOPE_INTERNAL,
-      LTO_SYMBOL_SCOPE_HIDDEN,
-      LTO_SYMBOL_SCOPE_DEFAULT,
-      LTO_SYMBOL_SCOPE_PROTECTED,
-      LTO_SYMBOL_SCOPE_MASK);
-
-   for lto_symbol_attributes use
-     (LTO_SYMBOL_ALIGNMENT_MASK       => 31,
-      LTO_SYMBOL_PERMISSIONS_RODATA   => 128,
-      LTO_SYMBOL_PERMISSIONS_CODE     => 160,
-      LTO_SYMBOL_PERMISSIONS_DATA     => 192,
-      LTO_SYMBOL_PERMISSIONS_MASK     => 224,
-      LTO_SYMBOL_DEFINITION_REGULAR   => 256,
-      LTO_SYMBOL_DEFINITION_TENTATIVE => 512,
-      LTO_SYMBOL_DEFINITION_WEAK      => 768,
-      LTO_SYMBOL_DEFINITION_UNDEFINED => 1024,
-      LTO_SYMBOL_DEFINITION_WEAKUNDEF => 1280,
-      LTO_SYMBOL_DEFINITION_MASK      => 1792,
-      LTO_SYMBOL_SCOPE_INTERNAL       => 2048,
-      LTO_SYMBOL_SCOPE_HIDDEN         => 4096,
-      LTO_SYMBOL_SCOPE_DEFAULT        => 6144,
-      LTO_SYMBOL_SCOPE_PROTECTED      => 8192,
-      LTO_SYMBOL_SCOPE_MASK           => 14336);
-
-   pragma Convention (C, lto_symbol_attributes);
-
-   type lto_symbol_attributes_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.lto_symbol_attributes;
-
-   type lto_symbol_attributes_view is access all
-     LLVM_link_time_Optimizer.lto_symbol_attributes;
-
-   -- lto_debug_model
-   --
-   type lto_debug_model is (LTO_DEBUG_MODEL_NONE, LTO_DEBUG_MODEL_DWARF);
-
-   for lto_debug_model use
-     (LTO_DEBUG_MODEL_NONE  => 0,
-      LTO_DEBUG_MODEL_DWARF => 1);
-
-   pragma Convention (C, lto_debug_model);
-
-   type lto_debug_model_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.lto_debug_model;
-
-   type lto_debug_model_view is access all
-     LLVM_link_time_Optimizer.lto_debug_model;
-
-   -- lto_codegen_model
-   --
-   type lto_codegen_model is (
-      LTO_CODEGEN_PIC_MODEL_STATIC,
-      LTO_CODEGEN_PIC_MODEL_DYNAMIC,
-      LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC);
-
-   for lto_codegen_model use
-     (LTO_CODEGEN_PIC_MODEL_STATIC         => 0,
-      LTO_CODEGEN_PIC_MODEL_DYNAMIC        => 1,
-      LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC => 2);
-
-   pragma Convention (C, lto_codegen_model);
-
-   type lto_codegen_model_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.lto_codegen_model;
-
-   type lto_codegen_model_view is access all
-     LLVM_link_time_Optimizer.lto_codegen_model;
-
-   -- LTOModule
-   --
-   type LTOModule is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LTOModule_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.LTOModule;
-
-   type LTOModule_view is access all LLVM_link_time_Optimizer.LTOModule;
-
-   -- lto_module_t
-   --
-   type lto_module_t is access all LLVM_link_time_Optimizer.LTOModule;
-
-   type lto_module_t_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.lto_module_t;
-
-   type lto_module_t_view is access all LLVM_link_time_Optimizer.lto_module_t;
-
-   -- LTOCodeGenerator
-   --
-   type LTOCodeGenerator is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LTOCodeGenerator_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.LTOCodeGenerator;
-
-   type LTOCodeGenerator_view is access all
-     LLVM_link_time_Optimizer.LTOCodeGenerator;
-
-   -- lto_code_gen_t
-   --
-   type lto_code_gen_t is access all LLVM_link_time_Optimizer.LTOCodeGenerator;
-
-   type lto_code_gen_t_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.lto_code_gen_t;
-
-   type lto_code_gen_t_view is access all
-     LLVM_link_time_Optimizer.lto_code_gen_t;
-
-   -- llvm_lto_status_t
-   --
-   type llvm_lto_status_t is (
-      LLVM_LTO_UNKNOWN,
-      LLVM_LTO_OPT_SUCCESS,
-      LLVM_LTO_READ_SUCCESS,
-      LLVM_LTO_READ_FAILURE,
-      LLVM_LTO_WRITE_FAILURE,
-      LLVM_LTO_NO_TARGET,
-      LLVM_LTO_NO_WORK,
-      LLVM_LTO_MODULE_MERGE_FAILURE,
-      LLVM_LTO_ASM_FAILURE,
-      LLVM_LTO_NULL_OBJECT);
-
-   for llvm_lto_status_t use
-     (LLVM_LTO_UNKNOWN              => 0,
-      LLVM_LTO_OPT_SUCCESS          => 1,
-      LLVM_LTO_READ_SUCCESS         => 2,
-      LLVM_LTO_READ_FAILURE         => 3,
-      LLVM_LTO_WRITE_FAILURE        => 4,
-      LLVM_LTO_NO_TARGET            => 5,
-      LLVM_LTO_NO_WORK              => 6,
-      LLVM_LTO_MODULE_MERGE_FAILURE => 7,
-      LLVM_LTO_ASM_FAILURE          => 8,
-      LLVM_LTO_NULL_OBJECT          => 9);
-
-   pragma Convention (C, llvm_lto_status_t);
-
-   type llvm_lto_status_t_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.llvm_lto_status_t;
-
-   type llvm_lto_status_t_view is access all
-     LLVM_link_time_Optimizer.llvm_lto_status_t;
-
-
-   -- llvm_lto_t
-   --
-   type llvm_lto_t is access all Interfaces.C.Extensions.void;
-
-   type llvm_lto_t_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_link_time_Optimizer.llvm_lto_t;
-
-   type llvm_lto_t_view is access all
-     LLVM_link_time_Optimizer.llvm_lto_t;
-
-
-end LLVM_link_time_Optimizer;
diff --git a/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx b/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
deleted file mode 100644
index eb2e7ab15633..000000000000
--- a/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
+++ /dev/null
@@ -1,923 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_link_time_Optimizer (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_link_time_Optimizer(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/lto.h"
-#include "llvm-c/LinkTimeOptimizer.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport char * SWIGSTDCALL Ada_lto_get_version (
-  )
-{
-  char * jresult ;
-  char *result = 0 ;
-  
-  result = (char *)lto_get_version();
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_lto_get_error_message (
-  )
-{
-  char * jresult ;
-  char *result = 0 ;
-  
-  result = (char *)lto_get_error_message();
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file (
-  char * jarg1
-  )
-{
-  unsigned int jresult ;
-  char *arg1 = (char *) 0 ;
-  bool result;
-  
-  arg1 = jarg1; 
-  
-  result = (bool)lto_module_is_object_file((char const *)arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_for_target (
-  char * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  unsigned int jresult ;
-  char *arg1 = (char *) 0 ;
-  char *arg2 = (char *) 0 ;
-  bool result;
-  
-  arg1 = jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (bool)lto_module_is_object_file_for_target((char const *)arg1,(char const *)arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory (
-  void* jarg1
-  ,
-  
-  size_t jarg2
-  )
-{
-  unsigned int jresult ;
-  void *arg1 = (void *) 0 ;
-  size_t arg2 ;
-  bool result;
-  
-  arg1 = (void *)jarg1; 
-  
-  
-  arg2 = (size_t) jarg2; 
-  
-  
-  result = (bool)lto_module_is_object_file_in_memory((void const *)arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory_for_target (
-  void* jarg1
-  ,
-  
-  size_t jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  unsigned int jresult ;
-  void *arg1 = (void *) 0 ;
-  size_t arg2 ;
-  char *arg3 = (char *) 0 ;
-  bool result;
-  
-  arg1 = (void *)jarg1; 
-  
-  
-  arg2 = (size_t) jarg2; 
-  
-  
-  arg3 = jarg3; 
-  
-  result = (bool)lto_module_is_object_file_in_memory_for_target((void const *)arg1,arg2,(char const *)arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_lto_module_create (
-  char * jarg1
-  )
-{
-  void * jresult ;
-  char *arg1 = (char *) 0 ;
-  lto_module_t result;
-  
-  arg1 = jarg1; 
-  
-  result = (lto_module_t)lto_module_create((char const *)arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_lto_module_create_from_memory (
-  void* jarg1
-  ,
-  
-  size_t jarg2
-  )
-{
-  void * jresult ;
-  void *arg1 = (void *) 0 ;
-  size_t arg2 ;
-  lto_module_t result;
-  
-  arg1 = (void *)jarg1; 
-  
-  
-  arg2 = (size_t) jarg2; 
-  
-  
-  result = (lto_module_t)lto_module_create_from_memory((void const *)arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_module_dispose (
-  void * jarg1
-  )
-{
-  lto_module_t arg1 = (lto_module_t) 0 ;
-  
-  arg1 = (lto_module_t)jarg1; 
-  
-  lto_module_dispose(arg1);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_lto_module_get_target_triple (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  lto_module_t arg1 = (lto_module_t) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (lto_module_t)jarg1; 
-  
-  result = (char *)lto_module_get_target_triple(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_module_get_num_symbols (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  lto_module_t arg1 = (lto_module_t) 0 ;
-  unsigned int result;
-  
-  arg1 = (lto_module_t)jarg1; 
-  
-  result = (unsigned int)lto_module_get_num_symbols(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_lto_module_get_symbol_name (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  char * jresult ;
-  lto_module_t arg1 = (lto_module_t) 0 ;
-  unsigned int arg2 ;
-  char *result = 0 ;
-  
-  arg1 = (lto_module_t)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (char *)lto_module_get_symbol_name(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_lto_module_get_symbol_attribute (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  int jresult ;
-  lto_module_t arg1 = (lto_module_t) 0 ;
-  unsigned int arg2 ;
-  lto_symbol_attributes result;
-  
-  arg1 = (lto_module_t)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (lto_symbol_attributes)lto_module_get_symbol_attribute(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_lto_codegen_create (
-  )
-{
-  void * jresult ;
-  lto_code_gen_t result;
-  
-  result = (lto_code_gen_t)lto_codegen_create();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_codegen_dispose (
-  void * jarg1
-  )
-{
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  lto_codegen_dispose(arg1);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_add_module (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned int jresult ;
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  lto_module_t arg2 = (lto_module_t) 0 ;
-  bool result;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = (lto_module_t)jarg2; 
-  
-  result = (bool)lto_codegen_add_module(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_debug_model (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  unsigned int jresult ;
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  lto_debug_model arg2 ;
-  bool result;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = (lto_debug_model) jarg2; 
-  
-  result = (bool)lto_codegen_set_debug_model(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_pic_model (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  unsigned int jresult ;
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  lto_codegen_model arg2 ;
-  bool result;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = (lto_codegen_model) jarg2; 
-  
-  result = (bool)lto_codegen_set_pic_model(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_codegen_set_gcc_path (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  lto_codegen_set_gcc_path(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_codegen_set_assembler_path (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  lto_codegen_set_assembler_path(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_codegen_add_must_preserve_symbol (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  lto_codegen_add_must_preserve_symbol(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_write_merged_modules (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  unsigned int jresult ;
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  bool result;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (bool)lto_codegen_write_merged_modules(arg1,(char const *)arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void* SWIGSTDCALL Ada_lto_codegen_compile (
-  void * jarg1
-  ,
-  
-  size_t* jarg2
-  )
-{
-  void* jresult ;
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  size_t *arg2 = (size_t *) 0 ;
-  void *result = 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  
-  arg2 = (size_t *) jarg2;
-  
-  
-  result = (void *)lto_codegen_compile(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_lto_codegen_debug_options (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  lto_code_gen_t arg1 = (lto_code_gen_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (lto_code_gen_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  lto_codegen_debug_options(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void* SWIGSTDCALL Ada_llvm_create_optimizer (
-  )
-{
-  void* jresult ;
-  llvm_lto_t result;
-  
-  result = (llvm_lto_t)llvm_create_optimizer();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_llvm_destroy_optimizer (
-  void* jarg1
-  )
-{
-  llvm_lto_t arg1 = (llvm_lto_t) 0 ;
-  
-  arg1 = (llvm_lto_t)jarg1; 
-  
-  llvm_destroy_optimizer(arg1);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_llvm_read_object_file (
-  void* jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  int jresult ;
-  llvm_lto_t arg1 = (llvm_lto_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  llvm_lto_status_t result;
-  
-  arg1 = (llvm_lto_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (llvm_lto_status_t)llvm_read_object_file(arg1,(char const *)arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_llvm_optimize_modules (
-  void* jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  int jresult ;
-  llvm_lto_t arg1 = (llvm_lto_t) 0 ;
-  char *arg2 = (char *) 0 ;
-  llvm_lto_status_t result;
-  
-  arg1 = (llvm_lto_t)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (llvm_lto_status_t)llvm_optimize_modules(arg1,(char const *)arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/llvm/llvm_wrap.cxx b/bindings/ada/llvm/llvm_wrap.cxx
deleted file mode 100644
index 79b19ff4c0bb..000000000000
--- a/bindings/ada/llvm/llvm_wrap.cxx
+++ /dev/null
@@ -1,8817 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_llvm (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_llvm(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-//#include "llvm-c/Analysis.h"
-//#include "llvm-c/BitReader.h"
-//#include "llvm-c/BitWriter.h"
-#include "llvm-c/Core.h"
-//#include "llvm-c/ExecutionEngine.h"
-//#include "llvm-c/LinkTimeOptimizer.h"
-//#include "llvm-c/lto.h"
-//#include "llvm-c/Target.h"
-
-
-
-  struct LLVMCtxt;
-//  struct LLVMOpaqueType;
-//  struct LLVMOpaqueValue;
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport void SWIGSTDCALL Ada_LLVMDisposeMessage (
-  char * jarg1
-  )
-{
-  char *arg1 = (char *) 0 ;
-  
-  arg1 = jarg1; 
-  
-  LLVMDisposeMessage(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMContextCreate (
-  )
-{
-  void * jresult ;
-  LLVMContextRef result;
-  
-  result = (LLVMContextRef)LLVMContextCreate();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalContext (
-  )
-{
-  void * jresult ;
-  LLVMContextRef result;
-  
-  result = (LLVMContextRef)LLVMGetGlobalContext();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMContextDispose (
-  void * jarg1
-  )
-{
-  LLVMContextRef arg1 = (LLVMContextRef) 0 ;
-  
-  arg1 = (LLVMContextRef)jarg1; 
-  
-  LLVMContextDispose(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithName (
-  char * jarg1
-  )
-{
-  void * jresult ;
-  char *arg1 = (char *) 0 ;
-  LLVMModuleRef result;
-  
-  arg1 = jarg1; 
-  
-  result = (LLVMModuleRef)LLVMModuleCreateWithName((char const *)arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithNameInContext (
-  char * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  char *arg1 = (char *) 0 ;
-  LLVMContextRef arg2 = (LLVMContextRef) 0 ;
-  LLVMModuleRef result;
-  
-  arg1 = jarg1; 
-  
-  arg2 = (LLVMContextRef)jarg2; 
-  
-  result = (LLVMModuleRef)LLVMModuleCreateWithNameInContext((char const *)arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeModule (
-  void * jarg1
-  )
-{
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  LLVMDisposeModule(arg1);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMGetDataLayout (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (char *)LLVMGetDataLayout(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetDataLayout (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMSetDataLayout(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMGetTarget (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (char *)LLVMGetTarget(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetTarget (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMSetTarget(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMAddTypeName (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  result = (int)LLVMAddTypeName(arg1,(char const *)arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDeleteTypeName (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMDeleteTypeName(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetTypeByName (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMTypeRef)LLVMGetTypeByName(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDumpModule (
-  void * jarg1
-  )
-{
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  LLVMDumpModule(arg1);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMGetTypeKind (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeKind result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMTypeKind)LLVMGetTypeKind(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInt1Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMInt1Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInt8Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMInt8Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInt16Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMInt16Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInt32Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMInt32Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInt64Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMInt64Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIntType (
-  unsigned int jarg1
-  )
-{
-  void * jresult ;
-  unsigned int arg1 ;
-  LLVMTypeRef result;
-  
-  
-  arg1 = (unsigned int) jarg1; 
-  
-  
-  result = (LLVMTypeRef)LLVMIntType(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntTypeWidth (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetIntTypeWidth(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMFloatType (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMFloatType();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMDoubleType (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMDoubleType();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMX86FP80Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMX86FP80Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMFP128Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMFP128Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMPPCFP128Type (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMPPCFP128Type();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMFunctionType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  ,
-  
-  int jarg4
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
-  unsigned int arg3 ;
-  int arg4 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef *)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  
-  arg4 = (int) jarg4; 
-  
-  
-  result = (LLVMTypeRef)LLVMFunctionType(arg1,arg2,arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsFunctionVarArg (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (int)LLVMIsFunctionVarArg(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetReturnType (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMTypeRef)LLVMGetReturnType(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParamTypes (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMCountParamTypes(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMGetParamTypes (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef *)jarg2; 
-  
-  LLVMGetParamTypes(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMStructType (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  void * jresult ;
-  LLVMTypeRef *arg1 = (LLVMTypeRef *) 0 ;
-  unsigned int arg2 ;
-  int arg3 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef *)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  
-  arg3 = (int) jarg3; 
-  
-  
-  result = (LLVMTypeRef)LLVMStructType(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCountStructElementTypes (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMCountStructElementTypes(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMGetStructElementTypes (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef *)jarg2; 
-  
-  LLVMGetStructElementTypes(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsPackedStruct (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (int)LLVMIsPackedStruct(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMArrayType (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int arg2 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMTypeRef)LLVMArrayType(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMPointerType (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int arg2 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMTypeRef)LLVMPointerType(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMVectorType (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int arg2 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMTypeRef)LLVMVectorType(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetElementType (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMTypeRef)LLVMGetElementType(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetArrayLength (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetArrayLength(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetPointerAddressSpace (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetPointerAddressSpace(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetVectorSize (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetVectorSize(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMVoidType (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMVoidType();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMLabelType (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMLabelType();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMOpaqueType (
-  )
-{
-  void * jresult ;
-  LLVMTypeRef result;
-  
-  result = (LLVMTypeRef)LLVMOpaqueType();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateTypeHandle (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeHandleRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMTypeHandleRef)LLVMCreateTypeHandle(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRefineType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  LLVMRefineType(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMResolveTypeHandle (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeHandleRef arg1 = (LLVMTypeHandleRef) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTypeHandleRef)jarg1; 
-  
-  result = (LLVMTypeRef)LLVMResolveTypeHandle(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeTypeHandle (
-  void * jarg1
-  )
-{
-  LLVMTypeHandleRef arg1 = (LLVMTypeHandleRef) 0 ;
-  
-  arg1 = (LLVMTypeHandleRef)jarg1; 
-  
-  LLVMDisposeTypeHandle(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMTypeOf (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMTypeRef)LLVMTypeOf(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMGetValueName (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (char *)LLVMGetValueName(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetValueName (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMSetValueName(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDumpValue (
-  void * jarg1
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  LLVMDumpValue(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAArgument (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAArgument(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsABasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsABasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAInlineAsm (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAInlineAsm(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUser (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUser(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstant (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstant(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantAggregateZero (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantAggregateZero(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantArray (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantArray(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantExpr (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantExpr(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantFP (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantFP(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantInt (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantInt(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantPointerNull (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantPointerNull(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantStruct (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantStruct(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantVector (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAConstantVector(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalValue (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAGlobalValue(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFunction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFunction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalAlias (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAGlobalAlias(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalVariable (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAGlobalVariable(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUndefValue (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUndefValue(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsABinaryOperator (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsABinaryOperator(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsACallInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsACallInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAIntrinsicInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAIntrinsicInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgInfoIntrinsic (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgInfoIntrinsic(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgDeclareInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgDeclareInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgFuncStartInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgFuncStartInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionEndInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgRegionEndInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionStartInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgRegionStartInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsADbgStopPointInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsADbgStopPointInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAEHSelectorInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAEHSelectorInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAMemIntrinsic (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAMemIntrinsic(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAMemCpyInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAMemCpyInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAMemMoveInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAMemMoveInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAMemSetInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAMemSetInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsACmpInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsACmpInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFCmpInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFCmpInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAICmpInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAICmpInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractElementInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAExtractElementInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAGetElementPtrInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAGetElementPtrInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertElementInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAInsertElementInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertValueInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAInsertValueInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAPHINode (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAPHINode(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsASelectInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsASelectInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAShuffleVectorInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAShuffleVectorInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAStoreInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAStoreInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsATerminatorInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsATerminatorInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsABranchInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsABranchInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAInvokeInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAInvokeInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAReturnInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAReturnInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsASwitchInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsASwitchInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUnreachableInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUnreachableInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUnwindInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUnwindInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUnaryInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUnaryInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocationInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAAllocationInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocaInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAAllocaInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAMallocInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAMallocInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsACastInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsACastInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsABitCastInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsABitCastInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFPExtInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFPExtInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToSIInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFPToSIInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToUIInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFPToUIInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFPTruncInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFPTruncInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAIntToPtrInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAIntToPtrInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAPtrToIntInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAPtrToIntInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsASExtInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsASExtInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsASIToFPInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsASIToFPInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsATruncInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsATruncInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAUIToFPInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAUIToFPInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAZExtInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAZExtInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractValueInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAExtractValueInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAFreeInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAFreeInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsALoadInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsALoadInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIsAVAArgInst (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMIsAVAArgInst(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstNull (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMConstNull(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstAllOnes (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMConstAllOnes(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetUndef (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetUndef(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsConstant (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsConstant(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsNull (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsNull(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsUndef (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsUndef(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstPointerNull (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMConstPointerNull(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstInt (
-  void * jarg1
-  ,
-  
-  unsigned long long jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  unsigned long long arg2 ;
-  int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (unsigned long long) jarg2; 
-  
-  
-  
-  arg3 = (int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstInt(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstReal (
-  void * jarg1
-  ,
-  
-  double jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  double arg2 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  
-  arg2 = (double) jarg2; 
-  
-  
-  result = (LLVMValueRef)LLVMConstReal(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstRealOfString (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstRealOfString(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstString (
-  char * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  void * jresult ;
-  char *arg1 = (char *) 0 ;
-  unsigned int arg2 ;
-  int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  
-  arg3 = (int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstString((char const *)arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstArray (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
-  unsigned int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = (LLVMValueRef *)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstArray(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstStruct (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef *arg1 = (LLVMValueRef *) 0 ;
-  unsigned int arg2 ;
-  int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef *)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  
-  arg3 = (int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstStruct(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstVector (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef *arg1 = (LLVMValueRef *) 0 ;
-  unsigned int arg2 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef *)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMValueRef)LLVMConstVector(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMSizeOf (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMSizeOf(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstNeg (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMConstNeg(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstNot (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMConstNot(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstAdd (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstAdd(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSub (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstSub(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstMul (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstMul(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstUDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstUDiv(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstSDiv(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFDiv(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstURem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstURem(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSRem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstSRem(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFRem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFRem(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstAnd (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstAnd(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstOr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstOr(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstXor (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstXor(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstICmp (
-  int jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMIntPredicate arg1 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMIntPredicate) jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMConstICmp(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFCmp (
-  int jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMRealPredicate arg1 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMRealPredicate) jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMConstFCmp(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstShl (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstShl(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstLShr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstLShr(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstAShr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstAShr(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstGEP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
-  unsigned int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef *)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstGEP(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstTrunc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstTrunc(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstSExt(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstZExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstZExt(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFPTrunc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFPTrunc(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFPExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFPExt(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstUIToFP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstUIToFP(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSIToFP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstSIToFP(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFPToUI (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFPToUI(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstFPToSI (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstFPToSI(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstPtrToInt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstPtrToInt(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstIntToPtr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstIntToPtr(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstBitCast (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstBitCast(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstSelect (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMConstSelect(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstExtractElement (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMConstExtractElement(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstInsertElement (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMConstInsertElement(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstShuffleVector (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMConstShuffleVector(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstExtractValue (
-  void * jarg1
-  ,
-  
-  unsigned int* jarg2
-  ,
-  
-  unsigned int jarg3
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int *arg2 = (unsigned int *) 0 ;
-  unsigned int arg3 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int *) jarg2;
-  
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  result = (LLVMValueRef)LLVMConstExtractValue(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstInsertValue (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int* jarg3
-  ,
-  
-  unsigned int jarg4
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  unsigned int *arg3 = (unsigned int *) 0 ;
-  unsigned int arg4 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  
-  arg3 = (unsigned int *) jarg3;
-  
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  result = (LLVMValueRef)LLVMConstInsertValue(arg1,arg2,arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMConstInlineAsm (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  ,
-  
-  char * jarg3
-  ,
-  
-  int jarg4
-  )
-{
-  void * jresult ;
-  LLVMTypeRef arg1 = (LLVMTypeRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  char *arg3 = (char *) 0 ;
-  int arg4 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMTypeRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  arg3 = jarg3; 
-  
-  
-  arg4 = (int) jarg4; 
-  
-  
-  result = (LLVMValueRef)LLVMConstInlineAsm(arg1,(char const *)arg2,(char const *)arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalParent (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMModuleRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMModuleRef)LLVMGetGlobalParent(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsDeclaration (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsDeclaration(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMGetLinkage (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMLinkage result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMLinkage)LLVMGetLinkage(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetLinkage (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMLinkage arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMLinkage) jarg2; 
-  
-  LLVMSetLinkage(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMGetSection (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (char *)LLVMGetSection(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetSection (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMSetSection(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMGetVisibility (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMVisibility result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMVisibility)LLVMGetVisibility(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetVisibility (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMVisibility arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMVisibility) jarg2; 
-  
-  LLVMSetVisibility(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetAlignment (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetAlignment(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetAlignment (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  LLVMSetAlignment(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMAddGlobal (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMAddGlobal(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNamedGlobal (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMValueRef)LLVMGetNamedGlobal(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetFirstGlobal (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetFirstGlobal(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetLastGlobal (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetLastGlobal(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNextGlobal (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetNextGlobal(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousGlobal (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetPreviousGlobal(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDeleteGlobal (
-  void * jarg1
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  LLVMDeleteGlobal(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetInitializer (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetInitializer(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetInitializer (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  LLVMSetInitializer(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsThreadLocal (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsThreadLocal(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetThreadLocal (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (int) jarg2; 
-  
-  
-  LLVMSetThreadLocal(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsGlobalConstant (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsGlobalConstant(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetGlobalConstant (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (int) jarg2; 
-  
-  
-  LLVMSetGlobalConstant(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMAddAlias (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMAddAlias(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMAddFunction (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMAddFunction(arg1,(char const *)arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNamedFunction (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMValueRef)LLVMGetNamedFunction(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetFirstFunction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetFirstFunction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetLastFunction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetLastFunction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNextFunction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetNextFunction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousFunction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetPreviousFunction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDeleteFunction (
-  void * jarg1
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  LLVMDeleteFunction(arg1);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntrinsicID (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetIntrinsicID(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetFunctionCallConv (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetFunctionCallConv(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetFunctionCallConv (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  LLVMSetFunctionCallConv(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMGetGC (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (char *)LLVMGetGC(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetGC (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  LLVMSetGC(arg1,(char const *)arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttr (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMAttribute arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMAttribute) jarg2; 
-  
-  LLVMAddFunctionAttr(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRemoveFunctionAttr (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMAttribute arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMAttribute) jarg2; 
-  
-  LLVMRemoveFunctionAttr(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParams (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMCountParams(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMGetParams (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef *)jarg2; 
-  
-  LLVMGetParams(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetParam (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMValueRef)LLVMGetParam(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetParamParent (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetParamParent(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetFirstParam (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetFirstParam(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetLastParam (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetLastParam(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNextParam (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetNextParam(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousParam (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetPreviousParam(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddAttribute (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMAttribute arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMAttribute) jarg2; 
-  
-  LLVMAddAttribute(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRemoveAttribute (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMAttribute arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMAttribute) jarg2; 
-  
-  LLVMRemoveAttribute(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetParamAlignment (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  LLVMSetParamAlignment(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBasicBlockAsValue (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMBasicBlockAsValue(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMValueIsBasicBlock (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMValueIsBasicBlock(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMValueAsBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMValueAsBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetBasicBlockParent (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetBasicBlockParent(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCountBasicBlocks (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMCountBasicBlocks(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMGetBasicBlocks (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef *arg2 = (LLVMBasicBlockRef *) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMBasicBlockRef *)jarg2; 
-  
-  LLVMGetBasicBlocks(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetFirstBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetFirstBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetLastBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetLastBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNextBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetNextBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetPreviousBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetEntryBasicBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetEntryBasicBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMAppendBasicBlock (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMBasicBlockRef)LLVMAppendBasicBlock(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMInsertBasicBlock (
-  void * jarg1
-  ,
-  
-  char * jarg2
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  char *arg2 = (char *) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  arg2 = jarg2; 
-  
-  result = (LLVMBasicBlockRef)LLVMInsertBasicBlock(arg1,(char const *)arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDeleteBasicBlock (
-  void * jarg1
-  )
-{
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  LLVMDeleteBasicBlock(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetInstructionParent (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetInstructionParent(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetFirstInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetFirstInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetLastInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBasicBlockRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetLastInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetNextInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetNextInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousInstruction (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMGetPreviousInstruction(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetInstructionCallConv (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  LLVMSetInstructionCallConv(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMGetInstructionCallConv (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMGetInstructionCallConv(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddInstrAttribute (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  LLVMAttribute arg3 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  arg3 = (LLVMAttribute) jarg3; 
-  
-  LLVMAddInstrAttribute(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMRemoveInstrAttribute (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  int jarg3
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  LLVMAttribute arg3 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  arg3 = (LLVMAttribute) jarg3; 
-  
-  LLVMRemoveInstrAttribute(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetInstrParamAlignment (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  ,
-  
-  unsigned int jarg3
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  unsigned int arg3 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  LLVMSetInstrParamAlignment(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMIsTailCall (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (int)LLVMIsTailCall(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMSetTailCall (
-  void * jarg1
-  ,
-  
-  int jarg2
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  int arg2 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (int) jarg2; 
-  
-  
-  LLVMSetTailCall(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddIncoming (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg2 = (LLVMValueRef *) 0 ;
-  LLVMBasicBlockRef *arg3 = (LLVMBasicBlockRef *) 0 ;
-  unsigned int arg4 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef *)jarg2; 
-  
-  arg3 = (LLVMBasicBlockRef *)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  LLVMAddIncoming(arg1,arg2,arg3,arg4);
-  
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCountIncoming (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  result = (unsigned int)LLVMCountIncoming(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingValue (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMValueRef)LLVMGetIncomingValue(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingBlock (
-  void * jarg1
-  ,
-  
-  unsigned int jarg2
-  )
-{
-  void * jresult ;
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  unsigned int arg2 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  
-  arg2 = (unsigned int) jarg2; 
-  
-  
-  result = (LLVMBasicBlockRef)LLVMGetIncomingBlock(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateBuilder (
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef result;
-  
-  result = (LLVMBuilderRef)LLVMCreateBuilder();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMPositionBuilder (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMBasicBlockRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  LLVMPositionBuilder(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderBefore (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  LLVMPositionBuilderBefore(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderAtEnd (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMBasicBlockRef)jarg2; 
-  
-  LLVMPositionBuilderAtEnd(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMGetInsertBlock (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMBasicBlockRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  result = (LLVMBasicBlockRef)LLVMGetInsertBlock(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMClearInsertionPosition (
-  void * jarg1
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  LLVMClearInsertionPosition(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMInsertIntoBuilder (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  LLVMInsertIntoBuilder(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeBuilder (
-  void * jarg1
-  )
-{
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  LLVMDisposeBuilder(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildRetVoid (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMBuildRetVoid(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildRet (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMBuildRet(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildBr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMBasicBlockRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMBuildBr(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildCondBr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
-  LLVMBasicBlockRef arg4 = (LLVMBasicBlockRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMBasicBlockRef)jarg3; 
-  
-  arg4 = (LLVMBasicBlockRef)jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildCondBr(arg1,arg2,arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSwitch (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
-  unsigned int arg4 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMBasicBlockRef)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  result = (LLVMValueRef)LLVMBuildSwitch(arg1,arg2,arg3,arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildInvoke (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  ,
-  
-  void * jarg5
-  ,
-  
-  void * jarg6
-  ,
-  
-  char * jarg7
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
-  unsigned int arg4 ;
-  LLVMBasicBlockRef arg5 = (LLVMBasicBlockRef) 0 ;
-  LLVMBasicBlockRef arg6 = (LLVMBasicBlockRef) 0 ;
-  char *arg7 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef *)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  arg5 = (LLVMBasicBlockRef)jarg5; 
-  
-  arg6 = (LLVMBasicBlockRef)jarg6; 
-  
-  arg7 = jarg7; 
-  
-  result = (LLVMValueRef)LLVMBuildInvoke(arg1,arg2,arg3,arg4,arg5,arg6,(char const *)arg7);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildUnwind (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMBuildUnwind(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildUnreachable (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  result = (LLVMValueRef)LLVMBuildUnreachable(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddCase (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  LLVMValueRef arg1 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ;
-  
-  arg1 = (LLVMValueRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMBasicBlockRef)jarg3; 
-  
-  LLVMAddCase(arg1,arg2,arg3);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildAdd (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildAdd(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSub (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildSub(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildMul (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildMul(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildUDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildUDiv(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildSDiv(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFDiv (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFDiv(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildURem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildURem(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSRem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildSRem(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFRem (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFRem(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildShl (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildShl(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildLShr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildLShr(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildAShr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildAShr(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildAnd (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildAnd(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildOr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildOr(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildXor (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildXor(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildNeg (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildNeg(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildNot (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildNot(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildMalloc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildMalloc(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayMalloc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildArrayMalloc(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildAlloca (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildAlloca(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayAlloca (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildArrayAlloca(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFree (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (LLVMValueRef)LLVMBuildFree(arg1,arg2);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildLoad (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildLoad(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildStore (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildStore(arg1,arg2,arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildGEP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
-  unsigned int arg4 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef *)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildGEP(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildTrunc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildTrunc(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildZExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildZExt(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildSExt(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToUI (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFPToUI(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToSI (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFPToSI(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildUIToFP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildUIToFP(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSIToFP (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildSIToFP(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFPTrunc (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFPTrunc(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFPExt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildFPExt(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildPtrToInt (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildPtrToInt(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildIntToPtr (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildIntToPtr(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildBitCast (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildBitCast(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildICmp (
-  void * jarg1
-  ,
-  
-  int jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMIntPredicate arg2 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg4 = (LLVMValueRef) 0 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMIntPredicate) jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = (LLVMValueRef)jarg4; 
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildICmp(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildFCmp (
-  void * jarg1
-  ,
-  
-  int jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMRealPredicate arg2 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg4 = (LLVMValueRef) 0 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMRealPredicate) jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = (LLVMValueRef)jarg4; 
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildFCmp(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildPhi (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  char * jarg3
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  char *arg3 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  arg3 = jarg3; 
-  
-  result = (LLVMValueRef)LLVMBuildPhi(arg1,arg2,(char const *)arg3);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildCall (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef *arg3 = (LLVMValueRef *) 0 ;
-  unsigned int arg4 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef *)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildCall(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildSelect (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg4 = (LLVMValueRef) 0 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = (LLVMValueRef)jarg4; 
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildSelect(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildVAArg (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMTypeRef arg3 = (LLVMTypeRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMTypeRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildVAArg(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildExtractElement (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildExtractElement(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertElement (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg4 = (LLVMValueRef) 0 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = (LLVMValueRef)jarg4; 
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildInsertElement(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildShuffleVector (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  void * jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg4 = (LLVMValueRef) 0 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  arg4 = (LLVMValueRef)jarg4; 
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildShuffleVector(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildExtractValue (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  ,
-  
-  char * jarg4
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  unsigned int arg3 ;
-  char *arg4 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  arg4 = jarg4; 
-  
-  result = (LLVMValueRef)LLVMBuildExtractValue(arg1,arg2,arg3,(char const *)arg4);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertValue (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  ,
-  
-  unsigned int jarg4
-  ,
-  
-  char * jarg5
-  )
-{
-  void * jresult ;
-  LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  LLVMValueRef arg3 = (LLVMValueRef) 0 ;
-  unsigned int arg4 ;
-  char *arg5 = (char *) 0 ;
-  LLVMValueRef result;
-  
-  arg1 = (LLVMBuilderRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  arg3 = (LLVMValueRef)jarg3; 
-  
-  
-  arg4 = (unsigned int) jarg4; 
-  
-  
-  arg5 = jarg5; 
-  
-  result = (LLVMValueRef)LLVMBuildInsertValue(arg1,arg2,arg3,arg4,(char const *)arg5);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateModuleProviderForExistingModule (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleRef arg1 = (LLVMModuleRef) 0 ;
-  LLVMModuleProviderRef result;
-  
-  arg1 = (LLVMModuleRef)jarg1; 
-  
-  result = (LLVMModuleProviderRef)LLVMCreateModuleProviderForExistingModule(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeModuleProvider (
-  void * jarg1
-  )
-{
-  LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ;
-  
-  arg1 = (LLVMModuleProviderRef)jarg1; 
-  
-  LLVMDisposeModuleProvider(arg1);
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithContentsOfFile (
-  char * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  void * jarg3
-  )
-{
-  int jresult ;
-  char *arg1 = (char *) 0 ;
-  LLVMMemoryBufferRef *arg2 = (LLVMMemoryBufferRef *) 0 ;
-  char **arg3 = (char **) 0 ;
-  int result;
-  
-  arg1 = jarg1; 
-  
-  arg2 = (LLVMMemoryBufferRef *)jarg2; 
-  
-  arg3 = (char **)jarg3; 
-  
-  result = (int)LLVMCreateMemoryBufferWithContentsOfFile((char const *)arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithSTDIN (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  int jresult ;
-  LLVMMemoryBufferRef *arg1 = (LLVMMemoryBufferRef *) 0 ;
-  char **arg2 = (char **) 0 ;
-  int result;
-  
-  arg1 = (LLVMMemoryBufferRef *)jarg1; 
-  
-  arg2 = (char **)jarg2; 
-  
-  result = (int)LLVMCreateMemoryBufferWithSTDIN(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeMemoryBuffer (
-  void * jarg1
-  )
-{
-  LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ;
-  
-  arg1 = (LLVMMemoryBufferRef)jarg1; 
-  
-  LLVMDisposeMemoryBuffer(arg1);
-  
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreatePassManager (
-  )
-{
-  void * jresult ;
-  LLVMPassManagerRef result;
-  
-  result = (LLVMPassManagerRef)LLVMCreatePassManager();
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateFunctionPassManager (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ;
-  LLVMPassManagerRef result;
-  
-  arg1 = (LLVMModuleProviderRef)jarg1; 
-  
-  result = (LLVMPassManagerRef)LLVMCreateFunctionPassManager(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMRunPassManager (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  int jresult ;
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  LLVMModuleRef arg2 = (LLVMModuleRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  arg2 = (LLVMModuleRef)jarg2; 
-  
-  result = (int)LLVMRunPassManager(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMInitializeFunctionPassManager (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  result = (int)LLVMInitializeFunctionPassManager(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMRunFunctionPassManager (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  int jresult ;
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (int)LLVMRunFunctionPassManager(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMFinalizeFunctionPassManager (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  int result;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  result = (int)LLVMFinalizeFunctionPassManager(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposePassManager (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMDisposePassManager(arg1);
-  
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/target/llvm_target-binding.ads b/bindings/ada/target/llvm_target-binding.ads
deleted file mode 100644
index 61201c8d1753..000000000000
--- a/bindings/ada/target/llvm_target-binding.ads
+++ /dev/null
@@ -1,138 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-with Interfaces.C.Strings;
-
-
-package LLVM_Target.Binding is
-
-   LLVMBigEndian    : constant := 0;
-   LLVMLittleEndian : constant := 1;
-
-   procedure LLVMInitializeAllTargets;
-
-   function LLVMInitializeNativeTarget return  Interfaces.C.int;
-
-   function LLVMCreateTargetData
-     (StringRep : in Interfaces.C.Strings.chars_ptr)
-      return      LLVM_Target.LLVMTargetDataRef;
-
-   procedure LLVMAddTargetData
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMPassManagerRef);
-
-   function LLVMCopyStringRepOfTargetData
-     (arg_1 : in LLVM_Target.LLVMTargetDataRef)
-      return  Interfaces.C.Strings.chars_ptr;
-
-   function LLVMByteOrder
-     (arg_1 : in LLVM_Target.LLVMTargetDataRef)
-      return  LLVM_Target.LLVMByteOrdering;
-
-   function LLVMPointerSize
-     (arg_1 : in LLVM_Target.LLVMTargetDataRef)
-      return  Interfaces.C.unsigned;
-
-   function LLVMIntPtrType
-     (arg_1 : in LLVM_Target.LLVMTargetDataRef)
-      return  llvm.LLVMTypeRef;
-
-   function LLVMSizeOfTypeInBits
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.Extensions.unsigned_long_long;
-
-   function LLVMStoreSizeOfType
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.Extensions.unsigned_long_long;
-
-   function LLVMABISizeOfType
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.Extensions.unsigned_long_long;
-
-   function LLVMABIAlignmentOfType
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.unsigned;
-
-   function LLVMCallFrameAlignmentOfType
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.unsigned;
-
-   function LLVMPreferredAlignmentOfType
-     (arg_2_1 : in LLVM_Target.LLVMTargetDataRef;
-      arg_2_2 : in llvm.LLVMTypeRef)
-      return    Interfaces.C.unsigned;
-
-   function LLVMPreferredAlignmentOfGlobal
-     (arg_1     : in LLVM_Target.LLVMTargetDataRef;
-      GlobalVar : in llvm.LLVMValueRef)
-      return      Interfaces.C.unsigned;
-
-   function LLVMElementAtOffset
-     (arg_1    : in LLVM_Target.LLVMTargetDataRef;
-      StructTy : in llvm.LLVMTypeRef;
-      Offset   : in Interfaces.C.Extensions.unsigned_long_long)
-      return     Interfaces.C.unsigned;
-
-   function LLVMOffsetOfElement
-     (arg_1    : in LLVM_Target.LLVMTargetDataRef;
-      StructTy : in llvm.LLVMTypeRef;
-      Element  : in Interfaces.C.unsigned)
-      return     Interfaces.C.Extensions.unsigned_long_long;
-
-   procedure LLVMInvalidateStructLayout
-     (arg_1    : in LLVM_Target.LLVMTargetDataRef;
-      StructTy : in llvm.LLVMTypeRef);
-
-   procedure LLVMDisposeTargetData
-     (arg_1 : in LLVM_Target.LLVMTargetDataRef);
-
-private
-
-   pragma Import
-     (C,
-      LLVMInitializeAllTargets,
-      "Ada_LLVMInitializeAllTargets");
-   pragma Import
-     (C,
-      LLVMInitializeNativeTarget,
-      "Ada_LLVMInitializeNativeTarget");
-   pragma Import (C, LLVMCreateTargetData, "Ada_LLVMCreateTargetData");
-   pragma Import (C, LLVMAddTargetData, "Ada_LLVMAddTargetData");
-   pragma Import
-     (C,
-      LLVMCopyStringRepOfTargetData,
-      "Ada_LLVMCopyStringRepOfTargetData");
-   pragma Import (C, LLVMByteOrder, "Ada_LLVMByteOrder");
-   pragma Import (C, LLVMPointerSize, "Ada_LLVMPointerSize");
-   pragma Import (C, LLVMIntPtrType, "Ada_LLVMIntPtrType");
-   pragma Import (C, LLVMSizeOfTypeInBits, "Ada_LLVMSizeOfTypeInBits");
-   pragma Import (C, LLVMStoreSizeOfType, "Ada_LLVMStoreSizeOfType");
-   pragma Import (C, LLVMABISizeOfType, "Ada_LLVMABISizeOfType");
-   pragma Import (C, LLVMABIAlignmentOfType, "Ada_LLVMABIAlignmentOfType");
-   pragma Import
-     (C,
-      LLVMCallFrameAlignmentOfType,
-      "Ada_LLVMCallFrameAlignmentOfType");
-   pragma Import
-     (C,
-      LLVMPreferredAlignmentOfType,
-      "Ada_LLVMPreferredAlignmentOfType");
-   pragma Import
-     (C,
-      LLVMPreferredAlignmentOfGlobal,
-      "Ada_LLVMPreferredAlignmentOfGlobal");
-   pragma Import (C, LLVMElementAtOffset, "Ada_LLVMElementAtOffset");
-   pragma Import (C, LLVMOffsetOfElement, "Ada_LLVMOffsetOfElement");
-   pragma Import
-     (C,
-      LLVMInvalidateStructLayout,
-      "Ada_LLVMInvalidateStructLayout");
-   pragma Import (C, LLVMDisposeTargetData, "Ada_LLVMDisposeTargetData");
-
-end LLVM_Target.Binding;
diff --git a/bindings/ada/target/llvm_target.ads b/bindings/ada/target/llvm_target.ads
deleted file mode 100644
index 11cb05d55b35..000000000000
--- a/bindings/ada/target/llvm_target.ads
+++ /dev/null
@@ -1,72 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with Interfaces.C.Extensions;
-
-
-package LLVM_Target is
-
-   -- LLVMOpaqueTargetData
-   --
-   type LLVMOpaqueTargetData is new
-     Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMOpaqueTargetData_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.LLVMOpaqueTargetData;
-
-   type LLVMOpaqueTargetData_view is access all
-     LLVM_Target.LLVMOpaqueTargetData;
-
-   -- LLVMTargetDataRef
-   --
-   type LLVMTargetDataRef is access all LLVM_Target.LLVMOpaqueTargetData;
-
-   type LLVMTargetDataRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.LLVMTargetDataRef;
-
-   type LLVMTargetDataRef_view is access all LLVM_Target.LLVMTargetDataRef;
-
-   -- LLVMStructLayout
-   --
-   type LLVMStructLayout is new Interfaces.C.Extensions.opaque_structure_def;
-
-   type LLVMStructLayout_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.LLVMStructLayout;
-
-   type LLVMStructLayout_view is access all LLVM_Target.LLVMStructLayout;
-
-   -- LLVMStructLayoutRef
-   --
-   type LLVMStructLayoutRef is access all LLVM_Target.LLVMStructLayout;
-
-   type LLVMStructLayoutRef_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.LLVMStructLayoutRef;
-
-   type LLVMStructLayoutRef_view is access all LLVM_Target.LLVMStructLayoutRef;
-
-   -- TargetData
-   --
-   type TargetData is new Interfaces.C.Extensions.incomplete_class_def;
-
-   type TargetData_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.TargetData;
-
-   type TargetData_view is access all LLVM_Target.TargetData;
-
-   -- LLVMByteOrdering
-   --
-   type LLVMByteOrdering is new Interfaces.C.int;
-
-   type LLVMByteOrdering_array is
-     array (Interfaces.C.size_t range <>)
-            of aliased LLVM_Target.LLVMByteOrdering;
-
-   type LLVMByteOrdering_view is access all LLVM_Target.LLVMByteOrdering;
-
-
-end LLVM_Target;
diff --git a/bindings/ada/target/llvm_target_wrap.cxx b/bindings/ada/target/llvm_target_wrap.cxx
deleted file mode 100644
index 16aca8a4379a..000000000000
--- a/bindings/ada/target/llvm_target_wrap.cxx
+++ /dev/null
@@ -1,720 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Target (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Target(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/Target.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport void SWIGSTDCALL Ada_LLVMInitializeAllTargets (
-  )
-{
-  LLVMInitializeAllTargets();
-  
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMInitializeNativeTarget (
-  )
-{
-  int jresult ;
-  int result;
-  
-  result = (int)LLVMInitializeNativeTarget();
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMCreateTargetData (
-  char * jarg1
-  )
-{
-  void * jresult ;
-  char *arg1 = (char *) 0 ;
-  LLVMTargetDataRef result;
-  
-  arg1 = jarg1; 
-  
-  result = (LLVMTargetDataRef)LLVMCreateTargetData((char const *)arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddTargetData (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMPassManagerRef arg2 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMPassManagerRef)jarg2; 
-  
-  LLVMAddTargetData(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport char * SWIGSTDCALL Ada_LLVMCopyStringRepOfTargetData (
-  void * jarg1
-  )
-{
-  char * jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  char *result = 0 ;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  result = (char *)LLVMCopyStringRepOfTargetData(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport int SWIGSTDCALL Ada_LLVMByteOrder (
-  void * jarg1
-  )
-{
-  int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMByteOrdering result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  result = (LLVMByteOrdering)LLVMByteOrder(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMPointerSize (
-  void * jarg1
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  result = (unsigned int)LLVMPointerSize(arg1);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void * SWIGSTDCALL Ada_LLVMIntPtrType (
-  void * jarg1
-  )
-{
-  void * jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  result = (LLVMTypeRef)LLVMIntPtrType(arg1);
-  jresult = (void *) result;      
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned long long SWIGSTDCALL Ada_LLVMSizeOfTypeInBits (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned long long jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned long long result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned long long)LLVMSizeOfTypeInBits(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned long long SWIGSTDCALL Ada_LLVMStoreSizeOfType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned long long jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned long long result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned long long)LLVMStoreSizeOfType(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned long long SWIGSTDCALL Ada_LLVMABISizeOfType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned long long jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned long long result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned long long)LLVMABISizeOfType(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMABIAlignmentOfType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned int)LLVMABIAlignmentOfType(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMCallFrameAlignmentOfType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned int)LLVMCallFrameAlignmentOfType(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfType (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  result = (unsigned int)LLVMPreferredAlignmentOfType(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfGlobal (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMValueRef arg2 = (LLVMValueRef) 0 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMValueRef)jarg2; 
-  
-  result = (unsigned int)LLVMPreferredAlignmentOfGlobal(arg1,arg2);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned int SWIGSTDCALL Ada_LLVMElementAtOffset (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned long long jarg3
-  )
-{
-  unsigned int jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned long long arg3 ;
-  unsigned int result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  
-  arg3 = (unsigned long long) jarg3; 
-  
-  
-  result = (unsigned int)LLVMElementAtOffset(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport unsigned long long SWIGSTDCALL Ada_LLVMOffsetOfElement (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  ,
-  
-  unsigned int jarg3
-  )
-{
-  unsigned long long jresult ;
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  unsigned int arg3 ;
-  unsigned long long result;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  
-  arg3 = (unsigned int) jarg3; 
-  
-  
-  result = (unsigned long long)LLVMOffsetOfElement(arg1,arg2,arg3);
-  jresult = result; 
-  
-  
-  
-  return jresult;
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMInvalidateStructLayout (
-  void * jarg1
-  ,
-  
-  void * jarg2
-  )
-{
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  LLVMTypeRef arg2 = (LLVMTypeRef) 0 ;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  arg2 = (LLVMTypeRef)jarg2; 
-  
-  LLVMInvalidateStructLayout(arg1,arg2);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMDisposeTargetData (
-  void * jarg1
-  )
-{
-  LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ;
-  
-  arg1 = (LLVMTargetDataRef)jarg1; 
-  
-  LLVMDisposeTargetData(arg1);
-  
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ada/transforms/llvm_transforms-binding.ads b/bindings/ada/transforms/llvm_transforms-binding.ads
deleted file mode 100644
index 2254b6eec2c3..000000000000
--- a/bindings/ada/transforms/llvm_transforms-binding.ads
+++ /dev/null
@@ -1,206 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-with llvm;
-
-
-package LLVM_Transforms.Binding is
-
-   procedure LLVMAddArgumentPromotionPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddConstantMergePass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddDeadArgEliminationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddDeadTypeEliminationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddFunctionAttrsPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddFunctionInliningPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddGlobalDCEPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddGlobalOptimizerPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddIPConstantPropagationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLowerSetJmpPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddPruneEHPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddRaiseAllocationsPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddStripDeadPrototypesPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddStripSymbolsPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddAggressiveDCEPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddCFGSimplificationPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddCondPropagationPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddDeadStoreEliminationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddGVNPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddIndVarSimplifyPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddInstructionCombiningPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddJumpThreadingPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLICMPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLoopDeletionPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLoopIndexSplitPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLoopRotatePass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLoopUnrollPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddLoopUnswitchPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddMemCpyOptPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddPromoteMemoryToRegisterPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddReassociatePass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddSCCPPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddScalarReplAggregatesPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddSimplifyLibCallsPass (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddTailCallEliminationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddConstantPropagationPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-   procedure LLVMAddDemoteMemoryToRegisterPass
-     (PM : in llvm.LLVMPassManagerRef);
-
-private
-
-   pragma Import
-     (C,
-      LLVMAddArgumentPromotionPass,
-      "Ada_LLVMAddArgumentPromotionPass");
-   pragma Import
-     (C,
-      LLVMAddConstantMergePass,
-      "Ada_LLVMAddConstantMergePass");
-   pragma Import
-     (C,
-      LLVMAddDeadArgEliminationPass,
-      "Ada_LLVMAddDeadArgEliminationPass");
-   pragma Import
-     (C,
-      LLVMAddDeadTypeEliminationPass,
-      "Ada_LLVMAddDeadTypeEliminationPass");
-   pragma Import
-     (C,
-      LLVMAddFunctionAttrsPass,
-      "Ada_LLVMAddFunctionAttrsPass");
-   pragma Import
-     (C,
-      LLVMAddFunctionInliningPass,
-      "Ada_LLVMAddFunctionInliningPass");
-   pragma Import (C, LLVMAddGlobalDCEPass, "Ada_LLVMAddGlobalDCEPass");
-   pragma Import
-     (C,
-      LLVMAddGlobalOptimizerPass,
-      "Ada_LLVMAddGlobalOptimizerPass");
-   pragma Import
-     (C,
-      LLVMAddIPConstantPropagationPass,
-      "Ada_LLVMAddIPConstantPropagationPass");
-   pragma Import (C, LLVMAddLowerSetJmpPass, "Ada_LLVMAddLowerSetJmpPass");
-   pragma Import (C, LLVMAddPruneEHPass, "Ada_LLVMAddPruneEHPass");
-   pragma Import
-     (C,
-      LLVMAddRaiseAllocationsPass,
-      "Ada_LLVMAddRaiseAllocationsPass");
-   pragma Import
-     (C,
-      LLVMAddStripDeadPrototypesPass,
-      "Ada_LLVMAddStripDeadPrototypesPass");
-   pragma Import (C, LLVMAddStripSymbolsPass, "Ada_LLVMAddStripSymbolsPass");
-   pragma Import
-     (C,
-      LLVMAddAggressiveDCEPass,
-      "Ada_LLVMAddAggressiveDCEPass");
-   pragma Import
-     (C,
-      LLVMAddCFGSimplificationPass,
-      "Ada_LLVMAddCFGSimplificationPass");
-   pragma Import
-     (C,
-      LLVMAddCondPropagationPass,
-      "Ada_LLVMAddCondPropagationPass");
-   pragma Import
-     (C,
-      LLVMAddDeadStoreEliminationPass,
-      "Ada_LLVMAddDeadStoreEliminationPass");
-   pragma Import (C, LLVMAddGVNPass, "Ada_LLVMAddGVNPass");
-   pragma Import
-     (C,
-      LLVMAddIndVarSimplifyPass,
-      "Ada_LLVMAddIndVarSimplifyPass");
-   pragma Import
-     (C,
-      LLVMAddInstructionCombiningPass,
-      "Ada_LLVMAddInstructionCombiningPass");
-   pragma Import
-     (C,
-      LLVMAddJumpThreadingPass,
-      "Ada_LLVMAddJumpThreadingPass");
-   pragma Import (C, LLVMAddLICMPass, "Ada_LLVMAddLICMPass");
-   pragma Import (C, LLVMAddLoopDeletionPass, "Ada_LLVMAddLoopDeletionPass");
-   pragma Import
-     (C,
-      LLVMAddLoopIndexSplitPass,
-      "Ada_LLVMAddLoopIndexSplitPass");
-   pragma Import (C, LLVMAddLoopRotatePass, "Ada_LLVMAddLoopRotatePass");
-   pragma Import (C, LLVMAddLoopUnrollPass, "Ada_LLVMAddLoopUnrollPass");
-   pragma Import (C, LLVMAddLoopUnswitchPass, "Ada_LLVMAddLoopUnswitchPass");
-   pragma Import (C, LLVMAddMemCpyOptPass, "Ada_LLVMAddMemCpyOptPass");
-   pragma Import
-     (C,
-      LLVMAddPromoteMemoryToRegisterPass,
-      "Ada_LLVMAddPromoteMemoryToRegisterPass");
-   pragma Import (C, LLVMAddReassociatePass, "Ada_LLVMAddReassociatePass");
-   pragma Import (C, LLVMAddSCCPPass, "Ada_LLVMAddSCCPPass");
-   pragma Import
-     (C,
-      LLVMAddScalarReplAggregatesPass,
-      "Ada_LLVMAddScalarReplAggregatesPass");
-   pragma Import
-     (C,
-      LLVMAddSimplifyLibCallsPass,
-      "Ada_LLVMAddSimplifyLibCallsPass");
-   pragma Import
-     (C,
-      LLVMAddTailCallEliminationPass,
-      "Ada_LLVMAddTailCallEliminationPass");
-   pragma Import
-     (C,
-      LLVMAddConstantPropagationPass,
-      "Ada_LLVMAddConstantPropagationPass");
-   pragma Import
-     (C,
-      LLVMAddDemoteMemoryToRegisterPass,
-      "Ada_LLVMAddDemoteMemoryToRegisterPass");
-
-end LLVM_Transforms.Binding;
diff --git a/bindings/ada/transforms/llvm_transforms.ads b/bindings/ada/transforms/llvm_transforms.ads
deleted file mode 100644
index 4f37aafe805c..000000000000
--- a/bindings/ada/transforms/llvm_transforms.ads
+++ /dev/null
@@ -1,6 +0,0 @@
--- This file is generated by SWIG. Do *not* modify by hand.
---
-
-package LLVM_Transforms is
-
-end LLVM_Transforms;
diff --git a/bindings/ada/transforms/llvm_transforms_wrap.cxx b/bindings/ada/transforms/llvm_transforms_wrap.cxx
deleted file mode 100644
index 8cb04db791aa..000000000000
--- a/bindings/ada/transforms/llvm_transforms_wrap.cxx
+++ /dev/null
@@ -1,828 +0,0 @@
-/* ----------------------------------------------------------------------------
- * This file was automatically generated by SWIG (http://www.swig.org).
- * Version 1.3.36
- * 
- * This file is not intended to be easily readable and contains a number of 
- * coding conventions designed to improve portability and efficiency. Do not make
- * changes to this file unless you know what you are doing--modify the SWIG 
- * interface file instead. 
- * ----------------------------------------------------------------------------- */
-
-
-#ifdef __cplusplus
-template<typename T> class SwigValueWrapper {
-    T *tt;
-public:
-    SwigValueWrapper() : tt(0) { }
-    SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
-    SwigValueWrapper(const T& t) : tt(new T(t)) { }
-    ~SwigValueWrapper() { delete tt; } 
-    SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
-    operator T&() const { return *tt; }
-    T *operator&() { return tt; }
-private:
-    SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
-};
-
-template <typename T> T SwigValueInit() {
-  return T();
-}
-#endif
-
-/* -----------------------------------------------------------------------------
- *  This section contains generic SWIG labels for method/variable
- *  declarations/attributes, and other compiler dependent labels.
- * ----------------------------------------------------------------------------- */
-
-/* template workaround for compilers that cannot correctly implement the C++ standard */
-#ifndef SWIGTEMPLATEDISAMBIGUATOR
-# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# elif defined(__HP_aCC)
-/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
-/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
-#  define SWIGTEMPLATEDISAMBIGUATOR template
-# else
-#  define SWIGTEMPLATEDISAMBIGUATOR
-# endif
-#endif
-
-/* inline attribute */
-#ifndef SWIGINLINE
-# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
-#   define SWIGINLINE inline
-# else
-#   define SWIGINLINE
-# endif
-#endif
-
-/* attribute recognised by some compilers to avoid 'unused' warnings */
-#ifndef SWIGUNUSED
-# if defined(__GNUC__)
-#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#     define SWIGUNUSED __attribute__ ((__unused__)) 
-#   else
-#     define SWIGUNUSED
-#   endif
-# elif defined(__ICC)
-#   define SWIGUNUSED __attribute__ ((__unused__)) 
-# else
-#   define SWIGUNUSED 
-# endif
-#endif
-
-#ifndef SWIGUNUSEDPARM
-# ifdef __cplusplus
-#   define SWIGUNUSEDPARM(p)
-# else
-#   define SWIGUNUSEDPARM(p) p SWIGUNUSED 
-# endif
-#endif
-
-/* internal SWIG method */
-#ifndef SWIGINTERN
-# define SWIGINTERN static SWIGUNUSED
-#endif
-
-/* internal inline SWIG method */
-#ifndef SWIGINTERNINLINE
-# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
-#endif
-
-/* exporting methods */
-#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#  ifndef GCC_HASCLASSVISIBILITY
-#    define GCC_HASCLASSVISIBILITY
-#  endif
-#endif
-
-#ifndef SWIGEXPORT
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   if defined(STATIC_LINKED)
-#     define SWIGEXPORT
-#   else
-#     define SWIGEXPORT __declspec(dllexport)
-#   endif
-# else
-#   if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
-#     define SWIGEXPORT __attribute__ ((visibility("default")))
-#   else
-#     define SWIGEXPORT
-#   endif
-# endif
-#endif
-
-/* calling conventions for Windows */
-#ifndef SWIGSTDCALL
-# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
-#   define SWIGSTDCALL __stdcall
-# else
-#   define SWIGSTDCALL
-# endif 
-#endif
-
-/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
-#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
-# define _CRT_SECURE_NO_DEPRECATE
-#endif
-
-/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
-#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
-# define _SCL_SECURE_NO_DEPRECATE
-#endif
-
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#if defined(_WIN32) || defined(__CYGWIN32__)
-#  define DllExport   __declspec( dllexport )
-#  define SWIGSTDCALL __stdcall
-#else
-#  define DllExport  
-#  define SWIGSTDCALL
-#endif 
-
-
-#ifdef __cplusplus
-#  include <new>
-#endif
-
-
-
-
-/* Support for throwing Ada exceptions from C/C++ */
-
-typedef enum 
-{
-  SWIG_AdaException,
-  SWIG_AdaOutOfMemoryException,
-  SWIG_AdaIndexOutOfRangeException,
-  SWIG_AdaDivideByZeroException,
-  SWIG_AdaArgumentOutOfRangeException,
-  SWIG_AdaNullReferenceException
-} SWIG_AdaExceptionCodes;
-
-
-typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
-
-
-typedef struct 
-{
-  SWIG_AdaExceptionCodes code;
-  SWIG_AdaExceptionCallback_t callback;
-} 
-  SWIG_AdaExceptions_t;
-
-
-static 
-SWIG_AdaExceptions_t 
-SWIG_ada_exceptions[] = 
-{
-  { SWIG_AdaException, NULL },
-  { SWIG_AdaOutOfMemoryException, NULL },
-  { SWIG_AdaIndexOutOfRangeException, NULL },
-  { SWIG_AdaDivideByZeroException, NULL },
-  { SWIG_AdaArgumentOutOfRangeException, NULL },
-  { SWIG_AdaNullReferenceException, NULL } 
-};
-
-
-static 
-void 
-SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) 
-{
-  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
-  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
-    callback = SWIG_ada_exceptions[code].callback;
-  }
-  callback(msg);
-}
-
-
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-
-DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Transforms (SWIG_AdaExceptionCallback_t systemException,
-                                                                   SWIG_AdaExceptionCallback_t outOfMemory, 
-                                                                   SWIG_AdaExceptionCallback_t indexOutOfRange, 
-                                                                   SWIG_AdaExceptionCallback_t divideByZero, 
-                                                                   SWIG_AdaExceptionCallback_t argumentOutOfRange,
-                                                                   SWIG_AdaExceptionCallback_t nullReference) 
-{
-  SWIG_ada_exceptions [SWIG_AdaException].callback                   = systemException;
-  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback        = outOfMemory;
-  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback    = indexOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback       = divideByZero;
-  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
-  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback      = nullReference;
-}
-
-
-/* Callback for returning strings to Ada without leaking memory */
-
-typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
-static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
-
-
-
-/* probably obsolete ...
-#ifdef __cplusplus
-extern "C" 
-#endif
-DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Transforms(SWIG_AdaStringHelperCallback callback) {
-  SWIG_ada_string_callback = callback;
-}
-*/
-
-
-
-/* Contract support */
-
-#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else
-
-
-#define protected public
-#define private   public
-
-#include "llvm-c/Transforms/IPO.h"
-#include "llvm-c/Transforms/Scalar.h"
-
-
-
-//  struct LLVMCtxt;
-
-
-#undef protected
-#undef private
-#ifdef __cplusplus 
-extern "C" {
-#endif
-DllExport void SWIGSTDCALL Ada_LLVMAddArgumentPromotionPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddArgumentPromotionPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddConstantMergePass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddConstantMergePass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddDeadArgEliminationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddDeadArgEliminationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddDeadTypeEliminationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddDeadTypeEliminationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttrsPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddFunctionAttrsPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddFunctionInliningPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddFunctionInliningPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddGlobalDCEPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddGlobalDCEPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddGlobalOptimizerPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddGlobalOptimizerPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddIPConstantPropagationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddIPConstantPropagationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLowerSetJmpPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLowerSetJmpPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddPruneEHPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddPruneEHPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddRaiseAllocationsPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddRaiseAllocationsPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddStripDeadPrototypesPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddStripDeadPrototypesPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddStripSymbolsPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddStripSymbolsPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddAggressiveDCEPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddAggressiveDCEPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddCFGSimplificationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddCFGSimplificationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddCondPropagationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddCondPropagationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddDeadStoreEliminationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddDeadStoreEliminationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddGVNPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddGVNPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddIndVarSimplifyPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddIndVarSimplifyPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddInstructionCombiningPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddInstructionCombiningPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddJumpThreadingPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddJumpThreadingPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLICMPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLICMPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLoopDeletionPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLoopDeletionPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLoopIndexSplitPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLoopIndexSplitPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLoopRotatePass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLoopRotatePass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnrollPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLoopUnrollPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnswitchPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddLoopUnswitchPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddMemCpyOptPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddMemCpyOptPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddPromoteMemoryToRegisterPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddPromoteMemoryToRegisterPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddReassociatePass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddReassociatePass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddSCCPPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddSCCPPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddScalarReplAggregatesPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddScalarReplAggregatesPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddSimplifyLibCallsPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddSimplifyLibCallsPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddTailCallEliminationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddTailCallEliminationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddConstantPropagationPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddConstantPropagationPass(arg1);
-  
-  
-}
-
-
-
-DllExport void SWIGSTDCALL Ada_LLVMAddDemoteMemoryToRegisterPass (
-  void * jarg1
-  )
-{
-  LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ;
-  
-  arg1 = (LLVMPassManagerRef)jarg1; 
-  
-  LLVMAddDemoteMemoryToRegisterPass(arg1);
-  
-  
-}
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml
index 1cff422c28d4..40ecc9c08e09 100644
--- a/bindings/ocaml/Makefile.ocaml
+++ b/bindings/ocaml/Makefile.ocaml
@@ -73,8 +73,13 @@ Archive.EXE := $(strip $(OCAMLC) -cc $(CXX) $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG:%=%.
 endif
 
 # Source files
+ifndef OcamlSources1
 OcamlSources1 := $(sort $(wildcard $(PROJ_SRC_DIR)/*.ml))
+endif
+
+ifndef OcamlHeaders1
 OcamlHeaders1 := $(sort $(wildcard $(PROJ_SRC_DIR)/*.mli))
+endif
 
 OcamlSources2 := $(filter-out $(ExcludeSources),$(OcamlSources1))
 OcamlHeaders2 := $(filter-out $(ExcludeHeaders),$(OcamlHeaders1))
@@ -352,11 +357,11 @@ $(OutputEXE): $(ToolEXE) $(OcamlDir)/.dir
 ifndef OCAMLOPT
 $(ToolEXE): $(ObjectsCMO) $(OcamlDir)/.dir
 	$(Echo) "Archiving $(notdir $@) for $(BuildMode) build"
-	$(Verb) $(Archive.EXE) $@ $<
+	$(Verb) $(Archive.EXE) $@ $(ObjectsCMO)
 else
 $(ToolEXE): $(ObjectsCMX) $(OcamlDir)/.dir
 	$(Echo) "Archiving $(notdir $@) for $(BuildMode) build"
-	$(Verb) $(Archive.EXE) $@ $<
+	$(Verb) $(Archive.EXE) $@ $(ObjectsCMX)
 endif
 endif
 
diff --git a/bindings/ocaml/bitreader/llvm_bitreader.mli b/bindings/ocaml/bitreader/llvm_bitreader.mli
index 5e2240974af4..1d333191c1d3 100644
--- a/bindings/ocaml/bitreader/llvm_bitreader.mli
+++ b/bindings/ocaml/bitreader/llvm_bitreader.mli
@@ -18,12 +18,12 @@ exception Error of string
     memory buffer [mb] in the context [context].  Returns [m] if successful, or
     raises [Error msg] otherwise, where [msg] is a description of the error
     encountered. See the function [llvm::getBitcodeModule]. *)
-external get_module : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
-                    = "llvm_get_module"
+val get_module : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
+
 
 (** [parse_bitcode context mb] parses the bitcode for a new module [m] from the
     memory buffer [mb] in the context [context]. Returns [m] if successful, or
 	 	raises [Error msg] otherwise, where [msg] is a description of the error
 	 	encountered. See the function [llvm::ParseBitcodeFile]. *)
-external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
-                       = "llvm_parse_bitcode"
+val parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
+
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.mli b/bindings/ocaml/executionengine/llvm_executionengine.mli
index ce25f9d0ae09..166b7bcddca6 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.mli
+++ b/bindings/ocaml/executionengine/llvm_executionengine.mli
@@ -25,58 +25,58 @@ module GenericValue: sig
   (** [of_float fpty n] boxes the float [n] in a float-valued generic value
       according to the floating point type [fpty]. See the fields
       [llvm::GenericValue::DoubleVal] and [llvm::GenericValue::FloatVal]. *)
-  external of_float : Llvm.lltype -> float -> t = "llvm_genericvalue_of_float"
+  val of_float : Llvm.lltype -> float -> t
   
   (** [of_pointer v] boxes the pointer value [v] in a generic value. See the
       field [llvm::GenericValue::PointerVal]. *)
-  external of_pointer : 'a -> t = "llvm_genericvalue_of_pointer"
+  val of_pointer : 'a -> t
   
   (** [of_int32 n w] boxes the int32 [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  external of_int32 : Llvm.lltype -> int32 -> t = "llvm_genericvalue_of_int32"
+  val of_int32 : Llvm.lltype -> int32 -> t
   
   (** [of_int n w] boxes the int [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  external of_int : Llvm.lltype -> int -> t = "llvm_genericvalue_of_int"
+  val of_int : Llvm.lltype -> int -> t
   
   (** [of_natint n w] boxes the native int [i] in a generic value with the
       bitwidth [w]. See the field [llvm::GenericValue::IntVal]. *)
-  external of_nativeint : Llvm.lltype -> nativeint -> t
-                        = "llvm_genericvalue_of_nativeint"
+  val of_nativeint : Llvm.lltype -> nativeint -> t
+
 
   (** [of_int64 n w] boxes the int64 [i] in a generic value with the bitwidth
       [w]. See the field [llvm::GenericValue::IntVal]. *)
-  external of_int64 : Llvm.lltype -> int64 -> t = "llvm_genericvalue_of_int64"
+  val of_int64 : Llvm.lltype -> int64 -> t
 
   (** [as_float fpty gv] unboxes the floating point-valued generic value [gv] of
       floating point type [fpty]. See the fields [llvm::GenericValue::DoubleVal]
       and [llvm::GenericValue::FloatVal]. *)
-  external as_float : Llvm.lltype -> t -> float = "llvm_genericvalue_as_float"
+  val as_float : Llvm.lltype -> t -> float
   
   (** [as_pointer gv] unboxes the pointer-valued generic value [gv]. See the
       field [llvm::GenericValue::PointerVal]. *)
-  external as_pointer : t -> 'a = "llvm_genericvalue_as_pointer"
+  val as_pointer : t -> 'a
   
   (** [as_int32 gv] unboxes the integer-valued generic value [gv] as an [int32].
       Is invalid if [gv] has a bitwidth greater than 32 bits. See the field
       [llvm::GenericValue::IntVal]. *)
-  external as_int32 : t -> int32 = "llvm_genericvalue_as_int32"
+  val as_int32 : t -> int32
   
   (** [as_int gv] unboxes the integer-valued generic value [gv] as an [int].
       Is invalid if [gv] has a bitwidth greater than the host bit width (but the
       most significant bit may be lost). See the field
       [llvm::GenericValue::IntVal]. *)
-  external as_int : t -> int = "llvm_genericvalue_as_int"
+  val as_int : t -> int
   
   (** [as_natint gv] unboxes the integer-valued generic value [gv] as a
       [nativeint]. Is invalid if [gv] has a bitwidth greater than
       [nativeint]. See the field [llvm::GenericValue::IntVal]. *)
-  external as_nativeint : t -> nativeint = "llvm_genericvalue_as_nativeint"
+  val as_nativeint : t -> nativeint
   
   (** [as_int64 gv] returns the integer-valued generic value [gv] as an [int64].
       Is invalid if [gv] has a bitwidth greater than [int64]. See the field
       [llvm::GenericValue::IntVal]. *)
-  external as_int64 : t -> int64 = "llvm_genericvalue_as_int64"
+  val as_int64 : t -> int64
 end
 
 
@@ -91,73 +91,73 @@ module ExecutionEngine: sig
       interpreter. Raises [Error msg] if an error occurrs. The execution engine
       is not garbage collected and must be destroyed with [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  external create : Llvm.llmodule -> t = "llvm_ee_create"
+  val create : Llvm.llmodule -> t
   
   (** [create_interpreter m] creates a new interpreter, taking ownership of the
       module [m] if successful. Raises [Error msg] if an error occurrs. The
       execution engine is not garbage collected and must be destroyed with
       [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  external create_interpreter : Llvm.llmodule -> t = "llvm_ee_create_interpreter"
+  val create_interpreter : Llvm.llmodule -> t
   
   (** [create_jit m optlevel] creates a new JIT (just-in-time compiler), taking
       ownership of the module [m] if successful with the desired optimization
       level [optlevel]. Raises [Error msg] if an error occurrs. The execution
       engine is not garbage collected and must be destroyed with [dispose ee].
       See the function [llvm::EngineBuilder::create]. *)
-  external create_jit : Llvm.llmodule -> int -> t = "llvm_ee_create_jit"
+  val create_jit : Llvm.llmodule -> int -> t
 
   (** [dispose ee] releases the memory used by the execution engine and must be
       invoked to avoid memory leaks. *)
-  external dispose : t -> unit = "llvm_ee_dispose"
+  val dispose : t -> unit
   
   (** [add_module m ee] adds the module [m] to the execution engine [ee]. *)
-  external add_module : Llvm.llmodule -> t -> unit = "llvm_ee_add_module"
+  val add_module : Llvm.llmodule -> t -> unit
   
   (** [remove_module m ee] removes the module [m] from the execution engine
       [ee], disposing of [m] and the module referenced by [mp]. Raises
       [Error msg] if an error occurs. *)
-  external remove_module : Llvm.llmodule -> t -> Llvm.llmodule
-                         = "llvm_ee_remove_module"
+  val remove_module : Llvm.llmodule -> t -> Llvm.llmodule
+
   
   (** [find_function n ee] finds the function named [n] defined in any of the
       modules owned by the execution engine [ee]. Returns [None] if the function
       is not found and [Some f] otherwise. *)
-  external find_function : string -> t -> Llvm.llvalue option
-                         = "llvm_ee_find_function"
+  val find_function : string -> t -> Llvm.llvalue option
+
   
   (** [run_function f args ee] synchronously executes the function [f] with the
       arguments [args], which must be compatible with the parameter types. *)
-  external run_function : Llvm.llvalue -> GenericValue.t array -> t ->
+  val run_function : Llvm.llvalue -> GenericValue.t array -> t ->
                      GenericValue.t
-                   = "llvm_ee_run_function"
+
   
   (** [run_static_ctors ee] executes the static constructors of each module in
       the execution engine [ee]. *)
-  external run_static_ctors : t -> unit = "llvm_ee_run_static_ctors"
+  val run_static_ctors : t -> unit
   
   (** [run_static_dtors ee] executes the static destructors of each module in
       the execution engine [ee]. *)
-  external run_static_dtors : t -> unit = "llvm_ee_run_static_dtors"
+  val run_static_dtors : t -> unit
   
   (** [run_function_as_main f args env ee] executes the function [f] as a main
       function, passing it [argv] and [argc] according to the string array
       [args], and [envp] as specified by the array [env]. Returns the integer
       return value of the function. *)
-  external run_function_as_main : Llvm.llvalue -> string array ->
+  val run_function_as_main : Llvm.llvalue -> string array ->
                                   (string * string) array -> t -> int
-                                = "llvm_ee_run_function_as_main"
+
   
   (** [free_machine_code f ee] releases the memory in the execution engine [ee]
       used to store the machine code for the function [f]. *)
-  external free_machine_code : Llvm.llvalue -> t -> unit
-                             = "llvm_ee_free_machine_code"
+  val free_machine_code : Llvm.llvalue -> t -> unit
+
 
   (** [target_data ee] is the target data owned by the execution engine
       [ee]. *)
-  external target_data : t -> Llvm_target.TargetData.t
-                       = "LLVMGetExecutionEngineTargetData"
+  val target_data : t -> Llvm_target.TargetData.t
+
 end
 
-external initialize_native_target : unit -> bool
-                                  = "llvm_initialize_native_target"
+val initialize_native_target : unit -> bool
+
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index ba3bbe248b71..9b037aae7a46 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -212,19 +212,19 @@ exception IoError of string
 
 (** [create_context ()] creates a context for storing the "global" state in
     LLVM. See the constructor [llvm::LLVMContext]. *)
-external create_context : unit -> llcontext = "llvm_create_context"
+val create_context : unit -> llcontext
 
 (** [destroy_context ()] destroys a context. See the destructor
     [llvm::LLVMContext::~LLVMContext]. *)
-external dispose_context : llcontext -> unit = "llvm_dispose_context"
+val dispose_context : llcontext -> unit
 
 (** See the function [llvm::getGlobalContext]. *)
-external global_context : unit -> llcontext = "llvm_global_context"
+val global_context : unit -> llcontext
 
 (** [mdkind_id context name] returns the MDKind ID that corresponds to the
     name [name] in the context [context].  See the function
     [llvm::LLVMContext::getMDKindID]. *)
-external mdkind_id : llcontext -> string -> int = "llvm_mdkind_id"
+val mdkind_id : llcontext -> string -> int
 
 
 (** {6 Modules} *)
@@ -233,71 +233,71 @@ external mdkind_id : llcontext -> string -> int = "llvm_mdkind_id"
     the context [context].  Modules are not garbage collected; it is mandatory
     to call {!dispose_module} to free memory. See the constructor
     [llvm::Module::Module]. *)
-external create_module : llcontext -> string -> llmodule = "llvm_create_module"
+val create_module : llcontext -> string -> llmodule
 
 (** [dispose_module m] destroys a module [m] and all of the IR objects it
     contained. All references to subordinate objects are invalidated;
     referencing them will invoke undefined behavior. See the destructor
     [llvm::Module::~Module]. *)
-external dispose_module : llmodule -> unit = "llvm_dispose_module"
+val dispose_module : llmodule -> unit
 
 (** [target_triple m] is the target specifier for the module [m], something like
     [i686-apple-darwin8]. See the method [llvm::Module::getTargetTriple]. *)
-external target_triple: llmodule -> string
-                      = "llvm_target_triple"
+val target_triple: llmodule -> string
+
 
 (** [target_triple triple m] changes the target specifier for the module [m] to
     the string [triple]. See the method [llvm::Module::setTargetTriple]. *)
-external set_target_triple: string -> llmodule -> unit
-                          = "llvm_set_target_triple"
+val set_target_triple: string -> llmodule -> unit
+
 
 (** [data_layout m] is the data layout specifier for the module [m], something
     like [e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-...-a0:0:64-f80:128:128]. See the
     method [llvm::Module::getDataLayout]. *)
-external data_layout: llmodule -> string
-                    = "llvm_data_layout"
+val data_layout: llmodule -> string
+
 
 (** [set_data_layout s m] changes the data layout specifier for the module [m]
     to the string [s]. See the method [llvm::Module::setDataLayout]. *)
-external set_data_layout: string -> llmodule -> unit
-                        = "llvm_set_data_layout"
+val set_data_layout: string -> llmodule -> unit
+
 
 (** [define_type_name name ty m] adds a named type to the module's symbol table.
     Returns [true] if successful. If such a name already exists, then no entry
     is added and [false] is returned. See the [llvm::Module::addTypeName]
     method. *)
-external define_type_name : string -> lltype -> llmodule -> bool
-                          = "llvm_add_type_name"
+val define_type_name : string -> lltype -> llmodule -> bool
+
 
 (** [delete_type_name name] removes a type name from the module's symbol
     table. *)
-external delete_type_name : string -> llmodule -> unit
-                          = "llvm_delete_type_name"
+val delete_type_name : string -> llmodule -> unit
+
 
 (** [type_by_name m n] returns the type in the module [m] named [n], or [None]
     if it does not exist. See the method [llvm::Module::getTypeByName]. *)
-external type_by_name : llmodule -> string -> lltype option
-                      = "llvm_type_by_name"
+val type_by_name : llmodule -> string -> lltype option
+
 
 (** [dump_module m] prints the .ll representation of the module [m] to standard
     error. See the method [llvm::Module::dump]. *)
-external dump_module : llmodule -> unit = "llvm_dump_module"
+val dump_module : llmodule -> unit
 
 (** [set_module_inline_asm m asm] sets the inline assembler for the module. See
     the method [llvm::Module::setModuleInlineAsm]. *)
-external set_module_inline_asm : llmodule -> string -> unit
-                               = "llvm_set_module_inline_asm"
+val set_module_inline_asm : llmodule -> string -> unit
+
 
 
 (** {6 Types} *)
 
 (** [classify_type ty] returns the {!TypeKind.t} corresponding to the type [ty].
     See the method [llvm::Type::getTypeID]. *)
-external classify_type : lltype -> TypeKind.t = "llvm_classify_type"
+val classify_type : lltype -> TypeKind.t
 
 (** [type_context ty] returns the {!llcontext} corresponding to the type [ty].
     See the method [llvm::Type::getContext]. *)
-external type_context : lltype -> llcontext = "llvm_type_context"
+val type_context : lltype -> llcontext
 
 (** [string_of_lltype ty] returns a string describing the type [ty]. *)
 val string_of_lltype : lltype -> string
@@ -306,54 +306,54 @@ val string_of_lltype : lltype -> string
 
 (** [i1_type c] returns an integer type of bitwidth 1 in the context [c]. See
     [llvm::Type::Int1Ty]. *)
-external i1_type : llcontext -> lltype = "llvm_i1_type"
+val i1_type : llcontext -> lltype
 
 (** [i8_type c] returns an integer type of bitwidth 8 in the context [c]. See
     [llvm::Type::Int8Ty]. *)
-external i8_type : llcontext -> lltype = "llvm_i8_type"
+val i8_type : llcontext -> lltype
 
 (** [i16_type c] returns an integer type of bitwidth 16 in the context [c]. See
     [llvm::Type::Int16Ty]. *)
-external i16_type : llcontext -> lltype = "llvm_i16_type"
+val i16_type : llcontext -> lltype
 
 (** [i32_type c] returns an integer type of bitwidth 32 in the context [c]. See
     [llvm::Type::Int32Ty]. *)
-external i32_type : llcontext -> lltype = "llvm_i32_type"
+val i32_type : llcontext -> lltype
 
 (** [i64_type c] returns an integer type of bitwidth 64 in the context [c]. See
     [llvm::Type::Int64Ty]. *)
-external i64_type : llcontext -> lltype = "llvm_i64_type"
+val i64_type : llcontext -> lltype
 
 (** [integer_type c n] returns an integer type of bitwidth [n] in the context
     [c]. See the method [llvm::IntegerType::get]. *)
-external integer_type : llcontext -> int -> lltype = "llvm_integer_type"
+val integer_type : llcontext -> int -> lltype
 
 (** [integer_bitwidth c ty] returns the number of bits in the integer type [ty]
     in the context [c].  See the method [llvm::IntegerType::getBitWidth]. *)
-external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth"
+val integer_bitwidth : lltype -> int
 
 
 (** {7 Operations on real types} *)
 
 (** [float_type c] returns the IEEE 32-bit floating point type in the context
     [c]. See [llvm::Type::FloatTy]. *)
-external float_type : llcontext -> lltype = "llvm_float_type"
+val float_type : llcontext -> lltype
 
 (** [double_type c] returns the IEEE 64-bit floating point type in the context
     [c]. See [llvm::Type::DoubleTy]. *)
-external double_type : llcontext -> lltype = "llvm_double_type"
+val double_type : llcontext -> lltype
 
 (** [x86fp80_type c] returns the x87 80-bit floating point type in the context
     [c]. See [llvm::Type::X86_FP80Ty]. *)
-external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type"
+val x86fp80_type : llcontext -> lltype
 
 (** [fp128_type c] returns the IEEE 128-bit floating point type in the context
     [c]. See [llvm::Type::FP128Ty]. *)
-external fp128_type : llcontext -> lltype = "llvm_fp128_type"
+val fp128_type : llcontext -> lltype
 
 (** [ppc_fp128_type c] returns the PowerPC 128-bit floating point type in the
     context [c]. See [llvm::Type::PPC_FP128Ty]. *)
-external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type"
+val ppc_fp128_type : llcontext -> lltype
 
 
 (** {7 Operations on function types} *)
@@ -361,26 +361,26 @@ external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type"
 (** [function_type ret_ty param_tys] returns the function type returning
     [ret_ty] and taking [param_tys] as parameters.
     See the method [llvm::FunctionType::get]. *)
-external function_type : lltype -> lltype array -> lltype = "llvm_function_type"
+val function_type : lltype -> lltype array -> lltype
 
 (** [va_arg_function_type ret_ty param_tys] is just like
     [function_type ret_ty param_tys] except that it returns the function type
     which also takes a variable number of arguments.
     See the method [llvm::FunctionType::get]. *)
-external var_arg_function_type : lltype -> lltype array -> lltype
-                               = "llvm_var_arg_function_type"
+val var_arg_function_type : lltype -> lltype array -> lltype
+
 
 (** [is_var_arg fty] returns [true] if [fty] is a varargs function type, [false]
     otherwise. See the method [llvm::FunctionType::isVarArg]. *)
-external is_var_arg : lltype -> bool = "llvm_is_var_arg"
+val is_var_arg : lltype -> bool
 
 (** [return_type fty] gets the return type of the function type [fty].
     See the method [llvm::FunctionType::getReturnType]. *)
-external return_type : lltype -> lltype = "LLVMGetReturnType"
+val return_type : lltype -> lltype
 
 (** [param_types fty] gets the parameter types of the function type [fty].
     See the method [llvm::FunctionType::getParamType]. *)
-external param_types : lltype -> lltype array = "llvm_param_types"
+val param_types : lltype -> lltype array
 
 
 (** {7 Operations on struct types} *)
@@ -388,61 +388,61 @@ external param_types : lltype -> lltype array = "llvm_param_types"
 (** [struct_type context tys] returns the structure type in the context
     [context] containing in the types in the array [tys]. See the method
     [llvm::StructType::get]. *)
-external struct_type : llcontext -> lltype array -> lltype
-                     = "llvm_struct_type"
+val struct_type : llcontext -> lltype array -> lltype
+
 
 (** [packed_struct_type context ys] returns the packed structure type in the
     context [context] containing in the types in the array [tys]. See the method
     [llvm::StructType::get]. *)
-external packed_struct_type : llcontext -> lltype array -> lltype
-                            = "llvm_packed_struct_type"
+val packed_struct_type : llcontext -> lltype array -> lltype
+
 
 (** [struct_element_types sty] returns the constituent types of the struct type
     [sty]. See the method [llvm::StructType::getElementType]. *)
-external struct_element_types : lltype -> lltype array
-                              = "llvm_struct_element_types"
+val struct_element_types : lltype -> lltype array
+
 
 (** [is_packed sty] returns [true] if the structure type [sty] is packed,
     [false] otherwise. See the method [llvm::StructType::isPacked]. *)
-external is_packed : lltype -> bool = "llvm_is_packed"
+val is_packed : lltype -> bool
 
 
 (** {7 Operations on pointer, vector, and array types} *)
 
 (** [array_type ty n] returns the array type containing [n] elements of type
     [ty]. See the method [llvm::ArrayType::get]. *)
-external array_type : lltype -> int -> lltype = "llvm_array_type"
+val array_type : lltype -> int -> lltype
 
 (** [pointer_type ty] returns the pointer type referencing objects of type
     [ty] in the default address space (0).
     See the method [llvm::PointerType::getUnqual]. *)
-external pointer_type : lltype -> lltype = "llvm_pointer_type"
+val pointer_type : lltype -> lltype
 
 (** [qualified_pointer_type ty as] returns the pointer type referencing objects
     of type [ty] in address space [as].
     See the method [llvm::PointerType::get]. *)
-external qualified_pointer_type : lltype -> int -> lltype
-                                = "llvm_qualified_pointer_type"
+val qualified_pointer_type : lltype -> int -> lltype
+
 
 (** [vector_type ty n] returns the array type containing [n] elements of the
     primitive type [ty]. See the method [llvm::ArrayType::get]. *)
-external vector_type : lltype -> int -> lltype = "llvm_vector_type"
+val vector_type : lltype -> int -> lltype
 
 (** [element_type ty] returns the element type of the pointer, vector, or array
     type [ty]. See the method [llvm::SequentialType::get]. *)
-external element_type : lltype -> lltype = "LLVMGetElementType"
+val element_type : lltype -> lltype
 
 (** [element_type aty] returns the element count of the array type [aty].
     See the method [llvm::ArrayType::getNumElements]. *)
-external array_length : lltype -> int = "llvm_array_length"
+val array_length : lltype -> int
 
 (** [address_space pty] returns the address space qualifier of the pointer type
     [pty]. See the method [llvm::PointerType::getAddressSpace]. *)
-external address_space : lltype -> int = "llvm_address_space"
+val address_space : lltype -> int
 
 (** [element_type ty] returns the element count of the vector type [ty].
     See the method [llvm::VectorType::getNumElements]. *)
-external vector_size : lltype -> int = "llvm_vector_size"
+val vector_size : lltype -> int
 
 
 (** {7 Operations on other types} *)
@@ -450,15 +450,15 @@ external vector_size : lltype -> int = "llvm_vector_size"
 (** [opaque_type c] creates a new opaque type distinct from any other in the
     context [c]. Opaque types are useful for building recursive types in
     combination with {!refine_type}. See [llvm::OpaqueType::get]. *)
-external opaque_type : llcontext -> lltype = "llvm_opaque_type"
+val opaque_type : llcontext -> lltype
 
 (** [void_type c] creates a type of a function which does not return any
     value in the context [c]. See [llvm::Type::VoidTy]. *)
-external void_type : llcontext -> lltype = "llvm_void_type"
+val void_type : llcontext -> lltype
 
 (** [label_type c] creates a type of a basic block in the context [c]. See
     [llvm::Type::LabelTy]. *)
-external label_type : llcontext -> lltype = "llvm_label_type"
+val label_type : llcontext -> lltype
 
 (** {7 Operations on type handles} *)
 
@@ -466,43 +466,43 @@ external label_type : llcontext -> lltype = "llvm_label_type"
     refined as a result of a call to {!refine_type}, the handle will be updated;
     any bare [lltype] references will become invalid.
     See the class [llvm::PATypeHolder]. *)
-external handle_to_type : lltype -> lltypehandle = "llvm_handle_to_type"
+val handle_to_type : lltype -> lltypehandle
 
 (** [type_of_handle tyh] resolves the type handle [tyh].
     See the method [llvm::PATypeHolder::get()]. *)
-external type_of_handle : lltypehandle -> lltype = "llvm_type_of_handle"
+val type_of_handle : lltypehandle -> lltype
 
 (** [refine_type opaque_ty ty] replaces the abstract type [opaque_ty] with the
     concrete type [ty] in all users. Warning: This may invalidate {!lltype}
     values! Use {!lltypehandle} to manipulate potentially abstract types. See
     the method [llvm::Type::refineAbstractType]. *)
-external refine_type : lltype -> lltype -> unit = "llvm_refine_type"
+val refine_type : lltype -> lltype -> unit
 
 
 (* {6 Values} *)
 
 (** [type_of v] returns the type of the value [v].
     See the method [llvm::Value::getType]. *)
-external type_of : llvalue -> lltype = "llvm_type_of"
+val type_of : llvalue -> lltype
 
 (** [value_name v] returns the name of the value [v]. For global values, this is
     the symbol name. For instructions and basic blocks, it is the SSA register
     name. It is meaningless for constants.
     See the method [llvm::Value::getName]. *)
-external value_name : llvalue -> string = "llvm_value_name"
+val value_name : llvalue -> string
 
 (** [set_value_name n v] sets the name of the value [v] to [n]. See the method
     [llvm::Value::setName]. *)
-external set_value_name : string -> llvalue -> unit = "llvm_set_value_name"
+val set_value_name : string -> llvalue -> unit
 
 (** [dump_value v] prints the .ll representation of the value [v] to standard
     error. See the method [llvm::Value::dump]. *)
-external dump_value : llvalue -> unit = "llvm_dump_value"
+val dump_value : llvalue -> unit
 
 (** [replace_all_uses_with old new] replaces all uses of the value [old]
  * with the value [new]. See the method [llvm::Value::replaceAllUsesWith]. *)
-external replace_all_uses_with : llvalue -> llvalue -> unit
-                               = "LLVMReplaceAllUsesWith"
+val replace_all_uses_with : llvalue -> llvalue -> unit
+
 
 
 (* {6 Uses} *)
@@ -510,19 +510,19 @@ external replace_all_uses_with : llvalue -> llvalue -> unit
 (** [use_begin v] returns the first position in the use list for the value [v].
     [use_begin] and [use_succ] can e used to iterate over the use list in order.
     See the method [llvm::Value::use_begin]. *)
-external use_begin : llvalue -> lluse option = "llvm_use_begin"
+val use_begin : llvalue -> lluse option
 
 (** [use_succ u] returns the use list position succeeding [u].
     See the method [llvm::use_value_iterator::operator++]. *)
-external use_succ : lluse -> lluse option = "llvm_use_succ"
+val use_succ : lluse -> lluse option
 
 (** [user u] returns the user of the use [u].
     See the method [llvm::Use::getUser]. *)
-external user : lluse -> llvalue = "llvm_user"
+val user : lluse -> llvalue
 
 (** [used_value u] returns the usee of the use [u].
     See the method [llvm::Use::getUsedValue]. *)
-external used_value : lluse -> llvalue = "llvm_used_value"
+val used_value : lluse -> llvalue
 
 (** [iter_uses f v] applies function [f] to each of the users of the value [v]
     in order. Tail recursive. *)
@@ -541,46 +541,46 @@ val fold_right_uses : (lluse -> 'a -> 'a) -> llvalue -> 'a -> 'a
 
 (** [operand v i] returns the operand at index [i] for the value [v]. See the
     method [llvm::User::getOperand]. *)
-external operand : llvalue -> int -> llvalue = "llvm_operand"
+val operand : llvalue -> int -> llvalue
 
 (** [set_operand v i o] sets the operand of the value [v] at the index [i] to
     the value [o].
     See the method [llvm::User::setOperand]. *)
-external set_operand : llvalue -> int -> llvalue -> unit = "llvm_set_operand"
+val set_operand : llvalue -> int -> llvalue -> unit
 
 (** [num_operands v] returns the number of operands for the value [v].
     See the method [llvm::User::getNumOperands]. *)
-external num_operands : llvalue -> int = "llvm_num_operands"
+val num_operands : llvalue -> int
 
 (** {7 Operations on constants of (mostly) any type} *)
 
 (** [is_constant v] returns [true] if the value [v] is a constant, [false]
     otherwise. Similar to [llvm::isa<Constant>]. *)
-external is_constant : llvalue -> bool = "llvm_is_constant"
+val is_constant : llvalue -> bool
 
 (** [const_null ty] returns the constant null (zero) of the type [ty].
     See the method [llvm::Constant::getNullValue]. *)
-external const_null : lltype -> llvalue = "LLVMConstNull"
+val const_null : lltype -> llvalue
 
 (** [const_all_ones ty] returns the constant '-1' of the integer or vector type
     [ty]. See the method [llvm::Constant::getAllOnesValue]. *)
-external const_all_ones : (*int|vec*)lltype -> llvalue = "LLVMConstAllOnes"
+val const_all_ones : (*int|vec*)lltype -> llvalue
 
 (** [const_pointer_null ty] returns the constant null (zero) pointer of the type
     [ty]. See the method [llvm::ConstantPointerNull::get]. *)
-external const_pointer_null : lltype -> llvalue = "LLVMConstPointerNull"
+val const_pointer_null : lltype -> llvalue
 
 (** [undef ty] returns the undefined value of the type [ty].
     See the method [llvm::UndefValue::get]. *)
-external undef : lltype -> llvalue = "LLVMGetUndef"
+val undef : lltype -> llvalue
 
 (** [is_null v] returns [true] if the value [v] is the null (zero) value.
     See the method [llvm::Constant::isNullValue]. *)
-external is_null : llvalue -> bool = "llvm_is_null"
+val is_null : llvalue -> bool
 
 (** [is_undef v] returns [true] if the value [v] is an undefined value, [false]
     otherwise. Similar to [llvm::isa<UndefValue>]. *)
-external is_undef : llvalue -> bool = "llvm_is_undef"
+val is_undef : llvalue -> bool
 
 
 (** {7 Operations on instructions} *)
@@ -588,58 +588,58 @@ external is_undef : llvalue -> bool = "llvm_is_undef"
 (** [has_metadata i] returns whether or not the instruction [i] has any
     metadata attached to it. See the function
     [llvm::Instruction::hasMetadata]. *)
-external has_metadata : llvalue -> bool = "llvm_has_metadata"
+val has_metadata : llvalue -> bool
 
 (** [metadata i kind] optionally returns the metadata associated with the
     kind [kind] in the instruction [i] See the function
     [llvm::Instruction::getMetadata]. *)
-external metadata : llvalue -> int -> llvalue option = "llvm_metadata"
+val metadata : llvalue -> int -> llvalue option
 
 (** [set_metadata i kind md] sets the metadata [md] of kind [kind] in the
     instruction [i]. See the function [llvm::Instruction::setMetadata]. *)
-external set_metadata : llvalue -> int -> llvalue -> unit = "llvm_set_metadata"
+val set_metadata : llvalue -> int -> llvalue -> unit
 
 (** [clear_metadata i kind] clears the metadata of kind [kind] in the
     instruction [i]. See the function [llvm::Instruction::setMetadata]. *)
-external clear_metadata : llvalue -> int -> unit = "llvm_clear_metadata"
+val clear_metadata : llvalue -> int -> unit
 
 
 (** {7 Operations on metadata} *)
 
 (** [mdstring c s] returns the MDString of the string [s] in the context [c].
     See the method [llvm::MDNode::get]. *)
-external mdstring : llcontext -> string -> llvalue = "llvm_mdstring"
+val mdstring : llcontext -> string -> llvalue
 
 (** [mdnode c elts] returns the MDNode containing the values [elts] in the
     context [c].
     See the method [llvm::MDNode::get]. *)
-external mdnode : llcontext -> llvalue array -> llvalue = "llvm_mdnode"
+val mdnode : llcontext -> llvalue array -> llvalue
 
 
 (** {7 Operations on scalar constants} *)
 
 (** [const_int ty i] returns the integer constant of type [ty] and value [i].
     See the method [llvm::ConstantInt::get]. *)
-external const_int : lltype -> int -> llvalue = "llvm_const_int"
+val const_int : lltype -> int -> llvalue
 
 (** [const_of_int64 ty i] returns the integer constant of type [ty] and value
     [i]. See the method [llvm::ConstantInt::get]. *)
-external const_of_int64 : lltype -> Int64.t -> bool -> llvalue
-                        = "llvm_const_of_int64"
+val const_of_int64 : lltype -> Int64.t -> bool -> llvalue
+
 
 (** [const_int_of_string ty s r] returns the integer constant of type [ty] and
  * value [s], with the radix [r]. See the method [llvm::ConstantInt::get]. *)
-external const_int_of_string : lltype -> string -> int -> llvalue
-                   = "llvm_const_int_of_string"
+val const_int_of_string : lltype -> string -> int -> llvalue
+
 
 (** [const_float ty n] returns the floating point constant of type [ty] and
     value [n]. See the method [llvm::ConstantFP::get]. *)
-external const_float : lltype -> float -> llvalue = "llvm_const_float"
+val const_float : lltype -> float -> llvalue
 
 (** [const_float_of_string ty s] returns the floating point constant of type
     [ty] and value [n]. See the method [llvm::ConstantFP::get]. *)
-external const_float_of_string : lltype -> string -> llvalue
-                               = "llvm_const_float_of_string"
+val const_float_of_string : lltype -> string -> llvalue
+
 
 
 (** {7 Operations on composite constants} *)
@@ -649,39 +649,39 @@ external const_float_of_string : lltype -> string -> llvalue
     null-terminated (but see {!const_stringz}). This value can in turn be used
     as the initializer for a global variable. See the method
     [llvm::ConstantArray::get]. *)
-external const_string : llcontext -> string -> llvalue = "llvm_const_string"
+val const_string : llcontext -> string -> llvalue
 
 (** [const_stringz c s] returns the constant [i8] array with the values of the
     characters in the string [s] and a null terminator in the context [c]. This
     value can in turn be used as the initializer for a global variable.
     See the method [llvm::ConstantArray::get]. *)
-external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz"
+val const_stringz : llcontext -> string -> llvalue
 
 (** [const_array ty elts] returns the constant array of type
     [array_type ty (Array.length elts)] and containing the values [elts].
     This value can in turn be used as the initializer for a global variable.
     See the method [llvm::ConstantArray::get]. *)
-external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array"
+val const_array : lltype -> llvalue array -> llvalue
 
 (** [const_struct context elts] returns the structured constant of type
     [struct_type (Array.map type_of elts)] and containing the values [elts]
     in the context [context]. This value can in turn be used as the initializer
     for a global variable. See the method [llvm::ConstantStruct::get]. *)
-external const_struct : llcontext -> llvalue array -> llvalue
-                      = "llvm_const_struct"
+val const_struct : llcontext -> llvalue array -> llvalue
+
 
 (** [const_packed_struct context elts] returns the structured constant of
     type {!packed_struct_type} [(Array.map type_of elts)] and containing the
     values [elts] in the context [context]. This value can in turn be used as
     the initializer for a global variable. See the method
     [llvm::ConstantStruct::get]. *)
-external const_packed_struct : llcontext -> llvalue array -> llvalue
-                             = "llvm_const_packed_struct"
+val const_packed_struct : llcontext -> llvalue array -> llvalue
+
 
 (** [const_vector elts] returns the vector constant of type
     [vector_type (type_of elts.(0)) (Array.length elts)] and containing the
     values [elts]. See the method [llvm::ConstantVector::get]. *)
-external const_vector : llvalue array -> llvalue = "llvm_const_vector"
+val const_vector : llvalue array -> llvalue
 
 
 (** {7 Constant expressions} *)
@@ -690,286 +690,286 @@ external const_vector : llvalue array -> llvalue = "llvm_const_vector"
     equivalent to [const_ptrtoint (const_gep (const_null (pointer_type {i8,ty}))
     (const_int i32_type 0) (const_int i32_type 1)) i32_type], but considerably
     more readable.  See the method [llvm::ConstantExpr::getAlignOf]. *)
-external align_of : lltype -> llvalue = "LLVMAlignOf"
+val align_of : lltype -> llvalue
 
 (** [size_of ty] returns the sizeof constant for the type [ty]. This is
     equivalent to [const_ptrtoint (const_gep (const_null (pointer_type ty))
     (const_int i32_type 1)) i64_type], but considerably more readable.
     See the method [llvm::ConstantExpr::getSizeOf]. *)
-external size_of : lltype -> llvalue = "LLVMSizeOf"
+val size_of : lltype -> llvalue
 
 (** [const_neg c] returns the arithmetic negation of the constant [c].
     See the method [llvm::ConstantExpr::getNeg]. *)
-external const_neg : llvalue -> llvalue = "LLVMConstNeg"
+val const_neg : llvalue -> llvalue
 
 (** [const_nsw_neg c] returns the arithmetic negation of the constant [c] with
     no signed wrapping. The result is undefined if the negation overflows.
     See the method [llvm::ConstantExpr::getNSWNeg]. *)
-external const_nsw_neg : llvalue -> llvalue = "LLVMConstNSWNeg"
+val const_nsw_neg : llvalue -> llvalue
 
 (** [const_nuw_neg c] returns the arithmetic negation of the constant [c] with
     no unsigned wrapping. The result is undefined if the negation overflows.
     See the method [llvm::ConstantExpr::getNUWNeg]. *)
-external const_nuw_neg : llvalue -> llvalue = "LLVMConstNUWNeg"
+val const_nuw_neg : llvalue -> llvalue
 
 (** [const_fneg c] returns the arithmetic negation of the constant float [c].
     See the method [llvm::ConstantExpr::getFNeg]. *)
-external const_fneg : llvalue -> llvalue = "LLVMConstFNeg"
+val const_fneg : llvalue -> llvalue
 
 (** [const_not c] returns the bitwise inverse of the constant [c].
     See the method [llvm::ConstantExpr::getNot]. *)
-external const_not : llvalue -> llvalue = "LLVMConstNot"
+val const_not : llvalue -> llvalue
 
 (** [const_add c1 c2] returns the constant sum of two constants.
     See the method [llvm::ConstantExpr::getAdd]. *)
-external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd"
+val const_add : llvalue -> llvalue -> llvalue
 
 (** [const_nsw_add c1 c2] returns the constant sum of two constants with no
     signed wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWAdd]. *)
-external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd"
+val const_nsw_add : llvalue -> llvalue -> llvalue
 
 (** [const_nuw_add c1 c2] returns the constant sum of two constants with no
     unsigned wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWAdd]. *)
-external const_nuw_add : llvalue -> llvalue -> llvalue = "LLVMConstNUWAdd"
+val const_nuw_add : llvalue -> llvalue -> llvalue
 
 (** [const_fadd c1 c2] returns the constant sum of two constant floats.
     See the method [llvm::ConstantExpr::getFAdd]. *)
-external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd"
+val const_fadd : llvalue -> llvalue -> llvalue
 
 (** [const_sub c1 c2] returns the constant difference, [c1 - c2], of two
     constants. See the method [llvm::ConstantExpr::getSub]. *)
-external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub"
+val const_sub : llvalue -> llvalue -> llvalue
 
 (** [const_nsw_sub c1 c2] returns the constant difference of two constants with
     no signed wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWSub]. *)
-external const_nsw_sub : llvalue -> llvalue -> llvalue = "LLVMConstNSWSub"
+val const_nsw_sub : llvalue -> llvalue -> llvalue
 
 (** [const_nuw_sub c1 c2] returns the constant difference of two constants with
     no unsigned wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWSub]. *)
-external const_nuw_sub : llvalue -> llvalue -> llvalue = "LLVMConstNUWSub"
+val const_nuw_sub : llvalue -> llvalue -> llvalue
 
 (** [const_fsub c1 c2] returns the constant difference, [c1 - c2], of two
     constant floats. See the method [llvm::ConstantExpr::getFSub]. *)
-external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub"
+val const_fsub : llvalue -> llvalue -> llvalue
 
 (** [const_mul c1 c2] returns the constant product of two constants.
     See the method [llvm::ConstantExpr::getMul]. *)
-external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul"
+val const_mul : llvalue -> llvalue -> llvalue
 
 (** [const_nsw_mul c1 c2] returns the constant product of two constants with
     no signed wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWMul]. *)
-external const_nsw_mul : llvalue -> llvalue -> llvalue = "LLVMConstNSWMul"
+val const_nsw_mul : llvalue -> llvalue -> llvalue
 
 (** [const_nuw_mul c1 c2] returns the constant product of two constants with
     no unsigned wrapping. The result is undefined if the sum overflows.
     See the method [llvm::ConstantExpr::getNSWMul]. *)
-external const_nuw_mul : llvalue -> llvalue -> llvalue = "LLVMConstNUWMul"
+val const_nuw_mul : llvalue -> llvalue -> llvalue
 
 (** [const_fmul c1 c2] returns the constant product of two constants floats.
     See the method [llvm::ConstantExpr::getFMul]. *)
-external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul"
+val const_fmul : llvalue -> llvalue -> llvalue
 
 (** [const_udiv c1 c2] returns the constant quotient [c1 / c2] of two unsigned
     integer constants.
     See the method [llvm::ConstantExpr::getUDiv]. *)
-external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv"
+val const_udiv : llvalue -> llvalue -> llvalue
 
 (** [const_sdiv c1 c2] returns the constant quotient [c1 / c2] of two signed
     integer constants.
     See the method [llvm::ConstantExpr::getSDiv]. *)
-external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv"
+val const_sdiv : llvalue -> llvalue -> llvalue
 
 (** [const_exact_sdiv c1 c2] returns the constant quotient [c1 / c2] of two
     signed integer constants. The result is undefined if the result is rounded
     or overflows. See the method [llvm::ConstantExpr::getExactSDiv]. *)
-external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv"
+val const_exact_sdiv : llvalue -> llvalue -> llvalue
 
 (** [const_fdiv c1 c2] returns the constant quotient [c1 / c2] of two floating
     point constants.
     See the method [llvm::ConstantExpr::getFDiv]. *)
-external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv"
+val const_fdiv : llvalue -> llvalue -> llvalue
 
 (** [const_urem c1 c2] returns the constant remainder [c1 MOD c2] of two
     unsigned integer constants.
     See the method [llvm::ConstantExpr::getURem]. *)
-external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem"
+val const_urem : llvalue -> llvalue -> llvalue
 
 (** [const_srem c1 c2] returns the constant remainder [c1 MOD c2] of two
     signed integer constants.
     See the method [llvm::ConstantExpr::getSRem]. *)
-external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem"
+val const_srem : llvalue -> llvalue -> llvalue
 
 (** [const_frem c1 c2] returns the constant remainder [c1 MOD c2] of two
     signed floating point constants.
     See the method [llvm::ConstantExpr::getFRem]. *)
-external const_frem : llvalue -> llvalue -> llvalue = "LLVMConstFRem"
+val const_frem : llvalue -> llvalue -> llvalue
 
 (** [const_and c1 c2] returns the constant bitwise [AND] of two integer
     constants.
     See the method [llvm::ConstantExpr::getAnd]. *)
-external const_and : llvalue -> llvalue -> llvalue = "LLVMConstAnd"
+val const_and : llvalue -> llvalue -> llvalue
 
 (** [const_or c1 c2] returns the constant bitwise [OR] of two integer
     constants.
     See the method [llvm::ConstantExpr::getOr]. *)
-external const_or : llvalue -> llvalue -> llvalue = "LLVMConstOr"
+val const_or : llvalue -> llvalue -> llvalue
 
 (** [const_xor c1 c2] returns the constant bitwise [XOR] of two integer
     constants.
     See the method [llvm::ConstantExpr::getXor]. *)
-external const_xor : llvalue -> llvalue -> llvalue = "LLVMConstXor"
+val const_xor : llvalue -> llvalue -> llvalue
 
 (** [const_icmp pred c1 c2] returns the constant comparison of two integer
     constants, [c1 pred c2].
     See the method [llvm::ConstantExpr::getICmp]. *)
-external const_icmp : Icmp.t -> llvalue -> llvalue -> llvalue
-                    = "llvm_const_icmp"
+val const_icmp : Icmp.t -> llvalue -> llvalue -> llvalue
+
 
 (** [const_fcmp pred c1 c2] returns the constant comparison of two floating
     point constants, [c1 pred c2].
     See the method [llvm::ConstantExpr::getFCmp]. *)
-external const_fcmp : Fcmp.t -> llvalue -> llvalue -> llvalue
-                    = "llvm_const_fcmp"
+val const_fcmp : Fcmp.t -> llvalue -> llvalue -> llvalue
+
 
 (** [const_shl c1 c2] returns the constant integer [c1] left-shifted by the
     constant integer [c2].
     See the method [llvm::ConstantExpr::getShl]. *)
-external const_shl : llvalue -> llvalue -> llvalue = "LLVMConstShl"
+val const_shl : llvalue -> llvalue -> llvalue
 
 (** [const_lshr c1 c2] returns the constant integer [c1] right-shifted by the
     constant integer [c2] with zero extension.
     See the method [llvm::ConstantExpr::getLShr]. *)
-external const_lshr : llvalue -> llvalue -> llvalue = "LLVMConstLShr"
+val const_lshr : llvalue -> llvalue -> llvalue
 
 (** [const_ashr c1 c2] returns the constant integer [c1] right-shifted by the
     constant integer [c2] with sign extension.
     See the method [llvm::ConstantExpr::getAShr]. *)
-external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr"
+val const_ashr : llvalue -> llvalue -> llvalue
 
 (** [const_gep pc indices] returns the constant [getElementPtr] of [p1] with the
     constant integers indices from the array [indices].
     See the method [llvm::ConstantExpr::getGetElementPtr]. *)
-external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep"
+val const_gep : llvalue -> llvalue array -> llvalue
 
 (** [const_in_bounds_gep pc indices] returns the constant [getElementPtr] of [p1]
     with the constant integers indices from the array [indices].
     See the method [llvm::ConstantExpr::getInBoundsGetElementPtr]. *)
-external const_in_bounds_gep : llvalue -> llvalue array -> llvalue
-                            = "llvm_const_in_bounds_gep"
+val const_in_bounds_gep : llvalue -> llvalue array -> llvalue
+
 
 (** [const_trunc c ty] returns the constant truncation of integer constant [c]
     to the smaller integer type [ty].
     See the method [llvm::ConstantExpr::getTrunc]. *)
-external const_trunc : llvalue -> lltype -> llvalue = "LLVMConstTrunc"
+val const_trunc : llvalue -> lltype -> llvalue
 
 (** [const_sext c ty] returns the constant sign extension of integer constant
     [c] to the larger integer type [ty].
     See the method [llvm::ConstantExpr::getSExt]. *)
-external const_sext : llvalue -> lltype -> llvalue = "LLVMConstSExt"
+val const_sext : llvalue -> lltype -> llvalue
 
 (** [const_zext c ty] returns the constant zero extension of integer constant
     [c] to the larger integer type [ty].
     See the method [llvm::ConstantExpr::getZExt]. *)
-external const_zext : llvalue -> lltype -> llvalue = "LLVMConstZExt"
+val const_zext : llvalue -> lltype -> llvalue
 
 (** [const_fptrunc c ty] returns the constant truncation of floating point
     constant [c] to the smaller floating point type [ty].
     See the method [llvm::ConstantExpr::getFPTrunc]. *)
-external const_fptrunc : llvalue -> lltype -> llvalue = "LLVMConstFPTrunc"
+val const_fptrunc : llvalue -> lltype -> llvalue
 
 (** [const_fpext c ty] returns the constant extension of floating point constant
     [c] to the larger floating point type [ty].
     See the method [llvm::ConstantExpr::getFPExt]. *)
-external const_fpext : llvalue -> lltype -> llvalue = "LLVMConstFPExt"
+val const_fpext : llvalue -> lltype -> llvalue
 
 (** [const_uitofp c ty] returns the constant floating point conversion of
     unsigned integer constant [c] to the floating point type [ty].
     See the method [llvm::ConstantExpr::getUIToFP]. *)
-external const_uitofp : llvalue -> lltype -> llvalue = "LLVMConstUIToFP"
+val const_uitofp : llvalue -> lltype -> llvalue
 
 (** [const_sitofp c ty] returns the constant floating point conversion of
     signed integer constant [c] to the floating point type [ty].
     See the method [llvm::ConstantExpr::getSIToFP]. *)
-external const_sitofp : llvalue -> lltype -> llvalue = "LLVMConstSIToFP"
+val const_sitofp : llvalue -> lltype -> llvalue
 
 (** [const_fptoui c ty] returns the constant unsigned integer conversion of
     floating point constant [c] to integer type [ty].
     See the method [llvm::ConstantExpr::getFPToUI]. *)
-external const_fptoui : llvalue -> lltype -> llvalue = "LLVMConstFPToUI"
+val const_fptoui : llvalue -> lltype -> llvalue
 
 (** [const_fptoui c ty] returns the constant unsigned integer conversion of
     floating point constant [c] to integer type [ty].
     See the method [llvm::ConstantExpr::getFPToSI]. *)
-external const_fptosi : llvalue -> lltype -> llvalue = "LLVMConstFPToSI"
+val const_fptosi : llvalue -> lltype -> llvalue
 
 (** [const_ptrtoint c ty] returns the constant integer conversion of
     pointer constant [c] to integer type [ty].
     See the method [llvm::ConstantExpr::getPtrToInt]. *)
-external const_ptrtoint : llvalue -> lltype -> llvalue = "LLVMConstPtrToInt"
+val const_ptrtoint : llvalue -> lltype -> llvalue
 
 (** [const_inttoptr c ty] returns the constant pointer conversion of
     integer constant [c] to pointer type [ty].
     See the method [llvm::ConstantExpr::getIntToPtr]. *)
-external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr"
+val const_inttoptr : llvalue -> lltype -> llvalue
 
 (** [const_bitcast c ty] returns the constant bitwise conversion of constant [c]
     to type [ty] of equal size.
     See the method [llvm::ConstantExpr::getBitCast]. *)
-external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast"
+val const_bitcast : llvalue -> lltype -> llvalue
 
 (** [const_zext_or_bitcast c ty] returns a constant zext or bitwise cast
     conversion of constant [c] to type [ty].
     See the method [llvm::ConstantExpr::getZExtOrBitCast]. *)
-external const_zext_or_bitcast : llvalue -> lltype -> llvalue
-                               = "LLVMConstZExtOrBitCast"
+val const_zext_or_bitcast : llvalue -> lltype -> llvalue
+
 
 (** [const_sext_or_bitcast c ty] returns a constant sext or bitwise cast
     conversion of constant [c] to type [ty].
     See the method [llvm::ConstantExpr::getSExtOrBitCast]. *)
-external const_sext_or_bitcast : llvalue -> lltype -> llvalue
-                               = "LLVMConstSExtOrBitCast"
+val const_sext_or_bitcast : llvalue -> lltype -> llvalue
+
 
 (** [const_trunc_or_bitcast c ty] returns a constant trunc or bitwise cast
     conversion of constant [c] to type [ty].
     See the method [llvm::ConstantExpr::getTruncOrBitCast]. *)
-external const_trunc_or_bitcast : llvalue -> lltype -> llvalue
-                                = "LLVMConstTruncOrBitCast"
+val const_trunc_or_bitcast : llvalue -> lltype -> llvalue
+
 
 (** [const_pointercast c ty] returns a constant bitcast or a pointer-to-int
     cast conversion of constant [c] to type [ty] of equal size.
     See the method [llvm::ConstantExpr::getPointerCast]. *)
-external const_pointercast : llvalue -> lltype -> llvalue
-                           = "LLVMConstPointerCast"
+val const_pointercast : llvalue -> lltype -> llvalue
+
 
 (** [const_intcast c ty] returns a constant zext, bitcast, or trunc for integer
     -> integer casts of constant [c] to type [ty].
     See the method [llvm::ConstantExpr::getIntCast]. *)
-external const_intcast : llvalue -> lltype -> llvalue
-                       = "LLVMConstIntCast"
+val const_intcast : llvalue -> lltype -> llvalue
+
 
 (** [const_fpcast c ty] returns a constant fpext, bitcast, or fptrunc for fp ->
     fp casts of constant [c] to type [ty].
     See the method [llvm::ConstantExpr::getFPCast]. *)
-external const_fpcast : llvalue -> lltype -> llvalue
-                      = "LLVMConstFPCast"
+val const_fpcast : llvalue -> lltype -> llvalue
+
 
 (** [const_select cond t f] returns the constant conditional which returns value
     [t] if the boolean constant [cond] is true and the value [f] otherwise.
     See the method [llvm::ConstantExpr::getSelect]. *)
-external const_select : llvalue -> llvalue -> llvalue -> llvalue
-                      = "LLVMConstSelect"
+val const_select : llvalue -> llvalue -> llvalue -> llvalue
+
 
 (** [const_extractelement vec i] returns the constant [i]th element of
     constant vector [vec]. [i] must be a constant [i32] value unsigned less than
     the size of the vector.
     See the method [llvm::ConstantExpr::getExtractElement]. *)
-external const_extractelement : llvalue -> llvalue -> llvalue
-                              = "LLVMConstExtractElement"
+val const_extractelement : llvalue -> llvalue -> llvalue
+
 
 (** [const_insertelement vec v i] returns the constant vector with the same
     elements as constant vector [v] but the [i]th element replaced by the
@@ -977,82 +977,82 @@ external const_extractelement : llvalue -> llvalue -> llvalue
     elements. [i] must be a constant [i32] value unsigned less than the size
     of the vector.
     See the method [llvm::ConstantExpr::getInsertElement]. *)
-external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
-                             = "LLVMConstInsertElement"
+val const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
+
 
 (** [const_shufflevector a b mask] returns a constant [shufflevector].
     See the LLVM Language Reference for details on the [shufflevector]
     instruction.
     See the method [llvm::ConstantExpr::getShuffleVector]. *)
-external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
-                             = "LLVMConstShuffleVector"
+val const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
+
 
 (** [const_extractvalue agg idxs] returns the constant [idxs]th value of
     constant aggregate [agg]. Each [idxs] must be less than the size of the
     aggregate.  See the method [llvm::ConstantExpr::getExtractValue]. *)
-external const_extractvalue : llvalue -> int array -> llvalue
-                            = "llvm_const_extractvalue"
+val const_extractvalue : llvalue -> int array -> llvalue
+
 
 (** [const_insertvalue agg val idxs] inserts the value [val] in the specified
     indexs [idxs] in the aggegate [agg]. Each [idxs] must be less than the size
     of the aggregate. See the method [llvm::ConstantExpr::getInsertValue]. *)
-external const_insertvalue : llvalue -> llvalue -> int array -> llvalue
-                           = "llvm_const_insertvalue"
+val const_insertvalue : llvalue -> llvalue -> int array -> llvalue
+
 
 (** [const_inline_asm ty asm con side align] inserts a inline assembly string.
     See the method [llvm::InlineAsm::get]. *)
-external const_inline_asm : lltype -> string -> string -> bool -> bool ->
+val const_inline_asm : lltype -> string -> string -> bool -> bool ->
                             llvalue
-                          = "llvm_const_inline_asm"
+
 
 (** [block_address f bb] returns the address of the basic block [bb] in the
     function [f]. See the method [llvm::BasicBlock::get]. *)
-external block_address : llvalue -> llbasicblock -> llvalue = "LLVMBlockAddress"
+val block_address : llvalue -> llbasicblock -> llvalue
 
 
 (** {7 Operations on global variables, functions, and aliases (globals)} *)
 
 (** [global_parent g] is the enclosing module of the global value [g].
     See the method [llvm::GlobalValue::getParent]. *)
-external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent"
+val global_parent : llvalue -> llmodule
 
 (** [is_declaration g] returns [true] if the global value [g] is a declaration
     only. Returns [false] otherwise.
     See the method [llvm::GlobalValue::isDeclaration]. *)
-external is_declaration : llvalue -> bool = "llvm_is_declaration"
+val is_declaration : llvalue -> bool
 
 (** [linkage g] returns the linkage of the global value [g].
     See the method [llvm::GlobalValue::getLinkage]. *)
-external linkage : llvalue -> Linkage.t = "llvm_linkage"
+val linkage : llvalue -> Linkage.t
 
 (** [set_linkage l g] sets the linkage of the global value [g] to [l].
     See the method [llvm::GlobalValue::setLinkage]. *)
-external set_linkage : Linkage.t -> llvalue -> unit = "llvm_set_linkage"
+val set_linkage : Linkage.t -> llvalue -> unit
 
 (** [section g] returns the linker section of the global value [g].
     See the method [llvm::GlobalValue::getSection]. *)
-external section : llvalue -> string = "llvm_section"
+val section : llvalue -> string
 
 (** [set_section s g] sets the linker section of the global value [g] to [s].
     See the method [llvm::GlobalValue::setSection]. *)
-external set_section : string -> llvalue -> unit = "llvm_set_section"
+val set_section : string -> llvalue -> unit
 
 (** [visibility g] returns the linker visibility of the global value [g].
     See the method [llvm::GlobalValue::getVisibility]. *)
-external visibility : llvalue -> Visibility.t = "llvm_visibility"
+val visibility : llvalue -> Visibility.t
 
 (** [set_visibility v g] sets the linker visibility of the global value [g] to
     [v]. See the method [llvm::GlobalValue::setVisibility]. *)
-external set_visibility : Visibility.t -> llvalue -> unit
-                        = "llvm_set_visibility"
+val set_visibility : Visibility.t -> llvalue -> unit
+
 
 (** [alignment g] returns the required alignment of the global value [g].
     See the method [llvm::GlobalValue::getAlignment]. *)
-external alignment : llvalue -> int = "llvm_alignment"
+val alignment : llvalue -> int
 
 (** [set_alignment n g] sets the required alignment of the global value [g] to
     [n] bytes. See the method [llvm::GlobalValue::setAlignment]. *)
-external set_alignment : int -> llvalue -> unit = "llvm_set_alignment"
+val set_alignment : int -> llvalue -> unit
 
 
 (** {7 Operations on global variables} *)
@@ -1061,55 +1061,55 @@ external set_alignment : int -> llvalue -> unit = "llvm_set_alignment"
     with name [name] in module [m] in the default address space (0). If such a
     global variable already exists, it is returned. If the type of the existing
     global differs, then a bitcast to [ty] is returned. *)
-external declare_global : lltype -> string -> llmodule -> llvalue
-                        = "llvm_declare_global"
+val declare_global : lltype -> string -> llmodule -> llvalue
+
 
 (** [declare_qualified_global ty name addrspace m] returns a new global variable
     of type [ty] and with name [name] in module [m] in the address space
     [addrspace]. If such a global variable already exists, it is returned. If
     the type of the existing global differs, then a bitcast to [ty] is
     returned. *)
-external declare_qualified_global : lltype -> string -> int -> llmodule ->
+val declare_qualified_global : lltype -> string -> int -> llmodule ->
                                     llvalue
-                                  = "llvm_declare_qualified_global"
+
 
 (** [define_global name init m] returns a new global with name [name] and
     initializer [init] in module [m] in the default address space (0). If the
     named global already exists, it is renamed.
     See the constructor of [llvm::GlobalVariable]. *)
-external define_global : string -> llvalue -> llmodule -> llvalue
-                       = "llvm_define_global"
+val define_global : string -> llvalue -> llmodule -> llvalue
+
 
 (** [define_qualified_global name init addrspace m] returns a new global with
     name [name] and initializer [init] in module [m] in the address space
     [addrspace]. If the named global already exists, it is renamed.
     See the constructor of [llvm::GlobalVariable]. *)
-external define_qualified_global : string -> llvalue -> int -> llmodule ->
+val define_qualified_global : string -> llvalue -> int -> llmodule ->
                                    llvalue
-                                 = "llvm_define_qualified_global"
+
 
 (** [lookup_global name m] returns [Some g] if a global variable with name
     [name] exists in module [m]. If no such global exists, returns [None].
     See the [llvm::GlobalVariable] constructor. *)
-external lookup_global : string -> llmodule -> llvalue option
-                       = "llvm_lookup_global"
+val lookup_global : string -> llmodule -> llvalue option
+
 
 (** [delete_global gv] destroys the global variable [gv].
     See the method [llvm::GlobalVariable::eraseFromParent]. *)
-external delete_global : llvalue -> unit = "llvm_delete_global"
+val delete_global : llvalue -> unit
 
 (** [global_begin m] returns the first position in the global variable list of
     the module [m]. [global_begin] and [global_succ] can be used to iterate
     over the global list in order.
     See the method [llvm::Module::global_begin]. *)
-external global_begin : llmodule -> (llmodule, llvalue) llpos
-                      = "llvm_global_begin"
+val global_begin : llmodule -> (llmodule, llvalue) llpos
+
 
 (** [global_succ gv] returns the global variable list position succeeding
     [Before gv].
     See the method [llvm::Module::global_iterator::operator++]. *)
-external global_succ : llvalue -> (llmodule, llvalue) llpos
-                     = "llvm_global_succ"
+val global_succ : llvalue -> (llmodule, llvalue) llpos
+
 
 (** [iter_globals f m] applies function [f] to each of the global variables of
     module [m] in order. Tail recursive. *)
@@ -1123,14 +1123,14 @@ val fold_left_globals : ('a -> llvalue -> 'a) -> 'a -> llmodule -> 'a
     module [m]. [global_end] and [global_pred] can be used to iterate over the
     global list in reverse.
     See the method [llvm::Module::global_end]. *)
-external global_end : llmodule -> (llmodule, llvalue) llrev_pos
-                    = "llvm_global_end"
+val global_end : llmodule -> (llmodule, llvalue) llrev_pos
+
 
 (** [global_pred gv] returns the global variable list position preceding
     [After gv].
     See the method [llvm::Module::global_iterator::operator--]. *)
-external global_pred : llvalue -> (llmodule, llvalue) llrev_pos
-                     = "llvm_global_pred"
+val global_pred : llvalue -> (llmodule, llvalue) llrev_pos
+
 
 (** [rev_iter_globals f m] applies function [f] to each of the global variables
     of module [m] in reverse order. Tail recursive. *)
@@ -1143,37 +1143,37 @@ val fold_right_globals : (llvalue -> 'a -> 'a) -> llmodule -> 'a -> 'a
 (** [is_global_constant gv] returns [true] if the global variabile [gv] is a
     constant. Returns [false] otherwise.
     See the method [llvm::GlobalVariable::isConstant]. *)
-external is_global_constant : llvalue -> bool = "llvm_is_global_constant"
+val is_global_constant : llvalue -> bool
 
 (** [set_global_constant c gv] sets the global variable [gv] to be a constant if
     [c] is [true] and not if [c] is [false].
     See the method [llvm::GlobalVariable::setConstant]. *)
-external set_global_constant : bool -> llvalue -> unit
-                             = "llvm_set_global_constant"
+val set_global_constant : bool -> llvalue -> unit
+
 
 (** [global_initializer gv] returns the initializer for the global variable
     [gv]. See the method [llvm::GlobalVariable::getInitializer]. *)
-external global_initializer : llvalue -> llvalue = "LLVMGetInitializer"
+val global_initializer : llvalue -> llvalue
 
 (** [set_initializer c gv] sets the initializer for the global variable
     [gv] to the constant [c].
     See the method [llvm::GlobalVariable::setInitializer]. *)
-external set_initializer : llvalue -> llvalue -> unit = "llvm_set_initializer"
+val set_initializer : llvalue -> llvalue -> unit
 
 (** [remove_initializer gv] unsets the initializer for the global variable
     [gv].
     See the method [llvm::GlobalVariable::setInitializer]. *)
-external remove_initializer : llvalue -> unit = "llvm_remove_initializer"
+val remove_initializer : llvalue -> unit
 
 (** [is_thread_local gv] returns [true] if the global variable [gv] is
     thread-local and [false] otherwise.
     See the method [llvm::GlobalVariable::isThreadLocal]. *)
-external is_thread_local : llvalue -> bool = "llvm_is_thread_local"
+val is_thread_local : llvalue -> bool
 
 (** [set_thread_local c gv] sets the global variable [gv] to be thread local if
     [c] is [true] and not otherwise.
     See the method [llvm::GlobalVariable::setThreadLocal]. *)
-external set_thread_local : bool -> llvalue -> unit = "llvm_set_thread_local"
+val set_thread_local : bool -> llvalue -> unit
 
 
 (** {7 Operations on aliases} *)
@@ -1181,8 +1181,8 @@ external set_thread_local : bool -> llvalue -> unit = "llvm_set_thread_local"
 (** [add_alias m t a n] inserts an alias in the module [m] with the type [t] and
     the aliasee [a] with the name [n].
     See the constructor for [llvm::GlobalAlias]. *)
-external add_alias : llmodule -> lltype -> llvalue -> string -> llvalue
-                   = "llvm_add_alias"
+val add_alias : llmodule -> lltype -> llvalue -> string -> llvalue
+
 
 
 (** {7 Operations on functions} *)
@@ -1191,38 +1191,38 @@ external add_alias : llmodule -> lltype -> llvalue -> string -> llvalue
     with name [name] in module [m]. If such a function already exists,
     it is returned. If the type of the existing function differs, then a bitcast
     to [ty] is returned. *)
-external declare_function : string -> lltype -> llmodule -> llvalue
-                          = "llvm_declare_function"
+val declare_function : string -> lltype -> llmodule -> llvalue
+
 
 (** [define_function name ty m] creates a new function with name [name] and
     type [ty] in module [m]. If the named function already exists, it is
     renamed. An entry basic block is created in the function.
     See the constructor of [llvm::GlobalVariable]. *)
-external define_function : string -> lltype -> llmodule -> llvalue
-                         = "llvm_define_function"
+val define_function : string -> lltype -> llmodule -> llvalue
+
 
 (** [lookup_function name m] returns [Some f] if a function with name
     [name] exists in module [m]. If no such function exists, returns [None].
     See the method [llvm::Module] constructor. *)
-external lookup_function : string -> llmodule -> llvalue option
-                         = "llvm_lookup_function"
+val lookup_function : string -> llmodule -> llvalue option
+
 
 (** [delete_function f] destroys the function [f].
     See the method [llvm::Function::eraseFromParent]. *)
-external delete_function : llvalue -> unit = "llvm_delete_function"
+val delete_function : llvalue -> unit
 
 (** [function_begin m] returns the first position in the function list of the
     module [m]. [function_begin] and [function_succ] can be used to iterate over
     the function list in order.
     See the method [llvm::Module::begin]. *)
-external function_begin : llmodule -> (llmodule, llvalue) llpos
-                        = "llvm_function_begin"
+val function_begin : llmodule -> (llmodule, llvalue) llpos
+
 
 (** [function_succ gv] returns the function list position succeeding
     [Before gv].
     See the method [llvm::Module::iterator::operator++]. *)
-external function_succ : llvalue -> (llmodule, llvalue) llpos
-                       = "llvm_function_succ"
+val function_succ : llvalue -> (llmodule, llvalue) llpos
+
 
 (** [iter_functions f m] applies function [f] to each of the functions of module
     [m] in order. Tail recursive. *)
@@ -1236,13 +1236,13 @@ val fold_left_functions : ('a -> llvalue -> 'a) -> 'a -> llmodule -> 'a
     the module [m]. [function_end] and [function_pred] can be used to iterate
     over the function list in reverse.
     See the method [llvm::Module::end]. *)
-external function_end : llmodule -> (llmodule, llvalue) llrev_pos
-                      = "llvm_function_end"
+val function_end : llmodule -> (llmodule, llvalue) llrev_pos
+
 
 (** [function_pred gv] returns the function list position preceding [After gv].
     See the method [llvm::Module::iterator::operator--]. *)
-external function_pred : llvalue -> (llmodule, llvalue) llrev_pos
-                       = "llvm_function_pred"
+val function_pred : llvalue -> (llmodule, llvalue) llrev_pos
+
 
 (** [rev_iter_functions f fn] applies function [f] to each of the functions of
     module [m] in reverse order. Tail recursive. *)
@@ -1254,26 +1254,26 @@ val fold_right_functions : (llvalue -> 'a -> 'a) -> llmodule -> 'a -> 'a
 
 (** [is_intrinsic f] returns true if the function [f] is an intrinsic.
     See the method [llvm::Function::isIntrinsic]. *)
-external is_intrinsic : llvalue -> bool = "llvm_is_intrinsic"
+val is_intrinsic : llvalue -> bool
 
 (** [function_call_conv f] returns the calling convention of the function [f].
     See the method [llvm::Function::getCallingConv]. *)
-external function_call_conv : llvalue -> int = "llvm_function_call_conv"
+val function_call_conv : llvalue -> int
 
 (** [set_function_call_conv cc f] sets the calling convention of the function
     [f] to the calling convention numbered [cc].
     See the method [llvm::Function::setCallingConv]. *)
-external set_function_call_conv : int -> llvalue -> unit
-                                = "llvm_set_function_call_conv"
+val set_function_call_conv : int -> llvalue -> unit
+
 
 (** [gc f] returns [Some name] if the function [f] has a garbage
     collection algorithm specified and [None] otherwise.
     See the method [llvm::Function::getGC]. *)
-external gc : llvalue -> string option = "llvm_gc"
+val gc : llvalue -> string option
 
 (** [set_gc gc f] sets the collection algorithm for the function [f] to
     [gc]. See the method [llvm::Function::setGC]. *)
-external set_gc : string option -> llvalue -> unit = "llvm_set_gc"
+val set_gc : string option -> llvalue -> unit
 
 (** [add_function_attr f a] adds attribute [a] to the return type of function
     [f]. *)
@@ -1287,26 +1287,26 @@ val remove_function_attr : llvalue -> Attribute.t -> unit
 
 (** [params f] returns the parameters of function [f].
     See the method [llvm::Function::getArgumentList]. *)
-external params : llvalue -> llvalue array = "llvm_params"
+val params : llvalue -> llvalue array
 
 (** [param f n] returns the [n]th parameter of function [f].
     See the method [llvm::Function::getArgumentList]. *)
-external param : llvalue -> int -> llvalue = "llvm_param"
+val param : llvalue -> int -> llvalue
 
 (** [param_parent p] returns the parent function that owns the parameter.
     See the method [llvm::Argument::getParent]. *)
-external param_parent : llvalue -> llvalue = "LLVMGetParamParent"
+val param_parent : llvalue -> llvalue
 
 (** [param_begin f] returns the first position in the parameter list of the
     function [f]. [param_begin] and [param_succ] can be used to iterate over
     the parameter list in order.
     See the method [llvm::Function::arg_begin]. *)
-external param_begin : llvalue -> (llvalue, llvalue) llpos = "llvm_param_begin"
+val param_begin : llvalue -> (llvalue, llvalue) llpos
 
 (** [param_succ bb] returns the parameter list position succeeding
     [Before bb].
     See the method [llvm::Function::arg_iterator::operator++]. *)
-external param_succ : llvalue -> (llvalue, llvalue) llpos = "llvm_param_succ"
+val param_succ : llvalue -> (llvalue, llvalue) llpos
 
 (** [iter_params f fn] applies function [f] to each of the parameters
     of function [fn] in order. Tail recursive. *)
@@ -1320,12 +1320,12 @@ val fold_left_params : ('a -> llvalue -> 'a) -> 'a -> llvalue -> 'a
     the function [f]. [param_end] and [param_pred] can be used to iterate
     over the parameter list in reverse.
     See the method [llvm::Function::arg_end]. *)
-external param_end : llvalue -> (llvalue, llvalue) llrev_pos = "llvm_param_end"
+val param_end : llvalue -> (llvalue, llvalue) llrev_pos
 
 (** [param_pred gv] returns the function list position preceding [After gv].
     See the method [llvm::Function::arg_iterator::operator--]. *)
-external param_pred : llvalue -> (llvalue, llvalue) llrev_pos
-                    = "llvm_param_pred"
+val param_pred : llvalue -> (llvalue, llvalue) llrev_pos
+
 
 (** [rev_iter_params f fn] applies function [f] to each of the parameters
     of function [fn] in reverse order. Tail recursive. *)
@@ -1342,51 +1342,51 @@ val add_param_attr : llvalue -> Attribute.t -> unit
 val remove_param_attr : llvalue -> Attribute.t -> unit
 
 (** [set_param_alignment p a] set the alignment of parameter [p] to [a]. *)
-external set_param_alignment : llvalue -> int -> unit
-                             = "llvm_set_param_alignment"
+val set_param_alignment : llvalue -> int -> unit
+
 
 (** {7 Operations on basic blocks} *)
 
 (** [basic_blocks fn] returns the basic blocks of the function [f].
     See the method [llvm::Function::getBasicBlockList]. *)
-external basic_blocks : llvalue -> llbasicblock array = "llvm_basic_blocks"
+val basic_blocks : llvalue -> llbasicblock array
 
 (** [entry_block fn] returns the entry basic block of the function [f].
     See the method [llvm::Function::getEntryBlock]. *)
-external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock"
+val entry_block : llvalue -> llbasicblock
 
 (** [delete_block bb] deletes the basic block [bb].
     See the method [llvm::BasicBlock::eraseFromParent]. *)
-external delete_block : llbasicblock -> unit = "llvm_delete_block"
+val delete_block : llbasicblock -> unit
 
 (** [append_block c name f] creates a new basic block named [name] at the end of
     function [f] in the context [c].
     See the constructor of [llvm::BasicBlock]. *)
-external append_block : llcontext -> string -> llvalue -> llbasicblock
-                      = "llvm_append_block"
+val append_block : llcontext -> string -> llvalue -> llbasicblock
+
 
 (** [insert_block c name bb] creates a new basic block named [name] before the
     basic block [bb] in the context [c].
     See the constructor of [llvm::BasicBlock]. *)
-external insert_block : llcontext -> string -> llbasicblock -> llbasicblock
-                      = "llvm_insert_block"
+val insert_block : llcontext -> string -> llbasicblock -> llbasicblock
+
 
 (** [block_parent bb] returns the parent function that owns the basic block.
     See the method [llvm::BasicBlock::getParent]. *)
-external block_parent : llbasicblock -> llvalue = "LLVMGetBasicBlockParent"
+val block_parent : llbasicblock -> llvalue
 
 (** [block_begin f] returns the first position in the basic block list of the
     function [f]. [block_begin] and [block_succ] can be used to iterate over
     the basic block list in order.
     See the method [llvm::Function::begin]. *)
-external block_begin : llvalue -> (llvalue, llbasicblock) llpos
-                     = "llvm_block_begin"
+val block_begin : llvalue -> (llvalue, llbasicblock) llpos
+
 
 (** [block_succ bb] returns the basic block list position succeeding
     [Before bb].
     See the method [llvm::Function::iterator::operator++]. *)
-external block_succ : llbasicblock -> (llvalue, llbasicblock) llpos
-                    = "llvm_block_succ"
+val block_succ : llbasicblock -> (llvalue, llbasicblock) llpos
+
 
 (** [iter_blocks f fn] applies function [f] to each of the basic blocks
     of function [fn] in order. Tail recursive. *)
@@ -1400,13 +1400,13 @@ val fold_left_blocks : ('a -> llbasicblock -> 'a) -> 'a -> llvalue -> 'a
     the function [f]. [block_end] and [block_pred] can be used to iterate
     over the basic block list in reverse.
     See the method [llvm::Function::end]. *)
-external block_end : llvalue -> (llvalue, llbasicblock) llrev_pos
-                   = "llvm_block_end"
+val block_end : llvalue -> (llvalue, llbasicblock) llrev_pos
+
 
 (** [block_pred gv] returns the function list position preceding [After gv].
     See the method [llvm::Function::iterator::operator--]. *)
-external block_pred : llbasicblock -> (llvalue, llbasicblock) llrev_pos
-                    = "llvm_block_pred"
+val block_pred : llbasicblock -> (llvalue, llbasicblock) llrev_pos
+
 
 (** [rev_iter_blocks f fn] applies function [f] to each of the basic blocks
     of function [fn] in reverse order. Tail recursive. *)
@@ -1417,34 +1417,34 @@ val rev_iter_blocks : (llbasicblock -> unit) -> llvalue -> unit
 val fold_right_blocks : (llbasicblock -> 'a -> 'a) -> llvalue -> 'a -> 'a
 
 (** [value_of_block bb] losslessly casts [bb] to an [llvalue]. *)
-external value_of_block : llbasicblock -> llvalue = "LLVMBasicBlockAsValue"
+val value_of_block : llbasicblock -> llvalue
 
 (** [value_is_block v] returns [true] if the value [v] is a basic block and
     [false] otherwise.
     Similar to [llvm::isa<BasicBlock>]. *)
-external value_is_block : llvalue -> bool = "llvm_value_is_block"
+val value_is_block : llvalue -> bool
 
 (** [block_of_value v] losslessly casts [v] to an [llbasicblock]. *)
-external block_of_value : llvalue -> llbasicblock = "LLVMValueAsBasicBlock"
+val block_of_value : llvalue -> llbasicblock
 
 
 (** {7 Operations on instructions} *)
 
 (** [instr_parent i] is the enclosing basic block of the instruction [i].
     See the method [llvm::Instruction::getParent]. *)
-external instr_parent : llvalue -> llbasicblock = "LLVMGetInstructionParent"
+val instr_parent : llvalue -> llbasicblock
 
 (** [instr_begin bb] returns the first position in the instruction list of the
     basic block [bb]. [instr_begin] and [instr_succ] can be used to iterate over
     the instruction list in order.
     See the method [llvm::BasicBlock::begin]. *)
-external instr_begin : llbasicblock -> (llbasicblock, llvalue) llpos
-                     = "llvm_instr_begin"
+val instr_begin : llbasicblock -> (llbasicblock, llvalue) llpos
+
 
 (** [instr_succ i] returns the instruction list position succeeding [Before i].
     See the method [llvm::BasicBlock::iterator::operator++]. *)
-external instr_succ : llvalue -> (llbasicblock, llvalue) llpos
-                     = "llvm_instr_succ"
+val instr_succ : llvalue -> (llbasicblock, llvalue) llpos
+
 
 (** [iter_instrs f bb] applies function [f] to each of the instructions of basic
     block [bb] in order. Tail recursive. *)
@@ -1458,13 +1458,13 @@ val fold_left_instrs: ('a -> llvalue -> 'a) -> 'a -> llbasicblock -> 'a
     basic block [bb]. [instr_end] and [instr_pred] can be used to iterate over
     the instruction list in reverse.
     See the method [llvm::BasicBlock::end]. *)
-external instr_end : llbasicblock -> (llbasicblock, llvalue) llrev_pos
-                     = "llvm_instr_end"
+val instr_end : llbasicblock -> (llbasicblock, llvalue) llrev_pos
+
 
 (** [instr_pred i] returns the instruction list position preceding [After i].
     See the method [llvm::BasicBlock::iterator::operator--]. *)
-external instr_pred : llvalue -> (llbasicblock, llvalue) llrev_pos
-                     = "llvm_instr_pred"
+val instr_pred : llvalue -> (llbasicblock, llvalue) llrev_pos
+
 
 (** [fold_right_instrs f bb init] is [f (... (f init fN) ...) f1] where
     [f1,...,fN] are the instructions of basic block [bb]. Tail recursive. *)
@@ -1477,16 +1477,16 @@ val fold_right_instrs: (llvalue -> 'a -> 'a) -> llbasicblock -> 'a -> 'a
     instruction [ci], which may be one of the values from the module
     {!CallConv}. See the method [llvm::CallInst::getCallingConv] and
     [llvm::InvokeInst::getCallingConv]. *)
-external instruction_call_conv: llvalue -> int
-                              = "llvm_instruction_call_conv"
+val instruction_call_conv: llvalue -> int
+
 
 (** [set_instruction_call_conv cc ci] sets the calling convention for the call
     or invoke instruction [ci] to the integer [cc], which can be one of the
     values from the module {!CallConv}.
     See the method [llvm::CallInst::setCallingConv]
     and [llvm::InvokeInst::setCallingConv]. *)
-external set_instruction_call_conv: int -> llvalue -> unit
-                                  = "llvm_set_instruction_call_conv"
+val set_instruction_call_conv: int -> llvalue -> unit
+
 
 (** [add_instruction_param_attr ci i a] adds attribute [a] to the [i]th
     parameter of the call or invoke instruction [ci]. [i]=0 denotes the return
@@ -1503,23 +1503,23 @@ val remove_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
 (** [is_tail_call ci] is [true] if the call instruction [ci] is flagged as
     eligible for tail call optimization, [false] otherwise.
     See the method [llvm::CallInst::isTailCall]. *)
-external is_tail_call : llvalue -> bool = "llvm_is_tail_call"
+val is_tail_call : llvalue -> bool
 
 (** [set_tail_call tc ci] flags the call instruction [ci] as eligible for tail
     call optimization if [tc] is [true], clears otherwise.
     See the method [llvm::CallInst::setTailCall]. *)
-external set_tail_call : bool -> llvalue -> unit = "llvm_set_tail_call"
+val set_tail_call : bool -> llvalue -> unit
 
 (** {7 Operations on phi nodes} *)
 
 (** [add_incoming (v, bb) pn] adds the value [v] to the phi node [pn] for use
     with branches from [bb]. See the method [llvm::PHINode::addIncoming]. *)
-external add_incoming : (llvalue * llbasicblock) -> llvalue -> unit
-                      = "llvm_add_incoming"
+val add_incoming : (llvalue * llbasicblock) -> llvalue -> unit
+
 
 (** [incoming pn] returns the list of value-block pairs for phi node [pn].
     See the method [llvm::PHINode::getIncomingValue]. *)
-external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming"
+val incoming : llvalue -> (llvalue * llbasicblock) list
 
 
 
@@ -1529,7 +1529,7 @@ external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming"
     the context [context]. It is invalid to use this builder until its position
     is set with {!position_before} or {!position_at_end}. See the constructor
     for [llvm::LLVMBuilder]. *)
-external builder : llcontext -> llbuilder = "llvm_builder"
+val builder : llcontext -> llbuilder
 
 (** [builder_at ip] creates an instruction builder positioned at [ip].
     See the constructor for [llvm::LLVMBuilder]. *)
@@ -1546,8 +1546,8 @@ val builder_at_end : llcontext -> llbasicblock -> llbuilder
 (** [position_builder ip bb] moves the instruction builder [bb] to the position
     [ip].
     See the constructor for [llvm::LLVMBuilder]. *)
-external position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit
-                          = "llvm_position_builder"
+val position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit
+
 
 (** [position_before ins b] moves the instruction builder [b] to before the
     instruction [isn]. See the method [llvm::LLVMBuilder::SetInsertPoint]. *)
@@ -1561,38 +1561,38 @@ val position_at_end : llbasicblock -> llbuilder -> unit
     positioned to insert into. Raises [Not_Found] if the instruction builder is
     uninitialized.
     See the method [llvm::LLVMBuilder::GetInsertBlock]. *)
-external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block"
+val insertion_block : llbuilder -> llbasicblock
 
 (** [insert_into_builder i name b] inserts the specified instruction [i] at the
     position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::Insert]. *)
-external insert_into_builder : llvalue -> string -> llbuilder -> unit
-                             = "llvm_insert_into_builder"
+val insert_into_builder : llvalue -> string -> llbuilder -> unit
+
 
 (** {7 Metadata} *)
 
 (** [set_current_debug_location b md] sets the current debug location [md] in
     the builder [b].
     See the method [llvm::IRBuilder::SetDebugLocation]. *)
-external set_current_debug_location : llbuilder -> llvalue -> unit
-                                    = "llvm_set_current_debug_location"
+val set_current_debug_location : llbuilder -> llvalue -> unit
+
 
 (** [clear_current_debug_location b] clears the current debug location in the
     builder [b]. *)
-external clear_current_debug_location : llbuilder -> unit
-                                      = "llvm_clear_current_debug_location"
+val clear_current_debug_location : llbuilder -> unit
+
 
 (** [current_debug_location b] returns the current debug location, or None
     if none is currently set.
     See the method [llvm::IRBuilder::GetDebugLocation]. *)
-external current_debug_location : llbuilder -> llvalue option
-                                = "llvm_current_debug_location"
+val current_debug_location : llbuilder -> llvalue option
+
 
 (** [set_inst_debug_location b i] sets the current debug location of the builder
     [b] to the instruction [i].
     See the method [llvm::IRBuilder::SetInstDebugLocation]. *)
-external set_inst_debug_location : llbuilder -> llvalue -> unit
-                                 = "llvm_set_inst_debug_location"
+val set_inst_debug_location : llbuilder -> llvalue -> unit
+
 
 (** {7 Terminators} *)
 
@@ -1600,81 +1600,81 @@ external set_inst_debug_location : llbuilder -> llvalue -> unit
     [ret void]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateRetVoid]. *)
-external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void"
+val build_ret_void : llbuilder -> llvalue
 
 (** [build_ret v b] creates a
     [ret %v]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateRet]. *)
-external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret"
+val build_ret : llvalue -> llbuilder -> llvalue
 
 (** [build_aggregate_ret vs b] creates a
     [ret {...} { %v1, %v2, ... } ]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAggregateRet]. *)
-external build_aggregate_ret : llvalue array -> llbuilder -> llvalue
-                             = "llvm_build_aggregate_ret"
+val build_aggregate_ret : llvalue array -> llbuilder -> llvalue
+
 
 (** [build_br bb b] creates a
     [br %bb]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateBr]. *)
-external build_br : llbasicblock -> llbuilder -> llvalue = "llvm_build_br"
+val build_br : llbasicblock -> llbuilder -> llvalue
 
 (** [build_cond_br cond tbb fbb b] creates a
     [br %cond, %tbb, %fbb]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateCondBr]. *)
-external build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder ->
-                         llvalue = "llvm_build_cond_br"
+val build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder ->
+                         llvalue
 
 (** [build_switch case elsebb count b] creates an empty
     [switch %case, %elsebb]
     instruction at the position specified by the instruction builder [b] with
     space reserved for [count] cases.
     See the method [llvm::LLVMBuilder::CreateSwitch]. *)
-external build_switch : llvalue -> llbasicblock -> int -> llbuilder -> llvalue
-                      = "llvm_build_switch"
+val build_switch : llvalue -> llbasicblock -> int -> llbuilder -> llvalue
+
 
 (** [add_case sw onval bb] causes switch instruction [sw] to branch to [bb]
     when its input matches the constant [onval].
     See the method [llvm::SwitchInst::addCase]. **)
-external add_case : llvalue -> llvalue -> llbasicblock -> unit
-                  = "llvm_add_case"
+val add_case : llvalue -> llvalue -> llbasicblock -> unit
+
 
 (** [build_indirect_br addr count b] creates a
     [indirectbr %addr]
     instruction at the position specified by the instruction builder [b] with
     space reserved for [count] destinations.
     See the method [llvm::LLVMBuilder::CreateIndirectBr]. *)
-external build_indirect_br : llvalue -> int -> llbuilder -> llvalue
-                           = "llvm_build_indirect_br"
+val build_indirect_br : llvalue -> int -> llbuilder -> llvalue
+
 
 (** [add_destination br bb] adds the basic block [bb] as a possible branch
     location for the indirectbr instruction [br].
     See the method [llvm::IndirectBrInst::addDestination]. **)
-external add_destination : llvalue -> llbasicblock -> unit
-                         = "llvm_add_destination"
+val add_destination : llvalue -> llbasicblock -> unit
+
 
 (** [build_invoke fn args tobb unwindbb name b] creates an
     [%name = invoke %fn(args) to %tobb unwind %unwindbb]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateInvoke]. *)
-external build_invoke : llvalue -> llvalue array -> llbasicblock ->
+val build_invoke : llvalue -> llvalue array -> llbasicblock ->
                         llbasicblock -> string -> llbuilder -> llvalue
-                      = "llvm_build_invoke_bc" "llvm_build_invoke_nat"
+
 
 (** [build_unwind b] creates an
     [unwind]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateUnwind]. *)
-external build_unwind : llbuilder -> llvalue = "llvm_build_unwind"
+val build_unwind : llbuilder -> llvalue
 
 (** [build_unreachable b] creates an
     [unreachable]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateUnwind]. *)
-external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable"
+val build_unreachable : llbuilder -> llvalue
 
 
 (** {7 Arithmetic} *)
@@ -1683,216 +1683,216 @@ external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable"
     [%name = add %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAdd]. *)
-external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_add"
+val build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nsw_add x y name b] creates a
     [%name = nsw add %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNSWAdd]. *)
-external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                      = "llvm_build_nsw_add"
+val build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nuw_add x y name b] creates a
     [%name = nuw add %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNUWAdd]. *)
-external build_nuw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                      = "llvm_build_nuw_add"
+val build_nuw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_fadd x y name b] creates a
     [%name = fadd %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFAdd]. *)
-external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_fadd"
+val build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_sub x y name b] creates a
     [%name = sub %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSub]. *)
-external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_sub"
+val build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nsw_sub x y name b] creates a
     [%name = nsw sub %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNSWSub]. *)
-external build_nsw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nsw_sub"
+val build_nsw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nuw_sub x y name b] creates a
     [%name = nuw sub %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNUWSub]. *)
-external build_nuw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nuw_sub"
+val build_nuw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_fsub x y name b] creates a
     [%name = fsub %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFSub]. *)
-external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_fsub"
+val build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_mul x y name b] creates a
     [%name = mul %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateMul]. *)
-external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_mul"
+val build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nsw_mul x y name b] creates a
     [%name = nsw mul %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNSWMul]. *)
-external build_nsw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nsw_mul"
+val build_nsw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nuw_mul x y name b] creates a
     [%name = nuw mul %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateNUWMul]. *)
-external build_nuw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nuw_mul"
+val build_nuw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_fmul x y name b] creates a
     [%name = fmul %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFMul]. *)
-external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_fmul"
+val build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_udiv x y name b] creates a
     [%name = udiv %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateUDiv]. *)
-external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_udiv"
+val build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_sdiv x y name b] creates a
     [%name = sdiv %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSDiv]. *)
-external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_sdiv"
+val build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_exact_sdiv x y name b] creates a
     [%name = exact sdiv %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateExactSDiv]. *)
-external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                          = "llvm_build_exact_sdiv"
+val build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_fdiv x y name b] creates a
     [%name = fdiv %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFDiv]. *)
-external build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_fdiv"
+val build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_urem x y name b] creates a
     [%name = urem %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateURem]. *)
-external build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_urem"
+val build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_SRem x y name b] creates a
     [%name = srem %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSRem]. *)
-external build_srem : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_srem"
+val build_srem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_frem x y name b] creates a
     [%name = frem %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFRem]. *)
-external build_frem : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_frem"
+val build_frem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_shl x y name b] creates a
     [%name = shl %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateShl]. *)
-external build_shl : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_shl"
+val build_shl : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_lshr x y name b] creates a
     [%name = lshr %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateLShr]. *)
-external build_lshr : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_lshr"
+val build_lshr : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_ashr x y name b] creates a
     [%name = ashr %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAShr]. *)
-external build_ashr : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_ashr"
+val build_ashr : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_and x y name b] creates a
     [%name = and %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAnd]. *)
-external build_and : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_and"
+val build_and : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_or x y name b] creates a
     [%name = or %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateOr]. *)
-external build_or : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                  = "llvm_build_or"
+val build_or : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_xor x y name b] creates a
     [%name = xor %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateXor]. *)
-external build_xor : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_xor"
+val build_xor : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_neg x name b] creates a
     [%name = sub 0, %x]
     instruction at the position specified by the instruction builder [b].
     [-0.0] is used for floating point types to compute the correct sign.
     See the method [llvm::LLVMBuilder::CreateNeg]. *)
-external build_neg : llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_neg"
+val build_neg : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nsw_neg x name b] creates a
     [%name = nsw sub 0, %x]
     instruction at the position specified by the instruction builder [b].
     [-0.0] is used for floating point types to compute the correct sign.
     See the method [llvm::LLVMBuilder::CreateNeg]. *)
-external build_nsw_neg : llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nsw_neg"
+val build_nsw_neg : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_nuw_neg x name b] creates a
     [%name = nuw sub 0, %x]
     instruction at the position specified by the instruction builder [b].
     [-0.0] is used for floating point types to compute the correct sign.
     See the method [llvm::LLVMBuilder::CreateNeg]. *)
-external build_nuw_neg : llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_nuw_neg"
+val build_nuw_neg : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_fneg x name b] creates a
     [%name = fsub 0, %x]
     instruction at the position specified by the instruction builder [b].
     [-0.0] is used for floating point types to compute the correct sign.
     See the method [llvm::LLVMBuilder::CreateFNeg]. *)
-external build_fneg : llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_fneg"
+val build_fneg : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_xor x name b] creates a
     [%name = xor %x, -1]
     instruction at the position specified by the instruction builder [b].
     [-1] is the correct "all ones" value for the type of [x].
     See the method [llvm::LLVMBuilder::CreateXor]. *)
-external build_not : llvalue -> string -> llbuilder -> llvalue
-                   = "llvm_build_not"
+val build_not : llvalue -> string -> llbuilder -> llvalue
+
 
 
 (** {7 Memory} *)
@@ -1901,63 +1901,63 @@ external build_not : llvalue -> string -> llbuilder -> llvalue
     [%name = alloca %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAlloca]. *)
-external build_alloca : lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_alloca"
+val build_alloca : lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_array_alloca ty n name b] creates a
     [%name = alloca %ty, %n]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateAlloca]. *)
-external build_array_alloca : lltype -> llvalue -> string -> llbuilder ->
-                              llvalue = "llvm_build_array_alloca"
+val build_array_alloca : lltype -> llvalue -> string -> llbuilder ->
+                              llvalue
 
 (** [build_load v name b] creates a
     [%name = load %v]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateLoad]. *)
-external build_load : llvalue -> string -> llbuilder -> llvalue
-                    = "llvm_build_load"
+val build_load : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_store v p b] creates a
     [store %v, %p]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateStore]. *)
-external build_store : llvalue -> llvalue -> llbuilder -> llvalue
-                     = "llvm_build_store"
+val build_store : llvalue -> llvalue -> llbuilder -> llvalue
+
 
 (** [build_gep p indices name b] creates a
     [%name = getelementptr %p, indices...]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateGetElementPtr]. *)
-external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
-                   = "llvm_build_gep"
+val build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
+
 
 (** [build_in_bounds_gep p indices name b] creates a
     [%name = gelementptr inbounds %p, indices...]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateInBoundsGetElementPtr]. *)
-external build_in_bounds_gep : llvalue -> llvalue array -> string -> llbuilder ->
-                               llvalue = "llvm_build_in_bounds_gep"
+val build_in_bounds_gep : llvalue -> llvalue array -> string -> llbuilder ->
+                               llvalue
 
 (** [build_struct_gep p idx name b] creates a
     [%name = getelementptr %p, 0, idx]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateStructGetElementPtr]. *)
-external build_struct_gep : llvalue -> int -> string -> llbuilder ->
-                            llvalue = "llvm_build_struct_gep"
+val build_struct_gep : llvalue -> int -> string -> llbuilder ->
+                            llvalue
 
 (** [build_global_string str name b] creates a series of instructions that adds
     a global string at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateGlobalString]. *)
-external build_global_string : string -> string -> llbuilder -> llvalue
-                             = "llvm_build_global_string"
+val build_global_string : string -> string -> llbuilder -> llvalue
+
 
 (** [build_global_stringptr str name b] creates a series of instructions that
     adds a global string pointer at the position specified by the instruction
     builder [b].
     See the method [llvm::LLVMBuilder::CreateGlobalStringPtr]. *)
-external build_global_stringptr : string -> string -> llbuilder -> llvalue
-                                = "llvm_build_global_stringptr"
+val build_global_stringptr : string -> string -> llbuilder -> llvalue
+
 
 
 (** {7 Casts} *)
@@ -1966,121 +1966,121 @@ external build_global_stringptr : string -> string -> llbuilder -> llvalue
     [%name = trunc %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateTrunc]. *)
-external build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue
-                     = "llvm_build_trunc"
+val build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_zext v ty name b] creates a
     [%name = zext %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateZExt]. *)
-external build_zext : llvalue -> lltype -> string -> llbuilder -> llvalue
-                    = "llvm_build_zext"
+val build_zext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_sext v ty name b] creates a
     [%name = sext %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSExt]. *)
-external build_sext : llvalue -> lltype -> string -> llbuilder -> llvalue
-                    = "llvm_build_sext"
+val build_sext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_fptoui v ty name b] creates a
     [%name = fptoui %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFPToUI]. *)
-external build_fptoui : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_fptoui"
+val build_fptoui : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_fptosi v ty name b] creates a
     [%name = fptosi %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFPToSI]. *)
-external build_fptosi : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_fptosi"
+val build_fptosi : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_uitofp v ty name b] creates a
     [%name = uitofp %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateUIToFP]. *)
-external build_uitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_uitofp"
+val build_uitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_sitofp v ty name b] creates a
     [%name = sitofp %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSIToFP]. *)
-external build_sitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_sitofp"
+val build_sitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_fptrunc v ty name b] creates a
     [%name = fptrunc %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFPTrunc]. *)
-external build_fptrunc : llvalue -> lltype -> string -> llbuilder -> llvalue
-                       = "llvm_build_fptrunc"
+val build_fptrunc : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_fpext v ty name b] creates a
     [%name = fpext %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFPExt]. *)
-external build_fpext : llvalue -> lltype -> string -> llbuilder -> llvalue
-                     = "llvm_build_fpext"
+val build_fpext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_ptrtoint v ty name b] creates a
     [%name = prtotint %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreatePtrToInt]. *)
-external build_ptrtoint : llvalue -> lltype -> string -> llbuilder -> llvalue
-                        = "llvm_build_prttoint"
+val build_ptrtoint : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_inttoptr v ty name b] creates a
     [%name = inttoptr %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateIntToPtr]. *)
-external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
-                        = "llvm_build_inttoptr"
+val build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_bitcast v ty name b] creates a
     [%name = bitcast %p to %ty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateBitCast]. *)
-external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
-                       = "llvm_build_bitcast"
+val build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_zext_or_bitcast v ty name b] creates a zext or bitcast
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateZExtOrBitCast]. *)
-external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
-                                 llvalue = "llvm_build_zext_or_bitcast"
+val build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                 llvalue
 
 (** [build_sext_or_bitcast v ty name b] creates a sext or bitcast
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSExtOrBitCast]. *)
-external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
-                                 llvalue = "llvm_build_sext_or_bitcast"
+val build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                 llvalue
 
 (** [build_trunc_or_bitcast v ty name b] creates a trunc or bitcast
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateZExtOrBitCast]. *)
-external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
-                                  llvalue = "llvm_build_trunc_or_bitcast"
+val build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                  llvalue
 
 (** [build_pointercast v ty name b] creates a bitcast or pointer-to-int
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreatePointerCast]. *)
-external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
-                           = "llvm_build_pointercast"
+val build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_intcast v ty name b] creates a zext, bitcast, or trunc
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateIntCast]. *)
-external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
-                       = "llvm_build_intcast"
+val build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_fpcast v ty name b] creates a fpext, bitcast, or fptrunc
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFPCast]. *)
-external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_fpcast"
+val build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 
 (** {7 Comparisons} *)
@@ -2089,15 +2089,15 @@ external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
     [%name = icmp %pred %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateICmp]. *)
-external build_icmp : Icmp.t -> llvalue -> llvalue -> string ->
-                      llbuilder -> llvalue = "llvm_build_icmp"
+val build_icmp : Icmp.t -> llvalue -> llvalue -> string ->
+                      llbuilder -> llvalue
 
 (** [build_fcmp pred x y name b] creates a
     [%name = fcmp %pred %x, %y]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateFCmp]. *)
-external build_fcmp : Fcmp.t -> llvalue -> llvalue -> string ->
-                      llbuilder -> llvalue = "llvm_build_fcmp"
+val build_fcmp : Fcmp.t -> llvalue -> llvalue -> string ->
+                      llbuilder -> llvalue
 
 
 (** {7 Miscellaneous instructions} *)
@@ -2107,85 +2107,85 @@ external build_fcmp : Fcmp.t -> llvalue -> llvalue -> string ->
     instruction at the position specified by the instruction builder [b].
     [incoming] is a list of [(llvalue, llbasicblock)] tuples.
     See the method [llvm::LLVMBuilder::CreatePHI]. *)
-external build_phi : (llvalue * llbasicblock) list -> string -> llbuilder ->
-                     llvalue = "llvm_build_phi"
+val build_phi : (llvalue * llbasicblock) list -> string -> llbuilder ->
+                     llvalue
 
 (** [build_call fn args name b] creates a
     [%name = call %fn(args...)]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateCall]. *)
-external build_call : llvalue -> llvalue array -> string -> llbuilder -> llvalue
-                    = "llvm_build_call"
+val build_call : llvalue -> llvalue array -> string -> llbuilder -> llvalue
+
 
 (** [build_select cond thenv elsev name b] creates a
     [%name = select %cond, %thenv, %elsev]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateSelect]. *)
-external build_select : llvalue -> llvalue -> llvalue -> string -> llbuilder ->
-                        llvalue = "llvm_build_select"
+val build_select : llvalue -> llvalue -> llvalue -> string -> llbuilder ->
+                        llvalue
 
 (** [build_va_arg valist argty name b] creates a
     [%name = va_arg %valist, %argty]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateVAArg]. *)
-external build_va_arg : llvalue -> lltype -> string -> llbuilder -> llvalue
-                      = "llvm_build_va_arg"
+val build_va_arg : llvalue -> lltype -> string -> llbuilder -> llvalue
+
 
 (** [build_extractelement vec i name b] creates a
     [%name = extractelement %vec, %i]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateExtractElement]. *)
-external build_extractelement : llvalue -> llvalue -> string -> llbuilder ->
-                                llvalue = "llvm_build_extractelement"
+val build_extractelement : llvalue -> llvalue -> string -> llbuilder ->
+                                llvalue
 
 (** [build_insertelement vec elt i name b] creates a
     [%name = insertelement %vec, %elt, %i]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateInsertElement]. *)
-external build_insertelement : llvalue -> llvalue -> llvalue -> string ->
-                               llbuilder -> llvalue = "llvm_build_insertelement"
+val build_insertelement : llvalue -> llvalue -> llvalue -> string ->
+                               llbuilder -> llvalue
 
 (** [build_shufflevector veca vecb mask name b] creates a
     [%name = shufflevector %veca, %vecb, %mask]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateShuffleVector]. *)
-external build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
-                               llbuilder -> llvalue = "llvm_build_shufflevector"
+val build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
+                               llbuilder -> llvalue
 
 (** [build_insertvalue agg idx name b] creates a
     [%name = extractvalue %agg, %idx]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateExtractValue]. *)
-external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
-                            = "llvm_build_extractvalue"
+val build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
+
 
 (** [build_insertvalue agg val idx name b] creates a
     [%name = insertvalue %agg, %val, %idx]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateInsertValue]. *)
-external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder ->
-                             llvalue = "llvm_build_insertvalue"
+val build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder ->
+                             llvalue
 
 (** [build_is_null val name b] creates a
     [%name = icmp eq %val, null]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateIsNull]. *)
-external build_is_null : llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_is_null"
+val build_is_null : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_is_not_null val name b] creates a
     [%name = icmp ne %val, null]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateIsNotNull]. *)
-external build_is_not_null : llvalue -> string -> llbuilder -> llvalue
-                           = "llvm_build_is_not_null"
+val build_is_not_null : llvalue -> string -> llbuilder -> llvalue
+
 
 (** [build_ptrdiff lhs rhs name b] creates a series of instructions that measure
     the difference between two pointer values at the position specified by the
     instruction builder [b].
     See the method [llvm::LLVMBuilder::CreatePtrDiff]. *)
-external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue
-                       = "llvm_build_ptrdiff"
+val build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
 
 
 (** {6 Memory buffers} *)
@@ -2194,14 +2194,14 @@ module MemoryBuffer : sig
   (** [of_file p] is the memory buffer containing the contents of the file at
       path [p]. If the file could not be read, then [IoError msg] is
       raised. *)
-  external of_file : string -> llmemorybuffer = "llvm_memorybuffer_of_file"
+  val of_file : string -> llmemorybuffer
   
   (** [stdin ()] is the memory buffer containing the contents of standard input.
       If standard input is empty, then [IoError msg] is raised. *)
-  external of_stdin : unit -> llmemorybuffer = "llvm_memorybuffer_of_stdin"
+  val of_stdin : unit -> llmemorybuffer
   
   (** Disposes of a memory buffer. *)
-  external dispose : llmemorybuffer -> unit = "llvm_memorybuffer_dispose"
+  val dispose : llmemorybuffer -> unit
 end
 
 
@@ -2216,44 +2216,44 @@ module PassManager : sig
       type of pipeline is suitable for link-time optimization and whole-module
       transformations.
       See the constructor of [llvm::PassManager]. *)
-  external create : unit -> [ `Module ] t = "llvm_passmanager_create"
+  val create : unit -> [ `Module ] t
   
   (** [PassManager.create_function m] constructs a new function-by-function
       pass pipeline over the module [m]. It does not take ownership of [m].
       This type of pipeline is suitable for code generation and JIT compilation
       tasks.
       See the constructor of [llvm::FunctionPassManager]. *)
-  external create_function : llmodule -> [ `Function ] t
-                           = "LLVMCreateFunctionPassManager"
+  val create_function : llmodule -> [ `Function ] t
+
   
   (** [run_module m pm] initializes, executes on the module [m], and finalizes
       all of the passes scheduled in the pass manager [pm]. Returns [true] if
       any of the passes modified the module, [false] otherwise.
       See the [llvm::PassManager::run] method. *)
-  external run_module : llmodule -> [ `Module ] t -> bool
-                      = "llvm_passmanager_run_module"
+  val run_module : llmodule -> [ `Module ] t -> bool
+
   
   (** [initialize fpm] initializes all of the function passes scheduled in the
       function pass manager [fpm]. Returns [true] if any of the passes modified
       the module, [false] otherwise.
       See the [llvm::FunctionPassManager::doInitialization] method. *)
-  external initialize : [ `Function ] t -> bool = "llvm_passmanager_initialize"
+  val initialize : [ `Function ] t -> bool
   
   (** [run_function f fpm] executes all of the function passes scheduled in the
       function pass manager [fpm] over the function [f]. Returns [true] if any
       of the passes modified [f], [false] otherwise.
       See the [llvm::FunctionPassManager::run] method. *)
-  external run_function : llvalue -> [ `Function ] t -> bool
-                        = "llvm_passmanager_run_function"
+  val run_function : llvalue -> [ `Function ] t -> bool
+
   
   (** [finalize fpm] finalizes all of the function passes scheduled in in the
       function pass manager [fpm]. Returns [true] if any of the passes
       modified the module, [false] otherwise.
       See the [llvm::FunctionPassManager::doFinalization] method. *)
-  external finalize : [ `Function ] t -> bool = "llvm_passmanager_finalize"
+  val finalize : [ `Function ] t -> bool
   
   (** Frees the memory of a pass pipeline. For function pipelines, does not free
       the module.
       See the destructor of [llvm::BasePassManager]. *)
-  external dispose : [< any ] t -> unit = "llvm_passmanager_dispose"
+  val dispose : [< any ] t -> unit
 end
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index ef2e3d66629c..ce6cf8ea79e9 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -264,6 +264,11 @@ CAMLprim LLVMTypeRef llvm_ppc_fp128_type(LLVMContextRef Context) {
   return LLVMPPCFP128TypeInContext(Context);
 }
 
+/* llcontext -> lltype */
+CAMLprim LLVMTypeRef llvm_x86mmx_type(LLVMContextRef Context) {
+  return LLVMX86MMXTypeInContext(Context);
+}
+
 /*--... Operations on function types .......................................--*/
 
 /* lltype -> lltype array -> lltype */
diff --git a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
index 5699152b5a92..276e1182d054 100644
--- a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
+++ b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
@@ -39,9 +39,6 @@ external add_loop_unroll : [<Llvm.PassManager.any] Llvm.PassManager.t
 external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t
                              -> unit
                            = "llvm_add_loop_rotation"
-external add_loop_index_split : [<Llvm.PassManager.any] Llvm.PassManager.t
-                                -> unit
-                              = "llvm_add_loop_index_split"
 external
 add_memory_to_register_promotion : [<Llvm.PassManager.any] Llvm.PassManager.t
                                    -> unit
diff --git a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
index 9f95fbce9f89..d7162c769e43 100644
--- a/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
+++ b/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
@@ -66,11 +66,6 @@ external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t
                              -> unit
                            = "llvm_add_loop_rotation"
 
-(** See the [llvm::createLoopIndexSplitPass] function. *)
-external add_loop_index_split : [<Llvm.PassManager.any] Llvm.PassManager.t
-                                -> unit
-                              = "llvm_add_loop_index_split"
-
 (** See the [llvm::createPromoteMemoryToRegisterPass] function. *)
 external
 add_memory_to_register_promotion : [<Llvm.PassManager.any] Llvm.PassManager.t
diff --git a/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c b/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
index c20bdde5753a..df44807859c8 100644
--- a/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
+++ b/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
@@ -85,12 +85,6 @@ CAMLprim value llvm_add_loop_rotation(LLVMPassManagerRef PM) {
   return Val_unit;
 }
 
-/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
-CAMLprim value llvm_add_loop_index_split(LLVMPassManagerRef PM) {
-  LLVMAddLoopIndexSplitPass(PM);
-  return Val_unit;
-}
-
 /* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
 CAMLprim value llvm_add_memory_to_register_promotion(LLVMPassManagerRef PM) {
   LLVMAddPromoteMemoryToRegisterPass(PM);
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index f75e5dfb2656..c2fe4317b53a 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -1,8 +1,14 @@
+if( WIN32 AND NOT CYGWIN )
+  # We consider Cygwin as another Unix
+  set(PURE_WINDOWS 1)
+endif()
+
 include(CheckIncludeFile)
 include(CheckLibraryExists)
 include(CheckSymbolExists)
 include(CheckFunctionExists)
 include(CheckCXXSourceCompiles)
+include(TestBigEndian)
 
 if( UNIX AND NOT BEOS )
   # Used by check_symbol_exists:
@@ -30,6 +36,7 @@ endfunction()
 # include checks
 check_include_file(argz.h HAVE_ARGZ_H)
 check_include_file(assert.h HAVE_ASSERT_H)
+check_include_file(ctype.h HAVE_CTYPE_H)
 check_include_file(dirent.h HAVE_DIRENT_H)
 check_include_file(dl.h HAVE_DL_H)
 check_include_file(dld.h HAVE_DLD_H)
@@ -44,7 +51,7 @@ check_include_file(malloc.h HAVE_MALLOC_H)
 check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
 check_include_file(memory.h HAVE_MEMORY_H)
 check_include_file(ndir.h HAVE_NDIR_H)
-if( NOT LLVM_ON_WIN32 )
+if( NOT PURE_WINDOWS )
   check_include_file(pthread.h HAVE_PTHREAD_H)
 endif()
 check_include_file(setjmp.h HAVE_SETJMP_H)
@@ -53,6 +60,7 @@ check_include_file(stdint.h HAVE_STDINT_H)
 check_include_file(stdio.h HAVE_STDIO_H)
 check_include_file(stdlib.h HAVE_STDLIB_H)
 check_include_file(string.h HAVE_STRING_H)
+check_include_file(strings.h HAVE_STRINGS_H)
 check_include_file(sys/dir.h HAVE_SYS_DIR_H)
 check_include_file(sys/dl.h HAVE_SYS_DL_H)
 check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H)
@@ -63,15 +71,19 @@ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
 check_include_file(sys/stat.h HAVE_SYS_STAT_H)
 check_include_file(sys/time.h HAVE_SYS_TIME_H)
 check_include_file(sys/types.h HAVE_SYS_TYPES_H)
+check_include_file(sys/uio.h HAVE_SYS_UIO_H)
 check_include_file(sys/wait.h HAVE_SYS_WAIT_H)
 check_include_file(termios.h HAVE_TERMIOS_H)
 check_include_file(unistd.h HAVE_UNISTD_H)
 check_include_file(utime.h HAVE_UTIME_H)
 check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
 check_include_file(windows.h HAVE_WINDOWS_H)
+check_include_file(fenv.h HAVE_FENV_H)
+check_include_file(mach/mach.h HAVE_MACH_MACH_H)
+check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H)
 
 # library checks
-if( NOT LLVM_ON_WIN32 )
+if( NOT PURE_WINDOWS )
   check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
   check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC)
   check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT)
@@ -83,6 +95,7 @@ check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
 check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE)
 check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT)
 check_function_exists(isatty HAVE_ISATTY)
+check_symbol_exists(index strings.h HAVE_INDEX)
 check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH)
 check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H)
 check_symbol_exists(finite ieeefp.h HAVE_FINITE_IN_IEEEFP_H)
@@ -90,6 +103,16 @@ check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH)
 check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
 check_symbol_exists(ceilf math.h HAVE_CEILF)
 check_symbol_exists(floorf math.h HAVE_FLOORF)
+check_symbol_exists(fmodf math.h HAVE_FMODF)
+if( HAVE_SETJMP_H )
+  check_symbol_exists(longjmp setjmp.h HAVE_LONGJMP)
+  check_symbol_exists(setjmp setjmp.h HAVE_SETJMP)
+  check_symbol_exists(siglongjmp setjmp.h HAVE_SIGLONGJMP)
+  check_symbol_exists(sigsetjmp setjmp.h HAVE_SIGSETJMP)
+endif()
+if( HAVE_SYS_UIO_H )
+  check_symbol_exists(writev sys/uio.h HAVE_WRITEV)
+endif()
 check_symbol_exists(nearbyintf math.h HAVE_NEARBYINTF)
 check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
 check_symbol_exists(malloc_zone_statistics malloc/malloc.h
@@ -97,15 +120,78 @@ check_symbol_exists(malloc_zone_statistics malloc/malloc.h
 check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP)
 check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP)
 check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP)
-if( NOT LLVM_ON_WIN32 )
+check_symbol_exists(closedir "sys/types.h;dirent.h" HAVE_CLOSEDIR)
+check_symbol_exists(opendir "sys/types.h;dirent.h" HAVE_OPENDIR)
+check_symbol_exists(readdir "sys/types.h;dirent.h" HAVE_READDIR)
+check_symbol_exists(getcwd unistd.h HAVE_GETCWD)
+check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY)
+check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
+check_symbol_exists(rindex strings.h HAVE_RINDEX)
+check_symbol_exists(strchr string.h HAVE_STRCHR)
+check_symbol_exists(strcmp string.h HAVE_STRCMP)
+check_symbol_exists(strdup string.h HAVE_STRDUP)
+check_symbol_exists(strrchr string.h HAVE_STRRCHR)
+if( NOT PURE_WINDOWS )
   check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
 endif()
 check_symbol_exists(sbrk unistd.h HAVE_SBRK)
+check_symbol_exists(srand48 stdlib.h HAVE_RAND48_SRAND48)
+if( HAVE_RAND48_SRAND48 )
+  check_symbol_exists(lrand48 stdlib.h HAVE_RAND48_LRAND48)
+  if( HAVE_RAND48_LRAND48 )
+    check_symbol_exists(drand48 stdlib.h HAVE_RAND48_DRAND48)
+    if( HAVE_RAND48_DRAND48 )
+      set(HAVE_RAND48 1 CACHE INTERNAL "are srand48/lrand48/drand48 available?")
+    endif()
+  endif()
+endif()
 check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
+check_symbol_exists(strtoq stdlib.h HAVE_STRTOQ)
 check_symbol_exists(strerror string.h HAVE_STRERROR)
 check_symbol_exists(strerror_r string.h HAVE_STRERROR_R)
-check_symbol_exists(strerror_s string.h HAVE_STRERROR_S)
+check_symbol_exists(strerror_s string.h HAVE_DECL_STRERROR_S)
+check_symbol_exists(memcpy string.h HAVE_MEMCPY)
+check_symbol_exists(memmove string.h HAVE_MEMMOVE)
 check_symbol_exists(setenv stdlib.h HAVE_SETENV)
+if( PURE_WINDOWS )
+  check_symbol_exists(_chsize_s io.h HAVE__CHSIZE_S)
+
+  check_function_exists(_alloca HAVE__ALLOCA)
+  check_function_exists(__alloca HAVE___ALLOCA)
+  check_function_exists(__chkstk HAVE___CHKSTK)
+  check_function_exists(___chkstk HAVE____CHKSTK)
+
+  check_function_exists(__ashldi3 HAVE___ASHLDI3)
+  check_function_exists(__ashrdi3 HAVE___ASHRDI3)
+  check_function_exists(__divdi3 HAVE___DIVDI3)
+  check_function_exists(__fixdfdi HAVE___FIXDFDI)
+  check_function_exists(__fixsfdi HAVE___FIXSFDI)
+  check_function_exists(__floatdidf HAVE___FLOATDIDF)
+  check_function_exists(__lshrdi3 HAVE___LSHRDI3)
+  check_function_exists(__moddi3 HAVE___MODDI3)
+  check_function_exists(__udivdi3 HAVE___UDIVDI3)
+  check_function_exists(__umoddi3 HAVE___UMODDI3)
+
+  check_function_exists(__main HAVE___MAIN)
+  check_function_exists(__cmpdi2 HAVE___CMPDI2)
+endif()
+if( HAVE_ARGZ_H )
+  check_symbol_exists(argz_append argz.h HAVE_ARGZ_APPEND)
+  check_symbol_exists(argz_create_sep argz.h HAVE_ARGZ_CREATE_SEP)
+  check_symbol_exists(argz_insert argz.h HAVE_ARGZ_INSERT)
+  check_symbol_exists(argz_next argz.h HAVE_ARGZ_NEXT)
+  check_symbol_exists(argz_stringify argz.h HAVE_ARGZ_STRINGIFY)
+endif()
+if( HAVE_DLFCN_H )
+  if( HAVE_LIBDL )
+    list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
+  endif()
+  check_symbol_exists(dlerror dlfcn.h HAVE_DLERROR)
+  check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN)
+  if( HAVE_LIBDL )
+    list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
+  endif()
+endif()
 
 check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
 if( LLVM_USING_GLIBC )
@@ -130,8 +216,10 @@ if (HAVE_STDINT_H)
   set(headers ${headers} "stdint.h")
 endif()
 
+check_type_exists(int64_t "${headers}" HAVE_INT64_T)
 check_type_exists(uint64_t "${headers}" HAVE_UINT64_T)
 check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T)
+check_type_exists(error_t errno.h HAVE_ERROR_T)
 
 # available programs checks
 function(llvm_find_program name)
@@ -154,16 +242,48 @@ llvm_find_program(fdp)
 llvm_find_program(dot)
 llvm_find_program(dotty)
 
+if( LLVM_ENABLE_FFI )
+  find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR})
+  if( FFI_INCLUDE_PATH )
+    set(FFI_HEADER ffi.h CACHE INTERNAL "")
+    set(HAVE_FFI_H 1 CACHE INTERNAL "")
+  else()
+    find_path(FFI_INCLUDE_PATH ffi/ffi.h PATHS ${FFI_INCLUDE_DIR})
+    if( FFI_INCLUDE_PATH )
+      set(FFI_HEADER ffi/ffi.h CACHE INTERNAL "")
+      set(HAVE_FFI_FFI_H 1 CACHE INTERNAL "")
+    endif()
+  endif()
+
+  if( NOT FFI_HEADER )
+    message(FATAL_ERROR "libffi includes are not found.")
+  endif()
+
+  find_library(FFI_LIBRARY_PATH ffi PATHS ${FFI_LIBRARY_DIR})
+  if( NOT FFI_LIBRARY_PATH )
+    message(FATAL_ERROR "libffi is not found.")
+  endif()
+
+  list(APPEND CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH})
+  list(APPEND CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH})
+  check_symbol_exists(ffi_call ${FFI_HEADER} HAVE_FFI_CALL)
+  list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH})
+  list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH})
+endif( LLVM_ENABLE_FFI )
+
 # Define LLVM_MULTITHREADED if gcc atomic builtins exists.
 include(CheckAtomic)
 
-include(CheckCXXCompilerFlag)
-# On windows all code is position-independent and mingw warns if -fPIC
-# is in the command-line.
-if( NOT WIN32 )
-  check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
+if( LLVM_ENABLE_PIC )
+  set(ENABLE_PIC 1)
+else()
+  set(ENABLE_PIC 0)
 endif()
 
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
+
 include(GetTargetTriple)
 get_target_triple(LLVM_HOSTTRIPLE)
 
@@ -194,22 +314,20 @@ elseif (LLVM_NATIVE_ARCH MATCHES "arm")
   set(LLVM_NATIVE_ARCH ARM)
 elseif (LLVM_NATIVE_ARCH MATCHES "mips")
   set(LLVM_NATIVE_ARCH Mips)
-elseif (LLVM_NATIVE_ARCH MATCHES "pic16")
-  set(LLVM_NATIVE_ARCH "PIC16")
 elseif (LLVM_NATIVE_ARCH MATCHES "xcore")
   set(LLVM_NATIVE_ARCH XCore)
 elseif (LLVM_NATIVE_ARCH MATCHES "msp430")
   set(LLVM_NATIVE_ARCH MSP430)
 else ()
-  message(STATUS 
+  message(STATUS
     "Unknown architecture ${LLVM_NATIVE_ARCH}; lli will not JIT code")
   set(LLVM_NATIVE_ARCH)
 endif ()
-  
+
 if (LLVM_NATIVE_ARCH)
   list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX)
   if (NATIVE_ARCH_IDX EQUAL -1)
-    message(STATUS 
+    message(STATUS
       "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code")
     set(LLVM_NATIVE_ARCH)
   else ()
@@ -263,19 +381,3 @@ else( ENABLE_THREADS )
 endif()
 
 set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
-
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake
-  ${LLVM_BINARY_DIR}/include/llvm/Config/config.h
-  )
-
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake
-  ${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h
-  )
-
-configure_file(
-  ${LLVM_MAIN_INCLUDE_DIR}/llvm/System/DataTypes.h.cmake
-  ${LLVM_BINARY_DIR}/include/llvm/System/DataTypes.h
-  )
-
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 0ecd153c6be4..dfe67cded331 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -4,11 +4,17 @@ include(LLVMConfig)
 macro(add_llvm_library name)
   llvm_process_sources( ALL_FILES ${ARGN} )
   add_library( ${name} ${ALL_FILES} )
-  set( llvm_libs ${llvm_libs} ${name} PARENT_SCOPE)
-  set( llvm_lib_targets ${llvm_lib_targets} ${name} PARENT_SCOPE )
+  set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} )
+  set_property( GLOBAL APPEND PROPERTY LLVM_LIB_TARGETS ${name} )
   if( LLVM_COMMON_DEPENDS )
     add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
   endif( LLVM_COMMON_DEPENDS )
+
+  if( BUILD_SHARED_LIBS )
+    get_system_libs(sl)
+    target_link_libraries( ${name} ${sl} )
+  endif()
+
   install(TARGETS ${name}
     LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
     ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
@@ -23,12 +29,20 @@ endmacro(add_llvm_library name)
 
 
 macro(add_llvm_loadable_module name)
-  if( NOT LLVM_ON_UNIX )
+  if( NOT LLVM_ON_UNIX OR CYGWIN )
     message(STATUS "Loadable modules not supported on this platform.
 ${name} ignored.")
+    # Add empty "phony" target
+    add_custom_target(${name})
   else()
     llvm_process_sources( ALL_FILES ${ARGN} )
-    add_library( ${name} MODULE ${ALL_FILES} )
+    if (MODULE)
+      set(libkind MODULE)
+    else()
+      set(libkind SHARED)
+    endif()
+
+    add_library( ${name} ${libkind} ${ALL_FILES} )
     set_target_properties( ${name} PROPERTIES PREFIX "" )
 
     if (APPLE)
@@ -60,13 +74,15 @@ macro(add_llvm_executable name)
   if( LLVM_LINK_COMPONENTS )
     llvm_config(${name} ${LLVM_LINK_COMPONENTS})
   endif( LLVM_LINK_COMPONENTS )
-  get_system_libs(llvm_system_libs)
-  if( llvm_system_libs )
-    target_link_libraries(${name} ${llvm_system_libs})
-  endif()
   if( LLVM_COMMON_DEPENDS )
     add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
   endif( LLVM_COMMON_DEPENDS )
+  if( NOT MINGW )
+    get_system_libs(llvm_system_libs)
+    if( llvm_system_libs )
+      target_link_libraries(${name} ${llvm_system_libs})
+    endif()
+  endif()
 endmacro(add_llvm_executable name)
 
 
@@ -105,5 +121,5 @@ macro(add_llvm_target target_name)
   if ( TABLEGEN_OUTPUT )
     add_dependencies(LLVM${target_name} ${target_name}Table_gen)
   endif (TABLEGEN_OUTPUT)
-  set(CURRENT_LLVM_TARGET LLVM${target_name} PARENT_SCOPE)
+  set( CURRENT_LLVM_TARGET LLVM${target_name} )
 endmacro(add_llvm_target)
diff --git a/cmake/modules/AddLLVMDefinitions.cmake b/cmake/modules/AddLLVMDefinitions.cmake
index 0f6d81f736d5..33ac9731db5d 100644
--- a/cmake/modules/AddLLVMDefinitions.cmake
+++ b/cmake/modules/AddLLVMDefinitions.cmake
@@ -1,11 +1,13 @@
-# There is no clear way of keeping track of compiler command-line
-# options chosen via `add_definitions', so we need our own method for
-# using it on tools/llvm-config/CMakeLists.txt.
-
-# Beware that there is no implementation of remove_llvm_definitions.
-
-macro(add_llvm_definitions)
-  set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${ARGN}")
-  add_definitions( ${ARGN} )
-endmacro(add_llvm_definitions)
-
+# There is no clear way of keeping track of compiler command-line
+# options chosen via `add_definitions', so we need our own method for
+# using it on tools/llvm-config/CMakeLists.txt.
+
+# Beware that there is no implementation of remove_llvm_definitions.
+
+macro(add_llvm_definitions)
+  # We don't want no semicolons on LLVM_DEFINITIONS:
+  foreach(arg ${ARGN})
+    set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}")
+  endforeach(arg)
+  add_definitions( ${ARGN} )
+endmacro(add_llvm_definitions)
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
index 416d7f478564..9a5566effb08 100644
--- a/cmake/modules/CMakeLists.txt
+++ b/cmake/modules/CMakeLists.txt
@@ -1,5 +1,8 @@
 set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake")
 
+get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
+get_property(llvm_lib_targets GLOBAL PROPERTY LLVM_LIB_TARGETS)
+
 configure_file(
   LLVM.cmake
   ${llvm_cmake_builddir}/LLVM.cmake
@@ -10,3 +13,21 @@ install(FILES
   LLVMConfig.cmake
   LLVMLibDeps.cmake
   DESTINATION share/llvm/cmake)
+
+install(DIRECTORY .
+  DESTINATION share/llvm/cmake
+  FILES_MATCHING PATTERN *.cmake
+  PATTERN .svn EXCLUDE
+  PATTERN LLVM.cmake EXCLUDE
+  PATTERN LLVMConfig.cmake EXCLUDE
+  PATTERN LLVMLibDeps.cmake EXCLUDE
+  PATTERN FindBison.cmake EXCLUDE
+  PATTERN GetTargetTriple.cmake EXCLUDE
+  PATTERN VersionFromVCS.cmake EXCLUDE
+  PATTERN CheckAtomic.cmake EXCLUDE)
+
+install(FILES
+  ${llvm_cmake_builddir}/LLVM.cmake
+  LLVMConfig.cmake
+  LLVMLibDeps.cmake
+  DESTINATION share/llvm/cmake)
diff --git a/cmake/modules/CrossCompileLLVM.cmake b/cmake/modules/CrossCompileLLVM.cmake
index 138ff0e9fe65..98e60a54366f 100644
--- a/cmake/modules/CrossCompileLLVM.cmake
+++ b/cmake/modules/CrossCompileLLVM.cmake
@@ -1,26 +1,26 @@
-
-if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
-  set(CX_NATIVE_TG_DIR "${CMAKE_BINARY_DIR}/native")
-  set(LLVM_TABLEGEN_EXE "${CX_NATIVE_TG_DIR}/bin/tblgen")
-
-  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${CX_NATIVE_TG_DIR}
-    COMMENT "Creating ${CX_NATIVE_TG_DIR}...")
-
-  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}/CMakeCache.txt
-    COMMAND ${CMAKE_COMMAND} -UMAKE_TOOLCHAIN_FILE -DCMAKE_BUILD_TYPE=Release ${CMAKE_SOURCE_DIR}
-    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}
-    DEPENDS ${CX_NATIVE_TG_DIR}
-    COMMENT "Configuring native TableGen...")
-
-  add_custom_command(OUTPUT ${LLVM_TABLEGEN_EXE}
-    COMMAND ${CMAKE_BUILD_TOOL}
-    DEPENDS ${CX_NATIVE_TG_DIR}/CMakeCache.txt
-    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}/utils/TableGen
-    COMMENT "Building native TableGen...")
-  add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN_EXE})
-
-  add_dependencies(tblgen NativeTableGen)
-
-  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CX_NATIVE_TG_DIR})
-endif()
+
+if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
+  set(CX_NATIVE_TG_DIR "${CMAKE_BINARY_DIR}/native")
+  set(LLVM_TABLEGEN_EXE "${CX_NATIVE_TG_DIR}/bin/tblgen")
+
+  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CX_NATIVE_TG_DIR}
+    COMMENT "Creating ${CX_NATIVE_TG_DIR}...")
+
+  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}/CMakeCache.txt
+    COMMAND ${CMAKE_COMMAND} -UMAKE_TOOLCHAIN_FILE -DCMAKE_BUILD_TYPE=Release ${CMAKE_SOURCE_DIR}
+    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}
+    DEPENDS ${CX_NATIVE_TG_DIR}
+    COMMENT "Configuring native TableGen...")
+
+  add_custom_command(OUTPUT ${LLVM_TABLEGEN_EXE}
+    COMMAND ${CMAKE_BUILD_TOOL}
+    DEPENDS ${CX_NATIVE_TG_DIR}/CMakeCache.txt
+    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}/utils/TableGen
+    COMMENT "Building native TableGen...")
+  add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN_EXE})
+
+  add_dependencies(tblgen NativeTableGen)
+
+  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CX_NATIVE_TG_DIR})
+endif()
diff --git a/cmake/modules/GetTargetTriple.cmake b/cmake/modules/GetTargetTriple.cmake
index ac0c00924266..f4321c9b67ec 100644
--- a/cmake/modules/GetTargetTriple.cmake
+++ b/cmake/modules/GetTargetTriple.cmake
@@ -9,7 +9,11 @@ function( get_target_triple var )
       set( value "i686-pc-win32" )
     endif()
   elseif( MINGW AND NOT MSYS )
-    set( value "i686-pc-mingw32" )
+    if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+      set( value "x86_64-w64-mingw32" )
+    else()
+      set( value "i686-pc-mingw32" )
+    endif()
   else( MSVC )
     set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
     execute_process(COMMAND sh ${config_guess}
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
new file mode 100644
index 000000000000..7ca2bd07fd53
--- /dev/null
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -0,0 +1,161 @@
+include(AddLLVMDefinitions)
+
+# Run-time build mode; It is used for unittests.
+if(MSVC_IDE)
+  # Expect "$(Configuration)", "$(OutDir)", etc.
+  # It is expanded by msbuild or similar.
+  set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
+elseif(NOT CMAKE_BUILD_TYPE STREQUAL "")
+  # Expect "Release" "Debug", etc.
+  # Or unittests could not run.
+  set(RUNTIME_BUILD_MODE ${CMAKE_BUILD_TYPE})
+else()
+  # It might be "."
+  set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
+endif()
+
+set(LIT_ARGS_DEFAULT "-sv")
+if (MSVC OR XCODE)
+  set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
+endif()
+set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}"
+    CACHE STRING "Default options for lit")
+
+if( LLVM_ENABLE_ASSERTIONS )
+  # MSVC doesn't like _DEBUG on release builds. See PR 4379.
+  if( NOT MSVC )
+    add_definitions( -D_DEBUG )
+  endif()
+  # On Release builds cmake automatically defines NDEBUG, so we
+  # explicitly undefine it:
+  if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    add_definitions( -UNDEBUG )
+  endif()
+else()
+  if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    if( NOT MSVC_IDE AND NOT XCODE )
+      add_definitions( -DNDEBUG )
+    endif()
+  endif()
+endif()
+
+if(WIN32)
+  if(CYGWIN)
+    set(LLVM_ON_WIN32 0)
+    set(LLVM_ON_UNIX 1)
+  else(CYGWIN)
+    set(LLVM_ON_WIN32 1)
+    set(LLVM_ON_UNIX 0)
+
+    # This is effective only on Win32 hosts to use gnuwin32 tools.
+    set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
+  endif(CYGWIN)
+  set(LTDL_SHLIB_EXT ".dll")
+  set(EXEEXT ".exe")
+  # Maximum path length is 160 for non-unicode paths
+  set(MAXPATHLEN 160)
+else(WIN32)
+  if(UNIX)
+    set(LLVM_ON_WIN32 0)
+    set(LLVM_ON_UNIX 1)
+    if(APPLE)
+      set(LTDL_SHLIB_EXT ".dylib")
+    else(APPLE)
+      set(LTDL_SHLIB_EXT ".so")
+    endif(APPLE)
+    set(EXEEXT "")
+    # FIXME: Maximum path length is currently set to 'safe' fixed value
+    set(MAXPATHLEN 2024)
+  else(UNIX)
+    MESSAGE(SEND_ERROR "Unable to determine platform")
+  endif(UNIX)
+endif(WIN32)
+
+if( LLVM_ENABLE_PIC )
+  if( XCODE )
+    # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
+    # know how to disable this, so just force ENABLE_PIC off for now.
+    message(WARNING "-fPIC not supported with Xcode.")
+  elseif( WIN32 )
+    # On Windows all code is PIC. MinGW warns if -fPIC is used.
+  else()
+    include(CheckCXXCompilerFlag)
+    check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
+    if( SUPPORTS_FPIC_FLAG )
+      message(STATUS "Building with -fPIC")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+      set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+    else( SUPPORTS_FPIC_FLAG )
+      message(WARNING "-fPIC not supported.")
+    endif()
+  endif()
+endif()
+
+if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
+  # TODO: support other platforms and toolchains.
+  option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF)
+  if( LLVM_BUILD_32_BITS )
+    message(STATUS "Building 32 bits executables and libraries.")
+    add_llvm_definitions( -m32 )
+    list(APPEND CMAKE_EXE_LINKER_FLAGS -m32)
+    list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32)
+  endif( LLVM_BUILD_32_BITS )
+endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
+
+if( MSVC )
+  include(ChooseMSVCCRT)
+
+  # Add definitions that make MSVC much less annoying.
+  add_llvm_definitions(
+    # For some reason MS wants to deprecate a bunch of standard functions...
+    -D_CRT_SECURE_NO_DEPRECATE
+    -D_CRT_SECURE_NO_WARNINGS
+    -D_CRT_NONSTDC_NO_DEPRECATE
+    -D_CRT_NONSTDC_NO_WARNINGS
+    -D_SCL_SECURE_NO_DEPRECATE
+    -D_SCL_SECURE_NO_WARNINGS
+
+    -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
+    -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
+    -wd4224 # Suppress 'nonstandard extension used : formal parameter 'identifier' was previously defined as a type'
+    -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
+    -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
+    -wd4275 # Suppress 'An exported class was derived from a class that was not exported.'
+    -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
+    -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
+    -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
+    -wd4355 # Suppress ''this' : used in base member initializer list'
+    -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
+    -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
+    -wd4715 # Suppress ''function' : not all control paths return a value'
+    -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
+    -wd4065 # Suppress 'switch statement contains 'default' but no 'case' labels'
+
+    -w14062 # Promote "enumerator in switch of enum is not handled" to level 1 warning.
+    )
+
+  # Enable warnings
+  if (LLVM_ENABLE_WARNINGS)
+    add_llvm_definitions( /W4 /Wall )
+    if (LLVM_ENABLE_PEDANTIC)
+      # No MSVC equivalent available
+    endif (LLVM_ENABLE_PEDANTIC)
+  endif (LLVM_ENABLE_WARNINGS)
+  if (LLVM_ENABLE_WERROR)
+    add_llvm_definitions( /WX )
+  endif (LLVM_ENABLE_WERROR)
+elseif( CMAKE_COMPILER_IS_GNUCXX )
+  if (LLVM_ENABLE_WARNINGS)
+    add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings )
+    if (LLVM_ENABLE_PEDANTIC)
+      add_llvm_definitions( -pedantic -Wno-long-long )
+    endif (LLVM_ENABLE_PEDANTIC)
+  endif (LLVM_ENABLE_WARNINGS)
+  if (LLVM_ENABLE_WERROR)
+    add_llvm_definitions( -Werror )
+  endif (LLVM_ENABLE_WERROR)
+endif( MSVC )
+
+add_llvm_definitions( -D__STDC_LIMIT_MACROS )
+add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
+
diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVM.cmake
index 9621454f4119..d610f3e76516 100644
--- a/cmake/modules/LLVM.cmake
+++ b/cmake/modules/LLVM.cmake
@@ -1,11 +1,15 @@
 # This file provides information and services to the final user.
 
+set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@)
+
 set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
 
 set(llvm_libs @llvm_libs@)
 
 set(llvm_lib_targets @llvm_lib_targets@)
 
+set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@)
+
 set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
 
 set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@)
@@ -14,6 +18,13 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
 
 set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
 
+set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
+
+set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS)
+
+set(HAVE_LIBDL @HAVE_LIBDL@)
+set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD)
+
 # We try to include using the current setting of CMAKE_MODULE_PATH,
 # which suppossedly was filled by the user with the directory where
 # this file was installed:
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake
index e5497084be84..349544edc335 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVMConfig.cmake
@@ -59,6 +59,9 @@ endfunction(llvm_map_components_to_libraries)
 
 function(explicit_map_components_to_libraries out_libs)
   set( link_components ${ARGN} )
+  get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
+  string(TOUPPER "${llvm_libs}" capitalized_libs)
+  # Translate symbolic component names to real libraries:
   foreach(c ${link_components})
     # add codegen, asmprinter, asmparser, disassembler
     list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
@@ -102,39 +105,48 @@ function(explicit_map_components_to_libraries out_libs)
     elseif( c STREQUAL "all" )
       list(APPEND expanded_components ${llvm_libs})
     else( NOT idx LESS 0 )
-      list(APPEND expanded_components LLVM${c})
+      # Canonize the component name:
+      string(TOUPPER "${c}" capitalized)
+      list(FIND capitalized_libs LLVM${capitalized} lib_idx)
+      if( lib_idx LESS 0 )
+	# The component is unkown. Maybe is an ommitted target?
+	is_llvm_target_library(${c} iltl_result)
+	if( NOT iltl_result )
+	  message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
+	endif()
+      else( lib_idx LESS 0 )
+	list(GET llvm_libs ${lib_idx} canonical_lib)
+	list(APPEND expanded_components ${canonical_lib})
+      endif( lib_idx LESS 0 )
     endif( NOT idx LESS 0 )
   endforeach(c)
-  # We must match capitalization.
-  string(TOUPPER "${llvm_libs}" capitalized_libs)
-  list(REMOVE_DUPLICATES expanded_components)
+  # Expand dependencies while topologically sorting the list of libraries:
   list(LENGTH expanded_components lst_size)
-  set(result "")
-  while( 0 LESS ${lst_size} )
-    list(GET expanded_components 0 c)
-    string(TOUPPER "${c}" capitalized)
-    list(FIND capitalized_libs ${capitalized} idx)
-    set(add_it ON)
-    if( idx LESS 0 )
-      # The library is unkown. Maybe is an ommitted target?
-      is_llvm_target_library(${c} iltl_result)
-      if( NOT iltl_result )
-	message(FATAL_ERROR "Library ${c} not found in list of llvm libraries.")
-      endif()
-      set(add_it OFF)
-    endif( idx LESS 0 )
-    list(GET llvm_libs ${idx} canonical_lib)
-    list(REMOVE_ITEM result ${canonical_lib})
-    foreach(c ${MSVC_LIB_DEPS_${canonical_lib}})
-      list(REMOVE_ITEM expanded_components ${c})
-    endforeach()
-    if( add_it )
-      list(APPEND result ${canonical_lib})
-      list(APPEND expanded_components ${MSVC_LIB_DEPS_${canonical_lib}})
-    endif()
-    list(REMOVE_AT expanded_components 0)
+  set(cursor 0)
+  set(processed)
+  while( cursor LESS lst_size )
+    list(GET expanded_components ${cursor} lib)
+    list(APPEND expanded_components ${MSVC_LIB_DEPS_${lib}})
+    # Remove duplicates at the front:
+    list(REVERSE expanded_components)
+    list(REMOVE_DUPLICATES expanded_components)
+    list(REVERSE expanded_components)
+    list(APPEND processed ${lib})
+    # Find the maximum index that doesn't have to be re-processed:
+    while(NOT "${expanded_components}" MATCHES "^${processed}.*" )
+      list(REMOVE_AT processed -1)
+    endwhile()
+    list(LENGTH processed cursor)
     list(LENGTH expanded_components lst_size)
-  endwhile( 0 LESS ${lst_size} )
+  endwhile( cursor LESS lst_size )
+  # Return just the libraries included in this build:
+  set(result)
+  foreach(c ${expanded_components})
+    list(FIND llvm_libs ${c} lib_idx)
+    if( NOT lib_idx LESS 0 )
+      set(result ${result} ${c})
+    endif()
+  endforeach(c)
   set(${out_libs} ${result} PARENT_SCOPE)
 endfunction(explicit_map_components_to_libraries)
 
@@ -151,13 +163,13 @@ endfunction(explicit_map_components_to_libraries)
 
 # The format generated by GenLibDeps.pl
 
-# libLLVMARMAsmPrinter.a: libLLVMMC.a libLLVMSupport.a
+# LLVMARMAsmPrinter.o: LLVMARMCodeGen.o libLLVMAsmPrinter.a libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMTarget.a
 
 # is translated to:
 
-# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
+# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
 
-# It is necessary to remove the `lib' prefix and the `.a' suffix.
+# It is necessary to remove the `lib' prefix and the `.a'.
 
 # This 'sed' script should do the trick:
 # sed -e s'#\.a##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' ~/llvm/tools/llvm-config/LibDeps.txt
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index e639b04e9800..afba85e45aa3 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -1,71 +1,68 @@
-set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMAsmPrinter LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMDisassembler LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMMCParser LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
-set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMCore LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMCodeGen LLVMCore LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMInfo LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmPrinter LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430AsmPrinter LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsAsmPrinter LLVMMipsCodeGen LLVMMipsInfo LLVMPIC16AsmPrinter LLVMPIC16CodeGen LLVMPIC16Info LLVMPowerPCAsmPrinter LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcAsmPrinter LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMSystemZAsmPrinter LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreAsmPrinter LLVMXCoreCodeGen LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMMCJIT LLVMExecutionEngine LLVMSupport LLVMTarget)
 set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16CodeGen LLVMPIC16Info LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPIC16CodeGen LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMSystem )
-set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystemZInfo LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMObject LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMPTXCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPTXInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPTXInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMSupport )
+set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMMC LLVMSupport)
 set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa)
+set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMipa)
 set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMMCParser LLVMSupport LLVMTarget LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86AsmPrinter LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport LLVMX86Utils)
+set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Info LLVMX86Utils)
 set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMXCoreInfo)
-set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
-set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMX86Info LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMX86Utils LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
diff --git a/cmake/modules/LLVMParseArguments.cmake b/cmake/modules/LLVMParseArguments.cmake
new file mode 100644
index 000000000000..ce19be114b31
--- /dev/null
+++ b/cmake/modules/LLVMParseArguments.cmake
@@ -0,0 +1,80 @@
+# Copied from http://www.itk.org/Wiki/CMakeMacroParseArguments under
+# http://creativecommons.org/licenses/by/2.5/.
+#
+# The PARSE_ARGUMENTS macro will take the arguments of another macro and define
+# several variables. The first argument to PARSE_ARGUMENTS is a prefix to put on
+# all variables it creates. The second argument is a list of names, and the
+# third argument is a list of options. Both of these lists should be quoted. The
+# rest of PARSE_ARGUMENTS are arguments from another macro to be parsed.
+#
+# PARSE_ARGUMENTS(prefix arg_names options arg1 arg2...)
+#
+# For each item in options, PARSE_ARGUMENTS will create a variable with that
+# name, prefixed with prefix_. So, for example, if prefix is MY_MACRO and
+# options is OPTION1;OPTION2, then PARSE_ARGUMENTS will create the variables
+# MY_MACRO_OPTION1 and MY_MACRO_OPTION2. These variables will be set to true if
+# the option exists in the command line or false otherwise.
+#
+#For each item in arg_names, PARSE_ARGUMENTS will create a variable with that
+#name, prefixed with prefix_. Each variable will be filled with the arguments
+#that occur after the given arg_name is encountered up to the next arg_name or
+#the end of the arguments. All options are removed from these
+#lists. PARSE_ARGUMENTS also creates a prefix_DEFAULT_ARGS variable containing
+#the list of all arguments up to the first arg_name encountered.
+#
+#Here is a simple, albeit impractical, example of using PARSE_ARGUMENTS that
+#demonstrates its behavior.
+#
+# SET(arguments
+#     hello OPTION3 world
+#     LIST3 foo bar
+#     OPTION2
+#     LIST1 fuz baz
+#     )
+#
+# PARSE_ARGUMENTS(ARG "LIST1;LIST2;LIST3" "OPTION1;OPTION2;OPTION3" ${arguments})
+#
+# PARSE_ARGUMENTS creates 7 variables and sets them as follows:
+#   ARG_DEFAULT_ARGS: hello;world
+#   ARG_LIST1: fuz;baz
+#   ARG_LIST2:
+#   ARG_LIST3: foo;bar
+#   ARG_OPTION1: FALSE
+#   ARG_OPTION2: TRUE
+#   ARG_OPTION3: TRUE
+#
+# If you don't have any options, use an empty string in its place.
+#   PARSE_ARGUMENTS(ARG "LIST1;LIST2;LIST3" "" ${arguments})
+# Likewise if you have no lists.
+#   PARSE_ARGUMENTS(ARG "" "OPTION1;OPTION2;OPTION3" ${arguments})
+
+MACRO(PARSE_ARGUMENTS prefix arg_names option_names)
+  SET(DEFAULT_ARGS)
+  FOREACH(arg_name ${arg_names})
+    SET(${prefix}_${arg_name})
+  ENDFOREACH(arg_name)
+  FOREACH(option ${option_names})
+    SET(${prefix}_${option} FALSE)
+  ENDFOREACH(option)
+
+  SET(current_arg_name DEFAULT_ARGS)
+  SET(current_arg_list)
+  FOREACH(arg ${ARGN})
+    SET(larg_names ${arg_names})
+    LIST(FIND larg_names "${arg}" is_arg_name)
+    IF (is_arg_name GREATER -1)
+      SET(${prefix}_${current_arg_name} ${current_arg_list})
+      SET(current_arg_name ${arg})
+      SET(current_arg_list)
+    ELSE (is_arg_name GREATER -1)
+      SET(loption_names ${option_names})
+      LIST(FIND loption_names "${arg}" is_option)
+      IF (is_option GREATER -1)
+        SET(${prefix}_${arg} TRUE)
+      ELSE (is_option GREATER -1)
+        SET(current_arg_list ${current_arg_list} ${arg})
+      ENDIF (is_option GREATER -1)
+    ENDIF (is_arg_name GREATER -1)
+  ENDFOREACH(arg)
+  SET(${prefix}_${current_arg_name} ${current_arg_list})
+ENDMACRO(PARSE_ARGUMENTS)
diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake
index b753735cd55e..270292ad3b86 100644
--- a/cmake/modules/LLVMProcessSources.cmake
+++ b/cmake/modules/LLVMProcessSources.cmake
@@ -1,5 +1,22 @@
 include(AddFileDependencies)
 
+function(llvm_replace_compiler_option var old new)
+  # Replaces a compiler option or switch `old' in `var' by `new'.
+  # If `old' is not in `var', appends `new' to `var'.
+  # Example: llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2")
+  # If the option already is on the variable, don't add it:
+  if( "${${var}}" MATCHES "(^| )${new}($| )" )
+    set(n "")
+  else()
+    set(n "${new}")
+  endif()
+  if( "${${var}}" MATCHES "(^| )${old}($| )" )
+    string( REGEX REPLACE "(^| )${old}($| )" " ${n} " ${var} "${${var}}" )
+  else()
+    set( ${var} "${${var}} ${n}" )
+  endif()
+  set( ${var} "${${var}}" PARENT_SCOPE )
+endfunction(llvm_replace_compiler_option)
 
 macro(add_td_sources srcs)
   file(GLOB tds *.td)
@@ -12,7 +29,7 @@ endmacro(add_td_sources)
 
 
 macro(add_header_files srcs)
-  file(GLOB hds *.h)
+  file(GLOB hds *.h *.def)
   if( hds )
     set_source_files_properties(${hds} PROPERTIES HEADER_FILE_ONLY ON)
     list(APPEND ${srcs} ${hds})
@@ -36,6 +53,25 @@ function(llvm_process_sources OUT_VAR)
     add_td_sources(sources)
     add_header_files(sources)
   endif()
+
+  # Set common compiler options:
+  if( NOT LLVM_REQUIRES_EH )
+    if( CMAKE_COMPILER_IS_GNUCXX )
+      add_definitions( -fno-exceptions )
+    elseif( MSVC )
+      llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/EHsc" "/EHs-c-")
+      add_definitions( /D_HAS_EXCEPTIONS=0 )
+    endif()
+  endif()
+  if( NOT LLVM_REQUIRES_RTTI )
+    if( CMAKE_COMPILER_IS_GNUCXX )
+      llvm_replace_compiler_option(CMAKE_CXX_FLAGS "-frtti" "-fno-rtti")
+    elseif( MSVC )
+      llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/GR" "/GR-")
+    endif()
+  endif()
+
+  set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" PARENT_SCOPE )
   set( ${OUT_VAR} ${sources} PARENT_SCOPE )
 endfunction(llvm_process_sources)
 
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index cf7cd1f62e52..9d67137bb42a 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -12,14 +12,34 @@ macro(tablegen ofn)
     set(LLVM_TARGET_DEFINITIONS_ABSOLUTE 
       ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS})
   endif()
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+    # Generate tablegen output in a temporary file.
     COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
     -I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} 
-    -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
-    DEPENDS tblgen ${local_tds} ${global_tds}
+    -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+    # The file in LLVM_TARGET_DEFINITIONS may be not in the current
+    # directory and local_tds may not contain it, so we must
+    # explicitly list it here:
+    DEPENDS ${LLVM_TABLEGEN_EXE} ${local_tds} ${global_tds}
+    ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
     COMMENT "Building ${ofn}..."
     )
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+    # Only update the real output file if there are any differences.
+    # This prevents recompilation of all the files depending on it if there
+    # aren't any.
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+        ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+    COMMENT ""
+    )
+
+  # `make clean' must remove all those generated files:
+  set_property(DIRECTORY APPEND
+    PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${ofn}.tmp ${ofn})
+
   set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
   set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${ofn} 
     PROPERTIES GENERATED 1)
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index 1016df22590d..81739be927a4 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -4,13 +4,16 @@
 
 function(add_version_info_from_vcs VERS)
   set(result ${${VERS}})
-  if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.svn )
+  if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.svn" )
     set(result "${result}svn")
-    find_package(Subversion)
+    # FindSubversion does not work with symlinks. See PR 8437
+    if( NOT IS_SYMLINK "${CMAKE_CURRENT_SOURCE_DIR}" )
+      find_package(Subversion)
+    endif()
     if( Subversion_FOUND )
       subversion_wc_info( ${CMAKE_CURRENT_SOURCE_DIR} Project )
       if( Project_WC_REVISION )
-	set(result "${result}-r${Project_WC_REVISION}")
+        set(result "${result}-r${Project_WC_REVISION}")
       endif()
     endif()
   elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git )
@@ -19,13 +22,23 @@ function(add_version_info_from_vcs VERS)
     find_program(git_executable NAMES git git.exe git.cmd)
     if( git_executable )
       execute_process(COMMAND ${git_executable} show-ref HEAD
-	WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-	TIMEOUT 5
-	RESULT_VARIABLE git_result
-	OUTPUT_VARIABLE git_output)
+                      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+                      TIMEOUT 5
+                      RESULT_VARIABLE git_result
+                      OUTPUT_VARIABLE git_output)
       if( git_result EQUAL 0 )
-	string(SUBSTRING ${git_output} 0 7 git_ref_id)
-	set(result "${result}-${git_ref_id}")
+        string(SUBSTRING ${git_output} 0 7 git_ref_id)
+        set(result "${result}-${git_ref_id}")
+      else()
+        execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
+                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+                        TIMEOUT 5
+                        RESULT_VARIABLE git_result
+                        OUTPUT_VARIABLE git_output)
+        if( git_result EQUAL 0 )
+          string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
+          set(result "${result}-svn-${git_svn_rev}")
+        endif()
       endif()
     endif()
   endif()
diff --git a/configure b/configure
index 776de364bb20..959822fa0e15 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for llvm 2.8.
+# Generated by GNU Autoconf 2.60 for llvm 2.9svn.
 #
 # Report bugs to <llvmbugs@cs.uiuc.edu>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='llvm'
 PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='2.8'
-PACKAGE_STRING='llvm 2.8'
+PACKAGE_VERSION='2.9svn'
+PACKAGE_STRING='llvm 2.9svn'
 PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
 
 ac_unique_file="lib/VMCore/Module.cpp"
@@ -641,6 +641,8 @@ host_alias
 target_alias
 LLVM_COPYRIGHT
 subdirs
+ENABLE_POLLY
+LLVM_HAS_POLLY
 build
 build_cpu
 build_vendor
@@ -686,10 +688,13 @@ DEBUG_RUNTIME
 DEBUG_SYMBOLS
 JIT
 TARGET_HAS_JIT
+ENABLE_DOCS
 ENABLE_DOXYGEN
 ENABLE_THREADS
+ENABLE_PTHREADS
 ENABLE_PIC
 ENABLE_SHARED
+ENABLE_EMBED_STDCXX
 ENABLE_TIMESTAMPS
 TARGETS_TO_BUILD
 LLVM_ENUM_TARGETS
@@ -729,6 +734,7 @@ TWOPI
 CIRCO
 GV
 DOTTY
+XDOT_PY
 PERL
 HAVE_PERL
 INSTALL_PROGRAM
@@ -738,7 +744,7 @@ BZIP2
 CAT
 DOXYGEN
 GROFF
-GZIP
+GZIPBIN
 POD2HTML
 POD2MAN
 PDFROFF
@@ -768,10 +774,13 @@ USE_OPROFILE
 HAVE_PTHREAD
 HUGE_VAL_SANITY
 MMAP_FILE
+LLVMCC_EMITIR_FLAG
 LLVMCC1
 LLVMCC1PLUS
 LLVMGCCDIR
 LLVMGCC_LANGS
+LLVMGCC_DRAGONEGG
+LLVMCC_DISABLEOPT_FLAGS
 SHLIBEXT
 SHLIBPATH_VAR
 LLVM_PREFIX
@@ -816,7 +825,8 @@ projects/llvm-reopt
 projects/llvm-java
 projects/llvm-tv
 projects/safecode
-projects/llvm-kernel'
+projects/llvm-kernel
+tools/polly'
 
 # Initialize some variables set by options.
 ac_init_help=
@@ -1318,7 +1328,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures llvm 2.8 to adapt to many kinds of systems.
+\`configure' configures llvm 2.9svn to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1384,17 +1394,18 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of llvm 2.8:";;
+     short | recursive ) echo "Configuration of llvm 2.9svn:";;
    esac
   cat <<\_ACEOF
 
 Optional Features:
   --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-polly          Use polly if available (default is YES)
   --enable-optimized      Compile with optimizations enabled (default is NO)
   --enable-profiling      Compile with profiling enabled (default is NO)
   --enable-assertions     Compile with assertion checks enabled (default is
-                          NO)
+                          YES)
   --enable-expensive-checks
                           Compile with expensive debug checks enabled (default
                           is NO)
@@ -1403,19 +1414,23 @@ Optional Features:
   --enable-debug-symbols  Build compiler with debug symbols (default is NO if
                           optimization is on and YES if it's off)
   --enable-jit            Enable Just In Time Compiling (default is YES)
+  --enable-docs           Build documents (default is YES)
   --enable-doxygen        Build doxygen documentation (default is NO)
   --enable-threads        Use threads if available (default is YES)
+  --enable-pthreads       Use pthreads if available (default is YES)
   --enable-pic            Build LLVM with Position Independent Code (default
                           is YES)
   --enable-shared         Build a shared library and link tools against it
                           (default is NO)
+  --enable-embed-stdcxx   Build a shared library with embedded libstdc++ for
+                          Win32 DLL (default is YES)
   --enable-timestamps     Enable embedding timestamp information in build
                           (default is YES)
   --enable-targets        Build specific host targets: all or
                           target1,target2,... Valid targets are: host, x86,
                           x86_64, sparc, powerpc, alpha, arm, mips, spu,
-                          pic16, xcore, msp430, systemz, blackfin, cbe, and
-                          cpp (default=all)
+                          xcore, msp430, systemz, blackfin, ptx, cbe, and cpp
+                          (default=all)
   --enable-cbe-printf-a   Enable C Backend output with hex floating point via
                           %a (default is YES)
   --enable-bindings       Build specific language bindings:
@@ -1441,6 +1456,9 @@ Optional Packages:
   --with-extra-options    Specify additional options to compile LLVM with
   --with-ocaml-libdir     Specify install location for ocaml bindings (default
                           is stdlib)
+  --with-clang-resource-dir
+                          Relative directory from the Clang binary for
+                          resource files
   --with-c-include-dirs   Colon separated list of directories clang will
                           search for headers
   --with-cxx-include-root Directory with the libstdc++ headers.
@@ -1533,7 +1551,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-llvm configure 2.8
+llvm configure 2.9svn
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1549,7 +1567,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by llvm $as_me 2.8, which was
+It was created by llvm $as_me 2.9svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -2011,6 +2029,33 @@ echo "$as_me: WARNING: Unknown project (${i}) won't be configured automatically"
   fi
 done
 
+# Check whether --enable-polly was given.
+if test "${enable_polly+set}" = set; then
+  enableval=$enable_polly;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) ENABLE_POLLY=1
+ ;;
+  no)  ENABLE_POLLY=0
+ ;;
+  default) ENABLE_POLLY=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
+
+if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then
+  LLVM_HAS_POLLY=1
+
+  subdirs="$subdirs tools/polly"
+
+fi
+
 
 # Make sure we can run config.sub.
 $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
@@ -2355,12 +2400,12 @@ else
   alpha*-*)               llvm_cv_target_arch="Alpha" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
   mips-*)                 llvm_cv_target_arch="Mips" ;;
-  pic16-*)                llvm_cv_target_arch="PIC16" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
   msp430-*)               llvm_cv_target_arch="MSP430" ;;
   s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   bfin-*)                 llvm_cv_target_arch="Blackfin" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
+  ptx-*)                  llvm_cv_target_arch="PTX" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac
 fi
@@ -4699,7 +4744,7 @@ fi
 if test "${enable_optimized+set}" = set; then
   enableval=$enable_optimized;
 else
-  enableval="yes"
+  enableval=$optimize
 fi
 
 if test ${enableval} = "no" ; then
@@ -4729,7 +4774,7 @@ fi
 if test "${enable_assertions+set}" = set; then
   enableval=$enable_assertions;
 else
-  enableval="no"
+  enableval="yes"
 fi
 
 if test ${enableval} = "yes" ; then
@@ -4810,13 +4855,11 @@ else
  ;;
     x86_64)      TARGET_HAS_JIT=1
  ;;
-    Alpha)       TARGET_HAS_JIT=1
+    Alpha)       TARGET_HAS_JIT=0
  ;;
     ARM)         TARGET_HAS_JIT=1
  ;;
     Mips)        TARGET_HAS_JIT=0
- ;;
-    PIC16)       TARGET_HAS_JIT=0
  ;;
     XCore)       TARGET_HAS_JIT=0
  ;;
@@ -4827,12 +4870,33 @@ else
     Blackfin)    TARGET_HAS_JIT=0
  ;;
     MBlaze)      TARGET_HAS_JIT=0
+ ;;
+    PTX)         TARGET_HAS_JIT=0
  ;;
     *)           TARGET_HAS_JIT=0
  ;;
   esac
 fi
 
+# Check whether --enable-docs was given.
+if test "${enable_docs+set}" = set; then
+  enableval=$enable_docs;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) ENABLE_DOCS=1
+ ;;
+  no)  ENABLE_DOCS=0
+ ;;
+  default) ENABLE_DOCS=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
 # Check whether --enable-doxygen was given.
 if test "${enable_doxygen+set}" = set; then
   enableval=$enable_doxygen;
@@ -4876,6 +4940,25 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+# Check whether --enable-pthreads was given.
+if test "${enable_pthreads+set}" = set; then
+  enableval=$enable_pthreads;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) ENABLE_PTHREADS=1
+ ;;
+  no)  ENABLE_PTHREADS=0
+ ;;
+  default) ENABLE_PTHREADS=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
 # Check whether --enable-pic was given.
 if test "${enable_pic+set}" = set; then
   enableval=$enable_pic;
@@ -4919,6 +5002,25 @@ echo "$as_me: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\""
    { (exit 1); exit 1; }; } ;;
 esac
 
+# Check whether --enable-embed-stdcxx was given.
+if test "${enable_embed_stdcxx+set}" = set; then
+  enableval=$enable_embed_stdcxx;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) ENABLE_EMBED_STDCXX=1
+ ;;
+  no)  ENABLE_EMBED_STDCXX=0
+ ;;
+  default) ENABLE_EMBED_STDCXX=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
 # Check whether --enable-timestamps was given.
 if test "${enable_timestamps+set}" = set; then
   enableval=$enable_timestamps;
@@ -4955,7 +5057,7 @@ if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -4966,7 +5068,6 @@ case "$enableval" in
         arm)      TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
         mips)     TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
         spu)      TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
-        pic16)    TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
         xcore)    TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
         msp430)   TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
@@ -4974,6 +5075,7 @@ case "$enableval" in
         cbe)      TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+        ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -4984,11 +5086,11 @@ case "$enableval" in
             Mips)        TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
             MBlaze)      TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
             CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
-            PIC16)       TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;;
             XCore)       TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             s390x)       TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             Blackfin)    TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+            PTX)         TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
 echo "$as_me: error: Can not set target to build" >&2;}
    { (exit 1); exit 1; }; } ;;
@@ -5041,7 +5143,7 @@ LLVM_ENUM_ASM_PARSERS=""
 LLVM_ENUM_DISASSEMBLERS=""
 for target_to_build in $TARGETS_TO_BUILD; do
   LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
-  if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
+  if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then
     LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
   fi
   if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
@@ -5274,6 +5376,20 @@ echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&
 esac
 
 
+# Check whether --with-clang-resource-dir was given.
+if test "${with_clang_resource_dir+set}" = set; then
+  withval=$with_clang_resource_dir;
+else
+  withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CLANG_RESOURCE_DIR "$withval"
+_ACEOF
+
+
+
 # Check whether --with-c-include-dirs was given.
 if test "${with_c_include_dirs+set}" = set; then
   withval=$with_c_include_dirs;
@@ -7828,6 +7944,62 @@ cat >>confdefs.h <<_ACEOF
 #define LLVM_PATH_DOTTY "$DOTTY${EXEEXT}"
 _ACEOF
 
+fi
+# Extract the first word of "xdot.py", so it can be a program name with args.
+set dummy xdot.py; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_XDOT_PY+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  case $XDOT_PY in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_XDOT_PY="$XDOT_PY" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_path_XDOT_PY="$as_dir/$ac_word$ac_exec_ext"
+    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_XDOT_PY" && ac_cv_path_XDOT_PY="echo xdot.py"
+  ;;
+esac
+fi
+XDOT_PY=$ac_cv_path_XDOT_PY
+if test -n "$XDOT_PY"; then
+  { echo "$as_me:$LINENO: result: $XDOT_PY" >&5
+echo "${ECHO_T}$XDOT_PY" >&6; }
+else
+  { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$XDOT_PY" != "echo xdot.py" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_XDOT_PY 1
+_ACEOF
+
+    if test "$llvm_cv_os_type" = "MingW" ; then
+    XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+  fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_XDOT_PY "$XDOT_PY${EXEEXT}"
+_ACEOF
+
 fi
 
 
@@ -8146,12 +8318,12 @@ fi
 set dummy gzip; ac_word=$2
 { echo "$as_me:$LINENO: checking for $ac_word" >&5
 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
-if test "${ac_cv_path_GZIP+set}" = set; then
+if test "${ac_cv_path_GZIPBIN+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  case $GZIP in
+  case $GZIPBIN in
   [\\/]* | ?:[\\/]*)
-  ac_cv_path_GZIP="$GZIP" # Let the user override the test with a path.
+  ac_cv_path_GZIPBIN="$GZIPBIN" # Let the user override the test with a path.
   ;;
   *)
   as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -8161,7 +8333,7 @@ do
   test -z "$as_dir" && as_dir=.
   for ac_exec_ext in '' $ac_executable_extensions; do
   if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_GZIP="$as_dir/$ac_word$ac_exec_ext"
+    ac_cv_path_GZIPBIN="$as_dir/$ac_word$ac_exec_ext"
     echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
     break 2
   fi
@@ -8172,10 +8344,10 @@ IFS=$as_save_IFS
   ;;
 esac
 fi
-GZIP=$ac_cv_path_GZIP
-if test -n "$GZIP"; then
-  { echo "$as_me:$LINENO: result: $GZIP" >&5
-echo "${ECHO_T}$GZIP" >&6; }
+GZIPBIN=$ac_cv_path_GZIPBIN
+if test -n "$GZIPBIN"; then
+  { echo "$as_me:$LINENO: result: $GZIPBIN" >&5
+echo "${ECHO_T}$GZIPBIN" >&6; }
 else
   { echo "$as_me:$LINENO: result: no" >&5
 echo "${ECHO_T}no" >&6; }
@@ -11389,7 +11561,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 11392 "configure"
+#line 11564 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -13212,8 +13384,8 @@ fi
 
 if test "$GXX" != "yes" && test "$IXX" != "yes"
 then
-  { { echo "$as_me:$LINENO: error: g++|icc required but not found" >&5
-echo "$as_me: error: g++|icc required but not found" >&2;}
+  { { echo "$as_me:$LINENO: error: g++|clang++|icc required but not found" >&5
+echo "$as_me: error: g++|clang++|icc required but not found" >&2;}
    { (exit 1); exit 1; }; }
 fi
 
@@ -13865,7 +14037,7 @@ _ACEOF
 fi
 
 
-if test "$ENABLE_THREADS" -eq 1 ; then
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
 
 { echo "$as_me:$LINENO: checking for pthread_mutex_init in -lpthread" >&5
 echo $ECHO_N "checking for pthread_mutex_init in -lpthread... $ECHO_C" >&6; }
@@ -16090,7 +16262,8 @@ done
 
 
 
-for ac_header in sys/mman.h sys/param.h sys/resource.h sys/time.h
+
+for ac_header in sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
@@ -16599,9 +16772,8 @@ fi
 
 done
 
-if test "$ENABLE_THREADS" -eq 1 ; then
 
-for ac_header in pthread.h
+for ac_header in fenv.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
@@ -16764,24 +16936,14 @@ if test `eval echo '${'$as_ac_Header'}'` = yes; then
   cat >>confdefs.h <<_ACEOF
 #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
 _ACEOF
- HAVE_PTHREAD=1
-
-else
-  HAVE_PTHREAD=0
 
 fi
 
 done
 
-else
-  HAVE_PTHREAD=0
-
-fi
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
 
-if test "$llvm_cv_enable_libffi" = "yes" ; then
-
-
-for ac_header in ffi.h ffi/ffi.h
+for ac_header in pthread.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
@@ -16944,15 +17106,24 @@ if test `eval echo '${'$as_ac_Header'}'` = yes; then
   cat >>confdefs.h <<_ACEOF
 #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
 _ACEOF
+ HAVE_PTHREAD=1
+
+else
+  HAVE_PTHREAD=0
 
 fi
 
 done
 
+else
+  HAVE_PTHREAD=0
+
 fi
 
+if test "$llvm_cv_enable_libffi" = "yes" ; then
 
-for ac_header in CrashReporterClient.h
+
+for ac_header in ffi.h ffi/ffi.h
 do
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
@@ -17120,112 +17291,33 @@ fi
 
 done
 
-
-
-
-
-  { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5
-echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; }
-if test "${ac_cv_huge_val_sanity+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-
-    ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-    ac_save_CXXFLAGS=$CXXFLAGS
-    CXXFLAGS=-pedantic
-    if test "$cross_compiling" = yes; then
-  ac_cv_huge_val_sanity=yes
-else
-  cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-#include <math.h>
-int
-main ()
-{
-double x = HUGE_VAL; return x != x;
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_link") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; }; then
-  ac_cv_huge_val_sanity=yes
-else
-  echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-ac_cv_huge_val_sanity=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-
-
-    CXXFLAGS=$ac_save_CXXFLAGS
-    ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_huge_val_sanity" >&5
-echo "${ECHO_T}$ac_cv_huge_val_sanity" >&6; }
-  HUGE_VAL_SANITY=$ac_cv_huge_val_sanity
 
 
-{ echo "$as_me:$LINENO: checking for pid_t" >&5
-echo $ECHO_N "checking for pid_t... $ECHO_C" >&6; }
-if test "${ac_cv_type_pid_t+set}" = set; then
+for ac_header in CrashReporterClient.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 $ac_includes_default
-typedef pid_t ac__type_new_;
-int
-main ()
-{
-if ((ac__type_new_ *) 0)
-  return 0;
-if (sizeof (ac__type_new_))
-  return 0;
-  ;
-  return 0;
-}
+#include <$ac_header>
 _ACEOF
 rm -f conftest.$ac_objext
 if { (ac_try="$ac_compile"
@@ -17261,31 +17353,1854 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_type_pid_t=yes
+  ac_header_compiler=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_type_pid_t=no
+	ac_header_compiler=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_type_pid_t" >&5
-echo "${ECHO_T}$ac_cv_type_pid_t" >&6; }
-if test $ac_cv_type_pid_t = yes; then
-  :
-else
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
 
-cat >>confdefs.h <<_ACEOF
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header:     check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ----------------------------------- ##
+## Report this to llvmbugs@cs.uiuc.edu ##
+## ----------------------------------- ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+{ echo "$as_me:$LINENO: checking __crashreporter_info__" >&5
+echo $ECHO_N "checking __crashreporter_info__... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+extern const char *__crashreporter_info__;
+      int main() {
+        __crashreporter_info__ = "test";
+        return 0;
+      }
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CRASHREPORTER_INFO 1
+_ACEOF
+
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	{ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CRASHREPORTER_INFO 0
+_ACEOF
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+
+
+
+
+  { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5
+echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; }
+if test "${ac_cv_huge_val_sanity+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+    ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+    ac_save_CXXFLAGS=$CXXFLAGS
+    CXXFLAGS=-pedantic
+    if test "$cross_compiling" = yes; then
+  ac_cv_huge_val_sanity=yes
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <math.h>
+int
+main ()
+{
+double x = HUGE_VAL; return x != x;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_huge_val_sanity=yes
+else
+  echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_huge_val_sanity=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+    CXXFLAGS=$ac_save_CXXFLAGS
+    ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_huge_val_sanity" >&5
+echo "${ECHO_T}$ac_cv_huge_val_sanity" >&6; }
+  HUGE_VAL_SANITY=$ac_cv_huge_val_sanity
+
+
+{ echo "$as_me:$LINENO: checking for pid_t" >&5
+echo $ECHO_N "checking for pid_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_pid_t+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+typedef pid_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+  return 0;
+if (sizeof (ac__type_new_))
+  return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_type_pid_t=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_type_pid_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_pid_t" >&5
+echo "${ECHO_T}$ac_cv_type_pid_t" >&6; }
+if test $ac_cv_type_pid_t = yes; then
+  :
+else
+
+cat >>confdefs.h <<_ACEOF
 #define pid_t int
 _ACEOF
 
 fi
 
-{ echo "$as_me:$LINENO: checking for size_t" >&5
-echo $ECHO_N "checking for size_t... $ECHO_C" >&6; }
-if test "${ac_cv_type_size_t+set}" = set; then
+{ echo "$as_me:$LINENO: checking for size_t" >&5
+echo $ECHO_N "checking for size_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_size_t+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+typedef size_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+  return 0;
+if (sizeof (ac__type_new_))
+  return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_type_size_t=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_type_size_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5
+echo "${ECHO_T}$ac_cv_type_size_t" >&6; }
+if test $ac_cv_type_size_t = yes; then
+  :
+else
+
+cat >>confdefs.h <<_ACEOF
+#define size_t unsigned int
+_ACEOF
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define RETSIGTYPE void
+_ACEOF
+
+{ echo "$as_me:$LINENO: checking whether struct tm is in sys/time.h or time.h" >&5
+echo $ECHO_N "checking whether struct tm is in sys/time.h or time.h... $ECHO_C" >&6; }
+if test "${ac_cv_struct_tm+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <time.h>
+
+int
+main ()
+{
+struct tm *tp; tp->tm_sec;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_struct_tm=time.h
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_struct_tm=sys/time.h
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_struct_tm" >&5
+echo "${ECHO_T}$ac_cv_struct_tm" >&6; }
+if test $ac_cv_struct_tm = sys/time.h; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TM_IN_SYS_TIME 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for int64_t" >&5
+echo $ECHO_N "checking for int64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_int64_t+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+typedef int64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+  return 0;
+if (sizeof (ac__type_new_))
+  return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_type_int64_t=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_type_int64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_int64_t" >&5
+echo "${ECHO_T}$ac_cv_type_int64_t" >&6; }
+if test $ac_cv_type_int64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_INT64_T 1
+_ACEOF
+
+
+else
+  { { echo "$as_me:$LINENO: error: Type int64_t required but not found" >&5
+echo "$as_me: error: Type int64_t required but not found" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+{ echo "$as_me:$LINENO: checking for uint64_t" >&5
+echo $ECHO_N "checking for uint64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_uint64_t+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+typedef uint64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+  return 0;
+if (sizeof (ac__type_new_))
+  return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_type_uint64_t=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_type_uint64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_uint64_t" >&5
+echo "${ECHO_T}$ac_cv_type_uint64_t" >&6; }
+if test $ac_cv_type_uint64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_UINT64_T 1
+_ACEOF
+
+
+else
+  { echo "$as_me:$LINENO: checking for u_int64_t" >&5
+echo $ECHO_N "checking for u_int64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_u_int64_t+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+typedef u_int64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+  return 0;
+if (sizeof (ac__type_new_))
+  return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_type_u_int64_t=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_type_u_int64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_u_int64_t" >&5
+echo "${ECHO_T}$ac_cv_type_u_int64_t" >&6; }
+if test $ac_cv_type_u_int64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_U_INT64_T 1
+_ACEOF
+
+
+else
+  { { echo "$as_me:$LINENO: error: Type uint64_t or u_int64_t required but not found" >&5
+echo "$as_me: error: Type uint64_t or u_int64_t required but not found" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+fi
+
+
+
+
+
+
+
+
+
+
+for ac_func in backtrace ceilf floorf roundf rintf nearbyintf getcwd
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+for ac_func in powf fmodf strtof round
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
+for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+for ac_func in isatty mkdtemp mkstemp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
+
+for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+for ac_func in strerror strerror_r setenv
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+for ac_func in strtoll strtoq sysconf malloc_zone_statistics
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
+for ac_func in setjmp longjmp sigsetjmp siglongjmp writev
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+{ echo "$as_me:$LINENO: checking if printf has the %a format character" >&5
+echo $ECHO_N "checking if printf has the %a format character... $ECHO_C" >&6; }
+if test "${llvm_cv_c_printf_a+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ if test "$cross_compiling" = yes; then
+  llvmac_cv_c_printf_a=no
+else
+  cat >conftest.$ac_ext <<_ACEOF
+
+  /* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+
+volatile double A, B;
+char Buffer[100];
+A = 1;
+A /= 10.0;
+sprintf(Buffer, "%a", A);
+B = atof(Buffer);
+if (A != B)
+  return (1);
+if (A != 0x1.999999999999ap-4)
+  return (1);
+return (0);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  llvm_cv_c_printf_a=yes
+else
+  echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+llvmac_cv_c_printf_a=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_c_printf_a" >&5
+echo "${ECHO_T}$llvm_cv_c_printf_a" >&6; }
+ if test "$llvm_cv_c_printf_a" = "yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PRINTF_A 1
+_ACEOF
+
+ fi
+
+
+{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
+echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
+if test "${ac_cv_func_rand48+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+   cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdlib.h>
+int
+main ()
+{
+srand48(0);lrand48();drand48();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_func_rand48=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_func_rand48=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5
+echo "${ECHO_T}$ac_cv_func_rand48" >&6; }
+
+if test "$ac_cv_func_rand48" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_RAND48 1
+_ACEOF
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking whether strerror_s is declared" >&5
+echo $ECHO_N "checking whether strerror_s is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_strerror_s+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
   cat >conftest.$ac_ext <<_ACEOF
@@ -17295,14 +19210,14 @@ cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 $ac_includes_default
-typedef size_t ac__type_new_;
 int
 main ()
 {
-if ((ac__type_new_ *) 0)
-  return 0;
-if (sizeof (ac__type_new_))
-  return 0;
+#ifndef strerror_s
+  char *p = (char *) strerror_s;
+  return !p;
+#endif
+
   ;
   return 0;
 }
@@ -17341,63 +19256,243 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_type_size_t=yes
+  ac_cv_have_decl_strerror_s=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_type_size_t=no
+	ac_cv_have_decl_strerror_s=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5
-echo "${ECHO_T}$ac_cv_type_size_t" >&6; }
-if test $ac_cv_type_size_t = yes; then
-  :
-else
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_strerror_s" >&5
+echo "${ECHO_T}$ac_cv_have_decl_strerror_s" >&6; }
+if test $ac_cv_have_decl_strerror_s = yes; then
 
 cat >>confdefs.h <<_ACEOF
-#define size_t unsigned int
+#define HAVE_DECL_STRERROR_S 1
+_ACEOF
+
+
+else
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_STRERROR_S 0
 _ACEOF
 
+
 fi
 
 
-cat >>confdefs.h <<_ACEOF
-#define RETSIGTYPE void
+
+if test "$llvm_cv_os_type" = "MingW" ; then
+  { echo "$as_me:$LINENO: checking for _alloca in -lgcc" >&5
+echo $ECHO_N "checking for _alloca in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc__alloca+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
 _ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
 
-{ echo "$as_me:$LINENO: checking whether struct tm is in sys/time.h or time.h" >&5
-echo $ECHO_N "checking whether struct tm is in sys/time.h or time.h... $ECHO_C" >&6; }
-if test "${ac_cv_struct_tm+set}" = set; then
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char _alloca ();
+int
+main ()
+{
+return _alloca ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_lib_gcc__alloca=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_gcc__alloca=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc__alloca" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc__alloca" >&6; }
+if test $ac_cv_lib_gcc__alloca = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE__ALLOCA 1
+_ACEOF
+
+fi
+
+  { echo "$as_me:$LINENO: checking for __alloca in -lgcc" >&5
+echo $ECHO_N "checking for __alloca in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___alloca+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-#include <sys/types.h>
-#include <time.h>
 
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __alloca ();
 int
 main ()
 {
-struct tm *tp; tp->tm_sec;
+return __alloca ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_lib_gcc___alloca=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_gcc___alloca=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___alloca" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___alloca" >&6; }
+if test $ac_cv_lib_gcc___alloca = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ALLOCA 1
+_ACEOF
+
+fi
+
+  { echo "$as_me:$LINENO: checking for __chkstk in -lgcc" >&5
+echo $ECHO_N "checking for __chkstk in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___chkstk+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __chkstk ();
+int
+main ()
+{
+return __chkstk ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
@@ -17414,7 +19509,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -17424,58 +19519,65 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_struct_tm=time.h
+  ac_cv_lib_gcc___chkstk=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_struct_tm=sys/time.h
+	ac_cv_lib_gcc___chkstk=no
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_struct_tm" >&5
-echo "${ECHO_T}$ac_cv_struct_tm" >&6; }
-if test $ac_cv_struct_tm = sys/time.h; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___chkstk" >&6; }
+if test $ac_cv_lib_gcc___chkstk = yes; then
 
 cat >>confdefs.h <<\_ACEOF
-#define TM_IN_SYS_TIME 1
+#define HAVE___CHKSTK 1
 _ACEOF
 
 fi
 
-{ echo "$as_me:$LINENO: checking for int64_t" >&5
-echo $ECHO_N "checking for int64_t... $ECHO_C" >&6; }
-if test "${ac_cv_type_int64_t+set}" = set; then
+  { echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5
+echo $ECHO_N "checking for ___chkstk in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc____chkstk+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-$ac_includes_default
-typedef int64_t ac__type_new_;
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ___chkstk ();
 int
 main ()
 {
-if ((ac__type_new_ *) 0)
-  return 0;
-if (sizeof (ac__type_new_))
-  return 0;
+return ___chkstk ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
@@ -17492,7 +19594,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -17502,63 +19604,66 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_type_int64_t=yes
+  ac_cv_lib_gcc____chkstk=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_type_int64_t=no
+	ac_cv_lib_gcc____chkstk=no
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_type_int64_t" >&5
-echo "${ECHO_T}$ac_cv_type_int64_t" >&6; }
-if test $ac_cv_type_int64_t = yes; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc____chkstk" >&6; }
+if test $ac_cv_lib_gcc____chkstk = yes; then
 
-cat >>confdefs.h <<_ACEOF
-#define HAVE_INT64_T 1
+cat >>confdefs.h <<\_ACEOF
+#define HAVE____CHKSTK 1
 _ACEOF
 
-
-else
-  { { echo "$as_me:$LINENO: error: Type int64_t required but not found" >&5
-echo "$as_me: error: Type int64_t required but not found" >&2;}
-   { (exit 1); exit 1; }; }
 fi
 
-{ echo "$as_me:$LINENO: checking for uint64_t" >&5
-echo $ECHO_N "checking for uint64_t... $ECHO_C" >&6; }
-if test "${ac_cv_type_uint64_t+set}" = set; then
+
+  { echo "$as_me:$LINENO: checking for __ashldi3 in -lgcc" >&5
+echo $ECHO_N "checking for __ashldi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___ashldi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-$ac_includes_default
-typedef uint64_t ac__type_new_;
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __ashldi3 ();
 int
 main ()
 {
-if ((ac__type_new_ *) 0)
-  return 0;
-if (sizeof (ac__type_new_))
-  return 0;
+return __ashldi3 ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
@@ -17575,7 +19680,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -17585,58 +19690,65 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_type_uint64_t=yes
+  ac_cv_lib_gcc___ashldi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_type_uint64_t=no
+	ac_cv_lib_gcc___ashldi3=no
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_type_uint64_t" >&5
-echo "${ECHO_T}$ac_cv_type_uint64_t" >&6; }
-if test $ac_cv_type_uint64_t = yes; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashldi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___ashldi3" >&6; }
+if test $ac_cv_lib_gcc___ashldi3 = yes; then
 
-cat >>confdefs.h <<_ACEOF
-#define HAVE_UINT64_T 1
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ASHLDI3 1
 _ACEOF
 
+fi
 
-else
-  { echo "$as_me:$LINENO: checking for u_int64_t" >&5
-echo $ECHO_N "checking for u_int64_t... $ECHO_C" >&6; }
-if test "${ac_cv_type_u_int64_t+set}" = set; then
+  { echo "$as_me:$LINENO: checking for __ashrdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __ashrdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___ashrdi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-$ac_includes_default
-typedef u_int64_t ac__type_new_;
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __ashrdi3 ();
 int
 main ()
 {
-if ((ac__type_new_ *) 0)
-  return 0;
-if (sizeof (ac__type_new_))
-  return 0;
+return __ashrdi3 ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
@@ -17653,7 +19765,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -17663,72 +19775,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_type_u_int64_t=yes
+  ac_cv_lib_gcc___ashrdi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_type_u_int64_t=no
+	ac_cv_lib_gcc___ashrdi3=no
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_type_u_int64_t" >&5
-echo "${ECHO_T}$ac_cv_type_u_int64_t" >&6; }
-if test $ac_cv_type_u_int64_t = yes; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashrdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___ashrdi3" >&6; }
+if test $ac_cv_lib_gcc___ashrdi3 = yes; then
 
-cat >>confdefs.h <<_ACEOF
-#define HAVE_U_INT64_T 1
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ASHRDI3 1
 _ACEOF
 
-
-else
-  { { echo "$as_me:$LINENO: error: Type uint64_t or u_int64_t required but not found" >&5
-echo "$as_me: error: Type uint64_t or u_int64_t required but not found" >&2;}
-   { (exit 1); exit 1; }; }
-fi
-
 fi
 
-
-
-
-
-
-
-
-
-
-for ac_func in backtrace ceilf floorf roundf rintf nearbyintf getcwd
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __divdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __divdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___divdi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -17736,18 +19817,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __divdi3 ();
 int
 main ()
 {
-return $ac_func ();
+return __divdi3 ();
   ;
   return 0;
 }
@@ -17786,62 +19860,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___divdi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___divdi3=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___divdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___divdi3" >&6; }
+if test $ac_cv_lib_gcc___divdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___DIVDI3 1
 _ACEOF
 
 fi
-done
 
-
-
-
-
-for ac_func in powf fmodf strtof round
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __fixdfdi in -lgcc" >&5
+echo $ECHO_N "checking for __fixdfdi in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___fixdfdi+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -17849,18 +19902,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __fixdfdi ();
 int
 main ()
 {
-return $ac_func ();
+return __fixdfdi ();
   ;
   return 0;
 }
@@ -17899,63 +19945,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___fixdfdi=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
-      conftest$ac_exeext conftest.$ac_ext
-fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
+	ac_cv_lib_gcc___fixdfdi=no
 fi
-done
-
-
 
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixdfdi" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___fixdfdi" >&6; }
+if test $ac_cv_lib_gcc___fixdfdi = yes; then
 
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FIXDFDI 1
+_ACEOF
 
+fi
 
-for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __fixsfdi in -lgcc" >&5
+echo $ECHO_N "checking for __fixsfdi in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___fixsfdi+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -17963,18 +19987,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __fixsfdi ();
 int
 main ()
 {
-return $ac_func ();
+return __fixsfdi ();
   ;
   return 0;
 }
@@ -18013,61 +20030,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___fixsfdi=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___fixsfdi=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixsfdi" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___fixsfdi" >&6; }
+if test $ac_cv_lib_gcc___fixsfdi = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FIXSFDI 1
 _ACEOF
 
 fi
-done
-
-
-
 
-for ac_func in isatty mkdtemp mkstemp
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __floatdidf in -lgcc" >&5
+echo $ECHO_N "checking for __floatdidf in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___floatdidf+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -18075,18 +20072,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __floatdidf ();
 int
 main ()
 {
-return $ac_func ();
+return __floatdidf ();
   ;
   return 0;
 }
@@ -18125,64 +20115,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___floatdidf=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___floatdidf=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___floatdidf" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___floatdidf" >&6; }
+if test $ac_cv_lib_gcc___floatdidf = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FLOATDIDF 1
 _ACEOF
 
 fi
-done
-
-
 
-
-
-
-
-for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __lshrdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __lshrdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___lshrdi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -18190,18 +20157,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __lshrdi3 ();
 int
 main ()
 {
-return $ac_func ();
+return __lshrdi3 ();
   ;
   return 0;
 }
@@ -18240,62 +20200,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___lshrdi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___lshrdi3=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___lshrdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___lshrdi3" >&6; }
+if test $ac_cv_lib_gcc___lshrdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___LSHRDI3 1
 _ACEOF
 
 fi
-done
-
-
 
-
-
-for ac_func in strerror strerror_r strerror_s setenv
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __moddi3 in -lgcc" >&5
+echo $ECHO_N "checking for __moddi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___moddi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -18303,18 +20242,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __moddi3 ();
 int
 main ()
 {
-return $ac_func ();
+return __moddi3 ();
   ;
   return 0;
 }
@@ -18353,62 +20285,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___moddi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___moddi3=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___moddi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___moddi3" >&6; }
+if test $ac_cv_lib_gcc___moddi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___MODDI3 1
 _ACEOF
 
 fi
-done
-
 
-
-
-
-for ac_func in strtoll strtoq sysconf malloc_zone_statistics
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __udivdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __udivdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___udivdi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -18416,18 +20327,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __udivdi3 ();
 int
 main ()
 {
-return $ac_func ();
+return __udivdi3 ();
   ;
   return 0;
 }
@@ -18466,62 +20370,41 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___udivdi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
-      conftest$ac_exeext conftest.$ac_ext
-fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
+	ac_cv_lib_gcc___udivdi3=no
 fi
-done
-
 
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___udivdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___udivdi3" >&6; }
+if test $ac_cv_lib_gcc___udivdi3 = yes; then
 
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___UDIVDI3 1
+_ACEOF
 
+fi
 
-for ac_func in setjmp longjmp sigsetjmp siglongjmp
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-{ echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
-if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for __umoddi3 in -lgcc" >&5
+echo $ECHO_N "checking for __umoddi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___umoddi3+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $ac_func innocuous_$ac_func
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $ac_func (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $ac_func
 
 /* Override any GCC internal prototype to avoid an error.
    Use char because int might match the return type of a GCC
@@ -18529,18 +20412,11 @@ cat >>conftest.$ac_ext <<_ACEOF
 #ifdef __cplusplus
 extern "C"
 #endif
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$ac_func || defined __stub___$ac_func
-choke me
-#endif
-
+char __umoddi3 ();
 int
 main ()
 {
-return $ac_func ();
+return __umoddi3 ();
   ;
   return 0;
 }
@@ -18579,83 +20455,83 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  eval "$as_ac_var=yes"
+  ac_cv_lib_gcc___umoddi3=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	eval "$as_ac_var=no"
+	ac_cv_lib_gcc___umoddi3=no
 fi
 
 rm -f core conftest.err conftest.$ac_objext \
       conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-ac_res=`eval echo '${'$as_ac_var'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-if test `eval echo '${'$as_ac_var'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___umoddi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___umoddi3" >&6; }
+if test $ac_cv_lib_gcc___umoddi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___UMODDI3 1
 _ACEOF
 
 fi
-done
 
-{ echo "$as_me:$LINENO: checking if printf has the %a format character" >&5
-echo $ECHO_N "checking if printf has the %a format character... $ECHO_C" >&6; }
-if test "${llvm_cv_c_printf_a+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
- if test "$cross_compiling" = yes; then
-  llvmac_cv_c_printf_a=no
+  { echo "$as_me:$LINENO: checking for __main in -lgcc" >&5
+echo $ECHO_N "checking for __main in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___main+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  cat >conftest.$ac_ext <<_ACEOF
-
-  /* confdefs.h.  */
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
 
-#include <stdio.h>
-#include <stdlib.h>
-
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __main ();
 int
 main ()
 {
-
-volatile double A, B;
-char Buffer[100];
-A = 1;
-A /= 10.0;
-sprintf(Buffer, "%a", A);
-B = atof(Buffer);
-if (A != B)
-  return (1);
-if (A != 0x1.999999999999ap-4)
-  return (1);
-return (0);
+return __main ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest$ac_exeext
+rm -f conftest.$ac_objext conftest$ac_exeext
 if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_link") 2>&5
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -18665,78 +20541,72 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  llvm_cv_c_printf_a=yes
+  ac_cv_lib_gcc___main=yes
 else
-  echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
+  echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-( exit $ac_status )
-llvmac_cv_c_printf_a=no
+	ac_cv_lib_gcc___main=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-
-
- ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $llvm_cv_c_printf_a" >&5
-echo "${ECHO_T}$llvm_cv_c_printf_a" >&6; }
- if test "$llvm_cv_c_printf_a" = "yes"; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___main" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___main" >&6; }
+if test $ac_cv_lib_gcc___main = yes; then
 
 cat >>confdefs.h <<\_ACEOF
-#define HAVE_PRINTF_A 1
+#define HAVE___MAIN 1
 _ACEOF
 
- fi
-
+fi
 
-{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
-echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
-if test "${ac_cv_func_rand48+set}" = set; then
+  { echo "$as_me:$LINENO: checking for __cmpdi2 in -lgcc" >&5
+echo $ECHO_N "checking for __cmpdi2 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___cmpdi2+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
-  ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-   cat >conftest.$ac_ext <<_ACEOF
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
 /* confdefs.h.  */
 _ACEOF
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-#include <stdlib.h>
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __cmpdi2 ();
 int
 main ()
 {
-srand48(0);lrand48();drand48();
+return __cmpdi2 ();
   ;
   return 0;
 }
 _ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
 case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
 esac
 eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
+  (eval "$ac_link") 2>conftest.er1
   ac_status=$?
   grep -v '^ *+' conftest.er1 >conftest.err
   rm -f conftest.er1
   cat conftest.err >&5
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); } &&
-	 { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -18746,7 +20616,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
+	 { ac_try='test -s conftest$ac_exeext'
   { (case "(($ac_try" in
   *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
   *) ac_try_echo=$ac_try;;
@@ -18756,33 +20626,29 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
   ac_status=$?
   echo "$as_me:$LINENO: \$? = $ac_status" >&5
   (exit $ac_status); }; }; then
-  ac_cv_func_rand48=yes
+  ac_cv_lib_gcc___cmpdi2=yes
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
 
-	ac_cv_func_rand48=no
+	ac_cv_lib_gcc___cmpdi2=no
 fi
 
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
 fi
-{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5
-echo "${ECHO_T}$ac_cv_func_rand48" >&6; }
-
-if test "$ac_cv_func_rand48" = "yes" ; then
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___cmpdi2" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___cmpdi2" >&6; }
+if test $ac_cv_lib_gcc___cmpdi2 = yes; then
 
 cat >>confdefs.h <<\_ACEOF
-#define HAVE_RAND48 1
+#define HAVE___CMPDI2 1
 _ACEOF
 
 fi
 
+fi
 
 
 { echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5
@@ -20282,15 +22148,43 @@ fi
 done
 
 
+{ echo "$as_me:$LINENO: checking whether llvm-gcc is dragonegg" >&5
+echo $ECHO_N "checking whether llvm-gcc is dragonegg... $ECHO_C" >&6; }
+if test "${llvm_cv_llvmgcc_dragonegg+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  llvm_cv_llvmgcc_dragonegg="no"
+if test -n "$LLVMGCC" ; then
+  cp /dev/null conftest.c
+  $LLVMGCC -fplugin-arg-dragonegg-emit-ir -S -o - conftest.c > /dev/null 2>&1
+  if test $? -eq 0 ; then
+    llvm_cv_llvmgcc_dragonegg="yes"
+  fi
+  rm conftest.c
+fi
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_llvmgcc_dragonegg" >&5
+echo "${ECHO_T}$llvm_cv_llvmgcc_dragonegg" >&6; }
+
+if test "$llvm_cv_llvmgcc_dragonegg" = "yes" ; then
+  LLVMCC_EMITIR_FLAG="-fplugin-arg-dragonegg-emit-ir"
+  LLVMCC_DISABLEOPT_FLAGS="-fplugin-arg-dragonegg-disable-llvm-optzns"
+else
+  LLVMCC_EMITIR_FLAG="-emit-llvm"
+  LLVMCC_DISABLEOPT_FLAGS="-mllvm -disable-llvm-optzns"
+fi
+
+
+
 { echo "$as_me:$LINENO: checking whether llvm-gcc is sane" >&5
 echo $ECHO_N "checking whether llvm-gcc is sane... $ECHO_C" >&6; }
 if test "${llvm_cv_llvmgcc_sanity+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
   llvm_cv_llvmgcc_sanity="no"
-if test -x "$LLVMGCC" ; then
+if test -n "$LLVMGCC" ; then
   cp /dev/null conftest.c
-  "$LLVMGCC" -emit-llvm -S -o - conftest.c | \
+  $LLVMGCC "$LLVMCC_EMITIR_FLAG" -S -o - conftest.c | \
       grep 'target datalayout =' > /dev/null 2>&1
   if test $? -eq 0 ; then
     llvm_cv_llvmgcc_sanity="yes"
@@ -20304,18 +22198,21 @@ echo "${ECHO_T}$llvm_cv_llvmgcc_sanity" >&6; }
 if test "$llvm_cv_llvmgcc_sanity" = "yes" ; then
   { echo "$as_me:$LINENO: checking llvm-gcc component support" >&5
 echo $ECHO_N "checking llvm-gcc component support... $ECHO_C" >&6; }
-  llvmcc1path=`"$LLVMGCC" --print-prog-name=cc1`
+  llvmcc1path=`$LLVMGCC --print-prog-name=cc1`
   LLVMCC1=$llvmcc1path
 
-  llvmcc1pluspath=`"$LLVMGCC" --print-prog-name=cc1plus`
+  llvmcc1pluspath=`$LLVMGCC --print-prog-name=cc1plus`
   LLVMCC1PLUS=$llvmcc1pluspath
 
   llvmgccdir=`echo "$llvmcc1path" | sed 's,/libexec/.*,,'`
   LLVMGCCDIR=$llvmgccdir
 
-  llvmgcclangs=`"$LLVMGCC" -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'`
+  llvmgcclangs=`$LLVMGCC -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'`
   LLVMGCC_LANGS=$llvmgcclangs
 
+  LLVMGCC_DRAGONEGG=$llvm_cv_llvmgcc_dragonegg
+
+
   { echo "$as_me:$LINENO: result: ok" >&5
 echo "${ECHO_T}ok" >&6; }
 fi
@@ -20591,7 +22488,7 @@ ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def"
 
 ac_config_files="$ac_config_files include/llvm/Config/Disassemblers.def"
 
-ac_config_headers="$ac_config_headers include/llvm/System/DataTypes.h"
+ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h"
 
 
 ac_config_files="$ac_config_files Makefile.config"
@@ -21045,7 +22942,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by llvm $as_me 2.8, which was
+This file was extended by llvm $as_me 2.9svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21098,7 +22995,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-llvm config.status 2.8
+llvm config.status 2.9svn
 configured by $0, generated by GNU Autoconf 2.60,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
@@ -21217,7 +23114,7 @@ do
     "include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;;
     "include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;;
     "include/llvm/Config/Disassemblers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Disassemblers.def" ;;
-    "include/llvm/System/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/System/DataTypes.h" ;;
+    "include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;;
     "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
     "llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
     "tools/llvmc/src/Base.td") CONFIG_FILES="$CONFIG_FILES tools/llvmc/src/Base.td" ;;
@@ -21337,6 +23234,8 @@ host_alias!$host_alias$ac_delim
 target_alias!$target_alias$ac_delim
 LLVM_COPYRIGHT!$LLVM_COPYRIGHT$ac_delim
 subdirs!$subdirs$ac_delim
+ENABLE_POLLY!$ENABLE_POLLY$ac_delim
+LLVM_HAS_POLLY!$LLVM_HAS_POLLY$ac_delim
 build!$build$ac_delim
 build_cpu!$build_cpu$ac_delim
 build_vendor!$build_vendor$ac_delim
@@ -21382,19 +23281,17 @@ DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
 DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
 JIT!$JIT$ac_delim
 TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
+ENABLE_DOCS!$ENABLE_DOCS$ac_delim
 ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
 ENABLE_THREADS!$ENABLE_THREADS$ac_delim
+ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
 ENABLE_PIC!$ENABLE_PIC$ac_delim
 ENABLE_SHARED!$ENABLE_SHARED$ac_delim
+ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
 ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
 TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
 LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
 LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
-LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
-LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim
-ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
-CLANGPATH!$CLANGPATH$ac_delim
-CLANGXXPATH!$CLANGXXPATH$ac_delim
 _ACEOF
 
   if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -21436,6 +23333,11 @@ _ACEOF
 ac_delim='%!_!# '
 for ac_last_try in false false false false false :; do
   cat >conf$$subs.sed <<_ACEOF
+LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
+LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim
+ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
+CLANGPATH!$CLANGPATH$ac_delim
+CLANGXXPATH!$CLANGXXPATH$ac_delim
 ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim
 OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
 EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
@@ -21466,6 +23368,7 @@ TWOPI!$TWOPI$ac_delim
 CIRCO!$CIRCO$ac_delim
 GV!$GV$ac_delim
 DOTTY!$DOTTY$ac_delim
+XDOT_PY!$XDOT_PY$ac_delim
 PERL!$PERL$ac_delim
 HAVE_PERL!$HAVE_PERL$ac_delim
 INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
@@ -21475,7 +23378,7 @@ BZIP2!$BZIP2$ac_delim
 CAT!$CAT$ac_delim
 DOXYGEN!$DOXYGEN$ac_delim
 GROFF!$GROFF$ac_delim
-GZIP!$GZIP$ac_delim
+GZIPBIN!$GZIPBIN$ac_delim
 POD2HTML!$POD2HTML$ac_delim
 POD2MAN!$POD2MAN$ac_delim
 PDFROFF!$PDFROFF$ac_delim
@@ -21505,10 +23408,13 @@ USE_OPROFILE!$USE_OPROFILE$ac_delim
 HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
 HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
 MMAP_FILE!$MMAP_FILE$ac_delim
+LLVMCC_EMITIR_FLAG!$LLVMCC_EMITIR_FLAG$ac_delim
 LLVMCC1!$LLVMCC1$ac_delim
 LLVMCC1PLUS!$LLVMCC1PLUS$ac_delim
 LLVMGCCDIR!$LLVMGCCDIR$ac_delim
 LLVMGCC_LANGS!$LLVMGCC_LANGS$ac_delim
+LLVMGCC_DRAGONEGG!$LLVMGCC_DRAGONEGG$ac_delim
+LLVMCC_DISABLEOPT_FLAGS!$LLVMCC_DISABLEOPT_FLAGS$ac_delim
 SHLIBEXT!$SHLIBEXT$ac_delim
 SHLIBPATH_VAR!$SHLIBPATH_VAR$ac_delim
 LLVM_PREFIX!$LLVM_PREFIX$ac_delim
@@ -21524,6 +23430,47 @@ LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
 BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
 ALL_BINDINGS!$ALL_BINDINGS$ac_delim
 OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
+_ACEOF
+
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
+    break
+  elif $ac_last_try; then
+    { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed`
+if test -n "$ac_eof"; then
+  ac_eof=`echo "$ac_eof" | sort -nru | sed 1q`
+  ac_eof=`expr $ac_eof + 1`
+fi
+
+cat >>$CONFIG_STATUS <<_ACEOF
+cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+_ACEOF
+sed '
+s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g
+s/^/s,@/; s/!/@,|#_!!_#|/
+:n
+t n
+s/'"$ac_delim"'$/,g/; t
+s/$/\\/; p
+N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n
+' >>$CONFIG_STATUS <conf$$subs.sed
+rm -f conf$$subs.sed
+cat >>$CONFIG_STATUS <<_ACEOF
+CEOF$ac_eof
+_ACEOF
+
+
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  cat >conf$$subs.sed <<_ACEOF
 ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
 RPATH!$RPATH$ac_delim
 RDYNAMIC!$RDYNAMIC$ac_delim
@@ -21531,7 +23478,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 5; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -21549,7 +23496,7 @@ if test -n "$ac_eof"; then
 fi
 
 cat >>$CONFIG_STATUS <<_ACEOF
-cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof
+cat >"\$tmp/subs-3.sed" <<\CEOF$ac_eof
 /@[a-zA-Z_][a-zA-Z_0-9]*@/!b end
 _ACEOF
 sed '
@@ -21812,7 +23759,7 @@ s&@abs_builddir@&$ac_abs_builddir&;t t
 s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
 s&@INSTALL@&$ac_INSTALL&;t t
 $ac_datarootdir_hack
-" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out
+" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" | sed -f "$tmp/subs-3.sed" >$tmp/out
 
 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
   { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
index cffaa8206228..20b7e96460f3 100644
--- a/docs/AliasAnalysis.html
+++ b/docs/AliasAnalysis.html
@@ -31,7 +31,7 @@
     <li><a href="#chaining"><tt>AliasAnalysis</tt> chaining behavior</a></li>
     <li><a href="#updating">Updating analysis results for transformations</a></li>
     <li><a href="#implefficiency">Efficiency Issues</a></li>
-    <li><a href="#passmanager">Pass Manager Issues</a></li>
+    <li><a href="#limitations">Limitations</a></li>
     </ul>
   </li>
 
@@ -188,7 +188,8 @@ that the accesses alias.</p>
 <div class="doc_text">
 <p>The <tt>alias</tt> method is the primary interface used to determine whether
 or not two memory objects alias each other.  It takes two memory objects as
-input and returns MustAlias, MayAlias, or NoAlias as appropriate.</p>
+input and returns MustAlias, PartialAlias, MayAlias, or NoAlias as
+appropriate.</p>
 
 <p>Like all <tt>AliasAnalysis</tt> interfaces, the <tt>alias</tt> method requires
 that either the two pointer values be defined within the same function, or at
@@ -215,8 +216,10 @@ and reallocation.</p>
 dependencies are ignored.</p>
 
 <p>The MayAlias response is used whenever the two pointers might refer to the
-same object.  If the two memory objects overlap, but do not start at the same
-location, return MayAlias.</p>
+same object.</p>
+
+<p>The PartialAlias response is used when the two memory objects are known
+to be overlapping in some way, but do not start at the same address.</p>
 
 <p>The MustAlias response may only be returned if the two memory objects are
 guaranteed to always start at exactly the same location. A MustAlias response
@@ -461,7 +464,7 @@ analysis results updated to reflect the changes made by these transformations.
 </p>
 
 <p>
-The <tt>AliasAnalysis</tt> interface exposes two methods which are used to
+The <tt>AliasAnalysis</tt> interface exposes four methods which are used to
 communicate program changes from the clients to the analysis implementations.
 Various alias analysis implementations should use these methods to ensure that
 their internal data structures are kept up-to-date as the program changes (for
@@ -502,6 +505,28 @@ value, then deleting the old value.  This method cannot be overridden by alias
 analysis implementations.
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">The <tt>addEscapingUse</tt> method</div>
+
+<div class="doc_text">
+<p>The <tt>addEscapingUse</tt> method is used when the uses of a pointer
+value have changed in ways that may invalidate precomputed analysis information. 
+Implementations may either use this callback to provide conservative responses
+for points whose uses have change since analysis time, or may recompute some
+or all of their internal state to continue providing accurate responses.</p>
+
+<p>In general, any new use of a pointer value is considered an escaping use,
+and must be reported through this callback, <em>except</em> for the
+uses below:</p>
+
+<ul>
+  <li>A <tt>bitcast</tt> or <tt>getelementptr</tt> of the pointer</li>
+  <li>A <tt>store</tt> through the pointer (but not a <tt>store</tt>
+      <em>of</em> the pointer)</li>
+  <li>A <tt>load</tt> through the pointer</li>
+</ul>
+</div>
+
 <!-- ======================================================================= -->
 <div class="doc_subsection">
   <a name="implefficiency">Efficiency Issues</a>
@@ -520,13 +545,13 @@ method as possible (within reason).</p>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection">
-  <a name="passmanager">Pass Manager Issues</a>
+  <a name="limitations">Limitations</a>
 </div>
 
 <div class="doc_text">
 
-<p>PassManager support for alternative AliasAnalysis implementation
-has some issues.</p>
+<p>The AliasAnalysis infrastructure has several limitations which make
+writing a new <tt>AliasAnalysis</tt> implementation difficult.</p>
 
 <p>There is no way to override the default alias analysis. It would
 be very useful to be able to do something like "opt -my-aa -O2" and
@@ -555,6 +580,40 @@ silently route alias analysis queries directly to
 passes between each pass, which prevents the use of <tt>FunctionPass</tt>
 alias analysis passes.</p>
 
+<p>The <tt>AliasAnalysis</tt> API does have functions for notifying
+implementations when values are deleted or copied, however these
+aren't sufficient. There are many other ways that LLVM IR can be
+modified which could be relevant to <tt>AliasAnalysis</tt>
+implementations which can not be expressed.</p>
+
+<p>The <tt>AliasAnalysisDebugger</tt> utility seems to suggest that
+<tt>AliasAnalysis</tt> implementations can expect that they will be
+informed of any relevant <tt>Value</tt> before it appears in an
+alias query. However, popular clients such as <tt>GVN</tt> don't
+support this, and are known to trigger errors when run with the
+<tt>AliasAnalysisDebugger</tt>.</p>
+
+<p>Due to several of the above limitations, the most obvious use for
+the <tt>AliasAnalysisCounter</tt> utility, collecting stats on all
+alias queries in a compilation, doesn't work, even if the
+<tt>AliasAnalysis</tt> implementations don't use <tt>FunctionPass</tt>.
+There's no way to set a default, much less a default sequence,
+and there's no way to preserve it.</p>
+
+<p>The <tt>AliasSetTracker</tt> class (which is used by <tt>LICM</tt>
+makes a non-deterministic number of alias queries. This can cause stats
+collected by <tt>AliasAnalysisCounter</tt> to have fluctuations among
+identical runs, for example. Another consequence is that debugging
+techniques involving pausing execution after a predetermined number
+of queries can be unreliable.</p>
+
+<p>Many alias queries can be reformulated in terms of other alias
+queries. When multiple <tt>AliasAnalysis</tt> queries are chained together,
+it would make sense to start those queries from the beginning of the chain,
+with care taken to avoid infinite looping, however currently an
+implementation which wants to do this can only start such queries
+from itself.</p>
+
 </div>
 
 <!-- *********************************************************************** -->
@@ -713,8 +772,8 @@ problem.</p>
 
 <div class="doc_text">
 
-<p>The <tt>-basicaa</tt> pass is the default LLVM alias analysis.  It is an
-aggressive local analysis that "knows" many important facts:</p>
+<p>The <tt>-basicaa</tt> pass is an aggressive local analysis that "knows"
+many important facts:</p>
 
 <ul>
 <li>Distinct globals, stack allocations, and heap allocations can never
@@ -998,7 +1057,7 @@ analysis directly.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-31 01:47:24 +0200 (Tue, 31 Aug 2010) $
+  Last modified: $Date: 2011-01-03 22:38:41 +0100 (Mon, 03 Jan 2011) $
 </address>
 
 </body>
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index bd53a1edd76c..8d3d382da7a9 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -922,6 +922,9 @@ encoding of the visibility of this variable:
 <li><i>threadlocal</i>: If present and non-zero, indicates that the variable
 is <tt>thread_local</tt></li>
 
+<li><i>unnamed_addr</i>: If present and non-zero, indicates that the variable
+has <tt>unnamed_addr<tt></li>
+
 </ul>
 </div>
 
@@ -975,6 +978,10 @@ entries.</li>
 <li><i>gc</i>: If present and nonzero, the 1-based garbage collector
 index in the table of
 <a href="#MODULE_CODE_GCNAME">MODULE_CODE_GCNAME</a> entries.</li>
+
+<li><i>unnamed_addr</i>: If present and non-zero, indicates that the function
+has <tt>unnamed_addr<tt></li>
+
 </ul>
 </div>
 
@@ -1474,7 +1481,7 @@ name. Each entry corresponds to a single named type.
  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
 <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
+Last modified: $Date: 2011-01-08 17:42:36 +0100 (Sat, 08 Jan 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/CMake.html b/docs/CMake.html
index ca0b50f628e9..e303d132b590 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -68,7 +68,7 @@
 <ol>
 
   <li><p><a href="http://www.cmake.org/cmake/resources/software.html">Download</a>
-      and install CMake. Version 2.6.2 is the minimum required.</p>
+      and install CMake. Version 2.8 is the minimum required.</p>
 
   <li><p>Open a shell. Your development tools must be reachable from this
       shell through the PATH environment variable.</p>
@@ -257,11 +257,41 @@
     with a makefile-based system executing <i>make llvm-as</i> on the
     root of your build directory.</dd>
 
+  <dt><b>LLVM_INCLUDE_TOOLS</b>:BOOL</dt>
+  <dd>Generate build targets for the LLVM tools. Defaults to
+    ON. You can use that option for disabling the generation of build
+    targets for the LLVM tools.</dd>
+
   <dt><b>LLVM_BUILD_EXAMPLES</b>:BOOL</dt>
   <dd>Build LLVM examples. Defaults to OFF. Targets for building each
     example are generated in any case. See documentation
     for <i>LLVM_BUILD_TOOLS</i> above for more details.</dd>
 
+  <dt><b>LLVM_INCLUDE_EXAMPLES</b>:BOOL</dt>
+  <dd>Generate build targets for the LLVM examples. Defaults to
+    ON. You can use that option for disabling the generation of build
+    targets for the LLVM examples.</dd>
+
+  <dt><b>LLVM_BUILD_TESTS</b>:BOOL</dt>
+  <dd>Build LLVM unit tests. Defaults to OFF. Targets for building
+    each unit test are generated in any case. You can build a specific
+    unit test with the target <i>UnitTestNameTests</i> (where at this
+    time <i>UnitTestName</i> can be ADT, Analysis, ExecutionEngine,
+    JIT, Support, Transform, VMCore; see the subdirectories
+    of <i>unittests</i> for an updated list.) It is possible to build
+    all unit tests with the target <i>UnitTests</i>.</dd>
+
+  <dt><b>LLVM_INCLUDE_TESTS</b>:BOOL</dt>
+  <dd>Generate build targets for the LLVM unit tests. Defaults to
+    ON. You can use that option for disabling the generation of build
+    targets for the LLVM unit tests.</dd>
+
+  <dt><b>LLVM_APPEND_VC_REV</b>:BOOL</dt>
+  <dd>Append version control revision info (svn revision number or git
+    revision id) to LLVM version string (stored in the PACKAGE_VERSION
+    macro). For this to work cmake must be invoked before the
+    build. Defaults to OFF.</dd>
+
   <dt><b>LLVM_ENABLE_THREADS</b>:BOOL</dt>
   <dd>Build with threads support, if available. Defaults to ON.</dd>
 
@@ -301,6 +331,25 @@
   <dd>Full path to a native TableGen executable (usually
     named <i>tblgen</i>). This is intented for cross-compiling: if the
     user sets this variable, no native TableGen will be created.</dd>
+
+  <dt><b>LLVM_LIT_ARGS</b>:STRING</dt>
+  <dd>Arguments given to lit.
+    <tt>make check</tt> and <tt>make clang-test</tt> are affected.
+    By default, <tt>&quot;-sv --no-progress-bar&quot;</tt>
+    on Visual C++ and Xcode,
+    <tt>&quot;-sv&quot;</tt> on others.</dd>
+
+  <dt><b>LLVM_LIT_TOOLS_DIR</b>:STRING</dt>
+  <dd>The path to GnuWin32 tools for tests. Valid on Windows host.
+    Defaults to "", then Lit seeks tools according to %PATH%.
+    Lit can find tools(eg. grep, sort, &c) on LLVM_LIT_TOOLS_DIR at first,
+    without specifying GnuWin32 to %PATH%.</dd>
+
+  <dt><b>LLVM_ENABLE_FFI</b>:BOOL</dt>
+  <dd>Indicates whether LLVM Interpreter will be linked with Foreign
+    Function Interface library. If the library or its headers are
+    installed on a custom location, you can set the variables
+    FFI_INCLUDE_DIR and FFI_LIBRARY_DIR. Defaults to OFF.</dd>
 </dl>
 
 </div>
@@ -321,7 +370,7 @@
   <p><tt>make check</tt></p>
 </div>
 
-<p>Testing is not supported on Visual Studio.</p>
+<p>On Visual Studio, you may run tests to build the project "check".</p>
 
 </div>
 
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 4b2e261094bd..925156ff0787 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -5,6 +5,17 @@
   <meta http-equiv="content-type" content="text/html; charset=utf-8">
   <title>The LLVM Target-Independent Code Generator</title>
   <link rel="stylesheet" href="llvm.css" type="text/css">
+
+  <style type="text/css">
+    .unknown { background-color: #C0C0C0; text-align: center; }
+    .unknown:before { content: "?" }
+    .no { background-color: #C11B17 }
+    .no:before { content: "N" }
+    .partial { background-color: #F88017 }
+    .yes { background-color: #0F0; }
+    .yes:before { content: "Y" }
+  </style>
+
 </head>
 <body>
 
@@ -33,7 +44,7 @@
       <li><a href="#targetjitinfo">The <tt>TargetJITInfo</tt> class</a></li>
     </ul>
   </li>
-  <li><a href="#codegendesc">Machine code description classes</a>
+  <li><a href="#codegendesc">The "Machine" Code Generator classes</a>
     <ul>
     <li><a href="#machineinstr">The <tt>MachineInstr</tt> class</a></li>
     <li><a href="#machinebasicblock">The <tt>MachineBasicBlock</tt>
@@ -41,6 +52,15 @@
     <li><a href="#machinefunction">The <tt>MachineFunction</tt> class</a></li>
     </ul>
   </li>
+  <li><a href="#mc">The "MC" Layer</a>
+    <ul>
+    <li><a href="#mcstreamer">The <tt>MCStreamer</tt> API</a></li>
+    <li><a href="#mccontext">The <tt>MCContext</tt> class</a>
+    <li><a href="#mcsymbol">The <tt>MCSymbol</tt> class</a></li>
+    <li><a href="#mcsection">The <tt>MCSection</tt> class</a></li>
+    <li><a href="#mcinst">The <tt>MCInst</tt> class</a></li>
+    </ul>
+  </li>
   <li><a href="#codegenalgs">Target-independent code generation algorithms</a>
     <ul>
     <li><a href="#instselect">Instruction Selection</a>
@@ -76,15 +96,14 @@
       <li><a href="#regAlloc_fold">Instruction folding</a></li>
       <li><a href="#regAlloc_builtIn">Built in register allocators</a></li>
       </ul></li>
-    <li><a href="#codeemit">Code Emission</a>
-        <ul>
-        <li><a href="#codeemit_asm">Generating Assembly Code</a></li>
-        <li><a href="#codeemit_bin">Generating Binary Machine Code</a></li>
-        </ul></li>
+    <li><a href="#codeemit">Code Emission</a></li>
     </ul>
   </li>
+  <li><a href="#nativeassembler">Implementing a Native Assembler</a></li>
+  
   <li><a href="#targetimpls">Target-specific Implementation Notes</a>
     <ul>
+    <li><a href="#targetfeatures">Target Feature Matrix</a></li>
     <li><a href="#tailcallopt">Tail call optimization</a></li>
     <li><a href="#sibcallopt">Sibling call optimization</a></li>
     <li><a href="#x86">The X86 backend</a></li>
@@ -100,11 +119,7 @@
 </ol>
 
 <div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
-                <a href="mailto:isanbard@gmail.com">Bill Wendling</a>,
-                <a href="mailto:pronesto@gmail.com">Fernando Magno Quintao
-                                                    Pereira</a> and
-                <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+  <p>Written by the LLVM Team.</p>
 </div>
 
 <div class="doc_warning">
@@ -123,7 +138,7 @@
    suite of reusable components for translating the LLVM internal representation
    to the machine code for a specified target&mdash;either in assembly form
    (suitable for a static compiler) or in binary machine code format (usable for
-   a JIT compiler). The LLVM target-independent code generator consists of five
+   a JIT compiler). The LLVM target-independent code generator consists of six
    main components:</p>
 
 <ol>
@@ -132,10 +147,17 @@
       independently of how they will be used.  These interfaces are defined in
       <tt>include/llvm/Target/</tt>.</li>
 
-  <li>Classes used to represent the <a href="#codegendesc">machine code</a>
-      being generated for a target.  These classes are intended to be abstract
+  <li>Classes used to represent the <a href="#codegendesc">code being
+      generated</a> for a target.  These classes are intended to be abstract
       enough to represent the machine code for <i>any</i> target machine.  These
-      classes are defined in <tt>include/llvm/CodeGen/</tt>.</li>
+      classes are defined in <tt>include/llvm/CodeGen/</tt>. At this level,
+      concepts like "constant pool entries" and "jump tables" are explicitly
+      exposed.</li>
+
+  <li>Classes and algorithms used to represent code as the object file level,
+      the <a href="#mc">MC Layer</a>.  These classes represent assembly level
+      constructs like labels, sections, and instructions.  At this level,
+      concepts like "constant pool entries" and "jump tables" don't exist.</li>
 
   <li><a href="#codegenalgs">Target-independent algorithms</a> used to implement
       various phases of native code generation (register allocation, scheduling,
@@ -732,6 +754,157 @@ ret
 
 </div>
 
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="mc">The "MC" Layer</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+The MC Layer is used to represent and process code at the raw machine code
+level, devoid of "high level" information like "constant pools", "jump tables",
+"global variables" or anything like that.  At this level, LLVM handles things
+like label names, machine instructions, and sections in the object file.  The
+code in this layer is used for a number of important purposes: the tail end of
+the code generator uses it to write a .s or .o file, and it is also used by the
+llvm-mc tool to implement standalone machine codeassemblers and disassemblers.
+</p>
+
+<p>
+This section describes some of the important classes.  There are also a number
+of important subsystems that interact at this layer, they are described later
+in this manual.
+</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mcstreamer">The <tt>MCStreamer</tt> API</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+MCStreamer is best thought of as an assembler API.  It is an abstract API which
+is <em>implemented</em> in different ways (e.g. to output a .s file, output an
+ELF .o file, etc) but whose API correspond directly to what you see in a .s
+file.  MCStreamer has one method per directive, such as EmitLabel,
+EmitSymbolAttribute, SwitchSection, EmitValue (for .byte, .word), etc, which
+directly correspond to assembly level directives.  It also has an
+EmitInstruction method, which is used to output an MCInst to the streamer.
+</p>
+
+<p>
+This API is most important for two clients: the llvm-mc stand-alone assembler is
+effectively a parser that parses a line, then invokes a method on MCStreamer. In
+the code generator, the <a href="#codeemit">Code Emission</a> phase of the code
+generator lowers higher level LLVM IR and Machine* constructs down to the MC
+layer, emitting directives through MCStreamer.</p>
+
+<p>
+On the implementation side of MCStreamer, there are two major implementations:
+one for writing out a .s file (MCAsmStreamer), and one for writing out a .o
+file (MCObjectStreamer).  MCAsmStreamer is a straight-forward implementation
+that prints out a directive for each method (e.g. EmitValue -&gt; .byte), but
+MCObjectStreamer implements a full assembler.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mccontext">The <tt>MCContext</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The MCContext class is the owner of a variety of uniqued data structures at the
+MC layer, including symbols, sections, etc.  As such, this is the class that you
+interact with to create symbols and sections.  This class can not be subclassed.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mcsymbol">The <tt>MCSymbol</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The MCSymbol class represents a symbol (aka label) in the assembly file.  There
+are two interesting kinds of symbols: assembler temporary symbols, and normal
+symbols.  Assembler temporary symbols are used and processed by the assembler
+but are discarded when the object file is produced.  The distinction is usually
+represented by adding a prefix to the label, for example "L" labels are
+assembler temporary labels in MachO.
+</p>
+
+<p>MCSymbols are created by MCContext and uniqued there.  This means that
+MCSymbols can be compared for pointer equivalence to find out if they are the
+same symbol.  Note that pointer inequality does not guarantee the labels will
+end up at different addresses though.  It's perfectly legal to output something
+like this to the .s file:<p>
+
+<pre>
+  foo:
+  bar:
+    .byte 4
+</pre>
+
+<p>In this case, both the foo and bar symbols will have the same address.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mcsection">The <tt>MCSection</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The MCSection class represents an object-file specific section. It is subclassed
+by object file specific implementations (e.g. <tt>MCSectionMachO</tt>, 
+<tt>MCSectionCOFF</tt>, <tt>MCSectionELF</tt>) and these are created and uniqued
+by MCContext.  The MCStreamer has a notion of the current section, which can be
+changed with the SwitchToSection method (which corresponds to a ".section"
+directive in a .s file).
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mcinst">The <tt>MCInst</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The MCInst class is a target-independent representation of an instruction.  It
+is a simple class (much more so than <a href="#machineinstr">MachineInstr</a>)
+that holds a target-specific opcode and a vector of MCOperands.  MCOperand, in
+turn, is a simple discriminated union of three cases: 1) a simple immediate, 
+2) a target register ID, 3) a symbolic expression (e.g. "Lfoo-Lbar+42") as an
+MCExpr.
+</p>
+
+<p>MCInst is the common currency used to represent machine instructions at the
+MC layer.  It is the type used by the instruction encoder, the instruction
+printer, and the type generated by the assembly parser and disassembler.
+</p>
+
+</div>
+
+
 <!-- *********************************************************************** -->
 <div class="doc_section">
   <a name="codegenalgs">Target-independent code generation algorithms</a>
@@ -857,9 +1030,9 @@ ret
       SelectionDAG optimizer is run to clean up redundancies exposed by type
       legalization.</li>
 
-  <li><a href="#selectiondag_legalize">Legalize SelectionDAG Types</a> &mdash;
-      This stage transforms SelectionDAG nodes to eliminate any types that are
-      unsupported on the target.</li>
+  <li><a href="#selectiondag_legalize">Legalize SelectionDAG Ops</a> &mdash;
+      This stage transforms SelectionDAG nodes to eliminate any operations 
+      that are unsupported on the target.</li>
 
   <li><a href="#selectiondag_optimize">Optimize SelectionDAG</a> &mdash; The
       SelectionDAG optimizer is run to eliminate inefficiencies introduced by
@@ -1386,18 +1559,25 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
    </p>
 
 <p>Virtual registers are also denoted by integer numbers. Contrary to physical
-   registers, different virtual registers never share the same number. The
-   smallest virtual register is normally assigned the number 1024. This may
-   change, so, in order to know which is the first virtual register, you should
-   access <tt>TargetRegisterInfo::FirstVirtualRegister</tt>. Any register whose
-   number is greater than or equal
-   to <tt>TargetRegisterInfo::FirstVirtualRegister</tt> is considered a virtual
-   register. Whereas physical registers are statically defined in
-   a <tt>TargetRegisterInfo.td</tt> file and cannot be created by the
-   application developer, that is not the case with virtual registers.  In order
-   to create new virtual registers, use the
+   registers, different virtual registers never share the same number. Whereas
+   physical registers are statically defined in a <tt>TargetRegisterInfo.td</tt>
+   file and cannot be created by the application developer, that is not the case
+   with virtual registers. In order to create new virtual registers, use the
    method <tt>MachineRegisterInfo::createVirtualRegister()</tt>. This method
-   will return a virtual register with the highest code.</p>
+   will return a new virtual register. Use an <tt>IndexedMap&lt;Foo,
+   VirtReg2IndexFunctor&gt;</tt> to hold information per virtual register. If you
+   need to enumerate all virtual registers, use the function
+   <tt>TargetRegisterInfo::index2VirtReg()</tt> to find the virtual register
+   numbers:</p>
+
+<div class="doc_code">
+<pre>
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i);
+    stuff(VirtReg);
+  }
+</pre>
+</div>
 
 <p>Before register allocation, the operands of an instruction are mostly virtual
    registers, although physical registers may also be used. In order to check if
@@ -1635,25 +1815,228 @@ $ llc -regalloc=pbqp file.bc -o pbqp.s;
   <a name="latemco">Late Machine Code Optimizations</a>
 </div>
 <div class="doc_text"><p>To Be Written</p></div>
+
 <!-- ======================================================================= -->
 <div class="doc_subsection">
   <a name="codeemit">Code Emission</a>
 </div>
-<div class="doc_text"><p>To Be Written</p></div>
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="codeemit_asm">Generating Assembly Code</a>
+
+<div class="doc_text">
+
+<p>The code emission step of code generation is responsible for lowering from
+the code generator abstractions (like <a 
+href="#machinefunction">MachineFunction</a>, <a 
+href="#machineinstr">MachineInstr</a>, etc) down
+to the abstractions used by the MC layer (<a href="#mcinst">MCInst</a>, 
+<a href="#mcstreamer">MCStreamer</a>, etc).  This is
+done with a combination of several different classes: the (misnamed)
+target-independent AsmPrinter class, target-specific subclasses of AsmPrinter
+(such as SparcAsmPrinter), and the TargetLoweringObjectFile class.</p>
+
+<p>Since the MC layer works at the level of abstraction of object files, it
+doesn't have a notion of functions, global variables etc.  Instead, it thinks
+about labels, directives, and instructions.  A key class used at this time is
+the MCStreamer class.  This is an abstract API that is implemented in different
+ways (e.g. to output a .s file, output an ELF .o file, etc) that is effectively
+an "assembler API".  MCStreamer has one method per directive, such as EmitLabel,
+EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly
+level directives.
+</p>
+
+<p>If you are interested in implementing a code generator for a target, there
+are three important things that you have to implement for your target:</p>
+
+<ol>
+<li>First, you need a subclass of AsmPrinter for your target.  This class
+implements the general lowering process converting MachineFunction's into MC
+label constructs.  The AsmPrinter base class provides a number of useful methods
+and routines, and also allows you to override the lowering process in some
+important ways.  You should get much of the lowering for free if you are
+implementing an ELF, COFF, or MachO target, because the TargetLoweringObjectFile
+class implements much of the common logic.</li>
+
+<li>Second, you need to implement an instruction printer for your target.  The
+instruction printer takes an <a href="#mcinst">MCInst</a> and renders it to a
+raw_ostream as text.  Most of this is automatically generated from the .td file
+(when you specify something like "<tt>add $dst, $src1, $src2</tt>" in the
+instructions), but you need to implement routines to print operands.</li>
+
+<li>Third, you need to implement code that lowers a <a
+href="#machineinstr">MachineInstr</a> to an MCInst, usually implemented in
+"&lt;target&gt;MCInstLower.cpp".  This lowering process is often target
+specific, and is responsible for turning jump table entries, constant pool
+indices, global variable addresses, etc into MCLabels as appropriate.  This
+translation layer is also responsible for expanding pseudo ops used by the code
+generator into the actual machine instructions they correspond to. The MCInsts
+that are generated by this are fed into the instruction printer or the encoder.
+</li>
+
+</ol>
+
+<p>Finally, at your choosing, you can also implement an subclass of
+MCCodeEmitter which lowers MCInst's into machine code bytes and relocations.
+This is important if you want to support direct .o file emission, or would like
+to implement an assembler for your target.</p>
+
 </div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="nativeassembler">Implementing a Native Assembler</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Though you're probably reading this because you want to write or maintain a
+compiler backend, LLVM also fully supports building a native assemblers too.
+We've tried hard to automate the generation of the assembler from the .td files
+(in particular the instruction syntax and encodings), which means that a large
+part of the manual and repetitive data entry can be factored and shared with the
+compiler.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection" id="na_instparsing">Instruction Parsing</div>
+
 <div class="doc_text"><p>To Be Written</p></div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection" id="na_instaliases">
+  Instruction Alias Processing
+</div>
+
+<div class="doc_text">
+<p>Once the instruction is parsed, it enters the MatchInstructionImpl function.
+The MatchInstructionImpl function performs alias processing and then does
+actual matching.</p>
+
+<p>Alias processing is the phase that canonicalizes different lexical forms of
+the same instructions down to one representation.  There are several different
+kinds of alias that are possible to implement and they are listed below in the
+order that they are processed (which is in order from simplest/weakest to most
+complex/powerful).  Generally you want to use the first alias mechanism that
+meets the needs of your instruction, because it will allow a more concise
+description.</p>
+
+</div>
+
 <!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection">
-  <a name="codeemit_bin">Generating Binary Machine Code</a>
+<div class="doc_subsubsection">Mnemonic Aliases</div>
+
+<div class="doc_text">
+
+<p>The first phase of alias processing is simple instruction mnemonic
+remapping for classes of instructions which are allowed with two different
+mnemonics.  This phase is a simple and unconditionally remapping from one input
+mnemonic to one output mnemonic.  It isn't possible for this form of alias to
+look at the operands at all, so the remapping must apply for all forms of a
+given mnemonic.  Mnemonic aliases are defined simply, for example X86 has:
+</p>
+
+<div class="doc_code">
+<pre>
+def : MnemonicAlias&lt;"cbw",     "cbtw"&gt;;
+def : MnemonicAlias&lt;"smovq",   "movsq"&gt;;
+def : MnemonicAlias&lt;"fldcww",  "fldcw"&gt;;
+def : MnemonicAlias&lt;"fucompi", "fucomip"&gt;;
+def : MnemonicAlias&lt;"ud2a",    "ud2"&gt;;
+</pre>
+</div>
+
+<p>... and many others.  With a MnemonicAlias definition, the mnemonic is
+remapped simply and directly.  Though MnemonicAlias's can't look at any aspect
+of the instruction (such as the operands) they can depend on global modes (the
+same ones supported by the matcher), through a Requires clause:</p>
+
+<div class="doc_code">
+<pre>
+def : MnemonicAlias&lt;"pushf", "pushfq"&gt;, Requires&lt;[In64BitMode]&gt;;
+def : MnemonicAlias&lt;"pushf", "pushfl"&gt;, Requires&lt;[In32BitMode]&gt;;
+</pre>
 </div>
 
+<p>In this example, the mnemonic gets mapped into different a new one depending
+on the current instruction set.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Instruction Aliases</div>
+
 <div class="doc_text">
-   <p>For the JIT or <tt>.o</tt> file writer</p>
+
+<p>The most general phase of alias processing occurs while matching is
+happening: it provides new forms for the matcher to match along with a specific
+instruction to generate.  An instruction alias has two parts: the string to
+match and the instruction to generate.  For example:
+</p>
+
+<div class="doc_code">
+<pre>
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8  :$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX32rr8  GR32:$dst, GR8  :$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX64rr8  GR64:$dst, GR8  :$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)&gt;;
+def : InstAlias&lt;"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)&gt;;
+</pre>
 </div>
 
+<p>This shows a powerful example of the instruction aliases, matching the
+same mnemonic in multiple different ways depending on what operands are present
+in the assembly.  The result of instruction aliases can include operands in a
+different order than the destination instruction, and can use an input
+multiple times, for example:</p>
+
+<div class="doc_code">
+<pre>
+def : InstAlias&lt;"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)&gt;;
+def : InstAlias&lt;"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)&gt;;
+def : InstAlias&lt;"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)&gt;;
+def : InstAlias&lt;"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)&gt;;
+</pre>
+</div>
+
+<p>This example also shows that tied operands are only listed once.  In the X86
+backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied
+to the output).  InstAliases take a flattened operand list without duplicates
+for tied operands.  The result of an instruction alias can also use immediates
+and fixed physical registers which are added as simple immediate operands in the
+result, for example:</p>
+
+<div class="doc_code">
+<pre>
+// Fixed Immediate operand.
+def : InstAlias&lt;"aad", (AAD8i8 10)&gt;;
+
+// Fixed register operand.
+def : InstAlias&lt;"fcomi", (COM_FIr ST1)&gt;;
+
+// Simple alias.
+def : InstAlias&lt;"fcomi $reg", (COM_FIr RST:$reg)&gt;;
+</pre>
+</div>
+
+
+<p>Instruction aliases can also have a Requires clause to make them
+subtarget specific.</p>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection" id="na_matching">Instruction Matching</div>
+
+<div class="doc_text"><p>To Be Written</p></div>
+
+
+
 
 <!-- *********************************************************************** -->
 <div class="doc_section">
@@ -1664,10 +2047,275 @@ $ llc -regalloc=pbqp file.bc -o pbqp.s;
 <div class="doc_text">
 
 <p>This section of the document explains features or design decisions that are
-   specific to the code generator for a particular target.</p>
+   specific to the code generator for a particular target.  First we start
+   with a table that summarizes what features are supported by each target.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="targetfeatures">Target Feature Matrix</a>
+</div>
+
+<div class="doc_text">
+
+<p>Note that this table does not include the C backend or Cpp backends, since
+they do not use the target independent code generator infrastructure.  It also
+doesn't list features that are not supported fully by any target yet.  It
+considers a feature to be supported if at least one subtarget supports it.  A
+feature being supported means that it is useful and works for most cases, it
+does not indicate that there are zero known bugs in the implementation.  Here
+is the key:</p>
+
+
+<table border="1" cellspacing="0">
+  <tr>
+    <th>Unknown</th>
+    <th>No support</th>
+    <th>Partial Support</th>
+    <th>Complete Support</th>
+  </tr>
+  <tr>
+    <td class="unknown"></td>
+    <td class="no"></td>
+    <td class="partial"></td>
+    <td class="yes"></td>
+  </tr>
+</table>
+
+<p>Here is the table:</p>
+
+<table width="689" border="1" cellspacing="0">
+<tr><td></td>
+<td colspan="13" align="center" style="background-color:#ffc">Target</td>
+</tr>
+  <tr>
+    <th>Feature</th>
+    <th>ARM</th>
+    <th>Alpha</th>
+    <th>Blackfin</th>
+    <th>CellSPU</th>
+    <th>MBlaze</th>
+    <th>MSP430</th>
+    <th>Mips</th>
+    <th>PTX</th>
+    <th>PowerPC</th>
+    <th>Sparc</th>
+    <th>SystemZ</th>
+    <th>X86</th>
+    <th>XCore</th>
+  </tr>
+
+<tr>
+  <td><a href="#feat_reliable">is generally reliable</a></td>
+  <td class="yes"></td> <!-- ARM -->
+  <td class="unknown"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="no"></td> <!-- MBlaze -->
+  <td class="unknown"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="no"></td> <!-- PTX -->
+  <td class="yes"></td> <!-- PowerPC -->
+  <td class="yes"></td> <!-- Sparc -->
+  <td class="unknown"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="unknown"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_asmparser">assembly parser</a></td>
+  <td class="no"></td> <!-- ARM -->
+  <td class="no"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="yes"></td> <!-- MBlaze -->
+  <td class="no"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="no"></td> <!-- PTX -->
+  <td class="no"></td> <!-- PowerPC -->
+  <td class="no"></td> <!-- Sparc -->
+  <td class="no"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="no"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_disassembler">disassembler</a></td>
+  <td class="yes"></td> <!-- ARM -->
+  <td class="no"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="yes"></td> <!-- MBlaze -->
+  <td class="no"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="no"></td> <!-- PTX -->
+  <td class="no"></td> <!-- PowerPC -->
+  <td class="no"></td> <!-- Sparc -->
+  <td class="no"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="no"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_inlineasm">inline asm</a></td>
+  <td class="yes"></td> <!-- ARM -->
+  <td class="unknown"></td> <!-- Alpha -->
+  <td class="yes"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="yes"></td> <!-- MBlaze -->
+  <td class="unknown"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="unknown"></td> <!-- PTX -->
+  <td class="yes"></td> <!-- PowerPC -->
+  <td class="unknown"></td> <!-- Sparc -->
+  <td class="unknown"></td> <!-- SystemZ -->
+  <td class="yes"><a href="#feat_inlineasm_x86">*</a></td> <!-- X86 -->
+  <td class="unknown"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_jit">jit</a></td>
+  <td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->
+  <td class="no"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="no"></td> <!-- MBlaze -->
+  <td class="unknown"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="unknown"></td> <!-- PTX -->
+  <td class="yes"></td> <!-- PowerPC -->
+  <td class="unknown"></td> <!-- Sparc -->
+  <td class="unknown"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="unknown"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_objectwrite">.o&nbsp;file writing</a></td>
+  <td class="no"></td> <!-- ARM -->
+  <td class="no"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="yes"></td> <!-- MBlaze -->
+  <td class="no"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="no"></td> <!-- PTX -->
+  <td class="no"></td> <!-- PowerPC -->
+  <td class="no"></td> <!-- Sparc -->
+  <td class="no"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="no"></td> <!-- XCore -->
+</tr>
+
+<tr>
+  <td><a href="#feat_tailcall">tail calls</a></td>
+  <td class="yes"></td> <!-- ARM -->
+  <td class="unknown"></td> <!-- Alpha -->
+  <td class="no"></td> <!-- Blackfin -->
+  <td class="no"></td> <!-- CellSPU -->
+  <td class="no"></td> <!-- MBlaze -->
+  <td class="unknown"></td> <!-- MSP430 -->
+  <td class="no"></td> <!-- Mips -->
+  <td class="unknown"></td> <!-- PTX -->
+  <td class="yes"></td> <!-- PowerPC -->
+  <td class="unknown"></td> <!-- Sparc -->
+  <td class="unknown"></td> <!-- SystemZ -->
+  <td class="yes"></td> <!-- X86 -->
+  <td class="unknown"></td> <!-- XCore -->
+</tr>
+
+
+</table>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_reliable">Is Generally Reliable</div>
+
+<div class="doc_text">
+<p>This box indicates whether the target is considered to be production quality.
+This indicates that the target has been used as a static compiler to
+compile large amounts of code by a variety of different people and is in
+continuous use.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_asmparser">Assembly Parser</div>
+
+<div class="doc_text">
+<p>This box indicates whether the target supports parsing target specific .s
+files by implementing the MCAsmParser interface.  This is required for llvm-mc
+to be able to act as a native assembler and is required for inline assembly
+support in the native .o file writer.</p>
 
 </div>
 
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_disassembler">Disassembler</div>
+
+<div class="doc_text">
+<p>This box indicates whether the target supports the MCDisassembler API for
+disassembling machine opcode bytes into MCInst's.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_inlineasm">Inline Asm</div>
+
+<div class="doc_text">
+<p>This box indicates whether the target supports most popular inline assembly
+constraints and modifiers.</p>
+
+<p id="feat_inlineasm_x86">X86 lacks reliable support for inline assembly
+constraints relating to the X86 floating point stack.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_jit">JIT Support</div>
+
+<div class="doc_text">
+<p>This box indicates whether the target supports the JIT compiler through
+the ExecutionEngine interface.</p>
+
+<p id="feat_jit_arm">The ARM backend has basic support for integer code
+in ARM codegen mode, but lacks NEON and full Thumb support.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_objectwrite">.o File Writing</div>
+
+<div class="doc_text">
+
+<p>This box indicates whether the target supports writing .o files (e.g. MachO,
+ELF, and/or COFF) files directly from the target.  Note that the target also
+must include an assembly parser and general inline assembly support for full
+inline assembly support in the .o writer.</p>
+
+<p>Targets that don't support this feature can obviously still write out .o
+files, they just rely on having an external assembler to translate from a .s
+file to a .o file (as is the case for many C compilers).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection" id="feat_tailcall">Tail Calls</div>
+
+<div class="doc_text">
+
+<p>This box indicates whether the target supports guaranteed tail calls.  These
+are calls marked "<a href="LangRef.html#i_call">tail</a>" and use the fastcc
+calling convention.  Please see the <a href="#tailcallopt">tail call section
+more more details</a>.</p>
+
+</div>
+
+
+
+
 <!-- ======================================================================= -->
 <div class="doc_subsection">
   <a name="tailcallopt">Tail call optimization</a>
@@ -2162,7 +2810,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-01 00:01:07 +0200 (Wed, 01 Sep 2010) $
+  Last modified: $Date: 2011-01-09 00:10:59 +0100 (Sun, 09 Jan 2011) $
 </address>
 
 </body>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index bb88a91772aa..4a9ab7d857a8 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -29,37 +29,39 @@
           <li><a href="#ci_warningerrors">Treat Compiler Warnings Like
               Errors</a></li>
           <li><a href="#ci_portable_code">Write Portable Code</a></li>
-          <li><a href="#ci_class_struct">Use of class/struct Keywords</a></li>
+          <li><a href="#ci_rtti_exceptions">Do not use RTTI or Exceptions</a></li>
+          <li><a href="#ci_class_struct">Use of <tt>class</tt>/<tt>struct</tt> Keywords</a></li>
         </ol></li>
     </ol></li>
   <li><a href="#styleissues">Style Issues</a>
     <ol>
-      <li><a href="#macro">The High Level Issues</a>
+      <li><a href="#macro">The High-Level Issues</a>
         <ol>
           <li><a href="#hl_module">A Public Header File <b>is</b> a
               Module</a></li>
-          <li><a href="#hl_dontinclude">#include as Little as Possible</a></li>
+          <li><a href="#hl_dontinclude"><tt>#include</tt> as Little as Possible</a></li>
           <li><a href="#hl_privateheaders">Keep "internal" Headers
               Private</a></li>
-          <li><a href="#hl_earlyexit">Use Early Exits and 'continue' to Simplify
+          <li><a href="#hl_earlyexit">Use Early Exits and <tt>continue</tt> to Simplify
               Code</a></li>
-          <li><a href="#hl_else_after_return">Don't use "else" after a
-              return</a></li>
+          <li><a href="#hl_else_after_return">Don't use <tt>else</tt> after a
+              <tt>return</tt></a></li>
           <li><a href="#hl_predicateloops">Turn Predicate Loops into Predicate
               Functions</a></li>
         </ol></li>
-      <li><a href="#micro">The Low Level Issues</a>
+      <li><a href="#micro">The Low-Level Issues</a>
         <ol>
+          <li><a href="#ll_naming">Name Types, Functions, Variables, and Enumerators Properly</a></li>
           <li><a href="#ll_assert">Assert Liberally</a></li>
-          <li><a href="#ll_ns_std">Do not use 'using namespace std'</a></li>
+          <li><a href="#ll_ns_std">Do not use '<tt>using namespace std</tt>'</a></li>
           <li><a href="#ll_virtual_anch">Provide a virtual method anchor for
               classes in headers</a></li>
-          <li><a href="#ll_end">Don't evaluate end() every time through a
+          <li><a href="#ll_end">Don't evaluate <tt>end()</tt> every time through a
               loop</a></li>
           <li><a href="#ll_iostream"><tt>#include &lt;iostream&gt;</tt> is
               <em>forbidden</em></a></li>
+          <li><a href="#ll_raw_ostream">Use <tt>raw_ostream</tt></a></li>
           <li><a href="#ll_avoidendl">Avoid <tt>std::endl</tt></a></li>
-          <li><a href="#ll_raw_ostream">Use <tt>raw_ostream</tt></a</li>
         </ol></li>
         
       <li><a href="#nano">Microscopic Details</a>
@@ -167,8 +169,8 @@ this:</p>
 
 <p>A few things to note about this particular format:  The "<tt>-*- C++
 -*-</tt>" string on the first line is there to tell Emacs that the source file
-is a C++ file, not a C file (Emacs assumes .h files are C files by default).
-Note that this tag is not necessary in .cpp files.  The name of the file is also
+is a C++ file, not a C file (Emacs assumes <tt>.h</tt> files are C files by default).
+Note that this tag is not necessary in <tt>.cpp</tt> files.  The name of the file is also
 on the first line, along with a very short description of the purpose of the
 file.  This is important when printing out code and flipping though lots of
 pages.</p>
@@ -217,7 +219,7 @@ require less typing, don't have nesting problems, etc.  There are a few cases
 when it is useful to use C style (<tt>/* */</tt>) comments however:</p>
 
 <ol>
-  <li>When writing a C code: Obviously if you are writing C code, use C style
+  <li>When writing C code: Obviously if you are writing C code, use C style
       comments.</li>
   <li>When writing a header file that may be <tt>#include</tt>d by a C source
       file.</li>
@@ -244,12 +246,12 @@ file should be listed.  We prefer these <tt>#include</tt>s to be listed in this
 order:</p>
 
 <ol>
-  <li><a href="#mmheader">Main Module header</a></li>
+  <li><a href="#mmheader">Main Module Header</a></li>
   <li><a href="#hl_privateheaders">Local/Private Headers</a></li>
   <li><tt>llvm/*</tt></li>
   <li><tt>llvm/Analysis/*</tt></li>
   <li><tt>llvm/Assembly/*</tt></li>
-  <li><tt>llvm/Bytecode/*</tt></li>
+  <li><tt>llvm/Bitcode/*</tt></li>
   <li><tt>llvm/CodeGen/*</tt></li>
   <li>...</li>
   <li><tt>Support/*</tt></li>
@@ -257,15 +259,15 @@ order:</p>
   <li>System <tt>#includes</tt></li>
 </ol>
 
-<p>... and each category should be sorted by name.</p>
+<p>and each category should be sorted by name.</p>
 
-<p><a name="mmheader">The "Main Module Header"</a> file applies to .cpp file
-which implement an interface defined by a .h file.  This <tt>#include</tt>
+<p><a name="mmheader">The "Main Module Header"</a> file applies to <tt>.cpp</tt> files
+which implement an interface defined by a <tt>.h</tt> file.  This <tt>#include</tt>
 should always be included <b>first</b> regardless of where it lives on the file
-system.  By including a header file first in the .cpp files that implement the
+system.  By including a header file first in the <tt>.cpp</tt> files that implement the
 interfaces, we ensure that the header does not have any hidden dependencies
 which are not explicitly #included in the header, but should be.  It is also a
-form of documentation in the .cpp file to indicate where the interfaces it
+form of documentation in the <tt>.cpp</tt> file to indicate where the interfaces it
 implements are defined.</p>
 
 </div>
@@ -290,7 +292,7 @@ value and would be detrimental to printing out code.  Also many other projects
 have standardized on 80 columns, so some people have already configured their
 editors for it (vs something else, like 90 columns).</p>
 
-<p>This is one of many contentious issues in coding standards, but is not up
+<p>This is one of many contentious issues in coding standards, but it is not up
 for debate.</p>
 
 </div>
@@ -304,12 +306,12 @@ for debate.</p>
 
 <p>In all cases, prefer spaces to tabs in source files.  People have different
 preferred indentation levels, and different styles of indentation that they
-like... this is fine.  What isn't is that different editors/viewers expand tabs
-out to different tab stops.  This can cause your code to look completely
+like; this is fine.  What isn't fine is that different editors/viewers expand
+tabs out to different tab stops.  This can cause your code to look completely
 unreadable, and it is not worth dealing with.</p>
 
 <p>As always, follow the <a href="#goldenrule">Golden Rule</a> above: follow the
-style of existing code if your are modifying and extending it.  If you like four
+style of existing code if you are modifying and extending it.  If you like four
 spaces of indentation, <b>DO NOT</b> do that in the middle of a chunk of code
 with two spaces of indentation.  Also, do not reindent a whole source file: it
 makes for incredible diffs that are absolutely worthless.</p>
@@ -323,7 +325,7 @@ makes for incredible diffs that are absolutely worthless.</p>
 
 <div class="doc_text">
 
-<p>Okay, your first year of programming you were told that indentation is
+<p>Okay, in your first year of programming you were told that indentation is
 important.  If you didn't believe and internalize this then, now is the time.
 Just do it.</p>
 
@@ -343,17 +345,17 @@ Just do it.</p>
 
 <div class="doc_text">
 
-<p>If your code has compiler warnings in it, something is wrong: you aren't
-casting values correctly, your have "questionable" constructs in your code, or
-you are doing something legitimately wrong.  Compiler warnings can cover up
-legitimate errors in output and make dealing with a translation unit
+<p>If your code has compiler warnings in it, something is wrong &mdash; you
+aren't casting values correctly, your have "questionable" constructs in your
+code, or you are doing something legitimately wrong.  Compiler warnings can
+cover up legitimate errors in output and make dealing with a translation unit
 difficult.</p>
 
 <p>It is not possible to prevent all warnings from all compilers, nor is it
 desirable.  Instead, pick a standard compiler (like <tt>gcc</tt>) that provides
-a good thorough set of warnings, and stick to them.  At least in the case of
+a good thorough set of warnings, and stick to it.  At least in the case of
 <tt>gcc</tt>, it is possible to work around any spurious errors by changing the
-syntax of the code slightly.  For example, an warning that annoys me occurs when
+syntax of the code slightly.  For example, a warning that annoys me occurs when
 I write code like this:</p>
 
 <div class="doc_code">
@@ -377,11 +379,16 @@ if ((V = getValue())) {
 </pre>
 </div>
 
-<p>...which shuts <tt>gcc</tt> up.  Any <tt>gcc</tt> warning that annoys you can
+<p>which shuts <tt>gcc</tt> up.  Any <tt>gcc</tt> warning that annoys you can
 be fixed by massaging the code appropriately.</p>
 
-<p>These are the <tt>gcc</tt> warnings that I prefer to enable: <tt>-Wall
--Winline -W -Wwrite-strings -Wno-unused</tt></p>
+<p>These are the <tt>gcc</tt> warnings that I prefer to enable:</p>
+
+<div class="doc_code">
+<pre>
+-Wall -Winline -W -Wwrite-strings -Wno-unused
+</pre>
+</div>
 
 </div>
 
@@ -397,9 +404,31 @@ portable code.  If there are cases where it isn't possible to write portable
 code, isolate it behind a well defined (and well documented) interface.</p>
 
 <p>In practice, this means that you shouldn't assume much about the host
-compiler, including its support for "high tech" features like partial
-specialization of templates.  If these features are used, they should only be
-an implementation detail of a library which has a simple exposed API.</p>
+compiler, and Visual Studio tends to be the lowest common denominator.
+If advanced features are used, they should only be an implementation detail of 
+a library which has a simple exposed API, and preferably be buried in 
+libSystem.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+<a name="ci_rtti_exceptions">Do not use RTTI or Exceptions</a>
+</div>
+<div class="doc_text">
+
+<p>In an effort to reduce code and executable size, LLVM does not use RTTI
+(e.g. <tt>dynamic_cast&lt;&gt;</tt>) or exceptions.  These two language features
+violate the general C++ principle of <i>"you only pay for what you use"</i>,
+causing executable bloat even if exceptions are never used in the code base, or
+if RTTI is never used for a class.  Because of this, we turn them off globally
+in the code.</p>
+
+<p>That said, LLVM does make extensive use of a hand-rolled form of RTTI that
+use templates like <a href="ProgrammersManual.html#isa"><tt>isa&lt;&gt;</tt>,
+<tt>cast&lt;&gt;</tt>, and <tt>dyn_cast&lt;&gt;</tt></a>.  This form of RTTI is
+opt-in and can be added to any class.  It is also substantially more efficient
+than <tt>dynamic_cast&lt;&gt;</tt>.</p>
 
 </div>
 
@@ -419,8 +448,9 @@ different symbols based on whether <tt>class</tt> or <tt>struct</tt> was used to
 declare the symbol.  This can lead to problems at link time.</p> 
 
 <p>So, the rule for LLVM is to always use the <tt>class</tt> keyword, unless
-<b>all</b> members are public and the type is a C++ "POD" type, in which case 
-<tt>struct</tt> is allowed.</p>
+<b>all</b> members are public and the type is a C++
+<a href="http://en.wikipedia.org/wiki/Plain_old_data_structure">POD</a> type, in
+which case <tt>struct</tt> is allowed.</p>
 
 </div>
 
@@ -433,7 +463,7 @@ declare the symbol.  This can lead to problems at link time.</p>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection">
-  <a name="macro">The High Level Issues</a>
+  <a name="macro">The High-Level Issues</a>
 </div>
 <!-- ======================================================================= -->
 
@@ -448,20 +478,20 @@ declare the symbol.  This can lead to problems at link time.</p>
 <p>C++ doesn't do too well in the modularity department.  There is no real
 encapsulation or data hiding (unless you use expensive protocol classes), but it
 is what we have to work with.  When you write a public header file (in the LLVM
-source tree, they live in the top level "include" directory), you are defining a
-module of functionality.</p>
+source tree, they live in the top level "<tt>include</tt>" directory), you are
+defining a module of functionality.</p>
 
 <p>Ideally, modules should be completely independent of each other, and their
-header files should only include the absolute minimum number of headers
-possible. A module is not just a class, a function, or a namespace: <a
-href="http://www.cuj.com/articles/2000/0002/0002c/0002c.htm">it's a collection
-of these</a> that defines an interface.  This interface may be several
-functions, classes or data structures, but the important issue is how they work
-together.</p>
-
-<p>In general, a module should be implemented with one or more <tt>.cpp</tt>
+header files should only <tt>#include</tt> the absolute minimum number of
+headers possible. A module is not just a class, a function, or a
+namespace: <a href="http://www.cuj.com/articles/2000/0002/0002c/0002c.htm">it's
+a collection of these</a> that defines an interface.  This interface may be
+several functions, classes, or data structures, but the important issue is how
+they work together.</p>
+
+<p>In general, a module should be implemented by one or more <tt>.cpp</tt>
 files.  Each of these <tt>.cpp</tt> files should include the header that defines
-their interface first.  This ensure that all of the dependences of the module
+their interface first.  This ensures that all of the dependences of the module
 header have been properly added to the module header itself, and are not
 implicit.  System headers should be included after user headers for a
 translation unit.</p>
@@ -478,29 +508,28 @@ translation unit.</p>
 <p><tt>#include</tt> hurts compile time performance.  Don't do it unless you
 have to, especially in header files.</p>
 
-<p>But wait, sometimes you need to have the definition of a class to use it, or
+<p>But wait! Sometimes you need to have the definition of a class to use it, or
 to inherit from it.  In these cases go ahead and <tt>#include</tt> that header
 file.  Be aware however that there are many cases where you don't need to have
 the full definition of a class.  If you are using a pointer or reference to a
 class, you don't need the header file.  If you are simply returning a class
 instance from a prototyped function or method, you don't need it.  In fact, for
-most cases, you simply don't need the definition of a class... and not
+most cases, you simply don't need the definition of a class. And not
 <tt>#include</tt>'ing speeds up compilation.</p>
 
 <p>It is easy to try to go too overboard on this recommendation, however.  You
-<b>must</b> include all of the header files that you are using -- you can 
-include them either directly
-or indirectly (through another header file).  To make sure that you don't
-accidentally forget to include a header file in your module header, make sure to
-include your module header <b>first</b> in the implementation file (as mentioned
-above).  This way there won't be any hidden dependencies that you'll find out
-about later...</p>
+<b>must</b> include all of the header files that you are using &mdash; you can
+include them either directly or indirectly (through another header file).  To
+make sure that you don't accidentally forget to include a header file in your
+module header, make sure to include your module header <b>first</b> in the
+implementation file (as mentioned above).  This way there won't be any hidden
+dependencies that you'll find out about later.</p>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="hl_privateheaders">Keep "internal" Headers Private</a>
+  <a name="hl_privateheaders">Keep "Internal" Headers Private</a>
 </div>
 
 <div class="doc_text">
@@ -508,20 +537,20 @@ about later...</p>
 <p>Many modules have a complex implementation that causes them to use more than
 one implementation (<tt>.cpp</tt>) file.  It is often tempting to put the
 internal communication interface (helper classes, extra functions, etc) in the
-public module header file.  Don't do this.</p>
+public module header file.  Don't do this!</p>
 
 <p>If you really need to do something like this, put a private header file in
 the same directory as the source files, and include it locally.  This ensures
 that your private interface remains private and undisturbed by outsiders.</p>
 
-<p>Note however, that it's okay to put extra implementation methods a public
-class itself... just make them private (or protected), and all is well.</p>
+<p>Note however, that it's okay to put extra implementation methods in a public
+class itself. Just make them private (or protected) and all is well.</p>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="hl_earlyexit">Use Early Exits and 'continue' to Simplify Code</a>
+  <a name="hl_earlyexit">Use Early Exits and <tt>continue</tt> to Simplify Code</a>
 </div>
 
 <div class="doc_text">
@@ -530,8 +559,8 @@ class itself... just make them private (or protected), and all is well.</p>
 decisions have to be remembered by the reader to understand a block of code.
 Aim to reduce indentation where possible when it doesn't make it more difficult
 to understand the code.  One great way to do this is by making use of early
-exits and the 'continue' keyword in long loops.  As an example of using an early
-exit from a function, consider this "bad" code:</p>
+exits and the <tt>continue</tt> keyword in long loops.  As an example of using
+an early exit from a function, consider this "bad" code:</p>
 
 <div class="doc_code">
 <pre>
@@ -546,23 +575,23 @@ Value *DoSomething(Instruction *I) {
 </pre>
 </div>
 
-<p>This code has several problems if the body of the 'if' is large.  When you're
-looking at the top of the function, it isn't immediately clear that this
-<em>only</em> does interesting things with non-terminator instructions, and only
-applies to things with the other predicates.  Second, it is relatively difficult
-to describe (in comments) why these predicates are important because the if
-statement makes it difficult to lay out the comments.  Third, when you're deep
-within the body of the code, it is indented an extra level.   Finally, when
-reading the top of the function, it isn't clear what the result is if the
-predicate isn't true, you have to read to the end of the function to know that
-it returns null.</p>
+<p>This code has several problems if the body of the '<tt>if</tt>' is large.
+When you're looking at the top of the function, it isn't immediately clear that
+this <em>only</em> does interesting things with non-terminator instructions, and
+only applies to things with the other predicates.  Second, it is relatively
+difficult to describe (in comments) why these predicates are important because
+the <tt>if</tt> statement makes it difficult to lay out the comments.  Third,
+when you're deep within the body of the code, it is indented an extra level.
+Finally, when reading the top of the function, it isn't clear what the result is
+if the predicate isn't true; you have to read to the end of the function to know
+that it returns null.</p>
 
 <p>It is much preferred to format the code like this:</p>
 
 <div class="doc_code">
 <pre>
 Value *DoSomething(Instruction *I) {
-  // Terminators never need 'something' done to them because, ... 
+  // Terminators never need 'something' done to them because ... 
   if (isa&lt;TerminatorInst&gt;(I))
     return 0;
 
@@ -580,7 +609,7 @@ Value *DoSomething(Instruction *I) {
 </pre>
 </div>
 
-<p>This fixes these problems.  A similar problem frequently happens in for
+<p>This fixes these problems.  A similar problem frequently happens in <tt>for</tt>
 loops.  A silly example is something like this:</p>
 
 <div class="doc_code">
@@ -597,14 +626,13 @@ loops.  A silly example is something like this:</p>
 </pre>
 </div>
 
-<p>When you have very very small loops, this sort of structure is fine, but if
+<p>When you have very, very small loops, this sort of structure is fine. But if
 it exceeds more than 10-15 lines, it becomes difficult for people to read and
-understand at a glance.
-The problem with this sort of code is that it gets very nested very quickly,
-meaning that the reader of the code has to keep a lot of context in their brain
-to remember what is going immediately on in the loop, because they don't know
-if/when the if conditions will have elses etc.  It is strongly preferred to
-structure the loop like this:</p>
+understand at a glance. The problem with this sort of code is that it gets very
+nested very quickly. Meaning that the reader of the code has to keep a lot of
+context in their brain to remember what is going immediately on in the loop,
+because they don't know if/when the <tt>if</tt> conditions will have elses etc.
+It is strongly preferred to structure the loop like this:</p>
 
 <div class="doc_code">
 <pre>
@@ -615,30 +643,32 @@ structure the loop like this:</p>
     Value *LHS = BO-&gt;getOperand(0);
     Value *RHS = BO-&gt;getOperand(1);
     if (LHS == RHS) continue;
+
+    ...
   }
 </pre>
 </div>
 
-<p>This has all the benefits of using early exits from functions: it reduces
+<p>This has all the benefits of using early exits for functions: it reduces
 nesting of the loop, it makes it easier to describe why the conditions are true,
-and it makes it obvious to the reader that there is no "else" coming up that
-they have to push context into their brain for.  If a loop is large, this can
-be a big understandability win.</p>
+and it makes it obvious to the reader that there is no <tt>else</tt> coming up
+that they have to push context into their brain for.  If a loop is large, this
+can be a big understandability win.</p>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="hl_else_after_return">Don't use "else" after a return</a>
+  <a name="hl_else_after_return">Don't use <tt>else</tt> after a <tt>return</tt></a>
 </div>
 
 <div class="doc_text">
 
 <p>For similar reasons above (reduction of indentation and easier reading),
-   please do not use "else" or "else if" after something that interrupts
-   control flow like return, break, continue, goto, etc.  For example, this is
-   "bad":</p>
-   
+please do not use '<tt>else</tt>' or '<tt>else if</tt>' after something that
+interrupts control flow &mdash; like <tt>return</tt>, <tt>break</tt>,
+<tt>continue</tt>, <tt>goto</tt>, etc. For example, this is <em>bad</em>:</p>
+
 <div class="doc_code">
 <pre>
   case 'J': {
@@ -647,24 +677,24 @@ be a big understandability win.</p>
       if (Type.isNull()) {
         Error = ASTContext::GE_Missing_sigjmp_buf;
         return QualType();
-      } else {
+      <b>} else {
         break;
-      }
+      }</b>
     } else {
       Type = Context.getjmp_bufType();
       if (Type.isNull()) {
         Error = ASTContext::GE_Missing_jmp_buf;
         return QualType();
-      } else {
+      <b>} else {
         break;
-      }
+      }</b>
     }
   }
   }
 </pre>
 </div>
 
-<p>It is better to write this something like:</p>
+<p>It is better to write it like this:</p>
 
 <div class="doc_code">
 <pre>
@@ -682,11 +712,11 @@ be a big understandability win.</p>
         return QualType();
       }
     }
-    break;
+    <b>break;</b>
 </pre>
 </div>
 
-<p>Or better yet (in this case), as:</p>
+<p>Or better yet (in this case) as:</p>
 
 <div class="doc_code">
 <pre>
@@ -701,12 +731,12 @@ be a big understandability win.</p>
                        ASTContext::GE_Missing_jmp_buf;
       return QualType();
     }
-    break;
+    <b>break;</b>
 </pre>
 </div>
 
 <p>The idea is to reduce indentation and the amount of code you have to keep
-   track of when reading the code.</p>
+track of when reading the code.</p>
               
 </div>
 
@@ -717,9 +747,9 @@ be a big understandability win.</p>
 
 <div class="doc_text">
 
-<p>It is very common to write small loops that just compute a boolean
-   value.  There are a number of ways that people commonly write these, but an
-   example of this sort of thing is:</p>
+<p>It is very common to write small loops that just compute a boolean value.
+There are a number of ways that people commonly write these, but an example of
+this sort of thing is:</p>
    
 <div class="doc_code">
 <pre>
@@ -740,9 +770,7 @@ be a big understandability win.</p>
 Instead of this sort of loop, we strongly prefer to use a predicate function
 (which may be <a href="#micro_anonns">static</a>) that uses
 <a href="#hl_earlyexit">early exits</a> to compute the predicate.  We prefer
-the code to be structured like this:
-</p>
-
+the code to be structured like this:</p>
 
 <div class="doc_code">
 <pre>
@@ -777,11 +805,94 @@ locality.</p>
 
 <!-- ======================================================================= -->
 <div class="doc_subsection">
-  <a name="micro">The Low Level Issues</a>
+  <a name="micro">The Low-Level Issues</a>
 </div>
 <!-- ======================================================================= -->
 
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="ll_naming">Name Types, Functions, Variables, and Enumerators Properly</a>
+</div>
+
+<div class="doc_text">
+
+<p>Poorly-chosen names can mislead the reader and cause bugs. We cannot stress
+enough how important it is to use <em>descriptive</em> names.  Pick names that
+match the semantics and role of the underlying entities, within reason.  Avoid
+abbreviations unless they are well known.  After picking a good name, make sure
+to use consistent capitalization for the name, as inconsistency requires clients
+to either memorize the APIs or to look it up to find the exact spelling.</p>
+
+<p>In general, names should be in camel case (e.g. <tt>TextFileReader</tt>
+and <tt>isLValue()</tt>).  Different kinds of declarations have different
+rules:</p>
+
+<ul>
+<li><p><b>Type names</b> (including classes, structs, enums, typedefs, etc)
+  should be nouns and start with an upper-case letter (e.g.
+  <tt>TextFileReader</tt>).</p></li>
+  
+<li><p><b>Function names</b> should be verb phrases (as they represent
+    actions), and command-like function should be imperative.  The name should
+    be camel case, and start with a lower case letter (e.g. <tt>openFile()</tt>
+    or <tt>isFoo()</tt>).</p></li>
+
+<li><p><b>Enum declarations</b> (e.g. <tt>enum Foo {...}</tt>) are types, so
+    they should follow the naming conventions for types.  A common use for enums
+    is as a discriminator for a union, or an indicator of a subclass.  When an
+    enum is used for something like this, it should have a <tt>Kind</tt> suffix
+    (e.g. <tt>ValueKind</tt>).</p></li>
+  
+<li><p><b>Enumerators</b> (e.g. <tt>enum { Foo, Bar }</tt>) and <b>public member
+    variables</b> should start with an upper-case letter, just like types.
+    Unless the enumerators are defined in their own small namespace or inside a
+    class, enumerators should have a prefix corresponding to the enum
+    declaration name.  For example, <tt>enum ValueKind { ... };</tt> may contain
+    enumerators like <tt>VK_Argument</tt>, <tt>VK_BasicBlock</tt>, etc.
+    Enumerators that are just convenience constants are exempt from the
+    requirement for a prefix.  For instance:</p>
+
+<div class="doc_code">
+<pre>
+enum {
+  MaxSize = 42,
+  Density = 12
+};
+</pre>
+</div>
+</li>
+
+</ul>
+  
+<p>As an exception, classes that mimic STL classes can have member names in
+STL's style of lower-case words separated by underscores (e.g. <tt>begin()</tt>,
+<tt>push_back()</tt>, and <tt>empty()</tt>).</p>
+
+<p>Here are some examples of good and bad names:</p>
+
+<div class="doc_code">
+<pre>
+class VehicleMaker {
+  ...
+  Factory&lt;Tire&gt; F;            // Bad -- abbreviation and non-descriptive.
+  Factory&lt;Tire&gt; Factory;      // Better.
+  Factory&lt;Tire&gt; TireFactory;  // Even better -- if VehicleMaker has more than one
+                              // kind of factories.
+};
+
+Vehicle MakeVehicle(VehicleType Type) {
+  VehicleMaker M;                         // Might be OK if having a short life-span.
+  Tire tmp1 = M.makeTire();               // Bad -- 'tmp1' provides no information.
+  Light headlight = M.makeLight("head");  // Good -- descriptive.
+  ...
+}
+</pre>
+</div>
+
+</div>
+
+
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
   <a name="ll_assert">Assert Liberally</a>
@@ -789,7 +900,7 @@ locality.</p>
 
 <div class="doc_text">
 
-<p>Use the "<tt>assert</tt>" function to its fullest.  Check all of your
+<p>Use the "<tt>assert</tt>" macro to its fullest.  Check all of your
 preconditions and assumptions, you never know when a bug (not necessarily even
 yours) might be caught early by an assertion, which reduces debugging time
 dramatically.  The "<tt>&lt;cassert&gt;</tt>" header file is probably already
@@ -797,8 +908,8 @@ included by the header files you are using, so it doesn't cost anything to use
 it.</p>
 
 <p>To further assist with debugging, make sure to put some kind of error message
-in the assertion statement (which is printed if the assertion is tripped). This
-helps the poor debugging make sense of why an assertion is being made and
+in the assertion statement, which is printed if the assertion is tripped. This
+helps the poor debugger make sense of why an assertion is being made and
 enforced, and hopefully what to do about it.  Here is one complete example:</p>
 
 <div class="doc_code">
@@ -810,7 +921,7 @@ inline Value *getOperand(unsigned i) {
 </pre>
 </div>
 
-<p>Here are some examples:</p>
+<p>Here are more examples:</p>
 
 <div class="doc_code">
 <pre>
@@ -826,9 +937,9 @@ assert(isa&lt;PHINode&gt;(Succ-&gt;front()) &amp;&amp; "Only works on PHId BBs!"
 </pre>
 </div>
 
-<p>You get the idea...</p>
+<p>You get the idea.</p>
 
-<p>Please be aware when adding assert statements that not all compilers are aware of
+<p>Please be aware that, when adding assert statements, not all compilers are aware of
 the semantics of the assert.  In some places, asserts are used to indicate a piece of
 code that should not be reached.  These are typically of the form:</p>
 
@@ -851,14 +962,47 @@ return 0;
 </pre>
 </div>
 
+<p>Another issue is that values used only by assertions will produce an "unused
+value" warning when assertions are disabled.  For example, this code will
+warn:</p>
+
+<div class="doc_code">
+<pre>
+unsigned Size = V.size();
+assert(Size &gt; 42 &amp;&amp; "Vector smaller than it should be");
+
+bool NewToSet = Myset.insert(Value);
+assert(NewToSet &amp;&amp; "The value shouldn't be in the set yet");
+</pre>
+</div>
+
+<p>These are two interesting different cases. In the first case, the call to
+V.size() is only useful for the assert, and we don't want it executed when
+assertions are disabled.  Code like this should move the call into the assert
+itself.  In the second case, the side effects of the call must happen whether
+the assert is enabled or not.  In this case, the value should be cast to void to
+disable the warning.  To be specific, it is preferred to write the code like
+this:</p>
+
+<div class="doc_code">
+<pre>
+assert(V.size() &gt; 42 &amp;&amp; "Vector smaller than it should be");
+
+bool NewToSet = Myset.insert(Value); (void)NewToSet;
+assert(NewToSet &amp;&amp; "The value shouldn't be in the set yet");
+</pre>
+</div>
+
+
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_ns_std">Do not use '<tt>using namespace std</tt>'</a>
+  <a name="ll_ns_std">Do Not Use '<tt>using namespace std</tt>'</a>
 </div>
 
 <div class="doc_text">
+
 <p>In LLVM, we prefer to explicitly prefix all identifiers from the standard
 namespace with an "<tt>std::</tt>" prefix, rather than rely on
 "<tt>using namespace std;</tt>".</p>
@@ -867,10 +1011,10 @@ namespace with an "<tt>std::</tt>" prefix, rather than rely on
 the namespace of any source file that <tt>#include</tt>s the header.  This is
 clearly a bad thing.</p>
 
-<p>In implementation files (e.g. .cpp files), the rule is more of a stylistic
+<p>In implementation files (e.g. <tt>.cpp</tt> files), the rule is more of a stylistic
 rule, but is still important.  Basically, using explicit namespace prefixes
 makes the code <b>clearer</b>, because it is immediately obvious what facilities
-are being used and where they are coming from, and <b>more portable</b>, because
+are being used and where they are coming from. And <b>more portable</b>, because
 namespace clashes cannot occur between LLVM code and other namespaces.  The
 portability rule is important because different standard library implementations
 expose different symbols (potentially ones they shouldn't), and future revisions
@@ -880,18 +1024,20 @@ such, we never use '<tt>using namespace std;</tt>' in LLVM.</p>
 <p>The exception to the general rule (i.e. it's not an exception for
 the <tt>std</tt> namespace) is for implementation files.  For example, all of
 the code in the LLVM project implements code that lives in the 'llvm' namespace.
-As such, it is ok, and actually clearer, for the .cpp files to have a '<tt>using
-namespace llvm</tt>' directive at their top, after the <tt>#include</tt>s.  The
-general form of this rule is that any .cpp file that implements code in any
-namespace may use that namespace (and its parents'), but should not use any
-others.</p>
+As such, it is ok, and actually clearer, for the <tt>.cpp</tt> files to have a
+'<tt>using namespace llvm;</tt>' directive at the top, after the
+<tt>#include</tt>s.  This reduces indentation in the body of the file for source
+editors that indent based on braces, and keeps the conceptual context cleaner.
+The general form of this rule is that any <tt>.cpp</tt> file that implements
+code in any namespace may use that namespace (and its parents'), but should not
+use any others.</p>
 
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_virtual_anch">Provide a virtual method anchor for classes
-  in headers</a>
+  <a name="ll_virtual_anch">Provide a Virtual Method Anchor for Classes
+  in Headers</a>
 </div>
 
 <div class="doc_text">
@@ -907,15 +1053,16 @@ increasing link times.</p>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_end">Don't evaluate end() every time through a loop</a>
+  <a name="ll_end">Don't evaluate <tt>end()</tt> every time through a loop</a>
 </div>
 
 <div class="doc_text">
 
-<p>Because C++ doesn't have a standard "foreach" loop (though it can be emulated
-with macros and may be coming in C++'0x) we end up writing a lot of loops that
-manually iterate from begin to end on a variety of containers or through other
-data structures.  One common mistake is to write a loop in this style:</p>
+<p>Because C++ doesn't have a standard "<tt>foreach</tt>" loop (though it can be
+emulated with macros and may be coming in C++'0x) we end up writing a lot of
+loops that manually iterate from begin to end on a variety of containers or
+through other data structures.  One common mistake is to write a loop in this
+style:</p>
 
 <div class="doc_code">
 <pre>
@@ -946,10 +1093,10 @@ behavior, please write the loop in the first form and add a comment indicating
 that you did it intentionally.</p>
 
 <p>Why do we prefer the second form (when correct)?  Writing the loop in the
-first form has two problems: First it may be less efficient than evaluating it
-at the start of the loop.  In this case, the cost is probably minor: a few extra
-loads every time through the loop.  However, if the base expression is more
-complex, then the cost can rise quickly.  I've seen loops where the end
+first form has two problems. First it may be less efficient than evaluating it
+at the start of the loop.  In this case, the cost is probably minor &mdash; a
+few extra loads every time through the loop.  However, if the base expression is
+more complex, then the cost can rise quickly.  I've seen loops where the end
 expression was actually something like: "<tt>SomeMap[x]->end()</tt>" and map
 lookups really aren't cheap.  By writing it in the second form consistently, you
 eliminate the issue entirely and don't even have to think about it.</p>
@@ -968,7 +1115,7 @@ prefer it.</p>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is forbidden</a>
+  <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is Forbidden</a>
 </div>
 
 <div class="doc_text">
@@ -977,12 +1124,13 @@ prefer it.</p>
 hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
 support clients using LLVM libraries as part of larger systems. In particular,
 we statically link LLVM into some dynamic libraries. Even if LLVM isn't used,
-the static c'tors are run whenever an application start up that uses the dynamic
-library. There are two problems with this:</p>
+the static constructors are run whenever an application starts up that uses the
+dynamic library. There are two problems with this:</p>
 
 <ol>
-  <li>The time to run the static c'tors impacts startup time of
-      applications&mdash;a critical time for GUI apps.</li>
+  <li>The time to run the static c'tors impacts startup time of applications
+      &mdash; a critical time for GUI apps.</li>
+
   <li>The static c'tors cause the app to pull many extra pages of memory off the
       disk: both the code for the static c'tors in each <tt>.o</tt> file and the
       small amount of data that gets touched. In addition, touched/dirty pages
@@ -990,12 +1138,10 @@ library. There are two problems with this:</p>
 </ol>
 
 <p>Note that using the other stream headers (<tt>&lt;sstream&gt;</tt> for
-example) is not problematic in this regard (just <tt>&lt;iostream&gt;</tt>).
-However, raw_ostream provides various APIs that are better performing for almost
-every use than std::ostream style APIs, so you should just use it for new
-code.</p>
-
-<p><b>New code should always
+example) is not problematic in this regard &mdash;
+just <tt>&lt;iostream&gt;</tt>. However, <tt>raw_ostream</tt> provides various
+APIs that are better performing for almost every use than <tt>std::ostream</tt>
+style APIs. <b>Therefore new code should always
 use <a href="#ll_raw_ostream"><tt>raw_ostream</tt></a> for writing, or
 the <tt>llvm::MemoryBuffer</tt> API for reading files.</b></p>
 
@@ -1004,44 +1150,44 @@ the <tt>llvm::MemoryBuffer</tt> API for reading files.</b></p>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
+  <a name="ll_raw_ostream">Use <tt>raw_ostream</tt></a>
 </div>
 
 <div class="doc_text">
 
-<p>The <tt>std::endl</tt> modifier, when used with iostreams outputs a newline
-to the output stream specified.  In addition to doing this, however, it also
-flushes the output stream.  In other words, these are equivalent:</p>
-
-<div class="doc_code">
-<pre>
-std::cout &lt;&lt; std::endl;
-std::cout &lt;&lt; '\n' &lt;&lt; std::flush;
-</pre>
-</div>
+<p>LLVM includes a lightweight, simple, and efficient stream implementation
+in <tt>llvm/Support/raw_ostream.h</tt>, which provides all of the common
+features of <tt>std::ostream</tt>.  All new code should use <tt>raw_ostream</tt>
+instead of <tt>ostream</tt>.</p>
 
-<p>Most of the time, you probably have no reason to flush the output stream, so
-it's better to use a literal <tt>'\n'</tt>.</p>
+<p>Unlike <tt>std::ostream</tt>, <tt>raw_ostream</tt> is not a template and can
+be forward declared as <tt>class raw_ostream</tt>.  Public headers should
+generally not include the <tt>raw_ostream</tt> header, but use forward
+declarations and constant references to <tt>raw_ostream</tt> instances.</p>
 
 </div>
 
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
-  <a name="ll_raw_ostream">Use <tt>raw_ostream</tt></a>
+  <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
 </div>
 
 <div class="doc_text">
 
-<p>LLVM includes a lightweight, simple, and efficient stream implementation
-in <tt>llvm/Support/raw_ostream.h</tt> which provides all of the common features
-of <tt>std::ostream</tt>.  All new code should use <tt>raw_ostream</tt> instead
-of <tt>ostream</tt>.</p>
+<p>The <tt>std::endl</tt> modifier, when used with <tt>iostreams</tt> outputs a
+newline to the output stream specified.  In addition to doing this, however, it
+also flushes the output stream.  In other words, these are equivalent:</p>
 
-<p>Unlike <tt>std::ostream</tt>, <tt>raw_ostream</tt> is not a template and can
-be forward declared as <tt>class raw_ostream</tt>.  Public headers should
-generally not include the <tt>raw_ostream</tt> header, but use forward
-declarations and constant references to <tt>raw_ostream</tt> instances.</p>
+<div class="doc_code">
+<pre>
+std::cout &lt;&lt; std::endl;
+std::cout &lt;&lt; '\n' &lt;&lt; std::flush;
+</pre>
+</div>
+
+<p>Most of the time, you probably have no reason to flush the output stream, so
+it's better to use a literal <tt>'\n'</tt>.</p>
 
 </div>
 
@@ -1062,54 +1208,54 @@ reasoning on why we prefer them.</p>
 
 <div class="doc_text">
 
-<p>We prefer to put a space before a parentheses only in control flow
+<p>We prefer to put a space before an open parenthesis only in control flow
 statements, but not in normal function call expressions and function-like
 macros.  For example, this is good:</p>
 
 <div class="doc_code">
 <pre>
-  <b>if (</b>x) ...
-  <b>for (</b>i = 0; i != 100; ++i) ...
-  <b>while (</b>llvm_rocks) ...
+<b>if (</b>x) ...
+<b>for (</b>i = 0; i != 100; ++i) ...
+<b>while (</b>llvm_rocks) ...
 
-  <b>somefunc(</b>42);
-  <b><a href="#ll_assert">assert</a>(</b>3 != 4 &amp;&amp; "laws of math are failing me");
+<b>somefunc(</b>42);
+<b><a href="#ll_assert">assert</a>(</b>3 != 4 &amp;&amp; "laws of math are failing me");
   
-  a = <b>foo(</b>42, 92) + <b>bar(</b>x);
-  </pre>
+a = <b>foo(</b>42, 92) + <b>bar(</b>x);
+</pre>
 </div>
 
-<p>... and this is bad:</p>
+<p>and this is bad:</p>
 
 <div class="doc_code">
 <pre>
-  <b>if(</b>x) ...
-  <b>for(</b>i = 0; i != 100; ++i) ...
-  <b>while(</b>llvm_rocks) ...
+<b>if(</b>x) ...
+<b>for(</b>i = 0; i != 100; ++i) ...
+<b>while(</b>llvm_rocks) ...
 
-  <b>somefunc (</b>42);
-  <b><a href="#ll_assert">assert</a> (</b>3 != 4 &amp;&amp; "laws of math are failing me");
+<b>somefunc (</b>42);
+<b><a href="#ll_assert">assert</a> (</b>3 != 4 &amp;&amp; "laws of math are failing me");
   
-  a = <b>foo (</b>42, 92) + <b>bar (</b>x);
+a = <b>foo (</b>42, 92) + <b>bar (</b>x);
 </pre>
 </div>
 
 <p>The reason for doing this is not completely arbitrary.  This style makes
-   control flow operators stand out more, and makes expressions flow better. The
-   function call operator binds very tightly as a postfix operator.  Putting
-   a space after a function name (as in the last example) makes it appear that
-   the code might bind the arguments of the left-hand-side of a binary operator
-   with the argument list of a function and the name of the right side.  More
-   specifically, it is easy to misread the "a" example as:</p>
+control flow operators stand out more, and makes expressions flow better. The
+function call operator binds very tightly as a postfix operator.  Putting a
+space after a function name (as in the last example) makes it appear that the
+code might bind the arguments of the left-hand-side of a binary operator with
+the argument list of a function and the name of the right side.  More
+specifically, it is easy to misread the "a" example as:</p>
    
 <div class="doc_code">
 <pre>
-  a = foo <b>(</b>(42, 92) + bar<b>)</b> (x);
+a = foo <b>(</b>(42, 92) + bar<b>)</b> (x);
 </pre>
 </div>
 
-<p>... when skimming through the code.  By avoiding a space in a function, we
-avoid this misinterpretation.</p>
+<p>when skimming through the code.  By avoiding a space in a function, we avoid
+this misinterpretation.</p>
 
 </div>
 
@@ -1141,7 +1287,7 @@ get in the habit of always using preincrement, and you won't have a problem.</p>
 <div class="doc_text">
 
 <p>
-In general, we strive to reduce indentation where ever possible.  This is useful
+In general, we strive to reduce indentation wherever possible.  This is useful
 because we want code to <a href="#scf_codewidth">fit into 80 columns</a> without
 wrapping horribly, but also because it makes it easier to understand the code.
 Namespaces are a funny thing: they are often large, and we often desire to put
@@ -1186,7 +1332,7 @@ namespace llvm {
 <p>Since the body is small, indenting adds value because it makes it very clear
 where the namespace starts and ends, and it is easy to take the whole thing in
 in one "gulp" when reading the code.  If the blob of code in the namespace is
-larger (as it typically is in a header in the llvm or clang namespaces), do not
+larger (as it typically is in a header in the <tt>llvm</tt> or <tt>clang</tt> namespaces), do not
 indent the code, and add a comment indicating what namespace is being closed.
 For example:</p>
 
@@ -1346,7 +1492,7 @@ something.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2011-02-20 03:03:04 +0100 (Sun, 20 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod
index 433979a87190..3ccaa63e176b 100644
--- a/docs/CommandGuide/FileCheck.pod
+++ b/docs/CommandGuide/FileCheck.pod
@@ -133,7 +133,7 @@ both 32-bit and 64-bit code generation.
 =head2 The "CHECK-NEXT:" directive
 
 Sometimes you want to match lines and would like to verify that matches
-happen on exactly consequtive lines with no other lines in between them.  In
+happen on exactly consecutive lines with no other lines in between them.  In
 this case, you can use CHECK: and CHECK-NEXT: directives to specify this.  If
 you specified a custom check prefix, just use "<PREFIX>-NEXT:".  For
 example, something like this works as you'd expect:
@@ -165,7 +165,7 @@ directive in a file.
 =head2 The "CHECK-NOT:" directive
 
 The CHECK-NOT: directive is used to verify that a string doesn't occur
-between two matches (or the first match and the beginning of the file).  For
+between two matches (or before the first match, or after the last match).  For
 example, to verify that a load is removed by a transformation, a test like this
 can be used:
 
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index 67f0cfc1a110..3c1a9f9ed4f0 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -151,7 +151,7 @@ options) arguments to the tool you are interested in.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-08 01:32:02 +0200 (Wed, 08 Sep 2010) $
+  Last modified: $Date: 2010-09-08 01:10:21 +0200 (Wed, 08 Sep 2010) $
 </address>
 
 </body>
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
index ac24aab4ff6b..eb26ec00fd76 100644
--- a/docs/CommandGuide/llc.pod
+++ b/docs/CommandGuide/llc.pod
@@ -84,6 +84,14 @@ Disable optimizations that may produce excess precision for floating point.
 Note that this option can dramatically slow down code on some systems
 (e.g. X86).
 
+=item B<--enable-no-infs-fp-math>
+
+Enable optimizations that assume no Inf values.
+
+=item B<--enable-no-nans-fp-math>
+
+Enable optimizations that assume no NAN values.
+
 =item B<--enable-unsafe-fp-math>
 
 Enable optimizations that make unsafe assumptions about IEEE math (e.g. that
diff --git a/docs/CommandGuide/lli.pod b/docs/CommandGuide/lli.pod
index d368bec8660a..52a2721e7d70 100644
--- a/docs/CommandGuide/lli.pod
+++ b/docs/CommandGuide/lli.pod
@@ -102,10 +102,13 @@ B<llvm-as E<lt> /dev/null | llc -march=xyz -mattr=help>
 
 Disable optimizations that may increase floating point precision.
 
-=item B<-enable-finite-only-fp-math>
+=item B<-enable-no-infs-fp-math>
 
-Enable optimizations that assumes only finite floating point math. That is,
-there is no NAN or Inf values.
+Enable optimizations that assume no Inf values.
+
+=item B<-enable-no-nans-fp-math>
+
+Enable optimizations that assume no NAN values.
 
 =item B<-enable-unsafe-fp-math>
 
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 47352009ea04..ef99ebc9d412 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -29,7 +29,6 @@
     <li><a href="#copyright">Copyright</a></li>
     <li><a href="#license">License</a></li>
     <li><a href="#patents">Patents</a></li>
-    <li><a href="#devagree">Developer Agreements</a></li>
   </ol></li>
 </ol>
 <div class="doc_author">Written by the LLVM Oversight Team</div>
@@ -196,7 +195,11 @@
 <ol>
   <li><b>Evan Cheng</b>: Code generator and all targets.</li>
 
-  <li><b>Doug Gregor</b>: Clang Basic, Lex, Parse, and Sema Libraries.</li>
+  <li><b>Greg Clayton</b>: LLDB.</li>
+
+  <li><b>Doug Gregor</b>: Clang Frontend Libraries.</li>
+
+  <li><b>Howard Hinnant</b>: libc++.</li>
 
   <li><b>Anton Korobeynikov</b>: Exception handling, debug information, and
       Windows codegen.</li>
@@ -506,40 +509,40 @@ Changes</a></div>
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsection"><a name="copyright">Copyright</a></div>
 <div class="doc_text">
-<p>For consistency and ease of management, the project requires the copyright
-   for all LLVM software to be held by a single copyright holder: the University
-   of Illinois (UIUC).</p>
-  
-<p>Although UIUC may eventually reassign the copyright of the software to
-   another entity (e.g. a dedicated non-profit "LLVM Organization") the intent
-   for the project is to always have a single entity hold the copyrights to LLVM
-   at any given time.</p>
-
-<p>We believe that having a single copyright holder is in the best interests of
-   all developers and users as it greatly reduces the managerial burden for any
-   kind of administrative or technical decisions about LLVM.  The goal of the
-   LLVM project is to always keep the code open and <a href="#license">licensed
-   under a very liberal license</a>.</p>
+
+<p>The LLVM project does not require copyright assignments, which means that the
+   copyright for the code in the project is held by its respective contributors
+   who have each agreed to release their contributed code under the terms of the
+   <a href="#license">LLVM License</a>.</p>
+   
+<p>An implication of this is that the LLVM license is unlikely to ever change:
+   changing it would require tracking down all the contributors to LLVM and
+   getting them to agree that a license change is acceptable for their
+   contribution.  Since there are no plans to change the license, this is not a
+   cause for concern.</p>
+   
+<p>As a contributor to the project, this means that you (or your company) retain
+   ownership of the code you contribute, that it cannot be used in a way that
+   contradicts the license (which is a liberal BSD-style license), and that the
+   license for your contributions won't change without your approval in the
+   future.</p>
+   
 </div>
 
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsection"><a name="license">License</a></div>
 <div class="doc_text">
 <p>We intend to keep LLVM perpetually open source and to use a liberal open
-   source license. The current license is the
+   source license. All of the code in LLVM is available under the
    <a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of
    Illinois/NCSA Open Source License</a>, which boils down to this:</p>
 
 <ul>
   <li>You can freely distribute LLVM.</li>
-
   <li>You must retain the copyright notice if you redistribute LLVM.</li>
-
-  <li>Binaries derived from LLVM must reproduce the copyright notice (e.g.  in
-      an included readme file).</li>
-
+  <li>Binaries derived from LLVM must reproduce the copyright notice (e.g. in an
+      included readme file).</li>
   <li>You can't use our names to promote your LLVM derived products.</li>
-
   <li>There's no warranty on LLVM at all.</li>
 </ul>
   
@@ -549,7 +552,22 @@ Changes</a></div>
    LLVM's license is not a "copyleft" license like the GPL). We suggest that you
    read the <a href="http://www.opensource.org/licenses/UoI-NCSA.php">License</a>
    if further clarification is needed.</p>
-  
+   
+<p>In addition to the UIUC license, the runtime library components of LLVM
+   (<b>compiler_rt and libc++</b>) are also licensed under the <a
+   href="http://www.opensource.org/licenses/mit-license.php">MIT license</a>,
+   which does not contain the binary redistribution clause.  As a user of these
+   runtime libraries, it means that you can choose to use the code under either
+   license (and thus don't need the binary redistribution clause), and as a
+   contributor to the code that you agree that any contributions to these
+   libraries be licensed under both licenses.  We feel that this is important
+   for runtime libraries, because they are implicitly linked into applications
+   and therefore should not subject those applications to the binary
+   redistribution clause. This also means that it is ok to move code from (e.g.)
+   libc++ to the LLVM core without concern, but that code cannot be moved from
+   the LLVM core to libc++ without the copyright owner's permission.
+</p>
+
 <p>Note that the LLVM Project does distribute llvm-gcc, <b>which is GPL.</b>
    This means that anything "linked" into llvm-gcc must itself be compatible
    with the GPL, and must be releasable under the terms of the GPL.  This
@@ -563,7 +581,7 @@ Changes</a></div>
   
 <p>We have no plans to change the license of LLVM.  If you have questions or
    comments about the license, please contact the
-   <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a>.</p>
+   <a href="mailto:llvmdev@cs.uiuc.edu">LLVM Developer's Mailing List</a>.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
@@ -584,21 +602,6 @@ Changes</a></div>
    details.</p>
 </div>
 
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="devagree">Developer Agreements</a></div>
-<div class="doc_text">
-<p>With regards to the LLVM copyright and licensing, developers agree to assign
-   their copyrights to UIUC for any contribution made so that the entire
-   software base can be managed by a single copyright holder.  This implies that
-   any contributions can be licensed under the license that the project
-   uses.</p>
-
-<p>When contributing code, you also affirm that you are legally entitled to
-   grant this copyright, personally or on behalf of your employer.  If the code
-   belongs to some other entity, please raise this issue with the oversight
-   group before the code is committed.</p>
-</div>
-
 <!-- *********************************************************************** -->
 <hr>
 <address>
@@ -609,7 +612,7 @@ Changes</a></div>
   Written by the 
   <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-02 02:09:17 +0200 (Thu, 02 Sep 2010) $
+  Last modified: $Date: 2010-11-16 22:32:53 +0100 (Tue, 16 Nov 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index d324c15390cd..009dbb5abd53 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -40,6 +40,7 @@
   	<li><a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a></li>
   	<li><a href="#llvm_eh_sjlj_lsda"><tt>llvm.eh.sjlj.lsda</tt></a></li>
   	<li><a href="#llvm_eh_sjlj_callsite"><tt>llvm.eh.sjlj.callsite</tt></a></li>
+  	<li><a href="#llvm_eh_sjlj_dispatchsetup"><tt>llvm.eh.sjlj.dispatchsetup</tt></a></li>
   </ol></li>
   <li><a href="#asm">Asm Table Formats</a>
   <ol>
@@ -419,7 +420,7 @@
 <div class="doc_text">
 
 <pre>
-  i32 %<a href="#llvm_eh_selector">llvm.eh.selector</a>(i8*, i8*, i8*, ...)
+  i32 %<a href="#llvm_eh_selector">llvm.eh.selector</a>(i8*, i8*, ...)
 </pre>
 
 <p>This intrinsic is used to compare the exception with the given type infos,
@@ -547,6 +548,23 @@
 
 </div>
 
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+  <a name="llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+  void %<a href="#llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>(i32)
+</pre>
+
+<p>For SJLJ based exception handling, the <a href="#llvm_eh_sjlj_dispatchsetup">
+  <tt>llvm.eh.sjlj.dispatchsetup</tt></a> intrinsic is used by targets to do
+  any unwind-edge setup they need. By default, no action is taken.  </p>
+
+</div>
+
 <!-- ======================================================================= -->
 <div class="doc_section">
   <a name="asm">Asm Table Formats</a>
@@ -619,7 +637,7 @@
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  Last modified: $Date: 2010-12-10 00:05:48 +0100 (Fri, 10 Dec 2010) $
 </address>
 
 </body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index d840c9788ac1..5410137861ff 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -598,13 +598,27 @@ idx3 = (char*) &amp;MyVar + 8
   <a name="overflow"><b>What happens if a GEP computation overflows?</b></a>
 </div>
 <div class="doc_text">
-   <p>If the GEP has the <tt>inbounds</tt> keyword, the result value is
-      undefined.</p>
-
-   <p>Otherwise, the result value is the result from evaluating the implied
-      two's complement integer computation. However, since there's no
-      guarantee of where an object will be allocated in the address space,
-      such values have limited meaning.</p>
+   <p>If the GEP lacks the <tt>inbounds</tt> keyword, the value is the result
+      from evaluating the implied two's complement integer computation. However,
+      since there's no guarantee of where an object will be allocated in the
+      address space, such values have limited meaning.</p>
+
+  <p>If the GEP has the <tt>inbounds</tt> keyword, the result value is
+     undefined (a "<a href="LangRef.html#trapvalues">trap value</a>") if the GEP
+     overflows (i.e. wraps around the end of the address space).</p>
+  
+  <p>As such, there are some ramifications of this for inbounds GEPs: scales
+     implied by array/vector/pointer indices are always known to be "nsw" since
+     they are signed values that are scaled by the element size.  These values
+     are also allowed to be negative (e.g. "gep i32 *%P, i32 -1") but the
+     pointer itself is logically treated as an unsigned value.  This means that
+     GEPs have an asymmetric relation between the pointer base (which is treated
+     as unsigned) and the offset applied to it (which is treated as signed). The
+     result of the additions within the offset calculation cannot have signed
+     overflow, but when applied to the base pointer, there can be signed
+     overflow.
+  </p>
+  
 
 </div>
 
@@ -719,7 +733,7 @@ idx3 = (char*) &amp;MyVar + 8
   <a href="http://validator.w3.org/check/referer"><img
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
-  Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
+  Last modified: $Date: 2011-02-11 22:50:52 +0100 (Fri, 11 Feb 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index 7c105fea8d11..dfb976a29f1e 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -28,6 +28,7 @@
       <li><a href="#environment">Setting Up Your Environment</a></li>
       <li><a href="#unpack">Unpacking the LLVM Archives</a></li>
       <li><a href="#checkout">Checkout LLVM from Subversion</a></li>
+      <li><a href="#git_mirror">LLVM GIT mirror</a></li>
       <li><a href="#installcf">Install the GCC Front End</a></li>
       <li><a href="#config">Local LLVM Configuration</a></li>
       <li><a href="#compile">Compiling the LLVM Suite Source Code</a></li>
@@ -44,10 +45,9 @@
       <li><a href="#projects"><tt>llvm/projects</tt></a></li>
       <li><a href="#runtime"><tt>llvm/runtime</tt></a></li>
       <li><a href="#test"><tt>llvm/test</tt></a></li>
-      <li><a href="#llvmtest"><tt>llvm-test</tt></a></li>
+      <li><a href="#test-suite"><tt>test-suite</tt></a></li>
       <li><a href="#tools"><tt>llvm/tools</tt></a></li>
       <li><a href="#utils"><tt>llvm/utils</tt></a></li>
-      <li><a href="#win32"><tt>llvm/win32</tt></a></li>
     </ol></li>
 
   <li><a href="#tutorial">An Example Using the LLVM Tool Chain</a>
@@ -80,11 +80,12 @@
 <p>Welcome to LLVM! In order to get started, you first need to know some
 basic information.</p>
 
-<p>First, LLVM comes in two pieces. The first piece is the LLVM suite. This
-contains all of the tools, libraries, and header files needed to use the low
-level virtual machine.  It contains an assembler, disassembler, bitcode
-analyzer and bitcode optimizer.  It also contains a test suite that can be
-used to test the LLVM tools and the GCC front end.</p>
+<p>First, LLVM comes in three pieces. The first piece is the LLVM
+suite. This contains all of the tools, libraries, and header files
+needed to use the low level virtual machine.  It contains an
+assembler, disassembler, bitcode analyzer and bitcode optimizer.  It
+also contains basic regression tests that can be used to test the LLVM
+tools and the GCC front end.</p>
 
 <p>The second piece is the GCC front end.  This component provides a version of
 GCC that compiles C and C++ code into LLVM bitcode.  Currently, the GCC front
@@ -93,7 +94,7 @@ compiled into LLVM bitcode, a program can be manipulated with the LLVM tools
 from the LLVM suite.</p>
 
 <p>
-There is a third, optional piece called llvm-test.  It is a suite of programs
+There is a third, optional piece called Test Suite.  It is a suite of programs
 with a testing harness that can be used to further test LLVM's functionality
 and performance.
 </p>
@@ -142,6 +143,7 @@ and performance.
       <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
       <li><tt>cd llvm/projects</tt>
       <li><tt>gunzip --stdout llvm-test-<i>version</i>.tar.gz | tar -xvf -</tt>
+      <li><tt>mv llvm-test-<i>version</i> test-suite</tt>
     </ol></li>
 
   </ul></li>
@@ -162,7 +164,7 @@ and performance.
         <p>Optionally, specify for <i>directory</i> the full pathname of the 
         C/C++ front end installation to use with this LLVM configuration. If
         not specified, the PATH will be searched.  This is only needed if you
-        want to run the testsuite or do some special kinds of LLVM builds.</p></li>
+        want to run test-suite or do some special kinds of LLVM builds.</p></li>
         <li><tt>--enable-spec2000=<i>directory</i></tt>
             <p>Enable the SPEC2000 benchmarks for testing.  The SPEC2000
             benchmarks should be available in
@@ -242,6 +244,11 @@ software you will need.</p>
   <td>x86<sup><a href="#pf_1">1</a></sup></td>
   <td>GCC</td>
 </tr>
+<tr>
+  <td>FreeBSD</td>
+  <td>amd64</td>
+  <td>GCC</td>
+</tr>
 <tr>
   <td>MacOS X<sup><a href="#pf_2">2</a></sup></td>
   <td>PowerPC</td>
@@ -564,6 +571,9 @@ as the previous one. It appears to work with ENABLE_OPTIMIZED=0 (the default).</
 <p><b>GCC 4.3.3 (Debian 4.3.3-10) on ARM</b>: Miscompiles parts of LLVM 2.6
 when optimizations are turned on. The symptom is an infinite loop in
 FoldingSetImpl::RemoveNode while running the code generator.</p>
+<p><b>GCC 4.3.5 and GCC 4.4.5 on ARM</b>: These can miscompile <tt>value >>
+1</tt> even at -O0. A test failure in <tt>test/Assembler/alignstack.ll</tt> is
+one symptom of the problem.
 <p><b>GNU ld 2.16.X</b>. Some 2.16.X versions of the ld linker will produce very
 long warning messages complaining that some ".gnu.linkonce.t.*" symbol was
 defined in a discarded section. You can safely ignore these messages as they are
@@ -684,7 +694,7 @@ compressed with the gzip program.
   <dd>Source release for the LLVM libraries and tools.<br></dd>
 
   <dt><tt>llvm-test-x.y.tar.gz</tt></dt>
-  <dd>Source release for the LLVM test suite.</dd>
+  <dd>Source release for the LLVM test-suite.</dd>
 
   <dt><tt>llvm-gcc-4.2-x.y.source.tar.gz</tt></dt>
   <dd>Source release of the llvm-gcc-4.2 front end.  See README.LLVM in the root
@@ -726,6 +736,8 @@ revision), you can checkout it from the '<tt>tags</tt>' directory (instead of
 subdirectories of the '<tt>tags</tt>' directory:</p>
 
 <ul>
+<li>Release 2.8: <b>RELEASE_28</b></li>
+<li>Release 2.7: <b>RELEASE_27</b></li>
 <li>Release 2.6: <b>RELEASE_26</b></li>
 <li>Release 2.5: <b>RELEASE_25</b></li>
 <li>Release 2.4: <b>RELEASE_24</b></li>
@@ -751,7 +763,7 @@ you get it from the Subversion repository:</p>
 <div class="doc_code">
 <pre>
 % cd llvm/projects
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk llvm-test
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
 </pre>
 </div>
 
@@ -765,6 +777,25 @@ instructions</a> to successfully get and build the LLVM GCC front-end.</p>
 
 </div>
 
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="git_mirror">GIT mirror</a>
+</div>
+
+<div class="doc_text">
+
+<p>GIT mirrors are available for a number of LLVM subprojects. These mirrors
+  sync automatically with each Subversion commit and contain all necessary
+  git-svn marks (so, you can recreate git-svn metadata locally). Note that right
+  now mirrors reflect only <tt>trunk</tt> for each project. You can do the
+  read-only GIT clone of LLVM via: 
+<pre>
+% git clone http://llvm.org/git/llvm.git
+</pre>
+</p>
+
+</div>
+
 <!-- ======================================================================= -->
 <div class="doc_subsection">
   <a name="installcf">Install the GCC Front End</a>
@@ -774,7 +805,7 @@ instructions</a> to successfully get and build the LLVM GCC front-end.</p>
 
 <p>Before configuring and compiling the LLVM suite (or if you want to use just the LLVM
 GCC front end) you can optionally extract the front end from the binary distribution.
-It is used for running the llvm-test testsuite and for compiling C/C++ programs.  Note that
+It is used for running the LLVM test-suite and for compiling C/C++ programs.  Note that
 you can optionally <a href="GCCFEBuildInstrs.html">build llvm-gcc yourself</a> after building the
 main LLVM repository.</p>
 
@@ -795,9 +826,9 @@ to your <tt>PATH</tt> environment variable.  For example, if you uncompressed th
 
 <p>If you now want to build LLVM from source, when you configure LLVM, it will 
 automatically detect <tt>llvm-gcc</tt>'s presence (if it is in your path) enabling its
-use in llvm-test.  Note that you can always build or install <tt>llvm-gcc</tt> at any
+use in test-suite.  Note that you can always build or install <tt>llvm-gcc</tt> at any
 point after building the main LLVM repository: just reconfigure llvm and 
-llvm-test will pick it up.
+test-suite will pick it up.
 </p>
 
 <p>As a convenience for Windows users, the front end binaries for MinGW/x86 include
@@ -1348,7 +1379,7 @@ end to compile.</p>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"><a name="llvmtest"><tt>test-suite</tt></a></div>
+<div class="doc_subsection"><a name="test-suite"><tt>test-suite</tt></a></div>
 <div class="doc_text">
   <p>This is not a directory in the normal llvm module; it is a separate
   Subversion
@@ -1408,7 +1439,7 @@ information is in the <a href="CommandGuide/index.html">Command Guide</a>.</p>
 
   <dt><tt><b>llvm-ld</b></tt></dt>
   <dd><tt>llvm-ld</tt> is a general purpose and extensible linker for LLVM. 
-  This is the linker invoked by <tt>llvmc</tt>. It performsn standard link time
+  This is the linker invoked by <tt>llvmc</tt>. It performs standard link time
   optimizations and allows optimization modules to be loaded and run so that 
   language specific optimizations can be applied at link time.</dd>
 
@@ -1511,15 +1542,6 @@ are code generators for parts of LLVM infrastructure.</p>
 
 </div>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection"><a name="win32"><tt>llvm/win32</tt></a></div>
-<div class="doc_text">
-  <p>This directory contains build scripts and project files for use with 
-  Visual C++. This allows developers on Windows to build LLVM without the need
-  for Cygwin. The contents of this directory should be considered experimental
-  at this time.
-  </p>
-</div>
 <!-- *********************************************************************** -->
 <div class="doc_section">
   <a name="tutorial">An Example Using the LLVM Tool Chain</a>
@@ -1673,7 +1695,7 @@ out:</p>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-08 10:27:18 +0200 (Thu, 08 Jul 2010) $
+  Last modified: $Date: 2011-02-01 21:08:28 +0100 (Tue, 01 Feb 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
index e467e087c587..b6aa4c692d43 100644
--- a/docs/GettingStartedVS.html
+++ b/docs/GettingStartedVS.html
@@ -14,26 +14,19 @@
 
 <ul>
   <li><a href="#overview">Overview</a>
-  <li><a href="#quickstart">Getting Started Quickly (A Summary)</a>
   <li><a href="#requirements">Requirements</a>
     <ol>
       <li><a href="#hardware">Hardware</a>
       <li><a href="#software">Software</a>
     </ol></li>
-
-  <li><a href="#starting">Getting Started with LLVM</a>
-    <ol>
-      <li><a href="#terminology">Terminology and Notation</a>
-      <li><a href="#objfiles">The Location of LLVM Object Files</a>
-    </ol></li>
-
+  <li><a href="#quickstart">Getting Started</a>
   <li><a href="#tutorial">An Example Using the LLVM Tool Chain</a>
   <li><a href="#problems">Common Problems</a>
   <li><a href="#links">Links</a>
 </ul>
 
 <div class="doc_author">
-  <p>Written by: 
+  <p>Written by:
     <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a>
   </p>
 </div>
@@ -47,26 +40,30 @@
 
 <div class="doc_text">
 
-  <p>The Visual Studio port at this time is experimental.  It is suitable for
-  use only if you are writing your own compiler front end or otherwise have a
-  need to dynamically generate machine code.  The JIT and interpreter are
-  functional, but it is currently not possible to generate assembly code which
-  is then assembled into an executable.  You can indirectly create executables
-  by using the C back end.</p>
-
-  <p>To emphasize, there is no C/C++ front end currently available.
-  <tt>llvm-gcc</tt> is based on GCC, which cannot be bootstrapped using VC++.
-  Eventually there should be a <tt>llvm-gcc</tt> based on Cygwin or MinGW that
-  is usable.  There is also the option of generating bitcode files on Unix and
-  copying them over to Windows.  But be aware the odds of linking C++ code
-  compiled with <tt>llvm-gcc</tt> with code compiled with VC++ is essentially
-  zero.</p>
-
-  <p>The LLVM test suite cannot be run on the Visual Studio port at this
+  <p>Welcome to LLVM on Windows! This document only covers LLVM on Windows using
+  Visual Studio, not mingw or cygwin. In order to get started, you first need to
+  know some basic information.</p>
+
+  <p>There are many different projects that compose LLVM. The first is the LLVM
+  suite. This contains all of the tools, libraries, and header files needed to
+  use the low level virtual machine. It contains an assembler, disassembler,
+  bitcode analyzer and bitcode optimizer. It also contains a test suite that can
+  be used to test the LLVM tools.</p>
+
+  <p>Another useful project on Windows is
+  <a href="http://clang.llvm.org/">clang</a>. Clang is a C family
+  ([Objective]C/C++) compiler. Clang mostly works on Windows, but does not
+  currently understand all of the Microsoft extensions to C and C++. Because of
+  this, clang cannot parse the C++ standard library included with Visual Studio,
+  nor parts of the Windows Platform SDK. However, most standard C programs do
+  compile. Clang can be used to emit bitcode, directly emit object files or
+  even linked executables using Visual Studio's <tt>link.exe</tt></p>
+
+  <p>The large LLVM test suite cannot be run on the Visual Studio port at this
   time.</p>
 
   <p>Most of the tools build and work.  <tt>bugpoint</tt> does build, but does
-  not work.  The other tools 'should' work, but have not been fully tested.</p>
+  not work.</p>
 
   <p>Additional information about the LLVM directory structure and tool chain
   can be found on the main <a href="GettingStarted.html">Getting Started</a>
@@ -74,89 +71,6 @@
 
 </div>
 
-<!-- *********************************************************************** -->
-<div class="doc_section">
-  <a name="quickstart"><b>Getting Started Quickly (A Summary)</b></a>
-</div>
-<!-- *********************************************************************** -->
-
-<div class="doc_text">
-
-<p>Here's the short story for getting up and running quickly with LLVM:</p>
-
-<ol>
-  <li>Read the documentation.</li>
-  <li>Seriously, read the documentation.</li>
-  <li>Remember that you were warned twice about reading the documentation.</li>
-
-  <li>Get the Source Code
-  <ul>
-    <li>With the distributed files:
-    <ol>
-      <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
-      <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt>
-      <i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;or use WinZip</i>
-      <li><tt>cd llvm</tt></li>
-    </ol></li>
-
-    <li>With anonymous Subversion access:
-    <ol>
-      <li><tt>cd <i>where-you-want-llvm-to-live</i></tt></li>
-      <li><tt>svn co http://llvm.org/svn/llvm-project/llvm-top/trunk llvm-top
-      </tt></li>
-      <li><tt>make checkout MODULE=llvm</tt>
-      <li><tt>cd llvm</tt></li>
-    </ol></li>
-  </ul></li>
-  
-  <li> Use <a href="http://www.cmake.org/">CMake</a> to generate up-to-date
-    project files:
-    <ul><li>This step is currently optional as LLVM does still come with a
-    normal Visual Studio solution file, but it is not always kept up-to-date
-    and will soon be deprecated in favor of the multi-platform generator
-    CMake.</li>
-    <li>If CMake is installed then the most simple way is to just start the
-    CMake GUI, select the directory where you have LLVM extracted to, and
-    the default options should all be fine.  The one option you may really
-    want to change, regardless of anything else, might be the
-    CMAKE_INSTALL_PREFIX setting to select a directory to INSTALL to once
-    compiling is complete.</li>
-    <li>If you use CMake to generate the Visual Studio solution and project
-    files, then the Solution will have a few extra options compared to the
-    current included one.  The projects may still be built individually, but
-    to build them all do not just select all of them in batch build (as some
-    are meant as configuration projects), but rather select and build just
-    the ALL_BUILD project to build everything, or the INSTALL project, which
-    first builds the ALL_BUILD project, then installs the LLVM headers, libs,
-    and other useful things to the directory set by the CMAKE_INSTALL_PREFIX
-    setting when you first configured CMake.</li>
-    </ul>
-  </li>
-
-  <li>Start Visual Studio
-  <ul>
-    <li>If you did not use CMake, then simply double click on the solution
-    file <tt>llvm/win32/llvm.sln</tt>.</li>
-    <li>If you used CMake, then the directory you created the project files,
-    the root directory will have an <tt>llvm.sln</tt> file, just
-    double-click on that to open Visual Studio.</li>
-  </ul></li>
-
-  <li>Build the LLVM Suite:
-  <ul>
-    <li>Simply build the solution.</li>
-    <li>The Fibonacci project is a sample program that uses the JIT.  Modify
-    the project's debugging properties to provide a numeric command line
-    argument.  The program will print the corresponding fibonacci value.</li>
-  </ul></li>
-
-</ol>
-
-<p>It is strongly encouraged that you get the latest version from Subversion as
-changes are continually making the VS support better.</p>
-
-</div>
-
 <!-- *********************************************************************** -->
 <div class="doc_section">
   <a name="requirements"><b>Requirements</b></a>
@@ -178,7 +92,7 @@ changes are continually making the VS support better.</p>
 
 <div class="doc_text">
 
-  <p>Any system that can adequately run Visual Studio .NET 2005 SP1 is fine.  
+  <p>Any system that can adequately run Visual Studio .NET 2005 SP1 is fine.
   The LLVM source tree and object files, libraries and executables will consume
   approximately 3GB.</p>
 
@@ -190,75 +104,126 @@ changes are continually making the VS support better.</p>
 
   <p>You will need Visual Studio .NET 2005 SP1 or higher.  The VS2005 SP1
   beta and the normal VS2005 still have bugs that are not completely
-  compatible. VS2003 would work except (at last check) it has a bug with
-  friend classes that you can work-around with some minor code rewriting
-  (and please submit a patch if you do).  Earlier versions of Visual Studio
-  do not support the C++ standard well enough and will not work.</p>
-  
+  compatible.  Earlier versions of Visual Studio do not support the C++ standard
+  well enough and will not work.</p>
+
   <p>You will also need the <a href="http://www.cmake.org/">CMake</a> build
   system since it generates the project files you will use to build with.</p>
 
-  <p>
-  Do not install the LLVM directory tree into a path containing spaces (e.g.
+  <p>If you would like to run the LLVM tests you will need
+  <a href="http://www.python.org/">Python</a>. Versions 2.4-2.7 are known to
+  work. You will need <a href="http://gnuwin32.sourceforge.net/">"GnuWin32"</a>
+  tools, too.</p>
+
+  <p>Do not install the LLVM directory tree into a path containing spaces (e.g.
   C:\Documents and Settings\...) as the configure step will fail.</p>
 
 </div>
 
 <!-- *********************************************************************** -->
 <div class="doc_section">
-  <a name="starting"><b>Getting Started with LLVM</b></a>
+  <a name="quickstart"><b>Getting Started</b></a>
 </div>
 <!-- *********************************************************************** -->
 
 <div class="doc_text">
 
-<p>The remainder of this guide is meant to get you up and running with
-LLVM using Visual Studio and to give you some basic information about the LLVM
-environment.</p>
+<p>Here's the short story for getting up and running quickly with LLVM:</p>
 
-</div>
+<ol>
+  <li>Read the documentation.</li>
+  <li>Seriously, read the documentation.</li>
+  <li>Remember that you were warned twice about reading the documentation.</li>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection">
-  <a name="terminology">Terminology and Notation</a>
-</div>
+  <li>Get the Source Code
+  <ul>
+    <li>With the distributed files:
+    <ol>
+      <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+      <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt>
+      <i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;or use WinZip</i>
+      <li><tt>cd llvm</tt></li>
+    </ol></li>
 
-<div class="doc_text">
+    <li>With anonymous Subversion access:
+    <ol>
+      <li><tt>cd <i>where-you-want-llvm-to-live</i></tt></li>
+      <li><tt>svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</tt></li>
+      <li><tt>cd llvm</tt></li>
+    </ol></li>
+  </ul></li>
 
-<p>Throughout this manual, the following names are used to denote paths
-specific to the local system and working environment.  <i>These are not
-environment variables you need to set but just strings used in the rest
-of this document below</i>.  In any of the examples below, simply replace
-each of these names with the appropriate pathname on your local system.
-All these paths are absolute:</p>
+  <li> Use <a href="http://www.cmake.org/">CMake</a> to generate up-to-date
+    project files:
+    <ul>
+      <li>Once CMake is installed then the simplest way is to just start the
+        CMake GUI, select the directory where you have LLVM extracted to, and the
+        default options should all be fine.  One option you may really want to
+        change, regardless of anything else, might be the CMAKE_INSTALL_PREFIX
+        setting to select a directory to INSTALL to once compiling is complete,
+        although installation is not mandatory for using LLVM.  Another important
+        option is LLVM_TARGETS_TO_BUILD, which controls the LLVM target
+        architectures that are included on the build.
+      <li>See the <a href="CMake.html">LLVM CMake guide</a> for
+        detailed information about how to configure the LLVM
+        build.</li>
+    </ul>
+  </li>
 
-<dl>
-    <dt>SRC_ROOT</dt>
-    <dd><p>This is the top level directory of the LLVM source tree.</p></dd>
+  <li>Start Visual Studio
+  <ul>
+    <li>In the directory you created the project files will have
+    an <tt>llvm.sln</tt> file, just double-click on that to open
+    Visual Studio.</li>
+  </ul></li>
 
-    <dt>OBJ_ROOT</dt>
-    <dd><p>This is the top level directory of the LLVM object tree (i.e. the
-        tree where object files and compiled programs will be placed.  It is
-        fixed at SRC_ROOT/win32).</p></dd>
-</dl>
+  <li>Build the LLVM Suite:
+  <ul>
+    <li>The projects may still be built individually, but
+    to build them all do not just select all of them in batch build (as some
+    are meant as configuration projects), but rather select and build just
+    the ALL_BUILD project to build everything, or the INSTALL project, which
+    first builds the ALL_BUILD project, then installs the LLVM headers, libs,
+    and other useful things to the directory set by the CMAKE_INSTALL_PREFIX
+    setting when you first configured CMake.</li>
+    <li>The Fibonacci project is a sample program that uses the JIT.
+    Modify the project's debugging properties to provide a numeric
+    command line argument or run it from the command line.  The
+    program will print the corresponding fibonacci value.</li>
+  </ul></li>
 
-</div>
+  <li>Test LLVM on Visual Studio:
+  <ul>
+    <li>If %PATH% does not contain GnuWin32, you may specify LLVM_LIT_TOOLS_DIR
+    on CMake for the path to GnuWin32.</li>
+    <li>You can run LLVM tests to build the project "check".</li>
+  </ul>
+  </li>
 
-<!-- ======================================================================= -->
-<div class="doc_subsection">
-  <a name="objfiles">The Location of LLVM Object Files</a>
+  <!-- FIXME: Is it up-to-date? -->
+  <li>Test LLVM:
+  <ul>
+    <li>The LLVM tests can be run by <tt>cd</tt>ing to the llvm source directory
+        and running:
+
+<div class="doc_code">
+<pre>
+% llvm-lit test
+</pre>
 </div>
 
-<div class="doc_text">
+    <p>Note that quite a few of these test will fail.</p>
+    </li>
 
-  <p>The object files are placed under <tt>OBJ_ROOT/Debug</tt> for debug builds
-  and <tt>OBJ_ROOT/Release</tt> for release (optimized) builds.  These include
-  both executables and libararies that your application can link against.</p>
+    <li>A specific test or test directory can be run with:</li>
 
-  <p>The files that <tt>configure</tt> would create when building on Unix are
-  created by the <tt>Configure</tt> project and placed in
-  <tt>OBJ_ROOT/llvm</tt>.  You application must have OBJ_ROOT in its include
-  search path just before <tt>SRC_ROOT/include</tt>.</p>
+<div class="doc_code">
+<pre>
+% llvm-lit test/path/to/test
+</pre>
+</div>
+
+</ol>
 
 </div>
 
@@ -286,7 +251,7 @@ int main() {
 
 <div class="doc_code">
 <pre>
-% llvm-gcc -c hello.c -emit-llvm -o hello.bc
+% clang -c hello.c -emit-llvm -o hello.bc
 </pre>
 </div>
 
@@ -295,23 +260,27 @@ int main() {
          facilities that it required.  You can execute this file directly using
          <tt>lli</tt> tool, compile it to native assembly with the <tt>llc</tt>,
          optimize or analyze it further with the <tt>opt</tt> tool, etc.</p>
-      
-      <p><b>Note: while you cannot do this step on Windows, you can do it on a
-         Unix system and transfer <tt>hello.bc</tt> to Windows.  Important:
-         transfer as a binary file!</b></p></li>
+
+      <p>Alternatively you can directly output an executable with clang with:
+      </p>
+
+<div class="doc_code">
+<pre>
+% clang hello.c -o hello.exe
+</pre>
+</div>
+
+  <p>The <tt>-o hello.exe</tt> is required because clang currently outputs
+  <tt>a.out</tt> when neither <tt>-o</tt> nor <tt>-c</tt> are given.</p>
 
   <li><p>Run the program using the just-in-time compiler:</p>
-      
+
 <div class="doc_code">
 <pre>
 % lli hello.bc
 </pre>
 </div>
 
-      <p>Note: this will only work for trivial C programs.  Non-trivial programs
-         (and any C++ program) will have dependencies on the GCC runtime that
-         won't be satisfied by the Microsoft runtime libraries.</p></li>
-
   <li><p>Use the <tt>llvm-dis</tt> utility to take a look at the LLVM assembly
       code:</p>
 
@@ -321,31 +290,27 @@ int main() {
 </pre>
 </div></li>
 
-  <li><p>Compile the program to C using the LLC code generator:</p>
+  <li><p>Compile the program to object code using the LLC code generator:</p>
 
 <div class="doc_code">
 <pre>
-% llc -march=c hello.bc
+% llc -filetype=obj hello.bc
 </pre>
 </div></li>
 
-  <li><p>Compile to binary using Microsoft C:</p>
+  <li><p>Link to binary using Microsoft link:</p>
 
 <div class="doc_code">
 <pre>
-% cl hello.cbe.c
+% link hello.obj -defaultlib:libcmt
 </pre>
 </div>
 
-    <p>Note: this will only work for trivial C programs.  Non-trivial programs
-      (and any C++ program) will have dependencies on the GCC runtime that won't
-      be satisfied by the Microsoft runtime libraries.</p></li>
-
   <li><p>Execute the native code program:</p>
 
 <div class="doc_code">
 <pre>
-% hello.cbe.exe
+% hello.exe
 </pre>
 </div></li>
 </ol>
@@ -360,17 +325,6 @@ int main() {
 
 <div class="doc_text">
 
-  <ul>
-    <li>In Visual C++, if you are linking with the x86 target statically, the
-    linker will remove the x86 target library from your generated executable or
-    shared library because there are no references to it. You can force the
-    linker to include these references by using
-    <tt>"/INCLUDE:_X86TargetMachineModule"</tt> when linking. In the Visual
-    Studio IDE, this can be added in
-<tt>Project&nbsp;Properties->Linker->Input->Force&nbsp;Symbol&nbsp;References</tt>.
-    </li>
-  </ul>
-
 <p>If you are having problems building or using LLVM, or if you have any other
 general questions about LLVM, please consult the <a href="FAQ.html">Frequently
 Asked Questions</a> page.</p>
@@ -411,7 +365,7 @@ out:</p>
 
   <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2011-02-09 05:19:28 +0100 (Wed, 09 Feb 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 3f2e9fb2e640..68c5cf192802 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -55,7 +55,7 @@ mkdir binutils
 cd binutils
 cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
 <em>{enter "anoncvs" as the password}</em>
-cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co src
+cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils
 mkdir build
 cd build
 ../src/configure --enable-gold --enable-plugins
diff --git a/docs/LangRef.html b/docs/LangRef.html
index b717531e3479..05130c29efc2 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -62,6 +62,7 @@
         <ol>
           <li><a href="#t_integer">Integer Type</a></li>
           <li><a href="#t_floating">Floating Point Types</a></li>
+          <li><a href="#t_x86mmx">X86mmx Type</a></li>
           <li><a href="#t_void">Void Type</a></li>
           <li><a href="#t_label">Label Type</a></li>
           <li><a href="#t_metadata">Metadata Type</a></li>
@@ -492,7 +493,7 @@
 
 <pre class="doc_code">
 <i>; Declare the string constant as a global constant.</i>&nbsp;
-<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00"      <i>; [13 x i8]*</i>&nbsp;
+<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"      <i>; [13 x i8]*</i>&nbsp;
 
 <i>; External declaration of the puts function</i>&nbsp;
 <a href="#functionstructure">declare</a> i32 @puts(i8*)                                      <i>; i32 (i8*)* </i>&nbsp;
@@ -845,6 +846,13 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
    region of memory, and all memory objects in LLVM are accessed through
    pointers.</p>
 
+<p>Global variables can be marked with <tt>unnamed_addr</tt> which indicates
+  that the address is not significant, only the content. Constants marked
+  like this can be merged with other constants if they have the same
+  initializer. Note that a constant with significant address <em>can</em>
+  be merged with a <tt>unnamed_addr</tt> constant, the result being a
+  constant whose address is significant.</p>
+
 <p>A global variable may be declared to reside in a target-specific numbered
    address space. For targets that support them, address spaces may affect how
    optimizations are performed and/or what target instructions are used to
@@ -884,7 +892,8 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 <p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
    optional <a href="#linkage">linkage type</a>, an optional
    <a href="#visibility">visibility style</a>, an optional
-   <a href="#callingconv">calling convention</a>, a return type, an optional
+   <a href="#callingconv">calling convention</a>,
+   an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
    <a href="#paramattrs">parameter attribute</a> for the return type, a function
    name, a (possibly empty) argument list (each with optional
    <a href="#paramattrs">parameter attributes</a>), optional
@@ -895,7 +904,8 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
 <p>LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
    optional <a href="#linkage">linkage type</a>, an optional
    <a href="#visibility">visibility style</a>, an optional
-   <a href="#callingconv">calling convention</a>, a return type, an optional
+   <a href="#callingconv">calling convention</a>,
+   an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
    <a href="#paramattrs">parameter attribute</a> for the return type, a function
    name, a possibly empty list of arguments, an optional alignment, and an
    optional <a href="#gc">garbage collector name</a>.</p>
@@ -921,6 +931,9 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
    specified, the function is forced to have at least that much alignment.  All
    alignments must be a power of 2.</p>
 
+<p>If the <tt>unnamed_addr</tt> attribute is given, the address is know to not
+  be significant and two identical functions can be merged</p>.
+
 <h5>Syntax:</h5>
 <pre class="doc_code">
 define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
@@ -1020,8 +1033,9 @@ declare signext i8 @returns_signed_char()
       registers).  Use of this attribute is target-specific.</dd>
 
   <dt><tt><b><a name="byval">byval</a></b></tt></dt>
-  <dd>This indicates that the pointer parameter should really be passed by value
-      to the function.  The attribute implies that a hidden copy of the pointee
+  <dd><p>This indicates that the pointer parameter should really be passed by
+      value to the function.  The attribute implies that a hidden copy of the
+      pointee
       is made between the caller and the callee, so the callee is unable to
       modify the value in the callee.  This attribute is only valid on LLVM
       pointer arguments.  It is generally used to pass structs and arrays by
@@ -1029,10 +1043,13 @@ declare signext i8 @returns_signed_char()
       to belong to the caller not the callee (for example,
       <tt><a href="#readonly">readonly</a></tt> functions should not write to
       <tt>byval</tt> parameters). This is not a valid attribute for return
-      values.  The byval attribute also supports specifying an alignment with
-      the align attribute.  This has a target-specific effect on the code
-      generator that usually indicates a desired alignment for the synthesized
-      stack slot.</dd>
+      values.</p>
+      
+      <p>The byval attribute also supports specifying an alignment with
+      the align attribute.  It indicates the alignment of the stack slot to
+      form and the known alignment of the pointer specified to the call site. If
+      the alignment is not specified, then the code generator makes a
+      target-specific assumption.</p></dd>
 
   <dt><tt><b><a name="sret">sret</a></b></tt></dt>
   <dd>This indicates that the pointer parameter specifies the address of a
@@ -1130,6 +1147,14 @@ define void @f() optsize { ... }
       function into callers whenever possible, ignoring any active inlining size
       threshold for this caller.</dd>
 
+  <dt><tt><b>hotpatch</b></tt></dt>
+  <dd>This attribute indicates that the function should be 'hotpatchable',
+      meaning the function can be patched and/or hooked even while it is
+      loaded into memory. On x86, the function prologue will be preceded
+      by six bytes of padding and will begin with a two-byte instruction.
+      Most of the functions in the Windows system DLLs in Windows XP SP2 or
+      higher were compiled in this fashion.</dd>
+
   <dt><tt><b>inlinehint</b></tt></dt>
   <dd>This attribute indicates that the source code contained a hint that inlining
       this function is desirable (such as the "inline" keyword in C/C++).  It
@@ -1483,7 +1508,9 @@ Classifications</a> </div>
       <td><a href="#t_primitive">primitive</a></td>
       <td><a href="#t_label">label</a>,
           <a href="#t_void">void</a>,
+          <a href="#t_integer">integer</a>,
           <a href="#t_floating">floating point</a>,
+          <a href="#t_x86mmx">x86mmx</a>,
           <a href="#t_metadata">metadata</a>.</td>
     </tr>
     <tr>
@@ -1570,6 +1597,21 @@ Classifications</a> </div>
 
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_x86mmx">X86mmx Type</a> </div>
+
+<div class="doc_text">
+
+<h5>Overview:</h5>
+<p>The x86mmx type represents a value held in an MMX register on an x86 machine.  The operations allowed on it are quite limited:  parameters and return values, load and store, and bitcast.  User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type.  There are no arrays, vectors or constants of this type.</p>
+
+<h5>Syntax:</h5>
+<pre>
+  x86mmx
+</pre>
+
+</div>
+
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection"> <a name="t_void">Void Type</a> </div>
 
@@ -1893,8 +1935,9 @@ Classifications</a> </div>
   &lt; &lt;# elements&gt; x &lt;elementtype&gt; &gt;
 </pre>
 
-<p>The number of elements is a constant integer value; elementtype may be any
-   integer or floating point type.</p>
+<p>The number of elements is a constant integer value larger than 0; elementtype
+   may be any integer or floating point type.  Vectors of size zero are not
+   allowed, and pointers are not allowed as the element type.</p>
 
 <h5>Examples:</h5>
 <table class="layout">
@@ -2050,6 +2093,7 @@ Classifications</a> </div>
    they match the long double format on your target.  All hexadecimal formats
    are big-endian (sign bit at the left).</p>
 
+<p>There are no constants of type x86mmx.</p>
 </div>
 
 <!-- ======================================================================= -->
@@ -2135,8 +2179,8 @@ Classifications</a> </div>
 
 <p>The string '<tt>undef</tt>' can be used anywhere a constant is expected, and
    indicates that the user of the value may receive an unspecified bit-pattern.
-   Undefined values may be of any type (other than label or void) and be used
-   anywhere a constant is permitted.</p>
+   Undefined values may be of any type (other than '<tt>label</tt>'
+   or '<tt>void</tt>') and be used anywhere a constant is permitted.</p>
 
 <p>Undefined values are useful because they indicate to the compiler that the
    program is well defined no matter what value is used.  This gives the
@@ -2155,7 +2199,7 @@ Safe:
 </pre>
 
 <p>This is safe because all of the output bits are affected by the undef bits.
-Any output bit can have a zero or one depending on the input bits.</p>
+   Any output bit can have a zero or one depending on the input bits.</p>
 
 <pre class="doc_code">
   %A = or %X, undef
@@ -2169,13 +2213,14 @@ Unsafe:
 </pre>
 
 <p>These logical operations have bits that are not always affected by the input.
-For example, if "%X" has a zero bit, then the output of the 'and' operation will
-always be a zero, no matter what the corresponding bit from the undef is.  As
-such, it is unsafe to optimize or assume that the result of the and is undef.
-However, it is safe to assume that all bits of the undef could be 0, and
-optimize the and to 0.  Likewise, it is safe to assume that all the bits of
-the undef operand to the or could be set, allowing the or to be folded to
--1.</p>
+   For example, if <tt>%X</tt> has a zero bit, then the output of the
+   '<tt>and</tt>' operation will always be a zero for that bit, no matter what
+   the corresponding bit from the '<tt>undef</tt>' is. As such, it is unsafe to
+   optimize or assume that the result of the '<tt>and</tt>' is '<tt>undef</tt>'.
+   However, it is safe to assume that all bits of the '<tt>undef</tt>' could be
+   0, and optimize the '<tt>and</tt>' to 0. Likewise, it is safe to assume that
+   all the bits of the '<tt>undef</tt>' operand to the '<tt>or</tt>' could be
+   set, allowing the '<tt>or</tt>' to be folded to -1.</p>
 
 <pre class="doc_code">
   %A = select undef, %X, %Y
@@ -2191,13 +2236,14 @@ Unsafe:
   %C = undef
 </pre>
 
-<p>This set of examples show that undefined select (and conditional branch)
-conditions can go "either way" but they have to come from one of the two
-operands.  In the %A example, if %X and %Y were both known to have a clear low
-bit, then %A would have to have a cleared low bit.  However, in the %C example,
-the optimizer is allowed to assume that the undef operand could be the same as
-%Y, allowing the whole select to be eliminated.</p>
-
+<p>This set of examples shows that undefined '<tt>select</tt>' (and conditional
+   branch) conditions can go <em>either way</em>, but they have to come from one
+   of the two operands.  In the <tt>%A</tt> example, if <tt>%X</tt> and
+   <tt>%Y</tt> were both known to have a clear low bit, then <tt>%A</tt> would
+   have to have a cleared low bit. However, in the <tt>%C</tt> example, the
+   optimizer is allowed to assume that the '<tt>undef</tt>' operand could be the
+   same as <tt>%Y</tt>, allowing the whole '<tt>select</tt>' to be
+   eliminated.</p>
 
 <pre class="doc_code">
   %A = xor undef, undef
@@ -2218,16 +2264,17 @@ Safe:
   %F = undef
 </pre>
 
-<p>This example points out that two undef operands are not necessarily the same.
-This can be surprising to people (and also matches C semantics) where they
-assume that "X^X" is always zero, even if X is undef.  This isn't true for a
-number of reasons, but the short answer is that an undef "variable" can
-arbitrarily change its value over its "live range".  This is true because the
-"variable" doesn't actually <em>have a live range</em>.  Instead, the value is
-logically read from arbitrary registers that happen to be around when needed,
-so the value is not necessarily consistent over time.  In fact, %A and %C need
-to have the same semantics or the core LLVM "replace all uses with" concept
-would not hold.</p>
+<p>This example points out that two '<tt>undef</tt>' operands are not
+   necessarily the same. This can be surprising to people (and also matches C
+   semantics) where they assume that "<tt>X^X</tt>" is always zero, even
+   if <tt>X</tt> is undefined. This isn't true for a number of reasons, but the
+   short answer is that an '<tt>undef</tt>' "variable" can arbitrarily change
+   its value over its "live range".  This is true because the variable doesn't
+   actually <em>have a live range</em>. Instead, the value is logically read
+   from arbitrary registers that happen to be around when needed, so the value
+   is not necessarily consistent over time. In fact, <tt>%A</tt> and <tt>%C</tt>
+   need to have the same semantics or the core LLVM "replace all uses with"
+   concept would not hold.</p>
 
 <pre class="doc_code">
   %A = fdiv undef, %X
@@ -2238,17 +2285,17 @@ b: unreachable
 </pre>
 
 <p>These examples show the crucial difference between an <em>undefined
-value</em> and <em>undefined behavior</em>.  An undefined value (like undef) is
-allowed to have an arbitrary bit-pattern.  This means that the %A operation
-can be constant folded to undef because the undef could be an SNaN, and fdiv is
-not (currently) defined on SNaN's.  However, in the second example, we can make
-a more aggressive assumption: because the undef is allowed to be an arbitrary
-value, we are allowed to assume that it could be zero.  Since a divide by zero
-has <em>undefined behavior</em>, we are allowed to assume that the operation
-does not execute at all.  This allows us to delete the divide and all code after
-it: since the undefined operation "can't happen", the optimizer can assume that
-it occurs in dead code.
-</p>
+  value</em> and <em>undefined behavior</em>. An undefined value (like
+  '<tt>undef</tt>') is allowed to have an arbitrary bit-pattern. This means that
+  the <tt>%A</tt> operation can be constant folded to '<tt>undef</tt>', because
+  the '<tt>undef</tt>' could be an SNaN, and <tt>fdiv</tt> is not (currently)
+  defined on SNaN's. However, in the second example, we can make a more
+  aggressive assumption: because the <tt>undef</tt> is allowed to be an
+  arbitrary value, we are allowed to assume that it could be zero. Since a
+  divide by zero has <em>undefined behavior</em>, we are allowed to assume that
+  the operation does not execute at all. This allows us to delete the divide and
+  all code after it. Because the undefined operation "can't happen", the
+  optimizer can assume that it occurs in dead code.</p>
 
 <pre class="doc_code">
 a:  store undef -> %X
@@ -2258,11 +2305,11 @@ a: &lt;deleted&gt;
 b: unreachable
 </pre>
 
-<p>These examples reiterate the fdiv example: a store "of" an undefined value
-can be assumed to not have any effect: we can assume that the value is
-overwritten with bits that happen to match what was already there.  However, a
-store "to" an undefined location could clobber arbitrary memory, therefore, it
-has undefined behavior.</p>
+<p>These examples reiterate the <tt>fdiv</tt> example: a store <em>of</em> an
+   undefined value can be assumed to not have any effect; we can assume that the
+   value is overwritten with bits that happen to match what was already there.
+   However, a store <em>to</em> an undefined location could clobber arbitrary
+   memory, therefore, it has undefined behavior.</p>
 
 </div>
 
@@ -2383,18 +2430,17 @@ end:
    the address of the entry block is illegal.</p>
 
 <p>This value only has defined behavior when used as an operand to the
-   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction or for comparisons
-   against null.  Pointer equality tests between labels addresses is undefined
-   behavior - though, again, comparison against null is ok, and no label is
-   equal to the null pointer.  This may also be passed around as an opaque
-   pointer sized value as long as the bits are not inspected.  This allows
-   <tt>ptrtoint</tt> and arithmetic to be performed on these values so long as
-   the original value is reconstituted before the <tt>indirectbr</tt>.</p>
+   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction, or for
+   comparisons against null. Pointer equality tests between labels addresses
+   results in undefined behavior &mdash; though, again, comparison against null
+   is ok, and no label is equal to the null pointer. This may be passed around
+   as an opaque pointer sized value as long as the bits are not inspected. This
+   allows <tt>ptrtoint</tt> and arithmetic to be performed on these values so
+   long as the original value is reconstituted before the <tt>indirectbr</tt>
+   instruction.</p>
 
-<p>Finally, some targets may provide defined semantics when
-   using the value as the operand to an inline assembly, but that is target
-   specific.
-   </p>
+<p>Finally, some targets may provide defined semantics when using the value as
+   the operand to an inline assembly, but that is target specific.</p>
 
 </div>
 
@@ -2409,7 +2455,7 @@ end:
    to be used as constants.  Constant expressions may be of
    any <a href="#t_firstclass">first class</a> type and may involve any LLVM
    operation that does not have side effects (e.g. load and call are not
-   supported).  The following is the syntax for constant expressions:</p>
+   supported). The following is the syntax for constant expressions:</p>
 
 <dl>
   <dt><b><tt>trunc (CST to TYPE)</tt></b></dt>
@@ -2596,8 +2642,8 @@ call void asm alignstack "eieio", ""()
 <div class="doc_text">
 
 <p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
-   attached to it that contains a constant integer.  If present, the code
-   generator will use the integer as the location cookie value when report
+   attached to it that contains a list of constant integers.  If present, the
+  code generator will use the integer as the location cookie value when report
    errors through the LLVMContext error reporting mechanisms.  This allows a
    front-end to correlate backend errors that occur with inline asm back to the
    source code that produced it.  For example:</p>
@@ -2609,7 +2655,8 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
 </pre>
 
 <p>It is up to the front-end to make sense of the magic numbers it places in the
-   IR.</p>
+   IR.  If the MDNode contains multiple constants, the code generator will use
+   the one that corresponds to the line of the asm that the error occurs on.</p>
 
 </div>
 
@@ -3394,7 +3441,8 @@ Instruction</a> </div>
 
 <h5>Syntax:</h5>
 <pre>
-  &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+  &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
+  &lt;result&gt; = udiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
 </pre>
 
 <h5>Overview:</h5>
@@ -3413,6 +3461,11 @@ Instruction</a> </div>
 
 <p>Division by zero leads to undefined behavior.</p>
 
+<p>If the <tt>exact</tt> keyword is present, the result value of the
+   <tt>udiv</tt> is a <a href="#trapvalues">trap value</a> if %op1 is not a
+  multiple of %op2 (as such, "((a udiv exact b) mul b) == a").</p>
+
+
 <h5>Example:</h5>
 <pre>
   &lt;result&gt; = udiv i32 4, %var          <i>; yields {i32}:result = 4 / %var</i>
@@ -3631,7 +3684,10 @@ Instruction</a> </div>
 
 <h5>Syntax:</h5>
 <pre>
-  &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+  &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;           <i>; yields {ty}:result</i>
+  &lt;result&gt; = shl nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;       <i>; yields {ty}:result</i>
+  &lt;result&gt; = shl nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;       <i>; yields {ty}:result</i>
+  &lt;result&gt; = shl nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
 </pre>
 
 <h5>Overview:</h5>
@@ -3651,6 +3707,14 @@ Instruction</a> </div>
    vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
    shift amount in <tt>op2</tt>.</p>
 
+<p>If the <tt>nuw</tt> keyword is present, then the shift produces a 
+   <a href="#trapvalues">trap value</a> if it shifts out any non-zero bits.  If
+   the <tt>nsw</tt> keyword is present, then the shift produces a
+   <a href="#trapvalues">trap value</a> if it shifts out any bits that disagree
+   with the resultant sign bit.  As such, NUW/NSW have the same semantics as
+   they would if the shift were expressed as a mul instruction with the same
+   nsw/nuw bits in (mul %op1, (shl 1, %op2)).</p>
+
 <h5>Example:</h5>
 <pre>
   &lt;result&gt; = shl i32 4, %var   <i>; yields {i32}: 4 &lt;&lt; %var</i>
@@ -3670,7 +3734,8 @@ Instruction</a> </div>
 
 <h5>Syntax:</h5>
 <pre>
-  &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+  &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
+  &lt;result&gt; = lshr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
 </pre>
 
 <h5>Overview:</h5>
@@ -3690,6 +3755,11 @@ Instruction</a> </div>
    vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
    shift amount in <tt>op2</tt>.</p>
 
+<p>If the <tt>exact</tt> keyword is present, the result value of the
+   <tt>lshr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+   shifted out are non-zero.</p>
+
+
 <h5>Example:</h5>
 <pre>
   &lt;result&gt; = lshr i32 4, 1   <i>; yields {i32}:result = 2</i>
@@ -3709,7 +3779,8 @@ Instruction</a> </div>
 
 <h5>Syntax:</h5>
 <pre>
-  &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
+  &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
+  &lt;result&gt; = ashr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
 </pre>
 
 <h5>Overview:</h5>
@@ -3730,6 +3801,10 @@ Instruction</a> </div>
    the arguments are vectors, each vector element of <tt>op1</tt> is shifted by
    the corresponding shift amount in <tt>op2</tt>.</p>
 
+<p>If the <tt>exact</tt> keyword is present, the result value of the
+   <tt>ashr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+   shifted out are non-zero.</p>
+
 <h5>Example:</h5>
 <pre>
   &lt;result&gt; = ashr i32 4, 1   <i>; yields {i32}:result = 2</i>
@@ -4097,6 +4172,14 @@ Instruction</a> </div>
    <a href="#t_array">array</a> type.  The operands are constant indices to
    specify which value to extract in a similar manner as indices in a
    '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
+   <p>The major differences to <tt>getelementptr</tt> indexing are:</p>
+     <ul>
+       <li>Since the value being indexed is not a pointer, the first index is
+           omitted and assumed to be zero.</li>
+       <li>At least one index must be specified.</li>
+       <li>Not only struct indices but also array indices must be in
+           bounds.</li>
+     </ul>
 
 <h5>Semantics:</h5>
 <p>The result is the value at the position in the aggregate specified by the
@@ -4131,7 +4214,7 @@ Instruction</a> </div>
    <a href="#t_array">array</a> type.  The second operand is a first-class
    value to insert.  The following operands are constant indices indicating
    the position at which to insert the value in a similar manner as indices in a
-   '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.  The
+   '<tt><a href="#i_extractvalue">extractvalue</a></tt>' instruction.  The
    value to insert must have the same type as the value identified by the
    indices.</p>
 
@@ -7472,7 +7555,7 @@ LLVM</a>.</p>
 
 <h5>Syntax:</h5>
 <pre>
-  declare {}* @llvm.invariant.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;) readonly
+  declare {}* @llvm.invariant.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
 </pre>
 
 <h5>Overview:</h5>
@@ -7647,7 +7730,7 @@ LLVM</a>.</p>
    the <tt>AllocaInst</tt> stack slot to be before local variables on the
    stack. This is to ensure that if a local variable on the stack is
    overwritten, it will destroy the value of the guard. When the function exits,
-   the guard on the stack is checked against the original guard. If they're
+   the guard on the stack is checked against the original guard. If they are
    different, then the program aborts by calling the <tt>__stack_chk_fail()</tt>
    function.</p>
 
@@ -7667,25 +7750,24 @@ LLVM</a>.</p>
 </pre>
 
 <h5>Overview:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information
-   to the optimizers to discover at compile time either a) when an
-   operation like memcpy will either overflow a buffer that corresponds to
-   an object, or b) to determine that a runtime check for overflow isn't
-   necessary. An object in this context means an allocation of a
-   specific class, structure, array, or other object.</p>
+<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information to
+   the optimizers to determine at compile time whether a) an operation (like
+   memcpy) will overflow a buffer that corresponds to an object, or b) that a
+   runtime check for overflow isn't necessary. An object in this context means
+   an allocation of a specific class, structure, array, or other object.</p>
 
 <h5>Arguments:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic takes two arguments.  The first
+<p>The <tt>llvm.objectsize</tt> intrinsic takes two arguments. The first
    argument is a pointer to or into the <tt>object</tt>. The second argument
-   is a boolean 0 or 1.  This argument determines whether you want the 
-   maximum (0) or minimum (1) bytes remaining.  This needs to be a literal 0 or
+   is a boolean 0 or 1. This argument determines whether you want the 
+   maximum (0) or minimum (1) bytes remaining. This needs to be a literal 0 or
    1, variables are not allowed.</p>
    
 <h5>Semantics:</h5>
 <p>The <tt>llvm.objectsize</tt> intrinsic is lowered to either a constant
-   representing the size of the object concerned or <tt>i32/i64 -1 or 0</tt>
-   (depending on the <tt>type</tt> argument if the size cannot be determined
-   at compile time.</p>
+   representing the size of the object concerned, or <tt>i32/i64 -1 or 0</tt>,
+   depending on the <tt>type</tt> argument, if the size cannot be determined at
+   compile time.</p>
 
 </div>
 
@@ -7699,7 +7781,7 @@ LLVM</a>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
+  Last modified: $Date: 2011-02-09 17:44:44 +0100 (Wed, 09 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index 03dc67767990..30334744d7f9 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -19,7 +19,7 @@
   </ul></li>
   <li><a href="#multiphase">Multi-phase communication between LLVM and linker</a>
   <ul>
-    <li><a href="#phase1">Phase 1 : Read LLVM Bytecode Files</a></li>
+    <li><a href="#phase1">Phase 1 : Read LLVM Bitcode Files</a></li>
     <li><a href="#phase2">Phase 2 : Symbol Resolution</a></li>
     <li><a href="#phase3">Phase 3 : Optimize Bitcode Files</a></li>
     <li><a href="#phase4">Phase 4 : Symbol Resolution after optimization</a></li>
@@ -382,7 +382,7 @@ of the native object files.</p>
 
   Devang Patel and Nick Kledzik<br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2010-09-29 22:09:55 +0200 (Wed, 29 Sep 2010) $
 </address>
 
 </body>
diff --git a/docs/Makefile b/docs/Makefile
index 8f7d6171d3b3..389fd90a485e 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -37,14 +37,21 @@ install_targets := install-html
 ifeq ($(ENABLE_DOXYGEN),1)
 install_targets += install-doxygen
 endif
+ifdef OCAMLDOC
 ifneq (,$(filter ocaml,$(BINDINGS_TO_BUILD)))
 install_targets += install-ocamldoc
 endif
+endif
 install-local:: $(install_targets)
 
+generated_targets := doxygen
+ifdef OCAMLDOC
+generated_targets += ocamldoc
+endif
+
 # Live documentation is generated for the web site using this target:
 # 'make generated BUILD_FOR_WEBSITE=1'
-generated:: doxygen ocamldoc
+generated:: $(generated_targets)
 
 install-html: $(PROJ_OBJ_DIR)/html.tar.gz
 	$(Echo) Installing HTML documentation
@@ -59,7 +66,7 @@ $(PROJ_OBJ_DIR)/html.tar.gz: $(HTML)
 	$(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/html.tar
 	$(Verb) cd $(PROJ_SRC_DIR) && \
 	  $(TAR) cf $(PROJ_OBJ_DIR)/html.tar *.html
-	$(Verb) $(GZIP) $(PROJ_OBJ_DIR)/html.tar
+	$(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/html.tar
 
 install-doxygen: doxygen
 	$(Echo) Installing doxygen documentation
@@ -82,7 +89,7 @@ $(PROJ_OBJ_DIR)/doxygen.tar.gz: $(DOXYFILES) $(PROJ_OBJ_DIR)/doxygen.cfg
 	$(Echo) Packaging doxygen documentation
 	$(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/doxygen.tar
 	$(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/doxygen.tar doxygen
-	$(Verb) $(GZIP) $(PROJ_OBJ_DIR)/doxygen.tar
+	$(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/doxygen.tar
 	$(Verb) $(CP) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(PROJ_OBJ_DIR)/doxygen/html/
 
 userloc: $(LLVM_SRC_ROOT)/docs/userloc.html
@@ -104,7 +111,7 @@ ocamldoc: regen-ocamldoc
 	$(Echo) Packaging ocamldoc documentation
 	$(Verb) $(RM) -rf $(PROJ_OBJ_DIR)/ocamldoc.tar*
 	$(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc
-	$(Verb) $(GZIP) $(PROJ_OBJ_DIR)/ocamldoc.tar
+	$(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/ocamldoc.tar
 	$(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/
 
 regen-ocamldoc:
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index 38b7ae19fa64..6ceb09db3274 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -640,18 +640,18 @@
     generate dependencies when running the compiler. Use of this feature is
     discouraged and it may be removed at a later date.</dd>
     <dt><a name="ENABLE_OPTIMIZED"><tt>ENABLE_OPTIMIZED</tt></a></dt>
-    <dd>If set to any value, causes the build to generate optimized objects,
+    <dd>If set to 1, causes the build to generate optimized objects,
     libraries and executables. This alters the flags specified to the compilers
     and linkers. Generally debugging won't be a fun experience with an optimized
     build.</dd>
     <dt><a name="ENABLE_PROFILING"><tt>ENABLE_PROFILING</tt></a></dt>
-    <dd>If set to any value, causes the build to generate both optimized and 
+    <dd>If set to 1, causes the build to generate both optimized and 
     profiled objects, libraries and executables. This alters the flags specified
     to the compilers and linkers to ensure that profile data can be collected
     from the tools built. Use the <tt>gprof</tt> tool to analyze the output from
     the profiled tools (<tt>gmon.out</tt>).</dd>
     <dt><a name="DISABLE_ASSERTIONS"><tt>DISABLE_ASSERTIONS</tt></a></dt>
-    <dd>If set to any value, causes the build to disable assertions, even if 
+    <dd>If set to 1, causes the build to disable assertions, even if 
     building a debug or profile build.  This will exclude all assertion check
     code from the build. LLVM will execute faster, but with little help when
     things go wrong.</dd>
@@ -1028,7 +1028,7 @@
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-24 19:54:00 +0200 (Sat, 24 Jul 2010) $
+  Last modified: $Date: 2010-10-22 14:54:34 +0200 (Fri, 22 Oct 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/Passes.html b/docs/Passes.html
index 0358745f79f7..fb2aff585bdb 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -166,7 +166,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 <tr><td><a href="#loop-rotate">-loop-rotate</a></td><td>Rotate Loops</td></tr>
 <tr><td><a href="#loop-unroll">-loop-unroll</a></td><td>Unroll loops</td></tr>
 <tr><td><a href="#loop-unswitch">-loop-unswitch</a></td><td>Unswitch loops</td></tr>
-<tr><td><a href="#loopsimplify">-loopsimplify</a></td><td>Canonicalize natural loops</td></tr>
+<tr><td><a href="#loop-simplify">-loop-simplify</a></td><td>Canonicalize natural loops</td></tr>
 <tr><td><a href="#loweratomic">-loweratomic</a></td><td>Lower atomic intrinsics</td></tr>
 <tr><td><a href="#lowerinvoke">-lowerinvoke</a></td><td>Lower invoke and unwind, for unwindless code generators</td></tr>
 <tr><td><a href="#lowersetjmp">-lowersetjmp</a></td><td>Lower Set Jump</td></tr>
@@ -382,7 +382,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 
 <!-------------------------------------------------------------------------- -->
 <div class="doc_subsection">
-  <a name="dot-postdom">dot-postdom: Print post dominator tree of function to 'dot' file</a>
+  <a name="dot-postdom">-dot-postdom: Print post dominator tree of function to 'dot' file</a>
 </div>
 <div class="doc_text">
   <p>
@@ -394,7 +394,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "  <p>\n" if !
 
 <!-------------------------------------------------------------------------- -->
 <div class="doc_subsection">
-  <a name="dot-postdom-only">dot-postdom-only: Print post dominator tree of function to 'dot' file
+  <a name="dot-postdom-only">-dot-postdom-only: Print post dominator tree of function to 'dot' file
   (with no function bodies)</a>
 </div>
 <div class="doc_text">
@@ -1491,7 +1491,7 @@ if (X &lt; 3) {</pre>
 
 <!-------------------------------------------------------------------------- -->
 <div class="doc_subsection">
-  <a name="loopsimplify">-loopsimplify: Canonicalize natural loops</a>
+  <a name="loop-simplify">-loop-simplify: Canonicalize natural loops</a>
 </div>
 <div class="doc_text">
   <p>
@@ -2242,7 +2242,7 @@ if (X &lt; 3) {</pre>
 
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-20 03:03:44 +0200 (Fri, 20 Aug 2010) $
+  Last modified: $Date: 2011-02-13 21:57:25 +0100 (Sun, 13 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 8fdd8a00b9bc..0351dd03b7d0 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -84,6 +84,7 @@ option</a></li>
       <li><a href="#dss_indexedmap">"llvm/ADT/IndexedMap.h"</a></li>
       <li><a href="#dss_densemap">"llvm/ADT/DenseMap.h"</a></li>
       <li><a href="#dss_valuemap">"llvm/ADT/ValueMap.h"</a></li>
+      <li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
       <li><a href="#dss_map">&lt;map&gt;</a></li>
       <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
     </ul></li>
@@ -269,9 +270,9 @@ can get, so it will not be discussed in this document.</p>
 
 <ol>
 
-<li><a href="http://www.dinkumware.com/refxcpp.html">Dinkumware C++ Library
-reference</a> - an excellent reference for the STL and other parts of the
-standard C++ library.</li>
+<li><a href="http://www.dinkumware.com/manuals/#Standard C++ Library">Dinkumware
+C++ Library reference</a> - an excellent reference for the STL and other parts
+of the standard C++ library.</li>
 
 <li><a href="http://www.tempest-sw.com/cpp/">C++ In a Nutshell</a> - This is an
 O'Reilly book in the making.  It has a decent Standard Library
@@ -1507,6 +1508,23 @@ a <code>Config</code> parameter to the ValueMap template.</p>
 
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="dss_intervalmap">"llvm/ADT/IntervalMap.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p> IntervalMap is a compact map for small keys and values. It maps key
+intervals instead of single keys, and it will automatically coalesce adjacent
+intervals. When then map only contains a few intervals, they are stored in the
+map object itself to avoid allocations.</p>
+
+<p> The IntervalMap iterators are quite big, so they should not be passed around
+as STL iterators. The heavyweight iterators allow a smaller data structure.</p>
+
+</div>
+
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsubsection">
   <a name="dss_map">&lt;map&gt;</a>
@@ -3838,7 +3856,7 @@ doxygen info: <a href="/doxygen/classllvm_1_1BasicBlock.html">BasicBlock
 Class</a><br>
 Superclass: <a href="#Value"><tt>Value</tt></a></p>
 
-<p>This class represents a single entry multiple exit section of the code,
+<p>This class represents a single entry single exit section of the code,
 commonly known as a basic block by the compiler community.  The
 <tt>BasicBlock</tt> class maintains a list of <a
 href="#Instruction"><tt>Instruction</tt></a>s, which form the body of the block.
@@ -3940,7 +3958,7 @@ arguments. An argument has a pointer to the parent Function.</p>
   <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-04 17:59:16 +0200 (Wed, 04 Aug 2010) $
+  Last modified: $Date: 2011-02-17 03:19:22 +0100 (Thu, 17 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 9b6d5e847e94..84298376a732 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -126,7 +126,7 @@ production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86
     <li>Introduced many new warnings, including <code>-Wmissing-field-initializers</code>, <code>-Wshadow</code>, <code>-Wno-protocol</code>, <code>-Wtautological-compare</code>, <code>-Wstrict-selector-match</code>, <code>-Wcast-align</code>, <code>-Wunused</code> improvements, and greatly improved format-string checking.</li>
     <li>Introduced the "libclang" library, a C interface to Clang intended to support IDE clients.</li>
     <li>Added support for <code>#pragma GCC visibility</code>, <code>#pragma align</code>, and others.</li>
-    <li>Added support for SSE, ARM NEON, and Altivec.</li>
+    <li>Added support for SSE, AVX, ARM NEON, and AltiVec.</li>
     <li>Improved support for many Microsoft extensions.</li>
     <li>Implemented support for blocks in C++.</li>
     <li>Implemented precompiled headers for C++.</li>
@@ -269,7 +269,7 @@ support new platforms, new languages, new architectures, and new features.
 
 <div class="doc_text">
 <p>
-<a href="http://libc++.llvm.org/">libc++</a> is another new member of the LLVM
+<a href="http://libcxx.llvm.org/">libc++</a> is another new member of the LLVM
 family.  It is an implementation of the C++ standard library, written from the
 ground up to specifically target the forthcoming C++'0X standard and focus on
 delivering great performance.</p>
@@ -673,7 +673,7 @@ release includes a few major enhancements and additions to the optimizers:</p>
     be 13 in one of the predecessors of a block.  It does this in conjunction
     with the new LazyValueInfo analysis pass.</li>
 <li>The new RegionInfo analysis pass identifies single-entry single-exit regions
-    in the CFG.  You can play with it with the "opt -regions analyze" or
+    in the CFG.  You can play with it with the "opt -regions -analyze" or
     "opt -view-regions" commands.</li>
 <li>The loop optimizer has significantly improved strength reduction and analysis
   capabilities.  Notably it is able to build on the trap value and signed
@@ -879,8 +879,9 @@ it run faster:</p>
     variables can be accessed via same base address) and potentially reducing
     register pressure.</li>
 
-<li>The ARM has received many minor improvements and tweaks which lead to
-substantially better performance in a wide range of different scenarios.</li>
+<li>The ARM backend has received many minor improvements and tweaks which lead
+    to substantially better performance in a wide range of different scenarios.
+</li>
 
 <li>The ARM NEON intrinsics have been substantially reworked to reduce
     redundancy and improve code generation.  Some of the major changes are:
@@ -1010,6 +1011,17 @@ API changes are:</p>
   LLVM.  The Triple::normalize utility method has been added to help front-ends
   deal with funky triples.
 </li>
+<li>
+  The signature of the <tt>GCMetadataPrinter::finishAssembly</tt> virtual
+  function changed: the <tt>raw_ostream</tt> and <tt>MCAsmInfo</tt> arguments
+  were dropped.  GC plugins which compute stack maps must be updated to avoid
+  having the old definition overload the new signature.
+</li>
+<li>
+  The signature of <tt>MemoryBuffer::getMemBuffer</tt> changed.  Unfortunately
+  calls intended for the old version still compile, but will not work correctly,
+  leading to a confusing error about an invalid header in the bitcode.
+</li>
   
 <li>
   Some APIs were renamed:
@@ -1102,7 +1114,7 @@ components, please contact us on the <a
 href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
 
 <ul>
-<li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PIC16, SystemZ
+<li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, SystemZ
     and XCore backends are experimental.</li>
 <li><tt>llc</tt> "<tt>-filetype=obj</tt>" is experimental on all targets
     other than darwin-i386 and darwin-x86_64.</li>
@@ -1287,7 +1299,7 @@ lists</a>.</p>
   src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
 
   <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-10-04 22:41:06 +0200 (Mon, 04 Oct 2010) $
+  Last modified: $Date: 2010-10-26 14:43:36 +0200 (Tue, 26 Oct 2010) $
 </address>
 
 </body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 9d82e3ff5c69..186ea4abe732 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -78,7 +78,7 @@ height="369">
    that the LLVM debug information</a> takes, which is useful for those
    interested in creating front-ends or dealing directly with the information.
    Further, this document provides specific examples of what debug information
-   for C/C++.</p>
+   for C/C++ looks like.</p>
 
 </div>
 
@@ -460,15 +460,17 @@ provide details such as name, type and where the variable is defined.</p>
 <div class="doc_code">
 <pre>
 !3 = metadata !{
-  i32,     ;; Tag = 13 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
-  metadata ;; Reference to context descriptor
+  i32,     ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
+  metadata,;; Reference to context descriptor
+  i32,     ;; Line number
+  i32      ;; Column number
 }
 </pre>
 </div>
 
 <p>These descriptors provide debug information about nested blocks within a
-   subprogram.  The array of member descriptors is used to define local
-   variables and deeper nested blocks.</p>
+   subprogram. The line number and column numbers are used to dinstinguish
+   two lexical blocks at same depth. </p>
 
 </div>
 
@@ -539,9 +541,9 @@ DW_ATE_unsigned_char = 8
   metadata, ;; Name (may be "" for anonymous types)
   metadata, ;; Reference to file where defined (may be NULL)
   i32,      ;; Line number where defined (may be 0)
-  i32,      ;; Size in bits
-  i32,      ;; Alignment in bits
-  i32,      ;; Offset in bits
+  i64,      ;; Size in bits
+  i64,      ;; Alignment in bits
+  i64,      ;; Offset in bits
   metadata  ;; Reference to type derived from
 }
 </pre>
@@ -586,9 +588,8 @@ DW_TAG_restrict_type    = 55
    the bit offset if embedded in a <a href="#format_composite_type">composite
    type</a>.</p>
 
-<p>Note that the <tt>void *</tt> type is expressed as a
-   <tt>llvm.dbg.derivedtype.type</tt> with tag of <tt>DW_TAG_pointer_type</tt>
-   and <tt>NULL</tt> derived type.</p>
+<p>Note that the <tt>void *</tt> type is expressed as a type derived from NULL.
+</p>
 
 </div>
 
@@ -687,7 +688,7 @@ DW_TAG_inheritance      = 28
 
 <div class="doc_code">
 <pre>
-%<a href="#format_subrange">llvm.dbg.subrange.type</a> = type {
+!42 = metadata !{
   i32,    ;; Tag = 33 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subrange_type)
   i64,    ;; Low value
   i64     ;; High value
@@ -789,15 +790,12 @@ DW_TAG_return_variable = 258
 
 <div class="doc_text">
 <pre>
-  void %<a href="#format_common_declare">llvm.dbg.declare</a>({}*, metadata)
+  void %<a href="#format_common_declare">llvm.dbg.declare</a>(metadata, metadata)
 </pre>
 
 <p>This intrinsic provides information about a local element (ex. variable.) The
-   first argument is the alloca for the variable, cast to a <tt>{}*</tt>. The
-   second argument is
-   the <tt>%<a href="#format_variables">llvm.dbg.variable</a></tt> containing
-   the description of the variable. </p>
-
+   first argument is metadata holding alloca for the variable.</tt>. The
+   second argument is metadata containing description of the variable. </p>
 </div>
 
 <!-- ======================================================================= -->
@@ -813,10 +811,8 @@ DW_TAG_return_variable = 258
 <p>This intrinsic provides information when a user source variable is set to a
    new value.  The first argument is the new value (wrapped as metadata).  The
    second argument is the offset in the user source variable where the new value
-   is written.  The third argument is
-   the <tt>%<a href="#format_variables">llvm.dbg.variable</a></tt> containing
-   the description of the user source variable. </p>
-
+   is written.  The third argument is metadata containing description of the
+   user source variable. </p>
 </div>
 
 <!-- ======================================================================= -->
@@ -862,13 +858,13 @@ entry:
   %Y = alloca i32, align 4                        ; &lt;i32*&gt; [#uses=4]
   %Z = alloca i32, align 4                        ; &lt;i32*&gt; [#uses=3]
   %0 = bitcast i32* %X to {}*                     ; &lt;{}*&gt; [#uses=1]
-  call void @llvm.dbg.declare({}* %0, metadata !0), !dbg !7
+  call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
   store i32 21, i32* %X, !dbg !8
   %1 = bitcast i32* %Y to {}*                     ; &lt;{}*&gt; [#uses=1]
-  call void @llvm.dbg.declare({}* %1, metadata !9), !dbg !10
+  call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
   store i32 22, i32* %Y, !dbg !11
   %2 = bitcast i32* %Z to {}*                     ; &lt;{}*&gt; [#uses=1]
-  call void @llvm.dbg.declare({}* %2, metadata !12), !dbg !14
+  call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
   store i32 23, i32* %Z, !dbg !15
   %tmp = load i32* %X, !dbg !16                   ; &lt;i32&gt; [#uses=1]
   %tmp1 = load i32* %Y, !dbg !16                  ; &lt;i32&gt; [#uses=1]
@@ -879,7 +875,7 @@ entry:
   ret void, !dbg !18
 }
 
-declare void @llvm.dbg.declare({}*, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !0 = metadata !{i32 459008, metadata !1, metadata !"X", 
                 metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
@@ -921,7 +917,7 @@ declare void @llvm.dbg.declare({}*, metadata) nounwind readnone
 
 <div class="doc_code">
 <pre>
-call void @llvm.dbg.declare({}* %0, metadata !0), !dbg !7   
+call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7   
 </pre>
 </div>
 
@@ -956,7 +952,7 @@ call void @llvm.dbg.declare({}* %0, metadata !0), !dbg !7
 
 <div class="doc_code">
 <pre>
-call void @llvm.dbg.declare({}* %2, metadata !12), !dbg !14
+call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
 </pre>
 </div>
 
@@ -1780,7 +1776,7 @@ enum Trees {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-13 18:53:20 +0200 (Tue, 13 Jul 2010) $
+  Last modified: $Date: 2011-02-03 01:22:17 +0100 (Thu, 03 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 0bdb6dd62c53..d118332c9939 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -405,8 +405,6 @@ which case the user must specify it explicitly.</dd>
 the symbol table.  If the type of 'a' does not match <em>type</em>, TableGen
 aborts with an error. !cast&lt;string&gt; is a special case in that the argument must
 be an object defined by a 'def' construct.</dd>
-<dt><tt>!nameconcat&lt;type&gt;(a, b)</tt></dt>
-  <dd>Shorthand for !cast&lt;type&gt;(!strconcat(a, b))</dd>
 <dt><tt>!subst(a, b, c)</tt></dt>
   <dd>If 'a' and 'b' are of string type or are symbol references, substitute 
 'b' for 'a' in 'c.'  This operation is analogous to $(subst) in GNU make.</dd>
@@ -414,18 +412,18 @@ be an object defined by a 'def' construct.</dd>
   <dd>For each member 'b' of dag or list 'a' apply operator 'c.'  'b' is a 
 dummy variable that should be declared as a member variable of an instantiated 
 class.  This operation is analogous to $(foreach) in GNU make.</dd>
-<dt><tt>!car(a)</tt></dt>
+<dt><tt>!head(a)</tt></dt>
   <dd>The first element of list 'a.'</dd>
-<dt><tt>!cdr(a)</tt></dt>
+<dt><tt>!tail(a)</tt></dt>
   <dd>The 2nd-N elements of list 'a.'</dd>
-<dt><tt>!null(a)</tt></dt>
+<dt><tt>!empty(a)</tt></dt>
   <dd>An integer {0,1} indicating whether list 'a' is empty.</dd>
 <dt><tt>!if(a,b,c)</tt></dt>
   <dd>'b' if the result of 'int' or 'bit' operator 'a' is nonzero,
       'c' otherwise.</dd>
 <dt><tt>!eq(a,b)</tt></dt>
-  <dd>Integer one if string a is equal to string b, zero otherwise.  This
-      only operates on string, int and bit objects.  Use !cast<string> to
+  <dd>'bit 1' if string a is equal to string b, 0 otherwise.  This
+      only operates on string, int and bit objects.  Use !cast&lt;string&gt; to
       compare other types of objects.</dd>
 </dl>
 
@@ -844,8 +842,7 @@ more ways to factor out commonality from the records, specially if using
 several levels of multiclass instanciations. This also avoids the need of using
 "let" expressions within subsequent records inside a multiclass.</p> 
 
-<div class="doc_code">
-<pre>
+<pre class="doc_code">
 <b>multiclass </b>basic_r&lt;bits&lt;4&gt; opc&gt; {
   <b>let </b>Predicates = [HasSSE2] in {
     <b>def </b>rr : Instruction&lt;opc, "rr"&gt;;
@@ -871,16 +868,17 @@ several levels of multiclass instanciations. This also avoids the need of using
 <div class="doc_section"><a name="codegen">Code Generator backend info</a></div>
 <!-- *********************************************************************** -->
 
+<div class="doc_text">
+
 <p>Expressions used by code generator to describe instructions and isel
 patterns:</p>
 
-<div class="doc_text">
-
+<dl>
 <dt><tt>(implicit a)</tt></dt>
   <dd>an implicitly defined physical register.  This tells the dag instruction
   selection emitter the input pattern's extra definitions matches implicit
   physical register definitions.</dd>
-
+</dl>
 </div>
 
 <!-- *********************************************************************** -->
@@ -906,7 +904,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-06-21 22:35:09 +0200 (Mon, 21 Jun 2010) $
+  Last modified: $Date: 2011-01-07 18:05:37 +0100 (Fri, 07 Jan 2011) $
 </address>
 
 </body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index c7353ebb0d43..964bdc31247d 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -18,12 +18,14 @@
     <ul>
       <li><a href="#regressiontests">Regression tests</a></li>
       <li><a href="#testsuite">Test suite</a></li>
+      <li><a href="#debuginfotests">Debugging Information tests</a></li>
     </ul>
   </li>
   <li><a href="#quick">Quick start</a>
     <ul>
       <li><a href="#quickregressiontests">Regression tests</a></li>
       <li><a href="#quicktestsuite">Test suite</a></li>
+      <li><a href="#quickdebuginfotests">Debugging Information tests</a></li>
    </ul>
   </li>
   <li><a href="#rtstructure">Regression test structure</a>
@@ -40,7 +42,7 @@
       <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
       <li><a href="#testsuitetests">Running different tests</a></li>
       <li><a href="#testsuiteoutput">Generating test output</a></li>
-      <li><a href="#testsuitecustom">Writing custom tests for llvm-test</a></li>
+      <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
    </ul>
   </li>
 </ol>
@@ -141,6 +143,23 @@ generates code.</p>
 
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="debuginfotests">Debugging Information 
+tests</a></div>
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_text">
+
+<p>The test suite contains tests to check quality of debugging information.
+The test are written in C based languages or in LLVM assembly language. </p>
+
+<p>These tests are compiled and run under a debugger. The debugger output
+is checked to validate of debugging information. See README.txt in the 
+test suite for more information . This test suite is located in the 
+<tt>debuginfo-tests</tt> Subversion module. </p>
+
+</div>
+
 <!--=========================================================================-->
 <div class="doc_section"><a name="quick">Quick start</a></div>
 <!--=========================================================================-->
@@ -153,7 +172,7 @@ generates code.</p>
   The more comprehensive test suite that includes whole 
 programs in C and C++ is in the <tt>test-suite</tt> module. This module should
 be checked out to the <tt>llvm/projects</tt> directory (don't use another name
-then the default "test-suite", for then the test suite will be run every time
+than the default "test-suite", for then the test suite will be run every time
 you run <tt>make</tt> in the main <tt>llvm</tt> directory).
 When you <tt>configure</tt> the <tt>llvm</tt> module, 
 the <tt>test-suite</tt> directory will be automatically configured. 
@@ -237,7 +256,7 @@ programs), first checkout and setup the <tt>test-suite</tt> module:</p>
 </div>
 
 <p>where <tt>$LLVM_GCC_DIR</tt> is the directory where
-you <em>installed</em> llvm-gcc, not it's src or obj
+you <em>installed</em> llvm-gcc, not its src or obj
 dir. The <tt>--with-llvmgccdir</tt> option assumes that
 the <tt>llvm-gcc-4.2</tt> module was configured with
 <tt>--program-prefix=llvm-</tt>, and therefore that the C and C++
@@ -272,6 +291,25 @@ that subdirectory.</p>
 
 </div>
 
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="quickdebuginfotests">Debugging Information 
+tests</a></div>
+<!-- _______________________________________________________________________ -->
+
+<p> To run debugging information tests simply checkout the tests inside
+clang/test directory. </p>
+
+<div class="doc_code">
+<pre>
+%cd clang/test
+% svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
+</pre>
+</div>
+
+<p> These tests are already set up to run as part of clang regression tests.</p>
+
+</div>
+
 <!--=========================================================================-->
 <div class="doc_section"><a name="rtstructure">Regression test structure</a></div>
 <!--=========================================================================-->
@@ -338,6 +376,11 @@ that subdirectory.</p>
   shell. Consequently the syntax differs from normal shell script syntax in a 
   few ways.  You can specify as many RUN lines as needed.</p>
 
+  <p>lit performs substitution on each RUN line to replace LLVM tool
+  names with the full paths to the executable built for each tool (in
+  $(LLVM_OBJ_ROOT)/$(BuildMode)/bin).  This ensures that lit does not
+  invoke any stray LLVM tools in the user's path during testing.</p>
+
   <p>Each RUN line is executed on its own, distinct from other lines unless
   its last character is <tt>\</tt>. This continuation character causes the RUN
   line to be concatenated with the next one. In this way you can build up long
@@ -561,7 +604,7 @@ name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a></div>
 <div class="doc_text">
 
 <p>Sometimes you want to match lines and would like to verify that matches
-happen on exactly consequtive lines with no other lines in between them.  In
+happen on exactly consecutive lines with no other lines in between them.  In
 this case, you can use CHECK: and CHECK-NEXT: directives to specify this.  If
 you specified a custom check prefix, just use "&lt;PREFIX&gt;-NEXT:".  For
 example, something like this works as you'd expect:</p>
@@ -870,34 +913,34 @@ want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
 test using the <tt>TEST</tt> variable to change what tests or run on the
 selected programs (see below for more info).</p>
 
-<p>In addition for testing correctness, the <tt>llvm-test</tt> directory also
+<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
 performs timing tests of various LLVM optimizations.  It also records
 compilation times for the compilers and the JIT.  This information can be
 used to compare the effectiveness of LLVM's optimizations and code
 generation.</p>
 
-<p><tt>llvm-test</tt> tests are divided into three types of tests: MultiSource,
+<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
 SingleSource, and External.</p> 
 
 <ul>
-<li><tt>llvm-test/SingleSource</tt>
+<li><tt>test-suite/SingleSource</tt>
 <p>The SingleSource directory contains test programs that are only a single 
 source file in size.  These are usually small benchmark programs or small 
 programs that calculate a particular value.  Several such programs are grouped 
 together in each directory.</p></li>
 
-<li><tt>llvm-test/MultiSource</tt>
+<li><tt>test-suite/MultiSource</tt>
 <p>The MultiSource directory contains subdirectories which contain entire 
 programs with multiple source files.  Large benchmarks and whole applications 
 go here.</p></li>
 
-<li><tt>llvm-test/External</tt>
+<li><tt>test-suite/External</tt>
 <p>The External directory contains Makefiles for building code that is external
 to (i.e., not distributed with) LLVM.  The most prominent members of this
 directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
 directory does not contain these actual tests, but only the Makefiles that know
 how to properly compile these programs from somewhere else. The presence and
-location of these external programs is configured by the llvm-test
+location of these external programs is configured by the test-suite
 <tt>configure</tt> script.</p></li>
 </ul>
 
@@ -1084,9 +1127,9 @@ many times it triggers.  First thing you should do is add an LLVM
 will tally counts of things you care about.</p>
 
 <p>Following this, you can set up a test and a report that collects these and
-formats them for easy viewing.  This consists of two files, an
+formats them for easy viewing.  This consists of two files, a
 "<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
-test) and an "<tt>llvm-test/TEST.XXX.report</tt>" file that indicates how to
+test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
 format the output into a table.  There are many example reports of various
 levels of sophistication included with the test suite, and the framework is very
 general.</p>
@@ -1147,7 +1190,7 @@ example reports that can do fancy stuff.</p>
 
   John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-08-02 03:20:23 +0200 (Mon, 02 Aug 2010) $
+  Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
index e7a1d3d4b60f..ea28dbec0cc4 100644
--- a/docs/UsingLibraries.html
+++ b/docs/UsingLibraries.html
@@ -23,7 +23,11 @@
   <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
 </div>
 
-<p class="doc_warning">Warning: This document is out of date, please see <a href="CommandGuide/html/llvm-config.html">llvm-config</a> for more information.</p>
+<p class="doc_warning">Warning: This document is out of date, for more
+  information please
+  see <a href="CommandGuide/html/llvm-config.html">llvm-config</a> or,
+  if you use CMake, <a href=CMake.html#embedding>the CMake LLVM
+  guide</a>.</p>
 
 <!-- ======================================================================= -->
 <div class="doc_section"><a name="abstract">Abstract</a></div>
@@ -432,7 +436,7 @@
   <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
 </address>
 <a href="http://llvm.org">The LLVM Compiler Infrastructure</a> 
-<br>Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ </div>
+<br>Last modified: $Date: 2010-09-17 02:30:52 +0200 (Fri, 17 Sep 2010) $ </div>
 </body>
 </html>
 <!-- vim: sw=2 ts=2 ai
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index 2dc0ef772cca..193a1d4d68c9 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -1825,7 +1825,7 @@ register to convert the floating-point value to an integer.
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &amp;DAG) {
   assert(Op.getValueType() == MVT::i32);
   Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+  return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
 }
 </pre>
 </div>    
@@ -2549,7 +2549,7 @@ with assembler.
   <a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
   <br>
-  Last modified: $Date: 2010-07-17 00:35:46 +0200 (Sat, 17 Jul 2010) $
+  Last modified: $Date: 2010-11-23 04:31:01 +0100 (Tue, 23 Nov 2010) $
 </address>
 
 </body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index 1a6edcfc59f3..80258e428352 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -51,6 +51,14 @@
         <li><a href="#doFinalization_loop">The <tt>doFinalization()
                                             </tt> method</a></li>
         </ul></li>
+     <li><a href="#RegionPass">The <tt>RegionPass</tt> class</a>
+        <ul>
+        <li><a href="#doInitialization_region">The <tt>doInitialization(Region *,
+                                            RGPassManager &amp;)</tt> method</a></li>
+        <li><a href="#runOnRegion">The <tt>runOnRegion</tt> method</a></li>
+        <li><a href="#doFinalization_region">The <tt>doFinalization()
+                                            </tt> method</a></li>
+        </ul></li>
      <li><a href="#BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
         <ul>
         <li><a href="#doInitialization_fn">The <tt>doInitialization(Function
@@ -134,6 +142,7 @@ the <tt><a href="#ModulePass">ModulePass</a></tt>, <tt><a
 href="#CallGraphSCCPass">CallGraphSCCPass</a></tt>, <tt><a
 href="#FunctionPass">FunctionPass</a></tt>, or <tt><a
 href="#LoopPass">LoopPass</a></tt>, or <tt><a
+href="#RegionPass">RegionPass</a></tt>, or <tt><a
 href="#BasicBlockPass">BasicBlockPass</a></tt> classes, which gives the system
 more information about what your pass does, and how it can be combined with
 other passes.  One of the main features of the LLVM Pass Framework is that it
@@ -169,9 +178,11 @@ source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
 
 <div class="doc_text">
 
-  <p>First, you need to create a new directory somewhere in the LLVM source 
+  <p>First, configure and build LLVM.  This needs to be done directly inside the
+  LLVM source tree rather than in a separate objects directory.
+  Next, you need to create a new directory somewhere in the LLVM source 
   base.  For this example, we'll assume that you made 
-  <tt>lib/Transforms/Hello</tt>.  Next, you must set up a build script 
+  <tt>lib/Transforms/Hello</tt>.  Finally, you must set up a build script 
   (Makefile) that will compile the source code for the new pass.  To do this, 
   copy the following into <tt>Makefile</tt>:</p>
   <hr/>
@@ -194,8 +205,8 @@ include $(LEVEL)/Makefile.common
 </pre></div>
 
 <p>This makefile specifies that all of the <tt>.cpp</tt> files in the current
-directory are to be compiled and linked together into a
-<tt>Debug+Asserts/lib/Hello.so</tt> shared object that can be dynamically loaded by
+directory are to be compiled and linked together into a shared object
+<tt>$(LEVEL)/Debug+Asserts/lib/Hello.so</tt> that can be dynamically loaded by
 the <tt>opt</tt> or <tt>bugpoint</tt> tools via their <tt>-load</tt> options.  
 If your operating system uses a suffix other than .so (such as windows or 
 Mac OS/X), the appropriate extension will be used.</p>
@@ -262,7 +273,7 @@ time.</p>
 
 <div class="doc_code"><pre>
      static char ID;
-     Hello() : FunctionPass(&amp;ID) {}
+     Hello() : FunctionPass(ID) {}
 </pre></div><p>
 
 <p> This declares pass identifier used by LLVM to identify pass. This allows LLVM to
@@ -290,7 +301,7 @@ function.</p>
 initialization value is not important.</p>
 
 <div class="doc_code"><pre>
-  INITIALIZE_PASS(Hello, "<i>hello</i>", "<i>Hello World Pass</i>",
+  static RegisterPass<Hello> X("<i>hello</i>", "<i>Hello World Pass</i>",
                         false /* Only looks at CFG */,
                         false /* Analysis Pass */);
 }  <i>// end of anonymous namespace</i>
@@ -317,7 +328,7 @@ is supplied as fourth argument. </p>
   <b>struct Hello</b> : <b>public</b> <a href="#FunctionPass">FunctionPass</a> {
     
     static char ID;
-    Hello() : FunctionPass(&amp;ID) {}
+    Hello() : FunctionPass(ID) {}
 
     <b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
       errs() &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
@@ -326,14 +337,15 @@ is supplied as fourth argument. </p>
   };
   
   char Hello::ID = 0;
-  INITIALIZE_PASS(Hello, "<i>Hello</i>", "<i>Hello World Pass</i>", false, false);
+  static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
 }
 
 </pre></div>
 
 <p>Now that it's all together, compile the file with a simple "<tt>gmake</tt>"
-command in the local directory and you should get a new
-"<tt>Debug+Asserts/lib/Hello.so</tt> file.  Note that everything in this file is
+command in the local directory and you should get a new file
+"<tt>Debug+Asserts/lib/Hello.so</tt>" under the top level directory of the LLVM
+source tree (not in the local directory).  Note that everything in this file is
 contained in an anonymous namespace: this reflects the fact that passes are self
 contained units that do not need external interfaces (although they can have
 them) to be useful.</p>
@@ -349,7 +361,7 @@ them) to be useful.</p>
 
 <p>Now that you have a brand new shiny shared object file, we can use the
 <tt>opt</tt> command to run an LLVM program through your pass.  Because you
-registered your pass with the <tt>INITIALIZE_PASS</tt> macro, you will be able to
+registered your pass with <tt>RegisterPass</tt>, you will be able to
 use the <tt>opt</tt> tool to access it, once loaded.</p>
 
 <p>To test it, follow the example at the end of the <a
@@ -547,11 +559,9 @@ href="#BasicBlockPass">BasicBlockPass</a></tt>, you should derive from
 
 <ol>
 
-<li>... <em>not allowed</em> to modify any <tt>Function</tt>s that are not in
-the current SCC.</li>
-
-<li>... <em>not allowed</em> to inspect any Function's other than those in the
-current SCC and the direct callees of the SCC.</li>
+<li>... <em>not allowed</em> to inspect or modify any <tt>Function</tt>s other
+than those in the current SCC and the direct callers and direct callees of the
+SCC.</li>
 
 <li>... <em>required</em> to preserve the current CallGraph object, updating it
 to reflect any changes made to the program.</li>
@@ -805,6 +815,84 @@ program being compiled. </p>
 
 </div>
 
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="RegionPass">The <tt>RegionPass</tt> class </a>
+</div>
+
+<div class="doc_text">
+
+<p> <tt>RegionPass</tt> is similar to <a href="#LoopPass"><tt>LoopPass</tt></a>,
+but executes on each single entry single exit region in the function.
+<tt>RegionPass</tt> processes regions in nested order such that the outer most
+region is processed last.  </p>
+
+<p> <tt>RegionPass</tt> subclasses are allowed to update the region tree by using
+the <tt>RGPassManager</tt> interface. You may overload three virtual methods of
+<tt>RegionPass</tt> to implementing your own region pass is usually. All these
+methods should return true if they modified the program, or false if they didn not.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="doInitialization_region">The <tt>doInitialization(Region *,
+                                                 RGPassManager &amp;)</tt>
+  method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+  <b>virtual bool</b> doInitialization(Region *, RGPassManager &amp;RGM);
+</pre></div>
+
+<p>The <tt>doInitialization</tt> method is designed to do simple initialization
+type of stuff that does not depend on the functions being processed.  The
+<tt>doInitialization</tt> method call is not scheduled to overlap with any
+other pass executions (thus it should be very fast). RPPassManager
+interface should be used to access Function or Module level analysis
+information.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="runOnRegion">The <tt>runOnRegion</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+  <b>virtual bool</b> runOnRegion(Region *, RGPassManager &amp;RGM) = 0;
+</pre></div><p>
+
+<p>The <tt>runOnRegion</tt> method must be implemented by your subclass to do
+the transformation or analysis work of your pass.  As usual, a true value should
+be returned if the region is modified. <tt>RGPassManager</tt> interface
+should be used to update region tree.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="doFinalization_region">The <tt>doFinalization()</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+  <b>virtual bool</b> doFinalization();
+</pre></div>
+
+<p>The <tt>doFinalization</tt> method is an infrequently used method that is
+called when the pass framework has finished calling <a
+href="#runOnRegion"><tt>runOnRegion</tt></a> for every region in the
+program being compiled. </p>
+
+</div>
+
 
 
 <!-- ======================================================================= -->
@@ -967,10 +1055,10 @@ remember, you may not modify the LLVM <tt>Function</tt> or its contents from a
 pass registration works, and discussed some of the reasons that it is used and
 what it does.  Here we discuss how and why passes are registered.</p>
 
-<p>As we saw above, passes are registered with the <b><tt>INITIALIZE_PASS</tt></b>
-macro.  The first parameter is the name of the pass that is to be used on
+<p>As we saw above, passes are registered with the <b><tt>RegisterPass</tt></b>
+template.  The template parameter is the name of the pass that is to be used on
 the command line to specify that the pass should be added to a program (for
-example, with <tt>opt</tt> or <tt>bugpoint</tt>).  The second argument is the
+example, with <tt>opt</tt> or <tt>bugpoint</tt>).  The first argument is the
 name of the pass, which is to be used for the <tt>-help</tt> output of
 programs, as
 well as for debug output generated by the <tt>--debug-pass</tt> option.</p>
@@ -1386,7 +1474,7 @@ results as soon as they are no longer needed.</li>
 <li><b>Pipeline the execution of passes on the program</b> - The
 <tt>PassManager</tt> attempts to get better cache and memory usage behavior out
 of a series of passes by pipelining the passes together.  This means that, given
-a series of consequtive <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s, it
+a series of consecutive <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s, it
 will execute all of the <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s on
 the first function, then all of the <a
 href="#FunctionPass"><tt>FunctionPass</tt></a>es on the second function,
@@ -1833,7 +1921,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-22 01:07:00 +0200 (Thu, 22 Jul 2010) $
+  Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $
 </address>
 
 </body>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index 6cd33b010adc..a320ff7e9064 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -353,8 +353,8 @@ above.</p>
 </div>
 
 <p>The Module symbol table works just like the Function symbol table when it
-comes to name conflicts: if a new function is created with a name was previously
-added to the symbol table, it will get implicitly renamed when added to the
+comes to name conflicts: if a new function is created with a name that was previously
+added to the symbol table, the new function will get implicitly renamed when added to the
 Module.  The code above exploits this fact to determine if there was a previous
 definition of this function.</p>
 
@@ -1263,7 +1263,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
+  Last modified: $Date: 2011-02-15 01:24:32 +0100 (Tue, 15 Feb 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index d286364d2a56..a2511d959e7b 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -176,6 +176,8 @@ add a set of optimizations to run.  The code looks like this:</p>
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
@@ -507,6 +509,7 @@ at runtime.</p>
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -1086,6 +1089,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
@@ -1126,7 +1131,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $
+  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index 4450f2e3a11a..d2c3bd03dc4e 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -907,6 +907,7 @@ if/then/else and for expressions..  To build this example, use:
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -1731,6 +1732,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
@@ -1771,7 +1774,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
+  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index c6a0b8a7d603..7ddf3a099cbc 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -826,6 +826,7 @@ if/then/else and for expressions..  To build this example, use:
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -1768,6 +1769,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
@@ -1808,7 +1811,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
+  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index 1ec99b15bf5c..3b36129d6716 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -1009,6 +1009,7 @@ variables and var/in support.  To build this example, use:
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -2116,6 +2117,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine-&gt;getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Promote allocas to registers.
   OurFPM.add(createPromoteMemoryToRegisterPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
@@ -2158,7 +2161,7 @@ int main() {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
+  Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index ac31fbfc0766..a9fcd704cf8b 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -30,7 +30,7 @@
     <li><a href="#code">Full Code Listing</a></li>
   </ol>
 </li>
-<li><a href="LangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
+<li><a href="OCamlLangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
  tidbits</li>
 </ul>
 
@@ -1901,7 +1901,7 @@ extern double printd(double X) {
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
   <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $
+  Last modified: $Date: 2011-01-01 04:27:43 +0100 (Sat, 01 Jan 2011) $
 </address>
 </body>
 </html>
diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html
new file mode 100644
index 000000000000..64a62002c4cc
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl8.html
@@ -0,0 +1,365 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+                      "http://www.w3.org/TR/html4/strict.dtd">
+
+<html>
+<head>
+  <title>Kaleidoscope: Conclusion and other useful LLVM tidbits</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta name="author" content="Chris Lattner">
+  <link rel="stylesheet" href="../llvm.css" type="text/css">
+</head>
+
+<body>
+
+<div class="doc_title">Kaleidoscope: Conclusion and other useful LLVM
+ tidbits</div>
+
+<ul>
+<li><a href="index.html">Up to Tutorial Index</a></li>
+<li>Chapter 8
+  <ol>
+    <li><a href="#conclusion">Tutorial Conclusion</a></li>
+    <li><a href="#llvmirproperties">Properties of LLVM IR</a>
+    <ul>
+      <li><a href="#targetindep">Target Independence</a></li>
+      <li><a href="#safety">Safety Guarantees</a></li>
+      <li><a href="#langspecific">Language-Specific Optimizations</a></li>
+    </ul>
+    </li>
+    <li><a href="#tipsandtricks">Tips and Tricks</a>
+    <ul>
+      <li><a href="#offsetofsizeof">Implementing portable 
+                                    offsetof/sizeof</a></li>
+      <li><a href="#gcstack">Garbage Collected Stack Frames</a></li>
+    </ul>
+    </li>
+  </ol>
+</li>
+</ul>
+
+
+<div class="doc_author">
+  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="conclusion">Tutorial Conclusion</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Welcome to the the final chapter of the "<a href="index.html">Implementing a
+language with LLVM</a>" tutorial.  In the course of this tutorial, we have grown
+our little Kaleidoscope language from being a useless toy, to being a
+semi-interesting (but probably still useless) toy. :)</p>
+
+<p>It is interesting to see how far we've come, and how little code it has
+taken.  We built the entire lexer, parser, AST, code generator, and an 
+interactive run-loop (with a JIT!) by-hand in under 700 lines of
+(non-comment/non-blank) code.</p>
+
+<p>Our little language supports a couple of interesting features: it supports
+user defined binary and unary operators, it uses JIT compilation for immediate
+evaluation, and it supports a few control flow constructs with SSA construction.
+</p>
+
+<p>Part of the idea of this tutorial was to show you how easy and fun it can be
+to define, build, and play with languages.  Building a compiler need not be a
+scary or mystical process!  Now that you've seen some of the basics, I strongly
+encourage you to take the code and hack on it.  For example, try adding:</p>
+
+<ul>
+<li><b>global variables</b> - While global variables have questional value in
+modern software engineering, they are often useful when putting together quick
+little hacks like the Kaleidoscope compiler itself.  Fortunately, our current
+setup makes it very easy to add global variables: just have value lookup check
+to see if an unresolved variable is in the global variable symbol table before
+rejecting it.  To create a new global variable, make an instance of the LLVM
+<tt>GlobalVariable</tt> class.</li>
+
+<li><b>typed variables</b> - Kaleidoscope currently only supports variables of
+type double.  This gives the language a very nice elegance, because only
+supporting one type means that you never have to specify types.  Different
+languages have different ways of handling this.  The easiest way is to require
+the user to specify types for every variable definition, and record the type
+of the variable in the symbol table along with its Value*.</li>
+
+<li><b>arrays, structs, vectors, etc</b> - Once you add types, you can start
+extending the type system in all sorts of interesting ways.  Simple arrays are
+very easy and are quite useful for many different applications.  Adding them is
+mostly an exercise in learning how the LLVM <a 
+href="../LangRef.html#i_getelementptr">getelementptr</a> instruction works: it
+is so nifty/unconventional, it <a 
+href="../GetElementPtr.html">has its own FAQ</a>!  If you add support
+for recursive types (e.g. linked lists), make sure to read the <a 
+href="../ProgrammersManual.html#TypeResolve">section in the LLVM
+Programmer's Manual</a> that describes how to construct them.</li>
+
+<li><b>standard runtime</b> - Our current language allows the user to access
+arbitrary external functions, and we use it for things like "printd" and
+"putchard".  As you extend the language to add higher-level constructs, often
+these constructs make the most sense if they are lowered to calls into a
+language-supplied runtime.  For example, if you add hash tables to the language,
+it would probably make sense to add the routines to a runtime, instead of 
+inlining them all the way.</li>
+
+<li><b>memory management</b> - Currently we can only access the stack in
+Kaleidoscope.  It would also be useful to be able to allocate heap memory,
+either with calls to the standard libc malloc/free interface or with a garbage
+collector.  If you would like to use garbage collection, note that LLVM fully
+supports <a href="../GarbageCollection.html">Accurate Garbage Collection</a>
+including algorithms that move objects and need to scan/update the stack.</li>
+
+<li><b>debugger support</b> - LLVM supports generation of <a 
+href="../SourceLevelDebugging.html">DWARF Debug info</a> which is understood by
+common debuggers like GDB.  Adding support for debug info is fairly 
+straightforward.  The best way to understand it is to compile some C/C++ code
+with "<tt>llvm-gcc -g -O0</tt>" and taking a look at what it produces.</li>
+
+<li><b>exception handling support</b> - LLVM supports generation of <a 
+href="../ExceptionHandling.html">zero cost exceptions</a> which interoperate
+with code compiled in other languages.  You could also generate code by
+implicitly making every function return an error value and checking it.  You 
+could also make explicit use of setjmp/longjmp.  There are many different ways
+to go here.</li>
+
+<li><b>object orientation, generics, database access, complex numbers,
+geometric programming, ...</b> - Really, there is
+no end of crazy features that you can add to the language.</li>
+
+<li><b>unusual domains</b> - We've been talking about applying LLVM to a domain
+that many people are interested in: building a compiler for a specific language.
+However, there are many other domains that can use compiler technology that are
+not typically considered.  For example, LLVM has been used to implement OpenGL
+graphics acceleration, translate C++ code to ActionScript, and many other
+cute and clever things.  Maybe you will be the first to JIT compile a regular
+expression interpreter into native code with LLVM?</li>
+
+</ul>
+
+<p>
+Have fun - try doing something crazy and unusual.  Building a language like
+everyone else always has, is much less fun than trying something a little crazy
+or off the wall and seeing how it turns out.  If you get stuck or want to talk
+about it, feel free to email the <a 
+href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
+list</a>: it has lots of people who are interested in languages and are often
+willing to help out.
+</p>
+
+<p>Before we end this tutorial, I want to talk about some "tips and tricks" for generating
+LLVM IR.  These are some of the more subtle things that may not be obvious, but
+are very useful if you want to take advantage of LLVM's capabilities.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="llvmirproperties">Properties of the LLVM 
+IR</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>We have a couple common questions about code in the LLVM IR form - lets just
+get these out of the way right now, shall we?</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="targetindep">Target 
+Independence</a></div>
+<!-- ======================================================================= -->
+
+<div class="doc_text">
+
+<p>Kaleidoscope is an example of a "portable language": any program written in
+Kaleidoscope will work the same way on any target that it runs on.  Many other
+languages have this property, e.g. lisp, java, haskell, javascript, python, etc
+(note that while these languages are portable, not all their libraries are).</p>
+
+<p>One nice aspect of LLVM is that it is often capable of preserving target
+independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled 
+program and run it on any target that LLVM supports, even emitting C code and
+compiling that on targets that LLVM doesn't support natively.  You can trivially
+tell that the Kaleidoscope compiler generates target-independent code because it
+never queries for any target-specific information when generating code.</p>
+
+<p>The fact that LLVM provides a compact, target-independent, representation for
+code gets a lot of people excited.  Unfortunately, these people are usually
+thinking about C or a language from the C family when they are asking questions
+about language portability.  I say "unfortunately", because there is really no
+way to make (fully general) C code portable, other than shipping the source code
+around (and of course, C source code is not actually portable in general
+either - ever port a really old application from 32- to 64-bits?).</p>
+
+<p>The problem with C (again, in its full generality) is that it is heavily
+laden with target specific assumptions.  As one simple example, the preprocessor
+often destructively removes target-independence from the code when it processes
+the input text:</p>
+
+<div class="doc_code">
+<pre>
+#ifdef __i386__
+  int X = 1;
+#else
+  int X = 42;
+#endif
+</pre>
+</div>
+
+<p>While it is possible to engineer more and more complex solutions to problems
+like this, it cannot be solved in full generality in a way that is better than shipping
+the actual source code.</p>
+
+<p>That said, there are interesting subsets of C that can be made portable.  If
+you are willing to fix primitive types to a fixed size (say int = 32-bits, 
+and long = 64-bits), don't care about ABI compatibility with existing binaries,
+and are willing to give up some other minor features, you can have portable
+code.  This can make sense for specialized domains such as an
+in-kernel language.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="safety">Safety Guarantees</a></div>
+<!-- ======================================================================= -->
+
+<div class="doc_text">
+
+<p>Many of the languages above are also "safe" languages: it is impossible for
+a program written in Java to corrupt its address space and crash the process
+(assuming the JVM has no bugs).
+Safety is an interesting property that requires a combination of language
+design, runtime support, and often operating system support.</p>
+
+<p>It is certainly possible to implement a safe language in LLVM, but LLVM IR
+does not itself guarantee safety.  The LLVM IR allows unsafe pointer casts,
+use after free bugs, buffer over-runs, and a variety of other problems.  Safety
+needs to be implemented as a layer on top of LLVM and, conveniently, several
+groups have investigated this.  Ask on the <a 
+href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
+list</a> if you are interested in more details.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="langspecific">Language-Specific 
+Optimizations</a></div>
+<!-- ======================================================================= -->
+
+<div class="doc_text">
+
+<p>One thing about LLVM that turns off many people is that it does not solve all
+the world's problems in one system (sorry 'world hunger', someone else will have
+to solve you some other day).  One specific complaint is that people perceive
+LLVM as being incapable of performing high-level language-specific optimization:
+LLVM "loses too much information".</p>
+
+<p>Unfortunately, this is really not the place to give you a full and unified
+version of "Chris Lattner's theory of compiler design".  Instead, I'll make a
+few observations:</p>
+
+<p>First, you're right that LLVM does lose information.  For example, as of this
+writing, there is no way to distinguish in the LLVM IR whether an SSA-value came
+from a C "int" or a C "long" on an ILP32 machine (other than debug info).  Both
+get compiled down to an 'i32' value and the information about what it came from
+is lost.  The more general issue here, is that the LLVM type system uses
+"structural equivalence" instead of "name equivalence".  Another place this
+surprises people is if you have two types in a high-level language that have the
+same structure (e.g. two different structs that have a single int field): these
+types will compile down into a single LLVM type and it will be impossible to
+tell what it came from.</p>
+
+<p>Second, while LLVM does lose information, LLVM is not a fixed target: we 
+continue to enhance and improve it in many different ways.  In addition to
+adding new features (LLVM did not always support exceptions or debug info), we
+also extend the IR to capture important information for optimization (e.g.
+whether an argument is sign or zero extended, information about pointers
+aliasing, etc).  Many of the enhancements are user-driven: people want LLVM to
+include some specific feature, so they go ahead and extend it.</p>
+
+<p>Third, it is <em>possible and easy</em> to add language-specific
+optimizations, and you have a number of choices in how to do it.  As one trivial
+example, it is easy to add language-specific optimization passes that
+"know" things about code compiled for a language.  In the case of the C family,
+there is an optimization pass that "knows" about the standard C library
+functions.  If you call "exit(0)" in main(), it knows that it is safe to
+optimize that into "return 0;" because C specifies what the 'exit'
+function does.</p>
+
+<p>In addition to simple library knowledge, it is possible to embed a variety of
+other language-specific information into the LLVM IR.  If you have a specific
+need and run into a wall, please bring the topic up on the llvmdev list.  At the
+very worst, you can always treat LLVM as if it were a "dumb code generator" and
+implement the high-level optimizations you desire in your front-end, on the
+language-specific AST.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="tipsandtricks">Tips and Tricks</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>There is a variety of useful tips and tricks that you come to know after
+working on/with LLVM that aren't obvious at first glance.  Instead of letting
+everyone rediscover them, this section talks about some of these issues.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="offsetofsizeof">Implementing portable
+offsetof/sizeof</a></div>
+<!-- ======================================================================= -->
+
+<div class="doc_text">
+
+<p>One interesting thing that comes up, if you are trying to keep the code 
+generated by your compiler "target independent", is that you often need to know
+the size of some LLVM type or the offset of some field in an llvm structure.
+For example, you might need to pass the size of a type into a function that
+allocates memory.</p>
+
+<p>Unfortunately, this can vary widely across targets: for example the width of
+a pointer is trivially target-specific.  However, there is a <a 
+href="http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt">clever
+way to use the getelementptr instruction</a> that allows you to compute this
+in a portable way.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="gcstack">Garbage Collected 
+Stack Frames</a></div>
+<!-- ======================================================================= -->
+
+<div class="doc_text">
+
+<p>Some languages want to explicitly manage their stack frames, often so that
+they are garbage collected or to allow easy implementation of closures.  There
+are often better ways to implement these features than explicit stack frames,
+but <a 
+href="http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt">LLVM
+does support them,</a> if you want.  It requires your front-end to convert the
+code into <a 
+href="http://en.wikipedia.org/wiki/Continuation-passing_style">Continuation
+Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+  <a href="http://validator.w3.org/check/referer"><img
+  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html
index 250b533f3f8a..11dd5e2d732a 100644
--- a/docs/tutorial/index.html
+++ b/docs/tutorial/index.html
@@ -35,7 +35,7 @@
     <li><a href="OCamlLangImpl5.html">Extending the language: control flow</a></li>
     <li><a href="OCamlLangImpl6.html">Extending the language: user-defined operators</a></li>
     <li><a href="OCamlLangImpl7.html">Extending the language: mutable variables / SSA construction</a></li>
-    <li><a href="LangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
+    <li><a href="OCamlLangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
   </ol></li>
   <li>Advanced Topics
   <ol>
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index f60c0eda0306..54ee6cc3a3a4 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -8,9 +8,6 @@ if( NOT WIN32 )
     add_subdirectory(ExceptionDemo)
 endif()
 
-include(CheckIncludeFile)
-check_include_file(pthread.h HAVE_PTHREAD_H)
-
 if( HAVE_PTHREAD_H )
   add_subdirectory(ParallelJIT)
 endif( HAVE_PTHREAD_H )
diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt
index d66191556843..88c9ab7c1816 100644
--- a/examples/ExceptionDemo/CMakeLists.txt
+++ b/examples/ExceptionDemo/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS jit nativecodegen)
+set(LLVM_REQUIRES_EH 1)
 
 add_llvm_example(ExceptionDemo
   ExceptionDemo.cpp
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index e09c990f8a9f..95ccd24a6894 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -1974,6 +1974,9 @@ int main(int argc, char* argv[]) {
         // Optimizations turned on
 #ifdef ADD_OPT_PASSES
 
+        // Basic AliasAnslysis support for GVN.
+        fpm.add(llvm::createBasicAliasAnalysisPass());
+
         // Promote allocas to registers.
         fpm.add(llvm::createPromoteMemoryToRegisterPass());
 
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index 327c5c0591c0..a50d2a43dd28 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -5,6 +5,7 @@
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -584,6 +585,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index c98ee88c394f..26b3db66202f 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -5,6 +5,7 @@
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -829,6 +830,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index b7b8738f587d..838125ae77dc 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -5,6 +5,7 @@
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -947,6 +948,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
   OurFPM.add(createInstructionCombiningPass());
   // Reassociate expressions.
diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
index 9b8227c69340..da3839843bd0 100644
--- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS core jit interpreter native)
+set(LLVM_REQUIRES_RTTI 1)
 
 add_llvm_example(Kaleidoscope-Ch7
   toy.cpp
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index 0cf7869d02f8..e63578f57e6c 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -5,6 +5,7 @@
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
@@ -1111,6 +1112,8 @@ int main() {
   // Set up the optimizer pipeline.  Start with registering info about how the
   // target lays out data structures.
   OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
   // Promote allocas to registers.
   OurFPM.add(createPromoteMemoryToRegisterPass());
   // Do simple "peephole" optimizations and bit-twiddling optzns.
diff --git a/examples/Makefile b/examples/Makefile
index bc09b8e0473b..50a6db76aa25 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -10,8 +10,7 @@ LEVEL=..
 
 include $(LEVEL)/Makefile.config
 
-PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker \
-                TracingBrainF
+PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker
 
 ifeq ($(HAVE_PTHREAD),1)
 PARALLEL_DIRS += ParallelJIT
diff --git a/examples/ModuleMaker/README.txt b/examples/ModuleMaker/README.txt
index ecbe30e4cfe8..66a5d3fe0b1a 100644
--- a/examples/ModuleMaker/README.txt
+++ b/examples/ModuleMaker/README.txt
@@ -4,5 +4,5 @@
 
 This project is an extremely simple example of using some simple pieces of the 
 LLVM API.  The actual executable generated by this project simply emits an 
-LLVM bytecode file to standard output.  It is designed to show some basic 
+LLVM bitcode file to standard output.  It is designed to show some basic 
 usage of LLVM APIs, and how to link to LLVM libraries.
diff --git a/examples/OCaml-Kaleidoscope/Chapter6/Makefile b/examples/OCaml-Kaleidoscope/Chapter6/Makefile
index 831213863a6e..21f0c53df4b9 100644
--- a/examples/OCaml-Kaleidoscope/Chapter6/Makefile
+++ b/examples/OCaml-Kaleidoscope/Chapter6/Makefile
@@ -20,6 +20,15 @@ UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \
 
 OCAMLCFLAGS += -pp camlp4of
 
+OcamlSources1 = \
+	$(PROJ_SRC_DIR)/ast.ml \
+	$(PROJ_SRC_DIR)/parser.ml \
+	$(PROJ_SRC_DIR)/codegen.ml \
+	$(PROJ_SRC_DIR)/lexer.ml \
+	$(PROJ_SRC_DIR)/token.ml \
+	$(PROJ_SRC_DIR)/toplevel.ml \
+	$(PROJ_SRC_DIR)/toy.ml
+
 ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml
 
 include $(LEVEL)/bindings/ocaml/Makefile.ocaml
diff --git a/examples/OCaml-Kaleidoscope/Chapter7/Makefile b/examples/OCaml-Kaleidoscope/Chapter7/Makefile
index ddf667b0e123..99686e17ea80 100644
--- a/examples/OCaml-Kaleidoscope/Chapter7/Makefile
+++ b/examples/OCaml-Kaleidoscope/Chapter7/Makefile
@@ -20,6 +20,15 @@ UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \
 
 OCAMLCFLAGS += -pp camlp4of
 
+OcamlSources1 = \
+	$(PROJ_SRC_DIR)/ast.ml \
+	$(PROJ_SRC_DIR)/parser.ml \
+	$(PROJ_SRC_DIR)/codegen.ml \
+	$(PROJ_SRC_DIR)/lexer.ml \
+	$(PROJ_SRC_DIR)/token.ml \
+	$(PROJ_SRC_DIR)/toplevel.ml \
+	$(PROJ_SRC_DIR)/toy.ml
+
 ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml
 
 include $(LEVEL)/bindings/ocaml/Makefile.ocaml
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 75cee7d203a1..39c3cb40117c 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -33,13 +33,14 @@
 #ifndef LLVM_C_CORE_H
 #define LLVM_C_CORE_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 #ifdef __cplusplus
 
 /* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' 
    and 'unwrap' conversion functions. */
 #include "llvm/Module.h"
+#include "llvm/PassRegistry.h"
 #include "llvm/Support/IRBuilder.h"
 
 extern "C" {
@@ -92,6 +93,9 @@ typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
 /** See the llvm::PassManagerBase class. */
 typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
 
+/** See the llvm::PassRegistry class. */
+typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
+
 /** Used to get the users and usees of a Value. See the llvm::Use class. */
 typedef struct LLVMOpaqueUse *LLVMUseRef;
 
@@ -204,7 +208,8 @@ typedef enum {
   LLVMPointerTypeKind,     /**< Pointers */
   LLVMOpaqueTypeKind,      /**< Opaque: type with unknown structure */
   LLVMVectorTypeKind,      /**< SIMD 'packed' format, or other vector type */
-  LLVMMetadataTypeKind     /**< Metadata */
+  LLVMMetadataTypeKind,    /**< Metadata */
+  LLVMX86_MMXTypeKind      /**< X86 MMX */
 } LLVMTypeKind;
 
 typedef enum {
@@ -317,6 +322,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
 LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty);
 void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name);
 LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
+const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty);
 
 /** See Module::dump. */
 void LLVMDumpModule(LLVMModuleRef M);
@@ -324,6 +330,9 @@ void LLVMDumpModule(LLVMModuleRef M);
 /** See Module::setModuleInlineAsm. */
 void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm);
 
+/** See Module::getContext. */
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
+
 /*===-- Types -------------------------------------------------------------===*/
 
 /* LLVM types conform to the following hierarchy:
@@ -408,10 +417,12 @@ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy);
 LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C);
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C);
 
 LLVMTypeRef LLVMVoidType(void);
 LLVMTypeRef LLVMLabelType(void);
 LLVMTypeRef LLVMOpaqueType(void);
+LLVMTypeRef LLVMX86MMXType(void);
 
 /* Operations on type handles */
 LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy);
@@ -540,6 +551,9 @@ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
 /* Operations on scalar constants */
 LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
                           LLVMBool SignExtend);
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+                                              unsigned NumWords,
+                                              const uint64_t Words[]);
 LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text,
                                   uint8_t Radix);
 LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text,
@@ -1013,6 +1027,11 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
                                          char **OutMessage);
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
 
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+/** Return the global pass registry, for use with initialization functions.
+    See llvm::PassRegistry::getPassRegistry. */
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
 
 /*===-- Pass Managers -----------------------------------------------------===*/
 
@@ -1101,6 +1120,7 @@ namespace llvm {
   DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext,        LLVMContextRef       )
   DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use,                LLVMUseRef           )
   DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase,    LLVMPassManagerRef   )
+  DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry,       LLVMPassRegistryRef  )
   /* LLVMModuleProviderRef exists for historical reasons, but now just holds a
    * Module.
    */
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index d177381988df..28ac0ed2ab35 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -19,7 +19,7 @@
 #ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
 #define LLVM_C_ENHANCEDDISASSEMBLY_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
new file mode 100644
index 000000000000..3b59abbec03c
--- /dev/null
+++ b/include/llvm-c/Initialization.h
@@ -0,0 +1,40 @@
+/*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to LLVM initialization routines,      *|
+|* which must be called before you can use the functionality provided by      *|
+|* the corresponding LLVM library.                                            *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_INITIALIZEPASSES_H
+#define LLVM_C_INITIALIZEPASSES_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void LLVMInitializeCore(LLVMPassRegistryRef R);
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
+void LLVMInitializeIPO(LLVMPassRegistryRef R);
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R);
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R);
+void LLVMInitializeIPA(LLVMPassRegistryRef R);
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R);
+void LLVMInitializeTarget(LLVMPassRegistryRef R);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index ccfdceed0264..fca394681c76 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This header provides a C API to use the LLVM link time optimization
-// library. This is inteded to be used by linkers which are C-only in
+// library. This is intended to be used by linkers which are C-only in
 // their implementation for performing LTO.
 //
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index c94019ac98b0..2ddfb38171c2 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -52,9 +52,6 @@ void LLVMAddLICMPass(LLVMPassManagerRef PM);
 /** See llvm::createLoopDeletionPass function. */
 void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM);
 
-/** See llvm::createLoopIndexSplitPass function. */
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM);
-
 /** See llvm::createLoopRotatePass function. */
 void LLVMAddLoopRotatePass(LLVMPassManagerRef PM);
 
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index e6f69afa2ec2..1c42ce0cec77 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -18,27 +18,28 @@
 
 #include <stdbool.h>
 #include <stddef.h>
-#include "llvm/System/DataTypes.h"
+#include <unistd.h>
 
-#define LTO_API_VERSION 3
+#define LTO_API_VERSION 4
 
 typedef enum {
-    LTO_SYMBOL_ALIGNMENT_MASK         = 0x0000001F,    /* log2 of alignment */
-    LTO_SYMBOL_PERMISSIONS_MASK       = 0x000000E0,    
-    LTO_SYMBOL_PERMISSIONS_CODE       = 0x000000A0,    
-    LTO_SYMBOL_PERMISSIONS_DATA       = 0x000000C0,    
-    LTO_SYMBOL_PERMISSIONS_RODATA     = 0x00000080,    
-    LTO_SYMBOL_DEFINITION_MASK        = 0x00000700,    
-    LTO_SYMBOL_DEFINITION_REGULAR     = 0x00000100,    
-    LTO_SYMBOL_DEFINITION_TENTATIVE   = 0x00000200,    
-    LTO_SYMBOL_DEFINITION_WEAK        = 0x00000300,    
-    LTO_SYMBOL_DEFINITION_UNDEFINED   = 0x00000400,    
-    LTO_SYMBOL_DEFINITION_WEAKUNDEF   = 0x00000500,
-    LTO_SYMBOL_SCOPE_MASK             = 0x00003800,    
-    LTO_SYMBOL_SCOPE_INTERNAL         = 0x00000800,    
-    LTO_SYMBOL_SCOPE_HIDDEN           = 0x00001000,    
-    LTO_SYMBOL_SCOPE_PROTECTED        = 0x00002000,    
-    LTO_SYMBOL_SCOPE_DEFAULT          = 0x00001800    
+    LTO_SYMBOL_ALIGNMENT_MASK              = 0x0000001F, /* log2 of alignment */
+    LTO_SYMBOL_PERMISSIONS_MASK            = 0x000000E0,    
+    LTO_SYMBOL_PERMISSIONS_CODE            = 0x000000A0,    
+    LTO_SYMBOL_PERMISSIONS_DATA            = 0x000000C0,    
+    LTO_SYMBOL_PERMISSIONS_RODATA          = 0x00000080,    
+    LTO_SYMBOL_DEFINITION_MASK             = 0x00000700,    
+    LTO_SYMBOL_DEFINITION_REGULAR          = 0x00000100,    
+    LTO_SYMBOL_DEFINITION_TENTATIVE        = 0x00000200,    
+    LTO_SYMBOL_DEFINITION_WEAK             = 0x00000300,    
+    LTO_SYMBOL_DEFINITION_UNDEFINED        = 0x00000400,    
+    LTO_SYMBOL_DEFINITION_WEAKUNDEF        = 0x00000500,
+    LTO_SYMBOL_SCOPE_MASK                  = 0x00003800,    
+    LTO_SYMBOL_SCOPE_INTERNAL              = 0x00000800,    
+    LTO_SYMBOL_SCOPE_HIDDEN                = 0x00001000,    
+    LTO_SYMBOL_SCOPE_PROTECTED             = 0x00002000,    
+    LTO_SYMBOL_SCOPE_DEFAULT               = 0x00001800,
+    LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800
 } lto_symbol_attributes;
 
 typedef enum {
@@ -121,6 +122,13 @@ lto_module_create(const char* path);
 extern lto_module_t
 lto_module_create_from_memory(const void* mem, size_t length);
 
+/**
+ * Loads an object file from disk. The seek point of fd is not preserved.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_module_t
+lto_module_create_from_fd(int fd, const char *path, off_t size);
+
 
 /**
  * Frees all memory internally allocated by the module.
@@ -146,7 +154,7 @@ lto_module_set_target_triple(lto_module_t mod, const char *triple);
 /**
  * Returns the number of symbols in the object module.
  */
-extern uint32_t
+extern unsigned int
 lto_module_get_num_symbols(lto_module_t mod);
 
 
@@ -154,14 +162,14 @@ lto_module_get_num_symbols(lto_module_t mod);
  * Returns the name of the ith symbol in the object module.
  */
 extern const char*
-lto_module_get_symbol_name(lto_module_t mod, uint32_t index);
+lto_module_get_symbol_name(lto_module_t mod, unsigned int index);
 
 
 /**
  * Returns the attributes of the ith symbol in the object module.
  */
 extern lto_symbol_attributes
-lto_module_get_symbol_attribute(lto_module_t mod, uint32_t index);
+lto_module_get_symbol_attribute(lto_module_t mod, unsigned int index);
 
 
 /**
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index dfe4e0f49adb..ca4138b825a6 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -246,6 +246,13 @@ namespace llvm {
     static APFloat getSmallestNormalized(const fltSemantics &Sem,
                                          bool Negative = false);
 
+    /// getAllOnesValue - Returns a float which is bitcasted from
+    /// an all one value int.
+    ///
+    /// \param BitWidth - Select float type
+    /// \param isIEEE   - If 128 bit number, select between PPC and IEEE
+    static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false);
+
     /// Profile - Used to insert APFloat objects, or objects that contain
     ///  APFloat objects, into FoldingSets.
     void Profile(FoldingSetNodeID& NID) const;
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 8004cb4b123b..b91d5dc9bcf9 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -275,12 +275,6 @@ public:
   ///  objects, into FoldingSets.
   void Profile(FoldingSetNodeID& id) const;
 
-  /// @brief Used by the Bitcode serializer to emit APInts to Bitcode.
-  void Emit(Serializer& S) const;
-
-  /// @brief Used by the Bitcode deserializer to deserialize APInts.
-  void Read(Deserializer& D);
-
   /// @}
   /// @name Value Tests
   /// @{
@@ -302,7 +296,7 @@ public:
   /// @returns true if this APInt is positive.
   /// @brief Determine if this APInt Value is positive.
   bool isStrictlyPositive() const {
-    return isNonNegative() && (*this) != 0;
+    return isNonNegative() && !!*this;
   }
 
   /// This checks to see if the value has all bits of the APInt are set or not.
@@ -330,15 +324,14 @@ public:
   /// value for the APInt's bit width.
   /// @brief Determine if this is the smallest unsigned value.
   bool isMinValue() const {
-    return countPopulation() == 0;
+    return !*this;
   }
 
   /// This checks to see if the value of this APInt is the minimum signed
   /// value for the APInt's bit width.
   /// @brief Determine if this is the smallest signed value.
   bool isMinSignedValue() const {
-    return BitWidth == 1 ? VAL == 1 :
-                           isNegative() && countPopulation() == 1;
+    return BitWidth == 1 ? VAL == 1 : isNegative() && isPowerOf2();
   }
 
   /// @brief Check if this APInt has an N-bits unsigned integer value.
@@ -348,10 +341,8 @@ public:
       return true;
 
     if (isSingleWord())
-      return VAL == (VAL & (~0ULL >> (64 - N)));
-    APInt Tmp(N, getNumWords(), pVal);
-    Tmp.zext(getBitWidth());
-    return Tmp == (*this);
+      return isUIntN(N, VAL);
+    return APInt(N, getNumWords(), pVal).zext(getBitWidth()) == (*this);
   }
 
   /// @brief Check if this APInt has an N-bits signed integer value.
@@ -361,7 +352,11 @@ public:
   }
 
   /// @returns true if the argument APInt value is a power of two > 0.
-  bool isPowerOf2() const;
+  bool isPowerOf2() const {
+    if (isSingleWord())
+      return isPowerOf2_64(VAL);
+    return countPopulationSlowCase() == 1;
+  }
 
   /// isSignBit - Return true if this is the value returned by getSignBit.
   bool isSignBit() const { return isMinSignedValue(); }
@@ -369,7 +364,7 @@ public:
   /// This converts the APInt to a boolean value as a test against zero.
   /// @brief Boolean conversion function.
   bool getBoolValue() const {
-    return *this != 0;
+    return !!*this;
   }
 
   /// getLimitedValue - If this value is smaller than the specified limit,
@@ -385,12 +380,14 @@ public:
   /// @{
   /// @brief Gets maximum unsigned value of APInt for specific bit width.
   static APInt getMaxValue(unsigned numBits) {
-    return APInt(numBits, 0).set();
+    return getAllOnesValue(numBits);
   }
 
   /// @brief Gets maximum signed value of APInt for a specific bit width.
   static APInt getSignedMaxValue(unsigned numBits) {
-    return APInt(numBits, 0).set().clear(numBits - 1);
+    APInt API = getAllOnesValue(numBits);
+    API.clearBit(numBits - 1);
+    return API;
   }
 
   /// @brief Gets minimum unsigned value of APInt for a specific bit width.
@@ -400,7 +397,9 @@ public:
 
   /// @brief Gets minimum signed value of APInt for a specific bit width.
   static APInt getSignedMinValue(unsigned numBits) {
-    return APInt(numBits, 0).set(numBits - 1);
+    APInt API(numBits, 0);
+    API.setBit(numBits - 1);
+    return API;
   }
 
   /// getSignBit - This is just a wrapper function of getSignedMinValue(), and
@@ -413,7 +412,7 @@ public:
   /// @returns the all-ones value for an APInt of the specified bit-width.
   /// @brief Get the all-ones value.
   static APInt getAllOnesValue(unsigned numBits) {
-    return APInt(numBits, 0).set();
+    return APInt(numBits, -1ULL, true);
   }
 
   /// @returns the '0' value for an APInt of the specified bit-width.
@@ -432,6 +431,13 @@ public:
   /// @returns the low "numBits" bits of this APInt.
   APInt getLoBits(unsigned numBits) const;
 
+  /// getOneBitSet - Return an APInt with exactly one bit set in the result.
+  static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
+    APInt Res(numBits, 0);
+    Res.setBit(BitNo);
+    return Res;
+  }
+  
   /// Constructs an APInt value that has a contiguous range of bits set. The
   /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
   /// bits will be zero. For example, with parameters(32, 0, 16) you would get
@@ -530,7 +536,7 @@ public:
   /// @brief Unary bitwise complement operator.
   APInt operator~() const {
     APInt Result(*this);
-    Result.flip();
+    Result.flipAllBits();
     return Result;
   }
 
@@ -741,11 +747,11 @@ public:
   /// RHS are treated as unsigned quantities for purposes of this division.
   /// @returns a new APInt value containing the division result
   /// @brief Unsigned division operation.
-  APInt udiv(const APInt& RHS) const;
+  APInt udiv(const APInt &RHS) const;
 
   /// Signed divide this APInt by APInt RHS.
   /// @brief Signed division function for APInt.
-  APInt sdiv(const APInt& RHS) const {
+  APInt sdiv(const APInt &RHS) const {
     if (isNegative())
       if (RHS.isNegative())
         return (-(*this)).udiv(-RHS);
@@ -763,11 +769,11 @@ public:
   /// which is *this.
   /// @returns a new APInt value containing the remainder result
   /// @brief Unsigned remainder operation.
-  APInt urem(const APInt& RHS) const;
+  APInt urem(const APInt &RHS) const;
 
   /// Signed remainder operation on APInt.
   /// @brief Function for signed remainder operation.
-  APInt srem(const APInt& RHS) const {
+  APInt srem(const APInt &RHS) const {
     if (isNegative())
       if (RHS.isNegative())
         return -((-(*this)).urem(-RHS));
@@ -788,8 +794,7 @@ public:
                       APInt &Quotient, APInt &Remainder);
 
   static void sdivrem(const APInt &LHS, const APInt &RHS,
-                      APInt &Quotient, APInt &Remainder)
-  {
+                      APInt &Quotient, APInt &Remainder) {
     if (LHS.isNegative()) {
       if (RHS.isNegative())
         APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
@@ -804,6 +809,16 @@ public:
       APInt::udivrem(LHS, RHS, Quotient, Remainder);
     }
   }
+  
+  
+  // Operations that return overflow indicators.
+  APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
+  APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
+  APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
+  APInt usub_ov(const APInt &RHS, bool &Overflow) const;
+  APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
+  APInt smul_ov(const APInt &RHS, bool &Overflow) const;
+  APInt sshl_ov(unsigned Amt, bool &Overflow) const;
 
   /// @returns the bit value at bitPosition
   /// @brief Array-indexing support.
@@ -868,7 +883,7 @@ public:
   /// the validity of the less-than relationship.
   /// @returns true if *this < RHS when both are considered unsigned.
   /// @brief Unsigned less than comparison
-  bool ult(const APInt& RHS) const;
+  bool ult(const APInt &RHS) const;
 
   /// Regards both *this as an unsigned quantity and compares it with RHS for
   /// the validity of the less-than relationship.
@@ -988,6 +1003,9 @@ public:
     return sge(APInt(getBitWidth(), RHS));
   }
 
+  
+  
+  
   /// This operation tests if there are any pairs of corresponding bits
   /// between this APInt and RHS that are both set.
   bool intersects(const APInt &RHS) const {
@@ -1000,80 +1018,78 @@ public:
   /// Truncate the APInt to a specified width. It is an error to specify a width
   /// that is greater than or equal to the current width.
   /// @brief Truncate to new width.
-  APInt &trunc(unsigned width);
+  APInt trunc(unsigned width) const;
 
   /// This operation sign extends the APInt to a new width. If the high order
   /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
   /// It is an error to specify a width that is less than or equal to the
   /// current width.
   /// @brief Sign extend to a new width.
-  APInt &sext(unsigned width);
+  APInt sext(unsigned width) const;
 
   /// This operation zero extends the APInt to a new width. The high order bits
   /// are filled with 0 bits.  It is an error to specify a width that is less
   /// than or equal to the current width.
   /// @brief Zero extend to a new width.
-  APInt &zext(unsigned width);
+  APInt zext(unsigned width) const;
 
   /// Make this APInt have the bit width given by \p width. The value is sign
   /// extended, truncated, or left alone to make it that width.
   /// @brief Sign extend or truncate to width
-  APInt &sextOrTrunc(unsigned width);
+  APInt sextOrTrunc(unsigned width) const;
 
   /// Make this APInt have the bit width given by \p width. The value is zero
   /// extended, truncated, or left alone to make it that width.
   /// @brief Zero extend or truncate to width
-  APInt &zextOrTrunc(unsigned width);
+  APInt zextOrTrunc(unsigned width) const;
 
   /// @}
   /// @name Bit Manipulation Operators
   /// @{
   /// @brief Set every bit to 1.
-  APInt& set() {
-    if (isSingleWord()) {
+  void setAllBits() {
+    if (isSingleWord())
       VAL = -1ULL;
-      return clearUnusedBits();
+    else {
+      // Set all the bits in all the words.
+      for (unsigned i = 0; i < getNumWords(); ++i)
+	pVal[i] = -1ULL;
     }
-
-    // Set all the bits in all the words.
-    for (unsigned i = 0; i < getNumWords(); ++i)
-      pVal[i] = -1ULL;
     // Clear the unused ones
-    return clearUnusedBits();
+    clearUnusedBits();
   }
 
   /// Set the given bit to 1 whose position is given as "bitPosition".
   /// @brief Set a given bit to 1.
-  APInt& set(unsigned bitPosition);
+  void setBit(unsigned bitPosition);
 
   /// @brief Set every bit to 0.
-  APInt& clear() {
+  void clearAllBits() {
     if (isSingleWord())
       VAL = 0;
     else
       memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
-    return *this;
   }
 
   /// Set the given bit to 0 whose position is given as "bitPosition".
   /// @brief Set a given bit to 0.
-  APInt& clear(unsigned bitPosition);
+  void clearBit(unsigned bitPosition);
 
   /// @brief Toggle every bit to its opposite value.
-  APInt& flip() {
-    if (isSingleWord()) {
+  void flipAllBits() {
+    if (isSingleWord())
       VAL ^= -1ULL;
-      return clearUnusedBits();
+    else {
+      for (unsigned i = 0; i < getNumWords(); ++i)
+        pVal[i] ^= -1ULL;
     }
-    for (unsigned i = 0; i < getNumWords(); ++i)
-      pVal[i] ^= -1ULL;
-    return clearUnusedBits();
+    clearUnusedBits();
   }
 
   /// Toggle a given bit to its opposite value whose position is given
   /// as "bitPosition".
   /// @brief Toggles a given bit to its opposite value.
-  APInt& flip(unsigned bitPosition);
+  void flipBit(unsigned bitPosition);
 
   /// @}
   /// @name Value Characterization Functions
@@ -1281,37 +1297,27 @@ public:
   }
 
   /// The conversion does not do a translation from double to integer, it just
-  /// re-interprets the bits of the double. Note that it is valid to do this on
-  /// any bit width but bits from V may get truncated.
+  /// re-interprets the bits of the double.
   /// @brief Converts a double to APInt bits.
-  APInt& doubleToBits(double V) {
+  static APInt doubleToBits(double V) {
     union {
       uint64_t I;
       double D;
     } T;
     T.D = V;
-    if (isSingleWord())
-      VAL = T.I;
-    else
-      pVal[0] = T.I;
-    return clearUnusedBits();
+    return APInt(sizeof T * CHAR_BIT, T.I);
   }
 
   /// The conversion does not do a translation from float to integer, it just
-  /// re-interprets the bits of the float. Note that it is valid to do this on
-  /// any bit width but bits from V may get truncated.
+  /// re-interprets the bits of the float.
   /// @brief Converts a float to APInt bits.
-  APInt& floatToBits(float V) {
+  static APInt floatToBits(float V) {
     union {
       unsigned I;
       float F;
     } T;
     T.F = V;
-    if (isSingleWord())
-      VAL = T.I;
-    else
-      pVal[0] = T.I;
-    return clearUnusedBits();
+    return APInt(sizeof T * CHAR_BIT, T.I);
   }
 
   /// @}
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 1c9931c30fe5..54a7b601d1f1 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -68,20 +68,22 @@ public:
   }
   using APInt::toString;
 
-  APSInt& extend(uint32_t width) {
+  APSInt trunc(uint32_t width) const {
+    return APSInt(APInt::trunc(width), IsUnsigned);
+  }
+
+  APSInt extend(uint32_t width) const {
     if (IsUnsigned)
-      zext(width);
+      return APSInt(zext(width), IsUnsigned);
     else
-      sext(width);
-    return *this;
+      return APSInt(sext(width), IsUnsigned);
   }
 
-  APSInt& extOrTrunc(uint32_t width) {
+  APSInt extOrTrunc(uint32_t width) const {
       if (IsUnsigned)
-        zextOrTrunc(width);
+        return APSInt(zextOrTrunc(width), IsUnsigned);
       else
-        sextOrTrunc(width);
-      return *this;
+        return APSInt(sextOrTrunc(width), IsUnsigned);
   }
 
   const APSInt &operator%=(const APSInt &RHS) {
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
new file mode 100644
index 000000000000..1c5470d678b6
--- /dev/null
+++ b/include/llvm/ADT/ArrayRef.h
@@ -0,0 +1,121 @@
+//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ARRAYREF_H
+#define LLVM_ADT_ARRAYREF_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <vector>
+
+namespace llvm {
+  class APInt;
+  
+  /// ArrayRef - Represent a constant reference to an array (0 or more elements
+  /// consecutively in memory), i.e. a start pointer and a length.  It allows
+  /// various APIs to take consecutive elements easily and conveniently.
+  ///
+  /// This class does not own the underlying data, it is expected to be used in
+  /// situations where the data resides in some other buffer, whose lifetime
+  /// extends past that of the StringRef. For this reason, it is not in general
+  /// safe to store a ArrayRef.
+  ///
+  /// This is intended to be trivially copyable, so it should be passed by
+  /// value.
+  template<typename T>
+  class ArrayRef {
+  public:
+    typedef const T *iterator;
+    typedef const T *const_iterator;
+    typedef size_t size_type;
+    
+  private:
+    /// The start of the array, in an external buffer.
+    const T *Data;
+    
+    /// The number of elements.
+    size_t Length;
+    
+  public:
+    /// @name Constructors
+    /// @{
+    
+    /// Construct an empty ArrayRef.
+    /*implicit*/ ArrayRef() : Data(0), Length(0) {}
+    
+    /// Construct an ArrayRef from a single element.
+    /*implicit*/ ArrayRef(const T &OneElt)
+      : Data(&OneElt), Length(1) {}
+    
+    /// Construct an ArrayRef from a pointer and length.
+    /*implicit*/ ArrayRef(const T *data, size_t length)
+      : Data(data), Length(length) {}
+    
+    /// Construct an ArrayRef from a SmallVector.
+    /*implicit*/ ArrayRef(const SmallVectorImpl<T> &Vec)
+      : Data(Vec.data()), Length(Vec.size()) {}
+
+    /// Construct an ArrayRef from a std::vector.
+    /*implicit*/ ArrayRef(const std::vector<T> &Vec)
+      : Data(Vec.empty() ? (T*)0 : &Vec[0]), Length(Vec.size()) {}
+    
+    // TODO: C arrays.
+    
+    /// @}
+    /// @name Simple Operations
+    /// @{
+
+    iterator begin() const { return Data; }
+    iterator end() const { return Data + Length; }
+    
+    /// empty - Check if the array is empty.
+    bool empty() const { return Length == 0; }
+    
+    /// size - Get the array size.
+    size_t size() const { return Length; }
+    
+    /// front - Get the first element.
+    const T &front() const {
+      assert(!empty());
+      return Data[0];
+    }
+    
+    /// back - Get the last element.
+    const T &back() const {
+      assert(!empty());
+      return Data[Length-1];
+    }
+    
+    /// @}
+    /// @name Operator Overloads
+    /// @{
+    
+    const T &operator[](size_t Index) const {
+      assert(Index < Length && "Invalid index!");
+      return Data[Index];
+    }
+    
+    /// @}
+    /// @name Expensive Operations
+    /// @{
+    
+    std::vector<T> vec() const {
+      return std::vector<T>(Data, Data+Length);
+    }
+    
+    /// @}
+  };
+  
+  // ArrayRefs can be treated like a POD type.
+  template <typename T> struct isPodLike;
+  template <typename T> struct isPodLike<ArrayRef<T> > {
+    static const bool value = true;
+  };
+}
+
+#endif
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9dcb9e106f26..ac1cf0c79a8f 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cassert>
 #include <climits>
+#include <cstdlib>
 #include <cstring>
 
 namespace llvm {
@@ -77,7 +78,7 @@ public:
   /// bits are initialized to the specified value.
   explicit BitVector(unsigned s, bool t = false) : Size(s) {
     Capacity = NumBitWords(s);
-    Bits = new BitWord[Capacity];
+    Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
     init_words(Bits, Capacity, t);
     if (t)
       clear_unused_bits();
@@ -92,12 +93,12 @@ public:
     }
 
     Capacity = NumBitWords(RHS.size());
-    Bits = new BitWord[Capacity];
-    std::copy(RHS.Bits, &RHS.Bits[Capacity], Bits);
+    Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
+    std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord));
   }
 
   ~BitVector() {
-    delete[] Bits;
+    std::free(Bits);
   }
 
   /// empty - Tests whether there are no bits in this bitvector.
@@ -127,6 +128,12 @@ public:
     return false;
   }
 
+  /// all - Returns true if all bits are set.
+  bool all() const {
+    // TODO: Optimize this.
+    return count() == size();
+  }
+
   /// none - Returns true if none of the bits are set.
   bool none() const {
     return !any();
@@ -335,18 +342,18 @@ public:
     unsigned RHSWords = NumBitWords(Size);
     if (Size <= Capacity * BITWORD_SIZE) {
       if (Size)
-        std::copy(RHS.Bits, &RHS.Bits[RHSWords], Bits);
+        std::memcpy(Bits, RHS.Bits, RHSWords * sizeof(BitWord));
       clear_unused_bits();
       return *this;
     }
 
     // Grow the bitvector to have enough elements.
     Capacity = RHSWords;
-    BitWord *NewBits = new BitWord[Capacity];
-    std::copy(RHS.Bits, &RHS.Bits[RHSWords], NewBits);
+    BitWord *NewBits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
+    std::memcpy(NewBits, RHS.Bits, Capacity * sizeof(BitWord));
 
     // Destroy the old bits.
-    delete[] Bits;
+    std::free(Bits);
     Bits = NewBits;
 
     return *this;
@@ -384,17 +391,8 @@ private:
   }
 
   void grow(unsigned NewSize) {
-    unsigned OldCapacity = Capacity;
-    Capacity = NumBitWords(NewSize);
-    BitWord *NewBits = new BitWord[Capacity];
-
-    // Copy the old bits over.
-    if (OldCapacity != 0)
-      std::copy(Bits, &Bits[OldCapacity], NewBits);
-
-    // Destroy the old bits.
-    delete[] Bits;
-    Bits = NewBits;
+    Capacity = std::max(NumBitWords(NewSize), Capacity * 2);
+    Bits = (BitWord *)std::realloc(Bits, Capacity * sizeof(BitWord));
 
     clear_unused_bits();
   }
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 06a1575da4d0..61d6ae70e1d9 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -18,6 +18,7 @@
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include "llvm/Support/type_traits.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include <algorithm>
 #include <iterator>
 #include <new>
 #include <utility>
@@ -385,7 +386,7 @@ private:
         // Insert the key/value into the new table.
         BucketT *DestBucket;
         bool FoundVal = LookupBucketFor(B->first, DestBucket);
-        FoundVal = FoundVal; // silence warning.
+        (void)FoundVal; // silence warning.
         assert(!FoundVal && "Key already in new map?");
         DestBucket->first = B->first;
         new (&DestBucket->second) ValueT(B->second);
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 529938699270..25e341bf4fd4 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -102,6 +102,20 @@ template<> struct DenseMapInfo<int> {
   }
 };
 
+// Provide DenseMapInfo for longs.
+template<> struct DenseMapInfo<long> {
+  static inline long getEmptyKey() {
+    return (1UL << (sizeof(long) * 8 - 1)) - 1L;
+  }
+  static inline long getTombstoneKey() { return getEmptyKey() - 1L; }
+  static unsigned getHashValue(const long& Val) {
+    return (unsigned)(Val * 37L);
+  }
+  static bool isEqual(const long& LHS, const long& RHS) {
+    return LHS == RHS;
+  }
+};
+
 // Provide DenseMapInfo for long longs.
 template<> struct DenseMapInfo<long long> {
   static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; }
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index 00bcf64a2fc7..67321f539848 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -33,6 +33,9 @@ public:
   bool empty() const { return TheMap.empty(); }
   unsigned size() const { return TheMap.size(); }
 
+  /// Grow the denseset so that it has at least Size buckets. Does not shrink
+  void resize(size_t Size) { TheMap.resize(Size); }
+
   void clear() {
     TheMap.clear();
   }
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index 07a5edfdb6ca..771476c30361 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_ADT_EQUIVALENCECLASSES_H
 #define LLVM_ADT_EQUIVALENCECLASSES_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <set>
 
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 662b5e273548..879dbd05e174 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -16,7 +16,7 @@
 #ifndef LLVM_ADT_FOLDINGSET_H
 #define LLVM_ADT_FOLDINGSET_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 
diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h
index 968ce152779f..d3196ca23df9 100644
--- a/include/llvm/ADT/ImmutableIntervalMap.h
+++ b/include/llvm/ADT/ImmutableIntervalMap.h
@@ -94,7 +94,7 @@ public:
     : ImutAVLFactory<ImutInfo>(Alloc) {}
 
   TreeTy *Add(TreeTy *T, value_type_ref V) {
-    T = Add_internal(V,T);
+    T = add_internal(V,T);
     this->MarkImmutable(T);
     return T;
   }
@@ -103,20 +103,20 @@ public:
     if (!T)
       return NULL;
 
-    key_type_ref CurrentKey = ImutInfo::KeyOfValue(this->Value(T));
+    key_type_ref CurrentKey = ImutInfo::KeyOfValue(this->getValue(T));
 
     if (ImutInfo::isContainedIn(K, CurrentKey))
       return T;
     else if (ImutInfo::isLess(K, CurrentKey))
-      return Find(this->Left(T), K);
+      return Find(this->getLeft(T), K);
     else
-      return Find(this->Right(T), K);
+      return Find(this->getRight(T), K);
   }
 
 private:
-  TreeTy *Add_internal(value_type_ref V, TreeTy *T) {
+  TreeTy *add_internal(value_type_ref V, TreeTy *T) {
     key_type_ref K = ImutInfo::KeyOfValue(V);
-    T = RemoveAllOverlaps(T, K);
+    T = removeAllOverlaps(T, K);
     if (this->isEmpty(T))
       return this->CreateNode(NULL, V, NULL);
 
@@ -125,38 +125,38 @@ private:
     key_type_ref KCurrent = ImutInfo::KeyOfValue(this->Value(T));
 
     if (ImutInfo::isLess(K, KCurrent))
-      return this->Balance(Add_internal(V, this->Left(T)), this->Value(T), 
+      return this->Balance(add_internal(V, this->Left(T)), this->Value(T), 
                                         this->Right(T));
     else
       return this->Balance(this->Left(T), this->Value(T), 
-                           Add_internal(V, this->Right(T)));
+                           add_internal(V, this->Right(T)));
   }
 
   // Remove all overlaps from T.
-  TreeTy *RemoveAllOverlaps(TreeTy *T, key_type_ref K) {
+  TreeTy *removeAllOverlaps(TreeTy *T, key_type_ref K) {
     bool Changed;
     do {
       Changed = false;
-      T = RemoveOverlap(T, K, Changed);
-      this->MarkImmutable(T);
+      T = removeOverlap(T, K, Changed);
+      this->markImmutable(T);
     } while (Changed);
 
     return T;
   }
 
   // Remove one overlap from T.
-  TreeTy *RemoveOverlap(TreeTy *T, key_type_ref K, bool &Changed) {
+  TreeTy *removeOverlap(TreeTy *T, key_type_ref K, bool &Changed) {
     if (!T)
       return NULL;
     Interval CurrentK = ImutInfo::KeyOfValue(this->Value(T));
 
     // If current key does not overlap the inserted key.
     if (CurrentK.getStart() > K.getEnd())
-      return this->Balance(RemoveOverlap(this->Left(T), K, Changed),
+      return this->Balance(removeOverlap(this->Left(T), K, Changed),
                            this->Value(T), this->Right(T));
     else if (CurrentK.getEnd() < K.getStart())
       return this->Balance(this->Left(T), this->Value(T), 
-                           RemoveOverlap(this->Right(T), K, Changed));
+                           removeOverlap(this->Right(T), K, Changed));
 
     // Current key overlaps with the inserted key.
     // Remove the current key.
@@ -167,18 +167,18 @@ private:
     if (CurrentK.getStart() < K.getStart()) {
       if (CurrentK.getEnd() <= K.getEnd()) {
         Interval NewK(CurrentK.getStart(), K.getStart()-1);
-        return Add_internal(std::make_pair(NewK, OldData), T);
+        return add_internal(std::make_pair(NewK, OldData), T);
       } else {
         Interval NewK1(CurrentK.getStart(), K.getStart()-1);
-        T = Add_internal(std::make_pair(NewK1, OldData), T); 
+        T = add_internal(std::make_pair(NewK1, OldData), T); 
 
         Interval NewK2(K.getEnd()+1, CurrentK.getEnd());
-        return Add_internal(std::make_pair(NewK2, OldData), T);
+        return add_internal(std::make_pair(NewK2, OldData), T);
       }
     } else {
       if (CurrentK.getEnd() > K.getEnd()) {
         Interval NewK(K.getEnd()+1, CurrentK.getEnd());
-        return Add_internal(std::make_pair(NewK, OldData), T);
+        return add_internal(std::make_pair(NewK, OldData), T);
       } else
         return T;
     }
@@ -209,22 +209,22 @@ public:
   public:
     Factory(BumpPtrAllocator& Alloc) : F(Alloc) {}
 
-    ImmutableIntervalMap GetEmptyMap() { 
-      return ImmutableIntervalMap(F.GetEmptyTree()); 
+    ImmutableIntervalMap getEmptyMap() { 
+      return ImmutableIntervalMap(F.getEmptyTree()); 
     }
 
-    ImmutableIntervalMap Add(ImmutableIntervalMap Old, 
+    ImmutableIntervalMap add(ImmutableIntervalMap Old, 
                              key_type_ref K, data_type_ref D) {
-      TreeTy *T = F.Add(Old.Root, std::make_pair<key_type, data_type>(K, D));
-      return ImmutableIntervalMap(F.GetCanonicalTree(T));
+      TreeTy *T = F.add(Old.Root, std::make_pair<key_type, data_type>(K, D));
+      return ImmutableIntervalMap(F.getCanonicalTree(T));
     }
 
-    ImmutableIntervalMap Remove(ImmutableIntervalMap Old, key_type_ref K) {
-      TreeTy *T = F.Remove(Old.Root, K);
-      return ImmutableIntervalMap(F.GetCanonicalTree(T));
+    ImmutableIntervalMap remove(ImmutableIntervalMap Old, key_type_ref K) {
+      TreeTy *T = F.remove(Old.Root, K);
+      return ImmutableIntervalMap(F.getCanonicalTree(T));
     }
 
-    data_type *Lookup(ImmutableIntervalMap M, key_type_ref K) {
+    data_type *lookup(ImmutableIntervalMap M, key_type_ref K) {
       TreeTy *T = F.Find(M.getRoot(), K);
       if (T)
         return &T->getValue().second;
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 7757c08770bd..714355b95131 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -16,7 +16,7 @@
 
 #include "llvm/Support/Allocator.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
@@ -156,7 +156,7 @@ public:
     if (ownsAllocator()) delete &getAllocator();
   }
 
-  ImmutableList<T> Concat(const T& Head, ImmutableList<T> Tail) {
+  ImmutableList<T> concat(const T& Head, ImmutableList<T> Tail) {
     // Profile the new list to see if it already exists in our cache.
     FoldingSetNodeID ID;
     void* InsertPos;
@@ -178,16 +178,16 @@ public:
     return L;
   }
 
-  ImmutableList<T> Add(const T& D, ImmutableList<T> L) {
-    return Concat(D, L);
+  ImmutableList<T> add(const T& D, ImmutableList<T> L) {
+    return concat(D, L);
   }
 
-  ImmutableList<T> GetEmptyList() const {
+  ImmutableList<T> getEmptyList() const {
     return ImmutableList<T>(0);
   }
 
-  ImmutableList<T> Create(const T& X) {
-    return Concat(X, GetEmptyList());
+  ImmutableList<T> create(const T& X) {
+    return Concat(X, getEmptyList());
   }
 };
 
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index 8af128ef3bd8..e439a0994821 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -76,7 +76,23 @@ public:
   /// should use a Factory object to create maps instead of directly
   /// invoking the constructor, but there are cases where make this
   /// constructor public is useful.
-  explicit ImmutableMap(const TreeTy* R) : Root(const_cast<TreeTy*>(R)) {}
+  explicit ImmutableMap(const TreeTy* R) : Root(const_cast<TreeTy*>(R)) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableMap(const ImmutableMap &X) : Root(X.Root) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableMap &operator=(const ImmutableMap &X) {
+    if (Root != X.Root) {
+      if (X.Root) { X.Root->retain(); }
+      if (Root) { Root->release(); }
+      Root = X.Root;
+    }
+    return *this;
+  }
+  ~ImmutableMap() {
+    if (Root) { Root->release(); }
+  }
 
   class Factory {
     typename TreeTy::Factory F;
@@ -89,16 +105,16 @@ public:
     Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
       : F(Alloc), Canonicalize(canonicalize) {}
 
-    ImmutableMap GetEmptyMap() { return ImmutableMap(F.GetEmptyTree()); }
+    ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); }
 
-    ImmutableMap Add(ImmutableMap Old, key_type_ref K, data_type_ref D) {
-      TreeTy *T = F.Add(Old.Root, std::make_pair<key_type,data_type>(K,D));
-      return ImmutableMap(Canonicalize ? F.GetCanonicalTree(T): T);
+    ImmutableMap add(ImmutableMap Old, key_type_ref K, data_type_ref D) {
+      TreeTy *T = F.add(Old.Root, std::make_pair<key_type,data_type>(K,D));
+      return ImmutableMap(Canonicalize ? F.getCanonicalTree(T): T);
     }
 
-    ImmutableMap Remove(ImmutableMap Old, key_type_ref K) {
-      TreeTy *T = F.Remove(Old.Root,K);
-      return ImmutableMap(Canonicalize ? F.GetCanonicalTree(T): T);
+    ImmutableMap remove(ImmutableMap Old, key_type_ref K) {
+      TreeTy *T = F.remove(Old.Root,K);
+      return ImmutableMap(Canonicalize ? F.getCanonicalTree(T): T);
     }
 
   private:
@@ -110,15 +126,30 @@ public:
     return Root ? Root->contains(K) : false;
   }
 
-  bool operator==(ImmutableMap RHS) const {
+  bool operator==(const ImmutableMap &RHS) const {
     return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root;
   }
 
-  bool operator!=(ImmutableMap RHS) const {
+  bool operator!=(const ImmutableMap &RHS) const {
     return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root;
   }
 
-  TreeTy* getRoot() const { return Root; }
+  TreeTy *getRoot() const {
+    if (Root) { Root->retain(); }
+    return Root;
+  }
+
+  TreeTy *getRootWithoutRetain() const {
+    return Root;
+  }
+  
+  void manualRetain() {
+    if (Root) Root->retain();
+  }
+  
+  void manualRelease() {
+    if (Root) Root->release();
+  }
 
   bool isEmpty() const { return !Root; }
 
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 70c3caf2a061..3ca910ce944f 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -15,10 +15,13 @@
 #define LLVM_ADT_IMSET_H
 
 #include "llvm/Support/Allocator.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <functional>
+#include <vector>
+#include <stdio.h>
 
 namespace llvm {
 
@@ -32,7 +35,7 @@ template <typename ImutInfo> class ImutAVLTreeInOrderIterator;
 template <typename ImutInfo> class ImutAVLTreeGenericIterator;
 
 template <typename ImutInfo >
-class ImutAVLTree : public FoldingSetNode {
+class ImutAVLTree {
 public:
   typedef typename ImutInfo::key_type_ref   key_type_ref;
   typedef typename ImutInfo::value_type     value_type;
@@ -43,7 +46,6 @@ public:
   friend class ImutIntervalAVLFactory<ImutInfo>;
 
   friend class ImutAVLTreeGenericIterator<ImutInfo>;
-  friend class FoldingSet<ImutAVLTree>;
 
   typedef ImutAVLTreeInOrderIterator<ImutInfo>  iterator;
 
@@ -51,29 +53,27 @@ public:
   // Public Interface.
   //===----------------------------------------------------===//
 
-  /// getLeft - Returns a pointer to the left subtree.  This value
+  /// Return a pointer to the left subtree.  This value
   ///  is NULL if there is no left subtree.
-  ImutAVLTree *getLeft() const { return Left; }
+  ImutAVLTree *getLeft() const { return left; }
 
-  /// getRight - Returns a pointer to the right subtree.  This value is
+  /// Return a pointer to the right subtree.  This value is
   ///  NULL if there is no right subtree.
-  ImutAVLTree *getRight() const { return Right; }
+  ImutAVLTree *getRight() const { return right; }
 
   /// getHeight - Returns the height of the tree.  A tree with no subtrees
   ///  has a height of 1.
-  unsigned getHeight() const { return Height; }
+  unsigned getHeight() const { return height; }
 
   /// getValue - Returns the data value associated with the tree node.
-  const value_type& getValue() const { return Value; }
+  const value_type& getValue() const { return value; }
 
   /// find - Finds the subtree associated with the specified key value.
   ///  This method returns NULL if no matching subtree is found.
   ImutAVLTree* find(key_type_ref K) {
     ImutAVLTree *T = this;
-
     while (T) {
       key_type_ref CurrentKey = ImutInfo::KeyOfValue(T->getValue());
-
       if (ImutInfo::isEqual(K,CurrentKey))
         return T;
       else if (ImutInfo::isLess(K,CurrentKey))
@@ -81,7 +81,6 @@ public:
       else
         T = T->getRight();
     }
-
     return NULL;
   }
   
@@ -90,7 +89,7 @@ public:
   ImutAVLTree* getMaxElement() {
     ImutAVLTree *T = this;
     ImutAVLTree *Right = T->getRight();    
-    while (Right) { T = Right; Right = T->getRight(); }
+    while (Right) { T = right; right = T->getRight(); }
     return T;
   }
 
@@ -98,10 +97,10 @@ public:
   ///  both leaves and non-leaf nodes.
   unsigned size() const {
     unsigned n = 1;
-
-    if (const ImutAVLTree* L = getLeft())  n += L->size();
-    if (const ImutAVLTree* R = getRight()) n += R->size();
-
+    if (const ImutAVLTree* L = getLeft())
+      n += L->size();
+    if (const ImutAVLTree* R = getRight())
+      n += R->size();
     return n;
   }
 
@@ -114,7 +113,7 @@ public:
   ///  inorder traversal.
   iterator end() const { return iterator(); }
 
-  bool ElementEqual(value_type_ref V) const {
+  bool isElementEqual(value_type_ref V) const {
     // Compare the keys.
     if (!ImutInfo::isEqual(ImutInfo::KeyOfValue(getValue()),
                            ImutInfo::KeyOfValue(V)))
@@ -128,8 +127,8 @@ public:
     return true;
   }
 
-  bool ElementEqual(const ImutAVLTree* RHS) const {
-    return ElementEqual(RHS->getValue());
+  bool isElementEqual(const ImutAVLTree* RHS) const {
+    return isElementEqual(RHS->getValue());
   }
 
   /// isEqual - Compares two trees for structural equality and returns true
@@ -144,12 +143,12 @@ public:
 
     while (LItr != LEnd && RItr != REnd) {
       if (*LItr == *RItr) {
-        LItr.SkipSubTree();
-        RItr.SkipSubTree();
+        LItr.skipSubTree();
+        RItr.skipSubTree();
         continue;
       }
 
-      if (!LItr->ElementEqual(*RItr))
+      if (!LItr->isElementEqual(*RItr))
         return false;
 
       ++LItr;
@@ -173,22 +172,24 @@ public:
   ///  Nodes are visited using an inorder traversal.
   template <typename Callback>
   void foreach(Callback& C) {
-    if (ImutAVLTree* L = getLeft()) L->foreach(C);
+    if (ImutAVLTree* L = getLeft())
+      L->foreach(C);
 
-    C(Value);
+    C(value);
 
-    if (ImutAVLTree* R = getRight()) R->foreach(C);
+    if (ImutAVLTree* R = getRight())
+      R->foreach(C);
   }
 
-  /// verify - A utility method that checks that the balancing and
+  /// validateTree - A utility method that checks that the balancing and
   ///  ordering invariants of the tree are satisifed.  It is a recursive
   ///  method that returns the height of the tree, which is then consumed
-  ///  by the enclosing verify call.  External callers should ignore the
+  ///  by the enclosing validateTree call.  External callers should ignore the
   ///  return value.  An invalid tree will cause an assertion to fire in
   ///  a debug build.
-  unsigned verify() const {
-    unsigned HL = getLeft() ? getLeft()->verify() : 0;
-    unsigned HR = getRight() ? getRight()->verify() : 0;
+  unsigned validateTree() const {
+    unsigned HL = getLeft() ? getLeft()->validateTree() : 0;
+    unsigned HR = getRight() ? getRight()->validateTree() : 0;
     (void) HL;
     (void) HR;
 
@@ -198,37 +199,39 @@ public:
     assert((HL > HR ? HL-HR : HR-HL) <= 2
            && "Balancing invariant violated");
 
-    assert(!getLeft()
-           || ImutInfo::isLess(ImutInfo::KeyOfValue(getLeft()->getValue()),
-                               ImutInfo::KeyOfValue(getValue()))
-           && "Value in left child is not less that current value");
+    assert((!getLeft() ||
+            ImutInfo::isLess(ImutInfo::KeyOfValue(getLeft()->getValue()),
+                             ImutInfo::KeyOfValue(getValue()))) &&
+           "Value in left child is not less that current value");
 
 
-    assert(!getRight()
-           || ImutInfo::isLess(ImutInfo::KeyOfValue(getValue()),
-                               ImutInfo::KeyOfValue(getRight()->getValue()))
-           && "Current value is not less that value of right child");
+    assert(!(getRight() ||
+             ImutInfo::isLess(ImutInfo::KeyOfValue(getValue()),
+                              ImutInfo::KeyOfValue(getRight()->getValue()))) &&
+           "Current value is not less that value of right child");
 
     return getHeight();
   }
 
-  /// Profile - Profiling for ImutAVLTree.
-  void Profile(llvm::FoldingSetNodeID& ID) {
-    ID.AddInteger(ComputeDigest());
-  }
-
   //===----------------------------------------------------===//
-  // Internal Values.
+  // Internal values.
   //===----------------------------------------------------===//
 
 private:
-  ImutAVLTree*     Left;
-  ImutAVLTree*     Right;
-  unsigned         Height       : 28;
-  unsigned         Mutable      : 1;
-  unsigned         CachedDigest : 1;
-  value_type       Value;
-  uint32_t         Digest;
+  Factory *factory;
+  ImutAVLTree *left;
+  ImutAVLTree *right;
+  ImutAVLTree *prev;
+  ImutAVLTree *next;
+
+  unsigned height         : 28;
+  unsigned IsMutable      : 1;
+  unsigned IsDigestCached : 1;
+  unsigned IsCanonicalized : 1;
+
+  value_type value;
+  uint32_t digest;
+  uint32_t refCount;
 
   //===----------------------------------------------------===//
   // Internal methods (node manipulation; used by Factory).
@@ -237,10 +240,15 @@ private:
 private:
   /// ImutAVLTree - Internal constructor that is only called by
   ///   ImutAVLFactory.
-  ImutAVLTree(ImutAVLTree* l, ImutAVLTree* r, value_type_ref v,
+  ImutAVLTree(Factory *f, ImutAVLTree* l, ImutAVLTree* r, value_type_ref v,
               unsigned height)
-    : Left(l), Right(r), Height(height), Mutable(true), CachedDigest(false),
-      Value(v), Digest(0) {}
+    : factory(f), left(l), right(r), prev(0), next(0), height(height),
+      IsMutable(true), IsDigestCached(false), IsCanonicalized(0),
+      value(v), digest(0), refCount(0)
+  {
+    if (left) left->retain();
+    if (right) right->retain();
+  }
 
   /// isMutable - Returns true if the left and right subtree references
   ///  (as well as height) can be changed.  If this method returns false,
@@ -248,11 +256,11 @@ private:
   ///  object should always have this method return true.  Further, if this
   ///  method returns false for an instance of ImutAVLTree, all subtrees
   ///  will also have this method return false.  The converse is not true.
-  bool isMutable() const { return Mutable; }
+  bool isMutable() const { return IsMutable; }
   
   /// hasCachedDigest - Returns true if the digest for this tree is cached.
   ///  This can only be true if the tree is immutable.
-  bool hasCachedDigest() const { return CachedDigest; }
+  bool hasCachedDigest() const { return IsDigestCached; }
 
   //===----------------------------------------------------===//
   // Mutating operations.  A tree root can be manipulated as
@@ -265,51 +273,32 @@ private:
   // immutable.
   //===----------------------------------------------------===//
 
-  /// MarkImmutable - Clears the mutable flag for a tree.  After this happens,
+  /// markImmutable - Clears the mutable flag for a tree.  After this happens,
   ///   it is an error to call setLeft(), setRight(), and setHeight().
-  void MarkImmutable() {
+  void markImmutable() {
     assert(isMutable() && "Mutable flag already removed.");
-    Mutable = false;
+    IsMutable = false;
   }
   
-  /// MarkedCachedDigest - Clears the NoCachedDigest flag for a tree.
-  void MarkedCachedDigest() {
+  /// markedCachedDigest - Clears the NoCachedDigest flag for a tree.
+  void markedCachedDigest() {
     assert(!hasCachedDigest() && "NoCachedDigest flag already removed.");
-    CachedDigest = true;
-  }
-
-  /// setLeft - Changes the reference of the left subtree.  Used internally
-  ///   by ImutAVLFactory.
-  void setLeft(ImutAVLTree* NewLeft) {
-    assert(isMutable() &&
-           "Only a mutable tree can have its left subtree changed.");
-    Left = NewLeft;
-    CachedDigest = false;
-  }
-
-  /// setRight - Changes the reference of the right subtree.  Used internally
-  ///  by ImutAVLFactory.
-  void setRight(ImutAVLTree* NewRight) {
-    assert(isMutable() &&
-           "Only a mutable tree can have its right subtree changed.");
-
-    Right = NewRight;
-    CachedDigest = false;
+    IsDigestCached = true;
   }
 
   /// setHeight - Changes the height of the tree.  Used internally by
   ///  ImutAVLFactory.
   void setHeight(unsigned h) {
     assert(isMutable() && "Only a mutable tree can have its height changed.");
-    Height = h;
+    height = h;
   }
 
   static inline
-  uint32_t ComputeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) {
+  uint32_t computeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) {
     uint32_t digest = 0;
 
     if (L)
-      digest += L->ComputeDigest();
+      digest += L->computeDigest();
 
     // Compute digest of stored data.
     FoldingSetNodeID ID;
@@ -317,22 +306,54 @@ private:
     digest += ID.ComputeHash();
 
     if (R)
-      digest += R->ComputeDigest();
+      digest += R->computeDigest();
 
     return digest;
   }
 
-  inline uint32_t ComputeDigest() {
+  inline uint32_t computeDigest() {
     // Check the lowest bit to determine if digest has actually been
     // pre-computed.
     if (hasCachedDigest())
-      return Digest;
+      return digest;
 
-    uint32_t X = ComputeDigest(getLeft(), getRight(), getValue());
-    Digest = X;
-    MarkedCachedDigest();
+    uint32_t X = computeDigest(getLeft(), getRight(), getValue());
+    digest = X;
+    markedCachedDigest();
     return X;
   }
+
+  //===----------------------------------------------------===//
+  // Reference count operations.
+  //===----------------------------------------------------===//
+
+public:
+  void retain() { ++refCount; }
+  void release() {
+    assert(refCount > 0);
+    if (--refCount == 0)
+      destroy();
+  }
+  void destroy() {
+    if (left)
+      left->release();
+    if (right)
+      right->release();
+    if (IsCanonicalized) {
+      if (next)
+        next->prev = prev;
+
+      if (prev)
+        prev->next = next;
+      else
+        factory->Cache[computeDigest()] = next;
+    }
+    
+    // We need to clear the mutability bit in case we are
+    // destroying the node as part of a sweep in ImutAVLFactory::recoverNodes().
+    IsMutable = false;
+    factory->freeNodes.push_back(this);
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -341,14 +362,17 @@ private:
 
 template <typename ImutInfo >
 class ImutAVLFactory {
+  friend class ImutAVLTree<ImutInfo>;
   typedef ImutAVLTree<ImutInfo> TreeTy;
   typedef typename TreeTy::value_type_ref value_type_ref;
   typedef typename TreeTy::key_type_ref   key_type_ref;
 
-  typedef FoldingSet<TreeTy> CacheTy;
+  typedef DenseMap<unsigned, TreeTy*> CacheTy;
 
   CacheTy Cache;
   uintptr_t Allocator;
+  std::vector<TreeTy*> createdNodes;
+  std::vector<TreeTy*> freeNodes;
 
   bool ownsAllocator() const {
     return Allocator & 0x1 ? false : true;
@@ -373,55 +397,56 @@ public:
     if (ownsAllocator()) delete &getAllocator();
   }
 
-  TreeTy* Add(TreeTy* T, value_type_ref V) {
-    T = Add_internal(V,T);
-    MarkImmutable(T);
+  TreeTy* add(TreeTy* T, value_type_ref V) {
+    T = add_internal(V,T);
+    markImmutable(T);
+    recoverNodes();
     return T;
   }
 
-  TreeTy* Remove(TreeTy* T, key_type_ref V) {
-    T = Remove_internal(V,T);
-    MarkImmutable(T);
+  TreeTy* remove(TreeTy* T, key_type_ref V) {
+    T = remove_internal(V,T);
+    markImmutable(T);
+    recoverNodes();
     return T;
   }
 
-  TreeTy* GetEmptyTree() const { return NULL; }
+  TreeTy* getEmptyTree() const { return NULL; }
 
+protected:
+  
   //===--------------------------------------------------===//
   // A bunch of quick helper functions used for reasoning
   // about the properties of trees and their children.
   // These have succinct names so that the balancing code
   // is as terse (and readable) as possible.
   //===--------------------------------------------------===//
-protected:
 
-  bool           isEmpty(TreeTy* T) const { return !T; }
-  unsigned Height(TreeTy* T) const { return T ? T->getHeight() : 0; }
-  TreeTy*           Left(TreeTy* T) const { return T->getLeft(); }
-  TreeTy*          Right(TreeTy* T) const { return T->getRight(); }
-  value_type_ref   Value(TreeTy* T) const { return T->Value; }
+  bool            isEmpty(TreeTy* T) const { return !T; }
+  unsigned        getHeight(TreeTy* T) const { return T ? T->getHeight() : 0; }
+  TreeTy*         getLeft(TreeTy* T) const { return T->getLeft(); }
+  TreeTy*         getRight(TreeTy* T) const { return T->getRight(); }
+  value_type_ref  getValue(TreeTy* T) const { return T->value; }
 
-  unsigned IncrementHeight(TreeTy* L, TreeTy* R) const {
-    unsigned hl = Height(L);
-    unsigned hr = Height(R);
+  unsigned incrementHeight(TreeTy* L, TreeTy* R) const {
+    unsigned hl = getHeight(L);
+    unsigned hr = getHeight(R);
     return (hl > hr ? hl : hr) + 1;
   }
 
-  static bool CompareTreeWithSection(TreeTy* T,
+  static bool compareTreeWithSection(TreeTy* T,
                                      typename TreeTy::iterator& TI,
                                      typename TreeTy::iterator& TE) {
-
     typename TreeTy::iterator I = T->begin(), E = T->end();
-
-    for ( ; I!=E ; ++I, ++TI)
-      if (TI == TE || !I->ElementEqual(*TI))
+    for ( ; I!=E ; ++I, ++TI) {
+      if (TI == TE || !I->isElementEqual(*TI))
         return false;
-
+    }
     return true;
   }
 
   //===--------------------------------------------------===//
-  // "CreateNode" is used to generate new tree roots that link
+  // "createNode" is used to generate new tree roots that link
   // to other trees.  The functon may also simply move links
   // in an existing root if that root is still marked mutable.
   // This is necessary because otherwise our balancing code
@@ -430,181 +455,188 @@ protected:
   // returned to the caller.
   //===--------------------------------------------------===//
 
-  TreeTy* CreateNode(TreeTy* L, value_type_ref V, TreeTy* R) {   
+  TreeTy* createNode(TreeTy* L, value_type_ref V, TreeTy* R) {   
     BumpPtrAllocator& A = getAllocator();
-    TreeTy* T = (TreeTy*) A.Allocate<TreeTy>();
-    new (T) TreeTy(L, R, V, IncrementHeight(L,R));
+    TreeTy* T;
+    if (!freeNodes.empty()) {
+      T = freeNodes.back();
+      freeNodes.pop_back();
+      assert(T != L);
+      assert(T != R);
+    }
+    else {
+      T = (TreeTy*) A.Allocate<TreeTy>();
+    }
+    new (T) TreeTy(this, L, R, V, incrementHeight(L,R));
+    createdNodes.push_back(T);
     return T;
   }
 
-  TreeTy* CreateNode(TreeTy* L, TreeTy* OldTree, TreeTy* R) {
-    assert(!isEmpty(OldTree));
+  TreeTy* createNode(TreeTy* newLeft, TreeTy* oldTree, TreeTy* newRight) {
+    return createNode(newLeft, getValue(oldTree), newRight);
+  }
 
-    if (OldTree->isMutable()) {
-      OldTree->setLeft(L);
-      OldTree->setRight(R);
-      OldTree->setHeight(IncrementHeight(L, R));
-      return OldTree;
+  void recoverNodes() {
+    for (unsigned i = 0, n = createdNodes.size(); i < n; ++i) {
+      TreeTy *N = createdNodes[i];
+      if (N->isMutable() && N->refCount == 0)
+        N->destroy();
     }
-    else
-      return CreateNode(L, Value(OldTree), R);
+    createdNodes.clear();
   }
 
-  /// Balance - Used by Add_internal and Remove_internal to
+  /// balanceTree - Used by add_internal and remove_internal to
   ///  balance a newly created tree.
-  TreeTy* Balance(TreeTy* L, value_type_ref V, TreeTy* R) {
-
-    unsigned hl = Height(L);
-    unsigned hr = Height(R);
+  TreeTy* balanceTree(TreeTy* L, value_type_ref V, TreeTy* R) {
+    unsigned hl = getHeight(L);
+    unsigned hr = getHeight(R);
 
     if (hl > hr + 2) {
       assert(!isEmpty(L) && "Left tree cannot be empty to have a height >= 2");
 
-      TreeTy* LL = Left(L);
-      TreeTy* LR = Right(L);
+      TreeTy *LL = getLeft(L);
+      TreeTy *LR = getRight(L);
 
-      if (Height(LL) >= Height(LR))
-        return CreateNode(LL, L, CreateNode(LR,V,R));
+      if (getHeight(LL) >= getHeight(LR))
+        return createNode(LL, L, createNode(LR,V,R));
 
       assert(!isEmpty(LR) && "LR cannot be empty because it has a height >= 1");
 
-      TreeTy* LRL = Left(LR);
-      TreeTy* LRR = Right(LR);
+      TreeTy *LRL = getLeft(LR);
+      TreeTy *LRR = getRight(LR);
 
-      return CreateNode(CreateNode(LL,L,LRL), LR, CreateNode(LRR,V,R));
+      return createNode(createNode(LL,L,LRL), LR, createNode(LRR,V,R));
     }
     else if (hr > hl + 2) {
       assert(!isEmpty(R) && "Right tree cannot be empty to have a height >= 2");
 
-      TreeTy* RL = Left(R);
-      TreeTy* RR = Right(R);
+      TreeTy *RL = getLeft(R);
+      TreeTy *RR = getRight(R);
 
-      if (Height(RR) >= Height(RL))
-        return CreateNode(CreateNode(L,V,RL), R, RR);
+      if (getHeight(RR) >= getHeight(RL))
+        return createNode(createNode(L,V,RL), R, RR);
 
       assert(!isEmpty(RL) && "RL cannot be empty because it has a height >= 1");
 
-      TreeTy* RLL = Left(RL);
-      TreeTy* RLR = Right(RL);
+      TreeTy *RLL = getLeft(RL);
+      TreeTy *RLR = getRight(RL);
 
-      return CreateNode(CreateNode(L,V,RLL), RL, CreateNode(RLR,R,RR));
+      return createNode(createNode(L,V,RLL), RL, createNode(RLR,R,RR));
     }
     else
-      return CreateNode(L,V,R);
+      return createNode(L,V,R);
   }
 
-  /// Add_internal - Creates a new tree that includes the specified
+  /// add_internal - Creates a new tree that includes the specified
   ///  data and the data from the original tree.  If the original tree
   ///  already contained the data item, the original tree is returned.
-  TreeTy* Add_internal(value_type_ref V, TreeTy* T) {
+  TreeTy* add_internal(value_type_ref V, TreeTy* T) {
     if (isEmpty(T))
-      return CreateNode(T, V, T);
-
+      return createNode(T, V, T);
     assert(!T->isMutable());
 
     key_type_ref K = ImutInfo::KeyOfValue(V);
-    key_type_ref KCurrent = ImutInfo::KeyOfValue(Value(T));
+    key_type_ref KCurrent = ImutInfo::KeyOfValue(getValue(T));
 
     if (ImutInfo::isEqual(K,KCurrent))
-      return CreateNode(Left(T), V, Right(T));
+      return createNode(getLeft(T), V, getRight(T));
     else if (ImutInfo::isLess(K,KCurrent))
-      return Balance(Add_internal(V,Left(T)), Value(T), Right(T));
+      return balanceTree(add_internal(V, getLeft(T)), getValue(T), getRight(T));
     else
-      return Balance(Left(T), Value(T), Add_internal(V,Right(T)));
+      return balanceTree(getLeft(T), getValue(T), add_internal(V, getRight(T)));
   }
 
-  /// Remove_internal - Creates a new tree that includes all the data
+  /// remove_internal - Creates a new tree that includes all the data
   ///  from the original tree except the specified data.  If the
   ///  specified data did not exist in the original tree, the original
   ///  tree is returned.
-  TreeTy* Remove_internal(key_type_ref K, TreeTy* T) {
+  TreeTy* remove_internal(key_type_ref K, TreeTy* T) {
     if (isEmpty(T))
       return T;
 
     assert(!T->isMutable());
 
-    key_type_ref KCurrent = ImutInfo::KeyOfValue(Value(T));
+    key_type_ref KCurrent = ImutInfo::KeyOfValue(getValue(T));
 
-    if (ImutInfo::isEqual(K,KCurrent))
-      return CombineLeftRightTrees(Left(T),Right(T));
-    else if (ImutInfo::isLess(K,KCurrent))
-      return Balance(Remove_internal(K,Left(T)), Value(T), Right(T));
-    else
-      return Balance(Left(T), Value(T), Remove_internal(K,Right(T)));
+    if (ImutInfo::isEqual(K,KCurrent)) {
+      return combineTrees(getLeft(T), getRight(T));
+    } else if (ImutInfo::isLess(K,KCurrent)) {
+      return balanceTree(remove_internal(K, getLeft(T)),
+                                            getValue(T), getRight(T));
+    } else {
+      return balanceTree(getLeft(T), getValue(T),
+                         remove_internal(K, getRight(T)));
+    }
   }
 
-  TreeTy* CombineLeftRightTrees(TreeTy* L, TreeTy* R) {
-    if (isEmpty(L)) return R;
-    if (isEmpty(R)) return L;
-
+  TreeTy* combineTrees(TreeTy* L, TreeTy* R) {
+    if (isEmpty(L))
+      return R;
+    if (isEmpty(R))
+      return L;
     TreeTy* OldNode;
-    TreeTy* NewRight = RemoveMinBinding(R,OldNode);
-    return Balance(L,Value(OldNode),NewRight);
+    TreeTy* newRight = removeMinBinding(R,OldNode);
+    return balanceTree(L, getValue(OldNode), newRight);
   }
 
-  TreeTy* RemoveMinBinding(TreeTy* T, TreeTy*& NodeRemoved) {
+  TreeTy* removeMinBinding(TreeTy* T, TreeTy*& Noderemoved) {
     assert(!isEmpty(T));
-
-    if (isEmpty(Left(T))) {
-      NodeRemoved = T;
-      return Right(T);
+    if (isEmpty(getLeft(T))) {
+      Noderemoved = T;
+      return getRight(T);
     }
-
-    return Balance(RemoveMinBinding(Left(T),NodeRemoved),Value(T),Right(T));
+    return balanceTree(removeMinBinding(getLeft(T), Noderemoved),
+                       getValue(T), getRight(T));
   }
 
-  /// MarkImmutable - Clears the mutable bits of a root and all of its
+  /// markImmutable - Clears the mutable bits of a root and all of its
   ///  descendants.
-  void MarkImmutable(TreeTy* T) {
+  void markImmutable(TreeTy* T) {
     if (!T || !T->isMutable())
       return;
-
-    T->MarkImmutable();
-    MarkImmutable(Left(T));
-    MarkImmutable(Right(T));
+    T->markImmutable();
+    markImmutable(getLeft(T));
+    markImmutable(getRight(T));
   }
   
 public:
-  TreeTy *GetCanonicalTree(TreeTy *TNew) {
+  TreeTy *getCanonicalTree(TreeTy *TNew) {
     if (!TNew)
-      return NULL;    
-    
-    // Search the FoldingSet bucket for a Tree with the same digest.
-    FoldingSetNodeID ID;
-    unsigned digest = TNew->ComputeDigest();
-    ID.AddInteger(digest);
-    unsigned hash = ID.ComputeHash();
-    
-    typename CacheTy::bucket_iterator I = Cache.bucket_begin(hash);
-    typename CacheTy::bucket_iterator E = Cache.bucket_end(hash);
-    
-    for (; I != E; ++I) {
-      TreeTy *T = &*I;
-      
-      if (T->ComputeDigest() != digest)
-        continue;
-      
-      // We found a collision.  Perform a comparison of Contents('T')
-      // with Contents('TNew')
-      typename TreeTy::iterator TI = T->begin(), TE = T->end();
-      
-      if (!CompareTreeWithSection(TNew, TI, TE))
-        continue;
-      
-      if (TI != TE)
-        continue; // T has more contents than TNew.
-      
-      // Trees did match!  Return 'T'.
-      return T;
+      return 0;
+
+    if (TNew->IsCanonicalized)
+      return TNew;
+
+    // Search the hashtable for another tree with the same digest, and
+    // if find a collision compare those trees by their contents.
+    unsigned digest = TNew->computeDigest();
+    TreeTy *&entry = Cache[digest];
+    do {
+      if (!entry)
+        break;
+      for (TreeTy *T = entry ; T != 0; T = T->next) {
+        // Compare the Contents('T') with Contents('TNew')
+        typename TreeTy::iterator TI = T->begin(), TE = T->end();
+        if (!compareTreeWithSection(TNew, TI, TE))
+          continue;
+        if (TI != TE)
+          continue; // T has more contents than TNew.
+        // Trees did match!  Return 'T'.
+        if (TNew->refCount == 0)
+          TNew->destroy();
+        return T;
+      }
+      entry->prev = TNew;
+      TNew->next = entry;
     }
+    while (false);
 
-    // 'TNew' is the only tree of its kind.  Return it.
-    Cache.InsertNode(TNew, (void*) &*Cache.bucket_end(hash));
+    entry = TNew;
+    TNew->IsCanonicalized = true;
     return TNew;
   }
 };
 
-
 //===----------------------------------------------------------------------===//
 // Immutable AVL-Tree Iterators.
 //===----------------------------------------------------------------------===//
@@ -635,19 +667,17 @@ public:
   }
 
 
-  bool AtEnd() const { return stack.empty(); }
+  bool atEnd() const { return stack.empty(); }
 
-  bool AtBeginning() const {
+  bool atBeginning() const {
     return stack.size() == 1 && getVisitState() == VisitedNone;
   }
 
-  void SkipToParent() {
+  void skipToParent() {
     assert(!stack.empty());
     stack.pop_back();
-
     if (stack.empty())
       return;
-
     switch (getVisitState()) {
       case VisitedNone:
         stack.back() |= VisitedLeft;
@@ -663,11 +693,9 @@ public:
   inline bool operator==(const _Self& x) const {
     if (stack.size() != x.stack.size())
       return false;
-
     for (unsigned i = 0 ; i < stack.size(); i++)
       if (stack[i] != x.stack[i])
         return false;
-
     return true;
   }
 
@@ -675,70 +703,52 @@ public:
 
   _Self& operator++() {
     assert(!stack.empty());
-
     TreeTy* Current = reinterpret_cast<TreeTy*>(stack.back() & ~Flags);
     assert(Current);
-
     switch (getVisitState()) {
       case VisitedNone:
         if (TreeTy* L = Current->getLeft())
           stack.push_back(reinterpret_cast<uintptr_t>(L));
         else
           stack.back() |= VisitedLeft;
-
         break;
-
       case VisitedLeft:
         if (TreeTy* R = Current->getRight())
           stack.push_back(reinterpret_cast<uintptr_t>(R));
         else
           stack.back() |= VisitedRight;
-
         break;
-
       case VisitedRight:
-        SkipToParent();
+        skipToParent();
         break;
-
       default:
         assert(false && "Unreachable.");
     }
-
     return *this;
   }
 
   _Self& operator--() {
     assert(!stack.empty());
-
     TreeTy* Current = reinterpret_cast<TreeTy*>(stack.back() & ~Flags);
     assert(Current);
-
     switch (getVisitState()) {
       case VisitedNone:
         stack.pop_back();
         break;
-
       case VisitedLeft:
         stack.back() &= ~Flags; // Set state to "VisitedNone."
-
         if (TreeTy* L = Current->getLeft())
           stack.push_back(reinterpret_cast<uintptr_t>(L) | VisitedRight);
-
         break;
-
       case VisitedRight:
         stack.back() &= ~Flags;
         stack.back() |= VisitedLeft;
-
         if (TreeTy* R = Current->getRight())
           stack.push_back(reinterpret_cast<uintptr_t>(R) | VisitedRight);
-
         break;
-
       default:
         assert(false && "Unreachable.");
     }
-
     return *this;
   }
 };
@@ -769,7 +779,7 @@ public:
 
   inline _Self& operator++() {
     do ++InternalItr;
-    while (!InternalItr.AtEnd() &&
+    while (!InternalItr.atEnd() &&
            InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft);
 
     return *this;
@@ -777,16 +787,16 @@ public:
 
   inline _Self& operator--() {
     do --InternalItr;
-    while (!InternalItr.AtBeginning() &&
+    while (!InternalItr.atBeginning() &&
            InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft);
 
     return *this;
   }
 
-  inline void SkipSubTree() {
-    InternalItr.SkipToParent();
+  inline void skipSubTree() {
+    InternalItr.skipToParent();
 
-    while (!InternalItr.AtEnd() &&
+    while (!InternalItr.atEnd() &&
            InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft)
       ++InternalItr;
   }
@@ -927,7 +937,23 @@ public:
   /// should use a Factory object to create sets instead of directly
   /// invoking the constructor, but there are cases where make this
   /// constructor public is useful.
-  explicit ImmutableSet(TreeTy* R) : Root(R) {}
+  explicit ImmutableSet(TreeTy* R) : Root(R) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableSet(const ImmutableSet &X) : Root(X.Root) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableSet &operator=(const ImmutableSet &X) {
+    if (Root != X.Root) {
+      if (X.Root) { X.Root->retain(); }
+      if (Root) { Root->release(); }
+      Root = X.Root;
+    }
+    return *this;
+  }
+  ~ImmutableSet() {
+    if (Root) { Root->release(); }
+  }
 
   class Factory {
     typename TreeTy::Factory F;
@@ -940,33 +966,33 @@ public:
     Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
       : F(Alloc), Canonicalize(canonicalize) {}
 
-    /// GetEmptySet - Returns an immutable set that contains no elements.
-    ImmutableSet GetEmptySet() {
-      return ImmutableSet(F.GetEmptyTree());
+    /// getEmptySet - Returns an immutable set that contains no elements.
+    ImmutableSet getEmptySet() {
+      return ImmutableSet(F.getEmptyTree());
     }
 
-    /// Add - Creates a new immutable set that contains all of the values
+    /// add - Creates a new immutable set that contains all of the values
     ///  of the original set with the addition of the specified value.  If
     ///  the original set already included the value, then the original set is
     ///  returned and no memory is allocated.  The time and space complexity
     ///  of this operation is logarithmic in the size of the original set.
     ///  The memory allocated to represent the set is released when the
     ///  factory object that created the set is destroyed.
-    ImmutableSet Add(ImmutableSet Old, value_type_ref V) {
-      TreeTy *NewT = F.Add(Old.Root, V);
-      return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT);
+    ImmutableSet add(ImmutableSet Old, value_type_ref V) {
+      TreeTy *NewT = F.add(Old.Root, V);
+      return ImmutableSet(Canonicalize ? F.getCanonicalTree(NewT) : NewT);
     }
 
-    /// Remove - Creates a new immutable set that contains all of the values
+    /// remove - Creates a new immutable set that contains all of the values
     ///  of the original set with the exception of the specified value.  If
     ///  the original set did not contain the value, the original set is
     ///  returned and no memory is allocated.  The time and space complexity
     ///  of this operation is logarithmic in the size of the original set.
     ///  The memory allocated to represent the set is released when the
     ///  factory object that created the set is destroyed.
-    ImmutableSet Remove(ImmutableSet Old, value_type_ref V) {
-      TreeTy *NewT = F.Remove(Old.Root, V);
-      return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT);
+    ImmutableSet remove(ImmutableSet Old, value_type_ref V) {
+      TreeTy *NewT = F.remove(Old.Root, V);
+      return ImmutableSet(Canonicalize ? F.getCanonicalTree(NewT) : NewT);
     }
 
     BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
@@ -978,20 +1004,21 @@ public:
 
   friend class Factory;
 
-  /// contains - Returns true if the set contains the specified value.
+  /// Returns true if the set contains the specified value.
   bool contains(value_type_ref V) const {
     return Root ? Root->contains(V) : false;
   }
 
-  bool operator==(ImmutableSet RHS) const {
+  bool operator==(const ImmutableSet &RHS) const {
     return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root;
   }
 
-  bool operator!=(ImmutableSet RHS) const {
+  bool operator!=(const ImmutableSet &RHS) const {
     return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root;
   }
 
   TreeTy *getRoot() { 
+    if (Root) { Root->retain(); }
     return Root;
   }
 
@@ -1049,7 +1076,7 @@ public:
   // For testing.
   //===--------------------------------------------------===//
 
-  void verify() const { if (Root) Root->verify(); }
+  void validateTree() const { if (Root) Root->validateTree(); }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/ADT/InMemoryStruct.h b/include/llvm/ADT/InMemoryStruct.h
new file mode 100644
index 000000000000..a56084501a62
--- /dev/null
+++ b/include/llvm/ADT/InMemoryStruct.h
@@ -0,0 +1,77 @@
+//===- InMemoryStruct.h - Indirect Struct Access Smart Pointer --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INMEMORYSTRUCT_H
+#define LLVM_ADT_INMEMORYSTRUCT_H
+
+#include <cassert>
+
+namespace llvm {
+
+/// \brief Helper object for abstracting access to an in-memory structure which
+/// may require some kind of temporary storage.
+///
+/// This class is designed to be used for accessing file data structures which
+/// in the common case can be accessed from a direct pointer to a memory mapped
+/// object, but which in some cases may require indirect access to a temporary
+/// structure (which, for example, may have undergone endianness translation).
+template<typename T>
+class InMemoryStruct {
+  typedef T value_type;
+  typedef value_type &reference;
+  typedef value_type *pointer;
+  typedef const value_type &const_reference;
+  typedef const value_type *const_pointer;
+
+  /// \brief The smart pointer target.
+  value_type *Target;
+
+  /// \brief A temporary object which can be used as a target of the smart
+  /// pointer.
+  value_type Contents;
+
+private:
+
+public:
+  InMemoryStruct() : Target(0) {}
+  InMemoryStruct(reference Value) : Target(&Contents), Contents(Value) {}
+  InMemoryStruct(pointer Value) : Target(Value) {}
+  InMemoryStruct(const InMemoryStruct<T> &Value) { *this = Value; }
+  
+  void operator=(const InMemoryStruct<T> &Value) {
+    if (Value.Target != &Value.Contents) {
+      Target = Value.Target;
+    } else {
+      Target = &Contents;
+      Contents = Value.Contents;
+    }
+  }
+  
+  const_reference operator*() const {
+    assert(Target && "Cannot dereference null pointer");
+    return *Target;
+  }
+  reference operator*() {
+    assert(Target && "Cannot dereference null pointer");
+    return *Target;
+  }
+
+  const_pointer operator->() const {
+    return Target;
+  }
+  pointer operator->() {
+    return Target;
+  }
+
+  operator bool() const { return Target != 0; }
+};
+
+}
+
+#endif
diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h
index 89f0dfa64e1c..87126ea49187 100644
--- a/include/llvm/ADT/IndexedMap.h
+++ b/include/llvm/ADT/IndexedMap.h
@@ -55,6 +55,14 @@ namespace llvm {
       return storage_[toIndex_(n)];
     }
 
+    void reserve(typename StorageT::size_type s) {
+      storage_.reserve(s);
+    }
+
+    void resize(typename StorageT::size_type s) {
+      storage_.resize(s, nullVal_);
+    }
+
     void clear() {
       storage_.clear();
     }
@@ -62,7 +70,11 @@ namespace llvm {
     void grow(IndexT n) {
       unsigned NewSize = toIndex_(n) + 1;
       if (NewSize > storage_.size())
-        storage_.resize(NewSize, nullVal_);
+        resize(NewSize);
+    }
+
+    bool inBounds(IndexT n) const {
+      return toIndex_(n) < storage_.size();
     }
 
     typename StorageT::size_type size() const {
diff --git a/include/llvm/ADT/IntEqClasses.h b/include/llvm/ADT/IntEqClasses.h
new file mode 100644
index 000000000000..8e75c48e3764
--- /dev/null
+++ b/include/llvm/ADT/IntEqClasses.h
@@ -0,0 +1,88 @@
+//===-- llvm/ADT/IntEqClasses.h - Equiv. Classes of Integers ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INTEQCLASSES_H
+#define LLVM_ADT_INTEQCLASSES_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class IntEqClasses {
+  /// EC - When uncompressed, map each integer to a smaller member of its
+  /// equivalence class. The class leader is the smallest member and maps to
+  /// itself.
+  ///
+  /// When compressed, EC[i] is the equivalence class of i.
+  SmallVector<unsigned, 8> EC;
+
+  /// NumClasses - The number of equivalence classes when compressed, or 0 when
+  /// uncompressed.
+  unsigned NumClasses;
+
+public:
+  /// IntEqClasses - Create an equivalence class mapping for 0 .. N-1.
+  IntEqClasses(unsigned N = 0) : NumClasses(0) { grow(N); }
+
+  /// grow - Increase capacity to hold 0 .. N-1, putting new integers in unique
+  /// equivalence classes.
+  /// This requires an uncompressed map.
+  void grow(unsigned N);
+
+  /// clear - Clear all classes so that grow() will assign a unique class to
+  /// every integer.
+  void clear() {
+    EC.clear();
+    NumClasses = 0;
+  }
+
+  /// join - Join the equivalence classes of a and b. After joining classes,
+  /// findLeader(a) == findLeader(b).
+  /// This requires an uncompressed map.
+  void join(unsigned a, unsigned b);
+
+  /// findLeader - Compute the leader of a's equivalence class. This is the
+  /// smallest member of the class.
+  /// This requires an uncompressed map.
+  unsigned findLeader(unsigned a) const;
+
+  /// compress - Compress equivalence classes by numbering them 0 .. M.
+  /// This makes the equivalence class map immutable.
+  void compress();
+
+  /// getNumClasses - Return the number of equivalence classes after compress()
+  /// was called.
+  unsigned getNumClasses() const { return NumClasses; }
+
+  /// operator[] - Return a's equivalence class number, 0 .. getNumClasses()-1.
+  /// This requires a compressed map.
+  unsigned operator[](unsigned a) const {
+    assert(NumClasses && "operator[] called before compress()");
+    return EC[a];
+  }
+
+  /// uncompress - Change back to the uncompressed representation that allows
+  /// editing.
+  void uncompress();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
new file mode 100644
index 000000000000..79f24d31c068
--- /dev/null
+++ b/include/llvm/ADT/IntervalMap.h
@@ -0,0 +1,2139 @@
+//===- llvm/ADT/IntervalMap.h - A sorted interval map -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a coalescing interval map for small objects.
+//
+// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
+// same value are represented in a compressed form.
+//
+// Iterators provide ordered access to the compressed intervals rather than the
+// individual keys, and insert and erase operations use key intervals as well.
+//
+// Like SmallVector, IntervalMap will store the first N intervals in the map
+// object itself without any allocations. When space is exhausted it switches to
+// a B+-tree representation with very small overhead for small key and value
+// objects.
+//
+// A Traits class specifies how keys are compared. It also allows IntervalMap to
+// work with both closed and half-open intervals.
+//
+// Keys and values are not stored next to each other in a std::pair, so we don't
+// provide such a value_type. Dereferencing iterators only returns the mapped
+// value. The interval bounds are accessible through the start() and stop()
+// iterator methods.
+//
+// IntervalMap is optimized for small key and value objects, 4 or 8 bytes each
+// is the optimal size. For large objects use std::map instead.
+//
+//===----------------------------------------------------------------------===//
+//
+// Synopsis:
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap {
+// public:
+//   typedef KeyT key_type;
+//   typedef ValT mapped_type;
+//   typedef RecyclingAllocator<...> Allocator;
+//   class iterator;
+//   class const_iterator;
+//
+//   explicit IntervalMap(Allocator&);
+//   ~IntervalMap():
+//
+//   bool empty() const;
+//   KeyT start() const;
+//   KeyT stop() const;
+//   ValT lookup(KeyT x, Value NotFound = Value()) const;
+//
+//   const_iterator begin() const;
+//   const_iterator end() const;
+//   iterator begin();
+//   iterator end();
+//   const_iterator find(KeyT x) const;
+//   iterator find(KeyT x);
+//
+//   void insert(KeyT a, KeyT b, ValT y);
+//   void clear();
+// };
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap::const_iterator :
+//   public std::iterator<std::bidirectional_iterator_tag, ValT> {
+// public:
+//   bool operator==(const const_iterator &) const;
+//   bool operator!=(const const_iterator &) const;
+//   bool valid() const;
+//
+//   const KeyT &start() const;
+//   const KeyT &stop() const;
+//   const ValT &value() const;
+//   const ValT &operator*() const;
+//   const ValT *operator->() const;
+//
+//   const_iterator &operator++();
+//   const_iterator &operator++(int);
+//   const_iterator &operator--();
+//   const_iterator &operator--(int);
+//   void goToBegin();
+//   void goToEnd();
+//   void find(KeyT x);
+//   void advanceTo(KeyT x);
+// };
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap::iterator : public const_iterator {
+// public:
+//   void insert(KeyT a, KeyT b, Value y);
+//   void erase();
+// };
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INTERVALMAP_H
+#define LLVM_ADT_INTERVALMAP_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include <iterator>
+
+namespace llvm {
+
+
+//===----------------------------------------------------------------------===//
+//---                              Key traits                              ---//
+//===----------------------------------------------------------------------===//
+//
+// The IntervalMap works with closed or half-open intervals.
+// Adjacent intervals that map to the same value are coalesced.
+//
+// The IntervalMapInfo traits class is used to determine if a key is contained
+// in an interval, and if two intervals are adjacent so they can be coalesced.
+// The provided implementation works for closed integer intervals, other keys
+// probably need a specialized version.
+//
+// The point x is contained in [a;b] when !startLess(x, a) && !stopLess(b, x).
+//
+// It is assumed that (a;b] half-open intervals are not used, only [a;b) is
+// allowed. This is so that stopLess(a, b) can be used to determine if two
+// intervals overlap.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+struct IntervalMapInfo {
+
+  /// startLess - Return true if x is not in [a;b].
+  /// This is x < a both for closed intervals and for [a;b) half-open intervals.
+  static inline bool startLess(const T &x, const T &a) {
+    return x < a;
+  }
+
+  /// stopLess - Return true if x is not in [a;b].
+  /// This is b < x for a closed interval, b <= x for [a;b) half-open intervals.
+  static inline bool stopLess(const T &b, const T &x) {
+    return b < x;
+  }
+
+  /// adjacent - Return true when the intervals [x;a] and [b;y] can coalesce.
+  /// This is a+1 == b for closed intervals, a == b for half-open intervals.
+  static inline bool adjacent(const T &a, const T &b) {
+    return a+1 == b;
+  }
+
+};
+
+/// IntervalMapImpl - Namespace used for IntervalMap implementation details.
+/// It should be considered private to the implementation.
+namespace IntervalMapImpl {
+
+// Forward declarations.
+template <typename, typename, unsigned, typename> class LeafNode;
+template <typename, typename, unsigned, typename> class BranchNode;
+
+typedef std::pair<unsigned,unsigned> IdxPair;
+
+
+//===----------------------------------------------------------------------===//
+//---                    IntervalMapImpl::NodeBase                         ---//
+//===----------------------------------------------------------------------===//
+//
+// Both leaf and branch nodes store vectors of pairs.
+// Leaves store ((KeyT, KeyT), ValT) pairs, branches use (NodeRef, KeyT).
+//
+// Keys and values are stored in separate arrays to avoid padding caused by
+// different object alignments. This also helps improve locality of reference
+// when searching the keys.
+//
+// The nodes don't know how many elements they contain - that information is
+// stored elsewhere. Omitting the size field prevents padding and allows a node
+// to fill the allocated cache lines completely.
+//
+// These are typical key and value sizes, the node branching factor (N), and
+// wasted space when nodes are sized to fit in three cache lines (192 bytes):
+//
+//   T1  T2   N Waste  Used by
+//    4   4  24   0    Branch<4> (32-bit pointers)
+//    8   4  16   0    Leaf<4,4>, Branch<4>
+//    8   8  12   0    Leaf<4,8>, Branch<8>
+//   16   4   9  12    Leaf<8,4>
+//   16   8   8   0    Leaf<8,8>
+//
+//===----------------------------------------------------------------------===//
+
+template <typename T1, typename T2, unsigned N>
+class NodeBase {
+public:
+  enum { Capacity = N };
+
+  T1 first[N];
+  T2 second[N];
+
+  /// copy - Copy elements from another node.
+  /// @param Other Node elements are copied from.
+  /// @param i     Beginning of the source range in other.
+  /// @param j     Beginning of the destination range in this.
+  /// @param Count Number of elements to copy.
+  template <unsigned M>
+  void copy(const NodeBase<T1, T2, M> &Other, unsigned i,
+            unsigned j, unsigned Count) {
+    assert(i + Count <= M && "Invalid source range");
+    assert(j + Count <= N && "Invalid dest range");
+    for (unsigned e = i + Count; i != e; ++i, ++j) {
+      first[j]  = Other.first[i];
+      second[j] = Other.second[i];
+    }
+  }
+
+  /// moveLeft - Move elements to the left.
+  /// @param i     Beginning of the source range.
+  /// @param j     Beginning of the destination range.
+  /// @param Count Number of elements to copy.
+  void moveLeft(unsigned i, unsigned j, unsigned Count) {
+    assert(j <= i && "Use moveRight shift elements right");
+    copy(*this, i, j, Count);
+  }
+
+  /// moveRight - Move elements to the right.
+  /// @param i     Beginning of the source range.
+  /// @param j     Beginning of the destination range.
+  /// @param Count Number of elements to copy.
+  void moveRight(unsigned i, unsigned j, unsigned Count) {
+    assert(i <= j && "Use moveLeft shift elements left");
+    assert(j + Count <= N && "Invalid range");
+    while (Count--) {
+      first[j + Count]  = first[i + Count];
+      second[j + Count] = second[i + Count];
+    }
+  }
+
+  /// erase - Erase elements [i;j).
+  /// @param i    Beginning of the range to erase.
+  /// @param j    End of the range. (Exclusive).
+  /// @param Size Number of elements in node.
+  void erase(unsigned i, unsigned j, unsigned Size) {
+    moveLeft(j, i, Size - j);
+  }
+
+  /// erase - Erase element at i.
+  /// @param i    Index of element to erase.
+  /// @param Size Number of elements in node.
+  void erase(unsigned i, unsigned Size) {
+    erase(i, i+1, Size);
+  }
+
+  /// shift - Shift elements [i;size) 1 position to the right.
+  /// @param i    Beginning of the range to move.
+  /// @param Size Number of elements in node.
+  void shift(unsigned i, unsigned Size) {
+    moveRight(i, i + 1, Size - i);
+  }
+
+  /// transferToLeftSib - Transfer elements to a left sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Left sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Count Number of elements to transfer.
+  void transferToLeftSib(unsigned Size, NodeBase &Sib, unsigned SSize,
+                         unsigned Count) {
+    Sib.copy(*this, 0, SSize, Count);
+    erase(0, Count, Size);
+  }
+
+  /// transferToRightSib - Transfer elements to a right sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Right sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Count Number of elements to transfer.
+  void transferToRightSib(unsigned Size, NodeBase &Sib, unsigned SSize,
+                          unsigned Count) {
+    Sib.moveRight(0, Count, SSize);
+    Sib.copy(*this, Size-Count, 0, Count);
+  }
+
+  /// adjustFromLeftSib - Adjust the number if elements in this node by moving
+  /// elements to or from a left sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Right sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Add   The number of elements to add to this node, possibly < 0.
+  /// @return      Number of elements added to this node, possibly negative.
+  int adjustFromLeftSib(unsigned Size, NodeBase &Sib, unsigned SSize, int Add) {
+    if (Add > 0) {
+      // We want to grow, copy from sib.
+      unsigned Count = std::min(std::min(unsigned(Add), SSize), N - Size);
+      Sib.transferToRightSib(SSize, *this, Size, Count);
+      return Count;
+    } else {
+      // We want to shrink, copy to sib.
+      unsigned Count = std::min(std::min(unsigned(-Add), Size), N - SSize);
+      transferToLeftSib(Size, Sib, SSize, Count);
+      return -Count;
+    }
+  }
+};
+
+/// IntervalMapImpl::adjustSiblingSizes - Move elements between sibling nodes.
+/// @param Node  Array of pointers to sibling nodes.
+/// @param Nodes Number of nodes.
+/// @param CurSize Array of current node sizes, will be overwritten.
+/// @param NewSize Array of desired node sizes.
+template <typename NodeT>
+void adjustSiblingSizes(NodeT *Node[], unsigned Nodes,
+                        unsigned CurSize[], const unsigned NewSize[]) {
+  // Move elements right.
+  for (int n = Nodes - 1; n; --n) {
+    if (CurSize[n] == NewSize[n])
+      continue;
+    for (int m = n - 1; m != -1; --m) {
+      int d = Node[n]->adjustFromLeftSib(CurSize[n], *Node[m], CurSize[m],
+                                         NewSize[n] - CurSize[n]);
+      CurSize[m] -= d;
+      CurSize[n] += d;
+      // Keep going if the current node was exhausted.
+      if (CurSize[n] >= NewSize[n])
+          break;
+    }
+  }
+
+  if (Nodes == 0)
+    return;
+
+  // Move elements left.
+  for (unsigned n = 0; n != Nodes - 1; ++n) {
+    if (CurSize[n] == NewSize[n])
+      continue;
+    for (unsigned m = n + 1; m != Nodes; ++m) {
+      int d = Node[m]->adjustFromLeftSib(CurSize[m], *Node[n], CurSize[n],
+                                        CurSize[n] -  NewSize[n]);
+      CurSize[m] += d;
+      CurSize[n] -= d;
+      // Keep going if the current node was exhausted.
+      if (CurSize[n] >= NewSize[n])
+          break;
+    }
+  }
+
+#ifndef NDEBUG
+  for (unsigned n = 0; n != Nodes; n++)
+    assert(CurSize[n] == NewSize[n] && "Insufficient element shuffle");
+#endif
+}
+
+/// IntervalMapImpl::distribute - Compute a new distribution of node elements
+/// after an overflow or underflow. Reserve space for a new element at Position,
+/// and compute the node that will hold Position after redistributing node
+/// elements.
+///
+/// It is required that
+///
+///   Elements == sum(CurSize), and
+///   Elements + Grow <= Nodes * Capacity.
+///
+/// NewSize[] will be filled in such that:
+///
+///   sum(NewSize) == Elements, and
+///   NewSize[i] <= Capacity.
+///
+/// The returned index is the node where Position will go, so:
+///
+///   sum(NewSize[0..idx-1]) <= Position
+///   sum(NewSize[0..idx])   >= Position
+///
+/// The last equality, sum(NewSize[0..idx]) == Position, can only happen when
+/// Grow is set and NewSize[idx] == Capacity-1. The index points to the node
+/// before the one holding the Position'th element where there is room for an
+/// insertion.
+///
+/// @param Nodes    The number of nodes.
+/// @param Elements Total elements in all nodes.
+/// @param Capacity The capacity of each node.
+/// @param CurSize  Array[Nodes] of current node sizes, or NULL.
+/// @param NewSize  Array[Nodes] to receive the new node sizes.
+/// @param Position Insert position.
+/// @param Grow     Reserve space for a new element at Position.
+/// @return         (node, offset) for Position.
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+                   const unsigned *CurSize, unsigned NewSize[],
+                   unsigned Position, bool Grow);
+
+
+//===----------------------------------------------------------------------===//
+//---                   IntervalMapImpl::NodeSizer                         ---//
+//===----------------------------------------------------------------------===//
+//
+// Compute node sizes from key and value types.
+//
+// The branching factors are chosen to make nodes fit in three cache lines.
+// This may not be possible if keys or values are very large. Such large objects
+// are handled correctly, but a std::map would probably give better performance.
+//
+//===----------------------------------------------------------------------===//
+
+enum {
+  // Cache line size. Most architectures have 32 or 64 byte cache lines.
+  // We use 64 bytes here because it provides good branching factors.
+  Log2CacheLine = 6,
+  CacheLineBytes = 1 << Log2CacheLine,
+  DesiredNodeBytes = 3 * CacheLineBytes
+};
+
+template <typename KeyT, typename ValT>
+struct NodeSizer {
+  enum {
+    // Compute the leaf node branching factor that makes a node fit in three
+    // cache lines. The branching factor must be at least 3, or some B+-tree
+    // balancing algorithms won't work.
+    // LeafSize can't be larger than CacheLineBytes. This is required by the
+    // PointerIntPair used by NodeRef.
+    DesiredLeafSize = DesiredNodeBytes /
+      static_cast<unsigned>(2*sizeof(KeyT)+sizeof(ValT)),
+    MinLeafSize = 3,
+    LeafSize = DesiredLeafSize > MinLeafSize ? DesiredLeafSize : MinLeafSize
+  };
+
+  typedef NodeBase<std::pair<KeyT, KeyT>, ValT, LeafSize> LeafBase;
+
+  enum {
+    // Now that we have the leaf branching factor, compute the actual allocation
+    // unit size by rounding up to a whole number of cache lines.
+    AllocBytes = (sizeof(LeafBase) + CacheLineBytes-1) & ~(CacheLineBytes-1),
+
+    // Determine the branching factor for branch nodes.
+    BranchSize = AllocBytes /
+      static_cast<unsigned>(sizeof(KeyT) + sizeof(void*))
+  };
+
+  /// Allocator - The recycling allocator used for both branch and leaf nodes.
+  /// This typedef is very likely to be identical for all IntervalMaps with
+  /// reasonably sized entries, so the same allocator can be shared among
+  /// different kinds of maps.
+  typedef RecyclingAllocator<BumpPtrAllocator, char,
+                             AllocBytes, CacheLineBytes> Allocator;
+
+};
+
+
+//===----------------------------------------------------------------------===//
+//---                     IntervalMapImpl::NodeRef                         ---//
+//===----------------------------------------------------------------------===//
+//
+// B+-tree nodes can be leaves or branches, so we need a polymorphic node
+// pointer that can point to both kinds.
+//
+// All nodes are cache line aligned and the low 6 bits of a node pointer are
+// always 0. These bits are used to store the number of elements in the
+// referenced node. Besides saving space, placing node sizes in the parents
+// allow tree balancing algorithms to run without faulting cache lines for nodes
+// that may not need to be modified.
+//
+// A NodeRef doesn't know whether it references a leaf node or a branch node.
+// It is the responsibility of the caller to use the correct types.
+//
+// Nodes are never supposed to be empty, and it is invalid to store a node size
+// of 0 in a NodeRef. The valid range of sizes is 1-64.
+//
+//===----------------------------------------------------------------------===//
+
+class NodeRef {
+  struct CacheAlignedPointerTraits {
+    static inline void *getAsVoidPointer(void *P) { return P; }
+    static inline void *getFromVoidPointer(void *P) { return P; }
+    enum { NumLowBitsAvailable = Log2CacheLine };
+  };
+  PointerIntPair<void*, Log2CacheLine, unsigned, CacheAlignedPointerTraits> pip;
+
+public:
+  /// NodeRef - Create a null ref.
+  NodeRef() {}
+
+  /// operator bool - Detect a null ref.
+  operator bool() const { return pip.getOpaqueValue(); }
+
+  /// NodeRef - Create a reference to the node p with n elements.
+  template <typename NodeT>
+  NodeRef(NodeT *p, unsigned n) : pip(p, n - 1) {
+    assert(n <= NodeT::Capacity && "Size too big for node");
+  }
+
+  /// size - Return the number of elements in the referenced node.
+  unsigned size() const { return pip.getInt() + 1; }
+
+  /// setSize - Update the node size.
+  void setSize(unsigned n) { pip.setInt(n - 1); }
+
+  /// subtree - Access the i'th subtree reference in a branch node.
+  /// This depends on branch nodes storing the NodeRef array as their first
+  /// member.
+  NodeRef &subtree(unsigned i) const {
+    return reinterpret_cast<NodeRef*>(pip.getPointer())[i];
+  }
+
+  /// get - Dereference as a NodeT reference.
+  template <typename NodeT>
+  NodeT &get() const {
+    return *reinterpret_cast<NodeT*>(pip.getPointer());
+  }
+
+  bool operator==(const NodeRef &RHS) const {
+    if (pip == RHS.pip)
+      return true;
+    assert(pip.getPointer() != RHS.pip.getPointer() && "Inconsistent NodeRefs");
+    return false;
+  }
+
+  bool operator!=(const NodeRef &RHS) const {
+    return !operator==(RHS);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//---                      IntervalMapImpl::LeafNode                       ---//
+//===----------------------------------------------------------------------===//
+//
+// Leaf nodes store up to N disjoint intervals with corresponding values.
+//
+// The intervals are kept sorted and fully coalesced so there are no adjacent
+// intervals mapping to the same value.
+//
+// These constraints are always satisfied:
+//
+// - Traits::stopLess(start(i), stop(i))    - Non-empty, sane intervals.
+//
+// - Traits::stopLess(stop(i), start(i + 1) - Sorted.
+//
+// - value(i) != value(i + 1) || !Traits::adjacent(stop(i), start(i + 1))
+//                                          - Fully coalesced.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class LeafNode : public NodeBase<std::pair<KeyT, KeyT>, ValT, N> {
+public:
+  const KeyT &start(unsigned i) const { return this->first[i].first; }
+  const KeyT &stop(unsigned i) const { return this->first[i].second; }
+  const ValT &value(unsigned i) const { return this->second[i]; }
+
+  KeyT &start(unsigned i) { return this->first[i].first; }
+  KeyT &stop(unsigned i) { return this->first[i].second; }
+  ValT &value(unsigned i) { return this->second[i]; }
+
+  /// findFrom - Find the first interval after i that may contain x.
+  /// @param i    Starting index for the search.
+  /// @param Size Number of elements in node.
+  /// @param x    Key to search for.
+  /// @return     First index with !stopLess(key[i].stop, x), or size.
+  ///             This is the first interval that can possibly contain x.
+  unsigned findFrom(unsigned i, unsigned Size, KeyT x) const {
+    assert(i <= Size && Size <= N && "Bad indices");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (i != Size && Traits::stopLess(stop(i), x)) ++i;
+    return i;
+  }
+
+  /// safeFind - Find an interval that is known to exist. This is the same as
+  /// findFrom except is it assumed that x is at least within range of the last
+  /// interval.
+  /// @param i Starting index for the search.
+  /// @param x Key to search for.
+  /// @return  First index with !stopLess(key[i].stop, x), never size.
+  ///          This is the first interval that can possibly contain x.
+  unsigned safeFind(unsigned i, KeyT x) const {
+    assert(i < N && "Bad index");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (Traits::stopLess(stop(i), x)) ++i;
+    assert(i < N && "Unsafe intervals");
+    return i;
+  }
+
+  /// safeLookup - Lookup mapped value for a safe key.
+  /// It is assumed that x is within range of the last entry.
+  /// @param x        Key to search for.
+  /// @param NotFound Value to return if x is not in any interval.
+  /// @return         The mapped value at x or NotFound.
+  ValT safeLookup(KeyT x, ValT NotFound) const {
+    unsigned i = safeFind(0, x);
+    return Traits::startLess(x, start(i)) ? NotFound : value(i);
+  }
+
+  unsigned insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y);
+};
+
+/// insertFrom - Add mapping of [a;b] to y if possible, coalescing as much as
+/// possible. This may cause the node to grow by 1, or it may cause the node
+/// to shrink because of coalescing.
+/// @param i    Starting index = insertFrom(0, size, a)
+/// @param Size Number of elements in node.
+/// @param a    Interval start.
+/// @param b    Interval stop.
+/// @param y    Value be mapped.
+/// @return     (insert position, new size), or (i, Capacity+1) on overflow.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+unsigned LeafNode<KeyT, ValT, N, Traits>::
+insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y) {
+  unsigned i = Pos;
+  assert(i <= Size && Size <= N && "Invalid index");
+  assert(!Traits::stopLess(b, a) && "Invalid interval");
+
+  // Verify the findFrom invariant.
+  assert((i == 0 || Traits::stopLess(stop(i - 1), a)));
+  assert((i == Size || !Traits::stopLess(stop(i), a)));
+  assert((i == Size || Traits::stopLess(b, start(i))) && "Overlapping insert");
+
+  // Coalesce with previous interval.
+  if (i && value(i - 1) == y && Traits::adjacent(stop(i - 1), a)) {
+    Pos = i - 1;
+    // Also coalesce with next interval?
+    if (i != Size && value(i) == y && Traits::adjacent(b, start(i))) {
+      stop(i - 1) = stop(i);
+      this->erase(i, Size);
+      return Size - 1;
+    }
+    stop(i - 1) = b;
+    return Size;
+  }
+
+  // Detect overflow.
+  if (i == N)
+    return N + 1;
+
+  // Add new interval at end.
+  if (i == Size) {
+    start(i) = a;
+    stop(i) = b;
+    value(i) = y;
+    return Size + 1;
+  }
+
+  // Try to coalesce with following interval.
+  if (value(i) == y && Traits::adjacent(b, start(i))) {
+    start(i) = a;
+    return Size;
+  }
+
+  // We must insert before i. Detect overflow.
+  if (Size == N)
+    return N + 1;
+
+  // Insert before i.
+  this->shift(i, Size);
+  start(i) = a;
+  stop(i) = b;
+  value(i) = y;
+  return Size + 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+//---                   IntervalMapImpl::BranchNode                        ---//
+//===----------------------------------------------------------------------===//
+//
+// A branch node stores references to 1--N subtrees all of the same height.
+//
+// The key array in a branch node holds the rightmost stop key of each subtree.
+// It is redundant to store the last stop key since it can be found in the
+// parent node, but doing so makes tree balancing a lot simpler.
+//
+// It is unusual for a branch node to only have one subtree, but it can happen
+// in the root node if it is smaller than the normal nodes.
+//
+// When all of the leaf nodes from all the subtrees are concatenated, they must
+// satisfy the same constraints as a single leaf node. They must be sorted,
+// sane, and fully coalesced.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class BranchNode : public NodeBase<NodeRef, KeyT, N> {
+public:
+  const KeyT &stop(unsigned i) const { return this->second[i]; }
+  const NodeRef &subtree(unsigned i) const { return this->first[i]; }
+
+  KeyT &stop(unsigned i) { return this->second[i]; }
+  NodeRef &subtree(unsigned i) { return this->first[i]; }
+
+  /// findFrom - Find the first subtree after i that may contain x.
+  /// @param i    Starting index for the search.
+  /// @param Size Number of elements in node.
+  /// @param x    Key to search for.
+  /// @return     First index with !stopLess(key[i], x), or size.
+  ///             This is the first subtree that can possibly contain x.
+  unsigned findFrom(unsigned i, unsigned Size, KeyT x) const {
+    assert(i <= Size && Size <= N && "Bad indices");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index to findFrom is past the needed point");
+    while (i != Size && Traits::stopLess(stop(i), x)) ++i;
+    return i;
+  }
+
+  /// safeFind - Find a subtree that is known to exist. This is the same as
+  /// findFrom except is it assumed that x is in range.
+  /// @param i Starting index for the search.
+  /// @param x Key to search for.
+  /// @return  First index with !stopLess(key[i], x), never size.
+  ///          This is the first subtree that can possibly contain x.
+  unsigned safeFind(unsigned i, KeyT x) const {
+    assert(i < N && "Bad index");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (Traits::stopLess(stop(i), x)) ++i;
+    assert(i < N && "Unsafe intervals");
+    return i;
+  }
+
+  /// safeLookup - Get the subtree containing x, Assuming that x is in range.
+  /// @param x Key to search for.
+  /// @return  Subtree containing x
+  NodeRef safeLookup(KeyT x) const {
+    return subtree(safeFind(0, x));
+  }
+
+  /// insert - Insert a new (subtree, stop) pair.
+  /// @param i    Insert position, following entries will be shifted.
+  /// @param Size Number of elements in node.
+  /// @param Node Subtree to insert.
+  /// @param Stop Last key in subtree.
+  void insert(unsigned i, unsigned Size, NodeRef Node, KeyT Stop) {
+    assert(Size < N && "branch node overflow");
+    assert(i <= Size && "Bad insert position");
+    this->shift(i, Size);
+    subtree(i) = Node;
+    stop(i) = Stop;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//---                         IntervalMapImpl::Path                        ---//
+//===----------------------------------------------------------------------===//
+//
+// A Path is used by iterators to represent a position in a B+-tree, and the
+// path to get there from the root.
+//
+// The Path class also constains the tree navigation code that doesn't have to
+// be templatized.
+//
+//===----------------------------------------------------------------------===//
+
+class Path {
+  /// Entry - Each step in the path is a node pointer and an offset into that
+  /// node.
+  struct Entry {
+    void *node;
+    unsigned size;
+    unsigned offset;
+
+    Entry(void *Node, unsigned Size, unsigned Offset)
+      : node(Node), size(Size), offset(Offset) {}
+
+    Entry(NodeRef Node, unsigned Offset)
+      : node(&Node.subtree(0)), size(Node.size()), offset(Offset) {}
+
+    NodeRef &subtree(unsigned i) const {
+      return reinterpret_cast<NodeRef*>(node)[i];
+    }
+  };
+
+  /// path - The path entries, path[0] is the root node, path.back() is a leaf.
+  SmallVector<Entry, 4> path;
+
+public:
+  // Node accessors.
+  template <typename NodeT> NodeT &node(unsigned Level) const {
+    return *reinterpret_cast<NodeT*>(path[Level].node);
+  }
+  unsigned size(unsigned Level) const { return path[Level].size; }
+  unsigned offset(unsigned Level) const { return path[Level].offset; }
+  unsigned &offset(unsigned Level) { return path[Level].offset; }
+
+  // Leaf accessors.
+  template <typename NodeT> NodeT &leaf() const {
+    return *reinterpret_cast<NodeT*>(path.back().node);
+  }
+  unsigned leafSize() const { return path.back().size; }
+  unsigned leafOffset() const { return path.back().offset; }
+  unsigned &leafOffset() { return path.back().offset; }
+
+  /// valid - Return true if path is at a valid node, not at end().
+  bool valid() const {
+    return !path.empty() && path.front().offset < path.front().size;
+  }
+
+  /// height - Return the height of the tree corresponding to this path.
+  /// This matches map->height in a full path.
+  unsigned height() const { return path.size() - 1; }
+
+  /// subtree - Get the subtree referenced from Level. When the path is
+  /// consistent, node(Level + 1) == subtree(Level).
+  /// @param Level 0..height-1. The leaves have no subtrees.
+  NodeRef &subtree(unsigned Level) const {
+    return path[Level].subtree(path[Level].offset);
+  }
+
+  /// reset - Reset cached information about node(Level) from subtree(Level -1).
+  /// @param Level 1..height. THe node to update after parent node changed.
+  void reset(unsigned Level) {
+    path[Level] = Entry(subtree(Level - 1), offset(Level));
+  }
+
+  /// push - Add entry to path.
+  /// @param Node Node to add, should be subtree(path.size()-1).
+  /// @param Offset Offset into Node.
+  void push(NodeRef Node, unsigned Offset) {
+    path.push_back(Entry(Node, Offset));
+  }
+
+  /// pop - Remove the last path entry.
+  void pop() {
+    path.pop_back();
+  }
+
+  /// setSize - Set the size of a node both in the path and in the tree.
+  /// @param Level 0..height. Note that setting the root size won't change
+  ///              map->rootSize.
+  /// @param Size New node size.
+  void setSize(unsigned Level, unsigned Size) {
+    path[Level].size = Size;
+    if (Level)
+      subtree(Level - 1).setSize(Size);
+  }
+
+  /// setRoot - Clear the path and set a new root node.
+  /// @param Node New root node.
+  /// @param Size New root size.
+  /// @param Offset Offset into root node.
+  void setRoot(void *Node, unsigned Size, unsigned Offset) {
+    path.clear();
+    path.push_back(Entry(Node, Size, Offset));
+  }
+
+  /// replaceRoot - Replace the current root node with two new entries after the
+  /// tree height has increased.
+  /// @param Root The new root node.
+  /// @param Size Number of entries in the new root.
+  /// @param Offsets Offsets into the root and first branch nodes.
+  void replaceRoot(void *Root, unsigned Size, IdxPair Offsets);
+
+  /// getLeftSibling - Get the left sibling node at Level, or a null NodeRef.
+  /// @param Level Get the sibling to node(Level).
+  /// @return Left sibling, or NodeRef().
+  NodeRef getLeftSibling(unsigned Level) const;
+
+  /// moveLeft - Move path to the left sibling at Level. Leave nodes below Level
+  /// unaltered.
+  /// @param Level Move node(Level).
+  void moveLeft(unsigned Level);
+
+  /// fillLeft - Grow path to Height by taking leftmost branches.
+  /// @param Height The target height.
+  void fillLeft(unsigned Height) {
+    while (height() < Height)
+      push(subtree(height()), 0);
+  }
+
+  /// getLeftSibling - Get the left sibling node at Level, or a null NodeRef.
+  /// @param Level Get the sinbling to node(Level).
+  /// @return Left sibling, or NodeRef().
+  NodeRef getRightSibling(unsigned Level) const;
+
+  /// moveRight - Move path to the left sibling at Level. Leave nodes below
+  /// Level unaltered.
+  /// @param Level Move node(Level).
+  void moveRight(unsigned Level);
+
+  /// atBegin - Return true if path is at begin().
+  bool atBegin() const {
+    for (unsigned i = 0, e = path.size(); i != e; ++i)
+      if (path[i].offset != 0)
+        return false;
+    return true;
+  }
+
+  /// atLastEntry - Return true if the path is at the last entry of the node at
+  /// Level.
+  /// @param Level Node to examine.
+  bool atLastEntry(unsigned Level) const {
+    return path[Level].offset == path[Level].size - 1;
+  }
+
+  /// legalizeForInsert - Prepare the path for an insertion at Level. When the
+  /// path is at end(), node(Level) may not be a legal node. legalizeForInsert
+  /// ensures that node(Level) is real by moving back to the last node at Level,
+  /// and setting offset(Level) to size(Level) if required.
+  /// @param Level The level where an insertion is about to take place.
+  void legalizeForInsert(unsigned Level) {
+    if (valid())
+      return;
+    moveLeft(Level);
+    ++path[Level].offset;
+  }
+};
+
+} // namespace IntervalMapImpl
+
+
+//===----------------------------------------------------------------------===//
+//---                          IntervalMap                                ----//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT,
+          unsigned N = IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
+          typename Traits = IntervalMapInfo<KeyT> >
+class IntervalMap {
+  typedef IntervalMapImpl::NodeSizer<KeyT, ValT> Sizer;
+  typedef IntervalMapImpl::LeafNode<KeyT, ValT, Sizer::LeafSize, Traits> Leaf;
+  typedef IntervalMapImpl::BranchNode<KeyT, ValT, Sizer::BranchSize, Traits>
+    Branch;
+  typedef IntervalMapImpl::LeafNode<KeyT, ValT, N, Traits> RootLeaf;
+  typedef IntervalMapImpl::IdxPair IdxPair;
+
+  // The RootLeaf capacity is given as a template parameter. We must compute the
+  // corresponding RootBranch capacity.
+  enum {
+    DesiredRootBranchCap = (sizeof(RootLeaf) - sizeof(KeyT)) /
+      (sizeof(KeyT) + sizeof(IntervalMapImpl::NodeRef)),
+    RootBranchCap = DesiredRootBranchCap ? DesiredRootBranchCap : 1
+  };
+
+  typedef IntervalMapImpl::BranchNode<KeyT, ValT, RootBranchCap, Traits>
+    RootBranch;
+
+  // When branched, we store a global start key as well as the branch node.
+  struct RootBranchData {
+    KeyT start;
+    RootBranch node;
+  };
+
+  enum {
+    RootDataSize = sizeof(RootBranchData) > sizeof(RootLeaf) ?
+                   sizeof(RootBranchData) : sizeof(RootLeaf)
+  };
+
+public:
+  typedef typename Sizer::Allocator Allocator;
+  typedef KeyT KeyType;
+  typedef ValT ValueType;
+  typedef Traits KeyTraits;
+
+private:
+  // The root data is either a RootLeaf or a RootBranchData instance.
+  // We can't put them in a union since C++03 doesn't allow non-trivial
+  // constructors in unions.
+  // Instead, we use a char array with pointer alignment. The alignment is
+  // ensured by the allocator member in the class, but still verified in the
+  // constructor. We don't support keys or values that are more aligned than a
+  // pointer.
+  char data[RootDataSize];
+
+  // Tree height.
+  // 0: Leaves in root.
+  // 1: Root points to leaf.
+  // 2: root->branch->leaf ...
+  unsigned height;
+
+  // Number of entries in the root node.
+  unsigned rootSize;
+
+  // Allocator used for creating external nodes.
+  Allocator &allocator;
+
+  /// dataAs - Represent data as a node type without breaking aliasing rules.
+  template <typename T>
+  T &dataAs() const {
+    union {
+      const char *d;
+      T *t;
+    } u;
+    u.d = data;
+    return *u.t;
+  }
+
+  const RootLeaf &rootLeaf() const {
+    assert(!branched() && "Cannot acces leaf data in branched root");
+    return dataAs<RootLeaf>();
+  }
+  RootLeaf &rootLeaf() {
+    assert(!branched() && "Cannot acces leaf data in branched root");
+    return dataAs<RootLeaf>();
+  }
+  RootBranchData &rootBranchData() const {
+    assert(branched() && "Cannot access branch data in non-branched root");
+    return dataAs<RootBranchData>();
+  }
+  RootBranchData &rootBranchData() {
+    assert(branched() && "Cannot access branch data in non-branched root");
+    return dataAs<RootBranchData>();
+  }
+  const RootBranch &rootBranch() const { return rootBranchData().node; }
+  RootBranch &rootBranch()             { return rootBranchData().node; }
+  KeyT rootBranchStart() const { return rootBranchData().start; }
+  KeyT &rootBranchStart()      { return rootBranchData().start; }
+
+  template <typename NodeT> NodeT *newNode() {
+    return new(allocator.template Allocate<NodeT>()) NodeT();
+  }
+
+  template <typename NodeT> void deleteNode(NodeT *P) {
+    P->~NodeT();
+    allocator.Deallocate(P);
+  }
+
+  IdxPair branchRoot(unsigned Position);
+  IdxPair splitRoot(unsigned Position);
+
+  void switchRootToBranch() {
+    rootLeaf().~RootLeaf();
+    height = 1;
+    new (&rootBranchData()) RootBranchData();
+  }
+
+  void switchRootToLeaf() {
+    rootBranchData().~RootBranchData();
+    height = 0;
+    new(&rootLeaf()) RootLeaf();
+  }
+
+  bool branched() const { return height > 0; }
+
+  ValT treeSafeLookup(KeyT x, ValT NotFound) const;
+  void visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef,
+                  unsigned Level));
+  void deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level);
+
+public:
+  explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) {
+    assert((uintptr_t(data) & (alignOf<RootLeaf>() - 1)) == 0 &&
+           "Insufficient alignment");
+    new(&rootLeaf()) RootLeaf();
+  }
+
+  ~IntervalMap() {
+    clear();
+    rootLeaf().~RootLeaf();
+  }
+
+  /// empty -  Return true when no intervals are mapped.
+  bool empty() const {
+    return rootSize == 0;
+  }
+
+  /// start - Return the smallest mapped key in a non-empty map.
+  KeyT start() const {
+    assert(!empty() && "Empty IntervalMap has no start");
+    return !branched() ? rootLeaf().start(0) : rootBranchStart();
+  }
+
+  /// stop - Return the largest mapped key in a non-empty map.
+  KeyT stop() const {
+    assert(!empty() && "Empty IntervalMap has no stop");
+    return !branched() ? rootLeaf().stop(rootSize - 1) :
+                         rootBranch().stop(rootSize - 1);
+  }
+
+  /// lookup - Return the mapped value at x or NotFound.
+  ValT lookup(KeyT x, ValT NotFound = ValT()) const {
+    if (empty() || Traits::startLess(x, start()) || Traits::stopLess(stop(), x))
+      return NotFound;
+    return branched() ? treeSafeLookup(x, NotFound) :
+                        rootLeaf().safeLookup(x, NotFound);
+  }
+
+  /// insert - Add a mapping of [a;b] to y, coalesce with adjacent intervals.
+  /// It is assumed that no key in the interval is mapped to another value, but
+  /// overlapping intervals already mapped to y will be coalesced.
+  void insert(KeyT a, KeyT b, ValT y) {
+    if (branched() || rootSize == RootLeaf::Capacity)
+      return find(a).insert(a, b, y);
+
+    // Easy insert into root leaf.
+    unsigned p = rootLeaf().findFrom(0, rootSize, a);
+    rootSize = rootLeaf().insertFrom(p, rootSize, a, b, y);
+  }
+
+  /// clear - Remove all entries.
+  void clear();
+
+  class const_iterator;
+  class iterator;
+  friend class const_iterator;
+  friend class iterator;
+
+  const_iterator begin() const {
+    const_iterator I(*this);
+    I.goToBegin();
+    return I;
+  }
+
+  iterator begin() {
+    iterator I(*this);
+    I.goToBegin();
+    return I;
+  }
+
+  const_iterator end() const {
+    const_iterator I(*this);
+    I.goToEnd();
+    return I;
+  }
+
+  iterator end() {
+    iterator I(*this);
+    I.goToEnd();
+    return I;
+  }
+
+  /// find - Return an iterator pointing to the first interval ending at or
+  /// after x, or end().
+  const_iterator find(KeyT x) const {
+    const_iterator I(*this);
+    I.find(x);
+    return I;
+  }
+
+  iterator find(KeyT x) {
+    iterator I(*this);
+    I.find(x);
+    return I;
+  }
+};
+
+/// treeSafeLookup - Return the mapped value at x or NotFound, assuming a
+/// branched root.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+ValT IntervalMap<KeyT, ValT, N, Traits>::
+treeSafeLookup(KeyT x, ValT NotFound) const {
+  assert(branched() && "treeLookup assumes a branched root");
+
+  IntervalMapImpl::NodeRef NR = rootBranch().safeLookup(x);
+  for (unsigned h = height-1; h; --h)
+    NR = NR.get<Branch>().safeLookup(x);
+  return NR.get<Leaf>().safeLookup(x, NotFound);
+}
+
+
+// branchRoot - Switch from a leaf root to a branched root.
+// Return the new (root offset, node offset) corresponding to Position.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>::
+branchRoot(unsigned Position) {
+  using namespace IntervalMapImpl;
+  // How many external leaf nodes to hold RootLeaf+1?
+  const unsigned Nodes = RootLeaf::Capacity / Leaf::Capacity + 1;
+
+  // Compute element distribution among new nodes.
+  unsigned size[Nodes];
+  IdxPair NewOffset(0, Position);
+
+  // Is is very common for the root node to be smaller than external nodes.
+  if (Nodes == 1)
+    size[0] = rootSize;
+  else
+    NewOffset = distribute(Nodes, rootSize, Leaf::Capacity,  NULL, size,
+                           Position, true);
+
+  // Allocate new nodes.
+  unsigned pos = 0;
+  NodeRef node[Nodes];
+  for (unsigned n = 0; n != Nodes; ++n) {
+    Leaf *L = newNode<Leaf>();
+    L->copy(rootLeaf(), pos, 0, size[n]);
+    node[n] = NodeRef(L, size[n]);
+    pos += size[n];
+  }
+
+  // Destroy the old leaf node, construct branch node instead.
+  switchRootToBranch();
+  for (unsigned n = 0; n != Nodes; ++n) {
+    rootBranch().stop(n) = node[n].template get<Leaf>().stop(size[n]-1);
+    rootBranch().subtree(n) = node[n];
+  }
+  rootBranchStart() = node[0].template get<Leaf>().start(0);
+  rootSize = Nodes;
+  return NewOffset;
+}
+
+// splitRoot - Split the current BranchRoot into multiple Branch nodes.
+// Return the new (root offset, node offset) corresponding to Position.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>::
+splitRoot(unsigned Position) {
+  using namespace IntervalMapImpl;
+  // How many external leaf nodes to hold RootBranch+1?
+  const unsigned Nodes = RootBranch::Capacity / Branch::Capacity + 1;
+
+  // Compute element distribution among new nodes.
+  unsigned Size[Nodes];
+  IdxPair NewOffset(0, Position);
+
+  // Is is very common for the root node to be smaller than external nodes.
+  if (Nodes == 1)
+    Size[0] = rootSize;
+  else
+    NewOffset = distribute(Nodes, rootSize, Leaf::Capacity,  NULL, Size,
+                           Position, true);
+
+  // Allocate new nodes.
+  unsigned Pos = 0;
+  NodeRef Node[Nodes];
+  for (unsigned n = 0; n != Nodes; ++n) {
+    Branch *B = newNode<Branch>();
+    B->copy(rootBranch(), Pos, 0, Size[n]);
+    Node[n] = NodeRef(B, Size[n]);
+    Pos += Size[n];
+  }
+
+  for (unsigned n = 0; n != Nodes; ++n) {
+    rootBranch().stop(n) = Node[n].template get<Branch>().stop(Size[n]-1);
+    rootBranch().subtree(n) = Node[n];
+  }
+  rootSize = Nodes;
+  ++height;
+  return NewOffset;
+}
+
+/// visitNodes - Visit each external node.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef, unsigned Height)) {
+  if (!branched())
+    return;
+  SmallVector<IntervalMapImpl::NodeRef, 4> Refs, NextRefs;
+
+  // Collect level 0 nodes from the root.
+  for (unsigned i = 0; i != rootSize; ++i)
+    Refs.push_back(rootBranch().subtree(i));
+
+  // Visit all branch nodes.
+  for (unsigned h = height - 1; h; --h) {
+    for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+      for (unsigned j = 0, s = Refs[i].size(); j != s; ++j)
+        NextRefs.push_back(Refs[i].subtree(j));
+      (this->*f)(Refs[i], h);
+    }
+    Refs.clear();
+    Refs.swap(NextRefs);
+  }
+
+  // Visit all leaf nodes.
+  for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+    (this->*f)(Refs[i], 0);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level) {
+  if (Level)
+    deleteNode(&Node.get<Branch>());
+  else
+    deleteNode(&Node.get<Leaf>());
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+clear() {
+  if (branched()) {
+    visitNodes(&IntervalMap::deleteNode);
+    switchRootToLeaf();
+  }
+  rootSize = 0;
+}
+
+//===----------------------------------------------------------------------===//
+//---                   IntervalMap::const_iterator                       ----//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class IntervalMap<KeyT, ValT, N, Traits>::const_iterator :
+  public std::iterator<std::bidirectional_iterator_tag, ValT> {
+protected:
+  friend class IntervalMap;
+
+  // The map referred to.
+  IntervalMap *map;
+
+  // We store a full path from the root to the current position.
+  // The path may be partially filled, but never between iterator calls.
+  IntervalMapImpl::Path path;
+
+  explicit const_iterator(const IntervalMap &map) :
+    map(const_cast<IntervalMap*>(&map)) {}
+
+  bool branched() const {
+    assert(map && "Invalid iterator");
+    return map->branched();
+  }
+
+  void setRoot(unsigned Offset) {
+    if (branched())
+      path.setRoot(&map->rootBranch(), map->rootSize, Offset);
+    else
+      path.setRoot(&map->rootLeaf(), map->rootSize, Offset);
+  }
+
+  void pathFillFind(KeyT x);
+  void treeFind(KeyT x);
+  void treeAdvanceTo(KeyT x);
+
+  /// unsafeStart - Writable access to start() for iterator.
+  KeyT &unsafeStart() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().start(path.leafOffset()) :
+                        path.leaf<RootLeaf>().start(path.leafOffset());
+  }
+
+  /// unsafeStop - Writable access to stop() for iterator.
+  KeyT &unsafeStop() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().stop(path.leafOffset()) :
+                        path.leaf<RootLeaf>().stop(path.leafOffset());
+  }
+
+  /// unsafeValue - Writable access to value() for iterator.
+  ValT &unsafeValue() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().value(path.leafOffset()) :
+                        path.leaf<RootLeaf>().value(path.leafOffset());
+  }
+
+public:
+  /// const_iterator - Create an iterator that isn't pointing anywhere.
+  const_iterator() : map(0) {}
+
+  /// valid - Return true if the current position is valid, false for end().
+  bool valid() const { return path.valid(); }
+
+  /// start - Return the beginning of the current interval.
+  const KeyT &start() const { return unsafeStart(); }
+
+  /// stop - Return the end of the current interval.
+  const KeyT &stop() const { return unsafeStop(); }
+
+  /// value - Return the mapped value at the current interval.
+  const ValT &value() const { return unsafeValue(); }
+
+  const ValT &operator*() const { return value(); }
+
+  bool operator==(const const_iterator &RHS) const {
+    assert(map == RHS.map && "Cannot compare iterators from different maps");
+    if (!valid())
+      return !RHS.valid();
+    if (path.leafOffset() != RHS.path.leafOffset())
+      return false;
+    return &path.template leaf<Leaf>() == &RHS.path.template leaf<Leaf>();
+  }
+
+  bool operator!=(const const_iterator &RHS) const {
+    return !operator==(RHS);
+  }
+
+  /// goToBegin - Move to the first interval in map.
+  void goToBegin() {
+    setRoot(0);
+    if (branched())
+      path.fillLeft(map->height);
+  }
+
+  /// goToEnd - Move beyond the last interval in map.
+  void goToEnd() {
+    setRoot(map->rootSize);
+  }
+
+  /// preincrement - move to the next interval.
+  const_iterator &operator++() {
+    assert(valid() && "Cannot increment end()");
+    if (++path.leafOffset() == path.leafSize() && branched())
+      path.moveRight(map->height);
+    return *this;
+  }
+
+  /// postincrement - Dont do that!
+  const_iterator operator++(int) {
+    const_iterator tmp = *this;
+    operator++();
+    return tmp;
+  }
+
+  /// predecrement - move to the previous interval.
+  const_iterator &operator--() {
+    if (path.leafOffset() && (valid() || !branched()))
+      --path.leafOffset();
+    else
+      path.moveLeft(map->height);
+    return *this;
+  }
+
+  /// postdecrement - Dont do that!
+  const_iterator operator--(int) {
+    const_iterator tmp = *this;
+    operator--();
+    return tmp;
+  }
+
+  /// find - Move to the first interval with stop >= x, or end().
+  /// This is a full search from the root, the current position is ignored.
+  void find(KeyT x) {
+    if (branched())
+      treeFind(x);
+    else
+      setRoot(map->rootLeaf().findFrom(0, map->rootSize, x));
+  }
+
+  /// advanceTo - Move to the first interval with stop >= x, or end().
+  /// The search is started from the current position, and no earlier positions
+  /// can be found. This is much faster than find() for small moves.
+  void advanceTo(KeyT x) {
+    if (!valid())
+      return;
+    if (branched())
+      treeAdvanceTo(x);
+    else
+      path.leafOffset() =
+        map->rootLeaf().findFrom(path.leafOffset(), map->rootSize, x);
+  }
+
+};
+
+/// pathFillFind - Complete path by searching for x.
+/// @param x Key to search for.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+const_iterator::pathFillFind(KeyT x) {
+  IntervalMapImpl::NodeRef NR = path.subtree(path.height());
+  for (unsigned i = map->height - path.height() - 1; i; --i) {
+    unsigned p = NR.get<Branch>().safeFind(0, x);
+    path.push(NR, p);
+    NR = NR.subtree(p);
+  }
+  path.push(NR, NR.get<Leaf>().safeFind(0, x));
+}
+
+/// treeFind - Find in a branched tree.
+/// @param x Key to search for.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+const_iterator::treeFind(KeyT x) {
+  setRoot(map->rootBranch().findFrom(0, map->rootSize, x));
+  if (valid())
+    pathFillFind(x);
+}
+
+/// treeAdvanceTo - Find position after the current one.
+/// @param x Key to search for.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+const_iterator::treeAdvanceTo(KeyT x) {
+  // Can we stay on the same leaf node?
+  if (!Traits::stopLess(path.leaf<Leaf>().stop(path.leafSize() - 1), x)) {
+    path.leafOffset() = path.leaf<Leaf>().safeFind(path.leafOffset(), x);
+    return;
+  }
+
+  // Drop the current leaf.
+  path.pop();
+
+  // Search towards the root for a usable subtree.
+  if (path.height()) {
+    for (unsigned l = path.height() - 1; l; --l) {
+      if (!Traits::stopLess(path.node<Branch>(l).stop(path.offset(l)), x)) {
+        // The branch node at l+1 is usable
+        path.offset(l + 1) =
+          path.node<Branch>(l + 1).safeFind(path.offset(l + 1), x);
+        return pathFillFind(x);
+      }
+      path.pop();
+    }
+    // Is the level-1 Branch usable?
+    if (!Traits::stopLess(map->rootBranch().stop(path.offset(0)), x)) {
+      path.offset(1) = path.node<Branch>(1).safeFind(path.offset(1), x);
+      return pathFillFind(x);
+    }
+  }
+
+  // We reached the root.
+  setRoot(map->rootBranch().findFrom(path.offset(0), map->rootSize, x));
+  if (valid())
+    pathFillFind(x);
+}
+
+//===----------------------------------------------------------------------===//
+//---                       IntervalMap::iterator                         ----//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class IntervalMap<KeyT, ValT, N, Traits>::iterator : public const_iterator {
+  friend class IntervalMap;
+  typedef IntervalMapImpl::IdxPair IdxPair;
+
+  explicit iterator(IntervalMap &map) : const_iterator(map) {}
+
+  void setNodeStop(unsigned Level, KeyT Stop);
+  bool insertNode(unsigned Level, IntervalMapImpl::NodeRef Node, KeyT Stop);
+  template <typename NodeT> bool overflow(unsigned Level);
+  void treeInsert(KeyT a, KeyT b, ValT y);
+  void eraseNode(unsigned Level);
+  void treeErase(bool UpdateRoot = true);
+  bool canCoalesceLeft(KeyT Start, ValT x);
+  bool canCoalesceRight(KeyT Stop, ValT x);
+
+public:
+  /// iterator - Create null iterator.
+  iterator() {}
+
+  /// setStart - Move the start of the current interval.
+  /// This may cause coalescing with the previous interval.
+  /// @param a New start key, must not overlap the previous interval.
+  void setStart(KeyT a);
+
+  /// setStop - Move the end of the current interval.
+  /// This may cause coalescing with the following interval.
+  /// @param b New stop key, must not overlap the following interval.
+  void setStop(KeyT b);
+
+  /// setValue - Change the mapped value of the current interval.
+  /// This may cause coalescing with the previous and following intervals.
+  /// @param x New value.
+  void setValue(ValT x);
+
+  /// setStartUnchecked - Move the start of the current interval without
+  /// checking for coalescing or overlaps.
+  /// This should only be used when it is known that coalescing is not required.
+  /// @param a New start key.
+  void setStartUnchecked(KeyT a) { this->unsafeStart() = a; }
+
+  /// setStopUnchecked - Move the end of the current interval without checking
+  /// for coalescing or overlaps.
+  /// This should only be used when it is known that coalescing is not required.
+  /// @param b New stop key.
+  void setStopUnchecked(KeyT b) {
+    this->unsafeStop() = b;
+    // Update keys in branch nodes as well.
+    if (this->path.atLastEntry(this->path.height()))
+      setNodeStop(this->path.height(), b);
+  }
+
+  /// setValueUnchecked - Change the mapped value of the current interval
+  /// without checking for coalescing.
+  /// @param x New value.
+  void setValueUnchecked(ValT x) { this->unsafeValue() = x; }
+
+  /// insert - Insert mapping [a;b] -> y before the current position.
+  void insert(KeyT a, KeyT b, ValT y);
+
+  /// erase - Erase the current interval.
+  void erase();
+
+  iterator &operator++() {
+    const_iterator::operator++();
+    return *this;
+  }
+
+  iterator operator++(int) {
+    iterator tmp = *this;
+    operator++();
+    return tmp;
+  }
+
+  iterator &operator--() {
+    const_iterator::operator--();
+    return *this;
+  }
+
+  iterator operator--(int) {
+    iterator tmp = *this;
+    operator--();
+    return tmp;
+  }
+
+};
+
+/// canCoalesceLeft - Can the current interval coalesce to the left after
+/// changing start or value?
+/// @param Start New start of current interval.
+/// @param Value New value for current interval.
+/// @return True when updating the current interval would enable coalescing.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+bool IntervalMap<KeyT, ValT, N, Traits>::
+iterator::canCoalesceLeft(KeyT Start, ValT Value) {
+  using namespace IntervalMapImpl;
+  Path &P = this->path;
+  if (!this->branched()) {
+    unsigned i = P.leafOffset();
+    RootLeaf &Node = P.leaf<RootLeaf>();
+    return i && Node.value(i-1) == Value &&
+                Traits::adjacent(Node.stop(i-1), Start);
+  }
+  // Branched.
+  if (unsigned i = P.leafOffset()) {
+    Leaf &Node = P.leaf<Leaf>();
+    return Node.value(i-1) == Value && Traits::adjacent(Node.stop(i-1), Start);
+  } else if (NodeRef NR = P.getLeftSibling(P.height())) {
+    unsigned i = NR.size() - 1;
+    Leaf &Node = NR.get<Leaf>();
+    return Node.value(i) == Value && Traits::adjacent(Node.stop(i), Start);
+  }
+  return false;
+}
+
+/// canCoalesceRight - Can the current interval coalesce to the right after
+/// changing stop or value?
+/// @param Stop New stop of current interval.
+/// @param Value New value for current interval.
+/// @return True when updating the current interval would enable coalescing.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+bool IntervalMap<KeyT, ValT, N, Traits>::
+iterator::canCoalesceRight(KeyT Stop, ValT Value) {
+  using namespace IntervalMapImpl;
+  Path &P = this->path;
+  unsigned i = P.leafOffset() + 1;
+  if (!this->branched()) {
+    if (i >= P.leafSize())
+      return false;
+    RootLeaf &Node = P.leaf<RootLeaf>();
+    return Node.value(i) == Value && Traits::adjacent(Stop, Node.start(i));
+  }
+  // Branched.
+  if (i < P.leafSize()) {
+    Leaf &Node = P.leaf<Leaf>();
+    return Node.value(i) == Value && Traits::adjacent(Stop, Node.start(i));
+  } else if (NodeRef NR = P.getRightSibling(P.height())) {
+    Leaf &Node = NR.get<Leaf>();
+    return Node.value(0) == Value && Traits::adjacent(Stop, Node.start(0));
+  }
+  return false;
+}
+
+/// setNodeStop - Update the stop key of the current node at level and above.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setNodeStop(unsigned Level, KeyT Stop) {
+  // There are no references to the root node, so nothing to update.
+  if (!Level)
+    return;
+  IntervalMapImpl::Path &P = this->path;
+  // Update nodes pointing to the current node.
+  while (--Level) {
+    P.node<Branch>(Level).stop(P.offset(Level)) = Stop;
+    if (!P.atLastEntry(Level))
+      return;
+  }
+  // Update root separately since it has a different layout.
+  P.node<RootBranch>(Level).stop(P.offset(Level)) = Stop;
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setStart(KeyT a) {
+  assert(Traits::stopLess(a, this->stop()) && "Cannot move start beyond stop");
+  KeyT &CurStart = this->unsafeStart();
+  if (!Traits::startLess(a, CurStart) || !canCoalesceLeft(a, this->value())) {
+    CurStart = a;
+    return;
+  }
+  // Coalesce with the interval to the left.
+  --*this;
+  a = this->start();
+  erase();
+  setStartUnchecked(a);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setStop(KeyT b) {
+  assert(Traits::stopLess(this->start(), b) && "Cannot move stop beyond start");
+  if (Traits::startLess(b, this->stop()) ||
+      !canCoalesceRight(b, this->value())) {
+    setStopUnchecked(b);
+    return;
+  }
+  // Coalesce with interval to the right.
+  KeyT a = this->start();
+  erase();
+  setStartUnchecked(a);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setValue(ValT x) {
+  setValueUnchecked(x);
+  if (canCoalesceRight(this->stop(), x)) {
+    KeyT a = this->start();
+    erase();
+    setStartUnchecked(a);
+  }
+  if (canCoalesceLeft(this->start(), x)) {
+    --*this;
+    KeyT a = this->start();
+    erase();
+    setStartUnchecked(a);
+  }
+}
+
+/// insertNode - insert a node before the current path at level.
+/// Leave the current path pointing at the new node.
+/// @param Level path index of the node to be inserted.
+/// @param Node The node to be inserted.
+/// @param Stop The last index in the new node.
+/// @return True if the tree height was increased.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+bool IntervalMap<KeyT, ValT, N, Traits>::
+iterator::insertNode(unsigned Level, IntervalMapImpl::NodeRef Node, KeyT Stop) {
+  assert(Level && "Cannot insert next to the root");
+  bool SplitRoot = false;
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+
+  if (Level == 1) {
+    // Insert into the root branch node.
+    if (IM.rootSize < RootBranch::Capacity) {
+      IM.rootBranch().insert(P.offset(0), IM.rootSize, Node, Stop);
+      P.setSize(0, ++IM.rootSize);
+      P.reset(Level);
+      return SplitRoot;
+    }
+
+    // We need to split the root while keeping our position.
+    SplitRoot = true;
+    IdxPair Offset = IM.splitRoot(P.offset(0));
+    P.replaceRoot(&IM.rootBranch(), IM.rootSize, Offset);
+
+    // Fall through to insert at the new higher level.
+    ++Level;
+  }
+
+  // When inserting before end(), make sure we have a valid path.
+  P.legalizeForInsert(--Level);
+
+  // Insert into the branch node at Level-1.
+  if (P.size(Level) == Branch::Capacity) {
+    // Branch node is full, handle handle the overflow.
+    assert(!SplitRoot && "Cannot overflow after splitting the root");
+    SplitRoot = overflow<Branch>(Level);
+    Level += SplitRoot;
+  }
+  P.node<Branch>(Level).insert(P.offset(Level), P.size(Level), Node, Stop);
+  P.setSize(Level, P.size(Level) + 1);
+  if (P.atLastEntry(Level))
+    setNodeStop(Level, Stop);
+  P.reset(Level + 1);
+  return SplitRoot;
+}
+
+// insert
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::insert(KeyT a, KeyT b, ValT y) {
+  if (this->branched())
+    return treeInsert(a, b, y);
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+
+  // Try simple root leaf insert.
+  unsigned Size = IM.rootLeaf().insertFrom(P.leafOffset(), IM.rootSize, a, b, y);
+
+  // Was the root node insert successful?
+  if (Size <= RootLeaf::Capacity) {
+    P.setSize(0, IM.rootSize = Size);
+    return;
+  }
+
+  // Root leaf node is full, we must branch.
+  IdxPair Offset = IM.branchRoot(P.leafOffset());
+  P.replaceRoot(&IM.rootBranch(), IM.rootSize, Offset);
+
+  // Now it fits in the new leaf.
+  treeInsert(a, b, y);
+}
+
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::treeInsert(KeyT a, KeyT b, ValT y) {
+  using namespace IntervalMapImpl;
+  Path &P = this->path;
+
+  if (!P.valid())
+    P.legalizeForInsert(this->map->height);
+
+  // Check if this insertion will extend the node to the left.
+  if (P.leafOffset() == 0 && Traits::startLess(a, P.leaf<Leaf>().start(0))) {
+    // Node is growing to the left, will it affect a left sibling node?
+    if (NodeRef Sib = P.getLeftSibling(P.height())) {
+      Leaf &SibLeaf = Sib.get<Leaf>();
+      unsigned SibOfs = Sib.size() - 1;
+      if (SibLeaf.value(SibOfs) == y &&
+          Traits::adjacent(SibLeaf.stop(SibOfs), a)) {
+        // This insertion will coalesce with the last entry in SibLeaf. We can
+        // handle it in two ways:
+        //  1. Extend SibLeaf.stop to b and be done, or
+        //  2. Extend a to SibLeaf, erase the SibLeaf entry and continue.
+        // We prefer 1., but need 2 when coalescing to the right as well.
+        Leaf &CurLeaf = P.leaf<Leaf>();
+        P.moveLeft(P.height());
+        if (Traits::stopLess(b, CurLeaf.start(0)) &&
+            (y != CurLeaf.value(0) || !Traits::adjacent(b, CurLeaf.start(0)))) {
+          // Easy, just extend SibLeaf and we're done.
+          setNodeStop(P.height(), SibLeaf.stop(SibOfs) = b);
+          return;
+        } else {
+          // We have both left and right coalescing. Erase the old SibLeaf entry
+          // and continue inserting the larger interval.
+          a = SibLeaf.start(SibOfs);
+          treeErase(/* UpdateRoot= */false);
+        }
+      }
+    } else {
+      // No left sibling means we are at begin(). Update cached bound.
+      this->map->rootBranchStart() = a;
+    }
+  }
+
+  // When we are inserting at the end of a leaf node, we must update stops.
+  unsigned Size = P.leafSize();
+  bool Grow = P.leafOffset() == Size;
+  Size = P.leaf<Leaf>().insertFrom(P.leafOffset(), Size, a, b, y);
+
+  // Leaf insertion unsuccessful? Overflow and try again.
+  if (Size > Leaf::Capacity) {
+    overflow<Leaf>(P.height());
+    Grow = P.leafOffset() == P.leafSize();
+    Size = P.leaf<Leaf>().insertFrom(P.leafOffset(), P.leafSize(), a, b, y);
+    assert(Size <= Leaf::Capacity && "overflow() didn't make room");
+  }
+
+  // Inserted, update offset and leaf size.
+  P.setSize(P.height(), Size);
+
+  // Insert was the last node entry, update stops.
+  if (Grow)
+    setNodeStop(P.height(), b);
+}
+
+/// erase - erase the current interval and move to the next position.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::erase() {
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+  assert(P.valid() && "Cannot erase end()");
+  if (this->branched())
+    return treeErase();
+  IM.rootLeaf().erase(P.leafOffset(), IM.rootSize);
+  P.setSize(0, --IM.rootSize);
+}
+
+/// treeErase - erase() for a branched tree.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::treeErase(bool UpdateRoot) {
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+  Leaf &Node = P.leaf<Leaf>();
+
+  // Nodes are not allowed to become empty.
+  if (P.leafSize() == 1) {
+    IM.deleteNode(&Node);
+    eraseNode(IM.height);
+    // Update rootBranchStart if we erased begin().
+    if (UpdateRoot && IM.branched() && P.valid() && P.atBegin())
+      IM.rootBranchStart() = P.leaf<Leaf>().start(0);
+    return;
+  }
+
+  // Erase current entry.
+  Node.erase(P.leafOffset(), P.leafSize());
+  unsigned NewSize = P.leafSize() - 1;
+  P.setSize(IM.height, NewSize);
+  // When we erase the last entry, update stop and move to a legal position.
+  if (P.leafOffset() == NewSize) {
+    setNodeStop(IM.height, Node.stop(NewSize - 1));
+    P.moveRight(IM.height);
+  } else if (UpdateRoot && P.atBegin())
+    IM.rootBranchStart() = P.leaf<Leaf>().start(0);
+}
+
+/// eraseNode - Erase the current node at Level from its parent and move path to
+/// the first entry of the next sibling node.
+/// The node must be deallocated by the caller.
+/// @param Level 1..height, the root node cannot be erased.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::eraseNode(unsigned Level) {
+  assert(Level && "Cannot erase root node");
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+
+  if (--Level == 0) {
+    IM.rootBranch().erase(P.offset(0), IM.rootSize);
+    P.setSize(0, --IM.rootSize);
+    // If this cleared the root, switch to height=0.
+    if (IM.empty()) {
+      IM.switchRootToLeaf();
+      this->setRoot(0);
+      return;
+    }
+  } else {
+    // Remove node ref from branch node at Level.
+    Branch &Parent = P.node<Branch>(Level);
+    if (P.size(Level) == 1) {
+      // Branch node became empty, remove it recursively.
+      IM.deleteNode(&Parent);
+      eraseNode(Level);
+    } else {
+      // Branch node won't become empty.
+      Parent.erase(P.offset(Level), P.size(Level));
+      unsigned NewSize = P.size(Level) - 1;
+      P.setSize(Level, NewSize);
+      // If we removed the last branch, update stop and move to a legal pos.
+      if (P.offset(Level) == NewSize) {
+        setNodeStop(Level, Parent.stop(NewSize - 1));
+        P.moveRight(Level);
+      }
+    }
+  }
+  // Update path cache for the new right sibling position.
+  if (P.valid()) {
+    P.reset(Level + 1);
+    P.offset(Level + 1) = 0;
+  }
+}
+
+/// overflow - Distribute entries of the current node evenly among
+/// its siblings and ensure that the current node is not full.
+/// This may require allocating a new node.
+/// @param NodeT The type of node at Level (Leaf or Branch).
+/// @param Level path index of the overflowing node.
+/// @return True when the tree height was changed.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+template <typename NodeT>
+bool IntervalMap<KeyT, ValT, N, Traits>::
+iterator::overflow(unsigned Level) {
+  using namespace IntervalMapImpl;
+  Path &P = this->path;
+  unsigned CurSize[4];
+  NodeT *Node[4];
+  unsigned Nodes = 0;
+  unsigned Elements = 0;
+  unsigned Offset = P.offset(Level);
+
+  // Do we have a left sibling?
+  NodeRef LeftSib = P.getLeftSibling(Level);
+  if (LeftSib) {
+    Offset += Elements = CurSize[Nodes] = LeftSib.size();
+    Node[Nodes++] = &LeftSib.get<NodeT>();
+  }
+
+  // Current node.
+  Elements += CurSize[Nodes] = P.size(Level);
+  Node[Nodes++] = &P.node<NodeT>(Level);
+
+  // Do we have a right sibling?
+  NodeRef RightSib = P.getRightSibling(Level);
+  if (RightSib) {
+    Elements += CurSize[Nodes] = RightSib.size();
+    Node[Nodes++] = &RightSib.get<NodeT>();
+  }
+
+  // Do we need to allocate a new node?
+  unsigned NewNode = 0;
+  if (Elements + 1 > Nodes * NodeT::Capacity) {
+    // Insert NewNode at the penultimate position, or after a single node.
+    NewNode = Nodes == 1 ? 1 : Nodes - 1;
+    CurSize[Nodes] = CurSize[NewNode];
+    Node[Nodes] = Node[NewNode];
+    CurSize[NewNode] = 0;
+    Node[NewNode] = this->map->newNode<NodeT>();
+    ++Nodes;
+  }
+
+  // Compute the new element distribution.
+  unsigned NewSize[4];
+  IdxPair NewOffset = distribute(Nodes, Elements, NodeT::Capacity,
+                                 CurSize, NewSize, Offset, true);
+  adjustSiblingSizes(Node, Nodes, CurSize, NewSize);
+
+  // Move current location to the leftmost node.
+  if (LeftSib)
+    P.moveLeft(Level);
+
+  // Elements have been rearranged, now update node sizes and stops.
+  bool SplitRoot = false;
+  unsigned Pos = 0;
+  for (;;) {
+    KeyT Stop = Node[Pos]->stop(NewSize[Pos]-1);
+    if (NewNode && Pos == NewNode) {
+      SplitRoot = insertNode(Level, NodeRef(Node[Pos], NewSize[Pos]), Stop);
+      Level += SplitRoot;
+    } else {
+      P.setSize(Level, NewSize[Pos]);
+      setNodeStop(Level, Stop);
+    }
+    if (Pos + 1 == Nodes)
+      break;
+    P.moveRight(Level);
+    ++Pos;
+  }
+
+  // Where was I? Find NewOffset.
+  while(Pos != NewOffset.first) {
+    P.moveLeft(Level);
+    --Pos;
+  }
+  P.offset(Level) = NewOffset.second;
+  return SplitRoot;
+}
+
+//===----------------------------------------------------------------------===//
+//---                       IntervalMapOverlaps                           ----//
+//===----------------------------------------------------------------------===//
+
+/// IntervalMapOverlaps - Iterate over the overlaps of mapped intervals in two
+/// IntervalMaps. The maps may be different, but the KeyT and Traits types
+/// should be the same.
+///
+/// Typical uses:
+///
+/// 1. Test for overlap:
+///    bool overlap = IntervalMapOverlaps(a, b).valid();
+///
+/// 2. Enumerate overlaps:
+///    for (IntervalMapOverlaps I(a, b); I.valid() ; ++I) { ... }
+///
+template <typename MapA, typename MapB>
+class IntervalMapOverlaps {
+  typedef typename MapA::KeyType KeyType;
+  typedef typename MapA::KeyTraits Traits;
+  typename MapA::const_iterator posA;
+  typename MapB::const_iterator posB;
+
+  /// advance - Move posA and posB forward until reaching an overlap, or until
+  /// either meets end.
+  /// Don't move the iterators if they are already overlapping.
+  void advance() {
+    if (!valid())
+      return;
+
+    if (Traits::stopLess(posA.stop(), posB.start())) {
+      // A ends before B begins. Catch up.
+      posA.advanceTo(posB.start());
+      if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start()))
+        return;
+    } else if (Traits::stopLess(posB.stop(), posA.start())) {
+      // B ends before A begins. Catch up.
+      posB.advanceTo(posA.start());
+      if (!posB.valid() || !Traits::stopLess(posA.stop(), posB.start()))
+        return;
+    } else
+      // Already overlapping.
+      return;
+
+    for (;;) {
+      // Make a.end > b.start.
+      posA.advanceTo(posB.start());
+      if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start()))
+        return;
+      // Make b.end > a.start.
+      posB.advanceTo(posA.start());
+      if (!posB.valid() || !Traits::stopLess(posA.stop(), posB.start()))
+        return;
+    }
+  }
+
+public:
+  /// IntervalMapOverlaps - Create an iterator for the overlaps of a and b.
+  IntervalMapOverlaps(const MapA &a, const MapB &b)
+    : posA(b.empty() ? a.end() : a.find(b.start())),
+      posB(posA.valid() ? b.find(posA.start()) : b.end()) { advance(); }
+
+  /// valid - Return true if iterator is at an overlap.
+  bool valid() const {
+    return posA.valid() && posB.valid();
+  }
+
+  /// a - access the left hand side in the overlap.
+  const typename MapA::const_iterator &a() const { return posA; }
+
+  /// b - access the right hand side in the overlap.
+  const typename MapB::const_iterator &b() const { return posB; }
+
+  /// start - Beginning of the overlapping interval.
+  KeyType start() const {
+    KeyType ak = a().start();
+    KeyType bk = b().start();
+    return Traits::startLess(ak, bk) ? bk : ak;
+  }
+
+  /// stop - End of the overlapping interval.
+  KeyType stop() const {
+    KeyType ak = a().stop();
+    KeyType bk = b().stop();
+    return Traits::startLess(ak, bk) ? ak : bk;
+  }
+
+  /// skipA - Move to the next overlap that doesn't involve a().
+  void skipA() {
+    ++posA;
+    advance();
+  }
+
+  /// skipB - Move to the next overlap that doesn't involve b().
+  void skipB() {
+    ++posB;
+    advance();
+  }
+
+  /// Preincrement - Move to the next overlap.
+  IntervalMapOverlaps &operator++() {
+    // Bump the iterator that ends first. The other one may have more overlaps.
+    if (Traits::startLess(posB.stop(), posA.stop()))
+      skipB();
+    else
+      skipA();
+    return *this;
+  }
+
+  /// advanceTo - Move to the first overlapping interval with
+  /// stopLess(x, stop()).
+  void advanceTo(KeyType x) {
+    if (!valid())
+      return;
+    // Make sure advanceTo sees monotonic keys.
+    if (Traits::stopLess(posA.stop(), x))
+      posA.advanceTo(x);
+    if (Traits::stopLess(posB.stop(), x))
+      posB.advanceTo(x);
+    advance();
+  }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index 34e54a07a0ef..ee8b69f3d12f 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -61,6 +61,60 @@ template <typename T>
 struct simplify_type<Optional<T> >
   : public simplify_type<const Optional<T> > {};
 
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator==(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator!=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator<(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator<=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator>=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients needs to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose 
+/// errors at compile time.
+template<typename T, typename U>
+void operator>(const Optional<T> &X, const Optional<U> &Y);
+
 } // end llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 64f4a7cee4b9..85dbba2b4a4a 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -91,6 +91,13 @@ public:
     Value |= IntVal << IntShift;  // Set new integer.
   }
 
+  PointerTy const *getAddrOfPointer() const {
+    assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
+           "Can only return the address if IntBits is cleared and "
+           "PtrTraits doesn't change the pointer");
+    return reinterpret_cast<PointerTy const *>(&Value);
+  }
+
   void *getOpaqueValue() const { return reinterpret_cast<void*>(Value); }
   void setFromOpaqueValue(void *Val) { Value = reinterpret_cast<intptr_t>(Val);}
 
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 3a514b562697..61de042b0ff2 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -107,6 +107,18 @@ namespace llvm {
       if (is<T>()) return get<T>();
       return T();
     }
+
+    /// \brief If the union is set to the first pointer type we can get an
+    /// address pointing to it.
+    template <typename T>
+    PT1 const *getAddrOf() const {
+      assert(is<PT1>() && "Val is not the first pointer");
+      assert(get<PT1>() == Val.getPointer() &&
+         "Can't get the address because PointerLikeTypeTraits changes the ptr");
+      T const *can_only_get_address_of_first_pointer_type
+                        = reinterpret_cast<PT1 const *>(Val.getAddrOfPointer());
+      return can_only_get_address_of_first_pointer_type;
+    }
     
     /// Assignment operators - Allow assigning into this union from either
     /// pointer type, setting the discriminator to remember what it came from.
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 47e5b2bd4ad0..e3b499488d0c 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -56,8 +56,7 @@ class po_iterator : public std::iterator<std::forward_iterator_tag,
   void traverseChild() {
     while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
       NodeType *BB = *VisitStack.back().second++;
-      if (!this->Visited.count(BB)) {  // If the block is not visited...
-        this->Visited.insert(BB);
+      if (this->Visited.insert(BB)) {  // If the block is not visited...
         VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
       }
     }
@@ -72,8 +71,7 @@ class po_iterator : public std::iterator<std::forward_iterator_tag,
 
   inline po_iterator(NodeType *BB, SetType &S) :
     po_iterator_storage<SetType, ExtStorage>(S) {
-    if(!S.count(BB)) {
-      this->Visited.insert(BB);
+    if (this->Visited.insert(BB)) {
       VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
       traverseChild();
     }
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index c49d599cf38f..3e93cfe914fa 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -60,7 +60,7 @@ class scc_iterator
   // First element is basic block pointer, second is the 'next child' to visit
   std::vector<std::pair<NodeType *, ChildItTy> > VisitStack;
 
-  // MinVistNumStack - Stack holding the "min" values for each node in the DFS.
+  // MinVisitNumStack - Stack holding the "min" values for each node in the DFS.
   // This is used to track the minimum uplink values for all children of
   // the corresponding node on the VisitStack.
   std::vector<unsigned> MinVisitNumStack;
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index c96ad19707f3..af3c482043b1 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -31,25 +31,23 @@
 #ifndef LLVM_ADT_SCOPEDHASHTABLE_H
 #define LLVM_ADT_SCOPEDHASHTABLE_H
 
-#include <cassert>
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
 
 namespace llvm {
 
-template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
+template <typename K, typename V, typename KInfo = DenseMapInfo<K>,
+          typename AllocatorTy = MallocAllocator>
 class ScopedHashTable;
 
-template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
+template <typename K, typename V>
 class ScopedHashTableVal {
   ScopedHashTableVal *NextInScope;
   ScopedHashTableVal *NextForKey;
   K Key;
   V Val;
+  ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {}
 public:
-  ScopedHashTableVal(ScopedHashTableVal *nextInScope,
-                     ScopedHashTableVal *nextForKey, const K &key, const V &val)
-    : NextInScope(nextInScope), NextForKey(nextForKey), Key(key), Val(val) {
-  }
 
   const K &getKey() const { return Key; }
   const V &getValue() const { return Val; }
@@ -57,33 +55,53 @@ public:
 
   ScopedHashTableVal *getNextForKey() { return NextForKey; }
   const ScopedHashTableVal *getNextForKey() const { return NextForKey; }
-public:
   ScopedHashTableVal *getNextInScope() { return NextInScope; }
+  
+  template <typename AllocatorTy>
+  static ScopedHashTableVal *Create(ScopedHashTableVal *nextInScope,
+                                    ScopedHashTableVal *nextForKey,
+                                    const K &key, const V &val,
+                                    AllocatorTy &Allocator) {
+    ScopedHashTableVal *New = Allocator.template Allocate<ScopedHashTableVal>();
+    // Set up the value.
+    new (New) ScopedHashTableVal(key, val);
+    New->NextInScope = nextInScope;
+    New->NextForKey = nextForKey; 
+    return New;
+  }
+  
+  template <typename AllocatorTy>
+  void Destroy(AllocatorTy &Allocator) {
+    // Free memory referenced by the item.
+    this->~ScopedHashTableVal();
+    Allocator.Deallocate(this);
+  }
 };
 
-template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
+template <typename K, typename V, typename KInfo = DenseMapInfo<K>,
+          typename AllocatorTy = MallocAllocator>
 class ScopedHashTableScope {
   /// HT - The hashtable that we are active for.
-  ScopedHashTable<K, V, KInfo> &HT;
+  ScopedHashTable<K, V, KInfo, AllocatorTy> &HT;
 
   /// PrevScope - This is the scope that we are shadowing in HT.
   ScopedHashTableScope *PrevScope;
 
   /// LastValInScope - This is the last value that was inserted for this scope
   /// or null if none have been inserted yet.
-  ScopedHashTableVal<K, V, KInfo> *LastValInScope;
+  ScopedHashTableVal<K, V> *LastValInScope;
   void operator=(ScopedHashTableScope&);       // DO NOT IMPLEMENT
   ScopedHashTableScope(ScopedHashTableScope&); // DO NOT IMPLEMENT
 public:
-  ScopedHashTableScope(ScopedHashTable<K, V, KInfo> &HT);
+  ScopedHashTableScope(ScopedHashTable<K, V, KInfo, AllocatorTy> &HT);
   ~ScopedHashTableScope();
 
 private:
-  friend class ScopedHashTable<K, V, KInfo>;
-  ScopedHashTableVal<K, V, KInfo> *getLastValInScope() {
+  friend class ScopedHashTable<K, V, KInfo, AllocatorTy>;
+  ScopedHashTableVal<K, V> *getLastValInScope() {
     return LastValInScope;
   }
-  void setLastValInScope(ScopedHashTableVal<K, V, KInfo> *Val) {
+  void setLastValInScope(ScopedHashTableVal<K, V> *Val) {
     LastValInScope = Val;
   }
 };
@@ -91,9 +109,9 @@ private:
 
 template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
 class ScopedHashTableIterator {
-  ScopedHashTableVal<K, V, KInfo> *Node;
+  ScopedHashTableVal<K, V> *Node;
 public:
-  ScopedHashTableIterator(ScopedHashTableVal<K, V, KInfo> *node) : Node(node) {}
+  ScopedHashTableIterator(ScopedHashTableVal<K, V> *node) : Node(node) {}
 
   V &operator*() const {
     assert(Node && "Dereference end()");
@@ -121,26 +139,40 @@ public:
 };
 
 
-template <typename K, typename V, typename KInfo>
+template <typename K, typename V, typename KInfo, typename AllocatorTy>
 class ScopedHashTable {
-  DenseMap<K, ScopedHashTableVal<K, V, KInfo>*, KInfo> TopLevelMap;
-  ScopedHashTableScope<K, V, KInfo> *CurScope;
+  typedef ScopedHashTableVal<K, V> ValTy;
+  DenseMap<K, ValTy*, KInfo> TopLevelMap;
+  ScopedHashTableScope<K, V, KInfo, AllocatorTy> *CurScope;
+  
+  AllocatorTy Allocator;
+  
   ScopedHashTable(const ScopedHashTable&); // NOT YET IMPLEMENTED
   void operator=(const ScopedHashTable&);  // NOT YET IMPLEMENTED
-  friend class ScopedHashTableScope<K, V, KInfo>;
+  friend class ScopedHashTableScope<K, V, KInfo, AllocatorTy>;
 public:
   ScopedHashTable() : CurScope(0) {}
+  ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {}
   ~ScopedHashTable() {
     assert(CurScope == 0 && TopLevelMap.empty() && "Scope imbalance!");
   }
+  
+  /// ScopeTy - This is a helpful typedef that allows clients to get easy access
+  /// to the name of the scope for this hash table.
+  typedef ScopedHashTableScope<K, V, KInfo, AllocatorTy> ScopeTy;
+
+  /// Access to the allocator.
+  typedef typename ReferenceAdder<AllocatorTy>::result AllocatorRefTy;
+  typedef typename ReferenceAdder<const AllocatorTy>::result AllocatorCRefTy;
+  AllocatorRefTy getAllocator() { return Allocator; }
+  AllocatorCRefTy getAllocator() const { return Allocator; }
 
   bool count(const K &Key) const {
     return TopLevelMap.count(Key);
   }
 
   V lookup(const K &Key) {
-    typename DenseMap<K, ScopedHashTableVal<K, V, KInfo>*, KInfo>::iterator
-      I = TopLevelMap.find(Key);
+    typename DenseMap<K, ValTy*, KInfo>::iterator I = TopLevelMap.find(Key);
     if (I != TopLevelMap.end())
       return I->second->getValue();
       
@@ -150,10 +182,10 @@ public:
   void insert(const K &Key, const V &Val) {
     assert(CurScope && "No scope active!");
 
-    ScopedHashTableVal<K, V, KInfo> *&KeyEntry = TopLevelMap[Key];
+    ScopedHashTableVal<K, V> *&KeyEntry = TopLevelMap[Key];
 
-    KeyEntry= new ScopedHashTableVal<K, V, KInfo>(CurScope->getLastValInScope(),
-                                                  KeyEntry, Key, Val);
+    KeyEntry = ValTy::Create(CurScope->getLastValInScope(), KeyEntry, Key, Val,
+                             Allocator);
     CurScope->setLastValInScope(KeyEntry);
   }
 
@@ -162,7 +194,7 @@ public:
   iterator end() { return iterator(0); }
 
   iterator begin(const K &Key) {
-    typename DenseMap<K, ScopedHashTableVal<K, V, KInfo>*, KInfo>::iterator I =
+    typename DenseMap<K, ValTy*, KInfo>::iterator I =
       TopLevelMap.find(Key);
     if (I == TopLevelMap.end()) return end();
     return iterator(I->second);
@@ -171,29 +203,28 @@ public:
 
 /// ScopedHashTableScope ctor - Install this as the current scope for the hash
 /// table.
-template <typename K, typename V, typename KInfo>
-ScopedHashTableScope<K, V, KInfo>::
-  ScopedHashTableScope(ScopedHashTable<K, V, KInfo> &ht) : HT(ht) {
+template <typename K, typename V, typename KInfo, typename Allocator>
+ScopedHashTableScope<K, V, KInfo, Allocator>::
+  ScopedHashTableScope(ScopedHashTable<K, V, KInfo, Allocator> &ht) : HT(ht) {
   PrevScope = HT.CurScope;
   HT.CurScope = this;
   LastValInScope = 0;
 }
 
-template <typename K, typename V, typename KInfo>
-ScopedHashTableScope<K, V, KInfo>::~ScopedHashTableScope() {
+template <typename K, typename V, typename KInfo, typename Allocator>
+ScopedHashTableScope<K, V, KInfo, Allocator>::~ScopedHashTableScope() {
   assert(HT.CurScope == this && "Scope imbalance!");
   HT.CurScope = PrevScope;
 
   // Pop and delete all values corresponding to this scope.
-  while (ScopedHashTableVal<K, V, KInfo> *ThisEntry = LastValInScope) {
+  while (ScopedHashTableVal<K, V> *ThisEntry = LastValInScope) {
     // Pop this value out of the TopLevelMap.
     if (ThisEntry->getNextForKey() == 0) {
       assert(HT.TopLevelMap[ThisEntry->getKey()] == ThisEntry &&
              "Scope imbalance!");
       HT.TopLevelMap.erase(ThisEntry->getKey());
     } else {
-      ScopedHashTableVal<K, V, KInfo> *&KeyEntry =
-        HT.TopLevelMap[ThisEntry->getKey()];
+      ScopedHashTableVal<K, V> *&KeyEntry = HT.TopLevelMap[ThisEntry->getKey()];
       assert(KeyEntry == ThisEntry && "Scope imbalance!");
       KeyEntry = ThisEntry->getNextForKey();
     }
@@ -202,7 +233,7 @@ ScopedHashTableScope<K, V, KInfo>::~ScopedHashTableScope() {
     LastValInScope = ThisEntry->getNextInScope();
 
     // Delete this entry.
-    delete ThisEntry;
+    ThisEntry->Destroy(HT.getAllocator());
   }
 }
 
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index bf8286c1d840..abe20676d54d 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -114,13 +114,15 @@ public:
   }
 
   /// @brief Remove an item from the set vector.
-  void remove(const value_type& X) {
+  bool remove(const value_type& X) {
     if (set_.erase(X)) {
       typename vector_type::iterator I =
         std::find(vector_.begin(), vector_.end(), X);
       assert(I != vector_.end() && "Corrupted SetVector instances!");
       vector_.erase(I);
+      return true;
     }
+    return false;
   }
 
 
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 3441d0a90c9b..b15b3ee0418f 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -187,6 +187,13 @@ public:
     return getPointer()->any();
   }
 
+  /// all - Returns true if all bits are set.
+  bool all() const {
+    if (isSmall())
+      return getSmallBits() == (uintptr_t(1) << getSmallSize()) - 1;
+    return getPointer()->all();
+  }
+
   /// none - Returns true if none of the bits are set.
   bool none() const {
     if (isSmall())
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 424bdba5a20e..ff32ba87a264 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -16,9 +16,10 @@
 #define LLVM_ADT_SMALLPTRSET_H
 
 #include <cassert>
+#include <cstddef>
 #include <cstring>
 #include <iterator>
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 
 namespace llvm {
@@ -56,7 +57,7 @@ protected:
   /// it, so that the end iterator actually points to valid memory.
   unsigned CurArraySize;
 
-  // If small, this is # elts allocated consequtively
+  // If small, this is # elts allocated consecutively
   unsigned NumElements;
   unsigned NumTombstones;
 
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index 05bd8a42c67f..da264164821f 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -27,6 +27,9 @@ public:
   // Default ctor - Initialize to empty.
   SmallString() {}
 
+  // Initialize from a StringRef.
+  SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
+
   // Initialize with a range.
   template<typename ItTy>
   SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {}
@@ -38,15 +41,16 @@ public:
   // Extra methods.
   StringRef str() const { return StringRef(this->begin(), this->size()); }
 
-  // Implicit conversion to StringRef.
-  operator StringRef() const { return str(); }
-
-  const char *c_str() {
+  // TODO: Make this const, if it's safe...
+  const char* c_str() {
     this->push_back(0);
     this->pop_back();
     return this->data();
   }
 
+  // Implicit conversion to StringRef.
+  operator StringRef() const { return str(); }
+
   // Extra operators.
   const SmallString &operator=(StringRef RHS) {
     this->clear();
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index fec6bcd628cc..8b0a13d6ed74 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -20,6 +20,7 @@
 #include <cstddef>
 #include <cstdlib>
 #include <cstring>
+#include <iterator>
 #include <memory>
 
 #ifdef _MSC_VER
@@ -57,19 +58,13 @@ protected:
   // Allocate raw space for N elements of type T.  If T has a ctor or dtor, we
   // don't want it to be automatically run, so we need to represent the space as
   // something else.  An array of char would work great, but might not be
-  // aligned sufficiently.  Instead, we either use GCC extensions, or some
-  // number of union instances for the space, which guarantee maximal alignment.
-  struct U {
-#ifdef __GNUC__
-    char X __attribute__((aligned));
-#else
-    union {
-      double D;
-      long double LD;
-      long long L;
-      void *P;
-    } X;
-#endif
+  // aligned sufficiently.  Instead we use some number of union instances for
+  // the space, which guarantee maximal alignment.
+  union U {
+    double D;
+    long double LD;
+    long long L;
+    void *P;
   } FirstEl;
   // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
 
@@ -94,7 +89,7 @@ protected:
   }
 
   /// grow_pod - This is an implementation of the grow() method which only works
-  /// on POD-like datatypes and is out of line to reduce code duplication.
+  /// on POD-like data types and is out of line to reduce code duplication.
   void grow_pod(size_t MinSizeInBytes, size_t TSize);
 
 public:
@@ -269,7 +264,7 @@ public:
 template <typename T>
 class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
   typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
-  
+
   SmallVectorImpl(const SmallVectorImpl&); // DISABLED.
 public:
   typedef typename SuperClass::iterator iterator;
@@ -346,7 +341,6 @@ public:
     return Result;
   }
 
-
   void swap(SmallVectorImpl &RHS);
 
   /// append - Add the specified range to the end of the SmallVector.
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 0862981887ab..d977136b2fc1 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -17,7 +17,7 @@
 
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 3a1319f1090c..f137ea21d058 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -26,7 +26,7 @@
 #ifndef LLVM_ADT_STATISTIC_H
 #define LLVM_ADT_STATISTIC_H
 
-#include "llvm/System/Atomic.h"
+#include "llvm/Support/Atomic.h"
 
 namespace llvm {
 class raw_ostream;
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 3c53adee63c8..acbed66ef401 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_ADT_STRINGEXTRAS_H
 #define LLVM_ADT_STRINGEXTRAS_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/StringRef.h"
 #include <cctype>
@@ -25,10 +25,11 @@
 namespace llvm {
 template<typename T> class SmallVectorImpl;
 
-/// hexdigit - Return the (uppercase) hexadecimal character for the
+/// hexdigit - Return the hexadecimal character for the
 /// given number \arg X (which should be less than 16).
-static inline char hexdigit(unsigned X) {
-  return X < 10 ? '0' + X : 'A' + X - 10;
+static inline char hexdigit(unsigned X, bool LowerCase = false) {
+  const char HexChar = LowerCase ? 'a' : 'A';
+  return X < 10 ? '0' + X : HexChar + X - 10;
 }
 
 /// utohex_buffer - Emit the specified number into the buffer specified by
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 59ff6aa4f6aa..bad0e6f5136a 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -137,8 +137,8 @@ public:
   StringMapEntry(unsigned strLen, const ValueTy &V)
     : StringMapEntryBase(strLen), second(V) {}
 
-  StringRef getKey() const { 
-    return StringRef(getKeyData(), getKeyLength()); 
+  StringRef getKey() const {
+    return StringRef(getKeyData(), getKeyLength());
   }
 
   const ValueTy &getValue() const { return second; }
@@ -167,7 +167,7 @@ public:
 
     unsigned AllocSize = static_cast<unsigned>(sizeof(StringMapEntry))+
       KeyLength+1;
-    unsigned Alignment = alignof<StringMapEntry>();
+    unsigned Alignment = alignOf<StringMapEntry>();
 
     StringMapEntry *NewItem =
       static_cast<StringMapEntry*>(Allocator.Allocate(AllocSize,Alignment));
@@ -216,14 +216,14 @@ public:
   static const StringMapEntry &GetStringMapEntryFromValue(const ValueTy &V) {
     return GetStringMapEntryFromValue(const_cast<ValueTy&>(V));
   }
-  
+
   /// GetStringMapEntryFromKeyData - Given key data that is known to be embedded
   /// into a StringMapEntry, return the StringMapEntry itself.
   static StringMapEntry &GetStringMapEntryFromKeyData(const char *KeyData) {
     char *Ptr = const_cast<char*>(KeyData) - sizeof(StringMapEntry<ValueTy>);
     return *reinterpret_cast<StringMapEntry*>(Ptr);
   }
-  
+
 
   /// Destroy - Destroy this StringMapEntry, releasing memory back to the
   /// specified allocator.
@@ -254,7 +254,7 @@ public:
   StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
   explicit StringMap(unsigned InitialSize)
     : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
-  
+
   explicit StringMap(AllocatorTy A)
     : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), Allocator(A) {}
 
@@ -262,16 +262,19 @@ public:
     : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {
     assert(RHS.empty() &&
            "Copy ctor from non-empty stringmap not implemented yet!");
+    (void)RHS;
   }
   void operator=(const StringMap &RHS) {
     assert(RHS.empty() &&
            "assignment from non-empty stringmap not implemented yet!");
+    (void)RHS;
     clear();
   }
 
-
-  AllocatorTy &getAllocator() { return Allocator; }
-  const AllocatorTy &getAllocator() const { return Allocator; }
+  typedef typename ReferenceAdder<AllocatorTy>::result AllocatorRefTy;
+  typedef typename ReferenceAdder<const AllocatorTy>::result AllocatorCRefTy;
+  AllocatorRefTy getAllocator() { return Allocator; }
+  AllocatorCRefTy getAllocator() const { return Allocator; }
 
   typedef const char* key_type;
   typedef ValueTy mapped_type;
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 8386d3ee428b..1766d2b9f2d0 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -132,7 +132,7 @@ namespace llvm {
     /// numbers.
     int compare_numeric(StringRef RHS) const;
 
-    /// \brief Determine the edit distance between this string and another 
+    /// \brief Determine the edit distance between this string and another
     /// string.
     ///
     /// \param Other the string to compare this string against.
@@ -142,11 +142,16 @@ namespace llvm {
     /// operation, rather than as two operations (an insertion and a
     /// removal).
     ///
+    /// \param MaxEditDistance If non-zero, the maximum edit distance that
+    /// this routine is allowed to compute. If the edit distance will exceed
+    /// that maximum, returns \c MaxEditDistance+1.
+    ///
     /// \returns the minimum number of character insertions, removals,
     /// or (if \p AllowReplacements is \c true) replacements needed to
     /// transform one of the given strings into the other. If zero,
     /// the strings are identical.
-    unsigned edit_distance(StringRef Other, bool AllowReplacements = true);
+    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
+                           unsigned MaxEditDistance = 0);
 
     /// str - Get the contents as an std::string.
     std::string str() const {
@@ -251,6 +256,18 @@ namespace llvm {
     /// Note: O(size() + Chars.size())
     size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
 
+    /// find_last_of - Find the last character in the string that is \arg C, or
+    /// npos if not found.
+    size_type find_last_of(char C, size_t From = npos) const {
+      return rfind(C, From);
+    }
+
+    /// find_last_of - Find the last character in the string that is in \arg C,
+    /// or npos if not found.
+    ///
+    /// Note: O(size() + Chars.size())
+    size_type find_last_of(StringRef Chars, size_t From = npos) const;
+
     /// @}
     /// @name Helpful Algorithms
     /// @{
@@ -432,6 +449,10 @@ namespace llvm {
 
   /// @}
 
+  // StringRefs can be treated like a POD type.
+  template <typename T> struct isPodLike;
+  template <> struct isPodLike<StringRef> { static const bool value = true; };
+
 }
 
 #endif
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 8dca3c1cfb1b..e6dcc23258f2 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -45,7 +45,7 @@ class Triple {
 public:
   enum ArchType {
     UnknownArch,
-    
+
     alpha,   // Alpha: alpha
     arm,     // ARM; arm, armv.*, xscale
     bfin,    // Blackfin: bfin
@@ -53,7 +53,6 @@ public:
     mips,    // MIPS: mips, mipsallegrex
     mipsel,  // MIPSEL: mipsel, mipsallegrexel, psp
     msp430,  // MSP430: msp430
-    pic16,   // PIC16: pic16
     ppc,     // PPC: powerpc
     ppc64,   // PPC64: powerpc64, ppu
     sparc,   // Sparc: sparc
@@ -65,13 +64,14 @@ public:
     x86_64,  // X86-64: amd64, x86_64
     xcore,   // XCore: xcore
     mblaze,  // MBlaze: mblaze
+    ptx,     // PTX: ptx
 
     InvalidArch
   };
   enum VendorType {
     UnknownVendor,
 
-    Apple, 
+    Apple,
     PC
   };
   enum OSType {
@@ -84,8 +84,7 @@ public:
     FreeBSD,
     Linux,
     Lv2,        // PS3
-    MinGW32,
-    MinGW64,
+    MinGW32,    // i*86-pc-mingw32, *-w64-mingw32
     NetBSD,
     OpenBSD,
     Psp,
@@ -94,7 +93,15 @@ public:
     Haiku,
     Minix
   };
-  
+  enum EnvironmentType {
+    UnknownEnvironment,
+
+    GNU,
+    GNUEABI,
+    EABI,
+    MachO
+  };
+
 private:
   std::string Data;
 
@@ -107,16 +114,20 @@ private:
   /// The parsed OS type.
   mutable OSType OS;
 
+  /// The parsed Environment type.
+  mutable EnvironmentType Environment;
+
   bool isInitialized() const { return Arch != InvalidArch; }
   static ArchType ParseArch(StringRef ArchName);
   static VendorType ParseVendor(StringRef VendorName);
   static OSType ParseOS(StringRef OSName);
+  static EnvironmentType ParseEnvironment(StringRef EnvironmentName);
   void Parse() const;
 
 public:
   /// @name Constructors
   /// @{
-  
+
   Triple() : Data(), Arch(InvalidArch) {}
   explicit Triple(StringRef Str) : Data(Str), Arch(InvalidArch) {}
   explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr)
@@ -127,6 +138,17 @@ public:
     Data += OSStr;
   }
 
+  explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr,
+    StringRef EnvironmentStr)
+    : Data(ArchStr), Arch(InvalidArch) {
+    Data += '-';
+    Data += VendorStr;
+    Data += '-';
+    Data += OSStr;
+    Data += '-';
+    Data += EnvironmentStr;
+  }
+
   /// @}
   /// @name Normalization
   /// @{
@@ -140,22 +162,22 @@ public:
   /// @}
   /// @name Typed Component Access
   /// @{
-  
+
   /// getArch - Get the parsed architecture type of this triple.
-  ArchType getArch() const { 
-    if (!isInitialized()) Parse(); 
+  ArchType getArch() const {
+    if (!isInitialized()) Parse();
     return Arch;
   }
-  
+
   /// getVendor - Get the parsed vendor type of this triple.
-  VendorType getVendor() const { 
-    if (!isInitialized()) Parse(); 
+  VendorType getVendor() const {
+    if (!isInitialized()) Parse();
     return Vendor;
   }
-  
+
   /// getOS - Get the parsed operating system type of this triple.
-  OSType getOS() const { 
-    if (!isInitialized()) Parse(); 
+  OSType getOS() const {
+    if (!isInitialized()) Parse();
     return OS;
   }
 
@@ -165,6 +187,12 @@ public:
     return getEnvironmentName() != "";
   }
 
+  /// getEnvironment - Get the parsed environment type of this triple.
+  EnvironmentType getEnvironment() const {
+    if (!isInitialized()) Parse();
+    return Environment;
+  }
+
   /// @}
   /// @name Direct Component Access
   /// @{
@@ -193,13 +221,13 @@ public:
   /// if the environment component is present).
   StringRef getOSAndEnvironmentName() const;
 
-  
+
   /// getDarwinNumber - Parse the 'darwin number' out of the specific target
   /// triple.  For example, if we have darwin8.5 return 8,5,0.  If any entry is
   /// not defined, return 0's.  This requires that the triple have an OSType of
   /// darwin before it is called.
   void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const;
-  
+
   /// getDarwinMajorNumber - Return just the major version number, this is
   /// specialized because it is a common query.
   unsigned getDarwinMajorNumber() const {
@@ -207,7 +235,7 @@ public:
     getDarwinNumber(Maj, Min, Rev);
     return Maj;
   }
-  
+
   /// @}
   /// @name Mutators
   /// @{
@@ -224,6 +252,10 @@ public:
   /// to a known type.
   void setOS(OSType Kind);
 
+  /// setEnvironment - Set the environment (fourth) component of the triple
+  /// to a known type.
+  void setEnvironment(EnvironmentType Kind);
+
   /// setTriple - Set all components to the new triple \arg Str.
   void setTriple(const Twine &Str);
 
@@ -271,9 +303,14 @@ public:
   /// vendor.
   static const char *getVendorTypeName(VendorType Kind);
 
-  /// getOSTypeName - Get the canonical name for the \arg Kind vendor.
+  /// getOSTypeName - Get the canonical name for the \arg Kind operating
+  /// system.
   static const char *getOSTypeName(OSType Kind);
 
+  /// getEnvironmentTypeName - Get the canonical name for the \arg Kind
+  /// environment.
+  static const char *getEnvironmentTypeName(EnvironmentType Kind);
+
   /// @}
   /// @name Static helpers for converting alternate architecture names.
   /// @{
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index b519a3e2ed11..ab8d3653e33f 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -11,7 +11,7 @@
 #define LLVM_ADT_TWINE_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <string>
 
@@ -42,7 +42,7 @@ namespace llvm {
   /// Twines support a special 'null' value, which always concatenates to form
   /// itself, and renders as an empty string. This can be returned from APIs to
   /// effectively nullify any concatenations performed on the result.
-  /// 
+  ///
   /// \b Implementation \n
   ///
   /// Given the nature of a Twine, it is not possible for the Twine's
@@ -99,7 +99,7 @@ namespace llvm {
       /// A pointer to a StringRef instance.
       StringRefKind,
 
-      /// An unsigned int value reinterpreted as a pointer, to render as an 
+      /// An unsigned int value reinterpreted as a pointer, to render as an
       /// unsigned decimal integer.
       DecUIKind,
 
@@ -260,32 +260,32 @@ namespace llvm {
     }
 
     /// Construct a twine to print \arg Val as an unsigned decimal integer.
-    explicit Twine(unsigned Val) 
+    explicit Twine(unsigned Val)
       : LHS((void*)(intptr_t)Val), LHSKind(DecUIKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as a signed decimal integer.
-    explicit Twine(int Val) 
+    explicit Twine(int Val)
       : LHS((void*)(intptr_t)Val), LHSKind(DecIKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as an unsigned decimal integer.
-    explicit Twine(const unsigned long &Val) 
+    explicit Twine(const unsigned long &Val)
       : LHS(&Val), LHSKind(DecULKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as a signed decimal integer.
-    explicit Twine(const long &Val) 
+    explicit Twine(const long &Val)
       : LHS(&Val), LHSKind(DecLKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as an unsigned decimal integer.
-    explicit Twine(const unsigned long long &Val) 
+    explicit Twine(const unsigned long long &Val)
       : LHS(&Val), LHSKind(DecULLKind), RHSKind(EmptyKind) {
     }
 
     /// Construct a twine to print \arg Val as a signed decimal integer.
-    explicit Twine(const long long &Val) 
+    explicit Twine(const long long &Val)
       : LHS(&Val), LHSKind(DecLLKind), RHSKind(EmptyKind) {
     }
 
@@ -330,12 +330,12 @@ namespace llvm {
     bool isTriviallyEmpty() const {
       return isNullary();
     }
-    
+
     /// isSingleStringRef - Return true if this twine can be dynamically
     /// accessed as a single StringRef value with getSingleStringRef().
     bool isSingleStringRef() const {
       if (getRHSKind() != EmptyKind) return false;
-      
+
       switch (getLHSKind()) {
       case EmptyKind:
       case CStringKind:
@@ -382,6 +382,14 @@ namespace llvm {
     /// SmallVector and a StringRef to the SmallVector's data is returned.
     StringRef toStringRef(SmallVectorImpl<char> &Out) const;
 
+    /// toNullTerminatedStringRef - This returns the twine as a single null
+    /// terminated StringRef if it can be represented as such. Otherwise the
+    /// twine is written into the given SmallVector and a StringRef to the
+    /// SmallVector's data is returned.
+    ///
+    /// The returned StringRef's size does not include the null terminator.
+    StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
+
     /// print - Write the concatenated string represented by this twine to the
     /// stream \arg OS.
     void print(raw_ostream &OS) const;
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index ded17fc32223..d1f4e5a0dacd 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -29,7 +29,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 
 #include <iterator>
 
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 4e3afe171199..865fcb3d8aad 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -38,6 +38,7 @@
 #ifndef LLVM_ADT_ILIST_H
 #define LLVM_ADT_ILIST_H
 
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <iterator>
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index ad68d48e531b..71a5982c7d39 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -16,11 +16,21 @@
 // which automatically provides functionality for the entire suite of client
 // APIs.
 //
-// This API represents memory as a (Pointer, Size) pair.  The Pointer component
-// specifies the base memory address of the region, the Size specifies how large
-// of an area is being queried, or UnknownSize if the size is not known.
-// Pointers that point to two completely different objects in memory never
-// alias, regardless of the value of the Size component.
+// This API identifies memory regions with the Location class. The pointer
+// component specifies the base memory address of the region. The Size specifies
+// the maximum size (in address units) of the memory region, or UnknownSize if
+// the size is not known. The TBAA tag identifies the "type" of the memory
+// reference; see the TypeBasedAliasAnalysis class for details.
+//
+// Some non-obvious details include:
+//  - Pointers that point to two completely different objects in memory never
+//    alias, regardless of the value of the Size component.
+//  - NoAlias doesn't imply inequal pointers. The most obvious example of this
+//    is two pointers to constant memory. Even if they are equal, constant
+//    memory is never stored to, so there will never be any dependencies.
+//    In this and other situations, the pointers may be both NoAlias and
+//    MustAlias at the same time. The current API can only return one result,
+//    though this is rarely a problem in practice.
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,7 +38,6 @@
 #define LLVM_ANALYSIS_ALIAS_ANALYSIS_H
 
 #include "llvm/Support/CallSite.h"
-#include "llvm/System/IncludeFile.h"
 #include <vector>
 
 namespace llvm {
@@ -39,6 +48,8 @@ class VAArgInst;
 class TargetData;
 class Pass;
 class AnalysisUsage;
+class MemTransferInst;
+class MemIntrinsic;
 
 class AliasAnalysis {
 protected:
@@ -67,7 +78,7 @@ public:
   /// UnknownSize - This is a special value which can be used with the
   /// size arguments in alias queries to indicate that the caller does not
   /// know the sizes of the potential memory references.
-  static unsigned const UnknownSize = ~0u;
+  static uint64_t const UnknownSize = ~UINT64_C(0);
 
   /// getTargetData - Return a pointer to the current TargetData object, or
   /// null if no TargetData object is available.
@@ -77,12 +88,57 @@ public:
   /// getTypeStoreSize - Return the TargetData store size for the given type,
   /// if known, or a conservative value otherwise.
   ///
-  unsigned getTypeStoreSize(const Type *Ty);
+  uint64_t getTypeStoreSize(const Type *Ty);
 
   //===--------------------------------------------------------------------===//
   /// Alias Queries...
   ///
 
+  /// Location - A description of a memory location.
+  struct Location {
+    /// Ptr - The address of the start of the location.
+    const Value *Ptr;
+    /// Size - The maximum size of the location, in address-units, or
+    /// UnknownSize if the size is not known.  Note that an unknown size does
+    /// not mean the pointer aliases the entire virtual address space, because
+    /// there are restrictions on stepping out of one object and into another.
+    /// See http://llvm.org/docs/LangRef.html#pointeraliasing
+    uint64_t Size;
+    /// TBAATag - The metadata node which describes the TBAA type of
+    /// the location, or null if there is no known unique tag.
+    const MDNode *TBAATag;
+
+    explicit Location(const Value *P = 0, uint64_t S = UnknownSize,
+                      const MDNode *N = 0)
+      : Ptr(P), Size(S), TBAATag(N) {}
+
+    Location getWithNewPtr(const Value *NewPtr) const {
+      Location Copy(*this);
+      Copy.Ptr = NewPtr;
+      return Copy;
+    }
+
+    Location getWithNewSize(uint64_t NewSize) const {
+      Location Copy(*this);
+      Copy.Size = NewSize;
+      return Copy;
+    }
+
+    Location getWithoutTBAATag() const {
+      Location Copy(*this);
+      Copy.TBAATag = 0;
+      return Copy;
+    }
+  };
+
+  /// getLocation - Fill in Loc with information about the memory reference by
+  /// the given instruction.
+  Location getLocation(const LoadInst *LI);
+  Location getLocation(const StoreInst *SI);
+  Location getLocation(const VAArgInst *VI);
+  static Location getLocationForSource(const MemTransferInst *MTI);
+  static Location getLocationForDest(const MemIntrinsic *MI);
+
   /// Alias analysis result - Either we know for sure that it does not alias, we
   /// know for sure it must alias, or we don't know anything: The two pointers
   /// _might_ alias.  This enum is designed so you can do things like:
@@ -92,33 +148,63 @@ public:
   /// See docs/AliasAnalysis.html for more information on the specific meanings
   /// of these values.
   ///
-  enum AliasResult { NoAlias = 0, MayAlias = 1, MustAlias = 2 };
+  enum AliasResult {
+    NoAlias = 0,        ///< No dependencies.
+    MayAlias,           ///< Anything goes.
+    PartialAlias,       ///< Pointers differ, but pointees overlap.
+    MustAlias           ///< Pointers are equal.
+  };
 
   /// alias - The main low level interface to the alias analysis implementation.
-  /// Returns a Result indicating whether the two pointers are aliased to each
-  /// other.  This is the interface that must be implemented by specific alias
-  /// analysis implementations.
-  ///
-  virtual AliasResult alias(const Value *V1, unsigned V1Size,
-                            const Value *V2, unsigned V2Size);
+  /// Returns an AliasResult indicating whether the two pointers are aliased to
+  /// each other.  This is the interface that must be implemented by specific
+  /// alias analysis implementations.
+  virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+  /// alias - A convenience wrapper.
+  AliasResult alias(const Value *V1, uint64_t V1Size,
+                    const Value *V2, uint64_t V2Size) {
+    return alias(Location(V1, V1Size), Location(V2, V2Size));
+  }
 
-  /// alias - A convenience wrapper for the case where the sizes are unknown.
+  /// alias - A convenience wrapper.
   AliasResult alias(const Value *V1, const Value *V2) {
     return alias(V1, UnknownSize, V2, UnknownSize);
   }
 
   /// isNoAlias - A trivial helper function to check to see if the specified
   /// pointers are no-alias.
-  bool isNoAlias(const Value *V1, unsigned V1Size,
-                 const Value *V2, unsigned V2Size) {
-    return alias(V1, V1Size, V2, V2Size) == NoAlias;
+  bool isNoAlias(const Location &LocA, const Location &LocB) {
+    return alias(LocA, LocB) == NoAlias;
   }
 
-  /// pointsToConstantMemory - If the specified pointer is known to point into
-  /// constant global memory, return true.  This allows disambiguation of store
-  /// instructions from constant pointers.
-  ///
-  virtual bool pointsToConstantMemory(const Value *P);
+  /// isNoAlias - A convenience wrapper.
+  bool isNoAlias(const Value *V1, uint64_t V1Size,
+                 const Value *V2, uint64_t V2Size) {
+    return isNoAlias(Location(V1, V1Size), Location(V2, V2Size));
+  }
+  
+  /// isMustAlias - A convenience wrapper.
+  bool isMustAlias(const Location &LocA, const Location &LocB) {
+    return alias(LocA, LocB) == MustAlias;
+  }
+
+  /// isMustAlias - A convenience wrapper.
+  bool isMustAlias(const Value *V1, const Value *V2) {
+    return alias(V1, 1, V2, 1) == MustAlias;
+  }
+  
+  /// pointsToConstantMemory - If the specified memory location is
+  /// known to be constant, return true. If OrLocal is true and the
+  /// specified memory location is known to be "local" (derived from
+  /// an alloca), return true. Otherwise return false.
+  virtual bool pointsToConstantMemory(const Location &Loc,
+                                      bool OrLocal = false);
+
+  /// pointsToConstantMemory - A convenient wrapper.
+  bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
+    return pointsToConstantMemory(Location(P), OrLocal);
+  }
 
   //===--------------------------------------------------------------------===//
   /// Simple mod/ref information...
@@ -129,36 +215,48 @@ public:
   ///
   enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };
 
+  /// These values define additional bits used to define the
+  /// ModRefBehavior values.
+  enum { Nowhere = 0, ArgumentPointees = 4, Anywhere = 8 | ArgumentPointees };
 
   /// ModRefBehavior - Summary of how a function affects memory in the program.
   /// Loads from constant globals are not considered memory accesses for this
   /// interface.  Also, functions may freely modify stack space local to their
   /// invocation without having to report it through these interfaces.
   enum ModRefBehavior {
-    // DoesNotAccessMemory - This function does not perform any non-local loads
-    // or stores to memory.
-    //
-    // This property corresponds to the GCC 'const' attribute.
-    DoesNotAccessMemory,
-
-    // AccessesArguments - This function accesses function arguments in well
-    // known (possibly volatile) ways, but does not access any other memory.
-    AccessesArguments,
-
-    // AccessesArgumentsAndGlobals - This function has accesses function
-    // arguments and global variables well known (possibly volatile) ways, but
-    // does not access any other memory.
-    AccessesArgumentsAndGlobals,
-
-    // OnlyReadsMemory - This function does not perform any non-local stores or
-    // volatile loads, but may read from any memory location.
-    //
-    // This property corresponds to the GCC 'pure' attribute.
-    OnlyReadsMemory,
-
-    // UnknownModRefBehavior - This indicates that the function could not be
-    // classified into one of the behaviors above.
-    UnknownModRefBehavior
+    /// DoesNotAccessMemory - This function does not perform any non-local loads
+    /// or stores to memory.
+    ///
+    /// This property corresponds to the GCC 'const' attribute.
+    /// This property corresponds to the LLVM IR 'readnone' attribute.
+    /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
+    DoesNotAccessMemory = Nowhere | NoModRef,
+
+    /// OnlyReadsArgumentPointees - The only memory references in this function
+    /// (if it has any) are non-volatile loads from objects pointed to by its
+    /// pointer-typed arguments, with arbitrary offsets.
+    ///
+    /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
+    OnlyReadsArgumentPointees = ArgumentPointees | Ref,
+
+    /// OnlyAccessesArgumentPointees - The only memory references in this
+    /// function (if it has any) are non-volatile loads and stores from objects
+    /// pointed to by its pointer-typed arguments, with arbitrary offsets.
+    ///
+    /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag.
+    OnlyAccessesArgumentPointees = ArgumentPointees | ModRef,
+
+    /// OnlyReadsMemory - This function does not perform any non-local stores or
+    /// volatile loads, but may read from any memory location.
+    ///
+    /// This property corresponds to the GCC 'pure' attribute.
+    /// This property corresponds to the LLVM IR 'readonly' attribute.
+    /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
+    OnlyReadsMemory = Anywhere | Ref,
+
+    /// UnknownModRefBehavior - This indicates that the function could not be
+    /// classified into one of the behaviors above.
+    UnknownModRefBehavior = Anywhere | ModRef
   };
 
   /// getModRefBehavior - Return the behavior when calling the given call site.
@@ -168,11 +266,6 @@ public:
   /// For use when the call site is not known.
   virtual ModRefBehavior getModRefBehavior(const Function *F);
 
-  /// getIntrinsicModRefBehavior - Return the modref behavior of the intrinsic
-  /// with the given id.  Most clients won't need this, because the regular
-  /// getModRefBehavior incorporates this information.
-  static ModRefBehavior getIntrinsicModRefBehavior(unsigned iid);
-
   /// doesNotAccessMemory - If the specified call is known to never read or
   /// write memory, return true.  If the call only reads from known-constant
   /// memory, it is also legal to return true.  Calls that unwind the stack
@@ -205,8 +298,7 @@ public:
   /// This property corresponds to the GCC 'pure' attribute.
   ///
   bool onlyReadsMemory(ImmutableCallSite CS) {
-    ModRefBehavior MRB = getModRefBehavior(CS);
-    return MRB == DoesNotAccessMemory || MRB == OnlyReadsMemory;
+    return onlyReadsMemory(getModRefBehavior(CS));
   }
 
   /// onlyReadsMemory - If the specified function is known to only read from
@@ -214,21 +306,114 @@ public:
   /// when the call site is not known.
   ///
   bool onlyReadsMemory(const Function *F) {
-    ModRefBehavior MRB = getModRefBehavior(F);
-    return MRB == DoesNotAccessMemory || MRB == OnlyReadsMemory;
+    return onlyReadsMemory(getModRefBehavior(F));
   }
 
+  /// onlyReadsMemory - Return true if functions with the specified behavior are
+  /// known to only read from non-volatile memory (or not access memory at all).
+  ///
+  static bool onlyReadsMemory(ModRefBehavior MRB) {
+    return !(MRB & Mod);
+  }
+
+  /// onlyAccessesArgPointees - Return true if functions with the specified
+  /// behavior are known to read and write at most from objects pointed to by
+  /// their pointer-typed arguments (with arbitrary offsets).
+  ///
+  static bool onlyAccessesArgPointees(ModRefBehavior MRB) {
+    return !(MRB & Anywhere & ~ArgumentPointees);
+  }
+
+  /// doesAccessArgPointees - Return true if functions with the specified
+  /// behavior are known to potentially read or write  from objects pointed
+  /// to be their pointer-typed arguments (with arbitrary offsets).
+  ///
+  static bool doesAccessArgPointees(ModRefBehavior MRB) {
+    return (MRB & ModRef) && (MRB & ArgumentPointees);
+  }
 
   /// getModRefInfo - Return information about whether or not an instruction may
-  /// read or write memory specified by the pointer operand.  An instruction
+  /// read or write the specified memory location.  An instruction
   /// that doesn't read or write memory may be trivially LICM'd for example.
+  ModRefResult getModRefInfo(const Instruction *I,
+                             const Location &Loc) {
+    switch (I->getOpcode()) {
+    case Instruction::VAArg:  return getModRefInfo((const VAArgInst*)I, Loc);
+    case Instruction::Load:   return getModRefInfo((const LoadInst*)I,  Loc);
+    case Instruction::Store:  return getModRefInfo((const StoreInst*)I, Loc);
+    case Instruction::Call:   return getModRefInfo((const CallInst*)I,  Loc);
+    case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
+    default:                  return NoModRef;
+    }
+  }
+
+  /// getModRefInfo - A convenience wrapper.
+  ModRefResult getModRefInfo(const Instruction *I,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(I, Location(P, Size));
+  }
 
   /// getModRefInfo (for call sites) - Return whether information about whether
-  /// a particular call site modifies or reads the memory specified by the
-  /// pointer.
-  ///
+  /// a particular call site modifies or reads the specified memory location.
   virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                     const Value *P, unsigned Size);
+                                     const Location &Loc);
+
+  /// getModRefInfo (for call sites) - A convenience wrapper.
+  ModRefResult getModRefInfo(ImmutableCallSite CS,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(CS, Location(P, Size));
+  }
+
+  /// getModRefInfo (for calls) - Return whether information about whether
+  /// a particular call modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) {
+    return getModRefInfo(ImmutableCallSite(C), Loc);
+  }
+
+  /// getModRefInfo (for calls) - A convenience wrapper.
+  ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) {
+    return getModRefInfo(C, Location(P, Size));
+  }
+
+  /// getModRefInfo (for invokes) - Return whether information about whether
+  /// a particular invoke modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const InvokeInst *I,
+                             const Location &Loc) {
+    return getModRefInfo(ImmutableCallSite(I), Loc);
+  }
+
+  /// getModRefInfo (for invokes) - A convenience wrapper.
+  ModRefResult getModRefInfo(const InvokeInst *I,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(I, Location(P, Size));
+  }
+
+  /// getModRefInfo (for loads) - Return whether information about whether
+  /// a particular load modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc);
+
+  /// getModRefInfo (for loads) - A convenience wrapper.
+  ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) {
+    return getModRefInfo(L, Location(P, Size));
+  }
+
+  /// getModRefInfo (for stores) - Return whether information about whether
+  /// a particular store modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc);
+
+  /// getModRefInfo (for stores) - A convenience wrapper.
+  ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){
+    return getModRefInfo(S, Location(P, Size));
+  }
+
+  /// getModRefInfo (for va_args) - Return whether information about whether
+  /// a particular va_arg modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc);
+
+  /// getModRefInfo (for va_args) - A convenience wrapper.
+  ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){
+    return getModRefInfo(I, Location(P, Size));
+  }
 
   /// getModRefInfo - Return information about whether two call sites may refer
   /// to the same set of memory locations.  See 
@@ -237,46 +422,31 @@ public:
   virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                      ImmutableCallSite CS2);
 
-public:
-  /// Convenience functions...
-  ModRefResult getModRefInfo(const LoadInst *L, const Value *P, unsigned Size);
-  ModRefResult getModRefInfo(const StoreInst *S, const Value *P, unsigned Size);
-  ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, unsigned Size);
-  ModRefResult getModRefInfo(const CallInst *C, const Value *P, unsigned Size) {
-    return getModRefInfo(ImmutableCallSite(C), P, Size);
-  }
-  ModRefResult getModRefInfo(const InvokeInst *I,
-                             const Value *P, unsigned Size) {
-    return getModRefInfo(ImmutableCallSite(I), P, Size);
-  }
-  ModRefResult getModRefInfo(const Instruction *I,
-                             const Value *P, unsigned Size) {
-    switch (I->getOpcode()) {
-    case Instruction::VAArg:  return getModRefInfo((const VAArgInst*)I, P,Size);
-    case Instruction::Load:   return getModRefInfo((const LoadInst*)I, P, Size);
-    case Instruction::Store:  return getModRefInfo((const StoreInst*)I, P,Size);
-    case Instruction::Call:   return getModRefInfo((const CallInst*)I, P, Size);
-    case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,P,Size);
-    default:                  return NoModRef;
-    }
-  }
-
   //===--------------------------------------------------------------------===//
   /// Higher level methods for querying mod/ref information.
   ///
 
   /// canBasicBlockModify - Return true if it is possible for execution of the
   /// specified basic block to modify the value pointed to by Ptr.
-  ///
-  bool canBasicBlockModify(const BasicBlock &BB, const Value *P, unsigned Size);
+  bool canBasicBlockModify(const BasicBlock &BB, const Location &Loc);
+
+  /// canBasicBlockModify - A convenience wrapper.
+  bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){
+    return canBasicBlockModify(BB, Location(P, Size));
+  }
 
   /// canInstructionRangeModify - Return true if it is possible for the
   /// execution of the specified instructions to modify the value pointed to by
   /// Ptr.  The instructions to consider are all of the instructions in the
   /// range of [I1,I2] INCLUSIVE.  I1 and I2 must be in the same basic block.
-  ///
   bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2,
-                                 const Value *Ptr, unsigned Size);
+                                 const Location &Loc);
+
+  /// canInstructionRangeModify - A convenience wrapper.
+  bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2,
+                                 const Value *Ptr, uint64_t Size) {
+    return canInstructionRangeModify(I1, I2, Location(Ptr, Size));
+  }
 
   //===--------------------------------------------------------------------===//
   /// Methods that clients should call when they transform the program to allow
@@ -299,6 +469,17 @@ public:
   ///
   virtual void copyValue(Value *From, Value *To);
 
+  /// addEscapingUse - This method should be used whenever an escaping use is
+  /// added to a pointer value.  Analysis implementations may either return
+  /// conservative responses for that value in the future, or may recompute
+  /// some or all internal state to continue providing precise responses.
+  ///
+  /// Escaping uses are considered by anything _except_ the following:
+  ///  - GEPs or bitcasts of the pointer
+  ///  - Loads through the pointer
+  ///  - Stores through (but not of) the pointer
+  virtual void addEscapingUse(Use &U);
+
   /// replaceWithNewValue - This method is the obvious combination of the two
   /// above, and it provided as a helper to simplify client code.
   ///
@@ -323,11 +504,4 @@ bool isIdentifiedObject(const Value *V);
 
 } // End llvm namespace
 
-// Because of the way .a files work, we must force the BasicAA implementation to
-// be pulled in if the AliasAnalysis header is included.  Otherwise we run
-// the risk of AliasAnalysis being used, but the default implementation not
-// being linked into the tool that uses it.
-FORCE_DEFINING_FILE_TO_BE_LINKED(AliasAnalysis)
-FORCE_DEFINING_FILE_TO_BE_LINKED(BasicAliasAnalysis)
-
 #endif
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 8e2f7fd29a31..e844d10dda03 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -40,10 +40,12 @@ class AliasSet : public ilist_node<AliasSet> {
     Value *Val;  // The pointer this record corresponds to.
     PointerRec **PrevInList, *NextInList;
     AliasSet *AS;
-    unsigned Size;
+    uint64_t Size;
+    const MDNode *TBAAInfo;
   public:
     PointerRec(Value *V)
-      : Val(V), PrevInList(0), NextInList(0), AS(0), Size(0) {}
+      : Val(V), PrevInList(0), NextInList(0), AS(0), Size(0),
+        TBAAInfo(DenseMapInfo<const MDNode *>::getEmptyKey()) {}
 
     Value *getValue() const { return Val; }
     
@@ -55,11 +57,28 @@ class AliasSet : public ilist_node<AliasSet> {
       return &NextInList;
     }
 
-    void updateSize(unsigned NewSize) {
+    void updateSizeAndTBAAInfo(uint64_t NewSize, const MDNode *NewTBAAInfo) {
       if (NewSize > Size) Size = NewSize;
+
+      if (TBAAInfo == DenseMapInfo<const MDNode *>::getEmptyKey())
+        // We don't have a TBAAInfo yet. Set it to NewTBAAInfo.
+        TBAAInfo = NewTBAAInfo;
+      else if (TBAAInfo != NewTBAAInfo)
+        // NewTBAAInfo conflicts with TBAAInfo.
+        TBAAInfo = DenseMapInfo<const MDNode *>::getTombstoneKey();
     }
 
-    unsigned getSize() const { return Size; }
+    uint64_t getSize() const { return Size; }
+
+    /// getTBAAInfo - Return the TBAAInfo, or null if there is no
+    /// information or conflicting information.
+    const MDNode *getTBAAInfo() const {
+      // If we have missing or conflicting TBAAInfo, return null.
+      if (TBAAInfo == DenseMapInfo<const MDNode *>::getEmptyKey() ||
+          TBAAInfo == DenseMapInfo<const MDNode *>::getTombstoneKey())
+        return 0;
+      return TBAAInfo;
+    }
 
     AliasSet *getAliasSet(AliasSetTracker &AST) {
       assert(AS && "No AliasSet yet!");
@@ -186,7 +205,8 @@ public:
     value_type *operator->() const { return &operator*(); }
 
     Value *getPointer() const { return CurNode->getValue(); }
-    unsigned getSize() const { return CurNode->getSize(); }
+    uint64_t getSize() const { return CurNode->getSize(); }
+    const MDNode *getTBAAInfo() const { return CurNode->getTBAAInfo(); }
 
     iterator& operator++() {                // Preincrement
       assert(CurNode && "Advancing past AliasSet.end()!");
@@ -230,7 +250,8 @@ private:
 
   void removeFromTracker(AliasSetTracker &AST);
 
-  void addPointer(AliasSetTracker &AST, PointerRec &Entry, unsigned Size,
+  void addPointer(AliasSetTracker &AST, PointerRec &Entry, uint64_t Size,
+                  const MDNode *TBAAInfo,
                   bool KnownMustAlias = false);
   void addCallSite(CallSite CS, AliasAnalysis &AA);
   void removeCallSite(CallSite CS) {
@@ -245,7 +266,8 @@ private:
   /// aliasesPointer - Return true if the specified pointer "may" (or must)
   /// alias one of the members in the set.
   ///
-  bool aliasesPointer(const Value *Ptr, unsigned Size, AliasAnalysis &AA) const;
+  bool aliasesPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo,
+                      AliasAnalysis &AA) const;
   bool aliasesCallSite(CallSite CS, AliasAnalysis &AA) const;
 };
 
@@ -298,7 +320,7 @@ public:
   /// These methods return true if inserting the instruction resulted in the
   /// addition of a new alias set (i.e., the pointer did not alias anything).
   ///
-  bool add(Value *Ptr, unsigned Size);  // Add a location
+  bool add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo); // Add a location
   bool add(LoadInst *LI);
   bool add(StoreInst *SI);
   bool add(VAArgInst *VAAI);
@@ -312,7 +334,8 @@ public:
   /// remove methods - These methods are used to remove all entries that might
   /// be aliased by the specified instruction.  These methods return true if any
   /// alias sets were eliminated.
-  bool remove(Value *Ptr, unsigned Size);  // Remove a location
+  // Remove a location
+  bool remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo);
   bool remove(LoadInst *LI);
   bool remove(StoreInst *SI);
   bool remove(VAArgInst *VAAI);
@@ -332,18 +355,21 @@ public:
   /// lives in.  If the New argument is non-null, this method sets the value to
   /// true if a new alias set is created to contain the pointer (because the
   /// pointer didn't alias anything).
-  AliasSet &getAliasSetForPointer(Value *P, unsigned Size, bool *New = 0);
+  AliasSet &getAliasSetForPointer(Value *P, uint64_t Size,
+                                  const MDNode *TBAAInfo,
+                                  bool *New = 0);
 
   /// getAliasSetForPointerIfExists - Return the alias set containing the
   /// location specified if one exists, otherwise return null.
-  AliasSet *getAliasSetForPointerIfExists(Value *P, unsigned Size) {
-    return findAliasSetForPointer(P, Size);
+  AliasSet *getAliasSetForPointerIfExists(Value *P, uint64_t Size,
+                                          const MDNode *TBAAInfo) {
+    return findAliasSetForPointer(P, Size, TBAAInfo);
   }
 
   /// containsPointer - Return true if the specified location is represented by
   /// this alias set, false otherwise.  This does not modify the AST object or
   /// alias sets.
-  bool containsPointer(Value *P, unsigned Size) const;
+  bool containsPointer(Value *P, uint64_t Size, const MDNode *TBAAInfo) const;
 
   /// getAliasAnalysis - Return the underlying alias analysis object used by
   /// this tracker.
@@ -390,14 +416,16 @@ private:
     return *Entry;
   }
 
-  AliasSet &addPointer(Value *P, unsigned Size, AliasSet::AccessType E,
+  AliasSet &addPointer(Value *P, uint64_t Size, const MDNode *TBAAInfo,
+                       AliasSet::AccessType E,
                        bool &NewSet) {
     NewSet = false;
-    AliasSet &AS = getAliasSetForPointer(P, Size, &NewSet);
+    AliasSet &AS = getAliasSetForPointer(P, Size, TBAAInfo, &NewSet);
     AS.AccessTy |= E;
     return AS;
   }
-  AliasSet *findAliasSetForPointer(const Value *Ptr, unsigned Size);
+  AliasSet *findAliasSetForPointer(const Value *Ptr, uint64_t Size,
+                                   const MDNode *TBAAInfo);
 
   AliasSet *findAliasSetForCallSite(CallSite CS);
 };
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index a4884edd5bd6..089f322e4a86 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -57,7 +57,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/System/IncludeFile.h"
+#include "llvm/Support/IncludeFile.h"
 #include <map>
 
 namespace llvm {
@@ -138,6 +138,13 @@ public:
   /// not already exist.
   CallGraphNode *getOrInsertFunction(const Function *F);
 
+  /// spliceFunction - Replace the function represented by this node by another.
+  /// This does not rescan the body of the function, so it is suitable when
+  /// splicing the body of one function to another while also updating all
+  /// callers from the old function to the new.
+  ///
+  void spliceFunction(const Function *From, const Function *To);
+
   //===---------------------------------------------------------------------
   // Pass infrastructure interface glue code.
   //
@@ -163,8 +170,10 @@ protected:
 // CallGraphNode class definition.
 //
 class CallGraphNode {
-  AssertingVH<Function> F;
+  friend class CallGraph;
   
+  AssertingVH<Function> F;
+
   // CallRecord - This is a pair of the calling instruction (a call or invoke)
   // and the callgraph node being called.
 public:
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 58096f1f15b1..75edfbbed2e3 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -7,15 +7,16 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements various weight measurements for a function, helping
-// the Inliner and PartialSpecialization decide whether to duplicate its
-// contents.
+// This file implements various weight measurements for code, helping
+// the Inliner and other passes decide whether to duplicate its contents.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ANALYSIS_CODEMETRICS_H
 #define LLVM_ANALYSIS_CODEMETRICS_H
 
+#include "llvm/ADT/DenseMap.h"
+
 namespace llvm {
   // CodeMetrics - Calculate size and a few similar metrics for a set of
   // basic blocks.
@@ -45,6 +46,11 @@ namespace llvm {
 
     /// NumCalls - Keep track of the number of calls to 'big' functions.
     unsigned NumCalls;
+    
+    /// NumInlineCandidates - Keep track of the number of calls to internal
+    /// functions with only a single caller.  These are likely targets for
+    /// future inlining, likely exposed by interleaved devirtualization.
+    unsigned NumInlineCandidates;
 
     /// NumVectorInsts - Keep track of how many instructions produce vector
     /// values.  The inliner is being more aggressive with inlining vector
@@ -56,7 +62,8 @@ namespace llvm {
 
     CodeMetrics() : callsSetJmp(false), isRecursive(false),
                     containsIndirectBr(false), usesDynamicAlloca(false), 
-                    NumInsts(0), NumBlocks(0), NumCalls(0), NumVectorInsts(0), 
+                    NumInsts(0), NumBlocks(0), NumCalls(0),
+                    NumInlineCandidates(0), NumVectorInsts(0), 
                     NumRets(0) {}
 
     /// analyzeBasicBlock - Add information about the specified basic block
@@ -66,6 +73,22 @@ namespace llvm {
     /// analyzeFunction - Add information about the specified function
     /// to the current structure.
     void analyzeFunction(Function *F);
+    
+    /// CountCodeReductionForConstant - Figure out an approximation for how
+    /// many instructions will be constant folded if the specified value is
+    /// constant.
+    unsigned CountCodeReductionForConstant(Value *V);
+   
+    /// CountBonusForConstant - Figure out an approximation for how much
+    /// per-call performance boost we can expect if the specified value is
+    /// constant.
+    unsigned CountBonusForConstant(Value *V);
+
+    /// CountCodeReductionForAlloca - Figure out an approximation of how much
+    /// smaller the function will be if it is inlined into a context where an
+    /// argument becomes an alloca.
+    ///
+    unsigned CountCodeReductionForAlloca(Value *V);
   };
 }
 
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index e2675eb2d4f0..f6b1f5ab9915 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares routines for folding instructions into constants.
+// This file declares routines for folding instructions into constants when all
+// operands are constants, for example "sub i32 1, 0" -> "1".
 //
 // Also, to supplement the basic VMCore ConstantExpr simplifications,
 // this file declares some additional folding routines that can make use of
@@ -27,11 +28,11 @@ namespace llvm {
   class Function;
   class Type;
 
-/// ConstantFoldInstruction - Attempt to constant fold the specified
-/// instruction.  If successful, the constant result is returned, if not, null
-/// is returned.  Note that this function can only fail when attempting to fold
-/// instructions like loads and stores, which have no constant expression form.
-///
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant.  Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
 Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
 
 /// ConstantFoldConstantExpression - Attempt to fold the constant expression
diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h
new file mode 100644
index 000000000000..bd221344e5fa
--- /dev/null
+++ b/include/llvm/Analysis/DIBuilder.h
@@ -0,0 +1,459 @@
+//===--- llvm/Analysis/DIBuilder.h - Debug Information Builder --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DIBuilder that is useful for creating debugging 
+// information entries in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DIBUILDER_H
+#define LLVM_ANALYSIS_DIBUILDER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class BasicBlock;
+  class Instruction;
+  class Function;
+  class Module;
+  class Value;
+  class LLVMContext;
+  class MDNode;
+  class StringRef;
+  class DIDescriptor;
+  class DIFile;
+  class DIEnumerator;
+  class DIType;
+  class DIArray;
+  class DIGlobalVariable;
+  class DINameSpace;
+  class DIVariable;
+  class DISubrange;
+  class DILexicalBlock;
+  class DISubprogram;
+  class DITemplateTypeParameter;
+  class DITemplateValueParameter;
+
+  class DIBuilder {
+    private:
+    Module &M;
+    LLVMContext & VMContext;
+    MDNode *TheCU;
+
+    Function *DeclareFn;     // llvm.dbg.declare
+    Function *ValueFn;       // llvm.dbg.value
+
+    DIBuilder(const DIBuilder &);       // DO NOT IMPLEMENT
+    void operator=(const DIBuilder &);  // DO NOT IMPLEMENT
+
+    public:
+    explicit DIBuilder(Module &M);
+    const MDNode *getCU() { return TheCU; }
+    enum ComplexAddrKind { OpPlus=1, OpDeref };
+
+    /// CreateCompileUnit - A CompileUnit provides an anchor for all debugging
+    /// information generated during this instance of compilation.
+    /// @param Lang     Source programming language, eg. dwarf::DW_LANG_C99
+    /// @param File     File name
+    /// @param Dir      Directory
+    /// @param Producer String identify producer of debugging information. 
+    ///                 Usuall this is a compiler version string.
+    /// @param isOptimized A boolean flag which indicates whether optimization
+    ///                    is ON or not.
+    /// @param Flags    This string lists command line options. This string is 
+    ///                 directly embedded in debug info output which may be used
+    ///                 by a tool analyzing generated debugging information.
+    /// @param RV       This indicates runtime version for languages like 
+    ///                 Objective-C.
+    void CreateCompileUnit(unsigned Lang, StringRef File, StringRef Dir, 
+                           StringRef Producer,
+                           bool isOptimized, StringRef Flags, unsigned RV);
+
+    /// CreateFile - Create a file descriptor to hold debugging information
+    /// for a file.
+    DIFile CreateFile(StringRef Filename, StringRef Directory);
+                           
+    /// CreateEnumerator - Create a single enumerator value.
+    DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val);
+
+    /// CreateBasicType - Create debugging information entry for a basic 
+    /// type.
+    /// @param Name        Type name.
+    /// @param SizeInBits  Size of the type.
+    /// @param AlignInBits Type alignment.
+    /// @param Encoding    DWARF encoding code, e.g. dwarf::DW_ATE_float.
+    DIType CreateBasicType(StringRef Name, uint64_t SizeInBits, 
+                           uint64_t AlignInBits, unsigned Encoding);
+
+    /// CreateQualifiedType - Create debugging information entry for a qualified
+    /// type, e.g. 'const int'.
+    /// @param Tag         Tag identifing type, e.g. dwarf::TAG_volatile_type
+    /// @param FromTy      Base Type.
+    DIType CreateQualifiedType(unsigned Tag, DIType FromTy);
+
+    /// CreatePointerType - Create debugging information entry for a pointer.
+    /// @param PointeeTy   Type pointed by this pointer.
+    /// @param SizeInBits  Size.
+    /// @param AlignInBits Alignment. (optional)
+    /// @param Name        Pointer type name. (optional)
+    DIType CreatePointerType(DIType PointeeTy, uint64_t SizeInBits,
+                             uint64_t AlignInBits = 0, 
+                             StringRef Name = StringRef());
+
+    /// CreateReferenceType - Create debugging information entry for a c++
+    /// style reference.
+    DIType CreateReferenceType(DIType RTy);
+
+    /// CreateTypedef - Create debugging information entry for a typedef.
+    /// @param Ty          Original type.
+    /// @param Name        Typedef name.
+    /// @param File        File where this type is defined.
+    /// @param LineNo      Line number.
+    DIType CreateTypedef(DIType Ty, StringRef Name, DIFile File, 
+                         unsigned LineNo);
+
+    /// CreateFriend - Create debugging information entry for a 'friend'.
+    DIType CreateFriend(DIType Ty, DIType FriendTy);
+
+    /// CreateInheritance - Create debugging information entry to establish
+    /// inheritance relationship between two types.
+    /// @param Ty           Original type.
+    /// @param BaseTy       Base type. Ty is inherits from base.
+    /// @param BaseOffset   Base offset.
+    /// @param Flags        Flags to describe inheritance attribute, 
+    ///                     e.g. private
+    DIType CreateInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset,
+                             unsigned Flags);
+
+    /// CreateMemberType - Create debugging information entry for a member.
+    /// @param Name         Member name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param OffsetInBits Member offset.
+    /// @param Flags        Flags to encode member attribute, e.g. private
+    /// @param Ty           Parent type.
+    DIType CreateMemberType(StringRef Name, DIFile File,
+                            unsigned LineNo, uint64_t SizeInBits, 
+                            uint64_t AlignInBits, uint64_t OffsetInBits, 
+                            unsigned Flags, DIType Ty);
+
+    /// CreateClassType - Create debugging information entry for a class.
+    /// @param Scope        Scope in which this class is defined.
+    /// @param Name         class name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param OffsetInBits Member offset.
+    /// @param Flags        Flags to encode member attribute, e.g. private
+    /// @param Elements     class members.
+    /// @param VTableHolder Debug info of the base class that contains vtable
+    ///                     for this type. This is used in 
+    ///                     DW_AT_containing_type. See DWARF documentation
+    ///                     for more info.
+    /// @param TemplateParms Template type parameters.
+    DIType CreateClassType(DIDescriptor Scope, StringRef Name, DIFile File,
+                           unsigned LineNumber, uint64_t SizeInBits,
+                           uint64_t AlignInBits, uint64_t OffsetInBits,
+                           unsigned Flags, DIType DerivedFrom, 
+                           DIArray Elements, MDNode *VTableHolder = 0,
+                           MDNode *TemplateParms = 0);
+
+    /// CreateStructType - Create debugging information entry for a struct.
+    /// @param Scope        Scope in which this struct is defined.
+    /// @param Name         Struct name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param Flags        Flags to encode member attribute, e.g. private
+    /// @param Elements     Struct elements.
+    /// @param RunTimeLang  Optional parameter, Objective-C runtime version.
+    DIType CreateStructType(DIDescriptor Scope, StringRef Name, DIFile File,
+                            unsigned LineNumber, uint64_t SizeInBits,
+                            uint64_t AlignInBits, unsigned Flags,
+                            DIArray Elements, unsigned RunTimeLang = 0);
+
+    /// CreateUnionType - Create debugging information entry for an union.
+    /// @param Scope        Scope in which this union is defined.
+    /// @param Name         Union name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param Flags        Flags to encode member attribute, e.g. private
+    /// @param Elements     Union elements.
+    /// @param RunTimeLang  Optional parameter, Objective-C runtime version.
+    DIType CreateUnionType(DIDescriptor Scope, StringRef Name, DIFile File,
+                           unsigned LineNumber, uint64_t SizeInBits,
+                           uint64_t AlignInBits, unsigned Flags,
+                           DIArray Elements, unsigned RunTimeLang = 0);
+
+    /// CreateTemplateTypeParameter - Create debugging information for template
+    /// type parameter.
+    /// @param Scope        Scope in which this type is defined.
+    /// @param Name         Type parameter name.
+    /// @param Ty           Parameter type.
+    /// @param File         File where this type parameter is defined.
+    /// @param LineNo       Line number.
+    /// @param ColumnNo     Column Number.
+    DITemplateTypeParameter
+    CreateTemplateTypeParameter(DIDescriptor Scope, StringRef Name, DIType Ty,
+                                MDNode *File = 0, unsigned LineNo = 0,
+                                unsigned ColumnNo = 0);
+
+    /// CreateTemplateValueParameter - Create debugging information for template
+    /// value parameter.
+    /// @param Scope        Scope in which this type is defined.
+    /// @param Name         Value parameter name.
+    /// @param Ty           Parameter type.
+    /// @param Value        Constant parameter value.
+    /// @param File         File where this type parameter is defined.
+    /// @param LineNo       Line number.
+    /// @param ColumnNo     Column Number.
+    DITemplateValueParameter
+    CreateTemplateValueParameter(DIDescriptor Scope, StringRef Name, DIType Ty,
+                                 uint64_t Value,
+                                 MDNode *File = 0, unsigned LineNo = 0,
+                                 unsigned ColumnNo = 0);
+
+    /// CreateArrayType - Create debugging information entry for an array.
+    /// @param Size         Array size.
+    /// @param AlignInBits  Alignment.
+    /// @param Ty           Element type.
+    /// @param Subscripts   Subscripts.
+    DIType CreateArrayType(uint64_t Size, uint64_t AlignInBits, 
+                           DIType Ty, DIArray Subscripts);
+
+    /// CreateVectorType - Create debugging information entry for a vector type.
+    /// @param Size         Array size.
+    /// @param AlignInBits  Alignment.
+    /// @param Ty           Element type.
+    /// @param Subscripts   Subscripts.
+    DIType CreateVectorType(uint64_t Size, uint64_t AlignInBits, 
+                            DIType Ty, DIArray Subscripts);
+
+    /// CreateEnumerationType - Create debugging information entry for an 
+    /// enumeration.
+    /// @param Scope        Scope in which this enumeration is defined.
+    /// @param Name         Union name.
+    /// @param File         File where this member is defined.
+    /// @param LineNo       Line number.
+    /// @param SizeInBits   Member size.
+    /// @param AlignInBits  Member alignment.
+    /// @param Elements     Enumeration elements.
+    DIType CreateEnumerationType(DIDescriptor Scope, StringRef Name, 
+                                 DIFile File, unsigned LineNumber, 
+                                 uint64_t SizeInBits, 
+                                 uint64_t AlignInBits, DIArray Elements);
+
+    /// CreateSubroutineType - Create subroutine type.
+    /// @param File          File in which this subroutine is defined.
+    /// @param ParamterTypes An array of subroutine parameter types. This
+    ///                      includes return type at 0th index.
+    DIType CreateSubroutineType(DIFile File, DIArray ParameterTypes);
+
+    /// CreateArtificialType - Create a new DIType with "artificial" flag set.
+    DIType CreateArtificialType(DIType Ty);
+
+    /// CreateTemporaryType - Create a temporary forward-declared type.
+    DIType CreateTemporaryType();
+    DIType CreateTemporaryType(DIFile F);
+
+    /// RetainType - Retain DIType in a module even if it is not referenced 
+    /// through debug info anchors.
+    void RetainType(DIType T);
+
+    /// CreateUnspecifiedParameter - Create unspeicified type descriptor
+    /// for a subroutine type.
+    DIDescriptor CreateUnspecifiedParameter();
+
+    /// GetOrCreateArray - Get a DIArray, create one if required.
+    DIArray GetOrCreateArray(Value *const *Elements, unsigned NumElements);
+
+    /// GetOrCreateSubrange - Create a descriptor for a value range.  This
+    /// implicitly uniques the values returned.
+    DISubrange GetOrCreateSubrange(int64_t Lo, int64_t Hi);
+
+    /// CreateGlobalVariable - Create a new descriptor for the specified global.
+    /// @param Name        Name of the variable.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type.
+    /// @param isLocalToUnit Boolean flag indicate whether this variable is
+    ///                      externally visible or not.
+    /// @param Val         llvm::Value of the variable.
+    DIGlobalVariable
+    CreateGlobalVariable(StringRef Name, DIFile File, unsigned LineNo,
+                         DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+
+
+    /// CreateStaticVariable - Create a new descriptor for the specified 
+    /// variable.
+    /// @param Conext      Variable scope. 
+    /// @param Name        Name of the variable.
+    /// @param LinakgeName Mangled  name of the variable.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type.
+    /// @param isLocalToUnit Boolean flag indicate whether this variable is
+    ///                      externally visible or not.
+    /// @param Val         llvm::Value of the variable.
+    DIGlobalVariable
+    CreateStaticVariable(DIDescriptor Context, StringRef Name, 
+                         StringRef LinkageName, DIFile File, unsigned LineNo, 
+                         DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+
+
+    /// CreateLocalVariable - Create a new descriptor for the specified 
+    /// local variable.
+    /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
+    ///                    DW_TAG_arg_variable.
+    /// @param Scope       Variable scope.
+    /// @param Name        Variable name.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type
+    /// @param AlwaysPreserve Boolean. Set to true if debug info for this
+    ///                       variable should be preserved in optimized build.
+    /// @param Flags          Flags, e.g. artificial variable.
+    DIVariable CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                   StringRef Name,
+                                   DIFile File, unsigned LineNo,
+                                   DIType Ty, bool AlwaysPreserve = false,
+                                   unsigned Flags = 0);
+
+
+    /// CreateComplexVariable - Create a new descriptor for the specified
+    /// variable which has a complex address expression for its address.
+    /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
+    ///                    DW_TAG_arg_variable.
+    /// @param Scope       Variable scope.
+    /// @param Name        Variable name.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type
+    /// @param Addr        A pointer to a vector of complex address operations.
+    /// @param NumAddr     Num of address operations in the vector.
+    DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                     StringRef Name, DIFile F, unsigned LineNo,
+                                     DIType Ty, Value *const *Addr,
+                                     unsigned NumAddr);
+
+    /// CreateFunction - Create a new descriptor for the specified subprogram.
+    /// See comments in DISubprogram for descriptions of these fields.
+    /// @param Scope         Function scope.
+    /// @param Name          Function name.
+    /// @param LinkageName   Mangled function name.
+    /// @param File          File where this variable is defined.
+    /// @param LineNo        Line number.
+    /// @param Ty            Function type.
+    /// @param isLocalToUnit True if this function is not externally visible..
+    /// @param isDefinition  True if this is a function definition.
+    /// @param Flags         e.g. is this function prototyped or not.
+    ///                      This flags are used to emit dwarf attributes.
+    /// @param isOptimized   True if optimization is ON.
+    /// @param Fn            llvm::Function pointer.
+    DISubprogram CreateFunction(DIDescriptor Scope, StringRef Name,
+                                StringRef LinkageName,
+                                DIFile File, unsigned LineNo,
+                                DIType Ty, bool isLocalToUnit,
+                                bool isDefinition,
+                                unsigned Flags = 0,
+                                bool isOptimized = false,
+                                Function *Fn = 0);
+
+    /// CreateMethod - Create a new descriptor for the specified C++ method.
+    /// See comments in DISubprogram for descriptions of these fields.
+    /// @param Scope         Function scope.
+    /// @param Name          Function name.
+    /// @param LinkageName   Mangled function name.
+    /// @param File          File where this variable is defined.
+    /// @param LineNo        Line number.
+    /// @param Ty            Function type.
+    /// @param isLocalToUnit True if this function is not externally visible..
+    /// @param isDefinition  True if this is a function definition.
+    /// @param Virtuality    Attributes describing virutallness. e.g. pure 
+    ///                      virtual function.
+    /// @param VTableIndex   Index no of this method in virtual table.
+    /// @param VTableHolder  Type that holds vtable.
+    /// @param Flags         e.g. is this function prototyped or not.
+    ///                      This flags are used to emit dwarf attributes.
+    /// @param isOptimized   True if optimization is ON.
+    /// @param Fn            llvm::Function pointer.
+    DISubprogram CreateMethod(DIDescriptor Scope, StringRef Name,
+                              StringRef LinkageName,
+                              DIFile File, unsigned LineNo,
+                              DIType Ty, bool isLocalToUnit,
+                              bool isDefinition,
+                              unsigned Virtuality = 0, unsigned VTableIndex = 0,
+                              MDNode *VTableHolder = 0,
+                              unsigned Flags = 0,
+                              bool isOptimized = false,
+                              Function *Fn = 0);
+
+    /// CreateNameSpace - This creates new descriptor for a namespace
+    /// with the specified parent scope.
+    /// @param Scope       Namespace scope
+    /// @param Name        Name of this namespace
+    /// @param File        Source file
+    /// @param LineNo      Line number
+    DINameSpace CreateNameSpace(DIDescriptor Scope, StringRef Name,
+                                DIFile File, unsigned LineNo);
+
+
+    /// CreateLexicalBlock - This creates a descriptor for a lexical block
+    /// with the specified parent context.
+    /// @param Scope       Parent lexical scope.
+    /// @param File        Source file
+    /// @param Line        Line number
+    /// @param Col         Column number
+    DILexicalBlock CreateLexicalBlock(DIDescriptor Scope, DIFile File,
+                                      unsigned Line, unsigned Col);
+
+    /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+    /// @param Storage     llvm::Value of the variable
+    /// @param VarInfo     Variable's debug info descriptor.
+    /// @param InsertAtEnd Location for the new intrinsic.
+    Instruction *InsertDeclare(llvm::Value *Storage, DIVariable VarInfo,
+                               BasicBlock *InsertAtEnd);
+
+    /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+    /// @param Storage      llvm::Value of the variable
+    /// @param VarInfo      Variable's debug info descriptor.
+    /// @param InsertBefore Location for the new intrinsic.
+    Instruction *InsertDeclare(llvm::Value *Storage, DIVariable VarInfo,
+                               Instruction *InsertBefore);
+
+
+    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    /// @param Val          llvm::Value of the variable
+    /// @param Offset       Offset
+    /// @param VarInfo      Variable's debug info descriptor.
+    /// @param InsertAtEnd Location for the new intrinsic.
+    Instruction *InsertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
+                                         DIVariable VarInfo, 
+                                         BasicBlock *InsertAtEnd);
+    
+    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    /// @param Val          llvm::Value of the variable
+    /// @param Offset       Offset
+    /// @param VarInfo      Variable's debug info descriptor.
+    /// @param InsertBefore Location for the new intrinsic.
+    Instruction *InsertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
+                                         DIVariable VarInfo, 
+                                         Instruction *InsertBefore);
+
+  };
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index d8daf5196fca..30741c4970ab 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -67,7 +67,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass {
     Title = GraphName + " for '" + F.getNameStr() + "' function";
 
     if (ErrorInfo.empty())
-      WriteGraph(File, Graph, Simple, Name, Title);
+      WriteGraph(File, Graph, Simple, Title);
     else
       errs() << "  error opening file for writing!";
     errs() << "\n";
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 2d1418da64d8..aa69088b425b 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -33,6 +33,7 @@ namespace llvm {
   class DbgDeclareInst;
   class Instruction;
   class MDNode;
+  class NamedMDNode;
   class LLVMContext;
   class raw_ostream;
 
@@ -46,6 +47,18 @@ namespace llvm {
   /// This should not be stored in a container, because underly MDNode may
   /// change in certain situations.
   class DIDescriptor {
+  public:
+    enum {
+      FlagPrivate          = 1 << 0,
+      FlagProtected        = 1 << 1,
+      FlagFwdDecl          = 1 << 2,
+      FlagAppleBlock       = 1 << 3,
+      FlagBlockByrefStruct = 1 << 4,
+      FlagVirtual          = 1 << 5,
+      FlagArtificial       = 1 << 6,
+      FlagExplicit         = 1 << 7,
+      FlagPrototyped       = 1 << 8
+    };
   protected:
     const MDNode *DbgNode;
 
@@ -108,6 +121,9 @@ namespace llvm {
     bool isEnumerator() const;
     bool isType() const;
     bool isGlobal() const;
+    bool isUnspecifiedParameter() const;
+    bool isTemplateTypeParameter() const;
+    bool isTemplateValueParameter() const;
   };
 
   /// DISubrange - This is used to represent ranges, for array bounds.
@@ -160,8 +176,8 @@ namespace llvm {
     /// module does not contain any main compile unit then the code generator
     /// will emit multiple compile units in the output object file.
 
-    bool isMain() const                { return getUnsignedField(6); }
-    bool isOptimized() const           { return getUnsignedField(7); }
+    bool isMain() const                { return getUnsignedField(6) != 0; }
+    bool isOptimized() const           { return getUnsignedField(7) != 0; }
     StringRef getFlags() const       { return getStringField(8);   }
     unsigned getRunTimeVersion() const { return getUnsignedField(9); }
 
@@ -203,17 +219,6 @@ namespace llvm {
   /// others do not require a huge and empty descriptor full of zeros.
   class DIType : public DIScope {
   public:
-    enum {
-      FlagPrivate          = 1 << 0,
-      FlagProtected        = 1 << 1,
-      FlagFwdDecl          = 1 << 2,
-      FlagAppleBlock       = 1 << 3,
-      FlagBlockByrefStruct = 1 << 4,
-      FlagVirtual          = 1 << 5,
-      FlagArtificial       = 1 << 6  // To identify artificial arguments in
-                                     // a subroutine type. e.g. "this" in c++.
-    };
-
   protected:
     // This ctor is used when the Tag has already been validated by a derived
     // ctor.
@@ -231,12 +236,12 @@ namespace llvm {
     DIScope getContext() const          { return getFieldAs<DIScope>(1); }
     StringRef getName() const           { return getStringField(2);     }
     DICompileUnit getCompileUnit() const{ 
-      if (getVersion() == llvm::LLVMDebugVersion7)
-        return getFieldAs<DICompileUnit>(3);
-
-      DIFile F = getFieldAs<DIFile>(3);
-      return F.getCompileUnit();
+     if (getVersion() == llvm::LLVMDebugVersion7)
+       return getFieldAs<DICompileUnit>(3);
+     
+     return getFieldAs<DIFile>(3).getCompileUnit();
     }
+    DIFile getFile() const              { return getFieldAs<DIFile>(3); }
     unsigned getLineNumber() const      { return getUnsignedField(4); }
     uint64_t getSizeInBits() const      { return getUInt64Field(5); }
     uint64_t getAlignInBits() const     { return getUInt64Field(6); }
@@ -269,12 +274,23 @@ namespace llvm {
     bool isValid() const {
       return DbgNode && (isBasicType() || isDerivedType() || isCompositeType());
     }
-    StringRef getFilename() const    { return getCompileUnit().getFilename();}
-    StringRef getDirectory() const   { return getCompileUnit().getDirectory();}
+    StringRef getDirectory() const  { 
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getCompileUnit().getDirectory();
+
+      return getFieldAs<DIFile>(3).getDirectory();
+    }
+    StringRef getFilename() const  { 
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getCompileUnit().getFilename();
+
+      return getFieldAs<DIFile>(3).getFilename();
+    }
 
     /// replaceAllUsesWith - Replace all uses of debug info referenced by
     /// this descriptor.
     void replaceAllUsesWith(DIDescriptor &D);
+    void replaceAllUsesWith(MDNode *D);
 
     /// print - print type.
     void print(raw_ostream &OS) const;
@@ -342,6 +358,7 @@ namespace llvm {
     DICompositeType getContainingType() const {
       return getFieldAs<DICompositeType>(12);
     }
+    DIArray getTemplateParams() const { return getFieldAs<DIArray>(13); }
 
     /// Verify - Verify that a composite type descriptor is well formed.
     bool Verify() const;
@@ -353,6 +370,43 @@ namespace llvm {
     void dump() const;
   };
 
+  /// DITemplateTypeParameter - This is a wrapper for template type parameter.
+  class DITemplateTypeParameter : public DIDescriptor {
+  public:
+    explicit DITemplateTypeParameter(const MDNode *N = 0) : DIDescriptor(N) {}
+
+    DIScope getContext() const       { return getFieldAs<DIScope>(1); }
+    StringRef getName() const        { return getStringField(2); }
+    DIType getType() const           { return getFieldAs<DIType>(3); }
+    StringRef getFilename() const    { 
+      return getFieldAs<DIFile>(4).getFilename();
+    }
+    StringRef getDirectory() const   { 
+      return getFieldAs<DIFile>(4).getDirectory();
+    }
+    unsigned getLineNumber() const   { return getUnsignedField(5); }
+    unsigned getColumnNumber() const { return getUnsignedField(6); }
+  };
+
+  /// DITemplateValueParameter - This is a wrapper for template value parameter.
+  class DITemplateValueParameter : public DIDescriptor {
+  public:
+    explicit DITemplateValueParameter(const MDNode *N = 0) : DIDescriptor(N) {}
+
+    DIScope getContext() const       { return getFieldAs<DIScope>(1); }
+    StringRef getName() const        { return getStringField(2); }
+    DIType getType() const           { return getFieldAs<DIType>(3); }
+    uint64_t getValue() const         { return getUInt64Field(4); }
+    StringRef getFilename() const    { 
+      return getFieldAs<DIFile>(5).getFilename();
+    }
+    StringRef getDirectory() const   { 
+      return getFieldAs<DIFile>(5).getDirectory();
+    }
+    unsigned getLineNumber() const   { return getUnsignedField(6); }
+    unsigned getColumnNumber() const { return getUnsignedField(7); }
+  };
+
   /// DISubprogram - This is a wrapper for a subprogram (e.g. a function).
   class DISubprogram : public DIScope {
   public:
@@ -366,8 +420,7 @@ namespace llvm {
       if (getVersion() == llvm::LLVMDebugVersion7)
         return getFieldAs<DICompileUnit>(6);
 
-      DIFile F = getFieldAs<DIFile>(6); 
-      return F.getCompileUnit();
+      return getFieldAs<DIFile>(6).getCompileUnit(); 
     }
     unsigned getLineNumber() const      { return getUnsignedField(7); }
     DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
@@ -396,23 +449,52 @@ namespace llvm {
     DICompositeType getContainingType() const {
       return getFieldAs<DICompositeType>(13);
     }
-    unsigned isArtificial() const    { return getUnsignedField(14); }
+    unsigned isArtificial() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return getUnsignedField(14); 
+      return (getUnsignedField(14) & FlagArtificial) != 0;
+    }
+    /// isPrivate - Return true if this subprogram has "private"
+    /// access specifier.
+    bool isPrivate() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagPrivate) != 0;
+    }
+    /// isProtected - Return true if this subprogram has "protected"
+    /// access specifier.
+    bool isProtected() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagProtected) != 0;
+    }
+    /// isExplicit - Return true if this subprogram is marked as explicit.
+    bool isExplicit() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagExplicit) != 0;
+    }
+    /// isPrototyped - Return true if this subprogram is prototyped.
+    bool isPrototyped() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagPrototyped) != 0;
+    }
+
     unsigned isOptimized() const;
 
     StringRef getFilename() const    { 
       if (getVersion() == llvm::LLVMDebugVersion7)
         return getCompileUnit().getFilename();
 
-      DIFile F = getFieldAs<DIFile>(6); 
-      return F.getFilename();
+      return getFieldAs<DIFile>(6).getFilename(); 
     }
 
     StringRef getDirectory() const   { 
       if (getVersion() == llvm::LLVMDebugVersion7)
         return getCompileUnit().getFilename();
 
-      DIFile F = getFieldAs<DIFile>(6); 
-      return F.getDirectory();
+      return getFieldAs<DIFile>(6).getDirectory(); 
     }
 
     /// Verify - Verify that a subprogram descriptor is well formed.
@@ -484,6 +566,13 @@ namespace llvm {
     }
     unsigned getLineNumber() const      { return getUnsignedField(4); }
     DIType getType() const              { return getFieldAs<DIType>(5); }
+    
+    /// isArtificial - Return true if this variable is marked as "artificial".
+    bool isArtificial() const    { 
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(6) & FlagArtificial) != 0;
+    }
 
 
     /// Verify - Verify that a variable descriptor is well formed.
@@ -525,13 +614,11 @@ namespace llvm {
     unsigned getLineNumber() const   { return getUnsignedField(2);         }
     unsigned getColumnNumber() const { return getUnsignedField(3);         }
     StringRef getDirectory() const {
-      DIFile F = getFieldAs<DIFile>(4);
-      StringRef dir = F.getDirectory();
+      StringRef dir = getFieldAs<DIFile>(4).getDirectory();
       return !dir.empty() ? dir : getContext().getDirectory();
     }
     StringRef getFilename() const {
-      DIFile F = getFieldAs<DIFile>(4);
-      StringRef filename = F.getFilename();
+      StringRef filename = getFieldAs<DIFile>(4).getFilename();
       return !filename.empty() ? filename : getContext().getFilename();
     }
   };
@@ -542,14 +629,17 @@ namespace llvm {
     explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
     DIScope getContext() const     { return getFieldAs<DIScope>(1);      }
     StringRef getName() const      { return getStringField(2);           }
-    StringRef getDirectory() const { return getContext().getDirectory(); }
-    StringRef getFilename() const  { return getContext().getFilename();  }
+    StringRef getDirectory() const  { 
+      return getFieldAs<DIFile>(3).getDirectory();
+    }
+    StringRef getFilename() const  { 
+      return getFieldAs<DIFile>(3).getFilename();
+    }
     DICompileUnit getCompileUnit() const{ 
       if (getVersion() == llvm::LLVMDebugVersion7)
         return getFieldAs<DICompileUnit>(3);
 
-      DIFile F = getFieldAs<DIFile>(3); 
-      return F.getCompileUnit();
+      return getFieldAs<DIFile>(3).getCompileUnit(); 
     }
     unsigned getLineNumber() const { return getUnsignedField(4);         }
     bool Verify() const;
@@ -594,6 +684,10 @@ namespace llvm {
     /// implicitly uniques the values returned.
     DISubrange GetOrCreateSubrange(int64_t Lo, int64_t Hi);
 
+    /// CreateUnspecifiedParameter - Create unspeicified type descriptor
+    /// for a subroutine type.
+    DIDescriptor CreateUnspecifiedParameter();
+
     /// CreateCompileUnit - Create a new descriptor for the specified compile
     /// unit.
     DICompileUnit CreateCompileUnit(unsigned LangID,
@@ -662,6 +756,7 @@ namespace llvm {
 
     /// CreateTemporaryType - Create a temporary forward-declared type.
     DIType CreateTemporaryType();
+    DIType CreateTemporaryType(DIFile F);
 
     /// CreateArtificialType - Create a new DIType with "artificial" flag set.
     DIType CreateArtificialType(DIType Ty);
@@ -690,8 +785,8 @@ namespace llvm {
                                   bool isDefinition,
                                   unsigned VK = 0,
                                   unsigned VIndex = 0,
-                                  DIType = DIType(),
-                                  bool isArtificial = 0,
+                                  DIType ContainingType = DIType(),
+                                  unsigned Flags = 0,
                                   bool isOptimized = false,
                                   Function *Fn = 0);
 
@@ -721,15 +816,15 @@ namespace llvm {
     DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
                               StringRef Name,
                               DIFile F, unsigned LineNo,
-                              DIType Ty, bool AlwaysPreserve = false);
+                              DIType Ty, bool AlwaysPreserve = false,
+                              unsigned Flags = 0);
 
     /// CreateComplexVariable - Create a new descriptor for the specified
     /// variable which has a complex address expression for its address.
     DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                     const std::string &Name,
-                                     DIFile F, unsigned LineNo,
-                                     DIType Ty,
-                                     SmallVector<Value *, 9> &addr);
+                                     StringRef Name, DIFile F, unsigned LineNo,
+                                     DIType Ty, Value *const *Addr,
+                                     unsigned NumAddr);
 
     /// CreateLexicalBlock - This creates a descriptor for a lexical block
     /// with the specified parent context.
@@ -764,20 +859,29 @@ namespace llvm {
     /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
     Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset,
                                        DIVariable D, Instruction *InsertBefore);
+
+    // RecordType - Record DIType in a module such that it is not lost even if
+    // it is not referenced through debug info anchors.
+    void RecordType(DIType T);
+
   private:
     Constant *GetTagConstant(unsigned TAG);
   };
 
-  bool getLocationInfo(const Value *V, std::string &DisplayName,
-                       std::string &Type, unsigned &LineNo, std::string &File,
-                       std::string &Dir);
-
   /// getDISubprogram - Find subprogram that is enclosing this scope.
   DISubprogram getDISubprogram(const MDNode *Scope);
 
   /// getDICompositeType - Find underlying composite type.
   DICompositeType getDICompositeType(DIType T);
 
+  /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+  /// to hold function specific information.
+  NamedMDNode *getOrInsertFnSpecificMDNode(Module &M, StringRef Name);
+
+  /// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
+  /// suitable to hold function specific information.
+  NamedMDNode *getFnSpecificMDNode(const Module &M, StringRef Name);
+
   class DebugInfoFinder {
   public:
     /// processModule - Process entire module and collect debug info
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
new file mode 100644
index 000000000000..d7f74af1c65c
--- /dev/null
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -0,0 +1,189 @@
+//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DominanceFrontier class, which calculate and holds the
+// dominance frontier for a function.
+//
+// This should be considered deprecated, don't add any more uses of this data
+// structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOMINANCEFRONTIER_H
+#define LLVM_ANALYSIS_DOMINANCEFRONTIER_H
+
+#include "llvm/Analysis/Dominators.h"
+#include <map>
+#include <set>
+
+namespace llvm {
+  
+//===----------------------------------------------------------------------===//
+/// DominanceFrontierBase - Common base class for computing forward and inverse
+/// dominance frontiers for a function.
+///
+class DominanceFrontierBase : public FunctionPass {
+public:
+  typedef std::set<BasicBlock*>             DomSetType;    // Dom set for a bb
+  typedef std::map<BasicBlock*, DomSetType> DomSetMapType; // Dom set map
+protected:
+  DomSetMapType Frontiers;
+  std::vector<BasicBlock*> Roots;
+  const bool IsPostDominators;
+
+public:
+  DominanceFrontierBase(char &ID, bool isPostDom)
+    : FunctionPass(ID), IsPostDominators(isPostDom) {}
+
+  /// getRoots - Return the root blocks of the current CFG.  This may include
+  /// multiple blocks if we are computing post dominators.  For forward
+  /// dominators, this will always be a single block (the entry node).
+  ///
+  inline const std::vector<BasicBlock*> &getRoots() const { return Roots; }
+
+  /// isPostDominator - Returns true if analysis based of postdoms
+  ///
+  bool isPostDominator() const { return IsPostDominators; }
+
+  virtual void releaseMemory() { Frontiers.clear(); }
+
+  // Accessor interface:
+  typedef DomSetMapType::iterator iterator;
+  typedef DomSetMapType::const_iterator const_iterator;
+  iterator       begin()       { return Frontiers.begin(); }
+  const_iterator begin() const { return Frontiers.begin(); }
+  iterator       end()         { return Frontiers.end(); }
+  const_iterator end()   const { return Frontiers.end(); }
+  iterator       find(BasicBlock *B)       { return Frontiers.find(B); }
+  const_iterator find(BasicBlock *B) const { return Frontiers.find(B); }
+
+  iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) {
+    assert(find(BB) == end() && "Block already in DominanceFrontier!");
+    return Frontiers.insert(std::make_pair(BB, frontier)).first;
+  }
+
+  /// removeBlock - Remove basic block BB's frontier.
+  void removeBlock(BasicBlock *BB) {
+    assert(find(BB) != end() && "Block is not in DominanceFrontier!");
+    for (iterator I = begin(), E = end(); I != E; ++I)
+      I->second.erase(BB);
+    Frontiers.erase(BB);
+  }
+
+  void addToFrontier(iterator I, BasicBlock *Node) {
+    assert(I != end() && "BB is not in DominanceFrontier!");
+    I->second.insert(Node);
+  }
+
+  void removeFromFrontier(iterator I, BasicBlock *Node) {
+    assert(I != end() && "BB is not in DominanceFrontier!");
+    assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB");
+    I->second.erase(Node);
+  }
+
+  /// compareDomSet - Return false if two domsets match. Otherwise
+  /// return true;
+  bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const {
+    std::set<BasicBlock *> tmpSet;
+    for (DomSetType::const_iterator I = DS2.begin(),
+           E = DS2.end(); I != E; ++I)
+      tmpSet.insert(*I);
+
+    for (DomSetType::const_iterator I = DS1.begin(),
+           E = DS1.end(); I != E; ) {
+      BasicBlock *Node = *I++;
+
+      if (tmpSet.erase(Node) == 0)
+        // Node is in DS1 but not in DS2.
+        return true;
+    }
+
+    if (!tmpSet.empty())
+      // There are nodes that are in DS2 but not in DS1.
+      return true;
+
+    // DS1 and DS2 matches.
+    return false;
+  }
+
+  /// compare - Return true if the other dominance frontier base matches
+  /// this dominance frontier base. Otherwise return false.
+  bool compare(DominanceFrontierBase &Other) const {
+    DomSetMapType tmpFrontiers;
+    for (DomSetMapType::const_iterator I = Other.begin(),
+           E = Other.end(); I != E; ++I)
+      tmpFrontiers.insert(std::make_pair(I->first, I->second));
+
+    for (DomSetMapType::iterator I = tmpFrontiers.begin(),
+           E = tmpFrontiers.end(); I != E; ) {
+      BasicBlock *Node = I->first;
+      const_iterator DFI = find(Node);
+      if (DFI == end())
+        return true;
+
+      if (compareDomSet(I->second, DFI->second))
+        return true;
+
+      ++I;
+      tmpFrontiers.erase(Node);
+    }
+
+    if (!tmpFrontiers.empty())
+      return true;
+
+    return false;
+  }
+
+  /// print - Convert to human readable form
+  ///
+  virtual void print(raw_ostream &OS, const Module* = 0) const;
+
+  /// dump - Dump the dominance frontier to dbgs().
+  void dump() const;
+};
+
+
+//===-------------------------------------
+/// DominanceFrontier Class - Concrete subclass of DominanceFrontierBase that is
+/// used to compute a forward dominator frontiers.
+///
+class DominanceFrontier : public DominanceFrontierBase {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  DominanceFrontier() :
+    DominanceFrontierBase(ID, false) {
+      initializeDominanceFrontierPass(*PassRegistry::getPassRegistry());
+    }
+
+  BasicBlock *getRoot() const {
+    assert(Roots.size() == 1 && "Should always have entry node!");
+    return Roots[0];
+  }
+
+  virtual bool runOnFunction(Function &) {
+    Frontiers.clear();
+    DominatorTree &DT = getAnalysis<DominatorTree>();
+    Roots = DT.getRoots();
+    assert(Roots.size() == 1 && "Only one entry block for forward domfronts!");
+    calculate(DT, DT[Roots[0]]);
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<DominatorTree>();
+  }
+
+  const DomSetType &calculate(const DominatorTree &DT,
+                              const DomTreeNode *Node);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index 0419688a53bd..ae552b05abff 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -22,13 +22,9 @@
 //   A Fast Algorithm for Finding Dominators in a Flowgraph
 //   T. Lengauer & R. Tarjan, ACM TOPLAS July 1979, pgs 121-141.
 //
-// This implements both the O(n*ack(n)) and the O(n*log(n)) versions of EVAL and
-// LINK, but it turns out that the theoretically slower O(n*log(n))
-// implementation is actually faster than the "efficient" algorithm (even for
-// large CFGs) because the constant overheads are substantially smaller.  The
-// lower-complexity version can be enabled with the following #define:
-//
-#define BALANCE_IDOM_TREE 0
+// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
+// out that the theoretically slower O(n*log(n)) implementation is actually
+// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,9 +42,6 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
   VInfo.Label = V;
 
   Vertex.push_back(V);        // Vertex[n] = V;
-  //Info[V].Ancestor = 0;     // Ancestor[n] = 0
-  //Info[V].Child = 0;        // Child[v] = 0
-  VInfo.Size = 1;             // Size[v] = 1
 
   for (succ_iterator SI = succ_begin(V), E = succ_end(V); SI != E; ++SI) {
     InfoRec &SuccVInfo = DT.Info[*SI];
@@ -58,10 +51,10 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
     }
   }
 #else
-  bool IsChilOfArtificialExit = (N != 0);
+  bool IsChildOfArtificialExit = (N != 0);
 
-  std::vector<std::pair<typename GraphT::NodeType*,
-                        typename GraphT::ChildIteratorType> > Worklist;
+  SmallVector<std::pair<typename GraphT::NodeType*,
+                        typename GraphT::ChildIteratorType>, 32> Worklist;
   Worklist.push_back(std::make_pair(V, GraphT::child_begin(V)));
   while (!Worklist.empty()) {
     typename GraphT::NodeType* BB = Worklist.back().first;
@@ -76,14 +69,11 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
       BBInfo.Label = BB;
 
       DT.Vertex.push_back(BB);       // Vertex[n] = V;
-      //BBInfo[V].Ancestor = 0;   // Ancestor[n] = 0
-      //BBInfo[V].Child = 0;      // Child[v] = 0
-      BBInfo.Size = 1;            // Size[v] = 1
 
-      if (IsChilOfArtificialExit)
+      if (IsChildOfArtificialExit)
         BBInfo.Parent = 1;
 
-      IsChilOfArtificialExit = false;
+      IsChildOfArtificialExit = false;
     }
 
     // store the DFS number of the current BB - the reference to BBInfo might
@@ -114,118 +104,47 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
 }
 
 template<class GraphT>
-void Compress(DominatorTreeBase<typename GraphT::NodeType>& DT,
-              typename GraphT::NodeType *VIn) {
-  std::vector<typename GraphT::NodeType*> Work;
+typename GraphT::NodeType* 
+Eval(DominatorTreeBase<typename GraphT::NodeType>& DT,
+     typename GraphT::NodeType *VIn, unsigned LastLinked) {
+  typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInInfo =
+                                                                  DT.Info[VIn];
+  if (VInInfo.DFSNum < LastLinked)
+    return VIn;
+
+  SmallVector<typename GraphT::NodeType*, 32> Work;
   SmallPtrSet<typename GraphT::NodeType*, 32> Visited;
-  typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInVAInfo =
-                                      DT.Info[DT.Vertex[DT.Info[VIn].Ancestor]];
 
-  if (VInVAInfo.Ancestor != 0)
+  if (VInInfo.Parent >= LastLinked)
     Work.push_back(VIn);
   
   while (!Work.empty()) {
     typename GraphT::NodeType* V = Work.back();
     typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInfo =
                                                                      DT.Info[V];
-    typename GraphT::NodeType* VAncestor = DT.Vertex[VInfo.Ancestor];
-    typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VAInfo =
-                                                             DT.Info[VAncestor];
+    typename GraphT::NodeType* VAncestor = DT.Vertex[VInfo.Parent];
 
     // Process Ancestor first
-    if (Visited.insert(VAncestor) &&
-        VAInfo.Ancestor != 0) {
+    if (Visited.insert(VAncestor) && VInfo.Parent >= LastLinked) {
       Work.push_back(VAncestor);
       continue;
     } 
     Work.pop_back(); 
 
     // Update VInfo based on Ancestor info
-    if (VAInfo.Ancestor == 0)
+    if (VInfo.Parent < LastLinked)
       continue;
+
+    typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VAInfo =
+                                                             DT.Info[VAncestor];
     typename GraphT::NodeType* VAncestorLabel = VAInfo.Label;
     typename GraphT::NodeType* VLabel = VInfo.Label;
     if (DT.Info[VAncestorLabel].Semi < DT.Info[VLabel].Semi)
       VInfo.Label = VAncestorLabel;
-    VInfo.Ancestor = VAInfo.Ancestor;
-  }
-}
-
-template<class GraphT>
-typename GraphT::NodeType* 
-Eval(DominatorTreeBase<typename GraphT::NodeType>& DT,
-     typename GraphT::NodeType *V) {
-  typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInfo =
-                                                                     DT.Info[V];
-#if !BALANCE_IDOM_TREE
-  // Higher-complexity but faster implementation
-  if (VInfo.Ancestor == 0)
-    return V;
-  Compress<GraphT>(DT, V);
-  return VInfo.Label;
-#else
-  // Lower-complexity but slower implementation
-  if (VInfo.Ancestor == 0)
-    return VInfo.Label;
-  Compress<GraphT>(DT, V);
-  GraphT::NodeType* VLabel = VInfo.Label;
-
-  GraphT::NodeType* VAncestorLabel = DT.Info[VInfo.Ancestor].Label;
-  if (DT.Info[VAncestorLabel].Semi >= DT.Info[VLabel].Semi)
-    return VLabel;
-  else
-    return VAncestorLabel;
-#endif
-}
-
-template<class GraphT>
-void Link(DominatorTreeBase<typename GraphT::NodeType>& DT,
-          unsigned DFSNumV, typename GraphT::NodeType* W,
-        typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &WInfo) {
-#if !BALANCE_IDOM_TREE
-  // Higher-complexity but faster implementation
-  WInfo.Ancestor = DFSNumV;
-#else
-  // Lower-complexity but slower implementation
-  GraphT::NodeType* WLabel = WInfo.Label;
-  unsigned WLabelSemi = DT.Info[WLabel].Semi;
-  GraphT::NodeType* S = W;
-  InfoRec *SInfo = &DT.Info[S];
-
-  GraphT::NodeType* SChild = SInfo->Child;
-  InfoRec *SChildInfo = &DT.Info[SChild];
-
-  while (WLabelSemi < DT.Info[SChildInfo->Label].Semi) {
-    GraphT::NodeType* SChildChild = SChildInfo->Child;
-    if (SInfo->Size+DT.Info[SChildChild].Size >= 2*SChildInfo->Size) {
-      SChildInfo->Ancestor = S;
-      SInfo->Child = SChild = SChildChild;
-      SChildInfo = &DT.Info[SChild];
-    } else {
-      SChildInfo->Size = SInfo->Size;
-      S = SInfo->Ancestor = SChild;
-      SInfo = SChildInfo;
-      SChild = SChildChild;
-      SChildInfo = &DT.Info[SChild];
-    }
+    VInfo.Parent = VAInfo.Parent;
   }
 
-  DominatorTreeBase::InfoRec &VInfo = DT.Info[V];
-  SInfo->Label = WLabel;
-
-  assert(V != W && "The optimization here will not work in this case!");
-  unsigned WSize = WInfo.Size;
-  unsigned VSize = (VInfo.Size += WSize);
-
-  if (VSize < 2*WSize)
-    std::swap(S, VInfo.Child);
-
-  while (S) {
-    SInfo = &DT.Info[S];
-    SInfo->Ancestor = V;
-    S = SInfo->Child;
-  }
-#endif
+  return VInInfo.Label;
 }
 
 template<class FuncT, class NodeT>
@@ -242,9 +161,6 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
     BBInfo.Label = NULL;
 
     DT.Vertex.push_back(NULL);       // Vertex[n] = V;
-      //BBInfo[V].Ancestor = 0;   // Ancestor[n] = 0
-      //BBInfo[V].Child = 0;      // Child[v] = 0
-    BBInfo.Size = 1;            // Size[v] = 1
   }
 
   // Step #1: Number blocks in depth-first order and initialize variables used
@@ -257,12 +173,34 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
   // infinite loops). In these cases an artificial exit node is required.
   MultipleRoots |= (DT.isPostDominator() && N != F.size());
 
+  // When naively implemented, the Lengauer-Tarjan algorithm requires a separate
+  // bucket for each vertex. However, this is unnecessary, because each vertex
+  // is only placed into a single bucket (that of its semidominator), and each
+  // vertex's bucket is processed before it is added to any bucket itself.
+  //
+  // Instead of using a bucket per vertex, we use a single array Buckets that
+  // has two purposes. Before the vertex V with preorder number i is processed,
+  // Buckets[i] stores the index of the first element in V's bucket. After V's
+  // bucket is processed, Buckets[i] stores the index of the next element in the
+  // bucket containing V, if any.
+  SmallVector<unsigned, 32> Buckets;
+  Buckets.resize(N + 1);
+  for (unsigned i = 1; i <= N; ++i)
+    Buckets[i] = i;
+
   for (unsigned i = N; i >= 2; --i) {
     typename GraphT::NodeType* W = DT.Vertex[i];
     typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &WInfo =
                                                                      DT.Info[W];
 
-    // Step #2: Calculate the semidominators of all vertices
+    // Step #2: Implicitly define the immediate dominator of vertices
+    for (unsigned j = i; Buckets[j] != i; j = Buckets[j]) {
+      typename GraphT::NodeType* V = DT.Vertex[Buckets[j]];
+      typename GraphT::NodeType* U = Eval<GraphT>(DT, V, i + 1);
+      DT.IDoms[V] = DT.Info[U].Semi < i ? U : W;
+    }
+
+    // Step #3: Calculate the semidominators of all vertices
 
     // initialize the semi dominator to point to the parent node
     WInfo.Semi = WInfo.Parent;
@@ -272,25 +210,28 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
          E = InvTraits::child_end(W); CI != E; ++CI) {
       typename InvTraits::NodeType *N = *CI;
       if (DT.Info.count(N)) {  // Only if this predecessor is reachable!
-        unsigned SemiU = DT.Info[Eval<GraphT>(DT, N)].Semi;
+        unsigned SemiU = DT.Info[Eval<GraphT>(DT, N, i + 1)].Semi;
         if (SemiU < WInfo.Semi)
           WInfo.Semi = SemiU;
       }
     }
 
-    DT.Info[DT.Vertex[WInfo.Semi]].Bucket.push_back(W);
-
-    typename GraphT::NodeType* WParent = DT.Vertex[WInfo.Parent];
-    Link<GraphT>(DT, WInfo.Parent, W, WInfo);
+    // If V is a non-root vertex and sdom(V) = parent(V), then idom(V) is
+    // necessarily parent(V). In this case, set idom(V) here and avoid placing
+    // V into a bucket.
+    if (WInfo.Semi == WInfo.Parent) {
+      DT.IDoms[W] = DT.Vertex[WInfo.Parent];
+    } else {
+      Buckets[i] = Buckets[WInfo.Semi];
+      Buckets[WInfo.Semi] = i;
+    }
+  }
 
-    // Step #3: Implicitly define the immediate dominator of vertices
-    std::vector<typename GraphT::NodeType*> &WParentBucket =
-                                                        DT.Info[WParent].Bucket;
-    while (!WParentBucket.empty()) {
-      typename GraphT::NodeType* V = WParentBucket.back();
-      WParentBucket.pop_back();
-      typename GraphT::NodeType* U = Eval<GraphT>(DT, V);
-      DT.IDoms[V] = DT.Info[U].Semi < DT.Info[V].Semi ? U : WParent;
+  if (N >= 1) {
+    typename GraphT::NodeType* Root = DT.Vertex[1];
+    for (unsigned j = 1; Buckets[j] != 1; j = Buckets[j]) {
+      typename GraphT::NodeType* V = DT.Vertex[Buckets[j]];
+      DT.IDoms[V] = Root;
     }
   }
 
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 73c6e6286b5b..230e83d30121 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -7,14 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the following classes:
-//  1. DominatorTree: Represent dominators as an explicit tree structure.
-//  2. DominanceFrontier: Calculate and hold the dominance frontier for a
-//     function.
-//
-//  These data structures are listed in increasing order of complexity.  It
-//  takes longer to calculate the dominator frontier, for example, than the
-//  DominatorTree mapping.
+// This file defines the DominatorTree class, which provides fast and efficient
+// dominance queries.
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,19 +17,15 @@
 
 #include "llvm/Pass.h"
 #include "llvm/Function.h"
-#include "llvm/Instructions.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
-#include <map>
-#include <set>
 
 namespace llvm {
 
@@ -205,15 +195,11 @@ protected:
   // Information record used during immediate dominators computation.
   struct InfoRec {
     unsigned DFSNum;
+    unsigned Parent;
     unsigned Semi;
-    unsigned Size;
-    NodeT *Label, *Child;
-    unsigned Parent, Ancestor;
-
-    std::vector<NodeT*> Bucket;
+    NodeT *Label;
 
-    InfoRec() : DFSNum(0), Semi(0), Size(0), Label(0), Child(0), Parent(0),
-                Ancestor(0) {}
+    InfoRec() : DFSNum(0), Parent(0), Semi(0), Label(0) {}
   };
 
   DenseMap<NodeT*, NodeT*> IDoms;
@@ -303,9 +289,6 @@ public:
     : DominatorBase<NodeT>(isPostDom), DFSInfoValid(false), SlowQueries(0) {}
   virtual ~DominatorTreeBase() { reset(); }
 
-  // FIXME: Should remove this
-  virtual bool runOnFunction(Function &F) { return false; }
-
   /// compare - Return false if the other dominator tree base matches this
   /// dominator tree base. Otherwise return true.
   bool compare(DominatorTreeBase &Other) const {
@@ -361,8 +344,15 @@ public:
     return dominatedBySlowTreeWalk(A, B);
   }
 
-  inline bool properlyDominates(NodeT *A, NodeT *B) {
-    return properlyDominates(getNode(A), getNode(B));
+  inline bool properlyDominates(const NodeT *A, const NodeT *B) {
+    if (A == B)
+      return false;
+
+    // Cast away the const qualifiers here. This is ok since
+    // this function doesn't actually return the values returned
+    // from getNode.
+    return properlyDominates(getNode(const_cast<NodeT *>(A)),
+                             getNode(const_cast<NodeT *>(B)));
   }
 
   bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
@@ -377,7 +367,7 @@ public:
 
   /// isReachableFromEntry - Return true if A is dominated by the entry
   /// block of the function containing it.
-  bool isReachableFromEntry(NodeT* A) {
+  bool isReachableFromEntry(const NodeT* A) {
     assert(!this->isPostDominator() &&
            "This is not implemented for post dominators");
     return dominates(&A->getParent()->front(), A);
@@ -478,6 +468,13 @@ public:
     return NULL;
   }
 
+  const NodeT *findNearestCommonDominator(const NodeT *A, const NodeT *B) {
+    // Cast away the const qualifiers here. This is ok since
+    // const is re-introduced on the return type.
+    return findNearestCommonDominator(const_cast<NodeT *>(A),
+                                      const_cast<NodeT *>(B));
+  }
+
   //===--------------------------------------------------------------------===//
   // API to update (Post)DominatorTree information based on modifications to
   // the CFG...
@@ -509,7 +506,7 @@ public:
   }
 
   /// eraseNode - Removes a node from the dominator tree. Block must not
-  /// domiante any other blocks. Removes node from its immediate dominator's
+  /// dominate any other blocks. Removes node from its immediate dominator's
   /// children list. Deletes dominator node associated with basic block BB.
   void eraseNode(NodeT *BB) {
     DomTreeNodeBase<NodeT> *Node = getNode(BB);
@@ -556,7 +553,7 @@ public:
       o << "Inorder PostDominator Tree: ";
     else
       o << "Inorder Dominator Tree: ";
-    if (this->DFSInfoValid)
+    if (!this->DFSInfoValid)
       o << "DFSNumbers invalid: " << SlowQueries << " slow queries.";
     o << "\n";
 
@@ -566,19 +563,11 @@ public:
   }
 
 protected:
-  template<class GraphT>
-  friend void Compress(DominatorTreeBase<typename GraphT::NodeType>& DT,
-                       typename GraphT::NodeType* VIn);
-
   template<class GraphT>
   friend typename GraphT::NodeType* Eval(
                                DominatorTreeBase<typename GraphT::NodeType>& DT,
-                                         typename GraphT::NodeType* V);
-
-  template<class GraphT>
-  friend void Link(DominatorTreeBase<typename GraphT::NodeType>& DT,
-                   unsigned DFSNumV, typename GraphT::NodeType* W,
-         typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &WInfo);
+                                         typename GraphT::NodeType* V,
+                                         unsigned LastLinked);
 
   template<class GraphT>
   friend unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
@@ -703,6 +692,7 @@ public:
   DominatorTreeBase<BasicBlock>* DT;
 
   DominatorTree() : FunctionPass(ID) {
+    initializeDominatorTreePass(*PassRegistry::getPassRegistry());
     DT = new DominatorTreeBase<BasicBlock>(false);
   }
 
@@ -751,7 +741,7 @@ public:
     AU.setPreservesAll();
   }
 
-  inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
+  inline bool dominates(const DomTreeNode* A, const DomTreeNode* B) const {
     return DT->dominates(A, B);
   }
 
@@ -767,7 +757,7 @@ public:
     return DT->properlyDominates(A, B);
   }
 
-  bool properlyDominates(BasicBlock *A, BasicBlock *B) const {
+  bool properlyDominates(const BasicBlock *A, const BasicBlock *B) const {
     return DT->properlyDominates(A, B);
   }
 
@@ -777,6 +767,11 @@ public:
     return DT->findNearestCommonDominator(A, B);
   }
 
+  inline const BasicBlock *findNearestCommonDominator(const BasicBlock *A,
+                                                      const BasicBlock *B) {
+    return DT->findNearestCommonDominator(A, B);
+  }
+
   inline DomTreeNode *operator[](BasicBlock *BB) const {
     return DT->getNode(BB);
   }
@@ -807,7 +802,7 @@ public:
   }
 
   /// eraseNode - Removes a node from the dominator tree. Block must not
-  /// domiante any other blocks. Removes node from its immediate dominator's
+  /// dominate any other blocks. Removes node from its immediate dominator's
   /// children list. Deletes dominator node associated with basic block BB.
   inline void eraseNode(BasicBlock *BB) {
     DT->eraseNode(BB);
@@ -819,7 +814,7 @@ public:
     DT->splitBlock(NewBB);
   }
 
-  bool isReachableFromEntry(BasicBlock* A) {
+  bool isReachableFromEntry(const BasicBlock* A) {
     return DT->isReachableFromEntry(A);
   }
 
@@ -876,194 +871,6 @@ template <> struct GraphTraits<DominatorTree*>
 };
 
 
-//===----------------------------------------------------------------------===//
-/// DominanceFrontierBase - Common base class for computing forward and inverse
-/// dominance frontiers for a function.
-///
-class DominanceFrontierBase : public FunctionPass {
-public:
-  typedef std::set<BasicBlock*>             DomSetType;    // Dom set for a bb
-  typedef std::map<BasicBlock*, DomSetType> DomSetMapType; // Dom set map
-protected:
-  DomSetMapType Frontiers;
-  std::vector<BasicBlock*> Roots;
-  const bool IsPostDominators;
-
-public:
-  DominanceFrontierBase(char &ID, bool isPostDom)
-    : FunctionPass(ID), IsPostDominators(isPostDom) {}
-
-  /// getRoots - Return the root blocks of the current CFG.  This may include
-  /// multiple blocks if we are computing post dominators.  For forward
-  /// dominators, this will always be a single block (the entry node).
-  ///
-  inline const std::vector<BasicBlock*> &getRoots() const { return Roots; }
-
-  /// isPostDominator - Returns true if analysis based of postdoms
-  ///
-  bool isPostDominator() const { return IsPostDominators; }
-
-  virtual void releaseMemory() { Frontiers.clear(); }
-
-  // Accessor interface:
-  typedef DomSetMapType::iterator iterator;
-  typedef DomSetMapType::const_iterator const_iterator;
-  iterator       begin()       { return Frontiers.begin(); }
-  const_iterator begin() const { return Frontiers.begin(); }
-  iterator       end()         { return Frontiers.end(); }
-  const_iterator end()   const { return Frontiers.end(); }
-  iterator       find(BasicBlock *B)       { return Frontiers.find(B); }
-  const_iterator find(BasicBlock *B) const { return Frontiers.find(B); }
-
-  iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) {
-    assert(find(BB) == end() && "Block already in DominanceFrontier!");
-    return Frontiers.insert(std::make_pair(BB, frontier)).first;
-  }
-
-  /// removeBlock - Remove basic block BB's frontier.
-  void removeBlock(BasicBlock *BB) {
-    assert(find(BB) != end() && "Block is not in DominanceFrontier!");
-    for (iterator I = begin(), E = end(); I != E; ++I)
-      I->second.erase(BB);
-    Frontiers.erase(BB);
-  }
-
-  void addToFrontier(iterator I, BasicBlock *Node) {
-    assert(I != end() && "BB is not in DominanceFrontier!");
-    I->second.insert(Node);
-  }
-
-  void removeFromFrontier(iterator I, BasicBlock *Node) {
-    assert(I != end() && "BB is not in DominanceFrontier!");
-    assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB");
-    I->second.erase(Node);
-  }
-
-  /// compareDomSet - Return false if two domsets match. Otherwise
-  /// return true;
-  bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const {
-    std::set<BasicBlock *> tmpSet;
-    for (DomSetType::const_iterator I = DS2.begin(),
-           E = DS2.end(); I != E; ++I)
-      tmpSet.insert(*I);
-
-    for (DomSetType::const_iterator I = DS1.begin(),
-           E = DS1.end(); I != E; ) {
-      BasicBlock *Node = *I++;
-
-      if (tmpSet.erase(Node) == 0)
-        // Node is in DS1 but not in DS2.
-        return true;
-    }
-
-    if (!tmpSet.empty())
-      // There are nodes that are in DS2 but not in DS1.
-      return true;
-
-    // DS1 and DS2 matches.
-    return false;
-  }
-
-  /// compare - Return true if the other dominance frontier base matches
-  /// this dominance frontier base. Otherwise return false.
-  bool compare(DominanceFrontierBase &Other) const {
-    DomSetMapType tmpFrontiers;
-    for (DomSetMapType::const_iterator I = Other.begin(),
-           E = Other.end(); I != E; ++I)
-      tmpFrontiers.insert(std::make_pair(I->first, I->second));
-
-    for (DomSetMapType::iterator I = tmpFrontiers.begin(),
-           E = tmpFrontiers.end(); I != E; ) {
-      BasicBlock *Node = I->first;
-      const_iterator DFI = find(Node);
-      if (DFI == end())
-        return true;
-
-      if (compareDomSet(I->second, DFI->second))
-        return true;
-
-      ++I;
-      tmpFrontiers.erase(Node);
-    }
-
-    if (!tmpFrontiers.empty())
-      return true;
-
-    return false;
-  }
-
-  /// print - Convert to human readable form
-  ///
-  virtual void print(raw_ostream &OS, const Module* = 0) const;
-
-  /// dump - Dump the dominance frontier to dbgs().
-  void dump() const;
-};
-
-
-//===-------------------------------------
-/// DominanceFrontier Class - Concrete subclass of DominanceFrontierBase that is
-/// used to compute a forward dominator frontiers.
-///
-class DominanceFrontier : public DominanceFrontierBase {
-public:
-  static char ID; // Pass ID, replacement for typeid
-  DominanceFrontier() :
-    DominanceFrontierBase(ID, false) {}
-
-  BasicBlock *getRoot() const {
-    assert(Roots.size() == 1 && "Should always have entry node!");
-    return Roots[0];
-  }
-
-  virtual bool runOnFunction(Function &) {
-    Frontiers.clear();
-    DominatorTree &DT = getAnalysis<DominatorTree>();
-    Roots = DT.getRoots();
-    assert(Roots.size() == 1 && "Only one entry block for forward domfronts!");
-    calculate(DT, DT[Roots[0]]);
-    return false;
-  }
-
-  virtual void verifyAnalysis() const;
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesAll();
-    AU.addRequired<DominatorTree>();
-  }
-
-  /// splitBlock - BB is split and now it has one successor. Update dominance
-  /// frontier to reflect this change.
-  void splitBlock(BasicBlock *BB);
-
-  /// BasicBlock BB's new dominator is NewBB. Update BB's dominance frontier
-  /// to reflect this change.
-  void changeImmediateDominator(BasicBlock *BB, BasicBlock *NewBB,
-                                DominatorTree *DT) {
-    // NewBB is now dominating BB. Which means BB's dominance
-    // frontier is now part of NewBB's dominance frontier. However, BB
-    // itself is not member of NewBB's dominance frontier.
-    DominanceFrontier::iterator NewDFI = find(NewBB);
-    DominanceFrontier::iterator DFI = find(BB);
-    // If BB was an entry block then its frontier is empty.
-    if (DFI == end())
-      return;
-    DominanceFrontier::DomSetType BBSet = DFI->second;
-    for (DominanceFrontier::DomSetType::iterator BBSetI = BBSet.begin(),
-           BBSetE = BBSet.end(); BBSetI != BBSetE; ++BBSetI) {
-      BasicBlock *DFMember = *BBSetI;
-      // Insert only if NewBB dominates DFMember.
-      if (!DT->dominates(NewBB, DFMember))
-        NewDFI->second.insert(DFMember);
-    }
-    NewDFI->second.erase(BB);
-  }
-
-  const DomSetType &calculate(const DominatorTree &DT,
-                              const DomTreeNode *Node);
-};
-
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/FindUsedTypes.h b/include/llvm/Analysis/FindUsedTypes.h
index 8a78eb624973..fc57e1a04690 100644
--- a/include/llvm/Analysis/FindUsedTypes.h
+++ b/include/llvm/Analysis/FindUsedTypes.h
@@ -26,7 +26,9 @@ class FindUsedTypes : public ModulePass {
   std::set<const Type *> UsedTypes;
 public:
   static char ID; // Pass identification, replacement for typeid
-  FindUsedTypes() : ModulePass(ID) {}
+  FindUsedTypes() : ModulePass(ID) {
+    initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
+  }
 
   /// getTypes - After the pass has been run, return the set containing all of
   /// the types used in the module.
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 462bddd53307..b08bf57ace96 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -33,7 +33,7 @@ namespace llvm {
   namespace InlineConstants {
     // Various magic constants used to adjust heuristics.
     const int InstrCost = 5;
-    const int IndirectCallBonus = 500;
+    const int IndirectCallBonus = -100;
     const int CallPenalty = 25;
     const int LastCallToStaticBonus = -15000;
     const int ColdccPenalty = 2000;
@@ -98,7 +98,8 @@ namespace llvm {
       unsigned AllocaWeight;
 
       ArgInfo(unsigned CWeight, unsigned AWeight)
-        : ConstantWeight(CWeight), AllocaWeight(AWeight) {}
+        : ConstantWeight(CWeight), AllocaWeight(AWeight)
+          {}
     };
 
     struct FunctionInfo {
@@ -110,17 +111,6 @@ namespace llvm {
       /// entry here.
       std::vector<ArgInfo> ArgumentWeights;
 
-      /// CountCodeReductionForConstant - Figure out an approximation for how
-      /// many instructions will be constant folded if the specified value is
-      /// constant.
-      unsigned CountCodeReductionForConstant(Value *V);
-
-      /// CountCodeReductionForAlloca - Figure out an approximation of how much
-      /// smaller the function will be if it is inlined into a context where an
-      /// argument becomes an alloca.
-      ///
-      unsigned CountCodeReductionForAlloca(Value *V);
-
       /// analyzeFunction - Add information about the specified function
       /// to the current structure.
       void analyzeFunction(Function *F);
@@ -134,6 +124,10 @@ namespace llvm {
     // the ValueMap will update itself when this happens.
     ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
 
+    int CountBonusForConstant(Value *V, Constant *C = NULL);
+    int ConstantFunctionBonus(CallSite CS, Constant *C);
+    int getInlineSize(CallSite CS, Function *Callee);
+    int getInlineBonuses(CallSite CS, Function *Callee);
   public:
 
     /// getInlineCost - The heuristic used to determine if we should inline the
@@ -150,6 +144,18 @@ namespace llvm {
                              Function *Callee,
                              SmallPtrSet<const Function *, 16> &NeverInline);
 
+    /// getSpecializationBonus - The heuristic used to determine the per-call
+    /// performance boost for using a specialization of Callee with argument
+    /// SpecializedArgNos replaced by a constant.
+    int getSpecializationBonus(Function *Callee,
+             SmallVectorImpl<unsigned> &SpecializedArgNo);
+
+    /// getSpecializationCost - The heuristic used to determine the code-size
+    /// impact of creating a specialized version of Callee with argument
+    /// SpecializedArgNo replaced by a constant.
+    InlineCost getSpecializationCost(Function *Callee,
+               SmallVectorImpl<unsigned> &SpecializedArgNo);
+
     /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
     /// higher threshold to determine if the function call should be inlined.
     float getInlineFudgeFactor(CallSite CS);
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index f47e740a741f..dff1ba2f7beb 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -7,9 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares routines for folding instructions into simpler forms that
-// do not require creating new instructions.  For example, this does constant
-// folding, and can handle identities like (X&0)->0.
+// This file declares routines for folding instructions into simpler forms
+// that do not require creating new instructions.  This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x").  If the simplification is also an instruction
+// then it dominates the original instruction.
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +20,7 @@
 #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
 
 namespace llvm {
+  class DominatorTree;
   class Instruction;
   class Value;
   class TargetData;
@@ -24,56 +28,106 @@ namespace llvm {
   /// SimplifyAddInst - Given operands for an Add, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
-                         const TargetData *TD = 0);
-  
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifySubInst - Given operands for a Sub, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyMulInst - Given operands for a Mul, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyMulInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
+
+  /// SimplifySDivInst - Given operands for an SDiv, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifySDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyUDivInst - Given operands for a UDiv, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyFDivInst - Given operands for an FDiv, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyShlInst - Given operands for a Shl, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyLShrInst - Given operands for a LShr, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                          const TargetData *TD = 0, const DominatorTree *DT=0);
+
+  /// SimplifyAShrInst - Given operands for a AShr, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
   /// SimplifyAndInst - Given operands for an And, see if we can
   /// fold the result.  If not, this returns null.
-  Value *SimplifyAndInst(Value *LHS, Value *RHS,
-                         const TargetData *TD = 0);
+  Value *SimplifyAndInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
 
   /// SimplifyOrInst - Given operands for an Or, see if we can
   /// fold the result.  If not, this returns null.
-  Value *SimplifyOrInst(Value *LHS, Value *RHS,
-                        const TargetData *TD = 0);
-  
+  Value *SimplifyOrInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                        const DominatorTree *DT = 0);
+
+  /// SimplifyXorInst - Given operands for a Xor, see if we can
+  /// fold the result.  If not, this returns null.
+  Value *SimplifyXorInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
+
   /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                          const TargetData *TD = 0);
-  
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
   /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                          const TargetData *TD = 0);
-  
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
   /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
   /// the result.  If not, this returns null.
   Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
-                            const TargetData *TD = 0);
+                            const TargetData *TD = 0,
+                            const DominatorTree *DT = 0);
 
   /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyGEPInst(Value * const *Ops, unsigned NumOps,
-                         const TargetData *TD = 0);
-  
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
   //=== Helper functions for higher up the class hierarchy.
-  
-  
+
+
   /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                         const TargetData *TD = 0);
-  
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
   /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
   /// fold the result.  If not, this returns null.
-  Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, 
-                       const TargetData *TD = 0);
-  
+  Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                       const TargetData *TD = 0, const DominatorTree *DT = 0);
+
   /// SimplifyInstruction - See if we can compute a simplified version of this
   /// instruction.  If not, this returns null.
-  Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0);
-  
-  
+  Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0,
+                             const DominatorTree *DT = 0);
+
+
   /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
   /// delete the From instruction.  In addition to a basic RAUW, this does a
   /// recursive simplification of the updated instructions.  This catches
@@ -81,7 +135,8 @@ namespace llvm {
   /// simplifies and deletes scalar operations, it does not change the CFG.
   ///
   void ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
-                                 const TargetData *TD = 0);
+                                 const TargetData *TD = 0,
+                                 const DominatorTree *DT = 0);
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index 75a5cdf1f99d..df7313f18f3d 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -48,7 +48,9 @@ class IntervalPartition : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
 
-  IntervalPartition() : FunctionPass(ID), RootInterval(0) {}
+  IntervalPartition() : FunctionPass(ID), RootInterval(0) {
+    initializeIntervalPartitionPass(*PassRegistry::getPassRegistry());
+  }
 
   // run - Calculate the interval partition for this function
   virtual bool runOnFunction(Function &F);
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index b2a3afbc986a..fc4d0af920e9 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -31,7 +31,9 @@ class LazyValueInfo : public FunctionPass {
   void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
 public:
   static char ID;
-  LazyValueInfo() : FunctionPass(ID), PImpl(0) {}
+  LazyValueInfo() : FunctionPass(ID), PImpl(0) {
+    initializeLazyValueInfoPass(*PassRegistry::getPassRegistry());
+  }
   ~LazyValueInfo() { assert(PImpl == 0 && "releaseMemory not called"); }
 
   /// Tristate - This is used to return true/false/dunno results.
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index c9adf3f36ad7..243234b75635 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -28,15 +28,17 @@ namespace llvm {
     LibCallInfo *LCI;
     
     explicit LibCallAliasAnalysis(LibCallInfo *LC = 0)
-      : FunctionPass(ID), LCI(LC) {
+        : FunctionPass(ID), LCI(LC) {
+      initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
     explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC)
-      : FunctionPass(ID), LCI(LC) {
+        : FunctionPass(ID), LCI(LC) {
+      initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
     ~LibCallAliasAnalysis();
     
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size);
+                               const Location &Loc);
     
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                ImmutableCallSite CS2) {
@@ -64,7 +66,7 @@ namespace llvm {
   private:
     ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
                                        ImmutableCallSite CS,
-                                       const Value *P, unsigned Size);
+                                       const Location &Loc);
   };
 }  // End of llvm namespace
 
diff --git a/include/llvm/Analysis/LibCallSemantics.h b/include/llvm/Analysis/LibCallSemantics.h
index 31d7cc56ce53..f5a9e96cbdd0 100644
--- a/include/llvm/Analysis/LibCallSemantics.h
+++ b/include/llvm/Analysis/LibCallSemantics.h
@@ -48,7 +48,7 @@ namespace llvm {
       Yes, No, Unknown
     };
     LocResult (*isLocation)(ImmutableCallSite CS,
-                            const Value *Ptr, unsigned Size);
+                            const AliasAnalysis::Location &Loc);
   };
   
   /// LibCallFunctionInfo - Each record in the array of FunctionInfo structs
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
index 94fd9907090d..f195d2782418 100644
--- a/include/llvm/Analysis/LoopDependenceAnalysis.h
+++ b/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -91,7 +91,9 @@ class LoopDependenceAnalysis : public LoopPass {
 
 public:
   static char ID; // Class identification, replacement for typeinfo
-  LoopDependenceAnalysis() : LoopPass(ID) {}
+  LoopDependenceAnalysis() : LoopPass(ID) {
+    initializeLoopDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+  }
 
   /// isDependencePair - Check whether two values can possibly give rise to
   /// a data dependence: that is the case if both are instructions accessing
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 462620f7e3cb..392bdad5ab02 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -32,6 +32,7 @@
 #define LLVM_ANALYSIS_LOOP_INFO_H
 
 #include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SmallVector.h"
@@ -40,6 +41,7 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <map>
 
 namespace llvm {
 
@@ -53,6 +55,7 @@ static void RemoveFromVector(std::vector<T*> &V, T *N) {
 class DominatorTree;
 class LoopInfo;
 class Loop;
+class PHINode;
 template<class N, class M> class LoopInfoBase;
 template<class N, class M> class LoopBase;
 
@@ -523,10 +526,9 @@ public:
   ///
   bool isLoopInvariant(Value *V) const;
 
-  /// isLoopInvariant - Return true if the specified instruction is
-  /// loop-invariant.
-  ///
-  bool isLoopInvariant(Instruction *I) const;
+  /// hasLoopInvariantOperands - Return true if all the operands of the
+  /// specified instruction are loop invariant. 
+  bool hasLoopInvariantOperands(Instruction *I) const;
 
   /// makeLoopInvariant - If the given value is an instruction inside of the
   /// loop and it can be hoisted, do so to make it trivially loop-invariant.
@@ -630,7 +632,7 @@ private:
 template<class BlockT, class LoopT>
 class LoopInfoBase {
   // BBMap - Mapping of basic blocks to the inner most loop they occur in
-  std::map<BlockT *, LoopT *> BBMap;
+  DenseMap<BlockT *, LoopT *> BBMap;
   std::vector<LoopT *> TopLevelLoops;
   friend class LoopBase<BlockT, LoopT>;
 
@@ -661,7 +663,7 @@ public:
   /// block is in no loop (for example the entry node), null is returned.
   ///
   LoopT *getLoopFor(const BlockT *BB) const {
-    typename std::map<BlockT *, LoopT *>::const_iterator I=
+    typename DenseMap<BlockT *, LoopT *>::const_iterator I=
       BBMap.find(const_cast<BlockT*>(BB));
     return I != BBMap.end() ? I->second : 0;
   }
@@ -729,7 +731,7 @@ public:
   /// including all of the Loop objects it is nested in and our mapping from
   /// BasicBlocks to loops.
   void removeBlock(BlockT *BB) {
-    typename std::map<BlockT *, LoopT *>::iterator I = BBMap.find(BB);
+    typename DenseMap<BlockT *, LoopT *>::iterator I = BBMap.find(BB);
     if (I != BBMap.end()) {
       for (LoopT *L = I->second; L; L = L->getParentLoop())
         L->removeBlockFromLoop(BB);
@@ -923,7 +925,7 @@ public:
     for (unsigned i = 0; i < TopLevelLoops.size(); ++i)
       TopLevelLoops[i]->print(OS);
   #if 0
-    for (std::map<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(),
+    for (DenseMap<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(),
            E = BBMap.end(); I != E; ++I)
       OS << "BB '" << I->first->getName() << "' level = "
          << I->second->getLoopDepth() << "\n";
@@ -940,7 +942,9 @@ class LoopInfo : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
 
-  LoopInfo() : FunctionPass(ID) {}
+  LoopInfo() : FunctionPass(ID) {
+    initializeLoopInfoPass(*PassRegistry::getPassRegistry());
+  }
 
   LoopInfoBase<BasicBlock, Loop>& getBase() { return LI; }
 
@@ -1019,6 +1023,27 @@ public:
   void removeBlock(BasicBlock *BB) {
     LI.removeBlock(BB);
   }
+
+  /// replacementPreservesLCSSAForm - Returns true if replacing From with To
+  /// everywhere is guaranteed to preserve LCSSA form.
+  bool replacementPreservesLCSSAForm(Instruction *From, Value *To) {
+    // Preserving LCSSA form is only problematic if the replacing value is an
+    // instruction.
+    Instruction *I = dyn_cast<Instruction>(To);
+    if (!I) return true;
+    // If both instructions are defined in the same basic block then replacement
+    // cannot break LCSSA form.
+    if (I->getParent() == From->getParent())
+      return true;
+    // If the instruction is not defined in a loop then it can safely replace
+    // anything.
+    Loop *ToLoop = getLoopFor(I->getParent());
+    if (!ToLoop) return true;
+    // If the replacing instruction is defined in the same loop as the original
+    // instruction, or in a loop that contains it as an inner loop, then using
+    // it as a replacement will not break LCSSA form.
+    return ToLoop->contains(getLoopFor(From->getParent()));
+  }
 };
 
 
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index a4f916227b8b..22493f6f8b9e 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -74,6 +74,10 @@ Value *getMallocArraySize(CallInst *CI, const TargetData *TD,
 
 /// isFreeCall - Returns non-null if the value is a call to the builtin free()
 const CallInst *isFreeCall(const Value *I);
+  
+static inline CallInst *isFreeCall(Value *I) {
+  return const_cast<CallInst*>(isFreeCall((const Value*)I));
+}
 
 } // End llvm namespace
 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index f6aab03690ff..4d5dd1987f28 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -17,6 +17,7 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/OwningPtr.h"
@@ -46,6 +47,9 @@ namespace llvm {
       /// pair holds the instruction that clobbers the memory.  For example,
       /// this occurs when we see a may-aliased store to the memory location we
       /// care about.
+      ///
+      /// A dependence query on the first instruction of the entry block will
+      /// return a clobber(self) result.
       Clobber,
 
       /// Def - This is a dependence on the specified instruction which
@@ -132,26 +136,49 @@ namespace llvm {
     }
   };
 
+  /// NonLocalDepEntry - This is an entry in the NonLocalDepInfo cache.  For
+  /// each BasicBlock (the BB entry) it keeps a MemDepResult.
+  class NonLocalDepEntry {
+    BasicBlock *BB;
+    MemDepResult Result;
+  public:
+    NonLocalDepEntry(BasicBlock *bb, MemDepResult result)
+      : BB(bb), Result(result) {}
+
+    // This is used for searches.
+    NonLocalDepEntry(BasicBlock *bb) : BB(bb) {}
+
+    // BB is the sort key, it can't be changed.
+    BasicBlock *getBB() const { return BB; }
+    
+    void setResult(const MemDepResult &R) { Result = R; }
+
+    const MemDepResult &getResult() const { return Result; }
+    
+    bool operator<(const NonLocalDepEntry &RHS) const {
+      return BB < RHS.BB;
+    }
+  };
+  
   /// NonLocalDepResult - This is a result from a NonLocal dependence query.
   /// For each BasicBlock (the BB entry) it keeps a MemDepResult and the
   /// (potentially phi translated) address that was live in the block.
   class NonLocalDepResult {
-    BasicBlock *BB;
-    MemDepResult Result;
+    NonLocalDepEntry Entry;
     Value *Address;
   public:
     NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address)
-      : BB(bb), Result(result), Address(address) {}
+      : Entry(bb, result), Address(address) {}
     
     // BB is the sort key, it can't be changed.
-    BasicBlock *getBB() const { return BB; }
+    BasicBlock *getBB() const { return Entry.getBB(); }
     
     void setResult(const MemDepResult &R, Value *Addr) {
-      Result = R;
+      Entry.setResult(R);
       Address = Addr;
     }
     
-    const MemDepResult &getResult() const { return Result; }
+    const MemDepResult &getResult() const { return Entry.getResult(); }
     
     /// getAddress - Return the address of this pointer in this block.  This can
     /// be different than the address queried for the non-local result because
@@ -163,30 +190,6 @@ namespace llvm {
     Value *getAddress() const { return Address; }
   };
   
-  /// NonLocalDepEntry - This is an entry in the NonLocalDepInfo cache.  For
-  /// each BasicBlock (the BB entry) it keeps a MemDepResult.
-  class NonLocalDepEntry {
-    BasicBlock *BB;
-    MemDepResult Result;
-  public:
-    NonLocalDepEntry(BasicBlock *bb, MemDepResult result)
-      : BB(bb), Result(result) {}
-
-    // This is used for searches.
-    NonLocalDepEntry(BasicBlock *bb) : BB(bb) {}
-
-    // BB is the sort key, it can't be changed.
-    BasicBlock *getBB() const { return BB; }
-    
-    void setResult(const MemDepResult &R) { Result = R; }
-
-    const MemDepResult &getResult() const { return Result; }
-    
-    bool operator<(const NonLocalDepEntry &RHS) const {
-      return BB < RHS.BB;
-    }
-  };
-  
   /// MemoryDependenceAnalysis - This is an analysis that determines, for a
   /// given memory operation, what preceding memory operations it depends on.
   /// It builds on alias analysis information, and tries to provide a lazy,
@@ -212,7 +215,7 @@ namespace llvm {
   private:
     /// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if
     /// the dependence is a read only dependence, false if read/write.
-    typedef PointerIntPair<Value*, 1, bool> ValueIsLoadPair;
+    typedef PointerIntPair<const Value*, 1, bool> ValueIsLoadPair;
 
     /// BBSkipFirstBlockPair - This pair is used when caching information for a
     /// block.  If the pointer is null, the cache value is not a full query that
@@ -220,11 +223,28 @@ namespace llvm {
     /// or not the contents of the block was skipped.
     typedef PointerIntPair<BasicBlock*, 1, bool> BBSkipFirstBlockPair;
 
+    /// NonLocalPointerInfo - This record is the information kept for each
+    /// (value, is load) pair.
+    struct NonLocalPointerInfo {
+      /// Pair - The pair of the block and the skip-first-block flag.
+      BBSkipFirstBlockPair Pair;
+      /// NonLocalDeps - The results of the query for each relevant block.
+      NonLocalDepInfo NonLocalDeps;
+      /// Size - The maximum size of the dereferences of the
+      /// pointer. May be UnknownSize if the sizes are unknown.
+      uint64_t Size;
+      /// TBAATag - The TBAA tag associated with dereferences of the
+      /// pointer. May be null if there are no tags or conflicting tags.
+      const MDNode *TBAATag;
+
+      NonLocalPointerInfo() : Size(AliasAnalysis::UnknownSize), TBAATag(0) {}
+    };
+
     /// CachedNonLocalPointerInfo - This map stores the cached results of doing
     /// a pointer lookup at the bottom of a block.  The key of this map is the
     /// pointer+isload bit, the value is a list of <bb->result> mappings.
-    typedef DenseMap<ValueIsLoadPair, std::pair<BBSkipFirstBlockPair, 
-                  NonLocalDepInfo> > CachedNonLocalPointerInfo;
+    typedef DenseMap<ValueIsLoadPair,
+                     NonLocalPointerInfo> CachedNonLocalPointerInfo;
     CachedNonLocalPointerInfo NonLocalPointerDeps;
 
     // A map from instructions to their non-local pointer dependencies.
@@ -297,10 +317,10 @@ namespace llvm {
     /// set of instructions that either define or clobber the value.
     ///
     /// This method assumes the pointer has a "NonLocal" dependency within BB.
-    void getNonLocalPointerDependency(Value *Pointer, bool isLoad,
-                                      BasicBlock *BB,
+    void getNonLocalPointerDependency(const AliasAnalysis::Location &Loc,
+                                      bool isLoad, BasicBlock *BB,
                                     SmallVectorImpl<NonLocalDepResult> &Result);
-    
+
     /// removeInstruction - Remove an instruction from the dependence analysis,
     /// updating the dependence of instructions that previously depended on it.
     void removeInstruction(Instruction *InstToRemove);
@@ -318,20 +338,29 @@ namespace llvm {
     /// critical edges.
     void invalidateCachedPredecessors();
     
-  private:
-    MemDepResult getPointerDependencyFrom(Value *Pointer, uint64_t MemSize,
+    /// getPointerDependencyFrom - Return the instruction on which a memory
+    /// location depends.  If isLoad is true, this routine ignores may-aliases
+    /// with read-only operations.  If isLoad is false, this routine ignores
+    /// may-aliases with reads from read-only locations.
+    ///
+    /// Note that this is an uncached query, and thus may be inefficient.
+    ///
+    MemDepResult getPointerDependencyFrom(const AliasAnalysis::Location &Loc,
                                           bool isLoad, 
                                           BasicBlock::iterator ScanIt,
                                           BasicBlock *BB);
+    
+  private:
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
                                            BasicBlock *BB);
-    bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t Size,
+    bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+                                     const AliasAnalysis::Location &Loc,
                                      bool isLoad, BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepResult> &Result,
                                      DenseMap<BasicBlock*, Value*> &Visited,
                                      bool SkipFirstBlock = false);
-    MemDepResult GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+    MemDepResult GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                                          bool isLoad, BasicBlock *BB,
                                          NonLocalDepInfo *Cache,
                                          unsigned NumSortedEntries);
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 37425ebe8358..5b0c5b1e6bec 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -59,7 +59,7 @@ namespace llvm {
 
   //===--------------------------------------------------------------------===//
   //
-  // createBasicAliasAnalysisPass - This pass implements the default alias
+  // createBasicAliasAnalysisPass - This pass implements the stateless alias
   // analysis.
   //
   ImmutablePass *createBasicAliasAnalysisPass();
@@ -114,6 +114,28 @@ namespace llvm {
   //
   FunctionPass *createProfileVerifierPass();
 
+  //===--------------------------------------------------------------------===//
+  //
+  // createPathProfileLoaderPass - This pass loads information from a path
+  // profile dump file.
+  //
+  ModulePass *createPathProfileLoaderPass();
+  extern char &PathProfileLoaderPassID;
+
+  //===--------------------------------------------------------------------===//
+  //
+  // createNoPathProfileInfoPass - This pass implements the default
+  // "no path profile".
+  //
+  ImmutablePass *createNoPathProfileInfoPass();
+
+  //===--------------------------------------------------------------------===//
+  //
+  // createPathProfileVerifierPass - This pass verifies path profiling
+  // information.
+  //
+  ModulePass *createPathProfileVerifierPass();
+
   //===--------------------------------------------------------------------===//
   //
   // createDSAAPass - This pass implements simple context sensitive alias
@@ -140,7 +162,7 @@ namespace llvm {
   // createLiveValuesPass - This creates an instance of the LiveValues pass.
   //
   FunctionPass *createLiveValuesPass();
-  
+
   //===--------------------------------------------------------------------===//
   //
   /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo
@@ -153,7 +175,7 @@ namespace llvm {
   // LoopDependenceAnalysis pass.
   //
   LoopPass *createLoopDependenceAnalysisPass();
-  
+
   // Minor pass prototypes, allowing us to expose them through bugpoint and
   // analyze.
   FunctionPass *createInstCountPass();
@@ -170,6 +192,13 @@ namespace llvm {
 
   // Print module-level debug info metadata in human-readable form.
   ModulePass *createModuleDebugInfoPrinterPass();
+
+  //===--------------------------------------------------------------------===//
+  //
+  // createMemDepPrinter - This pass exhaustively collects all memdep
+  // information and prints it with -analyze.
+  //
+  FunctionPass *createMemDepPrinter();
 }
 
 #endif
diff --git a/include/llvm/Analysis/PathNumbering.h b/include/llvm/Analysis/PathNumbering.h
new file mode 100644
index 000000000000..7025e28484cc
--- /dev/null
+++ b/include/llvm/Analysis/PathNumbering.h
@@ -0,0 +1,304 @@
+//===- PathNumbering.h ----------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96].  For a CFG backedges are removed and replaced by phony
+// edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner.  As described in
+// [Ball96] edges can be enumerated such that given a path number by following
+// the CFG and updating the path number, the path is obtained.
+//
+// [Ball96]
+//  T. Ball and J. R. Larus. "Efficient Path Profiling."
+//  International Symposium on Microarchitecture, pages 46-57, 1996.
+//  http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PATH_NUMBERING_H
+#define LLVM_PATH_NUMBERING_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include <map>
+#include <stack>
+#include <vector>
+
+namespace llvm {
+class BallLarusNode;
+class BallLarusEdge;
+class BallLarusDag;
+
+// typedefs for storage/ interators of various DAG components
+typedef std::vector<BallLarusNode*> BLNodeVector;
+typedef std::vector<BallLarusNode*>::iterator BLNodeIterator;
+typedef std::vector<BallLarusEdge*> BLEdgeVector;
+typedef std::vector<BallLarusEdge*>::iterator BLEdgeIterator;
+typedef std::map<BasicBlock*, BallLarusNode*> BLBlockNodeMap;
+typedef std::stack<BallLarusNode*> BLNodeStack;
+
+// Represents a basic block with information necessary for the BallLarus
+// algorithms.
+class BallLarusNode {
+public:
+  enum NodeColor { WHITE, GRAY, BLACK };
+
+  // Constructor: Initializes a new Node for the given BasicBlock
+  BallLarusNode(BasicBlock* BB) :
+    _basicBlock(BB), _numberPaths(0), _color(WHITE) {
+    static unsigned nextUID = 0;
+    _uid = nextUID++;
+  }
+
+  // Returns the basic block for the BallLarusNode
+  BasicBlock* getBlock();
+
+  // Get/set the number of paths to the exit starting at the node.
+  unsigned getNumberPaths();
+  void setNumberPaths(unsigned numberPaths);
+
+  // Get/set the NodeColor used in graph algorithms.
+  NodeColor getColor();
+  void setColor(NodeColor color);
+
+  // Iterator information for predecessor edges. Includes phony and
+  // backedges.
+  BLEdgeIterator predBegin();
+  BLEdgeIterator predEnd();
+  unsigned getNumberPredEdges();
+
+  // Iterator information for successor edges. Includes phony and
+  // backedges.
+  BLEdgeIterator succBegin();
+  BLEdgeIterator succEnd();
+  unsigned getNumberSuccEdges();
+
+  // Add an edge to the predecessor list.
+  void addPredEdge(BallLarusEdge* edge);
+
+  // Remove an edge from the predecessor list.
+  void removePredEdge(BallLarusEdge* edge);
+
+  // Add an edge to the successor list.
+  void addSuccEdge(BallLarusEdge* edge);
+
+  // Remove an edge from the successor list.
+  void removeSuccEdge(BallLarusEdge* edge);
+
+  // Returns the name of the BasicBlock being represented.  If BasicBlock
+  // is null then returns "<null>".  If BasicBlock has no name, then
+  // "<unnamed>" is returned.  Intended for use with debug output.
+  std::string getName();
+
+private:
+  // The corresponding underlying BB.
+  BasicBlock* _basicBlock;
+
+  // Holds the predecessor edges of this node.
+  BLEdgeVector _predEdges;
+
+  // Holds the successor edges of this node.
+  BLEdgeVector _succEdges;
+
+  // The number of paths from the node to the exit.
+  unsigned _numberPaths;
+
+  // 'Color' used by graph algorithms to mark the node.
+  NodeColor _color;
+
+  // Unique ID to ensure naming difference with dotgraphs
+  unsigned _uid;
+
+  // Removes an edge from an edgeVector.  Used by removePredEdge and
+  // removeSuccEdge.
+  void removeEdge(BLEdgeVector& v, BallLarusEdge* e);
+};
+
+// Represents an edge in the Dag.  For an edge, v -> w, v is the source, and
+// w is the target.
+class BallLarusEdge {
+public:
+  enum EdgeType { NORMAL, BACKEDGE, SPLITEDGE,
+    BACKEDGE_PHONY, SPLITEDGE_PHONY, CALLEDGE_PHONY };
+
+  // Constructor: Initializes an BallLarusEdge with a source and target.
+  BallLarusEdge(BallLarusNode* source, BallLarusNode* target,
+                                unsigned duplicateNumber)
+    : _source(source), _target(target), _weight(0), _edgeType(NORMAL),
+      _realEdge(NULL), _duplicateNumber(duplicateNumber) {}
+
+  // Returns the source/ target node of this edge.
+  BallLarusNode* getSource() const;
+  BallLarusNode* getTarget() const;
+
+  // Sets the type of the edge.
+  EdgeType getType() const;
+
+  // Gets the type of the edge.
+  void setType(EdgeType type);
+
+  // Returns the weight of this edge.  Used to decode path numbers to
+  // sequences of basic blocks.
+  unsigned getWeight();
+
+  // Sets the weight of the edge.  Used during path numbering.
+  void setWeight(unsigned weight);
+
+  // Gets/sets the phony edge originating at the root.
+  BallLarusEdge* getPhonyRoot();
+  void setPhonyRoot(BallLarusEdge* phonyRoot);
+
+  // Gets/sets the phony edge terminating at the exit.
+  BallLarusEdge* getPhonyExit();
+  void setPhonyExit(BallLarusEdge* phonyExit);
+
+  // Gets/sets the associated real edge if this is a phony edge.
+  BallLarusEdge* getRealEdge();
+  void setRealEdge(BallLarusEdge* realEdge);
+
+  // Returns the duplicate number of the edge.
+  unsigned getDuplicateNumber();
+
+protected:
+  // Source node for this edge.
+  BallLarusNode* _source;
+
+  // Target node for this edge.
+  BallLarusNode* _target;
+
+private:
+  // Edge weight cooresponding to path number increments before removing
+  // increments along a spanning tree. The sum over the edge weights gives
+  // the path number.
+  unsigned _weight;
+
+  // Type to represent for what this edge is intended
+  EdgeType _edgeType;
+
+  // For backedges and split-edges, the phony edge which is linked to the
+  // root node of the DAG. This contains a path number initialization.
+  BallLarusEdge* _phonyRoot;
+
+  // For backedges and split-edges, the phony edge which is linked to the
+  // exit node of the DAG. This contains a path counter increment, and
+  // potentially a path number increment.
+  BallLarusEdge* _phonyExit;
+
+  // If this is a phony edge, _realEdge is a link to the back or split
+  // edge. Otherwise, this is null.
+  BallLarusEdge* _realEdge;
+
+  // An ID to differentiate between those edges which have the same source
+  // and destination blocks.
+  unsigned _duplicateNumber;
+};
+
+// Represents the Ball Larus DAG for a given Function.  Can calculate
+// various properties required for instrumentation or analysis.  E.g. the
+// edge weights that determine the path number.
+class BallLarusDag {
+public:
+  // Initializes a BallLarusDag from the CFG of a given function.  Must
+  // call init() after creation, since some initialization requires
+  // virtual functions.
+  BallLarusDag(Function &F)
+    : _root(NULL), _exit(NULL), _function(F) {}
+
+  // Initialization that requires virtual functions which are not fully
+  // functional in the constructor.
+  void init();
+
+  // Frees all memory associated with the DAG.
+  virtual ~BallLarusDag();
+
+  // Calculate the path numbers by assigning edge increments as prescribed
+  // in Ball-Larus path profiling.
+  void calculatePathNumbers();
+
+  // Returns the number of paths for the DAG.
+  unsigned getNumberOfPaths();
+
+  // Returns the root (i.e. entry) node for the DAG.
+  BallLarusNode* getRoot();
+
+  // Returns the exit node for the DAG.
+  BallLarusNode* getExit();
+
+  // Returns the function for the DAG.
+  Function& getFunction();
+
+  // Clears the node colors.
+  void clearColors(BallLarusNode::NodeColor color);
+
+protected:
+  // All nodes in the DAG.
+  BLNodeVector _nodes;
+
+  // All edges in the DAG.
+  BLEdgeVector _edges;
+
+  // All backedges in the DAG.
+  BLEdgeVector _backEdges;
+
+  // Allows subclasses to determine which type of Node is created.
+  // Override this method to produce subclasses of BallLarusNode if
+  // necessary. The destructor of BallLarusDag will call free on each pointer
+  // created.
+  virtual BallLarusNode* createNode(BasicBlock* BB);
+
+  // Allows subclasses to determine which type of Edge is created.
+  // Override this method to produce subclasses of BallLarusEdge if
+  // necessary.  Parameters source and target will have been created by
+  // createNode and can be cast to the subclass of BallLarusNode*
+  // returned by createNode. The destructor of BallLarusDag will call free
+  // on each pointer created.
+  virtual BallLarusEdge* createEdge(BallLarusNode* source, BallLarusNode*
+                                    target, unsigned duplicateNumber);
+
+  // Proxy to node's constructor.  Updates the DAG state.
+  BallLarusNode* addNode(BasicBlock* BB);
+
+  // Proxy to edge's constructor.  Updates the DAG state.
+  BallLarusEdge* addEdge(BallLarusNode* source, BallLarusNode* target,
+                         unsigned duplicateNumber);
+
+private:
+  // The root (i.e. entry) node for this DAG.
+  BallLarusNode* _root;
+
+  // The exit node for this DAG.
+  BallLarusNode* _exit;
+
+  // The function represented by this DAG.
+  Function& _function;
+
+  // Processes one node and its imediate edges for building the DAG.
+  void buildNode(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& dfsStack);
+
+  // Process an edge in the CFG for DAG building.
+  void buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& dfsStack,
+                 BallLarusNode* currentNode, BasicBlock* succBB,
+                 unsigned duplicateNumber);
+
+  // The weight on each edge is the increment required along any path that
+  // contains that edge.
+  void calculatePathNumbersFrom(BallLarusNode* node);
+
+  // Adds a backedge with its phony edges.  Updates the DAG state.
+  void addBackedge(BallLarusNode* source, BallLarusNode* target,
+                   unsigned duplicateCount);
+};
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h
new file mode 100644
index 000000000000..263763f7a8db
--- /dev/null
+++ b/include/llvm/Analysis/PathProfileInfo.h
@@ -0,0 +1,113 @@
+//===- PathProfileInfo.h --------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file outlines the interface used by optimizers to load path profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PATHPROFILEINFO_H
+#define LLVM_PATHPROFILEINFO_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include <stack>
+
+namespace llvm {
+
+class ProfilePath;
+class ProfilePathEdge;
+class PathProfileInfo;
+
+typedef std::vector<ProfilePathEdge> ProfilePathEdgeVector;
+typedef std::vector<ProfilePathEdge>::iterator ProfilePathEdgeIterator;
+
+typedef std::vector<BasicBlock*> ProfilePathBlockVector;
+typedef std::vector<BasicBlock*>::iterator ProfilePathBlockIterator;
+
+typedef std::map<unsigned int,ProfilePath*> ProfilePathMap;
+typedef std::map<unsigned int,ProfilePath*>::iterator ProfilePathIterator;
+
+typedef std::map<Function*,unsigned int> FunctionPathCountMap;
+typedef std::map<Function*,ProfilePathMap> FunctionPathMap;
+typedef std::map<Function*,ProfilePathMap>::iterator FunctionPathIterator;
+
+class ProfilePathEdge {
+public:
+  ProfilePathEdge(BasicBlock* source, BasicBlock* target,
+                  unsigned duplicateNumber);
+
+  inline unsigned getDuplicateNumber() { return _duplicateNumber; }
+  inline BasicBlock* getSource() { return _source; }
+  inline BasicBlock* getTarget() { return _target; }
+
+protected:
+  BasicBlock* _source;
+  BasicBlock* _target;
+  unsigned _duplicateNumber;
+};
+
+class ProfilePath {
+public:
+  ProfilePath(unsigned int number, unsigned int count,
+              double countStdDev, PathProfileInfo* ppi);
+
+  double getFrequency() const;
+
+  inline unsigned int getNumber() const { return _number; }
+  inline unsigned int getCount() const { return _count; }
+  inline double getCountStdDev() const { return _countStdDev; }
+
+  ProfilePathEdgeVector* getPathEdges() const;
+  ProfilePathBlockVector* getPathBlocks() const;
+
+  BasicBlock* getFirstBlockInPath() const;
+
+private:
+  unsigned int _number;
+  unsigned int _count;
+  double _countStdDev;
+
+  // double pointer back to the profiling info
+  PathProfileInfo* _ppi;
+};
+
+// TODO: overload [] operator for getting path
+// Add: getFunctionCallCount()
+class PathProfileInfo {
+  public:
+  PathProfileInfo();
+  ~PathProfileInfo();
+
+  void setCurrentFunction(Function* F);
+  Function* getCurrentFunction() const;
+  BasicBlock* getCurrentFunctionEntry();
+
+  ProfilePath* getPath(unsigned int number);
+  unsigned int getPotentialPathCount();
+
+  ProfilePathIterator pathBegin();
+  ProfilePathIterator pathEnd();
+  unsigned int pathsRun();
+
+  static char ID; // Pass identification
+  std::string argList;
+
+protected:
+  FunctionPathMap _functionPaths;
+  FunctionPathCountMap _functionPathCounts;
+
+private:
+  BallLarusDag* _currentDag;
+  Function* _currentFunction;
+
+  friend class ProfilePath;
+};
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h
deleted file mode 100644
index 6b49e18c1b63..000000000000
--- a/include/llvm/Analysis/PointerTracking.h
+++ /dev/null
@@ -1,132 +0,0 @@
-//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements tracking of pointer bounds.
-// It knows that the libc functions "calloc" and "realloc" allocate memory, thus
-// you should avoid using this pass if they mean something else for your
-// language.
-//
-// All methods assume that the pointer is not NULL, if it is then the returned
-// allocation size is wrong, and the result from checkLimits is wrong too.
-// It also assumes that pointers are valid, and that it is not analyzing a
-// use-after-free scenario.
-// Due to these limitations the "size" returned by these methods should be
-// considered as either 0 or the returned size.
-//
-// Another analysis pass should be used to find use-after-free/NULL dereference
-// bugs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
-#define LLVM_ANALYSIS_POINTERTRACKING_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/PredIteratorCache.h"
-
-namespace llvm {
-  class DominatorTree;
-  class ScalarEvolution;
-  class SCEV;
-  class Loop;
-  class LoopInfo;
-  class TargetData;
-
-  // Result from solver, assuming pointer is not NULL,
-  // and it is not a use-after-free situation.
-  enum SolverResult {
-    AlwaysFalse,// always false with above constraints
-    AlwaysTrue,// always true with above constraints
-    Unknown // it can sometimes be true, sometimes false, or it is undecided
-  };
-
-  class PointerTracking : public FunctionPass {
-  public:
-    typedef ICmpInst::Predicate Predicate;
-    static char ID;
-    PointerTracking();
-
-    virtual bool doInitialization(Module &M);
-
-    // If this pointer directly points to an allocation, return
-    // the number of elements of type Ty allocated.
-    // Otherwise return CouldNotCompute.
-    // Since allocations can fail by returning NULL, the real element count
-    // for every allocation is either 0 or the value returned by this function.
-    const SCEV *getAllocationElementCount(Value *P) const;
-
-    // Same as getAllocationSize() but returns size in bytes.
-    // We consider one byte as 8 bits.
-    const SCEV *getAllocationSizeInBytes(Value *V) const;
-
-    // Given a Pointer, determine a base pointer of known size, and an offset
-    // therefrom.
-    // When unable to determine, sets Base to NULL, and Limit/Offset to
-    // CouldNotCompute.
-    // BaseSize, and Offset are in bytes: Pointer == Base + Offset
-    void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *& BaseSize,
-                          const SCEV *&Offset) const;
-
-    // Compares the 2 scalar evolution expressions according to predicate,
-    // and if it can prove that the result is always true or always false
-    // return AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown.
-    enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B,
-                                  const Loop *L);
-
-    // Determines whether the condition LHS <Pred> RHS is sufficient
-    // for the condition A <Pred> B to hold.
-    // Currently only ULT/ULE is supported.
-    // This errs on the side of returning false.
-    bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS,
-                             const SCEV *A, Predicate Pred2, const SCEV *B,
-                             const Loop *L);
-
-    // Determines whether Offset is known to be always in [0, Limit) bounds.
-    // This errs on the side of returning Unknown.
-    enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit,
-                                  BasicBlock *BB);
-
-    virtual bool runOnFunction(Function &F);
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    void print(raw_ostream &OS, const Module* = 0) const;
-    Value *computeAllocationCountValue(Value *P, const Type *&Ty) const;
-  private:
-    Function *FF;
-    TargetData *TD;
-    ScalarEvolution *SE;
-    LoopInfo *LI;
-    DominatorTree *DT;
-
-    Function *callocFunc;
-    Function *reallocFunc;
-    PredIteratorCache predCache;
-
-    SmallPtrSet<const SCEV*, 1> analyzing;
-
-    enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
-                                      const SCEV *A, const SCEV *B) const;
-    static bool isMonotonic(const SCEV *S);
-    bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const;
-    bool conditionSufficient(Value *Cond, bool negated,
-                             const SCEV *A, Predicate Pred, const SCEV *B);
-    Value *getConditionToReach(BasicBlock *A,
-                               DomTreeNodeBase<BasicBlock> *B,
-                               bool &negated);
-    Value *getConditionToReach(BasicBlock *A,
-                               BasicBlock *B,
-                               bool &negated);
-    const SCEV *computeAllocationCount(Value *P, const Type *&Ty) const;
-    const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const;
-  };
-}
-#endif
-
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 46ce8200f966..2cd6ae346eeb 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_ANALYSIS_POST_DOMINATORS_H
 #define LLVM_ANALYSIS_POST_DOMINATORS_H
 
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominanceFrontier.h"
 
 namespace llvm {
 
@@ -26,6 +26,7 @@ struct PostDominatorTree : public FunctionPass {
   DominatorTreeBase<BasicBlock>* DT;
 
   PostDominatorTree() : FunctionPass(ID) {
+    initializePostDominatorTreePass(*PassRegistry::getPassRegistry());
     DT = new DominatorTreeBase<BasicBlock>(true);
   }
 
@@ -106,7 +107,9 @@ template <> struct GraphTraits<PostDominatorTree*>
 struct PostDominanceFrontier : public DominanceFrontierBase {
   static char ID;
   PostDominanceFrontier()
-    : DominanceFrontierBase(ID, true) {}
+    : DominanceFrontierBase(ID, true) {
+      initializePostDominanceFrontierPass(*PassRegistry::getPassRegistry());
+    }
 
   virtual bool runOnFunction(Function &) {
     Frontiers.clear();
diff --git a/include/llvm/Analysis/ProfileInfoTypes.h b/include/llvm/Analysis/ProfileInfoTypes.h
index 0d531d5c5f88..6b4ac85082b0 100644
--- a/include/llvm/Analysis/ProfileInfoTypes.h
+++ b/include/llvm/Analysis/ProfileInfoTypes.h
@@ -1,4 +1,4 @@
-/*===-- ProfileInfoTypes.h - Profiling info shared constants ------*- C -*-===*\
+/*===-- ProfileInfoTypes.h - Profiling info shared constants --------------===*\
 |*
 |*                     The LLVM Compiler Infrastructure
 |*
@@ -16,6 +16,17 @@
 #ifndef LLVM_ANALYSIS_PROFILEINFOTYPES_H
 #define LLVM_ANALYSIS_PROFILEINFOTYPES_H
 
+/* Included by libprofile. */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* IDs to distinguish between those path counters stored in hashses vs arrays */
+enum ProfilingStorageType {
+  ProfilingArray = 1,
+  ProfilingHash = 2
+};
+
 enum ProfilingType {
   ArgumentInfo  = 1,   /* The command line argument block */
   FunctionInfo  = 2,   /* Function profiling information  */
@@ -26,4 +37,24 @@ enum ProfilingType {
   OptEdgeInfo   = 7    /* Edge profiling information, optimal version */
 };
 
+/*
+ * The header for tables that map path numbers to path counters.
+ */
+typedef struct {
+  unsigned fnNumber; /* function number for these counters */
+  unsigned numEntries;   /* number of entries stored */
+} PathProfileHeader;
+
+/*
+ * Describes an entry in a tagged table for path counters.
+ */
+typedef struct {
+  unsigned pathNumber;
+  unsigned pathCounter;
+} PathProfileTableEntry;
+
+#if defined(__cplusplus)
+}
+#endif
+
 #endif /* LLVM_ANALYSIS_PROFILEINFOTYPES_H */
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 7a2670f2c08c..a36ca110d8c0 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -58,6 +58,7 @@ class RegionNode {
   // DO NOT IMPLEMENT
   const RegionNode &operator=(const RegionNode &);
 
+protected:
   /// This is the entry basic block that starts this region node.  If this is a
   /// BasicBlock RegionNode, then entry is just the basic block, that this
   /// RegionNode represents.  Otherwise it is the entry of this (Sub)RegionNode.
@@ -70,7 +71,6 @@ class RegionNode {
   /// RegionNode.
   PointerIntPair<BasicBlock*, 1, bool> entry;
 
-protected:
   /// @brief The parent Region of this RegionNode.
   /// @see getParent()
   Region* parent;
@@ -257,6 +257,18 @@ public:
   /// @return The entry BasicBlock of the region.
   BasicBlock *getEntry() const { return RegionNode::getEntry(); }
 
+  /// @brief Replace the entry basic block of the region with the new basic
+  ///        block.
+  ///
+  /// @param BB  The new entry basic block of the region.
+  void replaceEntry(BasicBlock *BB);
+
+  /// @brief Replace the exit basic block of the region with the new basic
+  ///        block.
+  ///
+  /// @param BB  The new exit basic block of the region.
+  void replaceExit(BasicBlock *BB);
+
   /// @brief Get the exit BasicBlock of the Region.
   /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
   ///         Region.
@@ -280,6 +292,33 @@ public:
   /// @return The depth of the region.
   unsigned getDepth() const;
 
+  /// @brief Check if a Region is the TopLevel region.
+  ///
+  /// The toplevel region represents the whole function.
+  bool isTopLevelRegion() const { return exit == NULL; }
+
+  /// @brief Return a new (non canonical) region, that is obtained by joining
+  ///        this region with its predecessors.
+  ///
+  /// @return A region also starting at getEntry(), but reaching to the next
+  ///         basic block that forms with getEntry() a (non canonical) region.
+  ///         NULL if such a basic block does not exist.
+  Region *getExpandedRegion() const;
+
+  /// @brief Return the first block of this region's single entry edge,
+  ///        if existing.
+  ///
+  /// @return The BasicBlock starting this region's single entry edge,
+  ///         else NULL.
+  BasicBlock *getEnteringBlock() const;
+
+  /// @brief Return the first block of this region's single exit edge,
+  ///        if existing.
+  ///
+  /// @return The BasicBlock starting this region's single exit edge,
+  ///         else NULL.
+  BasicBlock *getExitingBlock() const;
+
   /// @brief Is this a simple region?
   ///
   /// A region is simple if it has exactly one exit and one entry edge.
@@ -386,7 +425,9 @@ public:
   /// @brief Add a new subregion to this Region.
   ///
   /// @param SubRegion The new subregion that will be added.
-  void addSubRegion(Region *SubRegion);
+  /// @param moveChildren Move the children of this region, that are also
+  ///                     contained in SubRegion into SubRegion.
+  void addSubRegion(Region *SubRegion, bool moveChildren = false);
 
   /// @brief Remove a subregion from this Region.
   ///
@@ -565,6 +606,12 @@ public:
   /// region containing BB.
   Region *getRegionFor(BasicBlock *BB) const;
 
+  /// @brief  Set the smallest region that surrounds a basic block.
+  ///
+  /// @param BB The basic block surrounded by a region.
+  /// @param R The smallest region that surrounds BB.
+  void setRegionFor(BasicBlock *BB, Region *R);
+
   /// @brief A shortcut for getRegionFor().
   ///
   /// @param BB The basic block.
@@ -610,6 +657,12 @@ public:
     return TopLevelRegion;
   }
 
+  /// @brief Update RegionInfo after a basic block was split.
+  ///
+  /// @param NewBB The basic block that was created before OldBB.
+  /// @param OldBB The old basic block.
+  void splitBlock(BasicBlock* NewBB, BasicBlock *OldBB);
+
   /// @brief Clear the Node Cache for all Regions.
   ///
   /// @see Region::clearNodeCache()
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
new file mode 100644
index 000000000000..aedc06aa6cf3
--- /dev/null
+++ b/include/llvm/Analysis/RegionPass.h
@@ -0,0 +1,126 @@
+//===- RegionPass.h - RegionPass class ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegionPass class. All region based analysis,
+// optimization and transformation passes are derived from RegionPass.
+// This class is implemented following the some ideas of the LoopPass.h class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REGION_PASS_H
+#define LLVM_REGION_PASS_H
+
+#include "llvm/Analysis/RegionInfo.h"
+
+#include "llvm/Pass.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Function.h"
+
+#include <deque>
+
+namespace llvm {
+
+class RGPassManager;
+class Function;
+
+//===----------------------------------------------------------------------===//
+/// @brief A pass that runs on each Region in a function.
+///
+/// RegionPass is managed by RGPassManager.
+class RegionPass : public Pass {
+public:
+  explicit RegionPass(char &pid) : Pass(PT_Region, pid) {}
+
+  //===--------------------------------------------------------------------===//
+  /// @name To be implemented by every RegionPass
+  ///
+  //@{
+  /// @brief Run the pass on a specific Region
+  ///
+  /// Accessing regions not contained in the current region is not allowed.
+  ///
+  /// @param R The region this pass is run on.
+  /// @param RGM The RegionPassManager that manages this Pass.
+  ///
+  /// @return True if the pass modifies this Region.
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) = 0;
+
+  /// @brief Get a pass to print the LLVM IR in the region.
+  ///
+  /// @param O      The ouput stream to print the Region.
+  /// @param Banner The banner to seperate different printed passes.
+  ///
+  /// @return The pass to print the LLVM IR in the region.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; }
+  virtual bool doFinalization() { return false; }
+  //@}
+
+  //===--------------------------------------------------------------------===//
+  /// @name PassManager API
+  ///
+  //@{
+  void preparePassManager(PMStack &PMS);
+
+  virtual void assignPassManager(PMStack &PMS,
+    PassManagerType PMT = PMT_RegionPassManager);
+
+  virtual PassManagerType getPotentialPassManagerType() const {
+    return PMT_RegionPassManager;
+  }
+  //@}
+};
+
+/// @brief The pass manager to schedule RegionPasses.
+class RGPassManager : public FunctionPass, public PMDataManager {
+  std::deque<Region*> RQ;
+  bool skipThisRegion;
+  bool redoThisRegion;
+  RegionInfo *RI;
+  Region *CurrentRegion;
+
+public:
+  static char ID;
+  explicit RGPassManager(int Depth);
+
+  /// @brief Execute all of the passes scheduled for execution.
+  ///
+  /// @return True if any of the passes modifies the function.
+  bool runOnFunction(Function &F);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  /// RGPassManager needs RegionInfo.
+  void getAnalysisUsage(AnalysisUsage &Info) const;
+
+  virtual const char *getPassName() const {
+    return "Region Pass Manager";
+  }
+
+  virtual PMDataManager *getAsPMDataManager() { return this; }
+  virtual Pass *getAsPass() { return this; }
+
+  /// @brief Print passes managed by this manager.
+  void dumpPassStructure(unsigned Offset);
+
+  /// @brief Print passes contained by this manager.
+  Pass *getContainedPass(unsigned N) {
+    assert(N < PassVector.size() && "Pass number out of range!");
+    Pass *FP = static_cast<Pass *>(PassVector[N]);
+    return FP;
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_RegionPassManager;
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1fa94e9c311c..d1938061bef6 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -24,7 +24,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Instructions.h"
 #include "llvm/Function.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ConstantRange.h"
@@ -70,27 +70,16 @@ namespace llvm {
   private:
     SCEV(const SCEV &);            // DO NOT IMPLEMENT
     void operator=(const SCEV &);  // DO NOT IMPLEMENT
-  protected:
-    virtual ~SCEV();
+
   public:
     explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
       FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
 
     unsigned getSCEVType() const { return SCEVType; }
 
-    /// isLoopInvariant - Return true if the value of this SCEV is unchanging in
-    /// the specified loop.
-    virtual bool isLoopInvariant(const Loop *L) const = 0;
-
-    /// hasComputableLoopEvolution - Return true if this SCEV changes value in a
-    /// known way in the specified loop.  This property being true implies that
-    /// the value is variant in the loop AND that we can emit an expression to
-    /// compute the value of the expression at any particular loop iteration.
-    virtual bool hasComputableLoopEvolution(const Loop *L) const = 0;
-
     /// getType - Return the LLVM type of this SCEV expression.
     ///
-    virtual const Type *getType() const = 0;
+    const Type *getType() const;
 
     /// isZero - Return true if the expression is a constant zero.
     ///
@@ -105,22 +94,10 @@ namespace llvm {
     ///
     bool isAllOnesValue() const;
 
-    /// hasOperand - Test whether this SCEV has Op as a direct or
-    /// indirect operand.
-    virtual bool hasOperand(const SCEV *Op) const = 0;
-
-    /// dominates - Return true if elements that makes up this SCEV dominates
-    /// the specified basic block.
-    virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const = 0;
-
-    /// properlyDominates - Return true if elements that makes up this SCEV
-    /// properly dominate the specified basic block.
-    virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const = 0;
-
     /// print - Print out the internal representation of this scalar to the
     /// specified stream.  This should really only be used for debugging
     /// purposes.
-    virtual void print(raw_ostream &OS) const = 0;
+    void print(raw_ostream &OS) const;
 
     /// dump - This method is used for debugging.
     ///
@@ -155,21 +132,6 @@ namespace llvm {
   struct SCEVCouldNotCompute : public SCEV {
     SCEVCouldNotCompute();
 
-    // None of these methods are valid for this object.
-    virtual bool isLoopInvariant(const Loop *L) const;
-    virtual const Type *getType() const;
-    virtual bool hasComputableLoopEvolution(const Loop *L) const;
-    virtual void print(raw_ostream &OS) const;
-    virtual bool hasOperand(const SCEV *Op) const;
-
-    virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const {
-      return true;
-    }
-
-    virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-      return true;
-    }
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVCouldNotCompute *S) { return true; }
     static bool classof(const SCEV *S);
@@ -180,6 +142,24 @@ namespace llvm {
   /// they must ask this class for services.
   ///
   class ScalarEvolution : public FunctionPass {
+  public:
+    /// LoopDisposition - An enum describing the relationship between a
+    /// SCEV and a loop.
+    enum LoopDisposition {
+      LoopVariant,    ///< The SCEV is loop-variant (unknown).
+      LoopInvariant,  ///< The SCEV is loop-invariant.
+      LoopComputable  ///< The SCEV varies predictably with the loop.
+    };
+
+    /// BlockDisposition - An enum describing the relationship between a
+    /// SCEV and a basic block.
+    enum BlockDisposition {
+      DoesNotDominateBlock,  ///< The SCEV does not dominate the block.
+      DominatesBlock,        ///< The SCEV dominates the block.
+      ProperlyDominatesBlock ///< The SCEV properly dominates the block.
+    };
+
+  private:
     /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
     /// notified whenever a Value is deleted.
     class SCEVCallbackVH : public CallbackVH {
@@ -267,6 +247,46 @@ namespace llvm {
     std::map<const SCEV *,
              std::map<const Loop *, const SCEV *> > ValuesAtScopes;
 
+    /// LoopDispositions - Memoized computeLoopDisposition results.
+    std::map<const SCEV *,
+             std::map<const Loop *, LoopDisposition> > LoopDispositions;
+
+    /// computeLoopDisposition - Compute a LoopDisposition value.
+    LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
+
+    /// BlockDispositions - Memoized computeBlockDisposition results.
+    std::map<const SCEV *,
+             std::map<const BasicBlock *, BlockDisposition> > BlockDispositions;
+
+    /// computeBlockDisposition - Compute a BlockDisposition value.
+    BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
+
+    /// UnsignedRanges - Memoized results from getUnsignedRange
+    DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
+
+    /// SignedRanges - Memoized results from getSignedRange
+    DenseMap<const SCEV *, ConstantRange> SignedRanges;
+
+    /// setUnsignedRange - Set the memoized unsigned range for the given SCEV.
+    const ConstantRange &setUnsignedRange(const SCEV *S,
+                                          const ConstantRange &CR) {
+      std::pair<DenseMap<const SCEV *, ConstantRange>::iterator, bool> Pair =
+        UnsignedRanges.insert(std::make_pair(S, CR));
+      if (!Pair.second)
+        Pair.first->second = CR;
+      return Pair.first->second;
+    }
+
+    /// setUnsignedRange - Set the memoized signed range for the given SCEV.
+    const ConstantRange &setSignedRange(const SCEV *S,
+                                        const ConstantRange &CR) {
+      std::pair<DenseMap<const SCEV *, ConstantRange>::iterator, bool> Pair =
+        SignedRanges.insert(std::make_pair(S, CR));
+      if (!Pair.second)
+        Pair.first->second = CR;
+      return Pair.first->second;
+    }
+
     /// createSCEV - We know that there is no SCEV for the specified value.
     /// Analyze the expression.
     const SCEV *createSCEV(Value *V);
@@ -408,6 +428,9 @@ namespace llvm {
     bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
                                     const SCEV *LHS, const SCEV *RHS);
 
+    /// forgetMemoizedResults - Drop memoized information computed for S.
+    void forgetMemoizedResults(const SCEV *S);
+
   public:
     static char ID; // Pass identification, replacement for typeid
     ScalarEvolution();
@@ -514,10 +537,11 @@ namespace llvm {
     ///
     const SCEV *getNotSCEV(const SCEV *V);
 
-    /// getMinusSCEV - Return LHS-RHS.
-    ///
-    const SCEV *getMinusSCEV(const SCEV *LHS,
-                             const SCEV *RHS);
+    /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1,
+    /// and thus the HasNUW and HasNSW bits apply to the resultant add, not
+    /// whether the sub would have overflowed.
+    const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+                             bool HasNUW = false, bool HasNSW = false);
 
     /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
     /// of the input value to the specified type.  If the type must be
@@ -675,6 +699,36 @@ namespace llvm {
                               const SCEV *&LHS,
                               const SCEV *&RHS);
 
+    /// getLoopDisposition - Return the "disposition" of the given SCEV with
+    /// respect to the given loop.
+    LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
+
+    /// isLoopInvariant - Return true if the value of the given SCEV is
+    /// unchanging in the specified loop.
+    bool isLoopInvariant(const SCEV *S, const Loop *L);
+
+    /// hasComputableLoopEvolution - Return true if the given SCEV changes value
+    /// in a known way in the specified loop.  This property being true implies
+    /// that the value is variant in the loop AND that we can emit an expression
+    /// to compute the value of the expression at any particular loop iteration.
+    bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
+
+    /// getLoopDisposition - Return the "disposition" of the given SCEV with
+    /// respect to the given block.
+    BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
+
+    /// dominates - Return true if elements that makes up the given SCEV
+    /// dominate the specified basic block.
+    bool dominates(const SCEV *S, const BasicBlock *BB);
+
+    /// properlyDominates - Return true if elements that makes up the given SCEV
+    /// properly dominate the specified basic block.
+    bool properlyDominates(const SCEV *S, const BasicBlock *BB);
+
+    /// hasOperand - Test whether the given SCEV has Op as a direct or
+    /// indirect operand.
+    bool hasOperand(const SCEV *S, const SCEV *Op) const;
+
     virtual bool runOnFunction(Function &F);
     virtual void releaseMemory();
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 4b02f82035fe..39d378ed9bec 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -35,6 +35,9 @@ namespace llvm {
     std::set<AssertingVH<Value> > InsertedValues;
     std::set<AssertingVH<Value> > InsertedPostIncValues;
 
+    /// RelevantLoops - A memoization of the "relevant" loop for a given SCEV.
+    DenseMap<const SCEV *, const Loop *> RelevantLoops;
+
     /// PostIncLoops - Addrecs referring to any of the given loops are expanded
     /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
     /// returns the add instruction that adds one to the phi for {0,+,1}<L>,
@@ -168,6 +171,9 @@ namespace llvm {
       return InsertedValues.count(I) || InsertedPostIncValues.count(I);
     }
 
+    /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV.
+    const Loop *getRelevantLoop(const SCEV *);
+
     Value *visitConstant(const SCEVConstant *S) {
       return S->getValue();
     }
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 4213a287011b..db432c8173dd 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -42,29 +42,7 @@ namespace llvm {
   public:
     ConstantInt *getValue() const { return V; }
 
-    virtual bool isLoopInvariant(const Loop *L) const {
-      return true;
-    }
-
-    virtual bool hasComputableLoopEvolution(const Loop *L) const {
-      return false;  // Not loop variant
-    }
-
-    virtual const Type *getType() const;
-
-    virtual bool hasOperand(const SCEV *) const {
-      return false;
-    }
-
-    bool dominates(BasicBlock *BB, DominatorTree *DT) const {
-      return true;
-    }
-
-    bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-      return true;
-    }
-
-    virtual void print(raw_ostream &OS) const;
+    const Type *getType() const { return V->getType(); }
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVConstant *S) { return true; }
@@ -86,23 +64,7 @@ namespace llvm {
 
   public:
     const SCEV *getOperand() const { return Op; }
-    virtual const Type *getType() const { return Ty; }
-
-    virtual bool isLoopInvariant(const Loop *L) const {
-      return Op->isLoopInvariant(L);
-    }
-
-    virtual bool hasComputableLoopEvolution(const Loop *L) const {
-      return Op->hasComputableLoopEvolution(L);
-    }
-
-    virtual bool hasOperand(const SCEV *O) const {
-      return Op == O || Op->hasOperand(O);
-    }
-
-    virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
+    const Type *getType() const { return Ty; }
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVCastExpr *S) { return true; }
@@ -124,8 +86,6 @@ namespace llvm {
                      const SCEV *op, const Type *ty);
 
   public:
-    virtual void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVTruncateExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -144,8 +104,6 @@ namespace llvm {
                        const SCEV *op, const Type *ty);
 
   public:
-    virtual void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVZeroExtendExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -164,8 +122,6 @@ namespace llvm {
                        const SCEV *op, const Type *ty);
 
   public:
-    virtual void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVSignExtendExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -202,20 +158,7 @@ namespace llvm {
     op_iterator op_begin() const { return Operands; }
     op_iterator op_end() const { return Operands + NumOperands; }
 
-    virtual bool isLoopInvariant(const Loop *L) const;
-
-    // hasComputableLoopEvolution - N-ary expressions have computable loop
-    // evolutions iff they have at least one operand that varies with the loop,
-    // but that all varying operands are computable.
-    virtual bool hasComputableLoopEvolution(const Loop *L) const;
-
-    virtual bool hasOperand(const SCEV *O) const;
-
-    bool dominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    virtual const Type *getType() const { return getOperand(0)->getType(); }
+    const Type *getType() const { return getOperand(0)->getType(); }
 
     bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); }
     void setHasNoUnsignedWrap(bool B) {
@@ -248,10 +191,6 @@ namespace llvm {
       : SCEVNAryExpr(ID, T, O, N) {}
 
   public:
-    virtual const char *getOperationStr() const = 0;
-
-    virtual void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVCommutativeExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -275,9 +214,7 @@ namespace llvm {
     }
 
   public:
-    virtual const char *getOperationStr() const { return " + "; }
-
-    virtual const Type *getType() const {
+    const Type *getType() const {
       // Use the type of the last operand, which is likely to be a pointer
       // type, if there is one. This doesn't usually matter, but it can help
       // reduce casts when the expressions are expanded.
@@ -303,8 +240,6 @@ namespace llvm {
     }
 
   public:
-    virtual const char *getOperationStr() const { return " * "; }
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVMulExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -328,27 +263,15 @@ namespace llvm {
     const SCEV *getLHS() const { return LHS; }
     const SCEV *getRHS() const { return RHS; }
 
-    virtual bool isLoopInvariant(const Loop *L) const {
-      return LHS->isLoopInvariant(L) && RHS->isLoopInvariant(L);
-    }
-
-    virtual bool hasComputableLoopEvolution(const Loop *L) const {
-      return LHS->hasComputableLoopEvolution(L) &&
-             RHS->hasComputableLoopEvolution(L);
-    }
-
-    virtual bool hasOperand(const SCEV *O) const {
-      return O == LHS || O == RHS || LHS->hasOperand(O) || RHS->hasOperand(O);
+    const Type *getType() const {
+      // In most cases the types of LHS and RHS will be the same, but in some
+      // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
+      // depend on the type for correctness, but handling types carefully can
+      // avoid extra casts in the SCEVExpander. The LHS is more likely to be
+      // a pointer type than the RHS, so use the RHS' type here.
+      return getRHS()->getType();
     }
 
-    bool dominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    virtual const Type *getType() const;
-
-    void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVUDivExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -373,11 +296,7 @@ namespace llvm {
 
     SCEVAddRecExpr(const FoldingSetNodeIDRef ID,
                    const SCEV *const *O, size_t N, const Loop *l)
-      : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {
-      for (size_t i = 0, e = NumOperands; i != e; ++i)
-        assert(Operands[i]->isLoopInvariant(l) &&
-               "Operands of AddRec must be loop-invariant!");
-    }
+      : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
 
   public:
     const SCEV *getStart() const { return Operands[0]; }
@@ -393,16 +312,6 @@ namespace llvm {
                               getLoop());
     }
 
-    virtual bool hasComputableLoopEvolution(const Loop *QL) const {
-      return L == QL;
-    }
-
-    virtual bool isLoopInvariant(const Loop *QueryLoop) const;
-
-    bool dominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
-
     /// isAffine - Return true if this is an affine AddRec (i.e., it represents
     /// an expressions A+B*x where A and B are loop invariant values.
     bool isAffine() const {
@@ -437,8 +346,6 @@ namespace llvm {
       return cast<SCEVAddRecExpr>(SE.getAddExpr(this, getStepRecurrence(SE)));
     }
 
-    virtual void print(raw_ostream &OS) const;
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVAddRecExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -462,8 +369,6 @@ namespace llvm {
     }
 
   public:
-    virtual const char *getOperationStr() const { return " smax "; }
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVSMaxExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -487,8 +392,6 @@ namespace llvm {
     }
 
   public:
-    virtual const char *getOperationStr() const { return " umax "; }
-
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVUMaxExpr *S) { return true; }
     static inline bool classof(const SCEV *S) {
@@ -534,22 +437,7 @@ namespace llvm {
     bool isAlignOf(const Type *&AllocTy) const;
     bool isOffsetOf(const Type *&STy, Constant *&FieldNo) const;
 
-    virtual bool isLoopInvariant(const Loop *L) const;
-    virtual bool hasComputableLoopEvolution(const Loop *QL) const {
-      return false; // not computable
-    }
-
-    virtual bool hasOperand(const SCEV *) const {
-      return false;
-    }
-
-    bool dominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const;
-
-    virtual const Type *getType() const;
-
-    virtual void print(raw_ostream &OS) const;
+    const Type *getType() const { return getValPtr()->getType(); }
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static inline bool classof(const SCEVUnknown *S) { return true; }
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 7b6026fea0a6..6df1693c78e6 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_ANALYSIS_VALUETRACKING_H
 #define LLVM_ANALYSIS_VALUETRACKING_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 
 namespace llvm {
@@ -39,6 +39,23 @@ namespace llvm {
                          APInt &KnownOne, const TargetData *TD = 0,
                          unsigned Depth = 0);
   
+  /// ComputeSignBit - Determine whether the sign bit is known to be zero or
+  /// one.  Convenience wrapper around ComputeMaskedBits.
+  void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                      const TargetData *TD = 0, unsigned Depth = 0);
+
+  /// isPowerOfTwo - Return true if the given value is known to have exactly one
+  /// bit set when defined. For vectors return true if every element is known to
+  /// be a power of two when defined.  Supports values with integer or pointer
+  /// type and vectors of integers.
+  bool isPowerOfTwo(Value *V, const TargetData *TD = 0, unsigned Depth = 0);
+
+  /// isKnownNonZero - Return true if the given value is known to be non-zero
+  /// when defined.  For vectors return true if every element is known to be
+  /// non-zero when defined.  Supports values with integer or pointer type and
+  /// vectors of integers.
+  bool isKnownNonZero(Value *V, const TargetData *TD = 0, unsigned Depth = 0);
+
   /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
   /// this predicate to simplify operations downstream.  Mask is known to be
   /// zero for bits that V cannot have.
@@ -77,7 +94,13 @@ namespace llvm {
   ///
   bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
 
-  
+  /// isBytewiseValue - If the specified value can be set by repeating the same
+  /// byte in memory, return the i8 value that it is represented with.  This is
+  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+  /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
+  /// byte store (e.g. i16 0x1234), return null.
+  Value *isBytewiseValue(Value *V);
+    
   /// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
   /// the scalar value indexed is already around as a register, for example if
   /// it were inserted directly into the aggregrate.
@@ -97,6 +120,17 @@ namespace llvm {
     return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore);
   }
   
+  /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+  /// it can be expressed as a base pointer plus a constant offset.  Return the
+  /// base and offset to the caller.
+  Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                          const TargetData &TD);
+  static inline const Value *
+  GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+                                   const TargetData &TD) {
+    return GetPointerBaseWithConstantOffset(const_cast<Value*>(Ptr), Offset,TD);
+  }
+  
   /// GetConstantStringInfo - This function computes the length of a
   /// null-terminated C string pointed to by V.  If successful, it returns true
   /// and returns the string in Str.  If unsuccessful, it returns false.  If
@@ -110,6 +144,20 @@ namespace llvm {
   /// GetStringLength - If we can compute the length of the string pointed to by
   /// the specified pointer, return 'len+1'.  If we can't, return 0.
   uint64_t GetStringLength(Value *V);
+
+  /// GetUnderlyingObject - This method strips off any GEP address adjustments
+  /// and pointer casts from the specified value, returning the original object
+  /// being addressed.  Note that the returned value has pointer type if the
+  /// specified value does.  If the MaxLookup value is non-zero, it limits the
+  /// number of instructions to be stripped off.
+  Value *GetUnderlyingObject(Value *V, const TargetData *TD = 0,
+                             unsigned MaxLookup = 6);
+  static inline const Value *
+  GetUnderlyingObject(const Value *V, const TargetData *TD = 0,
+                      unsigned MaxLookup = 6) {
+    return GetUnderlyingObject(const_cast<Value *>(V), TD, MaxLookup);
+  }
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 1296d67d5258..da6188b1a8ea 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -65,6 +65,8 @@ const Attributes StackAlignment  = 7<<26; ///< Alignment of stack for
                                           ///of alignment with +1 bias
                                           ///0 means unaligned (different from
                                           ///alignstack(1))
+const Attributes Hotpatch    = 1<<29;     ///< Function should have special
+                                          ///'hotpatch' sequence in prologue
 
 /// @brief Attributes that only apply to function parameters.
 const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
@@ -73,7 +75,8 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
 /// be used on return values or function parameters.
 const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
   NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
-  NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment;
+  NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
+  Hotpatch;
 
 /// @brief Parameter attributes that do not apply to vararg call arguments.
 const Attributes VarArgsIncompatible = StructRet;
@@ -223,7 +226,7 @@ public:
   /// paramHasAttr - Return true if the specified parameter index has the
   /// specified attribute set.
   bool paramHasAttr(unsigned Idx, Attributes Attr) const {
-    return getAttributes(Idx) & Attr;
+    return (getAttributes(Idx) & Attr) != 0;
   }
 
   /// getParamAlignment - Return the alignment for the specified function
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index bf5874f6824a..7e7c9e76943d 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -18,7 +18,7 @@
 #include "llvm/SymbolTableListTraits.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
@@ -58,9 +58,9 @@ private:
 /// tables. The type of a BasicBlock is "Type::LabelTy" because the basic block
 /// represents a label to which a branch can jump.
 ///
-/// A well formed basic block is formed of a list of non-terminating 
-/// instructions followed by a single TerminatorInst instruction.  
-/// TerminatorInst's may not occur in the middle of basic blocks, and must 
+/// A well formed basic block is formed of a list of non-terminating
+/// instructions followed by a single TerminatorInst instruction.
+/// TerminatorInst's may not occur in the middle of basic blocks, and must
 /// terminate the blocks. The BasicBlock class allows malformed basic blocks to
 /// occur because it may be useful in the intermediate stage of constructing or
 /// modifying a program. However, the verifier will ensure that basic blocks
@@ -90,7 +90,7 @@ private:
 public:
   /// getContext - Get the context in which this basic block lives.
   LLVMContext &getContext() const;
-  
+
   /// Instruction iterators...
   typedef InstListType::iterator                              iterator;
   typedef InstListType::const_iterator                  const_iterator;
@@ -98,7 +98,7 @@ public:
   /// Create - Creates a new BasicBlock. If the Parent parameter is specified,
   /// the basic block is automatically inserted at either the end of the
   /// function (if InsertBefore is 0), or before the specified basic block.
-  static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "", 
+  static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "",
                             Function *Parent = 0,BasicBlock *InsertBefore = 0) {
     return new BasicBlock(Context, Name, Parent, InsertBefore);
   }
@@ -114,15 +114,15 @@ public:
   /// and BlockAddress's).
   User       *use_back()       { return cast<User>(*use_begin());}
   const User *use_back() const { return cast<User>(*use_begin());}
-  
+
   /// getTerminator() - If this is a well formed basic block, then this returns
   /// a pointer to the terminator instruction.  If it is not, then you get a
   /// null pointer back.
   ///
   TerminatorInst *getTerminator();
   const TerminatorInst *getTerminator() const;
-  
-  /// Returns a pointer to the first instructon in this block that is not a 
+
+  /// Returns a pointer to the first instructon in this block that is not a
   /// PHINode instruction. When adding instruction to the beginning of the
   /// basic block, they should be added before the returned value, not before
   /// the first instruction, which might be PHI.
@@ -137,7 +137,7 @@ public:
   const Instruction* getFirstNonPHIOrDbg() const {
     return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg();
   }
-  
+
   /// removeFromParent - This method unlinks 'this' from the containing
   /// function, but does not delete it.
   ///
@@ -147,15 +147,15 @@ public:
   /// and deletes it.
   ///
   void eraseFromParent();
-  
+
   /// moveBefore - Unlink this basic block from its current function and
   /// insert it into the function that MovePos lives in, right before MovePos.
   void moveBefore(BasicBlock *MovePos);
-  
+
   /// moveAfter - Unlink this basic block from its current function and
   /// insert it into the function that MovePos lives in, right after MovePos.
   void moveAfter(BasicBlock *MovePos);
-  
+
 
   /// getSinglePredecessor - If this basic block has a single predecessor block,
   /// return the block, otherwise return a null pointer.
@@ -166,8 +166,8 @@ public:
 
   /// getUniquePredecessor - If this basic block has a unique predecessor block,
   /// return the block, otherwise return a null pointer.
-  /// Note that unique predecessor doesn't mean single edge, there can be 
-  /// multiple edges from the unique predecessor to this block (for example 
+  /// Note that unique predecessor doesn't mean single edge, there can be
+  /// multiple edges from the unique predecessor to this block (for example
   /// a switch statement with multiple cases having the same destination).
   BasicBlock *getUniquePredecessor();
   const BasicBlock *getUniquePredecessor() const {
@@ -247,7 +247,7 @@ public:
   /// hasAddressTaken - returns true if there are any uses of this basic block
   /// other than direct branches, switches, etc. to it.
   bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
-                     
+
 private:
   /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
   /// objects using it.  This is almost always 0, sometimes one, possibly but
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 934e764b6587..c3c07d8588a3 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -19,12 +19,13 @@
 
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include <map>
 #include <set>
 
 namespace llvm {
   class MemoryBuffer;
+  class raw_ostream;
 
 // Forward declare classes
 class Module;              // From VMCore
@@ -82,7 +83,7 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
     unsigned getGroup() const            { return info.getGroup(); }
 
     /// The "mode" specifies the access permissions for the file per Unix
-    /// security. This may not have any applicabiity on non-Unix systems but is
+    /// security. This may not have any applicability on non-Unix systems but is
     /// a required component of the "ar" file format.
     /// @brief Get the permission mode associated with this archive member.
     unsigned getMode() const             { return info.getMode(); }
@@ -144,7 +145,7 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
     /// allowed that doesn't have this restriction. This method determines if
     /// that "long format" is used for this member.
     /// @returns true iff the file name uses the long form
-    /// @brief Determin if the member has a long file name
+    /// @brief Determine if the member has a long file name
     bool hasLongFilename() const { return flags&HasLongFilenameFlag; }
 
     /// This method returns the status info (like Unix stat(2)) for the archive
@@ -402,7 +403,7 @@ class Archive {
     /// bitcode archive.  It first makes sure the symbol table has been loaded
     /// and has a non-zero size.  If it does, then it is an archive.  If not,
     /// then it tries to load all the bitcode modules of the archive.  Finally,
-    /// it returns whether it was successfull.
+    /// it returns whether it was successful.
     /// @returns true if the archive is a proper llvm bitcode archive
     /// @brief Determine whether the archive is a proper llvm bitcode archive.
     bool isBitcodeArchive();
@@ -482,7 +483,7 @@ class Archive {
     bool loadSymbolTable(std::string* ErrMessage);
 
     /// @brief Write the symbol table to an ofstream.
-    void writeSymbolTable(std::ofstream& ARFile);
+    void writeSymbolTable(raw_ostream& ARFile);
 
     /// Writes one ArchiveMember to an ofstream. If an error occurs, returns
     /// false, otherwise true. If an error occurs and error is non-null then
@@ -491,7 +492,7 @@ class Archive {
     /// @returns true Writing member failed, \p error set to error message
     bool writeMember(
       const ArchiveMember& member, ///< The member to be written
-      std::ofstream& ARFile,       ///< The file to write member onto
+      raw_ostream& ARFile,       ///< The file to write member onto
       bool CreateSymbolTable,      ///< Should symbol table be created?
       bool TruncateNames,          ///< Should names be truncated to 11 chars?
       bool ShouldCompress,         ///< Should the member be compressed?
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index ada2e65ee642..449dc35d7de2 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -19,7 +19,7 @@
 #define LLVM_BITCODE_BITCODES_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 4f9b783aa97b..7692bd28720b 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -94,7 +94,9 @@ namespace bitc {
     TYPE_CODE_FP128    = 14,   // LONG DOUBLE (112 bit mantissa)
     TYPE_CODE_PPC_FP128= 15,   // PPC LONG DOUBLE (2 doubles)
 
-    TYPE_CODE_METADATA = 16    // METADATA
+    TYPE_CODE_METADATA = 16,   // METADATA
+
+    TYPE_CODE_X86_MMX = 17     // X86 MMX
   };
 
   // The type symbol table only has one code (TST_ENTRY_CODE).
@@ -197,10 +199,10 @@ namespace bitc {
     OBO_NO_SIGNED_WRAP = 1
   };
 
-  /// SDivOperatorOptionalFlags - Flags for serializing SDivOperator's
-  /// SubclassOptionalData contents.
-  enum SDivOperatorOptionalFlags {
-    SDIV_EXACT = 0
+  /// PossiblyExactOperatorOptionalFlags - Flags for serializing 
+  /// PossiblyExactOperator's SubclassOptionalData contents.
+  enum PossiblyExactOperatorOptionalFlags {
+    PEO_EXACT = 0
   };
 
   // The function body block (FUNCTION_BLOCK_ID) describes function bodies.  It
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index a186964743dc..fa754c014621 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -33,6 +33,15 @@ namespace llvm {
                                LLVMContext& Context,
                                std::string *ErrMsg = 0);
 
+  /// getBitcodeTargetTriple - Read the header of the specified bitcode
+  /// buffer and extract just the triple information. If successful,
+  /// this returns a string and *does not* take ownership
+  /// of 'buffer'. On error, this returns "", and fills in *ErrMsg
+  /// if ErrMsg is non-null.
+  std::string getBitcodeTargetTriple(MemoryBuffer *Buffer,
+                                     LLVMContext& Context,
+                                     std::string *ErrMsg = 0);
+
   /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
   /// If an error occurs, this returns null and fills in *ErrMsg if it is
   /// non-null.  This method *never* takes ownership of Buffer.
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index b0481b92baea..4c5ee626709a 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines LLVM's set of calling conventions. 
+// This file defines LLVM's set of calling conventions.
 //
 //===----------------------------------------------------------------------===//
 
@@ -20,21 +20,21 @@ namespace llvm {
 /// the well-known calling conventions.
 ///
 namespace CallingConv {
-  /// A set of enums which specify the assigned numeric values for known llvm 
+  /// A set of enums which specify the assigned numeric values for known llvm
   /// calling conventions.
   /// @brief LLVM Calling Convention Representation
   enum ID {
     /// C - The default llvm calling convention, compatible with C.  This
     /// convention is the only calling convention that supports varargs calls.
-    /// As with typical C calling conventions, the callee/caller have to 
+    /// As with typical C calling conventions, the callee/caller have to
     /// tolerate certain amounts of prototype mismatch.
     C = 0,
-    
+
     // Generic LLVM calling conventions.  None of these calling conventions
     // support varargs calls, and all assume that the caller and callee
     // prototype exactly match.
 
-    /// Fast - This calling convention attempts to make calls as fast as 
+    /// Fast - This calling convention attempts to make calls as fast as
     /// possible (e.g. by passing things in registers).
     Fast = 8,
 
@@ -79,7 +79,22 @@ namespace CallingConv {
     /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX,
     /// others via stack. Callee is responsible for stack cleaning. MSVC uses
     /// this by default for methods in its ABI.
-    X86_ThisCall = 70
+    X86_ThisCall = 70,
+
+    /// PTX_Kernel - Call to a PTX kernel.
+    /// Passes all arguments in parameter space.
+    PTX_Kernel = 71,
+
+    /// PTX_Device - Call to a PTX device function.
+    /// Passes all arguments in register or parameter space.
+    PTX_Device = 72,
+
+    /// MBLAZE_INTR - Calling convention used for MBlaze interrupt routines.
+    MBLAZE_INTR = 73,
+
+    /// MBLAZE_INTR - Calling convention used for MBlaze interrupt support
+    /// routines (i.e. GCC's save_volatiles attribute).
+    MBLAZE_SVOL = 74
   };
 } // End CallingConv namespace
 
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index f33a9dbcae73..78bf9fc11aa8 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -23,14 +23,16 @@
 
 namespace llvm {
 
-class TargetLowering;
 class GlobalVariable;
+class TargetLowering;
+class SDNode;
+class SelectionDAG;
 
 /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
 /// of insertvalue or extractvalue indices that identify a member, return
 /// the linearized index of the start of the member.
 ///
-unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+unsigned ComputeLinearIndex(const Type *Ty,
                             const unsigned *Indices,
                             const unsigned *IndicesEnd,
                             unsigned CurIndex = 0);
@@ -52,7 +54,7 @@ GlobalVariable *ExtractTypeInfo(Value *V);
 
 /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
 /// processed uses a memory 'm' constraint.
-bool hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
                                const TargetLowering &TLI);
 
 /// getFCmpCondCode - Return the ISD condition code corresponding to
@@ -75,6 +77,9 @@ ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
 bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
                           const TargetLowering &TLI);
 
+bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+                          const TargetLowering &TLI);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index b018603b314e..357b933db54c 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -17,7 +17,7 @@
 #define LLVM_CODEGEN_ASMPRINTER_H
 
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class BlockAddress;
@@ -49,6 +49,7 @@ namespace llvm {
   class MCSection;
   class MCStreamer;
   class MCSymbol;
+  class MDNode;
   class DwarfDebug;
   class DwarfException;
   class Mangler;
@@ -388,7 +389,7 @@ namespace llvm {
     /// frame.
     void EmitFrameMoves(const std::vector<MachineMove> &Moves,
                         MCSymbol *BaseLabel, bool isEH) const;
-
+    void EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const;
 
     //===------------------------------------------------------------------===//
     // Inline Asm Support
@@ -432,7 +433,7 @@ namespace llvm {
     mutable unsigned SetCounter;
 
     /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-    void EmitInlineAsm(StringRef Str, unsigned LocCookie) const;
+    void EmitInlineAsm(StringRef Str, const MDNode *LocMDNode = 0) const;
 
     /// EmitInlineAsm - This method formats and emits the specified machine
     /// instruction that is an inline asm.
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
index 3ade7c9e47cd..8c1431ffbeed 100644
--- a/include/llvm/CodeGen/BinaryObject.h
+++ b/include/llvm/CodeGen/BinaryObject.h
@@ -16,7 +16,7 @@
 #define LLVM_CODEGEN_BINARYOBJECT_H
 
 #include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 #include <string>
 #include <vector>
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 240734fb2e5e..853ebf99a87b 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -20,6 +20,26 @@ namespace llvm {
   class LiveIntervals;
   class MachineLoopInfo;
 
+  /// normalizeSpillWeight - The spill weight of a live interval is computed as:
+  ///
+  ///   (sum(use freq) + sum(def freq)) / (K + size)
+  ///
+  /// @param UseDefFreq Expected number of executed use and def instructions
+  ///                   per function call. Derived from block frequencies.
+  /// @param Size       Size of live interval as returnexd by getSize()
+  ///
+  static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size) {
+    // The magic constant 200 corresponds to approx. 25 instructions since
+    // SlotIndexes allocate 8 slots per instruction.
+    //
+    // The constant is added to avoid depending too much on accidental SlotIndex
+    // gaps for small intervals. The effect is that small intervals have a spill
+    // weight that is mostly proportional to the number of uses, while large
+    // intervals get a spill weight that is closer to a use density.
+    //
+    return UseDefFreq / (Size + 200);
+  }
+
   /// VirtRegAuxInfo - Calculate auxiliary information for a virtual
   /// register such as its spill weight and allocation hint.
   class VirtRegAuxInfo {
@@ -48,7 +68,9 @@ namespace llvm {
   public:
     static char ID;
 
-    CalculateSpillWeights() : MachineFunctionPass(ID) {}
+    CalculateSpillWeights() : MachineFunctionPass(ID) {
+      initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &au) const;
 
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 6fb843641dcd..2a9bbdfb7ceb 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -57,14 +57,14 @@ private:
   LocInfo HTP : 6;
 
   /// ValVT - The type of the value being assigned.
-  EVT ValVT;
+  MVT ValVT;
 
   /// LocVT - The type of the location being assigned to.
-  EVT LocVT;
+  MVT LocVT;
 public:
 
-  static CCValAssign getReg(unsigned ValNo, EVT ValVT,
-                            unsigned RegNo, EVT LocVT,
+  static CCValAssign getReg(unsigned ValNo, MVT ValVT,
+                            unsigned RegNo, MVT LocVT,
                             LocInfo HTP) {
     CCValAssign Ret;
     Ret.ValNo = ValNo;
@@ -77,8 +77,8 @@ public:
     return Ret;
   }
 
-  static CCValAssign getCustomReg(unsigned ValNo, EVT ValVT,
-                                  unsigned RegNo, EVT LocVT,
+  static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT,
+                                  unsigned RegNo, MVT LocVT,
                                   LocInfo HTP) {
     CCValAssign Ret;
     Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP);
@@ -86,8 +86,8 @@ public:
     return Ret;
   }
 
-  static CCValAssign getMem(unsigned ValNo, EVT ValVT,
-                            unsigned Offset, EVT LocVT,
+  static CCValAssign getMem(unsigned ValNo, MVT ValVT,
+                            unsigned Offset, MVT LocVT,
                             LocInfo HTP) {
     CCValAssign Ret;
     Ret.ValNo = ValNo;
@@ -100,8 +100,8 @@ public:
     return Ret;
   }
 
-  static CCValAssign getCustomMem(unsigned ValNo, EVT ValVT,
-                                  unsigned Offset, EVT LocVT,
+  static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT,
+                                  unsigned Offset, MVT LocVT,
                                   LocInfo HTP) {
     CCValAssign Ret;
     Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP);
@@ -110,7 +110,7 @@ public:
   }
 
   unsigned getValNo() const { return ValNo; }
-  EVT getValVT() const { return ValVT; }
+  MVT getValVT() const { return ValVT; }
 
   bool isRegLoc() const { return !isMem; }
   bool isMemLoc() const { return isMem; }
@@ -119,7 +119,7 @@ public:
 
   unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
   unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
-  EVT getLocVT() const { return LocVT; }
+  MVT getLocVT() const { return LocVT; }
 
   LocInfo getLocInfo() const { return HTP; }
   bool isExtInLoc() const {
@@ -129,16 +129,16 @@ public:
 };
 
 /// CCAssignFn - This function assigns a location for Val, updating State to
-/// reflect the change.
-typedef bool CCAssignFn(unsigned ValNo, EVT ValVT,
-                        EVT LocVT, CCValAssign::LocInfo LocInfo,
+/// reflect the change.  It returns 'true' if it failed to handle Val.
+typedef bool CCAssignFn(unsigned ValNo, MVT ValVT,
+                        MVT LocVT, CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State);
 
 /// CCCustomFn - This function assigns a location for Val, possibly updating
 /// all args to reflect changes and indicates if it handled it. It must set
 /// isCustom if it handles the arg and returns true.
-typedef bool CCCustomFn(unsigned &ValNo, EVT &ValVT,
-                        EVT &LocVT, CCValAssign::LocInfo &LocInfo,
+typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT,
+                        MVT &LocVT, CCValAssign::LocInfo &LocInfo,
                         ISD::ArgFlagsTy &ArgFlags, CCState &State);
 
 /// CCState - This class holds information needed while lowering arguments and
@@ -198,7 +198,7 @@ public:
 
   /// AnalyzeCallOperands - Same as above except it takes vectors of types
   /// and argument flags.
-  void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+  void AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
                            SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
                            CCAssignFn Fn);
 
@@ -209,7 +209,7 @@ public:
 
   /// AnalyzeCallResult - Same as above except it's specialized for calls which
   /// produce a single value.
-  void AnalyzeCallResult(EVT VT, CCAssignFn Fn);
+  void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
 
   /// getFirstUnallocated - Return the first unallocated register in the set, or
   /// NumRegs if they are all allocated.
@@ -284,8 +284,8 @@ public:
   // HandleByVal - Allocate a stack slot large enough to pass an argument by
   // value. The size and alignment information of the argument is encoded in its
   // parameter attribute.
-  void HandleByVal(unsigned ValNo, EVT ValVT,
-                   EVT LocVT, CCValAssign::LocInfo LocInfo,
+  void HandleByVal(unsigned ValNo, MVT ValVT,
+                   MVT LocVT, CCValAssign::LocInfo LocInfo,
                    int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
 
 private:
diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h
new file mode 100644
index 000000000000..2c5215a7927a
--- /dev/null
+++ b/include/llvm/CodeGen/EdgeBundles.h
@@ -0,0 +1,61 @@
+//===-------- EdgeBundles.h - Bundles of CFG edges --------------*- c++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The EdgeBundles analysis forms equivalence classes of CFG edges such that all
+// edges leaving a machine basic block are in the same bundle, and all edges
+// leaving a basic block are in the same bundle.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EDGEBUNDLES_H
+#define LLVM_CODEGEN_EDGEBUNDLES_H
+
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class EdgeBundles : public MachineFunctionPass {
+  const MachineFunction *MF;
+
+  /// EC - Each edge bundle is an equivalence class. The keys are:
+  ///   2*BB->getNumber()   -> Ingoing bundle.
+  ///   2*BB->getNumber()+1 -> Outgoing bundle.
+  IntEqClasses EC;
+
+public:
+  static char ID;
+  EdgeBundles() : MachineFunctionPass(ID) {}
+
+  /// getBundle - Return the ingoing (Out = false) or outgoing (Out = true)
+  /// bundle number for basic block #N
+  unsigned getBundle(unsigned N, bool Out) const { return EC[2 * N + Out]; }
+
+  /// getNumBundles - Return the total number of bundles in the CFG.
+  unsigned getNumBundles() const { return EC.getNumClasses(); }
+
+  /// getMachineFunction - Return the last machine function computed.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+  /// view - Visualize the annotated bipartite CFG with Graphviz.
+  void view() const;
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+};
+
+/// Specialize WriteGraph, the standard implementation won't work.
+raw_ostream &WriteGraph(raw_ostream &O, const EdgeBundles &G,
+                        bool ShortNames = false,
+                        const std::string &Title = "");
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 79b1554e22ac..fbb12005444f 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -15,9 +15,6 @@
 #define LLVM_CODEGEN_FASTISEL_H
 
 #include "llvm/ADT/DenseMap.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 
@@ -39,6 +36,7 @@ class TargetLowering;
 class TargetMachine;
 class TargetRegisterClass;
 class TargetRegisterInfo;
+class LoadInst;
 
 /// FastISel - This is a fast-path instruction selection class that
 /// generates poor code and doesn't support illegal types or non-trivial
@@ -102,7 +100,16 @@ public:
   /// index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
-  /// recomputeInsertPt - Reset InsertPt to prepare for insterting instructions
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                             const LoadInst * /*LI*/) {
+    return false;
+  }
+  
+  /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions
   /// into the current block.
   void recomputeInsertPt();
 
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index f17fe5a146fc..27631b7ea12f 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -19,6 +19,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallVector.h"
 #ifndef NDEBUG
 #include "llvm/ADT/SmallSet.h"
@@ -27,6 +28,7 @@
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <vector>
 
 namespace llvm {
@@ -104,9 +106,8 @@ public:
     LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
   };
   
-  /// LiveOutRegInfo - Information about live out vregs, indexed by their
-  /// register number offset by 'FirstVirtualRegister'.
-  std::vector<LiveOutInfo> LiveOutRegInfo;
+  /// LiveOutRegInfo - Information about live out vregs.
+  IndexedMap<LiveOutInfo, VirtReg2IndexFunctor> LiveOutRegInfo;
 
   /// PHINodesToUpdate - A list of phi instructions whose operand list will
   /// be updated after processing the current basic block.
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index b401068140b0..45469ed7de80 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -36,6 +36,7 @@
 #include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
   class AsmPrinter;
@@ -59,8 +60,10 @@ namespace llvm {
   struct GCPoint {
     GC::PointKind Kind; //< The kind of the safe point.
     MCSymbol *Label;    //< A label.
+    DebugLoc Loc;
 
-    GCPoint(GC::PointKind K, MCSymbol *L) : Kind(K), Label(L) {}
+    GCPoint(GC::PointKind K, MCSymbol *L, DebugLoc DL)
+        : Kind(K), Label(L), Loc(DL) {}
   };
 
   /// GCRoot - Metadata for a pointer to an object managed by the garbage
@@ -121,8 +124,8 @@ namespace llvm {
     /// addSafePoint - Notes the existence of a safe point. Num is the ID of the
     /// label just prior to the safe point (if the code generator is using
     /// MachineModuleInfo).
-    void addSafePoint(GC::PointKind Kind, MCSymbol *Label) {
-      SafePoints.push_back(GCPoint(Kind, Label));
+    void addSafePoint(GC::PointKind Kind, MCSymbol *Label, DebugLoc DL) {
+      SafePoints.push_back(GCPoint(Kind, Label, DL));
     }
 
     /// getFrameSize/setFrameSize - Records the function's frame size.
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 2e23f4e44e32..3da11c4a0e0f 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -107,6 +107,13 @@ namespace ISD {
     // and returns an outchain.
     EH_SJLJ_LONGJMP,
 
+    // OUTCHAIN = EH_SJLJ_DISPATCHSETUP(INCHAIN, context)
+    // This corresponds to the eh.sjlj.dispatchsetup intrinsic. It takes an
+    // input chain and a pointer to the sjlj function context as inputs and
+    // returns an outchain. By default, this does nothing. Targets can lower
+    // this to unwind setup code if needed.
+    EH_SJLJ_DISPATCHSETUP,
+
     // TargetConstant* - Like Constant*, but the DAG does not do any folding,
     // simplification, or lowering of the constant. They are used for constants
     // which are known to fit in the immediate fields of their users, or for
@@ -262,16 +269,24 @@ namespace ISD {
     /// lengths of the input vectors.
     CONCAT_VECTORS,
 
+    /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector
+    /// with VECTOR2 inserted into VECTOR1 at the (potentially
+    /// variable) element number IDX, which must be a multiple of the
+    /// VECTOR2 vector length.  The elements of VECTOR1 starting at
+    /// IDX are overwritten with VECTOR2.  Elements IDX through
+    /// vector_length(VECTOR2) must be valid VECTOR1 indices.
+    INSERT_SUBVECTOR,
+
     /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an
-    /// vector value) starting with the (potentially variable) element number
-    /// IDX, which must be a multiple of the result vector length.
+    /// vector value) starting with the element number IDX, which must be a
+    /// constant multiple of the result vector length.
     EXTRACT_SUBVECTOR,
 
-    /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as 
+    /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
     /// VEC1/VEC2.  A VECTOR_SHUFFLE node also contains an array of constant int
     /// values that indicate which value (or undef) each result element will
-    /// get.  These constant ints are accessible through the 
-    /// ShuffleVectorSDNode class.  This is quite similar to the Altivec 
+    /// get.  These constant ints are accessible through the
+    /// ShuffleVectorSDNode class.  This is quite similar to the Altivec
     /// 'vperm' instruction, except that the indices must be constants and are
     /// in terms of the element size of VEC1/VEC2, not in terms of bytes.
     VECTOR_SHUFFLE,
@@ -288,13 +303,21 @@ namespace ISD {
     // an unsigned/signed value of type i[2*N], then return the top part.
     MULHU, MULHS,
 
-    // Bitwise operators - logical and, logical or, logical xor, shift left,
-    // shift right algebraic (shift in sign bits), shift right logical (shift in
-    // zeroes), rotate left, rotate right, and byteswap.
-    AND, OR, XOR, SHL, SRA, SRL, ROTL, ROTR, BSWAP,
+    /// Bitwise operators - logical and, logical or, logical xor.
+    AND, OR, XOR,
+    
+    /// Shift and rotation operations.  After legalization, the type of the
+    /// shift amount is known to be TLI.getShiftAmountTy().  Before legalization
+    /// the shift amount can be any type, but care must be taken to ensure it is
+    /// large enough.  TLI.getShiftAmountTy() is i8 on some targets, but before
+    /// legalization, types like i1024 can occur and i8 doesn't have enough bits
+    /// to represent the shift amount.  By convention, DAGCombine and
+    /// SelectionDAGBuilder forces these shift amounts to i32 for simplicity.
+    ///
+    SHL, SRA, SRL, ROTL, ROTR,
 
-    // Counting operators
-    CTTZ, CTLZ, CTPOP,
+    /// Byte Swap and Counting operators.
+    BSWAP, CTTZ, CTLZ, CTPOP,
 
     // Select(COND, TRUEVAL, FALSEVAL).  If the type of the boolean COND is not
     // i1 then the high bits must conform to getBooleanContents.
@@ -392,14 +415,14 @@ namespace ISD {
     /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
     FP_EXTEND,
 
-    // BIT_CONVERT - This operator converts between integer, vector and FP
+    // BITCAST - This operator converts between integer, vector and FP
     // values, as if the value was stored to memory with one type and loaded
     // from the same address with the other type (or equivalently for vector
     // format conversions, etc).  The source and result are required to have
     // the same bit size (e.g.  f32 <-> i32).  This can also be used for
     // int-to-int or fp-to-fp conversions, but that is a noop, deleted by
     // getNode().
-    BIT_CONVERT,
+    BITCAST,
 
     // CONVERT_RNDSAT - This operator is used to support various conversions
     // between various types (float, signed, unsigned and vectors of those
@@ -475,6 +498,7 @@ namespace ISD {
     //   Operand #0   : Input chain.
     //   Operand #1   : a ExternalSymbolSDNode with a pointer to the asm string.
     //   Operand #2   : a MDNodeSDNode with the !srcloc metadata.
+    //   Operand #3   : HasSideEffect, IsAlignStack bits.
     //   After this, it is followed by a list of operands with this format:
     //     ConstantSDNode: Flags that encode whether it is a mem or not, the
     //                     of operands that follow, etc.  See InlineAsm.h.
@@ -525,7 +549,7 @@ namespace ISD {
     // SRCVALUE - This is a node type that holds a Value* that is used to
     // make reference to a value in the LLVM IR.
     SRCVALUE,
-    
+
     // MDNODE_SDNODE - This is a node that holdes an MDNode*, which is used to
     // reference metadata in the IR.
     MDNODE_SDNODE,
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index eefbc45cb266..767b66622549 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -48,6 +48,11 @@ namespace llvm {
     /// be capable of handling this kind of change.
     ///
     void LowerIntrinsicCall(CallInst *CI);
+
+    /// LowerToByteSwap - Replace a call instruction into a call to bswap
+    /// intrinsic. Return false if it has determined the call is not a
+    /// simple integer bswap.
+    static bool LowerToByteSwap(CallInst *CI);
   };
 }
 
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index eb373fb145e8..fea852305158 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -18,7 +18,7 @@
 #define LLVM_CODEGEN_JITCODEEMITTER_H
 
 #include <string>
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 13cebeaf42f4..1ed2547ca6cf 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -20,25 +20,25 @@
 
 namespace llvm {
   class LatencyPriorityQueue;
-  
+
   /// Sorting functions for the Available queue.
   struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
     LatencyPriorityQueue *PQ;
     explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
-    
+
     bool operator()(const SUnit* left, const SUnit* right) const;
   };
 
   class LatencyPriorityQueue : public SchedulingPriorityQueue {
     // SUnits - The SUnits for the current graph.
     std::vector<SUnit> *SUnits;
-    
+
     /// NumNodesSolelyBlocking - This vector contains, for every node in the
     /// Queue, the number of nodes that the node is the sole unscheduled
     /// predecessor for.  This is used as a tie-breaker heuristic for better
     /// mobility.
     std::vector<unsigned> NumNodesSolelyBlocking;
-    
+
     /// Queue - The queue.
     std::vector<SUnit*> Queue;
     latency_sort Picker;
@@ -47,6 +47,8 @@ namespace llvm {
     LatencyPriorityQueue() : Picker(this) {
     }
 
+    bool isBottomUp() const { return false; }
+
     void initNodes(std::vector<SUnit> &sunits) {
       SUnits = &sunits;
       NumNodesSolelyBlocking.resize(SUnits->size(), 0);
@@ -62,25 +64,27 @@ namespace llvm {
     void releaseState() {
       SUnits = 0;
     }
-    
+
     unsigned getLatency(unsigned NodeNum) const {
       assert(NodeNum < (*SUnits).size());
       return (*SUnits)[NodeNum].getHeight();
     }
-    
+
     unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
       assert(NodeNum < NumNodesSolelyBlocking.size());
       return NumNodesSolelyBlocking[NodeNum];
     }
-    
+
     bool empty() const { return Queue.empty(); }
-    
+
     virtual void push(SUnit *U);
-    
+
     virtual SUnit *pop();
 
     virtual void remove(SUnit *SU);
 
+    virtual void dump(ScheduleDAG* DAG) const;
+
     // ScheduledNode - As nodes are scheduled, we look to see if there are any
     // successor nodes that have a single unscheduled predecessor.  If so, that
     // single predecessor has a higher priority, since scheduling it will make
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index cd8293de5069..c931261f6332 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -34,8 +34,10 @@ namespace {
       (void) llvm::createDeadMachineInstructionElimPass();
 
       (void) llvm::createFastRegisterAllocator();
+      (void) llvm::createBasicRegisterAllocator();
       (void) llvm::createLinearScanRegisterAllocator();
-      (void) llvm::createPBQPRegisterAllocator();
+      (void) llvm::createGreedyRegisterAllocator();
+      (void) llvm::createDefaultPBQPRegisterAllocator();
 
       (void) llvm::createSimpleRegisterCoalescer();
       
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 29e689a52145..88131fbc40ff 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -21,7 +21,7 @@
 #ifndef LLVM_CODEGEN_LIVEINTERVAL_H
 #define LLVM_CODEGEN_LIVEINTERVAL_H
 
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/IntEqClasses.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/CodeGen/SlotIndexes.h"
@@ -39,32 +39,17 @@ namespace llvm {
   /// This class holds information about a machine level values, including
   /// definition and use points.
   ///
-  /// Care must be taken in interpreting the def index of the value. The
-  /// following rules apply:
-  ///
-  /// If the isDefAccurate() method returns false then def does not contain the
-  /// index of the defining MachineInstr, or even (necessarily) to a
-  /// MachineInstr at all. In general such a def index is not meaningful
-  /// and should not be used. The exception is that, for values originally
-  /// defined by PHI instructions, after PHI elimination def will contain the
-  /// index of the MBB in which the PHI originally existed. This can be used
-  /// to insert code (spills or copies) which deals with the value, which will
-  /// be live in to the block.
   class VNInfo {
   private:
     enum {
       HAS_PHI_KILL    = 1,
       REDEF_BY_EC     = 1 << 1,
       IS_PHI_DEF      = 1 << 2,
-      IS_UNUSED       = 1 << 3,
-      IS_DEF_ACCURATE = 1 << 4
+      IS_UNUSED       = 1 << 3
     };
 
+    MachineInstr *copy;
     unsigned char flags;
-    union {
-      MachineInstr *copy;
-      unsigned reg;
-    } cr;
 
   public:
     typedef BumpPtrAllocator Allocator;
@@ -76,20 +61,19 @@ namespace llvm {
     SlotIndex def;
 
     /// VNInfo constructor.
-    /// d is presumed to point to the actual defining instr. If it doesn't
-    /// setIsDefAccurate(false) should be called after construction.
     VNInfo(unsigned i, SlotIndex d, MachineInstr *c)
-      : flags(IS_DEF_ACCURATE), id(i), def(d) { cr.copy = c; }
+      : copy(c), flags(0), id(i), def(d)
+    { }
 
     /// VNInfo construtor, copies values from orig, except for the value number.
     VNInfo(unsigned i, const VNInfo &orig)
-      : flags(orig.flags), cr(orig.cr), id(i), def(orig.def)
+      : copy(orig.copy), flags(orig.flags), id(i), def(orig.def)
     { }
 
     /// Copy from the parameter into this VNInfo.
     void copyFrom(VNInfo &src) {
       flags = src.flags;
-      cr = src.cr;
+      copy = src.copy;
       def = src.def;
     }
 
@@ -97,23 +81,23 @@ namespace llvm {
     unsigned getFlags() const { return flags; }
     void setFlags(unsigned flags) { this->flags = flags; }
 
+    /// Merge flags from another VNInfo
+    void mergeFlags(const VNInfo *VNI) {
+      flags = (flags | VNI->flags) & ~IS_UNUSED;
+    }
+
     /// For a register interval, if this VN was definied by a copy instr
     /// getCopy() returns a pointer to it, otherwise returns 0.
     /// For a stack interval the behaviour of this method is undefined.
-    MachineInstr* getCopy() const { return cr.copy; }
+    MachineInstr* getCopy() const { return copy; }
     /// For a register interval, set the copy member.
     /// This method should not be called on stack intervals as it may lead to
     /// undefined behavior.
-    void setCopy(MachineInstr *c) { cr.copy = c; }
+    void setCopy(MachineInstr *c) { copy = c; }
 
-    /// For a stack interval, returns the reg which this stack interval was
-    /// defined from.
-    /// For a register interval the behaviour of this method is undefined.
-    unsigned getReg() const { return cr.reg; }
-    /// For a stack interval, set the defining register.
-    /// This method should not be called on register intervals as it may lead
-    /// to undefined behaviour.
-    void setReg(unsigned reg) { cr.reg = reg; }
+    /// isDefByCopy - Return true when this value was defined by a copy-like
+    /// instruction as determined by MachineInstr::isCopyLike.
+    bool isDefByCopy() const { return copy != 0; }
 
     /// Returns true if one or more kills are PHI nodes.
     bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
@@ -156,16 +140,6 @@ namespace llvm {
       else
         flags &= ~IS_UNUSED;
     }
-
-    /// Returns true if the def is accurate.
-    bool isDefAccurate() const { return flags & IS_DEF_ACCURATE; }
-    /// Set the "is def accurate" flag on this value.
-    void setIsDefAccurate(bool defAccurate) {
-      if (defAccurate)
-        flags |= IS_DEF_ACCURATE;
-      else
-        flags &= ~IS_DEF_ACCURATE;
-    }
   };
 
   /// LiveRange structure - This represents a simple register range in the
@@ -231,8 +205,7 @@ namespace llvm {
     typedef SmallVector<LiveRange,4> Ranges;
     typedef SmallVector<VNInfo*,4> VNInfoList;
 
-    unsigned reg;        // the register or stack slot of this interval
-                         // if the top bits is set, it represents a stack slot.
+    const unsigned reg;  // the register or stack slot of this interval.
     float weight;        // weight of this interval
     Ranges ranges;       // the ranges in which this register is live
     VNInfoList valnos;   // value#'s
@@ -248,11 +221,8 @@ namespace llvm {
 
     };
 
-    LiveInterval(unsigned Reg, float Weight, bool IsSS = false)
-      : reg(Reg), weight(Weight) {
-      if (IsSS)
-        reg = reg | (1U << (sizeof(unsigned)*CHAR_BIT-1));
-    }
+    LiveInterval(unsigned Reg, float Weight)
+      : reg(Reg), weight(Weight) {}
 
     typedef Ranges::iterator iterator;
     iterator begin() { return ranges.begin(); }
@@ -276,28 +246,29 @@ namespace llvm {
     /// position is in a hole, this method returns an iterator pointing to the
     /// LiveRange immediately after the hole.
     iterator advanceTo(iterator I, SlotIndex Pos) {
+      assert(I != end());
       if (Pos >= endIndex())
         return end();
       while (I->end <= Pos) ++I;
       return I;
     }
 
-    void clear() {
-      valnos.clear();
-      ranges.clear();
-    }
-
-    /// isStackSlot - Return true if this is a stack slot interval.
+    /// find - Return an iterator pointing to the first range that ends after
+    /// Pos, or end(). This is the same as advanceTo(begin(), Pos), but faster
+    /// when searching large intervals.
     ///
-    bool isStackSlot() const {
-      return reg & (1U << (sizeof(unsigned)*CHAR_BIT-1));
+    /// If Pos is contained in a LiveRange, that range is returned.
+    /// If Pos is in a hole, the following LiveRange is returned.
+    /// If Pos is beyond endIndex, end() is returned.
+    iterator find(SlotIndex Pos);
+
+    const_iterator find(SlotIndex Pos) const {
+      return const_cast<LiveInterval*>(this)->find(Pos);
     }
 
-    /// getStackSlotIndex - Return stack slot index if this is a stack slot
-    /// interval.
-    int getStackSlotIndex() const {
-      assert(isStackSlot() && "Interval is not a stack slot interval!");
-      return reg & ~(1U << (sizeof(unsigned)*CHAR_BIT-1));
+    void clear() {
+      valnos.clear();
+      ranges.clear();
     }
 
     bool hasAtLeastOneValue() const { return !valnos.empty(); }
@@ -318,10 +289,9 @@ namespace llvm {
     /// getNextValue - Create a new value number and return it.  MIIdx specifies
     /// the instruction that defines the value number.
     VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
-                       bool isDefAccurate, VNInfo::Allocator &VNInfoAllocator) {
+                         VNInfo::Allocator &VNInfoAllocator) {
       VNInfo *VNI =
         new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def, CopyMI);
-      VNI->setIsDefAccurate(isDefAccurate);
       valnos.push_back(VNI);
       return VNI;
     }
@@ -358,21 +328,6 @@ namespace llvm {
     /// cause merging of V1/V2 values numbers and compaction of the value space.
     VNInfo* MergeValueNumberInto(VNInfo *V1, VNInfo *V2);
 
-    /// MergeInClobberRanges - For any live ranges that are not defined in the
-    /// current interval, but are defined in the Clobbers interval, mark them
-    /// used with an unknown definition value. Caller must pass in reference to
-    /// VNInfoAllocator since it will create a new val#.
-    void MergeInClobberRanges(LiveIntervals &li_,
-                              const LiveInterval &Clobbers,
-                              VNInfo::Allocator &VNInfoAllocator);
-
-    /// MergeInClobberRange - Same as MergeInClobberRanges except it merge in a
-    /// single LiveRange only.
-    void MergeInClobberRange(LiveIntervals &li_,
-                             SlotIndex Start,
-                             SlotIndex End,
-                             VNInfo::Allocator &VNInfoAllocator);
-
     /// MergeValueInAsValue - Merge all of the live ranges of a specific val#
     /// in RHS into this live interval as the specified value number.
     /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
@@ -412,17 +367,18 @@ namespace llvm {
       return index >= endIndex();
     }
 
-    bool liveAt(SlotIndex index) const;
-
-    // liveBeforeAndAt - Check if the interval is live at the index and the
-    // index just before it. If index is liveAt, check if it starts a new live
-    // range.If it does, then check if the previous live range ends at index-1.
-    bool liveBeforeAndAt(SlotIndex index) const;
+    bool liveAt(SlotIndex index) const {
+      const_iterator r = find(index);
+      return r != end() && r->start <= index;
+    }
 
     /// killedAt - Return true if a live range ends at index. Note that the kill
     /// point is not contained in the half-open live range. It is usually the
     /// getDefIndex() slot following its last use.
-    bool killedAt(SlotIndex index) const;
+    bool killedAt(SlotIndex index) const {
+      const_iterator r = find(index.getUseIndex());
+      return r != end() && r->end == index;
+    }
 
     /// killedInRange - Return true if the interval has kills in [Start,End).
     /// Note that the kill point is considered the end of a live range, so it is
@@ -452,20 +408,20 @@ namespace llvm {
 
     /// FindLiveRangeContaining - Return an iterator to the live range that
     /// contains the specified index, or end() if there is none.
-    const_iterator FindLiveRangeContaining(SlotIndex Idx) const;
+    iterator FindLiveRangeContaining(SlotIndex Idx) {
+      iterator I = find(Idx);
+      return I != end() && I->start <= Idx ? I : end();
+    }
 
-    /// FindLiveRangeContaining - Return an iterator to the live range that
-    /// contains the specified index, or end() if there is none.
-    iterator FindLiveRangeContaining(SlotIndex Idx);
+    const_iterator FindLiveRangeContaining(SlotIndex Idx) const {
+      const_iterator I = find(Idx);
+      return I != end() && I->start <= Idx ? I : end();
+    }
 
     /// findDefinedVNInfo - Find the by the specified
     /// index (register interval) or defined
     VNInfo *findDefinedVNInfoForRegInt(SlotIndex Idx) const;
 
-    /// findDefinedVNInfo - Find the VNInfo that's defined by the specified
-    /// register (stack inteval only).
-    VNInfo *findDefinedVNInfoForStackInt(unsigned Reg) const;
-
 
     /// overlaps - Return true if the intersection of the two live intervals is
     /// not empty.
@@ -502,7 +458,10 @@ namespace llvm {
 
     /// isInOneLiveRange - Return true if the range specified is entirely in the
     /// a single LiveRange of the live interval.
-    bool isInOneLiveRange(SlotIndex Start, SlotIndex End);
+    bool isInOneLiveRange(SlotIndex Start, SlotIndex End) const {
+      const_iterator r = find(Start);
+      return r != end() && r->containsRange(Start, End);
+    }
 
     /// removeRange - Remove the specified range from this interval.  Note that
     /// the range must be a single LiveRange in its entirety.
@@ -569,6 +528,46 @@ namespace llvm {
     LI.print(OS);
     return OS;
   }
-}
 
+  /// ConnectedVNInfoEqClasses - Helper class that can divide VNInfos in a
+  /// LiveInterval into equivalence clases of connected components. A
+  /// LiveInterval that has multiple connected components can be broken into
+  /// multiple LiveIntervals.
+  ///
+  /// Given a LiveInterval that may have multiple connected components, run:
+  ///
+  ///   unsigned numComps = ConEQ.Classify(LI);
+  ///   if (numComps > 1) {
+  ///     // allocate numComps-1 new LiveIntervals into LIS[1..]
+  ///     ConEQ.Distribute(LIS);
+  /// }
+
+  class ConnectedVNInfoEqClasses {
+    LiveIntervals &lis_;
+    IntEqClasses eqClass_;
+
+    // Note that values a and b are connected.
+    void Connect(unsigned a, unsigned b);
+
+    unsigned Renumber();
+
+  public:
+    explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : lis_(lis) {}
+
+    /// Classify - Classify the values in LI into connected components.
+    /// Return the number of connected components.
+    unsigned Classify(const LiveInterval *LI);
+
+    /// getEqClass - Classify creates equivalence classes numbered 0..N. Return
+    /// the equivalence class assigned the VNI.
+    unsigned getEqClass(const VNInfo *VNI) const { return eqClass_[VNI->id]; }
+
+    /// Distribute - Distribute values in LIV[0] into a separate LiveInterval
+    /// for each connected component. LIV must have a LiveInterval for each
+    /// connected component. The LiveIntervals in Liv[1..] must be empty.
+    void Distribute(LiveInterval *LIV[]);
+
+  };
+
+}
 #endif
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 2918c3c2abe8..b09f8d111066 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -68,19 +68,13 @@ namespace llvm {
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    LiveIntervals() : MachineFunctionPass(ID) {}
+    LiveIntervals() : MachineFunctionPass(ID) {
+      initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+    }
 
     // Calculate the spill weight to assign to a single instruction.
     static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth);
 
-    // After summing the spill weights of all defs and uses, the final weight
-    // should be normalized, dividing the weight of the interval by its size.
-    // This encourages spilling of intervals that are large and have few uses,
-    // and discourages spilling of small intervals with many uses.
-    void normalizeSpillWeight(LiveInterval &li) {
-      li.weight /= getApproximateInstructionCount(li) + 25;
-    }
-
     typedef Reg2IntervalMap::iterator iterator;
     typedef Reg2IntervalMap::const_iterator const_iterator;
     const_iterator begin() const { return r2iMap_.begin(); }
@@ -161,6 +155,12 @@ namespace llvm {
     LiveRange addLiveRangeToEndOfBlock(unsigned reg,
                                        MachineInstr* startInst);
 
+    /// shrinkToUses - After removing some uses of a register, shrink its live
+    /// range to just the remaining uses. This method does not compute reaching
+    /// defs for new uses, and it doesn't remove dead defs.
+    /// Dead PHIDef values are marked as unused.
+    void shrinkToUses(LiveInterval *li);
+
     // Interval removal
 
     void removeInterval(unsigned Reg) {
@@ -169,6 +169,10 @@ namespace llvm {
       r2iMap_.erase(I);
     }
 
+    SlotIndexes *getSlotIndexes() const {
+      return indexes_;
+    }
+
     SlotIndex getZeroIndex() const {
       return indexes_->getZeroIndex();
     }
@@ -227,10 +231,6 @@ namespace llvm {
       return indexes_->getMBBFromIndex(index);
     }
 
-    SlotIndex getMBBTerminatorGap(const MachineBasicBlock *mbb) {
-      return indexes_->getTerminatorGap(mbb);
-    }
-
     SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) {
       return indexes_->insertMachineInstrInMaps(MI);
     }
@@ -272,7 +272,7 @@ namespace llvm {
     /// (if any is created) by reference. This is temporary.
     std::vector<LiveInterval*>
     addIntervalsForSpills(const LiveInterval& i,
-                          SmallVectorImpl<LiveInterval*> &SpillIs,
+                          const SmallVectorImpl<LiveInterval*> &SpillIs,
                           const MachineLoopInfo *loopInfo, VirtRegMap& vrm);
 
     /// spillPhysRegAroundRegDefsUses - Spill the specified physical register
@@ -285,7 +285,7 @@ namespace llvm {
     /// val# of the specified interval is re-materializable. Also returns true
     /// by reference if all of the defs are load instructions.
     bool isReMaterializable(const LiveInterval &li,
-                            SmallVectorImpl<LiveInterval*> &SpillIs,
+                            const SmallVectorImpl<LiveInterval*> &SpillIs,
                             bool &isLoad);
 
     /// isReMaterializable - Returns true if the definition MI of the specified
@@ -306,6 +306,16 @@ namespace llvm {
     /// within a single basic block.
     bool intervalIsInOneMBB(const LiveInterval &li) const;
 
+    /// getLastSplitPoint - Return the last possible insertion point in mbb for
+    /// spilling and splitting code. This is the first terminator, or the call
+    /// instruction if li is live into a landing pad successor.
+    MachineBasicBlock::iterator getLastSplitPoint(const LiveInterval &li,
+                                                  MachineBasicBlock *mbb) const;
+
+    /// addKillFlags - Add kill flags to any instruction that kills a virtual
+    /// register.
+    void addKillFlags();
+
   private:
     /// computeIntervals - Compute live intervals.
     void computeIntervals();
@@ -362,7 +372,7 @@ namespace llvm {
     /// by reference if the def is a load.
     bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo,
                             MachineInstr *MI,
-                            SmallVectorImpl<LiveInterval*> &SpillIs,
+                            const SmallVectorImpl<LiveInterval*> &SpillIs,
                             bool &isLoad);
 
     /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
@@ -443,9 +453,6 @@ namespace llvm {
         DenseMap<unsigned,unsigned> &MBBVRegsMap,
         std::vector<LiveInterval*> &NewLIs);
 
-    // Normalize the spill weight of all the intervals in NewLIs.
-    void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs);
-
     static LiveInterval* createInterval(unsigned Reg);
 
     void printInstrs(raw_ostream &O) const;
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index ad984db1899e..8a8dcaf5728f 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -39,7 +39,9 @@ namespace llvm {
     
   public:
     static char ID; // Pass identification, replacement for typeid
-    LiveStacks() : MachineFunctionPass(ID) {}
+    LiveStacks() : MachineFunctionPass(ID) {
+      initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+    }
 
     typedef SS2IntervalMap::iterator iterator;
     typedef SS2IntervalMap::const_iterator const_iterator;
@@ -50,19 +52,7 @@ namespace llvm {
 
     unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
 
-    LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
-      assert(Slot >= 0 && "Spill slot indice must be >= 0");
-      SS2IntervalMap::iterator I = S2IMap.find(Slot);
-      if (I == S2IMap.end()) {
-        I = S2IMap.insert(I,std::make_pair(Slot, LiveInterval(Slot,0.0F,true)));
-        S2RCMap.insert(std::make_pair(Slot, RC));
-      } else {
-        // Use the largest common subclass register class.
-        const TargetRegisterClass *OldRC = S2RCMap[Slot];
-        S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
-      }
-      return I->second;
-    }
+    LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC);
 
     LiveInterval &getInterval(int Slot) {
       assert(Slot >= 0 && "Spill slot indice must be >= 0");
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index c8182e073b9c..f9b81b1ea7d6 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -32,8 +32,10 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
@@ -46,7 +48,9 @@ class TargetRegisterInfo;
 class LiveVariables : public MachineFunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
-  LiveVariables() : MachineFunctionPass(ID) {}
+  LiveVariables() : MachineFunctionPass(ID) {
+    initializeLiveVariablesPass(*PassRegistry::getPassRegistry());
+  }
 
   /// VarInfo - This represents the regions where a virtual register is live in
   /// the program.  We represent this with three different pieces of
@@ -119,10 +123,9 @@ public:
 
 private:
   /// VirtRegInfo - This list is a mapping from virtual register number to
-  /// variable information.  FirstVirtualRegister is subtracted from the virtual
-  /// register number before indexing into this list.
+  /// variable information.
   ///
-  std::vector<VarInfo> VirtRegInfo;
+  IndexedMap<VarInfo, VirtReg2IndexFunctor> VirtRegInfo;
 
   /// PHIJoins - list of virtual registers that are PHI joins. These registers
   /// may have multiple definitions, and they require special handling when
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index 27306c62d888..21fe74f8e1cd 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_CODEGEN_MACHO_RELOCATION_H
 #define LLVM_CODEGEN_MACHO_RELOCATION_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 3cfc47ac4d84..1785451c7ec5 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -23,6 +23,7 @@ class Pass;
 class BasicBlock;
 class MachineFunction;
 class MCSymbol;
+class SlotIndexes;
 class StringRef;
 class raw_ostream;
 
@@ -223,6 +224,10 @@ public:
   /// this basic block is entered via an exception handler.
   void setIsLandingPad() { IsLandingPad = true; }
 
+  /// getLandingPadSuccessor - If this block has a successor that is a landing
+  /// pad, return it. Otherwise return NULL.
+  const MachineBasicBlock *getLandingPadSuccessor() const;
+
   // Code Layout methods.
   
   /// moveBefore/moveAfter - move 'this' block before or after the specified
@@ -289,11 +294,20 @@ public:
   /// Returns end() is there's no non-PHI instruction.
   iterator getFirstNonPHI();
 
+  /// SkipPHIsAndLabels - Return the first instruction in MBB after I that is
+  /// not a PHI or a label. This is the correct point to insert copies at the
+  /// beginning of a basic block.
+  iterator SkipPHIsAndLabels(iterator I);
+
   /// getFirstTerminator - returns an iterator to the first terminator
   /// instruction of this basic block. If a terminator does not exist,
   /// it returns end()
   iterator getFirstTerminator();
 
+  /// getLastNonDebugInstr - returns an iterator to the last non-debug
+  /// instruction in the basic block, or end()
+  iterator getLastNonDebugInstr();
+
   /// SplitCriticalEdge - Split the critical edge from this block to the
   /// given successor block, and return the newly created block, or null
   /// if splitting is not possible.
@@ -308,6 +322,9 @@ public:
   template<typename IT>
   void insert(iterator I, IT S, IT E) { Insts.insert(I, S, E); }
   iterator insert(iterator I, MachineInstr *M) { return Insts.insert(I, M); }
+  iterator insertAfter(iterator I, MachineInstr *M) { 
+    return Insts.insertAfter(I, M); 
+  }
 
   // erase - Remove the specified element or range from the instruction list.
   // These functions delete any instructions removed.
@@ -358,7 +375,7 @@ public:
 
   // Debugging methods.
   void dump() const;
-  void print(raw_ostream &OS) const;
+  void print(raw_ostream &OS, SlotIndexes* = 0) const;
 
   /// getNumber - MachineBasicBlocks are uniquely numbered at the function
   /// level, unless they're not in a MachineFunction yet, in which case this
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 7abb49a219ef..8fc80adf7fb8 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -17,7 +17,7 @@
 #ifndef LLVM_CODEGEN_MACHINECODEEMITTER_H
 #define LLVM_CODEGEN_MACHINECODEEMITTER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/MachineCodeInfo.h b/include/llvm/CodeGen/MachineCodeInfo.h
index a75c02a052e7..c5c0c4450454 100644
--- a/include/llvm/CodeGen/MachineCodeInfo.h
+++ b/include/llvm/CodeGen/MachineCodeInfo.h
@@ -17,7 +17,7 @@
 #ifndef EE_MACHINE_CODE_INFO_H
 #define EE_MACHINE_CODE_INFO_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 48695d500b19..ab944a2335f7 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -145,7 +145,7 @@ public:
   }
   
   /// eraseNode - Removes a node from  the dominator tree. Block must not
-  /// domiante any other blocks. Removes node from its immediate dominator's
+  /// dominate any other blocks. Removes node from its immediate dominator's
   /// children list. Deletes dominator node associated with basic block BB.
   inline void eraseNode(MachineBasicBlock *BB) {
     DT->eraseNode(BB);
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index dca65ef6d407..22a82a9d6e75 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -16,7 +16,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 //#include "llvm/ADT/IndexedMap.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <vector>
 
@@ -27,7 +27,7 @@ class TargetRegisterClass;
 class Type;
 class MachineFunction;
 class MachineBasicBlock;
-class TargetFrameInfo;
+class TargetFrameLowering;
 class BitVector;
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
@@ -192,13 +192,9 @@ class MachineFrameInfo {
   /// CSIValid - Has CSInfo been set yet?
   bool CSIValid;
 
-  /// SpillObjects - A vector indicating which frame indices refer to
-  /// spill slots.
-  SmallVector<bool, 8> SpillObjects;
-
-  /// TargetFrameInfo - Target information about frame layout.
+  /// TargetFrameLowering - Target information about frame layout.
   ///
-  const TargetFrameInfo &TFI;
+  const TargetFrameLowering &TFI;
 
   /// LocalFrameObjects - References to frame indices which are mapped
   /// into the local frame allocation block. <FrameIdx, LocalOffset>
@@ -217,7 +213,7 @@ class MachineFrameInfo {
   bool UseLocalStackAllocationBlock;
 
 public:
-    explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
+    explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) {
     StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
     HasVarSizedObjects = false;
     FrameAddressTaken = false;
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 5bb453dd50fa..abeaa4f58d3d 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -28,6 +28,7 @@ namespace llvm {
 
 class Value;
 class Function;
+class GCModuleInfo;
 class MachineRegisterInfo;
 class MachineFrameInfo;
 class MachineConstantPool;
@@ -37,6 +38,7 @@ class MCContext;
 class Pass;
 class TargetMachine;
 class TargetRegisterClass;
+struct MachinePointerInfo;
 
 template <>
 struct ilist_traits<MachineBasicBlock>
@@ -74,6 +76,7 @@ class MachineFunction {
   const TargetMachine &Target;
   MCContext &Ctx;
   MachineModuleInfo &MMI;
+  GCModuleInfo *GMI;
   
   // RegInfo - Information about each register in use in the function.
   MachineRegisterInfo *RegInfo;
@@ -126,10 +129,12 @@ class MachineFunction {
   void operator=(const MachineFunction&);   // DO NOT IMPLEMENT
 public:
   MachineFunction(const Function *Fn, const TargetMachine &TM,
-                  unsigned FunctionNum, MachineModuleInfo &MMI);
+                  unsigned FunctionNum, MachineModuleInfo &MMI,
+                  GCModuleInfo* GMI);
   ~MachineFunction();
 
   MachineModuleInfo &getMMI() const { return MMI; }
+  GCModuleInfo *getGMI() const { return GMI; }
   MCContext &getContext() const { return Ctx; }
   
   /// getFunction - Return the LLVM function that this machine code represents
@@ -243,7 +248,7 @@ public:
   /// print - Print out the MachineFunction in a format suitable for debugging
   /// to the specified stream.
   ///
-  void print(raw_ostream &OS) const;
+  void print(raw_ostream &OS, SlotIndexes* = 0) const;
 
   /// viewCFG - This function is meant for use from the debugger.  You can just
   /// say 'call F->viewCFG()' and a ghostview window should pop up from the
@@ -266,7 +271,7 @@ public:
 
   /// verify - Run the current MachineFunction through the machine code
   /// verifier, useful for debugger use.
-  void verify(Pass *p=NULL) const;
+  void verify(Pass *p = NULL, const char *Banner = NULL) const;
 
   // Provide accessors for the MachineBasicBlock list...
   typedef BasicBlockListType::iterator iterator;
@@ -276,7 +281,7 @@ public:
 
   /// addLiveIn - Add the specified physical register as a live-in value and
   /// create a corresponding virtual register for it.
-  unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC);
+  unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC, DebugLoc DL);
 
   //===--------------------------------------------------------------------===//
   // BasicBlock accessor functions.
@@ -368,10 +373,11 @@ public:
   /// getMachineMemOperand - Allocate a new MachineMemOperand.
   /// MachineMemOperands are owned by the MachineFunction and need not be
   /// explicitly deallocated.
-  MachineMemOperand *getMachineMemOperand(const Value *v, unsigned f,
-                                          int64_t o, uint64_t s,
-                                          unsigned base_alignment);
-
+  MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo,
+                                          unsigned f, uint64_t s,
+                                          unsigned base_alignment,
+                                          const MDNode *TBAAInfo = 0);
+  
   /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
   /// an existing one, adjusting by an offset and using the given size.
   /// MachineMemOperands are owned by the MachineFunction and need not be
@@ -406,6 +412,10 @@ public:
   /// normal 'L' label is returned.
   MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx, 
                          bool isLinkerPrivate = false) const;
+  
+  /// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+  /// base.
+  MCSymbol *getPICBaseSymbol() const;
 };
 
 //===--------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 75dbaab973d8..50676ad4ad49 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -37,6 +37,10 @@ public:
 
   MachineFunction &getMF() const { return *MF; }
   CodeGenOpt::Level getOptLevel() const { return OptLevel; }
+  
+  virtual const char* getPassName() const {
+    return "Machine Function Analysis";
+  }
 
 private:
   virtual bool doInitialization(Module &M);
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index f843196105dd..82c5332ccd9f 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -127,6 +127,10 @@ public:
   ///
   unsigned short getAsmPrinterFlags() const { return AsmPrinterFlags; }
 
+  /// clearAsmPrinterFlags - clear the AsmPrinter bitvector
+  ///
+  void clearAsmPrinterFlags() { AsmPrinterFlags = 0; }
+  
   /// getAsmPrinterFlag - Return whether an AsmPrinter flag is set.
   ///
   bool getAsmPrinterFlag(CommentFlag Flag) const {
@@ -138,6 +142,12 @@ public:
   void setAsmPrinterFlag(CommentFlag Flag) {
     AsmPrinterFlags |= (unsigned short)Flag;
   }
+  
+  /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+  ///
+  void clearAsmPrinterFlag(CommentFlag Flag) {
+    AsmPrinterFlags &= ~Flag;
+  }
 
   /// getDebugLoc - Returns the debug location id of this MachineInstr.
   ///
@@ -167,7 +177,17 @@ public:
   /// getNumExplicitOperands - Returns the number of non-implicit operands.
   ///
   unsigned getNumExplicitOperands() const;
-  
+
+  /// iterator/begin/end - Iterate over all operands of a machine instruction.
+  typedef std::vector<MachineOperand>::iterator mop_iterator;
+  typedef std::vector<MachineOperand>::const_iterator const_mop_iterator;
+
+  mop_iterator operands_begin() { return Operands.begin(); }
+  mop_iterator operands_end() { return Operands.end(); }
+
+  const_mop_iterator operands_begin() const { return Operands.begin(); }
+  const_mop_iterator operands_end() const { return Operands.end(); }
+
   /// Access to memory operands of the instruction
   mmo_iterator memoperands_begin() const { return MemRefs; }
   mmo_iterator memoperands_end() const { return MemRefsEnd; }
@@ -217,6 +237,7 @@ public:
   bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
   bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
   bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
+  bool isStackAligningInlineAsm() const;
   bool isInsertSubreg() const {
     return getOpcode() == TargetOpcode::INSERT_SUBREG;
   }
@@ -412,10 +433,23 @@ public:
   /// return 0.
   unsigned isConstantValuePHI() const;
 
+  /// hasUnmodeledSideEffects - Return true if this instruction has side
+  /// effects that are not modeled by mayLoad / mayStore, etc.
+  /// For all instructions, the property is encoded in TargetInstrDesc::Flags
+  /// (see TargetInstrDesc::hasUnmodeledSideEffects(). The only exception is
+  /// INLINEASM instruction, in which case the side effect property is encoded
+  /// in one of its operands (see InlineAsm::Extra_HasSideEffect).
+  ///
+  bool hasUnmodeledSideEffects() const;
+
   /// allDefsAreDead - Return true if all the defs of this instruction are dead.
   ///
   bool allDefsAreDead() const;
 
+  /// copyImplicitOps - Copy implicit register operands from specified
+  /// instruction to this instruction.
+  void copyImplicitOps(const MachineInstr *MI);
+
   //
   // Debugging support
   //
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 37ac24cab841..1eb97353088f 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -18,6 +18,7 @@
 #define LLVM_CODEGEN_MACHINEINSTRBUILDER_H
 
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
 
@@ -122,6 +123,13 @@ public:
     return *this;
   }
 
+  const MachineInstrBuilder &setMemRefs(MachineInstr::mmo_iterator b,
+                                        MachineInstr::mmo_iterator e) const {
+    MI->setMemRefs(b, e);
+    return *this;
+  }
+
+
   const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
     MI->addOperand(MO);
     return *this;
@@ -136,6 +144,19 @@ public:
     MI->addOperand(MachineOperand::CreateMCSymbol(Sym));
     return *this;
   }
+
+  // Add a displacement from an existing MachineOperand with an added offset.
+  const MachineInstrBuilder &addDisp(const MachineOperand &Disp,
+                                     int64_t off) const {
+    switch (Disp.getType()) {
+      default:
+        llvm_unreachable("Unhandled operand type in addDisp()");
+      case MachineOperand::MO_Immediate:
+        return addImm(Disp.getImm() + off);
+      case MachineOperand::MO_GlobalAddress:
+        return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off);
+    }
+  }
 };
 
 /// BuildMI - Builder interface.  Specify how to create the initial instruction
diff --git a/include/llvm/CodeGen/MachineLocation.h b/include/llvm/CodeGen/MachineLocation.h
index a1fcb9fe7576..21951b6680b6 100644
--- a/include/llvm/CodeGen/MachineLocation.h
+++ b/include/llvm/CodeGen/MachineLocation.h
@@ -32,7 +32,7 @@ private:
 public:
   enum {
     // The target register number for an abstract frame pointer. The value is
-    // an arbitrary value greater than TargetRegisterInfo::FirstVirtualRegister.
+    // an arbitrary value that doesn't collide with any real target register.
     VirtualFP = ~0U
   };
   MachineLocation()
@@ -41,6 +41,11 @@ public:
   : IsRegister(true), Register(R), Offset(0) {}
   MachineLocation(unsigned R, int O)
   : IsRegister(false), Register(R), Offset(O) {}
+
+  bool operator==(const MachineLocation &Other) const {
+      return IsRegister == Other.IsRegister && Register == Other.Register &&
+        Offset == Other.Offset;
+  }
   
   // Accessors
   bool isReg()           const { return IsRegister; }
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 9760eba7b86e..6dd9440500bf 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -67,7 +67,9 @@ class MachineLoopInfo : public MachineFunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
 
-  MachineLoopInfo() : MachineFunctionPass(ID) {}
+  MachineLoopInfo() : MachineFunctionPass(ID) {
+    initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  }
 
   LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; }
 
diff --git a/include/llvm/CodeGen/MachineLoopRanges.h b/include/llvm/CodeGen/MachineLoopRanges.h
new file mode 100644
index 000000000000..6a30e8b53c09
--- /dev/null
+++ b/include/llvm/CodeGen/MachineLoopRanges.h
@@ -0,0 +1,112 @@
+//===- MachineLoopRanges.h - Ranges of machine loops -----------*- c++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the MachineLoopRanges analysis.
+//
+// Provide on-demand information about the ranges of machine instructions
+// covered by a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINELOOPRANGES_H
+#define LLVM_CODEGEN_MACHINELOOPRANGES_H
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class MachineLoop;
+class MachineLoopInfo;
+class raw_ostream;
+
+/// MachineLoopRange - Range information for a single loop.
+class MachineLoopRange {
+  friend class MachineLoopRanges;
+
+public:
+  typedef IntervalMap<SlotIndex, unsigned, 4> Map;
+  typedef Map::Allocator Allocator;
+
+private:
+  /// The mapped loop.
+  const MachineLoop *const Loop;
+
+  /// Map intervals to a bit mask.
+  /// Bit 0 = inside loop block.
+  Map Intervals;
+
+  /// Loop area as measured by SlotIndex::distance.
+  unsigned Area;
+
+  /// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+  MachineLoopRange(const MachineLoop*, Allocator&, SlotIndexes&);
+
+public:
+  /// getLoop - Return the mapped machine loop.
+  const MachineLoop *getLoop() const { return Loop; }
+
+  /// overlaps - Return true if this loop overlaps the given range of machine
+  /// inteructions.
+  bool overlaps(SlotIndex Start, SlotIndex Stop);
+
+  /// getNumber - Return the loop number. This is the same as the number of the
+  /// header block.
+  unsigned getNumber() const;
+
+  /// getArea - Return the loop area. This number is approximately proportional
+  /// to the number of instructions in the loop.
+  unsigned getArea() const { return Area; }
+
+  /// getMap - Allow public read-only access for IntervalMapOverlaps.
+  const Map &getMap() { return Intervals; }
+
+  /// print - Print loop ranges on OS.
+  void print(raw_ostream&) const;
+
+  /// byNumber - Comparator for array_pod_sort that sorts a list of
+  /// MachineLoopRange pointers by number.
+  static int byNumber(const void*, const void*);
+
+  /// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+  /// MachineLoopRange pointers by descending area, then by number.
+  static int byAreaDesc(const void*, const void*);
+};
+
+raw_ostream &operator<<(raw_ostream&, const MachineLoopRange&);
+
+/// MachineLoopRanges - Analysis pass that provides on-demand per-loop range
+/// information.
+class MachineLoopRanges : public MachineFunctionPass {
+  typedef DenseMap<const MachineLoop*, MachineLoopRange*> CacheMap;
+  typedef MachineLoopRange::Allocator MapAllocator;
+
+  MapAllocator Allocator;
+  SlotIndexes *Indexes;
+  CacheMap Cache;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  MachineLoopRanges() : MachineFunctionPass(ID), Indexes(0) {}
+  ~MachineLoopRanges() { releaseMemory(); }
+
+  /// getLoopRange - Return the range of loop.
+  MachineLoopRange *getLoopRange(const MachineLoop *Loop);
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINELOOPRANGES_H
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 7272aa5fc127..768ce47f8b39 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -16,7 +16,7 @@
 #ifndef LLVM_CODEGEN_MACHINEMEMOPERAND_H
 #define LLVM_CODEGEN_MACHINEMEMOPERAND_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
@@ -24,6 +24,52 @@ class Value;
 class FoldingSetNodeID;
 class raw_ostream;
 
+/// MachinePointerInfo - This class contains a discriminated union of
+/// information about pointers in memory operands, relating them back to LLVM IR
+/// or to virtual locations (such as frame indices) that are exposed during
+/// codegen.
+struct MachinePointerInfo {
+  /// V - This is the IR pointer value for the access, or it is null if unknown.
+  /// If this is null, then the access is to a pointer in the default address
+  /// space.
+  const Value *V;
+  
+  /// Offset - This is an offset from the base Value*.
+  int64_t Offset;
+  
+  explicit MachinePointerInfo(const Value *v = 0, int64_t offset = 0)
+    : V(v), Offset(offset) {}
+  
+  MachinePointerInfo getWithOffset(int64_t O) const {
+    if (V == 0) return MachinePointerInfo(0, 0);
+    return MachinePointerInfo(V, Offset+O);
+  }
+  
+  /// getAddrSpace - Return the LLVM IR address space number that this pointer
+  /// points into.
+  unsigned getAddrSpace() const;
+  
+  /// getConstantPool - Return a MachinePointerInfo record that refers to the
+  /// constant pool.
+  static MachinePointerInfo getConstantPool();
+
+  /// getFixedStack - Return a MachinePointerInfo record that refers to the
+  /// the specified FrameIndex.
+  static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0);
+  
+  /// getJumpTable - Return a MachinePointerInfo record that refers to a
+  /// jump table entry.
+  static MachinePointerInfo getJumpTable();
+  
+  /// getGOT - Return a MachinePointerInfo record that refers to a
+  /// GOT entry.
+  static MachinePointerInfo getGOT();
+  
+  /// getStack - stack pointer relative access.
+  static MachinePointerInfo getStack(int64_t Offset);
+};
+  
+  
 //===----------------------------------------------------------------------===//
 /// MachineMemOperand - A description of a memory reference used in the backend.
 /// Instead of holding a StoreInst or LoadInst, this class holds the address
@@ -33,10 +79,10 @@ class raw_ostream;
 /// that aren't explicit in the regular LLVM IR.
 ///
 class MachineMemOperand {
-  int64_t Offset;
+  MachinePointerInfo PtrInfo;
   uint64_t Size;
-  const Value *V;
-  unsigned int Flags;
+  unsigned Flags;
+  const MDNode *TBAAInfo;
 
 public:
   /// Flags values. These may be or'd together.
@@ -54,10 +100,12 @@ public:
   };
 
   /// MachineMemOperand - Construct an MachineMemOperand object with the
-  /// specified address Value, flags, offset, size, and base alignment.
-  MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s,
-                    unsigned int base_alignment);
+  /// specified PtrInfo, flags, size, and base alignment.
+  MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s,
+                    unsigned base_alignment, const MDNode *TBAAInfo = 0);
 
+  const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
+  
   /// getValue - Return the base address of the memory access. This may either
   /// be a normal LLVM IR Value, or one of the special values used in CodeGen.
   /// Special values are those obtained via
@@ -65,7 +113,7 @@ public:
   /// other PseudoSourceValue member functions which return objects which stand
   /// for frame/stack pointer relative references and other special references
   /// which are not representable in the high-level IR.
-  const Value *getValue() const { return V; }
+  const Value *getValue() const { return PtrInfo.V; }
 
   /// getFlags - Return the raw flags of the source value, \see MemOperandFlags.
   unsigned int getFlags() const { return Flags & ((1 << MOMaxBits) - 1); }
@@ -73,7 +121,7 @@ public:
   /// getOffset - For normal values, this is a byte offset added to the base
   /// address. For PseudoSourceValue::FPRel values, this is the FrameIndex
   /// number.
-  int64_t getOffset() const { return Offset; }
+  int64_t getOffset() const { return PtrInfo.Offset; }
 
   /// getSize - Return the size in bytes of the memory reference.
   uint64_t getSize() const { return Size; }
@@ -86,6 +134,9 @@ public:
   /// base address, without the offset.
   uint64_t getBaseAlignment() const { return (1u << (Flags >> MOMaxBits)) >> 1; }
 
+  /// getTBAAInfo - Return the TBAA tag for the memory reference.
+  const MDNode *getTBAAInfo() const { return TBAAInfo; }
+
   bool isLoad() const { return Flags & MOLoad; }
   bool isStore() const { return Flags & MOStore; }
   bool isVolatile() const { return Flags & MOVolatile; }
@@ -99,7 +150,8 @@ public:
   /// setValue - Change the SourceValue for this MachineMemOperand. This
   /// should only be used when an object is being relocated and all references
   /// to it are being updated.
-  void setValue(const Value *NewSV) { V = NewSV; }
+  void setValue(const Value *NewSV) { PtrInfo.V = NewSV; }
+  void setOffset(int64_t NewOffset) { PtrInfo.Offset = NewOffset; }
 
   /// Profile - Gather unique data for the object.
   ///
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 0e719c86c18e..6bc80b099fd9 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -39,7 +39,7 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -57,7 +57,7 @@ class MachineFunction;
 class Module;
 class PointerType;
 class StructType;
-  
+
 /// MachineModuleInfoImpl - This class can be derived from and used by targets
 /// to hold private target-specific information for each Module.  Objects of
 /// type are accessed/created with MMI::getInfo and destroyed when the
@@ -70,8 +70,8 @@ public:
 protected:
   static SymbolListTy GetSortedStubs(const DenseMap<MCSymbol*, StubValueTy>&);
 };
-  
-  
+
+
 
 //===----------------------------------------------------------------------===//
 /// LandingPadInfo - This structure is used to retain landing pad info for
@@ -90,19 +90,19 @@ struct LandingPadInfo {
 };
 
 class MMIAddrLabelMap;
-  
+
 //===----------------------------------------------------------------------===//
 /// MachineModuleInfo - This class contains meta information specific to a
-/// module.  Queries can be made by different debugging and exception handling 
+/// module.  Queries can be made by different debugging and exception handling
 /// schemes and reformated for specific use.
 ///
 class MachineModuleInfo : public ImmutablePass {
   /// Context - This is the MCContext used for the entire code generator.
   MCContext Context;
-  
+
   /// TheModule - This is the LLVM Module being worked on.
   const Module *TheModule;
-  
+
   /// ObjFileMMI - This is the object-file-format-specific implementation of
   /// MachineModuleInfoImpl, which lets targets accumulate whatever info they
   /// want.
@@ -111,7 +111,7 @@ class MachineModuleInfo : public ImmutablePass {
   // FrameMoves - List of moves done by a function's prolog.  Used to construct
   // frame maps by debug and exception handling consumers.
   std::vector<MachineMove> FrameMoves;
-  
+
   // LandingPads - List of LandingPadInfo describing the landing pad information
   // in the current function.
   std::vector<LandingPadInfo> LandingPads;
@@ -145,18 +145,22 @@ class MachineModuleInfo : public ImmutablePass {
   /// llvm.compiler.used.
   SmallPtrSet<const Function *, 32> UsedFunctions;
 
-  
+
   /// AddrLabelSymbols - This map keeps track of which symbol is being used for
   /// the specified basic block's address of label.
   MMIAddrLabelMap *AddrLabelSymbols;
-  
+
   bool CallsEHReturn;
   bool CallsUnwindInit;
- 
+
   /// DbgInfoAvailable - True if debugging information is available
   /// in this module.
   bool DbgInfoAvailable;
 
+  /// True if this module calls VarArg function with floating point arguments.
+  /// This is used to emit an undefined reference to fltused on Windows targets.
+  bool CallsExternalVAFunctionWithFloatingPointArguments;
+
 public:
   static char ID; // Pass identification, replacement for typeid
 
@@ -166,22 +170,23 @@ public:
   VariableDbgInfoMapTy VariableDbgInfo;
 
   MachineModuleInfo();  // DUMMY CONSTRUCTOR, DO NOT CALL.
-  MachineModuleInfo(const MCAsmInfo &MAI);  // Real constructor.
+  // Real constructor.
+  MachineModuleInfo(const MCAsmInfo &MAI, const TargetAsmInfo *TAI);
   ~MachineModuleInfo();
-  
+
   bool doInitialization();
   bool doFinalization();
 
   /// EndFunction - Discard function meta information.
   ///
   void EndFunction();
-  
+
   const MCContext &getContext() const { return Context; }
   MCContext &getContext() { return Context; }
 
   void setModule(const Module *M) { TheModule = M; }
   const Module *getModule() const { return TheModule; }
-  
+
   /// getInfo - Keep track of various per-function pieces of information for
   /// backends that would like to do so.
   ///
@@ -191,32 +196,40 @@ public:
       ObjFileMMI = new Ty(*this);
     return *static_cast<Ty*>(ObjFileMMI);
   }
-  
+
   template<typename Ty>
   const Ty &getObjFileInfo() const {
     return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
   }
-  
+
   /// AnalyzeModule - Scan the module for global debug information.
   ///
   void AnalyzeModule(const Module &M);
-  
+
   /// hasDebugInfo - Returns true if valid debug info is present.
   ///
   bool hasDebugInfo() const { return DbgInfoAvailable; }
-  void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = true; }
+  void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
 
   bool callsEHReturn() const { return CallsEHReturn; }
   void setCallsEHReturn(bool b) { CallsEHReturn = b; }
 
   bool callsUnwindInit() const { return CallsUnwindInit; }
   void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
-  
+
+  bool callsExternalVAFunctionWithFloatingPointArguments() const {
+    return CallsExternalVAFunctionWithFloatingPointArguments;
+  }
+
+  void setCallsExternalVAFunctionWithFloatingPointArguments(bool b) {
+    CallsExternalVAFunctionWithFloatingPointArguments = b;
+  }
+
   /// getFrameMoves - Returns a reference to a list of moves done in the current
   /// function's prologue.  Used to construct frame maps for debug and exception
   /// handling comsumers.
   std::vector<MachineMove> &getFrameMoves() { return FrameMoves; }
-  
+
   /// getAddrLabelSymbol - Return the symbol to be used for the specified basic
   /// block when its address is taken.  This cannot be its normal LBB label
   /// because the block may be accessed outside its containing function.
@@ -226,15 +239,15 @@ public:
   /// basic block when its address is taken.  If other blocks were RAUW'd to
   /// this one, we may have to emit them as well, return the whole set.
   std::vector<MCSymbol*> getAddrLabelSymbolToEmit(const BasicBlock *BB);
-  
+
   /// takeDeletedSymbolsForFunction - If the specified function has had any
   /// references to address-taken blocks generated, but the block got deleted,
   /// return the symbol now so we can emit it.  This prevents emitting a
   /// reference to a symbol that has no definition.
-  void takeDeletedSymbolsForFunction(const Function *F, 
+  void takeDeletedSymbolsForFunction(const Function *F,
                                      std::vector<MCSymbol*> &Result);
 
-  
+
   //===- EH ---------------------------------------------------------------===//
 
   /// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
@@ -245,11 +258,11 @@ public:
   /// associate it with a try landing pad block.
   void addInvoke(MachineBasicBlock *LandingPad,
                  MCSymbol *BeginLabel, MCSymbol *EndLabel);
-  
-  /// addLandingPad - Add a new panding pad.  Returns the label ID for the 
+
+  /// addLandingPad - Add a new panding pad.  Returns the label ID for the
   /// landing pad entry.
   MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
-  
+
   /// addPersonality - Provide the personality function for the exception
   /// information.
   void addPersonality(MachineBasicBlock *LandingPad,
@@ -285,7 +298,7 @@ public:
   ///
   void addCleanup(MachineBasicBlock *LandingPad);
 
-  /// getTypeIDFor - Return the type id for the specified typeinfo.  This is 
+  /// getTypeIDFor - Return the type id for the specified typeinfo.  This is
   /// function wide.
   unsigned getTypeIDFor(const GlobalVariable *TI);
 
@@ -296,7 +309,7 @@ public:
   /// TidyLandingPads - Remap landing pad labels and remove any deleted landing
   /// pads.
   void TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = 0);
-                        
+
   /// getLandingPads - Return a reference to the landing pad info for the
   /// current function.
   const std::vector<LandingPadInfo> &getLandingPads() const {
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index afa2c298a273..8acc9490d8db 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -14,11 +14,11 @@
 #ifndef LLVM_CODEGEN_MACHINEOPERAND_H
 #define LLVM_CODEGEN_MACHINEOPERAND_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
-  
+
 class BlockAddress;
 class ConstantFP;
 class GlobalValue;
@@ -30,7 +30,7 @@ class TargetMachine;
 class TargetRegisterInfo;
 class raw_ostream;
 class MCSymbol;
-  
+
 /// MachineOperand class - Representation of each machine instruction operand.
 ///
 class MachineOperand {
@@ -54,21 +54,21 @@ private:
   /// OpKind - Specify what kind of operand this is.  This discriminates the
   /// union.
   unsigned char OpKind; // MachineOperandType
-  
+
   /// SubReg - Subregister number, only valid for MO_Register.  A value of 0
   /// indicates the MO_Register has no subReg.
   unsigned char SubReg;
-  
+
   /// TargetFlags - This is a set of target-specific operand flags.
   unsigned char TargetFlags;
-  
+
   /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register
   /// operands.
-  
+
   /// IsDef - True if this is a def, false if this is a use of the register.
   ///
   bool IsDef : 1;
-  
+
   /// IsImp - True if this is an implicit def or use, false if it is explicit.
   ///
   bool IsImp : 1;
@@ -94,7 +94,16 @@ private:
   /// not a real instruction.  Such uses should be ignored during codegen.
   bool IsDebug : 1;
 
-  /// ParentMI - This is the instruction that this operand is embedded into. 
+  /// SmallContents - Thisreally should be part of the Contents union, but lives
+  /// out here so we can get a better packed struct.
+  /// MO_Register: Register number.
+  /// OffsetedInfo: Low bits of offset.
+  union {
+    unsigned RegNo;           // For MO_Register.
+    unsigned OffsetLo;        // Matches Contents.OffsetedInfo.OffsetHi.
+  } SmallContents;
+
+  /// ParentMI - This is the instruction that this operand is embedded into.
   /// This is valid for all operand types, when the operand is in an instr.
   MachineInstr *ParentMI;
 
@@ -107,11 +116,11 @@ private:
     MCSymbol *Sym;            // For MO_MCSymbol
 
     struct {                  // For MO_Register.
-      unsigned RegNo;
+      // Register number is in SmallContents.RegNo.
       MachineOperand **Prev;  // Access list for register.
       MachineOperand *Next;
     } Reg;
-    
+
     /// OffsetedInfo - This struct contains the offset and an object identifier.
     /// this represent the object as with an optional offset from it.
     struct {
@@ -121,10 +130,11 @@ private:
         const GlobalValue *GV;    // For MO_GlobalAddress.
         const BlockAddress *BA;   // For MO_BlockAddress.
       } Val;
-      int64_t Offset;             // An offset from the object.
+      // Low bits of offset are in SmallContents.OffsetLo.
+      int OffsetHi;               // An offset from the object, high 32 bits.
     } OffsetedInfo;
   } Contents;
-  
+
   explicit MachineOperand(MachineOperandType K) : OpKind(K), ParentMI(0) {
     TargetFlags = 0;
   }
@@ -132,17 +142,27 @@ public:
   /// getType - Returns the MachineOperandType for this operand.
   ///
   MachineOperandType getType() const { return (MachineOperandType)OpKind; }
-  
+
   unsigned char getTargetFlags() const { return TargetFlags; }
   void setTargetFlags(unsigned char F) { TargetFlags = F; }
   void addTargetFlag(unsigned char F) { TargetFlags |= F; }
-  
+
 
   /// getParent - Return the instruction that this operand belongs to.
   ///
   MachineInstr *getParent() { return ParentMI; }
   const MachineInstr *getParent() const { return ParentMI; }
-  
+
+  /// clearParent - Reset the parent pointer.
+  ///
+  /// The MachineOperand copy constructor also copies ParentMI, expecting the
+  /// original to be deleted. If a MachineOperand is ever stored outside a
+  /// MachineInstr, the parent pointer must be cleared.
+  ///
+  /// Never call clearParent() on an operand in a MachineInstr.
+  ///
+  void clearParent() { ParentMI = 0; }
+
   void print(raw_ostream &os, const TargetMachine *TM = 0) const;
 
   //===--------------------------------------------------------------------===//
@@ -180,44 +200,44 @@ public:
   /// getReg - Returns the register number.
   unsigned getReg() const {
     assert(isReg() && "This is not a register operand!");
-    return Contents.Reg.RegNo;
+    return SmallContents.RegNo;
   }
-  
+
   unsigned getSubReg() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return (unsigned)SubReg;
   }
-  
-  bool isUse() const { 
+
+  bool isUse() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return !IsDef;
   }
-  
+
   bool isDef() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsDef;
   }
-  
-  bool isImplicit() const { 
+
+  bool isImplicit() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsImp;
   }
-  
+
   bool isDead() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsDead;
   }
-  
+
   bool isKill() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsKill;
   }
-  
+
   bool isUndef() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsUndef;
   }
-  
+
   bool isEarlyClobber() const {
     assert(isReg() && "Wrong MachineOperand accessor");
     return IsEarlyClobber;
@@ -238,11 +258,11 @@ public:
   //===--------------------------------------------------------------------===//
   // Mutators for Register Operands
   //===--------------------------------------------------------------------===//
-  
+
   /// Change the register this operand corresponds to.
   ///
   void setReg(unsigned Reg);
-  
+
   void setSubReg(unsigned subReg) {
     assert(isReg() && "Wrong MachineOperand accessor");
     SubReg = (unsigned char)subReg;
@@ -266,14 +286,14 @@ public:
     assert((Val || !isDebug()) && "Marking a debug operation as def");
     IsDef = !Val;
   }
-  
+
   void setIsDef(bool Val = true) {
     assert(isReg() && "Wrong MachineOperand accessor");
     assert((!Val || !isDebug()) && "Marking a debug operation as def");
     IsDef = Val;
   }
 
-  void setImplicit(bool Val = true) { 
+  void setImplicit(bool Val = true) {
     assert(isReg() && "Wrong MachineOperand accessor");
     IsImp = Val;
   }
@@ -283,7 +303,7 @@ public:
     assert((!Val || !isDebug()) && "Marking a debug operation as kill");
     IsKill = Val;
   }
-  
+
   void setIsDead(bool Val = true) {
     assert(isReg() && IsDef && "Wrong MachineOperand accessor");
     IsDead = Val;
@@ -293,7 +313,7 @@ public:
     assert(isReg() && "Wrong MachineOperand accessor");
     IsUndef = Val;
   }
-  
+
   void setIsEarlyClobber(bool Val = true) {
     assert(isReg() && IsDef && "Wrong MachineOperand accessor");
     IsEarlyClobber = Val;
@@ -307,17 +327,17 @@ public:
   //===--------------------------------------------------------------------===//
   // Accessors for various operand types.
   //===--------------------------------------------------------------------===//
-  
+
   int64_t getImm() const {
     assert(isImm() && "Wrong MachineOperand accessor");
     return Contents.ImmVal;
   }
-  
+
   const ConstantFP *getFPImm() const {
     assert(isFPImm() && "Wrong MachineOperand accessor");
     return Contents.CFP;
   }
-  
+
   MachineBasicBlock *getMBB() const {
     assert(isMBB() && "Wrong MachineOperand accessor");
     return Contents.MBB;
@@ -328,7 +348,7 @@ public:
            "Wrong MachineOperand accessor");
     return Contents.OffsetedInfo.Val.Index;
   }
-  
+
   const GlobalValue *getGlobal() const {
     assert(isGlobal() && "Wrong MachineOperand accessor");
     return Contents.OffsetedInfo.Val.GV;
@@ -343,15 +363,16 @@ public:
     assert(isMCSymbol() && "Wrong MachineOperand accessor");
     return Contents.Sym;
   }
-  
+
   /// getOffset - Return the offset from the symbol in this operand. This always
   /// returns 0 for ExternalSymbol operands.
   int64_t getOffset() const {
     assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
            "Wrong MachineOperand accessor");
-    return Contents.OffsetedInfo.Offset;
+    return (int64_t(Contents.OffsetedInfo.OffsetHi) << 32) |
+           SmallContents.OffsetLo;
   }
-  
+
   const char *getSymbolName() const {
     assert(isSymbol() && "Wrong MachineOperand accessor");
     return Contents.OffsetedInfo.Val.SymbolName;
@@ -361,11 +382,11 @@ public:
     assert(isMetadata() && "Wrong MachineOperand accessor");
     return Contents.MD;
   }
-  
+
   //===--------------------------------------------------------------------===//
   // Mutators for various operand types.
   //===--------------------------------------------------------------------===//
-  
+
   void setImm(int64_t immVal) {
     assert(isImm() && "Wrong MachineOperand mutator");
     Contents.ImmVal = immVal;
@@ -374,56 +395,57 @@ public:
   void setOffset(int64_t Offset) {
     assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
         "Wrong MachineOperand accessor");
-    Contents.OffsetedInfo.Offset = Offset;
+    SmallContents.OffsetLo = unsigned(Offset);
+    Contents.OffsetedInfo.OffsetHi = int(Offset >> 32);
   }
-  
+
   void setIndex(int Idx) {
     assert((isFI() || isCPI() || isJTI()) &&
            "Wrong MachineOperand accessor");
     Contents.OffsetedInfo.Val.Index = Idx;
   }
-  
+
   void setMBB(MachineBasicBlock *MBB) {
     assert(isMBB() && "Wrong MachineOperand accessor");
     Contents.MBB = MBB;
   }
-  
+
   //===--------------------------------------------------------------------===//
   // Other methods.
   //===--------------------------------------------------------------------===//
-  
+
   /// isIdenticalTo - Return true if this operand is identical to the specified
   /// operand. Note: This method ignores isKill and isDead properties.
   bool isIdenticalTo(const MachineOperand &Other) const;
-  
+
   /// ChangeToImmediate - Replace this operand with a new immediate operand of
   /// the specified value.  If an operand is known to be an immediate already,
   /// the setImm method should be used.
   void ChangeToImmediate(int64_t ImmVal);
-  
+
   /// ChangeToRegister - Replace this operand with a new register operand of
   /// the specified value.  If an operand is known to be an register already,
   /// the setReg method should be used.
   void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false,
                         bool isKill = false, bool isDead = false,
                         bool isUndef = false, bool isDebug = false);
-  
+
   //===--------------------------------------------------------------------===//
   // Construction methods.
   //===--------------------------------------------------------------------===//
-  
+
   static MachineOperand CreateImm(int64_t Val) {
     MachineOperand Op(MachineOperand::MO_Immediate);
     Op.setImm(Val);
     return Op;
   }
-  
+
   static MachineOperand CreateFPImm(const ConstantFP *CFP) {
     MachineOperand Op(MachineOperand::MO_FPImmediate);
     Op.Contents.CFP = CFP;
     return Op;
   }
-  
+
   static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false,
                                   bool isKill = false, bool isDead = false,
                                   bool isUndef = false,
@@ -438,7 +460,7 @@ public:
     Op.IsUndef = isUndef;
     Op.IsEarlyClobber = isEarlyClobber;
     Op.IsDebug = isDebug;
-    Op.Contents.Reg.RegNo = Reg;
+    Op.SmallContents.RegNo = Reg;
     Op.Contents.Reg.Prev = 0;
     Op.Contents.Reg.Next = 0;
     Op.SubReg = SubReg;
@@ -506,7 +528,7 @@ public:
     Op.Contents.Sym = Sym;
     return Op;
   }
-  
+
   friend class MachineInstr;
   friend class MachineRegisterInfo;
 private:
@@ -521,7 +543,7 @@ private:
     assert(isReg() && "Can only add reg operand to use lists");
     return Contents.Reg.Prev != 0;
   }
-  
+
   /// AddRegOperandToRegInfo - Add this register operand to the specified
   /// MachineRegisterInfo.  If it is null, then the next/prev fields should be
   /// explicitly nulled out.
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 066c91b36cf5..79ff714df63d 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -16,6 +16,9 @@
 
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DebugLoc.h"
 #include <vector>
 
 namespace llvm {
@@ -24,13 +27,12 @@ namespace llvm {
 /// registers, including vreg register classes, use/def chains for registers,
 /// etc.
 class MachineRegisterInfo {
-  /// VRegInfo - Information we keep for each virtual register.  The entries in
-  /// this vector are actually converted to vreg numbers by adding the 
-  /// TargetRegisterInfo::FirstVirtualRegister delta to their index.
+  /// VRegInfo - Information we keep for each virtual register.
   ///
   /// Each element in this list contains the register class of the vreg and the
   /// start of the use/def list for the register.
-  std::vector<std::pair<const TargetRegisterClass*, MachineOperand*> > VRegInfo;
+  IndexedMap<std::pair<const TargetRegisterClass*, MachineOperand*>,
+             VirtReg2IndexFunctor> VRegInfo;
 
   /// RegClassVRegMap - This vector acts as a map from TargetRegisterClass to
   /// virtual registers. For each target register class, it keeps a list of
@@ -44,7 +46,7 @@ class MachineRegisterInfo {
   /// register for allocation. For example, if the hint is <0, 1024>, it means
   /// the allocator should prefer the physical register allocated to the virtual
   /// register of the hint.
-  std::vector<std::pair<unsigned, unsigned> > RegAllocHints;
+  IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
   
   /// PhysRegUseDefLists - This is an array of the head of the use/def list for
   /// physical registers.
@@ -64,7 +66,10 @@ class MachineRegisterInfo {
   /// stored in the second element.
   std::vector<std::pair<unsigned, unsigned> > LiveIns;
   std::vector<unsigned> LiveOuts;
-  
+
+  /// LiveInLocs - Keep track of location livein registers.
+  DenseMap<unsigned, DebugLoc> LiveInLocs;
+
   MachineRegisterInfo(const MachineRegisterInfo&); // DO NOT IMPLEMENT
   void operator=(const MachineRegisterInfo&);      // DO NOT IMPLEMENT
 public:
@@ -159,17 +164,15 @@ public:
   /// getRegUseDefListHead - Return the head pointer for the register use/def
   /// list for the specified virtual or physical register.
   MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
-    if (RegNo < TargetRegisterInfo::FirstVirtualRegister)
-      return PhysRegUseDefLists[RegNo];
-    RegNo -= TargetRegisterInfo::FirstVirtualRegister;
-    return VRegInfo[RegNo].second;
+    if (TargetRegisterInfo::isVirtualRegister(RegNo))
+      return VRegInfo[RegNo].second;
+    return PhysRegUseDefLists[RegNo];
   }
   
   MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
-    if (RegNo < TargetRegisterInfo::FirstVirtualRegister)
-      return PhysRegUseDefLists[RegNo];
-    RegNo -= TargetRegisterInfo::FirstVirtualRegister;
-    return VRegInfo[RegNo].second;
+    if (TargetRegisterInfo::isVirtualRegister(RegNo))
+      return VRegInfo[RegNo].second;
+    return PhysRegUseDefLists[RegNo];
   }
 
   /// getVRegDef - Return the machine instr that defines the specified virtual
@@ -194,8 +197,6 @@ public:
   /// getRegClass - Return the register class of the specified virtual register.
   ///
   const TargetRegisterClass *getRegClass(unsigned Reg) const {
-    Reg -= TargetRegisterInfo::FirstVirtualRegister;
-    assert(Reg < VRegInfo.size() && "Invalid vreg!");
     return VRegInfo[Reg].first;
   }
 
@@ -203,16 +204,22 @@ public:
   ///
   void setRegClass(unsigned Reg, const TargetRegisterClass *RC);
 
+  /// constrainRegClass - Constrain the register class of the specified virtual
+  /// register to be a common subclass of RC and the current register class.
+  /// Return the new register class, or NULL if no such class exists.
+  /// This should only be used when the constraint is known to be trivial, like
+  /// GR32 -> GR32_NOSP. Beware of increasing register pressure.
+  const TargetRegisterClass *constrainRegClass(unsigned Reg,
+                                               const TargetRegisterClass *RC);
+
   /// createVirtualRegister - Create and return a new virtual register in the
   /// function with the specified register class.
   ///
   unsigned createVirtualRegister(const TargetRegisterClass *RegClass);
 
-  /// getLastVirtReg - Return the highest currently assigned virtual register.
+  /// getNumVirtRegs - Return the number of virtual registers created.
   ///
-  unsigned getLastVirtReg() const {
-    return (unsigned)VRegInfo.size()+TargetRegisterInfo::FirstVirtualRegister-1;
-  }
+  unsigned getNumVirtRegs() const { return VRegInfo.size(); }
 
   /// getRegClassVirtRegs - Return the list of virtual registers of the given
   /// target register class.
@@ -224,8 +231,6 @@ public:
   /// setRegAllocationHint - Specify a register allocation hint for the
   /// specified virtual register.
   void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
-    Reg -= TargetRegisterInfo::FirstVirtualRegister;
-    assert(Reg < VRegInfo.size() && "Invalid vreg!");
     RegAllocHints[Reg].first  = Type;
     RegAllocHints[Reg].second = PrefReg;
   }
@@ -234,8 +239,6 @@ public:
   /// specified virtual register.
   std::pair<unsigned, unsigned>
   getRegAllocationHint(unsigned Reg) const {
-    Reg -= TargetRegisterInfo::FirstVirtualRegister;
-    assert(Reg < VRegInfo.size() && "Invalid vreg!");
     return RegAllocHints[Reg];
   }
 
@@ -273,7 +276,12 @@ public:
     LiveIns.push_back(std::make_pair(Reg, vreg));
   }
   void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); }
-  
+
+  /// addLiveInLoc - Keep track of location info for live in reg.
+  void addLiveInLoc(unsigned VReg, DebugLoc DL) {
+    LiveInLocs[VReg] = DL;
+  }
+
   // Iteration support for live in/out sets.  These sets are kept in sorted
   // order by their register number.
   typedef std::vector<std::pair<unsigned,unsigned> >::const_iterator
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h
index c316785dd112..244b466e1728 100644
--- a/include/llvm/CodeGen/MachineRelocation.h
+++ b/include/llvm/CodeGen/MachineRelocation.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_CODEGEN_MACHINERELOCATION_H
 #define LLVM_CODEGEN_MACHINERELOCATION_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
new file mode 100644
index 000000000000..b2224cb051dc
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -0,0 +1,425 @@
+//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Graph class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
+#define LLVM_CODEGEN_PBQP_GRAPH_H
+
+#include "Math.h"
+
+#include <list>
+#include <vector>
+#include <map>
+
+namespace PBQP {
+
+  /// PBQP Graph class.
+  /// Instances of this class describe PBQP problems.
+  class Graph {
+  private:
+
+    // ----- TYPEDEFS -----
+    class NodeEntry;
+    class EdgeEntry;
+
+    typedef std::list<NodeEntry> NodeList;
+    typedef std::list<EdgeEntry> EdgeList;
+
+  public:
+
+    typedef NodeList::iterator NodeItr;
+    typedef NodeList::const_iterator ConstNodeItr;
+
+    typedef EdgeList::iterator EdgeItr;
+    typedef EdgeList::const_iterator ConstEdgeItr;
+
+  private:
+
+    typedef std::list<EdgeItr> AdjEdgeList;
+  
+  public:
+
+    typedef AdjEdgeList::iterator AdjEdgeItr;
+
+  private:
+
+    class NodeEntry {
+    private:
+      Vector costs;      
+      AdjEdgeList adjEdges;
+      unsigned degree;
+      void *data;
+    public:
+      NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
+      Vector& getCosts() { return costs; }
+      const Vector& getCosts() const { return costs; }
+      unsigned getDegree() const { return degree; }
+      AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
+      AdjEdgeItr edgesEnd() { return adjEdges.end(); }
+      AdjEdgeItr addEdge(EdgeItr e) {
+        ++degree;
+        return adjEdges.insert(adjEdges.end(), e);
+      }
+      void removeEdge(AdjEdgeItr ae) {
+        --degree;
+        adjEdges.erase(ae);
+      }
+      void setData(void *data) { this->data = data; }
+      void* getData() { return data; }
+    };
+
+    class EdgeEntry {
+    private:
+      NodeItr node1, node2;
+      Matrix costs;
+      AdjEdgeItr node1AEItr, node2AEItr;
+      void *data;
+    public:
+      EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
+        : node1(node1), node2(node2), costs(costs) {}
+      NodeItr getNode1() const { return node1; }
+      NodeItr getNode2() const { return node2; }
+      Matrix& getCosts() { return costs; }
+      const Matrix& getCosts() const { return costs; }
+      void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
+      AdjEdgeItr getNode1AEItr() { return node1AEItr; }
+      void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
+      AdjEdgeItr getNode2AEItr() { return node2AEItr; }
+      void setData(void *data) { this->data = data; }
+      void *getData() { return data; }
+    };
+
+    // ----- MEMBERS -----
+
+    NodeList nodes;
+    unsigned numNodes;
+
+    EdgeList edges;
+    unsigned numEdges;
+
+    // ----- INTERNAL METHODS -----
+
+    NodeEntry& getNode(NodeItr nItr) { return *nItr; }
+    const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
+
+    EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
+    const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
+
+    NodeItr addConstructedNode(const NodeEntry &n) {
+      ++numNodes;
+      return nodes.insert(nodes.end(), n);
+    }
+
+    EdgeItr addConstructedEdge(const EdgeEntry &e) {
+      assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
+             "Attempt to add duplicate edge.");
+      ++numEdges;
+      EdgeItr edgeItr = edges.insert(edges.end(), e);
+      EdgeEntry &ne = getEdge(edgeItr);
+      NodeEntry &n1 = getNode(ne.getNode1());
+      NodeEntry &n2 = getNode(ne.getNode2());
+      // Sanity check on matrix dimensions:
+      assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
+             (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
+             "Edge cost dimensions do not match node costs dimensions.");
+      ne.setNode1AEItr(n1.addEdge(edgeItr));
+      ne.setNode2AEItr(n2.addEdge(edgeItr));
+      return edgeItr;
+    }
+
+    inline void copyFrom(const Graph &other);
+  public:
+
+    /// \brief Construct an empty PBQP graph.
+    Graph() : numNodes(0), numEdges(0) {}
+
+    /// \brief Copy construct this graph from "other". Note: Does not copy node
+    ///        and edge data, only graph structure and costs.
+    /// @param other Source graph to copy from.
+    Graph(const Graph &other) : numNodes(0), numEdges(0) {
+      copyFrom(other);
+    }
+
+    /// \brief Make this graph a copy of "other". Note: Does not copy node and
+    ///        edge data, only graph structure and costs.
+    /// @param other The graph to copy from.
+    /// @return A reference to this graph.
+    ///
+    /// This will clear the current graph, erasing any nodes and edges added,
+    /// before copying from other.
+    Graph& operator=(const Graph &other) {
+      clear();      
+      copyFrom(other);
+      return *this;
+    }
+
+    /// \brief Add a node with the given costs.
+    /// @param costs Cost vector for the new node.
+    /// @return Node iterator for the added node.
+    NodeItr addNode(const Vector &costs) {
+      return addConstructedNode(NodeEntry(costs));
+    }
+
+    /// \brief Add an edge between the given nodes with the given costs.
+    /// @param n1Itr First node.
+    /// @param n2Itr Second node.
+    /// @return Edge iterator for the added edge.
+    EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr,
+                    const Matrix &costs) {
+      assert(getNodeCosts(n1Itr).getLength() == costs.getRows() &&
+             getNodeCosts(n2Itr).getLength() == costs.getCols() &&
+             "Matrix dimensions mismatch.");
+      return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); 
+    }
+
+    /// \brief Get the number of nodes in the graph.
+    /// @return Number of nodes in the graph.
+    unsigned getNumNodes() const { return numNodes; }
+
+    /// \brief Get the number of edges in the graph.
+    /// @return Number of edges in the graph.
+    unsigned getNumEdges() const { return numEdges; }
+
+    /// \brief Get a node's cost vector.
+    /// @param nItr Node iterator.
+    /// @return Node cost vector.
+    Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); }
+
+    /// \brief Get a node's cost vector (const version).
+    /// @param nItr Node iterator.
+    /// @return Node cost vector.
+    const Vector& getNodeCosts(ConstNodeItr nItr) const {
+      return getNode(nItr).getCosts();
+    }
+
+    /// \brief Set a node's data pointer.
+    /// @param nItr Node iterator.
+    /// @param data Pointer to node data.
+    ///
+    /// Typically used by a PBQP solver to attach data to aid in solution.
+    void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); }
+
+    /// \brief Get the node's data pointer.
+    /// @param nItr Node iterator.
+    /// @return Pointer to node data.
+    void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); }
+    
+    /// \brief Get an edge's cost matrix.
+    /// @param eItr Edge iterator.
+    /// @return Edge cost matrix.
+    Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); }
+
+    /// \brief Get an edge's cost matrix (const version).
+    /// @param eItr Edge iterator.
+    /// @return Edge cost matrix.
+    const Matrix& getEdgeCosts(ConstEdgeItr eItr) const {
+      return getEdge(eItr).getCosts();
+    }
+
+    /// \brief Set an edge's data pointer.
+    /// @param eItr Edge iterator.
+    /// @param data Pointer to edge data.
+    ///
+    /// Typically used by a PBQP solver to attach data to aid in solution.
+    void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); }
+
+    /// \brief Get an edge's data pointer.
+    /// @param eItr Edge iterator.
+    /// @return Pointer to edge data. 
+    void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); }
+
+    /// \brief Get a node's degree.
+    /// @param nItr Node iterator.
+    /// @return The degree of the node.
+    unsigned getNodeDegree(NodeItr nItr) const {
+      return getNode(nItr).getDegree();
+    }
+
+    /// \brief Begin iterator for node set.
+    NodeItr nodesBegin() { return nodes.begin(); }
+
+    /// \brief Begin const iterator for node set.
+    ConstNodeItr nodesBegin() const { return nodes.begin(); }
+
+    /// \brief End iterator for node set.
+    NodeItr nodesEnd() { return nodes.end(); }
+
+    /// \brief End const iterator for node set.
+    ConstNodeItr nodesEnd() const { return nodes.end(); }
+
+    /// \brief Begin iterator for edge set.
+    EdgeItr edgesBegin() { return edges.begin(); }
+
+    /// \brief End iterator for edge set.
+    EdgeItr edgesEnd() { return edges.end(); }
+
+    /// \brief Get begin iterator for adjacent edge set.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges connected to the given node.
+    AdjEdgeItr adjEdgesBegin(NodeItr nItr) {
+      return getNode(nItr).edgesBegin();
+    }
+
+    /// \brief Get end iterator for adjacent edge set.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges connected to the given node.
+    AdjEdgeItr adjEdgesEnd(NodeItr nItr) {
+      return getNode(nItr).edgesEnd();
+    }
+
+    /// \brief Get the first node connected to this edge.
+    /// @param eItr Edge iterator.
+    /// @return The first node connected to the given edge. 
+    NodeItr getEdgeNode1(EdgeItr eItr) {
+      return getEdge(eItr).getNode1();
+    }
+
+    /// \brief Get the second node connected to this edge.
+    /// @param eItr Edge iterator.
+    /// @return The second node connected to the given edge. 
+    NodeItr getEdgeNode2(EdgeItr eItr) {
+      return getEdge(eItr).getNode2();
+    } 
+
+    /// \brief Get the "other" node connected to this edge.
+    /// @param eItr Edge iterator.
+    /// @param nItr Node iterator for the "given" node.
+    /// @return The iterator for the "other" node connected to this edge. 
+    NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) {
+      EdgeEntry &e = getEdge(eItr);
+      if (e.getNode1() == nItr) {
+        return e.getNode2();
+      } // else
+      return e.getNode1();
+    }
+
+    /// \brief Get the edge connecting two nodes.
+    /// @param n1Itr First node iterator.
+    /// @param n2Itr Second node iterator.
+    /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists,
+    ///         otherwise returns edgesEnd(). 
+    EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
+      for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
+         aeItr != aeEnd; ++aeItr) {
+        if ((getEdgeNode1(*aeItr) == n2Itr) ||
+            (getEdgeNode2(*aeItr) == n2Itr)) {
+          return *aeItr;
+        }
+      }
+      return edges.end();
+    }
+
+    /// \brief Remove a node from the graph.
+    /// @param nItr Node iterator.
+    void removeNode(NodeItr nItr) {
+      NodeEntry &n = getNode(nItr);
+      for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
+        EdgeItr eItr = *itr;
+        ++itr;
+        removeEdge(eItr); 
+      }
+      nodes.erase(nItr);
+      --numNodes;
+    }
+
+    /// \brief Remove an edge from the graph.
+    /// @param eItr Edge iterator.
+    void removeEdge(EdgeItr eItr) {
+      EdgeEntry &e = getEdge(eItr);
+      NodeEntry &n1 = getNode(e.getNode1());
+      NodeEntry &n2 = getNode(e.getNode2());
+      n1.removeEdge(e.getNode1AEItr());
+      n2.removeEdge(e.getNode2AEItr());
+      edges.erase(eItr);
+      --numEdges;
+    }
+
+    /// \brief Remove all nodes and edges from the graph.
+    void clear() {
+      nodes.clear();
+      edges.clear();
+      numNodes = numEdges = 0;
+    }
+
+    /// \brief Print a representation of this graph in DOT format.
+    /// @param os Output stream to print on.
+    template <typename OStream>
+    void printDot(OStream &os) {
+    
+      os << "graph {\n";
+
+      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+           nodeItr != nodeEnd; ++nodeItr) {
+
+        os << "  node" << nodeItr << " [ label=\""
+           << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+      }
+
+      os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+           edgeItr != edgeEnd; ++edgeItr) {
+
+        os << "  node" << getEdgeNode1(edgeItr)
+           << " -- node" << getEdgeNode2(edgeItr)
+           << " [ label=\"";
+
+        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+          os << edgeCosts.getRowAsVector(i) << "\\n";
+        }
+        os << "\" ]\n";
+      }
+      os << "}\n";
+    }
+
+  };
+
+  class NodeItrComparator {
+  public:
+    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+
+    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+  };
+
+  class EdgeItrCompartor {
+  public:
+    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+
+    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+  };
+
+  void Graph::copyFrom(const Graph &other) {
+    std::map<Graph::ConstNodeItr, Graph::NodeItr,
+             NodeItrComparator> nodeMap;
+
+     for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+                             nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+    }
+      
+  }
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/include/llvm/CodeGen/PBQP/HeuristicBase.h b/include/llvm/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 000000000000..791c227f0d07
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,246 @@
+//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+  /// \brief Abstract base class for heuristic implementations.
+  ///
+  /// This class provides a handy base for heuristic implementations with common
+  /// solver behaviour implemented for a number of methods.
+  ///
+  /// To implement your own heuristic using this class as a base you'll have to
+  /// implement, as a minimum, the following methods:
+  /// <ul>
+  ///   <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
+  ///        heuristic reduction list.
+  ///   <li> void heuristicReduce() : Perform a single heuristic reduction.
+  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+  ///        change to the cost matrix on the given edge (by R2).
+  ///   <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new 
+  ///        costs on the given edge.
+  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+  ///        edge into the PBQP graph (by R2).
+  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+  ///        disconnection of the given edge from the given node.
+  ///   <li> A constructor for your derived class : to pass back a reference to
+  ///        the solver which is using this heuristic.
+  /// </ul>
+  ///
+  /// These methods are implemented in this class for documentation purposes,
+  /// but will assert if called.
+  /// 
+  /// Note that this class uses the curiously recursive template idiom to
+  /// forward calls to the derived class. These methods need not be made
+  /// virtual, and indeed probably shouldn't for performance reasons.
+  ///
+  /// You'll also need to provide NodeData and EdgeData structs in your class.
+  /// These can be used to attach data relevant to your heuristic to each
+  /// node/edge in the PBQP graph.
+
+  template <typename HImpl>
+  class HeuristicBase {
+  private:
+
+    typedef std::list<Graph::NodeItr> OptimalList;
+
+    HeuristicSolverImpl<HImpl> &s;
+    Graph &g;
+    OptimalList optimalList;
+
+    // Return a reference to the derived heuristic.
+    HImpl& impl() { return static_cast<HImpl&>(*this); }
+
+    // Add the given node to the optimal reductions list. Keep an iterator to
+    // its location for fast removal. 
+    void addToOptimalReductionList(Graph::NodeItr nItr) {
+      optimalList.insert(optimalList.end(), nItr);
+    }
+
+  public:
+
+    /// \brief Construct an instance with a reference to the given solver.
+    /// @param solver The solver which is using this heuristic instance.
+    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+      : s(solver), g(s.getGraph()) { }
+
+    /// \brief Get the solver which is using this heuristic instance.
+    /// @return The solver which is using this heuristic instance.
+    ///
+    /// You can use this method to get access to the solver in your derived
+    /// heuristic implementation.
+    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+    /// \brief Get the graph representing the problem to be solved.
+    /// @return The graph representing the problem to be solved.
+    Graph& getGraph() { return g; }
+
+    /// \brief Tell the solver to simplify the graph before the reduction phase.
+    /// @return Whether or not the solver should run a simplification phase
+    ///         prior to the main setup and reduction.
+    ///
+    /// HeuristicBase returns true from this method as it's a sensible default,
+    /// however you can over-ride it in your derived class if you want different
+    /// behaviour.
+    bool solverRunSimplify() const { return true; }
+
+    /// \brief Decide whether a node should be optimally or heuristically 
+    ///        reduced.
+    /// @return Whether or not the given node should be listed for optimal
+    ///         reduction (via R0, R1 or R2).
+    ///
+    /// HeuristicBase returns true for any node with degree less than 3. This is
+    /// sane and sensible for many situations, but not all. You can over-ride
+    /// this method in your derived class if you want a different selection
+    /// criteria. Note however that your criteria for selecting optimal nodes
+    /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or
+    /// higher should not be selected under any circumstances.
+    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+      if (g.getNodeDegree(nItr) < 3)
+        return true;
+      // else
+      return false;
+    }
+
+    /// \brief Add the given node to the list of nodes to be optimally reduced.
+    /// @return nItr Node iterator to be added.
+    ///
+    /// You probably don't want to over-ride this, except perhaps to record
+    /// statistics before calling this implementation. HeuristicBase relies on
+    /// its behaviour.
+    void addToOptimalReduceList(Graph::NodeItr nItr) {
+      optimalList.push_back(nItr);
+    }
+
+    /// \brief Initialise the heuristic.
+    ///
+    /// HeuristicBase iterates over all nodes in the problem and adds them to
+    /// the appropriate list using addToOptimalReduceList or
+    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+    ///
+    /// This behaviour should be fine for most situations.
+    void setup() {
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        if (impl().shouldOptimallyReduce(nItr)) {
+          addToOptimalReduceList(nItr);
+        } else {
+          impl().addToHeuristicReduceList(nItr);
+        }
+      }
+    }
+
+    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+    /// @return True if a reduction takes place, false if the optimal reduce
+    ///         list is empty.
+    ///
+    /// Selects a node from the optimal reduce list and removes it, applying
+    /// R0, R1 or R2 as appropriate based on the selected node's degree.
+    bool optimalReduce() {
+      if (optimalList.empty())
+        return false;
+
+      Graph::NodeItr nItr = optimalList.front();
+      optimalList.pop_front();
+
+      switch (s.getSolverDegree(nItr)) {
+        case 0: s.applyR0(nItr); break;
+        case 1: s.applyR1(nItr); break;
+        case 2: s.applyR2(nItr); break;
+        default: assert(false &&
+                        "Optimal reductions of degree > 2 nodes is invalid.");
+      }
+
+      return true;
+    }
+
+    /// \brief Perform the PBQP reduction process.
+    ///
+    /// Reduces the problem to the empty graph by repeated application of the
+    /// reduction rules R0, R1, R2 and RN.
+    /// R0, R1 or R2 are always applied if possible before RN is used.
+    void reduce() {
+      bool finished = false;
+
+      while (!finished) {
+        if (!optimalReduce()) {
+          if (impl().heuristicReduce()) {
+            getSolver().recordRN();
+          } else {
+            finished = true;
+          }
+        }
+      }
+    }
+
+    /// \brief Add a node to the heuristic reduce list.
+    /// @param nItr Node iterator to add to the heuristic reduce list.
+    void addToHeuristicList(Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Heuristically reduce one of the nodes in the heuristic
+    ///        reduce list.
+    /// @return True if a reduction takes place, false if the heuristic reduce
+    ///         list is empty.
+    void heuristicReduce() {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Prepare a change in the costs on the given edge.
+    /// @param eItr Edge iterator.    
+    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void postUpdateEdgeCostts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the addition of a new edge into the PBQP graph.
+    /// @param eItr Edge iterator for the added edge.
+    void handleAddEdge(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle disconnection of an edge from a node.
+    /// @param eItr Edge iterator for edge being disconnected.
+    /// @param nItr Node iterator for the node being disconnected from.
+    ///
+    /// Edges are frequently removed due to the removal of a node. This
+    /// method allows for the effect to be computed only for the remaining
+    /// node in the graph.
+    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Clean up any structures used by HeuristicBase.
+    ///
+    /// At present this just performs a sanity check: that the optimal reduce
+    /// list is empty now that reduction has completed.
+    ///
+    /// If your derived class has more complex structures which need tearing
+    /// down you should over-ride this method but include a call back to this
+    /// implementation.
+    void cleanup() {
+      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+    }
+
+  };
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/include/llvm/CodeGen/PBQP/HeuristicSolver.h b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 000000000000..35514f967478
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,616 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions for
+// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is
+// used to select a node for reduction. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Graph.h"
+#include "Solution.h"
+#include <vector>
+#include <limits>
+
+namespace PBQP {
+
+  /// \brief Heuristic PBQP solver implementation.
+  ///
+  /// This class should usually be created (and destroyed) indirectly via a call
+  /// to HeuristicSolver<HImpl>::solve(Graph&).
+  /// See the comments for HeuristicSolver.
+  ///
+  /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
+  /// backpropagation phase, and maintains the internal copy of the graph on
+  /// which the reduction is carried out (the original being kept to facilitate
+  /// backpropagation).
+  template <typename HImpl>
+  class HeuristicSolverImpl {
+  private:
+
+    typedef typename HImpl::NodeData HeuristicNodeData;
+    typedef typename HImpl::EdgeData HeuristicEdgeData;
+
+    typedef std::list<Graph::EdgeItr> SolverEdges;
+
+  public:
+  
+    /// \brief Iterator type for edges in the solver graph.
+    typedef SolverEdges::iterator SolverEdgeItr;
+
+  private:
+
+    class NodeData {
+    public:
+      NodeData() : solverDegree(0) {}
+
+      HeuristicNodeData& getHeuristicData() { return hData; }
+
+      SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
+        ++solverDegree;
+        return solverEdges.insert(solverEdges.end(), eItr);
+      }
+
+      void removeSolverEdge(SolverEdgeItr seItr) {
+        --solverDegree;
+        solverEdges.erase(seItr);
+      }
+
+      SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
+      SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
+      unsigned getSolverDegree() const { return solverDegree; }
+      void clearSolverEdges() {
+        solverDegree = 0;
+        solverEdges.clear(); 
+      }
+      
+    private:
+      HeuristicNodeData hData;
+      unsigned solverDegree;
+      SolverEdges solverEdges;
+    };
+ 
+    class EdgeData {
+    public:
+      HeuristicEdgeData& getHeuristicData() { return hData; }
+
+      void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
+        this->n1SolverEdgeItr = n1SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
+
+      void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){
+        this->n2SolverEdgeItr = n2SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
+
+    private:
+
+      HeuristicEdgeData hData;
+      SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
+    };
+
+    Graph &g;
+    HImpl h;
+    Solution s;
+    std::vector<Graph::NodeItr> stack;
+
+    typedef std::list<NodeData> NodeDataList;
+    NodeDataList nodeDataList;
+
+    typedef std::list<EdgeData> EdgeDataList;
+    EdgeDataList edgeDataList;
+
+  public:
+
+    /// \brief Construct a heuristic solver implementation to solve the given
+    ///        graph.
+    /// @param g The graph representing the problem instance to be solved.
+    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}  
+
+    /// \brief Get the graph being solved by this solver.
+    /// @return The graph representing the problem instance being solved by this
+    ///         solver.
+    Graph& getGraph() { return g; }
+
+    /// \brief Get the heuristic data attached to the given node.
+    /// @param nItr Node iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).getHeuristicData();
+    }
+
+    /// \brief Get the heuristic data attached to the given edge.
+    /// @param eItr Edge iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+      return getSolverEdgeData(eItr).getHeuristicData();
+    }
+
+    /// \brief Begin iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges adjacent to the given node
+    ///         in the solver graph. 
+    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesBegin();
+    }
+
+    /// \brief End iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges adjacent to the given node in
+    ///         the solver graph. 
+    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesEnd();
+    }
+
+    /// \brief Remove a node from the solver graph.
+    /// @param eItr Edge iterator for edge to be removed.
+    ///
+    /// Does <i>not</i> notify the heuristic of the removal. That should be
+    /// done manually if necessary.
+    void removeSolverEdge(Graph::EdgeItr eItr) {
+      EdgeData &eData = getSolverEdgeData(eItr);
+      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+      n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
+      n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
+    }
+
+    /// \brief Compute a solution to the PBQP problem instance with which this
+    ///        heuristic solver was constructed.
+    /// @return A solution to the PBQP problem.
+    ///
+    /// Performs the full PBQP heuristic solver algorithm, including setup,
+    /// calls to the heuristic (which will call back to the reduction rules in
+    /// this class), and cleanup.
+    Solution computeSolution() {
+      setup();
+      h.setup();
+      h.reduce();
+      backpropagate();
+      h.cleanup();
+      cleanup();
+      return s;
+    }
+
+    /// \brief Add to the end of the stack.
+    /// @param nItr Node iterator to add to the reduction stack.
+    void pushToStack(Graph::NodeItr nItr) {
+      getSolverNodeData(nItr).clearSolverEdges();
+      stack.push_back(nItr);
+    }
+
+    /// \brief Returns the solver degree of the given node.
+    /// @param nItr Node iterator for which degree is requested.
+    /// @return Node degree in the <i>solver</i> graph (not the original graph).
+    unsigned getSolverDegree(Graph::NodeItr nItr) {
+      return  getSolverNodeData(nItr).getSolverDegree();
+    }
+
+    /// \brief Set the solution of the given node.
+    /// @param nItr Node iterator to set solution for.
+    /// @param selection Selection for node.
+    void setSolution(const Graph::NodeItr &nItr, unsigned selection) {
+      s.setSelection(nItr, selection);
+
+      for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
+                             aeEnd = g.adjEdgesEnd(nItr);
+           aeItr != aeEnd; ++aeItr) {
+        Graph::EdgeItr eItr(*aeItr);
+        Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr));
+        getSolverNodeData(anItr).addSolverEdge(eItr);
+      }
+    }
+
+    /// \brief Apply rule R0.
+    /// @param nItr Node iterator for node to apply R0 to.
+    ///
+    /// Node will be automatically pushed to the solver stack.
+    void applyR0(Graph::NodeItr nItr) {
+      assert(getSolverNodeData(nItr).getSolverDegree() == 0 &&
+             "R0 applied to node with degree != 0.");
+
+      // Nothing to do. Just push the node onto the reduction stack.
+      pushToStack(nItr);
+
+      s.recordR0();
+    }
+
+    /// \brief Apply rule R1.
+    /// @param xnItr Node iterator for node to apply R1 to.
+    ///
+    /// Node will be automatically pushed to the solver stack.
+    void applyR1(Graph::NodeItr xnItr) {
+      NodeData &nd = getSolverNodeData(xnItr);
+      assert(nd.getSolverDegree() == 1 &&
+             "R1 applied to node with degree != 1.");
+
+      Graph::EdgeItr eItr = *nd.solverEdgesBegin();
+
+      const Matrix &eCosts = g.getEdgeCosts(eItr);
+      const Vector &xCosts = g.getNodeCosts(xnItr);
+      
+      // Duplicate a little to avoid transposing matrices.
+      if (xnItr == g.getEdgeNode1(eItr)) {
+        Graph::NodeItr ynItr = g.getEdgeNode2(eItr);
+        Vector &yCosts = g.getNodeCosts(ynItr);
+        for (unsigned j = 0; j < yCosts.getLength(); ++j) {
+          PBQPNum min = eCosts[0][j] + xCosts[0];
+          for (unsigned i = 1; i < xCosts.getLength(); ++i) {
+            PBQPNum c = eCosts[i][j] + xCosts[i];
+            if (c < min)
+              min = c;
+          }
+          yCosts[j] += min;
+        }
+        h.handleRemoveEdge(eItr, ynItr);
+     } else {
+        Graph::NodeItr ynItr = g.getEdgeNode1(eItr);
+        Vector &yCosts = g.getNodeCosts(ynItr);
+        for (unsigned i = 0; i < yCosts.getLength(); ++i) {
+          PBQPNum min = eCosts[i][0] + xCosts[0];
+          for (unsigned j = 1; j < xCosts.getLength(); ++j) {
+            PBQPNum c = eCosts[i][j] + xCosts[j];
+            if (c < min)
+              min = c;
+          }
+          yCosts[i] += min;
+        }
+        h.handleRemoveEdge(eItr, ynItr);
+      }
+      removeSolverEdge(eItr);
+      assert(nd.getSolverDegree() == 0 &&
+             "Degree 1 with edge removed should be 0.");
+      pushToStack(xnItr);
+      s.recordR1();
+    }
+
+    /// \brief Apply rule R2.
+    /// @param xnItr Node iterator for node to apply R2 to.
+    ///
+    /// Node will be automatically pushed to the solver stack.
+    void applyR2(Graph::NodeItr xnItr) {
+      assert(getSolverNodeData(xnItr).getSolverDegree() == 2 &&
+             "R2 applied to node with degree != 2.");
+
+      NodeData &nd = getSolverNodeData(xnItr);
+      const Vector &xCosts = g.getNodeCosts(xnItr);
+
+      SolverEdgeItr aeItr = nd.solverEdgesBegin();
+      Graph::EdgeItr yxeItr = *aeItr,
+                     zxeItr = *(++aeItr);
+
+      Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr),
+                     znItr = g.getEdgeOtherNode(zxeItr, xnItr);
+
+      bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr),
+           flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr);
+
+      const Matrix *yxeCosts = flipEdge1 ?
+        new Matrix(g.getEdgeCosts(yxeItr).transpose()) :
+        &g.getEdgeCosts(yxeItr);
+
+      const Matrix *zxeCosts = flipEdge2 ?
+        new Matrix(g.getEdgeCosts(zxeItr).transpose()) :
+        &g.getEdgeCosts(zxeItr);
+
+      unsigned xLen = xCosts.getLength(),
+               yLen = yxeCosts->getRows(),
+               zLen = zxeCosts->getRows();
+               
+      Matrix delta(yLen, zLen);
+
+      for (unsigned i = 0; i < yLen; ++i) {
+        for (unsigned j = 0; j < zLen; ++j) {
+          PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0];
+          for (unsigned k = 1; k < xLen; ++k) {
+            PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k];
+            if (c < min) {
+              min = c;
+            }
+          }
+          delta[i][j] = min;
+        }
+      }
+
+      if (flipEdge1)
+        delete yxeCosts;
+
+      if (flipEdge2)
+        delete zxeCosts;
+
+      Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr);
+      bool addedEdge = false;
+
+      if (yzeItr == g.edgesEnd()) {
+        yzeItr = g.addEdge(ynItr, znItr, delta);
+        addedEdge = true;
+      } else {
+        Matrix &yzeCosts = g.getEdgeCosts(yzeItr);
+        h.preUpdateEdgeCosts(yzeItr);
+        if (ynItr == g.getEdgeNode1(yzeItr)) {
+          yzeCosts += delta;
+        } else {
+          yzeCosts += delta.transpose();
+        }
+      }
+
+      bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr);
+
+      if (!addedEdge) {
+        // If we modified the edge costs let the heuristic know.
+        h.postUpdateEdgeCosts(yzeItr);
+      }
+ 
+      if (nullCostEdge) {
+        // If this edge ended up null remove it.
+        if (!addedEdge) {
+          // We didn't just add it, so we need to notify the heuristic
+          // and remove it from the solver.
+          h.handleRemoveEdge(yzeItr, ynItr);
+          h.handleRemoveEdge(yzeItr, znItr);
+          removeSolverEdge(yzeItr);
+        }
+        g.removeEdge(yzeItr);
+      } else if (addedEdge) {
+        // If the edge was added, and non-null, finish setting it up, add it to
+        // the solver & notify heuristic.
+        edgeDataList.push_back(EdgeData());
+        g.setEdgeData(yzeItr, &edgeDataList.back());
+        addSolverEdge(yzeItr);
+        h.handleAddEdge(yzeItr);
+      }
+
+      h.handleRemoveEdge(yxeItr, ynItr);
+      removeSolverEdge(yxeItr);
+      h.handleRemoveEdge(zxeItr, znItr);
+      removeSolverEdge(zxeItr);
+
+      pushToStack(xnItr);
+      s.recordR2();
+    }
+
+    /// \brief Record an application of the RN rule.
+    ///
+    /// For use by the HeuristicBase.
+    void recordRN() { s.recordRN(); } 
+
+  private:
+
+    NodeData& getSolverNodeData(Graph::NodeItr nItr) {
+      return *static_cast<NodeData*>(g.getNodeData(nItr));
+    }
+
+    EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
+      return *static_cast<EdgeData*>(g.getEdgeData(eItr));
+    }
+
+    void addSolverEdge(Graph::EdgeItr eItr) {
+      EdgeData &eData = getSolverEdgeData(eItr);
+      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+      eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
+      eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
+    }
+
+    void setup() {
+      if (h.solverRunSimplify()) {
+        simplify();
+      }
+
+      // Create node data objects.
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        nodeDataList.push_back(NodeData());
+        g.setNodeData(nItr, &nodeDataList.back());
+      }
+
+      // Create edge data objects.
+      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+           eItr != eEnd; ++eItr) {
+        edgeDataList.push_back(EdgeData());
+        g.setEdgeData(eItr, &edgeDataList.back());
+        addSolverEdge(eItr);
+      }
+    }
+
+    void simplify() {
+      disconnectTrivialNodes();
+      eliminateIndependentEdges();
+    }
+
+    // Eliminate trivial nodes.
+    void disconnectTrivialNodes() {
+      unsigned numDisconnected = 0;
+
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+
+        if (g.getNodeCosts(nItr).getLength() == 1) {
+
+          std::vector<Graph::EdgeItr> edgesToRemove;
+
+          for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
+                                 aeEnd = g.adjEdgesEnd(nItr);
+               aeItr != aeEnd; ++aeItr) {
+
+            Graph::EdgeItr eItr = *aeItr;
+
+            if (g.getEdgeNode1(eItr) == nItr) {
+              Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
+              g.getNodeCosts(otherNodeItr) +=
+                g.getEdgeCosts(eItr).getRowAsVector(0);
+            }
+            else {
+              Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
+              g.getNodeCosts(otherNodeItr) +=
+                g.getEdgeCosts(eItr).getColAsVector(0);
+            }
+
+            edgesToRemove.push_back(eItr);
+          }
+
+          if (!edgesToRemove.empty())
+            ++numDisconnected;
+
+          while (!edgesToRemove.empty()) {
+            g.removeEdge(edgesToRemove.back());
+            edgesToRemove.pop_back();
+          }
+        }
+      }
+    }
+
+    void eliminateIndependentEdges() {
+      std::vector<Graph::EdgeItr> edgesToProcess;
+      unsigned numEliminated = 0;
+
+      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+           eItr != eEnd; ++eItr) {
+        edgesToProcess.push_back(eItr);
+      }
+
+      while (!edgesToProcess.empty()) {
+        if (tryToEliminateEdge(edgesToProcess.back()))
+          ++numEliminated;
+        edgesToProcess.pop_back();
+      }
+    }
+
+    bool tryToEliminateEdge(Graph::EdgeItr eItr) {
+      if (tryNormaliseEdgeMatrix(eItr)) {
+        g.removeEdge(eItr);
+        return true; 
+      }
+      return false;
+    }
+
+    bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
+
+      const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
+
+      Matrix &edgeCosts = g.getEdgeCosts(eItr);
+      Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
+             &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
+
+      for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+        PBQPNum rowMin = infinity;
+
+        for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+          if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
+            rowMin = edgeCosts[r][c];
+        }
+
+        uCosts[r] += rowMin;
+
+        if (rowMin != infinity) {
+          edgeCosts.subFromRow(r, rowMin);
+        }
+        else {
+          edgeCosts.setRow(r, 0);
+        }
+      }
+
+      for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+        PBQPNum colMin = infinity;
+
+        for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+          if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
+            colMin = edgeCosts[r][c];
+        }
+
+        vCosts[c] += colMin;
+
+        if (colMin != infinity) {
+          edgeCosts.subFromCol(c, colMin);
+        }
+        else {
+          edgeCosts.setCol(c, 0);
+        }
+      }
+
+      return edgeCosts.isZero();
+    }
+
+    void backpropagate() {
+      while (!stack.empty()) {
+        computeSolution(stack.back());
+        stack.pop_back();
+      }
+    }
+
+    void computeSolution(Graph::NodeItr nItr) {
+
+      NodeData &nodeData = getSolverNodeData(nItr);
+
+      Vector v(g.getNodeCosts(nItr));
+
+      // Solve based on existing solved edges.
+      for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
+                         solvedEdgeEnd = nodeData.solverEdgesEnd();
+           solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
+
+        Graph::EdgeItr eItr(*solvedEdgeItr);
+        Matrix &edgeCosts = g.getEdgeCosts(eItr);
+
+        if (nItr == g.getEdgeNode1(eItr)) {
+          Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
+          unsigned adjSolution = s.getSelection(adjNode);
+          v += edgeCosts.getColAsVector(adjSolution);
+        }
+        else {
+          Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
+          unsigned adjSolution = s.getSelection(adjNode);
+          v += edgeCosts.getRowAsVector(adjSolution);
+        }
+
+      }
+
+      setSolution(nItr, v.minIndex());
+    }
+
+    void cleanup() {
+      h.cleanup();
+      nodeDataList.clear();
+      edgeDataList.clear();
+    }
+  };
+
+  /// \brief PBQP heuristic solver class.
+  ///
+  /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
+  /// by calling
+  /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
+  ///
+  /// The choice of heuristic for the H parameter will affect both the solver
+  /// speed and solution quality. The heuristic should be chosen based on the
+  /// nature of the problem being solved.
+  /// Currently the only solver included with LLVM is the Briggs heuristic for
+  /// register allocation.
+  template <typename HImpl>
+  class HeuristicSolver {
+  public:
+    static Solution solve(Graph &g) {
+      HeuristicSolverImpl<HImpl> hs(g);
+      return hs.computeSolution();
+    }
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
new file mode 100644
index 000000000000..47a287ccf2f6
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -0,0 +1,464 @@
+//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the Briggs test for "allocability" of nodes in a
+// PBQP graph representing a register allocation problem. Nodes which can be
+// proven allocable (by a safe and relatively accurate test) are removed from
+// the PBQP graph first. If no provably allocable node is present in the graph
+// then the node with the minimal spill-cost to degree ratio is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+
+#include "../HeuristicSolver.h"
+#include "../HeuristicBase.h"
+
+#include <set>
+#include <limits>
+
+namespace PBQP {
+  namespace Heuristics {
+
+    /// \brief PBQP Heuristic which applies an allocability test based on
+    ///        Briggs.
+    /// 
+    /// This heuristic assumes that the elements of cost vectors in the PBQP
+    /// problem represent storage options, with the first being the spill
+    /// option and subsequent elements representing legal registers for the
+    /// corresponding node. Edge cost matrices are likewise assumed to represent
+    /// register constraints.
+    /// If one or more nodes can be proven allocable by this heuristic (by
+    /// inspection of their constraint matrices) then the allocable node of
+    /// highest degree is selected for the next reduction and pushed to the
+    /// solver stack. If no nodes can be proven allocable then the node with
+    /// the lowest estimated spill cost is selected and push to the solver stack
+    /// instead.
+    /// 
+    /// This implementation is built on top of HeuristicBase.       
+    class Briggs : public HeuristicBase<Briggs> {
+    private:
+
+      class LinkDegreeComparator {
+      public:
+        LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
+        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+          if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
+            return true;
+          return false;
+        }
+      private:
+        HeuristicSolverImpl<Briggs> *s;
+      };
+
+      class SpillCostComparator {
+      public:
+        SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
+          : s(&s), g(&s.getGraph()) {}
+        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+          const PBQP::Vector &cv1 = g->getNodeCosts(n1Itr);
+          const PBQP::Vector &cv2 = g->getNodeCosts(n2Itr);
+
+          PBQPNum cost1 = cv1[0] / s->getSolverDegree(n1Itr);
+          PBQPNum cost2 = cv2[0] / s->getSolverDegree(n2Itr);
+
+          if (cost1 < cost2)
+            return true;
+          return false;
+        }
+
+      private:
+        HeuristicSolverImpl<Briggs> *s;
+        Graph *g;
+      };
+
+      typedef std::list<Graph::NodeItr> RNAllocableList;
+      typedef RNAllocableList::iterator RNAllocableListItr;
+
+      typedef std::list<Graph::NodeItr> RNUnallocableList;  
+      typedef RNUnallocableList::iterator RNUnallocableListItr;
+
+    public:
+
+      struct NodeData {
+        typedef std::vector<unsigned> UnsafeDegreesArray;
+        bool isHeuristic, isAllocable, isInitialized;
+        unsigned numDenied, numSafe;
+        UnsafeDegreesArray unsafeDegrees;
+        RNAllocableListItr rnaItr;
+        RNUnallocableListItr rnuItr;
+
+        NodeData()
+          : isHeuristic(false), isAllocable(false), isInitialized(false),
+            numDenied(0), numSafe(0) { }
+      };
+
+      struct EdgeData {
+        typedef std::vector<unsigned> UnsafeArray;
+        unsigned worst, reverseWorst;
+        UnsafeArray unsafe, reverseUnsafe;
+        bool isUpToDate;
+
+        EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
+      };
+
+      /// \brief Construct an instance of the Briggs heuristic.
+      /// @param solver A reference to the solver which is using this heuristic.
+      Briggs(HeuristicSolverImpl<Briggs> &solver) :
+        HeuristicBase<Briggs>(solver) {}
+
+      /// \brief Determine whether a node should be reduced using optimal
+      ///        reduction.
+      /// @param nItr Node iterator to be considered.
+      /// @return True if the given node should be optimally reduced, false
+      ///         otherwise.
+      ///
+      /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
+      /// exception. Nodes whose spill cost (element 0 of their cost vector) is
+      /// infinite are checked for allocability first. Allocable nodes may be
+      /// optimally reduced, but nodes whose allocability cannot be proven are
+      /// selected for heuristic reduction instead.
+      bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+        if (getSolver().getSolverDegree(nItr) < 3) {
+          return true;
+        }
+        // else
+        return false;
+      }
+
+      /// \brief Add a node to the heuristic reduce list.
+      /// @param nItr Node iterator to add to the heuristic reduce list.
+      void addToHeuristicReduceList(Graph::NodeItr nItr) {
+        NodeData &nd = getHeuristicNodeData(nItr);
+        initializeNode(nItr);
+        nd.isHeuristic = true;
+        if (nd.isAllocable) {
+          nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
+        } else {
+          nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr);
+        }
+      }
+
+      /// \brief Heuristically reduce one of the nodes in the heuristic
+      ///        reduce list.
+      /// @return True if a reduction takes place, false if the heuristic reduce
+      ///         list is empty.
+      ///
+      /// If the list of allocable nodes is non-empty a node is selected
+      /// from it and pushed to the stack. Otherwise if the non-allocable list
+      /// is non-empty a node is selected from it and pushed to the stack.
+      /// If both lists are empty the method simply returns false with no action
+      /// taken.
+      bool heuristicReduce() {
+        if (!rnAllocableList.empty()) {
+          RNAllocableListItr rnaItr =
+            min_element(rnAllocableList.begin(), rnAllocableList.end(),
+                        LinkDegreeComparator(getSolver()));
+          Graph::NodeItr nItr = *rnaItr;
+          rnAllocableList.erase(rnaItr);
+          handleRemoveNode(nItr);
+          getSolver().pushToStack(nItr);
+          return true;
+        } else if (!rnUnallocableList.empty()) {
+          RNUnallocableListItr rnuItr =
+            min_element(rnUnallocableList.begin(), rnUnallocableList.end(),
+                        SpillCostComparator(getSolver()));
+          Graph::NodeItr nItr = *rnuItr;
+          rnUnallocableList.erase(rnuItr);
+          handleRemoveNode(nItr);
+          getSolver().pushToStack(nItr);
+          return true;
+        }
+        // else
+        return false;
+      }
+
+      /// \brief Prepare a change in the costs on the given edge.
+      /// @param eItr Edge iterator.    
+      void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+        Graph &g = getGraph();
+        Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
+                       n2Itr = g.getEdgeNode2(eItr);
+        NodeData &n1 = getHeuristicNodeData(n1Itr),
+                 &n2 = getHeuristicNodeData(n2Itr);
+
+        if (n1.isHeuristic)
+          subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr));
+        if (n2.isHeuristic)
+          subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr));
+
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+        ed.isUpToDate = false;
+      }
+
+      /// \brief Handle the change in the costs on the given edge.
+      /// @param eItr Edge iterator.
+      void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+        // This is effectively the same as adding a new edge now, since
+        // we've factored out the costs of the old one.
+        handleAddEdge(eItr);
+      }
+
+      /// \brief Handle the addition of a new edge into the PBQP graph.
+      /// @param eItr Edge iterator for the added edge.
+      ///
+      /// Updates allocability of any nodes connected by this edge which are
+      /// being managed by the heuristic. If allocability changes they are
+      /// moved to the appropriate list.
+      void handleAddEdge(Graph::EdgeItr eItr) {
+        Graph &g = getGraph();
+        Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
+                       n2Itr = g.getEdgeNode2(eItr);
+        NodeData &n1 = getHeuristicNodeData(n1Itr),
+                 &n2 = getHeuristicNodeData(n2Itr);
+
+        // If neither node is managed by the heuristic there's nothing to be
+        // done.
+        if (!n1.isHeuristic && !n2.isHeuristic)
+          return;
+
+        // Ok - we need to update at least one node.
+        computeEdgeContributions(eItr);
+
+        // Update node 1 if it's managed by the heuristic.
+        if (n1.isHeuristic) {
+          bool n1WasAllocable = n1.isAllocable;
+          addEdgeContributions(eItr, n1Itr);
+          updateAllocability(n1Itr);
+          if (n1WasAllocable && !n1.isAllocable) {
+            rnAllocableList.erase(n1.rnaItr);
+            n1.rnuItr =
+              rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
+          }
+        }
+
+        // Likewise for node 2.
+        if (n2.isHeuristic) {
+          bool n2WasAllocable = n2.isAllocable;
+          addEdgeContributions(eItr, n2Itr);
+          updateAllocability(n2Itr);
+          if (n2WasAllocable && !n2.isAllocable) {
+            rnAllocableList.erase(n2.rnaItr);
+            n2.rnuItr =
+              rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
+          }
+        }
+      }
+
+      /// \brief Handle disconnection of an edge from a node.
+      /// @param eItr Edge iterator for edge being disconnected.
+      /// @param nItr Node iterator for the node being disconnected from.
+      ///
+      /// Updates allocability of the given node and, if appropriate, moves the
+      /// node to a new list.
+      void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+        NodeData &nd = getHeuristicNodeData(nItr);
+
+        // If the node is not managed by the heuristic there's nothing to be
+        // done.
+        if (!nd.isHeuristic)
+          return;
+
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+        (void)ed;
+        assert(ed.isUpToDate && "Edge data is not up to date.");
+
+        // Update node.
+        bool ndWasAllocable = nd.isAllocable;
+        subtractEdgeContributions(eItr, nItr);
+        updateAllocability(nItr);
+
+        // If the node has gone optimal...
+        if (shouldOptimallyReduce(nItr)) {
+          nd.isHeuristic = false;
+          addToOptimalReduceList(nItr);
+          if (ndWasAllocable) {
+            rnAllocableList.erase(nd.rnaItr);
+          } else {
+            rnUnallocableList.erase(nd.rnuItr);
+          }
+        } else {
+          // Node didn't go optimal, but we might have to move it
+          // from "unallocable" to "allocable".
+          if (!ndWasAllocable && nd.isAllocable) {
+            rnUnallocableList.erase(nd.rnuItr);
+            nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
+          }
+        }
+      }
+
+    private:
+
+      NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+        return getSolver().getHeuristicNodeData(nItr);
+      }
+
+      EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+        return getSolver().getHeuristicEdgeData(eItr);
+      }
+
+      // Work out what this edge will contribute to the allocability of the
+      // nodes connected to it.
+      void computeEdgeContributions(Graph::EdgeItr eItr) {
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+
+        if (ed.isUpToDate)
+          return; // Edge data is already up to date.
+
+        Matrix &eCosts = getGraph().getEdgeCosts(eItr);
+
+        unsigned numRegs = eCosts.getRows() - 1,
+                 numReverseRegs = eCosts.getCols() - 1;
+
+        std::vector<unsigned> rowInfCounts(numRegs, 0),
+                              colInfCounts(numReverseRegs, 0);        
+
+        ed.worst = 0;
+        ed.reverseWorst = 0;
+        ed.unsafe.clear();
+        ed.unsafe.resize(numRegs, 0);
+        ed.reverseUnsafe.clear();
+        ed.reverseUnsafe.resize(numReverseRegs, 0);
+
+        for (unsigned i = 0; i < numRegs; ++i) {
+          for (unsigned j = 0; j < numReverseRegs; ++j) {
+            if (eCosts[i + 1][j + 1] ==
+                  std::numeric_limits<PBQPNum>::infinity()) {
+              ed.unsafe[i] = 1;
+              ed.reverseUnsafe[j] = 1;
+              ++rowInfCounts[i];
+              ++colInfCounts[j];
+
+              if (colInfCounts[j] > ed.worst) {
+                ed.worst = colInfCounts[j];
+              }
+
+              if (rowInfCounts[i] > ed.reverseWorst) {
+                ed.reverseWorst = rowInfCounts[i];
+              }
+            }
+          }
+        }
+
+        ed.isUpToDate = true;
+      }
+
+      // Add the contributions of the given edge to the given node's 
+      // numDenied and safe members. No action is taken other than to update
+      // these member values. Once updated these numbers can be used by clients
+      // to update the node's allocability.
+      void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+
+        assert(ed.isUpToDate && "Using out-of-date edge numbers.");
+
+        NodeData &nd = getHeuristicNodeData(nItr);
+        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+        
+        bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
+        EdgeData::UnsafeArray &unsafe =
+          nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
+        nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst;
+
+        for (unsigned r = 0; r < numRegs; ++r) {
+          if (unsafe[r]) {
+            if (nd.unsafeDegrees[r]==0) {
+              --nd.numSafe;
+            }
+            ++nd.unsafeDegrees[r];
+          }
+        }
+      }
+
+      // Subtract the contributions of the given edge to the given node's 
+      // numDenied and safe members. No action is taken other than to update
+      // these member values. Once updated these numbers can be used by clients
+      // to update the node's allocability.
+      void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+
+        assert(ed.isUpToDate && "Using out-of-date edge numbers.");
+
+        NodeData &nd = getHeuristicNodeData(nItr);
+        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+        
+        bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
+        EdgeData::UnsafeArray &unsafe =
+          nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
+        nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst;
+
+        for (unsigned r = 0; r < numRegs; ++r) {
+          if (unsafe[r]) { 
+            if (nd.unsafeDegrees[r] == 1) {
+              ++nd.numSafe;
+            }
+            --nd.unsafeDegrees[r];
+          }
+        }
+      }
+
+      void updateAllocability(Graph::NodeItr nItr) {
+        NodeData &nd = getHeuristicNodeData(nItr);
+        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+        nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0;
+      }
+
+      void initializeNode(Graph::NodeItr nItr) {
+        NodeData &nd = getHeuristicNodeData(nItr);
+
+        if (nd.isInitialized)
+          return; // Node data is already up to date.
+
+        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+
+        nd.numDenied = 0;
+        nd.numSafe = numRegs;
+        nd.unsafeDegrees.resize(numRegs, 0);
+
+        typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+
+        for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
+                           aeEnd = getSolver().solverEdgesEnd(nItr);
+             aeItr != aeEnd; ++aeItr) {
+          
+          Graph::EdgeItr eItr = *aeItr;
+          computeEdgeContributions(eItr);
+          addEdgeContributions(eItr, nItr);
+        }
+
+        updateAllocability(nItr);
+        nd.isInitialized = true;
+      }
+
+      void handleRemoveNode(Graph::NodeItr xnItr) {
+        typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+        std::vector<Graph::EdgeItr> edgesToRemove;
+        for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
+                           aeEnd = getSolver().solverEdgesEnd(xnItr);
+             aeItr != aeEnd; ++aeItr) {
+          Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
+          handleRemoveEdge(*aeItr, ynItr);
+          edgesToRemove.push_back(*aeItr);
+        }
+        while (!edgesToRemove.empty()) {
+          getSolver().removeSolverEdge(edgesToRemove.back());
+          edgesToRemove.pop_back();
+        }
+      }
+
+      RNAllocableList rnAllocableList;
+      RNUnallocableList rnUnallocableList;
+    };
+
+  }
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
new file mode 100644
index 000000000000..e7598bf3e3f1
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -0,0 +1,288 @@
+//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_MATH_H 
+#define LLVM_CODEGEN_PBQP_MATH_H
+
+#include <cassert>
+#include <algorithm>
+#include <functional>
+
+namespace PBQP {
+
+typedef float PBQPNum;
+
+/// \brief PBQP Vector class.
+class Vector {
+  public:
+
+    /// \brief Construct a PBQP vector of the given size.
+    explicit Vector(unsigned length) :
+      length(length), data(new PBQPNum[length]) {
+      }
+
+    /// \brief Construct a PBQP vector with initializer.
+    Vector(unsigned length, PBQPNum initVal) :
+      length(length), data(new PBQPNum[length]) {
+        std::fill(data, data + length, initVal);
+      }
+
+    /// \brief Copy construct a PBQP vector.
+    Vector(const Vector &v) :
+      length(v.length), data(new PBQPNum[length]) {
+        std::copy(v.data, v.data + length, data);
+      }
+
+    /// \brief Destroy this vector, return its memory.
+    ~Vector() { delete[] data; }
+
+    /// \brief Assignment operator.
+    Vector& operator=(const Vector &v) {
+      delete[] data;
+      length = v.length;
+      data = new PBQPNum[length];
+      std::copy(v.data, v.data + length, data);
+      return *this;
+    }
+
+    /// \brief Return the length of the vector
+    unsigned getLength() const {
+      return length;
+    }
+
+    /// \brief Element access.
+    PBQPNum& operator[](unsigned index) {
+      assert(index < length && "Vector element access out of bounds.");
+      return data[index];
+    }
+
+    /// \brief Const element access.
+    const PBQPNum& operator[](unsigned index) const {
+      assert(index < length && "Vector element access out of bounds.");
+      return data[index];
+    }
+
+    /// \brief Add another vector to this one.
+    Vector& operator+=(const Vector &v) {
+      assert(length == v.length && "Vector length mismatch.");
+      std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); 
+      return *this;
+    }
+
+    /// \brief Subtract another vector from this one.
+    Vector& operator-=(const Vector &v) {
+      assert(length == v.length && "Vector length mismatch.");
+      std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); 
+      return *this;
+    }
+
+    /// \brief Returns the index of the minimum value in this vector
+    unsigned minIndex() const {
+      return std::min_element(data, data + length) - data;
+    }
+
+  private:
+    unsigned length;
+    PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given vector on the given
+///        output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Vector &v) {
+  assert((v.getLength() != 0) && "Zero-length vector badness.");
+
+  os << "[ " << v[0];
+  for (unsigned i = 1; i < v.getLength(); ++i) {
+    os << ", " << v[i];
+  }
+  os << " ]";
+
+  return os;
+} 
+
+
+/// \brief PBQP Matrix class
+class Matrix {
+  public:
+
+    /// \brief Construct a PBQP Matrix with the given dimensions.
+    Matrix(unsigned rows, unsigned cols) :
+      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+    }
+
+    /// \brief Construct a PBQP Matrix with the given dimensions and initial
+    /// value.
+    Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
+      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+        std::fill(data, data + (rows * cols), initVal);
+    }
+
+    /// \brief Copy construct a PBQP matrix.
+    Matrix(const Matrix &m) :
+      rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+        std::copy(m.data, m.data + (rows * cols), data);  
+    }
+
+    /// \brief Destroy this matrix, return its memory.
+    ~Matrix() { delete[] data; }
+
+    /// \brief Assignment operator.
+    Matrix& operator=(const Matrix &m) {
+      delete[] data;
+      rows = m.rows; cols = m.cols;
+      data = new PBQPNum[rows * cols];
+      std::copy(m.data, m.data + (rows * cols), data);
+      return *this;
+    }
+
+    /// \brief Return the number of rows in this matrix.
+    unsigned getRows() const { return rows; }
+
+    /// \brief Return the number of cols in this matrix.
+    unsigned getCols() const { return cols; }
+
+    /// \brief Matrix element access.
+    PBQPNum* operator[](unsigned r) {
+      assert(r < rows && "Row out of bounds.");
+      return data + (r * cols);
+    }
+
+    /// \brief Matrix element access.
+    const PBQPNum* operator[](unsigned r) const {
+      assert(r < rows && "Row out of bounds.");
+      return data + (r * cols);
+    }
+
+    /// \brief Returns the given row as a vector.
+    Vector getRowAsVector(unsigned r) const {
+      Vector v(cols);
+      for (unsigned c = 0; c < cols; ++c)
+        v[c] = (*this)[r][c];
+      return v; 
+    }
+
+    /// \brief Returns the given column as a vector.
+    Vector getColAsVector(unsigned c) const {
+      Vector v(rows);
+      for (unsigned r = 0; r < rows; ++r)
+        v[r] = (*this)[r][c];
+      return v;
+    }
+
+    /// \brief Reset the matrix to the given value.
+    Matrix& reset(PBQPNum val = 0) {
+      std::fill(data, data + (rows * cols), val);
+      return *this;
+    }
+
+    /// \brief Set a single row of this matrix to the given value.
+    Matrix& setRow(unsigned r, PBQPNum val) {
+      assert(r < rows && "Row out of bounds.");
+      std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+      return *this;
+    }
+
+    /// \brief Set a single column of this matrix to the given value.
+    Matrix& setCol(unsigned c, PBQPNum val) {
+      assert(c < cols && "Column out of bounds.");
+      for (unsigned r = 0; r < rows; ++r)
+        (*this)[r][c] = val;
+      return *this;
+    }
+
+    /// \brief Matrix transpose.
+    Matrix transpose() const {
+      Matrix m(cols, rows);
+      for (unsigned r = 0; r < rows; ++r)
+        for (unsigned c = 0; c < cols; ++c)
+          m[c][r] = (*this)[r][c];
+      return m;
+    }
+
+    /// \brief Returns the diagonal of the matrix as a vector.
+    ///
+    /// Matrix must be square.
+    Vector diagonalize() const {
+      assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+      Vector v(rows);
+      for (unsigned r = 0; r < rows; ++r)
+        v[r] = (*this)[r][r];
+      return v;
+    } 
+
+    /// \brief Add the given matrix to this one.
+    Matrix& operator+=(const Matrix &m) {
+      assert(rows == m.rows && cols == m.cols &&
+          "Matrix dimensions mismatch.");
+      std::transform(data, data + (rows * cols), m.data, data,
+          std::plus<PBQPNum>());
+      return *this;
+    }
+
+    /// \brief Returns the minimum of the given row
+    PBQPNum getRowMin(unsigned r) const {
+      assert(r < rows && "Row out of bounds");
+      return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+    }
+
+    /// \brief Returns the minimum of the given column
+    PBQPNum getColMin(unsigned c) const {
+      PBQPNum minElem = (*this)[0][c];
+      for (unsigned r = 1; r < rows; ++r)
+        if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+      return minElem;
+    }
+
+    /// \brief Subtracts the given scalar from the elements of the given row.
+    Matrix& subFromRow(unsigned r, PBQPNum val) {
+      assert(r < rows && "Row out of bounds");
+      std::transform(data + (r * cols), data + ((r + 1) * cols),
+          data + (r * cols),
+          std::bind2nd(std::minus<PBQPNum>(), val));
+      return *this;
+    }
+
+    /// \brief Subtracts the given scalar from the elements of the given column.
+    Matrix& subFromCol(unsigned c, PBQPNum val) {
+      for (unsigned r = 0; r < rows; ++r)
+        (*this)[r][c] -= val;
+      return *this;
+    }
+
+    /// \brief Returns true if this is a zero matrix.
+    bool isZero() const {
+      return find_if(data, data + (rows * cols),
+          std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+        data + (rows * cols);
+    }
+
+  private:
+    unsigned rows, cols;
+    PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given matrix on the given
+///        output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Matrix &m) {
+
+  assert((m.getRows() != 0) && "Zero-row matrix badness.");
+
+  for (unsigned i = 0; i < m.getRows(); ++i) {
+    os << m.getRowAsVector(i);
+  }
+
+  return os;
+}
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
new file mode 100644
index 000000000000..57d9b95fc3b1
--- /dev/null
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -0,0 +1,94 @@
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Solution class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
+#define LLVM_CODEGEN_PBQP_SOLUTION_H
+
+#include "Math.h"
+#include "Graph.h"
+
+#include <map>
+
+namespace PBQP {
+
+  /// \brief Represents a solution to a PBQP problem.
+  ///
+  /// To get the selection for each node in the problem use the getSelection method.
+  class Solution {
+  private:
+
+    typedef std::map<Graph::ConstNodeItr, unsigned,
+                     NodeItrComparator> SelectionsMap;
+    SelectionsMap selections;
+
+    unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
+
+  public:
+
+    /// \brief Initialise an empty solution.
+    Solution()
+      : r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+    /// \brief Number of nodes for which selections have been made.
+    /// @return Number of nodes for which selections have been made.
+    unsigned numNodes() const { return selections.size(); }
+
+    /// \brief Records a reduction via the R0 rule. Should be called from the
+    ///        solver only.
+    void recordR0() { ++r0Reductions; }
+
+    /// \brief Returns the number of R0 reductions applied to solve the problem.
+    unsigned numR0Reductions() const { return r0Reductions; }
+
+    /// \brief Records a reduction via the R1 rule. Should be called from the
+    ///        solver only.
+    void recordR1() { ++r1Reductions; }
+
+    /// \brief Returns the number of R1 reductions applied to solve the problem.
+    unsigned numR1Reductions() const { return r1Reductions; }
+
+    /// \brief Records a reduction via the R2 rule. Should be called from the
+    ///        solver only.
+    void recordR2() { ++r2Reductions; }
+
+    /// \brief Returns the number of R2 reductions applied to solve the problem.
+    unsigned numR2Reductions() const { return r2Reductions; }
+
+    /// \brief Records a reduction via the RN rule. Should be called from the
+    ///        solver only.
+    void recordRN() { ++ rNReductions; }
+
+    /// \brief Returns the number of RN reductions applied to solve the problem.
+    unsigned numRNReductions() const { return rNReductions; }
+
+    /// \brief Set the selection for a given node.
+    /// @param nItr Node iterator.
+    /// @param selection Selection for nItr.
+    void setSelection(Graph::NodeItr nItr, unsigned selection) {
+      selections[nItr] = selection;
+    }
+
+    /// \brief Get a node's selection.
+    /// @param nItr Node iterator.
+    /// @return The selection for nItr;
+    unsigned getSelection(Graph::ConstNodeItr nItr) const {
+      SelectionsMap::const_iterator sItr = selections.find(nItr);
+      assert(sItr != selections.end() && "No selection for node.");
+      return sItr->second;
+    }
+
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 4762a39cc669..53aee7a9c9f6 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -45,10 +45,19 @@ namespace llvm {
   ///
   extern char &MachineLoopInfoID;
 
+  /// MachineLoopRanges pass - This pass is an on-demand loop coverage
+  /// analysis pass.
+  ///
+  extern char &MachineLoopRangesID;
+
   /// MachineDominators pass - This pass is a machine dominators analysis pass.
   ///
   extern char &MachineDominatorsID;
 
+  /// EdgeBundles analysis - Bundle machine CFG edges.
+  ///
+  extern char &EdgeBundlesID;
+
   /// PHIElimination pass - This pass eliminates machine instruction PHI nodes
   /// by inserting copy instructions.  This destroys SSA information, but is the
   /// desired input for some register allocators.  This pass is "required" by
@@ -66,6 +75,9 @@ namespace llvm {
 
   extern char &PreAllocSplittingID;
 
+  /// LiveStacks pass. An analysis keeping track of the liveness of stack slots.
+  extern char &LiveStacksID;
+
   /// SimpleRegisterCoalescing pass.  Aggressively coalesces every register
   /// copy it can.
   ///
@@ -76,6 +88,11 @@ namespace llvm {
   /// register allocators.
   extern char &TwoAddressInstructionPassID;
 
+  /// SpillPlacement analysis. Suggest optimal placement of spill code between
+  /// basic blocks.
+  ///
+  extern char &SpillPlacementID;
+
   /// UnreachableMachineBlockElimination pass - This pass removes unreachable
   /// machine basic blocks.
   extern char &UnreachableMachineBlockElimID;
@@ -95,6 +112,16 @@ namespace llvm {
   ///
   FunctionPass *createFastRegisterAllocator();
 
+  /// BasicRegisterAllocation Pass - This pass implements a degenerate global
+  /// register allocator using the basic regalloc framework.
+  ///
+  FunctionPass *createBasicRegisterAllocator();
+
+  /// Greedy register allocation pass - This pass implements a global register
+  /// allocator for optimized builds.
+  ///
+  FunctionPass *createGreedyRegisterAllocator();
+
   /// LinearScanRegisterAllocation Pass - This pass implements the linear scan
   /// register allocation algorithm, a global register allocator.
   ///
@@ -103,7 +130,7 @@ namespace llvm {
   /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean
   /// Quadratic Prograaming (PBQP) based register allocator.
   ///
-  FunctionPass *createPBQPRegisterAllocator();
+  FunctionPass *createDefaultPBQPRegisterAllocator();
 
   /// SimpleRegisterCoalescing Pass - Coalesce all copies possible.  Can run
   /// independently of the register allocator.
@@ -188,7 +215,7 @@ namespace llvm {
 
   /// createMachineVerifierPass - This pass verifies cenerated machine code
   /// instructions for correctness.
-  FunctionPass *createMachineVerifierPass();
+  FunctionPass *createMachineVerifierPass(const char *Banner = 0);
 
   /// createDwarfEHPass - This pass mulches exception handling code into a form
   /// adapted to code generation.  Required if using dwarf exception handling.
@@ -205,6 +232,10 @@ namespace llvm {
   /// addressing.
   FunctionPass *createLocalStackSlotAllocationPass();
 
+  /// createExpandISelPseudosPass - This pass expands pseudo-instructions.
+  ///
+  FunctionPass *createExpandISelPseudosPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/PostRAHazardRecognizer.h b/include/llvm/CodeGen/PostRAHazardRecognizer.h
deleted file mode 100644
index 24d73cb7860d..000000000000
--- a/include/llvm/CodeGen/PostRAHazardRecognizer.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//=- llvm/CodeGen/PostRAHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PostRAHazardRecognizer class, which
-// implements hazard-avoidance heuristics for scheduling, based on the
-// scheduling itineraries specified for the target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/System/DataTypes.h"
-
-#include <cassert>
-#include <cstring>
-#include <string>
-
-namespace llvm {
-
-class InstrItineraryData;
-class SUnit;
-
-class PostRAHazardRecognizer : public ScheduleHazardRecognizer {
-  // ScoreBoard to track function unit usage. ScoreBoard[0] is a
-  // mask of the FUs in use in the cycle currently being
-  // schedule. ScoreBoard[1] is a mask for the next cycle. The
-  // ScoreBoard is used as a circular buffer with the current cycle
-  // indicated by Head.
-  class ScoreBoard {
-    unsigned *Data;
-
-    // The maximum number of cycles monitored by the Scoreboard. This
-    // value is determined based on the target itineraries to ensure
-    // that all hazards can be tracked.
-    size_t Depth;
-    // Indices into the Scoreboard that represent the current cycle.
-    size_t Head;
-  public:
-    ScoreBoard():Data(NULL), Depth(0), Head(0) { }
-    ~ScoreBoard() {
-      delete[] Data;
-    }
-
-    size_t getDepth() const { return Depth; }
-    unsigned& operator[](size_t idx) const {
-      assert(Depth && "ScoreBoard was not initialized properly!");
-
-      return Data[(Head + idx) % Depth];
-    }
-
-    void reset(size_t d = 1) {
-      if (Data == NULL) {
-        Depth = d;
-        Data = new unsigned[Depth];
-      }
-
-      memset(Data, 0, Depth * sizeof(Data[0]));
-      Head = 0;
-    }
-
-    void advance() {
-      Head = (Head + 1) % Depth;
-    }
-
-    // Print the scoreboard.
-    void dump() const;
-  };
-
-  // Itinerary data for the target.
-  const InstrItineraryData &ItinData;
-
-  ScoreBoard ReservedScoreboard;
-  ScoreBoard RequiredScoreboard;
-
-public:
-  PostRAHazardRecognizer(const InstrItineraryData &ItinData);
-
-  virtual HazardType getHazardType(SUnit *SU);
-  virtual void Reset();
-  virtual void EmitInstruction(SUnit *SU);
-  virtual void AdvanceCycle();
-};
-
-}
-
-#endif
diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h
index 1d743c1cba24..e2ab899f183f 100644
--- a/include/llvm/CodeGen/ProcessImplicitDefs.h
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -31,7 +31,9 @@ namespace llvm {
   public:
     static char ID;
 
-    ProcessImplicitDefs() : MachineFunctionPass(ID) {}
+    ProcessImplicitDefs() : MachineFunctionPass(ID) {
+      initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &au) const;
 
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
new file mode 100644
index 000000000000..7e8745eddef8
--- /dev/null
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -0,0 +1,167 @@
+//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PBQPBuilder interface, for classes which build PBQP
+// instances to represent register allocation problems, and the RegAllocPBQP
+// interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCPBQP_H
+#define LLVM_CODEGEN_REGALLOCPBQP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+
+#include <map>
+#include <set>
+
+namespace llvm {
+
+  class LiveIntervals;
+  class MachineFunction;
+  class MachineLoopInfo;
+
+  /// This class wraps up a PBQP instance representing a register allocation
+  /// problem, plus the structures necessary to map back from the PBQP solution
+  /// to a register allocation solution. (i.e. The PBQP-node <--> vreg map,
+  /// and the PBQP option <--> storage location map).
+
+  class PBQPRAProblem {
+  public:
+
+    typedef SmallVector<unsigned, 16> AllowedSet;
+
+    PBQP::Graph& getGraph() { return graph; }
+
+    const PBQP::Graph& getGraph() const { return graph; }
+
+    /// Record the mapping between the given virtual register and PBQP node,
+    /// and the set of allowed pregs for the vreg.
+    ///
+    /// If you are extending
+    /// PBQPBuilder you are unlikely to need this: Nodes and options for all
+    /// vregs will already have been set up for you by the base class. 
+    template <typename AllowedRegsItr>
+    void recordVReg(unsigned vreg, PBQP::Graph::NodeItr node,
+                    AllowedRegsItr arBegin, AllowedRegsItr arEnd) {
+      assert(node2VReg.find(node) == node2VReg.end() && "Re-mapping node.");
+      assert(vreg2Node.find(vreg) == vreg2Node.end() && "Re-mapping vreg.");
+      assert(allowedSets[vreg].empty() && "vreg already has pregs.");
+
+      node2VReg[node] = vreg;
+      vreg2Node[vreg] = node;
+      std::copy(arBegin, arEnd, std::back_inserter(allowedSets[vreg]));
+    }
+
+    /// Get the virtual register corresponding to the given PBQP node.
+    unsigned getVRegForNode(PBQP::Graph::ConstNodeItr node) const;
+
+    /// Get the PBQP node corresponding to the given virtual register.
+    PBQP::Graph::NodeItr getNodeForVReg(unsigned vreg) const;
+
+    /// Returns true if the given PBQP option represents a physical register,
+    /// false otherwise.
+    bool isPRegOption(unsigned vreg, unsigned option) const {
+      // At present we only have spills or pregs, so anything that's not a
+      // spill is a preg. (This might be extended one day to support remat).
+      return !isSpillOption(vreg, option);
+    }
+
+    /// Returns true if the given PBQP option represents spilling, false
+    /// otherwise.
+    bool isSpillOption(unsigned vreg, unsigned option) const {
+      // We hardcode option zero as the spill option.
+      return option == 0;
+    }
+
+    /// Returns the allowed set for the given virtual register.
+    const AllowedSet& getAllowedSet(unsigned vreg) const;
+
+    /// Get PReg for option.
+    unsigned getPRegForOption(unsigned vreg, unsigned option) const;
+
+  private:
+
+    typedef std::map<PBQP::Graph::ConstNodeItr, unsigned,
+                     PBQP::NodeItrComparator>  Node2VReg;
+    typedef DenseMap<unsigned, PBQP::Graph::NodeItr> VReg2Node;
+    typedef std::map<unsigned, AllowedSet> AllowedSetMap;
+
+    PBQP::Graph graph;
+    Node2VReg node2VReg;
+    VReg2Node vreg2Node;
+
+    AllowedSetMap allowedSets;
+    
+  };
+
+  /// Builds PBQP instances to represent register allocation problems. Includes
+  /// spill, interference and coalescing costs by default. You can extend this
+  /// class to support additional constraints for your architecture.
+  class PBQPBuilder {
+  private:
+    PBQPBuilder(const PBQPBuilder&) {}
+    void operator=(const PBQPBuilder&) {}
+  public:
+
+    typedef std::set<unsigned> RegSet;
+ 
+    /// Default constructor.
+    PBQPBuilder() {}
+
+    /// Clean up a PBQPBuilder.
+    virtual ~PBQPBuilder() {}
+
+    /// Build a PBQP instance to represent the register allocation problem for
+    /// the given MachineFunction.
+    virtual std::auto_ptr<PBQPRAProblem> build(
+                                              MachineFunction *mf,
+                                              const LiveIntervals *lis,
+                                              const MachineLoopInfo *loopInfo,
+                                              const RegSet &vregs);
+  private:
+
+    void addSpillCosts(PBQP::Vector &costVec, PBQP::PBQPNum spillCost);
+
+    void addInterferenceCosts(PBQP::Matrix &costMat,
+                              const PBQPRAProblem::AllowedSet &vr1Allowed,
+                              const PBQPRAProblem::AllowedSet &vr2Allowed,
+                              const TargetRegisterInfo *tri);
+  };
+
+  /// Extended builder which adds coalescing constraints to a problem.
+  class PBQPBuilderWithCoalescing : public PBQPBuilder {
+  public:
+ 
+    /// Build a PBQP instance to represent the register allocation problem for
+    /// the given MachineFunction.
+    virtual std::auto_ptr<PBQPRAProblem> build(
+                                              MachineFunction *mf,
+                                              const LiveIntervals *lis,
+                                              const MachineLoopInfo *loopInfo,
+                                              const RegSet &vregs);   
+
+  private:
+
+    void addPhysRegCoalesce(PBQP::Vector &costVec, unsigned pregOption,
+                            PBQP::PBQPNum benefit);
+
+    void addVirtRegCoalesce(PBQP::Matrix &costMat,
+                            const PBQPRAProblem::AllowedSet &vr1Allowed,
+                            const PBQPRAProblem::AllowedSet &vr2Allowed,
+                            PBQP::PBQPNum benefit);
+  };
+
+  FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder);
+}
+
+#endif /* LLVM_CODEGEN_REGALLOCPBQP_H */
diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h
index 7644433a33a1..af0b3946912b 100644
--- a/include/llvm/CodeGen/RegisterCoalescer.h
+++ b/include/llvm/CodeGen/RegisterCoalescer.h
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/System/IncludeFile.h"
+#include "llvm/Support/IncludeFile.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 076268b99c20..3864ffd50a19 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -221,6 +221,9 @@ namespace llvm {
     }
   };
 
+  template <>
+  struct isPodLike<SDep> { static const bool value = true; };
+
   /// SUnit - Scheduling unit. This is a node in the scheduling DAG.
   class SUnit {
   private:
@@ -229,9 +232,8 @@ namespace llvm {
   public:
     SUnit *OrigNode;                    // If not this, the node from which
                                         // this node was cloned.
-    
-    // Preds/Succs - The SUnits before/after us in the graph.  The boolean value
-    // is true if the edge is a token chain edge, false if it is a value edge. 
+
+    // Preds/Succs - The SUnits before/after us in the graph.
     SmallVector<SDep, 4> Preds;  // All sunit predecessors.
     SmallVector<SDep, 4> Succs;  // All sunit successors.
 
@@ -242,11 +244,13 @@ namespace llvm {
 
     unsigned NodeNum;                   // Entry # of node in the node vector.
     unsigned NodeQueueId;               // Queue id of node.
-    unsigned short Latency;             // Node latency.
     unsigned NumPreds;                  // # of SDep::Data preds.
     unsigned NumSuccs;                  // # of SDep::Data sucss.
     unsigned NumPredsLeft;              // # of preds not scheduled.
     unsigned NumSuccsLeft;              // # of succs not scheduled.
+    unsigned short NumRegDefsLeft;      // # of reg defs with no scheduled use.
+    unsigned short Latency;             // Node latency.
+    bool isCall           : 1;          // Is a function call.
     bool isTwoAddress     : 1;          // Is a two-address instruction.
     bool isCommutable     : 1;          // Is a commutable instruction.
     bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
@@ -267,13 +271,14 @@ namespace llvm {
   public:
     const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
     const TargetRegisterClass *CopySrcRC;
-    
+
     /// SUnit - Construct an SUnit for pre-regalloc scheduling to represent
     /// an SDNode and any nodes flagged to it.
     SUnit(SDNode *node, unsigned nodenum)
       : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
-        NodeQueueId(0),  Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
+        NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
@@ -285,8 +290,9 @@ namespace llvm {
     /// a MachineInstr.
     SUnit(MachineInstr *instr, unsigned nodenum)
       : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
-        NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
+        NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
@@ -297,8 +303,9 @@ namespace llvm {
     /// SUnit - Construct a placeholder SUnit.
     SUnit()
       : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
-        NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
+        NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
         hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isCloned(false),
@@ -320,6 +327,10 @@ namespace llvm {
       return Node;
     }
 
+    /// isInstr - Return true if this SUnit refers to a machine instruction as
+    /// opposed to an SDNode.
+    bool isInstr() const { return Instr; }
+
     /// setInstr - Assign the instruction for the SUnit.
     /// This may be used during post-regalloc scheduling.
     void setInstr(MachineInstr *MI) {
@@ -337,7 +348,7 @@ namespace llvm {
     /// addPred - This adds the specified edge as a pred of the current node if
     /// not already.  It also adds the current node as a successor of the
     /// specified node.
-    void addPred(const SDep &D);
+    bool addPred(const SDep &D);
 
     /// removePred - This removes the specified edge as a pred of the current
     /// node if it exists.  It also removes the current node as a successor of
@@ -347,7 +358,7 @@ namespace llvm {
     /// getDepth - Return the depth of this node, which is the length of the
     /// maximum path up to any node with has no predecessors.
     unsigned getDepth() const {
-      if (!isDepthCurrent) 
+      if (!isDepthCurrent)
         const_cast<SUnit *>(this)->ComputeDepth();
       return Depth;
     }
@@ -355,7 +366,7 @@ namespace llvm {
     /// getHeight - Return the height of this node, which is the length of the
     /// maximum path down to any node with has no successors.
     unsigned getHeight() const {
-      if (!isHeightCurrent) 
+      if (!isHeightCurrent)
         const_cast<SUnit *>(this)->ComputeHeight();
       return Height;
     }
@@ -387,7 +398,7 @@ namespace llvm {
           return true;
       return false;
     }
-    
+
     /// isSucc - Test if node N is a successor of this node.
     bool isSucc(SUnit *N) {
       for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i)
@@ -408,25 +419,38 @@ namespace llvm {
   //===--------------------------------------------------------------------===//
   /// SchedulingPriorityQueue - This interface is used to plug different
   /// priorities computation algorithms into the list scheduler. It implements
-  /// the interface of a standard priority queue, where nodes are inserted in 
+  /// the interface of a standard priority queue, where nodes are inserted in
   /// arbitrary order and returned in priority order.  The computation of the
   /// priority and the representation of the queue are totally up to the
   /// implementation to decide.
-  /// 
+  ///
   class SchedulingPriorityQueue {
     unsigned CurCycle;
+    bool HasReadyFilter;
   public:
-    SchedulingPriorityQueue() : CurCycle(0) {}
+    SchedulingPriorityQueue(bool rf = false):
+      CurCycle(0), HasReadyFilter(rf) {}
     virtual ~SchedulingPriorityQueue() {}
-  
+
+    virtual bool isBottomUp() const = 0;
+
     virtual void initNodes(std::vector<SUnit> &SUnits) = 0;
     virtual void addNode(const SUnit *SU) = 0;
     virtual void updateNode(const SUnit *SU) = 0;
     virtual void releaseState() = 0;
 
     virtual bool empty() const = 0;
+
+    bool hasReadyFilter() const { return HasReadyFilter; }
+
+    virtual bool tracksRegPressure() const { return false; }
+
+    virtual bool isReady(SUnit *) const {
+      assert(!HasReadyFilter && "The ready filter must override isReady()");
+      return true;
+    }
     virtual void push(SUnit *U) = 0;
-  
+
     void push_all(const std::vector<SUnit *> &Nodes) {
       for (std::vector<SUnit *>::const_iterator I = Nodes.begin(),
            E = Nodes.end(); I != E; ++I)
@@ -437,6 +461,8 @@ namespace llvm {
 
     virtual void remove(SUnit *SU) = 0;
 
+    virtual void dump(ScheduleDAG *) const {}
+
     /// ScheduledNode - As each node is scheduled, this method is invoked.  This
     /// allows the priority function to adjust the priority of related
     /// unscheduled nodes, for example.
@@ -451,7 +477,7 @@ namespace llvm {
 
     unsigned getCurCycle() const {
       return CurCycle;
-    }    
+    }
   };
 
   class ScheduleDAG {
@@ -473,11 +499,18 @@ namespace llvm {
 
     virtual ~ScheduleDAG();
 
+    /// getInstrDesc - Return the TargetInstrDesc of this SUnit.
+    /// Return NULL for SDNodes without a machine opcode.
+    const TargetInstrDesc *getInstrDesc(const SUnit *SU) const {
+      if (SU->isInstr()) return &SU->getInstr()->getDesc();
+      return getNodeDesc(SU->getNode());
+    }
+
     /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
     /// using 'dot'.
     ///
     void viewGraph();
-  
+
     /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
     /// according to the order specified in Sequence.
     ///
@@ -536,6 +569,10 @@ namespace llvm {
     void EmitNoop();
 
     void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+
+  private:
+    // Return the TargetInstrDesc of this SDNode or NULL.
+    const TargetInstrDesc *getNodeDesc(const SDNode *Node) const;
   };
 
   class SUnitIterator : public std::iterator<std::forward_iterator_tag,
@@ -627,7 +664,7 @@ namespace llvm {
     /// Visited - a set of nodes visited during a DFS traversal.
     BitVector Visited;
 
-    /// DFS - make a DFS traversal and mark all nodes affected by the 
+    /// DFS - make a DFS traversal and mark all nodes affected by the
     /// edge insertion. These nodes will later get new topological indexes
     /// by means of the Shift method.
     void DFS(const SUnit *SU, int UpperBound, bool& HasLoop);
@@ -642,7 +679,7 @@ namespace llvm {
   public:
     explicit ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits);
 
-    /// InitDAGTopologicalSorting - create the initial topological 
+    /// InitDAGTopologicalSorting - create the initial topological
     /// ordering from the DAG to be scheduled.
     void InitDAGTopologicalSorting();
 
diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 09e3e8861316..2f53baa1c7e6 100644
--- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -23,7 +23,15 @@ class SUnit;
 /// issued this cycle, and whether or not a noop needs to be inserted to handle
 /// the hazard.
 class ScheduleHazardRecognizer {
+protected:
+  /// MaxLookAhead - Indicate the number of cycles in the scoreboard
+  /// state. Important to restore the state after backtracking. Additionally,
+  /// MaxLookAhead=0 identifies a fake recognizer, allowing the client to
+  /// bypass virtual calls. Currently the PostRA scheduler ignores it.
+  unsigned MaxLookAhead;
+
 public:
+  ScheduleHazardRecognizer(): MaxLookAhead(0) {}
   virtual ~ScheduleHazardRecognizer();
 
   enum HazardType {
@@ -32,6 +40,14 @@ public:
     NoopHazard     // This instruction can't be emitted, and needs noops.
   };
 
+  unsigned getMaxLookAhead() const { return MaxLookAhead; }
+
+  bool isEnabled() const { return MaxLookAhead != 0; }
+
+  /// atIssueLimit - Return true if no more instructions may be issued in this
+  /// cycle.
+  virtual bool atIssueLimit() const { return false; }
+
   /// getHazardType - Return the hazard type of emitting this node.  There are
   /// three possible results.  Either:
   ///  * NoHazard: it is legal to issue this instruction on this cycle.
@@ -39,7 +55,7 @@ public:
   ///     other instruction is available, issue it first.
   ///  * NoopHazard: issuing this instruction would break the program.  If
   ///     some other instruction can be issued, do so, otherwise issue a noop.
-  virtual HazardType getHazardType(SUnit *) {
+  virtual HazardType getHazardType(SUnit *m, int Stalls) {
     return NoHazard;
   }
 
@@ -52,12 +68,18 @@ public:
   /// emitted, to advance the hazard state.
   virtual void EmitInstruction(SUnit *) {}
 
-  /// AdvanceCycle - This callback is invoked when no instructions can be
-  /// issued on this cycle without a hazard.  This should increment the
+  /// AdvanceCycle - This callback is invoked whenever the next top-down
+  /// instruction to be scheduled cannot issue in the current cycle, either
+  /// because of latency or resource conflicts.  This should increment the
   /// internal state of the hazard recognizer so that previously "Hazard"
   /// instructions will now not be hazards.
   virtual void AdvanceCycle() {}
 
+  /// RecedeCycle - This callback is invoked whenever the next bottom-up
+  /// instruction to be scheduled cannot issue in the current cycle, either
+  /// because of latency or resource conflicts.
+  virtual void RecedeCycle() {}
+
   /// EmitNoop - This callback is invoked when a noop was added to the
   /// instruction stream.
   virtual void EmitNoop() {
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
new file mode 100644
index 000000000000..8850006df84c
--- /dev/null
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -0,0 +1,129 @@
+//=- llvm/CodeGen/ScoreboardHazardRecognizer.h - Schedule Support -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+
+#include <cassert>
+#include <cstring>
+#include <string>
+
+namespace llvm {
+
+class InstrItineraryData;
+class TargetInstrDesc;
+class ScheduleDAG;
+class SUnit;
+
+class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer {
+  // Scoreboard to track function unit usage. Scoreboard[0] is a
+  // mask of the FUs in use in the cycle currently being
+  // schedule. Scoreboard[1] is a mask for the next cycle. The
+  // Scoreboard is used as a circular buffer with the current cycle
+  // indicated by Head.
+  //
+  // Scoreboard always counts cycles in forward execution order. If used by a
+  // bottom-up scheduler, then the scoreboard cycles are the inverse of the
+  // scheduler's cycles.
+  class Scoreboard {
+    unsigned *Data;
+
+    // The maximum number of cycles monitored by the Scoreboard. This
+    // value is determined based on the target itineraries to ensure
+    // that all hazards can be tracked.
+    size_t Depth;
+    // Indices into the Scoreboard that represent the current cycle.
+    size_t Head;
+  public:
+    Scoreboard():Data(NULL), Depth(0), Head(0) { }
+    ~Scoreboard() {
+      delete[] Data;
+    }
+
+    size_t getDepth() const { return Depth; }
+    unsigned& operator[](size_t idx) const {
+      // Depth is expected to be a power-of-2.
+      assert(Depth && !(Depth & (Depth - 1)) &&
+             "Scoreboard was not initialized properly!");
+
+      return Data[(Head + idx) & (Depth-1)];
+    }
+
+    void reset(size_t d = 1) {
+      if (Data == NULL) {
+        Depth = d;
+        Data = new unsigned[Depth];
+      }
+
+      memset(Data, 0, Depth * sizeof(Data[0]));
+      Head = 0;
+    }
+
+    void advance() {
+      Head = (Head + 1) & (Depth-1);
+    }
+
+    void recede() {
+      Head = (Head - 1) & (Depth-1);
+    }
+
+    // Print the scoreboard.
+    void dump() const;
+  };
+
+#ifndef NDEBUG
+  // Support for tracing ScoreboardHazardRecognizer as a component within
+  // another module. Follows the current thread-unsafe model of tracing.
+  static const char *DebugType;
+#endif
+
+  // Itinerary data for the target.
+  const InstrItineraryData *ItinData;
+
+  const ScheduleDAG *DAG;
+
+  /// IssueWidth - Max issue per cycle. 0=Unknown.
+  unsigned IssueWidth;
+
+  /// IssueCount - Count instructions issued in this cycle.
+  unsigned IssueCount;
+
+  Scoreboard ReservedScoreboard;
+  Scoreboard RequiredScoreboard;
+
+public:
+  ScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+                             const ScheduleDAG *DAG,
+                             const char *ParentDebugType = "");
+
+  /// atIssueLimit - Return true if no more instructions may be issued in this
+  /// cycle.
+  virtual bool atIssueLimit() const;
+
+  // Stalls provides an cycle offset at which SU will be scheduled. It will be
+  // negative for bottom-up scheduling.
+  virtual HazardType getHazardType(SUnit *SU, int Stalls);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+  virtual void AdvanceCycle();
+  virtual void RecedeCycle();
+};
+
+}
+
+#endif //!LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 7723fa00e90d..c9de95bebd54 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -171,9 +171,6 @@ class SelectionDAG {
   /// DbgInfo - Tracks dbg_value information through SDISel.
   SDDbgInfo *DbgInfo;
 
-  /// VerifyNode - Sanity check the given node.  Aborts if it is invalid.
-  void VerifyNode(SDNode *N);
-
   /// setGraphColorHelper - Implementation of setSubgraphColor.
   /// Return whether we had to truncate the search.
   ///
@@ -401,21 +398,21 @@ public:
   }
 
   // This version of the getCopyToReg method takes an extra operand, which
-  // indicates that there is potentially an incoming flag value (if Flag is not
-  // null) and that there should be a flag result.
+  // indicates that there is potentially an incoming glue value (if Glue is not
+  // null) and that there should be a glue result.
   SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N,
-                       SDValue Flag) {
-    SDVTList VTs = getVTList(MVT::Other, MVT::Flag);
-    SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Flag };
-    return getNode(ISD::CopyToReg, dl, VTs, Ops, Flag.getNode() ? 4 : 3);
+                       SDValue Glue) {
+    SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue };
+    return getNode(ISD::CopyToReg, dl, VTs, Ops, Glue.getNode() ? 4 : 3);
   }
 
   // Similar to last getCopyToReg() except parameter Reg is a SDValue
   SDValue getCopyToReg(SDValue Chain, DebugLoc dl, SDValue Reg, SDValue N,
-                         SDValue Flag) {
-    SDVTList VTs = getVTList(MVT::Other, MVT::Flag);
-    SDValue Ops[] = { Chain, Reg, N, Flag };
-    return getNode(ISD::CopyToReg, dl, VTs, Ops, Flag.getNode() ? 4 : 3);
+                         SDValue Glue) {
+    SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, Reg, N, Glue };
+    return getNode(ISD::CopyToReg, dl, VTs, Ops, Glue.getNode() ? 4 : 3);
   }
 
   SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT) {
@@ -425,13 +422,13 @@ public:
   }
 
   // This version of the getCopyFromReg method takes an extra operand, which
-  // indicates that there is potentially an incoming flag value (if Flag is not
-  // null) and that there should be a flag result.
+  // indicates that there is potentially an incoming glue value (if Glue is not
+  // null) and that there should be a glue result.
   SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT,
-                           SDValue Flag) {
-    SDVTList VTs = getVTList(VT, MVT::Other, MVT::Flag);
-    SDValue Ops[] = { Chain, getRegister(Reg, VT), Flag };
-    return getNode(ISD::CopyFromReg, dl, VTs, Ops, Flag.getNode() ? 3 : 2);
+                           SDValue Glue) {
+    SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue };
+    return getNode(ISD::CopyFromReg, dl, VTs, Ops, Glue.getNode() ? 3 : 2);
   }
 
   SDValue getCondCode(ISD::CondCode Cond);
@@ -465,27 +462,27 @@ public:
   SDValue getNOT(DebugLoc DL, SDValue Val, EVT VT);
 
   /// getCALLSEQ_START - Return a new CALLSEQ_START node, which always must have
-  /// a flag result (to ensure it's not CSE'd).  CALLSEQ_START does not have a
+  /// a glue result (to ensure it's not CSE'd).  CALLSEQ_START does not have a
   /// useful DebugLoc.
   SDValue getCALLSEQ_START(SDValue Chain, SDValue Op) {
-    SDVTList VTs = getVTList(MVT::Other, MVT::Flag);
+    SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
     SDValue Ops[] = { Chain,  Op };
     return getNode(ISD::CALLSEQ_START, DebugLoc(), VTs, Ops, 2);
   }
 
   /// getCALLSEQ_END - Return a new CALLSEQ_END node, which always must have a
-  /// flag result (to ensure it's not CSE'd).  CALLSEQ_END does not have
+  /// glue result (to ensure it's not CSE'd).  CALLSEQ_END does not have
   /// a useful DebugLoc.
   SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2,
-                           SDValue InFlag) {
-    SDVTList NodeTys = getVTList(MVT::Other, MVT::Flag);
+                           SDValue InGlue) {
+    SDVTList NodeTys = getVTList(MVT::Other, MVT::Glue);
     SmallVector<SDValue, 4> Ops;
     Ops.push_back(Chain);
     Ops.push_back(Op1);
     Ops.push_back(Op2);
-    Ops.push_back(InFlag);
+    Ops.push_back(InGlue);
     return getNode(ISD::CALLSEQ_END, DebugLoc(), NodeTys, &Ops[0],
-                   (unsigned)Ops.size() - (InFlag.getNode() == 0 ? 1 : 0));
+                   (unsigned)Ops.size() - (InGlue.getNode() == 0 ? 1 : 0));
   }
 
   /// getUNDEF - Return an UNDEF node.  UNDEF does not have a useful DebugLoc.
@@ -542,17 +539,17 @@ public:
 
   SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
                     SDValue Size, unsigned Align, bool isVol, bool AlwaysInline,
-                    const Value *DstSV, uint64_t DstSVOff,
-                    const Value *SrcSV, uint64_t SrcSVOff);
+                    MachinePointerInfo DstPtrInfo,
+                    MachinePointerInfo SrcPtrInfo);
 
   SDValue getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
                      SDValue Size, unsigned Align, bool isVol,
-                     const Value *DstSV, uint64_t DstOSVff,
-                     const Value *SrcSV, uint64_t SrcSVOff);
+                     MachinePointerInfo DstPtrInfo,
+                     MachinePointerInfo SrcPtrInfo);
 
   SDValue getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
                     SDValue Size, unsigned Align, bool isVol,
-                    const Value *DstSV, uint64_t DstSVOff);
+                    MachinePointerInfo DstPtrInfo);
 
   /// getSetCC - Helper function to make it easier to build SetCC's if you just
   /// have an ISD::CondCode instead of an SDValue.
@@ -587,8 +584,8 @@ public:
   /// getAtomic - Gets a node for an atomic op, produces result and chain and
   /// takes 3 operands
   SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
-                    SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal,
-                    unsigned Alignment=0);
+                    SDValue Ptr, SDValue Cmp, SDValue Swp,
+                    MachinePointerInfo PtrInfo, unsigned Alignment=0);
   SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
                     SDValue Ptr, SDValue Cmp, SDValue Swp,
                     MachineMemOperand *MMO);
@@ -609,13 +606,13 @@ public:
   SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
                               const EVT *VTs, unsigned NumVTs,
                               const SDValue *Ops, unsigned NumOps,
-                              EVT MemVT, const Value *srcValue, int SVOff,
+                              EVT MemVT, MachinePointerInfo PtrInfo,
                               unsigned Align = 0, bool Vol = false,
                               bool ReadMem = true, bool WriteMem = true);
 
   SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
                               const SDValue *Ops, unsigned NumOps,
-                              EVT MemVT, const Value *srcValue, int SVOff,
+                              EVT MemVT, MachinePointerInfo PtrInfo,
                               unsigned Align = 0, bool Vol = false,
                               bool ReadMem = true, bool WriteMem = true);
 
@@ -630,19 +627,22 @@ public:
   /// determined by their operands, and they produce a value AND a token chain.
   ///
   SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
-                  const Value *SV, int SVOffset, bool isVolatile,
-                  bool isNonTemporal, unsigned Alignment);
-  SDValue getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
-                     SDValue Chain, SDValue Ptr, const Value *SV,
-                     int SVOffset, EVT MemVT, bool isVolatile,
-                     bool isNonTemporal, unsigned Alignment);
+                  MachinePointerInfo PtrInfo, bool isVolatile,
+                  bool isNonTemporal, unsigned Alignment,
+                  const MDNode *TBAAInfo = 0);
+  SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+                     SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo,
+                     EVT MemVT, bool isVolatile,
+                     bool isNonTemporal, unsigned Alignment,
+                     const MDNode *TBAAInfo = 0);
   SDValue getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
                          SDValue Offset, ISD::MemIndexedMode AM);
   SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                   EVT VT, DebugLoc dl,
                   SDValue Chain, SDValue Ptr, SDValue Offset,
-                  const Value *SV, int SVOffset, EVT MemVT,
-                  bool isVolatile, bool isNonTemporal, unsigned Alignment);
+                  MachinePointerInfo PtrInfo, EVT MemVT,
+                  bool isVolatile, bool isNonTemporal, unsigned Alignment,
+                  const MDNode *TBAAInfo = 0);
   SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                   EVT VT, DebugLoc dl,
                   SDValue Chain, SDValue Ptr, SDValue Offset,
@@ -651,14 +651,16 @@ public:
   /// getStore - Helper function to build ISD::STORE nodes.
   ///
   SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
-                   const Value *SV, int SVOffset, bool isVolatile,
-                   bool isNonTemporal, unsigned Alignment);
+                   MachinePointerInfo PtrInfo, bool isVolatile,
+                   bool isNonTemporal, unsigned Alignment,
+                   const MDNode *TBAAInfo = 0);
   SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
                    MachineMemOperand *MMO);
   SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
-                        const Value *SV, int SVOffset, EVT TVT,
+                        MachinePointerInfo PtrInfo, EVT TVT,
                         bool isNonTemporal, bool isVolatile,
-                        unsigned Alignment);
+                        unsigned Alignment,
+                        const MDNode *TBAAInfo = 0);
   SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
                         EVT TVT, MachineMemOperand *MMO);
   SDValue getIndexedStore(SDValue OrigStoe, DebugLoc dl, SDValue Base,
@@ -899,6 +901,9 @@ public:
   SmallVector<SDDbgValue*,2> &GetDbgValues(const SDNode* SD) {
     return DbgInfo->getSDDbgValues(SD);
   }
+  
+  /// TransferDbgValues - Transfer SDDbgValues.
+  void TransferDbgValues(SDValue From, SDValue To);
 
   /// hasDebugValues - Return true if there are any SDDbgValue nodes associated
   /// with this SelectionDAG.
@@ -961,6 +966,13 @@ public:
   /// class to allow target nodes to be understood.
   unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
 
+  /// isBaseWithConstantOffset - Return true if the specified operand is an
+  /// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+  /// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+  /// semantics as an ADD.  This handles the equivalence:
+  ///     X|Cst == X+Cst iff X&Cst = 0.
+  bool isBaseWithConstantOffset(SDValue Op) const;
+  
   /// isKnownNeverNan - Test whether the given SDValue is known to never be NaN.
   bool isKnownNeverNaN(SDValue Op) const;
 
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 01d05ddac11a..62358e7639ee 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -34,7 +34,8 @@ namespace llvm {
   class ScheduleHazardRecognizer;
   class GCFunctionInfo;
   class ScheduleDAGSDNodes;
- 
+  class LoadInst;
+
 /// SelectionDAGISel - This is the common base class used for SelectionDAG-based
 /// pattern-matching instruction selectors.
 class SelectionDAGISel : public MachineFunctionPass {
@@ -54,7 +55,7 @@ public:
   explicit SelectionDAGISel(const TargetMachine &tm,
                             CodeGenOpt::Level OL = CodeGenOpt::Default);
   virtual ~SelectionDAGISel();
-  
+
   const TargetLowering &getTargetLowering() { return TLI; }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -62,18 +63,18 @@ public:
   virtual bool runOnMachineFunction(MachineFunction &MF);
 
   virtual void EmitFunctionEntryCode() {}
-  
+
   /// PreprocessISelDAG - This hook allows targets to hack on the graph before
   /// instruction selection starts.
   virtual void PreprocessISelDAG() {}
-  
+
   /// PostprocessISelDAG() - This hook allows the target to hack on the graph
   /// right after selection.
   virtual void PostprocessISelDAG() {}
-  
+
   /// Select - Main hook targets implement to select a node.
   virtual SDNode *Select(SDNode *N) = 0;
-  
+
   /// SelectInlineAsmMemoryOperand - Select the specified address as a target
   /// addressing mode, according to the specified constraint code.  If this does
   /// not match or is not implemented, return true.  The resultant operands
@@ -91,25 +92,20 @@ public:
 
   /// IsLegalToFold - Returns true if the specific operand node N of
   /// U can be folded during instruction selection that starts at Root.
-  /// FIXME: This is a static member function because the PIC16 target,
-  /// which uses it during lowering.
+  /// FIXME: This is a static member function because the MSP430/SystemZ/X86
+  /// targets, which uses it during isel.  This could become a proper member.
   static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
                             CodeGenOpt::Level OptLevel,
                             bool IgnoreChains = false);
 
-  /// CreateTargetHazardRecognizer - Return a newly allocated hazard recognizer
-  /// to use for this target when scheduling the DAG.
-  virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer();
-  
-  
   // Opcodes used by the DAG state machine:
   enum BuiltinOpcodes {
     OPC_Scope,
     OPC_RecordNode,
-    OPC_RecordChild0, OPC_RecordChild1, OPC_RecordChild2, OPC_RecordChild3, 
+    OPC_RecordChild0, OPC_RecordChild1, OPC_RecordChild2, OPC_RecordChild3,
     OPC_RecordChild4, OPC_RecordChild5, OPC_RecordChild6, OPC_RecordChild7,
     OPC_RecordMemRef,
-    OPC_CaptureFlagInput,
+    OPC_CaptureGlueInput,
     OPC_MoveChild,
     OPC_MoveParent,
     OPC_CheckSame,
@@ -128,7 +124,7 @@ public:
     OPC_CheckComplexPat,
     OPC_CheckAndImm, OPC_CheckOrImm,
     OPC_CheckFoldableChainNode,
-    
+
     OPC_EmitInteger,
     OPC_EmitRegister,
     OPC_EmitConvertToTarget,
@@ -139,15 +135,15 @@ public:
     OPC_EmitNodeXForm,
     OPC_EmitNode,
     OPC_MorphNodeTo,
-    OPC_MarkFlagResults,
+    OPC_MarkGlueResults,
     OPC_CompleteMatch
   };
-  
+
   enum {
-    OPFL_None       = 0,     // Node has no chain or flag input and isn't variadic.
+    OPFL_None       = 0,  // Node has no chain or glue input and isn't variadic.
     OPFL_Chain      = 1,     // Node has a chain input.
-    OPFL_FlagInput  = 2,     // Node has a flag input.
-    OPFL_FlagOutput = 4,     // Node has a flag output.
+    OPFL_GlueInput  = 2,     // Node has a glue input.
+    OPFL_GlueOutput = 4,     // Node has a glue output.
     OPFL_MemRefs    = 8,     // Node gets accumulated MemRefs.
     OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
     OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
@@ -156,37 +152,37 @@ public:
     OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
     OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
     OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-    
+
     OPFL_VariadicInfo = OPFL_Variadic6
   };
-  
+
   /// getNumFixedFromVariadicInfo - Transform an EmitNode flags word into the
   /// number of fixed arity values that should be skipped when copying from the
   /// root.
   static inline int getNumFixedFromVariadicInfo(unsigned Flags) {
     return ((Flags&OPFL_VariadicInfo) >> 4)-1;
   }
-  
-  
+
+
 protected:
   /// DAGSize - Size of DAG being instruction selected.
   ///
   unsigned DAGSize;
-  
+
   /// ISelPosition - Node iterator marking the current position of
   /// instruction selection as it procedes through the topologically-sorted
   /// node list.
   SelectionDAG::allnodes_iterator ISelPosition;
 
-  
-  /// ISelUpdater - helper class to handle updates of the 
+
+  /// ISelUpdater - helper class to handle updates of the
   /// instruction selection graph.
   class ISelUpdater : public SelectionDAG::DAGUpdateListener {
     SelectionDAG::allnodes_iterator &ISelPosition;
   public:
     explicit ISelUpdater(SelectionDAG::allnodes_iterator &isp)
       : ISelPosition(isp) {}
-    
+
     /// NodeDeleted - Handle nodes deleted from the graph. If the
     /// node being deleted is the current ISelPosition node, update
     /// ISelPosition.
@@ -195,46 +191,46 @@ protected:
       if (ISelPosition == SelectionDAG::allnodes_iterator(N))
         ++ISelPosition;
     }
-    
+
     /// NodeUpdated - Ignore updates for now.
     virtual void NodeUpdated(SDNode *N) {}
   };
-  
+
   /// ReplaceUses - replace all uses of the old node F with the use
   /// of the new node T.
   void ReplaceUses(SDValue F, SDValue T) {
     ISelUpdater ISU(ISelPosition);
     CurDAG->ReplaceAllUsesOfValueWith(F, T, &ISU);
   }
-  
+
   /// ReplaceUses - replace all uses of the old nodes F with the use
   /// of the new nodes T.
   void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) {
     ISelUpdater ISU(ISelPosition);
     CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num, &ISU);
   }
-  
+
   /// ReplaceUses - replace all uses of the old node F with the use
   /// of the new node T.
   void ReplaceUses(SDNode *F, SDNode *T) {
     ISelUpdater ISU(ISelPosition);
     CurDAG->ReplaceAllUsesWith(F, T, &ISU);
   }
-  
+
 
   /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
   /// by tblgen.  Others should not call it.
   void SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops);
 
-  
+
 public:
   // Calls to these predicates are generated by tblgen.
   bool CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
                     int64_t DesiredMaskS) const;
   bool CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
                     int64_t DesiredMaskS) const;
-  
-  
+
+
   /// CheckPatternPredicate - This function is generated by tblgen in the
   /// target.  It runs the specified pattern predicate and returns true if it
   /// succeeds or false if it fails.  The number is a private implementation
@@ -252,13 +248,14 @@ public:
     assert(0 && "Tblgen should generate the implementation of this!");
     return 0;
   }
-  
-  virtual bool CheckComplexPattern(SDNode *Root, SDValue N, unsigned PatternNo,
-                                   SmallVectorImpl<SDValue> &Result) {
+
+  virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,
+                                   unsigned PatternNo,
+                        SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {
     assert(0 && "Tblgen should generate the implementation of this!");
     return false;
   }
-  
+
   virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {
     assert(0 && "Tblgen shoudl generate this!");
     return SDValue();
@@ -267,9 +264,9 @@ public:
   SDNode *SelectCodeCommon(SDNode *NodeToMatch,
                            const unsigned char *MatcherTable,
                            unsigned TableSize);
-  
+
 private:
-  
+
   // Calls to these functions are generated by tblgen.
   SDNode *Select_INLINEASM(SDNode *N);
   SDNode *Select_UNDEF(SDNode *N);
@@ -279,9 +276,10 @@ private:
   void DoInstructionSelection();
   SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs,
                     const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo);
-  
+
   void PrepareEHLandingPad();
   void SelectAllBasicBlocks(const Function &Fn);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
   void FinishBasicBlock();
 
   void SelectBasicBlock(BasicBlock::const_iterator Begin,
@@ -289,7 +287,7 @@ private:
                         bool &HadTailCall);
   void CodeGenAndEmitDAG();
   void LowerArguments(const BasicBlock *BB);
-  
+
   void ComputeLiveOutVRegInfo();
 
   /// Create the scheduler. If a specific scheduler was specified
@@ -297,16 +295,16 @@ private:
   /// one preferred by the target.
   ///
   ScheduleDAGSDNodes *CreateScheduler();
-  
+
   /// OpcodeOffset - This is a cache used to dispatch efficiently into isel
   /// state machines that start with a OPC_SwitchOpcode node.
   std::vector<unsigned> OpcodeOffset;
-  
-  void UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
-                            const SmallVectorImpl<SDNode*> &ChainNodesMatched,
-                            SDValue InputFlag,const SmallVectorImpl<SDNode*> &F,
-                            bool isMorphNodeTo);
-    
+
+  void UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+                           const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                           SDValue InputGlue, const SmallVectorImpl<SDNode*> &F,
+                           bool isMorphNodeTo);
+
 };
 
 }
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 4cf6f367edfb..64546394ce91 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -29,7 +29,7 @@
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
 #include <cassert>
 
@@ -524,24 +524,24 @@ public:
     return X;
   }
 
-  /// getFlaggedNode - If this node has a flag operand, return the node
-  /// to which the flag operand points. Otherwise return NULL.
-  SDNode *getFlaggedNode() const {
+  /// getGluedNode - If this node has a glue operand, return the node
+  /// to which the glue operand points. Otherwise return NULL.
+  SDNode *getGluedNode() const {
     if (getNumOperands() != 0 &&
-      getOperand(getNumOperands()-1).getValueType().getSimpleVT() == MVT::Flag)
+      getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
       return getOperand(getNumOperands()-1).getNode();
     return 0;
   }
 
   // If this is a pseudo op, like copyfromreg, look to see if there is a
-  // real target node flagged to it.  If so, return the target node.
-  const SDNode *getFlaggedMachineNode() const {
+  // real target node glued to it.  If so, return the target node.
+  const SDNode *getGluedMachineNode() const {
     const SDNode *FoundNode = this;
 
-    // Climb up flag edges until a machine-opcode node is found, or the
+    // Climb up glue edges until a machine-opcode node is found, or the
     // end of the chain is reached.
     while (!FoundNode->isMachineOpcode()) {
-      const SDNode *N = FoundNode->getFlaggedNode();
+      const SDNode *N = FoundNode->getGluedNode();
       if (!N) break;
       FoundNode = N;
     }
@@ -549,11 +549,11 @@ public:
     return FoundNode;
   }
 
-  /// getFlaggedUser - If this node has a flag value with a user, return
+  /// getGluedUser - If this node has a glue value with a user, return
   /// the user (there is at most one). Otherwise return NULL.
-  SDNode *getFlaggedUser() const {
+  SDNode *getGluedUser() const {
     for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
-      if (UI.getUse().get().getValueType() == MVT::Flag)
+      if (UI.getUse().get().getValueType() == MVT::Glue)
         return *UI;
     return 0;
   }
@@ -902,6 +902,9 @@ public:
   const Value *getSrcValue() const { return MMO->getValue(); }
   int64_t getSrcValueOffset() const { return MMO->getOffset(); }
 
+  /// Returns the TBAAInfo that describes the dereference.
+  const MDNode *getTBAAInfo() const { return MMO->getTBAAInfo(); }
+
   /// getMemoryVT - Return the type of the in-memory value.
   EVT getMemoryVT() const { return MemoryVT; }
 
@@ -909,6 +912,10 @@ public:
   /// reference performed by operation.
   MachineMemOperand *getMemOperand() const { return MMO; }
 
+  const MachinePointerInfo &getPointerInfo() const {
+    return MMO->getPointerInfo();
+  }
+  
   /// refineAlignment - Update this MemSDNode's MachineMemOperand information
   /// to reflect the alignment of NewMMO, if it has a greater alignment.
   /// This must only be used when the new alignment applies to all users of
@@ -929,6 +936,7 @@ public:
     // with either an intrinsic or a target opcode.
     return N->getOpcode() == ISD::LOAD                ||
            N->getOpcode() == ISD::STORE               ||
+           N->getOpcode() == ISD::PREFETCH            ||
            N->getOpcode() == ISD::ATOMIC_CMP_SWAP     ||
            N->getOpcode() == ISD::ATOMIC_SWAP         ||
            N->getOpcode() == ISD::ATOMIC_LOAD_ADD     ||
@@ -1004,8 +1012,8 @@ public:
 
 /// MemIntrinsicSDNode - This SDNode is used for target intrinsics that touch
 /// memory and need an associated MachineMemOperand. Its opcode may be
-/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, or a target-specific opcode with a
-/// value not less than FIRST_TARGET_MEMORY_OPCODE.
+/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
+/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
 class MemIntrinsicSDNode : public MemSDNode {
 public:
   MemIntrinsicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
@@ -1021,6 +1029,7 @@ public:
     // early a node with a target opcode can be of this class
     return N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
            N->getOpcode() == ISD::INTRINSIC_VOID ||
+           N->getOpcode() == ISD::PREFETCH ||
            N->isTargetMemoryOpcode();
   }
 };
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 88044c7242c9..1da1e91be14a 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -13,10 +13,7 @@
 //
 // SlotIndex is mostly a proxy for entries of the SlotIndexList, a class which
 // is held is LiveIntervals and provides the real numbering. This allows
-// LiveIntervals to perform largely transparent renumbering. The SlotIndex
-// class does hold a PHI bit, which determines whether the index relates to a
-// PHI use or def point, or an actual instruction. See the SlotIndex class
-// description for futher information.
+// LiveIntervals to perform largely transparent renumbering.
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_SLOTINDEXES_H
@@ -130,12 +127,10 @@ namespace llvm {
 
     enum Slot { LOAD, USE, DEF, STORE, NUM };
 
-    static const unsigned PHI_BIT = 1 << 2;
+    PointerIntPair<IndexListEntry*, 2, unsigned> lie;
 
-    PointerIntPair<IndexListEntry*, 3, unsigned> lie;
-
-    SlotIndex(IndexListEntry *entry, unsigned phiAndSlot)
-      : lie(entry, phiAndSlot) {
+    SlotIndex(IndexListEntry *entry, unsigned slot)
+      : lie(entry, slot) {
       assert(entry != 0 && "Attempt to construct index with 0 pointer.");
     }
 
@@ -149,7 +144,7 @@ namespace llvm {
 
     /// Returns the slot for this SlotIndex.
     Slot getSlot() const {
-      return static_cast<Slot>(lie.getInt()  & ~PHI_BIT);
+      return static_cast<Slot>(lie.getInt());
     }
 
     static inline unsigned getHashValue(const SlotIndex &v) {
@@ -166,22 +161,13 @@ namespace llvm {
     static inline SlotIndex getTombstoneKey() {
       return SlotIndex(IndexListEntry::getTombstoneKeyEntry(), 0);
     }
-    
+
     /// Construct an invalid index.
     SlotIndex() : lie(IndexListEntry::getEmptyKeyEntry(), 0) {}
 
-    // Construct a new slot index from the given one, set the phi flag on the
-    // new index to the value of the phi parameter.
-    SlotIndex(const SlotIndex &li, bool phi)
-      : lie(&li.entry(), phi ? PHI_BIT | li.getSlot() : (unsigned)li.getSlot()){
-      assert(lie.getPointer() != 0 &&
-             "Attempt to construct index with 0 pointer.");
-    }
-
-    // Construct a new slot index from the given one, set the phi flag on the
-    // new index to the value of the phi parameter, and the slot to the new slot.
-    SlotIndex(const SlotIndex &li, bool phi, Slot s)
-      : lie(&li.entry(), phi ? PHI_BIT | s : (unsigned)s) {
+    // Construct a new slot index from the given one, and set the slot.
+    SlotIndex(const SlotIndex &li, Slot s)
+      : lie(&li.entry(), unsigned(s)) {
       assert(lie.getPointer() != 0 &&
              "Attempt to construct index with 0 pointer.");
     }
@@ -236,11 +222,6 @@ namespace llvm {
       return other.getIndex() - getIndex();
     }
 
-    /// Returns the state of the PHI bit.
-    bool isPHI() const {
-      return lie.getInt() & PHI_BIT;
-    }
-
     /// isLoad - Return true if this is a LOAD slot.
     bool isLoad() const {
       return getSlot() == LOAD;
@@ -405,9 +386,6 @@ namespace llvm {
     /// and MBB id.
     std::vector<IdxMBBPair> idx2MBBMap;
 
-    typedef DenseMap<const MachineBasicBlock*, SlotIndex> TerminatorGapsMap;
-    TerminatorGapsMap terminatorGaps;
-
     // IndexListEntry allocator.
     BumpPtrAllocator ileAllocator;
 
@@ -415,7 +393,7 @@ namespace llvm {
       IndexListEntry *entry =
         static_cast<IndexListEntry*>(
           ileAllocator.Allocate(sizeof(IndexListEntry),
-          alignof<IndexListEntry>()));
+          alignOf<IndexListEntry>()));
 
       new (entry) IndexListEntry(mi, index);
 
@@ -491,7 +469,9 @@ namespace llvm {
   public:
     static char ID;
 
-    SlotIndexes() : MachineFunctionPass(ID), indexListHead(0) {}
+    SlotIndexes() : MachineFunctionPass(ID), indexListHead(0) {
+      initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &au) const;
     virtual void releaseMemory(); 
@@ -565,26 +545,22 @@ namespace llvm {
       return nextNonNull;
     }
 
-    /// Returns the first index in the given basic block.
-    SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+    /// Return the (start,end) range of the given basic block.
+    const std::pair<SlotIndex, SlotIndex> &
+    getMBBRange(const MachineBasicBlock *mbb) const {
       MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
       assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
-      return itr->second.first;
+      return itr->second;
     }
 
-    /// Returns the last index in the given basic block.
-    SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
-      MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
-      assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
-      return itr->second.second;
+    /// Returns the first index in the given basic block.
+    SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+      return getMBBRange(mbb).first;
     }
 
-    /// Returns the terminator gap for the given index.
-    SlotIndex getTerminatorGap(const MachineBasicBlock *mbb) {
-      TerminatorGapsMap::iterator itr = terminatorGaps.find(mbb);
-      assert(itr != terminatorGaps.end() &&
-             "All MBBs should have terminator gaps in their indexes.");
-      return itr->second;
+    /// Returns the last index in the given basic block.
+    SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
+      return getMBBRange(mbb).second;
     }
 
     /// Returns the basic block which the given index falls in.
@@ -618,29 +594,6 @@ namespace llvm {
       return resVal;
     }
 
-    /// Return a list of MBBs that can be reach via any branches or
-    /// fall-throughs.
-    bool findReachableMBBs(SlotIndex start, SlotIndex end,
-                           SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
-      std::vector<IdxMBBPair>::const_iterator itr =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-
-      bool resVal = false;
-      while (itr != idx2MBBMap.end()) {
-        if (itr->first > end)
-          break;
-        MachineBasicBlock *mbb = itr->second;
-        if (getMBBEndIdx(mbb) > end)
-          break;
-        for (MachineBasicBlock::succ_iterator si = mbb->succ_begin(),
-             se = mbb->succ_end(); si != se; ++si)
-          mbbs.push_back(*si);
-        resVal = true;
-        ++itr;
-      }
-      return resVal;
-    }
-
     /// Returns the MBB covering the given range, or null if the range covers
     /// more than one basic block.
     MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
@@ -672,6 +625,9 @@ namespace llvm {
     SlotIndex insertMachineInstrInMaps(MachineInstr *mi,
                                         bool *deferredRenumber = 0) {
       assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed.");
+      // Numbering DBG_VALUE instructions could cause code generation to be
+      // affected by debug information.
+      assert(!mi->isDebugValue() && "Cannot number DBG_VALUE instructions.");
 
       MachineBasicBlock *mbb = mi->getParent();
 
@@ -789,7 +745,7 @@ namespace llvm {
       MachineFunction::iterator nextMBB =
         llvm::next(MachineFunction::iterator(mbb));
       IndexListEntry *startEntry = createEntry(0, 0);
-      IndexListEntry *terminatorEntry = createEntry(0, 0); 
+      IndexListEntry *stopEntry = createEntry(0, 0);
       IndexListEntry *nextEntry = 0;
 
       if (nextMBB == mbb->getParent()->end()) {
@@ -799,15 +755,11 @@ namespace llvm {
       }
 
       insert(nextEntry, startEntry);
-      insert(nextEntry, terminatorEntry);
+      insert(nextEntry, stopEntry);
 
       SlotIndex startIdx(startEntry, SlotIndex::LOAD);
-      SlotIndex terminatorIdx(terminatorEntry, SlotIndex::PHI_BIT);
       SlotIndex endIdx(nextEntry, SlotIndex::LOAD);
 
-      terminatorGaps.insert(
-        std::make_pair(mbb, terminatorIdx));
-
       mbb2IdxMap.insert(
         std::make_pair(mbb, std::make_pair(startIdx, endIdx)));
 
@@ -828,6 +780,20 @@ namespace llvm {
   };
 
 
+  // Specialize IntervalMapInfo for half-open slot index intervals.
+  template <typename> struct IntervalMapInfo;
+  template <> struct IntervalMapInfo<SlotIndex> {
+    static inline bool startLess(const SlotIndex &x, const SlotIndex &a) {
+      return x < a;
+    }
+    static inline bool stopLess(const SlotIndex &b, const SlotIndex &x) {
+      return b <= x;
+    }
+    static inline bool adjacent(const SlotIndex &a, const SlotIndex &b) {
+      return a == b;
+    }
+  };
+
 }
 
 #endif // LLVM_CODEGEN_LIVEINDEX_H 
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index d8f037385957..fba3e48c475e 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -57,6 +57,8 @@ public:
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
+  virtual const MCSection *getEHFrameSection() const;
+
   const MCSection *getDataRelSection() const { return DataRelSection; }
 
   /// getSectionForConstant - Given a constant with the SectionKind, return a
@@ -121,6 +123,8 @@ public:
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
+  virtual const MCSection *getEHFrameSection() const;
+
   virtual const MCSection *
   SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const;
@@ -184,6 +188,8 @@ public:
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
+  virtual const MCSection *getEHFrameSection() const;
+
   virtual const MCSection *getDrectveSection() const { return DrectveSection; }
 
   virtual const MCSection *
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 51f324c959c0..22d16222078d 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -18,7 +18,7 @@
 
 #include <cassert>
 #include <string>
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MathExtras.h"
 
 namespace llvm {
@@ -26,7 +26,10 @@ namespace llvm {
   class LLVMContext;
   struct EVT;
 
-  class MVT { // MVT = Machine Value Type
+  /// MVT - Machine Value Type.  Every type that is supported natively by some
+  /// processor targeted by LLVM occurs here.  This means that any legal value
+  /// type can be represented by a MVT.
+  class MVT {
   public:
     enum SimpleValueType {
       // If you change this numbering, you must change the values in
@@ -74,14 +77,16 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v2i8,
       LAST_VECTOR_VALUETYPE  = v4f64,
 
-      Flag           =  33,   // This glues nodes together during pre-RA sched
+      x86mmx         =  33,   // This is an X86 MMX value
 
-      isVoid         =  34,   // This has no value
+      Glue           =  34,   // This glues nodes together during pre-RA sched
 
-      LAST_VALUETYPE =  35,   // This always remains at the end of the list.
+      isVoid         =  35,   // This has no value
+
+      LAST_VALUETYPE =  36,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
-      // EVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
+      // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
       // This value must be a multiple of 32.
       MAX_ALLOWED_VALUETYPE = 64,
 
@@ -124,13 +129,14 @@ namespace llvm {
 
     MVT() : SimpleTy((SimpleValueType)(INVALID_SIMPLE_VALUE_TYPE)) {}
     MVT(SimpleValueType SVT) : SimpleTy(SVT) { }
-    
+
     bool operator>(const MVT& S)  const { return SimpleTy >  S.SimpleTy; }
     bool operator<(const MVT& S)  const { return SimpleTy <  S.SimpleTy; }
     bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; }
+    bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; }
     bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
     bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
-    
+
     /// isFloatingPoint - Return true if this is a FP, or a vector FP type.
     bool isFloatingPoint() const {
       return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) ||
@@ -149,14 +155,14 @@ namespace llvm {
       return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE &&
               SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
     }
-    
+
     /// isPow2VectorType - Returns true if the given vector is a power of 2.
     bool isPow2VectorType() const {
       unsigned NElts = getVectorNumElements();
       return !(NElts & (NElts - 1));
     }
 
-    /// getPow2VectorType - Widens the length of the given vector EVT up to
+    /// getPow2VectorType - Widens the length of the given vector MVT up to
     /// the nearest power of 2 and returns that type.
     MVT getPow2VectorType() const {
       if (isPow2VectorType())
@@ -172,7 +178,7 @@ namespace llvm {
     MVT getScalarType() const {
       return isVector() ? getVectorElementType() : *this;
     }
-    
+
     MVT getVectorElementType() const {
       switch (SimpleTy) {
       default:
@@ -200,7 +206,7 @@ namespace llvm {
       case v4f64: return f64;
       }
     }
-    
+
     unsigned getVectorNumElements() const {
       switch (SimpleTy) {
       default:
@@ -228,7 +234,7 @@ namespace llvm {
       case v1i64: return 1;
       }
     }
-    
+
     unsigned getSizeInBits() const {
       switch (SimpleTy) {
       case iPTR:
@@ -247,6 +253,7 @@ namespace llvm {
       case i32 :
       case v4i8:
       case v2i16: return 32;
+      case x86mmx:
       case f64 :
       case i64 :
       case v8i8:
@@ -273,7 +280,19 @@ namespace llvm {
       case v8i64: return 512;
       }
     }
-    
+
+    /// getStoreSize - Return the number of bytes overwritten by a store
+    /// of the specified value type.
+    unsigned getStoreSize() const {
+      return (getSizeInBits() + 7) / 8;
+    }
+
+    /// getStoreSizeInBits - Return the number of bits overwritten by a store
+    /// of the specified value type.
+    unsigned getStoreSizeInBits() const {
+      return getStoreSize() * 8;
+    }
+
     static MVT getFloatingPointVT(unsigned BitWidth) {
       switch (BitWidth) {
       default:
@@ -288,7 +307,7 @@ namespace llvm {
         return MVT::f128;
       }
     }
-    
+
     static MVT getIntegerVT(unsigned BitWidth) {
       switch (BitWidth) {
       default:
@@ -307,7 +326,7 @@ namespace llvm {
         return MVT::i128;
       }
     }
-    
+
     static MVT getVectorVT(MVT VT, unsigned NumElements) {
       switch (VT.SimpleTy) {
       default:
@@ -350,7 +369,11 @@ namespace llvm {
     }
   };
 
-  struct EVT { // EVT = Extended Value Type
+
+  /// EVT - Extended Value Type.  Capable of holding value types which are not
+  /// native for any processor (such as the i12345 type), as well as the types
+  /// a MVT can represent.
+  struct EVT {
   private:
     MVT V;
     const Type *LLVMTy;
@@ -527,7 +550,7 @@ namespace llvm {
     EVT getScalarType() const {
       return isVector() ? getVectorElementType() : *this;
     }
-    
+
     /// getVectorElementType - Given a vector type, return the type of
     /// each element.
     EVT getVectorElementType() const {
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 8151c0be3664..a1163f7a2f98 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -46,17 +46,18 @@ def v4i32  : ValueType<128, 22>;   //  4 x i32 vector value
 def v8i32  : ValueType<256, 23>;   //  8 x i32 vector value
 def v1i64  : ValueType<64 , 24>;   //  1 x i64 vector value
 def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
-def v4i64  : ValueType<256, 26>;   //  4 x f64 vector value
-def v8i64  : ValueType<512, 27>;   //  4 x f64 vector value
+def v4i64  : ValueType<256, 26>;   //  4 x i64 vector value
+def v8i64  : ValueType<512, 27>;   //  8 x i64 vector value
 
-def v2f32  : ValueType<64,  28>;   //  2 x f32 vector value
+def v2f32  : ValueType<64 , 28>;   //  2 x f32 vector value
 def v4f32  : ValueType<128, 29>;   //  4 x f32 vector value
 def v8f32  : ValueType<256, 30>;   //  8 x f32 vector value
 def v2f64  : ValueType<128, 31>;   //  2 x f64 vector value
 def v4f64  : ValueType<256, 32>;   //  4 x f64 vector value
 
-def FlagVT : ValueType<0  , 33>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 34>;   // Produces no value
+def x86mmx : ValueType<64 , 33>;   // X86 MMX value
+def FlagVT : ValueType<0  , 34>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 35>;   // Produces no value
 
 def MetadataVT: ValueType<0, 250>; // Metadata
 
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index 619c904f15d6..e1eea325e348 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -21,7 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 
 #include <cassert>
 #include <string>
diff --git a/include/llvm/CompilerDriver/Tool.h b/include/llvm/CompilerDriver/Tool.h
index 45ef50d0b5af..d0926ba98312 100644
--- a/include/llvm/CompilerDriver/Tool.h
+++ b/include/llvm/CompilerDriver/Tool.h
@@ -18,7 +18,7 @@
 
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 
 #include <string>
 #include <vector>
@@ -58,7 +58,7 @@ namespace llvmc {
 
     virtual const char*  Name() const = 0;
     virtual const char** InputLanguages() const = 0;
-    virtual const char*  OutputLanguage() const = 0;
+    virtual const char** OutputLanguages() const = 0;
 
     virtual bool IsJoin() const = 0;
     virtual bool WorksOnEmpty() const = 0;
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index e8feabffdaf7..bf69375ff562 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -1,4 +1,3 @@
-
 /**************************************
 ** Created by Kevin from config.h.in **
 ***************************************/
@@ -6,50 +5,35 @@
 #ifndef CONFIG_H
 #define CONFIG_H
 
-/* Define if dlopen(0) will open the symbols of the program */
-#undef CAN_DLOPEN_SELF
-
 /* Define if CBE is enabled for printf %a output */
-#undef ENABLE_CBE_PRINTF_A
-
-/* Directories clang will search for headers */
-#define C_INCLUDE_DIRS "${C_INCLUDE_DIRS}"
-
-/* Directory clang will search for libstdc++ headers */
-#define CXX_INCLUDE_ROOT "${CXX_INCLUDE_ROOT}"
-
-/* Architecture of libstdc++ headers */
-#define CXX_INCLUDE_ARCH "${CXX_INCLUDE_ARCH}"
-
-/* 32 bit multilib directory */
-#define CXX_INCLUDE_32BIT_DIR "${CXX_INCLUDE_32BIT_DIR}"
-
-/* 64 bit multilib directory */
-#define CXX_INCLUDE_64BIT_DIR "${CXX_INCLUDE_64BIT_DIR}"
+#cmakedefine ENABLE_CBE_PRINTF_A ${ENABLE_CBE_PRINTF_A}
 
 /* Define if position independent code is enabled */
-#cmakedefine ENABLE_PIC ${ENABLE_PIC}
+#cmakedefine ENABLE_PIC
 
 /* Define if threads enabled */
 #cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
 
+/* Define if timestamp information (e.g., __DATE___) is allowed */
+#cmakedefine ENABLE_TIMESTAMPS ${ENABLE_TIMESTAMPS}
+
 /* Define to 1 if you have the `argz_append' function. */
-#undef HAVE_ARGZ_APPEND
+#cmakedefine HAVE_ARGZ_APPEND ${HAVE_ARGZ_APPEND}
 
 /* Define to 1 if you have the `argz_create_sep' function. */
-#undef HAVE_ARGZ_CREATE_SEP
+#cmakedefine HAVE_ARGZ_CREATE_SEP ${HAVE_ARGZ_CREATE_SEP}
 
 /* Define to 1 if you have the <argz.h> header file. */
 #cmakedefine HAVE_ARGZ_H ${HAVE_ARGZ_H}
 
 /* Define to 1 if you have the `argz_insert' function. */
-#undef HAVE_ARGZ_INSERT
+#cmakedefine HAVE_ARGZ_INSERT ${HAVE_ARGZ_INSERT}
 
 /* Define to 1 if you have the `argz_next' function. */
-#undef HAVE_ARGZ_NEXT
+#cmakedefine HAVE_ARGZ_NEXT ${HAVE_ARGZ_NEXT}
 
 /* Define to 1 if you have the `argz_stringify' function. */
-#undef HAVE_ARGZ_STRINGIFY
+#cmakedefine HAVE_ARGZ_STRINGIFY ${HAVE_ARGZ_STRINGIFY}
 
 /* Define to 1 if you have the <assert.h> header file. */
 #cmakedefine HAVE_ASSERT_H ${HAVE_ASSERT_H}
@@ -60,9 +44,6 @@
 /* Define to 1 if you have the `bcopy' function. */
 #undef HAVE_BCOPY
 
-/* Does not have bi-directional iterator */
-#undef HAVE_BI_ITERATOR
-
 /* Define to 1 if you have the `ceilf' function. */
 #cmakedefine HAVE_CEILF ${HAVE_CEILF}
 
@@ -70,10 +51,20 @@
 #cmakedefine HAVE_CIRCO ${HAVE_CIRCO}
 
 /* Define to 1 if you have the `closedir' function. */
-#undef HAVE_CLOSEDIR
+#cmakedefine HAVE_CLOSEDIR ${HAVE_CLOSEDIR}
+
+/* Define to 1 if you have the <CrashReporterClient.h> header file. */
+#undef HAVE_CRASHREPORTERCLIENT_H
+
+/* Define if __crashreporter_info__ exists. */
+#undef HAVE_CRASHREPORTER_INFO
 
 /* Define to 1 if you have the <ctype.h> header file. */
-#undef HAVE_CTYPE_H
+#cmakedefine HAVE_CTYPE_H ${HAVE_CTYPE_H}
+
+/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
+   don't. */
+#cmakedefine01 HAVE_DECL_STRERROR_S
 
 /* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
    */
@@ -86,13 +77,13 @@
 #cmakedefine HAVE_DLD_H ${HAVE_DLD_H}
 
 /* Define to 1 if you have the `dlerror' function. */
-#undef HAVE_DLERROR
+#cmakedefine HAVE_DLERROR ${HAVE_DLERROR}
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #cmakedefine HAVE_DLFCN_H ${HAVE_DLFCN_H}
 
 /* Define if dlopen() is available on this platform. */
-#undef HAVE_DLOPEN
+#cmakedefine HAVE_DLOPEN ${HAVE_DLOPEN}
 
 /* Define to 1 if you have the <dl.h> header file. */
 #cmakedefine HAVE_DL_H ${HAVE_DL_H}
@@ -110,7 +101,7 @@
 #cmakedefine HAVE_ERRNO_H ${HAVE_ERRNO_H}
 
 /* Define to 1 if the system has the type `error_t'. */
-#undef HAVE_ERROR_T
+#cmakedefine HAVE_ERROR_T ${HAVE_ERROR_T}
 
 /* Define to 1 if you have the <execinfo.h> header file. */
 #cmakedefine HAVE_EXECINFO_H ${HAVE_EXECINFO_H}
@@ -121,41 +112,41 @@
 /* Define if the neat program is available */
 #cmakedefine HAVE_FDP ${HAVE_FDP}
 
+/* Define to 1 if you have the <fenv.h> header file. */
+#cmakedefine HAVE_FENV_H ${HAVE_FENV_H}
+
+/* Define if libffi is available on this platform. */
+#cmakedefine HAVE_FFI_CALL ${HAVE_FFI_CALL}
+
+/* Define to 1 if you have the <ffi/ffi.h> header file. */
+#cmakedefine HAVE_FFI_FFI_H ${HAVE_FFI_FFI_H}
+
+/* Define to 1 if you have the <ffi.h> header file. */
+#cmakedefine HAVE_FFI_H ${HAVE_FFI_H}
+
 /* Set to 1 if the finite function is found in <ieeefp.h> */
 #cmakedefine HAVE_FINITE_IN_IEEEFP_H ${HAVE_FINITE_IN_IEEEFP_H}
 
 /* Define to 1 if you have the `floorf' function. */
 #cmakedefine HAVE_FLOORF ${HAVE_FLOORF}
 
-/* Does not have forward iterator */
-#undef HAVE_FWD_ITERATOR
+/* Define to 1 if you have the `fmodf' function. */
+#cmakedefine HAVE_FMODF ${HAVE_FMODF}
 
 /* Define to 1 if you have the `getcwd' function. */
-#undef HAVE_GETCWD
+#cmakedefine HAVE_GETCWD ${HAVE_GETCWD}
 
 /* Define to 1 if you have the `getpagesize' function. */
 #cmakedefine HAVE_GETPAGESIZE ${HAVE_GETPAGESIZE}
 
 /* Define to 1 if you have the `getrlimit' function. */
-#undef HAVE_GETRLIMIT
+#cmakedefine HAVE_GETRLIMIT ${HAVE_GETRLIMIT}
 
 /* Define to 1 if you have the `getrusage' function. */
 #cmakedefine HAVE_GETRUSAGE ${HAVE_GETRUSAGE}
 
 /* Define to 1 if you have the `gettimeofday' function. */
-#undef HAVE_GETTIMEOFDAY
-
-/* Does not have <hash_map> */
-#undef HAVE_GLOBAL_HASH_MAP
-
-/* Does not have hash_set in global namespace */
-#undef HAVE_GLOBAL_HASH_SET
-
-/* Does not have ext/hash_map */
-#undef HAVE_GNU_EXT_HASH_MAP
-
-/* Does not have hash_set in gnu namespace */
-#undef HAVE_GNU_EXT_HASH_SET
+#cmakedefine HAVE_GETTIMEOFDAY ${HAVE_GETTIMEOFDAY}
 
 /* Define if the Graphviz program is available */
 #undef HAVE_GRAPHVIZ
@@ -164,10 +155,10 @@
 #cmakedefine HAVE_GV ${HAVE_GV}
 
 /* Define to 1 if you have the `index' function. */
-#undef HAVE_INDEX
+#cmakedefine HAVE_INDEX ${HAVE_INDEX}
 
 /* Define to 1 if the system has the type `int64_t'. */
-#undef HAVE_INT64_T
+#cmakedefine HAVE_INT64_T ${HAVE_INT64_T}
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
@@ -188,7 +179,7 @@
 #cmakedefine HAVE_ISNAN_IN_MATH_H ${HAVE_ISNAN_IN_MATH_H}
 
 /* Define if you have the libdl library or equivalent. */
-#undef HAVE_LIBDL
+#cmakedefine HAVE_LIBDL ${HAVE_LIBDL}
 
 /* Define to 1 if you have the `imagehlp' library (-limagehlp). */
 #cmakedefine HAVE_LIBIMAGEHLP ${HAVE_LIBIMAGEHLP}
@@ -208,6 +199,9 @@
 /* Define to 1 if you have the <limits.h> header file. */
 #cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
 
+/* Define if you can use -Wl,-export-dynamic. */
+#define HAVE_LINK_EXPORT_DYNAMIC 1
+
 /* Define to 1 if you have the <link.h> header file. */
 #cmakedefine HAVE_LINK_H ${HAVE_LINK_H}
 
@@ -216,13 +210,13 @@
 #undef HAVE_LINK_R
 
 /* Define to 1 if you have the `longjmp' function. */
-#undef HAVE_LONGJMP
+#cmakedefine HAVE_LONGJMP ${HAVE_LONGJMP}
 
 /* Define to 1 if you have the <mach/mach.h> header file. */
-#undef HAVE_MACH_MACH_H
+#cmakedefine HAVE_MACH_MACH_H ${HAVE_MACH_MACH_H}
 
 /* Define to 1 if you have the <mach-o/dyld.h> header file. */
-#undef HAVE_MACH_O_DYLD_H
+#cmakedefine HAVE_MACH_O_DYLD_H ${HAVE_MACH_O_DYLD_H}
 
 /* Define if mallinfo() is available on this platform. */
 #cmakedefine HAVE_MALLINFO ${HAVE_MALLINFO}
@@ -237,10 +231,10 @@
 #cmakedefine HAVE_MALLOC_ZONE_STATISTICS ${HAVE_MALLOC_ZONE_STATISTICS}
 
 /* Define to 1 if you have the `memcpy' function. */
-#undef HAVE_MEMCPY
+#cmakedefine HAVE_MEMCPY ${HAVE_MEMCPY}
 
 /* Define to 1 if you have the `memmove' function. */
-#undef HAVE_MEMMOVE
+#cmakedefine HAVE_MEMMOVE ${HAVE_MEMMOVE}
 
 /* Define to 1 if you have the <memory.h> header file. */
 #cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H}
@@ -264,9 +258,6 @@
 /* Define if mmap() can map files into memory */
 #undef HAVE_MMAP_FILE
 
-/* define if the compiler implements namespaces */
-#undef HAVE_NAMESPACES
-
 /* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
 #cmakedefine HAVE_NDIR_H ${HAVE_NDIR_H}
 
@@ -277,7 +268,13 @@
 #cmakedefine HAVE_NEATO ${HAVE_NEATO}
 
 /* Define to 1 if you have the `opendir' function. */
-#undef HAVE_OPENDIR
+#cmakedefine HAVE_OPENDIR ${HAVE_OPENDIR}
+
+/* Define to 1 if you have the `posix_spawn' function. */
+#cmakedefine HAVE_POSIX_SPAWN ${HAVE_POSIX_SPAWN}
+
+/* Define to 1 if you have the `powf' function. */
+#cmakedefine HAVE_POWF ${HAVE_POWF}
 
 /* Define if libtool can extract symbol lists from object files. */
 #undef HAVE_PRELOADED_SYMBOLS
@@ -285,7 +282,10 @@
 /* Define to have the %a format string */
 #undef HAVE_PRINTF_A
 
-/* Have pthread.h */
+/* Have pthread_getspecific */
+#cmakedefine HAVE_PTHREAD_GETSPECIFIC ${HAVE_PTHREAD_GETSPECIFIC}
+
+/* Define to 1 if you have the <pthread.h> header file. */
 #cmakedefine HAVE_PTHREAD_H ${HAVE_PTHREAD_H}
 
 /* Have pthread_mutex_lock */
@@ -294,30 +294,27 @@
 /* Have pthread_rwlock_init */
 #cmakedefine HAVE_PTHREAD_RWLOCK_INIT ${HAVE_PTHREAD_RWLOCK_INIT}
 
-/* Have pthread_getspecific */
-#cmakedefine HAVE_PTHREAD_GETSPECIFIC ${HAVE_PTHREAD_GETSPECIFIC}
-
 /* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
-#undef HAVE_RAND48
+#cmakedefine HAVE_RAND48 ${HAVE_RAND48}
 
 /* Define to 1 if you have the `readdir' function. */
-#undef HAVE_READDIR
+#cmakedefine HAVE_READDIR ${HAVE_READDIR}
 
 /* Define to 1 if you have the `realpath' function. */
 #undef HAVE_REALPATH
 
 /* Define to 1 if you have the `rindex' function. */
-#undef HAVE_RINDEX
+#cmakedefine HAVE_RINDEX ${HAVE_RINDEX}
 
 /* Define to 1 if you have the `rintf' function. */
 #undef HAVE_RINTF
 
-/* Define to 1 if you have the `roundf' function. */
-#undef HAVE_ROUNDF
-
 /* Define to 1 if you have the `round' function. */
 #cmakedefine HAVE_ROUND ${HAVE_ROUND}
 
+/* Define to 1 if you have the `roundf' function. */
+#undef HAVE_ROUNDF
+
 /* Define to 1 if you have the `sbrk' function. */
 #cmakedefine HAVE_SBRK ${HAVE_SBRK}
 
@@ -325,7 +322,7 @@
 #cmakedefine HAVE_SETENV ${HAVE_SETENV}
 
 /* Define to 1 if you have the `setjmp' function. */
-#undef HAVE_SETJMP
+#cmakedefine HAVE_SETJMP ${HAVE_SETJMP}
 
 /* Define to 1 if you have the <setjmp.h> header file. */
 #cmakedefine HAVE_SETJMP_H ${HAVE_SETJMP_H}
@@ -337,13 +334,13 @@
 #undef HAVE_SHL_LOAD
 
 /* Define to 1 if you have the `siglongjmp' function. */
-#undef HAVE_SIGLONGJMP
+#cmakedefine HAVE_SIGLONGJMP ${HAVE_SIGLONGJMP}
 
 /* Define to 1 if you have the <signal.h> header file. */
 #cmakedefine HAVE_SIGNAL_H ${HAVE_SIGNAL_H}
 
 /* Define to 1 if you have the `sigsetjmp' function. */
-#undef HAVE_SIGSETJMP
+#cmakedefine HAVE_SIGSETJMP ${HAVE_SIGSETJMP}
 
 /* Define to 1 if you have the <stdint.h> header file. */
 #cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
@@ -354,29 +351,20 @@
 /* Define to 1 if you have the <stdlib.h> header file. */
 #cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H}
 
-/* Does not have ext/hash_map> */
-#undef HAVE_STD_EXT_HASH_MAP
-
-/* Does not have hash_set in std namespace */
-#undef HAVE_STD_EXT_HASH_SET
-
 /* Set to 1 if the std::isinf function is found in <cmath> */
 #undef HAVE_STD_ISINF_IN_CMATH
 
 /* Set to 1 if the std::isnan function is found in <cmath> */
 #undef HAVE_STD_ISNAN_IN_CMATH
 
-/* Does not have std namespace iterator */
-#undef HAVE_STD_ITERATOR
-
 /* Define to 1 if you have the `strchr' function. */
-#undef HAVE_STRCHR
+#cmakedefine HAVE_STRCHR ${HAVE_STRCHR}
 
 /* Define to 1 if you have the `strcmp' function. */
-#undef HAVE_STRCMP
+#cmakedefine HAVE_STRCMP ${HAVE_STRCMP}
 
 /* Define to 1 if you have the `strdup' function. */
-#undef HAVE_STRDUP
+#cmakedefine HAVE_STRDUP ${HAVE_STRDUP}
 
 /* Define to 1 if you have the `strerror' function. */
 #cmakedefine HAVE_STRERROR ${HAVE_STRERROR}
@@ -384,23 +372,23 @@
 /* Define to 1 if you have the `strerror_r' function. */
 #cmakedefine HAVE_STRERROR_R ${HAVE_STRERROR_R}
 
-/* Define to 1 if you have the `strerror_s' function. */
-#cmakedefine HAVE_STRERROR_S ${HAVE_STRERROR_S}
-
 /* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
+#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H}
 
 /* Define to 1 if you have the <string.h> header file. */
 #cmakedefine HAVE_STRING_H ${HAVE_STRING_H}
 
 /* Define to 1 if you have the `strrchr' function. */
-#undef HAVE_STRRCHR
+#cmakedefine HAVE_STRRCHR ${HAVE_STRRCHR}
+
+/* Define to 1 if you have the `strtof' function. */
+#cmakedefine HAVE_STRTOF ${HAVE_STRTOF}
 
 /* Define to 1 if you have the `strtoll' function. */
 #cmakedefine HAVE_STRTOLL ${HAVE_STRTOLL}
 
 /* Define to 1 if you have the `strtoq' function. */
-#undef HAVE_STRTOQ
+#cmakedefine HAVE_STRTOQ ${HAVE_STRTOQ}
 
 /* Define to 1 if you have the `sysconf' function. */
 #undef HAVE_SYSCONF
@@ -437,15 +425,12 @@
 /* Define to 1 if you have the <sys/types.h> header file. */
 #cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
 
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H}
+
 /* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
 #cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H}
 
-/* Define if the neat program is available */
-#cmakedefine HAVE_TWOPI ${HAVE_TWOPI}
-
-/* Define to 1 if the system has the type `uint64_t'. */
-#undef HAVE_UINT64_T
-
 /* Define to 1 if you have the <termios.h> header file. */
 #cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}
 
@@ -456,7 +441,7 @@
 #cmakedefine HAVE_UTIME_H ${HAVE_UTIME_H}
 
 /* Define to 1 if the system has the type `u_int64_t'. */
-#undef HAVE_U_INT64_T
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
 
 /* Define to 1 if you have the <valgrind/valgrind.h> header file. */
 #cmakedefine HAVE_VALGRIND_VALGRIND_H ${HAVE_VALGRIND_VALGRIND_H}
@@ -464,6 +449,66 @@
 /* Define to 1 if you have the <windows.h> header file. */
 #cmakedefine HAVE_WINDOWS_H ${HAVE_WINDOWS_H}
 
+/* Define to 1 if you have the `writev' function. */
+#cmakedefine HAVE_WRITEV ${HAVE_WRITEV}
+
+/* Define if the xdot.py program is available */
+#undef HAVE_XDOT_PY
+
+/* Have host's _alloca */
+#cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA}
+
+/* Have host's __alloca */
+#cmakedefine HAVE___ALLOCA ${HAVE___ALLOCA}
+
+/* Have host's __ashldi3 */
+#cmakedefine HAVE___ASHLDI3 ${HAVE___ASHLDI3}
+
+/* Have host's __ashrdi3 */
+#cmakedefine HAVE___ASHRDI3 ${HAVE___ASHRDI3}
+
+/* Have host's __chkstk */
+#cmakedefine HAVE___CHKSTK ${HAVE___CHKSTK}
+
+/* Have host's __cmpdi2 */
+#cmakedefine HAVE___CMPDI2 ${HAVE___CMPDI2}
+
+/* Have host's __divdi3 */
+#cmakedefine HAVE___DIVDI3 ${HAVE___DIVDI3}
+
+/* Define to 1 if you have the `__dso_handle' function. */
+#undef HAVE___DSO_HANDLE
+
+/* Have host's __fixdfdi */
+#cmakedefine HAVE___FIXDFDI ${HAVE___FIXDFDI}
+
+/* Have host's __fixsfdi */
+#cmakedefine HAVE___FIXSFDI ${HAVE___FIXSFDI}
+
+/* Have host's __floatdidf */
+#cmakedefine HAVE___FLOATDIDF ${HAVE___FLOATDIDF}
+
+/* Have host's __lshrdi3 */
+#cmakedefine HAVE___LSHRDI3 ${HAVE___LSHRDI3}
+
+/* Have host's __main */
+#cmakedefine HAVE___MAIN ${HAVE___MAIN}
+
+/* Have host's __moddi3 */
+#cmakedefine HAVE___MODDI3 ${HAVE___MODDI3}
+
+/* Have host's __udivdi3 */
+#cmakedefine HAVE___UDIVDI3 ${HAVE___UDIVDI3}
+
+/* Have host's __umoddi3 */
+#cmakedefine HAVE___UMODDI3 ${HAVE___UMODDI3}
+
+/* Have host's ___chkstk */
+#cmakedefine HAVE____CHKSTK ${HAVE____CHKSTK}
+
+/* Linker version detected at compile time. */
+#undef HOST_LINK_VERSION
+
 /* Installation directory for binary executables */
 #undef LLVM_BINDIR
 
@@ -473,6 +518,9 @@
 /* Installation directory for documentation */
 #undef LLVM_DATADIR
 
+/* Installation directory for documentation */
+#undef LLVM_DOCSDIR
+
 /* Installation directory for config files */
 #undef LLVM_ETCDIR
 
@@ -494,15 +542,24 @@
 /* Build multithreading support into LLVM */
 #cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED}
 
+/* LLVM architecture name for the native architecture, if available */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+
+/* LLVM name for the native Target init function, if available */
+#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
+
+/* LLVM name for the native TargetInfo init function, if available */
+#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+
 /* Define if this is Unixish platform */
 #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
 
 /* Define if this is Win32ish platform */
 #cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32}
 
-/* Added by Kevin -- Maximum path length */
-#cmakedefine MAXPATHLEN ${MAXPATHLEN}
-
 /* Define to path to circo program if found or 'echo circo' otherwise */
 #cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
 
@@ -527,6 +584,9 @@
 /* Define to path to twopi program if found or 'echo twopi' otherwise */
 #cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
 
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#undef LLVM_PATH_XDOT_PY
+
 /* Installation prefix directory */
 #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
 
@@ -572,6 +632,9 @@
 /* Define as the return type of signal handlers (`int' or `void'). */
 #cmakedefine RETSIGTYPE ${RETSIGTYPE}
 
+/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
+#undef STAT_MACROS_BROKEN
+
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be
    automatically deduced at runtime.
@@ -592,28 +655,37 @@
 /* Define to 1 if your <sys/time.h> declares `struct tm'. */
 #undef TM_IN_SYS_TIME
 
+/* Define if we have the oprofile JIT-support library */
+#undef USE_OPROFILE
+
 /* Define if use udis86 library */
 #undef USE_UDIS86
 
-/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
-   `char[]'. */
-#undef YYTEXT_POINTER
-
 /* Define to empty if `const' does not conform to ANSI C. */
 #undef const
 
 /* Define to a type to use for `error_t' if it is not otherwise available. */
 #cmakedefine error_t ${error_t}
 
-/* Define to a type to use for `mode_t' if it is not otherwise available. */
-#cmakedefine mode_t ${mode_t}
-
 /* Define to `int' if <sys/types.h> does not define. */
 #undef pid_t
 
 /* Define to `unsigned int' if <sys/types.h> does not define. */
 #undef size_t
 
+/* Define if the neat program is available */
+#cmakedefine HAVE_TWOPI ${HAVE_TWOPI}
+
+/* Define to 1 if the system has the type `uint64_t'. */
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+
+/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
+   `char[]'. */
+#undef YYTEXT_POINTER
+
+/* Define to a type to use for `mode_t' if it is not otherwise available. */
+#cmakedefine mode_t ${mode_t}
+
 /* Define to a function replacing strtoll */
 #cmakedefine strtoll ${strtoll}
 
@@ -626,16 +698,40 @@
 /* Define to a function implementing strdup */
 #cmakedefine strdup ${strdup}
 
-/* LLVM architecture name for the native architecture, if available */
-#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
-  
-/* LLVM name for the native Target init function, if available */
-#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
- 
-/* LLVM name for the native TargetInfo init function, if available */
-#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
- 
-/* LLVM name for the native AsmPrinter init function, if available */
-#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+/* Define to 1 if you have the `_chsize_s' function. */
+#cmakedefine HAVE__CHSIZE_S ${HAVE__CHSIZE_S}
+
+/* define if the compiler implements namespaces */
+#undef HAVE_NAMESPACES
+
+/* Does not have std namespace iterator */
+#undef HAVE_STD_ITERATOR
+
+/* Does not have forward iterator */
+#undef HAVE_FWD_ITERATOR
+
+/* Does not have bi-directional iterator */
+#undef HAVE_BI_ITERATOR
+
+/* Does not have <hash_map> */
+#undef HAVE_GLOBAL_HASH_MAP
+
+/* Does not have hash_set in global namespace */
+#undef HAVE_GLOBAL_HASH_SET
+
+/* Does not have ext/hash_map */
+#undef HAVE_GNU_EXT_HASH_MAP
+
+/* Does not have hash_set in gnu namespace */
+#undef HAVE_GNU_EXT_HASH_SET
+
+/* Does not have ext/hash_map> */
+#undef HAVE_STD_EXT_HASH_MAP
+
+/* Does not have hash_set in std namespace */
+#undef HAVE_STD_EXT_HASH_SET
+
+/* Added by Kevin -- Maximum path length */
+#cmakedefine MAXPATHLEN ${MAXPATHLEN}
 
 #endif
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index d62da1ab0377..14c44b4b1e20 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -3,6 +3,9 @@
 #ifndef CONFIG_H
 #define CONFIG_H
 
+/* Relative directory for resource files */
+#undef CLANG_RESOURCE_DIR
+
 /* 32 bit multilib directory. */
 #undef CXX_INCLUDE_32BIT_DIR
 
@@ -69,9 +72,16 @@
 /* Define to 1 if you have the <CrashReporterClient.h> header file. */
 #undef HAVE_CRASHREPORTERCLIENT_H
 
+/* Define if __crashreporter_info__ exists. */
+#undef HAVE_CRASHREPORTER_INFO
+
 /* Define to 1 if you have the <ctype.h> header file. */
 #undef HAVE_CTYPE_H
 
+/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
+   don't. */
+#undef HAVE_DECL_STRERROR_S
+
 /* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
    */
 #undef HAVE_DIRENT_H
@@ -118,6 +128,9 @@
 /* Define if the neat program is available */
 #undef HAVE_FDP
 
+/* Define to 1 if you have the <fenv.h> header file. */
+#undef HAVE_FENV_H
+
 /* Define if libffi is available on this platform. */
 #undef HAVE_FFI_CALL
 
@@ -375,9 +388,6 @@
 /* Define to 1 if you have the `strerror_r' function. */
 #undef HAVE_STRERROR_R
 
-/* Define to 1 if you have the `strerror_s' function. */
-#undef HAVE_STRERROR_S
-
 /* Define to 1 if you have the <strings.h> header file. */
 #undef HAVE_STRINGS_H
 
@@ -431,6 +441,9 @@
 /* Define to 1 if you have the <sys/types.h> header file. */
 #undef HAVE_SYS_TYPES_H
 
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#undef HAVE_SYS_UIO_H
+
 /* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
 #undef HAVE_SYS_WAIT_H
 
@@ -458,9 +471,63 @@
 /* Define to 1 if you have the <windows.h> header file. */
 #undef HAVE_WINDOWS_H
 
+/* Define to 1 if you have the `writev' function. */
+#undef HAVE_WRITEV
+
+/* Define if the xdot.py program is available */
+#undef HAVE_XDOT_PY
+
+/* Have host's _alloca */
+#undef HAVE__ALLOCA
+
+/* Have host's __alloca */
+#undef HAVE___ALLOCA
+
+/* Have host's __ashldi3 */
+#undef HAVE___ASHLDI3
+
+/* Have host's __ashrdi3 */
+#undef HAVE___ASHRDI3
+
+/* Have host's __chkstk */
+#undef HAVE___CHKSTK
+
+/* Have host's __cmpdi2 */
+#undef HAVE___CMPDI2
+
+/* Have host's __divdi3 */
+#undef HAVE___DIVDI3
+
 /* Define to 1 if you have the `__dso_handle' function. */
 #undef HAVE___DSO_HANDLE
 
+/* Have host's __fixdfdi */
+#undef HAVE___FIXDFDI
+
+/* Have host's __fixsfdi */
+#undef HAVE___FIXSFDI
+
+/* Have host's __floatdidf */
+#undef HAVE___FLOATDIDF
+
+/* Have host's __lshrdi3 */
+#undef HAVE___LSHRDI3
+
+/* Have host's __main */
+#undef HAVE___MAIN
+
+/* Have host's __moddi3 */
+#undef HAVE___MODDI3
+
+/* Have host's __udivdi3 */
+#undef HAVE___UDIVDI3
+
+/* Have host's __umoddi3 */
+#undef HAVE___UMODDI3
+
+/* Have host's ___chkstk */
+#undef HAVE____CHKSTK
+
 /* Linker version detected at compile time. */
 #undef HOST_LINK_VERSION
 
@@ -539,6 +606,9 @@
 /* Define to path to twopi program if found or 'echo twopi' otherwise */
 #undef LLVM_PATH_TWOPI
 
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#undef LLVM_PATH_XDOT_PY
+
 /* Installation prefix directory */
 #undef LLVM_PREFIX
 
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 8469bcc60674..a679b956b373 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -47,7 +47,7 @@
 #cmakedefine LLVM_MANDIR "${LLVM_MANDIR}"
 
 /* Build multithreading support into LLVM */
-#cmakedefine LLVM_MULTITHREADED
+#cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED}
 
 /* LLVM architecture name for the native architecture, if available */
 #cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
@@ -62,10 +62,10 @@
 #cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
 
 /* Define if this is Unixish platform */
-#cmakedefine LLVM_ON_UNIX
+#cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
 
 /* Define if this is Win32ish platform */
-#cmakedefine LLVM_ON_WIN32
+#cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32}
 
 /* Define to path to circo program if found or 'echo circo' otherwise */
 #cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index 864729929982..38045fc0c1d6 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -20,7 +20,6 @@ namespace llvm {
   class APInt;
 
   template<typename T> class SmallVectorImpl;
-  class LLVMContext;
 
 /// This is an important base class in LLVM. It provides the common facilities
 /// of all constant values in an LLVM program. A constant is a value that is
@@ -142,16 +141,22 @@ public:
     assert(0 && "Constants that do not have operands cannot be using 'From'!");
   }
   
-  static Constant* getNullValue(const Type* Ty);
+  static Constant *getNullValue(const Type* Ty);
   
   /// @returns the value for an integer constant of the given type that has all
   /// its bits set to true.
   /// @brief Get the all ones value
-  static Constant* getAllOnesValue(const Type* Ty);
+  static Constant *getAllOnesValue(const Type* Ty);
 
   /// getIntegerValue - Return the value for an integer or pointer constant,
   /// or a vector thereof, with the given scalar value.
-  static Constant* getIntegerValue(const Type* Ty, const APInt &V);
+  static Constant *getIntegerValue(const Type* Ty, const APInt &V);
+  
+  /// removeDeadConstantUsers - If there are any dead constant users dangling
+  /// off of this constant, remove them.  This method is useful for clients
+  /// that want to check to see if a global is unused, but don't want to deal
+  /// with potentially dead constants hanging off of the globals.
+  void removeDeadConstantUsers() const;
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index a7deae0451bc..c4768f842345 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -25,8 +25,7 @@
 #include "llvm/OperandTraits.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/SmallVector.h"
-#include <vector>
+#include "llvm/ADT/ArrayRef.h"
 
 namespace llvm {
 
@@ -265,8 +264,8 @@ public:
   inline const APFloat& getValueAPF() const { return Val; }
 
   /// isNullValue - Return true if this is the value that would be returned by
-  /// getNullValue.  Don't depend on == for doubles to tell us it's zero, it
-  /// considers -0.0 to be null as well as 0.0.  :(
+  /// getNullValue.  For ConstantFP, this is +0.0, but not -0.0.  To handle the
+  /// two the same, use isZero().
   virtual bool isNullValue() const;
   
   /// isNegativeZeroValue - Return true if the value is what would be returned 
@@ -404,7 +403,8 @@ public:
 };
 
 template <>
-struct OperandTraits<ConstantArray> : public VariadicOperandTraits<> {
+struct OperandTraits<ConstantArray> :
+  public VariadicOperandTraits<ConstantArray> {
 };
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantArray, Constant)
@@ -453,7 +453,8 @@ public:
 };
 
 template <>
-struct OperandTraits<ConstantStruct> : public VariadicOperandTraits<> {
+struct OperandTraits<ConstantStruct> :
+  public VariadicOperandTraits<ConstantStruct> {
 };
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantStruct, Constant)
@@ -470,9 +471,9 @@ protected:
   ConstantVector(const VectorType *T, const std::vector<Constant*> &Val);
 public:
   // ConstantVector accessors
+  static Constant *get(ArrayRef<Constant*> V);
+  // FIXME: Eliminate this constructor form.
   static Constant *get(const VectorType *T, const std::vector<Constant*> &V);
-  static Constant *get(const std::vector<Constant*> &V);
-  static Constant *get(Constant *const *Vals, unsigned NumVals);
   
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -497,7 +498,7 @@ public:
 
   /// getSplatValue - If this is a splat constant, meaning that all of the
   /// elements have the same value, return that value. Otherwise return NULL.
-  Constant *getSplatValue();
+  Constant *getSplatValue() const;
 
   virtual void destroyConstant();
   virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -510,7 +511,8 @@ public:
 };
 
 template <>
-struct OperandTraits<ConstantVector> : public VariadicOperandTraits<> {
+struct OperandTraits<ConstantVector> :
+  public VariadicOperandTraits<ConstantVector> {
 };
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantVector, Constant)
@@ -591,7 +593,8 @@ public:
 };
 
 template <>
-struct OperandTraits<BlockAddress> : public FixedNumOperandTraits<2> {
+struct OperandTraits<BlockAddress> :
+  public FixedNumOperandTraits<BlockAddress, 2> {
 };
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(BlockAddress, Value)
@@ -624,11 +627,10 @@ protected:
                                 Constant *C2);
   static Constant *getSelectTy(const Type *Ty,
                                Constant *C1, Constant *C2, Constant *C3);
+  template<typename IndexTy>
   static Constant *getGetElementPtrTy(const Type *Ty, Constant *C,
-                                      Value* const *Idxs, unsigned NumIdxs);
-  static Constant *getInBoundsGetElementPtrTy(const Type *Ty, Constant *C,
-                                              Value* const *Idxs,
-                                              unsigned NumIdxs);
+                                      IndexTy const *Idxs, unsigned NumIdxs,
+                                      bool InBounds);
   static Constant *getExtractElementTy(const Type *Ty, Constant *Val,
                                        Constant *Idx);
   static Constant *getInsertElementTy(const Type *Ty, Constant *Val,
@@ -640,6 +642,10 @@ protected:
   static Constant *getInsertValueTy(const Type *Ty, Constant *Agg,
                                     Constant *Val,
                                     const unsigned *Idxs, unsigned NumIdxs);
+  template<typename IndexTy>
+  static Constant *getGetElementPtrImpl(Constant *C,
+                                        IndexTy const *IdxList,
+                                        unsigned NumIdx, bool InBounds);
 
 public:
   // Static methods to construct a ConstantExpr of different kinds.  Note that
@@ -649,35 +655,38 @@ public:
 
   /// getAlignOf constant expr - computes the alignment of a type in a target
   /// independent way (Note: the return type is an i64).
-  static Constant *getAlignOf(const Type* Ty);
+  static Constant *getAlignOf(const Type *Ty);
   
   /// getSizeOf constant expr - computes the (alloc) size of a type (in
   /// address-units, not bits) in a target independent way (Note: the return
   /// type is an i64).
   ///
-  static Constant *getSizeOf(const Type* Ty);
+  static Constant *getSizeOf(const Type *Ty);
 
   /// getOffsetOf constant expr - computes the offset of a struct field in a 
   /// target independent way (Note: the return type is an i64).
   ///
-  static Constant *getOffsetOf(const StructType* STy, unsigned FieldNo);
+  static Constant *getOffsetOf(const StructType *STy, unsigned FieldNo);
 
   /// getOffsetOf constant expr - This is a generalized form of getOffsetOf,
   /// which supports any aggregate type, and any Constant index.
   ///
-  static Constant *getOffsetOf(const Type* Ty, Constant *FieldNo);
+  static Constant *getOffsetOf(const Type *Ty, Constant *FieldNo);
   
-  static Constant *getNeg(Constant *C);
+  static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false);
   static Constant *getFNeg(Constant *C);
   static Constant *getNot(Constant *C);
-  static Constant *getAdd(Constant *C1, Constant *C2);
+  static Constant *getAdd(Constant *C1, Constant *C2,
+                          bool HasNUW = false, bool HasNSW = false);
   static Constant *getFAdd(Constant *C1, Constant *C2);
-  static Constant *getSub(Constant *C1, Constant *C2);
+  static Constant *getSub(Constant *C1, Constant *C2,
+                          bool HasNUW = false, bool HasNSW = false);
   static Constant *getFSub(Constant *C1, Constant *C2);
-  static Constant *getMul(Constant *C1, Constant *C2);
+  static Constant *getMul(Constant *C1, Constant *C2,
+                          bool HasNUW = false, bool HasNSW = false);
   static Constant *getFMul(Constant *C1, Constant *C2);
-  static Constant *getUDiv(Constant *C1, Constant *C2);
-  static Constant *getSDiv(Constant *C1, Constant *C2);
+  static Constant *getUDiv(Constant *C1, Constant *C2, bool isExact = false);
+  static Constant *getSDiv(Constant *C1, Constant *C2, bool isExact = false);
   static Constant *getFDiv(Constant *C1, Constant *C2);
   static Constant *getURem(Constant *C1, Constant *C2);
   static Constant *getSRem(Constant *C1, Constant *C2);
@@ -685,9 +694,10 @@ public:
   static Constant *getAnd(Constant *C1, Constant *C2);
   static Constant *getOr(Constant *C1, Constant *C2);
   static Constant *getXor(Constant *C1, Constant *C2);
-  static Constant *getShl(Constant *C1, Constant *C2);
-  static Constant *getLShr(Constant *C1, Constant *C2);
-  static Constant *getAShr(Constant *C1, Constant *C2);
+  static Constant *getShl(Constant *C1, Constant *C2,
+                          bool HasNUW = false, bool HasNSW = false);
+  static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false);
+  static Constant *getAShr(Constant *C1, Constant *C2, bool isExact = false);
   static Constant *getTrunc   (Constant *C, const Type *Ty);
   static Constant *getSExt    (Constant *C, const Type *Ty);
   static Constant *getZExt    (Constant *C, const Type *Ty);
@@ -701,15 +711,44 @@ public:
   static Constant *getIntToPtr(Constant *C, const Type *Ty);
   static Constant *getBitCast (Constant *C, const Type *Ty);
 
-  static Constant *getNSWNeg(Constant *C);
-  static Constant *getNUWNeg(Constant *C);
-  static Constant *getNSWAdd(Constant *C1, Constant *C2);
-  static Constant *getNUWAdd(Constant *C1, Constant *C2);
-  static Constant *getNSWSub(Constant *C1, Constant *C2);
-  static Constant *getNUWSub(Constant *C1, Constant *C2);
-  static Constant *getNSWMul(Constant *C1, Constant *C2);
-  static Constant *getNUWMul(Constant *C1, Constant *C2);
-  static Constant *getExactSDiv(Constant *C1, Constant *C2);
+  static Constant *getNSWNeg(Constant *C) { return getNeg(C, false, true); }
+  static Constant *getNUWNeg(Constant *C) { return getNeg(C, true, false); }
+  static Constant *getNSWAdd(Constant *C1, Constant *C2) {
+    return getAdd(C1, C2, false, true);
+  }
+  static Constant *getNUWAdd(Constant *C1, Constant *C2) {
+    return getAdd(C1, C2, true, false);
+  }
+  static Constant *getNSWSub(Constant *C1, Constant *C2) {
+    return getSub(C1, C2, false, true);
+  }
+  static Constant *getNUWSub(Constant *C1, Constant *C2) {
+    return getSub(C1, C2, true, false);
+  }
+  static Constant *getNSWMul(Constant *C1, Constant *C2) {
+    return getMul(C1, C2, false, true);
+  }
+  static Constant *getNUWMul(Constant *C1, Constant *C2) {
+    return getMul(C1, C2, true, false);
+  }
+  static Constant *getNSWShl(Constant *C1, Constant *C2) {
+    return getShl(C1, C2, false, true);
+  }
+  static Constant *getNUWShl(Constant *C1, Constant *C2) {
+    return getShl(C1, C2, true, false);
+  }
+  static Constant *getExactSDiv(Constant *C1, Constant *C2) {
+    return getSDiv(C1, C2, true);
+  }
+  static Constant *getExactUDiv(Constant *C1, Constant *C2) {
+    return getUDiv(C1, C2, true);
+  }
+  static Constant *getExactAShr(Constant *C1, Constant *C2) {
+    return getAShr(C1, C2, true);
+  }
+  static Constant *getExactLShr(Constant *C1, Constant *C2) {
+    return getLShr(C1, C2, true);
+  }
 
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -801,18 +840,24 @@ public:
   /// all elements must be Constant's.
   ///
   static Constant *getGetElementPtr(Constant *C,
-                                    Constant *const *IdxList, unsigned NumIdx);
+                                    Constant *const *IdxList, unsigned NumIdx,
+                                    bool InBounds = false);
   static Constant *getGetElementPtr(Constant *C,
-                                    Value* const *IdxList, unsigned NumIdx);
+                                    Value *const *IdxList, unsigned NumIdx,
+                                    bool InBounds = false);
 
   /// Create an "inbounds" getelementptr. See the documentation for the
   /// "inbounds" flag in LangRef.html for details.
   static Constant *getInBoundsGetElementPtr(Constant *C,
                                             Constant *const *IdxList,
-                                            unsigned NumIdx);
+                                            unsigned NumIdx) {
+    return getGetElementPtr(C, IdxList, NumIdx, true);
+  }
   static Constant *getInBoundsGetElementPtr(Constant *C,
                                             Value* const *IdxList,
-                                            unsigned NumIdx);
+                                            unsigned NumIdx) {
+    return getGetElementPtr(C, IdxList, NumIdx, true);
+  }
 
   static Constant *getExtractElement(Constant *Vec, Constant *Idx);
   static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx);
@@ -870,7 +915,8 @@ private:
 };
 
 template <>
-struct OperandTraits<ConstantExpr> : public VariadicOperandTraits<1> {
+struct OperandTraits<ConstantExpr> :
+  public VariadicOperandTraits<ConstantExpr, 1> {
 };
 
 DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantExpr, Constant)
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 9b6b19f15466..56d1e3e237d6 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -9,7 +9,7 @@
 //
 // This file contains the declarations of classes that represent "derived
 // types".  These are things like "arrays of x" or "structure of x, y, z" or
-// "method returning x taking (y,z) as parameters", etc...
+// "function returning x taking (y,z) as parameters", etc...
 //
 // The implementations of these classes live in the Type.cpp file.
 //
@@ -19,6 +19,7 @@
 #define LLVM_DERIVED_TYPES_H
 
 #include "llvm/Type.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 3287b39a3c95..71698fa00874 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -22,7 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/ValueMap.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -41,6 +41,8 @@ class MutexGuard;
 class TargetData;
 class Type;
 
+/// \brief Helper class for helping synchronize access to the global address map
+/// table.
 class ExecutionEngineState {
 public:
   struct AddressMapConfig : public ValueMapConfig<const GlobalValue*> {
@@ -70,8 +72,7 @@ private:
 public:
   ExecutionEngineState(ExecutionEngine &EE);
 
-  GlobalAddressMapTy &
-  getGlobalAddressMap(const MutexGuard &) {
+  GlobalAddressMapTy &getGlobalAddressMap(const MutexGuard &) {
     return GlobalAddressMap;
   }
 
@@ -80,23 +81,41 @@ public:
     return GlobalAddressReverseMap;
   }
 
-  // Returns the address ToUnmap was mapped to.
+  /// \brief Erase an entry from the mapping table.
+  ///
+  /// \returns The address that \arg ToUnmap was happed to.
   void *RemoveMapping(const MutexGuard &, const GlobalValue *ToUnmap);
 };
 
-
+/// \brief Abstract interface for implementation execution of LLVM modules,
+/// designed to support both interpreter and just-in-time (JIT) compiler
+/// implementations.
 class ExecutionEngine {
-  const TargetData *TD;
+  /// The state object holding the global address mapping, which must be
+  /// accessed synchronously.
+  //
+  // FIXME: There is no particular need the entire map needs to be
+  // synchronized.  Wouldn't a reader-writer design be better here?
   ExecutionEngineState EEState;
+
+  /// The target data for the platform for which execution is being performed.
+  const TargetData *TD;
+
+  /// Whether lazy JIT compilation is enabled.
   bool CompilingLazily;
+
+  /// Whether JIT compilation of external global variables is allowed.
   bool GVCompilationDisabled;
+
+  /// Whether the JIT should perform lookups of external symbols (e.g.,
+  /// using dlsym).
   bool SymbolSearchingDisabled;
 
   friend class EngineBuilder;  // To allow access to JITCtor and InterpCtor.
 
 protected:
-  /// Modules - This is a list of Modules that we are JIT'ing from.  We use a
-  /// smallvector to optimize for the case where there is only one module.
+  /// The list of Modules that we are JIT'ing from.  We use a SmallVector to
+  /// optimize for the case where there is only one module.
   SmallVector<Module*, 1> Modules;
   
   void setTargetData(const TargetData *td) {
@@ -104,11 +123,11 @@ protected:
   }
   
   /// getMemoryforGV - Allocate memory for a global variable.
-  virtual char* getMemoryForGV(const GlobalVariable* GV);
+  virtual char *getMemoryForGV(const GlobalVariable *GV);
 
   // To avoid having libexecutionengine depend on the JIT and interpreter
-  // libraries, the JIT and Interpreter set these functions to ctor pointers
-  // at startup time if they are linked in.
+  // libraries, the execution engine implementations set these functions to ctor
+  // pointers at startup time if they are linked in.
   static ExecutionEngine *(*JITCtor)(
     Module *M,
     std::string *ErrorStr,
@@ -119,23 +138,36 @@ protected:
     StringRef MArch,
     StringRef MCPU,
     const SmallVectorImpl<std::string>& MAttrs);
+  static ExecutionEngine *(*MCJITCtor)(
+    Module *M,
+    std::string *ErrorStr,
+    JITMemoryManager *JMM,
+    CodeGenOpt::Level OptLevel,
+    bool GVsWithCode,
+    CodeModel::Model CMM,
+    StringRef MArch,
+    StringRef MCPU,
+    const SmallVectorImpl<std::string>& MAttrs);
   static ExecutionEngine *(*InterpCtor)(Module *M,
                                         std::string *ErrorStr);
 
   /// LazyFunctionCreator - If an unknown function is needed, this function
-  /// pointer is invoked to create it. If this returns null, the JIT will abort.
-  void* (*LazyFunctionCreator)(const std::string &);
+  /// pointer is invoked to create it.  If this returns null, the JIT will
+  /// abort.
+  void *(*LazyFunctionCreator)(const std::string &);
   
-  /// ExceptionTableRegister - If Exception Handling is set, the JIT will 
-  /// register dwarf tables with this function
+  /// ExceptionTableRegister - If Exception Handling is set, the JIT will
+  /// register dwarf tables with this function.
   typedef void (*EERegisterFn)(void*);
-  static EERegisterFn ExceptionTableRegister;
+  EERegisterFn ExceptionTableRegister;
+  EERegisterFn ExceptionTableDeregister;
+  std::vector<void*> AllExceptionTables;
 
 public:
-  /// lock - This lock is protects the ExecutionEngine, JIT, JITResolver and
+  /// lock - This lock protects the ExecutionEngine, JIT, JITResolver and
   /// JITEmitter classes.  It must be held while changing the internal state of
   /// any of those classes.
-  sys::Mutex lock; // Used to make this class and subclasses thread-safe
+  sys::Mutex lock;
 
   //===--------------------------------------------------------------------===//
   //  ExecutionEngine Startup
@@ -146,20 +178,18 @@ public:
   /// create - This is the factory method for creating an execution engine which
   /// is appropriate for the current machine.  This takes ownership of the
   /// module.
+  ///
+  /// \param GVsWithCode - Allocating globals with code breaks
+  /// freeMachineCodeForFunction and is probably unsafe and bad for performance.
+  /// However, we have clients who depend on this behavior, so we must support
+  /// it.  Eventually, when we're willing to break some backwards compatability,
+  /// this flag should be flipped to false, so that by default
+  /// freeMachineCodeForFunction works.
   static ExecutionEngine *create(Module *M,
                                  bool ForceInterpreter = false,
                                  std::string *ErrorStr = 0,
                                  CodeGenOpt::Level OptLevel =
                                    CodeGenOpt::Default,
-                                 // Allocating globals with code breaks
-                                 // freeMachineCodeForFunction and is probably
-                                 // unsafe and bad for performance.  However,
-                                 // we have clients who depend on this
-                                 // behavior, so we must support it.
-                                 // Eventually, when we're willing to break
-                                 // some backwards compatability, this flag
-                                 // should be flipped to false, so that by
-                                 // default freeMachineCodeForFunction works.
                                  bool GVsWithCode = true);
 
   /// createJIT - This is the factory method for creating a JIT for the current
@@ -184,11 +214,10 @@ public:
     Modules.push_back(M);
   }
   
-  //===----------------------------------------------------------------------===//
+  //===--------------------------------------------------------------------===//
 
   const TargetData *getTargetData() const { return TD; }
 
-
   /// removeModule - Remove a Module from the list of modules.  Returns true if
   /// M is found.
   virtual bool removeModule(Module *M);
@@ -200,17 +229,19 @@ public:
   
   /// runFunction - Execute the specified function with the specified arguments,
   /// and return the result.
-  ///
   virtual GenericValue runFunction(Function *F,
                                 const std::vector<GenericValue> &ArgValues) = 0;
 
   /// runStaticConstructorsDestructors - This method is used to execute all of
-  /// the static constructors or destructors for a program, depending on the
-  /// value of isDtors.
+  /// the static constructors or destructors for a program.
+  ///
+  /// \param isDtors - Run the destructors instead of constructors.
   void runStaticConstructorsDestructors(bool isDtors);
+
   /// runStaticConstructorsDestructors - This method is used to execute all of
-  /// the static constructors or destructors for a module, depending on the
-  /// value of isDtors.
+  /// the static constructors or destructors for a particular module.
+  ///
+  /// \param isDtors - Run the destructors instead of constructors.
   void runStaticConstructorsDestructors(Module *module, bool isDtors);
   
   
@@ -229,8 +260,8 @@ public:
   /// GlobalValue is destroyed.
   void addGlobalMapping(const GlobalValue *GV, void *Addr);
   
-  /// clearAllGlobalMappings - Clear all global mappings and start over again
-  /// use in dynamic compilation scenarios when you want to move globals
+  /// clearAllGlobalMappings - Clear all global mappings and start over again,
+  /// for use in dynamic compilation scenarios to move globals.
   void clearAllGlobalMappings();
   
   /// clearGlobalMappingsFromModule - Clear all global mappings that came from a
@@ -246,12 +277,10 @@ public:
   /// getPointerToGlobalIfAvailable - This returns the address of the specified
   /// global value if it is has already been codegen'd, otherwise it returns
   /// null.
-  ///
   void *getPointerToGlobalIfAvailable(const GlobalValue *GV);
 
   /// getPointerToGlobal - This returns the address of the specified global
-  /// value.  This may involve code generation if it's a function.
-  ///
+  /// value. This may involve code generation if it's a function.
   void *getPointerToGlobal(const GlobalValue *GV);
 
   /// getPointerToFunction - The different EE's represent function bodies in
@@ -259,20 +288,17 @@ public:
   /// pointer should look like.  When F is destroyed, the ExecutionEngine will
   /// remove its global mapping and free any machine code.  Be sure no threads
   /// are running inside F when that happens.
-  ///
   virtual void *getPointerToFunction(Function *F) = 0;
 
   /// getPointerToBasicBlock - The different EE's represent basic blocks in
   /// different ways.  Return the representation for a blockaddress of the
   /// specified block.
-  ///
   virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0;
   
   /// getPointerToFunctionOrStub - If the specified function has been
   /// code-gen'd, return a pointer to the function.  If not, compile it, or use
   /// a stub to implement lazy compilation if available.  See
   /// getPointerToFunction for the requirements on destroying F.
-  ///
   virtual void *getPointerToFunctionOrStub(Function *F) {
     // Default implementation, just codegen the function.
     return getPointerToFunction(F);
@@ -286,23 +312,25 @@ public:
   ///
   const GlobalValue *getGlobalValueAtAddress(void *Addr);
 
-
+  /// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr.
+  /// Ptr is the address of the memory at which to store Val, cast to
+  /// GenericValue *.  It is not a pointer to a GenericValue containing the
+  /// address at which to store Val.
   void StoreValueToMemory(const GenericValue &Val, GenericValue *Ptr,
                           const Type *Ty);
+
   void InitializeMemory(const Constant *Init, void *Addr);
 
-  /// recompileAndRelinkFunction - This method is used to force a function
-  /// which has already been compiled to be compiled again, possibly
-  /// after it has been modified. Then the entry to the old copy is overwritten
-  /// with a branch to the new copy. If there was no old copy, this acts
-  /// just like VM::getPointerToFunction().
-  ///
+  /// recompileAndRelinkFunction - This method is used to force a function which
+  /// has already been compiled to be compiled again, possibly after it has been
+  /// modified.  Then the entry to the old copy is overwritten with a branch to
+  /// the new copy.  If there was no old copy, this acts just like
+  /// VM::getPointerToFunction().
   virtual void *recompileAndRelinkFunction(Function *F) = 0;
 
   /// freeMachineCodeForFunction - Release memory in the ExecutionEngine
   /// corresponding to the machine code emitted to execute this function, useful
   /// for garbage-collecting generated code.
-  ///
   virtual void freeMachineCodeForFunction(Function *F) = 0;
 
   /// getOrEmitGlobalVariable - Return the address of the specified global
@@ -373,25 +401,31 @@ public:
   
   /// InstallExceptionTableRegister - The JIT will use the given function
   /// to register the exception tables it generates.
-  static void InstallExceptionTableRegister(void (*F)(void*)) {
+  void InstallExceptionTableRegister(EERegisterFn F) {
     ExceptionTableRegister = F;
   }
+  void InstallExceptionTableDeregister(EERegisterFn F) {
+    ExceptionTableDeregister = F;
+  }
   
-  /// RegisterTable - Registers the given pointer as an exception table. It uses
-  /// the ExceptionTableRegister function.
-  static void RegisterTable(void* res) {
-    if (ExceptionTableRegister)
+  /// RegisterTable - Registers the given pointer as an exception table.  It
+  /// uses the ExceptionTableRegister function.
+  void RegisterTable(void* res) {
+    if (ExceptionTableRegister) {
       ExceptionTableRegister(res);
+      AllExceptionTables.push_back(res);
+    }
   }
 
+  /// DeregisterAllTables - Deregisters all previously registered pointers to an
+  /// exception tables.  It uses the ExceptionTableoDeregister function.
+  void DeregisterAllTables();
+
 protected:
   explicit ExecutionEngine(Module *M);
 
   void emitGlobals();
 
-  // EmitGlobalVariable - This method emits the specified global variable to the
-  // address specified in GlobalAddresses, or allocates new memory if it's not
-  // already in the map.
   void EmitGlobalVariable(const GlobalVariable *GV);
 
   GenericValue getConstantValue(const Constant *C);
@@ -412,8 +446,7 @@ namespace EngineKind {
 /// stack-allocating a builder, chaining the various set* methods, and
 /// terminating it with a .create() call.
 class EngineBuilder {
-
- private:
+private:
   Module *M;
   EngineKind::Kind WhichEngine;
   std::string *ErrorStr;
@@ -424,9 +457,9 @@ class EngineBuilder {
   std::string MArch;
   std::string MCPU;
   SmallVector<std::string, 4> MAttrs;
+  bool UseMCJIT;
 
   /// InitEngine - Does the common initialization of default options.
-  ///
   void InitEngine() {
     WhichEngine = EngineKind::Either;
     ErrorStr = NULL;
@@ -434,9 +467,10 @@ class EngineBuilder {
     JMM = NULL;
     AllocateGVsWithCode = false;
     CMModel = CodeModel::Default;
+    UseMCJIT = false;
   }
 
- public:
+public:
   /// EngineBuilder - Constructor for EngineBuilder.  If create() is called and
   /// is successful, the created engine takes ownership of the module.
   EngineBuilder(Module *m) : M(m) {
@@ -504,6 +538,12 @@ class EngineBuilder {
     return *this;
   }
 
+  /// setUseMCJIT - Set whether the MC-JIT implementation should be used
+  /// (experimental).
+  void setUseMCJIT(bool Value) {
+    UseMCJIT = Value;
+  }
+
   /// setMAttrs - Set cpu-specific attributes.
   template<typename StringSequence>
   EngineBuilder &setMAttrs(const StringSequence &mattrs) {
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index 1301320c1435..a2fed98c150e 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -16,7 +16,7 @@
 #define GENERIC_VALUE_H
 
 #include "llvm/ADT/APInt.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index dcc66b2a089f..abc063b07038 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
 #define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
 
 #include <vector>
@@ -24,35 +24,38 @@ namespace llvm {
 class Function;
 class MachineFunction;
 
-/// Empty for now, but this object will contain all details about the
-/// generated machine code that a Listener might care about.
+/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
+/// about a generated machine code function.
 struct JITEvent_EmittedFunctionDetails {
-  const MachineFunction *MF;
-
   struct LineStart {
-    // The address at which the current line changes.
+    /// The address at which the current line changes.
     uintptr_t Address;
-    // The new location information.  These can be translated to
-    // DebugLocTuples using MF->getDebugLocTuple().
+
+    /// The new location information.  These can be translated to DebugLocTuples
+    /// using MF->getDebugLocTuple().
     DebugLoc Loc;
   };
-  // This holds line boundary information sorted by address.
+
+  /// The machine function the struct contains information for.
+  const MachineFunction *MF;
+
+  /// The list of line boundary information, sorted by address.
   std::vector<LineStart> LineStarts;
 };
 
-/// JITEventListener - This interface is used by the JIT to notify clients about
-/// significant events during compilation.  For example, we could have
-/// implementations for profilers and debuggers that need to know where
-/// functions have been emitted.
+/// JITEventListener - Abstract interface for use by the JIT to notify clients
+/// about significant events during compilation. For example, to notify
+/// profilers and debuggers that need to know where functions have been emitted.
 ///
-/// Each method defaults to doing nothing, so you only need to override the ones
-/// you care about.
+/// The default implementation of each method does nothing.
 class JITEventListener {
+public:
+  typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails;
+
 public:
   JITEventListener() {}
-  virtual ~JITEventListener();  // Defined in JIT.cpp.
+  virtual ~JITEventListener();
 
-  typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails;
   /// NotifyFunctionEmitted - Called after a function has been successfully
   /// emitted to memory.  The function still has its MachineFunction attached,
   /// if you should happen to need that.
@@ -60,13 +63,14 @@ public:
                                      void *Code, size_t Size,
                                      const EmittedFunctionDetails &Details) {}
 
-  /// NotifyFreeingMachineCode - This is called inside of
-  /// freeMachineCodeForFunction(), after the global mapping is removed, but
-  /// before the machine code is returned to the allocator.  OldPtr is the
-  /// address of the machine code and will be the same as the Code parameter to
-  /// a previous NotifyFunctionEmitted call.  The Function passed to
-  /// NotifyFunctionEmitted may have been destroyed by the time of the matching
-  /// NotifyFreeingMachineCode call.
+  /// NotifyFreeingMachineCode - Called from freeMachineCodeForFunction(), after
+  /// the global mapping is removed, but before the machine code is returned to
+  /// the allocator.
+  ///
+  /// OldPtr is the address of the machine code and will be the same as the Code
+  /// parameter to a previous NotifyFunctionEmitted call.  The Function passed
+  /// to NotifyFunctionEmitted may have been destroyed by the time of the
+  /// matching NotifyFreeingMachineCode call.
   virtual void NotifyFreeingMachineCode(void *OldPtr) {}
 };
 
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index e0159309ce5c..384141801667 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -6,15 +6,11 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file defines the JITMemoryManagerInterface
-//
-//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
 #define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 
 namespace llvm {
@@ -29,8 +25,8 @@ namespace llvm {
 class JITMemoryManager {
 protected:
   bool HasGOT;
-public:
 
+public:
   JITMemoryManager() : HasGOT(false) {}
   virtual ~JITMemoryManager();
   
@@ -48,7 +44,7 @@ public:
 
   /// setPoisonMemory - Setting this flag to true makes the memory manager
   /// garbage values over freed memory.  This is useful for testing and
-  /// debugging, and is be turned on by default in debug mode.
+  /// debugging, and may be turned on by default in debug mode.
   virtual void setPoisonMemory(bool poison) = 0;
 
   //===--------------------------------------------------------------------===//
@@ -61,7 +57,6 @@ public:
   virtual void AllocateGOT() = 0;
   
   /// isManagingGOT - Return true if the AllocateGOT method is called.
-  ///
   bool isManagingGOT() const {
     return HasGOT;
   }
@@ -111,7 +106,6 @@ public:
   virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
 
   /// allocateGlobal - Allocate memory for a global.
-  ///
   virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
 
   /// deallocateFunctionBody - Free the specified function body.  The argument
diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h
new file mode 100644
index 000000000000..f956a5029b17
--- /dev/null
+++ b/include/llvm/ExecutionEngine/MCJIT.h
@@ -0,0 +1,38 @@
+//===-- MCJIT.h - MC-Based Just-In-Time Execution Engine --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file forces the MCJIT to link in on certain operating systems.
+// (Windows).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_MCJIT_H
+#define LLVM_EXECUTION_ENGINE_MCJIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include <cstdlib>
+
+extern "C" void LLVMLinkInMCJIT();
+
+namespace {
+  struct ForceMCJITLinking {
+    ForceMCJITLinking() {
+      // We must reference the passes in such a way that compilers will not
+      // delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP. As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      LLVMLinkInMCJIT();
+    }
+  } ForceMCJITLinking;
+}
+
+#endif
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 2b19fa5a7f38..9a0825ab4a96 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -152,7 +152,7 @@ public:
   /// The particular intrinsic functions which correspond to this value are
   /// defined in llvm/Intrinsics.h.
   ///
-  unsigned getIntrinsicID() const ATTRIBUTE_READONLY;
+  unsigned getIntrinsicID() const LLVM_ATTRIBUTE_READONLY;
   bool isIntrinsic() const { return getIntrinsicID() != 0; }
 
   /// getCallingConv()/setCallingConv(CC) - These method get and set the
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
index 9867c518c890..f4af5b1202c5 100644
--- a/include/llvm/GlobalAlias.h
+++ b/include/llvm/GlobalAlias.h
@@ -89,7 +89,8 @@ public:
 };
 
 template <>
-struct OperandTraits<GlobalAlias> : public FixedNumOperandTraits<1> {
+struct OperandTraits<GlobalAlias> :
+  public FixedNumOperandTraits<GlobalAlias, 1> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Value)
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 62e84f833510..b184b8e44971 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -60,7 +60,8 @@ protected:
   GlobalValue(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
               LinkageTypes linkage, const Twine &Name)
     : Constant(ty, vty, Ops, NumOps), Parent(0),
-      Linkage(linkage), Visibility(DefaultVisibility), Alignment(0) {
+      Linkage(linkage), Visibility(DefaultVisibility), Alignment(0),
+      UnnamedAddr(0) {
     setName(Name);
   }
 
@@ -70,6 +71,7 @@ protected:
   LinkageTypes Linkage : 5;   // The linkage of this global
   unsigned Visibility : 2;    // The visibility style of this global
   unsigned Alignment : 16;    // Alignment of this symbol, must be power of two
+  unsigned UnnamedAddr : 1;   // This value's address is not significant
   std::string Section;        // Section to emit this into, empty mean default
 public:
   ~GlobalValue() {
@@ -81,6 +83,9 @@ public:
   }
   void setAlignment(unsigned Align);
 
+  bool hasUnnamedAddr() const { return UnnamedAddr; }
+  void setUnnamedAddr(bool Val) { UnnamedAddr = Val; }
+
   VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
   bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
   bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; }
@@ -173,7 +178,9 @@ public:
   }
 
   /// isWeakForLinker - Whether the definition of this global may be replaced at
-  /// link time.
+  /// link time.  NB: Using this method outside of the code generators is almost
+  /// always a mistake: when working at the IR level use mayBeOverridden instead
+  /// as it knows about ODR semantics.
   static bool isWeakForLinker(LinkageTypes Linkage)  {
     return Linkage == AvailableExternallyLinkage ||
            Linkage == WeakAnyLinkage ||
@@ -275,12 +282,6 @@ public:
   inline Module *getParent() { return Parent; }
   inline const Module *getParent() const { return Parent; }
 
-  /// removeDeadConstantUsers - If there are any dead constant users dangling
-  /// off of this global value, remove them.  This method is useful for clients
-  /// that want to check to see if a global is unused, but don't want to deal
-  /// with potentially dead constants hanging off of the globals.
-  void removeDeadConstantUsers() const;
-
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const GlobalValue *) { return true; }
   static inline bool classof(const Value *V) {
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 633e8b4dc335..1769c665d062 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -68,7 +68,7 @@ public:
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
-  /// isDeclaration - Is this global variable lacking an initializer?  If so, 
+  /// isDeclaration - Is this global variable lacking an initializer?  If so,
   /// the global variable is defined in some other translation unit, and is thus
   /// only a declaration here.
   virtual bool isDeclaration() const { return getNumOperands() == 0; }
@@ -80,7 +80,21 @@ public:
   inline bool hasInitializer() const { return !isDeclaration(); }
 
   /// hasDefinitiveInitializer - Whether the global variable has an initializer,
-  /// and this is the initializer that will be used in the final executable.
+  /// and any other instances of the global (this can happen due to weak
+  /// linkage) are guaranteed to have the same initializer.
+  ///
+  /// Note that if you want to transform a global, you must use
+  /// hasUniqueInitializer() instead, because of the *_odr linkage type.
+  ///
+  /// Example:
+  ///
+  /// @a = global SomeType* null - Initializer is both definitive and unique.
+  ///
+  /// @b = global weak SomeType* null - Initializer is neither definitive nor
+  /// unique.
+  ///
+  /// @c = global weak_odr SomeType* null - Initializer is definitive, but not
+  /// unique.
   inline bool hasDefinitiveInitializer() const {
     return hasInitializer() &&
       // The initializer of a global variable with weak linkage may change at
@@ -88,6 +102,19 @@ public:
       !mayBeOverridden();
   }
 
+  /// hasUniqueInitializer - Whether the global variable has an initializer, and
+  /// any changes made to the initializer will turn up in the final executable.
+  inline bool hasUniqueInitializer() const {
+    return hasInitializer() &&
+      // It's not safe to modify initializers of global variables with weak
+      // linkage, because the linker might choose to discard the initializer and
+      // use the initializer from another instance of the global variable
+      // instead. It is wrong to modify the initializer of a global variable
+      // with *_odr linkage because then different instances of the global may
+      // have different initializers, breaking the One Definition Rule.
+      !isWeakForLinker();
+  }
+
   /// getInitializer - Return the initializer for this global variable.  It is
   /// illegal to call this method if the global is external, because we cannot
   /// tell what the value is initialized to!
@@ -142,7 +169,8 @@ public:
 };
 
 template <>
-struct OperandTraits<GlobalVariable> : public OptionalOperandTraits<> {
+struct OperandTraits<GlobalVariable> :
+  public OptionalOperandTraits<GlobalVariable> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
new file mode 100644
index 000000000000..02dbfbd26d58
--- /dev/null
+++ b/include/llvm/InitializePasses.h
@@ -0,0 +1,235 @@
+//===- llvm/InitializePasses.h -------- Initialize All Passes ---*- C++ -*-===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations for the pass initialization routines
+// for the entire LLVM project.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INITIALIZEPASSES_H
+#define LLVM_INITIALIZEPASSES_H
+
+namespace llvm {
+
+class PassRegistry;
+
+/// initializeCore - Initialize all passes linked into the
+/// TransformUtils library.
+void initializeCore(PassRegistry&);
+
+/// initializeTransformUtils - Initialize all passes linked into the
+/// TransformUtils library.
+void initializeTransformUtils(PassRegistry&);
+
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void initializeScalarOpts(PassRegistry&);
+
+/// initializeInstCombine - Initialize all passes linked into the
+/// ScalarOpts library.
+void initializeInstCombine(PassRegistry&);
+
+/// initializeIPO - Initialize all passes linked into the IPO library.
+void initializeIPO(PassRegistry&);
+
+/// initializeInstrumentation - Initialize all passes linked into the
+/// Instrumentation library.
+void initializeInstrumentation(PassRegistry&);
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void initializeAnalysis(PassRegistry&);
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void initializeIPA(PassRegistry&);
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void initializeCodeGen(PassRegistry&);
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void initializeTarget(PassRegistry&);
+
+void initializeAAEvalPass(PassRegistry&);
+void initializeADCEPass(PassRegistry&);
+void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
+void initializeAliasAnalysisCounterPass(PassRegistry&);
+void initializeAliasDebuggerPass(PassRegistry&);
+void initializeAliasSetPrinterPass(PassRegistry&);
+void initializeAlwaysInlinerPass(PassRegistry&);
+void initializeArgPromotionPass(PassRegistry&);
+void initializeBasicAliasAnalysisPass(PassRegistry&);
+void initializeBasicCallGraphPass(PassRegistry&);
+void initializeBlockExtractorPassPass(PassRegistry&);
+void initializeBlockPlacementPass(PassRegistry&);
+void initializeBreakCriticalEdgesPass(PassRegistry&);
+void initializeCFGOnlyPrinterPass(PassRegistry&);
+void initializeCFGOnlyViewerPass(PassRegistry&);
+void initializeCFGPrinterPass(PassRegistry&);
+void initializeCFGSimplifyPassPass(PassRegistry&);
+void initializeCFGViewerPass(PassRegistry&);
+void initializeCalculateSpillWeightsPass(PassRegistry&);
+void initializeCallGraphAnalysisGroup(PassRegistry&);
+void initializeCodeGenPreparePass(PassRegistry&);
+void initializeConstantMergePass(PassRegistry&);
+void initializeConstantPropagationPass(PassRegistry&);
+void initializeCorrelatedValuePropagationPass(PassRegistry&);
+void initializeDAEPass(PassRegistry&);
+void initializeDAHPass(PassRegistry&);
+void initializeDCEPass(PassRegistry&);
+void initializeDSEPass(PassRegistry&);
+void initializeDTEPass(PassRegistry&);
+void initializeDeadInstEliminationPass(PassRegistry&);
+void initializeDeadMachineInstructionElimPass(PassRegistry&);
+void initializeDomOnlyPrinterPass(PassRegistry&);
+void initializeDomOnlyViewerPass(PassRegistry&);
+void initializeDomPrinterPass(PassRegistry&);
+void initializeDomViewerPass(PassRegistry&);
+void initializeDominanceFrontierPass(PassRegistry&);
+void initializeDominatorTreePass(PassRegistry&);
+void initializeEdgeBundlesPass(PassRegistry&);
+void initializeEdgeProfilerPass(PassRegistry&);
+void initializePathProfilerPass(PassRegistry&);
+void initializeEarlyCSEPass(PassRegistry&);
+void initializeExpandISelPseudosPass(PassRegistry&);
+void initializeFindUsedTypesPass(PassRegistry&);
+void initializeFunctionAttrsPass(PassRegistry&);
+void initializeGCModuleInfoPass(PassRegistry&);
+void initializeGEPSplitterPass(PassRegistry&);
+void initializeGVNPass(PassRegistry&);
+void initializeGlobalDCEPass(PassRegistry&);
+void initializeGlobalOptPass(PassRegistry&);
+void initializeGlobalsModRefPass(PassRegistry&);
+void initializeIPCPPass(PassRegistry&);
+void initializeIPSCCPPass(PassRegistry&);
+void initializeIVUsersPass(PassRegistry&);
+void initializeIfConverterPass(PassRegistry&);
+void initializeIndVarSimplifyPass(PassRegistry&);
+void initializeInstCombinerPass(PassRegistry&);
+void initializeInstCountPass(PassRegistry&);
+void initializeInstNamerPass(PassRegistry&);
+void initializeInternalizePassPass(PassRegistry&);
+void initializeIntervalPartitionPass(PassRegistry&);
+void initializeJumpThreadingPass(PassRegistry&);
+void initializeLCSSAPass(PassRegistry&);
+void initializeLICMPass(PassRegistry&);
+void initializeLazyValueInfoPass(PassRegistry&);
+void initializeLibCallAliasAnalysisPass(PassRegistry&);
+void initializeLintPass(PassRegistry&);
+void initializeLiveDebugVariablesPass(PassRegistry&);
+void initializeLiveIntervalsPass(PassRegistry&);
+void initializeLiveStacksPass(PassRegistry&);
+void initializeLiveValuesPass(PassRegistry&);
+void initializeLiveVariablesPass(PassRegistry&);
+void initializeLoaderPassPass(PassRegistry&);
+void initializePathProfileLoaderPassPass(PassRegistry&);
+void initializeLoopDeletionPass(PassRegistry&);
+void initializeLoopDependenceAnalysisPass(PassRegistry&);
+void initializeLoopExtractorPass(PassRegistry&);
+void initializeLoopInfoPass(PassRegistry&);
+void initializeLoopInstSimplifyPass(PassRegistry&);
+void initializeLoopRotatePass(PassRegistry&);
+void initializeLoopSimplifyPass(PassRegistry&);
+void initializeLoopSplitterPass(PassRegistry&);
+void initializeLoopStrengthReducePass(PassRegistry&);
+void initializeLoopUnrollPass(PassRegistry&);
+void initializeLoopUnswitchPass(PassRegistry&);
+void initializeLoopIdiomRecognizePass(PassRegistry&);
+void initializeLowerAtomicPass(PassRegistry&);
+void initializeLowerIntrinsicsPass(PassRegistry&);
+void initializeLowerInvokePass(PassRegistry&);
+void initializeLowerSetJmpPass(PassRegistry&);
+void initializeLowerSwitchPass(PassRegistry&);
+void initializeMachineCSEPass(PassRegistry&);
+void initializeMachineDominatorTreePass(PassRegistry&);
+void initializeMachineLICMPass(PassRegistry&);
+void initializeMachineLoopInfoPass(PassRegistry&);
+void initializeMachineLoopRangesPass(PassRegistry&);
+void initializeMachineModuleInfoPass(PassRegistry&);
+void initializeMachineSinkingPass(PassRegistry&);
+void initializeMachineVerifierPassPass(PassRegistry&);
+void initializeMemCpyOptPass(PassRegistry&);
+void initializeMemDepPrinterPass(PassRegistry&);
+void initializeMemoryDependenceAnalysisPass(PassRegistry&);
+void initializeMergeFunctionsPass(PassRegistry&);
+void initializeModuleDebugInfoPrinterPass(PassRegistry&);
+void initializeNoAAPass(PassRegistry&);
+void initializeNoProfileInfoPass(PassRegistry&);
+void initializeNoPathProfileInfoPass(PassRegistry&);
+void initializeOptimalEdgeProfilerPass(PassRegistry&);
+void initializeOptimizePHIsPass(PassRegistry&);
+void initializePEIPass(PassRegistry&);
+void initializePHIEliminationPass(PassRegistry&);
+void initializePartialInlinerPass(PassRegistry&);
+void initializePeepholeOptimizerPass(PassRegistry&);
+void initializePostDomOnlyPrinterPass(PassRegistry&);
+void initializePostDomOnlyViewerPass(PassRegistry&);
+void initializePostDomPrinterPass(PassRegistry&);
+void initializePostDomViewerPass(PassRegistry&);
+void initializePostDominanceFrontierPass(PassRegistry&);
+void initializePostDominatorTreePass(PassRegistry&);
+void initializePreAllocSplittingPass(PassRegistry&);
+void initializePreVerifierPass(PassRegistry&);
+void initializePrintDbgInfoPass(PassRegistry&);
+void initializePrintFunctionPassPass(PassRegistry&);
+void initializePrintModulePassPass(PassRegistry&);
+void initializeProcessImplicitDefsPass(PassRegistry&);
+void initializeProfileEstimatorPassPass(PassRegistry&);
+void initializeProfileInfoAnalysisGroup(PassRegistry&);
+void initializePathProfileInfoAnalysisGroup(PassRegistry&);
+void initializePathProfileVerifierPass(PassRegistry&);
+void initializeProfileVerifierPassPass(PassRegistry&);
+void initializePromotePassPass(PassRegistry&);
+void initializePruneEHPass(PassRegistry&);
+void initializeRALinScanPass(PassRegistry&);
+void initializeReassociatePass(PassRegistry&);
+void initializeRegToMemPass(PassRegistry&);
+void initializeRegionInfoPass(PassRegistry&);
+void initializeRegionOnlyPrinterPass(PassRegistry&);
+void initializeRegionOnlyViewerPass(PassRegistry&);
+void initializeRegionPrinterPass(PassRegistry&);
+void initializeRegionViewerPass(PassRegistry&);
+void initializeRegisterCoalescerAnalysisGroup(PassRegistry&);
+void initializeRenderMachineFunctionPass(PassRegistry&);
+void initializeSCCPPass(PassRegistry&);
+void initializeSRETPromotionPass(PassRegistry&);
+void initializeSROA_DTPass(PassRegistry&);
+void initializeSROA_SSAUpPass(PassRegistry&);
+void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
+void initializeScalarEvolutionPass(PassRegistry&);
+void initializeSimpleInlinerPass(PassRegistry&);
+void initializeSimpleRegisterCoalescingPass(PassRegistry&);
+void initializeSimplifyHalfPowrLibCallsPass(PassRegistry&);
+void initializeSimplifyLibCallsPass(PassRegistry&);
+void initializeSingleLoopExtractorPass(PassRegistry&);
+void initializeSinkingPass(PassRegistry&);
+void initializeSlotIndexesPass(PassRegistry&);
+void initializeSpillPlacementPass(PassRegistry&);
+void initializeStackProtectorPass(PassRegistry&);
+void initializeStackSlotColoringPass(PassRegistry&);
+void initializeStripDeadDebugInfoPass(PassRegistry&);
+void initializeStripDeadPrototypesPassPass(PassRegistry&);
+void initializeStripDebugDeclarePass(PassRegistry&);
+void initializeStripNonDebugSymbolsPass(PassRegistry&);
+void initializeStripSymbolsPass(PassRegistry&);
+void initializeStrongPHIEliminationPass(PassRegistry&);
+void initializeTailCallElimPass(PassRegistry&);
+void initializeTailDupPass(PassRegistry&);
+void initializeTargetDataPass(PassRegistry&);
+void initializeTargetLibraryInfoPass(PassRegistry&);
+void initializeTwoAddressInstructionPassPass(PassRegistry&);
+void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
+void initializeUnifyFunctionExitNodesPass(PassRegistry&);
+void initializeUnreachableBlockElimPass(PassRegistry&);
+void initializeUnreachableMachineBlockElimPass(PassRegistry&);
+void initializeVerifierPass(PassRegistry&);
+void initializeVirtRegMapPass(PassRegistry&);
+void initializeInstSimplifierPass(PassRegistry&);
+
+}
+
+#endif
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 105b1bcd94c5..ed8f0f7f615e 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -87,6 +87,25 @@ public:
     isClobber           // '~x'
   };
   
+  typedef std::vector<std::string> ConstraintCodeVector;
+  
+  struct SubConstraintInfo {
+    /// MatchingInput - If this is not -1, this is an output constraint where an
+    /// input constraint is required to match it (e.g. "0").  The value is the
+    /// constraint number that matches this one (for example, if this is
+    /// constraint #0 and constraint #4 has the value "0", this will be 4).
+    signed char MatchingInput;
+    /// Code - The constraint code, either the register name (in braces) or the
+    /// constraint letter/number.
+    ConstraintCodeVector Codes;
+    /// Default constructor.
+    SubConstraintInfo() : MatchingInput(-1) {}
+  };
+
+  typedef std::vector<SubConstraintInfo> SubConstraintInfoVector;
+  struct ConstraintInfo;
+  typedef std::vector<ConstraintInfo> ConstraintInfoVector;
+  
   struct ConstraintInfo {
     /// Type - The basic type of the constraint: input/output/clobber
     ///
@@ -118,25 +137,42 @@ public:
     
     /// Code - The constraint code, either the register name (in braces) or the
     /// constraint letter/number.
-    std::vector<std::string> Codes;
+    ConstraintCodeVector Codes;
+    
+    /// isMultipleAlternative - '|': has multiple-alternative constraints.
+    bool isMultipleAlternative;
+    
+    /// multipleAlternatives - If there are multiple alternative constraints,
+    /// this array will contain them.  Otherwise it will be empty.
+    SubConstraintInfoVector multipleAlternatives;
+    
+    /// The currently selected alternative constraint index.
+    unsigned currentAlternativeIndex;
+    
+    ///Default constructor.
+    ConstraintInfo();
+    
+    /// Copy constructor.
+    ConstraintInfo(const ConstraintInfo &other);
     
     /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
     /// fields in this structure.  If the constraint string is not understood,
     /// return true, otherwise return false.
-    bool Parse(StringRef Str, 
-               std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar);
+    bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar);
+               
+    /// selectAlternative - Point this constraint to the alternative constraint
+    /// indicated by the index.
+    void selectAlternative(unsigned index);
   };
   
   /// ParseConstraints - Split up the constraint string into the specific
   /// constraints and their prefixes.  If this returns an empty vector, and if
   /// the constraint string itself isn't empty, there was an error parsing.
-  static std::vector<ConstraintInfo> 
-    ParseConstraints(StringRef ConstraintString);
+  static ConstraintInfoVector ParseConstraints(StringRef ConstraintString);
   
   /// ParseConstraints - Parse the constraints of this inlineasm object, 
   /// returning them the same way that ParseConstraints(str) does.
-  std::vector<ConstraintInfo> 
-  ParseConstraints() const {
+  ConstraintInfoVector ParseConstraints() const {
     return ParseConstraints(Constraints);
   }
   
@@ -154,8 +190,15 @@ public:
     Op_InputChain = 0,
     Op_AsmString = 1,
     Op_MDNode = 2,
-    Op_IsAlignStack = 3,
+    Op_ExtraInfo = 3,    // HasSideEffects, IsAlignStack
     Op_FirstOperand = 4,
+
+    MIOp_AsmString = 0,
+    MIOp_ExtraInfo = 1,    // HasSideEffects, IsAlignStack
+    MIOp_FirstOperand = 2,
+
+    Extra_HasSideEffects = 1,
+    Extra_IsAlignStack = 2,
     
     Kind_RegUse = 1,
     Kind_RegDef = 2,
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index 6715416afa1c..a166956e1a64 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -128,7 +128,8 @@ public:
 };
 
 template <>
-struct OperandTraits<UnaryInstruction> : public FixedNumOperandTraits<1> {
+struct OperandTraits<UnaryInstruction> :
+  public FixedNumOperandTraits<UnaryInstruction, 1> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
@@ -193,154 +194,93 @@ public:
   }
 #include "llvm/Instruction.def"
 
-
-  /// CreateNSWAdd - Create an Add operator with the NSW flag set.
-  ///
-  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name);
+  static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name = "") {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name);
     BO->setHasNoSignedWrap(true);
     return BO;
   }
-  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name, BB);
+  static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
     BO->setHasNoSignedWrap(true);
     return BO;
   }
-  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name, I);
+  static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
     BO->setHasNoSignedWrap(true);
     return BO;
   }
-
-  /// CreateNUWAdd - Create an Add operator with the NUW flag set.
-  ///
-  static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name, BB);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNUWAdd(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateAdd(V1, V2, Name, I);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-
-  /// CreateNSWSub - Create an Sub operator with the NSW flag set.
-  ///
-  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateSub(V1, V2, Name);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateSub(V1, V2, Name, BB);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateSub(V1, V2, Name, I);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-
-  /// CreateNUWSub - Create an Sub operator with the NUW flag set.
-  ///
-  static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateSub(V1, V2, Name);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateSub(V1, V2, Name, BB);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNUWSub(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateSub(V1, V2, Name, I);
-    BO->setHasNoUnsignedWrap(true);
-    return BO;
-  }
-
-  /// CreateNSWMul - Create a Mul operator with the NSW flag set.
-  ///
-  static BinaryOperator *CreateNSWMul(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateMul(V1, V2, Name);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNSWMul(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateMul(V1, V2, Name, BB);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-  static BinaryOperator *CreateNSWMul(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateMul(V1, V2, Name, I);
-    BO->setHasNoSignedWrap(true);
-    return BO;
-  }
-
-  /// CreateNUWMul - Create a Mul operator with the NUW flag set.
-  ///
-  static BinaryOperator *CreateNUWMul(Value *V1, Value *V2,
-                                      const Twine &Name = "") {
-    BinaryOperator *BO = CreateMul(V1, V2, Name);
+  
+  static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name = "") {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name);
     BO->setHasNoUnsignedWrap(true);
     return BO;
   }
-  static BinaryOperator *CreateNUWMul(Value *V1, Value *V2,
-                                      const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateMul(V1, V2, Name, BB);
+  static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
     BO->setHasNoUnsignedWrap(true);
     return BO;
   }
-  static BinaryOperator *CreateNUWMul(Value *V1, Value *V2,
-                                      const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateMul(V1, V2, Name, I);
+  static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+                                   const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
     BO->setHasNoUnsignedWrap(true);
     return BO;
   }
-
-  /// CreateExactSDiv - Create an SDiv operator with the exact flag set.
-  ///
-  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
-                                         const Twine &Name = "") {
-    BinaryOperator *BO = CreateSDiv(V1, V2, Name);
+  
+  static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+                                     const Twine &Name = "") {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name);
     BO->setIsExact(true);
     return BO;
   }
-  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
-                                         const Twine &Name, BasicBlock *BB) {
-    BinaryOperator *BO = CreateSDiv(V1, V2, Name, BB);
+  static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+                                     const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
     BO->setIsExact(true);
     return BO;
   }
-  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
-                                         const Twine &Name, Instruction *I) {
-    BinaryOperator *BO = CreateSDiv(V1, V2, Name, I);
+  static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+                                     const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
     BO->setIsExact(true);
     return BO;
   }
-
+  
+#define DEFINE_HELPERS(OPC, NUWNSWEXACT)                                     \
+  static BinaryOperator *Create ## NUWNSWEXACT ## OPC                        \
+           (Value *V1, Value *V2, const Twine &Name = "") {                  \
+    return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name);            \
+  }                                                                          \
+  static BinaryOperator *Create ## NUWNSWEXACT ## OPC                        \
+           (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) {       \
+    return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB);        \
+  }                                                                          \
+  static BinaryOperator *Create ## NUWNSWEXACT ## OPC                        \
+           (Value *V1, Value *V2, const Twine &Name, Instruction *I) {       \
+    return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I);         \
+  }
+  
+  DEFINE_HELPERS(Add, NSW)  // CreateNSWAdd
+  DEFINE_HELPERS(Add, NUW)  // CreateNUWAdd
+  DEFINE_HELPERS(Sub, NSW)  // CreateNSWSub
+  DEFINE_HELPERS(Sub, NUW)  // CreateNUWSub
+  DEFINE_HELPERS(Mul, NSW)  // CreateNSWMul
+  DEFINE_HELPERS(Mul, NUW)  // CreateNUWMul
+  DEFINE_HELPERS(Shl, NSW)  // CreateNSWShl
+  DEFINE_HELPERS(Shl, NUW)  // CreateNUWShl
+
+  DEFINE_HELPERS(SDiv, Exact)  // CreateExactSDiv
+  DEFINE_HELPERS(UDiv, Exact)  // CreateExactUDiv
+  DEFINE_HELPERS(AShr, Exact)  // CreateExactAShr
+  DEFINE_HELPERS(LShr, Exact)  // CreateExactLShr
+
+#undef DEFINE_HELPERS
+  
   /// Helper functions to construct and inspect unary operations (NEG and NOT)
   /// via binary operators SUB and XOR:
   ///
@@ -432,7 +372,8 @@ public:
 };
 
 template <>
-struct OperandTraits<BinaryOperator> : public FixedNumOperandTraits<2> {
+struct OperandTraits<BinaryOperator> :
+  public FixedNumOperandTraits<BinaryOperator, 2> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
@@ -824,11 +765,11 @@ public:
 
   /// This is just a convenience that dispatches to the subclasses.
   /// @brief Determine if this CmpInst is commutative.
-  bool isCommutative();
+  bool isCommutative() const;
 
   /// This is just a convenience that dispatches to the subclasses.
   /// @brief Determine if this is an equals/not equals predicate.
-  bool isEquality();
+  bool isEquality() const;
 
   /// @returns true if the comparison is signed, false otherwise.
   /// @brief Determine if this instruction is using a signed comparison.
@@ -903,7 +844,7 @@ private:
 
 // FIXME: these are redundant if CmpInst < BinaryOperator
 template <>
-struct OperandTraits<CmpInst> : public FixedNumOperandTraits<2> {
+struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 88f5ce1b2622..89bb9fdf423d 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -200,11 +200,10 @@ public:
   ///
   ///   Associative operators satisfy:  x op (y op z) === (x op y) op z
   ///
-  /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative, when
-  /// not applied to floating point types.
+  /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
   ///
-  bool isAssociative() const { return isAssociative(getOpcode(), getType()); }
-  static bool isAssociative(unsigned op, const Type *Ty);
+  bool isAssociative() const { return isAssociative(getOpcode()); }
+  static bool isAssociative(unsigned op);
 
   /// isCommutative - Return true if the instruction is commutative:
   ///
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index bd1e889de076..17ff763c52bf 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -29,7 +29,6 @@ class ConstantInt;
 class ConstantRange;
 class APInt;
 class LLVMContext;
-class DominatorTree;
 
 //===----------------------------------------------------------------------===//
 //                                AllocaInst Class
@@ -43,7 +42,7 @@ protected:
 public:
   explicit AllocaInst(const Type *Ty, Value *ArraySize = 0,
                       const Twine &Name = "", Instruction *InsertBefore = 0);
-  AllocaInst(const Type *Ty, Value *ArraySize, 
+  AllocaInst(const Type *Ty, Value *ArraySize,
              const Twine &Name, BasicBlock *InsertAtEnd);
 
   AllocaInst(const Type *Ty, const Twine &Name, Instruction *InsertBefore = 0);
@@ -166,8 +165,8 @@ public:
   unsigned getPointerAddressSpace() const {
     return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
   }
-  
-  
+
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const LoadInst *) { return true; }
   static inline bool classof(const Instruction *I) {
@@ -237,7 +236,7 @@ public:
 
   Value *getValueOperand() { return getOperand(0); }
   const Value *getValueOperand() const { return getOperand(0); }
-  
+
   Value *getPointerOperand() { return getOperand(1); }
   const Value *getPointerOperand() const { return getOperand(1); }
   static unsigned getPointerOperandIndex() { return 1U; }
@@ -245,7 +244,7 @@ public:
   unsigned getPointerAddressSpace() const {
     return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
   }
-  
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const StoreInst *) { return true; }
   static inline bool classof(const Instruction *I) {
@@ -263,7 +262,7 @@ private:
 };
 
 template <>
-struct OperandTraits<StoreInst> : public FixedNumOperandTraits<2> {
+struct OperandTraits<StoreInst> : public FixedNumOperandTraits<StoreInst, 2> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
@@ -289,8 +288,10 @@ class GetElementPtrInst : public Instruction {
             const Twine &NameStr);
   void init(Value *Ptr, Value *Idx, const Twine &NameStr);
 
-  template<typename InputIterator>
-  void init(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  void init(Value *Ptr,
+            RandomAccessIterator IdxBegin,
+            RandomAccessIterator IdxEnd,
             const Twine &NameStr,
             // This argument ensures that we have an iterator we can
             // do arithmetic on in constant time
@@ -313,10 +314,10 @@ class GetElementPtrInst : public Instruction {
   /// Null is returned if the indices are invalid for the specified
   /// pointer type.
   ///
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static const Type *getIndexedType(const Type *Ptr,
-                                    InputIterator IdxBegin,
-                                    InputIterator IdxEnd,
+                                    RandomAccessIterator IdxBegin,
+                                    RandomAccessIterator IdxEnd,
                                     // This argument ensures that we
                                     // have an iterator we can do
                                     // arithmetic on in constant time
@@ -331,18 +332,19 @@ class GetElementPtrInst : public Instruction {
   }
 
   /// Constructors - Create a getelementptr instruction with a base pointer an
-  /// list of indices.  The first ctor can optionally insert before an existing
+  /// list of indices. The first ctor can optionally insert before an existing
   /// instruction, the second appends the new instruction to the specified
   /// BasicBlock.
-  template<typename InputIterator>
-  inline GetElementPtrInst(Value *Ptr, InputIterator IdxBegin,
-                           InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  inline GetElementPtrInst(Value *Ptr, RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
                            unsigned Values,
                            const Twine &NameStr,
                            Instruction *InsertBefore);
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   inline GetElementPtrInst(Value *Ptr,
-                           InputIterator IdxBegin, InputIterator IdxEnd,
+                           RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
                            unsigned Values,
                            const Twine &NameStr, BasicBlock *InsertAtEnd);
 
@@ -355,23 +357,24 @@ class GetElementPtrInst : public Instruction {
 protected:
   virtual GetElementPtrInst *clone_impl() const;
 public:
-  template<typename InputIterator>
-  static GetElementPtrInst *Create(Value *Ptr, InputIterator IdxBegin,
-                                   InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  static GetElementPtrInst *Create(Value *Ptr, RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
                                    const Twine &NameStr = "",
                                    Instruction *InsertBefore = 0) {
-    typename std::iterator_traits<InputIterator>::difference_type Values =
-      1 + std::distance(IdxBegin, IdxEnd);
+    typename std::iterator_traits<RandomAccessIterator>::difference_type
+      Values = 1 + std::distance(IdxBegin, IdxEnd);
     return new(Values)
       GetElementPtrInst(Ptr, IdxBegin, IdxEnd, Values, NameStr, InsertBefore);
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static GetElementPtrInst *Create(Value *Ptr,
-                                   InputIterator IdxBegin, InputIterator IdxEnd,
+                                   RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
                                    const Twine &NameStr,
                                    BasicBlock *InsertAtEnd) {
-    typename std::iterator_traits<InputIterator>::difference_type Values =
-      1 + std::distance(IdxBegin, IdxEnd);
+    typename std::iterator_traits<RandomAccessIterator>::difference_type
+      Values = 1 + std::distance(IdxBegin, IdxEnd);
     return new(Values)
       GetElementPtrInst(Ptr, IdxBegin, IdxEnd, Values, NameStr, InsertAtEnd);
   }
@@ -391,9 +394,10 @@ public:
 
   /// Create an "inbounds" getelementptr. See the documentation for the
   /// "inbounds" flag in LangRef.html for details.
-  template<typename InputIterator>
-  static GetElementPtrInst *CreateInBounds(Value *Ptr, InputIterator IdxBegin,
-                                           InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  static GetElementPtrInst *CreateInBounds(Value *Ptr,
+                                           RandomAccessIterator IdxBegin,
+                                           RandomAccessIterator IdxEnd,
                                            const Twine &NameStr = "",
                                            Instruction *InsertBefore = 0) {
     GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd,
@@ -401,10 +405,10 @@ public:
     GEP->setIsInBounds(true);
     return GEP;
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static GetElementPtrInst *CreateInBounds(Value *Ptr,
-                                           InputIterator IdxBegin,
-                                           InputIterator IdxEnd,
+                                           RandomAccessIterator IdxBegin,
+                                           RandomAccessIterator IdxEnd,
                                            const Twine &NameStr,
                                            BasicBlock *InsertAtEnd) {
     GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd,
@@ -441,18 +445,21 @@ public:
   /// Null is returned if the indices are invalid for the specified
   /// pointer type.
   ///
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static const Type *getIndexedType(const Type *Ptr,
-                                    InputIterator IdxBegin,
-                                    InputIterator IdxEnd) {
+                                    RandomAccessIterator IdxBegin,
+                                    RandomAccessIterator IdxEnd) {
     return getIndexedType(Ptr, IdxBegin, IdxEnd,
-                          typename std::iterator_traits<InputIterator>::
+                          typename std::iterator_traits<RandomAccessIterator>::
                           iterator_category());
   }
 
   static const Type *getIndexedType(const Type *Ptr,
                                     Value* const *Idx, unsigned NumIdx);
 
+  static const Type *getIndexedType(const Type *Ptr,
+                                    Constant* const *Idx, unsigned NumIdx);
+
   static const Type *getIndexedType(const Type *Ptr,
                                     uint64_t const *Idx, unsigned NumIdx);
 
@@ -472,7 +479,7 @@ public:
   static unsigned getPointerOperandIndex() {
     return 0U;                      // get index for modifying correct operand
   }
-  
+
   unsigned getPointerAddressSpace() const {
     return cast<PointerType>(getType())->getAddressSpace();
   }
@@ -520,13 +527,14 @@ public:
 };
 
 template <>
-struct OperandTraits<GetElementPtrInst> : public VariadicOperandTraits<1> {
+struct OperandTraits<GetElementPtrInst> :
+  public VariadicOperandTraits<GetElementPtrInst, 1> {
 };
 
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 GetElementPtrInst::GetElementPtrInst(Value *Ptr,
-                                     InputIterator IdxBegin,
-                                     InputIterator IdxEnd,
+                                     RandomAccessIterator IdxBegin,
+                                     RandomAccessIterator IdxEnd,
                                      unsigned Values,
                                      const Twine &NameStr,
                                      Instruction *InsertBefore)
@@ -539,12 +547,13 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
                 OperandTraits<GetElementPtrInst>::op_end(this) - Values,
                 Values, InsertBefore) {
   init(Ptr, IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 GetElementPtrInst::GetElementPtrInst(Value *Ptr,
-                                     InputIterator IdxBegin,
-                                     InputIterator IdxEnd,
+                                     RandomAccessIterator IdxBegin,
+                                     RandomAccessIterator IdxEnd,
                                      unsigned Values,
                                      const Twine &NameStr,
                                      BasicBlock *InsertAtEnd)
@@ -557,7 +566,8 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
                 OperandTraits<GetElementPtrInst>::op_end(this) - Values,
                 Values, InsertAtEnd) {
   init(Ptr, IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
 
@@ -575,7 +585,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
 class ICmpInst: public CmpInst {
 protected:
   /// @brief Clone an indentical ICmpInst
-  virtual ICmpInst *clone_impl() const;  
+  virtual ICmpInst *clone_impl() const;
 public:
   /// @brief Constructor with insert-before-instruction semantics.
   ICmpInst(
@@ -746,7 +756,7 @@ public:
     assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
            "Invalid operand types for FCmp instruction");
   }
-  
+
   /// @brief Constructor with insert-at-end semantics.
   FCmpInst(
     BasicBlock &InsertAtEnd, ///< Block to insert into.
@@ -838,8 +848,10 @@ class CallInst : public Instruction {
   void init(Value *Func, Value *Actual);
   void init(Value *Func);
 
-  template<typename InputIterator>
-  void init(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
+  template<typename RandomAccessIterator>
+  void init(Value *Func,
+            RandomAccessIterator ArgBegin,
+            RandomAccessIterator ArgEnd,
             const Twine &NameStr,
             // This argument ensures that we have an iterator we can
             // do arithmetic on in constant time
@@ -851,24 +863,26 @@ class CallInst : public Instruction {
     setName(NameStr);
   }
 
-  /// Construct a CallInst given a range of arguments.  InputIterator
+  /// Construct a CallInst given a range of arguments. RandomAccessIterator
   /// must be a random-access iterator pointing to contiguous storage
-  /// (e.g. a std::vector<>::iterator).  Checks are made for
+  /// (e.g. a std::vector<>::iterator). Checks are made for
   /// random-accessness but not for contiguous storage as that would
   /// incur runtime overhead.
   /// @brief Construct a CallInst from a range of arguments
-  template<typename InputIterator>
-  CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
+  template<typename RandomAccessIterator>
+  CallInst(Value *Func,
+           RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
            const Twine &NameStr, Instruction *InsertBefore);
 
-  /// Construct a CallInst given a range of arguments.  InputIterator
+  /// Construct a CallInst given a range of arguments.  RandomAccessIterator
   /// must be a random-access iterator pointing to contiguous storage
   /// (e.g. a std::vector<>::iterator).  Checks are made for
   /// random-accessness but not for contiguous storage as that would
   /// incur runtime overhead.
   /// @brief Construct a CallInst from a range of arguments
-  template<typename InputIterator>
-  inline CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
+  template<typename RandomAccessIterator>
+  inline CallInst(Value *Func,
+                  RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
                   const Twine &NameStr, BasicBlock *InsertAtEnd);
 
   CallInst(Value *F, Value *Actual, const Twine &NameStr,
@@ -881,17 +895,19 @@ class CallInst : public Instruction {
 protected:
   virtual CallInst *clone_impl() const;
 public:
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static CallInst *Create(Value *Func,
-                          InputIterator ArgBegin, InputIterator ArgEnd,
+                          RandomAccessIterator ArgBegin,
+                          RandomAccessIterator ArgEnd,
                           const Twine &NameStr = "",
                           Instruction *InsertBefore = 0) {
     return new(unsigned(ArgEnd - ArgBegin + 1))
       CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore);
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static CallInst *Create(Value *Func,
-                          InputIterator ArgBegin, InputIterator ArgEnd,
+                          RandomAccessIterator ArgBegin,
+                          RandomAccessIterator ArgEnd,
                           const Twine &NameStr, BasicBlock *InsertAtEnd) {
     return new(unsigned(ArgEnd - ArgBegin + 1))
       CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd);
@@ -984,7 +1000,7 @@ public:
   unsigned getParamAlignment(unsigned i) const {
     return AttributeList.getParamAlignment(i);
   }
-  
+
   /// @brief Return true if the call should not be inlined.
   bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); }
   void setIsNoInline(bool Value = true) {
@@ -1052,7 +1068,7 @@ public:
   void setCalledFunction(Value* Fn) {
     Op<-1>() = Fn;
   }
-  
+
   /// isInlineAsm - Check if this call is an inline asm statement.
   bool isInlineAsm() const {
     return isa<InlineAsm>(Op<-1>());
@@ -1075,11 +1091,12 @@ private:
 };
 
 template <>
-struct OperandTraits<CallInst> : public VariadicOperandTraits<1> {
+struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
 };
 
-template<typename InputIterator>
-CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
+template<typename RandomAccessIterator>
+CallInst::CallInst(Value *Func,
+                   RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
                    const Twine &NameStr, BasicBlock *InsertAtEnd)
   : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
                                    ->getElementType())->getReturnType(),
@@ -1087,11 +1104,13 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
                 OperandTraits<CallInst>::op_end(this) - (ArgEnd - ArgBegin + 1),
                 unsigned(ArgEnd - ArgBegin + 1), InsertAtEnd) {
   init(Func, ArgBegin, ArgEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
-template<typename InputIterator>
-CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
+template<typename RandomAccessIterator>
+CallInst::CallInst(Value *Func,
+                   RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
                    const Twine &NameStr, Instruction *InsertBefore)
   : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
                                    ->getElementType())->getReturnType(),
@@ -1099,7 +1118,8 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd,
                 OperandTraits<CallInst>::op_end(this) - (ArgEnd - ArgBegin + 1),
                 unsigned(ArgEnd - ArgBegin + 1), InsertBefore) {
   init(Func, ArgBegin, ArgEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
 
@@ -1156,7 +1176,7 @@ public:
   Value *getCondition() { return Op<0>(); }
   Value *getTrueValue() { return Op<1>(); }
   Value *getFalseValue() { return Op<2>(); }
-  
+
   /// areInvalidOperands - Return a string if the specified operands are invalid
   /// for a select operation, otherwise return null.
   static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
@@ -1179,7 +1199,7 @@ public:
 };
 
 template <>
-struct OperandTraits<SelectInst> : public FixedNumOperandTraits<3> {
+struct OperandTraits<SelectInst> : public FixedNumOperandTraits<SelectInst, 3> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
@@ -1207,6 +1227,10 @@ public:
     setName(NameStr);
   }
 
+  Value *getPointerOperand() { return getOperand(0); }
+  const Value *getPointerOperand() const { return getOperand(0); }
+  static unsigned getPointerOperandIndex() { return 0U; }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const VAArgInst *) { return true; }
   static inline bool classof(const Instruction *I) {
@@ -1252,12 +1276,12 @@ public:
   Value *getIndexOperand() { return Op<1>(); }
   const Value *getVectorOperand() const { return Op<0>(); }
   const Value *getIndexOperand() const { return Op<1>(); }
-  
+
   const VectorType *getVectorOperandType() const {
     return reinterpret_cast<const VectorType*>(getVectorOperand()->getType());
   }
-  
-  
+
+
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
@@ -1272,7 +1296,8 @@ public:
 };
 
 template <>
-struct OperandTraits<ExtractElementInst> : public FixedNumOperandTraits<2> {
+struct OperandTraits<ExtractElementInst> :
+  public FixedNumOperandTraits<ExtractElementInst, 2> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
@@ -1330,7 +1355,8 @@ public:
 };
 
 template <>
-struct OperandTraits<InsertElementInst> : public FixedNumOperandTraits<3> {
+struct OperandTraits<InsertElementInst> :
+  public FixedNumOperandTraits<InsertElementInst, 3> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
@@ -1387,7 +1413,8 @@ public:
 };
 
 template <>
-struct OperandTraits<ShuffleVectorInst> : public FixedNumOperandTraits<3> {
+struct OperandTraits<ShuffleVectorInst> :
+  public FixedNumOperandTraits<ShuffleVectorInst, 3> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
@@ -1407,8 +1434,9 @@ class ExtractValueInst : public UnaryInstruction {
             const Twine &NameStr);
   void init(unsigned Idx, const Twine &NameStr);
 
-  template<typename InputIterator>
-  void init(InputIterator IdxBegin, InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  void init(RandomAccessIterator IdxBegin,
+            RandomAccessIterator IdxEnd,
             const Twine &NameStr,
             // This argument ensures that we have an iterator we can
             // do arithmetic on in constant time
@@ -1429,16 +1457,15 @@ class ExtractValueInst : public UnaryInstruction {
   /// getIndexedType - Returns the type of the element that would be extracted
   /// with an extractvalue instruction with the specified parameters.
   ///
-  /// Null is returned if the indices are invalid for the specified
-  /// pointer type.
+  /// Null is returned if the indices are invalid for the specified type.
   ///
   static const Type *getIndexedType(const Type *Agg,
                                     const unsigned *Idx, unsigned NumIdx);
 
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static const Type *getIndexedType(const Type *Ptr,
-                                    InputIterator IdxBegin,
-                                    InputIterator IdxEnd,
+                                    RandomAccessIterator IdxBegin,
+                                    RandomAccessIterator IdxEnd,
                                     // This argument ensures that we
                                     // have an iterator we can do
                                     // arithmetic on in constant time
@@ -1456,14 +1483,16 @@ class ExtractValueInst : public UnaryInstruction {
   /// value and a list of indices.  The first ctor can optionally insert before
   /// an existing instruction, the second appends the new instruction to the
   /// specified BasicBlock.
-  template<typename InputIterator>
-  inline ExtractValueInst(Value *Agg, InputIterator IdxBegin,
-                          InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  inline ExtractValueInst(Value *Agg,
+                          RandomAccessIterator IdxBegin,
+                          RandomAccessIterator IdxEnd,
                           const Twine &NameStr,
                           Instruction *InsertBefore);
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   inline ExtractValueInst(Value *Agg,
-                          InputIterator IdxBegin, InputIterator IdxEnd,
+                          RandomAccessIterator IdxBegin,
+                          RandomAccessIterator IdxEnd,
                           const Twine &NameStr, BasicBlock *InsertAtEnd);
 
   // allocate space for exactly one operand
@@ -1474,17 +1503,19 @@ protected:
   virtual ExtractValueInst *clone_impl() const;
 
 public:
-  template<typename InputIterator>
-  static ExtractValueInst *Create(Value *Agg, InputIterator IdxBegin,
-                                  InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  static ExtractValueInst *Create(Value *Agg,
+                                  RandomAccessIterator IdxBegin,
+                                  RandomAccessIterator IdxEnd,
                                   const Twine &NameStr = "",
                                   Instruction *InsertBefore = 0) {
     return new
       ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertBefore);
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static ExtractValueInst *Create(Value *Agg,
-                                  InputIterator IdxBegin, InputIterator IdxEnd,
+                                  RandomAccessIterator IdxBegin,
+                                  RandomAccessIterator IdxEnd,
                                   const Twine &NameStr,
                                   BasicBlock *InsertAtEnd) {
     return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertAtEnd);
@@ -1509,15 +1540,14 @@ public:
   /// getIndexedType - Returns the type of the element that would be extracted
   /// with an extractvalue instruction with the specified parameters.
   ///
-  /// Null is returned if the indices are invalid for the specified
-  /// pointer type.
+  /// Null is returned if the indices are invalid for the specified type.
   ///
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static const Type *getIndexedType(const Type *Ptr,
-                                    InputIterator IdxBegin,
-                                    InputIterator IdxEnd) {
+                                    RandomAccessIterator IdxBegin,
+                                    RandomAccessIterator IdxEnd) {
     return getIndexedType(Ptr, IdxBegin, IdxEnd,
-                          typename std::iterator_traits<InputIterator>::
+                          typename std::iterator_traits<RandomAccessIterator>::
                           iterator_category());
   }
   static const Type *getIndexedType(const Type *Ptr, unsigned Idx);
@@ -1554,29 +1584,31 @@ public:
   }
 };
 
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 ExtractValueInst::ExtractValueInst(Value *Agg,
-                                   InputIterator IdxBegin,
-                                   InputIterator IdxEnd,
+                                   RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
                                    const Twine &NameStr,
                                    Instruction *InsertBefore)
   : UnaryInstruction(checkType(getIndexedType(Agg->getType(),
                                               IdxBegin, IdxEnd)),
                      ExtractValue, Agg, InsertBefore) {
   init(IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 ExtractValueInst::ExtractValueInst(Value *Agg,
-                                   InputIterator IdxBegin,
-                                   InputIterator IdxEnd,
+                                   RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
                                    const Twine &NameStr,
                                    BasicBlock *InsertAtEnd)
   : UnaryInstruction(checkType(getIndexedType(Agg->getType(),
                                               IdxBegin, IdxEnd)),
                      ExtractValue, Agg, InsertAtEnd) {
   init(IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
 
@@ -1596,9 +1628,9 @@ class InsertValueInst : public Instruction {
             const Twine &NameStr);
   void init(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr);
 
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   void init(Value *Agg, Value *Val,
-            InputIterator IdxBegin, InputIterator IdxEnd,
+            RandomAccessIterator IdxBegin, RandomAccessIterator IdxEnd,
             const Twine &NameStr,
             // This argument ensures that we have an iterator we can
             // do arithmetic on in constant time
@@ -1620,14 +1652,16 @@ class InsertValueInst : public Instruction {
   /// value, a value to insert, and a list of indices.  The first ctor can
   /// optionally insert before an existing instruction, the second appends
   /// the new instruction to the specified BasicBlock.
-  template<typename InputIterator>
-  inline InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin,
-                         InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  inline InsertValueInst(Value *Agg, Value *Val,
+                         RandomAccessIterator IdxBegin,
+                         RandomAccessIterator IdxEnd,
                          const Twine &NameStr,
                          Instruction *InsertBefore);
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   inline InsertValueInst(Value *Agg, Value *Val,
-                         InputIterator IdxBegin, InputIterator IdxEnd,
+                         RandomAccessIterator IdxBegin,
+                         RandomAccessIterator IdxEnd,
                          const Twine &NameStr, BasicBlock *InsertAtEnd);
 
   /// Constructors - These two constructors are convenience methods because one
@@ -1645,17 +1679,19 @@ public:
     return User::operator new(s, 2);
   }
 
-  template<typename InputIterator>
-  static InsertValueInst *Create(Value *Agg, Value *Val, InputIterator IdxBegin,
-                                 InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  static InsertValueInst *Create(Value *Agg, Value *Val,
+                                 RandomAccessIterator IdxBegin,
+                                 RandomAccessIterator IdxEnd,
                                  const Twine &NameStr = "",
                                  Instruction *InsertBefore = 0) {
     return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
                                NameStr, InsertBefore);
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static InsertValueInst *Create(Value *Agg, Value *Val,
-                                 InputIterator IdxBegin, InputIterator IdxEnd,
+                                 RandomAccessIterator IdxBegin,
+                                 RandomAccessIterator IdxEnd,
                                  const Twine &NameStr,
                                  BasicBlock *InsertAtEnd) {
     return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
@@ -1722,34 +1758,37 @@ public:
 };
 
 template <>
-struct OperandTraits<InsertValueInst> : public FixedNumOperandTraits<2> {
+struct OperandTraits<InsertValueInst> :
+  public FixedNumOperandTraits<InsertValueInst, 2> {
 };
 
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 InsertValueInst::InsertValueInst(Value *Agg,
                                  Value *Val,
-                                 InputIterator IdxBegin,
-                                 InputIterator IdxEnd,
+                                 RandomAccessIterator IdxBegin,
+                                 RandomAccessIterator IdxEnd,
                                  const Twine &NameStr,
                                  Instruction *InsertBefore)
   : Instruction(Agg->getType(), InsertValue,
                 OperandTraits<InsertValueInst>::op_begin(this),
                 2, InsertBefore) {
   init(Agg, Val, IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 InsertValueInst::InsertValueInst(Value *Agg,
                                  Value *Val,
-                                 InputIterator IdxBegin,
-                                 InputIterator IdxEnd,
+                                 RandomAccessIterator IdxBegin,
+                                 RandomAccessIterator IdxEnd,
                                  const Twine &NameStr,
                                  BasicBlock *InsertAtEnd)
   : Instruction(Agg->getType(), InsertValue,
                 OperandTraits<InsertValueInst>::op_begin(this),
                 2, InsertAtEnd) {
   init(Agg, Val, IdxBegin, IdxEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
@@ -1835,7 +1874,7 @@ public:
   BasicBlock *getIncomingBlock(unsigned i) const {
     return cast<BasicBlock>(getOperand(i*2+1));
   }
-  
+
   /// getIncomingBlock - Return incoming basic block corresponding
   /// to an operand of the PHI.
   ///
@@ -1843,7 +1882,7 @@ public:
     assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
     return cast<BasicBlock>((&U + 1)->get());
   }
-  
+
   /// getIncomingBlock - Return incoming basic block corresponding
   /// to value use iterator.
   ///
@@ -1851,8 +1890,8 @@ public:
   BasicBlock *getIncomingBlock(value_use_iterator<U> I) const {
     return getIncomingBlock(I.getUse());
   }
-  
-  
+
+
   void setIncomingBlock(unsigned i, BasicBlock *BB) {
     setOperand(i*2+1, (Value*)BB);
   }
@@ -1912,13 +1951,7 @@ public:
 
   /// hasConstantValue - If the specified PHI node always merges together the
   /// same value, return the value, otherwise return null.
-  ///
-  /// If the PHI has undef operands, but all the rest of the operands are
-  /// some unique value, return that value if it can be proved that the
-  /// value dominates the PHI. If DT is null, use a conservative check,
-  /// otherwise use DT to test for dominance.
-  ///
-  Value *hasConstantValue(DominatorTree *DT = 0) const;
+  Value *hasConstantValue() const;
 
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const PHINode *) { return true; }
@@ -1985,11 +2018,9 @@ public:
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
-  /// Convenience accessor
-  Value *getReturnValue(unsigned n = 0) const {
-    return n < getNumOperands()
-      ? getOperand(n)
-      : 0;
+  /// Convenience accessor. Returns null if there is no return value.
+  Value *getReturnValue() const {
+    return getNumOperands() != 0 ? getOperand(0) : 0;
   }
 
   unsigned getNumSuccessors() const { return 0; }
@@ -2009,7 +2040,7 @@ public:
 };
 
 template <>
-struct OperandTraits<ReturnInst> : public VariadicOperandTraits<> {
+struct OperandTraits<ReturnInst> : public VariadicOperandTraits<ReturnInst> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
@@ -2045,22 +2076,20 @@ protected:
   virtual BranchInst *clone_impl() const;
 public:
   static BranchInst *Create(BasicBlock *IfTrue, Instruction *InsertBefore = 0) {
-    return new(1, true) BranchInst(IfTrue, InsertBefore);
+    return new(1) BranchInst(IfTrue, InsertBefore);
   }
   static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
                             Value *Cond, Instruction *InsertBefore = 0) {
     return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore);
   }
   static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) {
-    return new(1, true) BranchInst(IfTrue, InsertAtEnd);
+    return new(1) BranchInst(IfTrue, InsertAtEnd);
   }
   static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
                             Value *Cond, BasicBlock *InsertAtEnd) {
     return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd);
   }
 
-  ~BranchInst();
-
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
@@ -2077,19 +2106,6 @@ public:
     Op<-3>() = V;
   }
 
-  // setUnconditionalDest - Change the current branch to an unconditional branch
-  // targeting the specified block.
-  // FIXME: Eliminate this ugly method.
-  void setUnconditionalDest(BasicBlock *Dest) {
-    Op<-1>() = (Value*)Dest;
-    if (isConditional()) {  // Convert this to an uncond branch.
-      Op<-2>() = 0;
-      Op<-3>() = 0;
-      NumOperands = 1;
-      OperandList = op_begin();
-    }
-  }
-
   unsigned getNumSuccessors() const { return 1+isConditional(); }
 
   BasicBlock *getSuccessor(unsigned i) const {
@@ -2117,7 +2133,8 @@ private:
 };
 
 template <>
-struct OperandTraits<BranchInst> : public VariadicOperandTraits<1> {};
+struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> {
+};
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
 
@@ -2136,7 +2153,7 @@ class SwitchInst : public TerminatorInst {
   // Operand[2n  ] = Value to match
   // Operand[2n+1] = BasicBlock to go to on match
   SwitchInst(const SwitchInst &SI);
-  void init(Value *Value, BasicBlock *Default, unsigned NumCases);
+  void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
   void resizeOperands(unsigned No);
   // allocate space for exactly zero operands
   void *operator new(size_t s) {
@@ -2230,7 +2247,8 @@ public:
 
   /// removeCase - This method removes the specified successor from the switch
   /// instruction.  Note that this cannot be used to remove the default
-  /// destination (successor #0).
+  /// destination (successor #0). Also note that this operation may reorder the
+  /// remaining cases at index idx and above.
   ///
   void removeCase(unsigned idx);
 
@@ -2298,7 +2316,7 @@ class IndirectBrInst : public TerminatorInst {
   /// here to make memory allocation more efficient.  This constructor can also
   /// autoinsert before another instruction.
   IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
-  
+
   /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
   /// Address to jump to.  The number of expected destinations can be specified
   /// here to make memory allocation more efficient.  This constructor also
@@ -2316,32 +2334,32 @@ public:
     return new IndirectBrInst(Address, NumDests, InsertAtEnd);
   }
   ~IndirectBrInst();
-  
+
   /// Provide fast operand accessors.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-  
+
   // Accessor Methods for IndirectBrInst instruction.
   Value *getAddress() { return getOperand(0); }
   const Value *getAddress() const { return getOperand(0); }
   void setAddress(Value *V) { setOperand(0, V); }
-  
-  
+
+
   /// getNumDestinations - return the number of possible destinations in this
   /// indirectbr instruction.
   unsigned getNumDestinations() const { return getNumOperands()-1; }
-  
+
   /// getDestination - Return the specified destination.
   BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
   const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
-  
+
   /// addDestination - Add a destination.
   ///
   void addDestination(BasicBlock *Dest);
-  
+
   /// removeDestination - This method removes the specified successor from the
   /// indirectbr instruction.
   void removeDestination(unsigned i);
-  
+
   unsigned getNumSuccessors() const { return getNumOperands()-1; }
   BasicBlock *getSuccessor(unsigned i) const {
     return cast<BasicBlock>(getOperand(i+1));
@@ -2349,7 +2367,7 @@ public:
   void setSuccessor(unsigned i, BasicBlock *NewSucc) {
     setOperand(i+1, (Value*)NewSucc);
   }
-  
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const IndirectBrInst *) { return true; }
   static inline bool classof(const Instruction *I) {
@@ -2369,8 +2387,8 @@ struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
-  
-  
+
+
 //===----------------------------------------------------------------------===//
 //                               InvokeInst Class
 //===----------------------------------------------------------------------===//
@@ -2384,9 +2402,9 @@ class InvokeInst : public TerminatorInst {
   void init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
             Value* const *Args, unsigned NumArgs);
 
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
-            InputIterator ArgBegin, InputIterator ArgEnd,
+            RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
             const Twine &NameStr,
             // This argument ensures that we have an iterator we can
             // do arithmetic on in constant time
@@ -2399,47 +2417,49 @@ class InvokeInst : public TerminatorInst {
   }
 
   /// Construct an InvokeInst given a range of arguments.
-  /// InputIterator must be a random-access iterator pointing to
+  /// RandomAccessIterator must be a random-access iterator pointing to
   /// contiguous storage (e.g. a std::vector<>::iterator).  Checks are
   /// made for random-accessness but not for contiguous storage as
   /// that would incur runtime overhead.
   ///
   /// @brief Construct an InvokeInst from a range of arguments
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
-                    InputIterator ArgBegin, InputIterator ArgEnd,
+                    RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
                     unsigned Values,
                     const Twine &NameStr, Instruction *InsertBefore);
 
   /// Construct an InvokeInst given a range of arguments.
-  /// InputIterator must be a random-access iterator pointing to
+  /// RandomAccessIterator must be a random-access iterator pointing to
   /// contiguous storage (e.g. a std::vector<>::iterator).  Checks are
   /// made for random-accessness but not for contiguous storage as
   /// that would incur runtime overhead.
   ///
   /// @brief Construct an InvokeInst from a range of arguments
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
-                    InputIterator ArgBegin, InputIterator ArgEnd,
+                    RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
                     unsigned Values,
                     const Twine &NameStr, BasicBlock *InsertAtEnd);
 protected:
   virtual InvokeInst *clone_impl() const;
 public:
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static InvokeInst *Create(Value *Func,
                             BasicBlock *IfNormal, BasicBlock *IfException,
-                            InputIterator ArgBegin, InputIterator ArgEnd,
+                            RandomAccessIterator ArgBegin,
+                            RandomAccessIterator ArgEnd,
                             const Twine &NameStr = "",
                             Instruction *InsertBefore = 0) {
     unsigned Values(ArgEnd - ArgBegin + 3);
     return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
                                   Values, NameStr, InsertBefore);
   }
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   static InvokeInst *Create(Value *Func,
                             BasicBlock *IfNormal, BasicBlock *IfException,
-                            InputIterator ArgBegin, InputIterator ArgEnd,
+                            RandomAccessIterator ArgBegin,
+                            RandomAccessIterator ArgEnd,
                             const Twine &NameStr,
                             BasicBlock *InsertAtEnd) {
     unsigned Values(ArgEnd - ArgBegin + 3);
@@ -2606,13 +2626,14 @@ private:
 };
 
 template <>
-struct OperandTraits<InvokeInst> : public VariadicOperandTraits<3> {
+struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
 };
 
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 InvokeInst::InvokeInst(Value *Func,
                        BasicBlock *IfNormal, BasicBlock *IfException,
-                       InputIterator ArgBegin, InputIterator ArgEnd,
+                       RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd,
                        unsigned Values,
                        const Twine &NameStr, Instruction *InsertBefore)
   : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
@@ -2621,12 +2642,14 @@ InvokeInst::InvokeInst(Value *Func,
                    OperandTraits<InvokeInst>::op_end(this) - Values,
                    Values, InsertBefore) {
   init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
-template<typename InputIterator>
+template<typename RandomAccessIterator>
 InvokeInst::InvokeInst(Value *Func,
                        BasicBlock *IfNormal, BasicBlock *IfException,
-                       InputIterator ArgBegin, InputIterator ArgEnd,
+                       RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd,
                        unsigned Values,
                        const Twine &NameStr, BasicBlock *InsertAtEnd)
   : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
@@ -2635,7 +2658,8 @@ InvokeInst::InvokeInst(Value *Func,
                    OperandTraits<InvokeInst>::op_end(this) - Values,
                    Values, InsertAtEnd) {
   init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
-       typename std::iterator_traits<InputIterator>::iterator_category());
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
 }
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index a17fa9cc5bdd..74c30fbddd72 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -55,7 +55,7 @@ namespace llvm {
       return isa<CallInst>(V) && classof(cast<CallInst>(V));
     }
   };
-
+  
   /// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
   ///
   class DbgInfoIntrinsic : public IntrinsicInst {
@@ -139,6 +139,10 @@ namespace llvm {
       return !getVolatileCst()->isZero();
     }
 
+    unsigned getAddressSpace() const {
+      return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
+    }
+
     /// getDest - This is just like getRawDest, but it strips off any cast
     /// instructions that feed it, giving the original input.  The returned
     /// value is guaranteed to be a pointer.
@@ -297,29 +301,6 @@ namespace llvm {
     }
   };
 
-  /// MemoryUseIntrinsic - This is the common base class for the memory use
-  /// marker intrinsics.
-  ///
-  class MemoryUseIntrinsic : public IntrinsicInst {
-  public:
-
-    // Methods for support type inquiry through isa, cast, and dyn_cast:
-    static inline bool classof(const MemoryUseIntrinsic *) { return true; }
-    static inline bool classof(const IntrinsicInst *I) {
-      switch (I->getIntrinsicID()) {
-      case Intrinsic::lifetime_start:
-      case Intrinsic::lifetime_end:
-      case Intrinsic::invariant_start:
-      case Intrinsic::invariant_end:
-        return true;
-      default: return false;
-      }
-    }
-    static inline bool classof(const Value *V) {
-      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
-    }
-  };
-
 }
 
 #endif
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index fb4f750f87b5..0c9be78b0d10 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -109,6 +109,9 @@ def llvm_empty_ty      : LLVMType<OtherVT>;                       // { }
 def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>;          // { }*
 def llvm_metadata_ty   : LLVMType<MetadataVT>;                    // !{...}
 
+def llvm_x86mmx_ty     : LLVMType<x86mmx>;
+def llvm_ptrx86mmx_ty  : LLVMPointerType<llvm_x86mmx_ty>;         // <1 x i64>*
+
 def llvm_v2i8_ty       : LLVMType<v2i8>;     //  2 x i8
 def llvm_v4i8_ty       : LLVMType<v4i8>;     //  4 x i8
 def llvm_v8i8_ty       : LLVMType<v8i8>;     //  8 x i8
@@ -256,7 +259,7 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
 
 // Internal interface for object size checking
 def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
-                               [IntrReadArgMem]>,
+                               [IntrNoMem]>,
                                GCCBuiltin<"__builtin_object_size">;
 
 //===-------------------- Bit Manipulation Intrinsics ---------------------===//
@@ -304,6 +307,7 @@ let Properties = [IntrNoMem] in {
   def int_eh_sjlj_lsda    : Intrinsic<[llvm_ptr_ty]>;
   def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
 }
+def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_ptr_ty]>;
 def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
 
@@ -432,7 +436,7 @@ def int_lifetime_end    : Intrinsic<[],
                                     [IntrReadWriteArgMem, NoCapture<1>]>;
 def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
                                     [llvm_i64_ty, llvm_ptr_ty],
-                                    [IntrReadArgMem, NoCapture<1>]>;
+                                    [IntrReadWriteArgMem, NoCapture<1>]>;
 def int_invariant_end   : Intrinsic<[],
                                     [llvm_descriptor_ty, llvm_i64_ty,
                                      llvm_ptr_ty],
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index 6c047718e6f6..546538a57abd 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -286,6 +286,12 @@ def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
 def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
 def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
 
+// Vector Conversions Between Half-Precision and Single-Precision.
+def int_arm_neon_vcvtfp2hf
+    : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_arm_neon_vcvthf2fp
+    : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
+
 // Narrowing Saturating Vector Moves.
 def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
 def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 06ea3ae3b518..49462200f093 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -130,12 +130,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                          llvm_i64_ty], [IntrNoMem]>;
   def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // SIMD load ops
@@ -445,11 +445,11 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
-              Intrinsic<[llvm_v2f64_ty], [llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // SIMD load ops
@@ -563,50 +563,50 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Horizontal arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_phadd_w         : GCCBuiltin<"__builtin_ia32_phaddw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phadd_w_128     : GCCBuiltin<"__builtin_ia32_phaddw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_phadd_d         : GCCBuiltin<"__builtin_ia32_phaddd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phadd_d_128     : GCCBuiltin<"__builtin_ia32_phaddd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_phadd_sw        : GCCBuiltin<"__builtin_ia32_phaddsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phadd_sw_128    : GCCBuiltin<"__builtin_ia32_phaddsw128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_phsub_w         : GCCBuiltin<"__builtin_ia32_phsubw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phsub_w_128     : GCCBuiltin<"__builtin_ia32_phsubw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_phsub_d         : GCCBuiltin<"__builtin_ia32_phsubd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phsub_d_128     : GCCBuiltin<"__builtin_ia32_phsubd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_phsub_sw        : GCCBuiltin<"__builtin_ia32_phsubsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_phsub_sw_128    : GCCBuiltin<"__builtin_ia32_phsubsw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_pmadd_ub_sw     : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
@@ -615,8 +615,8 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Packed multiply high with round and scale
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_pmul_hr_sw      : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_pmul_hr_sw_128  : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem, Commutative]>;
@@ -625,35 +625,35 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Shuffle ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_pshuf_b         : GCCBuiltin<"__builtin_ia32_pshufb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_pshuf_b_128     : GCCBuiltin<"__builtin_ia32_pshufb128">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
                          llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_ssse3_pshuf_w         : GCCBuiltin<"__builtin_ia32_pshufw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_i32_ty],
+  def int_x86_sse_pshuf_w           : GCCBuiltin<"__builtin_ia32_pshufw">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
                          [IntrNoMem]>;
 }
 
 // Sign ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_psign_b         : GCCBuiltin<"__builtin_ia32_psignb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_psign_b_128     : GCCBuiltin<"__builtin_ia32_psignb128">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
                          llvm_v16i8_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_psign_w         : GCCBuiltin<"__builtin_ia32_psignw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_psign_w_128     : GCCBuiltin<"__builtin_ia32_psignw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_psign_d         : GCCBuiltin<"__builtin_ia32_psignd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_psign_d_128     : GCCBuiltin<"__builtin_ia32_psignd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
@@ -662,17 +662,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Absolute value ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_pabs_b     : GCCBuiltin<"__builtin_ia32_pabsb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_pabs_w     : GCCBuiltin<"__builtin_ia32_pabsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
 
   def int_x86_ssse3_pabs_d     : GCCBuiltin<"__builtin_ia32_pabsd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
 }
@@ -1328,281 +1328,257 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   // Addition
   def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
 
   def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   // Subtraction
   def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
 
   def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   // Multiplication
   def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   // Bitwise operations
   def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
 
   // Averages
   def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   // Maximum
   def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   // Minimum
   def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 
   // Packed sum of absolute differences
   def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
 }
 
 // Integer shift ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   // Shift left logical
   def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v1i64_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
 
   def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
   def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Pack ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // Unpacking ops.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
   def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
                         [IntrNoMem]>;
 }
 
 // Integer comparison ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
-              Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
-              Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
   def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
-              Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+                         llvm_x86mmx_ty], [IntrNoMem]>;
 }
 
 // Misc.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
-              Intrinsic<[], [llvm_v8i8_ty, llvm_v8i8_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
 
   def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
-              Intrinsic<[llvm_i32_ty], [llvm_v8i8_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
 
   def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
-              Intrinsic<[], [llvm_ptr_ty, llvm_v1i64_ty], []>;
+              Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
 
-//   def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
-//               Intrinsic<[llvm_v1i64_ty], [llvm_1i64_ty, 
-//                          llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, 
+                        llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_mmx_pextr_w :
-              Intrinsic<[llvm_i32_ty], [llvm_v1i64_ty, llvm_i32_ty], 
+  def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
+              Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], 
                         [IntrNoMem]>;
 
-  def int_x86_mmx_pinsr_w :
-              Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, 
+  def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
+              Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, 
                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_mmx_cvtsi32_si64 : 
-              Intrinsic<[llvm_v1i64_ty], [llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_mmx_cvtsi64_si32 : 
-              Intrinsic<[llvm_i32_ty], [llvm_v1i64_ty], [IntrNoMem]>;
-
-  def int_x86_mmx_vec_init_b : GCCBuiltin<"__builtin_ia32_vec_init_v8qi">,
-              Intrinsic<[llvm_v8i8_ty],
-                        [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty, llvm_i8_ty,
-                         llvm_i8_ty, llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_mmx_vec_init_w : GCCBuiltin<"__builtin_ia32_vec_init_v4hi">,
-              Intrinsic<[llvm_v4i16_ty],
-                        [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
-                        [IntrNoMem]>;
-  def int_x86_mmx_vec_init_d : GCCBuiltin<"__builtin_ia32_vec_init_v2si">,
-              Intrinsic<[llvm_v2i32_ty],
-                        [llvm_i32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-
-  def int_x86_mmx_vec_ext_d : GCCBuiltin<"__builtin_ia32_vec_ext_v2si">,
-              Intrinsic<[llvm_v2i32_ty],
-                        [llvm_v2i32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
 }
diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IntrinsicsXCore.td
index a86cda28a5eb..97bac1d2daaf 100644
--- a/include/llvm/IntrinsicsXCore.td
+++ b/include/llvm/IntrinsicsXCore.td
@@ -11,4 +11,26 @@
 let TargetPrefix = "xcore" in {  // All intrinsics start with "llvm.xcore.".
   def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>;
   def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>;
+
+  // Resource instructions.
+  def int_xcore_getr : Intrinsic<[llvm_anyptr_ty],[llvm_i32_ty]>;
+  def int_xcore_freer : Intrinsic<[],[llvm_anyptr_ty],
+                                   [NoCapture<0>]>;
+  def int_xcore_in : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],[NoCapture<0>]>;
+  def int_xcore_int : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],
+                                [NoCapture<0>]>;
+  def int_xcore_inct : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],
+                                 [NoCapture<0>]>;
+  def int_xcore_out : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                [NoCapture<0>]>;
+  def int_xcore_outt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                 [NoCapture<0>]>;
+  def int_xcore_outct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                  [NoCapture<0>]>;
+  def int_xcore_chkct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                  [NoCapture<0>]>;
+  def int_xcore_setd : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                  [NoCapture<0>]>;
+  def int_xcore_setc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty],
+                                  [NoCapture<0>]>;
 }
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index 7cb6579aef66..3502ff73c19f 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -20,6 +20,8 @@ namespace llvm {
 class LLVMContextImpl;
 class StringRef;
 class Instruction;
+class Module;
+class SMDiagnostic;
 template <typename T> class SmallVectorImpl;
 
 /// This is an important class for using LLVM in a threaded context.  It
@@ -28,10 +30,6 @@ template <typename T> class SmallVectorImpl;
 /// LLVMContext itself provides no locking guarantees, so you should be careful
 /// to have one context per thread.
 class LLVMContext {
-  // DO NOT IMPLEMENT
-  LLVMContext(LLVMContext&);
-  void operator=(LLVMContext&);
-
 public:
   LLVMContextImpl *const pImpl;
   LLVMContext();
@@ -40,7 +38,8 @@ public:
   // Pinned metadata names, which always have the same value.  This is a
   // compile-time performance optimization, not a correctness optimization.
   enum {
-    MD_dbg = 0   // "dbg"
+    MD_dbg = 0,  // "dbg"
+    MD_tbaa = 1  // "tbaa"
   };
   
   /// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -51,18 +50,23 @@ public:
   /// custom metadata IDs registered in this LLVMContext.
   void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
   
+  
+  typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context,
+                                         unsigned LocCookie);
+  
   /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked
   /// when problems with inline asm are detected by the backend.  The first
-  /// argument is a function pointer (of type SourceMgr::DiagHandlerTy) and the
-  /// second is a context pointer that gets passed into the DiagHandler.
+  /// argument is a function pointer and the second is a context pointer that
+  /// gets passed into the DiagHandler.
   ///
-  /// LLVMContext doesn't take ownership or interpreter either of these
+  /// LLVMContext doesn't take ownership or interpret either of these
   /// pointers.
-  void setInlineAsmDiagnosticHandler(void *DiagHandler, void *DiagContext = 0);
+  void setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+                                     void *DiagContext = 0);
 
   /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
   /// setInlineAsmDiagnosticHandler.
-  void *getInlineAsmDiagnosticHandler() const;
+  InlineAsmDiagHandlerTy getInlineAsmDiagnosticHandler() const;
 
   /// getInlineAsmDiagnosticContext - Return the diagnostic context set by
   /// setInlineAsmDiagnosticHandler.
@@ -77,6 +81,21 @@ public:
   void emitError(unsigned LocCookie, StringRef ErrorStr);
   void emitError(const Instruction *I, StringRef ErrorStr);
   void emitError(StringRef ErrorStr);
+
+private:
+  // DO NOT IMPLEMENT
+  LLVMContext(LLVMContext&);
+  void operator=(LLVMContext&);
+
+  /// addModule - Register a module as being instantiated in this context.  If
+  /// the context is deleted, the module will be deleted as well.
+  void addModule(Module*);
+  
+  /// removeModule - Unregister a module from this context.
+  void removeModule(Module*);
+  
+  // Module needs access to the add/removeModule methods.
+  friend class Module;
 };
 
 /// getGlobalContext - Returns a global context.  This is for LLVM clients that
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 35dab62143df..69e1bd919f74 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This header file pulls in all transformation and analysis passes for tools 
+// This header file pulls in all transformation and analysis passes for tools
 // like opt and bugpoint that need this functionality.
 //
 //===----------------------------------------------------------------------===//
@@ -20,8 +20,8 @@
 #include "llvm/Analysis/FindUsedTypes.h"
 #include "llvm/Analysis/IntervalPartition.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PointerTracking.h"
 #include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/RegionPass.h"
 #include "llvm/Analysis/RegionPrinter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/Lint.h"
@@ -70,6 +70,7 @@ namespace {
       (void) llvm::createDomViewerPass();
       (void) llvm::createEdgeProfilerPass();
       (void) llvm::createOptimalEdgeProfilerPass();
+      (void) llvm::createPathProfilerPass();
       (void) llvm::createFunctionInliningPass();
       (void) llvm::createAlwaysInlinerPass();
       (void) llvm::createGlobalDCEPass();
@@ -90,8 +91,8 @@ namespace {
       (void) llvm::createLoopStrengthReducePass();
       (void) llvm::createLoopUnrollPass();
       (void) llvm::createLoopUnswitchPass();
+      (void) llvm::createLoopIdiomPass();
       (void) llvm::createLoopRotatePass();
-      (void) llvm::createLoopIndexSplitPass();
       (void) llvm::createLowerInvokePass();
       (void) llvm::createLowerSetJmpPass();
       (void) llvm::createLowerSwitchPass();
@@ -99,7 +100,9 @@ namespace {
       (void) llvm::createNoProfileInfoPass();
       (void) llvm::createProfileEstimatorPass();
       (void) llvm::createProfileVerifierPass();
+      (void) llvm::createPathProfileVerifierPass();
       (void) llvm::createProfileLoaderPass();
+      (void) llvm::createPathProfileLoaderPass();
       (void) llvm::createPromoteMemoryToRegisterPass();
       (void) llvm::createDemoteRegisterToMemoryPass();
       (void) llvm::createPruneEHPass();
@@ -128,13 +131,13 @@ namespace {
       (void) llvm::createUnifyFunctionExitNodesPass();
       (void) llvm::createInstCountPass();
       (void) llvm::createCodeGenPreparePass();
+      (void) llvm::createEarlyCSEPass();
       (void) llvm::createGVNPass();
       (void) llvm::createMemCpyOptPass();
       (void) llvm::createLoopDeletionPass();
       (void) llvm::createPostDomTree();
       (void) llvm::createPostDomFrontier();
       (void) llvm::createInstructionNamerPass();
-      (void) llvm::createPartialSpecializationPass();
       (void) llvm::createFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
       (void) llvm::createPrintModulePass(0);
@@ -147,14 +150,17 @@ namespace {
       (void) llvm::createSinkingPass();
       (void) llvm::createLowerAtomicPass();
       (void) llvm::createCorrelatedValuePropagationPass();
+      (void) llvm::createMemDepPrinter();
+      (void) llvm::createInstructionSimplifierPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
       (void)new llvm::ScalarEvolution();
-      (void)new llvm::PointerTracking();
       ((llvm::Function*)0)->viewCFGOnly();
+      llvm::RGPassManager RGM(0);
+      ((llvm::RegionPass*)0)->runOnRegion((llvm::Region*)0, RGM);
       llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0);
-      X.add((llvm::Value*)0, 0);  // for -print-alias-sets
+      X.add((llvm::Value*)0, 0, 0);  // for -print-alias-sets
     }
   } ForcePassLinking; // Force link by creating a global definition.
 }
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
index 6959cb6d1efc..83684c0fb65d 100644
--- a/include/llvm/LinkAllVMCore.h
+++ b/include/llvm/LinkAllVMCore.h
@@ -22,15 +22,14 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/System/Alarm.h"
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/System/Memory.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/TimeValue.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/MathExtras.h"
 #include <cstdlib>
diff --git a/include/llvm/MC/EDInstInfo.h b/include/llvm/MC/EDInstInfo.h
index dded25521a27..83d9e780feb7 100644
--- a/include/llvm/MC/EDInstInfo.h
+++ b/include/llvm/MC/EDInstInfo.h
@@ -9,7 +9,7 @@
 #ifndef EDINSTINFO_H
 #define EDINSTINFO_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   
diff --git a/include/llvm/MC/ELFObjectWriter.h b/include/llvm/MC/ELFObjectWriter.h
deleted file mode 100644
index 3b9951f4e7ab..000000000000
--- a/include/llvm/MC/ELFObjectWriter.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- llvm/MC/ELFObjectWriter.h - ELF File Writer ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_ELFOBJECTWRITER_H
-#define LLVM_MC_ELFOBJECTWRITER_H
-
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-namespace llvm {
-class MCAsmFixup;
-class MCAssembler;
-class MCFragment;
-class MCValue;
-class raw_ostream;
-
-class ELFObjectWriter : public MCObjectWriter {
-  void *Impl;
-
-public:
-  ELFObjectWriter(raw_ostream &OS, bool Is64Bit, bool IsLittleEndian = true,
-                  bool HasRelocationAddend = true);
-
-  virtual ~ELFObjectWriter();
-
-  virtual void ExecutePostLayoutBinding(MCAssembler &Asm);
-
-  virtual void RecordRelocation(const MCAssembler &Asm,
-                                const MCAsmLayout &Layout,
-                                const MCFragment *Fragment,
-                                const MCFixup &Fixup, MCValue Target,
-                                uint64_t &FixedValue);
-
-  virtual void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 43952e0845da..9cfd0048a607 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -22,10 +22,12 @@
 namespace llvm {
   class MCSection;
   class MCContext;
-  
+
   /// MCAsmInfo - This class is intended to be used as a base class for asm
   /// properties and features specific to the target.
-  namespace ExceptionHandling { enum ExceptionsType { None, Dwarf, SjLj }; }
+  namespace ExceptionHandling {
+    enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj };
+  }
 
   class MCAsmInfo {
   protected:
@@ -36,25 +38,30 @@ namespace llvm {
     /// HasSubsectionsViaSymbols - True if this target has the MachO
     /// .subsections_via_symbols directive.
     bool HasSubsectionsViaSymbols;           // Default is false.
-    
+
     /// HasMachoZeroFillDirective - True if this is a MachO target that supports
     /// the macho-specific .zerofill directive for emitting BSS Symbols.
     bool HasMachoZeroFillDirective;               // Default is false.
-    
+
     /// HasMachoTBSSDirective - True if this is a MachO target that supports
     /// the macho-specific .tbss directive for emitting thread local BSS Symbols
     bool HasMachoTBSSDirective;                 // Default is false.
-    
+
     /// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should
     /// emit a ".reference .constructors_used" or ".reference .destructors_used"
     /// directive after the a static ctor/dtor list.  This directive is only
     /// emitted in Static relocation model.
     bool HasStaticCtorDtorReferenceInStaticMode;  // Default is false.
-    
+
+    /// LinkerRequiresNonEmptyDwarfLines - True if the linker has a bug and
+    /// requires that the debug_line section be of a minimum size. In practice
+    /// such a linker requires a non empty line sequence if a file is present.
+    bool LinkerRequiresNonEmptyDwarfLines; // Default to false.
+
     /// MaxInstLength - This is the maximum possible length of an instruction,
     /// which is needed to compute the size of an inline asm.
     unsigned MaxInstLength;                  // Defaults to 4.
-    
+
     /// PCSymbol - The symbol used to represent the current PC.  Used in PC
     /// relative expressions.
     const char *PCSymbol;                    // Defaults to "$".
@@ -72,6 +79,9 @@ namespace llvm {
     /// assembler.
     const char *CommentString;               // Defaults to "#"
 
+    /// LabelSuffix - This is appended to emitted labels.
+    const char *LabelSuffix;                 // Defaults to ":"
+
     /// GlobalPrefix - If this is set to a non-empty string, it is prepended
     /// onto all global symbols.  This is often used for "_" or ".".
     const char *GlobalPrefix;                // Defaults to ""
@@ -80,12 +90,12 @@ namespace llvm {
     /// pool entries that are completely private to the .s file and should not
     /// have names in the .o file.  This is often "." or "L".
     const char *PrivateGlobalPrefix;         // Defaults to "."
-    
+
     /// LinkerPrivateGlobalPrefix - This prefix is used for symbols that should
     /// be passed through the assembler but be removed by the linker.  This
     /// is "l" on Darwin, currently used for some ObjC metadata.
     const char *LinkerPrivateGlobalPrefix;   // Defaults to ""
-    
+
     /// InlineAsmStart/End - If these are nonempty, they contain a directive to
     /// emit before and after an inline assembly statement.
     const char *InlineAsmStart;              // Defaults to "#APP\n"
@@ -117,7 +127,7 @@ namespace llvm {
     /// AsciiDirective - This directive allows emission of an ascii string with
     /// the standard C escape characters embedded into it.
     const char *AsciiDirective;              // Defaults to "\t.ascii\t"
-    
+
     /// AscizDirective - If not null, this allows for special handling of
     /// zero terminated strings on this target.  This is commonly supported as
     /// ".asciz".  If a target doesn't support this, it can be set to null.
@@ -135,7 +145,7 @@ namespace llvm {
     /// which should be relocated as a 32-bit GP-relative offset, e.g. .gpword
     /// on Mips or .gprel32 on Alpha.
     const char *GPRel32Directive;            // Defaults to NULL.
-    
+
     /// getDataASDirective - Return the directive that should be used to emit
     /// data of the specified size to the specified numeric address space.
     virtual const char *getDataASDirective(unsigned Size, unsigned AS) const {
@@ -149,15 +159,15 @@ namespace llvm {
     bool SunStyleELFSectionSwitchSyntax;     // Defaults to false.
 
     /// UsesELFSectionDirectiveForBSS - This is true if this target uses ELF
-    /// '.section' directive before the '.bss' one. It's used for PPC/Linux 
+    /// '.section' directive before the '.bss' one. It's used for PPC/Linux
     /// which doesn't support the '.bss' directive only.
     bool UsesELFSectionDirectiveForBSS;      // Defaults to false.
-    
+
     /// HasMicrosoftFastStdCallMangling - True if this target uses microsoft
     /// style mangling for functions with X86_StdCall/X86_FastCall calling
     /// convention.
     bool HasMicrosoftFastStdCallMangling;    // Defaults to false.
-    
+
     //===--- Alignment Information ----------------------------------------===//
 
     /// AlignDirective - The directive used to emit round up to an alignment
@@ -176,27 +186,34 @@ namespace llvm {
     unsigned TextAlignFillValue;             // Defaults to 0
 
     //===--- Global Variable Emission Directives --------------------------===//
-    
+
     /// GlobalDirective - This is the directive used to declare a global entity.
     ///
     const char *GlobalDirective;             // Defaults to NULL.
 
-    /// ExternDirective - This is the directive used to declare external 
+    /// ExternDirective - This is the directive used to declare external
     /// globals.
     ///
     const char *ExternDirective;             // Defaults to NULL.
-    
+
     /// HasSetDirective - True if the assembler supports the .set directive.
     bool HasSetDirective;                    // Defaults to true.
-    
+
+    /// HasAggressiveSymbolFolding - False if the assembler requires that we use
+    /// Lc = a - b
+    /// .long Lc
+    /// instead of
+    /// .long a - b
+    bool HasAggressiveSymbolFolding;           // Defaults to true.
+
     /// HasLCOMMDirective - This is true if the target supports the .lcomm
     /// directive.
     bool HasLCOMMDirective;                  // Defaults to false.
-    
+
     /// COMMDirectiveAlignmentIsInBytes - True is COMMDirective's optional
     /// alignment is to be specified in bytes instead of log2(n).
     bool COMMDirectiveAlignmentIsInBytes;    // Defaults to true;
-    
+
     /// HasDotTypeDotSizeDirective - True if the target has .type and .size
     /// directives, this is true for most ELF targets.
     bool HasDotTypeDotSizeDirective;         // Defaults to true.
@@ -209,10 +226,14 @@ namespace llvm {
     /// directive.
     bool HasNoDeadStrip;                     // Defaults to false.
 
+    /// HasSymbolResolver - True if this target supports the MachO
+    /// .symbol_resolver directive.
+    bool HasSymbolResolver;                     // Defaults to false.
+
     /// WeakRefDirective - This directive, if non-null, is used to declare a
     /// global as being a weak undefined symbol.
     const char *WeakRefDirective;            // Defaults to NULL.
-    
+
     /// WeakDefDirective - This directive, if non-null, is used to declare a
     /// global as being a weak defined symbol.
     const char *WeakDefDirective;            // Defaults to NULL.
@@ -220,7 +241,7 @@ namespace llvm {
     /// LinkOnceDirective - This directive, if non-null is used to declare a
     /// global as being a weak defined symbol.  This is used on cygwin/mingw.
     const char *LinkOnceDirective;           // Defaults to NULL.
-    
+
     /// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to
     /// declare a symbol as having hidden visibility.
     MCSymbolAttr HiddenVisibilityAttr;       // Defaults to MCSA_Hidden.
@@ -234,10 +255,6 @@ namespace llvm {
     /// HasLEB128 - True if target asm supports leb128 directives.
     bool HasLEB128;                          // Defaults to false.
 
-    /// hasDotLocAndDotFile - True if target asm supports .loc and .file
-    /// directives for emitting debugging information.
-    bool HasDotLocAndDotFile;                // Defaults to false.
-
     /// SupportsDebugInformation - True if target supports emission of debugging
     /// information.
     bool SupportsDebugInformation;           // Defaults to false.
@@ -254,8 +271,8 @@ namespace llvm {
 
     /// DwarfSectionOffsetDirective - Special section offset directive.
     const char* DwarfSectionOffsetDirective; // Defaults to NULL
-    
-    /// DwarfUsesAbsoluteLabelForStmtList - True if DW_AT_stmt_list needs 
+
+    /// DwarfUsesAbsoluteLabelForStmtList - True if DW_AT_stmt_list needs
     /// absolute label instead of offset.
     bool DwarfUsesAbsoluteLabelForStmtList;  // Defaults to true;
 
@@ -276,7 +293,7 @@ namespace llvm {
     static unsigned getULEB128Size(unsigned Value);
 
     bool hasSubsectionsViaSymbols() const { return HasSubsectionsViaSymbols; }
-    
+
     // Data directive accessors.
     //
     const char *getData8bitsDirective(unsigned AS = 0) const {
@@ -299,11 +316,11 @@ namespace llvm {
     virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const{
       return 0;
     }
-    
+
     bool usesSunStyleELFSectionSwitchSyntax() const {
       return SunStyleELFSectionSwitchSyntax;
     }
-    
+
     bool usesELFSectionDirectiveForBSS() const {
       return UsesELFSectionDirectiveForBSS;
     }
@@ -311,7 +328,7 @@ namespace llvm {
     bool hasMicrosoftFastStdCallMangling() const {
       return HasMicrosoftFastStdCallMangling;
     }
-    
+
     // Accessors.
     //
     bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
@@ -319,6 +336,9 @@ namespace llvm {
     bool hasStaticCtorDtorReferenceInStaticMode() const {
       return HasStaticCtorDtorReferenceInStaticMode;
     }
+    bool getLinkerRequiresNonEmptyDwarfLines() const {
+      return LinkerRequiresNonEmptyDwarfLines;
+    }
     unsigned getMaxInstLength() const {
       return MaxInstLength;
     }
@@ -334,6 +354,9 @@ namespace llvm {
     const char *getCommentString() const {
       return CommentString;
     }
+    const char *getLabelSuffix() const {
+      return LabelSuffix;
+    }
     const char *getGlobalPrefix() const {
       return GlobalPrefix;
     }
@@ -386,6 +409,9 @@ namespace llvm {
       return ExternDirective;
     }
     bool hasSetDirective() const { return HasSetDirective; }
+    bool hasAggressiveSymbolFolding() const {
+      return HasAggressiveSymbolFolding;
+    }
     bool hasLCOMMDirective() const { return HasLCOMMDirective; }
     bool hasDotTypeDotSizeDirective() const {return HasDotTypeDotSizeDirective;}
     bool getCOMMDirectiveAlignmentIsInBytes() const {
@@ -393,10 +419,11 @@ namespace llvm {
     }
     bool hasSingleParameterDotFile() const { return HasSingleParameterDotFile; }
     bool hasNoDeadStrip() const { return HasNoDeadStrip; }
+    bool hasSymbolResolver() const { return HasSymbolResolver; }
     const char *getWeakRefDirective() const { return WeakRefDirective; }
     const char *getWeakDefDirective() const { return WeakDefDirective; }
     const char *getLinkOnceDirective() const { return LinkOnceDirective; }
-    
+
     MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;}
     MCSymbolAttr getProtectedVisibilityAttr() const {
       return ProtectedVisibilityAttr;
@@ -404,9 +431,6 @@ namespace llvm {
     bool hasLEB128() const {
       return HasLEB128;
     }
-    bool hasDotLocAndDotFile() const {
-      return HasDotLocAndDotFile;
-    }
     bool doesSupportDebugInformation() const {
       return SupportsDebugInformation;
     }
@@ -416,6 +440,12 @@ namespace llvm {
     ExceptionHandling::ExceptionsType getExceptionHandlingType() const {
       return ExceptionsType;
     }
+    bool isExceptionHandlingDwarf() const {
+      return
+        (ExceptionsType == ExceptionHandling::DwarfTable ||
+         ExceptionsType == ExceptionHandling::DwarfCFI);
+    }
+
     bool doesDwarfRequireFrameSection() const {
       return DwarfRequiresFrameSection;
     }
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index b9565ba06197..01cb0006b362 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -39,13 +39,12 @@ private:
   /// The last fragment which was layed out, or 0 if nothing has been layed
   /// out. Fragments are always layed out in order, so all fragments with a
   /// lower ordinal will be up to date.
-  mutable MCFragment *LastValidFragment;
+  mutable DenseMap<const MCSectionData*, MCFragment *> LastValidFragment;
 
   /// \brief Make sure that the layout for the given fragment is valid, lazily
   /// computing it if necessary.
   void EnsureValid(const MCFragment *F) const;
 
-  bool isSectionUpToDate(const MCSectionData *SD) const;
   bool isFragmentUpToDate(const MCFragment *F) const;
 
 public:
@@ -54,27 +53,15 @@ public:
   /// Get the assembler object this is a layout for.
   MCAssembler &getAssembler() const { return Assembler; }
 
-  /// \brief Update the layout because a fragment has been resized. The
-  /// fragments size should have already been updated, the \arg SlideAmount is
-  /// the delta from the old size.
-  void UpdateForSlide(MCFragment *F, int SlideAmount);
-
-  /// \brief Update the layout because a fragment has been replaced.
-  void FragmentReplaced(MCFragment *Src, MCFragment *Dst);
-
-  /// \brief Perform a full layout.
-  void LayoutFile();
+  /// \brief Invalidate all following fragments because a fragment has been
+  /// resized. The fragments size should have already been updated.
+  void Invalidate(MCFragment *F);
 
   /// \brief Perform layout for a single fragment, assuming that the previous
   /// fragment has already been layed out correctly, and the parent section has
   /// been initialized.
   void LayoutFragment(MCFragment *Fragment);
 
-  /// \brief Performs initial layout for a single section, assuming that the
-  /// previous section (including its fragments) has already been layed out
-  /// correctly.
-  void LayoutSection(MCSectionData *SD);
-
   /// @name Section Access (in layout order)
   /// @{
 
@@ -89,28 +76,13 @@ public:
   /// @name Fragment Layout Data
   /// @{
 
-  /// \brief Get the effective size of the given fragment, as computed in the
-  /// current layout.
-  uint64_t getFragmentEffectiveSize(const MCFragment *F) const;
-
   /// \brief Get the offset of the given fragment inside its containing section.
   uint64_t getFragmentOffset(const MCFragment *F) const;
 
-  /// @}
-  /// @name Section Layout Data
-  /// @{
-
-  /// \brief Get the computed address of the given section.
-  uint64_t getSectionAddress(const MCSectionData *SD) const;
-
   /// @}
   /// @name Utility Functions
   /// @{
 
-  /// \brief Get the address of the given fragment, as computed in the current
-  /// layout.
-  uint64_t getFragmentAddress(const MCFragment *F) const;
-
   /// \brief Get the address space size of the given section, as it effects
   /// layout. This may differ from the size reported by \see getSectionSize() by
   /// not including section tail padding.
@@ -120,12 +92,9 @@ public:
   /// file. This may include additional padding, or be 0 for virtual sections.
   uint64_t getSectionFileSize(const MCSectionData *SD) const;
 
-  /// \brief Get the logical data size of the given section.
-  uint64_t getSectionSize(const MCSectionData *SD) const;
-
-  /// \brief Get the address of the given symbol, as computed in the current
+  /// \brief Get the offset of the given symbol, as computed in the current
   /// layout.
-  uint64_t getSymbolAddress(const MCSymbolData *SD) const;
+  uint64_t getSymbolOffset(const MCSymbolData *SD) const;
 
   /// @}
 };
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index d193b986a934..30971c62a97e 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -11,13 +11,14 @@
 #define LLVM_MC_MCASSEMBLER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <vector> // FIXME: Shouldn't be needed.
 
 namespace llvm {
@@ -49,7 +50,10 @@ public:
     FT_Data,
     FT_Fill,
     FT_Inst,
-    FT_Org
+    FT_Org,
+    FT_Dwarf,
+    FT_DwarfFrame,
+    FT_LEB
   };
 
 private:
@@ -72,12 +76,7 @@ private:
   /// initialized.
   uint64_t Offset;
 
-  /// EffectiveSize - The compute size of this section. This is ~0 until
-  /// initialized.
-  uint64_t EffectiveSize;
-
-  /// LayoutOrder - The global layout order of this fragment. This is the index
-  /// across all fragments in the file, not just within the section.
+  /// LayoutOrder - The layout order of this fragment.
   unsigned LayoutOrder;
 
   /// @}
@@ -234,19 +233,12 @@ class MCAlignFragment : public MCFragment {
   /// target dependent.
   bool EmitNops : 1;
 
-  /// OnlyAlignAddress - Flag to indicate that this align is only used to adjust
-  /// the address space size of a section and that it should not be included as
-  /// part of the section size. This flag can only be used on the last fragment
-  /// in a section.
-  bool OnlyAlignAddress : 1;
-
 public:
   MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize,
                   unsigned _MaxBytesToEmit, MCSectionData *SD = 0)
     : MCFragment(FT_Align, SD), Alignment(_Alignment),
       Value(_Value),ValueSize(_ValueSize),
-      MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false),
-      OnlyAlignAddress(false) {}
+      MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false) {}
 
   /// @name Accessors
   /// @{
@@ -262,9 +254,6 @@ public:
   bool hasEmitNops() const { return EmitNops; }
   void setEmitNops(bool Value) { EmitNops = Value; }
 
-  bool hasOnlyAlignAddress() const { return OnlyAlignAddress; }
-  void setOnlyAlignAddress(bool Value) { OnlyAlignAddress = Value; }
-
   /// @}
 
   static bool classof(const MCFragment *F) {
@@ -337,6 +326,100 @@ public:
   static bool classof(const MCOrgFragment *) { return true; }
 };
 
+class MCLEBFragment : public MCFragment {
+  /// Value - The value this fragment should contain.
+  const MCExpr *Value;
+
+  /// IsSigned - True if this is a sleb128, false if uleb128.
+  bool IsSigned;
+
+  SmallString<8> Contents;
+public:
+  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD)
+    : MCFragment(FT_LEB, SD),
+      Value(&Value_), IsSigned(IsSigned_) { Contents.push_back(0); }
+
+  /// @name Accessors
+  /// @{
+
+  const MCExpr &getValue() const { return *Value; }
+
+  bool isSigned() const { return IsSigned; }
+
+  SmallString<8> &getContents() { return Contents; }
+  const SmallString<8> &getContents() const { return Contents; }
+
+  /// @}
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_LEB;
+  }
+  static bool classof(const MCLEBFragment *) { return true; }
+};
+
+class MCDwarfLineAddrFragment : public MCFragment {
+  /// LineDelta - the value of the difference between the two line numbers
+  /// between two .loc dwarf directives.
+  int64_t LineDelta;
+
+  /// AddrDelta - The expression for the difference of the two symbols that
+  /// make up the address delta between two .loc dwarf directives.
+  const MCExpr *AddrDelta;
+
+  SmallString<8> Contents;
+
+public:
+  MCDwarfLineAddrFragment(int64_t _LineDelta, const MCExpr &_AddrDelta,
+                      MCSectionData *SD)
+    : MCFragment(FT_Dwarf, SD),
+      LineDelta(_LineDelta), AddrDelta(&_AddrDelta) { Contents.push_back(0); }
+
+  /// @name Accessors
+  /// @{
+
+  int64_t getLineDelta() const { return LineDelta; }
+
+  const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+  SmallString<8> &getContents() { return Contents; }
+  const SmallString<8> &getContents() const { return Contents; }
+
+  /// @}
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Dwarf;
+  }
+  static bool classof(const MCDwarfLineAddrFragment *) { return true; }
+};
+
+class MCDwarfCallFrameFragment : public MCFragment {
+  /// AddrDelta - The expression for the difference of the two symbols that
+  /// make up the address delta between two .cfi_* dwarf directives.
+  const MCExpr *AddrDelta;
+
+  SmallString<8> Contents;
+
+public:
+  MCDwarfCallFrameFragment(const MCExpr &_AddrDelta,  MCSectionData *SD)
+    : MCFragment(FT_DwarfFrame, SD),
+      AddrDelta(&_AddrDelta) { Contents.push_back(0); }
+
+  /// @name Accessors
+  /// @{
+
+  const MCExpr &getAddrDelta() const { return *AddrDelta; }
+
+  SmallString<8> &getContents() { return Contents; }
+  const SmallString<8> &getContents() const { return Contents; }
+
+  /// @}
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_DwarfFrame;
+  }
+  static bool classof(const MCDwarfCallFrameFragment *) { return true; }
+};
+
 // FIXME: Should this be a separate class, or just merged into MCSection? Since
 // we anticipate the fast path being through an MCAssembler, the only reason to
 // keep it out is for API abstraction.
@@ -373,10 +456,6 @@ private:
   //
   // FIXME: This could all be kept private to the assembler implementation.
 
-  /// Address - The computed address of this section. This is ~0 until
-  /// initialized.
-  uint64_t Address;
-
   /// HasInstructions - Whether this section has had instructions emitted into
   /// it.
   unsigned HasInstructions : 1;
@@ -585,6 +664,8 @@ private:
 
   MCCodeEmitter &Emitter;
 
+  MCObjectWriter &Writer;
+
   raw_ostream &OS;
 
   iplist<MCSectionData> Sections;
@@ -603,7 +684,17 @@ private:
 
   std::vector<IndirectSymbolData> IndirectSymbols;
 
+  /// The set of function symbols for which a .thumb_func directive has
+  /// been seen.
+  //
+  // FIXME: We really would like this in target specific code rather than
+  // here. Maybe when the relocation stuff moves to target specific,
+  // this can go with it? The streamer would need some target specific
+  // refactoring too.
+  SmallPtrSet<const MCSymbol*, 64> ThumbFuncs;
+
   unsigned RelaxAll : 1;
+  unsigned NoExecStack : 1;
   unsigned SubsectionsViaSymbols : 1;
 
 private:
@@ -633,24 +724,34 @@ private:
   bool FragmentNeedsRelaxation(const MCInstFragment *IF,
                                const MCAsmLayout &Layout) const;
 
-  /// Compute the effective fragment size assuming it is layed out at the given
-  /// \arg SectionAddress and \arg FragmentOffset.
-  uint64_t ComputeFragmentSize(MCAsmLayout &Layout, const MCFragment &F,
-                               uint64_t SectionAddress,
-                               uint64_t FragmentOffset) const;
-
   /// LayoutOnce - Perform one layout iteration and return true if any offsets
   /// were adjusted.
   bool LayoutOnce(MCAsmLayout &Layout);
 
+  bool LayoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
+
+  bool RelaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
+
+  bool RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
+
+  bool RelaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
+  bool RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+                                   MCDwarfCallFrameFragment &DF);
+
   /// FinishLayout - Finalize a layout, including fragment lowering.
   void FinishLayout(MCAsmLayout &Layout);
 
+  uint64_t HandleFixup(const MCAsmLayout &Layout,
+                       MCFragment &F, const MCFixup &Fixup);
+
 public:
+  /// Compute the effective fragment size assuming it is layed out at the given
+  /// \arg SectionAddress and \arg FragmentOffset.
+  uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const;
+
   /// Find the symbol which defines the atom containing the given symbol, or
   /// null if there is no such symbol.
-  const MCSymbolData *getAtom(const MCAsmLayout &Layout,
-                              const MCSymbolData *Symbol) const;
+  const MCSymbolData *getAtom(const MCSymbolData *Symbol) const;
 
   /// Check whether a particular symbol is visible to the linker and is required
   /// in the symbol table, or whether it can be discarded by the assembler. This
@@ -659,12 +760,16 @@ public:
   bool isSymbolLinkerVisible(const MCSymbol &SD) const;
 
   /// Emit the section contents using the given object writer.
-  //
-  // FIXME: Should MCAssembler always have a reference to the object writer?
-  void WriteSectionData(const MCSectionData *Section, const MCAsmLayout &Layout,
-                        MCObjectWriter *OW) const;
+  void WriteSectionData(const MCSectionData *Section,
+                        const MCAsmLayout &Layout) const;
 
-  void AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout);
+  /// Check whether a given symbol has been flagged with .thumb_func.
+  bool isThumbFunc(const MCSymbol *Func) const {
+    return ThumbFuncs.count(Func);
+  }
+
+  /// Flag a function symbol as the target of a .thumb_func directive.
+  void setIsThumbFunc(const MCSymbol *Func) { ThumbFuncs.insert(Func); }
 
 public:
   /// Construct a new assembler instance.
@@ -675,8 +780,9 @@ public:
   // concrete and require clients to pass in a target like object. The other
   // option is to make this abstract, and have targets provide concrete
   // implementations as we do with AsmParser.
-  MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
-              MCCodeEmitter &_Emitter, raw_ostream &OS);
+  MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+              MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+              raw_ostream &OS);
   ~MCAssembler();
 
   MCContext &getContext() const { return Context; }
@@ -685,10 +791,12 @@ public:
 
   MCCodeEmitter &getEmitter() const { return Emitter; }
 
+  MCObjectWriter &getWriter() const { return Writer; }
+
   /// Finish - Do final processing and write the object to the output stream.
   /// \arg Writer is used for custom object writer (as the MCJIT does),
   /// if not specified it is automatically created from backend.
-  void Finish(MCObjectWriter *Writer = 0);
+  void Finish();
 
   // FIXME: This does not belong here.
   bool getSubsectionsViaSymbols() const {
@@ -701,6 +809,9 @@ public:
   bool getRelaxAll() const { return RelaxAll; }
   void setRelaxAll(bool Value) { RelaxAll = Value; }
 
+  bool getNoExecStack() const { return NoExecStack; }
+  void setNoExecStack(bool Value) { NoExecStack = Value; }
+
   /// @name Section List Access
   /// @{
 
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index 010a2e556629..bc63241bece9 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -20,33 +20,6 @@ class MCInst;
 class raw_ostream;
 template<typename T> class SmallVectorImpl;
 
-/// MCFixupKindInfo - Target independent information on a fixup kind.
-struct MCFixupKindInfo {
-  enum FixupKindFlags {
-    /// Is this fixup kind PCrelative. This is used by the assembler backend to
-    /// evaluate fixup values in a target independent manner when possible.
-    FKF_IsPCRel = (1 << 0)
-  };
-
-  /// A target specific name for the fixup kind. The names will be unique for
-  /// distinct kinds on any given target.
-  const char *Name;
-
-  /// The bit offset to write the relocation into.
-  //
-  // FIXME: These two fields are under-specified and not general enough, but it
-  // is covers many things, and is enough to let the AsmStreamer pretty-print
-  // the encoding.
-  unsigned TargetOffset;
-
-  /// The number of bits written by this fixup. The bits are assumed to be
-  /// contiguous.
-  unsigned TargetSize;
-
-  /// Flags describing additional information on this fixup kind.
-  unsigned Flags;
-};
-
 /// MCCodeEmitter - Generic instruction encoding interface.
 class MCCodeEmitter {
 private:
@@ -58,17 +31,6 @@ protected: // Can only create subclasses.
 public:
   virtual ~MCCodeEmitter();
 
-  /// @name Target Independent Fixup Information
-  /// @{
-
-  /// getNumFixupKinds - Get the number of target specific fixup kinds.
-  virtual unsigned getNumFixupKinds() const = 0;
-
-  /// getFixupKindInfo - Get information on a fixup kind.
-  virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
-
-  /// @}
-
   /// EncodeInstruction - Encode the given \arg Inst to bytes on the output
   /// stream \arg OS.
   virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS,
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index d22868cdbd0c..7b26d5493776 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -29,7 +29,9 @@ namespace llvm {
   class MCLineSection;
   class StringRef;
   class Twine;
+  class TargetAsmInfo;
   class MCSectionMachO;
+  class MCSectionELF;
 
   /// MCContext - Context object for machine code objects.  This class owns all
   /// of the sections that it creates.
@@ -41,9 +43,15 @@ namespace llvm {
     /// The MCAsmInfo for this target.
     const MCAsmInfo &MAI;
 
+    const TargetAsmInfo *TAI;
+
     /// Symbols - Bindings of names to symbols.
     StringMap<MCSymbol*> Symbols;
 
+    /// UsedNames - Keeps tracks of names that were used both for used declared
+    /// and artificial symbols.
+    StringMap<bool> UsedNames;
+
     /// NextUniqueID - The next ID to dole out to an unnamed assembler temporary
     /// symbol.
     unsigned NextUniqueID;
@@ -56,8 +64,8 @@ namespace llvm {
     /// GetInstance() gets the current instance of the directional local label
     /// for the LocalLabelVal and adds it to the map if needed.
     unsigned GetInstance(int64_t LocalLabelVal);
-    
-    /// The file name of the log file from the enviromment variable
+
+    /// The file name of the log file from the environment variable
     /// AS_SECURE_LOG_FILE.  Which must be set before the .secure_log_unique
     /// directive is used or it is an error.
     char *SecureLogFile;
@@ -79,29 +87,37 @@ namespace llvm {
     /// The dwarf line information from the .loc directives for the sections
     /// with assembled machine instructions have after seeing .loc directives.
     DenseMap<const MCSection *, MCLineSection *> MCLineSections;
+    /// We need a deterministic iteration order, so we remember the order
+    /// the elements were added.
+    std::vector<const MCSection *> MCLineSectionOrder;
 
     /// Allocator - Allocator object used for creating machine code objects.
     ///
     /// We use a bump pointer allocator to avoid the need to track all allocated
     /// objects.
     BumpPtrAllocator Allocator;
-    
+
     void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
+
+    MCSymbol *CreateSymbol(StringRef Name);
+
   public:
-    explicit MCContext(const MCAsmInfo &MAI);
+    explicit MCContext(const MCAsmInfo &MAI, const TargetAsmInfo *TAI);
     ~MCContext();
-    
+
     const MCAsmInfo &getAsmInfo() const { return MAI; }
 
-    /// @name Symbol Managment
+    const TargetAsmInfo &getTargetAsmInfo() const { return *TAI; }
+
+    /// @name Symbol Management
     /// @{
-    
+
     /// CreateTempSymbol - Create and return a new assembler temporary symbol
     /// with a unique but unspecified name.
     MCSymbol *CreateTempSymbol();
 
-    /// CreateDirectionalLocalSymbol - Create the defintion of a directional
-    /// local symbol for numbered label (used for "1:" defintions).
+    /// CreateDirectionalLocalSymbol - Create the definition of a directional
+    /// local symbol for numbered label (used for "1:" definitions).
     MCSymbol *CreateDirectionalLocalSymbol(int64_t LocalLabelVal);
 
     /// GetDirectionalLocalSymbol - Create and return a directional local
@@ -120,8 +136,8 @@ namespace llvm {
     MCSymbol *LookupSymbol(StringRef Name) const;
 
     /// @}
-    
-    /// @name Section Managment
+
+    /// @name Section Management
     /// @{
 
     /// getMachOSection - Return the MCSection for the specified mach-o section.
@@ -137,11 +153,15 @@ namespace llvm {
                                           SectionKind K) {
       return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
     }
-    
-    const MCSection *getELFSection(StringRef Section, unsigned Type,
-                                   unsigned Flags, SectionKind Kind,
-                                   bool IsExplicit = false,
-                                   unsigned EntrySize = 0);
+
+    const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind);
+
+    const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind,
+                                      unsigned EntrySize, StringRef Group);
+
+    const MCSectionELF *CreateELFGroupSection();
 
     const MCSection *getCOFFSection(StringRef Section, unsigned Characteristics,
                                     int Selection, SectionKind Kind);
@@ -151,16 +171,20 @@ namespace llvm {
       return getCOFFSection (Section, Characteristics, 0, Kind);
     }
 
-    
+
     /// @}
 
-    /// @name Dwarf Managment
+    /// @name Dwarf Management
     /// @{
 
     /// GetDwarfFile - creates an entry in the dwarf file and directory tables.
     unsigned GetDwarfFile(StringRef FileName, unsigned FileNumber);
 
-    bool ValidateDwarfFileNumber(unsigned FileNumber);
+    bool isValidDwarfFileNumber(unsigned FileNumber);
+
+    bool hasDwarfFiles() const {
+      return !MCDwarfFiles.empty();
+    }
 
     const std::vector<MCDwarfFile *> &getMCDwarfFiles() {
       return MCDwarfFiles;
@@ -168,23 +192,35 @@ namespace llvm {
     const std::vector<StringRef> &getMCDwarfDirs() {
       return MCDwarfDirs;
     }
-    DenseMap<const MCSection *, MCLineSection *> &getMCLineSections() {
+
+    const DenseMap<const MCSection *, MCLineSection *>
+    &getMCLineSections() const {
       return MCLineSections;
     }
+    const std::vector<const MCSection *> &getMCLineSectionOrder() const {
+      return MCLineSectionOrder;
+    }
+    void addMCLineSection(const MCSection *Sec, MCLineSection *Line) {
+      MCLineSections[Sec] = Line;
+      MCLineSectionOrder.push_back(Sec);
+    }
 
     /// setCurrentDwarfLoc - saves the information from the currently parsed
-    /// dwarf .loc directive and sets DwarfLocSeen.  When the next instruction      /// is assembled an entry in the line number table with this information and
+    /// dwarf .loc directive and sets DwarfLocSeen.  When the next instruction
+    /// is assembled an entry in the line number table with this information and
     /// the address of the instruction will be created.
     void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column,
-                            unsigned Flags, unsigned Isa) {
+                            unsigned Flags, unsigned Isa,
+                            unsigned Discriminator) {
       CurrentDwarfLoc.setFileNum(FileNum);
       CurrentDwarfLoc.setLine(Line);
       CurrentDwarfLoc.setColumn(Column);
       CurrentDwarfLoc.setFlags(Flags);
       CurrentDwarfLoc.setIsa(Isa);
+      CurrentDwarfLoc.setDiscriminator(Discriminator);
       DwarfLocSeen = true;
     }
-    void clearDwarfLocSeen() { DwarfLocSeen = false; }
+    void ClearDwarfLocSeen() { DwarfLocSeen = false; }
 
     bool getDwarfLocSeen() { return DwarfLocSeen; }
     const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h
index 223b09ef7aee..1df55dc252e3 100644
--- a/include/llvm/MC/MCDirectives.h
+++ b/include/llvm/MC/MCDirectives.h
@@ -26,6 +26,7 @@ enum MCSymbolAttr {
   MCSA_ELF_TypeTLS,         ///< .type _foo, STT_TLS     # aka @tls_object
   MCSA_ELF_TypeCommon,      ///< .type _foo, STT_COMMON  # aka @common
   MCSA_ELF_TypeNoType,      ///< .type _foo, STT_NOTYPE  # aka @notype
+  MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object
   MCSA_Global,              ///< .globl
   MCSA_Hidden,              ///< .hidden (ELF)
   MCSA_IndirectSymbol,      ///< .indirect_symbol (MachO)
@@ -33,6 +34,7 @@ enum MCSymbolAttr {
   MCSA_LazyReference,       ///< .lazy_reference (MachO)
   MCSA_Local,               ///< .local (ELF)
   MCSA_NoDeadStrip,         ///< .no_dead_strip (MachO)
+  MCSA_SymbolResolver,      ///< .symbol_resolver (MachO)
   MCSA_PrivateExtern,       ///< .private_extern (MachO)
   MCSA_Protected,           ///< .protected (ELF)
   MCSA_Reference,           ///< .reference (MachO)
@@ -43,9 +45,12 @@ enum MCSymbolAttr {
 };
 
 enum MCAssemblerFlag {
-  MCAF_SubsectionsViaSymbols  ///< .subsections_via_symbols (MachO)
+  MCAF_SyntaxUnified,         ///< .syntax (ARM/ELF)
+  MCAF_SubsectionsViaSymbols, ///< .subsections_via_symbols (MachO)
+  MCAF_Code16,                ///< .code 16
+  MCAF_Code32                 ///< .code 32
 };
-  
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index dfb8ed5e8a10..c9e42eb6c798 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -9,7 +9,7 @@
 #ifndef MCDISASSEMBLER_H
 #define MCDISASSEMBLER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index dac875cf1b67..07a7bad15b1e 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -8,8 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains the declaration of the MCDwarfFile to support the dwarf
-// .file directive.
-// TODO: add the support needed for the .loc directive.
+// .file directive and the .loc directive.
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,12 +16,21 @@
 #define LLVM_MC_MCDWARF_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineLocation.h" // FIXME
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Dwarf.h"
 #include <vector>
 
 namespace llvm {
+  class MachineMove;
   class MCContext;
+  class MCExpr;
   class MCSection;
+  class MCSectionData;
+  class MCStreamer;
   class MCSymbol;
+  class MCObjectStreamer;
   class raw_ostream;
 
   /// MCDwarfFile - Instances of this class represent the name of the dwarf
@@ -78,6 +86,11 @@ namespace llvm {
     unsigned Flags;
     // Isa
     unsigned Isa;
+    // Discriminator
+    unsigned Discriminator;
+
+// Flag that indicates the initial value of the is_stmt_start flag.
+#define DWARF2_LINE_DEFAULT_IS_STMT     1
 
 #define DWARF2_FLAG_IS_STMT        (1 << 0)
 #define DWARF2_FLAG_BASIC_BLOCK    (1 << 1)
@@ -88,13 +101,32 @@ namespace llvm {
     friend class MCContext;
     friend class MCLineEntry;
     MCDwarfLoc(unsigned fileNum, unsigned line, unsigned column, unsigned flags,
-               unsigned isa)
-      : FileNum(fileNum), Line(line), Column(column), Flags(flags), Isa(isa) {}
+               unsigned isa, unsigned discriminator)
+      : FileNum(fileNum), Line(line), Column(column), Flags(flags), Isa(isa),
+        Discriminator(discriminator) {}
 
     // Allow the default copy constructor and assignment operator to be used
     // for an MCDwarfLoc object.
 
   public:
+    /// getFileNum - Get the FileNum of this MCDwarfLoc.
+    unsigned getFileNum() const { return FileNum; }
+
+    /// getLine - Get the Line of this MCDwarfLoc.
+    unsigned getLine() const { return Line; }
+
+    /// getColumn - Get the Column of this MCDwarfLoc.
+    unsigned getColumn() const { return Column; }
+
+    /// getFlags - Get the Flags of this MCDwarfLoc.
+    unsigned getFlags() const { return Flags; }
+
+    /// getIsa - Get the Isa of this MCDwarfLoc.
+    unsigned getIsa() const { return Isa; }
+
+    /// getDiscriminator - Get the Discriminator of this MCDwarfLoc.
+    unsigned getDiscriminator() const { return Discriminator; }
+
     /// setFileNum - Set the FileNum of this MCDwarfLoc.
     void setFileNum(unsigned fileNum) { FileNum = fileNum; }
 
@@ -109,6 +141,11 @@ namespace llvm {
 
     /// setIsa - Set the Isa of this MCDwarfLoc.
     void setIsa(unsigned isa) { Isa = isa; }
+
+    /// setDiscriminator - Set the Discriminator of this MCDwarfLoc.
+    void setDiscriminator(unsigned discriminator) {
+      Discriminator = discriminator;
+    }
   };
 
   /// MCLineEntry - Instances of this class represent the line information for
@@ -127,6 +164,13 @@ namespace llvm {
     // Constructor to create an MCLineEntry given a symbol and the dwarf loc.
     MCLineEntry(MCSymbol *label, const MCDwarfLoc loc) : MCDwarfLoc(loc),
                 Label(label) {}
+
+    MCSymbol *getLabel() const { return Label; }
+
+    // This is called when an instruction is assembled into the specified
+    // section and if there is information from the last .loc directive that
+    // has yet to have a line entry made for it is made.
+    static void Make(MCStreamer *MCOS, const MCSection *Section);
   };
 
   /// MCLineSection - Instances of this class represent the line information
@@ -134,7 +178,6 @@ namespace llvm {
   /// .loc directives.  This is the information used to build the dwarf line
   /// table for a section.
   class MCLineSection {
-    std::vector<MCLineEntry> MCLineEntries;
 
   private:
     MCLineSection(const MCLineSection&);  // DO NOT IMPLEMENT
@@ -149,8 +192,88 @@ namespace llvm {
     void addLineEntry(const MCLineEntry &LineEntry) {
       MCLineEntries.push_back(LineEntry);
     }
+
+    typedef std::vector<MCLineEntry> MCLineEntryCollection;
+    typedef MCLineEntryCollection::iterator iterator;
+    typedef MCLineEntryCollection::const_iterator const_iterator;
+
+  private:
+    MCLineEntryCollection MCLineEntries;
+
+  public:
+    const MCLineEntryCollection *getMCLineEntries() const {
+      return &MCLineEntries;
+    }
   };
 
+  class MCDwarfFileTable {
+  public:
+    //
+    // This emits the Dwarf file and the line tables.
+    //
+    static void Emit(MCStreamer *MCOS);
+  };
+
+  class MCDwarfLineAddr {
+  public:
+    /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
+    static void Encode(int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);
+
+    /// Utility function to emit the encoding to a streamer.
+    static void Emit(MCStreamer *MCOS,
+                     int64_t LineDelta,uint64_t AddrDelta);
+
+    /// Utility function to write the encoding to an object writer.
+    static void Write(MCObjectWriter *OW,
+                      int64_t LineDelta, uint64_t AddrDelta);
+  };
+
+  class MCCFIInstruction {
+  public:
+    enum OpType { Remember, Restore, Move };
+  private:
+    OpType Operation;
+    MCSymbol *Label;
+    // Move to & from location.
+    MachineLocation Destination;
+    MachineLocation Source;
+  public:
+    MCCFIInstruction(OpType Op, MCSymbol *L)
+      : Operation(Op), Label(L) {
+      assert(Op == Remember || Op == Restore);
+    }
+    MCCFIInstruction(MCSymbol *L, const MachineLocation &D,
+                     const MachineLocation &S)
+      : Operation(Move), Label(L), Destination(D), Source(S) {
+    }
+    OpType getOperation() const { return Operation; }
+    MCSymbol *getLabel() const { return Label; }
+    const MachineLocation &getDestination() const { return Destination; }
+    const MachineLocation &getSource() const { return Source; }
+  };
+
+  struct MCDwarfFrameInfo {
+    MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0),
+                         Instructions(), PersonalityEncoding(0),
+                         LsdaEncoding(0) {}
+    MCSymbol *Begin;
+    MCSymbol *End;
+    const MCSymbol *Personality;
+    const MCSymbol *Lsda;
+    std::vector<MCCFIInstruction> Instructions;
+    unsigned PersonalityEncoding;
+    unsigned LsdaEncoding;
+  };
+
+  class MCDwarfFrameEmitter {
+  public:
+    //
+    // This emits the frame info section.
+    //
+    static void Emit(MCStreamer &streamer);
+    static void EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta);
+    static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS);
+  };
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
new file mode 100644
index 000000000000..3c150dca9e62
--- /dev/null
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -0,0 +1,47 @@
+//===-- llvm/MC/MCELFObjectWriter.h - ELF Object Writer ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFOBJECTWRITER_H
+#define LLVM_MC_MCELFOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCELFObjectTargetWriter {
+  const Triple::OSType OSType;
+  const uint16_t EMachine;
+  const unsigned HasRelocationAddend : 1;
+  const unsigned Is64Bit : 1;
+protected:
+  MCELFObjectTargetWriter(bool Is64Bit_, Triple::OSType OSType_,
+                          uint16_t EMachine_,  bool HasRelocationAddend_);
+
+public:
+  virtual ~MCELFObjectTargetWriter();
+
+  /// @name Accessors
+  /// @{
+  Triple::OSType getOSType() { return OSType; }
+  uint16_t getEMachine() { return EMachine; }
+  bool hasRelocationAddend() { return HasRelocationAddend; }
+  bool is64Bit() { return Is64Bit; }
+  /// @}
+};
+
+/// \brief Construct a new ELF writer instance.
+///
+/// \param MOTW - The target specific ELF writer subclass.
+/// \param OS - The stream to write to.
+/// \returns The constructed object writer.
+MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                      raw_ostream &OS, bool IsLittleEndian);
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCELFSymbolFlags.h b/include/llvm/MC/MCELFSymbolFlags.h
index eb7978b18c5c..d798fb077272 100644
--- a/include/llvm/MC/MCELFSymbolFlags.h
+++ b/include/llvm/MC/MCELFSymbolFlags.h
@@ -21,9 +21,10 @@
 
 namespace llvm {
   enum {
-    ELF_STT_Shift = 0, // Shift value for STT_* flags.
-    ELF_STB_Shift = 4, // Shift value for STB_* flags.
-    ELF_STV_Shift = 8  // Shift value ofr STV_* flags.
+    ELF_STT_Shift   = 0, // Shift value for STT_* flags.
+    ELF_STB_Shift   = 4, // Shift value for STB_* flags.
+    ELF_STV_Shift   = 8, // Shift value for STV_* flags.
+    ELF_Other_Shift = 10 // Shift value for other flags.
   };
 
   enum SymbolFlags {
@@ -46,7 +47,9 @@ namespace llvm {
       ELF_STV_Default   = (ELF::STV_DEFAULT   << ELF_STV_Shift),
       ELF_STV_Internal  = (ELF::STV_INTERNAL  << ELF_STV_Shift),
       ELF_STV_Hidden    = (ELF::STV_HIDDEN    << ELF_STV_Shift),
-      ELF_STV_Protected = (ELF::STV_PROTECTED << ELF_STV_Shift)
+      ELF_STV_Protected = (ELF::STV_PROTECTED << ELF_STV_Shift),
+
+      ELF_Other_Weakref = (1                  << ELF_Other_Shift)
   };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 1f9b8f256743..fea5249eaba0 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -10,17 +10,21 @@
 #ifndef LLVM_MC_MCEXPR_H
 #define LLVM_MC_MCEXPR_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class MCAsmInfo;
 class MCAsmLayout;
+class MCAssembler;
 class MCContext;
+class MCSectionData;
 class MCSymbol;
 class MCValue;
 class raw_ostream;
 class StringRef;
+typedef DenseMap<const MCSectionData*, uint64_t> SectionAddrMap;
 
 /// MCExpr - Base class for the full range of assembler expressions which are
 /// needed for parsing.
@@ -40,9 +44,16 @@ private:
   MCExpr(const MCExpr&); // DO NOT IMPLEMENT
   void operator=(const MCExpr&); // DO NOT IMPLEMENT
 
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+                          const MCAsmLayout *Layout,
+                          const SectionAddrMap *Addrs) const;
 protected:
   explicit MCExpr(ExprKind _Kind) : Kind(_Kind) {}
 
+  bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+                                 const MCAsmLayout *Layout,
+                                 const SectionAddrMap *Addrs,
+                                 bool InSet) const;
 public:
   /// @name Accessors
   /// @{
@@ -67,7 +78,11 @@ public:
   /// values. If not given, then only non-symbolic expressions will be
   /// evaluated.
   /// @result - True on success.
-  bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout = 0) const;
+  bool EvaluateAsAbsolute(int64_t &Res) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout,
+                          const SectionAddrMap &Addrs) const;
 
   /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable
   /// value, i.e. an expression of the fixed form (a - b + constant).
@@ -75,7 +90,7 @@ public:
   /// @param Res - The relocatable value, if evaluation succeeds.
   /// @param Layout - The assembler layout object to use for evaluating values.
   /// @result - True on success.
-  bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout = 0) const;
+  bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout &Layout) const;
 
   /// @}
 
@@ -132,12 +147,25 @@ public:
     VK_GOTTPOFF,
     VK_INDNTPOFF,
     VK_NTPOFF,
+    VK_GOTNTPOFF,
     VK_PLT,
     VK_TLSGD,
+    VK_TLSLD,
+    VK_TLSLDM,
     VK_TPOFF,
-    VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the asm file)
-    VK_ARM_LO16, // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the asm file)
-    VK_TLVP // Mach-O thread local variable relocation
+    VK_DTPOFF,
+    VK_TLVP,      // Mach-O thread local variable relocation
+    // FIXME: We'd really like to use the generic Kinds listed above for these.
+    VK_ARM_PLT,   // ARM-style PLT references. i.e., (PLT) instead of @PLT
+    VK_ARM_TLSGD, //   ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF
+    VK_ARM_GOT,
+    VK_ARM_GOTOFF,
+    VK_ARM_TPOFF,
+    VK_ARM_GOTTPOFF,
+
+    VK_PPC_TOC,
+    VK_PPC_HA16,  // ha16(symbol)
+    VK_PPC_LO16   // lo16(symbol)
   };
 
 private:
@@ -162,7 +190,7 @@ public:
                                        MCContext &Ctx);
   static const MCSymbolRefExpr *Create(StringRef Name, VariantKind Kind,
                                        MCContext &Ctx);
-  
+
   /// @}
   /// @name Accessors
   /// @{
@@ -391,7 +419,7 @@ public:
   virtual void PrintImpl(raw_ostream &OS) const = 0;
   virtual bool EvaluateAsRelocatableImpl(MCValue &Res,
                                          const MCAsmLayout *Layout) const = 0;
-
+  virtual void AddValueSymbols(MCAssembler *) const = 0;
 
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index eed4c349e848..6fde797e40fd 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -10,7 +10,7 @@
 #ifndef LLVM_MC_MCFIXUP_H
 #define LLVM_MC_MCFIXUP_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
@@ -22,6 +22,10 @@ enum MCFixupKind {
   FK_Data_2,     ///< A two-byte fixup.
   FK_Data_4,     ///< A four-byte fixup.
   FK_Data_8,     ///< A eight-byte fixup.
+  FK_PCRel_1,    ///< A one-byte pc relative fixup.
+  FK_PCRel_2,    ///< A two-byte pc relative fixup.
+  FK_PCRel_4,    ///< A four-byte pc relative fixup.
+  FK_PCRel_8,    ///< A eight-byte pc relative fixup.
 
   FirstTargetFixupKind = 128,
 
@@ -77,13 +81,13 @@ public:
 
   /// getKindForSize - Return the generic fixup kind for a value with the given
   /// size. It is an error to pass an unsupported size.
-  static MCFixupKind getKindForSize(unsigned Size) {
+  static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) {
     switch (Size) {
     default: assert(0 && "Invalid generic fixup size!");
-    case 1: return FK_Data_1;
-    case 2: return FK_Data_2;
-    case 4: return FK_Data_4;
-    case 8: return FK_Data_8;
+    case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1;
+    case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2;
+    case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4;
+    case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8;
     }
   }
 };
diff --git a/include/llvm/MC/MCFixupKindInfo.h b/include/llvm/MC/MCFixupKindInfo.h
new file mode 100644
index 000000000000..1961687146a8
--- /dev/null
+++ b/include/llvm/MC/MCFixupKindInfo.h
@@ -0,0 +1,43 @@
+//===-- llvm/MC/MCFixupKindInfo.h - Fixup Descriptors -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFIXUPKINDINFO_H
+#define LLVM_MC_MCFIXUPKINDINFO_H
+
+namespace llvm {
+
+/// MCFixupKindInfo - Target independent information on a fixup kind.
+struct MCFixupKindInfo {
+  enum FixupKindFlags {
+    /// Is this fixup kind PCrelative? This is used by the assembler backend to
+    /// evaluate fixup values in a target independent manner when possible.
+    FKF_IsPCRel = (1 << 0),
+    
+    /// Should this fixup kind force a 4-byte aligned effective PC value?
+    FKF_IsAlignedDownTo32Bits = (1 << 1)
+  };
+
+  /// A target specific name for the fixup kind. The names will be unique for
+  /// distinct kinds on any given target.
+  const char *Name;
+
+  /// The bit offset to write the relocation into.
+  unsigned TargetOffset;
+
+  /// The number of bits written by this fixup. The bits are assumed to be
+  /// contiguous.
+  unsigned TargetSize;
+
+  /// Flags describing additional information on this fixup kind.
+  unsigned Flags;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index dc630fe2807f..d6ef7b4c33c1 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -18,7 +18,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class raw_ostream;
@@ -33,24 +33,27 @@ class MCOperand {
     kInvalid,                 ///< Uninitialized.
     kRegister,                ///< Register operand.
     kImmediate,               ///< Immediate operand.
+    kFPImmediate,             ///< Floating-point immediate operand.
     kExpr                     ///< Relocatable immediate operand.
   };
   unsigned char Kind;
-  
+
   union {
     unsigned RegVal;
     int64_t ImmVal;
+    double FPImmVal;
     const MCExpr *ExprVal;
   };
 public:
-  
-  MCOperand() : Kind(kInvalid) {}
+
+  MCOperand() : Kind(kInvalid), FPImmVal(0.0) {}
 
   bool isValid() const { return Kind != kInvalid; }
   bool isReg() const { return Kind == kRegister; }
   bool isImm() const { return Kind == kImmediate; }
+  bool isFPImm() const { return Kind == kFPImmediate; }
   bool isExpr() const { return Kind == kExpr; }
-  
+
   /// getReg - Returns the register number.
   unsigned getReg() const {
     assert(isReg() && "This is not a register operand!");
@@ -62,7 +65,7 @@ public:
     assert(isReg() && "This is not a register operand!");
     RegVal = Reg;
   }
-  
+
   int64_t getImm() const {
     assert(isImm() && "This is not an immediate");
     return ImmVal;
@@ -71,7 +74,17 @@ public:
     assert(isImm() && "This is not an immediate");
     ImmVal = Val;
   }
-  
+
+  double getFPImm() const {
+    assert(isFPImm() && "This is not an FP immediate");
+    return FPImmVal;
+  }
+
+  void setFPImm(double Val) {
+    assert(isFPImm() && "This is not an FP immediate");
+    FPImmVal = Val;
+  }
+
   const MCExpr *getExpr() const {
     assert(isExpr() && "This is not an expression");
     return ExprVal;
@@ -80,7 +93,7 @@ public:
     assert(isExpr() && "This is not an expression");
     ExprVal = Val;
   }
-  
+
   static MCOperand CreateReg(unsigned Reg) {
     MCOperand Op;
     Op.Kind = kRegister;
@@ -93,6 +106,12 @@ public:
     Op.ImmVal = Val;
     return Op;
   }
+  static MCOperand CreateFPImm(double Val) {
+    MCOperand Op;
+    Op.Kind = kFPImmediate;
+    Op.FPImmVal = Val;
+    return Op;
+  }
   static MCOperand CreateExpr(const MCExpr *Val) {
     MCOperand Op;
     Op.Kind = kExpr;
@@ -104,23 +123,23 @@ public:
   void dump() const;
 };
 
-  
+
 /// MCInst - Instances of this class represent a single low-level machine
-/// instruction. 
+/// instruction.
 class MCInst {
   unsigned Opcode;
   SmallVector<MCOperand, 8> Operands;
 public:
   MCInst() : Opcode(0) {}
-  
+
   void setOpcode(unsigned Op) { Opcode = Op; }
-  
+
   unsigned getOpcode() const { return Opcode; }
 
   const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
   MCOperand &getOperand(unsigned i) { return Operands[i]; }
   unsigned getNumOperands() const { return Operands.size(); }
-  
+
   void addOperand(const MCOperand &Op) {
     Operands.push_back(Op);
   }
@@ -136,6 +155,15 @@ public:
                    StringRef Separator = " ") const;
 };
 
+inline raw_ostream& operator<<(raw_ostream &OS, const MCOperand &MO) {
+  MO.print(OS, 0);
+  return OS;
+}
+
+inline raw_ostream& operator<<(raw_ostream &OS, const MCInst &MI) {
+  MI.print(OS, 0);
+  return OS;
+}
 
 } // end namespace llvm
 
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 4839a83dba5c..96716c775fdf 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -28,21 +28,21 @@ protected:
 public:
   MCInstPrinter(const MCAsmInfo &mai)
     : CommentStream(0), MAI(mai) {}
-  
+
   virtual ~MCInstPrinter();
 
   /// setCommentStream - Specify a stream to emit comments to.
   void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
-  
+
   /// printInst - Print the specified MCInst to the specified raw_ostream.
   ///
   virtual void printInst(const MCInst *MI, raw_ostream &OS) = 0;
-  
+
   /// getOpcodeName - Return the name of the specified opcode enum (e.g.
   /// "MOV32ri") or empty if we can't resolve it.
   virtual StringRef getOpcodeName(unsigned Opcode) const;
 };
-  
+
 } // namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCMachOSymbolFlags.h b/include/llvm/MC/MCMachOSymbolFlags.h
index c938c81f698c..696436dffa6e 100644
--- a/include/llvm/MC/MCMachOSymbolFlags.h
+++ b/include/llvm/MC/MCMachOSymbolFlags.h
@@ -34,9 +34,11 @@ namespace llvm {
     SF_ReferenceTypePrivateUndefinedLazy    = 0x0005,
 
     // Other 'desc' flags.
+    SF_ThumbFunc                            = 0x0008,
     SF_NoDeadStrip                          = 0x0020,
     SF_WeakReference                        = 0x0040,
-    SF_WeakDefinition                       = 0x0080
+    SF_WeakDefinition                       = 0x0080,
+    SF_SymbolResolver                       = 0x0100
   };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
new file mode 100644
index 000000000000..ec51031d0bb3
--- /dev/null
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -0,0 +1,65 @@
+//===-- llvm/MC/MCMachObjectWriter.h - Mach Object Writer -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCMACHOBJECTWRITER_H
+#define LLVM_MC_MCMACHOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCMachObjectTargetWriter {
+  const unsigned Is64Bit : 1;
+  const uint32_t CPUType;
+  const uint32_t CPUSubtype;
+  // FIXME: Remove this, we should just always use it once we no longer care
+  // about Darwin 'as' compatibility.
+  const unsigned UseAggressiveSymbolFolding : 1;
+  unsigned LocalDifference_RIT;
+
+protected:
+  MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
+                           uint32_t CPUSubtype_,
+                           bool UseAggressiveSymbolFolding_ = false);
+
+  void setLocalDifferenceRelocationType(unsigned Type) {
+    LocalDifference_RIT = Type;
+  }
+
+public:
+  virtual ~MCMachObjectTargetWriter();
+
+  /// @name Accessors
+  /// @{
+
+  bool is64Bit() const { return Is64Bit; }
+  bool useAggressiveSymbolFolding() const { return UseAggressiveSymbolFolding; }
+  uint32_t getCPUType() const { return CPUType; }
+  uint32_t getCPUSubtype() const { return CPUSubtype; }
+  unsigned getLocalDifferenceRelocationType() const {
+    return LocalDifference_RIT;
+  }
+
+  /// @}
+};
+
+/// \brief Construct a new Mach-O writer instance.
+///
+/// This routine takes ownership of the target writer subclass.
+///
+/// \param MOTW - The target specific Mach-O writer subclass.
+/// \param OS - The stream to write to.
+/// \returns The constructed object writer.
+MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+                                       raw_ostream &OS, bool IsLittleEndian);
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index ea6d9c12338d..833341eb97f5 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -33,6 +33,8 @@ class MCObjectStreamer : public MCStreamer {
   MCAssembler *Assembler;
   MCSectionData *CurSectionData;
 
+  virtual void EmitInstToData(const MCInst &Inst) = 0;
+
 protected:
   MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
                    raw_ostream &_OS, MCCodeEmitter *_Emitter);
@@ -56,7 +58,21 @@ public:
   /// @name MCStreamer Interface
   /// @{
 
-  virtual void SwitchSection(const MCSection *Section);
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             bool isPCRel, unsigned AddrSpace);
+  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void EmitInstruction(const MCInst &Inst);
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
+  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                        const MCSymbol *LastLabel,
+                                        const MCSymbol *Label);
+  virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                         const MCSymbol *Label);
   virtual void Finish();
 
   /// @}
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index f1c1cb8a5991..782d844598b4 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -10,8 +10,9 @@
 #ifndef LLVM_MC_MCOBJECTWRITER_H
 #define LLVM_MC_MCOBJECTWRITER_H
 
+#include "llvm/ADT/Triple.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
@@ -19,6 +20,9 @@ class MCAsmLayout;
 class MCAssembler;
 class MCFixup;
 class MCFragment;
+class MCSymbol;
+class MCSymbolData;
+class MCSymbolRefExpr;
 class MCValue;
 class raw_ostream;
 
@@ -61,7 +65,8 @@ public:
   ///
   /// This routine is called by the assembler after layout and relaxation is
   /// complete.
-  virtual void ExecutePostLayoutBinding(MCAssembler &Asm) = 0;
+  virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+                                        const MCAsmLayout &Layout) = 0;
 
   /// Record a relocation entry.
   ///
@@ -75,12 +80,31 @@ public:
                                 const MCFixup &Fixup, MCValue Target,
                                 uint64_t &FixedValue) = 0;
 
+  /// \brief Check whether the difference (A - B) between two symbol
+  /// references is fully resolved.
+  ///
+  /// Clients are not required to answer precisely and may conservatively return
+  /// false, even when a difference is fully resolved.
+  bool
+  IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+                                     const MCSymbolRefExpr *A,
+                                     const MCSymbolRefExpr *B,
+                                     bool InSet) const;
+
+  virtual bool
+  IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                         const MCSymbolData &DataA,
+                                         const MCFragment &FB,
+                                         bool InSet,
+                                         bool IsPCRel) const;
+
+
   /// Write the object file.
   ///
   /// This routine is called by the assembler after layout and relaxation is
   /// complete, fixups have been evaluated and applied, and relocations
   /// generated.
-  virtual void WriteObject(const MCAssembler &Asm,
+  virtual void WriteObject(MCAssembler &Asm,
                            const MCAsmLayout &Layout) = 0;
 
   /// @}
@@ -160,6 +184,11 @@ public:
   }
 
   /// @}
+
+  /// Utility function to encode a SLEB128 value.
+  static void EncodeSLEB128(int64_t Value, raw_ostream &OS);
+  /// Utility function to encode a ULEB128 value.
+  static void EncodeULEB128(uint64_t Value, raw_ostream &OS);
 };
 
 MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 21878899cac1..252696bec317 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -17,7 +17,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 #include <cassert>
 
@@ -29,10 +29,10 @@ class MCAsmInfo;
 /// AsmLexer - Lexer class for assembly files.
 class AsmLexer : public MCAsmLexer {
   const MCAsmInfo &MAI;
-  
+
   const char *CurPtr;
   const MemoryBuffer *CurBuf;
-  
+
   void operator=(const AsmLexer&); // DO NOT IMPLEMENT
   AsmLexer(const AsmLexer&);       // DO NOT IMPLEMENT
 
@@ -43,13 +43,13 @@ protected:
 public:
   AsmLexer(const MCAsmInfo &MAI);
   ~AsmLexer();
-  
+
   void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL);
-  
+
   virtual StringRef LexUntilEndOfStatement();
 
   bool isAtStartOfComment(char Char);
-  
+
   const MCAsmInfo &getMAI() const { return MAI; }
 
 private:
@@ -60,9 +60,11 @@ private:
   AsmToken LexSlash();
   AsmToken LexLineComment();
   AsmToken LexDigit();
+  AsmToken LexSingleQuote();
   AsmToken LexQuote();
+  AsmToken LexFloatLiteral();
 };
-  
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index d690e810bd39..606725a98515 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -11,7 +11,7 @@
 #define LLVM_MC_MCASMLEXER_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/SMLoc.h"
 
 namespace llvm {
@@ -29,13 +29,16 @@ public:
     // String values.
     Identifier,
     String,
-    
+
     // Integer values.
     Integer,
-    
+
+    // Real values.
+    Real,
+
     // Register values (stored in IntVal).  Only used by TargetAsmLexer.
     Register,
-    
+
     // No-value.
     EndOfStatement,
     Colon,
@@ -43,8 +46,8 @@ public:
     Slash,    // '/'
     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     Star, Dot, Comma, Dollar, Equal, EqualEqual,
-    
-    Pipe, PipePipe, Caret, 
+
+    Pipe, PipePipe, Caret,
     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     Less, LessEqual, LessLess, LessGreater,
     Greater, GreaterEqual, GreaterGreater, At
@@ -70,7 +73,7 @@ public:
   SMLoc getLoc() const;
 
   /// getStringContents - Get the contents of a string token (without quotes).
-  StringRef getStringContents() const { 
+  StringRef getStringContents() const {
     assert(Kind == String && "This token isn't a string!");
     return Str.slice(1, Str.size() - 1);
   }
@@ -95,11 +98,11 @@ public:
   // FIXME: Don't compute this in advance, it makes every token larger, and is
   // also not generally what we want (it is nicer for recovery etc. to lex 123br
   // as a single token, then diagnose as an invalid number).
-  int64_t getIntVal() const { 
+  int64_t getIntVal() const {
     assert(Kind == Integer && "This token isn't an integer!");
-    return IntVal; 
+    return IntVal;
   }
-  
+
   /// getRegVal - Get the register number for the current token, which should
   /// be a register.
   unsigned getRegVal() const {
@@ -113,7 +116,7 @@ public:
 class MCAsmLexer {
   /// The current token, stored in the base class for faster access.
   AsmToken CurTok;
-  
+
   /// The location and description of the current error
   SMLoc ErrLoc;
   std::string Err;
@@ -126,12 +129,12 @@ protected: // Can only create subclasses.
   MCAsmLexer();
 
   virtual AsmToken LexToken() = 0;
-  
+
   void SetError(const SMLoc &errLoc, const std::string &err) {
     ErrLoc = errLoc;
     Err = err;
   }
-  
+
 public:
   virtual ~MCAsmLexer();
 
@@ -152,12 +155,12 @@ public:
   const AsmToken &getTok() {
     return CurTok;
   }
-  
+
   /// getErrLoc - Get the current error location
   const SMLoc &getErrLoc() {
     return ErrLoc;
   }
-           
+
   /// getErr - Get the current error string
   const std::string &getErr() {
     return Err;
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index b37d46cc5a25..54979d977db7 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -10,7 +10,7 @@
 #ifndef LLVM_MC_MCASMPARSER_H
 #define LLVM_MC_MCASMPARSER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class AsmToken;
@@ -99,6 +99,10 @@ public:
   /// will be either the EndOfStatement or EOF.
   virtual StringRef ParseStringToEndOfStatement() = 0;
 
+  /// EatToEndOfStatement - Skip to the end of the current statement, for error
+  /// recovery.
+  virtual void EatToEndOfStatement() = 0;
+
   /// ParseExpression - Parse an arbitrary expression.
   ///
   /// @param Res - The value of the expression. The result is undefined
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 99fa5adae977..91f5773b8df8 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -19,10 +19,10 @@ class raw_ostream;
 /// base class is used by target-independent clients and is the interface
 /// between parsing an asm instruction and recognizing it.
 class MCParsedAsmOperand {
-public:  
+public:
   MCParsedAsmOperand() {}
   virtual ~MCParsedAsmOperand() {}
-  
+
   /// getStartLoc - Get the location of the first token of this operand.
   virtual SMLoc getStartLoc() const = 0;
   /// getEndLoc - Get the location of the last token of this operand.
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 5c997357c9d3..1c01b2f8f3cc 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -32,8 +32,7 @@ namespace llvm {
     enum SectionVariant {
       SV_COFF = 0,
       SV_ELF,
-      SV_MachO,
-      SV_PIC16
+      SV_MachO
     };
 
   private:
@@ -61,6 +60,14 @@ namespace llvm {
       return false;
     }
 
+    // UseCodeAlign - Return true if a .align directive should use
+    // "optimized nops" to fill instead of 0s.
+    virtual bool UseCodeAlign() const = 0;
+
+    /// isVirtualSection - Check whether this section is "virtual", that is
+    /// has no actual object file contents.
+    virtual bool isVirtualSection() const = 0;
+
     static bool classof(const MCSection *) { return true; }
   };
 
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index f828e1060fe6..b154cf59d106 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -19,12 +19,12 @@
 #include "llvm/Support/COFF.h"
 
 namespace llvm {
-  
+
 /// MCSectionCOFF - This represents a section on Windows
   class MCSectionCOFF : public MCSection {
     // The memory for this string is stored in the same MCContext as *this.
     StringRef SectionName;
-    
+
     /// Characteristics - This is the Characteristics field of a section,
     //  drawn from the enums below.
     unsigned Characteristics;
@@ -52,9 +52,11 @@ namespace llvm {
     StringRef getSectionName() const { return SectionName; }
     unsigned getCharacteristics() const { return Characteristics; }
     int getSelection () const { return Selection; }
-    
+
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                       raw_ostream &OS) const;
+    virtual bool UseCodeAlign() const;
+    virtual bool isVirtualSection() const;
 
     static bool classof(const MCSection *S) {
       return S->getVariant() == SV_COFF;
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 5de0bf58fe0c..c82de7128202 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -15,38 +15,39 @@
 #define LLVM_MC_MCSECTIONELF_H
 
 #include "llvm/MC/MCSection.h"
+#include "llvm/Support/ELF.h"
 
 namespace llvm {
-  
+
+class MCSymbol;
+
 /// MCSectionELF - This represents a section on linux, lots of unix variants
 /// and some bare metal systems.
 class MCSectionELF : public MCSection {
   /// SectionName - This is the name of the section.  The referenced memory is
   /// owned by TargetLoweringObjectFileELF's ELFUniqueMap.
   StringRef SectionName;
-  
+
   /// Type - This is the sh_type field of a section, drawn from the enums below.
   unsigned Type;
-  
+
   /// Flags - This is the sh_flags field of a section, drawn from the enums.
   /// below.
   unsigned Flags;
 
-  /// IsExplicit - Indicates that this section comes from globals with an
-  /// explicit section specified.
-  bool IsExplicit;
-
   /// EntrySize - The size of each entry in this section. This size only
   /// makes sense for sections that contain fixed-sized entries. If a
   /// section does not contain fixed-sized entries 'EntrySize' will be 0.
   unsigned EntrySize;
-  
+
+  const MCSymbol *Group;
+
 private:
   friend class MCContext;
   MCSectionELF(StringRef Section, unsigned type, unsigned flags,
-               SectionKind K, bool isExplicit, unsigned entrySize)
+               SectionKind K, unsigned entrySize, const MCSymbol *group)
     : MCSection(SV_ELF, K), SectionName(Section), Type(type), Flags(flags),
-      IsExplicit(isExplicit), EntrySize(entrySize) {}
+      EntrySize(entrySize), Group(group) {}
   ~MCSectionELF();
 public:
 
@@ -54,141 +55,31 @@ public:
   /// should be printed before the section name
   bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
 
-  /// ShouldPrintSectionType - Only prints the section type if supported
-  bool ShouldPrintSectionType(unsigned Ty) const;
-
-  /// HasCommonSymbols - True if this section holds common symbols, this is
-  /// indicated on the ELF object file by a symbol with SHN_COMMON section 
-  /// header index.
-  bool HasCommonSymbols() const;
-  
-  /// These are the section type and flags fields.  An ELF section can have
-  /// only one Type, but can have more than one of the flags specified.
-  ///
-  /// Valid section types.
-  enum {
-    // This value marks the section header as inactive.
-    SHT_NULL             = 0x00U,
-
-    // Holds information defined by the program, with custom format and meaning.
-    SHT_PROGBITS         = 0x01U,
-
-    // This section holds a symbol table.
-    SHT_SYMTAB           = 0x02U,
-
-    // The section holds a string table.
-    SHT_STRTAB           = 0x03U,
-
-    // The section holds relocation entries with explicit addends.
-    SHT_RELA             = 0x04U,
-
-    // The section holds a symbol hash table.
-    SHT_HASH             = 0x05U,
-    
-    // Information for dynamic linking.
-    SHT_DYNAMIC          = 0x06U,
-
-    // The section holds information that marks the file in some way.
-    SHT_NOTE             = 0x07U,
-
-    // A section of this type occupies no space in the file.
-    SHT_NOBITS           = 0x08U,
-
-    // The section holds relocation entries without explicit addends.
-    SHT_REL              = 0x09U,
-
-    // This section type is reserved but has unspecified semantics. 
-    SHT_SHLIB            = 0x0AU,
-
-    // This section holds a symbol table.
-    SHT_DYNSYM           = 0x0BU,
-
-    // This section contains an array of pointers to initialization functions.
-    SHT_INIT_ARRAY       = 0x0EU,
-
-    // This section contains an array of pointers to termination functions.
-    SHT_FINI_ARRAY       = 0x0FU,
-
-    // This section contains an array of pointers to functions that are invoked
-    // before all other initialization functions.
-    SHT_PREINIT_ARRAY    = 0x10U,
-
-    // A section group is a set of sections that are related and that must be
-    // treated specially by the linker.
-    SHT_GROUP            = 0x11U,
-
-    // This section is associated with a section of type SHT_SYMTAB, when the
-    // referenced symbol table contain the escape value SHN_XINDEX
-    SHT_SYMTAB_SHNDX     = 0x12U,
-
-    LAST_KNOWN_SECTION_TYPE = SHT_SYMTAB_SHNDX
-  }; 
-
-  /// Valid section flags.
-  enum {
-    // The section contains data that should be writable.
-    SHF_WRITE            = 0x1U,
-
-    // The section occupies memory during execution.
-    SHF_ALLOC            = 0x2U,
-
-    // The section contains executable machine instructions.
-    SHF_EXECINSTR        = 0x4U,
-
-    // The data in the section may be merged to eliminate duplication.
-    SHF_MERGE            = 0x10U,
-
-    // Elements in the section consist of null-terminated character strings.
-    SHF_STRINGS          = 0x20U,
-
-    // A field in this section holds a section header table index.
-    SHF_INFO_LINK        = 0x40U,
-
-    // Adds special ordering requirements for link editors.
-    SHF_LINK_ORDER       = 0x80U,
-
-    // This section requires special OS-specific processing to avoid incorrect
-    // behavior.
-    SHF_OS_NONCONFORMING = 0x100U,
-
-    // This section is a member of a section group.
-    SHF_GROUP            = 0x200U,
-
-    // This section holds Thread-Local Storage.
-    SHF_TLS              = 0x400U,
-
-    
-    // Start of target-specific flags.
-
-    /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped
-    /// together by the linker to form the constant pool and the cp register is
-    /// set to the start of the constant pool by the boot code.
-    XCORE_SHF_CP_SECTION = 0x800U,
-    
-    /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped
-    /// together by the linker to form the data section and the dp register is
-    /// set to the start of the section by the boot code.
-    XCORE_SHF_DP_SECTION = 0x1000U
-  };
-
   StringRef getSectionName() const { return SectionName; }
   unsigned getType() const { return Type; }
   unsigned getFlags() const { return Flags; }
   unsigned getEntrySize() const { return EntrySize; }
-  
+  const MCSymbol *getGroup() const { return Group; }
+
   void PrintSwitchToSection(const MCAsmInfo &MAI,
                             raw_ostream &OS) const;
-  
+  virtual bool UseCodeAlign() const;
+  virtual bool isVirtualSection() const;
+
   /// isBaseAddressKnownZero - We know that non-allocatable sections (like
   /// debug info) have a base of zero.
   virtual bool isBaseAddressKnownZero() const {
-    return (getFlags() & SHF_ALLOC) == 0;
+    return (getFlags() & ELF::SHF_ALLOC) == 0;
   }
 
   static bool classof(const MCSection *S) {
     return S->getVariant() == SV_ELF;
   }
   static bool classof(const MCSectionELF *) { return true; }
+
+  // Return the entry size for sections with fixed-width data.
+  static unsigned DetermineEntrySize(SectionKind Kind);
+
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 2d9d1333dbe2..7633515f2744 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -17,36 +17,36 @@
 #include "llvm/MC/MCSection.h"
 
 namespace llvm {
-  
+
 /// MCSectionMachO - This represents a section on a Mach-O system (used by
 /// Mac OS X).  On a Mac system, these are also described in
 /// /usr/include/mach-o/loader.h.
 class MCSectionMachO : public MCSection {
   char SegmentName[16];  // Not necessarily null terminated!
   char SectionName[16];  // Not necessarily null terminated!
-  
+
   /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES
   /// field of a section, drawn from the enums below.
   unsigned TypeAndAttributes;
-  
+
   /// Reserved2 - The 'reserved2' field of a section, used to represent the
   /// size of stubs, for example.
   unsigned Reserved2;
-  
+
   MCSectionMachO(StringRef Segment, StringRef Section,
-                 unsigned TAA, unsigned reserved2, SectionKind K);  
+                 unsigned TAA, unsigned reserved2, SectionKind K);
   friend class MCContext;
 public:
-  
+
   /// These are the section type and attributes fields.  A MachO section can
   /// have only one Type, but can have any of the attributes specified.
   enum {
     // TypeAndAttributes bitmasks.
     SECTION_TYPE       = 0x000000FFU,
     SECTION_ATTRIBUTES = 0xFFFFFF00U,
-    
+
     // Valid section types.
-    
+
     /// S_REGULAR - Regular section.
     S_REGULAR                    = 0x00U,
     /// S_ZEROFILL - Zero fill on demand section.
@@ -101,10 +101,10 @@ public:
     S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15U,
 
     LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
-    
+
 
     // Valid section attributes.
-    
+
     /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
     /// instructions.
     S_ATTR_PURE_INSTRUCTIONS   = 1U << 31,
@@ -165,6 +165,8 @@ public:
 
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                     raw_ostream &OS) const;
+  virtual bool UseCodeAlign() const;
+  virtual bool isVirtualSection() const;
 
   static bool classof(const MCSection *S) {
     return S->getVariant() == SV_MachO;
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 1ce1b0e09d4a..fc2451f9c19c 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -14,8 +14,10 @@
 #ifndef LLVM_MC_MCSTREAMER_H
 #define LLVM_MC_MCSTREAMER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
 
 namespace llvm {
   class MCAsmInfo;
@@ -28,6 +30,7 @@ namespace llvm {
   class MCSymbol;
   class StringRef;
   class TargetAsmBackend;
+  class TargetLoweringObjectFile;
   class Twine;
   class raw_ostream;
   class formatted_raw_ostream;
@@ -47,29 +50,44 @@ namespace llvm {
     MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT
     MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT
 
-  protected:
-    MCStreamer(MCContext &Ctx);
+    void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+                         bool isPCRel, unsigned AddrSpace);
+
+    std::vector<MCDwarfFrameInfo> FrameInfos;
+    MCDwarfFrameInfo *getCurrentFrameInfo();
+    void EnsureValidFrame();
+
+    /// CurSectionStack - This is stack of CurSection values saved by
+    /// PushSection.
+    SmallVector<const MCSection *, 4> CurSectionStack;
 
-    /// CurSection - This is the current section code is being emitted to, it is
-    /// kept up to date by SwitchSection.
-    const MCSection *CurSection;
+    /// PrevSectionStack - This is stack of PrevSection values saved by
+    /// PushSection.
+    SmallVector<const MCSection *, 4> PrevSectionStack;
 
-    /// PrevSection - This is the previous section code is being emitted to, it is
-    /// kept up to date by SwitchSection.
-    const MCSection *PrevSection;
+  protected:
+    MCStreamer(MCContext &Ctx);
 
   public:
     virtual ~MCStreamer();
 
     MCContext &getContext() const { return Context; }
 
+    unsigned getNumFrameInfos() {
+      return FrameInfos.size();
+    }
+
+    const MCDwarfFrameInfo &getFrameInfo(unsigned i) {
+      return FrameInfos[i];
+    }
+
     /// @name Assembly File Formatting.
     /// @{
-    
+
     /// isVerboseAsm - Return true if this streamer supports verbose assembly
     /// and if it is enabled.
     virtual bool isVerboseAsm() const { return false; }
-    
+
     /// hasRawTextSupport - Return true if this asm streamer supports emitting
     /// unformatted text to the .s file with EmitRawText.
     virtual bool hasRawTextSupport() const { return false; }
@@ -82,34 +100,83 @@ namespace llvm {
     /// If the comment includes embedded \n's, they will each get the comment
     /// prefix as appropriate.  The added comment should not end with a \n.
     virtual void AddComment(const Twine &T) {}
-    
+
     /// GetCommentOS - Return a raw_ostream that comments can be written to.
     /// Unlike AddComment, you are required to terminate comments with \n if you
     /// use this method.
     virtual raw_ostream &GetCommentOS();
-    
+
     /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
     virtual void AddBlankLine() {}
-    
+
     /// @}
-    
+
     /// @name Symbol & Section Management
     /// @{
-    
+
     /// getCurrentSection - Return the current section that the streamer is
     /// emitting code to.
-    const MCSection *getCurrentSection() const { return CurSection; }
+    const MCSection *getCurrentSection() const {
+      if (!CurSectionStack.empty())
+        return CurSectionStack.back();
+      return NULL;
+    }
 
     /// getPreviousSection - Return the previous section that the streamer is
     /// emitting code to.
-    const MCSection *getPreviousSection() const { return PrevSection; }
+    const MCSection *getPreviousSection() const {
+      if (!PrevSectionStack.empty())
+        return PrevSectionStack.back();
+      return NULL;
+    }
+
+    /// ChangeSection - Update streamer for a new active section.
+    ///
+    /// This is called by PopSection and SwitchSection, if the current
+    /// section changes.
+    virtual void ChangeSection(const MCSection *) = 0;
+
+    /// pushSection - Save the current and previous section on the
+    /// section stack.
+    void PushSection() {
+      PrevSectionStack.push_back(getPreviousSection());
+      CurSectionStack.push_back(getCurrentSection());
+    }
+
+    /// popSection - Restore the current and previous section from
+    /// the section stack.  Calls ChangeSection as needed.
+    ///
+    /// Returns false if the stack was empty.
+    bool PopSection() {
+      if (PrevSectionStack.size() <= 1)
+        return false;
+      assert(CurSectionStack.size() > 1);
+      PrevSectionStack.pop_back();
+      const MCSection *oldSection = CurSectionStack.pop_back_val();
+      const MCSection *curSection = CurSectionStack.back();
+
+      if (oldSection != curSection)
+        ChangeSection(curSection);
+      return true;
+    }
 
     /// SwitchSection - Set the current section where code is being emitted to
     /// @p Section.  This is required to update CurSection.
     ///
     /// This corresponds to assembler directives like .section, .text, etc.
-    virtual void SwitchSection(const MCSection *Section) = 0;
-    
+    void SwitchSection(const MCSection *Section) {
+      assert(Section && "Cannot switch to a null section!");
+      const MCSection *curSection = CurSectionStack.back();
+      PrevSectionStack.back() = curSection;
+      if (Section != curSection) {
+        CurSectionStack.back() = Section;
+        ChangeSection(Section);
+      }
+    }
+
+    /// InitSections - Create the default sections and set the initial one.
+    virtual void InitSections() = 0;
+
     /// EmitLabel - Emit a label for @p Symbol into the current section.
     ///
     /// This corresponds to an assembler statement such as:
@@ -123,6 +190,10 @@ namespace llvm {
     /// EmitAssemblerFlag - Note in the output the specified @p Flag
     virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0;
 
+    /// EmitThumbFunc - Note in the output that the specified @p Func is
+    /// a Thumb mode function (ARM target only).
+    virtual void EmitThumbFunc(MCSymbol *Func) = 0;
+
     /// EmitAssignment - Emit an assignment of @p Value to @p Symbol.
     ///
     /// This corresponds to an assembler statement such as:
@@ -136,6 +207,15 @@ namespace llvm {
     /// @param Value - The value for the symbol.
     virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) = 0;
 
+    /// EmitWeakReference - Emit an weak reference from @p Alias to @p Symbol.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///  .weakref alias, symbol
+    ///
+    /// @param Alias - The alias that is being created.
+    /// @param Symbol - The symbol being aliased.
+    virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) = 0;
+
     /// EmitSymbolAttribute - Add the given @p Attribute to @p Symbol.
     virtual void EmitSymbolAttribute(MCSymbol *Symbol,
                                      MCSymbolAttr Attribute) = 0;
@@ -170,7 +250,7 @@ namespace llvm {
     ///  .size symbol, expression
     ///
     virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) = 0;
-    
+
     /// EmitCommonSymbol - Emit a common symbol.
     ///
     /// @param Symbol - The common symbol to emit.
@@ -185,7 +265,7 @@ namespace llvm {
     /// @param Symbol - The common symbol to emit.
     /// @param Size - The size of the common symbol.
     virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) = 0;
-    
+
     /// EmitZerofill - Emit the zerofill section and an optional symbol.
     ///
     /// @param Section - The zerofill section to create and or to put the symbol
@@ -204,7 +284,7 @@ namespace llvm {
     /// @param ByteAlignment - The alignment of the thread local common symbol
     /// if non-zero.  This must be a power of 2 on some targets.
     virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                                uint64_t Size, unsigned ByteAlignment = 0) = 0;                                
+                                uint64_t Size, unsigned ByteAlignment = 0) = 0;
     /// @}
     /// @name Generating Data
     /// @{
@@ -224,38 +304,67 @@ namespace llvm {
     /// @param Value - The value to emit.
     /// @param Size - The size of the integer (in bytes) to emit. This must
     /// match a native machine width.
-    virtual void EmitValue(const MCExpr *Value, unsigned Size,
-                           unsigned AddrSpace = 0) = 0;
+    virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                               bool isPCRel, unsigned AddrSpace) = 0;
+
+    void EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace = 0);
+
+    void EmitPCRelValue(const MCExpr *Value, unsigned Size,
+                        unsigned AddrSpace = 0);
 
     /// EmitIntValue - Special case of EmitValue that avoids the client having
     /// to pass in a MCExpr for constant integers.
     virtual void EmitIntValue(uint64_t Value, unsigned Size,
                               unsigned AddrSpace = 0);
-    
+
+    /// EmitAbsValue - Emit the Value, but try to avoid relocations. On MachO
+    /// this is done by producing
+    /// foo = value
+    /// .long foo
+    void EmitAbsValue(const MCExpr *Value, unsigned Size,
+                      unsigned AddrSpace = 0);
+
+    virtual void EmitULEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) = 0;
+
+    virtual void EmitSLEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) = 0;
+
+    /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
+    /// client having to pass in a MCExpr for constant integers.
+    void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0);
+
+    /// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the
+    /// client having to pass in a MCExpr for constant integers.
+    void EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace = 0);
+
     /// EmitSymbolValue - Special case of EmitValue that avoids the client
     /// having to pass in a MCExpr for MCSymbols.
-    virtual void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
-                                 unsigned AddrSpace);
-    
+    void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+                         unsigned AddrSpace = 0);
+
+    void EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
+                              unsigned AddrSpace = 0);
+
     /// EmitGPRel32Value - Emit the expression @p Value into the output as a
     /// gprel32 (32-bit GP relative) value.
     ///
     /// This is used to implement assembler directives such as .gprel32 on
     /// targets that support them.
-    virtual void EmitGPRel32Value(const MCExpr *Value) = 0;
-    
+    virtual void EmitGPRel32Value(const MCExpr *Value);
+
     /// EmitFill - Emit NumBytes bytes worth of the value specified by
     /// FillValue.  This implements directives such as '.space'.
     virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
                           unsigned AddrSpace);
-    
+
     /// EmitZeros - Emit NumBytes worth of zeros.  This is a convenience
     /// function that just wraps EmitFill.
     void EmitZeros(uint64_t NumBytes, unsigned AddrSpace) {
       EmitFill(NumBytes, 0, AddrSpace);
     }
 
-    
+
     /// EmitValueToAlignment - Emit some number of copies of @p Value until
     /// the byte alignment @p ByteAlignment is reached.
     ///
@@ -301,17 +410,47 @@ namespace llvm {
     /// @param Value - The value to use when filling bytes.
     virtual void EmitValueToOffset(const MCExpr *Offset,
                                    unsigned char Value = 0) = 0;
-    
+
     /// @}
-    
+
     /// EmitFileDirective - Switch to a new logical file.  This is used to
     /// implement the '.file "foo.c"' assembler directive.
     virtual void EmitFileDirective(StringRef Filename) = 0;
-    
+
     /// EmitDwarfFileDirective - Associate a filename with a specified logical
     /// file number.  This implements the DWARF2 '.file 4 "foo.c"' assembler
     /// directive.
-    virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) = 0;
+    virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename);
+
+    /// EmitDwarfLocDirective - This implements the DWARF2
+    // '.loc fileno lineno ...' assembler directive.
+    virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                       unsigned Column, unsigned Flags,
+                                       unsigned Isa,
+                                       unsigned Discriminator);
+
+    virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                          const MCSymbol *LastLabel,
+                                          const MCSymbol *Label) = 0;
+
+    virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                           const MCSymbol *Label) {
+    }
+
+    void EmitDwarfSetLineAddr(int64_t LineDelta, const MCSymbol *Label,
+                              int PointerSize);
+
+    virtual bool EmitCFIStartProc();
+    virtual bool EmitCFIEndProc();
+    virtual bool EmitCFIDefCfa(int64_t Register, int64_t Offset);
+    virtual bool EmitCFIDefCfaOffset(int64_t Offset);
+    virtual bool EmitCFIDefCfaRegister(int64_t Register);
+    virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
+    virtual bool EmitCFIPersonality(const MCSymbol *Sym,
+                                    unsigned Encoding);
+    virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+    virtual bool EmitCFIRememberState();
+    virtual bool EmitCFIRestoreState();
 
     /// EmitInstruction - Emit the given @p Instruction into the current
     /// section.
@@ -322,7 +461,7 @@ namespace llvm {
     /// indicated by the hasRawTextSupport() predicate.  By default this aborts.
     virtual void EmitRawText(StringRef String);
     void EmitRawText(const Twine &String);
-    
+
     /// Finish - Finish emission of machine code.
     virtual void Finish() = 0;
   };
@@ -342,12 +481,18 @@ namespace llvm {
   /// \param CE - If given, a code emitter to use to show the instruction
   /// encoding inline with the assembly. This method takes ownership of \arg CE.
   ///
+  /// \param TAB - If given, a target asm backend to use to show the fixup
+  /// information in conjunction with encoding information. This method takes
+  /// ownership of \arg TAB.
+  ///
   /// \param ShowInst - Whether to show the MCInst representation inline with
   /// the assembly.
   MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
-                                bool isLittleEndian, bool isVerboseAsm,
+                                bool isVerboseAsm,
+                                bool useLoc,
                                 MCInstPrinter *InstPrint = 0,
                                 MCCodeEmitter *CE = 0,
+                                TargetAsmBackend *TAB = 0,
                                 bool ShowInst = false);
 
   /// createMachOStreamer - Create a machine code streamer which will generate
@@ -371,7 +516,7 @@ namespace llvm {
   /// ELF format object files.
   MCStreamer *createELFStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
 				raw_ostream &OS, MCCodeEmitter *CE,
-				bool RelaxAll = false);
+				bool RelaxAll, bool NoExecStack);
 
   /// createLoggingStreamer - Create a machine code streamer which just logs the
   /// API calls and then dispatches to another streamer.
@@ -379,6 +524,13 @@ namespace llvm {
   /// The new streamer takes ownership of the \arg Child.
   MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS);
 
+  /// createPureStreamer - Create a machine code streamer which will generate
+  /// "pure" MC object files, for use with MC-JIT and testing tools.
+  ///
+  /// Takes ownership of \arg TAB and \arg CE.
+  MCStreamer *createPureStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
+                                 raw_ostream &OS, MCCodeEmitter *CE);
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 1b432c2b0a84..7da4d7c15e3b 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -52,15 +52,14 @@ namespace llvm {
     /// "Lfoo" or ".foo".
     unsigned IsTemporary : 1;
 
-    /// IsUsedInExpr - True if this symbol has been used in an expression and
-    /// cannot be redefined.
-    unsigned IsUsedInExpr : 1;
+    /// IsUsed - True if this symbol has been used.
+    mutable unsigned IsUsed : 1;
 
   private:  // MCContext creates and uniques these.
     friend class MCContext;
     MCSymbol(StringRef name, bool isTemporary)
       : Name(name), Section(0), Value(0),
-        IsTemporary(isTemporary), IsUsedInExpr(false) {}
+        IsTemporary(isTemporary), IsUsed(false) {}
 
     MCSymbol(const MCSymbol&);       // DO NOT IMPLEMENT
     void operator=(const MCSymbol&); // DO NOT IMPLEMENT
@@ -74,9 +73,9 @@ namespace llvm {
     /// isTemporary - Check if this is an assembler temporary symbol.
     bool isTemporary() const { return IsTemporary; }
 
-    /// isUsedInExpr - Check if this is an assembler temporary symbol.
-    bool isUsedInExpr() const { return IsUsedInExpr; }
-    void setUsedInExpr(bool Value) { IsUsedInExpr = Value; }
+    /// isUsed - Check if this is used.
+    bool isUsed() const { return IsUsed; }
+    void setUsed(bool Value) const { IsUsed = Value; }
 
     /// @}
     /// @name Associated Sections
@@ -135,9 +134,15 @@ namespace llvm {
     /// getValue() - Get the value for variable symbols.
     const MCExpr *getVariableValue() const {
       assert(isVariable() && "Invalid accessor!");
+      IsUsed = true;
       return Value;
     }
 
+    // AliasedSymbol() - If this is an alias (a = b), return the symbol
+    // we ultimately point to. For a non alias, this just returns the symbol
+    // itself.
+    const MCSymbol &AliasedSymbol() const;
+
     void setVariableValue(const MCExpr *Value);
 
     /// @}
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index 11b6c2a17b70..df8dbd930bf7 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_MC_MCVALUE_H
 #define LLVM_MC_MCVALUE_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/MC/MCSymbol.h"
 #include <cassert>
 
diff --git a/include/llvm/MC/MachObjectWriter.h b/include/llvm/MC/MachObjectWriter.h
deleted file mode 100644
index 9b1ff1db8471..000000000000
--- a/include/llvm/MC/MachObjectWriter.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- llvm/MC/MachObjectWriter.h - Mach-O File Writer ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MACHOBJECTWRITER_H
-#define LLVM_MC_MACHOBJECTWRITER_H
-
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-namespace llvm {
-class MCAssembler;
-class MCFragment;
-class MCFixup;
-class MCValue;
-class raw_ostream;
-
-class MachObjectWriter : public MCObjectWriter {
-  void *Impl;
-
-public:
-  MachObjectWriter(raw_ostream &OS, bool Is64Bit, bool IsLittleEndian = true);
-  virtual ~MachObjectWriter();
-
-  virtual void ExecutePostLayoutBinding(MCAssembler &Asm);
-
-  virtual void RecordRelocation(const MCAssembler &Asm,
-                                const MCAsmLayout &Layout,
-                                const MCFragment *Fragment,
-                                const MCFixup &Fixup, MCValue Target,
-                                uint64_t &FixedValue);
-
-  virtual void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index f5a80a3dced2..a6c3f039a11e 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -144,9 +144,6 @@ public:
   unsigned getNumOperands() const { return NumOperands; }
   
   /// isFunctionLocal - Return whether MDNode is local to a function.
-  /// Note: MDNodes are designated as function-local when created, and keep
-  ///       that designation even if their operands are modified to no longer
-  ///       refer to function-local IR.
   bool isFunctionLocal() const {
     return (getSubclassDataFromValue() & FunctionLocalBit) != 0;
   }
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index b7880ca2cb76..f95895e95773 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -20,7 +20,7 @@
 #include "llvm/GlobalAlias.h"
 #include "llvm/Metadata.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h
new file mode 100644
index 000000000000..31cd523ea219
--- /dev/null
+++ b/include/llvm/Object/MachOFormat.h
@@ -0,0 +1,367 @@
+//===- MachOFormat.h - Mach-O Format Structures And Constants ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares various structures and constants which are platform
+// independent and can be shared by any client which wishes to interact with
+// Mach object files.
+//
+// The definitions here are purposely chosen to match the LLVM style as opposed
+// to following the platform specific definition of the format.
+//
+// On a Mach system, see the <mach-o/...> includes for more information, in
+// particular <mach-o/loader.h>.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOFORMAT_H
+#define LLVM_OBJECT_MACHOFORMAT_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+namespace object {
+
+/// General Mach platform information.
+namespace mach {
+  /// @name CPU Type and Subtype Information
+  /// {
+
+  /// \brief Capability bits used in CPU type encoding.
+  enum CPUTypeFlagsMask {
+    CTFM_ArchMask =  0xFF000000,
+    CTFM_ArchABI64 = 0x01000000
+  };
+
+  /// \brief Machine type IDs used in CPU type encoding.
+  enum CPUTypeMachine {
+    CTM_i386      = 7,
+    CTM_x86_64    = CTM_i386 | CTFM_ArchABI64,
+    CTM_ARM       = 12,
+    CTM_SPARC     = 14,
+    CTM_PowerPC   = 18,
+    CTM_PowerPC64 = CTM_PowerPC | CTFM_ArchABI64
+  };
+
+  /// \brief Capability bits used in CPU subtype encoding.
+  enum CPUSubtypeFlagsMask {
+    CSFM_SubtypeMask =  0xFF000000,
+    CSFM_SubtypeLib64 = 0x80000000
+  };
+
+  /// \brief ARM Machine Subtypes.
+  enum CPUSubtypeARM {
+    CSARM_ALL    = 0,
+    CSARM_V4T    = 5,
+    CSARM_V6     = 6,
+    CSARM_V5TEJ  = 7,
+    CSARM_XSCALE = 8,
+    CSARM_V7     = 9
+  };
+
+  /// \brief PowerPC Machine Subtypes.
+  enum CPUSubtypePowerPC {
+    CSPPC_ALL = 0
+  };
+
+  /// \brief SPARC Machine Subtypes.
+  enum CPUSubtypeSPARC {
+    CSSPARC_ALL = 0
+  };
+
+  /// \brief x86 Machine Subtypes.
+  enum CPUSubtypeX86 {
+    CSX86_ALL = 3
+  };
+
+  /// @}
+
+} // end namespace mach
+
+/// Format information for Mach object files.
+namespace macho {
+  /// \brief Constants for structure sizes.
+  enum StructureSizes {
+    Header32Size = 28,
+    Header64Size = 32,
+    SegmentLoadCommand32Size = 56,
+    SegmentLoadCommand64Size = 72,
+    Section32Size = 68,
+    Section64Size = 80,
+    SymtabLoadCommandSize = 24,
+    DysymtabLoadCommandSize = 80,
+    Nlist32Size = 12,
+    Nlist64Size = 16,
+    RelocationInfoSize = 8
+  };
+
+  /// \brief Constants for header magic field.
+  enum HeaderMagic {
+    HM_Object32 = 0xFEEDFACE,  ///< 32-bit mach object file
+    HM_Object64 = 0xFEEDFACF,  ///< 64-bit mach object file
+    HM_Universal = 0xCAFEBABE  ///< Universal object file
+  };
+
+  /// \brief Header common to all Mach object files.
+  struct Header {
+    uint32_t Magic;
+    uint32_t CPUType;
+    uint32_t CPUSubtype;
+    uint32_t FileType;
+    uint32_t NumLoadCommands;
+    uint32_t SizeOfLoadCommands;
+    uint32_t Flags;
+  };
+
+  /// \brief Extended header for 64-bit object files.
+  struct Header64Ext {
+    uint32_t Reserved;
+  };
+
+  // See <mach-o/loader.h>.
+  enum HeaderFileType {
+    HFT_Object = 0x1
+  };
+
+  enum HeaderFlags {
+    HF_SubsectionsViaSymbols = 0x2000
+  };
+
+  enum LoadCommandType {
+    LCT_Segment = 0x1,
+    LCT_Symtab = 0x2,
+    LCT_Dysymtab = 0xb,
+    LCT_Segment64 = 0x19,
+    LCT_UUID = 0x1b
+  };
+
+  /// \brief Load command structure.
+  struct LoadCommand {
+    uint32_t Type;
+    uint32_t Size;
+  };
+
+  /// @name Load Command Structures
+  /// @{
+
+  struct SegmentLoadCommand {
+    uint32_t Type;
+    uint32_t Size;
+    char Name[16];
+    uint32_t VMAddress;
+    uint32_t VMSize;
+    uint32_t FileOffset;
+    uint32_t FileSize;
+    uint32_t MaxVMProtection;
+    uint32_t InitialVMProtection;
+    uint32_t NumSections;
+    uint32_t Flags;
+  };
+
+  struct Segment64LoadCommand {
+    uint32_t Type;
+    uint32_t Size;
+    char Name[16];
+    uint64_t VMAddress;
+    uint64_t VMSize;
+    uint64_t FileOffset;
+    uint64_t FileSize;
+    uint32_t MaxVMProtection;
+    uint32_t InitialVMProtection;
+    uint32_t NumSections;
+    uint32_t Flags;
+  };
+
+  struct SymtabLoadCommand {
+    uint32_t Type;
+    uint32_t Size;
+    uint32_t SymbolTableOffset;
+    uint32_t NumSymbolTableEntries;
+    uint32_t StringTableOffset;
+    uint32_t StringTableSize;
+  };
+
+  struct DysymtabLoadCommand {
+    uint32_t Type;
+    uint32_t Size;
+
+    uint32_t LocalSymbolsIndex;
+    uint32_t NumLocalSymbols;
+
+    uint32_t ExternalSymbolsIndex;
+    uint32_t NumExternalSymbols;
+
+    uint32_t UndefinedSymbolsIndex;
+    uint32_t NumUndefinedSymbols;
+
+    uint32_t TOCOffset;
+    uint32_t NumTOCEntries;
+
+    uint32_t ModuleTableOffset;
+    uint32_t NumModuleTableEntries;
+
+    uint32_t ReferenceSymbolTableOffset;
+    uint32_t NumReferencedSymbolTableEntries;
+
+    uint32_t IndirectSymbolTableOffset;
+    uint32_t NumIndirectSymbolTableEntries;
+
+    uint32_t ExternalRelocationTableOffset;
+    uint32_t NumExternalRelocationTableEntries;
+
+    uint32_t LocalRelocationTableOffset;
+    uint32_t NumLocalRelocationTableEntries;
+  };
+
+  /// @}
+  /// @name Section Data
+  /// @{
+
+  struct Section {
+    char Name[16];
+    char SegmentName[16];
+    uint32_t Address;
+    uint32_t Size;
+    uint32_t Offset;
+    uint32_t Align;
+    uint32_t RelocationTableOffset;
+    uint32_t NumRelocationTableEntries;
+    uint32_t Flags;
+    uint32_t Reserved1;
+    uint32_t Reserved2;
+  };
+  struct Section64 {
+    char Name[16];
+    char SegmentName[16];
+    uint64_t Address;
+    uint64_t Size;
+    uint32_t Offset;
+    uint32_t Align;
+    uint32_t RelocationTableOffset;
+    uint32_t NumRelocationTableEntries;
+    uint32_t Flags;
+    uint32_t Reserved1;
+    uint32_t Reserved2;
+    uint32_t Reserved3;
+  };
+
+  /// @}
+  /// @name Symbol Table Entries
+  /// @{
+
+  struct SymbolTableEntry {
+    uint32_t StringIndex;
+    uint8_t Type;
+    uint8_t SectionIndex;
+    uint16_t Flags;
+    uint32_t Value;
+  };
+  struct Symbol64TableEntry {
+    uint32_t StringIndex;
+    uint8_t Type;
+    uint8_t SectionIndex;
+    uint16_t Flags;
+    uint64_t Value;
+  };
+
+  /// @}
+  /// @name Indirect Symbol Table
+  /// @{
+
+  struct IndirectSymbolTableEntry {
+    uint32_t Index;
+  };
+
+  /// @}
+  /// @name Relocation Data
+  /// @{
+
+  struct RelocationEntry {
+    uint32_t Word0;
+    uint32_t Word1;
+  };
+
+  /// @}
+
+  // See <mach-o/nlist.h>.
+  enum SymbolTypeType {
+    STT_Undefined = 0x00,
+    STT_Absolute  = 0x02,
+    STT_Section   = 0x0e
+  };
+
+  enum SymbolTypeFlags {
+    // If any of these bits are set, then the entry is a stab entry number (see
+    // <mach-o/stab.h>. Otherwise the other masks apply.
+    STF_StabsEntryMask = 0xe0,
+
+    STF_TypeMask       = 0x0e,
+    STF_External       = 0x01,
+    STF_PrivateExtern  = 0x10
+  };
+
+  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
+  /// symbol entry.
+  enum IndirectSymbolFlags {
+    ISF_Local    = 0x80000000,
+    ISF_Absolute = 0x40000000
+  };
+
+  /// RelocationFlags - Special flags for addresses.
+  enum RelocationFlags {
+    RF_Scattered = 0x80000000
+  };
+
+  /// Common relocation info types.
+  enum RelocationInfoType {
+    RIT_Vanilla             = 0,
+    RIT_Pair                = 1,
+    RIT_Difference          = 2
+  };
+
+  /// Generic relocation info types, which are shared by some (but not all)
+  /// platforms.
+  enum RelocationInfoType_Generic {
+    RIT_Generic_PreboundLazyPointer = 3,
+    RIT_Generic_LocalDifference     = 4,
+    RIT_Generic_TLV                 = 5
+  };
+
+  /// X86_64 uses its own relocation types.
+  enum RelocationInfoTypeX86_64 {
+    // Note that x86_64 doesn't even share the common relocation types.
+    RIT_X86_64_Unsigned   = 0,
+    RIT_X86_64_Signed     = 1,
+    RIT_X86_64_Branch     = 2,
+    RIT_X86_64_GOTLoad    = 3,
+    RIT_X86_64_GOT        = 4,
+    RIT_X86_64_Subtractor = 5,
+    RIT_X86_64_Signed1    = 6,
+    RIT_X86_64_Signed2    = 7,
+    RIT_X86_64_Signed4    = 8,
+    RIT_X86_64_TLV        = 9
+  };
+
+  /// ARM uses its own relocation types.
+  enum RelocationInfoTypeARM {
+    RIT_ARM_LocalDifference = 3,
+    RIT_ARM_PreboundLazyPointer = 4,
+    RIT_ARM_Branch24Bit = 5,
+    RIT_ARM_ThumbBranch22Bit = 6,
+    RIT_ARM_ThumbBranch32Bit = 7,
+    RIT_ARM_Half = 8,
+    RIT_ARM_HalfDifference = 9
+
+  };
+
+} // end namespace macho
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
new file mode 100644
index 000000000000..03d9c147b413
--- /dev/null
+++ b/include/llvm/Object/MachOObject.h
@@ -0,0 +1,180 @@
+//===- MachOObject.h - Mach-O Object File Wrapper ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOOBJECT_H
+#define LLVM_OBJECT_MACHOOBJECT_H
+
+#include <string>
+#include "llvm/ADT/InMemoryStruct.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/MachOFormat.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace object {
+
+/// \brief Wrapper object for manipulating Mach-O object files.
+///
+/// This class is designed to implement a full-featured, efficient, portable,
+/// and robust Mach-O interface to Mach-O object files. It does not attempt to
+/// smooth over rough edges in the Mach-O format or generalize access to object
+/// independent features.
+///
+/// The class is designed around accessing the Mach-O object which is expected
+/// to be fully loaded into memory.
+///
+/// This class is *not* suitable for concurrent use. For efficient operation,
+/// the class uses APIs which rely on the ability to cache the results of
+/// certain calls in internal objects which are not safe for concurrent
+/// access. This allows the API to be zero-copy on the common paths.
+//
+// FIXME: It would be cool if we supported a "paged" MemoryBuffer
+// implementation. This would allow us to implement a more sensible version of
+// MemoryObject which can work like a MemoryBuffer, but be more efficient for
+// objects which are in the current address space.
+class MachOObject {
+public:
+  struct LoadCommandInfo {
+    /// The load command information.
+    macho::LoadCommand Command;
+
+    /// The offset to the start of the load command in memory.
+    uint64_t Offset;
+  };
+
+private:
+  OwningPtr<MemoryBuffer> Buffer;
+
+  /// Whether the object is little endian.
+  bool IsLittleEndian;
+  /// Whether the object is 64-bit.
+  bool Is64Bit;
+  /// Whether the object is swapped endianness from the host.
+  bool IsSwappedEndian;
+  /// Whether the string table has been registered.
+  bool HasStringTable;
+
+  /// The cached information on the load commands.
+  LoadCommandInfo *LoadCommands;
+  mutable unsigned NumLoadedCommands;
+
+  /// The cached copy of the header.
+  macho::Header Header;
+  macho::Header64Ext Header64Ext;
+
+  /// Cache string table information.
+  StringRef StringTable;
+
+private:
+  MachOObject(MemoryBuffer *Buffer, bool IsLittleEndian, bool Is64Bit);
+
+public:
+  ~MachOObject();
+
+  /// \brief Load a Mach-O object from a MemoryBuffer object.
+  ///
+  /// \param Buffer - The buffer to load the object from. This routine takes
+  /// exclusive ownership of the buffer (which is passed to the returned object
+  /// on success).
+  /// \param ErrorStr [out] - If given, will be set to a user readable error
+  /// message on failure.
+  /// \returns The loaded object, or null on error.
+  static MachOObject *LoadFromBuffer(MemoryBuffer *Buffer,
+                                     std::string *ErrorStr = 0);
+
+  /// @name File Information
+  /// @{
+
+  bool isLittleEndian() const { return IsLittleEndian; }
+  bool isSwappedEndian() const { return IsSwappedEndian; }
+  bool is64Bit() const { return Is64Bit; }
+
+  unsigned getHeaderSize() const {
+    return Is64Bit ? macho::Header64Size : macho::Header32Size;
+  }
+
+  StringRef getData(size_t Offset, size_t Size) const;
+
+  /// @}
+  /// @name String Table Data
+  /// @{
+
+  StringRef getStringTableData() const {
+    assert(HasStringTable && "String table has not been registered!");
+    return StringTable;
+  }
+
+  StringRef getStringAtIndex(unsigned Index) const {
+    size_t End = getStringTableData().find('\0', Index);
+    return getStringTableData().slice(Index, End);
+  }
+
+  void RegisterStringTable(macho::SymtabLoadCommand &SLC);
+
+  /// @}
+  /// @name Object Header Access
+  /// @{
+
+  const macho::Header &getHeader() const { return Header; }
+  const macho::Header64Ext &getHeader64Ext() const {
+    assert(is64Bit() && "Invalid access!");
+    return Header64Ext;
+  }
+
+  /// @}
+  /// @name Object Structure Access
+  /// @{
+
+  /// \brief Retrieve the information for the given load command.
+  const LoadCommandInfo &getLoadCommandInfo(unsigned Index) const;
+
+  void ReadSegmentLoadCommand(
+    const LoadCommandInfo &LCI,
+    InMemoryStruct<macho::SegmentLoadCommand> &Res) const;
+  void ReadSegment64LoadCommand(
+    const LoadCommandInfo &LCI,
+    InMemoryStruct<macho::Segment64LoadCommand> &Res) const;
+  void ReadSymtabLoadCommand(
+    const LoadCommandInfo &LCI,
+    InMemoryStruct<macho::SymtabLoadCommand> &Res) const;
+  void ReadDysymtabLoadCommand(
+    const LoadCommandInfo &LCI,
+    InMemoryStruct<macho::DysymtabLoadCommand> &Res) const;
+  void ReadIndirectSymbolTableEntry(
+    const macho::DysymtabLoadCommand &DLC,
+    unsigned Index,
+    InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const;
+  void ReadSection(
+    const LoadCommandInfo &LCI,
+    unsigned Index,
+    InMemoryStruct<macho::Section> &Res) const;
+  void ReadSection64(
+    const LoadCommandInfo &LCI,
+    unsigned Index,
+    InMemoryStruct<macho::Section64> &Res) const;
+  void ReadRelocationEntry(
+    uint64_t RelocationTableOffset, unsigned Index,
+    InMemoryStruct<macho::RelocationEntry> &Res) const;
+  void ReadSymbolTableEntry(
+    uint64_t SymbolTableOffset, unsigned Index,
+    InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+  void ReadSymbol64TableEntry(
+    uint64_t SymbolTableOffset, unsigned Index,
+    InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
+
+  /// @}
+};
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
new file mode 100644
index 000000000000..eee9d447cddf
--- /dev/null
+++ b/include/llvm/Object/ObjectFile.h
@@ -0,0 +1,262 @@
+//===- ObjectFile.h - File format independent object file -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_OBJECT_FILE_H
+#define LLVM_OBJECT_OBJECT_FILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
+
+namespace llvm {
+
+class MemoryBuffer;
+class StringRef;
+
+namespace object {
+
+class ObjectFile;
+
+union DataRefImpl {
+  struct {
+    uint32_t a, b;
+  } d;
+  intptr_t p;
+};
+
+static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) {
+  // Check bitwise identical. This is the only legal way to compare a union w/o
+  // knowing which member is in use.
+  return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0;
+}
+
+/// SymbolRef - This is a value type class that represents a single symbol in
+/// the list of symbols in the object file.
+class SymbolRef {
+  DataRefImpl SymbolPimpl;
+  const ObjectFile *OwningObject;
+
+public:
+  SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
+
+  bool operator==(const SymbolRef &Other) const;
+
+  SymbolRef getNext() const;
+
+  StringRef getName() const;
+  uint64_t  getAddress() const;
+  uint64_t  getSize() const;
+
+  /// Returns the ascii char that should be displayed in a symbol table dump via
+  /// nm for this symbol.
+  char      getNMTypeChar() const;
+
+  /// Returns true for symbols that are internal to the object file format such
+  /// as section symbols.
+  bool      isInternal() const;
+};
+
+/// SectionRef - This is a value type class that represents a single section in
+/// the list of sections in the object file.
+class SectionRef {
+  DataRefImpl SectionPimpl;
+  const ObjectFile *OwningObject;
+
+public:
+  SectionRef(DataRefImpl SectionP, const ObjectFile *Owner);
+
+  bool operator==(const SectionRef &Other) const;
+
+  SectionRef getNext() const;
+
+  StringRef getName() const;
+  uint64_t  getAddress() const;
+  uint64_t  getSize() const;
+  StringRef getContents() const;
+
+  // FIXME: Move to the normalization layer when it's created.
+  bool      isText() const;
+};
+
+const uint64_t UnknownAddressOrSize = ~0ULL;
+
+/// ObjectFile - This class is the base class for all object file types.
+/// Concrete instances of this object are created by createObjectFile, which
+/// figure out which type to create.
+class ObjectFile {
+private:
+  ObjectFile(); // = delete
+  ObjectFile(const ObjectFile &other); // = delete
+
+protected:
+  MemoryBuffer *MapFile;
+  const uint8_t *base;
+
+  ObjectFile(MemoryBuffer *Object);
+
+  // These functions are for SymbolRef to call internally. The main goal of
+  // this is to allow SymbolRef::SymbolPimpl to point directly to the symbol
+  // entry in the memory mapped object file. SymbolPimpl cannot contain any
+  // virtual functions because then it could not point into the memory mapped
+  // file.
+  friend class SymbolRef;
+  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const = 0;
+  virtual StringRef getSymbolName(DataRefImpl Symb) const = 0;
+  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const = 0;
+  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const = 0;
+  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const = 0;
+  virtual bool      isSymbolInternal(DataRefImpl Symb) const = 0;
+
+  // Same as above for SectionRef.
+  friend class SectionRef;
+  virtual SectionRef getSectionNext(DataRefImpl Sec) const = 0;
+  virtual StringRef  getSectionName(DataRefImpl Sec) const = 0;
+  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const = 0;
+  virtual uint64_t   getSectionSize(DataRefImpl Sec) const = 0;
+  virtual StringRef  getSectionContents(DataRefImpl Sec) const = 0;
+  virtual bool       isSectionText(DataRefImpl Sec) const = 0;
+
+
+public:
+  template<class content_type>
+  class content_iterator {
+    content_type Current;
+  public:
+    content_iterator(content_type symb)
+      : Current(symb) {}
+
+    const content_type* operator->() const {
+      return &Current;
+    }
+
+    bool operator==(const content_iterator &other) const {
+      return Current == other.Current;
+    }
+
+    bool operator!=(const content_iterator &other) const {
+      return !(*this == other);
+    }
+
+    content_iterator& operator++() {  // Preincrement
+      Current = Current.getNext();
+      return *this;
+    }
+  };
+
+  typedef content_iterator<SymbolRef> symbol_iterator;
+  typedef content_iterator<SectionRef> section_iterator;
+
+  virtual ~ObjectFile();
+
+  virtual symbol_iterator begin_symbols() const = 0;
+  virtual symbol_iterator end_symbols() const = 0;
+
+  virtual section_iterator begin_sections() const = 0;
+  virtual section_iterator end_sections() const = 0;
+
+  /// @brief The number of bytes used to represent an address in this object
+  ///        file format.
+  virtual uint8_t getBytesInAddress() const = 0;
+
+  virtual StringRef getFileFormatName() const = 0;
+  virtual /* Triple::ArchType */ unsigned getArch() const = 0;
+
+  StringRef getFilename() const;
+
+  /// @returns Pointer to ObjectFile subclass to handle this type of object.
+  /// @param ObjectPath The path to the object file. ObjectPath.isObject must
+  ///        return true.
+  /// @brief Create ObjectFile from path.
+  static ObjectFile *createObjectFile(StringRef ObjectPath);
+  static ObjectFile *createObjectFile(MemoryBuffer *Object);
+
+private:
+  static ObjectFile *createCOFFObjectFile(MemoryBuffer *Object);
+  static ObjectFile *createELFObjectFile(MemoryBuffer *Object);
+  static ObjectFile *createMachOObjectFile(MemoryBuffer *Object);
+  static ObjectFile *createArchiveObjectFile(MemoryBuffer *Object);
+  static ObjectFile *createLibObjectFile(MemoryBuffer *Object);
+};
+
+// Inline function definitions.
+inline SymbolRef::SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner)
+  : SymbolPimpl(SymbolP)
+  , OwningObject(Owner) {}
+
+inline bool SymbolRef::operator==(const SymbolRef &Other) const {
+  return SymbolPimpl == Other.SymbolPimpl;
+}
+
+inline SymbolRef SymbolRef::getNext() const {
+  return OwningObject->getSymbolNext(SymbolPimpl);
+}
+
+inline StringRef SymbolRef::getName() const {
+  return OwningObject->getSymbolName(SymbolPimpl);
+}
+
+inline uint64_t SymbolRef::getAddress() const {
+  return OwningObject->getSymbolAddress(SymbolPimpl);
+}
+
+inline uint64_t SymbolRef::getSize() const {
+  return OwningObject->getSymbolSize(SymbolPimpl);
+}
+
+inline char SymbolRef::getNMTypeChar() const {
+  return OwningObject->getSymbolNMTypeChar(SymbolPimpl);
+}
+
+inline bool SymbolRef::isInternal() const {
+  return OwningObject->isSymbolInternal(SymbolPimpl);
+}
+
+
+/// SectionRef
+inline SectionRef::SectionRef(DataRefImpl SectionP,
+                              const ObjectFile *Owner)
+  : SectionPimpl(SectionP)
+  , OwningObject(Owner) {}
+
+inline bool SectionRef::operator==(const SectionRef &Other) const {
+  return SectionPimpl == Other.SectionPimpl;
+}
+
+inline SectionRef SectionRef::getNext() const {
+  return OwningObject->getSectionNext(SectionPimpl);
+}
+
+inline StringRef SectionRef::getName() const {
+  return OwningObject->getSectionName(SectionPimpl);
+}
+
+inline uint64_t SectionRef::getAddress() const {
+  return OwningObject->getSectionAddress(SectionPimpl);
+}
+
+inline uint64_t SectionRef::getSize() const {
+  return OwningObject->getSectionSize(SectionPimpl);
+}
+
+inline StringRef SectionRef::getContents() const {
+  return OwningObject->getSectionContents(SectionPimpl);
+}
+
+inline bool SectionRef::isText() const {
+  return OwningObject->isSectionText(SectionPimpl);
+}
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/OperandTraits.h b/include/llvm/OperandTraits.h
index b614ccbc3777..f0df5fa9bde8 100644
--- a/include/llvm/OperandTraits.h
+++ b/include/llvm/OperandTraits.h
@@ -27,27 +27,17 @@ namespace llvm {
 /// when it is a prefix to the User object, and the number of Use objects is
 /// known at compile time.
 
-template <unsigned ARITY>
+template <typename SubClass, unsigned ARITY>
 struct FixedNumOperandTraits {
-  static Use *op_begin(User* U) {
+  static Use *op_begin(SubClass* U) {
     return reinterpret_cast<Use*>(U) - ARITY;
   }
-  static Use *op_end(User* U) {
+  static Use *op_end(SubClass* U) {
     return reinterpret_cast<Use*>(U);
   }
   static unsigned operands(const User*) {
     return ARITY;
   }
-  struct prefix {
-    Use Ops[ARITY];
-    prefix(); // DO NOT IMPLEMENT
-  };
-  template <class U>
-  struct Layout {
-    struct overlay : public prefix, public U {
-      overlay(); // DO NOT IMPLEMENT
-    };
-  };
 };
 
 //===----------------------------------------------------------------------===//
@@ -57,8 +47,8 @@ struct FixedNumOperandTraits {
 /// OptionalOperandTraits - when the number of operands may change at runtime.
 /// Naturally it may only decrease, because the allocations may not change.
 
-template <unsigned ARITY = 1>
-struct OptionalOperandTraits : public FixedNumOperandTraits<ARITY> {
+template <typename SubClass, unsigned ARITY = 1>
+struct OptionalOperandTraits : public FixedNumOperandTraits<SubClass, ARITY> {
   static unsigned operands(const User *U) {
     return U->getNumOperands();
   }
@@ -72,12 +62,12 @@ struct OptionalOperandTraits : public FixedNumOperandTraits<ARITY> {
 /// when it is a prefix to the User object, and the number of Use objects is
 /// only known at allocation time.
 
-template <unsigned MINARITY = 0>
+template <typename SubClass, unsigned MINARITY = 0>
 struct VariadicOperandTraits {
-  static Use *op_begin(User* U) {
-    return reinterpret_cast<Use*>(U) - U->getNumOperands();
+  static Use *op_begin(SubClass* U) {
+    return reinterpret_cast<Use*>(U) - static_cast<User*>(U)->getNumOperands();
   }
-  static Use *op_end(User* U) {
+  static Use *op_end(SubClass* U) {
     return reinterpret_cast<Use*>(U);
   }
   static unsigned operands(const User *U) {
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 60865aa8ad45..ff2a0ad5e4e9 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -99,19 +99,21 @@ public:
   /// hasNoSignedWrap - Test whether this operation is known to never
   /// undergo signed overflow, aka the nsw property.
   bool hasNoSignedWrap() const {
-    return SubclassOptionalData & NoSignedWrap;
+    return (SubclassOptionalData & NoSignedWrap) != 0;
   }
 
   static inline bool classof(const OverflowingBinaryOperator *) { return true; }
   static inline bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::Add ||
            I->getOpcode() == Instruction::Sub ||
-           I->getOpcode() == Instruction::Mul;
+           I->getOpcode() == Instruction::Mul ||
+           I->getOpcode() == Instruction::Shl;
   }
   static inline bool classof(const ConstantExpr *CE) {
     return CE->getOpcode() == Instruction::Add ||
            CE->getOpcode() == Instruction::Sub ||
-           CE->getOpcode() == Instruction::Mul;
+           CE->getOpcode() == Instruction::Mul ||
+           CE->getOpcode() == Instruction::Shl;
   }
   static inline bool classof(const Value *V) {
     return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
@@ -119,105 +121,97 @@ public:
   }
 };
 
-/// AddOperator - Utility class for integer addition operators.
-///
-class AddOperator : public OverflowingBinaryOperator {
-  ~AddOperator(); // do not implement
-public:
-  static inline bool classof(const AddOperator *) { return true; }
-  static inline bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Add;
-  }
-  static inline bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::Add;
-  }
-  static inline bool classof(const Value *V) {
-    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
-           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
-  }
-};
-
-/// SubOperator - Utility class for integer subtraction operators.
-///
-class SubOperator : public OverflowingBinaryOperator {
-  ~SubOperator(); // do not implement
-public:
-  static inline bool classof(const SubOperator *) { return true; }
-  static inline bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Sub;
-  }
-  static inline bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::Sub;
-  }
-  static inline bool classof(const Value *V) {
-    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
-           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
-  }
-};
-
-/// MulOperator - Utility class for integer multiplication operators.
-///
-class MulOperator : public OverflowingBinaryOperator {
-  ~MulOperator(); // do not implement
-public:
-  static inline bool classof(const MulOperator *) { return true; }
-  static inline bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Mul;
-  }
-  static inline bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::Mul;
-  }
-  static inline bool classof(const Value *V) {
-    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
-           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
-  }
-};
-
-/// SDivOperator - An Operator with opcode Instruction::SDiv.
-///
-class SDivOperator : public Operator {
+/// PossiblyExactOperator - A udiv or sdiv instruction, which can be marked as
+/// "exact", indicating that no bits are destroyed.
+class PossiblyExactOperator : public Operator {
 public:
   enum {
     IsExact = (1 << 0)
   };
-
-private:
-  ~SDivOperator(); // do not implement
-
+  
   friend class BinaryOperator;
   friend class ConstantExpr;
   void setIsExact(bool B) {
     SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
   }
-
+  
+private:
+  ~PossiblyExactOperator(); // do not implement
 public:
   /// isExact - Test whether this division is known to be exact, with
   /// zero remainder.
   bool isExact() const {
     return SubclassOptionalData & IsExact;
   }
-
-  // Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const SDivOperator *) { return true; }
+  
+  static bool isPossiblyExactOpcode(unsigned OpC) {
+    return OpC == Instruction::SDiv ||
+           OpC == Instruction::UDiv ||
+           OpC == Instruction::AShr ||
+           OpC == Instruction::LShr;
+  }
   static inline bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::SDiv;
+    return isPossiblyExactOpcode(CE->getOpcode());
   }
   static inline bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::SDiv;
+    return isPossiblyExactOpcode(I->getOpcode());
   }
   static inline bool classof(const Value *V) {
     return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
            (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
   }
 };
+  
 
-class GEPOperator : public Operator {
+  
+/// ConcreteOperator - A helper template for defining operators for individual
+/// opcodes.
+template<typename SuperClass, unsigned Opc>
+class ConcreteOperator : public SuperClass {
+  ~ConcreteOperator(); // DO NOT IMPLEMENT
+public:
+  static inline bool classof(const ConcreteOperator<SuperClass, Opc> *) {
+    return true;
+  }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Opc;
+  }
+  static inline bool classof(const ConstantExpr *CE) {
+    return CE->getOpcode() == Opc;
+  }
+  static inline bool classof(const Value *V) {
+    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+  }
+};
+
+class AddOperator
+  : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {};
+class SubOperator
+  : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {};
+class MulOperator
+  : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {};
+class ShlOperator
+  : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {};
+
+  
+class SDivOperator
+  : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {};
+class UDivOperator
+  : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {};
+class AShrOperator
+  : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {};
+class LShrOperator
+  : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {};
+  
+  
+  
+class GEPOperator
+  : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
   enum {
     IsInBounds = (1 << 0)
   };
 
-  ~GEPOperator(); // do not implement
-
   friend class GetElementPtrInst;
   friend class ConstantExpr;
   void setIsInBounds(bool B) {
@@ -266,8 +260,8 @@ public:
   /// value, just potentially different types.
   bool hasAllZeroIndices() const {
     for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
-      if (Constant *C = dyn_cast<Constant>(I))
-        if (C->isNullValue())
+      if (ConstantInt *C = dyn_cast<ConstantInt>(I))
+        if (C->isZero())
           continue;
       return false;
     }
@@ -284,21 +278,6 @@ public:
     }
     return true;
   }
-  
-
-  // Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const GEPOperator *) { return true; }
-  static inline bool classof(const GetElementPtrInst *) { return true; }
-  static inline bool classof(const ConstantExpr *CE) {
-    return CE->getOpcode() == Instruction::GetElementPtr;
-  }
-  static inline bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::GetElementPtr;
-  }
-  static inline bool classof(const Value *V) {
-    return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
-           (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
-  }
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index f4c6eed2cf9a..ed0fb39f5d6c 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -57,6 +57,7 @@ enum PassManagerType {
   PMT_CallGraphPassManager,  ///< CGPassManager
   PMT_FunctionPassManager,   ///< FPPassManager
   PMT_LoopPassManager,       ///< LPPassManager
+  PMT_RegionPassManager,     ///< RGPassManager
   PMT_BasicBlockPassManager, ///< BBPassManager
   PMT_Last
 };
@@ -64,13 +65,14 @@ enum PassManagerType {
 // Different types of passes.
 enum PassKind {
   PT_BasicBlock,
+  PT_Region,
   PT_Loop,
   PT_Function,
   PT_CallGraphSCC,
   PT_Module,
   PT_PassManager
 };
-  
+
 //===----------------------------------------------------------------------===//
 /// Pass interface - Implemented by all 'passes'.  Subclass this if you are an
 /// interprocedural optimization or you do not fit into any of the more
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 17f4a0592fbb..c4f409ef525c 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -106,6 +106,7 @@ enum PassDebuggingString {
   ON_BASICBLOCK_MSG, // "'  on BasicBlock '" + PassName + "'...\n"
   ON_FUNCTION_MSG, // "' on Function '" + FunctionName + "'...\n"
   ON_MODULE_MSG, // "' on Module '" + ModuleName + "'...\n"
+  ON_REGION_MSG, // " 'on Region ...\n'"
   ON_LOOP_MSG, // " 'on Loop ...\n'"
   ON_CG_MSG // "' on Call Graph ...\n'"
 };  
@@ -184,10 +185,10 @@ public:
   void schedulePass(Pass *P);
 
   /// Set pass P as the last user of the given analysis passes.
-  void setLastUser(SmallVector<Pass *, 12> &AnalysisPasses, Pass *P);
+  void setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses, Pass *P);
 
   /// Collect passes whose last user is P
-  void collectLastUses(SmallVector<Pass *, 12> &LastUses, Pass *P);
+  void collectLastUses(SmallVectorImpl<Pass *> &LastUses, Pass *P);
 
   /// Find the pass that implements Analysis AID. Search immutable
   /// passes and all pass managers. If desired pass is not found
@@ -205,7 +206,7 @@ public:
     ImmutablePasses.push_back(P);
   }
 
-  inline SmallVector<ImmutablePass *, 8>& getImmutablePasses() {
+  inline SmallVectorImpl<ImmutablePass *>& getImmutablePasses() {
     return ImmutablePasses;
   }
 
@@ -313,8 +314,8 @@ public:
   /// Populate RequiredPasses with analysis pass that are required by
   /// pass P and are available. Populate ReqPassNotAvailable with analysis
   /// pass that are required by pass P but are not available.
-  void collectRequiredAnalysis(SmallVector<Pass *, 8> &RequiredPasses,
-                               SmallVector<AnalysisID, 8> &ReqPassNotAvailable,
+  void collectRequiredAnalysis(SmallVectorImpl<Pass *> &RequiredPasses,
+                               SmallVectorImpl<AnalysisID> &ReqPassNotAvailable,
                                Pass *P);
 
   /// All Required analyses should be available to the pass as it runs!  Here
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 59071391520a..5d89c492218d 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -8,61 +8,74 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines PassRegistry, a class that is used in the initialization
-// and registration of passes.  At initialization, passes are registered with
-// the PassRegistry, which is later provided to the PassManager for dependency
-// resolution and similar tasks.
+// and registration of passes.  At application startup, passes are registered
+// with the PassRegistry, which is later provided to the PassManager for 
+// dependency resolution and similar tasks.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_PASSREGISTRY_H
 #define LLVM_PASSREGISTRY_H
 
-#include "llvm/ADT/StringMap.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/System/Mutex.h"
-#include <map>
-#include <set>
-#include <vector>
+#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 
 class PassInfo;
 struct PassRegistrationListener;
 
+/// PassRegistry - This class manages the registration and intitialization of
+/// the pass subsystem as application startup, and assists the PassManager
+/// in resolving pass dependencies.
+/// NOTE: PassRegistry is NOT thread-safe.  If you want to use LLVM on multiple
+/// threads simultaneously, you will need to use a separate PassRegistry on
+/// each thread.
 class PassRegistry {
-  /// Guards the contents of this class.
-  mutable sys::SmartMutex<true> Lock;
-  
-  /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
-  typedef std::map<const void*, const PassInfo*> MapType;
-  MapType PassInfoMap;
-  
-  typedef StringMap<const PassInfo*> StringMapType;
-  StringMapType PassInfoStringMap;
-  
-  /// AnalysisGroupInfo - Keep track of information for each analysis group.
-  struct AnalysisGroupInfo {
-    std::set<const PassInfo *> Implementations;
-  };
-  std::map<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
-  
-  std::vector<PassRegistrationListener*> Listeners;
-
+  mutable void *pImpl;
+  void *getImpl() const;
+   
 public:
+  PassRegistry() : pImpl(0) { }
+  ~PassRegistry();
+  
+  /// getPassRegistry - Access the global registry object, which is 
+  /// automatically initialized at application launch and destroyed by
+  /// llvm_shutdown.
   static PassRegistry *getPassRegistry();
   
+  /// getPassInfo - Look up a pass' corresponding PassInfo, indexed by the pass'
+  /// type identifier (&MyPass::ID).
   const PassInfo *getPassInfo(const void *TI) const;
+  
+  /// getPassInfo - Look up a pass' corresponding PassInfo, indexed by the pass'
+  /// argument string.
   const PassInfo *getPassInfo(StringRef Arg) const;
   
-  void registerPass(const PassInfo &PI);
+  /// registerPass - Register a pass (by means of its PassInfo) with the 
+  /// registry.  Required in order to use the pass with a PassManager.
+  void registerPass(const PassInfo &PI, bool ShouldFree = false);
+  
+  /// registerPass - Unregister a pass (by means of its PassInfo) with the 
+  /// registry.
   void unregisterPass(const PassInfo &PI);
   
-  /// Analysis Group Mechanisms.
+  /// registerAnalysisGroup - Register an analysis group (or a pass implementing
+  // an analysis group) with the registry.  Like registerPass, this is required 
+  // in order for a PassManager to be able to use this group/pass.
   void registerAnalysisGroup(const void *InterfaceID, const void *PassID,
-                             PassInfo& Registeree, bool isDefault);
+                             PassInfo& Registeree, bool isDefault,
+                             bool ShouldFree = false);
   
+  /// enumerateWith - Enumerate the registered passes, calling the provided
+  /// PassRegistrationListener's passEnumerate() callback on each of them.
   void enumerateWith(PassRegistrationListener *L);
-  void addRegistrationListener(PassRegistrationListener* L);
+  
+  /// addRegistrationListener - Register the given PassRegistrationListener
+  /// to receive passRegistered() callbacks whenever a new pass is registered.
+  void addRegistrationListener(PassRegistrationListener *L);
+  
+  /// removeRegistrationListener - Unregister a PassRegistrationListener so that
+  /// it no longer receives passRegistered() callbacks.
   void removeRegistrationListener(PassRegistrationListener *L);
 };
 
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index 0f559d6df736..082790956c46 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -23,6 +23,9 @@
 
 #include "Pass.h"
 #include "llvm/PassRegistry.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Atomic.h"
+#include <vector>
 
 namespace llvm {
 
@@ -54,17 +57,14 @@ public:
            NormalCtor_t normal, bool isCFGOnly, bool is_analysis)
     : PassName(name), PassArgument(arg), PassID(pi), 
       IsCFGOnlyPass(isCFGOnly), 
-      IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) {
-    PassRegistry::getPassRegistry()->registerPass(*this);
-  }
+      IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) { }
   /// PassInfo ctor - Do not call this directly, this should only be invoked
   /// through RegisterPass. This version is for use by analysis groups; it
   /// does not auto-register the pass.
   PassInfo(const char *name, const void *pi)
     : PassName(name), PassArgument(""), PassID(pi), 
       IsCFGOnlyPass(false), 
-      IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(0) {
-  }
+      IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(0) { }
 
   /// getPassName - Return the friendly name for the pass, never returns null
   ///
@@ -129,8 +129,50 @@ private:
   PassInfo(const PassInfo &);       // do not implement
 };
 
+#define CALL_ONCE_INITIALIZATION(function) \
+  static volatile sys::cas_flag initialized = 0; \
+  sys::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0); \
+  if (old_val == 0) { \
+    function(Registry); \
+    sys::MemoryFence(); \
+    initialized = 2; \
+  } else { \
+    sys::cas_flag tmp = initialized; \
+    sys::MemoryFence(); \
+    while (tmp != 2) { \
+      tmp = initialized; \
+      sys::MemoryFence(); \
+    } \
+  }
+
 #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
-  static RegisterPass<passName> passName ## _info(arg, name, cfg, analysis)
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    return PI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
+
+#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) {
+
+#define INITIALIZE_PASS_DEPENDENCY(depName) \
+    initialize##depName##Pass(Registry);
+#define INITIALIZE_AG_DEPENDENCY(depName) \
+    initialize##depName##AnalysisGroup(Registry);
+
+#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis) \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    return PI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
 
 template<typename PassName>
 Pass *callDefaultCtor() { return new PassName(); }
@@ -161,7 +203,7 @@ struct RegisterPass : public PassInfo {
     : PassInfo(Name, PassArg, &passName::ID,
                PassInfo::NormalCtor_t(callDefaultCtor<passName>),
                CFGOnly, is_analysis) {
-    
+    PassRegistry::getPassRegistry()->registerPass(*this);
   }
 };
 
@@ -186,7 +228,7 @@ struct RegisterPass : public PassInfo {
 /// a nice name with the interface.
 ///
 class RegisterAGBase : public PassInfo {
-protected:
+public:
   RegisterAGBase(const char *Name,
                  const void *InterfaceID,
                  const void *PassID = 0,
@@ -206,9 +248,52 @@ struct RegisterAnalysisGroup : public RegisterAGBase {
   }
 };
 
+#define INITIALIZE_ANALYSIS_GROUP(agName, name, defaultPass) \
+  static void* initialize##agName##AnalysisGroupOnce(PassRegistry &Registry) { \
+    initialize##defaultPass##Pass(Registry); \
+    PassInfo *AI = new PassInfo(name, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, 0, *AI, false, true); \
+    return AI; \
+  } \
+  void llvm::initialize##agName##AnalysisGroup(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##agName##AnalysisGroupOnce) \
+  }
+
+
 #define INITIALIZE_AG_PASS(passName, agName, arg, name, cfg, analysis, def) \
-  static RegisterPass<passName> passName ## _info(arg, name, cfg, analysis); \
-  static RegisterAnalysisGroup<agName, def> passName ## _ag(passName ## _info)
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    if (!def) initialize##agName##AnalysisGroup(Registry); \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    \
+    PassInfo *AI = new PassInfo(name, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, & passName ::ID, \
+                                   *AI, def, true); \
+    return AI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
+
+
+#define INITIALIZE_AG_PASS_BEGIN(passName, agName, arg, n, cfg, analysis, def) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    if (!def) initialize##agName##AnalysisGroup(Registry);
+
+#define INITIALIZE_AG_PASS_END(passName, agName, arg, n, cfg, analysis, def) \
+    PassInfo *PI = new PassInfo(n, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    \
+    PassInfo *AI = new PassInfo(n, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, & passName ::ID, \
+                                   *AI, def, true); \
+    return AI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
 
 //===---------------------------------------------------------------------------
 /// PassRegistrationListener class - This class is meant to be derived from by
diff --git a/include/llvm/Support/AIXDataTypesFix.h b/include/llvm/Support/AIXDataTypesFix.h
new file mode 100644
index 000000000000..a9a9147de294
--- /dev/null
+++ b/include/llvm/Support/AIXDataTypesFix.h
@@ -0,0 +1,25 @@
+//===-- llvm/Support/AIXDataTypesFix.h - Fix datatype defs ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file overrides default system-defined types and limits which cannot be
+// done in DataTypes.h.in because it is processed by autoheader first, which
+// comments out any #undef statement
+//
+//===----------------------------------------------------------------------===//
+
+// No include guards desired!
+
+#ifndef SUPPORT_DATATYPES_H
+#error "AIXDataTypesFix.h must only be included via DataTypes.h!"
+#endif
+
+// GCC is strict about defining large constants: they must have LL modifier.
+// These will be defined properly at the end of DataTypes.h
+#undef INT64_MAX
+#undef INT64_MIN
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index 6a7a1a6bd223..cebfa7982d6d 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -49,12 +49,12 @@ struct AlignOf {
 
 };
 
-/// alignof - A templated function that returns the mininum alignment of
+/// alignOf - A templated function that returns the minimum alignment of
 ///  of a type.  This provides no extra functionality beyond the AlignOf
 ///  class besides some cosmetic cleanliness.  Example usage:
-///  alignof<int>() returns the alignment of an int.
+///  alignOf<int>() returns the alignment of an int.
 template <typename T>
-static inline unsigned alignof() { return AlignOf<T>::Alignment; }
+static inline unsigned alignOf() { return AlignOf<T>::Alignment; }
 
 } // end namespace llvm
 #endif
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 4a7251fa1ef3..c6807099f85e 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -16,13 +16,15 @@
 
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdlib>
 #include <cstddef>
 
 namespace llvm {
+template <typename T> struct ReferenceAdder { typedef T& result; };
+template <typename T> struct ReferenceAdder<T&> { typedef T result; };
 
 class MallocAllocator {
 public:
@@ -201,7 +203,7 @@ public:
       char *End = Slab == Allocator.CurSlab ? Allocator.CurPtr :
                                               (char *)Slab + Slab->Size;
       for (char *Ptr = (char*)(Slab+1); Ptr < End; Ptr += sizeof(T)) {
-        Ptr = Allocator.AlignPtr(Ptr, alignof<T>());
+        Ptr = Allocator.AlignPtr(Ptr, alignOf<T>());
         if (Ptr + sizeof(T) <= End)
           reinterpret_cast<T*>(Ptr)->~T();
       }
@@ -221,16 +223,12 @@ public:
 inline void *operator new(size_t Size, llvm::BumpPtrAllocator &Allocator) {
   struct S {
     char c;
-#ifdef __GNUC__
-    char x __attribute__((aligned));
-#else
     union {
       double D;
       long double LD;
       long long L;
       void *P;
     } x;
-#endif
   };
   return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size),
                                            offsetof(S, x)));
diff --git a/include/llvm/Support/Atomic.h b/include/llvm/Support/Atomic.h
new file mode 100644
index 000000000000..1a6c606aa5f6
--- /dev/null
+++ b/include/llvm/Support/Atomic.h
@@ -0,0 +1,39 @@
+//===- llvm/Support/Atomic.h - Atomic Operations -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_ATOMIC_H
+#define LLVM_SYSTEM_ATOMIC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  namespace sys {
+    void MemoryFence();
+
+#ifdef _MSC_VER
+    typedef long cas_flag;
+#else
+    typedef uint32_t cas_flag;
+#endif
+    cas_flag CompareAndSwap(volatile cas_flag* ptr,
+                            cas_flag new_value,
+                            cas_flag old_value);
+    cas_flag AtomicIncrement(volatile cas_flag* ptr);
+    cas_flag AtomicDecrement(volatile cas_flag* ptr);
+    cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val);
+    cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val);
+    cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val);
+  }
+}
+
+#endif
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 78254ae9921f..673925593e6a 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -11,7 +11,7 @@
 //
 // Structures and enums defined within this file where created using
 // information from Microsoft's publicly available PE/COFF format document:
-// 
+//
 // Microsoft Portable Executable and Common Object File Format Specification
 // Revision 8.1 - February 15, 2008
 //
@@ -23,7 +23,7 @@
 #ifndef LLVM_SUPPORT_WIN_COFF_H
 #define LLVM_SUPPORT_WIN_COFF_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cstring>
 
 namespace llvm {
@@ -69,7 +69,7 @@ namespace COFF {
     SF_ClassMask = 0x00FF0000,
     SF_ClassShift = 16,
 
-    SF_WeakReference = 0x01000000
+    SF_WeakExternal = 0x01000000
   };
 
   enum SymbolSectionNumber {
@@ -133,13 +133,13 @@ namespace COFF {
   };
 
   enum SymbolComplexType {
-    IMAGE_SYM_DTYPE_NULL     = 0, ///< No complex type; simple scalar variable. 
+    IMAGE_SYM_DTYPE_NULL     = 0, ///< No complex type; simple scalar variable.
     IMAGE_SYM_DTYPE_POINTER  = 1, ///< A pointer to base type.
     IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
     IMAGE_SYM_DTYPE_ARRAY    = 3, ///< An array of base type.
-    
+
     /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
-    SCT_COMPLEX_TYPE_SHIFT   = 8
+    SCT_COMPLEX_TYPE_SHIFT   = 4
   };
 
   struct section {
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index 9b6a4098b617..8a998a8cd0d1 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -52,12 +52,7 @@ public:
   CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
   CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
   CallSiteBase(ValTy *II) { *this = get(II); }
-  CallSiteBase(InstrTy *II) {
-    assert(II && "Null instruction given?");
-    *this = get(II);
-    assert(I.getPointer() && "Not a call?");
-  }
-
+protected:
   /// CallSiteBase::get - This static method is sort of like a constructor.  It
   /// will create an appropriate call site for a Call or Invoke instruction, but
   /// it can also create a null initialized CallSiteBase object for something
@@ -72,7 +67,7 @@ public:
     }
     return CallSiteBase();
   }
-
+public:
   /// isCall - true if a CallInst is enclosed.
   /// Note that !isCall() does not mean it is an InvokeInst enclosed,
   /// it also could signify a NULL Instruction pointer.
@@ -282,16 +277,6 @@ public:
 
   bool operator==(const CallSite &CS) const { return I == CS.I; }
   bool operator!=(const CallSite &CS) const { return I != CS.I; }
-
-  /// CallSite::get - This static method is sort of like a constructor.  It will
-  /// create an appropriate call site for a Call or Invoke instruction, but it
-  /// can also create a null initialized CallSite object for something which is
-  /// NOT a call site.
-  ///
-  static CallSite get(Value *V) {
-    return Base::get(V);
-  }
-
   bool operator<(const CallSite &CS) const {
     return getInstruction() < CS.getInstruction();
   }
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index c589171bbafe..6bb98064382e 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -232,8 +232,8 @@ inline typename cast_retty<X, Y>::ret_type dyn_cast(const Y &Val) {
 // value is accepted.
 //
 template <class X, class Y>
-inline typename cast_retty<X, Y>::ret_type dyn_cast_or_null(const Y &Val) {
-  return (Val && isa<X>(Val)) ? cast<X, Y>(Val) : 0;
+inline typename cast_retty<X, Y*>::ret_type dyn_cast_or_null(Y *Val) {
+  return (Val && isa<X>(Val)) ? cast<X>(Val) : 0;
 }
 
 } // End llvm namespace
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 14b36f80522d..67f0fd7e0dc6 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -15,48 +15,50 @@
 #ifndef LLVM_SUPPORT_COMPILER_H
 #define LLVM_SUPPORT_COMPILER_H
 
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
 /// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
 /// into a shared library, then the class should be private to the library and
 /// not accessible from outside it.  Can also be used to mark variables and
 /// functions, making them private to any shared library they are linked into.
-
-/// LLVM_GLOBAL_VISIBILITY - If a class marked with this attribute is linked
-/// into a shared library, then the class will be accessible from outside the
-/// the library.  Can also be used to mark variables and functions, making them
-/// accessible from outside any shared library they are linked into.
-#if defined(__MINGW32__) || defined(__CYGWIN__)
-#define LLVM_LIBRARY_VISIBILITY
-#define LLVM_GLOBAL_VISIBILITY __declspec(dllexport)
-#elif (__GNUC__ >= 4)
+#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
 #define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
-#define LLVM_GLOBAL_VISIBILITY __attribute__ ((visibility("default")))
 #else
 #define LLVM_LIBRARY_VISIBILITY
-#define LLVM_GLOBAL_VISIBILITY
 #endif
 
 #if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
-#define ATTRIBUTE_USED __attribute__((__used__))
+#define LLVM_ATTRIBUTE_USED __attribute__((__used__))
 #else
-#define ATTRIBUTE_USED
+#define LLVM_ATTRIBUTE_USED
 #endif
 
+// Some compilers warn about unused functions. When a function is sometimes
+// used or not depending on build settings (e.g. a function only called from
+// within "assert"), this attribute can be used to suppress such warnings.
+//
+// However, it shouldn't be used for unused *variables*, as those have a much
+// more portable solution:
+//   (void)unused_var_name;
+// Prefer cast-to-void wherever it is sufficient.
 #if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
-#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#define LLVM_ATTRIBUTE_UNUSED __attribute__((__unused__))
 #else
-#define ATTRIBUTE_UNUSED
+#define LLVM_ATTRIBUTE_UNUSED
 #endif
 
 #ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions.
-#define ATTRIBUTE_READNONE __attribute__((__const__))
+#define LLVM_ATTRIBUTE_READNONE __attribute__((__const__))
 #else
-#define ATTRIBUTE_READNONE
+#define LLVM_ATTRIBUTE_READNONE
 #endif
 
 #ifdef __GNUC__  // aka 'ATTRIBUTE_PURE' but following LLVM Conventions.
-#define ATTRIBUTE_READONLY __attribute__((__pure__))
+#define LLVM_ATTRIBUTE_READONLY __attribute__((__pure__))
 #else
-#define ATTRIBUTE_READONLY
+#define LLVM_ATTRIBUTE_READONLY
 #endif
 
 #if (__GNUC__ >= 4)
@@ -78,34 +80,50 @@
 #define TEMPLATE_INSTANTIATION(X)
 #endif
 
-// DISABLE_INLINE - On compilers where we have a directive to do so, mark a
-// method "not for inlining".
+// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
+// mark a method "not for inlining".
 #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-#define DISABLE_INLINE __attribute__((noinline))
+#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
 #elif defined(_MSC_VER)
-#define DISABLE_INLINE __declspec(noinline)
+#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline)
 #else
-#define DISABLE_INLINE
+#define LLVM_ATTRIBUTE_NOINLINE
 #endif
 
-// ALWAYS_INLINE - On compilers where we have a directive to do so, mark a
-// method "always inline" because it is performance sensitive.
-// GCC 3.4 supported this but is buggy in various cases and produces
-// unimplemented errors, just use it in GCC 4.0 and later.
+// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
+// so, mark a method "always inline" because it is performance sensitive. GCC
+// 3.4 supported this but is buggy in various cases and produces unimplemented
+// errors, just use it in GCC 4.0 and later.
 #if __GNUC__ > 3
-#define ALWAYS_INLINE __attribute__((always_inline))
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline
 #else
-// TODO: No idea how to do this with MSVC.
-#define ALWAYS_INLINE
+#define LLVM_ATTRIBUTE_ALWAYS_INLINE
 #endif
 
 
 #ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
+#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define LLVM_ATTRIBUTE_NORETURN
+#endif
+
+// LLVM_ATTRIBUTE_DEPRECATED(decl, "message")
+#if __has_feature(attribute_deprecated_with_message)
+# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
+  decl __attribute__((deprecated(message)))
+#elif defined(__GNUC__)
+# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
+  decl __attribute__((deprecated))
 #elif defined(_MSC_VER)
-#define NORETURN __declspec(noreturn)
+# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
+  __declspec(deprecated(message)) decl
 #else
-#define NORETURN
+# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
+  decl
 #endif
 
 #endif
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index ea6c5fd82a08..bd3765d592db 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -33,50 +33,34 @@ public:
   // Binary Operators
   //===--------------------------------------------------------------------===//
 
-  Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getAdd(LHS, RHS);
-  }
-  Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNSWAdd(LHS, RHS);
-  }
-  Constant *CreateNUWAdd(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNUWAdd(LHS, RHS);
+  Constant *CreateAdd(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW);
   }
   Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getFAdd(LHS, RHS);
   }
-  Constant *CreateSub(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getSub(LHS, RHS);
-  }
-  Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNSWSub(LHS, RHS);
-  }
-  Constant *CreateNUWSub(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNUWSub(LHS, RHS);
+  Constant *CreateSub(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW);
   }
   Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getFSub(LHS, RHS);
   }
-  Constant *CreateMul(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getMul(LHS, RHS);
-  }
-  Constant *CreateNSWMul(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNSWMul(LHS, RHS);
-  }
-  Constant *CreateNUWMul(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getNUWMul(LHS, RHS);
+  Constant *CreateMul(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW);
   }
   Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getFMul(LHS, RHS);
   }
-  Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getUDiv(LHS, RHS);
-  }
-  Constant *CreateSDiv(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getSDiv(LHS, RHS);
+  Constant *CreateUDiv(Constant *LHS, Constant *RHS,
+                       bool isExact = false) const {
+    return ConstantExpr::getUDiv(LHS, RHS, isExact);
   }
-  Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getExactSDiv(LHS, RHS);
+  Constant *CreateSDiv(Constant *LHS, Constant *RHS,
+                       bool isExact = false) const {
+    return ConstantExpr::getSDiv(LHS, RHS, isExact);
   }
   Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getFDiv(LHS, RHS);
@@ -90,14 +74,17 @@ public:
   Constant *CreateFRem(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getFRem(LHS, RHS);
   }
-  Constant *CreateShl(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getShl(LHS, RHS);
+  Constant *CreateShl(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW);
   }
-  Constant *CreateLShr(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getLShr(LHS, RHS);
+  Constant *CreateLShr(Constant *LHS, Constant *RHS,
+                       bool isExact = false) const {
+    return ConstantExpr::getLShr(LHS, RHS, isExact);
   }
-  Constant *CreateAShr(Constant *LHS, Constant *RHS) const {
-    return ConstantExpr::getAShr(LHS, RHS);
+  Constant *CreateAShr(Constant *LHS, Constant *RHS,
+                       bool isExact = false) const {
+    return ConstantExpr::getAShr(LHS, RHS, isExact);
   }
   Constant *CreateAnd(Constant *LHS, Constant *RHS) const {
     return ConstantExpr::getAnd(LHS, RHS);
@@ -118,14 +105,9 @@ public:
   // Unary Operators
   //===--------------------------------------------------------------------===//
 
-  Constant *CreateNeg(Constant *C) const {
-    return ConstantExpr::getNeg(C);
-  }
-  Constant *CreateNSWNeg(Constant *C) const {
-    return ConstantExpr::getNSWNeg(C);
-  }
-  Constant *CreateNUWNeg(Constant *C) const {
-    return ConstantExpr::getNUWNeg(C);
+  Constant *CreateNeg(Constant *C,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return ConstantExpr::getNeg(C, HasNUW, HasNSW);
   }
   Constant *CreateFNeg(Constant *C) const {
     return ConstantExpr::getFNeg(C);
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index 29086b2ac4f2..ced3a2cf2dbd 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -33,7 +33,7 @@
 #define LLVM_SUPPORT_CONSTANT_RANGE_H
 
 #include "llvm/ADT/APInt.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
@@ -54,7 +54,7 @@ public:
   /// @brief Initialize a range of values explicitly. This will assert out if
   /// Lower==Upper and Lower != Min or Max value for its type. It will also
   /// assert out if the two APInt's are not the same bit width.
-  ConstantRange(const APInt& Lower, const APInt& Upper);
+  ConstantRange(const APInt &Lower, const APInt &Upper);
 
   /// makeICmpRegion - Produce the smallest range that contains all values that
   /// might satisfy the comparison specified by Pred when compared to any value
@@ -92,6 +92,11 @@ public:
   ///
   bool isWrappedSet() const;
 
+  /// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+  /// its bitwidth, for example: i8 [120, 140).
+  ///
+  bool isSignWrappedSet() const;
+
   /// contains - Return true if the specified value is in the set.
   ///
   bool contains(const APInt &Val) const;
@@ -219,6 +224,14 @@ public:
   /// \p Other.
   ConstantRange udiv(const ConstantRange &Other) const;
 
+  /// binaryAnd - return a new range representing the possible values resulting
+  /// from a binary-and of a value in this range by a value in \p Other.
+  ConstantRange binaryAnd(const ConstantRange &Other) const;
+
+  /// binaryOr - return a new range representing the possible values resulting
+  /// from a binary-or of a value in this range by a value in \p Other.
+  ConstantRange binaryOr(const ConstantRange &Other) const;
+
   /// shl - Return a new range representing the possible values resulting
   /// from a left shift of a value in this range by a value in \p Other.
   /// TODO: This isn't fully implemented yet.
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
index d66609fddfec..2e9b5d4aa541 100644
--- a/include/llvm/Support/CrashRecoveryContext.h
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -67,6 +67,14 @@ public:
   /// the backtrace of the crash on failures.
   bool RunSafely(void (*Fn)(void*), void *UserData);
 
+  /// \brief Execute the provide callback function (with the given arguments) in
+  /// a protected context which is run in another thread (optionally with a
+  /// requested stack size).
+  ///
+  /// See RunSafely() and llvm_execute_on_thread().
+  bool RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+                         unsigned RequestedStackSize = 0);
+
   /// \brief Explicitly trigger a crash recovery in the current process, and
   /// return failure from RunSafely(). This function does not return.
   void HandleCrash();
diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake
new file mode 100644
index 000000000000..72c451873c0f
--- /dev/null
+++ b/include/llvm/Support/DataTypes.h.cmake
@@ -0,0 +1,189 @@
+/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough.  This file exports the following            *|
+|* definitions:                                                               *|
+|*                                                                            *|
+|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
+|*                                                                            *|
+|* No library is required when using these functinons.                        *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
+#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
+#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+#ifndef _MSC_VER
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+   being defined.  We would define it here, but in order to prevent Bad Things
+   happening when system headers or C++ STL headers include stdint.h before we
+   define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+        "#including Support/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/Support/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+#else /* _MSC_VER */
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+   built-in data types. */
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/types.h>
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed int ssize_t;
+#ifndef INT8_MAX
+# define INT8_MAX 127
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN -128
+#endif
+#ifndef UINT8_MAX
+# define UINT8_MAX 255
+#endif
+#ifndef INT16_MAX
+# define INT16_MAX 32767
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN -32768
+#endif
+#ifndef UINT16_MAX
+# define UINT16_MAX 65535
+#endif
+#ifndef INT32_MAX
+# define INT32_MAX 2147483647
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN -2147483648
+#endif
+#ifndef UINT32_MAX
+# define UINT32_MAX 4294967295U
+#endif
+/* Certain compatibility updates to VC++ introduce the `cstdint'
+ * header, which defines the INT*_C macros. On default installs they
+ * are absent. */
+#ifndef INT8_C
+# define INT8_C(C)   C##i8
+#endif
+#ifndef UINT8_C
+# define UINT8_C(C)  C##ui8
+#endif
+#ifndef INT16_C
+# define INT16_C(C)  C##i16
+#endif
+#ifndef UINT16_C
+# define UINT16_C(C) C##ui16
+#endif
+#ifndef INT32_C
+# define INT32_C(C)  C##i32
+#endif
+#ifndef UINT32_C
+# define UINT32_C(C) C##ui32
+#endif
+#ifndef INT64_C
+# define INT64_C(C)  C##i64
+#endif
+#ifndef UINT64_C
+# define UINT64_C(C) C##ui64
+#endif
+#endif /* _MSC_VER */
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif  /* SUPPORT_DATATYPES_H */
diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in
new file mode 100644
index 000000000000..5965e8c0b2a9
--- /dev/null
+++ b/include/llvm/Support/DataTypes.h.in
@@ -0,0 +1,111 @@
+/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough.  This file exports the following            *|
+|* definitions:                                                               *|
+|*                                                                            *|
+|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
+|*                                                                            *|
+|* No library is required when using these functions.                         *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#undef HAVE_SYS_TYPES_H
+#undef HAVE_INTTYPES_H
+#undef HAVE_STDINT_H
+#undef HAVE_UINT64_T
+#undef HAVE_U_INT64_T
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+   being defined.  We would define it here, but in order to prevent Bad Things
+   happening when system headers or C++ STL headers include stdint.h before we
+   define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+        "#including System/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/Support/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif  /* SUPPORT_DATATYPES_H */
diff --git a/include/llvm/Support/Disassembler.h b/include/llvm/Support/Disassembler.h
new file mode 100644
index 000000000000..6d1cc0fdcb50
--- /dev/null
+++ b/include/llvm/Support/Disassembler.h
@@ -0,0 +1,35 @@
+//===- llvm/Support/Disassembler.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_DISASSEMBLER_H
+#define LLVM_SYSTEM_DISASSEMBLER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+/// This function returns true, if there is possible to use some external
+/// disassembler library. False otherwise.
+bool hasDisassembler();
+
+/// This function provides some "glue" code to call external disassembler
+/// libraries.
+std::string disassembleBuffer(uint8_t* start, size_t length, uint64_t pc = 0);
+
+}
+}
+
+#endif // LLVM_SYSTEM_DISASSEMBLER_H
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 3ca8d96dfc37..5d0b5a943d56 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -22,7 +22,8 @@ namespace llvm {
 // Debug info constants.
 
 enum {
-  LLVMDebugVersion = (8 << 16),         // Current version of debug information.
+  LLVMDebugVersion = (9 << 16),         // Current version of debug information.
+  LLVMDebugVersion8 = (8 << 16),         // Cconstant for version 8.
   LLVMDebugVersion7 = (7 << 16),        // Constant for version 7.
   LLVMDebugVersion6 = (6 << 16),        // Constant for version 6.
   LLVMDebugVersion5 = (5 << 16),        // Constant for version 5.
@@ -44,11 +45,9 @@ enum llvm_dwarf_constants {
   // llvm mock tags
   DW_TAG_invalid = ~0U,                 // Tag for invalid results.
 
-  DW_TAG_anchor = 0,                    // Tag for descriptor anchors.
   DW_TAG_auto_variable = 0x100,         // Tag for local (auto) variables.
   DW_TAG_arg_variable = 0x101,          // Tag for argument variables.
   DW_TAG_return_variable = 0x102,       // Tag for return variables.
-
   DW_TAG_vector_type = 0x103,           // Tag for vector types.
 
   DW_TAG_user_base = 0x1000,            // Recommended base for user tags.
@@ -118,6 +117,7 @@ enum dwarf_constants {
   DW_TAG_imported_unit = 0x3d,
   DW_TAG_condition = 0x3f,
   DW_TAG_shared_type = 0x40,
+  DW_TAG_rvalue_reference_type = 0x41,
   DW_TAG_lo_user = 0x4080,
   DW_TAG_hi_user = 0xffff,
 
@@ -509,6 +509,7 @@ enum dwarf_constants {
   DW_DSC_range = 0x01,
 
   // Line Number Standard Opcode Encodings
+  DW_LNS_extended_op = 0x00,
   DW_LNS_copy = 0x01,
   DW_LNS_advance_pc = 0x02,
   DW_LNS_advance_line = 0x03,
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
new file mode 100644
index 000000000000..e6d9ff57ae83
--- /dev/null
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -0,0 +1,86 @@
+//===-- llvm/Support/DynamicLibrary.h - Portable Dynamic Library -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the sys::DynamicLibrary class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H
+#define LLVM_SYSTEM_DYNAMIC_LIBRARY_H
+
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  /// This class provides a portable interface to dynamic libraries which also
+  /// might be known as shared libraries, shared objects, dynamic shared
+  /// objects, or dynamic link libraries. Regardless of the terminology or the
+  /// operating system interface, this class provides a portable interface that
+  /// allows dynamic libraries to be loaded and searched for externally
+  /// defined symbols. This is typically used to provide "plug-in" support.
+  /// It also allows for symbols to be defined which don't live in any library,
+  /// but rather the main program itself, useful on Windows where the main
+  /// executable cannot be searched.
+  class DynamicLibrary {
+    DynamicLibrary(); // DO NOT IMPLEMENT
+  public:
+    /// This function allows a library to be loaded without instantiating a
+    /// DynamicLibrary object. Consequently, it is marked as being permanent
+    /// and will only be unloaded when the program terminates.  This returns
+    /// false on success or returns true and fills in *ErrMsg on failure.
+    /// @brief Open a dynamic library permanently.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static bool LoadLibraryPermanently(const char *filename,
+                                       std::string *ErrMsg = 0);
+
+    /// This function will search through all previously loaded dynamic
+    /// libraries for the symbol \p symbolName. If it is found, the addressof
+    /// that symbol is returned. If not, null is returned. Note that this will
+    /// search permanently loaded libraries (LoadLibraryPermanently) as well
+    /// as ephemerally loaded libraries (constructors).
+    /// @throws std::string on error.
+    /// @brief Search through libraries for address of a symbol
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void *SearchForAddressOfSymbol(const char *symbolName);
+
+    /// @brief Convenience function for C++ophiles.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void *SearchForAddressOfSymbol(const std::string &symbolName) {
+      return SearchForAddressOfSymbol(symbolName.c_str());
+    }
+
+    /// This functions permanently adds the symbol \p symbolName with the
+    /// value \p symbolValue.  These symbols are searched before any
+    /// libraries.
+    /// @brief Add searchable symbol/value pair.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void AddSymbol(const char *symbolName, void *symbolValue);
+
+    /// @brief Convenience function for C++ophiles.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void AddSymbol(const std::string &symbolName, void *symbolValue) {
+      AddSymbol(symbolName.c_str(), symbolValue);
+    }
+  };
+
+} // End sys namespace
+} // End llvm namespace
+
+#endif // LLVM_SYSTEM_DYNAMIC_LIBRARY_H
diff --git a/include/llvm/Support/DynamicLinker.h b/include/llvm/Support/DynamicLinker.h
deleted file mode 100644
index b60ffa875c63..000000000000
--- a/include/llvm/Support/DynamicLinker.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- llvm/Support/DynamicLinker.h - Portable Dynamic Linker --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Lightweight interface to dynamic library linking and loading, and dynamic
-// symbol lookup functionality, in whatever form the operating system
-// provides it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_DYNAMICLINKER_H
-#define LLVM_SUPPORT_DYNAMICLINKER_H
-
-#include <string>
-
-namespace llvm {
-
-/// LinkDynamicObject - Load the named file as a dynamic library
-/// and link it with the currently running process. Returns false
-/// on success, true if there is an error (and sets ErrorMessage
-/// if it is not NULL). Analogous to dlopen().
-///
-bool LinkDynamicObject (const char *filename, std::string *ErrorMessage);
-
-/// GetAddressOfSymbol - Returns the address of the named symbol in
-/// the currently running process, as reported by the dynamic linker,
-/// or NULL if the symbol does not exist or some other error has
-/// occurred.
-///
-void *GetAddressOfSymbol (const char *symbolName);
-void *GetAddressOfSymbol (const std::string &symbolName);
-
-} // End llvm namespace
-
-#endif // SUPPORT_DYNAMICLINKER_H
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 83478b75cbc4..cc72bd59cb70 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -20,7 +20,7 @@
 #ifndef LLVM_SUPPORT_ELF_H
 #define LLVM_SUPPORT_ELF_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cstring>
 
 namespace llvm {
@@ -126,25 +126,27 @@ enum {
 
 // Machine architectures
 enum {
-  EM_NONE = 0,  // No machine
-  EM_M32 = 1,   // AT&T WE 32100
-  EM_SPARC = 2, // SPARC
-  EM_386 = 3,   // Intel 386
-  EM_68K = 4,   // Motorola 68000
-  EM_88K = 5,   // Motorola 88000
-  EM_486 = 6,   // Intel 486 (deprecated)
-  EM_860 = 7,   // Intel 80860
-  EM_MIPS = 8,     // MIPS R3000
-  EM_PPC = 20,     // PowerPC
-  EM_PPC64 = 21,   // PowerPC64
-  EM_ARM = 40,     // ARM
-  EM_ALPHA = 41,   // DEC Alpha
-  EM_SPARCV9 = 43, // SPARC V9
-  EM_X86_64 = 62   // AMD64
+  EM_NONE = 0,      // No machine
+  EM_M32 = 1,       // AT&T WE 32100
+  EM_SPARC = 2,     // SPARC
+  EM_386 = 3,       // Intel 386
+  EM_68K = 4,       // Motorola 68000
+  EM_88K = 5,       // Motorola 88000
+  EM_486 = 6,       // Intel 486 (deprecated)
+  EM_860 = 7,       // Intel 80860
+  EM_MIPS = 8,      // MIPS R3000
+  EM_PPC = 20,      // PowerPC
+  EM_PPC64 = 21,    // PowerPC64
+  EM_ARM = 40,      // ARM
+  EM_ALPHA = 41,    // DEC Alpha
+  EM_SPARCV9 = 43,  // SPARC V9
+  EM_X86_64 = 62,   // AMD64
+  EM_MBLAZE = 47787 // Xilinx MicroBlaze
 };
 
 // Object file classes.
 enum {
+  ELFCLASSNONE = 0,
   ELFCLASS32 = 1, // 32-bit object file
   ELFCLASS64 = 2  // 64-bit object file
 };
@@ -231,12 +233,206 @@ enum {
   R_386_GOTOFF        = 9,
   R_386_GOTPC         = 10,
   R_386_32PLT         = 11,
+  R_386_TLS_TPOFF     = 14,
+  R_386_TLS_IE        = 15,
+  R_386_TLS_GOTIE     = 16,
+  R_386_TLS_LE        = 17,
+  R_386_TLS_GD        = 18,
+  R_386_TLS_LDM       = 19,
   R_386_16            = 20,
   R_386_PC16          = 21,
   R_386_8             = 22,
-  R_386_PC8           = 23
+  R_386_PC8           = 23,
+  R_386_TLS_GD_32     = 24,
+  R_386_TLS_GD_PUSH   = 25,
+  R_386_TLS_GD_CALL   = 26,
+  R_386_TLS_GD_POP    = 27,
+  R_386_TLS_LDM_32    = 28,
+  R_386_TLS_LDM_PUSH  = 29,
+  R_386_TLS_LDM_CALL  = 30,
+  R_386_TLS_LDM_POP   = 31,
+  R_386_TLS_LDO_32    = 32,
+  R_386_TLS_IE_32     = 33,
+  R_386_TLS_LE_32     = 34,
+  R_386_TLS_DTPMOD32  = 35,
+  R_386_TLS_DTPOFF32  = 36,
+  R_386_TLS_TPOFF32   = 37,
+  R_386_TLS_GOTDESC   = 39,
+  R_386_TLS_DESC_CALL = 40,
+  R_386_TLS_DESC      = 41,
+  R_386_IRELATIVE     = 42,
+  R_386_NUM           = 43
+};
+
+// MBlaze relocations.
+enum {
+  R_MICROBLAZE_NONE           = 0,
+  R_MICROBLAZE_32             = 1,
+  R_MICROBLAZE_32_PCREL       = 2,
+  R_MICROBLAZE_64_PCREL       = 3,
+  R_MICROBLAZE_32_PCREL_LO    = 4,
+  R_MICROBLAZE_64             = 5,
+  R_MICROBLAZE_32_LO          = 6,
+  R_MICROBLAZE_SRO32          = 7,
+  R_MICROBLAZE_SRW32          = 8,
+  R_MICROBLAZE_64_NONE        = 9,
+  R_MICROBLAZE_32_SYM_OP_SYM  = 10,
+  R_MICROBLAZE_GNU_VTINHERIT  = 11,
+  R_MICROBLAZE_GNU_VTENTRY    = 12,
+  R_MICROBLAZE_GOTPC_64       = 13,
+  R_MICROBLAZE_GOT_64         = 14,
+  R_MICROBLAZE_PLT_64         = 15,
+  R_MICROBLAZE_REL            = 16,
+  R_MICROBLAZE_JUMP_SLOT      = 17,
+  R_MICROBLAZE_GLOB_DAT       = 18,
+  R_MICROBLAZE_GOTOFF_64      = 19,
+  R_MICROBLAZE_GOTOFF_32      = 20,
+  R_MICROBLAZE_COPY           = 21
+};
+
+
+// ARM Specific e_flags
+enum { EF_ARM_EABIMASK = 0xFF000000U };
+
+// ELF Relocation types for ARM
+// Meets 2.08 ABI Specs.
+
+enum {
+  R_ARM_NONE                  = 0x00,
+  R_ARM_PC24                  = 0x01,
+  R_ARM_ABS32                 = 0x02,
+  R_ARM_REL32                 = 0x03,
+  R_ARM_LDR_PC_G0             = 0x04,
+  R_ARM_ABS16                 = 0x05,
+  R_ARM_ABS12                 = 0x06,
+  R_ARM_THM_ABS5              = 0x07,
+  R_ARM_ABS8                  = 0x08,
+  R_ARM_SBREL32               = 0x09,
+  R_ARM_THM_CALL              = 0x0a,
+  R_ARM_THM_PC8               = 0x0b,
+  R_ARM_BREL_ADJ              = 0x0c,
+  R_ARM_TLS_DESC              = 0x0d,
+  R_ARM_THM_SWI8              = 0x0e,
+  R_ARM_XPC25                 = 0x0f,
+  R_ARM_THM_XPC22             = 0x10,
+  R_ARM_TLS_DTPMOD32          = 0x11,
+  R_ARM_TLS_DTPOFF32          = 0x12,
+  R_ARM_TLS_TPOFF32           = 0x13,
+  R_ARM_COPY                  = 0x14,
+  R_ARM_GLOB_DAT              = 0x15,
+  R_ARM_JUMP_SLOT             = 0x16,
+  R_ARM_RELATIVE              = 0x17,
+  R_ARM_GOTOFF32              = 0x18,
+  R_ARM_BASE_PREL             = 0x19,
+  R_ARM_GOT_BREL              = 0x1a,
+  R_ARM_PLT32                 = 0x1b,
+  R_ARM_CALL                  = 0x1c,
+  R_ARM_JUMP24                = 0x1d,
+  R_ARM_THM_JUMP24            = 0x1e,
+  R_ARM_BASE_ABS              = 0x1f,
+  R_ARM_ALU_PCREL_7_0         = 0x20,
+  R_ARM_ALU_PCREL_15_8        = 0x21,
+  R_ARM_ALU_PCREL_23_15       = 0x22,
+  R_ARM_LDR_SBREL_11_0_NC     = 0x23,
+  R_ARM_ALU_SBREL_19_12_NC    = 0x24,
+  R_ARM_ALU_SBREL_27_20_CK    = 0x25,
+  R_ARM_TARGET1               = 0x26,
+  R_ARM_SBREL31               = 0x27,
+  R_ARM_V4BX                  = 0x28,
+  R_ARM_TARGET2               = 0x29,
+  R_ARM_PREL31                = 0x2a,
+  R_ARM_MOVW_ABS_NC           = 0x2b,
+  R_ARM_MOVT_ABS              = 0x2c,
+  R_ARM_MOVW_PREL_NC          = 0x2d,
+  R_ARM_MOVT_PREL             = 0x2e,
+  R_ARM_THM_MOVW_ABS_NC       = 0x2f,
+  R_ARM_THM_MOVT_ABS          = 0x30,
+  R_ARM_THM_MOVW_PREL_NC      = 0x31,
+  R_ARM_THM_MOVT_PREL         = 0x32,
+  R_ARM_THM_JUMP19            = 0x33,
+  R_ARM_THM_JUMP6             = 0x34,
+  R_ARM_THM_ALU_PREL_11_0     = 0x35,
+  R_ARM_THM_PC12              = 0x36,
+  R_ARM_ABS32_NOI             = 0x37,
+  R_ARM_REL32_NOI             = 0x38,
+  R_ARM_ALU_PC_G0_NC          = 0x39,
+  R_ARM_ALU_PC_G0             = 0x3a,
+  R_ARM_ALU_PC_G1_NC          = 0x3b,
+  R_ARM_ALU_PC_G1             = 0x3c,
+  R_ARM_ALU_PC_G2             = 0x3d,
+  R_ARM_LDR_PC_G1             = 0x3e,
+  R_ARM_LDR_PC_G2             = 0x3f,
+  R_ARM_LDRS_PC_G0            = 0x40,
+  R_ARM_LDRS_PC_G1            = 0x41,
+  R_ARM_LDRS_PC_G2            = 0x42,
+  R_ARM_LDC_PC_G0             = 0x43,
+  R_ARM_LDC_PC_G1             = 0x44,
+  R_ARM_LDC_PC_G2             = 0x45,
+  R_ARM_ALU_SB_G0_NC          = 0x46,
+  R_ARM_ALU_SB_G0             = 0x47,
+  R_ARM_ALU_SB_G1_NC          = 0x48,
+  R_ARM_ALU_SB_G1             = 0x49,
+  R_ARM_ALU_SB_G2             = 0x4a,
+  R_ARM_LDR_SB_G0             = 0x4b,
+  R_ARM_LDR_SB_G1             = 0x4c,
+  R_ARM_LDR_SB_G2             = 0x4d,
+  R_ARM_LDRS_SB_G0            = 0x4e,
+  R_ARM_LDRS_SB_G1            = 0x4f,
+  R_ARM_LDRS_SB_G2            = 0x50,
+  R_ARM_LDC_SB_G0             = 0x51,
+  R_ARM_LDC_SB_G1             = 0x52,
+  R_ARM_LDC_SB_G2             = 0x53,
+  R_ARM_MOVW_BREL_NC          = 0x54,
+  R_ARM_MOVT_BREL             = 0x55,
+  R_ARM_MOVW_BREL             = 0x56,
+  R_ARM_THM_MOVW_BREL_NC      = 0x57,
+  R_ARM_THM_MOVT_BREL         = 0x58,
+  R_ARM_THM_MOVW_BREL         = 0x59,
+  R_ARM_TLS_GOTDESC           = 0x5a,
+  R_ARM_TLS_CALL              = 0x5b,
+  R_ARM_TLS_DESCSEQ           = 0x5c,
+  R_ARM_THM_TLS_CALL          = 0x5d,
+  R_ARM_PLT32_ABS             = 0x5e,
+  R_ARM_GOT_ABS               = 0x5f,
+  R_ARM_GOT_PREL              = 0x60,
+  R_ARM_GOT_BREL12            = 0x61,
+  R_ARM_GOTOFF12              = 0x62,
+  R_ARM_GOTRELAX              = 0x63,
+  R_ARM_GNU_VTENTRY           = 0x64,
+  R_ARM_GNU_VTINHERIT         = 0x65,
+  R_ARM_THM_JUMP11            = 0x66,
+  R_ARM_THM_JUMP8             = 0x67,
+  R_ARM_TLS_GD32              = 0x68,
+  R_ARM_TLS_LDM32             = 0x69,
+  R_ARM_TLS_LDO32             = 0x6a,
+  R_ARM_TLS_IE32              = 0x6b,
+  R_ARM_TLS_LE32              = 0x6c,
+  R_ARM_TLS_LDO12             = 0x6d,
+  R_ARM_TLS_LE12              = 0x6e,
+  R_ARM_TLS_IE12GP            = 0x6f,
+  R_ARM_PRIVATE_0             = 0x70,
+  R_ARM_PRIVATE_1             = 0x71,
+  R_ARM_PRIVATE_2             = 0x72,
+  R_ARM_PRIVATE_3             = 0x73,
+  R_ARM_PRIVATE_4             = 0x74,
+  R_ARM_PRIVATE_5             = 0x75,
+  R_ARM_PRIVATE_6             = 0x76,
+  R_ARM_PRIVATE_7             = 0x77,
+  R_ARM_PRIVATE_8             = 0x78,
+  R_ARM_PRIVATE_9             = 0x79,
+  R_ARM_PRIVATE_10            = 0x7a,
+  R_ARM_PRIVATE_11            = 0x7b,
+  R_ARM_PRIVATE_12            = 0x7c,
+  R_ARM_PRIVATE_13            = 0x7d,
+  R_ARM_PRIVATE_14            = 0x7e,
+  R_ARM_PRIVATE_15            = 0x7f,
+  R_ARM_ME_TOO                = 0x80,
+  R_ARM_THM_TLS_DESCSEQ16     = 0x81,
+  R_ARM_THM_TLS_DESCSEQ32     = 0x82
 };
 
+
+
 // Section header.
 struct Elf32_Shdr {
   Elf32_Word sh_name;      // Section name (index into string table)
@@ -273,6 +469,7 @@ enum {
   SHN_HIPROC    = 0xff1f, // Highest processor-specific index
   SHN_ABS       = 0xfff1, // Symbol has absolute value; does not need relocation
   SHN_COMMON    = 0xfff2, // FORTRAN COMMON or C external global variables
+  SHN_XINDEX    = 0xffff, // Mark that the index is >= SHN_LORESERVE
   SHN_HIRESERVE = 0xffff  // Highest reserved index
 };
 
@@ -298,6 +495,18 @@ enum {
   SHT_LOOS          = 0x60000000, // Lowest operating system-specific type.
   SHT_HIOS          = 0x6fffffff, // Highest operating system-specific type.
   SHT_LOPROC        = 0x70000000, // Lowest processor architecture-specific type.
+  // Fixme: All this is duplicated in MCSectionELF. Why??
+  // Exception Index table
+  SHT_ARM_EXIDX           = 0x70000001U,
+  // BPABI DLL dynamic linking pre-emption map
+  SHT_ARM_PREEMPTMAP      = 0x70000002U,
+  //  Object file compatibility attributes
+  SHT_ARM_ATTRIBUTES      = 0x70000003U,
+  SHT_ARM_DEBUGOVERLAY    = 0x70000004U,
+  SHT_ARM_OVERLAYSECTION  = 0x70000005U,
+
+  SHT_X86_64_UNWIND       = 0x70000001, // Unwind information
+
   SHT_HIPROC        = 0x7fffffff, // Highest processor architecture-specific type.
   SHT_LOUSER        = 0x80000000, // Lowest type reserved for applications.
   SHT_HIUSER        = 0xffffffff  // Highest type reserved for applications.
@@ -305,10 +514,58 @@ enum {
 
 // Section flags.
 enum {
-  SHF_WRITE     = 0x1, // Section data should be writable during execution.
-  SHF_ALLOC     = 0x2, // Section occupies memory during program execution.
-  SHF_EXECINSTR = 0x4, // Section contains executable machine instructions.
-  SHF_MASKPROC  = 0xf0000000 // Bits indicating processor-specific flags.
+  // Section data should be writable during execution.
+  SHF_WRITE = 0x1,
+
+  // Section occupies memory during program execution.
+  SHF_ALLOC = 0x2,
+
+  // Section contains executable machine instructions.
+  SHF_EXECINSTR = 0x4,
+
+  // The data in this section may be merged.
+  SHF_MERGE = 0x10,
+
+  // The data in this section is null-terminated strings.
+  SHF_STRINGS = 0x20,
+
+  // A field in this section holds a section header table index.
+  SHF_INFO_LINK = 0x40U,
+
+  // Adds special ordering requirements for link editors.
+  SHF_LINK_ORDER = 0x80U,
+
+  // This section requires special OS-specific processing to avoid incorrect
+  // behavior.
+  SHF_OS_NONCONFORMING = 0x100U,
+
+  // This section is a member of a section group.
+  SHF_GROUP = 0x200U,
+
+  // This section holds Thread-Local Storage.
+  SHF_TLS = 0x400U,
+
+  // Start of target-specific flags.
+
+  /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped
+  /// together by the linker to form the constant pool and the cp register is
+  /// set to the start of the constant pool by the boot code.
+  XCORE_SHF_CP_SECTION = 0x800U,
+
+  /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped
+  /// together by the linker to form the data section and the dp register is
+  /// set to the start of the section by the boot code.
+  XCORE_SHF_DP_SECTION = 0x1000U,
+
+  // Bits indicating processor-specific flags.
+  SHF_MASKPROC = 0xf0000000
+};
+
+// Section Group Flags
+enum {
+  GRP_COMDAT = 0x1,
+  GRP_MASKOS = 0x0ff00000,
+  GRP_MASKPROC = 0xf0000000
 };
 
 // Symbol table entries for ELF32.
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
new file mode 100644
index 000000000000..f62eab0702b4
--- /dev/null
+++ b/include/llvm/Support/Endian.h
@@ -0,0 +1,213 @@
+//===- Endian.h - Utilities for IO with endian specific data ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares generic functions to read and write endian specific data.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ENDIAN_H
+#define LLVM_SUPPORT_ENDIAN_H
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/type_traits.h"
+
+namespace llvm {
+namespace support {
+
+enum endianness {big, little};
+enum alignment {unaligned, aligned};
+
+namespace detail {
+
+template<typename value_type, alignment align>
+struct alignment_access_helper;
+
+template<typename value_type>
+struct alignment_access_helper<value_type, aligned>
+{
+  value_type val;
+};
+
+// Provides unaligned loads and stores.
+#pragma pack(push)
+#pragma pack(1)
+template<typename value_type>
+struct alignment_access_helper<value_type, unaligned>
+{
+  value_type val;
+};
+#pragma pack(pop)
+
+} // end namespace detail
+
+namespace endian {
+  template<typename value_type, alignment align>
+  static value_type read_le(const void *memory) {
+    value_type t =
+      reinterpret_cast<const detail::alignment_access_helper
+        <value_type, align> *>(memory)->val;
+    if (sys::isBigEndianHost())
+      return sys::SwapByteOrder(t);
+    return t;
+  }
+
+  template<typename value_type, alignment align>
+  static void write_le(void *memory, value_type value) {
+    if (sys::isBigEndianHost())
+      value = sys::SwapByteOrder(value);
+    reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
+      (memory)->val = value;
+  }
+
+  template<typename value_type, alignment align>
+  static value_type read_be(const void *memory) {
+    value_type t =
+      reinterpret_cast<const detail::alignment_access_helper
+        <value_type, align> *>(memory)->val;
+    if (sys::isLittleEndianHost())
+      return sys::SwapByteOrder(t);
+    return t;
+  }
+
+  template<typename value_type, alignment align>
+  static void write_be(void *memory, value_type value) {
+    if (sys::isLittleEndianHost())
+      value = sys::SwapByteOrder(value);
+    reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
+      (memory)->val = value;
+  }
+}
+
+namespace detail {
+
+template<typename value_type,
+         endianness endian,
+         alignment  align>
+class packed_endian_specific_integral;
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, little, unaligned> {
+public:
+  operator value_type() const {
+    return endian::read_le<value_type, unaligned>(Value);
+  }
+private:
+  uint8_t Value[sizeof(value_type)];
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, big, unaligned> {
+public:
+  operator value_type() const {
+    return endian::read_be<value_type, unaligned>(Value);
+  }
+private:
+  uint8_t Value[sizeof(value_type)];
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, little, aligned> {
+public:
+  operator value_type() const {
+    return endian::read_le<value_type, aligned>(&Value);
+  }
+private:
+  value_type Value;
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, big, aligned> {
+public:
+  operator value_type() const {
+    return endian::read_be<value_type, aligned>(&Value);
+  }
+private:
+  value_type Value;
+};
+
+} // end namespace detail
+
+typedef detail::packed_endian_specific_integral
+                  <uint8_t, little, unaligned>  ulittle8_t;
+typedef detail::packed_endian_specific_integral
+                  <uint16_t, little, unaligned> ulittle16_t;
+typedef detail::packed_endian_specific_integral
+                  <uint32_t, little, unaligned> ulittle32_t;
+typedef detail::packed_endian_specific_integral
+                  <uint64_t, little, unaligned> ulittle64_t;
+
+typedef detail::packed_endian_specific_integral
+                   <int8_t, little, unaligned>  little8_t;
+typedef detail::packed_endian_specific_integral
+                   <int16_t, little, unaligned> little16_t;
+typedef detail::packed_endian_specific_integral
+                   <int32_t, little, unaligned> little32_t;
+typedef detail::packed_endian_specific_integral
+                   <int64_t, little, unaligned> little64_t;
+
+typedef detail::packed_endian_specific_integral
+                    <uint8_t, little, aligned>  aligned_ulittle8_t;
+typedef detail::packed_endian_specific_integral
+                    <uint16_t, little, aligned> aligned_ulittle16_t;
+typedef detail::packed_endian_specific_integral
+                    <uint32_t, little, aligned> aligned_ulittle32_t;
+typedef detail::packed_endian_specific_integral
+                    <uint64_t, little, aligned> aligned_ulittle64_t;
+
+typedef detail::packed_endian_specific_integral
+                     <int8_t, little, aligned>  aligned_little8_t;
+typedef detail::packed_endian_specific_integral
+                     <int16_t, little, aligned> aligned_little16_t;
+typedef detail::packed_endian_specific_integral
+                     <int32_t, little, aligned> aligned_little32_t;
+typedef detail::packed_endian_specific_integral
+                     <int64_t, little, aligned> aligned_little64_t;
+
+typedef detail::packed_endian_specific_integral
+                  <uint8_t, big, unaligned>     ubig8_t;
+typedef detail::packed_endian_specific_integral
+                  <uint16_t, big, unaligned>    ubig16_t;
+typedef detail::packed_endian_specific_integral
+                  <uint32_t, big, unaligned>    ubig32_t;
+typedef detail::packed_endian_specific_integral
+                  <uint64_t, big, unaligned>    ubig64_t;
+
+typedef detail::packed_endian_specific_integral
+                   <int8_t, big, unaligned>     big8_t;
+typedef detail::packed_endian_specific_integral
+                   <int16_t, big, unaligned>    big16_t;
+typedef detail::packed_endian_specific_integral
+                   <int32_t, big, unaligned>    big32_t;
+typedef detail::packed_endian_specific_integral
+                   <int64_t, big, unaligned>    big64_t;
+
+typedef detail::packed_endian_specific_integral
+                    <uint8_t, big, aligned>     aligned_ubig8_t;
+typedef detail::packed_endian_specific_integral
+                    <uint16_t, big, aligned>    aligned_ubig16_t;
+typedef detail::packed_endian_specific_integral
+                    <uint32_t, big, aligned>    aligned_ubig32_t;
+typedef detail::packed_endian_specific_integral
+                    <uint64_t, big, aligned>    aligned_ubig64_t;
+
+typedef detail::packed_endian_specific_integral
+                     <int8_t, big, aligned>     aligned_big8_t;
+typedef detail::packed_endian_specific_integral
+                     <int16_t, big, aligned>    aligned_big16_t;
+typedef detail::packed_endian_specific_integral
+                     <int32_t, big, aligned>    aligned_big32_t;
+typedef detail::packed_endian_specific_integral
+                     <int64_t, big, aligned>    aligned_big64_t;
+
+} // end namespace llvm
+} // end namespace support
+
+#endif
diff --git a/include/llvm/Support/Errno.h b/include/llvm/Support/Errno.h
new file mode 100644
index 000000000000..150bdb701626
--- /dev/null
+++ b/include/llvm/Support/Errno.h
@@ -0,0 +1,34 @@
+//===- llvm/Support/Errno.h - Portable+convenient errno handling -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares some portable and convenient functions to deal with errno.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_ERRNO_H
+#define LLVM_SYSTEM_ERRNO_H
+
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+/// Returns a string representation of the errno value, using whatever
+/// thread-safe variant of strerror() is available.  Be sure to call this
+/// immediately after the function that set errno, or errno may have been
+/// overwritten by an intervening call.
+std::string StrError();
+
+/// Like the no-argument version above, but uses \p errnum instead of errno.
+std::string StrError(int errnum);
+
+}  // namespace sys
+}  // namespace llvm
+
+#endif  // LLVM_SYSTEM_ERRNO_H
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 9854657c756f..5eca438d8b4a 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -16,6 +16,7 @@
 #define LLVM_SUPPORT_ERRORHANDLING_H
 
 #include "llvm/Support/Compiler.h"
+#include "llvm/ADT/StringRef.h"
 #include <string>
 
 namespace llvm {
@@ -72,15 +73,17 @@ namespace llvm {
   /// standard error, followed by a newline.
   /// After the error handler is called this function will call exit(1), it 
   /// does not return.
-  NORETURN void report_fatal_error(const char *reason);
-  NORETURN void report_fatal_error(const std::string &reason);
-  NORETURN void report_fatal_error(const Twine &reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason);
 
   /// This function calls abort(), and prints the optional message to stderr.
   /// Use the llvm_unreachable macro (that adds location info), instead of
   /// calling this function directly.
-  NORETURN void llvm_unreachable_internal(const char *msg=0,
-                                          const char *file=0, unsigned line=0);
+  LLVM_ATTRIBUTE_NORETURN void llvm_unreachable_internal(const char *msg=0,
+                                                         const char *file=0,
+                                                         unsigned line=0);
 }
 
 /// Prints the message and location info to stderr in !NDEBUG builds.
diff --git a/include/llvm/Support/FEnv.h b/include/llvm/Support/FEnv.h
new file mode 100644
index 000000000000..f6f43337bd29
--- /dev/null
+++ b/include/llvm/Support/FEnv.h
@@ -0,0 +1,56 @@
+//===- llvm/Support/FEnv.h - Host floating-point exceptions ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an operating system independent interface to
+// floating-point exception interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_FENV_H
+#define LLVM_SYSTEM_FENV_H
+
+#include "llvm/Config/config.h"
+#include <cerrno>
+#ifdef HAVE_FENV_H
+#include <fenv.h>
+#endif
+
+// FIXME: Clang's #include handling apparently doesn't work for libstdc++'s
+// fenv.h; see PR6907 for details.
+#if defined(__clang__) && defined(_GLIBCXX_FENV_H)
+#undef HAVE_FENV_H
+#endif
+
+namespace llvm {
+namespace sys {
+
+/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+static inline void llvm_fenv_clearexcept() {
+#ifdef HAVE_FENV_H
+  feclearexcept(FE_ALL_EXCEPT);
+#endif
+  errno = 0;
+}
+
+/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+static inline bool llvm_fenv_testexcept() {
+  int errno_val = errno;
+  if (errno_val == ERANGE || errno_val == EDOM)
+    return true;
+#ifdef HAVE_FENV_H
+  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
+    return true;
+#endif
+  return false;
+}
+
+} // End sys namespace
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
new file mode 100644
index 000000000000..4001bf0b84e3
--- /dev/null
+++ b/include/llvm/Support/FileSystem.h
@@ -0,0 +1,690 @@
+//===- llvm/Support/FileSystem.h - File System OS Concept -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::fs namespace. It is designed after
+// TR2/boost filesystem (v3), but modified to remove exception handling and the
+// path class.
+//
+// All functions return an error_code and their actual work via the last out
+// argument. The out argument is defined if and only if errc::success is
+// returned. A function may return any error code in the generic or system
+// category. However, they shall be equivalent to any error conditions listed
+// in each functions respective documentation if the condition applies. [ note:
+// this does not guarantee that error_code will be in the set of explicitly
+// listed codes, but it does guarantee that if any of the explicitly listed
+// errors occur, the correct error_code will be used ]. All functions may
+// return errc::not_enough_memory if there is not enough memory to complete the
+// operation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILE_SYSTEM_H
+#define LLVM_SUPPORT_FILE_SYSTEM_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/PathV1.h"
+#include "llvm/Support/system_error.h"
+#include <ctime>
+#include <iterator>
+#include <string>
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+/// file_type - An "enum class" enumeration for the file system's view of the
+///             type.
+struct file_type {
+  enum _ {
+    status_error,
+    file_not_found,
+    regular_file,
+    directory_file,
+    symlink_file,
+    block_file,
+    character_file,
+    fifo_file,
+    socket_file,
+    type_unknown
+  };
+
+  file_type(_ v) : v_(v) {}
+  explicit file_type(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+
+private:
+  int v_;
+};
+
+/// copy_option - An "enum class" enumeration of copy semantics for copy
+///               operations.
+struct copy_option {
+  enum _ {
+    fail_if_exists,
+    overwrite_if_exists
+  };
+
+  copy_option(_ v) : v_(v) {}
+  explicit copy_option(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+
+private:
+  int v_;
+};
+
+/// space_info - Self explanatory.
+struct space_info {
+  uint64_t capacity;
+  uint64_t free;
+  uint64_t available;
+};
+
+/// file_status - Represents the result of a call to stat and friends. It has
+///               a platform specific member to store the result.
+class file_status
+{
+  // implementation defined status field.
+  file_type Type;
+public:
+  explicit file_status(file_type v=file_type::status_error)
+    : Type(v) {}
+
+  file_type type() const { return Type; }
+  void type(file_type v) { Type = v; }
+};
+
+/// @}
+/// @name Physical Operators
+/// @{
+
+/// @brief Make \a path an absolute path.
+///
+/// Makes \a path absolute using the current directory if it is not already. An
+/// empty \a path will result in the current directory.
+///
+/// /absolute/path   => /absolute/path
+/// relative/../path => <current-directory>/relative/../path
+///
+/// @param path A path that is modified to be an absolute path.
+/// @returns errc::success if \a path has been made absolute, otherwise a
+///          platform specific error_code.
+error_code make_absolute(SmallVectorImpl<char> &path);
+
+/// @brief Copy the file at \a from to the path \a to.
+///
+/// @param from The path to copy the file from.
+/// @param to The path to copy the file to.
+/// @param copt Behavior if \a to already exists.
+/// @returns errc::success if the file has been successfully copied.
+///          errc::file_exists if \a to already exists and \a copt ==
+///          copy_option::fail_if_exists. Otherwise a platform specific
+///          error_code.
+error_code copy_file(const Twine &from, const Twine &to,
+                     copy_option copt = copy_option::fail_if_exists);
+
+/// @brief Create all the non-existent directories in path.
+///
+/// @param path Directories to create.
+/// @param existed Set to true if \a path already existed, false otherwise.
+/// @returns errc::success if is_directory(path) and existed have been set,
+///          otherwise a platform specific error_code.
+error_code create_directories(const Twine &path, bool &existed);
+
+/// @brief Create the directory in path.
+///
+/// @param path Directory to create.
+/// @param existed Set to true if \a path already existed, false otherwise.
+/// @returns errc::success if is_directory(path) and existed have been set,
+///          otherwise a platform specific error_code.
+error_code create_directory(const Twine &path, bool &existed);
+
+/// @brief Create a hard link from \a from to \a to.
+///
+/// @param to The path to hard link to.
+/// @param from The path to hard link from. This is created.
+/// @returns errc::success if exists(to) && exists(from) && equivalent(to, from)
+///          , otherwise a platform specific error_code.
+error_code create_hard_link(const Twine &to, const Twine &from);
+
+/// @brief Create a symbolic link from \a from to \a to.
+///
+/// @param to The path to symbolically link to.
+/// @param from The path to symbolically link from. This is created.
+/// @returns errc::success if exists(to) && exists(from) && is_symlink(from),
+///          otherwise a platform specific error_code.
+error_code create_symlink(const Twine &to, const Twine &from);
+
+/// @brief Get the current path.
+///
+/// @param result Holds the current path on return.
+/// @results errc::success if the current path has been stored in result,
+///          otherwise a platform specific error_code.
+error_code current_path(SmallVectorImpl<char> &result);
+
+/// @brief Remove path. Equivalent to POSIX remove().
+///
+/// @param path Input path.
+/// @param existed Set to true if \a path existed, false if it did not.
+///                undefined otherwise.
+/// @results errc::success if path has been removed and existed has been
+///          successfully set, otherwise a platform specific error_code.
+error_code remove(const Twine &path, bool &existed);
+
+/// @brief Recursively remove all files below \a path, then \a path. Files are
+///        removed as if by POSIX remove().
+///
+/// @param path Input path.
+/// @param num_removed Number of files removed.
+/// @results errc::success if path has been removed and num_removed has been
+///          successfully set, otherwise a platform specific error_code.
+error_code remove_all(const Twine &path, uint32_t &num_removed);
+
+/// @brief Rename \a from to \a to. Files are renamed as if by POSIX rename().
+///
+/// @param from The path to rename from.
+/// @param to The path to rename to. This is created.
+error_code rename(const Twine &from, const Twine &to);
+
+/// @brief Resize path to size. File is resized as if by POSIX truncate().
+///
+/// @param path Input path.
+/// @param size Size to resize to.
+/// @returns errc::success if \a path has been resized to \a size, otherwise a
+///          platform specific error_code.
+error_code resize_file(const Twine &path, uint64_t size);
+
+/// @brief Make file readable.
+///
+/// @param path Input path.
+/// @param value If true, make readable, else, make unreadable.
+/// @results errc::success if readability has been successfully set, otherwise a
+///          platform specific error_code.
+error_code set_read(const Twine &path, bool value);
+
+/// @brief Make file writeable.
+///
+/// @param path Input path.
+/// @param value If true, make writeable, else, make unwriteable.
+/// @results errc::success if writeability has been successfully set, otherwise
+///          a platform specific error_code.
+error_code set_write(const Twine &path, bool value);
+
+/// @brief Make file executable.
+///
+/// @param path Input path.
+/// @param value If true, make executable, else, make unexecutable.
+/// @results errc::success if executability has been successfully set, otherwise
+///          a platform specific error_code.
+error_code set_execute(const Twine &path, bool value);
+
+/// @}
+/// @name Physical Observers
+/// @{
+
+/// @brief Does file exist?
+///
+/// @param status A file_status previously returned from stat.
+/// @results True if the file represented by status exists, false if it does
+///          not.
+bool exists(file_status status);
+
+/// @brief Does file exist?
+///
+/// @param path Input path.
+/// @param result Set to true if the file represented by status exists, false if
+///               it does not. Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code exists(const Twine &path, bool &result);
+
+/// @brief Do file_status's represent the same thing?
+///
+/// @param A Input file_status.
+/// @param B Input file_status.
+///
+/// assert(status_known(A) || status_known(B));
+///
+/// @results True if A and B both represent the same file system entity, false
+///          otherwise.
+bool equivalent(file_status A, file_status B);
+
+/// @brief Do paths represent the same thing?
+///
+/// @param A Input path A.
+/// @param B Input path B.
+/// @param result Set to true if stat(A) and stat(B) have the same device and
+///               inode (or equivalent).
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code equivalent(const Twine &A, const Twine &B, bool &result);
+
+/// @brief Get file size.
+///
+/// @param path Input path.
+/// @param result Set to the size of the file in \a path.
+/// @returns errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code file_size(const Twine &path, uint64_t &result);
+
+/// @brief Does status represent a directory?
+///
+/// @param status A file_status previously returned from status.
+/// @results status.type() == file_type::directory_file.
+bool is_directory(file_status status);
+
+/// @brief Is path a directory?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a directory, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_directory(const Twine &path, bool &result);
+
+/// @brief Is path an empty file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a an empty file, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_empty(const Twine &path, bool &result);
+
+/// @brief Does status represent a regular file?
+///
+/// @param status A file_status previously returned from status.
+/// @results status_known(status) && status.type() == file_type::regular_file.
+bool is_regular_file(file_status status);
+
+/// @brief Is path a regular file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a regular file, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_regular_file(const Twine &path, bool &result);
+
+/// @brief Does this status represent something that exists but is not a
+///        directory, regular file, or symlink?
+///
+/// @param status A file_status previously returned from status.
+/// @results exists(s) && !is_regular_file(s) && !is_directory(s) &&
+///          !is_symlink(s)
+bool is_other(file_status status);
+
+/// @brief Is path something that exists but is not a directory,
+///        regular file, or symlink?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path exists, but is not a directory, regular
+///               file, or a symlink, false if it does not. Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_other(const Twine &path, bool &result);
+
+/// @brief Does status represent a symlink?
+///
+/// @param status A file_status previously returned from stat.
+/// @param result status.type() == symlink_file.
+bool is_symlink(file_status status);
+
+/// @brief Is path a symlink?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a symlink, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_symlink(const Twine &path, bool &result);
+
+/// @brief Get last write time without changing it.
+///
+/// @param path Input path.
+/// @param result Set to the last write time (UNIX time) of \a path if it
+///               exists.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code last_write_time(const Twine &path, std::time_t &result);
+
+/// @brief Set last write time.
+///
+/// @param path Input path.
+/// @param value Time to set (UNIX time) \a path's last write time to.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code set_last_write_time(const Twine &path, std::time_t value);
+
+/// @brief Read a symlink's value.
+///
+/// @param path Input path.
+/// @param result Set to the value of the symbolic link \a path.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code read_symlink(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @brief Get disk space usage information.
+///
+/// @param path Input path.
+/// @param result Set to the capacity, free, and available space on the device
+///               \a path is on.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code disk_space(const Twine &path, space_info &result);
+
+/// @brief Get file status as if by POSIX stat().
+///
+/// @param path Input path.
+/// @param result Set to the file status.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code status(const Twine &path, file_status &result);
+
+/// @brief Is status available?
+///
+/// @param path Input path.
+/// @results True if status() != status_error.
+bool status_known(file_status s);
+
+/// @brief Is status available?
+///
+/// @param path Input path.
+/// @param result Set to true if status() != status_error.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code status_known(const Twine &path, bool &result);
+
+/// @brief Get file status as if by POSIX lstat().
+///
+/// Does not resolve symlinks.
+///
+/// @param path Input path.
+/// @param result Set to the file status.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code symlink_status(const Twine &path, file_status &result);
+
+/// @brief Generate a unique path and open it as a file.
+///
+/// Generates a unique path suitable for a temporary file and then opens it as a
+/// file. The name is based on \a model with '%' replaced by a random char in
+/// [0-9a-f]. If \a model is not an absolute path, a suitable temporary
+/// directory will be prepended.
+///
+/// This is an atomic operation. Either the file is created and opened, or the
+/// file system is left untouched.
+///
+/// clang-%%-%%-%%-%%-%%.s => /tmp/clang-a0-b1-c2-d3-e4.s
+///
+/// @param model Name to base unique path off of.
+/// @param result_fs Set to the opened file's file descriptor.
+/// @param result_path Set to the opened file's absolute path.
+/// @results errc::success if result_{fd,path} have been successfully set,
+///          otherwise a platform specific error_code.
+error_code unique_file(const Twine &model, int &result_fd,
+                             SmallVectorImpl<char> &result_path);
+
+/// @brief Canonicalize path.
+///
+/// Sets result to the file system's idea of what path is. The result is always
+/// absolute and has the same capitalization as the file system.
+///
+/// @param path Input path.
+/// @param result Set to the canonicalized version of \a path.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code canonicalize(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @brief Are \a path's first bytes \a magic?
+///
+/// @param path Input path.
+/// @param magic Byte sequence to compare \a path's first len(magic) bytes to.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code has_magic(const Twine &path, const Twine &magic, bool &result);
+
+/// @brief Get \a path's first \a len bytes.
+///
+/// @param path Input path.
+/// @param len Number of magic bytes to get.
+/// @param result Set to the first \a len bytes in the file pointed to by
+///               \a path. Or the entire file if file_size(path) < len, in which
+///               case result.size() returns the size of the file.
+/// @results errc::success if result has been successfully set,
+///          errc::value_too_large if len is larger then the file pointed to by
+///          \a path, otherwise a platform specific error_code.
+error_code get_magic(const Twine &path, uint32_t len,
+                     SmallVectorImpl<char> &result);
+
+/// @brief Get and identify \a path's type based on its content.
+///
+/// @param path Input path.
+/// @param result Set to the type of file, or LLVMFileType::Unknown_FileType.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code identify_magic(const Twine &path, LLVMFileType &result);
+
+/// @brief Is file bitcode?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a bitcode file, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_bitcode(const Twine &path, bool &result);
+
+/// @brief Is file a dynamic library?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a dynamic library, false if it is
+///               not, undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_dynamic_library(const Twine &path, bool &result);
+
+/// @brief Is an object file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is an object file, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_object_file(const Twine &path, bool &result);
+
+/// @brief Can file be read?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is readable, false it it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_read(const Twine &path, bool &result);
+
+/// @brief Can file be written?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is writeable, false it it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_write(const Twine &path, bool &result);
+
+/// @brief Can file be executed?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is executable, false it it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_execute(const Twine &path, bool &result);
+
+/// @brief Get library paths the system linker uses.
+///
+/// @param result Set to the list of system library paths.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetSystemLibraryPaths(SmallVectorImpl<std::string> &result);
+
+/// @brief Get bitcode library paths the system linker uses
+///        + LLVM_LIB_SEARCH_PATH + LLVM_LIBDIR.
+///
+/// @param result Set to the list of bitcode library paths.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetBitcodeLibraryPaths(SmallVectorImpl<std::string> &result);
+
+/// @brief Find a library.
+///
+/// Find the path to a library using its short name. Use the system
+/// dependent library paths to locate the library.
+///
+/// c => /usr/lib/libc.so
+///
+/// @param short_name Library name one would give to the system linker.
+/// @param result Set to the absolute path \a short_name represents.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result);
+
+/// @brief Get absolute path of main executable.
+///
+/// @param argv0 The program name as it was spelled on the command line.
+/// @param MainAddr Address of some symbol in the executable (not in a library).
+/// @param result Set to the absolute path of the current executable.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetMainExecutable(const char *argv0, void *MainAddr,
+                             SmallVectorImpl<char> &result);
+
+/// @}
+/// @name Iterators
+/// @{
+
+/// directory_entry - A single entry in a directory. Caches the status either
+/// from the result of the iteration syscall, or the first time status or
+/// symlink_status is called.
+class directory_entry {
+  std::string Path;
+  mutable file_status Status;
+  mutable file_status SymlinkStatus;
+
+public:
+  explicit directory_entry(const Twine &path, file_status st = file_status(),
+                                       file_status symlink_st = file_status())
+    : Path(path.str())
+    , Status(st)
+    , SymlinkStatus(symlink_st) {}
+
+  directory_entry() {}
+
+  void assign(const Twine &path, file_status st = file_status(),
+                          file_status symlink_st = file_status()) {
+    Path = path.str();
+    Status = st;
+    SymlinkStatus = symlink_st;
+  }
+
+  void replace_filename(const Twine &filename, file_status st = file_status(),
+                              file_status symlink_st = file_status());
+
+  StringRef path() const { return Path; }
+  error_code status(file_status &result) const;
+  error_code symlink_status(file_status &result) const;
+
+  bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
+  bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
+  bool operator< (const directory_entry& rhs) const;
+  bool operator<=(const directory_entry& rhs) const;
+  bool operator> (const directory_entry& rhs) const;
+  bool operator>=(const directory_entry& rhs) const;
+};
+
+/// directory_iterator - Iterates through the entries in path. There is no
+/// operator++ because we need an error_code. If it's really needed we can make
+/// it call report_fatal_error on error.
+class directory_iterator {
+  intptr_t IterationHandle;
+  directory_entry CurrentEntry;
+
+  // Platform implementations implement these functions to handle iteration.
+  friend error_code directory_iterator_construct(directory_iterator &it,
+                                                 StringRef path);
+  friend error_code directory_iterator_increment(directory_iterator &it);
+  friend error_code directory_iterator_destruct(directory_iterator &it);
+
+public:
+  explicit directory_iterator(const Twine &path, error_code &ec)
+  : IterationHandle(0) {
+    SmallString<128> path_storage;
+    ec = directory_iterator_construct(*this, path.toStringRef(path_storage));
+  }
+
+  /// Construct end iterator.
+  directory_iterator() : IterationHandle(0) {}
+
+  ~directory_iterator() {
+    directory_iterator_destruct(*this);
+  }
+
+  // No operator++ because we need error_code.
+  directory_iterator &increment(error_code &ec) {
+    ec = directory_iterator_increment(*this);
+    return *this;
+  }
+
+  const directory_entry &operator*() const { return CurrentEntry; }
+  const directory_entry *operator->() const { return &CurrentEntry; }
+
+  bool operator!=(const directory_iterator &RHS) const {
+    return CurrentEntry != RHS.CurrentEntry;
+  }
+  // Other members as required by
+  // C++ Std, 24.1.1 Input iterators [input.iterators]
+};
+
+/// recursive_directory_iterator - Same as directory_iterator except for it
+/// recurses down into child directories.
+class recursive_directory_iterator {
+  uint16_t  Level;
+  bool HasNoPushRequest;
+  // implementation directory iterator status
+
+public:
+  explicit recursive_directory_iterator(const Twine &path, error_code &ec);
+  // No operator++ because we need error_code.
+  directory_iterator &increment(error_code &ec);
+
+  const directory_entry &operator*() const;
+  const directory_entry *operator->() const;
+
+  // observers
+  /// Gets the current level. path is at level 0.
+  int level() const;
+  /// Returns true if no_push has been called for this directory_entry.
+  bool no_push_request() const;
+
+  // modifiers
+  /// Goes up one level if Level > 0.
+  void pop();
+  /// Does not go down into the current directory_entry.
+  void no_push();
+
+  // Other members as required by
+  // C++ Std, 24.1.1 Input iterators [input.iterators]
+};
+
+/// @}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index d0dd4a759888..748ce7cea7bd 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_SUPPORT_FILEUTILITIES_H
 #define LLVM_SUPPORT_FILEUTILITIES_H
 
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 287c5ba01eeb..7573ef0dc9e7 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -26,7 +26,7 @@
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include <vector>
 #include <cassert>
 
@@ -89,14 +89,28 @@ class GraphWriter {
 
 public:
   GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
-  DTraits = DOTTraits(SN);
-}
+    DTraits = DOTTraits(SN);
+  }
+
+  void writeGraph(const std::string &Title = "") {
+    // Output the header for the graph...
+    writeHeader(Title);
+
+    // Emit all of the nodes in the graph...
+    writeNodes();
+
+    // Output any customizations on the graph
+    DOTGraphTraits<GraphType>::addCustomGraphFeatures(G, *this);
 
-  void writeHeader(const std::string &Name) {
+    // Output the end of the graph
+    writeFooter();
+  }
+
+  void writeHeader(const std::string &Title) {
     std::string GraphName = DTraits.getGraphName(G);
 
-    if (!Name.empty())
-      O << "digraph \"" << DOT::EscapeString(Name) << "\" {\n";
+    if (!Title.empty())
+      O << "digraph \"" << DOT::EscapeString(Title) << "\" {\n";
     else if (!GraphName.empty())
       O << "digraph \"" << DOT::EscapeString(GraphName) << "\" {\n";
     else
@@ -105,8 +119,8 @@ public:
     if (DTraits.renderGraphFromBottomUp())
       O << "\trankdir=\"BT\";\n";
 
-    if (!Name.empty())
-      O << "\tlabel=\"" << DOT::EscapeString(Name) << "\";\n";
+    if (!Title.empty())
+      O << "\tlabel=\"" << DOT::EscapeString(Title) << "\";\n";
     else if (!GraphName.empty())
       O << "\tlabel=\"" << DOT::EscapeString(GraphName) << "\";\n";
     O << DTraits.getGraphProperties(G);
@@ -282,22 +296,13 @@ public:
 template<typename GraphType>
 raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
                         bool ShortNames = false,
-                        const std::string &Name = "",
                         const std::string &Title = "") {
   // Start the graph emission process...
   GraphWriter<GraphType> W(O, G, ShortNames);
 
-  // Output the header for the graph...
-  W.writeHeader(Title);
-
-  // Emit all of the nodes in the graph...
-  W.writeNodes();
-
-  // Output any customizations on the graph
-  DOTGraphTraits<GraphType>::addCustomGraphFeatures(G, W);
+  // Emit the graph.
+  W.writeGraph(Title);
 
-  // Output the end of the graph
-  W.writeFooter();
   return O;
 }
 
@@ -322,7 +327,7 @@ sys::Path WriteGraph(const GraphType &G, const std::string &Name,
   raw_fd_ostream O(Filename.c_str(), ErrorInfo);
 
   if (ErrorInfo.empty()) {
-    llvm::WriteGraph(O, G, ShortNames, Name, Title);
+    llvm::WriteGraph(O, G, ShortNames, Title);
     errs() << " done. \n";
   } else {
     errs() << "error opening file '" << Filename.str() << "' for writing!\n";
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
new file mode 100644
index 000000000000..f77d4c1182bb
--- /dev/null
+++ b/include/llvm/Support/Host.h
@@ -0,0 +1,66 @@
+//===- llvm/Support/Host.h - Host machine characteristics --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods for querying the nature of the host machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_HOST_H
+#define LLVM_SYSTEM_HOST_H
+
+#include "llvm/ADT/StringMap.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  inline bool isLittleEndianHost() {
+    union {
+      int i;
+      char c;
+    };
+    i = 1;
+    return c;
+  }
+
+  inline bool isBigEndianHost() {
+    return !isLittleEndianHost();
+  }
+
+  /// getHostTriple() - Return the target triple of the running
+  /// system.
+  ///
+  /// The target triple is a string in the format of:
+  ///   CPU_TYPE-VENDOR-OPERATING_SYSTEM
+  /// or
+  ///   CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
+  std::string getHostTriple();
+
+  /// getHostCPUName - Get the LLVM name for the host CPU. The particular format
+  /// of the name is target dependent, and suitable for passing as -mcpu to the
+  /// target which matches the host.
+  ///
+  /// \return - The host CPU name, or empty if the CPU could not be determined.
+  std::string getHostCPUName();
+
+  /// getHostCPUFeatures - Get the LLVM names for the host CPU features.
+  /// The particular format of the names are target dependent, and suitable for
+  /// passing as -mattr to the target which matches the host.
+  ///
+  /// \param Features - A string mapping feature names to either
+  /// true (if enabled) or false (if disabled). This routine makes no guarantees
+  /// about exactly which features may appear in this map, except that they are
+  /// all valid LLVM feature names.
+  ///
+  /// \return - True on success.
+  bool getHostCPUFeatures(StringMap<bool> &Features);
+}
+}
+
+#endif
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index c827ccedd6f1..2394a59c09cb 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -46,32 +46,39 @@ protected:
   BasicBlock::iterator InsertPt;
   LLVMContext &Context;
 public:
-  
+
   IRBuilderBase(LLVMContext &context)
     : Context(context) {
     ClearInsertionPoint();
   }
-  
+
   //===--------------------------------------------------------------------===//
   // Builder configuration methods
   //===--------------------------------------------------------------------===//
-  
+
   /// ClearInsertionPoint - Clear the insertion point: created instructions will
   /// not be inserted into a block.
   void ClearInsertionPoint() {
     BB = 0;
   }
-  
+
   BasicBlock *GetInsertBlock() const { return BB; }
   BasicBlock::iterator GetInsertPoint() const { return InsertPt; }
   LLVMContext &getContext() const { return Context; }
-  
+
   /// SetInsertPoint - This specifies that created instructions should be
   /// appended to the end of the specified block.
   void SetInsertPoint(BasicBlock *TheBB) {
     BB = TheBB;
     InsertPt = BB->end();
   }
+
+  /// SetInsertPoint - This specifies that created instructions should be
+  /// inserted before the specified instruction.
+  void SetInsertPoint(Instruction *I) {
+    BB = I->getParent();
+    InsertPt = I;
+  }
   
   /// SetInsertPoint - This specifies that created instructions should be
   /// inserted at the specified point.
@@ -79,17 +86,17 @@ public:
     BB = TheBB;
     InsertPt = IP;
   }
-  
+
   /// SetCurrentDebugLocation - Set location information used by debugging
   /// information.
   void SetCurrentDebugLocation(const DebugLoc &L) {
     CurDbgLocation = L;
   }
-  
+
   /// getCurrentDebugLocation - Get location information used by debugging
   /// information.
   const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
-  
+
   /// SetInstDebugLocation - If this builder has a current debug location, set
   /// it on the specified instruction.
   void SetInstDebugLocation(Instruction *I) const {
@@ -142,7 +149,7 @@ public:
   //===--------------------------------------------------------------------===//
   // Miscellaneous creation methods.
   //===--------------------------------------------------------------------===//
-  
+
   /// CreateGlobalString - Make a new global variable with an initializer that
   /// has array of i8 type filled in with the nul terminated string value
   /// specified.  If Name is specified, it is the name of the global variable
@@ -178,65 +185,100 @@ public:
   ConstantInt *getInt32(uint32_t C) {
     return ConstantInt::get(getInt32Ty(), C);
   }
-  
+
   /// getInt64 - Get a constant 64-bit value.
   ConstantInt *getInt64(uint64_t C) {
     return ConstantInt::get(getInt64Ty(), C);
   }
-  
+
   //===--------------------------------------------------------------------===//
   // Type creation methods
   //===--------------------------------------------------------------------===//
-  
+
   /// getInt1Ty - Fetch the type representing a single bit
   const IntegerType *getInt1Ty() {
     return Type::getInt1Ty(Context);
   }
-  
+
   /// getInt8Ty - Fetch the type representing an 8-bit integer.
   const IntegerType *getInt8Ty() {
     return Type::getInt8Ty(Context);
   }
-  
+
   /// getInt16Ty - Fetch the type representing a 16-bit integer.
   const IntegerType *getInt16Ty() {
     return Type::getInt16Ty(Context);
   }
-  
+
   /// getInt32Ty - Fetch the type resepresenting a 32-bit integer.
   const IntegerType *getInt32Ty() {
     return Type::getInt32Ty(Context);
   }
-  
+
   /// getInt64Ty - Fetch the type representing a 64-bit integer.
   const IntegerType *getInt64Ty() {
     return Type::getInt64Ty(Context);
   }
-  
+
   /// getFloatTy - Fetch the type representing a 32-bit floating point value.
   const Type *getFloatTy() {
     return Type::getFloatTy(Context);
   }
-  
+
   /// getDoubleTy - Fetch the type representing a 64-bit floating point value.
   const Type *getDoubleTy() {
     return Type::getDoubleTy(Context);
   }
-  
+
   /// getVoidTy - Fetch the type representing void.
   const Type *getVoidTy() {
     return Type::getVoidTy(Context);
   }
-  
-  const PointerType *getInt8PtrTy() {
-    return Type::getInt8PtrTy(Context);
+
+  const PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
+    return Type::getInt8PtrTy(Context, AddrSpace);
   }
-  
+
   /// getCurrentFunctionReturnType - Get the return type of the current function
   /// that we're emitting into.
   const Type *getCurrentFunctionReturnType() const;
-};
   
+  /// CreateMemSet - Create and insert a memset to the specified pointer and the
+  /// specified value.  If the pointer isn't an i8*, it will be converted.  If a
+  /// TBAA tag is specified, it will be added to the instruction.
+  CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+  
+  CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0);
+
+  /// CreateMemCpy - Create and insert a memcpy between the specified pointers.
+  /// If the pointers aren't i8*, they will be converted.  If a TBAA tag is
+  /// specified, it will be added to the instruction.
+  CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+  
+  CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0);
+
+  /// CreateMemMove - Create and insert a memmove between the specified
+  /// pointers.  If the pointers aren't i8*, they will be converted.  If a TBAA
+  /// tag is specified, it will be added to the instruction.
+  CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+                          bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+  
+  CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+                          bool isVolatile = false, MDNode *TBAATag = 0);  
+private:
+  Value *getCastedInt8PtrValue(Value *Ptr);
+};
+
 /// IRBuilder - This provides a uniform API for creating instructions and
 /// inserting them into a basic block: either at the end of a BasicBlock, or
 /// at a specific iterator location in a block.
@@ -258,25 +300,30 @@ public:
   IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter())
     : IRBuilderBase(C), Inserter(I), Folder(F) {
   }
-  
+
   explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder(C) {
   }
-  
+
   explicit IRBuilder(BasicBlock *TheBB, const T &F)
     : IRBuilderBase(TheBB->getContext()), Folder(F) {
     SetInsertPoint(TheBB);
   }
-  
+
   explicit IRBuilder(BasicBlock *TheBB)
     : IRBuilderBase(TheBB->getContext()), Folder(Context) {
     SetInsertPoint(TheBB);
   }
+
+  explicit IRBuilder(Instruction *IP)
+    : IRBuilderBase(IP->getContext()), Folder(Context) {
+    SetInsertPoint(IP);
+  }
   
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F)
     : IRBuilderBase(TheBB->getContext()), Folder(F) {
     SetInsertPoint(TheBB, IP);
   }
-  
+
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP)
     : IRBuilderBase(TheBB->getContext()), Folder(Context) {
     SetInsertPoint(TheBB, IP);
@@ -288,7 +335,7 @@ public:
   /// isNamePreserving - Return true if this builder is configured to actually
   /// add the requested names to IR created through it.
   bool isNamePreserving() const { return preserveNames; }
-  
+
   /// Insert - Insert and return the specified instruction.
   template<typename InstTy>
   InstTy *Insert(InstTy *I, const Twine &Name = "") const {
@@ -298,6 +345,11 @@ public:
     return I;
   }
 
+  /// Insert - No-op overload to handle constants.
+  Constant *Insert(Constant *C, const Twine& = "") const {
+    return C;
+  }
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Terminators
   //===--------------------------------------------------------------------===//
@@ -313,7 +365,7 @@ public:
   ReturnInst *CreateRet(Value *V) {
     return Insert(ReturnInst::Create(Context, V));
   }
-  
+
   /// CreateAggregateRet - Create a sequence of N insertvalue instructions,
   /// with one Value from the retVals array each, that build a aggregate
   /// return value one value at a time, and a ret instruction to return
@@ -375,10 +427,12 @@ public:
                                      Args+3), Name);
   }
   /// CreateInvoke - Create an invoke instruction.
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
-                           BasicBlock *UnwindDest, InputIterator ArgBegin,
-                           InputIterator ArgEnd, const Twine &Name = "") {
+                           BasicBlock *UnwindDest,
+                           RandomAccessIterator ArgBegin,
+                           RandomAccessIterator ArgEnd,
+                           const Twine &Name = "") {
     return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
                                      ArgBegin, ArgEnd), Name);
   }
@@ -394,177 +448,179 @@ public:
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Binary Operators
   //===--------------------------------------------------------------------===//
-
-  Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+private:
+  BinaryOperator *CreateInsertNUWNSWBinOp(BinaryOperator::BinaryOps Opc,
+                                          Value *LHS, Value *RHS,
+                                          const Twine &Name,
+                                          bool HasNUW, bool HasNSW) {
+    BinaryOperator *BO = Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
+    if (HasNUW) BO->setHasNoUnsignedWrap();
+    if (HasNSW) BO->setHasNoSignedWrap();
+    return BO;
+  }
+public:
+  Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateAdd(LC, RC);
-    return Insert(BinaryOperator::CreateAdd(LHS, RHS), Name);
+        return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
   }
   Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNSWAdd(LC, RC);
-    return Insert(BinaryOperator::CreateNSWAdd(LHS, RHS), Name);
+    return CreateAdd(LHS, RHS, Name, false, true);
   }
   Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNUWAdd(LC, RC);
-    return Insert(BinaryOperator::CreateNUWAdd(LHS, RHS), Name);
+    return CreateAdd(LHS, RHS, Name, true, false);
   }
   Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFAdd(LC, RC);
+        return Insert(Folder.CreateFAdd(LC, RC), Name);
     return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name);
   }
-  Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateSub(LC, RC);
-    return Insert(BinaryOperator::CreateSub(LHS, RHS), Name);
+        return Insert(Folder.CreateSub(LC, RC), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
   }
   Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNSWSub(LC, RC);
-    return Insert(BinaryOperator::CreateNSWSub(LHS, RHS), Name);
+    return CreateSub(LHS, RHS, Name, false, true);
   }
   Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNUWSub(LC, RC);
-    return Insert(BinaryOperator::CreateNUWSub(LHS, RHS), Name);
+    return CreateSub(LHS, RHS, Name, true, false);
   }
   Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFSub(LC, RC);
+        return Insert(Folder.CreateFSub(LC, RC), Name);
     return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name);
   }
-  Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateMul(LC, RC);
-    return Insert(BinaryOperator::CreateMul(LHS, RHS), Name);
+        return Insert(Folder.CreateMul(LC, RC), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
   }
   Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNSWMul(LC, RC);
-    return Insert(BinaryOperator::CreateNSWMul(LHS, RHS), Name);
+    return CreateMul(LHS, RHS, Name, false, true);
   }
   Value *CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateNUWMul(LC, RC);
-    return Insert(BinaryOperator::CreateNUWMul(LHS, RHS), Name);
+    return CreateMul(LHS, RHS, Name, true, false);
   }
   Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFMul(LC, RC);
+        return Insert(Folder.CreateFMul(LC, RC), Name);
     return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
   }
-  Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateUDiv(LC, RC);
-    return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
+        return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
   }
-  Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateSDiv(LC, RC);
-    return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
+  Value *CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateUDiv(LHS, RHS, Name, true);
   }
-  Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateExactSDiv(LC, RC);
+        return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
     return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
   }
+  Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateSDiv(LHS, RHS, Name, true);
+  }
   Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFDiv(LC, RC);
+        return Insert(Folder.CreateFDiv(LC, RC), Name);
     return Insert(BinaryOperator::CreateFDiv(LHS, RHS), Name);
   }
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateURem(LC, RC);
+        return Insert(Folder.CreateURem(LC, RC), Name);
     return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
   }
   Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateSRem(LC, RC);
+        return Insert(Folder.CreateSRem(LC, RC), Name);
     return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
   }
   Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFRem(LC, RC);
+        return Insert(Folder.CreateFRem(LC, RC), Name);
     return Insert(BinaryOperator::CreateFRem(LHS, RHS), Name);
   }
 
-  Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateShl(LC, RC);
-    return Insert(BinaryOperator::CreateShl(LHS, RHS), Name);
+        return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
   }
-  Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateShl(LC, RHSC);
-    return Insert(BinaryOperator::CreateShl(LHS, RHSC), Name);
+  Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+                     HasNUW, HasNSW);
   }
-  Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateShl(LC, RHSC);
-    return Insert(BinaryOperator::CreateShl(LHS, RHSC), Name);
+  Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+                     HasNUW, HasNSW);
   }
 
-  Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateLShr(LC, RC);
-    return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
+        return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
   }
-  Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateLShr(LC, RHSC);
-    return Insert(BinaryOperator::CreateLShr(LHS, RHSC), Name);
+  Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
-  Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateLShr(LC, RHSC);
-    return Insert(BinaryOperator::CreateLShr(LHS, RHSC), Name);
+  Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
 
-  Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "") {
+  Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateAShr(LC, RC);
-    return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
+        return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
   }
-  Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateAShr(LC, RHSC);
-    return Insert(BinaryOperator::CreateAShr(LHS, RHSC), Name);
+  Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
-  Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateAShr(LC, RHSC);
-    return Insert(BinaryOperator::CreateAShr(LHS, RHSC), Name);
+  Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
   }
 
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -572,21 +628,15 @@ public:
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
         return LHS;  // LHS & -1 -> LHS
       if (Constant *LC = dyn_cast<Constant>(LHS))
-        return Folder.CreateAnd(LC, RC);
+        return Insert(Folder.CreateAnd(LC, RC), Name);
     }
     return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
   }
   Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateAnd(LC, RHSC);
-    return Insert(BinaryOperator::CreateAnd(LHS, RHSC), Name);
+    return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
   Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateAnd(LC, RHSC);
-    return Insert(BinaryOperator::CreateAnd(LHS, RHSC), Name);
+    return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -594,73 +644,61 @@ public:
       if (RC->isNullValue())
         return LHS;  // LHS | 0 -> LHS
       if (Constant *LC = dyn_cast<Constant>(LHS))
-        return Folder.CreateOr(LC, RC);
+        return Insert(Folder.CreateOr(LC, RC), Name);
     }
     return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
   }
   Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateOr(LC, RHSC);
-    return Insert(BinaryOperator::CreateOr(LHS, RHSC), Name);
+    return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
   Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateOr(LC, RHSC);
-    return Insert(BinaryOperator::CreateOr(LHS, RHSC), Name);
+    return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateXor(LC, RC);
+        return Insert(Folder.CreateXor(LC, RC), Name);
     return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
   }
   Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateXor(LC, RHSC);
-    return Insert(BinaryOperator::CreateXor(LHS, RHSC), Name);
+    return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
   Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
-    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
-    if (Constant *LC = dyn_cast<Constant>(LHS))
-      return Folder.CreateXor(LC, RHSC);
-    return Insert(BinaryOperator::CreateXor(LHS, RHSC), Name);
+    return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
   Value *CreateBinOp(Instruction::BinaryOps Opc,
                      Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateBinOp(Opc, LC, RC);
+        return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
     return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
   }
 
-  Value *CreateNeg(Value *V, const Twine &Name = "") {
+  Value *CreateNeg(Value *V, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateNeg(VC);
-    return Insert(BinaryOperator::CreateNeg(V), Name);
+      return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
+    BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
+    if (HasNUW) BO->setHasNoUnsignedWrap();
+    if (HasNSW) BO->setHasNoSignedWrap();
+    return BO;
   }
   Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
-    if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateNSWNeg(VC);
-    return Insert(BinaryOperator::CreateNSWNeg(V), Name);
+    return CreateNeg(V, Name, false, true);
   }
   Value *CreateNUWNeg(Value *V, const Twine &Name = "") {
-    if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateNUWNeg(VC);
-    return Insert(BinaryOperator::CreateNUWNeg(V), Name);
+    return CreateNeg(V, Name, true, false);
   }
   Value *CreateFNeg(Value *V, const Twine &Name = "") {
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateFNeg(VC);
+      return Insert(Folder.CreateFNeg(VC), Name);
     return Insert(BinaryOperator::CreateFNeg(V), Name);
   }
   Value *CreateNot(Value *V, const Twine &Name = "") {
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateNot(VC);
+      return Insert(Folder.CreateNot(VC), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
   }
 
@@ -686,33 +724,39 @@ public:
   StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
     return Insert(new StoreInst(Val, Ptr, isVolatile));
   }
-  template<typename InputIterator>
-  Value *CreateGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd,
+  template<typename RandomAccessIterator>
+  Value *CreateGEP(Value *Ptr,
+                   RandomAccessIterator IdxBegin,
+                   RandomAccessIterator IdxEnd,
                    const Twine &Name = "") {
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
-      InputIterator i;
+      RandomAccessIterator i;
       for (i = IdxBegin; i < IdxEnd; ++i)
         if (!isa<Constant>(*i))
           break;
       if (i == IdxEnd)
-        return Folder.CreateGetElementPtr(PC, &IdxBegin[0], IdxEnd - IdxBegin);
+        return Insert(Folder.CreateGetElementPtr(PC, &IdxBegin[0],
+                                                 IdxEnd - IdxBegin),
+                      Name);
     }
     return Insert(GetElementPtrInst::Create(Ptr, IdxBegin, IdxEnd), Name);
   }
-  template<typename InputIterator>
-  Value *CreateInBoundsGEP(Value *Ptr, InputIterator IdxBegin,
-                           InputIterator IdxEnd, const Twine &Name = "") {
+  template<typename RandomAccessIterator>
+  Value *CreateInBoundsGEP(Value *Ptr, RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
+                           const Twine &Name = "") {
     if (Constant *PC = dyn_cast<Constant>(Ptr)) {
       // Every index must be constant.
-      InputIterator i;
+      RandomAccessIterator i;
       for (i = IdxBegin; i < IdxEnd; ++i)
         if (!isa<Constant>(*i))
           break;
       if (i == IdxEnd)
-        return Folder.CreateInBoundsGetElementPtr(PC,
-                                                  &IdxBegin[0],
-                                                  IdxEnd - IdxBegin);
+        return Insert(Folder.CreateInBoundsGetElementPtr(PC,
+                                                         &IdxBegin[0],
+                                                         IdxEnd - IdxBegin),
+                      Name);
     }
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxBegin, IdxEnd),
                   Name);
@@ -720,33 +764,33 @@ public:
   Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
-        return Folder.CreateGetElementPtr(PC, &IC, 1);
+        return Insert(Folder.CreateGetElementPtr(PC, &IC, 1), Name);
     return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
   }
   Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
     if (Constant *PC = dyn_cast<Constant>(Ptr))
       if (Constant *IC = dyn_cast<Constant>(Idx))
-        return Folder.CreateInBoundsGetElementPtr(PC, &IC, 1);
+        return Insert(Folder.CreateInBoundsGetElementPtr(PC, &IC, 1), Name);
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
   }
   Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateGetElementPtr(PC, &Idx, 1);
+      return Insert(Folder.CreateGetElementPtr(PC, &Idx, 1), Name);
 
-    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);    
+    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
   }
   Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1);
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
   }
-  Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1, 
+  Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
                     const Twine &Name = "") {
     Value *Idxs[] = {
       ConstantInt::get(Type::getInt32Ty(Context), Idx0),
@@ -754,9 +798,9 @@ public:
     };
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateGetElementPtr(PC, Idxs, 2);
+      return Insert(Folder.CreateGetElementPtr(PC, Idxs, 2), Name);
 
-    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);    
+    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
   }
   Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
                                     const Twine &Name = "") {
@@ -766,7 +810,7 @@ public:
     };
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2);
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
   }
@@ -774,16 +818,16 @@ public:
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateGetElementPtr(PC, &Idx, 1);
+      return Insert(Folder.CreateGetElementPtr(PC, &Idx, 1), Name);
 
-    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);    
+    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
   }
   Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
                                     const Twine &Name = "") {
     Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1);
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
   }
@@ -795,9 +839,9 @@ public:
     };
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateGetElementPtr(PC, Idxs, 2);
+      return Insert(Folder.CreateGetElementPtr(PC, Idxs, 2), Name);
 
-    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);    
+    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
   }
   Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
                                     const Twine &Name = "") {
@@ -807,14 +851,14 @@ public:
     };
 
     if (Constant *PC = dyn_cast<Constant>(Ptr))
-      return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2);
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2), Name);
 
     return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
   }
   Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
     return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
   }
-  
+
   /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer
   /// with "i8*" type instead of a pointer to array of i8.
   Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") {
@@ -823,7 +867,7 @@ public:
     Value *Args[] = { zero, zero };
     return CreateInBoundsGEP(gv, Args, Args+2, Name);
   }
-  
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
@@ -873,7 +917,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateZExtOrBitCast(VC, DestTy);
+      return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
   }
   Value *CreateSExtOrBitCast(Value *V, const Type *DestTy,
@@ -881,7 +925,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateSExtOrBitCast(VC, DestTy);
+      return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
   }
   Value *CreateTruncOrBitCast(Value *V, const Type *DestTy,
@@ -889,7 +933,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateTruncOrBitCast(VC, DestTy);
+      return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
     return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
   }
   Value *CreateCast(Instruction::CastOps Op, Value *V, const Type *DestTy,
@@ -897,7 +941,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateCast(Op, VC, DestTy);
+      return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
     return Insert(CastInst::Create(Op, V, DestTy), Name);
   }
   Value *CreatePointerCast(Value *V, const Type *DestTy,
@@ -905,7 +949,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreatePointerCast(VC, DestTy);
+      return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
     return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
   }
   Value *CreateIntCast(Value *V, const Type *DestTy, bool isSigned,
@@ -913,7 +957,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateIntCast(VC, DestTy, isSigned);
+      return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
     return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
   }
 private:
@@ -925,7 +969,7 @@ public:
     if (V->getType() == DestTy)
       return V;
     if (Constant *VC = dyn_cast<Constant>(V))
-      return Folder.CreateFPCast(VC, DestTy);
+      return Insert(Folder.CreateFPCast(VC, DestTy), Name);
     return Insert(CastInst::CreateFPCast(V, DestTy), Name);
   }
 
@@ -1011,14 +1055,14 @@ public:
                     const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateICmp(P, LC, RC);
+        return Insert(Folder.CreateICmp(P, LC, RC), Name);
     return Insert(new ICmpInst(P, LHS, RHS), Name);
   }
   Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
                     const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
-        return Folder.CreateFCmp(P, LC, RC);
+        return Insert(Folder.CreateFCmp(P, LC, RC), Name);
     return Insert(new FCmpInst(P, LHS, RHS), Name);
   }
 
@@ -1057,9 +1101,9 @@ public:
     return Insert(CallInst::Create(Callee, Args, Args+5), Name);
   }
 
-  template<typename InputIterator>
-  CallInst *CreateCall(Value *Callee, InputIterator ArgBegin,
-                       InputIterator ArgEnd, const Twine &Name = "") {
+  template<typename RandomAccessIterator>
+  CallInst *CreateCall(Value *Callee, RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd, const Twine &Name = "") {
     return Insert(CallInst::Create(Callee, ArgBegin, ArgEnd), Name);
   }
 
@@ -1068,7 +1112,7 @@ public:
     if (Constant *CC = dyn_cast<Constant>(C))
       if (Constant *TC = dyn_cast<Constant>(True))
         if (Constant *FC = dyn_cast<Constant>(False))
-          return Folder.CreateSelect(CC, TC, FC);
+          return Insert(Folder.CreateSelect(CC, TC, FC), Name);
     return Insert(SelectInst::Create(C, True, False), Name);
   }
 
@@ -1080,7 +1124,7 @@ public:
                               const Twine &Name = "") {
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *IC = dyn_cast<Constant>(Idx))
-        return Folder.CreateExtractElement(VC, IC);
+        return Insert(Folder.CreateExtractElement(VC, IC), Name);
     return Insert(ExtractElementInst::Create(Vec, Idx), Name);
   }
 
@@ -1089,7 +1133,7 @@ public:
     if (Constant *VC = dyn_cast<Constant>(Vec))
       if (Constant *NC = dyn_cast<Constant>(NewElt))
         if (Constant *IC = dyn_cast<Constant>(Idx))
-          return Folder.CreateInsertElement(VC, NC, IC);
+          return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
     return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
   }
 
@@ -1098,24 +1142,25 @@ public:
     if (Constant *V1C = dyn_cast<Constant>(V1))
       if (Constant *V2C = dyn_cast<Constant>(V2))
         if (Constant *MC = dyn_cast<Constant>(Mask))
-          return Folder.CreateShuffleVector(V1C, V2C, MC);
+          return Insert(Folder.CreateShuffleVector(V1C, V2C, MC), Name);
     return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
   }
 
   Value *CreateExtractValue(Value *Agg, unsigned Idx,
                             const Twine &Name = "") {
     if (Constant *AggC = dyn_cast<Constant>(Agg))
-      return Folder.CreateExtractValue(AggC, &Idx, 1);
+      return Insert(Folder.CreateExtractValue(AggC, &Idx, 1), Name);
     return Insert(ExtractValueInst::Create(Agg, Idx), Name);
   }
 
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   Value *CreateExtractValue(Value *Agg,
-                            InputIterator IdxBegin,
-                            InputIterator IdxEnd,
+                            RandomAccessIterator IdxBegin,
+                            RandomAccessIterator IdxEnd,
                             const Twine &Name = "") {
     if (Constant *AggC = dyn_cast<Constant>(Agg))
-      return Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd - IdxBegin);
+      return Insert(Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd-IdxBegin),
+                    Name);
     return Insert(ExtractValueInst::Create(Agg, IdxBegin, IdxEnd), Name);
   }
 
@@ -1123,18 +1168,20 @@ public:
                            const Twine &Name = "") {
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       if (Constant *ValC = dyn_cast<Constant>(Val))
-        return Folder.CreateInsertValue(AggC, ValC, &Idx, 1);
+        return Insert(Folder.CreateInsertValue(AggC, ValC, &Idx, 1), Name);
     return Insert(InsertValueInst::Create(Agg, Val, Idx), Name);
   }
 
-  template<typename InputIterator>
+  template<typename RandomAccessIterator>
   Value *CreateInsertValue(Value *Agg, Value *Val,
-                           InputIterator IdxBegin,
-                           InputIterator IdxEnd,
+                           RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
                            const Twine &Name = "") {
     if (Constant *AggC = dyn_cast<Constant>(Agg))
       if (Constant *ValC = dyn_cast<Constant>(Val))
-        return Folder.CreateInsertValue(AggC, ValC, IdxBegin, IdxEnd-IdxBegin);
+        return Insert(Folder.CreateInsertValue(AggC, ValC, IdxBegin,
+                                               IdxEnd - IdxBegin),
+                      Name);
     return Insert(InsertValueInst::Create(Agg, Val, IdxBegin, IdxEnd), Name);
   }
 
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
index a44da528acfc..292c001e09f4 100644
--- a/include/llvm/Support/IRReader.h
+++ b/include/llvm/Support/IRReader.h
@@ -19,10 +19,12 @@
 #ifndef LLVM_SUPPORT_IRREADER_H
 #define LLVM_SUPPORT_IRREADER_H
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Assembly/Parser.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
 
 namespace llvm {
 
@@ -56,15 +58,14 @@ namespace llvm {
   inline Module *getLazyIRFileModule(const std::string &Filename,
                                      SMDiagnostic &Err,
                                      LLVMContext &Context) {
-    std::string ErrMsg;
-    MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
-    if (F == 0) {
-      Err = SMDiagnostic(Filename, 
-                         "Could not open input file: " + ErrMsg);
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+      Err = SMDiagnostic(Filename,
+                         "Could not open input file: " + ec.message());
       return 0;
     }
 
-    return getLazyIRModule(F, Err, Context);
+    return getLazyIRModule(File.take(), Err, Context);
   }
 
   /// If the given MemoryBuffer holds a bitcode image, return a Module
@@ -94,15 +95,14 @@ namespace llvm {
   inline Module *ParseIRFile(const std::string &Filename,
                              SMDiagnostic &Err,
                              LLVMContext &Context) {
-    std::string ErrMsg;
-    MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
-    if (F == 0) {
-      Err = SMDiagnostic(Filename, 
-                         "Could not open input file: " + ErrMsg);
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+      Err = SMDiagnostic(Filename,
+                         "Could not open input file: " + ec.message());
       return 0;
     }
 
-    return ParseIR(F, Err, Context);
+    return ParseIR(File.take(), Err, Context);
   }
 
 }
diff --git a/include/llvm/Support/IncludeFile.h b/include/llvm/Support/IncludeFile.h
new file mode 100644
index 000000000000..a9319725d477
--- /dev/null
+++ b/include/llvm/Support/IncludeFile.h
@@ -0,0 +1,79 @@
+//===- llvm/Support/IncludeFile.h - Ensure Linking Of Library ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FORCE_DEFINING_FILE_TO_BE_LINKED and DEFINE_FILE_FOR
+// macros.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_INCLUDEFILE_H
+#define LLVM_SYSTEM_INCLUDEFILE_H
+
+/// This macro is the public interface that IncludeFile.h exports. This gives
+/// us the option to implement the "link the definition" capability in any
+/// manner that we choose. All header files that depend on a specific .cpp
+/// file being linked at run time should use this macro instead of the
+/// IncludeFile class directly.
+///
+/// For example, foo.h would use:<br/>
+/// <tt>FORCE_DEFINING_FILE_TO_BE_LINKED(foo)</tt><br/>
+///
+/// And, foo.cp would use:<br/>
+/// <tt>DEFINING_FILE_FOR(foo)</tt><br/>
+#ifdef __GNUC__
+// If the `used' attribute is available, use it to create a variable
+// with an initializer that will force the linking of the defining file.
+#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
+  namespace llvm { \
+    extern const char name ## LinkVar; \
+    __attribute__((used)) static const char *const name ## LinkObj = \
+      &name ## LinkVar; \
+  }
+#else
+// Otherwise use a constructor call.
+#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
+  namespace llvm { \
+    extern const char name ## LinkVar; \
+    static const IncludeFile name ## LinkObj ( &name ## LinkVar ); \
+  }
+#endif
+
+/// This macro is the counterpart to FORCE_DEFINING_FILE_TO_BE_LINKED. It should
+/// be used in a .cpp file to define the name referenced in a header file that
+/// will cause linkage of the .cpp file. It should only be used at extern level.
+#define DEFINING_FILE_FOR(name) \
+  namespace llvm { const char name ## LinkVar = 0; }
+
+namespace llvm {
+
+/// This class is used in the implementation of FORCE_DEFINING_FILE_TO_BE_LINKED
+/// macro to make sure that the implementation of a header file is included
+/// into a tool that uses the header.  This is solely
+/// to overcome problems linking .a files and not getting the implementation
+/// of compilation units we need. This is commonly an issue with the various
+/// Passes but also occurs elsewhere in LLVM. We like to use .a files because
+/// they link faster and provide the smallest executables. However, sometimes
+/// those executables are too small, if the program doesn't reference something
+/// that might be needed, especially by a loaded share object. This little class
+/// helps to resolve that problem. The basic strategy is to use this class in
+/// a header file and pass the address of a variable to the constructor. If the
+/// variable is defined in the header file's corresponding .cpp file then all
+/// tools/libraries that \#include the header file will require the .cpp as
+/// well.
+/// For example:<br/>
+/// <tt>extern int LinkMyCodeStub;</tt><br/>
+/// <tt>static IncludeFile LinkMyModule(&LinkMyCodeStub);</tt><br/>
+/// @brief Class to ensure linking of corresponding object file.
+struct IncludeFile {
+  explicit IncludeFile(const void *);
+};
+
+}
+
+#endif
diff --git a/include/llvm/Support/LICENSE.TXT b/include/llvm/Support/LICENSE.TXT
new file mode 100644
index 000000000000..3479b3fd74d5
--- /dev/null
+++ b/include/llvm/Support/LICENSE.TXT
@@ -0,0 +1,6 @@
+LLVM System Interface Library
+-------------------------------------------------------------------------------
+The LLVM System Interface Library is licensed under the Illinois Open Source
+License and has the following additional copyright:
+
+Copyright (C) 2004 eXtensible Systems, Inc.
diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h
index 4c13177926d2..6841a0f1fc15 100644
--- a/include/llvm/Support/MachO.h
+++ b/include/llvm/Support/MachO.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_SUPPORT_MACHO_H
 #define LLVM_SUPPORT_MACHO_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 // NOTE: The enums in this file are intentially named to be different than those
 // in the headers in /usr/include/mach (on darwin systems) to avoid conflicts
@@ -23,7 +23,7 @@ namespace llvm {
   namespace MachO {
     // Enums from <mach-o/loader.h>
     enum {
-      // Constants for the "magic" field in llvm::MachO::mach_header and 
+      // Constants for the "magic" field in llvm::MachO::mach_header and
       // llvm::MachO::mach_header_64
       HeaderMagic32         = 0xFEEDFACEu, // MH_MAGIC
       HeaderMagic32Swapped  = 0xCEFAEDFEu, // MH_CIGAM
@@ -71,7 +71,7 @@ namespace llvm {
       HeaderFlagBitNoReexportedDylibs     = 0x00100000u, // MH_NO_REEXPORTED_DYLIBS
       HeaderFlagBitPIE                    = 0x00200000u, // MH_PIE
       HeaderFlagBitDeadStrippableDylib    = 0x00400000u, // MH_DEAD_STRIPPABLE_DYLIB
-      
+
       // Constants for the "cmd" field in llvm::MachO::load_command
       LoadCommandDynamicLinkerRequired    = 0x80000000u, // LC_REQ_DYLD
       LoadCommandSegment32                = 0x00000001u, // LC_SEGMENT
@@ -110,7 +110,7 @@ namespace llvm {
       LoadCommandDynamicLinkerInfo        = 0x00000022u, // LC_DYLD_INFO
       LoadCommandDynamicLinkerInfoOnly    = 0x80000022u, // LC_DYLD_INFO_ONLY
       LoadCommandDylibLoadUpward          = 0x80000023u, // LC_LOAD_UPWARD_DYLIB
-      
+
       // Constant bits for the "flags" field in llvm::MachO::segment_command
       SegmentCommandFlagBitHighVM             = 0x1u, // SG_HIGHVM
       SegmentCommandFlagBitFixedVMLibrary     = 0x2u, // SG_FVMLIB
@@ -243,20 +243,20 @@ namespace llvm {
       StabFunction              = 0x24u,  // N_FUN	
       StabStaticSymbol          = 0x26u,  // N_STSYM	
       StabLocalCommon           = 0x28u,  // N_LCSYM	
-      StabBeginSymbol           = 0x2Eu,  // N_BNSYM   
+      StabBeginSymbol           = 0x2Eu,  // N_BNSYM
       StabSourceFileOptions     = 0x3Cu,  // N_OPT	
       StabRegisterSymbol        = 0x40u,  // N_RSYM	
       StabSourceLine            = 0x44u,  // N_SLINE	
-      StabEndSymbol             = 0x4Eu,  // N_ENSYM   
+      StabEndSymbol             = 0x4Eu,  // N_ENSYM
       StabStructureType         = 0x60u,  // N_SSYM	
       StabSourceFileName        = 0x64u,  // N_SO	
       StabObjectFileName        = 0x66u,  // N_OSO	
       StabLocalSymbol           = 0x80u,  // N_LSYM	
       StabBeginIncludeFileName  = 0x82u,  // N_BINCL	
       StabIncludeFileName       = 0x84u,  // N_SOL	
-      StabCompilerParameters    = 0x86u,  // N_PARAMS  
-      StabCompilerVersion       = 0x88u,  // N_VERSION 
-      StabCompilerOptLevel      = 0x8Au,  // N_OLEVEL  
+      StabCompilerParameters    = 0x86u,  // N_PARAMS
+      StabCompilerVersion       = 0x88u,  // N_VERSION
+      StabCompilerOptLevel      = 0x8Au,  // N_OLEVEL
       StabParameter             = 0xA0u,  // N_PSYM	
       StabEndIncludeFile        = 0xA2u,  // N_EINCL	
       StabAlternateEntry        = 0xA4u,  // N_ENTRY	
@@ -269,9 +269,9 @@ namespace llvm {
       StabLength                = 0xFEu   // N_LENG	
 
     };
-    
+
     // Structs from <mach-o/loader.h>
-    
+
     struct mach_header {
       uint32_t magic;
       uint32_t cputype;
@@ -636,12 +636,12 @@ namespace llvm {
     };
 
     // Get/Set functions from <mach-o/nlist.h>
-    
+
     static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc)
     {
       return (((n_desc) >> 8u) & 0xffu);
     }
-      
+
     static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal)
     {
       n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8));
@@ -651,7 +651,7 @@ namespace llvm {
     {
       return (n_desc >> 8u) & 0x0fu;
     }
-    
+
     static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align)
     {
       n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u));
@@ -662,7 +662,7 @@ namespace llvm {
       // Capability bits used in the definition of cpu_type.
       CPUArchMask = 0xff000000,   // Mask for architecture bits
       CPUArchABI64 = 0x01000000,  // 64 bit ABI
-      
+
       // Constants for the cputype field.
       CPUTypeI386      = 7,
       CPUTypeX86_64    = CPUTypeI386 | CPUArchABI64,
@@ -673,19 +673,21 @@ namespace llvm {
 
 
       // Constants for the cpusubtype field.
-      
+
       // X86
       CPUSubType_I386_ALL    = 3,
       CPUSubType_X86_64_ALL  = 3,
-      
+
       // ARM
       CPUSubType_ARM_ALL     = 0,
       CPUSubType_ARM_V4T     = 5,
+      CPUSubType_ARM_V5      = 7,
       CPUSubType_ARM_V6      = 6,
+      CPUSubType_ARM_V7      = 9,
 
       // PowerPC
       CPUSubType_POWERPC_ALL = 0,
-      
+
       CPUSubType_SPARC_ALL   = 0
     };
   } // end namespace MachO
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index b8e223587fbd..53e73ad35f49 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_SUPPORT_MANAGED_STATIC_H
 #define LLVM_SUPPORT_MANAGED_STATIC_H
 
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Threading.h"
 
 namespace llvm {
 
@@ -91,12 +91,6 @@ public:
   }
 };
 
-template<void (*CleanupFn)(void*)>
-class ManagedCleanup : public ManagedStaticBase {
-public:
-  void Register() { RegisterManagedStatic(0, CleanupFn); }
-};
-
 /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
 void llvm_shutdown();
 
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 982813f7186f..4627557f7f1f 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -14,7 +14,7 @@
 #ifndef LLVM_SUPPORT_MATHEXTRAS_H
 #define LLVM_SUPPORT_MATHEXTRAS_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/SwapByteOrder.h"
 
 namespace llvm {
 
@@ -70,6 +70,18 @@ inline bool isUInt<32>(uint64_t x) {
   return static_cast<uint32_t>(x) == x;
 }
 
+/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
+/// bit width.
+inline bool isUIntN(unsigned N, uint64_t x) {
+  return x == (x & (~0ULL >> (64 - N)));
+}
+
+/// isIntN - Checks if an signed integer fits into the given (dynamic)
+/// bit width.
+inline bool isIntN(unsigned N, int64_t x) {
+  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
+}
+
 /// isMask_32 - This function returns true if the argument is a sequence of ones
 /// starting at the least significant bit with the remainder zero (32 bit
 /// version).   Ex. isMask_32(0x0000FFFFU) == true.
@@ -112,47 +124,19 @@ inline bool isPowerOf2_64(uint64_t Value) {
 /// ByteSwap_16 - This function returns a byte-swapped representation of the
 /// 16-bit argument, Value.
 inline uint16_t ByteSwap_16(uint16_t Value) {
-#if defined(_MSC_VER) && !defined(_DEBUG)
-  // The DLL version of the runtime lacks these functions (bug!?), but in a
-  // release build they're replaced with BSWAP instructions anyway.
-  return _byteswap_ushort(Value);
-#else
-  uint16_t Hi = Value << 8;
-  uint16_t Lo = Value >> 8;
-  return Hi | Lo;
-#endif
+  return sys::SwapByteOrder_16(Value);
 }
 
 /// ByteSwap_32 - This function returns a byte-swapped representation of the
 /// 32-bit argument, Value.
 inline uint32_t ByteSwap_32(uint32_t Value) {
-#if defined(__llvm__) || \
-    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
-  return __builtin_bswap32(Value);
-#elif defined(_MSC_VER) && !defined(_DEBUG)
-  return _byteswap_ulong(Value);
-#else
-  uint32_t Byte0 = Value & 0x000000FF;
-  uint32_t Byte1 = Value & 0x0000FF00;
-  uint32_t Byte2 = Value & 0x00FF0000;
-  uint32_t Byte3 = Value & 0xFF000000;
-  return (Byte0 << 24) | (Byte1 << 8) | (Byte2 >> 8) | (Byte3 >> 24);
-#endif
+  return sys::SwapByteOrder_32(Value);
 }
 
 /// ByteSwap_64 - This function returns a byte-swapped representation of the
 /// 64-bit argument, Value.
 inline uint64_t ByteSwap_64(uint64_t Value) {
-#if defined(__llvm__) || \
-    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
-  return __builtin_bswap64(Value);
-#elif defined(_MSC_VER) && !defined(_DEBUG)
-  return _byteswap_uint64(Value);
-#else
-  uint64_t Hi = ByteSwap_32(uint32_t(Value));
-  uint32_t Lo = ByteSwap_32(uint32_t(Value >> 32));
-  return (Hi << 32) | Lo;
-#endif
+  return sys::SwapByteOrder_64(Value);
 }
 
 /// CountLeadingZeros_32 - this function performs the platform optimal form of
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
new file mode 100644
index 000000000000..9c3f85b958bc
--- /dev/null
+++ b/include/llvm/Support/Memory.h
@@ -0,0 +1,96 @@
+//===- llvm/Support/Memory.h - Memory Support --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Memory class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_MEMORY_H
+#define LLVM_SYSTEM_MEMORY_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  /// This class encapsulates the notion of a memory block which has an address
+  /// and a size. It is used by the Memory class (a friend) as the result of
+  /// various memory allocation operations.
+  /// @see Memory
+  /// @brief Memory block abstraction.
+  class MemoryBlock {
+  public:
+    MemoryBlock() : Address(0), Size(0) { }
+    MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
+    void *base() const { return Address; }
+    size_t size() const { return Size; }
+  private:
+    void *Address;    ///< Address of first byte of memory area
+    size_t Size;      ///< Size, in bytes of the memory area
+    friend class Memory;
+  };
+
+  /// This class provides various memory handling functions that manipulate
+  /// MemoryBlock instances.
+  /// @since 1.4
+  /// @brief An abstraction for memory operations.
+  class Memory {
+  public:
+    /// This method allocates a block of Read/Write/Execute memory that is
+    /// suitable for executing dynamically generated code (e.g. JIT). An
+    /// attempt to allocate \p NumBytes bytes of virtual memory is made.
+    /// \p NearBlock may point to an existing allocation in which case
+    /// an attempt is made to allocate more memory near the existing block.
+    ///
+    /// On success, this returns a non-null memory block, otherwise it returns
+    /// a null memory block and fills in *ErrMsg.
+    ///
+    /// @brief Allocate Read/Write/Execute memory.
+    static MemoryBlock AllocateRWX(size_t NumBytes,
+                                   const MemoryBlock *NearBlock,
+                                   std::string *ErrMsg = 0);
+
+    /// This method releases a block of Read/Write/Execute memory that was
+    /// allocated with the AllocateRWX method. It should not be used to
+    /// release any memory block allocated any other way.
+    ///
+    /// On success, this returns false, otherwise it returns true and fills
+    /// in *ErrMsg.
+    /// @brief Release Read/Write/Execute memory.
+    static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = 0);
+
+
+    /// InvalidateInstructionCache - Before the JIT can run a block of code
+    /// that has been emitted it must invalidate the instruction cache on some
+    /// platforms.
+    static void InvalidateInstructionCache(const void *Addr, size_t Len);
+
+    /// setExecutable - Before the JIT can run a block of code, it has to be
+    /// given read and executable privilege. Return true if it is already r-x
+    /// or the system is able to change its previlege.
+    static bool setExecutable (MemoryBlock &M, std::string *ErrMsg = 0);
+
+    /// setWritable - When adding to a block of code, the JIT may need
+    /// to mark a block of code as RW since the protections are on page
+    /// boundaries, and the JIT internal allocations are not page aligned.
+    static bool setWritable (MemoryBlock &M, std::string *ErrMsg = 0);
+
+    /// setRangeExecutable - Mark the page containing a range of addresses
+    /// as executable.
+    static bool setRangeExecutable(const void *Addr, size_t Size);
+
+    /// setRangeWritable - Mark the page containing a range of addresses
+    /// as writable.
+    static bool setRangeWritable(const void *Addr, size_t Size);
+  };
+}
+}
+
+#endif
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 8a41aa5f94fa..b6243b7b10dd 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -15,12 +15,13 @@
 #define LLVM_SUPPORT_MEMORYBUFFER_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
-#include <string>
-#include <sys/stat.h>
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
+class error_code;
+template<class T> class OwningPtr;
+
 /// MemoryBuffer - This interface provides simple read-only access to a block
 /// of memory, and provides simple methods for reading files and standard input
 /// into a memory buffer.  In addition to basic access to the characters in the
@@ -47,8 +48,8 @@ public:
   const char *getBufferEnd() const   { return BufferEnd; }
   size_t getBufferSize() const { return BufferEnd-BufferStart; }
 
-  StringRef getBuffer() const { 
-    return StringRef(BufferStart, getBufferSize()); 
+  StringRef getBuffer() const {
+    return StringRef(BufferStart, getBufferSize());
   }
 
   /// getBufferIdentifier - Return an identifier for this buffer, typically the
@@ -61,23 +62,26 @@ public:
   /// MemoryBuffer if successful, otherwise returning null.  If FileSize is
   /// specified, this means that the client knows that the file exists and that
   /// it has the specified size.
-  static MemoryBuffer *getFile(StringRef Filename,
-                               std::string *ErrStr = 0,
-                               int64_t FileSize = -1,
-                               struct stat *FileInfo = 0);
-  static MemoryBuffer *getFile(const char *Filename,
-                               std::string *ErrStr = 0,
-                               int64_t FileSize = -1,
-                               struct stat *FileInfo = 0);
+  static error_code getFile(StringRef Filename, OwningPtr<MemoryBuffer> &result,
+                            int64_t FileSize = -1);
+  static error_code getFile(const char *Filename,
+                            OwningPtr<MemoryBuffer> &result,
+                            int64_t FileSize = -1);
+
+  /// getOpenFile - Given an already-open file descriptor, read the file and
+  /// return a MemoryBuffer.
+  static error_code getOpenFile(int FD, const char *Filename,
+                                OwningPtr<MemoryBuffer> &result,
+                                int64_t FileSize = -1);
 
   /// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
-  /// that EndPtr[0] must be a null byte and be accessible!
+  /// that InputData must be null terminated.
   static MemoryBuffer *getMemBuffer(StringRef InputData,
                                     StringRef BufferName = "");
 
   /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
-  /// copying the contents and taking ownership of it.  This has no requirements
-  /// on EndPtr[0].
+  /// copying the contents and taking ownership of it.  InputData does not
+  /// have to be null terminated.
   static MemoryBuffer *getMemBufferCopy(StringRef InputData,
                                         StringRef BufferName = "");
 
@@ -95,21 +99,19 @@ public:
                                              StringRef BufferName = "");
 
   /// getSTDIN - Read all of stdin into a file buffer, and return it.
-  /// If an error occurs, this returns null and fills in *ErrStr with a reason.
-  static MemoryBuffer *getSTDIN(std::string *ErrStr = 0);
+  /// If an error occurs, this returns null and sets ec.
+  static error_code getSTDIN(OwningPtr<MemoryBuffer> &result);
 
 
   /// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
-  /// if the Filename is "-".  If an error occurs, this returns null and fills
-  /// in *ErrStr with a reason.
-  static MemoryBuffer *getFileOrSTDIN(StringRef Filename,
-                                      std::string *ErrStr = 0,
-                                      int64_t FileSize = -1,
-                                      struct stat *FileInfo = 0);
-  static MemoryBuffer *getFileOrSTDIN(const char *Filename,
-                                      std::string *ErrStr = 0,
-                                      int64_t FileSize = -1,
-                                      struct stat *FileInfo = 0);
+  /// if the Filename is "-".  If an error occurs, this returns null and sets
+  /// ec.
+  static error_code getFileOrSTDIN(StringRef Filename,
+                                   OwningPtr<MemoryBuffer> &result,
+                                   int64_t FileSize = -1);
+  static error_code getFileOrSTDIN(const char *Filename,
+                                   OwningPtr<MemoryBuffer> &result,
+                                   int64_t FileSize = -1);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
index e193ca2f2bc5..dec0f134b306 100644
--- a/include/llvm/Support/MemoryObject.h
+++ b/include/llvm/Support/MemoryObject.h
@@ -10,7 +10,7 @@
 #ifndef MEMORYOBJECT_H
 #define MEMORYOBJECT_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
new file mode 100644
index 000000000000..42ea63060f66
--- /dev/null
+++ b/include/llvm/Support/Mutex.h
@@ -0,0 +1,154 @@
+//===- llvm/Support/Mutex.h - Mutex Operating System Concept -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_MUTEX_H
+#define LLVM_SYSTEM_MUTEX_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm
+{
+  namespace sys
+  {
+    /// @brief Platform agnostic Mutex class.
+    class MutexImpl
+    {
+    /// @name Constructors
+    /// @{
+    public:
+
+      /// Initializes the lock but doesn't acquire it. if \p recursive is set
+      /// to false, the lock will not be recursive which makes it cheaper but
+      /// also more likely to deadlock (same thread can't acquire more than
+      /// once).
+      /// @brief Default Constructor.
+      explicit MutexImpl(bool recursive = true);
+
+      /// Releases and removes the lock
+      /// @brief Destructor
+      ~MutexImpl();
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+
+      /// Attempts to unconditionally acquire the lock. If the lock is held by
+      /// another thread, this method will wait until it can acquire the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock.
+      bool acquire();
+
+      /// Attempts to release the lock. If the lock is held by the current
+      /// thread, the lock is released allowing other threads to acquire the
+      /// lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock.
+      bool release();
+
+      /// Attempts to acquire the lock without blocking. If the lock is not
+      /// available, this function returns false quickly (without blocking). If
+      /// the lock is available, it is acquired.
+      /// @returns false if any kind of error occurs or the lock is not
+      /// available, true otherwise.
+      /// @brief Try to acquire the lock.
+      bool tryacquire();
+
+    //@}
+    /// @name Platform Dependent Data
+    /// @{
+    private:
+      void* data_; ///< We don't know what the data will be
+
+    /// @}
+    /// @name Do Not Implement
+    /// @{
+    private:
+      MutexImpl(const MutexImpl & original);
+      void operator=(const MutexImpl &);
+    /// @}
+    };
+
+
+    /// SmartMutex - A mutex with a compile time constant parameter that
+    /// indicates whether this mutex should become a no-op when we're not
+    /// running in multithreaded mode.
+    template<bool mt_only>
+    class SmartMutex : public MutexImpl {
+      unsigned acquired;
+      bool recursive;
+    public:
+      explicit SmartMutex(bool rec = true) :
+        MutexImpl(rec), acquired(0), recursive(rec) { }
+
+      bool acquire() {
+        if (!mt_only || llvm_is_multithreaded()) {
+          return MutexImpl::acquire();
+        } else {
+          // Single-threaded debugging code.  This would be racy in
+          // multithreaded mode, but provides not sanity checks in single
+          // threaded mode.
+          assert((recursive || acquired == 0) && "Lock already acquired!!");
+          ++acquired;
+          return true;
+        }
+      }
+
+      bool release() {
+        if (!mt_only || llvm_is_multithreaded()) {
+          return MutexImpl::release();
+        } else {
+          // Single-threaded debugging code.  This would be racy in
+          // multithreaded mode, but provides not sanity checks in single
+          // threaded mode.
+          assert(((recursive && acquired) || (acquired == 1)) &&
+                 "Lock not acquired before release!");
+          --acquired;
+          return true;
+        }
+      }
+
+      bool tryacquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return MutexImpl::tryacquire();
+        else return true;
+      }
+
+      private:
+        SmartMutex(const SmartMutex<mt_only> & original);
+        void operator=(const SmartMutex<mt_only> &);
+    };
+
+    /// Mutex - A standard, always enforced mutex.
+    typedef SmartMutex<false> Mutex;
+
+    template<bool mt_only>
+    class SmartScopedLock  {
+      SmartMutex<mt_only>& mtx;
+
+    public:
+      SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) {
+        mtx.acquire();
+      }
+
+      ~SmartScopedLock() {
+        mtx.release();
+      }
+    };
+
+    typedef SmartScopedLock<false> ScopedLock;
+  }
+}
+
+#endif
diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h
index 9958b97a3e64..cd13bfe6eeb0 100644
--- a/include/llvm/Support/MutexGuard.h
+++ b/include/llvm/Support/MutexGuard.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_SUPPORT_MUTEXGUARD_H
 #define LLVM_SUPPORT_MUTEXGUARD_H
 
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 
 namespace llvm {
   /// Instances of this class acquire a given Mutex Lock when constructed and
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 01256e18a5ce..d7b5b42924c3 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -15,8 +15,7 @@
 // llvm/Analysis/ConstantFolding.h.
 //
 // Note: since it is not actually possible to create unfolded constants, this
-// class returns values rather than constants.  The values do not have names,
-// even if names were provided to IRBuilder, which may be confusing.
+// class returns instructions rather than constants.
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,7 +29,7 @@ namespace llvm {
 
 class LLVMContext;
 
-/// NoFolder - Create "constants" (actually, values) with no folding.
+/// NoFolder - Create "constants" (actually, instructions) with no folding.
 class NoFolder {
 public:
   explicit NoFolder(LLVMContext &) {}
@@ -39,84 +38,87 @@ public:
   // Binary Operators
   //===--------------------------------------------------------------------===//
 
-  Value *CreateAdd(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateAdd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateAdd(LHS, RHS);
   }
-  Value *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNSWAdd(LHS, RHS);
   }
-  Value *CreateNUWAdd(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNUWAdd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNUWAdd(LHS, RHS);
   }
-  Value *CreateFAdd(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateFAdd(LHS, RHS);
   }
-  Value *CreateSub(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateSub(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateSub(LHS, RHS);
   }
-  Value *CreateNSWSub(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNSWSub(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNSWSub(LHS, RHS);
   }
-  Value *CreateNUWSub(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNUWSub(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNUWSub(LHS, RHS);
   }
-  Value *CreateFSub(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFSub(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateFSub(LHS, RHS);
   }
-  Value *CreateMul(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateMul(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateMul(LHS, RHS);
   }
-  Value *CreateNSWMul(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNSWMul(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNSWMul(LHS, RHS);
   }
-  Value *CreateNUWMul(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateNUWMul(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateNUWMul(LHS, RHS);
   }
-  Value *CreateFMul(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFMul(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateFMul(LHS, RHS);
   }
-  Value *CreateUDiv(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateUDiv(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateUDiv(LHS, RHS);
   }
-  Value *CreateSDiv(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateExactUDiv(Constant *LHS, Constant *RHS) const {
+    return BinaryOperator::CreateExactUDiv(LHS, RHS);
+  }
+  Instruction *CreateSDiv(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateSDiv(LHS, RHS);
   }
-  Value *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateExactSDiv(LHS, RHS);
   }
-  Value *CreateFDiv(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFDiv(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateFDiv(LHS, RHS);
   }
-  Value *CreateURem(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateURem(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateURem(LHS, RHS);
   }
-  Value *CreateSRem(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateSRem(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateSRem(LHS, RHS);
   }
-  Value *CreateFRem(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFRem(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateFRem(LHS, RHS);
   }
-  Value *CreateShl(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateShl(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateShl(LHS, RHS);
   }
-  Value *CreateLShr(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateLShr(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateLShr(LHS, RHS);
   }
-  Value *CreateAShr(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateAShr(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateAShr(LHS, RHS);
   }
-  Value *CreateAnd(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateAnd(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateAnd(LHS, RHS);
   }
-  Value *CreateOr(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateOr(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateOr(LHS, RHS);
   }
-  Value *CreateXor(Constant *LHS, Constant *RHS) const {
+  Instruction *CreateXor(Constant *LHS, Constant *RHS) const {
     return BinaryOperator::CreateXor(LHS, RHS);
   }
 
-  Value *CreateBinOp(Instruction::BinaryOps Opc,
-                     Constant *LHS, Constant *RHS) const {
+  Instruction *CreateBinOp(Instruction::BinaryOps Opc,
+                           Constant *LHS, Constant *RHS) const {
     return BinaryOperator::Create(Opc, LHS, RHS);
   }
 
@@ -124,16 +126,19 @@ public:
   // Unary Operators
   //===--------------------------------------------------------------------===//
 
-  Value *CreateNeg(Constant *C) const {
+  Instruction *CreateNeg(Constant *C) const {
     return BinaryOperator::CreateNeg(C);
   }
-  Value *CreateNSWNeg(Constant *C) const {
+  Instruction *CreateNSWNeg(Constant *C) const {
     return BinaryOperator::CreateNSWNeg(C);
   }
-  Value *CreateNUWNeg(Constant *C) const {
+  Instruction *CreateNUWNeg(Constant *C) const {
     return BinaryOperator::CreateNUWNeg(C);
   }
-  Value *CreateNot(Constant *C) const {
+  Instruction *CreateFNeg(Constant *C) const {
+    return BinaryOperator::CreateFNeg(C);
+  }
+  Instruction *CreateNot(Constant *C) const {
     return BinaryOperator::CreateNot(C);
   }
 
@@ -145,8 +150,8 @@ public:
                                 unsigned NumIdx) const {
     return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx);
   }
-  Value *CreateGetElementPtr(Constant *C, Value* const *IdxList,
-                             unsigned NumIdx) const {
+  Instruction *CreateGetElementPtr(Constant *C, Value* const *IdxList,
+                                   unsigned NumIdx) const {
     return GetElementPtrInst::Create(C, IdxList, IdxList+NumIdx);
   }
 
@@ -154,8 +159,8 @@ public:
                                         unsigned NumIdx) const {
     return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx);
   }
-  Value *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
-                                     unsigned NumIdx) const {
+  Instruction *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
+                                           unsigned NumIdx) const {
     return GetElementPtrInst::CreateInBounds(C, IdxList, IdxList+NumIdx);
   }
 
@@ -163,23 +168,51 @@ public:
   // Cast/Conversion Operators
   //===--------------------------------------------------------------------===//
 
-  Value *CreateCast(Instruction::CastOps Op, Constant *C,
+  Instruction *CreateCast(Instruction::CastOps Op, Constant *C,
                     const Type *DestTy) const {
     return CastInst::Create(Op, C, DestTy);
   }
-  Value *CreateIntCast(Constant *C, const Type *DestTy,
+  Instruction *CreatePointerCast(Constant *C, const Type *DestTy) const {
+    return CastInst::CreatePointerCast(C, DestTy);
+  }
+  Instruction *CreateIntCast(Constant *C, const Type *DestTy,
                        bool isSigned) const {
     return CastInst::CreateIntegerCast(C, DestTy, isSigned);
   }
+  Instruction *CreateFPCast(Constant *C, const Type *DestTy) const {
+    return CastInst::CreateFPCast(C, DestTy);
+  }
+
+  Instruction *CreateBitCast(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::BitCast, C, DestTy);
+  }
+  Instruction *CreateIntToPtr(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::IntToPtr, C, DestTy);
+  }
+  Instruction *CreatePtrToInt(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::PtrToInt, C, DestTy);
+  }
+  Instruction *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const {
+    return CastInst::CreateZExtOrBitCast(C, DestTy);
+  }
+  Instruction *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const {
+    return CastInst::CreateSExtOrBitCast(C, DestTy);
+  }
+
+  Instruction *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const {
+    return CastInst::CreateTruncOrBitCast(C, DestTy);
+  }
 
   //===--------------------------------------------------------------------===//
   // Compare Instructions
   //===--------------------------------------------------------------------===//
 
-  Value *CreateICmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const {
+  Instruction *CreateICmp(CmpInst::Predicate P,
+                          Constant *LHS, Constant *RHS) const {
     return new ICmpInst(P, LHS, RHS);
   }
-  Value *CreateFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const {
+  Instruction *CreateFCmp(CmpInst::Predicate P,
+                          Constant *LHS, Constant *RHS) const {
     return new FCmpInst(P, LHS, RHS);
   }
 
@@ -187,30 +220,33 @@ public:
   // Other Instructions
   //===--------------------------------------------------------------------===//
 
-  Value *CreateSelect(Constant *C, Constant *True, Constant *False) const {
+  Instruction *CreateSelect(Constant *C,
+                            Constant *True, Constant *False) const {
     return SelectInst::Create(C, True, False);
   }
 
-  Value *CreateExtractElement(Constant *Vec, Constant *Idx) const {
+  Instruction *CreateExtractElement(Constant *Vec, Constant *Idx) const {
     return ExtractElementInst::Create(Vec, Idx);
   }
 
-  Value *CreateInsertElement(Constant *Vec, Constant *NewElt,
-                             Constant *Idx) const {
+  Instruction *CreateInsertElement(Constant *Vec, Constant *NewElt,
+                                   Constant *Idx) const {
     return InsertElementInst::Create(Vec, NewElt, Idx);
   }
 
-  Value *CreateShuffleVector(Constant *V1, Constant *V2, Constant *Mask) const {
+  Instruction *CreateShuffleVector(Constant *V1, Constant *V2,
+                                   Constant *Mask) const {
     return new ShuffleVectorInst(V1, V2, Mask);
   }
 
-  Value *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
-                            unsigned NumIdx) const {
+  Instruction *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
+                                  unsigned NumIdx) const {
     return ExtractValueInst::Create(Agg, IdxList, IdxList+NumIdx);
   }
 
-  Value *CreateInsertValue(Constant *Agg, Constant *Val,
-                           const unsigned *IdxList, unsigned NumIdx) const {
+  Instruction *CreateInsertValue(Constant *Agg, Constant *Val,
+                                 const unsigned *IdxList,
+                                 unsigned NumIdx) const {
     return InsertValueInst::Create(Agg, Val, IdxList, IdxList+NumIdx);
   }
 };
diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h
new file mode 100644
index 000000000000..196eecce8185
--- /dev/null
+++ b/include/llvm/Support/Path.h
@@ -0,0 +1,16 @@
+//===- llvm/Support/Path.h - Path Operating System Concept ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file currently includes both PathV1 and PathV2 to facilitate moving
+// clients over to the new interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV1.h"
+#include "llvm/Support/PathV2.h"
diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h
new file mode 100644
index 000000000000..a1c3f6a49a19
--- /dev/null
+++ b/include/llvm/Support/PathV1.h
@@ -0,0 +1,755 @@
+//===- llvm/Support/PathV1.h - Path Operating System Concept ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Path class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_PATH_H
+#define LLVM_SYSTEM_PATH_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TimeValue.h"
+#include <set>
+#include <string>
+#include <vector>
+
+#define LLVM_PATH_DEPRECATED_MSG(replacement) \
+  "PathV1 has been deprecated and will be removed as soon as all LLVM and" \
+  " Clang clients have been moved over to PathV2. Please use `" #replacement \
+  "` from PathV2 instead."
+
+namespace llvm {
+namespace sys {
+
+  /// This structure provides basic file system information about a file. It
+  /// is patterned after the stat(2) Unix operating system call but made
+  /// platform independent and eliminates many of the unix-specific fields.
+  /// However, to support llvm-ar, the mode, user, and group fields are
+  /// retained. These pertain to unix security and may not have a meaningful
+  /// value on non-Unix platforms. However, the other fields should
+  /// always be applicable on all platforms.  The structure is filled in by
+  /// the PathWithStatus class.
+  /// @brief File status structure
+  class FileStatus {
+  public:
+    uint64_t    fileSize;   ///< Size of the file in bytes
+    TimeValue   modTime;    ///< Time of file's modification
+    uint32_t    mode;       ///< Mode of the file, if applicable
+    uint32_t    user;       ///< User ID of owner, if applicable
+    uint32_t    group;      ///< Group ID of owner, if applicable
+    uint64_t    uniqueID;   ///< A number to uniquely ID this file
+    bool        isDir  : 1; ///< True if this is a directory.
+    bool        isFile : 1; ///< True if this is a file.
+
+    FileStatus() : fileSize(0), modTime(0,0), mode(0777), user(999),
+                   group(999), uniqueID(0), isDir(false), isFile(false) { }
+
+    TimeValue getTimestamp() const { return modTime; }
+    uint64_t getSize() const { return fileSize; }
+    uint32_t getMode() const { return mode; }
+    uint32_t getUser() const { return user; }
+    uint32_t getGroup() const { return group; }
+    uint64_t getUniqueID() const { return uniqueID; }
+  };
+
+  /// This class provides an abstraction for the path to a file or directory
+  /// in the operating system's filesystem and provides various basic operations
+  /// on it.  Note that this class only represents the name of a path to a file
+  /// or directory which may or may not be valid for a given machine's file
+  /// system. The class is patterned after the java.io.File class with various
+  /// extensions and several omissions (not relevant to LLVM).  A Path object
+  /// ensures that the path it encapsulates is syntactically valid for the
+  /// operating system it is running on but does not ensure correctness for
+  /// any particular file system. That is, a syntactically valid path might
+  /// specify path components that do not exist in the file system and using
+  /// such a Path to act on the file system could produce errors. There is one
+  /// invalid Path value which is permitted: the empty path.  The class should
+  /// never allow a syntactically invalid non-empty path name to be assigned.
+  /// Empty paths are required in order to indicate an error result in some
+  /// situations. If the path is empty, the isValid operation will return
+  /// false. All operations will fail if isValid is false. Operations that
+  /// change the path will either return false if it would cause a syntactically
+  /// invalid path name (in which case the Path object is left unchanged) or
+  /// throw an std::string exception indicating the error. The methods are
+  /// grouped into four basic categories: Path Accessors (provide information
+  /// about the path without accessing disk), Disk Accessors (provide
+  /// information about the underlying file or directory), Path Mutators
+  /// (change the path information, not the disk), and Disk Mutators (change
+  /// the disk file/directory referenced by the path). The Disk Mutator methods
+  /// all have the word "disk" embedded in their method name to reinforce the
+  /// notion that the operation modifies the file system.
+  /// @since 1.4
+  /// @brief An abstraction for operating system paths.
+  class Path {
+    /// @name Constructors
+    /// @{
+    public:
+      /// Construct a path to the root directory of the file system. The root
+      /// directory is a top level directory above which there are no more
+      /// directories. For example, on UNIX, the root directory is /. On Windows
+      /// it is file:///. Other operating systems may have different notions of
+      /// what the root directory is or none at all. In that case, a consistent
+      /// default root directory will be used.
+      LLVM_ATTRIBUTE_DEPRECATED(static Path GetRootDirectory(),
+        LLVM_PATH_DEPRECATED_MSG(NOTHING));
+
+      /// Construct a path to a unique temporary directory that is created in
+      /// a "standard" place for the operating system. The directory is
+      /// guaranteed to be created on exit from this function. If the directory
+      /// cannot be created, the function will throw an exception.
+      /// @returns an invalid path (empty) on error
+      /// @param ErrMsg Optional place for an error message if an error occurs
+      /// @brief Construct a path to an new, unique, existing temporary
+      /// directory.
+      static Path GetTemporaryDirectory(std::string* ErrMsg = 0);
+
+      /// Construct a vector of sys::Path that contains the "standard" system
+      /// library paths suitable for linking into programs.
+      /// @brief Construct a path to the system library directory
+      static void GetSystemLibraryPaths(std::vector<sys::Path>& Paths);
+
+      /// Construct a vector of sys::Path that contains the "standard" bitcode
+      /// library paths suitable for linking into an llvm program. This function
+      /// *must* return the value of LLVM_LIB_SEARCH_PATH as well as the value
+      /// of LLVM_LIBDIR. It also must provide the System library paths as
+      /// returned by GetSystemLibraryPaths.
+      /// @see GetSystemLibraryPaths
+      /// @brief Construct a list of directories in which bitcode could be
+      /// found.
+      static void GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths);
+
+      /// Find the path to a library using its short name. Use the system
+      /// dependent library paths to locate the library.
+      /// @brief Find a library.
+      static Path FindLibrary(std::string& short_name);
+
+      /// Construct a path to the default LLVM configuration directory. The
+      /// implementation must ensure that this is a well-known (same on many
+      /// systems) directory in which llvm configuration files exist. For
+      /// example, on Unix, the /etc/llvm directory has been selected.
+      /// @brief Construct a path to the default LLVM configuration directory
+      static Path GetLLVMDefaultConfigDir();
+
+      /// Construct a path to the LLVM installed configuration directory. The
+      /// implementation must ensure that this refers to the "etc" directory of
+      /// the LLVM installation. This is the location where configuration files
+      /// will be located for a particular installation of LLVM on a machine.
+      /// @brief Construct a path to the LLVM installed configuration directory
+      static Path GetLLVMConfigDir();
+
+      /// Construct a path to the current user's home directory. The
+      /// implementation must use an operating system specific mechanism for
+      /// determining the user's home directory. For example, the environment
+      /// variable "HOME" could be used on Unix. If a given operating system
+      /// does not have the concept of a user's home directory, this static
+      /// constructor must provide the same result as GetRootDirectory.
+      /// @brief Construct a path to the current user's "home" directory
+      static Path GetUserHomeDirectory();
+
+      /// Construct a path to the current directory for the current process.
+      /// @returns The current working directory.
+      /// @brief Returns the current working directory.
+      static Path GetCurrentDirectory();
+
+      /// Return the suffix commonly used on file names that contain an
+      /// executable.
+      /// @returns The executable file suffix for the current platform.
+      /// @brief Return the executable file suffix.
+      static StringRef GetEXESuffix();
+
+      /// Return the suffix commonly used on file names that contain a shared
+      /// object, shared archive, or dynamic link library. Such files are
+      /// linked at runtime into a process and their code images are shared
+      /// between processes.
+      /// @returns The dynamic link library suffix for the current platform.
+      /// @brief Return the dynamic link library suffix.
+      static StringRef GetDLLSuffix();
+
+      /// GetMainExecutable - Return the path to the main executable, given the
+      /// value of argv[0] from program startup and the address of main itself.
+      /// In extremis, this function may fail and return an empty path.
+      static Path GetMainExecutable(const char *argv0, void *MainAddr);
+
+      /// This is one of the very few ways in which a path can be constructed
+      /// with a syntactically invalid name. The only *legal* invalid name is an
+      /// empty one. Other invalid names are not permitted. Empty paths are
+      /// provided so that they can be used to indicate null or error results in
+      /// other lib/System functionality.
+      /// @brief Construct an empty (and invalid) path.
+      Path() : path() {}
+      Path(const Path &that) : path(that.path) {}
+
+      /// This constructor will accept a char* or std::string as a path. No
+      /// checking is done on this path to determine if it is valid. To
+      /// determine validity of the path, use the isValid method.
+      /// @param p The path to assign.
+      /// @brief Construct a Path from a string.
+      explicit Path(StringRef p);
+
+      /// This constructor will accept a character range as a path.  No checking
+      /// is done on this path to determine if it is valid.  To determine
+      /// validity of the path, use the isValid method.
+      /// @param StrStart A pointer to the first character of the path name
+      /// @param StrLen The length of the path name at StrStart
+      /// @brief Construct a Path from a string.
+      Path(const char *StrStart, unsigned StrLen);
+
+    /// @}
+    /// @name Operators
+    /// @{
+    public:
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      Path &operator=(const Path &that) {
+        path = that.path;
+        return *this;
+      }
+
+      /// Makes a copy of \p that to \p this.
+      /// @param that A StringRef denoting the path
+      /// @returns \p this
+      /// @brief Assignment Operator
+      Path &operator=(StringRef that);
+
+      /// Compares \p this Path with \p that Path for equality.
+      /// @returns true if \p this and \p that refer to the same thing.
+      /// @brief Equality Operator
+      bool operator==(const Path &that) const;
+
+      /// Compares \p this Path with \p that Path for inequality.
+      /// @returns true if \p this and \p that refer to different things.
+      /// @brief Inequality Operator
+      bool operator!=(const Path &that) const { return !(*this == that); }
+
+      /// Determines if \p this Path is less than \p that Path. This is required
+      /// so that Path objects can be placed into ordered collections (e.g.
+      /// std::map). The comparison is done lexicographically as defined by
+      /// the std::string::compare method.
+      /// @returns true if \p this path is lexicographically less than \p that.
+      /// @brief Less Than Operator
+      bool operator<(const Path& that) const;
+
+    /// @}
+    /// @name Path Accessors
+    /// @{
+    public:
+      /// This function will use an operating system specific algorithm to
+      /// determine if the current value of \p this is a syntactically valid
+      /// path name for the operating system. The path name does not need to
+      /// exist, validity is simply syntactical. Empty paths are always invalid.
+      /// @returns true iff the path name is syntactically legal for the
+      /// host operating system.
+      /// @brief Determine if a path is syntactically valid or not.
+      bool isValid() const;
+
+      /// This function determines if the contents of the path name are empty.
+      /// That is, the path name has a zero length. This does NOT determine if
+      /// if the file is empty. To get the length of the file itself, Use the
+      /// PathWithStatus::getFileStatus() method and then the getSize() method
+      /// on the returned FileStatus object.
+      /// @returns true iff the path is empty.
+      /// @brief Determines if the path name is empty (invalid).
+      bool isEmpty() const { return path.empty(); }
+
+       /// This function returns the last component of the path name. The last
+      /// component is the file or directory name occurring after the last
+      /// directory separator. If no directory separator is present, the entire
+      /// path name is returned (i.e. same as toString).
+      /// @returns StringRef containing the last component of the path name.
+      /// @brief Returns the last component of the path name.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        StringRef getLast() const,
+        LLVM_PATH_DEPRECATED_MSG(path::filename));
+
+      /// This function strips off the path and suffix of the file or directory
+      /// name and returns just the basename. For example /a/foo.bar would cause
+      /// this function to return "foo".
+      /// @returns StringRef containing the basename of the path
+      /// @brief Get the base name of the path
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getBasename() const,
+        LLVM_PATH_DEPRECATED_MSG(path::stem));
+
+      /// This function strips off the suffix of the path beginning with the
+      /// path separator ('/' on Unix, '\' on Windows) and returns the result.
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getDirname() const,
+        LLVM_PATH_DEPRECATED_MSG(path::parent_path));
+
+      /// This function strips off the path and basename(up to and
+      /// including the last dot) of the file or directory name and
+      /// returns just the suffix. For example /a/foo.bar would cause
+      /// this function to return "bar".
+      /// @returns StringRef containing the suffix of the path
+      /// @brief Get the suffix of the path
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getSuffix() const,
+        LLVM_PATH_DEPRECATED_MSG(path::extension));
+
+      /// Obtain a 'C' string for the path name.
+      /// @returns a 'C' string containing the path name.
+      /// @brief Returns the path as a C string.
+      const char *c_str() const { return path.c_str(); }
+      const std::string &str() const { return path; }
+
+
+      /// size - Return the length in bytes of this path name.
+      size_t size() const { return path.size(); }
+
+      /// empty - Returns true if the path is empty.
+      unsigned empty() const { return path.empty(); }
+
+    /// @}
+    /// @name Disk Accessors
+    /// @{
+    public:
+      /// This function determines if the path name is absolute, as opposed to
+      /// relative.
+      /// @brief Determine if the path is absolute.
+//FIXME:      LLVM_ATTRIBUTE_DEPRECATED(
+      bool isAbsolute() const;
+//FIXME:      LLVMV_PATH_DEPRECATED_MSG(path::is_absolute));
+
+      /// This function determines if the path name is absolute, as opposed to
+      /// relative.
+      /// @brief Determine if the path is absolute.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        static bool isAbsolute(const char *NameStart, unsigned NameLen),
+        LLVM_PATH_DEPRECATED_MSG(path::is_absolute));
+
+      /// This function opens the file associated with the path name provided by
+      /// the Path object and reads its magic number. If the magic number at the
+      /// start of the file matches \p magic, true is returned. In all other
+      /// cases (file not found, file not accessible, etc.) it returns false.
+      /// @returns true if the magic number of the file matches \p magic.
+      /// @brief Determine if file has a specific magic number
+      LLVM_ATTRIBUTE_DEPRECATED(bool hasMagicNumber(StringRef magic) const,
+        LLVM_PATH_DEPRECATED_MSG(fs::has_magic));
+
+      /// This function retrieves the first \p len bytes of the file associated
+      /// with \p this. These bytes are returned as the "magic number" in the
+      /// \p Magic parameter.
+      /// @returns true if the Path is a file and the magic number is retrieved,
+      /// false otherwise.
+      /// @brief Get the file's magic number.
+      bool getMagicNumber(std::string& Magic, unsigned len) const;
+
+      /// This function determines if the path name in the object references an
+      /// archive file by looking at its magic number.
+      /// @returns true if the file starts with the magic number for an archive
+      /// file.
+      /// @brief Determine if the path references an archive file.
+      bool isArchive() const;
+
+      /// This function determines if the path name in the object references an
+      /// LLVM Bitcode file by looking at its magic number.
+      /// @returns true if the file starts with the magic number for LLVM
+      /// bitcode files.
+      /// @brief Determine if the path references a bitcode file.
+      bool isBitcodeFile() const;
+
+      /// This function determines if the path name in the object references a
+      /// native Dynamic Library (shared library, shared object) by looking at
+      /// the file's magic number. The Path object must reference a file, not a
+      /// directory.
+      /// @returns true if the file starts with the magic number for a native
+      /// shared library.
+      /// @brief Determine if the path references a dynamic library.
+      bool isDynamicLibrary() const;
+
+      /// This function determines if the path name in the object references a
+      /// native object file by looking at it's magic number. The term object
+      /// file is defined as "an organized collection of separate, named
+      /// sequences of binary data." This covers the obvious file formats such
+      /// as COFF and ELF, but it also includes llvm ir bitcode, archives,
+      /// libraries, etc...
+      /// @returns true if the file starts with the magic number for an object
+      /// file.
+      /// @brief Determine if the path references an object file.
+      bool isObjectFile() const;
+
+      /// This function determines if the path name references an existing file
+      /// or directory in the file system.
+      /// @returns true if the pathname references an existing file or
+      /// directory.
+      /// @brief Determines if the path is a file or directory in
+      /// the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool exists() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::exists));
+
+      /// This function determines if the path name references an
+      /// existing directory.
+      /// @returns true if the pathname references an existing directory.
+      /// @brief Determines if the path is a directory in the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool isDirectory() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::is_directory));
+
+      /// This function determines if the path name references an
+      /// existing symbolic link.
+      /// @returns true if the pathname references an existing symlink.
+      /// @brief Determines if the path is a symlink in the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool isSymLink() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::is_symlink));
+
+      /// This function determines if the path name references a readable file
+      /// or directory in the file system. This function checks for
+      /// the existence and readability (by the current program) of the file
+      /// or directory.
+      /// @returns true if the pathname references a readable file.
+      /// @brief Determines if the path is a readable file or directory
+      /// in the file system.
+      bool canRead() const;
+
+      /// This function determines if the path name references a writable file
+      /// or directory in the file system. This function checks for the
+      /// existence and writability (by the current program) of the file or
+      /// directory.
+      /// @returns true if the pathname references a writable file.
+      /// @brief Determines if the path is a writable file or directory
+      /// in the file system.
+      bool canWrite() const;
+
+      /// This function checks that what we're trying to work only on a regular
+      /// file. Check for things like /dev/null, any block special file, or
+      /// other things that aren't "regular" regular files.
+      /// @returns true if the file is S_ISREG.
+      /// @brief Determines if the file is a regular file
+      bool isRegularFile() const;
+
+      /// This function determines if the path name references an executable
+      /// file in the file system. This function checks for the existence and
+      /// executability (by the current program) of the file.
+      /// @returns true if the pathname references an executable file.
+      /// @brief Determines if the path is an executable file in the file
+      /// system.
+      bool canExecute() const;
+
+      /// This function builds a list of paths that are the names of the
+      /// files and directories in a directory.
+      /// @returns true if an error occurs, true otherwise
+      /// @brief Build a list of directory's contents.
+      bool getDirectoryContents(
+        std::set<Path> &paths, ///< The resulting list of file & directory names
+        std::string* ErrMsg    ///< Optional place to return an error message.
+      ) const;
+
+    /// @}
+    /// @name Path Mutators
+    /// @{
+    public:
+      /// The path name is cleared and becomes empty. This is an invalid
+      /// path name but is the *only* invalid path name. This is provided
+      /// so that path objects can be used to indicate the lack of a
+      /// valid path being found.
+      /// @brief Make the path empty.
+      void clear() { path.clear(); }
+
+      /// This method sets the Path object to \p unverified_path. This can fail
+      /// if the \p unverified_path does not pass the syntactic checks of the
+      /// isValid() method. If verification fails, the Path object remains
+      /// unchanged and false is returned. Otherwise true is returned and the
+      /// Path object takes on the path value of \p unverified_path
+      /// @returns true if the path was set, false otherwise.
+      /// @param unverified_path The path to be set in Path object.
+      /// @brief Set a full path from a StringRef
+      bool set(StringRef unverified_path);
+
+      /// One path component is removed from the Path. If only one component is
+      /// present in the path, the Path object becomes empty. If the Path object
+      /// is empty, no change is made.
+      /// @returns false if the path component could not be removed.
+      /// @brief Removes the last directory component of the Path.
+      bool eraseComponent();
+
+      /// The \p component is added to the end of the Path if it is a legal
+      /// name for the operating system. A directory separator will be added if
+      /// needed.
+      /// @returns false if the path component could not be added.
+      /// @brief Appends one path component to the Path.
+      bool appendComponent(StringRef component);
+
+      /// A period and the \p suffix are appended to the end of the pathname.
+      /// When the \p suffix is empty, no action is performed.
+      /// @brief Adds a period and the \p suffix to the end of the pathname.
+      void appendSuffix(StringRef suffix);
+
+      /// The suffix of the filename is erased. The suffix begins with and
+      /// includes the last . character in the filename after the last directory
+      /// separator and extends until the end of the name. If no . character is
+      /// after the last directory separator, then the file name is left
+      /// unchanged (i.e. it was already without a suffix) but the function
+      /// returns false.
+      /// @returns false if there was no suffix to remove, true otherwise.
+      /// @brief Remove the suffix from a path name.
+      bool eraseSuffix();
+
+      /// The current Path name is made unique in the file system. Upon return,
+      /// the Path will have been changed to make a unique file in the file
+      /// system or it will not have been changed if the current path name is
+      /// already unique.
+      /// @throws std::string if an unrecoverable error occurs.
+      /// @brief Make the current path name unique in the file system.
+      bool makeUnique( bool reuse_current /*= true*/, std::string* ErrMsg );
+
+      /// The current Path name is made absolute by prepending the
+      /// current working directory if necessary.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        void makeAbsolute(),
+        LLVM_PATH_DEPRECATED_MSG(fs::make_absolute));
+
+    /// @}
+    /// @name Disk Mutators
+    /// @{
+    public:
+      /// This method attempts to make the file referenced by the Path object
+      /// available for reading so that the canRead() method will return true.
+      /// @brief Make the file readable;
+      bool makeReadableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method attempts to make the file referenced by the Path object
+      /// available for writing so that the canWrite() method will return true.
+      /// @brief Make the file writable;
+      bool makeWriteableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method attempts to make the file referenced by the Path object
+      /// available for execution so that the canExecute() method will return
+      /// true.
+      /// @brief Make the file readable;
+      bool makeExecutableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method allows the last modified time stamp and permission bits
+      /// to be set on the disk object referenced by the Path.
+      /// @throws std::string if an error occurs.
+      /// @returns true on error.
+      /// @brief Set the status information.
+      bool setStatusInfoOnDisk(const FileStatus &SI,
+                               std::string *ErrStr = 0) const;
+
+      /// This method attempts to create a directory in the file system with the
+      /// same name as the Path object. The \p create_parents parameter controls
+      /// whether intermediate directories are created or not. if \p
+      /// create_parents is true, then an attempt will be made to create all
+      /// intermediate directories, as needed. If \p create_parents is false,
+      /// then only the final directory component of the Path name will be
+      /// created. The created directory will have no entries.
+      /// @returns true if the directory could not be created, false otherwise
+      /// @brief Create the directory this Path refers to.
+      bool createDirectoryOnDisk(
+        bool create_parents = false, ///<  Determines whether non-existent
+           ///< directory components other than the last one (the "parents")
+           ///< are created or not.
+        std::string* ErrMsg = 0 ///< Optional place to put error messages.
+      );
+
+      /// This method attempts to create a file in the file system with the same
+      /// name as the Path object. The intermediate directories must all exist
+      /// at the time this method is called. Use createDirectoriesOnDisk to
+      /// accomplish that. The created file will be empty upon return from this
+      /// function.
+      /// @returns true if the file could not be created, false otherwise.
+      /// @brief Create the file this Path refers to.
+      bool createFileOnDisk(
+        std::string* ErrMsg = 0 ///< Optional place to put error messages.
+      );
+
+      /// This is like createFile except that it creates a temporary file. A
+      /// unique temporary file name is generated based on the contents of
+      /// \p this before the call. The new name is assigned to \p this and the
+      /// file is created.  Note that this will both change the Path object
+      /// *and* create the corresponding file. This function will ensure that
+      /// the newly generated temporary file name is unique in the file system.
+      /// @returns true if the file couldn't be created, false otherwise.
+      /// @brief Create a unique temporary file
+      bool createTemporaryFileOnDisk(
+        bool reuse_current = false, ///< When set to true, this parameter
+          ///< indicates that if the current file name does not exist then
+          ///< it will be used without modification.
+        std::string* ErrMsg = 0 ///< Optional place to put error messages
+      );
+
+      /// This method renames the file referenced by \p this as \p newName. The
+      /// file referenced by \p this must exist. The file referenced by
+      /// \p newName does not need to exist.
+      /// @returns true on error, false otherwise
+      /// @brief Rename one file as another.
+      bool renamePathOnDisk(const Path& newName, std::string* ErrMsg);
+
+      /// This method attempts to destroy the file or directory named by the
+      /// last component of the Path. If the Path refers to a directory and the
+      /// \p destroy_contents is false, an attempt will be made to remove just
+      /// the directory (the final Path component). If \p destroy_contents is
+      /// true, an attempt will be made to remove the entire contents of the
+      /// directory, recursively. If the Path refers to a file, the
+      /// \p destroy_contents parameter is ignored.
+      /// @param destroy_contents Indicates whether the contents of a destroyed
+      /// @param Err An optional string to receive an error message.
+      /// directory should also be destroyed (recursively).
+      /// @returns false if the file/directory was destroyed, true on error.
+      /// @brief Removes the file or directory from the filesystem.
+      bool eraseFromDisk(bool destroy_contents = false,
+                         std::string *Err = 0) const;
+
+
+      /// MapInFilePages - This is a low level system API to map in the file
+      /// that is currently opened as FD into the current processes' address
+      /// space for read only access.  This function may return null on failure
+      /// or if the system cannot provide the following constraints:
+      ///  1) The pages must be valid after the FD is closed, until
+      ///     UnMapFilePages is called.
+      ///  2) Any padding after the end of the file must be zero filled, if
+      ///     present.
+      ///  3) The pages must be contiguous.
+      ///
+      /// This API is not intended for general use, clients should use
+      /// MemoryBuffer::getFile instead.
+      static const char *MapInFilePages(int FD, uint64_t FileSize);
+
+      /// UnMapFilePages - Free pages mapped into the current process by
+      /// MapInFilePages.
+      ///
+      /// This API is not intended for general use, clients should use
+      /// MemoryBuffer::getFile instead.
+      static void UnMapFilePages(const char *Base, uint64_t FileSize);
+
+    /// @}
+    /// @name Data
+    /// @{
+    protected:
+      // Our win32 implementation relies on this string being mutable.
+      mutable std::string path;   ///< Storage for the path name.
+
+
+    /// @}
+  };
+
+  /// This class is identical to Path class except it allows you to obtain the
+  /// file status of the Path as well. The reason for the distinction is one of
+  /// efficiency. First, the file status requires additional space and the space
+  /// is incorporated directly into PathWithStatus without an additional malloc.
+  /// Second, obtaining status information is an expensive operation on most
+  /// operating systems so we want to be careful and explicit about where we
+  /// allow this operation in LLVM.
+  /// @brief Path with file status class.
+  class PathWithStatus : public Path {
+    /// @name Constructors
+    /// @{
+    public:
+      /// @brief Default constructor
+      PathWithStatus() : Path(), status(), fsIsValid(false) {}
+
+      /// @brief Copy constructor
+      PathWithStatus(const PathWithStatus &that)
+        : Path(static_cast<const Path&>(that)), status(that.status),
+           fsIsValid(that.fsIsValid) {}
+
+      /// This constructor allows construction from a Path object
+      /// @brief Path constructor
+      PathWithStatus(const Path &other)
+        : Path(other), status(), fsIsValid(false) {}
+
+      /// This constructor will accept a char* or std::string as a path. No
+      /// checking is done on this path to determine if it is valid. To
+      /// determine validity of the path, use the isValid method.
+      /// @brief Construct a Path from a string.
+      explicit PathWithStatus(
+        StringRef p ///< The path to assign.
+      ) : Path(p), status(), fsIsValid(false) {}
+
+      /// This constructor will accept a character range as a path.  No checking
+      /// is done on this path to determine if it is valid.  To determine
+      /// validity of the path, use the isValid method.
+      /// @brief Construct a Path from a string.
+      explicit PathWithStatus(
+        const char *StrStart,  ///< Pointer to the first character of the path
+        unsigned StrLen        ///< Length of the path.
+      ) : Path(StrStart, StrLen), status(), fsIsValid(false) {}
+
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      PathWithStatus &operator=(const PathWithStatus &that) {
+        static_cast<Path&>(*this) = static_cast<const Path&>(that);
+        status = that.status;
+        fsIsValid = that.fsIsValid;
+        return *this;
+      }
+
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      PathWithStatus &operator=(const Path &that) {
+        static_cast<Path&>(*this) = static_cast<const Path&>(that);
+        fsIsValid = false;
+        return *this;
+      }
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+      /// This function returns status information about the file. The type of
+      /// path (file or directory) is updated to reflect the actual contents
+      /// of the file system.
+      /// @returns 0 on failure, with Error explaining why (if non-zero)
+      /// @returns a pointer to a FileStatus structure on success.
+      /// @brief Get file status.
+      const FileStatus *getFileStatus(
+        bool forceUpdate = false, ///< Force an update from the file system
+        std::string *Error = 0    ///< Optional place to return an error msg.
+      ) const;
+
+    /// @}
+    /// @name Data
+    /// @{
+    private:
+      mutable FileStatus status; ///< Status information.
+      mutable bool fsIsValid;    ///< Whether we've obtained it or not
+
+    /// @}
+  };
+
+  /// This enumeration delineates the kinds of files that LLVM knows about.
+  enum LLVMFileType {
+    Unknown_FileType = 0,              ///< Unrecognized file
+    Bitcode_FileType,                  ///< Bitcode file
+    Archive_FileType,                  ///< ar style archive file
+    ELF_Relocatable_FileType,          ///< ELF Relocatable object file
+    ELF_Executable_FileType,           ///< ELF Executable image
+    ELF_SharedObject_FileType,         ///< ELF dynamically linked shared lib
+    ELF_Core_FileType,                 ///< ELF core image
+    Mach_O_Object_FileType,            ///< Mach-O Object file
+    Mach_O_Executable_FileType,        ///< Mach-O Executable
+    Mach_O_FixedVirtualMemorySharedLib_FileType, ///< Mach-O Shared Lib, FVM
+    Mach_O_Core_FileType,              ///< Mach-O Core File
+    Mach_O_PreloadExecutable_FileType, ///< Mach-O Preloaded Executable
+    Mach_O_DynamicallyLinkedSharedLib_FileType, ///< Mach-O dynlinked shared lib
+    Mach_O_DynamicLinker_FileType,     ///< The Mach-O dynamic linker
+    Mach_O_Bundle_FileType,            ///< Mach-O Bundle file
+    Mach_O_DynamicallyLinkedSharedLibStub_FileType, ///< Mach-O Shared lib stub
+    COFF_FileType                      ///< COFF object file or lib
+  };
+
+  /// This utility function allows any memory block to be examined in order
+  /// to determine its file type.
+  LLVMFileType IdentifyFileType(const char*magic, unsigned length);
+
+  /// This function can be used to copy the file specified by Src to the
+  /// file specified by Dest. If an error occurs, Dest is removed.
+  /// @returns true if an error occurs, false otherwise
+  /// @brief Copy one file to another.
+  bool CopyFile(const Path& Dest, const Path& Src, std::string* ErrMsg);
+
+  /// This is the OS-specific path separator: a colon on Unix or a semicolon
+  /// on Windows.
+  extern const char PathSeparator;
+}
+
+}
+
+#endif
diff --git a/include/llvm/Support/PathV2.h b/include/llvm/Support/PathV2.h
new file mode 100644
index 000000000000..251563398fb4
--- /dev/null
+++ b/include/llvm/Support/PathV2.h
@@ -0,0 +1,347 @@
+//===- llvm/Support/PathV2.h - Path Operating System Concept ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::path namespace. It is designed after
+// TR2/boost filesystem (v3), but modified to remove exception handling and the
+// path class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PATHV2_H
+#define LLVM_SUPPORT_PATHV2_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/DataTypes.h"
+#include <iterator>
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+/// @name Lexical Component Iterator
+/// @{
+
+/// @brief Path iterator.
+///
+/// This is a bidirectional iterator that iterates over the individual
+/// components in \a path. The forward traversal order is as follows:
+/// * The root-name element, if present.
+/// * The root-directory element, if present.
+/// * Each successive filename element, if present.
+/// * Dot, if one or more trailing non-root slash characters are present.
+/// The backwards traversal order is the reverse of forward traversal.
+///
+/// Iteration examples. Each component is separated by ',':
+/// /          => /
+/// /foo       => /,foo
+/// foo/       => foo,.
+/// /foo/bar   => /,foo,bar
+/// ../        => ..,.
+/// C:\foo\bar => C:,/,foo,bar
+///
+class const_iterator {
+  StringRef Path;      //< The entire path.
+  StringRef Component; //< The current component. Not necessarily in Path.
+  size_t    Position;  //< The iterators current position within Path.
+
+  // An end iterator has Position = Path.size() + 1.
+  friend const_iterator begin(StringRef path);
+  friend const_iterator end(StringRef path);
+
+public:
+  typedef const StringRef value_type;
+  typedef ptrdiff_t difference_type;
+  typedef value_type &reference;
+  typedef value_type *pointer;
+  typedef std::bidirectional_iterator_tag iterator_category;
+
+  reference operator*() const { return Component; }
+  pointer   operator->() const { return &Component; }
+  const_iterator &operator++();    // preincrement
+  const_iterator &operator++(int); // postincrement
+  const_iterator &operator--();    // predecrement
+  const_iterator &operator--(int); // postdecrement
+  bool operator==(const const_iterator &RHS) const;
+  bool operator!=(const const_iterator &RHS) const;
+
+  /// @brief Difference in bytes between this and RHS.
+  ptrdiff_t operator-(const const_iterator &RHS) const;
+};
+
+typedef std::reverse_iterator<const_iterator> reverse_iterator;
+
+/// @brief Get begin iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized with the first component of \a path.
+const_iterator begin(StringRef path);
+
+/// @brief Get end iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized to the end of \a path.
+const_iterator end(StringRef path);
+
+/// @brief Get reverse begin iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized with the first reverse component of \a path.
+inline reverse_iterator rbegin(StringRef path) {
+  return reverse_iterator(end(path));
+}
+
+/// @brief Get reverse end iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized to the reverse end of \a path.
+inline reverse_iterator rend(StringRef path) {
+  return reverse_iterator(begin(path));
+}
+
+/// @}
+/// @name Lexical Modifiers
+/// @{
+
+/// @brief Remove the last component from \a path unless it is the root dir.
+///
+/// directory/filename.cpp => directory/
+/// directory/             => directory
+/// /                      => /
+///
+/// @param path A path that is modified to not have a file component.
+void remove_filename(SmallVectorImpl<char> &path);
+
+/// @brief Replace the file extension of \a path with \a extension.
+///
+/// ./filename.cpp => ./filename.extension
+/// ./filename     => ./filename.extension
+/// ./             => ./.extension
+///
+/// @param path A path that has its extension replaced with \a extension.
+/// @param extension The extension to be added. It may be empty. It may also
+///                  optionally start with a '.', if it does not, one will be
+///                  prepended.
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension);
+
+/// @brief Append to path.
+///
+/// /foo  + bar/f => /foo/bar/f
+/// /foo/ + bar/f => /foo/bar/f
+/// foo   + bar/f => foo/bar/f
+///
+/// @param path Set to \a path + \a component.
+/// @param component The component to be appended to \a path.
+void append(SmallVectorImpl<char> &path, const Twine &a,
+                                         const Twine &b = "",
+                                         const Twine &c = "",
+                                         const Twine &d = "");
+
+/// @brief Append to path.
+///
+/// /foo  + [bar,f] => /foo/bar/f
+/// /foo/ + [bar,f] => /foo/bar/f
+/// foo   + [bar,f] => foo/bar/f
+///
+/// @param path Set to \a path + [\a begin, \a end).
+/// @param begin Start of components to append.
+/// @param end One past the end of components to append.
+void append(SmallVectorImpl<char> &path,
+            const_iterator begin, const_iterator end);
+
+/// @}
+/// @name Transforms (or some other better name)
+/// @{
+
+/// Convert path to the native form. This is used to give paths to users and
+/// operating system calls in the platform's normal way. For example, on Windows
+/// all '/' are converted to '\'.
+///
+/// @param path A path that is transformed to native format.
+/// @param result Holds the result of the transformation.
+void native(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @}
+/// @name Lexical Observers
+/// @{
+
+/// @brief Get root name.
+///
+/// //net/hello => //net
+/// c:/hello    => c: (on Windows, on other platforms nothing)
+/// /hello      => <empty>
+///
+/// @param path Input path.
+/// @result The root name of \a path if it has one, otherwise "".
+const StringRef root_name(StringRef path);
+
+/// @brief Get root directory.
+///
+/// /goo/hello => /
+/// c:/hello   => /
+/// d/file.txt => <empty>
+///
+/// @param path Input path.
+/// @result The root directory of \a path if it has one, otherwise
+///               "".
+const StringRef root_directory(StringRef path);
+
+/// @brief Get root path.
+///
+/// Equivalent to root_name + root_directory.
+///
+/// @param path Input path.
+/// @result The root path of \a path if it has one, otherwise "".
+const StringRef root_path(StringRef path);
+
+/// @brief Get relative path.
+///
+/// C:\hello\world => hello\world
+/// foo/bar        => foo/bar
+/// /foo/bar       => foo/bar
+///
+/// @param path Input path.
+/// @result The path starting after root_path if one exists, otherwise "".
+const StringRef relative_path(StringRef path);
+
+/// @brief Get parent path.
+///
+/// /          => <empty>
+/// /foo       => /
+/// foo/../bar => foo/..
+///
+/// @param path Input path.
+/// @result The parent path of \a path if one exists, otherwise "".
+const StringRef parent_path(StringRef path);
+
+/// @brief Get filename.
+///
+/// /foo.txt    => foo.txt
+/// .          => .
+/// ..         => ..
+/// /          => /
+///
+/// @param path Input path.
+/// @result The filename part of \a path. This is defined as the last component
+///         of \a path.
+const StringRef filename(StringRef path);
+
+/// @brief Get stem.
+///
+/// If filename contains a dot but not solely one or two dots, result is the
+/// substring of filename ending at (but not including) the last dot. Otherwise
+/// it is filename.
+///
+/// /foo/bar.txt => bar
+/// /foo/bar     => bar
+/// /foo/.txt    => <empty>
+/// /foo/.       => .
+/// /foo/..      => ..
+///
+/// @param path Input path.
+/// @result The stem of \a path.
+const StringRef stem(StringRef path);
+
+/// @brief Get extension.
+///
+/// If filename contains a dot but not solely one or two dots, result is the
+/// substring of filename starting at (and including) the last dot, and ending
+/// at the end of \a path. Otherwise "".
+///
+/// /foo/bar.txt => .txt
+/// /foo/bar     => <empty>
+/// /foo/.txt    => .txt
+///
+/// @param path Input path.
+/// @result The extension of \a path.
+const StringRef extension(StringRef path);
+
+/// @brief Check whether the given char is a path separator on the host OS.
+///
+/// @param value a character
+/// @result true if \a value is a path separator character on the host OS
+bool is_separator(char value);
+
+/// @brief Has root name?
+///
+/// root_name != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root name, false otherwise.
+bool has_root_name(const Twine &path);
+
+/// @brief Has root directory?
+///
+/// root_directory != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root directory, false otherwise.
+bool has_root_directory(const Twine &path);
+
+/// @brief Has root path?
+///
+/// root_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root path, false otherwise.
+bool has_root_path(const Twine &path);
+
+/// @brief Has relative path?
+///
+/// relative_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a relative path, false otherwise.
+bool has_relative_path(const Twine &path);
+
+/// @brief Has parent path?
+///
+/// parent_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a parent path, false otherwise.
+bool has_parent_path(const Twine &path);
+
+/// @brief Has filename?
+///
+/// filename != ""
+///
+/// @param path Input path.
+/// @result True if the path has a filename, false otherwise.
+bool has_filename(const Twine &path);
+
+/// @brief Has stem?
+///
+/// stem != ""
+///
+/// @param path Input path.
+/// @result True if the path has a stem, false otherwise.
+bool has_stem(const Twine &path);
+
+/// @brief Has extension?
+///
+/// extension != ""
+///
+/// @param path Input path.
+/// @result True if the path has a extension, false otherwise.
+bool has_extension(const Twine &path);
+
+/// @brief Is path absolute?
+///
+/// @param path Input path.
+/// @result True if the path is absolute, false if it is not.
+bool is_absolute(const Twine &path);
+
+/// @brief Is path relative?
+///
+/// @param path Input path.
+/// @result True if the path is relative, false if it is not.
+bool is_relative(const Twine &path);
+
+} // end namespace path
+} // end namespace sys
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index bee676863780..948ae5176eeb 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -41,18 +41,62 @@ bool match(Val *V, const Pattern &P) {
 }
 
 template<typename Class>
-struct leaf_ty {
+struct class_match {
   template<typename ITy>
   bool match(ITy *V) { return isa<Class>(V); }
 };
 
 /// m_Value() - Match an arbitrary value and ignore it.
-inline leaf_ty<Value> m_Value() { return leaf_ty<Value>(); }
+inline class_match<Value> m_Value() { return class_match<Value>(); }
 /// m_ConstantInt() - Match an arbitrary ConstantInt and ignore it.
-inline leaf_ty<ConstantInt> m_ConstantInt() { return leaf_ty<ConstantInt>(); }
+inline class_match<ConstantInt> m_ConstantInt() {
+  return class_match<ConstantInt>();
+}
+/// m_Undef() - Match an arbitrary undef constant.
+inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
 
+inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
+  
+struct match_zero {
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (const Constant *C = dyn_cast<Constant>(V))
+      return C->isNullValue();
+    return false;
+  }
+};
+  
+/// m_Zero() - Match an arbitrary zero/null constant.  This includes
+/// zero_initializer for vectors and ConstantPointerNull for pointers.
+inline match_zero m_Zero() { return match_zero(); }
+  
+  
+struct apint_match {
+  const APInt *&Res;
+  apint_match(const APInt *&R) : Res(R) {}
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      Res = &CI->getValue();
+      return true;
+    }
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI =
+          dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
+        Res = &CI->getValue();
+        return true;
+      }
+    return false;
+  }
+};
+  
+/// m_APInt - Match a ConstantInt or splatted ConstantVector, binding the
+/// specified pointer to the contained APInt.
+inline apint_match m_APInt(const APInt *&Res) { return Res; }
+
+  
 template<int64_t Val>
-struct constantint_ty {
+struct constantint_match {
   template<typename ITy>
   bool match(ITy *V) {
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -68,37 +112,82 @@ struct constantint_ty {
   }
 };
 
-/// m_ConstantInt(int64_t) - Match a ConstantInt with a specific value
-/// and ignore it.
+/// m_ConstantInt<int64_t> - Match a ConstantInt with a specific value.
 template<int64_t Val>
-inline constantint_ty<Val> m_ConstantInt() {
-  return constantint_ty<Val>();
+inline constantint_match<Val> m_ConstantInt() {
+  return constantint_match<Val>();
 }
 
-struct zero_ty {
+/// cst_pred_ty - This helper class is used to match scalar and vector constants
+/// that satisfy a specified predicate.
+template<typename Predicate>
+struct cst_pred_ty : public Predicate {
   template<typename ITy>
   bool match(ITy *V) {
-    if (const Constant *C = dyn_cast<Constant>(V))
-      return C->isNullValue();
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return this->isValue(CI->getValue());
+    if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+        return this->isValue(CI->getValue());
     return false;
   }
 };
-
-/// m_Zero() - Match an arbitrary zero/null constant.
-inline zero_ty m_Zero() { return zero_ty(); }
-
-struct one_ty {
+  
+/// api_pred_ty - This helper class is used to match scalar and vector constants
+/// that satisfy a specified predicate, and bind them to an APInt.
+template<typename Predicate>
+struct api_pred_ty : public Predicate {
+  const APInt *&Res;
+  api_pred_ty(const APInt *&R) : Res(R) {}
   template<typename ITy>
   bool match(ITy *V) {
-    if (const ConstantInt *C = dyn_cast<ConstantInt>(V))
-      return C->isOne();
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      if (this->isValue(CI->getValue())) {
+        Res = &CI->getValue();
+        return true;
+      }
+    if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+        if (this->isValue(CI->getValue())) {
+          Res = &CI->getValue();
+          return true;
+        }
     return false;
   }
 };
+  
+  
+struct is_one {
+  bool isValue(const APInt &C) { return C == 1; }
+};
 
-/// m_One() - Match a an integer 1.
-inline one_ty m_One() { return one_ty(); }
+/// m_One() - Match an integer 1 or a vector with all elements equal to 1.
+inline cst_pred_ty<is_one> m_One() { return cst_pred_ty<is_one>(); }
+inline api_pred_ty<is_one> m_One(const APInt *&V) { return V; }
+    
+struct is_all_ones {
+  bool isValue(const APInt &C) { return C.isAllOnesValue(); }
+};
   
+/// m_AllOnes() - Match an integer or vector with all bits set to true.
+inline cst_pred_ty<is_all_ones> m_AllOnes() {return cst_pred_ty<is_all_ones>();}
+inline api_pred_ty<is_all_ones> m_AllOnes(const APInt *&V) { return V; }
+
+struct is_sign_bit {
+  bool isValue(const APInt &C) { return C.isSignBit(); }
+};
+
+/// m_SignBit() - Match an integer or vector with only the sign bit(s) set.
+inline cst_pred_ty<is_sign_bit> m_SignBit() {return cst_pred_ty<is_sign_bit>();}
+inline api_pred_ty<is_sign_bit> m_SignBit(const APInt *&V) { return V; }
+
+struct is_power2 {
+  bool isValue(const APInt &C) { return C.isPowerOf2(); }
+};
+
+/// m_Power2() - Match an integer or vector power of 2.
+inline cst_pred_ty<is_power2> m_Power2() { return cst_pred_ty<is_power2>(); }
+inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { return V; }
 
 template<typename Class>
 struct bind_ty {
@@ -121,6 +210,9 @@ inline bind_ty<Value> m_Value(Value *&V) { return V; }
 /// m_ConstantInt - Match a ConstantInt, capturing the value if we match.
 inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
 
+/// m_Constant - Match a Constant, capturing the value if we match.
+inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
+
 /// specificval_ty - Match a specified Value*.
 struct specificval_ty {
   const Value *Val;
@@ -140,8 +232,7 @@ inline specificval_ty m_Specific(const Value *V) { return V; }
 // Matchers for specific binary operators.
 //
 
-template<typename LHS_t, typename RHS_t,
-         unsigned Opcode, typename ConcreteTy = BinaryOperator>
+template<typename LHS_t, typename RHS_t, unsigned Opcode>
 struct BinaryOp_match {
   LHS_t L;
   RHS_t R;
@@ -151,9 +242,8 @@ struct BinaryOp_match {
   template<typename OpTy>
   bool match(OpTy *V) {
     if (V->getValueID() == Value::InstructionVal + Opcode) {
-      ConcreteTy *I = cast<ConcreteTy>(V);
-      return I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
-             R.match(I->getOperand(1));
+      BinaryOperator *I = cast<BinaryOperator>(V);
+      return L.match(I->getOperand(0)) && R.match(I->getOperand(1));
     }
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
       return CE->getOpcode() == Opcode && L.match(CE->getOperand(0)) &&
@@ -163,193 +253,156 @@ struct BinaryOp_match {
 };
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Add>
+m_Add(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::FAdd>
+m_FAdd(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Sub>
+m_Sub(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::FSub>
+m_FSub(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Mul>
+m_Mul(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::FMul>
+m_FMul(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::UDiv>
+m_UDiv(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::SDiv>
+m_SDiv(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::FDiv>
+m_FDiv(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::URem>
+m_URem(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::SRem>
+m_SRem(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::FRem>
+m_FRem(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::And>
+m_And(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
-                                                      const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Or>
+m_Or(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Xor>
+m_Xor(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
-                                                        const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::Shl>
+m_Shl(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::LShr>
+m_LShr(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
-                                                          const RHS &R) {
+inline BinaryOp_match<LHS, RHS, Instruction::AShr>
+m_AShr(const LHS &L, const RHS &R) {
   return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
 }
 
 //===----------------------------------------------------------------------===//
-// Matchers for either AShr or LShr .. for convenience
+// Class that matches two different binary ops.
 //
-template<typename LHS_t, typename RHS_t, typename ConcreteTy = BinaryOperator>
-struct Shr_match {
+template<typename LHS_t, typename RHS_t, unsigned Opc1, unsigned Opc2>
+struct BinOp2_match {
   LHS_t L;
   RHS_t R;
 
-  Shr_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
+  BinOp2_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
 
   template<typename OpTy>
   bool match(OpTy *V) {
-    if (V->getValueID() == Value::InstructionVal + Instruction::LShr ||
-        V->getValueID() == Value::InstructionVal + Instruction::AShr) {
-      ConcreteTy *I = cast<ConcreteTy>(V);
-      return (I->getOpcode() == Instruction::AShr ||
-              I->getOpcode() == Instruction::LShr) &&
-             L.match(I->getOperand(0)) &&
-             R.match(I->getOperand(1));
+    if (V->getValueID() == Value::InstructionVal + Opc1 ||
+        V->getValueID() == Value::InstructionVal + Opc2) {
+      BinaryOperator *I = cast<BinaryOperator>(V);
+      return L.match(I->getOperand(0)) && R.match(I->getOperand(1));
     }
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-      return (CE->getOpcode() == Instruction::LShr ||
-              CE->getOpcode() == Instruction::AShr) &&
-             L.match(CE->getOperand(0)) &&
-             R.match(CE->getOperand(1));
+      return (CE->getOpcode() == Opc1 || CE->getOpcode() == Opc2) &&
+             L.match(CE->getOperand(0)) && R.match(CE->getOperand(1));
     return false;
   }
 };
 
+/// m_Shr - Matches LShr or AShr.
 template<typename LHS, typename RHS>
-inline Shr_match<LHS, RHS> m_Shr(const LHS &L, const RHS &R) {
-  return Shr_match<LHS, RHS>(L, R);
+inline BinOp2_match<LHS, RHS, Instruction::LShr, Instruction::AShr>
+m_Shr(const LHS &L, const RHS &R) {
+  return BinOp2_match<LHS, RHS, Instruction::LShr, Instruction::AShr>(L, R);
 }
 
-//===----------------------------------------------------------------------===//
-// Matchers for binary classes
-//
-
-template<typename LHS_t, typename RHS_t, typename Class, typename OpcType>
-struct BinaryOpClass_match {
-  OpcType *Opcode;
-  LHS_t L;
-  RHS_t R;
-
-  BinaryOpClass_match(OpcType &Op, const LHS_t &LHS,
-                      const RHS_t &RHS)
-    : Opcode(&Op), L(LHS), R(RHS) {}
-  BinaryOpClass_match(const LHS_t &LHS, const RHS_t &RHS)
-    : Opcode(0), L(LHS), R(RHS) {}
-
-  template<typename OpTy>
-  bool match(OpTy *V) {
-    if (Class *I = dyn_cast<Class>(V))
-      if (L.match(I->getOperand(0)) &&
-          R.match(I->getOperand(1))) {
-        if (Opcode)
-          *Opcode = I->getOpcode();
-        return true;
-      }
-#if 0  // Doesn't handle constantexprs yet!
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-      return CE->getOpcode() == Opcode && L.match(CE->getOperand(0)) &&
-             R.match(CE->getOperand(1));
-#endif
-    return false;
-  }
-};
-
+/// m_LogicalShift - Matches LShr or Shl.
 template<typename LHS, typename RHS>
-inline BinaryOpClass_match<LHS, RHS, BinaryOperator, Instruction::BinaryOps>
-m_Shift(Instruction::BinaryOps &Op, const LHS &L, const RHS &R) {
-  return BinaryOpClass_match<LHS, RHS,
-                             BinaryOperator, Instruction::BinaryOps>(Op, L, R);
+inline BinOp2_match<LHS, RHS, Instruction::LShr, Instruction::Shl>
+m_LogicalShift(const LHS &L, const RHS &R) {
+  return BinOp2_match<LHS, RHS, Instruction::LShr, Instruction::Shl>(L, R);
 }
 
+/// m_IDiv - Matches UDiv and SDiv.
 template<typename LHS, typename RHS>
-inline BinaryOpClass_match<LHS, RHS, BinaryOperator, Instruction::BinaryOps>
-m_Shift(const LHS &L, const RHS &R) {
-  return BinaryOpClass_match<LHS, RHS,
-                             BinaryOperator, Instruction::BinaryOps>(L, R);
+inline BinOp2_match<LHS, RHS, Instruction::SDiv, Instruction::UDiv>
+m_IDiv(const LHS &L, const RHS &R) {
+  return BinOp2_match<LHS, RHS, Instruction::SDiv, Instruction::UDiv>(L, R);
 }
 
 //===----------------------------------------------------------------------===//
@@ -362,15 +415,13 @@ struct CmpClass_match {
   LHS_t L;
   RHS_t R;
 
-  CmpClass_match(PredicateTy &Pred, const LHS_t &LHS,
-                 const RHS_t &RHS)
+  CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
     : Predicate(Pred), L(LHS), R(RHS) {}
 
   template<typename OpTy>
   bool match(OpTy *V) {
     if (Class *I = dyn_cast<Class>(V))
-      if (L.match(I->getOperand(0)) &&
-          R.match(I->getOperand(1))) {
+      if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
         Predicate = I->getPredicate();
         return true;
       }
@@ -425,11 +476,9 @@ m_Select(const Cond &C, const LHS &L, const RHS &R) {
 /// m_SelectCst - This matches a select of two constants, e.g.:
 ///    m_SelectCst<-1, 0>(m_Value(V))
 template<int64_t L, int64_t R, typename Cond>
-inline SelectClass_match<Cond, constantint_ty<L>, constantint_ty<R> >
+inline SelectClass_match<Cond, constantint_match<L>, constantint_match<R> >
 m_SelectCst(const Cond &C) {
-  return SelectClass_match<Cond, constantint_ty<L>,
-                           constantint_ty<R> >(C, m_ConstantInt<L>(),
-                                           m_ConstantInt<R>());
+  return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
 }
 
 
@@ -507,20 +556,14 @@ struct not_match {
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
       if (CE->getOpcode() == Instruction::Xor)
         return matchIfNot(CE->getOperand(0), CE->getOperand(1));
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
-      return L.match(ConstantExpr::getNot(CI));
     return false;
   }
 private:
   bool matchIfNot(Value *LHS, Value *RHS) {
     if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
       return CI->isAllOnesValue() && L.match(LHS);
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(LHS))
-      return CI->isAllOnesValue() && L.match(RHS);
     if (ConstantVector *CV = dyn_cast<ConstantVector>(RHS))
       return CV->isAllOnesValue() && L.match(LHS);
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(LHS))
-      return CV->isAllOnesValue() && L.match(RHS);
     return false;
   }
 };
@@ -543,17 +586,17 @@ struct neg_match {
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
       if (CE->getOpcode() == Instruction::Sub)
         return matchIfNeg(CE->getOperand(0), CE->getOperand(1));
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
-      return L.match(ConstantExpr::getNeg(CI));
     return false;
   }
 private:
   bool matchIfNeg(Value *LHS, Value *RHS) {
-    return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) &&
-           L.match(RHS);
+    if (ConstantInt *C = dyn_cast<ConstantInt>(LHS))
+      return C->isZero() && L.match(RHS);
+    return false;
   }
 };
 
+/// m_Neg - Match an integer negate.
 template<typename LHS>
 inline neg_match<LHS> m_Neg(const LHS &L) { return L; }
 
@@ -572,23 +615,23 @@ struct fneg_match {
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
       if (CE->getOpcode() == Instruction::FSub)
         return matchIfFNeg(CE->getOperand(0), CE->getOperand(1));
-    if (ConstantFP *CF = dyn_cast<ConstantFP>(V))
-      return L.match(ConstantExpr::getFNeg(CF));
     return false;
   }
 private:
   bool matchIfFNeg(Value *LHS, Value *RHS) {
-    return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) &&
-           L.match(RHS);
+    if (ConstantFP *C = dyn_cast<ConstantFP>(LHS))
+      return C->isNegativeZeroValue() && L.match(RHS);
+    return false;
   }
 };
 
+/// m_FNeg - Match a floating point negate.
 template<typename LHS>
 inline fneg_match<LHS> m_FNeg(const LHS &L) { return L; }
 
 
 //===----------------------------------------------------------------------===//
-// Matchers for control flow
+// Matchers for control flow.
 //
 
 template<typename Cond_t>
@@ -602,12 +645,10 @@ struct brc_match {
   template<typename OpTy>
   bool match(OpTy *V) {
     if (BranchInst *BI = dyn_cast<BranchInst>(V))
-      if (BI->isConditional()) {
-        if (Cond.match(BI->getCondition())) {
-          T = BI->getSuccessor(0);
-          F = BI->getSuccessor(1);
-          return true;
-        }
+      if (BI->isConditional() && Cond.match(BI->getCondition())) {
+        T = BI->getSuccessor(0);
+        F = BI->getSuccessor(1);
+        return true;
       }
     return false;
   }
diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h
index b85140480064..837082139214 100644
--- a/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/include/llvm/Support/PointerLikeTypeTraits.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
 #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
new file mode 100644
index 000000000000..33799229ff35
--- /dev/null
+++ b/include/llvm/Support/Process.h
@@ -0,0 +1,146 @@
+//===- llvm/Support/Process.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_PROCESS_H
+#define LLVM_SYSTEM_PROCESS_H
+
+#include "llvm/Support/TimeValue.h"
+
+namespace llvm {
+namespace sys {
+
+  /// This class provides an abstraction for getting information about the
+  /// currently executing process.
+  /// @since 1.4
+  /// @brief An abstraction for operating system processes.
+  class Process {
+    /// @name Accessors
+    /// @{
+    public:
+      /// This static function will return the operating system's virtual memory
+      /// page size.
+      /// @returns The number of bytes in a virtual memory page.
+      /// @brief Get the virtual memory page size
+      static unsigned GetPageSize();
+
+      /// This static function will return the total amount of memory allocated
+      /// by the process. This only counts the memory allocated via the malloc,
+      /// calloc and realloc functions and includes any "free" holes in the
+      /// allocated space.
+      /// @brief Return process memory usage.
+      static size_t GetMallocUsage();
+
+      /// This static function will return the total memory usage of the
+      /// process. This includes code, data, stack and mapped pages usage. Notei
+      /// that the value returned here is not necessarily the Running Set Size,
+      /// it is the total virtual memory usage, regardless of mapped state of
+      /// that memory.
+      static size_t GetTotalMemoryUsage();
+
+      /// This static function will set \p user_time to the amount of CPU time
+      /// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
+      /// time spent in system (kernel) mode.  If the operating system does not
+      /// support collection of these metrics, a zero TimeValue will be for both
+      /// values.
+      static void GetTimeUsage(
+        TimeValue& elapsed,
+          ///< Returns the TimeValue::now() giving current time
+        TimeValue& user_time,
+          ///< Returns the current amount of user time for the process
+        TimeValue& sys_time
+          ///< Returns the current amount of system time for the process
+      );
+
+      /// This static function will return the process' current user id number.
+      /// Not all operating systems support this feature. Where it is not
+      /// supported, the function should return 65536 as the value.
+      static int GetCurrentUserId();
+
+      /// This static function will return the process' current group id number.
+      /// Not all operating systems support this feature. Where it is not
+      /// supported, the function should return 65536 as the value.
+      static int GetCurrentGroupId();
+
+      /// This function makes the necessary calls to the operating system to
+      /// prevent core files or any other kind of large memory dumps that can
+      /// occur when a program fails.
+      /// @brief Prevent core file generation.
+      static void PreventCoreFiles();
+
+      /// This function determines if the standard input is connected directly
+      /// to a user's input (keyboard probably), rather than coming from a file
+      /// or pipe.
+      static bool StandardInIsUserInput();
+
+      /// This function determines if the standard output is connected to a
+      /// "tty" or "console" window. That is, the output would be displayed to
+      /// the user rather than being put on a pipe or stored in a file.
+      static bool StandardOutIsDisplayed();
+
+      /// This function determines if the standard error is connected to a
+      /// "tty" or "console" window. That is, the output would be displayed to
+      /// the user rather than being put on a pipe or stored in a file.
+      static bool StandardErrIsDisplayed();
+
+      /// This function determines if the given file descriptor is connected to
+      /// a "tty" or "console" window. That is, the output would be displayed to
+      /// the user rather than being put on a pipe or stored in a file.
+      static bool FileDescriptorIsDisplayed(int fd);
+
+      /// This function determines the number of columns in the window
+      /// if standard output is connected to a "tty" or "console"
+      /// window. If standard output is not connected to a tty or
+      /// console, or if the number of columns cannot be determined,
+      /// this routine returns zero.
+      static unsigned StandardOutColumns();
+
+      /// This function determines the number of columns in the window
+      /// if standard error is connected to a "tty" or "console"
+      /// window. If standard error is not connected to a tty or
+      /// console, or if the number of columns cannot be determined,
+      /// this routine returns zero.
+      static unsigned StandardErrColumns();
+
+      /// This function determines whether the terminal connected to standard
+      /// output supports colors. If standard output is not connected to a
+      /// terminal, this function returns false.
+      static bool StandardOutHasColors();
+
+      /// This function determines whether the terminal connected to standard
+      /// error supports colors. If standard error is not connected to a
+      /// terminal, this function returns false.
+      static bool StandardErrHasColors();
+
+      /// Whether changing colors requires the output to be flushed.
+      /// This is needed on systems that don't support escape sequences for
+      /// changing colors.
+      static bool ColorNeedsFlush();
+
+      /// This function returns the colorcode escape sequences.
+      /// If ColorNeedsFlush() is true then this function will change the colors
+      /// and return an empty escape sequence. In that case it is the
+      /// responsibility of the client to flush the output stream prior to
+      /// calling this function.
+      static const char *OutputColor(char c, bool bold, bool bg);
+
+      /// Same as OutputColor, but only enables the bold attribute.
+      static const char *OutputBold(bool bg);
+
+      /// Resets the terminals colors, or returns an escape sequence to do so.
+      static const char *ResetColor();
+    /// @}
+  };
+}
+}
+
+#endif
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
new file mode 100644
index 000000000000..78a495ef2105
--- /dev/null
+++ b/include/llvm/Support/Program.h
@@ -0,0 +1,157 @@
+//===- llvm/Support/Program.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_PROGRAM_H
+#define LLVM_SYSTEM_PROGRAM_H
+
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace sys {
+
+  // TODO: Add operations to communicate with the process, redirect its I/O,
+  // etc.
+
+  /// This class provides an abstraction for programs that are executable by the
+  /// operating system. It provides a platform generic way to find executable
+  /// programs from the path and to execute them in various ways. The sys::Path
+  /// class is used to specify the location of the Program.
+  /// @since 1.4
+  /// @brief An abstraction for finding and executing programs.
+  class Program {
+    /// Opaque handle for target specific data.
+    void *Data_;
+
+    // Noncopyable.
+    Program(const Program& other);
+    Program& operator=(const Program& other);
+
+    /// @name Methods
+    /// @{
+  public:
+
+    Program();
+    ~Program();
+
+    /// Return process ID of this program.
+    unsigned GetPid() const;
+
+    /// This function executes the program using the \p arguments provided.  The
+    /// invoked program will inherit the stdin, stdout, and stderr file
+    /// descriptors, the environment and other configuration settings of the
+    /// invoking program. If Path::executable() does not return true when this
+    /// function is called then a std::string is thrown.
+    /// @returns false in case of error, true otherwise.
+    /// @see FindProgramByName
+    /// @brief Executes the program with the given set of \p args.
+    bool Execute
+    ( const Path& path,  ///< sys::Path object providing the path of the
+      ///< program to be executed. It is presumed this is the result of
+      ///< the FindProgramByName method.
+      const char** args, ///< A vector of strings that are passed to the
+      ///< program.  The first element should be the name of the program.
+      ///< The list *must* be terminated by a null char* entry.
+      const char ** env = 0, ///< An optional vector of strings to use for
+      ///< the program's environment. If not provided, the current program's
+      ///< environment will be used.
+      const sys::Path** redirects = 0, ///< An optional array of pointers to
+      ///< Paths. If the array is null, no redirection is done. The array
+      ///< should have a size of at least three. If the pointer in the array
+      ///< are not null, then the inferior process's stdin(0), stdout(1),
+      ///< and stderr(2) will be redirected to the corresponding Paths.
+      ///< When an empty Path is passed in, the corresponding file
+      ///< descriptor will be disconnected (ie, /dev/null'd) in a portable
+      ///< way.
+      unsigned memoryLimit = 0, ///< If non-zero, this specifies max. amount
+      ///< of memory can be allocated by process. If memory usage will be
+      ///< higher limit, the child is killed and this call returns. If zero
+      ///< - no memory limit.
+      std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
+      ///< instance in which error messages will be returned. If the string
+      ///< is non-empty upon return an error occurred while invoking the
+      ///< program.
+      );
+
+    /// This function waits for the program to exit. This function will block
+    /// the current program until the invoked program exits.
+    /// @returns an integer result code indicating the status of the program.
+    /// A zero or positive value indicates the result code of the program. A
+    /// negative value is the signal number on which it terminated.
+    /// @see Execute
+    /// @brief Waits for the program to exit.
+    int Wait
+    ( const Path& path, ///< The path to the child process executable.
+      unsigned secondsToWait, ///< If non-zero, this specifies the amount
+      ///< of time to wait for the child process to exit. If the time
+      ///< expires, the child is killed and this call returns. If zero,
+      ///< this function will wait until the child finishes or forever if
+      ///< it doesn't.
+      std::string* ErrMsg ///< If non-zero, provides a pointer to a string
+      ///< instance in which error messages will be returned. If the string
+      ///< is non-empty upon return an error occurred while waiting.
+      );
+
+    /// This function terminates the program.
+    /// @returns true if an error occured.
+    /// @see Execute
+    /// @brief Terminates the program.
+    bool Kill
+    ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
+      ///< instance in which error messages will be returned. If the string
+      ///< is non-empty upon return an error occurred while killing the
+      ///< program.
+      );
+
+    /// This static constructor (factory) will attempt to locate a program in
+    /// the operating system's file system using some pre-determined set of
+    /// locations to search (e.g. the PATH on Unix). Paths with slashes are
+    /// returned unmodified.
+    /// @returns A Path object initialized to the path of the program or a
+    /// Path object that is empty (invalid) if the program could not be found.
+    /// @brief Construct a Program by finding it by name.
+    static Path FindProgramByName(const std::string& name);
+
+    // These methods change the specified standard stream (stdin,
+    // stdout, or stderr) to binary mode. They return true if an error
+    // occurred
+    static bool ChangeStdinToBinary();
+    static bool ChangeStdoutToBinary();
+    static bool ChangeStderrToBinary();
+
+    /// A convenience function equivalent to Program prg; prg.Execute(..);
+    /// prg.Wait(..);
+    /// @see Execute, Wait
+    static int ExecuteAndWait(const Path& path,
+                              const char** args,
+                              const char ** env = 0,
+                              const sys::Path** redirects = 0,
+                              unsigned secondsToWait = 0,
+                              unsigned memoryLimit = 0,
+                              std::string* ErrMsg = 0);
+
+    /// A convenience function equivalent to Program prg; prg.Execute(..);
+    /// @see Execute
+    static void ExecuteNoWait(const Path& path,
+                              const char** args,
+                              const char ** env = 0,
+                              const sys::Path** redirects = 0,
+                              unsigned memoryLimit = 0,
+                              std::string* ErrMsg = 0);
+
+    /// @}
+
+  };
+}
+}
+
+#endif
diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h
new file mode 100644
index 000000000000..0d4cb81de397
--- /dev/null
+++ b/include/llvm/Support/RWMutex.h
@@ -0,0 +1,173 @@
+//===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_RWMUTEX_H
+#define LLVM_SYSTEM_RWMUTEX_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm
+{
+  namespace sys
+  {
+    /// @brief Platform agnostic RWMutex class.
+    class RWMutexImpl
+    {
+    /// @name Constructors
+    /// @{
+    public:
+
+      /// Initializes the lock but doesn't acquire it.
+      /// @brief Default Constructor.
+      explicit RWMutexImpl();
+
+      /// Releases and removes the lock
+      /// @brief Destructor
+      ~RWMutexImpl();
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+
+      /// Attempts to unconditionally acquire the lock in reader mode. If the
+      /// lock is held by a writer, this method will wait until it can acquire
+      /// the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock in reader mode.
+      bool reader_acquire();
+
+      /// Attempts to release the lock in reader mode.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock in reader mode.
+      bool reader_release();
+
+      /// Attempts to unconditionally acquire the lock in reader mode. If the
+      /// lock is held by any readers, this method will wait until it can
+      /// acquire the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock in writer mode.
+      bool writer_acquire();
+
+      /// Attempts to release the lock in writer mode.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock in write mode.
+      bool writer_release();
+
+    //@}
+    /// @name Platform Dependent Data
+    /// @{
+    private:
+      void* data_; ///< We don't know what the data will be
+
+    /// @}
+    /// @name Do Not Implement
+    /// @{
+    private:
+      RWMutexImpl(const RWMutexImpl & original);
+      void operator=(const RWMutexImpl &);
+    /// @}
+    };
+
+    /// SmartMutex - An R/W mutex with a compile time constant parameter that
+    /// indicates whether this mutex should become a no-op when we're not
+    /// running in multithreaded mode.
+    template<bool mt_only>
+    class SmartRWMutex : public RWMutexImpl {
+      unsigned readers, writers;
+    public:
+      explicit SmartRWMutex() : RWMutexImpl(), readers(0), writers(0) { }
+
+      bool reader_acquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::reader_acquire();
+
+        // Single-threaded debugging code.  This would be racy in multithreaded
+        // mode, but provides not sanity checks in single threaded mode.
+        ++readers;
+        return true;
+      }
+
+      bool reader_release() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::reader_release();
+
+        // Single-threaded debugging code.  This would be racy in multithreaded
+        // mode, but provides not sanity checks in single threaded mode.
+        assert(readers > 0 && "Reader lock not acquired before release!");
+        --readers;
+        return true;
+      }
+
+      bool writer_acquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::writer_acquire();
+
+        // Single-threaded debugging code.  This would be racy in multithreaded
+        // mode, but provides not sanity checks in single threaded mode.
+        assert(writers == 0 && "Writer lock already acquired!");
+        ++writers;
+        return true;
+      }
+
+      bool writer_release() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::writer_release();
+
+        // Single-threaded debugging code.  This would be racy in multithreaded
+        // mode, but provides not sanity checks in single threaded mode.
+        assert(writers == 1 && "Writer lock not acquired before release!");
+        --writers;
+        return true;
+      }
+
+    private:
+      SmartRWMutex(const SmartRWMutex<mt_only> & original);
+      void operator=(const SmartRWMutex<mt_only> &);
+    };
+    typedef SmartRWMutex<false> RWMutex;
+
+    /// ScopedReader - RAII acquisition of a reader lock
+    template<bool mt_only>
+    struct SmartScopedReader {
+      SmartRWMutex<mt_only>& mutex;
+
+      explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) {
+        mutex.reader_acquire();
+      }
+
+      ~SmartScopedReader() {
+        mutex.reader_release();
+      }
+    };
+    typedef SmartScopedReader<false> ScopedReader;
+
+    /// ScopedWriter - RAII acquisition of a writer lock
+    template<bool mt_only>
+    struct SmartScopedWriter {
+      SmartRWMutex<mt_only>& mutex;
+
+      explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) {
+        mutex.writer_acquire();
+      }
+
+      ~SmartScopedWriter() {
+        mutex.writer_release();
+      }
+    };
+    typedef SmartScopedWriter<false> ScopedWriter;
+  }
+}
+
+#endif
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
new file mode 100644
index 000000000000..9a84df68ddba
--- /dev/null
+++ b/include/llvm/Support/Signals.h
@@ -0,0 +1,59 @@
+//===- llvm/Support/Signals.h - Signal Handling support ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// unix signals occuring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_SIGNALS_H
+#define LLVM_SYSTEM_SIGNALS_H
+
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace sys {
+
+  /// This function runs all the registered interrupt handlers, including the
+  /// removal of files registered by RemoveFileOnSignal.
+  void RunInterruptHandlers();
+
+  /// This function registers signal handlers to ensure that if a signal gets
+  /// delivered that the named file is removed.
+  /// @brief Remove a file if a fatal signal occurs.
+  bool RemoveFileOnSignal(const Path &Filename, std::string* ErrMsg = 0);
+
+  /// This function removes a file from the list of files to be removed on
+  /// signal delivery.
+  void DontRemoveFileOnSignal(const Path &Filename);
+
+  /// When an error signal (such as SIBABRT or SIGSEGV) is delivered to the
+  /// process, print a stack trace and then exit.
+  /// @brief Print a stack trace if a fatal signal occurs.
+  void PrintStackTraceOnErrorSignal();
+
+  /// AddSignalHandler - Add a function to be called when an abort/kill signal
+  /// is delivered to the process.  The handler can have a cookie passed to it
+  /// to identify what instance of the handler it is.
+  void AddSignalHandler(void (*FnPtr)(void *), void *Cookie);
+
+  /// This function registers a function to be called when the user "interrupts"
+  /// the program (typically by pressing ctrl-c).  When the user interrupts the
+  /// program, the specified interrupt function is called instead of the program
+  /// being killed, and the interrupt function automatically disabled.  Note
+  /// that interrupt functions are not allowed to call any non-reentrant
+  /// functions.  An null interrupt function pointer disables the current
+  /// installed function.  Note also that the handler may be executed on a
+  /// different thread on some platforms.
+  /// @brief Register a function to be called when ctrl-c is pressed.
+  void SetInterruptFunction(void (*IF)());
+} // End sys namespace
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Support/Solaris.h b/include/llvm/Support/Solaris.h
new file mode 100644
index 000000000000..57eee2cb4973
--- /dev/null
+++ b/include/llvm/Support/Solaris.h
@@ -0,0 +1,40 @@
+/*===- llvm/Support/Solaris.h ------------------------------------*- C++ -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file contains portability fixes for Solaris hosts.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_SYSTEM_SOLARIS_H
+#define LLVM_SYSTEM_SOLARIS_H
+
+#include <sys/types.h>
+#include <sys/regset.h>
+
+#undef CS
+#undef DS
+#undef ES
+#undef FS
+#undef GS
+#undef SS
+#undef EAX
+#undef ECX
+#undef EDX
+#undef EBX
+#undef ESP
+#undef EBP
+#undef ESI
+#undef EDI
+#undef EIP
+#undef UESP
+#undef EFL
+#undef ERR
+#undef TRAPNO
+
+#endif
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 270ab2b2f85c..a41a633ba6b6 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -26,6 +26,7 @@ namespace llvm {
   class MemoryBuffer;
   class SourceMgr;
   class SMDiagnostic;
+  class Twine;
   class raw_ostream;
 
 /// SourceMgr - This owns the files read by a parser, handles include stacks,
@@ -35,8 +36,7 @@ public:
   /// DiagHandlerTy - Clients that want to handle their own diagnostics in a
   /// custom way can register a function pointer+context as a diagnostic
   /// handler.  It gets called each time PrintMessage is invoked.
-  typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context,
-                                unsigned LocCookie);
+  typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context);
 private:
   struct SrcBuffer {
     /// Buffer - The memory buffer for the file.
@@ -60,7 +60,6 @@ private:
 
   DiagHandlerTy DiagHandler;
   void *DiagContext;
-  unsigned DiagLocCookie;
   
   SourceMgr(const SourceMgr&);    // DO NOT IMPLEMENT
   void operator=(const SourceMgr&); // DO NOT IMPLEMENT
@@ -73,12 +72,10 @@ public:
   }
 
   /// setDiagHandler - Specify a diagnostic handler to be invoked every time
-  /// PrintMessage is called.  Ctx and Cookie are passed into the handler when
-  /// it is invoked.
-  void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0, unsigned Cookie = 0) {
+  /// PrintMessage is called. Ctx is passed into the handler when it is invoked.
+  void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0) {
     DiagHandler = DH;
     DiagContext = Ctx;
-    DiagLocCookie = Cookie;
   }
 
   const SrcBuffer &getBufferInfo(unsigned i) const {
@@ -125,7 +122,7 @@ public:
   /// @param Type - If non-null, the kind of message (e.g., "error") which is
   /// prefixed to the message.
   /// @param ShowLine - Should the diagnostic show the source line.
-  void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type,
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
                     bool ShowLine = true) const;
 
 
@@ -136,7 +133,7 @@ public:
   /// prefixed to the message.
   /// @param ShowLine - Should the diagnostic show the source line.
   SMDiagnostic GetMessage(SMLoc Loc,
-                          const std::string &Msg, const char *Type,
+                          const Twine &Msg, const char *Type,
                           bool ShowLine = true) const;
 
 
diff --git a/include/llvm/Support/StableBasicBlockNumbering.h b/include/llvm/Support/StableBasicBlockNumbering.h
deleted file mode 100644
index 5e0f87e48950..000000000000
--- a/include/llvm/Support/StableBasicBlockNumbering.h
+++ /dev/null
@@ -1,59 +0,0 @@
-//===- StableBasicBlockNumbering.h - Provide BB identifiers -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class provides a *stable* numbering of basic blocks that does not depend
-// on their address in memory (which is nondeterministic).  When requested, this
-// class simply provides a unique ID for each basic block in the function
-// specified and the inverse mapping.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_STABLEBASICBLOCKNUMBERING_H
-#define LLVM_SUPPORT_STABLEBASICBLOCKNUMBERING_H
-
-#include "llvm/Function.h"
-#include "llvm/ADT/UniqueVector.h"
-
-namespace llvm {
-  class StableBasicBlockNumbering {
-    // BBNumbering - Holds the numbering.
-    UniqueVector<BasicBlock*> BBNumbering;
-  public:
-    StableBasicBlockNumbering(Function *F = 0) {
-      if (F) compute(*F);
-    }
-
-    /// compute - If we have not computed a numbering for the function yet, do
-    /// so.
-    void compute(Function &F) {
-      if (BBNumbering.empty()) {
-        for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-          BBNumbering.insert(I);
-      }
-    }
-
-    /// getNumber - Return the ID number for the specified BasicBlock.
-    ///
-    unsigned getNumber(BasicBlock *BB) const {
-      unsigned Idx = BBNumbering.idFor(BB);
-      assert(Idx && "Invalid basic block or numbering not computed!");
-      return Idx-1;
-    }
-
-    /// getBlock - Return the BasicBlock corresponding to a particular ID.
-    ///
-    BasicBlock *getBlock(unsigned N) const {
-      assert(N < BBNumbering.size() &&
-             "Block ID out of range or numbering not computed!");
-      return BBNumbering[N+1];
-    }
-  };
-}
-
-#endif
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index bb3bddd3c799..d774faf38642 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -20,20 +20,35 @@
 #define LLVM_SUPPORT_STANDARDPASSES_H
 
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/IPO.h"
 
 namespace llvm {
+
+  static inline void createStandardAliasAnalysisPasses(PassManagerBase *PM) {
+    // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+    // BasicAliasAnalysis wins if they disagree. This is intended to help
+    // support "obvious" type-punning idioms.
+    PM->add(createTypeBasedAliasAnalysisPass());
+    PM->add(createBasicAliasAnalysisPass());
+  }
+
   /// createStandardFunctionPasses - Add the standard list of function passes to
   /// the provided pass manager.
   ///
   /// \arg OptimizationLevel - The optimization level, corresponding to -O0,
   /// -O1, etc.
   static inline void createStandardFunctionPasses(PassManagerBase *PM,
-                                                  unsigned OptimizationLevel);
+                                                  unsigned OptimizationLevel) {
+    if (OptimizationLevel > 0) {
+      createStandardAliasAnalysisPasses(PM);
+      PM->add(createCFGSimplificationPass());
+      PM->add(createScalarReplAggregatesPass());
+      PM->add(createEarlyCSEPass());
+    }
+  }
 
   /// createStandardModulePasses - Add the standard list of module passes to the
   /// provided pass manager.
@@ -47,42 +62,6 @@ namespace llvm {
   /// \arg HaveExceptions - Whether the module may have code using exceptions.
   /// \arg InliningPass - The inlining pass to use, if any, or null. This will
   /// always be added, even at -O0.a
-  static inline void createStandardModulePasses(PassManagerBase *PM,
-                                                unsigned OptimizationLevel,
-                                                bool OptimizeSize,
-                                                bool UnitAtATime,
-                                                bool UnrollLoops,
-                                                bool SimplifyLibCalls,
-                                                bool HaveExceptions,
-                                                Pass *InliningPass);
-
-  /// createStandardLTOPasses - Add the standard list of module passes suitable
-  /// for link time optimization.
-  ///
-  /// Internalize - Run the internalize pass.
-  /// RunInliner - Use a function inlining pass.
-  /// VerifyEach - Run the verifier after each pass.
-  static inline void createStandardLTOPasses(PassManagerBase *PM,
-                                             bool Internalize,
-                                             bool RunInliner,
-                                             bool VerifyEach);
-
-  // Implementations
-
-  static inline void createStandardFunctionPasses(PassManagerBase *PM,
-                                                  unsigned OptimizationLevel) {
-    if (OptimizationLevel > 0) {
-      PM->add(createCFGSimplificationPass());
-      if (OptimizationLevel == 1)
-        PM->add(createPromoteMemoryToRegisterPass());
-      else
-        PM->add(createScalarReplAggregatesPass());
-      PM->add(createInstructionCombiningPass());
-    }
-  }
-
-  /// createStandardModulePasses - Add the standard module passes.  This is
-  /// expected to be run after the standard function passes.
   static inline void createStandardModulePasses(PassManagerBase *PM,
                                                 unsigned OptimizationLevel,
                                                 bool OptimizeSize,
@@ -91,6 +70,8 @@ namespace llvm {
                                                 bool SimplifyLibCalls,
                                                 bool HaveExceptions,
                                                 Pass *InliningPass) {
+    createStandardAliasAnalysisPasses(PM);
+
     if (OptimizationLevel == 0) {
       if (InliningPass)
         PM->add(InliningPass);
@@ -108,7 +89,7 @@ namespace llvm {
     
     // Start of CallGraph SCC passes.
     if (UnitAtATime && HaveExceptions)
-      PM->add(createPruneEHPass());           // Remove dead EH info
+      PM->add(createPruneEHPass());             // Remove dead EH info
     if (InliningPass)
       PM->add(InliningPass);
     if (UnitAtATime)
@@ -117,11 +98,13 @@ namespace llvm {
       PM->add(createArgumentPromotionPass());   // Scalarize uninlined fn args
     
     // Start of function pass.
-    PM->add(createScalarReplAggregatesPass());  // Break up aggregate allocas
+    // Break up aggregate allocas, using SSAUpdater.
+    PM->add(createScalarReplAggregatesPass(-1, false));
+    PM->add(createEarlyCSEPass());              // Catch trivial redundancies
     if (SimplifyLibCalls)
       PM->add(createSimplifyLibCallsPass());    // Library Call Optimizations
-    PM->add(createInstructionCombiningPass());  // Cleanup for scalarrepl.
     PM->add(createJumpThreadingPass());         // Thread jumps.
+    PM->add(createCorrelatedValuePropagationPass()); // Propagate conditionals
     PM->add(createCFGSimplificationPass());     // Merge & remove BBs
     PM->add(createInstructionCombiningPass());  // Combine silly seq's
     
@@ -133,6 +116,7 @@ namespace llvm {
     PM->add(createLoopUnswitchPass(OptimizeSize || OptimizationLevel < 3));
     PM->add(createInstructionCombiningPass());  
     PM->add(createIndVarSimplifyPass());        // Canonicalize indvars
+    PM->add(createLoopIdiomPass());             // Recognize idioms like memset.
     PM->add(createLoopDeletionPass());          // Delete dead loops
     if (UnrollLoops)
       PM->add(createLoopUnrollPass());          // Unroll small loops
@@ -172,10 +156,19 @@ namespace llvm {
       PM->add(createVerifierPass());
   }
 
+  /// createStandardLTOPasses - Add the standard list of module passes suitable
+  /// for link time optimization.
+  ///
+  /// Internalize - Run the internalize pass.
+  /// RunInliner - Use a function inlining pass.
+  /// VerifyEach - Run the verifier after each pass.
   static inline void createStandardLTOPasses(PassManagerBase *PM,
                                              bool Internalize,
                                              bool RunInliner,
                                              bool VerifyEach) {
+    // Provide AliasAnalysis services for optimizations.
+    createStandardAliasAnalysisPasses(PM);
+
     // Now that composite has been compiled, scan through the module, looking
     // for a main function.  If main is defined, mark all other functions
     // internal.
diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h
new file mode 100644
index 000000000000..6c0592c05ad7
--- /dev/null
+++ b/include/llvm/Support/SwapByteOrder.h
@@ -0,0 +1,101 @@
+//===- SwapByteOrder.h - Generic and optimized byte swaps -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares generic and optimized functions to swap the byte order of
+// an integral type.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_SWAP_BYTE_ORDER_H
+#define LLVM_SYSTEM_SWAP_BYTE_ORDER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstddef>
+#include <limits>
+
+namespace llvm {
+namespace sys {
+
+/// SwapByteOrder_16 - This function returns a byte-swapped representation of
+/// the 16-bit argument.
+inline uint16_t SwapByteOrder_16(uint16_t value) {
+#if defined(_MSC_VER) && !defined(_DEBUG)
+  // The DLL version of the runtime lacks these functions (bug!?), but in a
+  // release build they're replaced with BSWAP instructions anyway.
+  return _byteswap_ushort(value);
+#else
+  uint16_t Hi = value << 8;
+  uint16_t Lo = value >> 8;
+  return Hi | Lo;
+#endif
+}
+
+/// SwapByteOrder_32 - This function returns a byte-swapped representation of
+/// the 32-bit argument.
+inline uint32_t SwapByteOrder_32(uint32_t value) {
+#if defined(__llvm__) || \
+(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+  return __builtin_bswap32(value);
+#elif defined(_MSC_VER) && !defined(_DEBUG)
+  return _byteswap_ulong(value);
+#else
+  uint32_t Byte0 = value & 0x000000FF;
+  uint32_t Byte1 = value & 0x0000FF00;
+  uint32_t Byte2 = value & 0x00FF0000;
+  uint32_t Byte3 = value & 0xFF000000;
+  return (Byte0 << 24) | (Byte1 << 8) | (Byte2 >> 8) | (Byte3 >> 24);
+#endif
+}
+
+/// SwapByteOrder_64 - This function returns a byte-swapped representation of
+/// the 64-bit argument.
+inline uint64_t SwapByteOrder_64(uint64_t value) {
+#if defined(__llvm__) || \
+(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+  return __builtin_bswap64(value);
+#elif defined(_MSC_VER) && !defined(_DEBUG)
+  return _byteswap_uint64(value);
+#else
+  uint64_t Hi = SwapByteOrder_32(uint32_t(value));
+  uint32_t Lo = SwapByteOrder_32(uint32_t(value >> 32));
+  return (Hi << 32) | Lo;
+#endif
+}
+
+inline unsigned char  SwapByteOrder(unsigned char C) { return C; }
+inline   signed char  SwapByteOrder(signed char C) { return C; }
+inline          char  SwapByteOrder(char C) { return C; }
+
+inline unsigned short SwapByteOrder(unsigned short C) { return SwapByteOrder_16(C); }
+inline   signed short SwapByteOrder(  signed short C) { return SwapByteOrder_16(C); }
+
+inline unsigned int   SwapByteOrder(unsigned int   C) { return SwapByteOrder_32(C); }
+inline   signed int   SwapByteOrder(  signed int   C) { return SwapByteOrder_32(C); }
+
+#if __LONG_MAX__ == __INT_MAX__
+inline unsigned long  SwapByteOrder(unsigned long  C) { return SwapByteOrder_32(C); }
+inline   signed long  SwapByteOrder(  signed long  C) { return SwapByteOrder_32(C); }
+#elif __LONG_MAX__ == __LONG_LONG_MAX__
+inline unsigned long  SwapByteOrder(unsigned long  C) { return SwapByteOrder_64(C); }
+inline   signed long  SwapByteOrder(  signed long  C) { return SwapByteOrder_64(C); }
+#else
+#error "Unknown long size!"
+#endif
+
+inline unsigned long long SwapByteOrder(unsigned long long C) {
+  return SwapByteOrder_64(C);
+}
+inline signed long long SwapByteOrder(signed long long C) {
+  return SwapByteOrder_64(C);
+}
+
+} // end namespace sys
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index 3c182c1ca8b0..399aee51eb7b 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -30,13 +30,14 @@ bool CheckBitcodeOutputToConsole(
   bool print_warning = true     ///< Control whether warnings are printed
 );
 
-/// FindExecutable - Find a named executable, giving the argv[0] of program
-/// being executed. This allows us to find another LLVM tool if it is built in
-/// the same directory.  If the executable cannot be found, return an
-/// empty string.
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just mainpulates the path and doesn't check for executability.
 /// @brief Find a named executable.
-sys::Path FindExecutable(const std::string &ExeName,
-                         const char *Argv0, void *MainAddr);
+sys::Path PrependMainExecutablePath(const std::string &ExeName,
+                                    const char *Argv0, void *MainAddr);
 
 } // End llvm namespace
 
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index d34f35fe0d2b..20ca5571ffa3 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -46,50 +46,32 @@ public:
   // Binary Operators
   //===--------------------------------------------------------------------===//
 
-  Constant *CreateAdd(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getAdd(LHS, RHS));
-  }
-  Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNSWAdd(LHS, RHS));
-  }
-  Constant *CreateNUWAdd(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNUWAdd(LHS, RHS));
+  Constant *CreateAdd(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW));
   }
   Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getFAdd(LHS, RHS));
   }
-  Constant *CreateSub(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getSub(LHS, RHS));
-  }
-  Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNSWSub(LHS, RHS));
-  }
-  Constant *CreateNUWSub(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNUWSub(LHS, RHS));
+  Constant *CreateSub(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW));
   }
   Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getFSub(LHS, RHS));
   }
-  Constant *CreateMul(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getMul(LHS, RHS));
-  }
-  Constant *CreateNSWMul(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNSWMul(LHS, RHS));
-  }
-  Constant *CreateNUWMul(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getNUWMul(LHS, RHS));
+  Constant *CreateMul(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW));
   }
   Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getFMul(LHS, RHS));
   }
-  Constant *CreateUDiv(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getUDiv(LHS, RHS));
-  }
-  Constant *CreateSDiv(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getSDiv(LHS, RHS));
+  Constant *CreateUDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getUDiv(LHS, RHS, isExact));
   }
-  Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getExactSDiv(LHS, RHS));
+  Constant *CreateSDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getSDiv(LHS, RHS, isExact));
   }
   Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getFDiv(LHS, RHS));
@@ -103,14 +85,15 @@ public:
   Constant *CreateFRem(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getFRem(LHS, RHS));
   }
-  Constant *CreateShl(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getShl(LHS, RHS));
+  Constant *CreateShl(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW));
   }
-  Constant *CreateLShr(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getLShr(LHS, RHS));
+  Constant *CreateLShr(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getLShr(LHS, RHS, isExact));
   }
-  Constant *CreateAShr(Constant *LHS, Constant *RHS) const {
-    return Fold(ConstantExpr::getAShr(LHS, RHS));
+  Constant *CreateAShr(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getAShr(LHS, RHS, isExact));
   }
   Constant *CreateAnd(Constant *LHS, Constant *RHS) const {
     return Fold(ConstantExpr::getAnd(LHS, RHS));
@@ -131,14 +114,9 @@ public:
   // Unary Operators
   //===--------------------------------------------------------------------===//
 
-  Constant *CreateNeg(Constant *C) const {
-    return Fold(ConstantExpr::getNeg(C));
-  }
-  Constant *CreateNSWNeg(Constant *C) const {
-    return Fold(ConstantExpr::getNSWNeg(C));
-  }
-  Constant *CreateNUWNeg(Constant *C) const {
-    return Fold(ConstantExpr::getNUWNeg(C));
+  Constant *CreateNeg(Constant *C,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getNeg(C, HasNUW, HasNSW));
   }
   Constant *CreateFNeg(Constant *C) const {
     return Fold(ConstantExpr::getFNeg(C));
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
new file mode 100644
index 000000000000..15350a7afff7
--- /dev/null
+++ b/include/llvm/Support/ThreadLocal.h
@@ -0,0 +1,54 @@
+//===- llvm/Support/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREAD_LOCAL_H
+#define LLVM_SYSTEM_THREAD_LOCAL_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm {
+  namespace sys {
+    // ThreadLocalImpl - Common base class of all ThreadLocal instantiations.
+    // YOU SHOULD NEVER USE THIS DIRECTLY.
+    class ThreadLocalImpl {
+      void* data;
+    public:
+      ThreadLocalImpl();
+      virtual ~ThreadLocalImpl();
+      void setInstance(const void* d);
+      const void* getInstance();
+      void removeInstance();
+    };
+
+    /// ThreadLocal - A class used to abstract thread-local storage.  It holds,
+    /// for each thread, a pointer a single object of type T.
+    template<class T>
+    class ThreadLocal : public ThreadLocalImpl {
+    public:
+      ThreadLocal() : ThreadLocalImpl() { }
+
+      /// get - Fetches a pointer to the object associated with the current
+      /// thread.  If no object has yet been associated, it returns NULL;
+      T* get() { return static_cast<T*>(getInstance()); }
+
+      // set - Associates a pointer to an object with the current thread.
+      void set(T* d) { setInstance(d); }
+
+      // erase - Removes the pointer associated with the current thread.
+      void erase() { removeInstance(); }
+    };
+  }
+}
+
+#endif
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
new file mode 100644
index 000000000000..c0e842c2fe73
--- /dev/null
+++ b/include/llvm/Support/Threading.h
@@ -0,0 +1,59 @@
+//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TThis file defines llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREADING_H
+#define LLVM_SYSTEM_THREADING_H
+
+namespace llvm {
+  /// llvm_start_multithreaded - Allocate and initialize structures needed to
+  /// make LLVM safe for multithreading.  The return value indicates whether
+  /// multithreaded initialization succeeded.  LLVM will still be operational
+  /// on "failed" return, and will still be safe for hosting threading
+  /// applications in the JIT, but will not be safe for concurrent calls to the
+  /// LLVM APIs.
+  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+  bool llvm_start_multithreaded();
+
+  /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM
+  /// safe for multithreading.
+  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+  void llvm_stop_multithreaded();
+
+  /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe
+  /// mode or not.
+  bool llvm_is_multithreaded();
+
+  /// acquire_global_lock - Acquire the global lock.  This is a no-op if called
+  /// before llvm_start_multithreaded().
+  void llvm_acquire_global_lock();
+
+  /// release_global_lock - Release the global lock.  This is a no-op if called
+  /// before llvm_start_multithreaded().
+  void llvm_release_global_lock();
+
+  /// llvm_execute_on_thread - Execute the given \arg UserFn on a separate
+  /// thread, passing it the provided \arg UserData.
+  ///
+  /// This function does not guarantee that the code will actually be executed
+  /// on a separate thread or honoring the requested stack size, but tries to do
+  /// so where system support is available.
+  ///
+  /// \param UserFn - The callback to execute.
+  /// \param UserData - An argument to pass to the callback function.
+  /// \param RequestedStackSize - If non-zero, a requested size (in bytes) for
+  /// the thread stack.
+  void llvm_execute_on_thread(void (*UserFn)(void*), void *UserData,
+                              unsigned RequestedStackSize = 0);
+}
+
+#endif
diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h
new file mode 100644
index 000000000000..e1227118c22c
--- /dev/null
+++ b/include/llvm/Support/TimeValue.h
@@ -0,0 +1,382 @@
+//===-- TimeValue.h - Declare OS TimeValue Concept --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file declares the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+#ifndef LLVM_SYSTEM_TIMEVALUE_H
+#define LLVM_SYSTEM_TIMEVALUE_H
+
+namespace llvm {
+namespace sys {
+  /// This class is used where a precise fixed point in time is required. The
+  /// range of TimeValue spans many hundreds of billions of years both past and
+  /// present.  The precision of TimeValue is to the nanosecond. However, the
+  /// actual precision of its values will be determined by the resolution of
+  /// the system clock. The TimeValue class is used in conjunction with several
+  /// other lib/System interfaces to specify the time at which a call should
+  /// timeout, etc.
+  /// @since 1.4
+  /// @brief Provides an abstraction for a fixed point in time.
+  class TimeValue {
+
+  /// @name Constants
+  /// @{
+  public:
+
+    /// A constant TimeValue representing the smallest time
+    /// value permissable by the class. MinTime is some point
+    /// in the distant past, about 300 billion years BCE.
+    /// @brief The smallest possible time value.
+    static const TimeValue MinTime;
+
+    /// A constant TimeValue representing the largest time
+    /// value permissable by the class. MaxTime is some point
+    /// in the distant future, about 300 billion years AD.
+    /// @brief The largest possible time value.
+    static const TimeValue MaxTime;
+
+    /// A constant TimeValue representing the base time,
+    /// or zero time of 00:00:00 (midnight) January 1st, 2000.
+    /// @brief 00:00:00 Jan 1, 2000 UTC.
+    static const TimeValue ZeroTime;
+
+    /// A constant TimeValue for the Posix base time which is
+    /// 00:00:00 (midnight) January 1st, 1970.
+    /// @brief 00:00:00 Jan 1, 1970 UTC.
+    static const TimeValue PosixZeroTime;
+
+    /// A constant TimeValue for the Win32 base time which is
+    /// 00:00:00 (midnight) January 1st, 1601.
+    /// @brief 00:00:00 Jan 1, 1601 UTC.
+    static const TimeValue Win32ZeroTime;
+
+  /// @}
+  /// @name Types
+  /// @{
+  public:
+    typedef int64_t SecondsType;    ///< Type used for representing seconds.
+    typedef int32_t NanoSecondsType;///< Type used for representing nanoseconds.
+
+    enum TimeConversions {
+      NANOSECONDS_PER_SECOND = 1000000000,  ///< One Billion
+      MICROSECONDS_PER_SECOND = 1000000,    ///< One Million
+      MILLISECONDS_PER_SECOND = 1000,       ///< One Thousand
+      NANOSECONDS_PER_MICROSECOND = 1000,   ///< One Thousand
+      NANOSECONDS_PER_MILLISECOND = 1000000,///< One Million
+      NANOSECONDS_PER_POSIX_TICK = 100,     ///< Posix tick is 100 Hz (10ms)
+      NANOSECONDS_PER_WIN32_TICK = 100      ///< Win32 tick is 100 Hz (10ms)
+    };
+
+  /// @}
+  /// @name Constructors
+  /// @{
+  public:
+    /// Caller provides the exact value in seconds and nanoseconds. The
+    /// \p nanos argument defaults to zero for convenience.
+    /// @brief Explicit constructor
+    explicit TimeValue (SecondsType seconds, NanoSecondsType nanos = 0)
+      : seconds_( seconds ), nanos_( nanos ) { this->normalize(); }
+
+    /// Caller provides the exact value as a double in seconds with the
+    /// fractional part representing nanoseconds.
+    /// @brief Double Constructor.
+    explicit TimeValue( double new_time )
+      : seconds_( 0 ) , nanos_ ( 0 ) {
+      SecondsType integer_part = static_cast<SecondsType>( new_time );
+      seconds_ = integer_part;
+      nanos_ = static_cast<NanoSecondsType>( (new_time -
+               static_cast<double>(integer_part)) * NANOSECONDS_PER_SECOND );
+      this->normalize();
+    }
+
+    /// This is a static constructor that returns a TimeValue that represents
+    /// the current time.
+    /// @brief Creates a TimeValue with the current time (UTC).
+    static TimeValue now();
+
+  /// @}
+  /// @name Operators
+  /// @{
+  public:
+    /// Add \p that to \p this.
+    /// @returns this
+    /// @brief Incrementing assignment operator.
+    TimeValue& operator += (const TimeValue& that ) {
+      this->seconds_ += that.seconds_  ;
+      this->nanos_ += that.nanos_ ;
+      this->normalize();
+      return *this;
+    }
+
+    /// Subtract \p that from \p this.
+    /// @returns this
+    /// @brief Decrementing assignment operator.
+    TimeValue& operator -= (const TimeValue &that ) {
+      this->seconds_ -= that.seconds_ ;
+      this->nanos_ -= that.nanos_ ;
+      this->normalize();
+      return *this;
+    }
+
+    /// Determine if \p this is less than \p that.
+    /// @returns True iff *this < that.
+    /// @brief True if this < that.
+    int operator < (const TimeValue &that) const { return that > *this; }
+
+    /// Determine if \p this is greather than \p that.
+    /// @returns True iff *this > that.
+    /// @brief True if this > that.
+    int operator > (const TimeValue &that) const {
+      if ( this->seconds_ > that.seconds_ ) {
+          return 1;
+      } else if ( this->seconds_ == that.seconds_ ) {
+          if ( this->nanos_ > that.nanos_ ) return 1;
+      }
+      return 0;
+    }
+
+    /// Determine if \p this is less than or equal to \p that.
+    /// @returns True iff *this <= that.
+    /// @brief True if this <= that.
+    int operator <= (const TimeValue &that) const { return that >= *this; }
+
+    /// Determine if \p this is greater than or equal to \p that.
+    /// @returns True iff *this >= that.
+    /// @brief True if this >= that.
+    int operator >= (const TimeValue &that) const {
+      if ( this->seconds_ > that.seconds_ ) {
+          return 1;
+      } else if ( this->seconds_ == that.seconds_ ) {
+          if ( this->nanos_ >= that.nanos_ ) return 1;
+      }
+      return 0;
+    }
+
+    /// Determines if two TimeValue objects represent the same moment in time.
+    /// @brief True iff *this == that.
+    /// @brief True if this == that.
+    int operator == (const TimeValue &that) const {
+      return (this->seconds_ == that.seconds_) &&
+             (this->nanos_ == that.nanos_);
+    }
+
+    /// Determines if two TimeValue objects represent times that are not the
+    /// same.
+    /// @return True iff *this != that.
+    /// @brief True if this != that.
+    int operator != (const TimeValue &that) const { return !(*this == that); }
+
+    /// Adds two TimeValue objects together.
+    /// @returns The sum of the two operands as a new TimeValue
+    /// @brief Addition operator.
+    friend TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2);
+
+    /// Subtracts two TimeValue objects.
+    /// @returns The difference of the two operands as a new TimeValue
+    /// @brief Subtraction operator.
+    friend TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2);
+
+  /// @}
+  /// @name Accessors
+  /// @{
+  public:
+
+    /// Returns only the seconds component of the TimeValue. The nanoseconds
+    /// portion is ignored. No rounding is performed.
+    /// @brief Retrieve the seconds component
+    SecondsType seconds() const { return seconds_; }
+
+    /// Returns only the nanoseconds component of the TimeValue. The seconds
+    /// portion is ignored.
+    /// @brief Retrieve the nanoseconds component.
+    NanoSecondsType nanoseconds() const { return nanos_; }
+
+    /// Returns only the fractional portion of the TimeValue rounded down to the
+    /// nearest microsecond (divide by one thousand).
+    /// @brief Retrieve the fractional part as microseconds;
+    uint32_t microseconds() const {
+      return nanos_ / NANOSECONDS_PER_MICROSECOND;
+    }
+
+    /// Returns only the fractional portion of the TimeValue rounded down to the
+    /// nearest millisecond (divide by one million).
+    /// @brief Retrieve the fractional part as milliseconds;
+    uint32_t milliseconds() const {
+      return nanos_ / NANOSECONDS_PER_MILLISECOND;
+    }
+
+    /// Returns the TimeValue as a number of microseconds. Note that the value
+    /// returned can overflow because the range of a uint64_t is smaller than
+    /// the range of a TimeValue. Nevertheless, this is useful on some operating
+    /// systems and is therefore provided.
+    /// @brief Convert to a number of microseconds (can overflow)
+    uint64_t usec() const {
+      return seconds_ * MICROSECONDS_PER_SECOND +
+             ( nanos_ / NANOSECONDS_PER_MICROSECOND );
+    }
+
+    /// Returns the TimeValue as a number of milliseconds. Note that the value
+    /// returned can overflow because the range of a uint64_t is smaller than
+    /// the range of a TimeValue. Nevertheless, this is useful on some operating
+    /// systems and is therefore provided.
+    /// @brief Convert to a number of milliseconds (can overflow)
+    uint64_t msec() const {
+      return seconds_ * MILLISECONDS_PER_SECOND +
+             ( nanos_ / NANOSECONDS_PER_MILLISECOND );
+    }
+
+    /// Converts the TimeValue into the corresponding number of "ticks" for
+    /// Posix, correcting for the difference in Posix zero time.
+    /// @brief Convert to unix time (100 nanoseconds since 12:00:00a Jan 1,1970)
+    uint64_t toPosixTime() const {
+      uint64_t result = seconds_ - PosixZeroTime.seconds_;
+      result += nanos_ / NANOSECONDS_PER_POSIX_TICK;
+      return result;
+    }
+
+    /// Converts the TimeValue into the corresponding number of seconds
+    /// since the epoch (00:00:00 Jan 1,1970).
+    uint64_t toEpochTime() const {
+      return seconds_ - PosixZeroTime.seconds_;
+    }
+
+    /// Converts the TimeValue into the corresponding number of "ticks" for
+    /// Win32 platforms, correcting for the difference in Win32 zero time.
+    /// @brief Convert to windows time (seconds since 12:00:00a Jan 1, 1601)
+    uint64_t toWin32Time() const {
+      uint64_t result = seconds_ - Win32ZeroTime.seconds_;
+      result += nanos_ / NANOSECONDS_PER_WIN32_TICK;
+      return result;
+    }
+
+    /// Provides the seconds and nanoseconds as results in its arguments after
+    /// correction for the Posix zero time.
+    /// @brief Convert to timespec time (ala POSIX.1b)
+    void getTimespecTime( uint64_t& seconds, uint32_t& nanos ) const {
+      seconds = seconds_ - PosixZeroTime.seconds_;
+      nanos = nanos_;
+    }
+
+    /// Provides conversion of the TimeValue into a readable time & date.
+    /// @returns std::string containing the readable time value
+    /// @brief Convert time to a string.
+    std::string str() const;
+
+  /// @}
+  /// @name Mutators
+  /// @{
+  public:
+    /// The seconds component of the TimeValue is set to \p sec without
+    /// modifying the nanoseconds part.  This is useful for whole second
+    /// arithmetic.
+    /// @brief Set the seconds component.
+    void seconds (SecondsType sec ) {
+      this->seconds_ = sec;
+      this->normalize();
+    }
+
+    /// The nanoseconds component of the TimeValue is set to \p nanos without
+    /// modifying the seconds part. This is useful for basic computations
+    /// involving just the nanoseconds portion. Note that the TimeValue will be
+    /// normalized after this call so that the fractional (nanoseconds) portion
+    /// will have the smallest equivalent value.
+    /// @brief Set the nanoseconds component using a number of nanoseconds.
+    void nanoseconds ( NanoSecondsType nanos ) {
+      this->nanos_ = nanos;
+      this->normalize();
+    }
+
+    /// The seconds component remains unchanged.
+    /// @brief Set the nanoseconds component using a number of microseconds.
+    void microseconds ( int32_t micros ) {
+      this->nanos_ = micros * NANOSECONDS_PER_MICROSECOND;
+      this->normalize();
+    }
+
+    /// The seconds component remains unchanged.
+    /// @brief Set the nanoseconds component using a number of milliseconds.
+    void milliseconds ( int32_t millis ) {
+      this->nanos_ = millis * NANOSECONDS_PER_MILLISECOND;
+      this->normalize();
+    }
+
+    /// @brief Converts from microsecond format to TimeValue format
+    void usec( int64_t microseconds ) {
+      this->seconds_ = microseconds / MICROSECONDS_PER_SECOND;
+      this->nanos_ = NanoSecondsType(microseconds % MICROSECONDS_PER_SECOND) *
+        NANOSECONDS_PER_MICROSECOND;
+      this->normalize();
+    }
+
+    /// @brief Converts from millisecond format to TimeValue format
+    void msec( int64_t milliseconds ) {
+      this->seconds_ = milliseconds / MILLISECONDS_PER_SECOND;
+      this->nanos_ = NanoSecondsType(milliseconds % MILLISECONDS_PER_SECOND) *
+        NANOSECONDS_PER_MILLISECOND;
+      this->normalize();
+    }
+
+    /// Converts the \p seconds argument from PosixTime to the corresponding
+    /// TimeValue and assigns that value to \p this.
+    /// @brief Convert seconds form PosixTime to TimeValue
+    void fromEpochTime( SecondsType seconds ) {
+      seconds_ = seconds + PosixZeroTime.seconds_;
+      nanos_ = 0;
+      this->normalize();
+    }
+
+    /// Converts the \p win32Time argument from Windows FILETIME to the
+    /// corresponding TimeValue and assigns that value to \p this.
+    /// @brief Convert seconds form Windows FILETIME to TimeValue
+    void fromWin32Time( uint64_t win32Time ) {
+      this->seconds_ = win32Time / 10000000 + Win32ZeroTime.seconds_;
+      this->nanos_ = NanoSecondsType(win32Time  % 10000000) * 100;
+    }
+
+  /// @}
+  /// @name Implementation
+  /// @{
+  private:
+    /// This causes the values to be represented so that the fractional
+    /// part is minimized, possibly incrementing the seconds part.
+    /// @brief Normalize to canonical form.
+    void normalize();
+
+  /// @}
+  /// @name Data
+  /// @{
+  private:
+    /// Store the values as a <timeval>.
+    SecondsType      seconds_;///< Stores the seconds part of the TimeVal
+    NanoSecondsType  nanos_;  ///< Stores the nanoseconds part of the TimeVal
+  /// @}
+
+  };
+
+inline TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2) {
+  TimeValue sum (tv1.seconds_ + tv2.seconds_, tv1.nanos_ + tv2.nanos_);
+  sum.normalize ();
+  return sum;
+}
+
+inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
+  TimeValue difference (tv1.seconds_ - tv2.seconds_, tv1.nanos_ - tv2.nanos_ );
+  difference.normalize ();
+  return difference;
+}
+
+}
+}
+
+#endif
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index f959136f86a0..404cb6d6c8b6 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_SUPPORT_TIMER_H
 #define LLVM_SUPPORT_TIMER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/StringRef.h"
 #include <cassert>
 #include <string>
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
new file mode 100644
index 000000000000..65b182a24535
--- /dev/null
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -0,0 +1,62 @@
+//===- ToolOutputFile.h - Output files for compiler-like tools -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
+#define LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// tool_output_file - This class contains a raw_fd_ostream and adds a
+/// few extra features commonly needed for compiler-like tool output files:
+///   - The file is automatically deleted if the process is killed.
+///   - The file is automatically deleted when the tool_output_file
+///     object is destroyed unless the client calls keep().
+class tool_output_file {
+  /// Installer - This class is declared before the raw_fd_ostream so that
+  /// it is constructed before the raw_fd_ostream is constructed and
+  /// destructed after the raw_fd_ostream is destructed. It installs
+  /// cleanups in its constructor and uninstalls them in its destructor.
+  class CleanupInstaller {
+    /// Filename - The name of the file.
+    std::string Filename;
+  public:
+    /// Keep - The flag which indicates whether we should not delete the file.
+    bool Keep;
+
+    explicit CleanupInstaller(const char *filename);
+    ~CleanupInstaller();
+  } Installer;
+
+  /// OS - The contained stream. This is intentionally declared after
+  /// Installer.
+  raw_fd_ostream OS;
+
+public:
+  /// tool_output_file - This constructor's arguments are passed to
+  /// to raw_fd_ostream's constructor.
+  tool_output_file(const char *filename, std::string &ErrorInfo,
+                   unsigned Flags = 0);
+
+  /// os - Return the contained raw_fd_ostream.
+  raw_fd_ostream &os() { return OS; }
+
+  /// keep - Indicate that the tool's job wrt this output file has been
+  /// successful and the file should not be deleted.
+  void keep() { Installer.Keep = true; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/Support/TypeBuilder.h b/include/llvm/Support/TypeBuilder.h
index 81c2747b6c05..ea63da00edcd 100644
--- a/include/llvm/Support/TypeBuilder.h
+++ b/include/llvm/Support/TypeBuilder.h
@@ -88,6 +88,8 @@ class ieee_double {};
 class x86_fp80 {};
 class fp128 {};
 class ppc_fp128 {};
+// X86 MMX.
+class x86_mmx {};
 }  // namespace types
 
 // LLVM doesn't have const or volatile types.
@@ -219,6 +221,10 @@ template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
 public:
   static const Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
 };
+template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
+public:
+  static const Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
+};
 
 template<bool cross> class TypeBuilder<void, cross> {
 public:
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
new file mode 100644
index 000000000000..7662eaaff5a9
--- /dev/null
+++ b/include/llvm/Support/Valgrind.h
@@ -0,0 +1,32 @@
+//===- llvm/Support/Valgrind.h - Communication with Valgrind -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods for communicating with a valgrind instance this program is running
+// under.  These are all no-ops unless LLVM was configured on a system with the
+// valgrind headers installed and valgrind is controlling this process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_VALGRIND_H
+#define LLVM_SYSTEM_VALGRIND_H
+
+#include <stddef.h>
+
+namespace llvm {
+namespace sys {
+  // True if Valgrind is controlling this process.
+  bool RunningOnValgrind();
+
+  // Discard valgrind's translation of code in the range [Addr .. Addr + Len).
+  // Otherwise valgrind may continue to execute the old version of the code.
+  void ValgrindDiscardTranslations(const void *Addr, size_t Len);
+}
+}
+
+#endif
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 39bdbd804c27..6bfae5e29822 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -15,7 +15,7 @@
 #define LLVM_SUPPORT_RAW_OSTREAM_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class format_object_base;
@@ -165,7 +165,7 @@ public:
   }
 
   raw_ostream &operator<<(const char *Str) {
-    // Inline fast path, particulary for constant strings where a sufficiently
+    // Inline fast path, particularly for constant strings where a sufficiently
     // smart compiler will simplify strlen.
 
     return this->operator<<(StringRef(Str));
@@ -196,7 +196,7 @@ public:
 
   /// write_escaped - Output \arg Str, turning '\\', '\t', '\n', '"', and
   /// anything that doesn't satisfy std::isprint into an escape sequence.
-  raw_ostream &write_escaped(StringRef Str);
+  raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false);
 
   raw_ostream &write(unsigned char C);
   raw_ostream &write(const char *Ptr, size_t Size);
@@ -301,6 +301,10 @@ class raw_fd_ostream : public raw_ostream {
   ///
   bool Error;
 
+  /// Controls whether the stream should attempt to use atomic writes, when
+  /// possible.
+  bool UseAtomicWrites;
+
   uint64_t pos;
 
   /// write_impl - See raw_ostream::write_impl.
@@ -349,10 +353,7 @@ public:
 
   /// raw_fd_ostream ctor - FD is the file descriptor that this writes to.  If
   /// ShouldClose is true, this closes the file when the stream is destroyed.
-  raw_fd_ostream(int fd, bool shouldClose,
-                 bool unbuffered=false) : raw_ostream(unbuffered), FD(fd),
-                                          ShouldClose(shouldClose),
-                                          Error(false) {}
+  raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false);
 
   ~raw_fd_ostream();
 
@@ -361,9 +362,19 @@ public:
   void close();
 
   /// seek - Flushes the stream and repositions the underlying file descriptor
-  /// positition to the offset specified from the beginning of the file.
+  /// position to the offset specified from the beginning of the file.
   uint64_t seek(uint64_t off);
 
+  /// SetUseAtomicWrite - Set the stream to attempt to use atomic writes for
+  /// individual output routines where possible.
+  ///
+  /// Note that because raw_ostream's are typically buffered, this flag is only
+  /// sensible when used on unbuffered streams which will flush their output
+  /// immediately.
+  void SetUseAtomicWrites(bool Value) {
+    UseAtomicWrites = Value;
+  }
+
   virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
                                    bool bg=false);
   virtual raw_ostream &resetColor();
@@ -475,45 +486,6 @@ public:
   ~raw_null_ostream();
 };
 
-/// tool_output_file - This class contains a raw_fd_ostream and adds a
-/// few extra features commonly needed for compiler-like tool output files:
-///   - The file is automatically deleted if the process is killed.
-///   - The file is automatically deleted when the tool_output_file
-///     object is destroyed unless the client calls keep().
-class tool_output_file {
-  /// Installer - This class is declared before the raw_fd_ostream so that
-  /// it is constructed before the raw_fd_ostream is constructed and
-  /// destructed after the raw_fd_ostream is destructed. It installs
-  /// cleanups in its constructor and uninstalls them in its destructor.
-  class CleanupInstaller {
-    /// Filename - The name of the file.
-    std::string Filename;
-  public:
-    /// Keep - The flag which indicates whether we should not delete the file.
-    bool Keep;
-
-    explicit CleanupInstaller(const char *filename);
-    ~CleanupInstaller();
-  } Installer;
-
-  /// OS - The contained stream. This is intentionally declared after
-  /// Installer.
-  raw_fd_ostream OS;
-
-public:
-  /// tool_output_file - This constructor's arguments are passed to
-  /// to raw_fd_ostream's constructor.
-  tool_output_file(const char *filename, std::string &ErrorInfo,
-                   unsigned Flags = 0);
-
-  /// os - Return the contained raw_fd_ostream.
-  raw_fd_ostream &os() { return OS; }
-
-  /// keep - Indicate that the tool's job wrt this output file has been
-  /// successful and the file should not be deleted.
-  void keep() { Installer.Keep = true; }
-};
-
 } // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
new file mode 100644
index 000000000000..e5306ecfb35c
--- /dev/null
+++ b/include/llvm/Support/system_error.h
@@ -0,0 +1,910 @@
+//===---------------------------- system_error ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03. This is called
+// system_error even though it does not define that class because that's what
+// it's called in C++0x. We don't define system_error because it is only used
+// for exception handling, which we don't use in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_SYSTEM_ERROR_H
+#define LLVM_SYSTEM_SYSTEM_ERROR_H
+
+/*
+    system_error synopsis
+
+namespace std
+{
+
+class error_category
+{
+public:
+    virtual ~error_category();
+
+    error_category(const error_category&) = delete;
+    error_category& operator=(const error_category&) = delete;
+
+    virtual const char* name() const = 0;
+    virtual error_condition default_error_condition(int ev) const;
+    virtual bool equivalent(int code, const error_condition& condition) const;
+    virtual bool equivalent(const error_code& code, int condition) const;
+    virtual std::string message(int ev) const = 0;
+
+    bool operator==(const error_category& rhs) const;
+    bool operator!=(const error_category& rhs) const;
+    bool operator<(const error_category& rhs) const;
+};
+
+const error_category& generic_category();
+const error_category& system_category();
+
+template <class T> struct is_error_code_enum
+    : public false_type {};
+
+template <class T> struct is_error_condition_enum
+    : public false_type {};
+
+class error_code
+{
+public:
+    // constructors:
+    error_code();
+    error_code(int val, const error_category& cat);
+    template <class ErrorCodeEnum>
+        error_code(ErrorCodeEnum e);
+
+    // modifiers:
+    void assign(int val, const error_category& cat);
+    template <class ErrorCodeEnum>
+        error_code& operator=(ErrorCodeEnum e);
+    void clear();
+
+    // observers:
+    int value() const;
+    const error_category& category() const;
+    error_condition default_error_condition() const;
+    std::string message() const;
+    explicit operator bool() const;
+};
+
+// non-member functions:
+bool operator<(const error_code& lhs, const error_code& rhs);
+template <class charT, class traits>
+    basic_ostream<charT,traits>&
+    operator<<(basic_ostream<charT,traits>& os, const error_code& ec);
+
+class error_condition
+{
+public:
+    // constructors:
+    error_condition();
+    error_condition(int val, const error_category& cat);
+    template <class ErrorConditionEnum>
+        error_condition(ErrorConditionEnum e);
+
+    // modifiers:
+    void assign(int val, const error_category& cat);
+    template <class ErrorConditionEnum>
+        error_condition& operator=(ErrorConditionEnum e);
+    void clear();
+
+    // observers:
+    int value() const;
+    const error_category& category() const;
+    std::string message() const;
+    explicit operator bool() const;
+};
+
+bool operator<(const error_condition& lhs, const error_condition& rhs);
+
+class system_error
+    : public runtime_error
+{
+public:
+    system_error(error_code ec, const std::string& what_arg);
+    system_error(error_code ec, const char* what_arg);
+    system_error(error_code ec);
+    system_error(int ev, const error_category& ecat, const std::string& what_arg);
+    system_error(int ev, const error_category& ecat, const char* what_arg);
+    system_error(int ev, const error_category& ecat);
+
+    const error_code& code() const throw();
+    const char* what() const throw();
+};
+
+enum class errc
+{
+    address_family_not_supported,       // EAFNOSUPPORT
+    address_in_use,                     // EADDRINUSE
+    address_not_available,              // EADDRNOTAVAIL
+    already_connected,                  // EISCONN
+    argument_list_too_long,             // E2BIG
+    argument_out_of_domain,             // EDOM
+    bad_address,                        // EFAULT
+    bad_file_descriptor,                // EBADF
+    bad_message,                        // EBADMSG
+    broken_pipe,                        // EPIPE
+    connection_aborted,                 // ECONNABORTED
+    connection_already_in_progress,     // EALREADY
+    connection_refused,                 // ECONNREFUSED
+    connection_reset,                   // ECONNRESET
+    cross_device_link,                  // EXDEV
+    destination_address_required,       // EDESTADDRREQ
+    device_or_resource_busy,            // EBUSY
+    directory_not_empty,                // ENOTEMPTY
+    executable_format_error,            // ENOEXEC
+    file_exists,                        // EEXIST
+    file_too_large,                     // EFBIG
+    filename_too_long,                  // ENAMETOOLONG
+    function_not_supported,             // ENOSYS
+    host_unreachable,                   // EHOSTUNREACH
+    identifier_removed,                 // EIDRM
+    illegal_byte_sequence,              // EILSEQ
+    inappropriate_io_control_operation, // ENOTTY
+    interrupted,                        // EINTR
+    invalid_argument,                   // EINVAL
+    invalid_seek,                       // ESPIPE
+    io_error,                           // EIO
+    is_a_directory,                     // EISDIR
+    message_size,                       // EMSGSIZE
+    network_down,                       // ENETDOWN
+    network_reset,                      // ENETRESET
+    network_unreachable,                // ENETUNREACH
+    no_buffer_space,                    // ENOBUFS
+    no_child_process,                   // ECHILD
+    no_link,                            // ENOLINK
+    no_lock_available,                  // ENOLCK
+    no_message_available,               // ENODATA
+    no_message,                         // ENOMSG
+    no_protocol_option,                 // ENOPROTOOPT
+    no_space_on_device,                 // ENOSPC
+    no_stream_resources,                // ENOSR
+    no_such_device_or_address,          // ENXIO
+    no_such_device,                     // ENODEV
+    no_such_file_or_directory,          // ENOENT
+    no_such_process,                    // ESRCH
+    not_a_directory,                    // ENOTDIR
+    not_a_socket,                       // ENOTSOCK
+    not_a_stream,                       // ENOSTR
+    not_connected,                      // ENOTCONN
+    not_enough_memory,                  // ENOMEM
+    not_supported,                      // ENOTSUP
+    operation_canceled,                 // ECANCELED
+    operation_in_progress,              // EINPROGRESS
+    operation_not_permitted,            // EPERM
+    operation_not_supported,            // EOPNOTSUPP
+    operation_would_block,              // EWOULDBLOCK
+    owner_dead,                         // EOWNERDEAD
+    permission_denied,                  // EACCES
+    protocol_error,                     // EPROTO
+    protocol_not_supported,             // EPROTONOSUPPORT
+    read_only_file_system,              // EROFS
+    resource_deadlock_would_occur,      // EDEADLK
+    resource_unavailable_try_again,     // EAGAIN
+    result_out_of_range,                // ERANGE
+    state_not_recoverable,              // ENOTRECOVERABLE
+    stream_timeout,                     // ETIME
+    text_file_busy,                     // ETXTBSY
+    timed_out,                          // ETIMEDOUT
+    too_many_files_open_in_system,      // ENFILE
+    too_many_files_open,                // EMFILE
+    too_many_links,                     // EMLINK
+    too_many_symbolic_link_levels,      // ELOOP
+    value_too_large,                    // EOVERFLOW
+    wrong_protocol_type                 // EPROTOTYPE
+};
+
+template <> struct is_error_condition_enum<errc> : true_type { }
+
+error_code make_error_code(errc e);
+error_condition make_error_condition(errc e);
+
+// Comparison operators:
+bool operator==(const error_code& lhs, const error_code& rhs);
+bool operator==(const error_code& lhs, const error_condition& rhs);
+bool operator==(const error_condition& lhs, const error_code& rhs);
+bool operator==(const error_condition& lhs, const error_condition& rhs);
+bool operator!=(const error_code& lhs, const error_code& rhs);
+bool operator!=(const error_code& lhs, const error_condition& rhs);
+bool operator!=(const error_condition& lhs, const error_code& rhs);
+bool operator!=(const error_condition& lhs, const error_condition& rhs);
+
+template <> struct hash<std::error_code>;
+
+}  // std
+
+*/
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/type_traits.h"
+#include <cerrno>
+#include <string>
+
+// This must be here instead of a .inc file because it is used in the definition
+// of the enum values below.
+#ifdef LLVM_ON_WIN32
+
+  // The following numbers were taken from VS2010.
+# ifndef EAFNOSUPPORT
+#   define EAFNOSUPPORT 102
+# endif
+# ifndef EADDRINUSE
+#   define EADDRINUSE 100
+# endif
+# ifndef EADDRNOTAVAIL
+#   define EADDRNOTAVAIL 101
+# endif
+# ifndef EISCONN
+#   define EISCONN 113
+# endif
+# ifndef E2BIG
+#   define E2BIG 7
+# endif
+# ifndef EDOM
+#   define EDOM 33
+# endif
+# ifndef EFAULT
+#   define EFAULT 14
+# endif
+# ifndef EBADF
+#   define EBADF 9
+# endif
+# ifndef EBADMSG
+#   define EBADMSG 104
+# endif
+# ifndef EPIPE
+#   define EPIPE 32
+# endif
+# ifndef ECONNABORTED
+#   define ECONNABORTED 106
+# endif
+# ifndef EALREADY
+#   define EALREADY 103
+# endif
+# ifndef ECONNREFUSED
+#   define ECONNREFUSED 107
+# endif
+# ifndef ECONNRESET
+#   define ECONNRESET 108
+# endif
+# ifndef EXDEV
+#   define EXDEV 18
+# endif
+# ifndef EDESTADDRREQ
+#   define EDESTADDRREQ 109
+# endif
+# ifndef EBUSY
+#   define EBUSY 16
+# endif
+# ifndef ENOTEMPTY
+#   define ENOTEMPTY 41
+# endif
+# ifndef ENOEXEC
+#   define ENOEXEC 8
+# endif
+# ifndef EEXIST
+#   define EEXIST 17
+# endif
+# ifndef EFBIG
+#   define EFBIG 27
+# endif
+# ifndef ENAMETOOLONG
+#   define ENAMETOOLONG 38
+# endif
+# ifndef ENOSYS
+#   define ENOSYS 40
+# endif
+# ifndef EHOSTUNREACH
+#   define EHOSTUNREACH 110
+# endif
+# ifndef EIDRM
+#   define EIDRM 111
+# endif
+# ifndef EILSEQ
+#   define EILSEQ 42
+# endif
+# ifndef ENOTTY
+#   define ENOTTY 25
+# endif
+# ifndef EINTR
+#   define EINTR 4
+# endif
+# ifndef EINVAL
+#   define EINVAL 22
+# endif
+# ifndef ESPIPE
+#   define ESPIPE 29
+# endif
+# ifndef EIO
+#   define EIO 5
+# endif
+# ifndef EISDIR
+#   define EISDIR 21
+# endif
+# ifndef EMSGSIZE
+#   define EMSGSIZE 115
+# endif
+# ifndef ENETDOWN
+#   define ENETDOWN 116
+# endif
+# ifndef ENETRESET
+#   define ENETRESET 117
+# endif
+# ifndef ENETUNREACH
+#   define ENETUNREACH 118
+# endif
+# ifndef ENOBUFS
+#   define ENOBUFS 119
+# endif
+# ifndef ECHILD
+#   define ECHILD 10
+# endif
+# ifndef ENOLINK
+#   define ENOLINK 121
+# endif
+# ifndef ENOLCK
+#   define ENOLCK 39
+# endif
+# ifndef ENODATA
+#   define ENODATA 120
+# endif
+# ifndef ENOMSG
+#   define ENOMSG 122
+# endif
+# ifndef ENOPROTOOPT
+#   define ENOPROTOOPT 123
+# endif
+# ifndef ENOSPC
+#   define ENOSPC 28
+# endif
+# ifndef ENOSR
+#   define ENOSR 124
+# endif
+# ifndef ENXIO
+#   define ENXIO 6
+# endif
+# ifndef ENODEV
+#   define ENODEV 19
+# endif
+# ifndef ENOENT
+#   define ENOENT 2
+# endif
+# ifndef ESRCH
+#   define ESRCH 3
+# endif
+# ifndef ENOTDIR
+#   define ENOTDIR 20
+# endif
+# ifndef ENOTSOCK
+#   define ENOTSOCK 128
+# endif
+# ifndef ENOSTR
+#   define ENOSTR 125
+# endif
+# ifndef ENOTCONN
+#   define ENOTCONN 126
+# endif
+# ifndef ENOMEM
+#   define ENOMEM 12
+# endif
+# ifndef ENOTSUP
+#   define ENOTSUP 129
+# endif
+# ifndef ECANCELED
+#   define ECANCELED 105
+# endif
+# ifndef EINPROGRESS
+#   define EINPROGRESS 112
+# endif
+# ifndef EPERM
+#   define EPERM 1
+# endif
+# ifndef EOPNOTSUPP
+#   define EOPNOTSUPP 130
+# endif
+# ifndef EWOULDBLOCK
+#   define EWOULDBLOCK 140
+# endif
+# ifndef EOWNERDEAD
+#   define EOWNERDEAD 133
+# endif
+# ifndef EACCES
+#   define EACCES 13
+# endif
+# ifndef EPROTO
+#   define EPROTO 134
+# endif
+# ifndef EPROTONOSUPPORT
+#   define EPROTONOSUPPORT 135
+# endif
+# ifndef EROFS
+#   define EROFS 30
+# endif
+# ifndef EDEADLK
+#   define EDEADLK 36
+# endif
+# ifndef EAGAIN
+#   define EAGAIN 11
+# endif
+# ifndef ERANGE
+#   define ERANGE 34
+# endif
+# ifndef ENOTRECOVERABLE
+#   define ENOTRECOVERABLE 127
+# endif
+# ifndef ETIME
+#   define ETIME 137
+# endif
+# ifndef ETXTBSY
+#   define ETXTBSY 139
+# endif
+# ifndef ETIMEDOUT
+#   define ETIMEDOUT 138
+# endif
+# ifndef ENFILE
+#   define ENFILE 23
+# endif
+# ifndef EMFILE
+#   define EMFILE 24
+# endif
+# ifndef EMLINK
+#   define EMLINK 31
+# endif
+# ifndef ELOOP
+#   define ELOOP 114
+# endif
+# ifndef EOVERFLOW
+#   define EOVERFLOW 132
+# endif
+# ifndef EPROTOTYPE
+#   define EPROTOTYPE 136
+# endif
+#endif
+
+namespace llvm {
+
+template <class T, T v>
+struct integral_constant {
+  typedef T value_type;
+  static const value_type value = v;
+  typedef integral_constant<T,v> type;
+  operator value_type() { return value; }
+};
+
+typedef integral_constant<bool, true> true_type;
+typedef integral_constant<bool, false> false_type;
+
+// is_error_code_enum
+
+template <class Tp> struct is_error_code_enum : public false_type {};
+
+// is_error_condition_enum
+
+template <class Tp> struct is_error_condition_enum : public false_type {};
+
+// Some error codes are not present on all platforms, so we provide equivalents
+// for them:
+
+//enum class errc
+struct errc {
+enum _ {
+  success                             = 0,
+  address_family_not_supported        = EAFNOSUPPORT,
+  address_in_use                      = EADDRINUSE,
+  address_not_available               = EADDRNOTAVAIL,
+  already_connected                   = EISCONN,
+  argument_list_too_long              = E2BIG,
+  argument_out_of_domain              = EDOM,
+  bad_address                         = EFAULT,
+  bad_file_descriptor                 = EBADF,
+#ifdef EBADMSG
+  bad_message                         = EBADMSG,
+#else
+  bad_message                         = EINVAL,
+#endif
+  broken_pipe                         = EPIPE,
+  connection_aborted                  = ECONNABORTED,
+  connection_already_in_progress      = EALREADY,
+  connection_refused                  = ECONNREFUSED,
+  connection_reset                    = ECONNRESET,
+  cross_device_link                   = EXDEV,
+  destination_address_required        = EDESTADDRREQ,
+  device_or_resource_busy             = EBUSY,
+  directory_not_empty                 = ENOTEMPTY,
+  executable_format_error             = ENOEXEC,
+  file_exists                         = EEXIST,
+  file_too_large                      = EFBIG,
+  filename_too_long                   = ENAMETOOLONG,
+  function_not_supported              = ENOSYS,
+  host_unreachable                    = EHOSTUNREACH,
+  identifier_removed                  = EIDRM,
+  illegal_byte_sequence               = EILSEQ,
+  inappropriate_io_control_operation  = ENOTTY,
+  interrupted                         = EINTR,
+  invalid_argument                    = EINVAL,
+  invalid_seek                        = ESPIPE,
+  io_error                            = EIO,
+  is_a_directory                      = EISDIR,
+  message_size                        = EMSGSIZE,
+  network_down                        = ENETDOWN,
+  network_reset                       = ENETRESET,
+  network_unreachable                 = ENETUNREACH,
+  no_buffer_space                     = ENOBUFS,
+  no_child_process                    = ECHILD,
+#ifdef ENOLINK
+  no_link                             = ENOLINK,
+#else
+  no_link                             = EINVAL,
+#endif
+  no_lock_available                   = ENOLCK,
+#ifdef ENODATA
+  no_message_available                = ENODATA,
+#else
+  no_message_available                = ENOMSG,
+#endif
+  no_message                          = ENOMSG,
+  no_protocol_option                  = ENOPROTOOPT,
+  no_space_on_device                  = ENOSPC,
+#ifdef ENOSR
+  no_stream_resources                 = ENOSR,
+#else
+  no_stream_resources                 = ENOMEM,
+#endif
+  no_such_device_or_address           = ENXIO,
+  no_such_device                      = ENODEV,
+  no_such_file_or_directory           = ENOENT,
+  no_such_process                     = ESRCH,
+  not_a_directory                     = ENOTDIR,
+  not_a_socket                        = ENOTSOCK,
+#ifdef ENOSTR
+  not_a_stream                        = ENOSTR,
+#else
+  not_a_stream                        = EINVAL,
+#endif
+  not_connected                       = ENOTCONN,
+  not_enough_memory                   = ENOMEM,
+  not_supported                       = ENOTSUP,
+#ifdef ECANCELED
+  operation_canceled                  = ECANCELED,
+#else
+  operation_canceled                  = EINVAL,
+#endif
+  operation_in_progress               = EINPROGRESS,
+  operation_not_permitted             = EPERM,
+  operation_not_supported             = EOPNOTSUPP,
+  operation_would_block               = EWOULDBLOCK,
+#ifdef EOWNERDEAD
+  owner_dead                          = EOWNERDEAD,
+#else
+  owner_dead                          = EINVAL,
+#endif
+  permission_denied                   = EACCES,
+#ifdef EPROTO
+  protocol_error                      = EPROTO,
+#else
+  protocol_error                      = EINVAL,
+#endif
+  protocol_not_supported              = EPROTONOSUPPORT,
+  read_only_file_system               = EROFS,
+  resource_deadlock_would_occur       = EDEADLK,
+  resource_unavailable_try_again      = EAGAIN,
+  result_out_of_range                 = ERANGE,
+#ifdef ENOTRECOVERABLE
+  state_not_recoverable               = ENOTRECOVERABLE,
+#else
+  state_not_recoverable               = EINVAL,
+#endif
+#ifdef ETIME
+  stream_timeout                      = ETIME,
+#else
+  stream_timeout                      = ETIMEDOUT,
+#endif
+  text_file_busy                      = ETXTBSY,
+  timed_out                           = ETIMEDOUT,
+  too_many_files_open_in_system       = ENFILE,
+  too_many_files_open                 = EMFILE,
+  too_many_links                      = EMLINK,
+  too_many_symbolic_link_levels       = ELOOP,
+  value_too_large                     = EOVERFLOW,
+  wrong_protocol_type                 = EPROTOTYPE
+};
+
+  _ v_;
+
+  errc(_ v) : v_(v) {}
+  operator int() const {return v_;}
+};
+
+template <> struct is_error_condition_enum<errc> : true_type { };
+
+template <> struct is_error_condition_enum<errc::_> : true_type { };
+
+class error_condition;
+class error_code;
+
+// class error_category
+
+class _do_message;
+
+class error_category
+{
+public:
+  virtual ~error_category();
+
+private:
+  error_category();
+  error_category(const error_category&);// = delete;
+  error_category& operator=(const error_category&);// = delete;
+
+public:
+  virtual const char* name() const = 0;
+  virtual error_condition default_error_condition(int _ev) const;
+  virtual bool equivalent(int _code, const error_condition& _condition) const;
+  virtual bool equivalent(const error_code& _code, int _condition) const;
+  virtual std::string message(int _ev) const = 0;
+
+  bool operator==(const error_category& _rhs) const {return this == &_rhs;}
+
+  bool operator!=(const error_category& _rhs) const {return !(*this == _rhs);}
+
+  bool operator< (const error_category& _rhs) const {return this < &_rhs;}
+
+  friend class _do_message;
+};
+
+class _do_message : public error_category
+{
+public:
+  virtual std::string message(int ev) const;
+};
+
+const error_category& generic_category();
+const error_category& system_category();
+
+/// Get the error_category used for errno values from POSIX functions. This is
+/// the same as the system_category on POISIX systems, but is the same as the
+/// generic_category on Windows.
+const error_category& posix_category();
+
+class error_condition
+{
+  int _val_;
+  const error_category* _cat_;
+public:
+  error_condition() : _val_(0), _cat_(&generic_category()) {}
+
+  error_condition(int _val, const error_category& _cat)
+    : _val_(_val), _cat_(&_cat) {}
+
+  template <class E>
+  error_condition(E _e, typename enable_if_c<
+                          is_error_condition_enum<E>::value
+                        >::type* = 0)
+    {*this = make_error_condition(_e);}
+
+  void assign(int _val, const error_category& _cat) {
+    _val_ = _val;
+    _cat_ = &_cat;
+  }
+
+  template <class E>
+    typename enable_if_c
+    <
+      is_error_condition_enum<E>::value,
+      error_condition&
+    >::type
+    operator=(E _e)
+      {*this = make_error_condition(_e); return *this;}
+
+  void clear() {
+    _val_ = 0;
+    _cat_ = &generic_category();
+  }
+
+  int value() const {return _val_;}
+
+  const error_category& category() const {return *_cat_;}
+  std::string message() const;
+
+  typedef void (*unspecified_bool_type)();
+  static void unspecified_bool_true() {}
+
+  operator unspecified_bool_type() const { // true if error
+    return _val_ == 0 ? 0 : unspecified_bool_true;
+  }
+};
+
+inline error_condition make_error_condition(errc _e) {
+  return error_condition(static_cast<int>(_e), generic_category());
+}
+
+inline bool operator<(const error_condition& _x, const error_condition& _y) {
+  return _x.category() < _y.category()
+      || (_x.category() == _y.category() && _x.value() < _y.value());
+}
+
+// error_code
+
+class error_code {
+  int _val_;
+  const error_category* _cat_;
+public:
+  error_code() : _val_(0), _cat_(&system_category()) {}
+
+  error_code(int _val, const error_category& _cat)
+    : _val_(_val), _cat_(&_cat) {}
+
+  template <class E>
+  error_code(E _e, typename enable_if_c<
+                     is_error_code_enum<E>::value
+                   >::type* = 0) {
+    *this = make_error_code(_e);
+  }
+
+  void assign(int _val, const error_category& _cat) {
+      _val_ = _val;
+      _cat_ = &_cat;
+  }
+
+  template <class E>
+    typename enable_if_c
+    <
+      is_error_code_enum<E>::value,
+      error_code&
+    >::type
+    operator=(E _e)
+      {*this = make_error_code(_e); return *this;}
+
+  void clear() {
+    _val_ = 0;
+    _cat_ = &system_category();
+  }
+
+  int value() const {return _val_;}
+
+  const error_category& category() const {return *_cat_;}
+
+  error_condition default_error_condition() const
+    {return _cat_->default_error_condition(_val_);}
+
+  std::string message() const;
+
+  typedef void (*unspecified_bool_type)();
+  static void unspecified_bool_true() {}
+
+  operator unspecified_bool_type() const { // true if error
+    return _val_ == 0 ? 0 : unspecified_bool_true;
+  }
+};
+
+inline error_code make_error_code(errc _e) {
+  return error_code(static_cast<int>(_e), generic_category());
+}
+
+inline bool operator<(const error_code& _x, const error_code& _y) {
+  return _x.category() < _y.category()
+      || (_x.category() == _y.category() && _x.value() < _y.value());
+}
+
+inline bool operator==(const error_code& _x, const error_code& _y) {
+  return _x.category() == _y.category() && _x.value() == _y.value();
+}
+
+inline bool operator==(const error_code& _x, const error_condition& _y) {
+  return _x.category().equivalent(_x.value(), _y)
+      || _y.category().equivalent(_x, _y.value());
+}
+
+inline bool operator==(const error_condition& _x, const error_code& _y) {
+  return _y == _x;
+}
+
+inline bool operator==(const error_condition& _x, const error_condition& _y) {
+   return _x.category() == _y.category() && _x.value() == _y.value();
+}
+
+inline bool operator!=(const error_code& _x, const error_code& _y) {
+  return !(_x == _y);
+}
+
+inline bool operator!=(const error_code& _x, const error_condition& _y) {
+  return !(_x == _y);
+}
+
+inline bool operator!=(const error_condition& _x, const error_code& _y) {
+  return !(_x == _y);
+}
+
+inline bool operator!=(const error_condition& _x, const error_condition& _y) {
+  return !(_x == _y);
+}
+
+// Windows errors.
+
+//  To construct an error_code after an API error:
+//
+//      error_code( ::GetLastError(), system_category() )
+struct windows_error {
+enum _ {
+  success = 0,
+  // These names and values are based on Windows WinError.h
+  // This is not a complete list. Add to this list if you need to explicitly
+  // check for it.
+  invalid_function        = 1, // ERROR_INVALID_FUNCTION,
+  file_not_found          = 2, // ERROR_FILE_NOT_FOUND,
+  path_not_found          = 3, // ERROR_PATH_NOT_FOUND,
+  too_many_open_files     = 4, // ERROR_TOO_MANY_OPEN_FILES,
+  access_denied           = 5, // ERROR_ACCESS_DENIED,
+  invalid_handle          = 6, // ERROR_INVALID_HANDLE,
+  arena_trashed           = 7, // ERROR_ARENA_TRASHED,
+  not_enough_memory       = 8, // ERROR_NOT_ENOUGH_MEMORY,
+  invalid_block           = 9, // ERROR_INVALID_BLOCK,
+  bad_environment         = 10, // ERROR_BAD_ENVIRONMENT,
+  bad_format              = 11, // ERROR_BAD_FORMAT,
+  invalid_access          = 12, // ERROR_INVALID_ACCESS,
+  outofmemory             = 14, // ERROR_OUTOFMEMORY,
+  invalid_drive           = 15, // ERROR_INVALID_DRIVE,
+  current_directory       = 16, // ERROR_CURRENT_DIRECTORY,
+  not_same_device         = 17, // ERROR_NOT_SAME_DEVICE,
+  no_more_files           = 18, // ERROR_NO_MORE_FILES,
+  write_protect           = 19, // ERROR_WRITE_PROTECT,
+  bad_unit                = 20, // ERROR_BAD_UNIT,
+  not_ready               = 21, // ERROR_NOT_READY,
+  bad_command             = 22, // ERROR_BAD_COMMAND,
+  crc                     = 23, // ERROR_CRC,
+  bad_length              = 24, // ERROR_BAD_LENGTH,
+  seek                    = 25, // ERROR_SEEK,
+  not_dos_disk            = 26, // ERROR_NOT_DOS_DISK,
+  sector_not_found        = 27, // ERROR_SECTOR_NOT_FOUND,
+  out_of_paper            = 28, // ERROR_OUT_OF_PAPER,
+  write_fault             = 29, // ERROR_WRITE_FAULT,
+  read_fault              = 30, // ERROR_READ_FAULT,
+  gen_failure             = 31, // ERROR_GEN_FAILURE,
+  sharing_violation       = 32, // ERROR_SHARING_VIOLATION,
+  lock_violation          = 33, // ERROR_LOCK_VIOLATION,
+  wrong_disk              = 34, // ERROR_WRONG_DISK,
+  sharing_buffer_exceeded = 36, // ERROR_SHARING_BUFFER_EXCEEDED,
+  handle_eof              = 38, // ERROR_HANDLE_EOF,
+  handle_disk_full        = 39, // ERROR_HANDLE_DISK_FULL,
+  rem_not_list            = 51, // ERROR_REM_NOT_LIST,
+  dup_name                = 52, // ERROR_DUP_NAME,
+  bad_net_path            = 53, // ERROR_BAD_NETPATH,
+  network_busy            = 54, // ERROR_NETWORK_BUSY,
+  file_exists             = 80, // ERROR_FILE_EXISTS,
+  cannot_make             = 82, // ERROR_CANNOT_MAKE,
+  broken_pipe             = 109, // ERROR_BROKEN_PIPE,
+  open_failed             = 110, // ERROR_OPEN_FAILED,
+  buffer_overflow         = 111, // ERROR_BUFFER_OVERFLOW,
+  disk_full               = 112, // ERROR_DISK_FULL,
+  insufficient_buffer     = 122, // ERROR_INSUFFICIENT_BUFFER,
+  lock_failed             = 167, // ERROR_LOCK_FAILED,
+  busy                    = 170, // ERROR_BUSY,
+  cancel_violation        = 173, // ERROR_CANCEL_VIOLATION,
+  already_exists          = 183  // ERROR_ALREADY_EXISTS
+};
+  _ v_;
+
+  windows_error(_ v) : v_(v) {}
+  explicit windows_error(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+};
+
+
+template <> struct is_error_code_enum<windows_error> : true_type { };
+
+template <> struct is_error_code_enum<windows_error::_> : true_type { };
+
+inline error_code make_error_code(windows_error e) {
+  return error_code(static_cast<int>(e), system_category());
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/System/AIXDataTypesFix.h b/include/llvm/System/AIXDataTypesFix.h
deleted file mode 100644
index 8dbf02f28269..000000000000
--- a/include/llvm/System/AIXDataTypesFix.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- llvm/System/AIXDataTypesFix.h - Fix datatype defs ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file overrides default system-defined types and limits which cannot be
-// done in DataTypes.h.in because it is processed by autoheader first, which
-// comments out any #undef statement
-//
-//===----------------------------------------------------------------------===//
-
-// No include guards desired!
-
-#ifndef SUPPORT_DATATYPES_H
-#error "AIXDataTypesFix.h must only be included via DataTypes.h!"
-#endif
-
-// GCC is strict about defining large constants: they must have LL modifier.
-// These will be defined properly at the end of DataTypes.h
-#undef INT64_MAX
-#undef INT64_MIN
diff --git a/include/llvm/System/Alarm.h b/include/llvm/System/Alarm.h
deleted file mode 100644
index 7c284167c2ce..000000000000
--- a/include/llvm/System/Alarm.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//===- llvm/System/Alarm.h - Alarm Generation support  ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides an operating system independent interface to alarm(2)
-// type functionality. The Alarm class allows a one-shot alarm to be set up
-// at some number of seconds in the future. When the alarm triggers, a method
-// is called to process the event
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_ALARM_H
-#define LLVM_SYSTEM_ALARM_H
-
-namespace llvm {
-namespace sys {
-
-  /// This function registers an alarm to trigger some number of \p seconds in
-  /// the future. When that time arrives, the AlarmStatus function will begin
-  /// to return 1 instead of 0. The user must poll the status of the alarm by
-  /// making occasional calls to AlarmStatus. If the user sends an interrupt
-  /// signal, AlarmStatus will begin returning -1, even if the alarm event
-  /// occurred.
-  /// @returns nothing
-  void SetupAlarm(
-    unsigned seconds ///< Number of seconds in future when alarm arrives
-  );
-
-  /// This function terminates the alarm previously set up
-  /// @returns nothing
-  void TerminateAlarm();
-
-  /// This function acquires the status of the alarm.
-  /// @returns -1=cancelled, 0=untriggered, 1=triggered
-  int AlarmStatus();
-
-  /// Sleep for n seconds. Warning: mixing calls to Sleep() and other *Alarm
-  /// calls may be a bad idea on some platforms (source: Linux man page).
-  /// @returns nothing.
-  void Sleep(unsigned n);
-
-
-} // End sys namespace
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/System/Atomic.h b/include/llvm/System/Atomic.h
deleted file mode 100644
index fc19369d11bd..000000000000
--- a/include/llvm/System/Atomic.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===- llvm/System/Atomic.h - Atomic Operations -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys atomic operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_ATOMIC_H
-#define LLVM_SYSTEM_ATOMIC_H
-
-#include "llvm/System/DataTypes.h"
-
-namespace llvm {
-  namespace sys {
-    void MemoryFence();
-
-#ifdef _MSC_VER
-    typedef long cas_flag;
-#else
-    typedef uint32_t cas_flag;
-#endif
-    cas_flag CompareAndSwap(volatile cas_flag* ptr,
-                            cas_flag new_value,
-                            cas_flag old_value);
-    cas_flag AtomicIncrement(volatile cas_flag* ptr);
-    cas_flag AtomicDecrement(volatile cas_flag* ptr);
-    cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val);
-    cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val);
-    cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val);
-  }
-}
-
-#endif
diff --git a/include/llvm/System/DataTypes.h.cmake b/include/llvm/System/DataTypes.h.cmake
deleted file mode 100644
index 9efe75a56ebc..000000000000
--- a/include/llvm/System/DataTypes.h.cmake
+++ /dev/null
@@ -1,189 +0,0 @@
-/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
-|*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
-|*                                                                            *|
-|*===----------------------------------------------------------------------===*|
-|*                                                                            *|
-|* This file contains definitions to figure out the size of _HOST_ data types.*|
-|* This file is important because different host OS's define different macros,*|
-|* which makes portability tough.  This file exports the following            *|
-|* definitions:                                                               *|
-|*                                                                            *|
-|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
-|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
-|*                                                                            *|
-|* No library is required when using these functinons.                        *|
-|*                                                                            *|
-|*===----------------------------------------------------------------------===*/
-
-/* Please leave this file C-compatible. */
-
-#ifndef SUPPORT_DATATYPES_H
-#define SUPPORT_DATATYPES_H
-
-#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
-#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
-#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
-#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
-#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
-
-#ifdef __cplusplus
-#include <cmath>
-#else
-#include <math.h>
-#endif
-
-#ifndef _MSC_VER
-
-/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
-   being defined.  We would define it here, but in order to prevent Bad Things
-   happening when system headers or C++ STL headers include stdint.h before we
-   define it here, we define it on the g++ command line (in Makefile.rules). */
-#if !defined(__STDC_LIMIT_MACROS)
-# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
-#endif
-
-#if !defined(__STDC_CONSTANT_MACROS)
-# error "Must #define __STDC_CONSTANT_MACROS before " \
-        "#including System/DataTypes.h"
-#endif
-
-/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-#ifdef _AIX
-#include "llvm/System/AIXDataTypesFix.h"
-#endif
-
-/* Handle incorrect definition of uint64_t as u_int64_t */
-#ifndef HAVE_UINT64_T
-#ifdef HAVE_U_INT64_T
-typedef u_int64_t uint64_t;
-#else
-# error "Don't have a definition for uint64_t on this platform"
-#endif
-#endif
-
-#ifdef _OpenBSD_
-#define INT8_MAX 127
-#define INT8_MIN -128
-#define UINT8_MAX 255
-#define INT16_MAX 32767
-#define INT16_MIN -32768
-#define UINT16_MAX 65535
-#define INT32_MAX 2147483647
-#define INT32_MIN -2147483648
-#define UINT32_MAX 4294967295U
-#endif
-
-#else /* _MSC_VER */
-/* Visual C++ doesn't provide standard integer headers, but it does provide
-   built-in data types. */
-#include <stdlib.h>
-#include <stddef.h>
-#include <sys/types.h>
-#ifdef __cplusplus
-#include <cmath>
-#else
-#include <math.h>
-#endif
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-typedef signed int int32_t;
-typedef unsigned int uint32_t;
-typedef short int16_t;
-typedef unsigned short uint16_t;
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef signed int ssize_t;
-#ifndef INT8_MAX
-# define INT8_MAX 127
-#endif
-#ifndef INT8_MIN
-# define INT8_MIN -128
-#endif
-#ifndef UINT8_MAX
-# define UINT8_MAX 255
-#endif
-#ifndef INT16_MAX
-# define INT16_MAX 32767
-#endif
-#ifndef INT16_MIN
-# define INT16_MIN -32768
-#endif
-#ifndef UINT16_MAX
-# define UINT16_MAX 65535
-#endif
-#ifndef INT32_MAX
-# define INT32_MAX 2147483647
-#endif
-#ifndef INT32_MIN
-# define INT32_MIN -2147483648
-#endif
-#ifndef UINT32_MAX
-# define UINT32_MAX 4294967295U
-#endif
-/* Certain compatibility updates to VC++ introduce the `cstdint'
- * header, which defines the INT*_C macros. On default installs they
- * are absent. */
-#ifndef INT8_C
-# define INT8_C(C)   C##i8
-#endif
-#ifndef UINT8_C
-# define UINT8_C(C)  C##ui8
-#endif
-#ifndef INT16_C
-# define INT16_C(C)  C##i16
-#endif
-#ifndef UINT16_C
-# define UINT16_C(C) C##ui16
-#endif
-#ifndef INT32_C
-# define INT32_C(C)  C##i32
-#endif
-#ifndef UINT32_C
-# define UINT32_C(C) C##ui32
-#endif
-#ifndef INT64_C
-# define INT64_C(C)  C##i64
-#endif
-#ifndef UINT64_C
-# define UINT64_C(C) C##ui64
-#endif
-#endif /* _MSC_VER */
-
-/* Set defaults for constants which we cannot find. */
-#if !defined(INT64_MAX)
-# define INT64_MAX 9223372036854775807LL
-#endif
-#if !defined(INT64_MIN)
-# define INT64_MIN ((-INT64_MAX)-1)
-#endif
-#if !defined(UINT64_MAX)
-# define UINT64_MAX 0xffffffffffffffffULL
-#endif
-
-#if __GNUC__ > 3
-#define END_WITH_NULL __attribute__((sentinel))
-#else
-#define END_WITH_NULL
-#endif
-
-#ifndef HUGE_VALF
-#define HUGE_VALF (float)HUGE_VAL
-#endif
-
-#endif  /* SUPPORT_DATATYPES_H */
diff --git a/include/llvm/System/DataTypes.h.in b/include/llvm/System/DataTypes.h.in
deleted file mode 100644
index 6537f3010fae..000000000000
--- a/include/llvm/System/DataTypes.h.in
+++ /dev/null
@@ -1,111 +0,0 @@
-/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
-|*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
-|*                                                                            *|
-|*===----------------------------------------------------------------------===*|
-|*                                                                            *|
-|* This file contains definitions to figure out the size of _HOST_ data types.*|
-|* This file is important because different host OS's define different macros,*|
-|* which makes portability tough.  This file exports the following            *|
-|* definitions:                                                               *|
-|*                                                                            *|
-|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
-|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
-|*                                                                            *|
-|* No library is required when using these functions.                         *|
-|*                                                                            *|
-|*===----------------------------------------------------------------------===*/
-
-/* Please leave this file C-compatible. */
-
-#ifndef SUPPORT_DATATYPES_H
-#define SUPPORT_DATATYPES_H
-
-#undef HAVE_SYS_TYPES_H
-#undef HAVE_INTTYPES_H
-#undef HAVE_STDINT_H
-#undef HAVE_UINT64_T
-#undef HAVE_U_INT64_T
-
-#ifdef __cplusplus
-#include <cmath>
-#else
-#include <math.h>
-#endif
-
-/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
-   being defined.  We would define it here, but in order to prevent Bad Things
-   happening when system headers or C++ STL headers include stdint.h before we
-   define it here, we define it on the g++ command line (in Makefile.rules). */
-#if !defined(__STDC_LIMIT_MACROS)
-# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
-#endif
-
-#if !defined(__STDC_CONSTANT_MACROS)
-# error "Must #define __STDC_CONSTANT_MACROS before " \
-        "#including System/DataTypes.h"
-#endif
-
-/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-#ifdef _AIX
-#include "llvm/System/AIXDataTypesFix.h"
-#endif
-
-/* Handle incorrect definition of uint64_t as u_int64_t */
-#ifndef HAVE_UINT64_T
-#ifdef HAVE_U_INT64_T
-typedef u_int64_t uint64_t;
-#else
-# error "Don't have a definition for uint64_t on this platform"
-#endif
-#endif
-
-#ifdef _OpenBSD_
-#define INT8_MAX 127
-#define INT8_MIN -128
-#define UINT8_MAX 255
-#define INT16_MAX 32767
-#define INT16_MIN -32768
-#define UINT16_MAX 65535
-#define INT32_MAX 2147483647
-#define INT32_MIN -2147483648
-#define UINT32_MAX 4294967295U
-#endif
-
-/* Set defaults for constants which we cannot find. */
-#if !defined(INT64_MAX)
-# define INT64_MAX 9223372036854775807LL
-#endif
-#if !defined(INT64_MIN)
-# define INT64_MIN ((-INT64_MAX)-1)
-#endif
-#if !defined(UINT64_MAX)
-# define UINT64_MAX 0xffffffffffffffffULL
-#endif
-
-#if __GNUC__ > 3
-#define END_WITH_NULL __attribute__((sentinel))
-#else
-#define END_WITH_NULL
-#endif
-
-#ifndef HUGE_VALF
-#define HUGE_VALF (float)HUGE_VAL
-#endif
-
-#endif  /* SUPPORT_DATATYPES_H */
diff --git a/include/llvm/System/Disassembler.h b/include/llvm/System/Disassembler.h
deleted file mode 100644
index e11e792de85a..000000000000
--- a/include/llvm/System/Disassembler.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- llvm/Support/Disassembler.h ------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the necessary glue to call external disassembler
-// libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_DISASSEMBLER_H
-#define LLVM_SYSTEM_DISASSEMBLER_H
-
-#include "llvm/System/DataTypes.h"
-#include <string>
-
-namespace llvm {
-namespace sys {
-
-/// This function returns true, if there is possible to use some external
-/// disassembler library. False otherwise.
-bool hasDisassembler();
-
-/// This function provides some "glue" code to call external disassembler
-/// libraries.
-std::string disassembleBuffer(uint8_t* start, size_t length, uint64_t pc = 0);
-
-}
-}
-
-#endif // LLVM_SYSTEM_DISASSEMBLER_H
diff --git a/include/llvm/System/DynamicLibrary.h b/include/llvm/System/DynamicLibrary.h
deleted file mode 100644
index 745b8f8b5b4b..000000000000
--- a/include/llvm/System/DynamicLibrary.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//===-- llvm/System/DynamicLibrary.h - Portable Dynamic Library -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the sys::DynamicLibrary class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H
-#define LLVM_SYSTEM_DYNAMIC_LIBRARY_H
-
-#include <string>
-
-namespace llvm {
-namespace sys {
-
-  /// This class provides a portable interface to dynamic libraries which also
-  /// might be known as shared libraries, shared objects, dynamic shared
-  /// objects, or dynamic link libraries. Regardless of the terminology or the
-  /// operating system interface, this class provides a portable interface that
-  /// allows dynamic libraries to be loaded and searched for externally
-  /// defined symbols. This is typically used to provide "plug-in" support.
-  /// It also allows for symbols to be defined which don't live in any library,
-  /// but rather the main program itself, useful on Windows where the main
-  /// executable cannot be searched.
-  class DynamicLibrary {
-    DynamicLibrary(); // DO NOT IMPLEMENT
-  public:
-    /// This function allows a library to be loaded without instantiating a
-    /// DynamicLibrary object. Consequently, it is marked as being permanent
-    /// and will only be unloaded when the program terminates.  This returns
-    /// false on success or returns true and fills in *ErrMsg on failure.
-    /// @brief Open a dynamic library permanently.
-    ///
-    /// NOTE: This function is not thread safe.
-    ///
-    static bool LoadLibraryPermanently(const char *filename,
-                                       std::string *ErrMsg = 0);
-
-    /// This function will search through all previously loaded dynamic
-    /// libraries for the symbol \p symbolName. If it is found, the addressof
-    /// that symbol is returned. If not, null is returned. Note that this will
-    /// search permanently loaded libraries (LoadLibraryPermanently) as well
-    /// as ephemerally loaded libraries (constructors).
-    /// @throws std::string on error.
-    /// @brief Search through libraries for address of a symbol
-    ///
-    /// NOTE: This function is not thread safe.
-    ///
-    static void *SearchForAddressOfSymbol(const char *symbolName);
-
-    /// @brief Convenience function for C++ophiles.
-    ///
-    /// NOTE: This function is not thread safe.
-    ///
-    static void *SearchForAddressOfSymbol(const std::string &symbolName) {
-      return SearchForAddressOfSymbol(symbolName.c_str());
-    }
-
-    /// This functions permanently adds the symbol \p symbolName with the
-    /// value \p symbolValue.  These symbols are searched before any
-    /// libraries.
-    /// @brief Add searchable symbol/value pair.
-    ///
-    /// NOTE: This function is not thread safe.
-    ///
-    static void AddSymbol(const char *symbolName, void *symbolValue);
-
-    /// @brief Convenience function for C++ophiles.
-    ///
-    /// NOTE: This function is not thread safe.
-    ///
-    static void AddSymbol(const std::string &symbolName, void *symbolValue) {
-      AddSymbol(symbolName.c_str(), symbolValue);
-    }
-  };
-
-} // End sys namespace
-} // End llvm namespace
-
-#endif // LLVM_SYSTEM_DYNAMIC_LIBRARY_H
diff --git a/include/llvm/System/Errno.h b/include/llvm/System/Errno.h
deleted file mode 100644
index 6e292ba62651..000000000000
--- a/include/llvm/System/Errno.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- llvm/System/Errno.h - Portable+convenient errno handling -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares some portable and convenient functions to deal with errno.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_ERRNO_H
-#define LLVM_SYSTEM_ERRNO_H
-
-#include <string>
-
-namespace llvm {
-namespace sys {
-
-/// Returns a string representation of the errno value, using whatever
-/// thread-safe variant of strerror() is available.  Be sure to call this
-/// immediately after the function that set errno, or errno may have been
-/// overwritten by an intervening call.
-std::string StrError();
-
-/// Like the no-argument version above, but uses \p errnum instead of errno.
-std::string StrError(int errnum);
-
-}  // namespace sys
-}  // namespace llvm
-
-#endif  // LLVM_SYSTEM_ERRNO_H
diff --git a/include/llvm/System/Host.h b/include/llvm/System/Host.h
deleted file mode 100644
index 4fbf5c177c6e..000000000000
--- a/include/llvm/System/Host.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//===- llvm/System/Host.h - Host machine characteristics --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Methods for querying the nature of the host machine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_HOST_H
-#define LLVM_SYSTEM_HOST_H
-
-#include "llvm/ADT/StringMap.h"
-#include <string>
-
-namespace llvm {
-namespace sys {
-
-  inline bool isLittleEndianHost() {
-    union {
-      int i;
-      char c;
-    };
-    i = 1;
-    return c;
-  }
-
-  inline bool isBigEndianHost() {
-    return !isLittleEndianHost();
-  }
-
-  /// getHostTriple() - Return the target triple of the running
-  /// system.
-  ///
-  /// The target triple is a string in the format of:
-  ///   CPU_TYPE-VENDOR-OPERATING_SYSTEM
-  /// or
-  ///   CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
-  std::string getHostTriple();
-
-  /// getHostCPUName - Get the LLVM name for the host CPU. The particular format
-  /// of the name is target dependent, and suitable for passing as -mcpu to the
-  /// target which matches the host.
-  ///
-  /// \return - The host CPU name, or empty if the CPU could not be determined.
-  std::string getHostCPUName();
-
-  /// getHostCPUFeatures - Get the LLVM names for the host CPU features.
-  /// The particular format of the names are target dependent, and suitable for
-  /// passing as -mattr to the target which matches the host.
-  ///
-  /// \param Features - A string mapping feature names to either
-  /// true (if enabled) or false (if disabled). This routine makes no guarantees
-  /// about exactly which features may appear in this map, except that they are
-  /// all valid LLVM feature names.
-  ///
-  /// \return - True on success.
-  bool getHostCPUFeatures(StringMap<bool> &Features);
-}
-}
-
-#endif
diff --git a/include/llvm/System/IncludeFile.h b/include/llvm/System/IncludeFile.h
deleted file mode 100644
index 3268ea225f51..000000000000
--- a/include/llvm/System/IncludeFile.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===- llvm/System/IncludeFile.h - Ensure Linking Of Library ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the FORCE_DEFINING_FILE_TO_BE_LINKED and DEFINE_FILE_FOR
-// macros.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_INCLUDEFILE_H
-#define LLVM_SYSTEM_INCLUDEFILE_H
-
-/// This macro is the public interface that IncludeFile.h exports. This gives
-/// us the option to implement the "link the definition" capability in any 
-/// manner that we choose. All header files that depend on a specific .cpp
-/// file being linked at run time should use this macro instead of the
-/// IncludeFile class directly. 
-/// 
-/// For example, foo.h would use:<br/>
-/// <tt>FORCE_DEFINING_FILE_TO_BE_LINKED(foo)</tt><br/>
-/// 
-/// And, foo.cp would use:<br/>
-/// <tt>DEFINING_FILE_FOR(foo)</tt><br/>
-#ifdef __GNUC__
-// If the `used' attribute is available, use it to create a variable
-// with an initializer that will force the linking of the defining file.
-#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
-  namespace llvm { \
-    extern const char name ## LinkVar; \
-    __attribute__((used)) static const char *const name ## LinkObj = \
-      &name ## LinkVar; \
-  } 
-#else
-// Otherwise use a constructor call.
-#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
-  namespace llvm { \
-    extern const char name ## LinkVar; \
-    static const IncludeFile name ## LinkObj ( &name ## LinkVar ); \
-  } 
-#endif
-
-/// This macro is the counterpart to FORCE_DEFINING_FILE_TO_BE_LINKED. It should
-/// be used in a .cpp file to define the name referenced in a header file that
-/// will cause linkage of the .cpp file. It should only be used at extern level.
-#define DEFINING_FILE_FOR(name) \
-  namespace llvm { const char name ## LinkVar = 0; }
-
-namespace llvm {
-
-/// This class is used in the implementation of FORCE_DEFINING_FILE_TO_BE_LINKED
-/// macro to make sure that the implementation of a header file is included 
-/// into a tool that uses the header.  This is solely 
-/// to overcome problems linking .a files and not getting the implementation 
-/// of compilation units we need. This is commonly an issue with the various
-/// Passes but also occurs elsewhere in LLVM. We like to use .a files because
-/// they link faster and provide the smallest executables. However, sometimes
-/// those executables are too small, if the program doesn't reference something
-/// that might be needed, especially by a loaded share object. This little class
-/// helps to resolve that problem. The basic strategy is to use this class in
-/// a header file and pass the address of a variable to the constructor. If the
-/// variable is defined in the header file's corresponding .cpp file then all
-/// tools/libraries that \#include the header file will require the .cpp as
-/// well.
-/// For example:<br/>
-/// <tt>extern int LinkMyCodeStub;</tt><br/>
-/// <tt>static IncludeFile LinkMyModule(&LinkMyCodeStub);</tt><br/>
-/// @brief Class to ensure linking of corresponding object file.
-struct IncludeFile {
-  explicit IncludeFile(const void *);
-};
-
-}
-
-#endif
diff --git a/include/llvm/System/LICENSE.TXT b/include/llvm/System/LICENSE.TXT
deleted file mode 100644
index f569da205289..000000000000
--- a/include/llvm/System/LICENSE.TXT
+++ /dev/null
@@ -1,6 +0,0 @@
-LLVM System Interface Library
--------------------------------------------------------------------------------
-The LLVM System Interface Library is licensed under the Illinois Open Source 
-License and has the following additional copyright:
-
-Copyright (C) 2004 eXtensible Systems, Inc.
diff --git a/include/llvm/System/Memory.h b/include/llvm/System/Memory.h
deleted file mode 100644
index 2dd36e8ab147..000000000000
--- a/include/llvm/System/Memory.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//===- llvm/System/Memory.h - Memory Support --------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Memory class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_MEMORY_H
-#define LLVM_SYSTEM_MEMORY_H
-
-#include "llvm/System/DataTypes.h"
-#include <string>
-
-namespace llvm {
-namespace sys {
-
-  /// This class encapsulates the notion of a memory block which has an address
-  /// and a size. It is used by the Memory class (a friend) as the result of
-  /// various memory allocation operations.
-  /// @see Memory
-  /// @brief Memory block abstraction.
-  class MemoryBlock {
-  public:
-    MemoryBlock() : Address(0), Size(0) { }
-    MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
-    void *base() const { return Address; }
-    size_t size() const { return Size; }
-  private:
-    void *Address;    ///< Address of first byte of memory area
-    size_t Size;      ///< Size, in bytes of the memory area
-    friend class Memory;
-  };
-
-  /// This class provides various memory handling functions that manipulate
-  /// MemoryBlock instances.
-  /// @since 1.4
-  /// @brief An abstraction for memory operations.
-  class Memory {
-  public:
-    /// This method allocates a block of Read/Write/Execute memory that is
-    /// suitable for executing dynamically generated code (e.g. JIT). An
-    /// attempt to allocate \p NumBytes bytes of virtual memory is made.
-    /// \p NearBlock may point to an existing allocation in which case
-    /// an attempt is made to allocate more memory near the existing block.
-    ///
-    /// On success, this returns a non-null memory block, otherwise it returns
-    /// a null memory block and fills in *ErrMsg.
-    /// 
-    /// @brief Allocate Read/Write/Execute memory.
-    static MemoryBlock AllocateRWX(size_t NumBytes,
-                                   const MemoryBlock *NearBlock,
-                                   std::string *ErrMsg = 0);
-
-    /// This method releases a block of Read/Write/Execute memory that was
-    /// allocated with the AllocateRWX method. It should not be used to
-    /// release any memory block allocated any other way.
-    ///
-    /// On success, this returns false, otherwise it returns true and fills
-    /// in *ErrMsg.
-    /// @brief Release Read/Write/Execute memory.
-    static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = 0);
-    
-    
-    /// InvalidateInstructionCache - Before the JIT can run a block of code
-    /// that has been emitted it must invalidate the instruction cache on some
-    /// platforms.
-    static void InvalidateInstructionCache(const void *Addr, size_t Len);
-
-    /// setExecutable - Before the JIT can run a block of code, it has to be
-    /// given read and executable privilege. Return true if it is already r-x
-    /// or the system is able to change its previlege.
-    static bool setExecutable (MemoryBlock &M, std::string *ErrMsg = 0);
-
-    /// setWritable - When adding to a block of code, the JIT may need
-    /// to mark a block of code as RW since the protections are on page
-    /// boundaries, and the JIT internal allocations are not page aligned.
-    static bool setWritable (MemoryBlock &M, std::string *ErrMsg = 0);
-
-    /// setRangeExecutable - Mark the page containing a range of addresses 
-    /// as executable.
-    static bool setRangeExecutable(const void *Addr, size_t Size);
-
-    /// setRangeWritable - Mark the page containing a range of addresses 
-    /// as writable.
-    static bool setRangeWritable(const void *Addr, size_t Size);
-  };
-}
-}
-
-#endif
diff --git a/include/llvm/System/Mutex.h b/include/llvm/System/Mutex.h
deleted file mode 100644
index 71d10067c303..000000000000
--- a/include/llvm/System/Mutex.h
+++ /dev/null
@@ -1,154 +0,0 @@
-//===- llvm/System/Mutex.h - Mutex Operating System Concept -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_MUTEX_H
-#define LLVM_SYSTEM_MUTEX_H
-
-#include "llvm/System/Threading.h"
-#include <cassert>
-
-namespace llvm
-{
-  namespace sys
-  {
-    /// @brief Platform agnostic Mutex class.
-    class MutexImpl
-    {
-    /// @name Constructors
-    /// @{
-    public:
-
-      /// Initializes the lock but doesn't acquire it. if \p recursive is set
-      /// to false, the lock will not be recursive which makes it cheaper but
-      /// also more likely to deadlock (same thread can't acquire more than
-      /// once).
-      /// @brief Default Constructor.
-      explicit MutexImpl(bool recursive = true);
-
-      /// Releases and removes the lock
-      /// @brief Destructor
-      ~MutexImpl();
-
-    /// @}
-    /// @name Methods
-    /// @{
-    public:
-
-      /// Attempts to unconditionally acquire the lock. If the lock is held by
-      /// another thread, this method will wait until it can acquire the lock.
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally acquire the lock.
-      bool acquire();
-
-      /// Attempts to release the lock. If the lock is held by the current
-      /// thread, the lock is released allowing other threads to acquire the
-      /// lock.
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally release the lock.
-      bool release();
-
-      /// Attempts to acquire the lock without blocking. If the lock is not
-      /// available, this function returns false quickly (without blocking). If
-      /// the lock is available, it is acquired.
-      /// @returns false if any kind of error occurs or the lock is not
-      /// available, true otherwise.
-      /// @brief Try to acquire the lock.
-      bool tryacquire();
-
-    //@}
-    /// @name Platform Dependent Data
-    /// @{
-    private:
-      void* data_; ///< We don't know what the data will be
-
-    /// @}
-    /// @name Do Not Implement
-    /// @{
-    private:
-      MutexImpl(const MutexImpl & original);
-      void operator=(const MutexImpl &);
-    /// @}
-    };
-    
-    
-    /// SmartMutex - A mutex with a compile time constant parameter that 
-    /// indicates whether this mutex should become a no-op when we're not
-    /// running in multithreaded mode.
-    template<bool mt_only>
-    class SmartMutex : public MutexImpl {
-      unsigned acquired;
-      bool recursive;
-    public:
-      explicit SmartMutex(bool rec = true) :
-        MutexImpl(rec), acquired(0), recursive(rec) { }
-      
-      bool acquire() {
-        if (!mt_only || llvm_is_multithreaded()) {
-          return MutexImpl::acquire();
-        } else {
-          // Single-threaded debugging code.  This would be racy in
-          // multithreaded mode, but provides not sanity checks in single
-          // threaded mode.
-          assert((recursive || acquired == 0) && "Lock already acquired!!");
-          ++acquired;
-          return true;
-        }
-      }
-
-      bool release() {
-        if (!mt_only || llvm_is_multithreaded()) {
-          return MutexImpl::release();
-        } else {
-          // Single-threaded debugging code.  This would be racy in
-          // multithreaded mode, but provides not sanity checks in single
-          // threaded mode.
-          assert(((recursive && acquired) || (acquired == 1)) &&
-                 "Lock not acquired before release!");
-          --acquired;
-          return true;
-        }
-      }
-
-      bool tryacquire() {
-        if (!mt_only || llvm_is_multithreaded())
-          return MutexImpl::tryacquire();
-        else return true;
-      }
-      
-      private:
-        SmartMutex(const SmartMutex<mt_only> & original);
-        void operator=(const SmartMutex<mt_only> &);
-    };
-    
-    /// Mutex - A standard, always enforced mutex.
-    typedef SmartMutex<false> Mutex;
-    
-    template<bool mt_only>
-    class SmartScopedLock  {
-      SmartMutex<mt_only>& mtx;
-      
-    public:
-      SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) {
-        mtx.acquire();
-      }
-      
-      ~SmartScopedLock() {
-        mtx.release();
-      }
-    };
-    
-    typedef SmartScopedLock<false> ScopedLock;
-  }
-}
-
-#endif
diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h
deleted file mode 100644
index 23b18d47145a..000000000000
--- a/include/llvm/System/Path.h
+++ /dev/null
@@ -1,716 +0,0 @@
-//===- llvm/System/Path.h - Path Operating System Concept -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Path class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_PATH_H
-#define LLVM_SYSTEM_PATH_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/System/TimeValue.h"
-#include <set>
-#include <string>
-#include <vector>
-
-namespace llvm {
-namespace sys {
-
-  /// This structure provides basic file system information about a file. It
-  /// is patterned after the stat(2) Unix operating system call but made
-  /// platform independent and eliminates many of the unix-specific fields.
-  /// However, to support llvm-ar, the mode, user, and group fields are
-  /// retained. These pertain to unix security and may not have a meaningful
-  /// value on non-Unix platforms. However, the other fields should
-  /// always be applicable on all platforms.  The structure is filled in by
-  /// the PathWithStatus class.
-  /// @brief File status structure
-  class FileStatus {
-  public:
-    uint64_t    fileSize;   ///< Size of the file in bytes
-    TimeValue   modTime;    ///< Time of file's modification
-    uint32_t    mode;       ///< Mode of the file, if applicable
-    uint32_t    user;       ///< User ID of owner, if applicable
-    uint32_t    group;      ///< Group ID of owner, if applicable
-    uint64_t    uniqueID;   ///< A number to uniquely ID this file
-    bool        isDir  : 1; ///< True if this is a directory.
-    bool        isFile : 1; ///< True if this is a file.
-
-    FileStatus() : fileSize(0), modTime(0,0), mode(0777), user(999),
-                   group(999), uniqueID(0), isDir(false), isFile(false) { }
-
-    TimeValue getTimestamp() const { return modTime; }
-    uint64_t getSize() const { return fileSize; }
-    uint32_t getMode() const { return mode; }
-    uint32_t getUser() const { return user; }
-    uint32_t getGroup() const { return group; }
-    uint64_t getUniqueID() const { return uniqueID; }
-  };
-
-  /// This class provides an abstraction for the path to a file or directory
-  /// in the operating system's filesystem and provides various basic operations
-  /// on it.  Note that this class only represents the name of a path to a file
-  /// or directory which may or may not be valid for a given machine's file
-  /// system. The class is patterned after the java.io.File class with various
-  /// extensions and several omissions (not relevant to LLVM).  A Path object
-  /// ensures that the path it encapsulates is syntactically valid for the
-  /// operating system it is running on but does not ensure correctness for
-  /// any particular file system. That is, a syntactically valid path might
-  /// specify path components that do not exist in the file system and using
-  /// such a Path to act on the file system could produce errors. There is one
-  /// invalid Path value which is permitted: the empty path.  The class should
-  /// never allow a syntactically invalid non-empty path name to be assigned.
-  /// Empty paths are required in order to indicate an error result in some
-  /// situations. If the path is empty, the isValid operation will return
-  /// false. All operations will fail if isValid is false. Operations that
-  /// change the path will either return false if it would cause a syntactically
-  /// invalid path name (in which case the Path object is left unchanged) or
-  /// throw an std::string exception indicating the error. The methods are
-  /// grouped into four basic categories: Path Accessors (provide information
-  /// about the path without accessing disk), Disk Accessors (provide
-  /// information about the underlying file or directory), Path Mutators
-  /// (change the path information, not the disk), and Disk Mutators (change
-  /// the disk file/directory referenced by the path). The Disk Mutator methods
-  /// all have the word "disk" embedded in their method name to reinforce the
-  /// notion that the operation modifies the file system.
-  /// @since 1.4
-  /// @brief An abstraction for operating system paths.
-  class Path {
-    /// @name Constructors
-    /// @{
-    public:
-      /// Construct a path to the root directory of the file system. The root
-      /// directory is a top level directory above which there are no more
-      /// directories. For example, on UNIX, the root directory is /. On Windows
-      /// it is C:\. Other operating systems may have different notions of
-      /// what the root directory is or none at all. In that case, a consistent
-      /// default root directory will be used.
-      static Path GetRootDirectory();
-
-      /// Construct a path to a unique temporary directory that is created in
-      /// a "standard" place for the operating system. The directory is
-      /// guaranteed to be created on exit from this function. If the directory
-      /// cannot be created, the function will throw an exception.
-      /// @returns an invalid path (empty) on error
-      /// @param ErrMsg Optional place for an error message if an error occurs
-      /// @brief Constrct a path to an new, unique, existing temporary
-      /// directory.
-      static Path GetTemporaryDirectory(std::string* ErrMsg = 0);
-
-      /// Construct a vector of sys::Path that contains the "standard" system
-      /// library paths suitable for linking into programs. This function *must*
-      /// return the value of LLVM_LIB_SEARCH_PATH as the first item in \p Paths
-      /// if that environment variable is set and it references a directory.
-      /// @brief Construct a path to the system library directory
-      static void GetSystemLibraryPaths(std::vector<sys::Path>& Paths);
-
-      /// Construct a vector of sys::Path that contains the "standard" bitcode
-      /// library paths suitable for linking into an llvm program. This function
-      /// *must* return the value of LLVM_LIB_SEARCH_PATH as well as the value
-      /// of LLVM_LIBDIR. It also must provide the System library paths as
-      /// returned by GetSystemLibraryPaths.
-      /// @see GetSystemLibraryPaths
-      /// @brief Construct a list of directories in which bitcode could be
-      /// found.
-      static void GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths);
-
-      /// Find the path to a library using its short name. Use the system
-      /// dependent library paths to locate the library.
-      /// @brief Find a library.
-      static Path FindLibrary(std::string& short_name);
-
-      /// Construct a path to the default LLVM configuration directory. The
-      /// implementation must ensure that this is a well-known (same on many
-      /// systems) directory in which llvm configuration files exist. For
-      /// example, on Unix, the /etc/llvm directory has been selected.
-      /// @brief Construct a path to the default LLVM configuration directory
-      static Path GetLLVMDefaultConfigDir();
-
-      /// Construct a path to the LLVM installed configuration directory. The
-      /// implementation must ensure that this refers to the "etc" directory of
-      /// the LLVM installation. This is the location where configuration files
-      /// will be located for a particular installation of LLVM on a machine.
-      /// @brief Construct a path to the LLVM installed configuration directory
-      static Path GetLLVMConfigDir();
-
-      /// Construct a path to the current user's home directory. The
-      /// implementation must use an operating system specific mechanism for
-      /// determining the user's home directory. For example, the environment
-      /// variable "HOME" could be used on Unix. If a given operating system
-      /// does not have the concept of a user's home directory, this static
-      /// constructor must provide the same result as GetRootDirectory.
-      /// @brief Construct a path to the current user's "home" directory
-      static Path GetUserHomeDirectory();
-
-      /// Construct a path to the current directory for the current process.
-      /// @returns The current working directory.
-      /// @brief Returns the current working directory.
-      static Path GetCurrentDirectory();
-
-      /// Return the suffix commonly used on file names that contain a shared
-      /// object, shared archive, or dynamic link library. Such files are
-      /// linked at runtime into a process and their code images are shared
-      /// between processes.
-      /// @returns The dynamic link library suffix for the current platform.
-      /// @brief Return the dynamic link library suffix.
-      static StringRef GetDLLSuffix();
-
-      /// GetMainExecutable - Return the path to the main executable, given the
-      /// value of argv[0] from program startup and the address of main itself.
-      /// In extremis, this function may fail and return an empty path.
-      static Path GetMainExecutable(const char *argv0, void *MainAddr);
-
-      /// This is one of the very few ways in which a path can be constructed
-      /// with a syntactically invalid name. The only *legal* invalid name is an
-      /// empty one. Other invalid names are not permitted. Empty paths are
-      /// provided so that they can be used to indicate null or error results in
-      /// other lib/System functionality.
-      /// @brief Construct an empty (and invalid) path.
-      Path() : path() {}
-      Path(const Path &that) : path(that.path) {}
-
-      /// This constructor will accept a char* or std::string as a path. No
-      /// checking is done on this path to determine if it is valid. To
-      /// determine validity of the path, use the isValid method.
-      /// @param p The path to assign.
-      /// @brief Construct a Path from a string.
-      explicit Path(StringRef p);
-
-      /// This constructor will accept a character range as a path.  No checking
-      /// is done on this path to determine if it is valid.  To determine
-      /// validity of the path, use the isValid method.
-      /// @param StrStart A pointer to the first character of the path name
-      /// @param StrLen The length of the path name at StrStart
-      /// @brief Construct a Path from a string.
-      Path(const char *StrStart, unsigned StrLen);
-
-    /// @}
-    /// @name Operators
-    /// @{
-    public:
-      /// Makes a copy of \p that to \p this.
-      /// @returns \p this
-      /// @brief Assignment Operator
-      Path &operator=(const Path &that) {
-        path = that.path;
-        return *this;
-      }
-
-      /// Makes a copy of \p that to \p this.
-      /// @param that A StringRef denoting the path
-      /// @returns \p this
-      /// @brief Assignment Operator
-      Path &operator=(StringRef that);
-
-      /// Compares \p this Path with \p that Path for equality.
-      /// @returns true if \p this and \p that refer to the same thing.
-      /// @brief Equality Operator
-      bool operator==(const Path &that) const;
-
-      /// Compares \p this Path with \p that Path for inequality.
-      /// @returns true if \p this and \p that refer to different things.
-      /// @brief Inequality Operator
-      bool operator!=(const Path &that) const { return !(*this == that); }
-
-      /// Determines if \p this Path is less than \p that Path. This is required
-      /// so that Path objects can be placed into ordered collections (e.g.
-      /// std::map). The comparison is done lexicographically as defined by
-      /// the std::string::compare method.
-      /// @returns true if \p this path is lexicographically less than \p that.
-      /// @brief Less Than Operator
-      bool operator<(const Path& that) const;
-
-    /// @}
-    /// @name Path Accessors
-    /// @{
-    public:
-      /// This function will use an operating system specific algorithm to
-      /// determine if the current value of \p this is a syntactically valid
-      /// path name for the operating system. The path name does not need to
-      /// exist, validity is simply syntactical. Empty paths are always invalid.
-      /// @returns true iff the path name is syntactically legal for the
-      /// host operating system.
-      /// @brief Determine if a path is syntactically valid or not.
-      bool isValid() const;
-
-      /// This function determines if the contents of the path name are empty.
-      /// That is, the path name has a zero length. This does NOT determine if
-      /// if the file is empty. To get the length of the file itself, Use the
-      /// PathWithStatus::getFileStatus() method and then the getSize() method
-      /// on the returned FileStatus object.
-      /// @returns true iff the path is empty.
-      /// @brief Determines if the path name is empty (invalid).
-      bool isEmpty() const { return path.empty(); }
-
-       /// This function returns the last component of the path name. The last
-      /// component is the file or directory name occuring after the last
-      /// directory separator. If no directory separator is present, the entire
-      /// path name is returned (i.e. same as toString).
-      /// @returns StringRef containing the last component of the path name.
-      /// @brief Returns the last component of the path name.
-      StringRef getLast() const;
-
-      /// This function strips off the path and suffix of the file or directory
-      /// name and returns just the basename. For example /a/foo.bar would cause
-      /// this function to return "foo".
-      /// @returns StringRef containing the basename of the path
-      /// @brief Get the base name of the path
-      StringRef getBasename() const;
-
-      /// This function strips off the suffix of the path beginning with the
-      /// path separator ('/' on Unix, '\' on Windows) and returns the result.
-      StringRef getDirname() const;
-
-      /// This function strips off the path and basename(up to and
-      /// including the last dot) of the file or directory name and
-      /// returns just the suffix. For example /a/foo.bar would cause
-      /// this function to return "bar".
-      /// @returns StringRef containing the suffix of the path
-      /// @brief Get the suffix of the path
-      StringRef getSuffix() const;
-
-      /// Obtain a 'C' string for the path name.
-      /// @returns a 'C' string containing the path name.
-      /// @brief Returns the path as a C string.
-      const char *c_str() const { return path.c_str(); }
-      const std::string &str() const { return path; }
-
-
-      /// size - Return the length in bytes of this path name.
-      size_t size() const { return path.size(); }
-
-      /// empty - Returns true if the path is empty.
-      unsigned empty() const { return path.empty(); }
-
-    /// @}
-    /// @name Disk Accessors
-    /// @{
-    public:
-      /// This function determines if the path name is absolute, as opposed to
-      /// relative.
-      /// @brief Determine if the path is absolute.
-      bool isAbsolute() const;
-
-      /// This function determines if the path name is absolute, as opposed to
-      /// relative.
-      /// @brief Determine if the path is absolute.
-      static bool isAbsolute(const char *NameStart, unsigned NameLen);
-
-      /// This function opens the file associated with the path name provided by
-      /// the Path object and reads its magic number. If the magic number at the
-      /// start of the file matches \p magic, true is returned. In all other
-      /// cases (file not found, file not accessible, etc.) it returns false.
-      /// @returns true if the magic number of the file matches \p magic.
-      /// @brief Determine if file has a specific magic number
-      bool hasMagicNumber(StringRef magic) const;
-
-      /// This function retrieves the first \p len bytes of the file associated
-      /// with \p this. These bytes are returned as the "magic number" in the
-      /// \p Magic parameter.
-      /// @returns true if the Path is a file and the magic number is retrieved,
-      /// false otherwise.
-      /// @brief Get the file's magic number.
-      bool getMagicNumber(std::string& Magic, unsigned len) const;
-
-      /// This function determines if the path name in the object references an
-      /// archive file by looking at its magic number.
-      /// @returns true if the file starts with the magic number for an archive
-      /// file.
-      /// @brief Determine if the path references an archive file.
-      bool isArchive() const;
-
-      /// This function determines if the path name in the object references an
-      /// LLVM Bitcode file by looking at its magic number.
-      /// @returns true if the file starts with the magic number for LLVM
-      /// bitcode files.
-      /// @brief Determine if the path references a bitcode file.
-      bool isBitcodeFile() const;
-
-      /// This function determines if the path name in the object references a
-      /// native Dynamic Library (shared library, shared object) by looking at
-      /// the file's magic number. The Path object must reference a file, not a
-      /// directory.
-      /// @returns true if the file starts with the magic number for a native
-      /// shared library.
-      /// @brief Determine if the path references a dynamic library.
-      bool isDynamicLibrary() const;
-
-      /// This function determines if the path name references an existing file
-      /// or directory in the file system.
-      /// @returns true if the pathname references an existing file or
-      /// directory.
-      /// @brief Determines if the path is a file or directory in
-      /// the file system.
-      bool exists() const;
-
-      /// This function determines if the path name refences an
-      /// existing directory.
-      /// @returns true if the pathname references an existing directory.
-      /// @brief Determins if the path is a directory in the file system.
-      bool isDirectory() const;
-
-      /// This function determines if the path name references a readable file
-      /// or directory in the file system. This function checks for
-      /// the existence and readability (by the current program) of the file
-      /// or directory.
-      /// @returns true if the pathname references a readable file.
-      /// @brief Determines if the path is a readable file or directory
-      /// in the file system.
-      bool canRead() const;
-
-      /// This function determines if the path name references a writable file
-      /// or directory in the file system. This function checks for the
-      /// existence and writability (by the current program) of the file or
-      /// directory.
-      /// @returns true if the pathname references a writable file.
-      /// @brief Determines if the path is a writable file or directory
-      /// in the file system.
-      bool canWrite() const;
-
-      /// This function checks that what we're trying to work only on a regular file.
-      /// Check for things like /dev/null, any block special file,
-      /// or other things that aren't "regular" regular files.
-      /// @returns true if the file is S_ISREG.
-      /// @brief Determines if the file is a regular file
-      bool isRegularFile() const;
-
-      /// This function determines if the path name references an executable
-      /// file in the file system. This function checks for the existence and
-      /// executability (by the current program) of the file.
-      /// @returns true if the pathname references an executable file.
-      /// @brief Determines if the path is an executable file in the file
-      /// system.
-      bool canExecute() const;
-
-      /// This function builds a list of paths that are the names of the
-      /// files and directories in a directory.
-      /// @returns true if an error occurs, true otherwise
-      /// @brief Build a list of directory's contents.
-      bool getDirectoryContents(
-        std::set<Path> &paths, ///< The resulting list of file & directory names
-        std::string* ErrMsg    ///< Optional place to return an error message.
-      ) const;
-
-    /// @}
-    /// @name Path Mutators
-    /// @{
-    public:
-      /// The path name is cleared and becomes empty. This is an invalid
-      /// path name but is the *only* invalid path name. This is provided
-      /// so that path objects can be used to indicate the lack of a
-      /// valid path being found.
-      /// @brief Make the path empty.
-      void clear() { path.clear(); }
-
-      /// This method sets the Path object to \p unverified_path. This can fail
-      /// if the \p unverified_path does not pass the syntactic checks of the
-      /// isValid() method. If verification fails, the Path object remains
-      /// unchanged and false is returned. Otherwise true is returned and the
-      /// Path object takes on the path value of \p unverified_path
-      /// @returns true if the path was set, false otherwise.
-      /// @param unverified_path The path to be set in Path object.
-      /// @brief Set a full path from a StringRef
-      bool set(StringRef unverified_path);
-
-      /// One path component is removed from the Path. If only one component is
-      /// present in the path, the Path object becomes empty. If the Path object
-      /// is empty, no change is made.
-      /// @returns false if the path component could not be removed.
-      /// @brief Removes the last directory component of the Path.
-      bool eraseComponent();
-
-      /// The \p component is added to the end of the Path if it is a legal
-      /// name for the operating system. A directory separator will be added if
-      /// needed.
-      /// @returns false if the path component could not be added.
-      /// @brief Appends one path component to the Path.
-      bool appendComponent(StringRef component);
-
-      /// A period and the \p suffix are appended to the end of the pathname.
-      /// The precondition for this function is that the Path reference a file
-      /// name (i.e. isFile() returns true). If the Path is not a file, no
-      /// action is taken and the function returns false. If the path would
-      /// become invalid for the host operating system, false is returned.
-      /// @returns false if the suffix could not be added, true if it was.
-      /// @brief Adds a period and the \p suffix to the end of the pathname.
-      bool appendSuffix(StringRef suffix);
-
-      /// The suffix of the filename is erased. The suffix begins with and
-      /// includes the last . character in the filename after the last directory
-      /// separator and extends until the end of the name. If no . character is
-      /// after the last directory separator, then the file name is left
-      /// unchanged (i.e. it was already without a suffix) but the function
-      /// returns false.
-      /// @returns false if there was no suffix to remove, true otherwise.
-      /// @brief Remove the suffix from a path name.
-      bool eraseSuffix();
-
-      /// The current Path name is made unique in the file system. Upon return,
-      /// the Path will have been changed to make a unique file in the file
-      /// system or it will not have been changed if the current path name is
-      /// already unique.
-      /// @throws std::string if an unrecoverable error occurs.
-      /// @brief Make the current path name unique in the file system.
-      bool makeUnique( bool reuse_current /*= true*/, std::string* ErrMsg );
-
-      /// The current Path name is made absolute by prepending the
-      /// current working directory if necessary.
-      void makeAbsolute();
-
-    /// @}
-    /// @name Disk Mutators
-    /// @{
-    public:
-      /// This method attempts to make the file referenced by the Path object
-      /// available for reading so that the canRead() method will return true.
-      /// @brief Make the file readable;
-      bool makeReadableOnDisk(std::string* ErrMsg = 0);
-
-      /// This method attempts to make the file referenced by the Path object
-      /// available for writing so that the canWrite() method will return true.
-      /// @brief Make the file writable;
-      bool makeWriteableOnDisk(std::string* ErrMsg = 0);
-
-      /// This method attempts to make the file referenced by the Path object
-      /// available for execution so that the canExecute() method will return
-      /// true.
-      /// @brief Make the file readable;
-      bool makeExecutableOnDisk(std::string* ErrMsg = 0);
-
-      /// This method allows the last modified time stamp and permission bits
-      /// to be set on the disk object referenced by the Path.
-      /// @throws std::string if an error occurs.
-      /// @returns true on error.
-      /// @brief Set the status information.
-      bool setStatusInfoOnDisk(const FileStatus &SI,
-                               std::string *ErrStr = 0) const;
-
-      /// This method attempts to create a directory in the file system with the
-      /// same name as the Path object. The \p create_parents parameter controls
-      /// whether intermediate directories are created or not. if \p
-      /// create_parents is true, then an attempt will be made to create all
-      /// intermediate directories, as needed. If \p create_parents is false,
-      /// then only the final directory component of the Path name will be
-      /// created. The created directory will have no entries.
-      /// @returns true if the directory could not be created, false otherwise
-      /// @brief Create the directory this Path refers to.
-      bool createDirectoryOnDisk(
-        bool create_parents = false, ///<  Determines whether non-existent
-           ///< directory components other than the last one (the "parents")
-           ///< are created or not.
-        std::string* ErrMsg = 0 ///< Optional place to put error messages.
-      );
-
-      /// This method attempts to create a file in the file system with the same
-      /// name as the Path object. The intermediate directories must all exist
-      /// at the time this method is called. Use createDirectoriesOnDisk to
-      /// accomplish that. The created file will be empty upon return from this
-      /// function.
-      /// @returns true if the file could not be created, false otherwise.
-      /// @brief Create the file this Path refers to.
-      bool createFileOnDisk(
-        std::string* ErrMsg = 0 ///< Optional place to put error messages.
-      );
-
-      /// This is like createFile except that it creates a temporary file. A
-      /// unique temporary file name is generated based on the contents of
-      /// \p this before the call. The new name is assigned to \p this and the
-      /// file is created.  Note that this will both change the Path object
-      /// *and* create the corresponding file. This function will ensure that
-      /// the newly generated temporary file name is unique in the file system.
-      /// @returns true if the file couldn't be created, false otherwise.
-      /// @brief Create a unique temporary file
-      bool createTemporaryFileOnDisk(
-        bool reuse_current = false, ///< When set to true, this parameter
-          ///< indicates that if the current file name does not exist then
-          ///< it will be used without modification.
-        std::string* ErrMsg = 0 ///< Optional place to put error messages
-      );
-
-      /// This method renames the file referenced by \p this as \p newName. The
-      /// file referenced by \p this must exist. The file referenced by
-      /// \p newName does not need to exist.
-      /// @returns true on error, false otherwise
-      /// @brief Rename one file as another.
-      bool renamePathOnDisk(const Path& newName, std::string* ErrMsg);
-
-      /// This method attempts to destroy the file or directory named by the
-      /// last component of the Path. If the Path refers to a directory and the
-      /// \p destroy_contents is false, an attempt will be made to remove just
-      /// the directory (the final Path component). If \p destroy_contents is
-      /// true, an attempt will be made to remove the entire contents of the
-      /// directory, recursively. If the Path refers to a file, the
-      /// \p destroy_contents parameter is ignored.
-      /// @param destroy_contents Indicates whether the contents of a destroyed
-      /// @param Err An optional string to receive an error message.
-      /// directory should also be destroyed (recursively).
-      /// @returns false if the file/directory was destroyed, true on error.
-      /// @brief Removes the file or directory from the filesystem.
-      bool eraseFromDisk(bool destroy_contents = false,
-                         std::string *Err = 0) const;
-
-
-      /// MapInFilePages - This is a low level system API to map in the file
-      /// that is currently opened as FD into the current processes' address
-      /// space for read only access.  This function may return null on failure
-      /// or if the system cannot provide the following constraints:
-      ///  1) The pages must be valid after the FD is closed, until
-      ///     UnMapFilePages is called.
-      ///  2) Any padding after the end of the file must be zero filled, if
-      ///     present.
-      ///  3) The pages must be contiguous.
-      ///
-      /// This API is not intended for general use, clients should use
-      /// MemoryBuffer::getFile instead.
-      static const char *MapInFilePages(int FD, uint64_t FileSize);
-
-      /// UnMapFilePages - Free pages mapped into the current process by
-      /// MapInFilePages.
-      ///
-      /// This API is not intended for general use, clients should use
-      /// MemoryBuffer::getFile instead.
-      static void UnMapFilePages(const char *Base, uint64_t FileSize);
-
-    /// @}
-    /// @name Data
-    /// @{
-    protected:
-      // Our win32 implementation relies on this string being mutable.
-      mutable std::string path;   ///< Storage for the path name.
-
-
-    /// @}
-  };
-
-  /// This class is identical to Path class except it allows you to obtain the
-  /// file status of the Path as well. The reason for the distinction is one of
-  /// efficiency. First, the file status requires additional space and the space
-  /// is incorporated directly into PathWithStatus without an additional malloc.
-  /// Second, obtaining status information is an expensive operation on most
-  /// operating systems so we want to be careful and explicity about where we
-  /// allow this operation in LLVM.
-  /// @brief Path with file status class.
-  class PathWithStatus : public Path {
-    /// @name Constructors
-    /// @{
-    public:
-      /// @brief Default constructor
-      PathWithStatus() : Path(), status(), fsIsValid(false) {}
-
-      /// @brief Copy constructor
-      PathWithStatus(const PathWithStatus &that)
-        : Path(static_cast<const Path&>(that)), status(that.status),
-           fsIsValid(that.fsIsValid) {}
-
-      /// This constructor allows construction from a Path object
-      /// @brief Path constructor
-      PathWithStatus(const Path &other)
-        : Path(other), status(), fsIsValid(false) {}
-
-      /// This constructor will accept a char* or std::string as a path. No
-      /// checking is done on this path to determine if it is valid. To
-      /// determine validity of the path, use the isValid method.
-      /// @brief Construct a Path from a string.
-      explicit PathWithStatus(
-        StringRef p ///< The path to assign.
-      ) : Path(p), status(), fsIsValid(false) {}
-
-      /// This constructor will accept a character range as a path.  No checking
-      /// is done on this path to determine if it is valid.  To determine
-      /// validity of the path, use the isValid method.
-      /// @brief Construct a Path from a string.
-      explicit PathWithStatus(
-        const char *StrStart,  ///< Pointer to the first character of the path
-        unsigned StrLen        ///< Length of the path.
-      ) : Path(StrStart, StrLen), status(), fsIsValid(false) {}
-
-      /// Makes a copy of \p that to \p this.
-      /// @returns \p this
-      /// @brief Assignment Operator
-      PathWithStatus &operator=(const PathWithStatus &that) {
-        static_cast<Path&>(*this) = static_cast<const Path&>(that);
-        status = that.status;
-        fsIsValid = that.fsIsValid;
-        return *this;
-      }
-
-      /// Makes a copy of \p that to \p this.
-      /// @returns \p this
-      /// @brief Assignment Operator
-      PathWithStatus &operator=(const Path &that) {
-        static_cast<Path&>(*this) = static_cast<const Path&>(that);
-        fsIsValid = false;
-        return *this;
-      }
-
-    /// @}
-    /// @name Methods
-    /// @{
-    public:
-      /// This function returns status information about the file. The type of
-      /// path (file or directory) is updated to reflect the actual contents
-      /// of the file system.
-      /// @returns 0 on failure, with Error explaining why (if non-zero)
-      /// @returns a pointer to a FileStatus structure on success.
-      /// @brief Get file status.
-      const FileStatus *getFileStatus(
-        bool forceUpdate = false, ///< Force an update from the file system
-        std::string *Error = 0    ///< Optional place to return an error msg.
-      ) const;
-
-    /// @}
-    /// @name Data
-    /// @{
-    private:
-      mutable FileStatus status; ///< Status information.
-      mutable bool fsIsValid;    ///< Whether we've obtained it or not
-
-    /// @}
-  };
-
-  /// This enumeration delineates the kinds of files that LLVM knows about.
-  enum LLVMFileType {
-    Unknown_FileType = 0,              ///< Unrecognized file
-    Bitcode_FileType,                  ///< Bitcode file
-    Archive_FileType,                  ///< ar style archive file
-    ELF_Relocatable_FileType,          ///< ELF Relocatable object file
-    ELF_Executable_FileType,           ///< ELF Executable image
-    ELF_SharedObject_FileType,         ///< ELF dynamically linked shared lib
-    ELF_Core_FileType,                 ///< ELF core image
-    Mach_O_Object_FileType,            ///< Mach-O Object file
-    Mach_O_Executable_FileType,        ///< Mach-O Executable
-    Mach_O_FixedVirtualMemorySharedLib_FileType, ///< Mach-O Shared Lib, FVM
-    Mach_O_Core_FileType,              ///< Mach-O Core File
-    Mach_O_PreloadExectuable_FileType, ///< Mach-O Preloaded Executable
-    Mach_O_DynamicallyLinkedSharedLib_FileType, ///< Mach-O dynlinked shared lib
-    Mach_O_DynamicLinker_FileType,     ///< The Mach-O dynamic linker
-    Mach_O_Bundle_FileType,            ///< Mach-O Bundle file
-    Mach_O_DynamicallyLinkedSharedLibStub_FileType, ///< Mach-O Shared lib stub
-    COFF_FileType                      ///< COFF object file or lib
-  };
-
-  /// This utility function allows any memory block to be examined in order
-  /// to determine its file type.
-  LLVMFileType IdentifyFileType(const char*magic, unsigned length);
-
-  /// This function can be used to copy the file specified by Src to the
-  /// file specified by Dest. If an error occurs, Dest is removed.
-  /// @returns true if an error occurs, false otherwise
-  /// @brief Copy one file to another.
-  bool CopyFile(const Path& Dest, const Path& Src, std::string* ErrMsg);
-
-  /// This is the OS-specific path separator: a colon on Unix or a semicolon
-  /// on Windows.
-  extern const char PathSeparator;
-}
-
-}
-
-#endif
diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h
deleted file mode 100644
index 41bcd69b6a44..000000000000
--- a/include/llvm/System/Process.h
+++ /dev/null
@@ -1,146 +0,0 @@
-//===- llvm/System/Process.h ------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Process class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_PROCESS_H
-#define LLVM_SYSTEM_PROCESS_H
-
-#include "llvm/System/TimeValue.h"
-
-namespace llvm {
-namespace sys {
-
-  /// This class provides an abstraction for getting information about the
-  /// currently executing process.
-  /// @since 1.4
-  /// @brief An abstraction for operating system processes.
-  class Process {
-    /// @name Accessors
-    /// @{
-    public:
-      /// This static function will return the operating system's virtual memory
-      /// page size.
-      /// @returns The number of bytes in a virtual memory page.
-      /// @brief Get the virtual memory page size
-      static unsigned GetPageSize();
-
-      /// This static function will return the total amount of memory allocated
-      /// by the process. This only counts the memory allocated via the malloc,
-      /// calloc and realloc functions and includes any "free" holes in the
-      /// allocated space.
-      /// @brief Return process memory usage.
-      static size_t GetMallocUsage();
-
-      /// This static function will return the total memory usage of the
-      /// process. This includes code, data, stack and mapped pages usage. Notei
-      /// that the value returned here is not necessarily the Running Set Size,
-      /// it is the total virtual memory usage, regardless of mapped state of
-      /// that memory.
-      static size_t GetTotalMemoryUsage();
-
-      /// This static function will set \p user_time to the amount of CPU time
-      /// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
-      /// time spent in system (kernel) mode.  If the operating system does not
-      /// support collection of these metrics, a zero TimeValue will be for both
-      /// values.
-      static void GetTimeUsage(
-        TimeValue& elapsed,
-          ///< Returns the TimeValue::now() giving current time
-        TimeValue& user_time,
-          ///< Returns the current amount of user time for the process
-        TimeValue& sys_time
-          ///< Returns the current amount of system time for the process
-      );
-
-      /// This static function will return the process' current user id number.
-      /// Not all operating systems support this feature. Where it is not
-      /// supported, the function should return 65536 as the value.
-      static int GetCurrentUserId();
-
-      /// This static function will return the process' current group id number.
-      /// Not all operating systems support this feature. Where it is not
-      /// supported, the function should return 65536 as the value.
-      static int GetCurrentGroupId();
-
-      /// This function makes the necessary calls to the operating system to
-      /// prevent core files or any other kind of large memory dumps that can
-      /// occur when a program fails.
-      /// @brief Prevent core file generation.
-      static void PreventCoreFiles();
-
-      /// This function determines if the standard input is connected directly
-      /// to a user's input (keyboard probably), rather than coming from a file
-      /// or pipe.
-      static bool StandardInIsUserInput();
-
-      /// This function determines if the standard output is connected to a
-      /// "tty" or "console" window. That is, the output would be displayed to
-      /// the user rather than being put on a pipe or stored in a file.
-      static bool StandardOutIsDisplayed();
-
-      /// This function determines if the standard error is connected to a
-      /// "tty" or "console" window. That is, the output would be displayed to
-      /// the user rather than being put on a pipe or stored in a file.
-      static bool StandardErrIsDisplayed();
-
-      /// This function determines if the given file descriptor is connected to
-      /// a "tty" or "console" window. That is, the output would be displayed to
-      /// the user rather than being put on a pipe or stored in a file.
-      static bool FileDescriptorIsDisplayed(int fd);
-
-      /// This function determines the number of columns in the window
-      /// if standard output is connected to a "tty" or "console"
-      /// window. If standard output is not connected to a tty or
-      /// console, or if the number of columns cannot be determined,
-      /// this routine returns zero.
-      static unsigned StandardOutColumns();
-
-      /// This function determines the number of columns in the window
-      /// if standard error is connected to a "tty" or "console"
-      /// window. If standard error is not connected to a tty or
-      /// console, or if the number of columns cannot be determined,
-      /// this routine returns zero.
-      static unsigned StandardErrColumns();
-
-      /// This function determines whether the terminal connected to standard
-      /// output supports colors. If standard output is not connected to a
-      /// terminal, this function returns false.
-      static bool StandardOutHasColors();
-
-      /// This function determines whether the terminal connected to standard
-      /// error supports colors. If standard error is not connected to a
-      /// terminal, this function returns false.
-      static bool StandardErrHasColors();
-
-      /// Whether changing colors requires the output to be flushed.
-      /// This is needed on systems that don't support escape sequences for
-      /// changing colors.
-      static bool ColorNeedsFlush();
-
-      /// This function returns the colorcode escape sequences.
-      /// If ColorNeedsFlush() is true then this function will change the colors
-      /// and return an empty escape sequence. In that case it is the
-      /// responsibility of the client to flush the output stream prior to
-      /// calling this function.
-      static const char *OutputColor(char c, bool bold, bool bg);
-
-      /// Same as OutputColor, but only enables the bold attribute.
-      static const char *OutputBold(bool bg);
-
-      /// Resets the terminals colors, or returns an escape sequence to do so.
-      static const char *ResetColor();
-    /// @}
-  };
-}
-}
-
-#endif
diff --git a/include/llvm/System/Program.h b/include/llvm/System/Program.h
deleted file mode 100644
index 7017305a2eb6..000000000000
--- a/include/llvm/System/Program.h
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- llvm/System/Program.h ------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Program class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_PROGRAM_H
-#define LLVM_SYSTEM_PROGRAM_H
-
-#include "llvm/System/Path.h"
-
-namespace llvm {
-namespace sys {
-
-  // TODO: Add operations to communicate with the process, redirect its I/O,
-  // etc.
-
-  /// This class provides an abstraction for programs that are executable by the
-  /// operating system. It provides a platform generic way to find executable
-  /// programs from the path and to execute them in various ways. The sys::Path
-  /// class is used to specify the location of the Program.
-  /// @since 1.4
-  /// @brief An abstraction for finding and executing programs.
-  class Program {
-    /// Opaque handle for target specific data.
-    void *Data_;
-
-    // Noncopyable.
-    Program(const Program& other);
-    Program& operator=(const Program& other);
-
-    /// @name Methods
-    /// @{
-  public:
-
-    Program();
-    ~Program();
-
-    /// Return process ID of this program.
-    unsigned GetPid() const;
-
-    /// This function executes the program using the \p arguments provided.  The
-    /// invoked program will inherit the stdin, stdout, and stderr file
-    /// descriptors, the environment and other configuration settings of the
-    /// invoking program. If Path::executable() does not return true when this
-    /// function is called then a std::string is thrown.
-    /// @returns false in case of error, true otherwise.
-    /// @see FindProgramByName
-    /// @brief Executes the program with the given set of \p args.
-    bool Execute
-    ( const Path& path,  ///< sys::Path object providing the path of the
-      ///< program to be executed. It is presumed this is the result of
-      ///< the FindProgramByName method.
-      const char** args, ///< A vector of strings that are passed to the
-      ///< program.  The first element should be the name of the program.
-      ///< The list *must* be terminated by a null char* entry.
-      const char ** env = 0, ///< An optional vector of strings to use for
-      ///< the program's environment. If not provided, the current program's
-      ///< environment will be used.
-      const sys::Path** redirects = 0, ///< An optional array of pointers to
-      ///< Paths. If the array is null, no redirection is done. The array
-      ///< should have a size of at least three. If the pointer in the array
-      ///< are not null, then the inferior process's stdin(0), stdout(1),
-      ///< and stderr(2) will be redirected to the corresponding Paths.
-      ///< When an empty Path is passed in, the corresponding file
-      ///< descriptor will be disconnected (ie, /dev/null'd) in a portable
-      ///< way.
-      unsigned memoryLimit = 0, ///< If non-zero, this specifies max. amount
-      ///< of memory can be allocated by process. If memory usage will be
-      ///< higher limit, the child is killed and this call returns. If zero
-      ///< - no memory limit.
-      std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
-      ///< instance in which error messages will be returned. If the string
-      ///< is non-empty upon return an error occurred while invoking the
-      ///< program.
-      );
-
-    /// This function waits for the program to exit. This function will block
-    /// the current program until the invoked program exits.
-    /// @returns an integer result code indicating the status of the program.
-    /// A zero or positive value indicates the result code of the program. A
-    /// negative value is the signal number on which it terminated.
-    /// @see Execute
-    /// @brief Waits for the program to exit.
-    int Wait
-    ( unsigned secondsToWait = 0, ///< If non-zero, this specifies the amount
-      ///< of time to wait for the child process to exit. If the time
-      ///< expires, the child is killed and this call returns. If zero,
-      ///< this function will wait until the child finishes or forever if
-      ///< it doesn't.
-      std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
-      ///< instance in which error messages will be returned. If the string
-      ///< is non-empty upon return an error occurred while waiting.
-      );
-
-    /// This function terminates the program.
-    /// @returns true if an error occured.
-    /// @see Execute
-    /// @brief Terminates the program.
-    bool Kill
-    ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
-      ///< instance in which error messages will be returned. If the string
-      ///< is non-empty upon return an error occurred while killing the
-      ///< program.
-      );
-
-    /// This static constructor (factory) will attempt to locate a program in
-    /// the operating system's file system using some pre-determined set of
-    /// locations to search (e.g. the PATH on Unix).
-    /// @returns A Path object initialized to the path of the program or a
-    /// Path object that is empty (invalid) if the program could not be found.
-    /// @brief Construct a Program by finding it by name.
-    static Path FindProgramByName(const std::string& name);
-
-    // These methods change the specified standard stream (stdin,
-    // stdout, or stderr) to binary mode. They return true if an error
-    // occurred
-    static bool ChangeStdinToBinary();
-    static bool ChangeStdoutToBinary();
-    static bool ChangeStderrToBinary();
-
-    /// A convenience function equivalent to Program prg; prg.Execute(..);
-    /// prg.Wait(..);
-    /// @see Execute, Wait
-    static int ExecuteAndWait(const Path& path,
-                              const char** args,
-                              const char ** env = 0,
-                              const sys::Path** redirects = 0,
-                              unsigned secondsToWait = 0,
-                              unsigned memoryLimit = 0,
-                              std::string* ErrMsg = 0);
-
-    /// A convenience function equivalent to Program prg; prg.Execute(..);
-    /// @see Execute
-    static void ExecuteNoWait(const Path& path,
-                              const char** args,
-                              const char ** env = 0,
-                              const sys::Path** redirects = 0,
-                              unsigned memoryLimit = 0,
-                              std::string* ErrMsg = 0);
-
-    /// @}
-
-  };
-}
-}
-
-#endif
diff --git a/include/llvm/System/RWMutex.h b/include/llvm/System/RWMutex.h
deleted file mode 100644
index 3a288180bf07..000000000000
--- a/include/llvm/System/RWMutex.h
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_RWMUTEX_H
-#define LLVM_SYSTEM_RWMUTEX_H
-
-#include "llvm/System/Threading.h"
-#include <cassert>
-
-namespace llvm
-{
-  namespace sys
-  {
-    /// @brief Platform agnostic RWMutex class.
-    class RWMutexImpl
-    {
-    /// @name Constructors
-    /// @{
-    public:
-
-      /// Initializes the lock but doesn't acquire it.
-      /// @brief Default Constructor.
-      explicit RWMutexImpl();
-
-      /// Releases and removes the lock
-      /// @brief Destructor
-      ~RWMutexImpl();
-
-    /// @}
-    /// @name Methods
-    /// @{
-    public:
-
-      /// Attempts to unconditionally acquire the lock in reader mode. If the
-      /// lock is held by a writer, this method will wait until it can acquire
-      /// the lock. 
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally acquire the lock in reader mode.
-      bool reader_acquire();
-
-      /// Attempts to release the lock in reader mode.
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally release the lock in reader mode.
-      bool reader_release();
-
-      /// Attempts to unconditionally acquire the lock in reader mode. If the
-      /// lock is held by any readers, this method will wait until it can
-      /// acquire the lock. 
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally acquire the lock in writer mode.
-      bool writer_acquire();
-
-      /// Attempts to release the lock in writer mode.
-      /// @returns false if any kind of error occurs, true otherwise.
-      /// @brief Unconditionally release the lock in write mode.
-      bool writer_release();
-
-    //@}
-    /// @name Platform Dependent Data
-    /// @{
-    private:
-      void* data_; ///< We don't know what the data will be
-
-    /// @}
-    /// @name Do Not Implement
-    /// @{
-    private:
-      RWMutexImpl(const RWMutexImpl & original);
-      void operator=(const RWMutexImpl &);
-    /// @}
-    };
-    
-    /// SmartMutex - An R/W mutex with a compile time constant parameter that 
-    /// indicates whether this mutex should become a no-op when we're not
-    /// running in multithreaded mode.
-    template<bool mt_only>
-    class SmartRWMutex : public RWMutexImpl {
-      unsigned readers, writers;
-    public:
-      explicit SmartRWMutex() : RWMutexImpl(), readers(0), writers(0) { }
-      
-      bool reader_acquire() {
-        if (!mt_only || llvm_is_multithreaded())
-          return RWMutexImpl::reader_acquire();
-        
-        // Single-threaded debugging code.  This would be racy in multithreaded
-        // mode, but provides not sanity checks in single threaded mode.
-        ++readers;
-        return true;
-      }
-      
-      bool reader_release() {
-        if (!mt_only || llvm_is_multithreaded())
-          return RWMutexImpl::reader_release();
-        
-        // Single-threaded debugging code.  This would be racy in multithreaded
-        // mode, but provides not sanity checks in single threaded mode.
-        assert(readers > 0 && "Reader lock not acquired before release!");
-        --readers;
-        return true;
-      }
-      
-      bool writer_acquire() {
-        if (!mt_only || llvm_is_multithreaded())
-          return RWMutexImpl::writer_acquire();
-        
-        // Single-threaded debugging code.  This would be racy in multithreaded
-        // mode, but provides not sanity checks in single threaded mode.
-        assert(writers == 0 && "Writer lock already acquired!");
-        ++writers;
-        return true;
-      }
-      
-      bool writer_release() {
-        if (!mt_only || llvm_is_multithreaded())
-          return RWMutexImpl::writer_release();
-        
-        // Single-threaded debugging code.  This would be racy in multithreaded
-        // mode, but provides not sanity checks in single threaded mode.
-        assert(writers == 1 && "Writer lock not acquired before release!");
-        --writers;
-        return true;
-      }
-      
-    private:
-      SmartRWMutex(const SmartRWMutex<mt_only> & original);
-      void operator=(const SmartRWMutex<mt_only> &);
-    };
-    typedef SmartRWMutex<false> RWMutex;
-    
-    /// ScopedReader - RAII acquisition of a reader lock
-    template<bool mt_only>
-    struct SmartScopedReader {
-      SmartRWMutex<mt_only>& mutex;
-      
-      explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) {
-        mutex.reader_acquire();
-      }
-      
-      ~SmartScopedReader() {
-        mutex.reader_release();
-      }
-    };
-    typedef SmartScopedReader<false> ScopedReader;
-    
-    /// ScopedWriter - RAII acquisition of a writer lock
-    template<bool mt_only>
-    struct SmartScopedWriter {
-      SmartRWMutex<mt_only>& mutex;
-      
-      explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) {
-        mutex.writer_acquire();
-      }
-      
-      ~SmartScopedWriter() {
-        mutex.writer_release();
-      }
-    };
-    typedef SmartScopedWriter<false> ScopedWriter;
-  }
-}
-
-#endif
diff --git a/include/llvm/System/Signals.h b/include/llvm/System/Signals.h
deleted file mode 100644
index 7f1c87c3d55a..000000000000
--- a/include/llvm/System/Signals.h
+++ /dev/null
@@ -1,59 +0,0 @@
-//===- llvm/System/Signals.h - Signal Handling support ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines some helpful functions for dealing with the possibility of
-// unix signals occuring while your program is running.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_SIGNALS_H
-#define LLVM_SYSTEM_SIGNALS_H
-
-#include "llvm/System/Path.h"
-
-namespace llvm {
-namespace sys {
-
-  /// This function runs all the registered interrupt handlers, including the
-  /// removal of files registered by RemoveFileOnSignal.
-  void RunInterruptHandlers();
-
-  /// This function registers signal handlers to ensure that if a signal gets
-  /// delivered that the named file is removed.
-  /// @brief Remove a file if a fatal signal occurs.
-  bool RemoveFileOnSignal(const Path &Filename, std::string* ErrMsg = 0);
-
-  /// This function removes a file from the list of files to be removed on
-  /// signal delivery.
-  void DontRemoveFileOnSignal(const Path &Filename);
-
-  /// When an error signal (such as SIBABRT or SIGSEGV) is delivered to the
-  /// process, print a stack trace and then exit.
-  /// @brief Print a stack trace if a fatal signal occurs.
-  void PrintStackTraceOnErrorSignal();
-
-  /// AddSignalHandler - Add a function to be called when an abort/kill signal
-  /// is delivered to the process.  The handler can have a cookie passed to it
-  /// to identify what instance of the handler it is.
-  void AddSignalHandler(void (*FnPtr)(void *), void *Cookie);
-
-  /// This function registers a function to be called when the user "interrupts"
-  /// the program (typically by pressing ctrl-c).  When the user interrupts the
-  /// program, the specified interrupt function is called instead of the program
-  /// being killed, and the interrupt function automatically disabled.  Note
-  /// that interrupt functions are not allowed to call any non-reentrant
-  /// functions.  An null interrupt function pointer disables the current
-  /// installed function.  Note also that the handler may be executed on a
-  /// different thread on some platforms.
-  /// @brief Register a function to be called when ctrl-c is pressed.
-  void SetInterruptFunction(void (*IF)());
-} // End sys namespace
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/System/Solaris.h b/include/llvm/System/Solaris.h
deleted file mode 100644
index 15adb7472c10..000000000000
--- a/include/llvm/System/Solaris.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*===- llvm/System/Solaris.h ------------------------------------*- C++ -*-===*
- *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===*
- *
- * This file contains portability fixes for Solaris hosts.
- *
- *===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_SYSTEM_SOLARIS_H
-#define LLVM_SYSTEM_SOLARIS_H
-
-#include <sys/types.h>
-#include <sys/regset.h>
-
-#undef CS
-#undef DS
-#undef ES
-#undef FS
-#undef GS
-#undef SS
-#undef EAX
-#undef ECX
-#undef EDX
-#undef EBX
-#undef ESP
-#undef EBP
-#undef ESI
-#undef EDI
-#undef EIP
-#undef UESP
-#undef EFL
-#undef ERR
-#undef TRAPNO
-
-#endif
diff --git a/include/llvm/System/ThreadLocal.h b/include/llvm/System/ThreadLocal.h
deleted file mode 100644
index e6edd79d6ff1..000000000000
--- a/include/llvm/System/ThreadLocal.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- llvm/System/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::ThreadLocal class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_THREAD_LOCAL_H
-#define LLVM_SYSTEM_THREAD_LOCAL_H
-
-#include "llvm/System/Threading.h"
-#include <cassert>
-
-namespace llvm {
-  namespace sys {
-    // ThreadLocalImpl - Common base class of all ThreadLocal instantiations.
-    // YOU SHOULD NEVER USE THIS DIRECTLY.
-    class ThreadLocalImpl {
-      void* data;
-    public:
-      ThreadLocalImpl();
-      virtual ~ThreadLocalImpl();
-      void setInstance(const void* d);
-      const void* getInstance();
-      void removeInstance();
-    };
-    
-    /// ThreadLocal - A class used to abstract thread-local storage.  It holds,
-    /// for each thread, a pointer a single object of type T.
-    template<class T>
-    class ThreadLocal : public ThreadLocalImpl {
-    public:
-      ThreadLocal() : ThreadLocalImpl() { }
-      
-      /// get - Fetches a pointer to the object associated with the current
-      /// thread.  If no object has yet been associated, it returns NULL;
-      T* get() { return static_cast<T*>(getInstance()); }
-      
-      // set - Associates a pointer to an object with the current thread.
-      void set(T* d) { setInstance(d); }
-      
-      // erase - Removes the pointer associated with the current thread.
-      void erase() { removeInstance(); }
-    };
-  }
-}
-
-#endif
diff --git a/include/llvm/System/Threading.h b/include/llvm/System/Threading.h
deleted file mode 100644
index 42d2f89bcb82..000000000000
--- a/include/llvm/System/Threading.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===-- llvm/System/Threading.h - Control multithreading mode --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TThis file defines llvm_start_multithreaded() and friends.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_THREADING_H
-#define LLVM_SYSTEM_THREADING_H
-
-namespace llvm {
-  /// llvm_start_multithreaded - Allocate and initialize structures needed to
-  /// make LLVM safe for multithreading.  The return value indicates whether
-  /// multithreaded initialization succeeded.  LLVM will still be operational
-  /// on "failed" return, and will still be safe for hosting threading 
-  /// applications in the JIT, but will not be safe for concurrent calls to the
-  /// LLVM APIs.
-  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
-  bool llvm_start_multithreaded();
-  
-  /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM
-  /// safe for multithreading.
-  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
-  void llvm_stop_multithreaded();
-  
-  /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe
-  /// mode or not.
-  bool llvm_is_multithreaded();
-  
-  /// acquire_global_lock - Acquire the global lock.  This is a no-op if called
-  /// before llvm_start_multithreaded().
-  void llvm_acquire_global_lock();
-  
-  /// release_global_lock - Release the global lock.  This is a no-op if called
-  /// before llvm_start_multithreaded().
-  void llvm_release_global_lock();
-}
-
-#endif
diff --git a/include/llvm/System/TimeValue.h b/include/llvm/System/TimeValue.h
deleted file mode 100644
index b82647f74ed5..000000000000
--- a/include/llvm/System/TimeValue.h
+++ /dev/null
@@ -1,382 +0,0 @@
-//===-- TimeValue.h - Declare OS TimeValue Concept --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file declares the operating system TimeValue concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/DataTypes.h"
-#include <string>
-
-#ifndef LLVM_SYSTEM_TIMEVALUE_H
-#define LLVM_SYSTEM_TIMEVALUE_H
-
-namespace llvm {
-namespace sys {
-  /// This class is used where a precise fixed point in time is required. The
-  /// range of TimeValue spans many hundreds of billions of years both past and
-  /// present.  The precision of TimeValue is to the nanosecond. However, the
-  /// actual precision of its values will be determined by the resolution of
-  /// the system clock. The TimeValue class is used in conjunction with several
-  /// other lib/System interfaces to specify the time at which a call should
-  /// timeout, etc.
-  /// @since 1.4
-  /// @brief Provides an abstraction for a fixed point in time.
-  class TimeValue {
-
-  /// @name Constants
-  /// @{
-  public:
-
-    /// A constant TimeValue representing the smallest time
-    /// value permissable by the class. MinTime is some point
-    /// in the distant past, about 300 billion years BCE.
-    /// @brief The smallest possible time value.
-    static const TimeValue MinTime;
-
-    /// A constant TimeValue representing the largest time
-    /// value permissable by the class. MaxTime is some point
-    /// in the distant future, about 300 billion years AD.
-    /// @brief The largest possible time value.
-    static const TimeValue MaxTime;
-
-    /// A constant TimeValue representing the base time,
-    /// or zero time of 00:00:00 (midnight) January 1st, 2000.
-    /// @brief 00:00:00 Jan 1, 2000 UTC.
-    static const TimeValue ZeroTime;
-
-    /// A constant TimeValue for the Posix base time which is
-    /// 00:00:00 (midnight) January 1st, 1970.
-    /// @brief 00:00:00 Jan 1, 1970 UTC.
-    static const TimeValue PosixZeroTime;
-
-    /// A constant TimeValue for the Win32 base time which is
-    /// 00:00:00 (midnight) January 1st, 1601.
-    /// @brief 00:00:00 Jan 1, 1601 UTC.
-    static const TimeValue Win32ZeroTime;
-
-  /// @}
-  /// @name Types
-  /// @{
-  public:
-    typedef int64_t SecondsType;    ///< Type used for representing seconds.
-    typedef int32_t NanoSecondsType;///< Type used for representing nanoseconds.
-
-    enum TimeConversions {
-      NANOSECONDS_PER_SECOND = 1000000000,  ///< One Billion
-      MICROSECONDS_PER_SECOND = 1000000,    ///< One Million
-      MILLISECONDS_PER_SECOND = 1000,       ///< One Thousand
-      NANOSECONDS_PER_MICROSECOND = 1000,   ///< One Thousand
-      NANOSECONDS_PER_MILLISECOND = 1000000,///< One Million
-      NANOSECONDS_PER_POSIX_TICK = 100,     ///< Posix tick is 100 Hz (10ms)
-      NANOSECONDS_PER_WIN32_TICK = 100      ///< Win32 tick is 100 Hz (10ms)
-    };
-
-  /// @}
-  /// @name Constructors
-  /// @{
-  public:
-    /// Caller provides the exact value in seconds and nanoseconds. The
-    /// \p nanos argument defaults to zero for convenience.
-    /// @brief Explicit constructor
-    explicit TimeValue (SecondsType seconds, NanoSecondsType nanos = 0)
-      : seconds_( seconds ), nanos_( nanos ) { this->normalize(); }
-
-    /// Caller provides the exact value as a double in seconds with the
-    /// fractional part representing nanoseconds.
-    /// @brief Double Constructor.
-    explicit TimeValue( double new_time )
-      : seconds_( 0 ) , nanos_ ( 0 ) {
-      SecondsType integer_part = static_cast<SecondsType>( new_time );
-      seconds_ = integer_part;
-      nanos_ = static_cast<NanoSecondsType>( (new_time -
-               static_cast<double>(integer_part)) * NANOSECONDS_PER_SECOND );
-      this->normalize();
-    }
-
-    /// This is a static constructor that returns a TimeValue that represents
-    /// the current time.
-    /// @brief Creates a TimeValue with the current time (UTC).
-    static TimeValue now();
-
-  /// @}
-  /// @name Operators
-  /// @{
-  public:
-    /// Add \p that to \p this.
-    /// @returns this
-    /// @brief Incrementing assignment operator.
-    TimeValue& operator += (const TimeValue& that ) {
-      this->seconds_ += that.seconds_  ;
-      this->nanos_ += that.nanos_ ;
-      this->normalize();
-      return *this;
-    }
-
-    /// Subtract \p that from \p this.
-    /// @returns this
-    /// @brief Decrementing assignment operator.
-    TimeValue& operator -= (const TimeValue &that ) {
-      this->seconds_ -= that.seconds_ ;
-      this->nanos_ -= that.nanos_ ;
-      this->normalize();
-      return *this;
-    }
-
-    /// Determine if \p this is less than \p that.
-    /// @returns True iff *this < that.
-    /// @brief True if this < that.
-    int operator < (const TimeValue &that) const { return that > *this; }
-
-    /// Determine if \p this is greather than \p that.
-    /// @returns True iff *this > that.
-    /// @brief True if this > that.
-    int operator > (const TimeValue &that) const {
-      if ( this->seconds_ > that.seconds_ ) {
-          return 1;
-      } else if ( this->seconds_ == that.seconds_ ) {
-          if ( this->nanos_ > that.nanos_ ) return 1;
-      }
-      return 0;
-    }
-
-    /// Determine if \p this is less than or equal to \p that.
-    /// @returns True iff *this <= that.
-    /// @brief True if this <= that.
-    int operator <= (const TimeValue &that) const { return that >= *this; }
-
-    /// Determine if \p this is greater than or equal to \p that.
-    /// @returns True iff *this >= that.
-    /// @brief True if this >= that.
-    int operator >= (const TimeValue &that) const {
-      if ( this->seconds_ > that.seconds_ ) {
-          return 1;
-      } else if ( this->seconds_ == that.seconds_ ) {
-          if ( this->nanos_ >= that.nanos_ ) return 1;
-      }
-      return 0;
-    }
-
-    /// Determines if two TimeValue objects represent the same moment in time.
-    /// @brief True iff *this == that.
-    /// @brief True if this == that.
-    int operator == (const TimeValue &that) const {
-      return (this->seconds_ == that.seconds_) &&
-             (this->nanos_ == that.nanos_);
-    }
-
-    /// Determines if two TimeValue objects represent times that are not the
-    /// same.
-    /// @return True iff *this != that.
-    /// @brief True if this != that.
-    int operator != (const TimeValue &that) const { return !(*this == that); }
-
-    /// Adds two TimeValue objects together.
-    /// @returns The sum of the two operands as a new TimeValue
-    /// @brief Addition operator.
-    friend TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2);
-
-    /// Subtracts two TimeValue objects.
-    /// @returns The difference of the two operands as a new TimeValue
-    /// @brief Subtraction operator.
-    friend TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2);
-
-  /// @}
-  /// @name Accessors
-  /// @{
-  public:
-
-    /// Returns only the seconds component of the TimeValue. The nanoseconds
-    /// portion is ignored. No rounding is performed.
-    /// @brief Retrieve the seconds component
-    SecondsType seconds() const { return seconds_; }
-
-    /// Returns only the nanoseconds component of the TimeValue. The seconds
-    /// portion is ignored.
-    /// @brief Retrieve the nanoseconds component.
-    NanoSecondsType nanoseconds() const { return nanos_; }
-
-    /// Returns only the fractional portion of the TimeValue rounded down to the
-    /// nearest microsecond (divide by one thousand).
-    /// @brief Retrieve the fractional part as microseconds;
-    uint32_t microseconds() const {
-      return nanos_ / NANOSECONDS_PER_MICROSECOND;
-    }
-
-    /// Returns only the fractional portion of the TimeValue rounded down to the
-    /// nearest millisecond (divide by one million).
-    /// @brief Retrieve the fractional part as milliseconds;
-    uint32_t milliseconds() const {
-      return nanos_ / NANOSECONDS_PER_MILLISECOND;
-    }
-
-    /// Returns the TimeValue as a number of microseconds. Note that the value
-    /// returned can overflow because the range of a uint64_t is smaller than
-    /// the range of a TimeValue. Nevertheless, this is useful on some operating
-    /// systems and is therefore provided.
-    /// @brief Convert to a number of microseconds (can overflow)
-    uint64_t usec() const {
-      return seconds_ * MICROSECONDS_PER_SECOND +
-             ( nanos_ / NANOSECONDS_PER_MICROSECOND );
-    }
-
-    /// Returns the TimeValue as a number of milliseconds. Note that the value
-    /// returned can overflow because the range of a uint64_t is smaller than
-    /// the range of a TimeValue. Nevertheless, this is useful on some operating
-    /// systems and is therefore provided.
-    /// @brief Convert to a number of milliseconds (can overflow)
-    uint64_t msec() const {
-      return seconds_ * MILLISECONDS_PER_SECOND +
-             ( nanos_ / NANOSECONDS_PER_MILLISECOND );
-    }
-
-    /// Converts the TimeValue into the corresponding number of "ticks" for
-    /// Posix, correcting for the difference in Posix zero time.
-    /// @brief Convert to unix time (100 nanoseconds since 12:00:00a Jan 1,1970)
-    uint64_t toPosixTime() const {
-      uint64_t result = seconds_ - PosixZeroTime.seconds_;
-      result += nanos_ / NANOSECONDS_PER_POSIX_TICK;
-      return result;
-    }
-
-    /// Converts the TimeValue into the corresponding number of seconds
-    /// since the epoch (00:00:00 Jan 1,1970).
-    uint64_t toEpochTime() const {
-      return seconds_ - PosixZeroTime.seconds_;
-    }
-
-    /// Converts the TimeValue into the corresponding number of "ticks" for
-    /// Win32 platforms, correcting for the difference in Win32 zero time.
-    /// @brief Convert to windows time (seconds since 12:00:00a Jan 1, 1601)
-    uint64_t toWin32Time() const {
-      uint64_t result = seconds_ - Win32ZeroTime.seconds_;
-      result += nanos_ / NANOSECONDS_PER_WIN32_TICK;
-      return result;
-    }
-
-    /// Provides the seconds and nanoseconds as results in its arguments after
-    /// correction for the Posix zero time.
-    /// @brief Convert to timespec time (ala POSIX.1b)
-    void getTimespecTime( uint64_t& seconds, uint32_t& nanos ) const {
-      seconds = seconds_ - PosixZeroTime.seconds_;
-      nanos = nanos_;
-    }
-
-    /// Provides conversion of the TimeValue into a readable time & date.
-    /// @returns std::string containing the readable time value
-    /// @brief Convert time to a string.
-    std::string str() const;
-
-  /// @}
-  /// @name Mutators
-  /// @{
-  public:
-    /// The seconds component of the TimeValue is set to \p sec without
-    /// modifying the nanoseconds part.  This is useful for whole second
-    /// arithmetic.
-    /// @brief Set the seconds component.
-    void seconds (SecondsType sec ) {
-      this->seconds_ = sec;
-      this->normalize();
-    }
-
-    /// The nanoseconds component of the TimeValue is set to \p nanos without
-    /// modifying the seconds part. This is useful for basic computations
-    /// involving just the nanoseconds portion. Note that the TimeValue will be
-    /// normalized after this call so that the fractional (nanoseconds) portion
-    /// will have the smallest equivalent value.
-    /// @brief Set the nanoseconds component using a number of nanoseconds.
-    void nanoseconds ( NanoSecondsType nanos ) {
-      this->nanos_ = nanos;
-      this->normalize();
-    }
-
-    /// The seconds component remains unchanged.
-    /// @brief Set the nanoseconds component using a number of microseconds.
-    void microseconds ( int32_t micros ) {
-      this->nanos_ = micros * NANOSECONDS_PER_MICROSECOND;
-      this->normalize();
-    }
-
-    /// The seconds component remains unchanged.
-    /// @brief Set the nanoseconds component using a number of milliseconds.
-    void milliseconds ( int32_t millis ) {
-      this->nanos_ = millis * NANOSECONDS_PER_MILLISECOND;
-      this->normalize();
-    }
-
-    /// @brief Converts from microsecond format to TimeValue format
-    void usec( int64_t microseconds ) {
-      this->seconds_ = microseconds / MICROSECONDS_PER_SECOND;
-      this->nanos_ = NanoSecondsType(microseconds % MICROSECONDS_PER_SECOND) *
-        NANOSECONDS_PER_MICROSECOND;
-      this->normalize();
-    }
-
-    /// @brief Converts from millisecond format to TimeValue format
-    void msec( int64_t milliseconds ) {
-      this->seconds_ = milliseconds / MILLISECONDS_PER_SECOND;
-      this->nanos_ = NanoSecondsType(milliseconds % MILLISECONDS_PER_SECOND) *
-        NANOSECONDS_PER_MILLISECOND;
-      this->normalize();
-    }
-
-    /// Converts the \p seconds argument from PosixTime to the corresponding
-    /// TimeValue and assigns that value to \p this.
-    /// @brief Convert seconds form PosixTime to TimeValue
-    void fromEpochTime( SecondsType seconds ) {
-      seconds_ = seconds + PosixZeroTime.seconds_;
-      nanos_ = 0;
-      this->normalize();
-    }
-
-    /// Converts the \p win32Time argument from Windows FILETIME to the
-    /// corresponding TimeValue and assigns that value to \p this.
-    /// @brief Convert seconds form Windows FILETIME to TimeValue
-    void fromWin32Time( uint64_t win32Time ) {
-      this->seconds_ = win32Time / 10000000 + Win32ZeroTime.seconds_;
-      this->nanos_ = NanoSecondsType(win32Time  % 10000000) * 100;
-    }
-
-  /// @}
-  /// @name Implementation
-  /// @{
-  private:
-    /// This causes the values to be represented so that the fractional
-    /// part is minimized, possibly incrementing the seconds part.
-    /// @brief Normalize to canonical form.
-    void normalize();
-
-  /// @}
-  /// @name Data
-  /// @{
-  private:
-    /// Store the values as a <timeval>.
-    SecondsType      seconds_;///< Stores the seconds part of the TimeVal
-    NanoSecondsType  nanos_;  ///< Stores the nanoseconds part of the TimeVal
-  /// @}
-
-  };
-
-inline TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2) {
-  TimeValue sum (tv1.seconds_ + tv2.seconds_, tv1.nanos_ + tv2.nanos_);
-  sum.normalize ();
-  return sum;
-}
-
-inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
-  TimeValue difference (tv1.seconds_ - tv2.seconds_, tv1.nanos_ - tv2.nanos_ );
-  difference.normalize ();
-  return difference;
-}
-
-}
-}
-
-#endif
diff --git a/include/llvm/System/Valgrind.h b/include/llvm/System/Valgrind.h
deleted file mode 100644
index 5ec79c3c5573..000000000000
--- a/include/llvm/System/Valgrind.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===- llvm/System/Valgrind.h - Communication with Valgrind -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Methods for communicating with a valgrind instance this program is running
-// under.  These are all no-ops unless LLVM was configured on a system with the
-// valgrind headers installed and valgrind is controlling this process.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_VALGRIND_H
-#define LLVM_SYSTEM_VALGRIND_H
-
-#include <stddef.h>
-
-namespace llvm {
-namespace sys {
-  // True if Valgrind is controlling this process.
-  bool RunningOnValgrind();
-
-  // Discard valgrind's translation of code in the range [Addr .. Addr + Len).
-  // Otherwise valgrind may continue to execute the old version of the code.
-  void ValgrindDiscardTranslations(const void *Addr, size_t Len);
-}
-}
-
-#endif
diff --git a/include/llvm/Target/Mangler.h b/include/llvm/Target/Mangler.h
index a9f3576559d4..c1c118b08cab 100644
--- a/include/llvm/Target/Mangler.h
+++ b/include/llvm/Target/Mangler.h
@@ -15,7 +15,6 @@
 #define LLVM_SUPPORT_MANGLER_H
 
 #include "llvm/ADT/DenseMap.h"
-#include <string>
 
 namespace llvm {
 class StringRef;
@@ -69,12 +68,6 @@ public:
   /// empty.
   void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
                          ManglerPrefixTy PrefixTy = Mangler::Default);
-
-  /// getNameWithPrefix - Return the name of the appropriate prefix
-  /// and the specified global variable's name.  If the global variable doesn't
-  /// have a name, this fills in a unique name for the global.
-  std::string getNameWithPrefix(const GlobalValue *GV,
-                                bool isImplicitlyPrivate = false);
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h
index 45468714a3bc..6c21ae9583e0 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/Target/SubtargetFeature.h
@@ -22,7 +22,7 @@
 #include <vector>
 #include <cstring>
 #include "llvm/ADT/Triple.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class raw_ostream;
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index b141a77df4f2..0f7e6aaaf2fa 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -1,10 +1,10 @@
 //===- Target.td - Target Independent TableGen interface ---*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file defines the target-independent interfaces which should be
@@ -47,7 +47,7 @@ class Register<string n> {
   // modification of this register can potentially read or modify the aliased
   // registers.
   list<Register> Aliases = [];
-  
+
   // SubRegs - A list of registers that are parts of this register. Note these
   // are "immediate" sub-registers and the registers within the list do not
   // themselves overlap. e.g. For X86, EAX's SubRegs list contains only [AX],
@@ -84,7 +84,7 @@ class Register<string n> {
 // need to specify sub-registers.
 // List "subregs" specifies which registers are sub-registers to this one. This
 // is used to populate the SubRegs and AliasSet fields of TargetRegisterDesc.
-// This allows the code generator to be careful not to put two values with 
+// This allows the code generator to be careful not to put two values with
 // overlapping live ranges into registers which alias.
 class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
   let SubRegs = subregs;
@@ -101,7 +101,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
 
   // RegType - Specify the list ValueType of the registers in this register
   // class.  Note that all registers in a register class must have the same
-  // ValueTypes.  This is a list because some targets permit storing different 
+  // ValueTypes.  This is a list because some targets permit storing different
   // types in same register, for example vector values with 128-bit total size,
   // but different count/size of items, like SSE on x86.
   //
@@ -127,13 +127,13 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   // allocation used by the register allocator.
   //
   list<Register> MemberList = regList;
-  
+
   // SubRegClasses - Specify the register class of subregisters as a list of
   // dags: (RegClass SubRegIndex, SubRegindex, ...)
   list<dag> SubRegClasses = [];
 
   // MethodProtos/MethodBodies - These members can be used to insert arbitrary
-  // code into a generated register class.   The normal usage of this is to 
+  // code into a generated register class.   The normal usage of this is to
   // overload virtual methods.
   code MethodProtos = [{}];
   code MethodBodies = [{}];
@@ -150,8 +150,8 @@ class DwarfRegNum<list<int> Numbers> {
   // These values can be determined by locating the <target>.h file in the
   // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES.  The
   // order of these names correspond to the enumeration used by gcc.  A value of
-  // -1 indicates that the gcc number is undefined and -2 that register number is 
-  // invalid for this mode/flavour.
+  // -1 indicates that the gcc number is undefined and -2 that register number
+  // is invalid for this mode/flavour.
   list<int> DwarfNumbers = Numbers;
 }
 
@@ -199,6 +199,7 @@ class Instruction {
   bit isBranch     = 0;     // Is this instruction a branch instruction?
   bit isIndirectBranch = 0; // Is this instruction an indirect branch?
   bit isCompare    = 0;     // Is this instruction a comparison instruction?
+  bit isMoveImm    = 0;     // Is this instruction a move immediate instruction?
   bit isBarrier    = 0;     // Can control flow fall through this instruction?
   bit isCall       = 0;     // Is this instruction a call instruction?
   bit canFoldAsLoad = 0;    // Can this be folded as a simple memory operand?
@@ -243,14 +244,29 @@ class Instruction {
   /// be encoded into the output machineinstr.
   string DisableEncoding = "";
 
+  string PostEncoderMethod = "";
+  string DecoderMethod = "";
+
   /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
   bits<64> TSFlags = 0;
+
+  ///@name Assembler Parser Support
+  ///@{
+
+  string AsmMatchConverter = "";
+
+  ///@}
 }
 
 /// Predicates - These are extra conditionals which are turned into instruction
 /// selector matching code. Currently each predicate is just a string.
 class Predicate<string cond> {
   string CondString = cond;
+
+  /// AssemblerMatcherPredicate - If this feature can be used by the assembler
+  /// matcher, this is true.  Targets should set this by inheriting their
+  /// feature from the AssemblerPredicate class in addition to Predicate.
+  bit AssemblerMatcherPredicate = 0;
 }
 
 /// NoHonorSignDependentRounding - This predicate is true if support for
@@ -262,9 +278,9 @@ class Requires<list<Predicate> preds> {
   list<Predicate> Predicates = preds;
 }
 
-/// ops definition - This is just a simple marker used to identify the operands
-/// list for an instruction. outs and ins are identical both syntatically and
-/// semantically, they are used to define def operands and use operands to
+/// ops definition - This is just a simple marker used to identify the operand
+/// list for an instruction. outs and ins are identical both syntactically and
+/// semanticallyr; they are used to define def operands and use operands to
 /// improve readibility. This should be used like this:
 ///     (outs R32:$dst), (ins R32:$src1, R32:$src2) or something similar.
 def ops;
@@ -326,18 +342,26 @@ class AsmOperandClass {
   /// signature should be:
   ///   void addFooOperands(MCInst &Inst, unsigned N) const;
   string RenderMethod = ?;
+
+  /// The name of the method on the target specific operand to call to custom
+  /// handle the operand parsing. This is useful when the operands do not relate
+  /// to immediates or registers and are very instruction specific (as flags to
+  /// set in a processor register, coprocessor number, ...).
+  string ParserMethod = ?;
 }
 
 def ImmAsmOperand : AsmOperandClass {
   let Name = "Imm";
 }
-   
+
 /// Operand Types - These provide the built-in operand types that may be used
 /// by a target.  Targets can optionally provide their own operand types as
 /// needed, though this should not be needed for RISC targets.
 class Operand<ValueType ty> {
   ValueType Type = ty;
   string PrintMethod = "printOperand";
+  string EncoderMethod = "";
+  string DecoderMethod = "";
   string AsmOperandLowerMethod = ?;
   dag MIOperandInfo = (ops);
 
@@ -409,6 +433,7 @@ def INLINEASM : Instruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
   let AsmString = "";
+  let neverHasSideEffects = 1;  // Note side effect is encoded in an operand.
 }
 def PROLOG_LABEL : Instruction {
   let OutOperandList = (outs);
@@ -475,7 +500,7 @@ def DBG_VALUE : Instruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
   let AsmString = "DBG_VALUE";
-  let isAsCheapAsAMove = 1;
+  let neverHasSideEffects = 1;
 }
 def REG_SEQUENCE : Instruction {
   let OutOperandList = (outs unknown:$dst);
@@ -506,9 +531,9 @@ class AsmParser {
   // name.
   string AsmParserClassName  = "AsmParser";
 
-  // AsmParserInstCleanup - If non-empty, this is the name of a custom function on the
-  // AsmParser class to call on every matched instruction. This can be used to
-  // perform target specific instruction post-processing.
+  // AsmParserInstCleanup - If non-empty, this is the name of a custom member
+  // function of the AsmParser class to call on every matched instruction.
+  // This can be used to perform target specific instruction post-processing.
   string AsmParserInstCleanup  = "";
 
   // Variant - AsmParsers can be of multiple different variants.  Variants are
@@ -529,6 +554,49 @@ class AsmParser {
 }
 def DefaultAsmParser : AsmParser;
 
+/// AssemblerPredicate - This is a Predicate that can be used when the assembler
+/// matches instructions and aliases.
+class AssemblerPredicate {
+  bit AssemblerMatcherPredicate = 1;
+}
+
+
+
+/// MnemonicAlias - This class allows targets to define assembler mnemonic
+/// aliases.  This should be used when all forms of one mnemonic are accepted
+/// with a different mnemonic.  For example, X86 allows:
+///   sal %al, 1    -> shl %al, 1
+///   sal %ax, %cl  -> shl %ax, %cl
+///   sal %eax, %cl -> shl %eax, %cl
+/// etc.  Though "sal" is accepted with many forms, all of them are directly
+/// translated to a shl, so it can be handled with (in the case of X86, it
+/// actually has one for each suffix as well):
+///   def : MnemonicAlias<"sal", "shl">;
+///
+/// Mnemonic aliases are mapped before any other translation in the match phase,
+/// and do allow Requires predicates, e.g.:
+///
+///  def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+///  def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+///
+class MnemonicAlias<string From, string To> {
+  string FromMnemonic = From;
+  string ToMnemonic = To;
+
+  // Predicates - Predicates that must be true for this remapping to happen.
+  list<Predicate> Predicates = [];
+}
+
+/// InstAlias - This defines an alternate assembly syntax that is allowed to
+/// match an instruction that has a different (more canonical) assembly
+/// representation.
+class InstAlias<string Asm, dag Result> {
+  string AsmString = Asm;      // The .s format to match the instruction with.
+  dag ResultInst = Result;     // The MCInst to generate.
+
+  // Predicates - Predicates that must be true for this to match.
+  list<Predicate> Predicates = [];
+}
 
 //===----------------------------------------------------------------------===//
 // AsmWriter - This class can be implemented by targets that need to customize
@@ -543,10 +611,6 @@ class AsmWriter {
   // name.
   string AsmWriterClassName  = "AsmPrinter";
 
-  // InstFormatName - AsmWriters can specify the name of the format string to
-  // print instructions with.
-  string InstFormatName = "AsmString";
-
   // Variant - AsmWriters can be of multiple different variants.  Variants are
   // used to support targets that need to emit assembly code in ways that are
   // mostly the same for different targets, but have minor differences in
@@ -554,17 +618,22 @@ class AsmWriter {
   // will specify which alternative to use.  For example "{x|y|z}" with Variant
   // == 1, will expand to "y".
   int Variant = 0;
-  
-  
+
+
   // FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar
   // layout, the asmwriter can actually generate output in this columns (in
   // verbose-asm mode).  These two values indicate the width of the first column
   // (the "opcode" area) and the width to reserve for subsequent operands.  When
   // verbose asm mode is enabled, operands will be indented to respect this.
   int FirstOperandColumn = -1;
-  
+
   // OperandSpacing - Space between operand columns.
   int OperandSpacing = -1;
+
+  // isMCAsmWriter - Is this assembly writer for an MC emitter? This controls
+  // generation of the printInstruction() method. For MC printers, it takes
+  // an MCInstr* operand, otherwise it takes a MachineInstr*.
+  bit isMCAsmWriter = 0;
 }
 def DefaultAsmWriter : AsmWriter;
 
@@ -592,15 +661,15 @@ class SubtargetFeature<string n, string a,  string v, string d,
   // appropriate target chip.
   //
   string Name = n;
-  
+
   // Attribute - Attribute to be set by feature.
   //
   string Attribute = a;
-  
+
   // Value - Value the attribute to be set to by feature.
   //
   string Value = v;
-  
+
   // Desc - Feature description.  Used by command line (-mattr=) to display help
   // information.
   //
@@ -622,12 +691,12 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> {
   // appropriate target chip.
   //
   string Name = n;
-  
+
   // ProcItin - The scheduling information for the target processor.
   //
   ProcessorItineraries ProcItin = pi;
-  
-  // Features - list of 
+
+  // Features - list of
   list<SubtargetFeature> Features = f;
 }
 
diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h
index 979595ad4f42..7527298efa9e 100644
--- a/include/llvm/Target/TargetAsmBackend.h
+++ b/include/llvm/Target/TargetAsmBackend.h
@@ -10,17 +10,18 @@
 #ifndef LLVM_TARGET_TARGETASMBACKEND_H
 #define LLVM_TARGET_TARGETASMBACKEND_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
-class MCDataFragment;
 class MCFixup;
 class MCInst;
 class MCObjectWriter;
 class MCSection;
 template<typename T>
 class SmallVectorImpl;
-class Target;
 class raw_ostream;
 
 /// TargetAsmBackend - Generic interface to target specific assembler backends.
@@ -28,37 +29,17 @@ class TargetAsmBackend {
   TargetAsmBackend(const TargetAsmBackend &);   // DO NOT IMPLEMENT
   void operator=(const TargetAsmBackend &);  // DO NOT IMPLEMENT
 protected: // Can only create subclasses.
-  TargetAsmBackend(const Target &);
+  TargetAsmBackend();
 
-  /// TheTarget - The Target that this machine was created for.
-  const Target &TheTarget;
-
-  unsigned HasAbsolutizedSet : 1;
   unsigned HasReliableSymbolDifference : 1;
-  unsigned HasScatteredSymbols : 1;
 
 public:
   virtual ~TargetAsmBackend();
 
-  const Target &getTarget() const { return TheTarget; }
-
   /// createObjectWriter - Create a new MCObjectWriter instance for use by the
   /// assembler backend to emit the final object file.
   virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
 
-  /// hasAbsolutizedSet - Check whether this target "absolutizes"
-  /// assignments. That is, given code like:
-  ///   a:
-  ///   ...
-  ///   b:
-  ///   tmp = a - b
-  ///       .long tmp
-  /// will the value of 'tmp' be a relocatable expression, or the assembly time
-  /// value of L0 - L1. This distinction is only relevant for platforms that
-  /// support scattered symbols, since in the absence of scattered symbols (a -
-  /// b) cannot change after assembly.
-  bool hasAbsolutizedSet() const { return HasAbsolutizedSet; }
-
   /// hasReliableSymbolDifference - Check whether this target implements
   /// accurate relocations for differences between symbols. If not, differences
   /// between symbols will always be relocatable expressions and any references
@@ -68,21 +49,11 @@ public:
   /// This should always be true (since it results in fewer relocations with no
   /// loss of functionality), but is currently supported as a way to maintain
   /// exact object compatibility with Darwin 'as' (on non-x86_64). It should
-  /// eventually should be eliminated. See also \see hasAbsolutizedSet.
+  /// eventually should be eliminated.
   bool hasReliableSymbolDifference() const {
     return HasReliableSymbolDifference;
   }
 
-  /// hasScatteredSymbols - Check whether this target supports scattered
-  /// symbols. If so, the assembler should assume that atoms can be scattered by
-  /// the linker. In particular, this means that the offsets between symbols
-  /// which are in distinct atoms is not known at link time, and the assembler
-  /// must generate fixups and relocations appropriately.
-  ///
-  /// Note that the assembler currently does not reason about atoms, instead it
-  /// assumes all temporary symbols reside in the "current atom".
-  bool hasScatteredSymbols() const { return HasScatteredSymbols; }
-
   /// doesSectionRequireSymbols - Check whether the given section requires that
   /// all symbols (even temporaries) have symbol table entries.
   virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -97,16 +68,28 @@ public:
     return true;
   }
 
-  /// isVirtualSection - Check whether the given section is "virtual", that is
-  /// has no actual object file contents.
-  virtual bool isVirtualSection(const MCSection &Section) const = 0;
+  /// @name Target Fixup Interfaces
+  /// @{
+
+  /// getNumFixupKinds - Get the number of target specific fixup kinds.
+  virtual unsigned getNumFixupKinds() const = 0;
+
+  /// getFixupKindInfo - Get information on a fixup kind.
+  virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
+
+  /// @}
 
   /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
   /// data fragment, at the offset specified by the fixup and following the
   /// fixup kind as appropriate.
-  virtual void ApplyFixup(const MCFixup &Fixup, MCDataFragment &Fragment,
+  virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                           uint64_t Value) const = 0;
 
+  /// @}
+
+  /// @name Target Relaxation Interfaces
+  /// @{
+
   /// MayNeedRelaxation - Check whether the given instruction may need
   /// relaxation.
   ///
@@ -121,12 +104,18 @@ public:
   /// \parm Res [output] - On return, the relaxed instruction.
   virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const = 0;
 
+  /// @}
+
   /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
   /// output. If the target cannot generate such a sequence, it should return an
   /// error.
   ///
   /// \return - True on success.
   virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const = 0;
+
+  /// HandleAssemblerFlag - Handle any target-specific assembler flags.
+  /// By default, do nothing.
+  virtual void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h
new file mode 100644
index 000000000000..98aab142b8e4
--- /dev/null
+++ b/include/llvm/Target/TargetAsmInfo.h
@@ -0,0 +1,75 @@
+//===-- llvm/Target/TargetAsmInfo.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to provide the information necessary for producing assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETASMINFO_H
+#define LLVM_TARGET_TARGETASMINFO_H
+
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  class MCSection;
+  class MCContext;
+  class TargetMachine;
+  class TargetLoweringObjectFile;
+
+class TargetAsmInfo {
+  unsigned PointerSize;
+  bool IsLittleEndian;
+  TargetFrameLowering::StackDirection StackDir;
+  const TargetRegisterInfo *TRI;
+  std::vector<MachineMove> InitialFrameState;
+  const TargetLoweringObjectFile *TLOF;
+
+public:
+  explicit TargetAsmInfo(const TargetMachine &TM);
+
+  /// getPointerSize - Get the pointer size in bytes.
+  unsigned getPointerSize() const {
+    return PointerSize;
+  }
+
+  /// islittleendian - True if the target is little endian.
+  bool isLittleEndian() const {
+    return IsLittleEndian;
+  }
+
+  TargetFrameLowering::StackDirection getStackGrowthDirection() const {
+    return StackDir;
+  }
+
+  const MCSection *getDwarfLineSection() const {
+    return TLOF->getDwarfLineSection();
+  }
+
+  const MCSection *getEHFrameSection() const {
+    return TLOF->getEHFrameSection();
+  }
+
+  unsigned getDwarfRARegNum(bool isEH) const {
+    return TRI->getDwarfRegNum(TRI->getRARegister(), isEH);
+  }
+
+  const std::vector<MachineMove> &getInitialFrameState() const {
+    return InitialFrameState;
+  }
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+    return TRI->getDwarfRegNum(RegNum, isEH);
+  }
+};
+
+}
+#endif
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index 5830d1f99f5c..9ff50cb275be 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -13,7 +13,7 @@
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
 
 namespace llvm {
-class MCInst;
+class MCStreamer;
 class StringRef;
 class Target;
 class SMLoc;
@@ -42,6 +42,8 @@ public:
   unsigned getAvailableFeatures() const { return AvailableFeatures; }
   void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
 
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
+
   /// ParseInstruction - Parse one assembly instruction.
   ///
   /// The parser is positioned following the instruction name. The target
@@ -70,16 +72,16 @@ public:
   /// \param DirectiveID - the identifier token of the directive.
   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
   
-  /// MatchInstruction - Recognize a series of operands of a parsed instruction
-  /// as an actual MCInst.  This returns false and fills in Inst on success and
-  /// returns true on failure to match.
+  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
+  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
+  /// This returns false on success and returns true on failure to match.
   ///
   /// On failure, the target parser is responsible for emitting a diagnostic
   /// explaining the match failure.
   virtual bool 
-  MatchInstruction(SMLoc IDLoc,
-                   const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                   MCInst &Inst) = 0;
+  MatchAndEmitInstruction(SMLoc IDLoc,
+                          SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                          MCStreamer &Out) = 0;
   
 };
 
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index f368a2e38c42..275957e01532 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -106,14 +106,13 @@ namespace ISD {
   ///
   struct InputArg {
     ArgFlagsTy Flags;
-    EVT VT;
+    MVT VT;
     bool Used;
 
     InputArg() : VT(MVT::Other), Used(false) {}
     InputArg(ArgFlagsTy flags, EVT vt, bool used)
-      : Flags(flags), VT(vt), Used(used) {
-      assert(VT.isSimple() &&
-             "InputArg value type must be Simple!");
+      : Flags(flags), Used(used) {
+      VT = vt.getSimpleVT();
     }
   };
 
@@ -123,16 +122,15 @@ namespace ISD {
   ///
   struct OutputArg {
     ArgFlagsTy Flags;
-    EVT VT;
+    MVT VT;
 
     /// IsFixed - Is this a "fixed" value, ie not passed through a vararg "...".
     bool IsFixed;
 
     OutputArg() : IsFixed(false) {}
     OutputArg(ArgFlagsTy flags, EVT vt, bool isfixed)
-      : Flags(flags), VT(vt), IsFixed(isfixed) {
-      assert(VT.isSimple() &&
-             "OutputArg value type must be Simple!");
+      : Flags(flags), IsFixed(isfixed) {
+      VT = vt.getSimpleVT();
     }
   };
 }
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index b89cbe0133f8..25065d30bb6e 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -22,6 +22,7 @@
 
 #include "llvm/Pass.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
@@ -143,7 +144,7 @@ public:
   std::string getStringRepresentation() const;
   
   /// isLegalInteger - This function returns true if the specified type is
-  /// known tobe a native integer type supported by the CPU.  For example,
+  /// known to be a native integer type supported by the CPU.  For example,
   /// i64 is not native on most 32-bit CPUs and i37 is not native on any known
   /// one.  This returns false if the integer width is not legal.
   ///
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index 7cb693155c29..b97f3e2f4d0f 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -28,7 +28,6 @@ namespace llvm {
     // EMachine - This field is the target specific value to emit as the
     // e_machine member of the ELF header.
     unsigned short EMachine;
-    TargetMachine &TM;
     bool is64Bit, isLittleEndian;
   public:
 
@@ -62,7 +61,7 @@ namespace llvm {
       ELFDATA2MSB = 2  // Big-endian object file
     };
 
-    explicit TargetELFWriterInfo(TargetMachine &tm);
+    explicit TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
     virtual ~TargetELFWriterInfo();
 
     unsigned short getEMachine() const { return EMachine; }
diff --git a/include/llvm/Target/TargetFrameInfo.h b/include/llvm/Target/TargetFrameInfo.h
deleted file mode 100644
index 975d15659c15..000000000000
--- a/include/llvm/Target/TargetFrameInfo.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- llvm/Target/TargetFrameInfo.h ---------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface to describe the layout of a stack frame on the target machine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETFRAMEINFO_H
-#define LLVM_TARGET_TARGETFRAMEINFO_H
-
-#include <utility>
-
-namespace llvm {
-
-/// Information about stack frame layout on the target.  It holds the direction
-/// of stack growth, the known stack alignment on entry to each function, and
-/// the offset to the locals area.
-///
-/// The offset to the local area is the offset from the stack pointer on
-/// function entry to the first location where function data (local variables,
-/// spill locations) can be stored.
-class TargetFrameInfo {
-public:
-  enum StackDirection {
-    StackGrowsUp,        // Adding to the stack increases the stack address
-    StackGrowsDown       // Adding to the stack decreases the stack address
-  };
-
-  // Maps a callee saved register to a stack slot with a fixed offset.
-  struct SpillSlot {
-    unsigned Reg;
-    int Offset; // Offset relative to stack pointer on function entry.
-  };
-private:
-  StackDirection StackDir;
-  unsigned StackAlignment;
-  unsigned TransientStackAlignment;
-  int LocalAreaOffset;
-public:
-  TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO,
-                  unsigned TransAl = 1)
-    : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
-      LocalAreaOffset(LAO) {}
-
-  virtual ~TargetFrameInfo();
-
-  // These methods return information that describes the abstract stack layout
-  // of the target machine.
-
-  /// getStackGrowthDirection - Return the direction the stack grows
-  ///
-  StackDirection getStackGrowthDirection() const { return StackDir; }
-
-  /// getStackAlignment - This method returns the number of bytes to which the
-  /// stack pointer must be aligned on entry to a function.  Typically, this
-  /// is the largest alignment for any data object in the target.
-  ///
-  unsigned getStackAlignment() const { return StackAlignment; }
-
-  /// getTransientStackAlignment - This method returns the number of bytes to
-  /// which the stack pointer must be aligned at all times, even between
-  /// calls.
-  ///
-  unsigned getTransientStackAlignment() const {
-    return TransientStackAlignment;
-  }
-
-  /// getOffsetOfLocalArea - This method returns the offset of the local area
-  /// from the stack pointer on entrance to a function.
-  ///
-  int getOffsetOfLocalArea() const { return LocalAreaOffset; }
-
-  /// getCalleeSavedSpillSlots - This method returns a pointer to an array of
-  /// pairs, that contains an entry for each callee saved register that must be
-  /// spilled to a particular stack location if it is spilled.
-  ///
-  /// Each entry in this array contains a <register,offset> pair, indicating the
-  /// fixed offset from the incoming stack pointer that each register should be
-  /// spilled at. If a register is not listed here, the code generator is
-  /// allowed to spill it anywhere it chooses.
-  ///
-  virtual const SpillSlot *
-  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-    NumEntries = 0;
-    return 0;
-  }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
new file mode 100644
index 000000000000..e104b1663fdd
--- /dev/null
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -0,0 +1,196 @@
+//===-- llvm/Target/TargetFrameLowering.h ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETFRAMELOWERING_H
+#define LLVM_TARGET_TARGETFRAMELOWERING_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+#include <utility>
+#include <vector>
+
+namespace llvm {
+  class CalleeSavedInfo;
+  class MachineFunction;
+  class MachineBasicBlock;
+  class MachineMove;
+  class RegScavenger;
+
+/// Information about stack frame layout on the target.  It holds the direction
+/// of stack growth, the known stack alignment on entry to each function, and
+/// the offset to the locals area.
+///
+/// The offset to the local area is the offset from the stack pointer on
+/// function entry to the first location where function data (local variables,
+/// spill locations) can be stored.
+class TargetFrameLowering {
+public:
+  enum StackDirection {
+    StackGrowsUp,        // Adding to the stack increases the stack address
+    StackGrowsDown       // Adding to the stack decreases the stack address
+  };
+
+  // Maps a callee saved register to a stack slot with a fixed offset.
+  struct SpillSlot {
+    unsigned Reg;
+    int Offset; // Offset relative to stack pointer on function entry.
+  };
+private:
+  StackDirection StackDir;
+  unsigned StackAlignment;
+  unsigned TransientStackAlignment;
+  int LocalAreaOffset;
+public:
+  TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                      unsigned TransAl = 1)
+    : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+      LocalAreaOffset(LAO) {}
+
+  virtual ~TargetFrameLowering();
+
+  // These methods return information that describes the abstract stack layout
+  // of the target machine.
+
+  /// getStackGrowthDirection - Return the direction the stack grows
+  ///
+  StackDirection getStackGrowthDirection() const { return StackDir; }
+
+  /// getStackAlignment - This method returns the number of bytes to which the
+  /// stack pointer must be aligned on entry to a function.  Typically, this
+  /// is the largest alignment for any data object in the target.
+  ///
+  unsigned getStackAlignment() const { return StackAlignment; }
+
+  /// getTransientStackAlignment - This method returns the number of bytes to
+  /// which the stack pointer must be aligned at all times, even between
+  /// calls.
+  ///
+  unsigned getTransientStackAlignment() const {
+    return TransientStackAlignment;
+  }
+
+  /// getOffsetOfLocalArea - This method returns the offset of the local area
+  /// from the stack pointer on entrance to a function.
+  ///
+  int getOffsetOfLocalArea() const { return LocalAreaOffset; }
+
+  /// getCalleeSavedSpillSlots - This method returns a pointer to an array of
+  /// pairs, that contains an entry for each callee saved register that must be
+  /// spilled to a particular stack location if it is spilled.
+  ///
+  /// Each entry in this array contains a <register,offset> pair, indicating the
+  /// fixed offset from the incoming stack pointer that each register should be
+  /// spilled at. If a register is not listed here, the code generator is
+  /// allowed to spill it anywhere it chooses.
+  ///
+  virtual const SpillSlot *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+    NumEntries = 0;
+    return 0;
+  }
+
+  /// targetHandlesStackFrameRounding - Returns true if the target is
+  /// responsible for rounding up the stack frame (probably at emitPrologue
+  /// time).
+  virtual bool targetHandlesStackFrameRounding() const {
+    return false;
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  virtual void emitPrologue(MachineFunction &MF) const = 0;
+  virtual void emitEpilogue(MachineFunction &MF,
+                            MachineBasicBlock &MBB) const = 0;
+
+  /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
+  /// saved registers and returns true if it isn't possible / profitable to do
+  /// so by issuing a series of store instructions via
+  /// storeRegToStackSlot(). Returns false otherwise.
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
+  /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
+  /// saved registers and returns true if it isn't possible / profitable to do
+  /// so by issuing a series of load instructions via loadRegToStackSlot().
+  /// Returns false otherwise.
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
+  /// hasFP - Return true if the specified function should have a dedicated
+  /// frame pointer register. For most targets this is true only if the function
+  /// has variable sized allocas or if frame pointer elimination is disabled.
+  virtual bool hasFP(const MachineFunction &MF) const = 0;
+
+  /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+  /// not required, we reserve argument space for call sites in the function
+  /// immediately on entry to the current function. This eliminates the need for
+  /// add/sub sp brackets around call sites. Returns true if the call frame is
+  /// included as part of the stack frame.
+  virtual bool hasReservedCallFrame(const MachineFunction &MF) const {
+    return !hasFP(MF);
+  }
+
+  /// canSimplifyCallFramePseudos - When possible, it's best to simplify the
+  /// call frame pseudo ops before doing frame index elimination. This is
+  /// possible only when frame index references between the pseudos won't
+  /// need adjusting for the call frame adjustments. Normally, that's true
+  /// if the function has a reserved call frame or a frame pointer. Some
+  /// targets (Thumb2, for example) may have more complicated criteria,
+  /// however, and can override this behavior.
+  virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+    return hasReservedCallFrame(MF) || hasFP(MF);
+  }
+
+  /// getInitialFrameState - Returns a list of machine moves that are assumed
+  /// on entry to all functions.  Note that LabelID is ignored (assumed to be
+  /// the beginning of the function.)
+  virtual void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+  /// getFrameIndexOffset - Returns the displacement from the frame register to
+  /// the stack frame of the specified index.
+  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  /// getFrameIndexReference - This method should return the base register
+  /// and offset used to reference a frame index location. The offset is
+  /// returned directly, and the base register is returned via FrameReg.
+  virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
+                                     unsigned &FrameReg) const;
+
+  /// processFunctionBeforeCalleeSavedScan - This method is called immediately
+  /// before PrologEpilogInserter scans the physical registers used to determine
+  /// what callee saved registers should be spilled. This method is optional.
+  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                RegScavenger *RS = NULL) const {
+
+  }
+
+  /// processFunctionBeforeFrameFinalized - This method is called immediately
+  /// before the specified function's frame layout (MF.getFrameInfo()) is
+  /// finalized.  Once the frame is finalized, MO_FrameIndex operands are
+  /// replaced with direct constants.  This method is optional.
+  ///
+  virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index a127aed8f6df..8823d5a4d17e 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_TARGET_TARGETINSTRDESC_H
 #define LLVM_TARGET_TARGETINSTRDESC_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
@@ -103,13 +103,14 @@ namespace TID {
     Terminator,
     Branch,
     IndirectBranch,
-    Predicable,
-    NotDuplicable,
     Compare,
+    MoveImm,
     DelaySlot,
     FoldableAsLoad,
     MayLoad,
     MayStore,
+    Predicable,
+    NotDuplicable,
     UnmodeledSideEffects,
     Commutable,
     ConvertibleTo3Addr,
@@ -352,6 +353,12 @@ public:
     return Flags & (1 << TID::Compare);
   }
   
+  /// isMoveImmediate - Return true if this instruction is a move immediate
+  /// (including conditional moves) instruction. 
+  bool isMoveImmediate() const {
+    return Flags & (1 << TID::MoveImm);
+  }
+  
   /// isNotDuplicable - Return true if this instruction cannot be safely
   /// duplicated.  For example, if the instruction has a unique labels attached
   /// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 520c41be7428..fc7b51ec6c2c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -19,16 +19,17 @@
 
 namespace llvm {
 
-class CalleeSavedInfo;
 class InstrItineraryData;
 class LiveVariables;
 class MCAsmInfo;
 class MachineMemOperand;
+class MachineRegisterInfo;
 class MDNode;
 class MCInst;
 class SDNode;
 class ScheduleHazardRecognizer;
 class SelectionDAG;
+class ScheduleDAG;
 class TargetRegisterClass;
 class TargetRegisterInfo;
 
@@ -134,7 +135,7 @@ public:
                                     int &FrameIndex) const {
     return 0;
   }
-  
+
   /// isStoreToStackSlot - If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
   /// the source reg along with the FrameIndex of the loaded stack slot.  If
@@ -227,9 +228,12 @@ public:
 
   /// produceSameValue - Return true if two machine instructions would produce
   /// identical values. By default, this is only true when the two instructions
-  /// are deemed identical except for defs.
+  /// are deemed identical except for defs. If this function is called when the
+  /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for
+  /// aggressive checks.
   virtual bool produceSameValue(const MachineInstr *MI0,
-                                const MachineInstr *MI1) const = 0;
+                                const MachineInstr *MI1,
+                                const MachineRegisterInfo *MRI = 0) const = 0;
 
   /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
   /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -267,7 +271,7 @@ public:
   /// This is only invoked in cases where AnalyzeBranch returns success. It
   /// returns the number of instructions that were removed.
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!"); 
+    assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!");
     return 0;
   }
 
@@ -285,7 +289,7 @@ public:
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!"); 
+    assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!");
     return 0;
   }
 
@@ -303,31 +307,45 @@ public:
     return true;
   }
 
-  /// isProfitableToIfCvt - Return true if it's profitable to first "NumInstrs"
-  /// of the specified basic block.
+  /// isProfitableToIfCvt - Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
+  /// of the specified basic block, where the probability of the instructions
+  /// being executed is given by Probability, and Confidence is a measure
+  /// of our confidence that it will be properly predicted.
   virtual
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+                           unsigned ExtraPredCycles,
+                           float Probability, float Confidence) const {
     return false;
   }
-  
+
   /// isProfitableToIfCvt - Second variant of isProfitableToIfCvt, this one
   /// checks for the case where two basic blocks from true and false path
   /// of a if-then-else (diamond) are predicated on mutally exclusive
-  /// predicates.
+  /// predicates, where the probability of the true path being taken is given
+  /// by Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
   virtual bool
-  isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
-                      MachineBasicBlock &FMBB, unsigned NumFInstrs) const {
+  isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                      unsigned NumTCycles, unsigned ExtraTCycles,
+                      MachineBasicBlock &FMBB,
+                      unsigned NumFCycles, unsigned ExtraFCycles,
+                      float Probability, float Confidence) const {
     return false;
   }
 
   /// isProfitableToDupForIfCvt - Return true if it's profitable for
-  /// if-converter to duplicate a specific number of instructions in the
-  /// specified MBB to enable if-conversion.
+  /// if-converter to duplicate instructions of specified accumulated
+  /// instruction latencies in the specified MBB to enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
   virtual bool
-  isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs) const {
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+                            float Probability, float Confidence) const {
     return false;
   }
-  
+
   /// copyPhysReg - Emit instructions to copy a pair of physical registers.
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -360,29 +378,7 @@ public:
                                     const TargetRegisterInfo *TRI) const {
   assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
   }
-  
-  /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
-  /// saved registers and returns true if it isn't possible / profitable to do
-  /// so by issuing a series of store instructions via
-  /// storeRegToStackSlot(). Returns false otherwise.
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const {
-    return false;
-  }
 
-  /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
-  /// saved registers and returns true if it isn't possible / profitable to do
-  /// so by issuing a series of load instructions via loadRegToStackSlot().
-  /// Returns false otherwise.
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-    return false;
-  }
-  
   /// emitFrameIndexDebugValue - Emit a target-dependent form of
   /// DBG_VALUE encoding the address of a frame index.  Addresses would
   /// normally be lowered the same way as other addresses on the target,
@@ -493,7 +489,7 @@ public:
                                        unsigned NumLoads) const {
     return false;
   }
-  
+
   /// ReverseBranchCondition - Reverses the branch condition of the specified
   /// condition list, returning false on success and true if it cannot be
   /// reversed.
@@ -501,19 +497,19 @@ public:
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
     return true;
   }
-  
+
   /// insertNoop - Insert a noop into the instruction stream at the specified
   /// point.
-  virtual void insertNoop(MachineBasicBlock &MBB, 
+  virtual void insertNoop(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI) const;
-  
-  
+
+
   /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
   virtual void getNoopForMachoTarget(MCInst &NopInst) const {
     // Default to just using 'nop' string.
   }
-  
-  
+
+
   /// isPredicated - Returns true if the instruction is already predicated.
   ///
   virtual bool isPredicated(const MachineInstr *MI) const {
@@ -571,26 +567,98 @@ public:
   virtual unsigned getInlineAsmLength(const char *Str,
                                       const MCAsmInfo &MAI) const;
 
-  /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer
-  /// to use for this target when scheduling the machine instructions after
+  /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to
+  /// use for this target when scheduling the machine instructions before
   /// register allocation.
   virtual ScheduleHazardRecognizer*
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const = 0;
+  CreateTargetHazardRecognizer(const TargetMachine *TM,
+                               const ScheduleDAG *DAG) const = 0;
+
+  /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard
+  /// recognizer to use for this target when scheduling the machine instructions
+  /// after register allocation.
+  virtual ScheduleHazardRecognizer*
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+                                     const ScheduleDAG *DAG) const = 0;
 
   /// AnalyzeCompare - For a comparison instruction, return the source register
   /// in SrcReg and the value it compares against in CmpValue. Return true if
   /// the comparison instruction can be analyzed.
   virtual bool AnalyzeCompare(const MachineInstr *MI,
-                              unsigned &SrcReg, int &CmpValue) const {
+                              unsigned &SrcReg, int &Mask, int &Value) const {
+    return false;
+  }
+
+  /// OptimizeCompareInstr - See if the comparison instruction can be converted
+  /// into something more efficient. E.g., on ARM most instructions can set the
+  /// flags register, obviating the need for a separate CMP.
+  virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr,
+                                    unsigned SrcReg, int Mask, int Value,
+                                    const MachineRegisterInfo *MRI) const {
     return false;
   }
 
-  /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
-  /// that we can remove a "comparison with zero".
-  virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
-                                    MachineInstr *CmpInstr) const {
+  /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+  /// instruction, try to fold the immediate into the use instruction.
+  virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const {
     return false;
   }
+
+  /// getNumMicroOps - Return the number of u-operations the given machine
+  /// instruction will be decoded to on the target cpu.
+  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+                                  const MachineInstr *MI) const;
+
+  /// isZeroCost - Return true for pseudo instructions that don't consume any
+  /// machine resources in their current form. These are common cases that the
+  /// scheduler should consider free, rather than conservatively handling them
+  /// as instructions with no itinerary.
+  bool isZeroCost(unsigned Opcode) const {
+    return Opcode <= TargetOpcode::COPY;
+  }
+
+  /// getOperandLatency - Compute and return the use operand latency of a given
+  /// pair of def and use.
+  /// In most cases, the static scheduling itinerary was enough to determine the
+  /// operand latency. But it may not be possible for instructions with variable
+  /// number of defs / uses.
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *DefMI, unsigned DefIdx,
+                              const MachineInstr *UseMI, unsigned UseIdx) const;
+
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                                SDNode *DefNode, unsigned DefIdx,
+                                SDNode *UseNode, unsigned UseIdx) const;
+
+  /// getInstrLatency - Compute the instruction latency of a given instruction.
+  /// If the instruction has higher cost when predicated, it's returned via
+  /// PredCost.
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *MI,
+                              unsigned *PredCost = 0) const;
+
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              SDNode *Node) const;
+
+  /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
+  /// and an use in the current loop, return true if the target considered
+  /// it 'high'. This is used by optimization passes such as machine LICM to
+  /// determine whether it makes sense to hoist an instruction out even in
+  /// high register pressure situation.
+  virtual
+  bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineRegisterInfo *MRI,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const {
+    return false;
+  }
+
+  /// hasLowDefLatency - Compute operand latency of a def of 'Reg', return true
+  /// if the target considered it 'low'.
+  virtual
+  bool hasLowDefLatency(const InstrItineraryData *ItinData,
+                        const MachineInstr *DefMI, unsigned DefIdx) const;
 };
 
 /// TargetInstrInfoImpl - This is the default implementation of
@@ -620,13 +688,20 @@ public:
   virtual MachineInstr *duplicate(MachineInstr *Orig,
                                   MachineFunction &MF) const;
   virtual bool produceSameValue(const MachineInstr *MI0,
-                                const MachineInstr *MI1) const;
+                                const MachineInstr *MI1,
+                                const MachineRegisterInfo *MRI) const;
   virtual bool isSchedulingBoundary(const MachineInstr *MI,
                                     const MachineBasicBlock *MBB,
                                     const MachineFunction &MF) const;
 
+  bool usePreRAHazardRecognizer() const;
+
+  virtual ScheduleHazardRecognizer *
+  CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
+
   virtual ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const;
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+                                     const ScheduleDAG*) const;
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index 39648c233fa8..a95b70f6b997 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -95,6 +95,7 @@ struct InstrStage {
 /// operands are read and written.
 ///
 struct InstrItinerary {
+  unsigned NumMicroOps;        ///< # of micro-ops, 0 means it's variable
   unsigned FirstStage;         ///< Index of first stage in itinerary
   unsigned LastStage;          ///< Index of last + 1 stage in itinerary
   unsigned FirstOperandCycle;  ///< Index of first operand rd/wr
@@ -110,38 +111,42 @@ class InstrItineraryData {
 public:
   const InstrStage     *Stages;         ///< Array of stages selected
   const unsigned       *OperandCycles;  ///< Array of operand cycles selected
-  const InstrItinerary *Itineratries;   ///< Array of itineraries selected
+  const unsigned       *Forwardings;    ///< Array of pipeline forwarding pathes
+  const InstrItinerary *Itineraries;    ///< Array of itineraries selected
+  unsigned              IssueWidth;     ///< Max issue per cycle. 0=Unknown.
 
   /// Ctors.
   ///
-  InstrItineraryData() : Stages(0), OperandCycles(0), Itineratries(0) {}
+  InstrItineraryData() : Stages(0), OperandCycles(0), Forwardings(0),
+                         Itineraries(0), IssueWidth(0) {}
+
   InstrItineraryData(const InstrStage *S, const unsigned *OS,
-                     const InstrItinerary *I)
-    : Stages(S), OperandCycles(OS), Itineratries(I) {}
-  
+                     const unsigned *F, const InstrItinerary *I)
+    : Stages(S), OperandCycles(OS), Forwardings(F), Itineraries(I) {}
+
   /// isEmpty - Returns true if there are no itineraries.
   ///
-  bool isEmpty() const { return Itineratries == 0; }
+  bool isEmpty() const { return Itineraries == 0; }
 
   /// isEndMarker - Returns true if the index is for the end marker
   /// itinerary.
   ///
   bool isEndMarker(unsigned ItinClassIndx) const {
-    return ((Itineratries[ItinClassIndx].FirstStage == ~0U) &&
-            (Itineratries[ItinClassIndx].LastStage == ~0U));
+    return ((Itineraries[ItinClassIndx].FirstStage == ~0U) &&
+            (Itineraries[ItinClassIndx].LastStage == ~0U));
   }
 
   /// beginStage - Return the first stage of the itinerary.
-  /// 
+  ///
   const InstrStage *beginStage(unsigned ItinClassIndx) const {
-    unsigned StageIdx = Itineratries[ItinClassIndx].FirstStage;
+    unsigned StageIdx = Itineraries[ItinClassIndx].FirstStage;
     return Stages + StageIdx;
   }
 
   /// endStage - Return the last+1 stage of the itinerary.
-  /// 
+  ///
   const InstrStage *endStage(unsigned ItinClassIndx) const {
-    unsigned StageIdx = Itineratries[ItinClassIndx].LastStage;
+    unsigned StageIdx = Itineraries[ItinClassIndx].LastStage;
     return Stages + StageIdx;
   }
 
@@ -173,13 +178,68 @@ public:
     if (isEmpty())
       return -1;
 
-    unsigned FirstIdx = Itineratries[ItinClassIndx].FirstOperandCycle;
-    unsigned LastIdx = Itineratries[ItinClassIndx].LastOperandCycle;
+    unsigned FirstIdx = Itineraries[ItinClassIndx].FirstOperandCycle;
+    unsigned LastIdx = Itineraries[ItinClassIndx].LastOperandCycle;
     if ((FirstIdx + OperandIdx) >= LastIdx)
       return -1;
 
     return (int)OperandCycles[FirstIdx + OperandIdx];
   }
+
+  /// hasPipelineForwarding - Return true if there is a pipeline forwarding
+  /// between instructions of itinerary classes DefClass and UseClasses so that
+  /// value produced by an instruction of itinerary class DefClass, operand
+  /// index DefIdx can be bypassed when it's read by an instruction of
+  /// itinerary class UseClass, operand index UseIdx.
+  bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx,
+                             unsigned UseClass, unsigned UseIdx) const {
+    unsigned FirstDefIdx = Itineraries[DefClass].FirstOperandCycle;
+    unsigned LastDefIdx = Itineraries[DefClass].LastOperandCycle;
+    if ((FirstDefIdx + DefIdx) >= LastDefIdx)
+      return false;
+    if (Forwardings[FirstDefIdx + DefIdx] == 0)
+      return false;
+
+    unsigned FirstUseIdx = Itineraries[UseClass].FirstOperandCycle;
+    unsigned LastUseIdx = Itineraries[UseClass].LastOperandCycle;
+    if ((FirstUseIdx + UseIdx) >= LastUseIdx)
+      return false;
+
+    return Forwardings[FirstDefIdx + DefIdx] ==
+      Forwardings[FirstUseIdx + UseIdx];
+  }
+
+  /// getOperandLatency - Compute and return the use operand latency of a given
+  /// itinerary class and operand index if the value is produced by an
+  /// instruction of the specified itinerary class and def operand index.
+  int getOperandLatency(unsigned DefClass, unsigned DefIdx,
+                        unsigned UseClass, unsigned UseIdx) const {
+    if (isEmpty())
+      return -1;
+
+    int DefCycle = getOperandCycle(DefClass, DefIdx);
+    if (DefCycle == -1)
+      return -1;
+
+    int UseCycle = getOperandCycle(UseClass, UseIdx);
+    if (UseCycle == -1)
+      return -1;
+
+    UseCycle = DefCycle - UseCycle + 1;
+    if (UseCycle > 0 &&
+        hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx))
+      // FIXME: This assumes one cycle benefit for every pipeline forwarding.
+      --UseCycle;
+    return UseCycle;
+  }
+
+  /// isMicroCoded - Return true if the instructions in the given class decode
+  /// to more than one micro-ops.
+  bool isMicroCoded(unsigned ItinClassIndx) const {
+    if (isEmpty())
+      return false;
+    return Itineraries[ItinClassIndx].NumMicroOps != 1;
+  }
 };
 
 
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index 7208a8dc4464..b198eb62f0c6 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -19,7 +19,7 @@
 
 #include <cassert>
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class Function;
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
new file mode 100644
index 000000000000..bdd214b6b743
--- /dev/null
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -0,0 +1,66 @@
+//===-- llvm/Target/TargetLibraryInfo.h - Library information ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETLIBRARYINFO_H
+#define LLVM_TARGET_TARGETLIBRARYINFO_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+  class Triple;
+
+  namespace LibFunc {
+    enum Func {
+      /// void *memset(void *b, int c, size_t len);
+      memset,
+      
+      // void *memcpy(void *s1, const void *s2, size_t n);
+      memcpy,
+      
+      /// void memset_pattern16(void *b, const void *pattern16, size_t len);
+      memset_pattern16,
+      
+      NumLibFuncs
+    };
+  }
+
+/// TargetLibraryInfo - This immutable pass captures information about what
+/// library functions are available for the current target, and allows a
+/// frontend to disable optimizations through -fno-builtin etc.
+class TargetLibraryInfo : public ImmutablePass {
+  unsigned char AvailableArray[(LibFunc::NumLibFuncs+7)/8];
+public:
+  static char ID;
+  TargetLibraryInfo();
+  TargetLibraryInfo(const Triple &T);
+  
+  /// has - This function is used by optimizations that want to match on or form
+  /// a given library function.
+  bool has(LibFunc::Func F) const {
+    return (AvailableArray[F/8] & (1 << (F&7))) != 0;
+  }
+
+  /// setUnavailable - this can be used by whatever sets up TargetLibraryInfo to
+  /// ban use of specific library functions.
+  void setUnavailable(LibFunc::Func F) {
+    AvailableArray[F/8] &= ~(1 << (F&7));
+  }
+
+  void setAvailable(LibFunc::Func F) {
+    AvailableArray[F/8] |= 1 << (F&7);
+  }
+  
+  /// disableAllFunctions - This disables all builtins, which is used for
+  /// options like -fno-builtin.
+  void disableAllFunctions();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 29de994a21c9..5141b7b56229 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -25,13 +25,9 @@
 #include "llvm/CallingConv.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Attributes.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Target/TargetCallingConv.h"
 #include "llvm/Target/TargetMachine.h"
@@ -41,10 +37,12 @@
 
 namespace llvm {
   class AllocaInst;
+  class APFloat;
   class CallInst;
   class Function;
   class FastISel;
   class FunctionLoweringInfo;
+  class ImmutableCallSite;
   class MachineBasicBlock;
   class MachineFunction;
   class MachineFrameInfo;
@@ -55,6 +53,7 @@ namespace llvm {
   class SDNode;
   class SDValue;
   class SelectionDAG;
+  template<typename T> class SmallVectorImpl;
   class TargetData;
   class TargetMachine;
   class TargetRegisterClass;
@@ -126,6 +125,10 @@ public:
   /// srl/add/sra.
   bool isPow2DivCheap() const { return Pow2DivIsCheap; }
 
+  /// isJumpExpensive() - Return true if Flow Control is an expensive operation
+  /// that should be avoided.
+  bool isJumpExpensive() const { return JumpIsExpensive; }
+
   /// getSetCCResultType - Return the ValueType of the result of SETCC
   /// operations.  Also used to obtain the target's preferred type for
   /// the condition operand of SELECT and BRCOND nodes.  In the case of
@@ -203,13 +206,6 @@ public:
     return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != 0;
   }
 
-  /// isTypeSynthesizable - Return true if it's OK for the compiler to create
-  /// new operations of this type.  All Legal types are synthesizable except
-  /// MMX vector types on X86.  Non-Legal types are not synthesizable.
-  bool isTypeSynthesizable(EVT VT) const {
-    return isTypeLegal(VT) && Synthesizable[VT.getSimpleVT().SimpleTy];
-  }
-
   class ValueTypeActionImpl {
     /// ValueTypeActions - For each value type, keep a LegalizeAction enum
     /// that indicates how instruction selection should deal with the type.
@@ -441,7 +437,7 @@ public:
   /// for it.
   LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const {
     assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
-           (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     return (LegalizeAction)LoadExtActions[VT.getSimpleVT().SimpleTy][ExtType];
   }
@@ -459,8 +455,8 @@ public:
   /// to be expanded to some other code sequence, or the target has a custom
   /// expander for it.
   LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
-    assert((unsigned)ValVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
-           (unsigned)MemVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+    assert(ValVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           MemVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     return (LegalizeAction)TruncStoreActions[ValVT.getSimpleVT().SimpleTy]
                                             [MemVT.getSimpleVT().SimpleTy];
@@ -480,8 +476,8 @@ public:
   /// for it.
   LegalizeAction
   getIndexedLoadAction(unsigned IdxMode, EVT VT) const {
-    assert( IdxMode < ISD::LAST_INDEXED_MODE &&
-           ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE &&
+    assert(IdxMode < ISD::LAST_INDEXED_MODE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
     return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
@@ -501,8 +497,8 @@ public:
   /// for it.
   LegalizeAction
   getIndexedStoreAction(unsigned IdxMode, EVT VT) const {
-    assert( IdxMode < ISD::LAST_INDEXED_MODE &&
-           ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE &&
+    assert(IdxMode < ISD::LAST_INDEXED_MODE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
     return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
@@ -646,21 +642,30 @@ public:
 
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memset. The value is set by the target at the
-  /// performance threshold for such a replacement.
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have OptSize attribute.
   /// @brief Get maximum # of store operations permitted for llvm.memset
-  unsigned getMaxStoresPerMemset() const { return maxStoresPerMemset; }
+  unsigned getMaxStoresPerMemset(bool OptSize) const {
+    return OptSize ? maxStoresPerMemsetOptSize : maxStoresPerMemset;
+  }
 
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memcpy. The value is set by the target at the
-  /// performance threshold for such a replacement.
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have OptSize attribute.
   /// @brief Get maximum # of store operations permitted for llvm.memcpy
-  unsigned getMaxStoresPerMemcpy() const { return maxStoresPerMemcpy; }
+  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
+    return OptSize ? maxStoresPerMemcpyOptSize : maxStoresPerMemcpy;
+  }
 
   /// This function returns the maximum number of store operations permitted
   /// to replace a call to llvm.memmove. The value is set by the target at the
-  /// performance threshold for such a replacement.
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have OptSize attribute.
   /// @brief Get maximum # of store operations permitted for llvm.memmove
-  unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }
+  unsigned getMaxStoresPerMemmove(bool OptSize) const {
+    return OptSize ? maxStoresPerMemmoveOptSize : maxStoresPerMemmove;
+  }
 
   /// This function returns true if the target allows unaligned memory accesses.
   /// of the specified type. This is used, for example, in situations where an
@@ -958,6 +963,13 @@ public:
     return isTypeLegal(VT);
   }
 
+  /// isDesirableToPromoteOp - Return true if it is profitable for dag combiner
+  /// to transform a floating point op of specified opcode to a equivalent op of
+  /// an integer type. e.g. f32 load -> i32 load can be profitable on ARM.
+  virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const {
+    return false;
+  }
+
   /// IsDesirableToPromoteOp - This method query the target whether it is
   /// beneficial for dag combiner to promote the specified node. If true, it
   /// should return the desired promotion type by reference.
@@ -1021,7 +1033,16 @@ protected:
 
   /// SelectIsExpensive - Tells the code generator not to expand operations
   /// into sequences that use the select operations if possible.
-  void setSelectIsExpensive() { SelectIsExpensive = true; }
+  void setSelectIsExpensive(bool isExpensive = true) { 
+    SelectIsExpensive = isExpensive; 
+  }
+
+  /// JumpIsExpensive - Tells the code generator not to expand sequence of 
+  /// operations into a seperate sequences that increases the amount of 
+  /// flow control.
+  void setJumpIsExpensive(bool isExpensive = true) {
+    JumpIsExpensive = isExpensive;
+  }
 
   /// setIntDivIsCheap - Tells the code generator that integer divide is
   /// expensive, and if possible, should be replaced by an alternate sequence
@@ -1036,12 +1057,10 @@ protected:
   /// addRegisterClass - Add the specified register class as an available
   /// regclass for the specified value type.  This indicates the selector can
   /// handle values of that class natively.
-  void addRegisterClass(EVT VT, TargetRegisterClass *RC,
-                        bool isSynthesizable = true) {
+  void addRegisterClass(EVT VT, TargetRegisterClass *RC) {
     assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
     AvailableRegClasses.push_back(std::make_pair(VT, RC));
     RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
-    Synthesizable[VT.getSimpleVT().SimpleTy] = isSynthesizable;
   }
 
   /// findRepresentativeClass - Return the largest legal super-reg register class
@@ -1065,8 +1084,7 @@ protected:
   /// not work with the specified type and indicate what to do about it.
   void setLoadExtAction(unsigned ExtType, MVT VT,
                         LegalizeAction Action) {
-    assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
-           (unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action;
   }
@@ -1075,8 +1093,7 @@ protected:
   /// not work with the specified type and indicate what to do about it.
   void setTruncStoreAction(MVT ValVT, MVT MemVT,
                            LegalizeAction Action) {
-    assert((unsigned)ValVT.SimpleTy < MVT::LAST_VALUETYPE &&
-           (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE &&
+    assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
            "Table isn't big enough!");
     TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action;
   }
@@ -1087,10 +1104,8 @@ protected:
   /// TargetLowering.cpp
   void setIndexedLoadAction(unsigned IdxMode, MVT VT,
                             LegalizeAction Action) {
-    assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
-           IdxMode < ISD::LAST_INDEXED_MODE &&
-           (unsigned)Action < 0xf &&
-           "Table isn't big enough!");
+    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf && "Table isn't big enough!");
     // Load action are kept in the upper half.
     IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
     IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
@@ -1102,10 +1117,8 @@ protected:
   /// TargetLowering.cpp
   void setIndexedStoreAction(unsigned IdxMode, MVT VT,
                              LegalizeAction Action) {
-    assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
-           IdxMode < ISD::LAST_INDEXED_MODE &&
-           (unsigned)Action < 0xf &&
-           "Table isn't big enough!");
+    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf && "Table isn't big enough!");
     // Store action are kept in the lower half.
     IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
     IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
@@ -1115,7 +1128,7 @@ protected:
   /// supported on the target and indicate what to do about it.
   void setCondCodeAction(ISD::CondCode CC, MVT VT,
                          LegalizeAction Action) {
-    assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
+    assert(VT < MVT::LAST_VALUETYPE &&
            (unsigned)CC < array_lengthof(CondCodeActions) &&
            "Table isn't big enough!");
     CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL)  << VT.SimpleTy*2);
@@ -1261,6 +1274,13 @@ public:
     return SDValue();    // this is here to silence compiler errors
   }
 
+  /// isUsedByReturnOnly - Return true if result of the specified node is used
+  /// by a return node only. This is used to determine whether it is possible
+  /// to codegen a libcall as tail call at legalization time.
+  virtual bool isUsedByReturnOnly(SDNode *N) const {
+    return false;
+  }
+
   /// LowerOperationWrapper - This callback is invoked by the type legalizer
   /// to legalize nodes with an illegal operand type but legal result types.
   /// It replaces the LowerOperation callback in the type Legalizer.
@@ -1328,6 +1348,22 @@ public:
     C_Unknown              // Unsupported constraint.
   };
 
+  enum ConstraintWeight {
+    // Generic weights.
+    CW_Invalid  = -1,     // No match.
+    CW_Okay     = 0,      // Acceptable.
+    CW_Good     = 1,      // Good weight.
+    CW_Better   = 2,      // Better weight.
+    CW_Best     = 3,      // Best weight.
+    
+    // Well-known weights.
+    CW_SpecificReg  = CW_Okay,    // Specific register operands.
+    CW_Register     = CW_Good,    // Register operands.
+    CW_Memory       = CW_Better,  // Memory operands.
+    CW_Constant     = CW_Best,    // Constant operand.
+    CW_Default      = CW_Okay     // Default or don't know type.
+  };
+
   /// AsmOperandInfo - This contains information for each constraint that we are
   /// lowering.
   struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
@@ -1356,12 +1392,41 @@ public:
     /// returns the output operand it matches.
     unsigned getMatchedOperand() const;
 
+    /// Copy constructor for copying from an AsmOperandInfo.
+    AsmOperandInfo(const AsmOperandInfo &info)
+      : InlineAsm::ConstraintInfo(info),
+        ConstraintCode(info.ConstraintCode),
+        ConstraintType(info.ConstraintType),
+        CallOperandVal(info.CallOperandVal),
+        ConstraintVT(info.ConstraintVT) {
+    }
+
+    /// Copy constructor for copying from a ConstraintInfo.
     AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
       : InlineAsm::ConstraintInfo(info),
         ConstraintType(TargetLowering::C_Unknown),
         CallOperandVal(0), ConstraintVT(MVT::Other) {
     }
   };
+  
+  typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
+  
+  /// ParseConstraints - Split up the constraint string from the inline
+  /// assembly value into the specific constraints and their prefixes,
+  /// and also tie in the associated operand values.
+  /// If this returns an empty vector, and if the constraint string itself
+  /// isn't empty, there was an error parsing.
+  virtual AsmOperandInfoVector ParseConstraints(ImmutableCallSite CS) const;
+  
+  /// Examine constraint type and operand type and determine a weight value.
+  /// The operand object must already have been set up with the operand type.
+  virtual ConstraintWeight getMultipleConstraintMatchWeight(
+      AsmOperandInfo &info, int maIndex) const;
+  
+  /// Examine constraint string and operand type and determine a weight value.
+  /// The operand object must already have been set up with the operand type.
+  virtual ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
 
   /// ComputeConstraintToUse - Determines the constraint code and constraint
   /// type to use for the specific AsmOperandInfo, setting
@@ -1568,6 +1633,11 @@ private:
   /// it.
   bool Pow2DivIsCheap;
 
+  /// JumpIsExpensive - Tells the code generator that it shouldn't generate
+  /// extra flow control instructions and should attempt to combine flow
+  /// control instructions via predication.
+  bool JumpIsExpensive;
+
   /// UseUnderscoreSetJmp - This target prefers to use _setjmp to implement
   /// llvm.setjmp.  Defaults to false.
   bool UseUnderscoreSetJmp;
@@ -1643,11 +1713,6 @@ private:
   /// approximate register pressure.
   uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
 
-  /// Synthesizable indicates whether it is OK for the compiler to create new
-  /// operations using this type.  All Legal types are Synthesizable except
-  /// MMX types on X86.  Non-Legal types are not Synthesizable.
-  bool Synthesizable[MVT::LAST_VALUETYPE];
-
   /// TransformToType - For any value types we are promoting or expanding, this
   /// contains the value type that we are changing to.  For Expanded types, this
   /// contains one step of the expand (e.g. i64 -> i32), even if there are
@@ -1727,6 +1792,10 @@ protected:
   /// @brief Specify maximum number of store instructions per memset call.
   unsigned maxStoresPerMemset;
 
+  /// Maximum number of stores operations that may be substituted for the call
+  /// to memset, used for functions with OptSize attribute.
+  unsigned maxStoresPerMemsetOptSize;
+
   /// When lowering \@llvm.memcpy this field specifies the maximum number of
   /// store operations that may be substituted for a call to memcpy. Targets
   /// must set this value based on the cost threshold for that target. Targets
@@ -1739,6 +1808,10 @@ protected:
   /// @brief Specify maximum bytes of store instructions per memcpy call.
   unsigned maxStoresPerMemcpy;
 
+  /// Maximum number of store operations that may be substituted for a call
+  /// to memcpy, used for functions with OptSize attribute.
+  unsigned maxStoresPerMemcpyOptSize;
+
   /// When lowering \@llvm.memmove this field specifies the maximum number of
   /// store instructions that may be substituted for a call to memmove. Targets
   /// must set this value based on the cost threshold for that target. Targets
@@ -1750,6 +1823,10 @@ protected:
   /// @brief Specify maximum bytes of store instructions per memmove call.
   unsigned maxStoresPerMemmove;
 
+  /// Maximum number of store instructions that may be substituted for a call
+  /// to memmove, used for functions with OpSize attribute.
+  unsigned maxStoresPerMemmoveOptSize;
+
   /// This field specifies whether the target can benefit from code placement
   /// optimization.
   bool benefitFromCodePlacementOpt;
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 819709fa20c2..34bf27132de5 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -69,10 +69,6 @@ protected:
   /// the section the Language Specific Data Area information is emitted to.
   const MCSection *LSDASection;
   
-  /// EHFrameSection - If exception handling is supported by the target, this is
-  /// the section the EH Frame is emitted to.
-  const MCSection *EHFrameSection;
-  
   // Dwarf sections for debug info.  If a target supports debug info, these must
   // be set.
   const MCSection *DwarfAbbrevSection;
@@ -92,6 +88,11 @@ protected:
   // information for a TLS variable, it'll go here.
   const MCSection *TLSExtraDataSection;
   
+  /// CommDirectiveSupportsAlignment - True if .comm supports alignment.  This
+  /// is a hack for as long as we support 10.4 Tiger, whose assembler doesn't
+  /// support alignment on comm.
+  bool CommDirectiveSupportsAlignment;
+  
   /// SupportsWeakEmptyEHFrame - True if target object file supports a
   /// weak_definition of constant 0 for an omitted EH frame.
   bool SupportsWeakOmittedEHFrame;
@@ -128,13 +129,17 @@ public:
     return SupportsWeakOmittedEHFrame;
   }
   
+  bool getCommDirectiveSupportsAlignment() const {
+    return CommDirectiveSupportsAlignment;
+  }
+
   const MCSection *getTextSection() const { return TextSection; }
   const MCSection *getDataSection() const { return DataSection; }
   const MCSection *getBSSSection() const { return BSSSection; }
   const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
   const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
   const MCSection *getLSDASection() const { return LSDASection; }
-  const MCSection *getEHFrameSection() const { return EHFrameSection; }
+  virtual const MCSection *getEHFrameSection() const = 0;
   const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
   const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
   const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 42e99e015644..030bf5b89f77 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -29,7 +29,7 @@ class TargetIntrinsicInfo;
 class TargetJITInfo;
 class TargetLowering;
 class TargetSelectionDAGInfo;
-class TargetFrameInfo;
+class TargetFrameLowering;
 class JITCodeEmitter;
 class MCContext;
 class TargetRegisterInfo;
@@ -98,12 +98,14 @@ protected: // Can only create subclasses.
 
   /// TheTarget - The Target that this machine was created for.
   const Target &TheTarget;
-  
+
   /// AsmInfo - Contains target specific asm information.
   ///
   const MCAsmInfo *AsmInfo;
 
   unsigned MCRelaxAll : 1;
+  unsigned MCNoExecStack : 1;
+  unsigned MCUseLoc : 1;
 
 public:
   virtual ~TargetMachine();
@@ -116,16 +118,16 @@ public:
   // -- Stack frame information
   // -- Selection DAG lowering information
   //
-  virtual const TargetInstrInfo        *getInstrInfo() const { return 0; }
-  virtual const TargetFrameInfo        *getFrameInfo() const { return 0; }
+  virtual const TargetInstrInfo         *getInstrInfo() const { return 0; }
+  virtual const TargetFrameLowering *getFrameLowering() const { return 0; }
   virtual const TargetLowering    *getTargetLowering() const { return 0; }
   virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
-  virtual const TargetData            *getTargetData() const { return 0; }
-  
+  virtual const TargetData             *getTargetData() const { return 0; }
+
   /// getMCAsmInfo - Return target specific asm information.
   ///
   const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
-  
+
   /// getSubtarget - This method returns a pointer to the specified type of
   /// TargetSubtarget.  In debug builds, it verifies that the object being
   /// returned is of the correct type.
@@ -138,7 +140,7 @@ public:
   /// details of graph coloring register allocation removed from it.
   ///
   virtual const TargetRegisterInfo *getRegisterInfo() const { return 0; }
-  
+
   /// getIntrinsicInfo - If intrinsic information is available, return it.  If
   /// not, return null.
   ///
@@ -148,17 +150,17 @@ public:
   /// otherwise return null.
   ///
   virtual TargetJITInfo *getJITInfo() { return 0; }
-  
+
   /// getInstrItineraryData - Returns instruction itinerary data for the target
   /// or specific subtarget.
   ///
-  virtual const InstrItineraryData getInstrItineraryData() const {  
-    return InstrItineraryData();
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return 0;
   }
 
   /// getELFWriterInfo - If this target supports an ELF writer, return
   /// information for it, otherwise return null.
-  /// 
+  ///
   virtual const TargetELFWriterInfo *getELFWriterInfo() const { return 0; }
 
   /// hasMCRelaxAll - Check whether all machine code instructions should be
@@ -169,6 +171,18 @@ public:
   /// relaxed.
   void setMCRelaxAll(bool Value) { MCRelaxAll = Value; }
 
+  /// hasMCNoExecStack - Check whether an executable stack is not needed.
+  bool hasMCNoExecStack() const { return MCNoExecStack; }
+
+  /// setMCNoExecStack - Set whether an executabel stack is not needed.
+  void setMCNoExecStack(bool Value) { MCNoExecStack = Value; }
+
+  /// hasMCUseLoc - Check whether we should use dwarf's .loc directive.
+  bool hasMCUseLoc() const { return MCUseLoc; }
+
+  /// setMCUseLoc - Set whether all we should use dwarf's .loc directive.
+  void setMCUseLoc(bool Value) { MCUseLoc = Value; }
+
   /// getRelocationModel - Returns the code generation relocation model. The
   /// choices are static, PIC, and dynamic-no-pic, and target default.
   static Reloc::Model getRelocationModel();
@@ -267,7 +281,7 @@ class LLVMTargetMachine : public TargetMachine {
 
 protected: // Can only create subclasses.
   LLVMTargetMachine(const Target &T, const std::string &TargetTriple);
-  
+
 private:
   /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
   /// both emitting to assembly files or machine code output.
@@ -277,9 +291,11 @@ private:
 
   virtual void setCodeModelForJIT();
   virtual void setCodeModelForStatic();
-  
+
 public:
-  
+
+  const std::string &getTargetTriple() const { return TargetTriple; }
+
   /// addPassesToEmitFile - Add passes to the specified pass manager to get the
   /// specified file emitted.  Typically this will involve several steps of code
   /// generation.  If OptLevel is None, the code generator should emit code as
@@ -289,7 +305,7 @@ public:
                                    CodeGenFileType FileType,
                                    CodeGenOpt::Level,
                                    bool DisableVerify = true);
-  
+
   /// addPassesToEmitMachineCode - Add passes to the specified pass manager to
   /// get machine code emitted.  This uses a JITCodeEmitter object to handle
   /// actually outputting the machine code and resolving things like the address
@@ -310,7 +326,7 @@ public:
                                  MCContext *&Ctx,
                                  CodeGenOpt::Level OptLevel,
                                  bool DisableVerify = true);
-  
+
   /// Target-Independent Code Generator Pass Configuration Options.
 
   /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
@@ -347,15 +363,15 @@ public:
   virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) {
     return false;
   }
-  
+
   /// addPreEmitPass - This pass may be implemented by targets that want to run
   /// passes immediately before machine code is emitted.  This should return
   /// true if -print-machineinstrs should print out the code after the passes.
   virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level) {
     return false;
   }
-  
-  
+
+
   /// addCodeEmitter - This pass should be overridden by the target to add a
   /// code emitter, if supported.  If this is not supported, 'true' should be
   /// returned.
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 81dec3e5b78d..121091c9b49b 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -29,21 +29,21 @@ class MachineFunction;
 class MachineMove;
 class RegScavenger;
 template<class T> class SmallVectorImpl;
+class raw_ostream;
 
 /// TargetRegisterDesc - This record contains all of the information known about
-/// a particular register.  The AliasSet field (if not null) contains a pointer
-/// to a Zero terminated array of registers that this register aliases.  This is
-/// needed for architectures like X86 which have AL alias AX alias EAX.
-/// Registers that this does not apply to simply should set this to null.
-/// The SubRegs field is a zero terminated array of registers that are
-/// sub-registers of the specific register, e.g. AL, AH are sub-registers of AX.
-/// The SuperRegs field is a zero terminated array of registers that are
+/// a particular register.  The Overlaps field contains a pointer to a zero
+/// terminated array of registers that this register aliases, starting with
+/// itself. This is needed for architectures like X86 which have AL alias AX
+/// alias EAX. The SubRegs field is a zero terminated array of registers that
+/// are sub-registers of the specific register, e.g. AL, AH are sub-registers of
+/// AX. The SuperRegs field is a zero terminated array of registers that are
 /// super-registers of the specific register, e.g. RAX, EAX, are super-registers
 /// of AX.
 ///
 struct TargetRegisterDesc {
   const char     *Name;         // Printable name for the reg (for debugging)
-  const unsigned *AliasSet;     // Register Alias Set, described above
+  const unsigned *Overlaps;     // Overlapping registers, described above
   const unsigned *SubRegs;      // Sub-register set, described above
   const unsigned *SuperRegs;    // Super-register set, described above
 };
@@ -123,7 +123,7 @@ public:
   /// hasType - return true if this TargetRegisterClass has the ValueType vt.
   ///
   bool hasType(EVT vt) const {
-    for(int i = 0; VTs[i].getSimpleVT().SimpleTy != MVT::Other; ++i)
+    for(int i = 0; VTs[i] != MVT::Other; ++i)
       if (VTs[i] == vt)
         return true;
     return false;
@@ -137,7 +137,7 @@ public:
 
   vt_iterator vt_end() const {
     vt_iterator I = VTs;
-    while (I->getSimpleVT().SimpleTy != MVT::Other) ++I;
+    while (*I != MVT::Other) ++I;
     return I;
   }
 
@@ -227,9 +227,12 @@ public:
   /// cheaper to allocate caller saved registers.
   ///
   /// These methods take a MachineFunction argument, which can be used to tune
-  /// the allocatable registers based on the characteristics of the function.
-  /// One simple example is that the frame pointer register can be used if
-  /// frame-pointer-elimination is performed.
+  /// the allocatable registers based on the characteristics of the function,
+  /// subtarget, or other criteria.
+  ///
+  /// Register allocators should account for the fact that an allocation
+  /// order iterator may return a reserved register and always check
+  /// if the register is allocatable (getAllocatableSet()) before using it.
   ///
   /// By default, these methods return all registers in the class.
   ///
@@ -292,30 +295,68 @@ protected:
   virtual ~TargetRegisterInfo();
 public:
 
-  enum {                        // Define some target independent constants
-    /// NoRegister - This physical register is not a real target register.  It
-    /// is useful as a sentinal.
-    NoRegister = 0,
+  // Register numbers can represent physical registers, virtual registers, and
+  // sometimes stack slots. The unsigned values are divided into these ranges:
+  //
+  //   0           Not a register, can be used as a sentinel.
+  //   [1;2^30)    Physical registers assigned by TableGen.
+  //   [2^30;2^31) Stack slots. (Rarely used.)
+  //   [2^31;2^32) Virtual registers assigned by MachineRegisterInfo.
+  //
+  // Further sentinels can be allocated from the small negative integers.
+  // DenseMapInfo<unsigned> uses -1u and -2u.
 
-    /// FirstVirtualRegister - This is the first register number that is
-    /// considered to be a 'virtual' register, which is part of the SSA
-    /// namespace.  This must be the same for all targets, which means that each
-    /// target is limited to this fixed number of registers.
-    FirstVirtualRegister = 16384
-  };
+  /// isStackSlot - Sometimes it is useful the be able to store a non-negative
+  /// frame index in a variable that normally holds a register. isStackSlot()
+  /// returns true if Reg is in the range used for stack slots.
+  ///
+  /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack
+  /// slots, so if a variable may contains a stack slot, always check
+  /// isStackSlot() first.
+  ///
+  static bool isStackSlot(unsigned Reg) {
+    return int(Reg) >= (1 << 30);
+  }
+
+  /// stackSlot2Index - Compute the frame index from a register value
+  /// representing a stack slot.
+  static int stackSlot2Index(unsigned Reg) {
+    assert(isStackSlot(Reg) && "Not a stack slot");
+    return int(Reg - (1u << 30));
+  }
+
+  /// index2StackSlot - Convert a non-negative frame index to a stack slot
+  /// register value.
+  static unsigned index2StackSlot(int FI) {
+    assert(FI >= 0 && "Cannot hold a negative frame index.");
+    return FI + (1u << 30);
+  }
 
   /// isPhysicalRegister - Return true if the specified register number is in
   /// the physical register namespace.
   static bool isPhysicalRegister(unsigned Reg) {
-    assert(Reg && "this is not a register!");
-    return Reg < FirstVirtualRegister;
+    assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+    return int(Reg) > 0;
   }
 
   /// isVirtualRegister - Return true if the specified register number is in
   /// the virtual register namespace.
   static bool isVirtualRegister(unsigned Reg) {
-    assert(Reg && "this is not a register!");
-    return Reg >= FirstVirtualRegister;
+    assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+    return int(Reg) < 0;
+  }
+
+  /// virtReg2Index - Convert a virtual register number to a 0-based index.
+  /// The first virtual register in a function will get the index 0.
+  static unsigned virtReg2Index(unsigned Reg) {
+    assert(isVirtualRegister(Reg) && "Not a virtual register");
+    return Reg - (1u << 31);
+  }
+
+  /// index2VirtReg - Convert a 0-based index to a virtual register number.
+  /// This is the inverse operation of VirtReg2IndexFunctor below.
+  static unsigned index2VirtReg(unsigned Index) {
+    return Index + (1u << 31);
   }
 
   /// getMinimalPhysRegClass - Returns the Register Class of a physical
@@ -348,7 +389,17 @@ public:
   /// terminated.
   ///
   const unsigned *getAliasSet(unsigned RegNo) const {
-    return get(RegNo).AliasSet;
+    // The Overlaps set always begins with Reg itself.
+    return get(RegNo).Overlaps + 1;
+  }
+
+  /// getOverlaps - Return a list of registers that overlap Reg, including
+  /// itself. This is the same as the alias set except Reg is included in the
+  /// list.
+  /// These are exactly the registers in { x | regsOverlap(x, Reg) }.
+  ///
+  const unsigned *getOverlaps(unsigned RegNo) const {
+    return get(RegNo).Overlaps;
   }
 
   /// getSubRegisters - Return the list of registers that are sub-registers of
@@ -574,13 +625,6 @@ public:
     // Do nothing.
   }
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  virtual bool targetHandlesStackFrameRounding() const {
-    return false;
-  }
-
   /// requiresRegisterScavenging - returns true if the target requires (and can
   /// make use of) the register scavenger.
   virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
@@ -600,31 +644,6 @@ public:
     return false;
   }
 
-  /// hasFP - Return true if the specified function should have a dedicated
-  /// frame pointer register. For most targets this is true only if the function
-  /// has variable sized allocas or if frame pointer elimination is disabled.
-  virtual bool hasFP(const MachineFunction &MF) const = 0;
-
-  /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-  /// not required, we reserve argument space for call sites in the function
-  /// immediately on entry to the current function. This eliminates the need for
-  /// add/sub sp brackets around call sites. Returns true if the call frame is
-  /// included as part of the stack frame.
-  virtual bool hasReservedCallFrame(const MachineFunction &MF) const {
-    return !hasFP(MF);
-  }
-
-  /// canSimplifyCallFramePseudos - When possible, it's best to simplify the
-  /// call frame pseudo ops before doing frame index elimination. This is
-  /// possible only when frame index references between the pseudos won't
-  /// need adjusting for the call frame adjustments. Normally, that's true
-  /// if the function has a reserved call frame or a frame pointer. Some
-  /// targets (Thumb2, for example) may have more complicated criteria,
-  /// however, and can override this behavior.
-  virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const {
-    return hasReservedCallFrame(MF) || hasFP(MF);
-  }
-
   /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
   /// the stack frame of the given function for the specified register. e.g. On
   /// x86, if the frame register is required, the first fixed stack object is
@@ -644,7 +663,7 @@ public:
   }
 
   /// getFrameIndexInstrOffset - Get the offset from the referenced frame
-  /// index in the instruction, if the is one.
+  /// index in the instruction, if there is one.
   virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                            int Idx) const {
     return 0;
@@ -660,7 +679,7 @@ public:
 
   /// materializeFrameBaseRegister - Insert defining instruction(s) for
   /// BaseReg to be a pointer to FrameIdx before insertion point I.
-  virtual void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+  virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                             unsigned BaseReg, int FrameIdx,
                                             int64_t Offset) const {
     assert(0 && "materializeFrameBaseRegister does not exist on this target");
@@ -707,21 +726,6 @@ public:
     assert(0 && "Call Frame Pseudo Instructions do not exist on this target!");
   }
 
-  /// processFunctionBeforeCalleeSavedScan - This method is called immediately
-  /// before PrologEpilogInserter scans the physical registers used to determine
-  /// what callee saved registers should be spilled. This method is optional.
-  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                RegScavenger *RS = NULL) const {
-
-  }
-
-  /// processFunctionBeforeFrameFinalized - This method is called immediately
-  /// before the specified function's frame layout (MF.getFrameInfo()) is
-  /// finalized.  Once the frame is finalized, MO_FrameIndex operands are
-  /// replaced with direct constants.  This method is optional.
-  ///
-  virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-  }
 
   /// saveScavengerRegister - Spill the register so it can be used by the
   /// register scavenger. Return true if the register was spilled, false
@@ -746,12 +750,6 @@ public:
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                    int SPAdj, RegScavenger *RS=NULL) const = 0;
 
-  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
-  /// the function.
-  virtual void emitPrologue(MachineFunction &MF) const = 0;
-  virtual void emitEpilogue(MachineFunction &MF,
-                            MachineBasicBlock &MBB) const = 0;
-
   //===--------------------------------------------------------------------===//
   /// Debug information queries.
 
@@ -765,37 +763,16 @@ public:
   /// for values allocated in the current stack frame.
   virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
 
-  /// getFrameIndexOffset - Returns the displacement from the frame register to
-  /// the stack frame of the specified index.
-  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
-  /// getFrameIndexReference - This method should return the base register
-  /// and offset used to reference a frame index location. The offset is
-  /// returned directly, and the base register is returned via FrameReg.
-  virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
-                                     unsigned &FrameReg) const {
-    // By default, assume all frame indices are referenced via whatever
-    // getFrameRegister() says. The target can override this if it's doing
-    // something different.
-    FrameReg = getFrameRegister(MF);
-    return getFrameIndexOffset(MF, FI);
-  }
-
   /// getRARegister - This method should return the register where the return
   /// address can be found.
   virtual unsigned getRARegister() const = 0;
-
-  /// getInitialFrameState - Returns a list of machine moves that are assumed
-  /// on entry to all functions.  Note that LabelID is ignored (assumed to be
-  /// the beginning of the function.)
-  virtual void getInitialFrameState(std::vector<MachineMove> &Moves) const;
 };
 
 
 // This is useful when building IndexedMaps keyed on virtual registers
 struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
   unsigned operator()(unsigned Reg) const {
-    return Reg - TargetRegisterInfo::FirstVirtualRegister;
+    return TargetRegisterInfo::virtReg2Index(Reg);
   }
 };
 
@@ -804,6 +781,33 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
 const TargetRegisterClass *getCommonSubClass(const TargetRegisterClass *A,
                                              const TargetRegisterClass *B);
 
+/// PrintReg - Helper class for printing registers on a raw_ostream.
+/// Prints virtual and physical registers with or without a TRI instance.
+///
+/// The format is:
+///   %noreg          - NoRegister
+///   %vreg5          - a virtual register.
+///   %vreg5:sub_8bit - a virtual register with sub-register index (with TRI).
+///   %EAX            - a physical register
+///   %physreg17      - a physical register when no TRI instance given.
+///
+/// Usage: OS << PrintReg(Reg, TRI) << '\n';
+///
+class PrintReg {
+  const TargetRegisterInfo *TRI;
+  unsigned Reg;
+  unsigned SubIdx;
+public:
+  PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0, unsigned subidx = 0)
+    : TRI(tri), Reg(reg), SubIdx(subidx) {}
+  void print(raw_ostream&) const;
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
+  PR.print(OS);
+  return OS;
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index 2817b0c421ed..f851ad0a9bfb 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -39,6 +39,15 @@ namespace llvm {
   class TargetAsmParser;
   class TargetMachine;
   class raw_ostream;
+  class formatted_raw_ostream;
+
+  MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+                                bool isVerboseAsm,
+                                bool useLoc,
+                                MCInstPrinter *InstPrint,
+                                MCCodeEmitter *CE,
+                                TargetAsmBackend *TAB,
+                                bool ShowInst);
 
   /// Target - Wrapper for Target specific information.
   ///
@@ -80,7 +89,16 @@ namespace llvm {
                                                 TargetAsmBackend &TAB,
                                                 raw_ostream &_OS,
                                                 MCCodeEmitter *_Emitter,
-                                                bool RelaxAll);
+                                                bool RelaxAll,
+                                                bool NoExecStack);
+    typedef MCStreamer *(*AsmStreamerCtorTy)(MCContext &Ctx,
+                                             formatted_raw_ostream &OS,
+                                             bool isVerboseAsm,
+                                             bool useLoc,
+                                             MCInstPrinter *InstPrint,
+                                             MCCodeEmitter *CE,
+                                             TargetAsmBackend *TAB,
+                                             bool ShowInst);
 
   private:
     /// Next - The next registered target in the linked list, maintained by the
@@ -138,7 +156,13 @@ namespace llvm {
     /// ObjectStreamer, if registered.
     ObjectStreamerCtorTy ObjectStreamerCtorFn;
 
+    /// AsmStreamerCtorFn - Construction function for this target's
+    /// AsmStreamer, if registered (default = llvm::createAsmStreamer).
+    AsmStreamerCtorTy AsmStreamerCtorFn;
+
   public:
+    Target() : AsmStreamerCtorFn(llvm::createAsmStreamer) {}
+
     /// @name Target Information
     /// @{
 
@@ -185,6 +209,9 @@ namespace llvm {
     /// hasObjectStreamer - Check if this target supports streaming to files.
     bool hasObjectStreamer() const { return ObjectStreamerCtorFn != 0; }
 
+    /// hasAsmStreamer - Check if this target supports streaming to files.
+    bool hasAsmStreamer() const { return AsmStreamerCtorFn != 0; }
+
     /// @}
     /// @name Feature Constructors
     /// @{
@@ -282,14 +309,31 @@ namespace llvm {
     /// \arg _OS - The stream object.
     /// \arg _Emitter - The target independent assembler object.Takes ownership.
     /// \arg RelaxAll - Relax all fixups?
+    /// \arg NoExecStack - Mark file as not needing a executable stack.
     MCStreamer *createObjectStreamer(const std::string &TT, MCContext &Ctx,
                                      TargetAsmBackend &TAB,
                                      raw_ostream &_OS,
                                      MCCodeEmitter *_Emitter,
-                                     bool RelaxAll) const {
+                                     bool RelaxAll,
+                                     bool NoExecStack) const {
       if (!ObjectStreamerCtorFn)
         return 0;
-      return ObjectStreamerCtorFn(*this, TT, Ctx, TAB, _OS, _Emitter, RelaxAll);
+      return ObjectStreamerCtorFn(*this, TT, Ctx, TAB, _OS, _Emitter, RelaxAll,
+                                  NoExecStack);
+    }
+
+    /// createAsmStreamer - Create a target specific MCStreamer.
+    MCStreamer *createAsmStreamer(MCContext &Ctx,
+                                  formatted_raw_ostream &OS,
+                                  bool isVerboseAsm,
+                                  bool useLoc,
+                                  MCInstPrinter *InstPrint,
+                                  MCCodeEmitter *CE,
+                                  TargetAsmBackend *TAB,
+                                  bool ShowInst) const {
+      // AsmStreamerCtorFn is default to llvm::createAsmStreamer
+      return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc,
+                               InstPrint, CE, TAB, ShowInst);
     }
 
     /// @}
@@ -513,7 +557,7 @@ namespace llvm {
         T.CodeEmitterCtorFn = Fn;
     }
 
-    /// RegisterObjectStreamer - Register an MCStreamer implementation
+    /// RegisterObjectStreamer - Register a object code MCStreamer implementation
     /// for the given target.
     ///
     /// Clients are responsible for ensuring that registration doesn't occur
@@ -527,6 +571,20 @@ namespace llvm {
         T.ObjectStreamerCtorFn = Fn;
     }
 
+    /// RegisterAsmStreamer - Register an assembly MCStreamer implementation
+    /// for the given target.
+    ///
+    /// Clients are responsible for ensuring that registration doesn't occur
+    /// while another thread is attempting to access the registry. Typically
+    /// this is done by initializing all targets at program startup.
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCStreamer for the target.
+    static void RegisterAsmStreamer(Target &T, Target::AsmStreamerCtorTy Fn) {
+      if (T.AsmStreamerCtorFn == createAsmStreamer)
+        T.AsmStreamerCtorFn = Fn;
+    }
+
     /// @}
   };
 
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 96c83674cb03..97ea82ab9e3d 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -22,6 +22,13 @@
 //  
 class FuncUnit;
 
+//===----------------------------------------------------------------------===//
+// Pipeline bypass / forwarding - These values specifies the symbolic names of
+// pipeline bypasses which can be used to forward results of instructions
+// that are forwarded to uses.
+class Bypass;
+def NoBypass : Bypass;
+
 class ReservationKind<bits<1> val> {
   int Value = val;
 }
@@ -66,30 +73,58 @@ class InstrStage<int cycles, list<FuncUnit> units,
 // across all chip sets.  Thus a new chip set can be added without modifying
 // instruction information.
 //
-class InstrItinClass;
+// NumMicroOps represents the number of micro-operations that each instruction
+// in the class are decoded to. If the number is zero, then it means the
+// instruction can decode into variable number of micro-ops and it must be
+// determined dynamically.
+//
+class InstrItinClass<int ops = 1> {
+  int NumMicroOps = ops;
+}
 def NoItinerary : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Instruction itinerary data - These values provide a runtime map of an 
 // instruction itinerary class (name) to its itinerary data.
 //
+// OperandCycles are optional "cycle counts". They specify the cycle after
+// instruction issue the values which correspond to specific operand indices
+// are defined or read. Bypasses are optional "pipeline forwarding pathes", if
+// a def by an instruction is available on a specific bypass and the use can
+// read from the same bypass, then the operand use latency is reduced by one.
+//
+//  InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>,
+//                               InstrStage<1, [A9_AGU]>],
+//                              [3, 1], [A9_LdBypass]>,
+//  InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+//                              [1, 1], [NoBypass, A9_LdBypass]>,
+//
+// In this example, the instruction of IIC_iLoadi reads its input on cycle 1
+// (after issue) and the result of the load is available on cycle 3. The result
+// is available via forwarding path A9_LdBypass. If it's used by the first
+// source operand of instructions of IIC_iMVNr class, then the operand latency
+// is reduced by 1.
 class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
-                    list<int> operandcycles = []> {
+                    list<int> operandcycles = [],
+                    list<Bypass> bypasses = []> {
   InstrItinClass TheClass = Class;
   list<InstrStage> Stages = stages;
   list<int> OperandCycles = operandcycles;
+  list<Bypass> Bypasses = bypasses;
 }
 
 //===----------------------------------------------------------------------===//
 // Processor itineraries - These values represent the set of all itinerary
 // classes for a given chip set.
 //
-class ProcessorItineraries<list<FuncUnit> fu, list<InstrItinData> iid> {
+class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
+                           list<InstrItinData> iid> {
   list<FuncUnit> FU = fu;
+  list<Bypass> BP = bp;
   list<InstrItinData> IID = iid;
 }
 
 // NoItineraries - A marker that can be used by processors without schedule
 // info.
-def NoItineraries : ProcessorItineraries<[], []>;
+def NoItineraries : ProcessorItineraries<[], [], []>;
 
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 58ccfbacc6f7..c9be40d23f00 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -1,10 +1,10 @@
 //===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file defines the target-independent interfaces used by SelectionDAG
@@ -61,6 +61,13 @@ class SDTCisEltOfVec<int ThisOp, int OtherOp>
   int OtherOpNum = OtherOp;
 }
 
+/// SDTCisSubVecOfVec - This indicates that ThisOp is a vector type
+/// with length less that of OtherOp, which is a vector type.
+class SDTCisSubVecOfVec<int ThisOp, int OtherOp>
+  : SDTypeConstraint<ThisOp> {
+  int OtherOpNum = OtherOp;
+}
+
 //===----------------------------------------------------------------------===//
 // Selection DAG Type Profile definitions.
 //
@@ -123,10 +130,10 @@ def SDTFPRoundOp  : SDTypeProfile<1, 1, [   // fround
 def SDTFPExtendOp  : SDTypeProfile<1, 1, [  // fextend
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>
 ]>;
-def SDTIntToFPOp : SDTypeProfile<1, 1, [    // [su]int_to_fp 
+def SDTIntToFPOp : SDTypeProfile<1, 1, [    // [su]int_to_fp
   SDTCisFP<0>, SDTCisInt<1>
 ]>;
-def SDTFPToIntOp : SDTypeProfile<1, 1, [    // fp_to_[su]int 
+def SDTFPToIntOp : SDTypeProfile<1, 1, [    // fp_to_[su]int
   SDTCisInt<0>, SDTCisFP<1>
 ]>;
 def SDTExtInreg : SDTypeProfile<1, 2, [     // sext_inreg
@@ -138,7 +145,7 @@ def SDTSetCC : SDTypeProfile<1, 3, [        // setcc
   SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
 ]>;
 
-def SDTSelect : SDTypeProfile<1, 3, [       // select 
+def SDTSelect : SDTypeProfile<1, 3, [       // select
   SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
 ]>;
 
@@ -162,11 +169,11 @@ def SDTBrind : SDTypeProfile<0, 1, [        // brind
 def SDTNone : SDTypeProfile<0, 0, []>;      // ret, trap
 
 def SDTLoad : SDTypeProfile<1, 1, [         // load
-  SDTCisPtrTy<1>  
+  SDTCisPtrTy<1>
 ]>;
 
 def SDTStore : SDTypeProfile<0, 2, [        // store
-  SDTCisPtrTy<1>  
+  SDTCisPtrTy<1>
 ]>;
 
 def SDTIStore : SDTypeProfile<1, 3, [       // indexed store
@@ -183,18 +190,25 @@ def SDTVecInsert : SDTypeProfile<1, 3, [    // vector insert
   SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
 ]>;
 
-def STDPrefetch : SDTypeProfile<0, 3, [     // prefetch
+def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
+  SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
+]>;
+def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert
+  SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3>
+]>;
+
+def SDTPrefetch : SDTypeProfile<0, 3, [     // prefetch
   SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
 ]>;
 
-def STDMemBarrier : SDTypeProfile<0, 5, [   // memory barier
+def SDTMemBarrier : SDTypeProfile<0, 5, [   // memory barier
   SDTCisSameAs<0,1>,  SDTCisSameAs<0,2>,  SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
   SDTCisInt<0>
 ]>;
-def STDAtomic3 : SDTypeProfile<1, 3, [
+def SDTAtomic3 : SDTypeProfile<1, 3, [
   SDTCisSameAs<0,2>,  SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1>
 ]>;
-def STDAtomic2 : SDTypeProfile<1, 2, [
+def SDTAtomic2 : SDTypeProfile<1, 2, [
   SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
 ]>;
 
@@ -216,20 +230,27 @@ class SDNodeProperty;
 def SDNPCommutative : SDNodeProperty;   // X op Y == Y op X
 def SDNPAssociative : SDNodeProperty;   // (X op Y) op Z == X op (Y op Z)
 def SDNPHasChain    : SDNodeProperty;   // R/W chain operand and result
-def SDNPOutFlag     : SDNodeProperty;   // Write a flag result
-def SDNPInFlag      : SDNodeProperty;   // Read a flag operand
-def SDNPOptInFlag   : SDNodeProperty;   // Optionally read a flag operand
+def SDNPOutGlue     : SDNodeProperty;   // Write a flag result
+def SDNPInGlue      : SDNodeProperty;   // Read a flag operand
+def SDNPOptInGlue   : SDNodeProperty;   // Optionally read a flag operand
 def SDNPMayStore    : SDNodeProperty;   // May write to memory, sets 'mayStore'.
 def SDNPMayLoad     : SDNodeProperty;   // May read memory, sets 'mayLoad'.
 def SDNPSideEffect  : SDNodeProperty;   // Sets 'HasUnmodelledSideEffects'.
 def SDNPMemOperand  : SDNodeProperty;   // Touches memory, has assoc MemOperand
 def SDNPVariadic    : SDNodeProperty;   // Node has variable arguments.
+def SDNPWantRoot    : SDNodeProperty;   // ComplexPattern gets the root of match
+def SDNPWantParent  : SDNodeProperty;   // ComplexPattern gets the parent
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Operations
+class SDPatternOperator;
 
 //===----------------------------------------------------------------------===//
 // Selection DAG Node definitions.
 //
 class SDNode<string opcode, SDTypeProfile typeprof,
-             list<SDNodeProperty> props = [], string sdclass = "SDNode"> {
+             list<SDNodeProperty> props = [], string sdclass = "SDNode">
+             : SDPatternOperator {
   string Opcode  = opcode;
   string SDClass = sdclass;
   list<SDNodeProperty> Properties = props;
@@ -305,14 +326,14 @@ def or         : SDNode<"ISD::OR"        , SDTIntBinOp,
 def xor        : SDNode<"ISD::XOR"       , SDTIntBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
 def addc       : SDNode<"ISD::ADDC"      , SDTIntBinOp,
-                        [SDNPCommutative, SDNPOutFlag]>;
+                        [SDNPCommutative, SDNPOutGlue]>;
 def adde       : SDNode<"ISD::ADDE"      , SDTIntBinOp,
-                        [SDNPCommutative, SDNPOutFlag, SDNPInFlag]>;
+                        [SDNPCommutative, SDNPOutGlue, SDNPInGlue]>;
 def subc       : SDNode<"ISD::SUBC"      , SDTIntBinOp,
-                        [SDNPOutFlag]>;
+                        [SDNPOutGlue]>;
 def sube       : SDNode<"ISD::SUBE"      , SDTIntBinOp,
-                        [SDNPOutFlag, SDNPInFlag]>;
-                        
+                        [SDNPOutGlue, SDNPInGlue]>;
+
 def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
 def bswap      : SDNode<"ISD::BSWAP"      , SDTIntUnaryOp>;
 def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntUnaryOp>;
@@ -322,11 +343,11 @@ def sext       : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
 def zext       : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
 def anyext     : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
 def trunc      : SDNode<"ISD::TRUNCATE"   , SDTIntTruncOp>;
-def bitconvert : SDNode<"ISD::BIT_CONVERT", SDTUnaryOp>;
+def bitconvert : SDNode<"ISD::BITCAST"    , SDTUnaryOp>;
 def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
 def insertelt  : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
 
-                        
+
 def fadd       : SDNode<"ISD::FADD"       , SDTFPBinOp, [SDNPCommutative]>;
 def fsub       : SDNode<"ISD::FSUB"       , SDTFPBinOp>;
 def fmul       : SDNode<"ISD::FMUL"       , SDTFPBinOp, [SDNPCommutative]>;
@@ -367,35 +388,36 @@ def br         : SDNode<"ISD::BR"         , SDTBr,     [SDNPHasChain]>;
 def trap       : SDNode<"ISD::TRAP"       , SDTNone,
                         [SDNPHasChain, SDNPSideEffect]>;
 
-def prefetch   : SDNode<"ISD::PREFETCH"   , STDPrefetch,
-                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+def prefetch   : SDNode<"ISD::PREFETCH"   , SDTPrefetch,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+                         SDNPMemOperand]>;
 
-def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
+def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier,
                         [SDNPHasChain, SDNPSideEffect]>;
 
-def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , STDAtomic3,
+def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , STDAtomic2,
+def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_swap     : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
+def atomic_swap     : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , STDAtomic2,
+def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2,
+def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_or  : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2,
+def atomic_load_or  : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2,
+def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", STDAtomic2,
+def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2,
+def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2,
+def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2,
+def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2,
+def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
@@ -415,16 +437,26 @@ def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
     SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
 def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
     SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
-    
+
+// This operator does not do subvector type checking.  The ARM
+// backend, at least, needs it.
+def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
+    SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, 
+    []>;
+
+// This operator does subvector type checking.
+def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>;
+def insert_subvector : SDNode<"ISD::INSERT_SUBVECTOR", SDTSubVecInsert, []>;
+
 // Nodes for intrinsics, you should use the intrinsic itself and let tblgen use
 // these internally.  Don't reference these directly.
-def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID", 
+def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain]>;
-def intrinsic_w_chain : SDNode<"ISD::INTRINSIC_W_CHAIN", 
+def intrinsic_w_chain : SDNode<"ISD::INTRINSIC_W_CHAIN",
                                SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>,
                                [SDNPHasChain]>;
-def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN", 
+def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN",
                                 SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, []>;
 
 // Do not use cvt directly. Use cvt forms below
@@ -469,10 +501,10 @@ def NOOP_SDNodeXForm : SDNodeXForm<imm, [{}]>;
 //
 
 /// PatFrag - Represents a pattern fragment.  This can match something on the
-/// DAG, frame a single node to multiply nested other fragments.
+/// DAG, from a single node to multiple nested other fragments.
 ///
 class PatFrag<dag ops, dag frag, code pred = [{}],
-              SDNodeXForm xform = NOOP_SDNodeXForm> {
+              SDNodeXForm xform = NOOP_SDNodeXForm> : SDPatternOperator {
   dag Operands = ops;
   dag Fragment = frag;
   code Predicate = pred;
@@ -822,7 +854,7 @@ def cvtfu : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
 //===----------------------------------------------------------------------===//
 // Selection DAG Pattern Support.
 //
-// Patterns are what are actually matched against the target-flavored
+// Patterns are what are actually matched against by the target-flavored
 // instruction selection DAG.  Instructions defined by the target implicitly
 // define patterns in most cases, but patterns can also be explicitly added when
 // an operation is defined by a sequence of instructions (e.g. loading a large
@@ -834,7 +866,7 @@ class Pattern<dag patternToMatch, list<dag> resultInstrs> {
   dag             PatternToMatch  = patternToMatch;
   list<dag>       ResultInstrs    = resultInstrs;
   list<Predicate> Predicates      = [];  // See class Instruction in Target.td.
-  int             AddedComplexity = 0;  // See class Instruction in Target.td.
+  int             AddedComplexity = 0;   // See class Instruction in Target.td.
 }
 
 // Pat - A simple (but common) form of a pattern, which produces a simple result
diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h
index 2be183440224..c9ca7223b5f5 100644
--- a/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -59,8 +59,8 @@ public:
                           SDValue Op1, SDValue Op2,
                           SDValue Op3, unsigned Align, bool isVolatile,
                           bool AlwaysInline,
-                          const Value *DstSV, uint64_t DstOff,
-                          const Value *SrcSV, uint64_t SrcOff) const {
+                          MachinePointerInfo DstPtrInfo,
+                          MachinePointerInfo SrcPtrInfo) const {
     return SDValue();
   }
 
@@ -75,8 +75,8 @@ public:
                            SDValue Chain,
                            SDValue Op1, SDValue Op2,
                            SDValue Op3, unsigned Align, bool isVolatile,
-                           const Value *DstSV, uint64_t DstOff,
-                           const Value *SrcSV, uint64_t SrcOff) const {
+                           MachinePointerInfo DstPtrInfo,
+                           MachinePointerInfo SrcPtrInfo) const {
     return SDValue();
   }
 
@@ -91,7 +91,7 @@ public:
                           SDValue Chain,
                           SDValue Op1, SDValue Op2,
                           SDValue Op3, unsigned Align, bool isVolatile,
-                          const Value *DstSV, uint64_t DstOff) const {
+                          MachinePointerInfo DstPtrInfo) const {
     return SDValue();
   }
 };
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 0de100348d0f..12398813cc76 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -99,6 +99,8 @@ ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool
 /// createFunctionInliningPass - Return a new pass object that uses a heuristic
 /// to inline direct function calls to small functions.
 ///
+/// The -inline-threshold command line option takes precedence over the
+/// threshold given here.
 Pass *createFunctionInliningPass();
 Pass *createFunctionInliningPass(int Threshold);
 
@@ -186,12 +188,6 @@ ModulePass *createBlockExtractorPass();
 /// (prototypes) that are not used.
 ModulePass *createStripDeadPrototypesPass();
 
-//===----------------------------------------------------------------------===//
-/// createPartialSpecializationPass - This pass specializes functions for
-/// constant arguments.
-///
-ModulePass *createPartialSpecializationPass();
-
 //===----------------------------------------------------------------------===//
 /// createFunctionAttrsPass - This pass discovers functions that do not access
 /// memory, or only read memory, and gives them the readnone/readonly attribute.
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 9c579ac76105..aa9873fb8afa 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -25,6 +25,9 @@ ModulePass *createEdgeProfilerPass();
 // Insert optimal edge profiling instrumentation
 ModulePass *createOptimalEdgeProfilerPass();
 
+// Insert path profiling instrumentation
+ModulePass *createPathProfilerPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/RSProfiling.h b/include/llvm/Transforms/RSProfiling.h
deleted file mode 100644
index 02439e8e2388..000000000000
--- a/include/llvm/Transforms/RSProfiling.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===- RSProfiling.cpp - Various profiling using random sampling ----------===//
-//
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the abstract interface that a profiler must implement to
-// support the random profiling transform.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_RSPROFILING_H
-#define LLVM_TRANSFORMS_RSPROFILING_H
-
-#include "llvm/Pass.h"
-
-namespace llvm {
-  class Value;
-  
-  //===--------------------------------------------------------------------===//
-  /// RSProfilers - The basic Random Sampling Profiler Interface  Any profiler 
-  /// that implements this interface can be transformed by the random sampling
-  /// pass to be sample based rather than always on.
-  ///
-  /// The only exposed function can be queried to find out if an instruction
-  /// was original or if it was inserted by the profiler.  Implementations of
-  /// this interface are expected to chain to other implementations, such that
-  /// multiple profilers can be support simultaniously.
-  struct RSProfilers : public ModulePass {
-    static char ID; // Pass identification, replacement for typeinfo
-    RSProfilers() : ModulePass(&ID) {}
-
-    /// isProfiling - This method returns true if the value passed it was 
-    /// inserted by the profiler.
-    virtual bool isProfiling(Value* v) = 0;
-  };
-}
-
-#endif
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 0c35d7e01fa4..6f2a38e5840c 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -73,7 +73,8 @@ FunctionPass *createAggressiveDCEPass();
 // ScalarReplAggregates - Break up alloca's of aggregates into multiple allocas
 // if possible.
 //
-FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1);
+FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
+                                             bool UseDomTree = true);
 
 //===----------------------------------------------------------------------===//
 //
@@ -117,6 +118,12 @@ Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
 //
 Pass *createLoopUnswitchPass(bool OptimizeForSize = false);
 
+//===----------------------------------------------------------------------===//
+//
+// LoopInstSimplify - This pass simplifies instructions in a loop's body.
+//
+Pass *createLoopInstSimplifyPass();
+
 //===----------------------------------------------------------------------===//
 //
 // LoopUnroll - This pass is a simple loop unrolling pass.
@@ -131,11 +138,10 @@ Pass *createLoopRotatePass();
 
 //===----------------------------------------------------------------------===//
 //
-// LoopIndexSplit - This pass divides loop's iteration range by spliting loop
-// such that each individual loop is executed efficiently.
+// LoopIdiom - This pass recognizes and replaces idioms in loops.
 //
-Pass *createLoopIndexSplitPass();
-
+Pass *createLoopIdiomPass();
+  
 //===----------------------------------------------------------------------===//
 //
 // PromoteMemoryToRegister - This pass is used to promote memory references to
@@ -259,6 +265,13 @@ FunctionPass *createBlockPlacementPass();
 Pass *createLCSSAPass();
 extern char &LCSSAID;
 
+//===----------------------------------------------------------------------===//
+//
+// EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
+// tree.
+//
+FunctionPass *createEarlyCSEPass();
+  
 //===----------------------------------------------------------------------===//
 //
 // GVN - This pass performs global value numbering and redundant load 
@@ -329,6 +342,13 @@ Pass *createLowerAtomicPass();
 //
 Pass *createCorrelatedValuePropagationPass();
 
+//===----------------------------------------------------------------------===//
+//
+// InstructionSimplifier - Remove redundant instructions.
+//
+FunctionPass *createInstructionSimplifierPass();
+extern char &InstructionSimplifierID;
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/AddrModeMatcher.h b/include/llvm/Transforms/Utils/AddrModeMatcher.h
index be601e257b8c..0678eccb5d69 100644
--- a/include/llvm/Transforms/Utils/AddrModeMatcher.h
+++ b/include/llvm/Transforms/Utils/AddrModeMatcher.h
@@ -39,6 +39,12 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
   ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
   void print(raw_ostream &OS) const;
   void dump() const;
+  
+  bool operator==(const ExtAddrMode& O) const {
+    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+  }
 };
 
 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
@@ -84,7 +90,7 @@ public:
     bool Success = 
       AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                             MemoryInst, Result).MatchAddr(V, 0);
-    Success = Success; assert(Success && "Couldn't select *anything*?");
+    (void)Success; assert(Success && "Couldn't select *anything*?");
     return Result;
   }
 private:
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 0f5445077bef..533586028700 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -22,9 +22,10 @@
 
 namespace llvm {
 
+class AliasAnalysis;
 class Instruction;
 class Pass;
-class AliasAnalysis;
+class ReturnInst;
 
 /// DeleteDeadBlock - Delete the specified block, which must have no
 /// predecessors.
@@ -35,7 +36,7 @@ void DeleteDeadBlock(BasicBlock *BB);
 /// any single-entry PHI nodes in it, fold them away.  This handles the case
 /// when all entries to the PHI nodes in a block are guaranteed equal, such as
 /// when the block has exactly one predecessor.
-void FoldSingleEntryPHINodes(BasicBlock *BB);
+void FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P = 0);
 
 /// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
 /// is dead. Also recursively delete any operands that become dead as
@@ -46,7 +47,7 @@ bool DeleteDeadPHIs(BasicBlock *BB);
 
 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
 /// if possible.  The return value indicates success or failure.
-bool MergeBlockIntoPredecessor(BasicBlock* BB, Pass* P = 0);
+bool MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P = 0);
 
 // ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
 // with a value, then remove and delete the original instruction.
@@ -75,15 +76,6 @@ void FindFunctionBackedges(const Function &F,
       SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result);
   
 
-// RemoveSuccessor - Change the specified terminator instruction such that its
-// successor #SuccNum no longer exists.  Because this reduces the outgoing
-// degree of the current basic block, the actual terminator instruction itself
-// may have to be changed.  In the case where the last successor of the block is
-// deleted, a return instruction is inserted in its place which can cause a
-// suprising change in program behavior if it is not expected.
-//
-void RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum);
-
 /// GetSuccessorNumber - Search for the specified successor of basic block BB
 /// and return its position in the terminator instruction's list of
 /// successors.  It is an error to call this with a block that is not a
@@ -180,7 +172,15 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
 BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
                                    unsigned NumPreds, const char *Suffix,
                                    Pass *P = 0);
-  
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+                                       BasicBlock *Pred);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index c75c14277ff6..e82593838467 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -47,11 +47,6 @@ namespace llvm {
   /// specified pointer arguments and length.
   Value *EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
                     const TargetData *TD, StringRef Name = "strncpy");
-  
-  /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This
-  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                    bool isVolatile, IRBuilder<> &B, const TargetData *TD);
 
   /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
   /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
@@ -59,11 +54,6 @@ namespace llvm {
   Value *EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
                        IRBuilder<> &B, const TargetData *TD);
 
-  /// EmitMemMove - Emit a call to the memmove function to the builder.  This
-  /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-  Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                     bool isVolatile, IRBuilder<> &B, const TargetData *TD);
-
   /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
   /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
   Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
@@ -73,10 +63,6 @@ namespace llvm {
   Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                     const TargetData *TD);
 
-  /// EmitMemSet - Emit a call to the memset function
-  Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
-                    IRBuilder<> &B, const TargetData *TD);
-
   /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
   /// (e.g.  'floor').  This function is known to take a single of type matching
   /// 'Op' and returns one value with the same type.  If 'Op' is a long double,
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 62bf92aced49..24ebb109a0ad 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
 
@@ -46,7 +47,7 @@ class AllocaInst;
 /// CloneModule - Return an exact copy of the specified module
 ///
 Module *CloneModule(const Module *M);
-Module *CloneModule(const Module *M, ValueMap<const Value*, Value*> &VMap);
+Module *CloneModule(const Module *M, ValueToValueMapTy &VMap);
 
 /// ClonedCodeInfo - This struct can be used to capture information about code
 /// being cloned, while it is being cloned.
@@ -102,7 +103,7 @@ struct ClonedCodeInfo {
 /// parameter.
 ///
 BasicBlock *CloneBasicBlock(const BasicBlock *BB,
-                            ValueMap<const Value*, Value*> &VMap,
+                            ValueToValueMapTy &VMap,
                             const Twine &NameSuffix = "", Function *F = 0,
                             ClonedCodeInfo *CodeInfo = 0);
 
@@ -110,7 +111,7 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
 /// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate
 /// VMap using old blocks to new blocks mapping.
 Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI, 
-                ValueMap<const Value *, Value *> &VMap, Pass *P);
+                ValueToValueMapTy &VMap, Pass *P);
 
 /// CloneFunction - Return a copy of the specified function, but without
 /// embedding the function into another module.  Also, any references specified
@@ -125,14 +126,14 @@ Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
 /// mappings.
 ///
 Function *CloneFunction(const Function *F,
-                        ValueMap<const Value*, Value*> &VMap,
+                        ValueToValueMapTy &VMap,
                         bool ModuleLevelChanges,
                         ClonedCodeInfo *CodeInfo = 0);
 
 /// CloneFunction - Version of the function that doesn't need the VMap.
 ///
 inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   return CloneFunction(F, VMap, CodeInfo);
 }
 
@@ -146,7 +147,7 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
 /// mappings.
 ///
 void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                       ValueMap<const Value*, Value*> &VMap,
+                       ValueToValueMapTy &VMap,
                        bool ModuleLevelChanges,
                        SmallVectorImpl<ReturnInst*> &Returns,
                        const char *NameSuffix = "", 
@@ -164,7 +165,7 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 /// mappings.
 ///
 void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                               ValueMap<const Value*, Value*> &VMap,
+                               ValueToValueMapTy &VMap,
                                bool ModuleLevelChanges,
                                SmallVectorImpl<ReturnInst*> &Returns,
                                const char *NameSuffix = "", 
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index caae27f47a44..26b5dd8365f1 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -69,6 +69,10 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN);
 ///
 /// This returns true if it changed the code, note that it can delete
 /// instructions in other blocks as well in this block.
+///
+/// WARNING: Do not use this function on unreachable blocks, as recursive
+/// simplification is not able to handle corner-case scenarios that can
+/// arise in them.
 bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0);
     
 //===----------------------------------------------------------------------===//
@@ -141,6 +145,18 @@ AllocaInst *DemoteRegToStack(Instruction &X,
 /// The phi node is deleted and it returns the pointer to the alloca inserted. 
 AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = 0);
 
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+                                    const TargetData *TD = 0);
+
+/// getKnownAlignment - Try to infer an alignment for the specified pointer.
+static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) {
+  return getOrEnforceKnownAlignment(V, 0, TD);
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 35cfaddb7379..98d51a29ad71 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -38,8 +38,7 @@ bool isAllocaPromotable(const AllocaInst *AI);
 /// made to the IR.
 ///
 void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
-                     DominatorTree &DT, DominanceFrontier &DF,
-                     AliasSetTracker *AST = 0);
+                     DominatorTree &DT, AliasSetTracker *AST = 0);
 
 } // End llvm namespace
 
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index e50a6b15df81..b4048b9b4409 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -108,6 +108,55 @@ private:
   void operator=(const SSAUpdater&); // DO NOT IMPLEMENT
   SSAUpdater(const SSAUpdater&);     // DO NOT IMPLEMENT
 };
+  
+/// LoadAndStorePromoter - This little helper class provides a convenient way to
+/// promote a collection of loads and stores into SSA Form using the SSAUpdater.
+/// This handles complexities that SSAUpdater doesn't, such as multiple loads
+/// and stores in one block.
+///
+/// Clients of this class are expected to subclass this and implement the
+/// virtual methods.
+///
+class LoadAndStorePromoter {
+protected:
+  SSAUpdater &SSA;
+public:
+  LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+                       SSAUpdater &S, StringRef Name = StringRef());
+  virtual ~LoadAndStorePromoter() {}
+  
+  /// run - This does the promotion.  Insts is a list of loads and stores to
+  /// promote, and Name is the basename for the PHIs to insert.  After this is
+  /// complete, the loads and stores are removed from the code.
+  void run(const SmallVectorImpl<Instruction*> &Insts) const;
+  
+  
+  /// Return true if the specified instruction is in the Inst list (which was
+  /// passed into the run method).  Clients should implement this with a more
+  /// efficient version if possible.
+  virtual bool isInstInList(Instruction *I,
+                            const SmallVectorImpl<Instruction*> &Insts) const {
+    for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+      if (Insts[i] == I)
+        return true;
+    return false;
+  }
+  
+  /// doExtraRewritesBeforeFinalDeletion - This hook is invoked after all the
+  /// stores are found and inserted as available values, but 
+  virtual void doExtraRewritesBeforeFinalDeletion() const {
+  }
+  
+  /// replaceLoadWithValue - Clients can choose to implement this to get
+  /// notified right before a load is RAUW'd another value.
+  virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+  }
+
+  /// This is called before each instruction is deleted.
+  virtual void instructionDeleted(Instruction *I) const {
+  }
+
+};
 
 } // End llvm namespace
 
diff --git a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index a5060e6f5860..54506cfff4c3 100644
--- a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -27,7 +27,9 @@ struct UnifyFunctionExitNodes : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
   UnifyFunctionExitNodes() : FunctionPass(ID),
-                             ReturnBlock(0), UnwindBlock(0) {}
+                             ReturnBlock(0), UnwindBlock(0) {
+    initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+  }
 
   // We can preserve non-critical-edgeness when we unify function exit nodes
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 5274112897b7..d612213a8717 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -20,12 +20,31 @@
 namespace llvm {
   class Value;
   class Instruction;
-  typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
+  typedef ValueMap<const Value *, TrackingVH<Value> > ValueToValueMapTy;
 
+  /// RemapFlags - These are flags that the value mapping APIs allow.
+  enum RemapFlags {
+    RF_None = 0,
+    
+    /// RF_NoModuleLevelChanges - If this flag is set, the remapper knows that
+    /// only local values within a function (such as an instruction or argument)
+    /// are mapped, not global values like functions and global metadata.
+    RF_NoModuleLevelChanges = 1,
+    
+    /// RF_IgnoreMissingEntries - If this flag is set, the remapper ignores
+    /// entries that are not in the value map.  If it is unset, it aborts if an
+    /// operand is asked to be remapped which doesn't exist in the mapping.
+    RF_IgnoreMissingEntries = 2
+  };
+  
+  static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
+    return RemapFlags(unsigned(LHS)|unsigned(RHS));
+  }
+  
   Value *MapValue(const Value *V, ValueToValueMapTy &VM,
-                  bool ModuleLevelChanges);
+                  RemapFlags Flags = RF_None);
   void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
-                        bool ModuleLevelChanges);
+                        RemapFlags Flags = RF_None);
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index f7d6fd57a23c..0939d67265b0 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -12,7 +12,6 @@
 
 #include "llvm/AbstractTypeUser.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/System/DataTypes.h"
 #include "llvm/ADT/GraphTraits.h"
 #include <string>
 #include <vector>
@@ -76,19 +75,20 @@ public:
     PPC_FP128TyID,   ///<  5: 128 bit floating point type (two 64-bits)
     LabelTyID,       ///<  6: Labels
     MetadataTyID,    ///<  7: Metadata
+    X86_MMXTyID,     ///<  8: MMX vectors (64 bits)
 
     // Derived types... see DerivedTypes.h file...
     // Make sure FirstDerivedTyID stays up to date!!!
-    IntegerTyID,     ///<  8: Arbitrary bit width integers
-    FunctionTyID,    ///<  9: Functions
-    StructTyID,      ///< 10: Structures
-    ArrayTyID,       ///< 11: Arrays
-    PointerTyID,     ///< 12: Pointers
-    OpaqueTyID,      ///< 13: Opaque: type with unknown structure
-    VectorTyID,      ///< 14: SIMD 'packed' format, or other vector type
+    IntegerTyID,     ///<  9: Arbitrary bit width integers
+    FunctionTyID,    ///< 10: Functions
+    StructTyID,      ///< 11: Structures
+    ArrayTyID,       ///< 12: Arrays
+    PointerTyID,     ///< 13: Pointers
+    OpaqueTyID,      ///< 14: Opaque: type with unknown structure
+    VectorTyID,      ///< 15: SIMD 'packed' format, or other vector type
 
     NumTypeIDs,                         // Must remain as last defined ID
-    LastPrimitiveTyID = MetadataTyID,
+    LastPrimitiveTyID = X86_MMXTyID,
     FirstDerivedTyID = IntegerTyID
   };
 
@@ -212,6 +212,9 @@ public:
   bool isFloatingPointTy() const { return ID == FloatTyID || ID == DoubleTyID ||
       ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; }
 
+  /// isX86_MMXTy - Return true if this is X86 MMX.
+  bool isX86_MMXTy() const { return ID == X86_MMXTyID; }
+
   /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
   ///
   bool isFPOrFPVectorTy() const;
@@ -310,7 +313,8 @@ public:
   ///
   bool isSized() const {
     // If it's a primitive, it is always sized.
-    if (ID == IntegerTyID || isFloatingPointTy() || ID == PointerTyID)
+    if (ID == IntegerTyID || isFloatingPointTy() || ID == PointerTyID ||
+        ID == X86_MMXTyID)
       return true;
     // If it is not something that can have a size (e.g. a function or label),
     // it doesn't have a size.
@@ -400,6 +404,7 @@ public:
   static const Type *getX86_FP80Ty(LLVMContext &C);
   static const Type *getFP128Ty(LLVMContext &C);
   static const Type *getPPC_FP128Ty(LLVMContext &C);
+  static const Type *getX86_MMXTy(LLVMContext &C);
   static const IntegerType *getIntNTy(LLVMContext &C, unsigned N);
   static const IntegerType *getInt1Ty(LLVMContext &C);
   static const IntegerType *getInt8Ty(LLVMContext &C);
@@ -416,6 +421,7 @@ public:
   static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
   static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
   static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
   static const PointerType *getIntNPtrTy(LLVMContext &C, unsigned N,
                                          unsigned AS = 0);
   static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
index 26b1dbf2df41..9fdcb983232c 100644
--- a/include/llvm/TypeSymbolTable.h
+++ b/include/llvm/TypeSymbolTable.h
@@ -16,12 +16,11 @@
 
 #include "llvm/Type.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
 #include <map>
 
 namespace llvm {
 
-class StringRef;
-
 /// This class provides a symbol table of name/type pairs with operations to
 /// support constructing, searching and iterating over the symbol table. The
 /// class derives from AbstractTypeUser so that the contents of the symbol
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
index e1ebc6a51be5..ccbdd7fcae13 100644
--- a/include/llvm/Use.h
+++ b/include/llvm/Use.h
@@ -25,7 +25,6 @@
 #ifndef LLVM_USE_H
 #define LLVM_USE_H
 
-#include "llvm/Support/Casting.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include <cstddef>
 #include <iterator>
@@ -35,9 +34,8 @@ namespace llvm {
 class Value;
 class User;
 class Use;
-
-/// Tag - generic tag type for (at least 32 bit) pointers
-enum Tag { noTag, tagOne, tagTwo, tagThree };
+template<typename>
+struct simplify_type;
 
 // Use** is only 4-byte aligned.
 template<>
@@ -67,17 +65,19 @@ private:
   Use(const Use &U);
 
   /// Destructor - Only for zap()
-  inline ~Use() {
+  ~Use() {
     if (Val) removeFromList();
   }
 
-  /// Default ctor - This leaves the Use completely uninitialized.  The only
-  /// thing that is valid to do with this use is to call the "init" method.
-  inline Use() {}
-  enum PrevPtrTag { zeroDigitTag = noTag
-                  , oneDigitTag = tagOne
-                  , stopTag = tagTwo
-                  , fullStopTag = tagThree };
+  enum PrevPtrTag { zeroDigitTag
+                  , oneDigitTag
+                  , stopTag
+                  , fullStopTag };
+
+  /// Constructor
+  Use(PrevPtrTag tag) : Val(0) {
+    Prev.setInt(tag);
+  }
 
 public:
   /// Normally Use will just implicitly convert to a Value* that it holds.
@@ -112,11 +112,9 @@ public:
   /// a User changes.
   static void zap(Use *Start, const Use *Stop, bool del = false);
 
-  /// getPrefix - Return deletable pointer if appropriate
-  Use *getPrefix();
 private:
   const Use* getImpliedUser() const;
-  static Use *initTags(Use *Start, Use *Stop, ptrdiff_t Done = 0);
+  static Use *initTags(Use *Start, Use *Stop);
   
   Value *Val;
   Use *Next;
@@ -210,6 +208,15 @@ public:
   unsigned getOperandNo() const;
 };
 
+//===----------------------------------------------------------------------===//
+//                         AugmentedUse layout struct
+//===----------------------------------------------------------------------===//
+
+struct AugmentedUse : public Use {
+  PointerIntPair<User*, 1, unsigned> ref;
+  AugmentedUse(); // not implemented
+};
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/User.h b/include/llvm/User.h
index f8277952ee4b..1363495f7c07 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -29,20 +29,6 @@ namespace llvm {
 template <class>
 struct OperandTraits;
 
-class User;
-
-/// OperandTraits<User> - specialization to User
-template <>
-struct OperandTraits<User> {
-  static inline Use *op_begin(User*);
-  static inline Use *op_end(User*);
-  static inline unsigned operands(const User*);
-  template <class U>
-  struct Layout {
-    typedef U overlay;
-  };
-};
-
 class User : public Value {
   User(const User &);             // Do not implement
   void *operator new(size_t);     // Do not implement
@@ -61,21 +47,18 @@ protected:
   unsigned NumOperands;
 
   void *operator new(size_t s, unsigned Us);
-  void *operator new(size_t s, unsigned Us, bool Prefix);
   User(const Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
     : Value(ty, vty), OperandList(OpList), NumOperands(NumOps) {}
   Use *allocHungoffUses(unsigned) const;
-  void dropHungoffUses(Use *U) {
-    if (OperandList == U) {
-      OperandList = 0;
-      NumOperands = 0;
-    }
-    Use::zap(U, U->getImpliedUser(), true);
+  void dropHungoffUses() {
+    Use::zap(OperandList, OperandList + NumOperands, true);
+    OperandList = 0;
+    // Reset NumOperands so User::operator delete() does the right thing.
+    NumOperands = 0;
   }
 public:
   ~User() {
-    if ((intptr_t(OperandList) & 1) == 0)
-      Use::zap(OperandList, OperandList + NumOperands);
+    Use::zap(OperandList, OperandList + NumOperands);
   }
   /// operator delete - free memory allocated for User and Use objects
   void operator delete(void *Usr);
@@ -158,18 +141,6 @@ public:
   }
 };
 
-inline Use *OperandTraits<User>::op_begin(User *U) {
-  return U->op_begin();
-}
-
-inline Use *OperandTraits<User>::op_end(User *U) {
-  return U->op_end();
-}
-
-inline unsigned OperandTraits<User>::operands(const User *U) {
-  return U->getNumOperands();
-}
-
 template<> struct simplify_type<User::op_iterator> {
   typedef Value* SimpleType;
 
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 8740f353ab51..130e2735f525 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -252,6 +252,12 @@ public:
     return SubclassOptionalData;
   }
 
+  /// clearSubclassOptionalData - Clear the optional flags contained in
+  /// this value.
+  void clearSubclassOptionalData() {
+    SubclassOptionalData = 0;
+  }
+
   /// hasSameSubclassOptionalData - Test whether the optional flags contained
   /// in this value are equal to the optional flags in the given value.
   bool hasSameSubclassOptionalData(const Value *V) const {
@@ -285,15 +291,9 @@ public:
     return const_cast<Value*>(this)->stripPointerCasts();
   }
 
-  /// getUnderlyingObject - This method strips off any GEP address adjustments
-  /// and pointer casts from the specified value, returning the original object
-  /// being addressed.  Note that the returned value has pointer type if the
-  /// specified value does.  If the MaxLookup value is non-zero, it limits the
-  /// number of instructions to be stripped off.
-  Value *getUnderlyingObject(unsigned MaxLookup = 6);
-  const Value *getUnderlyingObject(unsigned MaxLookup = 6) const {
-    return const_cast<Value*>(this)->getUnderlyingObject(MaxLookup);
-  }
+  /// isDereferenceablePointer - Test if this value is always a pointer to
+  /// allocated and suitably aligned memory for a simple load or store.
+  bool isDereferenceablePointer() const;
   
   /// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
   /// return the value in the PHI node corresponding to PredBB.  If not, return
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index 35fc97b2d3ce..1738cc4a7a79 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -16,7 +16,7 @@
 
 #include "llvm/Value.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   template<typename ValueSubClass, typename ItemParentClass>
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 1f2528fa560f..be02ddbaa534 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -30,12 +30,13 @@
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Target/TargetData.h"
 using namespace llvm;
 
 // Register the AliasAnalysis interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis");
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
 char AliasAnalysis::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -43,15 +44,15 @@ char AliasAnalysis::ID = 0;
 //===----------------------------------------------------------------------===//
 
 AliasAnalysis::AliasResult
-AliasAnalysis::alias(const Value *V1, unsigned V1Size,
-                     const Value *V2, unsigned V2Size) {
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->alias(V1, V1Size, V2, V2Size);
+  return AA->alias(LocA, LocB);
 }
 
-bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                           bool OrLocal) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->pointsToConstantMemory(P);
+  return AA->pointsToConstantMemory(Loc, OrLocal);
 }
 
 void AliasAnalysis::deleteValue(Value *V) {
@@ -64,49 +65,55 @@ void AliasAnalysis::copyValue(Value *From, Value *To) {
   AA->copyValue(From, To);
 }
 
+void AliasAnalysis::addEscapingUse(Use &U) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->addEscapingUse(U);
+}
+
+
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                             const Value *P, unsigned Size) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+                             const Location &Loc) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
 
   ModRefBehavior MRB = getModRefBehavior(CS);
   if (MRB == DoesNotAccessMemory)
     return NoModRef;
 
   ModRefResult Mask = ModRef;
-  if (MRB == OnlyReadsMemory)
+  if (onlyReadsMemory(MRB))
     Mask = Ref;
-  else if (MRB == AliasAnalysis::AccessesArguments) {
+
+  if (onlyAccessesArgPointees(MRB)) {
     bool doesAlias = false;
-    for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-         AI != AE; ++AI)
-      if (!isNoAlias(*AI, ~0U, P, Size)) {
-        doesAlias = true;
-        break;
-      }
+    if (doesAccessArgPointees(MRB))
+      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+           AI != AE; ++AI)
+        if (!isNoAlias(Location(*AI), Loc)) {
+          doesAlias = true;
+          break;
+        }
 
     if (!doesAlias)
       return NoModRef;
   }
 
-  // If P points to a constant memory location, the call definitely could not
+  // If Loc is a constant memory location, the call definitely could not
   // modify the memory location.
-  if ((Mask & Mod) && pointsToConstantMemory(P))
+  if ((Mask & Mod) && pointsToConstantMemory(Loc))
     Mask = ModRefResult(Mask & ~Mod);
 
-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Mask;
 
   // Otherwise, fall back to the next AA in the chain. But we can merge
   // in any mask we've managed to compute.
-  return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);
+  return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
 }
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
 
   // If CS1 or CS2 are readnone, they don't interact.
   ModRefBehavior CS1B = getModRefBehavior(CS1);
@@ -116,45 +123,47 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   if (CS2B == DoesNotAccessMemory) return NoModRef;
 
   // If they both only read from memory, there is no dependence.
-  if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+  if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
     return NoModRef;
 
   AliasAnalysis::ModRefResult Mask = ModRef;
 
   // If CS1 only reads memory, the only dependence on CS2 can be
   // from CS1 reading memory written by CS2.
-  if (CS1B == OnlyReadsMemory)
+  if (onlyReadsMemory(CS1B))
     Mask = ModRefResult(Mask & Ref);
 
   // If CS2 only access memory through arguments, accumulate the mod/ref
   // information from CS1's references to the memory referenced by
   // CS2's arguments.
-  if (CS2B == AccessesArguments) {
+  if (onlyAccessesArgPointees(CS2B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    for (ImmutableCallSite::arg_iterator
-         I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
-      R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
-      if (R == Mask)
-        break;
-    }
+    if (doesAccessArgPointees(CS2B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+        R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+        if (R == Mask)
+          break;
+      }
     return R;
   }
 
   // If CS1 only accesses memory through arguments, check if CS2 references
   // any of the memory referenced by CS1's arguments. If not, return NoModRef.
-  if (CS1B == AccessesArguments) {
+  if (onlyAccessesArgPointees(CS1B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    for (ImmutableCallSite::arg_iterator
-         I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
-      if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
-        R = Mask;
-        break;
-      }
+    if (doesAccessArgPointees(CS1B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+        if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+          R = Mask;
+          break;
+        }
     if (R == NoModRef)
       return R;
   }
 
-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Mask;
 
   // Otherwise, fall back to the next AA in the chain. But we can merge
@@ -164,8 +173,7 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
 
 AliasAnalysis::ModRefBehavior
 AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
-  // Don't assert AA because BasicAA calls us in order to make use of the
-  // logic here.
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
 
   ModRefBehavior Min = UnknownModRefBehavior;
 
@@ -174,12 +182,12 @@ AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
   if (const Function *F = CS.getCalledFunction())
     Min = getModRefBehavior(F);
 
-  // If this is BasicAA, don't forward.
+  // If this is the end of the chain, don't forward.
   if (!AA) return Min;
 
   // Otherwise, fall back to the next AA in the chain. But we can merge
   // in any result we've managed to compute.
-  return std::min(AA->getModRefBehavior(CS), Min);
+  return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
 }
 
 AliasAnalysis::ModRefBehavior
@@ -188,20 +196,66 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
   return AA->getModRefBehavior(F);
 }
 
-
 //===----------------------------------------------------------------------===//
 // AliasAnalysis non-virtual helper method implementation
 //===----------------------------------------------------------------------===//
 
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+  return Location(LI->getPointerOperand(),
+                  getTypeStoreSize(LI->getType()),
+                  LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+  return Location(SI->getPointerOperand(),
+                  getTypeStoreSize(SI->getValueOperand()->getType()),
+                  SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+  return Location(VI->getPointerOperand(),
+                  UnknownSize,
+                  VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location 
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+  return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location 
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+  
+  return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
   // Be conservative in the face of volatile.
   if (L->isVolatile())
     return ModRef;
 
   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
-  if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size))
+  if (!alias(getLocation(L), Loc))
     return NoModRef;
 
   // Otherwise, a load just reads.
@@ -209,20 +263,19 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
 }
 
 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
   // Be conservative in the face of volatile.
   if (S->isVolatile())
     return ModRef;
 
   // If the store address cannot alias the pointer in question, then the
   // specified memory cannot be modified by the store.
-  if (!alias(S->getOperand(1),
-             getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+  if (!alias(getLocation(S), Loc))
     return NoModRef;
 
   // If the pointer is a pointer to constant memory, then it could not have been
   // modified by this store.
-  if (pointsToConstantMemory(P))
+  if (pointsToConstantMemory(Loc))
     return NoModRef;
 
   // Otherwise, a store just writes.
@@ -230,29 +283,21 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size)
 }
 
 AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
   // If the va_arg address cannot alias the pointer in question, then the
   // specified memory cannot be accessed by the va_arg.
-  if (!alias(V->getOperand(0), UnknownSize, P, Size))
+  if (!alias(getLocation(V), Loc))
     return NoModRef;
 
   // If the pointer is a pointer to constant memory, then it could not have been
   // modified by this va_arg.
-  if (pointsToConstantMemory(P))
+  if (pointsToConstantMemory(Loc))
     return NoModRef;
 
   // Otherwise, a va_arg reads and writes.
   return ModRef;
 }
 
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
-}
-
 // AliasAnalysis destructor: DO NOT move this to the header file for
 // AliasAnalysis or else clients of the AliasAnalysis class may not depend on
 // the AliasAnalysis.o file in the current .a file, causing alias analysis
@@ -277,16 +322,16 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 /// getTypeStoreSize - Return the TargetData store size for the given type,
 /// if known, or a conservative value otherwise.
 ///
-unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) {
-  return TD ? TD->getTypeStoreSize(Ty) : ~0u;
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+  return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
 }
 
 /// canBasicBlockModify - Return true if it is possible for execution of the
 /// specified basic block to modify the value pointed to by Ptr.
 ///
 bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
-                                        const Value *Ptr, unsigned Size) {
-  return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size);
+                                        const Location &Loc) {
+  return canInstructionRangeModify(BB.front(), BB.back(), Loc);
 }
 
 /// canInstructionRangeModify - Return true if it is possible for the execution
@@ -296,7 +341,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
 ///
 bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
                                               const Instruction &I2,
-                                              const Value *Ptr, unsigned Size) {
+                                              const Location &Loc) {
   assert(I1.getParent() == I2.getParent() &&
          "Instructions not in same basic block!");
   BasicBlock::const_iterator I = &I1;
@@ -304,7 +349,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
   ++E;  // Convert from inclusive to exclusive range.
 
   for (; I != E; ++I) // Check every instruction in range
-    if (getModRefInfo(I, Ptr, Size) & Mod)
+    if (getModRefInfo(I, Loc) & Mod)
       return true;
   return false;
 }
@@ -336,9 +381,3 @@ bool llvm::isIdentifiedObject(const Value *V) {
     return A->hasNoAliasAttr() || A->hasByValAttr();
   return false;
 }
-
-// Because of the way .a files work, we must force the BasicAA implementation to
-// be pulled in if the AliasAnalysis classes are pulled in.  Otherwise we run
-// the risk of AliasAnalysis being used, but the default implementation not
-// being linked into the tool that uses it.
-DEFINING_FILE_FOR(AliasAnalysis)
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index b17804186a63..d947220e078d 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -29,13 +29,14 @@ PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
 
 namespace {
   class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
-    unsigned No, May, Must;
+    unsigned No, May, Partial, Must;
     unsigned NoMR, JustRef, JustMod, MR;
     Module *M;
   public:
     static char ID; // Class identification, replacement for typeinfo
     AliasAnalysisCounter() : ModulePass(ID) {
-      No = May = Must = 0;
+      initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+      No = May = Partial = Must = 0;
       NoMR = JustRef = JustMod = MR = 0;
     }
 
@@ -44,7 +45,7 @@ namespace {
              << Val*100/Sum << "%)\n";
     }
     ~AliasAnalysisCounter() {
-      unsigned AASum = No+May+Must;
+      unsigned AASum = No+May+Partial+Must;
       unsigned MRSum = NoMR+JustRef+JustMod+MR;
       if (AASum + MRSum) { // Print a report if any counted queries occurred...
         errs() << "\n===== Alias Analysis Counter Report =====\n"
@@ -53,9 +54,12 @@ namespace {
         if (AASum) {
           printLine("no alias",     No, AASum);
           printLine("may alias",   May, AASum);
+          printLine("partial alias", Partial, AASum);
           printLine("must alias", Must, AASum);
           errs() << "  Alias Analysis Counter Summary: " << No*100/AASum << "%/"
-                 << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+                 << May*100/AASum << "%/"
+                 << Partial*100/AASum << "%/"
+                 << Must*100/AASum<<"%\n\n";
         }
 
         errs() << "  " << MRSum    << " Total Mod/Ref Queries Performed\n";
@@ -94,17 +98,16 @@ namespace {
     }
     
     // FIXME: We could count these too...
-    bool pointsToConstantMemory(const Value *P) {
-      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
     }
 
     // Forwarding functions: just delegate to a real AA implementation, counting
     // the number of responses...
-    AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size);
+    AliasResult alias(const Location &LocA, const Location &LocB);
 
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size);
+                               const Location &Loc);
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                ImmutableCallSite CS2) {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
@@ -114,32 +117,32 @@ namespace {
 
 char AliasAnalysisCounter::ID = 0;
 INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
-                   "Count Alias Analysis Query Responses", false, true, false);
+                   "Count Alias Analysis Query Responses", false, true, false)
 
 ModulePass *llvm::createAliasAnalysisCounterPass() {
   return new AliasAnalysisCounter();
 }
 
 AliasAnalysis::AliasResult
-AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
-                            const Value *V2, unsigned V2Size) {
-  AliasResult R = getAnalysis<AliasAnalysis>().alias(V1, V1Size, V2, V2Size);
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
 
   const char *AliasString;
   switch (R) {
   default: llvm_unreachable("Unknown alias type!");
   case NoAlias:   No++;   AliasString = "No alias"; break;
   case MayAlias:  May++;  AliasString = "May alias"; break;
+  case PartialAlias: Partial++; AliasString = "Partial alias"; break;
   case MustAlias: Must++; AliasString = "Must alias"; break;
   }
 
   if (PrintAll || (PrintAllFailures && R == MayAlias)) {
     errs() << AliasString << ":\t";
-    errs() << "[" << V1Size << "B] ";
-    WriteAsOperand(errs(), V1, true, M);
+    errs() << "[" << LocA.Size << "B] ";
+    WriteAsOperand(errs(), LocA.Ptr, true, M);
     errs() << ", ";
-    errs() << "[" << V2Size << "B] ";
-    WriteAsOperand(errs(), V2, true, M);
+    errs() << "[" << LocB.Size << "B] ";
+    WriteAsOperand(errs(), LocB.Ptr, true, M);
     errs() << "\n";
   }
 
@@ -148,8 +151,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
 
 AliasAnalysis::ModRefResult
 AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
-                                    const Value *P, unsigned Size) {
-  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
+                                    const Location &Loc) {
+  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
 
   const char *MRString;
   switch (R) {
@@ -162,8 +165,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
 
   if (PrintAll || (PrintAllFailures && R == ModRef)) {
     errs() << MRString << ":  Ptr: ";
-    errs() << "[" << Size << "B] ";
-    WriteAsOperand(errs(), P, true, M);
+    errs() << "[" << Loc.Size << "B] ";
+    WriteAsOperand(errs(), Loc.Ptr, true, M);
     errs() << "\t<->" << *CS.getInstruction() << '\n';
   }
   return R;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index ce363cbc7bbd..1afc1b71d93e 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -36,6 +36,7 @@ static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
 
 static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
 static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
 static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
 
 static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
@@ -45,12 +46,14 @@ static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
 
 namespace {
   class AAEval : public FunctionPass {
-    unsigned NoAlias, MayAlias, MustAlias;
+    unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
     unsigned NoModRef, Mod, Ref, ModRef;
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    AAEval() : FunctionPass(ID) {}
+    AAEval() : FunctionPass(ID) {
+      initializeAAEvalPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<AliasAnalysis>();
@@ -58,11 +61,12 @@ namespace {
     }
 
     bool doInitialization(Module &M) {
-      NoAlias = MayAlias = MustAlias = 0;
+      NoAlias = MayAlias = PartialAlias = MustAlias = 0;
       NoModRef = Mod = Ref = ModRef = 0;
 
       if (PrintAll) {
-        PrintNoAlias = PrintMayAlias = PrintMustAlias = true;
+        PrintNoAlias = PrintMayAlias = true;
+        PrintPartialAlias = PrintMustAlias = true;
         PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
       }
       return false;
@@ -74,8 +78,11 @@ namespace {
 }
 
 char AAEval::ID = 0;
-INITIALIZE_PASS(AAEval, "aa-eval",
-                "Exhaustive Alias Analysis Precision Evaluator", false, true);
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)
 
 FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
 
@@ -155,7 +162,7 @@ bool AAEval::runOnFunction(Function &F) {
     }
   }
 
-  if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
+  if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
       PrintNoModRef || PrintMod || PrintRef || PrintModRef)
     errs() << "Function: " << F.getName() << ": " << Pointers.size()
            << " pointers, " << CallSites.size() << " call sites\n";
@@ -163,12 +170,12 @@ bool AAEval::runOnFunction(Function &F) {
   // iterate over the worklist, and run the full (n^2)/2 disambiguations
   for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
-    unsigned I1Size = ~0u;
+    uint64_t I1Size = AliasAnalysis::UnknownSize;
     const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
     if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
 
     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
-      unsigned I2Size = ~0u;
+      uint64_t I2Size = AliasAnalysis::UnknownSize;
       const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
       if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
 
@@ -179,6 +186,10 @@ bool AAEval::runOnFunction(Function &F) {
       case AliasAnalysis::MayAlias:
         PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
         ++MayAlias; break;
+      case AliasAnalysis::PartialAlias:
+        PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+                     F.getParent());
+        ++PartialAlias; break;
       case AliasAnalysis::MustAlias:
         PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
         ++MustAlias; break;
@@ -195,7 +206,7 @@ bool AAEval::runOnFunction(Function &F) {
 
     for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
          V != Ve; ++V) {
-      unsigned Size = ~0u;
+      uint64_t Size = AliasAnalysis::UnknownSize;
       const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
       if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
 
@@ -250,7 +261,7 @@ static void PrintPercent(unsigned Num, unsigned Sum) {
 }
 
 bool AAEval::doFinalization(Module &M) {
-  unsigned AliasSum = NoAlias + MayAlias + MustAlias;
+  unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
   errs() << "===== Alias Analysis Evaluator Report =====\n";
   if (AliasSum == 0) {
     errs() << "  Alias Analysis Evaluator Summary: No pointers!\n";
@@ -260,10 +271,13 @@ bool AAEval::doFinalization(Module &M) {
     PrintPercent(NoAlias, AliasSum);
     errs() << "  " << MayAlias << " may alias responses ";
     PrintPercent(MayAlias, AliasSum);
+    errs() << "  " << PartialAlias << " partial alias responses ";
+    PrintPercent(PartialAlias, AliasSum);
     errs() << "  " << MustAlias << " must alias responses ";
     PrintPercent(MustAlias, AliasSum);
     errs() << "  Alias Analysis Evaluator Pointer Alias Summary: "
            << NoAlias*100/AliasSum  << "%/" << MayAlias*100/AliasSum << "%/"
+           << PartialAlias*100/AliasSum << "%/"
            << MustAlias*100/AliasSum << "%\n";
   }
 
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index b9fe64608c01..f15c05153e10 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -39,7 +39,9 @@ namespace {
     
   public:
     static char ID; // Class identification, replacement for typeinfo
-    AliasDebugger() : ModulePass(ID) {}
+    AliasDebugger() : ModulePass(ID) {
+      initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M) {
       InitializeAliasAnalysis(this);                 // set up super class
@@ -92,17 +94,18 @@ namespace {
     //------------------------------------------------
     // Implement the AliasAnalysis API
     //
-    AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size) {
-      assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before");
-      assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before");    
-      return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+    AliasResult alias(const Location &LocA, const Location &LocB) {
+      assert(Vals.find(LocA.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      assert(Vals.find(LocB.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      return AliasAnalysis::alias(LocA, LocB);
     }
 
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size) {
-      assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
-      return AliasAnalysis::getModRefInfo(CS, P, Size);
+                               const Location &Loc) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::getModRefInfo(CS, Loc);
     }
 
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
@@ -110,9 +113,9 @@ namespace {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
     }
     
-    bool pointsToConstantMemory(const Value *P) {
-      assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
-      return AliasAnalysis::pointsToConstantMemory(P);
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
     }
 
     virtual void deleteValue(Value *V) {
@@ -129,7 +132,7 @@ namespace {
 
 char AliasDebugger::ID = 0;
 INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
-                   "AA use debugger", false, true, false);
+                   "AA use debugger", false, true, false)
 
 Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
 
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index e74543bb508a..3a46976d66f7 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
 #include "llvm/Target/TargetData.h"
@@ -45,7 +46,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
     PointerRec *R = AS.getSomePointer();
 
     // If the pointers are not a must-alias pair, this set becomes a may alias.
-    if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize())
+    if (AA.alias(AliasAnalysis::Location(L->getValue(),
+                                         L->getSize(),
+                                         L->getTBAAInfo()),
+                 AliasAnalysis::Location(R->getValue(),
+                                         R->getSize(),
+                                         R->getTBAAInfo()))
         != AliasAnalysis::MustAlias)
       AliasTy = MayAlias;
   }
@@ -87,7 +93,8 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
 }
 
 void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
-                          unsigned Size, bool KnownMustAlias) {
+                          uint64_t Size, const MDNode *TBAAInfo,
+                          bool KnownMustAlias) {
   assert(!Entry.hasAliasSet() && "Entry already in set!");
 
   // Check to see if we have to downgrade to _may_ alias.
@@ -95,16 +102,18 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
     if (PointerRec *P = getSomePointer()) {
       AliasAnalysis &AA = AST.getAliasAnalysis();
       AliasAnalysis::AliasResult Result =
-        AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size);
-      if (Result == AliasAnalysis::MayAlias)
+        AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+                                         P->getTBAAInfo()),
+                 AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+      if (Result != AliasAnalysis::MustAlias)
         AliasTy = MayAlias;
       else                  // First entry of must alias must have maximum size!
-        P->updateSize(Size);
+        P->updateSizeAndTBAAInfo(Size, TBAAInfo);
       assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
     }
 
   Entry.setAliasSet(this);
-  Entry.updateSize(Size);
+  Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
 
   // Add it to the end of the list...
   assert(*PtrListEnd == 0 && "End of list is not null?");
@@ -120,7 +129,7 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
   AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
   if (Behavior == AliasAnalysis::DoesNotAccessMemory)
     return;
-  else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
     AliasTy = MayAlias;
     AccessTy |= Refs;
     return;
@@ -134,7 +143,8 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
 /// aliasesPointer - Return true if the specified pointer "may" (or must)
 /// alias one of the members in the set.
 ///
-bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+                              const MDNode *TBAAInfo,
                               AliasAnalysis &AA) const {
   if (AliasTy == MustAlias) {
     assert(CallSites.empty() && "Illegal must alias set!");
@@ -143,19 +153,26 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
     // SOME value in the set.
     PointerRec *SomePtr = getSomePointer();
     assert(SomePtr && "Empty must-alias set??");
-    return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size);
+    return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+                                            SomePtr->getSize(),
+                                            SomePtr->getTBAAInfo()),
+                    AliasAnalysis::Location(Ptr, Size, TBAAInfo));
   }
 
   // If this is a may-alias set, we have to check all of the pointers in the set
   // to be sure it doesn't alias the set...
   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (AA.alias(Ptr, Size, I.getPointer(), I.getSize()))
+    if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+                 AliasAnalysis::Location(I.getPointer(), I.getSize(),
+                                         I.getTBAAInfo())))
       return true;
 
   // Check the call sites list and invoke list...
   if (!CallSites.empty()) {
     for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
-      if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef)
+      if (AA.getModRefInfo(CallSites[i],
+                           AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+            AliasAnalysis::NoModRef)
         return true;
   }
 
@@ -198,10 +215,11 @@ void AliasSetTracker::clear() {
 /// that may alias the pointer, merge them together and return the unified set.
 ///
 AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
-                                                  unsigned Size) {
+                                                  uint64_t Size,
+                                                  const MDNode *TBAAInfo) {
   AliasSet *FoundSet = 0;
   for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue;
+    if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
     
     if (FoundSet == 0) {  // If this is the first alias set ptr can go into.
       FoundSet = I;       // Remember it.
@@ -216,9 +234,10 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
 /// containsPointer - Return true if the specified location is represented by
 /// this alias set, false otherwise.  This does not modify the AST object or
 /// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+                                      const MDNode *TBAAInfo) const {
   for (const_iterator I = begin(), E = end(); I != E; ++I)
-    if (!I->Forward && I->aliasesPointer(Ptr, Size, AA))
+    if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
       return true;
   return false;
 }
@@ -244,33 +263,34 @@ AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
 
 /// getAliasSetForPointer - Return the alias set that the specified pointer
 /// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+                                                 const MDNode *TBAAInfo,
                                                  bool *New) {
   AliasSet::PointerRec &Entry = getEntryFor(Pointer);
 
   // Check to see if the pointer is already known.
   if (Entry.hasAliasSet()) {
-    Entry.updateSize(Size);
+    Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
     // Return the set!
     return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
   }
   
-  if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+  if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
     // Add it to the alias set it aliases.
-    AS->addPointer(*this, Entry, Size);
+    AS->addPointer(*this, Entry, Size, TBAAInfo);
     return *AS;
   }
   
   if (New) *New = true;
   // Otherwise create a new alias set to hold the loaded pointer.
   AliasSets.push_back(new AliasSet());
-  AliasSets.back().addPointer(*this, Entry, Size);
+  AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
   return AliasSets.back();
 }
 
-bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
   bool NewPtr;
-  addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr);
+  addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
   return NewPtr;
 }
 
@@ -279,6 +299,7 @@ bool AliasSetTracker::add(LoadInst *LI) {
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
+                            LI->getMetadata(LLVMContext::MD_tbaa),
                             AliasSet::Refs, NewPtr);
   if (LI->isVolatile()) AS.setVolatile();
   return NewPtr;
@@ -289,6 +310,7 @@ bool AliasSetTracker::add(StoreInst *SI) {
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
                             AA.getTypeStoreSize(Val->getType()),
+                            SI->getMetadata(LLVMContext::MD_tbaa),
                             AliasSet::Mods, NewPtr);
   if (SI->isVolatile()) AS.setVolatile();
   return NewPtr;
@@ -296,7 +318,9 @@ bool AliasSetTracker::add(StoreInst *SI) {
 
 bool AliasSetTracker::add(VAArgInst *VAAI) {
   bool NewPtr;
-  addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr);
+  addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize, 
+             VAAI->getMetadata(LLVMContext::MD_tbaa),
+             AliasSet::ModRef, NewPtr);
   return NewPtr;
 }
 
@@ -358,6 +382,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
     bool X;
     for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
       AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+                                   ASI.getTBAAInfo(),
                                    (AliasSet::AccessType)AS.AccessTy, X);
       if (AS.isVolatile()) NewAS.setVolatile();
     }
@@ -393,31 +418,36 @@ void AliasSetTracker::remove(AliasSet &AS) {
     AS.removeFromTracker(*this);
 }
 
-bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
-  AliasSet *AS = findAliasSetForPointer(Ptr, Size);
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+  AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }
 
 bool AliasSetTracker::remove(LoadInst *LI) {
-  unsigned Size = AA.getTypeStoreSize(LI->getType());
-  AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
+  uint64_t Size = AA.getTypeStoreSize(LI->getType());
+  const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }
 
 bool AliasSetTracker::remove(StoreInst *SI) {
-  unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
-  AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
+  uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+  const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
   if (!AS) return false;
   remove(*AS);
   return true;
 }
 
 bool AliasSetTracker::remove(VAArgInst *VAAI) {
-  AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0);
+  AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+                                        AliasAnalysis::UnknownSize,
+                                        VAAI->getMetadata(LLVMContext::MD_tbaa));
   if (!AS) return false;
   remove(*AS);
   return true;
@@ -507,7 +537,9 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
   // Add it to the alias set it aliases...
   I = PointerMap.find(From);
   AliasSet *AS = I->second->getAliasSet(*this);
-  AS->addPointer(*this, Entry, I->second->getSize(), true);
+  AS->addPointer(*this, Entry, I->second->getSize(),
+                 I->second->getTBAAInfo(),
+                 true);
 }
 
 
@@ -587,7 +619,9 @@ namespace {
     AliasSetTracker *Tracker;
   public:
     static char ID; // Pass identification, replacement for typeid
-    AliasSetPrinter() : FunctionPass(ID) {}
+    AliasSetPrinter() : FunctionPass(ID) {
+      initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -607,5 +641,8 @@ namespace {
 }
 
 char AliasSetPrinter::ID = 0;
-INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets",
-                "Alias Set Printer", false, true);
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+                "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+                "Alias Set Printer", false, true)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 398dec7dd0a1..1af1c35f5392 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -8,22 +8,83 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Analysis/Verifier.h"
 #include <cstring>
 
 using namespace llvm;
 
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+  initializeAliasAnalysisAnalysisGroup(Registry);
+  initializeAliasAnalysisCounterPass(Registry);
+  initializeAAEvalPass(Registry);
+  initializeAliasDebuggerPass(Registry);
+  initializeAliasSetPrinterPass(Registry);
+  initializeNoAAPass(Registry);
+  initializeBasicAliasAnalysisPass(Registry);
+  initializeCFGViewerPass(Registry);
+  initializeCFGPrinterPass(Registry);
+  initializeCFGOnlyViewerPass(Registry);
+  initializeCFGOnlyPrinterPass(Registry);
+  initializePrintDbgInfoPass(Registry);
+  initializeDominanceFrontierPass(Registry);
+  initializeDomViewerPass(Registry);
+  initializeDomPrinterPass(Registry);
+  initializeDomOnlyViewerPass(Registry);
+  initializePostDomViewerPass(Registry);
+  initializeDomOnlyPrinterPass(Registry);
+  initializePostDomPrinterPass(Registry);
+  initializePostDomOnlyViewerPass(Registry);
+  initializePostDomOnlyPrinterPass(Registry);
+  initializeIVUsersPass(Registry);
+  initializeInstCountPass(Registry);
+  initializeIntervalPartitionPass(Registry);
+  initializeLazyValueInfoPass(Registry);
+  initializeLibCallAliasAnalysisPass(Registry);
+  initializeLintPass(Registry);
+  initializeLiveValuesPass(Registry);
+  initializeLoopDependenceAnalysisPass(Registry);
+  initializeLoopInfoPass(Registry);
+  initializeMemDepPrinterPass(Registry);
+  initializeMemoryDependenceAnalysisPass(Registry);
+  initializeModuleDebugInfoPrinterPass(Registry);
+  initializePostDominatorTreePass(Registry);
+  initializePostDominanceFrontierPass(Registry);
+  initializeProfileEstimatorPassPass(Registry);
+  initializeNoProfileInfoPass(Registry);
+  initializeNoPathProfileInfoPass(Registry);
+  initializeProfileInfoAnalysisGroup(Registry);
+  initializePathProfileInfoAnalysisGroup(Registry);
+  initializeLoaderPassPass(Registry);
+  initializePathProfileLoaderPassPass(Registry);
+  initializeProfileVerifierPassPass(Registry);
+  initializePathProfileVerifierPass(Registry);
+  initializeRegionInfoPass(Registry);
+  initializeRegionViewerPass(Registry);
+  initializeRegionPrinterPass(Registry);
+  initializeRegionOnlyViewerPass(Registry);
+  initializeRegionOnlyPrinterPass(Registry);
+  initializeScalarEvolutionPass(Registry);
+  initializeScalarEvolutionAliasAnalysisPass(Registry);
+  initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+  initializeAnalysis(*unwrap(R));
+}
+
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
   std::string Messages;
-  
+
   LLVMBool Result = verifyModule(*unwrap(M),
                             static_cast<VerifierFailureAction>(Action),
                             OutMessages? &Messages : 0);
-  
+
   if (OutMessages)
     *OutMessages = strdup(Messages.c_str());
-  
+
   return Result;
 }
 
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 113c72b94dac..f7bcd9ec44d8 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- BasicAliasAnalysis.cpp - Local Alias Analysis Impl -----------------===//
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the default implementation of the Alias Analysis interface
-// that simply implements a few identities (two different globals cannot alias,
-// etc), but otherwise does no analysis.
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,10 +22,12 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -95,104 +97,54 @@ static bool isEscapeSource(const Value *V) {
   return false;
 }
 
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
-static bool isObjectSmallerThan(const Value *V, unsigned Size,
-                                const TargetData &TD) {
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
   const Type *AccessTy;
   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    if (!GV->hasDefinitiveInitializer())
+      return AliasAnalysis::UnknownSize;
     AccessTy = GV->getType()->getElementType();
   } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
     if (!AI->isArrayAllocation())
       AccessTy = AI->getType()->getElementType();
     else
-      return false;
+      return AliasAnalysis::UnknownSize;
   } else if (const CallInst* CI = extractMallocCall(V)) {
     if (!isArrayMalloc(V, &TD))
       // The size is the argument to the malloc call.
       if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
-        return (C->getZExtValue() < Size);
-    return false;
+        return C->getZExtValue();
+    return AliasAnalysis::UnknownSize;
   } else if (const Argument *A = dyn_cast<Argument>(V)) {
     if (A->hasByValAttr())
       AccessTy = cast<PointerType>(A->getType())->getElementType();
     else
-      return false;
+      return AliasAnalysis::UnknownSize;
   } else {
-    return false;
+    return AliasAnalysis::UnknownSize;
   }
   
   if (AccessTy->isSized())
-    return TD.getTypeAllocSize(AccessTy) < Size;
-  return false;
+    return TD.getTypeAllocSize(AccessTy);
+  return AliasAnalysis::UnknownSize;
 }
 
-//===----------------------------------------------------------------------===//
-// NoAA Pass
-//===----------------------------------------------------------------------===//
-
-namespace {
-  /// NoAA - This class implements the -no-aa pass, which always returns "I
-  /// don't know" for alias queries.  NoAA is unlike other alias analysis
-  /// implementations, in that it does not chain to a previous analysis.  As
-  /// such it doesn't follow many of the rules that other alias analyses must.
-  ///
-  struct NoAA : public ImmutablePass, public AliasAnalysis {
-    static char ID; // Class identification, replacement for typeinfo
-    NoAA() : ImmutablePass(ID) {}
-    explicit NoAA(char &PID) : ImmutablePass(PID) { }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    }
-
-    virtual void initializePass() {
-      TD = getAnalysisIfAvailable<TargetData>();
-    }
-
-    virtual AliasResult alias(const Value *V1, unsigned V1Size,
-                              const Value *V2, unsigned V2Size) {
-      return MayAlias;
-    }
-
-    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
-      return UnknownModRefBehavior;
-    }
-    virtual ModRefBehavior getModRefBehavior(const Function *F) {
-      return UnknownModRefBehavior;
-    }
-
-    virtual bool pointsToConstantMemory(const Value *P) { return false; }
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                       const Value *P, unsigned Size) {
-      return ModRef;
-    }
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                                       ImmutableCallSite CS2) {
-      return ModRef;
-    }
-
-    virtual void deleteValue(Value *V) {}
-    virtual void copyValue(Value *From, Value *To) {}
-    
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(const void *ID) {
-      if (ID == &AliasAnalysis::ID)
-        return (AliasAnalysis*)this;
-      return this;
-    }
-  };
-}  // End of anonymous namespace
-
-// Register this pass...
-char NoAA::ID = 0;
-INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
-                   "No Alias Analysis (always returns 'may' alias)",
-                   true, true, false);
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+                                const TargetData &TD) {
+  uint64_t ObjectSize = getObjectSize(V, TD);
+  return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
 
-ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+                         const TargetData &TD) {
+  uint64_t ObjectSize = getObjectSize(V, TD);
+  return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
 
 //===----------------------------------------------------------------------===//
 // GetElementPtr Instruction Decomposition and Analysis
@@ -272,14 +224,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     Value *CastOp = cast<CastInst>(V)->getOperand(0);
     unsigned OldWidth = Scale.getBitWidth();
     unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
-    Scale.trunc(SmallWidth);
-    Offset.trunc(SmallWidth);
+    Scale = Scale.trunc(SmallWidth);
+    Offset = Offset.trunc(SmallWidth);
     Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
 
     Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
                                         TD, Depth+1);
-    Scale.zext(OldWidth);
-    Offset.zext(OldWidth);
+    Scale = Scale.zext(OldWidth);
+    Offset = Offset.zext(OldWidth);
     
     return Result;
   }
@@ -299,7 +251,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
 /// the gep cannot necessarily be reconstructed from its decomposed form.
 ///
 /// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through.  When not, it just looks
+/// that GetUnderlyingObject can look through.  When not, it just looks
 /// through pointer casts.
 ///
 static const Value *
@@ -328,6 +280,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       V = Op->getOperand(0);
       continue;
     }
+
+    if (const Instruction *I = dyn_cast<Instruction>(V))
+      // TODO: Get a DominatorTree and use it here.
+      if (const Value *Simplified =
+            SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+        V = Simplified;
+        continue;
+      }
     
     const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
     if (GEPOp == 0)
@@ -386,8 +346,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getZExtValue()*Scale;
-      Scale *= IndexScale.getZExtValue();
+      BaseOffs += IndexOffset.getSExtValue()*Scale;
+      Scale *= IndexScale.getSExtValue();
       
       
       // If we already had an occurrance of this index variable, merge this
@@ -407,7 +367,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // pointer size.
       if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
         Scale <<= ShiftBits;
-        Scale >>= ShiftBits;
+        Scale = (int64_t)Scale >> ShiftBits;
       }
       
       if (Scale) {
@@ -485,25 +445,34 @@ static bool notDifferentParent(const Value *O1, const Value *O2) {
 #endif
 
 namespace {
-  /// BasicAliasAnalysis - This is the default alias analysis implementation.
-  /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
-  /// derives from the NoAA class.
-  struct BasicAliasAnalysis : public NoAA {
+  /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+  struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
     static char ID; // Class identification, replacement for typeinfo
-    BasicAliasAnalysis() : NoAA(ID) {}
+    BasicAliasAnalysis() : ImmutablePass(ID) {
+      initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void initializePass() {
+      InitializeAliasAnalysis(this);
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<AliasAnalysis>();
+    }
 
-    virtual AliasResult alias(const Value *V1, unsigned V1Size,
-                              const Value *V2, unsigned V2Size) {
+    virtual AliasResult alias(const Location &LocA,
+                              const Location &LocB) {
       assert(Visited.empty() && "Visited must be cleared after use!");
-      assert(notDifferentParent(V1, V2) &&
+      assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
              "BasicAliasAnalysis doesn't support interprocedural queries.");
-      AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+      AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+                                     LocB.Ptr, LocB.Size, LocB.TBAATag);
       Visited.clear();
       return Alias;
     }
 
     virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                       const Value *P, unsigned Size);
+                                       const Location &Loc);
 
     virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                        ImmutableCallSite CS2) {
@@ -513,7 +482,7 @@ namespace {
 
     /// pointsToConstantMemory - Chase pointers until we find a (constant
     /// global) or not.
-    virtual bool pointsToConstantMemory(const Value *P);
+    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
 
     /// getModRefBehavior - Return the behavior when calling the given
     /// call site.
@@ -539,46 +508,102 @@ namespace {
 
     // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
     // instruction against another.
-    AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size,
-                         const Value *V2, unsigned V2Size,
+    AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+                         const Value *V2, uint64_t V2Size,
+                         const MDNode *V2TBAAInfo,
                          const Value *UnderlyingV1, const Value *UnderlyingV2);
 
     // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
     // instruction against another.
-    AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
-                         const Value *V2, unsigned V2Size);
+    AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+                         const MDNode *PNTBAAInfo,
+                         const Value *V2, uint64_t V2Size,
+                         const MDNode *V2TBAAInfo);
 
     /// aliasSelect - Disambiguate a Select instruction against another value.
-    AliasResult aliasSelect(const SelectInst *SI, unsigned SISize,
-                            const Value *V2, unsigned V2Size);
-
-    AliasResult aliasCheck(const Value *V1, unsigned V1Size,
-                           const Value *V2, unsigned V2Size);
+    AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+                            const MDNode *SITBAAInfo,
+                            const Value *V2, uint64_t V2Size,
+                            const MDNode *V2TBAAInfo);
+
+    AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+                           const MDNode *V1TBAATag,
+                           const Value *V2, uint64_t V2Size,
+                           const MDNode *V2TBAATag);
   };
 }  // End of anonymous namespace
 
 // Register this pass...
 char BasicAliasAnalysis::ID = 0;
 INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
-                   "Basic Alias Analysis (default AA impl)",
-                   false, true, true);
+                   "Basic Alias Analysis (stateless AA impl)",
+                   false, true, false)
 
 ImmutablePass *llvm::createBasicAliasAnalysisPass() {
   return new BasicAliasAnalysis();
 }
 
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+  assert(Visited.empty() && "Visited must be cleared after use!");
+
+  unsigned MaxLookup = 8;
+  SmallVector<const Value *, 16> Worklist;
+  Worklist.push_back(Loc.Ptr);
+  do {
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+    if (!Visited.insert(V)) {
+      Visited.clear();
+      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+    }
+
+    // An alloca instruction defines local memory.
+    if (OrLocal && isa<AllocaInst>(V))
+      continue;
+
+    // A global constant counts as local memory for our purposes.
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+      // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+      // global to be marked constant in some modules and non-constant in
+      // others.  GV may even be a declaration, not a definition.
+      if (!GV->isConstant()) {
+        Visited.clear();
+        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+      }
+      continue;
+    }
+
+    // If both select values point to local memory, then so does the select.
+    if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+      Worklist.push_back(SI->getTrueValue());
+      Worklist.push_back(SI->getFalseValue());
+      continue;
+    }
+
+    // If all values incoming to a phi node point to local memory, then so does
+    // the phi.
+    if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+      // Don't bother inspecting phi nodes with many operands.
+      if (PN->getNumIncomingValues() > MaxLookup) {
+        Visited.clear();
+        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+      }
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        Worklist.push_back(PN->getIncomingValue(i));
+      continue;
+    }
 
-/// pointsToConstantMemory - Chase pointers until we find a (constant
-/// global) or not.
-bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
-  if (const GlobalVariable *GV = 
-        dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
-    // Note: this doesn't require GV to be "ODR" because it isn't legal for a
-    // global to be marked constant in some modules and non-constant in others.
-    // GV may even be a declaration, not a definition.
-    return GV->isConstant();
+    // Otherwise be conservative.
+    Visited.clear();
+    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
 
-  return NoAA::pointsToConstantMemory(P);
+  } while (!Worklist.empty() && --MaxLookup);
+
+  Visited.clear();
+  return Worklist.empty();
 }
 
 /// getModRefBehavior - Return the behavior when calling the given call site.
@@ -596,22 +621,32 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
     Min = OnlyReadsMemory;
 
   // The AliasAnalysis base class has some smarts, lets use them.
-  return std::min(AliasAnalysis::getModRefBehavior(CS), Min);
+  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
 }
 
 /// getModRefBehavior - Return the behavior when calling the given function.
 /// For use when the call site is not known.
 AliasAnalysis::ModRefBehavior
 BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+  // If the function declares it doesn't access memory, we can't do better.
   if (F->doesNotAccessMemory())
-    // Can't do better than this.
     return DoesNotAccessMemory;
+
+  // For intrinsics, we can check the table.
+  if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+  }
+
+  ModRefBehavior Min = UnknownModRefBehavior;
+
+  // If the function declares it only reads memory, go with that.
   if (F->onlyReadsMemory())
-    return OnlyReadsMemory;
-  if (unsigned id = F->getIntrinsicID())
-    return getIntrinsicModRefBehavior(id);
+    Min = OnlyReadsMemory;
 
-  return NoAA::getModRefBehavior(F);
+  // Otherwise be conservative.
+  return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
 }
 
 /// getModRefInfo - Check to see if the specified callsite can clobber the
@@ -620,13 +655,13 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
 /// simple "address taken" analysis on local objects.
 AliasAnalysis::ModRefResult
 BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                                  const Value *P, unsigned Size) {
-  assert(notDifferentParent(CS.getInstruction(), P) &&
+                                  const Location &Loc) {
+  assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = P->getUnderlyingObject();
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
   
-  // If this is a tail call and P points to a stack location, we know that
+  // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
   // We cannot exclude byval arguments here; these belong to the caller of
   // the current function not to the current function, and a tail callee
@@ -650,11 +685,11 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
           !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
         continue;
       
-      // If  this is a no-capture pointer argument, see if we can tell that it
+      // If this is a no-capture pointer argument, see if we can tell that it
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
       // escape.
-      if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) {
+      if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
         PassedAsArg = true;
         break;
       }
@@ -664,6 +699,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       return NoModRef;
   }
 
+  ModRefResult Min = ModRef;
+
   // Finally, handle specific knowledge of intrinsics.
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
   if (II != 0)
@@ -671,15 +708,20 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     default: break;
     case Intrinsic::memcpy:
     case Intrinsic::memmove: {
-      unsigned Len = UnknownSize;
+      uint64_t Len = UnknownSize;
       if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
         Len = LenCI->getZExtValue();
       Value *Dest = II->getArgOperand(0);
       Value *Src = II->getArgOperand(1);
-      if (isNoAlias(Dest, Len, P, Size)) {
-        if (isNoAlias(Src, Len, P, Size))
+      // If it can't overlap the source dest, then it doesn't modref the loc.
+      if (isNoAlias(Location(Dest, Len), Loc)) {
+        if (isNoAlias(Location(Src, Len), Loc))
           return NoModRef;
-        return Ref;
+        // If it can't overlap the dest, then worst case it reads the loc.
+        Min = Ref;
+      } else if (isNoAlias(Location(Src, Len), Loc)) {
+        // If it can't overlap the source, then worst case it mutates the loc.
+        Min = Mod;
       }
       break;
     }
@@ -687,11 +729,13 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // Since memset is 'accesses arguments' only, the AliasAnalysis base class
       // will handle it for the variable length case.
       if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
-        unsigned Len = LenCI->getZExtValue();
+        uint64_t Len = LenCI->getZExtValue();
         Value *Dest = II->getArgOperand(0);
-        if (isNoAlias(Dest, Len, P, Size))
+        if (isNoAlias(Location(Dest, Len), Loc))
           return NoModRef;
       }
+      // We know that memset doesn't load anything.
+      Min = Mod;
       break;
     case Intrinsic::atomic_cmp_swap:
     case Intrinsic::atomic_swap:
@@ -707,42 +751,49 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     case Intrinsic::atomic_load_umin:
       if (TD) {
         Value *Op1 = II->getArgOperand(0);
-        unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
-        if (isNoAlias(Op1, Op1Size, P, Size))
+        uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
+        MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
+        if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
           return NoModRef;
       }
       break;
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start: {
-      unsigned PtrSize =
+      uint64_t PtrSize =
         cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
-      if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
+      if (isNoAlias(Location(II->getArgOperand(1),
+                             PtrSize,
+                             II->getMetadata(LLVMContext::MD_tbaa)),
+                    Loc))
         return NoModRef;
       break;
     }
     case Intrinsic::invariant_end: {
-      unsigned PtrSize =
+      uint64_t PtrSize =
         cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
-      if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
+      if (isNoAlias(Location(II->getArgOperand(2),
+                             PtrSize,
+                             II->getMetadata(LLVMContext::MD_tbaa)),
+                    Loc))
         return NoModRef;
       break;
     }
     }
 
   // The AliasAnalysis base class has some smarts, lets use them.
-  return AliasAnalysis::getModRefInfo(CS, P, Size);
+  return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
 
-
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer.  We know that V1 is a GEP, but we don't know
-/// anything about V2.  UnderlyingV1 is GEP1->getUnderlyingObject(),
+/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
 /// UnderlyingV2 is the same for V2.
 ///
 AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
-                             const Value *V2, unsigned V2Size,
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+                             const Value *V2, uint64_t V2Size,
+                             const MDNode *V2TBAAInfo,
                              const Value *UnderlyingV1,
                              const Value *UnderlyingV2) {
   // If this GEP has been visited before, we're on a use-def cycle.
@@ -759,8 +810,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
   // out if the indexes to the GEP tell us anything about the derived pointer.
   if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
     // Do the base pointers alias?
-    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize,
-                                       UnderlyingV2, UnknownSize);
+    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+                                       UnderlyingV2, UnknownSize, 0);
     
     // If we get a No or May, then return it immediately, no amount of analysis
     // will improve this situation.
@@ -782,7 +833,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
     // to handle without it.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
       assert(TD == 0 &&
-             "DecomposeGEPExpression and getUnderlyingObject disagree!");
+             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
     
@@ -800,7 +851,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
     if (V1Size == UnknownSize && V2Size == UnknownSize)
       return MayAlias;
 
-    AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size);
+    AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+                               V2, V2Size, V2TBAAInfo);
     if (R != MustAlias)
       // If V2 may alias GEP base pointer, conservatively returns MayAlias.
       // If V2 is known not to alias GEP base pointer, then the two values
@@ -817,7 +869,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
     // to handle without it.
     if (GEP1BasePtr != UnderlyingV1) {
       assert(TD == 0 &&
-             "DecomposeGEPExpression and getUnderlyingObject disagree!");
+             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
   }
@@ -831,6 +883,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
   if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
     return MustAlias;
 
+  // If there is a difference betwen the pointers, but the difference is
+  // less than the size of the associated memory object, then we know
+  // that the objects are partially overlapping.
+  if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+    if (GEP1BaseOffset >= 0 ?
+        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
+        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
+         GEP1BaseOffset != INT64_MIN))
+      return PartialAlias;
+  }
+
   // If we have a known constant offset, see if this offset is larger than the
   // access size being queried.  If so, and if no variable indices can remove
   // pieces of this constant, then we know we have a no-alias.  For example,
@@ -850,8 +913,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
   // If our known offset is bigger than the access size, we know we don't have
   // an alias.
   if (GEP1BaseOffset) {
-    if (GEP1BaseOffset >= (int64_t)V2Size ||
-        GEP1BaseOffset <= -(int64_t)V1Size)
+    if (GEP1BaseOffset >= 0 ?
+        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
+        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
+         GEP1BaseOffset != INT64_MIN))
       return NoAlias;
   }
   
@@ -861,8 +926,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
 /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
 /// instruction against another.
 AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
-                                const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+                                const MDNode *SITBAAInfo,
+                                const Value *V2, uint64_t V2Size,
+                                const MDNode *V2TBAAInfo) {
   // If this select has been visited before, we're on a use-def cycle.
   // Such cycles are only valid when PHI nodes are involved or in unreachable
   // code. The visitPHI function catches cycles containing PHIs, but there
@@ -875,13 +942,13 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
     if (SI->getCondition() == SI2->getCondition()) {
       AliasResult Alias =
-        aliasCheck(SI->getTrueValue(), SISize,
-                   SI2->getTrueValue(), V2Size);
+        aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+                   SI2->getTrueValue(), V2Size, V2TBAAInfo);
       if (Alias == MayAlias)
         return MayAlias;
       AliasResult ThisAlias =
-        aliasCheck(SI->getFalseValue(), SISize,
-                   SI2->getFalseValue(), V2Size);
+        aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+                   SI2->getFalseValue(), V2Size, V2TBAAInfo);
       if (ThisAlias != Alias)
         return MayAlias;
       return Alias;
@@ -890,7 +957,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
   AliasResult Alias =
-    aliasCheck(V2, V2Size, SI->getTrueValue(), SISize);
+    aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
   if (Alias == MayAlias)
     return MayAlias;
 
@@ -900,7 +967,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
   Visited.erase(V2);
 
   AliasResult ThisAlias =
-    aliasCheck(V2, V2Size, SI->getFalseValue(), SISize);
+    aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
   if (ThisAlias != Alias)
     return MayAlias;
   return Alias;
@@ -909,8 +976,10 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
 // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
 // against another.
 AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
-                             const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+                             const MDNode *PNTBAAInfo,
+                             const Value *V2, uint64_t V2Size,
+                             const MDNode *V2TBAAInfo) {
   // The PHI node has already been visited, avoid recursion any further.
   if (!Visited.insert(PN))
     return MayAlias;
@@ -921,16 +990,16 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
   if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
     if (PN2->getParent() == PN->getParent()) {
       AliasResult Alias =
-        aliasCheck(PN->getIncomingValue(0), PNSize,
+        aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
                    PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
-                   V2Size);
+                   V2Size, V2TBAAInfo);
       if (Alias == MayAlias)
         return MayAlias;
       for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
         AliasResult ThisAlias =
-          aliasCheck(PN->getIncomingValue(i), PNSize,
+          aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
                      PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
-                     V2Size);
+                     V2Size, V2TBAAInfo);
         if (ThisAlias != Alias)
           return MayAlias;
       }
@@ -951,7 +1020,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
       V1Srcs.push_back(PV1);
   }
 
-  AliasResult Alias = aliasCheck(V2, V2Size, V1Srcs[0], PNSize);
+  AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+                                 V1Srcs[0], PNSize, PNTBAAInfo);
   // Early exit if the check of the first PHI source against V2 is MayAlias.
   // Other results are not possible.
   if (Alias == MayAlias)
@@ -967,7 +1037,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
     // don't need to assume that V2 is being visited recursively.
     Visited.erase(V2);
 
-    AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
+    AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+                                       V, PNSize, PNTBAAInfo);
     if (ThisAlias != Alias || ThisAlias == MayAlias)
       return MayAlias;
   }
@@ -979,8 +1050,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
 // such as array references.
 //
 AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
-                               const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+                               const MDNode *V1TBAAInfo,
+                               const Value *V2, uint64_t V2Size,
+                               const MDNode *V2TBAAInfo) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are.
   if (V1Size == 0 || V2Size == 0)
@@ -997,8 +1070,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
     return NoAlias;  // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = V1->getUnderlyingObject();
-  const Value *O2 = V2->getUnderlyingObject();
+  const Value *O1 = GetUnderlyingObject(V1, TD);
+  const Value *O2 = GetUnderlyingObject(V2, TD);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1059,25 +1132,39 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
     std::swap(V1Size, V2Size);
     std::swap(O1, O2);
   }
-  if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1))
-    return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2);
+  if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+    AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+    if (Result != MayAlias) return Result;
+  }
 
   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
     std::swap(V1, V2);
     std::swap(V1Size, V2Size);
   }
-  if (const PHINode *PN = dyn_cast<PHINode>(V1))
-    return aliasPHI(PN, V1Size, V2, V2Size);
+  if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+    AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+                                  V2, V2Size, V2TBAAInfo);
+    if (Result != MayAlias) return Result;
+  }
 
   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
     std::swap(V1, V2);
     std::swap(V1Size, V2Size);
   }
-  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
-    return aliasSelect(S1, V1Size, V2, V2Size);
+  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+    AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+                                     V2, V2Size, V2TBAAInfo);
+    if (Result != MayAlias) return Result;
+  }
 
-  return NoAA::alias(V1, V1Size, V2, V2Size);
-}
+  // If both pointers are pointing into the same object and one of them
+  // accesses is accessing the entire object, then the accesses must
+  // overlap in some way.
+  if (TD && O1 == O2)
+    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+      return PartialAlias;
 
-// Make sure that anything that uses AliasAnalysis pulls in this file.
-DEFINING_FILE_FOR(BasicAliasAnalysis)
+  return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+                              Location(V2, V2Size, V2TBAAInfo));
+}
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 617a362062fc..7bb063fbbbcf 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -25,7 +25,9 @@ using namespace llvm;
 namespace {
   struct CFGViewer : public FunctionPass {
     static char ID; // Pass identifcation, replacement for typeid
-    CFGViewer() : FunctionPass(ID) {}
+    CFGViewer() : FunctionPass(ID) {
+      initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F) {
       F.viewCFG();
@@ -41,12 +43,14 @@ namespace {
 }
 
 char CFGViewer::ID = 0;
-INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true);
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
 
 namespace {
   struct CFGOnlyViewer : public FunctionPass {
     static char ID; // Pass identifcation, replacement for typeid
-    CFGOnlyViewer() : FunctionPass(ID) {}
+    CFGOnlyViewer() : FunctionPass(ID) {
+      initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F) {
       F.viewCFGOnly();
@@ -63,13 +67,14 @@ namespace {
 
 char CFGOnlyViewer::ID = 0;
 INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
-                "View CFG of function (with no function bodies)", false, true);
+                "View CFG of function (with no function bodies)", false, true)
 
 namespace {
   struct CFGPrinter : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGPrinter() : FunctionPass(ID) {}
-    explicit CFGPrinter(char &pid) : FunctionPass(pid) {}
+    CFGPrinter() : FunctionPass(ID) {
+      initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F) {
       std::string Filename = "cfg." + F.getNameStr() + ".dot";
@@ -96,13 +101,15 @@ namespace {
 
 char CFGPrinter::ID = 0;
 INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", 
-                false, true);
+                false, true)
 
 namespace {
   struct CFGOnlyPrinter : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGOnlyPrinter() : FunctionPass(ID) {}
-    explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {}
+    CFGOnlyPrinter() : FunctionPass(ID) {
+      initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+    }
+    
     virtual bool runOnFunction(Function &F) {
       std::string Filename = "cfg." + F.getNameStr() + ".dot";
       errs() << "Writing '" << Filename << "'...";
@@ -128,7 +135,7 @@ namespace {
 char CFGOnlyPrinter::ID = 0;
 INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
    "Print CFG of function to 'dot' file (with no function bodies)",
-   false, true);
+   false, true)
 
 /// viewCFG - This function is meant for use from the debugger.  You can just
 /// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 6a2ab681d1ac..1a738fae837d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -11,6 +11,8 @@ add_llvm_library(LLVMAnalysis
   ConstantFolding.cpp
   DbgInfoPrinter.cpp
   DebugInfo.cpp
+  DIBuilder.cpp
+  DominanceFrontier.cpp
   DomPrinter.cpp
   IVUsers.cpp
   InlineCost.cpp
@@ -27,11 +29,15 @@ add_llvm_library(LLVMAnalysis
   LoopDependenceAnalysis.cpp
   LoopInfo.cpp
   LoopPass.cpp
+  MemDepPrinter.cpp
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
   ModuleDebugInfoPrinter.cpp
+  PathNumbering.cpp
+  PathProfileInfo.cpp
+  PathProfileVerifier.cpp
+  NoAliasAnalysis.cpp
   PHITransAddr.cpp
-  PointerTracking.cpp
   PostDominators.cpp
   ProfileEstimatorPass.cpp
   ProfileInfo.cpp
@@ -39,6 +45,7 @@ add_llvm_library(LLVMAnalysis
   ProfileInfoLoaderPass.cpp
   ProfileVerifierPass.cpp
   RegionInfo.cpp
+  RegionPass.cpp
   RegionPrinter.cpp
   ScalarEvolution.cpp
   ScalarEvolutionAliasAnalysis.cpp
@@ -50,4 +57,4 @@ add_llvm_library(LLVMAnalysis
   ValueTracking.cpp
   )
 
-target_link_libraries (LLVMAnalysis LLVMSupport)
+add_subdirectory(IPA)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 90eae20858fb..42a54d9d1eb3 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -95,6 +95,9 @@ bool llvm::PointerMayBeCaptured(const Value *V,
     case Instruction::Load:
       // Loading from a pointer does not cause it to be captured.
       break;
+    case Instruction::VAArg:
+      // "va-arg" from a pointer does not cause it to be captured.
+      break;
     case Instruction::Ret:
       if (ReturnCaptures)
         return true;
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 0bf7967e83b1..cd8d52c1c465 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FEnv.h"
 #include <cerrno>
 #include <cmath>
 using namespace llvm;
@@ -53,7 +54,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
   // vector so the code below can handle it uniformly.
   if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
     Constant *Ops = C; // don't take the address of C!
-    return FoldBitCast(ConstantVector::get(&Ops, 1), DestTy, TD);
+    return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
   }
   
   // If this is a bitcast from constant vector -> vector, fold it.
@@ -166,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
     }
   }
   
-  return ConstantVector::get(Result.data(), Result.size());
+  return ConstantVector::get(Result);
 }
 
 
@@ -339,6 +340,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
     return true;
   }
   
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+    if (CE->getOpcode() == Instruction::IntToPtr &&
+        CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) 
+        return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, 
+                                  BytesLeft, TD);
+  }
+
   // Otherwise, unknown initializer type.
   return false;
 }
@@ -466,7 +474,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
   
   // If this load comes from anywhere in a constant global, and if the global
   // is all undef or zero, we know what it loads.
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getUnderlyingObject())){
+  if (GlobalVariable *GV =
+        dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
     if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
       const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
       if (GV->getInitializer()->isNullValue())
@@ -537,7 +546,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
   for (unsigned i = 1; i != NumOps; ++i) {
     if ((i == 1 ||
          !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
-                                                            reinterpret_cast<Value *const *>(Ops+1),
+                                        reinterpret_cast<Value *const *>(Ops+1),
                                                             i-1))) &&
         Ops[i]->getType() != IntPtrTy) {
       Any = true;
@@ -567,16 +576,35 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
   Constant *Ptr = Ops[0];
   if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
     return 0;
-
-  unsigned BitWidth =
-    TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
+  
+  const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
 
   // If this is a constant expr gep that is effectively computing an
   // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
   for (unsigned i = 1; i != NumOps; ++i)
-    if (!isa<ConstantInt>(Ops[i]))
+    if (!isa<ConstantInt>(Ops[i])) {
+      
+      // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+      // "inttoptr (sub (ptrtoint Ptr), V)"
+      if (NumOps == 2 &&
+          cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+        ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+        assert((CE == 0 || CE->getType() == IntPtrTy) &&
+               "CastGEPIndices didn't canonicalize index types!");
+        if (CE && CE->getOpcode() == Instruction::Sub &&
+            CE->getOperand(0)->isNullValue()) {
+          Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+          Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+          Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+          if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+            Res = ConstantFoldConstantExpression(ResCE, TD);
+          return Res;
+        }
+      }
       return 0;
+    }
   
+  unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
   APInt Offset = APInt(BitWidth,
                        TD->getIndexedOffset(Ptr->getType(),
                                             (Value**)Ops+1, NumOps-1));
@@ -609,10 +637,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
   APInt BasePtr(BitWidth, 0);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
     if (CE->getOpcode() == Instruction::IntToPtr)
-      if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
-        BasePtr = Base->getValue();
-        BasePtr.zextOrTrunc(BitWidth);
-      }
+      if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+        BasePtr = Base->getValue().zextOrTrunc(BitWidth);
   if (Ptr->isNullValue() || BasePtr != 0) {
     Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
     return ConstantExpr::getIntToPtr(C, ResultTy);
@@ -638,12 +664,19 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
         
       // Determine which element of the array the offset points into.
       APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+      const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
       if (ElemSize == 0)
-        return 0;
-      APInt NewIdx = Offset.udiv(ElemSize);
-      Offset -= NewIdx * ElemSize;
-      NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()),
-                                         NewIdx));
+        // The element size is 0. This may be [0 x Ty]*, so just use a zero
+        // index for this level and proceed to the next level to see if it can
+        // accommodate the offset.
+        NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+      else {
+        // The element size is non-zero divide the offset by the element
+        // size (rounding down), to compute the index at this level.
+        APInt NewIdx = Offset.udiv(ElemSize);
+        Offset -= NewIdx * ElemSize;
+        NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+      }
       Ty = ATy->getElementType();
     } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
       // Determine which field of the struct the offset points into. The
@@ -687,27 +720,34 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
 // Constant Folding public APIs
 //===----------------------------------------------------------------------===//
 
-
-/// ConstantFoldInstruction - Attempt to constant fold the specified
-/// instruction.  If successful, the constant result is returned, if not, null
-/// is returned.  Note that this function can only fail when attempting to fold
-/// instructions like loads and stores, which have no constant expression form.
-///
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant.  Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
 Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+  // Handle PHI nodes quickly here...
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
-    if (PN->getNumIncomingValues() == 0)
-      return UndefValue::get(PN->getType());
-
-    Constant *Result = dyn_cast<Constant>(PN->getIncomingValue(0));
-    if (Result == 0) return 0;
-
-    // Handle PHI nodes specially here...
-    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (PN->getIncomingValue(i) != Result && PN->getIncomingValue(i) != PN)
-        return 0;   // Not all the same incoming constants...
+    Constant *CommonValue = 0;
+
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *Incoming = PN->getIncomingValue(i);
+      // If the incoming value is undef then skip it.  Note that while we could
+      // skip the value if it is equal to the phi node itself we choose not to
+      // because that would break the rule that constant folding only applies if
+      // all operands are constants.
+      if (isa<UndefValue>(Incoming))
+        continue;
+      // If the incoming value is not a constant, or is a different constant to
+      // the one we saw previously, then give up.
+      Constant *C = dyn_cast<Constant>(Incoming);
+      if (!C || (CommonValue && C != CommonValue))
+        return 0;
+      CommonValue = C;
+    }
 
-    // If we reach here, all incoming values are the same constant.
-    return Result;
+    // If we reach here, all incoming values are the same constant or undef.
+    return CommonValue ? CommonValue : UndefValue::get(PN->getType());
   }
 
   // Scan the operand list, checking to see if they are all constants, if so,
@@ -725,7 +765,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
   
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
     return ConstantFoldLoadInst(LI, TD);
-  
+
+  if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+    return ConstantExpr::getInsertValue(
+                                cast<Constant>(IVI->getAggregateOperand()),
+                                cast<Constant>(IVI->getInsertedValueOperand()),
+                                IVI->idx_begin(), IVI->getNumIndices());
+
+  if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+    return ConstantExpr::getExtractValue(
+                                    cast<Constant>(EVI->getAggregateOperand()),
+                                    EVI->idx_begin(), EVI->getNumIndices());
+
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                   Ops.data(), Ops.size(), TD);
 }
@@ -736,7 +787,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
 Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
                                                const TargetData *TD) {
   SmallVector<Constant*, 8> Ops;
-  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+       i != e; ++i) {
     Constant *NewC = cast<Constant>(*i);
     // Recursively fold the ConstantExpr's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
@@ -1000,8 +1052,17 @@ llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::usub_with_overflow:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
+  case Intrinsic::smul_with_overflow:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
+  case Intrinsic::x86_sse_cvtss2si:
+  case Intrinsic::x86_sse_cvtss2si64:
+  case Intrinsic::x86_sse_cvttss2si:
+  case Intrinsic::x86_sse_cvttss2si64:
+  case Intrinsic::x86_sse2_cvtsd2si:
+  case Intrinsic::x86_sse2_cvtsd2si64:
+  case Intrinsic::x86_sse2_cvttsd2si:
+  case Intrinsic::x86_sse2_cvttsd2si64:
     return true;
   default:
     return false;
@@ -1039,10 +1100,10 @@ llvm::canConstantFoldCallTo(const Function *F) {
 
 static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, 
                                 const Type *Ty) {
-  errno = 0;
+  sys::llvm_fenv_clearexcept();
   V = NativeFP(V);
-  if (errno != 0) {
-    errno = 0;
+  if (sys::llvm_fenv_testexcept()) {
+    sys::llvm_fenv_clearexcept();
     return 0;
   }
   
@@ -1056,10 +1117,10 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
 
 static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
                                       double V, double W, const Type *Ty) {
-  errno = 0;
+  sys::llvm_fenv_clearexcept();
   V = NativeFP(V, W);
-  if (errno != 0) {
-    errno = 0;
+  if (sys::llvm_fenv_testexcept()) {
+    sys::llvm_fenv_clearexcept();
     return 0;
   }
   
@@ -1071,6 +1132,36 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
   return 0; // dummy return to suppress warning
 }
 
+/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
+/// conversion of a constant floating point. If roundTowardZero is false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+                                          const Type *Ty) {
+  assert(Op && "Called with NULL operand");
+  APFloat Val(Op->getValueAPF());
+
+  // All of these conversion intrinsics form an integer of at most 64bits.
+  unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+  assert(ResultWidth <= 64 &&
+         "Can only constant fold conversions to 64 and 32 bit ints");
+
+  uint64_t UIntVal;
+  bool isExact = false;
+  APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+                                              : APFloat::rmNearestTiesToEven;
+  APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+                                                  /*isSigned=*/true, mode,
+                                                  &isExact);
+  if (status != APFloat::opOK && status != APFloat::opInexact)
+    return 0;
+  return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
+
 /// ConstantFoldCall - Attempt to constant fold a call to the specified function
 /// with the specified arguments, returning null if unsuccessful.
 Constant *
@@ -1082,7 +1173,7 @@ llvm::ConstantFoldCall(Function *F,
   const Type *Ty = F->getReturnType();
   if (NumOperands == 1) {
     if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
-      if (Name == "llvm.convert.to.fp16") {
+      if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
         APFloat Val(Op->getValueAPF());
 
         bool lost = false;
@@ -1093,6 +1184,13 @@ llvm::ConstantFoldCall(Function *F,
 
       if (!Ty->isFloatTy() && !Ty->isDoubleTy())
         return 0;
+
+      /// We only fold functions with finite arguments. Folding NaN and inf is
+      /// likely to be aborted with an exception anyway, and some host libms
+      /// have known errors raising exceptions.
+      if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+        return 0;
+
       /// Currently APFloat versions of these functions do not exist, so we use
       /// the host native double versions.  Float versions are not called
       /// directly but for all these it is true (float)(f((double)arg)) ==
@@ -1133,8 +1231,8 @@ llvm::ConstantFoldCall(Function *F,
           return ConstantFoldFP(log, V, Ty);
         else if (Name == "log10" && V > 0)
           return ConstantFoldFP(log10, V, Ty);
-        else if (Name == "llvm.sqrt.f32" ||
-                 Name == "llvm.sqrt.f64") {
+        else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+                 (Ty->isFloatTy() || Ty->isDoubleTy())) {
           if (V >= -0.0)
             return ConstantFoldFP(sqrt, V, Ty);
           else // Undefined
@@ -1164,18 +1262,18 @@ llvm::ConstantFoldCall(Function *F,
       }
       return 0;
     }
-    
-    
+
     if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
-      if (Name.startswith("llvm.bswap"))
+      switch (F->getIntrinsicID()) {
+      case Intrinsic::bswap:
         return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
-      else if (Name.startswith("llvm.ctpop"))
+      case Intrinsic::ctpop:
         return ConstantInt::get(Ty, Op->getValue().countPopulation());
-      else if (Name.startswith("llvm.cttz"))
+      case Intrinsic::cttz:
         return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
-      else if (Name.startswith("llvm.ctlz"))
+      case Intrinsic::ctlz:
         return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
-      else if (Name == "llvm.convert.from.fp16") {
+      case Intrinsic::convert_from_fp16: {
         APFloat Val(Op->getValue());
 
         bool lost = false;
@@ -1183,24 +1281,44 @@ llvm::ConstantFoldCall(Function *F,
           Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
 
         // Conversion is always precise.
-        status = status;
+        (void)status;
         assert(status == APFloat::opOK && !lost &&
                "Precision lost during fp16 constfolding");
 
         return ConstantFP::get(F->getContext(), Val);
       }
-      return 0;
+      default:
+        return 0;
+      }
     }
-    
+
+    if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+      switch (F->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::x86_sse_cvtss2si:
+      case Intrinsic::x86_sse_cvtss2si64:
+      case Intrinsic::x86_sse2_cvtsd2si:
+      case Intrinsic::x86_sse2_cvtsd2si64:
+        if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+          return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+      case Intrinsic::x86_sse_cvttss2si:
+      case Intrinsic::x86_sse_cvttss2si64:
+      case Intrinsic::x86_sse2_cvttsd2si:
+      case Intrinsic::x86_sse2_cvttsd2si64:
+        if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+          return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+      }
+    }
+
     if (isa<UndefValue>(Operands[0])) {
-      if (Name.startswith("llvm.bswap"))
+      if (F->getIntrinsicID() == Intrinsic::bswap)
         return Operands[0];
       return 0;
     }
 
     return 0;
   }
-  
+
   if (NumOperands == 2) {
     if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
       if (!Ty->isFloatTy() && !Ty->isDoubleTy())
@@ -1223,11 +1341,11 @@ llvm::ConstantFoldCall(Function *F,
         if (Name == "atan2")
           return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
       } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
-        if (Name == "llvm.powi.f32")
+        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
           return ConstantFP::get(F->getContext(),
                                  APFloat((float)std::pow((float)Op1V,
                                                  (int)Op2C->getZExtValue())));
-        if (Name == "llvm.powi.f64")
+        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
           return ConstantFP::get(F->getContext(),
                                  APFloat((double)std::pow((double)Op1V,
                                                    (int)Op2C->getZExtValue())));
@@ -1240,42 +1358,37 @@ llvm::ConstantFoldCall(Function *F,
       if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
         switch (F->getIntrinsicID()) {
         default: break;
-        case Intrinsic::uadd_with_overflow: {
-          Constant *Res = ConstantExpr::getAdd(Op1, Op2);           // result.
-          Constant *Ops[] = {
-            Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow.
-          };
-          return ConstantStruct::get(F->getContext(), Ops, 2, false);
-        }
-        case Intrinsic::usub_with_overflow: {
-          Constant *Res = ConstantExpr::getSub(Op1, Op2);           // result.
+        case Intrinsic::sadd_with_overflow:
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::ssub_with_overflow:
+        case Intrinsic::usub_with_overflow:
+        case Intrinsic::smul_with_overflow: {
+          APInt Res;
+          bool Overflow;
+          switch (F->getIntrinsicID()) {
+          default: assert(0 && "Invalid case");
+          case Intrinsic::sadd_with_overflow:
+            Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::uadd_with_overflow:
+            Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::ssub_with_overflow:
+            Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::usub_with_overflow:
+            Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::smul_with_overflow:
+            Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+            break;
+          }
           Constant *Ops[] = {
-            Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow.
+            ConstantInt::get(F->getContext(), Res),
+            ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
           };
           return ConstantStruct::get(F->getContext(), Ops, 2, false);
         }
-        case Intrinsic::sadd_with_overflow: {
-          Constant *Res = ConstantExpr::getAdd(Op1, Op2);           // result.
-          Constant *Overflow = ConstantExpr::getSelect(
-              ConstantExpr::getICmp(CmpInst::ICMP_SGT,
-                ConstantInt::get(Op1->getType(), 0), Op1),
-              ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), 
-              ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow.
-
-          Constant *Ops[] = { Res, Overflow };
-          return ConstantStruct::get(F->getContext(), Ops, 2, false);
-        }
-        case Intrinsic::ssub_with_overflow: {
-          Constant *Res = ConstantExpr::getSub(Op1, Op2);           // result.
-          Constant *Overflow = ConstantExpr::getSelect(
-              ConstantExpr::getICmp(CmpInst::ICMP_SGT,
-                ConstantInt::get(Op2->getType(), 0), Op2),
-              ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), 
-              ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow.
-
-          Constant *Ops[] = { Res, Overflow };
-          return ConstantStruct::get(F->getContext(), Ops, 2, false);
-        }
         }
       }
       
@@ -1285,4 +1398,3 @@ llvm::ConstantFoldCall(Function *F,
   }
   return 0;
 }
-
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 000000000000..c1072df72925
--- /dev/null
+++ b/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,801 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+  assert((Tag & LLVMDebugVersionMask) == 0 &&
+         "Tag too large for debug encoding!");
+  return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+  : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, 
+                                  StringRef Directory, StringRef Producer, 
+                                  bool isOptimized, StringRef Flags, 
+                                  unsigned RunTimeVer) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    MDString::get(VMContext, Producer),
+    // Deprecate isMain field.
+    ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    MDString::get(VMContext, Flags),
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+  };
+  TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) {
+  assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    TheCU
+  };
+  return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+    MDString::get(VMContext, Name),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+  };
+  return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateBasicType - Create debugging information entry for a basic 
+/// type, e.g 'char'.
+DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, 
+                                  uint64_t AlignInBits,
+                                  unsigned Encoding) {
+  // Basic types are encoded in DIBasicType format. Line number, filename,
+  // offset and flags are always empty here.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+    TheCU,
+    MDString::get(VMContext, Name),
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateQaulifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) {
+  // Qualified types are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    TheCU,
+    MDString::get(VMContext, StringRef()), // Empty name.
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    FromTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreatePointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits,
+                                    uint64_t AlignInBits, StringRef Name) {
+  // Pointer types are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+    TheCU,
+    MDString::get(VMContext, Name),
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    PointeeTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::CreateReferenceType(DIType RTy) {
+  // References are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+    TheCU,
+    NULL, // Name
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    RTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File,
+                                unsigned LineNo) {
+  // typedefs are encoded in DIDerivedType format.
+  assert(Ty.Verify() && "Invalid typedef type!");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+    Ty.getContext(),
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    Ty
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) {
+  // typedefs are encoded in DIDerivedType format.
+  assert(Ty.Verify() && "Invalid type!");
+  assert(FriendTy.Verify() && "Invalid friend type!");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+    Ty,
+    NULL, // Name
+    Ty.getFile(),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    FriendTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateInheritance - Create debugging information entry to establish
+/// inheritnace relationship between two types.
+DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, 
+                                    uint64_t BaseOffset, unsigned Flags) {
+  // TAG_inheritance is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+    Ty,
+    NULL, // Name
+    Ty.getFile(),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    BaseTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateMemberType - Create debugging information entry for a member.
+DIType DIBuilder::CreateMemberType(StringRef Name, 
+                                   DIFile File, unsigned LineNumber, 
+                                   uint64_t SizeInBits, uint64_t AlignInBits,
+                                   uint64_t OffsetInBits, unsigned Flags, 
+                                   DIType Ty) {
+  // TAG_member is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File, // Or TheCU ? Ty ?
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    Ty
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateClassType - Create debugging information entry for a class.
+DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, 
+                                  DIFile File, unsigned LineNumber, 
+                                  uint64_t SizeInBits, uint64_t AlignInBits,
+                                  uint64_t OffsetInBits, unsigned Flags,
+                                  DIType DerivedFrom, DIArray Elements,
+                                  MDNode *VTableHoder, MDNode *TemplateParams) {
+ // TAG_class_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+    Context,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    DerivedFrom,
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    VTableHoder,
+    TemplateParams
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter 
+DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+                                       DIType Ty, MDNode *File, unsigned LineNo,
+                                       unsigned ColumnNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+    Context,
+    MDString::get(VMContext, Name),
+    Ty,
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+  };
+  return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], 
+                                             array_lengthof(Elts)));
+}
+
+/// CreateTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter 
+DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name,
+                                        DIType Ty, uint64_t Val,
+                                        MDNode *File, unsigned LineNo,
+                                        unsigned ColumnNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+    Context,
+    MDString::get(VMContext, Name),
+    Ty,
+    ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+  };
+  return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], 
+                                              array_lengthof(Elts)));
+}
+
+/// CreateStructType - Create debugging information entry for a struct.
+DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, 
+                                   DIFile File, unsigned LineNumber, 
+                                   uint64_t SizeInBits, uint64_t AlignInBits,
+                                   unsigned Flags, DIArray Elements, 
+                                   unsigned RunTimeLang) {
+ // TAG_structure_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+    Context,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateUnionType - Create debugging information entry for an union.
+DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, 
+                                  DIFile File,
+                                  unsigned LineNumber, uint64_t SizeInBits,
+                                  uint64_t AlignInBits, unsigned Flags,
+                                  DIArray Elements, unsigned RunTimeLang) {
+  // TAG_union_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateSubroutineType - Create subroutine type.
+DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) {
+  // TAG_subroutine_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+    File,
+    MDString::get(VMContext, ""),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ParameterTypes,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateEnumerationType - Create debugging information entry for an 
+/// enumeration.
+DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, 
+                                        DIFile File, unsigned LineNumber, 
+                                        uint64_t SizeInBits, 
+                                        uint64_t AlignInBits, DIArray Elements) {
+  // TAG_enumeration_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+  NMD->addOperand(Node);
+  return DIType(Node);
+}
+
+/// CreateArrayType - Create debugging information entry for an array.
+DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, 
+                                  DIType Ty, DIArray Subscripts) {
+  // TAG_array_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+    TheCU,
+    MDString::get(VMContext, ""),
+    TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, 
+                                   DIType Ty, DIArray Subscripts) {
+  // TAG_vector_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+    TheCU,
+    MDString::get(VMContext, ""),
+    TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::CreateArtificialType(DIType Ty) {
+  if (Ty.isArtificial())
+    return Ty;
+
+  SmallVector<Value *, 9> Elts;
+  MDNode *N = Ty;
+  assert (N && "Unexpected input DIType!");
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i))
+      Elts.push_back(V);
+    else
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  }
+
+  unsigned CurFlags = Ty.getFlags();
+  CurFlags = CurFlags | DIType::FlagArtificial;
+
+  // Flags are stored at this slot.
+  Elts[8] =  ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// RetainType - Retain DIType in a module even if it is not referenced 
+/// through debug info anchors.
+void DIBuilder::RetainType(DIType T) {
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+  NMD->addOperand(T);
+}
+
+/// CreateUnspecifiedParameter - Create unspeicified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::CreateUnspecifiedParameter() {
+  Value *Elts[] = { 
+    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) 
+  };
+  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::CreateTemporaryType() {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::CreateTemporaryType(DIFile F) {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, DW_TAG_base_type),
+    F.getCompileUnit(),
+    NULL,
+    F
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
+
+/// GetOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) {
+  if (NumElements == 0) {
+    Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+    return DIArray(MDNode::get(VMContext, &Null, 1));
+  }
+  return DIArray(MDNode::get(VMContext, Elements, NumElements));
+}
+
+/// GetOrCreateSubrange - Create a descriptor for a value range.  This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+  };
+
+  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+}
+
+/// CreateGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, 
+                     DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    TheCU,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+  return DIGlobalVariable(Node);
+}
+
+/// CreateStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+CreateStaticVariable(DIDescriptor Context, StringRef Name, 
+                     StringRef LinkageName, DIFile F, unsigned LineNumber, 
+                     DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+  return DIGlobalVariable(Node);
+}
+
+/// CreateVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                          StringRef Name, DIFile File,
+                                          unsigned LineNo, DIType Ty, 
+                                          bool AlwaysPreserve, unsigned Flags) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variable. If there is an interest
+    // to preserve variable info in such situation then stash it in a
+    // named mdnode.
+    DISubprogram Fn(getDISubprogram(Scope));
+    StringRef FName = "fn";
+    if (Fn.getFunction())
+      FName = Fn.getFunction()->getName();
+    char One = '\1';
+    if (FName.startswith(StringRef(&One, 1)))
+      FName = FName.substr(1);
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+    FnLocals->addOperand(Node);
+  }
+  return DIVariable(Node);
+}
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                            StringRef Name, DIFile F,
+                                            unsigned LineNo,
+                                            DIType Ty, Value *const *Addr,
+                                            unsigned NumAddr) {
+  SmallVector<Value *, 15> Elts;
+  Elts.push_back(GetTagConstant(VMContext, Tag));
+  Elts.push_back(Scope);
+  Elts.push_back(MDString::get(VMContext, Name));
+  Elts.push_back(F);
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+  Elts.push_back(Ty);
+  Elts.append(Addr, Addr+NumAddr);
+
+  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// CreateFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::CreateFunction(DIDescriptor Context,
+                                       StringRef Name,
+                                       StringRef LinkageName,
+                                       DIFile File, unsigned LineNo,
+                                       DIType Ty,
+                                       bool isLocalToUnit, bool isDefinition,
+                                       unsigned Flags, bool isOptimized,
+                                       Function *Fn) {
+
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// CreateMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::CreateMethod(DIDescriptor Context,
+                                     StringRef Name,
+                                     StringRef LinkageName,
+                                     DIFile F,
+                                     unsigned LineNo, DIType Ty,
+                                     bool isLocalToUnit,
+                                     bool isDefinition,
+                                     unsigned VK, unsigned VIndex,
+                                     MDNode *VTableHolder,
+                                     unsigned Flags,
+                                     bool isOptimized,
+                                     Function *Fn) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+    VTableHolder,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// CreateNameSpace - This creates new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name,
+                                       DIFile File, unsigned LineNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+  };
+  return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File,
+                                             unsigned Line, unsigned Col) {
+  // Defeat MDNode uniqing for lexical blocks by using unique id.
+  static unsigned int unique_id = 0;
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+    Scope,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+  };
+  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+                                      Instruction *InsertBefore) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+                                      BasicBlock *InsertAtEnd) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+
+  // If this block already has a terminator then insert this intrinsic
+  // before the terminator.
+  if (TerminatorInst *T = InsertAtEnd->getTerminator())
+    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+  else
+    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+                                                DIVariable VarInfo,
+                                                Instruction *InsertBefore) {
+  assert(V && "no value passed to dbg.value");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+                    VarInfo };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+                                                DIVariable VarInfo,
+                                                BasicBlock *InsertAtEnd) {
+  assert(V && "no value passed to dbg.value");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+                    VarInfo };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
+
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 056775060610..b23c3514d0bd 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -40,7 +41,9 @@ namespace {
     void printVariableDeclaration(const Value *V);
   public:
     static char ID; // Pass identification
-    PrintDbgInfo() : FunctionPass(ID), Out(errs()) {}
+    PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+      initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -48,12 +51,124 @@ namespace {
     }
   };
   char PrintDbgInfo::ID = 0;
-  INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
-                  "Print debug info in human readable form", false, false);
 }
 
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+                "Print debug info in human readable form", false, false)
+
 FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
 
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+  const Module *M = V->getParent();
+  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+  if (!NMD)
+    return 0;
+
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+    if (!DIG.isGlobalVariable())
+      continue;
+    if (DIGlobalVariable(DIG).getGlobal() == V)
+      return DIG;
+  }
+  return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+  const Module *M = V->getParent();
+  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+  if (!NMD)
+    return 0;
+
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+    if (!DIG.isSubprogram())
+      continue;
+    if (DISubprogram(DIG).getFunction() == V)
+      return DIG;
+  }
+  return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+  V = V->stripPointerCasts();
+
+  if (!isa<Instruction>(V) && !isa<Argument>(V))
+    return 0;
+
+  const Function *F = NULL;
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    F = I->getParent()->getParent();
+  else if (const Argument *A = dyn_cast<Argument>(V))
+    F = A->getParent();
+
+  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+         BI != BE; ++BI)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        if (DDI->getAddress() == V)
+          return DDI;
+
+  return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+                            std::string &Type, unsigned &LineNo,
+                            std::string &File, std::string &Dir) {
+  DICompileUnit Unit;
+  DIType TypeD;
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+    Value *DIGV = findDbgGlobalDeclare(GV);
+    if (!DIGV) return false;
+    DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+    Value *DIF = findDbgSubprogramDeclare(F);
+    if (!DIF) return false;
+    DISubprogram Var(cast<MDNode>(DIF));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else {
+    const DbgDeclareInst *DDI = findDbgDeclare(V);
+    if (!DDI) return false;
+    DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+    StringRef D = Var.getName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  }
+
+  StringRef T = TypeD.getName();
+  if (!T.empty())
+    Type = T;
+  StringRef F = Unit.getFilename();
+  if (!F.empty())
+    File = F;
+  StringRef D = Unit.getDirectory();
+  if (!D.empty())
+    Dir = D;
+  return true;
+}
+
 void PrintDbgInfo::printVariableDeclaration(const Value *V) {
   std::string DisplayName, File, Directory, Type;
   unsigned LineNo;
@@ -63,8 +178,12 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) {
 
   Out << "; ";
   WriteAsOperand(Out, V, false, 0);
-  Out << " is variable " << DisplayName
-      << " of type " << Type << " declared at ";
+  if (isa<Function>(V)) 
+    Out << " is function " << DisplayName
+        << " of type " << Type << " declared at ";
+  else
+    Out << " is variable " << DisplayName
+        << " of type " << Type << " declared at ";
 
   if (PrintDirectory)
     Out << Directory << "/";
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 5ca89c658df6..9db1456edd05 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -109,7 +109,9 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
 }
 
 unsigned DIVariable::getNumAddrElements() const {
-  return DbgNode->getNumOperands()-6;
+  if (getVersion() <= llvm::LLVMDebugVersion8)
+    return DbgNode->getNumOperands()-6;
+  return DbgNode->getNumOperands()-7;
 }
 
 
@@ -197,6 +199,12 @@ bool DIDescriptor::isGlobal() const {
   return isGlobalVariable();
 }
 
+/// isUnspecifiedParmeter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
 /// isScope - Return true if the specified tag is one of the scope
 /// related tag.
 bool DIDescriptor::isScope() const {
@@ -213,6 +221,18 @@ bool DIDescriptor::isScope() const {
   return false;
 }
 
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
 /// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
 bool DIDescriptor::isCompileUnit() const {
   return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
@@ -280,6 +300,26 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
   }
 }
 
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, its easy for clients to manufacture
+  // legitimate situations where they want to replaceAllUsesWith() on something
+  // which, due to uniquing, has merged with the source. We shield clients from
+  // this detail by allowing a value to be replaced with replaceAllUsesWith()
+  // itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
 /// Verify - Verify that a compile unit is well formed.
 bool DICompileUnit::Verify() const {
   if (!DbgNode)
@@ -297,9 +337,13 @@ bool DIType::Verify() const {
     return false;
   if (!getContext().Verify())
     return false;
-
-  DICompileUnit CU = getCompileUnit();
-  if (!CU.Verify())
+  unsigned Tag = getTag();
+  if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+      Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+      Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type 
+      && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+      && Tag != dwarf::DW_TAG_enumeration_type 
+      && getFilename().empty())
     return false;
   return true;
 }
@@ -701,15 +745,13 @@ Constant *DIFactory::GetTagConstant(unsigned TAG) {
 /// GetOrCreateArray - Create an descriptor for an array of descriptors.
 /// This implicitly uniques the arrays created.
 DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
-  SmallVector<Value*, 16> Elts;
-
-  if (NumTys == 0)
-    Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
-  else
-    for (unsigned i = 0; i != NumTys; ++i)
-      Elts.push_back(Tys[i]);
+  if (NumTys == 0) {
+    Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+    return DIArray(MDNode::get(VMContext, &Null, 1));
+  }
 
-  return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
+  SmallVector<Value *, 16> Elts(Tys, Tys+NumTys);
+  return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
 }
 
 /// GetOrCreateSubrange - Create a descriptor for a value range.  This
@@ -724,7 +766,14 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
   return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
 }
 
-
+/// CreateUnspecifiedParameter - Create unspeicified type descriptor
+/// for the subroutine type.
+DIDescriptor DIFactory::CreateUnspecifiedParameter() {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_unspecified_parameters)
+  };
+  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
 
 /// CreateCompileUnit - Create a new descriptor for the specified compile
 /// unit.  Note that this does not unique compile units within the module.
@@ -946,7 +995,6 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
   return DICompositeType(Node);
 }
 
-
 /// CreateTemporaryType - Create a temporary forward-declared type.
 DIType DIFactory::CreateTemporaryType() {
   // Give the temporary MDNode a tag. It doesn't matter what tag we
@@ -958,6 +1006,19 @@ DIType DIFactory::CreateTemporaryType() {
   return DIType(Node);
 }
 
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIFactory::CreateTemporaryType(DIFile F) {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = {
+    GetTagConstant(DW_TAG_base_type),
+    F.getCompileUnit(),
+    NULL,
+    F
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
 
 /// CreateCompositeType - Create a composite type like array, struct, etc.
 DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
@@ -1011,7 +1072,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                          bool isDefinition,
                                          unsigned VK, unsigned VIndex,
                                          DIType ContainingType,
-                                         bool isArtificial,
+                                         unsigned Flags,
                                          bool isOptimized,
                                          Function *Fn) {
 
@@ -1030,7 +1091,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
     ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
     ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
     ContainingType,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     Fn
   };
@@ -1064,7 +1125,7 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
     DeclNode->getOperand(11), // Virtuality
     DeclNode->getOperand(12), // VIndex
     DeclNode->getOperand(13), // Containting Type
-    DeclNode->getOperand(14), // isArtificial
+    DeclNode->getOperand(14), // Flags
     DeclNode->getOperand(15), // isOptimized
     SPDeclaration.getFunction()
   };
@@ -1142,12 +1203,47 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
   return DIGlobalVariable(Node);
 }
 
+/// fixupObjcLikeName - Replace contains special characters used
+/// in a typical Objective-C names with '.' in a given string.
+static void fixupObjcLikeName(std::string &Str) {
+  for (size_t i = 0, e = Str.size(); i < e; ++i) {
+    char C = Str[i];
+    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
+        C == '(' || C == ')')
+      Str[i] = '.';
+  }
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+  SmallString<32> Out;
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+                                      .toStringRef(Out)); 
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+                                    .toStringRef(Out));
+}
+
+/// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+}
+
 /// CreateVariable - Create a new descriptor for the specified variable.
 DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
                                      StringRef Name,
                                      DIFile F,
                                      unsigned LineNo,
-                                     DIType Ty, bool AlwaysPreserve) {
+                                     DIType Ty, bool AlwaysPreserve,
+                                     unsigned Flags) {
   Value *Elts[] = {
     GetTagConstant(Tag),
     Context,
@@ -1155,8 +1251,9 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
     F,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 7);
   if (AlwaysPreserve) {
     // The optimizer may remove local variable. If there is an interest
     // to preserve variable info in such situation then stash it in a
@@ -1169,9 +1266,8 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
     if (FName.startswith(StringRef(&One, 1)))
       FName = FName.substr(1);
 
-    SmallString<32> Out;
-    NamedMDNode *FnLocals =
-      M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
+
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
     FnLocals->addOperand(Node);
   }
   return DIVariable(Node);
@@ -1181,21 +1277,20 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
 /// CreateComplexVariable - Create a new descriptor for the specified variable
 /// which has a complex address expression for its address.
 DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                            const std::string &Name,
-                                            DIFile F,
+                                            StringRef Name, DIFile F,
                                             unsigned LineNo,
-                                            DIType Ty,
-                                            SmallVector<Value *, 9> &addr) {
-  SmallVector<Value *, 9> Elts;
+                                            DIType Ty, Value *const *Addr,
+                                            unsigned NumAddr) {
+  SmallVector<Value *, 15> Elts;
   Elts.push_back(GetTagConstant(Tag));
   Elts.push_back(Context);
   Elts.push_back(MDString::get(VMContext, Name));
   Elts.push_back(F);
   Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
   Elts.push_back(Ty);
-  Elts.insert(Elts.end(), addr.begin(), addr.end());
+  Elts.append(Addr, Addr+NumAddr);
 
-  return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
+  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
 }
 
 
@@ -1309,6 +1404,14 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
 }
 
+// RecordType - Record DIType in a module such that it is not lost even if
+// it is not referenced through debug info anchors.
+void DIFactory::RecordType(DIType T) {
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+  NMD->addOperand(T);
+}
+
+
 //===----------------------------------------------------------------------===//
 // DebugInfoFinder implementations.
 //===----------------------------------------------------------------------===//
@@ -1472,89 +1575,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
   return true;
 }
 
-/// Find the debug info descriptor corresponding to this global variable.
-static Value *findDbgGlobalDeclare(GlobalVariable *V) {
-  const Module *M = V->getParent();
-  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
-  if (!NMD)
-    return 0;
-
-  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
-    if (!DIG.isGlobalVariable())
-      continue;
-    if (DIGlobalVariable(DIG).getGlobal() == V)
-      return DIG;
-  }
-  return 0;
-}
-
-/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
-/// It looks through pointer casts too.
-static const DbgDeclareInst *findDbgDeclare(const Value *V) {
-  V = V->stripPointerCasts();
-
-  if (!isa<Instruction>(V) && !isa<Argument>(V))
-    return 0;
-
-  const Function *F = NULL;
-  if (const Instruction *I = dyn_cast<Instruction>(V))
-    F = I->getParent()->getParent();
-  else if (const Argument *A = dyn_cast<Argument>(V))
-    F = A->getParent();
-
-  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
-    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
-         BI != BE; ++BI)
-      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
-        if (DDI->getAddress() == V)
-          return DDI;
-
-  return 0;
-}
-
-bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
-                           std::string &Type, unsigned &LineNo,
-                           std::string &File, std::string &Dir) {
-  DICompileUnit Unit;
-  DIType TypeD;
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
-    Value *DIGV = findDbgGlobalDeclare(GV);
-    if (!DIGV) return false;
-    DIGlobalVariable Var(cast<MDNode>(DIGV));
-
-    StringRef D = Var.getDisplayName();
-    if (!D.empty())
-      DisplayName = D;
-    LineNo = Var.getLineNumber();
-    Unit = Var.getCompileUnit();
-    TypeD = Var.getType();
-  } else {
-    const DbgDeclareInst *DDI = findDbgDeclare(V);
-    if (!DDI) return false;
-    DIVariable Var(cast<MDNode>(DDI->getVariable()));
-
-    StringRef D = Var.getName();
-    if (!D.empty())
-      DisplayName = D;
-    LineNo = Var.getLineNumber();
-    Unit = Var.getCompileUnit();
-    TypeD = Var.getType();
-  }
-
-  StringRef T = TypeD.getName();
-  if (!T.empty())
-    Type = T;
-  StringRef F = Unit.getFilename();
-  if (!F.empty())
-    File = F;
-  StringRef D = Unit.getDirectory();
-  if (!D.empty())
-    Dir = D;
-  return true;
-}
-
 /// getDISubprogram - Find subprogram that is enclosing this scope.
 DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
   DIDescriptor D(Scope);
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 9f340942f2cc..cde431459d50 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -19,8 +19,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DomPrinter.h"
-
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/DOTGraphTraitsPass.h"
 #include "llvm/Analysis/PostDominators.h"
 
@@ -86,74 +84,90 @@ namespace {
 struct DomViewer
   : public DOTGraphTraitsViewer<DominatorTree, false> {
   static char ID;
-  DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){}
+  DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+    initializeDomViewerPass(*PassRegistry::getPassRegistry());
+  }
 };
 
 struct DomOnlyViewer
   : public DOTGraphTraitsViewer<DominatorTree, true> {
   static char ID;
-  DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){}
+  DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+    initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+  }
 };
 
 struct PostDomViewer
   : public DOTGraphTraitsViewer<PostDominatorTree, false> {
   static char ID;
   PostDomViewer() :
-    DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){}
+    DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+      initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+    }
 };
 
 struct PostDomOnlyViewer
   : public DOTGraphTraitsViewer<PostDominatorTree, true> {
   static char ID;
   PostDomOnlyViewer() :
-    DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){}
+    DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+      initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+    }
 };
 } // end anonymous namespace
 
 char DomViewer::ID = 0;
 INITIALIZE_PASS(DomViewer, "view-dom",
-                "View dominance tree of function", false, false);
+                "View dominance tree of function", false, false)
 
 char DomOnlyViewer::ID = 0;
 INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
                 "View dominance tree of function (with no function bodies)",
-                false, false);
+                false, false)
 
 char PostDomViewer::ID = 0;
 INITIALIZE_PASS(PostDomViewer, "view-postdom",
-                "View postdominance tree of function", false, false);
+                "View postdominance tree of function", false, false)
 
 char PostDomOnlyViewer::ID = 0;
 INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
                 "View postdominance tree of function "
                 "(with no function bodies)",
-                false, false);
+                false, false)
 
 namespace {
 struct DomPrinter
   : public DOTGraphTraitsPrinter<DominatorTree, false> {
   static char ID;
-  DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {}
+  DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+    initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+  }
 };
 
 struct DomOnlyPrinter
   : public DOTGraphTraitsPrinter<DominatorTree, true> {
   static char ID;
-  DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {}
+  DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+    initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+  }
 };
 
 struct PostDomPrinter
   : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
   static char ID;
   PostDomPrinter() :
-    DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {}
+    DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+      initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+    }
 };
 
 struct PostDomOnlyPrinter
   : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
   static char ID;
   PostDomOnlyPrinter() :
-    DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {}
+    DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+      initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+    }
 };
 } // end anonymous namespace
 
@@ -162,24 +176,24 @@ struct PostDomOnlyPrinter
 char DomPrinter::ID = 0;
 INITIALIZE_PASS(DomPrinter, "dot-dom",
                 "Print dominance tree of function to 'dot' file",
-                false, false);
+                false, false)
 
 char DomOnlyPrinter::ID = 0;
 INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
                 "Print dominance tree of function to 'dot' file "
                 "(with no function bodies)",
-                false, false);
+                false, false)
 
 char PostDomPrinter::ID = 0;
 INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
                 "Print postdominance tree of function to 'dot' file",
-                false, false);
+                false, false)
 
 char PostDomOnlyPrinter::ID = 0;
 INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
                 "Print postdominance tree of function to 'dot' file "
                 "(with no function bodies)",
-                false, false);
+                false, false)
 
 // Create methods available outside of this file, to use them
 // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 000000000000..6de4e1e1d7de
--- /dev/null
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+
+namespace {
+  class DFCalculateWorkObject {
+  public:
+    DFCalculateWorkObject(BasicBlock *B, BasicBlock *P, 
+                          const DomTreeNode *N,
+                          const DomTreeNode *PN)
+    : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+    BasicBlock *currentBB;
+    BasicBlock *parentBB;
+    const DomTreeNode *Node;
+    const DomTreeNode *parentNode;
+  };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+                             const DomTreeNode *Node) {
+  BasicBlock *BB = Node->getBlock();
+  DomSetType *Result = NULL;
+
+  std::vector<DFCalculateWorkObject> workList;
+  SmallPtrSet<BasicBlock *, 32> visited;
+
+  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+  do {
+    DFCalculateWorkObject *currentW = &workList.back();
+    assert (currentW && "Missing work object.");
+
+    BasicBlock *currentBB = currentW->currentBB;
+    BasicBlock *parentBB = currentW->parentBB;
+    const DomTreeNode *currentNode = currentW->Node;
+    const DomTreeNode *parentNode = currentW->parentNode;
+    assert (currentBB && "Invalid work object. Missing current Basic Block");
+    assert (currentNode && "Invalid work object. Missing current Node");
+    DomSetType &S = Frontiers[currentBB];
+
+    // Visit each block only once.
+    if (visited.count(currentBB) == 0) {
+      visited.insert(currentBB);
+
+      // Loop over CFG successors to calculate DFlocal[currentNode]
+      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+           SI != SE; ++SI) {
+        // Does Node immediately dominate this successor?
+        if (DT[*SI]->getIDom() != currentNode)
+          S.insert(*SI);
+      }
+    }
+
+    // At this point, S is DFlocal.  Now we union in DFup's of our children...
+    // Loop through and visit the nodes that Node immediately dominates (Node's
+    // children in the IDomTree)
+    bool visitChild = false;
+    for (DomTreeNode::const_iterator NI = currentNode->begin(), 
+           NE = currentNode->end(); NI != NE; ++NI) {
+      DomTreeNode *IDominee = *NI;
+      BasicBlock *childBB = IDominee->getBlock();
+      if (visited.count(childBB) == 0) {
+        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+                                                 IDominee, currentNode));
+        visitChild = true;
+      }
+    }
+
+    // If all children are visited or there is any child then pop this block
+    // from the workList.
+    if (!visitChild) {
+
+      if (!parentBB) {
+        Result = &S;
+        break;
+      }
+
+      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+      DomSetType &parentSet = Frontiers[parentBB];
+      for (; CDFI != CDFE; ++CDFI) {
+        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+          parentSet.insert(*CDFI);
+      }
+      workList.pop_back();
+    }
+
+  } while (!workList.empty());
+
+  return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    OS << "  DomFrontier for BB ";
+    if (I->first)
+      WriteAsOperand(OS, I->first, false);
+    else
+      OS << " <<exit node>>";
+    OS << " is:\t";
+    
+    const std::set<BasicBlock*> &BBs = I->second;
+    
+    for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+         I != E; ++I) {
+      OS << ' ';
+      if (*I)
+        WriteAsOperand(OS, *I, false);
+      else
+        OS << "<<exit node>>";
+    }
+    OS << "\n";
+  }
+}
+
+void DominanceFrontierBase::dump() const {
+  print(dbgs());
+}
+
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 007ad228ae56..8ffef29870ae 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -3,4 +3,5 @@ add_llvm_library(LLVMipa
   CallGraphSCCPass.cpp
   FindUsedTypes.cpp
   GlobalsModRef.cpp
+  IPA.cpp
   )
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index b3635283fda5..690c4b4b6f1a 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -43,7 +43,9 @@ class BasicCallGraph : public ModulePass, public CallGraph {
 public:
   static char ID; // Class identification, replacement for typeinfo
   BasicCallGraph() : ModulePass(ID), Root(0), 
-    ExternalCallingNode(0), CallsExternalNode(0) {}
+    ExternalCallingNode(0), CallsExternalNode(0) {
+      initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+    }
 
   // runOnModule - Compute the call graph for the specified module.
   virtual bool runOnModule(Module &M) {
@@ -171,9 +173,9 @@ private:
 
 } //End anonymous namespace
 
-static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
 INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
-                   "Basic CallGraph Construction", false, true, true);
+                   "Basic CallGraph Construction", false, true, true)
 
 char CallGraph::ID = 0;
 char BasicCallGraph::ID = 0;
@@ -228,6 +230,21 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
   return F;
 }
 
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+  assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+  assert(!FunctionMap.count(To) &&
+         "Pointing CallGraphNode at a function that already exists");
+  FunctionMapTy::iterator I = FunctionMap.find(From);
+  I->second->F = const_cast<Function*>(To);
+  FunctionMap[To] = I->second;
+  FunctionMap.erase(I);
+}
+
 // getOrInsertFunction - This method is identical to calling operator[], but
 // it will insert a new CallGraphNode for the specified function if one does
 // not already exist.
@@ -274,7 +291,6 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) {
   }
 }
 
-
 // removeAnyCallEdgeTo - This method removes any call edges from this node to
 // the specified callee function.  This takes more time to execute than
 // removeCallEdgeTo, so it should not be used unless necessary.
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index b7a27cb288d9..725ab72f5595 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -582,7 +582,6 @@ namespace {
     
   public:
     static char ID;
-    PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {}
     PrintCallGraphPass(const std::string &B, raw_ostream &o)
       : CallGraphSCCPass(ID), Banner(B), Out(o) {}
     
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 8eed9d6f68bc..06ae34cfd989 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 
 char FindUsedTypes::ID = 0;
 INITIALIZE_PASS(FindUsedTypes, "print-used-types",
-                "Find Used Types", false, true);
+                "Find Used Types", false, true)
 
 // IncorporateType - Incorporate one type and all of its subtypes into the
 // collection of used types.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 6759b0afdce3..116aaf418ea0 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/Statistic.h"
@@ -88,7 +89,9 @@ namespace {
 
   public:
     static char ID;
-    GlobalsModRef() : ModulePass(ID) {}
+    GlobalsModRef() : ModulePass(ID) {
+      initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M) {
       InitializeAliasAnalysis(this);                 // set up super class
@@ -106,10 +109,9 @@ namespace {
     //------------------------------------------------
     // Implement the AliasAnalysis API
     //
-    AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size);
+    AliasResult alias(const Location &LocA, const Location &LocB);
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Value *P, unsigned Size);
+                               const Location &Loc);
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
                                ImmutableCallSite CS2) {
       return AliasAnalysis::getModRefInfo(CS1, CS2);
@@ -119,32 +121,38 @@ namespace {
     /// called from the specified call site.  The call site may be null in which
     /// case the most generic behavior of this function should be returned.
     ModRefBehavior getModRefBehavior(const Function *F) {
+      ModRefBehavior Min = UnknownModRefBehavior;
+
       if (FunctionRecord *FR = getFunctionInfo(F)) {
         if (FR->FunctionEffect == 0)
-          return DoesNotAccessMemory;
+          Min = DoesNotAccessMemory;
         else if ((FR->FunctionEffect & Mod) == 0)
-          return OnlyReadsMemory;
+          Min = OnlyReadsMemory;
       }
-      return AliasAnalysis::getModRefBehavior(F);
+
+      return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
     }
     
     /// getModRefBehavior - Return the behavior of the specified function if
     /// called from the specified call site.  The call site may be null in which
     /// case the most generic behavior of this function should be returned.
     ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
-      const Function* F = CS.getCalledFunction();
-      if (!F) return AliasAnalysis::getModRefBehavior(CS);
-      if (FunctionRecord *FR = getFunctionInfo(F)) {
-        if (FR->FunctionEffect == 0)
-          return DoesNotAccessMemory;
-        else if ((FR->FunctionEffect & Mod) == 0)
-          return OnlyReadsMemory;
-      }
-      return AliasAnalysis::getModRefBehavior(CS);
+      ModRefBehavior Min = UnknownModRefBehavior;
+
+      if (const Function* F = CS.getCalledFunction())
+        if (FunctionRecord *FR = getFunctionInfo(F)) {
+          if (FR->FunctionEffect == 0)
+            Min = DoesNotAccessMemory;
+          else if ((FR->FunctionEffect & Mod) == 0)
+            Min = OnlyReadsMemory;
+        }
+
+      return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
     }
 
     virtual void deleteValue(Value *V);
     virtual void copyValue(Value *From, Value *To);
+    virtual void addEscapingUse(Use &U);
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
@@ -177,9 +185,13 @@ namespace {
 }
 
 char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis,
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
                 "globalsmodref-aa", "Simple mod/ref analysis for globals",    
-                false, true, false);
+                false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+                "globalsmodref-aa", "Simple mod/ref analysis for globals",    
+                false, true, false)
 
 Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
 
@@ -314,7 +326,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
         continue;
 
       // Check the value being stored.
-      Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
+      Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
 
       if (isMalloc(Ptr)) {
         // Okay, easy case.
@@ -476,11 +488,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
 /// other is some random pointer, we know there cannot be an alias, because the
 /// address of the global isn't taken.
 AliasAnalysis::AliasResult
-GlobalsModRef::alias(const Value *V1, unsigned V1Size,
-                     const Value *V2, unsigned V2Size) {
+GlobalsModRef::alias(const Location &LocA,
+                     const Location &LocB) {
   // Get the base object these pointers point to.
-  const Value *UV1 = V1->getUnderlyingObject();
-  const Value *UV2 = V2->getUnderlyingObject();
+  const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+  const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
 
   // If either of the underlying values is a global, they may be non-addr-taken
   // globals, which we can answer queries about.
@@ -528,17 +540,18 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
   if ((GV1 || GV2) && GV1 != GV2)
     return NoAlias;
 
-  return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+  return AliasAnalysis::alias(LocA, LocB);
 }
 
 AliasAnalysis::ModRefResult
 GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
-                             const Value *P, unsigned Size) {
+                             const Location &Loc) {
   unsigned Known = ModRef;
 
   // If we are asking for mod/ref info of a direct call with a pointer to a
   // global we are tracking, return information if we have it.
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+  if (const GlobalValue *GV =
+        dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
     if (GV->hasLocalLinkage())
       if (const Function *F = CS.getCalledFunction())
         if (NonAddressTakenGlobals.count(GV))
@@ -547,7 +560,7 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
 
   if (Known == NoModRef)
     return NoModRef; // No need to query other mod/ref analyses
-  return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size));
+  return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
 }
 
 
@@ -584,3 +597,13 @@ void GlobalsModRef::deleteValue(Value *V) {
 void GlobalsModRef::copyValue(Value *From, Value *To) {
   AliasAnalysis::copyValue(From, To);
 }
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+  // For the purposes of this analysis, it is conservatively correct to treat
+  // a newly escaping value equivalently to a deleted one.  We could perhaps
+  // be more precise by processing the new use and attempting to update our
+  // saved analysis results to accomodate it.
+  deleteValue(U);
+  
+  AliasAnalysis::addEscapingUse(U);
+}
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 000000000000..0ba2e04c6302
--- /dev/null
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+  initializeBasicCallGraphPass(Registry);
+  initializeCallGraphAnalysisGroup(Registry);
+  initializeFindUsedTypesPass(Registry);
+  initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+  initializeIPA(*unwrap(R));
+}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index cdf667ad6eed..c8382186df3a 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -28,7 +29,13 @@
 using namespace llvm;
 
 char IVUsers::ID = 0;
-INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true);
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+                      "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+                      "Induction Variable Users", false, true)
 
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
@@ -143,7 +150,8 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
 }
 
 IVUsers::IVUsers()
- : LoopPass(ID) {
+    : LoopPass(ID) {
+  initializeIVUsersPass(*PassRegistry::getPassRegistry());
 }
 
 void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 3e550f35c255..47f91cfc3bed 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -16,97 +16,8 @@
 #include "llvm/CallingConv.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/SmallPtrSet.h"
-using namespace llvm;
-
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned InlineCostAnalyzer::FunctionInfo::
-CountCodeReductionForConstant(Value *V) {
-  unsigned Reduction = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    User *U = *UI;
-    if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
-      // We will be able to eliminate all but one of the successors.
-      const TerminatorInst &TI = cast<TerminatorInst>(*U);
-      const unsigned NumSucc = TI.getNumSuccessors();
-      unsigned Instrs = 0;
-      for (unsigned I = 0; I != NumSucc; ++I)
-        Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)];
-      // We don't know which blocks will be eliminated, so use the average size.
-      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
-    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
-      // Turning an indirect call into a direct call is a BIG win
-      if (CI->getCalledValue() == V)
-        Reduction += InlineConstants::IndirectCallBonus;
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
-      // Turning an indirect call into a direct call is a BIG win
-      if (II->getCalledValue() == V)
-        Reduction += InlineConstants::IndirectCallBonus;
-    } else {
-      // Figure out if this instruction will be removed due to simple constant
-      // propagation.
-      Instruction &Inst = cast<Instruction>(*U);
-
-      // We can't constant propagate instructions which have effects or
-      // read memory.
-      //
-      // FIXME: It would be nice to capture the fact that a load from a
-      // pointer-to-constant-global is actually a *really* good thing to zap.
-      // Unfortunately, we don't know the pointer that may get propagated here,
-      // so we can't make this decision.
-      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst))
-        continue;
-
-      bool AllOperandsConstant = true;
-      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
-        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
-          AllOperandsConstant = false;
-          break;
-        }
 
-      if (AllOperandsConstant) {
-        // We will get to remove this instruction...
-        Reduction += InlineConstants::InstrCost;
-
-        // And any other instructions that use it which become constants
-        // themselves.
-        Reduction += CountCodeReductionForConstant(&Inst);
-      }
-    }
-  }
-  return Reduction;
-}
-
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned InlineCostAnalyzer::FunctionInfo::
-         CountCodeReductionForAlloca(Value *V) {
-  if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
-  unsigned Reduction = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    Instruction *I = cast<Instruction>(*UI);
-    if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += InlineConstants::InstrCost;
-    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-      // If the GEP has variable indices, we won't be able to do much with it.
-      if (GEP->hasAllConstantIndices())
-        Reduction += CountCodeReductionForAlloca(GEP);
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-      // Track pointer through bitcasts.
-      Reduction += CountCodeReductionForAlloca(BCI);
-    } else {
-      // If there is some other strange instruction, we're not going to be able
-      // to do much if we inline this.
-      return 0;
-    }
-  }
-
-  return Reduction;
-}
+using namespace llvm;
 
 /// callIsSmall - If a call is likely to lower to a single target instruction,
 /// or is otherwise deemed small return true.
@@ -160,6 +71,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
       // variables as volatile if they are live across a setjmp call, and they
       // probably won't do this in callers.
       if (const Function *F = CS.getCalledFunction()) {
+        // If a function is both internal and has a single use, then it is 
+        // extremely likely to get inlined in the future (it was probably 
+        // exposed by an interleaved devirtualization pass).
+        if (F->hasInternalLinkage() && F->hasOneUse())
+          ++NumInlineCandidates;
+        
         if (F->isDeclaration() && 
             (F->getName() == "setjmp" || F->getName() == "_setjmp"))
           callsSetJmp = true;
@@ -226,6 +143,86 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
   NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
 }
 
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
+  unsigned Reduction = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    User *U = *UI;
+    if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+      // We will be able to eliminate all but one of the successors.
+      const TerminatorInst &TI = cast<TerminatorInst>(*U);
+      const unsigned NumSucc = TI.getNumSuccessors();
+      unsigned Instrs = 0;
+      for (unsigned I = 0; I != NumSucc; ++I)
+        Instrs += NumBBInsts[TI.getSuccessor(I)];
+      // We don't know which blocks will be eliminated, so use the average size.
+      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+    } else {
+      // Figure out if this instruction will be removed due to simple constant
+      // propagation.
+      Instruction &Inst = cast<Instruction>(*U);
+
+      // We can't constant propagate instructions which have effects or
+      // read memory.
+      //
+      // FIXME: It would be nice to capture the fact that a load from a
+      // pointer-to-constant-global is actually a *really* good thing to zap.
+      // Unfortunately, we don't know the pointer that may get propagated here,
+      // so we can't make this decision.
+      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+          isa<AllocaInst>(Inst))
+        continue;
+
+      bool AllOperandsConstant = true;
+      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+          AllOperandsConstant = false;
+          break;
+        }
+
+      if (AllOperandsConstant) {
+        // We will get to remove this instruction...
+        Reduction += InlineConstants::InstrCost;
+
+        // And any other instructions that use it which become constants
+        // themselves.
+        Reduction += CountCodeReductionForConstant(&Inst);
+      }
+    }
+  }
+  return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
+  unsigned Reduction = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    Instruction *I = cast<Instruction>(*UI);
+    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+      Reduction += InlineConstants::InstrCost;
+    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      // If the GEP has variable indices, we won't be able to do much with it.
+      if (GEP->hasAllConstantIndices())
+        Reduction += CountCodeReductionForAlloca(GEP);
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+      // Track pointer through bitcasts.
+      Reduction += CountCodeReductionForAlloca(BCI);
+    } else {
+      // If there is some other strange instruction, we're not going to be able
+      // to do much if we inline this.
+      return 0;
+    }
+  }
+
+  return Reduction;
+}
+
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void CodeMetrics::analyzeFunction(Function *F) {
@@ -245,76 +242,246 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
   if (Metrics.NumRets==1)
     --Metrics.NumInsts;
 
-  // Don't bother calculating argument weights if we are never going to inline
-  // the function anyway.
-  if (NeverInline())
-    return;
-
   // Check out all of the arguments to the function, figuring out how much
   // code can be eliminated if one of the arguments is a constant.
   ArgumentWeights.reserve(F->arg_size());
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
-    ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
-                                      CountCodeReductionForAlloca(I)));
+    ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
+                                      Metrics.CountCodeReductionForAlloca(I)));
 }
 
 /// NeverInline - returns true if the function should never be inlined into
 /// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline()
-{
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
   return (Metrics.callsSetJmp || Metrics.isRecursive || 
           Metrics.containsIndirectBr);
+}
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with argument
+// specializedArgNo replaced by a constant.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+         SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+  if (Callee->mayBeOverridden())
+    return 0;
+  
+  int Bonus = 0;
+  // If this function uses the coldcc calling convention, prefer not to
+  // specialize it.
+  if (Callee->getCallingConv() == CallingConv::Cold)
+    Bonus -= InlineConstants::ColdccPenalty;
+  
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+  
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
 
+  unsigned ArgNo = 0;
+  unsigned i = 0;
+  for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+       I != E; ++I, ++ArgNo)
+    if (ArgNo == SpecializedArgNos[i]) {
+      ++i;
+      Bonus += CountBonusForConstant(I);
+    }
+
+  // Calls usually take a long time, so they make the specialization gain 
+  // smaller.
+  Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+  return Bonus;
 }
-// getInlineCost - The heuristic used to determine if we should inline the
-// function call or not.
-//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
-                               SmallPtrSet<const Function*, 16> &NeverInline) {
-  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function. We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus providing a limit on
+// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
+// inlining because we decide we don't want to give a bonus for
+// devirtualizing.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+  
+  // This could just be NULL.
+  if (!C) return 0;
+  
+  Function *F = dyn_cast<Function>(C);
+  if (!F) return 0;
+  
+  int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+  return (Bonus > 0) ? 0 : Bonus;
 }
 
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
-                               Function *Callee,
-                               SmallPtrSet<const Function*, 16> &NeverInline) {
-  Instruction *TheCall = CS.getInstruction();
-  Function *Caller = TheCall->getParent()->getParent();
-  bool isDirectCall = CS.getCalledFunction() == Callee;
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+  unsigned Bonus = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    User *U = *UI;
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (CI->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(CI), C);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (II->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(II), C);
+    }
+    // FIXME: Eliminating conditional branches and switches should
+    // also yield a per-call performance boost.
+    else {
+      // Figure out the bonuses that wll accrue due to simple constant
+      // propagation.
+      Instruction &Inst = cast<Instruction>(*U);
 
-  // Don't inline functions which can be redefined at link-time to mean
-  // something else.  Don't inline functions marked noinline or call sites
-  // marked noinline.
-  if (Callee->mayBeOverridden() ||
-      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
-      CS.isNoInline())
-    return llvm::InlineCost::getNever();
+      // We can't constant propagate instructions which have effects or
+      // read memory.
+      //
+      // FIXME: It would be nice to capture the fact that a load from a
+      // pointer-to-constant-global is actually a *really* good thing to zap.
+      // Unfortunately, we don't know the pointer that may get propagated here,
+      // so we can't make this decision.
+      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+          isa<AllocaInst>(Inst))
+        continue;
 
+      bool AllOperandsConstant = true;
+      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+          AllOperandsConstant = false;
+          break;
+        }
+
+      if (AllOperandsConstant)
+        Bonus += CountBonusForConstant(&Inst);
+    }
+  }
+  
+  return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+  
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+  
   // InlineCost - This value measures how good of an inline candidate this call
   // site is to inline.  A lower inline cost make is more likely for the call to
   // be inlined.  This value may go negative.
   //
   int InlineCost = 0;
 
+  // Compute any size reductions we can expect due to arguments being passed into
+  // the function.
+  //
+  unsigned ArgNo = 0;
+  CallSite::arg_iterator I = CS.arg_begin();
+  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+       FI != FE; ++I, ++FI, ++ArgNo) {
+
+    // If an alloca is passed in, inlining this function is likely to allow
+    // significant future optimization possibilities (like scalar promotion, and
+    // scalarization), so encourage the inlining of the function.
+    //
+    if (isa<AllocaInst>(I))
+      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+    // If this is a constant being passed into the function, use the argument
+    // weights calculated for the callee to determine how much will be folded
+    // away with this information.
+    else if (isa<Constant>(I))
+      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;       
+  }
+  
+  // Each argument passed in has a cost at both the caller and the callee
+  // sides.  Measurements show that each argument costs about the same as an
+  // instruction.
+  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+
+  // Now that we have considered all of the factors that make the call site more
+  // likely to be inlined, look at factors that make us not want to inline it.
+
+  // Calls usually take a long time, so they make the inlining gain smaller.
+  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+  // Look at the size of the callee. Each instruction counts as 5.
+  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+  
+  return InlineCost;
+}
+
+int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+  
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+    
+  bool isDirectCall = CS.getCalledFunction() == Callee;
+  Instruction *TheCall = CS.getInstruction();
+  int Bonus = 0;
+  
   // If there is only one call of the function, and it has internal linkage,
   // make it almost guaranteed to be inlined.
   //
   if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
-    InlineCost += InlineConstants::LastCallToStaticBonus;
-  
-  // If this function uses the coldcc calling convention, prefer not to inline
-  // it.
-  if (Callee->getCallingConv() == CallingConv::Cold)
-    InlineCost += InlineConstants::ColdccPenalty;
+    Bonus += InlineConstants::LastCallToStaticBonus;
   
   // If the instruction after the call, or if the normal destination of the
   // invoke is an unreachable instruction, the function is noreturn.  As such,
   // there is little point in inlining this.
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-      InlineCost += InlineConstants::NoreturnPenalty;
+      Bonus += InlineConstants::NoreturnPenalty;
   } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
-    InlineCost += InlineConstants::NoreturnPenalty;
+    Bonus += InlineConstants::NoreturnPenalty;
+  
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (Callee->getCallingConv() == CallingConv::Cold)
+    Bonus += InlineConstants::ColdccPenalty;
   
+  // Add to the inline quality for properties that make the call valuable to
+  // inline.  This includes factors that indicate that the result of inlining
+  // the function will be optimizable.  Currently this just looks at arguments
+  // passed into the function.
+  //
+  CallSite::arg_iterator I = CS.arg_begin();
+  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+       FI != FE; ++I, ++FI)
+    // Compute any constant bonus due to inlining we want to give here.
+    if (isa<Constant>(I))
+      Bonus += CountBonusForConstant(FI, cast<Constant>(I));
+      
+  return Bonus;
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               Function *Callee,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  Instruction *TheCall = CS.getInstruction();
+  Function *Caller = TheCall->getParent()->getParent();
+
+  // Don't inline functions which can be redefined at link-time to mean
+  // something else.  Don't inline functions marked noinline or call sites
+  // marked noinline.
+  if (Callee->mayBeOverridden() ||
+      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+      CS.isNoInline())
+    return llvm::InlineCost::getNever();
+
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
   
@@ -353,46 +520,45 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
       return InlineCost::getNever();
   }
 
-  // Add to the inline quality for properties that make the call valuable to
-  // inline.  This includes factors that indicate that the result of inlining
-  // the function will be optimizable.  Currently this just looks at arguments
-  // passed into the function.
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline.  A lower inline cost make is more likely for the call to
+  // be inlined.  This value may go negative due to the fact that bonuses
+  // are negative numbers.
   //
-  unsigned ArgNo = 0;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I, ++ArgNo) {
-    // Each argument passed in has a cost at both the caller and the callee
-    // sides.  Measurements show that each argument costs about the same as an
-    // instruction.
-    InlineCost -= InlineConstants::InstrCost;
+  int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+  return llvm::InlineCost::get(InlineCost);
+}
 
-    // If an alloca is passed in, inlining this function is likely to allow
-    // significant future optimization possibilities (like scalar promotion, and
-    // scalarization), so encourage the inlining of the function.
-    //
-    if (isa<AllocaInst>(I)) {
-      if (ArgNo < CalleeFI->ArgumentWeights.size())
-        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
-      // If this is a constant being passed into the function, use the argument
-      // weights calculated for the callee to determine how much will be folded
-      // away with this information.
-    } else if (isa<Constant>(I)) {
-      if (ArgNo < CalleeFI->ArgumentWeights.size())
-        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
-    }
-  }
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+                               SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+  // Don't specialize functions which can be redefined at link-time to mean
+  // something else.
+  if (Callee->mayBeOverridden())
+    return llvm::InlineCost::getNever();
   
-  // Now that we have considered all of the factors that make the call site more
-  // likely to be inlined, look at factors that make us not want to inline it.
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+  
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
 
-  // Calls usually take a long time, so they make the inlining gain smaller.
-  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+  int Cost = 0;
+  
+  // Look at the orginal size of the callee.  Each instruction counts as 5.
+  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
 
-  // Look at the size of the callee. Each instruction counts as 5.
-  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+  // Offset that with the amount of code that can be constant-folded
+  // away with the given arguments replaced by constants.
+  for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+       ae = SpecializedArgNos.end(); an != ae; ++an)
+    Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
 
-  return llvm::InlineCost::get(InlineCost);
+  return llvm::InlineCost::get(Cost);
 }
 
 // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index dcbcac005a2f..3b385d26ba3c 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -51,7 +51,9 @@ namespace {
     }
   public:
     static char ID; // Pass identification, replacement for typeid
-    InstCount() : FunctionPass(ID) {}
+    InstCount() : FunctionPass(ID) {
+      initializeInstCountPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -65,7 +67,7 @@ namespace {
 
 char InstCount::ID = 0;
 INITIALIZE_PASS(InstCount, "instcount",
-                "Counts the various types of Instructions", false, true);
+                "Counts the various types of Instructions", false, true)
 
 FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
 
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 24cd3433a2ca..a2f9862383fd 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -8,179 +8,1267 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements routines for folding instructions into simpler forms
-// that do not require creating new instructions.  For example, this does
-// constant folding, and can handle identities like (X&0)->0.
+// that do not require creating new instructions.  This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x").  All operands are assumed to have already been
+// simplified: This is usually true and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but maybe suboptimal).
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand,  "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+                            const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+                             const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    // Arguments and constants dominate all instructions.
+    return true;
+
+  // If we have a DominatorTree then do a precise test.
+  if (DT)
+    return DT->dominates(I, P);
+
+  // Otherwise, if the instruction is in the entry block, and is not an invoke,
+  // then it obviously dominates all phi nodes.
+  if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+      !isa<InvokeInst>(I))
+    return true;
+
+  return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)".  Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                          unsigned OpcToExpand, const TargetData *TD,
+                          const DominatorTree *DT, unsigned MaxRecurse) {
+  Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Check whether the expression has the form "(A op' B) op C".
+  if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+    if (Op0->getOpcode() == OpcodeToExpand) {
+      // It does!  Try turning it into "(A op C) op' (B op C)".
+      Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+      // Do "A op C" and "B op C" both simplify?
+      if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+          // They do! Return "L op' R" if it simplifies or is already available.
+          // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+          if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+                                     && L == B && R == A)) {
+            ++NumExpand;
+            return LHS;
+          }
+          // Otherwise return "L op' R" if it simplifies.
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+                                       MaxRecurse)) {
+            ++NumExpand;
+            return V;
+          }
+        }
+    }
+
+  // Check whether the expression has the form "A op (B op' C)".
+  if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+    if (Op1->getOpcode() == OpcodeToExpand) {
+      // It does!  Try turning it into "(A op B) op' (A op C)".
+      Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+      // Do "A op B" and "A op C" both simplify?
+      if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+          // They do! Return "L op' R" if it simplifies or is already available.
+          // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+          if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+                                     && L == C && R == B)) {
+            ++NumExpand;
+            return RHS;
+          }
+          // Otherwise return "L op' R" if it simplifies.
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+                                       MaxRecurse)) {
+            ++NumExpand;
+            return V;
+          }
+        }
+    }
+
+  return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract.  For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                             unsigned OpcToExtract, const TargetData *TD,
+                             const DominatorTree *DT, unsigned MaxRecurse) {
+  Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+  if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+      !Op1 || Op1->getOpcode() != OpcodeToExtract)
+    return 0;
+
+  // The expression has the form "(A op' B) op (C op' D)".
+  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+  Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+  // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+  // commutative case, "(A op' B) op (C op' A)"?
+  if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+    Value *DD = A == C ? D : C;
+    // Form "A op' (B op DD)" if it simplifies completely.
+    // Does "B op DD" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+      // It does!  Return "A op' V" if it simplifies or is already available.
+      // If V equals B then "A op' V" is just the LHS.  If V equals DD then
+      // "A op' V" is just the RHS.
+      if (V == B || V == DD) {
+        ++NumFactor;
+        return V == B ? LHS : RHS;
+      }
+      // Otherwise return "A op' V" if it simplifies.
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+        ++NumFactor;
+        return W;
+      }
+    }
+  }
+
+  // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+  // commutative case, "(A op' B) op (B op' D)"?
+  if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+    Value *CC = B == D ? C : D;
+    // Form "(A op CC) op' B" if it simplifies completely..
+    // Does "A op CC" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op' B" if it simplifies or is already available.
+      // If V equals A then "V op' B" is just the LHS.  If V equals CC then
+      // "V op' B" is just the RHS.
+      if (V == A || V == CC) {
+        ++NumFactor;
+        return V == A ? LHS : RHS;
+      }
+      // Otherwise return "V op' B" if it simplifies.
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+        ++NumFactor;
+        return W;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations.  Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+                                       const TargetData *TD,
+                                       const DominatorTree *DT,
+                                       unsigned MaxRecurse) {
+  Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+  assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+  // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+  if (Op0 && Op0->getOpcode() == Opcode) {
+    Value *A = Op0->getOperand(0);
+    Value *B = Op0->getOperand(1);
+    Value *C = RHS;
+
+    // Does "B op C" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+      // It does!  Return "A op V" if it simplifies or is already available.
+      // If V equals B then "A op V" is just the LHS.
+      if (V == B) return LHS;
+      // Otherwise return "A op V" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+  if (Op1 && Op1->getOpcode() == Opcode) {
+    Value *A = LHS;
+    Value *B = Op1->getOperand(0);
+    Value *C = Op1->getOperand(1);
+
+    // Does "A op B" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op C" if it simplifies or is already available.
+      // If V equals B then "V op C" is just the RHS.
+      if (V == B) return RHS;
+      // Otherwise return "V op C" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // The remaining transforms require commutativity as well as associativity.
+  if (!Instruction::isCommutative(Opcode))
+    return 0;
+
+  // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+  if (Op0 && Op0->getOpcode() == Opcode) {
+    Value *A = Op0->getOperand(0);
+    Value *B = Op0->getOperand(1);
+    Value *C = RHS;
+
+    // Does "C op A" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op B" if it simplifies or is already available.
+      // If V equals A then "V op B" is just the LHS.
+      if (V == A) return LHS;
+      // Otherwise return "V op B" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+  if (Op1 && Op1->getOpcode() == Opcode) {
+    Value *A = LHS;
+    Value *B = Op1->getOperand(0);
+    Value *C = Op1->getOperand(1);
+
+    // Does "C op A" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      // It does!  Return "B op V" if it simplifies or is already available.
+      // If V equals C then "B op V" is just the RHS.
+      if (V == C) return RHS;
+      // Otherwise return "B op V" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+                                    const TargetData *TD,
+                                    const DominatorTree *DT,
+                                    unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  SelectInst *SI;
+  if (isa<SelectInst>(LHS)) {
+    SI = cast<SelectInst>(LHS);
+  } else {
+    assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+    SI = cast<SelectInst>(RHS);
+  }
+
+  // Evaluate the BinOp on the true and false branches of the select.
+  Value *TV;
+  Value *FV;
+  if (SI == LHS) {
+    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+  } else {
+    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+  }
+
+  // If they simplified to the same value, then return the common value.
+  // If they both failed to simplify then return null.
+  if (TV == FV)
+    return TV;
+
+  // If one branch simplified to undef, return the other one.
+  if (TV && isa<UndefValue>(TV))
+    return FV;
+  if (FV && isa<UndefValue>(FV))
+    return TV;
+
+  // If applying the operation did not change the true and false select values,
+  // then the result of the binop is the select itself.
+  if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+    return SI;
+
+  // If one branch simplified and the other did not, and the simplified
+  // value is equal to the unsimplified one, return the simplified value.
+  // For example, select (cond, X, X & Z) & Z -> X & Z.
+  if ((FV && !TV) || (TV && !FV)) {
+    // Check that the simplified value has the form "X op Y" where "op" is the
+    // same as the original operation.
+    Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+    if (Simplified && Simplified->getOpcode() == Opcode) {
+      // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+      // We already know that "op" is the same as for the simplified value.  See
+      // if the operands match too.  If so, return the simplified value.
+      Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+      Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+      Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+      if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+          Simplified->getOperand(1) == UnsimplifiedRHS)
+        return Simplified;
+      if (Simplified->isCommutative() &&
+          Simplified->getOperand(1) == UnsimplifiedLHS &&
+          Simplified->getOperand(0) == UnsimplifiedRHS)
+        return Simplified;
+    }
+  }
+
+  return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value.  Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+                                  Value *RHS, const TargetData *TD,
+                                  const DominatorTree *DT,
+                                  unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Make sure the select is on the LHS.
+  if (!isa<SelectInst>(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+  assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+  SelectInst *SI = cast<SelectInst>(LHS);
+
+  // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+  // Does "cmp TV, RHS" simplify?
+  if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+                                    MaxRecurse)) {
+    // It does!  Does "cmp FV, RHS" simplify?
+    if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+                                      MaxRecurse)) {
+      // It does!  If they simplified to the same value, then use it as the
+      // result of the original comparison.
+      if (TCmp == FCmp)
+        return TCmp;
+      Value *Cond = SI->getCondition();
+      // If the false value simplified to false, then the result of the compare
+      // is equal to "Cond && TCmp".  This also catches the case when the false
+      // value simplified to false and the true value to true, returning "Cond".
+      if (match(FCmp, m_Zero()))
+        if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+          return V;
+      // If the true value simplified to true, then the result of the compare
+      // is equal to "Cond || FCmp".
+      if (match(TCmp, m_One()))
+        if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+          return V;
+      // Finally, if the false value simplified to true and the true value to
+      // false, then the result of the compare is equal to "!Cond".
+      if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+        if (Value *V =
+            SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+                            TD, DT, MaxRecurse))
+          return V;
+    }
+  }
+
+  return 0;
+}
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value.  If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+                                 const TargetData *TD, const DominatorTree *DT,
+                                 unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  PHINode *PI;
+  if (isa<PHINode>(LHS)) {
+    PI = cast<PHINode>(LHS);
+    // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(RHS, PI, DT))
+      return 0;
+  } else {
+    assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+    PI = cast<PHINode>(RHS);
+    // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(LHS, PI, DT))
+      return 0;
+  }
+
+  // Evaluate the BinOp on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = PI == LHS ?
+      SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+      SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // to previously, then give up.
+    if (!V || (CommonValue && V != CommonValue))
+      return 0;
+    CommonValue = V;
+  }
+
+  return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// try to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time.  If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Make sure the phi is on the LHS.
+  if (!isa<PHINode>(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+  assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+  PHINode *PI = cast<PHINode>(LHS);
+
+  // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+  if (!ValueDominatesPHI(RHS, PI, DT))
+    return 0;
+
+  // Evaluate the BinOp on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // to previously, then give up.
+    if (!V || (CommonValue && V != CommonValue))
+      return 0;
+    CommonValue = V;
+  }
+
+  return CommonValue;
+}
+
 /// SimplifyAddInst - Given operands for an Add, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const TargetData *TD) {
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
                                       Ops, 2, TD);
     }
-    
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // X + undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // X + 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X + (Y - X) -> Y
+  // (Y - X) + X -> Y
+  // Eg: X + -X -> 0
+  Value *Y = 0;
+  if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+      match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+    return Y;
+
+  // X + ~X -> -1   since   ~X = -X-1
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  /// i1 add -> xor.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Mul distributes over Add.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // Threading Add over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A + select(cond, B, C)" means evaluating
+  // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A+B" and "A+C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0))
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+  // X - undef -> undef
+  // undef - X -> undef
+  if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+    return UndefValue::get(Op0->getType());
+
+  // X - 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X - X -> 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // (X*2) - X -> X
+  // (X<<1) - X -> X
+  Value *X = 0;
+  if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+      match(Op0, m_Shl(m_Specific(Op1), m_One())))
+    return Op1;
+
+  // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+  // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+  Value *Y = 0, *Z = Op1;
+  if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+    // See if "V === Y - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "X + V" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+    // See if "V === X - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "Y + V" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+  }
+
+  // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+  // For example, X - (X + 1) -> -1
+  X = Op0;
+  if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+    // See if "V === X - Y" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V - Z" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+    // See if "V === X - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V - Y" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+  }
+
+  // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+  // For example, X - (X - Y) -> Y.
+  Z = Op0;
+  if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+    // See if "V === Z - X" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V + Y" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+
+  // Mul distributes over Sub.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // i1 sub -> xor.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Threading Sub over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A - select(cond, B, C)" means evaluating
+  // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A-B" and "A-C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
     // Canonicalize the constant to the RHS.
     std::swap(Op0, Op1);
   }
-  
-  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-    // X + undef -> undef
-    if (isa<UndefValue>(Op1C))
-      return Op1C;
-    
-    // X + 0 --> X
-    if (Op1C->isNullValue())
-      return Op0;
-  }
-  
-  // FIXME: Could pull several more out of instcombine.
+
+  // X * undef -> 0
+  if (match(Op1, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // X * 0 -> 0
+  if (match(Op1, m_Zero()))
+    return Op1;
+
+  // X * 1 -> X
+  if (match(Op1, m_One()))
+    return Op0;
+
+  // (X / Y) * Y -> X if the division is exact.
+  Value *X = 0, *Y = 0;
+  if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+      (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+    BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+    if (Div->isExact())
+      return X;
+  }
+
+  // i1 mul -> and.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Mul distributes over Add.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+                          const TargetData *TD, const DominatorTree *DT,
+                          unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  bool isSigned = Opcode == Instruction::SDiv;
+
+  // X / undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // undef / X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // 0 / X -> 0, we don't need to preserve faults!
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X / 1 -> X
+  if (match(Op1, m_One()))
+    return Op0;
+
+  if (Op0->getType()->isIntegerTy(1))
+    // It can't be division by zero, hence it must be division by one.
+    return Op0;
+
+  // X / X -> 1
+  if (Op0 == Op1)
+    return ConstantInt::get(Op0->getType(), 1);
+
+  // (X * Y) / Y -> X if the multiplication does not overflow.
+  Value *X = 0, *Y = 0;
+  if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+    if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+    BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+    // If the Mul knows it does not overflow, then we are good to go.
+    if ((isSigned && Mul->hasNoSignedWrap()) ||
+        (!isSigned && Mul->hasNoUnsignedWrap()))
+      return X;
+    // If X has the form X = A / Y then X * Y cannot overflow.
+    if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+      if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+        return X;
+  }
+
+  // (X rem Y) / Y -> 0
+  if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+      (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+    return Constant::getNullValue(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+                               const DominatorTree *, unsigned) {
+  // undef / X -> undef    (the undef could be a snan).
+  if (match(Op0, m_Undef()))
+    return Op0;
+
+  // X / undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+                            const TargetData *TD, const DominatorTree *DT,
+                            unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  // 0 shift by X -> 0
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X shift by 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X shift by undef -> undef because it may shift by the bitwidth.
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // Shifting by the bitwidth or more is undefined.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+    if (CI->getValue().getLimitedValue() >=
+        Op0->getType()->getScalarSizeInBits())
+      return UndefValue::get(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // undef << X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // (X >> A) << A -> X
+  Value *X;
+  if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+      cast<PossiblyExactOperator>(Op0)->isExact())
+    return X;
+  return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // undef >>l X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // (X << A) >> A -> X
+  Value *X;
+  if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+      cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+    return X;
+
   return 0;
 }
 
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // all ones >>a X -> all ones
+  if (match(Op0, m_AllOnes()))
+    return Op0;
+
+  // undef >>a X -> all ones
+  if (match(Op0, m_Undef()))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // (X << A) >> A -> X
+  Value *X;
+  if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+      cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+    return X;
+
+  return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
 /// SimplifyAndInst - Given operands for an And, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
                                       Ops, 2, TD);
     }
-  
+
     // Canonicalize the constant to the RHS.
     std::swap(Op0, Op1);
   }
-  
+
   // X & undef -> 0
-  if (isa<UndefValue>(Op1))
+  if (match(Op1, m_Undef()))
     return Constant::getNullValue(Op0->getType());
-  
+
   // X & X = X
   if (Op0 == Op1)
     return Op0;
-  
-  // X & <0,0> = <0,0>
-  if (isa<ConstantAggregateZero>(Op1))
+
+  // X & 0 = 0
+  if (match(Op1, m_Zero()))
     return Op1;
-  
-  // X & <-1,-1> = X
-  if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
-    if (CP->isAllOnesValue())
-      return Op0;
-  
-  if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
-    // X & 0 = 0
-    if (Op1CI->isZero())
-      return Op1CI;
-    // X & -1 = X
-    if (Op1CI->isAllOnesValue())
-      return Op0;
-  }
-  
+
+  // X & -1 = X
+  if (match(Op1, m_AllOnes()))
+    return Op0;
+
   // A & ~A  =  ~A & A  =  0
-  Value *A, *B;
-  if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
-      (match(Op1, m_Not(m_Value(A))) && A == Op0))
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
     return Constant::getNullValue(Op0->getType());
-  
+
   // (A | ?) & A = A
+  Value *A = 0, *B = 0;
   if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
       (A == Op1 || B == Op1))
     return Op1;
-  
+
   // A & (A | ?) = A
   if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
       (A == Op0 || B == Op0))
     return Op0;
-  
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // And distributes over Or.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // And distributes over Xor.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // Or distributes over And.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
   return 0;
 }
 
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
 /// SimplifyOrInst - Given operands for an Or, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
                                       Ops, 2, TD);
     }
-    
+
     // Canonicalize the constant to the RHS.
     std::swap(Op0, Op1);
   }
-  
+
   // X | undef -> -1
-  if (isa<UndefValue>(Op1))
+  if (match(Op1, m_Undef()))
     return Constant::getAllOnesValue(Op0->getType());
-  
+
   // X | X = X
   if (Op0 == Op1)
     return Op0;
 
-  // X | <0,0> = X
-  if (isa<ConstantAggregateZero>(Op1))
+  // X | 0 = X
+  if (match(Op1, m_Zero()))
     return Op0;
-  
-  // X | <-1,-1> = <-1,-1>
-  if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
-    if (CP->isAllOnesValue())            
-      return Op1;
-  
-  if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
-    // X | 0 = X
-    if (Op1CI->isZero())
-      return Op0;
-    // X | -1 = -1
-    if (Op1CI->isAllOnesValue())
-      return Op1CI;
-  }
-  
+
+  // X | -1 = -1
+  if (match(Op1, m_AllOnes()))
+    return Op1;
+
   // A | ~A  =  ~A | A  =  -1
-  Value *A, *B;
-  if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
-      (match(Op1, m_Not(m_Value(A))) && A == Op0))
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
     return Constant::getAllOnesValue(Op0->getType());
-  
+
   // (A & ?) | A = A
+  Value *A = 0, *B = 0;
   if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
       (A == Op1 || B == Op1))
     return Op1;
-  
+
   // A | (A & ?) = A
   if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
       (A == Op0 || B == Op0))
     return Op0;
-  
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Or distributes over And.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // And distributes over Or.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
   return 0;
 }
 
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+                            const DominatorTree *DT) {
+  return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // A ^ undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // A ^ 0 = A
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // A ^ A = 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // A ^ ~A  =  ~A ^ A  =  -1
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // And distributes over Xor.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // Threading Xor over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+  // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A^B" and "A^C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
 
 static const Type *GetCompareTy(Value *Op) {
   return CmpInst::makeCmpResultType(Op->getType());
 }
 
-
 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const TargetData *TD) {
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
   assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
-  
+
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
       return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
@@ -189,70 +1277,400 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     std::swap(LHS, RHS);
     Pred = CmpInst::getSwappedPredicate(Pred);
   }
-  
-  // ITy - This is the return type of the compare we're considering.
-  const Type *ITy = GetCompareTy(LHS);
-  
+
+  const Type *ITy = GetCompareTy(LHS); // The return type.
+  const Type *OpTy = LHS->getType();   // The operand type.
+
   // icmp X, X -> true/false
   // X icmp undef -> true/false.  For example, icmp ugt %X, undef -> false
   // because X could be 0.
   if (LHS == RHS || isa<UndefValue>(RHS))
     return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-  
-  // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
-  // addresses never equal each other!  We already know that Op0 != Op1.
-  if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) || 
-       isa<ConstantPointerNull>(LHS)) &&
-      (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || 
-       isa<ConstantPointerNull>(RHS)))
-    return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
-  
-  // See if we are doing a comparison with a constant.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
-    // If we have an icmp le or icmp ge instruction, turn it into the
-    // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
-    // them being folded in the code below.
+
+  // Special case logic when the operands have i1 type.
+  if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+       cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
     switch (Pred) {
     default: break;
+    case ICmpInst::ICMP_EQ:
+      // X == 1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_NE:
+      // X != 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_UGT:
+      // X >u 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_UGE:
+      // X >=u 1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_SLT:
+      // X <s 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_SLE:
+      // X <=s -1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    }
+  }
+
+  // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+  // different addresses, and what's more the address of a stack variable is
+  // never null or equal to the address of a global.  Note that generalizing
+  // to the case where LHS is a global variable address or null is pointless,
+  // since if both LHS and RHS are constants then we already constant folded
+  // the compare, and if only one of them is then we moved it to RHS already.
+  if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+                               isa<ConstantPointerNull>(RHS)))
+    // We already know that LHS != LHS.
+    return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+  // If we are comparing with zero then try hard since this is a common case.
+  if (match(RHS, m_Zero())) {
+    bool LHSKnownNonNegative, LHSKnownNegative;
+    switch (Pred) {
+    default:
+      assert(false && "Unknown ICmp predicate!");
+    case ICmpInst::ICMP_ULT:
+      return ConstantInt::getFalse(LHS->getContext());
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getTrue(LHS->getContext());
+    case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
-      if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
+      if (isKnownNonZero(LHS, TD))
+        return ConstantInt::getFalse(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_UGT:
+      if (isKnownNonZero(LHS, TD))
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SLT:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      if (LHSKnownNonNegative)
+        return ConstantInt::getFalse(LHS->getContext());
       break;
     case ICmpInst::ICMP_SLE:
-      if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
-        return ConstantInt::getTrue(CI->getContext());
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+        return ConstantInt::getFalse(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getFalse(LHS->getContext());
+      if (LHSKnownNonNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SGT:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getFalse(LHS->getContext());
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    }
+  }
+
+  // See if we are doing a comparison with a constant integer.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+    switch (Pred) {
+    default: break;
+    case ICmpInst::ICMP_UGT:
+      if (CI->isMaxValue(false))                 // A >u MAX -> FALSE
+        return ConstantInt::getFalse(CI->getContext());
       break;
     case ICmpInst::ICMP_UGE:
       if (CI->isMinValue(false))                 // A >=u MIN -> TRUE
         return ConstantInt::getTrue(CI->getContext());
       break;
+    case ICmpInst::ICMP_ULT:
+      if (CI->isMinValue(false))                 // A <u MIN -> FALSE
+        return ConstantInt::getFalse(CI->getContext());
+      break;
+    case ICmpInst::ICMP_ULE:
+      if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE
+        return ConstantInt::getTrue(CI->getContext());
+      break;
+    case ICmpInst::ICMP_SGT:
+      if (CI->isMaxValue(true))                  // A >s MAX -> FALSE
+        return ConstantInt::getFalse(CI->getContext());
+      break;
     case ICmpInst::ICMP_SGE:
       if (CI->isMinValue(true))                  // A >=s MIN -> TRUE
         return ConstantInt::getTrue(CI->getContext());
       break;
+    case ICmpInst::ICMP_SLT:
+      if (CI->isMinValue(true))                  // A <s MIN -> FALSE
+        return ConstantInt::getFalse(CI->getContext());
+      break;
+    case ICmpInst::ICMP_SLE:
+      if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE
+        return ConstantInt::getTrue(CI->getContext());
+      break;
+    }
+  }
+
+  // Compare of cast, for example (zext X) != 0 -> X != 0
+  if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+    Instruction *LI = cast<CastInst>(LHS);
+    Value *SrcOp = LI->getOperand(0);
+    const Type *SrcTy = SrcOp->getType();
+    const Type *DstTy = LI->getType();
+
+    // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+    // if the integer type is the same size as the pointer type.
+    if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+        TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+      if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+        // Transfer the cast to the constant.
+        if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+                                        ConstantExpr::getIntToPtr(RHSC, SrcTy),
+                                        TD, DT, MaxRecurse-1))
+          return V;
+      } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+        if (RI->getOperand(0)->getType() == SrcTy)
+          // Compare without the cast.
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+                                          TD, DT, MaxRecurse-1))
+            return V;
+      }
+    }
+
+    if (isa<ZExtInst>(LHS)) {
+      // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+      // same type.
+      if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+        if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+          // Compare X and Y.  Note that signed predicates become unsigned.
+          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+                                          SrcOp, RI->getOperand(0), TD, DT,
+                                          MaxRecurse-1))
+            return V;
+      }
+      // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+      // too.  If not, then try to deduce the result of the comparison.
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+        // Compute the constant that would happen if we truncated to SrcTy then
+        // reextended to DstTy.
+        Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+        Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+        // If the re-extended constant didn't change then this is effectively
+        // also a case of comparing two zero-extended values.
+        if (RExt == CI && MaxRecurse)
+          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+                                          SrcOp, Trunc, TD, DT, MaxRecurse-1))
+            return V;
+
+        // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+        // there.  Use this to work out the result of the comparison.
+        if (RExt != CI) {
+          switch (Pred) {
+          default:
+            assert(false && "Unknown ICmp predicate!");
+          // LHS <u RHS.
+          case ICmpInst::ICMP_EQ:
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_UGE:
+            return ConstantInt::getFalse(CI->getContext());
+
+          case ICmpInst::ICMP_NE:
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_ULE:
+            return ConstantInt::getTrue(CI->getContext());
+
+          // LHS is non-negative.  If RHS is negative then LHS >s LHS.  If RHS
+          // is non-negative then LHS <s RHS.
+          case ICmpInst::ICMP_SGT:
+          case ICmpInst::ICMP_SGE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getTrue(CI->getContext()) :
+              ConstantInt::getFalse(CI->getContext());
+
+          case ICmpInst::ICMP_SLT:
+          case ICmpInst::ICMP_SLE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getFalse(CI->getContext()) :
+              ConstantInt::getTrue(CI->getContext());
+          }
+        }
+      }
+    }
+
+    if (isa<SExtInst>(LHS)) {
+      // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+      // same type.
+      if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+        if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+          // Compare X and Y.  Note that the predicate does not change.
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+                                          TD, DT, MaxRecurse-1))
+            return V;
+      }
+      // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+      // too.  If not, then try to deduce the result of the comparison.
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+        // Compute the constant that would happen if we truncated to SrcTy then
+        // reextended to DstTy.
+        Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+        Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+        // If the re-extended constant didn't change then this is effectively
+        // also a case of comparing two sign-extended values.
+        if (RExt == CI && MaxRecurse)
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+                                          MaxRecurse-1))
+            return V;
+
+        // Otherwise the upper bits of LHS are all equal, while RHS has varying
+        // bits there.  Use this to work out the result of the comparison.
+        if (RExt != CI) {
+          switch (Pred) {
+          default:
+            assert(false && "Unknown ICmp predicate!");
+          case ICmpInst::ICMP_EQ:
+            return ConstantInt::getFalse(CI->getContext());
+          case ICmpInst::ICMP_NE:
+            return ConstantInt::getTrue(CI->getContext());
+
+          // If RHS is non-negative then LHS <s RHS.  If RHS is negative then
+          // LHS >s RHS.
+          case ICmpInst::ICMP_SGT:
+          case ICmpInst::ICMP_SGE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getTrue(CI->getContext()) :
+              ConstantInt::getFalse(CI->getContext());
+          case ICmpInst::ICMP_SLT:
+          case ICmpInst::ICMP_SLE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getFalse(CI->getContext()) :
+              ConstantInt::getTrue(CI->getContext());
+
+          // If LHS is non-negative then LHS <u RHS.  If LHS is negative then
+          // LHS >u RHS.
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_UGE:
+            // Comparison is true iff the LHS <s 0.
+            if (MaxRecurse)
+              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+                                              Constant::getNullValue(SrcTy),
+                                              TD, DT, MaxRecurse-1))
+                return V;
+            break;
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_ULE:
+            // Comparison is true iff the LHS >=s 0.
+            if (MaxRecurse)
+              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+                                              Constant::getNullValue(SrcTy),
+                                              TD, DT, MaxRecurse-1))
+                return V;
+            break;
+          }
+        }
+      }
     }
   }
-  
-  
+
+  // Special logic for binary operators.
+  BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+  if (MaxRecurse && (LBO || RBO)) {
+    // Analyze the case when either LHS or RHS is an add instruction.
+    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+    bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+    if (LBO && LBO->getOpcode() == Instruction::Add) {
+      A = LBO->getOperand(0); B = LBO->getOperand(1);
+      NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+    }
+    if (RBO && RBO->getOpcode() == Instruction::Add) {
+      C = RBO->getOperand(0); D = RBO->getOperand(1);
+      NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+    }
+
+    // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+    if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+      if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+                                      Constant::getNullValue(RHS->getType()),
+                                      TD, DT, MaxRecurse-1))
+        return V;
+
+    // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+    if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+      if (Value *V = SimplifyICmpInst(Pred,
+                                      Constant::getNullValue(LHS->getType()),
+                                      C == LHS ? D : C, TD, DT, MaxRecurse-1))
+        return V;
+
+    // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+    if (A && C && (A == C || A == D || B == C || B == D) &&
+        NoLHSWrapProblem && NoRHSWrapProblem) {
+      // Determine Y and Z in the form icmp (X+Y), (X+Z).
+      Value *Y = (A == C || A == D) ? B : A;
+      Value *Z = (C == A || C == B) ? D : C;
+      if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+        return V;
+    }
+  }
+
+  // If the comparison is with the result of a select instruction, check whether
+  // comparing with either branch of the select always yields the same value.
+  if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  // If the comparison is with the result of a phi instruction, check whether
+  // doing the compare with each incoming phi value yields a common result.
+  if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
   return 0;
 }
 
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
 /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const TargetData *TD) {
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
   assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
       return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
-   
+
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
     Pred = CmpInst::getSwappedPredicate(Pred);
   }
-  
+
   // Fold trivial predicates.
   if (Pred == FCmpInst::FCMP_FALSE)
     return ConstantInt::get(GetCompareTy(LHS), 0);
@@ -269,7 +1687,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     if (CmpInst::isFalseWhenEqual(Pred))
       return ConstantInt::get(GetCompareTy(LHS), 0);
   }
-  
+
   // Handle fcmp with constant RHS
   if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
     // If the constant is a nan, see if we can fold the comparison based on it.
@@ -310,23 +1728,40 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       }
     }
   }
-  
+
+  // If the comparison is with the result of a select instruction, check whether
+  // comparing with either branch of the select always yields the same value.
+  if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  // If the comparison is with the result of a phi instruction, check whether
+  // doing the compare with each incoming phi value yields a common result.
+  if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
   return 0;
 }
 
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
 /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
 /// the result.  If not, this returns null.
 Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
-                                const TargetData *TD) {
+                                const TargetData *TD, const DominatorTree *) {
   // select true, X, Y  -> X
   // select false, X, Y -> Y
   if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
     return CB->getZExtValue() ? TrueVal : FalseVal;
-  
+
   // select C, X, X -> X
   if (TrueVal == FalseVal)
     return TrueVal;
-  
+
   if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
     return FalseVal;
   if (isa<UndefValue>(FalseVal))   // select C, X, undef -> X
@@ -336,98 +1771,249 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
       return TrueVal;
     return FalseVal;
   }
-  
-  
-  
+
   return 0;
 }
 
-
 /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
 /// fold the result.  If not, this returns null.
 Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
-                             const TargetData *TD) {
+                             const TargetData *TD, const DominatorTree *) {
+  // The type of the GEP pointer operand.
+  const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
   // getelementptr P -> P.
   if (NumOps == 1)
     return Ops[0];
 
-  // TODO.
-  //if (isa<UndefValue>(Ops[0]))
-  //  return UndefValue::get(GEP.getType());
+  if (isa<UndefValue>(Ops[0])) {
+    // Compute the (pointer) type returned by the GEP instruction.
+    const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+                                                             NumOps-1);
+    const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+    return UndefValue::get(GEPTy);
+  }
 
-  // getelementptr P, 0 -> P.
-  if (NumOps == 2)
+  if (NumOps == 2) {
+    // getelementptr P, 0 -> P.
     if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
       if (C->isZero())
         return Ops[0];
-  
+    // getelementptr P, N -> P if P points to a type of zero size.
+    if (TD) {
+      const Type *Ty = PtrTy->getElementType();
+      if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+        return Ops[0];
+    }
+  }
+
   // Check to see if this is constant foldable.
   for (unsigned i = 0; i != NumOps; ++i)
     if (!isa<Constant>(Ops[i]))
       return 0;
-  
+
   return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
                                         (Constant *const*)Ops+1, NumOps-1);
 }
 
+/// SimplifyPHINode - See if we can fold the given phi.  If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+  // If all of the PHI's incoming values are the same then replace the PHI node
+  // with the common value.
+  Value *CommonValue = 0;
+  bool HasUndefInput = false;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PN->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PN) continue;
+    if (isa<UndefValue>(Incoming)) {
+      // Remember that we saw an undef value, but otherwise ignore them.
+      HasUndefInput = true;
+      continue;
+    }
+    if (CommonValue && Incoming != CommonValue)
+      return 0;  // Not the same, bail out.
+    CommonValue = Incoming;
+  }
+
+  // If CommonValue is null then all of the incoming values were either undef or
+  // equal to the phi node itself.
+  if (!CommonValue)
+    return UndefValue::get(PN->getType());
+
+  // If we have a PHI node like phi(X, undef, X), where X is defined by some
+  // instruction, we cannot return X as the result of the PHI node unless it
+  // dominates the PHI block.
+  if (HasUndefInput)
+    return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+  return CommonValue;
+}
+
 
 //=== Helper functions for higher up the class hierarchy.
 
 /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, 
-                           const TargetData *TD) {
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                            const TargetData *TD, const DominatorTree *DT,
+                            unsigned MaxRecurse) {
   switch (Opcode) {
-  case Instruction::And: return SimplifyAndInst(LHS, RHS, TD);
-  case Instruction::Or:  return SimplifyOrInst(LHS, RHS, TD);
+  case Instruction::Add:
+    return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::Sub:
+    return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::Mul:  return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Shl:
+    return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::LShr:
+    return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+  case Instruction::AShr:
+    return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+  case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Or:  return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
   default:
     if (Constant *CLHS = dyn_cast<Constant>(LHS))
       if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
         Constant *COps[] = {CLHS, CRHS};
         return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
       }
+
+    // If the operation is associative, try some generic simplifications.
+    if (Instruction::isAssociative(Opcode))
+      if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+                                              MaxRecurse))
+        return V;
+
+    // If the operation is with the result of a select instruction, check whether
+    // operating on either branch of the select always yields the same value.
+    if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+      if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+                                           MaxRecurse))
+        return V;
+
+    // If the operation is with the result of a phi instruction, check whether
+    // operating on all incoming values of the phi always yields the same value.
+    if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+      if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+        return V;
+
     return 0;
   }
 }
 
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                           const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
 /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
 /// fold the result.
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                             const TargetData *TD) {
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
   if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
-    return SimplifyICmpInst(Predicate, LHS, RHS, TD);
-  return SimplifyFCmpInst(Predicate, LHS, RHS, TD);
+    return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+  return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
 }
 
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
 
 /// SimplifyInstruction - See if we can compute a simplified version of this
 /// instruction.  If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+                                 const DominatorTree *DT) {
+  Value *Result;
+
   switch (I->getOpcode()) {
   default:
-    return ConstantFoldInstruction(I, TD);
+    Result = ConstantFoldInstruction(I, TD);
+    break;
   case Instruction::Add:
-    return SimplifyAddInst(I->getOperand(0), I->getOperand(1),
-                           cast<BinaryOperator>(I)->hasNoSignedWrap(),
-                           cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD);
+    Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::Sub:
+    Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::Mul:
+    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::SDiv:
+    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::UDiv:
+    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::FDiv:
+    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Shl:
+    Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::LShr:
+    Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+                              cast<BinaryOperator>(I)->isExact(),
+                              TD, DT);
+    break;
+  case Instruction::AShr:
+    Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+                              cast<BinaryOperator>(I)->isExact(),
+                              TD, DT);
+    break;
   case Instruction::And:
-    return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD);
+    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
   case Instruction::Or:
-    return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD);
+    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Xor:
+    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
   case Instruction::ICmp:
-    return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
-                            I->getOperand(0), I->getOperand(1), TD);
+    Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+                              I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
   case Instruction::FCmp:
-    return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
-                            I->getOperand(0), I->getOperand(1), TD);
+    Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+                              I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
   case Instruction::Select:
-    return SimplifySelectInst(I->getOperand(0), I->getOperand(1),
-                              I->getOperand(2), TD);
+    Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+                                I->getOperand(2), TD, DT);
+    break;
   case Instruction::GetElementPtr: {
     SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
-    return SimplifyGEPInst(&Ops[0], Ops.size(), TD);
+    Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+    break;
   }
+  case Instruction::PHI:
+    Result = SimplifyPHINode(cast<PHINode>(I), DT);
+    break;
   }
+
+  /// If called on unreachable code, the above logic may report that the
+  /// instruction simplified to itself.  Make life easier for users by
+  /// detecting that case here, returning a safe value instead.
+  return Result == I ? UndefValue::get(I->getType()) : Result;
 }
 
 /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
@@ -437,15 +2023,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
 /// simplifies and deletes scalar operations, it does not change the CFG.
 ///
 void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
-                                     const TargetData *TD) {
+                                     const TargetData *TD,
+                                     const DominatorTree *DT) {
   assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
-  
+
   // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
   // we can know if it gets deleted out from under us or replaced in a
   // recursive simplification.
   WeakVH FromHandle(From);
   WeakVH ToHandle(To);
-  
+
   while (!From->use_empty()) {
     // Update the instruction to use the new value.
     Use &TheUse = From->use_begin().getUse();
@@ -460,27 +2047,26 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
       // Sanity check to make sure 'User' doesn't dangle across
       // SimplifyInstruction.
       AssertingVH<> UserHandle(User);
-    
-      SimplifiedVal = SimplifyInstruction(User, TD);
+
+      SimplifiedVal = SimplifyInstruction(User, TD, DT);
       if (SimplifiedVal == 0) continue;
     }
-    
+
     // Recursively simplify this user to the new value.
-    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD);
+    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
     From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
     To = ToHandle;
-      
+
     assert(ToHandle && "To value deleted by recursive simplification?");
-      
+
     // If the recursive simplification ended up revisiting and deleting
     // 'From' then we're done.
     if (From == 0)
       return;
   }
-  
+
   // If 'From' has value handles referring to it, do a real RAUW to update them.
   From->replaceAllUsesWith(To);
-  
+
   From->eraseFromParent();
 }
-
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index 1c9e14884316..2e259b147b8b 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
 
 char IntervalPartition::ID = 0;
 INITIALIZE_PASS(IntervalPartition, "intervals",
-                "Interval Partition Construction", true, true);
+                "Interval Partition Construction", true, true)
 
 //===----------------------------------------------------------------------===//
 // IntervalPartition Implementation
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index e32dbc444713..9e7da6ce2de9 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -14,8 +14,10 @@
 
 #define DEBUG_TYPE "lazy-value-info"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CFG.h"
@@ -26,11 +28,14 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <set>
+#include <stack>
 using namespace llvm;
 
 char LazyValueInfo::ID = 0;
 INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
-                "Lazy Value Information Analysis", false, true);
+                "Lazy Value Information Analysis", false, true)
 
 namespace llvm {
   FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
@@ -50,18 +55,18 @@ namespace llvm {
 namespace {
 class LVILatticeVal {
   enum LatticeValueTy {
-    /// undefined - This LLVM Value has no known value yet.
+    /// undefined - This Value has no known value yet.
     undefined,
     
-    /// constant - This LLVM Value has a specific constant value.
+    /// constant - This Value has a specific constant value.
     constant,
-    /// notconstant - This LLVM value is known to not have the specified value.
+    /// notconstant - This Value is known to not have the specified value.
     notconstant,
     
-    /// constantrange
+    /// constantrange - The Value falls within this range.
     constantrange,
     
-    /// overdefined - This instruction is not known to be constant, and we know
+    /// overdefined - This value is not known to be constant, and we know that
     /// it has a value.
     overdefined
   };
@@ -77,17 +82,13 @@ public:
 
   static LVILatticeVal get(Constant *C) {
     LVILatticeVal Res;
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
-      Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1));
-    else if (!isa<UndefValue>(C))
+    if (!isa<UndefValue>(C))
       Res.markConstant(C);
     return Res;
   }
   static LVILatticeVal getNot(Constant *C) {
     LVILatticeVal Res;
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
-      Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
-    else
+    if (!isa<UndefValue>(C))
       Res.markNotConstant(C);
     return Res;
   }
@@ -129,32 +130,34 @@ public:
 
   /// markConstant - Return true if this is a change in status.
   bool markConstant(Constant *V) {
-    if (isConstant()) {
-      assert(getConstant() == V && "Marking constant with different value");
+    assert(V && "Marking constant with NULL");
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return markConstantRange(ConstantRange(CI->getValue()));
+    if (isa<UndefValue>(V))
       return false;
-    }
-    
+
+    assert((!isConstant() || getConstant() == V) &&
+           "Marking constant with different value");
     assert(isUndefined());
     Tag = constant;
-    assert(V && "Marking constant with NULL");
     Val = V;
     return true;
   }
   
   /// markNotConstant - Return true if this is a change in status.
   bool markNotConstant(Constant *V) {
-    if (isNotConstant()) {
-      assert(getNotConstant() == V && "Marking !constant with different value");
+    assert(V && "Marking constant with NULL");
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+    if (isa<UndefValue>(V))
       return false;
-    }
-    
-    if (isConstant())
-      assert(getConstant() != V && "Marking not constant with different value");
-    else
-      assert(isUndefined());
 
+    assert((!isConstant() || getConstant() != V) &&
+           "Marking constant !constant with same value");
+    assert((!isNotConstant() || getNotConstant() == V) &&
+           "Marking !constant with different value");
+    assert(isUndefined() || isConstant());
     Tag = notconstant;
-    assert(V && "Marking constant with NULL");
     Val = V;
     return true;
   }
@@ -185,63 +188,81 @@ public:
     if (RHS.isUndefined() || isOverdefined()) return false;
     if (RHS.isOverdefined()) return markOverdefined();
 
-    if (RHS.isNotConstant()) {
-      if (isNotConstant()) {
-        if (getNotConstant() != RHS.getNotConstant() ||
-            isa<ConstantExpr>(getNotConstant()) ||
-            isa<ConstantExpr>(RHS.getNotConstant()))
-          return markOverdefined();
-        return false;
-      } else if (isConstant()) {
-        if (getConstant() == RHS.getNotConstant() ||
-            isa<ConstantExpr>(RHS.getNotConstant()) ||
-            isa<ConstantExpr>(getConstant()))
+    if (isUndefined()) {
+      Tag = RHS.Tag;
+      Val = RHS.Val;
+      Range = RHS.Range;
+      return true;
+    }
+
+    if (isConstant()) {
+      if (RHS.isConstant()) {
+        if (Val == RHS.Val)
+          return false;
+        return markOverdefined();
+      }
+
+      if (RHS.isNotConstant()) {
+        if (Val == RHS.Val)
           return markOverdefined();
-        return markNotConstant(RHS.getNotConstant());
-      } else if (isConstantRange()) {
+
+        // Unless we can prove that the two Constants are different, we must
+        // move to overdefined.
+        // FIXME: use TargetData for smarter constant folding.
+        if (ConstantInt *Res = dyn_cast<ConstantInt>(
+                ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+                                                getConstant(),
+                                                RHS.getNotConstant())))
+          if (Res->isOne())
+            return markNotConstant(RHS.getNotConstant());
+
         return markOverdefined();
       }
-      
-      assert(isUndefined() && "Unexpected lattice");
-      return markNotConstant(RHS.getNotConstant());
+
+      // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+      // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+      // a function. The correct result is to pick up RHS.
+
+      return markOverdefined();
     }
-    
-    if (RHS.isConstantRange()) {
-      if (isConstantRange()) {
-        ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
-        if (NewR.isFullSet())
+
+    if (isNotConstant()) {
+      if (RHS.isConstant()) {
+        if (Val == RHS.Val)
           return markOverdefined();
-        else
-          return markConstantRange(NewR);
-      } else if (!isUndefined()) {
+
+        // Unless we can prove that the two Constants are different, we must
+        // move to overdefined.
+        // FIXME: use TargetData for smarter constant folding.
+        if (ConstantInt *Res = dyn_cast<ConstantInt>(
+                ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+                                                getNotConstant(),
+                                                RHS.getConstant())))
+          if (Res->isOne())
+            return false;
+
         return markOverdefined();
       }
-      
-      assert(isUndefined() && "Unexpected lattice");
-      return markConstantRange(RHS.getConstantRange());
-    }
-    
-    // RHS must be a constant, we must be undef, constant, or notconstant.
-    assert(!isConstantRange() &&
-           "Constant and ConstantRange cannot be merged.");
-    
-    if (isUndefined())
-      return markConstant(RHS.getConstant());
-    
-    if (isConstant()) {
-      if (getConstant() != RHS.getConstant())
+
+      if (RHS.isNotConstant()) {
+        if (Val == RHS.Val)
+          return false;
         return markOverdefined();
-      return false;
+      }
+
+      return markOverdefined();
     }
 
-    // If we are known "!=4" and RHS is "==5", stay at "!=4".
-    if (getNotConstant() == RHS.getConstant() ||
-        isa<ConstantExpr>(getNotConstant()) ||
-        isa<ConstantExpr>(RHS.getConstant()))
+    assert(isConstantRange() && "New LVILattice type?");
+    if (!RHS.isConstantRange())
       return markOverdefined();
-    return false;
+
+    ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+    if (NewR.isFullSet())
+      return markOverdefined();
+    return markConstantRange(NewR);
   }
-  
 };
   
 } // end anonymous namespace.
@@ -267,49 +288,136 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
 //===----------------------------------------------------------------------===//
 
 namespace {
+  /// LVIValueHandle - A callback value handle update the cache when
+  /// values are erased.
+  class LazyValueInfoCache;
+  struct LVIValueHandle : public CallbackVH {
+    LazyValueInfoCache *Parent;
+      
+    LVIValueHandle(Value *V, LazyValueInfoCache *P)
+      : CallbackVH(V), Parent(P) { }
+      
+    void deleted();
+    void allUsesReplacedWith(Value *V) {
+      deleted();
+    }
+  };
+}
+
+namespace llvm {
+  template<>
+  struct DenseMapInfo<LVIValueHandle> {
+    typedef DenseMapInfo<Value*> PointerInfo;
+    static inline LVIValueHandle getEmptyKey() {
+      return LVIValueHandle(PointerInfo::getEmptyKey(),
+                            static_cast<LazyValueInfoCache*>(0));
+    }
+    static inline LVIValueHandle getTombstoneKey() {
+      return LVIValueHandle(PointerInfo::getTombstoneKey(),
+                            static_cast<LazyValueInfoCache*>(0));
+    }
+    static unsigned getHashValue(const LVIValueHandle &Val) {
+      return PointerInfo::getHashValue(Val);
+    }
+    static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+      return LHS == RHS;
+    }
+  };
+  
+  template<>
+  struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+    typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+    typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+    typedef DenseMapInfo<Value*> BPointerInfo;
+    static inline PairTy getEmptyKey() {
+      return std::make_pair(APointerInfo::getEmptyKey(),
+                            BPointerInfo::getEmptyKey());
+    }
+    static inline PairTy getTombstoneKey() {
+      return std::make_pair(APointerInfo::getTombstoneKey(), 
+                            BPointerInfo::getTombstoneKey());
+    }
+    static unsigned getHashValue( const PairTy &Val) {
+      return APointerInfo::getHashValue(Val.first) ^ 
+             BPointerInfo::getHashValue(Val.second);
+    }
+    static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+      return APointerInfo::isEqual(LHS.first, RHS.first) &&
+             BPointerInfo::isEqual(LHS.second, RHS.second);
+    }
+  };
+}
+
+namespace { 
   /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
   /// maintains information about queries across the clients' queries.
   class LazyValueInfoCache {
-  public:
-    /// BlockCacheEntryTy - This is a computed lattice value at the end of the
-    /// specified basic block for a Value* that depends on context.
-    typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy;
-    
     /// ValueCacheEntryTy - This is all of the cached block information for
     /// exactly one Value*.  The entries are sorted by the BasicBlock* of the
     /// entries, allowing us to do a lookup with a binary search.
     typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
 
-  private:
-     /// LVIValueHandle - A callback value handle update the cache when
-     /// values are erased.
-    struct LVIValueHandle : public CallbackVH {
+    /// ValueCache - This is all of the cached information for all values,
+    /// mapped from Value* to key information.
+    DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+    
+    /// OverDefinedCache - This tracks, on a per-block basis, the set of 
+    /// values that are over-defined at the end of that block.  This is required
+    /// for cache updating.
+    typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+    DenseSet<OverDefinedPairTy> OverDefinedCache;
+    
+    /// BlockValueStack - This stack holds the state of the value solver
+    /// during a query.  It basically emulates the callstack of the naive
+    /// recursive value lookup process.
+    std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+    
+    friend struct LVIValueHandle;
+    
+    /// OverDefinedCacheUpdater - A helper object that ensures that the
+    /// OverDefinedCache is updated whenever solveBlockValue returns.
+    struct OverDefinedCacheUpdater {
       LazyValueInfoCache *Parent;
+      Value *Val;
+      BasicBlock *BB;
+      LVILatticeVal &BBLV;
       
-      LVIValueHandle(Value *V, LazyValueInfoCache *P)
-        : CallbackVH(V), Parent(P) { }
+      OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+                       LazyValueInfoCache *P)
+        : Parent(P), Val(V), BB(B), BBLV(LV) { }
       
-      void deleted();
-      void allUsesReplacedWith(Value* V) {
-        deleted();
-      }
-
-      LVIValueHandle &operator=(Value *V) {
-        return *this = LVIValueHandle(V, Parent);
+      bool markResult(bool changed) { 
+        if (changed && BBLV.isOverdefined())
+          Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+        return changed;
       }
     };
+    
 
-    /// ValueCache - This is all of the cached information for all values,
-    /// mapped from Value* to key information.
-    std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+    LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+    bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+                      LVILatticeVal &Result);
+    bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+    // These methods process one work item and may add more. A false value
+    // returned means that the work item was not completely processed and must
+    // be revisited after going through the new items.
+    bool solveBlockValue(Value *Val, BasicBlock *BB);
+    bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+                                 Value *Val, BasicBlock *BB);
+    bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+                                PHINode *PN, BasicBlock *BB);
+    bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+                                      Instruction *BBI, BasicBlock *BB);
+
+    void solve();
     
-    /// OverDefinedCache - This tracks, on a per-block basis, the set of 
-    /// values that are over-defined at the end of that block.  This is required
-    /// for cache updating.
-    std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache;
+    ValueCacheEntryTy &lookup(Value *V) {
+      return ValueCache[LVIValueHandle(V, this)];
+    }
 
   public:
-    
     /// getValueInBlock - This is the query interface to determine the lattice
     /// value for the specified Value* at the end of the specified block.
     LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
@@ -335,199 +443,112 @@ namespace {
   };
 } // end anonymous namespace
 
-//===----------------------------------------------------------------------===//
-//                              LVIQuery Impl
-//===----------------------------------------------------------------------===//
-
-namespace {
-  /// LVIQuery - This is a transient object that exists while a query is
-  /// being performed.
-  ///
-  /// TODO: Reuse LVIQuery instead of recreating it for every query, this avoids
-  /// reallocation of the densemap on every query.
-  class LVIQuery {
-    typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy;
-    typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy;
-    
-    /// This is the current value being queried for.
-    Value *Val;
-    
-    /// This is a pointer to the owning cache, for recursive queries.
-    LazyValueInfoCache &Parent;
-
-    /// This is all of the cached information about this value.
-    ValueCacheEntryTy &Cache;
-    
-    /// This tracks, for each block, what values are overdefined.
-    std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache;
-    
-    ///  NewBlocks - This is a mapping of the new BasicBlocks which have been
-    /// added to cache but that are not in sorted order.
-    DenseSet<BasicBlock*> NewBlockInfo;
-    
-  public:
-    
-    LVIQuery(Value *V, LazyValueInfoCache &P,
-             ValueCacheEntryTy &VC,
-             std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC)
-      : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) {
-    }
-
-    ~LVIQuery() {
-      // When the query is done, insert the newly discovered facts into the
-      // cache in sorted order.
-      if (NewBlockInfo.empty()) return;
-      
-      for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(),
-           E = NewBlockInfo.end(); I != E; ++I) {
-        if (Cache[*I].isOverdefined())
-          OverDefinedCache.insert(std::make_pair(*I, Val));
-      }
-    }
-
-    LVILatticeVal getBlockValue(BasicBlock *BB);
-    LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
-
-  private:
-    LVILatticeVal getCachedEntryForBlock(BasicBlock *BB);
-  };
-} // end anonymous namespace
-
-void LazyValueInfoCache::LVIValueHandle::deleted() {
-  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+void LVIValueHandle::deleted() {
+  typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+  
+  SmallVector<OverDefinedPairTy, 4> ToErase;
+  for (DenseSet<OverDefinedPairTy>::iterator 
        I = Parent->OverDefinedCache.begin(),
        E = Parent->OverDefinedCache.end();
-       I != E; ) {
-    std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
-    ++I;
-    if (tmp->second == getValPtr())
-      Parent->OverDefinedCache.erase(tmp);
+       I != E; ++I) {
+    if (I->second == getValPtr())
+      ToErase.push_back(*I);
   }
   
+  for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+       E = ToErase.end(); I != E; ++I)
+    Parent->OverDefinedCache.erase(*I);
+  
   // This erasure deallocates *this, so it MUST happen after we're done
   // using any and all members of *this.
   Parent->ValueCache.erase(*this);
 }
 
 void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
-  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
-       I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) {
-    std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
-    ++I;
-    if (tmp->first == BB)
-      OverDefinedCache.erase(tmp);
+  SmallVector<OverDefinedPairTy, 4> ToErase;
+  for (DenseSet<OverDefinedPairTy>::iterator  I = OverDefinedCache.begin(),
+       E = OverDefinedCache.end(); I != E; ++I) {
+    if (I->first == BB)
+      ToErase.push_back(*I);
   }
+  
+  for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+       E = ToErase.end(); I != E; ++I)
+    OverDefinedCache.erase(*I);
 
-  for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+  for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
        I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
     I->second.erase(BB);
 }
 
-/// getCachedEntryForBlock - See if we already have a value for this block.  If
-/// so, return it, otherwise create a new entry in the Cache map to use.
-LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
-  NewBlockInfo.insert(BB);
-  return Cache[BB];
+void LazyValueInfoCache::solve() {
+  while (!BlockValueStack.empty()) {
+    std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+    if (solveBlockValue(e.second, e.first))
+      BlockValueStack.pop();
+  }
+}
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+  // If already a constant, there is nothing to compute.
+  if (isa<Constant>(Val))
+    return true;
+
+  LVIValueHandle ValHandle(Val, this);
+  if (!ValueCache.count(ValHandle)) return false;
+  return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+  // If already a constant, there is nothing to compute.
+  if (Constant *VC = dyn_cast<Constant>(Val))
+    return LVILatticeVal::get(VC);
+
+  return lookup(Val)[BB];
 }
 
-LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
-  // See if we already have a value for this block.
-  LVILatticeVal BBLV = getCachedEntryForBlock(BB);
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+  if (isa<Constant>(Val))
+    return true;
+
+  ValueCacheEntryTy &Cache = lookup(Val);
+  LVILatticeVal &BBLV = Cache[BB];
   
+  // OverDefinedCacheUpdater is a helper object that will update
+  // the OverDefinedCache for us when this method exits.  Make sure to
+  // call markResult on it as we exist, passing a bool to indicate if the
+  // cache needs updating, i.e. if we have solve a new value or not.
+  OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
   // If we've already computed this block's value, return it.
   if (!BBLV.isUndefined()) {
     DEBUG(dbgs() << "  reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
-    return BBLV;
+    
+    // Since we're reusing a cached value here, we don't need to update the 
+    // OverDefinedCahce.  The cache will have been properly updated 
+    // whenever the cached value was inserted.
+    ODCacheUpdater.markResult(false);
+    return true;
   }
 
   // Otherwise, this is the first time we're seeing this block.  Reset the
   // lattice value to overdefined, so that cycles will terminate and be
   // conservatively correct.
   BBLV.markOverdefined();
-  Cache[BB] = BBLV;
   
   Instruction *BBI = dyn_cast<Instruction>(Val);
   if (BBI == 0 || BBI->getParent() != BB) {
-    LVILatticeVal Result;  // Start Undefined.
-    
-    // If this is a pointer, and there's a load from that pointer in this BB,
-    // then we know that the pointer can't be NULL.
-    bool NotNull = false;
-    if (Val->getType()->isPointerTy()) {
-      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
-        LoadInst *L = dyn_cast<LoadInst>(BI);
-        if (L && L->getPointerAddressSpace() == 0 &&
-            L->getPointerOperand()->getUnderlyingObject() ==
-              Val->getUnderlyingObject()) {
-          NotNull = true;
-          break;
-        }
-      }
-    }
-    
-    unsigned NumPreds = 0;    
-    // Loop over all of our predecessors, merging what we know from them into
-    // result.
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-      Result.mergeIn(getEdgeValue(*PI, BB));
-      
-      // If we hit overdefined, exit early.  The BlockVals entry is already set
-      // to overdefined.
-      if (Result.isOverdefined()) {
-        DEBUG(dbgs() << " compute BB '" << BB->getName()
-                     << "' - overdefined because of pred.\n");
-        // If we previously determined that this is a pointer that can't be null
-        // then return that rather than giving up entirely.
-        if (NotNull) {
-          const PointerType *PTy = cast<PointerType>(Val->getType());
-          Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
-        }
-        
-        return Result;
-      }
-      ++NumPreds;
-    }
-    
-    
-    // If this is the entry block, we must be asking about an argument.  The
-    // value is overdefined.
-    if (NumPreds == 0 && BB == &BB->getParent()->front()) {
-      assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
-      Result.markOverdefined();
-      return Result;
-    }
-    
-    // Return the merged value, which is more precise than 'overdefined'.
-    assert(!Result.isOverdefined());
-    return Cache[BB] = Result;
+    return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
   }
-  
-  // If this value is defined by an instruction in this block, we have to
-  // process it here somehow or return overdefined.
+
   if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
-    LVILatticeVal Result;  // Start Undefined.
-    
-    // Loop over all of our predecessors, merging what we know from them into
-    // result.
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-      Value* PhiVal = PN->getIncomingValueForBlock(*PI);
-      Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB));
-      
-      // If we hit overdefined, exit early.  The BlockVals entry is already set
-      // to overdefined.
-      if (Result.isOverdefined()) {
-        DEBUG(dbgs() << " compute BB '" << BB->getName()
-                     << "' - overdefined because of pred.\n");
-        return Result;
-      }
-    }
-    
-    // Return the merged value, which is more precise than 'overdefined'.
-    assert(!Result.isOverdefined());
-    return Cache[BB] = Result;
+    return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
   }
 
-  assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?");
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+    BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+    return ODCacheUpdater.markResult(true);
+  }
 
   // We can only analyze the definitions of certain classes of instructions
   // (integral binops and casts at the moment), so bail if this isn't one.
@@ -536,10 +557,10 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
      !BBI->getType()->isIntegerTy()) {
     DEBUG(dbgs() << " compute BB '" << BB->getName()
                  << "' - overdefined because inst def found.\n");
-    Result.markOverdefined();
-    return Result;
+    BBLV.markOverdefined();
+    return ODCacheUpdater.markResult(true);
   }
-   
+
   // FIXME: We're currently limited to binops with a constant RHS.  This should
   // be improved.
   BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
@@ -547,34 +568,177 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
     DEBUG(dbgs() << " compute BB '" << BB->getName()
                  << "' - overdefined because inst def found.\n");
 
-    Result.markOverdefined();
-    return Result;
-  }  
+    BBLV.markOverdefined();
+    return ODCacheUpdater.markResult(true);
+  }
+
+  return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+    return L->getPointerAddressSpace() == 0 &&
+        GetUnderlyingObject(L->getPointerOperand()) ==
+        GetUnderlyingObject(Ptr);
+  }
+  if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+    return S->getPointerAddressSpace() == 0 &&
+        GetUnderlyingObject(S->getPointerOperand()) ==
+        GetUnderlyingObject(Ptr);
+  }
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+    if (MI->isVolatile()) return false;
+    if (MI->getAddressSpace() != 0) return false;
+
+    // FIXME: check whether it has a valuerange that excludes zero?
+    ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+    if (!Len || Len->isZero()) return false;
+
+    if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+      return true;
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+      return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+  }
+  return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+                                                 Value *Val, BasicBlock *BB) {
+  LVILatticeVal Result;  // Start Undefined.
+
+  // If this is a pointer, and there's a load from that pointer in this BB,
+  // then we know that the pointer can't be NULL.
+  bool NotNull = false;
+  if (Val->getType()->isPointerTy()) {
+    if (isa<AllocaInst>(Val)) {
+      NotNull = true;
+    } else {
+      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+        if (InstructionDereferencesPointer(BI, Val)) {
+          NotNull = true;
+          break;
+        }
+      }
+    }
+  }
+
+  // If this is the entry block, we must be asking about an argument.  The
+  // value is overdefined.
+  if (BB == &BB->getParent()->getEntryBlock()) {
+    assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+    if (NotNull) {
+      const PointerType *PTy = cast<PointerType>(Val->getType());
+      Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+    } else {
+      Result.markOverdefined();
+    }
+    BBLV = Result;
+    return true;
+  }
+
+  // Loop over all of our predecessors, merging what we know from them into
+  // result.
+  bool EdgesMissing = false;
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    LVILatticeVal EdgeResult;
+    EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+    if (EdgesMissing)
+      continue;
 
+    Result.mergeIn(EdgeResult);
+
+    // If we hit overdefined, exit early.  The BlockVals entry is already set
+    // to overdefined.
+    if (Result.isOverdefined()) {
+      DEBUG(dbgs() << " compute BB '" << BB->getName()
+            << "' - overdefined because of pred.\n");
+      // If we previously determined that this is a pointer that can't be null
+      // then return that rather than giving up entirely.
+      if (NotNull) {
+        const PointerType *PTy = cast<PointerType>(Val->getType());
+        Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+      }
+      
+      BBLV = Result;
+      return true;
+    }
+  }
+  if (EdgesMissing)
+    return false;
+
+  // Return the merged value, which is more precise than 'overdefined'.
+  assert(!Result.isOverdefined());
+  BBLV = Result;
+  return true;
+}
+  
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+                                                PHINode *PN, BasicBlock *BB) {
+  LVILatticeVal Result;  // Start Undefined.
+
+  // Loop over all of our predecessors, merging what we know from them into
+  // result.
+  bool EdgesMissing = false;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *PhiBB = PN->getIncomingBlock(i);
+    Value *PhiVal = PN->getIncomingValue(i);
+    LVILatticeVal EdgeResult;
+    EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+    if (EdgesMissing)
+      continue;
+
+    Result.mergeIn(EdgeResult);
+
+    // If we hit overdefined, exit early.  The BlockVals entry is already set
+    // to overdefined.
+    if (Result.isOverdefined()) {
+      DEBUG(dbgs() << " compute BB '" << BB->getName()
+            << "' - overdefined because of pred.\n");
+      
+      BBLV = Result;
+      return true;
+    }
+  }
+  if (EdgesMissing)
+    return false;
+
+  // Return the merged value, which is more precise than 'overdefined'.
+  assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+  BBLV = Result;
+  return true;
+}
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+                                                      Instruction *BBI,
+                                                      BasicBlock *BB) {
   // Figure out the range of the LHS.  If that fails, bail.
-  LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
+  if (!hasBlockValue(BBI->getOperand(0), BB)) {
+    BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+    return false;
+  }
+
+  LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
   if (!LHSVal.isConstantRange()) {
-    Result.markOverdefined();
-    return Result;
+    BBLV.markOverdefined();
+    return true;
   }
   
-  ConstantInt *RHS = 0;
   ConstantRange LHSRange = LHSVal.getConstantRange();
   ConstantRange RHSRange(1);
   const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
   if (isa<BinaryOperator>(BBI)) {
-    RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
-    if (!RHS) {
-      Result.markOverdefined();
-      return Result;
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+      RHSRange = ConstantRange(RHS->getValue());
+    } else {
+      BBLV.markOverdefined();
+      return true;
     }
-    
-    RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
   }
-      
+
   // NOTE: We're currently limited by the set of operations that ConstantRange
   // can evaluate symbolically.  Enhancing that set will allows us to analyze
   // more definitions.
+  LVILatticeVal Result;
   switch (BBI->getOpcode()) {
   case Instruction::Add:
     Result.markConstantRange(LHSRange.add(RHSRange));
@@ -606,6 +770,12 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
   case Instruction::BitCast:
     Result.markConstantRange(LHSRange);
     break;
+  case Instruction::And:
+    Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+    break;
+  case Instruction::Or:
+    Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+    break;
   
   // Unhandled instructions are overdefined.
   default:
@@ -615,12 +785,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
     break;
   }
   
-  return Cache[BB] = Result;
+  BBLV = Result;
+  return true;
 }
 
-
 /// getEdgeValue - This method attempts to infer more complex 
-LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+                                      BasicBlock *BBTo, LVILatticeVal &Result) {
+  // If already a constant, there is nothing to compute.
+  if (Constant *VC = dyn_cast<Constant>(Val)) {
+    Result = LVILatticeVal::get(VC);
+    return true;
+  }
+  
   // TODO: Handle more complex conditionals.  If (v == 0 || v2 < 1) is false, we
   // know that v != 0.
   if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -634,9 +811,11 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
       
       // If V is the condition of the branch itself, then we know exactly what
       // it is.
-      if (BI->getCondition() == Val)
-        return LVILatticeVal::get(ConstantInt::get(
+      if (BI->getCondition() == Val) {
+        Result = LVILatticeVal::get(ConstantInt::get(
                               Type::getInt1Ty(Val->getContext()), isTrueDest));
+        return true;
+      }
       
       // If the condition of the branch is an equality comparison, we may be
       // able to infer the value.
@@ -647,30 +826,40 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
           // We know that V has the RHS constant if this is a true SETEQ or
           // false SETNE. 
           if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
-            return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
-          return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+            Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+          else
+            Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+          return true;
         }
-          
+
         if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
           // Calculate the range of values that would satisfy the comparison.
           ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
           ConstantRange TrueValues =
             ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
-            
+
           // If we're interested in the false dest, invert the condition.
           if (!isTrueDest) TrueValues = TrueValues.inverse();
           
           // Figure out the possible values of the query BEFORE this branch.  
-          LVILatticeVal InBlock = getBlockValue(BBFrom);
-          if (!InBlock.isConstantRange())
-            return LVILatticeVal::getRange(TrueValues);
-            
+          if (!hasBlockValue(Val, BBFrom)) {
+            BlockValueStack.push(std::make_pair(BBFrom, Val));
+            return false;
+          }
+          
+          LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+          if (!InBlock.isConstantRange()) {
+            Result = LVILatticeVal::getRange(TrueValues);
+            return true;
+          }
+
           // Find all potential values that satisfy both the input and output
           // conditions.
           ConstantRange PossibleValues =
             TrueValues.intersectWith(InBlock.getConstantRange());
-            
-          return LVILatticeVal::getRange(PossibleValues);
+
+          Result = LVILatticeVal::getRange(PossibleValues);
+          return true;
         }
       }
     }
@@ -682,9 +871,8 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
     if (SI->getCondition() == Val) {
       // We don't know anything in the default case.
       if (SI->getDefaultDest() == BBTo) {
-        LVILatticeVal Result;
         Result.markOverdefined();
-        return Result;
+        return true;
       }
       
       // We only know something if there is exactly one value that goes from
@@ -697,51 +885,48 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
         EdgeVal = SI->getCaseValue(i);
       }
       assert(EdgeVal && "Missing successor?");
-      if (NumEdges == 1)
-        return LVILatticeVal::get(EdgeVal);
+      if (NumEdges == 1) {
+        Result = LVILatticeVal::get(EdgeVal);
+        return true;
+      }
     }
   }
   
   // Otherwise see if the value is known in the block.
-  return getBlockValue(BBFrom);
+  if (hasBlockValue(Val, BBFrom)) {
+    Result = getBlockValue(Val, BBFrom);
+    return true;
+  }
+  BlockValueStack.push(std::make_pair(BBFrom, Val));
+  return false;
 }
 
-
-//===----------------------------------------------------------------------===//
-//                         LazyValueInfoCache Impl
-//===----------------------------------------------------------------------===//
-
 LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
-  // If already a constant, there is nothing to compute.
-  if (Constant *VC = dyn_cast<Constant>(V))
-    return LVILatticeVal::get(VC);
-  
   DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
         << BB->getName() << "'\n");
   
-  LVILatticeVal Result = LVIQuery(V, *this,
-                                ValueCache[LVIValueHandle(V, this)], 
-                                OverDefinedCache).getBlockValue(BB);
-  
+  BlockValueStack.push(std::make_pair(BB, V));
+  solve();
+  LVILatticeVal Result = getBlockValue(V, BB);
+
   DEBUG(dbgs() << "  Result = " << Result << "\n");
   return Result;
 }
 
 LVILatticeVal LazyValueInfoCache::
 getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
-  // If already a constant, there is nothing to compute.
-  if (Constant *VC = dyn_cast<Constant>(V))
-    return LVILatticeVal::get(VC);
-  
   DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
         << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
   
-  LVILatticeVal Result =
-    LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)],
-             OverDefinedCache).getEdgeValue(FromBB, ToBB);
-  
+  LVILatticeVal Result;
+  if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+    solve();
+    bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+    (void)WasFastQuery;
+    assert(WasFastQuery && "More work to do after problem solved?");
+  }
+
   DEBUG(dbgs() << "  Result = " << Result << "\n");
-  
   return Result;
 }
 
@@ -761,8 +946,8 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
   worklist.push_back(OldSucc);
   
   DenseSet<Value*> ClearSet;
-  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
-       I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) {
+  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+       E = OverDefinedCache.end(); I != E; ++I) {
     if (I->first == OldSucc)
       ClearSet.insert(I->second);
   }
@@ -779,17 +964,17 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
     if (ToUpdate == NewSucc) continue;
     
     bool changed = false;
-    for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end();
+    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
          I != E; ++I) {
       // If a value was marked overdefined in OldSucc, and is here too...
-      std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI =
+      DenseSet<OverDefinedPairTy>::iterator OI =
         OverDefinedCache.find(std::make_pair(ToUpdate, *I));
       if (OI == OverDefinedCache.end()) continue;
 
       // Remove it from the caches.
       ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
       ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
-        
+
       assert(CI != Entry.end() && "Couldn't find entry to update?");
       Entry.erase(CI);
       OverDefinedCache.erase(OI);
@@ -798,7 +983,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
       // blocks successors too.
       changed = true;
     }
-        
+
     if (!changed) continue;
     
     worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
@@ -838,7 +1023,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
   
   if (Result.isConstant())
     return Result.getConstant();
-  else if (Result.isConstantRange()) {
+  if (Result.isConstantRange()) {
     ConstantRange CR = Result.getConstantRange();
     if (const APInt *SingleVal = CR.getSingleElement())
       return ConstantInt::get(V->getContext(), *SingleVal);
@@ -854,7 +1039,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
   
   if (Result.isConstant())
     return Result.getConstant();
-  else if (Result.isConstantRange()) {
+  if (Result.isConstantRange()) {
     ConstantRange CR = Result.getConstantRange();
     if (const APInt *SingleVal = CR.getSingleElement())
       return ConstantInt::get(V->getContext(), *SingleVal);
@@ -874,7 +1059,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
   Constant *Res = 0;
   if (Result.isConstant()) {
     Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
-    if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
+    if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
       return ResCI->isZero() ? False : True;
     return Unknown;
   }
@@ -899,13 +1084,12 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
     }
     
     // Handle more complex predicates.
-    ConstantRange RHS(CI->getValue(), CI->getValue()+1);
-    ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS);
-    if (CR.intersectWith(TrueValues).isEmptySet())
-      return False;
-    else if (TrueValues.contains(CR))
+    ConstantRange TrueValues =
+        ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+    if (TrueValues.contains(CR))
       return True;
-    
+    if (TrueValues.inverse().contains(CR))
+      return False;
     return Unknown;
   }
   
@@ -932,7 +1116,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
 }
 
 void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
-                               BasicBlock* NewSucc) {
+                               BasicBlock *NewSucc) {
   if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
 }
 
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 7f51202ecb55..efb722bb97c4 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
 // Register this pass...
 char LibCallAliasAnalysis::ID = 0;
 INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
-                   "LibCall Alias Analysis", false, true, false);
+                   "LibCall Alias Analysis", false, true, false)
 
 FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
   return new LibCallAliasAnalysis(LCI);
@@ -43,8 +43,8 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 /// vs the specified pointer/size.
 AliasAnalysis::ModRefResult
 LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
-                                            ImmutableCallSite CS, const Value *P,
-                                            unsigned Size) {
+                                            ImmutableCallSite CS,
+                                            const Location &Loc) {
   // If we have a function, check to see what kind of mod/ref effects it
   // has.  Start by including any info globally known about the function.
   AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
@@ -64,9 +64,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
   if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
     // Find out if the pointer refers to a known location.
     for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
-      const LibCallLocationInfo &Loc =
+      const LibCallLocationInfo &LocInfo =
       LCI->getLocationInfo(Details[i].LocationID);
-      LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+      LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
       if (Res != LibCallLocationInfo::Yes) continue;
       
       // If we find a match against a location that we 'do not' interact with,
@@ -85,9 +85,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
   // Find out if the pointer refers to a known location.
   bool NoneMatch = true;
   for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
-    const LibCallLocationInfo &Loc =
+    const LibCallLocationInfo &LocInfo =
     LCI->getLocationInfo(Details[i].LocationID);
-    LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+    LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
     if (Res == LibCallLocationInfo::No) continue;
     
     // If we don't know if this pointer points to the location, then we have to
@@ -118,7 +118,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
 //
 AliasAnalysis::ModRefResult
 LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                                    const Value *P, unsigned Size) {
+                                    const Location &Loc) {
   ModRefResult MRInfo = ModRef;
   
   // If this is a direct call to a function that LCI knows about, get the
@@ -126,12 +126,12 @@ LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   if (LCI) {
     if (const Function *F = CS.getCalledFunction()) {
       if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
-        MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
+        MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
         if (MRInfo == NoModRef) return NoModRef;
       }
     }
   }
   
   // The AliasAnalysis base class has some smarts, lets use them.
-  return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, P, Size));
+  return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
 }
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index a9d972435f5f..fc7edc0525f9 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -70,7 +70,7 @@ namespace {
 
     void visitCallSite(CallSite CS);
     void visitMemoryReference(Instruction &I, Value *Ptr,
-                              unsigned Size, unsigned Align,
+                              uint64_t Size, unsigned Align,
                               const Type *Ty, unsigned Flags);
 
     void visitCallInst(CallInst &I);
@@ -108,7 +108,9 @@ namespace {
     raw_string_ostream MessagesStr;
 
     static char ID; // Pass identification, replacement for typeid
-    Lint() : FunctionPass(ID), MessagesStr(Messages) {}
+    Lint() : FunctionPass(ID), MessagesStr(Messages) {
+      initializeLintPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -129,12 +131,6 @@ namespace {
       }
     }
 
-    void WriteType(const Type *T) {
-      if (!T) return;
-      MessagesStr << ' ';
-      WriteTypeSymbolic(MessagesStr, T, Mod);
-    }
-
     // CheckFailed - A check failed, so print out the condition and the message
     // that failed.  This provides a nice place to put a breakpoint if you want
     // to see why something is not correct.
@@ -147,27 +143,16 @@ namespace {
       WriteValue(V3);
       WriteValue(V4);
     }
-
-    void CheckFailed(const Twine &Message, const Value *V1,
-                     const Type *T2, const Value *V3 = 0) {
-      MessagesStr << Message.str() << "\n";
-      WriteValue(V1);
-      WriteType(T2);
-      WriteValue(V3);
-    }
-
-    void CheckFailed(const Twine &Message, const Type *T1,
-                     const Type *T2 = 0, const Type *T3 = 0) {
-      MessagesStr << Message.str() << "\n";
-      WriteType(T1);
-      WriteType(T2);
-      WriteType(T3);
-    }
   };
 }
 
 char Lint::ID = 0;
-INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true);
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+                      false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+                    false, true)
 
 // Assert - We know that cond should be true, if not print an error message.
 #define Assert(C, M) \
@@ -208,7 +193,8 @@ void Lint::visitCallSite(CallSite CS) {
   Instruction &I = *CS.getInstruction();
   Value *Callee = CS.getCalledValue();
 
-  visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee);
+  visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+                       0, 0, MemRef::Callee);
 
   if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
     Assert1(CS.getCallingConv() == F->getCallingConv(),
@@ -240,15 +226,17 @@ void Lint::visitCallSite(CallSite CS) {
                 "Undefined behavior: Call argument type mismatches "
                 "callee parameter type", &I);
 
-        // Check that noalias arguments don't alias other arguments. The
-        // AliasAnalysis API isn't expressive enough for what we really want
-        // to do. Known partial overlap is not distinguished from the case
-        // where nothing is known.
+        // Check that noalias arguments don't alias other arguments. This is
+        // not fully precise because we don't know the sizes of the dereferenced
+        // memory regions.
         if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
-          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
-            Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias,
-                    "Unusual: noalias argument aliases another argument", &I);
-          }
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+            if (AI != BI && (*BI)->getType()->isPointerTy()) {
+              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+              Assert1(Result != AliasAnalysis::MustAlias &&
+                      Result != AliasAnalysis::PartialAlias,
+                      "Unusual: noalias argument aliases another argument", &I);
+            }
 
         // Check that an sret argument points to valid memory.
         if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
@@ -281,15 +269,17 @@ void Lint::visitCallSite(CallSite CS) {
     case Intrinsic::memcpy: {
       MemCpyInst *MCI = cast<MemCpyInst>(&I);
       // TODO: If the size is known, use it.
-      visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0,
+      visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+                           MCI->getAlignment(), 0,
                            MemRef::Write);
-      visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0,
+      visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+                           MCI->getAlignment(), 0,
                            MemRef::Read);
 
       // Check that the memcpy arguments don't overlap. The AliasAnalysis API
       // isn't expressive enough for what we really want to do. Known partial
       // overlap is not distinguished from the case where nothing is known.
-      unsigned Size = 0;
+      uint64_t Size = 0;
       if (const ConstantInt *Len =
             dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                             /*OffsetOk=*/false)))
@@ -303,16 +293,19 @@ void Lint::visitCallSite(CallSite CS) {
     case Intrinsic::memmove: {
       MemMoveInst *MMI = cast<MemMoveInst>(&I);
       // TODO: If the size is known, use it.
-      visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0,
+      visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+                           MMI->getAlignment(), 0,
                            MemRef::Write);
-      visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0,
+      visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+                           MMI->getAlignment(), 0,
                            MemRef::Read);
       break;
     }
     case Intrinsic::memset: {
       MemSetInst *MSI = cast<MemSetInst>(&I);
       // TODO: If the size is known, use it.
-      visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0,
+      visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+                           MSI->getAlignment(), 0,
                            MemRef::Write);
       break;
     }
@@ -322,24 +315,26 @@ void Lint::visitCallSite(CallSite CS) {
               "Undefined behavior: va_start called in a non-varargs function",
               &I);
 
-      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
-                           MemRef::Read | MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
       break;
     case Intrinsic::vacopy:
-      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write);
-      visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read);
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read);
       break;
     case Intrinsic::vaend:
-      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
-                           MemRef::Read | MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
       break;
 
     case Intrinsic::stackrestore:
       // Stackrestore doesn't read or write memory, but it sets the
       // stack pointer, which the compiler may read from or write to
       // at any time, so check it for both readability and writeability.
-      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
-                           MemRef::Read | MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
       break;
     }
 }
@@ -368,7 +363,7 @@ void Lint::visitReturnInst(ReturnInst &I) {
 // TODO: Check that the reference is in bounds.
 // TODO: Check readnone/readonly function attributes.
 void Lint::visitMemoryReference(Instruction &I,
-                                Value *Ptr, unsigned Size, unsigned Align,
+                                Value *Ptr, uint64_t Size, unsigned Align,
                                 const Type *Ty, unsigned Flags) {
   // If no memory is being referenced, it doesn't matter if the pointer
   // is valid.
@@ -512,12 +507,13 @@ void Lint::visitAllocaInst(AllocaInst &I) {
 }
 
 void Lint::visitVAArgInst(VAArgInst &I) {
-  visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0,
+  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
                        MemRef::Read | MemRef::Write);
 }
 
 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
-  visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
+  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+                       MemRef::Branchee);
 
   Assert1(I.getNumDestinations() != 0,
           "Undefined behavior: indirectbr with no destinations", &I);
@@ -571,7 +567,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
   // TODO: Look through eliminable cast pairs.
   // TODO: Look through calls with unique return values.
   // TODO: Look through vector insert/extract/shuffle.
-  V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts();
+  V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
   if (LoadInst *L = dyn_cast<LoadInst>(V)) {
     BasicBlock::iterator BBI = L;
     BasicBlock *BB = L->getParent();
@@ -587,8 +583,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
       BBI = BB->end();
     }
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
-    if (Value *W = PN->hasConstantValue(DT))
-      return findValueImpl(W, OffsetOk, Visited);
+    if (Value *W = PN->hasConstantValue())
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
   } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
     if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
                             Type::getInt64Ty(V->getContext())))
@@ -620,9 +617,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
 
   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, TD))
-      if (W != Inst)
-        return findValueImpl(W, OffsetOk, Visited);
+    if (Value *W = SimplifyInstruction(Inst, TD, DT))
+      return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (Value *W = ConstantFoldConstantExpression(CE, TD))
       if (W != V)
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 0225f4fa2548..a0e603419f57 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LiveValues.h"
+#include "llvm/Instructions.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 using namespace llvm;
@@ -22,10 +23,16 @@ namespace llvm {
 }
 
 char LiveValues::ID = 0;
-INITIALIZE_PASS(LiveValues, "live-values",
-                "Value Liveness Analysis", false, true);
-
-LiveValues::LiveValues() : FunctionPass(ID) {}
+INITIALIZE_PASS_BEGIN(LiveValues, "live-values",
+                "Value Liveness Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LiveValues, "live-values",
+                "Value Liveness Analysis", false, true)
+
+LiveValues::LiveValues() : FunctionPass(ID) {
+  initializeLiveValuesPass(*PassRegistry::getPassRegistry());
+}
 
 void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<DominatorTree>();
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 2ba1d86cdb40..2ea27fb62fcb 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -49,7 +49,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 /// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
 /// bitcasts to get back to the underlying object being addressed, keeping
 /// track of the offset in bytes from the GEPs relative to the result.
-/// This is closely related to Value::getUnderlyingObject but is located
+/// This is closely related to GetUnderlyingObject but is located
 /// here to avoid making VMCore depend on TargetData.
 static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
                                             uint64_t &ByteOffset,
@@ -166,7 +166,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
 
   // If we're using alias analysis to disambiguate get the size of *Ptr.
-  unsigned AccessSize = 0;
+  uint64_t AccessSize = 0;
   if (AA) {
     const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
     AccessSize = AA->getTypeStoreSize(AccessTy);
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index 82c02dcd1342..c1afe8fbd618 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -27,6 +27,8 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Instructions.h"
 #include "llvm/Operator.h"
 #include "llvm/Support/Allocator.h"
@@ -46,8 +48,12 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() {
   return new LoopDependenceAnalysis();
 }
 
-INITIALIZE_PASS(LoopDependenceAnalysis, "lda",
-                "Loop Dependence Analysis", false, true);
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+                "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+                "Loop Dependence Analysis", false, true)
 char LoopDependenceAnalysis::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -86,8 +92,8 @@ static Value *GetPointerOperand(Value *I) {
 static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
                                                          const Value *A,
                                                          const Value *B) {
-  const Value *aObj = A->getUnderlyingObject();
-  const Value *bObj = B->getUnderlyingObject();
+  const Value *aObj = GetUnderlyingObject(A);
+  const Value *bObj = GetUnderlyingObject(B);
   return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
                    bObj, AA->getTypeStoreSize(bObj->getType()));
 }
@@ -128,7 +134,7 @@ void LoopDependenceAnalysis::getLoops(const SCEV *S,
                                       DenseSet<const Loop*>* Loops) const {
   // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
   for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
-    if (!S->isLoopInvariant(L))
+    if (!SE->isLoopInvariant(S, L))
       Loops->insert(L);
 }
 
@@ -217,6 +223,7 @@ LoopDependenceAnalysis::analysePair(DependencePair *P) const {
 
   switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
   case AliasAnalysis::MayAlias:
+  case AliasAnalysis::PartialAlias:
     // We can not analyse objects if we do not know about their aliasing.
     DEBUG(dbgs() << "---> [?] may alias\n");
     return Unknown;
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 46219d1b6f55..05831402f409 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -38,7 +38,9 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
                 cl::desc("Verify loop info (time consuming)"));
 
 char LoopInfo::ID = 0;
-INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
 
 //===----------------------------------------------------------------------===//
 // Loop implementation
@@ -48,15 +50,18 @@ INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
 ///
 bool Loop::isLoopInvariant(Value *V) const {
   if (Instruction *I = dyn_cast<Instruction>(V))
-    return isLoopInvariant(I);
+    return !contains(I);
   return true;  // All non-instructions are loop invariant
 }
 
-/// isLoopInvariant - Return true if the specified instruction is
-/// loop-invariant.
-///
-bool Loop::isLoopInvariant(Instruction *I) const {
-  return !contains(I);
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant. 
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!isLoopInvariant(I->getOperand(i)))
+      return false;
+  
+  return true;
 }
 
 /// makeLoopInvariant - If the given value is an instruciton inside of the
@@ -105,6 +110,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
       return false;
+  
   // Hoist.
   I->moveBefore(InsertPt);
   Changed = true;
@@ -192,7 +198,7 @@ Value *Loop::getTripCount() const {
 
 /// getSmallConstantTripCount - Returns the trip count of this loop as a
 /// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// of not constant. Will also return 0 if the trip count is very large
+/// or not constant. Will also return 0 if the trip count is very large
 /// (>= 2^32)
 unsigned Loop::getSmallConstantTripCount() const {
   Value* TripCount = this->getTripCount();
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 15d4db8f5f98..8e1a7bfef699 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -30,7 +30,6 @@ private:
 
 public:
   static char ID;
-  PrintLoopPass() : LoopPass(ID), Out(dbgs()) {}
   PrintLoopPass(const std::string &B, raw_ostream &o)
       : LoopPass(ID), Banner(B), Out(o) {}
 
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 000000000000..64d215c37cc7
--- /dev/null
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,167 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+  struct MemDepPrinter : public FunctionPass {
+    const Function *F;
+
+    typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+    typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+    typedef SmallSetVector<Dep, 4> DepSet;
+    typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+    DepSetMap Deps;
+
+    static char ID; // Pass identifcation, replacement for typeid
+    MemDepPrinter() : FunctionPass(ID) {
+      initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+
+    void print(raw_ostream &OS, const Module * = 0) const;
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredTransitive<AliasAnalysis>();
+      AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+      AU.setPreservesAll();
+    }
+
+    virtual void releaseMemory() {
+      Deps.clear();
+      F = 0;
+    }
+  };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+                      "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+                      "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+  return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+  this->F = &F;
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+  // All this code uses non-const interfaces because MemDep is not
+  // const-friendly, though nothing is actually modified.
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+      continue;
+
+    MemDepResult Res = MDA.getDependency(Inst);
+    if (!Res.isNonLocal()) {
+      assert(Res.isClobber() != Res.isDef() &&
+             "Local dep should be def or clobber!");
+      Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       static_cast<BasicBlock *>(0)));
+    } else if (CallSite CS = cast<Value>(Inst)) {
+      const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+        MDA.getNonLocalCallDependency(CS);
+
+      DepSet &InstDeps = Deps[Inst];
+      for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+           I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+        const MemDepResult &Res = I->getResult();
+        assert(Res.isClobber() != Res.isDef() &&
+               "Resolved non-local call dep should be def or clobber!");
+        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       I->getBB()));
+      }
+    } else {
+      SmallVector<NonLocalDepResult, 4> NLDI;
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        // FIXME: Volatile is not handled properly here.
+        AliasAnalysis::Location Loc = AA.getLocation(LI);
+        MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+                                         LI->getParent(), NLDI);
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        // FIXME: Volatile is not handled properly here.
+        AliasAnalysis::Location Loc = AA.getLocation(SI);
+        MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+      } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+        AliasAnalysis::Location Loc = AA.getLocation(VI);
+        MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+      } else {
+        llvm_unreachable("Unknown memory instruction!");
+      }
+
+      DepSet &InstDeps = Deps[Inst];
+      for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+           I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+        const MemDepResult &Res = I->getResult();
+        assert(Res.isClobber() != Res.isDef() &&
+               "Resolved non-local pointer dep should be def or clobber!");
+        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       I->getBB()));
+      }
+    }
+  }
+
+  return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+  for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+    const Instruction *Inst = &*I;
+
+    DepSetMap::const_iterator DI = Deps.find(Inst);
+    if (DI == Deps.end())
+      continue;
+
+    const DepSet &InstDeps = DI->second;
+
+    for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+         I != E; ++I) {
+      const Instruction *DepInst = I->first.getPointer();
+      bool isClobber = I->first.getInt();
+      const BasicBlock *DepBB = I->second;
+
+      OS << "    " << (isClobber ? "Clobber" : "    Def");
+      if (DepBB) {
+        OS << " in block ";
+        WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+      }
+      OS << " from: ";
+      if (DepInst == Inst)
+        OS << "<unspecified>";
+      else
+        DepInst->print(OS);
+      OS << "\n";
+    }
+
+    Inst->print(OS);
+    OS << "\n\n";
+  }
+}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index d18d5ce0ea4c..35043bddfaf6 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -19,15 +19,18 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
 using namespace llvm;
 
 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -46,11 +49,15 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
 char MemoryDependenceAnalysis::ID = 0;
   
 // Register this pass...
-INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep",
-                "Memory Dependence Analysis", false, true);
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+                "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+                      "Memory Dependence Analysis", false, true)
 
 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
 : FunctionPass(ID), PredCache(0) {
+  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
 }
 MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
 }
@@ -77,6 +84,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool MemoryDependenceAnalysis::runOnFunction(Function &) {
   AA = &getAnalysis<AliasAnalysis>();
+  TD = getAnalysisIfAvailable<TargetData>();
   if (PredCache == 0)
     PredCache.reset(new PredIteratorCache());
   return false;
@@ -92,11 +100,79 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,
   InstIt = ReverseMap.find(Inst);
   assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
   bool Found = InstIt->second.erase(Val);
-  assert(Found && "Invalid reverse map!"); Found=Found;
+  assert(Found && "Invalid reverse map!"); (void)Found;
   if (InstIt->second.empty())
     ReverseMap.erase(InstIt);
 }
 
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+                                        AliasAnalysis::Location &Loc,
+                                        AliasAnalysis *AA) {
+  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+    if (LI->isVolatile()) {
+      Loc = AliasAnalysis::Location();
+      return AliasAnalysis::ModRef;
+    }
+    Loc = AA->getLocation(LI);
+    return AliasAnalysis::Ref;
+  }
+
+  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    if (SI->isVolatile()) {
+      Loc = AliasAnalysis::Location();
+      return AliasAnalysis::ModRef;
+    }
+    Loc = AA->getLocation(SI);
+    return AliasAnalysis::Mod;
+  }
+
+  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+    Loc = AA->getLocation(V);
+    return AliasAnalysis::ModRef;
+  }
+
+  if (const CallInst *CI = isFreeCall(Inst)) {
+    // calls to free() deallocate the entire structure
+    Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+    return AliasAnalysis::Mod;
+  }
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start:
+      Loc = AliasAnalysis::Location(II->getArgOperand(1),
+                                    cast<ConstantInt>(II->getArgOperand(0))
+                                      ->getZExtValue(),
+                                    II->getMetadata(LLVMContext::MD_tbaa));
+      // These intrinsics don't really modify the memory, but returning Mod
+      // will allow them to be handled conservatively.
+      return AliasAnalysis::Mod;
+    case Intrinsic::invariant_end:
+      Loc = AliasAnalysis::Location(II->getArgOperand(2),
+                                    cast<ConstantInt>(II->getArgOperand(1))
+                                      ->getZExtValue(),
+                                    II->getMetadata(LLVMContext::MD_tbaa));
+      // These intrinsics don't really modify the memory, but returning Mod
+      // will allow them to be handled conservatively.
+      return AliasAnalysis::Mod;
+    default:
+      break;
+    }
+
+  // Otherwise, just do the coarse-grained thing that always works.
+  if (Inst->mayWriteToMemory())
+    return AliasAnalysis::ModRef;
+  if (Inst->mayReadFromMemory())
+    return AliasAnalysis::Ref;
+  return AliasAnalysis::NoModRef;
+}
 
 /// getCallSiteDependencyFrom - Private helper for finding the local
 /// dependencies of a call site.
@@ -108,19 +184,16 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
     Instruction *Inst = --ScanIt;
     
     // If this inst is a memory op, get the pointer it accessed
-    Value *Pointer = 0;
-    uint64_t PointerSize = 0;
-    if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
-      Pointer = S->getPointerOperand();
-      PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
-    } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
-      Pointer = V->getOperand(0);
-      PointerSize = AA->getTypeStoreSize(V->getType());
-    } else if (const CallInst *CI = isFreeCall(Inst)) {
-      Pointer = CI->getArgOperand(0);
-      // calls to free() erase the entire structure
-      PointerSize = ~0ULL;
-    } else if (CallSite InstCS = cast<Value>(Inst)) {
+    AliasAnalysis::Location Loc;
+    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+    if (Loc.Ptr) {
+      // A simple instruction.
+      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+        return MemDepResult::getClobber(Inst);
+      continue;
+    }
+
+    if (CallSite InstCS = cast<Value>(Inst)) {
       // Debug intrinsics don't cause dependences.
       if (isa<DbgInfoIntrinsic>(Inst)) continue;
       // If these two calls do not interfere, look past it.
@@ -128,23 +201,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
       case AliasAnalysis::NoModRef:
         // If the two calls are the same, return InstCS as a Def, so that
         // CS can be found redundant and eliminated.
-        if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
             CS.getInstruction()->isIdenticalToWhenDefined(Inst))
           return MemDepResult::getDef(Inst);
 
         // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
         // keep scanning.
-        continue;
+        break;
       default:
         return MemDepResult::getClobber(Inst);
       }
-    } else {
-      // Non-memory instruction.
-      continue;
     }
-    
-    if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
-      return MemDepResult::getClobber(Inst);
   }
   
   // No dependence found.  If this is the entry block of the function, it is a
@@ -155,10 +222,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
 }
 
 /// getPointerDependencyFrom - Return the instruction on which a memory
-/// location depends.  If isLoad is true, this routine ignore may-aliases with
-/// read-only operations.
+/// location depends.  If isLoad is true, this routine ignores may-aliases with
+/// read-only operations.  If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
 MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, 
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, 
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
 
   Value *InvariantTag = 0;
@@ -175,8 +243,8 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     }
     
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-      // Debug intrinsics don't cause dependences.
-      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+      // Debug intrinsics don't (and can't) cause dependences.
+      if (isa<DbgInfoIntrinsic>(II)) continue;
       
       // If we pass an invariant-end marker, then we've just entered an
       // invariant region and can start ignoring dependencies.
@@ -184,43 +252,53 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them.  It'd
         // be nice to handle that at some point.
-        AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr);
-        if (R == AliasAnalysis::MustAlias) {
+        AliasAnalysis::AliasResult R =
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc);
+        if (R == AliasAnalysis::MustAlias)
           InvariantTag = II->getArgOperand(0);
-          continue;
-        }
-      
+
+        continue;
+      }
+
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
-      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them.  It'd
         // be nice to handle that at some point.
-        AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr);
+        AliasAnalysis::AliasResult R =
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc);
         if (R == AliasAnalysis::MustAlias)
           return MemDepResult::getDef(II);
+        continue;
       }
     }
 
     // If we're querying on a load and we're in an invariant region, we're done
     // at this point. Nothing a load depends on can live in an invariant region.
+    //
+    // FIXME: this will prevent us from returning load/load must-aliases, so GVN
+    // won't remove redundant loads.
     if (isLoad && InvariantTag) continue;
 
     // Values depend on loads if the pointers are must aliased.  This means that
     // a load depends on another must aliased load from the same value.
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-      Value *Pointer = LI->getPointerOperand();
-      uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
+      AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
       
       // If we found a pointer, check if it could be the same as our pointer.
-      AliasAnalysis::AliasResult R =
-        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
       if (R == AliasAnalysis::NoAlias)
         continue;
       
       // May-alias loads don't depend on each other without a dependence.
-      if (isLoad && R == AliasAnalysis::MayAlias)
+      if (isLoad && R != AliasAnalysis::MustAlias)
         continue;
+
+      // Stores don't alias loads from read-only memory.
+      if (!isLoad && AA->pointsToConstantMemory(LoadLoc))
+        continue;
+
       // Stores depend on may and must aliased loads, loads depend on must-alias
       // loads.
       return MemDepResult::getDef(Inst);
@@ -234,23 +312,21 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
       // If alias analysis can tell that this store is guaranteed to not modify
       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
       // the query pointer points to constant memory etc.
-      if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
         continue;
 
       // Ok, this store might clobber the query pointer.  Check to see if it is
       // a must alias: in this case, we want to return this as a def.
-      Value *Pointer = SI->getPointerOperand();
-      uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+      AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
       
       // If we found a pointer, check if it could be the same as our pointer.
-      AliasAnalysis::AliasResult R =
-        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
       
       if (R == AliasAnalysis::NoAlias)
         continue;
-      if (R == AliasAnalysis::MayAlias)
-        return MemDepResult::getClobber(Inst);
-      return MemDepResult::getDef(Inst);
+      if (R == AliasAnalysis::MustAlias)
+        return MemDepResult::getDef(Inst);
+      return MemDepResult::getClobber(Inst);
     }
 
     // If this is an allocation, and if we know that the accessed pointer is to
@@ -263,7 +339,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     // need to continue scanning until the malloc call.
     if (isa<AllocaInst>(Inst) ||
         (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
-      Value *AccessPtr = MemPtr->getUnderlyingObject();
+      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
       
       if (AccessPtr == Inst ||
           AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
@@ -272,7 +348,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     }
 
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
-    switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
+    switch (AA->getModRefInfo(Inst, MemLoc)) {
     case AliasAnalysis::NoModRef:
       // If the call has no effect on the queried pointer, just ignore it.
       continue;
@@ -322,9 +398,6 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
   
   BasicBlock *QueryParent = QueryInst->getParent();
   
-  Value *MemPtr = 0;
-  uint64_t MemSize = 0;
-  
   // Do the scan.
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
     // No dependence found.  If this is the entry block of the function, it is a
@@ -333,65 +406,25 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
       LocalCache = MemDepResult::getNonLocal();
     else
       LocalCache = MemDepResult::getClobber(QueryInst);
-  } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) {
-    // If this is a volatile store, don't mess around with it.  Just return the
-    // previous instruction as a clobber.
-    if (SI->isVolatile())
-      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
-    else {
-      MemPtr = SI->getPointerOperand();
-      MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
-    }
-  } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
-    // If this is a volatile load, don't mess around with it.  Just return the
-    // previous instruction as a clobber.
-    if (LI->isVolatile())
-      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
-    else {
-      MemPtr = LI->getPointerOperand();
-      MemSize = AA->getTypeStoreSize(LI->getType());
-    }
-  } else if (const CallInst *CI = isFreeCall(QueryInst)) {
-    MemPtr = CI->getArgOperand(0);
-    // calls to free() erase the entire structure, not just a field.
-    MemSize = ~0UL;
-  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
-    int IntrinsicID = 0;  // Intrinsic IDs start at 1.
-    IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst);
-    if (II)
-      IntrinsicID = II->getIntrinsicID();
-
-    switch (IntrinsicID) {
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-    case Intrinsic::invariant_start:
-      MemPtr = II->getArgOperand(1);
-      MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
-      break;
-    case Intrinsic::invariant_end:
-      MemPtr = II->getArgOperand(2);
-      MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
-      break;
-    default:
+  } else {
+    AliasAnalysis::Location MemLoc;
+    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+    if (MemLoc.Ptr) {
+      // If we can do a pointer scan, make it happen.
+      bool isLoad = !(MR & AliasAnalysis::Mod);
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+
+      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+                                            QueryParent);
+    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
       CallSite QueryCS(QueryInst);
       bool isReadOnly = AA->onlyReadsMemory(QueryCS);
       LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
                                              QueryParent);
-      break;
-    }
-  } else {
-    // Non-memory instruction.
-    LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
-  }
-  
-  // If we need to do a pointer scan, make it happen.
-  if (MemPtr) {
-    bool isLoad = !QueryInst->mayWriteToMemory();
-    if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
-      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
-    }
-    LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
-                                          QueryParent);
+    } else
+      // Non-memory instruction.
+      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
   }
   
   // Remember the result!
@@ -565,31 +598,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
 /// own block.
 ///
 void MemoryDependenceAnalysis::
-getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+                             BasicBlock *FromBB,
                              SmallVectorImpl<NonLocalDepResult> &Result) {
-  assert(Pointer->getType()->isPointerTy() &&
+  assert(Loc.Ptr->getType()->isPointerTy() &&
          "Can't get pointer deps of a non-pointer!");
   Result.clear();
   
-  // We know that the pointer value is live into FromBB find the def/clobbers
-  // from presecessors.
-  const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
-  uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
-  
-  PHITransAddr Address(Pointer, TD);
+  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
   
   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
   // a block with multiple different pointers.  This can happen during PHI
   // translation.
   DenseMap<BasicBlock*, Value*> Visited;
-  if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
+  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
                                    Result, Visited, true))
     return;
   Result.clear();
   Result.push_back(NonLocalDepResult(FromBB,
                                      MemDepResult::getClobber(FromBB->begin()),
-                                     Pointer));
+                                     const_cast<Value *>(Loc.Ptr)));
 }
 
 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -597,7 +626,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
 /// lookup (which may use dirty cache info if available).  If we do a lookup,
 /// add the result to the cache.
 MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                         bool isLoad, BasicBlock *BB,
                         NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
   
@@ -631,15 +660,14 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
     ScanPos = ExistingResult->getResult().getInst();
     
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
-    ValueIsLoadPair CacheKey(Pointer, isLoad);
+    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
     RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
   } else {
     ++NumUncacheNonLocalPtr;
   }
   
   // Scan the block for the dependency.
-  MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad, 
-                                              ScanPos, BB);
+  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
   
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
@@ -658,7 +686,7 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
   // update MemDep when we remove instructions.
   Instruction *Inst = Dep.getInst();
   assert(Inst && "Didn't depend on anything?");
-  ValueIsLoadPair CacheKey(Pointer, isLoad);
+  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
   return Dep;
 }
@@ -712,7 +740,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
 /// not compute dependence information for some reason.  This should be treated
 /// as a clobber dependence on the first instruction in the predecessor block.
 bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+                            const AliasAnalysis::Location &Loc,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepResult> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
@@ -720,14 +749,68 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
   
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
-  
-  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
-    &NonLocalPointerDeps[CacheKey];
-  NonLocalDepInfo *Cache = &CacheInfo->second;
+
+  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+  // CacheKey, this value will be inserted as the associated value. Otherwise,
+  // it'll be ignored, and we'll have to check to see if the cached size and
+  // tbaa tag are consistent with the current query.
+  NonLocalPointerInfo InitialNLPI;
+  InitialNLPI.Size = Loc.Size;
+  InitialNLPI.TBAATag = Loc.TBAATag;
+
+  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+  // already have one.
+  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = 
+    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+  // If we already have a cache entry for this CacheKey, we may need to do some
+  // work to reconcile the cache entry and the current query.
+  if (!Pair.second) {
+    if (CacheInfo->Size < Loc.Size) {
+      // The query's Size is greater than the cached one. Throw out the
+      // cached data and procede with the query at the greater size.
+      CacheInfo->Pair = BBSkipFirstBlockPair();
+      CacheInfo->Size = Loc.Size;
+      for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+           DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+        if (Instruction *Inst = DI->getResult().getInst())
+          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+      CacheInfo->NonLocalDeps.clear();
+    } else if (CacheInfo->Size > Loc.Size) {
+      // This query's Size is less than the cached one. Conservatively restart
+      // the query using the greater size.
+      return getNonLocalPointerDepFromBB(Pointer,
+                                         Loc.getWithNewSize(CacheInfo->Size),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
+
+    // If the query's TBAATag is inconsistent with the cached one,
+    // conservatively throw out the cached data and restart the query with
+    // no tag if needed.
+    if (CacheInfo->TBAATag != Loc.TBAATag) {
+      if (CacheInfo->TBAATag) {
+        CacheInfo->Pair = BBSkipFirstBlockPair();
+        CacheInfo->TBAATag = 0;
+        for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+             DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+          if (Instruction *Inst = DI->getResult().getInst())
+            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+        CacheInfo->NonLocalDeps.clear();
+      }
+      if (Loc.TBAATag)
+        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+                                           isLoad, StartBB, Result, Visited,
+                                           SkipFirstBlock);
+    }
+  }
+
+  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     // We have a fully cached result for this query then we can just return the
     // cached results and populate the visited set.  However, we have to verify
     // that we don't already have conflicting results for these blocks.  Check
@@ -763,9 +846,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
   // than its valid cache info.  If empty, the result will be valid cache info,
   // otherwise it isn't.
   if (Cache->empty())
-    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   else
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
   
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -790,8 +873,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
       DEBUG(AssertSorted(*Cache, NumSortedEntries));
-      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
-                                                 isLoad, BB, Cache,
+      MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
                                                  NumSortedEntries);
       
       // If we got a Def or Clobber, add this to the list of results.
@@ -888,7 +970,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
         // queries.  Mark this in NonLocalPointerDeps by setting the
         // BBSkipFirstBlockPair pointer to null.  This requires reuse of the
         // cached value to do more work but not miss the phi trans failure.
-        NonLocalPointerDeps[CacheKey].first = BBSkipFirstBlockPair();
+        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+        NLPI.Pair = BBSkipFirstBlockPair();
         continue;
       }
 
@@ -899,21 +982,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       
       // If we have a problem phi translating, fall through to the code below
       // to handle the failure condition.
-      if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
+      if (getNonLocalPointerDepFromBB(PredPointer,
+                                      Loc.getWithNewPtr(PredPointer.getAddr()),
+                                      isLoad, Pred,
                                       Result, Visited))
         goto PredTranslationFailure;
     }
     
     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
     CacheInfo = &NonLocalPointerDeps[CacheKey];
-    Cache = &CacheInfo->second;
+    Cache = &CacheInfo->NonLocalDeps;
     NumSortedEntries = Cache->size();
     
     // Since we did phi translation, the "Cache" set won't contain all of the
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set"  Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
     SkipFirstBlock = false;
     continue;
 
@@ -922,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     if (Cache == 0) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
       CacheInfo = &NonLocalPointerDeps[CacheKey];
-      Cache = &CacheInfo->second;
+      Cache = &CacheInfo->NonLocalDeps;
       NumSortedEntries = Cache->size();
     }
     
@@ -930,7 +1015,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set".  Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
     
     // If *nothing* works, mark the pointer as being clobbered by the first
     // instruction in this block.
@@ -972,7 +1057,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
   
   // Remove all of the entries in the BB->val map.  This involves removing
   // instructions from the reverse map.
-  NonLocalDepInfo &PInfo = It->second.second;
+  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   
   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
     Instruction *Target = PInfo[i].getResult().getInst();
@@ -1143,10 +1228,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       assert(P.getPointer() != RemInst &&
              "Already removed NonLocalPointerDeps info for RemInst");
       
-      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
+      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
       
       // The cache is not valid for any specific block anymore.
-      NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
+      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
       
       // Update any entries for RemInst to use the instruction after it.
       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
@@ -1192,7 +1277,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
   for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
        E = NonLocalPointerDeps.end(); I != E; ++I) {
     assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
-    const NonLocalDepInfo &Val = I->second.second;
+    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
     for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
          II != E; ++II)
       assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 2cc1c2aa005c..e7e999cebeb9 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -30,7 +30,9 @@ namespace {
     DebugInfoFinder Finder;
   public:
     static char ID; // Pass identification, replacement for typeid
-    ModuleDebugInfoPrinter() : ModulePass(ID) {}
+    ModuleDebugInfoPrinter() : ModulePass(ID) {
+      initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnModule(Module &M);
 
@@ -43,7 +45,7 @@ namespace {
 
 char ModuleDebugInfoPrinter::ID = 0;
 INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
-                "Decodes module-level debug info", false, true);
+                "Decodes module-level debug info", false, true)
 
 ModulePass *llvm::createModuleDebugInfoPrinterPass() {
   return new ModuleDebugInfoPrinter();
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 000000000000..101c2d5b0285
--- /dev/null
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace {
+  /// NoAA - This class implements the -no-aa pass, which always returns "I
+  /// don't know" for alias queries.  NoAA is unlike other alias analysis
+  /// implementations, in that it does not chain to a previous analysis.  As
+  /// such it doesn't follow many of the rules that other alias analyses must.
+  ///
+  struct NoAA : public ImmutablePass, public AliasAnalysis {
+    static char ID; // Class identification, replacement for typeinfo
+    NoAA() : ImmutablePass(ID) {
+      initializeNoAAPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    }
+
+    virtual void initializePass() {
+      // Note: NoAA does not call InitializeAliasAnalysis because it's
+      // special and does not support chaining.
+      TD = getAnalysisIfAvailable<TargetData>();
+    }
+
+    virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+      return MayAlias;
+    }
+
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+      return UnknownModRefBehavior;
+    }
+    virtual ModRefBehavior getModRefBehavior(const Function *F) {
+      return UnknownModRefBehavior;
+    }
+
+    virtual bool pointsToConstantMemory(const Location &Loc,
+                                        bool OrLocal) {
+      return false;
+    }
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Location &Loc) {
+      return ModRef;
+    }
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2) {
+      return ModRef;
+    }
+
+    virtual void deleteValue(Value *V) {}
+    virtual void copyValue(Value *From, Value *To) {}
+    virtual void addEscapingUse(Use &U) {}
+    
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(const void *ID) {
+      if (ID == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+  };
+}  // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+                   "No Alias Analysis (always returns 'may' alias)",
+                   true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 8e4fa03f2134..93da5a48518d 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,22 +12,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Instructions.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 static bool CanPHITrans(Instruction *Inst) {
   if (isa<PHINode>(Inst) ||
-      isa<BitCastInst>(Inst) ||
       isa<GetElementPtrInst>(Inst))
     return true;
-  
+
+  if (isa<CastInst>(Inst) &&
+      Inst->isSafeToSpeculativelyExecute())
+    return true;
+
   if (Inst->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(Inst->getOperand(1)))
     return true;
-  
+
   //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
   //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
   //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
@@ -50,7 +55,7 @@ static bool VerifySubExpr(Value *Expr,
   // If this is a non-instruction value, there is nothing to do.
   Instruction *I = dyn_cast<Instruction>(Expr);
   if (I == 0) return true;
-  
+
   // If it's an instruction, it is either in Tmp or its operands recursively
   // are.
   SmallVectorImpl<Instruction*>::iterator Entry =
@@ -59,16 +64,17 @@ static bool VerifySubExpr(Value *Expr,
     InstInputs.erase(Entry);
     return true;
   }
-  
+
   // If it isn't in the InstInputs list it is a subexpr incorporated into the
   // address.  Sanity check that it is phi translatable.
   if (!CanPHITrans(I)) {
-    errs() << "Non phi translatable instruction found in PHITransAddr, either "
-              "something is missing from InstInputs or CanPHITrans is wrong:\n";
+    errs() << "Non phi translatable instruction found in PHITransAddr:\n";
     errs() << *I << '\n';
+    llvm_unreachable("Either something is missing from InstInputs or "
+                     "CanPHITrans is wrong.");
     return false;
   }
-  
+
   // Validate the operands of the instruction.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (!VerifySubExpr(I->getOperand(i), InstInputs))
@@ -82,19 +88,20 @@ static bool VerifySubExpr(Value *Expr,
 /// returns false.
 bool PHITransAddr::Verify() const {
   if (Addr == 0) return true;
-  
-  SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());  
-  
+
+  SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
   if (!VerifySubExpr(Addr, Tmp))
     return false;
-  
+
   if (!Tmp.empty()) {
-    errs() << "PHITransAddr inconsistent, contains extra instructions:\n";
+    errs() << "PHITransAddr contains extra instructions:\n";
     for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
       errs() << "  InstInput #" << i << " is " << *InstInputs[i] << "\n";
+    llvm_unreachable("This is unexpected.");
     return false;
   }
-  
+
   // a-ok.
   return true;
 }
@@ -111,11 +118,11 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const {
 }
 
 
-static void RemoveInstInputs(Value *V, 
+static void RemoveInstInputs(Value *V,
                              SmallVectorImpl<Instruction*> &InstInputs) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0) return;
-  
+
   // If the instruction is in the InstInputs list, remove it.
   SmallVectorImpl<Instruction*>::iterator Entry =
     std::find(InstInputs.begin(), InstInputs.end(), I);
@@ -123,9 +130,9 @@ static void RemoveInstInputs(Value *V,
     InstInputs.erase(Entry);
     return;
   }
-  
+
   assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
-  
+
   // Otherwise, it must have instruction inputs itself.  Zap them recursively.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
     if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
@@ -139,7 +146,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
   // If this is a non-instruction value, it can't require PHI translation.
   Instruction *Inst = dyn_cast<Instruction>(V);
   if (Inst == 0) return V;
-  
+
   // Determine whether 'Inst' is an input to our PHI translatable expression.
   bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
 
@@ -156,16 +163,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
 
     // In either case, the instruction itself isn't an input any longer.
     InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
-    
+
     // If this is a PHI, go ahead and translate it.
     if (PHINode *PN = dyn_cast<PHINode>(Inst))
       return AddAsInput(PN->getIncomingValueForBlock(PredBB));
-    
+
     // If this is a non-phi value, and it is analyzable, we can incorporate it
     // into the expression by making all instruction operands be inputs.
     if (!CanPHITrans(Inst))
       return 0;
-   
+
     // All instruction operands are now inputs (and of course, they may also be
     // defined in this block, so they may need to be phi translated themselves.
     for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
@@ -176,31 +183,34 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
   // Ok, it must be an intermediate result (either because it started that way
   // or because we just incorporated it into the expression).  See if its
   // operands need to be phi translated, and if so, reconstruct it.
-  
-  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
-    Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT);
+
+  if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+    if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+    Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
     if (PHIIn == 0) return 0;
-    if (PHIIn == BC->getOperand(0))
-      return BC;
-    
+    if (PHIIn == Cast->getOperand(0))
+      return Cast;
+
     // Find an available version of this cast.
-    
+
     // Constants are trivial to find.
     if (Constant *C = dyn_cast<Constant>(PHIIn))
-      return AddAsInput(ConstantExpr::getBitCast(C, BC->getType()));
-    
-    // Otherwise we have to see if a bitcasted version of the incoming pointer
+      return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+                                              C, Cast->getType()));
+
+    // Otherwise we have to see if a casted version of the incoming pointer
     // is available.  If so, we can use it, otherwise we have to fail.
     for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
          UI != E; ++UI) {
-      if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
-        if (BCI->getType() == BC->getType() &&
-            (!DT || DT->dominates(BCI->getParent(), PredBB)))
-          return BCI;
+      if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+        if (CastI->getOpcode() == Cast->getOpcode() &&
+            CastI->getType() == Cast->getType() &&
+            (!DT || DT->dominates(CastI->getParent(), PredBB)))
+          return CastI;
     }
     return 0;
   }
-  
+
   // Handle getelementptr with at least one PHI translatable operand.
   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
     SmallVector<Value*, 8> GEPOps;
@@ -208,22 +218,22 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
     for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
       Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
       if (GEPOp == 0) return 0;
-      
+
       AnyChanged |= GEPOp != GEP->getOperand(i);
       GEPOps.push_back(GEPOp);
     }
-    
+
     if (!AnyChanged)
       return GEP;
-    
+
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) {
+    if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
         RemoveInstInputs(GEPOps[i], InstInputs);
-      
+
       return AddAsInput(V);
     }
-    
+
     // Scan to see if we have this GEP available.
     Value *APHIOp = GEPOps[0];
     for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
@@ -245,7 +255,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
     }
     return 0;
   }
-  
+
   // Handle add with a constant RHS.
   if (Inst->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(Inst->getOperand(1))) {
@@ -253,10 +263,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
     Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
     bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
     bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
-    
+
     Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
     if (LHS == 0) return 0;
-    
+
     // If the PHI translated LHS is an add of a constant, fold the immediates.
     if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
       if (BOp->getOpcode() == Instruction::Add)
@@ -264,16 +274,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
           LHS = BOp->getOperand(0);
           RHS = ConstantExpr::getAdd(RHS, CI);
           isNSW = isNUW = false;
-          
+
           // If the old 'LHS' was an input, add the new 'LHS' as an input.
           if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
             RemoveInstInputs(BOp, InstInputs);
             AddAsInput(LHS);
           }
         }
-    
+
     // See if the add simplifies away.
-    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) {
+    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
       // If we simplified the operands, the LHS is no longer an input, but Res
       // is.
       RemoveInstInputs(LHS, InstInputs);
@@ -283,7 +293,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
     // If we didn't modify the add, just return it.
     if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
       return Inst;
-    
+
     // Otherwise, see if we have this add available somewhere.
     for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
          UI != E; ++UI) {
@@ -294,10 +304,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
             (!DT || DT->dominates(BO->getParent(), PredBB)))
           return BO;
     }
-    
+
     return 0;
   }
-  
+
   // Otherwise, we failed.
   return 0;
 }
@@ -335,13 +345,13 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
                           const DominatorTree &DT,
                           SmallVectorImpl<Instruction*> &NewInsts) {
   unsigned NISize = NewInsts.size();
-  
+
   // Attempt to PHI translate with insertion.
   Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
-  
+
   // If successful, return the new value.
   if (Addr) return Addr;
-  
+
   // If not, destroy any intermediate instructions inserted.
   while (NewInsts.size() != NISize)
     NewInsts.pop_back_val()->eraseFromParent();
@@ -367,21 +377,23 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
   // If we don't have an available version of this value, it must be an
   // instruction.
   Instruction *Inst = cast<Instruction>(InVal);
-  
-  // Handle bitcast of PHI translatable value.
-  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
-    Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0),
+
+  // Handle cast of PHI translatable value.
+  if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+    if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+    Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
                                               CurBB, PredBB, DT, NewInsts);
     if (OpVal == 0) return 0;
-    
-    // Otherwise insert a bitcast at the end of PredBB.
-    BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
-                                       InVal->getName()+".phi.trans.insert",
-                                       PredBB->getTerminator());
+
+    // Otherwise insert a cast at the end of PredBB.
+    CastInst *New = CastInst::Create(Cast->getOpcode(),
+                                     OpVal, InVal->getType(),
+                                     InVal->getName()+".phi.trans.insert",
+                                     PredBB->getTerminator());
     NewInsts.push_back(New);
     return New;
   }
-  
+
   // Handle getelementptr with at least one PHI operand.
   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
     SmallVector<Value*, 8> GEPOps;
@@ -392,8 +404,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
       if (OpVal == 0) return 0;
       GEPOps.push_back(OpVal);
     }
-    
-    GetElementPtrInst *Result = 
+
+    GetElementPtrInst *Result =
     GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
                               InVal->getName()+".phi.trans.insert",
                               PredBB->getTerminator());
@@ -401,12 +413,12 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     NewInsts.push_back(Result);
     return Result;
   }
-  
+
 #if 0
   // FIXME: This code works, but it is unclear that we actually want to insert
   // a big chain of computation in order to make a value available in a block.
   // This needs to be evaluated carefully to consider its cost trade offs.
-  
+
   // Handle add with a constant RHS.
   if (Inst->getOpcode() == Instruction::Add &&
       isa<ConstantInt>(Inst->getOperand(1))) {
@@ -414,7 +426,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
                                               CurBB, PredBB, DT, NewInsts);
     if (OpVal == 0) return 0;
-    
+
     BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
                                            InVal->getName()+".phi.trans.insert",
                                                     PredBB->getTerminator());
@@ -424,6 +436,6 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     return Res;
   }
 #endif
-  
+
   return 0;
 }
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 000000000000..5d3f6bbc7b6e
--- /dev/null
+++ b/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,525 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96].  For a CFG backedges are removed and replaced by phony
+// edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner.  As described in
+// [Ball96] edges can be enumerated such that given a path number by following
+// the CFG and updating the path number, the path is obtained.
+//
+// [Ball96]
+//  T. Ball and J. R. Larus. "Efficient Path Profiling."
+//  International Symposium on Microarchitecture, pages 46-57, 1996.
+//  http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+#include <utility>
+#include <vector>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination
+static cl::opt<bool> ProcessEarlyTermination(
+  "path-profile-early-termination", cl::Hidden,
+  cl::desc("In path profiling, insert extra instrumentation to account for "
+           "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+  return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+  return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+  _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+  return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+  _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+  return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+  return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges.  Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+  return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+  return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+  return(_succEdges.end());
+}
+
+// Returns the number of successor edges.  Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+  return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+  _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+  removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+  _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+  removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented.  If BasicBlock
+// is null then returns "<null>".  If BasicBlock has no name, then
+// "<unnamed>" is returned.  Intended for use with debug output.
+std::string BallLarusNode::getName() {
+  std::stringstream name;
+
+  if(getBlock() != NULL) {
+    if(getBlock()->hasName()) {
+      std::string tempName(getBlock()->getName());
+      name << tempName.c_str() << " (" << _uid << ")";
+    } else
+      name << "<unnamed> (" << _uid << ")";
+  } else
+    name << "<null> (" << _uid << ")";
+
+  return name.str();
+}
+
+// Removes an edge from an edgeVector.  Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+  // TODO: Avoid linear scan by using a set instead
+  for(BLEdgeIterator i = v.begin(),
+        end = v.end();
+      i != end;
+      ++i) {
+    if((*i) == e) {
+      v.erase(i);
+      break;
+    }
+  }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+  return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+  return(_target);
+}
+
+// Sets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+  return _edgeType;
+}
+
+// Gets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+  _edgeType = type;
+}
+
+// Returns the weight of this edge.  Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+  return(_weight);
+}
+
+// Sets the weight of the edge.  Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+  _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+  return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+  _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+  return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+  _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+  return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+  _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+  return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+  BLBlockNodeMap inDag;
+  std::stack<BallLarusNode*> dfsStack;
+
+  _root = addNode(&(_function.getEntryBlock()));
+  _exit = addNode(NULL);
+
+  // start search from root
+  dfsStack.push(getRoot());
+
+  // dfs to add each bb into the dag
+  while(dfsStack.size())
+    buildNode(inDag, dfsStack);
+
+  // put in the final edge
+  addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+  for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+      ++edge)
+    delete (*edge);
+
+  for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+      ++node)
+    delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+  BallLarusNode* node;
+  std::queue<BallLarusNode*> bfsQueue;
+  bfsQueue.push(getExit());
+
+  while(bfsQueue.size() > 0) {
+    node = bfsQueue.front();
+
+    DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+    bfsQueue.pop();
+    unsigned prevPathNumber = node->getNumberPaths();
+    calculatePathNumbersFrom(node);
+
+    // Check for DAG splitting
+    if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+      // Add new phony edge from the split-node to the DAG's exit
+      BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+      exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+      // Counters to handle the possibilty of a multi-graph
+      BasicBlock* oldTarget = 0;
+      unsigned duplicateNumber = 0;
+
+      // Iterate through each successor edge, adding phony edges
+      for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+           succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+        if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+          // is this edge a duplicate?
+          if( oldTarget != (*succ)->getTarget()->getBlock() )
+            duplicateNumber = 0;
+
+          // create the new phony edge: root -> succ
+          BallLarusEdge* rootEdge =
+            addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+          rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+          rootEdge->setRealEdge(*succ);
+
+          // split on this edge and reference it's exit/root phony edges
+          (*succ)->setType(BallLarusEdge::SPLITEDGE);
+          (*succ)->setPhonyRoot(rootEdge);
+          (*succ)->setPhonyExit(exitEdge);
+          (*succ)->setWeight(0);
+        }
+      }
+
+      calculatePathNumbersFrom(node);
+    }
+
+    DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+          << node->getNumberPaths() << ".\n");
+
+    if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+      DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+      for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+          pred != end; pred++) {
+        if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+            (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+          continue;
+
+        BallLarusNode* nextNode = (*pred)->getSource();
+        // not yet visited?
+        if(nextNode->getNumberPaths() == 0)
+          bfsQueue.push(nextNode);
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+  return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+  return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+  return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+  return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+  for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+    (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its imediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+  BallLarusNode* currentNode = dfsStack.top();
+  BasicBlock* currentBlock = currentNode->getBlock();
+
+  if(currentNode->getColor() != BallLarusNode::WHITE) {
+    // we have already visited this node
+    dfsStack.pop();
+    currentNode->setColor(BallLarusNode::BLACK);
+  } else {
+    // are there any external procedure calls?
+    if( ProcessEarlyTermination ) {
+      for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+             bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+           bbCurrent++ ) {
+        Instruction& instr = *bbCurrent;
+        if( instr.getOpcode() == Instruction::Call ) {
+          BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+          callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+          break;
+        }
+      }
+    }
+
+    TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+    if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+       || isa<UnwindInst>(terminator))
+      addEdge(currentNode, getExit(),0);
+
+    currentNode->setColor(BallLarusNode::GRAY);
+    inDag[currentBlock] = currentNode;
+
+    BasicBlock* oldSuccessor = 0;
+    unsigned duplicateNumber = 0;
+
+    // iterate through this node's successors
+    for(succ_iterator successor = succ_begin(currentBlock),
+          succEnd = succ_end(currentBlock); successor != succEnd;
+        oldSuccessor = *successor, ++successor ) {
+      BasicBlock* succBB = *successor;
+
+      // is this edge a duplicate?
+      if (oldSuccessor == succBB)
+        duplicateNumber++;
+      else
+        duplicateNumber = 0;
+
+      buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+    }
+  }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+                             dfsStack, BallLarusNode* currentNode,
+                             BasicBlock* succBB, unsigned duplicateCount) {
+  BallLarusNode* succNode = inDag[succBB];
+
+  if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+    // visited node and forward edge
+    addEdge(currentNode, succNode, duplicateCount);
+  } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+    // visited node and back edge
+    DEBUG(dbgs() << "Backedge detected.\n");
+    addBackedge(currentNode, succNode, duplicateCount);
+  } else {
+    BallLarusNode* childNode;
+    // not visited node and forward edge
+    if(succNode) // an unvisited node that is child of a gray node
+      childNode = succNode;
+    else { // an unvisited node that is a child of a an unvisted node
+      childNode = addNode(succBB);
+      inDag[succBB] = childNode;
+    }
+    addEdge(currentNode, childNode, duplicateCount);
+    dfsStack.push(childNode);
+  }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+  if(node == getExit())
+    // The Exit node must be base case
+    node->setNumberPaths(1);
+  else {
+    unsigned sumPaths = 0;
+    BallLarusNode* succNode;
+
+    for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+        succ != end; succ++) {
+      if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+          (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+        continue;
+
+      (*succ)->setWeight(sumPaths);
+      succNode = (*succ)->getTarget();
+
+      if( !succNode->getNumberPaths() )
+        return;
+      sumPaths += succNode->getNumberPaths();
+    }
+
+    node->setNumberPaths(sumPaths);
+  }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+  return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each
+// pointer created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+                                        BallLarusNode* target,
+                                        unsigned duplicateCount) {
+  return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor.  Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+  BallLarusNode* newNode = createNode(BB);
+  _nodes.push_back(newNode);
+  return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+                                     BallLarusNode* target,
+                                     unsigned duplicateCount) {
+  BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+  _edges.push_back(newEdge);
+  source->addSuccEdge(newEdge);
+  target->addPredEdge(newEdge);
+  return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+                               unsigned duplicateCount) {
+  BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+  childEdge->setType(BallLarusEdge::BACKEDGE);
+
+  childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+  childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+  childEdge->getPhonyRoot()->setRealEdge(childEdge);
+  childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+  childEdge->getPhonyExit()->setRealEdge(childEdge);
+  childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+  _backEdges.push_back(childEdge);
+}
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 000000000000..b361d3f4fa94
--- /dev/null
+++ b/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+  cl::value_desc("filename"),
+  cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+  class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+  public:
+    PathProfileLoaderPass() : ModulePass(ID) { }
+    ~PathProfileLoaderPass();
+
+    // this pass doesn't change anything (only loads information)
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    // the full name of the loader pass
+    virtual const char* getPassName() const {
+      return "Path Profiling Information Loader";
+    }
+
+    // required since this pass implements multiple inheritance
+                virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &PathProfileInfo::ID)
+        return (PathProfileInfo*)this;
+      return this;
+    }
+
+    // entry point to run the pass
+    bool runOnModule(Module &M);
+
+    // pass identification
+    static char ID;
+
+  private:
+    // make a reference table to refer to function by number
+    void buildFunctionRefs(Module &M);
+
+    // process argument info of a program from the input file
+    void handleArgumentInfo();
+
+    // process path number information from the input file
+    void handlePathInfo();
+
+    // array of references to the functions in the module
+    std::vector<Function*> _functions;
+
+    // path profile file handle
+    FILE* _file;
+
+    // path profile file name
+    std::string _filename;
+  };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+                          NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+                   "path-profile-loader",
+                   "Load path profile information from file",
+                   false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+  return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+                                  unsigned duplicateNumber)
+  : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+                          double countStdDev,   PathProfileInfo* ppi)
+  : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+  return 100 * double(_count) /
+    double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+                                   unsigned int pathNumber) {
+  BallLarusEdge* best = 0;
+
+  for( BLEdgeIterator next = node->succBegin(),
+         end = node->succEnd(); next != end; next++ ) {
+    if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+        (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+        (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+        (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+      best = *next;
+  }
+
+  return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+  unsigned int increment = _number;
+  ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+  while (currentNode != _ppi->_currentDag->getExit()) {
+    BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+    increment -= next->getWeight();
+
+    if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+        next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+        next->getTarget() != _ppi->_currentDag->getExit() )
+      pev->push_back(ProfilePathEdge(
+                       next->getSource()->getBlock(),
+                       next->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+        next->getTarget() == _ppi->_currentDag->getExit() )
+      pev->push_back(ProfilePathEdge(
+                       next->getRealEdge()->getSource()->getBlock(),
+                       next->getRealEdge()->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+        next->getSource() == _ppi->_currentDag->getRoot() )
+      pev->push_back(ProfilePathEdge(
+                       next->getRealEdge()->getSource()->getBlock(),
+                       next->getRealEdge()->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    // set the new node
+    currentNode = next->getTarget();
+  }
+
+  return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+  unsigned int increment = _number;
+  ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+  while (currentNode != _ppi->_currentDag->getExit()) {
+    BallLarusEdge* next = getNextEdge(currentNode, increment);
+    increment -= next->getWeight();
+
+    // add block to the block list if it is a real edge
+    if( next->getType() == BallLarusEdge::NORMAL)
+      pbv->push_back (currentNode->getBlock());
+    // make the back edge the last edge since we are at the end
+    else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+      pbv->push_back (currentNode->getBlock());
+      pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+    }
+
+    // set the new node
+    currentNode = next->getTarget();
+  }
+
+  return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+  BallLarusNode* root = _ppi->_currentDag->getRoot();
+  BallLarusEdge* edge = getNextEdge(root, _number);
+
+  if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+               edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+    return edge->getTarget()->getBlock();
+
+  return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+  if (_currentDag)
+    delete _currentDag;
+}
+
+// set the function for which paths are currently begin processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+  // Make sure it exists
+  if (!F) return;
+
+  if (_currentDag)
+    delete _currentDag;
+
+  _currentFunction = F;
+  _currentDag = new BallLarusDag(*F);
+  _currentDag->init();
+  _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+  return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+  return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+  return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+  return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+  return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a functions executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+  return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+  return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+  for( FunctionPathIterator funcNext = _functionPaths.begin(),
+         funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+    for( ProfilePathIterator pathNext = funcNext->second.begin(),
+           pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+      delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+  // get the filename and setup the module's function references
+  _filename = PathProfileInfoFilename;
+  buildFunctionRefs (M);
+
+  if (!(_file = fopen(_filename.c_str(), "rb"))) {
+    errs () << "error: input '" << _filename << "' file does not exist.\n";
+    return false;
+  }
+
+  ProfilingType profType;
+
+  while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+    switch (profType) {
+    case ArgumentInfo:
+      handleArgumentInfo ();
+      break;
+    case PathInfo:
+      handlePathInfo ();
+      break;
+    default:
+      errs () << "error: bad path profiling file syntax, " << profType << "\n";
+      fclose (_file);
+      return false;
+    }
+  }
+
+  fclose (_file);
+
+  return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+  _functions.push_back(0); // make the 0 index a null pointer
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+    if (F->isDeclaration())
+      continue;
+    _functions.push_back(F);
+  }
+}
+
+// handle command like argument infor in the output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+  // get the argument list's length
+  unsigned savedArgsLength;
+  if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+    errs() << "warning: argument info header/data mismatch\n";
+    return;
+  }
+
+  // allocate a buffer, and get the arguments
+  char* args = new char[savedArgsLength+1];
+  if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+    errs() << "warning: argument info header/data mismatch\n";
+
+  args[savedArgsLength] = '\0';
+  argList = std::string(args);
+  delete [] args; // cleanup dynamic string
+
+  // byte alignment
+  if (savedArgsLength & 3)
+    fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+  // get the number of functions in this profile
+  unsigned functionCount;
+  if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+    errs() << "warning: path info header/data mismatch\n";
+    return;
+  }
+
+  // gather path information for each function
+  for (unsigned i = 0; i < functionCount; i++) {
+    PathProfileHeader pathHeader;
+    if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+      errs() << "warning: bad header for path function info\n";
+      break;
+    }
+
+    Function* f = _functions[pathHeader.fnNumber];
+
+    // dynamically allocate a table to store path numbers
+    PathProfileTableEntry* pathTable =
+      new PathProfileTableEntry[pathHeader.numEntries];
+
+    if( fread(pathTable, sizeof(PathProfileTableEntry),
+              pathHeader.numEntries, _file) != pathHeader.numEntries) {
+      delete [] pathTable;
+      errs() << "warning: path function info header/data mismatch\n";
+      return;
+    }
+
+    // Build a new path for the current function
+    unsigned int totalPaths = 0;
+    for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+      totalPaths += pathTable[j].pathCounter;
+      _functionPaths[f][pathTable[j].pathNumber]
+        = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+                          0, this);
+    }
+
+    _functionPathCounts[f] = totalPaths;
+
+    delete [] pathTable;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  NoProfile PathProfileInfo implementation
+//
+
+namespace {
+  struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+    static char ID; // Class identification, replacement for typeinfo
+    NoPathProfileInfo() : ImmutablePass(ID) {
+      initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &PathProfileInfo::ID)
+        return (PathProfileInfo*)this;
+      return this;
+    }
+
+    virtual const char *getPassName() const {
+      return "NoPathProfileInfo";
+    }
+  };
+}  // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+                   "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 000000000000..c54977314207
--- /dev/null
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from current path profile
+// information
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+  class PathProfileVerifier : public ModulePass {
+  private:
+    bool runOnModule(Module &M);
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PathProfileVerifier() : ModulePass(ID) {
+      initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+    }
+
+
+    virtual const char *getPassName() const {
+      return "Path Profiler Verifier";
+    }
+
+    // The verifier requires the path profile and edge profile.
+    virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+  };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+  cl::init("edgefrompath.llvmprof.out"),
+  cl::value_desc("filename"),
+  cl::desc("Edge profile file generated by -path-profile-verifier"),
+  cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+                "Compare the path profile derived edge profile against the "
+                "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+  return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+  AU.addRequired<PathProfileInfo>();
+  AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+  PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+  // setup a data structure to map path edges which index an
+  // array of edge counters
+  NestedBlockToIndexMap arrayMap;
+  unsigned i = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    arrayMap[0][F->begin()][0] = i++;
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+
+      unsigned duplicate = 0;
+      BasicBlock* prev = 0;
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+           prev = TI->getSuccessor(s), ++s) {
+        if (prev == TI->getSuccessor(s))
+          duplicate++;
+        else duplicate = 0;
+
+        arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+      }
+    }
+  }
+
+  std::vector<unsigned> edgeArray(i);
+
+  // iterate through each path and increment the edge counters as needed
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    pathProfileInfo.setCurrentFunction(F);
+
+    DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+          << pathProfileInfo.pathsRun()
+          << "/" << pathProfileInfo.getPotentialPathCount()
+          << " potential paths\n");
+
+    for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+           endPath = pathProfileInfo.pathEnd();
+         nextPath != endPath; nextPath++ ) {
+      ProfilePath* currentPath = nextPath->second;
+
+      ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+      DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+            << currentPath->getCount() << "\n");
+      // setup the entry edge (normally path profiling doens't care about this)
+      if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+        edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+          += currentPath->getCount();
+
+      for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+             endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+        if (nextEdge != pev->begin())
+          DEBUG(dbgs() << " :: ");
+
+        BasicBlock* source = nextEdge->getSource();
+        BasicBlock* target = nextEdge->getTarget();
+        unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+        DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+              << "}--> " << target->getNameStr());
+
+        // Ensure all the referenced edges exist
+        // TODO: make this a separate function
+        if( !arrayMap.count(source) ) {
+          errs() << "  error [" << F->getNameStr() << "()]: source '"
+                 << source->getNameStr()
+                 << "' does not exist in the array map.\n";
+        } else if( !arrayMap[source].count(target) ) {
+          errs() << "  error [" << F->getNameStr() << "()]: target '"
+                 << target->getNameStr()
+                 << "' does not exist in the array map.\n";
+        } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+          errs() << "  error [" << F->getNameStr() << "()]: edge "
+                 << source->getNameStr() << " -> " << target->getNameStr()
+                 << " duplicate number " << duplicateNumber
+                 << " does not exist in the array map.\n";
+        } else {
+          edgeArray[arrayMap[source][target][duplicateNumber]]
+            += currentPath->getCount();
+        }
+      }
+
+      DEBUG(errs() << "\n");
+
+      delete pev;
+    }
+  }
+
+  std::string errorInfo;
+  std::string filename = EdgeProfileFilename;
+
+  // Open a handle to the file
+  FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+  if (!edgeFile) {
+    errs() << "error: unable to open file '" << filename << "' for output.\n";
+    return false;
+  }
+
+  errs() << "Generating edge profile '" << filename << "' ...\n";
+
+  // write argument info
+  unsigned type = ArgumentInfo;
+  unsigned num = pathProfileInfo.argList.size();
+  int zeros = 0;
+
+  fwrite(&type,sizeof(unsigned),1,edgeFile);
+  fwrite(&num,sizeof(unsigned),1,edgeFile);
+  fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
+  if (num&3)
+    fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+  type = EdgeInfo;
+  num = edgeArray.size();
+  fwrite(&type,sizeof(unsigned),1,edgeFile);
+  fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+  // write each edge to the file
+  for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+         e = edgeArray.end(); s != e; s++)
+    fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+  fclose (edgeFile);
+
+  return true;
+}
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
deleted file mode 100644
index 07f46824700a..000000000000
--- a/lib/Analysis/PointerTracking.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements tracking of pointer bounds.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/PointerTracking.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Value.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
-using namespace llvm;
-
-char PointerTracking::ID = 0;
-PointerTracking::PointerTracking() : FunctionPass(ID) {}
-
-bool PointerTracking::runOnFunction(Function &F) {
-  predCache.clear();
-  assert(analyzing.empty());
-  FF = &F;
-  TD = getAnalysisIfAvailable<TargetData>();
-  SE = &getAnalysis<ScalarEvolution>();
-  LI = &getAnalysis<LoopInfo>();
-  DT = &getAnalysis<DominatorTree>();
-  return false;
-}
-
-void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<DominatorTree>();
-  AU.addRequiredTransitive<LoopInfo>();
-  AU.addRequiredTransitive<ScalarEvolution>();
-  AU.setPreservesAll();
-}
-
-bool PointerTracking::doInitialization(Module &M) {
-  const Type *PTy = Type::getInt8PtrTy(M.getContext());
-
-  // Find calloc(i64, i64) or calloc(i32, i32).
-  callocFunc = M.getFunction("calloc");
-  if (callocFunc) {
-    const FunctionType *Ty = callocFunc->getFunctionType();
-
-    std::vector<const Type*> args, args2;
-    args.push_back(Type::getInt64Ty(M.getContext()));
-    args.push_back(Type::getInt64Ty(M.getContext()));
-    args2.push_back(Type::getInt32Ty(M.getContext()));
-    args2.push_back(Type::getInt32Ty(M.getContext()));
-    const FunctionType *Calloc1Type =
-      FunctionType::get(PTy, args, false);
-    const FunctionType *Calloc2Type =
-      FunctionType::get(PTy, args2, false);
-    if (Ty != Calloc1Type && Ty != Calloc2Type)
-      callocFunc = 0; // Give up
-  }
-
-  // Find realloc(i8*, i64) or realloc(i8*, i32).
-  reallocFunc = M.getFunction("realloc");
-  if (reallocFunc) {
-    const FunctionType *Ty = reallocFunc->getFunctionType();
-    std::vector<const Type*> args, args2;
-    args.push_back(PTy);
-    args.push_back(Type::getInt64Ty(M.getContext()));
-    args2.push_back(PTy);
-    args2.push_back(Type::getInt32Ty(M.getContext()));
-
-    const FunctionType *Realloc1Type =
-      FunctionType::get(PTy, args, false);
-    const FunctionType *Realloc2Type =
-      FunctionType::get(PTy, args2, false);
-    if (Ty != Realloc1Type && Ty != Realloc2Type)
-      reallocFunc = 0; // Give up
-  }
-  return false;
-}
-
-// Calculates the number of elements allocated for pointer P,
-// the type of the element is stored in Ty.
-const SCEV *PointerTracking::computeAllocationCount(Value *P,
-                                                    const Type *&Ty) const {
-  Value *V = P->stripPointerCasts();
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-    Value *arraySize = AI->getArraySize();
-    Ty = AI->getAllocatedType();
-    // arraySize elements of type Ty.
-    return SE->getSCEV(arraySize);
-  }
-
-  if (CallInst *CI = extractMallocCall(V)) {
-    Value *arraySize = getMallocArraySize(CI, TD);
-    const Type* AllocTy = getMallocAllocatedType(CI);
-    if (!AllocTy || !arraySize) return SE->getCouldNotCompute();
-    Ty = AllocTy;
-    // arraySize elements of type Ty.
-    return SE->getSCEV(arraySize);
-  }
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
-    if (GV->hasDefinitiveInitializer()) {
-      Constant *C = GV->getInitializer();
-      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
-        Ty = ATy->getElementType();
-        return SE->getConstant(Type::getInt32Ty(P->getContext()),
-                               ATy->getNumElements());
-      }
-    }
-    Ty = GV->getType();
-    return SE->getConstant(Type::getInt32Ty(P->getContext()), 1);
-    //TODO: implement more tracking for globals
-  }
-
-  if (CallInst *CI = dyn_cast<CallInst>(V)) {
-    CallSite CS(CI);
-    Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
-    const Loop *L = LI->getLoopFor(CI->getParent());
-    if (F == callocFunc) {
-      Ty = Type::getInt8Ty(P->getContext());
-      // calloc allocates arg0*arg1 bytes.
-      return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
-                                               SE->getSCEV(CS.getArgument(1))),
-                                L);
-    } else if (F == reallocFunc) {
-      Ty = Type::getInt8Ty(P->getContext());
-      // realloc allocates arg1 bytes.
-      return SE->getSCEVAtScope(CS.getArgument(1), L);
-    }
-  }
-
-  return SE->getCouldNotCompute();
-}
-
-Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const 
-{
-  Value *V = P->stripPointerCasts();
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-    Ty = AI->getAllocatedType();
-    // arraySize elements of type Ty.
-    return AI->getArraySize();
-  }
-
-  if (CallInst *CI = extractMallocCall(V)) {
-    Ty = getMallocAllocatedType(CI);
-    if (!Ty)
-      return 0;
-    Value *arraySize = getMallocArraySize(CI, TD);
-    if (!arraySize) {
-      Ty = Type::getInt8Ty(P->getContext());
-      return CI->getArgOperand(0);
-    }
-    // arraySize elements of type Ty.
-    return arraySize;
-  }
-
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
-    if (GV->hasDefinitiveInitializer()) {
-      Constant *C = GV->getInitializer();
-      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
-        Ty = ATy->getElementType();
-        return ConstantInt::get(Type::getInt32Ty(P->getContext()),
-                               ATy->getNumElements());
-      }
-    }
-    Ty = cast<PointerType>(GV->getType())->getElementType();
-    return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1);
-    //TODO: implement more tracking for globals
-  }
-
-  if (CallInst *CI = dyn_cast<CallInst>(V)) {
-    CallSite CS(CI);
-    Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
-    if (F == reallocFunc) {
-      Ty = Type::getInt8Ty(P->getContext());
-      // realloc allocates arg1 bytes.
-      return CS.getArgument(1);
-    }
-  }
-
-  return 0;
-}
-
-// Calculates the number of elements of type Ty allocated for P.
-const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
-                                                           const Type *Ty)
-  const {
-    const Type *elementTy;
-    const SCEV *Count = computeAllocationCount(P, elementTy);
-    if (isa<SCEVCouldNotCompute>(Count))
-      return Count;
-    if (elementTy == Ty)
-      return Count;
-
-    if (!TD) // need TargetData from this point forward
-      return SE->getCouldNotCompute();
-
-    uint64_t elementSize = TD->getTypeAllocSize(elementTy);
-    uint64_t wantSize = TD->getTypeAllocSize(Ty);
-    if (elementSize == wantSize)
-      return Count;
-    if (elementSize % wantSize) //fractional counts not possible
-      return SE->getCouldNotCompute();
-    return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
-                                                 elementSize/wantSize));
-}
-
-const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
-  // We only deal with pointers.
-  const PointerType *PTy = cast<PointerType>(V->getType());
-  return computeAllocationCountForType(V, PTy->getElementType());
-}
-
-const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
-  return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
-}
-
-// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too
-enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
-                                                   Predicate Pred,
-                                                   const SCEV *A,
-                                                   const SCEV *B) const {
-  if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
-    return AlwaysTrue;
-  Pred = ICmpInst::getSwappedPredicate(Pred);
-  if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
-    return AlwaysTrue;
-
-  Pred = ICmpInst::getInversePredicate(Pred);
-  if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
-    return AlwaysFalse;
-  Pred = ICmpInst::getSwappedPredicate(Pred);
-  if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
-    return AlwaysTrue;
-  return Unknown;
-}
-
-enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
-                                               const SCEV *Limit,
-                                               BasicBlock *BB)
-{
-  //FIXME: merge implementation
-  return Unknown;
-}
-
-void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
-                                       const SCEV *&Limit,
-                                       const SCEV *&Offset) const
-{
-    Pointer = Pointer->stripPointerCasts();
-    Base = Pointer->getUnderlyingObject();
-    Limit = getAllocationSizeInBytes(Base);
-    if (isa<SCEVCouldNotCompute>(Limit)) {
-      Base = 0;
-      Offset = Limit;
-      return;
-    }
-
-    Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
-    if (isa<SCEVCouldNotCompute>(Offset)) {
-      Base = 0;
-      Limit = Offset;
-    }
-}
-
-void PointerTracking::print(raw_ostream &OS, const Module* M) const {
-  // Calling some PT methods may cause caches to be updated, however
-  // this should be safe for the same reason its safe for SCEV.
-  PointerTracking &PT = *const_cast<PointerTracking*>(this);
-  for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
-    if (!I->getType()->isPointerTy())
-      continue;
-    Value *Base;
-    const SCEV *Limit, *Offset;
-    getPointerOffset(&*I, Base, Limit, Offset);
-    if (!Base)
-      continue;
-
-    if (Base == &*I) {
-      const SCEV *S = getAllocationElementCount(Base);
-      OS << *Base << " ==> " << *S << " elements, ";
-      OS << *Limit << " bytes allocated\n";
-      continue;
-    }
-    OS << &*I << " -- base: " << *Base;
-    OS << " offset: " << *Offset;
-
-    enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
-    switch (res) {
-    case AlwaysTrue:
-      OS << " always safe\n";
-      break;
-    case AlwaysFalse:
-      OS << " always unsafe\n";
-      break;
-    case Unknown:
-      OS << " <<unknown>>\n";
-      break;
-    }
-  }
-}
-
-INITIALIZE_PASS(PointerTracking, "pointertracking",
-                "Track pointer bounds", false, true);
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index cbe8d1867e4f..3f0deab9ea87 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Analysis/DominatorInternals.h"
 using namespace llvm;
 
@@ -29,7 +30,7 @@ using namespace llvm;
 char PostDominatorTree::ID = 0;
 char PostDominanceFrontier::ID = 0;
 INITIALIZE_PASS(PostDominatorTree, "postdomtree",
-                "Post-Dominator Tree Construction", true, true);
+                "Post-Dominator Tree Construction", true, true)
 
 bool PostDominatorTree::runOnFunction(Function &F) {
   DT->recalculate(F);
@@ -53,8 +54,11 @@ FunctionPass* llvm::createPostDomTree() {
 //  PostDominanceFrontier Implementation
 //===----------------------------------------------------------------------===//
 
-INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier",
-                "Post-Dominance Frontier Construction", true, true);
+INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier",
+                "Post-Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier",
+                "Post-Dominance Frontier Construction", true, true)
 
 const DominanceFrontier::DomSetType &
 PostDominanceFrontier::calculate(const PostDominatorTree &DT,
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index ecc0a1845307..667ee1cc348a 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -39,7 +39,8 @@ namespace {
   public:
     static char ID; // Class identification, replacement for typeinfo
     explicit ProfileEstimatorPass(const double execcount = 0)
-      : FunctionPass(ID), ExecCount(execcount) {
+        : FunctionPass(ID), ExecCount(execcount) {
+      initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
       if (execcount == 0) ExecCount = LoopWeight;
     }
 
@@ -72,8 +73,11 @@ namespace {
 }  // End of anonymous namespace
 
 char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
-                "Estimate profiling information", false, true, false);
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+                "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+                "Estimate profiling information", false, true, false)
 
 namespace llvm {
   char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
@@ -319,6 +323,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
   FunctionInformation.erase(&F);
   BlockInformation[&F].clear();
   EdgeInformation[&F].clear();
+  BBToVisit.clear();
 
   // Mark all blocks as to visit.
   for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index fc7f28662c01..36f211e858d2 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -24,8 +24,12 @@
 #include <limits>
 using namespace llvm;
 
+namespace llvm {
+  template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
 // Register the ProfileInfo interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
 
 namespace llvm {
 
@@ -43,9 +47,6 @@ ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
   if (MachineProfile) delete MachineProfile;
 }
 
-template<>
-char ProfileInfoT<Function,BasicBlock>::ID = 0;
-
 template<>
 char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
 
@@ -888,7 +889,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
     FI = Unvisited.begin(), FE = Unvisited.end();
     while(FI != FE && !FoundPath) {
       const BasicBlock *BB = *FI; ++FI;
-      const BasicBlock *Dest;
+      const BasicBlock *Dest = 0;
       Path P;
       bool BackEdgeFound = false;
       for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
@@ -1076,7 +1077,9 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con
 namespace {
   struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
     static char ID; // Class identification, replacement for typeinfo
-    NoProfileInfo() : ImmutablePass(ID) {}
+    NoProfileInfo() : ImmutablePass(ID) {
+      initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+    }
     
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
@@ -1097,6 +1100,6 @@ namespace {
 char NoProfileInfo::ID = 0;
 // Register this pass...
 INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
-                   "No Profile Information", false, true, true);
+                   "No Profile Information", false, true, true)
 
 ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index d325b574e848..098079bcffc4 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -46,6 +46,7 @@ namespace {
     static char ID; // Class identification, replacement for typeinfo
     explicit LoaderPass(const std::string &filename = "")
       : ModulePass(ID), Filename(filename) {
+      initializeLoaderPassPass(*PassRegistry::getPassRegistry());
       if (filename.empty()) Filename = ProfileInfoFilename;
     }
 
@@ -80,7 +81,7 @@ namespace {
 
 char LoaderPass::ID = 0;
 INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
-              "Load profile information from llvmprof.out", false, true, false);
+              "Load profile information from llvmprof.out", false, true, false)
 
 char &llvm::ProfileLoaderPassID = LoaderPass::ID;
 
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 3f01b2d592bc..a01751849c51 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -60,10 +60,12 @@ namespace llvm {
     static char ID; // Class identification, replacement for typeinfo
 
     explicit ProfileVerifierPassT () : FunctionPass(ID) {
+      initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
       DisableAssertions = ProfileVerifierDisableAssertions;
     }
     explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), 
                                               DisableAssertions(da) {
+      initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -287,7 +289,7 @@ namespace llvm {
            i != ie; ++i) {
         if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
           FType *F = CI->getCalledFunction();
-          if (F && (F->getNameStr() == "_setjmp")) {
+          if (F && (F->getName() == "_setjmp")) {
             isSetJmpTarget = true; break;
           }
         }
@@ -366,8 +368,11 @@ namespace llvm {
   char ProfileVerifierPassT<FType, BType>::ID = 0;
 }
 
-INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
-                "Verify profiling information", false, true);
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+                "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+                "Verify profiling information", false, true)
 
 namespace llvm {
   FunctionPass *createProfileVerifierPass() {
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index abc057a773a9..e2f6a8bf5d9a 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -16,8 +16,8 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
 
 #define DEBUG_TYPE "region"
 #include "llvm/Support/Debug.h"
@@ -45,7 +45,7 @@ STATISTIC(numSimpleRegions, "The # of simple regions");
 /// PrintStyle - Print region in difference ways.
 enum PrintStyle { PrintNone, PrintBB, PrintRN  };
 
-cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
+static cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
   cl::desc("style of printing regions"),
   cl::values(
     clEnumValN(PrintNone, "none",  "print no details"),
@@ -72,6 +72,15 @@ Region::~Region() {
     delete *I;
 }
 
+void Region::replaceEntry(BasicBlock *BB) {
+  entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+  assert(exit && "No exit to replace!");
+  exit = BB;
+}
+
 bool Region::contains(const BasicBlock *B) const {
   BasicBlock *BB = const_cast<BasicBlock*>(B);
 
@@ -125,41 +134,49 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
   return outermostLoopInRegion(L);
 }
 
-bool Region::isSimple() const {
-  bool isSimple = true;
-  bool found = false;
-
-  BasicBlock *entry = getEntry(), *exit = getExit();
-
-  // TopLevelRegion
-  if (!exit)
-    return false;
+BasicBlock *Region::getEnteringBlock() const {
+  BasicBlock *entry = getEntry();
+  BasicBlock *Pred;
+  BasicBlock *enteringBlock = 0;
 
   for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
        ++PI) {
-    BasicBlock *Pred = *PI;
+    Pred = *PI;
     if (DT->getNode(Pred) && !contains(Pred)) {
-      if (found) {
-        isSimple = false;
-        break;
-      }
-      found = true;
+      if (enteringBlock)
+        return 0;
+
+      enteringBlock = Pred;
     }
   }
 
-  found = false;
+  return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+  BasicBlock *exit = getExit();
+  BasicBlock *Pred;
+  BasicBlock *exitingBlock = 0;
+
+  if (!exit)
+    return 0;
 
   for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
-       ++PI)
-    if (contains(*PI)) {
-      if (found) {
-        isSimple = false;
-        break;
-      }
-      found = true;
+       ++PI) {
+    Pred = *PI;
+    if (contains(Pred)) {
+      if (exitingBlock)
+        return 0;
+
+      exitingBlock = Pred;
     }
+  }
 
-  return isSimple;
+  return exitingBlock;
+}
+
+bool Region::isSimple() const {
+  return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
 }
 
 std::string Region::getNameStr() const {
@@ -311,13 +328,38 @@ void Region::transferChildrenTo(Region *To) {
   children.clear();
 }
 
-void Region::addSubRegion(Region *SubRegion) {
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
   assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+  assert(std::find(begin(), end(), SubRegion) == children.end()
+         && "Subregion already exists!");
+
   SubRegion->parent = this;
-  // Set up the region node.
-  assert(std::find(children.begin(), children.end(), SubRegion) == children.end()
-         && "Node already exist!");
   children.push_back(SubRegion);
+
+  if (!moveChildren)
+    return;
+
+  assert(SubRegion->children.size() == 0
+         && "SubRegions that contain children are not supported");
+
+  for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+    if (!(*I)->isSubRegion()) {
+      BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+      if (SubRegion->contains(BB))
+        RI->setRegionFor(BB, SubRegion);
+    }
+
+  std::vector<Region*> Keep;
+  for (iterator I = begin(), E = end(); I != E; ++I)
+    if (SubRegion->contains(*I) && *I != SubRegion) {
+      SubRegion->children.push_back(*I);
+      (*I)->parent = SubRegion;
+    } else
+      Keep.push_back(*I);
+
+  children.clear();
+  children.insert(children.begin(), Keep.begin(), Keep.end());
 }
 
 
@@ -339,6 +381,38 @@ unsigned Region::getDepth() const {
   return Depth;
 }
 
+Region *Region::getExpandedRegion() const {
+  unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+  if (NumSuccessors == 0)
+    return NULL;
+
+  for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+       PI != PE; ++PI)
+    if (!DT->dominates(getEntry(), *PI))
+      return NULL;
+
+  Region *R = RI->getRegionFor(exit);
+
+  if (R->getEntry() != exit) {
+    if (exit->getTerminator()->getNumSuccessors() == 1)
+      return new Region(getEntry(), *succ_begin(exit), RI, DT);
+    else
+      return NULL;
+  }
+
+  while (R->getParent() && R->getParent()->getEntry() == exit)
+    R = R->getParent();
+
+  if (!DT->dominates(getEntry(), R->getExit()))
+    for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+         PI != PE; ++PI)
+    if (!DT->dominates(R->getExit(), *PI))
+      return NULL;
+
+  return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
 void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
   if (print_tree)
     OS.indent(level*2) << "[" << level << "] " << getNameStr();
@@ -376,6 +450,11 @@ void Region::dump() const {
 }
 
 void Region::clearNodeCache() {
+  // Free the cached nodes.
+  for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+       IE = BBNodeMap.end(); I != IE; ++I)
+    delete I->second;
+
   BBNodeMap.clear();
   for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
     (*RI)->clearNodeCache();
@@ -592,6 +671,7 @@ void RegionInfo::releaseMemory() {
 }
 
 RegionInfo::RegionInfo() : FunctionPass(ID) {
+  initializeRegionInfoPass(*PassRegistry::getPassRegistry());
   TopLevelRegion = 0;
 }
 
@@ -654,11 +734,14 @@ Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
   return I != BBtoRegion.end() ? I->second : 0;
 }
 
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+  BBtoRegion[BB] = R;
+}
+
 Region *RegionInfo::operator[](BasicBlock *BB) const {
   return getRegionFor(BB);
 }
 
-
 BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
   BasicBlock *Exit = NULL;
 
@@ -733,9 +816,28 @@ RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
   return ret;
 }
 
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+  Region *R = getRegionFor(OldBB);
+
+  setRegionFor(NewBB, R);
+
+  while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+    R->replaceEntry(NewBB);
+    R = R->getParent();
+  }
+
+  setRegionFor(OldBB, R);
+}
+
 char RegionInfo::ID = 0;
-INITIALIZE_PASS(RegionInfo, "regions",
-                "Detect single entry single exit regions", true, true);
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+                "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+                "Detect single entry single exit regions", true, true)
 
 // Create methods available outside of this file, to use them
 // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
new file mode 100644
index 000000000000..3269dcc63d5e
--- /dev/null
+++ b/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// most of these codes are COPY from LoopPass.cpp
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+  : FunctionPass(ID), PMDataManager(Depth) {
+  skipThisRegion = false;
+  redoThisRegion = false;
+  RI = NULL;
+  CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and all regions  into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+  RQ.push_back(R);
+  for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+    addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+  Info.addRequired<RegionInfo>();
+  Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+  RI = &getAnalysis<RegionInfo>();
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+  if (RQ.empty()) // No regions, skip calling finalizers
+    return false;
+
+  // Initialization
+  for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+       I != E; ++I) {
+    Region *R = *I;
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *RP = (RegionPass *)getContainedPass(Index);
+      Changed |= RP->doInitialization(R, *this);
+    }
+  }
+
+  // Walk Regions
+  while (!RQ.empty()) {
+
+    CurrentRegion  = RQ.back();
+    skipThisRegion = false;
+    redoThisRegion = false;
+
+    // Run all passes on the current Region.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+                   CurrentRegion->getNameStr());
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      {
+        PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+        TimeRegion PassTimer(getPassTimer(P));
+        Changed |= P->runOnRegion(CurrentRegion, *this);
+      }
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+                     skipThisRegion ? "<deleted>" :
+                                    CurrentRegion->getNameStr());
+      dumpPreservedSet(P);
+
+      if (!skipThisRegion) {
+        // Manually check that this region is still healthy. This is done
+        // instead of relying on RegionInfo::verifyRegion since RegionInfo
+        // is a function pass and it's really expensive to verify every
+        // Region in the function every time. That level of checking can be
+        // enabled with the -verify-region-info option.
+        {
+          TimeRegion PassTimer(getPassTimer(P));
+          CurrentRegion->verifyRegion();
+        }
+
+        // Then call the regular verifyAnalysis functions.
+        verifyPreservedAnalysis(P);
+      }
+
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P,
+                       skipThisRegion ? "<deleted>" :
+                                      CurrentRegion->getNameStr(),
+                       ON_REGION_MSG);
+
+      if (skipThisRegion)
+        // Do not run other passes on this region.
+        break;
+    }
+
+    // If the region was deleted, release all the region passes. This frees up
+    // some memory, and avoids trouble with the pass manager trying to call
+    // verifyAnalysis on them.
+    if (skipThisRegion)
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+        Pass *P = getContainedPass(Index);
+        freePass(P, "<deleted>", ON_REGION_MSG);
+      }
+
+    // Pop the region from queue after running all passes.
+    RQ.pop_back();
+
+    if (redoThisRegion)
+      RQ.push_back(CurrentRegion);
+
+    // Free all region nodes created in region passes.
+    RI->clearNodeCache();
+  }
+
+  // Finalization
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    RegionPass *P = (RegionPass*)getContainedPass(Index);
+    Changed |= P->doFinalization();
+  }
+
+  // Print the region tree after all pass.
+  DEBUG(
+    dbgs() << "\nRegion tree of function " << F.getName()
+           << " after all region Pass:\n";
+    RI->dump();
+    dbgs() << "\n";
+    );
+
+  return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+  errs().indent(Offset*2) << "Region Pass Manager\n";
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    P->dumpPassStructure(Offset + 1);
+    dumpLastUses(P, Offset+1);
+  }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+  std::string Banner;
+  raw_ostream &Out;       // raw_ostream to print on.
+
+public:
+  static char ID;
+  PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+  PrintRegionPass(const std::string &B, raw_ostream &o)
+      : RegionPass(ID), Banner(B), Out(o) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+    Out << Banner;
+    for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+         I != E; ++I)
+      (*I)->getEntry()->print(Out);
+
+    return false;
+  }
+};
+
+char PrintRegionPass::ID = 0;
+}  //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// is not preserving higher level analysis info used by other
+// RGPassManager passes. In such case, pop RGPassManager from the
+// stack. This will force assignPassManager() to create new
+// LPPassManger as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+  // Find RGPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+    PMS.pop();
+
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by LPM then do not insert
+  // this pass in current LPM. Use new RGPassManager.
+  if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+    !PMS.top()->preserveHigherLevelAnalysis(this))
+    PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+                                 PassManagerType PreferredType) {
+  // Find RGPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+    PMS.pop();
+
+  RGPassManager *RGPM;
+
+  // Create new Region Pass Manager if it does not exist.
+  if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+    RGPM = (RGPassManager*)PMS.top();
+  else {
+
+    assert (!PMS.empty() && "Unable to create Region Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Call Graph Pass Manager
+    RGPM = new RGPassManager(PMD->getDepth() + 1);
+    RGPM->populateInheritedAnalysis(PMS);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(RGPM);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    TPM->schedulePass(RGPM);
+
+    // [4] Push new manager into PMS
+    PMS.push(RGPM);
+  }
+
+  RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const {
+  return new PrintRegionPass(Banner, O);
+}
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index fee5c1bae976..0cf0f9050504 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -121,35 +121,41 @@ namespace {
 struct RegionViewer
   : public DOTGraphTraitsViewer<RegionInfo, false> {
   static char ID;
-  RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){}
+  RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+    initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+  }
 };
-
 char RegionViewer::ID = 0;
-INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
-                true, true);
 
 struct RegionOnlyViewer
   : public DOTGraphTraitsViewer<RegionInfo, true> {
   static char ID;
-  RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){}
+  RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+    initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+  }
 };
-
 char RegionOnlyViewer::ID = 0;
-INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
-                "View regions of function (with no function bodies)",
-                true, true);
 
 struct RegionPrinter
   : public DOTGraphTraitsPrinter<RegionInfo, false> {
   static char ID;
   RegionPrinter() :
-    DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {}
+    DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+      initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+    }
 };
+char RegionPrinter::ID = 0;
 } //end anonymous namespace
 
-char RegionPrinter::ID = 0;
 INITIALIZE_PASS(RegionPrinter, "dot-regions",
-                "Print regions of function to 'dot' file", true, true);
+                "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+                true, true)
+                
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+                "View regions of function (with no function bodies)",
+                true, true)
 
 namespace {
 
@@ -157,7 +163,9 @@ struct RegionOnlyPrinter
   : public DOTGraphTraitsPrinter<RegionInfo, true> {
   static char ID;
   RegionOnlyPrinter() :
-    DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {}
+    DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+      initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+    }
 };
 
 }
@@ -166,7 +174,7 @@ char RegionOnlyPrinter::ID = 0;
 INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
                 "Print regions of function to 'dot' file "
                 "(with no function bodies)",
-                true, true);
+                true, true)
 
 FunctionPass* llvm::createRegionViewerPass() {
   return new RegionViewer();
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index b892d85f9f4a..62244ccb3a03 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -69,6 +69,7 @@
 #include "llvm/Operator.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
@@ -103,8 +104,12 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                                  "derived loop"),
                         cl::init(100));
 
-INITIALIZE_PASS(ScalarEvolution, "scalar-evolution",
-                "Scalar Evolution Analysis", false, true);
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+                "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+                "Scalar Evolution Analysis", false, true)
 char ScalarEvolution::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -115,13 +120,139 @@ char ScalarEvolution::ID = 0;
 // Implementation of the SCEV class.
 //
 
-SCEV::~SCEV() {}
-
 void SCEV::dump() const {
   print(dbgs());
   dbgs() << '\n';
 }
 
+void SCEV::print(raw_ostream &OS) const {
+  switch (getSCEVType()) {
+  case scConstant:
+    WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+    return;
+  case scTruncate: {
+    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+    const SCEV *Op = Trunc->getOperand();
+    OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+       << *Trunc->getType() << ")";
+    return;
+  }
+  case scZeroExtend: {
+    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+    const SCEV *Op = ZExt->getOperand();
+    OS << "(zext " << *Op->getType() << " " << *Op << " to "
+       << *ZExt->getType() << ")";
+    return;
+  }
+  case scSignExtend: {
+    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+    const SCEV *Op = SExt->getOperand();
+    OS << "(sext " << *Op->getType() << " " << *Op << " to "
+       << *SExt->getType() << ")";
+    return;
+  }
+  case scAddRecExpr: {
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+    OS << "{" << *AR->getOperand(0);
+    for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+      OS << ",+," << *AR->getOperand(i);
+    OS << "}<";
+    if (AR->hasNoUnsignedWrap())
+      OS << "nuw><";
+    if (AR->hasNoSignedWrap())
+      OS << "nsw><";
+    WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+    OS << ">";
+    return;
+  }
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+    const char *OpStr = 0;
+    switch (NAry->getSCEVType()) {
+    case scAddExpr: OpStr = " + "; break;
+    case scMulExpr: OpStr = " * "; break;
+    case scUMaxExpr: OpStr = " umax "; break;
+    case scSMaxExpr: OpStr = " smax "; break;
+    }
+    OS << "(";
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      OS << **I;
+      if (llvm::next(I) != E)
+        OS << OpStr;
+    }
+    OS << ")";
+    return;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+    OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+    return;
+  }
+  case scUnknown: {
+    const SCEVUnknown *U = cast<SCEVUnknown>(this);
+    const Type *AllocTy;
+    if (U->isSizeOf(AllocTy)) {
+      OS << "sizeof(" << *AllocTy << ")";
+      return;
+    }
+    if (U->isAlignOf(AllocTy)) {
+      OS << "alignof(" << *AllocTy << ")";
+      return;
+    }
+  
+    const Type *CTy;
+    Constant *FieldNo;
+    if (U->isOffsetOf(CTy, FieldNo)) {
+      OS << "offsetof(" << *CTy << ", ";
+      WriteAsOperand(OS, FieldNo, false);
+      OS << ")";
+      return;
+    }
+  
+    // Otherwise just print it normally.
+    WriteAsOperand(OS, U->getValue(), false);
+    return;
+  }
+  case scCouldNotCompute:
+    OS << "***COULDNOTCOMPUTE***";
+    return;
+  default: break;
+  }
+  llvm_unreachable("Unknown SCEV kind!");
+}
+
+const Type *SCEV::getType() const {
+  switch (getSCEVType()) {
+  case scConstant:
+    return cast<SCEVConstant>(this)->getType();
+  case scTruncate:
+  case scZeroExtend:
+  case scSignExtend:
+    return cast<SCEVCastExpr>(this)->getType();
+  case scAddRecExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr:
+    return cast<SCEVNAryExpr>(this)->getType();
+  case scAddExpr:
+    return cast<SCEVAddExpr>(this)->getType();
+  case scUDivExpr:
+    return cast<SCEVUDivExpr>(this)->getType();
+  case scUnknown:
+    return cast<SCEVUnknown>(this)->getType();
+  case scCouldNotCompute:
+    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+    return 0;
+  default: break;
+  }
+  llvm_unreachable("Unknown SCEV kind!");
+  return 0;
+}
+
 bool SCEV::isZero() const {
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
     return SC->getValue()->isZero();
@@ -143,30 +274,6 @@ bool SCEV::isAllOnesValue() const {
 SCEVCouldNotCompute::SCEVCouldNotCompute() :
   SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
 
-bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
-  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  return false;
-}
-
-const Type *SCEVCouldNotCompute::getType() const {
-  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  return 0;
-}
-
-bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
-  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  return false;
-}
-
-bool SCEVCouldNotCompute::hasOperand(const SCEV *) const {
-  llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  return false;
-}
-
-void SCEVCouldNotCompute::print(raw_ostream &OS) const {
-  OS << "***COULDNOTCOMPUTE***";
-}
-
 bool SCEVCouldNotCompute::classof(const SCEV *S) {
   return S->getSCEVType() == scCouldNotCompute;
 }
@@ -192,24 +299,10 @@ ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
   return getConstant(ConstantInt::get(ITy, V, isSigned));
 }
 
-const Type *SCEVConstant::getType() const { return V->getType(); }
-
-void SCEVConstant::print(raw_ostream &OS) const {
-  WriteAsOperand(OS, V, false);
-}
-
 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                            unsigned SCEVTy, const SCEV *op, const Type *ty)
   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
 
-bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  return Op->dominates(BB, DT);
-}
-
-bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  return Op->properlyDominates(BB, DT);
-}
-
 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                    const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scTruncate, op, ty) {
@@ -218,10 +311,6 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
          "Cannot truncate non-integer value!");
 }
 
-void SCEVTruncateExpr::print(raw_ostream &OS) const {
-  OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
@@ -230,10 +319,6 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
          "Cannot zero extend non-integer value!");
 }
 
-void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
-  OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scSignExtend, op, ty) {
@@ -242,139 +327,9 @@ SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
          "Cannot sign extend non-integer value!");
 }
 
-void SCEVSignExtendExpr::print(raw_ostream &OS) const {
-  OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
-void SCEVCommutativeExpr::print(raw_ostream &OS) const {
-  const char *OpStr = getOperationStr();
-  OS << "(";
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
-    OS << **I;
-    if (llvm::next(I) != E)
-      OS << OpStr;
-  }
-  OS << ")";
-}
-
-bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
-    if (!(*I)->dominates(BB, DT))
-      return false;
-  return true;
-}
-
-bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
-    if (!(*I)->properlyDominates(BB, DT))
-      return false;
-  return true;
-}
-
-bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
-    if (!(*I)->isLoopInvariant(L))
-      return false;
-  return true;
-}
-
-// hasComputableLoopEvolution - N-ary expressions have computable loop
-// evolutions iff they have at least one operand that varies with the loop,
-// but that all varying operands are computable.
-bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
-  bool HasVarying = false;
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
-    const SCEV *S = *I;
-    if (!S->isLoopInvariant(L)) {
-      if (S->hasComputableLoopEvolution(L))
-        HasVarying = true;
-      else
-        return false;
-    }
-  }
-  return HasVarying;
-}
-
-bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
-    const SCEV *S = *I;
-    if (O == S || S->hasOperand(O))
-      return true;
-  }
-  return false;
-}
-
-bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
-}
-
-bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT);
-}
-
-void SCEVUDivExpr::print(raw_ostream &OS) const {
-  OS << "(" << *LHS << " /u " << *RHS << ")";
-}
-
-const Type *SCEVUDivExpr::getType() const {
-  // In most cases the types of LHS and RHS will be the same, but in some
-  // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
-  // depend on the type for correctness, but handling types carefully can
-  // avoid extra casts in the SCEVExpander. The LHS is more likely to be
-  // a pointer type than the RHS, so use the RHS' type here.
-  return RHS->getType();
-}
-
-bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
-  // Add recurrences are never invariant in the function-body (null loop).
-  if (!QueryLoop)
-    return false;
-
-  // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L.
-  if (QueryLoop->contains(L))
-    return false;
-
-  // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
-  if (L->contains(QueryLoop))
-    return true;
-
-  // This recurrence is variant w.r.t. QueryLoop if any of its operands
-  // are variant.
-  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
-    if (!(*I)->isLoopInvariant(QueryLoop))
-      return false;
-
-  // Otherwise it's loop-invariant.
-  return true;
-}
-
-bool
-SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  return DT->dominates(L->getHeader(), BB) &&
-         SCEVNAryExpr::dominates(BB, DT);
-}
-
-bool
-SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  // This uses a "dominates" query instead of "properly dominates" query because
-  // the instruction which produces the addrec's value is a PHI, and a PHI
-  // effectively properly dominates its entire containing block.
-  return DT->dominates(L->getHeader(), BB) &&
-         SCEVNAryExpr::properlyDominates(BB, DT);
-}
-
-void SCEVAddRecExpr::print(raw_ostream &OS) const {
-  OS << "{" << *Operands[0];
-  for (unsigned i = 1, e = NumOperands; i != e; ++i)
-    OS << ",+," << *Operands[i];
-  OS << "}<";
-  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
-  OS << ">";
-}
-
 void SCEVUnknown::deleted() {
-  // Clear this SCEVUnknown from ValuesAtScopes.
-  SE->ValuesAtScopes.erase(this);
+  // Clear this SCEVUnknown from various maps.
+  SE->forgetMemoizedResults(this);
 
   // Remove this SCEVUnknown from the uniquing map.
   SE->UniqueSCEVs.RemoveNode(this);
@@ -384,8 +339,8 @@ void SCEVUnknown::deleted() {
 }
 
 void SCEVUnknown::allUsesReplacedWith(Value *New) {
-  // Clear this SCEVUnknown from ValuesAtScopes.
-  SE->ValuesAtScopes.erase(this);
+  // Clear this SCEVUnknown from various maps.
+  SE->forgetMemoizedResults(this);
 
   // Remove this SCEVUnknown from the uniquing map.
   SE->UniqueSCEVs.RemoveNode(this);
@@ -396,32 +351,6 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) {
   setValPtr(New);
 }
 
-bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
-  // All non-instruction values are loop invariant.  All instructions are loop
-  // invariant if they are not contained in the specified loop.
-  // Instructions are never considered invariant in the function body
-  // (null loop) because they are defined within the "loop".
-  if (Instruction *I = dyn_cast<Instruction>(getValue()))
-    return L && !L->contains(I);
-  return true;
-}
-
-bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  if (Instruction *I = dyn_cast<Instruction>(getValue()))
-    return DT->dominates(I->getParent(), BB);
-  return true;
-}
-
-bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  if (Instruction *I = dyn_cast<Instruction>(getValue()))
-    return DT->properlyDominates(I->getParent(), BB);
-  return true;
-}
-
-const Type *SCEVUnknown::getType() const {
-  return getValue()->getType();
-}
-
 bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
     if (VCE->getOpcode() == Instruction::PtrToInt)
@@ -486,30 +415,6 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
   return false;
 }
 
-void SCEVUnknown::print(raw_ostream &OS) const {
-  const Type *AllocTy;
-  if (isSizeOf(AllocTy)) {
-    OS << "sizeof(" << *AllocTy << ")";
-    return;
-  }
-  if (isAlignOf(AllocTy)) {
-    OS << "alignof(" << *AllocTy << ")";
-    return;
-  }
-
-  const Type *CTy;
-  Constant *FieldNo;
-  if (isOffsetOf(CTy, FieldNo)) {
-    OS << "offsetof(" << *CTy << ", ";
-    WriteAsOperand(OS, FieldNo, false);
-    OS << ")";
-    return;
-  }
-
-  // Otherwise just print it normally.
-  WriteAsOperand(OS, getValue(), false);
-}
-
 //===----------------------------------------------------------------------===//
 //                               SCEV Utilities
 //===----------------------------------------------------------------------===//
@@ -914,6 +819,36 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
     return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
 
+  // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+  // eliminate all the truncates.
+  if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Operands;
+    bool hasTrunc = false;
+    for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+      const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+      hasTrunc = isa<SCEVTruncateExpr>(S);
+      Operands.push_back(S);
+    }
+    if (!hasTrunc)
+      return getAddExpr(Operands, false, false);
+    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+  }
+
+  // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+  // eliminate all the truncates.
+  if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Operands;
+    bool hasTrunc = false;
+    for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+      const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+      hasTrunc = isa<SCEVTruncateExpr>(S);
+      Operands.push_back(S);
+    }
+    if (!hasTrunc)
+      return getMulExpr(Operands, false, false);
+    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+  }
+
   // If the input value is a chrec scev, truncate the chrec's operands.
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
     SmallVector<const SCEV *, 4> Operands;
@@ -965,6 +900,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 
+  // zext(trunc(x)) --> zext(x) or x or trunc(x)
+  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+    // It's possible the bits taken off by the truncate were all zero bits. If
+    // so, we should be able to simplify this further.
+    const SCEV *X = ST->getOperand();
+    ConstantRange CR = getUnsignedRange(X);
+    unsigned TruncBits = getTypeSizeInBits(ST->getType());
+    unsigned NewBits = getTypeSizeInBits(Ty);
+    if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+            CR.zextOrTrunc(NewBits)))
+      return getTruncateOrZeroExtend(X, Ty);
+  }
+
   // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can zero extend all of the
   // operands (often constants).  This allows analysis of something like
@@ -1089,6 +1037,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
     return getSignExtendExpr(SS->getOperand(), Ty);
 
+  // sext(zext(x)) --> zext(x)
+  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+    return getZeroExtendExpr(SZ->getOperand(), Ty);
+
   // Before doing any expensive analysis, check to see if we've already
   // computed a SCEV for this Op and Ty.
   FoldingSetNodeID ID;
@@ -1098,6 +1050,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 
+  // If the input value is provably positive, build a zext instead.
+  if (isKnownNonNegative(Op))
+    return getZeroExtendExpr(Op, Ty);
+
+  // sext(trunc(x)) --> sext(x) or x or trunc(x)
+  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+    // It's possible the bits taken off by the truncate were all sign bits. If
+    // so, we should be able to simplify this further.
+    const SCEV *X = ST->getOperand();
+    ConstantRange CR = getSignedRange(X);
+    unsigned TruncBits = getTypeSizeInBits(ST->getType());
+    unsigned NewBits = getTypeSizeInBits(Ty);
+    if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+            CR.sextOrTrunc(NewBits)))
+      return getTruncateOrSignExtend(X, Ty);
+  }
+
   // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can sign extend all of the
   // operands (often constants).  This allows analysis of something like
@@ -1639,7 +1608,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
     const Loop *AddRecLoop = AddRec->getLoop();
     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (Ops[i]->isLoopInvariant(AddRecLoop)) {
+      if (isLoopInvariant(Ops[i], AddRecLoop)) {
         LIOps.push_back(Ops[i]);
         Ops.erase(Ops.begin()+i);
         --i; --e;
@@ -1711,7 +1680,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
   ID.AddInteger(scAddExpr);
-  ID.AddInteger(Ops.size());
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
@@ -1846,7 +1814,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
     const Loop *AddRecLoop = AddRec->getLoop();
     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (Ops[i]->isLoopInvariant(AddRecLoop)) {
+      if (isLoopInvariant(Ops[i], AddRecLoop)) {
         LIOps.push_back(Ops[i]);
         Ops.erase(Ops.begin()+i);
         --i; --e;
@@ -1917,7 +1885,6 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
   ID.AddInteger(scMulExpr);
-  ID.AddInteger(Ops.size());
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
@@ -2066,6 +2033,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
            "SCEVAddRecExpr operand types don't match!");
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+    assert(isLoopInvariant(Operands[i], L) &&
+           "SCEVAddRecExpr operand is not loop-invariant!");
 #endif
 
   if (Operands.back()->isZero()) {
@@ -2106,7 +2076,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
       // requirement.
       bool AllInvariant = true;
       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
-        if (!Operands[i]->isLoopInvariant(L)) {
+        if (!isLoopInvariant(Operands[i], L)) {
           AllInvariant = false;
           break;
         }
@@ -2114,7 +2084,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
         NestedOperands[0] = getAddRecExpr(Operands, L);
         AllInvariant = true;
         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
-          if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
+          if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
             AllInvariant = false;
             break;
           }
@@ -2131,7 +2101,6 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
   ID.AddInteger(scAddRecExpr);
-  ID.AddInteger(Operands.size());
   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
@@ -2242,7 +2211,6 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
   ID.AddInteger(scSMaxExpr);
-  ID.AddInteger(Ops.size());
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
@@ -2347,7 +2315,6 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
   ID.AddInteger(scUMaxExpr);
-  ID.AddInteger(Ops.size());
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
@@ -2543,24 +2510,24 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   return getMinusSCEV(AllOnes, V);
 }
 
-/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
-///
-const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
-                                          const SCEV *RHS) {
+/// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1,
+/// and thus the HasNUW and HasNSW bits apply to the resultant add, not
+/// whether the sub would have overflowed.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+                                          bool HasNUW, bool HasNSW) {
   // Fast path: X - X --> 0.
   if (LHS == RHS)
     return getConstant(LHS->getType(), 0);
 
   // X - Y --> X + -Y
-  return getAddExpr(LHS, getNegativeSCEV(RHS));
+  return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW);
 }
 
 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  If the type must be extended, it is zero
 /// extended.
 const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
-                                         const Type *Ty) {
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -2714,9 +2681,11 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
     ValueExprMapType::iterator It =
       ValueExprMap.find(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
+      const SCEV *Old = It->second;
+
       // Short-circuit the def-use traversal if the symbolic name
       // ceases to appear in expressions.
-      if (It->second != SymName && !It->second->hasOperand(SymName))
+      if (Old != SymName && !hasOperand(Old, SymName))
         continue;
 
       // SCEVUnknown for a PHI either means that it has an unrecognized
@@ -2727,9 +2696,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
       // updates on its own when it gets to that point. In the third, we do
       // want to forget the SCEVUnknown.
       if (!isa<PHINode>(I) ||
-          !isa<SCEVUnknown>(It->second) ||
-          (I != PN && It->second == SymName)) {
-        ValuesAtScopes.erase(It->second);
+          !isa<SCEVUnknown>(Old) ||
+          (I != PN && Old == SymName)) {
+        forgetMemoizedResults(Old);
         ValueExprMap.erase(It);
       }
     }
@@ -2801,7 +2770,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
 
             // This is not a valid addrec if the step amount is varying each
             // loop iteration, but is not itself an addrec in this loop.
-            if (Accum->isLoopInvariant(L) ||
+            if (isLoopInvariant(Accum, L) ||
                 (isa<SCEVAddRecExpr>(Accum) &&
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
               bool HasNUW = false;
@@ -2814,6 +2783,23 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
                   HasNUW = true;
                 if (OBO->hasNoSignedWrap())
                   HasNSW = true;
+              } else if (const GEPOperator *GEP = 
+                            dyn_cast<GEPOperator>(BEValueV)) {
+                // If the increment is a GEP, then we know it won't perform a
+                // signed overflow, because the address space cannot be
+                // wrapped around.
+                //
+                // NOTE: This isn't strictly true, because you could have an
+                // object straddling the 2G address boundary in a 32-bit address
+                // space (for example).  We really want to model this as a "has
+                // no signed/unsigned wrap" where the base pointer is treated as
+                // unsigned and the increment is known to not have signed
+                // wrapping.
+                //
+                // This is a highly theoretical concern though, and this is good
+                // enough for all cases we know of at this point. :)
+                //                
+                HasNSW |= GEP->isInBounds();
               }
 
               const SCEV *StartVal = getSCEV(StartValueV);
@@ -2822,7 +2808,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
 
               // Since the no-wrap flags are on the increment, they apply to the
               // post-incremented value as well.
-              if (Accum->isLoopInvariant(L))
+              if (isLoopInvariant(Accum, L))
                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
                                     Accum, L, HasNUW, HasNSW);
 
@@ -2867,17 +2853,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   // PHI's incoming blocks are in a different loop, in which case doing so
   // risks breaking LCSSA form. Instcombine would normally zap these, but
   // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = PN->hasConstantValue(DT)) {
-    bool AllSameLoop = true;
-    Loop *PNLoop = LI->getLoopFor(PN->getParent());
-    for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
-        AllSameLoop = false;
-        break;
-      }
-    if (AllSameLoop)
+  if (Value *V = SimplifyInstruction(PN, TD, DT))
+    if (LI->replacementPreservesLCSSAForm(PN, V))
       return getSCEV(V);
-  }
 
   // If it's not a loop phi, we can't handle it yet.
   return getUnknown(PN);
@@ -2892,6 +2870,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   // Add expression, because the Instruction may be guarded by control flow
   // and the no-overflow bits may not be valid for the expression in any
   // context.
+  bool isInBounds = GEP->isInBounds();
 
   const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
   Value *Base = GEP->getOperand(0);
@@ -2920,7 +2899,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
       IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
 
       // Multiply the index by the element size to compute the element offset.
-      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize);
+      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false,
+                                           /*NSW*/ isInBounds);
 
       // Add the element offset to the running total offset.
       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -2931,7 +2911,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   const SCEV *BaseS = getSCEV(Base);
 
   // Add the total offset from all the GEP indices to the base.
-  return getAddExpr(BaseS, TotalOffset);
+  return getAddExpr(BaseS, TotalOffset, /*NUW*/ false,
+                    /*NSW*/ isInBounds);
 }
 
 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3019,9 +3000,13 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
 ///
 ConstantRange
 ScalarEvolution::getUnsignedRange(const SCEV *S) {
+  // See if we've computed this range already.
+  DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+  if (I != UnsignedRanges.end())
+    return I->second;
 
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
-    return ConstantRange(C->getValue()->getValue());
+    return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
 
   unsigned BitWidth = getTypeSizeInBits(S->getType());
   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3038,49 +3023,52 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
     ConstantRange X = getUnsignedRange(Add->getOperand(0));
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
       X = X.add(getUnsignedRange(Add->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
     ConstantRange X = getUnsignedRange(Mul->getOperand(0));
     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
       X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(SMax->getOperand(0));
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
       X = X.smax(getUnsignedRange(SMax->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(UMax->getOperand(0));
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
       X = X.umax(getUnsignedRange(UMax->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
     ConstantRange X = getUnsignedRange(UDiv->getLHS());
     ConstantRange Y = getUnsignedRange(UDiv->getRHS());
-    return ConservativeResult.intersectWith(X.udiv(Y));
+    return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
   }
 
   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(ZExt->getOperand());
-    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
+    return setUnsignedRange(ZExt,
+      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   }
 
   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(SExt->getOperand());
-    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
+    return setUnsignedRange(SExt,
+      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   }
 
   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
     ConstantRange X = getUnsignedRange(Trunc->getOperand());
-    return ConservativeResult.intersectWith(X.truncate(BitWidth));
+    return setUnsignedRange(Trunc,
+      ConservativeResult.intersectWith(X.truncate(BitWidth)));
   }
 
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3120,19 +3108,20 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
         ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
         if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
             ExtEndRange)
-          return ConservativeResult;
+          return setUnsignedRange(AddRec, ConservativeResult);
 
         APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
                                    EndRange.getUnsignedMin());
         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                    EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
-          return ConservativeResult;
-        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
+          return setUnsignedRange(AddRec, ConservativeResult);
+        return setUnsignedRange(AddRec,
+          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
       }
     }
 
-    return ConservativeResult;
+    return setUnsignedRange(AddRec, ConservativeResult);
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -3141,20 +3130,25 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
     ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
     if (Ones == ~Zeros + 1)
-      return ConservativeResult;
-    return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+      return setUnsignedRange(U, ConservativeResult);
+    return setUnsignedRange(U,
+      ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
   }
 
-  return ConservativeResult;
+  return setUnsignedRange(S, ConservativeResult);
 }
 
 /// getSignedRange - Determine the signed range for a particular SCEV.
 ///
 ConstantRange
 ScalarEvolution::getSignedRange(const SCEV *S) {
+  // See if we've computed this range already.
+  DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+  if (I != SignedRanges.end())
+    return I->second;
 
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
-    return ConstantRange(C->getValue()->getValue());
+    return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
 
   unsigned BitWidth = getTypeSizeInBits(S->getType());
   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3171,49 +3165,52 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
     ConstantRange X = getSignedRange(Add->getOperand(0));
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
       X = X.add(getSignedRange(Add->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setSignedRange(Add, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
     ConstantRange X = getSignedRange(Mul->getOperand(0));
     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
       X = X.multiply(getSignedRange(Mul->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setSignedRange(Mul, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
     ConstantRange X = getSignedRange(SMax->getOperand(0));
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
       X = X.smax(getSignedRange(SMax->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setSignedRange(SMax, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
     ConstantRange X = getSignedRange(UMax->getOperand(0));
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
       X = X.umax(getSignedRange(UMax->getOperand(i)));
-    return ConservativeResult.intersectWith(X);
+    return setSignedRange(UMax, ConservativeResult.intersectWith(X));
   }
 
   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
     ConstantRange X = getSignedRange(UDiv->getLHS());
     ConstantRange Y = getSignedRange(UDiv->getRHS());
-    return ConservativeResult.intersectWith(X.udiv(Y));
+    return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
   }
 
   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
     ConstantRange X = getSignedRange(ZExt->getOperand());
-    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
+    return setSignedRange(ZExt,
+      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   }
 
   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
     ConstantRange X = getSignedRange(SExt->getOperand());
-    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
+    return setSignedRange(SExt,
+      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   }
 
   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
     ConstantRange X = getSignedRange(Trunc->getOperand());
-    return ConservativeResult.intersectWith(X.truncate(BitWidth));
+    return setSignedRange(Trunc,
+      ConservativeResult.intersectWith(X.truncate(BitWidth)));
   }
 
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3263,34 +3260,35 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
         ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
         if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
             ExtEndRange)
-          return ConservativeResult;
+          return setSignedRange(AddRec, ConservativeResult);
 
         APInt Min = APIntOps::smin(StartRange.getSignedMin(),
                                    EndRange.getSignedMin());
         APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                    EndRange.getSignedMax());
         if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-          return ConservativeResult;
-        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
+          return setSignedRange(AddRec, ConservativeResult);
+        return setSignedRange(AddRec,
+          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
       }
     }
 
-    return ConservativeResult;
+    return setSignedRange(AddRec, ConservativeResult);
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     if (!U->getValue()->getType()->isIntegerTy() && !TD)
-      return ConservativeResult;
+      return setSignedRange(U, ConservativeResult);
     unsigned NS = ComputeNumSignBits(U->getValue(), TD);
     if (NS == 1)
-      return ConservativeResult;
-    return ConservativeResult.intersectWith(
+      return setSignedRange(U, ConservativeResult);
+    return setSignedRange(U, ConservativeResult.intersectWith(
       ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
-                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1));
+                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
   }
 
-  return ConservativeResult;
+  return setSignedRange(S, ConservativeResult);
 }
 
 /// createSCEV - We know that there is no SCEV for the specified value.
@@ -3458,8 +3456,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
               // If C is a single bit, it may be in the sign-bit position
               // before the zero-extend. In this case, represent the xor
               // using an add, which is equivalent, and re-apply the zext.
-              APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
-              if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+              APInt Trunc = CI->getValue().trunc(Z0TySize);
+              if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
                   Trunc.isSignBit())
                 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
                                          UTy);
@@ -3699,58 +3697,61 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   // backedge-taken count, which could result in infinite recursion.
   std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
     BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
-  if (Pair.second) {
-    BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
-    if (BECount.Exact != getCouldNotCompute()) {
-      assert(BECount.Exact->isLoopInvariant(L) &&
-             BECount.Max->isLoopInvariant(L) &&
-             "Computed backedge-taken count isn't loop invariant for loop!");
-      ++NumTripCountsComputed;
+  if (!Pair.second)
+    return Pair.first->second;
 
+  BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
+  if (BECount.Exact != getCouldNotCompute()) {
+    assert(isLoopInvariant(BECount.Exact, L) &&
+           isLoopInvariant(BECount.Max, L) &&
+           "Computed backedge-taken count isn't loop invariant for loop!");
+    ++NumTripCountsComputed;
+
+    // Update the value in the map.
+    Pair.first->second = BECount;
+  } else {
+    if (BECount.Max != getCouldNotCompute())
       // Update the value in the map.
       Pair.first->second = BECount;
-    } else {
-      if (BECount.Max != getCouldNotCompute())
-        // Update the value in the map.
-        Pair.first->second = BECount;
-      if (isa<PHINode>(L->getHeader()->begin()))
-        // Only count loops that have phi nodes as not being computable.
-        ++NumTripCountsNotComputed;
-    }
-
-    // Now that we know more about the trip count for this loop, forget any
-    // existing SCEV values for PHI nodes in this loop since they are only
-    // conservative estimates made without the benefit of trip count
-    // information. This is similar to the code in forgetLoop, except that
-    // it handles SCEVUnknown PHI nodes specially.
-    if (BECount.hasAnyInfo()) {
-      SmallVector<Instruction *, 16> Worklist;
-      PushLoopPHIs(L, Worklist);
-
-      SmallPtrSet<Instruction *, 8> Visited;
-      while (!Worklist.empty()) {
-        Instruction *I = Worklist.pop_back_val();
-        if (!Visited.insert(I)) continue;
-
-        ValueExprMapType::iterator It =
-          ValueExprMap.find(static_cast<Value *>(I));
-        if (It != ValueExprMap.end()) {
-          // SCEVUnknown for a PHI either means that it has an unrecognized
-          // structure, or it's a PHI that's in the progress of being computed
-          // by createNodeForPHI.  In the former case, additional loop trip
-          // count information isn't going to change anything. In the later
-          // case, createNodeForPHI will perform the necessary updates on its
-          // own when it gets to that point.
-          if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
-            ValuesAtScopes.erase(It->second);
-            ValueExprMap.erase(It);
-          }
-          if (PHINode *PN = dyn_cast<PHINode>(I))
-            ConstantEvolutionLoopExitValue.erase(PN);
+    if (isa<PHINode>(L->getHeader()->begin()))
+      // Only count loops that have phi nodes as not being computable.
+      ++NumTripCountsNotComputed;
+  }
+
+  // Now that we know more about the trip count for this loop, forget any
+  // existing SCEV values for PHI nodes in this loop since they are only
+  // conservative estimates made without the benefit of trip count
+  // information. This is similar to the code in forgetLoop, except that
+  // it handles SCEVUnknown PHI nodes specially.
+  if (BECount.hasAnyInfo()) {
+    SmallVector<Instruction *, 16> Worklist;
+    PushLoopPHIs(L, Worklist);
+
+    SmallPtrSet<Instruction *, 8> Visited;
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.pop_back_val();
+      if (!Visited.insert(I)) continue;
+
+      ValueExprMapType::iterator It =
+        ValueExprMap.find(static_cast<Value *>(I));
+      if (It != ValueExprMap.end()) {
+        const SCEV *Old = It->second;
+
+        // SCEVUnknown for a PHI either means that it has an unrecognized
+        // structure, or it's a PHI that's in the progress of being computed
+        // by createNodeForPHI.  In the former case, additional loop trip
+        // count information isn't going to change anything. In the later
+        // case, createNodeForPHI will perform the necessary updates on its
+        // own when it gets to that point.
+        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+          forgetMemoizedResults(Old);
+          ValueExprMap.erase(It);
         }
-
-        PushDefUseChildren(I, Worklist);
+        if (PHINode *PN = dyn_cast<PHINode>(I))
+          ConstantEvolutionLoopExitValue.erase(PN);
       }
+
+      PushDefUseChildren(I, Worklist);
     }
   }
   return Pair.first->second;
@@ -3774,7 +3775,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
 
     ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
-      ValuesAtScopes.erase(It->second);
+      forgetMemoizedResults(It->second);
       ValueExprMap.erase(It);
       if (PHINode *PN = dyn_cast<PHINode>(I))
         ConstantEvolutionLoopExitValue.erase(PN);
@@ -3782,6 +3783,11 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
 
     PushDefUseChildren(I, Worklist);
   }
+
+  // Forget all contained loops too, to avoid dangling entries in the
+  // ValuesAtScopes map.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    forgetLoop(*I);
 }
 
 /// forgetValue - This method should be called by the client when it has
@@ -3802,7 +3808,7 @@ void ScalarEvolution::forgetValue(Value *V) {
 
     ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
     if (It != ValueExprMap.end()) {
-      ValuesAtScopes.erase(It->second);
+      forgetMemoizedResults(It->second);
       ValueExprMap.erase(It);
       if (PHINode *PN = dyn_cast<PHINode>(I))
         ConstantEvolutionLoopExitValue.erase(PN);
@@ -4016,6 +4022,105 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
   return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
 }
 
+static const SCEVAddRecExpr *
+isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) {
+  const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S);
+  
+  // The SCEV must be an addrec of this loop.
+  if (!SA || SA->getLoop() != L || !SA->isAffine())
+    return 0;
+  
+  // The SCEV must be known to not wrap in some way to be interesting.
+  if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap())
+    return 0;
+
+  // The stride must be a constant so that we know if it is striding up or down.
+  if (!isa<SCEVConstant>(SA->getOperand(1)))
+    return 0;
+  return SA;
+}
+
+/// getMinusSCEVForExitTest - When considering an exit test for a loop with a
+/// "x != y" exit test, we turn this into a computation that evaluates x-y != 0,
+/// and this function returns the expression to use for x-y.  We know and take
+/// advantage of the fact that this subtraction is only being used in a
+/// comparison by zero context.
+///
+static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS,
+                                           const Loop *L, ScalarEvolution &SE) {
+  // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not
+  // wrap (either NSW or NUW), then we know that the value will either become
+  // the other one (and thus the loop terminates), that the loop will terminate
+  // through some other exit condition first, or that the loop has undefined
+  // behavior.  This information is useful when the addrec has a stride that is
+  // != 1 or -1, because it means we can't "miss" the exit value.
+  //
+  // In any of these three cases, it is safe to turn the exit condition into a
+  // "counting down" AddRec (to zero) by subtracting the two inputs as normal,
+  // but since we know that the "end cannot be missed" we can force the
+  // resulting AddRec to be a NUW addrec.  Since it is counting down, this means
+  // that the AddRec *cannot* pass zero.
+
+  // See if LHS and RHS are addrec's we can handle.
+  const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L);
+  const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L);
+  
+  // If neither addrec is interesting, just return a minus.
+  if (RHSA == 0 && LHSA == 0)
+    return SE.getMinusSCEV(LHS, RHS);
+  
+  // If only one of LHS and RHS are an AddRec of this loop, make sure it is LHS.
+  if (RHSA && LHSA == 0) {
+    // Safe because a-b === b-a for comparisons against zero.
+    std::swap(LHS, RHS);
+    std::swap(LHSA, RHSA);
+  }
+  
+  // Handle the case when only one is advancing in a non-overflowing way.
+  if (RHSA == 0) {
+    // If RHS is loop varying, then we can't predict when LHS will cross it.
+    if (!SE.isLoopInvariant(RHS, L))
+      return SE.getMinusSCEV(LHS, RHS);
+    
+    // If LHS has a positive stride, then we compute RHS-LHS, because the loop
+    // is counting up until it crosses RHS (which must be larger than LHS).  If
+    // it is negative, we compute LHS-RHS because we're counting down to RHS.
+    const ConstantInt *Stride =
+      cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+    if (Stride->getValue().isNegative())
+      std::swap(LHS, RHS);
+
+    return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/);
+  }
+  
+  // If both LHS and RHS are interesting, we have something like:
+  //  a+i*4 != b+i*8.
+  const ConstantInt *LHSStride =
+    cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+  const ConstantInt *RHSStride =
+    cast<SCEVConstant>(RHSA->getOperand(1))->getValue();
+  
+  // If the strides are equal, then this is just a (complex) loop invariant
+  // comparison of a and b.
+  if (LHSStride == RHSStride)
+    return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart());
+  
+  // If the signs of the strides differ, then the negative stride is counting
+  // down to the positive stride.
+  if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){
+    if (RHSStride->getValue().isNegative())
+      std::swap(LHS, RHS);
+  } else {
+    // If LHS's stride is smaller than RHS's stride, then "b" must be less than
+    // "a" and "b" is RHS is counting up (catching up) to LHS.  This is true
+    // whether the strides are positive or negative.
+    if (RHSStride->getValue().slt(LHSStride->getValue()))
+      std::swap(LHS, RHS);
+  }
+    
+  return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/);
+}
+
 /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
@@ -4050,7 +4155,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
 
   // At this point, we would like to compute how many iterations of the
   // loop the predicate will return true for these inputs.
-  if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
+  if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
     // If there is a loop-invariant, force it into the RHS.
     std::swap(LHS, RHS);
     Cond = ICmpInst::getSwappedPredicate(Cond);
@@ -4075,7 +4180,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L,
+                                                                 *this), L);
     if (BTI.hasAnyInfo()) return BTI;
     break;
   }
@@ -4212,7 +4318,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
   // We can only recognize very limited forms of loop index expressions, in
   // particular, only affine AddRec's like {C1,+,C2}.
   const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
-  if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) ||
+  if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
       !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
       !isa<SCEVConstant>(IdxExpr->getOperand(1)))
     return getCouldNotCompute();
@@ -4686,7 +4792,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
   // bit width during computations.
   APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
   APInt Mod(BW + 1, 0);
-  Mod.set(BW - Mult2);  // Mod = N / D
+  Mod.setBit(BW - Mult2);  // Mod = N / D
   APInt I = AD.multiplicativeInverse(Mod);
 
   // 4. Compute the minimum unsigned root of the equation:
@@ -4778,58 +4884,26 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   if (!AddRec || AddRec->getLoop() != L)
     return getCouldNotCompute();
 
-  if (AddRec->isAffine()) {
-    // If this is an affine expression, the execution count of this branch is
-    // the minimum unsigned root of the following equation:
-    //
-    //     Start + Step*N = 0 (mod 2^BW)
-    //
-    // equivalent to:
-    //
-    //             Step*N = -Start (mod 2^BW)
-    //
-    // where BW is the common bit width of Start and Step.
-
-    // Get the initial value for the loop.
-    const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
-                                       L->getParentLoop());
-    const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
-                                      L->getParentLoop());
-
-    if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
-      // For now we handle only constant steps.
-
-      // First, handle unitary steps.
-      if (StepC->getValue()->equalsInt(1))      // 1*N = -Start (mod 2^BW), so:
-        return getNegativeSCEV(Start);          //   N = -Start (as unsigned)
-      if (StepC->getValue()->isAllOnesValue())  // -1*N = -Start (mod 2^BW), so:
-        return Start;                           //    N = Start (as unsigned)
-
-      // Then, try to solve the above equation provided that Start is constant.
-      if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
-        return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
-                                            -StartC->getValue()->getValue(),
-                                            *this);
-    }
-  } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
-    // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
-    // the quadratic equation to solve it.
-    std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
-                                                                    *this);
+  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+  // the quadratic equation to solve it.
+  if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+    std::pair<const SCEV *,const SCEV *> Roots =
+      SolveQuadraticEquation(AddRec, *this);
     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
-    if (R1) {
+    if (R1 && R2) {
 #if 0
       dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
              << "  sol#2: " << *R2 << "\n";
 #endif
       // Pick the smallest positive root value.
       if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
-                                   R1->getValue(), R2->getValue()))) {
+          dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+                                                      R1->getValue(),
+                                                      R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
-
+        
         // We can only use this value if the chrec ends up with an exact zero
         // value at this index.  When solving for "X*X != 5", for example, we
         // should not accept a root of 2.
@@ -4838,8 +4912,54 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
           return R1;  // We found a quadratic root!
       }
     }
+    return getCouldNotCompute();
   }
 
+  // Otherwise we can only handle this if it is affine.
+  if (!AddRec->isAffine())
+    return getCouldNotCompute();
+
+  // If this is an affine expression, the execution count of this branch is
+  // the minimum unsigned root of the following equation:
+  //
+  //     Start + Step*N = 0 (mod 2^BW)
+  //
+  // equivalent to:
+  //
+  //             Step*N = -Start (mod 2^BW)
+  //
+  // where BW is the common bit width of Start and Step.
+
+  // Get the initial value for the loop.
+  const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+  const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+  // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up
+  // to wrap to 0, it must be counting down to equal 0.  Also, while counting
+  // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what
+  // the stride is.  As such, NUW addrec's will always become zero in
+  // "start / -stride" steps, and we know that the division is exact.
+  if (AddRec->hasNoUnsignedWrap())
+    // FIXME: We really want an "isexact" bit for udiv.
+    return getUDivExpr(Start, getNegativeSCEV(Step));
+  
+  // For now we handle only constant steps.
+  const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+  if (StepC == 0)
+    return getCouldNotCompute();
+
+  // First, handle unitary steps.
+  if (StepC->getValue()->equalsInt(1))      // 1*N = -Start (mod 2^BW), so:
+    return getNegativeSCEV(Start);          //   N = -Start (as unsigned)
+  
+  if (StepC->getValue()->isAllOnesValue())  // -1*N = -Start (mod 2^BW), so:
+    return Start;                           //    N = Start (as unsigned)
+
+  // Then, try to solve the above equation provided that Start is constant.
+  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+    return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+                                        -StartC->getValue()->getValue(),
+                                        *this);
   return getCouldNotCompute();
 }
 
@@ -4939,7 +5059,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   // as both operands could be addrecs loop-invariant in each other's loop.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
     const Loop *L = AR->getLoop();
-    if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) {
+    if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
       std::swap(LHS, RHS);
       Pred = ICmpInst::getSwappedPredicate(Pred);
       Changed = true;
@@ -5159,13 +5279,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
 
 trivially_true:
   // Return 0 == 0.
-  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
   Pred = ICmpInst::ICMP_EQ;
   return true;
 
 trivially_false:
   // Return 0 != 0.
-  LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+  LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
   Pred = ICmpInst::ICMP_NE;
   return true;
 }
@@ -5556,7 +5676,7 @@ ScalarEvolution::BackedgeTakenInfo
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                   const Loop *L, bool isSigned) {
   // Only handle:  "ADDREC < LoopInvariant".
-  if (!RHS->isLoopInvariant(L)) return getCouldNotCompute();
+  if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
 
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
   if (!AddRec || AddRec->getLoop() != L)
@@ -5836,6 +5956,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
 
 ScalarEvolution::ScalarEvolution()
   : FunctionPass(ID), FirstUnknown(0) {
+  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
 }
 
 bool ScalarEvolution::runOnFunction(Function &F) {
@@ -5857,6 +5978,10 @@ void ScalarEvolution::releaseMemory() {
   BackedgeTakenCounts.clear();
   ConstantEvolutionLoopExitValue.clear();
   ValuesAtScopes.clear();
+  LoopDispositions.clear();
+  BlockDispositions.clear();
+  UnsignedRanges.clear();
+  SignedRanges.clear();
   UniqueSCEVs.clear();
   SCEVAllocator.Reset();
 }
@@ -5936,7 +6061,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
       if (L) {
         OS << "\t\t" "Exits: ";
         const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
-        if (!ExitValue->isLoopInvariant(L)) {
+        if (!SE.isLoopInvariant(ExitValue, L)) {
           OS << "<<Unknown>>";
         } else {
           OS << *ExitValue;
@@ -5953,3 +6078,240 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
     PrintLoopInfo(OS, &SE, *I);
 }
 
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+  std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+  std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+    Values.insert(std::make_pair(L, LoopVariant));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  LoopDisposition D = computeLoopDisposition(S, L);
+  return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+  switch (S->getSCEVType()) {
+  case scConstant:
+    return LoopInvariant;
+  case scTruncate:
+  case scZeroExtend:
+  case scSignExtend:
+    return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+  case scAddRecExpr: {
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+    // If L is the addrec's loop, it's computable.
+    if (AR->getLoop() == L)
+      return LoopComputable;
+
+    // Add recurrences are never invariant in the function-body (null loop).
+    if (!L)
+      return LoopVariant;
+
+    // This recurrence is variant w.r.t. L if L contains AR's loop.
+    if (L->contains(AR->getLoop()))
+      return LoopVariant;
+
+    // This recurrence is invariant w.r.t. L if AR's loop contains L.
+    if (AR->getLoop()->contains(L))
+      return LoopInvariant;
+
+    // This recurrence is variant w.r.t. L if any of its operands
+    // are variant.
+    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+         I != E; ++I)
+      if (!isLoopInvariant(*I, L))
+        return LoopVariant;
+
+    // Otherwise it's loop-invariant.
+    return LoopInvariant;
+  }
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+    bool HasVarying = false;
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      LoopDisposition D = getLoopDisposition(*I, L);
+      if (D == LoopVariant)
+        return LoopVariant;
+      if (D == LoopComputable)
+        HasVarying = true;
+    }
+    return HasVarying ? LoopComputable : LoopInvariant;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+    LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+    if (LD == LoopVariant)
+      return LoopVariant;
+    LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+    if (RD == LoopVariant)
+      return LoopVariant;
+    return (LD == LoopInvariant && RD == LoopInvariant) ?
+           LoopInvariant : LoopComputable;
+  }
+  case scUnknown:
+    // All non-instruction values are loop invariant.  All instructions are loop
+    // invariant if they are not contained in the specified loop.
+    // Instructions are never considered invariant in the function body
+    // (null loop) because they are defined within the "loop".
+    if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+      return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+    return LoopInvariant;
+  case scCouldNotCompute:
+    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+    return LoopVariant;
+  default: break;
+  }
+  llvm_unreachable("Unknown SCEV kind!");
+  return LoopVariant;
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+  return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+  return getLoopDisposition(S, L) == LoopComputable;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+  std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+  std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+    Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  BlockDisposition D = computeBlockDisposition(S, BB);
+  return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+  switch (S->getSCEVType()) {
+  case scConstant:
+    return ProperlyDominatesBlock;
+  case scTruncate:
+  case scZeroExtend:
+  case scSignExtend:
+    return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+  case scAddRecExpr: {
+    // This uses a "dominates" query instead of "properly dominates" query
+    // to test for proper dominance too, because the instruction which
+    // produces the addrec's value is a PHI, and a PHI effectively properly
+    // dominates its entire containing block.
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+    if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+      return DoesNotDominateBlock;
+  }
+  // FALL THROUGH into SCEVNAryExpr handling.
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+    bool Proper = true;
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      BlockDisposition D = getBlockDisposition(*I, BB);
+      if (D == DoesNotDominateBlock)
+        return DoesNotDominateBlock;
+      if (D == DominatesBlock)
+        Proper = false;
+    }
+    return Proper ? ProperlyDominatesBlock : DominatesBlock;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+    BlockDisposition LD = getBlockDisposition(LHS, BB);
+    if (LD == DoesNotDominateBlock)
+      return DoesNotDominateBlock;
+    BlockDisposition RD = getBlockDisposition(RHS, BB);
+    if (RD == DoesNotDominateBlock)
+      return DoesNotDominateBlock;
+    return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+      ProperlyDominatesBlock : DominatesBlock;
+  }
+  case scUnknown:
+    if (Instruction *I =
+          dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+      if (I->getParent() == BB)
+        return DominatesBlock;
+      if (DT->properlyDominates(I->getParent(), BB))
+        return ProperlyDominatesBlock;
+      return DoesNotDominateBlock;
+    }
+    return ProperlyDominatesBlock;
+  case scCouldNotCompute:
+    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+    return DoesNotDominateBlock;
+  default: break;
+  }
+  llvm_unreachable("Unknown SCEV kind!");
+  return DoesNotDominateBlock;
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+  return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+  return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+  switch (S->getSCEVType()) {
+  case scConstant:
+    return false;
+  case scTruncate:
+  case scZeroExtend:
+  case scSignExtend: {
+    const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
+    const SCEV *CastOp = Cast->getOperand();
+    return Op == CastOp || hasOperand(CastOp, Op);
+  }
+  case scAddRecExpr:
+  case scAddExpr:
+  case scMulExpr:
+  case scUMaxExpr:
+  case scSMaxExpr: {
+    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+         I != E; ++I) {
+      const SCEV *NAryOp = *I;
+      if (NAryOp == Op || hasOperand(NAryOp, Op))
+        return true;
+    }
+    return false;
+  }
+  case scUDivExpr: {
+    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+    return LHS == Op || hasOperand(LHS, Op) ||
+           RHS == Op || hasOperand(RHS, Op);
+  }
+  case scUnknown:
+    return false;
+  case scCouldNotCompute:
+    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+    return false;
+  default: break;
+  }
+  llvm_unreachable("Unknown SCEV kind!");
+  return false;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+  ValuesAtScopes.erase(S);
+  LoopDispositions.erase(S);
+  BlockDispositions.erase(S);
+  UnsignedRanges.erase(S);
+  SignedRanges.erase(S);
+}
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 93b2a8b06fbe..e9edb3e083de 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,7 +34,10 @@ namespace {
 
   public:
     static char ID; // Class identification, replacement for typeinfo
-    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {}
+    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+      initializeScalarEvolutionAliasAnalysisPass(
+        *PassRegistry::getPassRegistry());
+    }
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
@@ -49,8 +52,7 @@ namespace {
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual bool runOnFunction(Function &F);
-    virtual AliasResult alias(const Value *V1, unsigned V1Size,
-                              const Value *V2, unsigned V2Size);
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
 
     Value *GetBaseValue(const SCEV *S);
   };
@@ -58,8 +60,11 @@ namespace {
 
 // Register this pass...
 char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
-                   "ScalarEvolution-based Alias Analysis", false, true, false);
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                   "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                    "ScalarEvolution-based Alias Analysis", false, true, false)
 
 FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
   return new ScalarEvolutionAliasAnalysis();
@@ -101,17 +106,17 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
 }
 
 AliasAnalysis::AliasResult
-ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
-                                    const Value *B, unsigned BSize) {
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+                                    const Location &LocB) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
-  if (ASize == 0 || BSize == 0)
+  if (LocA.Size == 0 || LocB.Size == 0)
     return NoAlias;
 
   // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
-  const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
-  const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
 
   // If they evaluate to the same expression, it's a MustAlias.
   if (AS == BS) return MustAlias;
@@ -121,8 +126,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
   if (SE->getEffectiveSCEVType(AS->getType()) ==
       SE->getEffectiveSCEVType(BS->getType())) {
     unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
-    APInt ASizeInt(BitWidth, ASize);
-    APInt BSizeInt(BitWidth, BSize);
+    APInt ASizeInt(BitWidth, LocA.Size);
+    APInt BSizeInt(BitWidth, LocB.Size);
 
     // Compute the difference between the two pointers.
     const SCEV *BA = SE->getMinusSCEV(BS, AS);
@@ -154,11 +159,15 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
   // inttoptr and ptrtoint operators.
   Value *AO = GetBaseValue(AS);
   Value *BO = GetBaseValue(BS);
-  if ((AO && AO != A) || (BO && BO != B))
-    if (alias(AO ? AO : A, AO ? UnknownSize : ASize,
-              BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias)
+  if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+    if (alias(Location(AO ? AO : LocA.Ptr,
+                       AO ? +UnknownSize : LocA.Size,
+                       AO ? 0 : LocA.TBAATag),
+              Location(BO ? BO : LocB.Ptr,
+                       BO ? +UnknownSize : LocB.Size,
+                       BO ? 0 : LocB.TBAATag)) == NoAlias)
       return NoAlias;
 
   // Forward the query to the next analysis.
-  return AliasAnalysis::alias(A, ASize, B, BSize);
+  return AliasAnalysis::alias(LocA, LocB);
 }
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 66a06aeac43c..b7c110f28cf9 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -608,15 +608,22 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
   return A; // Arbitrarily break the tie.
 }
 
-/// GetRelevantLoop - Get the most relevant loop associated with the given
+/// getRelevantLoop - Get the most relevant loop associated with the given
 /// expression, according to PickMostRelevantLoop.
-static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
-                                   DominatorTree &DT) {
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+  // Test whether we've already computed the most relevant loop for this SCEV.
+  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+  if (!Pair.second)
+    return Pair.first->second;
+
   if (isa<SCEVConstant>(S))
+    // A constant has no relevant loops.
     return 0;
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
-      return LI.getLoopFor(I->getParent());
+      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+    // A non-instruction has no relevant loops.
     return 0;
   }
   if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
@@ -625,16 +632,22 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
       L = AR->getLoop();
     for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
          I != E; ++I)
-      L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT);
-    return L;
+      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+    return RelevantLoops[N] = L;
+  }
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+    const Loop *Result = getRelevantLoop(C->getOperand());
+    return RelevantLoops[C] = Result;
+  }
+  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+    const Loop *Result =
+      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+                           getRelevantLoop(D->getRHS()),
+                           *SE.DT);
+    return RelevantLoops[D] = Result;
   }
-  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
-    return GetRelevantLoop(C->getOperand(), LI, DT);
-  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S))
-    return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT),
-                                GetRelevantLoop(D->getRHS(), LI, DT),
-                                DT);
   llvm_unreachable("Unexpected SCEV type!");
+  return 0;
 }
 
 namespace {
@@ -682,8 +695,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
   for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
        E(S->op_begin()); I != E; ++I)
-    OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
-                                         *I));
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
 
   // Sort by loop. Use a stable sort so that constants follow non-constants and
   // pointer operands precede non-pointer operands.
@@ -752,8 +764,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
   SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
   for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
        E(S->op_begin()); I != E; ++I)
-    OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
-                                         *I));
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
 
   // Sort by loop. Use a stable sort so that constants follow non-constants.
   std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
@@ -990,7 +1001,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Strip off any non-loop-dominating component from the addrec start.
   const SCEV *Start = Normalized->getStart();
   const SCEV *PostLoopOffset = 0;
-  if (!Start->properlyDominates(L->getHeader(), SE.DT)) {
+  if (!SE.properlyDominates(Start, L->getHeader())) {
     PostLoopOffset = Start;
     Start = SE.getConstant(Normalized->getType(), 0);
     Normalized =
@@ -1002,7 +1013,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Strip off any non-loop-dominating component from the addrec step.
   const SCEV *Step = Normalized->getStepRecurrence(SE);
   const SCEV *PostLoopScale = 0;
-  if (!Step->dominates(L->getHeader(), SE.DT)) {
+  if (!SE.dominates(Step, L->getHeader())) {
     PostLoopScale = Step;
     Step = SE.getConstant(Normalized->getType(), 1);
     Normalized =
@@ -1278,7 +1289,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
   Instruction *InsertPt = Builder.GetInsertPoint();
   for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
        L = L->getParentLoop())
-    if (S->isLoopInvariant(L)) {
+    if (SE.isLoopInvariant(S, L)) {
       if (!L) break;
       if (BasicBlock *Preheader = L->getLoopPreheader())
         InsertPt = Preheader->getTerminator();
@@ -1286,7 +1297,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
       // If the SCEV is computable at this level, insert it into the header
       // after the PHIs (and after any other instructions that we've inserted
       // there) so that it is guaranteed to dominate any user inside the loop.
-      if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
+      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
         InsertPt = L->getHeader()->getFirstNonPHI();
       while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
         InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbfdcec3f9b4..40e18ab2fbfa 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -12,29 +12,65 @@
 //
 // In LLVM IR, memory does not have types, so LLVM's own type system is not
 // suitable for doing TBAA. Instead, metadata is added to the IR to describe
-// a type system of a higher level language.
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
 //
-// This pass is language-independent. The type system is encoded in
-// metadata. This allows this pass to support typical C and C++ TBAA, but
-// it can also support custom aliasing behavior for other languages.
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+//   !0 = metadata !{ metadata !"an example type tree" }
+//   !1 = metadata !{ metadata !"int", metadata !0 }
+//   !2 = metadata !{ metadata !"float", metadata !0 }
+//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
 //
-// This is a work-in-progress. It doesn't work yet, and the metadata
-// format isn't stable.
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
 //
-// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to
-//       be extended.
-// TODO: AA chaining
-// TODO: struct fields
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its decendents and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+//   struct X {
+//     double d;
+//     int i;
+//   };
+//   void foo(struct X *x, struct X *y, double *p) {
+//     *x = *y;
+//     *p = 0.0;
+//   }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Metadata.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+
 namespace {
   /// TBAANode - This is a simple wrapper around an MDNode which provides a
   /// higher-level interface by hiding the details of how alias analysis
@@ -44,16 +80,16 @@ namespace {
 
   public:
     TBAANode() : Node(0) {}
-    explicit TBAANode(MDNode *N) : Node(N) {}
+    explicit TBAANode(const MDNode *N) : Node(N) {}
 
     /// getNode - Get the MDNode for this TBAANode.
     const MDNode *getNode() const { return Node; }
 
-    /// getParent - Get this TBAANode's Alias DAG parent.
+    /// getParent - Get this TBAANode's Alias tree parent.
     TBAANode getParent() const {
       if (Node->getNumOperands() < 2)
         return TBAANode();
-      MDNode *P = dyn_cast<MDNode>(Node->getOperand(1));
+      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
       if (!P)
         return TBAANode();
       // Ok, this node has a valid parent. Return it.
@@ -69,8 +105,7 @@ namespace {
       ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
       if (!CI)
         return false;
-      // TODO: Think about the encoding.
-      return CI->isOne();
+      return CI->getValue()[0];
     }
   };
 }
@@ -82,7 +117,13 @@ namespace {
                                  public AliasAnalysis {
   public:
     static char ID; // Class identification, replacement for typeinfo
-    TypeBasedAliasAnalysis() : ImmutablePass(ID) {}
+    TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+      initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void initializePass() {
+      InitializeAliasAnalysis(this);
+    }
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
@@ -94,18 +135,25 @@ namespace {
       return this;
     }
 
+    bool Aliases(const MDNode *A, const MDNode *B) const;
+
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual AliasResult alias(const Value *V1, unsigned V1Size,
-                              const Value *V2, unsigned V2Size);
-    virtual bool pointsToConstantMemory(const Value *P);
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+    virtual ModRefBehavior getModRefBehavior(const Function *F);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Location &Loc);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2);
   };
 }  // End of anonymous namespace
 
 // Register this pass...
 char TypeBasedAliasAnalysis::ID = 0;
 INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
-                   "Type-Based Alias Analysis", false, true, false);
+                   "Type-Based Alias Analysis", false, true, false)
 
 ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
   return new TypeBasedAliasAnalysis();
@@ -117,34 +165,19 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AliasAnalysis::getAnalysisUsage(AU);
 }
 
-AliasAnalysis::AliasResult
-TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
-                              const Value *B, unsigned BSize) {
-  // Currently, metadata can only be attached to Instructions.
-  const Instruction *AI = dyn_cast<Instruction>(A);
-  if (!AI) return MayAlias;
-  const Instruction *BI = dyn_cast<Instruction>(B);
-  if (!BI) return MayAlias;
-
-  // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
-  // be conservative.
-  MDNode *AM =
-    AI->getMetadata(AI->getParent()->getParent()->getParent()
-                      ->getMDKindID("tbaa"));
-  if (!AM) return MayAlias;
-  MDNode *BM =
-    BI->getMetadata(BI->getParent()->getParent()->getParent()
-                      ->getMDKindID("tbaa"));
-  if (!BM) return MayAlias;
-
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+                                const MDNode *B) const {
   // Keep track of the root node for A and B.
   TBAANode RootA, RootB;
 
-  // Climb the DAG from A to see if we reach B.
-  for (TBAANode T(AM); ; ) {
-    if (T.getNode() == BM)
+  // Climb the tree from A to see if we reach B.
+  for (TBAANode T(A); ; ) {
+    if (T.getNode() == B)
       // B is an ancestor of A.
-      return MayAlias;
+      return true;
 
     RootA = T;
     T = T.getParent();
@@ -152,11 +185,11 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
       break;
   }
 
-  // Climb the DAG from B to see if we reach A.
-  for (TBAANode T(BM); ; ) {
-    if (T.getNode() == AM)
+  // Climb the tree from B to see if we reach A.
+  for (TBAANode T(B); ; ) {
+    if (T.getNode() == A)
       // A is an ancestor of B.
-      return MayAlias;
+      return true;
 
     RootB = T;
     T = T.getParent();
@@ -166,26 +199,101 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
 
   // Neither node is an ancestor of the other.
   
-  // If they have the same root, then we've proved there's no alias.
-  if (RootA.getNode() == RootB.getNode())
-    return NoAlias;
-
   // If they have different roots, they're part of different potentially
   // unrelated type systems, so we must be conservative.
-  return MayAlias;
+  if (RootA.getNode() != RootB.getNode())
+    return true;
+
+  // If they have the same root, then we've proved there's no alias.
+  return false;
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+                              const Location &LocB) {
+  if (!EnableTBAA)
+    return AliasAnalysis::alias(LocA, LocB);
+
+  // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+  // be conservative.
+  const MDNode *AM = LocA.TBAATag;
+  if (!AM) return AliasAnalysis::alias(LocA, LocB);
+  const MDNode *BM = LocB.TBAATag;
+  if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+  // If they may alias, chain to the next AliasAnalysis.
+  if (Aliases(AM, BM))
+    return AliasAnalysis::alias(LocA, LocB);
+
+  // Otherwise return a definitive result.
+  return NoAlias;
 }
 
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) {
-  // Currently, metadata can only be attached to Instructions.
-  const Instruction *I = dyn_cast<Instruction>(P);
-  if (!I) return false;
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                                    bool OrLocal) {
+  if (!EnableTBAA)
+    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
 
-  MDNode *M =
-    I->getMetadata(I->getParent()->getParent()->getParent()
-                    ->getMDKindID("tbaa"));
-  if (!M) return false;
+  const MDNode *M = Loc.TBAATag;
+  if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
 
   // If this is an "immutable" type, we can assume the pointer is pointing
   // to constant memory.
-  return TBAANode(M).TypeIsImmutable();
+  if (TBAANode(M).TypeIsImmutable())
+    return true;
+
+  return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+  if (!EnableTBAA)
+    return AliasAnalysis::getModRefBehavior(CS);
+
+  ModRefBehavior Min = UnknownModRefBehavior;
+
+  // If this is an "immutable" type, we can assume the call doesn't write
+  // to memory.
+  if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+    if (TBAANode(M).TypeIsImmutable())
+      Min = OnlyReadsMemory;
+
+  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+  // Functions don't have metadata. Just chain to the next implementation.
+  return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                                      const Location &Loc) {
+  if (!EnableTBAA)
+    return AliasAnalysis::getModRefInfo(CS, Loc);
+
+  if (const MDNode *L = Loc.TBAATag)
+    if (const MDNode *M =
+          CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+      if (!Aliases(L, M))
+        return NoModRef;
+
+  return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+                                      ImmutableCallSite CS2) {
+  if (!EnableTBAA)
+    return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+  if (const MDNode *M1 =
+        CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+    if (const MDNode *M2 =
+          CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+      if (!Aliases(M1, M2))
+        return NoModRef;
+
+  return AliasAnalysis::getModRefInfo(CS1, CS2);
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 181c9b01980c..1060bc5349e4 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/GlobalVariable.h"
@@ -23,9 +24,22 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <cstring>
 using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0).  For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+  if (unsigned BitWidth = Ty->getScalarSizeInBits())
+    return BitWidth;
+  assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+  return TD ? TD->getPointerSizeInBits() : 0;
+}
 
 /// ComputeMaskedBits - Determine which of the bits specified in Mask are
 /// known to be either zero or one and return them in the KnownZero/KnownOne
@@ -46,7 +60,6 @@ using namespace llvm;
 void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                              APInt &KnownZero, APInt &KnownOne,
                              const TargetData *TD, unsigned Depth) {
-  const unsigned MaxDepth = 6;
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = Mask.getBitWidth();
@@ -69,14 +82,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   // Null and aggregate-zero are all-zeros.
   if (isa<ConstantPointerNull>(V) ||
       isa<ConstantAggregateZero>(V)) {
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = Mask;
     return;
   }
   // Handle a constant vector by taking the intersection of the known bits of
   // each element.
   if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
-    KnownZero.set(); KnownOne.set();
+    KnownZero.setAllBits(); KnownOne.setAllBits();
     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
       APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
       ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
@@ -103,15 +116,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
                                               CountTrailingZeros_32(Align));
     else
-      KnownZero.clear();
-    KnownOne.clear();
+      KnownZero.clearAllBits();
+    KnownOne.clearAllBits();
     return;
   }
   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
   // the bits of its aliasee.
   if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
     if (GA->mayBeOverridden()) {
-      KnownZero.clear(); KnownOne.clear();
+      KnownZero.clearAllBits(); KnownOne.clearAllBits();
     } else {
       ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
                         TD, Depth+1);
@@ -119,7 +132,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     return;
   }
 
-  KnownZero.clear(); KnownOne.clear();   // Start out not knowing anything.
+  KnownZero.clearAllBits(); KnownOne.clearAllBits();   // Start out not knowing anything.
 
   if (Depth == MaxDepth || Mask == 0)
     return;  // Limit search depth.
@@ -185,7 +198,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Also compute a conserative estimate for high known-0 bits.
     // More trickiness is possible, but this is sufficient for the
     // interesting case of alignment computation.
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     unsigned TrailZ = KnownZero.countTrailingOnes() +
                       KnownZero2.countTrailingOnes();
     unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
@@ -208,8 +221,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                       AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();
 
-    KnownOne2.clear();
-    KnownZero2.clear();
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
     ComputeMaskedBits(I->getOperand(1),
                       AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -255,14 +268,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     else
       SrcBitWidth = SrcTy->getScalarSizeInBits();
     
-    APInt MaskIn(Mask);
-    MaskIn.zextOrTrunc(SrcBitWidth);
-    KnownZero.zextOrTrunc(SrcBitWidth);
-    KnownOne.zextOrTrunc(SrcBitWidth);
+    APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+    KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+    KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
     ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
                       Depth+1);
-    KnownZero.zextOrTrunc(BitWidth);
-    KnownOne.zextOrTrunc(BitWidth);
+    KnownZero = KnownZero.zextOrTrunc(BitWidth);
+    KnownOne = KnownOne.zextOrTrunc(BitWidth);
     // Any top bits are known to be zero.
     if (BitWidth > SrcBitWidth)
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -284,15 +296,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
       
-    APInt MaskIn(Mask); 
-    MaskIn.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
+    APInt MaskIn = Mask.trunc(SrcBitWidth);
+    KnownZero = KnownZero.trunc(SrcBitWidth);
+    KnownOne = KnownOne.trunc(SrcBitWidth);
     ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
                       Depth+1);
     assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
 
     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
@@ -338,7 +349,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       // Compute the new bits that are at the top now.
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       
       // Signed shift right.
       APInt Mask2(Mask.shl(ShiftAmt));
@@ -474,7 +485,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
 
     unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
     break;
   }
@@ -579,6 +590,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       }
     }
 
+    // Unreachable blocks may have zero-operand PHI nodes.
+    if (P->getNumIncomingValues() == 0)
+      return;
+
     // Otherwise take the unions of the known bit sets of the operands,
     // taking conservative care to avoid excessive recursion.
     if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
@@ -621,6 +636,156 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
 }
 
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one.  Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                          const TargetData *TD, unsigned Depth) {
+  unsigned BitWidth = getBitWidth(V->getType(), TD);
+  if (!BitWidth) {
+    KnownZero = false;
+    KnownOne = false;
+    return;
+  }
+  APInt ZeroBits(BitWidth, 0);
+  APInt OneBits(BitWidth, 0);
+  ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+                    Depth);
+  KnownOne = OneBits[BitWidth - 1];
+  KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined.  Supports values with integer or pointer
+/// types and vectors of integers.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return CI->getValue().isPowerOf2();
+  // TODO: Handle vector constants.
+
+  // 1 << X is clearly a power of two if the one is not shifted off the end.  If
+  // it is shifted off the end then the result is undefined.
+  if (match(V, m_Shl(m_One(), m_Value())))
+    return true;
+
+  // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+  // bottom.  If it is shifted off the bottom then the result is undefined.
+  if (match(V, m_LShr(m_SignBit(), m_Value())))
+    return true;
+
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return false;
+
+  if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+    return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(V))
+    return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+      isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+  return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined.  For vectors return true if every element is known to be
+/// non-zero when defined.  Supports values with integer or pointer type and
+/// vectors of integers.
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    if (C->isNullValue())
+      return false;
+    if (isa<ConstantInt>(C))
+      // Must be non-zero due to null test above.
+      return true;
+    // TODO: Handle vectors
+    return false;
+  }
+
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return false;
+
+  unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+  // X | Y != 0 if X != 0 or Y != 0.
+  Value *X = 0, *Y = 0;
+  if (match(V, m_Or(m_Value(X), m_Value(Y))))
+    return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+  // ext X != 0 if X != 0.
+  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+  // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
+  // if the lowest bit is shifted off the end.
+  if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+    APInt KnownZero(BitWidth, 0);
+    APInt KnownOne(BitWidth, 0);
+    ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+    if (KnownOne[0])
+      return true;
+  }
+  // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
+  // defined if the sign bit is shifted off the end.
+  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+    bool XKnownNonNegative, XKnownNegative;
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+    if (XKnownNegative)
+      return true;
+  }
+  // X + Y.
+  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+    bool XKnownNonNegative, XKnownNegative;
+    bool YKnownNonNegative, YKnownNegative;
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+    ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+    // If X and Y are both non-negative (as signed values) then their sum is not
+    // zero unless both X and Y are zero.
+    if (XKnownNonNegative && YKnownNonNegative)
+      if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+        return true;
+
+    // If X and Y are both negative (as signed values) then their sum is not
+    // zero unless both X and Y equal INT_MIN.
+    if (BitWidth && XKnownNegative && YKnownNegative) {
+      APInt KnownZero(BitWidth, 0);
+      APInt KnownOne(BitWidth, 0);
+      APInt Mask = APInt::getSignedMaxValue(BitWidth);
+      // The sign bit of X is set.  If some other bit is set then X is not equal
+      // to INT_MIN.
+      ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+      if ((KnownOne & Mask) != 0)
+        return true;
+      // The sign bit of Y is set.  If some other bit is set then Y is not equal
+      // to INT_MIN.
+      ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+      if ((KnownOne & Mask) != 0)
+        return true;
+    }
+
+    // The sum of a non-negative number and a power of two is not zero.
+    if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+      return true;
+    if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+      return true;
+  }
+  // (C ? X : Y) != 0 if X != 0 and Y != 0.
+  else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+        isKnownNonZero(SI->getFalseValue(), TD, Depth))
+      return true;
+  }
+
+  if (!BitWidth) return false;
+  APInt KnownZero(BitWidth, 0);
+  APInt KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+                    TD, Depth);
+  return KnownOne != 0;
+}
+
 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
 /// this predicate to simplify operations downstream.  Mask is known to be zero
 /// for bits that V cannot have.
@@ -679,6 +844,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
       Tmp += C->getZExtValue();
       if (Tmp > TyBits) Tmp = TyBits;
     }
+    // vector ashr X, <C, C, C, C>  -> adds C sign bits
+    if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+        Tmp += CI->getZExtValue();
+        if (Tmp > TyBits) Tmp = TyBits;
+      }
+    }
     return Tmp;
   case Instruction::Shl:
     if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
@@ -875,8 +1047,9 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
       // Turn Op0 << Op1 into Op0 * 2^Op1
       APInt Op1Int = Op1CI->getValue();
       uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
-      Op1 = ConstantInt::get(V->getContext(), 
-                             APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+      APInt API(Op1Int.getBitWidth(), 0);
+      API.setBit(BitToSet);
+      Op1 = ConstantInt::get(V->getContext(), API);
     }
 
     Value *Mul0 = NULL;
@@ -982,6 +1155,80 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   return false;
 }
 
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with.  This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+  // All byte-wide stores are splatable, even of arbitrary variables.
+  if (V->getType()->isIntegerTy(8)) return V;
+
+  // Handle 'null' ConstantArrayZero etc.
+  if (Constant *C = dyn_cast<Constant>(V))
+    if (C->isNullValue())
+      return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+  
+  // Constant float and double values can be handled as integer values if the
+  // corresponding integer value is "byteable".  An important case is 0.0. 
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType()->isFloatTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+    if (CFP->getType()->isDoubleTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+    // Don't handle long double formats, which have strange constraints.
+  }
+  
+  // We can handle constant integers that are power of two in size and a 
+  // multiple of 8 bits.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    unsigned Width = CI->getBitWidth();
+    if (isPowerOf2_32(Width) && Width > 8) {
+      // We can handle this value if the recursive binary decomposition is the
+      // same at all levels.
+      APInt Val = CI->getValue();
+      APInt Val2;
+      while (Val.getBitWidth() != 8) {
+        unsigned NextWidth = Val.getBitWidth()/2;
+        Val2  = Val.lshr(NextWidth);
+        Val2 = Val2.trunc(Val.getBitWidth()/2);
+        Val = Val.trunc(Val.getBitWidth()/2);
+        
+        // If the top/bottom halves aren't the same, reject it.
+        if (Val != Val2)
+          return 0;
+      }
+      return ConstantInt::get(V->getContext(), Val);
+    }
+  }
+  
+  // A ConstantArray is splatable if all its members are equal and also
+  // splatable.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+    if (CA->getNumOperands() == 0)
+      return 0;
+    
+    Value *Val = isBytewiseValue(CA->getOperand(0));
+    if (!Val)
+      return 0;
+    
+    for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+      if (CA->getOperand(I-1) != CA->getOperand(I))
+        return 0;
+    
+    return Val;
+  }
+  
+  // Conceptually, we could handle things like:
+  //   %a = zext i8 %X to i16
+  //   %b = shl i16 %a, 8
+  //   %c = or i16 %a, %b
+  // but until there is an example that actually needs this, it doesn't seem
+  // worth worrying about.
+  return 0;
+}
+
+
 // This is the recursive version of BuildSubAggregate. It takes a few different
 // arguments. Idxs is the index within the nested struct From that we are
 // looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -1159,6 +1406,47 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
   return 0;
 }
 
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset.  Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                              const TargetData &TD) {
+  Operator *PtrOp = dyn_cast<Operator>(Ptr);
+  if (PtrOp == 0) return Ptr;
+  
+  // Just look through bitcasts.
+  if (PtrOp->getOpcode() == Instruction::BitCast)
+    return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+  
+  // If this is a GEP with constant indices, we can look through it.
+  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+  
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+       ++I, ++GTI) {
+    ConstantInt *OpC = cast<ConstantInt>(*I);
+    if (OpC->isZero()) continue;
+    
+    // Handle a struct and array indices which add their offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += OpC->getSExtValue()*Size;
+    }
+  }
+  
+  // Re-sign extend from the pointer size if needed to get overflow edge cases
+  // right.
+  unsigned PtrSize = TD.getPointerSizeInBits();
+  if (PtrSize < 64)
+    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+  
+  return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
 /// GetConstantStringInfo - This function computes the length of a
 /// null-terminated C string pointed to by V.  If successful, it returns true
 /// and returns the string in Str.  If unsuccessful, it returns false.
@@ -1386,3 +1674,32 @@ uint64_t llvm::GetStringLength(Value *V) {
   // an empty string as a length.
   return Len == ~0ULL ? 1 : Len;
 }
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      // See if InstructionSimplify knows any relevant tricks.
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        // TODO: Aquire a DominatorTree and use it.
+        if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+          V = Simplified;
+          continue;
+        }
+
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
index 54c715c604d2..1eab27d3eba3 100644
--- a/lib/Archive/Archive.cpp
+++ b/lib/Archive/Archive.cpp
@@ -15,8 +15,10 @@
 #include "ArchiveInternals.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Module.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
 #include <memory>
 #include <cstring>
 using namespace llvm;
@@ -65,8 +67,9 @@ ArchiveMember::ArchiveMember(Archive* PAR)
 // different file, presumably as an update to the member. It also makes sure
 // the flags are reset correctly.
 bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
-  if (!newFile.exists()) {
-    if (ErrMsg) 
+  bool Exists;
+  if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+    if (ErrMsg)
       *ErrMsg = "Can not replace an archive member with a non-existent file";
     return true;
   }
@@ -113,11 +116,10 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
 
   // Get the signature and status info
   const char* signature = (const char*) data;
-  std::string magic;
+  SmallString<4> magic;
   if (!signature) {
-    path.getMagicNumber(magic,4);
+    sys::fs::get_magic(path.str(), magic.capacity(), magic);
     signature = magic.c_str();
-    std::string err;
     const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
     if (FSinfo)
       info = *FSinfo;
@@ -147,9 +149,13 @@ Archive::Archive(const sys::Path& filename, LLVMContext& C)
 
 bool
 Archive::mapToMemory(std::string* ErrMsg) {
-  mapfile = MemoryBuffer::getFile(archPath.c_str(), ErrMsg);
-  if (mapfile == 0)
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+    if (ErrMsg)
+      *ErrMsg = ec.message();
     return true;
+  }
+  mapfile = File.take();
   base = mapfile->getBufferStart();
   return false;
 }
@@ -159,19 +165,19 @@ void Archive::cleanUpMemory() {
   delete mapfile;
   mapfile = 0;
   base = 0;
-  
+
   // Forget the entire symbol table
   symTab.clear();
   symTabSize = 0;
-  
+
   firstFileOffset = 0;
-  
+
   // Free the foreign symbol table member
   if (foreignST) {
     delete foreignST;
     foreignST = 0;
   }
-  
+
   // Delete any Modules and ArchiveMember's we've allocated as a result of
   // symbol table searches.
   for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
@@ -193,7 +199,7 @@ static void getSymbols(Module*M, std::vector<std::string>& symbols) {
     if (!GI->isDeclaration() && !GI->hasLocalLinkage())
       if (!GI->getName().empty())
         symbols.push_back(GI->getName());
-  
+
   // Loop over functions
   for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
     if (!FI->isDeclaration() && !FI->hasLocalLinkage())
@@ -213,20 +219,20 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName,
                              LLVMContext& Context,
                              std::vector<std::string>& symbols,
                              std::string* ErrMsg) {
-  std::auto_ptr<MemoryBuffer> Buffer(
-                       MemoryBuffer::getFileOrSTDIN(fName.c_str()));
-  if (!Buffer.get()) {
-    if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'";
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+    if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+                        + ec.message();
     return true;
   }
-  
+
   Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
   if (!M)
     return true;
-  
+
   // Get the symbols
   getSymbols(M, symbols);
-  
+
   // Done with the module.
   delete M;
   return true;
@@ -239,16 +245,16 @@ llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
                         std::vector<std::string>& symbols,
                         std::string* ErrMsg) {
   // Get the module.
-  std::auto_ptr<MemoryBuffer> Buffer(
+  OwningPtr<MemoryBuffer> Buffer(
     MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
-  
+
   Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
   if (!M)
     return 0;
-  
+
   // Get the symbols
   getSymbols(M, symbols);
-  
+
   // Done with the module. Note that it's the caller's responsibility to delete
   // the Module.
   return M;
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
index 08f20e74811e..55684f7023d2 100644
--- a/lib/Archive/ArchiveInternals.h
+++ b/lib/Archive/ArchiveInternals.h
@@ -15,7 +15,7 @@
 #define LIB_ARCHIVE_ARCHIVEINTERNALS_H
 
 #include "llvm/Bitcode/Archive.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/TimeValue.h"
 #include "llvm/ADT/StringExtras.h"
 
 #include <cstring>
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 7eeeb59896d3..c5ad5fc41cd1 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -15,9 +15,12 @@
 #include "llvm/Module.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include <fstream>
 #include <ostream>
 #include <iomanip>
@@ -25,7 +28,7 @@ using namespace llvm;
 
 // Write an integer using variable bit rate encoding. This saves a few bytes
 // per entry in the symbol table.
-static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+static inline void writeInteger(unsigned num, raw_ostream& ARFile) {
   while (1) {
     if (num < 0x80) { // done?
       ARFile << (unsigned char)num;
@@ -153,9 +156,10 @@ Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
 // Insert a file into the archive before some other member. This also takes care
 // of extracting the necessary flags and information from the file.
 bool
-Archive::addFileBefore(const sys::Path& filePath, iterator where, 
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
                         std::string* ErrMsg) {
-  if (!filePath.exists()) {
+  bool Exists;
+  if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
     if (ErrMsg)
       *ErrMsg = "Can not add a non-existent file to archive";
     return true;
@@ -178,9 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
     flags |= ArchiveMember::HasPathFlag;
   if (hasSlash || filePath.str().length() > 15)
     flags |= ArchiveMember::HasLongFilenameFlag;
-  std::string magic;
-  mbr->path.getMagicNumber(magic,4);
-  switch (sys::IdentifyFileType(magic.c_str(),4)) {
+
+  sys::LLVMFileType type;
+  if (sys::fs::identify_magic(mbr->path.str(), type))
+    type = sys::Unknown_FileType;
+  switch (type) {
     case sys::Bitcode_FileType:
       flags |= ArchiveMember::BitcodeFlag;
       break;
@@ -196,14 +202,14 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
 bool
 Archive::writeMember(
   const ArchiveMember& member,
-  std::ofstream& ARFile,
+  raw_ostream& ARFile,
   bool CreateSymbolTable,
   bool TruncateNames,
   bool ShouldCompress,
   std::string* ErrMsg
 ) {
 
-  unsigned filepos = ARFile.tellp();
+  unsigned filepos = ARFile.tell();
   filepos -= 8;
 
   // Get the data and its size either from the
@@ -212,9 +218,13 @@ Archive::writeMember(
   const char *data = (const char*)member.getData();
   MemoryBuffer *mFile = 0;
   if (!data) {
-    mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg);
-    if (mFile == 0)
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+      if (ErrMsg)
+        *ErrMsg = ec.message();
       return true;
+    }
+    mFile = File.take();
     data = mFile->getBufferStart();
     fSize = mFile->getBufferSize();
   }
@@ -225,7 +235,7 @@ Archive::writeMember(
     std::vector<std::string> symbols;
     std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
       + ")";
-    Module* M = 
+    Module* M =
       GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
 
     // If the bitcode parsed successfully
@@ -272,7 +282,7 @@ Archive::writeMember(
   ARFile.write(data,fSize);
 
   // Make sure the member is an even length
-  if ((ARFile.tellp() & 1) == 1)
+  if ((ARFile.tell() & 1) == 1)
     ARFile << ARFILE_PAD;
 
   // Close the mapped file if it was opened
@@ -282,7 +292,7 @@ Archive::writeMember(
 
 // Write out the LLVM symbol table as an archive member to the file.
 void
-Archive::writeSymbolTable(std::ofstream& ARFile) {
+Archive::writeSymbolTable(raw_ostream& ARFile) {
 
   // Construct the symbol table's header
   ArchiveMemberHeader Hdr;
@@ -306,7 +316,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) {
 
 #ifndef NDEBUG
   // Save the starting position of the symbol tables data content.
-  unsigned startpos = ARFile.tellp();
+  unsigned startpos = ARFile.tell();
 #endif
 
   // Write out the symbols sequentially
@@ -323,7 +333,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) {
 
 #ifndef NDEBUG
   // Now that we're done with the symbol table, get the ending file position
-  unsigned endpos = ARFile.tellp();
+  unsigned endpos = ARFile.tell();
 #endif
 
   // Make sure that the amount we wrote is what we pre-computed. This is
@@ -352,25 +362,20 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
   }
 
   // Create a temporary file to store the archive in
-  sys::Path TmpArchive = archPath;
-  if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+  SmallString<128> TempArchivePath;
+  int ArchFD;
+  if (error_code ec =
+      sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
+                           ArchFD, TempArchivePath)) {
+    if (ErrMsg) *ErrMsg = ec.message();
     return true;
+  }
 
   // Make sure the temporary gets removed if we crash
-  sys::RemoveFileOnSignal(TmpArchive);
+  sys::RemoveFileOnSignal(sys::Path(TempArchivePath.str()));
 
   // Create archive file for output.
-  std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
-                               std::ios::binary;
-  std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
-
-  // Check for errors opening or creating archive file.
-  if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
-    TmpArchive.eraseFromDisk();
-    if (ErrMsg)
-      *ErrMsg = "Error opening archive file: " + archPath.str();
-    return true;
-  }
+  raw_fd_ostream ArchiveFile(ArchFD, true);
 
   // If we're creating a symbol table, reset it now
   if (CreateSymbolTable) {
@@ -386,8 +391,9 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
   for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
     if (writeMember(*I, ArchiveFile, CreateSymbolTable,
                      TruncateNames, Compress, ErrMsg)) {
-      TmpArchive.eraseFromDisk();
       ArchiveFile.close();
+      bool existed;
+      sys::fs::remove(TempArchivePath.str(), existed);
       return true;
     }
   }
@@ -402,27 +408,29 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     // ensure compatibility with other archivers we need to put the symbol
     // table first in the file. Unfortunately, this means mapping the file
     // we just wrote back in and copying it to the destination file.
-    sys::Path FinalFilePath = archPath;
+    SmallString<128> TempArchiveWithSymbolTablePath;
 
     // Map in the archive we just wrote.
     {
-    OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str()));
-    if (arch == 0) return true;
+    OwningPtr<MemoryBuffer> arch;
+    if (error_code ec = MemoryBuffer::getFile(TempArchivePath.c_str(), arch)) {
+      if (ErrMsg)
+        *ErrMsg = ec.message();
+      return true;
+    }
     const char* base = arch->getBufferStart();
 
-    // Open another temporary file in order to avoid invalidating the 
+    // Open another temporary file in order to avoid invalidating the
     // mmapped data
-    if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
-      return true;
-    sys::RemoveFileOnSignal(FinalFilePath);
-
-    std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
-    if (!FinalFile.is_open() || FinalFile.bad()) {
-      TmpArchive.eraseFromDisk();
-      if (ErrMsg)
-        *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+    if (error_code ec =
+      sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
+                           ArchFD, TempArchiveWithSymbolTablePath)) {
+      if (ErrMsg) *ErrMsg = ec.message();
       return true;
     }
+    sys::RemoveFileOnSignal(sys::Path(TempArchiveWithSymbolTablePath.str()));
+
+    raw_fd_ostream FinalFile(ArchFD, true);
 
     // Write the file magic number
     FinalFile << ARFILE_MAGIC;
@@ -435,7 +443,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     if (foreignST) {
       if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
         FinalFile.close();
-        TmpArchive.eraseFromDisk();
+        bool existed;
+        sys::fs::remove(TempArchiveWithSymbolTablePath.str(), existed);
         return true;
       }
     }
@@ -451,19 +460,25 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
     // Close up shop
     FinalFile.close();
     } // free arch.
-    
+
     // Move the final file over top of TmpArchive
-    if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+    if (error_code ec = sys::fs::rename(TempArchiveWithSymbolTablePath.str(),
+                                        TempArchivePath.str())) {
+      if (ErrMsg) *ErrMsg = ec.message();
       return true;
+    }
   }
-  
+
   // Before we replace the actual archive, we need to forget all the
   // members, since they point to data in that old archive. We need to do
   // this because we cannot replace an open file on Windows.
   cleanUpMemory();
-  
-  if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+
+  if (error_code ec = sys::fs::rename(TempArchivePath.str(),
+                                      archPath.str())) {
+    if (ErrMsg) *ErrMsg = ec.message();
     return true;
+  }
 
   // Set correct read and write permissions after temporary file is moved
   // to final destination path.
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 032753a3b2c6..857fa1ef626f 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -15,18 +15,20 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instruction.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Assembly/Parser.h"
+#include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 using namespace llvm;
 
-bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
+bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
   ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
   return true;
 }
@@ -507,6 +509,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(default);
   KEYWORD(hidden);
   KEYWORD(protected);
+  KEYWORD(unnamed_addr);
   KEYWORD(extern_weak);
   KEYWORD(external);
   KEYWORD(thread_local);
@@ -544,6 +547,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(arm_aapcscc);
   KEYWORD(arm_aapcs_vfpcc);
   KEYWORD(msp430_intrcc);
+  KEYWORD(ptx_kernel);
+  KEYWORD(ptx_device);
 
   KEYWORD(cc);
   KEYWORD(c);
@@ -570,6 +575,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(noredzone);
   KEYWORD(noimplicitfloat);
   KEYWORD(naked);
+  KEYWORD(hotpatch);
 
   KEYWORD(type);
   KEYWORD(opaque);
@@ -595,6 +601,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
   TYPEKEYWORD("label",     Type::getLabelTy(Context));
   TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
+  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
 #undef TYPEKEYWORD
 
   // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
@@ -677,7 +684,7 @@ lltok::Kind LLLexer::LexIdentifier() {
     APInt Tmp(bits, StringRef(TokStart+3, len), 16);
     uint32_t activeBits = Tmp.getActiveBits();
     if (activeBits > 0 && activeBits < bits)
-      Tmp.trunc(activeBits);
+      Tmp = Tmp.trunc(activeBits);
     APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
     return lltok::APSInt;
   }
@@ -804,12 +811,12 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     if (TokStart[0] == '-') {
       uint32_t minBits = Tmp.getMinSignedBits();
       if (minBits > 0 && minBits < numBits)
-        Tmp.trunc(minBits);
+        Tmp = Tmp.trunc(minBits);
       APSIntVal = APSInt(Tmp, false);
     } else {
       uint32_t activeBits = Tmp.getActiveBits();
       if (activeBits > 0 && activeBits < numBits)
-        Tmp.trunc(activeBits);
+        Tmp = Tmp.trunc(activeBits);
       APSIntVal = APSInt(Tmp, true);
     }
     return lltok::APSInt;
@@ -828,7 +835,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     }
   }
 
-  APFloatVal = APFloat(atof(TokStart));
+  APFloatVal = APFloat(std::atof(TokStart));
   return lltok::APFloat;
 }
 
@@ -862,6 +869,6 @@ lltok::Kind LLLexer::LexPositive() {
     }
   }
 
-  APFloatVal = APFloat(atof(TokStart));
+  APFloatVal = APFloat(std::atof(TokStart));
   return lltok::APFloat;
 }
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 70f1cfdbfd8c..09ae8017f404 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -62,8 +62,8 @@ namespace llvm {
     const APFloat &getAPFloatVal() const { return APFloatVal; }
 
 
-    bool Error(LocTy L, const std::string &Msg) const;
-    bool Error(const std::string &Msg) const { return Error(getLoc(), Msg); }
+    bool Error(LocTy L, const Twine &Msg) const;
+    bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
     std::string getFilename() const;
 
   private:
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index f21a065473b6..cdfacbebbfc3 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Operator.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -52,7 +51,7 @@ bool LLParser::ValidateEndOfModule() {
         
         if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
           return Error(MDList[i].Loc, "use of undefined metadata '!" +
-                       utostr(SlotNo) + "'");
+                       Twine(SlotNo) + "'");
         Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
       }
     }
@@ -109,7 +108,7 @@ bool LLParser::ValidateEndOfModule() {
   if (!ForwardRefTypeIDs.empty())
     return Error(ForwardRefTypeIDs.begin()->second.second,
                  "use of undefined type '%" +
-                 utostr(ForwardRefTypeIDs.begin()->first) + "'");
+                 Twine(ForwardRefTypeIDs.begin()->first) + "'");
 
   if (!ForwardRefVals.empty())
     return Error(ForwardRefVals.begin()->second.second,
@@ -119,12 +118,12 @@ bool LLParser::ValidateEndOfModule() {
   if (!ForwardRefValIDs.empty())
     return Error(ForwardRefValIDs.begin()->second.second,
                  "use of undefined value '@" +
-                 utostr(ForwardRefValIDs.begin()->first) + "'");
+                 Twine(ForwardRefValIDs.begin()->first) + "'");
 
   if (!ForwardRefMDNodes.empty())
     return Error(ForwardRefMDNodes.begin()->second.second,
                  "use of undefined metadata '!" +
-                 utostr(ForwardRefMDNodes.begin()->first) + "'");
+                 Twine(ForwardRefMDNodes.begin()->first) + "'");
 
 
   // Look for intrinsic functions and CallInst that need to be upgraded
@@ -195,7 +194,8 @@ bool LLParser::ParseTopLevelEntities() {
     // The Global variable production with no name can have many different
     // optional leading prefixes, the production is:
     // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
-    //               OptionalAddrSpace ('constant'|'global') ...
+    //               OptionalAddrSpace OptionalUnNammedAddr
+    //               ('constant'|'global') ...
     case lltok::kw_private:             // OptionalLinkage
     case lltok::kw_linker_private:      // OptionalLinkage
     case lltok::kw_linker_private_weak: // OptionalLinkage
@@ -317,7 +317,7 @@ bool LLParser::ParseUnnamedType() {
   if (Lex.getKind() == lltok::LocalVarID) {
     if (Lex.getUIntVal() != TypeID)
       return Error(Lex.getLoc(), "type expected to be numbered '%" +
-                   utostr(TypeID) + "'");
+                   Twine(TypeID) + "'");
     Lex.Lex(); // eat LocalVarID;
 
     if (ParseToken(lltok::equal, "expected '=' after name"))
@@ -444,7 +444,7 @@ bool LLParser::ParseUnnamedGlobal() {
   if (Lex.getKind() == lltok::GlobalID) {
     if (Lex.getUIntVal() != VarID)
       return Error(Lex.getLoc(), "variable expected to be numbered '%" +
-                   utostr(VarID) + "'");
+                   Twine(VarID) + "'");
     Lex.Lex(); // eat GlobalID;
 
     if (ParseToken(lltok::equal, "expected '=' after name"))
@@ -676,16 +676,16 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
 
   // Insert into the module, we know its name won't collide now.
   M->getAliasList().push_back(GA);
-  assert(GA->getNameStr() == Name && "Should not be a name conflict!");
+  assert(GA->getName() == Name && "Should not be a name conflict!");
 
   return false;
 }
 
 /// ParseGlobal
 ///   ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
-///       OptionalAddrSpace GlobalType Type Const
+///       OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const
 ///   ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
-///       OptionalAddrSpace GlobalType Type Const
+///       OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const
 ///
 /// Everything through visibility has been parsed already.
 ///
@@ -693,12 +693,15 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
                            unsigned Linkage, bool HasLinkage,
                            unsigned Visibility) {
   unsigned AddrSpace;
-  bool ThreadLocal, IsConstant;
+  bool ThreadLocal, IsConstant, UnnamedAddr;
+  LocTy UnnamedAddrLoc;
   LocTy TyLoc;
 
   PATypeHolder Ty(Type::getVoidTy(Context));
   if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
       ParseOptionalAddrSpace(AddrSpace) ||
+      ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+                         &UnnamedAddrLoc) ||
       ParseGlobalType(IsConstant) ||
       ParseType(Ty, TyLoc))
     return true;
@@ -756,6 +759,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
   GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
   GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
   GV->setThreadLocal(ThreadLocal);
+  GV->setUnnamedAddr(UnnamedAddr);
 
   // Parse attributes on the global.
   while (Lex.getKind() == lltok::comma) {
@@ -855,7 +859,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
   // If we have the value in the symbol table or fwd-ref table, return it.
   if (Val) {
     if (Val->getType() == Ty) return Val;
-    Error(Loc, "'@" + utostr(ID) + "' defined with type '" +
+    Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
           Val->getType()->getDescription() + "'");
     return 0;
   }
@@ -983,6 +987,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
     case lltok::kw_noredzone:       Attrs |= Attribute::NoRedZone; break;
     case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
     case lltok::kw_naked:           Attrs |= Attribute::Naked; break;
+    case lltok::kw_hotpatch:        Attrs |= Attribute::Hotpatch; break;
 
     case lltok::kw_alignstack: {
       unsigned Alignment;
@@ -1084,6 +1089,8 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
 ///   ::= 'arm_aapcscc'
 ///   ::= 'arm_aapcs_vfpcc'
 ///   ::= 'msp430_intrcc'
+///   ::= 'ptx_kernel'
+///   ::= 'ptx_device'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
@@ -1099,6 +1106,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
   case lltok::kw_arm_aapcscc:    CC = CallingConv::ARM_AAPCS; break;
   case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
   case lltok::kw_msp430_intrcc:  CC = CallingConv::MSP430_INTR; break;
+  case lltok::kw_ptx_kernel:     CC = CallingConv::PTX_Kernel; break;
+  case lltok::kw_ptx_device:     CC = CallingConv::PTX_Device; break;
   case lltok::kw_cc: {
       unsigned ArbitraryCC;
       Lex.Lex();
@@ -1128,7 +1137,6 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
     Lex.Lex();
 
     MDNode *Node;
-    unsigned NodeID;
     SMLoc Loc = Lex.getLoc();
 
     if (ParseToken(lltok::exclaim, "expected '!' here"))
@@ -1145,6 +1153,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
       assert(ID.Kind == ValID::t_MDNode);
       Inst->setMetadata(MDK, ID.MDNodeVal);
     } else {
+      unsigned NodeID = 0;
       if (ParseMDNodeID(Node, NodeID))
         return true;
       if (Node) {
@@ -1196,8 +1205,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
     
     if (Lex.getKind() != lltok::kw_align)
       return Error(Lex.getLoc(), "expected metadata or 'align'");
-    
-    LocTy AlignLoc = Lex.getLoc();
+
     if (ParseOptionalAlignment(Alignment)) return true;
   }
 
@@ -1245,7 +1253,7 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
       AteExtraComma = true;
       return false;
     }
-    unsigned Idx;
+    unsigned Idx = 0;
     if (ParseUInt32(Idx)) return true;
     Indices.push_back(Idx);
   }
@@ -1778,7 +1786,7 @@ bool LLParser::PerFunctionState::FinishFunction() {
   if (!ForwardRefValIDs.empty())
     return P.Error(ForwardRefValIDs.begin()->second.second,
                    "use of undefined value '%" +
-                   utostr(ForwardRefValIDs.begin()->first) + "'");
+                   Twine(ForwardRefValIDs.begin()->first) + "'");
   return false;
 }
 
@@ -1846,9 +1854,9 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
   if (Val) {
     if (Val->getType() == Ty) return Val;
     if (Ty->isLabelTy())
-      P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block");
+      P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
     else
-      P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" +
+      P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
               Val->getType()->getDescription() + "'");
     return 0;
   }
@@ -1890,7 +1898,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
 
     if (unsigned(NameID) != NumberedVals.size())
       return P.Error(NameLoc, "instruction expected to be numbered '%" +
-                     utostr(NumberedVals.size()) + "'");
+                     Twine(NumberedVals.size()) + "'");
 
     std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
       ForwardRefValIDs.find(NameID);
@@ -1922,7 +1930,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
   // Set the name on the instruction.
   Inst->setName(NameStr);
 
-  if (Inst->getNameStr() != NameStr)
+  if (Inst->getName() != NameStr)
     return P.Error(NameLoc, "multiple definition of local value named '" +
                    NameStr + "'");
   return false;
@@ -2068,10 +2076,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     for (unsigned i = 1, e = Elts.size(); i != e; ++i)
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
-                     "vector element #" + utostr(i) +
+                     "vector element #" + Twine(i) +
                     " is not of type '" + Elts[0]->getType()->getDescription());
 
-    ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size());
+    ID.ConstantVal = ConstantVector::get(Elts);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2101,7 +2109,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
-                     "array element #" + utostr(i) +
+                     "array element #" + Twine(i) +
                      " is not of type '" +Elts[0]->getType()->getDescription());
     }
 
@@ -2278,7 +2286,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
   case lltok::kw_fdiv:
   case lltok::kw_urem:
   case lltok::kw_srem:
-  case lltok::kw_frem: {
+  case lltok::kw_frem:
+  case lltok::kw_shl:
+  case lltok::kw_lshr:
+  case lltok::kw_ashr: {
     bool NUW = false;
     bool NSW = false;
     bool Exact = false;
@@ -2286,9 +2297,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     Constant *Val0, *Val1;
     Lex.Lex();
     LocTy ModifierLoc = Lex.getLoc();
-    if (Opc == Instruction::Add ||
-        Opc == Instruction::Sub ||
-        Opc == Instruction::Mul) {
+    if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+        Opc == Instruction::Mul || Opc == Instruction::Shl) {
       if (EatIfPresent(lltok::kw_nuw))
         NUW = true;
       if (EatIfPresent(lltok::kw_nsw)) {
@@ -2296,7 +2306,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
         if (EatIfPresent(lltok::kw_nuw))
           NUW = true;
       }
-    } else if (Opc == Instruction::SDiv) {
+    } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+               Opc == Instruction::LShr || Opc == Instruction::AShr) {
       if (EatIfPresent(lltok::kw_exact))
         Exact = true;
     }
@@ -2323,6 +2334,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     case Instruction::SDiv:
     case Instruction::URem:
     case Instruction::SRem:
+    case Instruction::Shl:
+    case Instruction::AShr:
+    case Instruction::LShr:
       if (!Val0->getType()->isIntOrIntVectorTy())
         return Error(ID.Loc, "constexpr requires integer operands");
       break;
@@ -2339,7 +2353,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     unsigned Flags = 0;
     if (NUW)   Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
     if (NSW)   Flags |= OverflowingBinaryOperator::NoSignedWrap;
-    if (Exact) Flags |= SDivOperator::IsExact;
+    if (Exact) Flags |= PossiblyExactOperator::IsExact;
     Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
     ID.ConstantVal = C;
     ID.Kind = ValID::t_Constant;
@@ -2347,9 +2361,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
   }
 
   // Logical Operations
-  case lltok::kw_shl:
-  case lltok::kw_lshr:
-  case lltok::kw_ashr:
   case lltok::kw_and:
   case lltok::kw_or:
   case lltok::kw_xor: {
@@ -2572,7 +2583,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
   case ValID::t_APSInt:
     if (!Ty->isIntegerTy())
       return Error(ID.Loc, "integer constant must have integer type");
-    ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+    ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
     V = ConstantInt::get(Context, ID.APSIntVal);
     return false;
   case ValID::t_APFloat:
@@ -2654,7 +2665,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
 
 /// FunctionHeader
 ///   ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
-///       Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+///       OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
 ///       OptionalAlign OptGC
 bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   // Parse the linkage.
@@ -2714,7 +2725,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
     if (NameID != NumberedVals.size())
       return TokError("function expected to be numbered '%" +
-                      utostr(NumberedVals.size()) + "'");
+                      Twine(NumberedVals.size()) + "'");
   } else {
     return TokError("expected function name");
   }
@@ -2730,8 +2741,12 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   std::string Section;
   unsigned Alignment;
   std::string GC;
+  bool UnnamedAddr;
+  LocTy UnnamedAddrLoc;
 
   if (ParseArgumentList(ArgList, isVarArg, false) ||
+      ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+                         &UnnamedAddrLoc) ||
       ParseOptionalAttrs(FuncAttrs, 2) ||
       (EatIfPresent(lltok::kw_section) &&
        ParseStringConstant(Section)) ||
@@ -2821,7 +2836,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
       Fn = cast<Function>(I->second.first);
       if (Fn->getType() != PFT)
         return Error(NameLoc, "type of definition and forward reference of '@" +
-                     utostr(NumberedVals.size()) +"' disagree");
+                     Twine(NumberedVals.size()) + "' disagree");
       ForwardRefValIDs.erase(I);
     }
   }
@@ -2838,6 +2853,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
   Fn->setCallingConv(CC);
   Fn->setAttributes(PAL);
+  Fn->setUnnamedAddr(UnnamedAddr);
   Fn->setAlignment(Alignment);
   Fn->setSection(Section);
   if (!GC.empty()) Fn->setGC(GC.c_str());
@@ -2855,7 +2871,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
     // Set the name, if it conflicted, it will be auto-renamed.
     ArgIt->setName(ArgList[i].Name);
 
-    if (ArgIt->getNameStr() != ArgList[i].Name)
+    if (ArgIt->getName() != ArgList[i].Name)
       return Error(ArgList[i].Loc, "redefinition of argument '%" +
                    ArgList[i].Name + "'");
   }
@@ -2989,55 +3005,38 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
-  case lltok::kw_mul: {
-    bool NUW = false;
-    bool NSW = false;
+  case lltok::kw_mul:
+  case lltok::kw_shl: {
     LocTy ModifierLoc = Lex.getLoc();
-    if (EatIfPresent(lltok::kw_nuw))
-      NUW = true;
-    if (EatIfPresent(lltok::kw_nsw)) {
-      NSW = true;
-      if (EatIfPresent(lltok::kw_nuw))
-        NUW = true;
-    }
-    bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
-    if (!Result) {
-      if (!Inst->getType()->isIntOrIntVectorTy()) {
-        if (NUW)
-          return Error(ModifierLoc, "nuw only applies to integer operations");
-        if (NSW)
-          return Error(ModifierLoc, "nsw only applies to integer operations");
-      }
-      if (NUW)
-        cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
-      if (NSW)
-        cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
-    }
-    return Result;
+    bool NUW = EatIfPresent(lltok::kw_nuw);
+    bool NSW = EatIfPresent(lltok::kw_nsw);
+    if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+    
+    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+    
+    if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+    if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+    return false;
   }
   case lltok::kw_fadd:
   case lltok::kw_fsub:
   case lltok::kw_fmul:    return ParseArithmetic(Inst, PFS, KeywordVal, 2);
 
-  case lltok::kw_sdiv: {
-    bool Exact = false;
-    if (EatIfPresent(lltok::kw_exact))
-      Exact = true;
-    bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
-    if (!Result)
-      if (Exact)
-        cast<BinaryOperator>(Inst)->setIsExact(true);
-    return Result;
+  case lltok::kw_sdiv:
+  case lltok::kw_udiv:
+  case lltok::kw_lshr:
+  case lltok::kw_ashr: {
+    bool Exact = EatIfPresent(lltok::kw_exact);
+
+    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+    if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+    return false;
   }
 
-  case lltok::kw_udiv:
   case lltok::kw_urem:
   case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal, 1);
   case lltok::kw_fdiv:
   case lltok::kw_frem:   return ParseArithmetic(Inst, PFS, KeywordVal, 2);
-  case lltok::kw_shl:
-  case lltok::kw_lshr:
-  case lltok::kw_ashr:
   case lltok::kw_and:
   case lltok::kw_or:
   case lltok::kw_xor:    return ParseLogical(Inst, PFS, KeywordVal);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 404cec3ed7c7..93e7f778ebcb 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -142,10 +142,10 @@ namespace llvm {
 
   private:
 
-    bool Error(LocTy L, const std::string &Msg) const {
+    bool Error(LocTy L, const Twine &Msg) const {
       return Lex.Error(L, Msg);
     }
-    bool TokError(const std::string &Msg) const {
+    bool TokError(const Twine &Msg) const {
       return Error(Lex.getLoc(), Msg);
     }
 
@@ -162,10 +162,12 @@ namespace llvm {
       Lex.Lex();
       return true;
     }
-    bool ParseOptionalToken(lltok::Kind T, bool &Present) {
+    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
       if (Lex.getKind() != T) {
         Present = false;
       } else {
+        if (Loc)
+          *Loc = Lex.getLoc();
         Lex.Lex();
         Present = true;
       }
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 61f93a427498..576da191aecf 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -42,6 +42,7 @@ namespace lltok {
     kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
     kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
     kw_default, kw_hidden, kw_protected,
+    kw_unnamed_addr,
     kw_extern_weak,
     kw_external, kw_thread_local,
     kw_zeroinitializer,
@@ -72,6 +73,7 @@ namespace lltok {
     kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
     kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
     kw_msp430_intrcc,
+    kw_ptx_kernel, kw_ptx_device,
 
     kw_signext,
     kw_zeroext,
@@ -95,6 +97,7 @@ namespace lltok {
     kw_noredzone,
     kw_noimplicitfloat,
     kw_naked,
+    kw_hotpatch,
 
     kw_type,
     kw_opaque,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index e7cef9b5c3c5..59fb471f2b93 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 #include <cstring>
 using namespace llvm;
 
@@ -41,15 +42,14 @@ Module *llvm::ParseAssembly(MemoryBuffer *F,
 
 Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
                                 LLVMContext &Context) {
-  std::string ErrorStr;
-  MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
-  if (F == 0) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
     Err = SMDiagnostic(Filename,
-                       "Could not open input file: " + ErrorStr);
+                       "Could not open input file: " + ec.message());
     return 0;
   }
 
-  return ParseAssembly(F, 0, Err, Context);
+  return ParseAssembly(File.take(), 0, Err, Context);
 }
 
 Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
diff --git a/lib/Bitcode/CMakeLists.txt b/lib/Bitcode/CMakeLists.txt
new file mode 100644
index 000000000000..ff7e290cad1b
--- /dev/null
+++ b/lib/Bitcode/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(Reader)
+add_subdirectory(Writer)
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 830c79aa3b54..dbf8da027996 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -136,7 +136,6 @@ namespace {
   /// @brief A class for maintaining the slot number definition
   /// as a placeholder for the actual definition for forward constants defs.
   class ConstantPlaceHolder : public ConstantExpr {
-    ConstantPlaceHolder();                       // DO NOT IMPLEMENT
     void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
   public:
     // allocate space for exactly one operand
@@ -149,7 +148,7 @@ namespace {
     }
 
     /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
-    static inline bool classof(const ConstantPlaceHolder *) { return true; }
+    //static inline bool classof(const ConstantPlaceHolder *) { return true; }
     static bool classof(const Value *V) {
       return isa<ConstantExpr>(V) &&
              cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
@@ -163,7 +162,8 @@ namespace {
 
 // FIXME: can we inherit this from ConstantExpr?
 template <>
-struct OperandTraits<ConstantPlaceHolder> : public FixedNumOperandTraits<1> {
+struct OperandTraits<ConstantPlaceHolder> :
+  public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
 };
 }
 
@@ -298,7 +298,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
         NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
                                          UserCS->getType()->isPacked());
       } else if (isa<ConstantVector>(UserC)) {
-        NewC = ConstantVector::get(&NewOps[0], NewOps.size());
+        NewC = ConstantVector::get(NewOps);
       } else {
         assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
         NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
@@ -550,6 +550,9 @@ bool BitcodeReader::ParseTypeTable() {
     case bitc::TYPE_CODE_METADATA:  // METADATA
       ResultTy = Type::getMetadataTy(Context);
       break;
+    case bitc::TYPE_CODE_X86_MMX:   // X86_MMX
+      ResultTy = Type::getX86_MMXTy(Context);
+      break;
     case bitc::TYPE_CODE_INTEGER:   // INTEGER: [width]
       if (Record.size() < 1)
         return Error("Invalid Integer type record");
@@ -794,7 +797,7 @@ bool BitcodeReader::ParseMetadata() {
       if (NextBitCode == bitc::METADATA_NAMED_NODE) {
         LLVM2_7MetadataDetected = true;
       } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
-        assert ( 0 && "Inavlid Named Metadata record");
+        assert ( 0 && "Invalid Named Metadata record");
 
       // Read named metadata elements.
       unsigned Size = Record.size();
@@ -832,7 +835,8 @@ bool BitcodeReader::ParseMetadata() {
       unsigned Size = Record.size();
       SmallVector<Value*, 8> Elts;
       for (unsigned i = 0; i != Size; i += 2) {
-        const Type *Ty = getTypeByID(Record[i], false);
+        const Type *Ty = getTypeByID(Record[i]);
+        if (!Ty) return Error("Invalid METADATA_NODE2 record");
         if (Ty->isMetadataTy())
           Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
         else if (!Ty->isVoidTy())
@@ -1081,13 +1085,17 @@ bool BitcodeReader::ParseConstants() {
         if (Record.size() >= 4) {
           if (Opc == Instruction::Add ||
               Opc == Instruction::Sub ||
-              Opc == Instruction::Mul) {
+              Opc == Instruction::Mul ||
+              Opc == Instruction::Shl) {
             if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
               Flags |= OverflowingBinaryOperator::NoSignedWrap;
             if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
               Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
-          } else if (Opc == Instruction::SDiv) {
-            if (Record[3] & (1 << bitc::SDIV_EXACT))
+          } else if (Opc == Instruction::SDiv ||
+                     Opc == Instruction::UDiv ||
+                     Opc == Instruction::LShr ||
+                     Opc == Instruction::AShr) {
+            if (Record[3] & (1 << bitc::PEO_EXACT))
               Flags |= SDivOperator::IsExact;
           }
         }
@@ -1167,7 +1175,8 @@ bool BitcodeReader::ParseConstants() {
     }
     case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
       const VectorType *RTy = dyn_cast<VectorType>(CurTy);
-      const VectorType *OpTy = dyn_cast<VectorType>(getTypeByID(Record[0]));
+      const VectorType *OpTy =
+        dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
       if (Record.size() < 4 || RTy == 0 || OpTy == 0)
         return Error("Invalid CE_SHUFVEC_EX record");
       Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
@@ -1418,11 +1427,13 @@ bool BitcodeReader::ParseModule() {
       break;
     }
     // GLOBALVAR: [pointer type, isconst, initid,
-    //             linkage, alignment, section, visibility, threadlocal]
+    //             linkage, alignment, section, visibility, threadlocal,
+    //             unnamed_addr]
     case bitc::MODULE_CODE_GLOBALVAR: {
       if (Record.size() < 6)
         return Error("Invalid MODULE_CODE_GLOBALVAR record");
       const Type *Ty = getTypeByID(Record[0]);
+      if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
       if (!Ty->isPointerTy())
         return Error("Global not a pointer type!");
       unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
@@ -1444,6 +1455,10 @@ bool BitcodeReader::ParseModule() {
       if (Record.size() > 7)
         isThreadLocal = Record[7];
 
+      bool UnnamedAddr = false;
+      if (Record.size() > 8)
+        UnnamedAddr = Record[8];
+
       GlobalVariable *NewGV =
         new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
                            isThreadLocal, AddressSpace);
@@ -1452,6 +1467,7 @@ bool BitcodeReader::ParseModule() {
         NewGV->setSection(Section);
       NewGV->setVisibility(Visibility);
       NewGV->setThreadLocal(isThreadLocal);
+      NewGV->setUnnamedAddr(UnnamedAddr);
 
       ValueList.push_back(NewGV);
 
@@ -1461,11 +1477,12 @@ bool BitcodeReader::ParseModule() {
       break;
     }
     // FUNCTION:  [type, callingconv, isproto, linkage, paramattr,
-    //             alignment, section, visibility, gc]
+    //             alignment, section, visibility, gc, unnamed_addr]
     case bitc::MODULE_CODE_FUNCTION: {
       if (Record.size() < 8)
         return Error("Invalid MODULE_CODE_FUNCTION record");
       const Type *Ty = getTypeByID(Record[0]);
+      if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
       if (!Ty->isPointerTy())
         return Error("Function not a pointer type!");
       const FunctionType *FTy =
@@ -1493,6 +1510,10 @@ bool BitcodeReader::ParseModule() {
           return Error("Invalid GC ID");
         Func->setGC(GCTable[Record[8]-1].c_str());
       }
+      bool UnnamedAddr = false;
+      if (Record.size() > 9)
+        UnnamedAddr = Record[9];
+      Func->setUnnamedAddr(UnnamedAddr);
       ValueList.push_back(Func);
 
       // If this is a function with a body, remember the prototype we are
@@ -1507,6 +1528,7 @@ bool BitcodeReader::ParseModule() {
       if (Record.size() < 3)
         return Error("Invalid MODULE_ALIAS record");
       const Type *Ty = getTypeByID(Record[0]);
+      if (!Ty) return Error("Invalid MODULE_ALIAS record");
       if (!Ty->isPointerTy())
         return Error("Function not a pointer type!");
 
@@ -1598,6 +1620,112 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
   return false;
 }
 
+bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return Error("Malformed block record");
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Read all the records for this module.
+  while (!Stream.AtEndOfStream()) {
+    unsigned Code = Stream.ReadCode();
+    if (Code == bitc::END_BLOCK) {
+      if (Stream.ReadBlockEnd())
+        return Error("Error at end of module block");
+
+      return false;
+    }
+
+    if (Code == bitc::ENTER_SUBBLOCK) {
+      switch (Stream.ReadSubBlockID()) {
+      default:  // Skip unknown content.
+        if (Stream.SkipBlock())
+          return Error("Malformed block record");
+        break;
+      }
+      continue;
+    }
+
+    if (Code == bitc::DEFINE_ABBREV) {
+      Stream.ReadAbbrevRecord();
+      continue;
+    }
+
+    // Read a record.
+    switch (Stream.ReadRecord(Code, Record)) {
+    default: break;  // Default behavior, ignore unknown content.
+    case bitc::MODULE_CODE_VERSION:  // VERSION: [version#]
+      if (Record.size() < 1)
+        return Error("Malformed MODULE_CODE_VERSION");
+      // Only version #0 is supported so far.
+      if (Record[0] != 0)
+        return Error("Unknown bitstream version!");
+      break;
+    case bitc::MODULE_CODE_TRIPLE: {  // TRIPLE: [strchr x N]
+      std::string S;
+      if (ConvertToString(Record, 0, S))
+        return Error("Invalid MODULE_CODE_TRIPLE record");
+      Triple = S;
+      break;
+    }
+    }
+    Record.clear();
+  }
+
+  return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseTriple(std::string &Triple) {
+  if (Buffer->getBufferSize() & 3)
+    return Error("Bitcode stream should be a multiple of 4 bytes in length");
+
+  unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (isBitcodeWrapper(BufPtr, BufEnd))
+    if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+      return Error("Invalid bitcode wrapper header");
+
+  StreamFile.init(BufPtr, BufEnd);
+  Stream.init(StreamFile);
+
+  // Sniff for the signature.
+  if (Stream.Read(8) != 'B' ||
+      Stream.Read(8) != 'C' ||
+      Stream.Read(4) != 0x0 ||
+      Stream.Read(4) != 0xC ||
+      Stream.Read(4) != 0xE ||
+      Stream.Read(4) != 0xD)
+    return Error("Invalid bitcode signature");
+
+  // We expect a number of well-defined blocks, though we don't necessarily
+  // need to understand them all.
+  while (!Stream.AtEndOfStream()) {
+    unsigned Code = Stream.ReadCode();
+
+    if (Code != bitc::ENTER_SUBBLOCK)
+      return Error("Invalid record at top-level");
+
+    unsigned BlockID = Stream.ReadSubBlockID();
+
+    // We only know the MODULE subblock ID.
+    switch (BlockID) {
+    case bitc::MODULE_BLOCK_ID:
+      if (ParseModuleTriple(Triple))
+        return true;
+      break;
+    default:
+      if (Stream.SkipBlock())
+        return Error("Malformed block record");
+      break;
+    }
+  }
+
+  return false;
+}
+
 /// ParseMetadataAttachment - Parse metadata attachments.
 bool BitcodeReader::ParseMetadataAttachment() {
   if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
@@ -1776,13 +1904,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       if (OpNum < Record.size()) {
         if (Opc == Instruction::Add ||
             Opc == Instruction::Sub ||
-            Opc == Instruction::Mul) {
+            Opc == Instruction::Mul ||
+            Opc == Instruction::Shl) {
           if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
             cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
           if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
             cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
-        } else if (Opc == Instruction::SDiv) {
-          if (Record[OpNum] & (1 << bitc::SDIV_EXACT))
+        } else if (Opc == Instruction::SDiv ||
+                   Opc == Instruction::UDiv ||
+                   Opc == Instruction::LShr ||
+                   Opc == Instruction::AShr) {
+          if (Record[OpNum] & (1 << bitc::PEO_EXACT))
             cast<BinaryOperator>(I)->setIsExact(true);
         }
       }
@@ -2535,7 +2667,24 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
   // Read in the entire module, and destroy the BitcodeReader.
   if (M->MaterializeAllPermanently(ErrMsg)) {
     delete M;
-    return NULL;
+    return 0;
   }
+
   return M;
 }
+
+std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+                                         LLVMContext& Context,
+                                         std::string *ErrMsg) {
+  BitcodeReader *R = new BitcodeReader(Buffer, Context);
+  // Don't let the BitcodeReader dtor delete 'Buffer'.
+  R->setBufferOwned(false);
+
+  std::string Triple("");
+  if (R->ParseTriple(Triple))
+    if (ErrMsg)
+      *ErrMsg = R->getErrorString();
+
+  delete R;
+  return Triple;
+}
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 053121bdad6e..f8fc079c73d9 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -212,6 +212,10 @@ public:
   /// @brief Main interface to parsing a bitcode buffer.
   /// @returns true if an error occurred.
   bool ParseBitcodeInto(Module *M);
+
+  /// @brief Cheap mechanism to just extract module triple
+  /// @returns true if an error occurred.
+  bool ParseTriple(std::string &Triple);
 private:
   const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
   Value *getFnValueByID(unsigned ID, const Type *Ty) {
@@ -270,6 +274,7 @@ private:
   bool ResolveGlobalAndAliasInits();
   bool ParseMetadata();
   bool ParseMetadataAttachment();
+  bool ParseModuleTriple(std::string &Triple);
 };
   
 } // End llvm namespace
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7b6fc6cd928d..f8ef8c668c47 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -26,7 +26,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Program.h"
+#include <cctype>
 using namespace llvm;
 
 /// These are manifest constants used by the bitcode writer. They do not need to
@@ -211,6 +212,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
     case Type::LabelTyID:  Code = bitc::TYPE_CODE_LABEL;  break;
     case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break;
     case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+    case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
     case Type::IntegerTyID:
       // INTEGER: [width]
       Code = bitc::TYPE_CODE_INTEGER;
@@ -402,7 +404,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
     unsigned AbbrevToUse = 0;
 
     // GLOBALVAR: [type, isconst, initid,
-    //             linkage, alignment, section, visibility, threadlocal]
+    //             linkage, alignment, section, visibility, threadlocal,
+    //             unnamed_addr]
     Vals.push_back(VE.getTypeID(GV->getType()));
     Vals.push_back(GV->isConstant());
     Vals.push_back(GV->isDeclaration() ? 0 :
@@ -411,9 +414,11 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
     Vals.push_back(Log2_32(GV->getAlignment())+1);
     Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
     if (GV->isThreadLocal() ||
-        GV->getVisibility() != GlobalValue::DefaultVisibility) {
+        GV->getVisibility() != GlobalValue::DefaultVisibility ||
+        GV->hasUnnamedAddr()) {
       Vals.push_back(getEncodedVisibility(GV));
       Vals.push_back(GV->isThreadLocal());
+      Vals.push_back(GV->hasUnnamedAddr());
     } else {
       AbbrevToUse = SimpleGVarAbbrev;
     }
@@ -425,7 +430,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
   // Emit the function proto information.
   for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
     // FUNCTION:  [type, callingconv, isproto, paramattr,
-    //             linkage, alignment, section, visibility, gc]
+    //             linkage, alignment, section, visibility, gc, unnamed_addr]
     Vals.push_back(VE.getTypeID(F->getType()));
     Vals.push_back(F->getCallingConv());
     Vals.push_back(F->isDeclaration());
@@ -435,6 +440,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
     Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
     Vals.push_back(getEncodedVisibility(F));
     Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+    Vals.push_back(F->hasUnnamedAddr());
 
     unsigned AbbrevToUse = 0;
     Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -464,9 +470,10 @@ static uint64_t GetOptimizationFlags(const Value *V) {
       Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
     if (OBO->hasNoUnsignedWrap())
       Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
-  } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(V)) {
-    if (Div->isExact())
-      Flags |= 1 << bitc::SDIV_EXACT;
+  } else if (const PossiblyExactOperator *PEO =
+               dyn_cast<PossiblyExactOperator>(V)) {
+    if (PEO->isExact())
+      Flags |= 1 << bitc::PEO_EXACT;
   }
 
   return Flags;
@@ -1641,9 +1648,12 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
 /// WriteBitcodeToStream - Write the specified module to the specified output
 /// stream.
 void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
-  // If this is darwin, emit a file header and trailer if needed.
-  bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
-  if (isDarwin)
+  // If this is darwin or another generic macho target, emit a file header and
+  // trailer if needed.
+  bool isMacho =
+    M->getTargetTriple().find("-darwin") != std::string::npos ||
+    M->getTargetTriple().find("-macho") != std::string::npos;
+  if (isMacho)
     EmitDarwinBCHeader(Stream, M->getTargetTriple());
 
   // Emit the file header.
@@ -1657,6 +1667,6 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
   // Emit the module.
   WriteModule(M, Stream);
 
-  if (isDarwin)
+  if (isMacho)
     EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
 }
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
new file mode 100644
index 000000000000..e2838c373a39
--- /dev/null
+++ b/lib/CMakeLists.txt
@@ -0,0 +1,14 @@
+# `Support' library is added on the top-level CMakeLists.txt
+
+add_subdirectory(VMCore)
+add_subdirectory(CodeGen)
+add_subdirectory(Bitcode)
+add_subdirectory(Transforms)
+add_subdirectory(Linker)
+add_subdirectory(Analysis)
+add_subdirectory(MC)
+add_subdirectory(Object)
+add_subdirectory(ExecutionEngine)
+add_subdirectory(Target)
+add_subdirectory(AsmParser)
+add_subdirectory(Archive)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5a634d6ccb01..b520d8fcedc0 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -155,16 +155,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
     // In a return block, examine the function live-out regs.
     for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
          E = MRI.liveout_end(); I != E; ++I) {
-      unsigned Reg = *I;
-      State->UnionGroups(Reg, 0);
-      KillIndices[Reg] = BB->size();
-      DefIndices[Reg] = ~0u;
-      // Repeat, for all aliases.
-      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        unsigned AliasReg = *Alias;
-        State->UnionGroups(AliasReg, 0);
-        KillIndices[AliasReg] = BB->size();
-        DefIndices[AliasReg] = ~0u;
+      for (const unsigned *Alias = TRI->getOverlaps(*I);
+           unsigned Reg = *Alias; ++Alias) {
+        State->UnionGroups(Reg, 0);
+        KillIndices[Reg] = BB->size();
+        DefIndices[Reg] = ~0u;
       }
     }
   }
@@ -176,16 +171,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
          SE = BB->succ_end(); SI != SE; ++SI)
     for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
            E = (*SI)->livein_end(); I != E; ++I) {
-      unsigned Reg = *I;
-      State->UnionGroups(Reg, 0);
-      KillIndices[Reg] = BB->size();
-      DefIndices[Reg] = ~0u;
-      // Repeat, for all aliases.
-      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        unsigned AliasReg = *Alias;
-        State->UnionGroups(AliasReg, 0);
-        KillIndices[AliasReg] = BB->size();
-        DefIndices[AliasReg] = ~0u;
+      for (const unsigned *Alias = TRI->getOverlaps(*I);
+           unsigned Reg = *Alias; ++Alias) {
+        State->UnionGroups(Reg, 0);
+        KillIndices[Reg] = BB->size();
+        DefIndices[Reg] = ~0u;
       }
     }
 
@@ -197,12 +187,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
-    State->UnionGroups(Reg, 0);
-    KillIndices[Reg] = BB->size();
-    DefIndices[Reg] = ~0u;
-    // Repeat, for all aliases.
-    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-      unsigned AliasReg = *Alias;
+    for (const unsigned *Alias = TRI->getOverlaps(Reg);
+         unsigned AliasReg = *Alias; ++Alias) {
       State->UnionGroups(AliasReg, 0);
       KillIndices[AliasReg] = BB->size();
       DefIndices[AliasReg] = ~0u;
@@ -435,12 +421,9 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
       continue;
 
     // Update def for Reg and aliases.
-    DefIndices[Reg] = Count;
-    for (const unsigned *Alias = TRI->getAliasSet(Reg);
-         *Alias; ++Alias) {
-      unsigned AliasReg = *Alias;
+    for (const unsigned *Alias = TRI->getOverlaps(Reg);
+         unsigned AliasReg = *Alias; ++Alias)
       DefIndices[AliasReg] = Count;
-    }
   }
 }
 
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..20c7625f3253
--- /dev/null
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,68 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+                                 const VirtRegMap &VRM,
+                                 const BitVector &ReservedRegs)
+  : Pos(0), Reserved(ReservedRegs) {
+  const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+  std::pair<unsigned, unsigned> HintPair =
+    VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+  // HintPair.second is a register, phys or virt.
+  Hint = HintPair.second;
+
+  // Translate to physreg, or 0 if not assigned yet.
+  if (TargetRegisterInfo::isVirtualRegister(Hint))
+    Hint = VRM.getPhys(Hint);
+
+  // The remaining allocation order may depend on the hint.
+  tie(Begin, End) = VRM.getTargetRegInfo()
+        .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction());
+
+  // Target-dependent hints require resolution.
+  if (HintPair.first)
+    Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint,
+                                                      VRM.getMachineFunction());
+
+  // The hint must be a valid physreg for allocation.
+  if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+               !RC->contains(Hint) || ReservedRegs.test(Hint)))
+    Hint = 0;
+}
+
+unsigned AllocationOrder::next() {
+  // First take the hint.
+  if (!Pos) {
+    Pos = Begin;
+    if (Hint)
+      return Hint;
+  }
+  // Then look at the order from TRI.
+  while(Pos != End) {
+    unsigned Reg = *Pos++;
+    if (Reg != Hint && !Reserved.test(Reg))
+      return Reg;
+  }
+  return 0;
+}
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..3db4b6925fca
--- /dev/null
+++ b/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class BitVector;
+class VirtRegMap;
+
+class AllocationOrder {
+  const unsigned *Begin;
+  const unsigned *End;
+  const unsigned *Pos;
+  const BitVector &Reserved;
+  unsigned Hint;
+public:
+
+  /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+  /// @param VirtReg      Virtual register to allocate for.
+  /// @param VRM          Virtual register map for function.
+  /// @param ReservedRegs Set of reserved registers as returned by
+  ///        TargetRegisterInfo::getReservedRegs().
+  AllocationOrder(unsigned VirtReg,
+                  const VirtRegMap &VRM,
+                  const BitVector &ReservedRegs);
+
+  /// next - Return the next physical register in the allocation order, or 0.
+  /// It is safe to call next again after it returned 0.
+  /// It will keep returning 0 until rewind() is called.
+  unsigned next();
+
+  /// rewind - Start over from the beginning.
+  void rewind() { Pos = 0; }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e3dd646c952e..36638c36de67 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -19,6 +19,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
@@ -30,7 +31,7 @@ using namespace llvm;
 /// of insertvalue or extractvalue indices that identify a member, return
 /// the linearized index of the start of the member.
 ///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
                                   const unsigned *Indices,
                                   const unsigned *IndicesEnd,
                                   unsigned CurIndex) {
@@ -45,8 +46,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
                                       EE = STy->element_end();
         EI != EE; ++EI) {
       if (Indices && *Indices == unsigned(EI - EB))
-        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+        return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
     }
     return CurIndex;
   }
@@ -55,8 +56,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
     const Type *EltTy = ATy->getElementType();
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
       if (Indices && *Indices == i)
-        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+        return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
     }
     return CurIndex;
   }
@@ -125,7 +126,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
 /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
 /// processed uses a memory 'm' constraint.
 bool
-llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
                                 const TargetLowering &TLI) {
   for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
     InlineAsm::ConstraintInfo &CI = CInfos[i];
@@ -283,3 +284,20 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
   return true;
 }
 
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+                                const TargetLowering &TLI) {
+  const Function *F = DAG.getMachineFunction().getFunction();
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if (CallerRetAttr & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Check if the only use is a function return node.
+  return TLI.isUsedByReturnOnly(Node);
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d358ab20ffc5..43e8990a9da1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -178,16 +179,24 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (!M.getModuleInlineAsm().empty()) {
     OutStreamer.AddComment("Start of file scope inline assembly");
     OutStreamer.AddBlankLine();
-    EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
+    EmitInlineAsm(M.getModuleInlineAsm()+"\n");
     OutStreamer.AddComment("End of file scope inline assembly");
     OutStreamer.AddBlankLine();
   }
 
   if (MAI->doesSupportDebugInformation())
     DD = new DwarfDebug(this, &M);
-    
+
   if (MAI->doesSupportExceptionHandling())
-    DE = new DwarfException(this);
+    switch (MAI->getExceptionHandlingType()) {
+    default:
+    case ExceptionHandling::DwarfTable:
+      DE = new DwarfTableException(this);
+      break;
+    case ExceptionHandling::DwarfCFI:
+      DE = new DwarfCFIException(this);
+      break;
+    }
 
   return false;
 }
@@ -282,8 +291,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     
     // Handle common symbols.
     if (GVKind.isCommon()) {
+      unsigned Align = 1 << AlignLog;
+      if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+        Align = 0;
+          
       // .comm _foo, 42, 4
-      OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+      OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
       return;
     }
     
@@ -301,11 +314,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
       return;
     }
+
+    unsigned Align = 1 << AlignLog;
+    if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+      Align = 0;
     
     // .local _foo
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
     // .comm _foo, 42, 4
-    OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+    OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
     return;
   }
   
@@ -327,6 +344,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   // Handle thread local data for mach-o which requires us to output an
   // additional structure of data and mangle the original symbol so that we
   // can reference it later.
+  //
+  // TODO: This should become an "emit thread local global" method on TLOF.
+  // All of this macho specific stuff should be sunk down into TLOFMachO and
+  // stuff like "TLSExtraDataSection" should no longer be part of the parent
+  // TLOF class.  This will also make it more obvious that stuff like
+  // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+  // specific code.
   if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
     // Emit the .tbss symbol
     MCSymbol *MangSym = 
@@ -623,7 +647,7 @@ void AsmPrinter::EmitFunctionBody() {
 
       if (ShouldPrintDebugScopes) {
         NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-        DD->beginScope(II);
+        DD->beginInstruction(II);
       }
       
       if (isVerbose())
@@ -657,7 +681,7 @@ void AsmPrinter::EmitFunctionBody() {
       
       if (ShouldPrintDebugScopes) {
         NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-        DD->endScope(II);
+        DD->endInstruction(II);
       }
     }
   }
@@ -729,7 +753,20 @@ bool AsmPrinter::doFinalization(Module &M) {
   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I)
     EmitGlobalVariable(I);
-  
+
+  // Emit visibility info for declarations
+  for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    const Function &F = *I;
+    if (!F.isDeclaration())
+      continue;
+    GlobalValue::VisibilityTypes V = F.getVisibility();
+    if (V == GlobalValue::DefaultVisibility)
+      continue;
+
+    MCSymbol *Name = Mang->getSymbol(&F);
+    EmitVisibility(Name, V);
+  }
+
   // Finalize debug and EH information.
   if (DE) {
     {
@@ -905,14 +942,6 @@ void AsmPrinter::EmitConstantPool() {
 
       const Type *Ty = CPE.getType();
       Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
-
-      // Emit the label with a comment on it.
-      if (isVerbose()) {
-        OutStreamer.GetCommentOS() << "constant pool ";
-        WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
-                          MF->getFunction()->getParent());
-        OutStreamer.GetCommentOS() << '\n';
-      }
       OutStreamer.EmitLabel(GetCPISymbol(CPI));
 
       if (CPE.isMachineConstantPoolEntry())
@@ -983,7 +1012,7 @@ void AsmPrinter::EmitJumpTableInfo() {
       }
     }          
     
-    // On some targets (e.g. Darwin) we want to emit two consequtive labels
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
     // before each jump table.  The first label is never referenced, but tells
     // the assembler and linker the extents of the jump table object.  The
     // second label is actually referenced by the code.
@@ -1004,6 +1033,7 @@ void AsmPrinter::EmitJumpTableInfo() {
 void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                     const MachineBasicBlock *MBB,
                                     unsigned UID) const {
+  assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
   const MCExpr *Value = 0;
   switch (MJTI->getEntryKind()) {
   case MachineJumpTableInfo::EK_Inline:
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ce4519c541e3..98a1bf2f1ce4 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -19,7 +19,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -36,9 +36,8 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
   if (isVerbose() && Desc)
     OutStreamer.AddComment(Desc);
     
-  if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
-    // FIXME: MCize.
-    OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
+  if (MAI->hasLEB128()) {
+    OutStreamer.EmitSLEB128IntValue(Value);
     return;
   }
 
@@ -60,10 +59,10 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
                              unsigned PadTo) const {
   if (isVerbose() && Desc)
     OutStreamer.AddComment(Desc);
- 
-  if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
-    // FIXME: MCize.
-    OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
+
+  // FIXME: Should we add a PadTo option to the streamer?
+  if (MAI->hasLEB128() && PadTo == 0) {
+    OutStreamer.EmitULEB128IntValue(Value); 
     return;
   }
   
@@ -157,7 +156,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
   
   const MCExpr *Exp =
     TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
-  OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+  OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
 }
 
 void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
@@ -215,8 +214,8 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
   const TargetRegisterInfo *RI = TM.getRegisterInfo();
   
   int stackGrowth = TM.getTargetData()->getPointerSize();
-  if (TM.getFrameInfo()->getStackGrowthDirection() !=
-      TargetFrameInfo::StackGrowsUp)
+  if (TM.getFrameLowering()->getStackGrowthDirection() !=
+      TargetFrameLowering::StackGrowsUp)
     stackGrowth *= -1;
   
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
@@ -277,3 +276,43 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
     }
   }
 }
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+  int stackGrowth = TM.getTargetData()->getPointerSize();
+  if (TM.getFrameLowering()->getStackGrowthDirection() !=
+      TargetFrameLowering::StackGrowsUp)
+    stackGrowth *= -1;
+
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    MCSymbol *Label = Move.getLabel();
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      assert(!Src.isReg() && "Machine move not supported yet.");
+
+      if (Src.getReg() == MachineLocation::VirtualFP) {
+        OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+      } else {
+        assert("Machine move not supported yet");
+        // Reg + Offset
+      }
+    } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      assert(Dst.isReg() && "Machine move not supported yet.");
+      OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+    } else {
+      assert(!Dst.isReg() && "Machine move not supported yet.");
+      OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+                                Dst.getOffset());
+    }
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index df0316814c08..c6166e2365a5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -34,15 +34,47 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+namespace {
+  struct SrcMgrDiagInfo {
+    const MDNode *LocInfo;
+    LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+    void *DiagContext;
+  };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it.  diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+  SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+  assert(DiagInfo && "Diagnostic context not passed down?");
+  
+  // If the inline asm had metadata associated with it, pull out a location
+  // cookie corresponding to which line the error occurred on.
+  unsigned LocCookie = 0;
+  if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+    unsigned ErrorLine = Diag.getLineNo()-1;
+    if (ErrorLine >= LocInfo->getNumOperands())
+      ErrorLine = 0;
+    
+    if (LocInfo->getNumOperands() != 0)
+      if (const ConstantInt *CI =
+          dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+        LocCookie = CI->getZExtValue();
+  }
+  
+  DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
 /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
   assert(!Str.empty() && "Can't emit empty inline asm block");
-  
+
   // Remember if the buffer is nul terminated or not so we can avoid a copy.
   bool isNullTerminated = Str.back() == 0;
   if (isNullTerminated)
     Str = Str.substr(0, Str.size()-1);
-  
+
   // If the output streamer is actually a .s file, just emit the blob textually.
   // This is useful in case the asm parser doesn't handle something but the
   // system assembler does.
@@ -50,18 +82,23 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
     OutStreamer.EmitRawText(Str);
     return;
   }
-  
+
   SourceMgr SrcMgr;
-  
+  SrcMgrDiagInfo DiagInfo;
+
   // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
   LLVMContext &LLVMCtx = MMI->getModule()->getContext();
   bool HasDiagHandler = false;
-  if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) {
-    SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler,
-                          LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie);
+  if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+    // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+    // to be invoked, getting DiagInfo passed into it.
+    DiagInfo.LocInfo = LocMDNode;
+    DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+    DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+    SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
     HasDiagHandler = true;
   }
-  
+
   MemoryBuffer *Buffer;
   if (isNullTerminated)
     Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
@@ -70,7 +107,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
 
   // Tell SrcMgr about this buffer, it takes ownership of the buffer.
   SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
-  
+
   OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
                                                   OutContext, OutStreamer,
                                                   *MAI));
@@ -92,15 +129,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
 /// instruction that is an inline asm.
 void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
   assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
-  
+
   unsigned NumOperands = MI->getNumOperands();
-  
+
   // Count the number of register definitions to find the asm string.
   unsigned NumDefs = 0;
   for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
        ++NumDefs)
     assert(NumDefs != NumOperands-2 && "No asm string?");
-  
+
   assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
 
   // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
@@ -128,22 +165,23 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
   // Get the !srcloc metadata node if we have it, and decode the loc cookie from
   // it.
   unsigned LocCookie = 0;
+  const MDNode *LocMD = 0;
   for (unsigned i = MI->getNumOperands(); i != 0; --i) {
-    if (MI->getOperand(i-1).isMetadata())
-      if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata())
-        if (SrcLoc->getNumOperands() != 0)
-          if (const ConstantInt *CI =
-              dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) {
-            LocCookie = CI->getZExtValue();
-            break;
-          }
+    if (MI->getOperand(i-1).isMetadata() &&
+        (LocMD = MI->getOperand(i-1).getMetadata()) &&
+        LocMD->getNumOperands() != 0) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+        LocCookie = CI->getZExtValue();
+        break;
+      }
+    }
   }
-  
+
   // Emit the inline asm to a temporary string so we can emit it through
   // EmitInlineAsm.
   SmallString<256> StringData;
   raw_svector_ostream OS(StringData);
-  
+
   OS << '\t';
 
   // The variant of the current asmprinter.
@@ -151,7 +189,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
 
   int CurVariant = -1;            // The number of the {.|.|.} region we are in.
   const char *LastEmitted = AsmStr; // One past the last character emitted.
-  
+
   while (*LastEmitted) {
     switch (*LastEmitted) {
     default: {
@@ -199,18 +237,18 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
         ++LastEmitted;  // consume ')' character.
         if (CurVariant == -1)
           OS << '}';     // this is gcc's behavior for } outside a variant
-        else 
+        else
           CurVariant = -1;
         break;
       }
       if (Done) break;
-      
+
       bool HasCurlyBraces = false;
       if (*LastEmitted == '{') {     // ${variable}
         ++LastEmitted;               // Consume '{' character.
         HasCurlyBraces = true;
       }
-      
+
       // If we have ${:foo}, then this is not a real operand reference, it is a
       // "magic" string reference, just like in .td files.  Arrange to call
       // PrintSpecial.
@@ -221,25 +259,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
         if (StrEnd == 0)
           report_fatal_error("Unterminated ${:foo} operand in inline asm"
                              " string: '" + Twine(AsmStr) + "'");
-        
+
         std::string Val(StrStart, StrEnd);
         PrintSpecial(MI, OS, Val.c_str());
         LastEmitted = StrEnd+1;
         break;
       }
-            
+
       const char *IDStart = LastEmitted;
       const char *IDEnd = IDStart;
-      while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;      
-      
+      while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
       unsigned Val;
       if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
         report_fatal_error("Bad $ operand number in inline asm string: '" +
                            Twine(AsmStr) + "'");
       LastEmitted = IDEnd;
-      
+
       char Modifier[2] = { 0, 0 };
-      
+
       if (HasCurlyBraces) {
         // If we have curly braces, check for a modifier character.  This
         // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
@@ -248,25 +286,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
           if (*LastEmitted == 0)
             report_fatal_error("Bad ${:} expression in inline asm string: '" +
                                Twine(AsmStr) + "'");
-          
+
           Modifier[0] = *LastEmitted;
           ++LastEmitted;    // Consume modifier character.
         }
-        
+
         if (*LastEmitted != '}')
           report_fatal_error("Bad ${} expression in inline asm string: '" +
                              Twine(AsmStr) + "'");
         ++LastEmitted;    // Consume '}' character.
       }
-      
+
       if (Val >= NumOperands-1)
         report_fatal_error("Invalid $ operand number in inline asm string: '" +
                            Twine(AsmStr) + "'");
-      
+
       // Okay, we finally have a value number.  Ask the target to print this
       // operand!
       if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
-        unsigned OpNo = 2;
+        unsigned OpNo = InlineAsm::MIOp_FirstOperand;
 
         bool Error = false;
 
@@ -310,8 +348,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
     }
   }
   OS << '\n' << (char)0;  // null terminate string.
-  EmitInlineAsm(OS.str(), LocCookie);
-  
+  EmitInlineAsm(OS.str(), LocMD);
+
   // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
   // enabled, so we use EmitRawText.
   if (OutStreamer.hasRawTextSupport())
@@ -335,7 +373,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
   } else if (!strcmp(Code, "uid")) {
     // Comparing the address of MI isn't sufficient, because machineinstrs may
     // be allocated to the same address across functions.
-    
+
     // If this is a new LastFn instruction, bump the counter.
     if (LastMI != MI || LastFn != getFunctionNumber()) {
       ++Counter;
@@ -349,7 +387,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
     Msg << "Unknown special formatter '" << Code
          << "' for machine instr: " << *MI;
     report_fatal_error(Msg.str());
-  }    
+  }
 }
 
 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index ca8b8436c11f..306efade7d92 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -3,9 +3,10 @@ add_llvm_library(LLVMAsmPrinter
   AsmPrinterDwarf.cpp
   AsmPrinterInlineAsm.cpp
   DIE.cpp
+  DwarfCFIException.cpp
   DwarfDebug.cpp
   DwarfException.cpp
+  DwarfTableException.cpp
   OcamlGCPrinter.cpp
   )
 
-target_link_libraries (LLVMAsmPrinter LLVMMCParser)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..68be2eed8f0e
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,138 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+  : DwarfException(A),
+    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+    {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+  if (!Asm->MAI->isExceptionHandlingDwarf())
+    return;
+
+  if (!shouldEmitTableModule)
+    return;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+  // Begin eh frame section.
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  // Emit references to all used personality functions
+  const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+  for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i));
+    Asm->EmitReference(Personalities[i], PerEncoding);
+  }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+  shouldEmitTable = shouldEmitMoves = false;
+
+  // If any landing pads survive, we need an EH table.
+  shouldEmitTable = !MMI->getLandingPads().empty();
+
+  // See if we need frame move info.
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+  if (shouldEmitMoves || shouldEmitTable)
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+
+  shouldEmitTableModule |= shouldEmitTable;
+
+  if (shouldEmitMoves) {
+    const TargetFrameLowering *TFL = Asm->TM.getFrameLowering();
+    Asm->OutStreamer.EmitCFIStartProc();
+
+    // Indicate locations of general callee saved registers in frame.
+    std::vector<MachineMove> Moves;
+    TFL->getInitialFrameState(Moves);
+    Asm->EmitCFIFrameMoves(Moves);
+    Asm->EmitCFIFrameMoves(MMI->getFrameMoves());
+  }
+
+  if (!shouldEmitTable)
+    return;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Provide LSDA information.
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  if (LSDAEncoding != dwarf::DW_EH_PE_omit)
+    Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+                                                    Asm->getFunctionNumber()),
+                                 LSDAEncoding);
+
+  // Indicate personality routine, if any.
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+  if (PerEncoding != dwarf::DW_EH_PE_omit &&
+      MMI->getPersonalities()[MMI->getPersonalityIndex()])
+    Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality",
+                                                    MMI->getPersonalityIndex()),
+                                        PerEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+  if (!shouldEmitMoves && !shouldEmitTable) return;
+
+  if (shouldEmitMoves)
+    Asm->OutStreamer.EmitCFIEndProc();
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (shouldEmitTable)
+    EmitExceptionTable();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c886a5ecc615..5106d5778c29 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,6 +16,7 @@
 #include "DIE.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
+#include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -24,12 +25,13 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
@@ -38,7 +40,7 @@
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/Timer.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 using namespace llvm;
 
 static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
      cl::desc("Make an absense of debug location information explicit."),
      cl::init(false));
 
+#ifndef NDEBUG
+STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number");
+#endif
+
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
   const char *DbgTimerName = "DWARF Debug Writer";
@@ -507,8 +513,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
     return;
 
   unsigned Line = V.getLineNumber();
-  unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
-                                        V.getContext().getFilename());
+  if (Line == 0)
+    return;
+  unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -522,8 +529,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
     return;
 
   unsigned Line = G.getLineNumber();
-  unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
-                                        G.getContext().getFilename());
+  if (Line == 0)
+    return;
+  unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -542,8 +550,7 @@ void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
   unsigned Line = SP.getLineNumber();
   if (!SP.getContext().Verify())
     return;
-  unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
-                                        SP.getFilename());
+  unsigned FileID = GetOrCreateSourceID(SP.getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -557,10 +564,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
     return;
 
   unsigned Line = Ty.getLineNumber();
-  if (!Ty.getContext().Verify())
+  if (Line == 0 || !Ty.getContext().Verify())
     return;
-  unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
-                                        Ty.getContext().getFilename());
+  unsigned FileID = GetOrCreateSourceID(Ty.getFilename());
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -574,10 +580,11 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
     return;
 
   unsigned Line = NS.getLineNumber();
+  if (Line == 0)
+    return;
   StringRef FN = NS.getFilename();
-  StringRef Dir = NS.getDirectory();
 
-  unsigned FileID = GetOrCreateSourceID(Dir, FN);
+  unsigned FileID = GetOrCreateSourceID(FN);
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -588,8 +595,8 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
 void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
   MachineLocation Location;
   unsigned FrameReg;
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+  int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
   Location.set(FrameReg, Offset);
 
   if (DV->variableHasComplexAddress())
@@ -620,8 +627,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
     if (Reg < 32) {
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
     } else {
-      Reg = Reg - dwarf::DW_OP_reg0;
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
       addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
   } else {
@@ -760,8 +766,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
     if (Reg < 32)
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
     else {
-      Reg = Reg - dwarf::DW_OP_reg0;
-      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
       addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
   } else {
@@ -812,6 +817,15 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
   unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 
+  if (RI->getFrameRegister(*Asm->MF) == Location.getReg()
+      && Location.getOffset()) {
+    // If variable offset is based in frame register then use fbreg.
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+    addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+    addBlock(Die, Attribute, 0, Block);
+    return;
+  }
+
   if (Location.isReg()) {
     if (Reg < 32) {
       addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
@@ -834,35 +848,28 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
 }
 
 /// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS,
-                                    const MachineOperand &MO) {
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) {
   assert (MO.isReg() && "Invalid machine operand!");
   if (!MO.getReg())
     return false;
   MachineLocation Location;
   Location.set(MO.getReg());
   addAddress(Die, dwarf::DW_AT_location, Location);
-  if (VS)
-    addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true;
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS,
-                                  const MachineOperand &MO) {
+bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) {
   assert (MO.isImm() && "Invalid machine operand!");
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
   unsigned Imm = MO.getImm();
   addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
   addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  if (VS)
-    addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true;
 }
 
 /// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
-                                    const MachineOperand &MO) {
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
   assert (MO.isFPImm() && "Invalid machine operand!");
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
   APFloat FPImm = MO.getFPImm()->getValueAPF();
@@ -883,11 +890,42 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
             (unsigned char)0xFF & FltPtr[Start]);
 
   addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-  if (VS)
-    addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
   return true;
 }
 
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI,
+                                  bool Unsigned) {
+  if (CI->getBitWidth() <= 64) {
+    if (Unsigned)
+      addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+              CI->getZExtValue());
+    else
+      addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+              CI->getSExtValue());
+    return true;
+  }
+
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  // Get the raw data form of the large APInt.
+  const APInt Val = CI->getValue();
+  const char *Ptr = (const char*)Val.getRawData();
+
+  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  int Incr = (LittleEndian ? 1 : -1);
+  int Start = (LittleEndian ? 0 : NumBytes - 1);
+  int Stop = (LittleEndian ? NumBytes : -1);
+
+  // Output the constant to DWARF one byte at a time.
+  for (; Start != Stop; Start += Incr)
+    addUInt(Block, 0, dwarf::DW_FORM_data1,
+            (unsigned char)0xFF & Ptr[Start]);
+
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
 
 /// addToContextOwner - Add Die into the list of its context owner's children.
 void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
@@ -898,8 +936,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
     DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
     ContextDIE->addChild(Die);
   } else if (Context.isSubprogram()) {
-    DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context),
-                                          /*MakeDecl=*/false);
+    DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context));
     ContextDIE->addChild(Die);
   } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
     ContextDIE->addChild(Die);
@@ -1033,16 +1070,23 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     DIDescriptor RTy = Elements.getElement(0);
     addType(&Buffer, DIType(RTy));
 
-    // Add prototype flag.
-    addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
-
+    bool isPrototyped = true;
     // Add arguments.
     for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
-      DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
       DIDescriptor Ty = Elements.getElement(i);
-      addType(Arg, DIType(Ty));
-      Buffer.addChild(Arg);
+      if (Ty.isUnspecifiedParameter()) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+        Buffer.addChild(Arg);
+        isPrototyped = false;
+      } else {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        addType(Arg, DIType(Ty));
+        Buffer.addChild(Arg);
+      }
     }
+    // Add prototype flag.
+    if (isPrototyped)
+      addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
   }
     break;
   case dwarf::DW_TAG_structure_type:
@@ -1060,8 +1104,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     for (unsigned i = 0; i < N; ++i) {
       DIDescriptor Element = Elements.getElement(i);
       DIE *ElemDie = NULL;
-      if (Element.isSubprogram())
+      if (Element.isSubprogram()) {
+        DISubprogram SP(Element);
         ElemDie = createSubprogramDIE(DISubprogram(Element));
+        if (SP.isProtected())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_protected);
+        else if (SP.isPrivate())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_private);
+        else 
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+        if (SP.isExplicit())
+          addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+      }
       else if (Element.isVariable()) {
         DIVariable DV(Element);
         ElemDie = new DIE(dwarf::DW_TAG_variable);
@@ -1094,6 +1151,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
       DIDescriptor Context = CTy.getContext();
       addToContextOwner(&Buffer, Context);
     }
+
+    if (Tag == dwarf::DW_TAG_class_type) {
+      DIArray TParams = CTy.getTemplateParams();
+      unsigned N = TParams.getNumElements();
+      // Add template parameters.
+      for (unsigned i = 0; i < N; ++i) {
+        DIDescriptor Element = TParams.getElement(i);
+        if (Element.isTemplateTypeParameter())
+          Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+                            DITemplateTypeParameter(Element)));
+        else if (Element.isTemplateValueParameter())
+          Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+                            DITemplateValueParameter(Element)));
+      }
+    }
     break;
   }
   default:
@@ -1124,6 +1196,38 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
   }
 }
 
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
+/// for the given DITemplateTypeParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+  CompileUnit *TypeCU = getCompileUnit(TP);
+  DIE *ParamDIE = TypeCU->getDIE(TP);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+  addType(ParamDIE, TP.getType());
+  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+  return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
+/// for the given DITemplateValueParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+  CompileUnit *TVCU = getCompileUnit(TPV);
+  DIE *ParamDIE = TVCU->getDIE(TPV);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+  addType(ParamDIE, TPV.getType());
+  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+  addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, 
+          TPV.getValue());
+  return ParamDIE;
+}
+
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
 void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
   int64_t L = SR.getLo();
@@ -1258,7 +1362,8 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
   else if (DT.isPrivate())
     addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
             dwarf::DW_ACCESS_private);
-  else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+  // Otherwise C++ member and base classes are considered public.
+  else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
     addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
             dwarf::DW_ACCESS_public);
   if (DT.isVirtual())
@@ -1268,7 +1373,7 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
 }
 
 /// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
   CompileUnit *SPCU = getCompileUnit(SP);
   DIE *SPDie = SPCU->getDIE(SP);
   if (SPDie)
@@ -1286,10 +1391,7 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
 
   addSourceLine(SPDie, SP);
 
-  // Add prototyped tag, if C or ObjC.
-  unsigned Lang = SP.getCompileUnit().getLanguage();
-  if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
-      Lang == dwarf::DW_LANG_ObjC)
+  if (SP.isPrototyped()) 
     addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
 
   // Add Return Type.
@@ -1307,13 +1409,13 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
     addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
     DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
-    addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+    addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
     addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
     ContainingTypeMap.insert(std::make_pair(SPDie,
                                             SP.getContainingType()));
   }
 
-  if (MakeDecl || !SP.isDefinition()) {
+  if (!SP.isDefinition()) {
     addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
 
     // Add arguments. Do not add arguments for subprogram definition. They will
@@ -1603,6 +1705,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 
   if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
     addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+  else if (DIVariable(DV->getVariable()).isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
 
   if (Scope->isAbstractScope()) {
     DV->setDIE(VariableDie);
@@ -1625,7 +1729,6 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
     DbgVariableToDbgInstMap.find(DV);
   if (DVI != DbgVariableToDbgInstMap.end()) {
     const MachineInstr *DVInsn = DVI->second;
-    const MCSymbol *DVLabel = findVariableLabel(DV);
     bool updated = false;
     // FIXME : Handle getNumOperands != 3
     if (DVInsn->getNumOperands() == 3) {
@@ -1637,20 +1740,17 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
           addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
           updated = true;
         } else
-          updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+          updated = addRegisterAddress(VariableDie, RegOp);
       }
       else if (DVInsn->getOperand(0).isImm())
-        updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+        updated = addConstantValue(VariableDie, DVInsn->getOperand(0));
       else if (DVInsn->getOperand(0).isFPImm())
         updated =
-          addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+          addConstantFPValue(VariableDie, DVInsn->getOperand(0));
     } else {
       MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
       if (Location.getReg()) {
         addAddress(VariableDie, dwarf::DW_AT_location, Location);
-        if (DVLabel)
-          addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
-                   DVLabel);
         updated = true;
       }
     }
@@ -1700,6 +1800,16 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
 
+  SmallVector <DIE *, 8> Children;
+  // Collect lexical scope childrens first.
+  const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+  for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+    if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+      Children.push_back(Variable);
+  const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+  for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+    if (DIE *Nested = constructScopeDIE(Scopes[j]))
+      Children.push_back(Nested);
   DIScope DS(Scope->getScopeNode());
   DIE *ScopeDIE = NULL;
   if (Scope->getInlinedAt())
@@ -1715,26 +1825,19 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
     else
       ScopeDIE = updateSubprogramScopeDIE(DS);
   }
-  else
+  else {
+    // There is no need to emit empty lexical block DIE.
+    if (Children.empty())
+      return NULL;
     ScopeDIE = constructLexicalScopeDIE(Scope);
-  if (!ScopeDIE) return NULL;
-
-  // Add variables to scope.
-  const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
-  for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
-    DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
-    if (VariableDIE)
-      ScopeDIE->addChild(VariableDIE);
   }
+  
+  if (!ScopeDIE) return NULL;
 
-  // Add nested scopes.
-  const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
-  for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
-    // Define the Scope debug information entry.
-    DIE *NestedDIE = constructScopeDIE(Scopes[j]);
-    if (NestedDIE)
-      ScopeDIE->addChild(NestedDIE);
-  }
+  // Add children
+  for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+         E = Children.end(); I != E; ++I)
+    ScopeDIE->addChild(*I);
 
   if (DS.isSubprogram())
     addPubTypes(DISubprogram(DS));
@@ -1746,37 +1849,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
 /// source file names. If none currently exists, create a new id and insert it
 /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
 /// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
-  unsigned DId;
-  assert (DirName.empty() == false && "Invalid directory name!");
 
-  StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
-  if (DI != DirectoryIdMap.end()) {
-    DId = DI->getValue();
-  } else {
-    DId = DirectoryNames.size() + 1;
-    DirectoryIdMap[DirName] = DId;
-    DirectoryNames.push_back(DirName);
-  }
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
+  // If FE did not provide a file name, then assume stdin.
+  if (FileName.empty())
+    return GetOrCreateSourceID("<stdin>");
 
-  unsigned FId;
-  StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
-  if (FI != SourceFileIdMap.end()) {
-    FId = FI->getValue();
-  } else {
-    FId = SourceFileNames.size() + 1;
-    SourceFileIdMap[FileName] = FId;
-    SourceFileNames.push_back(FileName);
-  }
+  StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+  if (Entry.getValue())
+    return Entry.getValue();
 
-  DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
-    SourceIdMap.find(std::make_pair(DId, FId));
-  if (SI != SourceIdMap.end())
-    return SI->second;
+  unsigned SrcId = SourceIdMap.size();
+  Entry.setValue(SrcId);
 
-  unsigned SrcId = SourceIds.size() + 1;  // DW_AT_decl_file cannot be 0.
-  SourceIdMap[std::make_pair(DId, FId)] = SrcId;
-  SourceIds.push_back(std::make_pair(DId, FId));
+  // Print out a .file directive to specify files for .loc directives.
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName);
 
   return SrcId;
 }
@@ -1802,7 +1889,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
-  unsigned ID = GetOrCreateSourceID(Dir, FN);
+  unsigned ID = GetOrCreateSourceID(FN);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
   addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
@@ -1886,6 +1973,32 @@ static bool isUnsignedDIType(DIType Ty) {
   return false;
 }
 
+// Return const exprssion if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+  const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+  if (!CE || CE->getNumOperands() != 3 ||
+      CE->getOpcode() != Instruction::GetElementPtr)
+    return NULL;
+
+  // First operand points to a global value.
+  if (!isa<GlobalValue>(CE->getOperand(0)))
+    return NULL;
+
+  // Second operand is zero.
+  const ConstantInt *CI = 
+    dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+  if (!CI || !CI->isZero())
+    return NULL;
+
+  // Third operand is offset.
+  if (!isa<ConstantInt>(CE->getOperand(2)))
+    return NULL;
+
+  return CE;
+}
+
 /// constructGlobalVariableDIE - Construct global variable DIE.
 void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
   DIGlobalVariable GV(N);
@@ -1952,16 +2065,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
     } else {
       addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
     } 
-  } else if (Constant *C = GV.getConstant()) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
-      if (isUnsignedDIType(GTy))
-          addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
-                  CI->getZExtValue());
-        else
-          addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
-                 CI->getSExtValue());
-    }
+  } else if (ConstantInt *CI = 
+             dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+    addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+  else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+    // GV is a merged global.
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+    ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
   }
+
   return;
 }
 
@@ -2043,25 +2162,12 @@ void DwarfDebug::beginModule(Module *M) {
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
       getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
 
+  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
   // Prime section data.
   SectionMap.insert(Asm->getObjFileLowering().getTextSection());
-
-  // Print out .file directives to specify files for .loc directives. These are
-  // printed out early so that they precede any .loc directives.
-  if (Asm->MAI->hasDotLocAndDotFile()) {
-    for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
-      // Remember source id starts at 1.
-      std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
-      // FIXME: don't use sys::path for this!  This should not depend on the
-      // host.
-      sys::Path FullPath(getSourceDirectoryName(Id.first));
-      bool AppendOk =
-        FullPath.appendComponent(getSourceFileName(Id.second));
-      assert(AppendOk && "Could not append filename to directory!");
-      AppendOk = false;
-      Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
-    }
-  }
 }
 
 /// endModule - Emit all Dwarf sections that should come after the content.
@@ -2081,8 +2187,7 @@ void DwarfDebug::endModule() {
       StringRef FName = SP.getLinkageName();
       if (FName.empty())
         FName = SP.getName();
-      NamedMDNode *NMD =
-        M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+      NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
       if (!NMD) continue;
       unsigned E = NMD->getNumOperands();
       if (!E) continue;
@@ -2152,9 +2257,6 @@ void DwarfDebug::endModule() {
   // Corresponding abbreviations into a abbrev section.
   emitAbbreviations();
 
-  // Emit source line correspondence into a debug line section.
-  emitDebugLines();
-
   // Emit info into a debug pubnames section.
   emitDebugPubNames();
 
@@ -2242,15 +2344,6 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
   }
 }
 
-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in undefined reg.
-static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
-  assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
-  if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
-    return true;
-  return false;
-}
-
 /// isDbgValueInDefinedReg - Return true if debug value, encoded by
 /// DBG_VALUE instruction, is in a defined reg.
 static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
@@ -2275,7 +2368,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
+      if (!MInsn->isDebugValue())
         continue;
       DbgValues.push_back(MInsn);
     }
@@ -2297,19 +2390,18 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
            ME = DbgValues.end(); MI != ME; ++MI) {
       const MDNode *Var =
         (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
-      if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+      if (Var == DV && 
           !PrevMI->isIdenticalTo(*MI))
         MultipleValues.push_back(*MI);
       PrevMI = *MI;
     }
 
-    DbgScope *Scope = findDbgScope(MInsn);
-    bool CurFnArg = false;
+    DbgScope *Scope = NULL;
     if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
         DISubprogram(DV.getContext()).describes(MF->getFunction()))
-      CurFnArg = true;
-    if (!Scope && CurFnArg)
       Scope = CurrentFnDbgScope;
+    else
+      Scope = findDbgScope(MInsn);
     // If variable scope is not found then skip this variable.
     if (!Scope)
       continue;
@@ -2317,8 +2409,6 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
     Processed.insert(DV);
     DbgVariable *RegVar = new DbgVariable(DV);
     Scope->addVariable(RegVar);
-    if (!CurFnArg)
-      DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
     if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
       DbgVariableToDbgInstMap[AbsVar] = MInsn;
       VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2375,10 +2465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
 
   // Collect info for variables that were optimized out.
   const Function *F = MF->getFunction();
-  const Module *M = F->getParent();
-  if (NamedMDNode *NMD =
-      M->getNamedMetadata(Twine("llvm.dbg.lv.",
-                                getRealLinkageName(F->getName())))) {
+  if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
       DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
       if (!DV || !Processed.insert(DV))
@@ -2409,8 +2496,8 @@ const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
   return I->second;
 }
 
-/// beginScope - Process beginning of a scope.
-void DwarfDebug::beginScope(const MachineInstr *MI) {
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   if (InsnNeedsLabel.count(MI) == 0) {
     LabelsBeforeInsn[MI] = PrevLabel;
     return;
@@ -2444,8 +2531,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
   assert (0 && "Instruction is not processed!");
 }
 
-/// endScope - Process end of a scope.
-void DwarfDebug::endScope(const MachineInstr *MI) {
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
   if (InsnsEndScopeSet.count(MI) != 0) {
     // Emit a label if this instruction ends a scope.
     MCSymbol *Label = MMI->getContext().CreateTempSymbol();
@@ -2624,6 +2711,10 @@ bool DwarfDebug::extractScopeInformation() {
         continue;
       }
 
+      // Ignore DBG_VALUE. It does not contribute any instruction in output.
+      if (MInsn->isDebugValue())
+        continue;
+
       if (RangeBeginMI) {
         // If we have alread seen a beginning of a instruction range and
         // current instruction scope does not match scope of first instruction
@@ -2727,12 +2818,37 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
   return DebugLoc();
 }
 
+#ifndef NDEBUG
+/// CheckLineNumbers - Count basicblocks whose instructions do not have any
+/// line number information.
+static void CheckLineNumbers(const MachineFunction *MF) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    bool FoundLineNo = false;
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      if (!MI->getDebugLoc().isUnknown()) {
+        FoundLineNo = true;
+        break;
+      }
+    }
+    if (!FoundLineNo && I->size())
+      ++BlocksWithoutLineNo;      
+  }
+}
+#endif
+
 /// beginFunction - Gather pre-function debug information.  Assumes being
 /// emitted immediately after the function entry point.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo()) return;
   if (!extractScopeInformation()) return;
 
+#ifndef NDEBUG
+  CheckLineNumbers(MF);
+#endif
+
   FunctionBeginSym = Asm->GetTempSymbol("func_begin",
                                         Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
@@ -2775,16 +2891,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
         DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
         if (!DV.Verify()) continue;
         // If DBG_VALUE is for a local variable then it needs a label.
-        if (DV.getTag() != dwarf::DW_TAG_arg_variable
-            && isDbgValueInUndefinedReg(MI) == false)
+        if (DV.getTag() != dwarf::DW_TAG_arg_variable)
           InsnNeedsLabel.insert(MI);
         // DBG_VALUE for inlined functions argument needs a label.
         else if (!DISubprogram(getDISubprogram(DV.getContext())).
                  describes(MF->getFunction()))
           InsnNeedsLabel.insert(MI);
         // DBG_VALUE indicating argument location change needs a label.
-        else if (isDbgValueInUndefinedReg(MI) == false
-                 && !ProcessedArgs.insert(DV))
+        else if (!ProcessedArgs.insert(DV))
           InsnNeedsLabel.insert(MI);
       } else {
         // If location is unknown then instruction needs a location only if
@@ -2820,17 +2934,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
     SmallPtrSet<const MDNode *, 16> ProcessedVars;
     collectVariableInfo(MF, ProcessedVars);
 
-    // Get function line info.
-    if (!Lines.empty()) {
-      // Get section line info.
-      unsigned ID = SectionMap.insert(Asm->getCurrentSection());
-      if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
-      std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
-      // Append the function info to section info.
-      SectionLineInfos.insert(SectionLineInfos.end(),
-                              Lines.begin(), Lines.end());
-    }
-
     // Construct abstract scopes.
     for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
            AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2840,10 +2943,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
         StringRef FName = SP.getLinkageName();
         if (FName.empty())
           FName = SP.getName();
-        const Module *M = MF->getFunction()->getParent();
-        if (NamedMDNode *NMD =
-            M->getNamedMetadata(Twine("llvm.dbg.lv.",
-                                      getRealLinkageName(FName)))) {
+        if (NamedMDNode *NMD = 
+            getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
           for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
           DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
           if (!DV || !ProcessedVars.insert(DV))
@@ -2875,7 +2976,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   DbgVariableToFrameIndexMap.clear();
   VarToAbstractVarMap.clear();
   DbgVariableToDbgInstMap.clear();
-  DbgVariableLabelsMap.clear();
   DeleteContainerSeconds(DbgScopeMap);
   InsnsEndScopeSet.clear();
   ConcreteScopes.clear();
@@ -2884,7 +2984,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   AbstractVariables.clear();
   LabelsBeforeInsn.clear();
   LabelsAfterInsn.clear();
-  Lines.clear();
   PrevLabel = NULL;
 }
 
@@ -2906,15 +3005,6 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
   return true;
 }
 
-/// findVariableLabel - Find MCSymbol for the variable.
-const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
-  DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
-    = DbgVariableLabelsMap.find(V);
-  if (I == DbgVariableLabelsMap.end())
-    return NULL;
-  else return I->second;
-}
-
 /// findDbgScope - Find DbgScope for the debug loc attached with an
 /// instruction.
 DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
@@ -2940,7 +3030,6 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
 /// the source line list.
 MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
                                        const MDNode *S) {
-  StringRef Dir;
   StringRef Fn;
 
   unsigned Src = 1;
@@ -2949,25 +3038,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
 
     if (Scope.isCompileUnit()) {
       DICompileUnit CU(S);
-      Dir = CU.getDirectory();
       Fn = CU.getFilename();
+    } else if (Scope.isFile()) {
+      DIFile F(S);
+      Fn = F.getFilename();
     } else if (Scope.isSubprogram()) {
       DISubprogram SP(S);
-      Dir = SP.getDirectory();
       Fn = SP.getFilename();
     } else if (Scope.isLexicalBlock()) {
       DILexicalBlock DB(S);
-      Dir = DB.getDirectory();
       Fn = DB.getFilename();
     } else
       assert(0 && "Unexpected scope info");
 
-    Src = GetOrCreateSourceID(Dir, Fn);
+    Src = GetOrCreateSourceID(Fn);
   }
 
-  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
-  Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
+  Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT,
+                                         0, 0);
 
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
   Asm->OutStreamer.EmitLabel(Label);
   return Label;
 }
@@ -3151,6 +3241,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
         Values[i]->EmitValue(Asm, Form);
       break;
     }
+    case dwarf::DW_AT_accessibility: {
+      if (Asm->isVerbose()) {
+        DIEInteger *V = cast<DIEInteger>(Values[i]);
+        Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+      }
+      Values[i]->EmitValue(Asm, Form);
+      break;
+    }
     default:
       // Emit an attribute using the defined form.
       Values[i]->EmitValue(Asm, Form);
@@ -3270,185 +3368,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->EmitInt8(1);
 }
 
-/// emitDebugLines - Emit source line information.
-///
-void DwarfDebug::emitDebugLines() {
-  // If the target is using .loc/.file, the assembler will be emitting the
-  // .debug_line table automatically.
-  if (Asm->MAI->hasDotLocAndDotFile())
-    return;
-
-  // Minimum line delta, thus ranging from -10..(255-10).
-  const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
-  // Maximum line delta, thus ranging from -10..(255-10).
-  const int MaxLineDelta = 255 + MinLineDelta;
-
-  // Start the dwarf line section.
-  Asm->OutStreamer.SwitchSection(
-                            Asm->getObjFileLowering().getDwarfLineSection());
-
-  // Construct the section header.
-  Asm->OutStreamer.AddComment("Length of Source Line Info");
-  Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
-                           Asm->GetTempSymbol("line_begin"), 4);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
-
-  Asm->OutStreamer.AddComment("DWARF version number");
-  Asm->EmitInt16(dwarf::DWARF_VERSION);
-
-  Asm->OutStreamer.AddComment("Prolog Length");
-  Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
-                           Asm->GetTempSymbol("line_prolog_begin"), 4);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin"));
-
-  Asm->OutStreamer.AddComment("Minimum Instruction Length");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("Default is_stmt_start flag");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)");
-  Asm->EmitInt8(MinLineDelta);
-  Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)");
-  Asm->EmitInt8(MaxLineDelta);
-  Asm->OutStreamer.AddComment("Special Opcode Base");
-  Asm->EmitInt8(-MinLineDelta);
-
-  // Line number standard opcode encodings argument count
-  Asm->OutStreamer.AddComment("DW_LNS_copy arg count");
-  Asm->EmitInt8(0);
-  Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("DW_LNS_set_file arg count");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("DW_LNS_set_column arg count");
-  Asm->EmitInt8(1);
-  Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count");
-  Asm->EmitInt8(0);
-  Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count");
-  Asm->EmitInt8(0);
-  Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count");
-  Asm->EmitInt8(0);
-  Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count");
-  Asm->EmitInt8(1);
-
-  // Emit directories.
-  for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
-    const std::string &Dir = getSourceDirectoryName(DI);
-    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory");
-    Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0);
-  }
-
-  Asm->OutStreamer.AddComment("End of directories");
-  Asm->EmitInt8(0);
-
-  // Emit files.
-  for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
-    // Remember source id starts at 1.
-    std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
-    const std::string &FN = getSourceFileName(Id.second);
-    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
-    Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-
-    Asm->EmitULEB128(Id.first, "Directory #");
-    Asm->EmitULEB128(0, "Mod date");
-    Asm->EmitULEB128(0, "File size");
-  }
-
-  Asm->OutStreamer.AddComment("End of files");
-  Asm->EmitInt8(0);
-
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end"));
-
-  // A sequence for each text section.
-  unsigned SecSrcLinesSize = SectionSourceLines.size();
-
-  for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
-    // Isolate current sections line info.
-    const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
-
-    // Dwarf assumes we start with first line of first source file.
-    unsigned Source = 1;
-    unsigned Line = 1;
-
-    // Construct rows of the address, source, line, column matrix.
-    for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
-      const SrcLineInfo &LineInfo = LineInfos[i];
-      MCSymbol *Label = LineInfo.getLabel();
-      if (!Label->isDefined()) continue; // Not emitted, in dead code.
-
-      if (Asm->isVerbose()) {
-        std::pair<unsigned, unsigned> SrcID =
-          getSourceDirectoryAndFileIds(LineInfo.getSourceID());
-        Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) +
-                                    "/" +
-                                    Twine(getSourceFileName(SrcID.second)) +
-                                    ":" + Twine(LineInfo.getLine()));
-      }
-
-      // Define the line address.
-      Asm->OutStreamer.AddComment("Extended Op");
-      Asm->EmitInt8(0);
-      Asm->OutStreamer.AddComment("Op size");
-      Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
-
-      Asm->OutStreamer.AddComment("DW_LNE_set_address");
-      Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
-      Asm->OutStreamer.AddComment("Location label");
-      Asm->OutStreamer.EmitSymbolValue(Label,
-                                       Asm->getTargetData().getPointerSize(),
-                                       0/*AddrSpace*/);
-
-      // If change of source, then switch to the new source.
-      if (Source != LineInfo.getSourceID()) {
-        Source = LineInfo.getSourceID();
-        Asm->OutStreamer.AddComment("DW_LNS_set_file");
-        Asm->EmitInt8(dwarf::DW_LNS_set_file);
-        Asm->EmitULEB128(Source, "New Source");
-      }
-
-      // If change of line.
-      if (Line != LineInfo.getLine()) {
-        // Determine offset.
-        int Offset = LineInfo.getLine() - Line;
-        int Delta = Offset - MinLineDelta;
-
-        // Update line.
-        Line = LineInfo.getLine();
-
-        // If delta is small enough and in range...
-        if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
-          // ... then use fast opcode.
-          Asm->OutStreamer.AddComment("Line Delta");
-          Asm->EmitInt8(Delta - MinLineDelta);
-        } else {
-          // ... otherwise use long hand.
-          Asm->OutStreamer.AddComment("DW_LNS_advance_line");
-          Asm->EmitInt8(dwarf::DW_LNS_advance_line);
-          Asm->EmitSLEB128(Offset, "Line Offset");
-          Asm->OutStreamer.AddComment("DW_LNS_copy");
-          Asm->EmitInt8(dwarf::DW_LNS_copy);
-        }
-      } else {
-        // Copy the previous row (different address or source)
-        Asm->OutStreamer.AddComment("DW_LNS_copy");
-        Asm->EmitInt8(dwarf::DW_LNS_copy);
-      }
-    }
-
-    emitEndOfLineMatrix(j + 1);
-  }
-
-  if (SecSrcLinesSize == 0)
-    // Because we're emitting a debug_line section, we still need a line
-    // table. The linker and friends expect it to exist. If there's nothing to
-    // put into it, emit an empty table.
-    emitEndOfLineMatrix(1);
-
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end"));
-}
-
 /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
 ///
 void DwarfDebug::emitCommonDebugFrame() {
@@ -3456,8 +3375,8 @@ void DwarfDebug::emitCommonDebugFrame() {
     return;
 
   int stackGrowth = Asm->getTargetData().getPointerSize();
-  if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-      TargetFrameInfo::StackGrowsDown)
+  if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+      TargetFrameLowering::StackGrowsDown)
     stackGrowth *= -1;
 
   // Start the dwarf frame section.
@@ -3480,10 +3399,11 @@ void DwarfDebug::emitCommonDebugFrame() {
   Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
   Asm->OutStreamer.AddComment("CIE RA Column");
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
   Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
 
   std::vector<MachineMove> Moves;
-  RI->getInitialFrameState(Moves);
+  TFI->getInitialFrameState(Moves);
 
   Asm->EmitFrameMoves(Moves, 0, false);
 
@@ -3667,6 +3587,14 @@ void DwarfDebug::emitDebugLoc() {
   if (DotDebugLocEntries.empty())
     return;
 
+  for (SmallVector<DotDebugLocEntry, 4>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I) {
+    DotDebugLocEntry &Entry = *I;
+    if (I + 1 != DotDebugLocEntries.end())
+      Entry.Merge(I+1);
+  }
+
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfLocSection());
@@ -3676,7 +3604,8 @@ void DwarfDebug::emitDebugLoc() {
   for (SmallVector<DotDebugLocEntry, 4>::iterator
          I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I, ++index) {
-    DotDebugLocEntry Entry = *I;
+    DotDebugLocEntry &Entry = *I;
+    if (Entry.isMerged()) continue;
     if (Entry.isEmpty()) {
       Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
       Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index f0ff3bc71699..7df0510fbfba 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -23,6 +23,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/UniqueVector.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
 
@@ -51,6 +52,8 @@ class DIType;
 class DINameSpace;
 class DISubrange;
 class DICompositeType;
+class DITemplateTypeParameter;
+class DITemplateValueParameter;
 
 //===----------------------------------------------------------------------===//
 /// SrcLineInfo - This class is used to record source line correspondence.
@@ -71,6 +74,28 @@ public:
   MCSymbol *getLabel() const { return Label; }
 };
 
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+  const MCSymbol *Begin;
+  const MCSymbol *End;
+  MachineLocation Loc;
+  bool Merged;
+  DotDebugLocEntry() : Begin(0), End(0), Merged(false) {}
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) 
+    : Begin(B), End(E), Loc(L), Merged(false) {}
+  /// Empty entries are also used as a trigger to emit temp label. Such
+  /// labels are referenced is used to find debug_loc offset for a given DIE.
+  bool isEmpty() { return Begin == 0 && End == 0; }
+  bool isMerged() { return Merged; }
+  void Merge(DotDebugLocEntry *Next) {
+    if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+      return;
+    Next->Begin = Begin;
+    Merged = true;
+  }
+} DotDebugLocEntry;
+
 class DwarfDebug {
   /// Asm - Target of Dwarf emission.
   AsmPrinter *Asm;
@@ -93,30 +118,9 @@ class DwarfDebug {
   ///
   std::vector<DIEAbbrev *> Abbreviations;
 
-  /// DirectoryIdMap - Directory name to directory id map.
-  ///
-  StringMap<unsigned> DirectoryIdMap;
-
-  /// DirectoryNames - A list of directory names.
-  SmallVector<std::string, 8> DirectoryNames;
-
-  /// SourceFileIdMap - Source file name to source file id map.
-  ///
-  StringMap<unsigned> SourceFileIdMap;
-
-  /// SourceFileNames - A list of source file names.
-  SmallVector<std::string, 8> SourceFileNames;
-
   /// SourceIdMap - Source id map, i.e. pair of directory id and source file
   /// id mapped to a unique id.
-  DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
-
-  /// SourceIds - Reverse map from source id to directory id + file id pair.
-  ///
-  SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
-
-  /// Lines - List of source line correspondence.
-  std::vector<SrcLineInfo> Lines;
+  StringMap<unsigned> SourceIdMap;
 
   /// DIEBlocks - A list of all the DIEBlocks in use.
   std::vector<DIEBlock *> DIEBlocks;
@@ -135,10 +139,6 @@ class DwarfDebug {
   ///
   UniqueVector<const MCSection*> SectionMap;
 
-  /// SectionSourceLines - Tracks line numbers per text section.
-  ///
-  std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
-
   // CurrentFnDbgScope - Top level scope for the current function.
   //
   DbgScope *CurrentFnDbgScope;
@@ -175,23 +175,6 @@ class DwarfDebug {
   /// machine instruction.
   DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
 
-  /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
-  DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
-
-  /// DotDebugLocEntry - This struct describes location entries emitted in
-  /// .debug_loc section.
-  typedef struct DotDebugLocEntry {
-    const MCSymbol *Begin;
-    const MCSymbol *End;
-    MachineLocation Loc;
-    DotDebugLocEntry() : Begin(0), End(0) {}
-    DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, 
-                  MachineLocation &L) : Begin(B), End(E), Loc(L) {}
-    /// Empty entries are also used as a trigger to emit temp label. Such
-    /// labels are referenced is used to find debug_loc offset for a given DIE.
-    bool isEmpty() { return Begin == 0 && End == 0; }
-  } DotDebugLocEntry;
-
   /// DotDebugLocEntries - Collection of DotDebugLocEntry.
   SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
 
@@ -265,35 +248,10 @@ class DwarfDebug {
 
   DIEInteger *DIEIntegerOne;
 private:
-  
-  /// getSourceDirectoryAndFileIds - Return the directory and file ids that
-  /// maps to the source id. Source id starts at 1.
-  std::pair<unsigned, unsigned>
-  getSourceDirectoryAndFileIds(unsigned SId) const {
-    return SourceIds[SId-1];
-  }
-
-  /// getNumSourceDirectories - Return the number of source directories in the
-  /// debug info.
-  unsigned getNumSourceDirectories() const {
-    return DirectoryNames.size();
-  }
-
-  /// getSourceDirectoryName - Return the name of the directory corresponding
-  /// to the id.
-  const std::string &getSourceDirectoryName(unsigned Id) const {
-    return DirectoryNames[Id - 1];
-  }
-
-  /// getSourceFileName - Return the name of the source file corresponding
-  /// to the id.
-  const std::string &getSourceFileName(unsigned Id) const {
-    return SourceFileNames[Id - 1];
-  }
 
   /// getNumSourceIds - Return the number of unique source ids.
   unsigned getNumSourceIds() const {
-    return SourceIds.size();
+    return SourceIdMap.size();
   }
 
   /// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -349,13 +307,14 @@ private:
                   const MachineLocation &Location);
 
   /// addRegisterAddress - Add register location entry in variable DIE.
-  bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+  bool addRegisterAddress(DIE *Die, const MachineOperand &MO);
 
   /// addConstantValue - Add constant value entry in variable DIE.
-  bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+  bool addConstantValue(DIE *Die, const MachineOperand &MO);
+  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
 
   /// addConstantFPValue - Add constant value entry in variable DIE.
-  bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+  bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
 
   /// addComplexAddress - Start with the address based on the location provided,
   /// and generate the DWARF information necessary to find the actual variable
@@ -393,6 +352,14 @@ private:
   /// given DIType.
   DIE *getOrCreateTypeDIE(DIType Ty);
 
+  /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE 
+  /// for the given DITemplateTypeParameter.
+  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE 
+  /// for the given DITemplateValueParameter.
+  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
   void addPubTypes(DISubprogram SP);
 
   /// constructTypeDIE - Construct basic type die from DIBasicType.
@@ -421,7 +388,7 @@ private:
   DIE *createMemberDIE(DIDerivedType DT);
 
   /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
+  DIE *createSubprogramDIE(DISubprogram SP);
 
   /// getOrCreateDbgScope - Create DbgScope for the scope.
   DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -481,10 +448,6 @@ private:
   ///
   void emitEndOfLineMatrix(unsigned SectionEnd);
 
-  /// emitDebugLines - Emit source line information.
-  ///
-  void emitDebugLines();
-
   /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
   ///
   void emitCommonDebugFrame();
@@ -543,9 +506,8 @@ private:
 
   /// GetOrCreateSourceID - Look up the source id with the given directory and
   /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
-  /// maps as well.
-  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
+  /// in the SourceIds map.
+  unsigned GetOrCreateSourceID(StringRef FullName);
 
   /// constructCompileUnit - Create new CompileUnit for the given 
   /// metadata node with tag DW_TAG_compile_unit.
@@ -565,12 +527,6 @@ private:
   /// the source line list.
   MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
   
-  /// getSourceLineCount - Return the number of source lines in the debug
-  /// info.
-  unsigned getSourceLineCount() const {
-    return Lines.size();
-  }
-  
   /// recordVariableFrameIndex - Record a variable's index.
   void recordVariableFrameIndex(const DbgVariable *V, int Index);
 
@@ -578,9 +534,6 @@ private:
   /// is found. Update FI to hold value of the index.
   bool findVariableFrameIndex(const DbgVariable *V, int *FI);
 
-  /// findVariableLabel - Find MCSymbol for the variable.
-  const MCSymbol *findVariableLabel(const DbgVariable *V);
-
   /// findDbgScope - Find DbgScope for the debug loc attached with an 
   /// instruction.
   DbgScope *findDbgScope(const MachineInstr *MI);
@@ -630,11 +583,11 @@ public:
   /// getLabelAfterInsn - Return Label immediately following the instruction.
   const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
 
-  /// beginScope - Process beginning of a scope.
-  void beginScope(const MachineInstr *MI);
+  /// beginInstruction - Process beginning of an instruction.
+  void beginInstruction(const MachineInstr *MI);
 
-  /// endScope - Prcess end of a scope.
-  void endScope(const MachineInstr *MI);
+  /// endInstruction - Prcess end of an instruction.
+  void endInstruction(const MachineInstr *MI);
 };
 } // End of namespace llvm
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 86a368831e0e..967a2783da14 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -26,7 +26,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -39,238 +39,10 @@
 using namespace llvm;
 
 DwarfException::DwarfException(AsmPrinter *A)
-  : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
-    shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+  : Asm(A), MMI(Asm->MMI) {}
 
 DwarfException::~DwarfException() {}
 
-/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
-/// is shared among many Frame Description Entries.  There is at least one CIE
-/// in every non-empty .debug_frame section.
-void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
-  // Size and sign of stack growth.
-  int stackGrowth = Asm->getTargetData().getPointerSize();
-  if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-      TargetFrameInfo::StackGrowsDown)
-    stackGrowth *= -1;
-
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
-  // Begin eh frame section.
-  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
-  MCSymbol *EHFrameSym;
-  if (TLOF.isFunctionEHFrameSymbolPrivate())
-    EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
-  else
-    EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + 
-                                                   Twine(Index));
-  Asm->OutStreamer.EmitLabel(EHFrameSym);
-  
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
-
-  // Define base labels.
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
-
-  // Define the eh frame length.
-  Asm->OutStreamer.AddComment("Length of Common Information Entry");
-  Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
-                           Asm->GetTempSymbol("eh_frame_common_begin", Index),
-                           4);
-
-  // EH frame header.
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
-  Asm->OutStreamer.AddComment("CIE Identifier Tag");
-  Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
-  Asm->OutStreamer.AddComment("DW_CIE_VERSION");
-  Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
-
-  // The personality presence indicates that language specific information will
-  // show up in the eh frame.  Find out how we are supposed to lower the
-  // personality function reference:
-
-  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
-  unsigned FDEEncoding = TLOF.getFDEEncoding();
-  unsigned PerEncoding = TLOF.getPersonalityEncoding();
-
-  char Augmentation[6] = { 0 };
-  unsigned AugmentationSize = 0;
-  char *APtr = Augmentation + 1;
-
-  if (PersonalityFn) {
-    // There is a personality function.
-    *APtr++ = 'P';
-    AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
-  }
-
-  if (UsesLSDA[Index]) {
-    // An LSDA pointer is in the FDE augmentation.
-    *APtr++ = 'L';
-    ++AugmentationSize;
-  }
-
-  if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
-    // A non-default pointer encoding for the FDE.
-    *APtr++ = 'R';
-    ++AugmentationSize;
-  }
-
-  if (APtr != Augmentation + 1)
-    Augmentation[0] = 'z';
-
-  Asm->OutStreamer.AddComment("CIE Augmentation");
-  Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
-
-  // Round out reader.
-  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
-  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
-  Asm->OutStreamer.AddComment("CIE Return Address Column");
-
-  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
-
-  if (Augmentation[0]) {
-    Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
-
-    // If there is a personality, we need to indicate the function's location.
-    if (PersonalityFn) {
-      Asm->EmitEncodingByte(PerEncoding, "Personality");
-      Asm->OutStreamer.AddComment("Personality");
-      Asm->EmitReference(PersonalityFn, PerEncoding);
-    }
-    if (UsesLSDA[Index])
-      Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
-    if (FDEEncoding != dwarf::DW_EH_PE_absptr)
-      Asm->EmitEncodingByte(FDEEncoding, "FDE");
-  }
-
-  // Indicate locations of general callee saved registers in frame.
-  std::vector<MachineMove> Moves;
-  RI->getInitialFrameState(Moves);
-  Asm->EmitFrameMoves(Moves, 0, true);
-
-  // On Darwin the linker honors the alignment of eh_frame, which means it must
-  // be 8-byte on 64-bit targets to match what gcc does.  Otherwise you get
-  // holes which confuse readers of eh_frame.
-  Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
-}
-
-/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
-void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
-  assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
-         "Should not emit 'available externally' functions at all");
-
-  const Function *TheFunc = EHFrameInfo.function;
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
-  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
-  unsigned FDEEncoding = TLOF.getFDEEncoding();
-
-  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
-  // Externally visible entry into the functions eh frame info. If the
-  // corresponding function is static, this should not be externally visible.
-  if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
-    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
-
-  // If corresponding function is weak definition, this should be too.
-  if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
-    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
-                                         MCSA_WeakDefinition);
-
-  // If corresponding function is hidden, this should be too.
-  if (TheFunc->hasHiddenVisibility())
-    if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
-      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
-                                           HiddenAttr);
-
-  // If there are no calls then you can't unwind.  This may mean we can omit the
-  // EH Frame, but some environments do not handle weak absolute symbols. If
-  // UnwindTablesMandatory is set we cannot do this optimization; the unwind
-  // info is to be available for non-EH uses.
-  if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
-      (!TheFunc->isWeakForLinker() ||
-       !Asm->MAI->getWeakDefDirective() ||
-       TLOF.getSupportsWeakOmittedEHFrame())) {
-    Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
-                                    MCConstantExpr::Create(0, Asm->OutContext));
-    // This name has no connection to the function, so it might get
-    // dead-stripped when the function is not, erroneously.  Prohibit
-    // dead-stripping unconditionally.
-    if (Asm->MAI->hasNoDeadStrip())
-      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
-                                           MCSA_NoDeadStrip);
-  } else {
-    Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
-
-    // EH frame header.
-    Asm->OutStreamer.AddComment("Length of Frame Information Entry");
-    Asm->EmitLabelDifference(
-                Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
-                Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
-
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
-                                                  EHFrameInfo.Number));
-
-    Asm->OutStreamer.AddComment("FDE CIE offset");
-    Asm->EmitLabelDifference(
-                       Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
-                       Asm->GetTempSymbol("eh_frame_common",
-                                          EHFrameInfo.PersonalityIndex), 4);
-
-    MCSymbol *EHFuncBeginSym =
-      Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
-
-    Asm->OutStreamer.AddComment("FDE initial location");
-    Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
-    
-    Asm->OutStreamer.AddComment("FDE address range");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
-                                                EHFrameInfo.Number),
-                             EHFuncBeginSym,
-                             Asm->GetSizeOfEncodedValue(FDEEncoding));
-
-    // If there is a personality and landing pads then point to the language
-    // specific data area in the exception table.
-    if (MMI->getPersonalities()[0] != NULL) {
-      unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
-
-      Asm->EmitULEB128(Size, "Augmentation size");
-      Asm->OutStreamer.AddComment("Language Specific Data Area");
-      if (EHFrameInfo.hasLandingPads)
-        Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
-                           LSDAEncoding);
-      else
-        Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
-
-    } else {
-      Asm->EmitULEB128(0, "Augmentation size");
-    }
-
-    // Indicate locations of function specific callee saved registers in frame.
-    Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
-
-    // On Darwin the linker honors the alignment of eh_frame, which means it
-    // must be 8-byte on 64-bit targets to match what gcc does.  Otherwise you
-    // get holes which confuse readers of eh_frame.
-    Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
-                                                  EHFrameInfo.Number));
-
-    // If the function is marked used, this table should be also.  We cannot
-    // make the mark unconditional in this case, since retaining the table also
-    // retains the function in this case, and there is code around that depends
-    // on unused functions (calling undefined externals) being dead-stripped to
-    // link correctly.  Yes, there really is.
-    if (MMI->isUsedFunction(EHFrameInfo.function))
-      if (Asm->MAI->hasNoDeadStrip())
-        Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
-                                             MCSA_NoDeadStrip);
-  }
-  Asm->OutStreamer.AddBlankLine();
-}
-
 /// SharedTypeIds - How many leading type ids two landing pads have in common.
 unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
                                        const LandingPadInfo *R) {
@@ -422,7 +194,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
     const MachineOperand &MO = MI->getOperand(I);
 
     if (!MO.isGlobal()) continue;
-    
+
     const Function *F = dyn_cast<Function>(MO.getGlobal());
     if (F == 0) continue;
 
@@ -430,7 +202,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
       // Be conservative. If we have more than one function operand for this
       // call, then we can't make the assumption that it's the callee and
       // not a parameter to the call.
-      // 
+      //
       // FIXME: Determine if there's a way to say that `F' is the callee or
       // parameter.
       MarkedNoUnwind = false;
@@ -497,8 +269,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
       // instruction between the previous try-range and this one may throw,
       // create a call-site entry with no landing pad for the region between the
       // try-ranges.
-      if (SawPotentiallyThrowing &&
-          Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+      if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
         CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
         CallSites.push_back(Site);
         PreviousIsInvoke = false;
@@ -520,8 +291,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
         };
 
         // Try to merge with the previous call-site. SJLJ doesn't do this
-        if (PreviousIsInvoke &&
-          Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+        if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
           CallSiteEntry &Prev = CallSites.back();
           if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
             // Extend the range of the previous entry.
@@ -531,7 +301,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
         }
 
         // Otherwise, create a new call-site.
-        if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+        if (Asm->MAI->isExceptionHandlingDwarf())
           CallSites.push_back(Site);
         else {
           // SjLj EH must maintain the call sites in the order assigned
@@ -549,8 +319,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
   // If some instruction between the previous try-range and the end of the
   // function may throw, create a call-site entry with no landing pad for the
   // region following the try-range.
-  if (SawPotentiallyThrowing &&
-      Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+  if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
     CallSiteEntry Site = { LastLabel, 0, 0, 0 };
     CallSites.push_back(Site);
   }
@@ -620,7 +389,7 @@ void DwarfException::EmitExceptionTable() {
   // Call sites.
   bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
   bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
-  
+
   unsigned CallSiteTableLength;
   if (IsSJLJ)
     CallSiteTableLength = 0;
@@ -628,7 +397,7 @@ void DwarfException::EmitExceptionTable() {
     unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
     unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
     unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
-    CallSiteTableLength = 
+    CallSiteTableLength =
       CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
   }
 
@@ -656,15 +425,15 @@ void DwarfException::EmitExceptionTable() {
     // mode, this reference will require a relocation by the dynamic linker.
     //
     // Because of this, we have a couple of options:
-    // 
+    //
     //   1) If we are in -static mode, we can always use an absolute reference
     //      from the LSDA, because the static linker will resolve it.
-    //      
+    //
     //   2) Otherwise, if the LSDA section is writable, we can output the direct
     //      reference to the typeinfo and allow the dynamic linker to relocate
     //      it.  Since it is in a writable section, the dynamic linker won't
     //      have a problem.
-    //      
+    //
     //   3) Finally, if we're in PIC mode and the LDSA section isn't writable,
     //      we need to use some form of indirection.  For example, on Darwin,
     //      we can output a statically-relocatable reference to a dyld stub. The
@@ -682,11 +451,14 @@ void DwarfException::EmitExceptionTable() {
   }
 
   // Begin the exception table.
-  Asm->OutStreamer.SwitchSection(LSDASection);
+  // Sometimes we want not to emit the data into separate section (e.g. ARM
+  // EHABI). In this case LSDASection will be NULL.
+  if (LSDASection)
+    Asm->OutStreamer.SwitchSection(LSDASection);
   Asm->EmitAlignment(2);
 
   // Emit the LSDA.
-  MCSymbol *GCCETSym = 
+  MCSymbol *GCCETSym =
     Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
                                       Twine(Asm->getFunctionNumber()));
   Asm->OutStreamer.EmitLabel(GCCETSym);
@@ -764,7 +536,7 @@ void DwarfException::EmitExceptionTable() {
     }
   } else {
     // DWARF Exception handling
-    assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+    assert(Asm->MAI->isExceptionHandlingDwarf());
 
     // The call-site table is a list of all call sites that may throw an
     // exception (including C++ 'throw' statements) in the procedure
@@ -793,23 +565,23 @@ void DwarfException::EmitExceptionTable() {
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
       const CallSiteEntry &S = *I;
-      
+
       MCSymbol *EHFuncBeginSym =
         Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
-      
+
       MCSymbol *BeginLabel = S.BeginLabel;
       if (BeginLabel == 0)
         BeginLabel = EHFuncBeginSym;
       MCSymbol *EndLabel = S.EndLabel;
       if (EndLabel == 0)
         EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-        
+
       // Offset of the call site relative to the previous call site, counted in
       // number of 16-byte bundles. The first call site is counted relative to
       // the start of the procedure fragment.
       Asm->OutStreamer.AddComment("Region start");
       Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-      
+
       Asm->OutStreamer.AddComment("Region length");
       Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
@@ -834,7 +606,7 @@ void DwarfException::EmitExceptionTable() {
     Asm->OutStreamer.AddComment("-- Action Record Table --");
     Asm->OutStreamer.AddBlankLine();
   }
-  
+
   for (SmallVectorImpl<ActionEntry>::const_iterator
          I = Actions.begin(), E = Actions.end(); I != E; ++I) {
     const ActionEntry &Action = *I;
@@ -888,73 +660,17 @@ void DwarfException::EmitExceptionTable() {
 /// EndModule - Emit all exception information that should come after the
 /// content.
 void DwarfException::EndModule() {
-  if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
-    return;
-
-  if (!shouldEmitMovesModule && !shouldEmitTableModule)
-    return;
-
-  const std::vector<const Function*> &Personalities = MMI->getPersonalities();
-
-  for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
-    EmitCIE(Personalities[I], I);
-
-  for (std::vector<FunctionEHFrameInfo>::iterator
-         I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
-    EmitFDE(*I);
+  assert(0 && "Should be implemented");
 }
 
 /// BeginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
 void DwarfException::BeginFunction(const MachineFunction *MF) {
-  shouldEmitTable = shouldEmitMoves = false;
-
-  // If any landing pads survive, we need an EH table.
-  shouldEmitTable = !MMI->getLandingPads().empty();
-
-  // See if we need frame move info.
-  shouldEmitMoves =
-    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
-
-  if (shouldEmitMoves || shouldEmitTable)
-    // Assumes in correct section after the entry point.
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
-                                                  Asm->getFunctionNumber()));
-
-  shouldEmitTableModule |= shouldEmitTable;
-  shouldEmitMovesModule |= shouldEmitMoves;
+  assert(0 && "Should be implemented");
 }
 
 /// EndFunction - Gather and emit post-function exception information.
 ///
 void DwarfException::EndFunction() {
-  if (!shouldEmitMoves && !shouldEmitTable) return;
-
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
-                                                Asm->getFunctionNumber()));
-
-  // Record if this personality index uses a landing pad.
-  bool HasLandingPad = !MMI->getLandingPads().empty();
-  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
-  
-  // Map all labels and get rid of any dead landing pads.
-  MMI->TidyLandingPads();
-
-  if (HasLandingPad)
-    EmitExceptionTable();
-
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-  MCSymbol *FunctionEHSym =
-    Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
-                                      TLOF.isFunctionEHFrameSymbolPrivate());
-  
-  // Save EH frame information
-  EHFrames.
-    push_back(FunctionEHFrameInfo(FunctionEHSym,
-                                  Asm->getFunctionNumber(),
-                                  MMI->getPersonalityIndex(),
-                                  Asm->MF->getFrameInfo()->adjustsStack(),
-                                  !MMI->getLandingPads().empty(),
-                                  MMI->getFrameMoves(),
-                                  Asm->MF->getFunction()));
+  assert(0 && "Should be implemented");
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index bc311e67054e..a172e53f8ac7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -35,60 +35,13 @@ class AsmPrinter;
 /// DwarfException - Emits Dwarf exception handling directives.
 ///
 class DwarfException {
+protected:
   /// Asm - Target of Dwarf emission.
   AsmPrinter *Asm;
 
   /// MMI - Collected machine module information.
   MachineModuleInfo *MMI;
 
-  struct FunctionEHFrameInfo {
-    MCSymbol *FunctionEHSym;  // L_foo.eh
-    unsigned Number;
-    unsigned PersonalityIndex;
-    bool adjustsStack;
-    bool hasLandingPads;
-    std::vector<MachineMove> Moves;
-    const Function *function;
-
-    FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
-                        bool hC, bool hL,
-                        const std::vector<MachineMove> &M,
-                        const Function *f):
-      FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
-      adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
-  };
-
-  std::vector<FunctionEHFrameInfo> EHFrames;
-
-  /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
-  /// uses an LSDA. If so, then we need to encode that information in the CIE's
-  /// augmentation.
-  DenseMap<unsigned, bool> UsesLSDA;
-
-  /// shouldEmitTable - Per-function flag to indicate if EH tables should
-  /// be emitted.
-  bool shouldEmitTable;
-
-  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
-  /// should be emitted.
-  bool shouldEmitMoves;
-
-  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
-  /// should be emitted.
-  bool shouldEmitTableModule;
-
-  /// shouldEmitFrameModule - Per-module flag to indicate if frame moves
-  /// should be emitted.
-  bool shouldEmitMovesModule;
-
-  /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
-  /// that is shared among many Frame Description Entries.  There is at least
-  /// one CIE in every non-empty .debug_frame section.
-  void EmitCIE(const Function *Personality, unsigned Index);
-
-  /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
-  void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
-
   /// EmitExceptionTable - Emit landing pads and actions.
   ///
   /// The general organization of the table is complex, but the basic concepts
@@ -172,18 +125,116 @@ public:
   // Main entry points.
   //
   DwarfException(AsmPrinter *A);
-  ~DwarfException();
+  virtual ~DwarfException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfCFIException(AsmPrinter *A);
+  virtual ~DwarfCFIException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+class DwarfTableException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+
+  /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+  /// should be emitted.
+  bool shouldEmitMovesModule;
+
+  struct FunctionEHFrameInfo {
+    MCSymbol *FunctionEHSym;  // L_foo.eh
+    unsigned Number;
+    unsigned PersonalityIndex;
+    bool adjustsStack;
+    bool hasLandingPads;
+    std::vector<MachineMove> Moves;
+    const Function *function;
+
+    FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+                        bool hC, bool hL,
+                        const std::vector<MachineMove> &M,
+                        const Function *f):
+      FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+      adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
+  };
+
+  std::vector<FunctionEHFrameInfo> EHFrames;
+
+  /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+  /// uses an LSDA. If so, then we need to encode that information in the CIE's
+  /// augmentation.
+  DenseMap<unsigned, bool> UsesLSDA;
+
+  /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+  /// that is shared among many Frame Description Entries.  There is at least
+  /// one CIE in every non-empty .debug_frame section.
+  void EmitCIE(const Function *Personality, unsigned Index);
+
+  /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+  void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfTableException(AsmPrinter *A);
+  virtual ~DwarfTableException();
 
   /// EndModule - Emit all exception information that should come after the
   /// content.
-  void EndModule();
+  virtual void EndModule();
 
   /// BeginFunction - Gather pre-function exception information.  Assumes being
   /// emitted immediately after the function entry point.
-  void BeginFunction(const MachineFunction *MF);
+  virtual void BeginFunction(const MachineFunction *MF);
 
   /// EndFunction - Gather and emit post-function exception information.
-  void EndFunction();
+  virtual void EndFunction();
 };
 
 } // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
new file mode 100644
index 000000000000..751901183cd0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
@@ -0,0 +1,349 @@
+//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+// The implementation emits all the necessary tables "by hands".
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfTableException::DwarfTableException(AsmPrinter *A)
+  :  DwarfException(A),
+     shouldEmitTable(false), shouldEmitMoves(false),
+     shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+
+DwarfTableException::~DwarfTableException() {}
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
+/// is shared among many Frame Description Entries.  There is at least one CIE
+/// in every non-empty .debug_frame section.
+void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
+  // Size and sign of stack growth.
+  int stackGrowth = Asm->getTargetData().getPointerSize();
+  if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+      TargetFrameLowering::StackGrowsDown)
+    stackGrowth *= -1;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Begin eh frame section.
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  MCSymbol *EHFrameSym;
+  if (TLOF.isFunctionEHFrameSymbolPrivate())
+    EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
+  else
+    EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
+                                                   Twine(Index));
+  Asm->OutStreamer.EmitLabel(EHFrameSym);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
+
+  // Define base labels.
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
+
+  // Define the eh frame length.
+  Asm->OutStreamer.AddComment("Length of Common Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
+                           Asm->GetTempSymbol("eh_frame_common_begin", Index),
+                           4);
+
+  // EH frame header.
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
+  Asm->OutStreamer.AddComment("CIE Identifier Tag");
+  Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+  Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+  Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
+
+  // The personality presence indicates that language specific information will
+  // show up in the eh frame.  Find out how we are supposed to lower the
+  // personality function reference:
+
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding();
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+  char Augmentation[6] = { 0 };
+  unsigned AugmentationSize = 0;
+  char *APtr = Augmentation + 1;
+
+  if (PersonalityFn) {
+    // There is a personality function.
+    *APtr++ = 'P';
+    AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
+  }
+
+  if (UsesLSDA[Index]) {
+    // An LSDA pointer is in the FDE augmentation.
+    *APtr++ = 'L';
+    ++AugmentationSize;
+  }
+
+  if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+    // A non-default pointer encoding for the FDE.
+    *APtr++ = 'R';
+    ++AugmentationSize;
+  }
+
+  if (APtr != Augmentation + 1)
+    Augmentation[0] = 'z';
+
+  Asm->OutStreamer.AddComment("CIE Augmentation");
+  Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
+
+  // Round out reader.
+  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->OutStreamer.AddComment("CIE Return Address Column");
+
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+  Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+  if (Augmentation[0]) {
+    Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
+
+    // If there is a personality, we need to indicate the function's location.
+    if (PersonalityFn) {
+      Asm->EmitEncodingByte(PerEncoding, "Personality");
+      Asm->OutStreamer.AddComment("Personality");
+      Asm->EmitReference(PersonalityFn, PerEncoding);
+    }
+    if (UsesLSDA[Index])
+      Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
+    if (FDEEncoding != dwarf::DW_EH_PE_absptr)
+      Asm->EmitEncodingByte(FDEEncoding, "FDE");
+  }
+
+  // Indicate locations of general callee saved registers in frame.
+  std::vector<MachineMove> Moves;
+  TFI->getInitialFrameState(Moves);
+  Asm->EmitFrameMoves(Moves, 0, true);
+
+  // On Darwin the linker honors the alignment of eh_frame, which means it must
+  // be 8-byte on 64-bit targets to match what gcc does.  Otherwise you get
+  // holes which confuse readers of eh_frame.
+  Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
+}
+
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+  assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+         "Should not emit 'available externally' functions at all");
+
+  const Function *TheFunc = EHFrameInfo.function;
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding();
+
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  // Externally visible entry into the functions eh frame info. If the
+  // corresponding function is static, this should not be externally visible.
+  if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
+
+  // If corresponding function is weak definition, this should be too.
+  if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                         MCSA_WeakDefinition);
+
+  // If corresponding function is hidden, this should be too.
+  if (TheFunc->hasHiddenVisibility())
+    if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
+      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                           HiddenAttr);
+
+  // If there are no calls then you can't unwind.  This may mean we can omit the
+  // EH Frame, but some environments do not handle weak absolute symbols. If
+  // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+  // info is to be available for non-EH uses.
+  if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
+      (!TheFunc->isWeakForLinker() ||
+       !Asm->MAI->getWeakDefDirective() ||
+       TLOF.getSupportsWeakOmittedEHFrame())) {
+    Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+                                    MCConstantExpr::Create(0, Asm->OutContext));
+    // This name has no connection to the function, so it might get
+    // dead-stripped when the function is not, erroneously.  Prohibit
+    // dead-stripping unconditionally.
+    if (Asm->MAI->hasNoDeadStrip())
+      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                           MCSA_NoDeadStrip);
+  } else {
+    Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
+
+    // EH frame header.
+    Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+    Asm->EmitLabelDifference(
+                Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+                Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+                                                  EHFrameInfo.Number));
+
+    Asm->OutStreamer.AddComment("FDE CIE offset");
+    Asm->EmitLabelDifference(
+                       Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+                       Asm->GetTempSymbol("eh_frame_common",
+                                          EHFrameInfo.PersonalityIndex), 4);
+
+    MCSymbol *EHFuncBeginSym =
+      Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
+
+    Asm->OutStreamer.AddComment("FDE initial location");
+    Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+
+    Asm->OutStreamer.AddComment("FDE address range");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+                                                EHFrameInfo.Number),
+                             EHFuncBeginSym,
+                             Asm->GetSizeOfEncodedValue(FDEEncoding));
+
+    // If there is a personality and landing pads then point to the language
+    // specific data area in the exception table.
+    if (MMI->getPersonalities()[0] != NULL) {
+      unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
+
+      Asm->EmitULEB128(Size, "Augmentation size");
+      Asm->OutStreamer.AddComment("Language Specific Data Area");
+      if (EHFrameInfo.hasLandingPads)
+        Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+                           LSDAEncoding);
+      else
+        Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
+
+    } else {
+      Asm->EmitULEB128(0, "Augmentation size");
+    }
+
+    // Indicate locations of function specific callee saved registers in frame.
+    Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
+
+    // On Darwin the linker honors the alignment of eh_frame, which means it
+    // must be 8-byte on 64-bit targets to match what gcc does.  Otherwise you
+    // get holes which confuse readers of eh_frame.
+    Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+                                                  EHFrameInfo.Number));
+
+    // If the function is marked used, this table should be also.  We cannot
+    // make the mark unconditional in this case, since retaining the table also
+    // retains the function in this case, and there is code around that depends
+    // on unused functions (calling undefined externals) being dead-stripped to
+    // link correctly.  Yes, there really is.
+    if (MMI->isUsedFunction(EHFrameInfo.function))
+      if (Asm->MAI->hasNoDeadStrip())
+        Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                             MCSA_NoDeadStrip);
+  }
+  Asm->OutStreamer.AddBlankLine();
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfTableException::EndModule() {
+  if (!Asm->MAI->isExceptionHandlingDwarf())
+    return;
+
+  if (!shouldEmitMovesModule && !shouldEmitTableModule)
+    return;
+
+  const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+
+  for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+    EmitCIE(Personalities[I], I);
+
+  for (std::vector<FunctionEHFrameInfo>::iterator
+         I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+    EmitFDE(*I);
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfTableException::BeginFunction(const MachineFunction *MF) {
+  shouldEmitTable = shouldEmitMoves = false;
+
+  // If any landing pads survive, we need an EH table.
+  shouldEmitTable = !MMI->getLandingPads().empty();
+
+  // See if we need frame move info.
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+  if (shouldEmitMoves || shouldEmitTable)
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+
+  shouldEmitTableModule |= shouldEmitTable;
+  shouldEmitMovesModule |= shouldEmitMoves;
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfTableException::EndFunction() {
+  if (!shouldEmitMoves && !shouldEmitTable) return;
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
+
+  // Record if this personality index uses a landing pad.
+  bool HasLandingPad = !MMI->getLandingPads().empty();
+  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (HasLandingPad)
+    EmitExceptionTable();
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  MCSymbol *FunctionEHSym =
+    Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+                                      TLOF.isFunctionEHFrameSymbolPrivate());
+
+  // Save EH frame information
+  EHFrames.
+    push_back(FunctionEHFrameInfo(FunctionEHSym,
+                                  Asm->getFunctionNumber(),
+                                  MMI->getPersonalityIndex(),
+                                  Asm->MF->getFrameInfo()->adjustsStack(),
+                                  !MMI->getLandingPads().empty(),
+                                  MMI->getFrameMoves(),
+                                  Asm->MF->getFunction()));
+}
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index c8a63cf2393b..115381767751 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
+#include <cctype>
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 2ef115dbd205..d7d0e1b3812b 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,15 +1,19 @@
 add_llvm_library(LLVMCodeGen
   AggressiveAntiDepBreaker.cpp
+  AllocationOrder.cpp
   Analysis.cpp
   BranchFolding.cpp
   CalcSpillWeights.cpp
   CallingConvLower.cpp
+  CodeGen.cpp
   CodePlacementOpt.cpp
   CriticalAntiDepBreaker.cpp
   DeadMachineInstructionElim.cpp
   DwarfEHPrepare.cpp
+  EdgeBundles.cpp
   ELFCodeEmitter.cpp
   ELFWriter.cpp
+  ExpandISelPseudos.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp
   GCStrategy.cpp
@@ -18,10 +22,13 @@ add_llvm_library(LLVMCodeGen
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
+  LiveDebugVariables.cpp
   LiveInterval.cpp
   LiveIntervalAnalysis.cpp
+  LiveIntervalUnion.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
+  LiveRangeEdit.cpp
   LocalStackSlotAllocation.cpp
   LowerSubregs.cpp
   MachineBasicBlock.cpp
@@ -34,6 +41,7 @@ add_llvm_library(LLVMCodeGen
   MachineInstr.cpp
   MachineLICM.cpp
   MachineLoopInfo.cpp
+  MachineLoopRanges.cpp
   MachineModuleInfo.cpp
   MachineModuleInfoImpls.cpp
   MachinePassRegistry.cpp
@@ -45,15 +53,17 @@ add_llvm_library(LLVMCodeGen
   OcamlGC.cpp
   OptimizePHIs.cpp
   PHIElimination.cpp
+  PHIEliminationUtils.cpp
   Passes.cpp
   PeepholeOptimizer.cpp
-  PostRAHazardRecognizer.cpp
   PostRASchedulerList.cpp
   PreAllocSplitting.cpp
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
+  RegAllocBasic.cpp
   RegAllocFast.cpp
+  RegAllocGreedy.cpp
   RegAllocLinearScan.cpp
   RegAllocPBQP.cpp
   RegisterCoalescer.cpp
@@ -63,12 +73,14 @@ add_llvm_library(LLVMCodeGen
   ScheduleDAGEmit.cpp
   ScheduleDAGInstrs.cpp
   ScheduleDAGPrinter.cpp
+  ScoreboardHazardRecognizer.cpp
   ShadowStackGC.cpp
   ShrinkWrapping.cpp
   SimpleRegisterCoalescing.cpp
   SjLjEHPrepare.cpp
   SlotIndexes.cpp
   Spiller.cpp
+  SpillPlacement.cpp
   SplitKit.cpp
   Splitter.cpp
   StackProtector.cpp
@@ -83,4 +95,5 @@ add_llvm_library(LLVMCodeGen
   VirtRegRewriter.cpp
   )
 
-target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts)
+add_subdirectory(SelectionDAG)
+add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 1b7e08a8b6bb..76bb3d148b0b 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,12 @@
 using namespace llvm;
 
 char CalculateSpillWeights::ID = 0;
-INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
-                "Calculate spill weights", false, false);
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false)
 
 void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<LiveIntervals>();
@@ -170,8 +174,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
       totalWeight *= 0.5F;
   }
 
-  li.weight = totalWeight;
-  lis_.normalizeSpillWeight(li);
+  li.weight = normalizeSpillWeight(totalWeight, li.getSize());
 }
 
 void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
@@ -218,7 +221,7 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
 
   if (rc == orc)
     return;
-  DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
-               << rc->getName() <<".\n");
+  DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg)
+               << " to " << rc->getName() <<".\n");
   mri.setRegClass(reg, rc);
 }
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 62ad8171a9d4..2ad80b4d3a75 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -34,8 +34,8 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
 // HandleByVal - Allocate a stack slot large enough to pass an argument by
 // value. The size and alignment information of the argument is encoded in its
 // parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
-                          EVT LocVT, CCValAssign::LocInfo LocInfo,
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+                          MVT LocVT, CCValAssign::LocInfo LocInfo,
                           int MinSize, int MinAlign,
                           ISD::ArgFlagsTy ArgFlags) {
   unsigned Align = ArgFlags.getByValAlign();
@@ -51,11 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
 
 /// MarkAllocated - Mark a register and all of its aliases as allocated.
 void CCState::MarkAllocated(unsigned Reg) {
-  UsedRegs[Reg/32] |= 1 << (Reg&31);
-  
-  if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
-    for (; (Reg = *RegAliases); ++RegAliases)
-      UsedRegs[Reg/32] |= 1 << (Reg&31);
+  for (const unsigned *Alias = TRI.getOverlaps(Reg);
+       unsigned Reg = *Alias; ++Alias)
+    UsedRegs[Reg/32] |= 1 << (Reg&31);
 }
 
 /// AnalyzeFormalArguments - Analyze an array of argument values,
@@ -66,12 +64,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
   unsigned NumArgs = Ins.size();
 
   for (unsigned i = 0; i != NumArgs; ++i) {
-    EVT ArgVT = Ins[i].VT;
+    MVT ArgVT = Ins[i].VT;
     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Formal argument #" << i << " has unhandled type "
-             << ArgVT.getEVTString();
+             << EVT(ArgVT).getEVTString();
 #endif
       llvm_unreachable(0);
     }
@@ -84,7 +82,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                           CCAssignFn Fn) {
   // Determine which register each value should be copied into.
   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    EVT VT = Outs[i].VT;
+    MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
       return false;
@@ -98,12 +96,12 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                             CCAssignFn Fn) {
   // Determine which register each value should be copied into.
   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    EVT VT = Outs[i].VT;
+    MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Return operand #" << i << " has unhandled type "
-             << VT.getEVTString();
+             << EVT(VT).getEVTString();
 #endif
       llvm_unreachable(0);
     }
@@ -116,12 +114,12 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   CCAssignFn Fn) {
   unsigned NumOps = Outs.size();
   for (unsigned i = 0; i != NumOps; ++i) {
-    EVT ArgVT = Outs[i].VT;
+    MVT ArgVT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Call operand #" << i << " has unhandled type "
-             << ArgVT.getEVTString();
+             << EVT(ArgVT).getEVTString();
 #endif
       llvm_unreachable(0);
     }
@@ -130,17 +128,17 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
 
 /// AnalyzeCallOperands - Same as above except it takes vectors of types
 /// and argument flags.
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
                                   SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
                                   CCAssignFn Fn) {
   unsigned NumOps = ArgVTs.size();
   for (unsigned i = 0; i != NumOps; ++i) {
-    EVT ArgVT = ArgVTs[i];
+    MVT ArgVT = ArgVTs[i];
     ISD::ArgFlagsTy ArgFlags = Flags[i];
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Call operand #" << i << " has unhandled type "
-             << ArgVT.getEVTString();
+             << EVT(ArgVT).getEVTString();
 #endif
       llvm_unreachable(0);
     }
@@ -152,12 +150,12 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
 void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
                                 CCAssignFn Fn) {
   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-    EVT VT = Ins[i].VT;
+    MVT VT = Ins[i].VT;
     ISD::ArgFlagsTy Flags = Ins[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
 #ifndef NDEBUG
       dbgs() << "Call result #" << i << " has unhandled type "
-             << VT.getEVTString();
+             << EVT(VT).getEVTString();
 #endif
       llvm_unreachable(0);
     }
@@ -166,11 +164,11 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
 
 /// AnalyzeCallResult - Same as above except it's specialized for calls which
 /// produce a single value.
-void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
   if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
 #ifndef NDEBUG
     dbgs() << "Call result has unhandled type "
-           << VT.getEVTString();
+           << EVT(VT).getEVTString();
 #endif
     llvm_unreachable(0);
   }
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..515e6f9fde87
--- /dev/null
+++ b/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,61 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+  initializeCalculateSpillWeightsPass(Registry);
+  initializeDeadMachineInstructionElimPass(Registry);
+  initializeGCModuleInfoPass(Registry);
+  initializeIfConverterPass(Registry);
+  initializeLiveDebugVariablesPass(Registry);
+  initializeLiveIntervalsPass(Registry);
+  initializeLiveStacksPass(Registry);
+  initializeLiveVariablesPass(Registry);
+  initializeMachineCSEPass(Registry);
+  initializeMachineDominatorTreePass(Registry);
+  initializeMachineLICMPass(Registry);
+  initializeMachineLoopInfoPass(Registry);
+  initializeMachineModuleInfoPass(Registry);
+  initializeMachineSinkingPass(Registry);
+  initializeMachineVerifierPassPass(Registry);
+  initializeOptimizePHIsPass(Registry);
+  initializePHIEliminationPass(Registry);
+  initializePeepholeOptimizerPass(Registry);
+  initializePreAllocSplittingPass(Registry);
+  initializeProcessImplicitDefsPass(Registry);
+  initializePEIPass(Registry);
+  initializeRALinScanPass(Registry);
+  initializeRegisterCoalescerAnalysisGroup(Registry);
+  initializeRenderMachineFunctionPass(Registry);
+  initializeSimpleRegisterCoalescingPass(Registry);
+  initializeSlotIndexesPass(Registry);
+  initializeLoopSplitterPass(Registry);
+  initializeStackProtectorPass(Registry);
+  initializeStackSlotColoringPass(Registry);
+  initializeStrongPHIEliminationPass(Registry);
+  initializeTwoAddressInstructionPassPass(Registry);
+  initializeUnreachableBlockElimPass(Registry);
+  initializeUnreachableMachineBlockElimPass(Registry);
+  initializeVirtRegMapPass(Registry);
+  initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+  initializeCodeGen(*unwrap(R));
+}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 335d2d8e9bac..f79598de1d9e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -130,21 +130,25 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
     return;
   assert(Count < InsertPosIndex && "Instruction index out of expected range!");
 
-  // Any register which was defined within the previous scheduling region
-  // may have been rescheduled and its lifetime may overlap with registers
-  // in ways not reflected in our current liveness state. For each such
-  // register, adjust the liveness state to be conservatively correct.
-  for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
-    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
-      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
-
-      // Mark this register to be non-renamable.
+  for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+    if (KillIndices[Reg] != ~0u) {
+      // If Reg is currently live, then mark that it can't be renamed as
+      // we don't know the extent of its live-range anymore (now that it
+      // has been scheduled).
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[Reg] = Count;
+    } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+      // Any register which was defined within the previous scheduling region
+      // may have been rescheduled and its lifetime may overlap with registers
+      // in ways not reflected in our current liveness state. For each such
+      // register, adjust the liveness state to be conservatively correct.
       Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
 
       // Move the def index to the end of the previous region, to reflect
       // that the def could theoretically have been scheduled at the end.
       DefIndices[Reg] = InsertPosIndex;
     }
+  }
 
   PrescanInstruction(MI);
   ScanInstruction(MI, Count);
@@ -177,7 +181,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
   // that have special allocation requirements. Also assume all registers
   // used in a call must not be changed (ABI).
   // FIXME: The issue with predicated instruction is more complex. We are being
-  // conservatively here because the kill markers cannot be trusted after
+  // conservative here because the kill markers cannot be trusted after
   // if-conversion:
   // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
   // ...
@@ -321,8 +325,62 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
   }
 }
 
+// Check all machine operands that reference the antidependent register and must
+// be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// it's use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instructions with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+                                                RegRefIter RegRefEnd,
+                                                unsigned NewReg)
+{
+  for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+    MachineOperand *RefOper = I->second;
+
+    // Don't allow the instruction defining AntiDepReg to earlyclobber its
+    // operands, in case they may be assigned to NewReg. In this case antidep
+    // breaking must fail, but it's too rare to bother optimizing.
+    if (RefOper->isDef() && RefOper->isEarlyClobber())
+      return true;
+
+    // Handle cases in which this instructions defines NewReg.
+    MachineInstr *MI = RefOper->getParent();
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &CheckOper = MI->getOperand(i);
+
+      if (!CheckOper.isReg() || !CheckOper.isDef() ||
+          CheckOper.getReg() != NewReg)
+        continue;
+
+      // Don't allow the instruction to define NewReg and AntiDepReg.
+      // When AntiDepReg is renamed it will be an illegal op.
+      if (RefOper->isDef())
+        return true;
+
+      // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+      // NewReg
+      if (CheckOper.isEarlyClobber())
+        return true;
+
+      // Don't allow inline asm to define NewReg at all. Who know what it's
+      // doing with it.
+      if (MI->isInlineAsm())
+        return true;
+    }
+  }
+  return false;
+}
+
 unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+                                                 RegRefIter RegRefEnd,
                                                  unsigned AntiDepReg,
                                                  unsigned LastNewReg,
                                                  const TargetRegisterClass *RC)
@@ -338,10 +396,10 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
     // an anti-dependence with this AntiDepReg, because that would
     // re-introduce that anti-dependence.
     if (NewReg == LastNewReg) continue;
-    // If the instruction already has a def of the NewReg, it's not suitable.
-    // For example, Instruction with multiple definitions can result in this
-    // condition.
-    if (MI->modifiesRegister(NewReg, TRI)) continue;
+    // If any instructions that define AntiDepReg also define the NewReg, it's
+    // not suitable.  For example, Instruction with multiple definitions can
+    // result in this condition.
+    if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
     // If NewReg is dead and NewReg's most recent def is not before
     // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
     assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
@@ -548,7 +606,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
     // TODO: Instead of picking the first free register, consider which might
     // be the best.
     if (AntiDepReg != 0) {
-      if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+      std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                std::multimap<unsigned, MachineOperand *>::iterator>
+        Range = RegRefs.equal_range(AntiDepReg);
+      if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+                                                     AntiDepReg,
                                                      LastNewReg[AntiDepReg],
                                                      RC)) {
         DEBUG(dbgs() << "Breaking anti-dependence edge on "
@@ -558,9 +620,6 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
 
         // Update the references to the old register to refer to the new
         // register.
-        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
-                  std::multimap<unsigned, MachineOperand *>::iterator>
-           Range = RegRefs.equal_range(AntiDepReg);
         for (std::multimap<unsigned, MachineOperand *>::iterator
              Q = Range.first, QE = Range.second; Q != QE; ++Q) {
           Q->second->setReg(NewReg);
@@ -580,7 +639,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
         }
 
         // We just went back in time and modified history; the
-        // liveness information for the anti-depenence reg is now
+        // liveness information for the anti-dependence reg is now
         // inconsistent. Set the state as if it were dead.
         Classes[NewReg] = Classes[AntiDepReg];
         DefIndices[NewReg] = DefIndices[AntiDepReg];
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 0ed7c35b0f0c..0daaef273448 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -48,8 +48,10 @@ class TargetRegisterInfo;
     /// pointer.
     std::vector<const TargetRegisterClass*> Classes;
 
-    /// RegRegs - Map registers to all their references within a live range.
+    /// RegRefs - Map registers to all their references within a live range.
     std::multimap<unsigned, MachineOperand *> RegRefs;
+    typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+      RegRefIter;
 
     /// KillIndices - The index of the most recent kill (proceding bottom-up),
     /// or ~0u if the register is not live.
@@ -90,10 +92,14 @@ class TargetRegisterInfo;
   private:
     void PrescanInstruction(MachineInstr *MI);
     void ScanInstruction(MachineInstr *MI, unsigned Count);
-    unsigned findSuitableFreeRegister(MachineInstr *MI,
+    bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+                                 RegRefIter RegRefEnd,
+                                 unsigned NewReg);
+    unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+                                      RegRefIter RegRefEnd,
                                       unsigned AntiDepReg,
                                       unsigned LastNewReg,
-                                      const TargetRegisterClass *);
+                                      const TargetRegisterClass *RC);
   };
 }
 
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 318d922adebf..fdc1d9142140 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,9 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
+    DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+     initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     bool isDead(const MachineInstr *MI) const;
@@ -45,13 +47,19 @@ namespace {
 char DeadMachineInstructionElim::ID = 0;
 
 INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
-                "Remove dead machine instructions", false, false);
+                "Remove dead machine instructions", false, false)
 
 FunctionPass *llvm::createDeadMachineInstructionElimPass() {
   return new DeadMachineInstructionElim();
 }
 
 bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+  // Technically speaking inline asm without side effects and no defs can still
+  // be deleted. But there is so much bad inline asm code out there, we should
+  // let them be.
+  if (MI->isInlineAsm())
+    return false;
+
   // Don't delete instructions with side effects.
   bool SawStore = false;
   if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
@@ -151,7 +159,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
         const MachineOperand &MO = MI->getOperand(i);
         if (MO.isReg() && MO.isDef()) {
           unsigned Reg = MO.getReg();
-          if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
             LivePhysRegs.reset(Reg);
             // Check the subreg set, not the alias set, because a def
             // of a super-register may still be partially live after
@@ -168,7 +176,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
         const MachineOperand &MO = MI->getOperand(i);
         if (MO.isReg() && MO.isUse()) {
           unsigned Reg = MO.getReg();
-          if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
             LivePhysRegs.set(Reg);
             for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
                  *AliasSet; ++AliasSet)
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 550fd3e25fb7..0ebb5b0db70e 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -43,7 +43,7 @@ namespace {
     // The eh.selector intrinsic.
     Function *SelectorIntrinsic;
 
-    // _Unwind_Resume_or_Rethrow call.
+    // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
     Constant *URoR;
 
     // The EH language-specific catch-all type.
@@ -82,11 +82,11 @@ namespace {
     /// FindAllURoRInvokes - Find all URoR invokes in the function.
     void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
 
-    /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
-    /// calls. The "unwind" part of these invokes jump to a landing pad within
-    /// the current function. This is a candidate to merge the selector
-    /// associated with the URoR invoke with the one from the URoR's landing
-    /// pad.
+    /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+    /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
+    /// a landing pad within the current function. This is a candidate to merge
+    /// the selector associated with the URoR invoke with the one from the
+    /// URoR's landing pad.
     bool HandleURoRInvokes();
 
     /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
@@ -100,7 +100,9 @@ namespace {
     DwarfEHPrepare(const TargetMachine *tm) :
       FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
       ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
-      URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
+      URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+        initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnFunction(Function &Fn);
 
@@ -224,10 +226,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
   return Changed;
 }
 
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
-/// "unwind" part of these invokes jump to a landing pad within the current
-/// function. This is a candidate to merge the selector associated with the URoR
-/// invoke with the one from the URoR's landing pad.
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
+/// landing pad within the current function. This is a candidate to merge the
+/// selector associated with the URoR invoke with the one from the URoR's
+/// landing pad.
 bool DwarfEHPrepare::HandleURoRInvokes() {
   if (!EHCatchAllValue) {
     EHCatchAllValue =
@@ -247,7 +250,10 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
 
   if (!URoR) {
     URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
-    if (!URoR) return CleanupSelectors(CatchAllSels);
+    if (!URoR) {
+      URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume");
+      if (!URoR) return CleanupSelectors(CatchAllSels);
+    }
   }
 
   SmallPtrSet<InvokeInst*, 32> URoRInvokes;
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index fb884c9e8b71..e08feeb27539 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -23,7 +23,7 @@
 #include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineRelocation.h"
 #include "llvm/Support/ELF.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class GlobalValue;
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index d14728d8a36c..0fd1e8e83bd7 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -45,6 +45,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetELFWriterInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -64,7 +65,7 @@ char ELFWriter::ID = 0;
 
 ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
   : MachineFunctionPass(ID), O(o), TM(tm),
-    OutContext(*new MCContext(*TM.getMCAsmInfo())),
+    OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
     TLOF(TM.getTargetLowering()->getObjFileLowering()),
     is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
     isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -327,6 +328,18 @@ void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
   }
 }
 
+/// HasCommonSymbols - True if this section holds common symbols, this is
+/// indicated on the ELF object file by a symbol with SHN_COMMON section
+/// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+  // FIXME: this is wrong, a common symbol can be in .data for example.
+  if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+    return true;
+
+  return false;
+}
+
+
 // EmitGlobal - Choose the right section for global and emit it
 void ELFWriter::EmitGlobal(const GlobalValue *GV) {
 
@@ -363,7 +376,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) {
     unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
     GblSym->Size = Size;
 
-    if (S->HasCommonSymbols()) { // Symbol must go to a common section
+    if (HasCommonSymbols(*S)) { // Symbol must go to a common section
       GblSym->SectionIdx = ELF::SHN_COMMON;
 
       // A new linkonce section is created for each global in the
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..aed8bc947991
--- /dev/null
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,86 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+                cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+                /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  EC.clear();
+  EC.grow(2 * MF->size());
+
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+       ++I) {
+    const MachineBasicBlock &MBB = *I;
+    unsigned OutE = 2 * MBB.getNumber() + 1;
+    // Join the outgoing bundle with the ingoing bundles of all successors.
+    for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+           SE = MBB.succ_end(); SI != SE; ++SI)
+      EC.join(OutE, 2 * (*SI)->getNumber());
+  }
+  EC.compress();
+  if (ViewEdgeBundles)
+    view();
+  return false;
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+  ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph, the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+                              bool ShortNames,
+                              const std::string &Title) {
+  const MachineFunction *MF = G.getMachineFunction();
+
+  O << "digraph {\n";
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    unsigned BB = I->getNumber();
+    O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+      << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+      << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+    for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+           SE = I->succ_end(); SI != SE; ++SI)
+      O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+        << "\" [ color=lightgray ]\n";
+  }
+  O << "}\n";
+  return O;
+}
+
+
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..b5ec303f5d93
--- /dev/null
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Psuedo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  class ExpandISelPseudos : public MachineFunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+  private:
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const {
+      return "Expand ISel Pseudo-instructions";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+                "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+  return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+  // Iterate through each instruction in the function, looking for pseudos.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I;
+    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+         MBBI != MBBE; ) {
+      MachineInstr *MI = MBBI++;
+
+      // If MI is a pseudo, expand it.
+      const TargetInstrDesc &TID = MI->getDesc();
+      if (TID.usesCustomInsertionHook()) {
+        Changed = true;
+        MachineBasicBlock *NewMBB =
+          TLI->EmitInstrWithCustomInserter(MI, MBB);
+        // The expansion may involve new basic blocks.
+        if (NewMBB != MBB) {
+          MBB = NewMBB;
+          I = NewMBB;
+          MBBI = NewMBB->begin();
+          MBBE = NewMBB->end();
+        }
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 0f6e882a7be4..d757cf409d50 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -30,7 +30,6 @@ namespace {
     raw_ostream &OS;
     
   public:
-    Printer() : FunctionPass(ID), OS(errs()) {}
     explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
 
     
@@ -56,7 +55,7 @@ namespace {
 }
 
 INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
-                "Create Garbage Collector Module Metadata", false, false);
+                "Create Garbage Collector Module Metadata", false, false)
 
 // -----------------------------------------------------------------------------
 
@@ -70,7 +69,9 @@ GCFunctionInfo::~GCFunctionInfo() {}
 char GCModuleInfo::ID = 0;
 
 GCModuleInfo::GCModuleInfo()
-  : ImmutablePass(ID) {}
+    : ImmutablePass(ID) {
+  initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
 
 GCModuleInfo::~GCModuleInfo() {
   clear();
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 719fa194d8da..766c6ee542a9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -19,11 +19,12 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -123,6 +124,11 @@ GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
 
 // -----------------------------------------------------------------------------
 
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
 FunctionPass *llvm::createGCLoweringPass() {
   return new LowerIntrinsics();
 }
@@ -130,7 +136,9 @@ FunctionPass *llvm::createGCLoweringPass() {
 char LowerIntrinsics::ID = 0;
 
 LowerIntrinsics::LowerIntrinsics()
-  : FunctionPass(ID) {}
+  : FunctionPass(ID) {
+    initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+  }
 
 const char *LowerIntrinsics::getPassName() const {
   return "Lower Garbage Collection Instructions";
@@ -139,6 +147,7 @@ const char *LowerIntrinsics::getPassName() const {
 void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
   FunctionPass::getAnalysisUsage(AU);
   AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<DominatorTree>();
 }
 
 /// doInitialization - If this module uses the GC intrinsics, find them now.
@@ -249,9 +258,16 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
   if (NeedsDefaultLoweringPass(S))
     MadeChange |= PerformDefaultLowering(F, S);
   
-  if (NeedsCustomLoweringPass(S))
+  bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+  if (UseCustomLoweringPass)
     MadeChange |= S.performCustomLowering(F);
-  
+
+  // Custom lowering may modify the CFG, so dominators must be recomputed.
+  if (UseCustomLoweringPass) {
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+      DT->DT->recalculate(F);
+  }
+
   return MadeChange;
 }
 
@@ -345,13 +361,15 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
   MachineBasicBlock::iterator RAI = CI; 
   ++RAI;                                
   
-  if (FI->getStrategy().needsSafePoint(GC::PreCall))
-    FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI,
-                                              CI->getDebugLoc()));
+  if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+    MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+    FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+  }
   
-  if (FI->getStrategy().needsSafePoint(GC::PostCall))
-    FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI,
-                                               CI->getDebugLoc()));
+  if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+    MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+    FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+  }
 }
 
 void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
@@ -364,12 +382,12 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
 }
 
 void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
-  const TargetRegisterInfo *TRI = TM->getRegisterInfo();
-  assert(TRI && "TargetRegisterInfo not available!");
+  const TargetFrameLowering *TFI = TM->getFrameLowering();
+  assert(TFI && "TargetRegisterInfo not available!");
   
   for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
                                       RE = FI->roots_end(); RI != RE; ++RI)
-    RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+    RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
 }
 
 bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 0ea30d7a7929..db53b0473a9a 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -17,7 +17,9 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -26,6 +28,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 using namespace llvm;
@@ -91,6 +94,8 @@ namespace {
     /// ClobbersPred    - True if BB could modify predicates (e.g. has
     ///                   cmp, call, etc.)
     /// NonPredSize     - Number of non-predicated instructions.
+    /// ExtraCost       - Extra cost for multi-cycle instructions.
+    /// ExtraCost2      - Some instructions are slower when predicated
     /// BB              - Corresponding MachineBasicBlock.
     /// TrueBB / FalseBB- See AnalyzeBranch().
     /// BrCond          - Conditions for end of block conditional branches.
@@ -106,6 +111,8 @@ namespace {
       bool CannotBeCopied  : 1;
       bool ClobbersPred    : 1;
       unsigned NonPredSize;
+      unsigned ExtraCost;
+      unsigned ExtraCost2;
       MachineBasicBlock *BB;
       MachineBasicBlock *TrueBB;
       MachineBasicBlock *FalseBB;
@@ -115,7 +122,7 @@ namespace {
                  IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
                  HasFallThrough(false), IsUnpredicable(false),
                  CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
-                 BB(0), TrueBB(0), FalseBB(0) {}
+                 ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
     };
 
     /// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -150,20 +157,31 @@ namespace {
     const TargetLowering *TLI;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const InstrItineraryData *InstrItins;
+    const MachineLoopInfo *MLI;
     bool MadeChange;
     int FnNum;
   public:
     static char ID;
-    IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
+    IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+      initializeIfConverterPass(*PassRegistry::getPassRegistry());
+    }
+    
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const { return "If Converter"; }
 
   private:
     bool ReverseBranchCondition(BBInfo &BBI);
-    bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+    bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+                     float Prediction, float Confidence) const;
     bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
-                       bool FalseBranch, unsigned &Dups) const;
+                       bool FalseBranch, unsigned &Dups,
+                       float Prediction, float Confidence) const;
     bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
                       unsigned &Dups1, unsigned &Dups2) const;
     void ScanInstructions(BBInfo &BBI);
@@ -188,14 +206,21 @@ namespace {
                                bool IgnoreBr = false);
     void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
-      return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+                            unsigned Cycle, unsigned Extra,
+                            float Prediction, float Confidence) const {
+      return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+                                                   Prediction, Confidence);
     }
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
-                            MachineBasicBlock &FBB, unsigned FSize) const {
-      return TSize > 0 && FSize > 0 &&
-        TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+                            unsigned TCycle, unsigned TExtra,
+                            MachineBasicBlock &FBB,
+                            unsigned FCycle, unsigned FExtra,
+                            float Prediction, float Confidence) const {
+      return TCycle > 0 && FCycle > 0 &&
+        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+                                 Prediction, Confidence);
     }
 
     // blockAlwaysFallThrough - Block ends without a terminator.
@@ -230,7 +255,9 @@ namespace {
   char IfConverter::ID = 0;
 }
 
-INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
 
 FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
 
@@ -238,6 +265,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TLI = MF.getTarget().getTargetLowering();
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  InstrItins = MF.getTarget().getInstrItineraryData();
   if (!TII) return false;
 
   // Tail merge tend to expose more if-conversion opportunities.
@@ -431,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
 /// predecessor) forms a valid simple shape for ifcvt. It also returns the
 /// number of instructions that the ifcvt would need to duplicate if performed
 /// in Dups.
-bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+                              float Prediction, float Confidence) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -441,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
 
   if (TrueBBI.BB->pred_size() > 1) {
     if (TrueBBI.CannotBeCopied ||
-        !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize))
+        !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+                                        Prediction, Confidence))
       return false;
     Dups = TrueBBI.NonPredSize;
   }
@@ -456,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
 /// returns the number of instructions that the ifcvt would need to duplicate
 /// if performed in 'Dups'.
 bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
-                                bool FalseBranch, unsigned &Dups) const {
+                                bool FalseBranch, unsigned &Dups,
+                                float Prediction, float Confidence) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -478,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
           ++Size;
       }
     }
-    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size))
+    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
+                                        Prediction, Confidence))
       return false;
     Dups = Size;
   }
@@ -493,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
   return TExit && TExit == FalseBBI.BB;
 }
 
-static
-MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
-                                               const TargetInstrInfo *TII) {
-  MachineBasicBlock::iterator I = BB->end();
-  while (I != BB->begin()) {
-    --I;
-    if (!I->getDesc().isBranch())
-      break;
-  }
-  return I;
-}
-
 /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
 /// with their common predecessor) forms a valid diamond shape for ifcvt.
 bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -533,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
       (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
     return false;
 
-  MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
-  MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+  // Count duplicate instructions at the beginning of the true and false blocks.
+  MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+  MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
   MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
   MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
-  // Skip dbg_value instructions
-  while (TI != TIE && TI->isDebugValue())
-    ++TI;
-  while (FI != FIE && FI->isDebugValue())
-    ++FI;
-  while (TI != TIE && FI != FIE) {
+  while (TIB != TIE && FIB != FIE) {
     // Skip dbg_value instructions. These do not count.
-    if (TI->isDebugValue()) {
-      while (TI != TIE && TI->isDebugValue())
-        ++TI;
-      if (TI == TIE)
+    if (TIB->isDebugValue()) {
+      while (TIB != TIE && TIB->isDebugValue())
+        ++TIB;
+      if (TIB == TIE)
         break;
     }
-    if (FI->isDebugValue()) {
-      while (FI != FIE && FI->isDebugValue())
-        ++FI;
-      if (FI == FIE)
+    if (FIB->isDebugValue()) {
+      while (FIB != FIE && FIB->isDebugValue())
+        ++FIB;
+      if (FIB == FIE)
         break;
     }
-    if (!TI->isIdenticalTo(FI))
+    if (!TIB->isIdenticalTo(FIB))
       break;
     ++Dups1;
-    ++TI;
-    ++FI;
+    ++TIB;
+    ++FIB;
   }
 
-  TI = firstNonBranchInst(TrueBBI.BB, TII);
-  FI = firstNonBranchInst(FalseBBI.BB, TII);
-  MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
-  MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
-  // Skip dbg_value instructions at end of the bb's.
-  while (TI != TIB && TI->isDebugValue())
-    --TI;
-  while (FI != FIB && FI->isDebugValue())
-    --FI;
-  while (TI != TIB && FI != FIB) {
+  // Now, in preparation for counting duplicate instructions at the ends of the
+  // blocks, move the end iterators up past any branch instructions.
+  while (TIE != TIB) {
+    --TIE;
+    if (!TIE->getDesc().isBranch())
+      break;
+  }
+  while (FIE != FIB) {
+    --FIE;
+    if (!FIE->getDesc().isBranch())
+      break;
+  }
+
+  // If Dups1 includes all of a block, then don't count duplicate
+  // instructions at the end of the blocks.
+  if (TIB == TIE || FIB == FIE)
+    return true;
+
+  // Count duplicate instructions at the ends of the blocks.
+  while (TIE != TIB && FIE != FIB) {
     // Skip dbg_value instructions. These do not count.
-    if (TI->isDebugValue()) {
-      while (TI != TIB && TI->isDebugValue())
-        --TI;
-      if (TI == TIB)
+    if (TIE->isDebugValue()) {
+      while (TIE != TIB && TIE->isDebugValue())
+        --TIE;
+      if (TIE == TIB)
         break;
     }
-    if (FI->isDebugValue()) {
-      while (FI != FIB && FI->isDebugValue())
-        --FI;
-      if (FI == FIB)
+    if (FIE->isDebugValue()) {
+      while (FIE != FIB && FIE->isDebugValue())
+        --FIE;
+      if (FIE == FIB)
         break;
     }
-    if (!TI->isIdenticalTo(FI))
+    if (!TIE->isIdenticalTo(FIE))
       break;
     ++Dups2;
-    --TI;
-    --FI;
+    --TIE;
+    --FIE;
   }
 
   return true;
@@ -627,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
 
   // Then scan all the instructions.
   BBI.NonPredSize = 0;
+  BBI.ExtraCost = 0;
+  BBI.ExtraCost2 = 0;
   BBI.ClobbersPred = false;
   for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
        I != E; ++I) {
@@ -641,9 +670,15 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
 
     if (!isCondBr) {
-      if (!isPredicated)
+      if (!isPredicated) {
         BBI.NonPredSize++;
-      else if (!AlreadyPredicated) {
+        unsigned ExtraPredCost = 0;
+        unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+                                                  &ExtraPredCost);
+        if (NumCycles > 1)
+          BBI.ExtraCost += NumCycles-1;
+        BBI.ExtraCost2 += ExtraPredCost;
+      } else if (!AlreadyPredicated) {
         // FIXME: This instruction is already predicated before the
         // if-conversion pass. It's probably something like a conditional move.
         // Mark this block unpredicable for now.
@@ -765,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   bool TNeedSub = TrueBBI.Predicate.size() > 0;
   bool FNeedSub = FalseBBI.Predicate.size() > 0;
   bool Enqueued = false;
+  
+  // Try to predict the branch, using loop info to guide us.
+  // General heuristics are:
+  //   - backedge -> 90% taken
+  //   - early exit -> 20% taken
+  //   - branch predictor confidence -> 90%
+  float Prediction = 0.5f;
+  float Confidence = 0.9f;
+  MachineLoop *Loop = MLI->getLoopFor(BB);
+  if (Loop) {
+    if (TrueBBI.BB == Loop->getHeader())
+      Prediction = 0.9f;
+    else if (FalseBBI.BB == Loop->getHeader())
+      Prediction = 0.1f;
+
+    MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
+    MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
+    if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
+      Prediction = 0.2f;
+    else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
+      Prediction = 0.8f;
+  }
+  
   if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
-                         *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) &&
+      MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+                                       TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+                         *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+                         Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
       FeasibilityAnalysis(FalseBBI, RevCond)) {
     // Diamond:
@@ -783,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }
 
-  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
     // Triangle:
     //   EBB
@@ -797,15 +859,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }
 
-  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
     Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
     Enqueued = true;
   }
 
-  if (ValidSimple(TrueBBI, Dups) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+  if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
     // Simple (split, no rejoin):
     //   EBB
@@ -820,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 
   if (CanRevCond) {
     // Try the other path...
-    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+                      1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
       Enqueued = true;
     }
 
-    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+                      1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
       Enqueued = true;
     }
 
-    if (ValidSimple(FalseBBI, Dups) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+    if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond)) {
       Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
       Enqueued = true;
@@ -1365,6 +1437,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
     MachineInstr *MI = MF.CloneMachineInstr(I);
     ToBBI.BB->insert(ToBBI.BB->end(), MI);
     ToBBI.NonPredSize++;
+    unsigned ExtraPredCost = 0;
+    unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+    if (NumCycles > 1)
+      ToBBI.ExtraCost += NumCycles-1;
+    ToBBI.ExtraCost2 += ExtraPredCost;
 
     if (!TII->isPredicated(I) && !MI->isDebugValue()) {
       if (!TII->PredicateInstruction(MI, Cond)) {
@@ -1438,7 +1515,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
   FromBBI.Predicate.clear();
 
   ToBBI.NonPredSize += FromBBI.NonPredSize;
+  ToBBI.ExtraCost += FromBBI.ExtraCost;
+  ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
   FromBBI.NonPredSize = 0;
+  FromBBI.ExtraCost = 0;
+  FromBBI.ExtraCost2 = 0;
 
   ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
   ToBBI.HasFallThrough = FromBBI.HasFallThrough;
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index b965bfdcf3b8..a1bd972d38e2 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -12,28 +12,34 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "spiller"
+#define DEBUG_TYPE "regalloc"
 #include "Spiller.h"
-#include "SplitKit.h"
+#include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+static cl::opt<bool>
+VerifySpills("verify-spills", cl::desc("Verify after each spill/split"));
+
 namespace {
 class InlineSpiller : public Spiller {
   MachineFunctionPass &pass_;
   MachineFunction &mf_;
   LiveIntervals &lis_;
-  MachineLoopInfo &loops_;
+  LiveStacks &lss_;
+  AliasAnalysis *aa_;
   VirtRegMap &vrm_;
   MachineFrameInfo &mfi_;
   MachineRegisterInfo &mri_;
@@ -41,19 +47,12 @@ class InlineSpiller : public Spiller {
   const TargetRegisterInfo &tri_;
   const BitVector reserved_;
 
-  SplitAnalysis splitAnalysis_;
-
   // Variables that are valid during spill(), but used by multiple methods.
-  LiveInterval *li_;
-  SmallVectorImpl<LiveInterval*> *newIntervals_;
+  LiveRangeEdit *edit_;
   const TargetRegisterClass *rc_;
   int stackSlot_;
-  const SmallVectorImpl<LiveInterval*> *spillIs_;
 
-  // Values of the current interval that can potentially remat.
-  SmallPtrSet<VNInfo*, 8> reMattable_;
-
-  // Values in reMattable_ that failed to remat at some point.
+  // Values that failed to remat at some point.
   SmallPtrSet<VNInfo*, 8> usedValues_;
 
   ~InlineSpiller() {}
@@ -65,30 +64,29 @@ public:
     : pass_(pass),
       mf_(mf),
       lis_(pass.getAnalysis<LiveIntervals>()),
-      loops_(pass.getAnalysis<MachineLoopInfo>()),
+      lss_(pass.getAnalysis<LiveStacks>()),
+      aa_(&pass.getAnalysis<AliasAnalysis>()),
       vrm_(vrm),
       mfi_(*mf.getFrameInfo()),
       mri_(mf.getRegInfo()),
       tii_(*mf.getTarget().getInstrInfo()),
       tri_(*mf.getTarget().getRegisterInfo()),
-      reserved_(tri_.getReservedRegs(mf_)),
-      splitAnalysis_(mf, lis_, loops_) {}
+      reserved_(tri_.getReservedRegs(mf_)) {}
 
   void spill(LiveInterval *li,
              SmallVectorImpl<LiveInterval*> &newIntervals,
-             SmallVectorImpl<LiveInterval*> &spillIs);
+             const SmallVectorImpl<LiveInterval*> &spillIs);
 
-private:
-  bool split();
+  void spill(LiveRangeEdit &);
 
-  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
-                          SlotIndex UseIdx);
+private:
   bool reMaterializeFor(MachineBasicBlock::iterator MI);
   void reMaterializeAll();
 
   bool coalesceStackAccess(MachineInstr *MI);
   bool foldMemoryOperand(MachineBasicBlock::iterator MI,
-                         const SmallVectorImpl<unsigned> &Ops);
+                         const SmallVectorImpl<unsigned> &Ops,
+                         MachineInstr *LoadMI = 0);
   void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
   void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
 };
@@ -98,106 +96,41 @@ namespace llvm {
 Spiller *createInlineSpiller(MachineFunctionPass &pass,
                              MachineFunction &mf,
                              VirtRegMap &vrm) {
+  if (VerifySpills)
+    mf.verify(&pass, "When creating inline spiller");
   return new InlineSpiller(pass, mf, vrm);
 }
 }
 
-/// split - try splitting the current interval into pieces that may allocate
-/// separately. Return true if successful.
-bool InlineSpiller::split() {
-  splitAnalysis_.analyze(li_);
-
-  if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
-    // We can split, but li_ may be left intact with fewer uses.
-    if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
-          .splitAroundLoop(loop))
-      return true;
-  }
-
-  // Try splitting into single block intervals.
-  SplitAnalysis::BlockPtrSet blocks;
-  if (splitAnalysis_.getMultiUseBlocks(blocks)) {
-    if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
-          .splitSingleBlocks(blocks))
-      return true;
-  }
-
-  // Try splitting inside a basic block.
-  if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) {
-    if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
-          .splitInsideBlock(MBB))
-      return true;
-  }
-
-  // We may have been able to split out some uses, but the original interval is
-  // intact, and it should still be spilled.
-  return false;
-}
-
-/// allUsesAvailableAt - Return true if all registers used by OrigMI at
-/// OrigIdx are also available with the same value at UseIdx.
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
-                                       SlotIndex OrigIdx,
-                                       SlotIndex UseIdx) {
-  OrigIdx = OrigIdx.getUseIndex();
-  UseIdx = UseIdx.getUseIndex();
-  for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = OrigMI->getOperand(i);
-    if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
-      continue;
-    // Reserved registers are OK.
-    if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
-      continue;
-    // We don't want to move any defs.
-    if (MO.isDef())
-      return false;
-    // We cannot depend on virtual registers in spillIs_. They will be spilled.
-    for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
-      if ((*spillIs_)[si]->reg == MO.getReg())
-        return false;
-
-    LiveInterval &LI = lis_.getInterval(MO.getReg());
-    const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
-    if (!OVNI)
-      continue;
-    if (OVNI != LI.getVNInfoAt(UseIdx))
-      return false;
-  }
-  return true;
-}
-
-/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of
 /// reloading it.
 bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
   SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
-  VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+  VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
+
   if (!OrigVNI) {
     DEBUG(dbgs() << "\tadding <undef> flags: ");
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
-      if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+      if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg())
         MO.setIsUndef();
     }
     DEBUG(dbgs() << UseIdx << '\t' << *MI);
     return true;
   }
-  if (!reMattable_.count(OrigVNI)) {
-    DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
-                 << UseIdx << '\t' << *MI);
-    return false;
-  }
-  MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
-  if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+
+  LiveRangeEdit::Remat RM(OrigVNI);
+  if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) {
     usedValues_.insert(OrigVNI);
     DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
     return false;
   }
 
-  // If the instruction also writes li_->reg, it had better not require the same
-  // register for uses and defs.
+  // If the instruction also writes edit_->getReg(), it had better not require
+  // the same register for uses and defs.
   bool Reads, Writes;
   SmallVector<unsigned, 8> Ops;
-  tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+  tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops);
   if (Writes) {
     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(Ops[i]);
@@ -209,62 +142,57 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
     }
   }
 
+  // Before rematerializing into a register for a single instruction, try to
+  // fold a load into the instruction. That avoids allocating a new register.
+  if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+      foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+    edit_->markRematerialized(RM.ParentVNI);
+    return true;
+  }
+
   // Alocate a new register for the remat.
-  unsigned NewVReg = mri_.createVirtualRegister(rc_);
-  vrm_.grow();
-  LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+  LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_);
   NewLI.markNotSpillable();
-  newIntervals_->push_back(&NewLI);
+
+  // Rematting for a copy: Set allocation hint to be the destination register.
+  if (MI->isCopy())
+    mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg());
 
   // Finally we can rematerialize OrigMI before MI.
-  MachineBasicBlock &MBB = *MI->getParent();
-  tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
-  MachineBasicBlock::iterator RematMI = MI;
-  SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
-  DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t' << *RematMI);
+  SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+                                            lis_, tii_, tri_);
+  DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t'
+               << *lis_.getInstructionFromIndex(DefIdx));
 
   // Replace operands
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(Ops[i]);
-    if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
-      MO.setReg(NewVReg);
+    if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) {
+      MO.setReg(NewLI.reg);
       MO.setIsKill();
     }
   }
   DEBUG(dbgs() << "\t        " << UseIdx << '\t' << *MI);
 
-  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
-                                       lis_.getVNInfoAllocator());
+  VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator());
   NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
   DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
   return true;
 }
 
-/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
 /// and trim the live ranges after.
 void InlineSpiller::reMaterializeAll() {
   // Do a quick scan of the interval values to find if any are remattable.
-  reMattable_.clear();
-  usedValues_.clear();
-  for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
-       E = li_->vni_end(); I != E; ++I) {
-    VNInfo *VNI = *I;
-    if (VNI->isUnused() || !VNI->isDefAccurate())
-      continue;
-    MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
-    if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
-      continue;
-    reMattable_.insert(VNI);
-  }
-
-  // Often, no defs are remattable.
-  if (reMattable_.empty())
+  if (!edit_->anyRematerializable(lis_, tii_, aa_))
     return;
 
-  // Try to remat before all uses of li_->reg.
+  usedValues_.clear();
+
+  // Try to remat before all uses of edit_->getReg().
   bool anyRemat = false;
   for (MachineRegisterInfo::use_nodbg_iterator
-       RI = mri_.use_nodbg_begin(li_->reg);
+       RI = mri_.use_nodbg_begin(edit_->getReg());
        MachineInstr *MI = RI.skipInstruction();)
      anyRemat |= reMaterializeFor(MI);
 
@@ -273,33 +201,35 @@ void InlineSpiller::reMaterializeAll() {
 
   // Remove any values that were completely rematted.
   bool anyRemoved = false;
-  for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
-       E = reMattable_.end(); I != E; ++I) {
+  for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(),
+       E = edit_->getParent().vni_end(); I != E; ++I) {
     VNInfo *VNI = *I;
-    if (VNI->hasPHIKill() || usedValues_.count(VNI))
+    if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) ||
+        usedValues_.count(VNI))
       continue;
     MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
     DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
     lis_.RemoveMachineInstrFromMaps(DefMI);
     vrm_.RemoveMachineInstrFromMaps(DefMI);
     DefMI->eraseFromParent();
-    VNI->setIsDefAccurate(false);
+    VNI->def = SlotIndex();
     anyRemoved = true;
   }
 
   if (!anyRemoved)
     return;
 
-  // Removing values may cause debug uses where li_ is not live.
-  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
+  // Removing values may cause debug uses where parent is not live.
+  for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg());
        MachineInstr *MI = RI.skipInstruction();) {
     if (!MI->isDebugValue())
       continue;
-    // Try to preserve the debug value if li_ is live immediately after it.
+    // Try to preserve the debug value if parent is live immediately after it.
     MachineBasicBlock::iterator NextMI = MI;
     ++NextMI;
     if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
-      VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
+      SlotIndex Idx = lis_.getInstructionIndex(NextMI);
+      VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
       if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
         continue;
     }
@@ -317,7 +247,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
     return false;
 
   // We have a stack access. Is it the right register and slot?
-  if (reg != li_->reg || FI != stackSlot_)
+  if (reg != edit_->getReg() || FI != stackSlot_)
     return false;
 
   DEBUG(dbgs() << "Coalescing stack access: " << *MI);
@@ -327,9 +257,13 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
 }
 
 /// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// Return true on success, and MI will be erased.
+/// @param MI     Instruction using or defining the current register.
+/// @param Ops    Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return       True on success, and MI will be erased.
 bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
-                                      const SmallVectorImpl<unsigned> &Ops) {
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      MachineInstr *LoadMI) {
   // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
   // operands.
   SmallVector<unsigned, 8> FoldOps;
@@ -341,16 +275,22 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
     // FIXME: Teach targets to deal with subregs.
     if (MO.getSubReg())
       return false;
+    // We cannot fold a load instruction into a def.
+    if (LoadMI && MO.isDef())
+      return false;
     // Tied use operands should not be passed to foldMemoryOperand.
     if (!MI->isRegTiedToDefOperand(Idx))
       FoldOps.push_back(Idx);
   }
 
-  MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+  MachineInstr *FoldMI =
+                LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI)
+                       : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
   if (!FoldMI)
     return false;
   lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
-  vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+  if (!LoadMI)
+    vrm_.addSpillSlotUse(stackSlot_, FoldMI);
   MI->eraseFromParent();
   DEBUG(dbgs() << "\tfolded: " << *FoldMI);
   return true;
@@ -366,7 +306,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
   SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
   vrm_.addSpillSlotUse(stackSlot_, MI);
   DEBUG(dbgs() << "\treload:  " << LoadIdx << '\t' << *MI);
-  VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
+  VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
                                        lis_.getVNInfoAllocator());
   NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
 }
@@ -375,44 +315,58 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
 void InlineSpiller::insertSpill(LiveInterval &NewLI,
                                 MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
+
+  // Get the defined value. It could be an early clobber so keep the def index.
   SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+  VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
+  assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo");
+  Idx = VNI->def;
+
   tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
   --MI; // Point to store instruction.
   SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
   vrm_.addSpillSlotUse(stackSlot_, MI);
   DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
-  VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
-                                        lis_.getVNInfoAllocator());
+  VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator());
   NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
 }
 
 void InlineSpiller::spill(LiveInterval *li,
                           SmallVectorImpl<LiveInterval*> &newIntervals,
-                          SmallVectorImpl<LiveInterval*> &spillIs) {
-  DEBUG(dbgs() << "Inline spilling " << *li << "\n");
-  assert(li->isSpillable() && "Attempting to spill already spilled value.");
-  assert(!li->isStackSlot() && "Trying to spill a stack slot.");
-
-  li_ = li;
-  newIntervals_ = &newIntervals;
-  rc_ = mri_.getRegClass(li->reg);
-  spillIs_ = &spillIs;
+                          const SmallVectorImpl<LiveInterval*> &spillIs) {
+  LiveRangeEdit edit(*li, newIntervals, spillIs);
+  spill(edit);
+  if (VerifySpills)
+    mf_.verify(&pass_, "After inline spill");
+}
 
-  if (split())
-    return;
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+  edit_ = &edit;
+  assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+         && "Trying to spill a stack slot.");
+  DEBUG(dbgs() << "Inline spilling "
+               << mri_.getRegClass(edit.getReg())->getName()
+               << ':' << edit.getParent() << "\n");
+  assert(edit.getParent().isSpillable() &&
+         "Attempting to spill already spilled value.");
 
   reMaterializeAll();
 
   // Remat may handle everything.
-  if (li_->empty())
+  if (edit_->getParent().empty())
     return;
 
-  stackSlot_ = vrm_.getStackSlot(li->reg);
-  if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
-    stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+  rc_ = mri_.getRegClass(edit.getReg());
+  stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg());
+
+  // Update LiveStacks now that we are committed to spilling.
+  LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_);
+  assert(stacklvr.empty() && "Just created stack slot not empty");
+  stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
+  stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0));
 
   // Iterate over instructions using register.
-  for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+  for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg());
        MachineInstr *MI = RI.skipInstruction();) {
 
     // Debug values are not allowed to affect codegen.
@@ -440,7 +394,7 @@ void InlineSpiller::spill(LiveInterval *li,
     // Analyze instruction.
     bool Reads, Writes;
     SmallVector<unsigned, 8> Ops;
-    tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+    tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops);
 
     // Attempt to fold memory ops.
     if (foldMemoryOperand(MI, Ops))
@@ -448,9 +402,7 @@ void InlineSpiller::spill(LiveInterval *li,
 
     // Allocate interval around instruction.
     // FIXME: Infer regclass from instruction alone.
-    unsigned NewVReg = mri_.createVirtualRegister(rc_);
-    vrm_.grow();
-    LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+    LiveInterval &NewLI = edit.create(mri_, lis_, vrm_);
     NewLI.markNotSpillable();
 
     if (Reads)
@@ -460,7 +412,7 @@ void InlineSpiller::spill(LiveInterval *li,
     bool hasLiveDef = false;
     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(Ops[i]);
-      MO.setReg(NewVReg);
+      MO.setReg(NewLI.reg);
       if (MO.isUse()) {
         if (!MI->isRegTiedToDefOperand(Ops[i]))
           MO.setIsKill();
@@ -475,6 +427,5 @@ void InlineSpiller::spill(LiveInterval *li,
       insertSpill(NewLI, MI);
 
     DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
-    newIntervals.push_back(&NewLI);
   }
 }
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3852ebaf6425..3861ddadf655 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -85,9 +85,11 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
 }
 
 // VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
 #endif
 
 void IntrinsicLowering::AddPrototypes(Module &M) {
@@ -536,3 +538,27 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
          "Lowering should have eliminated any uses of the intrinsic call!");
   CI->eraseFromParent();
 }
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+  // Verify this is a simple bswap.
+  if (CI->getNumArgOperands() != 1 ||
+      CI->getType() != CI->getArgOperand(0)->getType() ||
+      !CI->getType()->isIntegerTy())
+    return false;
+
+  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty)
+    return false;
+
+  // Okay, we can do this xform, do so now.
+  const Type *Tys[] = { Ty };
+  Module *M = CI->getParent()->getParent()->getParent();
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+  Value *Op = CI->getArgOperand(0);
+  Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+  CI->replaceAllUsesWith(Op);
+  CI->eraseFromParent();
+  return true;
+}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 36038027b259..80dfc763af69 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -20,9 +20,11 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Transforms/Scalar.h"
@@ -30,6 +32,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/StandardPasses.h"
 using namespace llvm;
 
 namespace llvm {
@@ -140,13 +143,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
 
     // Create a code emitter if asked to show the encoding.
     MCCodeEmitter *MCE = 0;
-    if (ShowMCEncoding)
+    TargetAsmBackend *TAB = 0;
+    if (ShowMCEncoding) {
       MCE = getTarget().createCodeEmitter(*this, *Context);
-
-    AsmStreamer.reset(createAsmStreamer(*Context, Out,
-                                        getTargetData()->isLittleEndian(),
-                                        getVerboseAsm(), InstPrinter,
-                                        MCE, ShowMCInst));
+      TAB = getTarget().createAsmBackend(TargetTriple);
+    }
+
+    MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+                                                  getVerboseAsm(),
+                                                  hasMCUseLoc(),
+                                                  InstPrinter,
+                                                  MCE, TAB,
+                                                  ShowMCInst);
+    AsmStreamer.reset(S);
     break;
   }
   case CGFT_ObjectFile: {
@@ -159,7 +168,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
 
     AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
                                                        *TAB, Out, MCE,
-                                                       hasMCRelaxAll()));
+                                                       hasMCRelaxAll(),
+                                                       hasMCNoExecStack()));
+    AsmStreamer.get()->InitSections();
     break;
   }
   case CGFT_Null:
@@ -241,7 +252,7 @@ static void printAndVerify(PassManagerBase &PM,
     PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
 
   if (VerifyMachineCode)
-    PM.add(createMachineVerifierPass());
+    PM.add(createMachineVerifierPass(Banner));
 }
 
 /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
@@ -253,6 +264,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                                                MCContext *&OutContext) {
   // Standard LLVM-Level Passes.
 
+  // Basic AliasAnalysis support.
+  createStandardAliasAnalysisPasses(&PM);
+
   // Before running any passes, run the verifier to determine if the input
   // coming from the front-end and/or optimizer is valid.
   if (!DisableVerify)
@@ -288,7 +302,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     // edge from elsewhere.
     PM.add(createSjLjEHPass(getTargetLowering()));
     // FALLTHROUGH
-  case ExceptionHandling::Dwarf:
+  case ExceptionHandling::DwarfCFI:
+  case ExceptionHandling::DwarfTable:
     PM.add(createDwarfEHPass(this));
     break;
   case ExceptionHandling::None:
@@ -320,7 +335,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   // Install a MachineModuleInfo class, which is an immutable pass that holds
   // all the per-module stuff we're generating, including MCContext.
-  MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+  TargetAsmInfo *TAI = new TargetAsmInfo(*this);
+  MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
   PM.add(MMI);
   OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
 
@@ -339,6 +355,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   // Print the instruction selected machine code...
   printAndVerify(PM, "After Instruction Selection");
 
+  // Expand pseudo-instructions emitted by ISel.
+  PM.add(createExpandISelPseudosPass());
+
   // Optimize PHIs before DCE: removing dead PHI cycles may make more
   // instructions dead.
   if (OptLevel != CodeGenOpt::None)
@@ -356,13 +375,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     PM.add(createDeadMachineInstructionElimPass());
     printAndVerify(PM, "After codegen DCE pass");
 
-    PM.add(createPeepholeOptimizerPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     PM.add(createMachineCSEPass());
     if (!DisableMachineSink)
       PM.add(createMachineSinkingPass());
     printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+    PM.add(createPeepholeOptimizerPass());
+    printAndVerify(PM, "After codegen peephole optimization pass");
   }
 
   // Pre-ra tail duplication.
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index b9527fafbee8..0eb009ddac29 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -16,6 +16,7 @@
 #define DEBUG_TYPE "scheduler"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
@@ -35,14 +36,14 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
   unsigned RHSLatency = PQ->getLatency(RHSNum);
   if (LHSLatency < RHSLatency) return true;
   if (LHSLatency > RHSLatency) return false;
-  
+
   // After that, if two nodes have identical latencies, look to see if one will
   // unblock more other nodes than the other.
   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
   if (LHSBlocked < RHSBlocked) return true;
   if (LHSBlocked > RHSBlocked) return false;
-  
+
   // Finally, just to provide a stable ordering, use the node number as a
   // deciding factor.
   return LHSNum < RHSNum;
@@ -64,7 +65,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
       OnlyAvailablePred = &Pred;
     }
   }
-      
+
   return OnlyAvailablePred;
 }
 
@@ -78,7 +79,7 @@ void LatencyPriorityQueue::push(SUnit *SU) {
       ++NumNodesBlocking;
   }
   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
-  
+
   Queue.push_back(SU);
 }
 
@@ -102,10 +103,10 @@ void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
 /// node of the same priority that will not make a node available.
 void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
   if (SU->isAvailable) return;  // All preds scheduled.
-  
+
   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
   if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
-  
+
   // Okay, we found a single predecessor that is available, but not scheduled.
   // Since it is available, it must be in the priority queue.  First remove it.
   remove(OnlyAvailablePred);
@@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
     std::swap(*I, Queue.back());
   Queue.pop_back();
 }
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+  LatencyPriorityQueue q = *this;
+  while (!q.empty()) {
+    SUnit *su = q.pop();
+    dbgs() << "Height " << su->getHeight() << ": ";
+    su->dump(DAG);
+  }
+}
+#endif
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..853ec1ac7c13
--- /dev/null
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,711 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+          cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+                "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+                "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequiredTransitive<LiveIntervals>();
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class UserValue {
+  const MDNode *variable; ///< The debug info variable we are part of.
+  unsigned offset;        ///< Byte offset into variable.
+  DebugLoc dl;            ///< The debug location for the variable. This is
+                          ///< used by dwarf writer to find lexical scope.
+  UserValue *leader;      ///< Equivalence class leader.
+  UserValue *next;        ///< Next value in equivalence class, or null.
+
+  /// Numbered locations referenced by locmap.
+  SmallVector<MachineOperand, 4> locations;
+
+  /// Map of slot indices where this value is live.
+  LocMap locInts;
+
+  /// coalesceLocation - After LocNo was changed, check if it has become
+  /// identical to another location, and coalesce them. This may cause LocNo or
+  /// a later location to be erased, but no earlier location will be erased.
+  void coalesceLocation(unsigned LocNo);
+
+  /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+  void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+                        LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx.
+  void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+  /// UserValue - Create a new UserValue.
+  UserValue(const MDNode *var, unsigned o, DebugLoc L, 
+            LocMap::Allocator &alloc)
+    : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+  {}
+
+  /// getLeader - Get the leader of this value's equivalence class.
+  UserValue *getLeader() {
+    UserValue *l = leader;
+    while (l != l->leader)
+      l = l->leader;
+    return leader = l;
+  }
+
+  /// getNext - Return the next UserValue in the equivalence class.
+  UserValue *getNext() const { return next; }
+
+  /// match - Does this UserValue match the aprameters?
+  bool match(const MDNode *Var, unsigned Offset) const {
+    return Var == variable && Offset == offset;
+  }
+
+  /// merge - Merge equivalence classes.
+  static UserValue *merge(UserValue *L1, UserValue *L2) {
+    L2 = L2->getLeader();
+    if (!L1)
+      return L2;
+    L1 = L1->getLeader();
+    if (L1 == L2)
+      return L1;
+    // Splice L2 before L1's members.
+    UserValue *End = L2;
+    while (End->next)
+      End->leader = L1, End = End->next;
+    End->leader = L1;
+    End->next = L1->next;
+    L1->next = L2;
+    return L1;
+  }
+
+  /// getLocationNo - Return the location number that matches Loc.
+  unsigned getLocationNo(const MachineOperand &LocMO) {
+    if (LocMO.isReg() && LocMO.getReg() == 0)
+      return ~0u;
+    for (unsigned i = 0, e = locations.size(); i != e; ++i)
+      if (LocMO.isIdenticalTo(locations[i]))
+        return i;
+    locations.push_back(LocMO);
+    // We are storing a MachineOperand outside a MachineInstr.
+    locations.back().clearParent();
+    return locations.size() - 1;
+  }
+
+  /// addDef - Add a definition point to this value.
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+    // Add a singular (Idx,Idx) -> Loc mapping.
+    LocMap::iterator I = locInts.find(Idx);
+    if (!I.valid() || I.start() != Idx)
+      I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+  }
+
+  /// extendDef - Extend the current definition as far as possible down the
+  /// dominator tree. Stop when meeting an existing def or when leaving the live
+  /// range of VNI.
+  /// @param Idx   Starting point for the definition.
+  /// @param LocNo Location number to propagate.
+  /// @param LI    Restrict liveness to where LI has the value VNI. May be null.
+  /// @param VNI   When LI is not null, this is the value to restrict to.
+  /// @param LIS   Live intervals analysis.
+  /// @param MDT   Dominator tree.
+  void extendDef(SlotIndex Idx, unsigned LocNo,
+                 LiveInterval *LI, const VNInfo *VNI,
+                 LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// computeIntervals - Compute the live intervals of all locations after
+  /// collecting all their def points.
+  void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+                      const TargetRegisterInfo *TRI);
+
+  /// rewriteLocations - Rewrite virtual register locations according to the
+  /// provided virtual register map.
+  void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+  void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+  /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
+  /// variable may have more than one corresponding DBG_VALUE instructions. 
+  /// Only first one needs DebugLoc to identify variable's lexical scope
+  /// in source file.
+  DebugLoc findDebugLoc();
+  void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+  LiveDebugVariables &pass;
+  LocMap::Allocator allocator;
+  MachineFunction *MF;
+  LiveIntervals *LIS;
+  MachineDominatorTree *MDT;
+  const TargetRegisterInfo *TRI;
+
+  /// userValues - All allocated UserValue instances.
+  SmallVector<UserValue*, 8> userValues;
+
+  /// Map virtual register to eq class leader.
+  typedef DenseMap<unsigned, UserValue*> VRMap;
+  VRMap virtRegToEqClass;
+
+  /// Map user variable to eq class leader.
+  typedef DenseMap<const MDNode *, UserValue*> UVMap;
+  UVMap userVarMap;
+
+  /// getUserValue - Find or create a UserValue.
+  UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+  /// lookupVirtReg - Find the EC leader for VirtReg or null.
+  UserValue *lookupVirtReg(unsigned VirtReg);
+
+  /// mapVirtReg - Map virtual register to an equivalence class.
+  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+  /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+  /// @param MI  DBG_VALUE instruction
+  /// @param Idx Last valid SLotIndex before instruction.
+  /// @return    True if the DBG_VALUE instruction should be deleted.
+  bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+  /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+  /// a UserValue def for each instruction.
+  /// @param mf MachineFunction to be scanned.
+  /// @return True if any debug values were found.
+  bool collectDebugValues(MachineFunction &mf);
+
+  /// computeIntervals - Compute the live intervals of all user values after
+  /// collecting all their def points.
+  void computeIntervals();
+
+public:
+  LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+  bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Relase all memory.
+  void clear() {
+    DeleteContainerPointers(userValues);
+    userValues.clear();
+    virtRegToEqClass.clear();
+    userVarMap.clear();
+  }
+
+  /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+  if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+    OS << "!\"" << MDS->getString() << "\"\t";
+  if (offset)
+    OS << '+' << offset;
+  for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+    OS << " [" << I.start() << ';' << I.stop() << "):";
+    if (I.value() == ~0u)
+      OS << "undef";
+    else
+      OS << I.value();
+  }
+  for (unsigned i = 0, e = locations.size(); i != e; ++i)
+    OS << " Loc" << i << '=' << locations[i];
+  OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+  OS << "********** DEBUG VARIABLES **********\n";
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+    userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+  unsigned KeepLoc = 0;
+  for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+    if (KeepLoc == LocNo)
+      continue;
+    if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+      break;
+  }
+  // No matches.
+  if (KeepLoc == locations.size())
+    return;
+
+  // Keep the smaller location, erase the larger one.
+  unsigned EraseLoc = LocNo;
+  if (KeepLoc > EraseLoc)
+    std::swap(KeepLoc, EraseLoc);
+  locations.erase(locations.begin() + EraseLoc);
+
+  // Rewrite values.
+  for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+    unsigned v = I.value();
+    if (v == EraseLoc)
+      I.setValue(KeepLoc);      // Coalesce when possible.
+    else if (v > EraseLoc)
+      I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+  }
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+                                 DebugLoc DL) {
+  UserValue *&Leader = userVarMap[Var];
+  if (Leader) {
+    UserValue *UV = Leader->getLeader();
+    Leader = UV;
+    for (; UV; UV = UV->getNext())
+      if (UV->match(Var, Offset))
+        return UV;
+  }
+
+  UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+  userValues.push_back(UV);
+  Leader = UserValue::merge(Leader, UV);
+  return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+  UserValue *&Leader = virtRegToEqClass[VirtReg];
+  Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+  if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+    return UV->getLeader();
+  return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+  // DBG_VALUE loc, offset, variable
+  if (MI->getNumOperands() != 3 ||
+      !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+    DEBUG(dbgs() << "Can't handle " << *MI);
+    return false;
+  }
+
+  // Get or create the UserValue for (variable,offset).
+  unsigned Offset = MI->getOperand(1).getImm();
+  const MDNode *Var = MI->getOperand(2).getMetadata();
+  UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+
+  // If the location is a virtual register, make sure it is mapped.
+  if (MI->getOperand(0).isReg()) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      mapVirtReg(Reg, UV);
+  }
+
+  UV->addDef(Idx, MI->getOperand(0));
+  return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+  bool Changed = false;
+  for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+       ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+         MBBI != MBBE;) {
+      if (!MBBI->isDebugValue()) {
+        ++MBBI;
+        continue;
+      }
+      // DBG_VALUE has no slot index, use the previous instruction instead.
+      SlotIndex Idx = MBBI == MBB->begin() ?
+        LIS->getMBBStartIdx(MBB) :
+        LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+      // Handle consecutive DBG_VALUE instructions with the same slot index.
+      do {
+        if (handleDebugValue(MBBI, Idx)) {
+          MBBI = MBB->erase(MBBI);
+          Changed = true;
+        } else
+          ++MBBI;
+      } while (MBBI != MBBE && MBBI->isDebugValue());
+    }
+  }
+  return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+                          LiveInterval *LI, const VNInfo *VNI,
+                          LiveIntervals &LIS, MachineDominatorTree &MDT) {
+  SmallVector<SlotIndex, 16> Todo;
+  Todo.push_back(Idx);
+
+  do {
+    SlotIndex Start = Todo.pop_back_val();
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+    SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+    LocMap::iterator I = locInts.find(Start);
+
+    // Limit to VNI's live range.
+    bool ToEnd = true;
+    if (LI && VNI) {
+      LiveRange *Range = LI->getLiveRangeContaining(Start);
+      if (!Range || Range->valno != VNI)
+        continue;
+      if (Range->end < Stop)
+        Stop = Range->end, ToEnd = false;
+    }
+
+    // There could already be a short def at Start.
+    if (I.valid() && I.start() <= Start) {
+      // Stop when meeting a different location or an already extended interval.
+      Start = Start.getNextSlot();
+      if (I.value() != LocNo || I.stop() != Start)
+        continue;
+      // This is a one-slot placeholder. Just skip it.
+      ++I;
+    }
+
+    // Limited by the next def.
+    if (I.valid() && I.start() < Stop)
+      Stop = I.start(), ToEnd = false;
+
+    if (Start >= Stop)
+      continue;
+
+    I.insert(Start, Stop, LocNo);
+
+    // If we extended to the MBB end, propagate down the dominator tree.
+    if (!ToEnd)
+      continue;
+    const std::vector<MachineDomTreeNode*> &Children =
+      MDT.getNode(MBB)->getChildren();
+    for (unsigned i = 0, e = Children.size(); i != e; ++i)
+      Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+  } while (!Todo.empty());
+}
+
+void
+UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
+  SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (Skipping undefs).
+  for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+    if (I.value() != ~0u)
+      Defs.push_back(std::make_pair(I.start(), I.value()));
+
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    SlotIndex Idx = Defs[i].first;
+    unsigned LocNo = Defs[i].second;
+    const MachineOperand &Loc = locations[LocNo];
+
+    // Register locations are constrained to where the register value is live.
+    if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+      LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+      const VNInfo *VNI = LI->getVNInfoAt(Idx);
+      extendDef(Idx, LocNo, LI, VNI, LIS, MDT);
+    } else
+      extendDef(Idx, LocNo, 0, 0, LIS, MDT);
+  }
+
+  // Finally, erase all the undefs.
+  for (LocMap::iterator I = locInts.begin(); I.valid();)
+    if (I.value() == ~0u)
+      I.erase();
+    else
+      ++I;
+}
+
+void LDVImpl::computeIntervals() {
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+    userValues[i]->computeIntervals(*LIS, *MDT);
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  LIS = &pass.getAnalysis<LiveIntervals>();
+  MDT = &pass.getAnalysis<MachineDominatorTree>();
+  TRI = mf.getTarget().getRegisterInfo();
+  clear();
+  DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+               << ((Value*)mf.getFunction())->getName()
+               << " **********\n");
+
+  bool Changed = collectDebugValues(mf);
+  computeIntervals();
+  DEBUG(print(dbgs()));
+  return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+  if (!EnableLDV)
+    return false;
+  if (!pImpl)
+    pImpl = new LDVImpl(this);
+  return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+  if (pImpl)
+    delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+               const TargetRegisterInfo *TRI) {
+  for (unsigned i = locations.size(); i; --i) {
+    unsigned LocNo = i - 1;
+    MachineOperand &Loc = locations[LocNo];
+    if (!Loc.isReg() || Loc.getReg() != OldReg)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+      Loc.substPhysReg(NewReg, *TRI);
+    else
+      Loc.substVirtReg(NewReg, SubIdx, *TRI);
+    coalesceLocation(LocNo);
+  }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+  UserValue *UV = lookupVirtReg(OldReg);
+  if (!UV)
+    return;
+
+  if (TargetRegisterInfo::isVirtualRegister(NewReg))
+    mapVirtReg(NewReg, UV);
+  virtRegToEqClass.erase(OldReg);
+
+  do {
+    UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+    UV = UV->getNext();
+  } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterate over locations in reverse makes it easier to handle coalescing.
+  for (unsigned i = locations.size(); i ; --i) {
+    unsigned LocNo = i-1;
+    MachineOperand &Loc = locations[LocNo];
+    // Only virtual registers are rewritten.
+    if (!Loc.isReg() || !Loc.getReg() ||
+        !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+      continue;
+    unsigned VirtReg = Loc.getReg();
+    if (VRM.isAssignedReg(VirtReg) &&
+        TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+      Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+    } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+               VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+      // FIXME: Translate SubIdx to a stackslot offset.
+      Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+    } else {
+      Loc.setReg(0);
+      Loc.setSubReg(0);
+    }
+    coalesceLocation(LocNo);
+  }
+  DEBUG(print(dbgs(), &TRI));
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+                   LiveIntervals &LIS) {
+  SlotIndex Start = LIS.getMBBStartIdx(MBB);
+  Idx = Idx.getBaseIndex();
+
+  // Try to find an insert location by going backwards from Idx.
+  MachineInstr *MI;
+  while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+    // We've reached the beginning of MBB.
+    if (Idx == Start) {
+      MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+      return I;
+    }
+    Idx = Idx.getPrevIndex();
+  }
+
+  // Don't insert anything after the first terminator, though.
+  return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+                                    llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+  DebugLoc D = dl;
+  dl = DebugLoc();
+  return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+                                 unsigned LocNo,
+                                 LiveIntervals &LIS,
+                                 const TargetInstrInfo &TII) {
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+  MachineOperand &Loc = locations[LocNo];
+
+  // Frame index locations may require a target callback.
+  if (Loc.isFI()) {
+    MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+                                          Loc.getIndex(), offset, variable, 
+                                                    findDebugLoc());
+    if (MI) {
+      MBB->insert(I, MI);
+      return;
+    }
+  }
+  // This is not a frame index, or the target is happy with a standard FI.
+  BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+    .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+                               LiveIntervals &LIS, const TargetInstrInfo &TII) {
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+  BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0)
+    .addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+                                const TargetInstrInfo &TII) {
+  MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+  for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+    SlotIndex Start = I.start();
+    SlotIndex Stop = I.stop();
+    unsigned LocNo = I.value();
+    DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+    MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+    SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+    DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+    insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+    // This interval may span multiple basic blocks.
+    // Insert a DBG_VALUE into each one.
+    while(Stop > MBBEnd) {
+      // Move to the next block.
+      Start = MBBEnd;
+      if (++MBB == MFEnd)
+        break;
+      MBBEnd = LIS.getMBBEndIdx(MBB);
+      DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+      insertDebugValue(MBB, Start, LocNo, LIS, TII);
+    }
+    DEBUG(dbgs() << '\n');
+    if (MBB == MFEnd)
+      break;
+
+    ++I;
+    if (Stop == MBBEnd)
+      continue;
+    // The current interval ends before MBB.
+    // Insert a kill if there is a gap.
+    if (!I.valid() || I.start() > Stop)
+      insertDebugKill(MBB, Stop, LIS, TII);
+  }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+  DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+    userValues[i]->rewriteLocations(*VRM, *TRI);
+    userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+  }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..a6e40a198456
--- /dev/null
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,63 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+  void *pImpl;
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  LiveDebugVariables();
+  ~LiveDebugVariables();
+
+  /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+  /// @param OldReg Old virtual register that is going away.
+  /// @param NewReg New register holding the user variables.
+  /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+  ///               register.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+  /// that happened during register allocation.
+  /// @param VRM Rename virtual registers according to map.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  /// dump - Print data structures to dbgs().
+  void dump();
+
+private:
+
+  virtual bool runOnMachineFunction(MachineFunction &);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 59f380ad2641..c2dbd6ab75a1 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -30,58 +30,19 @@
 #include <algorithm>
 using namespace llvm;
 
-// An example for liveAt():
-//
-// this = [1,4), liveAt(0) will return false. The instruction defining this
-// spans slots [0,3]. The interval belongs to an spilled definition of the
-// variable it represents. This is because slot 1 is used (def slot) and spans
-// up to slot 3 (store slot).
-//
-bool LiveInterval::liveAt(SlotIndex I) const {
-  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
-  if (r == ranges.begin())
-    return false;
-
-  --r;
-  return r->contains(I);
-}
-
-// liveBeforeAndAt - Check if the interval is live at the index and the index
-// just before it. If index is liveAt, check if it starts a new live range.
-// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
-  Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
-  if (r == ranges.begin())
-    return false;
-
-  --r;
-  if (!r->contains(I))
-    return false;
-  if (I != r->start)
-    return true;
-  // I is the start of a live range. Check if the previous live range ends
-  // at I-1.
-  if (r == ranges.begin())
-    return false;
-  return r->end == I;
+// CompEnd - Compare LiveRange ends.
+namespace {
+struct CompEnd {
+  bool operator()(const LiveRange &A, const LiveRange &B) const {
+    return A.end < B.end;
+  }
+};
 }
 
-/// killedAt - Return true if a live range ends at index. Note that the kill
-/// point is not contained in the half-open live range. It is usually the
-/// getDefIndex() slot following its last use.
-bool LiveInterval::killedAt(SlotIndex I) const {
-  Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
-
-  // Now r points to the first interval with start >= I, or ranges.end().
-  if (r == ranges.begin())
-    return false;
-
-  --r;
-  // Now r points to the last interval with end <= I.
-  // r->end is the kill point.
-  return r->end == I;
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+  assert(Pos.isValid() && "Cannot search for an invalid index");
+  return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0),
+                          CompEnd());
 }
 
 /// killedInRange - Return true if the interval has kills in [Start,End).
@@ -330,25 +291,14 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
   return ranges.insert(it, LR);
 }
 
-/// isInOneLiveRange - Return true if the range specified is entirely in
-/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
-  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
-  if (I == ranges.begin())
-    return false;
-  --I;
-  return I->containsRange(Start, End);
-}
-
 
 /// removeRange - Remove the specified range from this interval.  Note that
 /// the range must be in a single LiveRange in its entirety.
 void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
                                bool RemoveDeadValNo) {
   // Find the LiveRange containing this span.
-  Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
-  assert(I != ranges.begin() && "Range is not in interval!");
-  --I;
+  Ranges::iterator I = find(Start);
+  assert(I != ranges.end() && "Range is not in interval!");
   assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
 
   // If the span we are removing is at the start of the LiveRange, adjust it.
@@ -405,32 +355,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
   markValNoForDeletion(ValNo);
 }
 
-/// getLiveRangeContaining - Return the live range that contains the
-/// specified index, or null if there is none.
-LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
-  const_iterator It = std::upper_bound(begin(), end(), Idx);
-  if (It != ranges.begin()) {
-    --It;
-    if (It->contains(Idx))
-      return It;
-  }
-
-  return end();
-}
-
-LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
-  iterator It = std::upper_bound(begin(), end(), Idx);
-  if (It != begin()) {
-    --It;
-    if (It->contains(Idx))
-      return It;
-  }
-
-  return end();
-}
-
 /// findDefinedVNInfo - Find the VNInfo defined by the specified
 /// index (register interval).
 VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
@@ -443,17 +367,6 @@ VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
   return 0;
 }
 
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// register (stack inteval).
-VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
-  for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
-       i != e; ++i) {
-    if ((*i)->getReg() == reg)
-      return *i;
-  }
-  return 0;
-}
-
 /// join - Join two live intervals (this, and other) together.  This applies
 /// mappings to the value numbers in the LHS/RHS intervals as specified.  If
 /// the intervals are not joinable, this aborts.
@@ -616,103 +529,6 @@ void LiveInterval::MergeValueInAsValue(
 }
 
 
-/// MergeInClobberRanges - For any live ranges that are not defined in the
-/// current interval, but are defined in the Clobbers interval, mark them
-/// used with an unknown definition value.
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
-                                        const LiveInterval &Clobbers,
-                                        VNInfo::Allocator &VNInfoAllocator) {
-  if (Clobbers.empty()) return;
-
-  DenseMap<VNInfo*, VNInfo*> ValNoMaps;
-  VNInfo *UnusedValNo = 0;
-  iterator IP = begin();
-  for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
-    // For every val# in the Clobbers interval, create a new "unknown" val#.
-    VNInfo *ClobberValNo = 0;
-    DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
-    if (VI != ValNoMaps.end())
-      ClobberValNo = VI->second;
-    else if (UnusedValNo)
-      ClobberValNo = UnusedValNo;
-    else {
-      UnusedValNo = ClobberValNo =
-        getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-      ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
-    }
-
-    bool Done = false;
-    SlotIndex Start = I->start, End = I->end;
-    // If a clobber range starts before an existing range and ends after
-    // it, the clobber range will need to be split into multiple ranges.
-    // Loop until the entire clobber range is handled.
-    while (!Done) {
-      Done = true;
-      IP = std::upper_bound(IP, end(), Start);
-      SlotIndex SubRangeStart = Start;
-      SlotIndex SubRangeEnd = End;
-
-      // If the start of this range overlaps with an existing liverange, trim it.
-      if (IP != begin() && IP[-1].end > SubRangeStart) {
-        SubRangeStart = IP[-1].end;
-        // Trimmed away the whole range?
-        if (SubRangeStart >= SubRangeEnd) continue;
-      }
-      // If the end of this range overlaps with an existing liverange, trim it.
-      if (IP != end() && SubRangeEnd > IP->start) {
-        // If the clobber live range extends beyond the existing live range,
-        // it'll need at least another live range, so set the flag to keep
-        // iterating.
-        if (SubRangeEnd > IP->end) {
-          Start = IP->end;
-          Done = false;
-        }
-        SubRangeEnd = IP->start;
-        // If this trimmed away the whole range, ignore it.
-        if (SubRangeStart == SubRangeEnd) continue;
-      }
-
-      // Insert the clobber interval.
-      IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
-                        IP);
-      UnusedValNo = 0;
-    }
-  }
-
-  if (UnusedValNo) {
-    // Delete the last unused val#.
-    valnos.pop_back();
-  }
-}
-
-void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
-                                       SlotIndex Start,
-                                       SlotIndex End,
-                                       VNInfo::Allocator &VNInfoAllocator) {
-  // Find a value # to use for the clobber ranges.  If there is already a value#
-  // for unknown values, use it.
-  VNInfo *ClobberValNo =
-    getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
-  iterator IP = begin();
-  IP = std::upper_bound(IP, end(), Start);
-
-  // If the start of this range overlaps with an existing liverange, trim it.
-  if (IP != begin() && IP[-1].end > Start) {
-    Start = IP[-1].end;
-    // Trimmed away the whole range?
-    if (Start >= End) return;
-  }
-  // If the end of this range overlaps with an existing liverange, trim it.
-  if (IP != end() && End > IP->start) {
-    End = IP->start;
-    // If this trimmed away the whole range, ignore it.
-    if (Start == End) return;
-  }
-
-  // Insert the clobber interval.
-  addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
-}
 
 /// MergeValueNumberInto - This method is called when two value nubmers
 /// are found to be equivalent.  This eliminates V1, replacing all
@@ -767,6 +583,9 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
     }
   }
 
+  // Merge the relevant flags.
+  V2->mergeFlags(V1);
+
   // Now that V1 is dead, remove it.
   markValNoForDeletion(V1);
 
@@ -831,14 +650,9 @@ void LiveRange::dump() const {
 }
 
 void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
-  if (isStackSlot())
-    OS << "SS#" << getStackSlotIndex();
-  else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
-    OS << TRI->getName(reg);
-  else
-    OS << "%reg" << reg;
-
-  OS << ',' << weight;
+  OS << PrintReg(reg, TRI);
+  if (weight != 0)
+    OS << ',' << weight;
 
   if (empty())
     OS << " EMPTY";
@@ -863,10 +677,9 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
       if (vni->isUnused()) {
         OS << "x";
       } else {
-        if (!vni->isDefAccurate() && !vni->isPHIDef())
-          OS << "?";
-        else
-          OS << vni->def;
+        OS << vni->def;
+        if (vni->isPHIDef())
+          OS << "-phidef";
         if (vni->hasPHIKill())
           OS << "-phikill";
         if (vni->hasRedefByEC())
@@ -884,3 +697,84 @@ void LiveInterval::dump() const {
 void LiveRange::print(raw_ostream &os) const {
   os << *this;
 }
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+  // Create initial equivalence classes.
+  eqClass_.clear();
+  eqClass_.grow(LI->getNumValNums());
+
+  const VNInfo *used = 0, *unused = 0;
+
+  // Determine connections.
+  for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+       I != E; ++I) {
+    const VNInfo *VNI = *I;
+    // Group all unused values into one class.
+    if (VNI->isUnused()) {
+      if (unused)
+        eqClass_.join(unused->id, VNI->id);
+      unused = VNI;
+      continue;
+    }
+    used = VNI;
+    if (VNI->isPHIDef()) {
+      const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def);
+      assert(MBB && "Phi-def has no defining MBB");
+      // Connect to values live out of predecessors.
+      for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+           PE = MBB->pred_end(); PI != PE; ++PI)
+        if (const VNInfo *PVNI =
+              LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot()))
+          eqClass_.join(VNI->id, PVNI->id);
+    } else {
+      // Normal value defined by an instruction. Check for two-addr redef.
+      // FIXME: This could be coincidental. Should we really check for a tied
+      // operand constraint?
+      // Note that VNI->def may be a use slot for an early clobber def.
+      if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+        eqClass_.join(VNI->id, UVNI->id);
+    }
+  }
+
+  // Lump all the unused values in with the last used value.
+  if (used && unused)
+    eqClass_.join(used->id, unused->id);
+
+  eqClass_.compress();
+  return eqClass_.getNumClasses();
+}
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) {
+  assert(LIV[0] && "LIV[0] must be set");
+  LiveInterval &LI = *LIV[0];
+
+  // First move runs to new intervals.
+  LiveInterval::iterator J = LI.begin(), E = LI.end();
+  while (J != E && eqClass_[J->valno->id] == 0)
+    ++J;
+  for (LiveInterval::iterator I = J; I != E; ++I) {
+    if (unsigned eq = eqClass_[I->valno->id]) {
+      assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+             "New intervals should be empty");
+      LIV[eq]->ranges.push_back(*I);
+    } else
+      *J++ = *I;
+  }
+  LI.ranges.erase(J, E);
+
+  // Transfer VNInfos to their new owners and renumber them.
+  unsigned j = 0, e = LI.getNumValNums();
+  while (j != e && eqClass_[j] == 0)
+    ++j;
+  for (unsigned i = j; i != e; ++i) {
+    VNInfo *VNI = LI.getValNumInfo(i);
+    if (unsigned eq = eqClass_[i]) {
+      VNI->id = LIV[eq]->getNumValNums();
+      LIV[eq]->valnos.push_back(VNI);
+    } else {
+      VNI->id = j;
+      LI.valnos[j++] = VNI;
+    }
+  }
+  LI.valnos.resize(j);
+}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2726fc337539..aef5b5f77e78 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
 #include "VirtRegMap.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -55,8 +56,17 @@ STATISTIC(numFolds     , "Number of loads/stores folded into instructions");
 STATISTIC(numSplits    , "Number of intervals split");
 
 char LiveIntervals::ID = 0;
-INITIALIZE_PASS(LiveIntervals, "liveintervals",
-                "Live Interval Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+                "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+                "Live Interval Analysis", false, false)
 
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
@@ -132,19 +142,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
 
 void LiveIntervals::printInstrs(raw_ostream &OS) const {
   OS << "********** MACHINEINSTRS **********\n";
-
-  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
-       mbbi != mbbe; ++mbbi) {
-    OS << "BB#" << mbbi->getNumber()
-       << ":\t\t# derived from " << mbbi->getName() << "\n";
-    for (MachineBasicBlock::iterator mii = mbbi->begin(),
-           mie = mbbi->end(); mii != mie; ++mii) {
-      if (mii->isDebugValue())
-        OS << "    \t" << *mii;
-      else
-        OS << getInstructionIndex(mii) << '\t' << *mii;
-    }
-  }
+  mf_->print(OS, indexes_);
 }
 
 void LiveIntervals::dumpInstrs() const {
@@ -248,15 +246,6 @@ bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
   return false;
 }
 
-#ifndef NDEBUG
-static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
-  if (TargetRegisterInfo::isPhysicalRegister(reg))
-    dbgs() << tri_->getName(reg);
-  else
-    dbgs() << "%reg" << reg;
-}
-#endif
-
 static
 bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
   unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -285,8 +274,8 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
   SlotIndex RedefIndex = MIIdx.getDefIndex();
   const LiveRange *OldLR =
     interval.getLiveRangeContaining(RedefIndex.getUseIndex());
-  if (OldLR->valno->isDefAccurate()) {
-    MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+  MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+  if (DefMI != 0) {
     return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
   }
   return false;
@@ -298,10 +287,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                              MachineOperand& MO,
                                              unsigned MOIdx,
                                              LiveInterval &interval) {
-  DEBUG({
-      dbgs() << "\t\tregister: ";
-      printRegName(interval.reg, tri_);
-    });
+  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
 
   // Virtual registers may be defined multiple times (due to phi
   // elimination and 2-addr elimination).  Much of what we do only has to be
@@ -326,8 +312,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       CopyMI = mi;
     }
 
-    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
-                                          VNInfoAllocator);
+    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
     assert(ValNo->id == 0 && "First value in interval is not 0?");
 
     // Loop over all of the blocks that the vreg is defined in.  There are
@@ -393,8 +378,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // Create interval with one of a NEW value number.  Note that this value
       // number isn't actually defined by an instruction, weird huh? :)
       if (PHIJoin) {
-        ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
-                                      VNInfoAllocator);
+        assert(getInstructionFromIndex(Start) == 0 &&
+               "PHI def index points at actual instruction.");
+        ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
         ValNo->setIsPHIDef(true);
       }
       LiveRange LR(Start, killIdx, ValNo);
@@ -440,10 +426,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
 
       // The new value number (#1) is defined by the instruction we claimed
       // defined value #0.
-      VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
-                                            false, // update at *
-                                            VNInfoAllocator);
-      ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+      VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
 
       // Value#0 is now defined by the 2-addr instruction.
       OldValNo->def  = RedefIndex;
@@ -481,7 +464,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       MachineInstr *CopyMI = NULL;
       if (mi->isCopyLike())
         CopyMI = mi;
-      ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+      ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
 
       SlotIndex killIndex = getMBBEndIdx(mbb);
       LiveRange LR(defIndex, killIndex, ValNo);
@@ -504,10 +487,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
                                               MachineInstr *CopyMI) {
   // A physical register cannot be live across basic block, so its
   // lifetime must end somewhere in its defining basic block.
-  DEBUG({
-      dbgs() << "\t\tregister: ";
-      printRegName(interval.reg, tri_);
-    });
+  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
 
   SlotIndex baseIndex = MIIdx;
   SlotIndex start = baseIndex.getDefIndex();
@@ -573,11 +553,11 @@ exit:
   assert(start < end && "did not find end of interval?");
 
   // Already exists? Extend old live interval.
-  LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
-  bool Extend = OldLR != interval.end();
-  VNInfo *ValNo = Extend
-    ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
-  if (MO.isEarlyClobber() && Extend)
+  VNInfo *ValNo = interval.getVNInfoAt(start);
+  bool Extend = ValNo != 0;
+  if (!Extend)
+    ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+  if (Extend && MO.isEarlyClobber())
     ValNo->setHasRedefByEC(true);
   LiveRange LR(start, end, ValNo);
   interval.addRange(LR);
@@ -611,10 +591,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
 void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
                                          SlotIndex MIIdx,
                                          LiveInterval &interval, bool isAlias) {
-  DEBUG({
-      dbgs() << "\t\tlivein register: ";
-      printRegName(interval.reg, tri_);
-    });
+  DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
 
   // Look for kills, if it reaches a def before it's killed, then it shouldn't
   // be considered a livein.
@@ -672,9 +649,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
     }
   }
 
+  SlotIndex defIdx = getMBBStartIdx(MBB);
+  assert(getInstructionFromIndex(defIdx) == 0 &&
+         "PHI def index points at actual instruction.");
   VNInfo *vni =
-    interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
-                          0, false, VNInfoAllocator);
+    interval.getNextValue(defIdx, 0, VNInfoAllocator);
   vni->setIsPHIDef(true);
   LiveRange LR(start, end, vni);
 
@@ -764,10 +743,177 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
   return NewLI;
 }
 
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+void LiveIntervals::shrinkToUses(LiveInterval *li) {
+  DEBUG(dbgs() << "Shrink: " << *li << '\n');
+  assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+         && "Can't only shrink physical registers");
+  // Find all the values used, including PHI kills.
+  SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+  // Visit all instructions reading li->reg.
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+       MachineInstr *UseMI = I.skipInstruction();) {
+    if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+      continue;
+    SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+    VNInfo *VNI = li->getVNInfoAt(Idx);
+    assert(VNI && "Live interval not live into reading instruction");
+    if (VNI->def == Idx) {
+      // Special case: An early-clobber tied operand reads and writes the
+      // register one slot early.
+      Idx = Idx.getPrevSlot();
+      VNI = li->getVNInfoAt(Idx);
+      assert(VNI && "Early-clobber tied value not available");
+    }
+    WorkList.push_back(std::make_pair(Idx, VNI));
+  }
+
+  // Create a new live interval with only minimal live segments per def.
+  LiveInterval NewLI(li->reg, 0);
+  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+       I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+  }
+
+  // Extend intervals to reach all uses in WorkList.
+  while (!WorkList.empty()) {
+    SlotIndex Idx = WorkList.back().first;
+    VNInfo *VNI = WorkList.back().second;
+    WorkList.pop_back();
+
+    // Extend the live range for VNI to be live at Idx.
+    LiveInterval::iterator I = NewLI.find(Idx);
+
+    // Already got it?
+    if (I != NewLI.end() && I->start <= Idx) {
+      assert(I->valno == VNI && "Unexpected existing value number");
+      continue;
+    }
+
+    // Is there already a live range in the block containing Idx?
+    const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+    SlotIndex BlockStart = getMBBStartIdx(MBB);
+    DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx
+                 << " in BB#" << MBB->getNumber() << '@' << BlockStart);
+    if (I != NewLI.begin() && (--I)->end > BlockStart) {
+      assert(I->valno == VNI && "Wrong reaching def");
+      DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n");
+      // Is this the first use of a PHIDef in its defining block?
+      if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) {
+        // The PHI is live, make sure the predecessors are live-out.
+        for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+             PE = MBB->pred_end(); PI != PE; ++PI) {
+          SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+          VNInfo *PVNI = li->getVNInfoAt(Stop);
+          // A predecessor is not required to have a live-out value for a PHI.
+          if (PVNI) {
+            assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
+            WorkList.push_back(std::make_pair(Stop, PVNI));
+          }
+        }
+      }
+
+      // Extend the live range in the block to include Idx.
+      NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI));
+      continue;
+    }
+
+    // VNI is live-in to MBB.
+    DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+    NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+    // Make sure VNI is live-out from the predecessors.
+    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+      assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+      WorkList.push_back(std::make_pair(Stop, VNI));
+    }
+  }
+
+  // Handle dead values.
+  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+       I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+    assert(LII != NewLI.end() && "Missing live range for PHI");
+    if (LII->end != VNI->def.getNextSlot())
+      continue;
+    if (!VNI->isPHIDef()) {
+      // This is a dead PHI. Remove it.
+      VNI->setIsUnused(true);
+      NewLI.removeRange(*LII);
+    } else {
+      // This is a dead def. Make sure the instruction knows.
+      MachineInstr *MI = getInstructionFromIndex(VNI->def);
+      assert(MI && "No instruction defining live value");
+      MI->addRegisterDead(li->reg, tri_);
+    }
+  }
+
+  // Move the trimmed ranges back.
+  li->ranges.swap(NewLI.ranges);
+  DEBUG(dbgs() << "Shrink: " << *li << '\n');
+}
+
+
 //===----------------------------------------------------------------------===//
 // Register allocator hooks.
 //
 
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+                                 MachineBasicBlock *mbb) const {
+  const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+  // If li is not live into a landing pad, we can insert spill code before the
+  // first terminator.
+  if (!lpad || !isLiveInToMBB(li, lpad))
+    return mbb->getFirstTerminator();
+
+  // When there is a landing pad, spill code must go before the call instruction
+  // that can throw.
+  MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+  while (I != B) {
+    --I;
+    if (I->getDesc().isCall())
+      return I;
+  }
+  // The block contains no calls that can throw, so use the first terminator.
+  return mbb->getFirstTerminator();
+}
+
+void LiveIntervals::addKillFlags() {
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    unsigned Reg = I->first;
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (mri_->reg_nodbg_empty(Reg))
+      continue;
+    LiveInterval *LI = I->second;
+
+    // Every instruction that kills Reg corresponds to a live range end point.
+    for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+         ++RI) {
+      // A LOAD index indicates an MBB edge.
+      if (RI->end.isLoad())
+        continue;
+      MachineInstr *MI = getInstructionFromIndex(RI->end);
+      if (!MI)
+        continue;
+      MI->addRegisterKilled(Reg, NULL);
+    }
+  }
+}
+
 /// getReMatImplicitUse - If the remat definition MI has one (for now, we only
 /// allow one) virtual register operand, then its uses are implicitly using
 /// the register. Returns the virtual register.
@@ -800,18 +946,17 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
 /// which reaches the given instruction also reaches the specified use index.
 bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
                                        SlotIndex UseIdx) const {
-  SlotIndex Index = getInstructionIndex(MI);
-  VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
-  LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
-  return UI != li.end() && UI->valno == ValNo;
+  VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+  return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
 }
 
 /// isReMaterializable - Returns true if the definition MI of the specified
 /// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
-                                       const VNInfo *ValNo, MachineInstr *MI,
-                                       SmallVectorImpl<LiveInterval*> &SpillIs,
-                                       bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                  const VNInfo *ValNo, MachineInstr *MI,
+                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  bool &isLoad) {
   if (DisableReMat)
     return false;
 
@@ -829,7 +974,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
          ri != re; ++ri) {
       MachineInstr *UseMI = &*ri;
       SlotIndex UseIdx = getInstructionIndex(UseMI);
-      if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+      if (li.getVNInfoAt(UseIdx) != ValNo)
         continue;
       if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
         return false;
@@ -855,9 +1000,10 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
 
 /// isReMaterializable - Returns true if every definition of MI of every
 /// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
-                                       SmallVectorImpl<LiveInterval*> &SpillIs,
-                                       bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  bool &isLoad) {
   isLoad = false;
   for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
        i != e; ++i) {
@@ -865,9 +1011,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
     if (VNI->isUnused())
       continue; // Dead val#.
     // Is the def for the val# rematerializable?
-    if (!VNI->isDefAccurate())
-      return false;
     MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+    if (!ReMatDefMI)
+      return false;
     bool DefIsLoad = false;
     if (!ReMatDefMI ||
         !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -1010,7 +1156,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
-    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
     if (!vrm.isReMaterialized(Reg))
       continue;
@@ -1044,7 +1190,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
     if (!mop.isReg())
       continue;
     unsigned Reg = mop.getReg();
-    if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
     if (Reg != li.reg)
       continue;
@@ -1140,11 +1286,14 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       rewriteImplicitOps(li, MI, NewVReg, vrm);
 
     // Reuse NewVReg for other reads.
+    bool HasEarlyClobber = false;
     for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
       MachineOperand &mopj = MI->getOperand(Ops[j]);
       mopj.setReg(NewVReg);
       if (mopj.isImplicit())
         rewriteImplicitOps(li, MI, NewVReg, vrm);
+      if (mopj.isEarlyClobber())
+        HasEarlyClobber = true;
     }
 
     if (CreatedNewVReg) {
@@ -1190,7 +1339,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
     if (HasUse) {
       if (CreatedNewVReg) {
         LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
-                     nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+                     nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
         DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
       } else {
@@ -1203,8 +1352,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       }
     }
     if (HasDef) {
-      LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
-                   nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+      // An early clobber starts at the use slot, except for an early clobber
+      // tied to a use operand (yes, that is a thing).
+      LiveRange LR(HasEarlyClobber && !HasUse ?
+                   index.getUseIndex() : index.getDefIndex(),
+                   index.getStoreIndex(),
+                   nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
       DEBUG(dbgs() << " +" << LR);
       nI.addRange(LR);
     }
@@ -1554,15 +1707,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
   return (isDef + isUse) * lc;
 }
 
-void
-LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
   for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
-    normalizeSpillWeight(*NewLIs[i]);
+    NewLIs[i]->weight =
+      normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
 }
 
 std::vector<LiveInterval*> LiveIntervals::
 addIntervalsForSpills(const LiveInterval &li,
-                      SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const SmallVectorImpl<LiveInterval*> &SpillIs,
                       const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
   assert(li.isSpillable() && "attempt to spill already spilled interval!");
 
@@ -1653,8 +1806,7 @@ addIntervalsForSpills(const LiveInterval &li,
     if (VNI->isUnused())
       continue; // Dead val#.
     // Is the def for the val# rematerializable?
-    MachineInstr *ReMatDefMI = VNI->isDefAccurate()
-      ? getInstructionFromIndex(VNI->def) : 0;
+    MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
     bool dummy;
     if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
       // Remember how to remat the def of this val#.
@@ -1926,6 +2078,9 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
                                             unsigned PhysReg, VirtRegMap &vrm) {
   unsigned SpillReg = getRepresentativeReg(PhysReg);
 
+  DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+               << " represented by " << tri_->getName(SpillReg) << '\n');
+
   for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
     // If there are registers which alias PhysReg, but which are not a
     // sub-register of the chosen representative super register. Assert
@@ -1937,15 +2092,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
   SmallVector<unsigned, 4> PRegs;
   if (hasInterval(SpillReg))
     PRegs.push_back(SpillReg);
-  else {
-    SmallSet<unsigned, 4> Added;
-    for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
-      if (Added.insert(*AS) && hasInterval(*AS)) {
-        PRegs.push_back(*AS);
-        for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
-          Added.insert(*ASS);
-      }
-  }
+  for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+    if (hasInterval(*SR))
+      PRegs.push_back(*SR);
+
+  DEBUG({
+    dbgs() << "Trying to spill:";
+    for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+      dbgs() << ' ' << tri_->getName(PRegs[i]);
+    dbgs() << '\n';
+  });
 
   SmallPtrSet<MachineInstr*, 8> SeenMIs;
   for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
@@ -1956,18 +2112,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
       continue;
     SeenMIs.insert(MI);
     SlotIndex Index = getInstructionIndex(MI);
+    bool LiveReg = false;
     for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
       unsigned PReg = PRegs[i];
       LiveInterval &pli = getInterval(PReg);
       if (!pli.liveAt(Index))
         continue;
-      vrm.addEmergencySpill(PReg, MI);
+      LiveReg = true;
       SlotIndex StartIdx = Index.getLoadIndex();
       SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
-      if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
-        pli.removeRange(StartIdx, EndIdx);
-        Cut = true;
-      } else {
+      if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
         std::string msg;
         raw_string_ostream Msg(msg);
         Msg << "Ran out of registers during register allocation!";
@@ -1978,15 +2132,14 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
         }
         report_fatal_error(Msg.str());
       }
-      for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
-        if (!hasInterval(*AS))
-          continue;
-        LiveInterval &spli = getInterval(*AS);
-        if (spli.liveAt(Index))
-          spli.removeRange(Index.getLoadIndex(),
-                           Index.getNextIndex().getBaseIndex());
-      }
+      pli.removeRange(StartIdx, EndIdx);
+      LiveReg = true;
     }
+    if (!LiveReg)
+      continue;
+    DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+    vrm.addEmergencySpill(SpillReg, MI);
+    Cut = true;
   }
   return Cut;
 }
@@ -1996,7 +2149,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
   LiveInterval& Interval = getOrCreateInterval(reg);
   VNInfo* VN = Interval.getNextValue(
     SlotIndex(getInstructionIndex(startInst).getDefIndex()),
-    startInst, true, getVNInfoAllocator());
+    startInst, getVNInfoAllocator());
   VN->setHasPHIKill(true);
   LiveRange LR(
      SlotIndex(getInstructionIndex(startInst).getDefIndex()),
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..205f28a0d65a
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,315 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+  if (VirtReg.empty())
+    return;
+  ++Tag;
+
+  // Insert each of the virtual register's live segments into the map.
+  LiveInterval::iterator RegPos = VirtReg.begin();
+  LiveInterval::iterator RegEnd = VirtReg.end();
+  SegmentIter SegPos = Segments.find(RegPos->start);
+
+  for (;;) {
+    SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+    if (++RegPos == RegEnd)
+      return;
+    SegPos.advanceTo(RegPos->start);
+  }
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+  if (VirtReg.empty())
+    return;
+  ++Tag;
+
+  // Remove each of the virtual register's live segments from the map.
+  LiveInterval::iterator RegPos = VirtReg.begin();
+  LiveInterval::iterator RegEnd = VirtReg.end();
+  SegmentIter SegPos = Segments.find(RegPos->start);
+
+  for (;;) {
+    assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+    SegPos.erase();
+    if (!SegPos.valid())
+      return;
+
+    // Skip all segments that may have been coalesced.
+    RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+    if (RegPos == RegEnd)
+      return;
+
+    SegPos.advanceTo(RegPos->start);
+  }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+  OS << "LIU " << PrintReg(RepReg, TRI);
+  if (empty()) {
+    OS << " empty\n";
+    return;
+  }
+  for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+    OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+       << PrintReg(SI.value()->reg, TRI);
+  }
+  OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+                                          const TargetRegisterInfo *TRI) const {
+  OS << '[' << start() << ';' << stop() << "):"
+     << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+                                     const TargetRegisterInfo *TRI) {
+  OS << "Interferences with ";
+  LiveUnion->print(OS, TRI);
+  InterferenceResult IR = firstInterference();
+  while (isInterference(IR)) {
+    OS << "  ";
+    IR.print(OS, TRI);
+    OS << '\n';
+    nextInterference(IR);
+  }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+  for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+    VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments.  This may
+// be ok since most virtual registers have very few segments.  If we had a data
+// structure that optimizd MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+  // Search until reaching the end of the LiveUnion segments.
+  LiveInterval::iterator VirtRegEnd = VirtReg->end();
+  if (IR.VirtRegI == VirtRegEnd)
+    return;
+  while (IR.LiveUnionI.valid()) {
+    // Slowly advance the live virtual reg iterator until we surpass the next
+    // segment in LiveUnion.
+    //
+    // Note: If this is ever used for coalescing of fixed registers and we have
+    // a live vreg with thousands of segments, then change this code to use
+    // upperBound instead.
+    IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+    if (IR.VirtRegI == VirtRegEnd)
+      break; // Retain current (nonoverlapping) LiveUnionI
+
+    // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+    IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+    // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+    if (!IR.LiveUnionI.valid())
+      break;
+    if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+      assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+             "upperBound postcondition");
+      break;
+    }
+  }
+  if (!IR.LiveUnionI.valid())
+    IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+  if (CheckedFirstInterference)
+    return FirstInterference;
+  CheckedFirstInterference = true;
+  InterferenceResult &IR = FirstInterference;
+
+  // Quickly skip interference check for empty sets.
+  if (VirtReg->empty() || LiveUnion->empty()) {
+    IR.VirtRegI = VirtReg->end();
+  } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+    // VirtReg starts first, perform double binary search.
+    IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+    if (IR.VirtRegI != VirtReg->end())
+      IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start);
+  } else {
+    // LiveUnion starts first, perform double binary search.
+    IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex());
+    if (IR.LiveUnionI.valid())
+      IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+    else
+      IR.VirtRegI = VirtReg->end();
+  }
+  findIntersection(FirstInterference);
+  assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+         && "Uninitialized iterator");
+  return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+  assert(isInterference(IR) && "iteration past end of interferences");
+
+  // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
+  // unique overlapping pairs.
+  if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+    if (++IR.VirtRegI == VirtReg->end())
+      return false;
+  }
+  else {
+    if (!(++IR.LiveUnionI).valid()) {
+      IR.VirtRegI = VirtReg->end();
+      return false;
+    }
+  }
+  // Short-circuit findIntersection() if possible.
+  if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+    return true;
+
+  // Find the next intersection.
+  findIntersection(IR);
+  return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+  SmallVectorImpl<LiveInterval*>::const_iterator I =
+    std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+  return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+  InterferenceResult IR = firstInterference();
+  LiveInterval::iterator VirtRegEnd = VirtReg->end();
+  LiveInterval *RecentInterferingVReg = NULL;
+  if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+    // Advance the union's iterator to reach an unseen interfering vreg.
+    do {
+      if (IR.LiveUnionI.value() == RecentInterferingVReg)
+        continue;
+
+      if (!isSeenInterference(IR.LiveUnionI.value()))
+        break;
+
+      // Cache the most recent interfering vreg to bypass isSeenInterference.
+      RecentInterferingVReg = IR.LiveUnionI.value();
+
+    } while ((++IR.LiveUnionI).valid());
+    if (!IR.LiveUnionI.valid())
+      break;
+
+    // Advance the VirtReg iterator until surpassing the next segment in
+    // LiveUnion.
+    IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+    if (IR.VirtRegI == VirtRegEnd)
+      break;
+
+    // Check for intersection with the union's segment.
+    if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+      if (!IR.LiveUnionI.value()->isSpillable())
+        SeenUnspillableVReg = true;
+
+      if (InterferingVRegs.size() == MaxInterferingRegs)
+        // Leave SeenAllInterferences set to false to indicate that at least one
+        // interference exists beyond those we collected.
+        return MaxInterferingRegs;
+
+      InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+      // Cache the most recent interfering vreg to bypass isSeenInterference.
+      RecentInterferingVReg = IR.LiveUnionI.value();
+      ++IR.LiveUnionI;
+      continue;
+    }
+    // VirtRegI may have advanced far beyond LiveUnionI,
+    // do a fast intersection test to "catch up"
+    IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+  }
+  SeenAllInterferences = true;
+  return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+  // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+  // overlaps.
+  IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+    Overlaps(LiveUnion->getMap(), Loop->getMap());
+  if (!Overlaps.valid())
+    return false;
+
+  // The loop is overlapping an LIU assignment. Check VirtReg as well.
+  LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+  for (;;) {
+    if (VRI == VirtReg->end())
+      return false;
+    if (VRI->start < Overlaps.stop())
+      return true;
+
+    Overlaps.advanceTo(VRI->start);
+    if (!Overlaps.valid())
+      return false;
+    if (Overlaps.start() < VRI->end)
+      return true;
+
+    VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+  }
+}
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 000000000000..6f9c5f4455e9
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,258 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+        const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+  return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context).  We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+  // A set of live virtual register segments that supports fast insertion,
+  // intersection, and removal.
+  // Mapping SlotIndex intervals to virtual register numbers.
+  typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+  // SegmentIter can advance to the next segment ordered by starting position
+  // which may belong to a different live virtual register. We also must be able
+  // to reach the current segment's containing virtual register.
+  typedef LiveSegments::iterator SegmentIter;
+
+  // LiveIntervalUnions share an external allocator.
+  typedef LiveSegments::Allocator Allocator;
+
+  class InterferenceResult;
+  class Query;
+
+private:
+  const unsigned RepReg;  // representative register number
+  unsigned Tag;           // unique tag for current contents.
+  LiveSegments Segments;  // union of virtual reg segments
+
+public:
+  LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+    {}
+
+  // Iterate over all segments in the union of live virtual registers ordered
+  // by their starting position.
+  SegmentIter begin() { return Segments.begin(); }
+  SegmentIter end() { return Segments.end(); }
+  SegmentIter find(SlotIndex x) { return Segments.find(x); }
+  bool empty() const { return Segments.empty(); }
+  SlotIndex startIndex() const { return Segments.start(); }
+
+  // Provide public access to the underlying map to allow overlap iteration.
+  typedef LiveSegments Map;
+  const Map &getMap() { return Segments; }
+
+  /// getTag - Return an opaque tag representing the current state of the union.
+  unsigned getTag() const { return Tag; }
+
+  /// changedSince - Return true if the union change since getTag returned tag.
+  bool changedSince(unsigned tag) const { return tag != Tag; }
+
+  // Add a live virtual register to this union and merge its segments.
+  void unify(LiveInterval &VirtReg);
+
+  // Remove a live virtual register's segments from this union.
+  void extract(LiveInterval &VirtReg);
+
+  // Print union, using TRI to translate register names
+  void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+  // Verify the live intervals in this union and add them to the visited set.
+  void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+  /// Cache a single interference test result in the form of two intersecting
+  /// segments. This allows efficiently iterating over the interferences. The
+  /// iteration logic is handled by LiveIntervalUnion::Query which may
+  /// filter interferences depending on the type of query.
+  class InterferenceResult {
+    friend class Query;
+
+    LiveInterval::iterator VirtRegI; // current position in VirtReg
+    SegmentIter LiveUnionI;          // current position in LiveUnion
+
+    // Internal ctor.
+    InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+      : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+  public:
+    // Public default ctor.
+    InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+    /// start - Return the start of the current overlap.
+    SlotIndex start() const {
+      return std::max(VirtRegI->start, LiveUnionI.start());
+    }
+
+    /// stop - Return the end of the current overlap.
+    SlotIndex stop() const {
+      return std::min(VirtRegI->end, LiveUnionI.stop());
+    }
+
+    /// interference - Return the register that is interfering here.
+    LiveInterval *interference() const { return LiveUnionI.value(); }
+
+    // Note: this interface provides raw access to the iterators because the
+    // result has no way to tell if it's valid to dereference them.
+
+    // Access the VirtReg segment.
+    LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+    // Access the LiveUnion segment.
+    const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+    bool operator==(const InterferenceResult &IR) const {
+      return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+    }
+    bool operator!=(const InterferenceResult &IR) const {
+      return !operator==(IR);
+    }
+
+    void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+  };
+
+  /// Query interferences between a single live virtual register and a live
+  /// interval union.
+  class Query {
+    LiveIntervalUnion *LiveUnion;
+    LiveInterval *VirtReg;
+    InterferenceResult FirstInterference;
+    SmallVector<LiveInterval*,4> InterferingVRegs;
+    bool CheckedFirstInterference;
+    bool SeenAllInterferences;
+    bool SeenUnspillableVReg;
+    unsigned Tag;
+
+  public:
+    Query(): LiveUnion(), VirtReg() {}
+
+    Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+      LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+      SeenAllInterferences(false), SeenUnspillableVReg(false)
+    {}
+
+    void clear() {
+      LiveUnion = NULL;
+      VirtReg = NULL;
+      InterferingVRegs.clear();
+      CheckedFirstInterference = false;
+      SeenAllInterferences = false;
+      SeenUnspillableVReg = false;
+      Tag = 0;
+    }
+
+    void init(LiveInterval *VReg, LiveIntervalUnion *LIU) {
+      assert(VReg && LIU && "Invalid arguments");
+      if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) {
+        // Retain cached results, e.g. firstInterference.
+        return;
+      }
+      clear();
+      LiveUnion = LIU;
+      VirtReg = VReg;
+      Tag = LIU->getTag();
+    }
+
+    LiveInterval &virtReg() const {
+      assert(VirtReg && "uninitialized");
+      return *VirtReg;
+    }
+
+    bool isInterference(const InterferenceResult &IR) const {
+      if (IR.VirtRegI != VirtReg->end()) {
+        assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+               "invalid segment iterators");
+        return true;
+      }
+      return false;
+    }
+
+    // Does this live virtual register interfere with the union?
+    bool checkInterference() { return isInterference(firstInterference()); }
+
+    // Get the first pair of interfering segments, or a noninterfering result.
+    // This initializes the firstInterference_ cache.
+    const InterferenceResult &firstInterference();
+
+    // Treat the result as an iterator and advance to the next interfering pair
+    // of segments. Visiting each unique interfering pairs means that the same
+    // VirtReg or LiveUnion segment may be visited multiple times.
+    bool nextInterference(InterferenceResult &IR) const;
+
+    // Count the virtual registers in this union that interfere with this
+    // query's live virtual register, up to maxInterferingRegs.
+    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+    // Was this virtual register visited during collectInterferingVRegs?
+    bool isSeenInterference(LiveInterval *VReg) const;
+
+    // Did collectInterferingVRegs collect all interferences?
+    bool seenAllInterferences() const { return SeenAllInterferences; }
+
+    // Did collectInterferingVRegs encounter an unspillable vreg?
+    bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+    // Vector generated by collectInterferingVRegs.
+    const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+      return InterferingVRegs;
+    }
+
+    /// checkLoopInterference - Return true if there is interference overlapping
+    /// Loop.
+    bool checkLoopInterference(MachineLoopRange*);
+
+    void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+  private:
+    Query(const Query&);          // DO NOT IMPLEMENT
+    void operator=(const Query&); // DO NOT IMPLEMENT
+
+    // Private interface for queries
+    void findIntersection(InterferenceResult &IR) const;
+  };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..3bbda1c2e609
--- /dev/null
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,129 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri,
+                                    LiveIntervals &lis,
+                                    VirtRegMap &vrm) {
+  const TargetRegisterClass *RC = mri.getRegClass(getReg());
+  unsigned VReg = mri.createVirtualRegister(RC);
+  vrm.grow();
+  vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg()));
+  LiveInterval &li = lis.getOrCreateInterval(VReg);
+  newRegs_.push_back(&li);
+  return li;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+                                   const TargetInstrInfo &tii,
+                                   AliasAnalysis *aa) {
+  for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+       E = parent_.vni_end(); I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+    if (!DefMI)
+      continue;
+    if (tii.isTriviallyReMaterializable(DefMI, aa))
+      remattable_.insert(VNI);
+  }
+  scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+                                        const TargetInstrInfo &tii,
+                                        AliasAnalysis *aa) {
+  if (!scannedRemattable_)
+    scanRemattable(lis, tii, aa);
+  return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                       SlotIndex OrigIdx,
+                                       SlotIndex UseIdx,
+                                       LiveIntervals &lis) {
+  OrigIdx = OrigIdx.getUseIndex();
+  UseIdx = UseIdx.getUseIndex();
+  for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = OrigMI->getOperand(i);
+    if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg())
+      continue;
+    // Reserved registers are OK.
+    if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+      continue;
+    // We don't want to move any defs.
+    if (MO.isDef())
+      return false;
+    // We cannot depend on virtual registers in uselessRegs_.
+    for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui)
+      if (uselessRegs_[ui]->reg == MO.getReg())
+        return false;
+
+    LiveInterval &li = lis.getInterval(MO.getReg());
+    const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+    if (!OVNI)
+      continue;
+    if (OVNI != li.getVNInfoAt(UseIdx))
+      return false;
+  }
+  return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+                                       SlotIndex UseIdx,
+                                       bool cheapAsAMove,
+                                       LiveIntervals &lis) {
+  assert(scannedRemattable_ && "Call anyRematerializable first");
+
+  // Use scanRemattable info.
+  if (!remattable_.count(RM.ParentVNI))
+    return false;
+
+  // No defining instruction.
+  RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def);
+  assert(RM.OrigMI && "Defining instruction for remattable value disappeared");
+
+  // If only cheap remats were requested, bail out early.
+  if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+    return false;
+
+  // Verify that all used registers are available with the same values.
+  if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis))
+    return false;
+
+  return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         unsigned DestReg,
+                                         const Remat &RM,
+                                         LiveIntervals &lis,
+                                         const TargetInstrInfo &tii,
+                                         const TargetRegisterInfo &tri) {
+  assert(RM.OrigMI && "Invalid remat");
+  tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+  rematted_.insert(RM.ParentVNI);
+  return lis.InsertMachineInstrInMaps(--MI).getDefIndex();
+}
+
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 000000000000..73f69ed63983
--- /dev/null
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,135 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+  LiveInterval &parent_;
+  SmallVectorImpl<LiveInterval*> &newRegs_;
+  const SmallVectorImpl<LiveInterval*> &uselessRegs_;
+
+  /// firstNew_ - Index of the first register added to newRegs_.
+  const unsigned firstNew_;
+
+  /// scannedRemattable_ - true when remattable values have been identified.
+  bool scannedRemattable_;
+
+  /// remattable_ - Values defined by remattable instructions as identified by
+  /// tii.isTriviallyReMaterializable().
+  SmallPtrSet<VNInfo*,4> remattable_;
+
+  /// rematted_ - Values that were actually rematted, and so need to have their
+  /// live range trimmed or entirely removed.
+  SmallPtrSet<VNInfo*,4> rematted_;
+
+  /// scanRemattable - Identify the parent_ values that may rematerialize.
+  void scanRemattable(LiveIntervals &lis,
+                      const TargetInstrInfo &tii,
+                      AliasAnalysis *aa);
+
+  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+  /// OrigIdx are also available with the same value at UseIdx.
+  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                          SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+  /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+  /// @param parent The register being spilled or split.
+  /// @param newRegs List to receive any new registers created. This needn't be
+  ///                empty initially, any existing registers are ignored.
+  /// @param uselessRegs List of registers that can't be used when
+  ///        rematerializing values because they are about to be removed.
+  LiveRangeEdit(LiveInterval &parent,
+                SmallVectorImpl<LiveInterval*> &newRegs,
+                const SmallVectorImpl<LiveInterval*> &uselessRegs)
+    : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+      firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+  LiveInterval &getParent() const { return parent_; }
+  unsigned getReg() const { return parent_.reg; }
+
+  /// Iterator for accessing the new registers added by this edit.
+  typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+  iterator begin() const { return newRegs_.begin()+firstNew_; }
+  iterator end() const { return newRegs_.end(); }
+  unsigned size() const { return newRegs_.size()-firstNew_; }
+  bool empty() const { return size() == 0; }
+  LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+  /// create - Create a new register with the same class and stack slot as
+  /// parent.
+  LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+  /// anyRematerializable - Return true if any parent values may be
+  /// rematerializable.
+  /// This function must be called before ny rematerialization is attempted.
+  bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+                           AliasAnalysis*);
+
+  /// Remat - Information needed to rematerialize at a specific location.
+  struct Remat {
+    VNInfo *ParentVNI;      // parent_'s value at the remat location.
+    MachineInstr *OrigMI;   // Instruction defining ParentVNI.
+    explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+  };
+
+  /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+  /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
+  /// When cheapAsAMove is set, only cheap remats are allowed.
+  bool canRematerializeAt(Remat &RM,
+                          SlotIndex UseIdx,
+                          bool cheapAsAMove,
+                          LiveIntervals &lis);
+
+  /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+  /// instruction into MBB before MI. The new instruction is mapped, but
+  /// liveness is not updated.
+  /// Return the SlotIndex of the new instruction.
+  SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            unsigned DestReg,
+                            const Remat &RM,
+                            LiveIntervals&,
+                            const TargetInstrInfo&,
+                            const TargetRegisterInfo&);
+
+  /// markRematerialized - explicitly mark a value as rematerialized after doing
+  /// it manually.
+  void markRematerialized(VNInfo *ParentVNI) {
+    rematted_.insert(ParentVNI);
+  }
+
+  /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+  bool didRematerialize(VNInfo *ParentVNI) const {
+    return rematted_.count(ParentVNI);
+  }
+};
+
+}
+
+#endif
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b5c385f77239..c75196a47210 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -26,7 +26,9 @@ using namespace llvm;
 
 char LiveStacks::ID = 0;
 INITIALIZE_PASS(LiveStacks, "livestacks",
-                "Live Stack Slot Analysis", false, false);
+                "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
 
 void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
@@ -48,6 +50,22 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) {
   return false;
 }
 
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+  assert(Slot >= 0 && "Spill slot indice must be >= 0");
+  SS2IntervalMap::iterator I = S2IMap.find(Slot);
+  if (I == S2IMap.end()) {
+    I = S2IMap.insert(I, std::make_pair(Slot,
+            LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+    S2RCMap.insert(std::make_pair(Slot, RC));
+  } else {
+    // Use the largest common subclass register class.
+    const TargetRegisterClass *OldRC = S2RCMap[Slot];
+    S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+  }
+  return I->second;
+}
+
 /// print - Implement the dump method.
 void LiveStacks::print(raw_ostream &OS, const Module*) const {
 
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 375307b973a9..dd43ef2530c1 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -31,7 +31,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/DepthFirstIterator.h"
@@ -42,8 +41,11 @@
 using namespace llvm;
 
 char LiveVariables::ID = 0;
-INITIALIZE_PASS(LiveVariables, "livevars",
-                "Live Variable Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false)
 
 
 void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -79,13 +81,7 @@ void LiveVariables::VarInfo::dump() const {
 LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
   assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
          "getVarInfo: not a virtual register!");
-  RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
-  if (RegIdx >= VirtRegInfo.size()) {
-    if (RegIdx >= 2*VirtRegInfo.size())
-      VirtRegInfo.resize(RegIdx*2);
-    else
-      VirtRegInfo.resize(2*VirtRegInfo.size());
-  }
+  VirtRegInfo.grow(RegIdx);
   return VirtRegInfo[RegIdx];
 }
 
@@ -498,9 +494,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
   std::fill(PhysRegUse,  PhysRegUse  + NumRegs, (MachineInstr*)0);
   PHIJoins.clear();
 
-  /// Get some space for a respectable number of registers.
-  VirtRegInfo.resize(64);
-
   analyzePHINodes(mf);
 
   // Calculate live variable information in depth first order on the CFG of the
@@ -628,19 +621,14 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
 
   // Convert and transfer the dead / killed information we have gathered into
   // VirtRegInfo onto MI's.
-  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
-    for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
-      if (VirtRegInfo[i].Kills[j] ==
-          MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
-        VirtRegInfo[i]
-          .Kills[j]->addRegisterDead(i +
-                                     TargetRegisterInfo::FirstVirtualRegister,
-                                     TRI);
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+      if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+        VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
       else
-        VirtRegInfo[i]
-          .Kills[j]->addRegisterKilled(i +
-                                       TargetRegisterInfo::FirstVirtualRegister,
-                                       TRI);
+        VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+  }
 
   // Check to make sure there are no unreachable blocks in the MC CFG for the
   // function.  If so, it is due to a bug in the instruction selector or some
@@ -775,8 +763,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
         getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
 
   // Update info for all live variables
-  for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
-         E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     VarInfo &VI = getVarInfo(Reg);
     if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
       VI.AliveBlocks.set(NumNew);
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 7e366f0ceec0..1318d6212497 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -9,7 +9,7 @@
 //
 // This pass assigns local frame indices to stack slots relative to one another
 // and allocates additional base registers to access them when the target
-// estimates the are likely to be out of range of stack pointer and frame
+// estimates they are likely to be out of range of stack pointer and frame
 // pointer relative addressing.
 //
 //===----------------------------------------------------------------------===//
@@ -34,7 +34,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 using namespace llvm;
 
@@ -152,9 +152,9 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
 void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Loop over all of the stack objects, assigning sequential addresses...
   MachineFrameInfo *MFI = Fn.getFrameInfo();
-  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
   bool StackGrowsDown =
-    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
   int64_t Offset = 0;
   unsigned MaxAlign = 0;
 
@@ -227,27 +227,28 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
 
   MachineFrameInfo *MFI = Fn.getFrameInfo();
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
-  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
   bool StackGrowsDown =
-    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
-  MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
 
   // Collect all of the instructions in the block that reference
   // a frame index. Also store the frame index referenced to ease later
   // lookup. (For any insn that has more than one FI reference, we arbitrarily
   // choose the first one).
   SmallVector<FrameRef, 64> FrameReferenceInsns;
-  // A base register definition is a register+offset pair.
-  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
 
+  // A base register definition is a register + offset pair.
+  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
 
   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
       MachineInstr *MI = I;
+
       // Debug value instructions can't be out of range, so they don't need
       // any updates.
       if (MI->isDebugValue())
         continue;
+
       // For now, allocate the base register(s) within the basic block
       // where they're used, and don't try to keep them around outside
       // of that. It may be beneficial to try sharing them more broadly
@@ -268,11 +269,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       }
     }
   }
+
   // Sort the frame references by local offset
   array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
 
+  MachineBasicBlock *Entry = Fn.begin();
 
-  // Loop throught the frame references and allocate for them as necessary
+  // Loop through the frame references and allocate for them as necessary.
   for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
     MachineBasicBlock::iterator I =
       FrameReferenceInsns[ref].getMachineInstr();
@@ -321,10 +324,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
             DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
                   " at frame local offset " <<
                   LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
             // Tell the target to insert the instruction to initialize
             // the base register.
-            TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
-                                              FrameIdx, InstrOffset);
+            //            MachineBasicBlock::iterator InsertionPt = Entry->begin();
+            TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+                                              InstrOffset);
 
             // The base register already includes any offset specified
             // by the instruction, so account for that so it doesn't get
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 50f3f672dced..ccbff0af5b2c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -146,27 +147,46 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
   return I;
 }
 
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+  while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+    ++I;
+  return I;
+}
+
 MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
   iterator I = end();
-  while (I != begin() && (--I)->getDesc().isTerminator())
+  while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
     ; /*noop */
-  if (I != end() && !I->getDesc().isTerminator()) ++I;
+  while (I != end() && !I->getDesc().isTerminator())
+    ++I;
   return I;
 }
 
-void MachineBasicBlock::dump() const {
-  print(dbgs());
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+  iterator B = begin(), I = end();
+  while (I != B) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    return I;
+  }
+  // The block is all debug values.
+  return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+  // A block with a landing pad successor only has one other successor.
+  if (succ_size() > 2)
+    return 0;
+  for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+    if ((*I)->isLandingPad())
+      return *I;
+  return 0;
 }
 
-static inline void OutputReg(raw_ostream &os, unsigned RegNo,
-                             const TargetRegisterInfo *TRI = 0) {
-  if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
-    if (TRI)
-      os << " %" << TRI->get(RegNo).Name;
-    else
-      os << " %physreg" << RegNo;
-  } else
-    os << " %reg" << RegNo;
+void MachineBasicBlock::dump() const {
+  print(dbgs());
 }
 
 StringRef MachineBasicBlock::getName() const {
@@ -176,7 +196,7 @@ StringRef MachineBasicBlock::getName() const {
     return "(null)";
 }
 
-void MachineBasicBlock::print(raw_ostream &OS) const {
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
   const MachineFunction *MF = getParent();
   if (!MF) {
     OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -186,6 +206,9 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
 
   if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
 
+  if (Indexes)
+    OS << Indexes->getMBBStartIdx(this) << '\t';
+
   OS << "BB#" << getNumber() << ": ";
 
   const char *Comma = "";
@@ -198,28 +221,36 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
   if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
   OS << '\n';
 
-  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();  
+  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
   if (!livein_empty()) {
+    if (Indexes) OS << '\t';
     OS << "    Live Ins:";
     for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
-      OutputReg(OS, *I, TRI);
+      OS << ' ' << PrintReg(*I, TRI);
     OS << '\n';
   }
   // Print the preds of this block according to the CFG.
   if (!pred_empty()) {
+    if (Indexes) OS << '\t';
     OS << "    Predecessors according to CFG:";
     for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
       OS << " BB#" << (*PI)->getNumber();
     OS << '\n';
   }
-  
+
   for (const_iterator I = begin(); I != end(); ++I) {
+    if (Indexes) {
+      if (Indexes->hasIndex(I))
+        OS << Indexes->getInstructionIndex(I);
+      OS << '\t';
+    }
     OS << '\t';
     I->print(OS, &getParent()->getTarget());
   }
 
   // Print the successors of this block according to the CFG.
   if (!succ_empty()) {
+    if (Indexes) OS << '\t';
     OS << "    Successors according to CFG:";
     for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
       OS << " BB#" << (*SI)->getNumber();
@@ -431,14 +462,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
   MachineFunction *MF = getParent();
   DebugLoc dl;  // FIXME: this is nowhere
 
-  // We may need to update this's terminator, but we can't do that if AnalyzeBranch
-  // fails. If this uses a jump table, we won't touch it.
+  // We may need to update this's terminator, but we can't do that if
+  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
   if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
     return NULL;
 
+  // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jumps to the same MBB is either case. We have duplicate CFG edges in that
+  // case that we can't handle. Since this never happens in properly optimized
+  // code, just skip those edges.
+  if (TBB && TBB == FBB) {
+    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+                 << getNumber() << '\n');
+    return NULL;
+  }
+
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
   MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
   DEBUG(dbgs() << "Splitting critical edge:"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 272b54dea1fa..07a7d27b019f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -22,15 +22,18 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
 
 using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
-STATISTIC(NumPhysCSEs,  "Number of phyreg defining common subexpr eliminated");
+STATISTIC(NumPhysCSEs,
+          "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
 
 namespace {
   class MachineCSE : public MachineFunctionPass {
@@ -41,7 +44,9 @@ namespace {
     MachineRegisterInfo *MRI;
   public:
     static char ID; // Pass identification
-    MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
+    MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     
@@ -61,10 +66,13 @@ namespace {
 
   private:
     const unsigned LookAheadLimit;
-    typedef ScopedHashTableScope<MachineInstr*, unsigned,
-                                 MachineInstrExpressionTrait> ScopeType;
+    typedef RecyclingAllocator<BumpPtrAllocator,
+        ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+    typedef ScopedHashTable<MachineInstr*, unsigned,
+        MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+    typedef ScopedHTType::ScopeTy ScopeType;
     DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
-    ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+    ScopedHTType VNT;
     SmallVector<MachineInstr*, 64> Exps;
     unsigned CurrVN;
 
@@ -72,11 +80,11 @@ namespace {
     bool isPhysDefTriviallyDead(unsigned Reg,
                                 MachineBasicBlock::const_iterator I,
                                 MachineBasicBlock::const_iterator E) const ;
-    bool hasLivePhysRegDefUse(const MachineInstr *MI,
-                              const MachineBasicBlock *MBB,
-                              unsigned &PhysDef) const;
-    bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
-                           unsigned PhysDef) const;
+    bool hasLivePhysRegDefUses(const MachineInstr *MI,
+                               const MachineBasicBlock *MBB,
+                               SmallSet<unsigned,8> &PhysRefs) const;
+    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+                          SmallSet<unsigned,8> &PhysRefs) const;
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                            MachineInstr *CSMI, MachineInstr *MI);
@@ -91,8 +99,12 @@ namespace {
 } // end anonymous namespace
 
 char MachineCSE::ID = 0;
-INITIALIZE_PASS(MachineCSE, "machine-cse",
-                "Machine Common Subexpression Elimination", false, false);
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+                "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+                "Machine Common Subexpression Elimination", false, false)
 
 FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
 
@@ -104,7 +116,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
     if (!MO.isReg() || !MO.isUse())
       continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
     if (!MRI->hasOneNonDBGUse(Reg))
       // Only coalesce single use copies. This ensure the copy will be
@@ -120,17 +132,12 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
       continue;
     if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
       continue;
-    const TargetRegisterClass *SRC   = MRI->getRegClass(SrcReg);
-    const TargetRegisterClass *RC    = MRI->getRegClass(Reg);
-    const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
-    if (!NewRC)
+    if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
       continue;
     DEBUG(dbgs() << "Coalescing: " << *DefMI);
-    DEBUG(dbgs() << "*** to: " << *MI);
+    DEBUG(dbgs() << "***     to: " << *MI);
     MO.setReg(SrcReg);
     MRI->clearKillFlags(SrcReg);
-    if (NewRC != SRC)
-      MRI->setRegClass(SrcReg, NewRC);
     DefMI->eraseFromParent();
     ++NumCoalesces;
     Changed = true;
@@ -176,14 +183,14 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
   return false;
 }
 
-/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
+/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
 /// physical registers (except for dead defs of physical registers). It also
 /// returns the physical register def by reference if it's the only one and the
 /// instruction does not uses a physical register.
-bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
-                                      const MachineBasicBlock *MBB,
-                                      unsigned &PhysDef) const {
-  PhysDef = 0;
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+                                       const MachineBasicBlock *MBB,
+                                       SmallSet<unsigned,8> &PhysRefs) const {
+  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg())
@@ -193,35 +200,22 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
       continue;
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
-    if (MO.isUse()) {
-      // Can't touch anything to read a physical register.
-      PhysDef = 0;
-      return true;
-    }
-    if (MO.isDead())
-      // If the def is dead, it's ok.
-      continue;
-    // Ok, this is a physical register def that's not marked "dead". That's
+    // If the def is dead, it's ok. But the def may not marked "dead". That's
     // common since this pass is run before livevariables. We can scan
     // forward a few instructions and check if it is obviously dead.
-    if (PhysDef) {
-      // Multiple physical register defs. These are rare, forget about it.
-      PhysDef = 0;
-      return true;
-    }
-    PhysDef = Reg;
+    if (MO.isDef() &&
+        (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+      continue;
+    PhysRefs.insert(Reg);
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+      PhysRefs.insert(*Alias);
   }
 
-  if (PhysDef) {
-    MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
-    if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
-      return true;
-  }
-  return false;
+  return !PhysRefs.empty();
 }
 
-bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
-                                  unsigned PhysDef) const {
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+                                  SmallSet<unsigned,8> &PhysRefs) const {
   // For now conservatively returns false if the common subexpression is
   // not in the same basic block as the given instruction.
   MachineBasicBlock *MBB = MI->getParent();
@@ -237,8 +231,17 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
 
     if (I == E)
       return true;
-    if (I->modifiesRegister(PhysDef, TRI))
-      return false;
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(MOReg))
+        continue;
+      if (PhysRefs.count(MOReg))
+        return false;
+    }
 
     --LookAheadLeft;
     ++I;
@@ -259,7 +262,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
   // Ignore stuff that we obviously can't move.
   const TargetInstrDesc &TID = MI->getDesc();  
   if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
-      TID.hasUnmodeledSideEffects())
+      MI->hasUnmodeledSideEffects())
     return false;
 
   if (TID.mayLoad()) {
@@ -281,14 +284,13 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
                                    MachineInstr *CSMI, MachineInstr *MI) {
   // FIXME: Heuristics that works around the lack the live range splitting.
 
-  // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
-  // immediate predecessor. We don't want to increase register pressure and end up
-  // causing other computation to be spilled.
+  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+  // an immediate predecessor. We don't want to increase register pressure and
+  // end up causing other computation to be spilled.
   if (MI->getDesc().isAsCheapAsAMove()) {
     MachineBasicBlock *CSBB = CSMI->getParent();
     MachineBasicBlock *BB = MI->getParent();
-    if (CSBB != BB && 
-        find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+    if (CSBB != BB && !CSBB->isSuccessor(BB))
       return false;
   }
 
@@ -297,7 +299,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
   bool HasVRegUse = false;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isUse() && MO.getReg() &&
+    if (MO.isReg() && MO.isUse() &&
         TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
       HasVRegUse = true;
       break;
@@ -359,7 +361,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     if (!isCSECandidate(MI))
       continue;
 
-    bool DefPhys = false;
     bool FoundCSE = VNT.count(MI);
     if (!FoundCSE) {
       // Look for trivial copy coalescing opportunities.
@@ -370,24 +371,37 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
         FoundCSE = VNT.count(MI);
       }
     }
-    // FIXME: commute commutable instructions?
 
-    // If the instruction defines a physical register and the value *may* be
+    // Commute commutable instructions.
+    bool Commuted = false;
+    if (!FoundCSE && MI->getDesc().isCommutable()) {
+      MachineInstr *NewMI = TII->commuteInstruction(MI);
+      if (NewMI) {
+        Commuted = true;
+        FoundCSE = VNT.count(NewMI);
+        if (NewMI != MI)
+          // New instruction. It doesn't need to be kept.
+          NewMI->eraseFromParent();
+        else if (!FoundCSE)
+          // MI was changed but it didn't help, commute it back!
+          (void)TII->commuteInstruction(MI);
+      }
+    }
+
+    // If the instruction defines physical registers and the values *may* be
     // used, then it's not safe to replace it with a common subexpression.
-    unsigned PhysDef = 0;
-    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
+    // It's also not safe if the instruction uses physical registers.
+    SmallSet<unsigned,8> PhysRefs;
+    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
       FoundCSE = false;
 
       // ... Unless the CS is local and it also defines the physical register
-      // which is not clobbered in between.
-      if (PhysDef) {
-        unsigned CSVN = VNT.lookup(MI);
-        MachineInstr *CSMI = Exps[CSVN];
-        if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
-          FoundCSE = true;
-          DefPhys = true;
-        }
-      }
+      // which is not clobbered in between and the physical register uses 
+      // were not clobbered.
+      unsigned CSVN = VNT.lookup(MI);
+      MachineInstr *CSMI = Exps[CSVN];
+      if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+        FoundCSE = true;
     }
 
     if (!FoundCSE) {
@@ -432,8 +446,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       }
       MI->eraseFromParent();
       ++NumCSEs;
-      if (DefPhys)
+      if (!PhysRefs.empty())
         ++NumPhysCSEs;
+      if (Commuted)
+        ++NumCommutes;
     } else {
       DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
       VNT.insert(MI, CurrVN++);
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 3c674789244a..04c8ecbf9bdc 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -25,7 +25,7 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
 char MachineDominatorTree::ID = 0;
 
 INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
-                "MachineDominator Tree Construction", true, true);
+                "MachineDominator Tree Construction", true, true)
 
 char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
 
@@ -42,6 +42,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
 
 MachineDominatorTree::MachineDominatorTree()
     : MachineFunctionPass(ID) {
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   DT = new DominatorTreeBase<MachineBasicBlock>(false);
 }
 
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 017170076ceb..85532407ca43 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -33,7 +33,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/GraphWriter.h"
@@ -52,14 +52,15 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
 }
 
 MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
-                                 unsigned FunctionNum, MachineModuleInfo &mmi)
-  : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
+                                 unsigned FunctionNum, MachineModuleInfo &mmi,
+                                 GCModuleInfo* gmi)
+  : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
   if (TM.getRegisterInfo())
     RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
   else
     RegInfo = 0;
   MFInfo = 0;
-  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
   if (Fn->hasFnAttr(Attribute::StackAlignment))
     FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
         Fn->getAttributes().getFnAttributes()));
@@ -190,20 +191,21 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
 }
 
 MachineMemOperand *
-MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
-                                      int64_t o, uint64_t s,
-                                      unsigned base_alignment) {
-  return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+                                      uint64_t s, unsigned base_alignment,
+                                      const MDNode *TBAAInfo) {
+  return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+                                           TBAAInfo);
 }
 
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
   return new (Allocator)
-             MachineMemOperand(MMO->getValue(), MMO->getFlags(),
-                               int64_t(uint64_t(MMO->getOffset()) +
-                                       uint64_t(Offset)),
-                               Size, MMO->getBaseAlignment());
+             MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+                                                  MMO->getOffset()+Offset),
+                               MMO->getFlags(), Size,
+                               MMO->getBaseAlignment(), 0);
 }
 
 MachineInstr::mmo_iterator
@@ -231,10 +233,10 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
       else {
         // Clone the MMO and unset the store flag.
         MachineMemOperand *JustLoad =
-          getMachineMemOperand((*I)->getValue(),
+          getMachineMemOperand((*I)->getPointerInfo(),
                                (*I)->getFlags() & ~MachineMemOperand::MOStore,
-                               (*I)->getOffset(), (*I)->getSize(),
-                               (*I)->getBaseAlignment());
+                               (*I)->getSize(), (*I)->getBaseAlignment(),
+                               (*I)->getTBAAInfo());
         Result[Index] = JustLoad;
       }
       ++Index;
@@ -263,10 +265,10 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
       else {
         // Clone the MMO and unset the load flag.
         MachineMemOperand *JustStore =
-          getMachineMemOperand((*I)->getValue(),
+          getMachineMemOperand((*I)->getPointerInfo(),
                                (*I)->getFlags() & ~MachineMemOperand::MOLoad,
-                               (*I)->getOffset(), (*I)->getSize(),
-                               (*I)->getBaseAlignment());
+                               (*I)->getSize(), (*I)->getBaseAlignment(),
+                               (*I)->getTBAAInfo());
         Result[Index] = JustStore;
       }
       ++Index;
@@ -279,7 +281,7 @@ void MachineFunction::dump() const {
   print(dbgs());
 }
 
-void MachineFunction::print(raw_ostream &OS) const {
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
   OS << "# Machine code for function " << Fn->getName() << ":\n";
 
   // Print Frame Information
@@ -328,7 +330,7 @@ void MachineFunction::print(raw_ostream &OS) const {
   
   for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
     OS << '\n';
-    BB->print(OS);
+    BB->print(OS, Indexes);
   }
 
   OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
@@ -346,17 +348,15 @@ namespace llvm {
 
     std::string getNodeLabel(const MachineBasicBlock *Node,
                              const MachineFunction *Graph) {
-      if (isSimple () && Node->getBasicBlock() &&
-          !Node->getBasicBlock()->getName().empty())
-        return Node->getBasicBlock()->getNameStr() + ":";
-
       std::string OutStr;
       {
         raw_string_ostream OSS(OutStr);
-        
-        if (isSimple())
-          OSS << Node->getNumber() << ':';
-        else
+
+        if (isSimple()) {
+          OSS << "BB#" << Node->getNumber();
+          if (const BasicBlock *BB = Node->getBasicBlock())
+            OSS << ": " << BB->getName();
+        } else
           Node->print(OSS);
       }
 
@@ -396,7 +396,8 @@ void MachineFunction::viewCFGOnly() const
 /// addLiveIn - Add the specified physical register as a live-in value and
 /// create a corresponding virtual register for it.
 unsigned MachineFunction::addLiveIn(unsigned PReg,
-                                    const TargetRegisterClass *RC) {
+                                    const TargetRegisterClass *RC,
+                                    DebugLoc DL) {
   MachineRegisterInfo &MRI = getRegInfo();
   unsigned VReg = MRI.getLiveInVirtReg(PReg);
   if (VReg) {
@@ -405,6 +406,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
   }
   VReg = MRI.createVirtualRegister(RC);
   MRI.addLiveIn(PReg, VReg);
+  MRI.addLiveInLoc(VReg, DL);
   return VReg;
 }
 
@@ -426,6 +428,13 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
   return Ctx.GetOrCreateSymbol(Name.str());
 }
 
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+  const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+  return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+                               Twine(getFunctionNumber())+"$pb");
+}
 
 //===----------------------------------------------------------------------===//
 //  MachineFrameInfo implementation
@@ -485,7 +494,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
 void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
   if (Objects.empty()) return;
 
-  const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+  const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
   int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
 
   OS << "Frame Objects:\n";
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 4f84b952e061..054c750c9f2b 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -12,22 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 using namespace llvm;
 
-// Register this pass with PassInfo directly to avoid having to define
-// a default constructor.
-static PassInfo
-X("Machine Function Analysis", "machine-function-analysis",
-   &MachineFunctionAnalysis::ID, 0,
-  /*CFGOnly=*/false, /*is_analysis=*/true);
-
 char MachineFunctionAnalysis::ID = 0;
 
 MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
                                                  CodeGenOpt::Level OL) :
   FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+  initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
 }
 
 MachineFunctionAnalysis::~MachineFunctionAnalysis() {
@@ -52,7 +47,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
 bool MachineFunctionAnalysis::runOnFunction(Function &F) {
   assert(!MF && "MachineFunctionAnalysis already initialized!");
   MF = new MachineFunction(&F, TM, NextFnNum++,
-                           getAnalysis<MachineModuleInfo>());
+                           getAnalysis<MachineModuleInfo>(),
+                           getAnalysisIfAvailable<GCModuleInfo>());
   return false;
 }
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 446e461d5460..aa9ea61acec7 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -102,13 +102,13 @@ void MachineOperand::setReg(unsigned Reg) {
     if (MachineBasicBlock *MBB = MI->getParent())
       if (MachineFunction *MF = MBB->getParent()) {
         RemoveRegOperandFromRegInfo();
-        Contents.Reg.RegNo = Reg;
+        SmallContents.RegNo = Reg;
         AddRegOperandToRegInfo(&MF->getRegInfo());
         return;
       }
         
   // Otherwise, just change the register, no problem.  :)
-  Contents.Reg.RegNo = Reg;
+  SmallContents.RegNo = Reg;
 }
 
 void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
@@ -159,7 +159,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
   } else {
     // Otherwise, change this to a register and set the reg#.
     OpKind = MO_Register;
-    Contents.Reg.RegNo = Reg;
+    SmallContents.RegNo = Reg;
 
     // If this operand is embedded in a function, add the operand to the
     // register's use/def list.
@@ -227,24 +227,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
       if (const MachineBasicBlock *MBB = MI->getParent())
         if (const MachineFunction *MF = MBB->getParent())
           TM = &MF->getTarget();
+  const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
 
   switch (getType()) {
   case MachineOperand::MO_Register:
-    if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
-      OS << "%reg" << getReg();
-    } else {
-      if (TM)
-        OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
-      else
-        OS << "%physreg" << getReg();
-    }
-
-    if (getSubReg() != 0) {
-      if (TM)
-        OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
-      else
-        OS << ':' << getSubReg();
-    }
+    OS << PrintReg(getReg(), TRI, getSubReg());
 
     if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
         isEarlyClobber()) {
@@ -335,10 +322,45 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
 // MachineMemOperand Implementation
 //===----------------------------------------------------------------------===//
 
-MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
-                                     int64_t o, uint64_t s, unsigned int a)
-  : Offset(o), Size(s), V(v),
-    Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) {
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+  if (V == 0) return 0;
+  return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+  return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// the specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+  return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+  return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+  return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+  return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+                                     uint64_t s, unsigned int a,
+                                     const MDNode *TBAAInfo)
+  : PtrInfo(ptrinfo), Size(s),
+    Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+    TBAAInfo(TBAAInfo) {
+  assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+         "invalid pointer value");
   assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
   assert((isLoad() || isStore()) && "Not a load/store!");
 }
@@ -346,9 +368,9 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
 /// Profile - Gather unique data for the object.
 ///
 void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
-  ID.AddInteger(Offset);
+  ID.AddInteger(getOffset());
   ID.AddInteger(Size);
-  ID.AddPointer(V);
+  ID.AddPointer(getValue());
   ID.AddInteger(Flags);
 }
 
@@ -364,8 +386,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
       ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
     // Also update the base and offset, because the new alignment may
     // not be applicable with the old ones.
-    V = MMO->getValue();
-    Offset = MMO->getOffset();
+    PtrInfo = MMO->PtrInfo;
   }
 }
 
@@ -410,6 +431,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
       MMO.getBaseAlignment() != MMO.getSize())
     OS << "(align=" << MMO.getAlignment() << ")";
 
+  // Print TBAA info.
+  if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+    OS << "(tbaa=";
+    if (TBAAInfo->getNumOperands() > 0)
+      WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+    else
+      OS << "<unknown>";
+    OS << ")";
+  }
+
   return OS;
 }
 
@@ -782,6 +813,14 @@ unsigned MachineInstr::getNumExplicitOperands() const {
   return NumOperands;
 }
 
+bool MachineInstr::isStackAligningInlineAsm() const {
+  if (isInlineAsm()) {
+    unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+    if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+      return true;
+  }
+  return false;
+}
 
 /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
 /// the specific register or -1 if it is not found. It further tightens
@@ -881,14 +920,15 @@ int MachineInstr::findFirstPredOperandIdx() const {
 bool MachineInstr::
 isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
   if (isInlineAsm()) {
-    assert(DefOpIdx >= 3);
+    assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
     const MachineOperand &MO = getOperand(DefOpIdx);
     if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
       return false;
     // Determine the actual operand index that corresponds to this index.
     unsigned DefNo = 0;
     unsigned DefPart = 0;
-    for (unsigned i = 2, e = getNumOperands(); i < e; ) {
+    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+         i < e; ) {
       const MachineOperand &FMO = getOperand(i);
       // After the normal asm operands there may be additional imp-def regs.
       if (!FMO.isImm())
@@ -903,7 +943,8 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
       }
       ++DefNo;
     }
-    for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
+    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+         i != e; ++i) {
       const MachineOperand &FMO = getOperand(i);
       if (!FMO.isImm())
         continue;
@@ -946,7 +987,8 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
 
     // Find the flag operand corresponding to UseOpIdx
     unsigned FlagIdx, NumOps=0;
-    for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+    for (FlagIdx = InlineAsm::MIOp_FirstOperand;
+         FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
       const MachineOperand &UFMO = getOperand(FlagIdx);
       // After the normal asm operands there may be additional imp-def regs.
       if (!UFMO.isImm())
@@ -964,9 +1006,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
       if (!DefOpIdx)
         return true;
 
-      unsigned DefIdx = 2;
+      unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
       // Remember to adjust the index. First operand is asm string, second is
-      // the AlignStack bit, then there is a flag for each.
+      // the HasSideEffects and AlignStack bits, then there is a flag for each.
       while (DefNo) {
         const MachineOperand &FMO = getOperand(DefIdx);
         assert(FMO.isImm());
@@ -1071,7 +1113,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
     SawStore = true;
     return false;
   }
-  if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+
+  if (isLabel() || isDebugValue() ||
+      TID->isTerminator() || hasUnmodeledSideEffects())
     return false;
 
   // See if this instruction does a load.  If so, we have to guarantee that the
@@ -1122,7 +1166,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
   if (!TID->mayStore() &&
       !TID->mayLoad() &&
       !TID->isCall() &&
-      !TID->hasUnmodeledSideEffects())
+      !hasUnmodeledSideEffects())
     return false;
 
   // Otherwise, if the instruction has no memory reference information,
@@ -1166,7 +1210,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
         if (PSV->isConstant(MFI))
           continue;
       // If we have an AliasAnalysis, ask it whether the memory is constant.
-      if (AA && AA->pointsToConstantMemory(V))
+      if (AA && AA->pointsToConstantMemory(
+                      AliasAnalysis::Location(V, (*I)->getSize(),
+                                              (*I)->getTBAAInfo())))
         continue;
     }
 
@@ -1194,6 +1240,18 @@ unsigned MachineInstr::isConstantValuePHI() const {
   return Reg;
 }
 
+bool MachineInstr::hasUnmodeledSideEffects() const {
+  if (getDesc().hasUnmodeledSideEffects())
+    return true;
+  if (isInlineAsm()) {
+    unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+    if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+      return true;
+  }
+
+  return false;
+}
+
 /// allDefsAreDead - Return true if all the defs of this instruction are dead.
 ///
 bool MachineInstr::allDefsAreDead() const {
@@ -1207,6 +1265,17 @@ bool MachineInstr::allDefsAreDead() const {
   return true;
 }
 
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+  for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      addOperand(MO);
+  }
+}
+
 void MachineInstr::dump() const {
   dbgs() << "  " << *this;
 }
@@ -1257,7 +1326,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (StartOp != 0) OS << ", ";
     getOperand(StartOp).print(OS, TM);
     unsigned Reg = getOperand(StartOp).getReg();
-    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
       VirtRegs.push_back(Reg);
   }
 
@@ -1270,11 +1339,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   // Print the rest of the operands.
   bool OmittedAnyCallClobbers = false;
   bool FirstOp = true;
+
+  if (isInlineAsm()) {
+    // Print asm string.
+    OS << " ";
+    getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+    // Print HasSideEffects, IsAlignStack
+    unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+    if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+      OS << " [sideeffect]";
+    if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+      OS << " [alignstack]";
+
+    StartOp = InlineAsm::MIOp_FirstOperand;
+    FirstOp = false;
+  }
+
+
   for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
 
-    if (MO.isReg() && MO.getReg() &&
-        TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       VirtRegs.push_back(MO.getReg());
 
     // Omit call-clobbered registers which aren't used anywhere. This makes
@@ -1284,7 +1370,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (MF && getDesc().isCall() &&
         MO.isReg() && MO.isImplicit() && MO.isDef()) {
       unsigned Reg = MO.getReg();
-      if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
         const MachineRegisterInfo &MRI = MF->getRegInfo();
         if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
           bool HasAliasLive = false;
@@ -1348,14 +1434,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (!HaveSemi) OS << ";"; HaveSemi = true;
     for (unsigned i = 0; i != VirtRegs.size(); ++i) {
       const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
-      OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+      OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
       for (unsigned j = i+1; j != VirtRegs.size();) {
         if (MRI->getRegClass(VirtRegs[j]) != RC) {
           ++j;
           continue;
         }
         if (VirtRegs[i] != VirtRegs[j])
-          OS << "," << VirtRegs[j];
+          OS << "," << PrintReg(VirtRegs[j]);
         VirtRegs.erase(VirtRegs.begin()+j);
       }
     }
@@ -1533,8 +1619,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
     switch (MO.getType()) {
     default: break;
     case MachineOperand::MO_Register:
-      if (MO.isDef() && MO.getReg() &&
-          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
         continue;  // Skip virtual register defs.
       Key |= MO.getReg();
       break;
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1a74b747e9f2..443fc2d97bdf 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -28,8 +28,10 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
@@ -40,8 +42,14 @@
 
 using namespace llvm;
 
-STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
-STATISTIC(NumCSEed,   "Number of hoisted machine instructions CSEed");
+STATISTIC(NumHoisted,
+          "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+          "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+          "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+          "Number of hoisted machine instructions CSEed");
 STATISTIC(NumPostRAHoisted,
           "Number of machine instructions hoisted out of loops post regalloc");
 
@@ -51,9 +59,11 @@ namespace {
 
     const TargetMachine   *TM;
     const TargetInstrInfo *TII;
+    const TargetLowering *TLI;
     const TargetRegisterInfo *TRI;
     const MachineFrameInfo *MFI;
-    MachineRegisterInfo *RegInfo;
+    MachineRegisterInfo *MRI;
+    const InstrItineraryData *InstrItins;
 
     // Various analyses that we use...
     AliasAnalysis        *AA;      // Alias analysis info.
@@ -68,23 +78,37 @@ namespace {
 
     BitVector AllocatableSet;
 
+    // Track 'estimated' register pressure.
+    SmallSet<unsigned, 32> RegSeen;
+    SmallVector<unsigned, 8> RegPressure;
+
+    // Register pressure "limit" per register class. If the pressure
+    // is higher than the limit, then it's considered high.
+    SmallVector<unsigned, 8> RegLimit;
+
+    // Register pressure on path leading from loop preheader to current BB.
+    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
     // For each opcode, keep a list of potential CSE instructions.
     DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
 
   public:
     static char ID; // Pass identification, replacement for typeid
     MachineLICM() :
-      MachineFunctionPass(ID), PreRegAlloc(true) {}
+      MachineFunctionPass(ID), PreRegAlloc(true) {
+        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+      }
 
     explicit MachineLICM(bool PreRA) :
-      MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
     const char *getPassName() const { return "Machine Instruction LICM"; }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
       AU.addRequired<MachineLoopInfo>();
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<AliasAnalysis>();
@@ -94,6 +118,13 @@ namespace {
     }
 
     virtual void releaseMemory() {
+      RegSeen.clear();
+      RegPressure.clear();
+      RegLimit.clear();
+      BackTrace.clear();
+      for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+             CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+        CI->second.clear();
       CSEMap.clear();
     }
 
@@ -138,6 +169,24 @@ namespace {
     /// 
     bool IsLoopInvariantInst(MachineInstr &I);
 
+    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+    /// and an use in the current loop, return true if the target considered
+    /// it 'high'.
+    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+                               unsigned Reg) const;
+
+    bool IsCheapInstruction(MachineInstr &MI) const;
+
+    /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+    /// check if hoisting an instruction of the given cost matrix can cause high
+    /// register pressure.
+    bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+    /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+    /// the current block and update their register pressures to reflect the
+    /// effect of hoisting MI from the current block to the preheader.
+    void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
     /// IsProfitableToHoist - Return true if it is potentially profitable to
     /// hoist the given loop invariant.
     bool IsProfitableToHoist(MachineInstr &MI);
@@ -148,11 +197,16 @@ namespace {
     /// visit definitions before uses, allowing us to hoist a loop body in one
     /// pass without iteration.
     ///
-    void HoistRegion(MachineDomTreeNode *N);
+    void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+    /// InitRegPressure - Find all virtual register references that are liveout
+    /// of the preheader to initialize the starting "register pressure". Note
+    /// this does not count live through (livein but not used) registers.
+    void InitRegPressure(MachineBasicBlock *BB);
 
-    /// isLoadFromConstantMemory - Return true if the given instruction is a
-    /// load from constant memory.
-    bool isLoadFromConstantMemory(MachineInstr *MI);
+    /// UpdateRegPressure - Update estimate of register pressure after the
+    /// specified instruction.
+    void UpdateRegPressure(const MachineInstr *MI);
 
     /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
     /// the load itself could be hoisted. Return the unfolded and hoistable
@@ -174,8 +228,8 @@ namespace {
 
     /// Hoist - When an instruction is found to only use loop invariant operands
     /// that is safe to hoist, this instruction is called to do the dirty work.
-    ///
-    void Hoist(MachineInstr *MI);
+    /// It returns true if the instruction is hoisted.
+    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
 
     /// InitCSEMap - Initialize the CSE map with instructions that are in the
     /// current loop preheader that may become duplicates of instructions that
@@ -189,8 +243,13 @@ namespace {
 } // end anonymous namespace
 
 char MachineLICM::ID = 0;
-INITIALIZE_PASS(MachineLICM, "machinelicm",
-                "Machine Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
 
 FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
   return new MachineLICM(PreRegAlloc);
@@ -212,18 +271,32 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
 
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
   if (PreRegAlloc)
-    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
   else
-    DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
+    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+  DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
 
   Changed = FirstInLoop = false;
   TM = &MF.getTarget();
   TII = TM->getInstrInfo();
+  TLI = TM->getTargetLowering();
   TRI = TM->getRegisterInfo();
   MFI = MF.getFrameInfo();
-  RegInfo = &MF.getRegInfo();
+  MRI = &MF.getRegInfo();
+  InstrItins = TM->getInstrItineraryData();
   AllocatableSet = TRI->getAllocatableSet(MF);
 
+  if (PreRegAlloc) {
+    // Estimate register pressure during pre-regalloc pass.
+    unsigned NumRC = TRI->getNumRegClasses();
+    RegPressure.resize(NumRC);
+    std::fill(RegPressure.begin(), RegPressure.end(), 0);
+    RegLimit.resize(NumRC);
+    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+           E = TRI->regclass_end(); I != E; ++I)
+      RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF);
+  }
+
   // Get our Loop information...
   MLI = &getAnalysis<MachineLoopInfo>();
   DT  = &getAnalysis<MachineDominatorTree>();
@@ -248,7 +321,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
       // being hoisted.
       MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
       FirstInLoop = true;
-      HoistRegion(N);
+      HoistRegion(N, true);
       CSEMap.clear();
     }
   }
@@ -474,17 +547,33 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
 /// first order w.r.t the DominatorTree. This allows us to visit definitions
 /// before uses, allowing us to hoist a loop body in one pass without iteration.
 ///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
   assert(N != 0 && "Null dominator tree node?");
   MachineBasicBlock *BB = N->getBlock();
 
   // If this subregion is not in the top level loop at all, exit.
   if (!CurLoop->contains(BB)) return;
 
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  if (IsHeader) {
+    // Compute registers which are livein into the loop headers.
+    RegSeen.clear();
+    BackTrace.clear();
+    InitRegPressure(Preheader);
+  }
+
+  // Remember livein register pressure.
+  BackTrace.push_back(RegPressure);
+
   for (MachineBasicBlock::iterator
          MII = BB->begin(), E = BB->end(); MII != E; ) {
     MachineBasicBlock::iterator NextMII = MII; ++NextMII;
-    Hoist(&*MII);
+    MachineInstr *MI = &*MII;
+    if (!Hoist(MI, Preheader))
+      UpdateRegPressure(MI);
     MII = NextMII;
   }
 
@@ -496,6 +585,99 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
     for (unsigned I = 0, E = Children.size(); I != E; ++I)
       HoistRegion(Children[I]);
   }
+
+  BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+  std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+  // If the preheader has only a single predecessor and it ends with a
+  // fallthrough or an unconditional branch, then scan its predecessor for live
+  // defs as well. This happens whenever the preheader is created by splitting
+  // the critical edge from the loop predecessor to the loop header.
+  if (BB->pred_size() == 1) {
+    MachineBasicBlock *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+      InitRegPressure(*BB->pred_begin());
+  }
+
+  for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+       MII != E; ++MII) {
+    MachineInstr *MI = &*MII;
+    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || MO.isImplicit())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+
+      bool isNew = RegSeen.insert(Reg);
+      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      EVT VT = *RC->vt_begin();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (MO.isDef())
+        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      else {
+        bool isKill = isOperandKill(MO, MRI);
+        if (isNew && !isKill)
+          // Haven't seen this, it must be a livein.
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+        else if (!isNew && isKill)
+          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+      }
+    }
+  }
+}
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+  if (MI->isImplicitDef())
+    return;
+
+  SmallVector<unsigned, 4> Defs;
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    bool isNew = RegSeen.insert(Reg);
+    if (MO.isDef())
+      Defs.push_back(Reg);
+    else if (!isNew && isOperandKill(MO, MRI)) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      EVT VT = *RC->vt_begin();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+
+      if (RCCost > RegPressure[RCId])
+        RegPressure[RCId] = 0;
+      else
+        RegPressure[RCId] -= RCCost;
+    }
+  }
+
+  while (!Defs.empty()) {
+    unsigned Reg = Defs.pop_back_val();
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    EVT VT = *RC->vt_begin();
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+    RegPressure[RCId] += RCCost;
+  }
 }
 
 /// IsLICMCandidate - Returns true if the instruction may be a suitable
@@ -535,14 +717,14 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
         // If the physreg has no defs anywhere, it's just an ambient register
         // and we can freely move its uses. Alternatively, if it's allocatable,
         // it could get allocated to something with a def during allocation.
-        if (!RegInfo->def_empty(Reg))
+        if (!MRI->def_empty(Reg))
           return false;
         if (AllocatableSet.test(Reg))
           return false;
         // Check for a def among the register's aliases too.
         for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
           unsigned AliasReg = *Alias;
-          if (!RegInfo->def_empty(AliasReg))
+          if (!MRI->def_empty(AliasReg))
             return false;
           if (AllocatableSet.test(AliasReg))
             return false;
@@ -562,12 +744,12 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
     if (!MO.isUse())
       continue;
 
-    assert(RegInfo->getVRegDef(Reg) &&
+    assert(MRI->getVRegDef(Reg) &&
            "Machine instr not mapped for this vreg?!");
 
     // If the loop contains the definition of an operand, then the instruction
     // isn't loop invariant.
-    if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+    if (CurLoop->contains(MRI->getVRegDef(Reg)))
       return false;
   }
 
@@ -577,9 +759,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 
 
 /// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
-  for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
-         UE = RegInfo->use_end(); UI != UE; ++UI) {
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
     if (UseMI->isPHI())
       return true;
@@ -587,37 +769,210 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
   return false;
 }
 
-/// isLoadFromConstantMemory - Return true if the given instruction is a
-/// load from constant memory. Machine LICM will hoist these even if they are
-/// not re-materializable.
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
-  if (!MI->getDesc().mayLoad()) return false;
-  if (!MI->hasOneMemOperand()) return false;
-  MachineMemOperand *MMO = *MI->memoperands_begin();
-  if (MMO->isVolatile()) return false;
-  if (!MMO->getValue()) return false;
-  const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
-  if (PSV) {
-    MachineFunction &MF = *MI->getParent()->getParent();
-    return PSV->isConstant(MF.getFrameInfo());
-  } else {
-    return AA->pointsToConstantMemory(MMO->getValue());
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and an use in the current loop, return true if the target considered
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+                                        unsigned DefIdx, unsigned Reg) const {
+  if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+    return false;
+
+  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+    MachineInstr *UseMI = &*I;
+    if (UseMI->isCopyLike())
+      continue;
+    if (!CurLoop->contains(UseMI->getParent()))
+      continue;
+    for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = UseMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (MOReg != Reg)
+        continue;
+
+      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+        return true;
+    }
+
+    // Only look at the first in loop use.
+    break;
+  }
+
+  return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+  if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+    return true;
+  if (!InstrItins || InstrItins->isEmpty())
+    return false;
+
+  bool isCheap = false;
+  unsigned NumDefs = MI.getDesc().getNumDefs();
+  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+    MachineOperand &DefMO = MI.getOperand(i);
+    if (!DefMO.isReg() || !DefMO.isDef())
+      continue;
+    --NumDefs;
+    unsigned Reg = DefMO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+
+    if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+      return false;
+    isCheap = true;
+  }
+
+  return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+/// if hoisting an instruction of the given cost matrix can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+  for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+       CI != CE; ++CI) {
+    if (CI->second <= 0) 
+      continue;
+
+    unsigned RCId = CI->first;
+    for (unsigned i = BackTrace.size(); i != 0; --i) {
+      SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+      if (RP[RCId] + CI->second >= RegLimit[RCId])
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+  if (MI->isImplicitDef())
+    return;
+
+  // First compute the 'cost' of the instruction, i.e. its contribution
+  // to register pressure.
+  DenseMap<unsigned, int> Cost;
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    EVT VT = *RC->vt_begin();
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+    if (MO.isDef()) {
+      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+      if (CI != Cost.end())
+        CI->second += RCCost;
+      else
+        Cost.insert(std::make_pair(RCId, RCCost));
+    } else if (isOperandKill(MO, MRI)) {
+      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+      if (CI != Cost.end())
+        CI->second -= RCCost;
+      else
+        Cost.insert(std::make_pair(RCId, -RCCost));
+    }
+  }
+
+  // Update register pressure of blocks from loop header to current block.
+  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+    SmallVector<unsigned, 8> &RP = BackTrace[i];
+    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+         CI != CE; ++CI) {
+      unsigned RCId = CI->first;
+      RP[RCId] += CI->second;
+    }
   }
 }
 
 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
 /// the given loop invariant.
 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
-  // FIXME: For now, only hoist re-materilizable instructions. LICM will
-  // increase register pressure. We want to make sure it doesn't increase
-  // spilling.
+  if (MI.isImplicitDef())
+    return true;
+
+  // If the instruction is cheap, only hoist if it is re-materilizable. LICM
+  // will increase register pressure. It's probably not worth it if the
+  // instruction is cheap.
   // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
   // these tend to help performance in low register pressure situation. The
   // trade off is it may cause spill in high pressure situation. It will end up
   // adding a store in the loop preheader. But the reload is no more expensive.
   // The side benefit is these loads are frequently CSE'ed.
-  if (!TII->isTriviallyReMaterializable(&MI, AA)) {
-    if (!isLoadFromConstantMemory(&MI))
+  if (IsCheapInstruction(MI)) {
+    if (!TII->isTriviallyReMaterializable(&MI, AA))
+      return false;
+  } else {
+    // Estimate register pressure to determine whether to LICM the instruction.
+    // In low register pressure situation, we can be more aggressive about 
+    // hoisting. Also, favors hoisting long latency instructions even in
+    // moderately high pressure situation.
+    // FIXME: If there are long latency loop-invariant instructions inside the
+    // loop at this point, why didn't the optimizer's LICM hoist them?
+    DenseMap<unsigned, int> Cost;
+    for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isReg() || MO.isImplicit())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      if (MO.isDef()) {
+        if (HasHighOperandLatency(MI, i, Reg)) {
+          ++NumHighLatency;
+          return true;
+        }
+
+        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+        EVT VT = *RC->vt_begin();
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+        if (CI != Cost.end())
+          CI->second += RCCost;
+        else
+          Cost.insert(std::make_pair(RCId, RCCost));
+      } else if (isOperandKill(MO, MRI)) {
+        // Is a virtual register use is a kill, hoisting it out of the loop
+        // may actually reduce register pressure or be register pressure
+        // neutral.
+        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+        EVT VT = *RC->vt_begin();
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+        if (CI != Cost.end())
+          CI->second -= RCCost;
+        else
+          Cost.insert(std::make_pair(RCId, -RCCost));
+      }
+    }
+
+    // Visit BBs from header to current BB, if hoisting this doesn't cause
+    // high register pressure, then it's safe to proceed.
+    if (!CanCauseHighRegPressure(Cost)) {
+      ++NumLowRP;
+      return true;
+    }
+
+    // High register pressure situation, only hoist if the instruction is going to
+    // be remat'ed.
+    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+        !MI.isInvariantLoad(AA))
       return false;
   }
 
@@ -628,7 +983,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isDef())
       continue;
-    if (HasPHIUses(MO.getReg(), RegInfo))
+    if (HasPHIUses(MO.getReg(), MRI))
       return false;
   }
 
@@ -636,10 +991,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
 }
 
 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+  // Don't unfold simple loads.
+  if (MI->getDesc().canFoldAsLoad())
+    return 0;
+
   // If not, we may be able to unfold a load and hoist that.
   // First test whether the instruction is loading from an amenable
   // memory location.
-  if (!isLoadFromConstantMemory(MI))
+  if (!MI->isInvariantLoad(AA))
     return 0;
 
   // Next determine the register class for a temporary register.
@@ -654,7 +1013,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
   if (TID.getNumDefs() != 1) return 0;
   const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
   // Ok, we're unfolding. Create a temporary register and do the unfold.
-  unsigned Reg = RegInfo->createVirtualRegister(RC);
+  unsigned Reg = MRI->createVirtualRegister(RC);
 
   MachineFunction &MF = *MI->getParent()->getParent();
   SmallVector<MachineInstr *, 2> NewMIs;
@@ -678,6 +1037,10 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
     NewMIs[1]->eraseFromParent();
     return 0;
   }
+
+  // Update register pressure for the unfolded instruction.
+  UpdateRegPressure(NewMIs[1]);
+
   // Otherwise we successfully unfolded a load that we can hoist.
   MI->eraseFromParent();
   return NewMIs[0];
@@ -686,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
   for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
     const MachineInstr *MI = &*I;
-    // FIXME: For now, only hoist re-materilizable instructions. LICM will
-    // increase register pressure. We want to make sure it doesn't increase
-    // spilling.
-    if (TII->isTriviallyReMaterializable(MI, AA)) {
-      unsigned Opcode = MI->getOpcode();
-      DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
-        CI = CSEMap.find(Opcode);
-      if (CI != CSEMap.end())
-        CI->second.push_back(MI);
-      else {
-        std::vector<const MachineInstr*> CSEMIs;
-        CSEMIs.push_back(MI);
-        CSEMap.insert(std::make_pair(Opcode, CSEMIs));
-      }
+    unsigned Opcode = MI->getOpcode();
+    DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+      CI = CSEMap.find(Opcode);
+    if (CI != CSEMap.end())
+      CI->second.push_back(MI);
+    else {
+      std::vector<const MachineInstr*> CSEMIs;
+      CSEMIs.push_back(MI);
+      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
     }
   }
 }
@@ -709,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
                               std::vector<const MachineInstr*> &PrevMIs) {
   for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
     const MachineInstr *PrevMI = PrevMIs[i];
-    if (TII->produceSameValue(MI, PrevMI))
+    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
       return PrevMI;
   }
   return 0;
@@ -738,8 +1096,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
 
       if (MO.isReg() && MO.isDef() &&
           !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
-        RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
-        RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+        MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+        MRI->clearKillFlags(Dup->getOperand(i).getReg());
       }
     }
     MI->eraseFromParent();
@@ -752,15 +1110,12 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
 /// Hoist - When an instruction is found to use only loop invariant operands
 /// that are safe to hoist, this instruction is called to do the dirty work.
 ///
-void MachineLICM::Hoist(MachineInstr *MI) {
-  MachineBasicBlock *Preheader = getCurPreheader();
-  if (!Preheader) return;
-
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
   // First check whether we should hoist this instruction.
   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
     // If not, try unfolding a hoistable load.
     MI = ExtractHoistableLoad(MI);
-    if (!MI) return;
+    if (!MI) return false;
   }
 
   // Now move the instructions to the predecessor, inserting it before any
@@ -791,13 +1146,16 @@ void MachineLICM::Hoist(MachineInstr *MI) {
     // Otherwise, splice the instruction to the preheader.
     Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
 
+    // Update register pressure for BBs from header to this block.
+    UpdateBackTraceRegPressure(MI);
+
     // Clear the kill flags of any register this instruction defines,
     // since they may need to be live throughout the entire loop
     // rather than just live for part of it.
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg() && MO.isDef() && !MO.isDead())
-        RegInfo->clearKillFlags(MO.getReg());
+        MRI->clearKillFlags(MO.getReg());
     }
 
     // Add to the CSE map.
@@ -812,6 +1170,8 @@ void MachineLICM::Hoist(MachineInstr *MI) {
 
   ++NumHoisted;
   Changed = true;
+
+  return true;
 }
 
 MachineBasicBlock *MachineLICM::getCurPreheader() {
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index bca4b0c28985..189cb2ba5d1d 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,8 +30,11 @@ TEMPLATE_INSTANTIATION(MLIB);
 }
 
 char MachineLoopInfo::ID = 0;
-INITIALIZE_PASS(MachineLoopInfo, "machine-loops",
-                "Machine Natural Loop Construction", true, true);
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+                "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+                "Machine Natural Loop Construction", true, true)
 
 char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
 
diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 000000000000..17fe67f65045
--- /dev/null
+++ b/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+                "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+                "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<SlotIndexes>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+  releaseMemory();
+  Indexes = &getAnalysis<SlotIndexes>();
+  return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+  DeleteContainerSeconds(Cache);
+  Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+  MachineLoopRange *&Range = Cache[Loop];
+  if (!Range)
+    Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+  return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+                                   MachineLoopRange::Allocator &alloc,
+                                   SlotIndexes &Indexes)
+  : Loop(loop), Intervals(alloc), Area(0) {
+  // Compute loop coverage.
+  for (MachineLoop::block_iterator I = Loop->block_begin(),
+         E = Loop->block_end(); I != E; ++I) {
+    const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+    Intervals.insert(Range.first, Range.second, 1u);
+    Area += Range.first.distance(Range.second);
+  }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+  Map::const_iterator I = Intervals.find(Start);
+  return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+  return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+  unsigned na = a->getNumber();
+  unsigned nb = b->getNumber();
+  if (na < nb)
+    return -1;
+  if (na > nb)
+    return 1;
+  return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+  if (a->getArea() != b->getArea())
+    return a->getArea() > b->getArea() ? -1 : 1;
+  return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+  OS << "Loop#" << getNumber() << " =";
+  for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+    OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+  MLR.print(OS);
+  return OS;
+}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index b647a4dcc530..fadc594efcb2 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -29,7 +29,7 @@ using namespace llvm::dwarf;
 
 // Handle the Pass registration stuff necessary to use TargetData's.
 INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
-                "Machine Module Information", false, false);
+                "Machine Module Information", false, false)
 char MachineModuleInfo::ID = 0;
 
 // Out of line virtual method.
@@ -41,30 +41,30 @@ class MMIAddrLabelMapCallbackPtr : CallbackVH {
 public:
   MMIAddrLabelMapCallbackPtr() : Map(0) {}
   MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
-  
+
   void setPtr(BasicBlock *BB) {
     ValueHandleBase::operator=(BB);
   }
-    
+
   void setMap(MMIAddrLabelMap *map) { Map = map; }
-  
+
   virtual void deleted();
   virtual void allUsesReplacedWith(Value *V2);
 };
-  
+
 class MMIAddrLabelMap {
   MCContext &Context;
   struct AddrLabelSymEntry {
     /// Symbols - The symbols for the label.  This is a pointer union that is
     /// either one symbol (the common case) or a list of symbols.
     PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
-    
+
     Function *Fn;   // The containing function of the BasicBlock.
     unsigned Index; // The index in BBCallbacks for the BasicBlock.
   };
-  
+
   DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-  
+
   /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for.  We
   /// use this so we get notified if a block is deleted or RAUWd.
   std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
@@ -76,23 +76,23 @@ class MMIAddrLabelMap {
   DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
     DeletedAddrLabelsNeedingEmission;
 public:
-  
+
   MMIAddrLabelMap(MCContext &context) : Context(context) {}
   ~MMIAddrLabelMap() {
     assert(DeletedAddrLabelsNeedingEmission.empty() &&
            "Some labels for deleted blocks never got emitted");
-    
+
     // Deallocate any of the 'list of symbols' case.
     for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
          I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
       if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
         delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
   }
-  
+
   MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
   std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
 
-  void takeDeletedSymbolsForFunction(Function *F, 
+  void takeDeletedSymbolsForFunction(Function *F,
                                      std::vector<MCSymbol*> &Result);
 
   void UpdateForDeletedBlock(BasicBlock *BB);
@@ -104,7 +104,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
   assert(BB->hasAddressTaken() &&
          "Shouldn't get label for block without address taken");
   AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-  
+
   // If we already had an entry for this block, just return it.
   if (!Entry.Symbols.isNull()) {
     assert(BB->getParent() == Entry.Fn && "Parent changed");
@@ -112,7 +112,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
       return Entry.Symbols.get<MCSymbol*>();
     return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
   }
-  
+
   // Otherwise, this is a new entry, create a new symbol for it and add an
   // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
   BBCallbacks.push_back(BB);
@@ -129,9 +129,9 @@ MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
   assert(BB->hasAddressTaken() &&
          "Shouldn't get label for block without address taken");
   AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-  
+
   std::vector<MCSymbol*> Result;
-  
+
   // If we already had an entry for this block, just return it.
   if (Entry.Symbols.isNull())
     Result.push_back(getAddrLabelSymbol(BB));
@@ -152,7 +152,7 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
 
   // If there are no entries for the function, just return.
   if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-  
+
   // Otherwise, take the list.
   std::swap(Result, I->second);
   DeletedAddrLabelsNeedingEmission.erase(I);
@@ -175,7 +175,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
   if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
     if (Sym->isDefined())
       return;
-  
+
     // If the block is not yet defined, we need to emit it at the end of the
     // function.  Add the symbol to the DeletedAddrLabelsNeedingEmission list
     // for the containing Function.  Since the block is being deleted, its
@@ -187,7 +187,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
     for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
       MCSymbol *Sym = (*Syms)[i];
       if (Sym->isDefined()) continue;  // Ignore already emitted labels.
-      
+
       // If the block is not yet defined, we need to emit it at the end of the
       // function.  Add the symbol to the DeletedAddrLabelsNeedingEmission list
       // for the containing Function.  Since the block is being deleted, its
@@ -195,7 +195,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
       // 'Entry'.
       DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
     }
-    
+
     // The entry is deleted, free the memory associated with the symbol list.
     delete Syms;
   }
@@ -225,7 +225,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
     SymList->push_back(PrevSym);
     NewEntry.Symbols = SymList;
   }
-      
+
   std::vector<MCSymbol*> *SymList =
     NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
 
@@ -234,7 +234,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
     SymList->push_back(Sym);
     return;
   }
-  
+
   // Otherwise, concatenate the list.
   std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
   SymList->insert(SymList->end(), Syms->begin(), Syms->end());
@@ -253,10 +253,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
 
 //===----------------------------------------------------------------------===//
 
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(ID), Context(MAI),
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+                                     const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
   ObjFileMMI(0),
-  CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
+  CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+  CallsExternalVAFunctionWithFloatingPointArguments(false) {
+  initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
   // Always emit some info, by default "no personality" info.
   Personalities.push_back(NULL);
   AddrLabelSymbols = 0;
@@ -264,7 +267,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
 }
 
 MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
   assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
          "should always be explicitly constructed by LLVMTargetMachine");
   abort();
@@ -272,7 +275,7 @@ MachineModuleInfo::MachineModuleInfo()
 
 MachineModuleInfo::~MachineModuleInfo() {
   delete ObjFileMMI;
-  
+
   // FIXME: Why isn't doFinalization being called??
   //assert(AddrLabelSymbols == 0 && "doFinalization not called");
   delete AddrLabelSymbols;
@@ -472,7 +475,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
            (LPMap && (*LPMap)[BeginLabel] != 0)) &&
           (EndLabel->isDefined() ||
            (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-      
+
       LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
       LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
       --j, --e;
@@ -562,20 +565,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
   // in the zero index.
   return 0;
 }
-
-namespace {
-  /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
-  /// by source location, to avoid depending on the arbitrary order that
-  /// instruction selection visits variables in.
-  struct VariableDebugSorter {
-    bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
-                    const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
-                  const {
-       if (A.second.second.getLine() != B.second.second.getLine())
-         return A.second.second.getLine() < B.second.second.getLine();
-       if (A.second.second.getCol() != B.second.second.getCol())
-         return A.second.second.getCol() < B.second.second.getCol();
-       return false;
-    }
-  };
-}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5d852f26beda..b3fb33736ffc 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -30,8 +30,9 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
 
 MachineRegisterInfo::~MachineRegisterInfo() {
 #ifndef NDEBUG
-  for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
-    assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+    assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+           "Vreg use list non-empty still?");
   for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
     assert(!PhysRegUseDefLists[i] &&
            "PhysRegUseDefLists has entries after all instructions are deleted");
@@ -44,20 +45,32 @@ MachineRegisterInfo::~MachineRegisterInfo() {
 ///
 void
 MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
-  unsigned VR = Reg;
-  Reg -= TargetRegisterInfo::FirstVirtualRegister;
-  assert(Reg < VRegInfo.size() && "Invalid vreg!");
   const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
   VRegInfo[Reg].first = RC;
 
   // Remove from old register class's vregs list. This may be slow but
   // fortunately this operation is rarely needed.
   std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
-  std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
+  std::vector<unsigned>::iterator I =
+    std::find(VRegs.begin(), VRegs.end(), Reg);
   VRegs.erase(I);
 
   // Add to new register class's vregs list.
-  RegClass2VRegMap[RC->getID()].push_back(VR);
+  RegClass2VRegMap[RC->getID()].push_back(Reg);
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+                                       const TargetRegisterClass *RC) {
+  const TargetRegisterClass *OldRC = getRegClass(Reg);
+  if (OldRC == RC)
+    return RC;
+  const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+  if (!NewRC)
+    return 0;
+  if (NewRC != OldRC)
+    setRegClass(Reg, NewRC);
+  return NewRC;
 }
 
 /// createVirtualRegister - Create and return a new virtual register in the
@@ -66,17 +79,22 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
 unsigned
 MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
   assert(RegClass && "Cannot create register without RegClass!");
+
+  // New virtual register number.
+  unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
   // Add a reg, but keep track of whether the vector reallocated or not.
-  void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
-  VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
-  RegAllocHints.push_back(std::make_pair(0, 0));
+  const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+  void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+  VRegInfo.grow(Reg);
+  VRegInfo[Reg].first = RegClass;
+  RegAllocHints.grow(Reg);
 
-  if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+  if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
     // The vector reallocated, handle this now.
     HandleVRegListReallocation();
-  unsigned VR = getLastVirtReg();
-  RegClass2VRegMap[RegClass->getID()].push_back(VR);
-  return VR;
+  RegClass2VRegMap[RegClass->getID()].push_back(Reg);
+  return Reg;
 }
 
 /// HandleVRegListReallocation - We just added a virtual register to the
@@ -85,11 +103,12 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
 void MachineRegisterInfo::HandleVRegListReallocation() {
   // The back pointers for the vreg lists point into the previous vector.
   // Update them to point to their correct slots.
-  for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
-    MachineOperand *List = VRegInfo[i].second;
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    MachineOperand *List = VRegInfo[Reg].second;
     if (!List) continue;
     // Update the back-pointer to be accurate once more.
-    List->Contents.Reg.Prev = &VRegInfo[i].second;
+    List->Contents.Reg.Prev = &VRegInfo[Reg].second;
   }
 }
 
@@ -112,8 +131,6 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
 /// register or null if none is found.  This assumes that the code is in SSA
 /// form, so there should only be one definition.
 MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
-  assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
-         "Invalid vreg!");
   // Since we are in SSA form, we can use the first definition.
   if (!def_empty(Reg))
     return &*def_begin(Reg);
@@ -193,8 +210,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
         LiveIns.erase(LiveIns.begin() + i);
         --i; --e;
       } else {
+        DebugLoc DL;
+        // If there is a location for this live in then use it.
+        DenseMap<unsigned, DebugLoc>::iterator DLI = 
+          LiveInLocs.find(LiveIns[i].second);
+        if (DLI != LiveInLocs.end())
+          DL = DLI->second;
+
         // Emit a copy.
-        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+        BuildMI(*EntryMBB, EntryMBB->begin(), DL,
                 TII.get(TargetOpcode::COPY), LiveIns[i].second)
           .addReg(LiveIns[i].first);
 
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index c8f8fafe227e..8a93a24287b6 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -34,27 +35,31 @@ using namespace llvm;
 static cl::opt<bool> 
 SplitEdges("machine-sink-split",
            cl::desc("Split critical edges during machine sinking"),
-           cl::init(false), cl::Hidden);
-static cl::opt<unsigned>
-SplitLimit("split-limit",
-           cl::init(~0u), cl::Hidden);
+           cl::init(true), cl::Hidden);
 
-STATISTIC(NumSunk,  "Number of machine instructions sunk");
-STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumSunk,      "Number of machine instructions sunk");
+STATISTIC(NumSplit,     "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
 
 namespace {
   class MachineSinking : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
-    MachineRegisterInfo  *RegInfo; // Machine register information
+    MachineRegisterInfo  *MRI;  // Machine register information
     MachineDominatorTree *DT;   // Machine dominator tree
     MachineLoopInfo *LI;
     AliasAnalysis *AA;
     BitVector AllocatableSet;   // Which physregs are allocatable?
 
+    // Remember which edges have been considered for breaking.
+    SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+    CEBCandidates;
+
   public:
     static char ID; // Pass identification
-    MachineSinking() : MachineFunctionPass(ID) {}
+    MachineSinking() : MachineFunctionPass(ID) {
+      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -67,43 +72,125 @@ namespace {
       AU.addPreserved<MachineDominatorTree>();
       AU.addPreserved<MachineLoopInfo>();
     }
+
+    virtual void releaseMemory() {
+      CEBCandidates.clear();
+    }
+
   private:
     bool ProcessBlock(MachineBasicBlock &MBB);
-    MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From,
-                                         MachineBasicBlock *To);
+    bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+                                     MachineBasicBlock *From,
+                                     MachineBasicBlock *To);
+    MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+                                         MachineBasicBlock *From,
+                                         MachineBasicBlock *To,
+                                         bool BreakPHIEdge);
     bool SinkInstruction(MachineInstr *MI, bool &SawStore);
     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
-                               MachineBasicBlock *DefMBB, bool &LocalUse) const;
+                                 MachineBasicBlock *DefMBB,
+                                 bool &BreakPHIEdge, bool &LocalUse) const;
+    bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+                                         MachineBasicBlock *MBB);
   };
 } // end anonymous namespace
 
 char MachineSinking::ID = 0;
-INITIALIZE_PASS(MachineSinking, "machine-sink",
-                "Machine code sinking", false, false);
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+                "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+                "Machine code sinking", false, false)
 
 FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
 
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+                                                     MachineBasicBlock *MBB) {
+  if (!MI->isCopy())
+    return false;
+
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  unsigned DstReg = MI->getOperand(0).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+      !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+      !MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+  if (SRC != DRC)
+    return false;
+
+  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+  if (DefMI->isCopyLike())
+    return false;
+  DEBUG(dbgs() << "Coalescing: " << *DefMI);
+  DEBUG(dbgs() << "*** to: " << *MI);
+  MRI->replaceRegWith(DstReg, SrcReg);
+  MI->eraseFromParent();
+  ++NumCoalesces;
+  return true;
+}
+
 /// AllUsesDominatedByBlock - Return true if all uses of the specified register
 /// occur in blocks dominated by the specified block. If any use is in the
 /// definition block, then return false since it is never legal to move def
 /// after uses.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
-                                             MachineBasicBlock *MBB,
-                                             MachineBasicBlock *DefMBB,
-                                             bool &LocalUse) const {
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+                                        MachineBasicBlock *MBB,
+                                        MachineBasicBlock *DefMBB,
+                                        bool &BreakPHIEdge,
+                                        bool &LocalUse) const {
   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
          "Only makes sense for vregs");
+
+  if (MRI->use_nodbg_empty(Reg))
+    return true;
+
   // Ignoring debug uses is necessary so debug info doesn't affect the code.
   // This may leave a referencing dbg_value in the original block, before
   // the definition of the vreg.  Dwarf generator handles this although the
   // user might not get the right info at runtime.
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
+  // into and they are all PHI nodes. In this case, machine-sink must break
+  // the critical edge first. e.g.
+  //
+  // BB#1: derived from LLVM BB %bb4.preheader
+  //   Predecessors according to CFG: BB#0
+  //     ...
+  //     %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+  //     ...
+  //     JE_4 <BB#37>, %EFLAGS<imp-use>
+  //   Successors according to CFG: BB#37 BB#2
+  //
+  // BB#2: derived from LLVM BB %bb.nph
+  //   Predecessors according to CFG: BB#0 BB#1
+  //     %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+  BreakPHIEdge = true;
   for (MachineRegisterInfo::use_nodbg_iterator
-         I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
        I != E; ++I) {
-    // Determine the block of the use.
     MachineInstr *UseInst = &*I;
     MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (!(UseBlock == MBB && UseInst->isPHI() &&
+          UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+      BreakPHIEdge = false;
+      break;
+    }
+  }
+  if (BreakPHIEdge)
+    return true;
 
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    // Determine the block of the use.
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
     if (UseInst->isPHI()) {
       // PHI nodes use the operand in the predecessor block, not the block with
       // the PHI.
@@ -127,7 +214,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   const TargetMachine &TM = MF.getTarget();
   TII = TM.getInstrInfo();
   TRI = TM.getRegisterInfo();
-  RegInfo = &MF.getRegInfo();
+  MRI = &MF.getRegInfo();
   DT = &getAnalysis<MachineDominatorTree>();
   LI = &getAnalysis<MachineLoopInfo>();
   AA = &getAnalysis<AliasAnalysis>();
@@ -139,6 +226,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     bool MadeChange = false;
 
     // Process all basic blocks.
+    CEBCandidates.clear();
     for (MachineFunction::iterator I = MF.begin(), E = MF.end();
          I != E; ++I)
       MadeChange |= ProcessBlock(*I);
@@ -177,6 +265,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (MI->isDebugValue())
       continue;
 
+    if (PerformTrivialForwardCoalescing(MI, &MBB))
+      continue;
+
     if (SinkInstruction(MI, SawStore))
       ++NumSunk, MadeChange = true;
 
@@ -186,51 +277,92 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
   return MadeChange;
 }
 
-MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
-                                                     MachineBasicBlock *ToBB) {
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+                                                 MachineBasicBlock *From,
+                                                 MachineBasicBlock *To) {
+  // FIXME: Need much better heuristics.
+
+  // If the pass has already considered breaking this edge (during this pass
+  // through the function), then let's go ahead and break it. This means
+  // sinking multiple "cheap" instructions into the same block.
+  if (!CEBCandidates.insert(std::make_pair(From, To)))
+    return true;
+
+  if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+    return true;
+
+  // MI is cheap, we probably don't want to break the critical edge for it.
+  // However, if this would allow some definitions of its source operands
+  // to be sunk then it's probably worth it.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (MRI->hasOneNonDBGUse(Reg))
+      return true;
+  }
+
+  return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+                                                     MachineBasicBlock *FromBB,
+                                                     MachineBasicBlock *ToBB,
+                                                     bool BreakPHIEdge) {
+  if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+    return 0;
+
   // Avoid breaking back edge. From == To means backedge for single BB loop.
-  if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB)
+  if (!SplitEdges || FromBB == ToBB)
+    return 0;
+
+  // Check for backedges of more "complex" loops.
+  if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+      LI->isLoopHeader(ToBB))
     return 0;
 
-  // Check for more "complex" loops.
-  if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) ||
-      !LI->isLoopHeader(ToBB)) {
-    // It's not always legal to break critical edges and sink the computation
-    // to the edge.
-    //
-    // BB#1:
-    // v1024
-    // Beq BB#3
-    // <fallthrough>
-    // BB#2:
-    // ... no uses of v1024
-    // <fallthrough>
-    // BB#3:
-    // ...
-    //       = v1024
-    //
-    // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
-    //
-    // BB#1:
-    // ...
-    // Bne BB#2
-    // BB#4:
-    // v1024 =
-    // B BB#3
-    // BB#2:
-    // ... no uses of v1024
-    // <fallthrough>
-    // BB#3:
-    // ...
-    //       = v1024
-    //
-    // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
-    // flow. We need to ensure the new basic block where the computation is
-    // sunk to dominates all the uses.
-    // It's only legal to break critical edge and sink the computation to the
-    // new block if all the predecessors of "To", except for "From", are
-    // not dominated by "From". Given SSA property, this means these
-    // predecessors are dominated by "To".
+  // It's not always legal to break critical edges and sink the computation
+  // to the edge.
+  //
+  // BB#1:
+  // v1024
+  // Beq BB#3
+  // <fallthrough>
+  // BB#2:
+  // ... no uses of v1024
+  // <fallthrough>
+  // BB#3:
+  // ...
+  //       = v1024
+  //
+  // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+  //
+  // BB#1:
+  // ...
+  // Bne BB#2
+  // BB#4:
+  // v1024 =
+  // B BB#3
+  // BB#2:
+  // ... no uses of v1024
+  // <fallthrough>
+  // BB#3:
+  // ...
+  //       = v1024
+  //
+  // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+  // flow. We need to ensure the new basic block where the computation is
+  // sunk to dominates all the uses.
+  // It's only legal to break critical edge and sink the computation to the
+  // new block if all the predecessors of "To", except for "From", are
+  // not dominated by "From". Given SSA property, this means these
+  // predecessors are dominated by "To".
+  //
+  // There is no need to do this check if all the uses are PHI nodes. PHI
+  // sources are only defined on the specific predecessor edges.
+  if (!BreakPHIEdge) {
     for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
            E = ToBB->pred_end(); PI != E; ++PI) {
       if (*PI == FromBB)
@@ -238,17 +370,23 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
       if (!DT->dominates(ToBB, *PI))
         return 0;
     }
-
-    // FIXME: Determine if it's cost effective to break this edge.
-    return FromBB->SplitCriticalEdge(ToBB, this);
   }
 
-  return 0;
+  return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
 }
 
 /// SinkInstruction - Determine whether it is safe to sink the specified machine
 /// instruction out of its current block into a successor.
 bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+  // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+  // be close to the source to make it easier to coalesce.
+  if (AvoidsSinking(MI, MRI))
+    return false;
+
   // Check if it's safe to move the instruction.
   if (!MI->isSafeToMove(TII, AA, SawStore))
     return false;
@@ -269,6 +407,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   // decide.
   MachineBasicBlock *SuccToSinkTo = 0;
 
+  bool BreakPHIEdge = false;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg()) continue;  // Ignore non-register operands.
@@ -281,7 +420,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
         // If the physreg has no defs anywhere, it's just an ambient register
         // and we can freely move its uses. Alternatively, if it's allocatable,
         // it could get allocated to something with a def during allocation.
-        if (!RegInfo->def_empty(Reg))
+        if (!MRI->def_empty(Reg))
           return false;
 
         if (AllocatableSet.test(Reg))
@@ -290,7 +429,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
         // Check for a def among the register's aliases too.
         for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
           unsigned AliasReg = *Alias;
-          if (!RegInfo->def_empty(AliasReg))
+          if (!MRI->def_empty(AliasReg))
             return false;
 
           if (AllocatableSet.test(AliasReg))
@@ -305,7 +444,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
       if (MO.isUse()) continue;
 
       // If it's not safe to move defs of the register class, then abort.
-      if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
         return false;
 
       // FIXME: This picks a successor to sink into based on having one
@@ -327,7 +466,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
         // If a previous operand picked a block to sink to, then this operand
         // must be sinkable to the same block.
         bool LocalUse = false;
-        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse))
+        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+                                     BreakPHIEdge, LocalUse))
           return false;
 
         continue;
@@ -338,7 +478,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
       for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
            E = ParentBlock->succ_end(); SI != E; ++SI) {
         bool LocalUse = false;
-        if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) {
+        if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+                                    BreakPHIEdge, LocalUse)) {
           SuccToSinkTo = *SI;
           break;
         }
@@ -384,7 +525,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   // If the block has multiple predecessors, this would introduce computation on
   // a path that it doesn't already exist.  We could split the critical edge,
   // but for now we just punt.
-  // FIXME: Split critical edges if not backedges.
   if (SuccToSinkTo->pred_size() > 1) {
     // We cannot sink a load across a critical edge - there may be stores in
     // other code paths.
@@ -412,10 +552,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
     if (!TryBreak)
       DEBUG(dbgs() << "Sinking along critical edge.\n");
     else {
-      MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+      MachineBasicBlock *NewSucc =
+        SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
       if (!NewSucc) {
-        DEBUG(dbgs() <<
-              " *** PUNTING: Not legal or profitable to break critical edge\n");
+        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+                        "break critical edge\n");
         return false;
       } else {
         DEBUG(dbgs() << " *** Splitting critical edge:"
@@ -424,10 +565,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
               << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
         SuccToSinkTo = NewSucc;
         ++NumSplit;
+        BreakPHIEdge = false;
       }
     }
   }
 
+  if (BreakPHIEdge) {
+    // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunken into and they are all PHI nodes. In this case, machine-sink must
+    // break the critical edge first.
+    MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+                                                   SuccToSinkTo, BreakPHIEdge);
+    if (!NewSucc) {
+      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+            "break critical edge\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << " *** Splitting critical edge:"
+          " BB#" << ParentBlock->getNumber()
+          << " -- BB#" << NewSucc->getNumber()
+          << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+    SuccToSinkTo = NewSucc;
+    ++NumSplit;
+  }
+
   // Determine where to insert into. Skip phi nodes.
   MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 1e88562935ea..7351119f4728 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Function.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -45,14 +46,16 @@ using namespace llvm;
 namespace {
   struct MachineVerifier {
 
-    MachineVerifier(Pass *pass) :
+    MachineVerifier(Pass *pass, const char *b) :
       PASS(pass),
+      Banner(b),
       OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
       {}
 
     bool runOnMachineFunction(MachineFunction &MF);
 
     Pass *const PASS;
+    const char *Banner;
     const char *const OutFileName;
     raw_ostream *OS;
     const MachineFunction *MF;
@@ -71,6 +74,8 @@ namespace {
     RegVector regsDefined, regsDead, regsKilled;
     RegSet regsLiveInButUnused;
 
+    SlotIndex lastIndex;
+
     // Add Reg and any sub-registers to RV
     void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
       RV.push_back(Reg);
@@ -167,7 +172,9 @@ namespace {
 
     // Analysis information if available
     LiveVariables *LiveVars;
-    const LiveIntervals *LiveInts;
+    LiveIntervals *LiveInts;
+    LiveStacks *LiveStks;
+    SlotIndexes *Indexes;
 
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -193,9 +200,12 @@ namespace {
 
   struct MachineVerifierPass : public MachineFunctionPass {
     static char ID; // Pass ID, replacement for typeid
+    const char *const Banner;
 
-    MachineVerifierPass()
-      : MachineFunctionPass(ID) {}
+    MachineVerifierPass(const char *b = 0)
+      : MachineFunctionPass(ID), Banner(b) {
+        initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+      }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -203,7 +213,7 @@ namespace {
     }
 
     bool runOnMachineFunction(MachineFunction &MF) {
-      MF.verify(this);
+      MF.verify(this, Banner);
       return false;
     }
   };
@@ -212,14 +222,15 @@ namespace {
 
 char MachineVerifierPass::ID = 0;
 INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
-                "Verify generated machine code", false, false);
+                "Verify generated machine code", false, false)
 
-FunctionPass *llvm::createMachineVerifierPass() {
-  return new MachineVerifierPass();
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+  return new MachineVerifierPass(Banner);
 }
 
-void MachineFunction::verify(Pass *p) const {
-  MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+  MachineVerifier(p, Banner)
+    .runOnMachineFunction(const_cast<MachineFunction&>(*this));
 }
 
 bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -247,11 +258,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
 
   LiveVars = NULL;
   LiveInts = NULL;
+  LiveStks = NULL;
+  Indexes = NULL;
   if (PASS) {
     LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
     // We don't want to verify LiveVariables if LiveIntervals is available.
     if (!LiveInts)
       LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+    LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+    Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
   }
 
   visitMachineFunctionBefore();
@@ -260,6 +275,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
     visitMachineBasicBlockBefore(MFI);
     for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
            MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      if (MBBI->getParent() != MFI) {
+        report("Bad instruction parent pointer", MFI);
+        *OS << "Instruction: " << *MBBI;
+        continue;
+      }
       visitMachineInstrBefore(MBBI);
       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
         visitMachineOperand(&MBBI->getOperand(I), I);
@@ -288,8 +308,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
 void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
   assert(MF);
   *OS << '\n';
-  if (!foundErrors++)
-    MF->print(*OS);
+  if (!foundErrors++) {
+    if (Banner)
+      *OS << "# " << Banner << '\n';
+    MF->print(*OS, Indexes);
+  }
   *OS << "*** Bad machine code: " << msg << " ***\n"
       << "- function:    " << MF->getFunction()->getNameStr() << "\n";
 }
@@ -299,13 +322,19 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
   report(msg, MBB->getParent());
   *OS << "- basic block: " << MBB->getName()
       << " " << (void*)MBB
-      << " (BB#" << MBB->getNumber() << ")\n";
+      << " (BB#" << MBB->getNumber() << ")";
+  if (Indexes)
+    *OS << " [" << Indexes->getMBBStartIdx(MBB)
+        << ';' <<  Indexes->getMBBEndIdx(MBB) << ')';
+  *OS << '\n';
 }
 
 void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
   assert(MI);
   report(msg, MI->getParent());
   *OS << "- instruction: ";
+  if (Indexes && Indexes->hasIndex(MI))
+    *OS << Indexes->getInstructionIndex(MI) << '\t';
   MI->print(*OS, TM);
 }
 
@@ -329,6 +358,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
 }
 
 void MachineVerifier::visitMachineFunctionBefore() {
+  lastIndex = SlotIndex();
   regsReserved = TRI->getReservedRegs(*MF);
 
   // A sub-register of a reserved register is also reserved
@@ -357,6 +387,16 @@ void
 MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
+  // Count the number of landing pad successors.
+  SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I) {
+    if ((*I)->isLandingPad())
+      LandingPadSuccs.insert(*I);
+  }
+  if (LandingPadSuccs.size() > 1)
+    report("MBB has more than one landing pad successor", MBB);
+
   // Call AnalyzeBranch. If it succeeds, there several more conditions to check.
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
@@ -372,14 +412,14 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
         // It's possible that the block legitimately ends with a noreturn
         // call or an unreachable, in which case it won't actually fall
         // out the bottom of the function.
-      } else if (MBB->succ_empty()) {
+      } else if (MBB->succ_size() == LandingPadSuccs.size()) {
         // It's possible that the block legitimately ends with a noreturn
         // call or an unreachable, in which case it won't actuall fall
         // out of the block.
-      } else if (MBB->succ_size() != 1) {
+      } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
         report("MBB exits via unconditional fall-through but doesn't have "
                "exactly one CFG successor!", MBB);
-      } else if (MBB->succ_begin()[0] != MBBI) {
+      } else if (!MBB->isSuccessor(MBBI)) {
         report("MBB exits via unconditional fall-through but its successor "
                "differs from its CFG successor!", MBB);
       }
@@ -394,10 +434,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
       }
     } else if (TBB && !FBB && Cond.empty()) {
       // Block unconditionally branches somewhere.
-      if (MBB->succ_size() != 1) {
+      if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
         report("MBB exits via unconditional branch but doesn't have "
                "exactly one CFG successor!", MBB);
-      } else if (MBB->succ_begin()[0] != TBB) {
+      } else if (!MBB->isSuccessor(TBB)) {
         report("MBB exits via unconditional branch but the CFG "
                "successor doesn't match the actual successor!", MBB);
       }
@@ -487,6 +527,9 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
 
   regsKilled.clear();
   regsDefined.clear();
+
+  if (Indexes)
+    lastIndex = Indexes->getMBBStartIdx(MBB);
 }
 
 void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
@@ -525,6 +568,7 @@ void
 MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   const MachineInstr *MI = MO->getParent();
   const TargetInstrDesc &TI = MI->getDesc();
+  const TargetOperandInfo &TOI = TI.OpInfo[MONum];
 
   // The first TI.NumDefs operands must be explicit register defines
   if (MONum < TI.getNumDefs()) {
@@ -535,9 +579,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     else if (MO->isImplicit())
       report("Explicit definition marked as implicit", MO, MONum);
   } else if (MONum < TI.getNumOperands()) {
-    if (MO->isReg()) {
-      if (MO->isDef())
-        report("Explicit operand marked as def", MO, MONum);
+    // Don't check if it's the last operand in a variadic instruction. See,
+    // e.g., LDM_RET in the arm back end.
+    if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
+      if (MO->isDef() && !TOI.isOptionalDef())
+          report("Explicit operand marked as def", MO, MONum);
       if (MO->isImplicit())
         report("Explicit operand marked as implicit", MO, MONum);
     }
@@ -554,7 +600,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       return;
 
     // Check Live Variables.
-    if (MO->isUndef()) {
+    if (MI->isDebugValue()) {
+      // Liveness checks are not valid for debug values.
+    } else if (MO->isUndef()) {
       // An <undef> doesn't refer to any register, so just skip it.
     } else if (MO->isUse()) {
       regsLiveInButUnused.erase(Reg);
@@ -566,7 +614,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
         unsigned DefReg = MI->getOperand(defIdx).getReg();
         if (Reg == DefReg) {
           isKill = true;
-          // ANd in that case an explicit kill flag is not allowed.
+          // And in that case an explicit kill flag is not allowed.
           if (MO->isKill())
             report("Illegal kill flag on two-address instruction operand",
                    MO, MONum);
@@ -590,7 +638,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       }
 
       // Check LiveInts liveness and kill.
-      if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+      if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+          LiveInts && !LiveInts->isNotInMIMap(MI)) {
         SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
         if (LiveInts->hasInterval(Reg)) {
           const LiveInterval &LI = LiveInts->getInterval(Reg);
@@ -598,8 +647,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
             report("No live range at use", MO, MONum);
             *OS << UseIdx << " is not live in " << LI << '\n';
           }
-          // TODO: Verify isKill == LI.killedAt.
-        } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+          // Check for extra kill flags.
+          // Note that we allow missing kill flags for now.
+          if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+            report("Live range continues after kill flag", MO, MONum);
+            *OS << "Live range: " << LI << '\n';
+          }
+        } else {
           report("Virtual register has no Live interval", MO, MONum);
         }
       }
@@ -636,11 +690,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
         SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
         if (LiveInts->hasInterval(Reg)) {
           const LiveInterval &LI = LiveInts->getInterval(Reg);
-          if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) {
-            assert(LR->valno && "NULL valno is not allowed");
-            if (LR->valno->def != DefIdx) {
+          if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+            assert(VNI && "NULL valno is not allowed");
+            if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
               report("Inconsistent valno->def", MO, MONum);
-              *OS << "Valno " << LR->valno->id << " is not defined at "
+              *OS << "Valno " << VNI->id << " is not defined at "
                   << DefIdx << " in " << LI << '\n';
             }
           } else {
@@ -655,7 +709,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
 
     // Check register classes.
     if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
-      const TargetOperandInfo &TOI = TI.OpInfo[MONum];
       unsigned SubIdx = MO->getSubReg();
 
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -706,6 +759,22 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       report("PHI operand is not in the CFG", MO, MONum);
     break;
 
+  case MachineOperand::MO_FrameIndex:
+    if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+        LiveInts && !LiveInts->isNotInMIMap(MI)) {
+      LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+      SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+      if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+        report("Instruction loads from dead spill slot", MO, MONum);
+        *OS << "Live stack: " << LI << '\n';
+      }
+      if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+        report("Instruction stores to dead spill slot", MO, MONum);
+        *OS << "Live stack: " << LI << '\n';
+      }
+    }
+    break;
+
   default:
     break;
   }
@@ -717,12 +786,31 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
   set_subtract(regsLive, regsKilled); regsKilled.clear();
   set_subtract(regsLive, regsDead);   regsDead.clear();
   set_union(regsLive, regsDefined);   regsDefined.clear();
+
+  if (Indexes && Indexes->hasIndex(MI)) {
+    SlotIndex idx = Indexes->getInstructionIndex(MI);
+    if (!(idx > lastIndex)) {
+      report("Instruction index out of order", MI);
+      *OS << "Last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = idx;
+  }
 }
 
 void
 MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
   MBBInfoMap[MBB].regsLiveOut = regsLive;
   regsLive.clear();
+
+  if (Indexes) {
+    SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+    if (!(stop > lastIndex)) {
+      report("Block ends before last instruction index", MBB);
+      *OS << "Block ends at " << stop
+          << " last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = stop;
+  }
 }
 
 // Calculate the largest possible vregsPassed sets. These are the registers that
@@ -854,8 +942,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
 
 void MachineVerifier::verifyLiveVariables() {
   assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
-  for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
-         RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) {
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
     for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
          MFI != MFE; ++MFI) {
@@ -865,13 +953,13 @@ void MachineVerifier::verifyLiveVariables() {
       if (MInfo.vregsRequired.count(Reg)) {
         if (!VI.AliveBlocks.test(MFI->getNumber())) {
           report("LiveVariables: Block missing from AliveBlocks", MFI);
-          *OS << "Virtual register %reg" << Reg
+          *OS << "Virtual register " << PrintReg(Reg)
               << " must be live through the block.\n";
         }
       } else {
         if (VI.AliveBlocks.test(MFI->getNumber())) {
           report("LiveVariables: Block should not be in AliveBlocks", MFI);
-          *OS << "Virtual register %reg" << Reg
+          *OS << "Virtual register " << PrintReg(Reg)
               << " is not needed live through the block.\n";
         }
       }
@@ -884,14 +972,24 @@ void MachineVerifier::verifyLiveIntervals() {
   for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
        LVE = LiveInts->end(); LVI != LVE; ++LVI) {
     const LiveInterval &LI = *LVI->second;
+
+    // Spilling and splitting may leave unused registers around. Skip them.
+    if (MRI->use_empty(LI.reg))
+      continue;
+
+    // Physical registers have much weirdness going on, mostly from coalescing.
+    // We should probably fix it, but for now just ignore them.
+    if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+      continue;
+
     assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
 
     for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
          I!=E; ++I) {
       VNInfo *VNI = *I;
-      const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def);
+      const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
 
-      if (!DefLR) {
+      if (!DefVNI) {
         if (!VNI->isUnused()) {
           report("Valno not live at def and not marked unused", MF);
           *OS << "Valno #" << VNI->id << " in " << LI << '\n';
@@ -902,31 +1000,216 @@ void MachineVerifier::verifyLiveIntervals() {
       if (VNI->isUnused())
         continue;
 
-      if (DefLR->valno != VNI) {
+      if (DefVNI != VNI) {
         report("Live range at def has different valno", MF);
-        DefLR->print(*OS);
-        *OS << " should use valno #" << VNI->id << " in " << LI << '\n';
+        *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+            << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+        continue;
       }
 
+      const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+      if (!MBB) {
+        report("Invalid definition index", MF);
+        *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+            << " in " << LI << '\n';
+        continue;
+      }
+
+      if (VNI->isPHIDef()) {
+        if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+          report("PHIDef value is not defined at MBB start", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << ", not at the beginning of BB#" << MBB->getNumber()
+              << " in " << LI << '\n';
+        }
+      } else {
+        // Non-PHI def.
+        const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+        if (!MI) {
+          report("No instruction at def index", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << " in " << LI << '\n';
+        } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+          report("Defining instruction does not modify register", MI);
+          *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+        }
+
+        bool isEarlyClobber = false;
+        if (MI) {
+          for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+               MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+            if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+                MOI->isEarlyClobber()) {
+              isEarlyClobber = true;
+              break;
+            }
+          }
+        }
+
+        // Early clobber defs begin at USE slots, but other defs must begin at
+        // DEF slots.
+        if (isEarlyClobber) {
+          if (!VNI->def.isUse()) {
+            report("Early clobber def must be at a USE slot", MF);
+            *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+                << " in " << LI << '\n';
+          }
+        } else if (!VNI->def.isDef()) {
+          report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << " in " << LI << '\n';
+        }
+      }
     }
 
     for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
-      const LiveRange &LR = *I;
-      assert(LR.valno && "Live range has no valno");
+      const VNInfo *VNI = I->valno;
+      assert(VNI && "Live range has no valno");
 
-      if (LR.valno->id >= LI.getNumValNums() ||
-          LR.valno != LI.getValNumInfo(LR.valno->id)) {
+      if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
         report("Foreign valno in live range", MF);
-        LR.print(*OS);
+        I->print(*OS);
         *OS << " has a valno not in " << LI << '\n';
       }
 
-      if (LR.valno->isUnused()) {
+      if (VNI->isUnused()) {
         report("Live range valno is marked unused", MF);
-        LR.print(*OS);
+        I->print(*OS);
+        *OS << " in " << LI << '\n';
+      }
+
+      const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+      if (!MBB) {
+        report("Bad start of live segment, no basic block", MF);
+        I->print(*OS);
         *OS << " in " << LI << '\n';
+        continue;
+      }
+      SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+      if (I->start != MBBStartIdx && I->start != VNI->def) {
+        report("Live segment must begin at MBB entry or valno def", MBB);
+        I->print(*OS);
+        *OS << " in " << LI << '\n' << "Basic block starts at "
+            << MBBStartIdx << '\n';
+      }
+
+      const MachineBasicBlock *EndMBB =
+                                LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+      if (!EndMBB) {
+        report("Bad end of live segment, no basic block", MF);
+        I->print(*OS);
+        *OS << " in " << LI << '\n';
+        continue;
+      }
+      if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+        // The live segment is ending inside EndMBB
+        const MachineInstr *MI =
+                        LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+        if (!MI) {
+          report("Live segment doesn't end at a valid instruction", EndMBB);
+        I->print(*OS);
+        *OS << " in " << LI << '\n' << "Basic block starts at "
+            << MBBStartIdx << '\n';
+        } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+                   !MI->readsVirtualRegister(LI.reg)) {
+          // A live range can end with either a redefinition, a kill flag on a
+          // use, or a dead flag on a def.
+          // FIXME: Should we check for each of these?
+          bool hasDeadDef = false;
+          for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+               MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+            if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
+              hasDeadDef = true;
+              break;
+            }
+          }
+
+          if (!hasDeadDef) {
+            report("Instruction killing live segment neither defines nor reads "
+                   "register", MI);
+            I->print(*OS);
+            *OS << " in " << LI << '\n';
+          }
+        }
+      }
+
+      // Now check all the basic blocks in this live segment.
+      MachineFunction::const_iterator MFI = MBB;
+      // Is this live range the beginning of a non-PHIDef VN?
+      if (I->start == VNI->def && !VNI->isPHIDef()) {
+        // Not live-in to any blocks.
+        if (MBB == EndMBB)
+          continue;
+        // Skip this block.
+        ++MFI;
+      }
+      for (;;) {
+        assert(LiveInts->isLiveInToMBB(LI, MFI));
+        // We don't know how to track physregs into a landing pad.
+        if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+            MFI->isLandingPad()) {
+          if (&*MFI == EndMBB)
+            break;
+          ++MFI;
+          continue;
+        }
+        // Check that VNI is live-out of all predecessors.
+        for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+             PE = MFI->pred_end(); PI != PE; ++PI) {
+          SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+          const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+          if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
+            if (PVNI && !PVNI->hasPHIKill()) {
+              report("Value live out of predecessor doesn't have PHIKill", MF);
+              *OS << "Valno #" << PVNI->id << " live out of BB#"
+                  << (*PI)->getNumber() << '@' << PEnd
+                  << " doesn't have PHIKill, but Valno #" << VNI->id
+                  << " is PHIDef and defined at the beginning of BB#"
+                  << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
+                  << " in " << LI << '\n';
+            }
+            continue;
+          }
+
+          if (!PVNI) {
+            report("Register not marked live out of predecessor", *PI);
+            *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+                << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+                << PEnd << " in " << LI << '\n';
+            continue;
+          }
+
+          if (PVNI != VNI) {
+            report("Different value live out of predecessor", *PI);
+            *OS << "Valno #" << PVNI->id << " live out of BB#"
+                << (*PI)->getNumber() << '@' << PEnd
+                << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+                << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+          }
+        }
+        if (&*MFI == EndMBB)
+          break;
+        ++MFI;
       }
+    }
 
+    // Check the LI only has one connected component.
+    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+      ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+      unsigned NumComp = ConEQ.Classify(&LI);
+      if (NumComp > 1) {
+        report("Multiple connected components in live interval", MF);
+        *OS << NumComp << " components in " << LI << '\n';
+        for (unsigned comp = 0; comp != NumComp; ++comp) {
+          *OS << comp << ": valnos";
+          for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+               E = LI.vni_end(); I!=E; ++I)
+            if (comp == ConEQ.getEqClass(*I))
+              *OS << ' ' << (*I)->id;
+          *OS << '\n';
+        }
+      }
     }
   }
 }
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index edb4eea71b8a..c05be130ec61 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,9 @@ namespace {
 
   public:
     static char ID; // Pass identification
-    OptimizePHIs() : MachineFunctionPass(ID) {}
+    OptimizePHIs() : MachineFunctionPass(ID) {
+      initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -55,7 +57,7 @@ namespace {
 
 char OptimizePHIs::ID = 0;
 INITIALIZE_PASS(OptimizePHIs, "opt-phis",
-                "Optimize machine instruction PHIs", false, false);
+                "Optimize machine instruction PHIs", false, false)
 
 FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
 
diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h
deleted file mode 100644
index b2224cb051dc..000000000000
--- a/lib/CodeGen/PBQP/Graph.h
+++ /dev/null
@@ -1,425 +0,0 @@
-//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Graph class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
-#define LLVM_CODEGEN_PBQP_GRAPH_H
-
-#include "Math.h"
-
-#include <list>
-#include <vector>
-#include <map>
-
-namespace PBQP {
-
-  /// PBQP Graph class.
-  /// Instances of this class describe PBQP problems.
-  class Graph {
-  private:
-
-    // ----- TYPEDEFS -----
-    class NodeEntry;
-    class EdgeEntry;
-
-    typedef std::list<NodeEntry> NodeList;
-    typedef std::list<EdgeEntry> EdgeList;
-
-  public:
-
-    typedef NodeList::iterator NodeItr;
-    typedef NodeList::const_iterator ConstNodeItr;
-
-    typedef EdgeList::iterator EdgeItr;
-    typedef EdgeList::const_iterator ConstEdgeItr;
-
-  private:
-
-    typedef std::list<EdgeItr> AdjEdgeList;
-  
-  public:
-
-    typedef AdjEdgeList::iterator AdjEdgeItr;
-
-  private:
-
-    class NodeEntry {
-    private:
-      Vector costs;      
-      AdjEdgeList adjEdges;
-      unsigned degree;
-      void *data;
-    public:
-      NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
-      Vector& getCosts() { return costs; }
-      const Vector& getCosts() const { return costs; }
-      unsigned getDegree() const { return degree; }
-      AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
-      AdjEdgeItr edgesEnd() { return adjEdges.end(); }
-      AdjEdgeItr addEdge(EdgeItr e) {
-        ++degree;
-        return adjEdges.insert(adjEdges.end(), e);
-      }
-      void removeEdge(AdjEdgeItr ae) {
-        --degree;
-        adjEdges.erase(ae);
-      }
-      void setData(void *data) { this->data = data; }
-      void* getData() { return data; }
-    };
-
-    class EdgeEntry {
-    private:
-      NodeItr node1, node2;
-      Matrix costs;
-      AdjEdgeItr node1AEItr, node2AEItr;
-      void *data;
-    public:
-      EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
-        : node1(node1), node2(node2), costs(costs) {}
-      NodeItr getNode1() const { return node1; }
-      NodeItr getNode2() const { return node2; }
-      Matrix& getCosts() { return costs; }
-      const Matrix& getCosts() const { return costs; }
-      void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
-      AdjEdgeItr getNode1AEItr() { return node1AEItr; }
-      void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
-      AdjEdgeItr getNode2AEItr() { return node2AEItr; }
-      void setData(void *data) { this->data = data; }
-      void *getData() { return data; }
-    };
-
-    // ----- MEMBERS -----
-
-    NodeList nodes;
-    unsigned numNodes;
-
-    EdgeList edges;
-    unsigned numEdges;
-
-    // ----- INTERNAL METHODS -----
-
-    NodeEntry& getNode(NodeItr nItr) { return *nItr; }
-    const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
-
-    EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
-    const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
-
-    NodeItr addConstructedNode(const NodeEntry &n) {
-      ++numNodes;
-      return nodes.insert(nodes.end(), n);
-    }
-
-    EdgeItr addConstructedEdge(const EdgeEntry &e) {
-      assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
-             "Attempt to add duplicate edge.");
-      ++numEdges;
-      EdgeItr edgeItr = edges.insert(edges.end(), e);
-      EdgeEntry &ne = getEdge(edgeItr);
-      NodeEntry &n1 = getNode(ne.getNode1());
-      NodeEntry &n2 = getNode(ne.getNode2());
-      // Sanity check on matrix dimensions:
-      assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
-             (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
-             "Edge cost dimensions do not match node costs dimensions.");
-      ne.setNode1AEItr(n1.addEdge(edgeItr));
-      ne.setNode2AEItr(n2.addEdge(edgeItr));
-      return edgeItr;
-    }
-
-    inline void copyFrom(const Graph &other);
-  public:
-
-    /// \brief Construct an empty PBQP graph.
-    Graph() : numNodes(0), numEdges(0) {}
-
-    /// \brief Copy construct this graph from "other". Note: Does not copy node
-    ///        and edge data, only graph structure and costs.
-    /// @param other Source graph to copy from.
-    Graph(const Graph &other) : numNodes(0), numEdges(0) {
-      copyFrom(other);
-    }
-
-    /// \brief Make this graph a copy of "other". Note: Does not copy node and
-    ///        edge data, only graph structure and costs.
-    /// @param other The graph to copy from.
-    /// @return A reference to this graph.
-    ///
-    /// This will clear the current graph, erasing any nodes and edges added,
-    /// before copying from other.
-    Graph& operator=(const Graph &other) {
-      clear();      
-      copyFrom(other);
-      return *this;
-    }
-
-    /// \brief Add a node with the given costs.
-    /// @param costs Cost vector for the new node.
-    /// @return Node iterator for the added node.
-    NodeItr addNode(const Vector &costs) {
-      return addConstructedNode(NodeEntry(costs));
-    }
-
-    /// \brief Add an edge between the given nodes with the given costs.
-    /// @param n1Itr First node.
-    /// @param n2Itr Second node.
-    /// @return Edge iterator for the added edge.
-    EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr,
-                    const Matrix &costs) {
-      assert(getNodeCosts(n1Itr).getLength() == costs.getRows() &&
-             getNodeCosts(n2Itr).getLength() == costs.getCols() &&
-             "Matrix dimensions mismatch.");
-      return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); 
-    }
-
-    /// \brief Get the number of nodes in the graph.
-    /// @return Number of nodes in the graph.
-    unsigned getNumNodes() const { return numNodes; }
-
-    /// \brief Get the number of edges in the graph.
-    /// @return Number of edges in the graph.
-    unsigned getNumEdges() const { return numEdges; }
-
-    /// \brief Get a node's cost vector.
-    /// @param nItr Node iterator.
-    /// @return Node cost vector.
-    Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); }
-
-    /// \brief Get a node's cost vector (const version).
-    /// @param nItr Node iterator.
-    /// @return Node cost vector.
-    const Vector& getNodeCosts(ConstNodeItr nItr) const {
-      return getNode(nItr).getCosts();
-    }
-
-    /// \brief Set a node's data pointer.
-    /// @param nItr Node iterator.
-    /// @param data Pointer to node data.
-    ///
-    /// Typically used by a PBQP solver to attach data to aid in solution.
-    void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); }
-
-    /// \brief Get the node's data pointer.
-    /// @param nItr Node iterator.
-    /// @return Pointer to node data.
-    void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); }
-    
-    /// \brief Get an edge's cost matrix.
-    /// @param eItr Edge iterator.
-    /// @return Edge cost matrix.
-    Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); }
-
-    /// \brief Get an edge's cost matrix (const version).
-    /// @param eItr Edge iterator.
-    /// @return Edge cost matrix.
-    const Matrix& getEdgeCosts(ConstEdgeItr eItr) const {
-      return getEdge(eItr).getCosts();
-    }
-
-    /// \brief Set an edge's data pointer.
-    /// @param eItr Edge iterator.
-    /// @param data Pointer to edge data.
-    ///
-    /// Typically used by a PBQP solver to attach data to aid in solution.
-    void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); }
-
-    /// \brief Get an edge's data pointer.
-    /// @param eItr Edge iterator.
-    /// @return Pointer to edge data. 
-    void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); }
-
-    /// \brief Get a node's degree.
-    /// @param nItr Node iterator.
-    /// @return The degree of the node.
-    unsigned getNodeDegree(NodeItr nItr) const {
-      return getNode(nItr).getDegree();
-    }
-
-    /// \brief Begin iterator for node set.
-    NodeItr nodesBegin() { return nodes.begin(); }
-
-    /// \brief Begin const iterator for node set.
-    ConstNodeItr nodesBegin() const { return nodes.begin(); }
-
-    /// \brief End iterator for node set.
-    NodeItr nodesEnd() { return nodes.end(); }
-
-    /// \brief End const iterator for node set.
-    ConstNodeItr nodesEnd() const { return nodes.end(); }
-
-    /// \brief Begin iterator for edge set.
-    EdgeItr edgesBegin() { return edges.begin(); }
-
-    /// \brief End iterator for edge set.
-    EdgeItr edgesEnd() { return edges.end(); }
-
-    /// \brief Get begin iterator for adjacent edge set.
-    /// @param nItr Node iterator.
-    /// @return Begin iterator for the set of edges connected to the given node.
-    AdjEdgeItr adjEdgesBegin(NodeItr nItr) {
-      return getNode(nItr).edgesBegin();
-    }
-
-    /// \brief Get end iterator for adjacent edge set.
-    /// @param nItr Node iterator.
-    /// @return End iterator for the set of edges connected to the given node.
-    AdjEdgeItr adjEdgesEnd(NodeItr nItr) {
-      return getNode(nItr).edgesEnd();
-    }
-
-    /// \brief Get the first node connected to this edge.
-    /// @param eItr Edge iterator.
-    /// @return The first node connected to the given edge. 
-    NodeItr getEdgeNode1(EdgeItr eItr) {
-      return getEdge(eItr).getNode1();
-    }
-
-    /// \brief Get the second node connected to this edge.
-    /// @param eItr Edge iterator.
-    /// @return The second node connected to the given edge. 
-    NodeItr getEdgeNode2(EdgeItr eItr) {
-      return getEdge(eItr).getNode2();
-    } 
-
-    /// \brief Get the "other" node connected to this edge.
-    /// @param eItr Edge iterator.
-    /// @param nItr Node iterator for the "given" node.
-    /// @return The iterator for the "other" node connected to this edge. 
-    NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) {
-      EdgeEntry &e = getEdge(eItr);
-      if (e.getNode1() == nItr) {
-        return e.getNode2();
-      } // else
-      return e.getNode1();
-    }
-
-    /// \brief Get the edge connecting two nodes.
-    /// @param n1Itr First node iterator.
-    /// @param n2Itr Second node iterator.
-    /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists,
-    ///         otherwise returns edgesEnd(). 
-    EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
-      for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
-         aeItr != aeEnd; ++aeItr) {
-        if ((getEdgeNode1(*aeItr) == n2Itr) ||
-            (getEdgeNode2(*aeItr) == n2Itr)) {
-          return *aeItr;
-        }
-      }
-      return edges.end();
-    }
-
-    /// \brief Remove a node from the graph.
-    /// @param nItr Node iterator.
-    void removeNode(NodeItr nItr) {
-      NodeEntry &n = getNode(nItr);
-      for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
-        EdgeItr eItr = *itr;
-        ++itr;
-        removeEdge(eItr); 
-      }
-      nodes.erase(nItr);
-      --numNodes;
-    }
-
-    /// \brief Remove an edge from the graph.
-    /// @param eItr Edge iterator.
-    void removeEdge(EdgeItr eItr) {
-      EdgeEntry &e = getEdge(eItr);
-      NodeEntry &n1 = getNode(e.getNode1());
-      NodeEntry &n2 = getNode(e.getNode2());
-      n1.removeEdge(e.getNode1AEItr());
-      n2.removeEdge(e.getNode2AEItr());
-      edges.erase(eItr);
-      --numEdges;
-    }
-
-    /// \brief Remove all nodes and edges from the graph.
-    void clear() {
-      nodes.clear();
-      edges.clear();
-      numNodes = numEdges = 0;
-    }
-
-    /// \brief Print a representation of this graph in DOT format.
-    /// @param os Output stream to print on.
-    template <typename OStream>
-    void printDot(OStream &os) {
-    
-      os << "graph {\n";
-
-      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
-           nodeItr != nodeEnd; ++nodeItr) {
-
-        os << "  node" << nodeItr << " [ label=\""
-           << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
-      }
-
-      os << "  edge [ len=" << getNumNodes() << " ]\n";
-
-      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
-           edgeItr != edgeEnd; ++edgeItr) {
-
-        os << "  node" << getEdgeNode1(edgeItr)
-           << " -- node" << getEdgeNode2(edgeItr)
-           << " [ label=\"";
-
-        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
-
-        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
-          os << edgeCosts.getRowAsVector(i) << "\\n";
-        }
-        os << "\" ]\n";
-      }
-      os << "}\n";
-    }
-
-  };
-
-  class NodeItrComparator {
-  public:
-    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
-      return &*n1 < &*n2;
-    }
-
-    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
-      return &*n1 < &*n2;
-    }
-  };
-
-  class EdgeItrCompartor {
-  public:
-    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
-      return &*e1 < &*e2;
-    }
-
-    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
-      return &*e1 < &*e2;
-    }
-  };
-
-  void Graph::copyFrom(const Graph &other) {
-    std::map<Graph::ConstNodeItr, Graph::NodeItr,
-             NodeItrComparator> nodeMap;
-
-     for (Graph::ConstNodeItr nItr = other.nodesBegin(),
-                             nEnd = other.nodesEnd();
-         nItr != nEnd; ++nItr) {
-      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
-    }
-      
-  }
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
deleted file mode 100644
index 791c227f0d07..000000000000
--- a/lib/CodeGen/PBQP/HeuristicBase.h
+++ /dev/null
@@ -1,246 +0,0 @@
-//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-
-#include "HeuristicSolver.h"
-
-namespace PBQP {
-
-  /// \brief Abstract base class for heuristic implementations.
-  ///
-  /// This class provides a handy base for heuristic implementations with common
-  /// solver behaviour implemented for a number of methods.
-  ///
-  /// To implement your own heuristic using this class as a base you'll have to
-  /// implement, as a minimum, the following methods:
-  /// <ul>
-  ///   <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
-  ///        heuristic reduction list.
-  ///   <li> void heuristicReduce() : Perform a single heuristic reduction.
-  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
-  ///        change to the cost matrix on the given edge (by R2).
-  ///   <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new 
-  ///        costs on the given edge.
-  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
-  ///        edge into the PBQP graph (by R2).
-  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
-  ///        disconnection of the given edge from the given node.
-  ///   <li> A constructor for your derived class : to pass back a reference to
-  ///        the solver which is using this heuristic.
-  /// </ul>
-  ///
-  /// These methods are implemented in this class for documentation purposes,
-  /// but will assert if called.
-  /// 
-  /// Note that this class uses the curiously recursive template idiom to
-  /// forward calls to the derived class. These methods need not be made
-  /// virtual, and indeed probably shouldn't for performance reasons.
-  ///
-  /// You'll also need to provide NodeData and EdgeData structs in your class.
-  /// These can be used to attach data relevant to your heuristic to each
-  /// node/edge in the PBQP graph.
-
-  template <typename HImpl>
-  class HeuristicBase {
-  private:
-
-    typedef std::list<Graph::NodeItr> OptimalList;
-
-    HeuristicSolverImpl<HImpl> &s;
-    Graph &g;
-    OptimalList optimalList;
-
-    // Return a reference to the derived heuristic.
-    HImpl& impl() { return static_cast<HImpl&>(*this); }
-
-    // Add the given node to the optimal reductions list. Keep an iterator to
-    // its location for fast removal. 
-    void addToOptimalReductionList(Graph::NodeItr nItr) {
-      optimalList.insert(optimalList.end(), nItr);
-    }
-
-  public:
-
-    /// \brief Construct an instance with a reference to the given solver.
-    /// @param solver The solver which is using this heuristic instance.
-    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
-      : s(solver), g(s.getGraph()) { }
-
-    /// \brief Get the solver which is using this heuristic instance.
-    /// @return The solver which is using this heuristic instance.
-    ///
-    /// You can use this method to get access to the solver in your derived
-    /// heuristic implementation.
-    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
-
-    /// \brief Get the graph representing the problem to be solved.
-    /// @return The graph representing the problem to be solved.
-    Graph& getGraph() { return g; }
-
-    /// \brief Tell the solver to simplify the graph before the reduction phase.
-    /// @return Whether or not the solver should run a simplification phase
-    ///         prior to the main setup and reduction.
-    ///
-    /// HeuristicBase returns true from this method as it's a sensible default,
-    /// however you can over-ride it in your derived class if you want different
-    /// behaviour.
-    bool solverRunSimplify() const { return true; }
-
-    /// \brief Decide whether a node should be optimally or heuristically 
-    ///        reduced.
-    /// @return Whether or not the given node should be listed for optimal
-    ///         reduction (via R0, R1 or R2).
-    ///
-    /// HeuristicBase returns true for any node with degree less than 3. This is
-    /// sane and sensible for many situations, but not all. You can over-ride
-    /// this method in your derived class if you want a different selection
-    /// criteria. Note however that your criteria for selecting optimal nodes
-    /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or
-    /// higher should not be selected under any circumstances.
-    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
-      if (g.getNodeDegree(nItr) < 3)
-        return true;
-      // else
-      return false;
-    }
-
-    /// \brief Add the given node to the list of nodes to be optimally reduced.
-    /// @return nItr Node iterator to be added.
-    ///
-    /// You probably don't want to over-ride this, except perhaps to record
-    /// statistics before calling this implementation. HeuristicBase relies on
-    /// its behaviour.
-    void addToOptimalReduceList(Graph::NodeItr nItr) {
-      optimalList.push_back(nItr);
-    }
-
-    /// \brief Initialise the heuristic.
-    ///
-    /// HeuristicBase iterates over all nodes in the problem and adds them to
-    /// the appropriate list using addToOptimalReduceList or
-    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
-    ///
-    /// This behaviour should be fine for most situations.
-    void setup() {
-      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
-           nItr != nEnd; ++nItr) {
-        if (impl().shouldOptimallyReduce(nItr)) {
-          addToOptimalReduceList(nItr);
-        } else {
-          impl().addToHeuristicReduceList(nItr);
-        }
-      }
-    }
-
-    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
-    /// @return True if a reduction takes place, false if the optimal reduce
-    ///         list is empty.
-    ///
-    /// Selects a node from the optimal reduce list and removes it, applying
-    /// R0, R1 or R2 as appropriate based on the selected node's degree.
-    bool optimalReduce() {
-      if (optimalList.empty())
-        return false;
-
-      Graph::NodeItr nItr = optimalList.front();
-      optimalList.pop_front();
-
-      switch (s.getSolverDegree(nItr)) {
-        case 0: s.applyR0(nItr); break;
-        case 1: s.applyR1(nItr); break;
-        case 2: s.applyR2(nItr); break;
-        default: assert(false &&
-                        "Optimal reductions of degree > 2 nodes is invalid.");
-      }
-
-      return true;
-    }
-
-    /// \brief Perform the PBQP reduction process.
-    ///
-    /// Reduces the problem to the empty graph by repeated application of the
-    /// reduction rules R0, R1, R2 and RN.
-    /// R0, R1 or R2 are always applied if possible before RN is used.
-    void reduce() {
-      bool finished = false;
-
-      while (!finished) {
-        if (!optimalReduce()) {
-          if (impl().heuristicReduce()) {
-            getSolver().recordRN();
-          } else {
-            finished = true;
-          }
-        }
-      }
-    }
-
-    /// \brief Add a node to the heuristic reduce list.
-    /// @param nItr Node iterator to add to the heuristic reduce list.
-    void addToHeuristicList(Graph::NodeItr nItr) {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Heuristically reduce one of the nodes in the heuristic
-    ///        reduce list.
-    /// @return True if a reduction takes place, false if the heuristic reduce
-    ///         list is empty.
-    void heuristicReduce() {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Prepare a change in the costs on the given edge.
-    /// @param eItr Edge iterator.    
-    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Handle the change in the costs on the given edge.
-    /// @param eItr Edge iterator.
-    void postUpdateEdgeCostts(Graph::EdgeItr eItr) {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Handle the addition of a new edge into the PBQP graph.
-    /// @param eItr Edge iterator for the added edge.
-    void handleAddEdge(Graph::EdgeItr eItr) {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Handle disconnection of an edge from a node.
-    /// @param eItr Edge iterator for edge being disconnected.
-    /// @param nItr Node iterator for the node being disconnected from.
-    ///
-    /// Edges are frequently removed due to the removal of a node. This
-    /// method allows for the effect to be computed only for the remaining
-    /// node in the graph.
-    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
-      assert(false && "Must be implemented in derived class.");
-    }
-
-    /// \brief Clean up any structures used by HeuristicBase.
-    ///
-    /// At present this just performs a sanity check: that the optimal reduce
-    /// list is empty now that reduction has completed.
-    ///
-    /// If your derived class has more complex structures which need tearing
-    /// down you should over-ride this method but include a call back to this
-    /// implementation.
-    void cleanup() {
-      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
-    }
-
-  };
-
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
deleted file mode 100644
index 35514f967478..000000000000
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ /dev/null
@@ -1,616 +0,0 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Heuristic PBQP solver. This solver is able to perform optimal reductions for
-// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is
-// used to select a node for reduction. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-
-#include "Graph.h"
-#include "Solution.h"
-#include <vector>
-#include <limits>
-
-namespace PBQP {
-
-  /// \brief Heuristic PBQP solver implementation.
-  ///
-  /// This class should usually be created (and destroyed) indirectly via a call
-  /// to HeuristicSolver<HImpl>::solve(Graph&).
-  /// See the comments for HeuristicSolver.
-  ///
-  /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
-  /// backpropagation phase, and maintains the internal copy of the graph on
-  /// which the reduction is carried out (the original being kept to facilitate
-  /// backpropagation).
-  template <typename HImpl>
-  class HeuristicSolverImpl {
-  private:
-
-    typedef typename HImpl::NodeData HeuristicNodeData;
-    typedef typename HImpl::EdgeData HeuristicEdgeData;
-
-    typedef std::list<Graph::EdgeItr> SolverEdges;
-
-  public:
-  
-    /// \brief Iterator type for edges in the solver graph.
-    typedef SolverEdges::iterator SolverEdgeItr;
-
-  private:
-
-    class NodeData {
-    public:
-      NodeData() : solverDegree(0) {}
-
-      HeuristicNodeData& getHeuristicData() { return hData; }
-
-      SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
-        ++solverDegree;
-        return solverEdges.insert(solverEdges.end(), eItr);
-      }
-
-      void removeSolverEdge(SolverEdgeItr seItr) {
-        --solverDegree;
-        solverEdges.erase(seItr);
-      }
-
-      SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
-      SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
-      unsigned getSolverDegree() const { return solverDegree; }
-      void clearSolverEdges() {
-        solverDegree = 0;
-        solverEdges.clear(); 
-      }
-      
-    private:
-      HeuristicNodeData hData;
-      unsigned solverDegree;
-      SolverEdges solverEdges;
-    };
- 
-    class EdgeData {
-    public:
-      HeuristicEdgeData& getHeuristicData() { return hData; }
-
-      void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
-        this->n1SolverEdgeItr = n1SolverEdgeItr;
-      }
-
-      SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
-
-      void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){
-        this->n2SolverEdgeItr = n2SolverEdgeItr;
-      }
-
-      SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
-
-    private:
-
-      HeuristicEdgeData hData;
-      SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
-    };
-
-    Graph &g;
-    HImpl h;
-    Solution s;
-    std::vector<Graph::NodeItr> stack;
-
-    typedef std::list<NodeData> NodeDataList;
-    NodeDataList nodeDataList;
-
-    typedef std::list<EdgeData> EdgeDataList;
-    EdgeDataList edgeDataList;
-
-  public:
-
-    /// \brief Construct a heuristic solver implementation to solve the given
-    ///        graph.
-    /// @param g The graph representing the problem instance to be solved.
-    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}  
-
-    /// \brief Get the graph being solved by this solver.
-    /// @return The graph representing the problem instance being solved by this
-    ///         solver.
-    Graph& getGraph() { return g; }
-
-    /// \brief Get the heuristic data attached to the given node.
-    /// @param nItr Node iterator.
-    /// @return The heuristic data attached to the given node.
-    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
-      return getSolverNodeData(nItr).getHeuristicData();
-    }
-
-    /// \brief Get the heuristic data attached to the given edge.
-    /// @param eItr Edge iterator.
-    /// @return The heuristic data attached to the given node.
-    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
-      return getSolverEdgeData(eItr).getHeuristicData();
-    }
-
-    /// \brief Begin iterator for the set of edges adjacent to the given node in
-    ///        the solver graph.
-    /// @param nItr Node iterator.
-    /// @return Begin iterator for the set of edges adjacent to the given node
-    ///         in the solver graph. 
-    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
-      return getSolverNodeData(nItr).solverEdgesBegin();
-    }
-
-    /// \brief End iterator for the set of edges adjacent to the given node in
-    ///        the solver graph.
-    /// @param nItr Node iterator.
-    /// @return End iterator for the set of edges adjacent to the given node in
-    ///         the solver graph. 
-    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
-      return getSolverNodeData(nItr).solverEdgesEnd();
-    }
-
-    /// \brief Remove a node from the solver graph.
-    /// @param eItr Edge iterator for edge to be removed.
-    ///
-    /// Does <i>not</i> notify the heuristic of the removal. That should be
-    /// done manually if necessary.
-    void removeSolverEdge(Graph::EdgeItr eItr) {
-      EdgeData &eData = getSolverEdgeData(eItr);
-      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
-               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
-      n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
-      n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
-    }
-
-    /// \brief Compute a solution to the PBQP problem instance with which this
-    ///        heuristic solver was constructed.
-    /// @return A solution to the PBQP problem.
-    ///
-    /// Performs the full PBQP heuristic solver algorithm, including setup,
-    /// calls to the heuristic (which will call back to the reduction rules in
-    /// this class), and cleanup.
-    Solution computeSolution() {
-      setup();
-      h.setup();
-      h.reduce();
-      backpropagate();
-      h.cleanup();
-      cleanup();
-      return s;
-    }
-
-    /// \brief Add to the end of the stack.
-    /// @param nItr Node iterator to add to the reduction stack.
-    void pushToStack(Graph::NodeItr nItr) {
-      getSolverNodeData(nItr).clearSolverEdges();
-      stack.push_back(nItr);
-    }
-
-    /// \brief Returns the solver degree of the given node.
-    /// @param nItr Node iterator for which degree is requested.
-    /// @return Node degree in the <i>solver</i> graph (not the original graph).
-    unsigned getSolverDegree(Graph::NodeItr nItr) {
-      return  getSolverNodeData(nItr).getSolverDegree();
-    }
-
-    /// \brief Set the solution of the given node.
-    /// @param nItr Node iterator to set solution for.
-    /// @param selection Selection for node.
-    void setSolution(const Graph::NodeItr &nItr, unsigned selection) {
-      s.setSelection(nItr, selection);
-
-      for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
-                             aeEnd = g.adjEdgesEnd(nItr);
-           aeItr != aeEnd; ++aeItr) {
-        Graph::EdgeItr eItr(*aeItr);
-        Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr));
-        getSolverNodeData(anItr).addSolverEdge(eItr);
-      }
-    }
-
-    /// \brief Apply rule R0.
-    /// @param nItr Node iterator for node to apply R0 to.
-    ///
-    /// Node will be automatically pushed to the solver stack.
-    void applyR0(Graph::NodeItr nItr) {
-      assert(getSolverNodeData(nItr).getSolverDegree() == 0 &&
-             "R0 applied to node with degree != 0.");
-
-      // Nothing to do. Just push the node onto the reduction stack.
-      pushToStack(nItr);
-
-      s.recordR0();
-    }
-
-    /// \brief Apply rule R1.
-    /// @param xnItr Node iterator for node to apply R1 to.
-    ///
-    /// Node will be automatically pushed to the solver stack.
-    void applyR1(Graph::NodeItr xnItr) {
-      NodeData &nd = getSolverNodeData(xnItr);
-      assert(nd.getSolverDegree() == 1 &&
-             "R1 applied to node with degree != 1.");
-
-      Graph::EdgeItr eItr = *nd.solverEdgesBegin();
-
-      const Matrix &eCosts = g.getEdgeCosts(eItr);
-      const Vector &xCosts = g.getNodeCosts(xnItr);
-      
-      // Duplicate a little to avoid transposing matrices.
-      if (xnItr == g.getEdgeNode1(eItr)) {
-        Graph::NodeItr ynItr = g.getEdgeNode2(eItr);
-        Vector &yCosts = g.getNodeCosts(ynItr);
-        for (unsigned j = 0; j < yCosts.getLength(); ++j) {
-          PBQPNum min = eCosts[0][j] + xCosts[0];
-          for (unsigned i = 1; i < xCosts.getLength(); ++i) {
-            PBQPNum c = eCosts[i][j] + xCosts[i];
-            if (c < min)
-              min = c;
-          }
-          yCosts[j] += min;
-        }
-        h.handleRemoveEdge(eItr, ynItr);
-     } else {
-        Graph::NodeItr ynItr = g.getEdgeNode1(eItr);
-        Vector &yCosts = g.getNodeCosts(ynItr);
-        for (unsigned i = 0; i < yCosts.getLength(); ++i) {
-          PBQPNum min = eCosts[i][0] + xCosts[0];
-          for (unsigned j = 1; j < xCosts.getLength(); ++j) {
-            PBQPNum c = eCosts[i][j] + xCosts[j];
-            if (c < min)
-              min = c;
-          }
-          yCosts[i] += min;
-        }
-        h.handleRemoveEdge(eItr, ynItr);
-      }
-      removeSolverEdge(eItr);
-      assert(nd.getSolverDegree() == 0 &&
-             "Degree 1 with edge removed should be 0.");
-      pushToStack(xnItr);
-      s.recordR1();
-    }
-
-    /// \brief Apply rule R2.
-    /// @param xnItr Node iterator for node to apply R2 to.
-    ///
-    /// Node will be automatically pushed to the solver stack.
-    void applyR2(Graph::NodeItr xnItr) {
-      assert(getSolverNodeData(xnItr).getSolverDegree() == 2 &&
-             "R2 applied to node with degree != 2.");
-
-      NodeData &nd = getSolverNodeData(xnItr);
-      const Vector &xCosts = g.getNodeCosts(xnItr);
-
-      SolverEdgeItr aeItr = nd.solverEdgesBegin();
-      Graph::EdgeItr yxeItr = *aeItr,
-                     zxeItr = *(++aeItr);
-
-      Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr),
-                     znItr = g.getEdgeOtherNode(zxeItr, xnItr);
-
-      bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr),
-           flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr);
-
-      const Matrix *yxeCosts = flipEdge1 ?
-        new Matrix(g.getEdgeCosts(yxeItr).transpose()) :
-        &g.getEdgeCosts(yxeItr);
-
-      const Matrix *zxeCosts = flipEdge2 ?
-        new Matrix(g.getEdgeCosts(zxeItr).transpose()) :
-        &g.getEdgeCosts(zxeItr);
-
-      unsigned xLen = xCosts.getLength(),
-               yLen = yxeCosts->getRows(),
-               zLen = zxeCosts->getRows();
-               
-      Matrix delta(yLen, zLen);
-
-      for (unsigned i = 0; i < yLen; ++i) {
-        for (unsigned j = 0; j < zLen; ++j) {
-          PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0];
-          for (unsigned k = 1; k < xLen; ++k) {
-            PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k];
-            if (c < min) {
-              min = c;
-            }
-          }
-          delta[i][j] = min;
-        }
-      }
-
-      if (flipEdge1)
-        delete yxeCosts;
-
-      if (flipEdge2)
-        delete zxeCosts;
-
-      Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr);
-      bool addedEdge = false;
-
-      if (yzeItr == g.edgesEnd()) {
-        yzeItr = g.addEdge(ynItr, znItr, delta);
-        addedEdge = true;
-      } else {
-        Matrix &yzeCosts = g.getEdgeCosts(yzeItr);
-        h.preUpdateEdgeCosts(yzeItr);
-        if (ynItr == g.getEdgeNode1(yzeItr)) {
-          yzeCosts += delta;
-        } else {
-          yzeCosts += delta.transpose();
-        }
-      }
-
-      bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr);
-
-      if (!addedEdge) {
-        // If we modified the edge costs let the heuristic know.
-        h.postUpdateEdgeCosts(yzeItr);
-      }
- 
-      if (nullCostEdge) {
-        // If this edge ended up null remove it.
-        if (!addedEdge) {
-          // We didn't just add it, so we need to notify the heuristic
-          // and remove it from the solver.
-          h.handleRemoveEdge(yzeItr, ynItr);
-          h.handleRemoveEdge(yzeItr, znItr);
-          removeSolverEdge(yzeItr);
-        }
-        g.removeEdge(yzeItr);
-      } else if (addedEdge) {
-        // If the edge was added, and non-null, finish setting it up, add it to
-        // the solver & notify heuristic.
-        edgeDataList.push_back(EdgeData());
-        g.setEdgeData(yzeItr, &edgeDataList.back());
-        addSolverEdge(yzeItr);
-        h.handleAddEdge(yzeItr);
-      }
-
-      h.handleRemoveEdge(yxeItr, ynItr);
-      removeSolverEdge(yxeItr);
-      h.handleRemoveEdge(zxeItr, znItr);
-      removeSolverEdge(zxeItr);
-
-      pushToStack(xnItr);
-      s.recordR2();
-    }
-
-    /// \brief Record an application of the RN rule.
-    ///
-    /// For use by the HeuristicBase.
-    void recordRN() { s.recordRN(); } 
-
-  private:
-
-    NodeData& getSolverNodeData(Graph::NodeItr nItr) {
-      return *static_cast<NodeData*>(g.getNodeData(nItr));
-    }
-
-    EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
-      return *static_cast<EdgeData*>(g.getEdgeData(eItr));
-    }
-
-    void addSolverEdge(Graph::EdgeItr eItr) {
-      EdgeData &eData = getSolverEdgeData(eItr);
-      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
-               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
-      eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
-      eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
-    }
-
-    void setup() {
-      if (h.solverRunSimplify()) {
-        simplify();
-      }
-
-      // Create node data objects.
-      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
-           nItr != nEnd; ++nItr) {
-        nodeDataList.push_back(NodeData());
-        g.setNodeData(nItr, &nodeDataList.back());
-      }
-
-      // Create edge data objects.
-      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
-           eItr != eEnd; ++eItr) {
-        edgeDataList.push_back(EdgeData());
-        g.setEdgeData(eItr, &edgeDataList.back());
-        addSolverEdge(eItr);
-      }
-    }
-
-    void simplify() {
-      disconnectTrivialNodes();
-      eliminateIndependentEdges();
-    }
-
-    // Eliminate trivial nodes.
-    void disconnectTrivialNodes() {
-      unsigned numDisconnected = 0;
-
-      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
-           nItr != nEnd; ++nItr) {
-
-        if (g.getNodeCosts(nItr).getLength() == 1) {
-
-          std::vector<Graph::EdgeItr> edgesToRemove;
-
-          for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
-                                 aeEnd = g.adjEdgesEnd(nItr);
-               aeItr != aeEnd; ++aeItr) {
-
-            Graph::EdgeItr eItr = *aeItr;
-
-            if (g.getEdgeNode1(eItr) == nItr) {
-              Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
-              g.getNodeCosts(otherNodeItr) +=
-                g.getEdgeCosts(eItr).getRowAsVector(0);
-            }
-            else {
-              Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
-              g.getNodeCosts(otherNodeItr) +=
-                g.getEdgeCosts(eItr).getColAsVector(0);
-            }
-
-            edgesToRemove.push_back(eItr);
-          }
-
-          if (!edgesToRemove.empty())
-            ++numDisconnected;
-
-          while (!edgesToRemove.empty()) {
-            g.removeEdge(edgesToRemove.back());
-            edgesToRemove.pop_back();
-          }
-        }
-      }
-    }
-
-    void eliminateIndependentEdges() {
-      std::vector<Graph::EdgeItr> edgesToProcess;
-      unsigned numEliminated = 0;
-
-      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
-           eItr != eEnd; ++eItr) {
-        edgesToProcess.push_back(eItr);
-      }
-
-      while (!edgesToProcess.empty()) {
-        if (tryToEliminateEdge(edgesToProcess.back()))
-          ++numEliminated;
-        edgesToProcess.pop_back();
-      }
-    }
-
-    bool tryToEliminateEdge(Graph::EdgeItr eItr) {
-      if (tryNormaliseEdgeMatrix(eItr)) {
-        g.removeEdge(eItr);
-        return true; 
-      }
-      return false;
-    }
-
-    bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
-
-      const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
-
-      Matrix &edgeCosts = g.getEdgeCosts(eItr);
-      Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
-             &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
-
-      for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
-        PBQPNum rowMin = infinity;
-
-        for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
-          if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
-            rowMin = edgeCosts[r][c];
-        }
-
-        uCosts[r] += rowMin;
-
-        if (rowMin != infinity) {
-          edgeCosts.subFromRow(r, rowMin);
-        }
-        else {
-          edgeCosts.setRow(r, 0);
-        }
-      }
-
-      for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
-        PBQPNum colMin = infinity;
-
-        for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
-          if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
-            colMin = edgeCosts[r][c];
-        }
-
-        vCosts[c] += colMin;
-
-        if (colMin != infinity) {
-          edgeCosts.subFromCol(c, colMin);
-        }
-        else {
-          edgeCosts.setCol(c, 0);
-        }
-      }
-
-      return edgeCosts.isZero();
-    }
-
-    void backpropagate() {
-      while (!stack.empty()) {
-        computeSolution(stack.back());
-        stack.pop_back();
-      }
-    }
-
-    void computeSolution(Graph::NodeItr nItr) {
-
-      NodeData &nodeData = getSolverNodeData(nItr);
-
-      Vector v(g.getNodeCosts(nItr));
-
-      // Solve based on existing solved edges.
-      for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
-                         solvedEdgeEnd = nodeData.solverEdgesEnd();
-           solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
-
-        Graph::EdgeItr eItr(*solvedEdgeItr);
-        Matrix &edgeCosts = g.getEdgeCosts(eItr);
-
-        if (nItr == g.getEdgeNode1(eItr)) {
-          Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
-          unsigned adjSolution = s.getSelection(adjNode);
-          v += edgeCosts.getColAsVector(adjSolution);
-        }
-        else {
-          Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
-          unsigned adjSolution = s.getSelection(adjNode);
-          v += edgeCosts.getRowAsVector(adjSolution);
-        }
-
-      }
-
-      setSolution(nItr, v.minIndex());
-    }
-
-    void cleanup() {
-      h.cleanup();
-      nodeDataList.clear();
-      edgeDataList.clear();
-    }
-  };
-
-  /// \brief PBQP heuristic solver class.
-  ///
-  /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
-  /// by calling
-  /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
-  ///
-  /// The choice of heuristic for the H parameter will affect both the solver
-  /// speed and solution quality. The heuristic should be chosen based on the
-  /// nature of the problem being solved.
-  /// Currently the only solver included with LLVM is the Briggs heuristic for
-  /// register allocation.
-  template <typename HImpl>
-  class HeuristicSolver {
-  public:
-    static Solution solve(Graph &g) {
-      HeuristicSolverImpl<HImpl> hs(g);
-      return hs.computeSolution();
-    }
-  };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
deleted file mode 100644
index 18eaf7c0da9b..000000000000
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ /dev/null
@@ -1,460 +0,0 @@
-//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class implements the Briggs test for "allocability" of nodes in a
-// PBQP graph representing a register allocation problem. Nodes which can be
-// proven allocable (by a safe and relatively accurate test) are removed from
-// the PBQP graph first. If no provably allocable node is present in the graph
-// then the node with the minimal spill-cost to degree ratio is removed.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-
-#include "../HeuristicSolver.h"
-#include "../HeuristicBase.h"
-
-#include <set>
-#include <limits>
-
-namespace PBQP {
-  namespace Heuristics {
-
-    /// \brief PBQP Heuristic which applies an allocability test based on
-    ///        Briggs.
-    /// 
-    /// This heuristic assumes that the elements of cost vectors in the PBQP
-    /// problem represent storage options, with the first being the spill
-    /// option and subsequent elements representing legal registers for the
-    /// corresponding node. Edge cost matrices are likewise assumed to represent
-    /// register constraints.
-    /// If one or more nodes can be proven allocable by this heuristic (by
-    /// inspection of their constraint matrices) then the allocable node of
-    /// highest degree is selected for the next reduction and pushed to the
-    /// solver stack. If no nodes can be proven allocable then the node with
-    /// the lowest estimated spill cost is selected and push to the solver stack
-    /// instead.
-    /// 
-    /// This implementation is built on top of HeuristicBase.       
-    class Briggs : public HeuristicBase<Briggs> {
-    private:
-
-      class LinkDegreeComparator {
-      public:
-        LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
-        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
-          if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
-            return true;
-          return false;
-        }
-      private:
-        HeuristicSolverImpl<Briggs> *s;
-      };
-
-      class SpillCostComparator {
-      public:
-        SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
-          : s(&s), g(&s.getGraph()) {}
-        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
-          PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr),
-                  cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
-          if (cost1 < cost2)
-            return true;
-          return false;
-        }
-
-      private:
-        HeuristicSolverImpl<Briggs> *s;
-        Graph *g;
-      };
-
-      typedef std::list<Graph::NodeItr> RNAllocableList;
-      typedef RNAllocableList::iterator RNAllocableListItr;
-
-      typedef std::list<Graph::NodeItr> RNUnallocableList;  
-      typedef RNUnallocableList::iterator RNUnallocableListItr;
-
-    public:
-
-      struct NodeData {
-        typedef std::vector<unsigned> UnsafeDegreesArray;
-        bool isHeuristic, isAllocable, isInitialized;
-        unsigned numDenied, numSafe;
-        UnsafeDegreesArray unsafeDegrees;
-        RNAllocableListItr rnaItr;
-        RNUnallocableListItr rnuItr;
-
-        NodeData()
-          : isHeuristic(false), isAllocable(false), isInitialized(false),
-            numDenied(0), numSafe(0) { }
-      };
-
-      struct EdgeData {
-        typedef std::vector<unsigned> UnsafeArray;
-        unsigned worst, reverseWorst;
-        UnsafeArray unsafe, reverseUnsafe;
-        bool isUpToDate;
-
-        EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
-      };
-
-      /// \brief Construct an instance of the Briggs heuristic.
-      /// @param solver A reference to the solver which is using this heuristic.
-      Briggs(HeuristicSolverImpl<Briggs> &solver) :
-        HeuristicBase<Briggs>(solver) {}
-
-      /// \brief Determine whether a node should be reduced using optimal
-      ///        reduction.
-      /// @param nItr Node iterator to be considered.
-      /// @return True if the given node should be optimally reduced, false
-      ///         otherwise.
-      ///
-      /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
-      /// exception. Nodes whose spill cost (element 0 of their cost vector) is
-      /// infinite are checked for allocability first. Allocable nodes may be
-      /// optimally reduced, but nodes whose allocability cannot be proven are
-      /// selected for heuristic reduction instead.
-      bool shouldOptimallyReduce(Graph::NodeItr nItr) {
-        if (getSolver().getSolverDegree(nItr) < 3) {
-          return true;
-        }
-        // else
-        return false;
-      }
-
-      /// \brief Add a node to the heuristic reduce list.
-      /// @param nItr Node iterator to add to the heuristic reduce list.
-      void addToHeuristicReduceList(Graph::NodeItr nItr) {
-        NodeData &nd = getHeuristicNodeData(nItr);
-        initializeNode(nItr);
-        nd.isHeuristic = true;
-        if (nd.isAllocable) {
-          nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
-        } else {
-          nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr);
-        }
-      }
-
-      /// \brief Heuristically reduce one of the nodes in the heuristic
-      ///        reduce list.
-      /// @return True if a reduction takes place, false if the heuristic reduce
-      ///         list is empty.
-      ///
-      /// If the list of allocable nodes is non-empty a node is selected
-      /// from it and pushed to the stack. Otherwise if the non-allocable list
-      /// is non-empty a node is selected from it and pushed to the stack.
-      /// If both lists are empty the method simply returns false with no action
-      /// taken.
-      bool heuristicReduce() {
-        if (!rnAllocableList.empty()) {
-          RNAllocableListItr rnaItr =
-            min_element(rnAllocableList.begin(), rnAllocableList.end(),
-                        LinkDegreeComparator(getSolver()));
-          Graph::NodeItr nItr = *rnaItr;
-          rnAllocableList.erase(rnaItr);
-          handleRemoveNode(nItr);
-          getSolver().pushToStack(nItr);
-          return true;
-        } else if (!rnUnallocableList.empty()) {
-          RNUnallocableListItr rnuItr =
-            min_element(rnUnallocableList.begin(), rnUnallocableList.end(),
-                        SpillCostComparator(getSolver()));
-          Graph::NodeItr nItr = *rnuItr;
-          rnUnallocableList.erase(rnuItr);
-          handleRemoveNode(nItr);
-          getSolver().pushToStack(nItr);
-          return true;
-        }
-        // else
-        return false;
-      }
-
-      /// \brief Prepare a change in the costs on the given edge.
-      /// @param eItr Edge iterator.    
-      void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
-        Graph &g = getGraph();
-        Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
-                       n2Itr = g.getEdgeNode2(eItr);
-        NodeData &n1 = getHeuristicNodeData(n1Itr),
-                 &n2 = getHeuristicNodeData(n2Itr);
-
-        if (n1.isHeuristic)
-          subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr));
-        if (n2.isHeuristic)
-          subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr));
-
-        EdgeData &ed = getHeuristicEdgeData(eItr);
-        ed.isUpToDate = false;
-      }
-
-      /// \brief Handle the change in the costs on the given edge.
-      /// @param eItr Edge iterator.
-      void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
-        // This is effectively the same as adding a new edge now, since
-        // we've factored out the costs of the old one.
-        handleAddEdge(eItr);
-      }
-
-      /// \brief Handle the addition of a new edge into the PBQP graph.
-      /// @param eItr Edge iterator for the added edge.
-      ///
-      /// Updates allocability of any nodes connected by this edge which are
-      /// being managed by the heuristic. If allocability changes they are
-      /// moved to the appropriate list.
-      void handleAddEdge(Graph::EdgeItr eItr) {
-        Graph &g = getGraph();
-        Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
-                       n2Itr = g.getEdgeNode2(eItr);
-        NodeData &n1 = getHeuristicNodeData(n1Itr),
-                 &n2 = getHeuristicNodeData(n2Itr);
-
-        // If neither node is managed by the heuristic there's nothing to be
-        // done.
-        if (!n1.isHeuristic && !n2.isHeuristic)
-          return;
-
-        // Ok - we need to update at least one node.
-        computeEdgeContributions(eItr);
-
-        // Update node 1 if it's managed by the heuristic.
-        if (n1.isHeuristic) {
-          bool n1WasAllocable = n1.isAllocable;
-          addEdgeContributions(eItr, n1Itr);
-          updateAllocability(n1Itr);
-          if (n1WasAllocable && !n1.isAllocable) {
-            rnAllocableList.erase(n1.rnaItr);
-            n1.rnuItr =
-              rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
-          }
-        }
-
-        // Likewise for node 2.
-        if (n2.isHeuristic) {
-          bool n2WasAllocable = n2.isAllocable;
-          addEdgeContributions(eItr, n2Itr);
-          updateAllocability(n2Itr);
-          if (n2WasAllocable && !n2.isAllocable) {
-            rnAllocableList.erase(n2.rnaItr);
-            n2.rnuItr =
-              rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
-          }
-        }
-      }
-
-      /// \brief Handle disconnection of an edge from a node.
-      /// @param eItr Edge iterator for edge being disconnected.
-      /// @param nItr Node iterator for the node being disconnected from.
-      ///
-      /// Updates allocability of the given node and, if appropriate, moves the
-      /// node to a new list.
-      void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
-        NodeData &nd = getHeuristicNodeData(nItr);
-
-        // If the node is not managed by the heuristic there's nothing to be
-        // done.
-        if (!nd.isHeuristic)
-          return;
-
-        EdgeData &ed = getHeuristicEdgeData(eItr);
-        (void)ed;
-        assert(ed.isUpToDate && "Edge data is not up to date.");
-
-        // Update node.
-        bool ndWasAllocable = nd.isAllocable;
-        subtractEdgeContributions(eItr, nItr);
-        updateAllocability(nItr);
-
-        // If the node has gone optimal...
-        if (shouldOptimallyReduce(nItr)) {
-          nd.isHeuristic = false;
-          addToOptimalReduceList(nItr);
-          if (ndWasAllocable) {
-            rnAllocableList.erase(nd.rnaItr);
-          } else {
-            rnUnallocableList.erase(nd.rnuItr);
-          }
-        } else {
-          // Node didn't go optimal, but we might have to move it
-          // from "unallocable" to "allocable".
-          if (!ndWasAllocable && nd.isAllocable) {
-            rnUnallocableList.erase(nd.rnuItr);
-            nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
-          }
-        }
-      }
-
-    private:
-
-      NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
-        return getSolver().getHeuristicNodeData(nItr);
-      }
-
-      EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
-        return getSolver().getHeuristicEdgeData(eItr);
-      }
-
-      // Work out what this edge will contribute to the allocability of the
-      // nodes connected to it.
-      void computeEdgeContributions(Graph::EdgeItr eItr) {
-        EdgeData &ed = getHeuristicEdgeData(eItr);
-
-        if (ed.isUpToDate)
-          return; // Edge data is already up to date.
-
-        Matrix &eCosts = getGraph().getEdgeCosts(eItr);
-
-        unsigned numRegs = eCosts.getRows() - 1,
-                 numReverseRegs = eCosts.getCols() - 1;
-
-        std::vector<unsigned> rowInfCounts(numRegs, 0),
-                              colInfCounts(numReverseRegs, 0);        
-
-        ed.worst = 0;
-        ed.reverseWorst = 0;
-        ed.unsafe.clear();
-        ed.unsafe.resize(numRegs, 0);
-        ed.reverseUnsafe.clear();
-        ed.reverseUnsafe.resize(numReverseRegs, 0);
-
-        for (unsigned i = 0; i < numRegs; ++i) {
-          for (unsigned j = 0; j < numReverseRegs; ++j) {
-            if (eCosts[i + 1][j + 1] ==
-                  std::numeric_limits<PBQPNum>::infinity()) {
-              ed.unsafe[i] = 1;
-              ed.reverseUnsafe[j] = 1;
-              ++rowInfCounts[i];
-              ++colInfCounts[j];
-
-              if (colInfCounts[j] > ed.worst) {
-                ed.worst = colInfCounts[j];
-              }
-
-              if (rowInfCounts[i] > ed.reverseWorst) {
-                ed.reverseWorst = rowInfCounts[i];
-              }
-            }
-          }
-        }
-
-        ed.isUpToDate = true;
-      }
-
-      // Add the contributions of the given edge to the given node's 
-      // numDenied and safe members. No action is taken other than to update
-      // these member values. Once updated these numbers can be used by clients
-      // to update the node's allocability.
-      void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
-        EdgeData &ed = getHeuristicEdgeData(eItr);
-
-        assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
-        NodeData &nd = getHeuristicNodeData(nItr);
-        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-        
-        bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
-        EdgeData::UnsafeArray &unsafe =
-          nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
-        nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst;
-
-        for (unsigned r = 0; r < numRegs; ++r) {
-          if (unsafe[r]) {
-            if (nd.unsafeDegrees[r]==0) {
-              --nd.numSafe;
-            }
-            ++nd.unsafeDegrees[r];
-          }
-        }
-      }
-
-      // Subtract the contributions of the given edge to the given node's 
-      // numDenied and safe members. No action is taken other than to update
-      // these member values. Once updated these numbers can be used by clients
-      // to update the node's allocability.
-      void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
-        EdgeData &ed = getHeuristicEdgeData(eItr);
-
-        assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
-        NodeData &nd = getHeuristicNodeData(nItr);
-        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-        
-        bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
-        EdgeData::UnsafeArray &unsafe =
-          nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
-        nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst;
-
-        for (unsigned r = 0; r < numRegs; ++r) {
-          if (unsafe[r]) { 
-            if (nd.unsafeDegrees[r] == 1) {
-              ++nd.numSafe;
-            }
-            --nd.unsafeDegrees[r];
-          }
-        }
-      }
-
-      void updateAllocability(Graph::NodeItr nItr) {
-        NodeData &nd = getHeuristicNodeData(nItr);
-        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-        nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0;
-      }
-
-      void initializeNode(Graph::NodeItr nItr) {
-        NodeData &nd = getHeuristicNodeData(nItr);
-
-        if (nd.isInitialized)
-          return; // Node data is already up to date.
-
-        unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
-        nd.numDenied = 0;
-        nd.numSafe = numRegs;
-        nd.unsafeDegrees.resize(numRegs, 0);
-
-        typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
-
-        for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
-                           aeEnd = getSolver().solverEdgesEnd(nItr);
-             aeItr != aeEnd; ++aeItr) {
-          
-          Graph::EdgeItr eItr = *aeItr;
-          computeEdgeContributions(eItr);
-          addEdgeContributions(eItr, nItr);
-        }
-
-        updateAllocability(nItr);
-        nd.isInitialized = true;
-      }
-
-      void handleRemoveNode(Graph::NodeItr xnItr) {
-        typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
-        std::vector<Graph::EdgeItr> edgesToRemove;
-        for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
-                           aeEnd = getSolver().solverEdgesEnd(xnItr);
-             aeItr != aeEnd; ++aeItr) {
-          Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
-          handleRemoveEdge(*aeItr, ynItr);
-          edgesToRemove.push_back(*aeItr);
-        }
-        while (!edgesToRemove.empty()) {
-          getSolver().removeSolverEdge(edgesToRemove.back());
-          edgesToRemove.pop_back();
-        }
-      }
-
-      RNAllocableList rnAllocableList;
-      RNUnallocableList rnUnallocableList;
-    };
-
-  }
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
diff --git a/lib/CodeGen/PBQP/Math.h b/lib/CodeGen/PBQP/Math.h
deleted file mode 100644
index e7598bf3e3f1..000000000000
--- a/lib/CodeGen/PBQP/Math.h
+++ /dev/null
@@ -1,288 +0,0 @@
-//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_MATH_H 
-#define LLVM_CODEGEN_PBQP_MATH_H
-
-#include <cassert>
-#include <algorithm>
-#include <functional>
-
-namespace PBQP {
-
-typedef float PBQPNum;
-
-/// \brief PBQP Vector class.
-class Vector {
-  public:
-
-    /// \brief Construct a PBQP vector of the given size.
-    explicit Vector(unsigned length) :
-      length(length), data(new PBQPNum[length]) {
-      }
-
-    /// \brief Construct a PBQP vector with initializer.
-    Vector(unsigned length, PBQPNum initVal) :
-      length(length), data(new PBQPNum[length]) {
-        std::fill(data, data + length, initVal);
-      }
-
-    /// \brief Copy construct a PBQP vector.
-    Vector(const Vector &v) :
-      length(v.length), data(new PBQPNum[length]) {
-        std::copy(v.data, v.data + length, data);
-      }
-
-    /// \brief Destroy this vector, return its memory.
-    ~Vector() { delete[] data; }
-
-    /// \brief Assignment operator.
-    Vector& operator=(const Vector &v) {
-      delete[] data;
-      length = v.length;
-      data = new PBQPNum[length];
-      std::copy(v.data, v.data + length, data);
-      return *this;
-    }
-
-    /// \brief Return the length of the vector
-    unsigned getLength() const {
-      return length;
-    }
-
-    /// \brief Element access.
-    PBQPNum& operator[](unsigned index) {
-      assert(index < length && "Vector element access out of bounds.");
-      return data[index];
-    }
-
-    /// \brief Const element access.
-    const PBQPNum& operator[](unsigned index) const {
-      assert(index < length && "Vector element access out of bounds.");
-      return data[index];
-    }
-
-    /// \brief Add another vector to this one.
-    Vector& operator+=(const Vector &v) {
-      assert(length == v.length && "Vector length mismatch.");
-      std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); 
-      return *this;
-    }
-
-    /// \brief Subtract another vector from this one.
-    Vector& operator-=(const Vector &v) {
-      assert(length == v.length && "Vector length mismatch.");
-      std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); 
-      return *this;
-    }
-
-    /// \brief Returns the index of the minimum value in this vector
-    unsigned minIndex() const {
-      return std::min_element(data, data + length) - data;
-    }
-
-  private:
-    unsigned length;
-    PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given vector on the given
-///        output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Vector &v) {
-  assert((v.getLength() != 0) && "Zero-length vector badness.");
-
-  os << "[ " << v[0];
-  for (unsigned i = 1; i < v.getLength(); ++i) {
-    os << ", " << v[i];
-  }
-  os << " ]";
-
-  return os;
-} 
-
-
-/// \brief PBQP Matrix class
-class Matrix {
-  public:
-
-    /// \brief Construct a PBQP Matrix with the given dimensions.
-    Matrix(unsigned rows, unsigned cols) :
-      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
-    }
-
-    /// \brief Construct a PBQP Matrix with the given dimensions and initial
-    /// value.
-    Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
-      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
-        std::fill(data, data + (rows * cols), initVal);
-    }
-
-    /// \brief Copy construct a PBQP matrix.
-    Matrix(const Matrix &m) :
-      rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
-        std::copy(m.data, m.data + (rows * cols), data);  
-    }
-
-    /// \brief Destroy this matrix, return its memory.
-    ~Matrix() { delete[] data; }
-
-    /// \brief Assignment operator.
-    Matrix& operator=(const Matrix &m) {
-      delete[] data;
-      rows = m.rows; cols = m.cols;
-      data = new PBQPNum[rows * cols];
-      std::copy(m.data, m.data + (rows * cols), data);
-      return *this;
-    }
-
-    /// \brief Return the number of rows in this matrix.
-    unsigned getRows() const { return rows; }
-
-    /// \brief Return the number of cols in this matrix.
-    unsigned getCols() const { return cols; }
-
-    /// \brief Matrix element access.
-    PBQPNum* operator[](unsigned r) {
-      assert(r < rows && "Row out of bounds.");
-      return data + (r * cols);
-    }
-
-    /// \brief Matrix element access.
-    const PBQPNum* operator[](unsigned r) const {
-      assert(r < rows && "Row out of bounds.");
-      return data + (r * cols);
-    }
-
-    /// \brief Returns the given row as a vector.
-    Vector getRowAsVector(unsigned r) const {
-      Vector v(cols);
-      for (unsigned c = 0; c < cols; ++c)
-        v[c] = (*this)[r][c];
-      return v; 
-    }
-
-    /// \brief Returns the given column as a vector.
-    Vector getColAsVector(unsigned c) const {
-      Vector v(rows);
-      for (unsigned r = 0; r < rows; ++r)
-        v[r] = (*this)[r][c];
-      return v;
-    }
-
-    /// \brief Reset the matrix to the given value.
-    Matrix& reset(PBQPNum val = 0) {
-      std::fill(data, data + (rows * cols), val);
-      return *this;
-    }
-
-    /// \brief Set a single row of this matrix to the given value.
-    Matrix& setRow(unsigned r, PBQPNum val) {
-      assert(r < rows && "Row out of bounds.");
-      std::fill(data + (r * cols), data + ((r + 1) * cols), val);
-      return *this;
-    }
-
-    /// \brief Set a single column of this matrix to the given value.
-    Matrix& setCol(unsigned c, PBQPNum val) {
-      assert(c < cols && "Column out of bounds.");
-      for (unsigned r = 0; r < rows; ++r)
-        (*this)[r][c] = val;
-      return *this;
-    }
-
-    /// \brief Matrix transpose.
-    Matrix transpose() const {
-      Matrix m(cols, rows);
-      for (unsigned r = 0; r < rows; ++r)
-        for (unsigned c = 0; c < cols; ++c)
-          m[c][r] = (*this)[r][c];
-      return m;
-    }
-
-    /// \brief Returns the diagonal of the matrix as a vector.
-    ///
-    /// Matrix must be square.
-    Vector diagonalize() const {
-      assert(rows == cols && "Attempt to diagonalize non-square matrix.");
-
-      Vector v(rows);
-      for (unsigned r = 0; r < rows; ++r)
-        v[r] = (*this)[r][r];
-      return v;
-    } 
-
-    /// \brief Add the given matrix to this one.
-    Matrix& operator+=(const Matrix &m) {
-      assert(rows == m.rows && cols == m.cols &&
-          "Matrix dimensions mismatch.");
-      std::transform(data, data + (rows * cols), m.data, data,
-          std::plus<PBQPNum>());
-      return *this;
-    }
-
-    /// \brief Returns the minimum of the given row
-    PBQPNum getRowMin(unsigned r) const {
-      assert(r < rows && "Row out of bounds");
-      return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
-    }
-
-    /// \brief Returns the minimum of the given column
-    PBQPNum getColMin(unsigned c) const {
-      PBQPNum minElem = (*this)[0][c];
-      for (unsigned r = 1; r < rows; ++r)
-        if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
-      return minElem;
-    }
-
-    /// \brief Subtracts the given scalar from the elements of the given row.
-    Matrix& subFromRow(unsigned r, PBQPNum val) {
-      assert(r < rows && "Row out of bounds");
-      std::transform(data + (r * cols), data + ((r + 1) * cols),
-          data + (r * cols),
-          std::bind2nd(std::minus<PBQPNum>(), val));
-      return *this;
-    }
-
-    /// \brief Subtracts the given scalar from the elements of the given column.
-    Matrix& subFromCol(unsigned c, PBQPNum val) {
-      for (unsigned r = 0; r < rows; ++r)
-        (*this)[r][c] -= val;
-      return *this;
-    }
-
-    /// \brief Returns true if this is a zero matrix.
-    bool isZero() const {
-      return find_if(data, data + (rows * cols),
-          std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
-        data + (rows * cols);
-    }
-
-  private:
-    unsigned rows, cols;
-    PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given matrix on the given
-///        output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Matrix &m) {
-
-  assert((m.getRows() != 0) && "Zero-row matrix badness.");
-
-  for (unsigned i = 0; i < m.getRows(); ++i) {
-    os << m.getRowAsVector(i);
-  }
-
-  return os;
-}
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
deleted file mode 100644
index 047fd04c7cb8..000000000000
--- a/lib/CodeGen/PBQP/Solution.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Solution class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
-#define LLVM_CODEGEN_PBQP_SOLUTION_H
-
-#include "Math.h"
-#include "Graph.h"
-
-#include <map>
-
-namespace PBQP {
-
-  /// \brief Represents a solution to a PBQP problem.
-  ///
-  /// To get the selection for each node in the problem use the getSelection method.
-  class Solution {
-  private:
-
-    typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
-    SelectionsMap selections;
-
-    unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
-
-  public:
-
-    /// \brief Number of nodes for which selections have been made.
-    /// @return Number of nodes for which selections have been made.
-    unsigned numNodes() const { return selections.size(); }
-
-    /// \brief Records a reduction via the R0 rule. Should be called from the
-    ///        solver only.
-    void recordR0() { ++r0Reductions; }
-
-    /// \brief Returns the number of R0 reductions applied to solve the problem.
-    unsigned numR0Reductions() const { return r0Reductions; }
-
-    /// \brief Records a reduction via the R1 rule. Should be called from the
-    ///        solver only.
-    void recordR1() { ++r1Reductions; }
-
-    /// \brief Returns the number of R1 reductions applied to solve the problem.
-    unsigned numR1Reductions() const { return r1Reductions; }
-
-    /// \brief Records a reduction via the R2 rule. Should be called from the
-    ///        solver only.
-    void recordR2() { ++r2Reductions; }
-
-    /// \brief Returns the number of R2 reductions applied to solve the problem.
-    unsigned numR2Reductions() const { return r2Reductions; }
-
-    /// \brief Records a reduction via the RN rule. Should be called from the
-    ///        solver only.
-    void recordRN() { ++ rNReductions; }
-
-    /// \brief Returns the number of RN reductions applied to solve the problem.
-    unsigned numRNReductions() const { return rNReductions; }
-
-    /// \brief Set the selection for a given node.
-    /// @param nItr Node iterator.
-    /// @param selection Selection for nItr.
-    void setSelection(Graph::NodeItr nItr, unsigned selection) {
-      selections[nItr] = selection;
-    }
-
-    /// \brief Get a node's selection.
-    /// @param nItr Node iterator.
-    /// @return The selection for nItr;
-    unsigned getSelection(Graph::NodeItr nItr) const {
-      SelectionsMap::const_iterator sItr = selections.find(nItr);
-      assert(sItr != selections.end() && "No selection for node.");
-      return sItr->second;
-    }
-
-  };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index d4df4c548711..5f7cf582c960 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,7 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "phielim"
-#include "PHIElimination.h"
+#include "PHIEliminationUtils.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -34,23 +34,72 @@
 #include <map>
 using namespace llvm;
 
+namespace {
+  class PHIElimination : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PHIElimination() : MachineFunctionPass(ID) {
+      initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in
+    /// here. In particular, we want to map the number of uses of a virtual
+    /// register which is used in a PHI node. We map that to the BB the
+    /// vreg is coming from. This is used later to determine when the vreg
+    /// is killed in the BB.
+    ///
+    void analyzePHINodes(const MachineFunction& Fn);
+
+    /// Split critical edges where necessary for good coalescer performance.
+    bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+                       LiveVariables &LV, MachineLoopInfo *MLI);
+
+    typedef std::pair<unsigned, unsigned> BBVRegPair;
+    typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+    VRegPHIUse VRegPHIUseCount;
+
+    // Defs of PHI sources which are implicit_def.
+    SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+    // Map reusable lowered PHI node -> incoming join register.
+    typedef DenseMap<MachineInstr*, unsigned,
+                     MachineInstrExpressionTrait> LoweredPHIMap;
+    LoweredPHIMap LoweredPHIs;
+  };
+}
+
 STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
 STATISTIC(NumReused, "Number of reused lowered phis");
 
 char PHIElimination::ID = 0;
 INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
-                "Eliminate PHI nodes for register allocation", false, false);
+                "Eliminate PHI nodes for register allocation", false, false)
 
-char &llvm::PHIEliminationID = PHIElimination::ID;
+char& llvm::PHIEliminationID = PHIElimination::ID;
 
-void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<LiveVariables>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addPreserved<MachineLoopInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
 
   bool Changed = false;
@@ -93,14 +142,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
 /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
 /// predecessor basic blocks.
 ///
-bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
                                              MachineBasicBlock &MBB) {
   if (MBB.empty() || !MBB.front().isPHI())
     return false;   // Quick exit for basic blocks without PHIs.
 
   // Get an iterator to the first instruction after the last PHI node (this may
   // also be the end of the basic block).
-  MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+  MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
 
   while (MBB.front().isPHI())
     LowerAtomicPHINode(MBB, AfterPHIsIt);
@@ -121,58 +170,14 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
   return true;
 }
 
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
-// when following the CFG edge to SuccMBB. This needs to be after any def of
-// SrcReg, but before any subsequent point where control flow might jump out of
-// the basic block.
-MachineBasicBlock::iterator
-llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
-                                          MachineBasicBlock &SuccMBB,
-                                          unsigned SrcReg) {
-  // Handle the trivial case trivially.
-  if (MBB.empty())
-    return MBB.begin();
-
-  // Usually, we just want to insert the copy before the first terminator
-  // instruction. However, for the edge going to a landing pad, we must insert
-  // the copy before the call/invoke instruction.
-  if (!SuccMBB.isLandingPad())
-    return MBB.getFirstTerminator();
-
-  // Discover any defs/uses in this basic block.
-  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
-  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
-         RE = MRI->reg_end(); RI != RE; ++RI) {
-    MachineInstr *DefUseMI = &*RI;
-    if (DefUseMI->getParent() == &MBB)
-      DefUsesInMBB.insert(DefUseMI);
-  }
 
-  MachineBasicBlock::iterator InsertPoint;
-  if (DefUsesInMBB.empty()) {
-    // No defs.  Insert the copy at the start of the basic block.
-    InsertPoint = MBB.begin();
-  } else if (DefUsesInMBB.size() == 1) {
-    // Insert the copy immediately after the def/use.
-    InsertPoint = *DefUsesInMBB.begin();
-    ++InsertPoint;
-  } else {
-    // Insert the copy immediately after the last def/use.
-    InsertPoint = MBB.end();
-    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
-    ++InsertPoint;
-  }
-
-  // Make sure the copy goes after any phi nodes however.
-  return SkipPHIsAndLabels(MBB, InsertPoint);
-}
 
 /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
 /// under the assuption that it needs to be lowered in a way that supports
 /// atomic execution of PHIs.  This lowering method is always correct all of the
 /// time.
 ///
-void llvm::PHIElimination::LowerAtomicPHINode(
+void PHIElimination::LowerAtomicPHINode(
                                       MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator AfterPHIsIt) {
   ++NumAtomic;
@@ -207,7 +212,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
       IncomingReg = entry;
       reusedIncoming = true;
       ++NumReused;
-      DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
     } else {
       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
       entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
     // Find a safe location to insert the copy, this may be the first terminator
     // in the block (or end()).
     MachineBasicBlock::iterator InsertPos =
-      FindCopyInsertPoint(opBlock, MBB, SrcReg);
+      findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
 
     // Insert the copy.
     if (!reusedIncoming && IncomingReg)
@@ -335,6 +340,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
 #ifndef NDEBUG
         for (MachineBasicBlock::iterator TI = llvm::next(Term);
              TI != opBlock.end(); ++TI) {
+          if (TI->isDebugValue())
+            continue;
           assert(!TI->readsRegister(SrcReg) &&
                  "Terminator instructions cannot use virtual registers unless"
                  "they are the first terminator in a block!");
@@ -343,9 +350,13 @@ void llvm::PHIElimination::LowerAtomicPHINode(
       } else if (reusedIncoming || !IncomingReg) {
         // We may have to rewind a bit if we didn't insert a copy this time.
         KillInst = Term;
-        while (KillInst != opBlock.begin())
-          if ((--KillInst)->readsRegister(SrcReg))
+        while (KillInst != opBlock.begin()) {
+          --KillInst;
+          if (KillInst->isDebugValue())
+            continue;
+          if (KillInst->readsRegister(SrcReg))
             break;
+        }
       } else {
         // We just inserted this copy.
         KillInst = prior(InsertPos);
@@ -371,7 +382,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
 /// used in a PHI node. We map that to the BB the vreg is coming from. This is
 /// used later to determine when the vreg is killed in the BB.
 ///
-void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
   for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
        I != E; ++I)
     for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
@@ -381,10 +392,10 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
                                      BBI->getOperand(i).getReg())];
 }
 
-bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
-                                         MachineBasicBlock &MBB,
-                                         LiveVariables &LV,
-                                         MachineLoopInfo *MLI) {
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+                                   MachineBasicBlock &MBB,
+                                   LiveVariables &LV,
+                                   MachineLoopInfo *MLI) {
   if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
     return false;   // Quick exit for basic blocks without PHIs.
 
@@ -403,10 +414,14 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
           !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
         if (!MLI ||
             !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
-              MLI->isLoopHeader(&MBB)))
-          Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+              MLI->isLoopHeader(&MBB))) {
+          if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+            Changed = true;
+            ++NumCriticalEdgesSplit;
+          }
+        }
       }
     }
   }
-  return true;
+  return Changed;
 }
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
deleted file mode 100644
index 45a97182e71c..000000000000
--- a/lib/CodeGen/PHIElimination.h
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
-#define LLVM_CODEGEN_PHIELIMINATION_HPP
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-namespace llvm {
-  class LiveVariables;
-  class MachineRegisterInfo;
-  class MachineLoopInfo;
-  
-  /// Lower PHI instructions to copies.  
-  class PHIElimination : public MachineFunctionPass {
-    MachineRegisterInfo *MRI; // Machine register information
-
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    PHIElimination() : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &Fn);
-    
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
-  private:
-    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
-    /// in predecessor basic blocks.
-    ///
-    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
-    void LowerAtomicPHINode(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator AfterPHIsIt);
-
-    /// analyzePHINodes - Gather information about the PHI nodes in
-    /// here. In particular, we want to map the number of uses of a virtual
-    /// register which is used in a PHI node. We map that to the BB the
-    /// vreg is coming from. This is used later to determine when the vreg
-    /// is killed in the BB.
-    ///
-    void analyzePHINodes(const MachineFunction& Fn);
-
-    /// Split critical edges where necessary for good coalescer performance.
-    bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
-                       LiveVariables &LV, MachineLoopInfo *MLI);
-
-    /// SplitCriticalEdge - Split a critical edge from A to B by
-    /// inserting a new MBB. Update branches in A and PHI instructions
-    /// in B. Return the new block.
-    MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
-                                         MachineBasicBlock *B);
-
-    /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
-    /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
-    /// any def of SrcReg, but before any subsequent point where control flow
-    /// might jump out of the basic block.
-    MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
-                                                    MachineBasicBlock &SuccMBB,
-                                                    unsigned SrcReg);
-
-    // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
-    // also after any exception handling labels: in landing pads execution
-    // starts at the label, so any copies placed before it won't be executed!
-    // We also deal with DBG_VALUEs, which are a bit tricky:
-    //  PHI
-    //  DBG_VALUE
-    //  LABEL
-    // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it
-    // needs to be annulled or, better, moved to follow the label, as well.
-    //  PHI
-    //  DBG_VALUE
-    //  no label
-    // Here it is not a good idea to skip the DBG_VALUE.
-    // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and
-    // maximally stupid.
-    MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
-                                                MachineBasicBlock::iterator I) {
-      // Rather than assuming that EH labels come before other kinds of labels,
-      // just skip all labels.
-      while (I != MBB.end() && 
-             (I->isPHI() || I->isLabel() || I->isDebugValue())) {
-        if (I->isDebugValue() && I->getNumOperands()==3 && 
-            I->getOperand(0).isReg())
-          I->getOperand(0).setReg(0U);
-        ++I;
-      }
-      return I;
-    }
-
-    typedef std::pair<unsigned, unsigned> BBVRegPair;
-    typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
-
-    VRegPHIUse VRegPHIUseCount;
-
-    // Defs of PHI sources which are implicit_def.
-    SmallPtrSet<MachineInstr*, 4> ImpDefs;
-
-    // Map reusable lowered PHI node -> incoming join register.
-    typedef DenseMap<MachineInstr*, unsigned,
-                     MachineInstrExpressionTrait> LoweredPHIMap;
-    LoweredPHIMap LoweredPHIs;
-  };
-
-}
-
-#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..10bfdcce6769
--- /dev/null
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+                             unsigned SrcReg) {
+  // Handle the trivial case trivially.
+  if (MBB->empty())
+    return MBB->begin();
+
+  // Usually, we just want to insert the copy before the first terminator
+  // instruction. However, for the edge going to a landing pad, we must insert
+  // the copy before the call/invoke instruction.
+  if (!SuccMBB->isLandingPad())
+    return MBB->getFirstTerminator();
+
+  // Discover any defs/uses in this basic block.
+  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+         RE = MRI.reg_end(); RI != RE; ++RI) {
+    MachineInstr* DefUseMI = &*RI;
+    if (DefUseMI->getParent() == MBB)
+      DefUsesInMBB.insert(DefUseMI);
+  }
+
+  MachineBasicBlock::iterator InsertPoint;
+  if (DefUsesInMBB.empty()) {
+    // No defs.  Insert the copy at the start of the basic block.
+    InsertPoint = MBB->begin();
+  } else if (DefUsesInMBB.size() == 1) {
+    // Insert the copy immediately after the def/use.
+    InsertPoint = *DefUsesInMBB.begin();
+    ++InsertPoint;
+  } else {
+    // Insert the copy immediately after the last def/use.
+    InsertPoint = MBB->end();
+    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+    ++InsertPoint;
+  }
+
+  // Make sure the copy goes after any phi nodes however.
+  return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..9ac47fb4c505
--- /dev/null
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+    /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+    /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+    /// any def of SrcReg, but before any subsequent point where control flow
+    /// might jump out of the basic block.
+    MachineBasicBlock::iterator
+    findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+                           unsigned SrcReg);
+}
+
+#endif
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 17cee46ca16c..5d7123caa017 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -41,7 +41,9 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
@@ -50,8 +52,13 @@ static cl::opt<bool>
 Aggressive("aggressive-ext-opt", cl::Hidden,
            cl::desc("Aggressive extension optimization"));
 
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+                cl::desc("Disable the peephole optimizer"));
+
 STATISTIC(NumReuse,      "Number of extension results reused");
 STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumImmFold,    "Number of move immediate foled");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -62,7 +69,9 @@ namespace {
 
   public:
     static char ID; // Pass identification
-    PeepholeOptimizer() : MachineFunctionPass(ID) {}
+    PeepholeOptimizer() : MachineFunctionPass(ID) {
+      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -79,12 +88,21 @@ namespace {
     bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+    bool isMoveImmediate(MachineInstr *MI,
+                         SmallSet<unsigned, 4> &ImmDefRegs,
+                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+                       SmallSet<unsigned, 4> &ImmDefRegs,
+                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
   };
 }
 
 char PeepholeOptimizer::ID = 0;
-INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
-                "Peephole Optimizations", false, false);
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+                "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+                "Peephole Optimizations", false, false)
 
 FunctionPass *llvm::createPeepholeOptimizerPass() {
   return new PeepholeOptimizer();
@@ -102,12 +120,10 @@ FunctionPass *llvm::createPeepholeOptimizerPass() {
 bool PeepholeOptimizer::
 OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                  SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
-  LocalMIs.insert(MI);
-
   unsigned SrcReg, DstReg, SubIdx;
   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
     return false;
-
+  
   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
       TargetRegisterInfo::isPhysicalRegister(SrcReg))
     return false;
@@ -232,22 +248,17 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
 /// set) the same flag as the compare, then we can remove the comparison and use
 /// the flag from the previous instruction.
 bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
-                                         MachineBasicBlock *MBB) {
+                                         MachineBasicBlock *MBB){
   // If this instruction is a comparison against zero and isn't comparing a
   // physical register, we can try to optimize it.
   unsigned SrcReg;
-  int CmpValue;
-  if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
-      TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
-    return false;
-
-  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
-  if (llvm::next(DI) != MRI->def_end())
-    // Only support one definition.
+  int CmpMask, CmpValue;
+  if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg))
     return false;
 
-  // Attempt to convert the defining instruction to set the "zero" flag.
-  if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+  // Attempt to optimize the comparison instruction.
+  if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
     ++NumEliminated;
     return true;
   }
@@ -255,7 +266,53 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
   return false;
 }
 
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+                                        SmallSet<unsigned, 4> &ImmDefRegs,
+                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isMoveImmediate())
+    return false;
+  if (TID.getNumDefs() != 1)
+    return false;
+  unsigned Reg = MI->getOperand(0).getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    ImmDefMIs.insert(std::make_pair(Reg, MI));
+    ImmDefRegs.insert(Reg);
+    return true;
+  }
+  
+  return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+                                      SmallSet<unsigned, 4> &ImmDefRegs,
+                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (ImmDefRegs.count(Reg) == 0)
+      continue;
+    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+    assert(II != ImmDefMIs.end());
+    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+      ++NumImmFold;
+      return true;
+    }
+  }
+  return false;
+}
+
 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  if (DisablePeephole)
+    return false;
+  
   TM  = &MF.getTarget();
   TII = TM->getInstrInfo();
   MRI = &MF.getRegInfo();
@@ -264,22 +321,50 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
   SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  SmallSet<unsigned, 4> ImmDefRegs;
+  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
+    
+    bool SeenMoveImm = false;
     LocalMIs.clear();
+    ImmDefRegs.clear();
+    ImmDefMIs.clear();
 
+    bool First = true;
+    MachineBasicBlock::iterator PMII;
     for (MachineBasicBlock::iterator
-           MII = I->begin(), ME = I->end(); MII != ME; ) {
+           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
       MachineInstr *MI = &*MII;
+      LocalMIs.insert(MI);
 
-      if (MI->getDesc().isCompare() &&
-          !MI->getDesc().hasUnmodeledSideEffects()) {
-        ++MII; // The iterator may become invalid if the compare is deleted.
-        Changed |= OptimizeCmpInstr(MI, MBB);
+      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+          MI->hasUnmodeledSideEffects()) {
+        ++MII;
+        continue;
+      }
+
+      if (MI->getDesc().isCompare()) {
+        if (OptimizeCmpInstr(MI, MBB)) {
+          // MI is deleted.
+          Changed = true;
+          MII = First ? I->begin() : llvm::next(PMII);
+          continue;
+        }
+      }
+
+      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+        SeenMoveImm = true;
       } else {
         Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
-        ++MII;
+        if (SeenMoveImm)
+          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
+
+      First = false;
+      PMII = MII;
+      ++MII;
     }
   }
 
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
deleted file mode 100644
index cbde2b01eeaf..000000000000
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements a hazard recognizer using the instructions itineraries
-// defined for the current target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "post-RA-sched"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrItineraries.h"
-
-using namespace llvm;
-
-PostRAHazardRecognizer::
-PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
-  ScheduleHazardRecognizer(), ItinData(LItinData) {
-  // Determine the maximum depth of any itinerary. This determines the
-  // depth of the scoreboard. We always make the scoreboard at least 1
-  // cycle deep to avoid dealing with the boundary condition.
-  unsigned ScoreboardDepth = 1;
-  if (!ItinData.isEmpty()) {
-    for (unsigned idx = 0; ; ++idx) {
-      if (ItinData.isEndMarker(idx))
-        break;
-
-      const InstrStage *IS = ItinData.beginStage(idx);
-      const InstrStage *E = ItinData.endStage(idx);
-      unsigned ItinDepth = 0;
-      for (; IS != E; ++IS)
-        ItinDepth += IS->getCycles();
-
-      ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
-    }
-  }
-
-  ReservedScoreboard.reset(ScoreboardDepth);
-  RequiredScoreboard.reset(ScoreboardDepth);
-
-  DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " 
-               << ScoreboardDepth << '\n');
-}
-
-void PostRAHazardRecognizer::Reset() {
-  RequiredScoreboard.reset();
-  ReservedScoreboard.reset();
-}
-
-void PostRAHazardRecognizer::ScoreBoard::dump() const {
-  dbgs() << "Scoreboard:\n";
-
-  unsigned last = Depth - 1;
-  while ((last > 0) && ((*this)[last] == 0))
-    last--;
-
-  for (unsigned i = 0; i <= last; i++) {
-    unsigned FUs = (*this)[i];
-    dbgs() << "\t";
-    for (int j = 31; j >= 0; j--)
-      dbgs() << ((FUs & (1 << j)) ? '1' : '0');
-    dbgs() << '\n';
-  }
-}
-
-ScheduleHazardRecognizer::HazardType
-PostRAHazardRecognizer::getHazardType(SUnit *SU) {
-  if (ItinData.isEmpty())
-    return NoHazard;
-
-  unsigned cycle = 0;
-
-  // Use the itinerary for the underlying instruction to check for
-  // free FU's in the scoreboard at the appropriate future cycles.
-  unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx),
-         *E = ItinData.endStage(idx); IS != E; ++IS) {
-    // We must find one of the stage's units free for every cycle the
-    // stage is occupied. FIXME it would be more accurate to find the
-    // same unit free in all the cycles.
-    for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
-             "Scoreboard depth exceeded!");
-
-      unsigned freeUnits = IS->getUnits();
-      switch (IS->getReservationKind()) {
-      default:
-       assert(0 && "Invalid FU reservation");
-      case InstrStage::Required:
-        // Required FUs conflict with both reserved and required ones
-        freeUnits &= ~ReservedScoreboard[cycle + i];
-        // FALLTHROUGH
-      case InstrStage::Reserved:
-        // Reserved FUs can conflict only with required ones.
-        freeUnits &= ~RequiredScoreboard[cycle + i];
-        break;
-      }
-
-      if (!freeUnits) {
-        DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
-        DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
-        DEBUG(SU->getInstr()->dump());
-        return Hazard;
-      }
-    }
-
-    // Advance the cycle to the next stage.
-    cycle += IS->getNextCycles();
-  }
-
-  return NoHazard;
-}
-
-void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
-  if (ItinData.isEmpty())
-    return;
-
-  unsigned cycle = 0;
-
-  // Use the itinerary for the underlying instruction to reserve FU's
-  // in the scoreboard at the appropriate future cycles.
-  unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx),
-         *E = ItinData.endStage(idx); IS != E; ++IS) {
-    // We must reserve one of the stage's units for every cycle the
-    // stage is occupied. FIXME it would be more accurate to reserve
-    // the same unit free in all the cycles.
-    for (unsigned int i = 0; i < IS->getCycles(); ++i) {
-      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
-             "Scoreboard depth exceeded!");
-
-      unsigned freeUnits = IS->getUnits();
-      switch (IS->getReservationKind()) {
-      default:
-       assert(0 && "Invalid FU reservation");
-      case InstrStage::Required:
-        // Required FUs conflict with both reserved and required ones
-        freeUnits &= ~ReservedScoreboard[cycle + i];
-        // FALLTHROUGH
-      case InstrStage::Reserved:
-        // Reserved FUs can conflict only with required ones.
-        freeUnits &= ~RequiredScoreboard[cycle + i];
-        break;
-      }
-
-      // reduce to a single unit
-      unsigned freeUnit = 0;
-      do {
-        freeUnit = freeUnits;
-        freeUnits = freeUnit & (freeUnit - 1);
-      } while (freeUnits);
-
-      assert(freeUnit && "No function unit available!");
-      if (IS->getReservationKind() == InstrStage::Required)
-        RequiredScoreboard[cycle + i] |= freeUnit;
-      else
-        ReservedScoreboard[cycle + i] |= freeUnit;
-    }
-
-    // Advance the cycle to the next stage.
-    cycle += IS->getNextCycles();
-  }
-
-  DEBUG(ReservedScoreboard.dump());
-  DEBUG(RequiredScoreboard.dump());
-}
-
-void PostRAHazardRecognizer::AdvanceCycle() {
-  ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
-  RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
-}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f0bd6d1372be..60c24b710792 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -133,18 +133,12 @@ namespace {
     std::vector<unsigned> KillIndices;
 
   public:
-    SchedulePostRATDList(MachineFunction &MF,
-                         const MachineLoopInfo &MLI,
-                         const MachineDominatorTree &MDT,
-                         ScheduleHazardRecognizer *HR,
-                         AntiDepBreaker *ADB,
-                         AliasAnalysis *aa)
-      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
-        HazardRec(HR), AntiDepBreak(ADB), AA(aa),
-        KillIndices(TRI->getNumRegs()) {}
-
-    ~SchedulePostRATDList() {
-    }
+    SchedulePostRATDList(
+      MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+      AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+      SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+    ~SchedulePostRATDList();
 
     /// StartBlock - Initialize register live-range state for scheduling in
     /// this block.
@@ -183,9 +177,34 @@ namespace {
   };
 }
 
+SchedulePostRATDList::SchedulePostRATDList(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+  SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+  : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+    KillIndices(TRI->getNumRegs())
+{
+  const TargetMachine &TM = MF.getTarget();
+  const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+  HazardRec =
+    TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+  AntiDepBreak =
+    ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
+     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+  delete HazardRec;
+  delete AntiDepBreak;
+}
+
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
-  AA = &getAnalysis<AliasAnalysis>();
   TII = Fn.getTarget().getInstrInfo();
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
 
   // Check for explicit enable/disable of post-ra scheduling.
   TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       return false;
   } else {
     // Check that post-RA scheduling is enabled for this target.
+    // This may upgrade the AntiDepMode.
     const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
     if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
       return false;
@@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 
   DEBUG(dbgs() << "PostRAScheduler\n");
 
-  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
-  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
-  const TargetMachine &TM = Fn.getTarget();
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  ScheduleHazardRecognizer *HR =
-    TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
-  AntiDepBreaker *ADB =
-    ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
-     (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
-     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
-      (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
-
-  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+                                 CriticalPathRCs);
 
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     Scheduler.FixupKills(MBB);
   }
 
-  delete HR;
-  delete ADB;
-
   return true;
 }
 
@@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
         MinDepth = PendingQueue[i]->getDepth();
     }
 
-    DEBUG(dbgs() << "\n*** Examining Available\n";
-          LatencyPriorityQueue q = AvailableQueue;
-          while (!q.empty()) {
-            SUnit *su = q.pop();
-            dbgs() << "Height " << su->getHeight() << ": ";
-            su->dump(this);
-          });
+    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
 
     SUnit *FoundSUnit = 0;
     bool HasNoopHazards = false;
@@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
       SUnit *CurSUnit = AvailableQueue.pop();
 
       ScheduleHazardRecognizer::HazardType HT =
-        HazardRec->getHazardType(CurSUnit);
+        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
       if (HT == ScheduleHazardRecognizer::NoHazard) {
         FoundSUnit = CurSUnit;
         break;
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index cd9d83eeb684..d6e31dae9d13 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -91,8 +91,9 @@ namespace {
 
   public:
     static char ID;
-    PreAllocSplitting()
-      : MachineFunctionPass(ID) {}
+    PreAllocSplitting() : MachineFunctionPass(ID) {
+      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -106,10 +107,8 @@ namespace {
       AU.addPreserved<LiveStacks>();
       AU.addPreserved<RegisterCoalescer>();
       AU.addPreserved<CalculateSpillWeights>();
-      if (StrongPHIElim)
-        AU.addPreservedID(StrongPHIEliminationID);
-      else
-        AU.addPreservedID(PHIEliminationID);
+      AU.addPreservedID(StrongPHIEliminationID);
+      AU.addPreservedID(PHIEliminationID);
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<MachineLoopInfo>();
       AU.addRequired<VirtRegMap>();
@@ -203,9 +202,18 @@ namespace {
 
 char PreAllocSplitting::ID = 0;
 
-INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting",
+INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
+                "Pre-Register Allocation Live Interval Splitting",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
                 "Pre-Register Allocation Live Interval Splitting",
-                false, false);
+                false, false)
 
 char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
 
@@ -324,7 +332,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
   if (CurrSLI->hasAtLeastOneValue())
     CurrSValNo = CurrSLI->getValNumInfo(0);
   else
-    CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+    CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
                                        LSs->getVNInfoAllocator());
   return SS;
 }
@@ -585,7 +593,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
 
   SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
   VNInfo *RetVNI = Phis[MBB] =
-    LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
+    LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
                      LIs->getVNInfoAllocator());
 
   if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
@@ -674,7 +682,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
     DefIdx = DefIdx.getDefIndex();
     
     assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
-    VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
+    VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
     
     // If the def is a move, set the copy field.
     if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
@@ -807,7 +815,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
   MachineBasicBlock& MBB = *RestorePt->getParent();
   
   MachineBasicBlock::iterator KillPt = BarrierMBB->end();
-  if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
+  if (!DefMI || DefMI->getParent() == BarrierMBB)
     KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
   else
     KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
@@ -872,7 +880,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
     if (CurrSLI->hasAtLeastOneValue())
       CurrSValNo = CurrSLI->getValNumInfo(0);
     else
-      CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+      CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
                                          LSs->getVNInfoAllocator());
   }
   
@@ -967,8 +975,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 
   assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
 
-  MachineInstr *DefMI = ValNo->isDefAccurate()
-    ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+  MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
 
   // If this would create a new join point, do not split.
   if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
@@ -1005,7 +1012,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
   SlotIndex SpillIndex;
   MachineInstr *SpillMI = NULL;
   int SS = -1;
-  if (!ValNo->isDefAccurate()) {
+  if (!DefMI) {
     // If we don't know where the def is we must split just before the barrier.
     if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
                             BarrierMBB, SS, RefsInMBB))) {
@@ -1199,12 +1206,12 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
       
       // We also don't try to handle the results of PHI joins, since there's
       // no defining instruction to analyze.
-      if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
+      MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+      if (!DefMI || CurrVN->isUnused()) continue;
     
       // We're only interested in eliminating cruft introduced by the splitter,
       // is of the form load-use or load-use-store.  First, check that the
       // definition is a load, and remember what stack slot we loaded it from.
-      MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
       int FrameIndex;
       if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
       
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b8831db1d118..9cd9941e56b3 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,11 @@
 using namespace llvm;
 
 char ProcessImplicitDefs::ID = 0;
-INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
-                "Process Implicit Definitions.", false, false);
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+                "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+                "Process Implicit Definitions", false, false)
 
 void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e2802c1fdf4a..ad7b6e4aa97f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,6 +21,7 @@
 
 #define DEBUG_TYPE "pei"
 #include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -29,7 +30,7 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -44,8 +45,12 @@ using namespace llvm;
 
 char PEI::ID = 0;
 
-INITIALIZE_PASS(PEI, "prologepilog",
-                "Prologue/Epilogue Insertion", false, false);
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+                "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+                "Prologue/Epilogue Insertion", false, false)
 
 STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
 STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
@@ -61,6 +66,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
 bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   const Function* F = Fn.getFunction();
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
   RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
 
@@ -71,7 +78,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
 
   // Allow the target machine to make some adjustments to the function
   // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
-  TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+  TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
 
   // Scan the function for modified callee saved registers and insert spill code
   // for any callee saved registers that are modified.
@@ -91,7 +98,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
 
   // Allow the target machine to make final modifications to the function
   // before the frame layout is finalized.
-  TRI->processFunctionBeforeFrameFinalized(Fn);
+  TFI->processFunctionBeforeFrameFinalized(Fn);
 
   // Calculate actual frame offsets for all abstract stack objects...
   calculateFrameObjectOffsets(Fn);
@@ -138,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
   MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   unsigned MaxCallFrameSize = 0;
@@ -165,7 +173,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
         FrameSDOps.push_back(I);
       } else if (I->isInlineAsm()) {
         // Some inline asm's need a stack frame, as indicated by operand 1.
-        if (I->getOperand(1).getImm())
+        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
           AdjustsStack = true;
       }
 
@@ -180,7 +189,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
     // the target doesn't indicate otherwise, remove the call frame pseudos
     // here. The sub/add sp instruction pairs are still inserted, but we don't
     // need to track the SP adjustment for frame index elimination.
-    if (RegInfo->canSimplifyCallFramePseudos(Fn))
+    if (TFI->canSimplifyCallFramePseudos(Fn))
       RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
   }
 }
@@ -190,7 +199,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
 /// registers.
 void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
-  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
   MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   // Get the callee saved register list...
@@ -229,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
     return;   // Early exit if no callee saved registers are modified!
 
   unsigned NumFixedSpillSlots;
-  const TargetFrameInfo::SpillSlot *FixedSpillSlots =
+  const TargetFrameLowering::SpillSlot *FixedSpillSlots =
     TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
 
   // Now that we know which registers need to be saved and restored, allocate
@@ -247,7 +256,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
 
     // Check to see if this physreg must be spilled to a particular stack slot
     // on this target.
-    const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
+    const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
     while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
            FixedSlot->Reg != Reg)
       ++FixedSlot;
@@ -290,13 +299,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
     return;
 
   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   MachineBasicBlock::iterator I;
 
   if (! ShrinkWrapThisFunction) {
     // Spill using target interface.
     I = EntryBlock->begin();
-    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+    if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
       for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
         // Add the callee-saved register as live-in.
         // It's killed at the spill.
@@ -328,7 +338,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
       // Restore all registers immediately before the return and any
       // terminators that preceed it.
-      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+      if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
           unsigned Reg = CSI[i].getReg();
           const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -480,10 +490,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
 /// abstract stack objects.
 ///
 void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
-  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
 
   bool StackGrowsDown =
-    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
 
   // Loop over all of the stack objects, assigning sequential addresses...
   MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -549,7 +559,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Make sure the special register scavenging spill slot is closest to the
   // frame pointer if a frame pointer is required.
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
-  if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+  if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
       AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -631,17 +641,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
 
   // Make sure the special register scavenging spill slot is closest to the
   // stack pointer.
-  if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+  if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
     int SFI = RS->getScavengingFrameIndex();
     if (SFI >= 0)
       AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }
 
-  if (!RegInfo->targetHandlesStackFrameRounding()) {
+  if (!TFI.targetHandlesStackFrameRounding()) {
     // If we have reserved argument space for call sites in the function
     // immediately on entry to the current function, count it as part of the
     // overall stack size.
-    if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
+    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
       Offset += MFI->getMaxCallFrameSize();
 
     // Round up the size to a multiple of the alignment.  If the function has
@@ -672,16 +682,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
 /// prolog and epilog code to the function.
 ///
 void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
-  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
 
   // Add prologue to the function...
-  TRI->emitPrologue(Fn);
+  TFI.emitPrologue(Fn);
 
   // Add epilogue to restore the callee-save registers in each exiting block
   for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
     // If last instruction is a return instruction, add an epilogue
     if (!I->empty() && I->back().getDesc().isReturn())
-      TRI->emitEpilogue(Fn, *I);
+      TFI.emitEpilogue(Fn, *I);
   }
 }
 
@@ -694,9 +704,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
   const TargetMachine &TM = Fn.getTarget();
   assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
-  const TargetFrameInfo *TFI = TM.getFrameInfo();
+  const TargetFrameLowering *TFI = TM.getFrameLowering();
   bool StackGrowsDown =
-    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+    TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
   int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
   int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
 
@@ -755,8 +765,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
           // If this instruction has a FrameIndex operand, we need to
           // use that target machine register info object to eliminate
           // it.
-            TRI.eliminateFrameIndex(MI, SPAdj,
-                                    FrameIndexVirtualScavenging ?  NULL : RS);
+          TRI.eliminateFrameIndex(MI, SPAdj,
+                                  FrameIndexVirtualScavenging ?  NULL : RS);
 
           // Reset the iterator if we were at the beginning of the BB.
           if (AtBeginning) {
@@ -825,7 +835,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
             ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
             ++NumScavengedRegs;
           }
-          // replace this reference to the virtual register with the
+          // Replace this reference to the virtual register with the
           // scratch register.
           assert (ScratchReg && "Missing scratch register!");
           MI->getOperand(i).setReg(ScratchReg);
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index d575124a6b3e..e2391591ad06 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,9 @@ namespace llvm {
   class PEI : public MachineFunctionPass {
   public:
     static char ID;
-    PEI() : MachineFunctionPass(ID) {}
+    PEI() : MachineFunctionPass(ID) {
+      initializePEIPass(*PassRegistry::getPassRegistry());
+    }
 
     const char *getPassName() const {
       return "Prolog/Epilog Insertion & Frame Finalization";
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 5e86e5a9447e..73b66d868f3d 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include <map>
 using namespace llvm;
 
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..8c7e5f53b824
--- /dev/null
+++ b/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,181 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interfering checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include <queue>
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They may also override getPriority() which otherwise
+/// defaults to the spill weight computed by CalculateSpillWeights.
+class RegAllocBase {
+  LiveIntervalUnion::Allocator UnionAllocator;
+protected:
+  // Array of LiveIntervalUnions indexed by physical register.
+  class LiveUnionArray {
+    unsigned NumRegs;
+    LiveIntervalUnion *Array;
+  public:
+    LiveUnionArray(): NumRegs(0), Array(0) {}
+    ~LiveUnionArray() { clear(); }
+
+    unsigned numRegs() const { return NumRegs; }
+
+    void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+    void clear();
+
+    LiveIntervalUnion& operator[](unsigned PhysReg) {
+      assert(PhysReg <  NumRegs && "physReg out of bounds");
+      return Array[PhysReg];
+    }
+  };
+
+  const TargetRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  VirtRegMap *VRM;
+  LiveIntervals *LIS;
+  LiveUnionArray PhysReg2LiveUnion;
+
+  // Current queries, one per physreg. They must be reinitialized each time we
+  // query on a new live virtual register.
+  OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+  RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+  virtual ~RegAllocBase() {}
+
+  // A RegAlloc pass should call this before allocatePhysRegs.
+  void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+  // Get an initialized query to check interferences between lvr and preg.  Note
+  // that Query::init must be called at least once for each physical register
+  // before querying a new live virtual register. This ties Queries and
+  // PhysReg2LiveUnion together.
+  LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+    Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]);
+    return Queries[PhysReg];
+  }
+
+  // The top-level driver. The output is a VirtRegMap that us updated with
+  // physical register assignments.
+  //
+  // If an implementation wants to override the LiveInterval comparator, we
+  // should modify this interface to allow passing in an instance derived from
+  // LiveVirtRegQueue.
+  void allocatePhysRegs();
+
+  // Get a temporary reference to a Spiller instance.
+  virtual Spiller &spiller() = 0;
+
+  // getPriority - Calculate the allocation priority for VirtReg.
+  // Virtual registers with higher priorities are allocated first.
+  virtual float getPriority(LiveInterval *LI) = 0;
+
+  // A RegAlloc pass should override this to provide the allocation heuristics.
+  // Each call must guarantee forward progess by returning an available PhysReg
+  // or new set of split live virtual registers. It is up to the splitter to
+  // converge quickly toward fully spilled live ranges.
+  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+                                 SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+  // A RegAlloc pass should call this when PassManager releases its memory.
+  virtual void releaseMemory();
+
+  // Helper for checking interference between a live virtual register and a
+  // physical register, including all its register aliases. If an interference
+  // exists, return the interfering register, which may be preg or an alias.
+  unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+  /// assign - Assign VirtReg to PhysReg.
+  /// This should not be called from selectOrSplit for the current register.
+  void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+  /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+  /// This can be invoked from selectOrSplit, but be careful to guarantee that
+  /// allocation is making progress.
+  void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+  // Helper for spilling all live virtual registers currently unified under preg
+  // that interfere with the most recently queried lvr.  Return true if spilling
+  // was successful, and append any new spilled/split intervals to splitLVRs.
+  bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+                          SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+  /// addMBBLiveIns - Add physreg liveins to basic blocks.
+  void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+  // Verify each LiveIntervalUnion.
+  void verify();
+#endif
+
+  // Use this group name for NamedRegionTimer.
+  static const char *TimerGroupName;
+
+public:
+  /// VerifyEnabled - True when -verify-regalloc is given.
+  static bool VerifyEnabled;
+
+private:
+  void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&);
+
+  void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+                SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..045c8db9dadb
--- /dev/null
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,523 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "RegAllocBase.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned     , "Number of registers assigned");
+STATISTIC(NumUnassigned   , "Number of registers unassigned");
+STATISTIC(NumNewQueued    , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+                                      createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+               cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+  // context
+  MachineFunction *MF;
+  BitVector ReservedRegs;
+
+  // analyses
+  LiveStacks *LS;
+  RenderMachineFunction *RMF;
+
+  // state
+  std::auto_ptr<Spiller> SpillerInstance;
+
+public:
+  RABasic();
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "Basic Register Allocator";
+  }
+
+  /// RABasic analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual void releaseMemory();
+
+  virtual Spiller &spiller() { return *SpillerInstance; }
+
+  virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+                                 SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+  /// Perform register allocation.
+  virtual bool runOnMachineFunction(MachineFunction &mf);
+
+  static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+  initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<SlotIndexes>();
+  if (StrongPHIElim)
+    AU.addRequiredID(StrongPHIEliminationID);
+  AU.addRequiredTransitive<RegisterCoalescer>();
+  AU.addRequired<CalculateSpillWeights>();
+  AU.addRequired<LiveStacks>();
+  AU.addPreserved<LiveStacks>();
+  AU.addRequiredID(MachineDominatorsID);
+  AU.addPreservedID(MachineDominatorsID);
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<VirtRegMap>();
+  AU.addPreserved<VirtRegMap>();
+  DEBUG(AU.addRequired<RenderMachineFunction>());
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+  SpillerInstance.reset(0);
+  RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+  LiveVirtRegBitSet VisitedVRegs;
+  OwningArrayPtr<LiveVirtRegBitSet>
+    unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+  // Verify disjoint unions.
+  for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+    DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+    LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+    PhysReg2LiveUnion[PhysReg].verify(VRegs);
+    // Union + intersection test could be done efficiently in one pass, but
+    // don't add a method to SparseBitVector unless we really need it.
+    assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+    VisitedVRegs |= VRegs;
+  }
+
+  // Verify vreg coverage.
+  for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+       liItr != liEnd; ++liItr) {
+    unsigned reg = liItr->first;
+    if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+    if (!VRM->hasPhys(reg)) continue; // spilled?
+    unsigned PhysReg = VRM->getPhys(reg);
+    if (!unionVRegs[PhysReg].test(reg)) {
+      dbgs() << "LiveVirtReg " << reg << " not in union " <<
+        TRI->getName(PhysReg) << "\n";
+      llvm_unreachable("unallocated live vreg");
+    }
+  }
+  // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+//                         RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+                                        unsigned NRegs) {
+  NumRegs = NRegs;
+  Array =
+    static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+  for (unsigned r = 0; r != NRegs; ++r)
+    new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+  NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+  TRI = &vrm.getTargetRegInfo();
+  MRI = &vrm.getRegInfo();
+  VRM = &vrm;
+  LIS = &lis;
+  PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs());
+  // Cache an interferece query for each physical reg
+  Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+  if (!Array)
+    return;
+  for (unsigned r = 0; r != NumRegs; ++r)
+    Array[r].~LiveIntervalUnion();
+  free(Array);
+  NumRegs =  0;
+  Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+  PhysReg2LiveUnion.clear();
+}
+
+// Visit all the live virtual registers. If they are already assigned to a
+// physical register, unify them with the corresponding LiveIntervalUnion,
+// otherwise push them on the priority queue for later assignment.
+void RegAllocBase::
+seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) {
+  for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+    unsigned RegNum = I->first;
+    LiveInterval &VirtReg = *I->second;
+    if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+      PhysReg2LiveUnion[RegNum].unify(VirtReg);
+    else
+      VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum));
+  }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+  DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+               << " to " << PrintReg(PhysReg, TRI) << '\n');
+  assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+  VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+  PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+  ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+  DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+               << " from " << PrintReg(PhysReg, TRI) << '\n');
+  assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+  PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+  VRM->clearVirt(VirtReg.reg);
+  ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+
+  // Push each vreg onto a queue or "precolor" by adding it to a physreg union.
+  std::priority_queue<std::pair<float, unsigned> > VirtRegQ;
+  seedLiveVirtRegs(VirtRegQ);
+
+  // Continue assigning vregs one at a time to available physical registers.
+  while (!VirtRegQ.empty()) {
+    // Pop the highest priority vreg.
+    LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second);
+    VirtRegQ.pop();
+
+    // selectOrSplit requests the allocator to return an available physical
+    // register if possible and populate a list of new live intervals that
+    // result from splitting.
+    DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName()
+                 << ':' << VirtReg << '\n');
+    typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+    VirtRegVec SplitVRegs;
+    unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs);
+
+    if (AvailablePhysReg)
+      assign(VirtReg, AvailablePhysReg);
+
+    for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+         I != E; ++I) {
+      LiveInterval* SplitVirtReg = *I;
+      if (SplitVirtReg->empty()) continue;
+      DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+      assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+             "expect split value in virtual register");
+      VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg),
+                                   SplitVirtReg->reg));
+      ++NumNewQueued;
+    }
+  }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+                                                unsigned PhysReg) {
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+    if (query(VirtReg, *AliasI).checkInterference())
+      return *AliasI;
+  return 0;
+}
+
+// Helper for spillInteferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+                            SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+  LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+  assert(Q.seenAllInterferences() && "need collectInterferences()");
+  const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+  for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+         E = PendingSpills.end(); I != E; ++I) {
+    LiveInterval &SpilledVReg = **I;
+    DEBUG(dbgs() << "extracting from " <<
+          TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+    // Deallocate the interfering vreg by removing it from the union.
+    // A LiveInterval instance may not be in a union during modification!
+    unassign(SpilledVReg, PhysReg);
+
+    // Spill the extracted interval.
+    spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills);
+  }
+  // After extracting segments, the query's results are invalid. But keep the
+  // contents valid until we're done accessing pendingSpills.
+  Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+                                 SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+  // Record each interference and determine if all are spillable before mutating
+  // either the union or live intervals.
+  unsigned NumInterferences = 0;
+  // Collect interferences assigned to any alias of the physical register.
+  for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+    LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+    NumInterferences += QAlias.collectInterferingVRegs();
+    if (QAlias.seenUnspillableVReg()) {
+      return false;
+    }
+  }
+  DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+        " interferences with " << VirtReg << "\n");
+  assert(NumInterferences > 0 && "expect interference");
+
+  // Spill each interfering vreg allocated to PhysReg or an alias.
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+    spillReg(VirtReg, *AliasI, SplitVRegs);
+  return true;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+  NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+  typedef SmallVector<MachineBasicBlock*, 8> MBBVec;
+  MBBVec liveInMBBs;
+  MachineBasicBlock &entryMBB = *MF->begin();
+
+  for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+    LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+    if (LiveUnion.empty())
+      continue;
+    for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid();
+         ++SI) {
+
+      // Find the set of basic blocks which this range is live into...
+      liveInMBBs.clear();
+      if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue;
+
+      // And add the physreg for this interval to their live-in sets.
+      for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
+           I != E; ++I) {
+        MachineBasicBlock *MBB = *I;
+        if (MBB == &entryMBB) continue;
+        if (MBB->isLiveIn(PhysReg)) continue;
+        MBB->addLiveIn(PhysReg);
+      }
+    }
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register the correct class until we find an
+// available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+                                SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+  // Populate a list of physical register spill candidates.
+  SmallVector<unsigned, 8> PhysRegSpillCands;
+
+  // Check for an available register in this class.
+  const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg);
+
+  for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF),
+         E = TRC->allocation_order_end(*MF);
+       I != E; ++I) {
+
+    unsigned PhysReg = *I;
+    if (ReservedRegs.test(PhysReg)) continue;
+
+    // Check interference and as a side effect, intialize queries for this
+    // VirtReg and its aliases.
+    unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+    if (interfReg == 0) {
+      // Found an available register.
+      return PhysReg;
+    }
+    LiveInterval *interferingVirtReg =
+      Queries[interfReg].firstInterference().liveUnionPos().value();
+
+    // The current VirtReg must either be spillable, or one of its interferences
+    // must have less spill weight.
+    if (interferingVirtReg->weight < VirtReg.weight ) {
+      PhysRegSpillCands.push_back(PhysReg);
+    }
+  }
+  // Try to spill another interfering reg with less spill weight.
+  for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+         PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+    if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+    assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+           "Interference after spill.");
+    // Tell the caller to allocate to this newly freed physical register.
+    return *PhysRegI;
+  }
+  // No other spill candidates were found, so spill the current VirtReg.
+  DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+  SmallVector<LiveInterval*, 1> pendingSpills;
+
+  spiller().spill(&VirtReg, SplitVRegs, pendingSpills);
+
+  // The live virtual register requesting allocation was spilled, so tell
+  // the caller not to allocate anything during this round.
+  return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+  DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+               << "********** Function: "
+               << ((Value*)mf.getFunction())->getName() << '\n');
+
+  MF = &mf;
+  DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+  RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+
+  ReservedRegs = TRI->getReservedRegs(*MF);
+
+  SpillerInstance.reset(createSpiller(*this, *MF, *VRM));
+
+  allocatePhysRegs();
+
+  addMBBLiveIns(MF);
+
+  // Diagnostic output before rewriting
+  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+  // optional HTML output
+  DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+  // FIXME: Verification currently must run before VirtRegRewriter. We should
+  // make the rewriter a separate pass and override verifyAnalysis instead. When
+  // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+  if (VerifyEnabled) {
+    // Verify accuracy of LiveIntervals. The standard machine code verifier
+    // ensures that each LiveIntervals covers all uses of the virtual reg.
+
+    // FIXME: MachineVerifier is badly broken when using the standard
+    // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+    // inline spiller, some tests fail to verify because the coalescer does not
+    // always generate verifiable code.
+    MF->verify(this, "In RABasic::verify");
+
+    // Verify that LiveIntervals are partitioned into unions and disjoint within
+    // the unions.
+    verify();
+  }
+#endif // !NDEBUG
+
+  // Run rewriter
+  VRM->rewrite(LIS->getSlotIndexes());
+
+  // The pass output is in VirtRegMap. Release all the transient data.
+  releaseMemory();
+
+  return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+  return new RABasic();
+}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index fc150d55e226..15036e38b893 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -48,7 +48,10 @@ namespace {
   public:
     static char ID;
     RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
-               isBulkSpilling(false) {}
+               isBulkSpilling(false) {
+      initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+      initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+    }
   private:
     const TargetMachine *TM;
     MachineFunction *MF;
@@ -259,8 +262,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
     // instruction, not on the spill.
     bool SpillKill = LR.LastUse != MI;
     LR.Dirty = false;
-    DEBUG(dbgs() << "Spilling %reg" << LRI->first
-                 << " in " << TRI->getName(LR.PhysReg));
+    DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+                 << " in " << PrintReg(LR.PhysReg, TRI));
     const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
     int FI = getStackSpaceFor(LRI->first, RC);
     DEBUG(dbgs() << " to stack slot #" << FI << "\n");
@@ -331,7 +334,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
     MO.setIsKill();
     return;
   default:
-    // The physreg was allocated to a virtual register. That means to value we
+    // The physreg was allocated to a virtual register. That means the value we
     // wanted has been clobbered.
     llvm_unreachable("Instruction uses an allocated register");
   }
@@ -458,8 +461,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
 /// register must not be used for anything else when this is called.
 ///
 void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
-  DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
-               << TRI->getName(PhysReg) << "\n");
+  DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+               << PrintReg(PhysReg, TRI) << "\n");
   PhysRegState[PhysReg] = LRE.first;
   assert(!LRE.second.PhysReg && "Already assigned a physreg");
   LRE.second.PhysReg = PhysReg;
@@ -503,8 +506,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
       return assignVirtToPhysReg(LRE, PhysReg);
   }
 
-  DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
-               << "\n");
+  DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+               << RC->getName() << "\n");
 
   unsigned BestReg = 0, BestCost = spillImpossible;
   for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
@@ -584,8 +587,8 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
     allocVirtReg(MI, *LRI, Hint);
     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
     int FrameIndex = getStackSpaceFor(VirtReg, RC);
-    DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
-                 << TRI->getName(LR.PhysReg) << "\n");
+    DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+                 << PrintReg(LR.PhysReg, TRI) << "\n");
     TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
     ++NumLoads;
   } else if (LR.Dirty) {
@@ -653,11 +656,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
     if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
         (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
       if (ThroughRegs.insert(Reg))
-        DEBUG(dbgs() << " %reg" << Reg);
+        DEBUG(dbgs() << ' ' << PrintReg(Reg));
     }
   }
 
@@ -685,7 +689,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
     if (MO.isUse()) {
       unsigned DefIdx = 0;
       if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
@@ -731,6 +735,27 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
 void RAFast::AllocateBasicBlock() {
   DEBUG(dbgs() << "\nAllocating " << *MBB);
 
+  // FIXME: This should probably be added by instruction selection instead?
+  // If the last instruction in the block is a return, make sure to mark it as
+  // using all of the live-out values in the function.  Things marked both call
+  // and return are tail calls; do not do this for them.  The tail callee need
+  // not take the same registers as input that it produces as output, and there
+  // are dependencies for its input registers elsewhere.
+  if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+      !MBB->back().getDesc().isCall()) {
+    MachineInstr *Ret = &MBB->back();
+
+    for (MachineRegisterInfo::liveout_iterator
+         I = MF->getRegInfo().liveout_begin(),
+         E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+      assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+             "Cannot have a live-out virtual register.");
+
+      // Add live-out registers as implicit uses.
+      Ret->addRegisterKilled(*I, TRI, true);
+    }
+  }
+
   PhysRegState.assign(TRI->getNumRegs(), regDisabled);
   assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?");
 
@@ -761,7 +786,7 @@ void RAFast::AllocateBasicBlock() {
             dbgs() << "*";
             break;
           default:
-            dbgs() << "=%reg" << PhysRegState[Reg];
+            dbgs() << '=' << PrintReg(PhysRegState[Reg]);
             if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
               dbgs() << "*";
             assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
@@ -791,16 +816,18 @@ void RAFast::AllocateBasicBlock() {
           MachineOperand &MO = MI->getOperand(i);
           if (!MO.isReg()) continue;
           unsigned Reg = MO.getReg();
-          if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+          if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
           LiveDbgValueMap[Reg] = MI;
           LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
           if (LRI != LiveVirtRegs.end())
             setPhysReg(MI, i, LRI->second.PhysReg);
           else {
             int SS = StackSlotForVirtReg[Reg];
-            if (SS == -1)
+            if (SS == -1) {
               // We can't allocate a physreg for a DebugValue, sorry!
+              DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
               MO.setReg(0);
+            }
             else {
               // Modify DBG_VALUE now that the value is in a spill slot.
               int64_t Offset = MI->getOperand(1).getImm();
@@ -817,9 +844,11 @@ void RAFast::AllocateBasicBlock() {
                 MI = NewDV;
                 ScanDbgValue = true;
                 break;
-              } else
+              } else {
                 // We can't allocate a physreg for a DebugValue; sorry!
+                DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
                 MO.setReg(0);
+              }
             }
           }
         }
@@ -902,7 +931,7 @@ void RAFast::AllocateBasicBlock() {
       MachineOperand &MO = MI->getOperand(i);
       if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
-      if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+      if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
       if (MO.isUse()) {
         LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
         unsigned PhysReg = LRI->second.PhysReg;
@@ -1017,8 +1046,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers
-  unsigned LastVirtReg = MRI->getLastVirtReg();
-  StackSlotForVirtReg.grow(LastVirtReg);
+  StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
 
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..c1372cd038cf
--- /dev/null
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1285 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits,  "Number of split local live ranges");
+STATISTIC(NumReassigned,   "Number of interferences reassigned");
+STATISTIC(NumEvicted,      "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+                                       createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass, public RegAllocBase {
+  // context
+  MachineFunction *MF;
+  BitVector ReservedRegs;
+
+  // analyses
+  SlotIndexes *Indexes;
+  LiveStacks *LS;
+  MachineDominatorTree *DomTree;
+  MachineLoopInfo *Loops;
+  MachineLoopRanges *LoopRanges;
+  EdgeBundles *Bundles;
+  SpillPlacement *SpillPlacer;
+
+  // state
+  std::auto_ptr<Spiller> SpillerInstance;
+  std::auto_ptr<SplitAnalysis> SA;
+
+  // splitting state.
+
+  /// All basic blocks where the current register is live.
+  SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints;
+
+  /// For every instruction in SA->UseSlots, store the previous non-copy
+  /// instruction.
+  SmallVector<SlotIndex, 8> PrevSlot;
+
+public:
+  RAGreedy();
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "Greedy Register Allocator";
+  }
+
+  /// RAGreedy analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual void releaseMemory();
+
+  virtual Spiller &spiller() { return *SpillerInstance; }
+
+  virtual float getPriority(LiveInterval *LI);
+
+  virtual unsigned selectOrSplit(LiveInterval&,
+                                 SmallVectorImpl<LiveInterval*>&);
+
+  /// Perform register allocation.
+  virtual bool runOnMachineFunction(MachineFunction &mf);
+
+  static char ID;
+
+private:
+  bool checkUncachedInterference(LiveInterval&, unsigned);
+  LiveInterval *getSingleInterference(LiveInterval&, unsigned);
+  bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg);
+  float calcInterferenceWeight(LiveInterval&, unsigned);
+  float calcInterferenceInfo(LiveInterval&, unsigned);
+  float calcGlobalSplitCost(const BitVector&);
+  void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+                         SmallVectorImpl<LiveInterval*>&);
+  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+  SlotIndex getPrevMappedIndex(const MachineInstr*);
+  void calcPrevSlots();
+  unsigned nextSplitPoint(unsigned);
+
+  unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&,
+                              SmallVectorImpl<LiveInterval*>&);
+  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+                          SmallVectorImpl<LiveInterval*>&);
+  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+    SmallVectorImpl<LiveInterval*>&);
+  unsigned trySplit(LiveInterval&, AllocationOrder&,
+                    SmallVectorImpl<LiveInterval*>&);
+  unsigned trySpillInterferences(LiveInterval&, AllocationOrder&,
+                                 SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+  return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
+  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addRequired<SlotIndexes>();
+  AU.addPreserved<SlotIndexes>();
+  if (StrongPHIElim)
+    AU.addRequiredID(StrongPHIEliminationID);
+  AU.addRequiredTransitive<RegisterCoalescer>();
+  AU.addRequired<CalculateSpillWeights>();
+  AU.addRequired<LiveStacks>();
+  AU.addPreserved<LiveStacks>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineLoopRanges>();
+  AU.addPreserved<MachineLoopRanges>();
+  AU.addRequired<VirtRegMap>();
+  AU.addPreserved<VirtRegMap>();
+  AU.addRequired<EdgeBundles>();
+  AU.addRequired<SpillPlacement>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RAGreedy::releaseMemory() {
+  SpillerInstance.reset(0);
+  RegAllocBase::releaseMemory();
+}
+
+float RAGreedy::getPriority(LiveInterval *LI) {
+  float Priority = LI->weight;
+
+  // Prioritize hinted registers so they are allocated first.
+  std::pair<unsigned, unsigned> Hint;
+  if (Hint.first || Hint.second) {
+    // The hint can be target specific, a virtual register, or a physreg.
+    Priority *= 2;
+
+    // Prefer physreg hints above anything else.
+    if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second))
+      Priority *= 2;
+  }
+  return Priority;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Register Reassignment
+//===----------------------------------------------------------------------===//
+
+// Check interference without using the cache.
+bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg,
+                                         unsigned PhysReg) {
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]);
+    if (subQ.checkInterference())
+      return true;
+  }
+  return false;
+}
+
+/// getSingleInterference - Return the single interfering virtual register
+/// assigned to PhysReg. Return 0 if more than one virtual register is
+/// interfering.
+LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
+                                              unsigned PhysReg) {
+  // Check physreg and aliases.
+  LiveInterval *Interference = 0;
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    if (Q.checkInterference()) {
+      if (Interference)
+        return 0;
+      Q.collectInterferingVRegs(1);
+      if (!Q.seenAllInterferences())
+        return 0;
+      Interference = Q.interferingVRegs().front();
+    }
+  }
+  return Interference;
+}
+
+// Attempt to reassign this virtual register to a different physical register.
+//
+// FIXME: we are not yet caching these "second-level" interferences discovered
+// in the sub-queries. These interferences can change with each call to
+// selectOrSplit. However, we could implement a "may-interfere" cache that
+// could be conservatively dirtied when we reassign or split.
+//
+// FIXME: This may result in a lot of alias queries. We could summarize alias
+// live intervals in their parent register's live union, but it's messy.
+bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
+                            unsigned WantedPhysReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) &&
+         "Can only reassign virtual registers");
+  assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) &&
+         "inconsistent phys reg assigment");
+
+  AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs);
+  while (unsigned PhysReg = Order.next()) {
+    // Don't reassign to a WantedPhysReg alias.
+    if (TRI->regsOverlap(PhysReg, WantedPhysReg))
+      continue;
+
+    if (checkUncachedInterference(InterferingVReg, PhysReg))
+      continue;
+
+    // Reassign the interfering virtual reg to this physical reg.
+    unsigned OldAssign = VRM->getPhys(InterferingVReg.reg);
+    DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " <<
+          TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n');
+    unassign(InterferingVReg, OldAssign);
+    assign(InterferingVReg, PhysReg);
+    ++NumReassigned;
+    return true;
+  }
+  return false;
+}
+
+/// tryReassignOrEvict - Try to reassign a single interferences to a different
+/// physreg, or evict a single interference with a lower spill weight.
+/// @param  VirtReg Currently unassigned virtual register.
+/// @param  Order   Physregs to try.
+/// @return         Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg,
+                                      AllocationOrder &Order,
+                                      SmallVectorImpl<LiveInterval*> &NewVRegs){
+  NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
+
+  // Keep track of the lightest single interference seen so far.
+  float BestWeight = VirtReg.weight;
+  LiveInterval *BestVirt = 0;
+  unsigned BestPhys = 0;
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
+    if (!InterferingVReg)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg))
+      continue;
+    if (reassignVReg(*InterferingVReg, PhysReg))
+      return PhysReg;
+
+    // Cannot reassign, is this an eviction candidate?
+    if (InterferingVReg->weight < BestWeight) {
+      BestVirt = InterferingVReg;
+      BestPhys = PhysReg;
+      BestWeight = InterferingVReg->weight;
+    }
+  }
+
+  // Nothing reassigned, can we evict a lighter single interference?
+  if (BestVirt) {
+    DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n');
+    unassign(*BestVirt, VRM->getPhys(BestVirt->reg));
+    ++NumEvicted;
+    NewVRegs.push_back(BestVirt);
+    return BestPhys;
+  }
+
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                              Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints
+/// when considering interference from PhysReg. Also compute an optimistic local
+/// cost of this interference pattern.
+///
+/// The final cost of a split is the local cost + global cost of preferences
+/// broken by SpillPlacement.
+///
+float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
+  // Reset interference dependent info.
+  SpillConstraints.resize(SA->LiveBlocks.size());
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+    BC.Number = BI.MBB->getNumber();
+    BC.Entry = (BI.Uses && BI.LiveIn) ?
+      SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BC.Exit = (BI.Uses && BI.LiveOut) ?
+      SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BI.OverlapEntry = BI.OverlapExit = false;
+  }
+
+  // Add interference info from each PhysReg alias.
+  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+    if (!query(VirtReg, *AI).checkInterference())
+      continue;
+    LiveIntervalUnion::SegmentIter IntI =
+      PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+    if (!IntI.valid())
+      continue;
+
+    // Determine which blocks have interference live in or after the last split
+    // point.
+    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+      SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+      SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+      SlotIndex Start, Stop;
+      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+      // Skip interference-free blocks.
+      if (IntI.start() >= Stop)
+        continue;
+
+      // Is the interference live-in?
+      if (BI.LiveIn) {
+        IntI.advanceTo(Start);
+        if (!IntI.valid())
+          break;
+        if (IntI.start() <= Start)
+          BC.Entry = SpillPlacement::MustSpill;
+      }
+
+      // Is the interference overlapping the last split point?
+      if (BI.LiveOut) {
+        if (IntI.stop() < BI.LastSplitPoint)
+          IntI.advanceTo(BI.LastSplitPoint.getPrevSlot());
+        if (!IntI.valid())
+          break;
+        if (IntI.start() < Stop)
+          BC.Exit = SpillPlacement::MustSpill;
+      }
+    }
+
+    // Rewind iterator and check other interferences.
+    IntI.find(VirtReg.beginIndex());
+    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+      SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+      SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+      SlotIndex Start, Stop;
+      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+      // Skip interference-free blocks.
+      if (IntI.start() >= Stop)
+        continue;
+
+      // Handle transparent blocks with interference separately.
+      // Transparent blocks never incur any fixed cost.
+      if (BI.LiveThrough && !BI.Uses) {
+        IntI.advanceTo(Start);
+        if (!IntI.valid())
+          break;
+        if (IntI.start() >= Stop)
+          continue;
+
+        if (BC.Entry != SpillPlacement::MustSpill)
+          BC.Entry = SpillPlacement::PrefSpill;
+        if (BC.Exit != SpillPlacement::MustSpill)
+          BC.Exit = SpillPlacement::PrefSpill;
+        continue;
+      }
+
+      // Now we only have blocks with uses left.
+      // Check if the interference overlaps the uses.
+      assert(BI.Uses && "Non-transparent block without any uses");
+
+      // Check interference on entry.
+      if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) {
+        IntI.advanceTo(Start);
+        if (!IntI.valid())
+          break;
+        // Not live in, but before the first use.
+        if (IntI.start() < BI.FirstUse)
+          BC.Entry = SpillPlacement::PrefSpill;
+      }
+
+      // Does interference overlap the uses in the entry segment
+      // [FirstUse;Kill)?
+      if (BI.LiveIn && !BI.OverlapEntry) {
+        IntI.advanceTo(BI.FirstUse);
+        if (!IntI.valid())
+          break;
+        // A live-through interval has no kill.
+        // Check [FirstUse;LastUse) instead.
+        if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+          BI.OverlapEntry = true;
+      }
+
+      // Does interference overlap the uses in the exit segment [Def;LastUse)?
+      if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) {
+        IntI.advanceTo(BI.Def);
+        if (!IntI.valid())
+          break;
+        if (IntI.start() < BI.LastUse)
+          BI.OverlapExit = true;
+      }
+
+      // Check interference on exit.
+      if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) {
+        // Check interference between LastUse and Stop.
+        if (BC.Exit != SpillPlacement::PrefSpill) {
+          IntI.advanceTo(BI.LastUse);
+          if (!IntI.valid())
+            break;
+          if (IntI.start() < Stop)
+            BC.Exit = SpillPlacement::PrefSpill;
+        }
+      }
+    }
+  }
+
+  // Accumulate a local cost of this interference pattern.
+  float LocalCost = 0;
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    if (!BI.Uses)
+      continue;
+    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+    unsigned Inserts = 0;
+
+    // Do we need spill code for the entry segment?
+    if (BI.LiveIn)
+      Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg;
+
+    // For the exit segment?
+    if (BI.LiveOut)
+      Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg;
+
+    // The local cost of spill code in this block is the block frequency times
+    // the number of spill instructions inserted.
+    if (Inserts)
+      LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB);
+  }
+  DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = "
+               << LocalCost << '\n');
+  return LocalCost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SpillConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
+  float GlobalCost = 0;
+  for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) {
+    SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+    unsigned Inserts = 0;
+    // Broken entry preference?
+    Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] !=
+                 (BC.Entry == SpillPlacement::PrefReg);
+    // Broken exit preference?
+    Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] !=
+                 (BC.Exit == SpillPlacement::PrefReg);
+    if (Inserts)
+      GlobalCost +=
+        Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB);
+  }
+  DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n');
+  return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed by
+/// SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
+                                 const BitVector &LiveBundles,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  DEBUG({
+    dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+           << " with bundles";
+    for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+      dbgs() << " EB#" << i;
+    dbgs() << ".\n";
+  });
+
+  // First compute interference ranges in the live blocks.
+  typedef std::pair<SlotIndex, SlotIndex> IndexPair;
+  SmallVector<IndexPair, 8> InterferenceRanges;
+  InterferenceRanges.resize(SA->LiveBlocks.size());
+  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+    if (!query(VirtReg, *AI).checkInterference())
+      continue;
+    LiveIntervalUnion::SegmentIter IntI =
+      PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+    if (!IntI.valid())
+      continue;
+    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+      const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+      IndexPair &IP = InterferenceRanges[i];
+      SlotIndex Start, Stop;
+      tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+      // Skip interference-free blocks.
+      if (IntI.start() >= Stop)
+        continue;
+
+      // First interference in block.
+      if (BI.LiveIn) {
+        IntI.advanceTo(Start);
+        if (!IntI.valid())
+          break;
+        if (IntI.start() >= Stop)
+          continue;
+        if (!IP.first.isValid() || IntI.start() < IP.first)
+          IP.first = IntI.start();
+      }
+
+      // Last interference in block.
+      if (BI.LiveOut) {
+        IntI.advanceTo(Stop);
+        if (!IntI.valid() || IntI.start() >= Stop)
+          --IntI;
+        if (IntI.stop() <= Start)
+          continue;
+        if (!IP.second.isValid() || IntI.stop() > IP.second)
+          IP.second = IntI.stop();
+      }
+    }
+  }
+
+  SmallVector<LiveInterval*, 4> SpillRegs;
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+  SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+  // Create the main cross-block interval.
+  SE.openIntv();
+
+  // First add all defs that are live out of a block.
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+    // Should the register be live out?
+    if (!BI.LiveOut || !RegOut)
+      continue;
+
+    IndexPair &IP = InterferenceRanges[i];
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
+                 << " intf [" << IP.first << ';' << IP.second << ')');
+
+    // The interference interval should either be invalid or overlap MBB.
+    assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
+    assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+
+    // Check interference leaving the block.
+    if (!IP.second.isValid()) {
+      // Block is interference-free.
+      DEBUG(dbgs() << ", no interference");
+      if (!BI.Uses) {
+        assert(BI.LiveThrough && "No uses, but not live through block?");
+        // Block is live-through without interference.
+        DEBUG(dbgs() << ", no uses"
+                     << (RegIn ? ", live-through.\n" : ", stack in.\n"));
+        if (!RegIn)
+          SE.enterIntvAtEnd(*BI.MBB);
+        continue;
+      }
+      if (!BI.LiveThrough) {
+        DEBUG(dbgs() << ", not live-through.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+        continue;
+      }
+      if (!RegIn) {
+        // Block is live-through, but entry bundle is on the stack.
+        // Reload just before the first use.
+        DEBUG(dbgs() << ", not live-in, enter before first use.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+        continue;
+      }
+      DEBUG(dbgs() << ", live-through.\n");
+      continue;
+    }
+
+    // Block has interference.
+    DEBUG(dbgs() << ", interference to " << IP.second);
+
+    if (!BI.LiveThrough && IP.second <= BI.Def) {
+      // The interference doesn't reach the outgoing segment.
+      DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
+      SE.useIntv(BI.Def, Stop);
+      continue;
+    }
+
+
+    if (!BI.Uses) {
+      // No uses in block, avoid interference by reloading as late as possible.
+      DEBUG(dbgs() << ", no uses.\n");
+      SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+      assert(SegStart >= IP.second && "Couldn't avoid interference");
+      continue;
+    }
+
+    if (IP.second.getBoundaryIndex() < BI.LastUse) {
+      // There are interference-free uses at the end of the block.
+      // Find the first use that can get the live-out register.
+      SmallVectorImpl<SlotIndex>::const_iterator UI =
+        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+                         IP.second.getBoundaryIndex());
+      assert(UI != SA->UseSlots.end() && "Couldn't find last use");
+      SlotIndex Use = *UI;
+      assert(Use <= BI.LastUse && "Couldn't find last use");
+      // Only attempt a split befroe the last split point.
+      if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+        DEBUG(dbgs() << ", free use at " << Use << ".\n");
+        SlotIndex SegStart = SE.enterIntvBefore(Use);
+        assert(SegStart >= IP.second && "Couldn't avoid interference");
+        assert(SegStart < BI.LastSplitPoint && "Impossible split point");
+        SE.useIntv(SegStart, Stop);
+        continue;
+      }
+    }
+
+    // Interference is after the last use.
+    DEBUG(dbgs() << " after last use.\n");
+    SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+    assert(SegStart >= IP.second && "Couldn't avoid interference");
+  }
+
+  // Now all defs leading to live bundles are handled, do everything else.
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+    // Is the register live-in?
+    if (!BI.LiveIn || !RegIn)
+      continue;
+
+    // We have an incoming register. Check for interference.
+    IndexPair &IP = InterferenceRanges[i];
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+    DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
+                 << " -> BB#" << BI.MBB->getNumber());
+
+    // Check interference entering the block.
+    if (!IP.first.isValid()) {
+      // Block is interference-free.
+      DEBUG(dbgs() << ", no interference");
+      if (!BI.Uses) {
+        assert(BI.LiveThrough && "No uses, but not live through block?");
+        // Block is live-through without interference.
+        if (RegOut) {
+          DEBUG(dbgs() << ", no uses, live-through.\n");
+          SE.useIntv(Start, Stop);
+        } else {
+          DEBUG(dbgs() << ", no uses, stack-out.\n");
+          SE.leaveIntvAtTop(*BI.MBB);
+        }
+        continue;
+      }
+      if (!BI.LiveThrough) {
+        DEBUG(dbgs() << ", killed in block.\n");
+        SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill));
+        continue;
+      }
+      if (!RegOut) {
+        // Block is live-through, but exit bundle is on the stack.
+        // Spill immediately after the last use.
+        if (BI.LastUse < BI.LastSplitPoint) {
+          DEBUG(dbgs() << ", uses, stack-out.\n");
+          SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse));
+          continue;
+        }
+        // The last use is after the last split point, it is probably an
+        // indirect jump.
+        DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
+                     << BI.LastSplitPoint << ", stack-out.\n");
+        SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint);
+        SE.useIntv(Start, SegEnd);
+        // Run a double interval from the split to the last use.
+        // This makes it possible to spill the complement without affecting the
+        // indirect branch.
+        SE.overlapIntv(SegEnd, BI.LastUse);
+        continue;
+      }
+      // Register is live-through.
+      DEBUG(dbgs() << ", uses, live-through.\n");
+      SE.useIntv(Start, Stop);
+      continue;
+    }
+
+    // Block has interference.
+    DEBUG(dbgs() << ", interference from " << IP.first);
+
+    if (!BI.LiveThrough && IP.first >= BI.Kill) {
+      // The interference doesn't reach the outgoing segment.
+      DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
+      SE.useIntv(Start, BI.Kill);
+      continue;
+    }
+
+    if (!BI.Uses) {
+      // No uses in block, avoid interference by spilling as soon as possible.
+      DEBUG(dbgs() << ", no uses.\n");
+      SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+      assert(SegEnd <= IP.first && "Couldn't avoid interference");
+      continue;
+    }
+    if (IP.first.getBaseIndex() > BI.FirstUse) {
+      // There are interference-free uses at the beginning of the block.
+      // Find the last use that can get the register.
+      SmallVectorImpl<SlotIndex>::const_iterator UI =
+        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+                         IP.first.getBaseIndex());
+      assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
+      SlotIndex Use = (--UI)->getBoundaryIndex();
+      DEBUG(dbgs() << ", free use at " << *UI << ".\n");
+      SlotIndex SegEnd = SE.leaveIntvAfter(Use);
+      assert(SegEnd <= IP.first && "Couldn't avoid interference");
+      SE.useIntv(Start, SegEnd);
+      continue;
+    }
+
+    // Interference is before the first use.
+    DEBUG(dbgs() << " before first use.\n");
+    SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+    assert(SegEnd <= IP.first && "Couldn't avoid interference");
+  }
+
+  SE.closeIntv();
+
+  // FIXME: Should we be more aggressive about splitting the stack region into
+  // per-block segments? The current approach allows the stack region to
+  // separate into connected components. Some components may be allocatable.
+  SE.finish();
+  ++NumGlobalSplits;
+
+  if (VerifyEnabled) {
+    MF->verify(this, "After splitting live range around region");
+
+#ifndef NDEBUG
+    // Make sure that at least one of the new intervals can allocate to PhysReg.
+    // That was the whole point of splitting the live range.
+    bool found = false;
+    for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E;
+         ++I)
+      if (!checkUncachedInterference(**I, PhysReg)) {
+        found = true;
+        break;
+      }
+    assert(found && "No allocatable intervals after pointless splitting");
+#endif
+  }
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  BitVector LiveBundles, BestBundles;
+  float BestCost = 0;
+  unsigned BestReg = 0;
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    float Cost = calcInterferenceInfo(VirtReg, PhysReg);
+    if (BestReg && Cost >= BestCost)
+      continue;
+
+    SpillPlacer->placeSpills(SpillConstraints, LiveBundles);
+    // No live bundles, defer to splitSingleBlocks().
+    if (!LiveBundles.any())
+      continue;
+
+    Cost += calcGlobalSplitCost(LiveBundles);
+    if (!BestReg || Cost < BestCost) {
+      BestReg = PhysReg;
+      BestCost = Cost;
+      BestBundles.swap(LiveBundles);
+    }
+  }
+
+  if (!BestReg)
+    return 0;
+
+  splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs);
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                             Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+                              SmallVectorImpl<float> &GapWeight) {
+  assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+  const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  const unsigned NumGaps = Uses.size()-1;
+
+  // Start and end points for the interference check.
+  SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
+  SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+
+  GapWeight.assign(NumGaps, 0.0f);
+
+  // Add interference from each overlapping register.
+  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+    if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+           .checkInterference())
+      continue;
+
+    // We know that VirtReg is a continuous interval from FirstUse to LastUse,
+    // so we don't need InterferenceQuery.
+    //
+    // Interference that overlaps an instruction is counted in both gaps
+    // surrounding the instruction. The exception is interference before
+    // StartIdx and after StopIdx.
+    //
+    LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+      // Skip the gaps before IntI.
+      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+        if (++Gap == NumGaps)
+          break;
+      if (Gap == NumGaps)
+        break;
+
+      // Update the gaps covered by IntI.
+      const float weight = IntI.value()->weight;
+      for (; Gap != NumGaps; ++Gap) {
+        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+          break;
+      }
+      if (Gap == NumGaps)
+        break;
+    }
+  }
+}
+
+/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
+/// before MI that has a slot index. If MI is the first mapped instruction in
+/// its block, return the block start index instead.
+///
+SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
+  assert(MI && "Missing MachineInstr");
+  const MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
+  while (I != B)
+    if (!(--I)->isDebugValue() && !I->isCopy())
+      return Indexes->getInstructionIndex(I);
+  return Indexes->getMBBStartIdx(MBB);
+}
+
+/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
+/// real non-copy instruction for each instruction in SA->UseSlots.
+///
+void RAGreedy::calcPrevSlots() {
+  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  PrevSlot.clear();
+  PrevSlot.reserve(Uses.size());
+  for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+    const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
+    PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
+  }
+}
+
+/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may
+/// be beneficial to split before UseSlots[i].
+///
+/// 0 is always a valid split point
+unsigned RAGreedy::nextSplitPoint(unsigned i) {
+  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  const unsigned Size = Uses.size();
+  assert(i != Size && "No split points after the end");
+  // Allow split before i when Uses[i] is not adjacent to the previous use.
+  while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
+    ;
+  return i;
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+  const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+
+  // Note that it is possible to have an interval that is live-in or live-out
+  // while only covering a single block - A phi-def can use undef values from
+  // predecessors, and the block could be a single-block loop.
+  // We don't bother doing anything clever about such a case, we simply assume
+  // that the interval is continuous from FirstUse to LastUse. We should make
+  // sure that we don't do anything illegal to such an interval, though.
+
+  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+  if (Uses.size() <= 2)
+    return 0;
+  const unsigned NumGaps = Uses.size()-1;
+
+  DEBUG({
+    dbgs() << "tryLocalSplit: ";
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+      dbgs() << ' ' << SA->UseSlots[i];
+    dbgs() << '\n';
+  });
+
+  // For every use, find the previous mapped non-copy instruction.
+  // We use this to detect valid split points, and to estimate new interval
+  // sizes.
+  calcPrevSlots();
+
+  unsigned BestBefore = NumGaps;
+  unsigned BestAfter = 0;
+  float BestDiff = 0;
+
+  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB);
+  SmallVector<float, 8> GapWeight;
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    // Keep track of the largest spill weight that would need to be evicted in
+    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+    calcGapWeights(PhysReg, GapWeight);
+
+    // Try to find the best sequence of gaps to close.
+    // The new spill weight must be larger than any gap interference.
+
+    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+    unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;
+
+    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+    // It is the spill weight that needs to be evicted.
+    float MaxGap = GapWeight[0];
+    for (unsigned i = 1; i != SplitAfter; ++i)
+      MaxGap = std::max(MaxGap, GapWeight[i]);
+
+    for (;;) {
+      // Live before/after split?
+      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+                   << " i=" << MaxGap);
+
+      // Stop before the interval gets so big we wouldn't be making progress.
+      if (!LiveBefore && !LiveAfter) {
+        DEBUG(dbgs() << " all\n");
+        break;
+      }
+      // Should the interval be extended or shrunk?
+      bool Shrink = true;
+      if (MaxGap < HUGE_VALF) {
+        // Estimate the new spill weight.
+        //
+        // Each instruction reads and writes the register, except the first
+        // instr doesn't read when !FirstLive, and the last instr doesn't write
+        // when !LastLive.
+        //
+        // We will be inserting copies before and after, so the total number of
+        // reads and writes is 2 * EstUses.
+        //
+        const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
+                                 2*(LiveBefore + LiveAfter);
+
+        // Try to guess the size of the new interval. This should be trivial,
+        // but the slot index of an inserted copy can be a lot smaller than the
+        // instruction it is inserted before if there are many dead indexes
+        // between them.
+        //
+        // We measure the distance from the instruction before SplitBefore to
+        // get a conservative estimate.
+        //
+        // The final distance can still be different if inserting copies
+        // triggers a slot index renumbering.
+        //
+        const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
+                              PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
+        // Would this split be possible to allocate?
+        // Never allocate all gaps, we wouldn't be making progress.
+        float Diff = EstWeight - MaxGap;
+        DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
+        if (Diff > 0) {
+          Shrink = false;
+          if (Diff > BestDiff) {
+            DEBUG(dbgs() << " (best)");
+            BestDiff = Diff;
+            BestBefore = SplitBefore;
+            BestAfter = SplitAfter;
+          }
+        }
+      }
+
+      // Try to shrink.
+      if (Shrink) {
+        SplitBefore = nextSplitPoint(SplitBefore);
+        if (SplitBefore < SplitAfter) {
+          DEBUG(dbgs() << " shrink\n");
+          // Recompute the max when necessary.
+          if (GapWeight[SplitBefore - 1] >= MaxGap) {
+            MaxGap = GapWeight[SplitBefore];
+            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+              MaxGap = std::max(MaxGap, GapWeight[i]);
+          }
+          continue;
+        }
+        MaxGap = 0;
+      }
+
+      // Try to extend the interval.
+      if (SplitAfter >= NumGaps) {
+        DEBUG(dbgs() << " end\n");
+        break;
+      }
+
+      DEBUG(dbgs() << " extend\n");
+      for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
+           SplitAfter != e; ++SplitAfter)
+        MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
+          continue;
+    }
+  }
+
+  // Didn't find any candidates?
+  if (BestBefore == NumGaps)
+    return 0;
+
+  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+               << '-' << Uses[BestAfter] << ", " << BestDiff
+               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+  SmallVector<LiveInterval*, 4> SpillRegs;
+  LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+  SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+  SE.openIntv();
+  SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]);
+  SlotIndex SegStop  = SE.leaveIntvAfter(Uses[BestAfter]);
+  SE.useIntv(SegStart, SegStop);
+  SE.closeIntv();
+  SE.finish();
+  ++NumLocalSplits;
+
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//                          Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+                            SmallVectorImpl<LiveInterval*>&NewVRegs) {
+  SA->analyze(&VirtReg);
+
+  // Local intervals are handled separately.
+  if (LIS->intervalIsInOneMBB(VirtReg)) {
+    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+    return tryLocalSplit(VirtReg, Order, NewVRegs);
+  }
+
+  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+  // First try to split around a region spanning multiple blocks.
+  unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+  if (PhysReg || !NewVRegs.empty())
+    return PhysReg;
+
+  // Then isolate blocks with multiple uses.
+  SplitAnalysis::BlockPtrSet Blocks;
+  if (SA->getMultiUseBlocks(Blocks)) {
+    SmallVector<LiveInterval*, 4> SpillRegs;
+    LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+    SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks);
+    if (VerifyEnabled)
+      MF->verify(this, "After splitting live range around basic blocks");
+  }
+
+  // Don't assign any physregs.
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                                Spilling
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceWeight - Calculate the combined spill weight of
+/// interferences when assigning VirtReg to PhysReg.
+float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){
+  float Sum = 0;
+  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+    Q.collectInterferingVRegs();
+    if (Q.seenUnspillableVReg())
+      return HUGE_VALF;
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i)
+      Sum += Q.interferingVRegs()[i]->weight;
+  }
+  return Sum;
+}
+
+/// trySpillInterferences - Try to spill interfering registers instead of the
+/// current one. Only do it if the accumulated spill weight is smaller than the
+/// current spill weight.
+unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg,
+                                         AllocationOrder &Order,
+                                     SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled);
+  unsigned BestPhys = 0;
+  float BestWeight = 0;
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    float Weight = calcInterferenceWeight(VirtReg, PhysReg);
+    if (Weight == HUGE_VALF || Weight >= VirtReg.weight)
+      continue;
+    if (!BestPhys || Weight < BestWeight)
+      BestPhys = PhysReg, BestWeight = Weight;
+  }
+
+  // No candidates found.
+  if (!BestPhys)
+    return 0;
+
+  // Collect all interfering registers.
+  SmallVector<LiveInterval*, 8> Spills;
+  for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+    Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end());
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *VReg = Q.interferingVRegs()[i];
+      unassign(*VReg, *AI);
+    }
+  }
+
+  // Spill them all.
+  DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight "
+               << BestWeight << '\n');
+  for (unsigned i = 0, e = Spills.size(); i != e; ++i)
+    spiller().spill(Spills[i], NewVRegs, Spills);
+  return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                            Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // First try assigning a free register.
+  AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
+  while (unsigned PhysReg = Order.next()) {
+    if (!checkPhysRegInterference(VirtReg, PhysReg))
+      return PhysReg;
+  }
+
+  // Try to reassign interferences.
+  if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs))
+    return PhysReg;
+
+  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+  // Try splitting VirtReg or interferences.
+  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+  if (PhysReg || !NewVRegs.empty())
+    return PhysReg;
+
+  // Try to spill another interfering reg with less spill weight.
+  PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs);
+  if (PhysReg)
+    return PhysReg;
+
+  // Finally spill VirtReg itself.
+  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+  SmallVector<LiveInterval*, 1> pendingSpills;
+  spiller().spill(&VirtReg, NewVRegs, pendingSpills);
+
+  // The live virtual register requesting allocation was spilled, so tell
+  // the caller not to allocate anything during this round.
+  return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+               << "********** Function: "
+               << ((Value*)mf.getFunction())->getName() << '\n');
+
+  MF = &mf;
+  if (VerifyEnabled)
+    MF->verify(this, "Before greedy register allocator");
+
+  RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+  Indexes = &getAnalysis<SlotIndexes>();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+  ReservedRegs = TRI->getReservedRegs(*MF);
+  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+  Loops = &getAnalysis<MachineLoopInfo>();
+  LoopRanges = &getAnalysis<MachineLoopRanges>();
+  Bundles = &getAnalysis<EdgeBundles>();
+  SpillPlacer = &getAnalysis<SpillPlacement>();
+
+  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+
+  allocatePhysRegs();
+  addMBBLiveIns(MF);
+  LIS->addKillFlags();
+
+  // Run rewriter
+  {
+    NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+    VRM->rewrite(Indexes);
+  }
+
+  // The pass output is in VirtRegMap. Release all the transient data.
+  releaseMemory();
+
+  return true;
+}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5c62354a8872..b959878bcdba 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -12,13 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
 #include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -91,6 +92,19 @@ namespace {
   struct RALinScan : public MachineFunctionPass {
     static char ID;
     RALinScan() : MachineFunctionPass(ID) {
+      initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+      initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+      initializeRegisterCoalescerAnalysisGroup(
+        *PassRegistry::getPassRegistry());
+      initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+      initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+      initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+      initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+      initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+      initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+      
       // Initialize the queue to record recently-used registers.
       if (NumRecentlyUsedRegs > 0)
         RecentRegs.resize(NumRecentlyUsedRegs, 0);
@@ -127,7 +141,6 @@ namespace {
     BitVector allocatableRegs_;
     BitVector reservedRegs_;
     LiveIntervals* li_;
-    LiveStacks* ls_;
     MachineLoopInfo *loopInfo;
 
     /// handled_ - Intervals are added to the handled_ set in the order of their
@@ -183,6 +196,8 @@ namespace {
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<AliasAnalysis>();
       AU.addRequired<LiveIntervals>();
       AU.addPreserved<SlotIndexes>();
       if (StrongPHIElim)
@@ -193,12 +208,15 @@ namespace {
       AU.addRequired<CalculateSpillWeights>();
       if (PreSplitIntervals)
         AU.addRequiredID(PreAllocSplittingID);
-      AU.addRequired<LiveStacks>();
-      AU.addPreserved<LiveStacks>();
+      AU.addRequiredID(LiveStacksID);
+      AU.addPreservedID(LiveStacksID);
       AU.addRequired<MachineLoopInfo>();
       AU.addPreserved<MachineLoopInfo>();
       AU.addRequired<VirtRegMap>();
       AU.addPreserved<VirtRegMap>();
+      AU.addRequired<LiveDebugVariables>();
+      AU.addPreserved<LiveDebugVariables>();
+      AU.addRequiredID(MachineDominatorsID);
       AU.addPreservedID(MachineDominatorsID);
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -370,8 +388,19 @@ namespace {
   char RALinScan::ID = 0;
 }
 
-INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
-                "Linear Scan Register Allocator", false, false);
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+                "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+                "Linear Scan Register Allocator", false, false)
 
 void RALinScan::ComputeRelatedRegClasses() {
   // First pass, add all reg classes to the union, and determine at least one
@@ -402,8 +431,12 @@ void RALinScan::ComputeRelatedRegClasses() {
     for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
          I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
          I != E; ++I)
-      for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
-        RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+      for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+        const TargetRegisterClass *AliasClass = 
+          OneClassForEachPhysReg.lookup(*AS);
+        if (AliasClass)
+          RelatedRegClasses.unionSets(I->second, AliasClass);
+      }
 }
 
 /// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
@@ -431,8 +464,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
   unsigned CandReg;
   {
     MachineInstr *CopyMI;
-    if (vni->def != SlotIndex() && vni->isDefAccurate() &&
-        (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+    if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
       // Defined by a copy, try to extend SrcReg forward
       CandReg = CopyMI->getOperand(1).getReg();
     else if (TrivCoalesceEnds &&
@@ -442,6 +474,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
       CandReg = CopyMI->getOperand(0).getReg();
     else
       return Reg;
+
+    // If the target of the copy is a sub-register then don't coalesce.
+    if(CopyMI->getOperand(0).getSubReg())
+      return Reg;
   }
 
   if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
@@ -478,7 +514,6 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
   allocatableRegs_ = tri_->getAllocatableSet(fn);
   reservedRegs_ = tri_->getReservedRegs(fn);
   li_ = &getAnalysis<LiveIntervals>();
-  ls_ = &getAnalysis<LiveStacks>();
   loopInfo = &getAnalysis<MachineLoopInfo>();
 
   // We don't run the coalescer here because we have no reason to
@@ -505,6 +540,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
   // Rewrite spill code and update the PhysRegsUsed set.
   rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
 
+  // Write out new DBG_VALUE instructions.
+  getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
   assert(unhandled_.empty() && "Unhandled live intervals remain!");
 
   finalizeRegUses();
@@ -638,8 +676,6 @@ void RALinScan::linearScan() {
 
   // Look for physical registers that end up not being allocated even though
   // register allocator had to spill other registers in its register class.
-  if (ls_->getNumIntervals() == 0)
-    return;
   if (!vrm_->FindUnusedRegisters(li_))
     return;
 }
@@ -784,30 +820,6 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
   }
 }
 
-/// addStackInterval - Create a LiveInterval for stack if the specified live
-/// interval has been spilled.
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
-                             LiveIntervals *li_,
-                             MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
-  int SS = vrm_.getStackSlot(cur->reg);
-  if (SS == VirtRegMap::NO_STACK_SLOT)
-    return;
-
-  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
-  LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
-
-  VNInfo *VNI;
-  if (SI.hasAtLeastOneValue())
-    VNI = SI.getValNumInfo(0);
-  else
-    VNI = SI.getNextValue(SlotIndex(), 0, false,
-                          ls_->getVNInfoAllocator());
-
-  LiveInterval &RI = li_->getInterval(cur->reg);
-  // FIXME: This may be overly conservative.
-  SI.MergeRangesInAsValue(RI, VNI);
-}
-
 /// getConflictWeight - Return the number of conflicts between cur
 /// live interval and defs and uses of Reg weighted by loop depthes.
 static
@@ -925,13 +937,9 @@ LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
 }
 
 void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
-  bool isNew = DowngradedRegs.insert(Reg);
-  isNew = isNew; // Silence compiler warning.
-  assert(isNew && "Multiple reloads holding the same register?");
-  DowngradeMap.insert(std::make_pair(li->reg, Reg));
-  for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
-    isNew = DowngradedRegs.insert(*AS);
-    isNew = isNew; // Silence compiler warning.
+  for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+    bool isNew = DowngradedRegs.insert(*AS);
+    (void)isNew; // Silence compiler warning.
     assert(isNew && "Multiple reloads holding the same register?");
     DowngradeMap.insert(std::make_pair(li->reg, *AS));
   }
@@ -957,10 +965,11 @@ namespace {
 /// assignRegOrStackSlotAtInterval - assign a register if one is available, or
 /// spill.
 void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
-  DEBUG(dbgs() << "\tallocating current interval: ");
+  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+  DEBUG(dbgs() << "\tallocating current interval from "
+               << RC->getName() << ": ");
 
   // This is an implicitly defined live interval, just assign any register.
-  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
   if (cur->empty()) {
     unsigned physReg = vrm_->getRegAllocPref(cur->reg);
     if (!physReg)
@@ -984,8 +993,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
   if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
     VNInfo *vni = cur->begin()->valno;
-    if ((vni->def != SlotIndex()) && !vni->isUnused() &&
-         vni->isDefAccurate()) {
+    if (!vni->isUnused()) {
       MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
       if (CopyMI && CopyMI->isCopy()) {
         unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
@@ -1225,7 +1233,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     spiller_->spill(cur, added, spillIs);
 
     std::sort(added.begin(), added.end(), LISorter());
-    addStackInterval(cur, ls_, li_, mri_, *vrm_);
     if (added.empty())
       return;  // Early exit if all spills were folded.
 
@@ -1300,7 +1307,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     if (sli->beginIndex() < earliestStart)
       earliestStart = sli->beginIndex();
     spiller_->spill(sli, added, spillIs);
-    addStackInterval(sli, ls_, li_, mri_, *vrm_);
     spilled.insert(sli->reg);
   }
 
@@ -1419,8 +1425,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
   std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
   // Resolve second part of the hint (if possible) given the current allocation.
   unsigned physReg = Hint.second;
-  if (physReg &&
-      TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+  if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
     physReg = vrm_->getPhys(physReg);
 
   TargetRegisterClass::iterator I, E;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61f337bab49c..ea0d1fe0233f 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,9 +31,6 @@
 
 #define DEBUG_TYPE "regalloc"
 
-#include "PBQP/HeuristicSolver.h"
-#include "PBQP/Graph.h"
-#include "PBQP/Heuristics/Briggs.h"
 #include "RenderMachineFunction.h"
 #include "Splitter.h"
 #include "VirtRegMap.h"
@@ -41,9 +38,13 @@
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Support/Debug.h"
@@ -51,7 +52,6 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include <limits>
-#include <map>
 #include <memory>
 #include <set>
 #include <vector>
@@ -60,7 +60,7 @@ using namespace llvm;
 
 static RegisterRegAlloc
 registerPBQPRepAlloc("pbqp", "PBQP register allocator",
-                       llvm::createPBQPRegisterAllocator);
+                       createDefaultPBQPRegisterAllocator);
 
 static cl::opt<bool>
 pbqpCoalescing("pbqp-coalescing",
@@ -69,698 +69,471 @@ pbqpCoalescing("pbqp-coalescing",
 
 static cl::opt<bool>
 pbqpPreSplitting("pbqp-pre-splitting",
-                 cl::desc("Pre-splite before PBQP register allocation."),
+                 cl::desc("Pre-split before PBQP register allocation."),
                  cl::init(false), cl::Hidden);
 
 namespace {
 
-  ///
-  /// PBQP based allocators solve the register allocation problem by mapping
-  /// register allocation problems to Partitioned Boolean Quadratic
-  /// Programming problems.
-  class PBQPRegAlloc : public MachineFunctionPass {
-  public:
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+  static char ID;
+
+  /// Construct a PBQP register allocator.
+  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
+      : MachineFunctionPass(ID), builder(b) {
+    initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+    initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+    initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+    initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+    initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+    initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+    initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+    initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+    initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+  }
 
-    static char ID;
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "PBQP Register Allocator";
+  }
 
-    /// Construct a PBQP register allocator.
-    PBQPRegAlloc() : MachineFunctionPass(ID) {}
+  /// PBQP analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &au) const;
 
-    /// Return the pass name.
-    virtual const char* getPassName() const {
-      return "PBQP Register Allocator";
-    }
+  /// Perform register allocation
+  virtual bool runOnMachineFunction(MachineFunction &MF);
 
-    /// PBQP analysis usage.
-    virtual void getAnalysisUsage(AnalysisUsage &au) const {
-      au.addRequired<SlotIndexes>();
-      au.addPreserved<SlotIndexes>();
-      au.addRequired<LiveIntervals>();
-      //au.addRequiredID(SplitCriticalEdgesID);
-      au.addRequired<RegisterCoalescer>();
-      au.addRequired<CalculateSpillWeights>();
-      au.addRequired<LiveStacks>();
-      au.addPreserved<LiveStacks>();
-      au.addRequired<MachineLoopInfo>();
-      au.addPreserved<MachineLoopInfo>();
-      if (pbqpPreSplitting)
-        au.addRequired<LoopSplitter>();
-      au.addRequired<VirtRegMap>();
-      au.addRequired<RenderMachineFunction>();
-      MachineFunctionPass::getAnalysisUsage(au);
-    }
+private:
 
-    /// Perform register allocation
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+  typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+  typedef std::vector<const LiveInterval*> Node2LIMap;
+  typedef std::vector<unsigned> AllowedSet;
+  typedef std::vector<AllowedSet> AllowedSetMap;
+  typedef std::pair<unsigned, unsigned> RegPair;
+  typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+  typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+  typedef std::set<unsigned> RegSet;
 
-  private:
 
-    class LIOrdering {
-    public:
-      bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
-        return li1->reg < li2->reg;
-      }
-    };
-
-    typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
-    typedef std::vector<const LiveInterval*> Node2LIMap;
-    typedef std::vector<unsigned> AllowedSet;
-    typedef std::vector<AllowedSet> AllowedSetMap;
-    typedef std::set<unsigned> RegSet;
-    typedef std::pair<unsigned, unsigned> RegPair;
-    typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
-
-    typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
-
-    typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
-
-    MachineFunction *mf;
-    const TargetMachine *tm;
-    const TargetRegisterInfo *tri;
-    const TargetInstrInfo *tii;
-    const MachineLoopInfo *loopInfo;
-    MachineRegisterInfo *mri;
-    RenderMachineFunction *rmf;
-
-    LiveIntervals *lis;
-    LiveStacks *lss;
-    VirtRegMap *vrm;
-
-    LI2NodeMap li2Node;
-    Node2LIMap node2LI;
-    AllowedSetMap allowedSets;
-    LiveIntervalSet vregIntervalsToAlloc,
-                    emptyVRegIntervals;
-    NodeVector problemNodes;
-
-
-    /// Builds a PBQP cost vector.
-    template <typename RegContainer>
-    PBQP::Vector buildCostVector(unsigned vReg,
-                                 const RegContainer &allowed,
-                                 const CoalesceMap &cealesces,
-                                 PBQP::PBQPNum spillCost) const;
-
-    /// \brief Builds a PBQP interference matrix.
-    ///
-    /// @return Either a pointer to a non-zero PBQP matrix representing the
-    ///         allocation option costs, or a null pointer for a zero matrix.
-    ///
-    /// Expects allowed sets for two interfering LiveIntervals. These allowed
-    /// sets should contain only allocable registers from the LiveInterval's
-    /// register class, with any interfering pre-colored registers removed.
-    template <typename RegContainer>
-    PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
-                                          const RegContainer &allowed2) const;
-
-    ///
-    /// Expects allowed sets for two potentially coalescable LiveIntervals,
-    /// and an estimated benefit due to coalescing. The allowed sets should
-    /// contain only allocable registers from the LiveInterval's register
-    /// classes, with any interfering pre-colored registers removed.
-    template <typename RegContainer>
-    PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
-                                        const RegContainer &allowed2,
-                                        PBQP::PBQPNum cBenefit) const;
-
-    /// \brief Finds coalescing opportunities and returns them as a map.
-    ///
-    /// Any entries in the map are guaranteed coalescable, even if their
-    /// corresponding live intervals overlap.
-    CoalesceMap findCoalesces();
-
-    /// \brief Finds the initial set of vreg intervals to allocate.
-    void findVRegIntervalsToAlloc();
-
-    /// \brief Constructs a PBQP problem representation of the register
-    /// allocation problem for this function.
-    ///
-    /// @return a PBQP solver object for the register allocation problem.
-    PBQP::Graph constructPBQPProblem();
-
-    /// \brief Adds a stack interval if the given live interval has been
-    /// spilled. Used to support stack slot coloring.
-    void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
-    /// \brief Given a solved PBQP problem maps this solution back to a register
-    /// assignment.
-    bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
-
-    /// \brief Postprocessing before final spilling. Sets basic block "live in"
-    /// variables.
-    void finalizeAlloc() const;
-
-  };
-
-  char PBQPRegAlloc::ID = 0;
-}
+  std::auto_ptr<PBQPBuilder> builder;
 
+  MachineFunction *mf;
+  const TargetMachine *tm;
+  const TargetRegisterInfo *tri;
+  const TargetInstrInfo *tii;
+  const MachineLoopInfo *loopInfo;
+  MachineRegisterInfo *mri;
+  RenderMachineFunction *rmf;
 
-template <typename RegContainer>
-PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
-                                           const RegContainer &allowed,
-                                           const CoalesceMap &coalesces,
-                                           PBQP::PBQPNum spillCost) const {
+  LiveIntervals *lis;
+  LiveStacks *lss;
+  VirtRegMap *vrm;
 
-  typedef typename RegContainer::const_iterator AllowedItr;
+  RegSet vregsToAlloc, emptyIntervalVRegs;
 
-  // Allocate vector. Additional element (0th) used for spill option
-  PBQP::Vector v(allowed.size() + 1, 0);
+  /// \brief Finds the initial set of vreg intervals to allocate.
+  void findVRegIntervalsToAlloc();
 
-  v[0] = spillCost;
+  /// \brief Adds a stack interval if the given live interval has been
+  /// spilled. Used to support stack slot coloring.
+  void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
 
-  // Iterate over the allowed registers inserting coalesce benefits if there
-  // are any.
-  unsigned ai = 0;
-  for (AllowedItr itr = allowed.begin(), end = allowed.end();
-       itr != end; ++itr, ++ai) {
+  /// \brief Given a solved PBQP problem maps this solution back to a register
+  /// assignment.
+  bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+                         const PBQP::Solution &solution);
 
-    unsigned pReg = *itr;
+  /// \brief Postprocessing before final spilling. Sets basic block "live in"
+  /// variables.
+  void finalizeAlloc() const;
 
-    CoalesceMap::const_iterator cmItr =
-      coalesces.find(RegPair(vReg, pReg));
+};
 
-    // No coalesce - on to the next preg.
-    if (cmItr == coalesces.end())
-      continue;
+char RegAllocPBQP::ID = 0;
 
-    // We have a coalesce - insert the benefit.
-    v[ai + 1] = -cmItr->second;
-  }
+} // End anonymous namespace.
 
-  return v;
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+  Node2VReg::const_iterator vregItr = node2VReg.find(node);
+  assert(vregItr != node2VReg.end() && "No vreg for node.");
+  return vregItr->second;
 }
 
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
-      const RegContainer &allowed1, const RegContainer &allowed2) const {
-
-  typedef typename RegContainer::const_iterator RegContainerIterator;
-
-  // Construct a PBQP matrix representing the cost of allocation options. The
-  // rows and columns correspond to the allocation options for the two live
-  // intervals.  Elements will be infinite where corresponding registers alias,
-  // since we cannot allocate aliasing registers to interfering live intervals.
-  // All other elements (non-aliasing combinations) will have zero cost. Note
-  // that the spill option (element 0,0) has zero cost, since we can allocate
-  // both intervals to memory safely (the cost for each individual allocation
-  // to memory is accounted for by the cost vectors for each live interval).
-  PBQP::Matrix *m =
-    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
-  // Assume this is a zero matrix until proven otherwise.  Zero matrices occur
-  // between interfering live ranges with non-overlapping register sets (e.g.
-  // non-overlapping reg classes, or disjoint sets of allowed regs within the
-  // same class). The term "overlapping" is used advisedly: sets which do not
-  // intersect, but contain registers which alias, will have non-zero matrices.
-  // We optimize zero matrices away to improve solver speed.
-  bool isZeroMatrix = true;
-
-
-  // Row index. Starts at 1, since the 0th row is for the spill option, which
-  // is always zero.
-  unsigned ri = 1;
-
-  // Iterate over allowed sets, insert infinities where required.
-  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
-       a1Itr != a1End; ++a1Itr) {
-
-    // Column index, starts at 1 as for row index.
-    unsigned ci = 1;
-    unsigned reg1 = *a1Itr;
-
-    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
-         a2Itr != a2End; ++a2Itr) {
-
-      unsigned reg2 = *a2Itr;
-
-      // If the row/column regs are identical or alias insert an infinity.
-      if (tri->regsOverlap(reg1, reg2)) {
-        (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
-        isZeroMatrix = false;
-      }
-
-      ++ci;
-    }
-
-    ++ri;
-  }
-
-  // If this turns out to be a zero matrix...
-  if (isZeroMatrix) {
-    // free it and return null.
-    delete m;
-    return 0;
-  }
-
-  // ...otherwise return the cost matrix.
-  return m;
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+  VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+  assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+  return nodeItr->second;
+  
 }
 
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
-      const RegContainer &allowed1, const RegContainer &allowed2,
-      PBQP::PBQPNum cBenefit) const {
-
-  typedef typename RegContainer::const_iterator RegContainerIterator;
-
-  // Construct a PBQP Matrix representing the benefits of coalescing. As with
-  // interference matrices the rows and columns represent allowed registers
-  // for the LiveIntervals which are (potentially) to be coalesced. The amount
-  // -cBenefit will be placed in any element representing the same register
-  // for both intervals.
-  PBQP::Matrix *m =
-    new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
-  // Reset costs to zero.
-  m->reset(0);
-
-  // Assume the matrix is zero till proven otherwise. Zero matrices will be
-  // optimized away as in the interference case.
-  bool isZeroMatrix = true;
-
-  // Row index. Starts at 1, since the 0th row is for the spill option, which
-  // is always zero.
-  unsigned ri = 1;
-
-  // Iterate over the allowed sets, insert coalescing benefits where
-  // appropriate.
-  for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
-       a1Itr != a1End; ++a1Itr) {
-
-    // Column index, starts at 1 as for row index.
-    unsigned ci = 1;
-    unsigned reg1 = *a1Itr;
-
-    for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
-         a2Itr != a2End; ++a2Itr) {
-
-      // If the row and column represent the same register insert a beneficial
-      // cost to preference this allocation - it would allow us to eliminate a
-      // move instruction.
-      if (reg1 == *a2Itr) {
-        (*m)[ri][ci] = -cBenefit;
-        isZeroMatrix = false;
-      }
-
-      ++ci;
-    }
-
-    ++ri;
-  }
-
-  // If this turns out to be a zero matrix...
-  if (isZeroMatrix) {
-    // ...free it and return null.
-    delete m;
-    return 0;
-  }
-
-  return m;
+const PBQPRAProblem::AllowedSet&
+  PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+  AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+  assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+  const AllowedSet &allowedSet = allowedSetItr->second;
+  return allowedSet;
 }
 
-PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
-
-  typedef MachineFunction::const_iterator MFIterator;
-  typedef MachineBasicBlock::const_iterator MBBIterator;
-  typedef LiveInterval::const_vni_iterator VNIIterator;
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+  assert(isPRegOption(vreg, option) && "Not a preg option.");
 
-  CoalesceMap coalescesFound;
+  const AllowedSet& allowedSet = getAllowedSet(vreg);
+  assert(option <= allowedSet.size() && "Option outside allowed set.");
+  return allowedSet[option - 1];
+}
 
-  // To find coalesces we need to iterate over the function looking for
-  // copy instructions.
-  for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
-       bbItr != bbEnd; ++bbItr) {
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+                                                const LiveIntervals *lis,
+                                                const MachineLoopInfo *loopInfo,
+                                                const RegSet &vregs) {
 
-    const MachineBasicBlock *mbb = &*bbItr;
+  typedef std::vector<const LiveInterval*> LIVector;
 
-    for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
-         iItr != iEnd; ++iItr) {
+  MachineRegisterInfo *mri = &mf->getRegInfo();
+  const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();  
 
-      const MachineInstr *instr = &*iItr;
+  std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+  PBQP::Graph &g = p->getGraph();
+  RegSet pregs;
 
-      // If this isn't a copy then continue to the next instruction.
-      if (!instr->isCopy())
-        continue;
-
-      unsigned srcReg = instr->getOperand(1).getReg();
-      unsigned dstReg = instr->getOperand(0).getReg();
+  // Collect the set of preg intervals, record that they're used in the MF.
+  for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+      pregs.insert(itr->first);
+      mri->setPhysRegUsed(itr->first);
+    }
+  }
 
-      // If the registers are already the same our job is nice and easy.
-      if (dstReg == srcReg)
-        continue;
+  BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+  // Iterate over vregs. 
+  for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+       vregItr != vregEnd; ++vregItr) {
+    unsigned vreg = *vregItr;
+    const TargetRegisterClass *trc = mri->getRegClass(vreg);
+    const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+    // Compute an initial allowed set for the current vreg.
+    typedef std::vector<unsigned> VRAllowed;
+    VRAllowed vrAllowed;
+    for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
+                                       aoEnd = trc->allocation_order_end(*mf);
+         aoItr != aoEnd; ++aoItr) {
+      unsigned preg = *aoItr;
+      if (!reservedRegs.test(preg)) {
+        vrAllowed.push_back(preg);
+      }
+    }
 
-      bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
-           dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+    // Remove any physical registers which overlap.
+    for (RegSet::const_iterator pregItr = pregs.begin(),
+                                pregEnd = pregs.end();
+         pregItr != pregEnd; ++pregItr) {
+      unsigned preg = *pregItr;
+      const LiveInterval *pregLI = &lis->getInterval(preg);
 
-      // If both registers are physical then we can't coalesce.
-      if (srcRegIsPhysical && dstRegIsPhysical)
+      if (pregLI->empty()) {
         continue;
+      }
 
-      // If it's a copy that includes two virtual register but the source and
-      // destination classes differ then we can't coalesce.
-      if (!srcRegIsPhysical && !dstRegIsPhysical &&
-          mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
+      if (!vregLI->overlaps(*pregLI)) {
         continue;
-
-      // If one is physical and one is virtual, check that the physical is
-      // allocatable in the class of the virtual.
-      if (srcRegIsPhysical && !dstRegIsPhysical) {
-        const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
-        if (std::find(dstRegClass->allocation_order_begin(*mf),
-                      dstRegClass->allocation_order_end(*mf), srcReg) ==
-            dstRegClass->allocation_order_end(*mf))
-          continue;
       }
-      if (!srcRegIsPhysical && dstRegIsPhysical) {
-        const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
-        if (std::find(srcRegClass->allocation_order_begin(*mf),
-                      srcRegClass->allocation_order_end(*mf), dstReg) ==
-            srcRegClass->allocation_order_end(*mf))
-          continue;
-      }
-
-      // If we've made it here we have a copy with compatible register classes.
-      // We can probably coalesce, but we need to consider overlap.
-      const LiveInterval *srcLI = &lis->getInterval(srcReg),
-                         *dstLI = &lis->getInterval(dstReg);
 
-      if (srcLI->overlaps(*dstLI)) {
-        // Even in the case of an overlap we might still be able to coalesce,
-        // but we need to make sure that no definition of either range occurs
-        // while the other range is live.
+      // Remove the register from the allowed set.
+      VRAllowed::iterator eraseItr =
+        std::find(vrAllowed.begin(), vrAllowed.end(), preg);
 
-        // Otherwise start by assuming we're ok.
-        bool badDef = false;
-
-        // Test all defs of the source range.
-        for (VNIIterator
-               vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
-               vniItr != vniEnd; ++vniItr) {
+      if (eraseItr != vrAllowed.end()) {
+        vrAllowed.erase(eraseItr);
+      }
 
-          // If we find a poorly defined def we err on the side of caution.
-          if (!(*vniItr)->def.isValid()) {
-            badDef = true;
-            break;
-          }
+      // Also remove any aliases.
+      const unsigned *aliasItr = tri->getAliasSet(preg);
+      if (aliasItr != 0) {
+        for (; *aliasItr != 0; ++aliasItr) {
+          VRAllowed::iterator eraseItr =
+            std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
 
-          // If we find a def that kills the coalescing opportunity then
-          // record it and break from the loop.
-          if (dstLI->liveAt((*vniItr)->def)) {
-            badDef = true;
-            break;
+          if (eraseItr != vrAllowed.end()) {
+            vrAllowed.erase(eraseItr);
           }
         }
+      }
+    }
 
-        // If we have a bad def give up, continue to the next instruction.
-        if (badDef)
-          continue;
-
-        // Otherwise test definitions of the destination range.
-        for (VNIIterator
-               vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
-               vniItr != vniEnd; ++vniItr) {
+    // Construct the node.
+    PBQP::Graph::NodeItr node = 
+      g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
 
-          // We want to make sure we skip the copy instruction itself.
-          if ((*vniItr)->getCopy() == instr)
-            continue;
+    // Record the mapping and allowed set in the problem.
+    p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
 
-          if (!(*vniItr)->def.isValid()) {
-            badDef = true;
-            break;
-          }
+    PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+        vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
 
-          if (srcLI->liveAt((*vniItr)->def)) {
-            badDef = true;
-            break;
-          }
-        }
+    addSpillCosts(g.getNodeCosts(node), spillCost);
+  }
 
-        // As before a bad def we give up and continue to the next instr.
-        if (badDef)
-          continue;
+  for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+         vr1Itr != vrEnd; ++vr1Itr) {
+    unsigned vr1 = *vr1Itr;
+    const LiveInterval &l1 = lis->getInterval(vr1);
+    const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+    for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+         vr2Itr != vrEnd; ++vr2Itr) {
+      unsigned vr2 = *vr2Itr;
+      const LiveInterval &l2 = lis->getInterval(vr2);
+      const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+      assert(!l2.empty() && "Empty interval in vreg set?");
+      if (l1.overlaps(l2)) {
+        PBQP::Graph::EdgeItr edge =
+          g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+                    PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+        addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
       }
-
-      // If we make it to here then either the ranges didn't overlap, or they
-      // did, but none of their definitions would prevent us from coalescing.
-      // We're good to go with the coalesce.
-
-      float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
-
-      coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
-      coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
     }
-
   }
 
-  return coalescesFound;
+  return p;
 }
 
-void PBQPRegAlloc::findVRegIntervalsToAlloc() {
-
-  // Iterate over all live ranges.
-  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
-       itr != end; ++itr) {
-
-    // Ignore physical ones.
-    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
-      continue;
-
-    LiveInterval *li = itr->second;
-
-    // If this live interval is non-empty we will use pbqp to allocate it.
-    // Empty intervals we allocate in a simple post-processing stage in
-    // finalizeAlloc.
-    if (!li->empty()) {
-      vregIntervalsToAlloc.insert(li);
-    }
-    else {
-      emptyVRegIntervals.insert(li);
-    }
-  }
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+                                PBQP::PBQPNum spillCost) {
+  costVec[0] = spillCost;
 }
 
-PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
-
-  typedef std::vector<const LiveInterval*> LIVector;
-  typedef std::vector<unsigned> RegVector;
+void PBQPBuilder::addInterferenceCosts(
+                                    PBQP::Matrix &costMat,
+                                    const PBQPRAProblem::AllowedSet &vr1Allowed,
+                                    const PBQPRAProblem::AllowedSet &vr2Allowed,
+                                    const TargetRegisterInfo *tri) {
+  assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+  assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
 
-  // This will store the physical intervals for easy reference.
-  LIVector physIntervals;
+  for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+    unsigned preg1 = vr1Allowed[i];
 
-  // Start by clearing the old node <-> live interval mappings & allowed sets
-  li2Node.clear();
-  node2LI.clear();
-  allowedSets.clear();
-
-  // Populate physIntervals, update preg use:
-  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
-       itr != end; ++itr) {
+    for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+      unsigned preg2 = vr2Allowed[j];
 
-    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
-      physIntervals.push_back(itr->second);
-      mri->setPhysRegUsed(itr->second->reg);
+      if (tri->regsOverlap(preg1, preg2)) {
+        costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+      }
     }
   }
+}
 
-  // Iterate over vreg intervals, construct live interval <-> node number
-  //  mappings.
-  for (LiveIntervalSet::const_iterator
-       itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
-       itr != end; ++itr) {
-    const LiveInterval *li = *itr;
-
-    li2Node[li] = node2LI.size();
-    node2LI.push_back(li);
-  }
-
-  // Get the set of potential coalesces.
-  CoalesceMap coalesces;
-
-  if (pbqpCoalescing) {
-    coalesces = findCoalesces();
-  }
-
-  // Construct a PBQP solver for this problem
-  PBQP::Graph problem;
-  problemNodes.resize(vregIntervalsToAlloc.size());
-
-  // Resize allowedSets container appropriately.
-  allowedSets.resize(vregIntervalsToAlloc.size());
-
-  BitVector ReservedRegs = tri->getReservedRegs(*mf);
-
-  // Iterate over virtual register intervals to compute allowed sets...
-  for (unsigned node = 0; node < node2LI.size(); ++node) {
-
-    // Grab pointers to the interval and its register class.
-    const LiveInterval *li = node2LI[node];
-    const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+                                                MachineFunction *mf,
+                                                const LiveIntervals *lis,
+                                                const MachineLoopInfo *loopInfo,
+                                                const RegSet &vregs) {
 
-    // Start by assuming all allocable registers in the class are allowed...
-    RegVector liAllowed;
-    TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
-    TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
-    for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
-      if (!ReservedRegs.test(*it))
-        liAllowed.push_back(*it);
+  std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+  PBQP::Graph &g = p->getGraph();
 
-    // Eliminate the physical registers which overlap with this range, along
-    // with all their aliases.
-    for (LIVector::iterator pItr = physIntervals.begin(),
-       pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+  const TargetMachine &tm = mf->getTarget();
+  CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
 
-      if (!li->overlaps(**pItr))
-        continue;
+  // Scan the machine function and add a coalescing cost whenever CoalescerPair
+  // gives the Ok.
+  for (MachineFunction::const_iterator mbbItr = mf->begin(),
+                                       mbbEnd = mf->end();
+       mbbItr != mbbEnd; ++mbbItr) {
+    const MachineBasicBlock *mbb = &*mbbItr;
 
-      unsigned pReg = (*pItr)->reg;
-
-      // If we get here then the live intervals overlap, but we're still ok
-      // if they're coalescable.
-      if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
-        continue;
+    for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+                                           miEnd = mbb->end();
+         miItr != miEnd; ++miItr) {
+      const MachineInstr *mi = &*miItr;
 
-      // If we get here then we have a genuine exclusion.
+      if (!cp.setRegisters(mi)) {
+        continue; // Not coalescable.
+      }
 
-      // Remove the overlapping reg...
-      RegVector::iterator eraseItr =
-        std::find(liAllowed.begin(), liAllowed.end(), pReg);
+      if (cp.getSrcReg() == cp.getDstReg()) {
+        continue; // Already coalesced.
+      }
 
-      if (eraseItr != liAllowed.end())
-        liAllowed.erase(eraseItr);
+      unsigned dst = cp.getDstReg(),
+               src = cp.getSrcReg();
 
-      const unsigned *aliasItr = tri->getAliasSet(pReg);
+      const float copyFactor = 0.5; // Cost of copy relative to load. Current
+      // value plucked randomly out of the air.
+                                      
+      PBQP::PBQPNum cBenefit =
+        copyFactor * LiveIntervals::getSpillWeight(false, true,
+                                                   loopInfo->getLoopDepth(mbb));
 
-      if (aliasItr != 0) {
-        // ...and its aliases.
-        for (; *aliasItr != 0; ++aliasItr) {
-          RegVector::iterator eraseItr =
-            std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+      if (cp.isPhys()) {
+        if (!lis->isAllocatable(dst)) {
+          continue;
+        }
 
-          if (eraseItr != liAllowed.end()) {
-            liAllowed.erase(eraseItr);
+        const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+        unsigned pregOpt = 0;  
+        while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+          ++pregOpt;
+        }
+        if (pregOpt < allowed.size()) {
+          ++pregOpt; // +1 to account for spill option.
+          PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+          addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+        }
+      } else {
+        const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+        const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+        PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+        PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+        PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+        if (edge == g.edgesEnd()) {
+          edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+                                                      allowed2->size() + 1,
+                                                      0));
+        } else {
+          if (g.getEdgeNode1(edge) == node2) {
+            std::swap(node1, node2);
+            std::swap(allowed1, allowed2);
           }
         }
+            
+        addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+                           cBenefit);
       }
     }
+  }
 
-    // Copy the allowed set into a member vector for use when constructing cost
-    // vectors & matrices, and mapping PBQP solutions back to assignments.
-    allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+  return p;
+}
 
-    // Set the spill cost to the interval weight, or epsilon if the
-    // interval weight is zero
-    PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
-        li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+                                                   unsigned pregOption,
+                                                   PBQP::PBQPNum benefit) {
+  costVec[pregOption] += -benefit;
+}
 
-    // Build a cost vector for this interval.
-    problemNodes[node] =
-      problem.addNode(
-        buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+                                    PBQP::Matrix &costMat,
+                                    const PBQPRAProblem::AllowedSet &vr1Allowed,
+                                    const PBQPRAProblem::AllowedSet &vr2Allowed,
+                                    PBQP::PBQPNum benefit) {
 
-  }
+  assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+  assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
 
+  for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+    unsigned preg1 = vr1Allowed[i];
+    for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+      unsigned preg2 = vr2Allowed[j];
+
+      if (preg1 == preg2) {
+        costMat[i + 1][j + 1] += -benefit;
+      } 
+    }
+  }
+}
 
-  // Now add the cost matrices...
-  for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
-    const LiveInterval *li = node2LI[node1];
 
-    // Test for live range overlaps and insert interference matrices.
-    for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
-      const LiveInterval *li2 = node2LI[node2];
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+  au.addRequired<SlotIndexes>();
+  au.addPreserved<SlotIndexes>();
+  au.addRequired<LiveIntervals>();
+  //au.addRequiredID(SplitCriticalEdgesID);
+  au.addRequired<RegisterCoalescer>();
+  au.addRequired<CalculateSpillWeights>();
+  au.addRequired<LiveStacks>();
+  au.addPreserved<LiveStacks>();
+  au.addRequired<MachineLoopInfo>();
+  au.addPreserved<MachineLoopInfo>();
+  if (pbqpPreSplitting)
+    au.addRequired<LoopSplitter>();
+  au.addRequired<VirtRegMap>();
+  au.addRequired<RenderMachineFunction>();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
 
-      CoalesceMap::const_iterator cmItr =
-        coalesces.find(RegPair(li->reg, li2->reg));
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
 
-      PBQP::Matrix *m = 0;
+  // Iterate over all live ranges.
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
 
-      if (cmItr != coalesces.end()) {
-        m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
-                                  cmItr->second);
-      }
-      else if (li->overlaps(*li2)) {
-        m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
-      }
+    // Ignore physical ones.
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+      continue;
 
-      if (m != 0) {
-        problem.addEdge(problemNodes[node1],
-                        problemNodes[node2],
-                        *m);
+    LiveInterval *li = itr->second;
 
-        delete m;
-      }
+    // If this live interval is non-empty we will use pbqp to allocate it.
+    // Empty intervals we allocate in a simple post-processing stage in
+    // finalizeAlloc.
+    if (!li->empty()) {
+      vregsToAlloc.insert(li->reg);
+    } else {
+      emptyIntervalVRegs.insert(li->reg);
     }
   }
-
-  assert(problem.getNumNodes() == allowedSets.size());
-/*
-  std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
-            << problem.getNumEdges() << " edges.\n";
-
-  problem.printDot(std::cerr);
-*/
-  // We're done, PBQP problem constructed - return it.
-  return problem;
 }
 
-void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
                                     MachineRegisterInfo* mri) {
   int stackSlot = vrm->getStackSlot(spilled->reg);
 
-  if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+  if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
     return;
+  }
 
   const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
   LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
 
   VNInfo *vni;
-  if (stackInterval.getNumValNums() != 0)
+  if (stackInterval.getNumValNums() != 0) {
     vni = stackInterval.getValNumInfo(0);
-  else
+  } else {
     vni = stackInterval.getNextValue(
-      SlotIndex(), 0, false, lss->getVNInfoAllocator());
+      SlotIndex(), 0, lss->getVNInfoAllocator());
+  }
 
   LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
   stackInterval.MergeRangesInAsValue(rhsInterval, vni);
 }
 
-bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
-
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+                                     const PBQP::Solution &solution) {
   // Set to true if we have any spills
   bool anotherRoundNeeded = false;
 
   // Clear the existing allocation.
   vrm->clearAllVirt();
 
-  // Iterate over the nodes mapping the PBQP solution to a register assignment.
-  for (unsigned node = 0; node < node2LI.size(); ++node) {
-    unsigned virtReg = node2LI[node]->reg,
-             allocSelection = solution.getSelection(problemNodes[node]);
-
-
-    // If the PBQP solution is non-zero it's a physical register...
-    if (allocSelection != 0) {
-      // Get the physical reg, subtracting 1 to account for the spill option.
-      unsigned physReg = allowedSets[node][allocSelection - 1];
-
-      DEBUG(dbgs() << "VREG " << virtReg << " -> "
-                   << tri->getName(physReg) << "\n");
-
-      assert(physReg != 0);
-
-      // Add to the virt reg map and update the used phys regs.
-      vrm->assignVirt2Phys(virtReg, physReg);
-    }
-    // ...Otherwise it's a spill.
-    else {
-
-      // Make sure we ignore this virtual reg on the next round
-      // of allocation
-      vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
-
-      // Insert spill ranges for this live range
-      const LiveInterval *spillInterval = node2LI[node];
-      double oldSpillWeight = spillInterval->weight;
+  const PBQP::Graph &g = problem.getGraph();
+  // Iterate over the nodes mapping the PBQP solution to a register
+  // assignment.
+  for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+                                 nodeEnd = g.nodesEnd();
+       node != nodeEnd; ++node) {
+    unsigned vreg = problem.getVRegForNode(node);
+    unsigned alloc = solution.getSelection(node);
+
+    if (problem.isPRegOption(vreg, alloc)) {
+      unsigned preg = problem.getPRegForOption(vreg, alloc);    
+      DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+      assert(preg != 0 && "Invalid preg selected.");
+      vrm->assignVirt2Phys(vreg, preg);      
+    } else if (problem.isSpillOption(vreg, alloc)) {
+      vregsToAlloc.erase(vreg);
+      const LiveInterval* spillInterval = &lis->getInterval(vreg);
+      double oldWeight = spillInterval->weight;
       SmallVector<LiveInterval*, 8> spillIs;
       rmf->rememberUseDefs(spillInterval);
       std::vector<LiveInterval*> newSpills =
@@ -768,42 +541,42 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
       addStackInterval(spillInterval, mri);
       rmf->rememberSpills(spillInterval, newSpills);
 
-      (void) oldSpillWeight;
-      DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
-                   << oldSpillWeight << ", New vregs: ");
+      (void) oldWeight;
+      DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+                   << oldWeight << ", New vregs: ");
 
       // Copy any newly inserted live intervals into the list of regs to
       // allocate.
       for (std::vector<LiveInterval*>::const_iterator
            itr = newSpills.begin(), end = newSpills.end();
            itr != end; ++itr) {
-
         assert(!(*itr)->empty() && "Empty spill range.");
-
         DEBUG(dbgs() << (*itr)->reg << " ");
-
-        vregIntervalsToAlloc.insert(*itr);
+        vregsToAlloc.insert((*itr)->reg);
       }
 
       DEBUG(dbgs() << ")\n");
 
       // We need another round if spill intervals were added.
       anotherRoundNeeded |= !newSpills.empty();
+    } else {
+      assert(false && "Unknown allocation option.");
     }
   }
 
   return !anotherRoundNeeded;
 }
 
-void PBQPRegAlloc::finalizeAlloc() const {
+
+void RegAllocPBQP::finalizeAlloc() const {
   typedef LiveIntervals::iterator LIIterator;
   typedef LiveInterval::Ranges::const_iterator LRIterator;
 
   // First allocate registers for the empty intervals.
-  for (LiveIntervalSet::const_iterator
-         itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+  for (RegSet::const_iterator
+         itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
          itr != end; ++itr) {
-    LiveInterval *li = *itr;
+    LiveInterval *li = &lis->getInterval(*itr);
 
     unsigned physReg = vrm->getRegAllocPref(li->reg);
 
@@ -828,11 +601,9 @@ void PBQPRegAlloc::finalizeAlloc() const {
     // Get the physical register for this interval
     if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
       reg = li->reg;
-    }
-    else if (vrm->isAssignedReg(li->reg)) {
+    } else if (vrm->isAssignedReg(li->reg)) {
       reg = vrm->getPhys(li->reg);
-    }
-    else {
+    } else {
       // Ranges which are assigned a stack slot only are ignored.
       continue;
     }
@@ -849,7 +620,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
       // Find the set of basic blocks which this range is live into...
       if (lis->findLiveInMBBs(lrItr->start, lrItr->end,  liveInMBBs)) {
         // And add the physreg for this interval to their live-in sets.
-        for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+        for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
           if (liveInMBBs[i] != entryMBB) {
             if (!liveInMBBs[i]->isLiveIn(reg)) {
               liveInMBBs[i]->addLiveIn(reg);
@@ -863,7 +634,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
 
 }
 
-bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 
   mf = &MF;
   tm = &mf->getTarget();
@@ -894,7 +665,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   findVRegIntervalsToAlloc();
 
   // If there are non-empty intervals allocate them using pbqp.
-  if (!vregIntervalsToAlloc.empty()) {
+  if (!vregsToAlloc.empty()) {
 
     bool pbqpAllocComplete = false;
     unsigned round = 0;
@@ -902,11 +673,13 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
     while (!pbqpAllocComplete) {
       DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
-      PBQP::Graph problem = constructPBQPProblem();
+      std::auto_ptr<PBQPRAProblem> problem =
+        builder->build(mf, lis, loopInfo, vregsToAlloc);
       PBQP::Solution solution =
-        PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem);
+        PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+          problem->getGraph());
 
-      pbqpAllocComplete = mapPBQPToRegAlloc(solution);
+      pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
 
       ++round;
     }
@@ -917,12 +690,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
 
   rmf->renderMachineFunction("After PBQP register allocation.", vrm);
 
-  vregIntervalsToAlloc.clear();
-  emptyVRegIntervals.clear();
-  li2Node.clear();
-  node2LI.clear();
-  allowedSets.clear();
-  problemNodes.clear();
+  vregsToAlloc.clear();
+  emptyIntervalVRegs.clear();
 
   DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
 
@@ -934,9 +703,18 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   return true;
 }
 
-FunctionPass* llvm::createPBQPRegisterAllocator() {
-  return new PBQPRegAlloc();
+FunctionPass* llvm::createPBQPRegisterAllocator(
+                                           std::auto_ptr<PBQPBuilder> builder) {
+  return new RegAllocPBQP(builder);
 }
 
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+  if (pbqpCoalescing) {
+    return createPBQPRegisterAllocator(
+             std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+  } // else
+  return createPBQPRegisterAllocator(
+           std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
 
 #undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 02b5539f0f4f..407559a211a0 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -24,7 +24,8 @@
 using namespace llvm;
 
 // Register the RegisterCoalescer interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", 
+                          SimpleRegisterCoalescing)
 char RegisterCoalescer::ID = 0;
 
 // RegisterCoalescer destructor: DO NOT move this to the header file
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index 93426eecbbc1..cbfd5a23d63d 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -30,9 +30,14 @@
 using namespace llvm;
 
 char RenderMachineFunction::ID = 0;
-INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
                 "Render machine functions (and related info) to HTML pages",
-                false, false);
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+                "Render machine functions (and related info) to HTML pages",
+                false, false)
 
 static cl::opt<std::string>
 outputFileSuffix("rmf-file-suffix",
@@ -458,14 +463,9 @@ namespace llvm {
          liItr != liEnd; ++liItr) {
       LiveInterval *li = liItr->second;
 
-      const TargetRegisterClass *liTRC;
-
       if (TargetRegisterInfo::isPhysicalRegister(li->reg))
         continue;
       
-      liTRC = mri->getRegClass(li->reg);
-     
-
       // For all ranges in the current interal.
       for (LiveInterval::iterator lrItr = li->begin(),
              lrEnd = li->end();
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
index 8d56a8292ac5..85719923c0c6 100644
--- a/lib/CodeGen/RenderMachineFunction.h
+++ b/lib/CodeGen/RenderMachineFunction.h
@@ -202,7 +202,9 @@ namespace llvm {
   public:
     static char ID;
 
-    RenderMachineFunction() : MachineFunctionPass(ID) {}
+    RenderMachineFunction() : MachineFunctionPass(ID) {
+      initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &au) const;
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 7d39dc496afe..3388889c9e91 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "pre-RA-sched"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
 
 ScheduleDAG::~ScheduleDAG() {}
 
+/// getInstrDesc helper to handle SDNodes.
+const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+  if (!Node || !Node->isMachineOpcode()) return NULL;
+  return &TII->get(Node->getMachineOpcode());
+}
+
 /// dump - dump the schedule.
 void ScheduleDAG::dumpSchedule() const {
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -68,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
 /// addPred - This adds the specified edge as a pred of the current node if
 /// not already.  It also adds the current node as a successor of the
 /// specified node.
-void SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D) {
   // If this node already has this depenence, don't add a redundant one.
   for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
        I != E; ++I)
     if (*I == D)
-      return;
+      return false;
   // Now add a corresponding succ to N.
   SDep P = D;
   P.setSUnit(this);
@@ -99,6 +106,7 @@ void SUnit::addPred(const SDep &D) {
     this->setDepthDirty();
     N->setHeightDirty();
   }
+  return true;
 }
 
 /// removePred - This removes the specified edge as a pred of the current
@@ -278,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
 
   dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
   dbgs() << "  # succs left       : " << NumSuccsLeft << "\n";
+  dbgs() << "  # rdefs left       : " << NumRegDefsLeft << "\n";
   dbgs() << "  Latency            : " << Latency << "\n";
   dbgs() << "  Depth              : " << Depth << "\n";
   dbgs() << "  Height             : " << Height << "\n";
@@ -492,7 +501,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
 /// all nodes affected by the edge insertion. These nodes will later get new
 /// topological indexes by means of the Shift method.
 void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
-                                     bool& HasLoop) {
+                                     bool &HasLoop) {
   std::vector<const SUnit*> WorkList;
   WorkList.reserve(SUnits.size());
 
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 0a2fb3796a42..6b7a8c6491bd 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -57,7 +57,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
       assert(I->getReg() && "Unknown physical register!");
       unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
       bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
-      isNew = isNew; // Silence compiler warning.
+      (void)isNew; // Silence compiler warning.
       assert(isNew && "Node emitted out of order - early");
       BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
         .addReg(I->getReg());
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ea93dd5c6663..f17023eabb72 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -16,6 +16,7 @@
 #include "ScheduleDAGInstrs.h"
 #include "llvm/Operator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,9 +33,9 @@ using namespace llvm;
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo &mli,
                                      const MachineDominatorTree &mdt)
-  : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
-    Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
-  MFI = mf.getFrameInfo();
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+    InstrItins(mf.getTarget().getInstrItineraryData()),
+    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
   DbgValueVec.clear();
 }
 
@@ -78,12 +79,12 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
   } while (1);
 }
 
-/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
 static const Value *getUnderlyingObject(const Value *V) {
   // First just call Value::getUnderlyingObject to let it do what it does.
   do {
-    V = V->getUnderlyingObject();
+    V = GetUnderlyingObject(V);
     // If it found an inttoptr, use special code to continue climing.
     if (Operator::getOpcode(V) != Instruction::IntToPtr)
       break;
@@ -141,6 +142,46 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
     }
 }
 
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// are too high to be hidden by the branch or when the liveout registers
+/// used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+  ExitSU.setInstr(ExitMI);
+  bool AllDepKnown = ExitMI &&
+    (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+  if (ExitMI && AllDepKnown) {
+    // If it's a call or a barrier, add dependencies on the defs and uses of
+    // instruction.
+    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = ExitMI->getOperand(i);
+      if (!MO.isReg() || MO.isDef()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+
+      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+      Uses[Reg].push_back(&ExitSU);
+    }
+  } else {
+    // For others, e.g. fallthrough, conditional branch, assume the exit
+    // uses all the registers that are livein to the successor blocks.
+    SmallSet<unsigned, 8> Seen;
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI)
+      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+             E = (*SI)->livein_end(); I != E; ++I) {    
+        unsigned Reg = *I;
+        if (Seen.insert(Reg))
+          Uses[Reg].push_back(&ExitSU);
+      }
+  }
+}
+
 void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   // We'll be allocating one SUnit for each instruction, plus one for
   // the region exit node.
@@ -175,6 +216,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   // without emitting the info from the previous call.
   DbgValueVec.clear();
 
+  // Model data dependencies between instructions being scheduled and the
+  // ExitSU.
+  AddSchedBarrierDeps();
+
   // Walk the list of instructions, from bottom moving up.
   for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
        MII != MIE; --MII) {
@@ -194,6 +239,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
            "Cannot schedule terminators or labels!");
     // Create the SUnit for this MI.
     SUnit *SU = NewSUnit(MI);
+    SU->isCall = TID.isCall();
+    SU->isCommutable = TID.isCommutable();
 
     // Assign the Latency field of SU using target-provided information.
     if (UnitLatencies)
@@ -228,6 +275,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
       for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
         SUnit *DefSU = DefList[i];
+        if (DefSU == &ExitSU)
+          continue;
         if (DefSU != SU &&
             (Kind != SDep::Output || !MO.isDead() ||
              !DefSU->getInstr()->registerDefIsDead(Reg)))
@@ -237,6 +286,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
         std::vector<SUnit *> &DefList = Defs[*Alias];
         for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
           SUnit *DefSU = DefList[i];
+          if (DefSU == &ExitSU)
+            continue;
           if (DefSU != SU &&
               (Kind != SDep::Output || !MO.isDead() ||
                !DefSU->getInstr()->registerDefIsDead(*Alias)))
@@ -258,12 +309,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
           // TODO: Perhaps we should get rid of
           // SpecialAddressLatency and just move this into
           // adjustSchedDependency for the targets that care about it.
-          if (SpecialAddressLatency != 0 && !UnitLatencies) {
+          if (SpecialAddressLatency != 0 && !UnitLatencies &&
+              UseSU != &ExitSU) {
             MachineInstr *UseMI = UseSU->getInstr();
             const TargetInstrDesc &UseTID = UseMI->getDesc();
             int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
             assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
-            if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+            if (RegUseIndex >= 0 &&
+                (UseTID.mayLoad() || UseTID.mayStore()) &&
                 (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                 UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
               LDataLatency += SpecialAddressLatency;
@@ -357,7 +410,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
     // produce more precise dependence information.
 #define STORE_LOAD_LATENCY 1
     unsigned TrueMemOrderLatency = 0;
-    if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+    if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasVolatileMemoryRef() && 
          (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
       // Be conservative with these and add dependencies on all memory
@@ -446,6 +499,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
         // Treat all other stores conservatively.
         goto new_alias_chain;
       }
+
+      if (!ExitSU.isPred(SU))
+        // Push store's up a bit to avoid them getting in between cmp
+        // and branches.
+        ExitSU.addPred(SDep(SU, SDep::Order, 0,
+                            /*Reg=*/0, /*isNormalMemory=*/false,
+                            /*isMustAlias=*/false,
+                            /*isArtificial=*/true));
     } else if (TID.mayLoad()) {
       bool MayAlias = true;
       TrueMemOrderLatency = 0;
@@ -498,23 +559,22 @@ void ScheduleDAGInstrs::FinishBlock() {
 }
 
 void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-
   // Compute the latency for the node.
-  SU->Latency =
-    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+  if (!InstrItins || InstrItins->isEmpty()) {
+    SU->Latency = 1;
 
-  // Simplistic target-independent heuristic: assume that loads take
-  // extra time.
-  if (InstrItins.isEmpty())
+    // Simplistic target-independent heuristic: assume that loads take
+    // extra time.
     if (SU->getInstr()->getDesc().mayLoad())
       SU->Latency += 2;
+  } else {
+    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+  }
 }
 
 void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 
                                               SDep& dep) const {
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty())
+  if (!InstrItins || InstrItins->isEmpty())
     return;
   
   // For a data dependency with a known register...
@@ -528,14 +588,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
   MachineInstr *DefMI = Def->getInstr();
   int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
   if (DefIdx != -1) {
-    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
-                                              DefIdx);
-    if (DefCycle >= 0) {
-      MachineInstr *UseMI = Use->getInstr();
-      const unsigned UseClass = UseMI->getDesc().getSchedClass();
-
-      // For all uses of the register, calculate the maxmimum latency
-      int Latency = -1;
+    const MachineOperand &MO = DefMI->getOperand(DefIdx);
+    if (MO.isReg() && MO.isImplicit() &&
+        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+      // This is an implicit def, getOperandLatency() won't return the correct
+      // latency. e.g.
+      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+      // What we want is to compute latency between def of %D6/%D7 and use of
+      // %Q3 instead.
+      DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+    }
+    MachineInstr *UseMI = Use->getInstr();
+    // For all uses of the register, calculate the maxmimum latency
+    int Latency = -1;
+    if (UseMI) {
       for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
         const MachineOperand &MO = UseMI->getOperand(i);
         if (!MO.isReg() || !MO.isUse())
@@ -544,15 +611,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
         if (MOReg != Reg)
           continue;
 
-        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
-        if (UseCycle >= 0)
-          Latency = std::max(Latency, DefCycle - UseCycle + 1);
+        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+                                              UseMI, i);
+        Latency = std::max(Latency, UseCycle);
       }
-
-      // If we found a latency, then replace the existing dependence latency.
-      if (Latency >= 0)
-        dep.setLatency(Latency);
+    } else {
+      // UseMI is null, then it must be a scheduling barrier.
+      if (!InstrItins || InstrItins->isEmpty())
+        return;
+      unsigned DefClass = DefMI->getDesc().getSchedClass();
+      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
     }
+
+    // If we found a latency, then replace the existing dependence latency.
+    if (Latency >= 0)
+      dep.setLatency(Latency);
   }
 }
 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index c8f543f7146d..c878287d9c8c 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -101,6 +101,7 @@ namespace llvm {
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
     const MachineFrameInfo *MFI;
+    const InstrItineraryData *InstrItins;
 
     /// Defs, Uses - Remember where defs and uses of each physical register
     /// are as we iterate upward through the instructions. This is allocated
@@ -163,6 +164,15 @@ namespace llvm {
     /// input.
     virtual void BuildSchedGraph(AliasAnalysis *AA);
 
+    /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+    /// list of instructions being scheduled to scheduling barrier. We want to
+    /// make sure instructions which define registers that are either used by
+    /// the terminator or are live-out are properly scheduled. This is
+    /// especially important when the definition latency of the return value(s)
+    /// are too high to be hidden by the branch or when the liveout registers
+    /// used by instructions in the fallthrough block.
+    void AddSchedBarrierDeps();
+
     /// ComputeLatency - Compute node latency.
     ///
     virtual void ComputeLatency(SUnit *SU);
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..e6d7ded8a784
--- /dev/null
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,243 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsultes hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+                           const ScheduleDAG *SchedDAG,
+                           const char *ParentDebugType) :
+  ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+  IssueCount(0) {
+
+#ifndef NDEBUG
+  DebugType = ParentDebugType;
+#endif
+
+  // Determine the maximum depth of any itinerary. This determines the
+  // depth of the scoreboard. We always make the scoreboard at least 1
+  // cycle deep to avoid dealing with the boundary condition.
+  unsigned ScoreboardDepth = 1;
+  if (ItinData && !ItinData->isEmpty()) {
+    IssueWidth = ItinData->IssueWidth;
+
+    for (unsigned idx = 0; ; ++idx) {
+      if (ItinData->isEndMarker(idx))
+        break;
+
+      const InstrStage *IS = ItinData->beginStage(idx);
+      const InstrStage *E = ItinData->endStage(idx);
+      unsigned CurCycle = 0;
+      unsigned ItinDepth = 0;
+      for (; IS != E; ++IS) {
+        unsigned StageDepth = CurCycle + IS->getCycles();
+        if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+        CurCycle += IS->getNextCycles();
+      }
+
+      // Find the next power-of-2 >= ItinDepth
+      while (ItinDepth > ScoreboardDepth) {
+        ScoreboardDepth *= 2;
+      }
+    }
+    MaxLookAhead = ScoreboardDepth;
+  }
+
+  ReservedScoreboard.reset(ScoreboardDepth);
+  RequiredScoreboard.reset(ScoreboardDepth);
+
+  DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+               << ScoreboardDepth << '\n');
+}
+
+void ScoreboardHazardRecognizer::Reset() {
+  IssueCount = 0;
+  RequiredScoreboard.reset();
+  ReservedScoreboard.reset();
+}
+
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+  dbgs() << "Scoreboard:\n";
+
+  unsigned last = Depth - 1;
+  while ((last > 0) && ((*this)[last] == 0))
+    last--;
+
+  for (unsigned i = 0; i <= last; i++) {
+    unsigned FUs = (*this)[i];
+    dbgs() << "\t";
+    for (int j = 31; j >= 0; j--)
+      dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+    dbgs() << '\n';
+  }
+}
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+  if (IssueWidth == 0)
+    return false;
+
+  return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  if (!ItinData || ItinData->isEmpty())
+    return NoHazard;
+
+  // Note that stalls will be negative for bottom-up scheduling.
+  int cycle = Stalls;
+
+  // Use the itinerary for the underlying instruction to check for
+  // free FU's in the scoreboard at the appropriate future cycles.
+
+  const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+  if (TID == NULL) {
+    // Don't check hazards for non-machineinstr Nodes.
+    return NoHazard;
+  }
+  unsigned idx = TID->getSchedClass();
+  for (const InstrStage *IS = ItinData->beginStage(idx),
+         *E = ItinData->endStage(idx); IS != E; ++IS) {
+    // We must find one of the stage's units free for every cycle the
+    // stage is occupied. FIXME it would be more accurate to find the
+    // same unit free in all the cycles.
+    for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+      int StageCycle = cycle + (int)i;
+      if (StageCycle < 0)
+        continue;
+
+      if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+        assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+               "Scoreboard depth exceeded!");
+        // This stage was stalled beyond pipeline depth, so cannot conflict.
+        break;
+      }
+
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[StageCycle];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[StageCycle];
+        break;
+      }
+
+      if (!freeUnits) {
+        DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+        DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+        DEBUG(DAG->dumpNode(SU));
+        return Hazard;
+      }
+    }
+
+    // Advance the cycle to the next stage.
+    cycle += IS->getNextCycles();
+  }
+
+  return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+  if (!ItinData || ItinData->isEmpty())
+    return;
+
+  // Use the itinerary for the underlying instruction to reserve FU's
+  // in the scoreboard at the appropriate future cycles.
+  const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+  assert(TID && "The scheduler must filter non-machineinstrs");
+  if (DAG->TII->isZeroCost(TID->Opcode))
+    return;
+
+  ++IssueCount;
+
+  unsigned cycle = 0;
+
+  unsigned idx = TID->getSchedClass();
+  for (const InstrStage *IS = ItinData->beginStage(idx),
+         *E = ItinData->endStage(idx); IS != E; ++IS) {
+    // We must reserve one of the stage's units for every cycle the
+    // stage is occupied. FIXME it would be more accurate to reserve
+    // the same unit free in all the cycles.
+    for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+      assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+             "Scoreboard depth exceeded!");
+
+      unsigned freeUnits = IS->getUnits();
+      switch (IS->getReservationKind()) {
+      default:
+       assert(0 && "Invalid FU reservation");
+      case InstrStage::Required:
+        // Required FUs conflict with both reserved and required ones
+        freeUnits &= ~ReservedScoreboard[cycle + i];
+        // FALLTHROUGH
+      case InstrStage::Reserved:
+        // Reserved FUs can conflict only with required ones.
+        freeUnits &= ~RequiredScoreboard[cycle + i];
+        break;
+      }
+
+      // reduce to a single unit
+      unsigned freeUnit = 0;
+      do {
+        freeUnit = freeUnits;
+        freeUnits = freeUnit & (freeUnit - 1);
+      } while (freeUnits);
+
+      assert(freeUnit && "No function unit available!");
+      if (IS->getReservationKind() == InstrStage::Required)
+        RequiredScoreboard[cycle + i] |= freeUnit;
+      else
+        ReservedScoreboard[cycle + i] |= freeUnit;
+    }
+
+    // Advance the cycle to the next stage.
+    cycle += IS->getNextCycles();
+  }
+
+  DEBUG(ReservedScoreboard.dump());
+  DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+  IssueCount = 0;
+  ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+  RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+  IssueCount = 0;
+  ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+  ReservedScoreboard.recede();
+  RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+  RequiredScoreboard.recede();
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 799988a4c862..15932c03a190 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -21,5 +21,3 @@ add_llvm_library(LLVMSelectionDAG
   TargetLowering.cpp
   TargetSelectionDAGInfo.cpp
   )
-
-target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c9c4d91e9736..90356021f602 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,7 +25,6 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -43,6 +42,7 @@ STATISTIC(NodesCombined   , "Number of dag nodes combined");
 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
 
 namespace {
   static cl::opt<bool>
@@ -185,7 +185,7 @@ namespace {
     SDValue visitANY_EXTEND(SDNode *N);
     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
     SDValue visitTRUNCATE(SDNode *N);
-    SDValue visitBIT_CONVERT(SDNode *N);
+    SDValue visitBITCAST(SDNode *N);
     SDValue visitBUILD_PAIR(SDNode *N);
     SDValue visitFADD(SDNode *N);
     SDValue visitFSUB(SDNode *N);
@@ -229,12 +229,13 @@ namespace {
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                          unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
-    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
+    SDValue TransformFPLoadStorePair(SDNode *N);
 
     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 
@@ -248,16 +249,19 @@ namespace {
     bool isAlias(SDValue Ptr1, int64_t Size1,
                  const Value *SrcValue1, int SrcValueOffset1,
                  unsigned SrcValueAlign1,
+                 const MDNode *TBAAInfo1,
                  SDValue Ptr2, int64_t Size2,
                  const Value *SrcValue2, int SrcValueOffset2,
-                 unsigned SrcValueAlign2) const;
+                 unsigned SrcValueAlign2,
+                 const MDNode *TBAAInfo2) const;
 
     /// FindAliasInfo - Extracts the relevant alias information from the memory
     /// node.  Returns true if the operand was a load.
     bool FindAliasInfo(SDNode *N,
                        SDValue &Ptr, int64_t &Size,
                        const Value *&SrcValue, int &SrcValueOffset,
-                       unsigned &SrcValueAlignment) const;
+                       unsigned &SrcValueAlignment,
+                       const MDNode *&TBAAInfo) const;
 
     /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
     /// looking for a better chain (aliasing node.)
@@ -270,15 +274,15 @@ namespace {
 
     /// Run - runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
-    
+
     SelectionDAG &getDAG() const { return DAG; }
-    
+
     /// getShiftAmountTy - Returns a type large enough to hold any valid
     /// shift amount - before type legalization these can be huge.
     EVT getShiftAmountTy() {
       return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
     }
-    
+
     /// isTypeLegal - This method returns true if we are running before type
     /// legalization or if the specified VT is legal.
     bool isTypeLegal(const EVT &VT) {
@@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 
   // Replace the old value with the new one.
   ++NodesCombined;
-  DEBUG(dbgs() << "\nReplacing.2 "; 
+  DEBUG(dbgs() << "\nReplacing.2 ";
         TLO.Old.getNode()->dump(&DAG);
         dbgs() << "\nWith: ";
         TLO.New.getNode()->dump(&DAG);
@@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
     EVT MemVT = LD->getMemoryVT();
     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 
+                                                  : ISD::EXTLOAD)
       : LD->getExtensionType();
     Replace = true;
-    return DAG.getExtLoad(ExtType, PVT, dl,
+    return DAG.getExtLoad(ExtType, dl, PVT,
                           LD->getChain(), LD->getBasePtr(),
-                          LD->getSrcValue(), LD->getSrcValueOffset(),
+                          LD->getPointerInfo(),
                           MemVT, LD->isVolatile(),
                           LD->isNonTemporal(), LD->getAlignment());
   }
@@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
     unsigned ExtOpc =
       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
     return DAG.getNode(ExtOpc, dl, PVT, Op);
-  }    
+  }
   }
 
   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
@@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
     LoadSDNode *LD = cast<LoadSDNode>(N);
     EVT MemVT = LD->getMemoryVT();
     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 
+                                                  : ISD::EXTLOAD)
       : LD->getExtensionType();
-    SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
+    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                    LD->getChain(), LD->getBasePtr(),
-                                   LD->getSrcValue(), LD->getSrcValueOffset(),
+                                   LD->getPointerInfo(),
                                    MemVT, LD->isVolatile(),
                                    LD->isNonTemporal(), LD->getAlignment());
     SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
@@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
            "Node was deleted but visit returned new node!");
 
-    DEBUG(dbgs() << "\nReplacing.3 "; 
+    DEBUG(dbgs() << "\nReplacing.3 ";
           N->dump(&DAG);
           dbgs() << "\nWith: ";
           RV.getNode()->dump(&DAG);
@@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   case ISD::TRUNCATE:           return visitTRUNCATE(N);
-  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
+  case ISD::BITCAST:            return visitBITCAST(N);
   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   case ISD::FADD:               return visitFADD(N);
   case ISD::FSUB:               return visitFSUB(N);
@@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
       }
     }
   }
-  
+
   SDValue Result;
 
   // If we've change things around then replace token factor.
@@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                                        N0.getOperand(0).getOperand(1),
                                        N0.getOperand(1)));
 
+  if (N1.getOpcode() == ISD::AND) {
+    SDValue AndOp0 = N1.getOperand(0);
+    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+    // and similar xforms where the inner op is either ~0 or 0.
+    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+      DebugLoc DL = N->getDebugLoc();
+      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+    }
+  }
+
+  // add (sext i1), X -> sub X, (zext i1)
+  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+      N0.getOperand(0).getValueType() == MVT::i1 &&
+      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+    DebugLoc DL = N->getDebugLoc();
+    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+  }
+
   return SDValue();
 }
 
@@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
   if (N->hasNUsesOfValue(0, 1))
     return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
                      DAG.getNode(ISD::CARRY_FALSE,
-                                 N->getDebugLoc(), MVT::Flag));
+                                 N->getDebugLoc(), MVT::Glue));
 
   // canonicalize constant to RHS.
   if (N0C && !N1C)
@@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
   // fold (addc x, 0) -> x + no carry out
   if (N1C && N1C->isNullValue())
     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
-                                        N->getDebugLoc(), MVT::Flag));
+                                        N->getDebugLoc(), MVT::Glue));
 
   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   APInt LHSZero, LHSOne;
@@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
         (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
       return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
                        DAG.getNode(ISD::CARRY_FALSE,
-                                   N->getDebugLoc(), MVT::Flag));
+                                   N->getDebugLoc(), MVT::Glue));
   }
 
   return SDValue();
@@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
   return SDValue();
 }
 
+// Since it may not be valid to emit a fold to zero for vector initializers
+// check if we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+                             SelectionDAG &DAG, bool LegalOperations) {                            
+  if (!VT.isVector()) {
+    return DAG.getConstant(0, VT);
+  } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+    // Produce a vector of zeros.
+    SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+    std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+      &Ops[0], Ops.size());
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSUB(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   }
 
   // fold (sub x, x) -> 0
+  // FIXME: Refactor this and xor and other similar operations together.
   if (N0 == N1)
-    return DAG.getConstant(0, N->getValueType(0));
+    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
   // fold (sub c1, c2) -> c1-c2
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
@@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
   if (N0C && N0C->isAllOnesValue())
     return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+  // fold A-(A-B) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+    return N1.getOperand(1);
   // fold (A+B)-A -> B
   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
     return N0.getOperand(1);
@@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
 
   // fold (mulhs x, 0) -> 0
   if (N1C && N1C->isNullValue())
@@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
     return DAG.getConstant(0, VT);
 
+  // If the type twice as wide is legal, transform the mulhs to a wider multiply
+  // plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    }
+  }
+  
   return SDValue();
 }
 
@@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
 
   // fold (mulhu x, 0) -> 0
   if (N1C && N1C->isNullValue())
@@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
     return DAG.getConstant(0, VT);
 
+  // If the type twice as wide is legal, transform the mulhu to a wider multiply
+  // plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    }
+  }
+  
   return SDValue();
 }
 
@@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
   if (Res.getNode()) return Res;
 
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the mulhu to a wider multiply
+  // plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+  
   return SDValue();
 }
 
@@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
   if (Res.getNode()) return Res;
 
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+  
+  // If the type twice as wide is legal, transform the mulhu to a wider multiply
+  // plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+                       DAG.getConstant(SimpleSize, getShiftAmountTy()));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+  
   return SDValue();
 }
 
@@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     SDValue N0Op0 = N0.getOperand(0);
     APInt Mask = ~N1C->getAPIntValue();
-    Mask.trunc(N0Op0.getValueSizeInBits());
+    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
                                  N0.getValueType(), N0Op0);
@@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                        LN0->getChain(), LN0->getBasePtr(),
-                                       LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->getPointerInfo(), MemVT,
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
       AddToWorkList(N);
@@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
+                                       MemVT,
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
       AddToWorkList(N);
@@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         if (ExtVT == LoadedVT &&
             (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-          
-          SDValue NewLoad = 
-            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+
+          SDValue NewLoad =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                            LN0->getChain(), LN0->getBasePtr(),
-                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           LN0->getPointerInfo(),
                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                            LN0->getAlignment());
           AddToWorkList(N);
           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
         }
-        
+
         // Do not change the width of a volatile load.
         // Do not generate loads of non-round integer types since these can
         // be expensive (and would be wrong if the type is not byte sized).
@@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
           }
 
           AddToWorkList(NewPtr.getNode());
-          
+
           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
           SDValue Load =
-            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                            LN0->getChain(), NewPtr,
-                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           LN0->getPointerInfo(),
                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                            Alignment);
           AddToWorkList(N);
@@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
                                          N01C->getAPIntValue(), VT));
   }
   // fold (xor x, x) -> 0
-  if (N0 == N1) {
-    if (!VT.isVector()) {
-      return DAG.getConstant(0, VT);
-    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
-      // Produce a vector of zeros.
-      SDValue El = DAG.getConstant(0, VT.getVectorElementType());
-      std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
-      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
-                         &Ops[0], Ops.size());
-    }
-  }
+  if (N0 == N1)
+    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
 
   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   if (N0.getOpcode() == N1.getOpcode()) {
@@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
                                LHS->getOperand(1), N->getOperand(1));
 
   // Create the new shift.
-  SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+  SDValue NewShift = DAG.getNode(N->getOpcode(),
+                                 LHS->getOperand(0).getDebugLoc(),
                                  VT, LHS->getOperand(0), N->getOperand(1));
 
   // Create the new binop.
@@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       EVT TruncVT = N1.getValueType();
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
-      TruncC.trunc(TruncVT.getSizeInBits());
+      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
       return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                          DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                      DAG.getNode(ISD::TRUNCATE,
@@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       N0.getOperand(1).getOpcode() == ISD::Constant) {
     uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
     uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 > OpSizeInBits)
+    if (c1 + c2 >= OpSizeInBits)
       return DAG.getConstant(0, VT);
     return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                        DAG.getConstant(c1 + c2, N1.getValueType()));
   }
+
+  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+  // For this to be valid, the second form must not preserve any of the bits
+  // that are shifted out by the inner shift in the first form.  This means
+  // the outer shift size must be >= the number of bits added by the ext.
+  // As a corollary, we don't care what kind of ext it is.
+  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+              N0.getOpcode() == ISD::ANY_EXTEND ||
+              N0.getOpcode() == ISD::SIGN_EXTEND) &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+    uint64_t c1 = 
+      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    EVT InnerShiftVT = N0.getOperand(0).getValueType();
+    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+    if (c2 >= OpSizeInBits - InnerShiftSize) {
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+                                     N0.getOperand(0)->getOperand(0)),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
+  }
+
   // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
   //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
   if (N1C && N0.getOpcode() == ISD::SRL &&
@@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     if (N01C && N1C) {
       // Determine what the truncate's result bitsize and type would be.
       EVT TruncVT =
-        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+        EVT::getIntegerVT(*DAG.getContext(),
+                          OpSizeInBits - N1C->getZExtValue());
       // Determine the residual right-shift amount.
       signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
 
@@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       EVT TruncVT = N1.getValueType();
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
-      TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
       return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                          DAG.getNode(ISD::AND, N->getDebugLoc(),
                                      TruncVT,
@@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+  //      if c1 is equal to the number of bits the trunc removes
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (N0.getOperand(0).getOpcode() == ISD::SRL ||
+       N0.getOperand(0).getOpcode() == ISD::SRA) &&
+      N0.getOperand(0).hasOneUse() &&
+      N0.getOperand(0).getOperand(1).hasOneUse() &&
+      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+    EVT LargeVT = N0.getOperand(0).getValueType();
+    ConstantSDNode *LargeShiftAmt =
+      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+        LargeShiftAmt->getZExtValue()) {
+      SDValue Amt =
+        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+                        getShiftAmountTy());
+      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+                                N0.getOperand(0).getOperand(0), Amt);
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+    }
+  }
+
   // Simplify, based on bits shifted out of the LHS.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
@@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       N0.getOperand(1).getOpcode() == ISD::Constant) {
     uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
     uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 > OpSizeInBits)
+    if (c1 + c2 >= OpSizeInBits)
       return DAG.getConstant(0, VT);
     return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                        DAG.getConstant(c1 + c2, N1.getValueType()));
   }
-  
+
+  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+      N0.getOperand(0).getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+    uint64_t c1 = 
+      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    EVT InnerShiftVT = N0.getOperand(0).getValueType();
+    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+    if (c1 + OpSizeInBits == InnerShiftSize) {
+      if (c1 + c2 >= InnerShiftSize)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, 
+                                     N0.getOperand(0)->getOperand(0),
+                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
+    }
+  }
+
   // fold (srl (shl x, c), c) -> (and x, cst2)
   if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       N0.getValueSizeInBits() <= 64) {
@@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                        DAG.getConstant(~0ULL >> ShAmt, VT));
   }
-  
+
 
   // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       EVT TruncVT = N1.getValueType();
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
-      TruncC.trunc(TruncVT.getSizeInBits());
+      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
       return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                          DAG.getNode(ISD::AND, N->getDebugLoc(),
                                      TruncVT,
@@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   //   brcond i32 %c ...
   //
   // into
-  // 
+  //
   //   %a = ...
   //   %b = and %a, 2
   //   %c = setcc eq %b, 0
@@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
     }
     if (BothLiveOut)
       // Both unextended and extended values are live out. There had better be
-      // good a reason for the transformation.
+      // a good reason for the transformation.
       return ExtendNodes.size();
   }
   return true;
@@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(),
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                        N0.getValueType(),
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
@@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     EVT MemVT = LN0->getMemoryVT();
     if ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
+                                       MemVT,
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
       CombineTo(N, ExtLoad);
@@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                                       N0.getOperand(0), N0.getOperand(1),
                                  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                          NegOne, DAG.getConstant(0, VT));
-  }  
+  }
 
   // fold (sext x) -> (zext x) if the sign bit is known zero.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   // fold (zext (truncate x)) -> (and x, mask)
   if (N0.getOpcode() == ISD::TRUNCATE &&
       (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+    // fold (zext (truncate (load x))) -> (zext (smaller load x))
+    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+    if (NarrowLoad.getNode()) {
+      SDNode* oye = N0.getNode()->getOperand(0).getNode();
+      if (NarrowLoad.getNode() != N0.getNode()) {
+        CombineTo(N0.getNode(), NarrowLoad);
+        // CombineTo deleted the truncate, if needed, but not what's under it.
+        AddToWorkList(oye);
+      }
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+
     SDValue Op = N0.getOperand(0);
     if (Op.getValueType().bitsLT(VT)) {
       Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
     }
     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
-    Mask.zext(VT.getSizeInBits());
+    Mask = Mask.zext(VT.getSizeInBits());
     return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                        X, DAG.getConstant(Mask, VT));
   }
@@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(),
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                        N0.getValueType(),
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
@@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     EVT MemVT = LN0->getMemoryVT();
     if ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
+                                       MemVT,
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
       CombineTo(N, ExtLoad);
@@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       isa<ConstantSDNode>(N0.getOperand(1)) &&
       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
       N0.hasOneUse()) {
+    SDValue ShAmt = N0.getOperand(1);
+    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
     if (N0.getOpcode() == ISD::SHL) {
+      SDValue InnerZExt = N0.getOperand(0);
       // If the original shl may be shifting out bits, do not perform this
       // transformation.
-      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
-        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
-      if (ShAmt > KnownZeroBits)
+      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+        InnerZExt.getOperand(0).getValueType().getSizeInBits();
+      if (ShAmtVal > KnownZeroBits)
         return SDValue();
     }
-    DebugLoc dl = N->getDebugLoc();
-    return DAG.getNode(N0.getOpcode(), dl, VT,
-                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
-                       DAG.getNode(ISD::ZERO_EXTEND, dl,
-                                   N0.getOperand(1).getValueType(),
-                                   N0.getOperand(1)));
+
+    DebugLoc DL = N->getDebugLoc();
+    
+    // Ensure that the shift amount is wide enough for the shifted value. 
+    if (VT.getSizeInBits() >= 256)
+      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+    
+    return DAG.getNode(N0.getOpcode(), DL, VT,
+                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+                       ShAmt);
   }
 
   return SDValue();
@@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
     }
     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
-    Mask.zext(VT.getSizeInBits());
+    Mask = Mask.zext(VT.getSizeInBits());
     return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                        X, DAG.getConstant(Mask, VT));
   }
@@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                        LN0->getChain(),
-                                       LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(),
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                        N0.getValueType(),
                                        LN0->isVolatile(), LN0->isNonTemporal(),
                                        LN0->getAlignment());
@@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       N0.hasOneUse()) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     EVT MemVT = LN0->getMemoryVT();
-    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
-                                     N->getDebugLoc(),
-                                     LN0->getChain(), LN0->getBasePtr(),
-                                     LN0->getSrcValue(),
-                                     LN0->getSrcValueOffset(), MemVT,
+    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+                                     VT, LN0->getChain(), LN0->getBasePtr(),
+                                     LN0->getPointerInfo(), MemVT,
                                      LN0->isVolatile(), LN0->isNonTemporal(),
                                      LN0->getAlignment());
     CombineTo(N, ExtLoad);
@@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
-    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
-      return SDValue();
   } else if (Opc == ISD::SRL) {
-    // Annother special-case: SRL is basically zero-extending a narrower
-    // value.
+    // Another special-case: SRL is basically zero-extending a narrower value.
     ExtType = ISD::ZEXTLOAD;
     N0 = SDValue(N, 0);
     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                               VT.getSizeInBits() - N01->getZExtValue());
   }
+  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+    return SDValue();
 
   unsigned EVTBits = ExtVT.getSizeInBits();
+  
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (!ExtVT.isRound())
+    return SDValue();
+  
   unsigned ShAmt = 0;
-  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
       ShAmt = N01->getZExtValue();
       // Is the shift amount a multiple of size of VT?
@@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
         if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
           return SDValue();
       }
+
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+      
+      // If the shift amount is larger than the input type then we're not
+      // accessing any of the loaded bytes.  If the load was a zextload/extload
+      // then the result of the shift+trunc is zero/undef (handled elsewhere).
+      // If the load was a sextload then the result is a splat of the sign bit
+      // of the extended byte.  This is not worth optimizing for.
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+        return SDValue();
     }
   }
 
-  // Do not generate loads of non-round integer types since these can
-  // be expensive (and would be wrong if the type is not byte sized).
-  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
-      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile()) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    EVT PtrType = N0.getOperand(1).getValueType();
-
-    // For big endian targets, we need to adjust the offset to the pointer to
-    // load the correct bytes.
-    if (TLI.isBigEndian()) {
-      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
-      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
-      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
-    }
-
-    uint64_t PtrOff =  ShAmt / 8;
-    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
-    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
-                                 PtrType, LN0->getBasePtr(),
-                                 DAG.getConstant(PtrOff, PtrType));
-    AddToWorkList(NewPtr.getNode());
-
-    SDValue Load = (ExtType == ISD::NON_EXTLOAD)
-      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
-                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
-      : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
-                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                       NewAlign);
-
-    // Replace the old load's chain with the new load's chain.
-    WorkListRemover DeadNodes(*this);
-    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
-                                  &DeadNodes);
+  // If the load is shifted left (and the result isn't shifted back right),
+  // we can fold the truncate through the shift.
+  unsigned ShLeftAmt = 0;
+  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShLeftAmt = N01->getZExtValue();
+      N0 = N0.getOperand(0);
+    }
+  }
+  
+  // If we haven't found a load, we can't narrow it.  Don't transform one with
+  // multiple uses, this would require adding a new load.
+  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+      // Don't change the width of a volatile load.
+      cast<LoadSDNode>(N0)->isVolatile())
+    return SDValue();
+  
+  // Verify that we are actually reducing a load width here.
+  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+    return SDValue();
+  
+  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  EVT PtrType = N0.getOperand(1).getValueType();
+
+  // For big endian targets, we need to adjust the offset to the pointer to
+  // load the correct bytes.
+  if (TLI.isBigEndian()) {
+    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+  }
+
+  uint64_t PtrOff = ShAmt / 8;
+  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+                               PtrType, LN0->getBasePtr(),
+                               DAG.getConstant(PtrOff, PtrType));
+  AddToWorkList(NewPtr.getNode());
+
+  SDValue Load;
+  if (ExtType == ISD::NON_EXTLOAD)
+    Load =  DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+                        LN0->getPointerInfo().getWithOffset(PtrOff),
+                        LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+  else
+    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+                          LN0->getPointerInfo().getWithOffset(PtrOff),
+                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                          NewAlign);
+
+  // Replace the old load's chain with the new load's chain.
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+                                &DeadNodes);
 
-    // Return the new loaded value.
-    return Load;
+  // Shift the result left, if we've swallowed a left shift.
+  SDValue Result = Load;
+  if (ShLeftAmt != 0) {
+    EVT ShImmTy = getShiftAmountTy();
+    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+      ShImmTy = VT;
+    Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+                         Result, DAG.getConstant(ShLeftAmt, ShImmTy));
   }
 
-  return SDValue();
+  // Return the new loaded value.
+  return Result;
 }
 
 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
@@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                      LN0->getChain(),
-                                     LN0->getBasePtr(), LN0->getSrcValue(),
-                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->getBasePtr(), LN0->getPointerInfo(),
+                                     EVT,
                                      LN0->isVolatile(), LN0->isNonTemporal(),
                                      LN0->getAlignment());
     CombineTo(N, ExtLoad);
@@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                      LN0->getChain(),
-                                     LN0->getBasePtr(), LN0->getSrcValue(),
-                                     LN0->getSrcValueOffset(), EVT,
+                                     LN0->getBasePtr(), LN0->getPointerInfo(),
+                                     EVT,
                                      LN0->isVolatile(), LN0->isNonTemporal(),
                                      LN0->getAlignment());
     CombineTo(N, ExtLoad);
@@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
 
   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
-  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+      LD1->getPointerInfo().getAddrSpace() !=
+         LD2->getPointerInfo().getAddrSpace())
     return SDValue();
   EVT LD1VT = LD1->getValueType(0);
 
@@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
     if (NewAlign <= Align &&
         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
       return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
-                         LD1->getBasePtr(), LD1->getSrcValue(),
-                         LD1->getSrcValueOffset(), false, false, Align);
+                         LD1->getBasePtr(), LD1->getPointerInfo(),
+                         false, false, Align);
   }
 
   return SDValue();
 }
 
-SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
@@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
     assert(!DestEltVT.isVector() &&
            "Element type of vector ValueType must not be vector!");
     if (isSimple)
-      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
   }
 
   // If the input is a constant, let getNode fold it.
   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
-    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+    SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
     if (Res.getNode() != N) {
       if (!LegalOperations ||
           TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
@@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
   }
 
   // (conv (conv x, t1), t2) -> (conv x, t2)
-  if (N0.getOpcode() == ISD::BIT_CONVERT)
-    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+  if (N0.getOpcode() == ISD::BITCAST)
+    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
                        N0.getOperand(0));
 
   // fold (conv (load x)) -> (load (conv*)x)
@@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
 
     if (Align <= OrigAlign) {
       SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
-                                 LN0->getBasePtr(),
-                                 LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                  LN0->isVolatile(), LN0->isNonTemporal(),
                                  OrigAlign);
       AddToWorkList(N);
       CombineTo(N0.getNode(),
-                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+                DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                             N0.getValueType(), Load),
                 Load.getValue(1));
       return Load;
@@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
   // This often reduces constant pool loads.
   if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
       N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
-    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+    SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                   N0.getOperand(0));
     AddToWorkList(NewConv.getNode());
 
@@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
     if (isTypeLegal(IntXVT)) {
-      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+      SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                               IntXVT, N0.getOperand(1));
       AddToWorkList(X.getNode());
 
@@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
                       X, DAG.getConstant(SignBit, VT));
       AddToWorkList(X.getNode());
 
-      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+      SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                                 VT, N0.getOperand(0));
       Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
                         Cst, DAG.getConstant(~SignBit, VT));
@@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   return CombineConsecutiveLoads(N, VT);
 }
 
-/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
 /// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
 /// destination element value type.
 SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
 
   // If this is already the right type, we're done.
@@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
     // Due to the FP element handling below calling this routine recursively,
     // we can end up with a scalar-to-vector node here.
     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
-      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, 
-                         DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                      DstEltVT, BV->getOperand(0)));
-      
+
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
       SDValue Op = BV->getOperand(i);
@@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
       // are promoted and implicitly truncated.  Make that explicit here.
       if (Op.getValueType() != SrcEltVT)
         Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
-      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                 DstEltVT, Op));
       AddToWorkList(Ops.back().getNode());
     }
@@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
     // same sizes.
     assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
-    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
     SrcEltVT = IntVT;
   }
 
@@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   if (DstEltVT.isFloatingPoint()) {
     assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
-    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
 
     // Next, convert to FP elements of the same size.
-    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
   }
 
   // Okay, we know the src/dst types are both integers of differing types.
@@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
         if (Op.getOpcode() == ISD::UNDEF) continue;
         EltIsUndef = false;
 
-        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
       }
 
@@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
       continue;
     }
 
-    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
-                        getAPIntValue()).zextOrTrunc(SrcBitSize);
+    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+                  getAPIntValue().zextOrTrunc(SrcBitSize);
 
     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
-      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+      APInt ThisVal = OpVal.trunc(DstBitSize);
       Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
-      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
         // Simply turn this into a SCALAR_TO_VECTOR of the new type.
         return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                            Ops[0]);
@@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                      LN0->getChain(),
-                                     LN0->getBasePtr(), LN0->getSrcValue(),
-                                     LN0->getSrcValueOffset(),
+                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                      N0.getValueType(),
                                      LN0->isVolatile(), LN0->isNonTemporal(),
                                      LN0->getAlignment());
@@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
 
   // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
   // constant pool values.
-  if (N0.getOpcode() == ISD::BIT_CONVERT && 
+  if (N0.getOpcode() == ISD::BITCAST &&
       !VT.isVector() &&
       N0.getNode()->hasOneUse() &&
       N0.getOperand(0).getValueType().isInteger()) {
@@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
       Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
               DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
       AddToWorkList(Int.getNode());
-      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                          VT, Int);
     }
   }
@@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
 
   // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
   // constant pool values.
-  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+  if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
       N0.getOperand(0).getValueType().isInteger() &&
       !N0.getOperand(0).getValueType().isVector()) {
     SDValue Int = N0.getOperand(0);
@@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
       Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
              DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
       AddToWorkList(Int.getNode());
-      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                          N->getValueType(0), Int);
     }
   }
@@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
-  SDNode *Trunc = 0;
-  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
-    // Look past truncate.
-    Trunc = N1.getNode();
-    N1 = N1.getOperand(0);
-  }
+  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+       (N1.getOperand(0).hasOneUse() &&
+        N1.getOperand(0).getOpcode() == ISD::SRL))) {
+    SDNode *Trunc = 0;
+    if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look pass the truncate.
+      Trunc = N1.getNode();
+      N1 = N1.getOperand(0);
+    }
 
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
     // Match this pattern so that we can generate simpler code:
     //
     //   %a = ...
@@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     //   brcond i32 %c ...
     //
     // into
-    // 
+    //
     //   %a = ...
     //   %b = and i32 %a, 2
     //   %c = setcc eq %b, 0
@@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
         }
       }
     }
+
+    if (Trunc)
+      // Restore N1 if the above transformation doesn't match.
+      N1 = N->getOperand(1);
   }
-  
+
   // Transform br(xor(x, y)) -> br(x != y)
   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
@@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
           Equal = true;
         }
 
-      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
-      
-      EVT SetCCVT = NodeToReplace.getValueType();
+      EVT SetCCVT = N1.getValueType();
       if (LegalTypes)
         SetCCVT = TLI.getSetCCResultType(SetCCVT);
       SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                                    Equal ? ISD::SETEQ : ISD::SETNE);
       // Replace the uses of XOR with SETCC
       WorkListRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
-      removeFromWorkList(NodeToReplace.getNode());
-      DAG.DeleteNode(NodeToReplace.getNode());
+      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+      removeFromWorkList(N1.getNode());
+      DAG.DeleteNode(N1.getNode());
       return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                          MVT::Other, Chain, SetCC, N2);
     }
@@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > LD->getAlignment())
-        return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
-                              N->getDebugLoc(),
-                              Chain, Ptr, LD->getSrcValue(),
-                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getPointerInfo(),
+                              LD->getMemoryVT(),
                               LD->isVolatile(), LD->isNonTemporal(), Align);
     }
   }
@@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
       // Replace the chain to void dependency.
       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
         ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
-                               BetterChain, Ptr,
-                               LD->getSrcValue(), LD->getSrcValueOffset(),
+                               BetterChain, Ptr, LD->getPointerInfo(),
                                LD->isVolatile(), LD->isNonTemporal(),
                                LD->getAlignment());
       } else {
-        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
-                                  LD->getDebugLoc(),
-                                  BetterChain, Ptr, LD->getSrcValue(),
-                                  LD->getSrcValueOffset(),
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getPointerInfo(),
                                   LD->getMemoryVT(),
                                   LD->isVolatile(),
                                   LD->isNonTemporal(),
@@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
       // Create token factor to keep old chain connected.
       SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                   MVT::Other, Chain, ReplLoad.getValue(1));
-      
+
       // Make sure the new and old chains are cleaned up.
       AddToWorkList(Token.getNode());
-      
+
       // Replace uses with load result and token factor. Don't add users
       // to work list.
       return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
 static std::pair<unsigned, unsigned>
 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   std::pair<unsigned, unsigned> Result(0, 0);
-  
+
   // Check for the structure we're looking for.
   if (V->getOpcode() != ISD::AND ||
       !isa<ConstantSDNode>(V->getOperand(1)) ||
       !ISD::isNormalLoad(V->getOperand(0).getNode()))
     return Result;
-  
+
   // Check the chain and pointer.
   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
-  
+
   // The store should be chained directly to the load or be an operand of a
   // tokenfactor.
   if (LD == Chain.getNode())
@@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
       }
     if (!isOk) return Result;
   }
-  
+
   // This only handles simple types.
   if (V.getValueType() != MVT::i16 &&
       V.getValueType() != MVT::i32 &&
@@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   if (NotMaskLZ == 64) return Result;  // All zero mask.
-  
+
   // See if we have a continuous run of bits.  If so, we have 0*1+0*
   if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
     return Result;
@@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
   if (V.getValueType() != MVT::i64 && NotMaskLZ)
     NotMaskLZ -= 64-V.getValueSizeInBits();
-  
+
   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
   switch (MaskedBytes) {
-  case 1: 
-  case 2: 
+  case 1:
+  case 2:
   case 4: break;
   default: return Result; // All one mask, or 5-byte mask.
   }
-  
+
   // Verify that the first bit starts at a multiple of mask so that the access
   // is aligned the same as the access width.
   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
-  
+
   Result.first = MaskedBytes;
   Result.second = NotMaskTZ/8;
   return Result;
@@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   unsigned NumBytes = MaskInfo.first;
   unsigned ByteShift = MaskInfo.second;
   SelectionDAG &DAG = DC->getDAG();
-  
+
   // Check to see if IVal is all zeros in the part being masked in by the 'or'
   // that uses this.  If not, this is not a replacement.
   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                   ByteShift*8, (ByteShift+NumBytes)*8);
   if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
-  
+
   // Check that it is legal on the target to do this.  It is legal if the new
   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
   // legalization.
   MVT VT = MVT::getIntegerVT(NumBytes*8);
   if (!DC->isTypeLegal(VT))
     return 0;
-  
+
   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   // shifted by ByteShift and truncated down to NumBytes.
   if (ByteShift)
@@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
     StOffset = ByteShift;
   else
     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
-  
+
   SDValue Ptr = St->getBasePtr();
   if (StOffset) {
     Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
                       Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
     NewAlign = MinAlign(NewAlign, StOffset);
   }
-  
+
   // Truncate down to the new size.
   IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
-  
+
   ++OpsNarrowed;
-  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, 
-                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+                      St->getPointerInfo().getWithOffset(StOffset),
                       false, false, NewAlign).getNode();
 }
 
@@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     return SDValue();
 
   unsigned Opc = Value.getOpcode();
-  
+
   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
   // is a byte mask indicating a consecutive number of bytes, check to see if
   // Y is known to provide just those bytes.  If so, we try to replace the
@@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                   Value.getOperand(1), ST,this))
         return SDValue(NewST, 0);
-                                           
+
     // Or is commutative, so try swapping X and Y.
     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
     if (MaskedLoad.first)
@@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
                                                   Value.getOperand(0), ST,this))
         return SDValue(NewST, 0);
   }
-  
+
   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
       Value.getOperand(1).getOpcode() != ISD::Constant)
     return SDValue();
@@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       Chain == SDValue(N0.getNode(), 1)) {
     LoadSDNode *LD = cast<LoadSDNode>(N0);
-    if (LD->getBasePtr() != Ptr)
+    if (LD->getBasePtr() != Ptr ||
+        LD->getPointerInfo().getAddrSpace() !=
+        ST->getPointerInfo().getAddrSpace())
       return SDValue();
 
     // Find the type to narrow it the load / op / store to.
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
                                    DAG.getConstant(PtrOff, Ptr.getValueType()));
       SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                   LD->getChain(), NewPtr,
-                                  LD->getSrcValue(), LD->getSrcValueOffset(),
+                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                   LD->isVolatile(), LD->isNonTemporal(),
                                   NewAlign);
       SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                    DAG.getConstant(NewImm, NewVT));
       SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                    NewVal, NewPtr,
-                                   ST->getSrcValue(), ST->getSrcValueOffset(),
+                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                    false, false, NewAlign);
 
       AddToWorkList(NewPtr.getNode());
@@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   return SDValue();
 }
 
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+      Value.hasOneUse() &&
+      Chain == SDValue(Value.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(Value);
+    EVT VT = LD->getMemoryVT();
+    if (!VT.isFloatingPoint() ||
+        VT != ST->getMemoryVT() ||
+        LD->isNonTemporal() ||
+        ST->isNonTemporal() ||
+        LD->getPointerInfo().getAddrSpace() != 0 ||
+        ST->getPointerInfo().getAddrSpace() != 0)
+      return SDValue();
+
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+      return SDValue();
+
+    unsigned LDAlign = LD->getAlignment();
+    unsigned STAlign = ST->getAlignment();
+    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    if (LDAlign < ABIAlign || STAlign < ABIAlign)
+      return SDValue();
+
+    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+                                LD->getChain(), LD->getBasePtr(),
+                                LD->getPointerInfo(),
+                                false, false, LDAlign);
+
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+                                 NewLD, ST->getBasePtr(),
+                                 ST->getPointerInfo(),
+                                 false, false, STAlign);
+
+    AddToWorkList(NewLD.getNode());
+    AddToWorkList(NewST.getNode());
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+                                  &DeadNodes);
+    ++LdStFP2Int;
+    return NewST;
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
@@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
-  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
@@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
         ((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
       return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
-                          Ptr, ST->getSrcValue(),
-                          ST->getSrcValueOffset(), ST->isVolatile(),
+                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                           ST->isNonTemporal(), OrigAlign);
   }
 
@@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                               bitcastToAPInt().getZExtValue(), MVT::i32);
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                               ST->isNonTemporal(), ST->getAlignment());
         }
         break;
@@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                 getZExtValue(), MVT::i64);
           return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), ST->isVolatile(),
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                               ST->isNonTemporal(), ST->getAlignment());
         } else if (!ST->isVolatile() &&
                    TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
@@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
           if (TLI.isBigEndian()) std::swap(Lo, Hi);
 
-          int SVOffset = ST->getSrcValueOffset();
           unsigned Alignment = ST->getAlignment();
           bool isVolatile = ST->isVolatile();
           bool isNonTemporal = ST->isNonTemporal();
 
           SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
-                                     Ptr, ST->getSrcValue(),
-                                     ST->getSrcValueOffset(),
+                                     Ptr, ST->getPointerInfo(),
                                      isVolatile, isNonTemporal,
                                      ST->getAlignment());
           Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                             DAG.getConstant(4, Ptr.getValueType()));
-          SVOffset += 4;
           Alignment = MinAlign(Alignment, 4U);
           SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
-                                     Ptr, ST->getSrcValue(),
-                                     SVOffset, isVolatile, isNonTemporal,
+                                     Ptr, ST->getPointerInfo().getWithOffset(4),
+                                     isVolatile, isNonTemporal,
                                      Alignment);
           return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                              St0, St1);
@@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > ST->getAlignment())
         return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                  ST->isVolatile(), ST->isNonTemporal(), Align);
     }
   }
 
+  // Try transforming a pair floating point load / store ops to integer
+  // load / store ops.
+  SDValue NewST = TransformFPLoadStorePair(N);
+  if (NewST.getNode())
+    return NewST;
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       // Replace the chain to avoid dependency.
       if (ST->isTruncatingStore()) {
         ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                      ST->getSrcValue(),ST->getSrcValueOffset(),
+                                      ST->getPointerInfo(),
                                       ST->getMemoryVT(), ST->isVolatile(),
                                       ST->isNonTemporal(), ST->getAlignment());
       } else {
         ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                 ST->getSrcValue(), ST->getSrcValueOffset(),
+                                 ST->getPointerInfo(),
                                  ST->isVolatile(), ST->isNonTemporal(),
                                  ST->getAlignment());
       }
@@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     AddToWorkList(Value.getNode());
     if (Shorter.getNode())
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
-                               Ptr, ST->getSrcValue(),
-                               ST->getSrcValueOffset(), ST->getMemoryVT(),
+                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                ST->isVolatile(), ST->isNonTemporal(),
                                ST->getAlignment());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
     if (SimplifyDemandedBits(Value,
-                             APInt::getLowBitsSet(
-                               Value.getValueType().getScalarType().getSizeInBits(),
-                               ST->getMemoryVT().getScalarType().getSizeInBits())))
+                        APInt::getLowBitsSet(
+                          Value.getValueType().getScalarType().getSizeInBits(),
+                          ST->getMemoryVT().getScalarType().getSizeInBits())))
       return SDValue(N, 0);
   }
 
@@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                             ST->getMemoryVT())) {
     return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
-                             Ptr, ST->getSrcValue(),
-                             ST->getSrcValueOffset(), ST->getMemoryVT(),
+                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                              ST->isVolatile(), ST->isNonTemporal(),
                              ST->getAlignment());
   }
@@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   if (InVal.getOpcode() == ISD::UNDEF)
     return InVec;
 
+  EVT VT = InVec.getValueType();
+
+  // If we can't generate a legal BUILD_VECTOR, exit 
+  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+    return SDValue();
+
   // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
   // vector with the inserted element.
   if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     if (Elt < Ops.size())
       Ops[Elt] = InVal;
     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                       InVec.getValueType(), &Ops[0], Ops.size());
+                       VT, &Ops[0], Ops.size());
   }
-  // If the invec is an UNDEF and if EltNo is a constant, create a new 
+  // If the invec is an UNDEF and if EltNo is a constant, create a new
   // BUILD_VECTOR with undef elements and the inserted element.
-  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && 
+  if (InVec.getOpcode() == ISD::UNDEF &&
       isa<ConstantSDNode>(EltNo)) {
-    EVT VT = InVec.getValueType();
     EVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
     SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
@@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
     if (Elt < Ops.size())
       Ops[Elt] = InVal;
     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                       InVec.getValueType(), &Ops[0], Ops.size());
+                       VT, &Ops[0], Ops.size());
   }
   return SDValue();
 }
@@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue EltNo = N->getOperand(1);
 
   if (isa<ConstantSDNode>(EltNo)) {
-    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     bool NewLoad = false;
     bool BCNumEltsChanged = false;
     EVT VT = InVec.getValueType();
     EVT ExtVT = VT.getVectorElementType();
     EVT LVT = ExtVT;
 
-    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+    if (InVec.getOpcode() == ISD::BITCAST) {
       EVT BCVT = InVec.getOperand(0).getValueType();
       if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
         return SDValue();
@@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
 
       // Select the input vector, guarding against out of range extract vector.
       unsigned NumElems = VT.getVectorNumElements();
-      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
 
-      if (InVec.getOpcode() == ISD::BIT_CONVERT)
+      if (InVec.getOpcode() == ISD::BITCAST)
         InVec = InVec.getOperand(0);
       if (ISD::isNormalLoad(InVec.getNode())) {
         LN0 = cast<LoadSDNode>(InVec);
@@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
       return SDValue();
 
+    // If Idx was -1 above, Elt is going to be -1, so just return undef.
+    if (Elt == -1)
+      return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
     unsigned Align = LN0->getAlignment();
     if (NewLoad) {
       // Check the resultant load doesn't need a higher alignment than the
       // original load.
       unsigned NewAlign =
-        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+        TLI.getTargetData()
+            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
 
       if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
         return SDValue();
@@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
 
     SDValue NewPtr = LN0->getBasePtr();
+    unsigned PtrOff = 0;
+
     if (Elt) {
-      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+      PtrOff = LVT.getSizeInBits() * Elt / 8;
       EVT PtrType = NewPtr.getValueType();
       if (TLI.isBigEndian())
         PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
 
     return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
-                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                       LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(), Align);
   }
 
@@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
         unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
         if (ExtIndex > VT.getVectorNumElements())
           return SDValue();
-        
+
         Mask.push_back(ExtIndex);
         continue;
       }
@@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 
   // FIXME: implement canonicalizations from DAG.getVectorShuffle()
 
-  // If it is a splat, check if the argument vector is a build_vector with
-  // all scalar elements the same.
-  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+  // If it is a splat, check if the argument vector is another splat or a
+  // build_vector with all scalar elements the same.
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
     SDNode *V = N0.getNode();
 
     // If this is a bit convert that changes the element type of the vector but
     // not the number of vector elements, look through it.  Be careful not to
     // look though conversions that change things like v4f32 to v2f64.
-    if (V->getOpcode() == ISD::BIT_CONVERT) {
+    if (V->getOpcode() == ISD::BITCAST) {
       SDValue ConvInput = V->getOperand(0);
       if (ConvInput.getValueType().isVector() &&
           ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
 
     if (V->getOpcode() == ISD::BUILD_VECTOR) {
-      unsigned NumElems = V->getNumOperands();
-      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
-      if (NumElems > BaseIdx) {
-        SDValue Base;
-        bool AllSame = true;
-        for (unsigned i = 0; i != NumElems; ++i) {
-          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
-            Base = V->getOperand(i);
-            break;
-          }
+      assert(V->getNumOperands() == NumElts &&
+             "BUILD_VECTOR has wrong number of operands");
+      SDValue Base;
+      bool AllSame = true;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+          Base = V->getOperand(i);
+          break;
         }
-        // Splat of <u, u, u, u>, return <u, u, u, u>
-        if (!Base.getNode())
-          return N0;
-        for (unsigned i = 0; i != NumElems; ++i) {
-          if (V->getOperand(i) != Base) {
-            AllSame = false;
-            break;
-          }
+      }
+      // Splat of <u, u, u, u>, return <u, u, u, u>
+      if (!Base.getNode())
+        return N0;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i) != Base) {
+          AllSame = false;
+          break;
         }
-        // Splat of <x, x, x, x>, return <x, x, x, x>
-        if (AllSame)
-          return N0;
       }
+      // Splat of <x, x, x, x>, return <x, x, x, x>
+      if (AllSame)
+        return N0;
     }
   }
   return SDValue();
@@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   if (N->getOpcode() == ISD::AND) {
-    if (RHS.getOpcode() == ISD::BIT_CONVERT)
+    if (RHS.getOpcode() == ISD::BITCAST)
       RHS = RHS.getOperand(0);
     if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
       SmallVector<int, 8> Indices;
@@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
                                      DAG.getConstant(0, EltVT));
       SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                  RVT, &ZeroOps[0], ZeroOps.size());
-      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
       SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
     }
   }
 
@@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   // things. Simplifying them may result in a loss of legality.
   if (LegalOperations) return SDValue();
 
-  EVT VT = N->getValueType(0);
-  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVBinOp only works on vectors!");
 
-  EVT EltType = VT.getVectorElementType();
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
           break;
       }
 
-      // If the vector element type is not legal, the BUILD_VECTOR operands
-      // are promoted and implicitly truncated.  Make that explicit here.
-      if (LHSOp.getValueType() != EltType)
-        LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
-      if (RHSOp.getValueType() != EltType)
-        RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
-      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+      EVT VT = LHSOp.getValueType();
+      assert(RHSOp.getValueType() == VT &&
+             "SimplifyVBinOp with different BUILD_VECTOR element types");
+      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
                                    LHSOp, RHSOp);
       if (FoldOp.getOpcode() != ISD::UNDEF &&
           FoldOp.getOpcode() != ISD::Constant &&
@@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
       AddToWorkList(FoldOp.getNode());
     }
 
-    if (Ops.size() == LHS.getNumOperands()) {
-      EVT VT = LHS.getValueType();
-      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
-                         &Ops[0], Ops.size());
-    }
+    if (Ops.size() == LHS.getNumOperands())
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                         LHS.getValueType(), &Ops[0], Ops.size());
   }
 
   return SDValue();
@@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                     SDValue RHS) {
 
+  // Cannot simplify select with vector condition
+  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
   // If this is a select from two identical things, try to pull the operation
   // through the select.
-  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
-    // If this is a load and the token chain is identical, replace the select
-    // of two loads with a load through a select of the address to load from.
-    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
-    // constants have been dropped into the constant pool.
-    if (LHS.getOpcode() == ISD::LOAD &&
+  if (LHS.getOpcode() != RHS.getOpcode() ||
+      !LHS.hasOneUse() || !RHS.hasOneUse())
+    return false;
+
+  // If this is a load and the token chain is identical, replace the select
+  // of two loads with a load through a select of the address to load from.
+  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+  // constants have been dropped into the constant pool.
+  if (LHS.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+    LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+    // Token chains must be identical.
+    if (LHS.getOperand(0) != RHS.getOperand(0) ||
         // Do not let this transformation reduce the number of volatile loads.
-        !cast<LoadSDNode>(LHS)->isVolatile() &&
-        !cast<LoadSDNode>(RHS)->isVolatile() &&
-        // Token chains must be identical.
-        LHS.getOperand(0) == RHS.getOperand(0)) {
-      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
-      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
-
-      // If this is an EXTLOAD, the VT's must match.
-      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+        LLD->isVolatile() || RLD->isVolatile() ||
+        // If this is an EXTLOAD, the VT's must match.
+        LLD->getMemoryVT() != RLD->getMemoryVT() ||
+        // If this is an EXTLOAD, the kind of extension must match.
+        (LLD->getExtensionType() != RLD->getExtensionType() &&
+         // The only exception is if one of the extensions is anyext.
+         LLD->getExtensionType() != ISD::EXTLOAD &&
+         RLD->getExtensionType() != ISD::EXTLOAD) ||
         // FIXME: this discards src value information.  This is
         // over-conservative. It would be beneficial to be able to remember
         // both potential memory locations.  Since we are discarding
         // src value info, don't do the transformation if the memory
         // locations are not in the default address space.
-        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
-        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
-          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
-            LLDAddrSpace = PT->getAddressSpace();
-        }
-        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
-          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
-            RLDAddrSpace = PT->getAddressSpace();
-        }
-        SDValue Addr;
-        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
-          if (TheSelect->getOpcode() == ISD::SELECT) {
-            // Check that the condition doesn't reach either load.  If so, folding
-            // this will induce a cycle into the DAG.
-            if ((!LLD->hasAnyUseOfValue(1) ||
-                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
-                (!RLD->hasAnyUseOfValue(1) ||
-                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
-              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
-                                 LLD->getBasePtr().getValueType(),
-                                 TheSelect->getOperand(0), LLD->getBasePtr(),
-                                 RLD->getBasePtr());
-            }
-          } else {
-            // Check that the condition doesn't reach either load.  If so, folding
-            // this will induce a cycle into the DAG.
-            if ((!LLD->hasAnyUseOfValue(1) ||
-                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
-                (!RLD->hasAnyUseOfValue(1) ||
-                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
-              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
-                                 LLD->getBasePtr().getValueType(),
-                                 TheSelect->getOperand(0),
-                                 TheSelect->getOperand(1),
-                                 LLD->getBasePtr(), RLD->getBasePtr(),
-                                 TheSelect->getOperand(4));
-            }
-          }
-        }
-
-        if (Addr.getNode()) {
-          SDValue Load;
-          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
-            Load = DAG.getLoad(TheSelect->getValueType(0),
-                               TheSelect->getDebugLoc(),
-                               LLD->getChain(),
-                               Addr, 0, 0,
-                               LLD->isVolatile(),
-                               LLD->isNonTemporal(),
-                               LLD->getAlignment());
-          } else {
-            Load = DAG.getExtLoad(LLD->getExtensionType(),
-                                  TheSelect->getValueType(0),
-                                  TheSelect->getDebugLoc(),
-                                  LLD->getChain(), Addr, 0, 0,
-                                  LLD->getMemoryVT(),
-                                  LLD->isVolatile(),
-                                  LLD->isNonTemporal(),
-                                  LLD->getAlignment());
-          }
+        LLD->getPointerInfo().getAddrSpace() != 0 ||
+        RLD->getPointerInfo().getAddrSpace() != 0)
+      return false;
 
-          // Users of the select now use the result of the load.
-          CombineTo(TheSelect, Load);
+    // Check that the select condition doesn't reach either load.  If so,
+    // folding this will induce a cycle into the DAG.  If not, this is safe to
+    // xform, so create a select of the addresses.
+    SDValue Addr;
+    if (TheSelect->getOpcode() == ISD::SELECT) {
+      SDNode *CondNode = TheSelect->getOperand(0).getNode();
+      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+        return false;
+      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0), LLD->getBasePtr(),
+                         RLD->getBasePtr());
+    } else {  // Otherwise SELECT_CC
+      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+      if ((LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+          (LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))))
+        return false;
 
-          // Users of the old loads now use the new load's chain.  We know the
-          // old-load value is dead now.
-          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
-          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
-          return true;
-        }
-      }
-    }
+      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0),
+                         TheSelect->getOperand(1),
+                         LLD->getBasePtr(), RLD->getBasePtr(),
+                         TheSelect->getOperand(4));
+    }
+
+    SDValue Load;
+    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+      Load = DAG.getLoad(TheSelect->getValueType(0),
+                         TheSelect->getDebugLoc(),
+                         // FIXME: Discards pointer info.
+                         LLD->getChain(), Addr, MachinePointerInfo(),
+                         LLD->isVolatile(), LLD->isNonTemporal(),
+                         LLD->getAlignment());
+    } else {
+      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+                            RLD->getExtensionType() : LLD->getExtensionType(),
+                            TheSelect->getDebugLoc(),
+                            TheSelect->getValueType(0),
+                            // FIXME: Discards pointer info.
+                            LLD->getChain(), Addr, MachinePointerInfo(),
+                            LLD->getMemoryVT(), LLD->isVolatile(),
+                            LLD->isNonTemporal(), LLD->getAlignment());
+    }
+
+    // Users of the select now use the result of the load.
+    CombineTo(TheSelect, Load);
+
+    // Users of the old loads now use the new load's chain.  We know the
+    // old-load value is dead now.
+    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+    return true;
   }
 
   return false;
@@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                                       ISD::CondCode CC, bool NotExtCompare) {
   // (x ? y : y) -> y.
   if (N2 == N3) return N2;
-  
+
   EVT VT = N2.getValueType();
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         return DAG.getNode(ISD::FABS, DL, VT, N3);
     }
   }
-  
+
   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
   // in it.  This is a win when the constant is not otherwise available because
@@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         };
         const Type *FPTy = Elts[0]->getType();
         const TargetData &TD = *TLI.getTargetData();
-        
+
         // Create a ConstantArray of the two constants.
         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
         SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
@@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         SDValue Zero = DAG.getIntPtrConstant(0);
         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
         SDValue One = DAG.getIntPtrConstant(EltSize);
-        
+
         SDValue Cond = DAG.getSetCC(DL,
                                     TLI.getSetCCResultType(N0.getValueType()),
                                     N0, N1, CC);
@@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
                             CstOffset);
         return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
-                           PseudoSourceValue::getConstantPool(), 0, false,
+                           MachinePointerInfo::getConstantPool(), false,
                            false, Alignment);
 
       }
-    }  
+    }
 
   // Check to see if we can perform the "gzip trick", transforming
   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
@@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
     }
   }
 
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y is has a single bit set.
+  // A plaintext description would be, we can turn the SELECT_CC into an AND
+  // when the condition can be materialized as an all-ones register.  Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
+  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+      N0->getValueType(0) == VT &&
+      N1C && N1C->isNullValue() &&
+      N2C && N2C->isNullValue()) {
+    SDValue AndLHS = N0->getOperand(0);
+    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+      // Shift the tested bit over the sign bit.
+      APInt AndMask = ConstAndRHS->getAPIntValue();
+      SDValue ShlAmt =
+        DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+      // Now arithmetic right shift it all the way over, so the result is either
+      // all-ones, or zero.
+      SDValue ShrAmt =
+        DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+    }
+  }
+
   // fold select C, 16, 0 -> shl C, 4
   if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
       TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
@@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
 }
 
 /// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself.  Provides base object and offset as results.
+// to alias with anything but itself.  Provides base object and offset as
+// results.
 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                            const GlobalValue *&GV, void *&CV) {
   // Assume it is a primitive operation.
@@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
       Offset += C->getZExtValue();
     }
   }
-  
+
   // Return the underlying GlobalValue, and update the Offset.  Return false
   // for GlobalAddressSDNode since the same GlobalAddress may be represented
   // by multiple nodes with different offsets.
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
 bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                           const Value *SrcValue1, int SrcValueOffset1,
                           unsigned SrcValueAlign1,
+                          const MDNode *TBAAInfo1,
                           SDValue Ptr2, int64_t Size2,
                           const Value *SrcValue2, int SrcValueOffset2,
-                          unsigned SrcValueAlign2) const {
+                          unsigned SrcValueAlign2,
+                          const MDNode *TBAAInfo2) const {
   // If they are the same then they must be aliases.
   if (Ptr1 == Ptr2) return true;
 
@@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
     return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
 
-  // If we know what the bases are, and they aren't identical, then we know they
-  // cannot alias.
+  // It is possible for different frame indices to alias each other, mostly
+  // when tail call optimization reuses return address slots for arguments.
+  // To catch this case, look up the actual index of frame indices to compute
+  // the real alias relationship.
+  if (isFrameIndex1 && isFrameIndex2) {
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+
+  // Otherwise, if we know what the bases are, and they aren't identical, then
+  // we know they cannot alias.
   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
     return false;
 
@@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
       (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
     int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
     int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-    
+
     // There is no overlap between these relatively aligned accesses of similar
     // size, return no alias.
     if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
       return false;
   }
-  
+
   if (CombinerGlobalAA) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
     int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
     int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
     AliasAnalysis::AliasResult AAResult =
-                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
     if (AAResult == AliasAnalysis::NoAlias)
       return false;
   }
@@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
 /// node.  Returns true if the operand was a load.
 bool DAGCombiner::FindAliasInfo(SDNode *N,
                         SDValue &Ptr, int64_t &Size,
-                        const Value *&SrcValue, 
+                        const Value *&SrcValue,
                         int &SrcValueOffset,
-                        unsigned &SrcValueAlign) const {
+                        unsigned &SrcValueAlign,
+                        const MDNode *&TBAAInfo) const {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     Ptr = LD->getBasePtr();
     Size = LD->getMemoryVT().getSizeInBits() >> 3;
     SrcValue = LD->getSrcValue();
     SrcValueOffset = LD->getSrcValueOffset();
     SrcValueAlign = LD->getOriginalAlignment();
+    TBAAInfo = LD->getTBAAInfo();
     return true;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     Ptr = ST->getBasePtr();
@@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
     SrcValue = ST->getSrcValue();
     SrcValueOffset = ST->getSrcValueOffset();
     SrcValueAlign = ST->getOriginalAlignment();
+    TBAAInfo = ST->getTBAAInfo();
   } else {
     llvm_unreachable("FindAliasInfo expected a memory operand");
   }
@@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   const Value *SrcValue;
   int SrcValueOffset;
   unsigned SrcValueAlign;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, 
-                              SrcValueAlign);
+  const MDNode *SrcTBAAInfo;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+                              SrcValueAlign, SrcTBAAInfo);
 
   // Starting off.
   Chains.push_back(OriginalChain);
   unsigned Depth = 0;
-  
+
   // Look at each chain and determine if it is an alias.  If so, add it to the
   // aliases list.  If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.back();
     Chains.pop_back();
-    
-    // For TokenFactor nodes, look at each operand and only continue up the 
-    // chain until we find two aliases.  If we've seen two aliases, assume we'll 
+
+    // For TokenFactor nodes, look at each operand and only continue up the
+    // chain until we find two aliases.  If we've seen two aliases, assume we'll
     // find more and revert to original chain since the xform is unlikely to be
     // profitable.
-    // 
-    // FIXME: The depth check could be made to return the last non-aliasing 
+    //
+    // FIXME: The depth check could be made to return the last non-aliasing
     // chain we found before we hit a tokenfactor rather than the original
     // chain.
     if (Depth > 6 || Aliases.size() == 2) {
@@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
       const Value *OpSrcValue;
       int OpSrcValueOffset;
       unsigned OpSrcValueAlign;
+      const MDNode *OpSrcTBAAInfo;
       bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                     OpSrcValue, OpSrcValueOffset,
-                                    OpSrcValueAlign);
+                                    OpSrcValueAlign,
+                                    OpSrcTBAAInfo);
 
       // If chain is alias then stop here.
       if (!(IsLoad && IsOpLoad) &&
           isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  SrcTBAAInfo,
                   OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
-                  OpSrcValueAlign)) {
+                  OpSrcValueAlign, OpSrcTBAAInfo)) {
         Aliases.push_back(Chain);
       } else {
         // Look further up the chain.
@@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
     // If a single operand then chain to it.  We don't need to revisit it.
     return Aliases[0];
   }
-  
+
   // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 
+  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                      &Aliases[0], Aliases.size());
 }
 
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a4eed71e65c0..490b857b0e9c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 /// startNewBlock - Set the current block to which generated machine
@@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
             TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
   }
-  
+
   // If target-independent code couldn't handle the value, give target-specific
   // code a try.
   if (!Reg && isa<Constant>(V))
     Reg = TargetMaterializeConstant(cast<Constant>(V));
-  
+
   // Don't cache constant materializations in the general ValueMap.
   // To do so would require tracking what uses they dominate.
   if (Reg != 0) {
@@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
     LocalValueMap[I] = Reg;
     return Reg;
   }
-  
+
   unsigned &AssignedReg = FuncInfo.ValueMap[I];
   if (AssignedReg == 0)
     // Use the new register.
@@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
       // If this is a constant subscript, handle it quickly.
       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero()) continue;
-        uint64_t Offs = 
+        uint64_t Offs =
           TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
         if (N == 0)
@@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
         NIsKill = true;
         continue;
       }
-      
+
       // N = N + Idx * ElementSize;
       uint64_t ElementSize = TD.getTypeAllocSize(Ty);
       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
@@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {
       return true;
 
     const Value *Address = DI->getAddress();
-    if (!Address)
+    if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
       return true;
-    if (isa<UndefValue>(Address))
-      return true;
-    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-    // Don't handle byval struct arguments or VLAs, for example.
-    if (!AI)
-      // Building the map above is target independent.  Generating DBG_VALUE
-      // inline is target dependent; do this now.
-      (void)TargetSelectInstruction(cast<Instruction>(I));
+
+    unsigned Reg = 0;
+    unsigned Offset = 0;
+    if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+      if (Arg->hasByValAttr()) {
+        // Byval arguments' frame index is recorded during argument lowering.
+        // Use this info directly.
+        Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+        if (Offset)
+          Reg = TRI.getFrameRegister(*FuncInfo.MF);
+      }
+    }
+    if (!Reg)
+      Reg = getRegForValue(Address);
+
+    if (Reg)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(Reg, RegState::Debug).addImm(Offset)
+        .addMetadata(DI->getVariable());
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
-      // Insert an undef so we can see what we dropped.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
-        .addReg(0U).addImm(DI->getOffset())
-        .addMetadata(DI->getVariable());
-    }     
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
+    }
     return true;
   }
   case Intrinsic::eh_exception: {
@@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {
 bool FastISel::SelectCast(const User *I, unsigned Opcode) {
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
-    
+
   if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
       DstVT == MVT::Other || !DstVT.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
     return false;
-    
+
   // Check if the destination type is legal. Or as a special case,
   // it may be i1 if we're doing a truncate because that's
   // easy and somewhat common.
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
                                   InputReg, InputRegIsKill);
   if (!ResultReg)
     return false;
-    
+
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {
     return true;
   }
 
-  // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+  // Bitcasts of other values become reg-reg copies or BITCAST operators.
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
-  
+
   if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
       DstVT == MVT::Other || !DstVT.isSimple() ||
       !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
     // Unhandled type. Halt "fast" selection and bail.
     return false;
-  
+
   unsigned Op0 = getRegForValue(I->getOperand(0));
   if (Op0 == 0)
     // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
-  
+
   // First, try to perform the bitcast by inserting a reg-reg copy.
   unsigned ResultReg = 0;
   if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
@@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {
               ResultReg).addReg(Op0);
     }
   }
-  
-  // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+
+  // If the reg-reg copy failed, select a BITCAST opcode.
   if (!ResultReg)
     ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
-                           ISD::BIT_CONVERT, Op0, Op0IsKill);
-  
+                           ISD::BITCAST, Op0, Op0IsKill);
+
   if (!ResultReg)
     return false;
-  
+
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {
     return false;
 
   unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
-                               ISD::BIT_CONVERT, OpReg, OpRegIsKill);
+                               ISD::BITCAST, OpReg, OpRegIsKill);
   if (IntReg == 0)
     return false;
 
@@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {
     return false;
 
   ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
-                         ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
+                         ISD::BITCAST, IntResultReg, /*Kill=*/true);
   if (ResultReg == 0)
     return false;
 
@@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
 
     // Dynamic-sized alloca is not handled yet.
     return false;
-    
+
   case Instruction::Call:
     return SelectCall(I);
-  
+
   case Instruction::BitCast:
     return SelectBitCast(I);
 
@@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,
   return 0;
 }
 
-unsigned FastISel::FastEmit_rr(MVT, MVT, 
+unsigned FastISel::FastEmit_rr(MVT, MVT,
                                unsigned,
                                unsigned /*Op0*/, bool /*Op0IsKill*/,
                                unsigned /*Op1*/, bool /*Op1IsKill*/) {
@@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                   uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-  
+
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
   else {
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5ef6404ee5d6..98582ba99f14 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 61c2a90e7edc..e309defba20f 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -31,11 +31,11 @@
 using namespace llvm;
 
 /// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
 /// not go into the resulting MachineInstr).
 unsigned InstrEmitter::CountResults(SDNode *Node) {
   unsigned N = Node->getNumValues();
-  while (N && Node->getValueType(N - 1) == MVT::Flag)
+  while (N && Node->getValueType(N - 1) == MVT::Glue)
     --N;
   if (N && Node->getValueType(N - 1) == MVT::Other)
     --N;    // Skip over chain result.
@@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
 }
 
 /// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
 /// Compute the number of actual operands that will go into the resulting
 /// MachineInstr.
 unsigned InstrEmitter::CountOperands(SDNode *Node) {
   unsigned N = Node->getNumOperands();
-  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
     --N;
   if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
     --N; // Ignore chain if it exists.
@@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
     if (IsClone)
       VRBaseMap.erase(Op);
     bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
-    isNew = isNew; // Silence compiler warning.
+    (void)isNew; // Silence compiler warning.
     assert(isNew && "Node emitted out of order - early");
     return;
   }
@@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
           if (Op.getNode() != Node || Op.getResNo() != ResNo)
             continue;
           EVT VT = Node->getValueType(Op.getResNo());
-          if (VT == MVT::Other || VT == MVT::Flag)
+          if (VT == MVT::Other || VT == MVT::Glue)
             continue;
           Match = false;
           if (User->isMachineOpcode()) {
@@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   if (IsClone)
     VRBaseMap.erase(Op);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
     if (IsClone)
       VRBaseMap.erase(Op);
     bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
-    isNew = isNew; // Silence compiler warning.
+    (void)isNew; // Silence compiler warning.
     assert(isNew && "Node emitted out of order - early");
   }
 }
@@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
                                  DenseMap<SDValue, unsigned> &VRBaseMap,
                                  bool IsDebug, bool IsClone, bool IsCloned) {
   assert(Op.getValueType() != MVT::Other &&
-         Op.getValueType() != MVT::Flag &&
-         "Chain and flag operands should occur at end of operand list!");
+         Op.getValueType() != MVT::Glue &&
+         "Chain and glue operands should occur at end of operand list!");
   // Get/emit the operand.
   unsigned VReg = getVR(Op, VRBaseMap);
   assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
@@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                                             BA->getTargetFlags()));
   } else {
     assert(Op.getValueType() != MVT::Other &&
-           Op.getValueType() != MVT::Flag &&
-           "Chain and flag operands should occur at end of operand list!");
+           Op.getValueType() != MVT::Glue &&
+           "Chain and glue operands should occur at end of operand list!");
     AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
                        IsDebug, IsClone, IsCloned);
   }
@@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
 
     // Figure out the register class to create for the destreg.
     unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
-    const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
-    const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
-    assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
-
-    // Figure out the register class to create for the destreg.
-    // Note that if we're going to directly use an existing register,
-    // it must be precisely the required class, and not a subclass
-    // thereof.
-    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
-      // Create the reg
-      assert(SRC && "Couldn't find source register class");
-      VRBase = MRI->createVirtualRegister(SRC);
-    }
+    MachineInstr *DefMI = MRI->getVRegDef(VReg);
+    unsigned SrcReg, DstReg, DefSubIdx;
+    if (DefMI &&
+        TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+        SubIdx == DefSubIdx) {
+      // Optimize these:
+      // r1025 = s/zext r1024, 4
+      // r1026 = extract_subreg r1025, 4
+      // to a copy
+      // r1026 = copy r1024
+      const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+      VRBase = MRI->createVirtualRegister(TRC);
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+    } else {
+      const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+      const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+      assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+      // Figure out the register class to create for the destreg.
+      // Note that if we're going to directly use an existing register,
+      // it must be precisely the required class, and not a subclass
+      // thereof.
+      if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+        // Create the reg
+        assert(SRC && "Couldn't find source register class");
+        VRBase = MRI->createVirtualRegister(SRC);
+      }
 
-    // Create the extract_subreg machine instruction.
-    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
-                               TII->get(TargetOpcode::COPY), VRBase);
+      // Create the extract_subreg machine instruction.
+      MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                                 TII->get(TargetOpcode::COPY), VRBase);
 
-    // Add source, and subreg index
-    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
-               IsClone, IsCloned);
-    assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
-           "Cannot yet extract from physregs");
-    MI->getOperand(1).setSubReg(SubIdx);
-    MBB->insert(InsertPos, MI);
+      // Add source, and subreg index
+      AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
+      assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+             "Cannot yet extract from physregs");
+      MI->getOperand(1).setSubReg(SubIdx);
+      MBB->insert(InsertPos, MI);
+    }
   } else if (Opc == TargetOpcode::INSERT_SUBREG ||
              Opc == TargetOpcode::SUBREG_TO_REG) {
     SDValue N0 = Node->getOperand(0);
@@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
      
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
 
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
       const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
       const TargetRegisterClass *SRC =
         TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
-      if (!SRC)
-        llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
-      if (SRC != RC) {
+      if (SRC && SRC != RC) {
         MRI->setRegClass(NewVReg, SRC);
         RC = SRC;
       }
@@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
   MBB->insert(InsertPos, MI);
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
-  isNew = isNew; // Silence compiler warning.
+  (void)isNew; // Silence compiler warning.
   assert(isNew && "Node emitted out of order - early");
 }
 
@@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   // The MachineInstr constructor adds implicit-def operands. Scan through
   // these to determine which are dead.
   if (MI->getNumOperands() != 0 &&
-      Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
     // First, collect all used registers.
     SmallVector<unsigned, 8> UsedRegs;
-    for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
       if (F->getOpcode() == ISD::CopyFromReg)
         UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
       else {
@@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
           if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
             unsigned Reg = R->getReg();
-            if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+            if (TargetRegisterInfo::isPhysicalRegister(Reg))
               UsedRegs.push_back(Reg);
           }
       }
@@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   // hook knows where in the block to insert the replacement code.
   MBB->insert(InsertPos, MI);
 
-  if (II.usesCustomInsertionHook()) {
-    // Insert this instruction into the basic block using a target
-    // specific inserter which may returns a new basic block.
-    bool AtEnd = InsertPos == MBB->end();
-    MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
-    if (NewMBB != MBB) {
-      if (AtEnd)
-        InsertPos = NewMBB->end();
-      MBB = NewMBB;
-    }
-    return;
-  }
-  
-  // Additional results must be an physical register def.
+  // Additional results must be physical register defs.
   if (HasPhysRegOuts) {
     for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
       unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
       // If there are no uses, mark the register as dead now, so that
       // MachineLICM/Sink can see that it's dead. Don't do this if the
-      // node has a Flag value, for the benefit of targets still using
-      // Flag for values in physregs.
-      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+      // node has a Glue value, for the benefit of targets still using
+      // Glue for values in physregs.
+      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
         MI->addRegisterDead(Reg, TRI);
     }
   }
   
   // If the instruction has implicit defs and the node doesn't, mark the
-  // implicit def as dead.  If the node has any flag outputs, we don't do this
-  // because we don't know what implicit defs are being used by flagged nodes.
-  if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+  // implicit def as dead.  If the node has any glue outputs, we don't do this
+  // because we don't know what implicit defs are being used by glued nodes.
+  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
     if (const unsigned *IDList = II.getImplicitDefs()) {
       for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
            i != e; ++i)
@@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       
   case ISD::INLINEASM: {
     unsigned NumOps = Node->getNumOperands();
-    if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
-      --NumOps;  // Ignore the flag operand.
+    if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+      --NumOps;  // Ignore the glue operand.
       
     // Create the inline asm machine instruction.
     MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
     MI->addOperand(MachineOperand::CreateES(AsmStr));
       
-    // Add the isAlignStack bit.
-    int64_t isAlignStack =
-      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+    // Add the HasSideEffect and isAlignStack bits.
+    int64_t ExtraInfo =
+      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
                           getZExtValue();
-    MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+    MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
 
     // Add all of the operand registers to the instruction.
     for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2981cd3f1cab..49c862ce3e0b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
@@ -65,11 +66,6 @@ class SelectionDAGLegalize {
   /// against each other, including inserted libcalls.
   SDValue LastCALLSEQ_END;
 
-  /// IsLegalizingCall - This member is used *only* for purposes of providing
-  /// helpful assertions that a libcall isn't created while another call is
-  /// being legalized (which could lead to non-serialized call sequences).
-  bool IsLegalizingCall;
-
   enum LegalizeAction {
     Legal,      // The target natively supports this operation.
     Promote,    // This operation should be executed in a larger type.
@@ -91,6 +87,9 @@ class SelectionDAGLegalize {
     // If someone requests legalization of the new node, return itself.
     if (From != To)
       LegalizedNodes.insert(std::make_pair(To, To));
+    
+    // Transfer SDDbgValues.
+    DAG.TransferDbgValues(From, To);
   }
 
 public:
@@ -172,6 +171,7 @@ private:
   SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandInsertToVectorThroughStack(SDValue Op);
   SDValue ExpandVectorBuildThroughStack(SDNode* Node);
 
   std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
 
 void SelectionDAGLegalize::LegalizeDAG() {
   LastCALLSEQ_END = DAG.getEntryNode();
-  IsLegalizingCall = false;
 
   // The legalize process is inherently a bottom-up recursive process (users
   // legalize their uses before themselves).  Given infinite stack space, we
@@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() {
 
 /// FindCallEndFromCallStart - Given a chained node that is part of a call
 /// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node) {
-  if (Node->getOpcode() == ISD::CALLSEQ_END)
-    return Node;
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+  // Nested CALLSEQ_START/END constructs aren't yet legal,
+  // but we can DTRT and handle them correctly here.
+  if (Node->getOpcode() == ISD::CALLSEQ_START)
+    depth++;
+  else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+    depth--;
+    if (depth == 0)
+      return Node;
+  }
   if (Node->use_empty())
     return 0;   // No CallSeqEnd
 
@@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
     SDNode *User = *UI;
     for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
       if (User->getOperand(i) == TheChain)
-        if (SDNode *Result = FindCallEndFromCallStart(User))
+        if (SDNode *Result = FindCallEndFromCallStart(User, depth))
           return Result;
   }
   return 0;
@@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
 /// FindCallStartFromCallEnd - Given a chained node that is part of a call
 /// sequence, find the CALLSEQ_START node that initiates the call sequence.
 static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  int nested = 0;
   assert(Node && "Didn't find callseq_start for a call??");
-  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-
-  assert(Node->getOperand(0).getValueType() == MVT::Other &&
-         "Node doesn't have a token chain argument!");
-  return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+  while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+    Node = Node->getOperand(0).getNode();
+    assert(Node->getOperand(0).getValueType() == MVT::Other &&
+           "Node doesn't have a token chain argument!");
+    switch (Node->getOpcode()) {
+    default:
+      break;
+    case ISD::CALLSEQ_START:
+      if (!nested)
+        return Node;
+      nested--;
+      break;
+    case ISD::CALLSEQ_END:
+      nested++;
+      break;
+    }
+  }
+  return 0;
 }
 
 /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
   SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   if (Extend)
-    return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+    return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
                           DAG.getEntryNode(),
-                          CPIdx, PseudoSourceValue::getConstantPool(),
-                          0, VT, false, false, Alignment);
+                          CPIdx, MachinePointerInfo::getConstantPool(),
+                          VT, false, false, Alignment);
   return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
-                     PseudoSourceValue::getConstantPool(), 0, false, false,
+                     MachinePointerInfo::getConstantPool(), false, false,
                      Alignment);
 }
 
@@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   SDValue Val = ST->getValue();
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
-  int SVOffset = ST->getSrcValueOffset();
   DebugLoc dl = ST->getDebugLoc();
   if (ST->getMemoryVT().isFloatingPoint() ||
       ST->getMemoryVT().isVector()) {
@@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       // Expand to a bitconvert of the value to the integer type of the
       // same size, then a (misaligned) int store.
       // FIXME: Does not handle truncating floating point stores!
-      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
-      return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
-                          SVOffset, ST->isVolatile(), ST->isNonTemporal(),
-                          Alignment);
+      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+      return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+                          ST->isVolatile(), ST->isNonTemporal(), Alignment);
     } else {
       // Do a (aligned) store to a stack slot, then copy from the stack slot
       // to the final destination using (unaligned) integer loads and stores.
@@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
 
       // Perform the original store, only redirected to the stack slot.
       SDValue Store = DAG.getTruncStore(Chain, dl,
-                                        Val, StackPtr, NULL, 0, StoredVT,
-                                        false, false, 0);
+                                        Val, StackPtr, MachinePointerInfo(),
+                                        StoredVT, false, false, 0);
       SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
       SmallVector<SDValue, 8> Stores;
       unsigned Offset = 0;
@@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       // Do all but one copies using the full register width.
       for (unsigned i = 1; i < NumRegs; i++) {
         // Load one integer register's worth from the stack slot.
-        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+                                   MachinePointerInfo(),
                                    false, false, 0);
         // Store it to the final location.  Remember the store.
         Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
-                                      ST->getSrcValue(), SVOffset + Offset,
+                                    ST->getPointerInfo().getWithOffset(Offset),
                                       ST->isVolatile(), ST->isNonTemporal(),
                                       MinAlign(ST->getAlignment(), Offset)));
         // Increment the pointers.
@@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
                                     8 * (StoredBytes - Offset));
 
       // Load from the stack slot.
-      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
-                                    NULL, 0, MemVT, false, false, 0);
+      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+                                    MachinePointerInfo(),
+                                    MemVT, false, false, 0);
 
       Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
-                                         ST->getSrcValue(), SVOffset + Offset,
+                                         ST->getPointerInfo()
+                                           .getWithOffset(Offset),
                                          MemVT, ST->isVolatile(),
                                          ST->isNonTemporal(),
                                          MinAlign(ST->getAlignment(), Offset)));
@@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   // Store the two parts
   SDValue Store1, Store2;
   Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
-                             ST->getSrcValue(), SVOffset, NewStoredVT,
+                             ST->getPointerInfo(), NewStoredVT,
                              ST->isVolatile(), ST->isNonTemporal(), Alignment);
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getConstant(IncrementSize, TLI.getPointerTy()));
   Alignment = MinAlign(Alignment, IncrementSize);
   Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
-                             ST->getSrcValue(), SVOffset + IncrementSize,
+                             ST->getPointerInfo().getWithOffset(IncrementSize),
                              NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
                              Alignment);
 
@@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
 static
 SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                             const TargetLowering &TLI) {
-  int SVOffset = LD->getSrcValueOffset();
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
   EVT VT = LD->getValueType(0);
@@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
     if (TLI.isTypeLegal(intVT)) {
       // Expand to a (misaligned) integer load of the same size,
       // then bitconvert to floating point or vector.
-      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
-                                    SVOffset, LD->isVolatile(),
+      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+                                    LD->isVolatile(),
                                     LD->isNonTemporal(), LD->getAlignment());
-      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
       if (VT.isFloatingPoint() && LoadedVT != VT)
         Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
 
       SDValue Ops[] = { Result, Chain };
       return DAG.getMergeValues(Ops, 2, dl);
-    } else {
-      // Copy the value to a (aligned) stack slot using (unaligned) integer
-      // loads and stores, then do a (aligned) load from the stack slot.
-      EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
-      unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
-      unsigned RegBytes = RegVT.getSizeInBits() / 8;
-      unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
-      // Make sure the stack slot is also aligned for the register type.
-      SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
-      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
-      SmallVector<SDValue, 8> Stores;
-      SDValue StackPtr = StackBase;
-      unsigned Offset = 0;
-
-      // Do all but one copies using the full register width.
-      for (unsigned i = 1; i < NumRegs; i++) {
-        // Load one integer register's worth from the original location.
-        SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
-                                   SVOffset + Offset, LD->isVolatile(),
-                                   LD->isNonTemporal(),
-                                   MinAlign(LD->getAlignment(), Offset));
-        // Follow the load with a store to the stack slot.  Remember the store.
-        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
-                                      NULL, 0, false, false, 0));
-        // Increment the pointers.
-        Offset += RegBytes;
-        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
-        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
-                               Increment);
-      }
+    }
 
-      // The last copy may be partial.  Do an extending load.
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
-                                    8 * (LoadedBytes - Offset));
-      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
-                                    LD->getSrcValue(), SVOffset + Offset,
-                                    MemVT, LD->isVolatile(),
-                                    LD->isNonTemporal(),
-                                    MinAlign(LD->getAlignment(), Offset));
+    // Copy the value to a (aligned) stack slot using (unaligned) integer
+    // loads and stores, then do a (aligned) load from the stack slot.
+    EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+    unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+    unsigned RegBytes = RegVT.getSizeInBits() / 8;
+    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+    // Make sure the stack slot is also aligned for the register type.
+    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+    SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+    SmallVector<SDValue, 8> Stores;
+    SDValue StackPtr = StackBase;
+    unsigned Offset = 0;
+
+    // Do all but one copies using the full register width.
+    for (unsigned i = 1; i < NumRegs; i++) {
+      // Load one integer register's worth from the original location.
+      SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+                                 LD->getPointerInfo().getWithOffset(Offset),
+                                 LD->isVolatile(), LD->isNonTemporal(),
+                                 MinAlign(LD->getAlignment(), Offset));
       // Follow the load with a store to the stack slot.  Remember the store.
-      // On big-endian machines this requires a truncating store to ensure
-      // that the bits end up in the right place.
-      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
-                                         NULL, 0, MemVT, false, false, 0));
-
-      // The order of the stores doesn't matter - say it with a TokenFactor.
-      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
-                               Stores.size());
-
-      // Finally, perform the original load only redirected to the stack slot.
-      Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
-                            NULL, 0, LoadedVT, false, false, 0);
-
-      // Callers expect a MERGE_VALUES node.
-      SDValue Ops[] = { Load, TF };
-      return DAG.getMergeValues(Ops, 2, dl);
+      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+                                    MachinePointerInfo(), false, false, 0));
+      // Increment the pointers.
+      Offset += RegBytes;
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                             Increment);
     }
+
+    // The last copy may be partial.  Do an extending load.
+    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                  8 * (LoadedBytes - Offset));
+    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+                                  LD->getPointerInfo().getWithOffset(Offset),
+                                  MemVT, LD->isVolatile(),
+                                  LD->isNonTemporal(),
+                                  MinAlign(LD->getAlignment(), Offset));
+    // Follow the load with a store to the stack slot.  Remember the store.
+    // On big-endian machines this requires a truncating store to ensure
+    // that the bits end up in the right place.
+    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+                                       MachinePointerInfo(), MemVT,
+                                       false, false, 0));
+
+    // The order of the stores doesn't matter - say it with a TokenFactor.
+    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+                             Stores.size());
+
+    // Finally, perform the original load only redirected to the stack slot.
+    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+                          MachinePointerInfo(), LoadedVT, false, false, 0);
+
+    // Callers expect a MERGE_VALUES node.
+    SDValue Ops[] = { Load, TF };
+    return DAG.getMergeValues(Ops, 2, dl);
   }
   assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
          "Unaligned load of unsupported type.");
@@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   // Load the value in two parts
   SDValue Lo, Hi;
   if (TLI.isLittleEndian()) {
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset, NewLoadedVT, LD->isVolatile(),
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+                        NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
-    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+                        LD->getPointerInfo().getWithOffset(IncrementSize),
+                        NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
   } else {
-    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset, NewLoadedVT, LD->isVolatile(),
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+                        NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
-                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+                        LD->getPointerInfo().getWithOffset(IncrementSize),
+                        NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
   }
 
@@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
 
   // Store the vector.
   SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
-                            PseudoSourceValue::getFixedStack(SPFI), 0,
+                            MachinePointerInfo::getFixedStack(SPFI),
                             false, false, 0);
 
   // Truncate or zero extend offset to target pointer type.
@@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
   Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
   SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
   // Store the scalar value.
-  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
-                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
                          false, false, 0);
   // Load the updated vector.
   return DAG.getLoad(VT, dl, Ch, StackPtr,
-                     PseudoSourceValue::getFixedStack(SPFI), 0,
-                     false, false, 0);
+                     MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
 }
 
 
@@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
   SDValue Tmp1 = ST->getChain();
   SDValue Tmp2 = ST->getBasePtr();
   SDValue Tmp3;
-  int SVOffset = ST->getSrcValueOffset();
   unsigned Alignment = ST->getAlignment();
   bool isVolatile = ST->isVolatile();
   bool isNonTemporal = ST->isNonTemporal();
@@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
       Tmp3 = DAG.getConstant(CFP->getValueAPF().
                                       bitcastToAPInt().zextOrTrunc(32),
                               MVT::i32);
-      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                          SVOffset, isVolatile, isNonTemporal, Alignment);
-    } else if (CFP->getValueType(0) == MVT::f64) {
+      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                          isVolatile, isNonTemporal, Alignment);
+    }
+
+    if (CFP->getValueType(0) == MVT::f64) {
       // If this target supports 64-bit registers, do a single 64-bit store.
       if (getTypeAction(MVT::i64) == Legal) {
         Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                   zextOrTrunc(64), MVT::i64);
-        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                            SVOffset, isVolatile, isNonTemporal, Alignment);
-      } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                            isVolatile, isNonTemporal, Alignment);
+      }
+
+      if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
         // Otherwise, if the target supports 32-bit registers, use 2 32-bit
         // stores.  If the target supports neither 32- nor 64-bits, this
         // xform is certainly not worth it.
         const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
-        SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+        SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
         SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
         if (TLI.isBigEndian()) std::swap(Lo, Hi);
 
-        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
-                          SVOffset, isVolatile, isNonTemporal, Alignment);
+        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+                          isNonTemporal, Alignment);
         Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                             DAG.getIntPtrConstant(4));
-        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+                          ST->getPointerInfo().getWithOffset(4),
                           isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
 
         return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
@@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   bool isCustom = false;
 
   // Figure out the correct action; the way to query this varies by opcode
-  TargetLowering::LegalizeAction Action;
+  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
   bool SimpleFinishLegalizing = true;
   switch (Node->getOpcode()) {
   case ISD::INTRINSIC_W_CHAIN:
@@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   case ISD::FRAME_TO_ARGS_OFFSET:
   case ISD::EH_SJLJ_SETJMP:
   case ISD::EH_SJLJ_LONGJMP:
+  case ISD::EH_SJLJ_DISPATCHSETUP:
     // These operations lie about being legal: when they claim to be legal,
     // they should actually be expanded.
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     }
     break;
   case ISD::CALLSEQ_START: {
+    static int depth = 0;
     SDNode *CallEnd = FindCallEndFromCallStart(Node);
 
     // Recursively Legalize all of the inputs of the call end that do not lead
@@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
     // Merge in the last call to ensure that this call starts after the last
     // call ended.
-    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
       Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          Tmp1, LastCALLSEQ_END);
       Tmp1 = LegalizeOp(Tmp1);
@@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     // sequence have been legalized, legalize the call itself.  During this
     // process, no libcalls can/will be inserted, guaranteeing that no calls
     // can overlap.
-    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+
+    SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;
     // Note that we are selecting this call!
     LastCALLSEQ_END = SDValue(CallEnd, 0);
-    IsLegalizingCall = true;
 
+    depth++;
     // Legalize the call, starting from the CALLSEQ_END.
     LegalizeOp(LastCALLSEQ_END);
-    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    depth--;
+    assert(depth >= 0 && "Un-matched CALLSEQ_START?");
+    if (depth > 0)
+      LastCALLSEQ_END = Saved_LastCALLSEQ_END;
     return Result;
   }
   case ISD::CALLSEQ_END:
@@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
     // Do not try to legalize the target-specific arguments (#1+), except for
     // an optional flag input.
-    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
       if (Tmp1 != Node->getOperand(0)) {
         SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
         Ops[0] = Tmp1;
@@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                          Result.getResNo());
       }
     }
-    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
     // This finishes up call legalization.
-    IsLegalizingCall = false;
-
     // If the CALLSEQ_END node has a flag, remember that we legalized it.
     AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
     if (Node->getNumValues() == 2)
@@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         // Change base type to a different vector type.
         EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
 
-        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
-                           LD->getSrcValueOffset(),
+        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
                            LD->isVolatile(), LD->isNonTemporal(),
                            LD->getAlignment());
-        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
         Tmp4 = LegalizeOp(Tmp1.getValue(1));
         break;
       }
@@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       AddLegalizedOperand(SDValue(Node, 0), Tmp3);
       AddLegalizedOperand(SDValue(Node, 1), Tmp4);
       return Op.getResNo() ? Tmp4 : Tmp3;
-    } else {
-      EVT SrcVT = LD->getMemoryVT();
-      unsigned SrcWidth = SrcVT.getSizeInBits();
-      int SVOffset = LD->getSrcValueOffset();
-      unsigned Alignment = LD->getAlignment();
-      bool isVolatile = LD->isVolatile();
-      bool isNonTemporal = LD->isNonTemporal();
-
-      if (SrcWidth != SrcVT.getStoreSizeInBits() &&
-          // Some targets pretend to have an i1 loading operation, and actually
-          // load an i8.  This trick is correct for ZEXTLOAD because the top 7
-          // bits are guaranteed to be zero; it helps the optimizers understand
-          // that these bits are zero.  It is also useful for EXTLOAD, since it
-          // tells the optimizers that those bits are undefined.  It would be
-          // nice to have an effective generic way of getting these benefits...
-          // Until such a way is found, don't insist on promoting i1 here.
-          (SrcVT != MVT::i1 ||
-           TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
-        // Promote to a byte-sized load if not loading an integral number of
-        // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
-        unsigned NewWidth = SrcVT.getStoreSizeInBits();
-        EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
-        SDValue Ch;
-
-        // The extra bits are guaranteed to be zero, since we stored them that
-        // way.  A zext load from NVT thus automatically gives zext from SrcVT.
-
-        ISD::LoadExtType NewExtType =
-          ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
-        Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
-                                Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
-                                NVT, isVolatile, isNonTemporal, Alignment);
-
-        Ch = Result.getValue(1); // The chain.
-
-        if (ExtType == ISD::SEXTLOAD)
-          // Having the top bits zero doesn't help when sign extending.
-          Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
-                               Result.getValueType(),
-                               Result, DAG.getValueType(SrcVT));
-        else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
-          // All the top bits are guaranteed to be zero - inform the optimizers.
-          Result = DAG.getNode(ISD::AssertZext, dl,
-                               Result.getValueType(), Result,
-                               DAG.getValueType(SrcVT));
-
-        Tmp1 = LegalizeOp(Result);
-        Tmp2 = LegalizeOp(Ch);
-      } else if (SrcWidth & (SrcWidth - 1)) {
-        // If not loading a power-of-2 number of bits, expand as two loads.
-        assert(!SrcVT.isVector() && "Unsupported extload!");
-        unsigned RoundWidth = 1 << Log2_32(SrcWidth);
-        assert(RoundWidth < SrcWidth);
-        unsigned ExtraWidth = SrcWidth - RoundWidth;
-        assert(ExtraWidth < RoundWidth);
-        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
-               "Load size not an integral number of bytes!");
-        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
-        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
-        SDValue Lo, Hi, Ch;
-        unsigned IncrementSize;
+    }
 
-        if (TLI.isLittleEndian()) {
-          // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
-          // Load the bottom RoundWidth bits.
-          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
-                              Tmp1, Tmp2,
-                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
-                              isNonTemporal, Alignment);
-
-          // Load the remaining ExtraWidth bits.
-          IncrementSize = RoundWidth / 8;
-          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
-                             DAG.getIntPtrConstant(IncrementSize));
-          Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
-                              LD->getSrcValue(), SVOffset + IncrementSize,
-                              ExtraVT, isVolatile, isNonTemporal,
-                              MinAlign(Alignment, IncrementSize));
-
-          // Build a factor node to remember that this load is independent of
-          // the other one.
-          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
-                           Hi.getValue(1));
-
-          // Move the top bits to the right place.
-          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+    EVT SrcVT = LD->getMemoryVT();
+    unsigned SrcWidth = SrcVT.getSizeInBits();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+    bool isNonTemporal = LD->isNonTemporal();
+
+    if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+        // Some targets pretend to have an i1 loading operation, and actually
+        // load an i8.  This trick is correct for ZEXTLOAD because the top 7
+        // bits are guaranteed to be zero; it helps the optimizers understand
+        // that these bits are zero.  It is also useful for EXTLOAD, since it
+        // tells the optimizers that those bits are undefined.  It would be
+        // nice to have an effective generic way of getting these benefits...
+        // Until such a way is found, don't insist on promoting i1 here.
+        (SrcVT != MVT::i1 ||
+         TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+      // Promote to a byte-sized load if not loading an integral number of
+      // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+      unsigned NewWidth = SrcVT.getStoreSizeInBits();
+      EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+      SDValue Ch;
+
+      // The extra bits are guaranteed to be zero, since we stored them that
+      // way.  A zext load from NVT thus automatically gives zext from SrcVT.
+
+      ISD::LoadExtType NewExtType =
+        ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+      Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+                              Tmp1, Tmp2, LD->getPointerInfo(),
+                              NVT, isVolatile, isNonTemporal, Alignment);
+
+      Ch = Result.getValue(1); // The chain.
+
+      if (ExtType == ISD::SEXTLOAD)
+        // Having the top bits zero doesn't help when sign extending.
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                             Result.getValueType(),
+                             Result, DAG.getValueType(SrcVT));
+      else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+        // All the top bits are guaranteed to be zero - inform the optimizers.
+        Result = DAG.getNode(ISD::AssertZext, dl,
+                             Result.getValueType(), Result,
+                             DAG.getValueType(SrcVT));
+
+      Tmp1 = LegalizeOp(Result);
+      Tmp2 = LegalizeOp(Ch);
+    } else if (SrcWidth & (SrcWidth - 1)) {
+      // If not loading a power-of-2 number of bits, expand as two loads.
+      assert(!SrcVT.isVector() && "Unsupported extload!");
+      unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+      assert(RoundWidth < SrcWidth);
+      unsigned ExtraWidth = SrcWidth - RoundWidth;
+      assert(ExtraWidth < RoundWidth);
+      assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+             "Load size not an integral number of bytes!");
+      EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+      EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+      SDValue Lo, Hi, Ch;
+      unsigned IncrementSize;
+
+      if (TLI.isLittleEndian()) {
+        // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+        // Load the bottom RoundWidth bits.
+        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+                            Tmp1, Tmp2,
+                            LD->getPointerInfo(), RoundVT, isVolatile,
+                            isNonTemporal, Alignment);
+
+        // Load the remaining ExtraWidth bits.
+        IncrementSize = RoundWidth / 8;
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getIntPtrConstant(IncrementSize));
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo().getWithOffset(IncrementSize),
+                            ExtraVT, isVolatile, isNonTemporal,
+                            MinAlign(Alignment, IncrementSize));
+
+        // Build a factor node to remember that this load is independent of
+        // the other one.
+        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                         Hi.getValue(1));
+
+        // Move the top bits to the right place.
+        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                         DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+        // Join the hi and lo parts.
+        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+      } else {
+        // Big endian - avoid unaligned loads.
+        // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+        // Load the top RoundWidth bits.
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo(), RoundVT, isVolatile,
+                            isNonTemporal, Alignment);
+
+        // Load the remaining ExtraWidth bits.
+        IncrementSize = RoundWidth / 8;
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getIntPtrConstant(IncrementSize));
+        Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+                            dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo().getWithOffset(IncrementSize),
+                            ExtraVT, isVolatile, isNonTemporal,
+                            MinAlign(Alignment, IncrementSize));
+
+        // Build a factor node to remember that this load is independent of
+        // the other one.
+        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                         Hi.getValue(1));
+
+        // Move the top bits to the right place.
+        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                         DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+        // Join the hi and lo parts.
+        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+      }
 
-          // Join the hi and lo parts.
-          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+      Tmp1 = LegalizeOp(Result);
+      Tmp2 = LegalizeOp(Ch);
+    } else {
+      switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                Tmp1, Tmp2, LD->getOffset()),
+                         Result.getResNo());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+
+        if (isCustom) {
+          Tmp3 = TLI.LowerOperation(Result, DAG);
+          if (Tmp3.getNode()) {
+            Tmp1 = LegalizeOp(Tmp3);
+            Tmp2 = LegalizeOp(Tmp3.getValue(1));
+          }
         } else {
-          // Big endian - avoid unaligned loads.
-          // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
-          // Load the top RoundWidth bits.
-          Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
-                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
-                              isNonTemporal, Alignment);
-
-          // Load the remaining ExtraWidth bits.
-          IncrementSize = RoundWidth / 8;
-          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
-                             DAG.getIntPtrConstant(IncrementSize));
-          Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
-                              Node->getValueType(0), dl, Tmp1, Tmp2,
-                              LD->getSrcValue(), SVOffset + IncrementSize,
-                              ExtraVT, isVolatile, isNonTemporal,
-                              MinAlign(Alignment, IncrementSize));
-
-          // Build a factor node to remember that this load is independent of
-          // the other one.
-          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
-                           Hi.getValue(1));
-
-          // Move the top bits to the right place.
-          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-
-          // Join the hi and lo parts.
-          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
-        }
-
-        Tmp1 = LegalizeOp(Result);
-        Tmp2 = LegalizeOp(Ch);
-      } else {
-        switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
-        default: assert(0 && "This action is not supported yet!");
-        case TargetLowering::Custom:
-          isCustom = true;
-          // FALLTHROUGH
-        case TargetLowering::Legal:
-          Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
-                                                  Tmp1, Tmp2, LD->getOffset()),
-                           Result.getResNo());
-          Tmp1 = Result.getValue(0);
-          Tmp2 = Result.getValue(1);
-
-          if (isCustom) {
-            Tmp3 = TLI.LowerOperation(Result, DAG);
-            if (Tmp3.getNode()) {
-              Tmp1 = LegalizeOp(Tmp3);
-              Tmp2 = LegalizeOp(Tmp3.getValue(1));
-            }
-          } else {
-            // If this is an unaligned load and the target doesn't support it,
-            // expand it.
-            if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
-              const Type *Ty =
-                LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
-              unsigned ABIAlignment =
-                TLI.getTargetData()->getABITypeAlignment(Ty);
-              if (LD->getAlignment() < ABIAlignment){
-                Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
-                                             DAG, TLI);
-                Tmp1 = Result.getOperand(0);
-                Tmp2 = Result.getOperand(1);
-                Tmp1 = LegalizeOp(Tmp1);
-                Tmp2 = LegalizeOp(Tmp2);
-              }
+          // If this is an unaligned load and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+            const Type *Ty =
+              LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+            unsigned ABIAlignment =
+              TLI.getTargetData()->getABITypeAlignment(Ty);
+            if (LD->getAlignment() < ABIAlignment){
+              Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+                                           DAG, TLI);
+              Tmp1 = Result.getOperand(0);
+              Tmp2 = Result.getOperand(1);
+              Tmp1 = LegalizeOp(Tmp1);
+              Tmp2 = LegalizeOp(Tmp2);
             }
           }
-          break;
-        case TargetLowering::Expand:
-          if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
-            SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
-                                       LD->getSrcValueOffset(),
-                                       LD->isVolatile(), LD->isNonTemporal(),
-                                       LD->getAlignment());
-            unsigned ExtendOp;
-            switch (ExtType) {
-            case ISD::EXTLOAD:
-              ExtendOp = (SrcVT.isFloatingPoint() ?
-                          ISD::FP_EXTEND : ISD::ANY_EXTEND);
-              break;
-            case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
-            case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
-            default: llvm_unreachable("Unexpected extend load type!");
-            }
-            Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
-            Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
-            Tmp2 = LegalizeOp(Load.getValue(1));
+        }
+        break;
+      case TargetLowering::Expand:
+        if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+          SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+                                     LD->getPointerInfo(),
+                                     LD->isVolatile(), LD->isNonTemporal(),
+                                     LD->getAlignment());
+          unsigned ExtendOp;
+          switch (ExtType) {
+          case ISD::EXTLOAD:
+            ExtendOp = (SrcVT.isFloatingPoint() ?
+                        ISD::FP_EXTEND : ISD::ANY_EXTEND);
             break;
+          case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+          case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+          default: llvm_unreachable("Unexpected extend load type!");
           }
-          // FIXME: This does not work for vectors on most targets.  Sign- and
-          // zero-extend operations are currently folded into extending loads,
-          // whether they are legal or not, and then we end up here without any
-          // support for legalizing them.
-          assert(ExtType != ISD::EXTLOAD &&
-                 "EXTLOAD should always be supported!");
-          // Turn the unsupported load into an EXTLOAD followed by an explicit
-          // zero/sign extend inreg.
-          Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
-                                  Tmp1, Tmp2, LD->getSrcValue(),
-                                  LD->getSrcValueOffset(), SrcVT,
-                                  LD->isVolatile(), LD->isNonTemporal(),
-                                  LD->getAlignment());
-          SDValue ValRes;
-          if (ExtType == ISD::SEXTLOAD)
-            ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
-                                 Result.getValueType(),
-                                 Result, DAG.getValueType(SrcVT));
-          else
-            ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
-          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
-          Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+          Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+          Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Load.getValue(1));
           break;
         }
+        // FIXME: This does not work for vectors on most targets.  Sign- and
+        // zero-extend operations are currently folded into extending loads,
+        // whether they are legal or not, and then we end up here without any
+        // support for legalizing them.
+        assert(ExtType != ISD::EXTLOAD &&
+               "EXTLOAD should always be supported!");
+        // Turn the unsupported load into an EXTLOAD followed by an explicit
+        // zero/sign extend inreg.
+        Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+                                LD->isVolatile(), LD->isNonTemporal(),
+                                LD->getAlignment());
+        SDValue ValRes;
+        if (ExtType == ISD::SEXTLOAD)
+          ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                               Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else
+          ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+        Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+        Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+        break;
       }
-
-      // Since loads produce two values, make sure to remember that we legalized
-      // both of them.
-      AddLegalizedOperand(SDValue(Node, 0), Tmp1);
-      AddLegalizedOperand(SDValue(Node, 1), Tmp2);
-      return Op.getResNo() ? Tmp2 : Tmp1;
     }
+
+    // Since loads produce two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+    AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+    return Op.getResNo() ? Tmp2 : Tmp1;
   }
   case ISD::STORE: {
     StoreSDNode *ST = cast<StoreSDNode>(Node);
     Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
     Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
-    int SVOffset = ST->getSrcValueOffset();
     unsigned Alignment = ST->getAlignment();
     bool isVolatile = ST->isVolatile();
     bool isNonTemporal = ST->isNonTemporal();
@@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           break;
         case TargetLowering::Promote:
           assert(VT.isVector() && "Unknown legal promote case!");
-          Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+          Tmp3 = DAG.getNode(ISD::BITCAST, dl,
                              TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
           Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
-                                ST->getSrcValue(), SVOffset, isVolatile,
+                                ST->getPointerInfo(), isVolatile,
                                 isNonTemporal, Alignment);
           break;
         }
@@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
                                     StVT.getStoreSizeInBits());
         Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
-        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                   SVOffset, NVT, isVolatile, isNonTemporal,
-                                   Alignment);
+        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                   NVT, isVolatile, isNonTemporal, Alignment);
       } else if (StWidth & (StWidth - 1)) {
         // If not storing a power-of-2 number of bits, expand as two stores.
         assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         if (TLI.isLittleEndian()) {
           // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
           // Store the bottom RoundWidth bits.
-          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                 SVOffset, RoundVT,
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                 RoundVT,
                                  isVolatile, isNonTemporal, Alignment);
 
           // Store the remaining ExtraWidth bits.
@@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                              DAG.getIntPtrConstant(IncrementSize));
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
                            DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
-          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
-                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
-                                 isNonTemporal,
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+                             ST->getPointerInfo().getWithOffset(IncrementSize),
+                                 ExtraVT, isVolatile, isNonTemporal,
                                  MinAlign(Alignment, IncrementSize));
         } else {
           // Big endian - avoid unaligned stores.
@@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           // Store the top RoundWidth bits.
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
                            DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
-                                 SVOffset, RoundVT, isVolatile, isNonTemporal,
-                                 Alignment);
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+                                 RoundVT, isVolatile, isNonTemporal, Alignment);
 
           // Store the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
           Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                              DAG.getIntPtrConstant(IncrementSize));
-          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                 SVOffset + IncrementSize, ExtraVT, isVolatile,
-                                 isNonTemporal,
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+                              ST->getPointerInfo().getWithOffset(IncrementSize),
+                                 ExtraVT, isVolatile, isNonTemporal,
                                  MinAlign(Alignment, IncrementSize));
         }
 
@@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           // TRUNCSTORE:i16 i32 -> STORE i16
           assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
           Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
-          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
-                                SVOffset, isVolatile, isNonTemporal,
-                                Alignment);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                isVolatile, isNonTemporal, Alignment);
           break;
         }
       }
@@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   DebugLoc dl = Op.getDebugLoc();
   // Store the value to a temporary stack slot, then LOAD the returned part.
   SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
-                            false, false, 0);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                            MachinePointerInfo(), false, false, 0);
 
   // Add the offset to the index.
   unsigned EltSize =
@@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
 
   if (Op.getValueType().isVector())
-    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
                        false, false, 0);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+                        MachinePointerInfo(),
+                        Vec.getValueType().getVectorElementType(),
+                        false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+  SDValue Vec  = Op.getOperand(0);
+  SDValue Part = Op.getOperand(1);
+  SDValue Idx  = Op.getOperand(2);
+  DebugLoc dl  = Op.getDebugLoc();
+
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+  // First store the whole vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+                            false, false, 0);
+
+  // Then store the inserted part.
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
   else
-    return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
-                          NULL, 0, Vec.getValueType().getVectorElementType(),
-                          false, false, 0);
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                                    StackPtr);
+
+  // Store the subvector.
+  Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+                    MachinePointerInfo(), false, false, 0);
+
+  // Finally, load the updated vector.
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+                     false, false, 0);
 }
 
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   DebugLoc dl = Node->getDebugLoc();
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
-  const Value *SV = PseudoSourceValue::getFixedStack(FI);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
 
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
@@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
     // element type, only store the bits necessary.
     if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
       Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
-                                         Node->getOperand(i), Idx, SV, Offset,
+                                         Node->getOperand(i), Idx,
+                                         PtrInfo.getWithOffset(Offset),
                                          EltVT, false, false, 0));
     } else
       Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
-                                    Node->getOperand(i), Idx, SV, Offset,
+                                    Node->getOperand(i), Idx,
+                                    PtrInfo.getWithOffset(Offset),
                                     false, false, 0));
   }
 
@@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
     StoreChain = DAG.getEntryNode();
 
   // Result is a load from the stack slot.
-  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
 }
 
 SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
   EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
   if (isTypeLegal(IVT)) {
     // Convert to an integer with the same sign bit.
-    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+    SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
   } else {
     // Store the float to memory, then load the sign part out as an integer.
     MVT LoadTy = TLI.getPointerTy();
@@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
     SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
     // Then store the float to it.
     SDValue Ch =
-      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
+      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
                    false, false, 0);
     if (TLI.isBigEndian()) {
       assert(FloatVT.isByteSized() && "Unsupported floating point type!");
       // Load out a legal integer with the same sign bit as the float.
-      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+                            false, false, 0);
     } else { // Little endian
       SDValue LoadPtr = StackPtr;
       // The float may be wider than the integer we are going to load.  Advance
@@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
       LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
                             LoadPtr, DAG.getIntPtrConstant(ByteOffset));
       // Load a legal integer containing the sign bit.
-      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+                            false, false, 0);
       // Move the sign bit to the top bit of the loaded integer.
       unsigned BitShift = LoadTy.getSizeInBits() -
         (FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Chain = SP.getValue(1);
   unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
-  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   if (Align > StackAlign)
     SP = DAG.getNode(ISD::AND, dl, VT, SP,
                       DAG.getConstant(-(uint64_t)Align, VT));
@@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
 
   FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
   int SPFI = StackPtrFI->getIndex();
-  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
 
   unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
   unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
 
   if (SrcSize > SlotSize)
     Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
-                              SV, 0, SlotVT, false, false, SrcAlign);
+                              PtrInfo, SlotVT, false, false, SrcAlign);
   else {
     assert(SrcSize == SlotSize && "Invalid store");
     Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
-                         SV, 0, false, false, SrcAlign);
+                         PtrInfo, false, false, SrcAlign);
   }
 
   // Result is a load from the stack slot.
   if (SlotSize == DestSize)
-    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
-                       DestAlign);
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+                       false, false, DestAlign);
 
   assert(SlotSize < DestSize && "Unknown extension!");
-  return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
-                        false, false, DestAlign);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+                        PtrInfo, SlotVT, false, false, DestAlign);
 }
 
 SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
 
   SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
                                  StackPtr,
-                                 PseudoSourceValue::getFixedStack(SPFI), 0,
+                                 MachinePointerInfo::getFixedStack(SPFI),
                                  Node->getValueType(0).getVectorElementType(),
                                  false, false, 0);
   return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
-                     PseudoSourceValue::getFixedStack(SPFI), 0,
+                     MachinePointerInfo::getFixedStack(SPFI),
                      false, false, 0);
 }
 
@@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
     SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
     unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
     return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                       PseudoSourceValue::getConstantPool(), 0,
+                       MachinePointerInfo::getConstantPool(),
                        false, false, Alignment);
   }
 
@@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
 // and leave the Hi part unset.
 SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
                                             bool isSigned) {
-  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
   // The input chain to this libcall is the entry node of the function.
   // Legalizing the call will automatically add the previous call to the
   // dependence.
@@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
 
   // Splice the libcall in wherever FindInputOutputChains tells us to.
   const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+  // isTailCall may be true since the callee does not reference caller stack
+  // frame. Check if it's in the right position.
+  bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
   std::pair<SDValue, SDValue> CallInfo =
     TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
-                    0, TLI.getLibcallCallingConv(LC), false,
+                    0, TLI.getLibcallCallingConv(LC), isTailCall,
                     /*isReturnValueUsed=*/true,
                     Callee, Args, DAG, Node->getDebugLoc());
 
+  if (!CallInfo.second.getNode())
+    // It's a tailcall, return the chain (which is the DAG root).
+    return DAG.getRoot();
+
   // Legalize the call sequence, starting with the chain.  This will advance
   // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
   // was added by LowerCallTo (guaranteeing proper serialization of calls).
@@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue>
 SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
                                          SDNode *Node,
                                          bool isSigned) {
-  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
   SDValue InChain = Node->getOperand(0);
 
   TargetLowering::ArgListTy Args;
@@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
   const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
   std::pair<SDValue, SDValue> CallInfo =
     TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
-                    0, TLI.getLibcallCallingConv(LC), false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
                     /*isReturnValueUsed=*/true,
                     Callee, Args, DAG, Node->getDebugLoc());
 
@@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     }
     // store the lo of the constructed double - based on integer input
     SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
-                                  Op0Mapped, Lo, NULL, 0,
+                                  Op0Mapped, Lo, MachinePointerInfo(),
                                   false, false, 0);
     // initial hi portion of constructed double
     SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
     // store the hi of the constructed double - biased exponent
-    SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
-                                false, false, 0);
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
     // load the constructed double
-    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
-                               false, false, 0);
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+                               MachinePointerInfo(), false, false, 0);
     // FP constant to bias correct the final result
     SDValue Bias = DAG.getConstantFP(isSigned ?
                                      BitsToDouble(0x4330000080000000ULL) :
@@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                              DAG.getConstant(32, MVT::i64));
     SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
     SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
-    SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
-    SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+    SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+    SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
     SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
                                 TwoP84PlusTwoP52);
     return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
   }
 
-  // Implementation of unsigned i64 to f32.  This implementation has the
-  // advantage of performing rounding correctly.
+  // Implementation of unsigned i64 to f32.
   // TODO: Generalize this for use with other types.
   if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    // For unsigned conversions, convert them to signed conversions using the
+    // algorithm from the x86_64 __floatundidf in compiler_rt.
+    if (!isSigned) {
+      SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+      SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+      SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+      SDValue AndConst = DAG.getConstant(1, MVT::i64);
+      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+      SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+      SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+      // TODO: This really should be implemented using a branch rather than a
+      // select.  We happen to get lucky and machinesink does the right
+      // thing most of the time.  This would be a good candidate for a
+      //pseudo-op, or, even better, for whole-function isel.
+      SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+        Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+      return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+    }
+
+    // Otherwise, implement the fully general conversion.
     EVT SHVT = TLI.getShiftAmountTy();
 
     SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
@@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
     SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
                    Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
-                    ISD::SETUGE);
+                   ISD::SETUGE);
     SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
 
     SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
@@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
     return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
                        DAG.getIntPtrConstant(0));
-
   }
 
   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
@@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
   SDValue FudgeInReg;
   if (DestVT == MVT::f32)
     FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
-                             PseudoSourceValue::getConstantPool(), 0,
+                             MachinePointerInfo::getConstantPool(),
                              false, false, Alignment);
   else {
     FudgeInReg =
-      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
                                 DAG.getEntryNode(), CPIdx,
-                                PseudoSourceValue::getConstantPool(), 0,
+                                MachinePointerInfo::getConstantPool(),
                                 MVT::f32, false, false, Alignment));
   }
 
@@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
   }
 }
 
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
 /// ExpandBitCount - Expand the specified bitcount instruction into operations.
 ///
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
   switch (Opc) {
   default: assert(0 && "Cannot expand this yet!");
   case ISD::CTPOP: {
-    static const uint64_t mask[6] = {
-      0x5555555555555555ULL, 0x3333333333333333ULL,
-      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
-      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
-    };
     EVT VT = Op.getValueType();
     EVT ShVT = TLI.getShiftAmountTy();
-    unsigned len = VT.getSizeInBits();
-    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
-      //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
-      unsigned EltSize = VT.isVector() ?
-        VT.getVectorElementType().getSizeInBits() : len;
-      SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
-      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
-      Op = DAG.getNode(ISD::ADD, dl, VT,
-                       DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
-                       DAG.getNode(ISD::AND, dl, VT,
-                                   DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
-                                   Tmp2));
-    }
+    unsigned Len = VT.getSizeInBits();
+
+    assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+           "CTPOP not implemented for this type.");
+
+    // This is the "best" algorithm from
+    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+    SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+    SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+    SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+    SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+    // v = v - ((v >> 1) & 0x55555555...)
+    Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(1, ShVT)),
+                                 Mask55));
+    // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+    Op = DAG.getNode(ISD::ADD, dl, VT,
+                     DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(2, ShVT)),
+                                 Mask33));
+    // v = (v + (v >> 4)) & 0x0F0F0F0F...
+    Op = DAG.getNode(ISD::AND, dl, VT,
+                     DAG.getNode(ISD::ADD, dl, VT, Op,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(4, ShVT))),
+                     Mask0F);
+    // v = (v * 0x01010101...) >> (Len - 8)
+    Op = DAG.getNode(ISD::SRL, dl, VT,
+                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+                     DAG.getConstant(Len - 8, ShVT));
+    
     return Op;
   }
   case ISD::CTLZ: {
@@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   case ISD::PREFETCH:
   case ISD::VAEND:
   case ISD::EH_SJLJ_LONGJMP:
+  case ISD::EH_SJLJ_DISPATCHSETUP:
+    // If the target didn't expand these, there's nothing to do, so just
+    // preserve the chain and be done.
     Results.push_back(Node->getOperand(0));
     break;
   case ISD::EH_SJLJ_SETJMP:
+    // If the target didn't expand this, just return 'zero' and preserve the
+    // chain.
     Results.push_back(DAG.getConstant(0, MVT::i32));
     Results.push_back(Node->getOperand(0));
     break;
@@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     TargetLowering::ArgListTy Args;
     std::pair<SDValue, SDValue> CallResult =
       TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
-                      false, false, false, false, 0, CallingConv::C, false,
+                      false, false, false, false, 0, CallingConv::C,
+                      /*isTailCall=*/false,
                       /*isReturnValueUsed=*/true,
                       DAG.getExternalSymbol("__sync_synchronize",
                                             TLI.getPointerTy()),
@@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   // By default, atomic intrinsics are marked Legal and lowered. Targets
   // which don't support them directly, however, may want libcalls, in which
   // case they mark them Expand, and we get here.
-  // FIXME: Unimplemented for now. Add libcalls.
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
@@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     TargetLowering::ArgListTy Args;
     std::pair<SDValue, SDValue> CallResult =
       TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
-                      false, false, false, false, 0, CallingConv::C, false,
+                      false, false, false, false, 0, CallingConv::C,
+                      /*isTailCall=*/false,
                       /*isReturnValueUsed=*/true,
                       DAG.getExternalSymbol("abort", TLI.getPointerTy()),
                       Args, DAG, dl);
@@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     break;
   }
   case ISD::FP_ROUND:
-  case ISD::BIT_CONVERT:
+  case ISD::BITCAST:
     Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
                             Node->getValueType(0), dl);
     Results.push_back(Tmp1);
@@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     SDValue True, False;
     EVT VT =  Node->getOperand(0).getValueType();
     EVT NVT = Node->getValueType(0);
-    const uint64_t zero[] = {0, 0};
-    APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+    APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
     APInt x = APInt::getSignBit(NVT.getSizeInBits());
     (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
     Tmp1 = DAG.getConstantFP(apf, VT);
@@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Tmp2 = Node->getOperand(1);
     unsigned Align = Node->getConstantOperandVal(3);
 
-    SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
-                                     false, false, 0);
+    SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+                                     MachinePointerInfo(V), false, false, 0);
     SDValue VAList = VAListLoad;
 
     if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                                            TLI.getPointerTy()));
 
       VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
-                           DAG.getConstant(-Align,
+                           DAG.getConstant(-(int64_t)Align,
                                            TLI.getPointerTy()));
     }
 
@@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                           getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
                                        TLI.getPointerTy()));
     // Store the incremented VAList to the legalized pointer
-    Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
-                        false, false, 0);
+    Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+                        MachinePointerInfo(V), false, false, 0);
     // Load the actual argument out of the pointer VAList
-    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
                                   false, false, 0));
     Results.push_back(Results[0].getValue(1));
     break;
@@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
     const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
     Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
-                       Node->getOperand(2), VS, 0, false, false, 0);
-    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
-                        false, false, 0);
+                       Node->getOperand(2), MachinePointerInfo(VS),
+                       false, false, 0);
+    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+                        MachinePointerInfo(VD), false, false, 0);
     Results.push_back(Tmp1);
     break;
   }
   case ISD::EXTRACT_VECTOR_ELT:
     if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
       // This must be an access of the only element.  Return it.
-      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+      Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
                          Node->getOperand(0));
     else
       Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   case ISD::EXTRACT_SUBVECTOR:
     Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
     break;
+  case ISD::INSERT_SUBVECTOR:
+    Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+    break;
   case ISD::CONCAT_VECTORS: {
     Results.push_back(ExpandVectorBuildThroughStack(Node));
     break;
@@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                                RHS);
       TopHalf = BottomHalf.getValue(1);
-    } else {
-      // FIXME: We should be able to fall back to a libcall with an illegal
-      // type in some cases.
-      // Also, we can fall back to a division in some cases, but that's a big
-      // performance hit in the general case.
-      assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
-                                               VT.getSizeInBits() * 2)) &&
-             "Don't know how to expand this operation yet!");
+    } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+                                                 VT.getSizeInBits() * 2))) {
       EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
       LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
       RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                                DAG.getIntPtrConstant(0));
       TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
                             DAG.getIntPtrConstant(1));
+    } else {
+      // We can fall back to a libcall with an illegal type for the MUL if we
+      // have a libcall big enough.
+      // Also, we can fall back to a division in some cases, but that's a big
+      // performance hit in the general case.
+      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+      if (WideVT == MVT::i16)
+        LC = RTLIB::MUL_I16;
+      else if (WideVT == MVT::i32)
+        LC = RTLIB::MUL_I32;
+      else if (WideVT == MVT::i64)
+        LC = RTLIB::MUL_I64;
+      else if (WideVT == MVT::i128)
+        LC = RTLIB::MUL_I128;
+      assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+      LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+      RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+      
+      SDValue Ret = ExpandLibCall(LC, Node, isSigned);
+      BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
+      TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
+                       DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
+      TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
     }
     if (isSigned) {
       Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
 
     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
-    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
-                                PseudoSourceValue::getJumpTable(), 0, MemVT,
+    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+                                MachinePointerInfo::getJumpTable(), MemVT,
                                 false, false, 0);
     Addr = LD;
     if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
   case ISD::XOR: {
     unsigned ExtOp, TruncOp;
     if (OVT.isVector()) {
-      ExtOp   = ISD::BIT_CONVERT;
-      TruncOp = ISD::BIT_CONVERT;
+      ExtOp   = ISD::BITCAST;
+      TruncOp = ISD::BITCAST;
     } else {
       assert(OVT.isInteger() && "Cannot promote logic operation");
       ExtOp   = ISD::ANY_EXTEND;
@@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
   case ISD::SELECT: {
     unsigned ExtOp, TruncOp;
     if (Node->getValueType(0).isVector()) {
-      ExtOp   = ISD::BIT_CONVERT;
-      TruncOp = ISD::BIT_CONVERT;
+      ExtOp   = ISD::BITCAST;
+      TruncOp = ISD::BITCAST;
     } else if (Node->getValueType(0).isInteger()) {
       ExtOp   = ISD::ANY_EXTEND;
       TruncOp = ISD::TRUNCATE;
@@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
     cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
 
     // Cast the two input vectors.
-    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
-    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
 
     // Convert the shuffle mask to the right # elements.
     Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
-    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
     Results.push_back(Tmp1);
     break;
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 650ee5a0721c..27752123aac4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
 #endif
     llvm_unreachable("Do not know how to soften the result of this operator!");
 
-    case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+    case ISD::BITCAST:     R = SoftenFloatRes_BITCAST(N); break;
     case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
     case ISD::ConstantFP:
       R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
@@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     SetSoftenedFloat(SDValue(N, ResNo), R);
 }
 
-SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
   return BitConvertToInteger(N->getOperand(0));
 }
 
@@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
   unsigned Size = NVT.getSizeInBits();
 
   // Mask = ~(1 << (Size-1))
-  SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
-                                 NVT);
+  APInt API = APInt::getAllOnesValue(Size);
+  API.clearBit(Size-1);
+  SDValue Mask = DAG.getConstant(API, NVT);
   SDValue Op = GetSoftenedFloat(N->getOperand(0));
   return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
 }
@@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
   if (L->getExtensionType() == ISD::NON_EXTLOAD) {
     NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
                        NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
-                       L->getSrcValue(), L->getSrcValueOffset(), NVT,
+                       L->getPointerInfo(), NVT,
                        L->isVolatile(), L->isNonTemporal(), L->getAlignment());
     // Legalized the chain result - switch anything that used the old chain to
     // use the new one.
@@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
   // Do a non-extending load followed by FP_EXTEND.
   NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
                      L->getMemoryVT(), dl, L->getChain(),
-                     L->getBasePtr(), L->getOffset(),
-                     L->getSrcValue(), L->getSrcValueOffset(),
+                     L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
                      L->getMemoryVT(), L->isVolatile(),
                      L->isNonTemporal(), L->getAlignment());
   // Legalized the chain result - switch anything that used the old chain to
@@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
 #endif
     llvm_unreachable("Do not know how to soften this operator's operand!");
 
-  case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+  case ISD::BITCAST:     Res = SoftenFloatOp_BITCAST(N); break;
   case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
   case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
   }
 }
 
-SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
-  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
                      GetSoftenedFloat(N->getOperand(0)));
 }
 
@@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
     Val = GetSoftenedFloat(Val);
 
   return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
-                      ST->getSrcValue(), ST->getSrcValueOffset(),
+                      ST->getPointerInfo(),
                       ST->isVolatile(), ST->isNonTemporal(),
                       ST->getAlignment());
 }
@@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
 
-  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
   case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
   case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
   assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
 
-  Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
-                      LD->getSrcValue(), LD->getSrcValueOffset(),
-                      LD->getMemoryVT(), LD->isVolatile(),
+  Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+                      LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
                       LD->isNonTemporal(), LD->getAlignment());
 
   // Remember the chain.
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   #endif
       llvm_unreachable("Do not know how to expand this operator's operand!");
 
-    case ISD::BIT_CONVERT:     Res = ExpandOp_BIT_CONVERT(N); break;
+    case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;
     case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
     case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
 
@@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
   GetExpandedOp(ST->getValue(), Lo, Hi);
 
   return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
-                           ST->getSrcValue(), ST->getSrcValueOffset(),
+                           ST->getPointerInfo(),
                            ST->getMemoryVT(), ST->isVolatile(),
                            ST->isNonTemporal(), ST->getAlignment());
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f8c589071921..f0752df80f12 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
     llvm_unreachable("Do not know how to promote this operator!");
   case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
   case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
-  case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+  case ISD::BITCAST:     Res = PromoteIntRes_BITCAST(N); break;
   case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
   case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
   case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
@@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
   SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
                               N->getMemoryVT(),
                               N->getChain(), N->getBasePtr(),
-                              Op2, N->getSrcValue(), N->getAlignment());
+                              Op2, N->getMemOperand());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
   SDValue Op3 = GetPromotedInteger(N->getOperand(3));
   SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
                               N->getMemoryVT(), N->getChain(), N->getBasePtr(),
-                              Op2, Op3, N->getSrcValue(), N->getAlignment());
+                              Op2, Op3, N->getMemOperand());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
   return Res;
 }
 
-SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
   SDValue InOp = N->getOperand(0);
   EVT InVT = InOp.getValueType();
   EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
@@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
   case PromoteInteger:
     if (NOutVT.bitsEq(NInVT))
       // The input promotes to the same size.  Convert the promoted value.
-      return DAG.getNode(ISD::BIT_CONVERT, dl,
-                         NOutVT, GetPromotedInteger(InOp));
+      return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
     break;
   case SoftenFloat:
     // Promote the integer operand by hand.
@@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
     return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
                        BitConvertToInteger(GetScalarizedVector(InOp)));
   case SplitVector: {
-    // For example, i32 = BIT_CONVERT v2i16 on alpha.  Convert the split
+    // For example, i32 = BITCAST v2i16 on alpha.  Convert the split
     // pieces of the input into integers and reassemble in the final type.
     SDValue Lo, Hi;
     GetSplitVector(N->getOperand(0), Lo, Hi);
@@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
                        EVT::getIntegerVT(*DAG.getContext(),
                                          NOutVT.getSizeInBits()),
                        JoinIntegers(Lo, Hi));
-    return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+    return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
   }
   case WidenVector:
     if (OutVT.bitsEq(NInVT))
       // The input is widened to the same size.  Convert to the widened value.
-      return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+      return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
   }
 
   return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
   // value was zero.  This can be handled by setting the bit just off
   // the top of the original type.
   APInt TopBit(NVT.getSizeInBits(), 0);
-  TopBit.set(OVT.getSizeInBits());
+  TopBit.setBit(OVT.getSizeInBits());
   Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
   return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
 }
@@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
   ISD::LoadExtType ExtType =
     ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
   DebugLoc dl = N->getDebugLoc();
-  SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
-                               N->getSrcValue(), N->getSrcValueOffset(),
+  SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+                               N->getPointerInfo(),
                                N->getMemoryVT(), N->isVolatile(),
                                N->isNonTemporal(), N->getAlignment());
 
@@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+  // Promote the overflow bit trivially.
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  EVT SmallVT = LHS.getValueType();
+
+  // To determine if the result overflowed in a larger type, we extend the input
+  // to the larger type, do the multiply, then check the high bits of the result
+  // to see if the overflow happened.
+  if (N->getOpcode() == ISD::SMULO) {
+    LHS = SExtPromotedInteger(LHS);
+    RHS = SExtPromotedInteger(RHS);
+  } else {
+    LHS = ZExtPromotedInteger(LHS);
+    RHS = ZExtPromotedInteger(RHS);
+  }
+  SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+  // Overflow occurred iff the high part of the result does not zero/sign-extend
+  // the low part.
+  SDValue Overflow;
+  if (N->getOpcode() == ISD::UMULO) {
+    // Unsigned overflow occurred iff the high part is non-zero.
+    SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+                             DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+    Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+                            DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+  } else {
+    // Signed overflow occurred iff the high part does not sign extend the low.
+    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+                               Mul, DAG.getValueType(SmallVT));
+    Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+  }
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Overflow);
+  return Mul;
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
   // Zero extend the input.
   SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
@@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
   return Res;
 }
 
-SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
-  assert(ResNo == 1 && "Only boolean result promotion currently supported!");
-  return PromoteIntRes_Overflow(N);
-}
-
 //===----------------------------------------------------------------------===//
 //  Integer Operand Promotion
 //===----------------------------------------------------------------------===//
@@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
     llvm_unreachable("Do not know how to promote this operator's operand!");
 
   case ISD::ANY_EXTEND:   Res = PromoteIntOp_ANY_EXTEND(N); break;
-  case ISD::BIT_CONVERT:  Res = PromoteIntOp_BIT_CONVERT(N); break;
+  case ISD::BITCAST:      Res = PromoteIntOp_BITCAST(N); break;
   case ISD::BR_CC:        Res = PromoteIntOp_BR_CC(N, OpNo); break;
   case ISD::BRCOND:       Res = PromoteIntOp_BRCOND(N, OpNo); break;
   case ISD::BUILD_PAIR:   Res = PromoteIntOp_BUILD_PAIR(N); break;
@@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
   return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
 }
 
-SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
   // This should only occur in unusual situations like bitcasting to an
   // x86_fp80, so just turn it into a store+load
   return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
@@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
   assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
   SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
-  int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
   bool isNonTemporal = N->isNonTemporal();
@@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
   SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.
 
   // Truncate the value and store the result.
-  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
-                           SVOffset, N->getMemoryVT(),
+  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+                           N->getMemoryVT(),
                            isVolatile, isNonTemporal, Alignment);
 }
 
@@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
 
-  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
   case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
   case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::UREM:        ExpandIntRes_UREM(N, Lo, Hi); break;
   case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
 
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP: {
+    std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+    SplitInteger(Tmp.first, Lo, Hi);
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+    break;
+  }
+
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
@@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
   case ISD::UADDO:
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+  case ISD::UMULO:
+  case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
     SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
 }
 
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+  unsigned Opc = Node->getOpcode();
+  MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+  RTLIB::Libcall LC;
+
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unhandled atomic intrinsic Expand!");
+    break;
+  case ISD::ATOMIC_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+    }
+    break;
+  case ISD::ATOMIC_CMP_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_ADD:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_SUB:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_AND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_OR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_XOR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_NAND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+    }
+    break;
+  }
+
+  return ExpandChainLibCall(LC, Node, false);
+}
+
 /// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
 /// and the shift amount is a constant 'Amt'.  Expand the operation.
 void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
                                              SDValue &Lo, SDValue &Hi) {
-  DebugLoc dl = N->getDebugLoc();
+  DebugLoc DL = N->getDebugLoc();
   // Expand the incoming operand to be shifted, so that we have its parts
   SDValue InL, InH;
   GetExpandedInteger(N->getOperand(0), InL, InH);
@@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
       Lo = Hi = DAG.getConstant(0, NVT);
     } else if (Amt > NVTBits) {
       Lo = DAG.getConstant(0, NVT);
-      Hi = DAG.getNode(ISD::SHL, dl,
-                       NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+      Hi = DAG.getNode(ISD::SHL, DL,
+                       NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
     } else if (Amt == NVTBits) {
       Lo = DAG.getConstant(0, NVT);
       Hi = InL;
@@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
                TLI.isOperationLegalOrCustom(ISD::ADDC,
                               TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
       // Emit this X << 1 as X+X.
-      SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+      SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
       SDValue LoOps[2] = { InL, InL };
-      Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+      Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
       SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
-      Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+      Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
     } else {
-      Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
-      Hi = DAG.getNode(ISD::OR, dl, NVT,
-                       DAG.getNode(ISD::SHL, dl, NVT, InH,
+      Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+      Hi = DAG.getNode(ISD::OR, DL, NVT,
+                       DAG.getNode(ISD::SHL, DL, NVT, InH,
                                    DAG.getConstant(Amt, ShTy)),
-                       DAG.getNode(ISD::SRL, dl, NVT, InL,
+                       DAG.getNode(ISD::SRL, DL, NVT, InL,
                                    DAG.getConstant(NVTBits-Amt, ShTy)));
     }
     return;
@@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
       Lo = DAG.getConstant(0, NVT);
       Hi = DAG.getConstant(0, NVT);
     } else if (Amt > NVTBits) {
-      Lo = DAG.getNode(ISD::SRL, dl,
+      Lo = DAG.getNode(ISD::SRL, DL,
                        NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
       Hi = DAG.getConstant(0, NVT);
     } else if (Amt == NVTBits) {
       Lo = InH;
       Hi = DAG.getConstant(0, NVT);
     } else {
-      Lo = DAG.getNode(ISD::OR, dl, NVT,
-                       DAG.getNode(ISD::SRL, dl, NVT, InL,
+      Lo = DAG.getNode(ISD::OR, DL, NVT,
+                       DAG.getNode(ISD::SRL, DL, NVT, InL,
                                    DAG.getConstant(Amt, ShTy)),
-                       DAG.getNode(ISD::SHL, dl, NVT, InH,
+                       DAG.getNode(ISD::SHL, DL, NVT, InH,
                                    DAG.getConstant(NVTBits-Amt, ShTy)));
-      Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+      Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
     }
     return;
   }
 
   assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
   if (Amt > VTBits) {
-    Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+    Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
                           DAG.getConstant(NVTBits-1, ShTy));
   } else if (Amt > NVTBits) {
-    Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+    Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
                      DAG.getConstant(Amt-NVTBits, ShTy));
-    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
                      DAG.getConstant(NVTBits-1, ShTy));
   } else if (Amt == NVTBits) {
     Lo = InH;
-    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
                      DAG.getConstant(NVTBits-1, ShTy));
   } else {
-    Lo = DAG.getNode(ISD::OR, dl, NVT,
-                     DAG.getNode(ISD::SRL, dl, NVT, InL,
+    Lo = DAG.getNode(ISD::OR, DL, NVT,
+                     DAG.getNode(ISD::SRL, DL, NVT, InL,
                                  DAG.getConstant(Amt, ShTy)),
-                     DAG.getNode(ISD::SHL, dl, NVT, InH,
+                     DAG.getNode(ISD::SHL, DL, NVT, InH,
                                  DAG.getConstant(NVTBits-Amt, ShTy)));
-    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
   }
 }
 
@@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
   // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
   // them.  TODO: Teach operation legalization how to expand unsupported
   // ADDC/ADDE/SUBC/SUBE.  The problem is that these operations generate
-  // a carry of type MVT::Flag, but there doesn't seem to be any way to
+  // a carry of type MVT::Glue, but there doesn't seem to be any way to
   // generate a value of this type in the expanded code sequence.
   bool hasCarry =
     TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
@@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
                                  TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
 
   if (hasCarry) {
-    SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+    SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
     if (N->getOpcode() == ISD::ADD) {
       Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
       HiOps[2] = Lo.getValue(1);
@@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
       HiOps[2] = Lo.getValue(1);
       Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
     }
+    return;    
+  }
+  
+  if (N->getOpcode() == ISD::ADD) {
+    Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+    Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+    SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+                                ISD::SETULT);
+    SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+                                 DAG.getConstant(1, NVT),
+                                 DAG.getConstant(0, NVT));
+    SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+                                ISD::SETULT);
+    SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+                                 DAG.getConstant(1, NVT), Carry1);
+    Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
   } else {
-    if (N->getOpcode() == ISD::ADD) {
-      Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
-      Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
-      SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
-                                  ISD::SETULT);
-      SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
-                                   DAG.getConstant(1, NVT),
-                                   DAG.getConstant(0, NVT));
-      SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
-                                  ISD::SETULT);
-      SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
-                                   DAG.getConstant(1, NVT), Carry1);
-      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
-    } else {
-      Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
-      Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
-      SDValue Cmp =
-        DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
-                     LoOps[0], LoOps[1], ISD::SETULT);
-      SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
-                                   DAG.getConstant(1, NVT),
-                                   DAG.getConstant(0, NVT));
-      Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
-    }
+    Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+    Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+    SDValue Cmp =
+      DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+                   LoOps[0], LoOps[1], ISD::SETULT);
+    SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+                                 DAG.getConstant(1, NVT),
+                                 DAG.getConstant(0, NVT));
+    Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
   }
 }
 
@@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
   GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
   GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
-  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
   SDValue LoOps[2] = { LHSL, RHSL };
   SDValue HiOps[3] = { LHSH, RHSH };
 
@@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
   DebugLoc dl = N->getDebugLoc();
   GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
   GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
-  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
   SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
   SDValue HiOps[3] = { LHSH, RHSH };
 
@@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned NBitWidth = NVT.getSizeInBits();
   const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
-  Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+  Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
   Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
 }
 
@@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
   SDValue Ch  = N->getChain();
   SDValue Ptr = N->getBasePtr();
   ISD::LoadExtType ExtType = N->getExtensionType();
-  int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
   bool isNonTemporal = N->isNonTemporal();
@@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
   if (N->getMemoryVT().bitsLE(NVT)) {
     EVT MemVT = N->getMemoryVT();
 
-    Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
                         MemVT, isVolatile, isNonTemporal, Alignment);
 
     // Remember the chain.
@@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
     }
   } else if (TLI.isLittleEndian()) {
     // Little-endian - low bits are at low addresses.
-    Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
                      isVolatile, isNonTemporal, Alignment);
 
     unsigned ExcessBits =
@@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
     unsigned IncrementSize = NVT.getSizeInBits()/8;
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getIntPtrConstant(IncrementSize));
-    Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
-                        SVOffset+IncrementSize, NEVT,
+    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+                        N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
                         isVolatile, isNonTemporal,
                         MinAlign(Alignment, IncrementSize));
 
@@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
     unsigned ExcessBits = (EBytes - IncrementSize)*8;
 
     // Load both the high bits and maybe some of the low bits.
-    Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
                         EVT::getIntegerVT(*DAG.getContext(),
                                           MemVT.getSizeInBits() - ExcessBits),
                         isVolatile, isNonTemporal, Alignment);
@@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getIntPtrConstant(IncrementSize));
     // Load the rest of the low bits.
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
-                        SVOffset+IncrementSize,
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+                        N->getPointerInfo().getWithOffset(IncrementSize),
                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
                         isVolatile, isNonTemporal,
                         MinAlign(Alignment, IncrementSize));
@@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Ofl);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
+                                              SDValue &Lo, SDValue &Hi) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
+  // Expand the result by simply replacing it with the equivalent
+  // non-overflow-checking operation.
+  SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+  SplitInteger(Ret, Lo, Hi);
+  
+  // Now calculate overflow.
+  SDValue Ofl;
+  if (N->getOpcode() == ISD::UMULO)
+    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
+                       DAG.getConstant(0, VT), ISD::SETNE);
+  else {
+    SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
+    Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
+    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
+  }
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   #endif
     llvm_unreachable("Do not know how to expand this operator's operand!");
 
-  case ISD::BIT_CONVERT:       Res = ExpandOp_BIT_CONVERT(N); break;
+  case ISD::BITCAST:           Res = ExpandOp_BITCAST(N); break;
   case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
   case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   SDValue Ch  = N->getChain();
   SDValue Ptr = N->getBasePtr();
-  int SVOffset = N->getSrcValueOffset();
   unsigned Alignment = N->getAlignment();
   bool isVolatile = N->isVolatile();
   bool isNonTemporal = N->isNonTemporal();
@@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
 
   if (N->getMemoryVT().bitsLE(NVT)) {
     GetExpandedInteger(N->getValue(), Lo, Hi);
-    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
                              N->getMemoryVT(), isVolatile, isNonTemporal,
                              Alignment);
-  } else if (TLI.isLittleEndian()) {
+  }
+
+  if (TLI.isLittleEndian()) {
     // Little-endian - low bits are at low addresses.
     GetExpandedInteger(N->getValue(), Lo, Hi);
 
-    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
                       isVolatile, isNonTemporal, Alignment);
 
     unsigned ExcessBits =
@@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
     unsigned IncrementSize = NVT.getSizeInBits()/8;
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getIntPtrConstant(IncrementSize));
-    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
-                           SVOffset+IncrementSize, NEVT,
-                           isVolatile, isNonTemporal,
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
+                           NEVT, isVolatile, isNonTemporal,
                            MinAlign(Alignment, IncrementSize));
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
-  } else {
-    // Big-endian - high bits are at low addresses.  Favor aligned stores at
-    // the cost of some bit-fiddling.
-    GetExpandedInteger(N->getValue(), Lo, Hi);
-
-    EVT ExtVT = N->getMemoryVT();
-    unsigned EBytes = ExtVT.getStoreSize();
-    unsigned IncrementSize = NVT.getSizeInBits()/8;
-    unsigned ExcessBits = (EBytes - IncrementSize)*8;
-    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
-                                 ExtVT.getSizeInBits() - ExcessBits);
+  }
 
-    if (ExcessBits < NVT.getSizeInBits()) {
-      // Transfer high bits from the top of Lo to the bottom of Hi.
-      Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
-                       DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
-                                       TLI.getPointerTy()));
-      Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
-                       DAG.getNode(ISD::SRL, dl, NVT, Lo,
-                                   DAG.getConstant(ExcessBits,
-                                                   TLI.getPointerTy())));
-    }
+  // Big-endian - high bits are at low addresses.  Favor aligned stores at
+  // the cost of some bit-fiddling.
+  GetExpandedInteger(N->getValue(), Lo, Hi);
+
+  EVT ExtVT = N->getMemoryVT();
+  unsigned EBytes = ExtVT.getStoreSize();
+  unsigned IncrementSize = NVT.getSizeInBits()/8;
+  unsigned ExcessBits = (EBytes - IncrementSize)*8;
+  EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                               ExtVT.getSizeInBits() - ExcessBits);
+
+  if (ExcessBits < NVT.getSizeInBits()) {
+    // Transfer high bits from the top of Lo to the bottom of Hi.
+    Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                     DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                     TLI.getPointerTy()));
+    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+                     DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                                 DAG.getConstant(ExcessBits,
+                                                 TLI.getPointerTy())));
+  }
 
-    // Store both the high bits and maybe some of the low bits.
-    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
-                           SVOffset, HiVT, isVolatile, isNonTemporal,
-                           Alignment);
+  // Store both the high bits and maybe some of the low bits.
+  Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+                         HiVT, isVolatile, isNonTemporal, Alignment);
 
-    // Increment the pointer to the other half.
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getIntPtrConstant(IncrementSize));
-    // Store the lowest ExcessBits bits in the second half.
-    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
-                           SVOffset+IncrementSize,
-                           EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
-                           isVolatile, isNonTemporal,
-                           MinAlign(Alignment, IncrementSize));
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
-  }
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  // Store the lowest ExcessBits bits in the second half.
+  Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+                         N->getPointerInfo().getWithOffset(IncrementSize),
+                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                         isVolatile, isNonTemporal,
+                         MinAlign(Alignment, IncrementSize));
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
 }
 
 SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
@@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
 
     // Load the value out, extending it from f32 to the destination float type.
     // FIXME: Avoid the extend by constructing the right constant pool?
-    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
-                                   FudgePtr, NULL, 0, MVT::f32,
+    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+                                   FudgePtr,
+                                   MachinePointerInfo::getConstantPool(),
+                                   MVT::f32,
                                    false, false, Alignment);
     return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6e56c98e9b56..cedda7e7075a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
           if (M->getNodeId() == Processed)
             RemapValue(NewVal);
           DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+          // OldVal may be a target of the ReplacedValues map which was marked
+          // NewNode to force reanalysis because it was updated.  Ensure that
+          // anything that ReplacedValues mapped to OldVal will now be mapped
+          // all the way to NewVal.
+          ReplacedValues[OldVal] = NewVal;
         }
         // The original node continues to exist in the DAG, marked NewNode.
       }
@@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
 /// BitConvertToInteger - Convert to an integer of the same size.
 SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
   unsigned BitWidth = Op.getValueType().getSizeInBits();
-  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
                      EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
 }
 
@@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
   unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
   EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
   unsigned NumElts = Op.getValueType().getVectorNumElements();
-  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
                      EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
 }
 
@@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
   // the source and destination types.
   SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
   // Emit a store to the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
-                               false, false, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
   // Result is a load from the stack slot.
-  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+                     false, false, 0);
 }
 
 /// CustomLowerNode - Replace the node's results with custom code provided
@@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
   return CallInfo.first;
 }
 
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+                                         SDNode *Node,
+                                         bool isSigned) {
+  SDValue InChain = Node->getOperand(0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, Node->getDebugLoc());
+
+  return CallInfo;
+}
+
 /// PromoteTargetBoolean - Promote the given target boolean to a target boolean
 /// of the given type.  A target boolean is an integer value, not necessarily of
 /// type i1, the bits of which conform to getBooleanContents.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d56029208e61..3f81bbbe4061 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -99,7 +99,7 @@ private:
           return SoftenFloat;
         return ExpandFloat;
       }
-        
+
       if (VT.getVectorNumElements() == 1)
         return ScalarizeVector;
       return SplitVector;
@@ -192,6 +192,10 @@ private:
   SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
                       const SDValue *Ops, unsigned NumOps, bool isSigned,
                       DebugLoc dl);
+	std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+									                               SDNode *Node, bool isSigned);
+	std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
   SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
   void ReplaceValueWith(SDValue From, SDValue To);
   void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
@@ -244,7 +248,7 @@ private:
   SDValue PromoteIntRes_AssertZext(SDNode *N);
   SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
   SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
-  SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+  SDValue PromoteIntRes_BITCAST(SDNode *N);
   SDValue PromoteIntRes_BSWAP(SDNode *N);
   SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
   SDValue PromoteIntRes_Constant(SDNode *N);
@@ -278,7 +282,7 @@ private:
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
   SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
-  SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+  SDValue PromoteIntOp_BITCAST(SDNode *N);
   SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
   SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
@@ -344,6 +348,7 @@ private:
 
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_UMULSMULO	      (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, unsigned Amt,
                              SDValue &Lo, SDValue &Hi);
@@ -352,7 +357,7 @@ private:
 
   // Integer Operand Expansion.
   bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
-  SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+  SDValue ExpandIntOp_BITCAST(SDNode *N);
   SDValue ExpandIntOp_BR_CC(SDNode *N);
   SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
   SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
@@ -387,7 +392,7 @@ private:
 
   // Result Float to Integer Conversion.
   void SoftenFloatResult(SDNode *N, unsigned OpNo);
-  SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+  SDValue SoftenFloatRes_BITCAST(SDNode *N);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
   SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
   SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
@@ -426,7 +431,7 @@ private:
 
   // Operand Float to Integer Conversion.
   bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
-  SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+  SDValue SoftenFloatOp_BITCAST(SDNode *N);
   SDValue SoftenFloatOp_BR_CC(SDNode *N);
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
@@ -515,7 +520,7 @@ private:
   SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
   SDValue ScalarizeVecRes_InregOp(SDNode *N);
 
-  SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+  SDValue ScalarizeVecRes_BITCAST(SDNode *N);
   SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
   SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue ScalarizeVecRes_FPOWI(SDNode *N);
@@ -532,7 +537,7 @@ private:
 
   // Vector Operand Scalarization: <1 x ty> -> ty.
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
-  SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+  SDValue ScalarizeVecOp_BITCAST(SDNode *N);
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -557,7 +562,7 @@ private:
   void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
 
-  void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -577,11 +582,12 @@ private:
   bool SplitVectorOperand(SDNode *N, unsigned OpNo);
   SDValue SplitVecOp_UnaryOp(SDNode *N);
 
-  SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+  SDValue SplitVecOp_BITCAST(SDNode *N);
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue SplitVecOp_FP_ROUND(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Widening Support: LegalizeVectorTypes.cpp
@@ -603,7 +609,7 @@ private:
 
   // Widen Vector Result Promotion.
   void WidenVectorResult(SDNode *N, unsigned ResNo);
-  SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+  SDValue WidenVecRes_BITCAST(SDNode* N);
   SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
   SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
   SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
@@ -628,7 +634,7 @@ private:
 
   // Widen Vector Operand.
   bool WidenVectorOperand(SDNode *N, unsigned ResNo);
-  SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+  SDValue WidenVecOp_BITCAST(SDNode *N);
   SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -721,7 +727,7 @@ private:
   }
 
   // Generic Result Expansion.
-  void ExpandRes_BIT_CONVERT       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_BITCAST           (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandRes_BUILD_PAIR        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandRes_EXTRACT_ELEMENT   (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -729,7 +735,7 @@ private:
   void ExpandRes_VAARG             (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   // Generic Operand Expansion.
-  SDValue ExpandOp_BIT_CONVERT      (SDNode *N);
+  SDValue ExpandOp_BITCAST          (SDNode *N);
   SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);
   SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);
   SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9c2b1d9ed73d..a75ae87f3cbe 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -32,8 +32,7 @@ using namespace llvm;
 // little/big-endian machines, followed by the Hi/Lo part.  This means that
 // they cannot be used as is on vectors, for which Lo is always stored first.
 
-void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
-                                             SDValue &Hi) {
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   SDValue InOp = N->getOperand(0);
@@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
     case SoftenFloat:
       // Convert the integer operand instead.
       SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     case ExpandInteger:
     case ExpandFloat:
       // Convert the expanded pieces of the input.
       GetExpandedOp(InOp, Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     case SplitVector:
       GetSplitVector(InOp, Lo, Hi);
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     case ScalarizeVector:
       // Convert the element instead.
       SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     case WidenVector: {
-      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
       InOp = GetWidenedVector(InOp);
       EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
                                    InVT.getVectorNumElements()/2);
@@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
                        DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
       return;
     }
   }
 
   if (InVT.isVector() && OutVT.isInteger()) {
-    // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+    // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
     // is legal but the result is not.
     EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
 
     if (isTypeLegal(NVT)) {
-      SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+      SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
       Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
                        DAG.getIntPtrConstant(0));
       Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
@@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
                                               getTypeForEVT(*DAG.getContext()));
   SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
 
   // Emit a store to the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
                                false, false, 0);
 
   // Load the first half from the stack slot.
-  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
 
   // Increment the pointer to the other half.
   unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
                          DAG.getIntPtrConstant(IncrementSize));
 
   // Load the second half from the stack slot.
-  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+                   PtrInfo.getWithOffset(IncrementSize), false,
                    false, MinAlign(Alignment, IncrementSize));
 
   // Handle endianness of the load.
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   EVT OldVT = N->getValueType(0);
   EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
 
-  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
                                EVT::getVectorVT(*DAG.getContext(),
                                                 NewVT, 2*OldElts),
                                OldVec);
@@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
-  int SVOffset = LD->getSrcValueOffset();
   unsigned Alignment = LD->getAlignment();
   bool isVolatile = LD->isVolatile();
   bool isNonTemporal = LD->isNonTemporal();
 
   assert(NVT.isByteSized() && "Expanded type not byte sized!");
 
-  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
                    isVolatile, isNonTemporal, Alignment);
 
   // Increment the pointer to the other half.
   unsigned IncrementSize = NVT.getSizeInBits() / 8;
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
-  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
-                   SVOffset+IncrementSize,
+  Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+                   LD->getPointerInfo().getWithOffset(IncrementSize),
                    isVolatile, isNonTemporal,
                    MinAlign(Alignment, IncrementSize));
 
@@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
 // Generic Operand Expansion.
 //===--------------------------------------------------------------------===//
 
-SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   if (N->getValueType(0).isVector()) {
     // An illegal expanding type is being converted to a legal vector type.
     // Make a two element vector out of the expanded parts and convert that
     // instead, but only if the new vector type is legal (otherwise there
     // is no point, and it might create expansion loops).  For example, on
-    // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+    // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
     EVT OVT = N->getOperand(0).getValueType();
     EVT NVT = EVT::getVectorVT(*DAG.getContext(),
                                TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
@@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
         std::swap(Parts[0], Parts[1]);
 
       SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+      return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
     }
   }
 
@@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
                                &NewElts[0], NewElts.size());
 
   // Convert the new vector to the old vector type.
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
 }
 
 SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
@@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
   // Bitconvert to a vector of twice the length with elements of the expanded
   // type, insert the expanded vector elements, and then convert back.
   EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
-  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
                                NewVecVT, N->getOperand(0));
 
   SDValue Lo, Hi;
@@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
   NewVec =  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
 
   // Convert the new vector to the old vector type.
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
 }
 
 SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
@@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
                                      St->getValue().getValueType());
   SDValue Chain = St->getChain();
   SDValue Ptr = St->getBasePtr();
-  int SVOffset = St->getSrcValueOffset();
   unsigned Alignment = St->getAlignment();
   bool isVolatile = St->isVolatile();
   bool isNonTemporal = St->isNonTemporal();
@@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   if (TLI.isBigEndian())
     std::swap(Lo, Hi);
 
-  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
                     isVolatile, isNonTemporal, Alignment);
 
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
   assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
-  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
-                    SVOffset + IncrementSize,
+  Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+                    St->getPointerInfo().getWithOffset(IncrementSize),
                     isVolatile, isNonTemporal,
                     MinAlign(Alignment, IncrementSize));
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 621c08724210..167dbe0377b3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
 
   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     if (Op.getOperand(j).getValueType().isVector())
-      Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+      Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
     else
       Operands[j] = Op.getOperand(j);
   }
 
   Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Op);
 }
 
 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93bc2d04928e..182f8fcbfbf3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
 #endif
     llvm_unreachable("Do not know how to scalarize the result of this operator!");
 
-  case ISD::BIT_CONVERT:       R = ScalarizeVecRes_BIT_CONVERT(N); break;
+  case ISD::BITCAST:           R = ScalarizeVecRes_BITCAST(N); break;
   case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
   case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
@@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
                      LHS.getValueType(), LHS, RHS);
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
   EVT NewVT = N->getValueType(0).getVectorElementType();
-  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                      NewVT, N->getOperand(0));
 }
 
@@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
                                N->getDebugLoc(),
                                N->getChain(), N->getBasePtr(),
                                DAG.getUNDEF(N->getBasePtr().getValueType()),
-                               N->getSrcValue(), N->getSrcValueOffset(),
+                               N->getPointerInfo(),
                                N->getMemoryVT().getVectorElementType(),
                                N->isVolatile(), N->isNonTemporal(),
                                N->getOriginalAlignment());
@@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
       dbgs() << "\n";
 #endif
       llvm_unreachable("Do not know how to scalarize this operator's operand!");
-    case ISD::BIT_CONVERT:
-      Res = ScalarizeVecOp_BIT_CONVERT(N);
+    case ISD::BITCAST:
+      Res = ScalarizeVecOp_BITCAST(N);
       break;
     case ISD::CONCAT_VECTORS:
       Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   return false;
 }
 
-/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
 /// to be scalarized, it must be <1 x ty>.  Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
-  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                      N->getValueType(0), Elt);
 }
 
@@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
   if (N->isTruncatingStore())
     return DAG.getTruncStore(N->getChain(), dl,
                              GetScalarizedVector(N->getOperand(1)),
-                             N->getBasePtr(),
-                             N->getSrcValue(), N->getSrcValueOffset(),
+                             N->getBasePtr(), N->getPointerInfo(),
                              N->getMemoryVT().getVectorElementType(),
                              N->isVolatile(), N->isNonTemporal(),
                              N->getAlignment());
 
   return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
-                      N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+                      N->getBasePtr(), N->getPointerInfo(),
                       N->isVolatile(), N->isNonTemporal(),
                       N->getOriginalAlignment());
 }
@@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
 
-  case ISD::BIT_CONVERT:       SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BITCAST:           SplitVecRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
   case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
@@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
 }
 
-void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
-                                               SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
   // We know the result is a vector.  The input may be either a vector or a
   // scalar value.
   EVT LoVT, HiVT;
@@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
       GetExpandedOp(InOp, Lo, Hi);
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
       return;
     }
     break;
@@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
     // If the input is a vector that needs to be split, convert each split
     // piece of the input now.
     GetSplitVector(InOp, Lo, Hi);
-    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
-    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
     return;
   }
 
@@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
 
   if (TLI.isBigEndian())
     std::swap(Lo, Hi);
-  Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
-  Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+  Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+  Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
 }
 
 void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
@@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
     EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
                                  LoVT.getVectorNumElements());
     VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
-                     DAG.getIntPtrConstant(0));
+                      DAG.getIntPtrConstant(0));
     VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
-                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
     break;
   }
   }
@@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
   SDValue Idx = N->getOperand(1);
-  EVT IdxVT = Idx.getValueType();
   DebugLoc dl = N->getDebugLoc();
 
   EVT LoVT, HiVT;
   GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
 
   Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
-  Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
-                    DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
-  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+                   DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
 }
 
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   EVT VecVT = Vec.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
-                               false, false, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
 
   // Store the new element.  This may be larger than the vector element type,
   // so use a truncating store.
@@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment =
     TLI.getTargetData()->getPrefTypeAlignment(VecType);
-  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
                             false, false, 0);
 
   // Load the Lo part from the stack slot.
-  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
                    false, false, 0);
 
   // Increment the pointer to the other part.
@@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                          DAG.getIntPtrConstant(IncrementSize));
 
   // Load the Hi part from the stack slot.
-  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
-                   false, MinAlign(Alignment, IncrementSize));
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, MinAlign(Alignment, IncrementSize));
 }
 
 void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
   SDValue Ch = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
   SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
-  const Value *SV = LD->getSrcValue();
-  int SVOffset = LD->getSrcValueOffset();
   EVT MemoryVT = LD->getMemoryVT();
   unsigned Alignment = LD->getOriginalAlignment();
   bool isVolatile = LD->isVolatile();
@@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
   GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
 
   Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
-                   SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
+                   LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+                   Alignment);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
-  SVOffset += IncrementSize;
   Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
-                   SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
+                   LD->getPointerInfo().getWithOffset(IncrementSize),
+                   HiMemVT, isVolatile, isNonTemporal, Alignment);
 
   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
 #endif
       llvm_unreachable("Do not know how to split this operator's operand!");
 
-    case ISD::BIT_CONVERT:       Res = SplitVecOp_BIT_CONVERT(N); break;
+    case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;
     case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
     case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
     case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
+    case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
     case ISD::STORE:
       Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
       break;
@@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::FP_TO_UINT:
     case ISD::SINT_TO_FP:
     case ISD::UINT_TO_FP:
+    case ISD::FP_EXTEND:
+    case ISD::FTRUNC:
     case ISD::TRUNCATE:
     case ISD::SIGN_EXTEND:
     case ISD::ZERO_EXTEND:
@@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
-  // For example, i64 = BIT_CONVERT v4i16 on alpha.  Typically the vector will
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+  // For example, i64 = BITCAST v4i16 on alpha.  Typically the vector will
   // end up being split all the way down to individual components.  Convert the
   // split pieces into integers and reassemble.
   SDValue Lo, Hi;
@@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
   if (TLI.isBigEndian())
     std::swap(Lo, Hi);
 
-  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
                      JoinIntegers(Lo, Hi));
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
-  // We know that the extracted result type is legal.  For now, assume the index
-  // is a constant.
+  // We know that the extracted result type is legal.
   EVT SubVT = N->getValueType(0);
   SDValue Idx = N->getOperand(1);
   DebugLoc dl = N->getDebugLoc();
@@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   EVT EltVT = VecVT.getVectorElementType();
   DebugLoc dl = N->getDebugLoc();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
-  const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
-                               false, false, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
 
   // Load back the required element.
   StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
-  return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
-                        SV, 0, EltVT, false, false, 0);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+                        MachinePointerInfo(), EltVT, false, false, 0);
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   bool isTruncating = N->isTruncatingStore();
   SDValue Ch  = N->getChain();
   SDValue Ptr = N->getBasePtr();
-  int SVOffset = N->getSrcValueOffset();
   EVT MemoryVT = N->getMemoryVT();
   unsigned Alignment = N->getOriginalAlignment();
   bool isVol = N->isVolatile();
@@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
 
   if (isTruncating)
-    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
                            LoMemVT, isVol, isNT, Alignment);
   else
-    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
                       isVol, isNT, Alignment);
 
   // Increment the pointer to the other half.
   Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                     DAG.getIntPtrConstant(IncrementSize));
-  SVOffset += IncrementSize;
 
   if (isTruncating)
-    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
                            HiMemVT, isVol, isNT, Alignment);
   else
-    Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+    Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+                      N->getPointerInfo().getWithOffset(IncrementSize),
                       isVol, isNT, Alignment);
 
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
@@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 
 SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
   DebugLoc DL = N->getDebugLoc();
-  
+
   // The input operands all must have the same type, and we know the result the
   // result type is valid.  Convert this to a buildvector which extracts all the
   // input elements.
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
 
     }
   }
-  
+
   return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
                      &Elts[0], Elts.size());
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+  // The result has a legal vector type, but the input needs splitting.
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  DebugLoc DL = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  EVT InVT = Lo.getValueType();
+  
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+                               InVT.getVectorNumElements());
+  
+  Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+  Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+  
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}  
+
+
 
 //===----------------------------------------------------------------------===//
 //  Result Vector Widening
@@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
 #endif
     llvm_unreachable("Do not know how to widen the result of this operator!");
 
-  case ISD::BIT_CONVERT:       Res = WidenVecRes_BIT_CONVERT(N); break;
+  case ISD::BITCAST:           Res = WidenVecRes_BITCAST(N); break;
   case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;
   case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
   case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
@@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   EVT WidenEltVT = WidenVT.getVectorElementType();
   EVT VT = WidenVT;
   unsigned NumElts =  VT.getVectorNumElements();
-  while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
+  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
     NumElts = NumElts / 2;
     VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
   }
@@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
     SDValue InOp2 = GetWidenedVector(N->getOperand(1));
     return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
   }
-  
+
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
     return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
-  
+
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;
   SDValue InOp1 = GetWidenedVector(N->getOperand(0));
@@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
   unsigned ConcatEnd = 0;  // Current ConcatOps index.
   int Idx = 0;        // Current Idx into input vectors.
 
-  // NumElts := greatest synthesizable vector size (at most WidenVT)
+  // NumElts := greatest legal vector size (at most WidenVT)
   // while (orig. vector has unhandled elements) {
   //   take munches of size NumElts from the beginning and add to ConcatOps
   //   NumElts := next smaller supported vector size or 1
@@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
     do {
       NumElts = NumElts / 2;
       VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
-    } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+    } while (!TLI.isTypeLegal(VT) && NumElts != 1);
 
     if (NumElts == 1) {
       for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
-        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
                                    InOp1, DAG.getIntPtrConstant(Idx));
-        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 
+        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
                                    InOp2, DAG.getIntPtrConstant(Idx));
         ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
                                              EOp1, EOp2);
@@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
     do {
       NextSize *= 2;
       NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
-    } while (!TLI.isTypeSynthesizable(NextVT));
+    } while (!TLI.isTypeLegal(NextVT));
 
     if (!VT.isVector()) {
       // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
@@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
     if (VT == WidenVT)
       return ConcatOps[0];
   }
-  
+
   // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
   unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
   if (NumOps != ConcatEnd ) {
@@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   SDValue InOp = N->getOperand(0);
-  DebugLoc dl = N->getDebugLoc();
+  DebugLoc DL = N->getDebugLoc();
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
     InOp = GetWidenedVector(N->getOperand(0));
     InVT = InOp.getValueType();
     InVTNumElts = InVT.getVectorNumElements();
-    if (InVTNumElts == WidenNumElts)
-      return DAG.getNode(Opcode, dl, WidenVT, InOp);
+    if (InVTNumElts == WidenNumElts) {
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InOp);
+      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+    }
   }
 
-  if (TLI.isTypeSynthesizable(InWidenVT)) {
+  if (TLI.isTypeLegal(InWidenVT)) {
     // Because the result and the input are different vector types, widening
     // the result could create a legal type but widening the input might make
     // it an illegal type that might lead to repeatedly splitting the input
@@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
       SDValue UndefVal = DAG.getUNDEF(InVT);
       for (unsigned i = 1; i != NumConcat; ++i)
         Ops[i] = UndefVal;
-      return DAG.getNode(Opcode, dl, WidenVT,
-                         DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
-                         &Ops[0], NumConcat));
+      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+                                  &Ops[0], NumConcat);
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVec);
+      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
     }
 
     if (InVTNumElts % WidenNumElts == 0) {
+      SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+                                  InOp, DAG.getIntPtrConstant(0));
       // Extract the input and convert the shorten input vector.
-      return DAG.getNode(Opcode, dl, WidenVT,
-                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
-                                     InOp, DAG.getIntPtrConstant(0)));
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVal);
+      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
     }
   }
 
@@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   EVT EltVT = WidenVT.getVectorElementType();
   unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
   unsigned i;
-  for (i=0; i < MinElts; ++i)
-    Ops[i] = DAG.getNode(Opcode, dl, EltVT,
-                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
-                                     DAG.getIntPtrConstant(i)));
+  for (i=0; i < MinElts; ++i) {
+    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+                              DAG.getIntPtrConstant(i));
+    if (N->getNumOperands() == 1)
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+    else
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+  }
 
   SDValue UndefVal = DAG.getUNDEF(EltVT);
   for (; i < WidenNumElts; ++i)
     Ops[i] = UndefVal;
 
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
                      WidenVT, WidenLHS, DAG.getValueType(ExtVT));
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
   SDValue InOp = N->getOperand(0);
   EVT InVT = InOp.getValueType();
   EVT VT = N->getValueType(0);
@@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
     InOp = GetPromotedInteger(InOp);
     InVT = InOp.getValueType();
     if (WidenVT.bitsEq(InVT))
-      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
     break;
   case SoftenFloat:
   case ExpandInteger:
@@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
     InVT = InOp.getValueType();
     if (WidenVT.bitsEq(InVT))
       // The input widens to the same size. Convert to the widen value.
-      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
     break;
   }
 
   unsigned WidenSize = WidenVT.getSizeInBits();
   unsigned InSize = InVT.getSizeInBits();
-  if (WidenSize % InSize == 0) {
+  // x86mmx is not an acceptable vector element type, so don't try.
+  if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
     // Determine new input vector type.  The new input vector type will use
     // the same element type (if its a vector) or use the input type as a
     // vector.  It is the same size as the type to widen to.
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
       NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
     }
 
-    if (TLI.isTypeSynthesizable(NewInVT)) {
+    if (TLI.isTypeLegal(NewInVT)) {
       // Because the result and the input are different vector types, widening
       // the result could create a legal type but widening the input might make
       // it an illegal type that might lead to repeatedly splitting the input
@@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
       else
         NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
                              NewInVT, &Ops[0], NewNumElts);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
     }
   }
 
@@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
                                   SatOp, CvtCode);
   }
 
-  if (TLI.isTypeSynthesizable(InWidenVT)) {
+  if (TLI.isTypeLegal(InWidenVT)) {
     // Because the result and the input are different vector types, widening
     // the result could create a legal type but widening the input might make
     // it an illegal type that might lead to repeatedly splitting the input
@@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
 
   EVT InVT = InOp.getValueType();
 
-  ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
-  if (CIdx) {
-    unsigned IdxVal = CIdx->getZExtValue();
-    // Check if we can just return the input vector after widening.
-    if (IdxVal == 0 && InVT == WidenVT)
-      return InOp;
-
-    // Check if we can extract from the vector.
-    unsigned InNumElts = InVT.getVectorNumElements();
-    if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
-        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
-  }
+  // Check if we can just return the input vector after widening.
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  if (IdxVal == 0 && InVT == WidenVT)
+    return InOp;
+
+  // Check if we can extract from the vector.
+  unsigned InNumElts = InVT.getVectorNumElements();
+  if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
 
   // We could try widening the input to the right length but for now, extract
   // the original elements, fill the rest with undefs and build a vector.
   SmallVector<SDValue, 16> Ops(WidenNumElts);
   EVT EltVT = VT.getVectorElementType();
-  EVT IdxVT = Idx.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
   unsigned i;
-  if (CIdx) {
-    unsigned IdxVal = CIdx->getZExtValue();
-    for (i=0; i < NumElts; ++i)
-      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
-                           DAG.getConstant(IdxVal+i, IdxVT));
-  } else {
-    Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
-    for (i=1; i < NumElts; ++i) {
-      SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
-                                   DAG.getConstant(i, IdxVT));
-      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
-    }
-  }
+  for (i=0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                         DAG.getIntPtrConstant(IdxVal+i));
 
   SDValue UndefVal = DAG.getUNDEF(EltVT);
   for (; i < WidenNumElts; ++i)
@@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
 #endif
     llvm_unreachable("Do not know how to widen this operator's operand!");
 
-  case ISD::BIT_CONVERT:        Res = WidenVecOp_BIT_CONVERT(N); break;
+  case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;
   case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
@@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
 }
 
-SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue InOp = GetWidenedVector(N->getOperand(0));
   EVT InWidenVT = InOp.getValueType();
@@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
   // Check if we can convert between two legal vector types and extract.
   unsigned InWidenSize = InWidenVT.getSizeInBits();
   unsigned Size = VT.getSizeInBits();
-  if (InWidenSize % Size == 0 && !VT.isVector()) {
+  // x86mmx is not an acceptable vector element type, so don't try.
+  if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
     unsigned NewNumElts = InWidenSize / Size;
     EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
-    if (TLI.isTypeSynthesizable(NewVT)) {
-      SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+    if (TLI.isTypeLegal(NewVT)) {
+      SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
                          DAG.getIntPtrConstant(0));
     }
@@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
   if (Width == WidenEltWidth)
     return RetVT;
 
-  // See if there is larger legal integer than the element type to load/store 
+  // See if there is larger legal integer than the element type to load/store
   unsigned VT;
   for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
        VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
     unsigned MemVTWidth = MemVT.getSizeInBits();
     if (MemVT.getSizeInBits() <= WidenEltWidth)
       break;
-    if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
       RetVT = MemVT;
@@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
        VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
     EVT MemVT = (MVT::SimpleValueType) VT;
     unsigned MemVTWidth = MemVT.getSizeInBits();
-    if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
         (WidenWidth % MemVTWidth) == 0 &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
     if (NewLdTy != LdTy) {
       NumElts = Width / NewLdTy.getSizeInBits();
       NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
-      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+      VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
       // Readjust position and vector position based on new load type
       Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
       LdTy = NewLdTy;
@@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
     VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
                         DAG.getIntPtrConstant(Idx++));
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+  return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
 }
 
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
-                                              LoadSDNode * LD) {
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+                                              LoadSDNode *LD) {
   // The strategy assumes that we can efficiently load powers of two widths.
   // The routines chops the vector into the largest vector loads with the same
   // element type or scalar loads and then recombines it to the widen vector
@@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
   // Load information
   SDValue   Chain = LD->getChain();
   SDValue   BasePtr = LD->getBasePtr();
-  int       SVOffset = LD->getSrcValueOffset();
   unsigned  Align    = LD->getAlignment();
   bool      isVolatile = LD->isVolatile();
   bool      isNonTemporal = LD->isNonTemporal();
-  const Value *SV = LD->getSrcValue();
 
   int LdWidth = LdVT.getSizeInBits();
   int WidthDiff = WidenWidth - LdWidth;          // Difference
@@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
   // Find the vector type that can load from.
   EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
   int NewVTWidth = NewVT.getSizeInBits();
-  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
                              isVolatile, isNonTemporal, Align);
   LdChain.push_back(LdOp.getValue(1));
 
@@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
       unsigned NumElts = WidenWidth / NewVTWidth;
       EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
       SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
     }
     if (NewVT == WidenVT)
       return LdOp;
@@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
       NewVTWidth = NewVT.getSizeInBits();
     }
 
-    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
-                               SVOffset+Offset, isVolatile,
+    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+                               LD->getPointerInfo().getWithOffset(Offset),
+                               isVolatile,
                                isNonTemporal, MinAlign(Align, Increment));
     LdChain.push_back(LdOp.getValue(1));
     LdOps.push_back(LdOp);
@@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
   if (!LdOps[0].getValueType().isVector())
     // All the loads are scalar loads.
     return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
-  
+
   // If the load contains vectors, build the vector using concat vector.
   // All of the vectors used to loads are power of 2 and the scalars load
   // can be combined to make a power of 2 vector.
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
   // Load information
   SDValue   Chain = LD->getChain();
   SDValue   BasePtr = LD->getBasePtr();
-  int       SVOffset = LD->getSrcValueOffset();
   unsigned  Align    = LD->getAlignment();
   bool      isVolatile = LD->isVolatile();
   bool      isNonTemporal = LD->isNonTemporal();
-  const Value *SV = LD->getSrcValue();
 
   EVT EltVT = WidenVT.getVectorElementType();
   EVT LdEltVT = LdVT.getVectorElementType();
@@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
   SmallVector<SDValue, 16> Ops(WidenNumElts);
   unsigned Increment = LdEltVT.getSizeInBits() / 8;
-  Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+                          LD->getPointerInfo(),
                           LdEltVT, isVolatile, isNonTemporal, Align);
   LdChain.push_back(Ops[0].getValue(1));
   unsigned i = 0, Offset = Increment;
   for (i=1; i < NumElts; ++i, Offset += Increment) {
     SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                                      BasePtr, DAG.getIntPtrConstant(Offset));
-    Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
-                            SVOffset + Offset, LdEltVT, isVolatile,
-                            isNonTemporal, Align);
+    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+                            LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+                            isVolatile, isNonTemporal, Align);
     LdChain.push_back(Ops[i].getValue(1));
   }
 
@@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
   // element type or scalar stores.
   SDValue  Chain = ST->getChain();
   SDValue  BasePtr = ST->getBasePtr();
-  const    Value *SV = ST->getSrcValue();
-  int      SVOffset = ST->getSrcValueOffset();
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
   bool     isNonTemporal = ST->isNonTemporal();
@@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
       do {
         SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
                                    DAG.getIntPtrConstant(Idx));
-        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
-                                       SVOffset + Offset, isVolatile,
-                                       isNonTemporal,
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                    ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
                                        MinAlign(Align, Offset)));
         StWidth -= NewVTWidth;
         Offset += Increment;
@@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
       // Cast the vector to the scalar type we can store
       unsigned NumElts = ValWidth / NewVTWidth;
       EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
-      SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+      SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
       // Readjust index position based on new vector type
       Idx = Idx * ValEltWidth / NewVTWidth;
       do {
         SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
                       DAG.getIntPtrConstant(Idx++));
-        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
-                                       SVOffset + Offset, isVolatile,
-                                       isNonTemporal, MinAlign(Align, Offset)));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                    ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
+                                       MinAlign(Align, Offset)));
         StWidth -= NewVTWidth;
         Offset += Increment;
         BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
   // and then store it.  Instead, we extract each element and then store it.
   SDValue  Chain = ST->getChain();
   SDValue  BasePtr = ST->getBasePtr();
-  const    Value *SV = ST->getSrcValue();
-  int      SVOffset = ST->getSrcValueOffset();
   unsigned Align = ST->getAlignment();
   bool     isVolatile = ST->isVolatile();
   bool     isNonTemporal = ST->isNonTemporal();
   SDValue  ValOp = GetWidenedVector(ST->getValue());
   DebugLoc dl = ST->getDebugLoc();
-  
+
   EVT StVT = ST->getMemoryVT();
   EVT ValVT = ValOp.getValueType();
 
@@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
   unsigned NumElts = StVT.getVectorNumElements();
   SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
                             DAG.getIntPtrConstant(0));
-  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
-                                      SVOffset, StEltVT,
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+                                      ST->getPointerInfo(), StEltVT,
                                       isVolatile, isNonTemporal, Align));
   unsigned Offset = Increment;
   for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
                                      BasePtr, DAG.getIntPtrConstant(Offset));
     SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
                             DAG.getIntPtrConstant(0));
-    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
-                                        SVOffset + Offset, StEltVT,
-                                        isVolatile, isNonTemporal,
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+                                      ST->getPointerInfo().getWithOffset(Offset),
+                                        StEltVT, isVolatile, isNonTemporal,
                                         MinAlign(Align, Offset)));
   }
 }
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ac2d33884b26..2dcb22957325 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -16,7 +16,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DebugLoc.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index fae27294e364..e3da2084529a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
 /// successors to the newly created node.
 SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
-  if (SU->getNode()->getFlaggedNode())
+  if (SU->getNode()->getGluedNode())
     return NULL;
 
   SDNode *N = SU->getNode();
@@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
   bool TryUnfold = false;
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
     EVT VT = N->getValueType(i);
-    if (VT == MVT::Flag)
+    if (VT == MVT::Glue)
       return NULL;
     else if (VT == MVT::Other)
       TryUnfold = true;
@@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     const SDValue &Op = N->getOperand(i);
     EVT VT = Op.getNode()->getValueType(Op.getResNo());
-    if (VT == MVT::Flag)
+    if (VT == MVT::Glue)
       return NULL;
   }
 
@@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
     }
   }
 
-  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
     if (Node->getOpcode() == ISD::INLINEASM) {
       // Inline asm can clobber physical defs.
       unsigned NumOps = Node->getNumOperands();
-      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
-        --NumOps;  // Ignore the flag operand.
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+        --NumOps;  // Ignore the glue operand.
 
       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
         unsigned Flags =
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 56f5ded50083..430283d5eff9 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
 static RegisterScheduler
   tdListDAGScheduler("list-td", "Top-down list scheduler",
                      createTDListDAGScheduler);
-   
+
 namespace {
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGList - The actual list scheduler implementation.  This supports
@@ -51,7 +51,7 @@ private:
   /// AvailableQueue - The priority queue to use for the available SUnits.
   ///
   SchedulingPriorityQueue *AvailableQueue;
-  
+
   /// PendingQueue - This contains all of the instructions whose operands have
   /// been issued, but their results are not ready yet (due to the latency of
   /// the operation).  Once the operands become available, the instruction is
@@ -63,11 +63,12 @@ private:
 
 public:
   ScheduleDAGList(MachineFunction &mf,
-                  SchedulingPriorityQueue *availqueue,
-                  ScheduleHazardRecognizer *HR)
-    : ScheduleDAGSDNodes(mf),
-      AvailableQueue(availqueue), HazardRec(HR) {
-    }
+                  SchedulingPriorityQueue *availqueue)
+    : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+    const TargetMachine &tm = mf.getTarget();
+    HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+  }
 
   ~ScheduleDAGList() {
     delete HazardRec;
@@ -87,14 +88,14 @@ private:
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGList::Schedule() {
   DEBUG(dbgs() << "********** List Scheduling **********\n");
-  
+
   // Build the scheduling graph.
   BuildSchedGraph(NULL);
 
   AvailableQueue->initNodes(SUnits);
-  
+
   ListScheduleTopDown();
-  
+
   AvailableQueue->releaseState();
 }
 
@@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
   --SuccSU->NumPredsLeft;
 
   SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-  
+
   // If all the node's predecessors are scheduled, this node is ready
   // to be scheduled. Ignore the special ExitSU node.
   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
-  
+
   Sequence.push_back(SU);
   assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
   SU->setDepthToAtLeast(CurCycle);
@@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
       SUnits[i].isAvailable = true;
     }
   }
-  
+
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back.  Schedule the node.
   std::vector<SUnit*> NotReady;
@@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
         assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
       }
     }
-    
+
     // If there are no instructions available, don't try to issue anything, and
     // don't advance the hazard recognizer.
     if (AvailableQueue->empty()) {
@@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() {
     }
 
     SUnit *FoundSUnit = 0;
-    
+
     bool HasNoopHazards = false;
     while (!AvailableQueue->empty()) {
       SUnit *CurSUnit = AvailableQueue->pop();
-      
+
       ScheduleHazardRecognizer::HazardType HT =
-        HazardRec->getHazardType(CurSUnit);
+        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
       if (HT == ScheduleHazardRecognizer::NoHazard) {
         FoundSUnit = CurSUnit;
         break;
       }
-    
+
       // Remember if this is a noop hazard.
       HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-      
+
       NotReady.push_back(CurSUnit);
     }
-    
+
     // Add the nodes that aren't ready back onto the available list.
     if (!NotReady.empty()) {
       AvailableQueue->push_all(NotReady);
@@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
       // If this is a pseudo-op node, we don't want to increment the current
       // cycle.
       if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
-        ++CurCycle;        
+        ++CurCycle;
     } else if (!HasNoopHazards) {
       // Otherwise, we have a pipeline stall, but no other problem, just advance
       // the current cycle and try again.
@@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
-/// createTDListDAGScheduler - This creates a top-down list scheduler with a
-/// new hazard recognizer. This scheduler takes ownership of the hazard
-/// recognizer and deletes it when done.
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
 ScheduleDAGSDNodes *
 llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
-  return new ScheduleDAGList(*IS->MF,
-                             new LatencyPriorityQueue(),
-                             IS->CreateTargetHazardRecognizer());
+  return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
 }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4c3e4e3b0768..0b548b277f4c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -20,6 +20,7 @@
 #include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
@@ -65,6 +66,10 @@ static RegisterScheduler
                       "which tries to balance ILP and register pressure",
                       createILPListDAGScheduler);
 
+static cl::opt<bool> DisableSchedCycles(
+  "disable-sched-cycles", cl::Hidden, cl::init(false),
+  cl::desc("Disable cycle-level precision during preRA scheduling"));
+
 namespace {
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -83,31 +88,56 @@ private:
   /// AvailableQueue - The priority queue to use for the available SUnits.
   SchedulingPriorityQueue *AvailableQueue;
 
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands becomes available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// CurCycle - The current scheduler state corresponds to this cycle.
+  unsigned CurCycle;
+
+  /// MinAvailableCycle - Cycle of the soonest available instruction.
+  unsigned MinAvailableCycle;
+
   /// LiveRegDefs - A set of physical registers and their definition
   /// that are "live". These nodes must be scheduled before any other nodes that
   /// modifies the registers can be scheduled.
   unsigned NumLiveRegs;
   std::vector<SUnit*> LiveRegDefs;
-  std::vector<unsigned> LiveRegCycles;
+  std::vector<SUnit*> LiveRegGens;
 
   /// Topo - A topological ordering for SUnits which permits fast IsReachable
   /// and similar queries.
   ScheduleDAGTopologicalSort Topo;
 
 public:
-  ScheduleDAGRRList(MachineFunction &mf,
-                    bool isbottomup, bool needlatency,
-                    SchedulingPriorityQueue *availqueue)
-    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
-      AvailableQueue(availqueue), Topo(SUnits) {
-    }
+  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+                    SchedulingPriorityQueue *availqueue,
+                    CodeGenOpt::Level OptLevel)
+    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+      Topo(SUnits) {
+
+    const TargetMachine &tm = mf.getTarget();
+    if (DisableSchedCycles || !NeedLatency)
+      HazardRec = new ScheduleHazardRecognizer();
+    else
+      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+  }
 
   ~ScheduleDAGRRList() {
+    delete HazardRec;
     delete AvailableQueue;
   }
 
   void Schedule();
 
+  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
   /// IsReachable - Checks if SU is reachable from TargetSU.
   bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
     return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +166,37 @@ public:
   }
 
 private:
+  bool isReady(SUnit *SU) {
+    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+      AvailableQueue->isReady(SU);
+  }
+
   void ReleasePred(SUnit *SU, const SDep *PredEdge);
-  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ReleasePredecessors(SUnit *SU);
   void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
   void ReleaseSuccessors(SUnit *SU);
+  void ReleasePending();
+  void AdvanceToCycle(unsigned NextCycle);
+  void AdvancePastStalls(SUnit *SU);
+  void EmitNode(SUnit *SU);
+  void ScheduleNodeBottomUp(SUnit*);
   void CapturePred(SDep *PredEdge);
-  void ScheduleNodeBottomUp(SUnit*, unsigned);
-  void ScheduleNodeTopDown(SUnit*, unsigned);
   void UnscheduleNodeBottomUp(SUnit*);
-  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+  void RestoreHazardCheckerBottomUp();
+  void BacktrackBottomUp(SUnit*, SUnit*);
   SUnit *CopyAndMoveSuccessors(SUnit*);
   void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                 const TargetRegisterClass*,
                                 const TargetRegisterClass*,
                                 SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
-  void ListScheduleTopDown();
+
+  SUnit *PickNodeToScheduleBottomUp();
   void ListScheduleBottomUp();
 
+  void ScheduleNodeTopDown(SUnit*);
+  void ListScheduleTopDown();
+
 
   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
   /// Updates the topological ordering if required.
@@ -190,11 +233,13 @@ private:
 void ScheduleDAGRRList::Schedule() {
   DEBUG(dbgs()
         << "********** List Scheduling BB#" << BB->getNumber()
-        << " **********\n");
+        << " '" << BB->getName() << "' **********\n");
 
+  CurCycle = 0;
+  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
   NumLiveRegs = 0;
-  LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
-  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+  LiveRegGens.resize(TRI->getNumRegs(), NULL);
 
   // Build the scheduling graph.
   BuildSchedGraph(NULL);
@@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {
   Topo.InitDAGTopologicalSorting();
 
   AvailableQueue->initNodes(SUnits);
-  
+
+  HazardRec->Reset();
+
   // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
   if (isBottomUp)
     ListScheduleBottomUp();
   else
     ListScheduleTopDown();
-  
+
   AvailableQueue->releaseState();
 }
 
@@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
   // to be scheduled. Ignore the special EntrySU node.
   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
     PredSU->isAvailable = true;
-    AvailableQueue->push(PredSU);
+
+    unsigned Height = PredSU->getHeight();
+    if (Height < MinAvailableCycle)
+      MinAvailableCycle = Height;
+
+    if (isReady(SU)) {
+      AvailableQueue->push(PredSU);
+    }
+    // CapturePred and others may have left the node in the pending queue, avoid
+    // adding it twice.
+    else if (!PredSU->isPending) {
+      PredSU->isPending = true;
+      PendingQueue.push_back(PredSU);
+    }
   }
 }
 
-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively create one large live range
+/// across a sequence of two-address node. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
   // Bottom up: release predecessors
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     ReleasePred(SU, &*I);
     if (I->isAssignedRegDep()) {
       // This is a physical register dependency and it's impossible or
-      // expensive to copy the register. Make sure nothing that can 
+      // expensive to copy the register. Make sure nothing that can
       // clobber the register is scheduled between the predecessor and
       // this node.
-      if (!LiveRegDefs[I->getReg()]) {
+      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+             "interference on register dependence");
+      LiveRegDefs[I->getReg()] = I->getSUnit();
+      if (!LiveRegGens[I->getReg()]) {
         ++NumLiveRegs;
-        LiveRegDefs[I->getReg()] = I->getSUnit();
-        LiveRegCycles[I->getReg()] = CurCycle;
+        LiveRegGens[I->getReg()] = SU;
       }
     }
   }
 }
 
+/// Check to see if any of the pending instructions are ready to issue.  If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+  if (DisableSchedCycles) {
+    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+    return;
+  }
+
+  // If the available queue is empty, it is safe to reset MinAvailableCycle.
+  if (AvailableQueue->empty())
+    MinAvailableCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue.  If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+    unsigned ReadyCycle =
+      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+    if (ReadyCycle < MinAvailableCycle)
+      MinAvailableCycle = ReadyCycle;
+
+    if (PendingQueue[i]->isAvailable) {
+      if (!isReady(PendingQueue[i]))
+          continue;
+      AvailableQueue->push(PendingQueue[i]);
+    }
+    PendingQueue[i]->isPending = false;
+    PendingQueue[i] = PendingQueue.back();
+    PendingQueue.pop_back();
+    --i; --e;
+  }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+  if (NextCycle <= CurCycle)
+    return;
+
+  AvailableQueue->setCurCycle(NextCycle);
+  if (!HazardRec->isEnabled()) {
+    // Bypass lots of virtual calls in case of long latency.
+    CurCycle = NextCycle;
+  }
+  else {
+    for (; CurCycle != NextCycle; ++CurCycle) {
+      if (isBottomUp)
+        HazardRec->RecedeCycle();
+      else
+        HazardRec->AdvanceCycle();
+    }
+  }
+  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+  // available Q to release pending nodes at least once before popping.
+  ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+  if (DisableSchedCycles)
+    return;
+
+  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+  // Bump CurCycle to account for latency. We assume the latency of other
+  // available instructions may be hidden by the stall (not a full pipe stall).
+  // This updates the hazard recognizer's cycle before reserving resources for
+  // this instruction.
+  AdvanceToCycle(ReadyCycle);
+
+  // Calls are scheduled in their preceding cycle, so don't conflict with
+  // hazards from instructions after the call. EmitNode will reset the
+  // scoreboard state before emitting the call.
+  if (isBottomUp && SU->isCall)
+    return;
+
+  // FIXME: For resource conflicts in very long non-pipelined stages, we
+  // should probably skip ahead here to avoid useless scoreboard checks.
+  int Stalls = 0;
+  while (true) {
+    ScheduleHazardRecognizer::HazardType HT =
+      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+    if (HT == ScheduleHazardRecognizer::NoHazard)
+      break;
+
+    ++Stalls;
+  }
+  AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+  if (!HazardRec->isEnabled())
+    return;
+
+  // Check for phys reg copy.
+  if (!SU->getNode())
+    return;
+
+  switch (SU->getNode()->getOpcode()) {
+  default:
+    assert(SU->getNode()->isMachineOpcode() &&
+           "This target-independent node should not be scheduled.");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor:
+  case ISD::CopyToReg:
+  case ISD::CopyFromReg:
+  case ISD::EH_LABEL:
+    // Noops don't affect the scoreboard state. Copies are likely to be
+    // removed.
+    return;
+  case ISD::INLINEASM:
+    // For inline asm, clear the pipeline state.
+    HazardRec->Reset();
+    return;
+  }
+  if (isBottomUp && SU->isCall) {
+    // Calls are scheduled with their preceding instructions. For bottom-up
+    // scheduling, clear the pipeline state before emitting.
+    HazardRec->Reset();
+  }
+
+  HazardRec->EmitInstruction(SU);
+
+  if (!isBottomUp && SU->isCall) {
+    HazardRec->Reset();
+  }
+}
+
 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
   DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
@@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
     DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
 #endif
 
-  // FIXME: Handle noop hazard.
+  // FIXME: Do not modify node height. It may interfere with
+  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node it's ready cycle can aid heuristics, and after scheduling it can
+  // indicate the scheduled cycle.
   SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled intruction.
+  EmitNode(SU);
+
   Sequence.push_back(SU);
 
   AvailableQueue->ScheduledNode(SU);
 
-  ReleasePredecessors(SU, CurCycle);
+  // Update liveness of predecessors before successors to avoid treating a
+  // two-address node as a live range def.
+  ReleasePredecessors(SU);
 
   // Release all the implicit physical register defs that are live.
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    if (I->isAssignedRegDep()) {
-      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
-        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
-        assert(LiveRegDefs[I->getReg()] == SU &&
-               "Physical register dependency violated?");
-        --NumLiveRegs;
-        LiveRegDefs[I->getReg()] = NULL;
-        LiveRegCycles[I->getReg()] = 0;
-      }
+    // LiveRegDegs[I->getReg()] != SU when SU is a two-address node.
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
     }
   }
 
   SU->isScheduled = true;
+
+  // Conditions under which the scheduler should eagerly advance the cycle:
+  // (1) No available instructions
+  // (2) All pipelines full, so available instructions must have hazards.
+  //
+  // If HazardRec is disabled, count each inst as one cycle.
+  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
+      || AvailableQueue->empty())
+    AdvanceToCycle(CurCycle + 1);
 }
 
 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
 /// unscheduled, incrcease the succ left count of its predecessors. Remove
 /// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {  
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
   SUnit *PredSU = PredEdge->getSUnit();
   if (PredSU->isAvailable) {
     PredSU->isAvailable = false;
@@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     CapturePred(&*I);
-    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
       assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
              "Physical register dependency violated?");
       --NumLiveRegs;
       LiveRegDefs[I->getReg()] = NULL;
-      LiveRegCycles[I->getReg()] = 0;
+      LiveRegGens[I->getReg()] = NULL;
     }
   }
 
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     if (I->isAssignedRegDep()) {
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      LiveRegDefs[I->getReg()] = SU;
       if (!LiveRegDefs[I->getReg()]) {
-        LiveRegDefs[I->getReg()] = SU;
         ++NumLiveRegs;
       }
-      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
-        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+      if (LiveRegGens[I->getReg()] == NULL ||
+          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+        LiveRegGens[I->getReg()] = I->getSUnit();
     }
   }
+  if (SU->getHeight() < MinAvailableCycle)
+    MinAvailableCycle = SU->getHeight();
 
   SU->setHeightDirty();
   SU->isScheduled = false;
   SU->isAvailable = true;
-  AvailableQueue->push(SU);
+  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+    // Don't make available until backtracking is complete.
+    SU->isPending = true;
+    PendingQueue.push_back(SU);
+  }
+  else {
+    AvailableQueue->push(SU);
+  }
   AvailableQueue->UnscheduledNode(SU);
 }
 
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding the the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+  HazardRec->Reset();
+
+  unsigned LookAhead = std::min((unsigned)Sequence.size(),
+                                HazardRec->getMaxLookAhead());
+  if (LookAhead == 0)
+    return;
+
+  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+  unsigned HazardCycle = (*I)->getHeight();
+  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+    SUnit *SU = *I;
+    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+      HazardRec->RecedeCycle();
+    }
+    EmitNode(SU);
+  }
+}
+
 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
 /// BTCycle in order to schedule a specific node.
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
-                                          unsigned &CurCycle) {
-  SUnit *OldSU = NULL;
-  while (CurCycle > BtCycle) {
-    OldSU = Sequence.back();
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+  SUnit *OldSU = Sequence.back();
+  while (true) {
     Sequence.pop_back();
     if (SU->isSucc(OldSU))
       // Don't try to remove SU from AvailableQueue.
       SU->isAvailable = false;
+    // FIXME: use ready cycle instead of height
+    CurCycle = OldSU->getHeight();
     UnscheduleNodeBottomUp(OldSU);
-    --CurCycle;
     AvailableQueue->setCurCycle(CurCycle);
+    if (OldSU == BtSU)
+      break;
+    OldSU = Sequence.back();
   }
 
   assert(!SU->isSucc(OldSU) && "Something is wrong!");
 
+  RestoreHazardCheckerBottomUp();
+
+  ReleasePending();
+
   ++NumBacktracks;
 }
 
 static bool isOperandOf(const SUnit *SU, SDNode *N) {
   for (const SDNode *SUNode = SU->getNode(); SUNode;
-       SUNode = SUNode->getFlaggedNode()) {
+       SUNode = SUNode->getGluedNode()) {
     if (SUNode->isOperandOf(N))
       return true;
   }
@@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
 /// successors to the newly created node.
 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
-  if (SU->getNode()->getFlaggedNode())
-    return NULL;
-
   SDNode *N = SU->getNode();
   if (!N)
     return NULL;
 
+  if (SU->getNode()->getGluedNode())
+    return NULL;
+
   SUnit *NewSU;
   bool TryUnfold = false;
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
     EVT VT = N->getValueType(i);
-    if (VT == MVT::Flag)
+    if (VT == MVT::Glue)
       return NULL;
     else if (VT == MVT::Other)
       TryUnfold = true;
@@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     const SDValue &Op = N->getOperand(i);
     EVT VT = Op.getNode()->getValueType(Op.getResNo());
-    if (VT == MVT::Flag)
+    if (VT == MVT::Glue)
       return NULL;
   }
 
@@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     } else {
       LoadSU = CreateNewSUnit(LoadNode);
       LoadNode->setNodeId(LoadSU->NodeNum);
+
+      InitNumRegDefsLeft(LoadSU);
       ComputeLatency(LoadSU);
     }
 
     SUnit *NewSU = CreateNewSUnit(N);
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NewSU->NodeNum);
-      
+
     const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
     for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
       if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
@@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     }
     if (TID.isCommutable())
       NewSU->isCommutable = true;
+
+    InitNumRegDefsLeft(NewSU);
     ComputeLatency(NewSU);
 
     // Record all the edges to and from the old SU, by category.
@@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
       RemovePred(SuccDep, D);
       D.setSUnit(NewSU);
       AddPred(SuccDep, D);
+      // Balance register pressure.
+      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+        --NewSU->NumRegDefsLeft;
     }
     for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
       SDep D = ChainSuccs[i];
@@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
         D.setSUnit(LoadSU);
         AddPred(SuccDep, D);
       }
-    } 
+    }
 
     // Add a data dependency to reflect that NewSU reads the value defined
     // by LoadSU.
@@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
 
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                                std::vector<SUnit*> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVector<unsigned, 4> &LRegs,
                                const TargetRegisterInfo *TRI) {
-  bool Added = false;
-  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
-    if (RegAdded.insert(Reg)) {
+  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if Ref is live.
+    if (!LiveRegDefs[Reg]) continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[Reg] == SU) continue;
+
+    // Add Reg to the set of interfering live regs.
+    if (RegAdded.insert(Reg))
       LRegs.push_back(Reg);
-      Added = true;
-    }
   }
-  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
-    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
-      if (RegAdded.insert(*Alias)) {
-        LRegs.push_back(*Alias);
-        Added = true;
-      }
-    }
-  return Added;
 }
 
 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
 /// scheduling of the given node to satisfy live physical register dependencies.
 /// If the specific node is the last one that's available to schedule, do
 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
-                                                 SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
   if (NumLiveRegs == 0)
     return false;
 
   SmallSet<unsigned, 4> RegAdded;
   // If this node would clobber any "live" register, then it's not ready.
+  //
+  // If SU is the currently live definition of the same register that it uses,
+  // then we are free to schedule it.
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
-    if (I->isAssignedRegDep())
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
       CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                          RegAdded, LRegs, TRI);
   }
 
-  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
     if (Node->getOpcode() == ISD::INLINEASM) {
       // Inline asm can clobber physical defs.
       unsigned NumOps = Node->getNumOperands();
-      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
-        --NumOps;  // Ignore the flag operand.
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+        --NumOps;  // Ignore the glue operand.
 
       for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
         unsigned Flags =
@@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
     for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
       CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
   }
+
   return !LRegs.empty();
 }
 
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+  SmallVector<SUnit*, 4> Interferences;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+  SUnit *CurSU = AvailableQueue->pop();
+  while (CurSU) {
+    SmallVector<unsigned, 4> LRegs;
+    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+      break;
+    LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+    Interferences.push_back(CurSU);
+    CurSU = AvailableQueue->pop();
+  }
+  if (CurSU) {
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+      Interferences[i]->isPending = false;
+      assert(Interferences[i]->isAvailable && "must still be available");
+      AvailableQueue->push(Interferences[i]);
+    }
+    return CurSU;
+  }
+
+  // All candidates are delayed due to live physical reg dependencies.
+  // Try backtracking, code duplication, or inserting cross class copies
+  // to resolve it.
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+    SUnit *TrySU = Interferences[i];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+    // Try unscheduling up to the point where it's safe to schedule
+    // this node.
+    SUnit *BtSU = NULL;
+    unsigned LiveCycle = UINT_MAX;
+    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+      unsigned Reg = LRegs[j];
+      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+        BtSU = LiveRegGens[Reg];
+        LiveCycle = BtSU->getHeight();
+      }
+    }
+    if (!WillCreateCycle(TrySU, BtSU))  {
+      BacktrackBottomUp(TrySU, BtSU);
+
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (BtSU->isAvailable) {
+        BtSU->isAvailable = false;
+        if (!BtSU->isPending)
+          AvailableQueue->remove(BtSU);
+      }
+      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+                          /*Reg=*/0, /*isNormalMemory=*/false,
+                          /*isMustAlias=*/false, /*isArtificial=*/true));
+
+      // If one or more successors has been unscheduled, then the current
+      // node is no longer avaialable. Schedule a successor that's now
+      // available instead.
+      if (!TrySU->isAvailable) {
+        CurSU = AvailableQueue->pop();
+      }
+      else {
+        CurSU = TrySU;
+        TrySU->isPending = false;
+        Interferences.erase(Interferences.begin()+i);
+      }
+      break;
+    }
+  }
+
+  if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try
+    // duplicate the nodes that produces these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+    // insert cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+    SUnit *TrySU = Interferences[0];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+    assert(LRegs.size() == 1 && "Can't handle this yet!");
+    unsigned Reg = LRegs[0];
+    SUnit *LRDef = LiveRegDefs[Reg];
+    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+    const TargetRegisterClass *RC =
+      TRI->getMinimalPhysRegClass(Reg, VT);
+    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If cross copy register class is null, then it must be possible copy
+    // the value directly. Do not try duplicate the def.
+    SUnit *NewDef = 0;
+    if (DestRC)
+      NewDef = CopyAndMoveSuccessors(LRDef);
+    else
+      DestRC = RC;
+    if (!NewDef) {
+      // Issue copies, these can be expensive cross register class copies.
+      SmallVector<SUnit*, 2> Copies;
+      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
+            << " to SU #" << Copies.front()->NodeNum << "\n");
+      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+                          /*Reg=*/0, /*isNormalMemory=*/false,
+                          /*isMustAlias=*/false,
+                          /*isArtificial=*/true));
+      NewDef = Copies.back();
+    }
+
+    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
+          << " to SU #" << TrySU->NodeNum << "\n");
+    LiveRegDefs[Reg] = NewDef;
+    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+                         /*Reg=*/0, /*isNormalMemory=*/false,
+                         /*isMustAlias=*/false,
+                         /*isArtificial=*/true));
+    TrySU->isAvailable = false;
+    CurSU = NewDef;
+  }
+
+  assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+  // Add the nodes that aren't ready back onto the available list.
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+    Interferences[i]->isPending = false;
+    // May no longer be available due to backtracking.
+    if (Interferences[i]->isAvailable) {
+      AvailableQueue->push(Interferences[i]);
+    }
+  }
+  return CurSU;
+}
 
 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
 /// schedulers.
 void ScheduleDAGRRList::ListScheduleBottomUp() {
-  unsigned CurCycle = 0;
-
   // Release any predecessors of the special Exit node.
-  ReleasePredecessors(&ExitSU, CurCycle);
+  ReleasePredecessors(&ExitSU);
 
   // Add root to Available queue.
   if (!SUnits.empty()) {
@@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
 
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back.  Schedule the node.
-  SmallVector<SUnit*, 4> NotReady;
-  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
   Sequence.reserve(SUnits.size());
   while (!AvailableQueue->empty()) {
-    bool Delayed = false;
-    LRegsMap.clear();
-    SUnit *CurSU = AvailableQueue->pop();
-    while (CurSU) {
-      SmallVector<unsigned, 4> LRegs;
-      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
-        break;
-      Delayed = true;
-      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+    DEBUG(dbgs() << "\n*** Examining Available\n";
+          AvailableQueue->dump(this));
 
-      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
-      NotReady.push_back(CurSU);
-      CurSU = AvailableQueue->pop();
-    }
+    // Pick the best node to schedule taking all constraints into
+    // consideration.
+    SUnit *SU = PickNodeToScheduleBottomUp();
 
-    // All candidates are delayed due to live physical reg dependencies.
-    // Try backtracking, code duplication, or inserting cross class copies
-    // to resolve it.
-    if (Delayed && !CurSU) {
-      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
-        SUnit *TrySU = NotReady[i];
-        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-
-        // Try unscheduling up to the point where it's safe to schedule
-        // this node.
-        unsigned LiveCycle = CurCycle;
-        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
-          unsigned Reg = LRegs[j];
-          unsigned LCycle = LiveRegCycles[Reg];
-          LiveCycle = std::min(LiveCycle, LCycle);
-        }
-        SUnit *OldSU = Sequence[LiveCycle];
-        if (!WillCreateCycle(TrySU, OldSU))  {
-          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
-          // Force the current node to be scheduled before the node that
-          // requires the physical reg dep.
-          if (OldSU->isAvailable) {
-            OldSU->isAvailable = false;
-            AvailableQueue->remove(OldSU);
-          }
-          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
-                              /*Reg=*/0, /*isNormalMemory=*/false,
-                              /*isMustAlias=*/false, /*isArtificial=*/true));
-          // If one or more successors has been unscheduled, then the current
-          // node is no longer avaialable. Schedule a successor that's now
-          // available instead.
-          if (!TrySU->isAvailable)
-            CurSU = AvailableQueue->pop();
-          else {
-            CurSU = TrySU;
-            TrySU->isPending = false;
-            NotReady.erase(NotReady.begin()+i);
-          }
-          break;
-        }
-      }
+    AdvancePastStalls(SU);
 
-      if (!CurSU) {
-        // Can't backtrack. If it's too expensive to copy the value, then try
-        // duplicate the nodes that produces these "too expensive to copy"
-        // values to break the dependency. In case even that doesn't work,
-        // insert cross class copies.
-        // If it's not too expensive, i.e. cost != -1, issue copies.
-        SUnit *TrySU = NotReady[0];
-        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-        assert(LRegs.size() == 1 && "Can't handle this yet!");
-        unsigned Reg = LRegs[0];
-        SUnit *LRDef = LiveRegDefs[Reg];
-        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
-        const TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(Reg, VT);
-        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-
-        // If cross copy register class is null, then it must be possible copy
-        // the value directly. Do not try duplicate the def.
-        SUnit *NewDef = 0;
-        if (DestRC)
-          NewDef = CopyAndMoveSuccessors(LRDef);
-        else
-          DestRC = RC;
-        if (!NewDef) {
-          // Issue copies, these can be expensive cross register class copies.
-          SmallVector<SUnit*, 2> Copies;
-          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
-                       << " to SU #" << Copies.front()->NodeNum << "\n");
-          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
-                              /*Reg=*/0, /*isNormalMemory=*/false,
-                              /*isMustAlias=*/false,
-                              /*isArtificial=*/true));
-          NewDef = Copies.back();
-        }
+    ScheduleNodeBottomUp(SU);
 
-        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
-                     << " to SU #" << TrySU->NodeNum << "\n");
-        LiveRegDefs[Reg] = NewDef;
-        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
-                             /*Reg=*/0, /*isNormalMemory=*/false,
-                             /*isMustAlias=*/false,
-                             /*isArtificial=*/true));
-        TrySU->isAvailable = false;
-        CurSU = NewDef;
-      }
-
-      assert(CurSU && "Unable to resolve live physical register dependencies!");
-    }
-
-    // Add the nodes that aren't ready back onto the available list.
-    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
-      NotReady[i]->isPending = false;
-      // May no longer be available due to backtracking.
-      if (NotReady[i]->isAvailable)
-        AvailableQueue->push(NotReady[i]);
+    while (AvailableQueue->empty() && !PendingQueue.empty()) {
+      // Advance the cycle to free resources. Skip ahead to the next ready SU.
+      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
     }
-    NotReady.clear();
-
-    if (CurSU)
-      ScheduleNodeBottomUp(CurSU, CurCycle);
-    ++CurCycle;
-    AvailableQueue->setCurCycle(CurCycle);
   }
 
   // Reverse the order if it is bottom up.
   std::reverse(Sequence.begin(), Sequence.end());
-  
+
 #ifndef NDEBUG
   VerifySchedule(isBottomUp);
 #endif
@@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
   DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
@@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// ListScheduleTopDown - The main loop of list scheduling for top-down
 /// schedulers.
 void ScheduleDAGRRList::ListScheduleTopDown() {
-  unsigned CurCycle = 0;
   AvailableQueue->setCurCycle(CurCycle);
 
   // Release any successors of the special Entry node.
@@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
       SUnits[i].isAvailable = true;
     }
   }
-  
+
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back.  Schedule the node.
   Sequence.reserve(SUnits.size());
   while (!AvailableQueue->empty()) {
     SUnit *CurSU = AvailableQueue->pop();
-    
+
     if (CurSU)
-      ScheduleNodeTopDown(CurSU, CurCycle);
+      ScheduleNodeTopDown(CurSU);
     ++CurCycle;
     AvailableQueue->setCurCycle(CurCycle);
   }
-  
+
 #ifndef NDEBUG
   VerifySchedule(isBottomUp);
 #endif
@@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
 
 
 //===----------------------------------------------------------------------===//
-//                RegReductionPriorityQueue Implementation
+//                RegReductionPriorityQueue Definition
 //===----------------------------------------------------------------------===//
 //
 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
 // to reduce register pressure.
-// 
+//
 namespace {
-  template<class SF>
-  class RegReductionPriorityQueue;
-  
-  /// bu_ls_rr_sort - Priority function for bottom up register pressure
-  // reduction scheduler.
-  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
-    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
-    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-    
-    bool operator()(const SUnit* left, const SUnit* right) const;
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
   };
 
-  // td_ls_rr_sort - Priority function for top down register pressure reduction
-  // scheduler.
-  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
-    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
-    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-    
-    bool operator()(const SUnit* left, const SUnit* right) const;
+  RegReductionPQBase *SPQ;
+  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = false,
+    HasReadyFilter = false
   };
 
-  // src_ls_rr_sort - Priority function for source order scheduler.
-  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
-    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
-      : SPQ(spq) {}
-    src_ls_rr_sort(const src_ls_rr_sort &RHS)
-      : SPQ(RHS.SPQ) {}
-    
-    bool operator()(const SUnit* left, const SUnit* right) const;
+  RegReductionPQBase *SPQ;
+  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+  bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
   };
 
-  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
-  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
-    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
-      : SPQ(spq) {}
-    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
-      : SPQ(RHS.SPQ) {}
+  RegReductionPQBase *SPQ;
+  src_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+  src_ls_rr_sort(const src_ls_rr_sort &RHS)
+    : SPQ(RHS.SPQ) {}
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
 
-    bool operator()(const SUnit* left, const SUnit* right) const;
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = true
   };
 
-  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
-  // scheduler.
-  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
-    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
-      : SPQ(spq) {}
-    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
-      : SPQ(RHS.SPQ) {}
+  RegReductionPQBase *SPQ;
+  hybrid_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+    : SPQ(RHS.SPQ) {}
+
+  bool isReady(SUnit *SU, unsigned CurCycle) const;
 
-    bool operator()(const SUnit* left, const SUnit* right) const;
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = true
   };
-}  // end anonymous namespace
+
+  RegReductionPQBase *SPQ;
+  ilp_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+    : SPQ(RHS.SPQ) {}
+
+  bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+  std::vector<SUnit*> Queue;
+  unsigned CurQueueId;
+  bool TracksRegPressure;
+
+  // SUnits - The SUnits for the current graph.
+  std::vector<SUnit> *SUnits;
+
+  MachineFunction &MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const TargetLowering *TLI;
+  ScheduleDAGRRList *scheduleDAG;
+
+  // SethiUllmanNumbers - The SethiUllman number for each node.
+  std::vector<unsigned> SethiUllmanNumbers;
+
+  /// RegPressure - Tracking current reg pressure per register class.
+  ///
+  std::vector<unsigned> RegPressure;
+
+  /// RegLimit - Tracking the number of allocatable registers per register
+  /// class.
+  std::vector<unsigned> RegLimit;
+
+public:
+  RegReductionPQBase(MachineFunction &mf,
+                     bool hasReadyFilter,
+                     bool tracksrp,
+                     const TargetInstrInfo *tii,
+                     const TargetRegisterInfo *tri,
+                     const TargetLowering *tli)
+    : SchedulingPriorityQueue(hasReadyFilter),
+      CurQueueId(0), TracksRegPressure(tracksrp),
+      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+    if (TracksRegPressure) {
+      unsigned NumRC = TRI->getNumRegClasses();
+      RegLimit.resize(NumRC);
+      RegPressure.resize(NumRC);
+      std::fill(RegLimit.begin(), RegLimit.end(), 0);
+      std::fill(RegPressure.begin(), RegPressure.end(), 0);
+      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+             E = TRI->regclass_end(); I != E; ++I)
+        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+    }
+  }
+
+  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+    scheduleDAG = scheduleDag;
+  }
+
+  ScheduleHazardRecognizer* getHazardRec() {
+    return scheduleDAG->getHazardRec();
+  }
+
+  void initNodes(std::vector<SUnit> &sunits);
+
+  void addNode(const SUnit *SU);
+
+  void updateNode(const SUnit *SU);
+
+  void releaseState() {
+    SUnits = 0;
+    SethiUllmanNumbers.clear();
+    std::fill(RegPressure.begin(), RegPressure.end(), 0);
+  }
+
+  unsigned getNodePriority(const SUnit *SU) const;
+
+  unsigned getNodeOrdering(const SUnit *SU) const {
+    return scheduleDAG->DAG->GetOrdering(SU->getNode());
+  }
+
+  bool empty() const { return Queue.empty(); }
+
+  void push(SUnit *U) {
+    assert(!U->NodeQueueId && "Node in the queue already");
+    U->NodeQueueId = ++CurQueueId;
+    Queue.push_back(U);
+  }
+
+  void remove(SUnit *SU) {
+    assert(!Queue.empty() && "Queue is empty!");
+    assert(SU->NodeQueueId != 0 && "Not in queue!");
+    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+                                                 SU);
+    if (I != prior(Queue.end()))
+      std::swap(*I, Queue.back());
+    Queue.pop_back();
+    SU->NodeQueueId = 0;
+  }
+
+  bool tracksRegPressure() const { return TracksRegPressure; }
+
+  void dumpRegPressure() const;
+
+  bool HighRegPressure(const SUnit *SU) const;
+
+  bool MayReduceRegPressure(SUnit *SU);
+
+  void ScheduledNode(SUnit *SU);
+
+  void UnscheduledNode(SUnit *SU);
+
+protected:
+  bool canClobber(const SUnit *SU, const SUnit *Op);
+  void AddPseudoTwoAddrDeps();
+  void PrescheduleNodesWithMultipleUses();
+  void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
+    std::vector<SUnit *>::iterator Best = Q.begin();
+    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+           E = Q.end(); I != E; ++I)
+      if (Picker(*Best, *I))
+        Best = I;
+    SUnit *V = *Best;
+    if (Best != prior(Q.end()))
+      std::swap(*Best, Q.back());
+    Q.pop_back();
+    return V;
+  }
+
+  SF Picker;
+
+public:
+  RegReductionPriorityQueue(MachineFunction &mf,
+                            bool tracksrp,
+                            const TargetInstrInfo *tii,
+                            const TargetRegisterInfo *tri,
+                            const TargetLowering *tli)
+    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+      Picker(this) {}
+
+  bool isBottomUp() const { return SF::IsBottomUp; }
+
+  bool isReady(SUnit *U) const {
+    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+  }
+
+  SUnit *pop() {
+    if (Queue.empty()) return NULL;
+
+    SUnit *V = popFromQueue(Queue, Picker);
+    V->NodeQueueId = 0;
+    return V;
+  }
+
+  void dump(ScheduleDAG *DAG) const {
+    // Emulate pop() without clobbering NodeQueueIds.
+    std::vector<SUnit*> DumpQueue = Queue;
+    SF DumpPicker = Picker;
+    while (!DumpQueue.empty()) {
+      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+      if (isBottomUp())
+        dbgs() << "Height " << SU->getHeight() << ": ";
+      else
+        dbgs() << "Depth " << SU->getDepth() << ": ";
+      SU->dump(DAG);
+    }
+  }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//           Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
 
 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
 /// Smaller number is the higher priority.
@@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
 
   if (SethiUllmanNumber == 0)
     SethiUllmanNumber = 1;
-  
+
   return SethiUllmanNumber;
 }
 
-namespace {
-  template<class SF>
-  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
-    std::vector<SUnit*> Queue;
-    SF Picker;
-    unsigned CurQueueId;
-    bool TracksRegPressure;
-
-  protected:
-    // SUnits - The SUnits for the current graph.
-    std::vector<SUnit> *SUnits;
-
-    MachineFunction &MF;
-    const TargetInstrInfo *TII;
-    const TargetRegisterInfo *TRI;
-    const TargetLowering *TLI;
-    ScheduleDAGRRList *scheduleDAG;
-
-    // SethiUllmanNumbers - The SethiUllman number for each node.
-    std::vector<unsigned> SethiUllmanNumbers;
-
-    /// RegPressure - Tracking current reg pressure per register class.
-    ///
-    std::vector<unsigned> RegPressure;
-
-    /// RegLimit - Tracking the number of allocatable registers per register
-    /// class.
-    std::vector<unsigned> RegLimit;
-
-  public:
-    RegReductionPriorityQueue(MachineFunction &mf,
-                              bool tracksrp,
-                              const TargetInstrInfo *tii,
-                              const TargetRegisterInfo *tri,
-                              const TargetLowering *tli)
-      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
-        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
-      if (TracksRegPressure) {
-        unsigned NumRC = TRI->getNumRegClasses();
-        RegLimit.resize(NumRC);
-        RegPressure.resize(NumRC);
-        std::fill(RegLimit.begin(), RegLimit.end(), 0);
-        std::fill(RegPressure.begin(), RegPressure.end(), 0);
-        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
-               E = TRI->regclass_end(); I != E; ++I)
-          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
-      }
-    }
-    
-    void initNodes(std::vector<SUnit> &sunits) {
-      SUnits = &sunits;
-      // Add pseudo dependency edges for two-address nodes.
-      AddPseudoTwoAddrDeps();
-      // Reroute edges to nodes with multiple uses.
-      PrescheduleNodesWithMultipleUses();
-      // Calculate node priorities.
-      CalculateSethiUllmanNumbers();
-    }
-
-    void addNode(const SUnit *SU) {
-      unsigned SUSize = SethiUllmanNumbers.size();
-      if (SUnits->size() > SUSize)
-        SethiUllmanNumbers.resize(SUSize*2, 0);
-      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
-    }
-
-    void updateNode(const SUnit *SU) {
-      SethiUllmanNumbers[SU->NodeNum] = 0;
-      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
-    }
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
 
-    void releaseState() {
-      SUnits = 0;
-      SethiUllmanNumbers.clear();
-      std::fill(RegPressure.begin(), RegPressure.end(), 0);
-    }
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
 
-    unsigned getNodePriority(const SUnit *SU) const {
-      assert(SU->NodeNum < SethiUllmanNumbers.size());
-      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
-      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
-        // CopyToReg should be close to its uses to facilitate coalescing and
-        // avoid spilling.
-        return 0;
-      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
-          Opc == TargetOpcode::SUBREG_TO_REG ||
-          Opc == TargetOpcode::INSERT_SUBREG)
-        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
-        // close to their uses to facilitate coalescing.
-        return 0;
-      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
-        // If SU does not have a register use, i.e. it doesn't produce a value
-        // that would be consumed (e.g. store), then it terminates a chain of
-        // computation.  Give it a large SethiUllman number so it will be
-        // scheduled right before its predecessors that it doesn't lengthen
-        // their live ranges.
-        return 0xffff;
-      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
-        // If SU does not have a register def, schedule it close to its uses
-        // because it does not lengthen any live ranges.
-        return 0;
-      return SethiUllmanNumbers[SU->NodeNum];
-    }
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+  SUnits = &sunits;
+  // Add pseudo dependency edges for two-address nodes.
+  AddPseudoTwoAddrDeps();
+  // Reroute edges to nodes with multiple uses.
+  if (!TracksRegPressure)
+    PrescheduleNodesWithMultipleUses();
+  // Calculate node priorities.
+  CalculateSethiUllmanNumbers();
+}
 
-    unsigned getNodeOrdering(const SUnit *SU) const {
-      return scheduleDAG->DAG->GetOrdering(SU->getNode());
-    }
+void RegReductionPQBase::addNode(const SUnit *SU) {
+  unsigned SUSize = SethiUllmanNumbers.size();
+  if (SUnits->size() > SUSize)
+    SethiUllmanNumbers.resize(SUSize*2, 0);
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
 
-    bool empty() const { return Queue.empty(); }
-    
-    void push(SUnit *U) {
-      assert(!U->NodeQueueId && "Node in the queue already");
-      U->NodeQueueId = ++CurQueueId;
-      Queue.push_back(U);
-    }
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+  SethiUllmanNumbers[SU->NodeNum] = 0;
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
 
-    SUnit *pop() {
-      if (empty()) return NULL;
-      std::vector<SUnit *>::iterator Best = Queue.begin();
-      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
-           E = Queue.end(); I != E; ++I)
-        if (Picker(*Best, *I))
-          Best = I;
-      SUnit *V = *Best;
-      if (Best != prior(Queue.end()))
-        std::swap(*Best, Queue.back());
-      Queue.pop_back();
-      V->NodeQueueId = 0;
-      return V;
-    }
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+  assert(SU->NodeNum < SethiUllmanNumbers.size());
+  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    // CopyToReg should be close to its uses to facilitate coalescing and
+    // avoid spilling.
+    return 0;
+  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+      Opc == TargetOpcode::SUBREG_TO_REG ||
+      Opc == TargetOpcode::INSERT_SUBREG)
+    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+    // close to their uses to facilitate coalescing.
+    return 0;
+  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+    // If SU does not have a register use, i.e. it doesn't produce a value
+    // that would be consumed (e.g. store), then it terminates a chain of
+    // computation.  Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors that it doesn't lengthen
+    // their live ranges.
+    return 0xffff;
+  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+    // If SU does not have a register def, schedule it close to its uses
+    // because it does not lengthen any live ranges.
+    return 0;
+  return SethiUllmanNumbers[SU->NodeNum];
+}
 
-    void remove(SUnit *SU) {
-      assert(!Queue.empty() && "Queue is empty!");
-      assert(SU->NodeQueueId != 0 && "Not in queue!");
-      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
-                                                   SU);
-      if (I != prior(Queue.end()))
-        std::swap(*I, Queue.back());
-      Queue.pop_back();
-      SU->NodeQueueId = 0;
-    }
+//===----------------------------------------------------------------------===//
+//                     Register Pressure Tracking
+//===----------------------------------------------------------------------===//
 
-    bool HighRegPressure(const SUnit *SU) const {
-      if (!TLI)
-        return false;
+void RegReductionPQBase::dumpRegPressure() const {
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+    const TargetRegisterClass *RC = *I;
+    unsigned Id = RC->getID();
+    unsigned RP = RegPressure[Id];
+    if (!RP) continue;
+    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+          << '\n');
+  }
+}
 
-      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
-           I != E; ++I) {
-        if (I->isCtrl())
-          continue;
-        SUnit *PredSU = I->getSUnit();
-        const SDNode *PN = PredSU->getNode();
-        if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyFromReg) {
-            EVT VT = PN->getValueType(0);
-            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-            unsigned Cost = TLI->getRepRegClassCostFor(VT);
-            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
-              return true;
-          }
-          continue;
-        }
-        unsigned POpc = PN->getMachineOpcode();
-        if (POpc == TargetOpcode::IMPLICIT_DEF)
-          continue;
-        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
-          EVT VT = PN->getOperand(0).getValueType();
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          unsigned Cost = TLI->getRepRegClassCostFor(VT);
-          // Check if this increases register pressure of the specific register
-          // class to the point where it would cause spills.
-          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
-            return true;
-          continue;            
-        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
-                   POpc == TargetOpcode::SUBREG_TO_REG) {
-          EVT VT = PN->getValueType(0);
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          unsigned Cost = TLI->getRepRegClassCostFor(VT);
-          // Check if this increases register pressure of the specific register
-          // class to the point where it would cause spills.
-          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
-            return true;
-          continue;
-        }
-        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
-        for (unsigned i = 0; i != NumDefs; ++i) {
-          EVT VT = PN->getValueType(i);
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          if (RegPressure[RCId] >= RegLimit[RCId])
-            return true; // Reg pressure already high.
-          unsigned Cost = TLI->getRepRegClassCostFor(VT);
-          if (!PN->hasAnyUseOfValue(i))
-            continue;
-          // Check if this increases register pressure of the specific register
-          // class to the point where it would cause spills.
-          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
-            return true;
-        }
-      }
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+  if (!TLI)
+    return false;
 
-      return false;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      unsigned Cost = TLI->getRepRegClassCostFor(VT);
+      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+        return true;
     }
+  }
+  return false;
+}
 
-    void ScheduledNode(SUnit *SU) {
-      if (!TracksRegPressure)
-        return;
-
-      const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode()) {
-        if (N->getOpcode() != ISD::CopyToReg)
-          return;
-      } else {
-        unsigned Opc = N->getMachineOpcode();
-        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
-            Opc == TargetOpcode::INSERT_SUBREG ||
-            Opc == TargetOpcode::SUBREG_TO_REG ||
-            Opc == TargetOpcode::REG_SEQUENCE ||
-            Opc == TargetOpcode::IMPLICIT_DEF)
-          return;
-      }
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+  const SDNode *N = SU->getNode();
 
-      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-           I != E; ++I) {
-        if (I->isCtrl())
-          continue;
-        SUnit *PredSU = I->getSUnit();
-        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
-          continue;
-        const SDNode *PN = PredSU->getNode();
-        if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyFromReg) {
-            EVT VT = PN->getValueType(0);
-            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          }
-          continue;
-        }
-        unsigned POpc = PN->getMachineOpcode();
-        if (POpc == TargetOpcode::IMPLICIT_DEF)
-          continue;
-        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
-          EVT VT = PN->getOperand(0).getValueType();
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          continue;            
-        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
-                   POpc == TargetOpcode::SUBREG_TO_REG) {
-          EVT VT = PN->getValueType(0);
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          continue;
-        }
-        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
-        for (unsigned i = 0; i != NumDefs; ++i) {
-          EVT VT = PN->getValueType(i);
-          if (!PN->hasAnyUseOfValue(i))
-            continue;
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-        }
-      }
+  if (!N->isMachineOpcode() || !SU->NumSuccs)
+    return false;
 
-      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
-      // may transfer data dependencies to CopyToReg.
-      if (SU->NumSuccs && N->isMachineOpcode()) {
-        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-        for (unsigned i = 0; i != NumDefs; ++i) {
-          EVT VT = N->getValueType(i);
-          if (!N->hasAnyUseOfValue(i))
-            continue;
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
-            // Register pressure tracking is imprecise. This can happen.
-            RegPressure[RCId] = 0;
-          else
-            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
-        }
-      }
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    EVT VT = N->getValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      return true;
+  }
+  return false;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;
 
-      dumpRegPressure();
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    // FIXME: The ScheduleDAG currently loses information about which of a
+    // node's values is consumed by each dependence. Consequently, if the node
+    // defines multiple register classes, we don't know which to pressurize
+    // here. Instead the following loop consumes the register defs in an
+    // arbitrary order. At least it handles the common case of clustered loads
+    // to the same class. For precise liveness, each SDep needs to indicate the
+    // result number. But that tightly couples the ScheduleDAG with the
+    // SelectionDAG making updates tricky. A simpler hack would be to attach a
+    // value type or register class to SDep.
+    //
+    // The most important aspect of register tracking is balancing the increase
+    // here with the reduction further below. Note that this SU may use multiple
+    // defs in PredSU. The can't be determined here, but we've already
+    // compensated by reducing NumRegDefsLeft in PredSU during
+    // ScheduleDAGSDNodes::AddSchedEdges.
+    --PredSU->NumRegDefsLeft;
+    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+      if (SkipRegDefs)
+        continue;
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      break;
     }
+  }
 
-    void UnscheduledNode(SUnit *SU) {
-      if (!TracksRegPressure)
-        return;
-
-      const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode()) {
-        if (N->getOpcode() != ISD::CopyToReg)
-          return;
-      } else {
-        unsigned Opc = N->getMachineOpcode();
-        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
-            Opc == TargetOpcode::INSERT_SUBREG ||
-            Opc == TargetOpcode::SUBREG_TO_REG ||
-            Opc == TargetOpcode::REG_SEQUENCE ||
-            Opc == TargetOpcode::IMPLICIT_DEF)
-          return;
-      }
+  // We should have this assert, but there may be dead SDNodes that never
+  // materialize as SUnits, so they don't appear to generate liveness.
+  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+  int SkipRegDefs = (int)SU->NumRegDefsLeft;
+  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+    if (SkipRegDefs > 0)
+      continue;
+    EVT VT = RegDefPos.GetValue();
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+      // Register pressure tracking is imprecise. This can happen. But we try
+      // hard not to let it happen because it likely results in poor scheduling.
+      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
+      RegPressure[RCId] = 0;
+    }
+    else {
+      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+    }
+  }
+  dumpRegPressure();
+}
 
-      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-           I != E; ++I) {
-        if (I->isCtrl())
-          continue;
-        SUnit *PredSU = I->getSUnit();
-        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
-          continue;
-        const SDNode *PN = PredSU->getNode();
-        if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyFromReg) {
-            EVT VT = PN->getValueType(0);
-            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          }
-          continue;
-        }
-        unsigned POpc = PN->getMachineOpcode();
-        if (POpc == TargetOpcode::IMPLICIT_DEF)
-          continue;
-        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
-          EVT VT = PN->getOperand(0).getValueType();
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          continue;            
-        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
-                   POpc == TargetOpcode::SUBREG_TO_REG) {
-          EVT VT = PN->getValueType(0);
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-          continue;
-        }
-        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
-        for (unsigned i = 0; i != NumDefs; ++i) {
-          EVT VT = PN->getValueType(i);
-          if (!PN->hasAnyUseOfValue(i))
-            continue;
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
-            // Register pressure tracking is imprecise. This can happen.
-            RegPressure[RCId] = 0;
-          else
-            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
-        }
-      }
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;
+
+  const SDNode *N = SU->getNode();
+  if (!N->isMachineOpcode()) {
+    if (N->getOpcode() != ISD::CopyToReg)
+      return;
+  } else {
+    unsigned Opc = N->getMachineOpcode();
+    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+        Opc == TargetOpcode::INSERT_SUBREG ||
+        Opc == TargetOpcode::SUBREG_TO_REG ||
+        Opc == TargetOpcode::REG_SEQUENCE ||
+        Opc == TargetOpcode::IMPLICIT_DEF)
+      return;
+  }
 
-      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
-      // may transfer data dependencies to CopyToReg.
-      if (SU->NumSuccs && N->isMachineOpcode()) {
-        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
-          EVT VT = N->getValueType(i);
-          if (VT == MVT::Flag || VT == MVT::Other)
-            continue;
-          if (!N->hasAnyUseOfValue(i))
-            continue;
-          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-        }
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+    // counts data deps.
+    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+      continue;
+    const SDNode *PN = PredSU->getNode();
+    if (!PN->isMachineOpcode()) {
+      if (PN->getOpcode() == ISD::CopyFromReg) {
+        EVT VT = PN->getValueType(0);
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
       }
-
-      dumpRegPressure();
+      continue;
     }
-
-    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 
-      scheduleDAG = scheduleDag; 
+    unsigned POpc = PN->getMachineOpcode();
+    if (POpc == TargetOpcode::IMPLICIT_DEF)
+      continue;
+    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+      EVT VT = PN->getOperand(0).getValueType();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      continue;
+    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+               POpc == TargetOpcode::SUBREG_TO_REG) {
+      EVT VT = PN->getValueType(0);
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      continue;
     }
-
-    void dumpRegPressure() const {
-      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
-             E = TRI->regclass_end(); I != E; ++I) {
-        const TargetRegisterClass *RC = *I;
-        unsigned Id = RC->getID();
-        unsigned RP = RegPressure[Id];
-        if (!RP) continue;
-        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
-              << '\n');
-      }
+    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+    for (unsigned i = 0; i != NumDefs; ++i) {
+      EVT VT = PN->getValueType(i);
+      if (!PN->hasAnyUseOfValue(i))
+        continue;
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+        // Register pressure tracking is imprecise. This can happen.
+        RegPressure[RCId] = 0;
+      else
+        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
     }
+  }
 
-  protected:
-    bool canClobber(const SUnit *SU, const SUnit *Op);
-    void AddPseudoTwoAddrDeps();
-    void PrescheduleNodesWithMultipleUses();
-    void CalculateSethiUllmanNumbers();
-  };
-
-  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
-    BURegReductionPriorityQueue;
-
-  typedef RegReductionPriorityQueue<td_ls_rr_sort>
-    TDRegReductionPriorityQueue;
-
-  typedef RegReductionPriorityQueue<src_ls_rr_sort>
-    SrcRegReductionPriorityQueue;
-
-  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
-    HybridBURRPriorityQueue;
+  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+  // may transfer data dependencies to CopyToReg.
+  if (SU->NumSuccs && N->isMachineOpcode()) {
+    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      EVT VT = N->getValueType(i);
+      if (VT == MVT::Glue || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+    }
+  }
 
-  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
-    ILPBURRPriorityQueue;
+  dumpRegPressure();
 }
 
+//===----------------------------------------------------------------------===//
+//           Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
 /// closestSucc - Returns the scheduled cycle of the successor which is
 /// closest to the current cycle.
 static unsigned closestSucc(const SUnit *SU) {
@@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {
   return Scratches;
 }
 
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
-                     const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+  SmallSet<const SUnit*, 4> Preds;
+  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Preds.insert(I->getSUnit());
+  }
+  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (Preds.count(I->getSUnit()))
+      return true;
+  }
+  return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+  if ((int)SPQ->getCurCycle() < Height) return true;
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return true;
+  return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+                            RegReductionPQBase *SPQ) {
+  // If the two nodes share an operand and one of them has a single
+  // use that is a live out copy, favor the one that is live out. Otherwise
+  // it will be difficult to eliminate the copy if the instruction is a
+  // loop induction variable update. e.g.
+  // BB:
+  // sub r1, r3, #1
+  // str r0, [r2, r3]
+  // mov r3, r1
+  // cmp
+  // bne BB
+  bool SharePred = UnitsSharePred(left, right);
+  // FIXME: Only adjust if BB is a loop back edge.
+  // FIXME: What's the cost of a copy?
+  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+  int LHeight = (int)left->getHeight() - LBonus;
+  int RHeight = (int)right->getHeight() - RBonus;
+
+  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+    BUHasStall(left, LHeight, SPQ);
+  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+    BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the node will cause a pipeline stall, delay it.
+  // If scheduling either one of the node will cause a pipeline stall, sort
+  // them according to their height.
+  if (LStall) {
+    if (!RStall)
+      return 1;
+    if (LHeight != RHeight)
+      return LHeight > RHeight ? 1 : -1;
+  } else if (RStall)
+    return -1;
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+                     right->SchedulingPref == Sched::Latency)) {
+    if (DisableSchedCycles) {
+      if (LHeight != RHeight)
+        return LHeight > RHeight ? 1 : -1;
+    }
+    else {
+      // If neither instruction stalls (!LStall && !RStall) then
+      // it's height is already covered so only its depth matters. We also reach
+      // this if both stall but have the same height.
+      unsigned LDepth = left->getDepth();
+      unsigned RDepth = right->getDepth();
+      if (LDepth != RDepth) {
+        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
+              << ") depth " << LDepth << " vs SU (" << right->NodeNum
+              << ") depth " << RDepth << "\n");
+        return LDepth < RDepth ? 1 : -1;
+      }
+    }
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency ? 1 : -1;
+  }
+  return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
   if (LPriority != RPriority)
@@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,
   if (LScratch != RScratch)
     return LScratch > RScratch;
 
-  if (left->getHeight() != right->getHeight())
-    return left->getHeight() > right->getHeight();
-  
-  if (left->getDepth() != right->getDepth())
-    return left->getDepth() < right->getDepth();
+  if (!DisableSchedCycles) {
+    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+    if (result != 0)
+      return result > 0;
+  }
+  else {
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
 
-  assert(left->NodeQueueId && right->NodeQueueId && 
+    if (left->getDepth() != right->getDepth())
+      return left->getDepth() < right->getDepth();
+  }
+
+  assert(left->NodeQueueId && right->NodeQueueId &&
          "NodeQueueId cannot be zero");
   return (left->NodeQueueId > right->NodeQueueId);
 }
 
 // Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   return BURRSort(left, right, SPQ);
 }
 
 // Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   unsigned LOrder = SPQ->getNodeOrdering(left);
   unsigned ROrder = SPQ->getNodeOrdering(right);
 
@@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
   return BURRSort(left, right, SPQ);
 }
 
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+  static const unsigned ReadyDelay = 3;
+
+  if (SPQ->MayReduceRegPressure(SU)) return true;
+
+  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+      != ScheduleHazardRecognizer::NoHazard)
+    return false;
+
+  return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
   bool LHigh = SPQ->HighRegPressure(left);
   bool RHigh = SPQ->HighRegPressure(right);
   // Avoid causing spills. If register pressure is high, schedule for
   // register pressure reduction.
-  if (LHigh && !RHigh)
+  if (LHigh && !RHigh) {
+    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
+          << right->NodeNum << ")\n");
     return true;
-  else if (!LHigh && RHigh)
+  }
+  else if (!LHigh && RHigh) {
+    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
+          << left->NodeNum << ")\n");
     return false;
+  }
   else if (!LHigh && !RHigh) {
-    // Low register pressure situation, schedule for latency if possible.
-    bool LStall = left->SchedulingPref == Sched::Latency &&
-      SPQ->getCurCycle() < left->getHeight();
-    bool RStall = right->SchedulingPref == Sched::Latency &&
-      SPQ->getCurCycle() < right->getHeight();
-    // If scheduling one of the node will cause a pipeline stall, delay it.
-    // If scheduling either one of the node will cause a pipeline stall, sort
-    // them according to their height.
-    // If neither will cause a pipeline stall, try to reduce register pressure.
-    if (LStall) {
-      if (!RStall)
-        return true;
-      if (left->getHeight() != right->getHeight())
-        return left->getHeight() > right->getHeight();
-    } else if (RStall)
-      return false;
-
-    // If either node is scheduling for latency, sort them by height and latency
-    // first.
-    if (left->SchedulingPref == Sched::Latency ||
-        right->SchedulingPref == Sched::Latency) {
-      if (left->getHeight() != right->getHeight())
-        return left->getHeight() > right->getHeight();
-      if (left->Latency != right->Latency)
-        return left->Latency > right->Latency;
-    }
+    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+    if (result != 0)
+      return result > 0;
   }
-
   return BURRSort(left, right, SPQ);
 }
 
-bool ilp_ls_rr_sort::operator()(const SUnit *left,
-                                const SUnit *right) const {
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+  if (SU->getHeight() > CurCycle) return false;
+
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return false;
+
+  return SU->getHeight() <= CurCycle;
+}
+
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
   bool LHigh = SPQ->HighRegPressure(left);
   bool RHigh = SPQ->HighRegPressure(right);
   // Avoid causing spills. If register pressure is high, schedule for
@@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left,
   return BURRSort(left, right, SPQ);
 }
 
-template<class SF>
-bool
-RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+//===----------------------------------------------------------------------===//
+//                    Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
   if (SU->isTwoAddress) {
     unsigned Opc = SU->getNode()->getMachineOpcode();
     const TargetInstrDesc &TID = TII->get(Opc);
@@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
   return false;
 }
 
-/// hasCopyToRegUse - Return true if SU has a value successor that is a
-/// CopyToReg node.
-static bool hasCopyToRegUse(const SUnit *SU) {
-  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    if (I->isCtrl()) continue;
-    const SUnit *SuccSU = I->getSUnit();
-    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
-      return true;
-  }
-  return false;
-}
-
 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
 /// physical register defs.
 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
@@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
   const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
   assert(ImpDefs && "Caller should check hasPhysRegDefs");
   for (const SDNode *SUNode = SU->getNode(); SUNode;
-       SUNode = SUNode->getFlaggedNode()) {
+       SUNode = SUNode->getGluedNode()) {
     if (!SUNode->isMachineOpcode())
       continue;
     const unsigned *SUImpDefs =
@@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
       return false;
     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
       EVT VT = N->getValueType(i);
-      if (VT == MVT::Flag || VT == MVT::Other)
+      if (VT == MVT::Glue || VT == MVT::Other)
         continue;
       if (!N->hasAnyUseOfValue(i))
         continue;
@@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
 /// after N, which shortens the U->N live range, reducing
 /// register pressure.
 ///
-template<class SF>
-void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
   // Visit all the nodes in topological order, working top-down.
   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
     SUnit *SU = &(*SUnits)[i];
@@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
     if (PredSU->NumSuccs == 1)
       continue;
     // Avoid prescheduling to copies from virtual registers, which don't behave
-    // like other nodes from the perspective of scheduling // heuristics.
+    // like other nodes from the perspective of scheduling heuristics.
     if (SDNode *N = SU->getNode())
       if (N->getOpcode() == ISD::CopyFromReg &&
           TargetRegisterInfo::isVirtualRegister
@@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
 /// one that has a CopyToReg use (more likely to be a loop induction update).
 /// If both are two-address, but one is commutable while the other is not
 /// commutable, favor the one that's not commutable.
-template<class SF>
-void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
     SUnit *SU = &(*SUnits)[i];
     if (!SU->isTwoAddress)
       continue;
 
     SDNode *Node = SU->getNode();
-    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
       continue;
 
+    bool isLiveOut = hasOnlyLiveOutUses(SU);
     unsigned Opc = Node->getMachineOpcode();
     const TargetInstrDesc &TID = TII->get(Opc);
     unsigned NumRes = TID.getNumDefs();
@@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
             SuccOpc == TargetOpcode::SUBREG_TO_REG)
           continue;
         if ((!canClobber(SuccSU, DUSU) ||
-             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
              (!SU->isCommutable && SuccSU->isCommutable)) &&
             !scheduleDAG->IsReachable(SuccSU, SU)) {
           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
@@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
   }
 }
 
-/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
-/// scheduling units.
-template<class SF>
-void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
-  SethiUllmanNumbers.assign(SUnits->size(), 0);
-  
-  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
-    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
-}
-
 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
 /// predecessors of the successors of the SUnit SU. Stop when the provided
 /// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                     unsigned Limit) {
   unsigned Sum = 0;
   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
   if (left->NumSuccsLeft != right->NumSuccsLeft)
     return left->NumSuccsLeft > right->NumSuccsLeft;
 
-  assert(left->NodeQueueId && right->NodeQueueId && 
+  assert(left->NodeQueueId && right->NodeQueueId &&
          "NodeQueueId cannot be zero");
   return (left->NodeQueueId > right->NodeQueueId);
 }
@@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
 //===----------------------------------------------------------------------===//
 
 llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+                                 CodeGenOpt::Level OptLevel) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  
+
   BURegReductionPriorityQueue *PQ =
     new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
-  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
   PQ->setScheduleDAG(SD);
-  return SD;  
+  return SD;
 }
 
 llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+                                 CodeGenOpt::Level OptLevel) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  
+
   TDRegReductionPriorityQueue *PQ =
     new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
-  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
   PQ->setScheduleDAG(SD);
   return SD;
 }
 
 llvm::ScheduleDAGSDNodes *
-llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+                                   CodeGenOpt::Level OptLevel) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  
+
   SrcRegReductionPriorityQueue *PQ =
     new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
-  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
   PQ->setScheduleDAG(SD);
-  return SD;  
+  return SD;
 }
 
 llvm::ScheduleDAGSDNodes *
-llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+                                   CodeGenOpt::Level OptLevel) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
   const TargetLowering *TLI = &IS->getTargetLowering();
-  
+
   HybridBURRPriorityQueue *PQ =
     new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
-  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
   PQ->setScheduleDAG(SD);
-  return SD;  
+  return SD;
 }
 
 llvm::ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+                                CodeGenOpt::Level OptLevel) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
   const TargetLowering *TLI = &IS->getTargetLowering();
-  
+
   ILPBURRPriorityQueue *PQ =
     new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
-  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
   PQ->setScheduleDAG(SD);
-  return SD;  
+  return SD;
 }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f1bf82ab145a..477c1ffe65d3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -34,8 +34,8 @@ using namespace llvm;
 STATISTIC(LoadsClustered, "Number of loads clustered together");
 
 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
-  : ScheduleDAG(mf) {
-}
+  : ScheduleDAG(mf),
+    InstrItins(mf.getTarget().getInstrItineraryData()) {}
 
 /// Run - perform scheduling.
 ///
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SUnit *SU = NewSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
   SU->Latency = Old->Latency;
+  SU->isCall = Old->isCall;
   SU->isTwoAddress = Old->isTwoAddress;
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
 /// a specified operand is a physical register dependency. If so, returns the
 /// register and the cost of copying the register.
 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
-                                      const TargetRegisterInfo *TRI, 
+                                      const TargetRegisterInfo *TRI,
                                       const TargetInstrInfo *TII,
                                       unsigned &PhysReg, int &Cost) {
   if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
   }
 }
 
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
-                     SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
   SmallVector<EVT, 4> VTs;
-  SDNode *FlagDestNode = Flag.getNode();
+  SDNode *GlueDestNode = Glue.getNode();
 
-  // Don't add a flag from a node to itself.
-  if (FlagDestNode == N) return;
+  // Don't add glue from a node to itself.
+  if (GlueDestNode == N) return;
 
-  // Don't add a flag to something which already has a flag.
-  if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+  // Don't add glue to something which already has glue.
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
 
   for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
     VTs.push_back(N->getValueType(I));
 
-  if (AddFlag)
-    VTs.push_back(MVT::Flag);
+  if (AddGlue)
+    VTs.push_back(MVT::Glue);
 
   SmallVector<SDValue, 4> Ops;
   for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
     Ops.push_back(N->getOperand(I));
 
-  if (FlagDestNode)
-    Ops.push_back(Flag);
+  if (GlueDestNode)
+    Ops.push_back(Glue);
 
   SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
   MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
     MN->setMemRefs(Begin, End);
 }
 
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
 /// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it add MVT::Glue inputs and
 /// outputs to ensure they are scheduled together and in order. This
 /// optimization may benefit some targets by improving cache locality.
 void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
   if (NumLoads == 0)
     return;
 
-  // Cluster loads by adding MVT::Flag outputs and inputs. This also
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
   // ensure they are scheduled in order of increasing addresses.
   SDNode *Lead = Loads[0];
-  AddFlags(Lead, SDValue(0, 0), true, DAG);
+  AddGlue(Lead, SDValue(0, 0), true, DAG);
 
-  SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+  SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
   for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
-    bool OutFlag = I < E - 1;
+    bool OutGlue = I < E - 1;
     SDNode *Load = Loads[I];
 
-    AddFlags(Load, InFlag, OutFlag, DAG);
+    AddGlue(Load, InGlue, OutGlue, DAG);
 
-    if (OutFlag)
-      InFlag = SDValue(Load, Load->getNumValues() - 1);
+    if (OutGlue)
+      InGlue = SDValue(Load, Load->getNumValues() - 1);
 
     ++LoadsClustered;
   }
@@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
   // FIXME: Multiply by 2 because we may clone nodes during scheduling.
   // This is a temporary workaround.
   SUnits.reserve(NumNodes * 2);
-  
+
   // Add all nodes in depth first order.
   SmallVector<SDNode*, 64> Worklist;
   SmallPtrSet<SDNode*, 64> Visited;
   Worklist.push_back(DAG->getRoot().getNode());
   Visited.insert(DAG->getRoot().getNode());
-  
+
   while (!Worklist.empty()) {
     SDNode *NI = Worklist.pop_back_val();
-    
+
     // Add all operands to the worklist unless they've already been added.
     for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
       if (Visited.insert(NI->getOperand(i).getNode()))
         Worklist.push_back(NI->getOperand(i).getNode());
-  
+
     if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
       continue;
-    
+
     // If this node has already been processed, stop now.
     if (NI->getNodeId() != -1) continue;
-    
+
     SUnit *NodeSUnit = NewSUnit(NI);
-    
-    // See if anything is flagged to this node, if so, add them to flagged
-    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
-    // are required to be the last operand and result of a node.
-    
-    // Scan up to find flagged preds.
+
+    // See if anything is glued to this node, if so, add them to glued
+    // nodes.  Nodes can have at most one glue input and one glue output.  Glue
+    // is required to be the last operand and result of a node.
+
+    // Scan up to find glued preds.
     SDNode *N = NI;
     while (N->getNumOperands() &&
-           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
       N = N->getOperand(N->getNumOperands()-1).getNode();
       assert(N->getNodeId() == -1 && "Node already inserted!");
       N->setNodeId(NodeSUnit->NodeNum);
+      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+        NodeSUnit->isCall = true;
     }
-    
-    // Scan down to find any flagged succs.
+
+    // Scan down to find any glued succs.
     N = NI;
-    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
-      SDValue FlagVal(N, N->getNumValues()-1);
-      
-      // There are either zero or one users of the Flag result.
-      bool HasFlagUse = false;
-      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
+    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+      SDValue GlueVal(N, N->getNumValues()-1);
+
+      // There are either zero or one users of the Glue result.
+      bool HasGlueUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
            UI != E; ++UI)
-        if (FlagVal.isOperandOf(*UI)) {
-          HasFlagUse = true;
+        if (GlueVal.isOperandOf(*UI)) {
+          HasGlueUse = true;
           assert(N->getNodeId() == -1 && "Node already inserted!");
           N->setNodeId(NodeSUnit->NodeNum);
           N = *UI;
+          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+            NodeSUnit->isCall = true;
           break;
         }
-      if (!HasFlagUse) break;
+      if (!HasGlueUse) break;
     }
-    
-    // If there are flag operands involved, N is now the bottom-most node
-    // of the sequence of nodes that are flagged together.
+
+    // If there are glue operands involved, N is now the bottom-most node
+    // of the sequence of nodes that are glued together.
     // Update the SUnit.
     NodeSUnit->setNode(N);
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NodeSUnit->NodeNum);
 
+    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+    InitNumRegDefsLeft(NodeSUnit);
+
     // Assign the Latency field of NodeSUnit using target-provided information.
     ComputeLatency(NodeSUnit);
   }
@@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
   for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
     SUnit *SU = &SUnits[su];
     SDNode *MainNode = SU->getNode();
-    
+
     if (MainNode->isMachineOpcode()) {
       unsigned Opc = MainNode->getMachineOpcode();
       const TargetInstrDesc &TID = TII->get(Opc);
@@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
       if (TID.isCommutable())
         SU->isCommutable = true;
     }
-    
+
     // Find all predecessors and successors of the group.
-    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
       if (N->isMachineOpcode() &&
           TII->get(N->getMachineOpcode()).getImplicitDefs()) {
         SU->hasPhysRegClobbers = true;
@@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
           SU->hasPhysRegDefs = true;
       }
-      
+
       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
         SDNode *OpN = N->getOperand(i).getNode();
         if (isPassiveNode(OpN)) continue;   // Not scheduled.
@@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         if (OpSU == SU) continue;           // In the same group.
 
         EVT OpVT = N->getOperand(i).getValueType();
-        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
         bool isChain = OpVT == MVT::Other;
 
         unsigned PhysReg = 0;
@@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
           ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
         }
 
-        SU->addPred(dep);
+        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+          // Multiple register uses are combined in the same SUnit. For example,
+          // we could have a set of glued nodes with all their defs consumed by
+          // another set of glued nodes. Register pressure tracking sees this as
+          // a single use, so to keep pressure balanced we reduce the defs.
+          --OpSU->NumRegDefsLeft;
+        }
       }
     }
   }
@@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
 /// are input.  This SUnit graph is similar to the SelectionDAG, but
 /// excludes nodes that aren't interesting to scheduling, and represents
-/// flagged together nodes with a single SUnit.
+/// glued together nodes with a single SUnit.
 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
   // Cluster certain nodes which should be scheduled together.
   ClusterNodes();
@@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
   AddSchedEdges();
 }
 
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+  if (!Node->isMachineOpcode()) {
+    if (Node->getOpcode() == ISD::CopyFromReg)
+      NodeNumDefs = 1;
+    else
+      NodeNumDefs = 0;
+    return;
+  }
+  unsigned POpc = Node->getMachineOpcode();
+  if (POpc == TargetOpcode::IMPLICIT_DEF) {
+    // No register need be allocated for this.
+    NodeNumDefs = 0;
+    return;
+  }
+  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+  // Some instructions define regs that are not represented in the selection DAG
+  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+  DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+                                           const ScheduleDAGSDNodes *SD)
+  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+  InitNodeNumDefs();
+  Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+  for (;Node;) { // Visit all glued nodes.
+    for (;DefIdx < NodeNumDefs; ++DefIdx) {
+      if (!Node->hasAnyUseOfValue(DefIdx))
+        continue;
+      if (Node->isMachineOpcode() &&
+          Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+        // Propagate the incoming (full-register) type. I doubt it's needed.
+        ValueType = Node->getOperand(0).getValueType();
+      }
+      else {
+        ValueType = Node->getValueType(DefIdx);
+      }
+      ++DefIdx;
+      return; // Found a normal regdef.
+    }
+    Node = Node->getGluedNode();
+    if (Node == NULL) {
+      return; // No values left to visit.
+    }
+    InitNodeNumDefs();
+  }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+    ++SU->NumRegDefsLeft;
+  }
+}
+
 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   // Check to see if the scheduler cares about latencies.
   if (ForceUnitLatencies()) {
@@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
     return;
   }
 
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty()) {
+  if (!InstrItins || InstrItins->isEmpty()) {
     SU->Latency = 1;
     return;
   }
-  
+
   // Compute the latency for the node.  We use the sum of the latencies for
-  // all nodes flagged together into this SUnit.
+  // all nodes glued together into this SUnit.
   SU->Latency = 0;
-  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
-    if (N->isMachineOpcode()) {
-      SU->Latency += InstrItins.
-        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
-    }
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+    if (N->isMachineOpcode())
+      SU->Latency += TII->getInstrLatency(InstrItins, N);
 }
 
 void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
@@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
   if (ForceUnitLatencies())
     return;
 
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty())
-    return;
-  
   if (dep.getKind() != SDep::Data)
     return;
 
   unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
-  if (Def->isMachineOpcode()) {
-    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
-    if (DefIdx >= II.getNumDefs())
-      return;
-    int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
-    if (DefCycle < 0)
-      return;
-    int UseCycle = 1;
-    if (Use->isMachineOpcode()) {
-      const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
-      UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
-    }
-    if (UseCycle >= 0) {
-      int Latency = DefCycle - UseCycle + 1;
-      if (Latency >= 0)
-        dep.setLatency(Latency);
-    }
+  if (Use->isMachineOpcode())
+    // Adjust the use operand index by num of defs.
+    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+      !BB->succ_empty()) {
+    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so not to penalize the def.
+      // FIXME: need target specific adjustment here?
+      Latency = (Latency > 1) ? Latency - 1 : 1;
   }
+  if (Latency >= 0)
+    dep.setLatency(Latency);
 }
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
 
   SU->getNode()->dump(DAG);
   dbgs() << "\n";
-  SmallVector<SDNode *, 4> FlaggedNodes;
-  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
-    FlaggedNodes.push_back(N);
-  while (!FlaggedNodes.empty()) {
+  SmallVector<SDNode *, 4> GluedNodes;
+  for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+    GluedNodes.push_back(N);
+  while (!GluedNodes.empty()) {
     dbgs() << "    ";
-    FlaggedNodes.back()->dump(DAG);
+    GluedNodes.back()->dump(DAG);
     dbgs() << "\n";
-    FlaggedNodes.pop_back();
+    GluedNodes.pop_back();
   }
 }
 
@@ -507,37 +573,25 @@ namespace {
   };
 }
 
-// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule uses to determine how to insert dbg_value
-// instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
-                           InstrEmitter &Emitter,
-                           DenseMap<SDValue, unsigned> &VRBaseMap,
+/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+                               InstrEmitter &Emitter,
                     SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
-                           SmallSet<unsigned, 8> &Seen) {
-  unsigned Order = DAG->GetOrdering(N);
-  if (!Order || !Seen.insert(Order))
-    return;
-
-  MachineBasicBlock *BB = Emitter.getBlock();
-  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
-    // Did not insert any instruction.
-    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
-    return;
-  }
-
-  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+                            DenseMap<SDValue, unsigned> &VRBaseMap,
+                            unsigned Order) {
   if (!N->getHasDebugValue())
     return;
+
   // Opportunistically insert immediate dbg_value uses, i.e. those with source
   // order number right after the N.
+  MachineBasicBlock *BB = Emitter.getBlock();
   MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
   SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
   for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
     if (DVs[i]->isInvalidated())
       continue;
     unsigned DVOrder = DVs[i]->getOrder();
-    if (DVOrder == ++Order) {
+    if (!Order || DVOrder == ++Order) {
       MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
       if (DbgMI) {
         Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
   }
 }
 
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+                           InstrEmitter &Emitter,
+                           DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                           SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = DAG->GetOrdering(N);
+  if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if node does not have any order
+    // assigned.
+    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+    return;
+  }
+
+  MachineBasicBlock *BB = Emitter.getBlock();
+  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+    // Did not insert any instruction.
+    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+    return;
+  }
+
+  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
 
 /// EmitSchedule - Emit the machine code in scheduled order.
 MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
@@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
     }
 
     // For pre-regalloc scheduling, create instructions corresponding to the
-    // SDNode and any flagged SDNodes and append them to the block.
+    // SDNode and any glued SDNodes and append them to the block.
     if (!SU->getNode()) {
       // Emit a copy.
       EmitPhysRegCopy(SU, CopyVRBaseMap);
       continue;
     }
 
-    SmallVector<SDNode *, 4> FlaggedNodes;
-    for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
-         N = N->getFlaggedNode())
-      FlaggedNodes.push_back(N);
-    while (!FlaggedNodes.empty()) {
-      SDNode *N = FlaggedNodes.back();
-      Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+    SmallVector<SDNode *, 4> GluedNodes;
+    for (SDNode *N = SU->getNode()->getGluedNode(); N;
+         N = N->getGluedNode())
+      GluedNodes.push_back(N);
+    while (!GluedNodes.empty()) {
+      SDNode *N = GluedNodes.back();
+      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                        VRBaseMap);
       // Remember the source order of the inserted instruction.
       if (HasDbg)
         ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
-      FlaggedNodes.pop_back();
+      GluedNodes.pop_back();
     }
     Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                      VRBaseMap);
@@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
       // Insert all SDDbgValue's whose order(s) are before "Order".
       if (!MI)
         continue;
-#ifndef NDEBUG
-      unsigned LastDIOrder = 0;
-#endif
       for (; DI != DE &&
              (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
-#ifndef NDEBUG
-        assert((*DI)->getOrder() >= LastDIOrder &&
-               "SDDbgValue nodes must be in source order!");
-        LastDIOrder = (*DI)->getOrder();
-#endif
         if ((*DI)->isInvalidated())
           continue;
         MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 842fc8c72703..cc7310e4ca42 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,13 +20,13 @@
 
 namespace llvm {
   /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
-  /// 
+  ///
   /// Edges between SUnits are initially based on edges in the SelectionDAG,
   /// and additional edges can be added by the schedulers as heuristics.
   /// SDNodes such as Constants, Registers, and a few others that are not
   /// interesting to schedulers are not allocated SUnits.
   ///
-  /// SDNodes with MVT::Flag operands are grouped along with the flagged
+  /// SDNodes with MVT::Glue operands are grouped along with the flagged
   /// nodes into a single SUnit so that they are scheduled together.
   ///
   /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
@@ -36,6 +36,7 @@ namespace llvm {
   class ScheduleDAGSDNodes : public ScheduleDAG {
   public:
     SelectionDAG *DAG;                    // DAG of the current basic block
+    const InstrItineraryData *InstrItins;
 
     explicit ScheduleDAGSDNodes(MachineFunction &mf);
 
@@ -72,13 +73,17 @@ namespace llvm {
     /// predecessors / successors info nor the temporary scheduling states.
     ///
     SUnit *Clone(SUnit *N);
-    
+
     /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
     /// are input.  This SUnit graph is similar to the SelectionDAG, but
     /// excludes nodes that aren't interesting to scheduling, and represents
     /// flagged together nodes with a single SUnit.
     virtual void BuildSchedGraph(AliasAnalysis *AA);
 
+    /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+    ///
+    void InitNumRegDefsLeft(SUnit *SU);
+
     /// ComputeLatency - Compute node latency.
     ///
     virtual void ComputeLatency(SUnit *SU);
@@ -105,6 +110,30 @@ namespace llvm {
 
     virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
 
+    /// RegDefIter - In place iteration over the values defined by an
+    /// SUnit. This does not need copies of the iterator or any other STLisms.
+    /// The iterator creates itself, rather than being provided by the SchedDAG.
+    class RegDefIter {
+      const ScheduleDAGSDNodes *SchedDAG;
+      const SDNode *Node;
+      unsigned DefIdx;
+      unsigned NodeNumDefs;
+      EVT ValueType;
+    public:
+      RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+      bool IsValid() const { return Node != NULL; }
+
+      EVT GetValue() const {
+        assert(IsValid() && "bad iterator");
+        return ValueType;
+      }
+
+      void Advance();
+    private:
+      void InitNodeNumDefs();
+    };
+
   private:
     /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
     /// combined SUnits.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ad06ebda5b00..2fb2f2d8aa1e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSelectionDAGInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
 /// BUILD_VECTOR where all of the elements are ~0 or undef.
 bool ISD::isBuildVectorAllOnes(const SDNode *N) {
   // Look through a bit convert.
-  if (N->getOpcode() == ISD::BIT_CONVERT)
+  if (N->getOpcode() == ISD::BITCAST)
     N = N->getOperand(0).getNode();
 
   if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
 /// BUILD_VECTOR where all of the elements are 0 or undef.
 bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   // Look through a bit convert.
-  if (N->getOpcode() == ISD::BIT_CONVERT)
+  if (N->getOpcode() == ISD::BITCAST)
     N = N->getOperand(0).getNode();
 
   if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) {
   if (N->getOperand(0).getOpcode() == ISD::UNDEF)
     return false;
   unsigned NumElems = N->getNumOperands();
+  if (NumElems == 1)
+    return false;
   for (unsigned i = 1; i < NumElems; ++i) {
     SDValue V = N->getOperand(i);
     if (V.getOpcode() != ISD::UNDEF)
@@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
 
 /// doNotCSE - Return true if CSE should not be performed for this node.
 static bool doNotCSE(SDNode *N) {
-  if (N->getValueType(0) == MVT::Flag)
+  if (N->getValueType(0) == MVT::Glue)
     return true; // Never CSE anything that produces a flag.
 
   switch (N->getOpcode()) {
@@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) {
 
   // Check that remaining values produced are not flags.
   for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
-    if (N->getValueType(i) == MVT::Flag)
+    if (N->getValueType(i) == MVT::Glue)
       return true; // Never CSE anything that produces a flag.
 
   return false;
@@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
 bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   bool Erased = false;
   switch (N->getOpcode()) {
-  case ISD::EntryToken:
-    llvm_unreachable("EntryToken should not be in CSEMaps!");
-    return false;
   case ISD::HANDLENODE: return false;  // noop.
   case ISD::CONDCODE:
     assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
@@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   }
   default:
     // Remove it from the CSE Map.
+    assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+    assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
     Erased = CSEMap.RemoveNode(N);
     break;
   }
@@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   // Verify that the node was actually in one of the CSE maps, unless it has a
   // flag result (which cannot be CSE'd) or is one of the special cases that are
   // not subject to CSE.
-  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
       !N->isMachineOpcode() && !doNotCSE(N)) {
     N->dump(this);
     dbgs() << "\n";
@@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
   return Node;
 }
 
-/// VerifyNode - Sanity check the given node.  Aborts if it is invalid.
-void SelectionDAG::VerifyNode(SDNode *N) {
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node.  Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
   switch (N->getOpcode()) {
   default:
     break;
@@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) {
   }
 }
 
+/// VerifySDNode - Sanity check the given SDNode.  Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+  // The SDNode allocators cannot be used to allocate nodes with fields that are
+  // not present in an SDNode!
+  assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+  assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+  assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+  assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+  assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+  assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+  assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+  assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+  assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+  assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+  assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+  assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+  assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+  assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+  assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+  assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+  assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+  assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+  assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+  VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode.  Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+  // The MachineNode allocators cannot be used to allocate nodes with fields
+  // that are not present in a MachineNode!
+  // Currently there are no such nodes.
+
+  VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
 /// getEVTAlignment - Compute the default alignment value for the
 /// given type.
 ///
@@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-  
+
   SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
@@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
   ID.AddPointer(MD);
-  
+
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-  
+
   SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
@@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     // Also compute a conserative estimate for high known-0 bits.
     // More trickiness is possible, but this is sufficient for the
     // interesting case of alignment computation.
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     unsigned TrailZ = KnownZero.countTrailingOnes() +
                       KnownZero2.countTrailingOnes();
     unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
@@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
                       AllOnes, KnownZero2, KnownOne2, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();
 
-    KnownOne2.clear();
-    KnownZero2.clear();
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
     ComputeMaskedBits(Op.getOperand(1),
                       AllOnes, KnownZero2, KnownOne2, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
 
     // If the sign extended bits are demanded, we know that the sign
     // bit is demanded.
-    InSignBit.zext(BitWidth);
+    InSignBit = InSignBit.zext(BitWidth);
     if (NewBits.getBoolValue())
       InputDemandedBits |= InSignBit;
 
@@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   case ISD::CTPOP: {
     unsigned LowBits = Log2_32(BitWidth)+1;
     KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     return;
   }
   case ISD::LOAD: {
@@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
-    APInt InMask    = Mask;
-    InMask.trunc(InBits);
-    KnownZero.trunc(InBits);
-    KnownOne.trunc(InBits);
+    APInt InMask    = Mask.trunc(InBits);
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
     ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     KnownZero |= NewBits;
     return;
   }
@@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt InSignBit = APInt::getSignBit(InBits);
     APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
-    APInt InMask = Mask;
-    InMask.trunc(InBits);
+    APInt InMask = Mask.trunc(InBits);
 
     // If any of the sign extended bits are demanded, we know that the sign
     // bit is demanded. Temporarily set this bit in the mask for our callee.
     if (NewBits.getBoolValue())
       InMask |= InSignBit;
 
-    KnownZero.trunc(InBits);
-    KnownOne.trunc(InBits);
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
     ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
 
     // Note if the sign bit is known to be zero or one.
@@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     // If the sign bit wasn't actually demanded by our caller, we don't
     // want it set in the KnownZero and KnownOne result values. Reset the
     // mask and reapply it to the result values.
-    InMask = Mask;
-    InMask.trunc(InBits);
+    InMask = Mask.trunc(InBits);
     KnownZero &= InMask;
     KnownOne  &= InMask;
 
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
 
     // If the sign bit is known zero or one, the top bits match.
     if (SignBitKnownZero)
@@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   case ISD::ANY_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarType().getSizeInBits();
-    APInt InMask = Mask;
-    InMask.trunc(InBits);
-    KnownZero.trunc(InBits);
-    KnownOne.trunc(InBits);
+    APInt InMask = Mask.trunc(InBits);
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
     ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     return;
   }
   case ISD::TRUNCATE: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarType().getSizeInBits();
-    APInt InMask = Mask;
-    InMask.zext(InBits);
-    KnownZero.zext(InBits);
-    KnownOne.zext(InBits);
+    APInt InMask = Mask.zext(InBits);
+    KnownZero = KnownZero.zext(InBits);
+    KnownOne = KnownOne.zext(InBits);
     ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
     assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    KnownZero.trunc(BitWidth);
-    KnownOne.trunc(BitWidth);
+    KnownZero = KnownZero.trunc(BitWidth);
+    KnownOne = KnownOne.trunc(BitWidth);
     break;
   }
   case ISD::AssertZext: {
@@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     }
   }
   // fall through
-  case ISD::ADD: {
+  case ISD::ADD:
+  case ISD::ADDE: {
     // Output known-0 bits are known if clear or set in both the low clear bits
     // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
     // low 3 bits clear.
@@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     KnownZeroOut = std::min(KnownZeroOut,
                             KnownZero2.countTrailingOnes());
 
-    KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+    if (Op.getOpcode() == ISD::ADD) {
+      KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+      return;
+    }
+
+    // With ADDE, a carry bit may be added in, so we can only use this
+    // information if we know (at least) that the low two bits are clear.  We
+    // then return to the caller that the low bit is unknown but that other bits
+    // are known zero.
+    if (KnownZeroOut >= 2) // ADDE
+      KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
     return;
   }
   case ISD::SREM:
@@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
 
     uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
     return;
   }
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    if (unsigned Align = InferPtrAlignment(Op)) {
+      // The low bits are known zero if the pointer is aligned.
+      KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+      return;
+    }
+    break;
+      
   default:
     // Allow the target to implement this method for its nodes.
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
   return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
 }
 
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD.  This handles the equivalence:
+///     X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+  if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+      !isa<ConstantSDNode>(Op.getOperand(1)))
+    return false;
+  
+  if (Op.getOpcode() == ISD::OR && 
+      !MaskedValueIsZero(Op.getOperand(0),
+                     cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+    return false;
+  
+  return true;
+}
+
+
 bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
   // If we're told that NaNs won't happen, assume they won't.
   if (NoNaNsFPMath)
@@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     switch (Opcode) {
     default: break;
     case ISD::SIGN_EXTEND:
-      return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::TRUNCATE:
-      return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
-      const uint64_t zero[] = {0, 0};
       // No compile time operations on ppcf128.
       if (VT == MVT::ppcf128) break;
-      APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+      APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
       (void)apf.convertFromAPInt(Val,
                                  Opcode==ISD::SINT_TO_FP,
                                  APFloat::rmNearestTiesToEven);
       return getConstantFP(apf, VT);
     }
-    case ISD::BIT_CONVERT:
+    case ISD::BITCAST:
       if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
         return getConstantFP(Val.bitsToFloat(), VT);
       else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
         APInt api(VT.getSizeInBits(), 2, x);
         return getConstant(api, VT);
       }
-      case ISD::BIT_CONVERT:
+      case ISD::BITCAST:
         if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
           return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
         else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
@@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
         return Operand.getNode()->getOperand(0);
     }
     break;
-  case ISD::BIT_CONVERT:
+  case ISD::BITCAST:
     // Basic sanity checking.
     assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
-           && "Cannot BIT_CONVERT between types of different sizes!");
+           && "Cannot BITCAST between types of different sizes!");
     if (VT == Operand.getValueType()) return Operand;  // noop conversion.
-    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x)
-      return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::BITCAST)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
     if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
     break;
@@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
 
   SDNode *N;
   SDVTList VTs = getVTList(VT);
-  if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+  if (VT != MVT::Glue) { // Don't CSE flag producing nodes
     FoldingSetNodeID ID;
     SDValue Ops[1] = { Operand };
     AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
@@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
            "Shift operators return type must be the same as their first arg");
     assert(VT.isInteger() && N2.getValueType().isInteger() &&
            "Shifts only work on integers");
+    // Verify that the shift amount VT is bit enough to hold valid shift
+    // amounts.  This catches things like trying to shift an i1024 value by an
+    // i8, which is easy to fall into in generic code that uses
+    // TLI.getShiftAmount().
+    assert(N2.getValueType().getSizeInBits() >=
+                   Log2_32_Ceil(N1.getValueType().getSizeInBits()) && 
+           "Invalid use of small shift amount with oversized value!");
 
     // Always fold shifts of i1 values so the code generator doesn't need to
     // handle them.  Since we know the size of the shift has to be less than the
@@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
       return getConstant(ShiftedVal.trunc(ElementSize), VT);
     }
     break;
-  case ISD::EXTRACT_SUBVECTOR:
-    if (N1.getValueType() == VT) // Trivial extraction.
-      return N1;
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Index = N2;
+    if (VT.isSimple() && N1.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             "Extract subvector VTs must be a vectors!");
+      assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+             "Extract subvector VTs must have the same element type!");
+      assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+             "Extract subvector must be from larger vector to smaller vector!");
+
+      if (isa<ConstantSDNode>(Index.getNode())) {
+        assert((VT.getVectorNumElements() +
+                cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+                <= N1.getValueType().getVectorNumElements())
+               && "Extract subvector overflow!");
+      }
+
+      // Trivial extraction.
+      if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+        return N1;
+    }
     break;
   }
+  }
 
   if (N1C) {
     if (N2C) {
@@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   // Memoize this node if possible.
   SDNode *N;
   SDVTList VTs = getVTList(VT);
-  if (VT != MVT::Flag) {
+  if (VT != MVT::Glue) {
     SDValue Ops[] = { N1, N2 };
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
@@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   case ISD::VECTOR_SHUFFLE:
     llvm_unreachable("should use getVectorShuffle constructor!");
     break;
-  case ISD::BIT_CONVERT:
+  case ISD::INSERT_SUBVECTOR: {
+    SDValue Index = N3;
+    if (VT.isSimple() && N1.getValueType().isSimple()
+        && N2.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             N2.getValueType().isVector() &&
+             "Insert subvector VTs must be a vectors");
+      assert(VT == N1.getValueType() &&
+             "Dest and insert subvector source types must match!");
+      assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+             "Insert subvector must be from smaller vector to larger vector!");
+      if (isa<ConstantSDNode>(Index.getNode())) {
+        assert((N2.getValueType().getVectorNumElements() +
+                cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+                <= VT.getVectorNumElements())
+               && "Insert subvector overflow!");
+      }
+
+      // Trivial insertion.
+      if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+        return N2;
+    }
+    break;
+  }
+  case ISD::BITCAST:
     // Fold bit_convert nodes from a type to themselves.
     if (N1.getValueType() == VT)
       return N1;
@@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   // Memoize node if it doesn't produce a flag.
   SDNode *N;
   SDVTList VTs = getVTList(VT);
-  if (VT != MVT::Flag) {
+  if (VT != MVT::Glue) {
     SDValue Ops[] = { N1, N2, N3 };
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
@@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
                  &ArgChains[0], ArgChains.size());
 }
 
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
 /// getMemsetValue - Vectorized representation of the memset value
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
 
   unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
-    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
-    unsigned Shift = 8;
-    for (unsigned i = NumBits; i > 8; i >>= 1) {
-      Val = (Val << Shift) | Val;
-      Shift <<= 1;
-    }
+    APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
     if (VT.isInteger())
       return DAG.getConstant(Val, VT);
     return DAG.getConstantFP(APFloat(Val), VT);
   }
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
-  unsigned Shift = 8;
-  for (unsigned i = NumBits; i > 8; i >>= 1) {
-    Value = DAG.getNode(ISD::OR, dl, VT,
-                        DAG.getNode(ISD::SHL, dl, VT, Value,
-                                    DAG.getConstant(Shift,
-                                                    TLI.getShiftAmountTy())),
-                        Value);
-    Shift <<= 1;
+  if (NumBits > 8) {
+    // Use a multiplication with 0x010101... to extend the input to the
+    // required length.
+    APInt Magic = SplatByte(NumBits, 0x01);
+    Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
   }
 
   return Value;
@@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
-             VT.getSimpleVT().SimpleTy == MVT::f64)
+    else if (VT == MVT::f32 || VT == MVT::f64)
       return DAG.getConstantFP(0.0, VT);
     else if (VT.isVector()) {
       unsigned NumElts = VT.getVectorNumElements();
       MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+      return DAG.getNode(ISD::BITCAST, dl, VT,
                          DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
                                                              EltVT, NumElts)));
     } else
@@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
     if (VT.bitsGT(LVT))
       VT = LVT;
   }
-  
-  // If we're optimizing for size, and there is a limit, bump the maximum number
-  // of operations inserted down to 4.  This is a wild guess that approximates
-  // the size of a call to memcpy or memset (3 arguments + call).
-  if (Limit != ~0U) {
-    const Function *F = DAG.getMachineFunction().getFunction();
-    if (F->hasFnAttr(Attribute::OptimizeForSize))
-      Limit = 4;
-  }
 
   unsigned NumMemOps = 0;
   while (Size != 0) {
@@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                        SDValue Src, uint64_t Size,
                                        unsigned Align, bool isVol,
                                        bool AlwaysInline,
-                                       const Value *DstSV, uint64_t DstSVOff,
-                                       const Value *SrcSV, uint64_t SrcSVOff) {
+                                       MachinePointerInfo DstPtrInfo,
+                                       MachinePointerInfo SrcPtrInfo) {
   // Turn a memcpy of undef to nop.
   if (Src.getOpcode() == ISD::UNDEF)
     return Chain;
 
   // Expand memcpy to a series of load and store ops if the size operand falls
   // below a certain threshold.
+  // TODO: In the AlwaysInline case, if the size is big then generate a loop
+  // rather than maybe a humongous number of loads and stores.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   std::string Str;
   bool CopyFromStr = isMemSrcFromString(Src, Str);
   bool isZeroStr = CopyFromStr && Str.empty();
-  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
-  
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
   if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                 (DstAlignCanChange ? 0 : Align),
                                 (isZeroStr ? 0 : SrcAlign),
@@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
       Store = DAG.getStore(Chain, dl, Value,
                            getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, isVol, false, Align);
+                           DstPtrInfo.getWithOffset(DstOff), isVol,
+                           false, Align);
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
@@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
       // FIXME does the case above also need this?
       EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
       assert(NVT.bitsGE(VT));
-      Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
+      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+                             SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
                              MinAlign(SrcAlign, SrcOff));
       Store = DAG.getTruncStore(Chain, dl, Value,
                                 getMemBasePlusOffset(Dst, DstOff, DAG),
-                                DstSV, DstSVOff + DstOff, VT, isVol, false,
-                                Align);
+                                DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+                                false, Align);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                         SDValue Src, uint64_t Size,
                                         unsigned Align,  bool isVol,
                                         bool AlwaysInline,
-                                        const Value *DstSV, uint64_t DstSVOff,
-                                        const Value *SrcSV, uint64_t SrcSVOff) {
+                                        MachinePointerInfo DstPtrInfo,
+                                        MachinePointerInfo SrcPtrInfo) {
   // Turn a memmove of undef to nop.
   if (Src.getOpcode() == ISD::UNDEF)
     return Chain;
@@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
   unsigned SrcAlign = DAG.InferPtrAlignment(Src);
   if (Align > SrcAlign)
     SrcAlign = Align;
-  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
 
   if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                 (DstAlignCanChange ? 0 : Align),
@@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 
     Value = DAG.getLoad(VT, dl, Chain,
                         getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+                        SrcPtrInfo.getWithOffset(SrcOff), isVol,
+                        false, SrcAlign);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 
     Store = DAG.getStore(Chain, dl, LoadValues[i],
                          getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, isVol, false, Align);
+                         DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
                                SDValue Chain, SDValue Dst,
                                SDValue Src, uint64_t Size,
                                unsigned Align, bool isVol,
-                               const Value *DstSV, uint64_t DstSVOff) {
+                               MachinePointerInfo DstPtrInfo) {
   // Turn a memset of undef to nop.
   if (Src.getOpcode() == ISD::UNDEF)
     return Chain;
@@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
   bool NonScalarIntSafe =
     isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
-  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
                                 Size, (DstAlignCanChange ? 0 : Align), 0,
                                 NonScalarIntSafe, false, DAG, TLI))
     return SDValue();
@@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
   SmallVector<SDValue, 8> OutChains;
   uint64_t DstOff = 0;
   unsigned NumMemOps = MemOps.size();
+
+  // Find the largest store and generate the bit pattern for it.
+  EVT LargestVT = MemOps[0];
+  for (unsigned i = 1; i < NumMemOps; i++)
+    if (MemOps[i].bitsGT(LargestVT))
+      LargestVT = MemOps[i];
+  SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
-    unsigned VTSize = VT.getSizeInBits() / 8;
-    SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+    // If this store is smaller than the largest store see whether we can get
+    // the smaller value for free with a truncate.
+    SDValue Value = MemSetValue;
+    if (VT.bitsLT(LargestVT)) {
+      if (!LargestVT.isVector() && !VT.isVector() &&
+          TLI.isTruncateFree(LargestVT, VT))
+        Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+      else
+        Value = getMemsetValue(Src, VT, DAG, dl);
+    }
+    assert(Value.getValueType() == VT && "Value with wrong type.");
     SDValue Store = DAG.getStore(Chain, dl, Value,
                                  getMemBasePlusOffset(Dst, DstOff, DAG),
-                                 DstSV, DstSVOff + DstOff, isVol, false, 0);
+                                 DstPtrInfo.getWithOffset(DstOff),
+                                 isVol, false, Align);
     OutChains.push_back(Store);
-    DstOff += VTSize;
+    DstOff += VT.getSizeInBits() / 8;
   }
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
 SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
                                 unsigned Align, bool isVol, bool AlwaysInline,
-                                const Value *DstSV, uint64_t DstSVOff,
-                                const Value *SrcSV, uint64_t SrcSVOff) {
+                                MachinePointerInfo DstPtrInfo,
+                                MachinePointerInfo SrcPtrInfo) {
 
   // Check to see if we should lower the memcpy to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
 
     SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
                                              ConstantSize->getZExtValue(),Align,
-                                isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                isVol, false, DstPtrInfo, SrcPtrInfo);
     if (Result.getNode())
       return Result;
   }
@@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
   SDValue Result =
     TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
                                 isVol, AlwaysInline,
-                                DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                DstPtrInfo, SrcPtrInfo);
   if (Result.getNode())
     return Result;
 
@@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
     assert(ConstantSize && "AlwaysInline requires a constant size!");
     return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
                                    ConstantSize->getZExtValue(), Align, isVol,
-                                   true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                   true, DstPtrInfo, SrcPtrInfo);
   }
 
   // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
@@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
 SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
                                  SDValue Src, SDValue Size,
                                  unsigned Align, bool isVol,
-                                 const Value *DstSV, uint64_t DstSVOff,
-                                 const Value *SrcSV, uint64_t SrcSVOff) {
+                                 MachinePointerInfo DstPtrInfo,
+                                 MachinePointerInfo SrcPtrInfo) {
 
   // Check to see if we should lower the memmove to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
     SDValue Result =
       getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
                                ConstantSize->getZExtValue(), Align, isVol,
-                               false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+                               false, DstPtrInfo, SrcPtrInfo);
     if (Result.getNode())
       return Result;
   }
@@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
     TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
-                                 DstSV, DstSVOff, SrcSV, SrcSVOff);
+                                 DstPtrInfo, SrcPtrInfo);
   if (Result.getNode())
     return Result;
 
@@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
 SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
                                 SDValue Src, SDValue Size,
                                 unsigned Align, bool isVol,
-                                const Value *DstSV, uint64_t DstSVOff) {
+                                MachinePointerInfo DstPtrInfo) {
 
   // Check to see if we should lower the memset to stores first.
   // For cases within the target-specified limits, this is the best choice.
@@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
 
     SDValue Result =
       getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-                      Align, isVol, DstSV, DstSVOff);
+                      Align, isVol, DstPtrInfo);
 
     if (Result.getNode())
       return Result;
@@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
   // code. If the target chooses to do this, this is the next best.
   SDValue Result =
     TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
-                                DstSV, DstSVOff);
+                                DstPtrInfo);
   if (Result.getNode())
     return Result;
 
-  // Emit a library call.  
+  // Emit a library call.
   const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
-                                SDValue Chain,
-                                SDValue Ptr, SDValue Cmp,
-                                SDValue Swp, const Value* PtrVal,
+                                SDValue Chain, SDValue Ptr, SDValue Cmp,
+                                SDValue Swp, MachinePointerInfo PtrInfo,
                                 unsigned Alignment) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
 
-  // Check if the memory reference references a frame index
-  if (!PtrVal)
-    if (const FrameIndexSDNode *FI =
-          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
-      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
   MachineFunction &MF = getMachineFunction();
   unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
@@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
   Flags |= MachineMemOperand::MOVolatile;
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PtrVal, Flags, 0,
-                            MemVT.getStoreSize(), Alignment);
+    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
 
   return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
 }
@@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
 
-  // Check if the memory reference references a frame index
-  if (!PtrVal)
-    if (const FrameIndexSDNode *FI =
-          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
-      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
   MachineFunction &MF = getMachineFunction();
   unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
@@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
   Flags |= MachineMemOperand::MOVolatile;
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PtrVal, Flags, 0,
+    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
                             MemVT.getStoreSize(), Alignment);
 
   return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
@@ -3785,7 +3914,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
 }
 
 /// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
 SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
                                      DebugLoc dl) {
   if (NumOps == 1)
@@ -3803,18 +3931,18 @@ SDValue
 SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
                                   const EVT *VTs, unsigned NumVTs,
                                   const SDValue *Ops, unsigned NumOps,
-                                  EVT MemVT, const Value *srcValue, int SVOff,
+                                  EVT MemVT, MachinePointerInfo PtrInfo,
                                   unsigned Align, bool Vol,
                                   bool ReadMem, bool WriteMem) {
   return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
-                             MemVT, srcValue, SVOff, Align, Vol,
+                             MemVT, PtrInfo, Align, Vol,
                              ReadMem, WriteMem);
 }
 
 SDValue
 SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
                                   const SDValue *Ops, unsigned NumOps,
-                                  EVT MemVT, const Value *srcValue, int SVOff,
+                                  EVT MemVT, MachinePointerInfo PtrInfo,
                                   unsigned Align, bool Vol,
                                   bool ReadMem, bool WriteMem) {
   if (Align == 0)  // Ensure that codegen never sees alignment 0
@@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
   if (Vol)
     Flags |= MachineMemOperand::MOVolatile;
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(srcValue, Flags, SVOff,
-                            MemVT.getStoreSize(), Align);
+    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
 
   return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
 }
@@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
                                   EVT MemVT, MachineMemOperand *MMO) {
   assert((Opcode == ISD::INTRINSIC_VOID ||
           Opcode == ISD::INTRINSIC_W_CHAIN ||
+          Opcode == ISD::PREFETCH ||
           (Opcode <= INT_MAX &&
            (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
          "Opcode is not a memory-accessing opcode!");
 
   // Memoize the node unless it returns a flag.
   MemIntrinsicSDNode *N;
-  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
     void *IP = 0;
@@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
   return SDValue(N, 0);
 }
 
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it.  This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+  // If this is FI+Offset, we can model it.
+  if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+    return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+  // If this is (FI+Offset1)+Offset2, we can model it.
+  if (Ptr.getOpcode() != ISD::ADD ||
+      !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+      !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+    return MachinePointerInfo();
+
+  int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+  return MachinePointerInfo::getFixedStack(FI, Offset+
+                       cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it.  This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+  // If the 'Offset' value isn't a constant, we can't handle this.
+  if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+    return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+  if (OffsetOp.getOpcode() == ISD::UNDEF)
+    return InferPointerInfo(Ptr);
+  return MachinePointerInfo();
+}
+
+
 SDValue
 SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                       EVT VT, DebugLoc dl, SDValue Chain,
                       SDValue Ptr, SDValue Offset,
-                      const Value *SV, int SVOffset, EVT MemVT,
+                      MachinePointerInfo PtrInfo, EVT MemVT,
                       bool isVolatile, bool isNonTemporal,
-                      unsigned Alignment) {
+                      unsigned Alignment, const MDNode *TBAAInfo) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(VT);
 
-  // Check if the memory reference references a frame index
-  if (!SV)
-    if (const FrameIndexSDNode *FI =
-          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
-      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
-  MachineFunction &MF = getMachineFunction();
   unsigned Flags = MachineMemOperand::MOLoad;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;
+
+  // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+  // clients.
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr, Offset);
+
+  MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(SV, Flags, SVOffset,
-                            MemVT.getStoreSize(), Alignment);
+    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+                            TBAAInfo);
   return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
 }
 
 SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, 
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                       EVT VT, DebugLoc dl, SDValue Chain,
                       SDValue Ptr, SDValue Offset, EVT MemVT,
                       MachineMemOperand *MMO) {
@@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
 
 SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
                               SDValue Chain, SDValue Ptr,
-                              const Value *SV, int SVOffset,
+                              MachinePointerInfo PtrInfo,
                               bool isVolatile, bool isNonTemporal,
-                              unsigned Alignment) {
+                              unsigned Alignment, const MDNode *TBAAInfo) {
   SDValue Undef = getUNDEF(Ptr.getValueType());
   return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
-                 SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+                 PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
 }
 
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
                                  SDValue Chain, SDValue Ptr,
-                                 const Value *SV,
-                                 int SVOffset, EVT MemVT,
+                                 MachinePointerInfo PtrInfo, EVT MemVT,
                                  bool isVolatile, bool isNonTemporal,
-                                 unsigned Alignment) {
+                                 unsigned Alignment, const MDNode *TBAAInfo) {
   SDValue Undef = getUNDEF(Ptr.getValueType());
   return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
-                 SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+                 PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+                 TBAAInfo);
 }
 
+
 SDValue
 SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
                              SDValue Offset, ISD::MemIndexedMode AM) {
@@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
   assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
          "Load is already a indexed load!");
   return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
-                 LD->getChain(), Base, Offset, LD->getSrcValue(),
-                 LD->getSrcValueOffset(), LD->getMemoryVT(),
+                 LD->getChain(), Base, Offset, LD->getPointerInfo(),
+                 LD->getMemoryVT(),
                  LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
 }
 
 SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
-                               SDValue Ptr, const Value *SV, int SVOffset,
+                               SDValue Ptr, MachinePointerInfo PtrInfo,
                                bool isVolatile, bool isNonTemporal,
-                               unsigned Alignment) {
+                               unsigned Alignment, const MDNode *TBAAInfo) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(Val.getValueType());
 
-  // Check if the memory reference references a frame index
-  if (!SV)
-    if (const FrameIndexSDNode *FI =
-          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
-      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
-  MachineFunction &MF = getMachineFunction();
   unsigned Flags = MachineMemOperand::MOStore;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(SV, Flags, SVOffset,
-                            Val.getValueType().getStoreSize(), Alignment);
+    MF.getMachineMemOperand(PtrInfo, Flags,
+                            Val.getValueType().getStoreSize(), Alignment,
+                            TBAAInfo);
 
   return getStore(Chain, dl, Val, Ptr, MMO);
 }
@@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
 }
 
 SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
-                                    SDValue Ptr, const Value *SV,
-                                    int SVOffset, EVT SVT,
-                                    bool isVolatile, bool isNonTemporal,
-                                    unsigned Alignment) {
+                                    SDValue Ptr, MachinePointerInfo PtrInfo,
+                                    EVT SVT,bool isVolatile, bool isNonTemporal,
+                                    unsigned Alignment,
+                                    const MDNode *TBAAInfo) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(SVT);
 
-  // Check if the memory reference references a frame index
-  if (!SV)
-    if (const FrameIndexSDNode *FI =
-          dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
-      SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
-  MachineFunction &MF = getMachineFunction();
   unsigned Flags = MachineMemOperand::MOStore;
   if (isVolatile)
     Flags |= MachineMemOperand::MOVolatile;
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+    MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+                            TBAAInfo);
 
   return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
 }
@@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   SDNode *N;
   SDVTList VTs = getVTList(VT);
 
-  if (VT != MVT::Flag) {
+  if (VT != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
     void *IP = 0;
@@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
 
   // Memoize the node unless it returns a flag.
   SDNode *N;
-  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
     void *IP = 0;
@@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
   }
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifySDNode(N);
 #endif
   return SDValue(N, 0);
 }
@@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
                                   unsigned NumOps) {
   // If an identical node already exists, use it.
   void *IP = 0;
-  if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+  if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
     if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
                              const SDValue *Ops, unsigned NumOps) {
-  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
   MachineSDNode *N;
-  void *IP;
+  void *IP = 0;
 
   if (DoCSE) {
     FoldingSetNodeID ID;
@@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
 
   AllNodes.push_back(N);
 #ifndef NDEBUG
-  VerifyNode(N);
+  VerifyMachineNode(N);
 #endif
   return N;
 }
@@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
 /// else return NULL.
 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
                                       const SDValue *Ops, unsigned NumOps) {
-  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
     void *IP = 0;
@@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
     SD->setHasDebugValue(true);
 }
 
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+  if (From == To || !From.getNode()->getHasDebugValue())
+    return;
+  SDNode *FromNode = From.getNode();
+  SDNode *ToNode = To.getNode();
+  SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+  SmallVector<SDDbgValue *, 2> ClonedDVs;
+  for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+       I != E; ++I) {
+    SDDbgValue *Dbg = *I;
+    if (Dbg->getKind() == SDDbgValue::SDNODE) {
+      SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+                                      Dbg->getOffset(), Dbg->getDebugLoc(),
+                                      Dbg->getOrder());
+      ClonedDVs.push_back(Clone);
+    }
+  }
+  for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+         E = ClonedDVs.end(); I != E; ++I)
+    AddDbgValue(*I, ToNode, false);
+}
+
 //===----------------------------------------------------------------------===//
 //                              SDNode Class
 //===----------------------------------------------------------------------===//
@@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
 }
 
 MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
-                     const SDValue *Ops, unsigned NumOps, EVT memvt, 
+                     const SDValue *Ops, unsigned NumOps, EVT memvt,
                      MachineMemOperand *mmo)
    : SDNode(Opc, dl, VTs, Ops, NumOps),
      MemoryVT(memvt), MMO(mmo) {
@@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
 namespace {
   struct EVTArray {
     std::vector<EVT> VTs;
-    
+
     EVTArray() {
       VTs.reserve(MVT::LAST_VALUETYPE);
       for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
@@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
     sys::SmartScopedLock<true> Lock(*VTMutex);
     return &(*EVTs->insert(VT).first);
   } else {
-    assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+    assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
            "Value type out of range!");
     return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
   }
@@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const {
 
 /// reachesChainWithoutSideEffects - Return true if this operand (which must
 /// be a chain) reaches the specified operand without crossing any
-/// side-effecting instructions.  In practice, this looks through token
-/// factors and non-volatile loads.  In order to remain efficient, this only
-/// looks a couple of nodes in, it does not do an exhaustive search.
+/// side-effecting instructions on any chain path.  In practice, this looks
+/// through token factors and non-volatile loads.  In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
 bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
                                                unsigned Depth) const {
   if (*this == Dest) return true;
@@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
   if (Depth == 0) return false;
 
   // If this is a token factor, all inputs to the TF happen in parallel.  If any
-  // of the operands of the TF reach dest, then we can do the xform.
+  // of the operands of the TF does not reach dest, then we cannot do the xform.
   if (getOpcode() == ISD::TokenFactor) {
     for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
-        return true;
-    return false;
+      if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+        return false;
+    return true;
   }
 
   // Loads don't have side effects, look through them.
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::EH_RETURN: return "EH_RETURN";
   case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
   case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+  case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
   case ISD::ConstantPool:  return "ConstantPool";
   case ISD::ExternalSymbol: return "ExternalSymbol";
   case ISD::BlockAddress:  return "BlockAddress";
@@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";
   case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";
   case ISD::CONCAT_VECTORS:      return "concat_vectors";
+  case ISD::INSERT_SUBVECTOR:    return "insert_subvector";
   case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";
   case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";
   case ISD::VECTOR_SHUFFLE:      return "vector_shuffle";
@@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UINT_TO_FP:  return "uint_to_fp";
   case ISD::FP_TO_SINT:  return "fp_to_sint";
   case ISD::FP_TO_UINT:  return "fp_to_uint";
-  case ISD::BIT_CONVERT: return "bit_convert";
+  case ISD::BITCAST:     return "bit_convert";
   case ISD::FP16_TO_FP32: return "fp16_to_fp32";
   case ISD::FP32_TO_FP16: return "fp32_to_fp16";
 
@@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << LBB->getName() << " ";
     OS << (const void*)BBDN->getBasicBlock() << ">";
   } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
-    if (G && R->getReg() &&
-        TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
-      OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
-    } else {
-      OS << " %reg" << R->getReg();
-    }
+    OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
   } else if (const ExternalSymbolSDNode *ES =
              dyn_cast<ExternalSymbolSDNode>(this)) {
     OS << "'" << ES->getSymbol() << "'";
@@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
     const char *AM = getIndexedModeName(ST->getAddressingMode());
     if (*AM)
       OS << ", " << AM;
-    
+
     OS << ">";
   } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
     OS << "<" << *M->getMemOperand() << ">";
@@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
 
 static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
                                   const SelectionDAG *G, unsigned depth,
-                                  unsigned indent) 
+                                  unsigned indent)
 {
   if (depth == 0)
     return;
@@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
 void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
                             unsigned depth) const {
   printrWithDepthHelper(OS, this, G, depth, 0);
-} 
+}
 
 void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
   // Don't print impossibly deep things.
@@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
 void SDNode::dumprFull(const SelectionDAG *G) const {
   // Don't print impossibly deep things.
   dumprWithDepth(G, 100);
-} 
+}
 
 static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
 }
 
 
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
-/// location that is 'Dist' units away from the location that the 'Base' load 
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
 /// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
                                      unsigned Bytes, int Dist) const {
   if (LD->getChain() != Base->getChain())
     return false;
@@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
     if (FS != BFS || FS != (int)Bytes) return false;
     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
   }
-  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
-    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
-    if (V && (V->getSExtValue() == Dist*Bytes))
-      return true;
-  }
+
+  // Handle X+C
+  if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+    return true;
 
   const GlobalValue *GV1 = NULL;
   const GlobalValue *GV2 = NULL;
@@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   int64_t FrameOffset = 0;
   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
     FrameIdx = FI->getIndex();
-  } else if (Ptr.getOpcode() == ISD::ADD &&
-             isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+  } else if (isBaseWithConstantOffset(Ptr) &&
              isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+    // Handle FI+Cst
     FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
     FrameOffset = Ptr.getConstantOperandVal(1);
   }
 
   if (FrameIdx != (1 << 31)) {
-    // FIXME: Handle FI+CST.
     const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
     unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
                                     FrameOffset);
@@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
     if (OpVal.getOpcode() == ISD::UNDEF)
       SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
     else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
-      SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+      SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
                     zextOrTrunc(sz) << BitPos;
     else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
       SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
@@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
   while (sz > 8) {
 
     unsigned HalfSize = sz / 2;
-    APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
-    APInt LowValue = APInt(SplatValue).trunc(HalfSize);
-    APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
-    APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+    APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+    APInt LowValue = SplatValue.trunc(HalfSize);
+    APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+    APInt LowUndef = SplatUndef.trunc(HalfSize);
 
     // If the two halves do not match (ignoring undef bits), stop here.
     if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
@@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N,
   // If this node has already been checked, don't check it again.
   if (Checked.count(N))
     return;
-  
+
   // If a node has already been visited on this depth-first walk, reject it as
   // a cycle.
   if (!Visited.insert(N)) {
@@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N,
     errs() << "Detected cycle in SelectionDAG\n";
     abort();
   }
-  
+
   for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
     checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
-  
+
   Checked.insert(N);
   Visited.erase(N);
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e65744592c8b..452f5614b7bf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,6 +15,7 @@
 #include "SDNodeDbgValue.h"
 #include "SelectionDAGBuilder.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
@@ -43,9 +44,8 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision",
                  cl::location(LimitFloatPrecision),
                  cl::init(0));
 
+// Limit the width of DAG chains. This is important in general to prevent
+// prevent DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static cl::opt<unsigned>
+MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
+                  cl::init(64), cl::Hidden);
+
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                       const SDValue *Parts, unsigned NumParts,
                                       EVT PartVT, EVT ValueVT);
-  
+
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
 /// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
-  
+
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Val = Parts[0];
@@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
         Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                               RoundParts / 2, PartVT, HalfVT);
       } else {
-        Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
-        Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
+        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
       }
 
       if (TLI.isBigEndian())
@@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
       assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
              "Unexpected split");
       SDValue Lo, Hi;
-      Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
+      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
       Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
@@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
   }
 
   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
-    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
   llvm_unreachable("Unknown mismatch!");
   return SDValue();
@@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Val = Parts[0];
-  
+
   // Handle a multi-element vector.
   if (NumParts > 1) {
     EVT IntermediateVT, RegisterVT;
@@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
     assert(RegisterVT == Parts[0].getValueType() &&
            "Part type doesn't match part!");
-    
+
     // Assemble the parts into intermediate operands.
     SmallVector<SDValue, 8> Ops(NumIntermediates);
     if (NumIntermediates == NumParts) {
@@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
         Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                   PartVT, IntermediateVT);
     }
-    
+
     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
     // intermediate operands.
     Val = DAG.getNode(IntermediateVT.isVector() ?
                       ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
                       ValueVT, &Ops[0], NumIntermediates);
   }
-  
+
   // There is now one part, held in Val.  Correct it to match ValueVT.
   PartVT = Val.getValueType();
-  
+
   if (PartVT == ValueVT)
     return Val;
-  
+
   if (PartVT.isVector()) {
     // If the element type of the source/dest vectors are the same, but the
     // parts vector has more elements than the value vector, then we have a
@@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
              "Cannot narrow, it would be a lossy transformation");
       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                          DAG.getIntPtrConstant(0));
-    }                                      
-    
+    }
+
     // Vector/Vector bitcast.
-    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
   }
-  
+
   assert(ValueVT.getVectorElementType() == PartVT &&
          ValueVT.getVectorNumElements() == 1 &&
          "Only trivial scalar-to-vector conversions should get here!");
@@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
                                  EVT PartVT);
-  
+
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
 /// integers, ExtendKind can be used to specify how to generate the extra bits.
@@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
                            EVT PartVT,
                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
   EVT ValueVT = Val.getValueType();
-  
+
   // Handle the vector case separately.
   if (ValueVT.isVector())
     return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
-  
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned PartBits = PartVT.getSizeInBits();
   unsigned OrigNumParts = NumParts;
@@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
       Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
     } else {
       assert(PartVT.isInteger() && ValueVT.isInteger() &&
-             "Unknown mismatch!");             
+             "Unknown mismatch!");
       ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
       Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
     }
   } else if (PartBits == ValueVT.getSizeInBits()) {
     // Different types of the same size.
     assert(NumParts == 1 && PartVT != ValueVT);
-    Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
   } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
     // If the parts cover less bits than value has, truncate the value.
     assert(PartVT.isInteger() && ValueVT.isInteger() &&
@@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
 
   // The number of parts is a power of 2.  Repeatedly bisect the value using
   // EXTRACT_ELEMENT.
-  Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                          EVT::getIntegerVT(*DAG.getContext(),
                                            ValueVT.getSizeInBits()),
                          Val);
@@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
                           ThisVT, Part0, DAG.getIntPtrConstant(0));
 
       if (ThisBits == PartBits && ThisVT != PartVT) {
-        Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
-        Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
+        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
       }
     }
   }
@@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  
+
   if (NumParts == 1) {
     if (PartVT == ValueVT) {
       // Nothing to do.
     } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
       // Bitconvert vector->vector case.
-      Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
     } else if (PartVT.isVector() &&
                PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
                PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
@@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                   ElementVT, Val, DAG.getIntPtrConstant(i)));
-      
+
       for (unsigned i = ValueVT.getVectorNumElements(),
            e = PartVT.getVectorNumElements(); i != e; ++i)
         Ops.push_back(DAG.getUNDEF(ElementVT));
@@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
 
       // FIXME: Use CONCAT for 2x -> 4x.
-      
+
       //SDValue UndefElts = DAG.getUNDEF(VectorTy);
       //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
     } else {
@@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                         PartVT, Val, DAG.getIntPtrConstant(0));
     }
-    
+
     Parts[0] = Val;
     return;
   }
-  
+
   // Handle a multi-element vector.
   EVT IntermediateVT, RegisterVT;
   unsigned NumIntermediates;
@@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                                 IntermediateVT,
                                                 NumIntermediates, RegisterVT);
   unsigned NumElements = ValueVT.getVectorNumElements();
-  
+
   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
   NumParts = NumRegs; // Silence a compiler warning.
   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-  
+
   // Split the vector into intermediate operands.
   SmallVector<SDValue, 8> Ops(NumIntermediates);
   for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                            IntermediateVT, Val, DAG.getIntPtrConstant(i));
   }
-  
+
   // Split the intermediate operands into legal parts.
   if (NumParts == NumIntermediates) {
     // If the register was not expanded, promote or copy the value,
@@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
       }
 
       Chain = P.getValue(1);
+      Parts[i] = P;
 
       // If the source register was virtual and if we know something about it,
       // add an assert node.
-      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
-          RegisterVT.isInteger() && !RegisterVT.isVector()) {
-        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
-        if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
-          const FunctionLoweringInfo::LiveOutInfo &LOI =
-            FuncInfo.LiveOutRegInfo[SlotNo];
-
-          unsigned RegSize = RegisterVT.getSizeInBits();
-          unsigned NumSignBits = LOI.NumSignBits;
-          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
-          // FIXME: We capture more information than the dag can represent.  For
-          // now, just use the tightest assertzext/assertsext possible.
-          bool isSExt = true;
-          EVT FromVT(MVT::Other);
-          if (NumSignBits == RegSize)
-            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
-          else if (NumZeroBits >= RegSize-1)
-            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
-          else if (NumSignBits > RegSize-8)
-            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
-          else if (NumZeroBits >= RegSize-8)
-            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
-          else if (NumSignBits > RegSize-16)
-            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
-          else if (NumZeroBits >= RegSize-16)
-            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
-          else if (NumSignBits > RegSize-32)
-            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
-          else if (NumZeroBits >= RegSize-32)
-            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
-          if (FromVT != MVT::Other)
-            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
-                            RegisterVT, P, DAG.getValueType(FromVT));
-        }
-      }
+      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+          !RegisterVT.isInteger() || RegisterVT.isVector() ||
+          !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+        continue;
+      
+      const FunctionLoweringInfo::LiveOutInfo &LOI =
+        FuncInfo.LiveOutRegInfo[Regs[Part+i]];
+
+      unsigned RegSize = RegisterVT.getSizeInBits();
+      unsigned NumSignBits = LOI.NumSignBits;
+      unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+      // FIXME: We capture more information than the dag can represent.  For
+      // now, just use the tightest assertzext/assertsext possible.
+      bool isSExt = true;
+      EVT FromVT(MVT::Other);
+      if (NumSignBits == RegSize)
+        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
+      else if (NumZeroBits >= RegSize-1)
+        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
+      else if (NumSignBits > RegSize-8)
+        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
+      else if (NumZeroBits >= RegSize-8)
+        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
+      else if (NumSignBits > RegSize-16)
+        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
+      else if (NumZeroBits >= RegSize-16)
+        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+      else if (NumSignBits > RegSize-32)
+        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
+      else if (NumZeroBits >= RegSize-32)
+        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+      else
+        continue;
 
-      Parts[i] = P;
+      // Add an assertion node.
+      assert(FromVT != MVT::Other);
+      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                             RegisterVT, P, DAG.getValueType(FromVT));
     }
 
     Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
@@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                               Val.getResNo(), Offset, dl, DbgSDNodeOrder);
         DAG.AddDbgValue(SDV, Val.getNode(), false);
       }
-    } else {
-      SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
-                            Offset, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, false);
-    }
+    } else 
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
     DanglingDebugInfoMap[V] = DanglingDebugInfo();
   }
 }
@@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
     unsigned InReg = It->second;
     RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
     SDValue Chain = DAG.getEntryNode();
-    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+    resolveDanglingDebugInfo(V, N);
+    return N;
   }
 
   // Otherwise create a new SDValue and remember it.
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       Chains[i] =
         DAG.getStore(Chain, getCurDebugLoc(),
                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                     Add, NULL, Offsets[i], false, false, 0);
+                     // FIXME: better loc info would be nice.
+                     Add, MachinePointerInfo(), false, false, 0);
     }
 
     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
     if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
       return false;
   }
-  
+
   return true;
 }
 
@@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
 
   // If this is a series of conditions that are or'd or and'd together, emit
   // this as a sequence of branches instead of setcc's with and/or operations.
+  // As long as jumps are not expensive, this should improve performance.
   // For example, instead of something like:
   //     cmp A, B
   //     C = seteq
@@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
   //     jle foo
   //
   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (BOp->hasOneUse() &&
+    if (!TLI.isJumpExpensive() && 
+        BOp->hasOneUse() &&
         (BOp->getOpcode() == Instruction::And ||
          BOp->getOpcode() == Instruction::Or)) {
       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                MVT::Other, getControlRoot(), Cond,
                                DAG.getBasicBlock(CB.TrueBB));
 
-  // Insert the false branch.
-  if (CB.FalseBB != NextBlock)
-    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
-                         DAG.getBasicBlock(CB.FalseBB));
+  // Insert the false branch. Do this even if it's a fall through branch,
+  // this makes it easier to do DAG optimizations which require inverting
+  // the branch condition.
+  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                       DAG.getBasicBlock(CB.FalseBB));
 
   DAG.setRoot(BrCond);
 }
@@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
                                   Sub, DAG.getConstant(B.Range, VT),
                                   ISD::SETUGT);
 
-  SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
-                                       TLI.getPointerTy());
+  // Determine the type of the test operands.
+  bool UsePtrType = false;
+  if (!TLI.isTypeLegal(VT))
+    UsePtrType = true;
+  else {
+    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+      if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+        // Switch table case range are encoded into series of masks.
+        // Just use pointer type, it's guaranteed to fit.
+        UsePtrType = true;
+        break;
+      }
+  }
+  if (UsePtrType) {
+    VT = TLI.getPointerTy();
+    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+  }
 
-  B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+  B.RegVT = VT;
+  B.Reg = FuncInfo.CreateReg(VT);
   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
-                                    B.Reg, ShiftOp);
+                                    B.Reg, Sub);
 
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
 }
 
 /// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+                                           MachineBasicBlock* NextMBB,
                                            unsigned Reg,
                                            BitTestCase &B,
                                            MachineBasicBlock *SwitchBB) {
-  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
-                                       TLI.getPointerTy());
+  EVT VT = BB.RegVT;
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                       Reg, VT);
   SDValue Cmp;
   if (CountPopulation_64(B.Mask) == 1) {
     // Testing for a single bit; just compare the shift count with what it
     // would need to be to shift a 1 bit in that position.
     Cmp = DAG.getSetCC(getCurDebugLoc(),
-                       TLI.getSetCCResultType(ShiftOp.getValueType()),
+                       TLI.getSetCCResultType(VT),
                        ShiftOp,
-                       DAG.getConstant(CountTrailingZeros_64(B.Mask),
-                                       TLI.getPointerTy()),
+                       DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
                        ISD::SETEQ);
   } else {
     // Make desired shift
-    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
-                                    TLI.getPointerTy(),
-                                    DAG.getConstant(1, TLI.getPointerTy()),
-                                    ShiftOp);
+    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+                                    DAG.getConstant(1, VT), ShiftOp);
 
     // Emit bit tests and jumps
     SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
-                                TLI.getPointerTy(), SwitchVal,
-                                DAG.getConstant(B.Mask, TLI.getPointerTy()));
+                                VT, SwitchVal, DAG.getConstant(B.Mask, VT));
     Cmp = DAG.getSetCC(getCurDebugLoc(),
-                       TLI.getSetCCResultType(AndOp.getValueType()),
-                       AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                       TLI.getSetCCResultType(VT),
+                       AndOp, DAG.getConstant(0, VT),
                        ISD::SETNE);
   }
 
@@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
   if (++BBI != FuncInfo.MF->end())
     NextBlock = BBI;
 
-  // TODO: If any two of the cases has the same destination, and if one value
+  // If any two of the cases has the same destination, and if one value
   // is the same as the other, but has one bit unset that the other has set,
   // use bit manipulation to do two compares at once.  For example:
   // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+  // TODO: Handle cases where CR.CaseBB != SwitchBB.
+  if (Size == 2 && CR.CaseBB == SwitchBB) {
+    Case &Small = *CR.Range.first;
+    Case &Big = *(CR.Range.second-1);
+
+    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+      // Check that there is only one bit different.
+      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+          (SmallValue | BigValue) == BigValue) {
+        // Isolate the common bit.
+        APInt CommonBit = BigValue & ~SmallValue;
+        assert((SmallValue | CommonBit) == BigValue &&
+               CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+        SDValue CondLHS = getValue(SV);
+        EVT VT = CondLHS.getValueType();
+        DebugLoc DL = getCurDebugLoc();
+
+        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                 DAG.getConstant(CommonBit, VT));
+        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+                                    Or, DAG.getConstant(BigValue, VT),
+                                    ISD::SETEQ);
+
+        // Update successor info.
+        SwitchBB->addSuccessor(Small.BB);
+        SwitchBB->addSuccessor(Default);
+
+        // Insert the true branch.
+        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+                                     getControlRoot(), Cond,
+                                     DAG.getBasicBlock(Small.BB));
+
+        // Insert the false branch.
+        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+                             DAG.getBasicBlock(Default));
+
+        DAG.setRoot(BrCond);
+        return true;
+      }
+    }
+  }
 
   // Rearrange the case blocks so that the last one falls through if possible.
   if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
 }
 
 static APInt ComputeRange(const APInt &First, const APInt &Last) {
-  APInt LastExt(Last), FirstExt(First);
   uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
-  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
   return (LastExt - FirstExt + 1ULL);
 }
 
@@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
   }
 
   BitTestBlock BTB(lowBound, cmpRange, SV,
-                   -1U, (CR.CaseBB == SwitchBB),
+                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
                    CR.CaseBB, Default, BTC);
 
   if (CR.CaseBB == SwitchBB)
@@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
   if (Cases.size() >= 2)
     // Must recompute end() each iteration because it may be
     // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
       MachineBasicBlock* nextBB = J->BB;
@@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
   return numCmps;
 }
 
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+                                           MachineBasicBlock *Last) {
+  // Update JTCases.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+    if (JTCases[i].first.HeaderBB == First)
+      JTCases[i].first.HeaderBB = Last;
+
+  // Update BitTestCases.
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+    if (BitTestCases[i].Parent == First)
+      BitTestCases[i].Parent = Last;
+}
+
 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
 
@@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
 void SelectionDAGBuilder::visitFSub(const User &I) {
   // -0.0 - X --> fneg
   const Type *Ty = I.getType();
-  if (Ty->isVectorTy()) {
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
-      const VectorType *DestTy = cast<VectorType>(I.getType());
-      const Type *ElTy = DestTy->getElementType();
-      unsigned VL = DestTy->getNumElements();
-      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
-      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
-      if (CV == CNZ) {
-        SDValue Op2 = getValue(I.getOperand(1));
-        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                 Op2.getValueType(), Op2));
-        return;
-      }
-    }
+  if (isa<Constant>(I.getOperand(0)) &&
+      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+    SDValue Op2 = getValue(I.getOperand(1));
+    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                             Op2.getValueType(), Op2));
+    return;
   }
 
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
-    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
-      SDValue Op2 = getValue(I.getOperand(1));
-      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                               Op2.getValueType(), Op2));
-      return;
-    }
-
   visitBinary(I, ISD::FSUB);
 }
 
@@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-  if (!I.getType()->isVectorTy() &&
-      Op2.getValueType() != TLI.getShiftAmountTy()) {
+  
+  MVT ShiftTy = TLI.getShiftAmountTy();
+  
+  // Coerce the shift amount to the right type if we can.
+  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+    unsigned ShiftSize = ShiftTy.getSizeInBits();
+    unsigned Op2Size = Op2.getValueType().getSizeInBits();
+    DebugLoc DL = getCurDebugLoc();
+    
     // If the operand is smaller than the shift count type, promote it.
-    EVT PTy = TLI.getPointerTy();
-    EVT STy = TLI.getShiftAmountTy();
-    if (STy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
+    if (ShiftSize > Op2Size)
+      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+    
     // If the operand is larger than the shift count type but the shift
     // count type has enough bits to represent any shift value, truncate
     // it now. This is a common case and it exposes the truncate to
     // optimization early.
-    else if (STy.getSizeInBits() >=
-             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
-    // Otherwise we'll need to temporarily settle for some other
-    // convenient type; type legalization will make adjustments as
-    // needed.
-    else if (PTy.bitsLT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
-    else if (PTy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
+    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+    // Otherwise we'll need to temporarily settle for some other convenient
+    // type.  Type legalization will make adjustments once the shiftee is split.
+    else
+      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
   }
 
   setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
@@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
   EVT DestVT = TLI.getValueType(I.getType());
 
   // BitCast assures us that source and destination are the same size so this is
-  // either a BIT_CONVERT or a no-op.
+  // either a BITCAST or a no-op.
   if (DestVT != N.getValueType())
-    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                              DestVT, N)); // convert types.
   else
     setValue(&I, N);            // noop cast.
@@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
         } else {
           StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
           if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
-              StartIdx[Input] + MaskNumElts < SrcNumElts)
+              StartIdx[Input] + MaskNumElts <= SrcNumElts)
             RangeUse[Input] = 1; // Extract from a multiple of the mask length.
         }
       }
@@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
   bool IntoUndef = isa<UndefValue>(Op0);
   bool FromUndef = isa<UndefValue>(Op1);
 
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
 
   SmallVector<EVT, 4> AggValueVTs;
   ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
   const Type *ValTy = I.getType();
   bool OutOfUndef = isa<UndefValue>(Op0);
 
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
 
   SmallVector<EVT, 4> ValValueVTs;
   ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
   // Handle alignment.  If the requested alignment is less than or equal to
   // the stack alignment, ignore it.  If the size is greater than or equal to
   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   if (Align <= StackAlign)
     Align = 0;
 
@@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   bool isVolatile = I.isVolatile();
   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   unsigned Alignment = I.getAlignment();
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
 
   SmallVector<EVT, 4> ValueVTs;
   SmallVector<uint64_t, 4> Offsets;
@@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
 
   SDValue Root;
   bool ConstantMemory = false;
-  if (I.isVolatile())
+  if (I.isVolatile() || NumValues > MaxParallelChains)
     // Serialize volatile loads with other side effects.
     Root = getRoot();
-  else if (AA->pointsToConstantMemory(SV)) {
+  else if (AA->pointsToConstantMemory(
+             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
     // Do not serialize (non-volatile) loads of constant memory with anything.
     Root = DAG.getEntryNode();
     ConstantMemory = true;
@@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   }
 
   SmallVector<SDValue, 4> Values(NumValues);
-  SmallVector<SDValue, 4> Chains(NumValues);
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
   EVT PtrVT = Ptr.getValueType();
-  for (unsigned i = 0; i != NumValues; ++i) {
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // Serializing loads here may result in excessive register pressure, and
+    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+    // could recover a bit by hoisting nodes upward in the chain by recognizing
+    // they are side-effect free or do not alias. The optimizer should really
+    // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as failsafe).
+    if (ChainI == MaxParallelChains) {
+      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
     SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
                             PtrVT, Ptr,
                             DAG.getConstant(Offsets[i], PtrVT));
     SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
-                            A, SV, Offsets[i], isVolatile, 
-                            isNonTemporal, Alignment);
+                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+                            isNonTemporal, Alignment, TBAAInfo);
 
     Values[i] = L;
-    Chains[i] = L.getValue(1);
+    Chains[ChainI] = L.getValue(1);
   }
 
   if (!ConstantMemory) {
     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                                MVT::Other, &Chains[0], NumValues);
+                                MVT::Other, &Chains[0], ChainI);
     if (isVolatile)
       DAG.setRoot(Chain);
     else
@@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   SDValue Ptr = getValue(PtrV);
 
   SDValue Root = getRoot();
-  SmallVector<SDValue, 4> Chains(NumValues);
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
   EVT PtrVT = Ptr.getValueType();
   bool isVolatile = I.isVolatile();
   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   unsigned Alignment = I.getAlignment();
-
-  for (unsigned i = 0; i != NumValues; ++i) {
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // See visitLoad comments.
+    if (ChainI == MaxParallelChains) {
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
     SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
                               DAG.getConstant(Offsets[i], PtrVT));
-    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
-                             SDValue(Src.getNode(), Src.getResNo() + i),
-                             Add, PtrV, Offsets[i], isVolatile, 
-                             isNonTemporal, Alignment);
-  }
-
-  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                          MVT::Other, &Chains[0], NumValues));
+    SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+                              SDValue(Src.getNode(), Src.getResNo() + i),
+                              Add, MachinePointerInfo(PtrV, Offsets[i]),
+                              isVolatile, isNonTemporal, Alignment, TBAAInfo);
+    Chains[ChainI] = St;
+  }
+
+  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+  ++SDNodeOrder;
+  AssignOrderingToNode(StoreNode.getNode());
+  DAG.setRoot(StoreNode);
 }
 
 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
@@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
 
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
-  if (!IsTgtIntrinsic)
+  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+      Info.opc == ISD::INTRINSIC_W_CHAIN)
     Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
 
   // Add all operands of the call to the operand list.
@@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     // This is target intrinsic that touches memory
     Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
                                      VTs, &Ops[0], Ops.size(),
-                                     Info.memVT, Info.ptrVal, Info.offset,
+                                     Info.memVT,
+                                   MachinePointerInfo(Info.ptrVal, Info.offset),
                                      Info.align, Info.vol,
                                      Info.readMem, Info.writeMem);
   } else if (!HasChain) {
@@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   if (!I.getType()->isVoidTy()) {
     if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
       EVT VT = TLI.getValueType(PTy);
-      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
     }
 
     setValue(&I, Result);
@@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
                            DAG.getConstant(0x007fffff, MVT::i32));
   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                            DAG.getConstant(0x3f800000, MVT::i32));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
 }
 
 /// GetExponent - Get the exponent:
@@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
 
       // Add the exponent into the result in integer domain.
       SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                TwoToFracPartOfX, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
       // For floating-point precision of 12:
       //
@@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
 
       // Add the exponent into the result in integer domain.
       SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                TwoToFracPartOfX, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
       // For floating-point precision of 18:
       //
@@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                 getF32Constant(DAG, 0x3f800000));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
                                              MVT::i32, t13);
 
       // Add the exponent into the result in integer domain.
       SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 TwoToFracPartOfX, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
     }
   } else {
     // No special expansion.
@@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
     SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 
     // Scale the exponent by log(2) [0.69314718f].
     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
     SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 
     // Get the exponent.
     SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
     SDValue Op = getValue(I.getArgOperand(0));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 
     // Scale the exponent by log10(2) [0.30102999f].
     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
       // For floating-point precision of 12:
@@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
       // For floating-point precision of 18:
@@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                 getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     }
   } else {
@@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
       // For floating-point precision of 12:
@@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
       // For floating-point precision of 18:
@@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                 getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
       SDValue TwoToFractionalPartOfX =
         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
 
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
+      result = DAG.getNode(ISD::BITCAST, dl,
                            MVT::f32, TwoToFractionalPartOfX);
     }
   } else {
@@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
 /// At the end of instruction selection, they will be inserted to the entry BB.
 bool
 SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
-                                              int64_t Offset, 
+                                              int64_t Offset,
                                               const SDValue &N) {
   const Argument *Arg = dyn_cast<Argument>(V);
   if (!Arg)
     return false;
 
   MachineFunction &MF = DAG.getMachineFunction();
+  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
   // Ignore inlined function arguments here.
   DIVariable DV(Variable);
   if (DV.isInlinedFnArgument(MF.getFunction()))
@@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
   if (Arg->hasByValAttr()) {
     // Byval arguments' frame index is recorded during argument lowering.
     // Use this info directly.
-    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
     Reg = TRI->getFrameRegister(MF);
     Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+    // If byval argument ofset is not recorded then ignore this.
+    if (!Offset)
+      Reg = 0;
   }
 
   if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
     Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
-    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
       MachineRegisterInfo &RegInfo = MF.getRegInfo();
       unsigned PR = RegInfo.getLiveInPhysReg(Reg);
       if (PR)
@@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
   }
 
   if (!Reg) {
+    // Check if ValueMap has reg number.
     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
-    if (VMI == FuncInfo.ValueMap.end())
-      return false;
-    Reg = VMI->second;
+    if (VMI != FuncInfo.ValueMap.end())
+      Reg = VMI->second;
   }
 
-  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+  if (!Reg && N.getNode()) {
+    // Check if frame index is available.
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+      if (FrameIndexSDNode *FINode =
+          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+        Reg = TRI->getFrameRegister(MF);
+        Offset = FINode->getIndex();
+      }
+  }
+
+  if (!Reg)
+    return false;
+
   MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
                                     TII->get(TargetOpcode::DBG_VALUE))
     .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
@@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
 }
 
 // VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
 #endif
 
 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
@@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
     DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
-                              I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                              MachinePointerInfo(I.getArgOperand(0)),
+                              MachinePointerInfo(I.getArgOperand(1))));
     return 0;
   }
   case Intrinsic::memset: {
@@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
     DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
-                              I.getArgOperand(0), 0));
+                              MachinePointerInfo(I.getArgOperand(0))));
     return 0;
   }
   case Intrinsic::memmove: {
@@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue Op3 = getValue(I.getArgOperand(2));
     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-
-    // If the source and destination are known to not be aliases, we can
-    // lower memmove as memcpy.
-    uint64_t Size = -1ULL;
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
-      Size = C->getZExtValue();
-    if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
-        AliasAnalysis::NoAlias) {
-      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
-                                false, I.getArgOperand(0), 0,
-                                I.getArgOperand(1), 0));
-      return 0;
-    }
-
     DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
-                               I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                               MachinePointerInfo(I.getArgOperand(0)),
+                               MachinePointerInfo(I.getArgOperand(1))));
     return 0;
   }
   case Intrinsic::dbg_declare: {
@@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // Check if address has undef value.
     if (isa<UndefValue>(Address) ||
         (Address->use_empty() && !isa<Argument>(Address))) {
-      SDDbgValue*SDV = 
-        DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
-                        0, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, false);
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
       return 0;
     }
 
@@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDDbgValue *SDV;
     if (N.getNode()) {
       // Parameters are handled specially.
-      bool isParameter = 
+      bool isParameter =
         DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
       if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
         Address = BCI->getOperand(0);
@@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
           // Byval parameter.  We have a frame index at this point.
           SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
                                 0, dl, SDNodeOrder);
-        else
+        else {
           // Can't do anything with other non-AI cases yet.  This might be a
           // parameter of a callee function that got inlined, for example.
+          DEBUG(dbgs() << "Dropping debug info for " << DI);
           return 0;
+        }
       } else if (AI)
         SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
                               0, dl, SDNodeOrder);
-      else
+      else {
         // Can't do anything with other non-AI cases yet.
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
         return 0;
+      }
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
-      // If Address is an arugment then try to emits its dbg value using
-      // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
-      // to help track missing debug info.
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap.
       if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
-        SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
-                              0, dl, SDNodeOrder);
-        DAG.AddDbgValue(SDV, 0, false);
+        // If variable is pinned by a alloca in dominating bb then
+        // use StaticAllocaMap.
+        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+          if (AI->getParent() != DI.getParent()) {
+            DenseMap<const AllocaInst*, int>::iterator SI =
+              FuncInfo.StaticAllocaMap.find(AI);
+            if (SI != FuncInfo.StaticAllocaMap.end()) {
+              SDV = DAG.getDbgValue(Variable, SI->second,
+                                    0, dl, SDNodeOrder);
+              DAG.AddDbgValue(SDV, 0, false);
+              return 0;
+            }
+          }
+        }
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
       }
     }
     return 0;
@@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 N.getResNo(), Offset, dl, SDNodeOrder);
           DAG.AddDbgValue(SDV, N.getNode(), false);
         }
-      } else if (isa<PHINode>(V) && !V->use_empty() ) {
+      } else if (!V->use_empty() ) {
         // Do not call getValue(V) yet, as we don't want to generate code.
         // Remember it for later.
         DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
         DanglingDebugInfoMap[V] = DDI;
       } else {
         // We may expand this to cover more cases.  One case where we have no
-        // data available is an unreferenced parameter; we need this fallback.
-        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
-                              Offset, dl, SDNodeOrder);
-        DAG.AddDbgValue(SDV, 0, false);
+        // data available is an unreferenced parameter.
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
       }
     }
 
@@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (SI == FuncInfo.StaticAllocaMap.end())
       return 0; // VLAs.
     int FI = SI->second;
-    
+
     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
     if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
       MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
@@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::eh_sjlj_longjmp: {
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
-                            getRoot(),
-                            getValue(I.getArgOperand(0))));
+                            getRoot(), getValue(I.getArgOperand(0))));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_dispatch_setup: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+                            getRoot(), getValue(I.getArgOperand(0))));
     return 0;
   }
 
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = getValue(I.getArgOperand(1));
+    if (isa<ConstantSDNode>(ShAmt)) {
+      visitTargetIntrinsic(I, Intrinsic);
+      return 0;
+    }
+    unsigned NewIntrinsic = 0;
+    EVT ShAmtVT = MVT::v2i32;
+    switch (Intrinsic) {
+    case Intrinsic::x86_mmx_pslli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+      break;
+    case Intrinsic::x86_mmx_pslli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+      break;
+    case Intrinsic::x86_mmx_pslli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+      break;
+    case Intrinsic::x86_mmx_psrli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+      break;
+    case Intrinsic::x86_mmx_psrli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+      break;
+    case Intrinsic::x86_mmx_psrli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+      break;
+    case Intrinsic::x86_mmx_psrai_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+      break;
+    case Intrinsic::x86_mmx_psrai_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+      break;
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    }
+
+    // The vector shift intrinsics with scalars uses 32b shift amounts but
+    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
+    // to be zero.
+    // We must do this early because v2i32 is not a legal type.
+    DebugLoc dl = getCurDebugLoc();
+    SDValue ShOps[2];
+    ShOps[0] = ShAmt;
+    ShOps[1] = DAG.getConstant(0, MVT::i32);
+    ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+    EVT DestVT = TLI.getValueType(I.getType());
+    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                       DAG.getConstant(NewIntrinsic, MVT::i32),
+                       getValue(I.getArgOperand(0)), ShAmt);
+    setValue(&I, Res);
+    return 0;
+  }
   case Intrinsic::convertff:
   case Intrinsic::convertfsi:
   case Intrinsic::convertfui:
@@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
     // Store the stack protector onto the stack.
     Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
-                       PseudoSourceValue::getFixedStack(FI),
-                       0, true, false, 0);
+                       MachinePointerInfo::getFixedStack(FI),
+                       true, false, 0);
     setValue(&I, Res);
     DAG.setRoot(Res);
     return 0;
@@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
   case Intrinsic::prefetch: {
     SDValue Ops[4];
+    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
     Ops[0] = getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
     Ops[2] = getValue(I.getArgOperand(1));
     Ops[3] = getValue(I.getArgOperand(2));
-    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+                                        DAG.getVTList(MVT::Other),
+                                        &Ops[0], 4,
+                                        EVT::getIntegerVT(*Context, 8),
+                                        MachinePointerInfo(I.getArgOperand(0)),
+                                        0, /* align */
+                                        false, /* volatile */
+                                        rw==0, /* read */
+                                        rw==1)); /* write */
     return 0;
   }
-
   case Intrinsic::memory_barrier: {
     SDValue Ops[6];
     Ops[0] = getRoot();
@@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                     getValue(I.getArgOperand(0)),
                     getValue(I.getArgOperand(1)),
                     getValue(I.getArgOperand(2)),
-                    I.getArgOperand(0));
+                    MachinePointerInfo(I.getArgOperand(0)));
     setValue(&I, L);
     DAG.setRoot(L.getValue(1));
     return 0;
@@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                         FTy->isVarArg(), Outs, FTy->getContext());
 
   SDValue DemoteStackSlot;
+  int DemoteStackIdx = -100;
 
   if (!CanLowerReturn) {
     uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
                       FTy->getReturnType());
     MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
     const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
 
-    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
     Entry.Node = DemoteStackSlot;
     Entry.Ty = StackSlotPtrType;
     Entry.isSExt = false;
@@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                 DemoteStackSlot,
                                 DAG.getConstant(Offsets[i], PtrVT));
       SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
-                              Add, NULL, Offsets[i], false, false, 1);
+                              Add,
+                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+                              false, false, 1);
       Values[i] = L;
       Chains[i] = L.getValue(1);
     }
@@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                                 MVT::Other, &Chains[0], NumValues);
     PendingLoads.push_back(Chain);
-    
+
     // Collect the legal value parts into potentially illegal values
     // that correspond to the original function's return values.
     SmallVector<EVT, 4> RetTys;
@@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
       EVT VT = RetTys[I];
       EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
       unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-  
+
       SDValue ReturnValue =
         getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
                          RegisterVT, VT, AssertOp);
@@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
 
   SDValue Ptr = Builder.getValue(PtrVal);
   SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
-                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+                                        Ptr, MachinePointerInfo(PtrVal),
                                         false /*volatile*/,
                                         false /*nontemporal*/, 1 /* align=1 */);
 
@@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     visitInlineAsm(&I);
     return;
   }
-  
+
+  // See if any floating point values are being passed to this function. This is
+  // used to emit an undefined reference to fltused on Windows.
+  const FunctionType *FT =
+    cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  if (FT->isVarArg() &&
+      !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+    for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+      const Type* T = I.getArgOperand(i)->getType();
+      for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+           i != e; ++i) {
+        if (!i->isFloatingPointTy()) continue;
+        MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+        break;
+      }
+    }
+  }
+
   const char *RenameFn = 0;
   if (Function *F = I.getCalledFunction()) {
     if (F->isDeclaration()) {
@@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
       }
     }
   }
-  
+
   SDValue Callee;
   if (!RenameFn)
     Callee = getValue(I.getCalledValue());
@@ -5008,7 +5226,7 @@ public:
   /// contains the set of register corresponding to the operand.
   RegsForValue AssignedRegs;
 
-  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
     : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
   }
 
@@ -5083,6 +5301,8 @@ private:
   }
 };
 
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
 } // end llvm namespace.
 
 /// isAllocatableRegister - If the specified register is safe to allocate,
@@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
       // vector types).
       EVT RegVT = *PhysReg.second->vt_begin();
       if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                                          RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
         // machine.
         RegVT = EVT::getIntegerVT(Context,
                                   OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                                          RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
       }
@@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
 
   /// ConstraintOperands - Information about all of the constraints.
-  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+  SDISelAsmOperandInfoVector ConstraintOperands;
 
   std::set<unsigned> OutputRegs, InputRegs;
 
-  // Do a prepass over the constraints, canonicalizing them, and building up the
-  // ConstraintOperands list.
-  std::vector<InlineAsm::ConstraintInfo>
-    ConstraintInfos = IA->ParseConstraints();
-
-  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-
-  SDValue Chain, Flag;
-
-  // We won't need to flush pending loads if this asm doesn't touch
-  // memory and is nonvolatile.
-  if (hasMemory || IA->hasSideEffects())
-    Chain = getRoot();
-  else
-    Chain = DAG.getRoot();
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+  bool hasMemory = false;
 
   unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
   unsigned ResNo = 0;   // ResNo - The result number of the next output.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
     SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
 
     EVT OpVT = MVT::Other;
@@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     // If this is an input or an indirect output, process the call argument.
     // BasicBlocks are labels, currently appearing only in asm's.
     if (OpInfo.CallOperandVal) {
-      // Strip bitcasts, if any.  This mostly comes up for functions.
-      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
       if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
       } else {
@@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     }
 
     OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operand accesses access memory.
+    if (OpInfo.isIndirect)
+      hasMemory = true;
+    else {
+      for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+        TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+        if (CType == TargetLowering::C_Memory) {
+          hasMemory = true;
+          break;
+        }
+      }
+    }
   }
 
+  SDValue Chain, Flag;
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
   // Second pass over the constraints: compute which constraint option to use
   // and assign registers to constraints that want a specific physreg.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
 
     // If this is an output operand with a matching input operand, look up the
@@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     // error.
     if (OpInfo.hasMatchingInput()) {
       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-      
+
       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
         if ((OpInfo.ConstraintVT.isInteger() !=
              Input.ConstraintVT.isInteger()) ||
@@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     // need to to provide an address for the memory input.
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         !OpInfo.isIndirect) {
-      assert(OpInfo.Type == InlineAsm::isInput &&
+      assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
              "Can only indirectify direct input operands!");
 
       // Memory operands really want the address of the value.  If we don't have
@@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
         Chain = DAG.getStore(Chain, getCurDebugLoc(),
-                             OpInfo.CallOperand, StackSlot, NULL, 0,
+                             OpInfo.CallOperand, StackSlot,
+                             MachinePointerInfo::getFixedStack(SSFI),
                              false, false, 0);
         OpInfo.CallOperand = StackSlot;
       }
@@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
   }
 
-  ConstraintInfos.clear();
-
   // Second pass - Loop over all of the operands, assigning virtual or physregs
   // to register class operands.
   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
@@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
   AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
 
-  // Remember the AlignStack bit as operand 3.
-  AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
-                                            MVT::i1));
+  // Remember the HasSideEffect and AlignStack bits as operand 3.
+  unsigned ExtraInfo = 0;
+  if (IA->hasSideEffects())
+    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+  if (IA->isAlignStack())
+    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+                                                  TLI.getPointerTy()));
 
   // Loop over all of the inputs, copying the operand values into the
   // appropriate registers and processing the output regs.
@@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                           " don't know how to handle tied "
                           "indirect register inputs");
           }
-          
+
           RegsForValue MatchedRegs;
           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
           EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
@@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                            DAG, AsmNodeOperands);
           break;
         }
-        
+
         assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
         assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
                "Unexpected number of operands");
@@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       }
 
       // Treat indirect 'X' constraint as memory.
-      if (OpInfo.ConstraintType == TargetLowering::C_Other && 
-          OpInfo.isIndirect) 
+      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+          OpInfo.isIndirect)
         OpInfo.ConstraintType = TargetLowering::C_Memory;
 
       if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
         AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
         break;
       }
-      
+
       if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
         assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
         assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
@@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
 
   Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
-                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      DAG.getVTList(MVT::Other, MVT::Glue),
                       &AsmNodeOperands[0], AsmNodeOperands.size());
   Flag = Chain.getValue(1);
 
@@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       // not have the same VT as was expected.  Convert it to the right type
       // with bit_convert.
       if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
-        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+        Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
                           ResultType, Val);
 
       } else if (ResultType != Val.getValueType() &&
@@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
                                StoresToEmit[i].first,
                                getValue(StoresToEmit[i].second),
-                               StoresToEmit[i].second, 0,
+                               MachinePointerInfo(StoresToEmit[i].second),
                                false, false, 0);
     OutChains.push_back(Val);
   }
@@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
     for (unsigned i = 0; i != NumRegs; ++i) {
       ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
+      MyFlags.VT = RegisterVT.getSimpleVT();
       MyFlags.Used = isReturnValueUsed;
       if (RetSExt)
         MyFlags.Flags.setSExt();
@@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
   DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
           assert(InVals[i].getNode() &&
                  "LowerCall emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
+          assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
                  "LowerCall emitted a value with the wrong type!");
         });
 
@@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
         assert(InVals[i].getNode() &&
                "LowerFormalArguments emitted a null value!");
-        assert(Ins[i].VT == InVals[i].getValueType() &&
+        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
                "LowerFormalArguments emitted a value with the wrong type!");
       }
     });
@@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
 
     // Note down frame index for byval arguments.
     if (I->hasByValAttr() && !ArgValues.empty())
-      if (FrameIndexSDNode *FI = 
+      if (FrameIndexSDNode *FI =
           dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
         FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f400e9c83ac..a1a70c394a51 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -258,15 +258,16 @@ private:
 
   struct BitTestBlock {
     BitTestBlock(APInt F, APInt R, const Value* SV,
-                 unsigned Rg, bool E,
+                 unsigned Rg, EVT RgVT, bool E,
                  MachineBasicBlock* P, MachineBasicBlock* D,
                  const BitTestInfo& C):
-      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+      First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
       Parent(P), Default(D), Cases(C) { }
     APInt First;
     APInt Range;
     const Value *SValue;
     unsigned Reg;
+    EVT RegVT;
     bool Emitted;
     MachineBasicBlock *Parent;
     MachineBasicBlock *Default;
@@ -347,7 +348,7 @@ public:
   SDValue getControlRoot();
 
   DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-
+  void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }
   unsigned getSDNodeOrder() const { return SDNodeOrder; }
 
   void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
@@ -398,6 +399,10 @@ public:
   void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
                    MachineBasicBlock *LandingPad = NULL);
 
+  /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+  /// references that ned to refer to the last resulting block.
+  void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
 private:
   // Terminator instructions.
   void visitRet(const ReturnInst &I);
@@ -431,7 +436,8 @@ public:
   void visitSwitchCase(CaseBlock &CB,
                        MachineBasicBlock *SwitchBB);
   void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
-  void visitBitTestCase(MachineBasicBlock* NextMBB,
+  void visitBitTestCase(BitTestBlock &BB,
+                        MachineBasicBlock* NextMBB,
                         unsigned Reg,
                         BitTestCase &B,
                         MachineBasicBlock *SwitchBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 66cb5ceb09e5..62ebc81ef86e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -53,8 +54,17 @@
 using namespace llvm;
 
 STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
 STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
 
+#ifndef NDEBUG
+STATISTIC(NumBBWithOutOfOrderLineInfo,
+          "Number of blocks with out of order line number info");
+STATISTIC(NumMBBWithOutOfOrderLineInfo,
+          "Number of machine blocks with out of order line number info");
+#endif
+
 static cl::opt<bool>
 EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
           cl::desc("Enable verbose messages in the \"fast\" "
@@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 // SelectionDAGISel code
 //===----------------------------------------------------------------------===//
 
-SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+                                   CodeGenOpt::Level OL) :
   MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
   FuncInfo(new FunctionLoweringInfo(TLI)),
   CurDAG(new SelectionDAG(tm)),
   SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
   GFI(),
   OptLevel(OL),
-  DAGSize(0)
-{}
+  DAGSize(0) {
+    initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+    initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+  }
 
 SelectionDAGISel::~SelectionDAGISel() {
   delete SDB;
@@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
 static bool FunctionCallsSetJmp(const Function *F) {
   const Module *M = F->getParent();
   static const char *ReturnsTwiceFns[] = {
+    "_setjmp",
     "setjmp",
     "sigsetjmp",
     "setjmp_syscall",
@@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) {
 #undef NUM_RETURNS_TWICE_FNS
 }
 
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
+/// may trap on it.  In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+  // Loop for blocks with phi nodes.
+  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    PHINode *PN = dyn_cast<PHINode>(BB->begin());
+    if (PN == 0) continue;
+
+  ReprocessBlock:
+    // For each block with a PHI node, check to see if any of the input values
+    // are potentially trapping constant expressions.  Constant expressions are
+    // the only potentially trapping value that can occur as the argument to a
+    // PHI.
+    for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+        if (CE == 0 || !CE->canTrap()) continue;
+
+        // The only case we have to worry about is when the edge is critical.
+        // Since this block has a PHI Node, we assume it has multiple input
+        // edges: check to see if the pred has multiple successors.
+        BasicBlock *Pred = PN->getIncomingBlock(i);
+        if (Pred->getTerminator()->getNumSuccessors() == 1)
+          continue;
+
+        // Okay, we have to split this edge.
+        SplitCriticalEdge(Pred->getTerminator(),
+                          GetSuccessorNumber(Pred, BB), SDISel, true);
+        goto ReprocessBlock;
+      }
+  }
+}
+
 bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // Do some sanity-checking on the command-line options.
   assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 
   DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
+  SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
   CurDAG->init(*MF);
   FuncInfo->set(Fn, *MF);
   SDB->init(GFI, *AA);
@@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   if (!FuncInfo->ArgDbgValues.empty())
     for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
            E = RegInfo->livein_end(); LI != E; ++LI)
-      if (LI->second) 
+      if (LI->second)
         LiveInMap.insert(std::make_pair(LI->first, LI->second));
 
   // Insert DBG_VALUE instructions for function arguments to the entry block.
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
     if (LDI != LiveInMap.end()) {
       MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
       MachineBasicBlock::iterator InsertPos = Def;
-      const MDNode *Variable = 
+      const MDNode *Variable =
         MI->getOperand(MI->getNumOperands()-1).getMetadata();
       unsigned Offset = MI->getOperand(1).getImm();
       // Def is never a terminator here, so it is ok to increment InsertPos.
-      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), 
+      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
               TII.get(TargetOpcode::DBG_VALUE))
         .addReg(LDI->second, RegState::Debug)
         .addImm(Offset).addMetadata(Variable);
+
+      // If this vreg is directly copied into an exported register then
+      // that COPY instructions also need DBG_VALUE, if it is the only
+      // user of LDI->second.
+      MachineInstr *CopyUseMI = NULL;
+      for (MachineRegisterInfo::use_iterator
+             UI = RegInfo->use_begin(LDI->second);
+           MachineInstr *UseMI = UI.skipInstruction();) {
+        if (UseMI->isDebugValue()) continue;
+        if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+          CopyUseMI = UseMI; continue;
+        }
+        // Otherwise this is another use or second copy use.
+        CopyUseMI = NULL; break;
+      }
+      if (CopyUseMI) {
+        MachineInstr *NewMI =
+          BuildMI(*MF, CopyUseMI->getDebugLoc(),
+                  TII.get(TargetOpcode::DBG_VALUE))
+          .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+          .addImm(Offset).addMetadata(Variable);
+        EntryMBB->insertAfter(CopyUseMI, NewMI);
+      }
     }
   }
 
@@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
              II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
         const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
 
-        // Operand 1 of an inline asm instruction indicates whether the asm
-        // needs stack or not.
-        if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
-            (TID.isCall() && !TID.isReturn())) {
+        if ((TID.isCall() && !TID.isReturn()) ||
+            II->isStackAligningInlineAsm()) {
           MFI->setHasCalls(true);
           goto done;
         }
@@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
 
   // Final step, emit the lowered DAG as machine code.
   CodeGenAndEmitDAG();
+  return;
 }
 
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
 
     // Only install this information if it tells us something.
     if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
-      DestReg -= TargetRegisterInfo::FirstVirtualRegister;
-      if (DestReg >= FuncInfo->LiveOutRegInfo.size())
-        FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+      FuncInfo->LiveOutRegInfo.grow(DestReg);
       FunctionLoweringInfo::LiveOutInfo &LOI =
         FuncInfo->LiveOutRegInfo[DestReg];
       LOI.NumSignBits = NumSignBits;
@@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 
   // Emit machine code to BB.  This can change 'BB' to the last block being
   // inserted into.
+  MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
   {
     NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
 
-    FuncInfo->MBB = Scheduler->EmitSchedule();
+    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
     FuncInfo->InsertPt = Scheduler->InsertPos;
   }
 
+  // If the block was split, make sure we update any references that are used to
+  // update PHI nodes later on.
+  if (FirstMBB != LastMBB)
+    SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
   // Free the scheduler state.
   {
     NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
@@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() {
   DEBUG(errs() << "===== Instruction selection begins:\n");
 
   PreprocessISelDAG();
-  
+
   // Select target instructions for the DAG.
   {
     // Number all nodes with a topological order and set DAGSize.
     DAGSize = CurDAG->AssignTopologicalOrder();
-    
+
     // Create a dummy node (which is not added to allnodes), that adds
     // a reference to the root node, preventing it from being deleted,
     // and tracking any changes of the root.
     HandleSDNode Dummy(CurDAG->getRoot());
     ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
     ++ISelPosition;
-    
+
     // The AllNodes list is now topological-sorted. Visit the
     // nodes by starting at the end of the list (the root of the
     // graph) and preceding back toward the beginning (the entry
@@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() {
       // makes it theoretically possible to disable the DAGCombiner.
       if (Node->use_empty())
         continue;
-      
+
       SDNode *ResNode = Select(Node);
-      
+
       // FIXME: This is pretty gross.  'Select' should be changed to not return
       // anything at all and this code should be nuked with a tactical strike.
-      
+
       // If node should not be replaced, continue with the next one.
       if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
         continue;
       // Replace node.
       if (ResNode)
         ReplaceUses(Node, ResNode);
-      
+
       // If after the replacement this node is not used any more,
       // remove this dead node.
       if (Node->use_empty()) { // Don't delete EntryToken, etc.
@@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() {
         CurDAG->RemoveDeadNode(Node, &ISU);
       }
     }
-    
+
     CurDAG->setRoot(Dummy.getValue());
-  }    
+  }
 
   DEBUG(errs() << "===== Instruction selection ends:\n");
 
@@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() {
   }
 }
 
+
+
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             FastISel *FastIS) {
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile()) return false;
+
+  // Figure out which vreg this is going into.
+  unsigned LoadReg = FastIS->getRegForValue(LI);
+  assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+  // Check to see what the uses of this vreg are.  If it has no uses, or more
+  // than one use (at the machine instr level) then we can't fold it.
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+  if (RI == RegInfo->reg_end())
+    return false;
+
+  // See if there is exactly one use of the vreg.  If there are multiple uses,
+  // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result, in
+  // either case, we can't fold this.
+  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+  if (PostRI != RegInfo->reg_end())
+    return false;
+
+  assert(RI.getOperand().isUse() &&
+         "The only use of the vreg must be a use, we haven't emitted the def!");
+
+  MachineInstr *User = &*RI;
+  
+  // Set the insertion point properly.  Folding the load can cause generation of
+  // other random instructions (like sign extends) for addressing modes, make
+  // sure they get inserted in a logical place before the new instruction.
+  FuncInfo->InsertPt = User;
+  FuncInfo->MBB = User->getParent();
+
+  // Ask the target to try folding the load.
+  return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+#ifndef NDEBUG
+/// CheckLineNumbers - Check if basic block instructions follow source order
+/// or not.
+static void CheckLineNumbers(const BasicBlock *BB) {
+  unsigned Line = 0;
+  unsigned Col = 0;
+  for (BasicBlock::const_iterator BI = BB->begin(),
+         BE = BB->end(); BI != BE; ++BI) {
+    const DebugLoc DL = BI->getDebugLoc();
+    if (DL.isUnknown()) continue;
+    unsigned L = DL.getLine();
+    unsigned C = DL.getCol();
+    if (L < Line || (L == Line && C < Col)) {
+      ++NumBBWithOutOfOrderLineInfo;
+      return;
+    }
+    Line = L;
+    Col = C;
+  }
+}
+
+/// CheckLineNumbers - Check if machine basic block instructions follow source
+/// order or not.
+static void CheckLineNumbers(const MachineBasicBlock *MBB) {
+  unsigned Line = 0;
+  unsigned Col = 0;
+  for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
+         MBE = MBB->end(); MBI != MBE; ++MBI) {
+    const DebugLoc DL = MBI->getDebugLoc();
+    if (DL.isUnknown()) continue;
+    unsigned L = DL.getLine();
+    unsigned C = DL.getCol();
+    if (L < Line || (L == Line && C < Col)) {
+      ++NumMBBWithOutOfOrderLineInfo;
+      return;
+    }
+    Line = L;
+    Col = C;
+  }
+}
+#endif
+
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
@@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Iterate over all basic blocks in the function.
   for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
     const BasicBlock *LLVMBB = &*I;
+#ifndef NDEBUG
+    CheckLineNumbers(LLVMBB);
+#endif
     FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
     FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
 
@@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     // Setup an EH landing-pad block.
     if (FuncInfo->MBB->isLandingPad())
       PrepareEHLandingPad();
-    
+
     // Lower any arguments needed in this block if this is the entry block.
-    if (LLVMBB == &Fn.getEntryBlock())
+    if (LLVMBB == &Fn.getEntryBlock()) {
+      for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end();
+           DBI != DBE; ++DBI) {
+        if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) {
+          const DebugLoc DL = DI->getDebugLoc();
+          SDB->setCurDebugLoc(DL);
+          break;
+        }
+      }
       LowerArguments(LLVMBB);
+    }
 
     // Before doing SelectionDAG ISel, see if FastISel has been requested.
     if (FastIS) {
@@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         FastIS->recomputeInsertPt();
 
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(Inst))
+        if (FastIS->SelectInstruction(Inst)) {
+          // If fast isel succeeded, check to see if there is a single-use
+          // non-volatile load right before the selected instruction, and see if
+          // the load is used by the instruction.  If so, try to fold it.
+          const Instruction *BeforeInst = 0;
+          if (Inst != Begin)
+            BeforeInst = llvm::prior(llvm::prior(BI));
+          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
+            --BI; // If we succeeded, don't re-select the load.
           continue;
+        }
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(Inst)) {
@@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       FastIS->recomputeInsertPt();
     }
 
+    if (Begin != BI)
+      ++NumDAGBlocks;
+    else
+      ++NumFastIselBlocks;
+
     // Run SelectionDAG instruction selection on the remainder of the block
     // not handled by FastISel. If FastISel is not run, this is the entire
     // block.
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   }
 
   delete FastIS;
+#ifndef NDEBUG
+  for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
+       MBI != MBE; ++MBI)
+    CheckLineNumbers(MBI);
+#endif
 }
 
 void
@@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() {
       FuncInfo->InsertPt = FuncInfo->MBB->end();
       // Emit the code
       if (j+1 != ej)
-        SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+        SDB->visitBitTestCase(SDB->BitTestCases[i],
+                              SDB->BitTestCases[i].Cases[j+1].ThisBB,
                               SDB->BitTestCases[i].Reg,
                               SDB->BitTestCases[i].Cases[j],
                               FuncInfo->MBB);
       else
-        SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+        SDB->visitBitTestCase(SDB->BitTestCases[i],
+                              SDB->BitTestCases[i].Default,
                               SDB->BitTestCases[i].Reg,
                               SDB->BitTestCases[i].Cases[j],
                               FuncInfo->MBB);
@@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() {
   // additional DAGs necessary.
   for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
     // Set the current basic block to the mbb we wish to insert the code into
-    MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+    FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
     FuncInfo->InsertPt = FuncInfo->MBB->end();
 
     // Determine the unique successors.
@@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() {
     if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
       Succs.push_back(SDB->SwitchCases[i].FalseBB);
 
-    // Emit the code. Note that this could result in ThisBB being split, so
-    // we need to check for updates.
+    // Emit the code. Note that this could result in FuncInfo->MBB being split.
     SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
     CurDAG->setRoot(SDB->getRoot());
     SDB->clear();
     CodeGenAndEmitDAG();
-    ThisBB = FuncInfo->MBB;
+
+    // Remember the last block, now that any splitting is done, for use in
+    // populating PHI nodes in successors.
+    MachineBasicBlock *ThisBB = FuncInfo->MBB;
 
     // Handle any PHI nodes in successors of this chunk, as if we were coming
     // from the original BB before switch expansion.  Note that PHI nodes can
@@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
   return Ctor(this, OptLevel);
 }
 
-ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
-  return new ScheduleHazardRecognizer();
-}
-
 //===----------------------------------------------------------------------===//
 // Helper functions used by the generated instruction selector.
 //===----------------------------------------------------------------------===//
@@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
   Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
   Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1
   Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc
-  Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]);  // 3
+  Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]);  // 3 (SideEffect, AlignStack)
 
   unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
-  if (InOps[e-1].getValueType() == MVT::Flag)
-    --e;  // Don't process a flag operand if it is here.
+  if (InOps[e-1].getValueType() == MVT::Glue)
+    --e;  // Don't process a glue operand if it is here.
 
   while (i != e) {
     unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
@@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
     }
   }
 
-  // Add the flag input back if present.
+  // Add the glue input back if present.
   if (e != InOps.size())
     Ops.push_back(InOps.back());
 }
 
-/// findFlagUse - Return use of EVT::Flag value produced by the specified
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
 /// SDNode.
 ///
-static SDNode *findFlagUse(SDNode *N) {
+static SDNode *findGlueUse(SDNode *N) {
   unsigned FlagResNo = N->getNumValues()-1;
   for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
     SDUse &Use = I.getUse();
@@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
   // never find it.
   //
   // The Use may be -1 (unassigned) if it is a newly allocated node.  This can
-  // happen because we scan down to newly selected nodes in the case of flag
+  // happen because we scan down to newly selected nodes in the case of glue
   // uses.
   if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
     return false;
-  
+
   // Don't revisit nodes if we already scanned it and didn't fail, we know we
   // won't fail if we scan it again.
   if (!Visited.insert(Use))
@@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
     // Ignore chain uses, they are validated by HandleMergeInputChains.
     if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
       continue;
-    
+
     SDNode *N = Use->getOperand(i).getNode();
     if (N == Def) {
       if (Use == ImmedUse || Use == Root)
@@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
   //
   // * indicates nodes to be folded together.
   //
-  // If Root produces a flag, then it gets (even more) interesting. Since it
-  // will be "glued" together with its flag use in the scheduler, we need to
+  // If Root produces glue, then it gets (even more) interesting. Since it
+  // will be "glued" together with its glue use in the scheduler, we need to
   // check if it might reach N.
   //
   //          [N*]           //
@@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
   //           ^   /         //
   //           f  /          //
   //           | /           //
-  //          [FU]           //
+  //          [GU]           //
   //
-  // If FU (flag use) indirectly reaches N (the load), and Root folds N
-  // (call it Fold), then X is a predecessor of FU and a successor of
-  // Fold. But since Fold and FU are flagged together, this will create
+  // If GU (glue use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of GU and a successor of
+  // Fold. But since Fold and GU are glued together, this will create
   // a cycle in the scheduling graph.
 
-  // If the node has flags, walk down the graph to the "lowest" node in the
-  // flagged set.
+  // If the node has glue, walk down the graph to the "lowest" node in the
+  // glueged set.
   EVT VT = Root->getValueType(Root->getNumValues()-1);
-  while (VT == MVT::Flag) {
-    SDNode *FU = findFlagUse(Root);
-    if (FU == NULL)
+  while (VT == MVT::Glue) {
+    SDNode *GU = findGlueUse(Root);
+    if (GU == NULL)
       break;
-    Root = FU;
+    Root = GU;
     VT = Root->getValueType(Root->getNumValues()-1);
-    
-    // If our query node has a flag result with a use, we've walked up it.  If
+
+    // If our query node has a glue result with a use, we've walked up it.  If
     // the user (which has already been selected) has a chain or indirectly uses
     // the chain, our WalkChainUsers predicate will not consider it.  Because of
     // this, we cannot ignore chains in this predicate.
     IgnoreChains = false;
   }
-  
+
 
   SmallPtrSet<SDNode*, 16> Visited;
   return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
@@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
 SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
   std::vector<SDValue> Ops(N->op_begin(), N->op_end());
   SelectInlineAsmMemoryOperands(Ops);
-    
+
   std::vector<EVT> VTs;
   VTs.push_back(MVT::Other);
-  VTs.push_back(MVT::Flag);
+  VTs.push_back(MVT::Glue);
   SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
                                 VTs, &Ops[0], Ops.size());
   New->setNodeId(-1);
@@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
 }
 
 /// GetVBR - decode a vbr encoding whose top bit is set.
-ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
 GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
   assert(Val >= 128 && "Not a VBR");
   Val &= 127;  // Remove first vbr bit.
-  
+
   unsigned Shift = 7;
   uint64_t NextBits;
   do {
@@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
     Val |= (NextBits&127) << Shift;
     Shift += 7;
   } while (NextBits & 128);
-  
+
   return Val;
 }
 
 
-/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
-/// interior flag and chain results to use the new flag and chain results.
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
 void SelectionDAGISel::
-UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
-                     const SmallVectorImpl<SDNode*> &ChainNodesMatched,
-                     SDValue InputFlag,
-                     const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
-                     bool isMorphNodeTo) {
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+                    const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                    SDValue InputGlue,
+                    const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+                    bool isMorphNodeTo) {
   SmallVector<SDNode*, 4> NowDeadNodes;
-  
+
   ISelUpdater ISU(ISelPosition);
 
   // Now that all the normal results are replaced, we replace the chain and
-  // flag results if present.
+  // glue results if present.
   if (!ChainNodesMatched.empty()) {
     assert(InputChain.getNode() != 0 &&
            "Matched input chains but didn't produce a chain");
@@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
     // Replace all the chain results with the final chain we ended up with.
     for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
       SDNode *ChainNode = ChainNodesMatched[i];
-      
+
       // If this node was already deleted, don't look at it.
       if (ChainNode->getOpcode() == ISD::DELETED_NODE)
         continue;
-      
+
       // Don't replace the results of the root node if we're doing a
       // MorphNodeTo.
       if (ChainNode == NodeToMatch && isMorphNodeTo)
         continue;
-      
+
       SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
-      if (ChainVal.getValueType() == MVT::Flag)
+      if (ChainVal.getValueType() == MVT::Glue)
         ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
       assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
       CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
-      
+
       // If the node became dead and we haven't already seen it, delete it.
       if (ChainNode->use_empty() &&
           !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
         NowDeadNodes.push_back(ChainNode);
     }
   }
-  
-  // If the result produces a flag, update any flag results in the matched
-  // pattern with the flag result.
-  if (InputFlag.getNode() != 0) {
+
+  // If the result produces glue, update any glue results in the matched
+  // pattern with the glue result.
+  if (InputGlue.getNode() != 0) {
     // Handle any interior nodes explicitly marked.
-    for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
-      SDNode *FRN = FlagResultNodesMatched[i];
-      
+    for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+      SDNode *FRN = GlueResultNodesMatched[i];
+
       // If this node was already deleted, don't look at it.
       if (FRN->getOpcode() == ISD::DELETED_NODE)
         continue;
-      
-      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
-             "Doesn't have a flag result");
+
+      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+             "Doesn't have a glue result");
       CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
-                                        InputFlag, &ISU);
-      
+                                        InputGlue, &ISU);
+
       // If the node became dead and we haven't already seen it, delete it.
       if (FRN->use_empty() &&
           !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
         NowDeadNodes.push_back(FRN);
     }
   }
-  
+
   if (!NowDeadNodes.empty())
     CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
-  
+
   DEBUG(errs() << "ISEL: Match complete!\n");
 }
 
@@ -1392,17 +1589,17 @@ enum ChainResult {
 ///
 /// The walk we do here is guaranteed to be small because we quickly get down to
 /// already selected nodes "below" us.
-static ChainResult 
+static ChainResult
 WalkChainUsers(SDNode *ChainedNode,
                SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
                SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
   ChainResult Result = CR_Simple;
-  
+
   for (SDNode::use_iterator UI = ChainedNode->use_begin(),
          E = ChainedNode->use_end(); UI != E; ++UI) {
     // Make sure the use is of the chain, not some other value we produce.
     if (UI.getUse().getValueType() != MVT::Other) continue;
-    
+
     SDNode *User = *UI;
 
     // If we see an already-selected machine node, then we've gone beyond the
@@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode,
     if (User->isMachineOpcode() ||
         User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
       continue;
-    
+
     if (User->getOpcode() == ISD::CopyToReg ||
         User->getOpcode() == ISD::CopyFromReg ||
         User->getOpcode() == ISD::INLINEASM ||
@@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode,
       if (!std::count(ChainedNodesInPattern.begin(),
                       ChainedNodesInPattern.end(), User))
         return CR_InducesCycle;
-      
+
       // Otherwise we found a node that is part of our pattern.  For example in:
       //   x = load ptr
       //   y = x+4
@@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode,
       InteriorChainedNodes.push_back(User);
       continue;
     }
-    
+
     // If we found a TokenFactor, there are two cases to consider: first if the
     // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
     // uses of the TF are in our pattern) we just want to ignore it.  Second,
@@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode,
     case CR_LeadsToInteriorNode:
       break;  // Otherwise, keep processing.
     }
-    
+
     // Okay, we know we're in the interesting interior case.  The TokenFactor
     // is now going to be considered part of the pattern so that we rewrite its
     // uses (it may have uses that are not part of the pattern) with the
@@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode,
     InteriorChainedNodes.push_back(User);
     continue;
   }
-  
+
   return Result;
 }
 
@@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
                        InteriorChainedNodes) == CR_InducesCycle)
       return SDValue(); // Would induce a cycle.
   }
-  
+
   // Okay, we have walked all the matched nodes and collected TokenFactor nodes
   // that we are interested in.  Form our input TokenFactor node.
   SmallVector<SDValue, 3> InputChains;
@@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
     if (N->getOpcode() != ISD::TokenFactor) {
       if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
         continue;
-      
+
       // Otherwise, add the input chain.
       SDValue InChain = ChainNodesMatched[i]->getOperand(0);
       assert(InChain.getValueType() == MVT::Other && "Not a chain");
       InputChains.push_back(InChain);
       continue;
     }
-    
+
     // If we have a token factor, we want to add all inputs of the token factor
     // that are not part of the pattern we're matching.
     for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
@@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
         InputChains.push_back(N->getOperand(op));
     }
   }
-  
+
   SDValue Res;
   if (InputChains.size() == 1)
     return InputChains[0];
   return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
                          MVT::Other, &InputChains[0], InputChains.size());
-}  
+}
 
 /// MorphNode - Handle morphing a node in place for the selector.
 SDNode *SelectionDAGISel::
@@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
           const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
   // It is possible we're using MorphNodeTo to replace a node with no
   // normal results with one that has a normal result (or we could be
-  // adding a chain) and the input could have flags and chains as well.
+  // adding a chain) and the input could have glue and chains as well.
   // In this case we need to shift the operands down.
   // FIXME: This is a horrible hack and broken in obscure cases, no worse
   // than the old isel though.
-  int OldFlagResultNo = -1, OldChainResultNo = -1;
+  int OldGlueResultNo = -1, OldChainResultNo = -1;
 
   unsigned NTMNumResults = Node->getNumValues();
-  if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
-    OldFlagResultNo = NTMNumResults-1;
+  if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+    OldGlueResultNo = NTMNumResults-1;
     if (NTMNumResults != 1 &&
         Node->getValueType(NTMNumResults-2) == MVT::Other)
       OldChainResultNo = NTMNumResults-2;
@@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
   }
 
   unsigned ResNumResults = Res->getNumValues();
-  // Move the flag if needed.
-  if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
-      (unsigned)OldFlagResultNo != ResNumResults-1)
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), 
+  // Move the glue if needed.
+  if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+      (unsigned)OldGlueResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
                                       SDValue(Res, ResNumResults-1));
 
-  if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+  if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
     --ResNumResults;
 
   // Move the chain reference if needed.
   if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
       (unsigned)OldChainResultNo != ResNumResults-1)
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), 
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
                                       SDValue(Res, ResNumResults-1));
 
   // Otherwise, no replacement happened because the node already exists. Replace
   // Uses of the old node with the new one.
   if (Res != Node)
     CurDAG->ReplaceAllUsesWith(Node, Res);
-  
+
   return Res;
 }
 
 /// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
-          SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+          SDValue N,
+          const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
   // Accept if it is exactly the same as a previously recorded node.
   unsigned RecNo = MatcherTable[MatcherIndex++];
   assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-  return N == RecordedNodes[RecNo];
+  return N == RecordedNodes[RecNo].first;
 }
-  
+
 /// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                       SelectionDAGISel &SDISel) {
   return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
 }
 
 /// CheckNodePredicate - Implements OP_CheckNodePredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                    SelectionDAGISel &SDISel, SDNode *N) {
   return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
             SDNode *N) {
   uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
   return N->getOpcode() == Opc;
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
           SDValue N, const TargetLowering &TLI) {
   MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
   if (N.getValueType() == VT) return true;
-  
+
   // Handle the case when VT is iPTR.
   return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                SDValue N, const TargetLowering &TLI,
                unsigned ChildNo) {
@@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
 }
 
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
               SDValue N) {
   return cast<CondCodeSDNode>(N)->get() ==
       (ISD::CondCode)MatcherTable[MatcherIndex++];
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                SDValue N, const TargetLowering &TLI) {
   MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
   if (cast<VTSDNode>(N)->getVT() == VT)
     return true;
-  
+
   // Handle the case when VT is iPTR.
   return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
              SDValue N) {
   int64_t Val = MatcherTable[MatcherIndex++];
   if (Val & 128)
     Val = GetVBR(Val, MatcherTable, MatcherIndex);
-  
+
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
   return C != 0 && C->getSExtValue() == Val;
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
             SDValue N, SelectionDAGISel &SDISel) {
   int64_t Val = MatcherTable[MatcherIndex++];
   if (Val & 128)
     Val = GetVBR(Val, MatcherTable, MatcherIndex);
-  
+
   if (N->getOpcode() != ISD::AND) return false;
-  
+
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
 }
 
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
            SDValue N, SelectionDAGISel &SDISel) {
   int64_t Val = MatcherTable[MatcherIndex++];
   if (Val & 128)
     Val = GetVBR(Val, MatcherTable, MatcherIndex);
-  
+
   if (N->getOpcode() != ISD::OR) return false;
-  
+
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
 }
@@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
 /// fail, set Result=true and return anything.  If the current predicate is
 /// known to pass, set Result=false and return the MatcherIndex to continue
 /// with.  If the current predicate is unknown, set Result=false and return the
-/// MatcherIndex to continue with. 
+/// MatcherIndex to continue with.
 static unsigned IsPredicateKnownToFail(const unsigned char *Table,
                                        unsigned Index, SDValue N,
                                        bool &Result, SelectionDAGISel &SDISel,
-                                       SmallVectorImpl<SDValue> &RecordedNodes){
+                 SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
   switch (Table[Index++]) {
   default:
     Result = false;
@@ -1782,21 +1980,21 @@ namespace {
 struct MatchScope {
   /// FailIndex - If this match fails, this is the index to continue with.
   unsigned FailIndex;
-  
+
   /// NodeStack - The node stack when the scope was formed.
   SmallVector<SDValue, 4> NodeStack;
-  
+
   /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
   unsigned NumRecordedNodes;
-  
+
   /// NumMatchedMemRefs - The number of matched memref entries.
   unsigned NumMatchedMemRefs;
-  
-  /// InputChain/InputFlag - The current chain/flag 
-  SDValue InputChain, InputFlag;
+
+  /// InputChain/InputGlue - The current chain/glue
+  SDValue InputChain, InputGlue;
 
   /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
-  bool HasChainNodesMatched, HasFlagResultNodesMatched;
+  bool HasChainNodesMatched, HasGlueResultNodesMatched;
 };
 
 }
@@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
   case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);
   }
-  
+
   assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
 
   // Set up the node stack with NodeToMatch as the only node on the stack.
@@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   // MatchScopes - Scopes used when matching, if a match failure happens, this
   // indicates where to continue checking.
   SmallVector<MatchScope, 8> MatchScopes;
-  
+
   // RecordedNodes - This is the set of nodes that have been recorded by the
-  // state machine.
-  SmallVector<SDValue, 8> RecordedNodes;
-  
+  // state machine.  The second value is the parent of the node, or null if the
+  // root is recorded.
+  SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
   // MatchedMemRefs - This is the set of MemRef's we've seen in the input
   // pattern.
   SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
-  
-  // These are the current input chain and flag for use when generating nodes.
+
+  // These are the current input chain and glue for use when generating nodes.
   // Various Emit operations change these.  For example, emitting a copytoreg
   // uses and updates these.
-  SDValue InputChain, InputFlag;
-  
+  SDValue InputChain, InputGlue;
+
   // ChainNodesMatched - If a pattern matches nodes that have input/output
   // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
   // which ones they are.  The result is captured into this list so that we can
   // update the chain results when the pattern is complete.
   SmallVector<SDNode*, 3> ChainNodesMatched;
-  SmallVector<SDNode*, 3> FlagResultNodesMatched;
-  
+  SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
   DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
         NodeToMatch->dump(CurDAG);
         errs() << '\n');
-  
+
   // Determine where to start the interpreter.  Normally we start at opcode #0,
   // but if the state machine starts with an OPC_SwitchOpcode, then we
   // accelerate the first lookup (which is guaranteed to be hot) with the
   // OpcodeOffset table.
   unsigned MatcherIndex = 0;
-  
+
   if (!OpcodeOffset.empty()) {
     // Already computed the OpcodeOffset table, just index into it.
     if (N.getOpcode() < OpcodeOffset.size())
@@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     if (N.getOpcode() < OpcodeOffset.size())
       MatcherIndex = OpcodeOffset[N.getOpcode()];
   }
-  
+
   while (1) {
     assert(MatcherIndex < TableSize && "Invalid index");
 #ifndef NDEBUG
@@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       // determine immediately that the first check (or first several) will
       // immediately fail, don't even bother pushing a scope for them.
       unsigned FailIndex;
-      
+
       while (1) {
         unsigned NumToSkip = MatcherTable[MatcherIndex++];
         if (NumToSkip & 128)
@@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
           FailIndex = 0;
           break;
         }
-        
+
         FailIndex = MatcherIndex+NumToSkip;
-        
+
         unsigned MatcherIndexOfPredicate = MatcherIndex;
         (void)MatcherIndexOfPredicate; // silence warning.
-        
+
         // If we can't evaluate this predicate without pushing a scope (e.g. if
         // it is a 'MoveParent') or if the predicate succeeds on this node, we
         // push the scope and evaluate the full predicate chain.
@@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                                               Result, *this, RecordedNodes);
         if (!Result)
           break;
-        
+
         DEBUG(errs() << "  Skipped scope entry (due to false predicate) at "
                      << "index " << MatcherIndexOfPredicate
                      << ", continuing at " << FailIndex << "\n");
         ++NumDAGIselRetries;
-        
+
         // Otherwise, we know that this case of the Scope is guaranteed to fail,
         // move to the next case.
         MatcherIndex = FailIndex;
       }
-      
+
       // If the whole scope failed to match, bail.
       if (FailIndex == 0) break;
-      
+
       // Push a MatchScope which indicates where to go if the first child fails
       // to match.
       MatchScope NewEntry;
@@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       NewEntry.NumRecordedNodes = RecordedNodes.size();
       NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
       NewEntry.InputChain = InputChain;
-      NewEntry.InputFlag = InputFlag;
+      NewEntry.InputGlue = InputGlue;
       NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
-      NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+      NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
       MatchScopes.push_back(NewEntry);
       continue;
     }
-    case OPC_RecordNode:
+    case OPC_RecordNode: {
       // Remember this node, it may end up being an operand in the pattern.
-      RecordedNodes.push_back(N);
+      SDNode *Parent = 0;
+      if (NodeStack.size() > 1)
+        Parent = NodeStack[NodeStack.size()-2].getNode();
+      RecordedNodes.push_back(std::make_pair(N, Parent));
       continue;
-        
+    }
+
     case OPC_RecordChild0: case OPC_RecordChild1:
     case OPC_RecordChild2: case OPC_RecordChild3:
     case OPC_RecordChild4: case OPC_RecordChild5:
@@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (ChildNo >= N.getNumOperands())
         break;  // Match fails if out of range child #.
 
-      RecordedNodes.push_back(N->getOperand(ChildNo));
+      RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+                                             N.getNode()));
       continue;
     }
     case OPC_RecordMemRef:
       MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
       continue;
-        
-    case OPC_CaptureFlagInput:
-      // If the current node has an input flag, capture it in InputFlag.
+
+    case OPC_CaptureGlueInput:
+      // If the current node has an input glue, capture it in InputGlue.
       if (N->getNumOperands() != 0 &&
-          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
-        InputFlag = N->getOperand(N->getNumOperands()-1);
+          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+        InputGlue = N->getOperand(N->getNumOperands()-1);
       continue;
-        
+
     case OPC_MoveChild: {
       unsigned ChildNo = MatcherTable[MatcherIndex++];
       if (ChildNo >= N.getNumOperands())
@@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       NodeStack.push_back(N);
       continue;
     }
-        
+
     case OPC_MoveParent:
       // Pop the current node off the NodeStack.
       NodeStack.pop_back();
       assert(!NodeStack.empty() && "Node stack imbalance!");
-      N = NodeStack.back();  
+      N = NodeStack.back();
       continue;
-     
+
     case OPC_CheckSame:
       if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
       continue;
@@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       unsigned CPNum = MatcherTable[MatcherIndex++];
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
-      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+                               RecordedNodes[RecNo].first, CPNum,
                                RecordedNodes))
         break;
       continue;
@@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_CheckOpcode:
       if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
       continue;
-        
+
     case OPC_CheckType:
       if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
       continue;
-        
+
     case OPC_SwitchOpcode: {
       unsigned CurNodeOpcode = N.getOpcode();
       unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         // If the opcode matches, then we will execute this case.
         if (CurNodeOpcode == Opc)
           break;
-      
+
         // Otherwise, skip over this case.
         MatcherIndex += CaseSize;
       }
-      
+
       // If no cases matched, bail out.
       if (CaseSize == 0) break;
-      
+
       // Otherwise, execute the case we found.
       DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart
                    << " to " << MatcherIndex << "\n");
       continue;
     }
-        
+
     case OPC_SwitchType: {
-      MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+      MVT CurNodeVT = N.getValueType().getSimpleVT();
       unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
       unsigned CaseSize;
       while (1) {
@@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (CaseSize & 128)
           CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
         if (CaseSize == 0) break;
-        
-        MVT::SimpleValueType CaseVT =
-          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+
+        MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
         if (CaseVT == MVT::iPTR)
-          CaseVT = TLI.getPointerTy().SimpleTy;
-        
+          CaseVT = TLI.getPointerTy();
+
         // If the VT matches, then we will execute this case.
         if (CurNodeVT == CaseVT)
           break;
-        
+
         // Otherwise, skip over this case.
         MatcherIndex += CaseSize;
       }
-      
+
       // If no cases matched, bail out.
       if (CaseSize == 0) break;
-      
+
       // Otherwise, execute the case we found.
       DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
                    << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
@@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_CheckOrImm:
       if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
       continue;
-        
+
     case OPC_CheckFoldableChainNode: {
       assert(NodeStack.size() != 1 && "No parent node");
       // Verify that all intermediate nodes between the root and this one have
@@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                          NodeToMatch, OptLevel,
                          true/*We validate our own chains*/))
         break;
-      
+
       continue;
     }
     case OPC_EmitInteger: {
@@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       int64_t Val = MatcherTable[MatcherIndex++];
       if (Val & 128)
         Val = GetVBR(Val, MatcherTable, MatcherIndex);
-      RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
       continue;
     }
     case OPC_EmitRegister: {
       MVT::SimpleValueType VT =
         (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
       unsigned RegNo = MatcherTable[MatcherIndex++];
-      RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getRegister(RegNo, VT), (SDNode*)0));
       continue;
     }
-        
+
     case OPC_EmitConvertToTarget:  {
       // Convert from IMM/FPIMM to target version.
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-      SDValue Imm = RecordedNodes[RecNo];
+      SDValue Imm = RecordedNodes[RecNo].first;
 
       if (Imm->getOpcode() == ISD::Constant) {
         int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
@@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
         Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
       }
-      
-      RecordedNodes.push_back(Imm);
+
+      RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
       continue;
     }
-        
+
     case OPC_EmitMergeInputChains1_0:    // OPC_EmitMergeInputChains, 1, 0
     case OPC_EmitMergeInputChains1_1: {  // OPC_EmitMergeInputChains, 1, 1
       // These are space-optimized forms of OPC_EmitMergeInputChains.
@@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
              "EmitMergeInputChains should be the first chain producing node");
       assert(ChainNodesMatched.empty() &&
              "Should only have one EmitMergeInputChains per match");
-      
+
       // Read all of the chained nodes.
       unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
       assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-      ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-        
+      ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
       // FIXME: What if other value results of the node have uses not matched
       // by this pattern?
       if (ChainNodesMatched.back() != NodeToMatch &&
-          !RecordedNodes[RecNo].hasOneUse()) {
+          !RecordedNodes[RecNo].first.hasOneUse()) {
         ChainNodesMatched.clear();
         break;
       }
-      
+
       // Merge the input chains if they are not intra-pattern references.
       InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-      
+
       if (InputChain.getNode() == 0)
         break;  // Failed to merge.
       continue;
     }
-        
+
     case OPC_EmitMergeInputChains: {
       assert(InputChain.getNode() == 0 &&
              "EmitMergeInputChains should be the first chain producing node");
@@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       for (unsigned i = 0; i != NumChains; ++i) {
         unsigned RecNo = MatcherTable[MatcherIndex++];
         assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-        ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-        
+        ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
         // FIXME: What if other value results of the node have uses not matched
         // by this pattern?
         if (ChainNodesMatched.back() != NodeToMatch &&
-            !RecordedNodes[RecNo].hasOneUse()) {
+            !RecordedNodes[RecNo].first.hasOneUse()) {
           ChainNodesMatched.clear();
           break;
         }
       }
-      
+
       // If the inner loop broke out, the match fails.
       if (ChainNodesMatched.empty())
         break;
 
       // Merge the input chains if they are not intra-pattern references.
       InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-      
+
       if (InputChain.getNode() == 0)
         break;  // Failed to merge.
 
       continue;
     }
-        
+
     case OPC_EmitCopyToReg: {
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
       unsigned DestPhysReg = MatcherTable[MatcherIndex++];
-      
+
       if (InputChain.getNode() == 0)
         InputChain = CurDAG->getEntryNode();
-      
+
       InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
-                                        DestPhysReg, RecordedNodes[RecNo],
-                                        InputFlag);
-      
-      InputFlag = InputChain.getValue(1);
+                                        DestPhysReg, RecordedNodes[RecNo].first,
+                                        InputGlue);
+
+      InputGlue = InputChain.getValue(1);
       continue;
     }
-        
+
     case OPC_EmitNodeXForm: {
       unsigned XFormNo = MatcherTable[MatcherIndex++];
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-      RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+      SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+      RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
       continue;
     }
-        
+
     case OPC_EmitNode:
     case OPC_MorphNodeTo: {
       uint16_t TargetOpc = MatcherTable[MatcherIndex++];
@@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
         VTs.push_back(VT);
       }
-      
+
       if (EmitNodeInfo & OPFL_Chain)
         VTs.push_back(MVT::Other);
-      if (EmitNodeInfo & OPFL_FlagOutput)
-        VTs.push_back(MVT::Flag);
-      
+      if (EmitNodeInfo & OPFL_GlueOutput)
+        VTs.push_back(MVT::Glue);
+
       // This is hot code, so optimize the two most common cases of 1 and 2
       // results.
       SDVTList VTList;
@@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         unsigned RecNo = MatcherTable[MatcherIndex++];
         if (RecNo & 128)
           RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-        
+
         assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
-        Ops.push_back(RecordedNodes[RecNo]);
+        Ops.push_back(RecordedNodes[RecNo].first);
       }
-      
+
       // If there are variadic operands to add, handle them now.
       if (EmitNodeInfo & OPFL_VariadicInfo) {
         // Determine the start index to copy from.
@@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
         assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
                "Invalid variadic node");
-        // Copy all of the variadic operands, not including a potential flag
+        // Copy all of the variadic operands, not including a potential glue
         // input.
         for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
              i != e; ++i) {
           SDValue V = NodeToMatch->getOperand(i);
-          if (V.getValueType() == MVT::Flag) break;
+          if (V.getValueType() == MVT::Glue) break;
           Ops.push_back(V);
         }
       }
-      
-      // If this has chain/flag inputs, add them.
+
+      // If this has chain/glue inputs, add them.
       if (EmitNodeInfo & OPFL_Chain)
         Ops.push_back(InputChain);
-      if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
-        Ops.push_back(InputFlag);
-      
+      if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+        Ops.push_back(InputGlue);
+
       // Create the node.
       SDNode *Res = 0;
       if (Opcode != OPC_MorphNodeTo) {
@@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         // add the results to the RecordedNodes list.
         Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
                                      VTList, Ops.data(), Ops.size());
-        
-        // Add all the non-flag/non-chain results to the RecordedNodes list.
+
+        // Add all the non-glue/non-chain results to the RecordedNodes list.
         for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
-          if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
-          RecordedNodes.push_back(SDValue(Res, i));
+          if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+          RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+                                                             (SDNode*) 0));
         }
-        
+
       } else {
         Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
                         EmitNodeInfo);
       }
-      
-      // If the node had chain/flag results, update our notion of the current
-      // chain and flag.
-      if (EmitNodeInfo & OPFL_FlagOutput) {
-        InputFlag = SDValue(Res, VTs.size()-1);
+
+      // If the node had chain/glue results, update our notion of the current
+      // chain and glue.
+      if (EmitNodeInfo & OPFL_GlueOutput) {
+        InputGlue = SDValue(Res, VTs.size()-1);
         if (EmitNodeInfo & OPFL_Chain)
           InputChain = SDValue(Res, VTs.size()-2);
       } else if (EmitNodeInfo & OPFL_Chain)
         InputChain = SDValue(Res, VTs.size()-1);
 
-      // If the OPFL_MemRefs flag is set on this node, slap all of the
+      // If the OPFL_MemRefs glue is set on this node, slap all of the
       // accumulated memrefs onto it.
       //
       // FIXME: This is vastly incorrect for patterns with multiple outputs
@@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         cast<MachineSDNode>(Res)
           ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
       }
-      
+
       DEBUG(errs() << "  "
                    << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
                    << " node: "; Res->dump(CurDAG); errs() << "\n");
-      
+
       // If this was a MorphNodeTo then we're completely done!
       if (Opcode == OPC_MorphNodeTo) {
-        // Update chain and flag uses.
-        UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
-                             InputFlag, FlagResultNodesMatched, true);
+        // Update chain and glue uses.
+        UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+                            InputGlue, GlueResultNodesMatched, true);
         return Res;
       }
-      
+
       continue;
     }
-        
-    case OPC_MarkFlagResults: {
+
+    case OPC_MarkGlueResults: {
       unsigned NumNodes = MatcherTable[MatcherIndex++];
-      
-      // Read and remember all the flag-result nodes.
+
+      // Read and remember all the glue-result nodes.
       for (unsigned i = 0; i != NumNodes; ++i) {
         unsigned RecNo = MatcherTable[MatcherIndex++];
         if (RecNo & 128)
           RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
 
         assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
-        FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+        GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
       }
       continue;
     }
-      
+
     case OPC_CompleteMatch: {
       // The match has been completed, and any new nodes (if any) have been
       // created.  Patch up references to the matched dag to use the newly
@@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         unsigned ResSlot = MatcherTable[MatcherIndex++];
         if (ResSlot & 128)
           ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-        
+
         assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
-        SDValue Res = RecordedNodes[ResSlot];
-        
+        SDValue Res = RecordedNodes[ResSlot].first;
+
         assert(i < NodeToMatch->getNumValues() &&
                NodeToMatch->getValueType(i) != MVT::Other &&
-               NodeToMatch->getValueType(i) != MVT::Flag &&
+               NodeToMatch->getValueType(i) != MVT::Glue &&
                "Invalid number of results to complete!");
         assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
                 NodeToMatch->getValueType(i) == MVT::iPTR ||
@@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
       }
 
-      // If the root node defines a flag, add it to the flag nodes to update
-      // list.
-      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
-        FlagResultNodesMatched.push_back(NodeToMatch);
-      
-      // Update chain and flag uses.
-      UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
-                           InputFlag, FlagResultNodesMatched, false);
-      
+      // If the root node defines glue, add it to the glue nodes to update list.
+      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+        GlueResultNodesMatched.push_back(NodeToMatch);
+
+      // Update chain and glue uses.
+      UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+                          InputGlue, GlueResultNodesMatched, false);
+
       assert(NodeToMatch->use_empty() &&
              "Didn't replace all uses of the node?");
-      
+
       // FIXME: We just return here, which interacts correctly with SelectRoot
       // above.  We should fix this to not return an SDNode* anymore.
       return 0;
     }
     }
-    
+
     // If the code reached this point, then the match failed.  See if there is
     // another child to try in the current 'Scope', otherwise pop it until we
     // find a case to check.
@@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
         MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
       MatcherIndex = LastScope.FailIndex;
-      
+
       DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n");
-    
+
       InputChain = LastScope.InputChain;
-      InputFlag = LastScope.InputFlag;
+      InputGlue = LastScope.InputGlue;
       if (!LastScope.HasChainNodesMatched)
         ChainNodesMatched.clear();
-      if (!LastScope.HasFlagResultNodesMatched)
-        FlagResultNodesMatched.clear();
+      if (!LastScope.HasGlueResultNodesMatched)
+        GlueResultNodesMatched.clear();
 
       // Check to see what the offset is at the new MatcherIndex.  If it is zero
       // we have reached the end of this scope, otherwise we have another child
@@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         LastScope.FailIndex = MatcherIndex+NumToSkip;
         break;
       }
-      
+
       // End of this scope, pop it and try the next child in the containing
       // scope.
       MatchScopes.pop_back();
     }
   }
 }
-    
+
 
 
 void SelectionDAGISel::CannotYetSelect(SDNode *N) {
   std::string msg;
   raw_string_ostream Msg(msg);
-  Msg << "Cannot yet select: ";
-  
+  Msg << "Cannot select: ";
+
   if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
       N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
       N->getOpcode() != ISD::INTRINSIC_VOID) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 8313de5e32bb..76eb9453561e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -93,7 +93,7 @@ namespace llvm {
     static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
       SDValue Op = EI.getNode()->getOperand(EI.getOperand());
       EVT VT = Op.getValueType();
-      if (VT == MVT::Flag)
+      if (VT == MVT::Glue)
         return "color=red,style=bold";
       else if (VT == MVT::Other)
         return "color=blue,style=dashed";
@@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
   raw_string_ostream O(s);
   O << "SU(" << SU->NodeNum << "): ";
   if (SU->getNode()) {
-    SmallVector<SDNode *, 4> FlaggedNodes;
-    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
-      FlaggedNodes.push_back(N);
-    while (!FlaggedNodes.empty()) {
+    SmallVector<SDNode *, 4> GluedNodes;
+    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+      GluedNodes.push_back(N);
+    while (!GluedNodes.empty()) {
       O << DOTGraphTraits<SelectionDAG*>
-        ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
-      FlaggedNodes.pop_back();
-      if (!FlaggedNodes.empty())
+        ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+      GluedNodes.pop_back();
+      if (!GluedNodes.empty())
         O << "\n    ";
     }
   } else {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b74f600cfa2d..691390e2a0e4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <cctype>
 using namespace llvm;
 
 namespace llvm {
@@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
       setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
       setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
     }
-    
+
     // These operations default to expand.
     setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
 
   // Most targets ignore the @llvm.prefetch intrinsic.
   setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-  
-  // ConstantFP nodes default to expand.  Targets can either change this to 
+
+  // ConstantFP nodes default to expand.  Targets can either change this to
   // Legal, in which case all fp constants are legal, or use isFPImmLegal()
   // to optimize expansions for certain constants.
   setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
@@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
 
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
-    
+
   IsLittleEndian = TD->isLittleEndian();
   ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
   memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
   memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
   maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+  maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+    = maxStoresPerMemmoveOptSize = 4;
   benefitFromCodePlacementOpt = false;
   UseUnderscoreSetJmp = false;
   UseUnderscoreLongJmp = false;
   SelectIsExpensive = false;
   IntDivIsCheap = false;
   Pow2DivIsCheap = false;
+  JumpIsExpensive = false;
   StackPointerRegisterToSaveRestore = 0;
   ExceptionPointerRegister = 0;
   ExceptionSelectorRegister = 0;
@@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
   // Figure out the right, legal destination reg to copy into.
   unsigned NumElts = VT.getVectorNumElements();
   MVT EltTy = VT.getVectorElementType();
-  
+
   unsigned NumVectorRegs = 1;
-  
-  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we 
+
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
   // could break down into LHS/RHS like LegalizeDAG does.
   if (!isPowerOf2_32(NumElts)) {
     NumVectorRegs = NumElts;
     NumElts = 1;
   }
-  
+
   // Divide the input until we get to a supported size.  This will always
   // end with a scalar if the target doesn't support vectors.
   while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
@@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
   }
 
   NumIntermediates = NumVectorRegs;
-  
+
   MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
   if (!TLI->isTypeLegal(NewVT))
     NewVT = EltTy;
@@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
   RegisterVT = DestVT;
   if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
     return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-  
+
   // Otherwise, promotion or legal types use the same number of registers as
   // the vector decimated to the appropriate level.
   return NumVectorRegs;
@@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() {
     RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
     TransformToType[MVT::ppcf128] = MVT::f64;
     ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
-  }    
+  }
 
   // Decide how to handle f64. If the target does not have native f64 support,
   // expand it to i64 and we will be generating soft float library calls.
@@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() {
       ValueTypeActions.setTypeAction(MVT::f32, Expand);
     }
   }
-  
+
   // Loop over all of the vector value types to see which need transformations.
   for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
     MVT VT = (MVT::SimpleValueType)i;
     if (isTypeLegal(VT)) continue;
-    
+
     // Determine if there is a legal wider type.  If so, we should promote to
     // that wider vector type.
     EVT EltVT = VT.getVectorElementType();
@@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() {
       for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
         EVT SVT = (MVT::SimpleValueType)nVT;
         if (SVT.getVectorElementType() == EltVT &&
-            SVT.getVectorNumElements() > NElts && 
-            isTypeSynthesizable(SVT)) {
+            SVT.getVectorNumElements() > NElts &&
+            isTypeLegal(SVT)) {
           TransformToType[i] = SVT;
           RegisterTypeForVT[i] = SVT;
           NumRegistersForVT[i] = 1;
@@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() {
       }
       if (IsLegalWiderType) continue;
     }
-    
+
     MVT IntermediateVT;
     EVT RegisterVT;
     unsigned NumIntermediates;
@@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() {
       getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
                                 RegisterVT, this);
     RegisterTypeForVT[i] = RegisterVT;
-    
+
     EVT NVT = VT.getPow2VectorType();
     if (NVT == VT) {
       // Type is already a power of 2.  The default action is to split.
@@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                 unsigned &NumIntermediates,
                                                 EVT &RegisterVT) const {
   unsigned NumElts = VT.getVectorNumElements();
-  
+
   // If there is a wider vector type with the same element type as this one,
   // we should widen to that legal vector type.  This handles things like
   // <2 x float> -> <4 x float>.
@@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
       return 1;
     }
   }
-  
+
   // Figure out the right, legal destination reg to copy into.
   EVT EltTy = VT.getVectorElementType();
-  
+
   unsigned NumVectorRegs = 1;
-  
-  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we 
+
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
   // could break down into LHS/RHS like LegalizeDAG does.
   if (!isPowerOf2_32(NumElts)) {
     NumVectorRegs = NumElts;
     NumElts = 1;
   }
-  
+
   // Divide the input until we get to a supported size.  This will always
   // end with a scalar if the target doesn't support vectors.
   while (NumElts > 1 && !isTypeLegal(
@@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
   }
 
   NumIntermediates = NumVectorRegs;
-  
+
   EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
   if (!isTypeLegal(NewVT))
     NewVT = EltTy;
@@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
   RegisterVT = DestVT;
   if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
     return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-  
+
   // Otherwise, promotion or legal types use the same number of registers as
   // the vector decimated to the appropriate level.
   return NumVectorRegs;
 }
 
-/// Get the EVTs and ArgFlags collections that represent the legalized return 
+/// Get the EVTs and ArgFlags collections that represent the legalized return
 /// type of the given function.  This does not require a DAG or a return value,
 /// and is suitable for use before any DAGs for the function are constructed.
 /// TODO: Move this out of TargetLowering.cpp.
@@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const {
   // In non-pic modes, just use the address of a block.
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
     return MachineJumpTableInfo::EK_BlockAddress;
-  
+
   // In PIC mode, if the target supports a GPRel32 directive, use it.
   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
-  
+
   // Otherwise, use a label difference.
   return MachineJumpTableInfo::EK_LabelDifference32;
 }
@@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 //  Optimization Methods
 //===----------------------------------------------------------------------===//
 
-/// ShrinkDemandedConstant - Check to see if the specified operand of the 
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
 /// specified instruction is a constant integer.  If so, check to see if there
 /// are any bits set in the constant that are not demanded.  If so, shrink the
 /// constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, 
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                         const APInt &Demanded) {
   DebugLoc dl = Op.getDebugLoc();
 
@@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
       EVT VT = Op.getValueType();
       SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
                                 DAG.getConstant(Demanded &
-                                                C->getAPIntValue(), 
+                                                C->getAPIntValue(),
                                                 VT));
       return CombineTo(Op, New);
     }
@@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   KnownZero = KnownOne = APInt(BitWidth, 0);
 
   // Other users may use these bits.
-  if (!Op.getNode()->hasOneUse()) { 
+  if (!Op.getNode()->hasOneUse()) {
     if (Depth != 0) {
-      // If not at the root, Just compute the KnownZero/KnownOne bits to 
+      // If not at the root, Just compute the KnownZero/KnownOne bits to
       // simplify things downstream.
       TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
       return false;
@@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If this is the root being simplified, allow it to have multiple uses,
     // just set the NewMask to all bits.
     NewMask = APInt::getAllOnesValue(BitWidth);
-  } else if (DemandedMask == 0) {   
+  } else if (DemandedMask == 0) {
     // Not demanding any bits from Op.
     if (Op.getOpcode() != ISD::UNDEF)
       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
@@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // the RHS.
     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       APInt LHSZero, LHSOne;
+      // Do not increment Depth here; that can cause an infinite loop.
       TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
-                                LHSZero, LHSOne, Depth+1);
+                                LHSZero, LHSOne, Depth);
       // If the LHS already has zeros where RHSC does, this and is dead.
       if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
         return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
         return true;
     }
-    
+
     if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
     if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
                              KnownZero2, KnownOne2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-      
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // If all of the demanded bits are known one on one side, return the other.
     // These bits cannot contribute to the result of the 'and'.
     if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
@@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     KnownZero |= KnownZero2;
     break;
   case ISD::OR:
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
     if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
                              KnownZero2, KnownOne2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'or'.
     if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
@@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     KnownOne |= KnownOne2;
     break;
   case ISD::XOR:
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
     if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
                              KnownOne2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'xor'.
     if ((KnownZero & NewMask) == NewMask)
@@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
                                                Op.getOperand(0),
                                                Op.getOperand(1)));
-    
+
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
     KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-    
+
     // If all of the demanded bits on one side are known, and all of the set
     // bits on that side are also known to be set on the other side, turn this
     // into an AND, as we know the bits will be cleared.
@@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       if ((KnownOne & KnownOne2) == KnownOne) {
         EVT VT = Op.getValueType();
         SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
-        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, 
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                  Op.getOperand(0), ANDC));
       }
     }
-    
+
     // If the RHS is a constant, see if we can simplify it.
     // for XOR, we prefer to force bits to 1 if they will make a -1.
     // if we can't force bits, try to shrink constant
@@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     KnownOne  = KnownOneOut;
     break;
   case ISD::SELECT:
-    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, 
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
     if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
                              KnownOne2, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // If the operands are constants, see if we can simplify them.
     if (TLO.ShrinkDemandedConstant(Op, NewMask))
       return true;
-    
+
     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
     break;
   case ISD::SELECT_CC:
-    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, 
+    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
     if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
                              KnownOne2, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // If the operands are constants, see if we can simplify them.
     if (TLO.ShrinkDemandedConstant(Op, NewMask))
       return true;
-      
+
     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
@@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           if (Diff < 0) {
             Diff = -Diff;
             Opc = ISD::SRL;
-          }          
-          
-          SDValue NewSA = 
+          }
+
+          SDValue NewSA =
             TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
           EVT VT = Op.getValueType();
           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                    InOp.getOperand(0), NewSA));
         }
-      }      
-      
+      }
+
       if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
                                KnownZero, KnownOne, TLO, Depth+1))
         return true;
@@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       unsigned ShAmt = SA->getZExtValue();
       unsigned VTSize = VT.getSizeInBits();
       SDValue InOp = Op.getOperand(0);
-      
+
       // If the shift count is an invalid immediate, don't do anything.
       if (ShAmt >= BitWidth)
         break;
@@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           if (Diff < 0) {
             Diff = -Diff;
             Opc = ISD::SHL;
-          }          
-          
+          }
+
           SDValue NewSA =
             TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
           return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                    InOp.getOperand(0), NewSA));
         }
-      }      
-      
+      }
+
       // Compute the new bits that are at the top now.
       if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
                                KnownZero, KnownOne, TLO, Depth+1))
         return true;
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       KnownZero = KnownZero.lshr(ShAmt);
       KnownOne  = KnownOne.lshr(ShAmt);
 
@@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       EVT VT = Op.getValueType();
       unsigned ShAmt = SA->getZExtValue();
-      
+
       // If the shift count is an invalid immediate, don't do anything.
       if (ShAmt >= BitWidth)
         break;
@@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
       if (HighBits.intersects(NewMask))
         InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
-      
+
       if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                                KnownZero, KnownOne, TLO, Depth+1))
         return true;
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       KnownZero = KnownZero.lshr(ShAmt);
       KnownOne  = KnownOne.lshr(ShAmt);
-      
+
       // Handle the sign bit, adjusted to where it is now in the mask.
       APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
-      
+
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
       if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
-        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, 
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                  Op.getOperand(0),
                                                  Op.getOperand(1)));
       } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
@@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   case ISD::SIGN_EXTEND_INREG: {
     EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 
-    // Sign extension.  Compute the demanded bits in the result that are not 
+    // Sign extension.  Compute the demanded bits in the result that are not
     // present in the input.
     APInt NewBits =
       APInt::getHighBitsSet(BitWidth,
                             BitWidth - EVT.getScalarType().getSizeInBits());
-    
+
     // If none of the extended bits are demanded, eliminate the sextinreg.
     if ((NewBits & NewMask) == 0)
       return TLO.CombineTo(Op, Op.getOperand(0));
 
-    APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
-    InSignBit.zext(BitWidth);
+    APInt InSignBit =
+      APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
     APInt InputDemandedBits =
       APInt::getLowBitsSet(BitWidth,
                            EVT.getScalarType().getSizeInBits()) &
       NewMask;
-    
+
     // Since the sign extended bits are demanded, we know that the sign
     // bit is demanded.
     InputDemandedBits |= InSignBit;
@@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
 
     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
-    
+
     // If the input sign bit is known zero, convert this into a zero extension.
     if (KnownZero.intersects(InSignBit))
-      return TLO.CombineTo(Op, 
+      return TLO.CombineTo(Op,
                            TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
-    
+
     if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
       KnownOne |= NewBits;
       KnownZero &= ~NewBits;
@@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   case ISD::ZERO_EXTEND: {
     unsigned OperandBitWidth =
       Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
-    APInt InMask = NewMask;
-    InMask.trunc(OperandBitWidth);
-    
+    APInt InMask = NewMask.trunc(OperandBitWidth);
+
     // If none of the top bits are demanded, convert this into an any_extend.
     APInt NewBits =
       APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
     if (!NewBits.intersects(NewMask))
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
-                                               Op.getValueType(), 
+                                               Op.getValueType(),
                                                Op.getOperand(0)));
-    
+
     if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     KnownZero |= NewBits;
     break;
   }
@@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
     APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
     APInt NewBits   = ~InMask & NewMask;
-    
+
     // If none of the top bits are demanded, convert this into an any_extend.
     if (NewBits == 0)
       return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));
-    
+
     // Since some of the sign extended bits are demanded, we know that the sign
     // bit is demanded.
     APInt InDemandedBits = InMask & NewMask;
     InDemandedBits |= InSignBit;
-    InDemandedBits.trunc(InBits);
-    
-    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+    InDemandedBits = InDemandedBits.trunc(InBits);
+
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
-    
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
+
     // If the sign bit is known zero, convert this to a zero extend.
     if (KnownZero.intersects(InSignBit))
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
-                                               Op.getValueType(), 
+                                               Op.getValueType(),
                                                Op.getOperand(0)));
-    
+
     // If the sign bit is known one, the top bits match.
     if (KnownOne.intersects(InSignBit)) {
       KnownOne  |= NewBits;
@@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   case ISD::ANY_EXTEND: {
     unsigned OperandBitWidth =
       Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
-    APInt InMask = NewMask;
-    InMask.trunc(OperandBitWidth);
+    APInt InMask = NewMask.trunc(OperandBitWidth);
     if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
@@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // zero/one bits live out.
     unsigned OperandBitWidth =
       Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
-    APInt TruncMask = NewMask;
-    TruncMask.zext(OperandBitWidth);
+    APInt TruncMask = NewMask.zext(OperandBitWidth);
     if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
-    KnownZero.trunc(BitWidth);
-    KnownOne.trunc(BitWidth);
-    
+    KnownZero = KnownZero.trunc(BitWidth);
+    KnownOne = KnownOne.trunc(BitWidth);
+
     // If the input is only used by this truncate, see if we can shrink it based
     // on the known demanded bits.
     if (Op.getOperand(0).getNode()->hasOneUse()) {
@@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           break;
         APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
                                                OperandBitWidth - BitWidth);
-        HighBits = HighBits.lshr(ShAmt->getZExtValue());
-        HighBits.trunc(BitWidth);
+        HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
 
         if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
           // None of the shifted in bits are needed.  Add a truncate of the
           // shift input, then shift it.
           SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
-                                             Op.getValueType(), 
+                                             Op.getValueType(),
                                              In.getOperand(0));
           return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
                                                    Op.getValueType(),
-                                                   NewTrunc, 
+                                                   NewTrunc,
                                                    In.getOperand(1)));
         }
         break;
       }
     }
-    
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
     break;
   }
   case ISD::AssertZext: {
@@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
 
     EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     APInt InMask = APInt::getLowBitsSet(BitWidth,
@@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     KnownZero |= ~InMask & NewMask;
     break;
   }
-  case ISD::BIT_CONVERT:
+  case ISD::BITCAST:
 #if 0
     // If this is an FP->Int bitcast and if the sign bit is the only thing that
     // is demanded, turn this into a FGETSIGN.
@@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
         // place.  We expect the SHL to be eliminated by other optimizations.
-        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), 
+        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
                                          Op.getOperand(0));
         unsigned ShVal = Op.getValueType().getSizeInBits()-1;
         SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
@@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
     break;
   }
-  
+
   // If we know the value of all of the demanded bits, return this as a
   // constant.
   if ((NewMask & (KnownZero|KnownOne)) == NewMask)
     return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
-  
+
   return false;
 }
 
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified 
-/// in Mask are known to be either zero or one and return them in the 
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
 /// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                     const APInt &Mask,
-                                                    APInt &KnownZero, 
+                                                    APInt &KnownZero,
                                                     APInt &KnownOne,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
@@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
          (KnownOne.countPopulation() == 1);
 }
 
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands 
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
 /// and cc. If it is unable to simplify it, return a null SDValue.
 SDValue
 TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
 
+    SDValue CTPOP = N0;
+    // Look through truncs that don't change the value of a ctpop.
+    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+      CTPOP = N0.getOperand(0);
+
+    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+        (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+                        Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+      EVT CTVT = CTPOP.getValueType();
+      SDValue CTOp = CTPOP.getOperand(0);
+
+      // (ctpop x) u< 2 -> (x & x-1) == 0
+      // (ctpop x) u> 1 -> (x & x-1) != 0
+      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+                                  DAG.getConstant(1, CTVT));
+        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+      }
+
+      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+    }
+
     // If the LHS is '(and load, const)', the RHS is 0,
     // the test is for equality or unsigned, and all 1 bits of the const are
     // in the same partial word, see if we can shorten the load.
@@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (!Lod->isVolatile() && Lod->isUnindexed()) {
         unsigned origWidth = N0.getValueType().getSizeInBits();
         unsigned maskWidth = origWidth;
-        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to 
+        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
         // 8 bits, but have to be careful...
         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
           origWidth = Lod->getMemoryVT().getSizeInBits();
@@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                               DAG.getConstant(bestOffset, PtrType));
           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
           SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
-                                        Lod->getSrcValue(), 
-                                        Lod->getSrcValueOffset() + bestOffset,
+                                Lod->getPointerInfo().getWithOffset(bestOffset),
                                         false, false, NewAlign);
-          return DAG.getSetCC(dl, VT, 
+          return DAG.getSetCC(dl, VT,
                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                       DAG.getConstant(bestMask.trunc(bestWidth),
                                                       newVT)),
@@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
             (isOperationLegal(ISD::SETCC, newVT) &&
               getCondCodeAction(Cond, newVT)==Legal))
           return DAG.getSetCC(dl, VT, N0.getOperand(0),
-                              DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+                              DAG.getConstant(C1.trunc(InSize), newVT),
                               Cond);
         break;
       }
@@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       // the sign extension, it is impossible for both sides to be equal.
       if (C1.getMinSignedBits() > ExtSrcTyBits)
         return DAG.getConstant(Cond == ISD::SETNE, VT);
-      
+
       SDValue ZextOp;
       EVT Op0Ty = N0.getOperand(0).getValueType();
       if (Op0Ty == ExtSrcTy) {
@@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (!DCI.isCalledByLegalizer())
         DCI.AddToWorklist(ZextOp.getNode());
       // Otherwise, make this a use of a zext.
-      return DAG.getSetCC(dl, VT, ZextOp, 
+      return DAG.getSetCC(dl, VT, ZextOp,
                           DAG.getConstant(C1 & APInt::getLowBitsSet(
                                                               ExtDstTyBits,
-                                                              ExtSrcTyBits), 
+                                                              ExtSrcTyBits),
                                           ExtDstTy),
                           Cond);
     } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
@@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
         if (TrueWhenTrue)
-          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);        
+          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
         // Invert the condition.
         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-        CC = ISD::getSetCCInverse(CC, 
+        CC = ISD::getSetCCInverse(CC,
                                   N0.getOperand(0).getValueType().isInteger());
         return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
       }
 
       if ((N0.getOpcode() == ISD::XOR ||
-           (N0.getOpcode() == ISD::AND && 
+           (N0.getOpcode() == ISD::AND &&
             N0.getOperand(0).getOpcode() == ISD::XOR &&
             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
           isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           if (N0.getOpcode() == ISD::XOR)
             Val = N0.getOperand(0);
           else {
-            assert(N0.getOpcode() == ISD::AND && 
+            assert(N0.getOpcode() == ISD::AND &&
                     N0.getOperand(0).getOpcode() == ISD::XOR);
             // ((X^1)&1)^1 -> X & 1
             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
@@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         }
       }
     }
-    
+
     APInt MinVal, MaxVal;
     unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
     if (ISD::isSignedIntSetCC(Cond)) {
@@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
       if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
       // X >= C0 --> X > (C0-1)
-      return DAG.getSetCC(dl, VT, N0, 
+      return DAG.getSetCC(dl, VT, N0,
                           DAG.getConstant(C1-1, N1.getValueType()),
                           (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
     }
@@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
       if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
       // X <= C0 --> X < (C0+1)
-      return DAG.getSetCC(dl, VT, N0, 
+      return DAG.getSetCC(dl, VT, N0,
                           DAG.getConstant(C1+1, N1.getValueType()),
                           (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
     }
@@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
     // If we have setult X, 1, turn it into seteq X, 0
     if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
-      return DAG.getSetCC(dl, VT, N0, 
-                          DAG.getConstant(MinVal, N0.getValueType()), 
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(MinVal, N0.getValueType()),
                           ISD::SETEQ);
     // If we have setugt X, Max-1, turn it into seteq X, Max
     else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
-      return DAG.getSetCC(dl, VT, N0, 
+      return DAG.getSetCC(dl, VT, N0,
                           DAG.getConstant(MaxVal, N0.getValueType()),
                           ISD::SETEQ);
 
@@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     // by changing cc.
 
     // SETUGT X, SINTMAX  -> SETLT X, 0
-    if (Cond == ISD::SETUGT && 
+    if (Cond == ISD::SETUGT &&
         C1 == APInt::getSignedMaxValue(OperandBitSize))
-      return DAG.getSetCC(dl, VT, N0, 
+      return DAG.getSetCC(dl, VT, N0,
                           DAG.getConstant(0, N1.getValueType()),
                           ISD::SETLT);
 
@@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         return DAG.getUNDEF(VT);
       }
     }
-    
+
     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
     // constant if knowing that the operand is non-nan is enough.  We prefer to
     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
@@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         if (DAG.isCommutativeBinOp(N0.getOpcode())) {
           // If X op Y == Y op X, try other combinations.
           if (N0.getOperand(0) == N1.getOperand(1))
-            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), 
+            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                 Cond);
           if (N0.getOperand(1) == N1.getOperand(0))
-            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), 
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                 Cond);
         }
       }
-      
+
       if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
         if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
           // Turn (X+C1) == C2 --> X == C2-C1
@@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                                 LHSR->getAPIntValue(),
                                 N0.getValueType()), Cond);
           }
-          
+
           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
           if (N0.getOpcode() == ISD::XOR)
             // If we know that all of the inverted bits are zero, don't bother
@@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                              N0.getValueType()),
                              Cond);
         }
-        
+
         // Turn (C1-X) == C2 --> X == C1-C2
         if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
@@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                            N0.getValueType()),
                            Cond);
           }
-        }          
+        }
       }
 
       // Simplify (X+Z) == X -->  Z == 0
@@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
           // (Z-X) == X  --> Z == X<<1
           SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
-                                     N1, 
+                                     N1,
                                      DAG.getConstant(1, getShiftAmountTy()));
           if (!DCI.isCalledByLegalizer())
             DCI.AddToWorklist(SH.getNode());
@@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         } else if (N1.getNode()->hasOneUse()) {
           assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
           // X == (Z-X)  --> X<<1 == Z
-          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, 
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
                                      DAG.getConstant(1, getShiftAmountTy()));
           if (!DCI.isCalledByLegalizer())
             DCI.AddToWorklist(SH.getNode());
@@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
 /// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                                     int64_t &Offset) const {
   if (isa<GlobalAddressSDNode>(N)) {
     GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
       }
     }
   }
+  
   return false;
 }
 
@@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
       return C_Memory;
     case 'i':    // Simple Integer or Relocatable Constant
     case 'n':    // Simple Integer
+    case 'E':    // Floating Point Constant
+    case 'F':    // Floating Point Constant
     case 's':    // Relocatable Constant
+    case 'p':    // Address.
     case 'X':    // Allow ANY value.
     case 'I':    // Target registers.
     case 'J':
@@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
     case 'N':
     case 'O':
     case 'P':
+    case '<':
+    case '>':
       return C_Other;
     }
   }
-  
-  if (Constraint.size() > 1 && Constraint[0] == '{' && 
+
+  if (Constraint.size() > 1 && Constraint[0] == '{' &&
       Constraint[Constraint.size()-1] == '}')
     return C_Register;
   return C_Unknown;
@@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     // is possible and fine if either GV or C are missing.
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
     GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-    
+
     // If we have "(add GV, C)", pull out GV/C
     if (Op.getOpcode() == ISD::ADD) {
       C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       if (C == 0 || GA == 0)
         C = 0, GA = 0;
     }
-    
+
     // If we find a valid operand, map to the TargetXXX version so that the
     // value itself doesn't get selected.
     if (GA) {   // Either &GV   or   &GV+C
       if (ConstraintLetter != 'n') {
         int64_t Offs = GA->getOffset();
         if (C) Offs += C->getZExtValue();
-        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), 
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                  C ? C->getDebugLoc() : DebugLoc(),
                                                  Op.getValueType(), Offs));
         return;
@@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
   for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
        E = RI->regclass_end(); RCI != E; ++RCI) {
     const TargetRegisterClass *RC = *RCI;
-    
-    // If none of the value types for this register class are valid, we 
+
+    // If none of the value types for this register class are valid, we
     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
     bool isLegal = false;
     for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
@@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
         break;
       }
     }
-    
+
     if (!isLegal) continue;
-    
-    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 
+
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
          I != E; ++I) {
       if (RegName.equals_lower(RI->getName(*I)))
         return std::make_pair(*I, RC);
     }
   }
-  
+
   return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
 }
 
@@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
 }
 
 
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+    ImmutableCallSite CS) const {
+  /// ConstraintOperands - Information about all of the constraints.
+  AsmOperandInfoVector ConstraintOperands;
+  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+  unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  InlineAsm::ConstraintInfoVector
+    ConstraintInfos = IA->ParseConstraints();
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+    AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    // Update multiple alternative constraint count.
+    if (OpInfo.multipleAlternatives.size() > maCount)
+      maCount = OpInfo.multipleAlternatives.size();
+
+    OpInfo.ConstraintVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+        break;
+      }
+
+      // The return value of the call is this value.  As such, there is no
+      // corresponding argument.
+      assert(!CS.getType()->isVoidTy() &&
+             "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpInfo.ConstraintVT = getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    if (OpInfo.CallOperandVal) {
+      const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+      if (OpInfo.isIndirect) {
+        const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+        if (!PtrTy)
+          report_fatal_error("Indirect operand for inline asm not a pointer!");
+        OpTy = PtrTy->getElementType();
+      }
+      // If OpTy is not a single value, it may be a struct/union that we
+      // can tile with integers.
+      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        switch (BitSize) {
+        default: break;
+        case 1:
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+        case 128:
+          OpInfo.ConstraintVT =
+              EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+          break;
+        }
+      } else if (dyn_cast<PointerType>(OpTy)) {
+        OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+      } else {
+        OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+      }
+    }
+  }
+
+  // If we have multiple alternative constraints, select the best alternative.
+  if (ConstraintInfos.size()) {
+    if (maCount) {
+      unsigned bestMAIndex = 0;
+      int bestWeight = -1;
+      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
+      int weight = -1;
+      unsigned maIndex;
+      // Compute the sums of the weights for each alternative, keeping track
+      // of the best (highest weight) one so far.
+      for (maIndex = 0; maIndex < maCount; ++maIndex) {
+        int weightSum = 0;
+        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+            cIndex != eIndex; ++cIndex) {
+          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+          if (OpInfo.Type == InlineAsm::isClobber)
+            continue;
+
+          // If this is an output operand with a matching input operand,
+          // look up the matching input. If their types mismatch, e.g. one
+          // is an integer, the other is floating point, or their sizes are
+          // different, flag it as an maCantMatch.
+          if (OpInfo.hasMatchingInput()) {
+            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+              if ((OpInfo.ConstraintVT.isInteger() !=
+                   Input.ConstraintVT.isInteger()) ||
+                  (OpInfo.ConstraintVT.getSizeInBits() !=
+                   Input.ConstraintVT.getSizeInBits())) {
+                weightSum = -1;  // Can't match.
+                break;
+              }
+            }
+          }
+          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+          if (weight == -1) {
+            weightSum = -1;
+            break;
+          }
+          weightSum += weight;
+        }
+        // Update best.
+        if (weightSum > bestWeight) {
+          bestWeight = weightSum;
+          bestMAIndex = maIndex;
+        }
+      }
+
+      // Now select chosen alternative in each constraint.
+      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+          cIndex != eIndex; ++cIndex) {
+        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+        if (cInfo.Type == InlineAsm::isClobber)
+          continue;
+        cInfo.selectAlternative(bestMAIndex);
+      }
+    }
+  }
+
+  // Check and hook up tied operands, choose constraint code to use.
+  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+      cIndex != eIndex; ++cIndex) {
+    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
+        }
+      }
+
+    }
+  }
+
+  return ConstraintOperands;
+}
+
+
 /// getConstraintGenerality - Return an integer indicating how general CT
 /// is.
 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
@@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
   }
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getMultipleConstraintMatchWeight(
+    AsmOperandInfo &info, int maIndex) const {
+  InlineAsm::ConstraintCodeVector *rCodes;
+  if (maIndex >= (int)info.multipleAlternatives.size())
+    rCodes = &info.Codes;
+  else
+    rCodes = &info.multipleAlternatives[maIndex].Codes;
+  ConstraintWeight BestWeight = CW_Invalid;
+
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+    ConstraintWeight weight =
+      getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+    if (weight > BestWeight)
+      BestWeight = weight;
+  }
+
+  return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+    case 'i': // immediate integer.
+    case 'n': // immediate integer with a known value.
+      if (isa<ConstantInt>(CallOperandVal))
+        weight = CW_Constant;
+      break;
+    case 's': // non-explicit intregal immediate.
+      if (isa<GlobalValue>(CallOperandVal))
+        weight = CW_Constant;
+      break;
+    case 'E': // immediate float if host format.
+    case 'F': // immediate float.
+      if (isa<ConstantFP>(CallOperandVal))
+        weight = CW_Constant;
+      break;
+    case '<': // memory operand with autodecrement.
+    case '>': // memory operand with autoincrement.
+    case 'm': // memory operand.
+    case 'o': // offsettable memory operand
+    case 'V': // non-offsettable memory operand
+      weight = CW_Memory;
+      break;
+    case 'r': // general register.
+    case 'g': // general register, memory operand or immediate integer.
+              // note: Clang converts "g" to "imr".
+      if (CallOperandVal->getType()->isIntegerTy())
+        weight = CW_Register;
+      break;
+    case 'X': // any operand.
+    default:
+      weight = CW_Default;
+      break;
+  }
+  return weight;
+}
+
 /// ChooseConstraint - If there are multiple different constraints that we
 /// could pick for this operand (e.g. "imr") try to pick the 'best' one.
 /// This is somewhat tricky: constraints fall into four classes:
@@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
         break;
       }
     }
-    
+
     // Things with matching constraints can only be registers, per gcc
     // documentation.  This mainly affects "g" constraints.
     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
       continue;
-    
+
     // This constraint letter is more general than the previous one, use it.
     int Generality = getConstraintGenerality(CType);
     if (Generality > BestGenerality) {
@@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
       BestGenerality = Generality;
     }
   }
-  
+
   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
   OpInfo.ConstraintType = BestType;
 }
@@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
 /// type to use for the specific AsmOperandInfo, setting
 /// OpInfo.ConstraintCode and OpInfo.ConstraintType.
 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
-                                            SDValue Op, 
+                                            SDValue Op,
                                             SelectionDAG *DAG) const {
   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
-  
+
   // Single-letter constraints ('r') are very common.
   if (OpInfo.Codes.size() == 1) {
     OpInfo.ConstraintCode = OpInfo.Codes[0];
@@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
   } else {
     ChooseConstraint(OpInfo, *this, Op, DAG);
   }
-  
+
   // 'X' matches anything.
   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
     // Labels and constants are handled elsewhere ('X' is the only thing
@@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
       OpInfo.CallOperandVal = v;
       return;
     }
-    
+
     // Otherwise, try to resolve it to something we know about by looking at
     // the actual operand type.
     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
 
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                            const Type *Ty) const {
   // The default implementation of this implements a conservative RISCy, r+r and
   // r+i addr mode.
@@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   // Allows a sign-extended 16-bit immediate field.
   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
     return false;
-  
+
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
-  
-  // Only support r+r, 
+
+  // Only support r+r,
   switch (AM.Scale) {
   case 0:  // "r+i" or just "i", depending on HasBaseReg.
     break;
@@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
     // Allow 2*r as r+r.
     break;
   }
-  
+
   return true;
 }
 
@@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                   std::vector<SDNode*>* Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl= N->getDebugLoc();
-  
+
   // Check to see if we can do this.
   // FIXME: We should be more aggressive here.
   if (!isTypeLegal(VT))
     return SDValue();
-  
+
   APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
   APInt::ms magics = d.magic();
-  
+
   // Multiply the numerator (operand 0) by the magic value
   // FIXME: We should support doing a MUL in a wider type
   SDValue Q;
@@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   else
     return SDValue();       // No mulhs or equvialent
   // If d > 0 and m < 0, add the numerator
-  if (d.isStrictlyPositive() && magics.m.isNegative()) { 
+  if (d.isStrictlyPositive() && magics.m.isNegative()) {
     Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
     if (Created)
       Created->push_back(Q.getNode());
@@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   }
   // Shift right algebraic if shift value is nonzero
   if (magics.s > 0) {
-    Q = DAG.getNode(ISD::SRA, dl, VT, Q, 
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
                     DAG.getConstant(magics.s, getShiftAmountTy()));
     if (Created)
       Created->push_back(Q.getNode());
@@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   if (magics.a == 0) {
     assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
            "We shouldn't generate an undefined shift!");
-    return DAG.getNode(ISD::SRL, dl, VT, Q, 
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
                        DAG.getConstant(magics.s, getShiftAmountTy()));
   } else {
     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
     if (Created)
       Created->push_back(NPQ.getNode());
-    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
                       DAG.getConstant(1, getShiftAmountTy()));
     if (Created)
       Created->push_back(NPQ.getNode());
     NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
     if (Created)
       Created->push_back(NPQ.getNode());
-    return DAG.getNode(ISD::SRL, dl, VT, NPQ, 
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
                        DAG.getConstant(magics.s-1, getShiftAmountTy()));
   }
 }
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index aeaa38b56433..7b5bca495206 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -226,7 +226,7 @@ bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
   // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
   CSRegSet prevAnticIn = AnticIn[MBB];
   AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
-  if (prevAnticIn |= AnticIn[MBB])
+  if (prevAnticIn != AnticIn[MBB])
     changed = true;
   return changed;
 }
@@ -264,7 +264,7 @@ bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
   // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
   CSRegSet prevAvailOut = AvailOut[MBB];
   AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
-  if (prevAvailOut |= AvailOut[MBB])
+  if (prevAvailOut != AvailOut[MBB])
     changed = true;
   return changed;
 }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b29ea19835bc..2843c1a5b6d8 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "regcoalescing"
 #include "SimpleRegisterCoalescing.h"
 #include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -64,9 +65,25 @@ DisablePhysicalJoin("disable-physical-join",
                cl::desc("Avoid coalescing physical register copies"),
                cl::init(false), cl::Hidden);
 
-INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer,
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+         cl::desc("Verify machine instrs before and after register coalescing"),
+         cl::Hidden);
+
+INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer,
                 "simple-register-coalescing", "Simple Register Coalescing", 
-                false, false, true);
+                false, false, true)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer,
+                "simple-register-coalescing", "Simple Register Coalescing", 
+                false, false, true)
 
 char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
 
@@ -75,14 +92,14 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AliasAnalysis>();
   AU.addRequired<LiveIntervals>();
   AU.addPreserved<LiveIntervals>();
+  AU.addRequired<LiveDebugVariables>();
+  AU.addPreserved<LiveDebugVariables>();
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<MachineLoopInfo>();
   AU.addPreserved<MachineLoopInfo>();
   AU.addPreservedID(MachineDominatorsID);
-  if (StrongPHIElim)
-    AU.addPreservedID(StrongPHIEliminationID);
-  else
-    AU.addPreservedID(PHIEliminationID);
+  AU.addPreservedID(StrongPHIEliminationID);
+  AU.addPreservedID(PHIEliminationID);
   AU.addPreservedID(TwoAddressInstructionPassID);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -124,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
   // Get the location that B is defined at.  Two options: either this value has
   // an unknown definition point or it is defined at CopyIdx.  If unknown, we
   // can't process it.
-  if (!BValNo->getCopy()) return false;
+  if (!BValNo->isDefByCopy()) return false;
   assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
 
   // AValNo is the value number in A that defines the copy, A3 in the example.
@@ -218,7 +235,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
         continue;
       LiveInterval &SRLI = li_->getInterval(*SR);
       SRLI.addRange(LiveRange(FillerStart, FillerEnd,
-                              SRLI.getNextValue(FillerStart, 0, true,
+                              SRLI.getNextValue(FillerStart, 0,
                                                 li_->getVNInfoAllocator())));
     }
   }
@@ -266,9 +283,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
     for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
       if (BI->valno == BValNo)
         continue;
-      // When BValNo is null, we're looking for a dummy clobber-value for a subreg.
-      if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy())
-        continue;
       if (BI->start <= AI->start && BI->end > AI->start)
         return true;
       if (BI->start > AI->start && BI->start < AI->end)
@@ -278,16 +292,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
   return false;
 }
 
-static void
-TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
-  for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
-       i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isImplicit())
-      NewMI->addOperand(MO);
-  }
-}
-
 /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
 /// IntA being the source and IntB being the dest, thus this defines a value
 /// number in IntB.  If the source value number (in IntA) is defined by a
@@ -324,8 +328,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
   if (!li_->hasInterval(CP.getDstReg()))
     return false;
 
-  SlotIndex CopyIdx =
-    li_->getInstructionIndex(CopyMI).getDefIndex();
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
 
   LiveInterval &IntA =
     li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -334,27 +337,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
 
   // BValNo is a value number in B that is defined by a copy from A. 'B3' in
   // the example above.
-  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
-  if (BLR == IntB.end()) return false;
-  VNInfo *BValNo = BLR->valno;
+  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+  if (!BValNo || !BValNo->isDefByCopy())
+    return false;
 
-  // Get the location that B is defined at.  Two options: either this value has
-  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
-  // can't process it.
-  if (!BValNo->getCopy()) return false;
   assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
 
   // AValNo is the value number in A that defines the copy, A3 in the example.
-  LiveInterval::iterator ALR =
-    IntA.FindLiveRangeContaining(CopyIdx.getUseIndex()); // 
+  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+  assert(AValNo && "COPY source not live");
 
-  assert(ALR != IntA.end() && "Live range not found!");
-  VNInfo *AValNo = ALR->valno;
   // If other defs can reach uses of this def, then it's not safe to perform
-  // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
-  // tested?
-  if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
-      AValNo->isUnused() || AValNo->hasPHIKill())
+  // the optimization.
+  if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
     return false;
   MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
   if (!DefMI)
@@ -411,7 +406,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
       return false;
   }
 
-  DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+  DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+               << *DefMI);
 
   // At this point we have decided that it is legal to do this
   // transformation.  Start by commuting the instruction.
@@ -427,10 +423,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
   unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
   NewMI->getOperand(OpIdx).setIsKill();
 
-  bool BHasPHIKill = BValNo->hasPHIKill();
-  SmallVector<VNInfo*, 4> BDeadValNos;
-  std::map<SlotIndex, SlotIndex> BExtend;
-
   // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
   // A = or A, B
   // ...
@@ -439,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
   // C = A<kill>
   // ...
   //   = B
-  bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
-  if (Extended)
-    BExtend[ALR->end] = BLR->end;
 
   // Update uses of IntA of the specific Val# with IntB.
   for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
@@ -467,52 +456,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
       UseMO.setReg(NewReg);
     if (UseMI == CopyMI)
       continue;
-    if (UseMO.isKill()) {
-      if (Extended)
-        UseMO.setIsKill(false);
-    }
     if (!UseMI->isCopy())
       continue;
     if (UseMI->getOperand(0).getReg() != IntB.reg ||
         UseMI->getOperand(0).getSubReg())
       continue;
 
-    // This copy will become a noop. If it's defining a new val#,
-    // remove that val# as well. However this live range is being
-    // extended to the end of the existing live range defined by the copy.
+    // This copy will become a noop. If it's defining a new val#, merge it into
+    // BValNo.
     SlotIndex DefIdx = UseIdx.getDefIndex();
-    const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
-    if (!DLR)
+    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+    if (!DVNI)
       continue;
-    BHasPHIKill |= DLR->valno->hasPHIKill();
-    assert(DLR->valno->def == DefIdx);
-    BDeadValNos.push_back(DLR->valno);
-    BExtend[DLR->start] = DLR->end;
+    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+    assert(DVNI->def == DefIdx);
+    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
     JoinedCopies.insert(UseMI);
   }
 
-  // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
-  // simply extend BLR if CopyMI doesn't end the range.
-  DEBUG({
-      dbgs() << "Extending: ";
-      IntB.print(dbgs(), tri_);
-    });
-
-  // Remove val#'s defined by copies that will be coalesced away.
-  for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
-    VNInfo *DeadVNI = BDeadValNos[i];
-    if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
-      for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
-        if (!li_->hasInterval(*AS))
-          continue;
-        LiveInterval &ASLI = li_->getInterval(*AS);
-        if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
-          ASLI.removeValNo(ASLR->valno);
-      }
-    }
-    IntB.removeValNo(BDeadValNos[i]);
-  }
-
   // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
   // is updated.
   VNInfo *ValNo = BValNo;
@@ -521,30 +482,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
   for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
        AI != AE; ++AI) {
     if (AI->valno != AValNo) continue;
-    SlotIndex End = AI->end;
-    std::map<SlotIndex, SlotIndex>::iterator
-      EI = BExtend.find(End);
-    if (EI != BExtend.end())
-      End = EI->second;
-    IntB.addRange(LiveRange(AI->start, End, ValNo));
+    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
   }
-  ValNo->setHasPHIKill(BHasPHIKill);
-
-  DEBUG({
-      dbgs() << "   result = ";
-      IntB.print(dbgs(), tri_);
-      dbgs() << "\nShortening: ";
-      IntA.print(dbgs(), tri_);
-    });
+  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
 
   IntA.removeValNo(AValNo);
-
-  DEBUG({
-      dbgs() << "   result = ";
-      IntA.print(dbgs(), tri_);
-      dbgs() << '\n';
-    });
-
+  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
   ++numCommutes;
   return true;
 }
@@ -644,6 +587,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
 /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
 /// computation, replace the copy by rematerialize the definition.
 bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+                                                       bool preserveSrcInt,
                                                        unsigned DstReg,
                                                        unsigned DstSubIdx,
                                                        MachineInstr *CopyMI) {
@@ -652,12 +596,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
   assert(SrcLR != SrcInt.end() && "Live range not found!");
   VNInfo *ValNo = SrcLR->valno;
   // If other defs can reach uses of this def, then it's not safe to perform
-  // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
-  // tested?
-  if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
-      ValNo->isUnused() || ValNo->hasPHIKill())
+  // the optimization.
+  if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
     return false;
   MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+  if (!DefMI)
+    return false;
   assert(DefMI && "Defining instruction disappeared");
   const TargetInstrDesc &TID = DefMI->getDesc();
   if (!TID.isAsCheapAsAMove())
@@ -681,8 +625,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
       return false;
   }
 
-  // If destination register has a sub-register index on it, make sure it mtches
-  // the instruction register class.
+  // If destination register has a sub-register index on it, make sure it
+  // matches the instruction register class.
   if (DstSubIdx) {
     const TargetInstrDesc &TID = DefMI->getDesc();
     if (TID.getNumDefs() != 1)
@@ -699,30 +643,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
 
   RemoveCopyFlag(DstReg, CopyMI);
 
-  // If copy kills the source register, find the last use and propagate
-  // kill.
-  bool checkForDeadDef = false;
   MachineBasicBlock *MBB = CopyMI->getParent();
-  if (SrcLR->end == CopyIdx.getDefIndex())
-    if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
-      checkForDeadDef = true;
-    }
-
   MachineBasicBlock::iterator MII =
     llvm::next(MachineBasicBlock::iterator(CopyMI));
   tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
   MachineInstr *NewMI = prior(MII);
 
-  if (checkForDeadDef) {
-    // PR4090 fix: Trim interval failed because there was no use of the
-    // source interval in this MBB. If the def is in this MBB too then we
-    // should mark it dead:
-    if (DefMI->getParent() == MBB) {
-      DefMI->addRegisterDead(SrcInt.reg, tri_);
-      SrcLR->end = SrcLR->start.getNextSlot();
-    }
-  }
-
   // CopyMI may have implicit operands, transfer them over to the newly
   // rematerialized instruction. And update implicit def interval valnos.
   for (unsigned i = CopyMI->getDesc().getNumOperands(),
@@ -734,13 +660,18 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
       RemoveCopyFlag(MO.getReg(), CopyMI);
   }
 
-  TransferImplicitOps(CopyMI, NewMI);
+  NewMI->copyImplicitOps(CopyMI);
   li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
   CopyMI->eraseFromParent();
   ReMatCopies.insert(CopyMI);
   ReMatDefs.insert(DefMI);
   DEBUG(dbgs() << "Remat: " << *NewMI);
   ++NumReMats;
+
+  // The source interval can become smaller because we removed a use.
+  if (preserveSrcInt)
+    li_->shrinkToUses(&SrcInt);
+
   return true;
 }
 
@@ -756,6 +687,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
   unsigned DstReg = CP.getDstReg();
   unsigned SubIdx = CP.getSubIdx();
 
+  // Update LiveDebugVariables.
+  ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
   for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
        MachineInstr *UseMI = I.skipInstruction();) {
     // A PhysReg copy that won't be coalesced can perhaps be rematerialized
@@ -768,7 +702,7 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
           UseMI->getOperand(0).getReg() != SrcReg &&
           UseMI->getOperand(0).getReg() != DstReg &&
           !JoinedCopies.count(UseMI) &&
-          ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+          ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
                                   UseMI->getOperand(0).getReg(), 0, UseMI))
         continue;
     }
@@ -874,7 +808,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
   if (li_->hasInterval(DstReg)) {
     LiveInterval &LI = li_->getInterval(DstReg);
     if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
-      if (LR->valno->getCopy() == CopyMI)
+      if (LR->valno->def == DefIdx)
         LR->valno->setCopy(0);
   }
   if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
@@ -884,7 +818,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
       continue;
     LiveInterval &LI = li_->getInterval(*AS);
     if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
-      if (LR->valno->getCopy() == CopyMI)
+      if (LR->valno->def == DefIdx)
         LR->valno->setCopy(0);
   }
 }
@@ -1044,23 +978,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     return false;
   }
 
-  DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
+  DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_));
 
   // Enforce policies.
   if (CP.isPhys()) {
-    DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
+    DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n");
     // Only coalesce to allocatable physreg.
     if (!li_->isAllocatable(CP.getDstReg())) {
       DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
       return false;  // Not coalescable.
     }
   } else {
-    DEBUG({
-      dbgs() << " with reg%" << CP.getDstReg();
-      if (CP.getSubIdx())
-        dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
-      dbgs() << " to " << CP.getNewRC()->getName() << "\n";
-    });
+    DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+                 << " to " << CP.getNewRC()->getName() << "\n");
 
     // Avoid constraining virtual register regclass too much.
     if (CP.isCrossClass()) {
@@ -1114,7 +1044,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       // Before giving up coalescing, if definition of source is defined by
       // trivial computation, try rematerializing it.
       if (!CP.isFlipped() &&
-          ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+          ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI))
         return true;
 
       ++numAborts;
@@ -1134,7 +1064,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     // If definition of source is defined by trivial computation, try
     // rematerializing it.
     if (!CP.isFlipped() &&
-        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
                                 CP.getDstReg(), 0, CopyMI))
       return true;
 
@@ -1317,7 +1247,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
   for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
        i != e; ++i) {
     VNInfo *VNI = *i;
-    if (VNI->isUnused() || VNI->getCopy() == 0)  // Src not defined by a copy?
+    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
       continue;
 
     // Never join with a register that has EarlyClobber redefs.
@@ -1341,7 +1271,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
   for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
        i != e; ++i) {
     VNInfo *VNI = *i;
-    if (VNI->isUnused() || VNI->getCopy() == 0)  // Src not defined by a copy?
+    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
       continue;
 
     // Never join with a register that has EarlyClobber redefs.
@@ -1495,9 +1425,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
                                                std::vector<CopyRec> &TryAgain) {
   DEBUG(dbgs() << MBB->getName() << ":\n");
 
-  std::vector<CopyRec> VirtCopies;
-  std::vector<CopyRec> PhysCopies;
-  std::vector<CopyRec> ImpDefCopies;
+  SmallVector<CopyRec, 8> VirtCopies;
+  SmallVector<CopyRec, 8> PhysCopies;
+  SmallVector<CopyRec, 8> ImpDefCopies;
   for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
        MII != E;) {
     MachineInstr *Inst = MII++;
@@ -1690,6 +1620,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
   tri_ = tm_->getRegisterInfo();
   tii_ = tm_->getInstrInfo();
   li_ = &getAnalysis<LiveIntervals>();
+  ldv_ = &getAnalysis<LiveDebugVariables>();
   AA = &getAnalysis<AliasAnalysis>();
   loopInfo = &getAnalysis<MachineLoopInfo>();
 
@@ -1697,6 +1628,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
                << "********** Function: "
                << ((Value*)mf_->getFunction())->getName() << '\n');
 
+  if (VerifyCoalescing)
+    mf_->verify(this, "Before register coalescing");
+
   for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
          E = tri_->regclass_end(); I != E; ++I)
     allocatableRCRegs_.insert(std::make_pair(*I,
@@ -1739,9 +1673,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
           DoDelete = false;
         
         if (MI->allDefsAreDead()) {
-          LiveInterval &li = li_->getInterval(SrcReg);
-          if (!ShortenDeadCopySrcLiveRange(li, MI))
-            ShortenDeadCopyLiveRange(li, MI);
+          if (li_->hasInterval(SrcReg)) {
+            LiveInterval &li = li_->getInterval(SrcReg);
+            if (!ShortenDeadCopySrcLiveRange(li, MI))
+              ShortenDeadCopyLiveRange(li, MI);
+          }
           DoDelete = true;
         }
         if (!DoDelete) {
@@ -1821,13 +1757,26 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
         if (!MO.isReg() || !MO.isKill()) continue;
         unsigned reg = MO.getReg();
         if (!reg || !li_->hasInterval(reg)) continue;
-        if (!li_->getInterval(reg).killedAt(DefIdx))
+        if (!li_->getInterval(reg).killedAt(DefIdx)) {
           MO.setIsKill(false);
+          continue;
+        }
+        // When leaving a kill flag on a physreg, check if any subregs should
+        // remain alive.
+        if (!TargetRegisterInfo::isPhysicalRegister(reg))
+          continue;
+        for (const unsigned *SR = tri_->getSubRegisters(reg);
+             unsigned S = *SR; ++SR)
+          if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+            MI->addRegisterDefined(S, tri_);
       }
     }
   }
 
   DEBUG(dump());
+  DEBUG(ldv_->dump());
+  if (VerifyCoalescing)
+    mf_->verify(this, "After register coalescing");
   return true;
 }
 
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 855bdb98b36c..56703dfa2ddd 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -21,7 +21,7 @@
 
 namespace llvm {
   class SimpleRegisterCoalescing;
-  class LiveVariables;
+  class LiveDebugVariables;
   class TargetRegisterInfo;
   class TargetInstrInfo;
   class VirtRegMap;
@@ -44,6 +44,7 @@ namespace llvm {
     const TargetRegisterInfo* tri_;
     const TargetInstrInfo* tii_;
     LiveIntervals *li_;
+    LiveDebugVariables *ldv_;
     const MachineLoopInfo* loopInfo;
     AliasAnalysis *AA;
     
@@ -63,7 +64,9 @@ namespace llvm {
 
   public:
     static char ID; // Pass identifcation, replacement for typeid
-    SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
+    SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
+      initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
+    }
 
     struct InstrSlots {
       enum {
@@ -140,8 +143,10 @@ namespace llvm {
 
     /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
     /// computation, replace the copy by rematerialize the definition.
-    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
-                                 unsigned DstSubIdx, MachineInstr *CopyMI);
+    /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+                                 unsigned DstReg, unsigned DstSubIdx,
+                                 MachineInstr *CopyMI);
 
     /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
     /// two virtual registers from different register classes.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index b637980f885c..13e1454fa5f3 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -21,15 +21,14 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
 using namespace llvm;
 
 STATISTIC(NumInvokes, "Number of invokes replaced");
@@ -53,6 +52,7 @@ namespace {
     Constant *SelectorFn;
     Constant *ExceptionFn;
     Constant *CallSiteFn;
+    Constant *DispatchSetupFn;
 
     Value *CallSite;
   public:
@@ -116,6 +116,8 @@ bool SjLjEHPass::doInitialization(Module &M) {
   SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
   ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
   CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+  DispatchSetupFn
+    = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
   PersonalityFn = 0;
 
   return true;
@@ -317,8 +319,12 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
       Unwinds.push_back(UI);
     }
   }
-  // If we don't have any invokes or unwinds, there's nothing to do.
-  if (Unwinds.empty() && Invokes.empty()) return false;
+
+  NumInvokes += Invokes.size();
+  NumUnwinds += Unwinds.size();
+
+  // If we don't have any invokes, there's nothing to do.
+  if (Invokes.empty()) return false;
 
   // Find the eh.selector.*, eh.exception and alloca calls.
   //
@@ -332,6 +338,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
   SmallVector<CallInst*,16> EH_Selectors;
   SmallVector<CallInst*,16> EH_Exceptions;
   SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
   // Note: Skip the entry block since there's nothing there that interests
   // us. eh.selector and eh.exception shouldn't ever be there, and we
   // want to disregard any allocas that are there.
@@ -351,228 +358,231 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
       }
     }
   }
+
   // If we don't have any eh.selector calls, we can't determine the personality
   // function. Without a personality function, we can't process exceptions.
   if (!PersonalityFn) return false;
 
-  NumInvokes += Invokes.size();
-  NumUnwinds += Unwinds.size();
+  // We have invokes, so we need to add register/unregister calls to get this
+  // function onto the global unwind stack.
+  //
+  // First thing we need to do is scan the whole function for values that are
+  // live across unwind edges.  Each value that is live across an unwind edge we
+  // spill into a stack location, guaranteeing that there is nothing live across
+  // the unwind edge.  This process also splits all critical edges coming out of
+  // invoke's.
+  splitLiveRangesAcrossInvokes(Invokes);
+
+  BasicBlock *EntryBB = F.begin();
+  // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+  // that needs to be restored on all exits from the function.  This is an
+  // alloca because the value needs to be added to the global context list.
+  unsigned Align = 4; // FIXME: Should be a TLI check?
+  AllocaInst *FunctionContext =
+    new AllocaInst(FunctionContextTy, 0, Align,
+                   "fcn_context", F.begin()->begin());
+
+  Value *Idxs[2];
+  const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+  Value *Zero = ConstantInt::get(Int32Ty, 0);
+  // We need to also keep around a reference to the call_site field
+  Idxs[0] = Zero;
+  Idxs[1] = ConstantInt::get(Int32Ty, 1);
+  CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                       "call_site",
+                                       EntryBB->getTerminator());
+
+  // The exception selector comes back in context->data[1]
+  Idxs[1] = ConstantInt::get(Int32Ty, 2);
+  Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                            "fc_data",
+                                            EntryBB->getTerminator());
+  Idxs[1] = ConstantInt::get(Int32Ty, 1);
+  Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                  "exc_selector_gep",
+                                                  EntryBB->getTerminator());
+  // The exception value comes back in context->data[0]
+  Idxs[1] = Zero;
+  Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                   "exception_gep",
+                                                   EntryBB->getTerminator());
+
+  // The result of the eh.selector call will be replaced with a a reference to
+  // the selector value returned in the function context. We leave the selector
+  // itself so the EH analysis later can use it.
+  for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+    CallInst *I = EH_Selectors[i];
+    Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+    I->replaceAllUsesWith(SelectorVal);
+  }
 
-  if (!Invokes.empty()) {
-    // We have invokes, so we need to add register/unregister calls to get
-    // this function onto the global unwind stack.
-    //
-    // First thing we need to do is scan the whole function for values that are
-    // live across unwind edges.  Each value that is live across an unwind edge
-    // we spill into a stack location, guaranteeing that there is nothing live
-    // across the unwind edge.  This process also splits all critical edges
-    // coming out of invoke's.
-    splitLiveRangesAcrossInvokes(Invokes);
-
-    BasicBlock *EntryBB = F.begin();
-    // Create an alloca for the incoming jump buffer ptr and the new jump buffer
-    // that needs to be restored on all exits from the function.  This is an
-    // alloca because the value needs to be added to the global context list.
-    unsigned Align = 4; // FIXME: Should be a TLI check?
-    AllocaInst *FunctionContext =
-      new AllocaInst(FunctionContextTy, 0, Align,
-                     "fcn_context", F.begin()->begin());
-
-    Value *Idxs[2];
-    const Type *Int32Ty = Type::getInt32Ty(F.getContext());
-    Value *Zero = ConstantInt::get(Int32Ty, 0);
-    // We need to also keep around a reference to the call_site field
-    Idxs[0] = Zero;
-    Idxs[1] = ConstantInt::get(Int32Ty, 1);
-    CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
-                                         "call_site",
-                                         EntryBB->getTerminator());
-
-    // The exception selector comes back in context->data[1]
-    Idxs[1] = ConstantInt::get(Int32Ty, 2);
-    Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
-                                              "fc_data",
-                                              EntryBB->getTerminator());
-    Idxs[1] = ConstantInt::get(Int32Ty, 1);
-    Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
-                                                    "exc_selector_gep",
-                                                    EntryBB->getTerminator());
-    // The exception value comes back in context->data[0]
-    Idxs[1] = Zero;
-    Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
-                                                     "exception_gep",
-                                                     EntryBB->getTerminator());
-
-    // The result of the eh.selector call will be replaced with a
-    // a reference to the selector value returned in the function
-    // context. We leave the selector itself so the EH analysis later
-    // can use it.
-    for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
-      CallInst *I = EH_Selectors[i];
-      Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
-      I->replaceAllUsesWith(SelectorVal);
-    }
-    // eh.exception calls are replaced with references to the proper
-    // location in the context. Unlike eh.selector, the eh.exception
-    // calls are removed entirely.
-    for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
-      CallInst *I = EH_Exceptions[i];
-      // Possible for there to be duplicates, so check to make sure
-      // the instruction hasn't already been removed.
-      if (!I->getParent()) continue;
-      Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
-      const Type *Ty = Type::getInt8PtrTy(F.getContext());
-      Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
-      I->replaceAllUsesWith(Val);
-      I->eraseFromParent();
-    }
+  // eh.exception calls are replaced with references to the proper location in
+  // the context. Unlike eh.selector, the eh.exception calls are removed
+  // entirely.
+  for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+    CallInst *I = EH_Exceptions[i];
+    // Possible for there to be duplicates, so check to make sure the
+    // instruction hasn't already been removed.
+    if (!I->getParent()) continue;
+    Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+    const Type *Ty = Type::getInt8PtrTy(F.getContext());
+    Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+    I->replaceAllUsesWith(Val);
+    I->eraseFromParent();
+  }
 
-    // The entry block changes to have the eh.sjlj.setjmp, with a conditional
-    // branch to a dispatch block for non-zero returns. If we return normally,
-    // we're not handling an exception and just register the function context
-    // and continue.
-
-    // Create the dispatch block.  The dispatch block is basically a big switch
-    // statement that goes to all of the invoke landing pads.
-    BasicBlock *DispatchBlock =
-            BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
-    // Insert a load in the Catch block, and a switch on its value.  By default,
-    // we go to a block that just does an unwind (which is the correct action
-    // for a standard call).
-    BasicBlock *UnwindBlock =
-      BasicBlock::Create(F.getContext(), "unwindbb", &F);
-    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
-
-    Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
-                                       DispatchBlock);
-    SwitchInst *DispatchSwitch =
-      SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
-                         DispatchBlock);
-    // Split the entry block to insert the conditional branch for the setjmp.
-    BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
-                                                     "eh.sjlj.setjmp.cont");
-
-    // Populate the Function Context
-    //   1. LSDA address
-    //   2. Personality function address
-    //   3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
-    // LSDA address
-    Idxs[0] = Zero;
-    Idxs[1] = ConstantInt::get(Int32Ty, 4);
-    Value *LSDAFieldPtr =
-      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
-                                "lsda_gep",
-                                EntryBB->getTerminator());
-    Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
-                                   EntryBB->getTerminator());
-    new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
-    Idxs[1] = ConstantInt::get(Int32Ty, 3);
-    Value *PersonalityFieldPtr =
-      GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
-                                "lsda_gep",
+  // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+  // branch to a dispatch block for non-zero returns. If we return normally,
+  // we're not handling an exception and just register the function context and
+  // continue.
+
+  // Create the dispatch block.  The dispatch block is basically a big switch
+  // statement that goes to all of the invoke landing pads.
+  BasicBlock *DispatchBlock =
+    BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+  // Add a call to dispatch_setup at the start of the dispatch block. This is
+  // expanded to any target-specific setup that needs to be done.
+  Value *SetupArg =
+    CastInst::Create(Instruction::BitCast, FunctionContext,
+                     Type::getInt8PtrTy(F.getContext()), "",
+                     DispatchBlock);
+  CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock);
+
+  // Insert a load of the callsite in the dispatch block, and a switch on its
+  // value.  By default, we go to a block that just does an unwind (which is the
+  // correct action for a standard call).
+  BasicBlock *UnwindBlock =
+    BasicBlock::Create(F.getContext(), "unwindbb", &F);
+  Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+  Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+                                     DispatchBlock);
+  SwitchInst *DispatchSwitch =
+    SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+                       DispatchBlock);
+  // Split the entry block to insert the conditional branch for the setjmp.
+  BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+                                                   "eh.sjlj.setjmp.cont");
+
+  // Populate the Function Context
+  //   1. LSDA address
+  //   2. Personality function address
+  //   3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+  // LSDA address
+  Idxs[0] = Zero;
+  Idxs[1] = ConstantInt::get(Int32Ty, 4);
+  Value *LSDAFieldPtr =
+    GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                              "lsda_gep",
+                              EntryBB->getTerminator());
+  Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+                                 EntryBB->getTerminator());
+  new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+  Idxs[1] = ConstantInt::get(Int32Ty, 3);
+  Value *PersonalityFieldPtr =
+    GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                              "lsda_gep",
+                              EntryBB->getTerminator());
+  new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+                EntryBB->getTerminator());
+
+  // Save the frame pointer.
+  Idxs[1] = ConstantInt::get(Int32Ty, 5);
+  Value *JBufPtr
+    = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                "jbuf_gep",
                                 EntryBB->getTerminator());
-    new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
-                  EntryBB->getTerminator());
-
-    // Save the frame pointer.
-    Idxs[1] = ConstantInt::get(Int32Ty, 5);
-    Value *JBufPtr
-      = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
-                                  "jbuf_gep",
-                                  EntryBB->getTerminator());
-    Idxs[1] = ConstantInt::get(Int32Ty, 0);
-    Value *FramePtr =
-      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+  Idxs[1] = ConstantInt::get(Int32Ty, 0);
+  Value *FramePtr =
+    GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+                              EntryBB->getTerminator());
+
+  Value *Val = CallInst::Create(FrameAddrFn,
+                                ConstantInt::get(Int32Ty, 0),
+                                "fp",
                                 EntryBB->getTerminator());
+  new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+  // Save the stack pointer.
+  Idxs[1] = ConstantInt::get(Int32Ty, 2);
+  Value *StackPtr =
+    GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+                              EntryBB->getTerminator());
+
+  Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+  new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+  // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
+  Value *SetjmpArg =
+    CastInst::Create(Instruction::BitCast, JBufPtr,
+                     Type::getInt8PtrTy(F.getContext()), "",
+                     EntryBB->getTerminator());
+  Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+                                        "dispatch",
+                                        EntryBB->getTerminator());
+  // check the return value of the setjmp. non-zero goes to dispatcher.
+  Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+                                 ICmpInst::ICMP_EQ, DispatchVal, Zero,
+                                 "notunwind");
+  // Nuke the uncond branch.
+  EntryBB->getTerminator()->eraseFromParent();
+
+  // Put in a new condbranch in its place.
+  BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+  // Register the function context and make sure it's known to not throw
+  CallInst *Register =
+    CallInst::Create(RegisterFn, FunctionContext, "",
+                     ContBlock->getTerminator());
+  Register->setDoesNotThrow();
+
+  // At this point, we are all set up, update the invoke instructions to mark
+  // their call_site values, and fill in the dispatch switch accordingly.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+    markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+  // Mark call instructions that aren't nounwind as no-action (call_site ==
+  // -1). Skip the entry block, as prior to then, no function context has been
+  // created for this function and any unexpected exceptions thrown will go
+  // directly to the caller's context, which is what we want anyway, so no need
+  // to do anything here.
+  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+    for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        // Ignore calls to the EH builtins (eh.selector, eh.exception)
+        Constant *Callee = CI->getCalledFunction();
+        if (Callee != SelectorFn && Callee != ExceptionFn
+            && !CI->doesNotThrow())
+          insertCallSiteStore(CI, -1, CallSite);
+      }
+  }
 
-    Value *Val = CallInst::Create(FrameAddrFn,
-                                  ConstantInt::get(Int32Ty, 0),
-                                  "fp",
-                                  EntryBB->getTerminator());
-    new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
-    // Save the stack pointer.
-    Idxs[1] = ConstantInt::get(Int32Ty, 2);
-    Value *StackPtr =
-      GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
-                                EntryBB->getTerminator());
-
-    Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
-    new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
-    // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
-    Value *SetjmpArg =
-      CastInst::Create(Instruction::BitCast, JBufPtr,
-                       Type::getInt8PtrTy(F.getContext()), "",
-                       EntryBB->getTerminator());
-    Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
-                                          "dispatch",
-                                          EntryBB->getTerminator());
-    // check the return value of the setjmp. non-zero goes to dispatcher.
-    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
-                                   ICmpInst::ICMP_EQ, DispatchVal, Zero,
-                                   "notunwind");
-    // Nuke the uncond branch.
-    EntryBB->getTerminator()->eraseFromParent();
-
-    // Put in a new condbranch in its place.
-    BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
-    // Register the function context and make sure it's known to not throw
-    CallInst *Register =
-      CallInst::Create(RegisterFn, FunctionContext, "",
-                       ContBlock->getTerminator());
-    Register->setDoesNotThrow();
-
-    // At this point, we are all set up, update the invoke instructions
-    // to mark their call_site values, and fill in the dispatch switch
-    // accordingly.
-    for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
-      markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
-    // Mark call instructions that aren't nounwind as no-action
-    // (call_site == -1). Skip the entry block, as prior to then, no function
-    // context has been created for this function and any unexpected exceptions
-    // thrown will go directly to the caller's context, which is what we want
-    // anyway, so no need to do anything here.
-    for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
-      for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
-        if (CallInst *CI = dyn_cast<CallInst>(I)) {
-          // Ignore calls to the EH builtins (eh.selector, eh.exception)
-          Constant *Callee = CI->getCalledFunction();
-          if (Callee != SelectorFn && Callee != ExceptionFn
-              && !CI->doesNotThrow())
-            insertCallSiteStore(CI, -1, CallSite);
-        }
-    }
-
-    // Replace all unwinds with a branch to the unwind handler.
-    // ??? Should this ever happen with sjlj exceptions?
-    for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
-      BranchInst::Create(UnwindBlock, Unwinds[i]);
-      Unwinds[i]->eraseFromParent();
-    }
-
-    // Following any allocas not in the entry block, update the saved SP
-    // in the jmpbuf to the new value.
-    for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
-      Instruction *AI = JmpbufUpdatePoints[i];
-      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
-      StackAddr->insertAfter(AI);
-      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
-      StoreStackAddr->insertAfter(StackAddr);
-    }
+  // Replace all unwinds with a branch to the unwind handler.
+  // ??? Should this ever happen with sjlj exceptions?
+  for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+    BranchInst::Create(UnwindBlock, Unwinds[i]);
+    Unwinds[i]->eraseFromParent();
+  }
 
-    // Finally, for any returns from this function, if this function contains an
-    // invoke, add a call to unregister the function context.
-    for (unsigned i = 0, e = Returns.size(); i != e; ++i)
-      CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+  // Following any allocas not in the entry block, update the saved SP in the
+  // jmpbuf to the new value.
+  for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+    Instruction *AI = JmpbufUpdatePoints[i];
+    Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+    StackAddr->insertAfter(AI);
+    Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+    StoreStackAddr->insertAfter(StackAddr);
   }
 
+  // Finally, for any returns from this function, if this function contains an
+  // invoke, add a call to unregister the function context.
+  for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+    CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
   return true;
 }
 
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 1bc148f160bc..6e3fa90e4341 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -41,7 +41,7 @@ namespace {
 
 char SlotIndexes::ID = 0;
 INITIALIZE_PASS(SlotIndexes, "slotindexes",
-                "Slot index numbering", false, false);
+                "Slot index numbering", false, false)
 
 IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
   return &*IndexListEntryEmptyKey;
@@ -61,7 +61,6 @@ void SlotIndexes::releaseMemory() {
   mi2iMap.clear();
   mbb2IdxMap.clear();
   idx2MBBMap.clear();
-  terminatorGaps.clear();
   clearList();
 }
 
@@ -112,13 +111,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
       if (mi->isDebugValue())
         continue;
 
-      if (miItr == mbb->getFirstTerminator()) {
-        push_back(createEntry(0, index));
-        terminatorGaps.insert(
-          std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
-        index += SlotIndex::NUM;
-      }
-
       // Insert a store index for the instr.
       push_back(createEntry(mi, index));
 
@@ -135,15 +127,12 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
       index += (Slots + 1) * SlotIndex::NUM;
     }
 
-    if (mbb->getFirstTerminator() == mbb->end()) {
-      push_back(createEntry(0, index));
-      terminatorGaps.insert(
-        std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
-      index += SlotIndex::NUM;
-    }
+    // We insert two blank instructions between basic blocks.
+    // One to represent live-out registers and one to represent live-ins.
+    push_back(createEntry(0, index));
+    index += SlotIndex::NUM;
 
-    // One blank instruction at the end.
-    push_back(createEntry(0, index));    
+    push_back(createEntry(0, index));
 
     SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
     mbb2IdxMap.insert(
@@ -169,6 +158,7 @@ void SlotIndexes::renumberIndexes() {
   // resulting numbering will match what would have been generated by the
   // pass during the initial numbering of the function if the new instructions
   // had been present.
+  DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
 
   functionSize = 0;
   unsigned index = 0;
@@ -179,7 +169,7 @@ void SlotIndexes::renumberIndexes() {
     curEntry->setIndex(index);
 
     if (curEntry->getInstr() == 0) {
-      // MBB start entry or terminator gap. Just step index by 1.
+      // MBB start entry. Just step index by 1.
       index += SlotIndex::NUM;
     }
     else {
@@ -214,11 +204,10 @@ void SlotIndexes::dump() const {
 
 // Print a SlotIndex to a raw_ostream.
 void SlotIndex::print(raw_ostream &os) const {
-  os << entry().getIndex();
-  if (isPHI())
-    os << "*";
+  if (isValid())
+    os << entry().getIndex() << "LudS"[getSlot()];
   else
-    os << "LudS"[getSlot()];
+    os << "invalid";
 }
 
 // Dump a SlotIndex to stderr.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..9c0bf1629a14
--- /dev/null
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,330 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+//   E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+                      "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+                    "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<EdgeBundles>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+  /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle.
+  /// Ideally, these two numbers should be identical, but inaccuracies in the
+  /// block frequency estimates means that we need to normalize ingoing and
+  /// outgoing frequencies separately so they are commensurate.
+  float Frequency[2];
+
+  /// Bias - Normalized contributions from non-transparent blocks.
+  /// A bundle connected to a MustSpill block has a huge negative bias,
+  /// otherwise it is a number in the range [-2;2].
+  float Bias;
+
+  /// Value - Output value of this node computed from the Bias and links.
+  /// This is always in the range [-1;1]. A positive number means the variable
+  /// should go in a register through this bundle.
+  float Value;
+
+  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+  /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+  /// bundles. The weights are all positive and add up to at most 2, weights
+  /// from ingoing and outgoing nodes separately add up to a most 1. The weight
+  /// sum can be less than 2 when the variable is not live into / out of some
+  /// connected basic blocks.
+  LinkVector Links;
+
+  /// preferReg - Return true when this node prefers to be in a register.
+  bool preferReg() const {
+    // Undecided nodes (Value==0) go on the stack.
+    return Value > 0;
+  }
+
+  /// mustSpill - Return True if this node is so biased that it must spill.
+  bool mustSpill() const {
+    // Actually, we must spill if Bias < sum(weights).
+    // It may be worth it to compute the weight sum here?
+    return Bias < -2.0f;
+  }
+
+  /// Node - Create a blank Node.
+  Node() {
+    Frequency[0] = Frequency[1] = 0;
+  }
+
+  /// clear - Reset per-query data, but preserve frequencies that only depend on
+  // the CFG.
+  void clear() {
+    Bias = Value = 0;
+    Links.clear();
+  }
+
+  /// addLink - Add a link to bundle b with weight w.
+  /// out=0 for an ingoing link, and 1 for an outgoing link.
+  void addLink(unsigned b, float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w /= Frequency[out];
+
+    // There can be multiple links to the same bundle, add them up.
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      if (I->second == b) {
+        I->first += w;
+        return;
+      }
+    // This must be the first link to b.
+    Links.push_back(std::make_pair(w, b));
+  }
+
+  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+  void addBias(float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w /= Frequency[out];
+    Bias += w;
+  }
+
+  /// update - Recompute Value from Bias and Links. Return true when node
+  /// preference changes.
+  bool update(const Node nodes[]) {
+    // Compute the weighted sum of inputs.
+    float Sum = Bias;
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      Sum += I->first * nodes[I->second].Value;
+
+    // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+    // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+    // two reasons:
+    //  1. It avoids arbitrary bias when all links are 0 as is possible during
+    //     initial iterations.
+    //  2. It helps tame rounding errors when the links nominally sum to 0.
+    const float Thres = 1e-4f;
+    bool Before = preferReg();
+    if (Sum < -Thres)
+      Value = -1;
+    else if (Sum > Thres)
+      Value = 1;
+    else
+      Value = 0;
+    return Before != preferReg();
+  }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  bundles = &getAnalysis<EdgeBundles>();
+  loops = &getAnalysis<MachineLoopInfo>();
+
+  assert(!nodes && "Leaking node array");
+  nodes = new Node[bundles->getNumBundles()];
+
+  // Compute total ingoing and outgoing block frequencies for all bundles.
+  for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+    float Freq = getBlockFrequency(I);
+    unsigned Num = I->getNumber();
+    nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq;
+    nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq;
+  }
+
+  // We never change the function.
+  return false;
+}
+
+void SpillPlacement::releaseMemory() {
+  delete[] nodes;
+  nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+  if (ActiveNodes->test(n))
+    return;
+  ActiveNodes->set(n);
+  nodes[n].clear();
+}
+
+
+/// prepareNodes - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::
+prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
+  for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(),
+       E = LiveBlocks.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number);
+    float Freq = getBlockFrequency(MBB);
+
+    // Is this a transparent block? Link ingoing and outgoing bundles.
+    if (I->Entry == DontCare && I->Exit == DontCare) {
+      unsigned ib = bundles->getBundle(I->Number, 0);
+      unsigned ob = bundles->getBundle(I->Number, 1);
+
+      // Ignore self-loops.
+      if (ib == ob)
+        continue;
+      activate(ib);
+      activate(ob);
+      nodes[ib].addLink(ob, Freq, 1);
+      nodes[ob].addLink(ib, Freq, 0);
+      continue;
+    }
+
+    // This block is not transparent, but it can still add bias.
+    const float Bias[] = {
+      0,           // DontCare,
+      1,           // PrefReg,
+      -1,          // PrefSpill
+      -HUGE_VALF   // MustSpill
+    };
+
+    // Live-in to block?
+    if (I->Entry != DontCare) {
+      unsigned ib = bundles->getBundle(I->Number, 0);
+      activate(ib);
+      nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+    }
+
+    // Live-out from block?
+    if (I->Exit != DontCare) {
+      unsigned ob = bundles->getBundle(I->Number, 1);
+      activate(ob);
+      nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+    }
+  }
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// @param Linked - Numbers of linked nodes that need updating.
+void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
+  if (Linked.empty())
+    return;
+
+  // Run up to 10 iterations. The edge bundle numbering is closely related to
+  // basic block numbering, so there is a strong tendency towards chains of
+  // linked nodes with sequential numbers. By scanning the linked nodes
+  // backwards and forwards, we make it very likely that a single node can
+  // affect the entire network in a single iteration. That means very fast
+  // convergence, usually in a single iteration.
+  for (unsigned iteration = 0; iteration != 10; ++iteration) {
+    // Scan backwards, skipping the last node which was just updated.
+    bool Changed = false;
+    for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+           llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+      unsigned n = *I;
+      bool C = nodes[n].update(nodes);
+      Changed |= C;
+    }
+    if (!Changed)
+      return;
+
+    // Scan forwards, skipping the first node which was just updated.
+    Changed = false;
+    for (SmallVectorImpl<unsigned>::const_iterator I =
+           llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+      unsigned n = *I;
+      bool C = nodes[n].update(nodes);
+      Changed |= C;
+    }
+    if (!Changed)
+      return;
+  }
+}
+
+bool
+SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+                            BitVector &RegBundles) {
+  // Reuse RegBundles as our ActiveNodes vector.
+  ActiveNodes = &RegBundles;
+  ActiveNodes->clear();
+  ActiveNodes->resize(bundles->getNumBundles());
+
+  // Compute active nodes, links and biases.
+  prepareNodes(LiveBlocks);
+
+  // Update all active nodes, and find the ones that are actually linked to
+  // something so their value may change when iterating.
+  SmallVector<unsigned, 8> Linked;
+  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) {
+    nodes[n].update(nodes);
+    // A node that must spill, or a node without any links is not going to
+    // change its value ever again, so exclude it from iterations.
+    if (!nodes[n].Links.empty() && !nodes[n].mustSpill())
+      Linked.push_back(n);
+  }
+
+  // Iterate the network to convergence.
+  iterate(Linked);
+
+  // Write preferences back to RegBundles.
+  bool Perfect = true;
+  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n))
+    if (!nodes[n].preferReg()) {
+      RegBundles.reset(n);
+      Perfect = false;
+    }
+  return Perfect;
+}
+
+/// getBlockFrequency - Return our best estimate of the block frequency which is
+/// the expected number of block executions per function invocation.
+float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) {
+  // Use the unnormalized spill weight for real block frequencies.
+  return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB));
+}
+
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..ef2d516cdce7
--- /dev/null
+++ b/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,108 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by placeSpills() which is called by the
+// register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+template <typename> class SmallVectorImpl;
+
+class SpillPlacement  : public MachineFunctionPass {
+  struct Node;
+  const MachineFunction *MF;
+  const EdgeBundles *bundles;
+  const MachineLoopInfo *loops;
+  Node *nodes;
+
+  // Nodes that are active in the current computation. Owned by the placeSpills
+  // caller.
+  BitVector *ActiveNodes;
+
+public:
+  static char ID; // Pass identification, replacement for typeid.
+
+  SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+  ~SpillPlacement() { releaseMemory(); }
+
+  /// BorderConstraint - A basic block has separate constraints for entry and
+  /// exit.
+  enum BorderConstraint {
+    DontCare,  ///< Block doesn't care / variable not live.
+    PrefReg,   ///< Block entry/exit prefers a register.
+    PrefSpill, ///< Block entry/exit prefers a stack slot.
+    MustSpill  ///< A register is impossible, variable must be spilled.
+  };
+
+  /// BlockConstraint - Entry and exit constraints for a basic block.
+  struct BlockConstraint {
+    unsigned Number;            ///< Basic block number (from MBB::getNumber()).
+    BorderConstraint Entry : 8; ///< Constraint on block entry.
+    BorderConstraint Exit : 8;  ///< Constraint on block exit.
+  };
+
+  /// placeSpills - Compute the optimal spill code placement given the
+  /// constraints. No MustSpill constraints will be violated, and the smallest
+  /// possible number of PrefX constraints will be violated, weighted by
+  /// expected execution frequencies.
+  /// @param LiveBlocks Constraints for blocks that have the variable live in or
+  ///                   live out. DontCare/DontCare means the variable is live
+  ///                   through the block. DontCare/X means the variable is live
+  ///                   out, but not live in.
+  /// @param RegBundles Bit vector to receive the edge bundles where the
+  ///                   variable should be kept in a register. Each bit
+  ///                   corresponds to an edge bundle, a set bit means the
+  ///                   variable should be kept in a register through the
+  ///                   bundle. A clear bit means the variable should be
+  ///                   spilled.
+  /// @return True if a perfect solution was found, allowing the variable to be
+  ///         in a register through all relevant bundles.
+  bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+                   BitVector &RegBundles);
+
+  /// getBlockFrequency - Return the estimated block execution frequency per
+  /// function invocation.
+  float getBlockFrequency(const MachineBasicBlock*);
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+  virtual void releaseMemory();
+
+  void activate(unsigned);
+  void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
+  void iterate(const SmallVectorImpl<unsigned>&);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 59d5ab33c994..fd385824aff9 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -12,6 +12,7 @@
 #include "Spiller.h"
 #include "VirtRegMap.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,7 +29,7 @@
 using namespace llvm;
 
 namespace {
-  enum SpillerName { trivial, standard, splitting, inline_ };
+  enum SpillerName { trivial, standard, inline_ };
 }
 
 static cl::opt<SpillerName>
@@ -37,7 +38,6 @@ spillerOpt("spiller",
            cl::Prefix,
            cl::values(clEnumVal(trivial,   "trivial spiller"),
                       clEnumVal(standard,  "default spiller"),
-                      clEnumVal(splitting, "splitting spiller"),
                       clEnumValN(inline_,  "inline", "inline spiller"),
                       clEnumValEnd),
            cl::init(standard));
@@ -80,7 +80,7 @@ protected:
     assert(li->weight != HUGE_VALF &&
            "Attempting to spill already spilled value.");
 
-    assert(!li->isStackSlot() &&
+    assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
            "Trying to spill a stack slot.");
 
     DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
@@ -144,7 +144,7 @@ protected:
         vrm->addSpillSlotUse(ss, loadInstr);
         SlotIndex endIndex = loadIndex.getNextIndex();
         VNInfo *loadVNI =
-          newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+          newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
         newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
       }
 
@@ -158,7 +158,7 @@ protected:
         vrm->addSpillSlotUse(ss, storeInstr);
         SlotIndex beginIndex = storeIndex.getPrevIndex();
         VNInfo *storeVNI =
-          newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+          newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
         newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
       }
 
@@ -182,7 +182,7 @@ public:
 
   void spill(LiveInterval *li,
              SmallVectorImpl<LiveInterval*> &newIntervals,
-             SmallVectorImpl<LiveInterval*> &) {
+             const SmallVectorImpl<LiveInterval*> &) {
     // Ignore spillIs - we don't use it.
     trivialSpillEverywhere(li, newIntervals);
   }
@@ -195,315 +195,42 @@ namespace {
 /// Falls back on LiveIntervals::addIntervalsForSpills.
 class StandardSpiller : public Spiller {
 protected:
+  MachineFunction *mf;
   LiveIntervals *lis;
+  LiveStacks *lss;
   MachineLoopInfo *loopInfo;
   VirtRegMap *vrm;
 public:
   StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
                   VirtRegMap &vrm)
-    : lis(&pass.getAnalysis<LiveIntervals>()),
+    : mf(&mf),
+      lis(&pass.getAnalysis<LiveIntervals>()),
+      lss(&pass.getAnalysis<LiveStacks>()),
       loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
       vrm(&vrm) {}
 
   /// Falls back on LiveIntervals::addIntervalsForSpills.
   void spill(LiveInterval *li,
              SmallVectorImpl<LiveInterval*> &newIntervals,
-             SmallVectorImpl<LiveInterval*> &spillIs) {
+             const SmallVectorImpl<LiveInterval*> &spillIs) {
     std::vector<LiveInterval*> added =
       lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
     newIntervals.insert(newIntervals.end(), added.begin(), added.end());
-  }
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// When a call to spill is placed this spiller will first try to break the
-/// interval up into its component values (one new interval per value).
-/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
-class SplittingSpiller : public StandardSpiller {
-public:
-  SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
-                   VirtRegMap &vrm)
-    : StandardSpiller(pass, mf, vrm) {
-    mri = &mf.getRegInfo();
-    tii = mf.getTarget().getInstrInfo();
-    tri = mf.getTarget().getRegisterInfo();
-  }
 
-  void spill(LiveInterval *li,
-             SmallVectorImpl<LiveInterval*> &newIntervals,
-             SmallVectorImpl<LiveInterval*> &spillIs) {
-    if (worthTryingToSplit(li))
-      tryVNISplit(li);
-    else
-      StandardSpiller::spill(li, newIntervals, spillIs);
+    // Update LiveStacks.
+    int SS = vrm->getStackSlot(li->reg);
+    if (SS == VirtRegMap::NO_STACK_SLOT)
+      return;
+    const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg);
+    LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+    if (!SI.hasAtLeastOneValue())
+      SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+    SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0));
   }
-
-private:
-
-  MachineRegisterInfo *mri;
-  const TargetInstrInfo *tii;
-  const TargetRegisterInfo *tri;
-  DenseSet<LiveInterval*> alreadySplit;
-
-  bool worthTryingToSplit(LiveInterval *li) const {
-    return (!alreadySplit.count(li) && li->getNumValNums() > 1);
-  }
-
-  /// Try to break a LiveInterval into its component values.
-  std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
-
-    DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
-
-    std::vector<LiveInterval*> added;
-    SmallVector<VNInfo*, 4> vnis;
-
-    std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
-    for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
-         vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
-      VNInfo *vni = *vniItr;
-
-      // Skip unused VNIs.
-      if (vni->isUnused())
-        continue;
-
-      DEBUG(dbgs() << "  Extracted Val #" << vni->id << " as ");
-      LiveInterval *splitInterval = extractVNI(li, vni);
-
-      if (splitInterval != 0) {
-        DEBUG(dbgs() << *splitInterval << "\n");
-        added.push_back(splitInterval);
-        alreadySplit.insert(splitInterval);
-      } else {
-        DEBUG(dbgs() << "0\n");
-      }
-    }
-
-    DEBUG(dbgs() << "Original LI: " << *li << "\n");
-
-    // If there original interval still contains some live ranges
-    // add it to added and alreadySplit.
-    if (!li->empty()) {
-      added.push_back(li);
-      alreadySplit.insert(li);
-    }
-
-    return added;
-  }
-
-  /// Extract the given value number from the interval.
-  LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
-    assert(vni->isDefAccurate() || vni->isPHIDef());
-
-    // Create a new vreg and live interval, copy VNI ranges over.
-    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
-    unsigned newVReg = mri->createVirtualRegister(trc);
-    vrm->grow();
-    LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
-    VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
-
-    // Start by copying all live ranges in the VN to the new interval.
-    for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
-         rItr != rEnd; ++rItr) {
-      if (rItr->valno == vni) {
-        newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
-      }
-    }
-
-    // Erase the old VNI & ranges.
-    li->removeValNo(vni);
-
-    // Collect all current uses of the register belonging to the given VNI.
-    // We'll use this to rename the register after we've dealt with the def.
-    std::set<MachineInstr*> uses;
-    for (MachineRegisterInfo::use_iterator
-         useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
-         useItr != useEnd; ++useItr) {
-      uses.insert(&*useItr);
-    }
-
-    // Process the def instruction for this VNI.
-    if (newVNI->isPHIDef()) {
-      // Insert a copy at the start of the MBB. The range proceeding the
-      // copy will be attached to the original LiveInterval.
-      MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
-      MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
-                                     tii->get(TargetOpcode::COPY), newVReg)
-                               .addReg(li->reg, RegState::Kill);
-      SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-      VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
-                                           0, false, lis->getVNInfoAllocator());
-      phiDefVNI->setIsPHIDef(true);
-      li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
-      LiveRange *oldPHIDefRange =
-        newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
-
-      // If the old phi def starts in the middle of the range chop it up.
-      if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
-        LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
-                                  oldPHIDefRange->valno);
-        oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
-        newLI->addRange(oldPHIDefRange2);
-      } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
-        // Otherwise if it's at the start of the range just trim it.
-        oldPHIDefRange->start = copyIdx.getDefIndex();
-      } else {
-        assert(false && "PHI def range doesn't cover PHI def?");
-      }
-
-      newVNI->def = copyIdx.getDefIndex();
-      newVNI->setCopy(copyMI);
-      newVNI->setIsPHIDef(false); // not a PHI def anymore.
-      newVNI->setIsDefAccurate(true);
-    } else {
-      // non-PHI def. Rename the def. If it's two-addr that means renaming the
-      // use and inserting a new copy too.
-      MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
-      // We'll rename this now, so we can remove it from uses.
-      uses.erase(defInst);
-      unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
-      bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
-        twoAddrUseIsUndef = false;
-
-      for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
-        MachineOperand &mo = defInst->getOperand(i);
-        if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
-          mo.setReg(newVReg);
-          if (isTwoAddr && mo.isUse() && mo.isUndef())
-            twoAddrUseIsUndef = true;
-        }
-      }
-
-      SlotIndex defIdx = lis->getInstructionIndex(defInst);
-      newVNI->def = defIdx.getDefIndex();
-
-      if (isTwoAddr && !twoAddrUseIsUndef) {
-        MachineBasicBlock *defMBB = defInst->getParent();
-        MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
-                                       tii->get(TargetOpcode::COPY), newVReg)
-                                 .addReg(li->reg, RegState::Kill);
-        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-        LiveRange *origUseRange =
-          li->getLiveRangeContaining(newVNI->def.getUseIndex());
-        origUseRange->end = copyIdx.getDefIndex();
-        VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
-                                              true, lis->getVNInfoAllocator());
-        LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
-        newLI->addRange(copyRange);
-      }
-    }
-
-    for (std::set<MachineInstr*>::iterator
-         usesItr = uses.begin(), usesEnd = uses.end();
-         usesItr != usesEnd; ++usesItr) {
-      MachineInstr *useInst = *usesItr;
-      SlotIndex useIdx = lis->getInstructionIndex(useInst);
-      LiveRange *useRange =
-        newLI->getLiveRangeContaining(useIdx.getUseIndex());
-
-      // If this use doesn't belong to the new interval skip it.
-      if (useRange == 0)
-        continue;
-
-      // This use doesn't belong to the VNI, skip it.
-      if (useRange->valno != newVNI)
-        continue;
-
-      // Check if this instr is two address.
-      unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
-      bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
-      // Rename uses (and defs for two-address instrs).
-      for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
-        MachineOperand &mo = useInst->getOperand(i);
-        if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
-            (mo.getReg() == li->reg)) {
-          mo.setReg(newVReg);
-        }
-      }
-
-      // If this is a two address instruction we've got some extra work to do.
-      if (isTwoAddress) {
-        // We modified the def operand, so we need to copy back to the original
-        // reg.
-        MachineBasicBlock *useMBB = useInst->getParent();
-        MachineBasicBlock::iterator useItr(useInst);
-        MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
-                                       tii->get(TargetOpcode::COPY), newVReg)
-                                 .addReg(li->reg, RegState::Kill);
-        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
-        // Change the old two-address defined range & vni to start at
-        // (and be defined by) the copy.
-        LiveRange *origDefRange =
-          li->getLiveRangeContaining(useIdx.getDefIndex());
-        origDefRange->start = copyIdx.getDefIndex();
-        origDefRange->valno->def = copyIdx.getDefIndex();
-        origDefRange->valno->setCopy(copyMI);
-
-        // Insert a new range & vni for the two-address-to-copy value. This
-        // will be attached to the new live interval.
-        VNInfo *copyVNI =
-          newLI->getNextValue(useIdx.getDefIndex(), 0, true,
-                              lis->getVNInfoAllocator());
-        LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
-        newLI->addRange(copyRange);
-      }
-    }
-
-    // Iterate over any PHI kills - we'll need to insert new copies for them.
-    for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
-         LRI != LRE; ++LRI) {
-      if (LRI->valno != newVNI || LRI->end.isPHI())
-        continue;
-      SlotIndex killIdx = LRI->end;
-      MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
-      MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
-                                     DebugLoc(), tii->get(TargetOpcode::COPY),
-                                     li->reg)
-                               .addReg(newVReg, RegState::Kill);
-      SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
-      // Save the current end. We may need it to add a new range if the
-      // current range runs of the end of the MBB.
-      SlotIndex newKillRangeEnd = LRI->end;
-      LRI->end = copyIdx.getDefIndex();
-
-      if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
-        assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
-               "PHI kill range doesn't reach kill-block end. Not sane.");
-        newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
-                                  newKillRangeEnd, newVNI));
-      }
-
-      VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
-                                            copyMI, true,
-                                            lis->getVNInfoAllocator());
-      newKillVNI->setHasPHIKill(true);
-      li->addRange(LiveRange(copyIdx.getDefIndex(),
-                             lis->getMBBEndIdx(killMBB),
-                             newKillVNI));
-    }
-    newVNI->setHasPHIKill(false);
-
-    return newLI;
-  }
-
 };
 
 } // end anonymous namespace
 
-
-namespace llvm {
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
-                             MachineFunction &mf,
-                             VirtRegMap &vrm);
-}
-
 llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
                                    MachineFunction &mf,
                                    VirtRegMap &vrm) {
@@ -511,7 +238,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
   default: assert(0 && "unknown spiller");
   case trivial: return new TrivialSpiller(pass, mf, vrm);
   case standard: return new StandardSpiller(pass, mf, vrm);
-  case splitting: return new SplittingSpiller(pass, mf, vrm);
   case inline_: return createInlineSpiller(pass, mf, vrm);
   }
 }
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 59bc0ec6ae70..f017583494ed 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -10,14 +10,13 @@
 #ifndef LLVM_CODEGEN_SPILLER_H
 #define LLVM_CODEGEN_SPILLER_H
 
-#include "llvm/ADT/SmallVector.h"
-
 namespace llvm {
 
   class LiveInterval;
   class MachineFunction;
   class MachineFunctionPass;
   class SlotIndex;
+  template <typename T> class SmallVectorImpl;
   class VirtRegMap;
 
   /// Spiller interface.
@@ -37,7 +36,7 @@ namespace llvm {
     /// @param newIntervals  The newly created intervals will be appended here.
     virtual void spill(LiveInterval *li,
                        SmallVectorImpl<LiveInterval*> &newIntervals,
-                       SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+                       const SmallVectorImpl<LiveInterval*> &spillIs) = 0;
 
   };
 
@@ -45,6 +44,13 @@ namespace llvm {
   Spiller* createSpiller(MachineFunctionPass &pass,
                          MachineFunction &mf,
                          VirtRegMap &vrm);
+
+  /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring though VirtRegMap.
+  Spiller *createInlineSpiller(MachineFunctionPass &pass,
+                               MachineFunction &mf,
+                               VirtRegMap &vrm);
+
 }
 
 #endif
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 29474f0d5512..5663936bf3aa 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -12,13 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "splitter"
+#define DEBUG_TYPE "regalloc"
 #include "SplitKit.h"
+#include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -36,371 +37,231 @@ AllowSplit("spiller-splits-edges",
 //                                 Split Analysis
 //===----------------------------------------------------------------------===//
 
-SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
                              const LiveIntervals &lis,
                              const MachineLoopInfo &mli)
-  : mf_(mf),
-    lis_(lis),
-    loops_(mli),
-    tii_(*mf.getTarget().getInstrInfo()),
-    curli_(0) {}
+  : MF(vrm.getMachineFunction()),
+    VRM(vrm),
+    LIS(lis),
+    Loops(mli),
+    TII(*MF.getTarget().getInstrInfo()),
+    CurLI(0) {}
 
 void SplitAnalysis::clear() {
-  usingInstrs_.clear();
-  usingBlocks_.clear();
-  usingLoops_.clear();
-  curli_ = 0;
+  UseSlots.clear();
+  UsingInstrs.clear();
+  UsingBlocks.clear();
+  LiveBlocks.clear();
+  CurLI = 0;
 }
 
 bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
   MachineBasicBlock *T, *F;
   SmallVector<MachineOperand, 4> Cond;
-  return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+  return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
 }
 
-/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
 void SplitAnalysis::analyzeUses() {
-  const MachineRegisterInfo &MRI = mf_.getRegInfo();
-  for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
-       MachineInstr *MI = I.skipInstruction();) {
-    if (MI->isDebugValue() || !usingInstrs_.insert(MI))
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg),
+       E = MRI.reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    if (MO.isUse() && MO.isUndef())
       continue;
-    MachineBasicBlock *MBB = MI->getParent();
-    if (usingBlocks_[MBB]++)
+    MachineInstr *MI = MO.getParent();
+    if (MI->isDebugValue() || !UsingInstrs.insert(MI))
       continue;
-    if (MachineLoop *Loop = loops_.getLoopFor(MBB))
-      usingLoops_[Loop]++;
+    UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex());
+    MachineBasicBlock *MBB = MI->getParent();
+    UsingBlocks[MBB]++;
   }
+  array_pod_sort(UseSlots.begin(), UseSlots.end());
+  calcLiveBlockInfo();
   DEBUG(dbgs() << "  counted "
-               << usingInstrs_.size() << " instrs, "
-               << usingBlocks_.size() << " blocks, "
-               << usingLoops_.size()  << " loops.\n");
+               << UsingInstrs.size() << " instrs, "
+               << UsingBlocks.size() << " blocks.\n");
 }
 
-/// removeUse - Update statistics by noting that MI no longer uses curli.
-void SplitAnalysis::removeUse(const MachineInstr *MI) {
-  if (!usingInstrs_.erase(MI))
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+void SplitAnalysis::calcLiveBlockInfo() {
+  if (CurLI->empty())
     return;
 
-  // Decrement MBB count.
-  const MachineBasicBlock *MBB = MI->getParent();
-  BlockCountMap::iterator bi = usingBlocks_.find(MBB);
-  assert(bi != usingBlocks_.end() && "MBB missing");
-  assert(bi->second && "0 count in map");
-  if (--bi->second)
-    return;
-  // No more uses in MBB.
-  usingBlocks_.erase(bi);
+  LiveInterval::const_iterator LVI = CurLI->begin();
+  LiveInterval::const_iterator LVE = CurLI->end();
+
+  SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+  UseI = UseSlots.begin();
+  UseE = UseSlots.end();
+
+  // Loop over basic blocks where CurLI is live.
+  MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+  for (;;) {
+    BlockInfo BI;
+    BI.MBB = MFI;
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+    // The last split point is the latest possible insertion point that dominates
+    // all successor blocks. If interference reaches LastSplitPoint, it is not
+    // possible to insert a split or reload that makes CurLI live in the
+    // outgoing bundle.
+    MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB);
+    if (LSP == BI.MBB->end())
+      BI.LastSplitPoint = Stop;
+    else
+      BI.LastSplitPoint = LIS.getInstructionIndex(LSP);
+
+    // LVI is the first live segment overlapping MBB.
+    BI.LiveIn = LVI->start <= Start;
+    if (!BI.LiveIn)
+      BI.Def = LVI->start;
+
+    // Find the first and last uses in the block.
+    BI.Uses = hasUses(MFI);
+    if (BI.Uses && UseI != UseE) {
+      BI.FirstUse = *UseI;
+      assert(BI.FirstUse >= Start);
+      do ++UseI;
+      while (UseI != UseE && *UseI < Stop);
+      BI.LastUse = UseI[-1];
+      assert(BI.LastUse < Stop);
+    }
 
-  // Decrement loop count.
-  MachineLoop *Loop = loops_.getLoopFor(MBB);
-  if (!Loop)
-    return;
-  LoopCountMap::iterator li = usingLoops_.find(Loop);
-  assert(li != usingLoops_.end() && "Loop missing");
-  assert(li->second && "0 count in map");
-  if (--li->second)
-    return;
-  // No more blocks in Loop.
-  usingLoops_.erase(li);
-}
+    // Look for gaps in the live range.
+    bool hasGap = false;
+    BI.LiveOut = true;
+    while (LVI->end < Stop) {
+      SlotIndex LastStop = LVI->end;
+      if (++LVI == LVE || LVI->start >= Stop) {
+        BI.Kill = LastStop;
+        BI.LiveOut = false;
+        break;
+      }
+      if (LastStop < LVI->start) {
+        hasGap = true;
+        BI.Kill = LastStop;
+        BI.Def = LVI->start;
+      }
+    }
 
-// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
-// predecessor blocks, and exit blocks.
-void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
-  Blocks.clear();
-
-  // Blocks in the loop.
-  Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
-
-  // Predecessor blocks.
-  const MachineBasicBlock *Header = Loop->getHeader();
-  for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
-       E = Header->pred_end(); I != E; ++I)
-    if (!Blocks.Loop.count(*I))
-      Blocks.Preds.insert(*I);
-
-  // Exit blocks.
-  for (MachineLoop::block_iterator I = Loop->block_begin(),
-       E = Loop->block_end(); I != E; ++I) {
-    const MachineBasicBlock *MBB = *I;
-    for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
-       SE = MBB->succ_end(); SI != SE; ++SI)
-      if (!Blocks.Loop.count(*SI))
-        Blocks.Exits.insert(*SI);
-  }
-}
+    // Don't set LiveThrough when the block has a gap.
+    BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut;
+    LiveBlocks.push_back(BI);
 
-/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
-/// and around the Loop.
-SplitAnalysis::LoopPeripheralUse SplitAnalysis::
-analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
-  LoopPeripheralUse use = ContainedInLoop;
-  for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
-       I != E; ++I) {
-    const MachineBasicBlock *MBB = I->first;
-    // Is this a peripheral block?
-    if (use < MultiPeripheral &&
-        (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
-      if (I->second > 1) use = MultiPeripheral;
-      else               use = SinglePeripheral;
-      continue;
-    }
-    // Is it a loop block?
-    if (Blocks.Loop.count(MBB))
-      continue;
-    // It must be an unrelated block.
-    return OutsideLoop;
-  }
-  return use;
-}
+    // LVI is now at LVE or LVI->end >= Stop.
+    if (LVI == LVE)
+      break;
 
-/// getCriticalExits - It may be necessary to partially break critical edges
-/// leaving the loop if an exit block has phi uses of curli. Collect the exit
-/// blocks that need special treatment into CriticalExits.
-void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
-                                     BlockPtrSet &CriticalExits) {
-  CriticalExits.clear();
-
-  // A critical exit block contains a phi def of curli, and has a predecessor
-  // that is not in the loop nor a loop predecessor.
-  // For such an exit block, the edges carrying the new variable must be moved
-  // to a new pre-exit block.
-  for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
-       I != E; ++I) {
-    const MachineBasicBlock *Succ = *I;
-    SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
-    VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
-    // This exit may not have curli live in at all. No need to split.
-    if (!SuccVNI)
-      continue;
-    // If this is not a PHI def, it is either using a value from before the
-    // loop, or a value defined inside the loop. Both are safe.
-    if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
-      continue;
-    // This exit block does have a PHI. Does it also have a predecessor that is
-    // not a loop block or loop predecessor?
-    for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
-         PE = Succ->pred_end(); PI != PE; ++PI) {
-      const MachineBasicBlock *Pred = *PI;
-      if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
-        continue;
-      // This is a critical exit block, and we need to split the exit edge.
-      CriticalExits.insert(Succ);
+    // Live segment ends exactly at Stop. Move to the next segment.
+    if (LVI->end == Stop && ++LVI == LVE)
       break;
-    }
+
+    // Pick the next basic block.
+    if (LVI->start < Stop)
+      ++MFI;
+    else
+      MFI = LIS.getMBBFromIndex(LVI->start);
   }
 }
 
-/// canSplitCriticalExits - Return true if it is possible to insert new exit
-/// blocks before the blocks in CriticalExits.
-bool
-SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
-                                     BlockPtrSet &CriticalExits) {
-  // If we don't allow critical edge splitting, require no critical exits.
-  if (!AllowSplit)
-    return CriticalExits.empty();
-
-  for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
-       I != E; ++I) {
-    const MachineBasicBlock *Succ = *I;
-    // We want to insert a new pre-exit MBB before Succ, and change all the
-    // in-loop blocks to branch to the pre-exit instead of Succ.
-    // Check that all the in-loop predecessors can be changed.
-    for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
-         PE = Succ->pred_end(); PI != PE; ++PI) {
-      const MachineBasicBlock *Pred = *PI;
-      // The external predecessors won't be altered.
-      if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
-        continue;
-      if (!canAnalyzeBranch(Pred))
-        return false;
-    }
-
-    // If Succ's layout predecessor falls through, that too must be analyzable.
-    // We need to insert the pre-exit block in the gap.
-    MachineFunction::const_iterator MFI = Succ;
-    if (MFI == mf_.begin())
-      continue;
-    if (!canAnalyzeBranch(--MFI))
-      return false;
+void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const {
+  for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) {
+    unsigned count = UsingBlocks.lookup(*I);
+    OS << " BB#" << (*I)->getNumber();
+    if (count)
+      OS << '(' << count << ')';
   }
-  // No problems found.
-  return true;
 }
 
 void SplitAnalysis::analyze(const LiveInterval *li) {
   clear();
-  curli_ = li;
+  CurLI = li;
   analyzeUses();
 }
 
-const MachineLoop *SplitAnalysis::getBestSplitLoop() {
-  assert(curli_ && "Call analyze() before getBestSplitLoop");
-  if (usingLoops_.empty())
-    return 0;
-
-  LoopPtrSet Loops, SecondLoops;
-  LoopBlocks Blocks;
-  BlockPtrSet CriticalExits;
-
-  // Find first-class and second class candidate loops.
-  // We prefer to split around loops where curli is used outside the periphery.
-  for (LoopCountMap::const_iterator I = usingLoops_.begin(),
-       E = usingLoops_.end(); I != E; ++I) {
-    const MachineLoop *Loop = I->first;
-    getLoopBlocks(Loop, Blocks);
-
-    // FIXME: We need an SSA updater to properly handle multiple exit blocks.
-    if (Blocks.Exits.size() > 1) {
-      DEBUG(dbgs() << "  multiple exits from " << *Loop);
-      continue;
-    }
-
-    LoopPtrSet *LPS = 0;
-    switch(analyzeLoopPeripheralUse(Blocks)) {
-    case OutsideLoop:
-      LPS = &Loops;
-      break;
-    case MultiPeripheral:
-      LPS = &SecondLoops;
-      break;
-    case ContainedInLoop:
-      DEBUG(dbgs() << "  contained in " << *Loop);
-      continue;
-    case SinglePeripheral:
-      DEBUG(dbgs() << "  single peripheral use in " << *Loop);
-      continue;
-    }
-    // Will it be possible to split around this loop?
-    getCriticalExits(Blocks, CriticalExits);
-    DEBUG(dbgs() << "  " << CriticalExits.size() << " critical exits from "
-                 << *Loop);
-    if (!canSplitCriticalExits(Blocks, CriticalExits))
-      continue;
-    // This is a possible split.
-    assert(LPS);
-    LPS->insert(Loop);
-  }
-
-  DEBUG(dbgs() << "  getBestSplitLoop found " << Loops.size() << " + "
-               << SecondLoops.size() << " candidate loops.\n");
-
-  // If there are no first class loops available, look at second class loops.
-  if (Loops.empty())
-    Loops = SecondLoops;
 
-  if (Loops.empty())
-    return 0;
+//===----------------------------------------------------------------------===//
+//                               LiveIntervalMap
+//===----------------------------------------------------------------------===//
 
-  // Pick the earliest loop.
-  // FIXME: Are there other heuristics to consider?
-  const MachineLoop *Best = 0;
-  SlotIndex BestIdx;
-  for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
-       ++I) {
-    SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
-    if (!Best || Idx < BestIdx)
-      Best = *I, BestIdx = Idx;
-  }
-  DEBUG(dbgs() << "  getBestSplitLoop found " << *Best);
-  return Best;
+// Work around the fact that the std::pair constructors are broken for pointer
+// pairs in some implementations. makeVV(x, 0) works.
+static inline std::pair<const VNInfo*, VNInfo*>
+makeVV(const VNInfo *a, VNInfo *b) {
+  return std::make_pair(a, b);
 }
 
-/// getMultiUseBlocks - if curli has more than one use in a basic block, it
-/// may be an advantage to split curli for the duration of the block.
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
-  // If curli is local to one block, there is no point to splitting it.
-  if (usingBlocks_.size() <= 1)
-    return false;
-  // Add blocks with multiple uses.
-  for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
-       I != E; ++I)
-    switch (I->second) {
-    case 0:
-    case 1:
-      continue;
-    case 2: {
-      // It doesn't pay to split a 2-instr block if it redefines curli.
-      VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first));
-      VNInfo *VN2 =
-        curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex());
-      // live-in and live-out with a different value.
-      if (VN1 && VN2 && VN1 != VN2)
-        continue;
-    } // Fall through.
-    default:
-      Blocks.insert(I->first);
-    }
-  return !Blocks.empty();
+void LiveIntervalMap::reset(LiveInterval *li) {
+  LI = li;
+  Values.clear();
+  LiveOutCache.clear();
 }
 
-//===----------------------------------------------------------------------===//
-//                               LiveIntervalMap
-//===----------------------------------------------------------------------===//
+bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const {
+  ValueMap::const_iterator i = Values.find(ParentVNI);
+  return i != Values.end() && i->second == 0;
+}
 
-// defValue - Introduce a li_ def for ParentVNI that could be later than
+// defValue - Introduce a LI def for ParentVNI that could be later than
 // ParentVNI->def.
 VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+  assert(LI && "call reset first");
   assert(ParentVNI && "Mapping  NULL value");
   assert(Idx.isValid() && "Invalid SlotIndex");
-  assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
-
-  // Is this a simple 1-1 mapping? Not likely.
-  if (Idx == ParentVNI->def)
-    return mapValue(ParentVNI, Idx);
-
-  // This is a complex def. Mark with a NULL in valueMap.
-  VNInfo *OldVNI =
-    valueMap_.insert(
-      ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second;
-      // The static_cast<VNInfo *> is only needed to work around a bug in an
-      // old version of the C++0x standard which the following compilers
-      // implemented and have yet to fix:
-      //
-      // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
-      // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
-      //
-      // If/When we move to C++0x, this can be replaced by nullptr.
-  (void)OldVNI;
-  assert(OldVNI == 0 && "Simple/Complex values mixed");
-
-  // Should we insert a minimal snippet of VNI LiveRange, or can we count on
-  // callers to do that? We need it for lookups of complex values.
-  VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator());
+  assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+  // Create a new value.
+  VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+  // Preserve the PHIDef bit.
+  if (ParentVNI->isPHIDef() && Idx == ParentVNI->def)
+    VNI->setIsPHIDef(true);
+
+  // Use insert for lookup, so we can add missing values with a second lookup.
+  std::pair<ValueMap::iterator,bool> InsP =
+    Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0));
+
+  // This is now a complex def. Mark with a NULL in valueMap.
+  if (!InsP.second)
+    InsP.first->second = 0;
+
   return VNI;
 }
 
+
 // mapValue - Find the mapped value for ParentVNI at Idx.
 // Potentially create phi-def values.
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx,
+                                  bool *simple) {
+  assert(LI && "call reset first");
   assert(ParentVNI && "Mapping  NULL value");
   assert(Idx.isValid() && "Invalid SlotIndex");
-  assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+  assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
 
   // Use insert for lookup, so we can add missing values with a second lookup.
   std::pair<ValueMap::iterator,bool> InsP =
-    valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0)));
-    // The static_cast<VNInfo *> is only needed to work around a bug in an
-    // old version of the C++0x standard which the following compilers
-    // implemented and have yet to fix:
-    //
-    // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
-    // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
-    //
-    // If/When we move to C++0x, this can be replaced by nullptr.
+    Values.insert(makeVV(ParentVNI, 0));
 
   // This was an unknown value. Create a simple mapping.
-  if (InsP.second)
-    return InsP.first->second = li_.createValueCopy(ParentVNI,
-                                                    lis_.getVNInfoAllocator());
+  if (InsP.second) {
+    if (simple) *simple = true;
+    return InsP.first->second = LI->createValueCopy(ParentVNI,
+                                                     LIS.getVNInfoAllocator());
+  }
+
   // This was a simple mapped value.
-  if (InsP.first->second)
+  if (InsP.first->second) {
+    if (simple) *simple = true;
     return InsP.first->second;
+  }
 
   // This is a complex mapped value. There may be multiple defs, and we may need
   // to create phi-defs.
-  MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx);
+  if (simple) *simple = false;
+  MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
   assert(IdxMBB && "No MBB at Idx");
 
   // Is there a def in the same MBB we can extend?
@@ -409,157 +270,260 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
 
   // Now for the fun part. We know that ParentVNI potentially has multiple defs,
   // and we may need to create even more phi-defs to preserve VNInfo SSA form.
-  // Perform a depth-first search for predecessor blocks where we know the
-  // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
-
-  // Track MBBs where we have created or learned the dominating value.
-  // This may change during the DFS as we create new phi-defs.
-  typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap;
-  MBBValueMap DomValue;
-
-  for (idf_iterator<MachineBasicBlock*>
-         IDFI = idf_begin(IdxMBB),
-         IDFE = idf_end(IdxMBB); IDFI != IDFE;) {
-    MachineBasicBlock *MBB = *IDFI;
-    SlotIndex End = lis_.getMBBEndIdx(MBB);
-
-    // We are operating on the restricted CFG where ParentVNI is live.
-    if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) {
-      IDFI.skipChildren();
-      continue;
-    }
-
-    // Do we have a dominating value in this block?
-    VNInfo *VNI = extendTo(MBB, End);
-    if (!VNI) {
-      ++IDFI;
-      continue;
+  // Perform a search for all predecessor blocks where we know the dominating
+  // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
+  DEBUG(dbgs() << "\n  Reaching defs for BB#" << IdxMBB->getNumber()
+               << " at " << Idx << " in " << *LI << '\n');
+
+  // Blocks where LI should be live-in.
+  SmallVector<MachineDomTreeNode*, 16> LiveIn;
+  LiveIn.push_back(MDT[IdxMBB]);
+
+  // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
+  for (unsigned i = 0; i != LiveIn.size(); ++i) {
+    MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+           PE = MBB->pred_end(); PI != PE; ++PI) {
+       MachineBasicBlock *Pred = *PI;
+       // Is this a known live-out block?
+       std::pair<LiveOutMap::iterator,bool> LOIP =
+         LiveOutCache.insert(std::make_pair(Pred, LiveOutPair()));
+       // Yes, we have been here before.
+       if (!LOIP.second) {
+         DEBUG(if (VNInfo *VNI = LOIP.first->second.first)
+                 dbgs() << "    known valno #" << VNI->id
+                        << " at BB#" << Pred->getNumber() << '\n');
+         continue;
+       }
+
+       // Does Pred provide a live-out value?
+       SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot();
+       if (VNInfo *VNI = extendTo(Pred, Last)) {
+         MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def);
+         DEBUG(dbgs() << "    found valno #" << VNI->id
+                      << " from BB#" << DefMBB->getNumber()
+                      << " at BB#" << Pred->getNumber() << '\n');
+         LiveOutPair &LOP = LOIP.first->second;
+         LOP.first = VNI;
+         LOP.second = MDT[DefMBB];
+         continue;
+       }
+       // No, we need a live-in value for Pred as well
+       if (Pred != IdxMBB)
+         LiveIn.push_back(MDT[Pred]);
     }
+  }
 
-    // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs
-    // as needed along the way.
-    for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
-      // Start from MBB's immediate successor. End at IdxMBB.
-      MachineBasicBlock *Succ = IDFI.getPath(PI-1);
-      std::pair<MBBValueMap::iterator, bool> InsP =
-        DomValue.insert(MBBValueMap::value_type(Succ, VNI));
-
-      // This is the first time we backtrack to Succ.
-      if (InsP.second)
-        continue;
-
-      // We reached Succ again with the same VNI. Nothing is going to change.
-      VNInfo *OVNI = InsP.first->second;
-      if (OVNI == VNI)
-        break;
+  // We may need to add phi-def values to preserve the SSA form.
+  // This is essentially the same iterative algorithm that SSAUpdater uses,
+  // except we already have a dominator tree, so we don't have to recompute it.
+  VNInfo *IdxVNI = 0;
+  unsigned Changes;
+  do {
+    Changes = 0;
+    DEBUG(dbgs() << "  Iterating over " << LiveIn.size() << " blocks.\n");
+    // Propagate live-out values down the dominator tree, inserting phi-defs when
+    // necessary. Since LiveIn was created by a BFS, going backwards makes it more
+    // likely for us to visit immediate dominators before their children.
+    for (unsigned i = LiveIn.size(); i; --i) {
+      MachineDomTreeNode *Node = LiveIn[i-1];
+      MachineBasicBlock *MBB = Node->getBlock();
+      MachineDomTreeNode *IDom = Node->getIDom();
+      LiveOutPair IDomValue;
+      // We need a live-in value to a block with no immediate dominator?
+      // This is probably an unreachable block that has survived somehow.
+      bool needPHI = !IDom;
+
+      // Get the IDom live-out value.
+      if (!needPHI) {
+        LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock());
+        if (I != LiveOutCache.end())
+          IDomValue = I->second;
+        else
+          // If IDom is outside our set of live-out blocks, there must be new
+          // defs, and we need a phi-def here.
+          needPHI = true;
+      }
 
-      // Succ already has a phi-def. No need to continue.
-      SlotIndex Start = lis_.getMBBStartIdx(Succ);
-      if (OVNI->def == Start)
-        break;
+      // IDom dominates all of our predecessors, but it may not be the immediate
+      // dominator. Check if any of them have live-out values that are properly
+      // dominated by IDom. If so, we need a phi-def here.
+      if (!needPHI) {
+        for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+               PE = MBB->pred_end(); PI != PE; ++PI) {
+          LiveOutPair Value = LiveOutCache[*PI];
+          if (!Value.first || Value.first == IDomValue.first)
+            continue;
+          // This predecessor is carrying something other than IDomValue.
+          // It could be because IDomValue hasn't propagated yet, or it could be
+          // because MBB is in the dominance frontier of that value.
+          if (MDT.dominates(IDom, Value.second)) {
+            needPHI = true;
+            break;
+          }
+        }
+      }
 
-      // We have a collision between the old and new VNI at Succ. That means
-      // neither dominates and we need a new phi-def.
-      VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
-      VNI->setIsPHIDef(true);
-      InsP.first->second = VNI;
-
-      // Replace OVNI with VNI in the remaining path.
-      for (; PI > 1 ; --PI) {
-        MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
-        if (I == DomValue.end() || I->second != OVNI)
-          break;
-        I->second = VNI;
+      // Create a phi-def if required.
+      if (needPHI) {
+        ++Changes;
+        SlotIndex Start = LIS.getMBBStartIdx(MBB);
+        VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
+        VNI->setIsPHIDef(true);
+        DEBUG(dbgs() << "    - BB#" << MBB->getNumber()
+                     << " phi-def #" << VNI->id << " at " << Start << '\n');
+        // We no longer need LI to be live-in.
+        LiveIn.erase(LiveIn.begin()+(i-1));
+        // Blocks in LiveIn are either IdxMBB, or have a value live-through.
+        if (MBB == IdxMBB)
+          IdxVNI = VNI;
+        // Check if we need to update live-out info.
+        LiveOutMap::iterator I = LiveOutCache.find(MBB);
+        if (I == LiveOutCache.end() || I->second.second == Node) {
+          // We already have a live-out defined in MBB, so this must be IdxMBB.
+          assert(MBB == IdxMBB && "Adding phi-def to known live-out");
+          LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI));
+        } else {
+          // This phi-def is also live-out, so color the whole block.
+          LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+          I->second = LiveOutPair(VNI, Node);
+        }
+      } else if (IDomValue.first) {
+        // No phi-def here. Remember incoming value for IdxMBB.
+        if (MBB == IdxMBB)
+          IdxVNI = IDomValue.first;
+        // Propagate IDomValue if needed:
+        // MBB is live-out and doesn't define its own value.
+        LiveOutMap::iterator I = LiveOutCache.find(MBB);
+        if (I != LiveOutCache.end() && I->second.second != Node &&
+            I->second.first != IDomValue.first) {
+          ++Changes;
+          I->second = IDomValue;
+          DEBUG(dbgs() << "    - BB#" << MBB->getNumber()
+                       << " idom valno #" << IDomValue.first->id
+                       << " from BB#" << IDom->getBlock()->getNumber() << '\n');
+        }
       }
     }
+    DEBUG(dbgs() << "  - made " << Changes << " changes.\n");
+  } while (Changes);
 
-    // No need to search the children, we found a dominating value.
-    IDFI.skipChildren();
-  }
+  assert(IdxVNI && "Didn't find value for Idx");
 
-  // The search should at least find a dominating value for IdxMBB.
-  assert(!DomValue.empty() && "Couldn't find a reaching definition");
+#ifndef NDEBUG
+  // Check the LiveOutCache invariants.
+  for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+         I != E; ++I) {
+    assert(I->first && "Null MBB entry in cache");
+    assert(I->second.first && "Null VNInfo in cache");
+    assert(I->second.second && "Null DomTreeNode in cache");
+    if (I->second.second->getBlock() == I->first)
+      continue;
+    for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+           PE = I->first->pred_end(); PI != PE; ++PI)
+      assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant");
+  }
+#endif
 
-  // Since we went through the trouble of a full DFS visiting all reaching defs,
-  // the values in DomValue are now accurate. No more phi-defs are needed for
-  // these blocks, so we can color the live ranges.
+  // Since we went through the trouble of a full BFS visiting all reaching defs,
+  // the values in LiveIn are now accurate. No more phi-defs are needed
+  // for these blocks, so we can color the live ranges.
   // This makes the next mapValue call much faster.
-  VNInfo *IdxVNI = 0;
-  for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
-       ++I) {
-     MachineBasicBlock *MBB = I->first;
-     VNInfo *VNI = I->second;
-     SlotIndex Start = lis_.getMBBStartIdx(MBB);
-     if (MBB == IdxMBB) {
-       // Don't add full liveness to IdxMBB, stop at Idx.
-       if (Start != Idx)
-         li_.addRange(LiveRange(Start, Idx, VNI));
-       // The caller had better add some liveness to IdxVNI, or it leaks.
-       IdxVNI = VNI;
-     } else
-      li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+  for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+    SlotIndex Start = LIS.getMBBStartIdx(MBB);
+    VNInfo *VNI = LiveOutCache.lookup(MBB).first;
+
+    // Anything in LiveIn other than IdxMBB is live-through.
+    // In IdxMBB, we should stop at Idx unless the same value is live-out.
+    if (MBB == IdxMBB && IdxVNI != VNI)
+      LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI));
+    else
+      LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
   }
 
-  assert(IdxVNI && "Didn't find value for Idx");
   return IdxVNI;
 }
 
-// extendTo - Find the last li_ value defined in MBB at or before Idx. The
-// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+#ifndef NDEBUG
+void LiveIntervalMap::dumpCache() {
+  for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+         I != E; ++I) {
+    assert(I->first && "Null MBB entry in cache");
+    assert(I->second.first && "Null VNInfo in cache");
+    assert(I->second.second && "Null DomTreeNode in cache");
+    dbgs() << "    cache: BB#" << I->first->getNumber()
+           << " has valno #" << I->second.first->id << " from BB#"
+           << I->second.second->getBlock()->getNumber() << ", preds";
+    for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+           PE = I->first->pred_end(); PI != PE; ++PI)
+      dbgs() << " BB#" << (*PI)->getNumber();
+    dbgs() << '\n';
+  }
+  dbgs() << "    cache: " << LiveOutCache.size() << " entries.\n";
+}
+#endif
+
+// extendTo - Find the last LI value defined in MBB at or before Idx. The
+// ParentLI is assumed to be live at Idx. Extend the live range to Idx.
 // Return the found VNInfo, or NULL.
-VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
-  LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
-  if (I == li_.begin())
+VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) {
+  assert(LI && "call reset first");
+  LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx);
+  if (I == LI->begin())
     return 0;
   --I;
-  if (I->start < lis_.getMBBStartIdx(MBB))
+  if (I->end <= LIS.getMBBStartIdx(MBB))
     return 0;
-  if (I->end < Idx)
-    I->end = Idx;
+  if (I->end <= Idx)
+    I->end = Idx.getNextSlot();
   return I->valno;
 }
 
-// addSimpleRange - Add a simple range from parentli_ to li_.
+// addSimpleRange - Add a simple range from ParentLI to LI.
 // ParentVNI must be live in the [Start;End) interval.
 void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
                                      const VNInfo *ParentVNI) {
-  VNInfo *VNI = mapValue(ParentVNI, Start);
-  // A simple mappoing is easy.
-  if (VNI->def == ParentVNI->def) {
-    li_.addRange(LiveRange(Start, End, VNI));
+  assert(LI && "call reset first");
+  bool simple;
+  VNInfo *VNI = mapValue(ParentVNI, Start, &simple);
+  // A simple mapping is easy.
+  if (simple) {
+    LI->addRange(LiveRange(Start, End, VNI));
     return;
   }
 
   // ParentVNI is a complex value. We must map per MBB.
-  MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
-  MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+  MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+  MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot());
 
   if (MBB == MBBE) {
-    li_.addRange(LiveRange(Start, End, VNI));
+    LI->addRange(LiveRange(Start, End, VNI));
     return;
   }
 
   // First block.
-  li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+  LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
 
   // Run sequence of full blocks.
   for (++MBB; MBB != MBBE; ++MBB) {
-    Start = lis_.getMBBStartIdx(MBB);
-    li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
-                           mapValue(ParentVNI, Start)));
+    Start = LIS.getMBBStartIdx(MBB);
+    LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB),
+                            mapValue(ParentVNI, Start)));
   }
 
   // Final block.
-  Start = lis_.getMBBStartIdx(MBB);
+  Start = LIS.getMBBStartIdx(MBB);
   if (Start != End)
-    li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+    LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
 }
 
-/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
 /// All needed values whose def is not inside [Start;End) must be defined
 /// beforehand so mapValue will work.
 void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
-  LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+  assert(LI && "call reset first");
+  LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end();
   LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
 
   // Check if --I begins before Start and overlaps.
@@ -575,403 +539,374 @@ void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
     addSimpleRange(I->start, std::min(End, I->end), I->valno);
 }
 
+
 //===----------------------------------------------------------------------===//
 //                               Split Editor
 //===----------------------------------------------------------------------===//
 
 /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
-                         SmallVectorImpl<LiveInterval*> &intervals)
-  : sa_(sa), lis_(lis), vrm_(vrm),
-    mri_(vrm.getMachineFunction().getRegInfo()),
-    tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
-    curli_(sa_.getCurLI()),
-    dupli_(0), openli_(0),
-    intervals_(intervals),
-    firstInterval(intervals_.size())
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+                         LiveIntervals &lis,
+                         VirtRegMap &vrm,
+                         MachineDominatorTree &mdt,
+                         LiveRangeEdit &edit)
+  : SA(sa), LIS(lis), VRM(vrm),
+    MRI(vrm.getMachineFunction().getRegInfo()),
+    MDT(mdt),
+    TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+    TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+    Edit(edit),
+    OpenIdx(0),
+    RegAssign(Allocator)
 {
-  assert(curli_ && "SplitEditor created from empty SplitAnalysis");
-
-  // Make sure curli_ is assigned a stack slot, so all our intervals get the
-  // same slot as curli_.
-  if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
-    vrm_.assignVirt2StackSlot(curli_->reg);
-
+  // We don't need an AliasAnalysis since we will only be performing
+  // cheap-as-a-copy remats anyway.
+  Edit.anyRematerializable(LIS, TII, 0);
 }
 
-LiveInterval *SplitEditor::createInterval() {
-  unsigned curli = sa_.getCurLI()->reg;
-  unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
-  LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
-  vrm_.grow();
-  vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
-  return &Intv;
+void SplitEditor::dump() const {
+  if (RegAssign.empty()) {
+    dbgs() << " empty\n";
+    return;
+  }
+
+  for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+    dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+  dbgs() << '\n';
 }
 
-LiveInterval *SplitEditor::getDupLI() {
-  if (!dupli_) {
-    // Create an interval for dupli that is a copy of curli.
-    dupli_ = createInterval();
-    dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+                                   VNInfo *ParentVNI,
+                                   SlotIndex UseIdx,
+                                   MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I) {
+  MachineInstr *CopyMI = 0;
+  SlotIndex Def;
+  LiveInterval *LI = Edit.get(RegIdx);
+
+  // Attempt cheap-as-a-copy rematerialization.
+  LiveRangeEdit::Remat RM(ParentVNI);
+  if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) {
+    Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI);
+  } else {
+    // Can't remat, just insert a copy from parent.
+    CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+               .addReg(Edit.getReg());
+    Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex();
   }
-  return dupli_;
-}
 
-VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
-  VNInfo *&VNI = valueMap_[curliVNI];
-  if (!VNI)
-    VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
-  return VNI;
-}
+  // Define the value in Reg.
+  VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def);
+  VNI->setCopy(CopyMI);
 
-/// Insert a COPY instruction curli -> li. Allocate a new value from li
-/// defined by the COPY. Note that rewrite() will deal with the curli
-/// register, so this function can be used to copy from any interval - openli,
-/// curli, or dupli.
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
-                                MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator I) {
-  MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
-                             LI.reg).addReg(curli_->reg);
-  SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-  return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+  // Add minimal liveness for the new value.
+  Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+  return VNI;
 }
 
 /// Create a new virtual register and live interval.
 void SplitEditor::openIntv() {
-  assert(!openli_ && "Previous LI not closed before openIntv");
-  openli_ = createInterval();
-  intervals_.push_back(openli_);
-  liveThrough_ = false;
-}
+  assert(!OpenIdx && "Previous LI not closed before openIntv");
 
-/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
-/// not live before Idx, a COPY is not inserted.
-void SplitEditor::enterIntvBefore(SlotIndex Idx) {
-  assert(openli_ && "openIntv not called before enterIntvBefore");
-
-  // Copy from curli_ if it is live.
-  if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
-    MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
-    assert(MI && "enterIntvBefore called with invalid index");
-    VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
-    openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
-
-    // Make sure CurVNI is properly mapped.
-    VNInfo *&mapVNI = valueMap_[CurVNI];
-    // We dont have SSA update yet, so only one entry per value is allowed.
-    assert(!mapVNI && "enterIntvBefore called more than once for the same value");
-    mapVNI = VNI;
+  // Create the complement as index 0.
+  if (Edit.empty()) {
+    Edit.create(MRI, LIS, VRM);
+    LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+    LIMappers.back().reset(Edit.get(0));
   }
-  DEBUG(dbgs() << "    enterIntvBefore " << Idx << ": " << *openli_ << '\n');
-}
 
-/// enterIntvAtEnd - Enter openli at the end of MBB.
-/// PhiMBB is a successor inside openli where a PHI value is created.
-/// Currently, all entries must share the same PhiMBB.
-void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
-  assert(openli_ && "openIntv not called before enterIntvAtEnd");
-
-  SlotIndex EndA = lis_.getMBBEndIdx(&A);
-  VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
-  if (!CurVNIA) {
-    DEBUG(dbgs() << "    enterIntvAtEnd, curli not live out of BB#"
-                 << A.getNumber() << ".\n");
-    return;
-  }
+  // Create the open interval.
+  OpenIdx = Edit.size();
+  Edit.create(MRI, LIS, VRM);
+  LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+  LIMappers[OpenIdx].reset(Edit.get(OpenIdx));
+}
 
-  // Add a phi kill value and live range out of A.
-  VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
-  openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
-
-  // FIXME: If this is the only entry edge, we don't need the extra PHI value.
-  // FIXME: If there are multiple entry blocks (so not a loop), we need proper
-  // SSA update.
-
-  // Now look at the start of B.
-  SlotIndex StartB = lis_.getMBBStartIdx(&B);
-  SlotIndex EndB = lis_.getMBBEndIdx(&B);
-  const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
-  if (!CurB) {
-    DEBUG(dbgs() << "    enterIntvAtEnd: curli not live in to BB#"
-                 << B.getNumber() << ".\n");
-    return;
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+  assert(OpenIdx && "openIntv not called before enterIntvBefore");
+  DEBUG(dbgs() << "    enterIntvBefore " << Idx);
+  Idx = Idx.getBaseIndex();
+  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return Idx;
   }
+  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+  MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+  assert(MI && "enterIntvBefore called with invalid index");
 
-  VNInfo *VNIB = openli_->getVNInfoAt(StartB);
-  if (!VNIB) {
-    // Create a phi value.
-    VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
-                                 lis_.getVNInfoAllocator());
-    VNIB->setIsPHIDef(true);
-    VNInfo *&mapVNI = valueMap_[CurB->valno];
-    if (mapVNI) {
-      // Multiple copies - must create PHI value.
-      abort();
-    } else {
-      // This is the first copy of dupLR. Mark the mapping.
-      mapVNI = VNIB;
-    }
+  VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+  return VNI->def;
+}
 
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+  assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+  SlotIndex End = LIS.getMBBEndIdx(&MBB);
+  SlotIndex Last = End.getPrevSlot();
+  DEBUG(dbgs() << "    enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return End;
   }
-
-  DEBUG(dbgs() << "    enterIntvAtEnd: " << *openli_ << '\n');
+  DEBUG(dbgs() << ": valno " << ParentVNI->id);
+  VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+                              LIS.getLastSplitPoint(Edit.getParent(), &MBB));
+  RegAssign.insert(VNI->def, End, OpenIdx);
+  DEBUG(dump());
+  return VNI->def;
 }
 
-/// useIntv - indicate that all instructions in MBB should use openli.
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
 void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
-  useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+  useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
 }
 
 void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
-  assert(openli_ && "openIntv not called before useIntv");
+  assert(OpenIdx && "openIntv not called before useIntv");
+  DEBUG(dbgs() << "    useIntv [" << Start << ';' << End << "):");
+  RegAssign.insert(Start, End, OpenIdx);
+  DEBUG(dump());
+}
 
-  // Map the curli values from the interval into openli_
-  LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
-  LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+  assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+  DEBUG(dbgs() << "    leaveIntvAfter " << Idx);
 
-  if (I != B) {
-    --I;
-    // I begins before Start, but overlaps.
-    if (I->end > Start)
-      openli_->addRange(LiveRange(Start, std::min(End, I->end),
-                        mapValue(I->valno)));
-    ++I;
+  // The interval must be live beyond the instruction at Idx.
+  Idx = Idx.getBoundaryIndex();
+  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return Idx.getNextSlot();
   }
+  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
 
-  // The remaining ranges begin after Start.
-  for (;I != E && I->start < End; ++I)
-    openli_->addRange(LiveRange(I->start, std::min(End, I->end),
-                                mapValue(I->valno)));
-  DEBUG(dbgs() << "    use [" << Start << ';' << End << "): " << *openli_
-               << '\n');
+  MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+  assert(MI && "No instruction at index");
+  VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+                              llvm::next(MachineBasicBlock::iterator(MI)));
+  return VNI->def;
 }
 
-/// leaveIntvAfter - Leave openli after the instruction at Idx.
-void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
-  assert(openli_ && "openIntv not called before leaveIntvAfter");
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+  assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+  DEBUG(dbgs() << "    leaveIntvBefore " << Idx);
 
-  const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
-  if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
-    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": not live\n");
-    return;
+  // The interval must be live into the instruction at Idx.
+  Idx = Idx.getBoundaryIndex();
+  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return Idx.getNextSlot();
   }
+  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
 
-  // Was this value of curli live through openli?
-  if (!openli_->liveAt(CurLR->valno->def)) {
-    DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": using external value\n");
-    liveThrough_ = true;
-    return;
-  }
-
-  // We are going to insert a back copy, so we must have a dupli_.
-  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
-  assert(DupLR && "dupli not live into black, but curli is?");
-
-  // Insert the COPY instruction.
-  MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
-  MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
-                             tii_.get(TargetOpcode::COPY), dupli_->reg)
-                       .addReg(openli_->reg);
-  SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-  openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
-                    mapValue(CurLR->valno)));
-  DupLR->valno->def = CopyIdx;
-  DEBUG(dbgs() << "    leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+  MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+  assert(MI && "No instruction at index");
+  VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+  return VNI->def;
 }
 
-/// leaveIntvAtTop - Leave the interval at the top of MBB.
-/// Currently, only one value can leave the interval.
-void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
-  assert(openli_ && "openIntv not called before leaveIntvAtTop");
-
-  SlotIndex Start = lis_.getMBBStartIdx(&MBB);
-  const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
-
-  // Is curli even live-in to MBB?
-  if (!CurLR) {
-    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start << ": not live\n");
-    return;
-  }
-
-  // Is curli defined by PHI at the beginning of MBB?
-  bool isPHIDef = CurLR->valno->isPHIDef() &&
-                  CurLR->valno->def.getBaseIndex() == Start;
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+  assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+  SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+  DEBUG(dbgs() << "    leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
 
-  // If MBB is using a value of curli that was defined outside the openli range,
-  // we don't want to copy it back here.
-  if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
-    DEBUG(dbgs() << "    leaveIntvAtTop at " << Start
-                 << ": using external value\n");
-    liveThrough_ = true;
-    return;
+  VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return Start;
   }
 
-  // We are going to insert a back copy, so we must have a dupli_.
-  LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
-  assert(DupLR && "dupli not live into black, but curli is?");
-
-  // Insert the COPY instruction.
-  MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
-                             tii_.get(TargetOpcode::COPY), dupli_->reg)
-                       .addReg(openli_->reg);
-  SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-
-  // Adjust dupli and openli values.
-  if (isPHIDef) {
-    // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
-    // and shift the dupli def down to the COPY.
-    VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
-                                        lis_.getVNInfoAllocator());
-    VNI->setIsPHIDef(true);
-    openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
-    dupli_->removeRange(Start, Idx);
-    DupLR->valno->def = Idx;
-    DupLR->valno->setIsPHIDef(false);
-  } else {
-    // The dupli value was defined somewhere inside the openli range.
-    DEBUG(dbgs() << "    leaveIntvAtTop source value defined at "
-                 << DupLR->valno->def << "\n");
-    // FIXME: We may not need a PHI here if all predecessors have the same
-    // value.
-    VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
-                                        lis_.getVNInfoAllocator());
-    VNI->setIsPHIDef(true);
-    openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
-    // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
-
-    // closeIntv is going to remove the superfluous live ranges.
-    DupLR->valno->def = Idx;
-    DupLR->valno->setIsPHIDef(false);
-  }
+  VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+                              MBB.SkipPHIsAndLabels(MBB.begin()));
+  RegAssign.insert(Start, VNI->def, OpenIdx);
+  DEBUG(dump());
+  return VNI->def;
+}
 
-  DEBUG(dbgs() << "    leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+  assert(OpenIdx && "openIntv not called before overlapIntv");
+  assert(Edit.getParent().getVNInfoAt(Start) ==
+         Edit.getParent().getVNInfoAt(End.getPrevSlot()) &&
+         "Parent changes value in extended range");
+  assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*");
+  assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+         "Range cannot span basic blocks");
+
+  // Treat this as useIntv() for now. The complement interval will be extended
+  // as needed by mapValue().
+  DEBUG(dbgs() << "    overlapIntv [" << Start << ';' << End << "):");
+  RegAssign.insert(Start, End, OpenIdx);
+  DEBUG(dump());
 }
 
 /// closeIntv - Indicate that we are done editing the currently open
 /// LiveInterval, and ranges can be trimmed.
 void SplitEditor::closeIntv() {
-  assert(openli_ && "openIntv not called before closeIntv");
-
-  DEBUG(dbgs() << "    closeIntv cleaning up\n");
-  DEBUG(dbgs() << "    open " << *openli_ << '\n');
-
-  if (liveThrough_) {
-    DEBUG(dbgs() << "    value live through region, leaving dupli as is.\n");
-  } else {
-    // live out with copies inserted, or killed by region. Either way we need to
-    // remove the overlapping region from dupli.
-    getDupLI();
-    for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
-         I != E; ++I) {
-      dupli_->removeRange(I->start, I->end);
-    }
-    // FIXME: A block branching to the entry block may also branch elsewhere
-    // curli is live. We need both openli and curli to be live in that case.
-    DEBUG(dbgs() << "    dup2 " << *dupli_ << '\n');
-  }
-  openli_ = 0;
-  valueMap_.clear();
+  assert(OpenIdx && "openIntv not called before closeIntv");
+  OpenIdx = 0;
 }
 
-/// rewrite - after all the new live ranges have been created, rewrite
-/// instructions using curli to use the new intervals.
-void SplitEditor::rewrite() {
-  assert(!openli_ && "Previous LI not closed before rewrite");
-  const LiveInterval *curli = sa_.getCurLI();
-  for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
-       RE = mri_.reg_end(); RI != RE;) {
+/// rewriteAssigned - Rewrite all uses of Edit.getReg().
+void SplitEditor::rewriteAssigned() {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()),
+       RE = MRI.reg_end(); RI != RE;) {
     MachineOperand &MO = RI.getOperand();
     MachineInstr *MI = MO.getParent();
     ++RI;
+    // LiveDebugVariables should have handled all DBG_VALUE instructions.
     if (MI->isDebugValue()) {
       DEBUG(dbgs() << "Zapping " << *MI);
-      // FIXME: We can do much better with debug values.
       MO.setReg(0);
       continue;
     }
-    SlotIndex Idx = lis_.getInstructionIndex(MI);
-    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
-    LiveInterval *LI = dupli_;
-    for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
-      LiveInterval *testli = intervals_[i];
-      if (testli->liveAt(Idx)) {
-        LI = testli;
-        break;
-      }
-    }
-    if (LI) {
-      MO.setReg(LI->reg);
-      sa_.removeUse(MI);
-      DEBUG(dbgs() << "  rewrite " << Idx << '\t' << *MI);
-    }
-  }
 
-  // dupli_ goes in last, after rewriting.
-  if (dupli_) {
-    if (dupli_->empty()) {
-      DEBUG(dbgs() << "  dupli became empty?\n");
-      lis_.removeInterval(dupli_->reg);
-      dupli_ = 0;
-    } else {
-      dupli_->RenumberValues(lis_);
-      intervals_.push_back(dupli_);
+    // <undef> operands don't really read the register, so just assign them to
+    // the complement.
+    if (MO.isUse() && MO.isUndef()) {
+      MO.setReg(Edit.get(0)->reg);
+      continue;
     }
+
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+    // Rewrite to the mapped register at Idx.
+    unsigned RegIdx = RegAssign.lookup(Idx);
+    MO.setReg(Edit.get(RegIdx)->reg);
+    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+                 << Idx << ':' << RegIdx << '\t' << *MI);
+
+    // Extend liveness to Idx.
+    const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+    LIMappers[RegIdx].mapValue(ParentVNI, Idx);
   }
+}
 
-  // Calculate spill weight and allocation hints for new intervals.
-  VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
-  for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
-    LiveInterval &li = *intervals_[i];
-    vrai.CalculateRegClass(li.reg);
-    vrai.CalculateWeightAndHint(li);
-    DEBUG(dbgs() << "  new interval " << mri_.getRegClass(li.reg)->getName()
-                 << ":" << li << '\n');
+/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping.
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+                                    const ConnectedVNInfoEqClasses &ConEq) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
+       RE = MRI.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    if (MO.isUse() && MO.isUndef())
+      continue;
+    // DBG_VALUE instructions should have been eliminated earlier.
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+                 << Idx << ':');
+    const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
+    assert(VNI && "Interval not live at use.");
+    MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
+    DEBUG(dbgs() << VNI->id << '\t' << *MI);
   }
 }
 
+void SplitEditor::finish() {
+  assert(OpenIdx == 0 && "Previous LI not closed before rewrite");
 
-//===----------------------------------------------------------------------===//
-//                               Loop Splitting
-//===----------------------------------------------------------------------===//
+  // At this point, the live intervals in Edit contain VNInfos corresponding to
+  // the inserted copies.
 
-bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
-  SplitAnalysis::LoopBlocks Blocks;
-  sa_.getLoopBlocks(Loop, Blocks);
+  // Add the original defs from the parent interval.
+  for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+         E = Edit.getParent().vni_end(); I != E; ++I) {
+    const VNInfo *ParentVNI = *I;
+    if (ParentVNI->isUnused())
+      continue;
+    LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)];
+    VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def);
+    LIM.getLI()->addRange(LiveRange(ParentVNI->def,
+                                    ParentVNI->def.getNextSlot(), VNI));
+    // Mark all values as complex to force liveness computation.
+    // This should really only be necessary for remat victims, but we are lazy.
+    LIM.markComplexMapped(ParentVNI);
+  }
 
-  // Break critical edges as needed.
-  SplitAnalysis::BlockPtrSet CriticalExits;
-  sa_.getCriticalExits(Blocks, CriticalExits);
-  assert(CriticalExits.empty() && "Cannot break critical exits yet");
+#ifndef NDEBUG
+  // Every new interval must have a def by now, otherwise the split is bogus.
+  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+    assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
+#endif
+
+  // FIXME: Don't recompute the liveness of all values, infer it from the
+  // overlaps between the parent live interval and RegAssign.
+  // The mapValue algorithm is only necessary when:
+  // - The parent value maps to multiple defs, and new phis are needed, or
+  // - The value has been rematerialized before some uses, and we want to
+  //   minimize the live range so it only reaches the remaining uses.
+  // All other values have simple liveness that can be computed from RegAssign
+  // and the parent live interval.
+
+  // Extend live ranges to be live-out for successor PHI values.
+  for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+       E = Edit.getParent().vni_end(); I != E; ++I) {
+    const VNInfo *PHIVNI = *I;
+    if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+      continue;
+    unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+    LiveIntervalMap &LIM = LIMappers[RegIdx];
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+    DEBUG(dbgs() << "  map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def
+                 << " -> " << RegIdx << '\n');
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+      DEBUG(dbgs() << "    pred BB#" << (*PI)->getNumber() << '@' << End);
+      // The predecessor may not have a live-out value. That is OK, like an
+      // undef PHI operand.
+      if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) {
+        DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n");
+        assert(RegAssign.lookup(End) == RegIdx &&
+               "Different register assignment in phi predecessor");
+        LIM.mapValue(VNI, End);
+      }
+      else
+        DEBUG(dbgs() << " is not live-out\n");
+    }
+    DEBUG(dbgs() << "    " << *LIM.getLI() << '\n');
+  }
 
-  // Create new live interval for the loop.
-  openIntv();
+  // Rewrite instructions.
+  rewriteAssigned();
 
-  // Insert copies in the predecessors.
-  for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
-       E = Blocks.Preds.end(); I != E; ++I) {
-    MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
-    enterIntvAtEnd(MBB, *Loop->getHeader());
-  }
+  // FIXME: Delete defs that were rematted everywhere.
 
-  // Switch all loop blocks.
-  for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
-       E = Blocks.Loop.end(); I != E; ++I)
-     useIntv(**I);
+  // Get rid of unused values and set phi-kill flags.
+  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+    (*I)->RenumberValues(LIS);
 
-  // Insert back copies in the exit blocks.
-  for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
-       E = Blocks.Exits.end(); I != E; ++I) {
-    MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
-    leaveIntvAtTop(MBB);
+  // Now check if any registers were separated into multiple components.
+  ConnectedVNInfoEqClasses ConEQ(LIS);
+  for (unsigned i = 0, e = Edit.size(); i != e; ++i) {
+    // Don't use iterators, they are invalidated by create() below.
+    LiveInterval *li = Edit.get(i);
+    unsigned NumComp = ConEQ.Classify(li);
+    if (NumComp <= 1)
+      continue;
+    DEBUG(dbgs() << "  " << NumComp << " components: " << *li << '\n');
+    SmallVector<LiveInterval*, 8> dups;
+    dups.push_back(li);
+    for (unsigned i = 1; i != NumComp; ++i)
+      dups.push_back(&Edit.create(MRI, LIS, VRM));
+    rewriteComponents(dups, ConEQ);
+    ConEQ.Distribute(&dups[0]);
   }
 
-  // Done.
-  closeIntv();
-  rewrite();
-  return dupli_;
+  // Calculate spill weight and allocation hints for new intervals.
+  VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops);
+  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){
+    LiveInterval &li = **I;
+    vrai.CalculateRegClass(li.reg);
+    vrai.CalculateWeightAndHint(li);
+    DEBUG(dbgs() << "  new interval " << MRI.getRegClass(li.reg)->getName()
+                 << ":" << li << '\n');
+  }
 }
 
 
@@ -979,45 +914,50 @@ bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
 //                            Single Block Splitting
 //===----------------------------------------------------------------------===//
 
-/// splitSingleBlocks - Split curli into a separate live interval inside each
-/// basic block in Blocks. Return true if curli has been completely replaced,
-/// false if curli is still intact, and needs to be spilled or split further.
-bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
-  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
-  // Determine the first and last instruction using curli in each block.
-  typedef std::pair<SlotIndex,SlotIndex> IndexPair;
-  typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
-  IndexPairMap MBBRange;
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I) {
-    const MachineBasicBlock *MBB = (*I)->getParent();
-    if (!Blocks.count(MBB))
+/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
+/// may be an advantage to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+  // If CurLI is local to one block, there is no point to splitting it.
+  if (LiveBlocks.size() <= 1)
+    return false;
+  // Add blocks with multiple uses.
+  for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
+    const BlockInfo &BI = LiveBlocks[i];
+    if (!BI.Uses)
       continue;
-    SlotIndex Idx = lis_.getInstructionIndex(*I);
-    DEBUG(dbgs() << "  BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
-    IndexPair &IP = MBBRange[MBB];
-    if (!IP.first.isValid() || Idx < IP.first)
-      IP.first = Idx;
-    if (!IP.second.isValid() || Idx > IP.second)
-      IP.second = Idx;
+    unsigned Instrs = UsingBlocks.lookup(BI.MBB);
+    if (Instrs <= 1)
+      continue;
+    if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+      continue;
+    Blocks.insert(BI.MBB);
   }
+  return !Blocks.empty();
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
 
-  // Create a new interval for each block.
-  for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
-       E = Blocks.end(); I != E; ++I) {
-    IndexPair &IP = MBBRange[*I];
-    DEBUG(dbgs() << "  splitting for BB#" << (*I)->getNumber() << ": ["
-                 << IP.first << ';' << IP.second << ")\n");
-    assert(IP.first.isValid() && IP.second.isValid());
+  for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+    const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+    if (!BI.Uses || !Blocks.count(BI.MBB))
+      continue;
 
     openIntv();
-    enterIntvBefore(IP.first);
-    useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
-    leaveIntvAfter(IP.second);
+    SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
+    if (BI.LastUse < BI.LastSplitPoint) {
+      useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+    } else {
+      // THe last use os after tha last valid split point.
+      SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+      useIntv(SegStart, SegStop);
+      overlapIntv(SegStop, BI.LastUse);
+    }
     closeIntv();
   }
-  rewrite();
-  return dupli_;
+  finish();
 }
 
 
@@ -1025,31 +965,29 @@ bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
 //                            Sub Block Splitting
 //===----------------------------------------------------------------------===//
 
-/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
 /// and it wou pay to subdivide the interval inside that block, return it.
 /// Otherwise return NULL. The returned block can be passed to
 /// SplitEditor::splitInsideBlock.
 const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
   // The interval must be exclusive to one block.
-  if (usingBlocks_.size() != 1)
+  if (UsingBlocks.size() != 1)
     return 0;
   // Don't to this for less than 4 instructions. We want to be sure that
   // splitting actually reduces the instruction count per interval.
-  if (usingInstrs_.size() < 4)
+  if (UsingInstrs.size() < 4)
     return 0;
-  return usingBlocks_.begin()->first;
+  return UsingBlocks.begin()->first;
 }
 
-/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-/// true if curli has been completely replaced, false if curli is still
-/// intact, and needs to be spilled or split further.
-bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   SmallVector<SlotIndex, 32> Uses;
-  Uses.reserve(sa_.usingInstrs_.size());
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I)
+  Uses.reserve(SA.UsingInstrs.size());
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
+       E = SA.UsingInstrs.end(); I != E; ++I)
     if ((*I)->getParent() == MBB)
-      Uses.push_back(lis_.getInstructionIndex(*I));
+      Uses.push_back(LIS.getInstructionIndex(*I));
   DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
                << Uses.size() << " instructions.\n");
   assert(Uses.size() >= 3 && "Need at least 3 instructions");
@@ -1077,21 +1015,16 @@ bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   // First interval before the gap. Don't create single-instr intervals.
   if (bestPos > 1) {
     openIntv();
-    enterIntvBefore(Uses.front());
-    useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
-    leaveIntvAfter(Uses[bestPos-1]);
+    useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
     closeIntv();
   }
 
   // Second interval after the gap.
   if (bestPos < Uses.size()-1) {
     openIntv();
-    enterIntvBefore(Uses[bestPos]);
-    useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
-    leaveIntvAfter(Uses.back());
+    useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back()));
     closeIntv();
   }
 
-  rewrite();
-  return dupli_;
+  finish();
 }
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index ddef7461dc3d..5c34afd1c819 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,125 +12,132 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 
 namespace llvm {
 
+class ConnectedVNInfoEqClasses;
 class LiveInterval;
 class LiveIntervals;
+class LiveRangeEdit;
 class MachineInstr;
-class MachineLoop;
 class MachineLoopInfo;
 class MachineRegisterInfo;
 class TargetInstrInfo;
+class TargetRegisterInfo;
 class VirtRegMap;
 class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
 
 /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
 /// opportunities.
 class SplitAnalysis {
 public:
-  const MachineFunction &mf_;
-  const LiveIntervals &lis_;
-  const MachineLoopInfo &loops_;
-  const TargetInstrInfo &tii_;
+  const MachineFunction &MF;
+  const VirtRegMap &VRM;
+  const LiveIntervals &LIS;
+  const MachineLoopInfo &Loops;
+  const TargetInstrInfo &TII;
 
   // Instructions using the the current register.
   typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
-  InstrPtrSet usingInstrs_;
+  InstrPtrSet UsingInstrs;
+
+  // Sorted slot indexes of using instructions.
+  SmallVector<SlotIndex, 8> UseSlots;
 
-  // The number of instructions using curli in each basic block.
+  // The number of instructions using CurLI in each basic block.
   typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
-  BlockCountMap usingBlocks_;
+  BlockCountMap UsingBlocks;
+
+  /// Additional information about basic blocks where the current variable is
+  /// live. Such a block will look like one of these templates:
+  ///
+  ///  1. |   o---x   | Internal to block. Variable is only live in this block.
+  ///  2. |---x       | Live-in, kill.
+  ///  3. |       o---| Def, live-out.
+  ///  4. |---x   o---| Live-in, kill, def, live-out.
+  ///  5. |---o---o---| Live-through with uses or defs.
+  ///  6. |-----------| Live-through without uses. Transparent.
+  ///
+  struct BlockInfo {
+    MachineBasicBlock *MBB;
+    SlotIndex FirstUse;   ///< First instr using current reg.
+    SlotIndex LastUse;    ///< Last instr using current reg.
+    SlotIndex Kill;       ///< Interval end point inside block.
+    SlotIndex Def;        ///< Interval start point inside block.
+    /// Last possible point for splitting live ranges.
+    SlotIndex LastSplitPoint;
+    bool Uses;            ///< Current reg has uses or defs in block.
+    bool LiveThrough;     ///< Live in whole block (Templ 5. or 6. above).
+    bool LiveIn;          ///< Current reg is live in.
+    bool LiveOut;         ///< Current reg is live out.
+
+    // Per-interference pattern scratch data.
+    bool OverlapEntry;    ///< Interference overlaps entering interval.
+    bool OverlapExit;     ///< Interference overlaps exiting interval.
+  };
 
-  // The number of basic block using curli in each loop.
-  typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
-  LoopCountMap usingLoops_;
+  /// Basic blocks where var is live. This array is parallel to
+  /// SpillConstraints.
+  SmallVector<BlockInfo, 8> LiveBlocks;
 
 private:
   // Current live interval.
-  const LiveInterval *curli_;
+  const LiveInterval *CurLI;
 
-  // Sumarize statistics by counting instructions using curli_.
+  // Sumarize statistics by counting instructions using CurLI.
   void analyzeUses();
 
+  /// calcLiveBlockInfo - Compute per-block information about CurLI.
+  void calcLiveBlockInfo();
+
   /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
   /// analyzed.
   bool canAnalyzeBranch(const MachineBasicBlock *MBB);
 
 public:
-  SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+  SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
                 const MachineLoopInfo &mli);
 
-  /// analyze - set curli to the specified interval, and analyze how it may be
+  /// analyze - set CurLI to the specified interval, and analyze how it may be
   /// split.
   void analyze(const LiveInterval *li);
 
-  /// removeUse - Update statistics by noting that mi no longer uses curli.
-  void removeUse(const MachineInstr *mi);
-
-  const LiveInterval *getCurLI() { return curli_; }
-
   /// clear - clear all data structures so SplitAnalysis is ready to analyze a
   /// new interval.
   void clear();
 
-  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
-  typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
-
-  // Sets of basic blocks surrounding a machine loop.
-  struct LoopBlocks {
-    BlockPtrSet Loop;  // Blocks in the loop.
-    BlockPtrSet Preds; // Loop predecessor blocks.
-    BlockPtrSet Exits; // Loop exit blocks.
-
-    void clear() {
-      Loop.clear();
-      Preds.clear();
-      Exits.clear();
-    }
-  };
-
-  // Calculate the block sets surrounding the loop.
-  void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
-
-  /// LoopPeripheralUse - how is a variable used in and around a loop?
-  /// Peripheral blocks are the loop predecessors and exit blocks.
-  enum LoopPeripheralUse {
-    ContainedInLoop,  // All uses are inside the loop.
-    SinglePeripheral, // At most one instruction per peripheral block.
-    MultiPeripheral,  // Multiple instructions in some peripheral blocks.
-    OutsideLoop       // Uses outside loop periphery.
-  };
-
-  /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
-  /// and around the Loop.
-  LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+  /// getParent - Return the last analyzed interval.
+  const LiveInterval &getParent() const { return *CurLI; }
 
-  /// getCriticalExits - It may be necessary to partially break critical edges
-  /// leaving the loop if an exit block has phi uses of curli. Collect the exit
-  /// blocks that need special treatment into CriticalExits.
-  void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+  /// hasUses - Return true if MBB has any uses of CurLI.
+  bool hasUses(const MachineBasicBlock *MBB) const {
+    return UsingBlocks.lookup(MBB);
+  }
 
-  /// canSplitCriticalExits - Return true if it is possible to insert new exit
-  /// blocks before the blocks in CriticalExits.
-  bool canSplitCriticalExits(const LoopBlocks &Blocks,
-                             BlockPtrSet &CriticalExits);
+  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
 
-  /// getBestSplitLoop - Return the loop where curli may best be split to a
-  /// separate register, or NULL.
-  const MachineLoop *getBestSplitLoop();
+  // Print a set of blocks with use counts.
+  void print(const BlockPtrSet&, raw_ostream&) const;
 
   /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
-  /// having curli split to a new live interval. Return true if Blocks can be
+  /// having CurLI split to a new live interval. Return true if Blocks can be
   /// passed to SplitEditor::splitSingleBlocks.
   bool getMultiUseBlocks(BlockPtrSet &Blocks);
 
-  /// getBlockForInsideSplit - If curli is contained inside a single basic block,
-  /// and it wou pay to subdivide the interval inside that block, return it.
-  /// Otherwise return NULL. The returned block can be passed to
+  /// getBlockForInsideSplit - If CurLI is contained inside a single basic
+  /// block, and it would pay to subdivide the interval inside that block,
+  /// return it. Otherwise return NULL. The returned block can be passed to
   /// SplitEditor::splitInsideBlock.
   const MachineBasicBlock *getBlockForInsideSplit();
 };
@@ -140,58 +147,102 @@ public:
 /// interval that is a subset. Insert phi-def values as needed. This class is
 /// used by SplitEditor to create new smaller LiveIntervals.
 ///
-/// parentli_ is the larger interval, li_ is the subset interval. Every value
-/// in li_ corresponds to exactly one value in parentli_, and the live range
-/// of the value is contained within the live range of the parentli_ value.
-/// Values in parentli_ may map to any number of openli_ values, including 0.
+/// ParentLI is the larger interval, LI is the subset interval. Every value
+/// in LI corresponds to exactly one value in ParentLI, and the live range
+/// of the value is contained within the live range of the ParentLI value.
+/// Values in ParentLI may map to any number of OpenLI values, including 0.
 class LiveIntervalMap {
-  LiveIntervals &lis_;
+  LiveIntervals &LIS;
+  MachineDominatorTree &MDT;
 
   // The parent interval is never changed.
-  const LiveInterval &parentli_;
+  const LiveInterval &ParentLI;
 
-  // The child interval's values are fully contained inside parentli_ values.
-  LiveInterval &li_;
+  // The child interval's values are fully contained inside ParentLI values.
+  LiveInterval *LI;
 
   typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
 
-  // Map parentli_ values to simple values in li_ that are defined at the same
-  // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+  // Map ParentLI values to simple values in LI that are defined at the same
+  // SlotIndex, or NULL for ParentLI values that have complex LI defs.
   // Note there is a difference between values mapping to NULL (complex), and
   // values not present (unknown/unmapped).
-  ValueMap valueMap_;
-
-  // extendTo - Find the last li_ value defined in MBB at or before Idx. The
-  // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
-  // Return the found VNInfo, or NULL.
-  VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
-
-  // addSimpleRange - Add a simple range from parentli_ to li_.
-  // ParentVNI must be live in the [Start;End) interval.
-  void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+  ValueMap Values;
+
+  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+  typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap;
+
+  // LiveOutCache - Map each basic block where LI is live out to the live-out
+  // value and its defining block. One of these conditions shall be true:
+  //
+  //  1. !LiveOutCache.count(MBB)
+  //  2. LiveOutCache[MBB].second.getNode() == MBB
+  //  3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+  //
+  // This is only a cache, the values can be computed as:
+  //
+  //  VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+  //  Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
+  //
+  // The cache is also used as a visiteed set by mapValue().
+  LiveOutMap LiveOutCache;
+
+  // Dump the live-out cache to dbgs().
+  void dumpCache();
 
 public:
   LiveIntervalMap(LiveIntervals &lis,
-                  const LiveInterval &parentli,
-                  LiveInterval &li)
-    : lis_(lis), parentli_(parentli), li_(li) {}
+                  MachineDominatorTree &mdt,
+                  const LiveInterval &parentli)
+    : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {}
+
+  /// reset - clear all data structures and start a new live interval.
+  void reset(LiveInterval *);
+
+  /// getLI - return the current live interval.
+  LiveInterval *getLI() const { return LI; }
 
-  /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+  /// defValue - define a value in LI from the ParentLI value VNI and Idx.
   /// Idx does not have to be ParentVNI->def, but it must be contained within
-  /// ParentVNI's live range in parentli_.
-  /// Return the new li_ value.
+  /// ParentVNI's live range in ParentLI.
+  /// Return the new LI value.
   VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
 
-  /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+  /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is
   /// assumed that ParentVNI is live at Idx.
   /// If ParentVNI has not been defined by defValue, it is assumed that
   /// ParentVNI->def dominates Idx.
   /// If ParentVNI has been defined by defValue one or more times, a value that
   /// dominates Idx will be returned. This may require creating extra phi-def
-  /// values and adding live ranges to li_.
-  VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+  /// values and adding live ranges to LI.
+  /// If simple is not NULL, *simple will indicate if ParentVNI is a simply
+  /// mapped value.
+  VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0);
+
+  // extendTo - Find the last LI value defined in MBB at or before Idx. The
+  // parentli is assumed to be live at Idx. Extend the live range to include
+  // Idx. Return the found VNInfo, or NULL.
+  VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx);
+
+  /// isMapped - Return true is ParentVNI is a known mapped value. It may be a
+  /// simple 1-1 mapping or a complex mapping to later defs.
+  bool isMapped(const VNInfo *ParentVNI) const {
+    return Values.count(ParentVNI);
+  }
+
+  /// isComplexMapped - Return true if ParentVNI has received new definitions
+  /// with defValue.
+  bool isComplexMapped(const VNInfo *ParentVNI) const;
+
+  /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the
+  /// number of definitions.
+  void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; }
+
+  // addSimpleRange - Add a simple range from ParentLI to LI.
+  // ParentVNI must be live in the [Start;End) interval.
+  void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
 
-  /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+  /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
   /// All needed values whose def is not inside [Start;End) must be defined
   /// beforehand so mapValue will work.
   void addRange(SlotIndex Start, SlotIndex End);
@@ -207,115 +258,129 @@ public:
 /// - Mark the ranges where the new interval is used with useIntv* 
 /// - Mark the places where the interval is exited with exitIntv*.
 /// - Finish the current interval with closeIntv and repeat from 2.
-/// - Rewrite instructions with rewrite().
+/// - Rewrite instructions with finish().
 ///
 class SplitEditor {
-  SplitAnalysis &sa_;
-  LiveIntervals &lis_;
-  VirtRegMap &vrm_;
-  MachineRegisterInfo &mri_;
-  const TargetInstrInfo &tii_;
-
-  /// curli_ - The immutable interval we are currently splitting.
-  const LiveInterval *const curli_;
-
-  /// dupli_ - Created as a copy of curli_, ranges are carved out as new
-  /// intervals get added through openIntv / closeIntv. This is used to avoid
-  /// editing curli_.
-  LiveInterval *dupli_;
-
-  /// Currently open LiveInterval.
-  LiveInterval *openli_;
-
-  /// createInterval - Create a new virtual register and LiveInterval with same
-  /// register class and spill slot as curli.
-  LiveInterval *createInterval();
-
-  /// getDupLI - Ensure dupli is created and return it.
-  LiveInterval *getDupLI();
-
-  /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1
-  /// mappings, and do not include values created by inserted copies.
-  DenseMap<const VNInfo*, VNInfo*> valueMap_;
-
-  /// mapValue - Return the openIntv value that corresponds to the given curli
-  /// value.
-  VNInfo *mapValue(const VNInfo *curliVNI);
-
-  /// A dupli value is live through openIntv.
-  bool liveThrough_;
-
-  /// All the new intervals created for this split are added to intervals_.
-  SmallVectorImpl<LiveInterval*> &intervals_;
-
-  /// The index into intervals_ of the first interval we added. There may be
-  /// others from before we got it.
-  unsigned firstInterval;
-
-  /// Insert a COPY instruction curli -> li. Allocate a new value from li
-  /// defined by the COPY
-  VNInfo *insertCopy(LiveInterval &LI,
-                     MachineBasicBlock &MBB,
-                     MachineBasicBlock::iterator I);
+  SplitAnalysis &SA;
+  LiveIntervals &LIS;
+  VirtRegMap &VRM;
+  MachineRegisterInfo &MRI;
+  MachineDominatorTree &MDT;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+
+  /// Edit - The current parent register and new intervals created.
+  LiveRangeEdit &Edit;
+
+  /// Index into Edit of the currently open interval.
+  /// The index 0 is used for the complement, so the first interval started by
+  /// openIntv will be 1.
+  unsigned OpenIdx;
+
+  typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+  /// Allocator for the interval map. This will eventually be shared with
+  /// SlotIndexes and LiveIntervals.
+  RegAssignMap::Allocator Allocator;
+
+  /// RegAssign - Map of the assigned register indexes.
+  /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+  /// Idx.
+  RegAssignMap RegAssign;
+
+  /// LIMappers - One LiveIntervalMap or each interval in Edit.
+  SmallVector<LiveIntervalMap, 4> LIMappers;
+
+  /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+  /// rematerialization or a COPY from parent. Return the new value.
+  VNInfo *defFromParent(unsigned RegIdx,
+                        VNInfo *ParentVNI,
+                        SlotIndex UseIdx,
+                        MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator I);
+
+  /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+  void rewriteAssigned();
+
+  /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq
+  /// classes in ConEQ.
+  /// This must be done when Intvs[0] is styill live at all uses, before calling
+  /// ConEq.Distribute().
+  void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+                         const ConnectedVNInfoEqClasses &ConEq);
 
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
   SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
-              SmallVectorImpl<LiveInterval*> &newIntervals);
+              MachineDominatorTree&, LiveRangeEdit&);
 
   /// getAnalysis - Get the corresponding analysis.
-  SplitAnalysis &getAnalysis() { return sa_; }
+  SplitAnalysis &getAnalysis() { return SA; }
 
   /// Create a new virtual register and live interval.
   void openIntv();
 
-  /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
-  /// not live before Idx, a COPY is not inserted.
-  void enterIntvBefore(SlotIndex Idx);
+  /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+  /// If the parent interval is not live before Idx, a COPY is not inserted.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvBefore(SlotIndex Idx);
 
-  /// enterIntvAtEnd - Enter openli at the end of MBB.
-  /// PhiMBB is a successor inside openli where a PHI value is created.
-  /// Currently, all entries must share the same PhiMBB.
-  void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+  /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+  /// Use the open interval from he inserted copy to the MBB end.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
 
-  /// useIntv - indicate that all instructions in MBB should use openli.
+  /// useIntv - indicate that all instructions in MBB should use OpenLI.
   void useIntv(const MachineBasicBlock &MBB);
 
-  /// useIntv - indicate that all instructions in range should use openli.
+  /// useIntv - indicate that all instructions in range should use OpenLI.
   void useIntv(SlotIndex Start, SlotIndex End);
 
-  /// leaveIntvAfter - Leave openli after the instruction at Idx.
-  void leaveIntvAfter(SlotIndex Idx);
+  /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+  /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvBefore(SlotIndex Idx);
 
   /// leaveIntvAtTop - Leave the interval at the top of MBB.
-  /// Currently, only one value can leave the interval.
-  void leaveIntvAtTop(MachineBasicBlock &MBB);
+  /// Add liveness from the MBB top to the copy.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+  /// overlapIntv - Indicate that all instructions in range should use the open
+  /// interval, but also let the complement interval be live.
+  ///
+  /// This doubles the register pressure, but is sometimes required to deal with
+  /// register uses after the last valid split point.
+  ///
+  /// The Start index should be a return value from a leaveIntv* call, and End
+  /// should be in the same basic block. The parent interval must have the same
+  /// value across the range.
+  ///
+  void overlapIntv(SlotIndex Start, SlotIndex End);
 
   /// closeIntv - Indicate that we are done editing the currently open
   /// LiveInterval, and ranges can be trimmed.
   void closeIntv();
 
-  /// rewrite - after all the new live ranges have been created, rewrite
-  /// instructions using curli to use the new intervals.
-  void rewrite();
+  /// finish - after all the new live ranges have been created, compute the
+  /// remaining live range, and rewrite instructions to use the new registers.
+  void finish();
 
-  // ===--- High level methods ---===
+  /// dump - print the current interval maping to dbgs().
+  void dump() const;
 
-  /// splitAroundLoop - Split curli into a separate live interval inside
-  /// the loop. Return true if curli has been completely replaced, false if
-  /// curli is still intact, and needs to be spilled or split further.
-  bool splitAroundLoop(const MachineLoop*);
+  // ===--- High level methods ---===
 
-  /// splitSingleBlocks - Split curli into a separate live interval inside each
-  /// basic block in Blocks. Return true if curli has been completely replaced,
-  /// false if curli is still intact, and needs to be spilled or split further.
-  bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+  /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+  /// basic block in Blocks.
+  void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
 
-  /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-  /// true if curli has been completely replaced, false if curli is still
-  /// intact, and needs to be spilled or split further.
-  bool splitInsideBlock(const MachineBasicBlock *);
+  /// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+  void splitInsideBlock(const MachineBasicBlock *);
 };
 
 }
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index 38f3b1f4d35e..08aee82b8c5c 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -29,8 +29,14 @@
 using namespace llvm;
 
 char LoopSplitter::ID = 0;
-INITIALIZE_PASS(LoopSplitter, "loop-splitting",
-                "Split virtual regists across loop boundaries.", false, false);
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+                "Split virtual regists across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+                "Split virtual regists across loop boundaries.", false, false)
 
 namespace llvm {
 
@@ -140,7 +146,6 @@ namespace llvm {
       VNInfo *newVal = getNewVNI(preHeaderRange->valno);
       newVal->def = copyDefIdx;
       newVal->setCopy(copy);
-      newVal->setIsDefAccurate(true);
       li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
 
       getNewLI()->addRange(LiveRange(copyDefIdx,
@@ -174,13 +179,13 @@ namespace llvm {
         
         // Blow away output range definition.
         outRange->valno->def = ls.lis->getInvalidIndex();
-        outRange->valno->setIsDefAccurate(false);
         li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
 
+        SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+        assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+               "PHI def index points at actual instruction.");
         VNInfo *newVal =
-          getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
-                                             true),
-                                   0, false, ls.lis->getVNInfoAllocator());
+          getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
 
         getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
                                        copyDefIdx, newVal));
@@ -514,8 +519,10 @@ namespace llvm {
       if (!insertRange)
         continue;
 
-      VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader),
-                                       0, false, lis->getVNInfoAllocator());
+      SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+      assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+             "PHI def index points at actual instruction.");
+      VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
       li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
                             lis->getMBBEndIdx(preHeader),
                             newVal));
@@ -612,8 +619,11 @@ namespace llvm {
                          lis->getMBBEndIdx(splitBlock), true);
         }
       } else if (intersects) {
-        VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock),
-                                         0, false, lis->getVNInfoAllocator());
+        SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+        assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+               "PHI def index points at actual instruction.");
+        VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+                                         lis->getVNInfoAllocator());
         li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
                               lis->getMBBEndIdx(splitBlock),
                               newVal));
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
index a726a7b834fb..9fb1b8b30139 100644
--- a/lib/CodeGen/Splitter.h
+++ b/lib/CodeGen/Splitter.h
@@ -36,7 +36,9 @@ namespace llvm {
   public:
     static char ID;
 
-    LoopSplitter() : MachineFunctionPass(ID) {}
+    LoopSplitter() : MachineFunctionPass(ID) {
+      initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &au) const;
 
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 9f51778da756..fcaee4208ba3 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,6 +16,7 @@
 
 #define DEBUG_TYPE "stack-protector"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Attributes.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -45,6 +46,8 @@ namespace {
     Function *F;
     Module *M;
 
+    DominatorTree* DT;
+
     /// InsertStackProtectors - Insert code into the prologue and epilogue of
     /// the function.
     ///
@@ -62,9 +65,17 @@ namespace {
     bool RequiresStackProtector() const;
   public:
     static char ID;             // Pass identification, replacement for typeid.
-    StackProtector() : FunctionPass(ID), TLI(0) {}
+    StackProtector() : FunctionPass(ID), TLI(0) {
+      initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+    }
     StackProtector(const TargetLowering *tli)
-      : FunctionPass(ID), TLI(tli) {}
+      : FunctionPass(ID), TLI(tli) {
+        initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+      }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
+    }
 
     virtual bool runOnFunction(Function &Fn);
   };
@@ -72,7 +83,7 @@ namespace {
 
 char StackProtector::ID = 0;
 INITIALIZE_PASS(StackProtector, "stack-protector",
-                "Insert stack protectors", false, false);
+                "Insert stack protectors", false, false)
 
 FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
   return new StackProtector(tli);
@@ -81,6 +92,7 @@ FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
 bool StackProtector::runOnFunction(Function &Fn) {
   F = &Fn;
   M = F->getParent();
+  DT = getAnalysisIfAvailable<DominatorTree>();
 
   if (!RequiresStackProtector()) return false;
   
@@ -135,6 +147,7 @@ bool StackProtector::RequiresStackProtector() const {
 ///    value. It calls __stack_chk_fail if they differ.
 bool StackProtector::InsertStackProtectors() {
   BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
+  BasicBlock *FailBBDom = 0;    // FailBB's dominator.
   AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
   Value *StackGuardVar = 0;  // The stack guard variable.
 
@@ -178,6 +191,8 @@ bool StackProtector::InsertStackProtectors() {
 
       // Create the basic block to jump to when the guard check fails.
       FailBB = CreateFailBB();
+      if (DT)
+        FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
     }
 
     // For each block with a return instruction, convert this:
@@ -204,6 +219,10 @@ bool StackProtector::InsertStackProtectors() {
 
     // Split the basic block before the return instruction.
     BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+    if (DT) {
+      DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
+      FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+    }
 
     // Remove default branch instruction to the new BB.
     BB->getTerminator()->eraseFromParent();
@@ -223,6 +242,9 @@ bool StackProtector::InsertStackProtectors() {
   // statements in the function.
   if (!FailBB) return false;
 
+  if (DT)
+    DT->addNewBlock(FailBB, FailBBDom);
+
   return true;
 }
 
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 8d57ae95dde2..01f5b5627f4f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,13 @@ namespace {
   public:
     static char ID; // Pass identification
     StackSlotColoring() :
-      MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
+      MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+      }
     StackSlotColoring(bool RegColor) :
-      MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
+      MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+      }
     
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
@@ -145,8 +149,14 @@ namespace {
 
 char StackSlotColoring::ID = 0;
 
-INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
-                "Stack Slot Coloring", false, false);
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
 
 FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
   return new StackSlotColoring(RegColor);
@@ -208,7 +218,7 @@ void StackSlotColoring::InitializeSlots() {
   for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
     LiveInterval &li = i->second;
     DEBUG(li.dump());
-    int FI = li.getStackSlotIndex();
+    int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
     if (MFI->isDeadObjectIndex(FI))
       continue;
     SSIntervals.push_back(&li);
@@ -251,7 +261,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
   DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
-    int SS = li->getStackSlotIndex();
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
     if (!UsedColors[SS] || li->weight < 20)
       // If the weight is < 20, i.e. two references in a loop with depth 1,
       // don't bother with it.
@@ -340,7 +350,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
 
   // Record the assignment.
   Assignments[Color].push_back(li);
-  int FI = li->getStackSlotIndex();
+  int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
   DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
 
   // Change size and alignment of the allocated slot. If there are multiple
@@ -369,7 +379,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
   bool Changed = false;
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
-    int SS = li->getStackSlotIndex();
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
     int NewSS = ColorSlot(li);
     assert(NewSS >= 0 && "Stack coloring failed?");
     SlotMapping[SS] = NewSS;
@@ -382,7 +392,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
   DEBUG(dbgs() << "\nSpill slots after coloring:\n");
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
-    int SS = li->getStackSlotIndex();
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
     li->weight = SlotWeights[SS];
   }
   // Sort them by new weight.
@@ -636,7 +646,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
   } else {
     SmallVector<MachineInstr*, 4> NewMIs;
     bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
-    Success = Success; // Silence compiler warning.
+    (void)Success; // Silence compiler warning.
     assert(Success && "Failed to unfold!");
     MachineInstr *NewMI = NewMIs[0];
     MBB->insert(MI, NewMI);
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 894dbfa28bac..ec7829ec39fe 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -1,4 +1,4 @@
-//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,1039 +7,823 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass eliminates machine instruction PHI nodes by inserting copy
-// instructions, using an intelligent copy-folding technique based on
-// dominator information.  This is technique is derived from:
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
 // 
 //    Budimlic, et al. Fast copy coalescing and live-range identification.
 //    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
 //    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
 //    PLDI '02. ACM, New York, NY, 25-32.
-//    DOI= http://doi.acm.org/10.1145/512529.512534
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest.  This technique is explained in:
+//
+//   Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+//     Quality and Efficiency,
+//   In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+//   Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+//   CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
 namespace {
-  struct StrongPHIElimination : public MachineFunctionPass {
+  class StrongPHIElimination : public MachineFunctionPass {
+  public:
     static char ID; // Pass identification, replacement for typeid
-    StrongPHIElimination() : MachineFunctionPass(ID) {}
-
-    // Waiting stores, for each MBB, the set of copies that need to
-    // be inserted into that MBB
-    DenseMap<MachineBasicBlock*,
-             std::multimap<unsigned, unsigned> > Waiting;
-    
-    // Stacks holds the renaming stack for each register
-    std::map<unsigned, std::vector<unsigned> > Stacks;
-    
-    // Registers in UsedByAnother are PHI nodes that are themselves
-    // used as operands to another PHI node
-    std::set<unsigned> UsedByAnother;
-    
-    // RenameSets are the is a map from a PHI-defined register
-    // to the input registers to be coalesced along with the 
-    // predecessor block for those input registers.
-    std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
-    
-    // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
-    // eliminating, indexed by the register defined by that phi.
-    std::map<unsigned, unsigned> PhiValueNumber;
-
-    // Store the DFS-in number of each block
-    DenseMap<MachineBasicBlock*, unsigned> preorder;
-    
-    // Store the DFS-out number of each block
-    DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
-
-    bool runOnMachineFunction(MachineFunction &Fn);
-    
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      AU.addRequired<MachineDominatorTree>();
-      AU.addRequired<SlotIndexes>();
-      AU.addPreserved<SlotIndexes>();
-      AU.addRequired<LiveIntervals>();
-      
-      // TODO: Actually make this true.
-      AU.addPreserved<LiveIntervals>();
-      AU.addPreserved<RegisterCoalescer>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-    
-    virtual void releaseMemory() {
-      preorder.clear();
-      maxpreorder.clear();
-      
-      Waiting.clear();
-      Stacks.clear();
-      UsedByAnother.clear();
-      RenameSets.clear();
+    StrongPHIElimination() : MachineFunctionPass(ID) {
+      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
     }
 
+    virtual void getAnalysisUsage(AnalysisUsage&) const;
+    bool runOnMachineFunction(MachineFunction&);
+
   private:
-    
-    /// DomForestNode - Represents a node in the "dominator forest".  This is
-    /// a forest in which the nodes represent registers and the edges
-    /// represent a dominance relation in the block defining those registers.
-    struct DomForestNode {
-    private:
-      // Store references to our children
-      std::vector<DomForestNode*> children;
-      // The register we represent
-      unsigned reg;
-      
-      // Add another node as our child
-      void addChild(DomForestNode* DFN) { children.push_back(DFN); }
-      
-    public:
-      typedef std::vector<DomForestNode*>::iterator iterator;
-      
-      // Create a DomForestNode by providing the register it represents, and
-      // the node to be its parent.  The virtual root node has register 0
-      // and a null parent.
-      DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
-        if (parent)
-          parent->addChild(this);
-      }
-      
-      ~DomForestNode() {
-        for (iterator I = begin(), E = end(); I != E; ++I)
-          delete *I;
-      }
-      
-      /// getReg - Return the regiser that this node represents
-      inline unsigned getReg() { return reg; }
-      
-      // Provide iterator access to our children
-      inline DomForestNode::iterator begin() { return children.begin(); }
-      inline DomForestNode::iterator end() { return children.end(); }
+    /// This struct represents a single node in the union-find data structure
+    /// representing the variable congruence classes. There is one difference
+    /// from a normal union-find data structure. We steal two bits from the parent
+    /// pointer . One of these bits is used to represent whether the register
+    /// itself has been isolated, and the other is used to represent whether the
+    /// PHI with that register as its destination has been isolated.
+    ///
+    /// Note that this leads to the strange situation where the leader of a
+    /// congruence class may no longer logically be a member, due to being
+    /// isolated.
+    struct Node {
+      enum Flags {
+        kRegisterIsolatedFlag = 1,
+        kPHIIsolatedFlag = 2
+      };
+      Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+      Node *getLeader();
+
+      PointerIntPair<Node*, 2> parent;
+      unsigned value;
+      unsigned rank;
     };
-    
-    void computeDFS(MachineFunction& MF);
-    void processBlock(MachineBasicBlock* MBB);
-    
-    std::vector<DomForestNode*> computeDomForest(
-                           std::map<unsigned, MachineBasicBlock*>& instrs,
-                                                 MachineRegisterInfo& MRI);
-    void processPHIUnion(MachineInstr* Inst,
-                         std::map<unsigned, MachineBasicBlock*>& PHIUnion,
-                         std::vector<StrongPHIElimination::DomForestNode*>& DF,
-                         std::vector<std::pair<unsigned, unsigned> >& locals);
-    void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
-    void InsertCopies(MachineDomTreeNode* MBB,
-                      SmallPtrSet<MachineBasicBlock*, 16>& v);
-    bool mergeLiveIntervals(unsigned primary, unsigned secondary);
-  };
-}
 
-char StrongPHIElimination::ID = 0;
-INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
-  "Eliminate PHI nodes for register allocation, intelligently", false, false);
+    /// Add a register in a new congruence class containing only itself.
+    void addReg(unsigned);
 
-char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+    /// Join the congruence classes of two registers. This function is biased
+    /// towards the left argument, i.e. after
+    ///
+    /// addReg(r2);
+    /// unionRegs(r1, r2);
+    ///
+    /// the leader of the unioned congruence class is the same as the leader of
+    /// r1's congruence class prior to the union. This is actually relied upon
+    /// in the copy insertion code.
+    void unionRegs(unsigned, unsigned);
 
-/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
-/// of the given MachineFunction.  These numbers are then used in other parts
-/// of the PHI elimination process.
-void StrongPHIElimination::computeDFS(MachineFunction& MF) {
-  SmallPtrSet<MachineDomTreeNode*, 8> frontier;
-  SmallPtrSet<MachineDomTreeNode*, 8> visited;
-  
-  unsigned time = 0;
-  
-  MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
-  
-  MachineDomTreeNode* node = DT.getRootNode();
-  
-  std::vector<MachineDomTreeNode*> worklist;
-  worklist.push_back(node);
-  
-  while (!worklist.empty()) {
-    MachineDomTreeNode* currNode = worklist.back();
-    
-    if (!frontier.count(currNode)) {
-      frontier.insert(currNode);
-      ++time;
-      preorder.insert(std::make_pair(currNode->getBlock(), time));
-    }
-    
-    bool inserted = false;
-    for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
-         I != E; ++I)
-      if (!frontier.count(*I) && !visited.count(*I)) {
-        worklist.push_back(*I);
-        inserted = true;
-        break;
-      }
-    
-    if (!inserted) {
-      frontier.erase(currNode);
-      visited.insert(currNode);
-      maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
-      
-      worklist.pop_back();
+    /// Get the color of a register. The color is 0 if the register has been
+    /// isolated.
+    unsigned getRegColor(unsigned);
+
+    // Isolate a register.
+    void isolateReg(unsigned);
+
+    /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+    /// isolated. Otherwise, it is the original color of its destination and
+    /// all of its operands (before they were isolated, if they were).
+    unsigned getPHIColor(MachineInstr*);
+
+    /// Isolate a PHI.
+    void isolatePHI(MachineInstr*);
+
+    /// Traverses a basic block, splitting any interferences found between
+    /// registers in the same congruence class. It takes two DenseMaps as
+    /// arguments that it also updates: CurrentDominatingParent, which maps
+    /// a color to the register in that congruence class whose definition was
+    /// most recently seen, and ImmediateDominatingParent, which maps a register
+    /// to the register in the same congruence class that most immediately
+    /// dominates it.
+    ///
+    /// This function assumes that it is being called in a depth-first traversal
+    /// of the dominator tree.
+    void SplitInterferencesForBasicBlock(
+      MachineBasicBlock&,
+      DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+      DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+    // Lowers a PHI instruction, inserting copies of the source and destination
+    // registers as necessary.
+    void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+    // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+    // everywhere that Reg appears. Requires Reg and NewReg to have non-
+    // overlapping lifetimes.
+    void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    MachineDominatorTree *DT;
+    LiveIntervals *LI;
+
+    BumpPtrAllocator Allocator;
+
+    DenseMap<unsigned, Node*> RegNodeMap;
+
+    // Maps a basic block to a list of its defs of registers that appear as PHI
+    // sources.
+    DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+    // Maps a color to a pair of a MachineInstr* and a virtual register, which
+    // is the operand of that PHI corresponding to the current basic block.
+    DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+    // FIXME: Can these two data structures be combined? Would a std::multimap
+    // be any better?
+
+    // Stores pairs of predecessor basic blocks and the source registers of
+    // inserted copy instructions.
+    typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+    SrcCopySet InsertedSrcCopySet;
+
+    // Maps pairs of predecessor basic blocks and colors to their defining copy
+    // instructions.
+    typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+      SrcCopyMap;
+    SrcCopyMap InsertedSrcCopyMap;
+
+    // Maps inserted destination copy registers to their defining copy
+    // instructions.
+    typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+    DestCopyMap InsertedDestCopies;
+  };
+
+  struct MIIndexCompare {
+    MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+      return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
     }
-  }
-}
 
-namespace {
+    LiveIntervals *LI;
+  };
+} // namespace
 
-/// PreorderSorter - a helper class that is used to sort registers
-/// according to the preorder number of their defining blocks
-class PreorderSorter {
-private:
-  DenseMap<MachineBasicBlock*, unsigned>& preorder;
-  MachineRegisterInfo& MRI;
-  
-public:
-  PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
-                MachineRegisterInfo& M) : preorder(p), MRI(M) { }
-  
-  bool operator()(unsigned A, unsigned B) {
-    if (A == B)
-      return false;
-    
-    MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
-    MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
-    
-    if (preorder[ABlock] < preorder[BBlock])
-      return true;
-    else if (preorder[ABlock] > preorder[BBlock])
-      return false;
-    
-    return false;
-  }
-};
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
 
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<SlotIndexes>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-/// computeDomForest - compute the subforest of the DomTree corresponding
-/// to the defining blocks of the registers in question
-std::vector<StrongPHIElimination::DomForestNode*>
-StrongPHIElimination::computeDomForest(
-                  std::map<unsigned, MachineBasicBlock*>& regs, 
-                                       MachineRegisterInfo& MRI) {
-  // Begin by creating a virtual root node, since the actual results
-  // may well be a forest.  Assume this node has maximum DFS-out number.
-  DomForestNode* VirtualRoot = new DomForestNode(0, 0);
-  maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
-  
-  // Populate a worklist with the registers
-  std::vector<unsigned> worklist;
-  worklist.reserve(regs.size());
-  for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
-       E = regs.end(); I != E; ++I)
-    worklist.push_back(I->first);
-  
-  // Sort the registers by the DFS-in number of their defining block
-  PreorderSorter PS(preorder, MRI);
-  std::sort(worklist.begin(), worklist.end(), PS);
-  
-  // Create a "current parent" stack, and put the virtual root on top of it
-  DomForestNode* CurrentParent = VirtualRoot;
-  std::vector<DomForestNode*> stack;
-  stack.push_back(VirtualRoot);
-  
-  // Iterate over all the registers in the previously computed order
-  for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
-       I != E; ++I) {
-    unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
-    MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
-                 MRI.getVRegDef(CurrentParent->getReg())->getParent() :
-                 0;
-    
-    // If the DFS-in number of the register is greater than the DFS-out number
-    // of the current parent, repeatedly pop the parent stack until it isn't.
-    while (pre > maxpreorder[parentBlock]) {
-      stack.pop_back();
-      CurrentParent = stack.back();
-      
-      parentBlock = CurrentParent->getReg() ?
-                   MRI.getVRegDef(CurrentParent->getReg())->getParent() :
-                   0;
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+  // FIXME: This only needs to check from the first terminator, as only the
+  // first terminator can use a virtual register.
+  for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+    assert (RI != MBB->rend());
+    MachineInstr *MI = &*RI;
+
+    for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+         OE = MI->operands_end(); OI != OE; ++OI) {
+      MachineOperand &MO = *OI;
+      if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+        return &MO;
     }
-    
-    // Now that we've found the appropriate parent, create a DomForestNode for
-    // this register and attach it to the forest
-    DomForestNode* child = new DomForestNode(*I, CurrentParent);
-    
-    // Push this new node on the "current parent" stack
-    stack.push_back(child);
-    CurrentParent = child;
   }
-  
-  // Return a vector containing the children of the virtual root node
-  std::vector<DomForestNode*> ret;
-  ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
-  return ret;
+  return NULL;
 }
 
-/// isLiveIn - helper method that determines, from a regno, if a register
-/// is live into a block
-static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
-                     LiveIntervals& LI) {
-  LiveInterval& I = LI.getOrCreateInterval(r);
-  SlotIndex idx = LI.getMBBStartIdx(MBB);
-  return I.liveAt(idx);
-}
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+  LI = &getAnalysis<LiveIntervals>();
 
-/// isLiveOut - help method that determines, from a regno, if a register is
-/// live out of a block.
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
-                      LiveIntervals& LI) {
-  for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
-       E = MBB->succ_end(); PI != E; ++PI)
-    if (isLiveIn(r, *PI, LI))
-      return true;
-  
-  return false;
-}
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      unsigned DestReg = BBI->getOperand(0).getReg();
+      addReg(DestReg);
+      PHISrcDefs[I].push_back(BBI);
 
-/// interferes - checks for local interferences by scanning a block.  The only
-/// trick parameter is 'mode' which tells it the relationship of the two
-/// registers. 0 - defined in the same block, 1 - first properly dominates
-/// second, 2 - second properly dominates first 
-static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
-                       LiveIntervals& LV, unsigned mode) {
-  MachineInstr* def = 0;
-  MachineInstr* kill = 0;
-  
-  // The code is still in SSA form at this point, so there is only one
-  // definition per VReg.  Thus we can safely use MRI->getVRegDef().
-  const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
-  
-  bool interference = false;
-  
-  // Wallk the block, checking for interferences
-  for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
-       MBI != MBE; ++MBI) {
-    MachineInstr* curr = MBI;
-    
-    // Same defining block...
-    if (mode == 0) {
-      if (curr == MRI->getVRegDef(a)) {
-        // If we find our first definition, save it
-        if (!def) {
-          def = curr;
-        // If there's already an unkilled definition, then 
-        // this is an interference
-        } else if (!kill) {
-          interference = true;
-          break;
-        // If there's a definition followed by a KillInst, then
-        // they can't interfere
-        } else {
-          interference = false;
-          break;
-        }
-      // Symmetric with the above
-      } else if (curr == MRI->getVRegDef(b)) {
-        if (!def) {
-          def = curr;
-        } else if (!kill) {
-          interference = true;
-          break;
-        } else {
-          interference = false;
-          break;
-        }
-      // Store KillInsts if they match up with the definition
-      } else if (curr->killsRegister(a)) {
-        if (def == MRI->getVRegDef(a)) {
-          kill = curr;
-        } else if (curr->killsRegister(b)) {
-          if (def == MRI->getVRegDef(b)) {
-            kill = curr;
-          }
-        }
-      }
-    // First properly dominates second...
-    } else if (mode == 1) {
-      if (curr == MRI->getVRegDef(b)) {
-        // Definition of second without kill of first is an interference
-        if (!kill) {
-          interference = true;
-          break;
-        // Definition after a kill is a non-interference
-        } else {
-          interference = false;
-          break;
-        }
-      // Save KillInsts of First
-      } else if (curr->killsRegister(a)) {
-        kill = curr;
-      }
-    // Symmetric with the above
-    } else if (mode == 2) {
-      if (curr == MRI->getVRegDef(a)) {
-        if (!kill) {
-          interference = true;
-          break;
-        } else {
-          interference = false;
-          break;
-        }
-      } else if (curr->killsRegister(b)) {
-        kill = curr;
+      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+        MachineOperand &SrcMO = BBI->getOperand(i);
+        unsigned SrcReg = SrcMO.getReg();
+        addReg(SrcReg);
+        unionRegs(DestReg, SrcReg);
+
+        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+        if (DefMI)
+          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
       }
     }
   }
-  
-  return interference;
-}
 
-/// processBlock - Determine how to break up PHIs in the current block.  Each
-/// PHI is broken up by some combination of renaming its operands and inserting
-/// copies.  This method is responsible for determining which operands receive
-/// which treatment.
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
-  
-  // Holds names that have been added to a set in any PHI within this block
-  // before the current one.
-  std::set<unsigned> ProcessedNames;
-  
-  // Iterate over all the PHI nodes in this block
-  MachineBasicBlock::iterator P = MBB->begin();
-  while (P != MBB->end() && P->isPHI()) {
-    unsigned DestReg = P->getOperand(0).getReg();
-    
-    // Don't both doing PHI elimination for dead PHI's.
-    if (P->registerDefIsDead(DestReg)) {
-      ++P;
-      continue;
-    }
+  // Perform a depth-first traversal of the dominator tree, splitting
+  // interferences amongst PHI-congruence classes.
+  DenseMap<unsigned, unsigned> CurrentDominatingParent;
+  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+    SplitInterferencesForBasicBlock(*DI->getBlock(),
+                                    CurrentDominatingParent,
+                                    ImmediateDominatingParent);
+  }
 
-    LiveInterval& PI = LI.getOrCreateInterval(DestReg);
-    SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
-    VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
-    PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
-
-    // PHIUnion is the set of incoming registers to the PHI node that
-    // are going to be renames rather than having copies inserted.  This set
-    // is refinded over the course of this function.  UnionedBlocks is the set
-    // of corresponding MBBs.
-    std::map<unsigned, MachineBasicBlock*> PHIUnion;
-    SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
-  
-    // Iterate over the operands of the PHI node
-    for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
-      unsigned SrcReg = P->getOperand(i-1).getReg();
-      
-      // Don't need to try to coalesce a register with itself.
-      if (SrcReg == DestReg) {
-        ProcessedNames.insert(SrcReg);
-        continue;
-      }
-      
-      // We don't need to insert copies for implicit_defs.
-      MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
-      if (DefMI->isImplicitDef())
-        ProcessedNames.insert(SrcReg);
-    
-      // Check for trivial interferences via liveness information, allowing us
-      // to avoid extra work later.  Any registers that interfere cannot both
-      // be in the renaming set, so choose one and add copies for it instead.
-      // The conditions are:
-      //   1) if the operand is live into the PHI node's block OR
-      //   2) if the PHI node is live out of the operand's defining block OR
-      //   3) if the operand is itself a PHI node and the original PHI is
-      //      live into the operand's defining block OR
-      //   4) if the operand is already being renamed for another PHI node
-      //      in this block OR
-      //   5) if any two operands are defined in the same block, insert copies
-      //      for one of them
-      if (isLiveIn(SrcReg, P->getParent(), LI) ||
-          isLiveOut(P->getOperand(0).getReg(),
-                    MRI.getVRegDef(SrcReg)->getParent(), LI) ||
-          ( MRI.getVRegDef(SrcReg)->isPHI() &&
-            isLiveIn(P->getOperand(0).getReg(),
-                     MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
-          ProcessedNames.count(SrcReg) ||
-          UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
-        
-        // Add a copy for the selected register
-        MachineBasicBlock* From = P->getOperand(i).getMBB();
-        Waiting[From].insert(std::make_pair(SrcReg, DestReg));
-        UsedByAnother.insert(SrcReg);
-      } else {
-        // Otherwise, add it to the renaming set
-        PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
-        UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
-      }
+  // Insert copies for all PHI source and destination registers.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      InsertCopiesForPHI(BBI, I);
     }
-    
-    // Compute the dominator forest for the renaming set.  This is a forest
-    // where the nodes are the registers and the edges represent dominance 
-    // relations between the defining blocks of the registers
-    std::vector<StrongPHIElimination::DomForestNode*> DF = 
-                                                computeDomForest(PHIUnion, MRI);
-    
-    // Walk DomForest to resolve interferences at an inter-block level.  This
-    // will remove registers from the renaming set (and insert copies for them)
-    // if interferences are found.
-    std::vector<std::pair<unsigned, unsigned> > localInterferences;
-    processPHIUnion(P, PHIUnion, DF, localInterferences);
-    
-    // If one of the inputs is defined in the same block as the current PHI
-    // then we need to check for a local interference between that input and
-    // the PHI.
-    for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
-         E = PHIUnion.end(); I != E; ++I)
-      if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
-        localInterferences.push_back(std::make_pair(I->first,
-                                                    P->getOperand(0).getReg()));
-    
-    // The dominator forest walk may have returned some register pairs whose
-    // interference cannot be determined from dominator analysis.  We now 
-    // examine these pairs for local interferences.
-    for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
-        localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
-      std::pair<unsigned, unsigned> p = *I;
-      
-      MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-      
-      // Determine the block we need to scan and the relationship between
-      // the two registers
-      MachineBasicBlock* scan = 0;
-      unsigned mode = 0;
-      if (MRI.getVRegDef(p.first)->getParent() ==
-          MRI.getVRegDef(p.second)->getParent()) {
-        scan = MRI.getVRegDef(p.first)->getParent();
-        mode = 0; // Same block
-      } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
-                               MRI.getVRegDef(p.second)->getParent())) {
-        scan = MRI.getVRegDef(p.second)->getParent();
-        mode = 1; // First dominates second
-      } else {
-        scan = MRI.getVRegDef(p.first)->getParent();
-        mode = 2; // Second dominates first
-      }
-      
-      // If there's an interference, we need to insert  copies
-      if (interferes(p.first, p.second, scan, LI, mode)) {
-        // Insert copies for First
-        for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
-          if (P->getOperand(i-1).getReg() == p.first) {
-            unsigned SrcReg = p.first;
-            MachineBasicBlock* From = P->getOperand(i).getMBB();
-            
-            Waiting[From].insert(std::make_pair(SrcReg,
-                                                P->getOperand(0).getReg()));
-            UsedByAnother.insert(SrcReg);
-            
-            PHIUnion.erase(SrcReg);
-          }
-        }
+  }
+
+  // FIXME: Preserve the equivalence classes during copy insertion and use
+  // the preversed equivalence classes instead of recomputing them.
+  RegNodeMap.clear();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      unsigned DestReg = BBI->getOperand(0).getReg();
+      addReg(DestReg);
+
+      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+        unsigned SrcReg = BBI->getOperand(i).getReg();
+        addReg(SrcReg);
+        unionRegs(DestReg, SrcReg);
       }
     }
-    
-    // Add the renaming set for this PHI node to our overall renaming information
-    for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
-         QE = PHIUnion.end(); QI != QE; ++QI) {
-      DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
-                   << P->getOperand(0).getReg() << "\n");
-    }
-    
-    RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
-    
-    // Remember which registers are already renamed, so that we don't try to 
-    // rename them for another PHI node in this block
-    for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
-         E = PHIUnion.end(); I != E; ++I)
-      ProcessedNames.insert(I->first);
-    
-    ++P;
   }
-}
 
-/// processPHIUnion - Take a set of candidate registers to be coalesced when
-/// decomposing the PHI instruction.  Use the DominanceForest to remove the ones
-/// that are known to interfere, and flag others that need to be checked for
-/// local interferences.
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
-                        std::map<unsigned, MachineBasicBlock*>& PHIUnion,
-                        std::vector<StrongPHIElimination::DomForestNode*>& DF,
-                        std::vector<std::pair<unsigned, unsigned> >& locals) {
-  
-  std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
-  SmallPtrSet<DomForestNode*, 4> visited;
-  
-  // Code is still in SSA form, so we can use MRI::getVRegDef()
-  MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
-  
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  unsigned DestReg = Inst->getOperand(0).getReg();
-  
-  // DF walk on the DomForest
-  while (!worklist.empty()) {
-    DomForestNode* DFNode = worklist.back();
-    
-    visited.insert(DFNode);
-    
-    bool inserted = false;
-    for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
-         CI != CE; ++CI) {
-      DomForestNode* child = *CI;   
-      
-      // If the current node is live-out of the defining block of one of its
-      // children, insert a copy for it.  NOTE: The paper actually calls for
-      // a more elaborate heuristic for determining whether to insert copies
-      // for the child or the parent.  In the interest of simplicity, we're
-      // just always choosing the parent.
-      if (isLiveOut(DFNode->getReg(),
-          MRI.getVRegDef(child->getReg())->getParent(), LI)) {
-        // Insert copies for parent
-        for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
-          if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
-            unsigned SrcReg = DFNode->getReg();
-            MachineBasicBlock* From = Inst->getOperand(i).getMBB();
-            
-            Waiting[From].insert(std::make_pair(SrcReg, DestReg));
-            UsedByAnother.insert(SrcReg);
-            
-            PHIUnion.erase(SrcReg);
-          }
-        }
-      
-      // If a node is live-in to the defining block of one of its children, but
-      // not live-out, then we need to scan that block for local interferences.
-      } else if (isLiveIn(DFNode->getReg(),
-                          MRI.getVRegDef(child->getReg())->getParent(), LI) ||
-                 MRI.getVRegDef(DFNode->getReg())->getParent() ==
-                                 MRI.getVRegDef(child->getReg())->getParent()) {
-        // Add (p, c) to possible local interferences
-        locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+  DenseMap<unsigned, unsigned> RegRenamingMap;
+  bool Changed = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+    while (BBI != BBE && BBI->isPHI()) {
+      MachineInstr *PHI = BBI;
+
+      assert(PHI->getNumOperands() > 0);
+
+      unsigned SrcReg = PHI->getOperand(1).getReg();
+      unsigned SrcColor = getRegColor(SrcReg);
+      unsigned NewReg = RegRenamingMap[SrcColor];
+      if (!NewReg) {
+        NewReg = SrcReg;
+        RegRenamingMap[SrcColor] = SrcReg;
       }
-      
-      if (!visited.count(child)) {
-        worklist.push_back(child);
-        inserted = true;
+      MergeLIsAndRename(SrcReg, NewReg);
+
+      unsigned DestReg = PHI->getOperand(0).getReg();
+      if (!InsertedDestCopies.count(DestReg))
+        MergeLIsAndRename(DestReg, NewReg);
+
+      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+        unsigned SrcReg = PHI->getOperand(i).getReg();
+        MergeLIsAndRename(SrcReg, NewReg);
       }
+
+      ++BBI;
+      LI->RemoveMachineInstrFromMaps(PHI);
+      PHI->eraseFromParent();
+      Changed = true;
     }
-    
-    if (!inserted) worklist.pop_back();
   }
-}
 
-/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
-/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
-/// problems.
-///
-/// Based on "Practical Improvements to the Construction and Destruction
-/// of Static Single Assignment Form" by Briggs, et al.
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
-                                          std::set<unsigned>& pushed) {
-  // FIXME: This function needs to update LiveIntervals
-  std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
-  
-  std::multimap<unsigned, unsigned> worklist;
-  std::map<unsigned, unsigned> map;
-  
-  // Setup worklist of initial copies
-  for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
-       E = copy_set.end(); I != E; ) {
-    map.insert(std::make_pair(I->first, I->first));
-    map.insert(std::make_pair(I->second, I->second));
-         
-    if (!UsedByAnother.count(I->second)) {
-      worklist.insert(*I);
-      
-      // Avoid iterator invalidation
-      std::multimap<unsigned, unsigned>::iterator OI = I;
-      ++I;
-      copy_set.erase(OI);
-    } else {
-      ++I;
+  // Due to the insertion of copies to split live ranges, the live intervals are
+  // guaranteed to not overlap, except in one case: an original PHI source and a
+  // PHI destination copy. In this case, they have the same value and thus don't
+  // truly intersect, so we merge them into the value live at that point.
+  // FIXME: Is there some better way we can handle this?
+  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+       E = InsertedDestCopies.end(); I != E; ++I) {
+    unsigned DestReg = I->first;
+    unsigned DestColor = getRegColor(DestReg);
+    unsigned NewReg = RegRenamingMap[DestColor];
+
+    LiveInterval &DestLI = LI->getInterval(DestReg);
+    LiveInterval &NewLI = LI->getInterval(NewReg);
+
+    assert(DestLI.ranges.size() == 1
+           && "PHI destination copy's live interval should be a single live "
+               "range from the beginning of the BB to the copy instruction.");
+    LiveRange *DestLR = DestLI.begin();
+    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+    if (!NewVNI) {
+      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+      MachineInstr *CopyInstr = I->second;
+      CopyInstr->getOperand(1).setIsKill(true);
     }
+
+    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+    NewLI.addRange(NewLR);
+
+    LI->removeInterval(DestReg);
+    MRI->replaceRegWith(DestReg, NewReg);
   }
-  
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  MachineFunction* MF = MBB->getParent();
-  MachineRegisterInfo& MRI = MF->getRegInfo();
-  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-  
-  SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
-  
-  // Iterate over the worklist, inserting copies
-  while (!worklist.empty() || !copy_set.empty()) {
-    while (!worklist.empty()) {
-      std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
-      std::pair<unsigned, unsigned> curr = *WI;
-      worklist.erase(WI);
-      
-      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
-      
-      if (isLiveOut(curr.second, MBB, LI)) {
-        // Create a temporary
-        unsigned t = MF->getRegInfo().createVirtualRegister(RC);
-        
-        // Insert copy from curr.second to a temporary at
-        // the Phi defining curr.second
-        MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
-        BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
-                t).addReg(curr.second);
-        DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
-                     << "\n");
-        
-        // Push temporary on Stacks
-        Stacks[curr.second].push_back(t);
-        
-        // Insert curr.second in pushed
-        pushed.insert(curr.second);
-        
-        // Create a live interval for this temporary
-        InsertedPHIDests.push_back(std::make_pair(t, --PI));
-      }
-      
-      // Insert copy from map[curr.first] to curr.second
-      BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
-             TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
-      map[curr.first] = curr.second;
-      DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
-                   << curr.second << "\n");
-      
-      // Push this copy onto InsertedPHICopies so we can
-      // update LiveIntervals with it.
-      MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
-      InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
-      
-      // If curr.first is a destination in copy_set...
-      for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
-           E = copy_set.end(); I != E; )
-        if (curr.first == I->second) {
-          std::pair<unsigned, unsigned> temp = *I;
-          worklist.insert(temp);
-          
-          // Avoid iterator invalidation
-          std::multimap<unsigned, unsigned>::iterator OI = I;
-          ++I;
-          copy_set.erase(OI);
-          
-          break;
-        } else {
-          ++I;
-        }
-    }
-    
-    if (!copy_set.empty()) {
-      std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
-      std::pair<unsigned, unsigned> curr = *CI;
-      worklist.insert(curr);
-      copy_set.erase(CI);
-      
-      LiveInterval& I = LI.getInterval(curr.second);
-      MachineBasicBlock::iterator term = MBB->getFirstTerminator();
-      SlotIndex endIdx = SlotIndex();
-      if (term != MBB->end())
-        endIdx = LI.getInstructionIndex(term);
-      else
-        endIdx = LI.getMBBEndIdx(MBB);
-      
-      if (I.liveAt(endIdx)) {
-        const TargetRegisterClass *RC =
-                                       MF->getRegInfo().getRegClass(curr.first);
-        
-        // Insert a copy from dest to a new temporary t at the end of b
-        unsigned t = MF->getRegInfo().createVirtualRegister(RC);
-        BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
-                TII->get(TargetOpcode::COPY), t).addReg(curr.second);
-        map[curr.second] = t;
-        
-        MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
-        InsertedPHIDests.push_back(std::make_pair(t, --TI));
+
+  // Adjust the live intervals of all PHI source registers to handle the case
+  // where the PHIs in successor blocks were the only later uses of the source
+  // register.
+  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+       E = InsertedSrcCopySet.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I->first;
+    unsigned SrcReg = I->second;
+    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+      SrcReg = RenamedRegister;
+
+    LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+    bool isLiveOut = false;
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+        isLiveOut = true;
+        break;
       }
     }
+
+    if (isLiveOut)
+      continue;
+
+    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+    assert(LastUse);
+    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+    LastUse->setIsKill(true);
   }
-  
-  // Renumber the instructions so that we can perform the index computations
-  // needed to create new live intervals.
-  LI.renumber();
-  
-  // For copies that we inserted at the ends of predecessors, we construct
-  // live intervals.  This is pretty easy, since we know that the destination
-  // register cannot have be in live at that point previously.  We just have
-  // to make sure that, for registers that serve as inputs to more than one
-  // PHI, we don't create multiple overlapping live intervals.
-  std::set<unsigned> RegHandled;
-  for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
-       InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
-    if (RegHandled.insert(I->first).second) {
-      LiveInterval& Int = LI.getOrCreateInterval(I->first);
-      SlotIndex instrIdx = LI.getInstructionIndex(I->second);
-      if (Int.liveAt(instrIdx.getDefIndex()))
-        Int.removeRange(instrIdx.getDefIndex(),
-                        LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
-                        true);
-      
-      LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
-      R.valno->setCopy(I->second);
-      R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
-    }
+
+  LI->renumber();
+
+  Allocator.Reset();
+  RegNodeMap.clear();
+  PHISrcDefs.clear();
+  InsertedSrcCopySet.clear();
+  InsertedSrcCopyMap.clear();
+  InsertedDestCopies.clear();
+
+  return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+  if (RegNodeMap.count(Reg))
+    return;
+  RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+  Node *N = this;
+  Node *Parent = parent.getPointer();
+  Node *Grandparent = Parent->parent.getPointer();
+
+  while (Parent != Grandparent) {
+    N->parent.setPointer(Grandparent);
+    N = Grandparent;
+    Parent = Parent->parent.getPointer();
+    Grandparent = Parent->parent.getPointer();
   }
+
+  return Parent;
 }
 
-/// InsertCopies - insert copies into MBB and all of its successors
-void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
-                                 SmallPtrSet<MachineBasicBlock*, 16>& visited) {
-  MachineBasicBlock* MBB = MDTN->getBlock();
-  visited.insert(MBB);
-  
-  std::set<unsigned> pushed;
-  
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  // Rewrite register uses from Stacks
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-      I != E; ++I) {
-    if (I->isPHI())
-      continue;
-    
-    for (unsigned i = 0; i < I->getNumOperands(); ++i)
-      if (I->getOperand(i).isReg() &&
-          Stacks[I->getOperand(i).getReg()].size()) {
-        // Remove the live range for the old vreg.
-        LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
-        LiveInterval::iterator OldLR =
-          OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
-        if (OldLR != OldInt.end())
-          OldInt.removeRange(*OldLR, true);
-        
-        // Change the register
-        I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
-        
-        // Add a live range for the new vreg
-        LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
-        VNInfo* FirstVN = *Int.vni_begin();
-        FirstVN->setHasPHIKill(false);
-        LiveRange LR (LI.getMBBStartIdx(I->getParent()),
-                      LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
-                      FirstVN);
-        
-        Int.addRange(LR);
-      }
-  }    
-  
-  // Schedule the copies for this block
-  ScheduleCopies(MBB, pushed);
-  
-  // Recur down the dominator tree.
-  for (MachineDomTreeNode::iterator I = MDTN->begin(),
-       E = MDTN->end(); I != E; ++I)
-    if (!visited.count((*I)->getBlock()))
-      InsertCopies(*I, visited);
-  
-  // As we exit this block, pop the names we pushed while processing it
-  for (std::set<unsigned>::iterator I = pushed.begin(), 
-       E = pushed.end(); I != E; ++I)
-    Stacks[*I].pop_back();
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+  DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+  if (RI == RegNodeMap.end())
+    return 0;
+  Node *Node = RI->second;
+  if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+    return 0;
+  return Node->getLeader()->value;
 }
 
-bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
-                                              unsigned secondary) {
-  
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  LiveInterval& LHS = LI.getOrCreateInterval(primary);
-  LiveInterval& RHS = LI.getOrCreateInterval(secondary);
-  
-  LI.renumber();
-  
-  DenseMap<VNInfo*, VNInfo*> VNMap;
-  for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
-    LiveRange R = *I;
- 
-    SlotIndex Start = R.start;
-    SlotIndex End = R.end;
-    if (LHS.getLiveRangeContaining(Start))
-      return false;
-    
-    if (LHS.getLiveRangeContaining(End))
-      return false;
-    
-    LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
-    if (RI != LHS.end() && RI->start < End)
-      return false;
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+  Node *Node1 = RegNodeMap[Reg1]->getLeader();
+  Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+  if (Node1->rank > Node2->rank) {
+    Node2->parent.setPointer(Node1->getLeader());
+  } else if (Node1->rank < Node2->rank) {
+    Node1->parent.setPointer(Node2->getLeader());
+  } else if (Node1 != Node2) {
+    Node2->parent.setPointer(Node1->getLeader());
+    Node1->rank++;
   }
-  
-  for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
-    LiveRange R = *I;
-    VNInfo* OldVN = R.valno;
-    VNInfo*& NewVN = VNMap[OldVN];
-    if (!NewVN) {
-      NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
-    }
-    
-    LiveRange LR (R.start, R.end, NewVN);
-    LHS.addRange(LR);
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+  Node *Node = RegNodeMap[Reg];
+  Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+  assert(PHI->isPHI());
+
+  unsigned DestReg = PHI->getOperand(0).getReg();
+  Node *DestNode = RegNodeMap[DestReg];
+  if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+    return 0;
+
+  for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+    unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+    if (SrcColor)
+      return SrcColor;
   }
-  
-  LI.removeInterval(RHS.reg);
-  
-  return true;
+  return 0;
 }
 
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
-  LiveIntervals& LI = getAnalysis<LiveIntervals>();
-  
-  // Compute DFS numbers of each block
-  computeDFS(Fn);
-  
-  // Determine which phi node operands need copies
-  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
-    if (!I->empty() && I->begin()->isPHI())
-      processBlock(I);
-  
-  // Break interferences where two different phis want to coalesce
-  // in the same register.
-  std::set<unsigned> seen;
-  typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
-          RenameSetType;
-  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
-       I != E; ++I) {
-    for (std::map<unsigned, MachineBasicBlock*>::iterator
-         OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
-      if (!seen.count(OI->first)) {
-        seen.insert(OI->first);
-        ++OI;
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+  assert(PHI->isPHI());
+  Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+  Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+///    congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+///    same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+///   def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+/// 
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+    MachineBasicBlock &MBB,
+    DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+    DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+  // Sort defs by their order in the original basic block, as the code below
+  // assumes that it is processing definitions in dominance order.
+  std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+  std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+  for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+       BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+    for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+         E = (*BBI)->operands_end(); I != E; ++I) {
+      const MachineOperand &MO = *I;
+
+      // FIXME: This would be faster if it were possible to bail out of checking
+      // an instruction's operands after the explicit defs, but this is incorrect
+      // for variadic instructions, which may appear before register allocation
+      // in the future.
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      unsigned DestReg = MO.getReg();
+      if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+        continue;
+
+      // If the virtual register being defined is not used in any PHI or has
+      // already been isolated, then there are no more interferences to check.
+      unsigned DestColor = getRegColor(DestReg);
+      if (!DestColor)
+        continue;
+
+      // The input to this pass sometimes is not in SSA form in every basic
+      // block, as some virtual registers have redefinitions. We could eliminate
+      // this by fixing the passes that generate the non-SSA code, or we could
+      // handle it here by tracking defining machine instructions rather than
+      // virtual registers. For now, we just handle the situation conservatively
+      // in a way that will possibly lead to false interferences.
+      unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+      unsigned NewParent = CurrentParent;
+      if (NewParent == DestReg)
+        continue;
+
+      // Pop registers from the stack represented by ImmediateDominatingParent
+      // until we find a parent that dominates the current instruction.
+      while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+                           || !getRegColor(NewParent)))
+        NewParent = ImmediateDominatingParent[NewParent];
+
+      // If NewParent is nonzero, then its definition dominates the current
+      // instruction, so it is only necessary to check for the liveness of
+      // NewParent in order to check for an interference.
+      if (NewParent
+          && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+        // If there is an interference, always isolate the new register. This
+        // could be improved by using a heuristic that decides which of the two
+        // registers to isolate.
+        isolateReg(DestReg);
+        CurrentParent = NewParent;
       } else {
-        Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
-        unsigned reg = OI->first;
-        ++OI;
-        I->second.erase(reg);
-        DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
-                     << "\n");
+        // If there is no interference, update ImmediateDominatingParent and set
+        // the CurrentDominatingParent for this color to the current register.
+        ImmediateDominatingParent[DestReg] = NewParent;
+        CurrentParent = DestReg;
       }
     }
   }
-  
-  // Insert copies
-  // FIXME: This process should probably preserve LiveIntervals
-  SmallPtrSet<MachineBasicBlock*, 16> visited;
-  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-  InsertCopies(MDT.getRootNode(), visited);
-  
-  // Perform renaming
-  for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
-       I != E; ++I)
-    while (I->second.size()) {
-      std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
-      
-      DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
-      
-      if (SI->first != I->first) {
-        if (mergeLiveIntervals(I->first, SI->first)) {
-          Fn.getRegInfo().replaceRegWith(SI->first, I->first);
-      
-          if (RenameSets.count(SI->first)) {
-            I->second.insert(RenameSets[SI->first].begin(),
-                             RenameSets[SI->first].end());
-            RenameSets.erase(SI->first);
-          }
-        } else {
-          // Insert a last-minute copy if a conflict was detected.
-          const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
-          BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
-                  TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
-          
-          LI.renumber();
-          
-          LiveInterval& Int = LI.getOrCreateInterval(I->first);
-          SlotIndex instrIdx =
-                     LI.getInstructionIndex(--SI->second->getFirstTerminator());
-          if (Int.liveAt(instrIdx.getDefIndex()))
-            Int.removeRange(instrIdx.getDefIndex(),
-                            LI.getMBBEndIdx(SI->second).getNextSlot(), true);
-
-          LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
-                                            --SI->second->getFirstTerminator());
-          R.valno->setCopy(--SI->second->getFirstTerminator());
-          R.valno->def = instrIdx.getDefIndex();
-          
-          DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
-                       << I->first << "\n");
-        }
+
+  // We now walk the PHIs in successor blocks and check for interferences. This
+  // is necesary because the use of a PHI's operands are logically contained in
+  // the predecessor block. The def of a PHI's destination register is processed
+  // along with the other defs in a basic block.
+
+  CurrentPHIForColor.clear();
+
+  for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+       SE = MBB.succ_end(); SI != SE; ++SI) {
+    for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      MachineInstr *PHI = BBI;
+
+      // If a PHI is already isolated, either by being isolated directly or
+      // having all of its operands isolated, ignore it.
+      unsigned Color = getPHIColor(PHI);
+      if (!Color)
+        continue;
+
+      // Find the index of the PHI operand that corresponds to this basic block.
+      unsigned PredIndex;
+      for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+        if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+          break;
       }
-      
-      LiveInterval& Int = LI.getOrCreateInterval(I->first);
-      const LiveRange* LR =
-                       Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
-      LR->valno->setHasPHIKill(true);
-      
-      I->second.erase(SI->first);
+      assert(PredIndex < PHI->getNumOperands());
+      unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+      // Pop registers from the stack represented by ImmediateDominatingParent
+      // until we find a parent that dominates the current instruction.
+      unsigned &CurrentParent = CurrentDominatingParent[Color];
+      unsigned NewParent = CurrentParent;
+      while (NewParent
+             && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+                 || !getRegColor(NewParent)))
+        NewParent = ImmediateDominatingParent[NewParent];
+      CurrentParent = NewParent;
+
+      // If there is an interference with a register, always isolate the
+      // register rather than the PHI. It is also possible to isolate the
+      // PHI, but that introduces copies for all of the registers involved
+      // in that PHI.
+      if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+                    && NewParent != PredOperandReg)
+        isolateReg(NewParent);
+
+      std::pair<MachineInstr*, unsigned>
+        &CurrentPHI = CurrentPHIForColor[Color];
+
+      // If two PHIs have the same operand from every shared predecessor, then
+      // they don't actually interfere. Otherwise, isolate the current PHI. This
+      // could possibly be improved, e.g. we could isolate the PHI with the
+      // fewest operands.
+      if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+        isolatePHI(PHI);
+      else
+        CurrentPHI = std::make_pair(PHI, PredOperandReg);
     }
-  
-  // Remove PHIs
-  std::vector<MachineInstr*> phis;
-  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
-    for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
-         BI != BE; ++BI)
-      if (BI->isPHI())
-        phis.push_back(BI);
   }
-  
-  for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
-       I != E; ) {
-    MachineInstr* PInstr = *(I++);
-    
-    // If this is a dead PHI node, then remove it from LiveIntervals.
-    unsigned DestReg = PInstr->getOperand(0).getReg();
-    LiveInterval& PI = LI.getInterval(DestReg);
-    if (PInstr->registerDefIsDead(DestReg)) {
-      if (PI.containsOneValue()) {
-        LI.removeInterval(DestReg);
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+                                              MachineBasicBlock *MBB) {
+  assert(PHI->isPHI());
+  ++NumPHIsLowered;
+  unsigned PHIColor = getPHIColor(PHI);
+
+  for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+    MachineOperand &SrcMO = PHI->getOperand(i);
+
+    // If a source is defined by an implicit def, there is no need to insert a
+    // copy in the predecessor.
+    if (SrcMO.isUndef())
+      continue;
+
+    unsigned SrcReg = SrcMO.getReg();
+    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+           "Machine PHI Operands must all be virtual registers!");
+
+    MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+    unsigned SrcColor = getRegColor(SrcReg);
+
+    // If neither the PHI nor the operand were isolated, then we only need to
+    // set the phi-kill flag on the VNInfo at this PHI.
+    if (PHIColor && SrcColor == PHIColor) {
+      LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+      SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+      VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+      assert(SrcVNI);
+      SrcVNI->setHasPHIKill(true);
+      continue;
+    }
+
+    unsigned CopyReg = 0;
+    if (PHIColor) {
+      SrcCopyMap::const_iterator I
+        = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+      CopyReg
+        = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+    }
+
+    if (!CopyReg) {
+      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+      CopyReg = MRI->createVirtualRegister(RC);
+
+      MachineBasicBlock::iterator
+        CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+      unsigned SrcSubReg = SrcMO.getSubReg();
+      MachineInstr *CopyInstr = BuildMI(*PredBB,
+                                        CopyInsertPoint,
+                                        PHI->getDebugLoc(),
+                                        TII->get(TargetOpcode::COPY),
+                                        CopyReg).addReg(SrcReg, 0, SrcSubReg);
+      LI->InsertMachineInstrInMaps(CopyInstr);
+      ++NumSrcCopiesInserted;
+
+      // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+      // the newly added range.
+      LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+      InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+      addReg(CopyReg);
+      if (PHIColor) {
+        unionRegs(PHIColor, CopyReg);
+        assert(getRegColor(CopyReg) != CopyReg);
       } else {
-        SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
-        PI.removeRange(*PI.getLiveRangeContaining(idx), true);
-      }
-    } else {
-      // Trim live intervals of input registers.  They are no longer live into
-      // this block if they died after the PHI.  If they lived after it, don't
-      // trim them because they might have other legitimate uses.
-      for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
-        unsigned reg = PInstr->getOperand(i).getReg();
-        
-        MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
-        LiveInterval& InputI = LI.getInterval(reg);
-        if (MBB != PInstr->getParent() &&
-            InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
-            InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
-          InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
-                             LI.getInstructionIndex(PInstr),
-                             true);
+        PHIColor = CopyReg;
+        assert(getRegColor(CopyReg) == CopyReg);
       }
-      
-      // If the PHI is not dead, then the valno defined by the PHI
-      // now has an unknown def.
-      SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
-      const LiveRange* PLR = PI.getLiveRangeContaining(idx);
-      PLR->valno->setIsPHIDef(true);
-      LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
-                   PLR->start, PLR->valno);
-      PI.addRange(R);
+
+      if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+        InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
     }
-    
-    LI.RemoveMachineInstrFromMaps(PInstr);
-    PInstr->eraseFromParent();
+
+    SrcMO.setReg(CopyReg);
+
+    // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+    // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+    // processed later, but this is still correct to do at this point because we
+    // never rely on LiveIntervals being correct while inserting copies.
+    // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+    // pass does?
+    LiveInterval &SrcLI = LI->getInterval(SrcReg);
+    SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+    SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+    SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+    if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+      SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
   }
-  
-  LI.renumber();
-  
-  return true;
+
+  unsigned DestReg = PHI->getOperand(0).getReg();
+  unsigned DestColor = getRegColor(DestReg);
+
+  if (PHIColor && DestColor == PHIColor) {
+    LiveInterval &DestLI = LI->getInterval(DestReg);
+
+    // Set the phi-def flag for the VN at this PHI.
+    SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+    VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+    assert(DestVNI);
+    DestVNI->setIsPHIDef(true);
+  
+    // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+    // instruction. After PHI elimination, PHI instructions are replaced by VNs
+    // with the phi-def flag set, and the live ranges of these VNs start at the
+    // beginning of the basic block.
+    SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+    DestVNI->def = MBBStartIndex;
+    DestLI.addRange(LiveRange(MBBStartIndex,
+                              PHIIndex.getDefIndex(),
+                              DestVNI));
+    return;
+  }
+
+  const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+  unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+  MachineInstr *CopyInstr = BuildMI(*MBB,
+                                    MBB->SkipPHIsAndLabels(MBB->begin()),
+                                    PHI->getDebugLoc(),
+                                    TII->get(TargetOpcode::COPY),
+                                    DestReg).addReg(CopyReg);
+  LI->InsertMachineInstrInMaps(CopyInstr);
+  PHI->getOperand(0).setReg(CopyReg);
+  ++NumDestCopiesInserted;
+
+  // Add the region from the beginning of MBB to the copy instruction to
+  // CopyReg's live interval, and give the VNInfo the phidef flag.
+  LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+  SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+  SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+  VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+                                        CopyInstr,
+                                        LI->getVNInfoAllocator());
+  CopyVNI->setIsPHIDef(true);
+  CopyLI.addRange(LiveRange(MBBStartIndex,
+                            DestCopyIndex.getDefIndex(),
+                            CopyVNI));
+
+  // Adjust DestReg's live interval to adjust for its new definition at
+  // CopyInstr.
+  LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+  SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+  DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+  VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+  assert(DestVNI);
+  DestVNI->def = DestCopyIndex.getDefIndex();
+
+  InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+  if (Reg == NewReg)
+    return;
+
+  LiveInterval &OldLI = LI->getInterval(Reg);
+  LiveInterval &NewLI = LI->getInterval(NewReg);
+
+  // Merge the live ranges of the two registers.
+  DenseMap<VNInfo*, VNInfo*> VNMap;
+  for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+       LRI != LRE; ++LRI) {
+    LiveRange OldLR = *LRI;
+    VNInfo *OldVN = OldLR.valno;
+
+    VNInfo *&NewVN = VNMap[OldVN];
+    if (!NewVN) {
+      NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+      VNMap[OldVN] = NewVN;
+    }
+
+    LiveRange LR(OldLR.start, OldLR.end, NewVN);
+    NewLI.addRange(LR);
+  }
+
+  // Remove the LiveInterval for the register being renamed and replace all
+  // of its defs and uses with the new register.
+  LI->removeInterval(Reg);
+  MRI->replaceRegWith(Reg, NewReg);
 }
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index a815b364d54e..04d3d311b416 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -350,7 +350,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
-    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
     if (MO.isDef()) {
       const TargetRegisterClass *RC = MRI->getRegClass(Reg);
@@ -459,15 +459,19 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
   unsigned MaxDuplicateCount;
-  if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+  if (TailDuplicateSize.getNumOccurrences() == 0 &&
+      MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
     MaxDuplicateCount = 1;
   else
     MaxDuplicateCount = TailDuplicateSize;
 
   if (PreRegAlloc) {
-      // Pre-regalloc tail duplication hurts compile time and doesn't help
-      // much except for indirect branches.
-    if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
+    if (TailBB->empty())
+      return false;
+    const TargetInstrDesc &TID = TailBB->back().getDesc();
+    // Pre-regalloc tail duplication hurts compile time and doesn't help
+    // much except for indirect branches and returns.
+    if (!TID.isIndirectBranch() && !TID.isReturn())
       return false;
     // If the target has hardware branch prediction that can handle indirect
     // branches, duplicating them can often make them predictable when there
@@ -500,9 +504,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
     if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
   }
-  // Heuristically, don't tail-duplicate calls if it would expand code size,
-  // as it's less likely to be worth the extra cost.
-  if (InstrCount > 1 && HasCall)
+  // Don't tail-duplicate calls before register allocation. Calls presents a
+  // barrier to register allocation so duplicating them may end up increasing
+  // spills.
+  if (InstrCount > 1 && (PreRegAlloc && HasCall))
     return false;
 
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 6e4a0d837ecd..15340a3f1084 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -22,13 +22,18 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static cl::opt<bool> DisableHazardRecognizer(
+  "disable-sched-hazard", cl::Hidden, cl::init(false),
+  cl::desc("Disable hazard detection during preRA scheduling"));
+
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
 /// after it, replacing it with an unconditional branch to NewDest.
 void
@@ -135,7 +140,7 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.isPredicable())
     return false;
-  
+
   for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
     if (TID.OpInfo[i].isPredicate()) {
       MachineOperand &MO = MI->getOperand(i);
@@ -166,8 +171,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
   MBB.insert(I, MI);
 }
 
-bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
-                                           const MachineInstr *MI1) const {
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+                                      const MachineInstr *MI1,
+                                      const MachineRegisterInfo *MRI) const {
   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
@@ -252,9 +259,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
     const MachineFrameInfo &MFI = *MF.getFrameInfo();
     assert(MFI.getObjectOffset(FI) != -1);
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              Flags, /*Offset=*/0,
-                              MFI.getObjectSize(FI),
+      MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              Flags, MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     NewMI->addMemOperand(MF, MMO);
 
@@ -329,8 +336,13 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
   const TargetInstrDesc &TID = MI->getDesc();
 
   // Avoid instructions obviously unsafe for remat.
-  if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
-      TID.mayStore())
+  if (TID.isNotDuplicable() || TID.mayStore() ||
+      MI->hasUnmodeledSideEffects())
+    return false;
+
+  // Don't remat inline asm. We have no idea how expensive it is
+  // even if it's side effect free.
+  if (MI->isInlineAsm())
     return false;
 
   // Avoid instructions which load from potentially varying memory.
@@ -414,8 +426,24 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
   return false;
 }
 
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+  return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+                             const ScheduleDAG *DAG) const {
+  // Dummy hazard recognizer allows all instructions to issue.
+  return new ScheduleHazardRecognizer();
+}
+
 // Default implementation of CreateTargetPostRAHazardRecognizer.
 ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
-  return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                   const ScheduleDAG *DAG) const {
+  return (ScheduleHazardRecognizer *)
+    new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
 }
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f1e10eec724c..0b7bd98cc692 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -29,10 +29,12 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
 using namespace llvm;
 using namespace dwarf;
 
@@ -45,81 +47,81 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
   BSSSection =
-    getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".bss", ELF::SHT_NOBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getBSS());
 
   TextSection =
-    getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_EXECINSTR |
-                               MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".text", ELF::SHT_PROGBITS,
+                               ELF::SHF_EXECINSTR |
+                               ELF::SHF_ALLOC,
                                SectionKind::getText());
 
   DataSection =
-    getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".data", ELF::SHT_PROGBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getDataRel());
 
   ReadOnlySection =
-    getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC,
                                SectionKind::getReadOnly());
 
   TLSDataSection =
-    getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
-                               MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_TLS |
+                               ELF::SHF_WRITE,
                                SectionKind::getThreadData());
 
   TLSBSSSection =
-    getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
-                               MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
-                               MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_TLS |
+                               ELF::SHF_WRITE,
                                SectionKind::getThreadBSS());
 
   DataRelSection =
-    getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getDataRel());
 
   DataRelLocalSection =
-    getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getDataRelLocal());
 
   DataRelROSection =
-    getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getReadOnlyWithRel());
 
   DataRelROLocalSection =
-    getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getReadOnlyWithRelLocal());
 
   MergeableConst4Section =
-    getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+    getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_MERGE,
                                SectionKind::getMergeableConst4());
 
   MergeableConst8Section =
-    getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+    getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_MERGE,
                                SectionKind::getMergeableConst8());
 
   MergeableConst16Section =
-    getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+    getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_MERGE,
                                SectionKind::getMergeableConst16());
 
   StaticCtorSection =
-    getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getDataRel());
 
   StaticDtorSection =
-    getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+    getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC |ELF::SHF_WRITE,
                                SectionKind::getDataRel());
 
   // Exception Handling Sections.
@@ -129,50 +131,50 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
   // runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
   LSDASection =
-    getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC,
                                SectionKind::getReadOnly());
-  EHFrameSection =
-    getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
-                               SectionKind::getDataRel());
-
   // Debug Info Sections.
   DwarfAbbrevSection =
-    getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfInfoSection =
-    getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfLineSection =
-    getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfFrameSection =
-    getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfPubNamesSection =
-    getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfPubTypesSection =
-    getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfStrSection =
-    getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfLocSection =
-    getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfARangesSection =
-    getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfRangesSection =
-    getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
   DwarfMacroInfoSection =
-    getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+    getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
                                SectionKind::getMetadata());
 }
 
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+  return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+                                    ELF::SHF_ALLOC,
+                                    SectionKind::getDataRel());
+}
 
 static SectionKind
 getELFKindForNamedSection(StringRef Name, SectionKind K) {
@@ -208,18 +210,18 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
 static unsigned getELFSectionType(StringRef Name, SectionKind K) {
 
   if (Name == ".init_array")
-    return MCSectionELF::SHT_INIT_ARRAY;
+    return ELF::SHT_INIT_ARRAY;
 
   if (Name == ".fini_array")
-    return MCSectionELF::SHT_FINI_ARRAY;
+    return ELF::SHT_FINI_ARRAY;
 
   if (Name == ".preinit_array")
-    return MCSectionELF::SHT_PREINIT_ARRAY;
+    return ELF::SHT_PREINIT_ARRAY;
 
   if (K.isBSS() || K.isThreadBSS())
-    return MCSectionELF::SHT_NOBITS;
+    return ELF::SHT_NOBITS;
 
-  return MCSectionELF::SHT_PROGBITS;
+  return ELF::SHT_PROGBITS;
 }
 
 
@@ -228,24 +230,24 @@ getELFSectionFlags(SectionKind K) {
   unsigned Flags = 0;
 
   if (!K.isMetadata())
-    Flags |= MCSectionELF::SHF_ALLOC;
+    Flags |= ELF::SHF_ALLOC;
 
   if (K.isText())
-    Flags |= MCSectionELF::SHF_EXECINSTR;
+    Flags |= ELF::SHF_EXECINSTR;
 
   if (K.isWriteable())
-    Flags |= MCSectionELF::SHF_WRITE;
+    Flags |= ELF::SHF_WRITE;
 
   if (K.isThreadLocal())
-    Flags |= MCSectionELF::SHF_TLS;
+    Flags |= ELF::SHF_TLS;
 
   // K.isMergeableConst() is left out to honour PR4650
   if (K.isMergeableCString() || K.isMergeableConst4() ||
       K.isMergeableConst8() || K.isMergeableConst16())
-    Flags |= MCSectionELF::SHF_MERGE;
+    Flags |= ELF::SHF_MERGE;
 
   if (K.isMergeableCString())
-    Flags |= MCSectionELF::SHF_STRINGS;
+    Flags |= ELF::SHF_STRINGS;
 
   return Flags;
 }
@@ -261,23 +263,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 
   return getContext().getELFSection(SectionName,
                                     getELFSectionType(SectionName, Kind),
-                                    getELFSectionFlags(Kind), Kind, true);
-}
-
-static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
-  if (Kind.isText())                 return ".gnu.linkonce.t.";
-  if (Kind.isReadOnly())             return ".gnu.linkonce.r.";
-
-  if (Kind.isThreadData())           return ".gnu.linkonce.td.";
-  if (Kind.isThreadBSS())            return ".gnu.linkonce.tb.";
-
-  if (Kind.isDataNoRel())            return ".gnu.linkonce.d.";
-  if (Kind.isDataRelLocal())         return ".gnu.linkonce.d.rel.local.";
-  if (Kind.isDataRel())              return ".gnu.linkonce.d.rel.";
-  if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
-
-  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
-  return ".gnu.linkonce.d.rel.ro.";
+                                    getELFSectionFlags(Kind), Kind);
 }
 
 /// getSectionPrefixForGlobal - Return the section prefix name used by options
@@ -307,7 +293,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   bool EmitUniquedSection;
   if (Kind.isText())
     EmitUniquedSection = TM.getFunctionSections();
-  else 
+  else
     EmitUniquedSection = TM.getDataSections();
 
   // If this global is linkonce/weak and the target handles this by emitting it
@@ -315,19 +301,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if ((GV->isWeakForLinker() || EmitUniquedSection) &&
       !Kind.isCommon() && !Kind.isBSS()) {
     const char *Prefix;
-    if (GV->isWeakForLinker())
-      Prefix = getSectionPrefixForUniqueGlobal(Kind);
-    else {
-      assert(EmitUniquedSection);
-      Prefix = getSectionPrefixForGlobal(Kind);
-    }
+    Prefix = getSectionPrefixForGlobal(Kind);
 
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
     MCSymbol *Sym = Mang->getSymbol(GV);
     Name.append(Sym->getName().begin(), Sym->getName().end());
+    StringRef Group = "";
+    unsigned Flags = getELFSectionFlags(Kind);
+    if (GV->isWeakForLinker()) {
+      Group = Sym->getName();
+      Flags |= ELF::SHF_GROUP;
+    }
+
     return getContext().getELFSection(Name.str(),
                                       getELFSectionType(Name.str(), Kind),
-                                      getELFSectionFlags(Kind), Kind);
+                                      Flags, Kind, 0, Group);
   }
 
   if (Kind.isText()) return TextSection;
@@ -352,10 +340,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 
 
     std::string Name = SizeSpec + utostr(Align);
-    return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS,
-                                      MCSectionELF::SHF_ALLOC |
-                                      MCSectionELF::SHF_MERGE |
-                                      MCSectionELF::SHF_STRINGS,
+    return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+                                      ELF::SHF_ALLOC |
+                                      ELF::SHF_MERGE |
+                                      ELF::SHF_STRINGS,
                                       Kind);
   }
 
@@ -450,7 +438,16 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   IsFunctionEHSymbolGlobal = true;
   IsFunctionEHFrameSymbolPrivate = false;
   SupportsWeakOmittedEHFrame = false;
-  
+
+  Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+  if (T.getOS() == Triple::Darwin) {
+    unsigned MajNum = T.getDarwinMajorNumber();
+    if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger
+      CommDirectiveSupportsAlignment = false;
+    if (MajNum > 9)                 // 10.6 SnowLeopard
+      IsFunctionEHSymbolGlobal = false;
+  }
+
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
   TextSection // .text
@@ -469,20 +466,20 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
     = getContext().getMachOSection("__DATA", "__thread_bss",
                                    MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
                                    SectionKind::getThreadBSS());
-                                   
+
   // TODO: Verify datarel below.
   TLSTLVSection // .tlv
     = getContext().getMachOSection("__DATA", "__thread_vars",
                                    MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
                                    SectionKind::getDataRel());
-                                   
+
   TLSThreadInitSection
     = getContext().getMachOSection("__DATA", "__thread_init",
                           MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
                           SectionKind::getDataRel());
-                                   
+
   CStringSection // .cstring
-    = getContext().getMachOSection("__TEXT", "__cstring", 
+    = getContext().getMachOSection("__TEXT", "__cstring",
                                    MCSectionMachO::S_CSTRING_LITERALS,
                                    SectionKind::getMergeable1ByteCString());
   UStringSection
@@ -493,7 +490,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
                                    MCSectionMachO::S_4BYTE_LITERALS,
                                    SectionKind::getMergeableConst4());
   EightByteConstantSection // .literal8
-    = getContext().getMachOSection("__TEXT", "__literal8", 
+    = getContext().getMachOSection("__TEXT", "__literal8",
                                    MCSectionMachO::S_8BYTE_LITERALS,
                                    SectionKind::getMergeableConst8());
 
@@ -517,14 +514,14 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                                    SectionKind::getText());
   ConstTextCoalSection
-    = getContext().getMachOSection("__TEXT", "__const_coal", 
+    = getContext().getMachOSection("__TEXT", "__const_coal",
                                    MCSectionMachO::S_COALESCED,
                                    SectionKind::getReadOnly());
   ConstDataSection  // .const_data
     = getContext().getMachOSection("__DATA", "__const", 0,
                                    SectionKind::getReadOnlyWithRel());
   DataCoalSection
-    = getContext().getMachOSection("__DATA","__datacoal_nt", 
+    = getContext().getMachOSection("__DATA","__datacoal_nt",
                                    MCSectionMachO::S_COALESCED,
                                    SectionKind::getDataRel());
   DataCommonSection
@@ -534,7 +531,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   DataBSSSection
     = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
                                    SectionKind::getBSS());
-  
+
 
   LazySymbolPointerSection
     = getContext().getMachOSection("__DATA", "__la_symbol_ptr",
@@ -566,17 +563,9 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   // Exception Handling.
   LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
                                              SectionKind::getReadOnlyWithRel());
-  EHFrameSection =
-    getContext().getMachOSection("__TEXT", "__eh_frame",
-                                 MCSectionMachO::S_COALESCED |
-                                 MCSectionMachO::S_ATTR_NO_TOC |
-                                 MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
-                                 MCSectionMachO::S_ATTR_LIVE_SUPPORT,
-                                 SectionKind::getReadOnly());
-
   // Debug Information.
   DwarfAbbrevSection =
-    getContext().getMachOSection("__DWARF", "__debug_abbrev", 
+    getContext().getMachOSection("__DWARF", "__debug_abbrev",
                                  MCSectionMachO::S_ATTR_DEBUG,
                                  SectionKind::getMetadata());
   DwarfInfoSection =
@@ -623,10 +612,19 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
     getContext().getMachOSection("__DWARF", "__debug_inlined",
                                  MCSectionMachO::S_ATTR_DEBUG,
                                  SectionKind::getMetadata());
-                                 
+
   TLSExtraDataSection = TLSTLVSection;
 }
 
+const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
+  return getContext().getMachOSection("__TEXT", "__eh_frame",
+                                      MCSectionMachO::S_COALESCED |
+                                      MCSectionMachO::S_ATTR_NO_TOC |
+                                      MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+                                      MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+                                      SectionKind::getReadOnly());
+}
+
 const MCSection *TargetLoweringObjectFileMachO::
 getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const {
@@ -665,7 +663,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
-  
+
   // Handle thread local data.
   if (Kind.isThreadBSS()) return TLSBSSSection;
   if (Kind.isThreadData()) return TLSDataSection;
@@ -685,7 +683,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if (Kind.isMergeable1ByteCString() &&
       TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
     return CStringSection;
-      
+
   // Do not put 16-bit arrays in the UString section if they have an
   // externally visible label, this runs into issues with certain linker
   // versions.
@@ -721,7 +719,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   // with the .zerofill directive (aka .lcomm).
   if (Kind.isBSSLocal())
     return DataBSSSection;
-  
+
   // Otherwise, just drop the variable in the normal data section.
   return DataSection;
 }
@@ -858,13 +856,6 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                 COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                                 COFF::IMAGE_SCN_MEM_READ,
                                 SectionKind::getReadOnly());
-  EHFrameSection =
-    getContext().getCOFFSection(".eh_frame",
-                                COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                                COFF::IMAGE_SCN_MEM_READ |
-                                COFF::IMAGE_SCN_MEM_WRITE,
-                                SectionKind::getDataRel());
-
   // Debug info.
   DwarfAbbrevSection =
     getContext().getCOFFSection(".debug_abbrev",
@@ -928,6 +919,15 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                 SectionKind::getMetadata());
 }
 
+const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
+  return getContext().getCOFFSection(".eh_frame",
+                                     COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                     COFF::IMAGE_SCN_MEM_READ |
+                                     COFF::IMAGE_SCN_MEM_WRITE,
+                                     SectionKind::getDataRel());
+}
+
+
 static unsigned
 getCOFFSectionFlags(SectionKind K) {
   unsigned Flags = 0;
@@ -938,6 +938,7 @@ getCOFFSectionFlags(SectionKind K) {
   else if (K.isText())
     Flags |=
       COFF::IMAGE_SCN_MEM_EXECUTE |
+      COFF::IMAGE_SCN_MEM_READ |
       COFF::IMAGE_SCN_CNT_CODE;
   else if (K.isBSS ())
     Flags |=
@@ -967,12 +968,12 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 
 static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
   if (Kind.isText())
-    return ".text$linkonce";
+    return ".text$";
   if (Kind.isBSS ())
-    return ".bss$linkonce";
+    return ".bss$";
   if (Kind.isWriteable())
-    return ".data$linkonce";
-  return ".rdata$linkonce";
+    return ".data$";
+  return ".rdata$";
 }
 
 
@@ -987,14 +988,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
     MCSymbol *Sym = Mang->getSymbol(GV);
-    Name.append(Sym->getName().begin(), Sym->getName().end());
+    Name.append(Sym->getName().begin() + 1, Sym->getName().end());
 
     unsigned Characteristics = getCOFFSectionFlags(Kind);
 
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 
     return getContext().getCOFFSection(Name.str(), Characteristics,
-                          COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
+                          COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
   }
 
   if (Kind.isText())
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 78989c567e42..b3120b8be1ab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -110,7 +110,7 @@ namespace {
     bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
                             MachineBasicBlock::iterator &nmi,
                             MachineFunction::iterator &mbbi,
-                            unsigned RegB, unsigned Dist);
+                            unsigned RegA, unsigned RegB, unsigned Dist);
 
     typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
     bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
@@ -138,7 +138,9 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
+    TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+      initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
@@ -146,10 +148,7 @@ namespace {
       AU.addPreserved<LiveVariables>();
       AU.addPreservedID(MachineLoopInfoID);
       AU.addPreservedID(MachineDominatorsID);
-      if (StrongPHIElim)
-        AU.addPreservedID(StrongPHIEliminationID);
-      else
-        AU.addPreservedID(PHIEliminationID);
+      AU.addPreservedID(PHIEliminationID);
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -159,8 +158,11 @@ namespace {
 }
 
 char TwoAddressInstructionPass::ID = 0;
-INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction",
-                "Two-Address instruction pass", false, false);
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+                "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+                "Two-Address instruction pass", false, false)
 
 char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
 
@@ -548,8 +550,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
   unsigned FromRegC = getMappedReg(regC, SrcRegMap);
   unsigned ToRegB = getMappedReg(regB, DstRegMap);
   unsigned ToRegC = getMappedReg(regC, DstRegMap);
-  if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
-      (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+  if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+      ((!FromRegC && !ToRegC) ||
+       regsAreCompatible(FromRegB, ToRegC, TRI) ||
        regsAreCompatible(FromRegC, ToRegB, TRI)))
     return true;
 
@@ -630,7 +633,8 @@ bool
 TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
                                               MachineBasicBlock::iterator &nmi,
                                               MachineFunction::iterator &mbbi,
-                                              unsigned RegB, unsigned Dist) {
+                                              unsigned RegA, unsigned RegB,
+                                              unsigned Dist) {
   MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
   if (NewMI) {
     DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
@@ -650,6 +654,10 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
       mi = NewMI;
       nmi = llvm::next(mi);
     }
+
+    // Update source and destination register maps.
+    SrcRegMap.erase(RegA);
+    DstRegMap.erase(RegB);
     return true;
   }
 
@@ -740,7 +748,7 @@ static bool isSafeToDelete(MachineInstr *MI,
   const TargetInstrDesc &TID = MI->getDesc();
   if (TID.mayStore() || TID.isCall())
     return false;
-  if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+  if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
     return false;
 
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -884,7 +892,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
     // three-address instruction.  Check if it is profitable.
     if (!regBKilled || isProfitableToConv3Addr(regA)) {
       // Try to convert it.
-      if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+      if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
         ++NumConvertedTo3Addr;
         return true; // Done with this instruction.
       }
@@ -951,7 +959,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
           if (LV) {
             for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
               MachineOperand &MO = mi->getOperand(i);
-              if (MO.isReg() && MO.getReg() != 0 &&
+              if (MO.isReg() && 
                   TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
                 if (MO.isUse()) {
                   if (MO.isKill()) {
@@ -1013,8 +1021,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
         << MF.getFunction()->getName() << '\n');
 
   // ReMatRegs - Keep track of the registers whose def's are remat'ed.
-  BitVector ReMatRegs;
-  ReMatRegs.resize(MRI->getLastVirtReg()+1);
+  BitVector ReMatRegs(MRI->getNumVirtRegs());
 
   typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
     TiedOperandMap;
@@ -1143,7 +1150,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
             DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
             unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
             TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
-            ReMatRegs.set(regB);
+            ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
             ++NumReMats;
           } else {
             BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
@@ -1229,13 +1236,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // Some remat'ed instructions are dead.
-  int VReg = ReMatRegs.find_first();
-  while (VReg != -1) {
+  for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
     if (MRI->use_nodbg_empty(VReg)) {
       MachineInstr *DefMI = MRI->getVRegDef(VReg);
       DefMI->eraseFromParent();
     }
-    VReg = ReMatRegs.find_next(VReg);
   }
 
   // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
@@ -1346,7 +1352,6 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
       continue;
 
     // Insert a copy to replace the original.
-    MachineBasicBlock::iterator InsertLoc = SomeMI;
     MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
                                    SomeMI->getDebugLoc(),
                                    TII->get(TargetOpcode::COPY))
@@ -1412,6 +1417,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
     SmallSet<unsigned, 4> Seen;
     for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
       unsigned SrcReg = MI->getOperand(i).getReg();
+      unsigned SubIdx = MI->getOperand(i+1).getImm();
       if (MI->getOperand(i).getSubReg() ||
           TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
         DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
@@ -1431,7 +1437,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
 
       bool isKill = MI->getOperand(i).isKill();
       if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
-          !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+          !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+          !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+                                         MRI->getRegClass(SrcReg), SubIdx)) {
         // REG_SEQUENCE cannot have duplicated operands, add a copy.
         // Also add an copy if the source is live-in the block. We don't want
         // to end up with a partial-redef of a livein, e.g.
@@ -1460,7 +1468,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
         MachineBasicBlock::iterator InsertLoc = MI;
         MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
                                 MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
-            .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+            .addReg(DstReg, RegState::Define, SubIdx)
             .addReg(SrcReg, getKillRegState(isKill));
         MI->getOperand(i).setReg(0);
         if (LV && isKill)
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 6dd333358bc4..48d8ab1658da 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,16 +44,19 @@ namespace {
     virtual bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    UnreachableBlockElim() : FunctionPass(ID) {}
+    UnreachableBlockElim() : FunctionPass(ID) {
+      initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
       AU.addPreserved<ProfileInfo>();
     }
   };
 }
 char UnreachableBlockElim::ID = 0;
 INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
-                "Remove unreachable blocks from the CFG", false, false);
+                "Remove unreachable blocks from the CFG", false, false)
 
 FunctionPass *llvm::createUnreachableBlockEliminationPass() {
   return new UnreachableBlockElim();
@@ -106,7 +110,7 @@ namespace {
 char UnreachableMachineBlockElim::ID = 0;
 
 INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
-  "Remove unreachable machine basic blocks", false, false);
+  "Remove unreachable machine basic blocks", false, false)
 
 char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
 
@@ -118,6 +122,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
   SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+  bool ModifiedPHI = false;
 
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -179,6 +184,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
         if (!preds.count(phi->getOperand(i).getMBB())) {
           phi->RemoveOperand(i);
           phi->RemoveOperand(i-1);
+          ModifiedPHI = true;
         }
 
       if (phi->getNumOperands() == 3) {
@@ -188,6 +194,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
         MachineInstr* temp = phi;
         ++phi;
         temp->eraseFromParent();
+        ModifiedPHI = true;
 
         if (Input != Output)
           F.getRegInfo().replaceRegWith(Output, Input);
@@ -201,5 +208,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
 
   F.RenumberBlocks();
 
-  return DeadBlocks.size();
+  return (DeadBlocks.size() || ModifiedPHI);
 }
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 20ffcffa70d3..734b87e62f62 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -48,7 +49,7 @@ STATISTIC(NumSpills  , "Number of register spills");
 
 char VirtRegMap::ID = 0;
 
-INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
 
 bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
   MRI = &mf.getRegInfo();
@@ -74,8 +75,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
   EmergencySpillSlots.clear();
   
   SpillSlotToUsesMap.resize(8);
-  ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
-                       TargetRegisterInfo::FirstVirtualRegister);
+  ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
 
   allocatableRCRegs.clear();
   for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
@@ -89,24 +89,37 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
 }
 
 void VirtRegMap::grow() {
-  unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
-  Virt2PhysMap.grow(LastVirtReg);
-  Virt2StackSlotMap.grow(LastVirtReg);
-  Virt2ReMatIdMap.grow(LastVirtReg);
-  Virt2SplitMap.grow(LastVirtReg);
-  Virt2SplitKillMap.grow(LastVirtReg);
-  ReMatMap.grow(LastVirtReg);
-  ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+  unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+  Virt2PhysMap.resize(NumRegs);
+  Virt2StackSlotMap.resize(NumRegs);
+  Virt2ReMatIdMap.resize(NumRegs);
+  Virt2SplitMap.resize(NumRegs);
+  Virt2SplitKillMap.resize(NumRegs);
+  ReMatMap.resize(NumRegs);
+  ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+  int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+                                                      RC->getAlignment());
+  if (LowSpillSlot == NO_STACK_SLOT)
+    LowSpillSlot = SS;
+  if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+    HighSpillSlot = SS;
+  assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+  unsigned Idx = SS-LowSpillSlot;
+  while (Idx >= SpillSlotToUsesMap.size())
+    SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+  return SS;
 }
 
 unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
   std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
   unsigned physReg = Hint.second;
-  if (physReg &&
-      TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+  if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
     physReg = getPhys(physReg);
   if (Hint.first == 0)
-    return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+    return (TargetRegisterInfo::isPhysicalRegister(physReg))
       ? physReg : 0;
   return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
 }
@@ -116,18 +129,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
   assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
          "attempt to assign stack slot to already spilled register");
   const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
-  int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
-                                                 RC->getAlignment());
-  if (LowSpillSlot == NO_STACK_SLOT)
-    LowSpillSlot = SS;
-  if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
-    HighSpillSlot = SS;
-  unsigned Idx = SS-LowSpillSlot;
-  while (Idx >= SpillSlotToUsesMap.size())
-    SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
-  Virt2StackSlotMap[virtReg] = SS;
   ++NumSpills;
-  return SS;
+  return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
 }
 
 void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
@@ -160,14 +163,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
     EmergencySpillSlots.find(RC);
   if (I != EmergencySpillSlots.end())
     return I->second;
-  int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
-                                                 RC->getAlignment());
-  if (LowSpillSlot == NO_STACK_SLOT)
-    LowSpillSlot = SS;
-  if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
-    HighSpillSlot = SS;
-  EmergencySpillSlots[RC] = SS;
-  return SS;
+  return EmergencySpillSlots[RC] = createSpillSlot(RC);
 }
 
 void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
@@ -232,10 +228,11 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
   UnusedRegs.resize(NumRegs);
 
   BitVector Used(NumRegs);
-  for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
-         e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
-    if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
-      Used.set(Virt2PhysMap[i]);
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+      Used.set(Virt2PhysMap[Reg]);
+  }
 
   BitVector Allocatable = TRI->getAllocatableSet(*MF);
   bool AnyUnused = false;
@@ -258,23 +255,97 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
   return AnyUnused;
 }
 
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+  DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+               << "********** Function: "
+               << MF->getFunction()->getName() << '\n');
+
+  SmallVector<unsigned, 8> SuperKills;
+
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    DEBUG(MBBI->print(dbgs(), Indexes));
+    for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+         MII != MIE;) {
+      MachineInstr *MI = MII;
+      ++MII;
+
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+        MachineOperand &MO = *MOI;
+        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+          continue;
+        unsigned VirtReg = MO.getReg();
+        unsigned PhysReg = getPhys(VirtReg);
+        assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+        // Preserve semantics of sub-register operands.
+        if (MO.getSubReg()) {
+          // A virtual register kill refers to the whole register, so we may
+          // have to add <imp-use,kill> operands for the super-register.
+          if (MO.isUse() && MO.isKill() && !MO.isUndef())
+            SuperKills.push_back(PhysReg);
+
+          // We don't have to deal with sub-register defs because
+          // LiveIntervalAnalysis already added the necessary <imp-def>
+          // operands.
+
+          // PhysReg operands cannot have subregister indexes.
+          PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+          assert(PhysReg && "Invalid SubReg for physical register");
+          MO.setSubReg(0);
+        }
+        // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+        // we need the inlining here.
+        MO.setReg(PhysReg);
+      }
+
+      // Add any missing super-register kills after rewriting the whole
+      // instruction.
+      while (!SuperKills.empty())
+        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+      DEBUG(dbgs() << "> " << *MI);
+
+      // Finally, remove any identity copies.
+      if (MI->isIdentityCopy()) {
+        DEBUG(dbgs() << "Deleting identity copy.\n");
+        RemoveMachineInstrFromMaps(MI);
+        if (Indexes)
+          Indexes->removeMachineInstrFromMaps(MI);
+        // It's safe to erase MI because MII has already been incremented.
+        MI->eraseFromParent();
+      }
+    }
+  }
+
+  // Tell MRI about physical registers in use.
+  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+    if (!MRI->reg_nodbg_empty(Reg))
+      MRI->setPhysRegUsed(Reg);
+}
+
 void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
   const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
   const MachineRegisterInfo &MRI = MF->getRegInfo();
 
   OS << "********** REGISTER MAP **********\n";
-  for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
-         e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
-    if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
-      OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
-         << "] " << MRI.getRegClass(i)->getName() << "\n";
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+      OS << '[' << PrintReg(Reg, TRI) << " -> "
+         << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+         << MRI.getRegClass(Reg)->getName() << "\n";
+    }
   }
 
-  for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
-         e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
-    if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
-      OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
-         << "] " << MRI.getRegClass(i)->getName() << "\n";
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+      OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+         << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+    }
+  }
   OS << '\n';
 }
 
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 8b6082d18193..ba50f4e42302 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -35,6 +35,7 @@ namespace llvm {
   class TargetInstrInfo;
   class TargetRegisterInfo;
   class raw_ostream;
+  class SlotIndexes;
 
   class VirtRegMap : public MachineFunctionPass {
   public:
@@ -80,7 +81,7 @@ namespace llvm {
 
     /// Virt2SplitKillMap - This is splitted virtual register to its last use
     /// (kill) index mapping.
-    IndexedMap<SlotIndex> Virt2SplitKillMap;
+    IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
 
     /// ReMatMap - This is virtual register to re-materialized instruction
     /// mapping. Each virtual register whose definition is going to be
@@ -134,6 +135,9 @@ namespace llvm {
     /// UnusedRegs - A list of physical registers that have not been used.
     BitVector UnusedRegs;
 
+    /// createSpillSlot - Allocate a spill slot for RC from MFI.
+    unsigned createSpillSlot(const TargetRegisterClass *RC);
+
     VirtRegMap(const VirtRegMap&);     // DO NOT IMPLEMENT
     void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
 
@@ -153,10 +157,13 @@ namespace llvm {
     }
 
     MachineFunction &getMachineFunction() const {
-      assert(MF && "getMachineFunction called before runOnMAchineFunction");
+      assert(MF && "getMachineFunction called before runOnMachineFunction");
       return *MF;
     }
 
+    MachineRegisterInfo &getRegInfo() const { return *MRI; }
+    const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
     void grow();
 
     /// @brief returns true if the specified virtual register is
@@ -207,10 +214,19 @@ namespace llvm {
     }
 
     /// @brief returns the live interval virtReg is split from.
-    unsigned getPreSplitReg(unsigned virtReg) {
+    unsigned getPreSplitReg(unsigned virtReg) const {
       return Virt2SplitMap[virtReg];
     }
 
+    /// getOriginal - Return the original virtual register that VirtReg descends
+    /// from through splitting.
+    /// A register that was not created by splitting is its own original.
+    /// This operation is idempotent.
+    unsigned getOriginal(unsigned VirtReg) const {
+      unsigned Orig = getPreSplitReg(VirtReg);
+      return Orig ? Orig : VirtReg;
+    }
+
     /// @brief returns true if the specified virtual register is not
     /// mapped to a stack slot or rematerialized.
     bool isAssignedReg(unsigned virtReg) const {
@@ -426,12 +442,12 @@ namespace llvm {
 
     /// @brief Mark the specified register as being implicitly defined.
     void setIsImplicitlyDefined(unsigned VirtReg) {
-      ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+      ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
     }
 
     /// @brief Returns true if the virtual register is implicitly defined.
     bool isImplicitlyDefined(unsigned VirtReg) const {
-      return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+      return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
     }
 
     /// @brief Updates information about the specified virtual register's value
@@ -487,6 +503,13 @@ namespace llvm {
       return 0;
     }
 
+    /// rewrite - Rewrite all instructions in MF to use only physical registers
+    /// by mapping all virtual register operands to their assigned physical
+    /// registers.
+    ///
+    /// @param Indexes Optionally remove deleted instructions from indexes.
+    void rewrite(SlotIndexes *Indexes);
+
     void print(raw_ostream &OS, const Module* M = 0) const;
     void dump() const;
   };
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 240d28cf3011..458a2134bf4a 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include <algorithm>
 using namespace llvm;
 
 STATISTIC(NumDSE     , "Number of dead stores elided");
@@ -216,7 +216,8 @@ public:
                    << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
     else
       DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
-    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+          << (CanClobber ? " canclobber" : "") << "\n");
   }
 
   /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -297,7 +298,7 @@ ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
   const TargetLowering *TL = MF.getTarget().getTargetLowering();
 
   if (!TL->isTypeLegal(TL->getPointerTy()))
-    // Believe it or not, this is true on PIC16.
+    // Believe it or not, this is true on 16-bit targets like PIC16.
     return InsertLoc;
 
   const TargetRegisterClass *ptrRegClass =
@@ -462,25 +463,70 @@ static void findSinglePredSuccessor(MachineBasicBlock *MBB,
   }
 }
 
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
-                           const TargetRegisterInfo* TRI,
-                           BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps) {
-  if (RegKills[Reg]) {
-    KillOps[Reg]->setIsKill(false);
-    // KillOps[Reg] might be a def of a super-register.
-    unsigned KReg = KillOps[Reg]->getReg();
-    KillOps[KReg] = NULL;
-    RegKills.reset(KReg);
-    for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
-      if (RegKills[*SR]) {
-        KillOps[*SR]->setIsKill(false);
-        KillOps[*SR] = NULL;
-        RegKills.reset(*SR);
-      }
-    }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+  MachineOperand *KillOp = KillOps[Reg];
+  KillOp->setIsKill(false);
+  // KillOps[Reg] might be a def of a super-register.
+  unsigned KReg = KillOp->getReg();
+  if (!RegKills[KReg])
+    return;
+
+  assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+  KillOps[KReg] = NULL;
+  RegKills.reset(KReg);
+
+  // If it's a def of a super-register. Its other sub-regsters are no
+  // longer killed as well.
+  for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+    DEBUG(dbgs() << "  Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+    assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+    KillOps[*SR] = NULL;
+    RegKills.reset(*SR);
+  }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+/// until it rewrites vreg2. At that points it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+                          const TargetRegisterInfo* TRI, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
+  if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+    ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+    return;
+  }
+  // No previous kill for this reg. Check for subreg kills as well.
+  // d4 =
+  // store d4, fi#0
+  // ...
+  //    = s8<kill>
+  // ...
+  //    = d4  <avoiding reload>
+  for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+    unsigned SReg = *SR;
+    if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+      ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
   }
 }
 
@@ -502,15 +548,22 @@ static void InvalidateKills(MachineInstr &MI,
       KillRegs->push_back(Reg);
     assert(Reg < KillOps.size());
     if (KillOps[Reg] == &MO) {
+      // This operand was the kill, now no longer.
       KillOps[Reg] = NULL;
       RegKills.reset(Reg);
       for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
         if (RegKills[*SR]) {
+          assert(KillOps[*SR] == &MO && "bad subreg kill flags");
           KillOps[*SR] = NULL;
           RegKills.reset(*SR);
         }
       }
     }
+    else {
+      // This operand may have reused a previously killed reg. Keep it live in
+      // case it continues to be used after erasing this instruction.
+      ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+    }
   }
 }
 
@@ -578,44 +631,8 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
     if (Reg == 0)
       continue;
 
-    if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
-      // That can't be right. Register is killed but not re-defined and it's
-      // being reused. Let's fix that.
-      KillOps[Reg]->setIsKill(false);
-      // KillOps[Reg] might be a def of a super-register.
-      unsigned KReg = KillOps[Reg]->getReg();
-      KillOps[KReg] = NULL;
-      RegKills.reset(KReg);
-
-      // Must be a def of a super-register. Its other sub-regsters are no
-      // longer killed as well.
-      for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
-        KillOps[*SR] = NULL;
-        RegKills.reset(*SR);
-      }
-    } else {
-      // Check for subreg kills as well.
-      // d4 =
-      // store d4, fi#0
-      // ...
-      //    = s8<kill>
-      // ...
-      //    = d4  <avoiding reload>
-      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
-        unsigned SReg = *SR;
-        if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
-          KillOps[SReg]->setIsKill(false);
-          unsigned KReg = KillOps[SReg]->getReg();
-          KillOps[KReg] = NULL;
-          RegKills.reset(KReg);
-
-          for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
-            KillOps[*SSR] = NULL;
-            RegKills.reset(*SSR);
-          }
-        }
-      }
-    }
+    // This operand may have reused a previously killed reg. Keep it live.
+    ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
 
     if (MO.isKill()) {
       RegKills.set(Reg);
@@ -770,7 +787,8 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
       NotAvailable.insert(Reg);
     else {
       MBB.addLiveIn(Reg);
-      InvalidateKill(Reg, TRI, RegKills, KillOps);
+      if (RegKills[Reg])
+        ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
     }
 
     // Skip over the same register.
@@ -1056,6 +1074,7 @@ class LocalRewriter : public VirtRegRewriter {
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
   VirtRegMap *VRM;
+  LiveIntervals *LIs;
   BitVector AllocatableRegs;
   DenseMap<MachineInstr*, unsigned> DistanceMap;
   DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
@@ -1068,6 +1087,11 @@ public:
                             LiveIntervals* LIs);
 
 private:
+  void EraseInstr(MachineInstr *MI) {
+    VRM->RemoveMachineInstrFromMaps(MI);
+    LIs->RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+  }
 
   bool OptimizeByUnfold2(unsigned VirtReg, int SS,
                          MachineBasicBlock::iterator &MII,
@@ -1110,6 +1134,12 @@ private:
 
   bool InsertSpills(MachineInstr *MI);
 
+  void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+                   std::vector<MachineInstr*> &MaybeDeadStores,
+                   BitVector &RegKills,
+                   ReuseInfo &ReusedOperands,
+                   std::vector<MachineOperand*> &KillOps);
+
   void RewriteMBB(LiveIntervals *LIs,
                   AvailableSpills &Spills, BitVector &RegKills,
                   std::vector<MachineOperand*> &KillOps);
@@ -1117,17 +1147,18 @@ private:
 }
 
 bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
-                                         LiveIntervals* LIs) {
+                                         LiveIntervals* lis) {
   MRI = &MF.getRegInfo();
   TRI = MF.getTarget().getRegisterInfo();
   TII = MF.getTarget().getInstrInfo();
   VRM = &vrm;
+  LIs = lis;
   AllocatableRegs = TRI->getAllocatableSet(MF);
   DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
         << MF.getFunction()->getName() << "':\n");
   DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
         " reloads!) ****\n");
-  DEBUG(MF.dump());
+  DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
 
   // Spills - Keep track of which spilled values are available in physregs
   // so that we can choose to reuse the physregs instead of emitting
@@ -1178,7 +1209,7 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
   }
 
   DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
-  DEBUG(MF.dump());
+  DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
 
   // Mark unused spill slots.
   MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1190,10 +1221,8 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
         MFI->RemoveStackObject(SS);
         for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
           MachineInstr *DVMI = DbgValues[j];
-          MachineBasicBlock *DVMBB = DVMI->getParent();
           DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
-          VRM->RemoveMachineInstrFromMaps(DVMI);
-          DVMBB->erase(DVMI);
+          EraseInstr(DVMI);
         }
         ++NumDSS;
       }
@@ -1273,8 +1302,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
   VRM->transferRestorePts(&MI, NewMIs[0]);
   MII = MBB->insert(MII, NewMIs[0]);
   InvalidateKills(MI, TRI, RegKills, KillOps);
-  VRM->RemoveMachineInstrFromMaps(&MI);
-  MBB->erase(&MI);
+  EraseInstr(&MI);
   ++NumModRefUnfold;
 
   // Unfold next instructions that fold the same SS.
@@ -1289,8 +1317,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
     VRM->transferRestorePts(&NextMI, NewMIs[0]);
     MBB->insert(NextMII, NewMIs[0]);
     InvalidateKills(NextMI, TRI, RegKills, KillOps);
-    VRM->RemoveMachineInstrFromMaps(&NextMI);
-    MBB->erase(&NextMI);
+    EraseInstr(&NextMI);
     ++NumModRefUnfold;
     // Skip over dbg_value instructions.
     while (NextMII != MBB->end() && NextMII->isDebugValue())
@@ -1417,8 +1444,7 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
         VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
         MII = FoldedMI;
         InvalidateKills(MI, TRI, RegKills, KillOps);
-        VRM->RemoveMachineInstrFromMaps(&MI);
-        MBB->erase(&MI);
+        EraseInstr(&MI);
         return true;
       }
     }
@@ -1524,14 +1550,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
 
     // Delete all 3 old instructions.
     InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
-    VRM->RemoveMachineInstrFromMaps(ReloadMI);
-    MBB->erase(ReloadMI);
+    EraseInstr(ReloadMI);
     InvalidateKills(*DefMI, TRI, RegKills, KillOps);
-    VRM->RemoveMachineInstrFromMaps(DefMI);
-    MBB->erase(DefMI);
+    EraseInstr(DefMI);
     InvalidateKills(MI, TRI, RegKills, KillOps);
-    VRM->RemoveMachineInstrFromMaps(&MI);
-    MBB->erase(&MI);
+    EraseInstr(&MI);
 
     // If NewReg was previously holding value of some SS, it's now clobbered.
     // This has to be done now because it's a physical register. When this
@@ -1574,8 +1597,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
     bool CheckDef = PrevMII != MBB->begin();
     if (CheckDef)
       --PrevMII;
-    VRM->RemoveMachineInstrFromMaps(LastStore);
-    MBB->erase(LastStore);
+    EraseInstr(LastStore);
     if (CheckDef) {
       // Look at defs of killed registers on the store. Mark the defs
       // as dead since the store has been deleted and they aren't
@@ -1586,8 +1608,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
           MachineInstr *DeadDef = PrevMII;
           if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
             // FIXME: This assumes a remat def does not have side effects.
-            VRM->RemoveMachineInstrFromMaps(DeadDef);
-            MBB->erase(DeadDef);
+            EraseInstr(DeadDef);
             ++NumDRM;
           }
         }
@@ -1612,10 +1633,18 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
 /// effect and all of its defs are dead.
 static bool isSafeToDelete(MachineInstr &MI) {
   const TargetInstrDesc &TID = MI.getDesc();
-  if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+  if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
       TID.isCall() || TID.isBarrier() || TID.isReturn() ||
-      TID.hasUnmodeledSideEffects())
+      MI.isLabel() || MI.isDebugValue() ||
+      MI.hasUnmodeledSideEffects())
     return false;
+
+  // Technically speaking inline asm without side effects and no defs can still
+  // be deleted. But there is so much bad inline asm code out there, we should
+  // let them be.
+  if (MI.isInlineAsm())
+    return false;
+
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.getReg())
@@ -1675,8 +1704,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills,
         LastUD->setIsDead();
         break;
       }
-      VRM->RemoveMachineInstrFromMaps(LastUDMI);
-      MBB->erase(LastUDMI);
+      EraseInstr(LastUDMI);
     } else {
       LastUD->setIsKill();
       RegKills.set(Reg);
@@ -1764,6 +1792,10 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
                    << TRI->getName(InReg) << " for vreg"
                    << VirtReg <<" instead of reloading into physreg "
                    << TRI->getName(Phys) << '\n');
+
+      // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+      // the kill flags for the current instruction after processing it.
+
       ++NumOmitted;
       continue;
     } else if (InReg && InReg != Phys) {
@@ -1828,7 +1860,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
   return true;
 }
 
-/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
 /// true if spills were inserted.
 bool LocalRewriter::InsertSpills(MachineInstr *MI) {
   if (!VRM->isSpillPt(MI))
@@ -1856,6 +1888,349 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
 }
 
 
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+                                std::vector<MachineInstr*> &MaybeDeadStores,
+                                BitVector &RegKills,
+                                ReuseInfo &ReusedOperands,
+                                std::vector<MachineOperand*> &KillOps) {
+  // Clear kill info.
+  SmallSet<unsigned, 2> KilledMIRegs;
+  SmallVector<unsigned, 4> VirtUseOps;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;   // Ignore non-register operands.
+
+    unsigned VirtReg = MO.getReg();
+
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+      // Ignore physregs for spilling, but remember that it is used by this
+      // function.
+      MRI->setPhysRegUsed(VirtReg);
+      continue;
+    }
+
+    // We want to process implicit virtual register uses first.
+    if (MO.isImplicit())
+      // If the virtual register is implicitly defined, emit a implicit_def
+      // before so scavenger knows it's "defined".
+      // FIXME: This is a horrible hack done the by register allocator to
+      // remat a definition with virtual register operand.
+      VirtUseOps.insert(VirtUseOps.begin(), i);
+    else
+      VirtUseOps.push_back(i);
+
+    // A partial def causes problems because the same operand both reads and
+    // writes the register. This rewriter is designed to rewrite uses and defs
+    // separately, so a partial def would already have been rewritten to a
+    // physreg by the time we get to processing defs.
+    // Add an implicit use operand to model the partial def.
+    if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+        MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+      VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+      MI.addOperand(MachineOperand::CreateReg(VirtReg,
+                                              false,  // isDef
+                                              true)); // isImplicit
+      DEBUG(dbgs() << "Partial redef: " << MI);
+    }
+  }
+
+  // Process all of the spilled uses and all non spilled reg references.
+  SmallVector<int, 2> PotentialDeadStoreSlots;
+  KilledMIRegs.clear();
+  for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+    unsigned i = VirtUseOps[j];
+    unsigned VirtReg = MI.getOperand(i).getReg();
+    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+           "Not a virtual register?");
+
+    unsigned SubIdx = MI.getOperand(i).getSubReg();
+    if (VRM->isAssignedReg(VirtReg)) {
+      // This virtual register was assigned a physreg!
+      unsigned Phys = VRM->getPhys(VirtReg);
+      MRI->setPhysRegUsed(Phys);
+      if (MI.getOperand(i).isDef())
+        ReusedOperands.markClobbered(Phys);
+      substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+      if (VRM->isImplicitlyDefined(VirtReg))
+        // FIXME: Is this needed?
+        BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+      continue;
+    }
+
+    // This virtual register is now known to be a spilled value.
+    if (!MI.getOperand(i).isUse())
+      continue;  // Handle defs in the loop below (handle use&def here though)
+
+    bool AvoidReload = MI.getOperand(i).isUndef();
+    // Check if it is defined by an implicit def. It should not be spilled.
+    // Note, this is for correctness reason. e.g.
+    // 8   %reg1024<def> = IMPLICIT_DEF
+    // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+    // The live range [12, 14) are not part of the r1024 live interval since
+    // it's defined by an implicit def. It will not conflicts with live
+    // interval of r1025. Now suppose both registers are spilled, you can
+    // easily see a situation where both registers are reloaded before
+    // the INSERT_SUBREG and both target registers that would overlap.
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    int ReuseSlot = SSorRMId;
+
+    // Check to see if this stack slot is available.
+    unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+    // If this is a sub-register use, make sure the reuse register is in the
+    // right register class. For example, for x86 not all of the 32-bit
+    // registers have accessible sub-registers.
+    // Similarly so for EXTRACT_SUBREG. Consider this:
+    // EDI = op
+    // MOV32_mr fi#1, EDI
+    // ...
+    //       = EXTRACT_SUBREG fi#1
+    // fi#1 is available in EDI, but it cannot be reused because it's not in
+    // the right register file.
+    if (PhysReg && !AvoidReload && SubIdx) {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      if (!RC->contains(PhysReg))
+        PhysReg = 0;
+    }
+
+    if (PhysReg && !AvoidReload) {
+      // This spilled operand might be part of a two-address operand.  If this
+      // is the case, then changing it will necessarily require changing the
+      // def part of the instruction as well.  However, in some cases, we
+      // aren't allowed to modify the reused register.  If none of these cases
+      // apply, reuse it.
+      bool CanReuse = true;
+      bool isTied = MI.isRegTiedToDefOperand(i);
+      if (isTied) {
+        // Okay, we have a two address operand.  We can reuse this physreg as
+        // long as we are allowed to clobber the value and there isn't an
+        // earlier def that has already clobbered the physreg.
+        CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+          Spills.canClobberPhysReg(PhysReg);
+      }
+      // If this is an asm, and a PhysReg alias is used elsewhere as an
+      // earlyclobber operand, we can't also use it as an input.
+      if (MI.isInlineAsm()) {
+        for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+          MachineOperand &MOk = MI.getOperand(k);
+          if (MOk.isReg() && MOk.isEarlyClobber() &&
+              TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+            CanReuse = false;
+            DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+                         << " for vreg" << VirtReg << ": " << MOk << '\n');
+            break;
+          }
+        }
+      }
+
+      if (CanReuse) {
+        // If this stack slot value is already available, reuse it!
+        if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+          DEBUG(dbgs() << "Reusing RM#"
+                << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+        else
+          DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+        DEBUG(dbgs() << " from physreg "
+              << TRI->getName(PhysReg) << " for vreg"
+              << VirtReg <<" instead of reloading into physreg "
+              << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+        MI.getOperand(i).setReg(RReg);
+        MI.getOperand(i).setSubReg(0);
+
+        // Reusing a physreg may resurrect it. But we expect ProcessUses to
+        // update the kill flags for the current instr after processing it.
+
+        // The only technical detail we have is that we don't know that
+        // PhysReg won't be clobbered by a reloaded stack slot that occurs
+        // later in the instruction.  In particular, consider 'op V1, V2'.
+        // If V1 is available in physreg R0, we would choose to reuse it
+        // here, instead of reloading it into the register the allocator
+        // indicated (say R1).  However, V2 might have to be reloaded
+        // later, and it might indicate that it needs to live in R0.  When
+        // this occurs, we need to have information available that
+        // indicates it is safe to use R1 for the reload instead of R0.
+        //
+        // To further complicate matters, we might conflict with an alias,
+        // or R0 and R1 might not be compatible with each other.  In this
+        // case, we actually insert a reload for V1 in R1, ensuring that
+        // we can get at R0 or its alias.
+        ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+                                VRM->getPhys(VirtReg), VirtReg);
+        if (isTied)
+          // Only mark it clobbered if this is a use&def operand.
+          ReusedOperands.markClobbered(PhysReg);
+        ++NumReused;
+
+        if (MI.getOperand(i).isKill() &&
+            ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+          // The store of this spilled value is potentially dead, but we
+          // won't know for certain until we've confirmed that the re-use
+          // above is valid, which means waiting until the other operands
+          // are processed. For now we just track the spill slot, we'll
+          // remove it after the other operands are processed if valid.
+
+          PotentialDeadStoreSlots.push_back(ReuseSlot);
+        }
+
+        // Mark is isKill if it's there no other uses of the same virtual
+        // register and it's not a two-address operand. IsKill will be
+        // unset if reg is reused.
+        if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+          MI.getOperand(i).setIsKill();
+          KilledMIRegs.insert(VirtReg);
+        }
+        continue;
+      }  // CanReuse
+
+      // Otherwise we have a situation where we have a two-address instruction
+      // whose mod/ref operand needs to be reloaded.  This reload is already
+      // available in some register "PhysReg", but if we used PhysReg as the
+      // operand to our 2-addr instruction, the instruction would modify
+      // PhysReg.  This isn't cool if something later uses PhysReg and expects
+      // to get its initial value.
+      //
+      // To avoid this problem, and to avoid doing a load right after a store,
+      // we emit a copy from PhysReg into the designated register for this
+      // operand.
+      //
+      // This case also applies to an earlyclobber'd PhysReg.
+      unsigned DesignatedReg = VRM->getPhys(VirtReg);
+      assert(DesignatedReg && "Must map virtreg to physreg!");
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available.  If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        DesignatedReg = ReusedOperands.
+          GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+                          MaybeDeadStores, RegKills, KillOps, *VRM);
+
+      // If the mapped designated register is actually the physreg we have
+      // incoming, we don't need to inserted a dead copy.
+      if (DesignatedReg == PhysReg) {
+        // If this stack slot value is already available, reuse it!
+        if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+          DEBUG(dbgs() << "Reusing RM#"
+                << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+        else
+          DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+        DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+              << " for vreg" << VirtReg
+              << " instead of reloading into same physreg.\n");
+        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+        MI.getOperand(i).setReg(RReg);
+        MI.getOperand(i).setSubReg(0);
+        ReusedOperands.markClobbered(RReg);
+        ++NumReused;
+        continue;
+      }
+
+      MRI->setPhysRegUsed(DesignatedReg);
+      ReusedOperands.markClobbered(DesignatedReg);
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                         SSorRMId, TII, *MBB->getParent());
+      MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+                                     TII->get(TargetOpcode::COPY),
+                                     DesignatedReg).addReg(PhysReg);
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      // This invalidates DesignatedReg.
+      Spills.ClobberPhysReg(DesignatedReg);
+
+      Spills.addAvailable(ReuseSlot, DesignatedReg);
+      unsigned RReg =
+        SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+      DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+      ++NumReused;
+      continue;
+    } // if (PhysReg)
+
+    // Otherwise, reload it and remember that we have it.
+    PhysReg = VRM->getPhys(VirtReg);
+    assert(PhysReg && "Must map virtreg to physreg!");
+
+    // Note that, if we reused a register for a previous operand, the
+    // register we want to reload into might not actually be
+    // available.  If this occurs, use the register indicated by the
+    // reuser.
+    if (ReusedOperands.hasReuses())
+      PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                  Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+    MRI->setPhysRegUsed(PhysReg);
+    ReusedOperands.markClobbered(PhysReg);
+    if (AvoidReload)
+      ++NumAvoided;
+    else {
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                         SSorRMId, TII, *MBB->getParent());
+
+      if (DoReMat) {
+        ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+      } else {
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+        MachineInstr *LoadMI = prior(InsertLoc);
+        VRM->addSpillSlotUse(SSorRMId, LoadMI);
+        ++NumLoads;
+        DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+      }
+      // This invalidates PhysReg.
+      Spills.ClobberPhysReg(PhysReg);
+
+      // Any stores to this stack slot are not dead anymore.
+      if (!DoReMat)
+        MaybeDeadStores[SSorRMId] = NULL;
+      Spills.addAvailable(SSorRMId, PhysReg);
+      // Assumes this is the last use. IsKill will be unset if reg is reused
+      // unless it's a two-address operand.
+      if (!MI.isRegTiedToDefOperand(i) &&
+          KilledMIRegs.count(VirtReg) == 0) {
+        MI.getOperand(i).setIsKill();
+        KilledMIRegs.insert(VirtReg);
+      }
+
+      UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+      DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+    }
+    unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+    MI.getOperand(i).setReg(RReg);
+    MI.getOperand(i).setSubReg(0);
+  }
+
+  // Ok - now we can remove stores that have been confirmed dead.
+  for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+    // This was the last use and the spilled value is still available
+    // for reuse. That means the spill was unnecessary!
+    int PDSSlot = PotentialDeadStoreSlots[j];
+    MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+    if (DeadStore) {
+      DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+      InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+      EraseInstr(DeadStore);
+      MaybeDeadStores[PDSSlot] = NULL;
+      ++NumDSE;
+    }
+  }
+}
+
 /// rewriteMBB - Keep track of which spills are available even after the
 /// register allocator is done with them.  If possible, avoid reloading vregs.
 void
@@ -1880,9 +2255,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
   // ReMatDefs - These are rematerializable def MIs which are not deleted.
   SmallSet<MachineInstr*, 4> ReMatDefs;
 
-  // Clear kill info.
-  SmallSet<unsigned, 2> KilledMIRegs;
-
   // Keep track of the registers we have already spilled in case there are
   // multiple defs of the same register in MI.
   SmallSet<unsigned, 8> SpilledMIRegs;
@@ -1918,323 +2290,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
     /// ReusedOperands - Keep track of operand reuse in case we need to undo
     /// reuse.
     ReuseInfo ReusedOperands(MI, TRI);
-    SmallVector<unsigned, 4> VirtUseOps;
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI.getOperand(i);
-      if (!MO.isReg() || MO.getReg() == 0)
-        continue;   // Ignore non-register operands.
-
-      unsigned VirtReg = MO.getReg();
-      if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
-        // Ignore physregs for spilling, but remember that it is used by this
-        // function.
-        MRI->setPhysRegUsed(VirtReg);
-        continue;
-      }
-
-      // We want to process implicit virtual register uses first.
-      if (MO.isImplicit())
-        // If the virtual register is implicitly defined, emit a implicit_def
-        // before so scavenger knows it's "defined".
-        // FIXME: This is a horrible hack done the by register allocator to
-        // remat a definition with virtual register operand.
-        VirtUseOps.insert(VirtUseOps.begin(), i);
-      else
-        VirtUseOps.push_back(i);
-    }
-
-    // Process all of the spilled uses and all non spilled reg references.
-    SmallVector<int, 2> PotentialDeadStoreSlots;
-    KilledMIRegs.clear();
-    for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
-      unsigned i = VirtUseOps[j];
-      unsigned VirtReg = MI.getOperand(i).getReg();
-      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-             "Not a virtual register?");
-
-      unsigned SubIdx = MI.getOperand(i).getSubReg();
-      if (VRM->isAssignedReg(VirtReg)) {
-        // This virtual register was assigned a physreg!
-        unsigned Phys = VRM->getPhys(VirtReg);
-        MRI->setPhysRegUsed(Phys);
-        if (MI.getOperand(i).isDef())
-          ReusedOperands.markClobbered(Phys);
-        substitutePhysReg(MI.getOperand(i), Phys, *TRI);
-        if (VRM->isImplicitlyDefined(VirtReg))
-          // FIXME: Is this needed?
-          BuildMI(*MBB, &MI, MI.getDebugLoc(),
-                  TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
-        continue;
-      }
-
-      // This virtual register is now known to be a spilled value.
-      if (!MI.getOperand(i).isUse())
-        continue;  // Handle defs in the loop below (handle use&def here though)
-
-      bool AvoidReload = MI.getOperand(i).isUndef();
-      // Check if it is defined by an implicit def. It should not be spilled.
-      // Note, this is for correctness reason. e.g.
-      // 8   %reg1024<def> = IMPLICIT_DEF
-      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
-      // The live range [12, 14) are not part of the r1024 live interval since
-      // it's defined by an implicit def. It will not conflicts with live
-      // interval of r1025. Now suppose both registers are spilled, you can
-      // easily see a situation where both registers are reloaded before
-      // the INSERT_SUBREG and both target registers that would overlap.
-      bool DoReMat = VRM->isReMaterialized(VirtReg);
-      int SSorRMId = DoReMat
-        ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
-      int ReuseSlot = SSorRMId;
-
-      // Check to see if this stack slot is available.
-      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
-      // If this is a sub-register use, make sure the reuse register is in the
-      // right register class. For example, for x86 not all of the 32-bit
-      // registers have accessible sub-registers.
-      // Similarly so for EXTRACT_SUBREG. Consider this:
-      // EDI = op
-      // MOV32_mr fi#1, EDI
-      // ...
-      //       = EXTRACT_SUBREG fi#1
-      // fi#1 is available in EDI, but it cannot be reused because it's not in
-      // the right register file.
-      if (PhysReg && !AvoidReload && SubIdx) {
-        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
-        if (!RC->contains(PhysReg))
-          PhysReg = 0;
-      }
-
-      if (PhysReg && !AvoidReload) {
-        // This spilled operand might be part of a two-address operand.  If this
-        // is the case, then changing it will necessarily require changing the
-        // def part of the instruction as well.  However, in some cases, we
-        // aren't allowed to modify the reused register.  If none of these cases
-        // apply, reuse it.
-        bool CanReuse = true;
-        bool isTied = MI.isRegTiedToDefOperand(i);
-        if (isTied) {
-          // Okay, we have a two address operand.  We can reuse this physreg as
-          // long as we are allowed to clobber the value and there isn't an
-          // earlier def that has already clobbered the physreg.
-          CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
-            Spills.canClobberPhysReg(PhysReg);
-        }
-        // If this is an asm, and a PhysReg alias is used elsewhere as an
-        // earlyclobber operand, we can't also use it as an input.
-        if (MI.isInlineAsm()) {
-          for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
-            MachineOperand &MOk = MI.getOperand(k);
-            if (MOk.isReg() && MOk.isEarlyClobber() &&
-                TRI->regsOverlap(MOk.getReg(), PhysReg)) {
-              CanReuse = false;
-              DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
-                           << " for vreg" << VirtReg << ": " << MOk << '\n');
-              break;
-            }
-          }
-        }
-
-        if (CanReuse) {
-          // If this stack slot value is already available, reuse it!
-          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-            DEBUG(dbgs() << "Reusing RM#"
-                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-          else
-            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-          DEBUG(dbgs() << " from physreg "
-                << TRI->getName(PhysReg) << " for vreg"
-                << VirtReg <<" instead of reloading into physreg "
-                << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
-          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-          MI.getOperand(i).setReg(RReg);
-          MI.getOperand(i).setSubReg(0);
-
-          // The only technical detail we have is that we don't know that
-          // PhysReg won't be clobbered by a reloaded stack slot that occurs
-          // later in the instruction.  In particular, consider 'op V1, V2'.
-          // If V1 is available in physreg R0, we would choose to reuse it
-          // here, instead of reloading it into the register the allocator
-          // indicated (say R1).  However, V2 might have to be reloaded
-          // later, and it might indicate that it needs to live in R0.  When
-          // this occurs, we need to have information available that
-          // indicates it is safe to use R1 for the reload instead of R0.
-          //
-          // To further complicate matters, we might conflict with an alias,
-          // or R0 and R1 might not be compatible with each other.  In this
-          // case, we actually insert a reload for V1 in R1, ensuring that
-          // we can get at R0 or its alias.
-          ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
-                                  VRM->getPhys(VirtReg), VirtReg);
-          if (isTied)
-            // Only mark it clobbered if this is a use&def operand.
-            ReusedOperands.markClobbered(PhysReg);
-          ++NumReused;
-
-          if (MI.getOperand(i).isKill() &&
-              ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
-            // The store of this spilled value is potentially dead, but we
-            // won't know for certain until we've confirmed that the re-use
-            // above is valid, which means waiting until the other operands
-            // are processed. For now we just track the spill slot, we'll
-            // remove it after the other operands are processed if valid.
-
-            PotentialDeadStoreSlots.push_back(ReuseSlot);
-          }
-
-          // Mark is isKill if it's there no other uses of the same virtual
-          // register and it's not a two-address operand. IsKill will be
-          // unset if reg is reused.
-          if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
-            MI.getOperand(i).setIsKill();
-            KilledMIRegs.insert(VirtReg);
-          }
-
-          continue;
-        }  // CanReuse
-
-        // Otherwise we have a situation where we have a two-address instruction
-        // whose mod/ref operand needs to be reloaded.  This reload is already
-        // available in some register "PhysReg", but if we used PhysReg as the
-        // operand to our 2-addr instruction, the instruction would modify
-        // PhysReg.  This isn't cool if something later uses PhysReg and expects
-        // to get its initial value.
-        //
-        // To avoid this problem, and to avoid doing a load right after a store,
-        // we emit a copy from PhysReg into the designated register for this
-        // operand.
-        //
-        // This case also applies to an earlyclobber'd PhysReg.
-        unsigned DesignatedReg = VRM->getPhys(VirtReg);
-        assert(DesignatedReg && "Must map virtreg to physreg!");
-
-        // Note that, if we reused a register for a previous operand, the
-        // register we want to reload into might not actually be
-        // available.  If this occurs, use the register indicated by the
-        // reuser.
-        if (ReusedOperands.hasReuses())
-          DesignatedReg = ReusedOperands.
-            GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
-                            MaybeDeadStores, RegKills, KillOps, *VRM);
-
-        // If the mapped designated register is actually the physreg we have
-        // incoming, we don't need to inserted a dead copy.
-        if (DesignatedReg == PhysReg) {
-          // If this stack slot value is already available, reuse it!
-          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-            DEBUG(dbgs() << "Reusing RM#"
-                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-          else
-            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-          DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
-                << " for vreg" << VirtReg
-                << " instead of reloading into same physreg.\n");
-          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-          MI.getOperand(i).setReg(RReg);
-          MI.getOperand(i).setSubReg(0);
-          ReusedOperands.markClobbered(RReg);
-          ++NumReused;
-          continue;
-        }
-
-        MRI->setPhysRegUsed(DesignatedReg);
-        ReusedOperands.markClobbered(DesignatedReg);
-
-        // Back-schedule reloads and remats.
-        MachineBasicBlock::iterator InsertLoc =
-          ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
-                           SSorRMId, TII, MF);
-        MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
-                                       TII->get(TargetOpcode::COPY),
-                                       DesignatedReg).addReg(PhysReg);
-        CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-        UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
-        // This invalidates DesignatedReg.
-        Spills.ClobberPhysReg(DesignatedReg);
-
-        Spills.addAvailable(ReuseSlot, DesignatedReg);
-        unsigned RReg =
-          SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
-        MI.getOperand(i).setReg(RReg);
-        MI.getOperand(i).setSubReg(0);
-        DEBUG(dbgs() << '\t' << *prior(MII));
-        ++NumReused;
-        continue;
-      } // if (PhysReg)
-
-        // Otherwise, reload it and remember that we have it.
-      PhysReg = VRM->getPhys(VirtReg);
-      assert(PhysReg && "Must map virtreg to physreg!");
-
-      // Note that, if we reused a register for a previous operand, the
-      // register we want to reload into might not actually be
-      // available.  If this occurs, use the register indicated by the
-      // reuser.
-      if (ReusedOperands.hasReuses())
-        PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
-                    Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
-      MRI->setPhysRegUsed(PhysReg);
-      ReusedOperands.markClobbered(PhysReg);
-      if (AvoidReload)
-        ++NumAvoided;
-      else {
-        // Back-schedule reloads and remats.
-        MachineBasicBlock::iterator InsertLoc =
-          ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
-                           SSorRMId, TII, MF);
-
-        if (DoReMat) {
-          ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
-        } else {
-          const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
-          TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
-          MachineInstr *LoadMI = prior(InsertLoc);
-          VRM->addSpillSlotUse(SSorRMId, LoadMI);
-          ++NumLoads;
-          DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
-        }
-        // This invalidates PhysReg.
-        Spills.ClobberPhysReg(PhysReg);
-
-        // Any stores to this stack slot are not dead anymore.
-        if (!DoReMat)
-          MaybeDeadStores[SSorRMId] = NULL;
-        Spills.addAvailable(SSorRMId, PhysReg);
-        // Assumes this is the last use. IsKill will be unset if reg is reused
-        // unless it's a two-address operand.
-        if (!MI.isRegTiedToDefOperand(i) &&
-            KilledMIRegs.count(VirtReg) == 0) {
-          MI.getOperand(i).setIsKill();
-          KilledMIRegs.insert(VirtReg);
-        }
-
-        UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-        DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-      }
-      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-      MI.getOperand(i).setReg(RReg);
-      MI.getOperand(i).setSubReg(0);
-    }
-
-    // Ok - now we can remove stores that have been confirmed dead.
-    for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
-      // This was the last use and the spilled value is still available
-      // for reuse. That means the spill was unnecessary!
-      int PDSSlot = PotentialDeadStoreSlots[j];
-      MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
-      if (DeadStore) {
-        DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
-        InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-        VRM->RemoveMachineInstrFromMaps(DeadStore);
-        MBB->erase(DeadStore);
-        MaybeDeadStores[PDSSlot] = NULL;
-        ++NumDSE;
-      }
-    }
 
+    ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
 
     DEBUG(dbgs() << '\t' << MI);
 
@@ -2288,14 +2345,13 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
               BackTracked = true;
             } else {
               DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-              // Unset last kill since it's being reused.
-              InvalidateKill(InReg, TRI, RegKills, KillOps);
+              // InvalidateKills resurrects any prior kill of the copy's source
+              // allowing the source reg to be reused in place of the copy.
               Spills.disallowClobberPhysReg(InReg);
             }
 
             InvalidateKills(MI, TRI, RegKills, KillOps);
-            VRM->RemoveMachineInstrFromMaps(&MI);
-            MBB->erase(&MI);
+            EraseInstr(&MI);
             Erased = true;
             goto ProcessNextInst;
           }
@@ -2306,8 +2362,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
               TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
             MBB->insert(MII, NewMIs[0]);
             InvalidateKills(MI, TRI, RegKills, KillOps);
-            VRM->RemoveMachineInstrFromMaps(&MI);
-            MBB->erase(&MI);
+            EraseInstr(&MI);
             Erased = true;
             --NextMII;  // backtrack to the unfolded instruction.
             BackTracked = true;
@@ -2343,8 +2398,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
               MBB->insert(MII, NewStore);
               VRM->addSpillSlotUse(SS, NewStore);
               InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM->RemoveMachineInstrFromMaps(&MI);
-              MBB->erase(&MI);
+              EraseInstr(&MI);
               Erased = true;
               --NextMII;
               --NextMII;  // backtrack to the unfolded instruction.
@@ -2359,8 +2413,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           // If we get here, the store is dead, nuke it now.
           DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
           InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-          VRM->RemoveMachineInstrFromMaps(DeadStore);
-          MBB->erase(DeadStore);
+          EraseInstr(DeadStore);
           if (!NewStore)
             ++NumDSE;
         }
@@ -2437,8 +2490,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
             // Last def is now dead.
             TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
           }
-          VRM->RemoveMachineInstrFromMaps(&MI);
-          MBB->erase(&MI);
+          EraseInstr(&MI);
           Erased = true;
           Spills.disallowClobberPhysReg(VirtReg);
           goto ProcessNextInst;
@@ -2514,8 +2566,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
           ++NumDCE;
           DEBUG(dbgs() << "Removing now-noop copy: " << MI);
           InvalidateKills(MI, TRI, RegKills, KillOps);
-          VRM->RemoveMachineInstrFromMaps(&MI);
-          MBB->erase(&MI);
+          EraseInstr(&MI);
           Erased = true;
           UpdateKills(*LastStore, TRI, RegKills, KillOps);
           goto ProcessNextInst;
@@ -2526,8 +2577,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
     // Delete dead instructions without side effects.
     if (!Erased && !BackTracked && isSafeToDelete(MI)) {
       InvalidateKills(MI, TRI, RegKills, KillOps);
-      VRM->RemoveMachineInstrFromMaps(&MI);
-      MBB->erase(&MI);
+      EraseInstr(&MI);
       Erased = true;
     }
     if (!Erased)
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 0be80496a3cb..a8d625c7ac04 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -14,11 +14,12 @@
 #include "llvm/CompilerDriver/Action.h"
 #include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/Main.h"
 
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TimeValue.h"
 
 #include <stdexcept>
 #include <string>
@@ -28,7 +29,6 @@ using namespace llvmc;
 
 namespace llvmc {
 
-extern int Main(int argc, char** argv);
 extern const char* ProgramName;
 
 }
@@ -53,15 +53,19 @@ namespace {
 #endif
   }
 
-  int ExecuteProgram (const std::string& name,
-                      const StrVector& args) {
-    sys::Path prog = sys::Program::FindProgramByName(name);
+  int ExecuteProgram (const std::string& name, const StrVector& args) {
+    sys::Path prog(name);
 
-    if (prog.isEmpty()) {
-      prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
-      if (prog.isEmpty()) {
-        PrintError("Can't find program '" + name + "'");
-        return -1;
+    if (sys::path::is_relative(prog.str())) {
+      prog = PrependMainExecutablePath(name, ProgramName,
+                                       (void *)(intptr_t)&Main);
+
+      if (!prog.canExecute()) {
+        prog = sys::Program::FindProgramByName(name);
+        if (prog.isEmpty()) {
+          PrintError("Can't find program '" + name + "'");
+          return -1;
+        }
       }
     }
     if (!prog.canExecute()) {
diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt
index 153dd443cbf2..2248de01b954 100644
--- a/lib/CompilerDriver/CMakeLists.txt
+++ b/lib/CompilerDriver/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS support system)
+set(LLVM_LINK_COMPONENTS support)
 set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvmc
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index d0c0e15bcdb7..33c6566499b8 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -32,7 +32,8 @@ using namespace llvmc;
 namespace llvmc {
 
   const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
-    StringRef suf = File.getSuffix();
+    // Remove the '.'.
+    StringRef suf = sys::path::extension(File.str()).substr(1);
     LanguageMap::const_iterator Lang =
       this->find(suf.empty() ? "*empty*" : suf);
     if (Lang == this->end()) {
@@ -218,10 +219,11 @@ FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
               InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
 
   // Determine the input language.
-  const std::string* InLang = LangMap.GetLanguage(In);
+  const std::string* InLang = (ForceLanguage ? ForceLanguage
+                               : LangMap.GetLanguage(In));
   if (InLang == 0)
     return 0;
-  const std::string& InLanguage = (ForceLanguage ? *ForceLanguage : *InLang);
+  const std::string& InLanguage = *InLang;
 
   // Add the current input language to the input language set.
   InLangs.insert(InLanguage);
@@ -439,13 +441,17 @@ int CompilationGraph::CheckLanguageNames() const {
           continue;
         }
 
-        const char* OutLang = N1.ToolPtr->OutputLanguage();
+        const char** OutLangs = N1.ToolPtr->OutputLanguages();
         const char** InLangs = N2->ToolPtr->InputLanguages();
         bool eq = false;
-        for (;*InLangs; ++InLangs) {
-          if (std::strcmp(OutLang, *InLangs) == 0) {
-            eq = true;
-            break;
+        const char* OutLang = 0;
+        for (;*OutLangs; ++OutLangs) {
+          OutLang = *OutLangs;
+          for (;*InLangs; ++InLangs) {
+            if (std::strcmp(OutLang, *InLangs) == 0) {
+              eq = true;
+              break;
+            }
           }
         }
 
@@ -480,7 +486,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
   for (const_nodes_iterator B = this->NodesMap.begin(),
          E = this->NodesMap.end(); B != E; ++B) {
     const Node& N = B->second;
-    int MaxWeight = 0;
+    int MaxWeight = -1024;
 
     // Ignore the root node.
     if (!N.ToolPtr)
@@ -572,6 +578,26 @@ int CompilationGraph::Check () {
 
 // Code related to graph visualization.
 
+namespace {
+
+std::string SquashStrArray (const char** StrArr) {
+  std::string ret;
+
+  for (; *StrArr; ++StrArr) {
+    if (*(StrArr + 1)) {
+      ret += *StrArr;
+      ret +=  ", ";
+    }
+    else {
+      ret += *StrArr;
+    }
+  }
+
+  return ret;
+}
+
+} // End anonymous namespace.
+
 namespace llvm {
   template <>
   struct DOTGraphTraits<llvmc::CompilationGraph*>
@@ -586,7 +612,8 @@ namespace llvm {
         if (N->ToolPtr->IsJoin())
           return N->Name() + "\n (join" +
             (N->HasChildren() ? ")"
-             : std::string(": ") + N->ToolPtr->OutputLanguage() + ')');
+             : std::string(": ") +
+             SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
         else
           return N->Name();
       else
@@ -596,28 +623,15 @@ namespace llvm {
     template<typename EdgeIter>
     static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
       if (N->ToolPtr) {
-        return N->ToolPtr->OutputLanguage();
+        return SquashStrArray(N->ToolPtr->OutputLanguages());
       }
       else {
-        const char** InLangs = I->ToolPtr->InputLanguages();
-        std::string ret;
-
-        for (; *InLangs; ++InLangs) {
-          if (*(InLangs + 1)) {
-            ret += *InLangs;
-            ret +=  ", ";
-          }
-          else {
-            ret += *InLangs;
-          }
-        }
-
-        return ret;
+        return SquashStrArray(I->ToolPtr->InputLanguages());
       }
     }
   };
 
-}
+} // End namespace llvm
 
 int CompilationGraph::writeGraph(const std::string& OutputFilename) {
   std::string ErrorInfo;
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index 0a6613aa77a3..7120027f7ce0 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -16,8 +16,9 @@
 #include "llvm/CompilerDriver/CompilationGraph.h"
 #include "llvm/CompilerDriver/Error.h"
 
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 
 #include <sstream>
 #include <string>
@@ -43,15 +44,15 @@ namespace {
       return 0;
     }
     else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
-      tempDir = OutputFilename;
-      tempDir = tempDir.getDirname();
+      tempDir = sys::path::parent_path(OutputFilename);
     }
     else {
       // SaveTemps == Cwd --> use current dir (leave tempDir empty).
       return 0;
     }
 
-    if (!tempDir.exists()) {
+    bool Exists;
+    if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
       std::string ErrMsg;
       if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
         PrintError(ErrMsg);
diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile
index 8e8b73ca8f83..10cfa4f02923 100644
--- a/lib/CompilerDriver/Makefile
+++ b/lib/CompilerDriver/Makefile
@@ -13,7 +13,7 @@ LEVEL = ../..
 # name doesn't start with "LLVM" and NO_LLVM_CONFIG is set.
 
 LIBRARYNAME = CompilerDriver
-LINK_COMPONENTS = support system
+LINK_COMPONENTS = support
 NO_LLVM_CONFIG = 1
 
 
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index c8488b226e28..876759aa72b0 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -15,7 +15,7 @@
 #include "llvm/CompilerDriver/Tool.h"
 
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 
 #include <algorithm>
 
@@ -61,7 +61,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
       Out.appendSuffix(OutputSuffix);
     }
     else {
-      Out.set(In.getBasename());
+      Out.set(sys::path::stem(In.str()));
       Out.appendSuffix(OutputSuffix);
     }
   }
@@ -69,7 +69,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
     if (IsJoin())
       Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
     else
-      Out = MakeTempFile(TempDir, In.getBasename(), OutputSuffix);
+      Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix);
   }
   return Out;
 }
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 0e118ccd904f..b5632d2bc5c3 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -2,3 +2,7 @@ add_llvm_library(LLVMExecutionEngine
   ExecutionEngine.cpp
   ExecutionEngineBindings.cpp
   )
+
+add_subdirectory(Interpreter)
+add_subdirectory(JIT)
+add_subdirectory(MCJIT)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index be7f1f56a958..f28697530b3d 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -19,14 +19,15 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Target/TargetData.h"
 #include <cmath>
 #include <cstring>
@@ -45,14 +46,24 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
   StringRef MArch,
   StringRef MCPU,
   const SmallVectorImpl<std::string>& MAttrs) = 0;
+ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
+  Module *M,
+  std::string *ErrorStr,
+  JITMemoryManager *JMM,
+  CodeGenOpt::Level OptLevel,
+  bool GVsWithCode,
+  CodeModel::Model CMM,
+  StringRef MArch,
+  StringRef MCPU,
+  const SmallVectorImpl<std::string>& MAttrs) = 0;
 ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
                                                 std::string *ErrorStr) = 0;
-ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
-
 
 ExecutionEngine::ExecutionEngine(Module *M)
   : EEState(*this),
-    LazyFunctionCreator(0) {
+    LazyFunctionCreator(0),
+    ExceptionTableRegister(0),
+    ExceptionTableDeregister(0) {
   CompilingLazily         = false;
   GVCompilationDisabled   = false;
   SymbolSearchingDisabled = false;
@@ -66,16 +77,25 @@ ExecutionEngine::~ExecutionEngine() {
     delete Modules[i];
 }
 
+void ExecutionEngine::DeregisterAllTables() {
+  if (ExceptionTableDeregister) {
+    for (std::vector<void*>::iterator it = AllExceptionTables.begin(),
+           ie = AllExceptionTables.end(); it != ie; ++it)
+      ExceptionTableDeregister(*it);
+    AllExceptionTables.clear();
+  }
+}
+
 namespace {
-// This class automatically deletes the memory block when the GlobalVariable is
-// destroyed.
+/// \brief Helper class which uses a value handler to automatically deletes the
+/// memory block when the GlobalVariable is destroyed.
 class GVMemoryBlock : public CallbackVH {
   GVMemoryBlock(const GlobalVariable *GV)
     : CallbackVH(const_cast<GlobalVariable*>(GV)) {}
 
 public:
-  // Returns the address the GlobalVariable should be written into.  The
-  // GVMemoryBlock object prefixes that.
+  /// \brief Returns the address the GlobalVariable should be written into.  The
+  /// GVMemoryBlock object prefixes that.
   static char *Create(const GlobalVariable *GV, const TargetData& TD) {
     const Type *ElTy = GV->getType()->getElementType();
     size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
@@ -97,13 +117,12 @@ public:
 };
 }  // anonymous namespace
 
-char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) {
+char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
   return GVMemoryBlock::Create(GV, *getTargetData());
 }
 
-/// removeModule - Remove a Module from the list of modules.
 bool ExecutionEngine::removeModule(Module *M) {
-  for(SmallVector<Module *, 1>::iterator I = Modules.begin(), 
+  for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
         E = Modules.end(); I != E; ++I) {
     Module *Found = *I;
     if (Found == M) {
@@ -115,9 +134,6 @@ bool ExecutionEngine::removeModule(Module *M) {
   return false;
 }
 
-/// FindFunctionNamed - Search all of the active modules to find the one that
-/// defines FnName.  This is very slow operation and shouldn't be used for
-/// general code.
 Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
   for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
     if (Function *F = Modules[i]->getFunction(FnName))
@@ -127,10 +143,13 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
 }
 
 
-void *ExecutionEngineState::RemoveMapping(
-  const MutexGuard &, const GlobalValue *ToUnmap) {
+void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
+                                          const GlobalValue *ToUnmap) {
   GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
   void *OldVal;
+
+  // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
+  // GlobalAddressMap.
   if (I == GlobalAddressMap.end())
     OldVal = 0;
   else {
@@ -142,21 +161,16 @@ void *ExecutionEngineState::RemoveMapping(
   return OldVal;
 }
 
-/// addGlobalMapping - Tell the execution engine that the specified global is
-/// at the specified location.  This is used internally as functions are JIT'd
-/// and as global variables are laid out in memory.  It can and should also be
-/// used by clients of the EE that want to have an LLVM global overlay
-/// existing data in memory.
 void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
   MutexGuard locked(lock);
 
-  DEBUG(dbgs() << "JIT: Map \'" << GV->getName() 
+  DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
         << "\' to [" << Addr << "]\n";);
   void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
   assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
   CurVal = Addr;
-  
-  // If we are using the reverse mapping, add it too
+
+  // If we are using the reverse mapping, add it too.
   if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
     AssertingVH<const GlobalValue> &V =
       EEState.getGlobalAddressReverseMap(locked)[Addr];
@@ -165,32 +179,23 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
   }
 }
 
-/// clearAllGlobalMappings - Clear all global mappings and start over again
-/// use in dynamic compilation scenarios when you want to move globals
 void ExecutionEngine::clearAllGlobalMappings() {
   MutexGuard locked(lock);
-  
+
   EEState.getGlobalAddressMap(locked).clear();
   EEState.getGlobalAddressReverseMap(locked).clear();
 }
 
-/// clearGlobalMappingsFromModule - Clear all global mappings that came from a
-/// particular module, because it has been removed from the JIT.
 void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
   MutexGuard locked(lock);
-  
-  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
     EEState.RemoveMapping(locked, FI);
-  }
-  for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); 
-       GI != GE; ++GI) {
+  for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+       GI != GE; ++GI)
     EEState.RemoveMapping(locked, GI);
-  }
 }
 
-/// updateGlobalMapping - Replace an existing mapping for GV with a new
-/// address.  This updates both maps as required.  If "Addr" is null, the
-/// entry for the global is removed from the mappings.
 void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
   MutexGuard locked(lock);
 
@@ -198,18 +203,17 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
     EEState.getGlobalAddressMap(locked);
 
   // Deleting from the mapping?
-  if (Addr == 0) {
+  if (Addr == 0)
     return EEState.RemoveMapping(locked, GV);
-  }
-  
+
   void *&CurVal = Map[GV];
   void *OldVal = CurVal;
 
   if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
     EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
   CurVal = Addr;
-  
-  // If we are using the reverse mapping, add it too
+
+  // If we are using the reverse mapping, add it too.
   if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
     AssertingVH<const GlobalValue> &V =
       EEState.getGlobalAddressReverseMap(locked)[Addr];
@@ -219,20 +223,14 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
   return OldVal;
 }
 
-/// getPointerToGlobalIfAvailable - This returns the address of the specified
-/// global value if it is has already been codegen'd, otherwise it returns null.
-///
 void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
   MutexGuard locked(lock);
-  
+
   ExecutionEngineState::GlobalAddressMapTy::iterator I =
     EEState.getGlobalAddressMap(locked).find(GV);
   return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
 }
 
-/// getGlobalValueAtAddress - Return the LLVM global value object that starts
-/// at the specified address.
-///
 const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
   MutexGuard locked(lock);
 
@@ -241,8 +239,8 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
     for (ExecutionEngineState::GlobalAddressMapTy::iterator
          I = EEState.getGlobalAddressMap(locked).begin(),
          E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
-      EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
-                                                                     I->first));
+      EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+                                                          I->second, I->first));
   }
 
   std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
@@ -301,54 +299,50 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
   return Array;
 }
 
-
-/// runStaticConstructorsDestructors - This method is used to execute all of
-/// the static constructors or destructors for a module, depending on the
-/// value of isDtors.
 void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
                                                        bool isDtors) {
   const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
-  
-  // Execute global ctors/dtors for each module in the program.
-  
- GlobalVariable *GV = module->getNamedGlobal(Name);
-
- // If this global has internal linkage, or if it has a use, then it must be
- // an old-style (llvmgcc3) static ctor with __main linked in and in use.  If
- // this is the case, don't execute any of the global ctors, __main will do
- // it.
- if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
- 
- // Should be an array of '{ int, void ()* }' structs.  The first value is
- // the init priority, which we ignore.
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return;
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
-   if (ConstantStruct *CS = 
-       dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
-     if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-   
-     Constant *FP = CS->getOperand(1);
-     if (FP->isNullValue())
-       break;  // Found a null terminator, exit.
-   
-     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
-       if (CE->isCast())
-         FP = CE->getOperand(0);
-     if (Function *F = dyn_cast<Function>(FP)) {
-       // Execute the ctor/dtor function!
-       runFunction(F, std::vector<GenericValue>());
-     }
-   }
+  GlobalVariable *GV = module->getNamedGlobal(Name);
+
+  // If this global has internal linkage, or if it has a use, then it must be
+  // an old-style (llvmgcc3) static ctor with __main linked in and in use.  If
+  // this is the case, don't execute any of the global ctors, __main will do
+  // it.
+  if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+  // Should be an array of '{ int, void ()* }' structs.  The first value is
+  // the init priority, which we ignore.
+  ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!InitList) return;
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    ConstantStruct *CS =
+      dyn_cast<ConstantStruct>(InitList->getOperand(i));
+    if (!CS) continue;
+    if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+    Constant *FP = CS->getOperand(1);
+    if (FP->isNullValue())
+      break;  // Found a null terminator, exit.
+
+    // Strip off constant expression casts.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+      if (CE->isCast())
+        FP = CE->getOperand(0);
+
+    // Execute the ctor/dtor function!
+    if (Function *F = dyn_cast<Function>(FP))
+      runFunction(F, std::vector<GenericValue>());
+
+    // FIXME: It is marginally lame that we just do nothing here if we see an
+    // entry we don't recognize. It might not be unreasonable for the verifier
+    // to not even allow this and just assert here.
+  }
 }
 
-/// runStaticConstructorsDestructors - This method is used to execute all of
-/// the static constructors or destructors for a program, depending on the
-/// value of isDtors.
 void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
   // Execute global ctors/dtors for each module in the program.
-  for (unsigned m = 0, e = Modules.size(); m != e; ++m)
-    runStaticConstructorsDestructors(Modules[m], isDtors);
+  for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+    runStaticConstructorsDestructors(Modules[i], isDtors);
 }
 
 #ifndef NDEBUG
@@ -362,9 +356,6 @@ static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
 }
 #endif
 
-/// runFunctionAsMain - This is a helper function which wraps runFunction to
-/// handle the common task of starting up main with the specified argc, argv,
-/// and envp parameters.
 int ExecutionEngine::runFunctionAsMain(Function *Fn,
                                        const std::vector<std::string> &argv,
                                        const char * const * envp) {
@@ -376,32 +367,20 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
   unsigned NumArgs = Fn->getFunctionType()->getNumParams();
   const FunctionType *FTy = Fn->getFunctionType();
   const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
-  switch (NumArgs) {
-  case 3:
-   if (FTy->getParamType(2) != PPInt8Ty) {
-     report_fatal_error("Invalid type for third argument of main() supplied");
-   }
-   // FALLS THROUGH
-  case 2:
-   if (FTy->getParamType(1) != PPInt8Ty) {
-     report_fatal_error("Invalid type for second argument of main() supplied");
-   }
-   // FALLS THROUGH
-  case 1:
-   if (!FTy->getParamType(0)->isIntegerTy(32)) {
-     report_fatal_error("Invalid type for first argument of main() supplied");
-   }
-   // FALLS THROUGH
-  case 0:
-   if (!FTy->getReturnType()->isIntegerTy() &&
-       !FTy->getReturnType()->isVoidTy()) {
-     report_fatal_error("Invalid return type of main() supplied");
-   }
-   break;
-  default:
-   report_fatal_error("Invalid number of arguments of main() supplied");
-  }
-  
+
+  // Check the argument types.
+  if (NumArgs > 3)
+    report_fatal_error("Invalid number of arguments of main() supplied");
+  if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty)
+    report_fatal_error("Invalid type for third argument of main() supplied");
+  if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty)
+    report_fatal_error("Invalid type for second argument of main() supplied");
+  if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32))
+    report_fatal_error("Invalid type for first argument of main() supplied");
+  if (!FTy->getReturnType()->isIntegerTy() &&
+      !FTy->getReturnType()->isVoidTy())
+    report_fatal_error("Invalid return type of main() supplied");
+
   ArgvArray CArgv;
   ArgvArray CEnv;
   if (NumArgs) {
@@ -420,13 +399,10 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
       }
     }
   }
+
   return runFunction(Fn, GVArgs).IntVal.getZExtValue();
 }
 
-/// If possible, create a JIT, unless the caller specifically requests an
-/// Interpreter or there's an error. If even an Interpreter cannot be created,
-/// NULL is returned.
-///
 ExecutionEngine *ExecutionEngine::create(Module *M,
                                          bool ForceInterpreter,
                                          std::string *ErrorStr,
@@ -464,7 +440,13 @@ ExecutionEngine *EngineBuilder::create() {
   // Unless the interpreter was explicitly selected or the JIT is not linked,
   // try making a JIT.
   if (WhichEngine & EngineKind::JIT) {
-    if (ExecutionEngine::JITCtor) {
+    if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+      ExecutionEngine *EE =
+        ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+                                   AllocateGVsWithCode, CMModel,
+                                   MArch, MCPU, MAttrs);
+      if (EE) return EE;
+    } else if (ExecutionEngine::JITCtor) {
       ExecutionEngine *EE =
         ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
                                  AllocateGVsWithCode, CMModel,
@@ -486,21 +468,18 @@ ExecutionEngine *EngineBuilder::create() {
   if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
     if (ErrorStr)
       *ErrorStr = "JIT has not been linked in.";
-  }    
+  }
+
   return 0;
 }
 
-/// getPointerToGlobal - This returns the address of the specified global
-/// value.  This may involve code generation if it's a function.
-///
 void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
   if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
     return getPointerToFunction(F);
 
   MutexGuard locked(lock);
-  void *p = EEState.getGlobalAddressMap(locked)[GV];
-  if (p)
-    return p;
+  if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+    return P;
 
   // Global variable might have been added since interpreter started.
   if (GlobalVariable *GVar =
@@ -508,12 +487,12 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
     EmitGlobalVariable(GVar);
   else
     llvm_unreachable("Global hasn't had an address allocated yet!");
+
   return EEState.getGlobalAddressMap(locked)[GV];
 }
 
-/// This function converts a Constant* into a GenericValue. The interesting 
-/// part is if C is a ConstantExpr.
-/// @brief Get a GenericValue for a Constant*
+/// \brief Converts a Constant* into a GenericValue, including handling of
+/// ConstantExpr values.
 GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
   // If its undefined, return the garbage.
   if (isa<UndefValue>(C)) {
@@ -533,12 +512,12 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
     return Result;
   }
 
-  // If the value is a ConstantExpr
+  // Otherwise, if the value is a ConstantExpr...
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
     Constant *Op0 = CE->getOperand(0);
     switch (CE->getOpcode()) {
     case Instruction::GetElementPtr: {
-      // Compute the index 
+      // Compute the index
       GenericValue Result = getConstantValue(Op0);
       SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
       uint64_t Offset =
@@ -585,9 +564,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       else if (CE->getType()->isDoubleTy())
         GV.DoubleVal = GV.IntVal.roundToDouble();
       else if (CE->getType()->isX86_FP80Ty()) {
-        const uint64_t zero[] = {0, 0};
-        APFloat apf = APFloat(APInt(80, 2, zero));
-        (void)apf.convertFromAPInt(GV.IntVal, 
+        APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+        (void)apf.convertFromAPInt(GV.IntVal,
                                    false,
                                    APFloat::rmNearestTiesToEven);
         GV.IntVal = apf.bitcastToAPInt();
@@ -601,9 +579,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       else if (CE->getType()->isDoubleTy())
         GV.DoubleVal = GV.IntVal.signedRoundToDouble();
       else if (CE->getType()->isX86_FP80Ty()) {
-        const uint64_t zero[] = { 0, 0};
-        APFloat apf = APFloat(APInt(80, 2, zero));
-        (void)apf.convertFromAPInt(GV.IntVal, 
+        APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+        (void)apf.convertFromAPInt(GV.IntVal,
                                    true,
                                    APFloat::rmNearestTiesToEven);
         GV.IntVal = apf.bitcastToAPInt();
@@ -623,7 +600,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
         uint64_t v;
         bool ignored;
         (void)apf.convertToInteger(&v, BitWidth,
-                                   CE->getOpcode()==Instruction::FPToSI, 
+                                   CE->getOpcode()==Instruction::FPToSI,
                                    APFloat::rmTowardZero, &ignored);
         GV.IntVal = v; // endian?
       }
@@ -656,13 +633,13 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
           else if (DestTy->isDoubleTy())
             GV.DoubleVal = GV.IntVal.bitsToDouble();
           break;
-        case Type::FloatTyID: 
+        case Type::FloatTyID:
           assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
-          GV.IntVal.floatToBits(GV.FloatVal);
+          GV.IntVal = APInt::floatToBits(GV.FloatVal);
           break;
         case Type::DoubleTyID:
           assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
-          GV.IntVal.doubleToBits(GV.DoubleVal);
+          GV.IntVal = APInt::doubleToBits(GV.DoubleVal);
           break;
         case Type::PointerTyID:
           assert(DestTy->isPointerTy() && "Invalid bitcast");
@@ -712,9 +689,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
             GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
           case Instruction::FMul:
             GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
-          case Instruction::FDiv: 
+          case Instruction::FDiv:
             GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
-          case Instruction::FRem: 
+          case Instruction::FRem:
             GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
         }
         break;
@@ -727,9 +704,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
             GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
           case Instruction::FMul:
             GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
-          case Instruction::FDiv: 
+          case Instruction::FDiv:
             GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
-          case Instruction::FRem: 
+          case Instruction::FRem:
             GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
         }
         break;
@@ -738,7 +715,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       case Type::FP128TyID: {
         APFloat apfLHS = APFloat(LHS.IntVal);
         switch (CE->getOpcode()) {
-          default: llvm_unreachable("Invalid long double opcode");llvm_unreachable(0);
+          default: llvm_unreachable("Invalid long double opcode");
           case Instruction::FAdd:
             apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
@@ -751,11 +728,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
             apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
-          case Instruction::FDiv: 
+          case Instruction::FDiv:
             apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
-          case Instruction::FRem: 
+          case Instruction::FRem:
             apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
             GV.IntVal = apfLHS.bitcastToAPInt();
             break;
@@ -768,16 +745,18 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
     default:
       break;
     }
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "ConstantExpr not handled: " << *CE;
-    report_fatal_error(Msg.str());
+
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "ConstantExpr not handled: " << *CE;
+    report_fatal_error(OS.str());
   }
 
+  // Otherwise, we have a simple constant.
   GenericValue Result;
   switch (C->getType()->getTypeID()) {
-  case Type::FloatTyID: 
-    Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat(); 
+  case Type::FloatTyID:
+    Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
     break;
   case Type::DoubleTyID:
     Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
@@ -804,11 +783,12 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       llvm_unreachable("Unknown constant pointer type!");
     break;
   default:
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "ERROR: Constant unimplemented for type: " << *C->getType();
-    report_fatal_error(Msg.str());
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "ERROR: Constant unimplemented for type: " << *C->getType();
+    report_fatal_error(OS.str());
   }
+
   return Result;
 }
 
@@ -819,11 +799,11 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
   uint8_t *Src = (uint8_t *)IntVal.getRawData();
 
-  if (sys::isLittleEndianHost())
+  if (sys::isLittleEndianHost()) {
     // Little-endian host - the source is ordered from LSB to MSB.  Order the
     // destination from LSB to MSB: Do a straight copy.
     memcpy(Dst, Src, StoreBytes);
-  else {
+  } else {
     // Big-endian host - the source is an array of 64 bit words ordered from
     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
@@ -838,10 +818,6 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
   }
 }
 
-/// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr.  Ptr
-/// is the address of the memory at which to store Val, cast to GenericValue *.
-/// It is not a pointer to a GenericValue containing the address at which to
-/// store Val.
 void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
                                          GenericValue *Ptr, const Type *Ty) {
   const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
@@ -932,16 +908,13 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
     break;
   }
   default:
-    std::string msg;
-    raw_string_ostream Msg(msg);
-    Msg << "Cannot load value of type " << *Ty << "!";
-    report_fatal_error(Msg.str());
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "Cannot load value of type " << *Ty << "!";
+    report_fatal_error(OS.str());
   }
 }
 
-// InitializeMemory - Recursive function to apply a Constant value into the
-// specified memory location...
-//
 void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
   DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
   DEBUG(Init->dump());
@@ -974,20 +947,17 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
     return;
   }
 
-  dbgs() << "Bad Type: " << *Init->getType() << "\n";
+  DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n");
   llvm_unreachable("Unknown constant type to initialize memory with!");
 }
 
 /// EmitGlobals - Emit all of the global variables to memory, storing their
 /// addresses into GlobalAddress.  This must make sure to copy the contents of
 /// their initializers into the memory.
-///
 void ExecutionEngine::emitGlobals() {
-
   // Loop over all of the global variables in the program, allocating the memory
   // to hold them.  If there is more than one module, do a prepass over globals
   // to figure out how the different modules should link together.
-  //
   std::map<std::pair<std::string, const Type*>,
            const GlobalValue*> LinkedGlobalsMap;
 
@@ -1000,8 +970,8 @@ void ExecutionEngine::emitGlobals() {
         if (GV->hasLocalLinkage() || GV->isDeclaration() ||
             GV->hasAppendingLinkage() || !GV->hasName())
           continue;// Ignore external globals and globals with internal linkage.
-          
-        const GlobalValue *&GVEntry = 
+
+        const GlobalValue *&GVEntry =
           LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
 
         // If this is the first time we've seen this global, it is the canonical
@@ -1010,13 +980,13 @@ void ExecutionEngine::emitGlobals() {
           GVEntry = GV;
           continue;
         }
-        
+
         // If the existing global is strong, never replace it.
         if (GVEntry->hasExternalLinkage() ||
             GVEntry->hasDLLImportLinkage() ||
             GVEntry->hasDLLExportLinkage())
           continue;
-        
+
         // Otherwise, we know it's linkonce/weak, replace it if this is a strong
         // symbol.  FIXME is this right for common?
         if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
@@ -1024,7 +994,7 @@ void ExecutionEngine::emitGlobals() {
       }
     }
   }
-  
+
   std::vector<const GlobalValue*> NonCanonicalGlobals;
   for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
     Module &M = *Modules[m];
@@ -1032,7 +1002,7 @@ void ExecutionEngine::emitGlobals() {
          I != E; ++I) {
       // In the multi-module case, see what this global maps to.
       if (!LinkedGlobalsMap.empty()) {
-        if (const GlobalValue *GVEntry = 
+        if (const GlobalValue *GVEntry =
               LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
           // If something else is the canonical global, ignore this one.
           if (GVEntry != &*I) {
@@ -1041,7 +1011,7 @@ void ExecutionEngine::emitGlobals() {
           }
         }
       }
-      
+
       if (!I->isDeclaration()) {
         addGlobalMapping(I, getMemoryForGV(I));
       } else {
@@ -1056,7 +1026,7 @@ void ExecutionEngine::emitGlobals() {
         }
       }
     }
-    
+
     // If there are multiple modules, map the non-canonical globals to their
     // canonical location.
     if (!NonCanonicalGlobals.empty()) {
@@ -1069,14 +1039,14 @@ void ExecutionEngine::emitGlobals() {
         addGlobalMapping(GV, Ptr);
       }
     }
-    
-    // Now that all of the globals are set up in memory, loop through them all 
+
+    // Now that all of the globals are set up in memory, loop through them all
     // and initialize their contents.
     for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
       if (!I->isDeclaration()) {
         if (!LinkedGlobalsMap.empty()) {
-          if (const GlobalValue *GVEntry = 
+          if (const GlobalValue *GVEntry =
                 LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
             if (GVEntry != &*I)  // Not the canonical variable.
               continue;
@@ -1098,11 +1068,11 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
     GA = getMemoryForGV(GV);
     addGlobalMapping(GV, GA);
   }
-  
+
   // Don't initialize if it's thread local, let the client do it.
   if (!GV->isThreadLocal())
     InitializeMemory(GV->getInitializer(), GA);
-  
+
   const Type *ElTy = GV->getType()->getElementType();
   size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
   NumInitBytes += (unsigned)GVSize;
@@ -1113,18 +1083,20 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
   : EE(EE), GlobalAddressMap(this) {
 }
 
-sys::Mutex *ExecutionEngineState::AddressMapConfig::getMutex(
-  ExecutionEngineState *EES) {
+sys::Mutex *
+ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
   return &EES->EE.lock;
 }
-void ExecutionEngineState::AddressMapConfig::onDelete(
-  ExecutionEngineState *EES, const GlobalValue *Old) {
+
+void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
+                                                      const GlobalValue *Old) {
   void *OldVal = EES->GlobalAddressMap.lookup(Old);
   EES->GlobalAddressReverseMap.erase(OldVal);
 }
 
-void ExecutionEngineState::AddressMapConfig::onRAUW(
-  ExecutionEngineState *, const GlobalValue *, const GlobalValue *) {
+void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
+                                                    const GlobalValue *,
+                                                    const GlobalValue *) {
   assert(false && "The ExecutionEngine doesn't know how to handle a"
          " RAUW on a value it has a global mapping for.");
 }
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index dff97fa26e8f..d331f830b62e 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -1,5 +1,17 @@
+# Make sure that the path to libffi headers is on the command
+# line. That path can be a compiler's non-default path even when
+# FFI_INCLUDE_DIR was not used, because cmake has its own paths for
+# searching for headers (CMAKE_SYSTEM_INCLUDE_PATH, for instance):
+if( FFI_INCLUDE_PATH )
+  include_directories( ${FFI_INCLUDE_PATH} )
+endif()
+
 add_llvm_library(LLVMInterpreter
   Execution.cpp
   ExternalFunctions.cpp
   Interpreter.cpp
   )
+
+if( LLVM_ENABLE_FFI )
+  target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
+endif()
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 59ebe6e2a885..498063bf6555 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1060,11 +1060,9 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
     Dest.PointerVal = Src.PointerVal;
   } else if (DstTy->isIntegerTy()) {
     if (SrcTy->isFloatTy()) {
-      Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT);
-      Dest.IntVal.floatToBits(Src.FloatVal);
+      Dest.IntVal = APInt::floatToBits(Src.FloatVal);
     } else if (SrcTy->isDoubleTy()) {
-      Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT);
-      Dest.IntVal.doubleToBits(Src.DoubleVal);
+      Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
     } else if (SrcTy->isIntegerTy()) {
       Dest.IntVal = Src.IntVal;
     } else 
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 57d12606bc77..062256a2ac73 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -24,10 +24,10 @@
 #include "llvm/Module.h"
 #include "llvm/Config/config.h"     // Detect libffi
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include <csignal>
 #include <cstdio>
 #include <map>
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 564e9abad9e7..bfebe3debfcd 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -19,7 +19,7 @@
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index 274f816f39e1..169e1bae547b 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -17,7 +17,7 @@
 
 #include "JIT.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Config/config.h"
 using namespace llvm;
 
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 63125b79c8e2..cc76b138a8a6 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -30,7 +30,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MutexGuard.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Config/config.h"
 
 using namespace llvm;
@@ -66,8 +66,15 @@ static struct RegisterJIT {
 extern "C" void LLVMLinkInJIT() {
 }
 
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+     !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
 
-#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+#if HAVE_EHTABLE_SUPPORT
  
 // libgcc defines the __register_frame function to dynamically register new
 // dwarf frames for exception handling. This functionality is not portable
@@ -87,6 +94,7 @@ extern "C" void LLVMLinkInJIT() {
 // values of an opaque key, used by libgcc to find dwarf tables.
 
 extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
 
 #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
 # define USE_KEYMGR 1
@@ -190,7 +198,7 @@ void DarwinRegisterFrame(void* FrameBegin) {
 
 }
 #endif // __APPLE__
-#endif // __GNUC__
+#endif // HAVE_EHTABLE_SUPPORT
 
 /// createJIT - This is the factory method for creating a JIT for the current
 /// machine, it does not fall back to the interpreter.  This takes ownership
@@ -306,7 +314,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
   }
   
   // Register routine for informing unwinding runtime about new EH frames
-#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+#if HAVE_EHTABLE_SUPPORT
 #if USE_KEYMGR
   struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
     _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
@@ -318,16 +326,21 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
     LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); 
   _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
   InstallExceptionTableRegister(DarwinRegisterFrame);
+  // Not sure about how to deregister on Darwin.
 #else
   InstallExceptionTableRegister(__register_frame);
+  InstallExceptionTableDeregister(__deregister_frame);
 #endif // __APPLE__
-#endif // __GNUC__
+#endif // HAVE_EHTABLE_SUPPORT
   
   // Initialize passes.
   PM.doInitialization();
 }
 
 JIT::~JIT() {
+  // Unregister all exception tables registered by this JIT.
+  DeregisterAllTables();
+  // Cleanup.
   AllJits->Remove(this);
   delete jitstate;
   delete JCE;
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 6e11a3cd9368..3b5acb7ecc48 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include <string>
 #include <vector>
 
@@ -35,7 +35,7 @@ namespace llvm {
 extern "C" {
 
   // Debuggers puts a breakpoint in this function.
-  DISABLE_INLINE void __jit_debug_register_code() { }
+  LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
 
   // We put information about the JITed function in this global, which the
   // debugger reads.  Make sure to specify the version statically, because the
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
index 7e53d7847139..dce506bbfefd 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -16,7 +16,7 @@
 #define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 
 // This must be kept in sync with gdb/gdb/jit.h .
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 1105bcc0437f..f54cccadea65 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -26,7 +26,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
@@ -43,8 +43,9 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
 
   const TargetMachine& TM = F.getTarget();
   TD = TM.getTargetData();
-  stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
+  stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
   RI = TM.getRegisterInfo();
+  TFI = TM.getFrameLowering();
   JCE = &jce;
   
   unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
@@ -66,7 +67,7 @@ void
 JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
                                 const std::vector<MachineMove> &Moves) const {
   unsigned PointerSize = TD->getPointerSize();
-  int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+  int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
           PointerSize : -PointerSize;
   MCSymbol *BaseLabel = 0;
 
@@ -481,7 +482,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
 unsigned char*
 JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   unsigned PointerSize = TD->getPointerSize();
-  int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+  int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
           PointerSize : -PointerSize;
   
   unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
@@ -523,7 +524,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   }
 
   std::vector<MachineMove> Moves;
-  RI->getInitialFrameState(Moves);
+  TFI->getInitialFrameState(Moves);
   EmitFrameMoves(0, Moves);
 
   JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 30956820f357..9495697a1aa4 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -23,6 +23,7 @@ class MachineFunction;
 class MachineModuleInfo;
 class MachineMove;
 class TargetData;
+class TargetFrameLowering;
 class TargetMachine;
 class TargetRegisterInfo;
 
@@ -30,6 +31,7 @@ class JITDwarfEmitter {
   const TargetData* TD;
   JITCodeEmitter* JCE;
   const TargetRegisterInfo* RI;
+  const TargetFrameLowering *TFI;
   MachineModuleInfo* MMI;
   JIT& Jit;
   bool stackGrowthDirection;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 4c0d0789cced..4cd8757ad0b8 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -42,8 +42,8 @@
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Disassembler.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 653e6f1fc07c..eec23cec0af9 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
 #include <vector>
 #include <cassert>
 #include <climits>
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 1ca084b5808b..670fa7da1fed 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -26,7 +26,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Errno.h"
+#include "llvm/Support/Errno.h"
 #include "llvm/Config/config.h"
 #include <stddef.h>
 using namespace llvm;
diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp
index 3349c338052b..6b7173cece18 100644
--- a/lib/ExecutionEngine/JIT/TargetSelect.cpp
+++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -18,7 +18,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Target/SubtargetFeature.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
new file mode 100644
index 000000000000..f7ed176fef78
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMCJIT
+  MCJIT.cpp
+  TargetSelect.cpp
+  )
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 000000000000..f1e9dab250bf
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,92 @@
+//===-- JIT.cpp - MC-based Just-in-Time Compiler --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+  RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+                                  std::string *ErrorStr,
+                                  JITMemoryManager *JMM,
+                                  CodeGenOpt::Level OptLevel,
+                                  bool GVsWithCode,
+                                  CodeModel::Model CMM,
+                                  StringRef MArch,
+                                  StringRef MCPU,
+                                  const SmallVectorImpl<std::string>& MAttrs) {
+  // Try to register the program as a source of symbols to resolve against.
+  //
+  // FIXME: Don't do this here.
+  sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+  // Pick a target either via -march or by guessing the native arch.
+  //
+  // FIXME: This should be lifted out of here, it isn't something which should
+  // be part of the JIT policy, rather the burden for this selection should be
+  // pushed to clients.
+  TargetMachine *TM = MCJIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+  if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+  TM->setCodeModel(CMM);
+
+  // If the target supports JIT code generation, create the JIT.
+  if (TargetJITInfo *TJ = TM->getJITInfo())
+    return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+
+  if (ErrorStr)
+    *ErrorStr = "target does not support JIT code generation";
+  return 0;
+}
+
+MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+             JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+             bool AllocateGVsWithCode)
+  : ExecutionEngine(M) {
+}
+
+MCJIT::~MCJIT() {
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+  report_fatal_error("not yet implemented");
+  return 0;
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+  return 0;
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+                                const std::vector<GenericValue> &ArgValues) {
+  report_fatal_error("not yet implemented");
+  return GenericValue();
+}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 000000000000..cd1f989b10c7
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,68 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
+namespace llvm {
+
+class MCJIT : public ExecutionEngine {
+  MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+        JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+        bool AllocateGVsWithCode);
+public:
+  ~MCJIT();
+
+  /// @name ExecutionEngine interface implementation
+  /// @{
+
+  virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+  virtual void *getPointerToFunction(Function *F);
+
+  virtual void *recompileAndRelinkFunction(Function *F);
+
+  virtual void freeMachineCodeForFunction(Function *F);
+
+  virtual GenericValue runFunction(Function *F,
+                                   const std::vector<GenericValue> &ArgValues);
+
+  /// @}
+  /// @name (Private) Registration Interfaces
+  /// @{
+
+  static void Register() {
+    MCJITCtor = createJIT;
+  }
+
+  // FIXME: This routine is scheduled for termination. Do not use it.
+  static TargetMachine *selectTarget(Module *M,
+                                     StringRef MArch,
+                                     StringRef MCPU,
+                                     const SmallVectorImpl<std::string>& MAttrs,
+                                     std::string *Err);
+
+  static ExecutionEngine *createJIT(Module *M,
+                                    std::string *ErrorStr,
+                                    JITMemoryManager *JMM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool GVsWithCode,
+                                    CodeModel::Model CMM,
+                                    StringRef MArch,
+                                    StringRef MCPU,
+                                    const SmallVectorImpl<std::string>& MAttrs);
+
+  // @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/ExecutionEngine/MCJIT/Makefile b/lib/ExecutionEngine/MCJIT/Makefile
new file mode 100644
index 000000000000..967efbc0efa4
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCJIT
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/MCJIT/TargetSelect.cpp b/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
new file mode 100644
index 000000000000..50f65938bb0a
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
@@ -0,0 +1,91 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x. Clients
+// should initialize targets prior to calling createJIT.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch.  Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *MCJIT::selectTarget(Module *Mod,
+                                 StringRef MArch,
+                                 StringRef MCPU,
+                                 const SmallVectorImpl<std::string>& MAttrs,
+                                 std::string *ErrorStr) {
+  Triple TheTriple(Mod->getTargetTriple());
+  if (TheTriple.getTriple().empty())
+    TheTriple.setTriple(sys::getHostTriple());
+
+  // Adjust the triple to match what the user requested.
+  const Target *TheTarget = 0;
+  if (!MArch.empty()) {
+    for (TargetRegistry::iterator it = TargetRegistry::begin(),
+           ie = TargetRegistry::end(); it != ie; ++it) {
+      if (MArch == it->getName()) {
+        TheTarget = &*it;
+        break;
+      }
+    }
+
+    if (!TheTarget) {
+      *ErrorStr = "No available targets are compatible with this -march, "
+        "see -version for the available targets.\n";
+      return 0;
+    }
+
+    // Adjust the triple to match (if known), otherwise stick with the
+    // module/host triple.
+    Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+    if (Type != Triple::UnknownArch)
+      TheTriple.setArch(Type);
+  } else {
+    std::string Error;
+    TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+    if (TheTarget == 0) {
+      if (ErrorStr)
+        *ErrorStr = Error;
+      return 0;
+    }
+  }
+
+  if (!TheTarget->hasJIT()) {
+    errs() << "WARNING: This target JIT is not designed for the host you are"
+           << " running.  If bad things happen, please choose a different "
+           << "-march switch.\n";
+  }
+
+  // Package up features to be passed to target/subtarget
+  std::string FeaturesStr;
+  if (!MCPU.empty() || !MAttrs.empty()) {
+    SubtargetFeatures Features;
+    Features.setCPU(MCPU);
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  // Allocate a target...
+  TargetMachine *Target =
+    TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
+  assert(Target && "Could not allocate target machine!");
+  return Target;
+}
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index e0e050e89728..1858d776616c 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,6 @@
 ##===----------------------------------------------------------------------===##
 LEVEL = ../..
 LIBRARYNAME = LLVMExecutionEngine
-PARALLEL_DIRS = Interpreter JIT
+PARALLEL_DIRS = Interpreter JIT MCJIT
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index 1be2becc86c3..52a0d175a5cd 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -15,9 +15,10 @@
 #include "llvm/Linker.h"
 #include "llvm/Module.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 // LinkItems - This function is the main entry point into linking. It takes a
@@ -160,19 +161,19 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
   // Check for a file of name "-", which means "read standard input"
   if (File.str() == "-") {
     std::auto_ptr<Module> M;
-    if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(&Error)) {
+    OwningPtr<MemoryBuffer> Buffer;
+    error_code ec;
+    if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
       if (!Buffer->getBufferSize()) {
-        delete Buffer;
         Error = "standard input is empty";
       } else {
-        M.reset(ParseBitcodeFile(Buffer, Context, &Error));
-        delete Buffer;
+        M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
         if (M.get())
           if (!LinkInModule(M.get(), &Error))
             return false;
       }
     }
-    return error("Cannot link stdin: " + Error);
+    return error("Cannot link stdin: " + ec.message());
   }
 
   // Determine what variety of file it is.
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 7e8245a9e3a6..5aa06abdd989 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -28,7 +28,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/ADT/DenseMap.h"
 using namespace llvm;
@@ -434,8 +434,10 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
   }
 
   // Check visibility
-  if (Dest && Src->getVisibility() != Dest->getVisibility())
-    if (!Src->isDeclaration() && !Dest->isDeclaration())
+  if (Dest && Src->getVisibility() != Dest->getVisibility() &&
+      !Src->isDeclaration() && !Dest->isDeclaration() &&
+      !Src->hasAvailableExternallyLinkage() &&
+      !Dest->hasAvailableExternallyLinkage())
       return Error(Err, "Linking globals named '" + Src->getName() +
                    "': symbols have different visibilities!");
   return false;
@@ -449,10 +451,9 @@ static void LinkNamedMDNodes(Module *Dest, Module *Src,
     const NamedMDNode *SrcNMD = I;
     NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
     // Add Src elements into Dest node.
-    for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i) 
+    for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
       DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
-                                                ValueMap,
-                                                true)));
+                                                ValueMap)));
   }
 }
 
@@ -520,6 +521,8 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
       continue;
     }
 
+    bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr();
+
     // If the visibilities of the symbols disagree and the destination is a
     // prototype, take the visibility of its input.
     if (DGV->isDeclaration())
@@ -559,14 +562,17 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
       // we are replacing may be a function (if a prototype, weak, etc) or a
       // global variable.
       GlobalVariable *NewDGV =
-        new GlobalVariable(*Dest, SGV->getType()->getElementType(), 
-                           SGV->isConstant(), NewLinkage, /*init*/0, 
+        new GlobalVariable(*Dest, SGV->getType()->getElementType(),
+                           SGV->isConstant(), NewLinkage, /*init*/0,
                            DGV->getName(), 0, false,
                            SGV->getType()->getAddressSpace());
 
+      // Set the unnamed_addr.
+      NewDGV->setUnnamedAddr(HasUnnamedAddr);
+
       // Propagate alignment, section, and visibility info.
       CopyGVAttributes(NewDGV, SGV);
-      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, 
+      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
                                                               DGV->getType()));
 
       // DGV will conflict with NewDGV because they both had the same
@@ -608,8 +614,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
                      "': symbol multiple defined");
     }
 
-    // Set calculated linkage
+    // Set calculated linkage and unnamed_addr
     DGV->setLinkage(NewLinkage);
+    DGV->setUnnamedAddr(HasUnnamedAddr);
 
     // Make sure to remember this mapping...
     ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType());
@@ -668,6 +675,13 @@ static bool LinkAlias(Module *Dest, const Module *Src,
     GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
     GlobalValue* DGV = NULL;
 
+    // Fixup aliases to bitcasts.  Note that aliases to GEPs are still broken
+    // by this, but aliases to GEPs are broken to a lot of other things, so
+    // it's less important.
+    Constant *DAliaseeConst = DAliasee;
+    if (SGA->getType() != DAliasee->getType())
+      DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType());
+
     // Try to find something 'similar' to SGA in destination module.
     if (!DGV && !SGA->hasLocalLinkage()) {
       DGV = Dest->getNamedAlias(SGA->getName());
@@ -721,7 +735,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
                        "': aliasee is not global variable");
 
         NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                                SGA->getName(), DAliasee, Dest);
+                                SGA->getName(), DAliaseeConst, Dest);
         CopyGVAttributes(NewGA, SGA);
 
         // Any uses of DGV need to change to NewGA, with cast, if needed.
@@ -750,7 +764,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
                        "': aliasee is not function");
 
         NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                                SGA->getName(), DAliasee, Dest);
+                                SGA->getName(), DAliaseeConst, Dest);
         CopyGVAttributes(NewGA, SGA);
 
         // Any uses of DF need to change to NewGA, with cast, if needed.
@@ -772,14 +786,8 @@ static bool LinkAlias(Module *Dest, const Module *Src,
     } else {
       // No linking to be performed, simply create an identical version of the
       // alias over in the dest module...
-      Constant *Aliasee = DAliasee;
-      // Fixup aliases to bitcasts.  Note that aliases to GEPs are still broken
-      // by this, but aliases to GEPs are broken to a lot of other things, so
-      // it's less important.
-      if (SGA->getType() != DAliasee->getType())
-        Aliasee = ConstantExpr::getBitCast(DAliasee, SGA->getType());
       NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                              SGA->getName(), Aliasee, Dest);
+                              SGA->getName(), DAliaseeConst, Dest);
       CopyGVAttributes(NewGA, SGA);
 
       // Proceed to 'common' steps
@@ -813,9 +821,9 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
     const GlobalVariable *SGV = I;
 
     if (SGV->hasInitializer()) {      // Only process initialized GV's
-      // Figure out what the initializer looks like in the dest module...
+      // Figure out what the initializer looks like in the dest module.
       Constant *SInit =
-        cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));
+        cast<Constant>(MapValue(SGV->getInitializer(), ValueMap));
       // Grab destination global variable or alias.
       GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
 
@@ -927,7 +935,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
       CopyGVAttributes(NewDF, SF);
 
       // Any uses of DF need to change to NewDF, with cast
-      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, 
+      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
                                                               DGV->getType()));
 
       // DF will conflict with NewDF because they both had the same. We must
@@ -995,32 +1003,10 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
   // At this point, all of the instructions and values of the function are now
   // copied over.  The only problem is that they are still referencing values in
   // the Source function as operands.  Loop through all of the operands of the
-  // functions and patch them up to point to the local versions...
-  //
-  // This is the same as RemapInstruction, except that it avoids remapping
-  // instruction and basic block operands.
-  //
+  // functions and patch them up to point to the local versions.
   for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      // Remap operands.
-      for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
-           OI != OE; ++OI)
-        if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
-          *OI = MapValue(*OI, ValueMap, true);
-
-      // Remap attached metadata.
-      SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-      I->getAllMetadata(MDs);
-      for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
-           MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
-        Value *Old = MI->second;
-        if (!isa<Instruction>(Old) && !isa<BasicBlock>(Old)) {
-          Value *New = MapValue(Old, ValueMap, true);
-          if (New != Old) 
-            I->setMetadata(MI->first, cast<MDNode>(New));
-        }
-      }
-    }
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries);
 
   // There is no need to map the arguments anymore.
   for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1099,7 +1085,7 @@ static bool LinkAppendingVars(Module *M,
          "Appending variables with different section name need to be linked!");
 
       unsigned NewSize = T1->getNumElements() + T2->getNumElements();
-      ArrayType *NewType = ArrayType::get(T1->getElementType(), 
+      ArrayType *NewType = ArrayType::get(T1->getElementType(),
                                                          NewSize);
 
       G1->setName("");   // Clear G1's name in case of a conflict!
@@ -1143,7 +1129,7 @@ static bool LinkAppendingVars(Module *M,
       // getelementptr instructions to not use the Cast!
       G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
                              G1->getType()));
-      G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, 
+      G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
                              G2->getType()));
 
       // Remove the two globals from the module now...
@@ -1217,8 +1203,13 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
       Src->getDataLayout() != Dest->getDataLayout())
     errs() << "WARNING: Linking two modules of different data layouts!\n";
   if (!Src->getTargetTriple().empty() &&
-      Dest->getTargetTriple() != Src->getTargetTriple())
-    errs() << "WARNING: Linking two modules of different target triples!\n";
+      Dest->getTargetTriple() != Src->getTargetTriple()) {
+    errs() << "WARNING: Linking two modules of different target triples: ";
+    if (!Src->getModuleIdentifier().empty())
+      errs() << Src->getModuleIdentifier() << ": ";
+    errs() << "'" << Src->getTargetTriple() << "' and '" 
+           << Dest->getTargetTriple() << "'\n";
+  }
 
   // Append the module inline asm string.
   if (!Src->getModuleInlineAsm().empty()) {
@@ -1300,10 +1291,9 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
 
   // If the source library's module id is in the dependent library list of the
   // destination library, remove it since that module is now linked in.
-  sys::Path modId;
-  modId.set(Src->getModuleIdentifier());
-  if (!modId.isEmpty())
-    Dest->removeLibrary(modId.getBasename());
+  const std::string &modId = Src->getModuleIdentifier();
+  if (!modId.empty())
+    Dest->removeLibrary(sys::path::stem(modId));
 
   return false;
 }
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 32aa0f901121..fba91da5ddd1 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -14,10 +14,11 @@
 #include "llvm/Linker.h"
 #include "llvm/Module.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 Linker::Linker(StringRef progname, StringRef modname,
@@ -97,13 +98,14 @@ std::auto_ptr<Module>
 Linker::LoadObject(const sys::Path &FN) {
   std::string ParseErrorMessage;
   Module *Result = 0;
-  
-  std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FN.c_str()));
-  if (Buffer.get())
-    Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
+    ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
+                      + ec.message();
   else
-    ParseErrorMessage = "Error reading file '" + FN.str() + "'";
-    
+    Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
   if (Result)
     return std::auto_ptr<Module>(Result);
   Error = "Bitcode file '" + FN.str() + "' could not be loaded";
@@ -133,7 +135,7 @@ static inline sys::Path IsLibrary(StringRef Name,
 
   // Try the libX.so (or .dylib) form
   FullPath.eraseSuffix();
-  FullPath.appendSuffix(&(LTDL_SHLIB_EXT[1]));
+  FullPath.appendSuffix(sys::Path::GetDLLSuffix());
   if (FullPath.isDynamicLibrary())  // Native shared library?
     return FullPath;
   if (FullPath.isBitcodeFile())    // .so file containing bitcode?
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 60a3a3e3e312..f1811a1716fb 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMMC
   MCCodeEmitter.cpp
   MCContext.cpp
   MCDisassembler.cpp
+  MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
   MCExpr.cpp
   MCInst.cpp
@@ -16,9 +17,11 @@ add_llvm_library(LLVMMC
   MCDwarf.cpp
   MCLoggingStreamer.cpp
   MCMachOStreamer.cpp
+  MCMachObjectTargetWriter.cpp
   MCNullStreamer.cpp
   MCObjectStreamer.cpp
   MCObjectWriter.cpp
+  MCPureStreamer.cpp
   MCSection.cpp
   MCSectionCOFF.cpp
   MCSectionELF.cpp
@@ -31,3 +34,6 @@ add_llvm_library(LLVMMC
   WinCOFFObjectWriter.cpp
   TargetAsmBackend.cpp
   )
+
+add_subdirectory(MCParser)
+add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index cf35b45715e1..8a00a16cfb4a 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,7 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/ELFObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
@@ -20,6 +21,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSymbol.h"
@@ -28,27 +30,76 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringSwitch.h"
 
 #include "../Target/X86/X86FixupKinds.h"
+#include "../Target/ARM/ARMFixupKinds.h"
 
 #include <vector>
 using namespace llvm;
 
-namespace {
+static unsigned GetType(const MCSymbolData &SD) {
+  uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+         Type == ELF::STT_TLS);
+  return Type;
+}
 
-  class ELFObjectWriterImpl {
-    static bool isFixupKindX86PCRel(unsigned Kind) {
-      switch (Kind) {
-      default:
-        return false;
-      case X86::reloc_pcrel_1byte:
-      case X86::reloc_pcrel_4byte:
-      case X86::reloc_riprel_4byte:
-      case X86::reloc_riprel_4byte_movq_load:
-        return true;
-      }
-    }
+static unsigned GetBinding(const MCSymbolData &SD) {
+  uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  return Binding;
+}
+
+static void SetBinding(MCSymbolData &SD, unsigned Binding) {
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+  SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
 
+static unsigned GetVisibility(MCSymbolData &SD) {
+  unsigned Visibility =
+    (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift;
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+  return Visibility;
+}
+
+
+static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+  switch (Variant) {
+  default:
+    return false;
+  case MCSymbolRefExpr::VK_GOT:
+  case MCSymbolRefExpr::VK_PLT:
+  case MCSymbolRefExpr::VK_GOTPCREL:
+  case MCSymbolRefExpr::VK_TPOFF:
+  case MCSymbolRefExpr::VK_TLSGD:
+  case MCSymbolRefExpr::VK_GOTTPOFF:
+  case MCSymbolRefExpr::VK_INDNTPOFF:
+  case MCSymbolRefExpr::VK_NTPOFF:
+  case MCSymbolRefExpr::VK_GOTNTPOFF:
+  case MCSymbolRefExpr::VK_TLSLDM:
+  case MCSymbolRefExpr::VK_DTPOFF:
+  case MCSymbolRefExpr::VK_TLSLD:
+    return true;
+  }
+}
+
+static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+  const MCFixupKindInfo &FKI =
+    Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
+
+  return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+namespace {
+  class ELFObjectWriter : public MCObjectWriter {
+  protected:
     /*static bool isFixupKindX86RIPRel(unsigned Kind) {
       return Kind == X86::reloc_riprel_4byte ||
         Kind == X86::reloc_riprel_4byte_movq_load;
@@ -64,6 +115,10 @@ namespace {
 
       // Support lexicographic sorting.
       bool operator<(const ELFSymbolData &RHS) const {
+        if (GetType(*SymbolData) == ELF::STT_FILE)
+          return true;
+        if (GetType(*RHS.SymbolData) == ELF::STT_FILE)
+          return false;
         return SymbolData->getSymbol().getName() <
                RHS.SymbolData->getSymbol().getName();
       }
@@ -75,15 +130,33 @@ namespace {
     struct ELFRelocationEntry {
       // Make these big enough for both 32-bit and 64-bit
       uint64_t r_offset;
-      uint64_t r_info;
+      int Index;
+      unsigned Type;
+      const MCSymbol *Symbol;
       uint64_t r_addend;
 
+      ELFRelocationEntry()
+        : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+      ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+                         unsigned RelType, const MCSymbol *Sym,
+                         uint64_t Addend)
+        : r_offset(RelocOffset), Index(Idx), Type(RelType),
+          Symbol(Sym), r_addend(Addend) {}
+
       // Support lexicographic sorting.
       bool operator<(const ELFRelocationEntry &RE) const {
         return RE.r_offset < r_offset;
       }
     };
 
+    /// The target specific ELF writer instance.
+    llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+    SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+    SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+    DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
     llvm::DenseMap<const MCSectionData*,
                    std::vector<ELFRelocationEntry> > Relocations;
     DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
@@ -99,49 +172,52 @@ namespace {
 
     /// @}
 
-    ELFObjectWriter *Writer;
-
-    raw_ostream &OS;
-
-    // This holds the current offset into the object file.
-    size_t FileOff;
-
-    unsigned Is64Bit : 1;
+    bool NeedsGOT;
 
-    bool HasRelocationAddend;
+    bool NeedsSymtabShndx;
 
     // This holds the symbol table index of the last local symbol.
     unsigned LastLocalSymbolIndex;
     // This holds the .strtab section index.
     unsigned StringTableIndex;
+    // This holds the .symtab section index.
+    unsigned SymbolTableIndex;
 
     unsigned ShstrtabIndex;
 
+
+    const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+                                  const MCValue &Target,
+                                  const MCFragment &F) const;
+
+    // For arch-specific emission of explicit reloc symbol
+    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                           const MCValue &Target,
+                                           const MCFragment &F,
+                                           bool IsBSS) const {
+      return NULL;
+    }
+
+    bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+    bool hasRelocationAddend() const {
+      return TargetObjectWriter->hasRelocationAddend();
+    }
+
   public:
-    ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit,
-                        bool _HasRelAddend)
-      : Writer(_Writer), OS(Writer->getStream()), FileOff(0),
-        Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) {
+    ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                    raw_ostream &_OS, bool IsLittleEndian)
+      : MCObjectWriter(_OS, IsLittleEndian),
+        TargetObjectWriter(MOTW),
+        NeedsGOT(false), NeedsSymtabShndx(false){
     }
 
-    void Write8(uint8_t Value) { Writer->Write8(Value); }
-    void Write16(uint16_t Value) { Writer->Write16(Value); }
-    void Write32(uint32_t Value) { Writer->Write32(Value); }
-    //void Write64(uint64_t Value) { Writer->Write64(Value); }
-    void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
-    //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
-    //  Writer->WriteBytes(Str, ZeroFillSize);
-    //}
+    virtual ~ELFObjectWriter();
 
     void WriteWord(uint64_t W) {
-      if (Is64Bit)
-        Writer->Write64(W);
+      if (is64Bit())
+        Write64(W);
       else
-        Writer->Write32(W);
-    }
-
-    void String8(char *buf, uint8_t Value) {
-      buf[0] = Value;
+        Write32(W);
     }
 
     void StringLE16(char *buf, uint16_t Value) {
@@ -174,86 +250,191 @@ namespace {
       StringBE32(buf + 4, uint32_t(Value >> 0));
     }
 
-    void String16(char *buf, uint16_t Value) {
-      if (Writer->isLittleEndian())
+    void String8(MCDataFragment &F, uint8_t Value) {
+      char buf[1];
+      buf[0] = Value;
+      F.getContents() += StringRef(buf, 1);
+    }
+
+    void String16(MCDataFragment &F, uint16_t Value) {
+      char buf[2];
+      if (isLittleEndian())
         StringLE16(buf, Value);
       else
         StringBE16(buf, Value);
+      F.getContents() += StringRef(buf, 2);
     }
 
-    void String32(char *buf, uint32_t Value) {
-      if (Writer->isLittleEndian())
+    void String32(MCDataFragment &F, uint32_t Value) {
+      char buf[4];
+      if (isLittleEndian())
         StringLE32(buf, Value);
       else
         StringBE32(buf, Value);
+      F.getContents() += StringRef(buf, 4);
     }
 
-    void String64(char *buf, uint64_t Value) {
-      if (Writer->isLittleEndian())
+    void String64(MCDataFragment &F, uint64_t Value) {
+      char buf[8];
+      if (isLittleEndian())
         StringLE64(buf, Value);
       else
         StringBE64(buf, Value);
+      F.getContents() += StringRef(buf, 8);
     }
 
-    void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+    virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+    /// Default e_flags = 0
+    virtual void WriteEFlags() { Write32(0); }
 
-    void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info,
+    virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                          uint64_t name, uint8_t info,
                           uint64_t value, uint64_t size,
-                          uint8_t other, uint16_t shndx);
+                          uint8_t other, uint32_t shndx,
+                          bool Reserved);
 
-    void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+    virtual void WriteSymbol(MCDataFragment *SymtabF,  MCDataFragment *ShndxF,
+                     ELFSymbolData &MSD,
                      const MCAsmLayout &Layout);
 
-    void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm,
-                          const MCAsmLayout &Layout);
+    typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+    virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                          const MCAssembler &Asm,
+                          const MCAsmLayout &Layout,
+                          const SectionIndexMapTy &SectionIndexMap);
 
-    void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                          const MCFragment *Fragment, const MCFixup &Fixup,
-                          MCValue Target, uint64_t &FixedValue);
+    virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                                  const MCFragment *Fragment, const MCFixup &Fixup,
+                                  MCValue Target, uint64_t &FixedValue);
 
-    uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+    virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
                                          const MCSymbol *S);
 
+    // Map from a group section to the signature symbol
+    typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+    // Map from a signature symbol to the group section
+    typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+
     /// ComputeSymbolTable - Compute the symbol table data
     ///
     /// \param StringTable [out] - The string table data.
     /// \param StringIndexMap [out] - Map from symbol names to offsets in the
     /// string table.
-    void ComputeSymbolTable(MCAssembler &Asm);
+    virtual void ComputeSymbolTable(MCAssembler &Asm,
+                            const SectionIndexMapTy &SectionIndexMap,
+                            RevGroupMapTy RevGroupMap);
 
-    void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+    virtual void ComputeIndexMap(MCAssembler &Asm,
+                         SectionIndexMapTy &SectionIndexMap);
+
+    virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
                          const MCSectionData &SD);
 
-    void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+    virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
       for (MCAssembler::const_iterator it = Asm.begin(),
              ie = Asm.end(); it != ie; ++it) {
         WriteRelocation(Asm, Layout, *it);
       }
     }
 
-    void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout);
+    virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                const SectionIndexMapTy &SectionIndexMap);
 
-    void ExecutePostLayoutBinding(MCAssembler &Asm) {
-      // Compute symbol table information.
-      ComputeSymbolTable(Asm);
-    }
+    // Create the sections that show up in the symbol table. Currently
+    // those are the .note.GNU-stack section and the group sections.
+    virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                       GroupMapTy &GroupMap,
+                                       RevGroupMapTy &RevGroupMap);
 
-    void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+    virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+                                          const MCAsmLayout &Layout);
+
+    virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
                           uint64_t Address, uint64_t Offset,
                           uint64_t Size, uint32_t Link, uint32_t Info,
                           uint64_t Alignment, uint64_t EntrySize);
 
-    void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F,
-                                  const MCSectionData *SD);
+    virtual void WriteRelocationsFragment(const MCAssembler &Asm,
+                                          MCDataFragment *F,
+                                          const MCSectionData *SD);
+
+    virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+    virtual void WriteSection(MCAssembler &Asm,
+                      const SectionIndexMapTy &SectionIndexMap,
+                      uint32_t GroupSymbolIndex,
+                      uint64_t Offset, uint64_t Size, uint64_t Alignment,
+                      const MCSectionELF &Section);
+
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend) = 0;
+  };
+
+  //===- X86ELFObjectWriter -------------------------------------------===//
+
+  class X86ELFObjectWriter : public ELFObjectWriter {
+  public:
+    X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                       raw_ostream &_OS,
+                       bool IsLittleEndian);
+
+    virtual ~X86ELFObjectWriter();
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
+  };
+
+
+  //===- ARMELFObjectWriter -------------------------------------------===//
+
+  class ARMELFObjectWriter : public ELFObjectWriter {
+  public:
+    // FIXME: MCAssembler can't yet return the Subtarget,
+    enum { DefaultEABIVersion = 0x05000000U };
+
+    ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                       raw_ostream &_OS,
+                       bool IsLittleEndian);
 
-    void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+    virtual ~ARMELFObjectWriter();
+
+    virtual void WriteEFlags();
+  protected:
+    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                           const MCValue &Target,
+                                           const MCFragment &F,
+                                           bool IsBSS) const;
+
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
   };
 
+  //===- MBlazeELFObjectWriter -------------------------------------------===//
+
+  class MBlazeELFObjectWriter : public ELFObjectWriter {
+  public:
+    MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                          raw_ostream &_OS,
+                          bool IsLittleEndian);
+
+    virtual ~MBlazeELFObjectWriter();
+  protected:
+    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                  bool IsPCRel, bool IsRelocWithSymbol,
+                                  int64_t Addend);
+  };
 }
 
+ELFObjectWriter::~ELFObjectWriter()
+{}
+
 // Emit the ELF header.
-void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
-                                      unsigned NumberOfSections) {
+void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
+                                  unsigned NumberOfSections) {
   // ELF Header
   // ----------
   //
@@ -267,140 +448,193 @@ void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
   Write8('L');  // e_ident[EI_MAG2]
   Write8('F');  // e_ident[EI_MAG3]
 
-  Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+  Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
 
   // e_ident[EI_DATA]
-  Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+  Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
 
   Write8(ELF::EV_CURRENT);        // e_ident[EI_VERSION]
-  Write8(ELF::ELFOSABI_LINUX);    // e_ident[EI_OSABI]
+  // e_ident[EI_OSABI]
+  switch (TargetObjectWriter->getOSType()) {
+    case Triple::FreeBSD:  Write8(ELF::ELFOSABI_FREEBSD); break;
+    case Triple::Linux:    Write8(ELF::ELFOSABI_LINUX); break;
+    default:               Write8(ELF::ELFOSABI_NONE); break;
+  }
   Write8(0);                  // e_ident[EI_ABIVERSION]
 
   WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
 
   Write16(ELF::ET_REL);             // e_type
 
-  // FIXME: Make this configurable
-  Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target
+  Write16(TargetObjectWriter->getEMachine()); // e_machine = target
 
   Write32(ELF::EV_CURRENT);         // e_version
   WriteWord(0);                    // e_entry, no entry point in .o file
   WriteWord(0);                    // e_phoff, no program header for .o
-  WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) :
+  WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
             sizeof(ELF::Elf32_Ehdr)));  // e_shoff = sec hdr table off in bytes
 
-  // FIXME: Make this configurable.
-  Write32(0);   // e_flags = whatever the target wants
+  // e_flags = whatever the target wants
+  WriteEFlags();
 
   // e_ehsize = ELF header size
-  Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+  Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
 
   Write16(0);                  // e_phentsize = prog header entry size
   Write16(0);                  // e_phnum = # prog header entries = 0
 
   // e_shentsize = Section header entry size
-  Write16(Is64Bit ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+  Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
 
   // e_shnum     = # of section header ents
-  Write16(NumberOfSections);
+  if (NumberOfSections >= ELF::SHN_LORESERVE)
+    Write16(0);
+  else
+    Write16(NumberOfSections);
 
   // e_shstrndx  = Section # of '.shstrtab'
-  Write16(ShstrtabIndex);
+  if (NumberOfSections >= ELF::SHN_LORESERVE)
+    Write16(ELF::SHN_XINDEX);
+  else
+    Write16(ShstrtabIndex);
 }
 
-void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name,
-                                           uint8_t info, uint64_t value,
-                                           uint64_t size, uint8_t other,
-                                           uint16_t shndx) {
-  if (Is64Bit) {
-    char buf[8];
+void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
+                                       MCDataFragment *ShndxF,
+                                       uint64_t name,
+                                       uint8_t info, uint64_t value,
+                                       uint64_t size, uint8_t other,
+                                       uint32_t shndx,
+                                       bool Reserved) {
+  if (ShndxF) {
+    if (shndx >= ELF::SHN_LORESERVE && !Reserved)
+      String32(*ShndxF, shndx);
+    else
+      String32(*ShndxF, 0);
+  }
 
-    String32(buf, name);
-    F->getContents() += StringRef(buf, 4); // st_name
+  uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ?
+    uint16_t(ELF::SHN_XINDEX) : shndx;
 
-    String8(buf, info);
-    F->getContents() += StringRef(buf, 1);  // st_info
+  if (is64Bit()) {
+    String32(*SymtabF, name);  // st_name
+    String8(*SymtabF, info);   // st_info
+    String8(*SymtabF, other);  // st_other
+    String16(*SymtabF, Index); // st_shndx
+    String64(*SymtabF, value); // st_value
+    String64(*SymtabF, size);  // st_size
+  } else {
+    String32(*SymtabF, name);  // st_name
+    String32(*SymtabF, value); // st_value
+    String32(*SymtabF, size);  // st_size
+    String8(*SymtabF, info);   // st_info
+    String8(*SymtabF, other);  // st_other
+    String16(*SymtabF, Index); // st_shndx
+  }
+}
 
-    String8(buf, other);
-    F->getContents() += StringRef(buf, 1); // st_other
+static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) {
+  if (Data.isCommon() && Data.isExternal())
+    return Data.getCommonAlignment();
 
-    String16(buf, shndx);
-    F->getContents() += StringRef(buf, 2); // st_shndx
+  const MCSymbol &Symbol = Data.getSymbol();
 
-    String64(buf, value);
-    F->getContents() += StringRef(buf, 8); // st_value
+  if (Symbol.isAbsolute() && Symbol.isVariable()) {
+    if (const MCExpr *Value = Symbol.getVariableValue()) {
+      int64_t IntValue;
+      if (Value->EvaluateAsAbsolute(IntValue, Layout))
+	return (uint64_t)IntValue;
+    }
+  }
 
-    String64(buf, size);
-    F->getContents() += StringRef(buf, 8);  // st_size
-  } else {
-    char buf[4];
+  if (!Symbol.isInSection())
+    return 0;
+
+  if (Data.getFragment())
+    return Layout.getSymbolOffset(&Data);
+
+  return 0;
+}
 
-    String32(buf, name);
-    F->getContents() += StringRef(buf, 4);  // st_name
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                               const MCAsmLayout &Layout) {
+  // The presence of symbol versions causes undefined symbols and
+  // versions declared with @@@ to be renamed.
 
-    String32(buf, value);
-    F->getContents() += StringRef(buf, 4); // st_value
+  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+         ie = Asm.symbol_end(); it != ie; ++it) {
+    const MCSymbol &Alias = it->getSymbol();
+    const MCSymbol &Symbol = Alias.AliasedSymbol();
+    MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+    // Not an alias.
+    if (&Symbol == &Alias)
+      continue;
+
+    StringRef AliasName = Alias.getName();
+    size_t Pos = AliasName.find('@');
+    if (Pos == StringRef::npos)
+      continue;
 
-    String32(buf, size);
-    F->getContents() += StringRef(buf, 4);  // st_size
+    // Aliases defined with .symvar copy the binding from the symbol they alias.
+    // This is the first place we are able to copy this information.
+    it->setExternal(SD.isExternal());
+    SetBinding(*it, GetBinding(SD));
 
-    String8(buf, info);
-    F->getContents() += StringRef(buf, 1);  // st_info
+    StringRef Rest = AliasName.substr(Pos);
+    if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
+      continue;
 
-    String8(buf, other);
-    F->getContents() += StringRef(buf, 1); // st_other
+    // FIXME: produce a better error message.
+    if (Symbol.isUndefined() && Rest.startswith("@@") &&
+        !Rest.startswith("@@@"))
+      report_fatal_error("A @@ version cannot be undefined");
 
-    String16(buf, shndx);
-    F->getContents() += StringRef(buf, 2); // st_shndx
+    Renames.insert(std::make_pair(&Symbol, &Alias));
   }
 }
 
-void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
-                                      const MCAsmLayout &Layout) {
-  MCSymbolData &Data = *MSD.SymbolData;
-  uint8_t Info = (Data.getFlags() & 0xff);
-  uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift);
-  uint64_t Value = 0;
+void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
+                                  MCDataFragment *ShndxF,
+                                  ELFSymbolData &MSD,
+                                  const MCAsmLayout &Layout) {
+  MCSymbolData &OrigData = *MSD.SymbolData;
+  MCSymbolData &Data =
+    Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol());
+
+  bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
+    Data.getSymbol().isVariable();
+
+  uint8_t Binding = GetBinding(OrigData);
+  uint8_t Visibility = GetVisibility(OrigData);
+  uint8_t Type = GetType(Data);
+
+  uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+  uint8_t Other = Visibility;
+
+  uint64_t Value = SymbolValue(Data, Layout);
   uint64_t Size = 0;
-  const MCExpr *ESize;
 
-  if (Data.isCommon() && Data.isExternal())
-    Value = Data.getCommonAlignment();
-
-  if (!Data.isCommon())
-    if (MCFragment *FF = Data.getFragment())
-      Value = Layout.getSymbolAddress(&Data) -
-              Layout.getSectionAddress(FF->getParent());
-
-  ESize = Data.getSize();
-  if (Data.getSize()) {
-    MCValue Res;
-    if (ESize->getKind() == MCExpr::Binary) {
-      const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize);
-
-      if (BE->EvaluateAsRelocatable(Res, &Layout)) {
-        MCSymbolData &A =
-          Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol());
-        MCSymbolData &B =
-          Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol());
-
-        Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B);
-      }
-    } else if (ESize->getKind() == MCExpr::Constant) {
-      Size = static_cast<const MCConstantExpr *>(ESize)->getValue();
-    } else {
-      assert(0 && "Unsupported size expression");
-    }
+  assert(!(Data.isCommon() && !Data.isExternal()));
+
+  const MCExpr *ESize = Data.getSize();
+  if (ESize) {
+    int64_t Res;
+    if (!ESize->EvaluateAsAbsolute(Res, Layout))
+      report_fatal_error("Size expression must be absolute.");
+    Size = Res;
   }
 
   // Write out the symbol table entry
-  WriteSymbolEntry(F, MSD.StringIndex, Info, Value,
-                   Size, Other, MSD.SectionIndex);
+  WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value,
+                   Size, Other, MSD.SectionIndex, IsReserved);
 }
 
-void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
-                                           const MCAssembler &Asm,
-                                           const MCAsmLayout &Layout) {
+void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
+                                       MCDataFragment *ShndxF,
+                                       const MCAssembler &Asm,
+                                       const MCAsmLayout &Layout,
+                                     const SectionIndexMapTy &SectionIndexMap) {
   // The string table must be emitted first because we need the index
   // into the string table for all the symbol names.
   assert(StringTable.size() && "Missing string table");
@@ -408,258 +642,343 @@ void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
   // FIXME: Make sure the start of the symbol table is aligned.
 
   // The first entry is the undefined symbol entry.
-  unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
-  F->getContents().append(EntrySize, '\x00');
+  WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
 
   // Write the symbol table entries.
   LastLocalSymbolIndex = LocalSymbolData.size() + 1;
   for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
     ELFSymbolData &MSD = LocalSymbolData[i];
-    WriteSymbol(F, MSD, Layout);
+    WriteSymbol(SymtabF, ShndxF, MSD, Layout);
   }
 
-  // Write out a symbol table entry for each section.
-  // leaving out the just added .symtab which is at
-  // the very end
-  unsigned Index = 1;
-  for (MCAssembler::const_iterator it = Asm.begin(),
-       ie = Asm.end(); it != ie; ++it, ++Index) {
+  // Write out a symbol table entry for each regular section.
+  for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+       ++i) {
     const MCSectionELF &Section =
-      static_cast<const MCSectionELF&>(it->getSection());
-    // Leave out relocations so we don't have indexes within
-    // the relocations messed up
-    if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL)
-      continue;
-    if (Index == Asm.size())
+      static_cast<const MCSectionELF&>(i->getSection());
+    if (Section.getType() == ELF::SHT_RELA ||
+        Section.getType() == ELF::SHT_REL ||
+        Section.getType() == ELF::SHT_STRTAB ||
+        Section.getType() == ELF::SHT_SYMTAB)
       continue;
-    WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index);
+    WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+                     ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
     LastLocalSymbolIndex++;
   }
 
   for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
     ELFSymbolData &MSD = ExternalSymbolData[i];
     MCSymbolData &Data = *MSD.SymbolData;
-    assert((Data.getFlags() & ELF_STB_Global) &&
-           "External symbol requires STB_GLOBAL flag");
-    WriteSymbol(F, MSD, Layout);
-    if (Data.getFlags() & ELF_STB_Local)
+    assert(((Data.getFlags() & ELF_STB_Global) ||
+            (Data.getFlags() & ELF_STB_Weak)) &&
+           "External symbol requires STB_GLOBAL or STB_WEAK flag");
+    WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+    if (GetBinding(Data) == ELF::STB_LOCAL)
       LastLocalSymbolIndex++;
   }
 
   for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
     ELFSymbolData &MSD = UndefinedSymbolData[i];
     MCSymbolData &Data = *MSD.SymbolData;
-    Data.setFlags(Data.getFlags() | ELF_STB_Global);
-    WriteSymbol(F, MSD, Layout);
-    if (Data.getFlags() & ELF_STB_Local)
+    WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+    if (GetBinding(Data) == ELF::STB_LOCAL)
       LastLocalSymbolIndex++;
   }
 }
 
-// FIXME: this is currently X86/X86_64 only
-void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm,
-                                           const MCAsmLayout &Layout,
-                                           const MCFragment *Fragment,
-                                           const MCFixup &Fixup,
-                                           MCValue Target,
-                                           uint64_t &FixedValue) {
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+                                               const MCValue &Target,
+                                               const MCFragment &F) const {
+  const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+  const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+  const MCSymbol *Renamed = Renames.lookup(&Symbol);
+  const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+  if (ASymbol.isUndefined()) {
+    if (Renamed)
+      return Renamed;
+    return &ASymbol;
+  }
+
+  if (SD.isExternal()) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  const MCSectionELF &Section =
+    static_cast<const MCSectionELF&>(ASymbol.getSection());
+  const SectionKind secKind = Section.getKind();
+
+  if (secKind.isBSS())
+    return ExplicitRelSym(Asm, Target, F, true);
+
+  if (secKind.isThreadLocal()) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+  const MCSectionELF &Sec2 =
+    static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+  if (&Sec2 != &Section &&
+      (Kind == MCSymbolRefExpr::VK_PLT ||
+       Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+       Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  if (Section.getFlags() & ELF::SHF_MERGE) {
+    if (Target.getConstant() == 0)
+      return NULL;
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  return ExplicitRelSym(Asm, Target, F, false);
+}
+
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                       const MCAsmLayout &Layout,
+                                       const MCFragment *Fragment,
+                                       const MCFixup &Fixup,
+                                       MCValue Target,
+                                       uint64_t &FixedValue) {
   int64_t Addend = 0;
-  unsigned Index = 0;
+  int Index = 0;
   int64_t Value = Target.getConstant();
+  const MCSymbol *RelocSymbol = NULL;
 
+  bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
   if (!Target.isAbsolute()) {
-    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
-    const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
-    MCFragment *F = SD.getFragment();
-
-    if (Base) {
-      if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) {
-        Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
-        Value += Layout.getSymbolAddress(&SD);
-      } else
-        Index = getSymbolIndexInSymbolTable(Asm, Symbol);
-      if (Base != &SD)
-        Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
-      Addend = Value;
-      // Compensate for the addend on i386.
-      if (Is64Bit)
-        Value = 0;
-    } else {
-      if (F) {
-        // Index of the section in .symtab against this symbol
-        // is being relocated + 2 (empty section + abs. symbols).
-        Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
-
-        MCSectionData *FSD = F->getParent();
-        // Offset of the symbol in the section
-        Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD);
-      } else {
-        FixedValue = Value;
-        return;
-      }
-    }
-  }
+    const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+    const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+    RelocSymbol = SymbolToReloc(Asm, Target, *Fragment);
 
-  FixedValue = Value;
+    if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+      const MCSymbol &SymbolB = RefB->getSymbol();
+      MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+      IsPCRel = true;
 
-  // determine the type of the relocation
-  bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind());
-  unsigned Type;
-  if (Is64Bit) {
-    if (IsPCRel) {
-      Type = ELF::R_X86_64_PC32;
-    } else {
-      switch ((unsigned)Fixup.getKind()) {
-      default: llvm_unreachable("invalid fixup kind!");
-      case FK_Data_8: Type = ELF::R_X86_64_64; break;
-      case X86::reloc_pcrel_4byte:
-      case FK_Data_4:
-        // check that the offset fits within a signed long
-        if (isInt<32>(Target.getConstant()))
-          Type = ELF::R_X86_64_32S;
-        else
-          Type = ELF::R_X86_64_32;
-        break;
-      case FK_Data_2: Type = ELF::R_X86_64_16; break;
-      case X86::reloc_pcrel_1byte:
-      case FK_Data_1: Type = ELF::R_X86_64_8; break;
-      }
+      // Offset of the symbol in the section
+      int64_t a = Layout.getSymbolOffset(&SDB);
+
+      // Ofeset of the relocation in the section
+      int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+      Value += b - a;
     }
-  } else {
-    if (IsPCRel) {
-      Type = ELF::R_386_PC32;
+
+    if (!RelocSymbol) {
+      MCSymbolData &SD = Asm.getSymbolData(ASymbol);
+      MCFragment *F = SD.getFragment();
+
+      Index = F->getParent()->getOrdinal() + 1;
+
+      // Offset of the symbol in the section
+      Value += Layout.getSymbolOffset(&SD);
     } else {
-      switch ((unsigned)Fixup.getKind()) {
-      default: llvm_unreachable("invalid fixup kind!");
-      case X86::reloc_pcrel_4byte:
-      case FK_Data_4: Type = ELF::R_386_32; break;
-      case FK_Data_2: Type = ELF::R_386_16; break;
-      case X86::reloc_pcrel_1byte:
-      case FK_Data_1: Type = ELF::R_386_8; break;
-      }
+      if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
+        WeakrefUsedInReloc.insert(RelocSymbol);
+      else
+        UsedInReloc.insert(RelocSymbol);
+      Index = -1;
     }
+    Addend = Value;
+    // Compensate for the addend on i386.
+    if (is64Bit())
+      Value = 0;
   }
 
-  ELFRelocationEntry ERE;
+  FixedValue = Value;
+  unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
+                               (RelocSymbol != 0), Addend);
+  
+  uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
+    Fixup.getOffset();
+
+  if (!hasRelocationAddend())
+    Addend = 0;
+  ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+  Relocations[Fragment->getParent()].push_back(ERE);
+}
 
-  if (Is64Bit) {
-    struct ELF::Elf64_Rela ERE64;
-    ERE64.setSymbolAndType(Index, Type);
-    ERE.r_info = ERE64.r_info;
-  } else {
-    struct ELF::Elf32_Rela ERE32;
-    ERE32.setSymbolAndType(Index, Type);
-    ERE.r_info = ERE32.r_info;
-  }
 
-  ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+uint64_t
+ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+                                             const MCSymbol *S) {
+  MCSymbolData &SD = Asm.getSymbolData(*S);
+  return SD.getIndex();
+}
 
-  if (HasRelocationAddend)
-    ERE.r_addend = Addend;
-  else
-    ERE.r_addend = 0; // Silence compiler warning.
+static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+                       bool Used, bool Renamed) {
+  if (Data.getFlags() & ELF_Other_Weakref)
+    return false;
 
-  Relocations[Fragment->getParent()].push_back(ERE);
+  if (Used)
+    return true;
+
+  if (Renamed)
+    return false;
+
+  const MCSymbol &Symbol = Data.getSymbol();
+
+  if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
+    return true;
+
+  const MCSymbol &A = Symbol.AliasedSymbol();
+  if (!A.isVariable() && A.isUndefined() && !Data.isCommon())
+    return false;
+
+  if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
+    return false;
+
+  if (Symbol.isTemporary())
+    return false;
+
+  return true;
 }
 
-uint64_t
-ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
-                                                 const MCSymbol *S) {
-  MCSymbolData &SD = Asm.getSymbolData(*S);
+static bool isLocal(const MCSymbolData &Data, bool isSignature,
+                    bool isUsedInReloc) {
+  if (Data.isExternal())
+    return false;
+
+  const MCSymbol &Symbol = Data.getSymbol();
+  const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
 
-  // Local symbol.
-  if (!SD.isExternal() && !S->isUndefined())
-    return SD.getIndex() + /* empty symbol */ 1;
+  if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
+    if (isSignature && !isUsedInReloc)
+      return true;
 
-  // External or undefined symbol.
-  return SD.getIndex() + Asm.size() + /* empty symbol */ 1;
+    return false;
+  }
+
+  return true;
 }
 
-void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
-  // Build section lookup table.
-  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
+                                      SectionIndexMapTy &SectionIndexMap) {
   unsigned Index = 1;
   for (MCAssembler::iterator it = Asm.begin(),
-         ie = Asm.end(); it != ie; ++it, ++Index)
-    SectionIndexMap[&it->getSection()] = Index;
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF &>(it->getSection());
+    if (Section.getType() != ELF::SHT_GROUP)
+      continue;
+    SectionIndexMap[&Section] = Index++;
+  }
+
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF &>(it->getSection());
+    if (Section.getType() == ELF::SHT_GROUP)
+      continue;
+    SectionIndexMap[&Section] = Index++;
+  }
+}
+
+void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
+                                      const SectionIndexMapTy &SectionIndexMap,
+                                      RevGroupMapTy RevGroupMap) {
+  // FIXME: Is this the correct place to do this?
+  if (NeedsGOT) {
+    llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+    MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+    MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
+    Data.setExternal(true);
+    SetBinding(Data, ELF::STB_GLOBAL);
+  }
+
+  // Build section lookup table.
+  int NumRegularSections = Asm.size();
 
   // Index 0 is always the empty string.
   StringMap<uint64_t> StringIndexMap;
   StringTable += '\x00';
 
-  // Add the data for local symbols.
+  // Add the data for the symbols.
   for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
          ie = Asm.symbol_end(); it != ie; ++it) {
     const MCSymbol &Symbol = it->getSymbol();
 
-    // Ignore non-linker visible symbols.
-    if (!Asm.isSymbolLinkerVisible(Symbol))
-      continue;
+    bool Used = UsedInReloc.count(&Symbol);
+    bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
+    bool isSignature = RevGroupMap.count(&Symbol);
 
-    if (it->isExternal() || Symbol.isUndefined())
+    if (!isInSymtab(Asm, *it,
+                    Used || WeakrefUsed || isSignature,
+                    Renames.count(&Symbol)))
       continue;
 
-    uint64_t &Entry = StringIndexMap[Symbol.getName()];
-    if (!Entry) {
-      Entry = StringTable.size();
-      StringTable += Symbol.getName();
-      StringTable += '\x00';
-    }
-
     ELFSymbolData MSD;
     MSD.SymbolData = it;
-    MSD.StringIndex = Entry;
+    const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+    // Undefined symbols are global, but this is the first place we
+    // are able to set it.
+    bool Local = isLocal(*it, isSignature, Used);
+    if (!Local && GetBinding(*it) == ELF::STB_LOCAL) {
+      MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
+      SetBinding(*it, ELF::STB_GLOBAL);
+      SetBinding(SD, ELF::STB_GLOBAL);
+    }
+
+    if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
+      SetBinding(*it, ELF::STB_WEAK);
 
-    if (Symbol.isAbsolute()) {
+    if (it->isCommon()) {
+      assert(!Local);
+      MSD.SectionIndex = ELF::SHN_COMMON;
+    } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) {
       MSD.SectionIndex = ELF::SHN_ABS;
-      LocalSymbolData.push_back(MSD);
+    } else if (RefSymbol.isUndefined()) {
+      if (isSignature && !Used)
+        MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+      else
+        MSD.SectionIndex = ELF::SHN_UNDEF;
     } else {
-      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      const MCSectionELF &Section =
+        static_cast<const MCSectionELF&>(RefSymbol.getSection());
+      MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+      if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+        NeedsSymtabShndx = true;
       assert(MSD.SectionIndex && "Invalid section index!");
-      LocalSymbolData.push_back(MSD);
     }
-  }
-
-  // Now add non-local symbols.
-  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-         ie = Asm.symbol_end(); it != ie; ++it) {
-    const MCSymbol &Symbol = it->getSymbol();
 
-    // Ignore non-linker visible symbols.
-    if (!Asm.isSymbolLinkerVisible(Symbol))
-      continue;
-
-    if (!it->isExternal() && !Symbol.isUndefined())
-      continue;
+    // The @@@ in symbol version is replaced with @ in undefined symbols and
+    // @@ in defined ones.
+    StringRef Name = Symbol.getName();
+    SmallString<32> Buf;
+
+    size_t Pos = Name.find("@@@");
+    if (Pos != StringRef::npos) {
+      Buf += Name.substr(0, Pos);
+      unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+      Buf += Name.substr(Pos + Skip);
+      Name = Buf;
+    }
 
-    uint64_t &Entry = StringIndexMap[Symbol.getName()];
+    uint64_t &Entry = StringIndexMap[Name];
     if (!Entry) {
       Entry = StringTable.size();
-      StringTable += Symbol.getName();
+      StringTable += Name;
       StringTable += '\x00';
     }
-
-    ELFSymbolData MSD;
-    MSD.SymbolData = it;
     MSD.StringIndex = Entry;
-
-    if (Symbol.isUndefined()) {
-      MSD.SectionIndex = ELF::SHN_UNDEF;
-      // XXX: for some reason we dont Emit* this
-      it->setFlags(it->getFlags() | ELF_STB_Global);
+    if (MSD.SectionIndex == ELF::SHN_UNDEF)
       UndefinedSymbolData.push_back(MSD);
-    } else if (Symbol.isAbsolute()) {
-      MSD.SectionIndex = ELF::SHN_ABS;
-      ExternalSymbolData.push_back(MSD);
-    } else if (it->isCommon()) {
-      MSD.SectionIndex = ELF::SHN_COMMON;
-      ExternalSymbolData.push_back(MSD);
-    } else {
-      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-      assert(MSD.SectionIndex && "Invalid section index!");
+    else if (Local)
+      LocalSymbolData.push_back(MSD);
+    else
       ExternalSymbolData.push_back(MSD);
-    }
   }
 
   // Symbols are required to be in lexicographic order.
@@ -669,55 +988,56 @@ void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
 
   // Set the symbol indices. Local symbols must come before all other
   // symbols with non-local bindings.
-  Index = 0;
+  unsigned Index = 1;
   for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
     LocalSymbolData[i].SymbolData->setIndex(Index++);
+
+  Index += NumRegularSections;
+
   for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
     ExternalSymbolData[i].SymbolData->setIndex(Index++);
   for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
     UndefinedSymbolData[i].SymbolData->setIndex(Index++);
 }
 
-void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
-                                          const MCSectionData &SD) {
+void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+                                      const MCSectionData &SD) {
   if (!Relocations[&SD].empty()) {
     MCContext &Ctx = Asm.getContext();
-    const MCSection *RelaSection;
+    const MCSectionELF *RelaSection;
     const MCSectionELF &Section =
       static_cast<const MCSectionELF&>(SD.getSection());
 
     const StringRef SectionName = Section.getSectionName();
-    std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel";
+    std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
     RelaSectionName += SectionName;
 
     unsigned EntrySize;
-    if (HasRelocationAddend)
-      EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+    if (hasRelocationAddend())
+      EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
     else
-      EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+      EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
 
-    RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ?
+    RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
                                     ELF::SHT_RELA : ELF::SHT_REL, 0,
                                     SectionKind::getReadOnly(),
-                                    false, EntrySize);
+                                    EntrySize, "");
 
     MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
-    RelaSD.setAlignment(1);
+    RelaSD.setAlignment(is64Bit() ? 8 : 4);
 
     MCDataFragment *F = new MCDataFragment(&RelaSD);
 
     WriteRelocationsFragment(Asm, F, &SD);
-
-    Asm.AddSectionToTheEnd(RelaSD, Layout);
   }
 }
 
-void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
-                                           uint64_t Flags, uint64_t Address,
-                                           uint64_t Offset, uint64_t Size,
-                                           uint32_t Link, uint32_t Info,
-                                           uint64_t Alignment,
-                                           uint64_t EntrySize) {
+void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+                                       uint64_t Flags, uint64_t Address,
+                                       uint64_t Offset, uint64_t Size,
+                                       uint32_t Link, uint32_t Info,
+                                       uint64_t Alignment,
+                                       uint64_t EntrySize) {
   Write32(Name);        // sh_name: index into string table
   Write32(Type);        // sh_type
   WriteWord(Flags);     // sh_flags
@@ -730,9 +1050,9 @@ void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
   WriteWord(EntrySize); // sh_entsize
 }
 
-void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
-                                                   MCDataFragment *F,
-                                                   const MCSectionData *SD) {
+void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
+                                               MCDataFragment *F,
+                                               const MCSectionData *SD) {
   std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
   // sort by the r_offset just like gnu as does
   array_pod_sort(Relocs.begin(), Relocs.end());
@@ -740,67 +1060,90 @@ void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
   for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
     ELFRelocationEntry entry = Relocs[e - i - 1];
 
-    unsigned WordSize = Is64Bit ? 8 : 4;
-    F->getContents() += StringRef((const char *)&entry.r_offset, WordSize);
-    F->getContents() += StringRef((const char *)&entry.r_info, WordSize);
+    if (!entry.Index)
+      ;
+    else if (entry.Index < 0)
+      entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
+    else
+      entry.Index += LocalSymbolData.size();
+    if (is64Bit()) {
+      String64(*F, entry.r_offset);
+
+      struct ELF::Elf64_Rela ERE64;
+      ERE64.setSymbolAndType(entry.Index, entry.Type);
+      String64(*F, ERE64.r_info);
 
-    if (HasRelocationAddend)
-      F->getContents() += StringRef((const char *)&entry.r_addend, WordSize);
+      if (hasRelocationAddend())
+        String64(*F, entry.r_addend);
+    } else {
+      String32(*F, entry.r_offset);
+
+      struct ELF::Elf32_Rela ERE32;
+      ERE32.setSymbolAndType(entry.Index, entry.Type);
+      String32(*F, ERE32.r_info);
+
+      if (hasRelocationAddend())
+        String32(*F, entry.r_addend);
+    }
   }
 }
 
-void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
-                                                 MCAsmLayout &Layout) {
+void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
+                                             MCAsmLayout &Layout,
+                                    const SectionIndexMapTy &SectionIndexMap) {
   MCContext &Ctx = Asm.getContext();
   MCDataFragment *F;
 
-  WriteRelocations(Asm, Layout);
-
-  const MCSection *SymtabSection;
-  unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+  unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
 
-  SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
-                                    SectionKind::getReadOnly(),
-                                    false, EntrySize);
+  // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
+  const MCSectionELF *ShstrtabSection =
+    Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+                      SectionKind::getReadOnly());
+  MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+  ShstrtabSD.setAlignment(1);
+  ShstrtabIndex = Asm.size();
 
+  const MCSectionELF *SymtabSection =
+    Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+                      SectionKind::getReadOnly(),
+                      EntrySize, "");
   MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+  SymtabSD.setAlignment(is64Bit() ? 8 : 4);
+  SymbolTableIndex = Asm.size();
 
-  SymtabSD.setAlignment(Is64Bit ? 8 : 4);
+  MCSectionData *SymtabShndxSD = NULL;
 
-  F = new MCDataFragment(&SymtabSD);
-
-  // Symbol table
-  WriteSymbolTable(F, Asm, Layout);
-  Asm.AddSectionToTheEnd(SymtabSD, Layout);
+  if (NeedsSymtabShndx) {
+    const MCSectionELF *SymtabShndxSection =
+      Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0,
+                        SectionKind::getReadOnly(), 4, "");
+    SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection);
+    SymtabShndxSD->setAlignment(4);
+  }
 
   const MCSection *StrtabSection;
   StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
-                                    SectionKind::getReadOnly(), false);
-
+                                    SectionKind::getReadOnly());
   MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
   StrtabSD.setAlignment(1);
-
-  // FIXME: This isn't right. If the sections get rearranged this will
-  // be wrong. We need a proper lookup.
   StringTableIndex = Asm.size();
 
-  F = new MCDataFragment(&StrtabSD);
-  F->getContents().append(StringTable.begin(), StringTable.end());
-  Asm.AddSectionToTheEnd(StrtabSD, Layout);
+  WriteRelocations(Asm, Layout);
 
-  const MCSection *ShstrtabSection;
-  ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
-                                      SectionKind::getReadOnly(), false);
+  // Symbol table
+  F = new MCDataFragment(&SymtabSD);
+  MCDataFragment *ShndxF = NULL;
+  if (NeedsSymtabShndx) {
+    ShndxF = new MCDataFragment(SymtabShndxSD);
+  }
+  WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap);
 
-  MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
-  ShstrtabSD.setAlignment(1);
+  F = new MCDataFragment(&StrtabSD);
+  F->getContents().append(StringTable.begin(), StringTable.end());
 
   F = new MCDataFragment(&ShstrtabSD);
 
-  // FIXME: This isn't right. If the sections get rearranged this will
-  // be wrong. We need a proper lookup.
-  ShstrtabIndex = Asm.size();
-
   // Section header string table.
   //
   // The first entry of a string table holds a null character so skip
@@ -808,166 +1151,691 @@ void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
   uint64_t Index = 1;
   F->getContents() += '\x00';
 
+  StringMap<uint64_t> SecStringMap;
   for (MCAssembler::const_iterator it = Asm.begin(),
          ie = Asm.end(); it != ie; ++it) {
     const MCSectionELF &Section =
       static_cast<const MCSectionELF&>(it->getSection());
+    // FIXME: We could merge suffixes like in .text and .rela.text.
 
+    StringRef Name = Section.getSectionName();
+    if (SecStringMap.count(Name)) {
+      SectionStringTableIndex[&Section] =  SecStringMap[Name];
+      continue;
+    }
     // Remember the index into the string table so we can write it
     // into the sh_name field of the section header table.
-    SectionStringTableIndex[&it->getSection()] = Index;
+    SectionStringTableIndex[&Section] = Index;
+    SecStringMap[Name] = Index;
 
-    Index += Section.getSectionName().size() + 1;
-    F->getContents() += Section.getSectionName();
+    Index += Name.size() + 1;
+    F->getContents() += Name;
     F->getContents() += '\x00';
   }
+}
+
+void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
+                                            MCAsmLayout &Layout,
+                                            GroupMapTy &GroupMap,
+                                            RevGroupMapTy &RevGroupMap) {
+  // Create the .note.GNU-stack section if needed.
+  MCContext &Ctx = Asm.getContext();
+  if (Asm.getNoExecStack()) {
+    const MCSectionELF *GnuStackSection =
+      Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
+                        SectionKind::getReadOnly());
+    Asm.getOrCreateSectionData(*GnuStackSection);
+  }
+
+  // Build the groups
+  for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+       it != ie; ++it) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF&>(it->getSection());
+    if (!(Section.getFlags() & ELF::SHF_GROUP))
+      continue;
+
+    const MCSymbol *SignatureSymbol = Section.getGroup();
+    Asm.getOrCreateSymbolData(*SignatureSymbol);
+    const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
+    if (!Group) {
+      Group = Ctx.CreateELFGroupSection();
+      MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+      Data.setAlignment(4);
+      MCDataFragment *F = new MCDataFragment(&Data);
+      String32(*F, ELF::GRP_COMDAT);
+    }
+    GroupMap[Group] = SignatureSymbol;
+  }
+
+  // Add sections to the groups
+  unsigned Index = 1;
+  unsigned NumGroups = RevGroupMap.size();
+  for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+       it != ie; ++it, ++Index) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF&>(it->getSection());
+    if (!(Section.getFlags() & ELF::SHF_GROUP))
+      continue;
+    const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
+    MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+    // FIXME: we could use the previous fragment
+    MCDataFragment *F = new MCDataFragment(&Data);
+    String32(*F, NumGroups + Index);
+  }
+}
+
+void ELFObjectWriter::WriteSection(MCAssembler &Asm,
+                                   const SectionIndexMapTy &SectionIndexMap,
+                                   uint32_t GroupSymbolIndex,
+                                   uint64_t Offset, uint64_t Size,
+                                   uint64_t Alignment,
+                                   const MCSectionELF &Section) {
+  uint64_t sh_link = 0;
+  uint64_t sh_info = 0;
+
+  switch(Section.getType()) {
+  case ELF::SHT_DYNAMIC:
+    sh_link = SectionStringTableIndex[&Section];
+    sh_info = 0;
+    break;
+
+  case ELF::SHT_REL:
+  case ELF::SHT_RELA: {
+    const MCSectionELF *SymtabSection;
+    const MCSectionELF *InfoSection;
+    SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
+                                                   0,
+                                                   SectionKind::getReadOnly());
+    sh_link = SectionIndexMap.lookup(SymtabSection);
+    assert(sh_link && ".symtab not found");
+
+    // Remove ".rel" and ".rela" prefixes.
+    unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+    StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+    InfoSection = Asm.getContext().getELFSection(SectionName,
+                                                 ELF::SHT_PROGBITS, 0,
+                                                 SectionKind::getReadOnly());
+    sh_info = SectionIndexMap.lookup(InfoSection);
+    break;
+  }
+
+  case ELF::SHT_SYMTAB:
+  case ELF::SHT_DYNSYM:
+    sh_link = StringTableIndex;
+    sh_info = LastLocalSymbolIndex;
+    break;
+
+  case ELF::SHT_SYMTAB_SHNDX:
+    sh_link = SymbolTableIndex;
+    break;
+
+  case ELF::SHT_PROGBITS:
+  case ELF::SHT_STRTAB:
+  case ELF::SHT_NOBITS:
+  case ELF::SHT_NOTE:
+  case ELF::SHT_NULL:
+  case ELF::SHT_ARM_ATTRIBUTES:
+  case ELF::SHT_INIT_ARRAY:
+  case ELF::SHT_FINI_ARRAY:
+  case ELF::SHT_PREINIT_ARRAY:
+  case ELF::SHT_X86_64_UNWIND:
+    // Nothing to do.
+    break;
+
+  case ELF::SHT_GROUP: {
+    sh_link = SymbolTableIndex;
+    sh_info = GroupSymbolIndex;
+    break;
+  }
+
+  default:
+    assert(0 && "FIXME: sh_type value not supported!");
+    break;
+  }
 
-  Asm.AddSectionToTheEnd(ShstrtabSD, Layout);
+  WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+                   Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
+                   Alignment, Section.getEntrySize());
 }
 
-void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm,
-                                      const MCAsmLayout &Layout) {
+static bool IsELFMetaDataSection(const MCSectionData &SD) {
+  return SD.getOrdinal() == ~UINT32_C(0) &&
+    !SD.getSection().isVirtualSection();
+}
+
+static uint64_t DataSectionSize(const MCSectionData &SD) {
+  uint64_t Ret = 0;
+  for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+       ++i) {
+    const MCFragment &F = *i;
+    assert(F.getKind() == MCFragment::FT_Data);
+    Ret += cast<MCDataFragment>(F).getContents().size();
+  }
+  return Ret;
+}
+
+static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+                                   const MCSectionData &SD) {
+  if (IsELFMetaDataSection(SD))
+    return DataSectionSize(SD);
+  return Layout.getSectionFileSize(&SD);
+}
+
+static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+                                      const MCSectionData &SD) {
+  if (IsELFMetaDataSection(SD))
+    return DataSectionSize(SD);
+  return Layout.getSectionAddressSize(&SD);
+}
+
+static void WriteDataSectionData(ELFObjectWriter *W, const MCSectionData &SD) {
+  for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+       ++i) {
+    const MCFragment &F = *i;
+    assert(F.getKind() == MCFragment::FT_Data);
+    W->WriteBytes(cast<MCDataFragment>(F).getContents().str());
+  }
+}
+
+void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+                                  const MCAsmLayout &Layout) {
+  GroupMapTy GroupMap;
+  RevGroupMapTy RevGroupMap;
+  CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+                        RevGroupMap);
+
+  SectionIndexMapTy SectionIndexMap;
+
+  ComputeIndexMap(Asm, SectionIndexMap);
+
+  // Compute symbol table information.
+  ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap);
+
   CreateMetadataSections(const_cast<MCAssembler&>(Asm),
-                         const_cast<MCAsmLayout&>(Layout));
+                         const_cast<MCAsmLayout&>(Layout),
+                         SectionIndexMap);
+
+  // Update to include the metadata sections.
+  ComputeIndexMap(Asm, SectionIndexMap);
 
   // Add 1 for the null section.
   unsigned NumSections = Asm.size() + 1;
+  uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
+  uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+                                    sizeof(ELF::Elf32_Ehdr);
+  uint64_t FileOff = HeaderSize;
+
+  std::vector<const MCSectionELF*> Sections;
+  Sections.resize(NumSections);
+
+  for (SectionIndexMapTy::const_iterator i=
+         SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+    const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+    Sections[p.second] = p.first;
+  }
 
-  uint64_t SectionDataSize = 0;
+  for (unsigned i = 1; i < NumSections; ++i) {
+    const MCSectionELF &Section = *Sections[i];
+    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
 
-  for (MCAssembler::const_iterator it = Asm.begin(),
-         ie = Asm.end(); it != ie; ++it) {
-    const MCSectionData &SD = *it;
+    FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
 
     // Get the size of the section in the output file (including padding).
-    uint64_t Size = Layout.getSectionFileSize(&SD);
-    SectionDataSize += Size;
+    FileOff += GetSectionFileSize(Layout, SD);
   }
 
+  FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
+
   // Write out the ELF header ...
-  WriteHeader(SectionDataSize, NumSections);
-  FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr);
+  WriteHeader(FileOff - HeaderSize, NumSections);
+
+  FileOff = HeaderSize;
 
   // ... then all of the sections ...
   DenseMap<const MCSection*, uint64_t> SectionOffsetMap;
 
-  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+  for (unsigned i = 1; i < NumSections; ++i) {
+    const MCSectionELF &Section = *Sections[i];
+    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
 
-  unsigned Index = 1;
-  for (MCAssembler::const_iterator it = Asm.begin(),
-         ie = Asm.end(); it != ie; ++it) {
-    // Remember the offset into the file for this section.
-    SectionOffsetMap[&it->getSection()] = FileOff;
+    uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+    WriteZeros(Padding);
+    FileOff += Padding;
 
-    SectionIndexMap[&it->getSection()] = Index++;
+    // Remember the offset into the file for this section.
+    SectionOffsetMap[&Section] = FileOff;
 
-    const MCSectionData &SD = *it;
-    FileOff += Layout.getSectionFileSize(&SD);
+    FileOff += GetSectionFileSize(Layout, SD);
 
-    Asm.WriteSectionData(it, Layout, Writer);
+    if (IsELFMetaDataSection(SD))
+      WriteDataSectionData(this, SD);
+    else
+      Asm.WriteSectionData(&SD, Layout);
   }
 
+  uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+  WriteZeros(Padding);
+  FileOff += Padding;
+
   // ... and then the section header table.
   // Should we align the section header table?
   //
   // Null section first.
-  WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+  uint64_t FirstSectionSize =
+    NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+  uint32_t FirstSectionLink =
+    ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+  WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+  for (unsigned i = 1; i < NumSections; ++i) {
+    const MCSectionELF &Section = *Sections[i];
+    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+    uint32_t GroupSymbolIndex;
+    if (Section.getType() != ELF::SHT_GROUP)
+      GroupSymbolIndex = 0;
+    else
+      GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]);
 
-  for (MCAssembler::const_iterator it = Asm.begin(),
-         ie = Asm.end(); it != ie; ++it) {
-    const MCSectionData &SD = *it;
-    const MCSectionELF &Section =
-      static_cast<const MCSectionELF&>(SD.getSection());
+    uint64_t Size = GetSectionAddressSize(Layout, SD);
 
-    uint64_t sh_link = 0;
-    uint64_t sh_info = 0;
+    WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+                 SectionOffsetMap[&Section], Size,
+                 SD.getAlignment(), Section);
+  }
+}
 
-    switch(Section.getType()) {
-    case ELF::SHT_DYNAMIC:
-      sh_link = SectionStringTableIndex[&it->getSection()];
-      sh_info = 0;
-      break;
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                            raw_ostream &OS,
+                                            bool IsLittleEndian) {
+  switch (MOTW->getEMachine()) {
+    case ELF::EM_386:
+    case ELF::EM_X86_64:
+      return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    case ELF::EM_ARM:
+      return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    case ELF::EM_MBLAZE:
+      return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    default: llvm_unreachable("Unsupported architecture"); break;
+  }
+}
+
+
+/// START OF SUBCLASSES for ELFObjectWriter
+//===- ARMELFObjectWriter -------------------------------------------===//
+
+ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                       raw_ostream &_OS,
+                                       bool IsLittleEndian)
+  : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+ARMELFObjectWriter::~ARMELFObjectWriter()
+{}
 
-    case ELF::SHT_REL:
-    case ELF::SHT_RELA: {
-      const MCSection *SymtabSection;
-      const MCSection *InfoSection;
-
-      SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
-                                                     SectionKind::getReadOnly(),
-                                                     false);
-      sh_link = SectionIndexMap[SymtabSection];
-
-      // Remove ".rel" and ".rela" prefixes.
-      unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
-      StringRef SectionName = Section.getSectionName().substr(SecNameLen);
-
-      InfoSection = Asm.getContext().getELFSection(SectionName,
-                                                   ELF::SHT_PROGBITS, 0,
-                                                   SectionKind::getReadOnly(),
-                                                   false);
-      sh_info = SectionIndexMap[InfoSection];
+// FIXME: get the real EABI Version from the Triple.
+void ARMELFObjectWriter::WriteEFlags() {
+  Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
+}
+
+// In ARM, _MergedGlobals and other most symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is a first-cut approximation of what ARM/gcc does.
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+                                                   const MCValue &Target,
+                                                   const MCFragment &F,
+                                                   bool IsBSS) const {
+  const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+  bool EmitThisSym = false;
+
+  if (IsBSS) {
+    EmitThisSym = StringSwitch<bool>(Symbol.getName())
+      .Case("_MergedGlobals", true)
+      .Default(false);
+  } else {
+    EmitThisSym = StringSwitch<bool>(Symbol.getName())
+      .Case("_MergedGlobals", true)
+      .StartsWith(".L.str", true)
+      .Default(false);
+  }
+  if (EmitThisSym)
+    return &Symbol;
+  if (! Symbol.isTemporary())
+    return &Symbol;
+  return NULL;
+}
+
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+                                          const MCFixup &Fixup,
+                                          bool IsPCRel,
+                                          bool IsRelocWithSymbol,
+                                          int64_t Addend) {
+  MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+    MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+  unsigned Type = 0;
+  if (IsPCRel) {
+    switch ((unsigned)Fixup.getKind()) {
+    default: assert(0 && "Unimplemented");
+    case FK_Data_4:
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_ARM_BASE_PREL;
+        break;
+      case MCSymbolRefExpr::VK_ARM_TLSGD:
+        assert(0 && "unimplemented");
+        break;
+      case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+        Type = ELF::R_ARM_TLS_IE32;
+        break;
+      }
+      break;
+    case ARM::fixup_arm_uncondbranch:
+      switch (Modifier) {
+      case MCSymbolRefExpr::VK_ARM_PLT:
+        Type = ELF::R_ARM_PLT32;
+        break;
+      default:
+        Type = ELF::R_ARM_CALL;
+        break;
+      }
+      break;
+    case ARM::fixup_arm_condbranch:
+      Type = ELF::R_ARM_JUMP24;
+      break;
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+      Type = ELF::R_ARM_MOVT_PREL;
+      break;
+    case ARM::fixup_arm_movw_lo16:
+    case ARM::fixup_arm_movw_lo16_pcrel:
+      Type = ELF::R_ARM_MOVW_PREL_NC;
+      break;
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      Type = ELF::R_ARM_THM_MOVT_PREL;
+      break;
+    case ARM::fixup_t2_movw_lo16:
+    case ARM::fixup_t2_movw_lo16_pcrel:
+      Type = ELF::R_ARM_THM_MOVW_PREL_NC;
       break;
     }
-
-    case ELF::SHT_SYMTAB:
-    case ELF::SHT_DYNSYM:
-      sh_link = StringTableIndex;
-      sh_info = LastLocalSymbolIndex;
+  } else {
+    switch ((unsigned)Fixup.getKind()) {
+    default: llvm_unreachable("invalid fixup kind!");
+    case FK_Data_4:
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier"); break;
+      case MCSymbolRefExpr::VK_ARM_GOT:
+        Type = ELF::R_ARM_GOT_BREL;
+        break;
+      case MCSymbolRefExpr::VK_ARM_TLSGD:
+        Type = ELF::R_ARM_TLS_GD32;
+        break;
+      case MCSymbolRefExpr::VK_ARM_TPOFF:
+        Type = ELF::R_ARM_TLS_LE32;
+        break;
+      case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+        Type = ELF::R_ARM_TLS_IE32;
+        break;
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_ARM_ABS32;
+        break;
+      case MCSymbolRefExpr::VK_ARM_GOTOFF:
+        Type = ELF::R_ARM_GOTOFF32;
+        break;
+      }
       break;
-
-    case ELF::SHT_PROGBITS:
-    case ELF::SHT_STRTAB:
-    case ELF::SHT_NOBITS:
-    case ELF::SHT_NULL:
-      // Nothing to do.
+    case ARM::fixup_arm_ldst_pcrel_12:
+    case ARM::fixup_arm_pcrel_10:
+    case ARM::fixup_arm_adr_pcrel_12:
+    case ARM::fixup_arm_thumb_bl:
+    case ARM::fixup_arm_thumb_cb:
+    case ARM::fixup_arm_thumb_cp:
+    case ARM::fixup_arm_thumb_br:
+      assert(0 && "Unimplemented");
       break;
-
-    case ELF::SHT_HASH:
-    case ELF::SHT_GROUP:
-    case ELF::SHT_SYMTAB_SHNDX:
-    default:
-      assert(0 && "FIXME: sh_type value not supported!");
+    case ARM::fixup_arm_uncondbranch:
+      Type = ELF::R_ARM_CALL;
+      break;
+    case ARM::fixup_arm_condbranch:
+      Type = ELF::R_ARM_JUMP24;
+      break;
+    case ARM::fixup_arm_movt_hi16:
+      Type = ELF::R_ARM_MOVT_ABS;
+      break;
+    case ARM::fixup_arm_movw_lo16:
+      Type = ELF::R_ARM_MOVW_ABS_NC;
+      break;
+    case ARM::fixup_t2_movt_hi16:
+      Type = ELF::R_ARM_THM_MOVT_ABS;
+      break;
+    case ARM::fixup_t2_movw_lo16:
+      Type = ELF::R_ARM_THM_MOVW_ABS_NC;
       break;
     }
-
-    WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()],
-                     Section.getType(), Section.getFlags(),
-                     Layout.getSectionAddress(&SD),
-                     SectionOffsetMap.lookup(&SD.getSection()),
-                     Layout.getSectionSize(&SD), sh_link,
-                     sh_info, SD.getAlignment(),
-                     Section.getEntrySize());
   }
-}
 
-ELFObjectWriter::ELFObjectWriter(raw_ostream &OS,
-                                 bool Is64Bit,
-                                 bool IsLittleEndian,
-                                 bool HasRelocationAddend)
-  : MCObjectWriter(OS, IsLittleEndian)
-{
-  Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend);
+  if (RelocNeedsGOT(Modifier))
+    NeedsGOT = true;
+
+  return Type;
 }
 
-ELFObjectWriter::~ELFObjectWriter() {
-  delete (ELFObjectWriterImpl*) Impl;
+//===- MBlazeELFObjectWriter -------------------------------------------===//
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                             raw_ostream &_OS,
+                                             bool IsLittleEndian)
+  : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
 }
 
-void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
-  ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
 }
 
-void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
-                                       const MCAsmLayout &Layout,
-                                       const MCFragment *Fragment,
-                                       const MCFixup &Fixup, MCValue Target,
-                                       uint64_t &FixedValue) {
-  ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
-                                                  Target, FixedValue);
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+                                             const MCFixup &Fixup,
+                                             bool IsPCRel,
+                                             bool IsRelocWithSymbol,
+                                             int64_t Addend) {
+  // determine the type of the relocation
+  unsigned Type;
+  if (IsPCRel) {
+    switch ((unsigned)Fixup.getKind()) {
+    default:
+      llvm_unreachable("Unimplemented");
+    case FK_PCRel_4:
+      Type = ELF::R_MICROBLAZE_64_PCREL;
+      break;
+    case FK_PCRel_2:
+      Type = ELF::R_MICROBLAZE_32_PCREL;
+      break;
+    }
+  } else {
+    switch ((unsigned)Fixup.getKind()) {
+    default: llvm_unreachable("invalid fixup kind!");
+    case FK_Data_4:
+      Type = ((IsRelocWithSymbol || Addend !=0)
+              ? ELF::R_MICROBLAZE_32
+              : ELF::R_MICROBLAZE_64);
+      break;
+    case FK_Data_2:
+      Type = ELF::R_MICROBLAZE_32;
+      break;
+    }
+  }
+  return Type;
 }
 
-void ELFObjectWriter::WriteObject(const MCAssembler &Asm,
-                                  const MCAsmLayout &Layout) {
-  ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+//===- X86ELFObjectWriter -------------------------------------------===//
+
+
+X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                       raw_ostream &_OS,
+                                       bool IsLittleEndian)
+  : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+                                          const MCFixup &Fixup,
+                                          bool IsPCRel,
+                                          bool IsRelocWithSymbol,
+                                          int64_t Addend) {
+  // determine the type of the relocation
+
+  MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+    MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+  unsigned Type;
+  if (is64Bit()) {
+    if (IsPCRel) {
+      switch ((unsigned)Fixup.getKind()) {
+      default: llvm_unreachable("invalid fixup kind!");
+      case FK_PCRel_8:
+        assert(Modifier == MCSymbolRefExpr::VK_None);
+        Type = ELF::R_X86_64_PC64;
+        break;
+      case X86::reloc_signed_4byte:
+      case X86::reloc_riprel_4byte_movq_load:
+      case FK_Data_4: // FIXME?
+      case X86::reloc_riprel_4byte:
+      case FK_PCRel_4:
+        switch (Modifier) {
+        default:
+          llvm_unreachable("Unimplemented");
+        case MCSymbolRefExpr::VK_None:
+          Type = ELF::R_X86_64_PC32;
+          break;
+        case MCSymbolRefExpr::VK_PLT:
+          Type = ELF::R_X86_64_PLT32;
+          break;
+        case MCSymbolRefExpr::VK_GOTPCREL:
+          Type = ELF::R_X86_64_GOTPCREL;
+          break;
+        case MCSymbolRefExpr::VK_GOTTPOFF:
+          Type = ELF::R_X86_64_GOTTPOFF;
+        break;
+        case MCSymbolRefExpr::VK_TLSGD:
+          Type = ELF::R_X86_64_TLSGD;
+          break;
+        case MCSymbolRefExpr::VK_TLSLD:
+          Type = ELF::R_X86_64_TLSLD;
+          break;
+        }
+        break;
+      case FK_PCRel_2:
+        assert(Modifier == MCSymbolRefExpr::VK_None);
+        Type = ELF::R_X86_64_PC16;
+        break;
+      }
+    } else {
+      switch ((unsigned)Fixup.getKind()) {
+      default: llvm_unreachable("invalid fixup kind!");
+      case FK_Data_8: Type = ELF::R_X86_64_64; break;
+      case X86::reloc_signed_4byte:
+        assert(isInt<32>(Target.getConstant()));
+        switch (Modifier) {
+        default:
+          llvm_unreachable("Unimplemented");
+        case MCSymbolRefExpr::VK_None:
+          Type = ELF::R_X86_64_32S;
+          break;
+        case MCSymbolRefExpr::VK_GOT:
+          Type = ELF::R_X86_64_GOT32;
+          break;
+        case MCSymbolRefExpr::VK_GOTPCREL:
+          Type = ELF::R_X86_64_GOTPCREL;
+          break;
+        case MCSymbolRefExpr::VK_TPOFF:
+          Type = ELF::R_X86_64_TPOFF32;
+          break;
+        case MCSymbolRefExpr::VK_DTPOFF:
+          Type = ELF::R_X86_64_DTPOFF32;
+          break;
+        }
+        break;
+      case FK_Data_4:
+        Type = ELF::R_X86_64_32;
+        break;
+      case FK_Data_2: Type = ELF::R_X86_64_16; break;
+      case FK_PCRel_1:
+      case FK_Data_1: Type = ELF::R_X86_64_8; break;
+      }
+    }
+  } else {
+    if (IsPCRel) {
+      switch (Modifier) {
+      default:
+        llvm_unreachable("Unimplemented");
+      case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_386_PC32;
+        break;
+      case MCSymbolRefExpr::VK_PLT:
+        Type = ELF::R_386_PLT32;
+        break;
+      }
+    } else {
+      switch ((unsigned)Fixup.getKind()) {
+      default: llvm_unreachable("invalid fixup kind!");
+
+      case X86::reloc_global_offset_table:
+        Type = ELF::R_386_GOTPC;
+        break;
+
+      // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
+      // instead?
+      case X86::reloc_signed_4byte:
+      case FK_PCRel_4:
+      case FK_Data_4:
+        switch (Modifier) {
+        default:
+          llvm_unreachable("Unimplemented");
+        case MCSymbolRefExpr::VK_None:
+          Type = ELF::R_386_32;
+          break;
+        case MCSymbolRefExpr::VK_GOT:
+          Type = ELF::R_386_GOT32;
+          break;
+        case MCSymbolRefExpr::VK_GOTOFF:
+          Type = ELF::R_386_GOTOFF;
+          break;
+        case MCSymbolRefExpr::VK_TLSGD:
+          Type = ELF::R_386_TLS_GD;
+          break;
+        case MCSymbolRefExpr::VK_TPOFF:
+          Type = ELF::R_386_TLS_LE_32;
+          break;
+        case MCSymbolRefExpr::VK_INDNTPOFF:
+          Type = ELF::R_386_TLS_IE;
+          break;
+        case MCSymbolRefExpr::VK_NTPOFF:
+          Type = ELF::R_386_TLS_LE;
+          break;
+        case MCSymbolRefExpr::VK_GOTNTPOFF:
+          Type = ELF::R_386_TLS_GOTIE;
+          break;
+        case MCSymbolRefExpr::VK_TLSLDM:
+          Type = ELF::R_386_TLS_LDM;
+          break;
+        case MCSymbolRefExpr::VK_DTPOFF:
+          Type = ELF::R_386_TLS_LDO_32;
+          break;
+        }
+        break;
+      case FK_Data_2: Type = ELF::R_386_16; break;
+      case FK_PCRel_1:
+      case FK_Data_1: Type = ELF::R_386_8; break;
+      }
+    }
+  }
+
+  if (RelocNeedsGOT(Modifier))
+    NeedsGOT = true;
+
+  return Type;
 }
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 670b2e9b292a..cc1afbd08926 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -13,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <cctype>
 #include <cstring>
 using namespace llvm;
@@ -23,11 +23,13 @@ MCAsmInfo::MCAsmInfo() {
   HasMachoZeroFillDirective = false;
   HasMachoTBSSDirective = false;
   HasStaticCtorDtorReferenceInStaticMode = false;
+  LinkerRequiresNonEmptyDwarfLines = false;
   MaxInstLength = 4;
   PCSymbol = "$";
   SeparatorChar = ';';
   CommentColumn = 40;
   CommentString = "#";
+  LabelSuffix = ":";
   GlobalPrefix = "";
   PrivateGlobalPrefix = ".";
   LinkerPrivateGlobalPrefix = "";
@@ -52,18 +54,19 @@ MCAsmInfo::MCAsmInfo() {
   GPRel32Directive = 0;
   GlobalDirective = "\t.globl\t";
   HasSetDirective = true;
+  HasAggressiveSymbolFolding = true;
   HasLCOMMDirective = false;
   COMMDirectiveAlignmentIsInBytes = true;
   HasDotTypeDotSizeDirective = true;
   HasSingleParameterDotFile = true;
   HasNoDeadStrip = false;
+  HasSymbolResolver = false;
   WeakRefDirective = 0;
   WeakDefDirective = 0;
   LinkOnceDirective = 0;
   HiddenVisibilityAttr = MCSA_Hidden;
   ProtectedVisibilityAttr = MCSA_Protected;
   HasLEB128 = false;
-  HasDotLocAndDotFile = false;
   SupportsDebugInformation = false;
   ExceptionsType = ExceptionHandling::None;
   DwarfRequiresFrameSection = true;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index e0e261a63c70..13776f04437d 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -37,13 +37,20 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
   HasMachoZeroFillDirective = true;  // Uses .zerofill
   HasMachoTBSSDirective = true; // Uses .tbss
   HasStaticCtorDtorReferenceInStaticMode = true;
-  
+
+  // FIXME: Darwin 10 and newer don't need this.
+  LinkerRequiresNonEmptyDwarfLines = true;
+
+  // FIXME: Change this once MC is the system assembler.
+  HasAggressiveSymbolFolding = false;
+
   HiddenVisibilityAttr = MCSA_PrivateExtern;
   // Doesn't support protected visibility.
   ProtectedVisibilityAttr = MCSA_Global;
   
   HasDotTypeDotSizeDirective = false;
   HasNoDeadStrip = true;
+  HasSymbolResolver = true;
 
   DwarfUsesAbsoluteLabelForStmtList = false;
   DwarfUsesLabelOffsetForRanges = false;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 1cc8fb0b5486..8d0698216f60 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCSectionMachO.h"
@@ -23,6 +24,10 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cctype>
 using namespace llvm;
 
 namespace {
@@ -32,29 +37,33 @@ class MCAsmStreamer : public MCStreamer {
   const MCAsmInfo &MAI;
   OwningPtr<MCInstPrinter> InstPrinter;
   OwningPtr<MCCodeEmitter> Emitter;
-  
+  OwningPtr<TargetAsmBackend> AsmBackend;
+
   SmallString<128> CommentToEmit;
   raw_svector_ostream CommentStream;
 
-  unsigned IsLittleEndian : 1;
   unsigned IsVerboseAsm : 1;
   unsigned ShowInst : 1;
+  unsigned UseLoc : 1;
+
+  bool needsSet(const MCExpr *Value);
 
 public:
   MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
-                bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer,
-                MCCodeEmitter *emitter, bool showInst)
+                bool isVerboseAsm,
+                bool useLoc,
+                MCInstPrinter *printer, MCCodeEmitter *emitter,
+                TargetAsmBackend *asmbackend,
+                bool showInst)
     : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
-      InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
-      IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm),
-      ShowInst(showInst) {
+      InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+      CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+      ShowInst(showInst), UseLoc(useLoc) {
     if (InstPrinter && IsVerboseAsm)
       InstPrinter->setCommentStream(CommentStream);
   }
   ~MCAsmStreamer() {}
 
-  bool isLittleEndian() const { return IsLittleEndian; }
-
   inline void EmitEOL() {
     // If we don't have any comments, just emit a \n.
     if (!IsVerboseAsm) {
@@ -68,7 +77,7 @@ public:
   /// isVerboseAsm - Return true if this streamer supports verbose assembly at
   /// all.
   virtual bool isVerboseAsm() const { return IsVerboseAsm; }
-  
+
   /// hasRawTextSupport - We support EmitRawText.
   virtual bool hasRawTextSupport() const { return true; }
 
@@ -98,13 +107,26 @@ public:
   /// @name MCStreamer Interface
   /// @{
 
-  virtual void SwitchSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section);
+
+  virtual void InitSections() {
+    // FIXME, this is MachO specific, but the testsuite
+    // expects this.
+    SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+                         MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                         0, SectionKind::getText()));
+  }
 
   virtual void EmitLabel(MCSymbol *Symbol);
 
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
 
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                        const MCSymbol *LastLabel,
+                                        const MCSymbol *Label);
 
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
 
@@ -122,19 +144,26 @@ public:
   /// @param Symbol - The common symbol to emit.
   /// @param Size - The size of the common symbol.
   virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-  
+
   virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                             unsigned Size = 0, unsigned ByteAlignment = 0);
 
   virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
                                uint64_t Size, unsigned ByteAlignment = 0);
-                               
+
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
 
-  virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
-  virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace);
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             bool isPCRel, unsigned AddrSpace);
+  virtual void EmitIntValue(uint64_t Value, unsigned Size,
+                            unsigned AddrSpace = 0);
+
+  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+
+  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+
   virtual void EmitGPRel32Value(const MCExpr *Value);
-  
+
 
   virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
                         unsigned AddrSpace);
@@ -150,17 +179,28 @@ public:
                                  unsigned char Value = 0);
 
   virtual void EmitFileDirective(StringRef Filename);
-  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+  virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                     unsigned Column, unsigned Flags,
+                                     unsigned Isa, unsigned Discriminator);
+
+  virtual bool EmitCFIStartProc();
+  virtual bool EmitCFIEndProc();
+  virtual bool EmitCFIDefCfaOffset(int64_t Offset);
+  virtual bool EmitCFIDefCfaRegister(int64_t Register);
+  virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
+  virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+  virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
 
   virtual void EmitInstruction(const MCInst &Inst);
-  
-  /// EmitRawText - If this file is backed by a assembly streamer, this dumps
+
+  /// EmitRawText - If this file is backed by an assembly streamer, this dumps
   /// the specified string in the output .s file.  This capability is
   /// indicated by the hasRawTextSupport() predicate.
   virtual void EmitRawText(StringRef String);
-  
+
   virtual void Finish();
-  
+
   /// @}
 };
 
@@ -172,14 +212,14 @@ public:
 /// verbose assembly output is enabled.
 void MCAsmStreamer::AddComment(const Twine &T) {
   if (!IsVerboseAsm) return;
-  
+
   // Make sure that CommentStream is flushed.
   CommentStream.flush();
-  
+
   T.toVector(CommentToEmit);
   // Each comment goes on its own line.
   CommentToEmit.push_back('\n');
-  
+
   // Tell the comment stream that the vector changed underneath it.
   CommentStream.resync();
 }
@@ -189,10 +229,10 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
     OS << '\n';
     return;
   }
-  
+
   CommentStream.flush();
   StringRef Comments = CommentToEmit.str();
-  
+
   assert(Comments.back() == '\n' &&
          "Comment array not newline terminated");
   do {
@@ -200,10 +240,10 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
     OS.PadToColumn(MAI.getCommentColumn());
     size_t Position = Comments.find('\n');
     OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
-    
+
     Comments = Comments.substr(Position+1);
   } while (!Comments.empty());
-  
+
   CommentToEmit.clear();
   // Tell the comment stream that the vector changed underneath it.
   CommentStream.resync();
@@ -214,33 +254,41 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
   return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
 }
 
-void MCAsmStreamer::SwitchSection(const MCSection *Section) {
+void MCAsmStreamer::ChangeSection(const MCSection *Section) {
   assert(Section && "Cannot switch to a null section!");
-  if (Section != CurSection) {
-    PrevSection = CurSection;
-    CurSection = Section;
-    Section->PrintSwitchToSection(MAI, OS);
-  }
+  Section->PrintSwitchToSection(MAI, OS);
 }
 
 void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(CurSection && "Cannot emit before setting section!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
 
-  OS << *Symbol << ":";
+  OS << *Symbol << MAI.getLabelSuffix();
   EmitEOL();
-  Symbol->setSection(*CurSection);
+  Symbol->setSection(*getCurrentSection());
 }
 
 void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   switch (Flag) {
   default: assert(0 && "Invalid flag!");
+  case MCAF_SyntaxUnified:         OS << "\t.syntax unified"; break;
   case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
+  case MCAF_Code16:                OS << "\t.code\t16"; break;
+  case MCAF_Code32:                OS << "\t.code\t32"; break;
   }
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
+  // This needs to emit to a temporary string to get properly quoted
+  // MCSymbols when they have spaces in them.
+  OS << "\t.thumb_func";
+  if (Func)
+    OS << '\t' << *Func;
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
   OS << *Symbol << " = " << *Value;
   EmitEOL();
@@ -249,6 +297,18 @@ void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
   Symbol->setVariableValue(Value);
 }
 
+void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+  OS << ".weakref " << *Alias << ", " << *Symbol;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                             const MCSymbol *LastLabel,
+                                             const MCSymbol *Label) {
+  EmitDwarfSetLineAddr(LineDelta, Label,
+                       getContext().getTargetAsmInfo().getPointerSize());
+}
+
 void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                         MCSymbolAttr Attribute) {
   switch (Attribute) {
@@ -259,6 +319,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_ELF_TypeTLS:         /// .type _foo, STT_TLS     # aka @tls_object
   case MCSA_ELF_TypeCommon:      /// .type _foo, STT_COMMON  # aka @common
   case MCSA_ELF_TypeNoType:      /// .type _foo, STT_NOTYPE  # aka @notype
+  case MCSA_ELF_TypeGnuUniqueObject:  /// .type _foo, @gnu_unique_object
     assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
     OS << "\t.type\t" << *Symbol << ','
        << ((MAI.getCommentString()[0] != '@') ? '@' : '%');
@@ -270,6 +331,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     case MCSA_ELF_TypeTLS:         OS << "tls_object"; break;
     case MCSA_ELF_TypeCommon:      OS << "common"; break;
     case MCSA_ELF_TypeNoType:      OS << "no_type"; break;
+    case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
     }
     EmitEOL();
     return;
@@ -282,6 +344,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_LazyReference:  OS << "\t.lazy_reference\t";  break;
   case MCSA_Local:          OS << "\t.local\t";           break;
   case MCSA_NoDeadStrip:    OS << "\t.no_dead_strip\t";   break;
+  case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
   case MCSA_PrivateExtern:  OS << "\t.private_extern\t";  break;
   case MCSA_Protected:      OS << "\t.protected\t";       break;
   case MCSA_Reference:      OS << "\t.reference\t";       break;
@@ -352,11 +415,11 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
                                  unsigned Size, unsigned ByteAlignment) {
   // Note: a .zerofill directive does not switch sections.
   OS << ".zerofill ";
-  
+
   // This is a mach-o specific directive.
   const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
   OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
-  
+
   if (Symbol != NULL) {
     OS << ',' << *Symbol << ',' << Size;
     if (ByteAlignment != 0)
@@ -374,11 +437,11 @@ void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
   // Instead of using the Section we'll just use the shortcut.
   // This is a mach-o specific directive and section.
   OS << ".tbss " << *Symbol << ", " << Size;
-  
+
   // Output align if we have it.  We default to 1 so don't bother printing
   // that.
   if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
-  
+
   EmitEOL();
 }
 
@@ -386,19 +449,19 @@ static inline char toOctal(int X) { return (X&7)+'0'; }
 
 static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
   OS << '"';
-  
+
   for (unsigned i = 0, e = Data.size(); i != e; ++i) {
     unsigned char C = Data[i];
     if (C == '"' || C == '\\') {
       OS << '\\' << (char)C;
       continue;
     }
-    
+
     if (isprint((unsigned char)C)) {
       OS << (char)C;
       continue;
     }
-    
+
     switch (C) {
       case '\b': OS << "\\b"; break;
       case '\f': OS << "\\f"; break;
@@ -413,15 +476,15 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
         break;
     }
   }
-  
+
   OS << '"';
 }
 
 
 void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
-  assert(CurSection && "Cannot emit contents before setting section!");
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
   if (Data.empty()) return;
-  
+
   if (Data.size() == 1) {
     OS << MAI.getData8bitsDirective(AddrSpace);
     OS << (unsigned)(unsigned char)Data[0];
@@ -443,11 +506,15 @@ void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
   EmitEOL();
 }
 
-/// EmitIntValue - Special case of EmitValue that avoids the client having
-/// to pass in a MCExpr for constant integers.
 void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
                                  unsigned AddrSpace) {
-  assert(CurSection && "Cannot emit contents before setting section!");
+  EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+                                  bool isPCRel, unsigned AddrSpace) {
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(!isPCRel && "Cannot emit pc relative relocations!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
@@ -458,35 +525,43 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
     Directive = MAI.getData64bitsDirective(AddrSpace);
     // If the target doesn't support 64-bit data, emit as two 32-bit halves.
     if (Directive) break;
-    if (isLittleEndian()) {
-      EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace);
-      EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace);
+    int64_t IntValue;
+    if (!Value->EvaluateAsAbsolute(IntValue))
+      report_fatal_error("Don't know how to emit this value.");
+    if (getContext().getTargetAsmInfo().isLittleEndian()) {
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
     } else {
-      EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace);
-      EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
     }
     return;
   }
-  
+
   assert(Directive && "Invalid size for machine code value!");
-  OS << Directive << truncateToSize(Value, Size);
+  OS << Directive << *Value;
   EmitEOL();
 }
 
-void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size,
-                              unsigned AddrSpace) {
-  assert(CurSection && "Cannot emit contents before setting section!");
-  const char *Directive = 0;
-  switch (Size) {
-  default: break;
-  case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
-  case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
-  case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
-  case 8: Directive = MAI.getData64bitsDirective(AddrSpace); break;
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+  int64_t IntValue;
+  if (Value->EvaluateAsAbsolute(IntValue)) {
+    EmitULEB128IntValue(IntValue, AddrSpace);
+    return;
   }
-  
-  assert(Directive && "Invalid size for machine code value!");
-  OS << Directive << *Value;
+  assert(MAI.hasLEB128() && "Cannot print a .uleb");
+  OS << ".uleb128 " << *Value;
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+  int64_t IntValue;
+  if (Value->EvaluateAsAbsolute(IntValue)) {
+    EmitSLEB128IntValue(IntValue, AddrSpace);
+    return;
+  }
+  assert(MAI.hasLEB128() && "Cannot print a .sleb");
+  OS << ".sleb128 " << *Value;
   EmitEOL();
 }
 
@@ -502,7 +577,7 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
 void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
                              unsigned AddrSpace) {
   if (NumBytes == 0) return;
-  
+
   if (AddrSpace == 0)
     if (const char *ZeroDirective = MAI.getZeroDirective()) {
       OS << ZeroDirective << NumBytes;
@@ -530,7 +605,7 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
     case 4: OS << ".p2alignl "; break;
     case 8: llvm_unreachable("Unsupported alignment size!");
     }
-    
+
     if (MAI.getAlignmentIsInBytes())
       OS << ByteAlignment;
     else
@@ -540,13 +615,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
       OS << ", 0x";
       OS.write_hex(truncateToSize(Value, ValueSize));
 
-      if (MaxBytesToEmit) 
+      if (MaxBytesToEmit)
         OS << ", " << MaxBytesToEmit;
     }
     EmitEOL();
     return;
   }
-  
+
   // Non-power of two alignment.  This is not widely supported by assemblers.
   // FIXME: Parameterize this based on MAI.
   switch (ValueSize) {
@@ -559,7 +634,7 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
 
   OS << ' ' << ByteAlignment;
   OS << ", " << truncateToSize(Value, ValueSize);
-  if (MaxBytesToEmit) 
+  if (MaxBytesToEmit)
     OS << ", " << MaxBytesToEmit;
   EmitEOL();
 }
@@ -586,10 +661,118 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
   EmitEOL();
 }
 
-void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
-  OS << "\t.file\t" << FileNo << ' ';
-  PrintQuotedString(Filename, OS);
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+  if (UseLoc) {
+    OS << "\t.file\t" << FileNo << ' ';
+    PrintQuotedString(Filename, OS);
+    EmitEOL();
+  }
+  return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                          unsigned Column, unsigned Flags,
+                                          unsigned Isa,
+                                          unsigned Discriminator) {
+  this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+                                          Isa, Discriminator);
+  if (!UseLoc)
+    return;
+
+  OS << "\t.loc\t" << FileNo << " " << Line << " " << Column;
+  if (Flags & DWARF2_FLAG_BASIC_BLOCK)
+    OS << " basic_block";
+  if (Flags & DWARF2_FLAG_PROLOGUE_END)
+    OS << " prologue_end";
+  if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN)
+    OS << " epilogue_begin";
+
+  unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags();
+  if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) {
+    OS << " is_stmt ";
+
+    if (Flags & DWARF2_FLAG_IS_STMT)
+      OS << "1";
+    else
+      OS << "0";
+  }
+
+  if (Isa)
+    OS << "isa " << Isa;
+  if (Discriminator)
+    OS << "discriminator " << Discriminator;
+  EmitEOL();
+}
+
+bool MCAsmStreamer::EmitCFIStartProc() {
+  if (this->MCStreamer::EmitCFIStartProc())
+    return true;
+
+  OS << "\t.cfi_startproc";
   EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFIEndProc() {
+  if (this->MCStreamer::EmitCFIEndProc())
+    return true;
+
+  OS << "\t.cfi_endproc";
+  EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+  if (this->MCStreamer::EmitCFIDefCfaOffset(Offset))
+    return true;
+
+  OS << "\t.cfi_def_cfa_offset " << Offset;
+  EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+  if (this->MCStreamer::EmitCFIDefCfaRegister(Register))
+    return true;
+
+  OS << "\t.cfi_def_cfa_register " << Register;
+  EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+  if (this->MCStreamer::EmitCFIOffset(Register, Offset))
+    return true;
+
+  OS << "\t.cfi_offset " << Register << ", " << Offset;
+  EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+                                       unsigned Encoding) {
+  if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding))
+    return true;
+
+  OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+  EmitEOL();
+
+  return false;
+}
+
+bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+  if (this->MCStreamer::EmitCFILsda(Sym, Encoding))
+    return true;
+
+  OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+  EmitEOL();
+
+  return false;
 }
 
 void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
@@ -610,7 +793,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
 
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
     MCFixup &F = Fixups[i];
-    const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+    const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
     for (unsigned j = 0; j != Info.TargetSize; ++j) {
       unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
       assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
@@ -618,6 +801,8 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
     }
   }
 
+  // FIXME: Node the fixup comments for Thumb2 are completely bogus since the
+  // high order halfword of a 32-bit Thumb2 instruction is emitted first.
   OS << "encoding: [";
   for (unsigned i = 0, e = Code.size(); i != e; ++i) {
     if (i)
@@ -637,15 +822,26 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
       if (MapEntry == 0) {
         OS << format("0x%02x", uint8_t(Code[i]));
       } else {
-        assert(Code[i] == 0 && "Encoder wrote into fixed up bit!");
-        OS << char('A' + MapEntry - 1);
+        if (Code[i]) {
+          // FIXME: Some of the 8 bits require fix up.
+          OS << format("0x%02x", uint8_t(Code[i])) << '\''
+             << char('A' + MapEntry - 1) << '\'';
+        } else
+          OS << char('A' + MapEntry - 1);
       }
     } else {
       // Otherwise, write out in binary.
       OS << "0b";
       for (unsigned j = 8; j--;) {
         unsigned Bit = (Code[i] >> j) & 1;
-        if (uint8_t MapEntry = FixupMap[i * 8 + j]) {
+        
+        unsigned FixupBit;
+        if (getContext().getTargetAsmInfo().isLittleEndian())
+          FixupBit = i * 8 + j;
+        else
+          FixupBit = i * 8 + (7-j);
+        
+        if (uint8_t MapEntry = FixupMap[FixupBit]) {
           assert(Bit == 0 && "Encoder wrote into fixed up bit!");
           OS << char('A' + MapEntry - 1);
         } else
@@ -657,14 +853,17 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
 
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
     MCFixup &F = Fixups[i];
-    const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+    const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
     OS << "  fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
        << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
   }
 }
 
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
-  assert(CurSection && "Cannot emit contents before setting section!");
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+  if (!UseLoc)
+    MCLineEntry::Make(this, getCurrentSection());
 
   // Show the encoding in a comment if we have a code emitter.
   if (Emitter)
@@ -684,7 +883,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
   EmitEOL();
 }
 
-/// EmitRawText - If this file is backed by a assembly streamer, this dumps
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
 void MCAsmStreamer::EmitRawText(StringRef String) {
@@ -695,13 +894,16 @@ void MCAsmStreamer::EmitRawText(StringRef String) {
 }
 
 void MCAsmStreamer::Finish() {
+  // Dump out the dwarf file & directory tables and line tables.
+  if (getContext().hasDwarfFiles() && !UseLoc)
+    MCDwarfFileTable::Emit(this);
 }
 
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
-                                    bool isLittleEndian,
-                                    bool isVerboseAsm, MCInstPrinter *IP,
-                                    MCCodeEmitter *CE, bool ShowInst) {
-  return new MCAsmStreamer(Context, OS, isLittleEndian, isVerboseAsm,
-                           IP, CE, ShowInst);
+                                    bool isVerboseAsm, bool useLoc,
+                                    MCInstPrinter *IP, MCCodeEmitter *CE,
+                                    TargetAsmBackend *TAB, bool ShowInst) {
+  return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+                           IP, CE, TAB, ShowInst);
 }
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index f0e1d7fbc21c..999264604224 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -11,10 +11,13 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -36,7 +39,6 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
 STATISTIC(ObjectBytes, "Number of emitted object file bytes");
 STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
 STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-STATISTIC(SectionLayouts, "Number of section layouts");
 }
 }
 
@@ -48,131 +50,78 @@ STATISTIC(SectionLayouts, "Number of section layouts");
 /* *** */
 
 MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
-  : Assembler(Asm), LastValidFragment(0)
+  : Assembler(Asm), LastValidFragment()
  {
   // Compute the section layout order. Virtual sections must go last.
   for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
-    if (!Asm.getBackend().isVirtualSection(it->getSection()))
+    if (!it->getSection().isVirtualSection())
       SectionOrder.push_back(&*it);
   for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
-    if (Asm.getBackend().isVirtualSection(it->getSection()))
+    if (it->getSection().isVirtualSection())
       SectionOrder.push_back(&*it);
 }
 
-bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const {
-  // The first section is always up-to-date.
-  unsigned Index = SD->getLayoutOrder();
-  if (!Index)
-    return true;
-
-  // Otherwise, sections are always implicitly computed when the preceeding
-  // fragment is layed out.
-  const MCSectionData *Prev = getSectionOrder()[Index - 1];
-  return isFragmentUpToDate(&(Prev->getFragmentList().back()));
-}
-
 bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
-  return (LastValidFragment &&
-          F->getLayoutOrder() <= LastValidFragment->getLayoutOrder());
+  const MCSectionData &SD = *F->getParent();
+  const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+  if (!LastValid)
+    return false;
+  assert(LastValid->getParent() == F->getParent());
+  return F->getLayoutOrder() <= LastValid->getLayoutOrder();
 }
 
-void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
+void MCAsmLayout::Invalidate(MCFragment *F) {
   // If this fragment wasn't already up-to-date, we don't need to do anything.
   if (!isFragmentUpToDate(F))
     return;
 
-  // Otherwise, reset the last valid fragment to the predecessor of the
-  // invalidated fragment.
-  LastValidFragment = F->getPrevNode();
-  if (!LastValidFragment) {
-    unsigned Index = F->getParent()->getLayoutOrder();
-    if (Index != 0) {
-      MCSectionData *Prev = getSectionOrder()[Index - 1];
-      LastValidFragment = &(Prev->getFragmentList().back());
-    }
-  }
+  // Otherwise, reset the last valid fragment to this fragment.
+  const MCSectionData &SD = *F->getParent();
+  LastValidFragment[&SD] = F;
 }
 
 void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+  MCSectionData &SD = *F->getParent();
+
+  MCFragment *Cur = LastValidFragment[&SD];
+  if (!Cur)
+    Cur = &*SD.begin();
+  else
+    Cur = Cur->getNextNode();
+
   // Advance the layout position until the fragment is up-to-date.
   while (!isFragmentUpToDate(F)) {
-    // Advance to the next fragment.
-    MCFragment *Cur = LastValidFragment;
-    if (Cur)
-      Cur = Cur->getNextNode();
-    if (!Cur) {
-      unsigned NextIndex = 0;
-      if (LastValidFragment)
-        NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1;
-      Cur = SectionOrder[NextIndex]->begin();
-    }
-
     const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
+    Cur = Cur->getNextNode();
   }
 }
 
-void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) {
-  if (LastValidFragment == Src)
-    LastValidFragment = Dst;
-
-  Dst->Offset = Src->Offset;
-  Dst->EffectiveSize = Src->EffectiveSize;
-}
-
-uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const {
-  assert(F->getParent() && "Missing section()!");
-  return getSectionAddress(F->getParent()) + getFragmentOffset(F);
-}
-
-uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const {
-  EnsureValid(F);
-  assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!");
-  return F->EffectiveSize;
-}
-
 uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
   EnsureValid(F);
   assert(F->Offset != ~UINT64_C(0) && "Address not set!");
   return F->Offset;
 }
 
-uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const {
-  assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!");
-  return getFragmentAddress(SD->getFragment()) + SD->getOffset();
-}
-
-uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const {
-  EnsureValid(SD->begin());
-  assert(SD->Address != ~UINT64_C(0) && "Address not set!");
-  return SD->Address;
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+  assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
+  return getFragmentOffset(SD->getFragment()) + SD->getOffset();
 }
 
 uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
   // The size is the last fragment's end offset.
   const MCFragment &F = SD->getFragmentList().back();
-  return getFragmentOffset(&F) + getFragmentEffectiveSize(&F);
+  return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
 }
 
 uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
   // Virtual sections have no file size.
-  if (getAssembler().getBackend().isVirtualSection(SD->getSection()))
+  if (SD->getSection().isVirtualSection())
     return 0;
 
   // Otherwise, the file size is the same as the address space size.
   return getSectionAddressSize(SD);
 }
 
-uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
-  // The logical size is the address space size minus any tail padding.
-  uint64_t Size = getSectionAddressSize(SD);
-  const MCAlignFragment *AF =
-    dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back()));
-  if (AF && AF->hasOnlyAlignAddress())
-    Size -= getFragmentEffectiveSize(AF);
-
-  return Size;
-}
-
 /* *** */
 
 MCFragment::MCFragment() : Kind(FragmentType(~0)) {
@@ -182,8 +131,7 @@ MCFragment::~MCFragment() {
 }
 
 MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
-  : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)),
-    EffectiveSize(~UINT64_C(0))
+  : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
 {
   if (Parent)
     Parent->getFragmentList().push_back(this);
@@ -195,8 +143,8 @@ MCSectionData::MCSectionData() : Section(0) {}
 
 MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
   : Section(&_Section),
+    Ordinal(~UINT32_C(0)),
     Alignment(1),
-    Address(~UINT64_C(0)),
     HasInstructions(false)
 {
   if (A)
@@ -220,99 +168,17 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
 
 /* *** */
 
-MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
-                         MCCodeEmitter &_Emitter, raw_ostream &_OS)
-  : Context(_Context), Backend(_Backend), Emitter(_Emitter),
-    OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false)
+MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+                         MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+                         raw_ostream &OS_)
+  : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+    OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false)
 {
 }
 
 MCAssembler::~MCAssembler() {
 }
 
-static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
-                                                const MCFixup &Fixup,
-                                                const MCValue Target,
-                                                const MCSection *BaseSection) {
-  // The effective fixup address is
-  //     addr(atom(A)) + offset(A)
-  //   - addr(atom(B)) - offset(B)
-  //   - addr(<base symbol>) + <fixup offset from base symbol>
-  // and the offsets are not relocatable, so the fixup is fully resolved when
-  //  addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0.
-  //
-  // The simple (Darwin, except on x86_64) way of dealing with this was to
-  // assume that any reference to a temporary symbol *must* be a temporary
-  // symbol in the same atom, unless the sections differ. Therefore, any PCrel
-  // relocation to a temporary symbol (in the same section) is fully
-  // resolved. This also works in conjunction with absolutized .set, which
-  // requires the compiler to use .set to absolutize the differences between
-  // symbols which the compiler knows to be assembly time constants, so we don't
-  // need to worry about considering symbol differences fully resolved.
-
-  // Non-relative fixups are only resolved if constant.
-  if (!BaseSection)
-    return Target.isAbsolute();
-
-  // Otherwise, relative fixups are only resolved if not a difference and the
-  // target is a temporary in the same section.
-  if (Target.isAbsolute() || Target.getSymB())
-    return false;
-
-  const MCSymbol *A = &Target.getSymA()->getSymbol();
-  if (!A->isTemporary() || !A->isInSection() ||
-      &A->getSection() != BaseSection)
-    return false;
-
-  return true;
-}
-
-static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
-                                          const MCAsmLayout &Layout,
-                                          const MCFixup &Fixup,
-                                          const MCValue Target,
-                                          const MCSymbolData *BaseSymbol) {
-  // The effective fixup address is
-  //     addr(atom(A)) + offset(A)
-  //   - addr(atom(B)) - offset(B)
-  //   - addr(BaseSymbol) + <fixup offset from base symbol>
-  // and the offsets are not relocatable, so the fixup is fully resolved when
-  //  addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0.
-  //
-  // Note that "false" is almost always conservatively correct (it means we emit
-  // a relocation which is unnecessary), except when it would force us to emit a
-  // relocation which the target cannot encode.
-
-  const MCSymbolData *A_Base = 0, *B_Base = 0;
-  if (const MCSymbolRefExpr *A = Target.getSymA()) {
-    // Modified symbol references cannot be resolved.
-    if (A->getKind() != MCSymbolRefExpr::VK_None)
-      return false;
-
-    A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol()));
-    if (!A_Base)
-      return false;
-  }
-
-  if (const MCSymbolRefExpr *B = Target.getSymB()) {
-    // Modified symbol references cannot be resolved.
-    if (B->getKind() != MCSymbolRefExpr::VK_None)
-      return false;
-
-    B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol()));
-    if (!B_Base)
-      return false;
-  }
-
-  // If there is no base, A and B have to be the same atom for this fixup to be
-  // fully resolved.
-  if (!BaseSymbol)
-    return A_Base == B_Base;
-
-  // Otherwise, B must be missing and A must be the base.
-  return !B_Base && BaseSymbol == A_Base;
-}
-
 bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
   // Non-temporary labels should always be visible to the linker.
   if (!Symbol.isTemporary())
@@ -326,8 +192,7 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
   return getBackend().doesSectionRequireSymbols(Symbol.getSection());
 }
 
-const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
-                                         const MCSymbolData *SD) const {
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
   // Linker visible symbols define atoms.
   if (isSymbolLinkerVisible(SD->getSymbol()))
     return SD;
@@ -351,67 +216,78 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
                                 MCValue &Target, uint64_t &Value) const {
   ++stats::EvaluateFixup;
 
-  if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout))
+  if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
     report_fatal_error("expected relocatable expression");
 
-  // FIXME: How do non-scattered symbols work in ELF? I presume the linker
-  // doesn't support small relocations, but then under what criteria does the
-  // assembler allow symbol differences?
+  bool IsPCRel = Backend.getFixupKindInfo(
+    Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
+
+  bool IsResolved;
+  if (IsPCRel) {
+    if (Target.getSymB()) {
+      IsResolved = false;
+    } else if (!Target.getSymA()) {
+      IsResolved = false;
+    } else {
+      const MCSymbolRefExpr *A = Target.getSymA();
+      const MCSymbol &SA = A->getSymbol();
+      if (A->getKind() != MCSymbolRefExpr::VK_None ||
+          SA.AliasedSymbol().isUndefined()) {
+        IsResolved = false;
+      } else {
+        const MCSymbolData &DataA = getSymbolData(SA);
+        IsResolved =
+          getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
+                                                             *DF, false, true);
+      }
+    }
+  } else {
+    IsResolved = Target.isAbsolute();
+  }
 
   Value = Target.getConstant();
 
-  bool IsPCRel = Emitter.getFixupKindInfo(
-    Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
-  bool IsResolved = true;
+  bool IsThumb = false;
   if (const MCSymbolRefExpr *A = Target.getSymA()) {
-    if (A->getSymbol().isDefined())
-      Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol()));
-    else
-      IsResolved = false;
+    const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+    if (Sym.isDefined())
+      Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+    if (isThumbFunc(&Sym))
+      IsThumb = true;
   }
   if (const MCSymbolRefExpr *B = Target.getSymB()) {
-    if (B->getSymbol().isDefined())
-      Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol()));
-    else
-      IsResolved = false;
+    const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+    if (Sym.isDefined())
+      Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
   }
 
-  // If we are using scattered symbols, determine whether this value is actually
-  // resolved; scattering may cause atoms to move.
-  if (IsResolved && getBackend().hasScatteredSymbols()) {
-    if (getBackend().hasReliableSymbolDifference()) {
-      // If this is a PCrel relocation, find the base atom (identified by its
-      // symbol) that the fixup value is relative to.
-      const MCSymbolData *BaseSymbol = 0;
-      if (IsPCRel) {
-        BaseSymbol = DF->getAtom();
-        if (!BaseSymbol)
-          IsResolved = false;
-      }
 
-      if (IsResolved)
-        IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target,
-                                                   BaseSymbol);
-    } else {
-      const MCSection *BaseSection = 0;
-      if (IsPCRel)
-        BaseSection = &DF->getParent()->getSection();
+  bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+                         MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+  assert((ShouldAlignPC ? IsPCRel : true) &&
+    "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
 
-      IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target,
-                                                       BaseSection);
-    }
+  if (IsPCRel) {
+    uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+    
+    // A number of ARM fixups in Thumb mode require that the effective PC
+    // address be determined as the 32-bit aligned version of the actual offset.
+    if (ShouldAlignPC) Offset &= ~0x3;
+    Value -= Offset;
   }
 
-  if (IsPCRel)
-    Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset();
+  // ARM fixups based from a thumb function address need to have the low
+  // bit set. The actual value is always at least 16-bit aligned, so the
+  // low bit is normally clear and available for use as an ISA flag for
+  // interworking.
+  if (IsThumb)
+    Value |= 1;
 
   return IsResolved;
 }
 
-uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
-                                          const MCFragment &F,
-                                          uint64_t SectionAddress,
-                                          uint64_t FragmentOffset) const {
+uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+                                          const MCFragment &F) const {
   switch (F.getKind()) {
   case MCFragment::FT_Data:
     return cast<MCDataFragment>(F).getContents().size();
@@ -420,62 +296,48 @@ uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
   case MCFragment::FT_Inst:
     return cast<MCInstFragment>(F).getInstSize();
 
+  case MCFragment::FT_LEB:
+    return cast<MCLEBFragment>(F).getContents().size();
+
   case MCFragment::FT_Align: {
     const MCAlignFragment &AF = cast<MCAlignFragment>(F);
-
-    assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) &&
-           "Invalid OnlyAlignAddress bit, not the last fragment!");
-
-    uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset,
-                                      AF.getAlignment());
-
-    // Honor MaxBytesToEmit.
+    unsigned Offset = Layout.getFragmentOffset(&AF);
+    unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
     if (Size > AF.getMaxBytesToEmit())
       return 0;
-
     return Size;
   }
 
   case MCFragment::FT_Org: {
-    const MCOrgFragment &OF = cast<MCOrgFragment>(F);
-
-    // FIXME: We should compute this sooner, we don't want to recurse here, and
-    // we would like to be more functional.
+    MCOrgFragment &OF = cast<MCOrgFragment>(F);
     int64_t TargetLocation;
-    if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
+    if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
       report_fatal_error("expected assembly-time absolute expression");
 
     // FIXME: We need a way to communicate this error.
-    int64_t Offset = TargetLocation - FragmentOffset;
-    if (Offset < 0)
+    uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+    int64_t Size = TargetLocation - FragmentOffset;
+    if (Size < 0 || Size >= 0x40000000)
       report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
-                         "' (at offset '" + Twine(FragmentOffset) + "'");
-
-    return Offset;
+                         "' (at offset '" + Twine(FragmentOffset) + "')");
+    return Size;
   }
+
+  case MCFragment::FT_Dwarf:
+    return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+  case MCFragment::FT_DwarfFrame:
+    return cast<MCDwarfCallFrameFragment>(F).getContents().size();
   }
 
   assert(0 && "invalid fragment kind");
   return 0;
 }
 
-void MCAsmLayout::LayoutFile() {
-  // Initialize the first section and set the valid fragment layout point. All
-  // actual layout computations are done lazily.
-  LastValidFragment = 0;
-  if (!getSectionOrder().empty())
-    getSectionOrder().front()->Address = 0;
-}
-
 void MCAsmLayout::LayoutFragment(MCFragment *F) {
   MCFragment *Prev = F->getPrevNode();
 
   // We should never try to recompute something which is up-to-date.
   assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
-  // We should never try to compute the fragment layout if the section isn't
-  // up-to-date.
-  assert(isSectionUpToDate(F->getParent()) &&
-         "Attempt to compute fragment before it's section!");
   // We should never try to compute the fragment layout if it's predecessor
   // isn't up-to-date.
   assert((!Prev || isFragmentUpToDate(Prev)) &&
@@ -483,55 +345,26 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
 
   ++stats::FragmentLayouts;
 
-  // Compute the fragment start address.
-  uint64_t StartAddress = F->getParent()->Address;
-  uint64_t Address = StartAddress;
-  if (Prev)
-    Address += Prev->Offset + Prev->EffectiveSize;
-
   // Compute fragment offset and size.
-  F->Offset = Address - StartAddress;
-  F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress,
-                                                        F->Offset);
-  LastValidFragment = F;
-
-  // If this is the last fragment in a section, update the next section address.
-  if (!F->getNextNode()) {
-    unsigned NextIndex = F->getParent()->getLayoutOrder() + 1;
-    if (NextIndex != getSectionOrder().size())
-      LayoutSection(getSectionOrder()[NextIndex]);
-  }
-}
-
-void MCAsmLayout::LayoutSection(MCSectionData *SD) {
-  unsigned SectionOrderIndex = SD->getLayoutOrder();
-
-  ++stats::SectionLayouts;
-
-  // Compute the section start address.
-  uint64_t StartAddress = 0;
-  if (SectionOrderIndex) {
-    MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1];
-    StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev);
-  }
-
-  // Honor the section alignment requirements.
-  StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+  uint64_t Offset = 0;
+  if (Prev)
+    Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
 
-  // Set the section address.
-  SD->Address = StartAddress;
+  F->Offset = Offset;
+  LastValidFragment[F->getParent()] = F;
 }
 
 /// WriteFragmentData - Write the \arg F data to the output file.
 static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                              const MCFragment &F, MCObjectWriter *OW) {
+                              const MCFragment &F) {
+  MCObjectWriter *OW = &Asm.getWriter();
   uint64_t Start = OW->getStream().tell();
   (void) Start;
 
   ++stats::EmittedFragments;
 
   // FIXME: Embed in fragments instead?
-  uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F);
+  uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
   switch (F.getKind()) {
   case MCFragment::FT_Align: {
     MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -598,9 +431,17 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
     break;
   }
 
-  case MCFragment::FT_Inst:
-    llvm_unreachable("unexpected inst fragment after lowering");
+  case MCFragment::FT_Inst: {
+    MCInstFragment &IF = cast<MCInstFragment>(F);
+    OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
+    break;
+  }
+
+  case MCFragment::FT_LEB: {
+    MCLEBFragment &LF = cast<MCLEBFragment>(F);
+    OW->WriteBytes(LF.getContents().str());
     break;
+  }
 
   case MCFragment::FT_Org: {
     MCOrgFragment &OF = cast<MCOrgFragment>(F);
@@ -610,16 +451,26 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
 
     break;
   }
+
+  case MCFragment::FT_Dwarf: {
+    const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+    OW->WriteBytes(OF.getContents().str());
+    break;
+  }
+  case MCFragment::FT_DwarfFrame: {
+    const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+    OW->WriteBytes(CF.getContents().str());
+    break;
+  }
   }
 
   assert(OW->getStream().tell() - Start == FragmentSize);
 }
 
 void MCAssembler::WriteSectionData(const MCSectionData *SD,
-                                   const MCAsmLayout &Layout,
-                                   MCObjectWriter *OW) const {
+                                   const MCAsmLayout &Layout) const {
   // Ignore virtual sections.
-  if (getBackend().isVirtualSection(SD->getSection())) {
+  if (SD->getSection().isVirtualSection()) {
     assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
 
     // Check that contents are only things legal inside a virtual section.
@@ -657,51 +508,34 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
     return;
   }
 
-  uint64_t Start = OW->getStream().tell();
+  uint64_t Start = getWriter().getStream().tell();
   (void) Start;
 
   for (MCSectionData::const_iterator it = SD->begin(),
          ie = SD->end(); it != ie; ++it)
-    WriteFragmentData(*this, Layout, *it, OW);
+    WriteFragmentData(*this, Layout, *it);
 
-  assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
+  assert(getWriter().getStream().tell() - Start ==
+         Layout.getSectionAddressSize(SD));
 }
 
-void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) {
-  // Create dummy fragments and assign section ordinals.
-  unsigned SectionIndex = 0;
-  for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it)
-    SectionIndex++;
-
-  SD.setOrdinal(SectionIndex);
-
-  // Assign layout order indices to sections and fragments.
-  unsigned FragmentIndex = 0;
-  unsigned i = 0;
-  for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) {
-    MCSectionData *SD = Layout.getSectionOrder()[i];
 
-    for (MCSectionData::iterator it2 = SD->begin(),
-           ie2 = SD->end(); it2 != ie2; ++it2)
-      FragmentIndex++;
-  }
+uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+                                  MCFragment &F,
+                                  const MCFixup &Fixup) {
+   // Evaluate the fixup.
+   MCValue Target;
+   uint64_t FixedValue;
+   if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+     // The fixup was unresolved, we need a relocation. Inform the object
+     // writer of the relocation, and give it an opportunity to adjust the
+     // fixup value if need be.
+     getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+   }
+   return FixedValue;
+ }
 
-  SD.setLayoutOrder(i);
-  for (MCSectionData::iterator it2 = SD.begin(),
-         ie2 = SD.end(); it2 != ie2; ++it2) {
-    it2->setLayoutOrder(FragmentIndex++);
-  }
-  Layout.getSectionOrder().push_back(&SD);
-
-  Layout.LayoutSection(&SD);
-
-  // Layout until everything fits.
-  while (LayoutOnce(Layout))
-    continue;
-
-}
-
-void MCAssembler::Finish(MCObjectWriter *Writer) {
+void MCAssembler::Finish() {
   DEBUG_WITH_TYPE("mc-dump", {
       llvm::errs() << "assembler backend - pre-layout\n--\n";
       dump(); });
@@ -709,47 +543,23 @@ void MCAssembler::Finish(MCObjectWriter *Writer) {
   // Create the layout object.
   MCAsmLayout Layout(*this);
 
-  // Insert additional align fragments for concrete sections to explicitly pad
-  // the previous section to match their alignment requirements. This is for
-  // 'gas' compatibility, it shouldn't strictly be necessary.
-  //
-  // FIXME: This may be Mach-O specific.
-  for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) {
-    MCSectionData *SD = Layout.getSectionOrder()[i];
-
-    // Ignore sections without alignment requirements.
-    unsigned Align = SD->getAlignment();
-    if (Align <= 1)
-      continue;
-
-    // Ignore virtual sections, they don't cause file size modifications.
-    if (getBackend().isVirtualSection(SD->getSection()))
-      continue;
-
-    // Otherwise, create a new align fragment at the end of the previous
-    // section.
-    MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align,
-                                              Layout.getSectionOrder()[i - 1]);
-    AF->setOnlyAlignAddress(true);
-  }
-
   // Create dummy fragments and assign section ordinals.
   unsigned SectionIndex = 0;
   for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
     // Create dummy fragments to eliminate any empty sections, this simplifies
     // layout.
     if (it->getFragmentList().empty())
-      new MCFillFragment(0, 1, 0, it);
+      new MCDataFragment(it);
 
     it->setOrdinal(SectionIndex++);
   }
 
   // Assign layout order indices to sections and fragments.
-  unsigned FragmentIndex = 0;
   for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
     MCSectionData *SD = Layout.getSectionOrder()[i];
     SD->setLayoutOrder(i);
 
+    unsigned FragmentIndex = 0;
     for (MCSectionData::iterator it2 = SD->begin(),
            ie2 = SD->end(); it2 != ie2; ++it2)
       it2->setLayoutOrder(FragmentIndex++);
@@ -772,48 +582,39 @@ void MCAssembler::Finish(MCObjectWriter *Writer) {
 
   uint64_t StartOffset = OS.tell();
 
-  llvm::OwningPtr<MCObjectWriter> OwnWriter(0);
-  if (Writer == 0) {
-    //no custom Writer_ : create the default one life-managed by OwningPtr
-    OwnWriter.reset(getBackend().createObjectWriter(OS));
-    Writer = OwnWriter.get();
-    if (!Writer)
-      report_fatal_error("unable to create object writer!");
-  }
-
   // Allow the object writer a chance to perform post-layout binding (for
   // example, to set the index fields in the symbol data).
-  Writer->ExecutePostLayoutBinding(*this);
+  getWriter().ExecutePostLayoutBinding(*this, Layout);
 
   // Evaluate and apply the fixups, generating relocation entries as necessary.
   for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
     for (MCSectionData::iterator it2 = it->begin(),
            ie2 = it->end(); it2 != ie2; ++it2) {
       MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
-      if (!DF)
-        continue;
-
-      for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
-             ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
-        MCFixup &Fixup = *it3;
-
-        // Evaluate the fixup.
-        MCValue Target;
-        uint64_t FixedValue;
-        if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) {
-          // The fixup was unresolved, we need a relocation. Inform the object
-          // writer of the relocation, and give it an opportunity to adjust the
-          // fixup value if need be.
-          Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue);
+      if (DF) {
+        for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+               ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+          MCFixup &Fixup = *it3;
+          uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+          getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+                                  DF->getContents().size(), FixedValue);
+        }
+      }
+      MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+      if (IF) {
+        for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
+               ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
+          MCFixup &Fixup = *it3;
+          uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+          getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+                                  IF->getCode().size(), FixedValue);
         }
-
-        getBackend().ApplyFixup(Fixup, *DF, FixedValue);
       }
     }
   }
 
   // Write the object file.
-  Writer->WriteObject(*this, Layout);
+  getWriter().WriteObject(*this, Layout);
 
   stats::ObjectBytes += OS.tell() - StartOffset;
 }
@@ -852,100 +653,144 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
   return false;
 }
 
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
-  ++stats::RelaxationSteps;
+bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+                                   MCInstFragment &IF) {
+  if (!FragmentNeedsRelaxation(&IF, Layout))
+    return false;
 
-  // Layout the sections in order.
-  Layout.LayoutFile();
+  ++stats::RelaxedInstructions;
 
+  // FIXME-PERF: We could immediately lower out instructions if we can tell
+  // they are fully resolved, to avoid retesting on later passes.
+
+  // Relax the fragment.
+
+  MCInst Relaxed;
+  getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+
+  // Encode the new instruction.
+  //
+  // FIXME-PERF: If it matters, we could let the target do this. It can
+  // probably do so more efficiently in many cases.
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+  VecOS.flush();
+
+  // Update the instruction fragment.
+  IF.setInst(Relaxed);
+  IF.getCode() = Code;
+  IF.getFixups().clear();
+  // FIXME: Eliminate copy.
+  for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+    IF.getFixups().push_back(Fixups[i]);
+
+  return true;
+}
+
+bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+  int64_t Value = 0;
+  uint64_t OldSize = LF.getContents().size();
+  LF.getValue().EvaluateAsAbsolute(Value, Layout);
+  SmallString<8> &Data = LF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  if (LF.isSigned())
+    MCObjectWriter::EncodeSLEB128(Value, OSE);
+  else
+    MCObjectWriter::EncodeULEB128(Value, OSE);
+  OSE.flush();
+  return OldSize != LF.getContents().size();
+}
+
+bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
+				     MCDwarfLineAddrFragment &DF) {
+  int64_t AddrDelta = 0;
+  uint64_t OldSize = DF.getContents().size();
+  bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+  (void)IsAbs;
+  assert(IsAbs);
+  int64_t LineDelta;
+  LineDelta = DF.getLineDelta();
+  SmallString<8> &Data = DF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+  OSE.flush();
+  return OldSize != Data.size();
+}
+
+bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+                                              MCDwarfCallFrameFragment &DF) {
+  int64_t AddrDelta = 0;
+  uint64_t OldSize = DF.getContents().size();
+  bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+  (void)IsAbs;
+  assert(IsAbs);
+  SmallString<8> &Data = DF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+  OSE.flush();
+  return OldSize != Data.size();
+}
+
+bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+                                    MCSectionData &SD) {
+  MCFragment *FirstInvalidFragment = NULL;
   // Scan for fragments that need relaxation.
+  for (MCSectionData::iterator it2 = SD.begin(),
+         ie2 = SD.end(); it2 != ie2; ++it2) {
+    // Check if this is an fragment that needs relaxation.
+    bool relaxedFrag = false;
+    switch(it2->getKind()) {
+    default:
+          break;
+    case MCFragment::FT_Inst:
+      relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+      break;
+    case MCFragment::FT_Dwarf:
+      relaxedFrag = RelaxDwarfLineAddr(Layout,
+                                       *cast<MCDwarfLineAddrFragment>(it2));
+      break;
+    case MCFragment::FT_DwarfFrame:
+      relaxedFrag =
+        RelaxDwarfCallFrameFragment(Layout,
+                                    *cast<MCDwarfCallFrameFragment>(it2));
+      break;
+    case MCFragment::FT_LEB:
+      relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+      break;
+    }
+    // Update the layout, and remember that we relaxed.
+    if (relaxedFrag && !FirstInvalidFragment)
+      FirstInvalidFragment = it2;
+  }
+  if (FirstInvalidFragment) {
+    Layout.Invalidate(FirstInvalidFragment);
+    return true;
+  }
+  return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+  ++stats::RelaxationSteps;
+
   bool WasRelaxed = false;
   for (iterator it = begin(), ie = end(); it != ie; ++it) {
     MCSectionData &SD = *it;
-
-    for (MCSectionData::iterator it2 = SD.begin(),
-           ie2 = SD.end(); it2 != ie2; ++it2) {
-      // Check if this is an instruction fragment that needs relaxation.
-      MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
-      if (!IF || !FragmentNeedsRelaxation(IF, Layout))
-        continue;
-
-      ++stats::RelaxedInstructions;
-
-      // FIXME-PERF: We could immediately lower out instructions if we can tell
-      // they are fully resolved, to avoid retesting on later passes.
-
-      // Relax the fragment.
-
-      MCInst Relaxed;
-      getBackend().RelaxInstruction(IF->getInst(), Relaxed);
-
-      // Encode the new instruction.
-      //
-      // FIXME-PERF: If it matters, we could let the target do this. It can
-      // probably do so more efficiently in many cases.
-      SmallVector<MCFixup, 4> Fixups;
-      SmallString<256> Code;
-      raw_svector_ostream VecOS(Code);
-      getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
-      VecOS.flush();
-
-      // Update the instruction fragment.
-      int SlideAmount = Code.size() - IF->getInstSize();
-      IF->setInst(Relaxed);
-      IF->getCode() = Code;
-      IF->getFixups().clear();
-      // FIXME: Eliminate copy.
-      for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
-        IF->getFixups().push_back(Fixups[i]);
-
-      // Update the layout, and remember that we relaxed.
-      Layout.UpdateForSlide(IF, SlideAmount);
+    while(LayoutSectionOnce(Layout, SD))
       WasRelaxed = true;
-    }
   }
 
   return WasRelaxed;
 }
 
 void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
-  // Lower out any instruction fragments, to simplify the fixup application and
-  // output.
-  //
-  // FIXME-PERF: We don't have to do this, but the assumption is that it is
-  // cheap (we will mostly end up eliminating fragments and appending on to data
-  // fragments), so the extra complexity downstream isn't worth it. Evaluate
-  // this assumption.
-  for (iterator it = begin(), ie = end(); it != ie; ++it) {
-    MCSectionData &SD = *it;
-
-    for (MCSectionData::iterator it2 = SD.begin(),
-           ie2 = SD.end(); it2 != ie2; ++it2) {
-      MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
-      if (!IF)
-        continue;
-
-      // Create a new data fragment for the instruction.
-      //
-      // FIXME-PERF: Reuse previous data fragment if possible.
-      MCDataFragment *DF = new MCDataFragment();
-      SD.getFragmentList().insert(it2, DF);
-
-      // Update the data fragments layout data.
-      DF->setParent(IF->getParent());
-      DF->setAtom(IF->getAtom());
-      DF->setLayoutOrder(IF->getLayoutOrder());
-      Layout.FragmentReplaced(IF, DF);
-
-      // Copy in the data and the fixups.
-      DF->getContents().append(IF->getCode().begin(), IF->getCode().end());
-      for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i)
-        DF->getFixups().push_back(IF->getFixups()[i]);
-
-      // Delete the instruction fragment and update the iterator.
-      SD.getFragmentList().erase(IF);
-      it2 = DF;
-    }
+  // The layout is done. Mark every fragment as valid.
+  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+    Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
   }
 }
 
@@ -972,18 +817,19 @@ void MCFragment::dump() {
   case MCFragment::FT_Fill:  OS << "MCFillFragment"; break;
   case MCFragment::FT_Inst:  OS << "MCInstFragment"; break;
   case MCFragment::FT_Org:   OS << "MCOrgFragment"; break;
+  case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+  case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+  case MCFragment::FT_LEB:   OS << "MCLEBFragment"; break;
   }
 
   OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
-     << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">";
+     << " Offset:" << Offset << ">";
 
   switch (getKind()) {
   case MCFragment::FT_Align: {
     const MCAlignFragment *AF = cast<MCAlignFragment>(this);
     if (AF->hasEmitNops())
       OS << " (emit nops)";
-    if (AF->hasOnlyAlignAddress())
-      OS << " (only align section)";
     OS << "\n       ";
     OS << " Alignment:" << AF->getAlignment()
        << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
@@ -1032,6 +878,25 @@ void MCFragment::dump() {
     OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
     break;
   }
+  case MCFragment::FT_Dwarf:  {
+    const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+    OS << "\n       ";
+    OS << " AddrDelta:" << OF->getAddrDelta()
+       << " LineDelta:" << OF->getLineDelta();
+    break;
+  }
+  case MCFragment::FT_DwarfFrame:  {
+    const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+    OS << "\n       ";
+    OS << " AddrDelta:" << CF->getAddrDelta();
+    break;
+  }
+  case MCFragment::FT_LEB: {
+    const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+    OS << "\n       ";
+    OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+    break;
+  }
   }
   OS << ">";
 }
@@ -1040,8 +905,7 @@ void MCSectionData::dump() {
   raw_ostream &OS = llvm::errs();
 
   OS << "<MCSectionData";
-  OS << " Alignment:" << getAlignment() << " Address:" << Address
-     << " Fragments:[\n      ";
+  OS << " Alignment:" << getAlignment() << " Fragments:[\n      ";
   for (iterator it = begin(), ie = end(); it != ie; ++it) {
     if (it != begin()) OS << ",\n      ";
     it->dump();
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index d51323785541..c122763b2fe5 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -16,15 +16,3 @@ MCCodeEmitter::MCCodeEmitter() {
 
 MCCodeEmitter::~MCCodeEmitter() {
 }
-
-const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const {
-  static const MCFixupKindInfo Builtins[] = {
-    { "FK_Data_1", 0, 8, 0 },
-    { "FK_Data_2", 0, 16, 0 },
-    { "FK_Data_4", 0, 32, 0 },
-    { "FK_Data_8", 0, 64, 0 }
-  };
-  
-  assert(Kind <= 3 && "Unknown fixup kind");
-  return Builtins[Kind];
-}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index e5586a0d7c31..018f00c08f6f 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -15,8 +15,10 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCLabel.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -24,8 +26,9 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
 typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
 
 
-MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
-                     CurrentDwarfLoc(0,0,0,0,0) {
+MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
+  MAI(mai), TAI(tai), NextUniqueID(0),
+  CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) {
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
   COFFUniquingMap = 0;
@@ -40,7 +43,7 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
 MCContext::~MCContext() {
   // NOTE: The symbols are all allocated out of a bump pointer allocator,
   // we don't need to free them here.
-  
+
   // If we have the MachO uniquing map, free it.
   delete (MachOUniqueMapTy*)MachOUniquingMap;
   delete (ELFUniqueMapTy*)ELFUniquingMap;
@@ -48,6 +51,8 @@ MCContext::~MCContext() {
 
   // If the stream for the .secure_log_unique directive was created free it.
   delete (raw_ostream*)SecureLog;
+
+  delete TAI;
 }
 
 //===----------------------------------------------------------------------===//
@@ -56,20 +61,42 @@ MCContext::~MCContext() {
 
 MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
   assert(!Name.empty() && "Normal symbols cannot be unnamed!");
-  
-  // Determine whether this is an assembler temporary or normal label.
-  bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
-  
+
   // Do the lookup and get the entire StringMapEntry.  We want access to the
   // key if we are creating the entry.
   StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
-  if (Entry.getValue()) return Entry.getValue();
+  MCSymbol *Sym = Entry.getValue();
+
+  if (Sym)
+    return Sym;
+
+  Sym = CreateSymbol(Name);
+  Entry.setValue(Sym);
+  return Sym;
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+  // Determine whether this is an assembler temporary or normal label.
+  bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
+  StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
+  if (NameEntry->getValue()) {
+    assert(isTemporary && "Cannot rename non temporary symbols");
+    SmallString<128> NewName;
+    do {
+      Twine T = Name + Twine(NextUniqueID++);
+      T.toVector(NewName);
+      StringRef foo = NewName;
+      NameEntry = &UsedNames.GetOrCreateValue(foo);
+    } while (NameEntry->getValue());
+  }
+  NameEntry->setValue(true);
 
   // Ok, the entry doesn't already exist.  Have the MCSymbol object itself refer
-  // to the copy of the string that is embedded in the StringMapEntry.
-  MCSymbol *Result = new (*this) MCSymbol(Entry.getKey(), isTemporary);
-  Entry.setValue(Result);
-  return Result; 
+  // to the copy of the string that is embedded in the UsedNames entry.
+  MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+
+  return Result;
 }
 
 MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
@@ -79,8 +106,11 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
 }
 
 MCSymbol *MCContext::CreateTempSymbol() {
-  return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
-                           "tmp" + Twine(NextUniqueID++));
+  SmallString<128> NameSV;
+  Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" +
+    Twine(NextUniqueID++);
+  Name.toVector(NameSV);
+  return CreateSymbol(NameSV);
 }
 
 unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
@@ -123,49 +153,70 @@ const MCSectionMachO *MCContext::
 getMachOSection(StringRef Segment, StringRef Section,
                 unsigned TypeAndAttributes,
                 unsigned Reserved2, SectionKind Kind) {
-  
+
   // We unique sections by their segment/section pair.  The returned section
   // may not have the same flags as the requested section, if so this should be
   // diagnosed by the client as an error.
-  
+
   // Create the map if it doesn't already exist.
   if (MachOUniquingMap == 0)
     MachOUniquingMap = new MachOUniqueMapTy();
   MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
-  
+
   // Form the name to look up.
   SmallString<64> Name;
   Name += Segment;
   Name.push_back(',');
   Name += Section;
-  
+
   // Do the lookup, if we have a hit, return it.
   const MCSectionMachO *&Entry = Map[Name.str()];
   if (Entry) return Entry;
-  
+
   // Otherwise, return a new section.
   return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
                                             Reserved2, Kind);
 }
 
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+              SectionKind Kind) {
+  return getELFSection(Section, Type, Flags, Kind, 0, "");
+}
 
-const MCSection *MCContext::
+const MCSectionELF *MCContext::
 getELFSection(StringRef Section, unsigned Type, unsigned Flags,
-              SectionKind Kind, bool IsExplicit, unsigned EntrySize) {
+              SectionKind Kind, unsigned EntrySize, StringRef Group) {
   if (ELFUniquingMap == 0)
     ELFUniquingMap = new ELFUniqueMapTy();
   ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
-  
+
   // Do the lookup, if we have a hit, return it.
   StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
   if (Entry.getValue()) return Entry.getValue();
-  
+
+  // Possibly refine the entry size first.
+  if (!EntrySize) {
+    EntrySize = MCSectionELF::DetermineEntrySize(Kind);
+  }
+
+  MCSymbol *GroupSym = NULL;
+  if (!Group.empty())
+    GroupSym = GetOrCreateSymbol(Group);
+
   MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
-                                                  Kind, IsExplicit, EntrySize);
+                                                  Kind, EntrySize, GroupSym);
   Entry.setValue(Result);
   return Result;
 }
 
+const MCSectionELF *MCContext::CreateELFGroupSection() {
+  MCSectionELF *Result =
+    new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
+                             SectionKind::getReadOnly(), 4, NULL);
+  return Result;
+}
+
 const MCSection *MCContext::getCOFFSection(StringRef Section,
                                            unsigned Characteristics,
                                            int Selection,
@@ -173,15 +224,15 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
   if (COFFUniquingMap == 0)
     COFFUniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
-  
+
   // Do the lookup, if we have a hit, return it.
   StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
   if (Entry.getValue()) return Entry.getValue();
-  
+
   MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
                                                     Characteristics,
                                                     Selection, Kind);
-  
+
   Entry.setValue(Result);
   return Result;
 }
@@ -240,7 +291,7 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
     // stored at MCDwarfFiles[FileNumber].Name .
     DirIndex++;
   }
-  
+
   // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
   // vector.
   char *Buf = static_cast<char *>(Allocate(Name.size()));
@@ -251,15 +302,11 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
   return FileNumber;
 }
 
-/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
 /// currently is assigned and false otherwise.
-bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) {
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
   if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
     return false;
 
-  MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
-  if (ExistingFile)
-    return true;
-  else
-    return false;
+  return MCDwarfFiles[FileNumber] != 0;
 }
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 697b3d9c0515..2fd14db2a45d 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -354,7 +354,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
   
   SourceMgr sourceMgr;
   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
-  MCContext context(*AsmInfo);
+  MCContext context(*AsmInfo, NULL);
   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
   OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
                                                          context, *streamer,
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index e2f850bcdba9..71e45f0b042f 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -21,7 +21,7 @@
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 
 #include <map>
 #include <set>
@@ -89,8 +89,10 @@ struct EDDisassembler {
     bool operator<(const CPUKey &key) const {
       if(Arch > key.Arch)
         return false;
-      if(Syntax >= key.Syntax)
-        return false;
+      else if (Arch == key.Arch) {
+        if(Syntax > key.Syntax)
+          return false;
+      }
       return true;
     }
   };
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
index e22408f060b1..63b049fe40fd 100644
--- a/lib/MC/MCDisassembler/EDInst.cpp
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -62,6 +62,8 @@ int EDInst::stringify() {
   
   if (Disassembler.printInst(String, *Inst))
     return StringifyResult.setResult(-1);
+
+  String.push_back('\n');
   
   return StringifyResult.setResult(0);
 }
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
index 39d264fb7aad..ceb9505028de 100644
--- a/lib/MC/MCDisassembler/EDInst.h
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -16,7 +16,7 @@
 #ifndef LLVM_EDINST_H
 #define LLVM_EDINST_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/SmallVector.h"
 #include <string>
 #include <vector>
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 2aed123368da..cfeb56fa3dfd 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -260,23 +260,20 @@ int EDOperand::isMemory() {
 }
 
 #ifdef __BLOCKS__
-struct RegisterReaderWrapper {
-  EDOperand::EDRegisterBlock_t regBlock;
-};
+namespace {
+  struct RegisterReaderWrapper {
+    EDOperand::EDRegisterBlock_t regBlock;
+  };
+}
 
-int readerWrapperCallback(uint64_t *value, 
-                          unsigned regID, 
-                          void *arg) {
-  struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
+static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) {
+  RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg;
   return wrapper->regBlock(value, regID);
 }
 
-int EDOperand::evaluate(uint64_t &result,
-                        EDRegisterBlock_t regBlock) {
-  struct RegisterReaderWrapper wrapper;
+int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) {
+  RegisterReaderWrapper wrapper;
   wrapper.regBlock = regBlock;
-  return evaluate(result, 
-                  readerWrapperCallback, 
-                  (void*)&wrapper);
+  return evaluate(result, readerWrapperCallback, (void*)&wrapper);
 }
 #endif
diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h
index 6e695224318c..50260ec965a6 100644
--- a/lib/MC/MCDisassembler/EDOperand.h
+++ b/lib/MC/MCDisassembler/EDOperand.h
@@ -16,7 +16,7 @@
 #ifndef LLVM_EDOPERAND_H
 #define LLVM_EDOPERAND_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
index 6b2aeac60ba5..ba467078686a 100644
--- a/lib/MC/MCDisassembler/EDToken.h
+++ b/lib/MC/MCDisassembler/EDToken.h
@@ -17,7 +17,7 @@
 #define LLVM_EDTOKEN_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 #include <vector>
 
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 2da71f96c676..112d7d887a2d 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,11 +7,420 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
 using namespace llvm;
 
+// Given a special op, return the address skip amount (in units of
+// DWARF2_LINE_MIN_INSN_LENGTH.
+#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
+
+// The maximum address skip amount that can be encoded with a special op.
+#define MAX_SPECIAL_ADDR_DELTA		SPECIAL_ADDR(255)
+
+// First special line opcode - leave room for the standard opcodes.
+// Note: If you want to change this, you'll have to update the
+// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().  
+#define DWARF2_LINE_OPCODE_BASE		13
+
+// Minimum line offset in a special line info. opcode.  This value
+// was chosen to give a reasonable range of values.
+#define DWARF2_LINE_BASE		-5
+
+// Range of line offsets in a special line info. opcode.
+# define DWARF2_LINE_RANGE		14
+
+// Define the architecture-dependent minimum instruction length (in bytes).
+// This value should be rather too small than too big.
+# define DWARF2_LINE_MIN_INSN_LENGTH	1
+
+// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting,
+// this routine is a nop and will be optimized away.
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta)
+{
+  if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
+    return AddrDelta;
+  if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
+    // TODO: report this error, but really only once.
+    ;
+  }
+  return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH;
+}
+
+//
+// This is called when an instruction is assembled into the specified section
+// and if there is information from the last .loc directive that has yet to have
+// a line entry made for it is made.
+//
+void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+  if (!MCOS->getContext().getDwarfLocSeen())
+    return;
+
+  // Create a symbol at in the current section for use in the line entry.
+  MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+  // Set the value of the symbol to use for the MCLineEntry.
+  MCOS->EmitLabel(LineSym);
+
+  // Get the current .loc info saved in the context.
+  const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
+
+  // Create a (local) line entry with the symbol and the current .loc info.
+  MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+  // clear DwarfLocSeen saying the current .loc info is now used.
+  MCOS->getContext().ClearDwarfLocSeen();
+
+  // Get the MCLineSection for this section, if one does not exist for this
+  // section create it.
+  const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+    MCOS->getContext().getMCLineSections();
+  MCLineSection *LineSection = MCLineSections.lookup(Section);
+  if (!LineSection) {
+    // Create a new MCLineSection.  This will be deleted after the dwarf line
+    // table is created using it by iterating through the MCLineSections
+    // DenseMap.
+    LineSection = new MCLineSection;
+    // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+    MCOS->getContext().addMCLineSection(Section, LineSection);
+  }
+
+  // Add the line entry to this section's entries.
+  LineSection->addLineEntry(LineEntry);
+}
+
+//
+// This helper routine returns an expression of End - Start + IntVal .
+// 
+static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
+                                                  const MCSymbol &Start,
+                                                  const MCSymbol &End,
+                                                  int IntVal) {
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  const MCExpr *Res =
+    MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+  const MCExpr *RHS =
+    MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+  const MCExpr *Res1 =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+  const MCExpr *Res2 =
+    MCConstantExpr::Create(IntVal, MCOS.getContext());
+  const MCExpr *Res3 =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+  return Res3;
+}
+
+//
+// This emits the Dwarf line table for the specified section from the entries
+// in the LineSection.
+//
+static inline void EmitDwarfLineTable(MCStreamer *MCOS,
+                                      const MCSection *Section,
+                                      const MCLineSection *LineSection) {
+  unsigned FileNum = 1;
+  unsigned LastLine = 1;
+  unsigned Column = 0;
+  unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+  unsigned Isa = 0;
+  MCSymbol *LastLabel = NULL;
+
+  // Loop through each MCLineEntry and encode the dwarf line number table.
+  for (MCLineSection::const_iterator
+         it = LineSection->getMCLineEntries()->begin(),
+         ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) {
+
+    if (FileNum != it->getFileNum()) {
+      FileNum = it->getFileNum();
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
+      MCOS->EmitULEB128IntValue(FileNum);
+    }
+    if (Column != it->getColumn()) {
+      Column = it->getColumn();
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
+      MCOS->EmitULEB128IntValue(Column);
+    }
+    if (Isa != it->getIsa()) {
+      Isa = it->getIsa();
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
+      MCOS->EmitULEB128IntValue(Isa);
+    }
+    if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
+      Flags = it->getFlags();
+      MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
+    }
+    if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
+    if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
+    if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
+      MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+    int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
+    MCSymbol *Label = it->getLabel();
+
+    // At this point we want to emit/create the sequence to encode the delta in
+    // line numbers and the increment of the address from the previous Label
+    // and the current Label.
+    MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+
+    LastLine = it->getLine();
+    LastLabel = Label;
+  }
+
+  // Emit a DW_LNE_end_sequence for the end of the section.
+  // Using the pointer Section create a temporary label at the end of the
+  // section and use that and the LastLabel to compute the address delta
+  // and use INT64_MAX as the line delta which is the signal that this is
+  // actually a DW_LNE_end_sequence.
+
+  // Switch to the section to be able to create a symbol at its end.
+  MCOS->SwitchSection(Section);
+
+  MCContext &context = MCOS->getContext();
+  // Create a symbol at the end of the section.
+  MCSymbol *SectionEnd = context.CreateTempSymbol();
+  // Set the value of the symbol, as we are at the end of the section.
+  MCOS->EmitLabel(SectionEnd);
+
+  // Switch back the the dwarf line section.
+  MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+  MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd);
+}
+
+//
+// This emits the Dwarf file and the line tables.
+//
+void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+  MCContext &context = MCOS->getContext();
+  // Switch to the section where the table will be emitted into.
+  MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+  // Create a symbol at the beginning of this section.
+  MCSymbol *LineStartSym = context.CreateTempSymbol();
+  // Set the value of the symbol, as we are at the start of the section.
+  MCOS->EmitLabel(LineStartSym);
+
+  // Create a symbol for the end of the section (to be set when we get there).
+  MCSymbol *LineEndSym = context.CreateTempSymbol();
+
+  // The first 4 bytes is the total length of the information for this
+  // compilation unit (not including these 4 bytes for the length).
+  MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
+                     4);
+
+  // Next 2 bytes is the Version, which is Dwarf 2.
+  MCOS->EmitIntValue(2, 2);
+
+  // Create a symbol for the end of the prologue (to be set when we get there).
+  MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+
+  // Length of the prologue, is the next 4 bytes.  Which is the start of the
+  // section to the end of the prologue.  Not including the 4 bytes for the
+  // total length, the 2 bytes for the version, and these 4 bytes for the
+  // length of the prologue.
+  MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
+                                        (4 + 2 + 4)),
+                  4, 0);
+
+  // Parameters of the state machine, are next.
+  MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
+  MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
+  MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
+  MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+  MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
+
+  // Standard opcode lengths
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
+  MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+  MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
+  MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
+  MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+  MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+  MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+  MCOS->EmitIntValue(1, 1); // DW_LNS_set_isa
+
+  // Put out the directory and file tables.
+
+  // First the directory table.
+  const std::vector<StringRef> &MCDwarfDirs =
+    context.getMCDwarfDirs();
+  for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+    MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+    MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+  }
+  MCOS->EmitIntValue(0, 1); // Terminate the directory list
+
+  // Second the file table.
+  const std::vector<MCDwarfFile *> &MCDwarfFiles =
+    MCOS->getContext().getMCDwarfFiles();
+  for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+    MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+    MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+    // the Directory num
+    MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
+    MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
+    MCOS->EmitIntValue(0, 1); // filesize (always 0)
+  }
+  MCOS->EmitIntValue(0, 1); // Terminate the file list
+
+  // This is the end of the prologue, so set the value of the symbol at the
+  // end of the prologue (that was used in a previous expression).
+  MCOS->EmitLabel(ProEndSym);
+
+  // Put out the line tables.
+  const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+    MCOS->getContext().getMCLineSections();
+  const std::vector<const MCSection *> &MCLineSectionOrder =
+    MCOS->getContext().getMCLineSectionOrder();
+  for (std::vector<const MCSection*>::const_iterator it =
+	MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+       ++it) {
+    const MCSection *Sec = *it;
+    const MCLineSection *Line = MCLineSections.lookup(Sec);
+    EmitDwarfLineTable(MCOS, Sec, Line);
+
+    // Now delete the MCLineSections that were created in MCLineEntry::Make()
+    // and used to emit the line table.
+    delete Line;
+  }
+
+  if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
+      && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
+    // The darwin9 linker has a bug (see PR8715). For for 32-bit architectures
+    // it requires:  
+    // total_length >= prologue_length + 10
+    // We are 4 bytes short, since we have total_length = 51 and
+    // prologue_length = 45
+
+    // The regular end_sequence should be sufficient.
+    MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
+  }
+
+  // This is the end of the section, so set the value of the symbol at the end
+  // of this section (that was used in a previous expression).
+  MCOS->EmitLabel(LineEndSym);
+}
+
+/// Utility function to write the encoding to an object writer.
+void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta,
+                            uint64_t AddrDelta) {
+  SmallString<256> Tmp;
+  raw_svector_ostream OS(Tmp);
+  MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+  OW->WriteBytes(OS.str());
+}
+
+/// Utility function to emit the encoding to a streamer.
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
+                           uint64_t AddrDelta) {
+  SmallString<256> Tmp;
+  raw_svector_ostream OS(Tmp);
+  MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+  MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
+void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
+                             raw_ostream &OS) {
+  uint64_t Temp, Opcode;
+  bool NeedCopy = false;
+
+  // Scale the address delta by the minimum instruction length.
+  AddrDelta = ScaleAddrDelta(AddrDelta);
+
+  // A LineDelta of INT64_MAX is a signal that this is actually a
+  // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the 
+  // end_sequence to emit the matrix entry.
+  if (LineDelta == INT64_MAX) {
+    if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+      OS << char(dwarf::DW_LNS_const_add_pc);
+    else {
+      OS << char(dwarf::DW_LNS_advance_pc);
+      SmallString<32> Tmp;
+      raw_svector_ostream OSE(Tmp);
+      MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+      OS << OSE.str();
+    }
+    OS << char(dwarf::DW_LNS_extended_op);
+    OS << char(1);
+    OS << char(dwarf::DW_LNE_end_sequence);
+    return;
+  }
+
+  // Bias the line delta by the base.
+  Temp = LineDelta - DWARF2_LINE_BASE;
+
+  // If the line increment is out of range of a special opcode, we must encode
+  // it with DW_LNS_advance_line.
+  if (Temp >= DWARF2_LINE_RANGE) {
+    OS << char(dwarf::DW_LNS_advance_line);
+    SmallString<32> Tmp;
+    raw_svector_ostream OSE(Tmp);
+    MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
+    OS << OSE.str();
+
+    LineDelta = 0;
+    Temp = 0 - DWARF2_LINE_BASE;
+    NeedCopy = true;
+  }
+
+  // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
+  if (LineDelta == 0 && AddrDelta == 0) {
+    OS << char(dwarf::DW_LNS_copy);
+    return;
+  }
+
+  // Bias the opcode by the special opcode base.
+  Temp += DWARF2_LINE_OPCODE_BASE;
+
+  // Avoid overflow when addr_delta is large.
+  if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+    // Try using a special opcode.
+    Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+    if (Opcode <= 255) {
+      OS << char(Opcode);
+      return;
+    }
+
+    // Try using DW_LNS_const_add_pc followed by special op.
+    Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+    if (Opcode <= 255) {
+      OS << char(dwarf::DW_LNS_const_add_pc);
+      OS << char(Opcode);
+      return;
+    }
+  }
+
+  // Otherwise use DW_LNS_advance_pc.
+  OS << char(dwarf::DW_LNS_advance_pc);
+  SmallString<32> Tmp;
+  raw_svector_ostream OSE(Tmp);
+  MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+  OS << OSE.str();
+
+  if (NeedCopy)
+    OS << char(dwarf::DW_LNS_copy);
+  else
+    OS << char(Temp);
+}
+
 void MCDwarfFile::print(raw_ostream &OS) const {
   OS << '"' << getName() << '"';
 }
@@ -19,3 +428,387 @@ void MCDwarfFile::print(raw_ostream &OS) const {
 void MCDwarfFile::dump() const {
   print(dbgs());
 }
+
+static int getDataAlignmentFactor(MCStreamer &streamer) {
+  MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  int size = asmInfo.getPointerSize();
+  if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+    return size;
+ else
+   return -size;
+}
+
+static void EmitCFIInstruction(MCStreamer &Streamer,
+                               const MCCFIInstruction &Instr) {
+  int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+
+  switch (Instr.getOperation()) {
+  case MCCFIInstruction::Move: {
+    const MachineLocation &Dst = Instr.getDestination();
+    const MachineLocation &Src = Instr.getSource();
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      assert(!Src.isReg() && "Machine move not supported yet.");
+
+      if (Src.getReg() == MachineLocation::VirtualFP) {
+        Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
+      } else {
+        Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+        Streamer.EmitULEB128IntValue(Src.getReg());
+      }
+
+      Streamer.EmitULEB128IntValue(-Src.getOffset(), 1);
+      return;
+    }
+
+    if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      assert(Dst.isReg() && "Machine move not supported yet.");
+      Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+      Streamer.EmitULEB128IntValue(Dst.getReg());
+      return;
+    }
+
+    unsigned Reg = Src.getReg();
+    int Offset = Dst.getOffset() / dataAlignmentFactor;
+
+    if (Offset < 0) {
+      Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+      Streamer.EmitULEB128IntValue(Reg);
+      Streamer.EmitSLEB128IntValue(Offset);
+    } else if (Reg < 64) {
+      Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+      Streamer.EmitULEB128IntValue(Offset, 1);
+    } else {
+      Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+      Streamer.EmitULEB128IntValue(Reg, 1);
+      Streamer.EmitULEB128IntValue(Offset, 1);
+    }
+    return;
+  }
+  case MCCFIInstruction::Remember:
+    Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
+    return;
+  case MCCFIInstruction::Restore:
+    Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
+    return;
+  }
+  llvm_unreachable("Unhandled case in switch");
+}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+static void EmitCFIInstructions(MCStreamer &streamer,
+                                const std::vector<MCCFIInstruction> &Instrs,
+                                MCSymbol *BaseLabel) {
+  for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
+    const MCCFIInstruction &Instr = Instrs[i];
+    MCSymbol *Label = Instr.getLabel();
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+    // Advance row if new location.
+    if (BaseLabel && Label) {
+      MCSymbol *ThisSym = Label;
+      if (ThisSym != BaseLabel) {
+        streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+        BaseLabel = ThisSym;
+      }
+    }
+
+    EmitCFIInstruction(streamer, Instr);
+  }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+                       unsigned symbolEncoding) {
+  MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  unsigned format = symbolEncoding & 0x0f;
+  unsigned application = symbolEncoding & 0x70;
+  unsigned size;
+  switch (format) {
+  default:
+    assert(0 && "Unknown Encoding");
+  case dwarf::DW_EH_PE_absptr:
+  case dwarf::DW_EH_PE_signed:
+    size = asmInfo.getPointerSize();
+    break;
+  case dwarf::DW_EH_PE_udata2:
+  case dwarf::DW_EH_PE_sdata2:
+    size = 2;
+    break;
+  case dwarf::DW_EH_PE_udata4:
+  case dwarf::DW_EH_PE_sdata4:
+    size = 4;
+    break;
+  case dwarf::DW_EH_PE_udata8:
+  case dwarf::DW_EH_PE_sdata8:
+    size = 8;
+    break;
+  }
+  switch (application) {
+  default:
+    assert(0 && "Unknown Encoding");
+    break;
+  case 0:
+    streamer.EmitSymbolValue(&symbol, size);
+    break;
+  case dwarf::DW_EH_PE_pcrel:
+    streamer.EmitPCRelSymbolValue(&symbol, size);
+    break;
+  }
+}
+
+static const MachineLocation TranslateMachineLocation(
+                                                  const TargetAsmInfo &AsmInfo,
+                                                  const MachineLocation &Loc) {
+  unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+    MachineLocation::VirtualFP :
+    unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
+  const MachineLocation &NewLoc = Loc.isReg() ?
+    MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+  return NewLoc;
+}
+
+static const MCSymbol &EmitCIE(MCStreamer &streamer,
+                               const MCSymbol *personality,
+                               unsigned personalityEncoding,
+                               const MCSymbol *lsda,
+                               unsigned lsdaEncoding) {
+  MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  const MCSection &section = *asmInfo.getEHFrameSection();
+  streamer.SwitchSection(&section);
+  MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol();
+  MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol();
+
+  // Length
+  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
+                                               *sectionEnd, 4);
+  streamer.EmitLabel(sectionStart);
+  streamer.EmitValue(Length, 4);
+
+  // CIE ID
+  streamer.EmitIntValue(0, 4);
+
+  // Version
+  streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+
+  // Augmentation String
+  SmallString<8> Augmentation;
+  Augmentation += "z";
+  if (personality)
+    Augmentation += "P";
+  if (lsda)
+    Augmentation += "L";
+  Augmentation += "R";
+  streamer.EmitBytes(Augmentation.str(), 0);
+  streamer.EmitIntValue(0, 1);
+
+  // Code Alignment Factor
+  streamer.EmitULEB128IntValue(1);
+
+  // Data Alignment Factor
+  streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+
+  // Return Address Register
+  streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true));
+
+  // Augmentation Data Length (optional)
+  MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
+  MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
+  const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
+                                                           *augmentationStart,
+                                                           *augmentationEnd, 0);
+  streamer.EmitULEB128Value(augmentationLength);
+
+  // Augmentation Data (optional)
+  streamer.EmitLabel(augmentationStart);
+  if (personality) {
+    // Personality Encoding
+    streamer.EmitIntValue(personalityEncoding, 1);
+    // Personality
+    EmitSymbol(streamer, *personality, personalityEncoding);
+  }
+  if (lsda) {
+    // LSDA Encoding
+    streamer.EmitIntValue(lsdaEncoding, 1);
+  }
+  // Encoding of the FDE pointers
+  streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
+  streamer.EmitLabel(augmentationEnd);
+
+  // Initial Instructions
+
+  const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState();
+  std::vector<MCCFIInstruction> Instructions;
+
+  for (int i = 0, n = Moves.size(); i != n; ++i) {
+    MCSymbol *Label = Moves[i].getLabel();
+    const MachineLocation &Dst =
+      TranslateMachineLocation(asmInfo, Moves[i].getDestination());
+    const MachineLocation &Src =
+      TranslateMachineLocation(asmInfo, Moves[i].getSource());
+    MCCFIInstruction Inst(Label, Dst, Src);
+    Instructions.push_back(Inst);
+  }
+
+  EmitCFIInstructions(streamer, Instructions, NULL);
+
+  // Padding
+  streamer.EmitValueToAlignment(4);
+
+  streamer.EmitLabel(sectionEnd);
+  return *sectionStart;
+}
+
+static MCSymbol *EmitFDE(MCStreamer &streamer,
+                         const MCSymbol &cieStart,
+                         const MCDwarfFrameInfo &frame) {
+  MCContext &context = streamer.getContext();
+  MCSymbol *fdeStart = context.CreateTempSymbol();
+  MCSymbol *fdeEnd = context.CreateTempSymbol();
+
+  // Length
+  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+  streamer.EmitValue(Length, 4);
+
+  streamer.EmitLabel(fdeStart);
+  // CIE Pointer
+  const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
+                                               0);
+  streamer.EmitValue(offset, 4);
+
+  // PC Begin
+  streamer.EmitPCRelSymbolValue(frame.Begin, 4);
+
+  // PC Range
+  const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
+                                              *frame.End, 0);
+  streamer.EmitValue(Range, 4);
+
+  // Augmentation Data Length
+  MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
+  MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
+  const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
+                                                           *augmentationStart,
+                                                           *augmentationEnd, 0);
+  streamer.EmitULEB128Value(augmentationLength);
+
+  // Augmentation Data
+  streamer.EmitLabel(augmentationStart);
+  if (frame.Lsda)
+    EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding);
+  streamer.EmitLabel(augmentationEnd);
+  // Call Frame Instructions
+
+  EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+
+  // Padding
+  streamer.EmitValueToAlignment(4);
+
+  return fdeEnd;
+}
+
+namespace {
+  struct CIEKey {
+    static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
+    static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+
+    CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
+           unsigned LsdaEncoding_) : Personality(Personality_),
+                                     PersonalityEncoding(PersonalityEncoding_),
+                                     LsdaEncoding(LsdaEncoding_) {
+    }
+    const MCSymbol* Personality;
+    unsigned PersonalityEncoding;
+    unsigned LsdaEncoding;
+  };
+}
+
+namespace llvm {
+  template <>
+  struct DenseMapInfo<CIEKey> {
+    static CIEKey getEmptyKey() {
+      return CIEKey::getEmptyKey();
+    }
+    static CIEKey getTombstoneKey() {
+      return CIEKey::getTombstoneKey();
+    }
+    static unsigned getHashValue(const CIEKey &Key) {
+      FoldingSetNodeID ID;
+      ID.AddPointer(Key.Personality);
+      ID.AddInteger(Key.PersonalityEncoding);
+      ID.AddInteger(Key.LsdaEncoding);
+      return ID.ComputeHash();
+    }
+    static bool isEqual(const CIEKey &LHS,
+                        const CIEKey &RHS) {
+      return LHS.Personality == RHS.Personality &&
+        LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+        LHS.LsdaEncoding == RHS.LsdaEncoding;
+    }
+  };
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) {
+  const MCContext &context = streamer.getContext();
+  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  MCSymbol *fdeEnd = NULL;
+  DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+
+  for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
+    const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
+    CIEKey key(frame.Personality, frame.PersonalityEncoding,
+               frame.LsdaEncoding);
+    const MCSymbol *&cieStart = CIEStarts[key];
+    if (!cieStart)
+      cieStart = &EmitCIE(streamer, frame.Personality,
+                          frame.PersonalityEncoding, frame.Lsda,
+                          frame.LsdaEncoding);
+    fdeEnd = EmitFDE(streamer, *cieStart, frame);
+    if (i != n - 1)
+      streamer.EmitLabel(fdeEnd);
+  }
+
+  streamer.EmitValueToAlignment(asmInfo.getPointerSize());
+  if (fdeEnd)
+    streamer.EmitLabel(fdeEnd);
+}
+
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+                                         uint64_t AddrDelta) {
+  SmallString<256> Tmp;
+  raw_svector_ostream OS(Tmp);
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+  Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
+                                           raw_ostream &OS) {
+  // FIXME: Assumes the code alignment factor is 1.
+  if (AddrDelta == 0) {
+  } else if (isUIntN(6, AddrDelta)) {
+    uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
+    OS << Opcode;
+  } else if (isUInt<8>(AddrDelta)) {
+    OS << uint8_t(dwarf::DW_CFA_advance_loc1);
+    OS << uint8_t(AddrDelta);
+  } else if (isUInt<16>(AddrDelta)) {
+    // FIXME: check what is the correct behavior on a big endian machine.
+    OS << uint8_t(dwarf::DW_CFA_advance_loc2);
+    OS << uint8_t( AddrDelta       & 0xff);
+    OS << uint8_t((AddrDelta >> 8) & 0xff);
+  } else {
+    // FIXME: check what is the correct behavior on a big endian machine.
+    assert(isUInt<32>(AddrDelta));
+    OS << uint8_t(dwarf::DW_CFA_advance_loc4);
+    OS << uint8_t( AddrDelta        & 0xff);
+    OS << uint8_t((AddrDelta >> 8)  & 0xff);
+    OS << uint8_t((AddrDelta >> 16) & 0xff);
+    OS << uint8_t((AddrDelta >> 24) & 0xff);
+
+  }
+}
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..12a02a9e9740
--- /dev/null
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -0,0 +1,23 @@
+//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
+                                                 Triple::OSType OSType_,
+                                                 uint16_t EMachine_,
+                                                 bool HasRelocationAddend_)
+  : OSType(OSType_), EMachine(EMachine_),
+    HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
+}
+
+MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 570c3917ab46..e49074da3994 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/MC/MCStreamer.h"
 
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCCodeEmitter.h"
@@ -23,19 +24,51 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
 namespace {
 
+static void SetBinding(MCSymbolData &SD, unsigned Binding) {
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+  SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+static unsigned GetBinding(const MCSymbolData &SD) {
+  uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+  assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+         Binding == ELF::STB_WEAK);
+  return Binding;
+}
+
+static void SetType(MCSymbolData &SD, unsigned Type) {
+  assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+         Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+         Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+         Type == ELF::STT_TLS);
+
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+  SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+static void SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+  uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift);
+  SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
 class MCELFStreamer : public MCObjectStreamer {
-  void EmitInstToFragment(const MCInst &Inst);
-  void EmitInstToData(const MCInst &Inst);
 public:
   MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
                   raw_ostream &OS, MCCodeEmitter *Emitter)
@@ -46,9 +79,13 @@ public:
   /// @name MCStreamer Interface
   /// @{
 
+  virtual void InitSections();
+  virtual void ChangeSection(const MCSection *Section);
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
     assert(0 && "ELF doesn't support this directive");
@@ -76,9 +113,8 @@ public:
      SD.setSize(Value);
   }
 
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
-    assert(0 && "ELF doesn't support this directive");
-  }
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
   virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
                             unsigned Size = 0, unsigned ByteAlignment = 0) {
     assert(0 && "ELF doesn't support this directive");
@@ -88,49 +124,84 @@ public:
     assert(0 && "ELF doesn't support this directive");
   }
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
-  virtual void EmitGPRel32Value(const MCExpr *Value) {
-    assert(0 && "ELF doesn't support this directive");
-  }
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                     unsigned ValueSize = 1,
                                     unsigned MaxBytesToEmit = 0);
   virtual void EmitCodeAlignment(unsigned ByteAlignment,
                                  unsigned MaxBytesToEmit = 0);
-  virtual void EmitValueToOffset(const MCExpr *Offset,
-                                 unsigned char Value = 0);
 
   virtual void EmitFileDirective(StringRef Filename);
-  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
-    DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n");
-  }
 
-  virtual void EmitInstruction(const MCInst &Inst);
   virtual void Finish();
 
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitInstToData(const MCInst &Inst);
+
+  void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+  struct LocalCommon {
+    MCSymbolData *SD;
+    uint64_t Size;
+    unsigned ByteAlignment;
+  };
+  std::vector<LocalCommon> LocalCommons;
+
+  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
   /// @}
+  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+                  SectionKind Kind) {
+    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+  }
+
+  void SetSectionData() {
+    SetSection(".data", ELF::SHT_PROGBITS,
+               ELF::SHF_WRITE |ELF::SHF_ALLOC,
+               SectionKind::getDataRel());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionText() {
+    SetSection(".text", ELF::SHT_PROGBITS,
+               ELF::SHF_EXECINSTR |
+               ELF::SHF_ALLOC, SectionKind::getText());
+    EmitCodeAlignment(4, 0);
+  }
+  void SetSectionBss() {
+    SetSection(".bss", ELF::SHT_NOBITS,
+               ELF::SHF_WRITE |
+               ELF::SHF_ALLOC, SectionKind::getBSS());
+    EmitCodeAlignment(4, 0);
+  }
 };
 
 } // end anonymous namespace.
 
+void MCELFStreamer::InitSections() {
+  // This emulates the same behavior of GNU as. This makes it easier
+  // to compare the output as the major sections are in the same order.
+  SetSectionText();
+  SetSectionData();
+  SetSectionBss();
+  SetSectionText();
+}
+
 void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
-  // FIXME: This is wasteful, we don't necessarily need to create a data
-  // fragment. Instead, we should mark the symbol as pointing into the data
-  // fragment if it exists, otherwise we should just queue the label and set its
-  // fragment pointer when we emit the next fragment.
-  MCDataFragment *F = getOrCreateDataFragment();
-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
-  SD.setFragment(F);
-  SD.setOffset(F->getContents().size());
+  MCObjectStreamer::EmitLabel(Symbol);
 
-  Symbol->setSection(*CurSection);
+  const MCSectionELF &Section =
+    static_cast<const MCSectionELF&>(Symbol->getSection());
+  MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+  if (Section.getFlags() & ELF::SHF_TLS)
+    SetType(SD, ELF::STT_TLS);
 }
 
 void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   switch (Flag) {
+  case MCAF_SyntaxUnified: return; // no-op here.
+  case MCAF_Code16: return; // no-op here.
+  case MCAF_Code32: return; // no-op here.
   case MCAF_SubsectionsViaSymbols:
     getAssembler().setSubsectionsViaSymbols(true);
     return;
@@ -139,6 +210,10 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   assert(0 && "invalid assembler flag!");
 }
 
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+  // FIXME: Anything needed here to flag the function as thumb?
+}
+
 void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
   // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
   // MCObjectStreamer.
@@ -147,6 +222,21 @@ void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
   Symbol->setVariableValue(AddValueSymbols(Value));
 }
 
+void MCELFStreamer::ChangeSection(const MCSection *Section) {
+  const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+  if (Grp)
+    getAssembler().getOrCreateSymbolData(*Grp);
+  this->MCObjectStreamer::ChangeSection(Section);
+}
+
+void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+  getAssembler().getOrCreateSymbolData(*Symbol);
+  MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias);
+  AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref);
+  const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext());
+  Alias->setVariableValue(Value);
+}
+
 void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                           MCSymbolAttr Attribute) {
   // Indirect symbols are handled differently, to match how 'as' handles
@@ -176,6 +266,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_LazyReference:
   case MCSA_Reference:
   case MCSA_NoDeadStrip:
+  case MCSA_SymbolResolver:
   case MCSA_PrivateExtern:
   case MCSA_WeakDefinition:
   case MCSA_WeakDefAutoPrivate:
@@ -185,50 +276,59 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     assert(0 && "Invalid symbol attribute for ELF!");
     break;
 
+  case MCSA_ELF_TypeGnuUniqueObject:
+    // Ignore for now.
+    break;
+
   case MCSA_Global:
-    SD.setFlags(SD.getFlags() | ELF_STB_Global);
+    SetBinding(SD, ELF::STB_GLOBAL);
     SD.setExternal(true);
+    BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_WeakReference:
   case MCSA_Weak:
-    SD.setFlags(SD.getFlags() | ELF_STB_Weak);
+    SetBinding(SD, ELF::STB_WEAK);
+    SD.setExternal(true);
+    BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_Local:
-    SD.setFlags(SD.getFlags() | ELF_STB_Local);
+    SetBinding(SD, ELF::STB_LOCAL);
+    SD.setExternal(false);
+    BindingExplicitlySet.insert(Symbol);
     break;
 
   case MCSA_ELF_TypeFunction:
-    SD.setFlags(SD.getFlags() | ELF_STT_Func);
+    SetType(SD, ELF::STT_FUNC);
     break;
 
   case MCSA_ELF_TypeObject:
-    SD.setFlags(SD.getFlags() | ELF_STT_Object);
+    SetType(SD, ELF::STT_OBJECT);
     break;
 
   case MCSA_ELF_TypeTLS:
-    SD.setFlags(SD.getFlags() | ELF_STT_Tls);
+    SetType(SD, ELF::STT_TLS);
     break;
 
   case MCSA_ELF_TypeCommon:
-    SD.setFlags(SD.getFlags() | ELF_STT_Common);
+    SetType(SD, ELF::STT_COMMON);
     break;
 
   case MCSA_ELF_TypeNoType:
-    SD.setFlags(SD.getFlags() | ELF_STT_Notype);
+    SetType(SD, ELF::STT_NOTYPE);
     break;
 
   case MCSA_Protected:
-    SD.setFlags(SD.getFlags() | ELF_STV_Protected);
+    SetVisibility(SD, ELF::STV_PROTECTED);
     break;
 
   case MCSA_Hidden:
-    SD.setFlags(SD.getFlags() | ELF_STV_Hidden);
+    SetVisibility(SD, ELF::STV_HIDDEN);
     break;
 
   case MCSA_Internal:
-    SD.setFlags(SD.getFlags() | ELF_STV_Internal);
+    SetVisibility(SD, ELF::STV_INTERNAL);
     break;
   }
 }
@@ -237,24 +337,38 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                        unsigned ByteAlignment) {
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
 
-  if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) {
+  if (!BindingExplicitlySet.count(Symbol)) {
+    SetBinding(SD, ELF::STB_GLOBAL);
+    SD.setExternal(true);
+  }
+
+  SetType(SD, ELF::STT_OBJECT);
+
+  if (GetBinding(SD) == ELF_STB_Local) {
     const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
-                                                                    MCSectionELF::SHT_NOBITS,
-                                                                    MCSectionELF::SHF_WRITE |
-                                                                    MCSectionELF::SHF_ALLOC,
+                                                                    ELF::SHT_NOBITS,
+                                                                    ELF::SHF_WRITE |
+                                                                    ELF::SHF_ALLOC,
                                                                     SectionKind::getBSS());
-
-    MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
-    MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
-    SD.setFragment(F);
     Symbol->setSection(*Section);
-    SD.setSize(MCConstantExpr::Create(Size, getContext()));
+
+    struct LocalCommon L = {&SD, Size, ByteAlignment};
+    LocalCommons.push_back(L);
+  } else {
+    SD.setCommon(Size, ByteAlignment);
   }
 
-  SD.setFlags(SD.getFlags() | ELF_STB_Global);
-  SD.setExternal(true);
+  SD.setSize(MCConstantExpr::Create(Size, getContext()));
+}
 
-  SD.setCommon(Size, ByteAlignment);
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+  // FIXME: Should this be caught and done earlier?
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  SetBinding(SD, ELF::STB_LOCAL);
+  SD.setExternal(false);
+  BindingExplicitlySet.insert(Symbol);
+  // FIXME: ByteAlignment is not needed here, but is required.
+  EmitCommonSymbol(Symbol, Size, 1);
 }
 
 void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -263,25 +377,6 @@ void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
   getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
 }
 
-void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
-                                unsigned AddrSpace) {
-  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
-  // MCObjectStreamer.
-  MCDataFragment *DF = getOrCreateDataFragment();
-
-  // Avoid fixups when possible.
-  int64_t AbsValue;
-  if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
-    // FIXME: Endianness assumption.
-    for (unsigned i = 0; i != Size; ++i)
-      DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
-  } else {
-    DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value),
-                                 MCFixup::getKindForSize(Size)));
-    DF->getContents().resize(DF->getContents().size() + Size, 0);
-  }
-}
-
 void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            int64_t Value, unsigned ValueSize,
                                            unsigned MaxBytesToEmit) {
@@ -312,18 +407,11 @@ void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
     getCurrentSectionData()->setAlignment(ByteAlignment);
 }
 
-void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset,
-                                        unsigned char Value) {
-  // TODO: This is exactly the same as MCMachOStreamer. Consider merging into
-  // MCObjectStreamer.
-  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
-}
-
 // Add a symbol for the file name of this module. This is the second
 // entry in the module's symbol table (the first being the null symbol).
 void MCELFStreamer::EmitFileDirective(StringRef Filename) {
   MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
-  Symbol->setSection(*CurSection);
+  Symbol->setSection(*getCurrentSection());
   Symbol->setAbsolute();
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -331,21 +419,52 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {
   SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
 }
 
-void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
-  MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+void  MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+  switch (expr->getKind()) {
+  case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+  case MCExpr::Constant:
+    break;
 
-  // Add the fixups and data.
-  //
-  // FIXME: Revisit this design decision when relaxation is done, we may be
-  // able to get away with not storing any extra data in the MCInst.
-  SmallVector<MCFixup, 4> Fixups;
-  SmallString<256> Code;
-  raw_svector_ostream VecOS(Code);
-  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
-  VecOS.flush();
+  case MCExpr::Binary: {
+    const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+    fixSymbolsInTLSFixups(be->getLHS());
+    fixSymbolsInTLSFixups(be->getRHS());
+    break;
+  }
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+    switch (symRef.getKind()) {
+    default:
+      return;
+    case MCSymbolRefExpr::VK_NTPOFF:
+    case MCSymbolRefExpr::VK_GOTNTPOFF:
+    case MCSymbolRefExpr::VK_TLSGD:
+    case MCSymbolRefExpr::VK_TLSLDM:
+    case MCSymbolRefExpr::VK_TPOFF:
+    case MCSymbolRefExpr::VK_DTPOFF:
+    case MCSymbolRefExpr::VK_GOTTPOFF:
+    case MCSymbolRefExpr::VK_TLSLD:
+    case MCSymbolRefExpr::VK_ARM_TLSGD:
+      break;
+    }
+    MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+    SetType(SD, ELF::STT_TLS);
+    break;
+  }
+
+  case MCExpr::Unary:
+    fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+    break;
+  }
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+  this->MCObjectStreamer::EmitInstToFragment(Inst);
+  MCInstFragment &F = *cast<MCInstFragment>(getCurrentFragment());
 
-  IF->getCode() = Code;
-  IF->getFixups() = Fixups;
+  for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
+    fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
 }
 
 void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
@@ -357,6 +476,9 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
   getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
   VecOS.flush();
 
+  for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+    fixSymbolsInTLSFixups(Fixups[i].getValue());
+
   // Add the fixups and data.
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
     Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
@@ -365,44 +487,40 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
   DF->getContents().append(Code.begin(), Code.end());
 }
 
-void MCELFStreamer::EmitInstruction(const MCInst &Inst) {
-  // Scan for values.
-  for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
-    if (Inst.getOperand(i).isExpr())
-      AddValueSymbols(Inst.getOperand(i).getExpr());
+void MCELFStreamer::Finish() {
+  if (getNumFrameInfos())
+    MCDwarfFrameEmitter::Emit(*this);
 
-  getCurrentSectionData()->setHasInstructions(true);
+  for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
+                                                e = LocalCommons.end();
+       i != e; ++i) {
+    MCSymbolData *SD = i->SD;
+    uint64_t Size = i->Size;
+    unsigned ByteAlignment = i->ByteAlignment;
+    const MCSymbol &Symbol = SD->getSymbol();
+    const MCSection &Section = Symbol.getSection();
 
-  // If this instruction doesn't need relaxation, just emit it as data.
-  if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
-    EmitInstToData(Inst);
-    return;
-  }
+    MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
+    new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
 
-  // Otherwise, if we are relaxing everything, relax the instruction as much as
-  // possible and emit it as data.
-  if (getAssembler().getRelaxAll()) {
-    MCInst Relaxed;
-    getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
-    while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
-      getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
-    EmitInstToData(Relaxed);
-    return;
-  }
+    MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+    SD->setFragment(F);
 
-  // Otherwise emit to a separate fragment.
-  EmitInstToFragment(Inst);
-}
+    // Update the maximum alignment of the section if necessary.
+    if (ByteAlignment > SectData.getAlignment())
+      SectData.setAlignment(ByteAlignment);
+  }
 
-void MCELFStreamer::Finish() {
-  getAssembler().Finish();
+  this->MCObjectStreamer::Finish();
 }
 
 MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
-                                      raw_ostream &OS, MCCodeEmitter *CE,
-                                      bool RelaxAll) {
+                                    raw_ostream &OS, MCCodeEmitter *CE,
+                                    bool RelaxAll, bool NoExecStack) {
   MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
   if (RelaxAll)
     S->getAssembler().setRelaxAll(true);
+  if (NoExecStack)
+    S->getAssembler().setNoExecStack(true);
   return S;
 }
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 343f33475235..54d3743e68e4 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -38,21 +38,31 @@ void MCExpr::print(raw_ostream &OS) const {
   case MCExpr::SymbolRef: {
     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
     const MCSymbol &Sym = SRE.getSymbol();
+    // Parenthesize names that start with $ so that they don't look like
+    // absolute names.
+    bool UseParens = Sym.getName()[0] == '$';
 
-    if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 ||
-        SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
+    if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_HA16 ||
+        SRE.getKind() == MCSymbolRefExpr::VK_PPC_LO16) {
       OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+      UseParens = true;
+    }
 
-    // Parenthesize names that start with $ so that they don't look like
-    // absolute names.
-    if (Sym.getName()[0] == '$')
+    if (UseParens)
       OS << '(' << Sym << ')';
     else
       OS << Sym;
 
-    if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
-        SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
-        SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
+    if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+        SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
+        SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
+        SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
+        SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
+        SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+      OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+    else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+             SRE.getKind() != MCSymbolRefExpr::VK_PPC_HA16 &&
+             SRE.getKind() != MCSymbolRefExpr::VK_PPC_LO16)
       OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
 
     return;
@@ -172,12 +182,23 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_GOTTPOFF: return "GOTTPOFF";
   case VK_INDNTPOFF: return "INDNTPOFF";
   case VK_NTPOFF: return "NTPOFF";
+  case VK_GOTNTPOFF: return "GOTNTPOFF";
   case VK_PLT: return "PLT";
   case VK_TLSGD: return "TLSGD";
+  case VK_TLSLD: return "TLSLD";
+  case VK_TLSLDM: return "TLSLDM";
   case VK_TPOFF: return "TPOFF";
-  case VK_ARM_HI16: return ":upper16:";
-  case VK_ARM_LO16: return ":lower16:";
+  case VK_DTPOFF: return "DTPOFF";
   case VK_TLVP: return "TLVP";
+  case VK_ARM_PLT: return "(PLT)";
+  case VK_ARM_GOT: return "(GOT)";
+  case VK_ARM_GOTOFF: return "(GOTOFF)";
+  case VK_ARM_TPOFF: return "(tpoff)";
+  case VK_ARM_GOTTPOFF: return "(gottpoff)";
+  case VK_ARM_TLSGD: return "(tlsgd)";
+  case VK_PPC_TOC: return "toc";
+  case VK_PPC_HA16: return "ha16";
+  case VK_PPC_LO16: return "lo16";
   }
 }
 
@@ -185,15 +206,33 @@ MCSymbolRefExpr::VariantKind
 MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
   return StringSwitch<VariantKind>(Name)
     .Case("GOT", VK_GOT)
+    .Case("got", VK_GOT)
     .Case("GOTOFF", VK_GOTOFF)
+    .Case("gotoff", VK_GOTOFF)
     .Case("GOTPCREL", VK_GOTPCREL)
+    .Case("gotpcrel", VK_GOTPCREL)
     .Case("GOTTPOFF", VK_GOTTPOFF)
+    .Case("gottpoff", VK_GOTTPOFF)
     .Case("INDNTPOFF", VK_INDNTPOFF)
+    .Case("indntpoff", VK_INDNTPOFF)
     .Case("NTPOFF", VK_NTPOFF)
+    .Case("ntpoff", VK_NTPOFF)
+    .Case("GOTNTPOFF", VK_GOTNTPOFF)
+    .Case("gotntpoff", VK_GOTNTPOFF)
     .Case("PLT", VK_PLT)
+    .Case("plt", VK_PLT)
     .Case("TLSGD", VK_TLSGD)
+    .Case("tlsgd", VK_TLSGD)
+    .Case("TLSLD", VK_TLSLD)
+    .Case("tlsld", VK_TLSLD)
+    .Case("TLSLDM", VK_TLSLDM)
+    .Case("tlsldm", VK_TLSLDM)
     .Case("TPOFF", VK_TPOFF)
+    .Case("tpoff", VK_TPOFF)
+    .Case("DTPOFF", VK_DTPOFF)
+    .Case("dtpoff", VK_DTPOFF)
     .Case("TLVP", VK_TLVP)
+    .Case("tlvp", VK_TLVP)
     .Default(VK_Invalid);
 }
 
@@ -203,7 +242,28 @@ void MCTargetExpr::Anchor() {}
 
 /* *** */
 
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+  return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+                                const MCAsmLayout &Layout) const {
+  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+                                const MCAsmLayout &Layout,
+                                const SectionAddrMap &Addrs) const {
+  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+  return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+                                const MCAsmLayout *Layout,
+                                const SectionAddrMap *Addrs) const {
   MCValue Value;
 
   // Fast path constants.
@@ -212,37 +272,159 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
     return true;
   }
 
-  if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute())
-    return false;
+  // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
+  // absolutize differences across sections and that is what the MachO writer
+  // uses Addrs for.
+  bool IsRelocatable =
+    EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
 
+  // Record the current value.
   Res = Value.getConstant();
-  return true;
+
+  return IsRelocatable && Value.isAbsolute();
+}
+
+/// \brief Helper method for \see EvaluateSymbolAdd().
+static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+                                                const MCAsmLayout *Layout,
+                                                const SectionAddrMap *Addrs,
+                                                bool InSet,
+                                                const MCSymbolRefExpr *&A,
+                                                const MCSymbolRefExpr *&B,
+                                                int64_t &Addend) {
+  if (!A || !B)
+    return;
+
+  const MCSymbol &SA = A->getSymbol();
+  const MCSymbol &SB = B->getSymbol();
+
+  if (SA.isUndefined() || SB.isUndefined())
+    return;
+
+  if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+    return;
+
+  MCSymbolData &AD = Asm->getSymbolData(SA);
+  MCSymbolData &BD = Asm->getSymbolData(SB);
+
+  if (AD.getFragment() == BD.getFragment()) {
+    Addend += (AD.getOffset() - BD.getOffset());
+
+    // Clear the symbol expr pointers to indicate we have folded these
+    // operands.
+    A = B = 0;
+    return;
+  }
+
+  if (!Layout)
+    return;
+
+  const MCSectionData &SecA = *AD.getFragment()->getParent();
+  const MCSectionData &SecB = *BD.getFragment()->getParent();
+
+  if ((&SecA != &SecB) && !Addrs)
+    return;
+
+  // Eagerly evaluate.
+  Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
+             Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+  if (Addrs && (&SecA != &SecB))
+    Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+  // Clear the symbol expr pointers to indicate we have folded these
+  // operands.
+  A = B = 0;
 }
 
-static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
+/// \brief Evaluate the result of an add between (conceptually) two MCValues.
+///
+/// This routine conceptually attempts to construct an MCValue:
+///   Result = (Result_A - Result_B + Result_Cst)
+/// from two MCValue's LHS and RHS where
+///   Result = LHS + RHS
+/// and
+///   Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+///
+/// This routine attempts to aggresively fold the operands such that the result
+/// is representable in an MCValue, but may not always succeed.
+///
+/// \returns True on success, false if the result is not representable in an
+/// MCValue.
+
+/// NOTE: It is really important to have both the Asm and Layout arguments.
+/// They might look redundant, but this function can be used before layout
+/// is done (see the object streamer for example) and having the Asm argument
+/// lets us avoid relaxations early.
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+                                const MCAsmLayout *Layout,
+                                const SectionAddrMap *Addrs,
+                                bool InSet,
+                                const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
                                 const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
                                 MCValue &Res) {
-  // We can't add or subtract two symbols.
-  if ((LHS.getSymA() && RHS_A) ||
-      (LHS.getSymB() && RHS_B))
+  // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
+  // about dealing with modifiers. This will ultimately bite us, one day.
+  const MCSymbolRefExpr *LHS_A = LHS.getSymA();
+  const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+  int64_t LHS_Cst = LHS.getConstant();
+
+  // Fold the result constant immediately.
+  int64_t Result_Cst = LHS_Cst + RHS_Cst;
+
+  assert((!Layout || Asm) &&
+         "Must have an assembler object if layout is given!");
+
+  // If we have a layout, we can fold resolved differences.
+  if (Asm) {
+    // First, fold out any differences which are fully resolved. By
+    // reassociating terms in
+    //   Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+    // we have the four possible differences:
+    //   (LHS_A - LHS_B),
+    //   (LHS_A - RHS_B),
+    //   (RHS_A - LHS_B),
+    //   (RHS_A - RHS_B).
+    // Since we are attempting to be as aggresive as possible about folding, we
+    // attempt to evaluate each possible alternative.
+    AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
+                                        Result_Cst);
+    AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
+                                        Result_Cst);
+    AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
+                                        Result_Cst);
+    AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
+                                        Result_Cst);
+  }
+
+  // We can't represent the addition or subtraction of two symbols.
+  if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
     return false;
 
-  const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
-  const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
-  if (B) {
-    // If we have a negated symbol, then we must have also have a non-negated
-    // symbol in order to encode the expression. We can do this check later to
-    // permit expressions which eventually fold to a representable form -- such
-    // as (a + (0 - b)) -- if necessary.
-    if (!A)
-      return false;
-  }
-  Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst);
+  // At this point, we have at most one additive symbol and one subtractive
+  // symbol -- find them.
+  const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
+  const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
+
+  // If we have a negated symbol, then we must have also have a non-negated
+  // symbol in order to encode the expression.
+  if (B && !A)
+    return false;
+
+  Res = MCValue::get(A, B, Result_Cst);
   return true;
 }
 
 bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
-                                   const MCAsmLayout *Layout) const {
+                                   const MCAsmLayout &Layout) const {
+  return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout,
+                                   0, false);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+                                       const MCAssembler *Asm,
+                                       const MCAsmLayout *Layout,
+                                       const SectionAddrMap *Addrs,
+                                       bool InSet) const {
   ++stats::MCExprEvaluate;
 
   switch (getKind()) {
@@ -258,26 +440,15 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
     const MCSymbol &Sym = SRE->getSymbol();
 
     // Evaluate recursively if this is a variable.
-    if (Sym.isVariable()) {
-      if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout))
-        return false;
-
-      // Absolutize symbol differences between defined symbols when we have a
-      // layout object and the target requests it.
-      if (Layout && Res.getSymB() &&
-          Layout->getAssembler().getBackend().hasAbsolutizedSet() &&
-          Res.getSymA()->getSymbol().isDefined() &&
-          Res.getSymB()->getSymbol().isDefined()) {
-        MCSymbolData &A =
-          Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol());
-        MCSymbolData &B =
-          Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol());
-        Res = MCValue::get(+ Layout->getSymbolAddress(&A)
-                           - Layout->getSymbolAddress(&B)
-                           + Res.getConstant());
-      }
-
-      return true;
+    if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+      bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm,
+                                                                   Layout,
+                                                                   Addrs,
+                                                                   true);
+      // If we failed to simplify this to a constant, let the target
+      // handle it.
+      if (Ret && !Res.getSymA() && !Res.getSymB())
+        return true;
     }
 
     Res = MCValue::get(SRE, 0, 0);
@@ -288,7 +459,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
     const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
     MCValue Value;
 
-    if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+    if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+                                                      Addrs, InSet))
       return false;
 
     switch (AUE->getOpcode()) {
@@ -321,8 +493,10 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
     const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
     MCValue LHSValue, RHSValue;
 
-    if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) ||
-        !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout))
+    if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+                                                  Addrs, InSet) ||
+        !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+                                                  Addrs, InSet))
       return false;
 
     // We only support a few operations on non-constant expressions, handle
@@ -333,13 +507,13 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
         return false;
       case MCBinaryExpr::Sub:
         // Negate RHS and add.
-        return EvaluateSymbolicAdd(LHSValue,
+        return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
                                    RHSValue.getSymB(), RHSValue.getSymA(),
                                    -RHSValue.getConstant(),
                                    Res);
 
       case MCBinaryExpr::Add:
-        return EvaluateSymbolicAdd(LHSValue,
+        return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
                                    RHSValue.getSymA(), RHSValue.getSymB(),
                                    RHSValue.getConstant(),
                                    Res);
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index b96040abd0c1..012c7f62f8af 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -48,10 +48,14 @@ public:
     return Child->AddBlankLine();
   }
 
-  virtual void SwitchSection(const MCSection *Section) {
-    CurSection = Section;
-    LogCall("SwitchSection");
-    return Child->SwitchSection(Section);
+  virtual void ChangeSection(const MCSection *Section) {
+    LogCall("ChangeSection");
+    return Child->ChangeSection(Section);
+  }
+
+  virtual void InitSections() {
+    LogCall("InitSections");
+    return Child->InitSections();
   }
 
   virtual void EmitLabel(MCSymbol *Symbol) {
@@ -64,11 +68,28 @@ public:
     return Child->EmitAssemblerFlag(Flag);
   }
 
+  virtual void EmitThumbFunc(MCSymbol *Func) {
+    LogCall("EmitThumbFunc");
+    return Child->EmitThumbFunc(Func);
+  }
+
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
     LogCall("EmitAssignment");
     return Child->EmitAssignment(Symbol, Value);
   }
 
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+    LogCall("EmitWeakReference");
+    return Child->EmitWeakReference(Alias, Symbol);
+  }
+
+  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                        const MCSymbol *LastLabel,
+                                        const MCSymbol *Label) {
+    LogCall("EmitDwarfAdvanceLineAddr");
+    return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+  }
+
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
     LogCall("EmitSymbolAttribute");
     return Child->EmitSymbolAttribute(Symbol, Attribute);
@@ -132,14 +153,22 @@ public:
     return Child->EmitBytes(Data, AddrSpace);
   }
 
-  virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             bool isPCRel, unsigned AddrSpace){
     LogCall("EmitValue");
-    return Child->EmitValue(Value, Size, AddrSpace);
+    return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace);
+  }
+
+  virtual void EmitULEB128Value(const MCExpr *Value,
+                                unsigned AddrSpace = 0) {
+    LogCall("EmitULEB128Value");
+    return Child->EmitULEB128Value(Value, AddrSpace);
   }
 
-  virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) {
-    LogCall("EmitIntValue");
-    return Child->EmitIntValue(Value, Size, AddrSpace);
+  virtual void EmitSLEB128Value(const MCExpr *Value,
+                                unsigned AddrSpace = 0) {
+    LogCall("EmitSLEB128Value");
+    return Child->EmitSLEB128Value(Value, AddrSpace);
   }
 
   virtual void EmitGPRel32Value(const MCExpr *Value) {
@@ -178,12 +207,23 @@ public:
     return Child->EmitFileDirective(Filename);
   }
 
-  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
     LogCall("EmitDwarfFileDirective",
             "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
     return Child->EmitDwarfFileDirective(FileNo, Filename);
   }
 
+  virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                     unsigned Column, unsigned Flags,
+                                     unsigned Isa, unsigned Discriminator) {
+    LogCall("EmitDwarfLocDirective",
+            "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
+            " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
+            " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
+            return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+                                                Isa, Discriminator);
+  }
+
   virtual void EmitInstruction(const MCInst &Inst) {
     LogCall("EmitInstruction");
     return Child->EmitInstruction(Inst);
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 671874df2c69..d1f9f5cd568e 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -20,9 +20,11 @@
 #include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
@@ -30,13 +32,7 @@ namespace {
 
 class MCMachOStreamer : public MCObjectStreamer {
 private:
-  void EmitInstToFragment(const MCInst &Inst);
-  void EmitInstToData(const MCInst &Inst);
-  // FIXME: These will likely moved to a better place.
-  void MakeLineEntryForSection(const MCSection *Section);
-  const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End,
-                                                        int IntVal);
-  void EmitDwarfFileTable(void);
+  virtual void EmitInstToData(const MCInst &Inst);
 
 public:
   MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -46,8 +42,10 @@ public:
   /// @name MCStreamer Interface
   /// @{
 
+  virtual void InitSections();
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
@@ -76,17 +74,11 @@ public:
   virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
                               uint64_t Size, unsigned ByteAlignment = 0);
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
-  virtual void EmitGPRel32Value(const MCExpr *Value) {
-    assert(0 && "macho doesn't support this directive");
-  }
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                     unsigned ValueSize = 1,
                                     unsigned MaxBytesToEmit = 0);
   virtual void EmitCodeAlignment(unsigned ByteAlignment,
                                  unsigned MaxBytesToEmit = 0);
-  virtual void EmitValueToOffset(const MCExpr *Offset,
-                                 unsigned char Value = 0);
 
   virtual void EmitFileDirective(StringRef Filename) {
     // FIXME: Just ignore the .file; it isn't important enough to fail the
@@ -94,14 +86,6 @@ public:
 
     //report_fatal_error("unsupported directive: '.file'");
   }
-  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
-    // FIXME: Just ignore the .file; it isn't important enough to fail the
-    // entire assembly.
-
-    //report_fatal_error("unsupported directive: '.file'");
-  }
-
-  virtual void EmitInstruction(const MCInst &Inst);
 
   virtual void Finish();
 
@@ -110,31 +94,26 @@ public:
 
 } // end anonymous namespace.
 
-void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
-  // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging
-  // into MCObjectStreamer.
-  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(CurSection && "Cannot emit before setting section!");
+void MCMachOStreamer::InitSections() {
+  SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                    0, SectionKind::getText()));
 
-  Symbol->setSection(*CurSection);
+}
 
-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
+  // isSymbolLinkerVisible uses the section.
+  Symbol->setSection(*getCurrentSection());
   // We have to create a new fragment if this is an atom defining symbol,
   // fragments cannot span atoms.
-  if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+  if (getAssembler().isSymbolLinkerVisible(*Symbol))
     new MCDataFragment(getCurrentSectionData());
 
-  // FIXME: This is wasteful, we don't necessarily need to create a data
-  // fragment. Instead, we should mark the symbol as pointing into the data
-  // fragment if it exists, otherwise we should just queue the label and set its
-  // fragment pointer when we emit the next fragment.
-  MCDataFragment *F = getOrCreateDataFragment();
-  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
-  SD.setFragment(F);
-  SD.setOffset(F->getContents().size());
+  MCObjectStreamer::EmitLabel(Symbol);
 
+  MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
   // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
   // to clear the weak reference and weak definition bits too, but the
   // implementation was buggy. For now we just try to match 'as', for
@@ -146,13 +125,31 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
 }
 
 void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+  // Let the target do whatever target specific stuff it needs to do.
+  getAssembler().getBackend().HandleAssemblerFlag(Flag);
+  // Do any generic stuff we need to do.
   switch (Flag) {
+  case MCAF_SyntaxUnified: return; // no-op here.
+  case MCAF_Code16: return; // no-op here.
+  case MCAF_Code32: return; // no-op here.
   case MCAF_SubsectionsViaSymbols:
     getAssembler().setSubsectionsViaSymbols(true);
     return;
+  default:
+    llvm_unreachable("invalid assembler flag!");
   }
+}
+
+void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+  // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
 
-  assert(0 && "invalid assembler flag!");
+  // Remember that the function is a thumb function. Fixup and relocation
+  // values will need adjusted.
+  getAssembler().setIsThumbFunc(Symbol);
+
+  // Mark the thumb bit on the symbol.
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  SD.setFlags(SD.getFlags() | SF_ThumbFunc);
 }
 
 void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -196,6 +193,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_ELF_TypeTLS:
   case MCSA_ELF_TypeCommon:
   case MCSA_ELF_TypeNoType:
+  case MCSA_ELF_TypeGnuUniqueObject:
   case MCSA_IndirectSymbol:
   case MCSA_Hidden:
   case MCSA_Internal:
@@ -230,6 +228,10 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
     break;
 
+  case MCSA_SymbolResolver:
+    SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+    break;
+
   case MCSA_PrivateExtern:
     SD.setExternal(true);
     SD.setPrivateExtern(true);
@@ -313,26 +315,6 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
   getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
 }
 
-void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
-                                unsigned AddrSpace) {
-  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
-  // MCObjectStreamer.
-  MCDataFragment *DF = getOrCreateDataFragment();
-
-  // Avoid fixups when possible.
-  int64_t AbsValue;
-  if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
-    // FIXME: Endianness assumption.
-    for (unsigned i = 0; i != Size; ++i)
-      DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
-  } else {
-    DF->addFixup(MCFixup::Create(DF->getContents().size(),
-                                 AddValueSymbols(Value),
-                                 MCFixup::getKindForSize(Size)));
-    DF->getContents().resize(DF->getContents().size() + Size, 0);
-  }
-}
-
 void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            int64_t Value, unsigned ValueSize,
                                            unsigned MaxBytesToEmit) {
@@ -363,28 +345,6 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
     getCurrentSectionData()->setAlignment(ByteAlignment);
 }
 
-void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
-                                        unsigned char Value) {
-  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
-}
-
-void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
-  MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
-
-  // Add the fixups and data.
-  //
-  // FIXME: Revisit this design decision when relaxation is done, we may be
-  // able to get away with not storing any extra data in the MCInst.
-  SmallVector<MCFixup, 4> Fixups;
-  SmallString<256> Code;
-  raw_svector_ostream VecOS(Code);
-  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
-  VecOS.flush();
-
-  IF->getCode() = Code;
-  IF->getFixups() = Fixups;
-}
-
 void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
   MCDataFragment *DF = getOrCreateDataFragment();
 
@@ -402,240 +362,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
   DF->getContents().append(Code.begin(), Code.end());
 }
 
-void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
-  // Scan for values.
-  for (unsigned i = Inst.getNumOperands(); i--; )
-    if (Inst.getOperand(i).isExpr())
-      AddValueSymbols(Inst.getOperand(i).getExpr());
-
-  getCurrentSectionData()->setHasInstructions(true);
-
-  // Now that a machine instruction has been assembled into this section, make
-  // a line entry for any .loc directive that has been seen.
-  MakeLineEntryForSection(getCurrentSection());
-
-  // If this instruction doesn't need relaxation, just emit it as data.
-  if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
-    EmitInstToData(Inst);
-    return;
-  }
-
-  // Otherwise, if we are relaxing everything, relax the instruction as much as
-  // possible and emit it as data.
-  if (getAssembler().getRelaxAll()) {
-    MCInst Relaxed;
-    getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
-    while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
-      getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
-    EmitInstToData(Relaxed);
-    return;
-  }
-
-  // Otherwise emit to a separate fragment.
-  EmitInstToFragment(Inst);
-}
-
-//
-// This is called when an instruction is assembled into the specified section
-// and if there is information from the last .loc directive that has yet to have
-// a line entry made for it is made.
-//
-void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) {
-  if (!getContext().getDwarfLocSeen())
-    return;
-
-  // Create a symbol at in the current section for use in the line entry.
-  MCSymbol *LineSym = getContext().CreateTempSymbol();
-  // Set the value of the symbol to use for the MCLineEntry.
-  EmitLabel(LineSym);
-
-  // Get the current .loc info saved in the context.
-  const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc();
-
-  // Create a (local) line entry with the symbol and the current .loc info.
-  MCLineEntry LineEntry(LineSym, DwarfLoc);
-
-  // clear DwarfLocSeen saying the current .loc info is now used.
-  getContext().clearDwarfLocSeen();
-
-  // Get the MCLineSection for this section, if one does not exist for this
-  // section create it.
-  DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
-    getContext().getMCLineSections();
-  MCLineSection *LineSection = MCLineSections[Section];
-  if (!LineSection) {
-    // Create a new MCLineSection.  This will be deleted after the dwarf line
-    // table is created using it by iterating through the MCLineSections
-    // DenseMap.
-    LineSection = new MCLineSection;
-    // Save a pointer to the new LineSection into the MCLineSections DenseMap.
-    MCLineSections[Section] = LineSection;
-  }
-
-  // Add the line entry to this section's entries.
-  LineSection->addLineEntry(LineEntry);
-}
-
-//
-// This helper routine returns an expression of End - Start + IntVal for use
-// by EmitDwarfFileTable() below.
-// 
-const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start,
-                                                      MCSymbol *End,
-                                                      int IntVal) {
-  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-  const MCExpr *Res =
-    MCSymbolRefExpr::Create(End, Variant, getContext());
-  const MCExpr *RHS =
-    MCSymbolRefExpr::Create(Start, Variant, getContext());
-  const MCExpr *Res1 =
-    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS,getContext());
-  const MCExpr *Res2 =
-    MCConstantExpr::Create(IntVal, getContext());
-  const MCExpr *Res3 =
-    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext());
-  return Res3;
-}
-
-//
-// This emits the Dwarf file (and eventually the line) table.
-//
-void MCMachOStreamer::EmitDwarfFileTable(void) {
-  // For now make sure we don't put out the Dwarf file table if no .file
-  // directives were seen.
-  const std::vector<MCDwarfFile *> &MCDwarfFiles =
-    getContext().getMCDwarfFiles();
-  if (MCDwarfFiles.size() == 0)
-    return;
-
-  // This is the Mach-O section, for ELF it is the .debug_line section.
-  SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line",
-                                         MCSectionMachO::S_ATTR_DEBUG,
-                                         0, SectionKind::getDataRelLocal()));
-
-  // Create a symbol at the beginning of this section.
-  MCSymbol *LineStartSym = getContext().CreateTempSymbol();
-  // Set the value of the symbol, as we are at the start of the section.
-  EmitLabel(LineStartSym);
-
-  // Create a symbol for the end of the section (to be set when we get there).
-  MCSymbol *LineEndSym = getContext().CreateTempSymbol();
-
-  // The first 4 bytes is the total length of the information for this
-  // compilation unit (not including these 4 bytes for the length).
-  EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0);
-
-  // Next 2 bytes is the Version, which is Dwarf 2.
-  EmitIntValue(2, 2);
-
-  // Create a symbol for the end of the prologue (to be set when we get there).
-  MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end
-
-  // Length of the prologue, is the next 4 bytes.  Which is the start of the
-  // section to the end of the prologue.  Not including the 4 bytes for the
-  // total length, the 2 bytes for the version, and these 4 bytes for the
-  // length of the prologue.
-  EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0);
-
-  // Parameters of the state machine, are next.
-  //  Define the architecture-dependent minimum instruction length (in
-  //  bytes).  This value should be rather too small than too big.  */
-  //  DWARF2_LINE_MIN_INSN_LENGTH
-  EmitIntValue(1, 1);
-  //  Flag that indicates the initial value of the is_stmt_start flag.
-  //  DWARF2_LINE_DEFAULT_IS_STMT
-  EmitIntValue(1, 1);
-  //  Minimum line offset in a special line info. opcode.  This value
-  //  was chosen to give a reasonable range of values.  */
-  //  DWARF2_LINE_BASE
-  EmitIntValue(uint64_t(-5), 1);
-  //  Range of line offsets in a special line info. opcode.
-  //  DWARF2_LINE_RANGE
-  EmitIntValue(14, 1);
-  //  First special line opcode - leave room for the standard opcodes.
-  //  DWARF2_LINE_OPCODE_BASE
-  EmitIntValue(13, 1);
-
-  // Standard opcode lengths
-  EmitIntValue(0, 1); // length of DW_LNS_copy
-  EmitIntValue(1, 1); // length of DW_LNS_advance_pc
-  EmitIntValue(1, 1); // length of DW_LNS_advance_line
-  EmitIntValue(1, 1); // length of DW_LNS_set_file
-  EmitIntValue(1, 1); // length of DW_LNS_set_column
-  EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
-  EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
-  EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
-  EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
-  EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
-  EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
-  EmitIntValue(1, 1); // DW_LNS_set_isa
-
-  // Put out the directory and file tables.
-
-  // First the directory table.
-  const std::vector<StringRef> &MCDwarfDirs =
-    getContext().getMCDwarfDirs();
-  for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
-    EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
-    EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
-  }
-  EmitIntValue(0, 1); // Terminate the directory list
-
-  // Second the file table.
-  for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
-    EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
-    EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
-    // FIXME the Directory number should be a .uleb128 not a .byte
-    EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1);
-    EmitIntValue(0, 1); // last modification timestamp (always 0)
-    EmitIntValue(0, 1); // filesize (always 0)
-  }
-  EmitIntValue(0, 1); // Terminate the file list
-
-  // This is the end of the prologue, so set the value of the symbol at the
-  // end of the prologue (that was used in a previous expression).
-  EmitLabel(ProEndSym);
-
-  // TODO: This is the point where the line tables would be emitted.
-
-  // Delete the MCLineSections that were created in 
-  // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line
-  // tables.
-  DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
-    getContext().getMCLineSections();
-  for (DenseMap<const MCSection *, MCLineSection *>::iterator it =
-	MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) {
-    delete it->second;
-  }
-
-  // If there are no line tables emited then we emit:
-  // The following DW_LNE_set_address sequence to set the address to zero
-  //   TODO test for 32-bit or 64-bit output
-  //     This is the sequence for 32-bit code
-  EmitIntValue(0, 1);
-  EmitIntValue(5, 1);
-  EmitIntValue(2, 1);
-  EmitIntValue(0, 1);
-  EmitIntValue(0, 1);
-  EmitIntValue(0, 1);
-  EmitIntValue(0, 1);
-
-  // Lastly emit the DW_LNE_end_sequence which consists of 3 bytes '00 01 01'
-  // (00 is the code for extended opcodes, followed by a ULEB128 length of the
-  // extended opcode (01), and the DW_LNE_end_sequence (01).
-  EmitIntValue(0, 1); // DW_LNS_extended_op
-  EmitIntValue(1, 1); // ULEB128 length of the extended opcode
-  EmitIntValue(1, 1); // DW_LNE_end_sequence
-
-  // This is the end of the section, so set the value of the symbol at the end
-  // of this section (that was used in a previous expression).
-  EmitLabel(LineEndSym);
-}
-
 void MCMachOStreamer::Finish() {
-  // Dump out the dwarf file and directory tables (soon to include line table)
-  EmitDwarfFileTable();
-
   // We have to set the fragment atom associations so we can relax properly for
   // Mach-O.
 
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
new file mode 100644
index 000000000000..146cebf01a3a
--- /dev/null
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -0,0 +1,22 @@
+//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+
+using namespace llvm;
+
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(
+  bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
+  bool UseAggressiveSymbolFolding_)
+  : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
+    UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+}
+
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+}
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index f7a2f20ca4bc..08ddf01d1a36 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -25,20 +25,26 @@ namespace {
     /// @name MCStreamer Interface
     /// @{
 
-    virtual void SwitchSection(const MCSection *Section) {
-      PrevSection = CurSection;
-      CurSection = Section;
+    virtual void InitSections() {
+    }
+
+    virtual void ChangeSection(const MCSection *Section) {
     }
 
     virtual void EmitLabel(MCSymbol *Symbol) {
       assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-      assert(CurSection && "Cannot emit before setting section!");
-      Symbol->setSection(*CurSection);
+      assert(getCurrentSection() && "Cannot emit before setting section!");
+      Symbol->setSection(*getCurrentSection());
     }
 
     virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+    virtual void EmitThumbFunc(MCSymbol *Func) {}
 
     virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+    virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
+    virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                          const MCSymbol *LastLabel,
+                                          const MCSymbol *Label) {}
 
     virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
 
@@ -60,8 +66,12 @@ namespace {
                                 uint64_t Size, unsigned ByteAlignment) {}
     virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
 
-    virtual void EmitValue(const MCExpr *Value, unsigned Size,
-                           unsigned AddrSpace) {}
+    virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                               bool isPCRel, unsigned AddrSpace) {}
+    virtual void EmitULEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) {}
+    virtual void EmitSLEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) {}
     virtual void EmitGPRel32Value(const MCExpr *Value) {}
     virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                       unsigned ValueSize = 1,
@@ -74,7 +84,12 @@ namespace {
                                    unsigned char Value = 0) {}
     
     virtual void EmitFileDirective(StringRef Filename) {}
-    virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {}
+    virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {
+      return false;
+    }
+    virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                       unsigned Column, unsigned Flags,
+                                       unsigned Isa, unsigned Discriminator) {}
     virtual void EmitInstruction(const MCInst &Inst) {}
 
     virtual void Finish() {}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 2b2385ef9156..035826690cdf 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -7,19 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
 
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
 using namespace llvm;
 
 MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
-                                   raw_ostream &_OS, MCCodeEmitter *_Emitter)
-  : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB,
-                                                   *_Emitter, _OS)),
+                                   raw_ostream &OS, MCCodeEmitter *Emitter_)
+  : MCStreamer(Context),
+    Assembler(new MCAssembler(Context, TAB,
+                              *Emitter_, *TAB.createObjectWriter(OS),
+                              OS)),
     CurSectionData(0)
 {
 }
@@ -27,6 +34,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
 MCObjectStreamer::~MCObjectStreamer() {
   delete &Assembler->getBackend();
   delete &Assembler->getEmitter();
+  delete &Assembler->getWriter();
   delete Assembler;
 }
 
@@ -48,7 +56,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
 
 const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
   switch (Value->getKind()) {
-  case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+  case MCExpr::Target:
+    cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+    break;
+
   case MCExpr::Constant:
     break;
 
@@ -71,17 +82,173 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
   return Value;
 }
 
-void MCObjectStreamer::SwitchSection(const MCSection *Section) {
-  assert(Section && "Cannot switch to a null section!");
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+                                     bool isPCRel, unsigned AddrSpace) {
+  assert(AddrSpace == 0 && "Address space must be 0!");
+  MCDataFragment *DF = getOrCreateDataFragment();
+
+  // Avoid fixups when possible.
+  int64_t AbsValue;
+  if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+    EmitIntValue(AbsValue, Size, AddrSpace);
+    return;
+  }
+  DF->addFixup(MCFixup::Create(DF->getContents().size(),
+                               Value,
+                               MCFixup::getKindForSize(Size, isPCRel)));
+  DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
+
+  Symbol->setSection(*getCurrentSection());
+
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+  // FIXME: This is wasteful, we don't necessarily need to create a data
+  // fragment. Instead, we should mark the symbol as pointing into the data
+  // fragment if it exists, otherwise we should just queue the label and set its
+  // fragment pointer when we emit the next fragment.
+  MCDataFragment *F = getOrCreateDataFragment();
+  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  SD.setFragment(F);
+  SD.setOffset(F->getContents().size());
+}
+
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value,
+                                        unsigned AddrSpace) {
+  int64_t IntValue;
+  if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+    EmitULEB128IntValue(IntValue, AddrSpace);
+    return;
+  }
+  new MCLEBFragment(*Value, false, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value,
+                                        unsigned AddrSpace) {
+  int64_t IntValue;
+  if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+    EmitSLEB128IntValue(IntValue, AddrSpace);
+    return;
+  }
+  new MCLEBFragment(*Value, true, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
+                                         const MCSymbol *Symbol) {
+  report_fatal_error("This file format doesn't support weak aliases.");
+}
 
-  // If already in this section, then this is a noop.
-  if (Section == CurSection) return;
+void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+  assert(Section && "Cannot switch to a null section!");
 
-  PrevSection = CurSection;
-  CurSection = Section;
   CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
 }
 
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
+  // Scan for values.
+  for (unsigned i = Inst.getNumOperands(); i--; )
+    if (Inst.getOperand(i).isExpr())
+      AddValueSymbols(Inst.getOperand(i).getExpr());
+
+  getCurrentSectionData()->setHasInstructions(true);
+
+  // Now that a machine instruction has been assembled into this section, make
+  // a line entry for any .loc directive that has been seen.
+  MCLineEntry::Make(this, getCurrentSection());
+
+  // If this instruction doesn't need relaxation, just emit it as data.
+  if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+    EmitInstToData(Inst);
+    return;
+  }
+
+  // Otherwise, if we are relaxing everything, relax the instruction as much as
+  // possible and emit it as data.
+  if (getAssembler().getRelaxAll()) {
+    MCInst Relaxed;
+    getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+    while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+      getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+    EmitInstToData(Relaxed);
+    return;
+  }
+
+  // Otherwise emit to a separate fragment.
+  EmitInstToFragment(Inst);
+}
+
+void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
+  MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+  raw_svector_ostream VecOS(IF->getCode());
+  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
+}
+
+static const MCExpr *BuildSymbolDiff(MCContext &Context,
+                                     const MCSymbol *A, const MCSymbol *B) {
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  const MCExpr *ARef =
+    MCSymbolRefExpr::Create(A, Variant, Context);
+  const MCExpr *BRef =
+    MCSymbolRefExpr::Create(B, Variant, Context);
+  const MCExpr *AddrDelta =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+  return AddrDelta;
+}
+
+static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer,
+                                  MCContext &Context, const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding())
+   return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ Streamer->EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                                const MCSymbol *LastLabel,
+                                                const MCSymbol *Label) {
+  if (!LastLabel) {
+    int PointerSize = getContext().getTargetAsmInfo().getPointerSize();
+    EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+    return;
+  }
+  const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+  int64_t Res;
+  if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+    MCDwarfLineAddr::Emit(this, LineDelta, Res);
+    return;
+  }
+  AddrDelta = ForceExpAbs(this, getContext(), AddrDelta);
+  new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                                 const MCSymbol *Label) {
+  const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+  int64_t Res;
+  if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+    MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
+    return;
+  }
+  AddrDelta = ForceExpAbs(this, getContext(), AddrDelta);
+  new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+                                        unsigned char Value) {
+  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
 void MCObjectStreamer::Finish() {
+  // Dump out the dwarf file & directory tables and line tables.
+  if (getContext().hasDwarfFiles())
+    MCDwarfFileTable::Emit(this);
+
   getAssembler().Finish();
 }
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index d117e82b8a1f..efe9f68ee22b 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -7,9 +7,74 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
 
 using namespace llvm;
 
 MCObjectWriter::~MCObjectWriter() {
 }
+
+/// Utility function to encode a SLEB128 value.
+void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
+  bool More;
+  do {
+    uint8_t Byte = Value & 0x7f;
+    // NOTE: this assumes that this signed shift is an arithmetic right shift.
+    Value >>= 7;
+    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+              ((Value == -1) && ((Byte & 0x40) != 0))));
+    if (More)
+      Byte |= 0x80; // Mark this byte that that more bytes will follow.
+    OS << char(Byte);
+  } while (More);
+}
+
+/// Utility function to encode a ULEB128 value.
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value != 0)
+      Byte |= 0x80; // Mark this byte that that more bytes will follow.
+    OS << char(Byte);
+  } while (Value != 0);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+                                                   const MCSymbolRefExpr *A,
+                                                   const MCSymbolRefExpr *B,
+                                                   bool InSet) const {
+  // Modified symbol references cannot be resolved.
+  if (A->getKind() != MCSymbolRefExpr::VK_None ||
+      B->getKind() != MCSymbolRefExpr::VK_None)
+    return false;
+
+  const MCSymbol &SA = A->getSymbol();
+  const MCSymbol &SB = B->getSymbol();
+  if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+    return false;
+
+  const MCSymbolData &DataA = Asm.getSymbolData(SA);
+  const MCSymbolData &DataB = Asm.getSymbolData(SB);
+
+  return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+                                                *DataB.getFragment(),
+                                                InSet,
+                                                false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                      const MCFragment &FB,
+                                                      bool InSet,
+                                                      bool IsPCRel) const {
+  const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+  const MCSection &SecB = FB.getParent()->getSection();
+  // On ELF and COFF  A - B is absolute if A and B are in the same section.
+  return &SecA == &SecB;
+}
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 086df081a938..89374d0c3fb9 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include <cctype>
 #include <cerrno>
 #include <cstdio>
 #include <cstdlib>
@@ -30,12 +31,12 @@ AsmLexer::~AsmLexer() {
 
 void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
   CurBuf = buf;
-  
+
   if (ptr)
     CurPtr = ptr;
   else
     CurPtr = CurBuf->getBufferStart();
-  
+
   TokStart = 0;
 }
 
@@ -43,7 +44,7 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
 /// location.  This is defined to always return AsmToken::Error.
 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
   SetError(SMLoc::getFromPointer(Loc), Msg);
-  
+
   return AsmToken(AsmToken::Error, StringRef(Loc, 0));
 }
 
@@ -57,23 +58,59 @@ int AsmLexer::getNextChar() {
     // a random nul in the file.  Disambiguate that here.
     if (CurPtr-1 != CurBuf->getBufferEnd())
       return 0;  // Just whitespace.
-    
+
     // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.  
+    --CurPtr;  // Another call to lex will return EOF again.
     return EOF;
   }
 }
 
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
+AsmToken AsmLexer::LexFloatLiteral() {
+  // Skip the fractional digit sequence.
+  while (isdigit(*CurPtr))
+    ++CurPtr;
+
+  // Check for exponent; we intentionally accept a slighlty wider set of
+  // literals here and rely on the upstream client to reject invalid ones (e.g.,
+  // "1e+").
+  if (*CurPtr == 'e' || *CurPtr == 'E') {
+    ++CurPtr;
+    if (*CurPtr == '-' || *CurPtr == '+')
+      ++CurPtr;
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+  }
+
+  return AsmToken(AsmToken::Real,
+                  StringRef(TokStart, CurPtr - TokStart));
+}
+
 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+  return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
 AsmToken AsmLexer::LexIdentifier() {
-  while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
-         *CurPtr == '.' || *CurPtr == '@')
+  // Check for floating point literals.
+  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+    // Disambiguate a .1243foo identifier from a floating literal.
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+    if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+      return LexFloatLiteral();
+  }
+
+  while (IsIdentifierChar(*CurPtr))
     ++CurPtr;
-  
+
   // Handle . as a special case.
   if (CurPtr == TokStart+1 && TokStart[0] == '.')
     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
-  
+
   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
 }
 
@@ -83,7 +120,7 @@ AsmToken AsmLexer::LexSlash() {
   switch (*CurPtr) {
   case '*': break; // C style comment.
   case '/': return ++CurPtr, LexLineComment();
-  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
+  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
   }
 
   // C Style comment.
@@ -96,7 +133,7 @@ AsmToken AsmLexer::LexSlash() {
     case '*':
       // End of the comment?
       if (CurPtr[0] != '/') break;
-      
+
       ++CurPtr;   // End the */.
       return LexToken();
     }
@@ -111,7 +148,7 @@ AsmToken AsmLexer::LexLineComment() {
   int CurChar = getNextChar();
   while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
     CurChar = getNextChar();
-  
+
   if (CurChar == EOF)
     return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
   return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
@@ -124,7 +161,6 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
     CurPtr += 3;
 }
 
-
 /// LexDigit: First character is [0-9].
 ///   Local Label: [0-9][:]
 ///   Forward/Backward Label: [0-9][fb]
@@ -132,32 +168,37 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 ///   Octal integer: 0[0-7]+
 ///   Hex integer: 0x[0-9a-fA-F]+
 ///   Decimal integer: [1-9][0-9]*
-/// TODO: FP literal.
 AsmToken AsmLexer::LexDigit() {
   // Decimal integer: [1-9][0-9]*
-  if (CurPtr[-1] != '0') {
+  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
     while (isdigit(*CurPtr))
       ++CurPtr;
-    
+
+    // Check for floating point literals.
+    if (*CurPtr == '.' || *CurPtr == 'e') {
+      ++CurPtr;
+      return LexFloatLiteral();
+    }
+
     StringRef Result(TokStart, CurPtr - TokStart);
 
     long long Value;
     if (Result.getAsInteger(10, Value)) {
-      // We have to handle minint_as_a_positive_value specially, because
-      // - minint_as_a_positive_value = minint and it is valid.
-      if (Result == "9223372036854775808")
-        Value = -9223372036854775808ULL;
-      else
-        return ReturnError(TokStart, "Invalid decimal number");
+      // Allow positive values that are too large to fit into a signed 64-bit
+      // integer, but that do fit in an unsigned one, we just convert them over.
+      unsigned long long UValue;
+      if (Result.getAsInteger(10, UValue))
+        return ReturnError(TokStart, "invalid decimal number");
+      Value = (long long)UValue;
     }
-    
+
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
     // suffixes on integer literals.
     SkipIgnoredIntegerSuffix(CurPtr);
-    
+
     return AsmToken(AsmToken::Integer, Result, Value);
   }
-  
+
   if (*CurPtr == 'b') {
     ++CurPtr;
     // See if we actually have "0b" as part of something like "jmp 0b\n"
@@ -169,30 +210,30 @@ AsmToken AsmLexer::LexDigit() {
     const char *NumStart = CurPtr;
     while (CurPtr[0] == '0' || CurPtr[0] == '1')
       ++CurPtr;
-    
+
     // Requires at least one binary digit.
     if (CurPtr == NumStart)
       return ReturnError(TokStart, "Invalid binary number");
-    
+
     StringRef Result(TokStart, CurPtr - TokStart);
-    
+
     long long Value;
     if (Result.substr(2).getAsInteger(2, Value))
       return ReturnError(TokStart, "Invalid binary number");
-    
+
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
     // suffixes on integer literals.
     SkipIgnoredIntegerSuffix(CurPtr);
-    
+
     return AsmToken(AsmToken::Integer, Result, Value);
   }
- 
+
   if (*CurPtr == 'x') {
     ++CurPtr;
     const char *NumStart = CurPtr;
     while (isxdigit(CurPtr[0]))
       ++CurPtr;
-    
+
     // Requires at least one hex digit.
     if (CurPtr == NumStart)
       return ReturnError(CurPtr-2, "Invalid hexadecimal number");
@@ -200,31 +241,67 @@ AsmToken AsmLexer::LexDigit() {
     unsigned long long Result;
     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
       return ReturnError(TokStart, "Invalid hexadecimal number");
-      
+
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
     // suffixes on integer literals.
     SkipIgnoredIntegerSuffix(CurPtr);
-    
+
     return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
                     (int64_t)Result);
   }
-  
+
   // Must be an octal number, it starts with 0.
   while (*CurPtr >= '0' && *CurPtr <= '7')
     ++CurPtr;
-  
+
   StringRef Result(TokStart, CurPtr - TokStart);
   long long Value;
   if (Result.getAsInteger(8, Value))
     return ReturnError(TokStart, "Invalid octal number");
-  
+
   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
   // suffixes on integer literals.
   SkipIgnoredIntegerSuffix(CurPtr);
-  
+
   return AsmToken(AsmToken::Integer, Result, Value);
 }
 
+/// LexSingleQuote: Integer: 'b'
+AsmToken AsmLexer::LexSingleQuote() {
+  int CurChar = getNextChar();
+
+  if (CurChar == '\\')
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return ReturnError(TokStart, "unterminated single quote");
+
+  CurChar = getNextChar();
+
+  if (CurChar != '\'')
+    return ReturnError(TokStart, "single quote way too long");
+
+  // The idea here being that 'c' is basically just an integral
+  // constant.
+  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+  long long Value;
+
+  if (Res.startswith("\'\\")) {
+    char theChar = Res[2];
+    switch (theChar) {
+      default: Value = theChar; break;
+      case '\'': Value = '\''; break;
+      case 't': Value = '\t'; break;
+      case 'n': Value = '\n'; break;
+      case 'b': Value = '\b'; break;
+    }
+  } else
+    Value = TokStart[1];
+
+  return AsmToken(AsmToken::Integer, Res, Value);
+}
+
+
 /// LexQuote: String: "..."
 AsmToken AsmLexer::LexQuote() {
   int CurChar = getNextChar();
@@ -234,13 +311,13 @@ AsmToken AsmLexer::LexQuote() {
       // Allow \", etc.
       CurChar = getNextChar();
     }
-    
+
     if (CurChar == EOF)
       return ReturnError(TokStart, "unterminated string constant");
 
     CurChar = getNextChar();
   }
-  
+
   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
 }
 
@@ -266,7 +343,7 @@ AsmToken AsmLexer::LexToken() {
   TokStart = CurPtr;
   // This always consumes at least one character.
   int CurChar = getNextChar();
-  
+
   if (isAtStartOfComment(CurChar))
     return LexLineComment();
 
@@ -275,7 +352,7 @@ AsmToken AsmLexer::LexToken() {
     // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
       return LexIdentifier();
-    
+
     // Unknown character, emit an error.
     return ReturnError(TokStart, "invalid character in input");
   case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
@@ -301,49 +378,50 @@ AsmToken AsmLexer::LexToken() {
   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
   case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
   case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
-  case '=': 
+  case '=':
     if (*CurPtr == '=')
       return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
-  case '|': 
+  case '|':
     if (*CurPtr == '|')
       return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
-  case '&': 
+  case '&':
     if (*CurPtr == '&')
       return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
-  case '!': 
+  case '!':
     if (*CurPtr == '=')
       return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
   case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
   case '/': return LexSlash();
   case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+  case '\'': return LexSingleQuote();
   case '"': return LexQuote();
   case '0': case '1': case '2': case '3': case '4':
   case '5': case '6': case '7': case '8': case '9':
     return LexDigit();
   case '<':
     switch (*CurPtr) {
-    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 
+    case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
                                         StringRef(TokStart, 2));
-    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 
+    case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
                                         StringRef(TokStart, 2));
-    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 
+    case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
                                         StringRef(TokStart, 2));
     default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
     }
   case '>':
     switch (*CurPtr) {
-    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 
+    case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
                                         StringRef(TokStart, 2));
-    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 
+    case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
                                         StringRef(TokStart, 2));
     default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
     }
-      
+
   // TODO: Quoted identifiers (objc methods etc)
   // local labels: [0-9][:]
   // Forward/backward labels: [0-9][fb]
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index f83cd5eb2a16..c6d0da609b3b 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -18,7 +19,6 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/AsmCond.h"
 #include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
@@ -27,11 +27,12 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCDwarf.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetAsmParser.h"
+#include <cctype>
 #include <vector>
 using namespace llvm;
 
@@ -102,6 +103,9 @@ private:
   /// Boolean tracking whether macro substitution is enabled.
   unsigned MacrosEnabled : 1;
 
+  /// Flag tracking whether any errors have been encountered.
+  unsigned HadError : 1;
+
 public:
   AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
             const MCAsmInfo &MAI);
@@ -137,14 +141,18 @@ public:
   /// }
 
 private:
+  void CheckForValidSection();
+
   bool ParseStatement();
 
   bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
   void HandleMacroExit();
 
   void PrintMacroInstantiations();
-  void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
-    
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
+    SrcMgr.PrintMessage(Loc, Msg, Type);
+  }
+
   /// EnterIncludeFile - Enter the specified file. This returns true on failure.
   bool EnterIncludeFile(const std::string &Filename);
 
@@ -160,22 +168,27 @@ private:
   /// will be either the EndOfStatement or EOF.
   StringRef ParseStringToEndOfStatement();
 
-  bool ParseAssignment(StringRef Name);
+  bool ParseAssignment(StringRef Name, bool allow_redef);
 
   bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
   bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
   bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
 
   /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
   /// and set \arg Res to the identifier contents.
   bool ParseIdentifier(StringRef &Res);
-  
+
   // Directive Parsing.
-  bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+
+ // ".ascii", ".asciiz", ".string"
+  bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
   bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+  bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
   bool ParseDirectiveFill(); // ".fill"
   bool ParseDirectiveSpace(); // ".space"
-  bool ParseDirectiveSet(); // ".set"
+  bool ParseDirectiveZero(); // ".zero"
+  bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
   bool ParseDirectiveOrg(); // ".org"
   // ".align{,32}", ".p2align{,w,l}"
   bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
@@ -183,7 +196,6 @@ private:
   /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
   /// accepts a single symbol (which should be a label or an external).
   bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
-  bool ParseDirectiveELFType(); // ELF specific ".type"
 
   bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
 
@@ -191,6 +203,8 @@ private:
   bool ParseDirectiveInclude(); // ".include"
 
   bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+  // ".ifdef" or ".ifndef", depending on expect_defined
+  bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
   bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
   bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
   bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
@@ -198,6 +212,9 @@ private:
   /// ParseEscapedString - Parse the current token as a string which may include
   /// escaped characters and return the string contents.
   bool ParseEscapedString(std::string &Data);
+
+  const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+                                    MCSymbolRefExpr::VariantKind Variant);
 };
 
 /// \brief Generic implementations of directive handling, etc. which is shared
@@ -208,7 +225,6 @@ class GenericAsmParser : public MCAsmParserExtension {
     getParser().AddDirectiveHandler(this, Directive,
                                     HandleDirective<GenericAsmParser, Handler>);
   }
-
 public:
   GenericAsmParser() {}
 
@@ -224,6 +240,29 @@ public:
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
+
+    // CFI directives.
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
+                                                              ".cfi_startproc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
+                                                                ".cfi_endproc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
+                                                         ".cfi_def_cfa");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
+                                                         ".cfi_def_cfa_offset");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
+                                                       ".cfi_def_cfa_register");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
+                                                                 ".cfi_offset");
+    AddDirectiveHandler<
+     &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
+    AddDirectiveHandler<
+            &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
 
     // Macro directives.
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -233,15 +272,32 @@ public:
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
     AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
+
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
   }
 
+  bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
   bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
 
   bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
   bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
+
+  bool ParseDirectiveLEB128(StringRef, SMLoc);
 };
 
 }
@@ -250,6 +306,7 @@ namespace llvm {
 
 extern MCAsmParserExtension *createDarwinAsmParser();
 extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
 
 }
 
@@ -269,7 +326,10 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
   //
   // FIXME: This is a hack, we need to (majorly) cleanup how these objects are
   // created.
-  if (_MAI.hasSubsectionsViaSymbols()) {
+  if (_MAI.hasMicrosoftFastStdCallMangling()) {
+    PlatformParser = createCOFFAsmParser();
+    PlatformParser->Initialize(*this);
+  } else if (_MAI.hasSubsectionsViaSymbols()) {
     PlatformParser = createDarwinAsmParser();
     PlatformParser->Initialize(*this);
   } else {
@@ -299,30 +359,26 @@ void AsmParser::PrintMacroInstantiations() {
 }
 
 void AsmParser::Warning(SMLoc L, const Twine &Msg) {
-  PrintMessage(L, Msg.str(), "warning");
+  PrintMessage(L, Msg, "warning");
   PrintMacroInstantiations();
 }
 
 bool AsmParser::Error(SMLoc L, const Twine &Msg) {
-  PrintMessage(L, Msg.str(), "error");
+  HadError = true;
+  PrintMessage(L, Msg, "error");
   PrintMacroInstantiations();
   return true;
 }
 
-void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, 
-                             const char *Type) const {
-  SrcMgr.PrintMessage(Loc, Msg, Type);
-}
-                  
 bool AsmParser::EnterIncludeFile(const std::string &Filename) {
   int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc());
   if (NewBuf == -1)
     return true;
-  
+
   CurBuffer = NewBuf;
-  
+
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
-  
+
   return false;
 }
 
@@ -333,7 +389,7 @@ void AsmParser::JumpToLoc(SMLoc Loc) {
 
 const AsmToken &AsmParser::Lex() {
   const AsmToken *tok = &Lexer.Lex();
-  
+
   if (tok->is(AsmToken::Eof)) {
     // If this is the end of an included file, pop the parent file off the
     // include stack.
@@ -343,35 +399,31 @@ const AsmToken &AsmParser::Lex() {
       tok = &Lexer.Lex();
     }
   }
-    
+
   if (tok->is(AsmToken::Error))
     Error(Lexer.getErrLoc(), Lexer.getErr());
-  
+
   return *tok;
 }
 
 bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
   // Create the initial section, if requested.
-  //
-  // FIXME: Target hook & command line option for initial section.
   if (!NoInitialTextSection)
-    Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text",
-                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                      0, SectionKind::getText()));
+    Out.InitSections();
 
   // Prime the lexer.
   Lex();
-  
-  bool HadError = false;
-  
+
+  HadError = false;
   AsmCond StartingCondState = TheCondState;
 
   // While we have input, parse each statement.
   while (Lexer.isNot(AsmToken::Eof)) {
     if (!ParseStatement()) continue;
-  
-    // We had an error, remember it and recover by skipping to the next line.
-    HadError = true;
+
+    // We had an error, validate that one was emitted and recover by skipping to
+    // the next line.
+    assert(HadError && "Parse statement returned an error, but none emitted!");
     EatToEndOfStatement();
   }
 
@@ -383,26 +435,34 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
   const std::vector<MCDwarfFile *> &MCDwarfFiles =
     getContext().getMCDwarfFiles();
   for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
-    if (!MCDwarfFiles[i]){
+    if (!MCDwarfFiles[i])
       TokError("unassigned file number: " + Twine(i) + " for .file directives");
-      HadError = true;
-    }
   }
-  
+
   // Finalize the output stream if there are no errors and if the client wants
   // us to.
-  if (!HadError && !NoFinalize)  
+  if (!HadError && !NoFinalize)
     Out.Finish();
 
   return HadError;
 }
 
+void AsmParser::CheckForValidSection() {
+  if (!getStreamer().getCurrentSection()) {
+    TokError("expected section directive before assembly directive");
+    Out.SwitchSection(Ctx.getMachOSection(
+                        "__TEXT", "__text",
+                        MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                        0, SectionKind::getText()));
+  }
+}
+
 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
 void AsmParser::EatToEndOfStatement() {
   while (Lexer.isNot(AsmToken::EndOfStatement) &&
          Lexer.isNot(AsmToken::Eof))
     Lex();
-  
+
   // Eat EOL.
   if (Lexer.is(AsmToken::EndOfStatement))
     Lex();
@@ -433,6 +493,20 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   return false;
 }
 
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  if (ParseExpression(Res)) return true;
+  if (Lexer.isNot(AsmToken::RBrac))
+    return TokError("expected ']' in brackets expression");
+  EndLoc = Lexer.getLoc();
+  Lex();
+  return false;
+}
+
 /// ParsePrimaryExpr - Parse a primary expression and return it.
 ///  primaryexpr ::= (parenexpr
 ///  primaryexpr ::= symbol
@@ -462,19 +536,21 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     std::pair<StringRef, StringRef> Split = Identifier.split('@');
     MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
 
-    // Mark the symbol as used in an expression.
-    Sym->setUsedInExpr(true);
-
     // Lookup the symbol variant if used.
     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-    if (Split.first.size() != Identifier.size())
+    if (Split.first.size() != Identifier.size()) {
       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+      if (Variant == MCSymbolRefExpr::VK_Invalid) {
+        Variant = MCSymbolRefExpr::VK_None;
+        return TokError("invalid variant '" + Split.second + "'");
+      }
+    }
 
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
     if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
       if (Variant)
-        return Error(EndLoc, "unexpected modified on variable reference");
+        return Error(EndLoc, "unexpected modifier on variable reference");
 
       Res = Sym->getVariableValue();
       return false;
@@ -506,6 +582,13 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     }
     return false;
   }
+  case AsmToken::Real: {
+    APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+    Res = MCConstantExpr::Create(IntVal, getContext());
+    Lex(); // Eat token.
+    return false;
+  }
   case AsmToken::Dot: {
     // This is a '.' reference, which references the current PC.  Emit a
     // temporary label to the streamer and refer to it.
@@ -516,10 +599,12 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     Lex(); // Eat identifier.
     return false;
   }
-      
   case AsmToken::LParen:
     Lex(); // Eat the '('.
     return ParseParenExpr(Res, EndLoc);
+  case AsmToken::LBrac:
+    Lex(); // Eat the '['.
+    return ParseBracketExpr(Res, EndLoc);
   case AsmToken::Minus:
     Lex(); // Eat the operator.
     if (ParsePrimaryExpr(Res, EndLoc))
@@ -546,8 +631,57 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) {
   return ParseExpression(Res, EndLoc);
 }
 
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+                               MCSymbolRefExpr::VariantKind Variant) {
+  // Recurse over the given expression, rebuilding it to apply the given variant
+  // if there is exactly one symbol.
+  switch (E->getKind()) {
+  case MCExpr::Target:
+  case MCExpr::Constant:
+    return 0;
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+    if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+      TokError("invalid variant on expression '" +
+               getTok().getIdentifier() + "' (already modified)");
+      return E;
+    }
+
+    return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+  }
+
+  case MCExpr::Unary: {
+    const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+    const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+    if (!Sub)
+      return 0;
+    return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+  }
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+    const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+    const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+    if (!LHS && !RHS)
+      return 0;
+
+    if (!LHS) LHS = BE->getLHS();
+    if (!RHS) RHS = BE->getRHS();
+
+    return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+  return 0;
+}
+
 /// ParseExpression - Parse an expression and return it.
-/// 
+///
 ///  expr ::= expr +,- expr          -> lowest.
 ///  expr ::= expr |,^,&,! expr      -> middle.
 ///  expr ::= expr *,/,%,<<,>> expr  -> highest.
@@ -559,6 +693,31 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
   if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
     return true;
 
+  // As a special case, we support 'a op b @ modifier' by rewriting the
+  // expression to include the modifier. This is inefficient, but in general we
+  // expect users to use 'a@modifier op b'.
+  if (Lexer.getKind() == AsmToken::At) {
+    Lex();
+
+    if (Lexer.isNot(AsmToken::Identifier))
+      return TokError("unexpected symbol modifier following '@'");
+
+    MCSymbolRefExpr::VariantKind Variant =
+      MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+    if (Variant == MCSymbolRefExpr::VK_Invalid)
+      return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+    const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+    if (!ModifiedRes) {
+      return TokError("invalid modifier '" + getTok().getIdentifier() +
+                      "' (no symbols present)");
+      return true;
+    }
+
+    Res = ModifiedRes;
+    Lex();
+  }
+
   // Try to constant fold it up front, if possible.
   int64_t Value;
   if (Res->EvaluateAsAbsolute(Value))
@@ -575,7 +734,7 @@ bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
 
 bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
   const MCExpr *Expr;
-  
+
   SMLoc StartLoc = Lexer.getLoc();
   if (ParseExpression(Expr))
     return true;
@@ -586,13 +745,13 @@ bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
   return false;
 }
 
-static unsigned getBinOpPrecedence(AsmToken::TokenKind K, 
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
                                    MCBinaryExpr::Opcode &Kind) {
   switch (K) {
   default:
     return 0;    // not a binop.
 
-    // Lowest Precedence: &&, ||
+    // Lowest Precedence: &&, ||, @
   case AsmToken::AmpAmp:
     Kind = MCBinaryExpr::LAnd;
     return 1;
@@ -600,62 +759,65 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
     Kind = MCBinaryExpr::LOr;
     return 1;
 
-    // Low Precedence: +, -, ==, !=, <>, <, <=, >, >=
-  case AsmToken::Plus:
-    Kind = MCBinaryExpr::Add;
+
+    // Low Precedence: |, &, ^
+    //
+    // FIXME: gas seems to support '!' as an infix operator?
+  case AsmToken::Pipe:
+    Kind = MCBinaryExpr::Or;
     return 2;
-  case AsmToken::Minus:
-    Kind = MCBinaryExpr::Sub;
+  case AsmToken::Caret:
+    Kind = MCBinaryExpr::Xor;
+    return 2;
+  case AsmToken::Amp:
+    Kind = MCBinaryExpr::And;
     return 2;
+
+    // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
   case AsmToken::EqualEqual:
     Kind = MCBinaryExpr::EQ;
-    return 2;
+    return 3;
   case AsmToken::ExclaimEqual:
   case AsmToken::LessGreater:
     Kind = MCBinaryExpr::NE;
-    return 2;
+    return 3;
   case AsmToken::Less:
     Kind = MCBinaryExpr::LT;
-    return 2;
+    return 3;
   case AsmToken::LessEqual:
     Kind = MCBinaryExpr::LTE;
-    return 2;
+    return 3;
   case AsmToken::Greater:
     Kind = MCBinaryExpr::GT;
-    return 2;
+    return 3;
   case AsmToken::GreaterEqual:
     Kind = MCBinaryExpr::GTE;
-    return 2;
-
-    // Intermediate Precedence: |, &, ^
-    //
-    // FIXME: gas seems to support '!' as an infix operator?
-  case AsmToken::Pipe:
-    Kind = MCBinaryExpr::Or;
-    return 3;
-  case AsmToken::Caret:
-    Kind = MCBinaryExpr::Xor;
-    return 3;
-  case AsmToken::Amp:
-    Kind = MCBinaryExpr::And;
     return 3;
 
+    // High Intermediate Precedence: +, -
+  case AsmToken::Plus:
+    Kind = MCBinaryExpr::Add;
+    return 4;
+  case AsmToken::Minus:
+    Kind = MCBinaryExpr::Sub;
+    return 4;
+
     // Highest Precedence: *, /, %, <<, >>
   case AsmToken::Star:
     Kind = MCBinaryExpr::Mul;
-    return 4;
+    return 5;
   case AsmToken::Slash:
     Kind = MCBinaryExpr::Div;
-    return 4;
+    return 5;
   case AsmToken::Percent:
     Kind = MCBinaryExpr::Mod;
-    return 4;
+    return 5;
   case AsmToken::LessLess:
     Kind = MCBinaryExpr::Shl;
-    return 4;
+    return 5;
   case AsmToken::GreaterGreater:
     Kind = MCBinaryExpr::Shr;
-    return 4;
+    return 5;
   }
 }
 
@@ -667,18 +829,18 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
   while (1) {
     MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
     unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
-    
+
     // If the next token is lower precedence than we are allowed to eat, return
     // successfully with what we ate already.
     if (TokPrec < Precedence)
       return false;
-    
+
     Lex();
-    
+
     // Eat the next primary expression.
     const MCExpr *RHS;
     if (ParsePrimaryExpr(RHS, EndLoc)) return true;
-    
+
     // If BinOp binds less tightly with RHS than the operator after RHS, let
     // the pending operator take RHS as its LHS.
     MCBinaryExpr::Opcode Dummy;
@@ -692,9 +854,9 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
   }
 }
 
-  
-  
-  
+
+
+
 /// ParseStatement:
 ///   ::= EndOfStatement
 ///   ::= Label* Directive ...Operands... EndOfStatement
@@ -706,12 +868,17 @@ bool AsmParser::ParseStatement() {
     return false;
   }
 
-  // Statements always start with an identifier.
+  // Statements always start with an identifier or are a full line comment.
   AsmToken ID = getTok();
   SMLoc IDLoc = ID.getLoc();
   StringRef IDVal;
   int64_t LocalLabelVal = -1;
-  // GUESS allow an integer followed by a ':' as a directional local label
+  // A full line comment is a '#' as the first token.
+  if (Lexer.is(AsmToken::Hash)) {
+    EatToEndOfStatement();
+    return false;
+  }
+  // Allow an integer followed by a ':' as a directional local label.
   if (Lexer.is(AsmToken::Integer)) {
     LocalLabelVal = getTok().getIntVal();
     if (LocalLabelVal < 0) {
@@ -739,24 +906,30 @@ bool AsmParser::ParseStatement() {
   // example.
   if (IDVal == ".if")
     return ParseDirectiveIf(IDLoc);
+  if (IDVal == ".ifdef")
+    return ParseDirectiveIfdef(IDLoc, true);
+  if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
+    return ParseDirectiveIfdef(IDLoc, false);
   if (IDVal == ".elseif")
     return ParseDirectiveElseIf(IDLoc);
   if (IDVal == ".else")
     return ParseDirectiveElse(IDLoc);
   if (IDVal == ".endif")
     return ParseDirectiveEndIf(IDLoc);
-    
+
   // If we are in a ".if 0" block, ignore this statement.
   if (TheCondState.Ignore) {
     EatToEndOfStatement();
     return false;
   }
-  
+
   // FIXME: Recurse on local labels?
 
   // See what kind of statement we have.
   switch (Lexer.getKind()) {
   case AsmToken::Colon: {
+    CheckForValidSection();
+
     // identifier ':'   -> Label.
     Lex();
 
@@ -772,10 +945,10 @@ bool AsmParser::ParseStatement() {
       Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
     if (!Sym->isUndefined() || Sym->isVariable())
       return Error(IDLoc, "invalid symbol redefinition");
-    
+
     // Emit the label.
     Out.EmitLabel(Sym);
-   
+
     // Consume any end of statement token, if present, to avoid spurious
     // AddBlankLine calls().
     if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -791,7 +964,7 @@ bool AsmParser::ParseStatement() {
     // identifier '=' ... -> assignment statement
     Lex();
 
-    return ParseAssignment(IDVal);
+    return ParseAssignment(IDVal, true);
 
   default: // Normal instruction or directive.
     break;
@@ -802,27 +975,43 @@ bool AsmParser::ParseStatement() {
     if (const Macro *M = MacroMap.lookup(IDVal))
       return HandleMacroEntry(IDVal, IDLoc, M);
 
-  // Otherwise, we have a normal instruction or directive.  
+  // Otherwise, we have a normal instruction or directive.
   if (IDVal[0] == '.') {
     // Assembler features
-    if (IDVal == ".set")
-      return ParseDirectiveSet();
+    if (IDVal == ".set" || IDVal == ".equ")
+      return ParseDirectiveSet(IDVal, true);
+    if (IDVal == ".equiv")
+      return ParseDirectiveSet(IDVal, false);
 
     // Data directives
 
     if (IDVal == ".ascii")
-      return ParseDirectiveAscii(false);
-    if (IDVal == ".asciz")
-      return ParseDirectiveAscii(true);
+      return ParseDirectiveAscii(IDVal, false);
+    if (IDVal == ".asciz" || IDVal == ".string")
+      return ParseDirectiveAscii(IDVal, true);
 
     if (IDVal == ".byte")
       return ParseDirectiveValue(1);
     if (IDVal == ".short")
       return ParseDirectiveValue(2);
+    if (IDVal == ".value")
+      return ParseDirectiveValue(2);
+    if (IDVal == ".2byte")
+      return ParseDirectiveValue(2);
     if (IDVal == ".long")
       return ParseDirectiveValue(4);
+    if (IDVal == ".int")
+      return ParseDirectiveValue(4);
+    if (IDVal == ".4byte")
+      return ParseDirectiveValue(4);
     if (IDVal == ".quad")
       return ParseDirectiveValue(8);
+    if (IDVal == ".8byte")
+      return ParseDirectiveValue(8);
+    if (IDVal == ".single" || IDVal == ".float")
+      return ParseDirectiveRealValue(APFloat::IEEEsingle);
+    if (IDVal == ".double")
+      return ParseDirectiveRealValue(APFloat::IEEEdouble);
 
     if (IDVal == ".align") {
       bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
@@ -852,11 +1041,16 @@ bool AsmParser::ParseStatement() {
       return ParseDirectiveFill();
     if (IDVal == ".space")
       return ParseDirectiveSpace();
+    if (IDVal == ".zero")
+      return ParseDirectiveZero();
 
     // Symbol attribute directives
 
     if (IDVal == ".globl" || IDVal == ".global")
       return ParseDirectiveSymbolAttribute(MCSA_Global);
+    // ELF only? Should it be here?
+    if (IDVal == ".local")
+      return ParseDirectiveSymbolAttribute(MCSA_Local);
     if (IDVal == ".hidden")
       return ParseDirectiveSymbolAttribute(MCSA_Hidden);
     if (IDVal == ".indirect_symbol")
@@ -867,14 +1061,14 @@ bool AsmParser::ParseStatement() {
       return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
     if (IDVal == ".no_dead_strip")
       return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+    if (IDVal == ".symbol_resolver")
+      return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
     if (IDVal == ".private_extern")
       return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
     if (IDVal == ".protected")
       return ParseDirectiveSymbolAttribute(MCSA_Protected);
     if (IDVal == ".reference")
       return ParseDirectiveSymbolAttribute(MCSA_Reference);
-    if (IDVal == ".type")
-      return ParseDirectiveELFType();
     if (IDVal == ".weak")
       return ParseDirectiveSymbolAttribute(MCSA_Weak);
     if (IDVal == ".weak_definition")
@@ -894,6 +1088,9 @@ bool AsmParser::ParseStatement() {
     if (IDVal == ".include")
       return ParseDirectiveInclude();
 
+    if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64")
+      return TokError(Twine(IDVal) + " not supported yet");
+
     // Look up the handler in the handler table.
     std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
       DirectiveMap.lookup(IDVal);
@@ -909,16 +1106,16 @@ bool AsmParser::ParseStatement() {
     return false;
   }
 
+  CheckForValidSection();
+
   // Canonicalize the opcode to lower case.
   SmallString<128> Opcode;
   for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
     Opcode.push_back(tolower(IDVal[i]));
-  
+
   SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
   bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
                                                      ParsedOperands);
-  if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
-    HadError = TokError("unexpected token in argument list");
 
   // Dump the parsed representation, if requested.
   if (getShowParsedOperands()) {
@@ -936,25 +1133,17 @@ bool AsmParser::ParseStatement() {
   }
 
   // If parsing succeeded, match the instruction.
-  if (!HadError) {
-    MCInst Inst;
-    if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) {
-      // Emit the instruction on success.
-      Out.EmitInstruction(Inst);
-    } else
-      HadError = true;
-  }
-
-  // If there was no error, consume the end-of-statement token. Otherwise this
-  // will be done by our caller.
   if (!HadError)
-    Lex();
+    HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
+                                                         Out);
 
   // Free any parsed operands.
   for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
     delete ParsedOperands[i];
 
-  return HadError;
+  // Don't skip the rest of the line, the instruction parser is responsible for
+  // that.
+  return false;
 }
 
 MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
@@ -1083,14 +1272,35 @@ void AsmParser::HandleMacroExit() {
   ActiveMacros.pop_back();
 }
 
-bool AsmParser::ParseAssignment(StringRef Name) {
+static void MarkUsed(const MCExpr *Value) {
+  switch (Value->getKind()) {
+  case MCExpr::Binary:
+    MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
+    MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+    break;
+  case MCExpr::Target:
+  case MCExpr::Constant:
+    break;
+  case MCExpr::SymbolRef: {
+    static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
+    break;
+  }
+  case MCExpr::Unary:
+    MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+    break;
+  }
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
   // FIXME: Use better location, we should use proper tokens.
   SMLoc EqualLoc = Lexer.getLoc();
 
   const MCExpr *Value;
   if (ParseExpression(Value))
     return true;
-  
+
+  MarkUsed(Value);
+
   if (Lexer.isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in assignment");
 
@@ -1105,22 +1315,23 @@ bool AsmParser::ParseAssignment(StringRef Name) {
     //
     // FIXME: Diagnostics. Note the location of the definition as a label.
     // FIXME: Diagnose assignment to protected identifier (e.g., register name).
-    if (Sym->isUndefined() && !Sym->isUsedInExpr())
+    if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
       ; // Allow redefinitions of undefined symbols only used in directives.
-    else if (!Sym->isUndefined() && !Sym->isAbsolute())
+    else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef))
       return Error(EqualLoc, "redefinition of '" + Name + "'");
     else if (!Sym->isVariable())
       return Error(EqualLoc, "invalid assignment to '" + Name + "'");
     else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
       return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
                    Name + "'");
+
+    // Don't count these checks as uses.
+    Sym->setUsed(false);
   } else
     Sym = getContext().GetOrCreateSymbol(Name);
 
   // FIXME: Handle '.'.
 
-  Sym->setUsedInExpr(true);
-
   // Do the assignment.
   Out.EmitAssignment(Sym, Value);
 
@@ -1167,18 +1378,20 @@ bool AsmParser::ParseIdentifier(StringRef &Res) {
 }
 
 /// ParseDirectiveSet:
+///   ::= .equ identifier ',' expression
+///   ::= .equiv identifier ',' expression
 ///   ::= .set identifier ',' expression
-bool AsmParser::ParseDirectiveSet() {
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
   StringRef Name;
 
   if (ParseIdentifier(Name))
-    return TokError("expected identifier after '.set' directive");
-  
+    return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
   if (getLexer().isNot(AsmToken::Comma))
-    return TokError("unexpected token in '.set'");
+    return TokError("unexpected token in '" + Twine(IDVal) + "'");
   Lex();
 
-  return ParseAssignment(Name);
+  return ParseAssignment(Name, allow_redef);
 }
 
 bool AsmParser::ParseEscapedString(std::string &Data) {
@@ -1240,12 +1453,14 @@ bool AsmParser::ParseEscapedString(std::string &Data) {
 }
 
 /// ParseDirectiveAscii:
-///   ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
-bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
+///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
     for (;;) {
       if (getLexer().isNot(AsmToken::String))
-        return TokError("expected string in '.ascii' or '.asciz' directive");
+        return TokError("expected string in '" + Twine(IDVal) + "' directive");
 
       std::string Data;
       if (ParseEscapedString(Data))
@@ -1261,7 +1476,7 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
         break;
 
       if (getLexer().isNot(AsmToken::Comma))
-        return TokError("unexpected token in '.ascii' or '.asciz' directive");
+        return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
       Lex();
     }
   }
@@ -1274,9 +1489,10 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
 ///  ::= (.byte | .short | ... ) [ expression (, expression)* ]
 bool AsmParser::ParseDirectiveValue(unsigned Size) {
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
     for (;;) {
       const MCExpr *Value;
-      SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc();
       if (ParseExpression(Value))
         return true;
 
@@ -1288,7 +1504,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
 
       if (getLexer().is(AsmToken::EndOfStatement))
         break;
-      
+
       // FIXME: Improve diagnostic.
       if (getLexer().isNot(AsmToken::Comma))
         return TokError("unexpected token in directive");
@@ -1300,9 +1516,61 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
   return false;
 }
 
+/// ParseDirectiveRealValue
+///  ::= (.single | .double) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
+    for (;;) {
+      // We don't truly support arithmetic on floating point expressions, so we
+      // have to manually parse unary prefixes.
+      bool IsNeg = false;
+      if (getLexer().is(AsmToken::Minus)) {
+        Lex();
+        IsNeg = true;
+      } else if (getLexer().is(AsmToken::Plus))
+        Lex();
+
+      if (getLexer().isNot(AsmToken::Integer) &&
+          getLexer().isNot(AsmToken::Real))
+        return TokError("unexpected token in directive");
+
+      // Convert to an APFloat.
+      APFloat Value(Semantics);
+      if (Value.convertFromString(getTok().getString(),
+                                  APFloat::rmNearestTiesToEven) ==
+          APFloat::opInvalidOp)
+        return TokError("invalid floating point literal");
+      if (IsNeg)
+        Value.changeSign();
+
+      // Consume the numeric token.
+      Lex();
+
+      // Emit the value as an integer.
+      APInt AsInt = Value.bitcastToAPInt();
+      getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+                                 AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      if (getLexer().isNot(AsmToken::Comma))
+        return TokError("unexpected token in directive");
+      Lex();
+    }
+  }
+
+  Lex();
+  return false;
+}
+
 /// ParseDirectiveSpace
 ///  ::= .space expression [ , expression ]
 bool AsmParser::ParseDirectiveSpace() {
+  CheckForValidSection();
+
   int64_t NumBytes;
   if (ParseAbsoluteExpression(NumBytes))
     return true;
@@ -1312,7 +1580,7 @@ bool AsmParser::ParseDirectiveSpace() {
     if (getLexer().isNot(AsmToken::Comma))
       return TokError("unexpected token in '.space' directive");
     Lex();
-    
+
     if (ParseAbsoluteExpression(FillExpr))
       return true;
 
@@ -1331,9 +1599,37 @@ bool AsmParser::ParseDirectiveSpace() {
   return false;
 }
 
+/// ParseDirectiveZero
+///  ::= .zero expression
+bool AsmParser::ParseDirectiveZero() {
+  CheckForValidSection();
+
+  int64_t NumBytes;
+  if (ParseAbsoluteExpression(NumBytes))
+    return true;
+
+  int64_t Val = 0;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    if (ParseAbsoluteExpression(Val))
+      return true;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.zero' directive");
+
+  Lex();
+
+  getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE);
+
+  return false;
+}
+
 /// ParseDirectiveFill
 ///  ::= .fill expression , expression , expression
 bool AsmParser::ParseDirectiveFill() {
+  CheckForValidSection();
+
   int64_t NumValues;
   if (ParseAbsoluteExpression(NumValues))
     return true;
@@ -1341,7 +1637,7 @@ bool AsmParser::ParseDirectiveFill() {
   if (getLexer().isNot(AsmToken::Comma))
     return TokError("unexpected token in '.fill' directive");
   Lex();
-  
+
   int64_t FillSize;
   if (ParseAbsoluteExpression(FillSize))
     return true;
@@ -1349,14 +1645,14 @@ bool AsmParser::ParseDirectiveFill() {
   if (getLexer().isNot(AsmToken::Comma))
     return TokError("unexpected token in '.fill' directive");
   Lex();
-  
+
   int64_t FillExpr;
   if (ParseAbsoluteExpression(FillExpr))
     return true;
 
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.fill' directive");
-  
+
   Lex();
 
   if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
@@ -1371,6 +1667,8 @@ bool AsmParser::ParseDirectiveFill() {
 /// ParseDirectiveOrg
 ///  ::= .org expression [ , expression ]
 bool AsmParser::ParseDirectiveOrg() {
+  CheckForValidSection();
+
   const MCExpr *Offset;
   if (ParseExpression(Offset))
     return true;
@@ -1381,7 +1679,7 @@ bool AsmParser::ParseDirectiveOrg() {
     if (getLexer().isNot(AsmToken::Comma))
       return TokError("unexpected token in '.org' directive");
     Lex();
-    
+
     if (ParseAbsoluteExpression(FillExpr))
       return true;
 
@@ -1401,6 +1699,8 @@ bool AsmParser::ParseDirectiveOrg() {
 /// ParseDirectiveAlign
 ///  ::= {.align, ...} expression [ , expression [ , expression ]]
 bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+  CheckForValidSection();
+
   SMLoc AlignmentLoc = getLexer().getLoc();
   int64_t Alignment;
   if (ParseAbsoluteExpression(Alignment))
@@ -1432,7 +1732,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
       MaxBytesLoc = getLexer().getLoc();
       if (ParseAbsoluteExpression(MaxBytesToFill))
         return true;
-      
+
       if (getLexer().isNot(AsmToken::EndOfStatement))
         return TokError("unexpected token in directive");
     }
@@ -1471,12 +1771,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
 
   // Check whether we should use optimal code alignment for this .align
   // directive.
-  //
-  // FIXME: This should be using a target hook.
-  bool UseCodeAlign = false;
-  if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
-        getStreamer().getCurrentSection()))
-    UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+  bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
   if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
       ValueSize == 1 && UseCodeAlign) {
     getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
@@ -1498,7 +1793,7 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
 
       if (ParseIdentifier(Name))
         return TokError("expected identifier in directive");
-      
+
       MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
 
       getStreamer().EmitSymbolAttribute(Sym, Attr);
@@ -1513,63 +1808,19 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
   }
 
   Lex();
-  return false;  
-}
-
-/// ParseDirectiveELFType
-///  ::= .type identifier , @attribute
-bool AsmParser::ParseDirectiveELFType() {
-  StringRef Name;
-  if (ParseIdentifier(Name))
-    return TokError("expected identifier in directive");
-
-  // Handle the identifier as the key symbol.
-  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
-
-  if (getLexer().isNot(AsmToken::Comma))
-    return TokError("unexpected token in '.type' directive");
-  Lex();
-
-  if (getLexer().isNot(AsmToken::At))
-    return TokError("expected '@' before type");
-  Lex();
-
-  StringRef Type;
-  SMLoc TypeLoc;
-
-  TypeLoc = getLexer().getLoc();
-  if (ParseIdentifier(Type))
-    return TokError("expected symbol type in directive");
-
-  MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
-    .Case("function", MCSA_ELF_TypeFunction)
-    .Case("object", MCSA_ELF_TypeObject)
-    .Case("tls_object", MCSA_ELF_TypeTLS)
-    .Case("common", MCSA_ELF_TypeCommon)
-    .Case("notype", MCSA_ELF_TypeNoType)
-    .Default(MCSA_Invalid);
-
-  if (Attr == MCSA_Invalid)
-    return Error(TypeLoc, "unsupported attribute in '.type' directive");
-
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '.type' directive");
-
-  Lex();
-
-  getStreamer().EmitSymbolAttribute(Sym, Attr);
-
   return false;
 }
 
 /// ParseDirectiveComm
 ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
 bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+  CheckForValidSection();
+
   SMLoc IDLoc = getLexer().getLoc();
   StringRef Name;
   if (ParseIdentifier(Name))
     return TokError("expected identifier in directive");
-  
+
   // Handle the identifier as the key symbol.
   MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
 
@@ -1589,7 +1840,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
     Pow2AlignmentLoc = getLexer().getLoc();
     if (ParseAbsoluteExpression(Pow2Alignment))
       return true;
-    
+
     // If this target takes alignments in bytes (not log) validate and convert.
     if (Lexer.getMAI().getAlignmentIsInBytes()) {
       if (!isPowerOf2_64(Pow2Alignment))
@@ -1597,10 +1848,10 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
       Pow2Alignment = Log2_64(Pow2Alignment);
     }
   }
-  
+
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.comm' or '.lcomm' directive");
-  
+
   Lex();
 
   // NOTE: a size of zero for a .comm should create a undefined symbol
@@ -1659,17 +1910,17 @@ bool AsmParser::ParseDirectiveAbort() {
 bool AsmParser::ParseDirectiveInclude() {
   if (getLexer().isNot(AsmToken::String))
     return TokError("expected string in '.include' directive");
-  
+
   std::string Filename = getTok().getString();
   SMLoc IncludeLoc = getLexer().getLoc();
   Lex();
 
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.include' directive");
-  
+
   // Strip the quotes.
   Filename = Filename.substr(1, Filename.size()-2);
-  
+
   // Attempt to switch the lexer to the included file before consuming the end
   // of statement to avoid losing it when we switch.
   if (EnterIncludeFile(Filename)) {
@@ -1695,7 +1946,7 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
 
     if (getLexer().isNot(AsmToken::EndOfStatement))
       return TokError("unexpected token in '.if' directive");
-    
+
     Lex();
 
     TheCondState.CondMet = ExprValue;
@@ -1705,6 +1956,31 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
   return false;
 }
 
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+  StringRef Name;
+  TheCondStack.push_back(TheCondState);
+  TheCondState.TheCond = AsmCond::IfCond;
+
+  if (TheCondState.Ignore) {
+    EatToEndOfStatement();
+  } else {
+    if (ParseIdentifier(Name))
+      return TokError("expected identifier after '.ifdef'");
+
+    Lex();
+
+    MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+    if (expect_defined)
+      TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+    else
+      TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+    TheCondState.Ignore = !TheCondState.CondMet;
+  }
+
+  return false;
+}
+
 /// ParseDirectiveElseIf
 /// ::= .elseif expression
 bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
@@ -1728,7 +2004,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
 
     if (getLexer().isNot(AsmToken::EndOfStatement))
       return TokError("unexpected token in '.elseif' directive");
-    
+
     Lex();
     TheCondState.CondMet = ExprValue;
     TheCondState.Ignore = !TheCondState.CondMet;
@@ -1742,7 +2018,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
 bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.else' directive");
-  
+
   Lex();
 
   if (TheCondState.TheCond != AsmCond::IfCond &&
@@ -1766,7 +2042,7 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
 bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.endif' directive");
-  
+
   Lex();
 
   if ((TheCondState.TheCond == AsmCond::NoCond) ||
@@ -1808,9 +2084,8 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
   if (FileNumber == -1)
     getStreamer().EmitFileDirective(Filename);
   else {
-     if (getContext().GetDwarfFile(Filename, FileNumber) == 0)
-	Error(FileNumberLoc, "file number already allocated");
-    getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+    if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+      Error(FileNumberLoc, "file number already allocated");
   }
 
   return false;
@@ -1851,7 +2126,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
   int64_t FileNumber = getTok().getIntVal();
   if (FileNumber < 1)
     return TokError("file number less than one in '.loc' directive");
-  if (!getContext().ValidateDwarfFileNumber(FileNumber))
+  if (!getContext().isValidDwarfFileNumber(FileNumber))
     return TokError("unassigned file number in '.loc' directive");
   Lex();
 
@@ -1871,8 +2146,9 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
     Lex();
   }
 
-  unsigned Flags = 0;
+  unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
   unsigned Isa = 0;
+  int64_t Discriminator = 0;
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       if (getLexer().is(AsmToken::EndOfStatement))
@@ -1903,7 +2179,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
             Flags |= DWARF2_FLAG_IS_STMT;
           else
             return Error(Loc, "is_stmt value not 0 or 1");
-	}
+        }
         else {
           return Error(Loc, "is_stmt value not the constant value of 0 or 1");
         }
@@ -1919,11 +2195,15 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
           if (Value < 0)
             return Error(Loc, "isa number less than zero");
           Isa = Value;
-	}
+        }
         else {
           return Error(Loc, "isa number not a constant value");
         }
       }
+      else if (Name == "discriminator") {
+        if (getParser().ParseAbsoluteExpression(Discriminator))
+          return true;
+      }
       else {
         return Error(Loc, "unknown sub-directive in '.loc' directive");
       }
@@ -1933,11 +2213,176 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
     }
   }
 
-  getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,Isa);
+  getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+                                      Isa, Discriminator);
 
   return false;
 }
 
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
+                                           SMLoc DirectiveLoc) {
+  return TokError("unsupported directive '" + Directive + "'");
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
+                                                  SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIStartProc();
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIEndProc();
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
+bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+                                                     SMLoc DirectiveLoc) {
+  unsigned RegNo;
+
+  if (getLexer().is(AsmToken::Percent)) {
+    if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
+      DirectiveLoc))
+      return true;
+    Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true);
+  } else
+    return getParser().ParseAbsoluteExpression(Register);
+
+  return false;
+}
+
+/// ParseDirectiveCFIDefCfa
+/// ::= .cfi_def_cfa register,  offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
+                                               SMLoc DirectiveLoc) {
+  int64_t Register = 0;
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Offset = 0;
+  if (getParser().ParseAbsoluteExpression(Offset))
+    return true;
+
+  return getStreamer().EmitCFIDefCfa(Register, Offset);
+}
+
+/// ParseDirectiveCFIDefCfaOffset
+/// ::= .cfi_def_cfa_offset offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
+                                                     SMLoc DirectiveLoc) {
+  int64_t Offset = 0;
+  if (getParser().ParseAbsoluteExpression(Offset))
+    return true;
+
+  return getStreamer().EmitCFIDefCfaOffset(Offset);
+}
+
+/// ParseDirectiveCFIDefCfaRegister
+/// ::= .cfi_def_cfa_register register
+bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
+                                                       SMLoc DirectiveLoc) {
+  int64_t Register = 0;
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;
+
+  return getStreamer().EmitCFIDefCfaRegister(Register);
+}
+
+/// ParseDirectiveCFIOffset
+/// ::= .cfi_off register, offset
+bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
+  int64_t Register = 0;
+  int64_t Offset = 0;
+
+  if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+    return true;
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  if (getParser().ParseAbsoluteExpression(Offset))
+    return true;
+
+  return getStreamer().EmitCFIOffset(Register, Offset);
+}
+
+static bool isValidEncoding(int64_t Encoding) {
+  if (Encoding & ~0xff)
+    return false;
+
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return true;
+
+  const unsigned Format = Encoding & 0xf;
+  if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+      Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+      Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+      Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+    return false;
+
+  const unsigned Application = Encoding & 0x70;
+  if (Application != dwarf::DW_EH_PE_absptr &&
+      Application != dwarf::DW_EH_PE_pcrel)
+    return false;
+
+  return true;
+}
+
+/// ParseDirectiveCFIPersonalityOrLsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
+bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
+                                                    SMLoc DirectiveLoc) {
+  int64_t Encoding = 0;
+  if (getParser().ParseAbsoluteExpression(Encoding))
+    return true;
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return false;
+
+  if (!isValidEncoding(Encoding))
+    return TokError("unsupported encoding.");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (IDVal == ".cfi_personality")
+    return getStreamer().EmitCFIPersonality(Sym, Encoding);
+  else {
+    assert(IDVal == ".cfi_lsda");
+    return getStreamer().EmitCFILsda(Sym, Encoding);
+  }
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
+                                                      SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIRememberState();
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
+                                                     SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIRestoreState();
+}
+
 /// ParseDirectiveMacrosOnOff
 /// ::= .macros_on
 /// ::= .macros_off
@@ -2022,6 +2467,26 @@ bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
                   "no current macro definition");
 }
 
+bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+  getParser().CheckForValidSection();
+
+  const MCExpr *Value;
+
+  if (getParser().ParseExpression(Value))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  if (DirName[1] == 's')
+    getStreamer().EmitSLEB128Value(Value);
+  else
+    getStreamer().EmitULEB128Value(Value);
+
+  return false;
+}
+
+
 /// \brief Create an MCAsmParser instance.
 MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
                                      MCContext &C, MCStreamer &Out,
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index 25a7bf484212..eaea9f6cd3c5 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMMCParser
   AsmLexer.cpp
   AsmParser.cpp
+  COFFAsmParser.cpp
   DarwinAsmParser.cpp
   ELFAsmParser.cpp
   MCAsmLexer.cpp
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
new file mode 100644
index 000000000000..5ecab03b00f0
--- /dev/null
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -0,0 +1,144 @@
+//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/COFF.h"
+using namespace llvm;
+
+namespace {
+
+class COFFAsmParser : public MCAsmParserExtension {
+  template<bool (COFFAsmParser::*Handler)(StringRef, SMLoc)>
+  void AddDirectiveHandler(StringRef Directive) {
+    getParser().AddDirectiveHandler(this, Directive,
+                                    HandleDirective<COFFAsmParser, Handler>);
+  }
+
+  bool ParseSectionSwitch(StringRef Section,
+                          unsigned Characteristics,
+                          SectionKind Kind);
+
+  virtual void Initialize(MCAsmParser &Parser) {
+    // Call the base implementation.
+    MCAsmParserExtension::Initialize(Parser);
+
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+  }
+
+  bool ParseSectionDirectiveText(StringRef, SMLoc) {
+    return ParseSectionSwitch(".text",
+                              COFF::IMAGE_SCN_CNT_CODE
+                            | COFF::IMAGE_SCN_MEM_EXECUTE
+                            | COFF::IMAGE_SCN_MEM_READ,
+                              SectionKind::getText());
+  }
+  bool ParseSectionDirectiveData(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data",
+                              COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+                            | COFF::IMAGE_SCN_MEM_READ
+                            | COFF::IMAGE_SCN_MEM_WRITE,
+                              SectionKind::getDataRel());
+  }
+  bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+    return ParseSectionSwitch(".bss",
+                              COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+                            | COFF::IMAGE_SCN_MEM_READ
+                            | COFF::IMAGE_SCN_MEM_WRITE,
+                              SectionKind::getBSS());
+  }
+
+  bool ParseDirectiveDef(StringRef, SMLoc);
+  bool ParseDirectiveScl(StringRef, SMLoc);
+  bool ParseDirectiveType(StringRef, SMLoc);
+  bool ParseDirectiveEndef(StringRef, SMLoc);
+
+public:
+  COFFAsmParser() {}
+};
+
+} // end annonomous namespace.
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+                                       unsigned Characteristics,
+                                       SectionKind Kind) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in section switching directive");
+  Lex();
+
+  getStreamer().SwitchSection(getContext().getCOFFSection(
+                                Section, Characteristics, Kind));
+
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+  StringRef SymbolName;
+
+  if (getParser().ParseIdentifier(SymbolName))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+  getStreamer().BeginCOFFSymbolDef(Sym);
+
+  Lex();
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+  int64_t SymbolStorageClass;
+  if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  Lex();
+  getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+  int64_t Type;
+  if (getParser().ParseAbsoluteExpression(Type))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  Lex();
+  getStreamer().EmitCOFFSymbolType(Type);
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+  Lex();
+  getStreamer().EndCOFFSymbolDef();
+  return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+  return new COFFAsmParser;
+}
+
+}
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 54ddb449b285..44f234566afd 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -305,7 +305,7 @@ bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
   //
   // FIXME: This isn't really what 'as' does; I think it just uses the implicit
   // alignment on the section (e.g., if one manually inserts bytes into the
-  // section, then just issueing the section switch directive will not realign
+  // section, then just issuing the section switch directive will not realign
   // the section. However, this is arguably more reasonable behavior, and there
   // is no good reason for someone to intentionally emit incorrectly sized
   // values into the implicitly aligned sections.
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index f982fdaecb12..bfaf36a451b3 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -8,13 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 namespace {
@@ -47,72 +49,86 @@ public:
     AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
     AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
     AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
     AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
-    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128");
-    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128");
     AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
   }
 
+  // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+  // the best way for us to get access to it?
   bool ParseSectionDirectiveData(StringRef, SMLoc) {
-    return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+                              ELF::SHF_WRITE |ELF::SHF_ALLOC,
                               SectionKind::getDataRel());
   }
   bool ParseSectionDirectiveText(StringRef, SMLoc) {
-    return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_EXECINSTR |
-                              MCSectionELF::SHF_ALLOC, SectionKind::getText());
+    return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+                              ELF::SHF_EXECINSTR |
+                              ELF::SHF_ALLOC, SectionKind::getText());
   }
   bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
-    return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS,
-                              MCSectionELF::SHF_WRITE |
-                              MCSectionELF::SHF_ALLOC, SectionKind::getBSS());
+    return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+                              ELF::SHF_WRITE |
+                              ELF::SHF_ALLOC, SectionKind::getBSS());
   }
   bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
-    return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC,
+    return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC,
                               SectionKind::getReadOnly());
   }
   bool ParseSectionDirectiveTData(StringRef, SMLoc) {
-    return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_TLS | ELF::SHF_WRITE,
                               SectionKind::getThreadData());
   }
   bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
-    return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_TLS | ELF::SHF_WRITE,
                               SectionKind::getThreadBSS());
   }
   bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
-    return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
                               SectionKind::getDataRel());
   }
   bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
-    return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
                               SectionKind::getReadOnlyWithRel());
   }
   bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
-    return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
                               SectionKind::getReadOnlyWithRelLocal());
   }
   bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
-    return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS,
-                              MCSectionELF::SHF_ALLOC |
-                              MCSectionELF::SHF_WRITE,
+    return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
                               SectionKind::getDataRel());
   }
-  bool ParseDirectiveLEB128(StringRef, SMLoc);
+  bool ParseDirectivePushSection(StringRef, SMLoc);
+  bool ParseDirectivePopSection(StringRef, SMLoc);
   bool ParseDirectiveSection(StringRef, SMLoc);
   bool ParseDirectiveSize(StringRef, SMLoc);
   bool ParseDirectivePrevious(StringRef, SMLoc);
+  bool ParseDirectiveType(StringRef, SMLoc);
+  bool ParseDirectiveIdent(StringRef, SMLoc);
+  bool ParseDirectiveSymver(StringRef, SMLoc);
+  bool ParseDirectiveWeakref(StringRef, SMLoc);
+
+private:
+  bool ParseSectionName(StringRef &SectionName);
 };
 
 }
@@ -150,135 +166,359 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
   return false;
 }
 
-// FIXME: This is a work in progress.
-bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
-  StringRef SectionName;
-  // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly.
-  if (getParser().ParseIdentifier(SectionName))
-    return TokError("expected identifier in directive");
-
-  std::string FlagsStr;
-  StringRef TypeName;
-  int64_t Size = 0;
-  if (getLexer().is(AsmToken::Comma)) {
-    Lex();
-
-    if (getLexer().isNot(AsmToken::String))
-      return TokError("expected string in directive");
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+  // A section name can contain -, so we cannot just use
+  // ParseIdentifier.
+  SMLoc FirstLoc = getLexer().getLoc();
+  unsigned Size = 0;
 
-    FlagsStr = getTok().getStringContents();
+  if (getLexer().is(AsmToken::String)) {
+    SectionName = getTok().getIdentifier();
     Lex();
+    return false;
+  }
 
-    AsmToken::TokenKind TypeStartToken;
-    if (getContext().getAsmInfo().getCommentString()[0] == '@')
-      TypeStartToken = AsmToken::Percent;
-    else
-      TypeStartToken = AsmToken::At;
+  for (;;) {
+    StringRef Tmp;
+    unsigned CurSize;
 
-    if (getLexer().is(AsmToken::Comma)) {
+    SMLoc PrevLoc = getLexer().getLoc();
+    if (getLexer().is(AsmToken::Minus)) {
+      CurSize = 1;
+      Lex(); // Consume the "-".
+    } else if (getLexer().is(AsmToken::String)) {
+      CurSize = getTok().getIdentifier().size() + 2;
       Lex();
-      if (getLexer().is(TypeStartToken)) {
-        Lex();
-        if (getParser().ParseIdentifier(TypeName))
-          return TokError("expected identifier in directive");
-
-        if (getLexer().is(AsmToken::Comma)) {
-          Lex();
+    } else if (getLexer().is(AsmToken::Identifier)) {
+      CurSize = getTok().getIdentifier().size();
+      Lex();
+    } else {
+      break;
+    }
 
-          if (getParser().ParseAbsoluteExpression(Size))
-            return true;
+    Size += CurSize;
+    SectionName = StringRef(FirstLoc.getPointer(), Size);
 
-          if (Size <= 0)
-            return TokError("section size must be positive");
-        }
-      }
-    }
+    // Make sure the following token is adjacent.
+    if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer())
+      break;
   }
+  if (Size == 0)
+    return true;
 
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in directive");
+  return false;
+}
 
-  unsigned Flags = 0;
-  for (unsigned i = 0; i < FlagsStr.size(); i++) {
-    switch (FlagsStr[i]) {
+static SectionKind computeSectionKind(unsigned Flags) {
+  if (Flags & ELF::SHF_EXECINSTR)
+    return SectionKind::getText();
+  if (Flags & ELF::SHF_TLS)
+    return SectionKind::getThreadData();
+  return SectionKind::getDataRel();
+}
+
+static int parseSectionFlags(StringRef flagsStr) {
+  int flags = 0;
+
+  for (unsigned i = 0; i < flagsStr.size(); i++) {
+    switch (flagsStr[i]) {
     case 'a':
-      Flags |= MCSectionELF::SHF_ALLOC;
+      flags |= ELF::SHF_ALLOC;
       break;
     case 'x':
-      Flags |= MCSectionELF::SHF_EXECINSTR;
+      flags |= ELF::SHF_EXECINSTR;
       break;
     case 'w':
-      Flags |= MCSectionELF::SHF_WRITE;
+      flags |= ELF::SHF_WRITE;
       break;
     case 'M':
-      Flags |= MCSectionELF::SHF_MERGE;
+      flags |= ELF::SHF_MERGE;
       break;
     case 'S':
-      Flags |= MCSectionELF::SHF_STRINGS;
+      flags |= ELF::SHF_STRINGS;
       break;
     case 'T':
-      Flags |= MCSectionELF::SHF_TLS;
+      flags |= ELF::SHF_TLS;
       break;
     case 'c':
-      Flags |= MCSectionELF::XCORE_SHF_CP_SECTION;
+      flags |= ELF::XCORE_SHF_CP_SECTION;
       break;
     case 'd':
-      Flags |= MCSectionELF::XCORE_SHF_DP_SECTION;
+      flags |= ELF::XCORE_SHF_DP_SECTION;
+      break;
+    case 'G':
+      flags |= ELF::SHF_GROUP;
       break;
     default:
+      return -1;
+    }
+  }
+
+  return flags;
+}
+
+bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
+  getStreamer().PushSection();
+
+  if (ParseDirectiveSection(s, loc)) {
+    getStreamer().PopSection();
+    return true;
+  }
+
+  return false;
+}
+
+bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+  if (!getStreamer().PopSection())
+    return TokError(".popsection without corresponding .pushsection");
+  return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+  StringRef SectionName;
+
+  if (ParseSectionName(SectionName))
+    return TokError("expected identifier in directive");
+
+  StringRef TypeName;
+  int64_t Size = 0;
+  StringRef GroupName;
+  unsigned Flags = 0;
+
+  // Set the defaults first.
+  if (SectionName == ".fini" || SectionName == ".init" ||
+      SectionName == ".rodata")
+    Flags |= ELF::SHF_ALLOC;
+  if (SectionName == ".fini" || SectionName == ".init")
+    Flags |= ELF::SHF_EXECINSTR;
+
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+
+    if (getLexer().isNot(AsmToken::String))
+      return TokError("expected string in directive");
+
+    StringRef FlagsStr = getTok().getStringContents();
+    Lex();
+
+    int extraFlags = parseSectionFlags(FlagsStr);
+    if (extraFlags < 0)
       return TokError("unknown flag");
+    Flags |= extraFlags;
+
+    bool Mergeable = Flags & ELF::SHF_MERGE;
+    bool Group = Flags & ELF::SHF_GROUP;
+
+    if (getLexer().isNot(AsmToken::Comma)) {
+      if (Mergeable)
+        return TokError("Mergeable section must specify the type");
+      if (Group)
+        return TokError("Group section must specify the type");
+    } else {
+      Lex();
+      if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+        return TokError("expected '@' or '%' before type");
+
+      Lex();
+      if (getParser().ParseIdentifier(TypeName))
+        return TokError("expected identifier in directive");
+
+      if (Mergeable) {
+        if (getLexer().isNot(AsmToken::Comma))
+          return TokError("expected the entry size");
+        Lex();
+        if (getParser().ParseAbsoluteExpression(Size))
+          return true;
+        if (Size <= 0)
+          return TokError("entry size must be positive");
+      }
+
+      if (Group) {
+        if (getLexer().isNot(AsmToken::Comma))
+          return TokError("expected group name");
+        Lex();
+        if (getParser().ParseIdentifier(GroupName))
+          return true;
+        if (getLexer().is(AsmToken::Comma)) {
+          Lex();
+          StringRef Linkage;
+          if (getParser().ParseIdentifier(Linkage))
+            return true;
+          if (Linkage != "comdat")
+            return TokError("Linkage must be 'comdat'");
+        }
+      }
     }
   }
 
-  unsigned Type = MCSectionELF::SHT_NULL;
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  unsigned Type = ELF::SHT_PROGBITS;
+
   if (!TypeName.empty()) {
     if (TypeName == "init_array")
-      Type = MCSectionELF::SHT_INIT_ARRAY;
+      Type = ELF::SHT_INIT_ARRAY;
     else if (TypeName == "fini_array")
-      Type = MCSectionELF::SHT_FINI_ARRAY;
+      Type = ELF::SHT_FINI_ARRAY;
     else if (TypeName == "preinit_array")
-      Type = MCSectionELF::SHT_PREINIT_ARRAY;
+      Type = ELF::SHT_PREINIT_ARRAY;
     else if (TypeName == "nobits")
-      Type = MCSectionELF::SHT_NOBITS;
+      Type = ELF::SHT_NOBITS;
     else if (TypeName == "progbits")
-      Type = MCSectionELF::SHT_PROGBITS;
+      Type = ELF::SHT_PROGBITS;
+    else if (TypeName == "note")
+      Type = ELF::SHT_NOTE;
+    else if (TypeName == "unwind")
+      Type = ELF::SHT_X86_64_UNWIND;
     else
       return TokError("unknown section type");
   }
 
-  SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR)
-                     ? SectionKind::getText()
-                     : SectionKind::getDataRel();
+  SectionKind Kind = computeSectionKind(Flags);
   getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
-                                                         Flags, Kind, false));
+                                                         Flags, Kind, Size,
+                                                         GroupName));
   return false;
 }
 
-bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
-  int64_t Value;
-  if (getParser().ParseAbsoluteExpression(Value))
-    return true;
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+  const MCSection *PreviousSection = getStreamer().getPreviousSection();
+  if (PreviousSection == NULL)
+      return TokError(".previous without corresponding .section");
+  getStreamer().SwitchSection(PreviousSection);
+
+  return false;
+}
+
+/// ParseDirectiveELFType
+///  ::= .type identifier , @attribute
+bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in '.type' directive");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+    return TokError("expected '@' or '%' before type");
+  Lex();
+
+  StringRef Type;
+  SMLoc TypeLoc;
+
+  TypeLoc = getLexer().getLoc();
+  if (getParser().ParseIdentifier(Type))
+    return TokError("expected symbol type in directive");
+
+  MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+    .Case("function", MCSA_ELF_TypeFunction)
+    .Case("object", MCSA_ELF_TypeObject)
+    .Case("tls_object", MCSA_ELF_TypeTLS)
+    .Case("common", MCSA_ELF_TypeCommon)
+    .Case("notype", MCSA_ELF_TypeNoType)
+    .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+    .Default(MCSA_Invalid);
+
+  if (Attr == MCSA_Invalid)
+    return Error(TypeLoc, "unsupported attribute in '.type' directive");
 
   if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in directive");
+    return TokError("unexpected token in '.type' directive");
 
-  // FIXME: Add proper MC support.
-  if (getContext().getAsmInfo().hasLEB128()) {
-    if (DirName[1] == 's')
-      getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value));
-    else
-      getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value));
-    return false;
-  }
-  // FIXME: This shouldn't be an error!
-  return TokError("LEB128 not supported yet");
+  Lex();
+
+  getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+  return false;
 }
 
-bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
-  const MCSection *PreviousSection = getStreamer().getPreviousSection();
-  if (PreviousSection != NULL)
-    getStreamer().SwitchSection(PreviousSection);
+/// ParseDirectiveIdent
+///  ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+  if (getLexer().isNot(AsmToken::String))
+    return TokError("unexpected token in '.ident' directive");
+
+  StringRef Data = getTok().getIdentifier();
+
+  Lex();
+
+  const MCSection *Comment =
+    getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+                               ELF::SHF_MERGE |
+                               ELF::SHF_STRINGS,
+                               SectionKind::getReadOnly(),
+                               1, "");
+
+  static bool First = true;
+
+  getStreamer().PushSection();
+  getStreamer().SwitchSection(Comment);
+  if (First)
+    getStreamer().EmitIntValue(0, 1);
+  First = false;
+  getStreamer().EmitBytes(Data, 0);
+  getStreamer().EmitIntValue(0, 1);
+  getStreamer().PopSection();
+  return false;
+}
+
+/// ParseDirectiveSymver
+///  ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("expected a comma");
+
+  Lex();
+
+  StringRef AliasName;
+  if (getParser().ParseIdentifier(AliasName))
+    return TokError("expected identifier in directive");
+
+  if (AliasName.find('@') == StringRef::npos)
+    return TokError("expected a '@' in the name");
+
+  MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+  const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+  getStreamer().EmitAssignment(Alias, Value);
+  return false;
+}
+
+/// ParseDirectiveWeakref
+///  ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+  // FIXME: Share code with the other alias building directives.
+
+  StringRef AliasName;
+  if (getParser().ParseIdentifier(AliasName))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("expected a comma");
+
+  Lex();
+
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
 
+  getStreamer().EmitWeakReference(Alias, Sym);
   return false;
 }
 
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
new file mode 100644
index 000000000000..6098e6b8f38b
--- /dev/null
+++ b/lib/MC/MCPureStreamer.cpp
@@ -0,0 +1,234 @@
+//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+// FIXME: Remove this.
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCPureStreamer : public MCObjectStreamer {
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+  MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                 raw_ostream &OS, MCCodeEmitter *Emitter)
+    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void InitSections();
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0);
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0);
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit = 0);
+  virtual void EmitValueToOffset(const MCExpr *Offset,
+                                 unsigned char Value = 0);
+  virtual void Finish();
+
+
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitThumbFunc(MCSymbol *Func) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitCOFFSymbolType(int Type) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EndCOFFSymbolDef() {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual void EmitFileDirective(StringRef Filename) {
+    report_fatal_error("unsupported directive in pure streamer");
+  }
+  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+    report_fatal_error("unsupported directive in pure streamer");
+    return false;
+  }
+
+  /// @}
+};
+
+} // end anonymous namespace.
+
+void MCPureStreamer::InitSections() {
+  // FIMXE: To what!?
+  SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                    0, SectionKind::getText()));
+
+}
+
+void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
+
+  Symbol->setSection(*getCurrentSection());
+
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+  // We have to create a new fragment if this is an atom defining symbol,
+  // fragments cannot span atoms.
+  if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+    new MCDataFragment(getCurrentSectionData());
+
+  // FIXME: This is wasteful, we don't necessarily need to create a data
+  // fragment. Instead, we should mark the symbol as pointing into the data
+  // fragment if it exists, otherwise we should just queue the label and set its
+  // fragment pointer when we emit the next fragment.
+  MCDataFragment *F = getOrCreateDataFragment();
+  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  SD.setFragment(F);
+  SD.setOffset(F->getContents().size());
+}
+
+void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
+  // FIXME: Lift context changes into super class.
+  getAssembler().getOrCreateSymbolData(*Symbol);
+  Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                                  unsigned Size, unsigned ByteAlignment) {
+  report_fatal_error("not yet implemented in pure streamer");
+}
+
+void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
+  getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+                                          int64_t Value, unsigned ValueSize,
+                                          unsigned MaxBytesToEmit) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
+  if (MaxBytesToEmit == 0)
+    MaxBytesToEmit = ByteAlignment;
+  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+                      getCurrentSectionData());
+
+  // Update the maximum alignment on the current section if necessary.
+  if (ByteAlignment > getCurrentSectionData()->getAlignment())
+    getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+                                       unsigned MaxBytesToEmit) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
+  if (MaxBytesToEmit == 0)
+    MaxBytesToEmit = ByteAlignment;
+  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+                                           getCurrentSectionData());
+  F->setEmitNops(true);
+
+  // Update the maximum alignment on the current section if necessary.
+  if (ByteAlignment > getCurrentSectionData()->getAlignment())
+    getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+                                       unsigned char Value) {
+  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
+  MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+  // Add the fixups and data.
+  //
+  // FIXME: Revisit this design decision when relaxation is done, we may be
+  // able to get away with not storing any extra data in the MCInst.
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+  VecOS.flush();
+
+  IF->getCode() = Code;
+  IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+  MCDataFragment *DF = getOrCreateDataFragment();
+
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+  VecOS.flush();
+
+  // Add the fixups and data.
+  for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+    Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+    DF->addFixup(Fixups[i]);
+  }
+  DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::Finish() {
+  // FIXME: Handle DWARF tables?
+
+  this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                                     raw_ostream &OS, MCCodeEmitter *CE) {
+  return new MCPureStreamer(Context, TAB, OS, CE);
+}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index eb531600f727..90091f06e9ac 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -74,3 +74,11 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
     }
   }
 }
+
+bool MCSectionCOFF::UseCodeAlign() const {
+  return getKind().isText();
+}
+
+bool MCSectionCOFF::isVirtualSection() const {
+  return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index a7599de1b7b4..d32aea144e6e 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -11,7 +11,9 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 MCSectionELF::~MCSectionELF() {} // anchor.
@@ -29,14 +31,6 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
   return false;
 }
 
-// ShouldPrintSectionType - Only prints the section type if supported
-bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const {
-  if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS))
-    return false;
-
-  return true;
-}
-
 void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
                                         raw_ostream &OS) const {
    
@@ -49,87 +43,88 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
   
   // Handle the weird solaris syntax if desired.
   if (MAI.usesSunStyleELFSectionSwitchSyntax() && 
-      !(Flags & MCSectionELF::SHF_MERGE)) {
-    if (Flags & MCSectionELF::SHF_ALLOC)
+      !(Flags & ELF::SHF_MERGE)) {
+    if (Flags & ELF::SHF_ALLOC)
       OS << ",#alloc";
-    if (Flags & MCSectionELF::SHF_EXECINSTR)
+    if (Flags & ELF::SHF_EXECINSTR)
       OS << ",#execinstr";
-    if (Flags & MCSectionELF::SHF_WRITE)
+    if (Flags & ELF::SHF_WRITE)
       OS << ",#write";
-    if (Flags & MCSectionELF::SHF_TLS)
+    if (Flags & ELF::SHF_TLS)
       OS << ",#tls";
     OS << '\n';
     return;
   }
   
   OS << ",\"";
-  if (Flags & MCSectionELF::SHF_ALLOC)
+  if (Flags & ELF::SHF_ALLOC)
     OS << 'a';
-  if (Flags & MCSectionELF::SHF_EXECINSTR)
+  if (Flags & ELF::SHF_EXECINSTR)
     OS << 'x';
-  if (Flags & MCSectionELF::SHF_WRITE)
+  if (Flags & ELF::SHF_GROUP)
+    OS << 'G';
+  if (Flags & ELF::SHF_WRITE)
     OS << 'w';
-  if (Flags & MCSectionELF::SHF_MERGE)
+  if (Flags & ELF::SHF_MERGE)
     OS << 'M';
-  if (Flags & MCSectionELF::SHF_STRINGS)
+  if (Flags & ELF::SHF_STRINGS)
     OS << 'S';
-  if (Flags & MCSectionELF::SHF_TLS)
+  if (Flags & ELF::SHF_TLS)
     OS << 'T';
   
   // If there are target-specific flags, print them.
-  if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION)
+  if (Flags & ELF::XCORE_SHF_CP_SECTION)
     OS << 'c';
-  if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION)
+  if (Flags & ELF::XCORE_SHF_DP_SECTION)
     OS << 'd';
   
   OS << '"';
 
-  if (ShouldPrintSectionType(Type)) {
-    OS << ',';
- 
-    // If comment string is '@', e.g. as on ARM - use '%' instead
-    if (MAI.getCommentString()[0] == '@')
-      OS << '%';
-    else
-      OS << '@';
-  
-    if (Type == MCSectionELF::SHT_INIT_ARRAY)
-      OS << "init_array";
-    else if (Type == MCSectionELF::SHT_FINI_ARRAY)
-      OS << "fini_array";
-    else if (Type == MCSectionELF::SHT_PREINIT_ARRAY)
-      OS << "preinit_array";
-    else if (Type == MCSectionELF::SHT_NOBITS)
-      OS << "nobits";
-    else if (Type == MCSectionELF::SHT_PROGBITS)
-      OS << "progbits";
-  
-    if (getKind().isMergeable1ByteCString()) {
-      OS << ",1";
-    } else if (getKind().isMergeable2ByteCString()) {
-      OS << ",2";
-    } else if (getKind().isMergeable4ByteCString() || 
-               getKind().isMergeableConst4()) {
-      OS << ",4";
-    } else if (getKind().isMergeableConst8()) {
-      OS << ",8";
-    } else if (getKind().isMergeableConst16()) {
-      OS << ",16";
-    }
+  OS << ',';
+
+  // If comment string is '@', e.g. as on ARM - use '%' instead
+  if (MAI.getCommentString()[0] == '@')
+    OS << '%';
+  else
+    OS << '@';
+
+  if (Type == ELF::SHT_INIT_ARRAY)
+    OS << "init_array";
+  else if (Type == ELF::SHT_FINI_ARRAY)
+    OS << "fini_array";
+  else if (Type == ELF::SHT_PREINIT_ARRAY)
+    OS << "preinit_array";
+  else if (Type == ELF::SHT_NOBITS)
+    OS << "nobits";
+  else if (Type == ELF::SHT_NOTE)
+    OS << "note";
+  else if (Type == ELF::SHT_PROGBITS)
+    OS << "progbits";
+
+  if (EntrySize) {
+    assert(Flags & ELF::SHF_MERGE);
+    OS << "," << EntrySize;
   }
-  
+
+  if (Flags & ELF::SHF_GROUP)
+    OS << "," << Group->getName() << ",comdat";
   OS << '\n';
 }
 
-// HasCommonSymbols - True if this section holds common symbols, this is
-// indicated on the ELF object file by a symbol with SHN_COMMON section 
-// header index.
-bool MCSectionELF::HasCommonSymbols() const {
-  
-  if (StringRef(SectionName).startswith(".gnu.linkonce."))
-    return true;
-
-  return false;
+bool MCSectionELF::UseCodeAlign() const {
+  return getFlags() & ELF::SHF_EXECINSTR;
 }
 
+bool MCSectionELF::isVirtualSection() const {
+  return getType() == ELF::SHT_NOBITS;
+}
 
+unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
+  if (Kind.isMergeable1ByteCString()) return 1;
+  if (Kind.isMergeable2ByteCString()) return 2;
+  if (Kind.isMergeable4ByteCString()) return 4;
+  if (Kind.isMergeableConst4())       return 4;
+  if (Kind.isMergeableConst8())       return 8;
+  if (Kind.isMergeableConst16())      return 16;
+  return 0;
+}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index ded3b20eaf53..b897c0bd6855 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -10,6 +10,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 using namespace llvm;
 
 /// SectionTypeDescriptors - These are strings that describe the various section
@@ -81,18 +82,18 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
       SegmentName[i] = Segment[i];
     else
       SegmentName[i] = 0;
-    
+
     if (i < Section.size())
       SectionName[i] = Section[i];
     else
       SectionName[i] = 0;
-  }        
+  }
 }
 
 void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
                                           raw_ostream &OS) const {
   OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
-  
+
   // Get the section type and attributes.
   unsigned TAA = getTypeAndAttributes();
   if (TAA == 0) {
@@ -101,7 +102,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
   }
 
   OS << ',';
-  
+
   unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
   assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
          "Invalid SectionType specified!");
@@ -110,7 +111,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
     OS << SectionTypeDescriptors[SectionType].AssemblerName;
   else
     OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>";
-  
+
   // If we don't have any attributes, we're done.
   unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
   if (SectionAttrs == 0) {
@@ -128,10 +129,10 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
     // Check to see if we have this attribute.
     if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
       continue;
-    
+
     // Yep, clear it and print it.
     SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
-    
+
     OS << Separator;
     if (SectionAttrDescriptors[i].AssemblerName)
       OS << SectionAttrDescriptors[i].AssemblerName;
@@ -139,15 +140,25 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
       OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
     Separator = '+';
   }
-  
+
   assert(SectionAttrs == 0 && "Unknown section attributes!");
-  
+
   // If we have a S_SYMBOL_STUBS size specified, print it.
   if (Reserved2 != 0)
     OS << ',' << Reserved2;
   OS << '\n';
 }
 
+bool MCSectionMachO::UseCodeAlign() const {
+  return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+  return (getType() == MCSectionMachO::S_ZEROFILL ||
+          getType() == MCSectionMachO::S_GB_ZEROFILL ||
+          getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
 /// StripSpaces - This removes leading and trailing spaces from the StringRef.
 static void StripSpaces(StringRef &Str) {
   while (!Str.empty() && isspace(Str[0]))
@@ -168,12 +179,12 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
                                                   unsigned  &StubSize) { // Out.
   // Find the first comma.
   std::pair<StringRef, StringRef> Comma = Spec.split(',');
-  
+
   // If there is no comma, we fail.
   if (Comma.second.empty())
     return "mach-o section specifier requires a segment and section "
            "separated by a comma";
-  
+
   // Capture segment, remove leading and trailing whitespace.
   Segment = Comma.first;
   StripSpaces(Segment);
@@ -182,14 +193,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
   if (Segment.empty() || Segment.size() > 16)
     return "mach-o section specifier requires a segment whose length is "
            "between 1 and 16 characters";
-  
+
   // Split the section name off from any attributes if present.
   Comma = Comma.second.split(',');
 
   // Capture section, remove leading and trailing whitespace.
   Section = Comma.first;
   StripSpaces(Section);
-  
+
   // Verify that the section is present and not too long.
   if (Section.empty() || Section.size() > 16)
     return "mach-o section specifier requires a section whose length is "
@@ -200,25 +211,25 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
   StubSize = 0;
   if (Comma.second.empty())
     return "";
-  
+
   // Otherwise, we need to parse the section type and attributes.
   Comma = Comma.second.split(',');
-  
+
   // Get the section type.
   StringRef SectionType = Comma.first;
   StripSpaces(SectionType);
-  
+
   // Figure out which section type it is.
   unsigned TypeID;
   for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
     if (SectionTypeDescriptors[TypeID].AssemblerName &&
         SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
       break;
-  
+
   // If we didn't find the section type, reject it.
   if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
     return "mach-o section specifier uses an unknown section type";
-  
+
   // Remember the TypeID.
   TAA = TypeID;
 
@@ -235,10 +246,10 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
   // present.
   Comma = Comma.second.split(',');
   StringRef Attrs = Comma.first;
-  
+
   // The attribute list is a '+' separated list of attributes.
   std::pair<StringRef, StringRef> Plus = Attrs.split('+');
-  
+
   while (1) {
     StringRef Attr = Plus.first;
     StripSpaces(Attr);
@@ -247,14 +258,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
     for (unsigned i = 0; ; ++i) {
       if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
         return "mach-o section specifier has invalid attribute";
-      
+
       if (SectionAttrDescriptors[i].AssemblerName &&
           Attr == SectionAttrDescriptors[i].AssemblerName) {
         TAA |= SectionAttrDescriptors[i].AttrFlag;
         break;
       }
     }
-    
+
     if (Plus.second.empty()) break;
     Plus = Plus.second.split('+');
   };
@@ -272,15 +283,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
   if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
     return "mach-o section specifier cannot have a stub size specified because "
            "it does not have type 'symbol_stubs'";
-  
+
   // Okay, if we do, it must be a number.
   StringRef StubSizeStr = Comma.second;
   StripSpaces(StubSizeStr);
-  
+
   // Convert the stub size from a string to an integer.
   if (StubSizeStr.getAsInteger(0, StubSize))
     return "mach-o section specifier has a malformed stub size";
-  
+
   return "";
 }
-
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 3e9d02ea5ae7..3dcdba13135f 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -7,16 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include <cstdlib>
 using namespace llvm;
 
-MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0),
-                                         PrevSection(0) {
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) {
+  PrevSectionStack.push_back(NULL);
+  CurSectionStack.push_back(NULL);
 }
 
 MCStreamer::~MCStreamer() {
@@ -27,17 +32,90 @@ raw_ostream &MCStreamer::GetCommentOS() {
   return nulls();
 }
 
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+                                      const MCSymbol *Label, int PointerSize) {
+  // emit the sequence to set the address
+  EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+  EmitULEB128IntValue(PointerSize + 1);
+  EmitIntValue(dwarf::DW_LNE_set_address, 1);
+  EmitSymbolValue(Label, PointerSize);
+
+  // emit the sequence for the LineDelta (from 1) and a zero address delta.
+  MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
 
 /// EmitIntValue - Special case of EmitValue that avoids the client having to
 /// pass in a MCExpr for constant integers.
 void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
                               unsigned AddrSpace) {
-  EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+  assert(Size <= 8 && "Invalid size");
+  assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+         "Invalid size");
+  char buf[8];
+  // FIXME: Endianness assumption.
+  for (unsigned i = 0; i != Size; ++i)
+    buf[i] = uint8_t(Value >> (i * 8));
+  EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
+/// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
+  SmallString<32> Tmp;
+  raw_svector_ostream OSE(Tmp);
+  MCObjectWriter::EncodeULEB128(Value, OSE);
+  EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
+  SmallString<32> Tmp;
+  raw_svector_ostream OSE(Tmp);
+  MCObjectWriter::EncodeSLEB128(Value, OSE);
+  EmitBytes(OSE.str(), AddrSpace);
+}
+
+void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
+                              unsigned AddrSpace) {
+  if (getContext().getAsmInfo().hasAggressiveSymbolFolding()) {
+    EmitValue(Value, Size, AddrSpace);
+    return;
+  }
+  MCSymbol *ABS = getContext().CreateTempSymbol();
+  EmitAssignment(ABS, Value);
+  EmitSymbolValue(ABS, Size, AddrSpace);
+}
+
+
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+                           unsigned AddrSpace) {
+  EmitValueImpl(Value, Size, false, AddrSpace);
+}
+
+void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size,
+                                unsigned AddrSpace) {
+  EmitValueImpl(Value, Size, true, AddrSpace);
+}
+
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+                                 bool isPCRel, unsigned AddrSpace) {
+  EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel,
+                AddrSpace);
 }
 
 void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
                                  unsigned AddrSpace) {
-  EmitValue(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace);
+  EmitSymbolValue(Sym, Size, false, AddrSpace);
+}
+
+void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
+                                      unsigned AddrSpace) {
+  EmitSymbolValue(Sym, Size, true, AddrSpace);
+}
+
+void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
+  report_fatal_error("unsupported directive in streamer");
 }
 
 /// EmitFill - Emit NumBytes bytes worth of the value specified by
@@ -49,6 +127,138 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
     EmitValue(E, 1, AddrSpace);
 }
 
+bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+                                        StringRef Filename) {
+  return getContext().GetDwarfFile(Filename, FileNo) == 0;
+}
+
+void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                       unsigned Column, unsigned Flags,
+                                       unsigned Isa,
+                                       unsigned Discriminator) {
+  getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
+                                  Discriminator);
+}
+
+MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
+  if (FrameInfos.empty())
+    return NULL;
+  return &FrameInfos.back();
+}
+
+void MCStreamer::EnsureValidFrame() {
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  if (!CurFrame || CurFrame->End)
+    report_fatal_error("No open frame");
+}
+
+bool MCStreamer::EmitCFIStartProc() {
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  if (CurFrame && !CurFrame->End) {
+    report_fatal_error("Starting a frame before finishing the previous one!");
+    return true;
+  }
+  MCDwarfFrameInfo Frame;
+  Frame.Begin = getContext().CreateTempSymbol();
+  EmitLabel(Frame.Begin);
+  FrameInfos.push_back(Frame);
+  return false;
+}
+
+bool MCStreamer::EmitCFIEndProc() {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  CurFrame->End = getContext().CreateTempSymbol();
+  EmitLabel(CurFrame->End);
+  return false;
+}
+
+bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(MachineLocation::VirtualFP);
+  MachineLocation Source(Register, -Offset);
+  MCCFIInstruction Instruction(Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
+bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(MachineLocation::VirtualFP);
+  MachineLocation Source(MachineLocation::VirtualFP, -Offset);
+  MCCFIInstruction Instruction(Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
+bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(Register);
+  MachineLocation Source(MachineLocation::VirtualFP);
+  MCCFIInstruction Instruction(Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
+bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MachineLocation Dest(Register, Offset);
+  MachineLocation Source(Register, Offset);
+  MCCFIInstruction Instruction(Label, Dest, Source);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
+bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+                                    unsigned Encoding) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  CurFrame->Personality = Sym;
+  CurFrame->PersonalityEncoding = Encoding;
+  return false;
+}
+
+bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  CurFrame->Lsda = Sym;
+  CurFrame->LsdaEncoding = Encoding;
+  return false;
+}
+
+bool MCStreamer::EmitCFIRememberState() {
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
+bool MCStreamer::EmitCFIRestoreState() {
+  // FIXME: Error if there is no matching cfi_remember_state.
+  EnsureValidFrame();
+  MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+  MCSymbol *Label = getContext().CreateTempSymbol();
+  EmitLabel(Label);
+  MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+  CurFrame->Instructions.push_back(Instruction);
+  return false;
+}
+
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 07751f729844..1c71f267a4b5 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -39,7 +39,20 @@ static bool NameNeedsQuoting(StringRef Str) {
   return false;
 }
 
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+  const MCSymbol *S = this;
+  while (S->isVariable()) {
+    const MCExpr *Value = S->getVariableValue();
+    if (Value->getKind() != MCExpr::SymbolRef)
+      return *S;
+    const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
+    S = &Ref->getSymbol();
+  }
+  return *S;
+}
+
 void MCSymbol::setVariableValue(const MCExpr *Value) {
+  assert(!IsUsed && "Cannot set a variable that has already been used.");
   assert(Value && "Invalid variable value!");
   assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
          "Invalid redefinition!");
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index cffabfadb316..8af07c74fdfe 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAssembler.h"
@@ -18,49 +19,37 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Target/TargetAsmBackend.h"
 
 // FIXME: Gross.
+#include "../Target/ARM/ARMFixupKinds.h"
 #include "../Target/X86/X86FixupKinds.h"
 
 #include <vector>
 using namespace llvm;
+using namespace llvm::object;
 
+// FIXME: this has been copied from (or to) X86AsmBackend.cpp
 static unsigned getFixupKindLog2Size(unsigned Kind) {
   switch (Kind) {
-  default: llvm_unreachable("invalid fixup kind!");
-  case X86::reloc_pcrel_1byte:
+  default:
+    llvm_unreachable("invalid fixup kind!");
+  case FK_PCRel_1:
   case FK_Data_1: return 0;
-  case X86::reloc_pcrel_2byte:
+  case FK_PCRel_2:
   case FK_Data_2: return 1;
-  case X86::reloc_pcrel_4byte:
+  case FK_PCRel_4:
+    // FIXME: Remove these!!!
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_movq_load:
+  case X86::reloc_signed_4byte:
   case FK_Data_4: return 2;
   case FK_Data_8: return 3;
   }
 }
 
-static bool isFixupKindPCRel(unsigned Kind) {
-  switch (Kind) {
-  default:
-    return false;
-  case X86::reloc_pcrel_1byte:
-  case X86::reloc_pcrel_2byte:
-  case X86::reloc_pcrel_4byte:
-  case X86::reloc_riprel_4byte:
-  case X86::reloc_riprel_4byte_movq_load:
-    return true;
-  }
-}
-
-static bool isFixupKindRIPRel(unsigned Kind) {
-  return Kind == X86::reloc_riprel_4byte ||
-    Kind == X86::reloc_riprel_4byte_movq_load;
-}
-
 static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
   // Undefined symbols are always extern.
   if (SD->Symbol->isUndefined())
@@ -77,94 +66,7 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
 
 namespace {
 
-class MachObjectWriterImpl {
-  // See <mach-o/loader.h>.
-  enum {
-    Header_Magic32 = 0xFEEDFACE,
-    Header_Magic64 = 0xFEEDFACF
-  };
-
-  enum {
-    Header32Size = 28,
-    Header64Size = 32,
-    SegmentLoadCommand32Size = 56,
-    SegmentLoadCommand64Size = 72,
-    Section32Size = 68,
-    Section64Size = 80,
-    SymtabLoadCommandSize = 24,
-    DysymtabLoadCommandSize = 80,
-    Nlist32Size = 12,
-    Nlist64Size = 16,
-    RelocationInfoSize = 8
-  };
-
-  enum HeaderFileType {
-    HFT_Object = 0x1
-  };
-
-  enum HeaderFlags {
-    HF_SubsectionsViaSymbols = 0x2000
-  };
-
-  enum LoadCommandType {
-    LCT_Segment = 0x1,
-    LCT_Symtab = 0x2,
-    LCT_Dysymtab = 0xb,
-    LCT_Segment64 = 0x19
-  };
-
-  // See <mach-o/nlist.h>.
-  enum SymbolTypeType {
-    STT_Undefined = 0x00,
-    STT_Absolute  = 0x02,
-    STT_Section   = 0x0e
-  };
-
-  enum SymbolTypeFlags {
-    // If any of these bits are set, then the entry is a stab entry number (see
-    // <mach-o/stab.h>. Otherwise the other masks apply.
-    STF_StabsEntryMask = 0xe0,
-
-    STF_TypeMask       = 0x0e,
-    STF_External       = 0x01,
-    STF_PrivateExtern  = 0x10
-  };
-
-  /// IndirectSymbolFlags - Flags for encoding special values in the indirect
-  /// symbol entry.
-  enum IndirectSymbolFlags {
-    ISF_Local    = 0x80000000,
-    ISF_Absolute = 0x40000000
-  };
-
-  /// RelocationFlags - Special flags for addresses.
-  enum RelocationFlags {
-    RF_Scattered = 0x80000000
-  };
-
-  enum RelocationInfoType {
-    RIT_Vanilla             = 0,
-    RIT_Pair                = 1,
-    RIT_Difference          = 2,
-    RIT_PreboundLazyPointer = 3,
-    RIT_LocalDifference     = 4,
-    RIT_TLV                 = 5
-  };
-
-  /// X86_64 uses its own relocation types.
-  enum RelocationInfoTypeX86_64 {
-    RIT_X86_64_Unsigned   = 0,
-    RIT_X86_64_Signed     = 1,
-    RIT_X86_64_Branch     = 2,
-    RIT_X86_64_GOTLoad    = 3,
-    RIT_X86_64_GOT        = 4,
-    RIT_X86_64_Subtractor = 5,
-    RIT_X86_64_Signed1    = 6,
-    RIT_X86_64_Signed2    = 7,
-    RIT_X86_64_Signed4    = 8,
-    RIT_X86_64_TLV        = 9
-  };
-
+class MachObjectWriter : public MCObjectWriter {
   /// MachSymbolData - Helper struct for containing some precomputed information
   /// on symbols.
   struct MachSymbolData {
@@ -179,16 +81,14 @@ class MachObjectWriterImpl {
     }
   };
 
+  /// The target specific Mach-O writer instance.
+  llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+
   /// @name Relocation Data
   /// @{
 
-  struct MachRelocationEntry {
-    uint32_t Word0;
-    uint32_t Word1;
-  };
-
   llvm::DenseMap<const MCSectionData*,
-                 std::vector<MachRelocationEntry> > Relocations;
+                 std::vector<macho::RelocationEntry> > Relocations;
   llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
 
   /// @}
@@ -202,32 +102,70 @@ class MachObjectWriterImpl {
 
   /// @}
 
-  MachObjectWriter *Writer;
+private:
+  /// @name Utility Methods
+  /// @{
 
-  raw_ostream &OS;
+  bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+    const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+      (MCFixupKind) Kind);
 
-  unsigned Is64Bit : 1;
+    return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+  }
+
+  /// @}
+
+  SectionAddrMap SectionAddress;
+  uint64_t getSectionAddress(const MCSectionData* SD) const {
+    return SectionAddress.lookup(SD);
+  }
+  uint64_t getSymbolAddress(const MCSymbolData* SD,
+                            const MCAsmLayout &Layout) const {
+    return getSectionAddress(SD->getFragment()->getParent()) +
+      Layout.getSymbolOffset(SD);
+  }
+  uint64_t getFragmentAddress(const MCFragment *Fragment,
+                            const MCAsmLayout &Layout) const {
+    return getSectionAddress(Fragment->getParent()) +
+      Layout.getFragmentOffset(Fragment);
+  }
+
+  uint64_t getPaddingSize(const MCSectionData *SD,
+                          const MCAsmLayout &Layout) const {
+    uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+    unsigned Next = SD->getLayoutOrder() + 1;
+    if (Next >= Layout.getSectionOrder().size())
+      return 0;
+
+    const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+    if (NextSD.getSection().isVirtualSection())
+      return 0;
+    return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+  }
 
 public:
-  MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit)
-    : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) {
+  MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
+                   bool _IsLittleEndian)
+    : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
   }
 
-  void Write8(uint8_t Value) { Writer->Write8(Value); }
-  void Write16(uint16_t Value) { Writer->Write16(Value); }
-  void Write32(uint32_t Value) { Writer->Write32(Value); }
-  void Write64(uint64_t Value) { Writer->Write64(Value); }
-  void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
-  void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
-    Writer->WriteBytes(Str, ZeroFillSize);
+  /// @name Target Writer Proxy Accessors
+  /// @{
+
+  bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+  bool isARM() const {
+    uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
+    return CPUType == mach::CTM_ARM;
   }
 
+  /// @}
+
   void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
                    bool SubsectionsViaSymbols) {
     uint32_t Flags = 0;
 
     if (SubsectionsViaSymbols)
-      Flags |= HF_SubsectionsViaSymbols;
+      Flags |= macho::HF_SubsectionsViaSymbols;
 
     // struct mach_header (28 bytes) or
     // struct mach_header_64 (32 bytes)
@@ -235,21 +173,20 @@ public:
     uint64_t Start = OS.tell();
     (void) Start;
 
-    Write32(Is64Bit ? Header_Magic64 : Header_Magic32);
+    Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+    Write32(TargetObjectWriter->getCPUType());
+    Write32(TargetObjectWriter->getCPUSubtype());
 
-    // FIXME: Support cputype.
-    Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386);
-    // FIXME: Support cpusubtype.
-    Write32(MachO::CPUSubType_I386_ALL);
-    Write32(HFT_Object);
-    Write32(NumLoadCommands);    // Object files have a single load command, the
-                                 // segment.
+    Write32(macho::HFT_Object);
+    Write32(NumLoadCommands);
     Write32(LoadCommandsSize);
     Write32(Flags);
-    if (Is64Bit)
+    if (is64Bit())
       Write32(0); // reserved
 
-    assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size);
+    assert(OS.tell() - Start ==
+           (is64Bit() ? macho::Header64Size : macho::Header32Size));
   }
 
   /// WriteSegmentLoadCommand - Write a segment load command.
@@ -266,14 +203,16 @@ public:
     uint64_t Start = OS.tell();
     (void) Start;
 
-    unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size :
-      SegmentLoadCommand32Size;
-    Write32(Is64Bit ? LCT_Segment64 : LCT_Segment);
+    unsigned SegmentLoadCommandSize =
+      is64Bit() ? macho::SegmentLoadCommand64Size:
+      macho::SegmentLoadCommand32Size;
+    Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
     Write32(SegmentLoadCommandSize +
-            NumSections * (Is64Bit ? Section64Size : Section32Size));
+            NumSections * (is64Bit() ? macho::Section64Size :
+                           macho::Section32Size));
 
     WriteBytes("", 16);
-    if (Is64Bit) {
+    if (is64Bit()) {
       Write64(0); // vmaddr
       Write64(VMSize); // vmsize
       Write64(SectionDataStartOffset); // file offset
@@ -295,10 +234,10 @@ public:
   void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
                     const MCSectionData &SD, uint64_t FileOffset,
                     uint64_t RelocationsStart, unsigned NumRelocations) {
-    uint64_t SectionSize = Layout.getSectionSize(&SD);
+    uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
 
     // The offset is unused for virtual sections.
-    if (Asm.getBackend().isVirtualSection(SD.getSection())) {
+    if (SD.getSection().isVirtualSection()) {
       assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
       FileOffset = 0;
     }
@@ -312,11 +251,11 @@ public:
     const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
     WriteBytes(Section.getSectionName(), 16);
     WriteBytes(Section.getSegmentName(), 16);
-    if (Is64Bit) {
-      Write64(Layout.getSectionAddress(&SD)); // address
+    if (is64Bit()) {
+      Write64(getSectionAddress(&SD)); // address
       Write64(SectionSize); // size
     } else {
-      Write32(Layout.getSectionAddress(&SD)); // address
+      Write32(getSectionAddress(&SD)); // address
       Write32(SectionSize); // size
     }
     Write32(FileOffset);
@@ -332,10 +271,11 @@ public:
     Write32(Flags);
     Write32(IndirectSymBase.lookup(&SD)); // reserved1
     Write32(Section.getStubSize()); // reserved2
-    if (Is64Bit)
+    if (is64Bit())
       Write32(0); // reserved3
 
-    assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size);
+    assert(OS.tell() - Start == is64Bit() ? macho::Section64Size :
+           macho::Section32Size);
   }
 
   void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
@@ -346,14 +286,14 @@ public:
     uint64_t Start = OS.tell();
     (void) Start;
 
-    Write32(LCT_Symtab);
-    Write32(SymtabLoadCommandSize);
+    Write32(macho::LCT_Symtab);
+    Write32(macho::SymtabLoadCommandSize);
     Write32(SymbolOffset);
     Write32(NumSymbols);
     Write32(StringTableOffset);
     Write32(StringTableSize);
 
-    assert(OS.tell() - Start == SymtabLoadCommandSize);
+    assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
   }
 
   void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
@@ -369,8 +309,8 @@ public:
     uint64_t Start = OS.tell();
     (void) Start;
 
-    Write32(LCT_Dysymtab);
-    Write32(DysymtabLoadCommandSize);
+    Write32(macho::LCT_Dysymtab);
+    Write32(macho::DysymtabLoadCommandSize);
     Write32(FirstLocalSymbol);
     Write32(NumLocalSymbols);
     Write32(FirstExternalSymbol);
@@ -390,7 +330,7 @@ public:
     Write32(0); // locreloff
     Write32(0); // nlocrel
 
-    assert(OS.tell() - Start == DysymtabLoadCommandSize);
+    assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
   }
 
   void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
@@ -404,27 +344,27 @@ public:
     //
     // FIXME: Are the prebound or indirect fields possible here?
     if (Symbol.isUndefined())
-      Type = STT_Undefined;
+      Type = macho::STT_Undefined;
     else if (Symbol.isAbsolute())
-      Type = STT_Absolute;
+      Type = macho::STT_Absolute;
     else
-      Type = STT_Section;
+      Type = macho::STT_Section;
 
     // FIXME: Set STAB bits.
 
     if (Data.isPrivateExtern())
-      Type |= STF_PrivateExtern;
+      Type |= macho::STF_PrivateExtern;
 
     // Set external bit.
     if (Data.isExternal() || Symbol.isUndefined())
-      Type |= STF_External;
+      Type |= macho::STF_External;
 
     // Compute the symbol address.
     if (Symbol.isDefined()) {
       if (Symbol.isAbsolute()) {
         Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
       } else {
-        Address = Layout.getSymbolAddress(&Data);
+        Address = getSymbolAddress(&Data, Layout);
       }
     } else if (Data.isCommon()) {
       // Common symbols are encoded with the size in the address
@@ -452,7 +392,7 @@ public:
     // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
     // value.
     Write16(Flags);
-    if (Is64Bit)
+    if (is64Bit())
       Write64(Address);
     else
       Write32(Address);
@@ -472,11 +412,15 @@ public:
   //  - Input errors, where something cannot be correctly encoded. 'as' allows
   //    these through in many cases.
 
+  static bool isFixupKindRIPRel(unsigned Kind) {
+    return Kind == X86::reloc_riprel_4byte ||
+      Kind == X86::reloc_riprel_4byte_movq_load;
+  }
   void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                               const MCFragment *Fragment,
                               const MCFixup &Fixup, MCValue Target,
                               uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
     unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
     unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
@@ -484,7 +428,7 @@ public:
     uint32_t FixupOffset =
       Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
     uint32_t FixupAddress =
-      Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
+      getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
     int64_t Value = 0;
     unsigned Index = 0;
     unsigned IsExtern = 0;
@@ -503,7 +447,7 @@ public:
 
     if (Target.isAbsolute()) { // constant
       // SymbolNum of 0 indicates the absolute section.
-      Type = RIT_X86_64_Unsigned;
+      Type = macho::RIT_X86_64_Unsigned;
       Index = 0;
 
       // FIXME: I believe this is broken, I don't think the linker can
@@ -513,16 +457,16 @@ public:
       // yet).
       if (IsPCRel) {
         IsExtern = 1;
-        Type = RIT_X86_64_Branch;
+        Type = macho::RIT_X86_64_Branch;
       }
     } else if (Target.getSymB()) { // A - B + constant
       const MCSymbol *A = &Target.getSymA()->getSymbol();
       MCSymbolData &A_SD = Asm.getSymbolData(*A);
-      const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD);
+      const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
 
       const MCSymbol *B = &Target.getSymB()->getSymbol();
       MCSymbolData &B_SD = Asm.getSymbolData(*B);
-      const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD);
+      const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
 
       // Neither symbol can be modified.
       if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
@@ -534,25 +478,35 @@ public:
       if (IsPCRel)
         report_fatal_error("unsupported pc-relative relocation of difference");
 
-      // We don't currently support any situation where one or both of the
-      // symbols would require a local relocation. This is almost certainly
-      // unused and may not be possible to encode correctly.
-      if (!A_Base || !B_Base)
-        report_fatal_error("unsupported local relocations in difference");
+      // The support for the situation where one or both of the symbols would
+      // require a local relocation is handled just like if the symbols were
+      // external.  This is certainly used in the case of debug sections where
+      // the section has only temporary symbols and thus the symbols don't have
+      // base symbols.  This is encoded using the section ordinal and
+      // non-extern relocation entries.
 
       // Darwin 'as' doesn't emit correct relocations for this (it ends up with
-      // a single SIGNED relocation); reject it for now.
-      if (A_Base == B_Base)
+      // a single SIGNED relocation); reject it for now.  Except the case where
+      // both symbols don't have a base, equal but both NULL.
+      if (A_Base == B_Base && A_Base)
         report_fatal_error("unsupported relocation with identical base");
 
-      Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
-      Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);
+      Value += getSymbolAddress(&A_SD, Layout) -
+        (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
+      Value -= getSymbolAddress(&B_SD, Layout) -
+        (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
 
-      Index = A_Base->getIndex();
-      IsExtern = 1;
-      Type = RIT_X86_64_Unsigned;
+      if (A_Base) {
+        Index = A_Base->getIndex();
+        IsExtern = 1;
+      }
+      else {
+        Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+        IsExtern = 0;
+      }
+      Type = macho::RIT_X86_64_Unsigned;
 
-      MachRelocationEntry MRE;
+      macho::RelocationEntry MRE;
       MRE.Word0 = FixupOffset;
       MRE.Word1 = ((Index     <<  0) |
                    (IsPCRel   << 24) |
@@ -561,13 +515,19 @@ public:
                    (Type      << 28));
       Relocations[Fragment->getParent()].push_back(MRE);
 
-      Index = B_Base->getIndex();
-      IsExtern = 1;
-      Type = RIT_X86_64_Subtractor;
+      if (B_Base) {
+        Index = B_Base->getIndex();
+        IsExtern = 1;
+      }
+      else {
+        Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+        IsExtern = 0;
+      }
+      Type = macho::RIT_X86_64_Subtractor;
     } else {
       const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
       MCSymbolData &SD = Asm.getSymbolData(*Symbol);
-      const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+      const MCSymbolData *Base = Asm.getAtom(&SD);
 
       // Relocations inside debug sections always use local relocations when
       // possible. This seems to be done because the debugger doesn't fully
@@ -589,15 +549,26 @@ public:
 
         // Add the local offset, if needed.
         if (Base != &SD)
-          Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
+          Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
       } else if (Symbol->isInSection()) {
         // The index is the section ordinal (1-based).
         Index = SD.getFragment()->getParent()->getOrdinal() + 1;
         IsExtern = 0;
-        Value += Layout.getSymbolAddress(&SD);
+        Value += getSymbolAddress(&SD, Layout);
 
         if (IsPCRel)
           Value -= FixupAddress + (1 << Log2Size);
+      } else if (Symbol->isVariable()) {
+        const MCExpr *Value = Symbol->getVariableValue();
+        int64_t Res;
+        bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
+        if (isAbs) {
+          FixedValue = Res;
+          return;
+        } else {
+          report_fatal_error("unsupported relocation of variable '" +
+                             Symbol->getName() + "'");
+        }
       } else {
         report_fatal_error("unsupported relocation of undefined symbol '" +
                            Symbol->getName() + "'");
@@ -611,15 +582,15 @@ public:
             // rewrite the movq to an leaq at link time if the symbol ends up in
             // the same linkage unit.
             if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
-              Type = RIT_X86_64_GOTLoad;
+              Type = macho::RIT_X86_64_GOTLoad;
             else
-              Type = RIT_X86_64_GOT;
+              Type = macho::RIT_X86_64_GOT;
           }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
-            Type = RIT_X86_64_TLV;
+            Type = macho::RIT_X86_64_TLV;
           }  else if (Modifier != MCSymbolRefExpr::VK_None) {
             report_fatal_error("unsupported symbol modifier in relocation");
           } else {
-            Type = RIT_X86_64_Signed;
+            Type = macho::RIT_X86_64_Signed;
 
             // The Darwin x86_64 relocation format has a problem where it cannot
             // encode an address (L<foo> + <constant>) which is outside the atom
@@ -636,9 +607,9 @@ public:
             // (the additional bias), but instead appear to just look at the
             // final offset.
             switch (-(Target.getConstant() + (1LL << Log2Size))) {
-            case 1: Type = RIT_X86_64_Signed1; break;
-            case 2: Type = RIT_X86_64_Signed2; break;
-            case 4: Type = RIT_X86_64_Signed4; break;
+            case 1: Type = macho::RIT_X86_64_Signed1; break;
+            case 2: Type = macho::RIT_X86_64_Signed2; break;
+            case 4: Type = macho::RIT_X86_64_Signed4; break;
             }
           }
         } else {
@@ -646,24 +617,24 @@ public:
             report_fatal_error("unsupported symbol modifier in branch "
                               "relocation");
 
-          Type = RIT_X86_64_Branch;
+          Type = macho::RIT_X86_64_Branch;
         }
       } else {
         if (Modifier == MCSymbolRefExpr::VK_GOT) {
-          Type = RIT_X86_64_GOT;
+          Type = macho::RIT_X86_64_GOT;
         } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
           // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
           // which case all we do is set the PCrel bit in the relocation entry;
           // this is used with exception handling, for example. The source is
           // required to include any necessary offset directly.
-          Type = RIT_X86_64_GOT;
+          Type = macho::RIT_X86_64_GOT;
           IsPCRel = 1;
         } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
           report_fatal_error("TLVP symbol modifier should have been rip-rel");
         } else if (Modifier != MCSymbolRefExpr::VK_None)
           report_fatal_error("unsupported symbol modifier in relocation");
         else
-          Type = RIT_X86_64_Unsigned;
+          Type = macho::RIT_X86_64_Unsigned;
       }
     }
 
@@ -671,7 +642,7 @@ public:
     FixedValue = Value;
 
     // struct relocation_info (8 bytes)
-    MachRelocationEntry MRE;
+    macho::RelocationEntry MRE;
     MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
                  (IsPCRel   << 24) |
@@ -685,11 +656,11 @@ public:
                                  const MCAsmLayout &Layout,
                                  const MCFragment *Fragment,
                                  const MCFixup &Fixup, MCValue Target,
+                                 unsigned Log2Size,
                                  uint64_t &FixedValue) {
     uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-    unsigned Type = RIT_Vanilla;
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+    unsigned Type = macho::RIT_Vanilla;
 
     // See <reloc.h>.
     const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -699,7 +670,9 @@ public:
       report_fatal_error("symbol '" + A->getName() +
                         "' can not be undefined in a subtraction expression");
 
-    uint32_t Value = Layout.getSymbolAddress(A_SD);
+    uint32_t Value = getSymbolAddress(A_SD, Layout);
+    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+    FixedValue += SecAddr;
     uint32_t Value2 = 0;
 
     if (const MCSymbolRefExpr *B = Target.getSymB()) {
@@ -714,28 +687,184 @@ public:
       // Note that there is no longer any semantic difference between these two
       // relocation types from the linkers point of view, this is done solely
       // for pedantic compatibility with 'as'.
-      Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
-      Value2 = Layout.getSymbolAddress(B_SD);
+      Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+        (unsigned)macho::RIT_Generic_LocalDifference;
+      Value2 = getSymbolAddress(B_SD, Layout);
+      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+    }
+
+    // Relocations are written out in reverse order, so the PAIR comes first.
+    if (Type == macho::RIT_Difference ||
+        Type == macho::RIT_Generic_LocalDifference) {
+      macho::RelocationEntry MRE;
+      MRE.Word0 = ((0         <<  0) |
+                   (macho::RIT_Pair  << 24) |
+                   (Log2Size  << 28) |
+                   (IsPCRel   << 30) |
+                   macho::RF_Scattered);
+      MRE.Word1 = Value2;
+      Relocations[Fragment->getParent()].push_back(MRE);
+    }
+
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((FixupOffset <<  0) |
+                 (Type        << 24) |
+                 (Log2Size    << 28) |
+                 (IsPCRel     << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value;
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
+
+  void RecordARMScatteredRelocation(const MCAssembler &Asm,
+                                    const MCAsmLayout &Layout,
+                                    const MCFragment *Fragment,
+                                    const MCFixup &Fixup, MCValue Target,
+                                    unsigned Log2Size,
+                                    uint64_t &FixedValue) {
+    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+    unsigned Type = macho::RIT_Vanilla;
+
+    // See <reloc.h>.
+    const MCSymbol *A = &Target.getSymA()->getSymbol();
+    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+    if (!A_SD->getFragment())
+      report_fatal_error("symbol '" + A->getName() +
+                        "' can not be undefined in a subtraction expression");
+
+    uint32_t Value = getSymbolAddress(A_SD, Layout);
+    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+    FixedValue += SecAddr;
+    uint32_t Value2 = 0;
+
+    if (const MCSymbolRefExpr *B = Target.getSymB()) {
+      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+      if (!B_SD->getFragment())
+        report_fatal_error("symbol '" + B->getSymbol().getName() +
+                          "' can not be undefined in a subtraction expression");
+
+      // Select the appropriate difference relocation type.
+      Type = macho::RIT_Difference;
+      Value2 = getSymbolAddress(B_SD, Layout);
+      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
     }
 
     // Relocations are written out in reverse order, so the PAIR comes first.
-    if (Type == RIT_Difference || Type == RIT_LocalDifference) {
-      MachRelocationEntry MRE;
+    if (Type == macho::RIT_Difference ||
+        Type == macho::RIT_Generic_LocalDifference) {
+      macho::RelocationEntry MRE;
       MRE.Word0 = ((0         <<  0) |
-                   (RIT_Pair  << 24) |
+                   (macho::RIT_Pair  << 24) |
                    (Log2Size  << 28) |
                    (IsPCRel   << 30) |
-                   RF_Scattered);
+                   macho::RF_Scattered);
       MRE.Word1 = Value2;
       Relocations[Fragment->getParent()].push_back(MRE);
     }
 
-    MachRelocationEntry MRE;
+    macho::RelocationEntry MRE;
     MRE.Word0 = ((FixupOffset <<  0) |
                  (Type        << 24) |
                  (Log2Size    << 28) |
                  (IsPCRel     << 30) |
-                 RF_Scattered);
+                 macho::RF_Scattered);
+    MRE.Word1 = Value;
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
+
+  void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
+                                   const MCAsmLayout &Layout,
+                                   const MCFragment *Fragment,
+                                   const MCFixup &Fixup, MCValue Target,
+                                   uint64_t &FixedValue) {
+    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+    unsigned Type = macho::RIT_ARM_Half;
+
+    // See <reloc.h>.
+    const MCSymbol *A = &Target.getSymA()->getSymbol();
+    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+    if (!A_SD->getFragment())
+      report_fatal_error("symbol '" + A->getName() +
+                        "' can not be undefined in a subtraction expression");
+
+    uint32_t Value = getSymbolAddress(A_SD, Layout);
+    uint32_t Value2 = 0;
+    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+    FixedValue += SecAddr;
+
+    if (const MCSymbolRefExpr *B = Target.getSymB()) {
+      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+      if (!B_SD->getFragment())
+        report_fatal_error("symbol '" + B->getSymbol().getName() +
+                          "' can not be undefined in a subtraction expression");
+
+      // Select the appropriate difference relocation type.
+      Type = macho::RIT_ARM_HalfDifference;
+      Value2 = getSymbolAddress(B_SD, Layout);
+      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+    }
+
+    // Relocations are written out in reverse order, so the PAIR comes first.
+    // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+    //
+    // For these two r_type relocations they always have a pair following them
+    // and the r_length bits are used differently.  The encoding of the
+    // r_length is as follows:
+    // low bit of r_length:
+    //  0 - :lower16: for movw instructions
+    //  1 - :upper16: for movt instructions
+    // high bit of r_length:
+    //  0 - arm instructions
+    //  1 - thumb instructions   
+    // the other half of the relocated expression is in the following pair
+    // relocation entry in the the low 16 bits of r_address field.
+    unsigned ThumbBit = 0;
+    unsigned MovtBit = 0;
+    switch (Fixup.getKind()) {
+    default: break;
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+      MovtBit = 1;
+      break;
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      MovtBit = 1;
+      // Fallthrough
+    case ARM::fixup_t2_movw_lo16:
+    case ARM::fixup_t2_movw_lo16_pcrel:
+      ThumbBit = 1;
+      break;
+    }
+
+
+    if (Type == macho::RIT_ARM_HalfDifference) {
+      uint32_t OtherHalf = MovtBit
+        ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+      macho::RelocationEntry MRE;
+      MRE.Word0 = ((OtherHalf       <<  0) |
+                   (macho::RIT_Pair << 24) |
+                   (MovtBit         << 28) |
+                   (ThumbBit        << 29) |
+                   (IsPCRel         << 30) |
+                   macho::RF_Scattered);
+      MRE.Word1 = Value2;
+      Relocations[Fragment->getParent()].push_back(MRE);
+    }
+
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((FixupOffset <<  0) |
+                 (Type        << 24) |
+                 (MovtBit     << 28) |
+                 (ThumbBit    << 29) |
+                 (IsPCRel     << 30) |
+                 macho::RF_Scattered);
     MRE.Word1 = Value;
     Relocations[Fragment->getParent()].push_back(MRE);
   }
@@ -746,7 +875,7 @@ public:
                             const MCFixup &Fixup, MCValue Target,
                             uint64_t &FixedValue) {
     assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
-           !Is64Bit &&
+           !is64Bit() &&
            "Should only be called with a 32-bit TLVP relocation!");
 
     unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -764,50 +893,218 @@ public:
     if (Target.getSymB()) {
       // If this is a subtraction then we're pcrel.
       uint32_t FixupAddress =
-      Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
+        getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
       MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
       IsPCRel = 1;
-      FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
+      FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
                     Target.getConstant());
       FixedValue += 1ULL << Log2Size;
     } else {
       FixedValue = 0;
     }
-    
+
     // struct relocation_info (8 bytes)
-    MachRelocationEntry MRE;
+    macho::RelocationEntry MRE;
     MRE.Word0 = Value;
+    MRE.Word1 = ((Index                  <<  0) |
+                 (IsPCRel                << 24) |
+                 (Log2Size               << 25) |
+                 (1                      << 27) | // Extern
+                 (macho::RIT_Generic_TLV << 28)); // Type
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
+
+  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+                                       unsigned &Log2Size) {
+    RelocType = unsigned(macho::RIT_Vanilla);
+    Log2Size = ~0U;
+
+    switch (Kind) {
+    default:
+      return false;
+
+    case FK_Data_1:
+      Log2Size = llvm::Log2_32(1);
+      return true;
+    case FK_Data_2:
+      Log2Size = llvm::Log2_32(2);
+      return true;
+    case FK_Data_4:
+      Log2Size = llvm::Log2_32(4);
+      return true;
+    case FK_Data_8:
+      Log2Size = llvm::Log2_32(8);
+      return true;
+
+      // Handle 24-bit branch kinds.
+    case ARM::fixup_arm_ldst_pcrel_12:
+    case ARM::fixup_arm_pcrel_10:
+    case ARM::fixup_arm_adr_pcrel_12:
+    case ARM::fixup_arm_condbranch:
+    case ARM::fixup_arm_uncondbranch:
+      RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+      // Report as 'long', even though that is not quite accurate.
+      Log2Size = llvm::Log2_32(4);
+      return true;
+
+      // Handle Thumb branches.
+    case ARM::fixup_arm_thumb_br:
+      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+      Log2Size = llvm::Log2_32(2);
+      return true;
+
+    case ARM::fixup_arm_thumb_bl:
+      RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit);
+      Log2Size = llvm::Log2_32(4);
+      return true;
+
+    case ARM::fixup_arm_thumb_blx:
+      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+      // Report as 'long', even though that is not quite accurate.
+      Log2Size = llvm::Log2_32(4);
+      return true;
+
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+      // Report as 'long', even though that is not quite accurate.
+      Log2Size = llvm::Log2_32(4);
+      return true;
+
+    case ARM::fixup_arm_movw_lo16:
+    case ARM::fixup_arm_movw_lo16_pcrel:
+    case ARM::fixup_t2_movw_lo16:
+    case ARM::fixup_t2_movw_lo16_pcrel:
+      RelocType = unsigned(macho::RIT_ARM_Half);
+      // Report as 'long', even though that is not quite accurate.
+      Log2Size = llvm::Log2_32(4);
+      return true;
+    }
+  }
+  void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                           const MCFragment *Fragment, const MCFixup &Fixup,
+                           MCValue Target, uint64_t &FixedValue) {
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+    unsigned Log2Size;
+    unsigned RelocType = macho::RIT_Vanilla;
+    if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+      report_fatal_error("unknown ARM fixup kind!");
+      return;
+    }
+
+    // If this is a difference or a defined symbol plus an offset, then we need
+    // a scattered relocation entry.  Differences always require scattered
+    // relocations.
+    if (Target.getSymB()) {
+      if (RelocType == macho::RIT_ARM_Half ||
+          RelocType == macho::RIT_ARM_HalfDifference)
+        return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
+                                           Target, FixedValue);
+      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                          Target, Log2Size, FixedValue);
+    }
+
+    // Get the symbol data, if any.
+    MCSymbolData *SD = 0;
+    if (Target.getSymA())
+      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+    // FIXME: For other platforms, we need to use scattered relocations for
+    // internal relocations with offsets.  If this is an internal relocation
+    // with an offset, it also needs a scattered relocation entry.
+    //
+    // Is this right for ARM?
+    uint32_t Offset = Target.getConstant();
+    if (IsPCRel && RelocType == macho::RIT_Vanilla)
+      Offset += 1 << Log2Size;
+    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
+                                          Log2Size, FixedValue);
+
+    // See <reloc.h>.
+    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+    unsigned Index = 0;
+    unsigned IsExtern = 0;
+    unsigned Type = 0;
+
+    if (Target.isAbsolute()) { // constant
+      // FIXME!
+      report_fatal_error("FIXME: relocations to absolute targets "
+                         "not yet implemented");
+    } else if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, SectionAddress)) {
+        FixedValue = Res;
+        return;
+      }
+
+      report_fatal_error("unsupported relocation of variable '" +
+                         SD->getSymbol().getName() + "'");
+    } else {
+      // Check whether we need an external or internal relocation.
+      if (doesSymbolRequireExternRelocation(SD)) {
+        IsExtern = 1;
+        Index = SD->getIndex();
+        // For external relocations, make sure to offset the fixup value to
+        // compensate for the addend of the symbol address, if it was
+        // undefined. This occurs with weak definitions, for example.
+        if (!SD->Symbol->isUndefined())
+          FixedValue -= Layout.getSymbolOffset(SD);
+      } else {
+        // The index is the section ordinal (1-based).
+        Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+        FixedValue += getSectionAddress(SD->getFragment()->getParent());
+      }
+      if (IsPCRel)
+        FixedValue -= getSectionAddress(Fragment->getParent());
+
+      // The type is determined by the fixup kind.
+      Type = RelocType;
+    }
+
+    // struct relocation_info (8 bytes)
+    macho::RelocationEntry MRE;
+    MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
                  (IsPCRel   << 24) |
                  (Log2Size  << 25) |
-                 (1         << 27) | // Extern
-                 (RIT_TLV   << 28)); // Type
+                 (IsExtern  << 27) |
+                 (Type      << 28));
     Relocations[Fragment->getParent()].push_back(MRE);
   }
-  
+
   void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                         const MCFragment *Fragment, const MCFixup &Fixup,
                         MCValue Target, uint64_t &FixedValue) {
-    if (Is64Bit) {
+    // FIXME: These needs to be factored into the target Mach-O writer.
+    if (isARM()) {
+      RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+      return;
+    }
+    if (is64Bit()) {
       RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
       return;
     }
 
-    unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
     unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
     // If this is a 32-bit TLVP reloc it's handled a bit differently.
-    if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+    if (Target.getSymA() &&
+        Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
       RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
       return;
     }
-    
+
     // If this is a difference or a defined symbol plus an offset, then we need
     // a scattered relocation entry.
     // Differences always require scattered relocations.
     if (Target.getSymB())
         return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                         Target, FixedValue);
+                                         Target, Log2Size, FixedValue);
 
     // Get the symbol data, if any.
     MCSymbolData *SD = 0;
@@ -821,7 +1118,7 @@ public:
       Offset += 1 << Log2Size;
     if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
       return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                       Target, FixedValue);
+                                       Target, Log2Size, FixedValue);
 
     // See <reloc.h>.
     uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
@@ -834,7 +1131,17 @@ public:
       //
       // FIXME: Currently, these are never generated (see code below). I cannot
       // find a case where they are actually emitted.
-      Type = RIT_Vanilla;
+      Type = macho::RIT_Vanilla;
+    } else if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, SectionAddress)) {
+        FixedValue = Res;
+        return;
+      }
+
+      report_fatal_error("unsupported relocation of variable '" +
+                         SD->getSymbol().getName() + "'");
     } else {
       // Check whether we need an external or internal relocation.
       if (doesSymbolRequireExternRelocation(SD)) {
@@ -844,17 +1151,20 @@ public:
         // compensate for the addend of the symbol address, if it was
         // undefined. This occurs with weak definitions, for example.
         if (!SD->Symbol->isUndefined())
-          FixedValue -= Layout.getSymbolAddress(SD);
+          FixedValue -= Layout.getSymbolOffset(SD);
       } else {
         // The index is the section ordinal (1-based).
         Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+        FixedValue += getSectionAddress(SD->getFragment()->getParent());
       }
+      if (IsPCRel)
+        FixedValue -= getSectionAddress(Fragment->getParent());
 
-      Type = RIT_Vanilla;
+      Type = macho::RIT_Vanilla;
     }
 
     // struct relocation_info (8 bytes)
-    MachRelocationEntry MRE;
+    macho::RelocationEntry MRE;
     MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
                  (IsPCRel   << 24) |
@@ -885,7 +1195,7 @@ public:
       // Initialize the section indirect symbol base, if necessary.
       if (!IndirectSymBase.count(it->SectionData))
         IndirectSymBase[it->SectionData] = IndirectIndex;
-      
+
       Asm.getOrCreateSymbolData(*it->Symbol);
     }
 
@@ -1028,7 +1338,25 @@ public:
       StringTable += '\x00';
   }
 
-  void ExecutePostLayoutBinding(MCAssembler &Asm) {
+  void computeSectionAddresses(const MCAssembler &Asm,
+                               const MCAsmLayout &Layout) {
+    uint64_t StartAddress = 0;
+    const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+    for (int i = 0, n = Order.size(); i != n ; ++i) {
+      const MCSectionData *SD = Order[i];
+      StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+      SectionAddress[SD] = StartAddress;
+      StartAddress += Layout.getSectionAddressSize(SD);
+      // Explicitly pad the section to match the alignment requirements of the
+      // following one. This is for 'gas' compatibility, it shouldn't
+      /// strictly be necessary.
+      StartAddress += getPaddingSize(SD, Layout);
+    }
+  }
+
+  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
+    computeSectionAddresses(Asm, Layout);
+
     // Create symbol data for any indirect symbols.
     BindIndirectSymbols(Asm);
 
@@ -1037,41 +1365,101 @@ public:
                        UndefinedSymbolData);
   }
 
-  void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) {
+  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                      const MCFragment &FB,
+                                                      bool InSet,
+                                                      bool IsPCRel) const {
+    if (InSet)
+      return true;
+
+    // The effective address is
+    //     addr(atom(A)) + offset(A)
+    //   - addr(atom(B)) - offset(B)
+    // and the offsets are not relocatable, so the fixup is fully resolved when
+    //  addr(atom(A)) - addr(atom(B)) == 0.
+    const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+    const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+    const MCSection &SecA = SA.getSection();
+    const MCSection &SecB = FB.getParent()->getSection();
+
+    if (IsPCRel) {
+      // The simple (Darwin, except on x86_64) way of dealing with this was to
+      // assume that any reference to a temporary symbol *must* be a temporary
+      // symbol in the same atom, unless the sections differ. Therefore, any
+      // PCrel relocation to a temporary symbol (in the same section) is fully
+      // resolved. This also works in conjunction with absolutized .set, which
+      // requires the compiler to use .set to absolutize the differences between
+      // symbols which the compiler knows to be assembly time constants, so we
+      // don't need to worry about considering symbol differences fully
+      // resolved.
+
+      if (!Asm.getBackend().hasReliableSymbolDifference()) {
+        if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+          return false;
+        return true;
+      }
+    } else {
+      if (!TargetObjectWriter->useAggressiveSymbolFolding())
+        return false;
+    }
+
+    const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+    A_Base = FA.getAtom();
+    if (!A_Base)
+      return false;
+
+    B_Base = FB.getAtom();
+    if (!B_Base)
+      return false;
+
+    // If the atoms are the same, they are guaranteed to have the same address.
+    if (A_Base == B_Base)
+      return true;
+
+    // Otherwise, we can't prove this is fully resolved.
+    return false;
+  }
+
+  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
     unsigned NumSections = Asm.size();
 
     // The section data starts after the header, the segment load command (and
     // section headers) and the symbol table.
     unsigned NumLoadCommands = 1;
-    uint64_t LoadCommandsSize = Is64Bit ?
-      SegmentLoadCommand64Size + NumSections * Section64Size :
-      SegmentLoadCommand32Size + NumSections * Section32Size;
+    uint64_t LoadCommandsSize = is64Bit() ?
+      macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+      macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
 
     // Add the symbol table load command sizes, if used.
     unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
       UndefinedSymbolData.size();
     if (NumSymbols) {
       NumLoadCommands += 2;
-      LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
+      LoadCommandsSize += (macho::SymtabLoadCommandSize +
+                           macho::DysymtabLoadCommandSize);
     }
 
     // Compute the total size of the section data, as well as its file size and
     // vm size.
-    uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
-      + LoadCommandsSize;
+    uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+                                 macho::Header32Size) + LoadCommandsSize;
     uint64_t SectionDataSize = 0;
     uint64_t SectionDataFileSize = 0;
     uint64_t VMSize = 0;
     for (MCAssembler::const_iterator it = Asm.begin(),
            ie = Asm.end(); it != ie; ++it) {
       const MCSectionData &SD = *it;
-      uint64_t Address = Layout.getSectionAddress(&SD);
-      uint64_t Size = Layout.getSectionSize(&SD);
+      uint64_t Address = getSectionAddress(&SD);
+      uint64_t Size = Layout.getSectionAddressSize(&SD);
       uint64_t FileSize = Layout.getSectionFileSize(&SD);
+      FileSize += getPaddingSize(&SD, Layout);
 
       VMSize = std::max(VMSize, Address + Size);
 
-      if (Asm.getBackend().isVirtualSection(SD.getSection()))
+      if (SD.getSection().isVirtualSection())
         continue;
 
       SectionDataSize = std::max(SectionDataSize, Address + Size);
@@ -1094,11 +1482,11 @@ public:
     uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
     for (MCAssembler::const_iterator it = Asm.begin(),
            ie = Asm.end(); it != ie; ++it) {
-      std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
       unsigned NumRelocs = Relocs.size();
-      uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it);
+      uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
       WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
-      RelocTableEnd += NumRelocs * RelocationInfoSize;
+      RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
     }
 
     // Write the symbol table load command, if used.
@@ -1124,8 +1512,8 @@ public:
 
       // The string table is written after symbol table.
       uint64_t StringTableOffset =
-        SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
-                                                Nlist32Size);
+        SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+                                                macho::Nlist32Size);
       WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
                              StringTableOffset, StringTable.size());
 
@@ -1137,8 +1525,13 @@ public:
 
     // Write the actual section data.
     for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it)
-      Asm.WriteSectionData(it, Layout, Writer);
+           ie = Asm.end(); it != ie; ++it) {
+      Asm.WriteSectionData(it, Layout);
+
+      uint64_t Pad = getPaddingSize(it, Layout);
+      for (unsigned int i = 0; i < Pad; ++i)
+        Write8(0);
+    }
 
     // Write the extra padding.
     WriteZeros(SectionDataPadding);
@@ -1148,7 +1541,7 @@ public:
            ie = Asm.end(); it != ie; ++it) {
       // Write the section relocation entries, in reverse order to match 'as'
       // (approximately, the exact algorithm is more complicated than this).
-      std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
       for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
         Write32(Relocs[e - i - 1].Word0);
         Write32(Relocs[e - i - 1].Word1);
@@ -1169,9 +1562,9 @@ public:
           // If this symbol is defined and internal, mark it as such.
           if (it->Symbol->isDefined() &&
               !Asm.getSymbolData(*it->Symbol).isExternal()) {
-            uint32_t Flags = ISF_Local;
+            uint32_t Flags = macho::ISF_Local;
             if (it->Symbol->isAbsolute())
-              Flags |= ISF_Absolute;
+              Flags |= macho::ISF_Absolute;
             Write32(Flags);
             continue;
           }
@@ -1198,32 +1591,8 @@ public:
 
 }
 
-MachObjectWriter::MachObjectWriter(raw_ostream &OS,
-                                   bool Is64Bit,
-                                   bool IsLittleEndian)
-  : MCObjectWriter(OS, IsLittleEndian)
-{
-  Impl = new MachObjectWriterImpl(this, Is64Bit);
-}
-
-MachObjectWriter::~MachObjectWriter() {
-  delete (MachObjectWriterImpl*) Impl;
-}
-
-void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
-  ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
-}
-
-void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
-                                        const MCAsmLayout &Layout,
-                                        const MCFragment *Fragment,
-                                        const MCFixup &Fixup, MCValue Target,
-                                        uint64_t &FixedValue) {
-  ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
-                                                   Target, FixedValue);
-}
-
-void MachObjectWriter::WriteObject(const MCAssembler &Asm,
-                                   const MCAsmLayout &Layout) {
-  ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+                                             raw_ostream &OS,
+                                             bool IsLittleEndian) {
+  return new MachObjectWriter(MOTW, OS, IsLittleEndian);
 }
diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp
index bbfddbe9e71f..192755742535 100644
--- a/lib/MC/TargetAsmBackend.cpp
+++ b/lib/MC/TargetAsmBackend.cpp
@@ -10,13 +10,28 @@
 #include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
 
-TargetAsmBackend::TargetAsmBackend(const Target &T)
-  : TheTarget(T),
-    HasAbsolutizedSet(false),
-    HasReliableSymbolDifference(false),
-    HasScatteredSymbols(false)
+TargetAsmBackend::TargetAsmBackend()
+  : HasReliableSymbolDifference(false)
 {
 }
 
 TargetAsmBackend::~TargetAsmBackend() {
 }
+
+const MCFixupKindInfo &
+TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  static const MCFixupKindInfo Builtins[] = {
+    { "FK_Data_1", 0, 8, 0 },
+    { "FK_Data_2", 0, 16, 0 },
+    { "FK_Data_4", 0, 32, 0 },
+    { "FK_Data_8", 0, 64, 0 },
+    { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+  };
+  
+  assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) &&
+         "Unknown fixup kind");
+  return Builtins[Kind];
+}
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index eeb2b9675f4b..6ca5d37fc32e 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/TimeValue.h"
 
 #include "../Target/X86/X86FixupKinds.h"
 
@@ -55,6 +55,9 @@ struct AuxSymbol {
   COFF::Auxiliary Aux;
 };
 
+class COFFSymbol;
+class COFFSection;
+
 class COFFSymbol {
 public:
   COFF::symbol Data;
@@ -62,15 +65,19 @@ public:
   typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
 
   name             Name;
-  size_t           Index;
+  int              Index;
   AuxiliarySymbols Aux;
   COFFSymbol      *Other;
+  COFFSection     *Section;
+  int              Relocations;
 
   MCSymbolData const *MCData;
 
-  COFFSymbol(llvm::StringRef name, size_t index);
+  COFFSymbol(llvm::StringRef name);
   size_t size() const;
   void set_name_offset(uint32_t Offset);
+
+  bool should_keep() const;
 };
 
 // This class contains staging data for a COFF relocation entry.
@@ -89,12 +96,12 @@ public:
   COFF::section Header;
 
   std::string          Name;
-  size_t               Number;
+  int                  Number;
   MCSectionData const *MCData;
-  COFFSymbol              *Symb;
+  COFFSymbol          *Symbol;
   relocations          Relocations;
 
-  COFFSection(llvm::StringRef name, size_t Index);
+  COFFSection(llvm::StringRef name);
   static size_t size();
 };
 
@@ -118,11 +125,8 @@ public:
   typedef std::vector<COFFSymbol*>  symbols;
   typedef std::vector<COFFSection*> sections;
 
-  typedef StringMap<COFFSymbol *>  name_symbol_map;
-  typedef StringMap<COFFSection *> name_section_map;
-
-  typedef DenseMap<MCSymbolData const *, COFFSymbol *>   symbol_map;
-  typedef DenseMap<MCSectionData const *, COFFSection *> section_map;
+  typedef DenseMap<MCSymbol  const *, COFFSymbol *>   symbol_map;
+  typedef DenseMap<MCSection const *, COFFSection *> section_map;
 
   // Root level file contents.
   bool Is64Bit;
@@ -138,11 +142,9 @@ public:
   WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
   ~WinCOFFObjectWriter();
 
-  COFFSymbol *createSymbol(llvm::StringRef Name);
-  COFFSection *createSection(llvm::StringRef Name);
-
-  void InitCOFFEntity(COFFSymbol &Symbol);
-  void InitCOFFEntity(COFFSection &Section);
+  COFFSymbol *createSymbol(StringRef Name);
+  COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+  COFFSection *createSection(StringRef Name);
 
   template <typename object_t, typename list_t>
   object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
@@ -150,9 +152,14 @@ public:
   void DefineSection(MCSectionData const &SectionData);
   void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
 
-  bool ExportSection(COFFSection *S);
+  void MakeSymbolReal(COFFSymbol &S, size_t Index);
+  void MakeSectionReal(COFFSection &S, size_t Number);
+
+  bool ExportSection(COFFSection const *S);
   bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
 
+  bool IsPhysicalSection(COFFSection *S);
+
   // Entity writing methods.
 
   void WriteFileHeader(const COFF::header &Header);
@@ -163,7 +170,7 @@ public:
 
   // MCObjectWriter interface implementation.
 
-  void ExecutePostLayoutBinding(MCAssembler &Asm);
+  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
 
   void RecordRelocation(const MCAssembler &Asm,
                         const MCAsmLayout &Layout,
@@ -172,7 +179,7 @@ public:
                         MCValue Target,
                         uint64_t &FixedValue);
 
-  void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
 };
 }
 
@@ -198,9 +205,12 @@ static inline void write_uint8_le(void *Data, uint8_t const &Value) {
 //------------------------------------------------------------------------------
 // Symbol class implementation
 
-COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index)
-      : Name(name.begin(), name.end()), Index(-1)
-      , Other(NULL), MCData(NULL) {
+COFFSymbol::COFFSymbol(llvm::StringRef name)
+  : Name(name.begin(), name.end())
+  , Other(NULL)
+  , Section(NULL)
+  , Relocations(0)
+  , MCData(NULL) {
   memset(&Data, 0, sizeof(Data));
 }
 
@@ -216,12 +226,41 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
   write_uint32_le(Data.Name + 4, Offset);
 }
 
+/// logic to decide if the symbol should be reported in the symbol table
+bool COFFSymbol::should_keep() const {
+  // no section means its external, keep it
+  if (Section == NULL)
+    return true;
+
+  // if it has relocations pointing at it, keep it
+  if (Relocations > 0)   {
+    assert(Section->Number != -1 && "Sections with relocations must be real!");
+    return true;
+  }
+
+  // if the section its in is being droped, drop it
+  if (Section->Number == -1)
+      return false;
+
+  // if it is the section symbol, keep it
+  if (Section->Symbol == this)
+    return true;
+
+  // if its temporary, drop it
+  if (MCData && MCData->getSymbol().isTemporary())
+      return false;
+
+  // otherwise, keep it
+  return true;
+}
+
 //------------------------------------------------------------------------------
 // Section class implementation
 
-COFFSection::COFFSection(llvm::StringRef name, size_t Index)
-       : Name(name), Number(Index + 1)
-       , MCData(NULL), Symb(NULL) {
+COFFSection::COFFSection(llvm::StringRef name)
+  : Name(name)
+  , MCData(NULL)
+  , Symbol(NULL) {
   memset(&Header, 0, sizeof(Header));
 }
 
@@ -290,43 +329,22 @@ WinCOFFObjectWriter::~WinCOFFObjectWriter() {
     delete *I;
 }
 
-COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) {
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
   return createCOFFEntity<COFFSymbol>(Name, Symbols);
 }
 
-COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
-  return createCOFFEntity<COFFSection>(Name, Sections);
-}
-
-/// This function initializes a symbol by entering its name into the string
-/// table if it is too long to fit in the symbol table header.
-void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) {
-  if (S.Name.size() > COFF::NameSize) {
-    size_t StringTableEntry = Strings.insert(S.Name.c_str());
-
-    S.set_name_offset(StringTableEntry);
-  } else
-    memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){
+  symbol_map::iterator i = SymbolMap.find(Symbol);
+  if (i != SymbolMap.end())
+    return i->second;
+  COFFSymbol *RetSymbol
+    = createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
+  SymbolMap[Symbol] = RetSymbol;
+  return RetSymbol;
 }
 
-/// This function initializes a section by entering its name into the string
-/// table if it is too long to fit in the section table header.
-void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
-  if (S.Name.size() > COFF::NameSize) {
-    size_t StringTableEntry = Strings.insert(S.Name.c_str());
-
-    // FIXME: Why is this number 999999? This number is never mentioned in the
-    // spec. I'm assuming this is due to the printed value needing to fit into
-    // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
-    // 6)? The spec does not state if this entry should be null terminated in
-    // this case, and thus this seems to be the best way to do it. I think I
-    // just solved my own FIXME...
-    if (StringTableEntry > 999999)
-      report_fatal_error("COFF string table is greater than 999999 bytes.");
-
-    sprintf(S.Header.Name, "/%d", (unsigned)StringTableEntry);
-  } else
-    memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+  return createCOFFEntity<COFFSection>(Name, Sections);
 }
 
 /// A template used to lookup or create a symbol/section, and initialize it if
@@ -334,9 +352,7 @@ void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
 template <typename object_t, typename list_t>
 object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
                                                 list_t &List) {
-  object_t *Object = new object_t(Name, List.size());
-
-  InitCOFFEntity(*Object);
+  object_t *Object = new object_t(Name);
 
   List.push_back(Object);
 
@@ -346,6 +362,8 @@ object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
 /// This function takes a section data object from the assembler
 /// and creates the associated COFF section staging object.
 void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+  assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+    && "Got non COFF section in the COFF backend!");
   // FIXME: Not sure how to verify this (at least in a debug build).
   MCSectionCOFF const &Sec =
     static_cast<MCSectionCOFF const &>(SectionData.getSection());
@@ -353,15 +371,14 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
   COFFSection *coff_section = createSection(Sec.getSectionName());
   COFFSymbol  *coff_symbol = createSymbol(Sec.getSectionName());
 
-  coff_section->Symb = coff_symbol;
+  coff_section->Symbol = coff_symbol;
+  coff_symbol->Section = coff_section;
   coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
-  coff_symbol->Data.SectionNumber = coff_section->Number;
 
   // In this case the auxiliary symbol is a Section Definition.
   coff_symbol->Aux.resize(1);
   memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
   coff_symbol->Aux[0].AuxType = ATSectionDefinition;
-  coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number;
   coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
 
   coff_section->Header.Characteristics = Sec.getCharacteristics();
@@ -388,18 +405,53 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
 
   // Bind internal COFF section to MC section.
   coff_section->MCData = &SectionData;
-  SectionMap[&SectionData] = coff_section;
+  SectionMap[&SectionData.getSection()] = coff_section;
 }
 
 /// This function takes a section data object from the assembler
 /// and creates the associated COFF symbol staging object.
 void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
-                                        MCAssembler &Assembler) {
-  COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName());
+                                       MCAssembler &Assembler) {
+  COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
 
   coff_symbol->Data.Type         = (SymbolData.getFlags() & 0x0000FFFF) >>  0;
   coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
 
+  if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+    coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+    if (SymbolData.getSymbol().isVariable()) {
+      coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+      const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+      // FIXME: This assert message isn't very good.
+      assert(Value->getKind() == MCExpr::SymbolRef &&
+              "Value must be a SymbolRef!");
+
+      const MCSymbolRefExpr *SymbolRef =
+        static_cast<const MCSymbolRefExpr *>(Value);
+      coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+    } else {
+      std::string WeakName = std::string(".weak.")
+                           +  SymbolData.getSymbol().getName().str()
+                           + ".default";
+      COFFSymbol *WeakDefault = createSymbol(WeakName);
+      WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+      WeakDefault->Data.StorageClass  = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+      WeakDefault->Data.Type          = 0;
+      WeakDefault->Data.Value         = 0;
+      coff_symbol->Other = WeakDefault;
+    }
+
+    // Setup the Weak External auxiliary symbol.
+    coff_symbol->Aux.resize(1);
+    memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+    coff_symbol->Aux[0].AuxType = ATWeakExternal;
+    coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+    coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+      COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+  }
+
   // If no storage class was specified in the streamer, define it here.
   if (coff_symbol->Data.StorageClass == 0) {
     bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
@@ -408,44 +460,51 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
       external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
   }
 
-  if (SymbolData.getFlags() & COFF::SF_WeakReference) {
-    coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
-
-    const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+  if (SymbolData.Fragment != NULL)
+    coff_symbol->Section =
+      SectionMap[&SymbolData.Fragment->getParent()->getSection()];
 
-    // FIXME: This assert message isn't very good.
-    assert(Value->getKind() == MCExpr::SymbolRef &&
-           "Value must be a SymbolRef!");
+  // Bind internal COFF symbol to MC symbol.
+  coff_symbol->MCData = &SymbolData;
+  SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+}
 
-    const MCSymbolRefExpr *SymbolRef =
-      static_cast<const MCSymbolRefExpr *>(Value);
+/// making a section real involves assigned it a number and putting
+/// name into the string table if needed
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+  if (S.Name.size() > COFF::NameSize) {
+    size_t StringTableEntry = Strings.insert(S.Name.c_str());
 
-    const MCSymbolData &OtherSymbolData =
-      Assembler.getSymbolData(SymbolRef->getSymbol());
+    // FIXME: Why is this number 999999? This number is never mentioned in the
+    // spec. I'm assuming this is due to the printed value needing to fit into
+    // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+    // 6)? The spec does not state if this entry should be null terminated in
+    // this case, and thus this seems to be the best way to do it. I think I
+    // just solved my own FIXME...
+    if (StringTableEntry > 999999)
+      report_fatal_error("COFF string table is greater than 999999 bytes.");
 
-    // FIXME: This assert message isn't very good.
-    assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() &&
-           "OtherSymbolData must be in the symbol map!");
+    std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+  } else
+    std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
 
-    coff_symbol->Other = SymbolMap[&OtherSymbolData];
+  S.Number = Number;
+  S.Symbol->Data.SectionNumber = S.Number;
+  S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
 
-    // Setup the Weak External auxiliary symbol.
-    coff_symbol->Aux.resize(1);
-    memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
-    coff_symbol->Aux[0].AuxType = ATWeakExternal;
-    coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
-    coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
-                                        COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
-  }
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+  if (S.Name.size() > COFF::NameSize) {
+    size_t StringTableEntry = Strings.insert(S.Name.c_str());
 
-  // Bind internal COFF symbol to MC symbol.
-  coff_symbol->MCData = &SymbolData;
-  SymbolMap[&SymbolData] = coff_symbol;
+    S.set_name_offset(StringTableEntry);
+  } else
+    std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+  S.Index = Index;
 }
 
-bool WinCOFFObjectWriter::ExportSection(COFFSection *S) {
-  return (S->Header.Characteristics
-         & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+  return !S->MCData->getFragmentList().empty();
 }
 
 bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
@@ -455,8 +514,14 @@ bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
 
   // return Asm.isSymbolLinkerVisible (&SymbolData);
 
-  // For now, all symbols are exported, the linker will sort it out for us.
-  return true;
+  // For now, all non-variable symbols are exported,
+  // the linker will sort the rest out for us.
+  return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+  return (S->Header.Characteristics
+         & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
 }
 
 //------------------------------------------------------------------------------
@@ -546,9 +611,10 @@ void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
 ////////////////////////////////////////////////////////////////////////////////
 // MCObjectWriter interface implementations
 
-void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                                   const MCAsmLayout &Layout) {
   // "Define" each section & symbol. This creates section & symbol
-  // entries in the staging area and gives them their final indexes.
+  // entries in the staging area.
 
   for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
     DefineSection(*i);
@@ -574,19 +640,24 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   MCSectionData const *SectionData = Fragment->getParent();
 
   // Mark this symbol as requiring an entry in the symbol table.
-  assert(SectionMap.find(SectionData) != SectionMap.end() &&
+  assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
          "Section must already have been defined in ExecutePostLayoutBinding!");
-  assert(SymbolMap.find(&A_SD) != SymbolMap.end() &&
+  assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
          "Symbol must already have been defined in ExecutePostLayoutBinding!");
 
-  COFFSection *coff_section = SectionMap[SectionData];
-  COFFSymbol *coff_symbol = SymbolMap[&A_SD];
+  COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+  COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
 
   if (Target.getSymB()) {
+    if (&Target.getSymA()->getSymbol().getSection()
+     != &Target.getSymB()->getSymbol().getSection()) {
+      llvm_unreachable("Symbol relative relocations are only allowed between "
+                       "symbols in the same section");
+    }
     const MCSymbol *B = &Target.getSymB()->getSymbol();
     MCSymbolData &B_SD = Asm.getSymbolData(*B);
 
-    FixedValue = Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(&B_SD);
+    FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD);
 
     // In the case where we have SymbA and SymB, we just need to store the delta
     // between the two symbols.  Update FixedValue to account for the delta, and
@@ -600,12 +671,21 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
 
   Reloc.Data.SymbolTableIndex = 0;
   Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
-  Reloc.Symb = coff_symbol;
+
+  // Turn relocations for temporary symbols into section relocations.
+  if (coff_symbol->MCData->getSymbol().isTemporary()) {
+    Reloc.Symb = coff_symbol->Section->Symbol;
+    FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+                + coff_symbol->MCData->getOffset();
+  } else
+    Reloc.Symb = coff_symbol;
+
+  ++Reloc.Symb->Relocations;
 
   Reloc.Data.VirtualAddress += Fixup.getOffset();
 
-  switch (Fixup.getKind()) {
-  case X86::reloc_pcrel_4byte:
+  switch ((unsigned)Fixup.getKind()) {
+  case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_movq_load:
     Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
@@ -615,6 +695,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
     FixedValue += 4;
     break;
   case FK_Data_4:
+  case X86::reloc_signed_4byte:
     Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
                               : COFF::IMAGE_REL_I386_DIR32;
     break;
@@ -631,9 +712,19 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   coff_section->Relocations.push_back(Reloc);
 }
 
-void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
                                       const MCAsmLayout &Layout) {
   // Assign symbol and section indexes and offsets.
+  Header.NumberOfSections = 0;
+
+  for (sections::iterator i = Sections.begin(),
+                          e = Sections.end(); i != e; i++) {
+    if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+      MakeSectionReal(**i, ++Header.NumberOfSections);
+    } else {
+      (*i)->Number = -1;
+    }
+  }
 
   Header.NumberOfSymbols = 0;
 
@@ -641,32 +732,35 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
     COFFSymbol *coff_symbol = *i;
     MCSymbolData const *SymbolData = coff_symbol->MCData;
 
-    coff_symbol->Index = Header.NumberOfSymbols++;
-
     // Update section number & offset for symbols that have them.
     if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
-      COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()];
+      assert(coff_symbol->Section != NULL);
 
-      coff_symbol->Data.SectionNumber = coff_section->Number;
+      coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
       coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
                               + SymbolData->Offset;
     }
 
-    // Update auxiliary symbol info.
-    coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
-    Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+    if (coff_symbol->should_keep()) {
+      MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+      // Update auxiliary symbol info.
+      coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+      Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+    } else
+      coff_symbol->Index = -1;
   }
 
   // Fixup weak external references.
   for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
-    COFFSymbol *symb = *i;
-
-    if (symb->Other != NULL) {
-      assert(symb->Aux.size() == 1 &&
+    COFFSymbol *coff_symbol = *i;
+    if (coff_symbol->Other != NULL) {
+      assert(coff_symbol->Index != -1);
+      assert(coff_symbol->Aux.size() == 1 &&
              "Symbol must contain one aux symbol!");
-      assert(symb->Aux[0].AuxType == ATWeakExternal &&
+      assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
              "Symbol's aux symbol must be a Weak External!");
-      symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index;
+      coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
     }
   }
 
@@ -675,18 +769,19 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
   unsigned offset = 0;
 
   offset += COFF::HeaderSize;
-  offset += COFF::SectionSize * Asm.size();
-
-  Header.NumberOfSections = Sections.size();
+  offset += COFF::SectionSize * Header.NumberOfSections;
 
   for (MCAssembler::const_iterator i = Asm.begin(),
                                    e = Asm.end();
                                    i != e; i++) {
-    COFFSection *Sec = SectionMap[i];
+    COFFSection *Sec = SectionMap[&i->getSection()];
 
-    Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i);
+    if (Sec->Number == -1)
+      continue;
 
-    if (ExportSection(Sec)) {
+    Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+    if (IsPhysicalSection(Sec)) {
       Sec->Header.PointerToRawData = offset;
 
       offset += Sec->Header.SizeOfRawData;
@@ -700,13 +795,15 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
 
       for (relocations::iterator cr = Sec->Relocations.begin(),
                                  er = Sec->Relocations.end();
-                                 cr != er; cr++) {
+                                 cr != er; ++cr) {
+        assert((*cr).Symb->Index != -1);
         (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
       }
     }
 
-    assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!");
-    AuxSymbol &Aux = Sec->Symb->Aux[0];
+    assert(Sec->Symbol->Aux.size() == 1
+      && "Section's symbol must have one aux!");
+    AuxSymbol &Aux = Sec->Symbol->Aux[0];
     assert(Aux.AuxType == ATSectionDefinition &&
            "Section's symbol's aux symbol must be a Section Definition!");
     Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
@@ -728,16 +825,21 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
     MCAssembler::const_iterator j, je;
 
     for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
-      WriteSectionHeader((*i)->Header);
+      if ((*i)->Number != -1)
+        WriteSectionHeader((*i)->Header);
 
     for (i = Sections.begin(), ie = Sections.end(),
          j = Asm.begin(), je = Asm.end();
-         (i != ie) && (j != je); i++, j++) {
+         (i != ie) && (j != je); ++i, ++j) {
+
+      if ((*i)->Number == -1)
+        continue;
+
       if ((*i)->Header.PointerToRawData != 0) {
         assert(OS.tell() == (*i)->Header.PointerToRawData &&
                "Section::PointerToRawData is insane!");
 
-        Asm.WriteSectionData(j, Layout, this);
+        Asm.WriteSectionData(j, Layout);
       }
 
       if ((*i)->Relocations.size() > 0) {
@@ -759,7 +861,8 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
          "Header::PointerToSymbolTable is insane!");
 
   for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
-    WriteSymbol(*i);
+    if ((*i)->Index != -1)
+      WriteSymbol(*i);
 
   OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
 }
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 8a194bff2151..46968e601be7 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -48,8 +48,10 @@ public:
 
   // MCStreamer interface
 
+  virtual void InitSections();
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
@@ -66,18 +68,55 @@ public:
   virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
                               uint64_t Size, unsigned ByteAlignment);
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValue(const MCExpr *Value, unsigned Size,
-                         unsigned AddrSpace);
-  virtual void EmitGPRel32Value(const MCExpr *Value);
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
                                    unsigned ValueSize, unsigned MaxBytesToEmit);
   virtual void EmitCodeAlignment(unsigned ByteAlignment,
                                  unsigned MaxBytesToEmit);
-  virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
   virtual void EmitFileDirective(StringRef Filename);
-  virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename);
   virtual void EmitInstruction(const MCInst &Instruction);
   virtual void Finish();
+
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst) {
+    llvm_unreachable("Not used by WinCOFF.");
+  }
+  virtual void EmitInstToData(const MCInst &Inst) {
+    llvm_unreachable("Not used by WinCOFF.");
+  }
+
+  void SetSection(StringRef Section,
+                  unsigned Characteristics,
+                  SectionKind Kind) {
+    SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+  }
+
+  void SetSectionText() {
+    SetSection(".text",
+               COFF::IMAGE_SCN_CNT_CODE
+             | COFF::IMAGE_SCN_MEM_EXECUTE
+             | COFF::IMAGE_SCN_MEM_READ,
+               SectionKind::getText());
+    EmitCodeAlignment(4, 0);
+  }
+
+  void SetSectionData() {
+    SetSection(".data",
+               COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+             | COFF::IMAGE_SCN_MEM_READ
+             | COFF::IMAGE_SCN_MEM_WRITE,
+               SectionKind::getDataRel());
+    EmitCodeAlignment(4, 0);
+  }
+
+  void SetSectionBSS() {
+    SetSection(".bss",
+               COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+             | COFF::IMAGE_SCN_MEM_READ
+             | COFF::IMAGE_SCN_MEM_WRITE,
+               SectionKind::getBSS());
+    EmitCodeAlignment(4, 0);
+  }
+
 };
 } // end anonymous namespace.
 
@@ -126,47 +165,81 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 
 // MCStreamer interface
 
+void WinCOFFStreamer::InitSections() {
+  SetSectionText();
+  SetSectionData();
+  SetSectionBSS();
+  SetSectionText();
+}
+
 void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
-  // TODO: This is copied almost exactly from the MachOStreamer. Consider
-  // merging into MCObjectStreamer?
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(CurSection && "Cannot emit before setting section!");
-
-  Symbol->setSection(*CurSection);
-
-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-
-  // FIXME: This is wasteful, we don't necessarily need to create a data
-  // fragment. Instead, we should mark the symbol as pointing into the data
-  // fragment if it exists, otherwise we should just queue the label and set its
-  // fragment pointer when we emit the next fragment.
-  MCDataFragment *DF = getOrCreateDataFragment();
-
-  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
-  SD.setFragment(DF);
-  SD.setOffset(DF->getContents().size());
+  MCObjectStreamer::EmitLabel(Symbol);
 }
 
 void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   llvm_unreachable("not implemented");
 }
 
+void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+  llvm_unreachable("not implemented");
+}
+
 void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
-  // TODO: This is exactly the same as MachOStreamer. Consider merging into
-  // MCObjectStreamer.
-  getAssembler().getOrCreateSymbolData(*Symbol);
-  AddValueSymbols(Value);
-  Symbol->setVariableValue(Value);
+  assert((Symbol->isInSection()
+         ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+         : true) && "Got non COFF section in the COFF backend!");
+  // FIXME: This is all very ugly and depressing. What needs to happen here
+  // depends on quite a few things that are all part of relaxation, which we
+  // don't really even do.
+
+  if (Value->getKind() != MCExpr::SymbolRef) {
+    // TODO: This is exactly the same as MachOStreamer. Consider merging into
+    // MCObjectStreamer.
+    getAssembler().getOrCreateSymbolData(*Symbol);
+    AddValueSymbols(Value);
+    Symbol->setVariableValue(Value);
+  } else {
+    // FIXME: This is a horrible way to do this :(. This should really be
+    // handled after we are done with the MC* objects and immediately before
+    // writing out the object file when we know exactly what the symbol should
+    // look like in the coff symbol table. I'm not doing that now because the
+    // COFF object writer doesn't have a clearly defined separation between MC
+    // data structures, the object writers data structures, and the raw, POD,
+    // data structures that get written to disk.
+
+    // Copy over the aliased data.
+    MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+    const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
+      dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
+
+    // FIXME: This is particularly nasty because it breaks as soon as any data
+    // members of MCSymbolData change.
+    SD.CommonAlign     = RealSD.CommonAlign;
+    SD.CommonSize      = RealSD.CommonSize;
+    SD.Flags           = RealSD.Flags;
+    SD.Fragment        = RealSD.Fragment;
+    SD.Index           = RealSD.Index;
+    SD.IsExternal      = RealSD.IsExternal;
+    SD.IsPrivateExtern = RealSD.IsPrivateExtern;
+    SD.Offset          = RealSD.Offset;
+    SD.SymbolSize      = RealSD.SymbolSize;
+  }
 }
 
 void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                           MCSymbolAttr Attribute) {
+  assert(Symbol && "Symbol must be non-null!");
+  assert((Symbol->isInSection()
+         ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+         : true) && "Got non COFF section in the COFF backend!");
   switch (Attribute) {
   case MCSA_WeakReference:
-    getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags(
-      COFF::SF_WeakReference,
-      COFF::SF_WeakReference);
+  case MCSA_Weak: {
+      MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+      SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+      SD.setExternal(true);
+    }
     break;
 
   case MCSA_Global:
@@ -184,6 +257,9 @@ void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
 }
 
 void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+  assert((Symbol->isInSection()
+         ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+         : true) && "Got non COFF section in the COFF backend!");
   assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
                               "to BeginCOFFSymbolDef!");
   CurSymbol = Symbol;
@@ -220,10 +296,16 @@ void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
 
 void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                        unsigned ByteAlignment) {
+  assert((Symbol->isInSection()
+         ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+         : true) && "Got non COFF section in the COFF backend!");
   AddCommonSymbol(Symbol, Size, ByteAlignment, true);
 }
 
 void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+  assert((Symbol->isInSection()
+         ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+         : true) && "Got non COFF section in the COFF backend!");
   AddCommonSymbol(Symbol, Size, 1, false);
 }
 
@@ -243,32 +325,6 @@ void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
   getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
 }
 
-void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
-                                unsigned AddrSpace) {
-  assert(AddrSpace == 0 && "Address space must be 0!");
-
-  // TODO: This is copied exactly from the MachOStreamer. Consider merging into
-  // MCObjectStreamer?
-  MCDataFragment *DF = getOrCreateDataFragment();
-
-  // Avoid fixups when possible.
-  int64_t AbsValue;
-  if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
-    // FIXME: Endianness assumption.
-    for (unsigned i = 0; i != Size; ++i)
-      DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
-  } else {
-    DF->addFixup(MCFixup::Create(DF->getContents().size(),
-                                 AddValueSymbols(Value),
-                                 MCFixup::getKindForSize(Size)));
-    DF->getContents().resize(DF->getContents().size() + Size, 0);
-  }
-}
-
-void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) {
-  llvm_unreachable("not implemented");
-}
-
 void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            int64_t Value,
                                            unsigned ValueSize,
@@ -300,21 +356,11 @@ void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
     getCurrentSectionData()->setAlignment(ByteAlignment);
 }
 
-void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset,
-                                        unsigned char Value) {
-  llvm_unreachable("not implemented");
-}
-
 void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
   // Ignore for now, linkers don't care, and proper debug
   // info will be a much large effort.
 }
 
-void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo,
-                                             StringRef Filename) {
-  llvm_unreachable("not implemented");
-}
-
 void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
   for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
     if (Instruction.getOperand(i).isExpr())
diff --git a/lib/Makefile b/lib/Makefile
index 3807f31c7037..ed27854f22c7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ LEVEL = ..
 include $(LEVEL)/Makefile.config
 
 PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Linker MC CompilerDriver
+                Target ExecutionEngine Linker MC CompilerDriver Object
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
new file mode 100644
index 000000000000..6a6814fd37d9
--- /dev/null
+++ b/lib/Object/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMObject
+  MachOObject.cpp
+  ObjectFile.cpp
+  COFFObjectFile.cpp
+  ELFObjectFile.cpp
+  )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 000000000000..cfee82a0b217
--- /dev/null
+++ b/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,375 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+struct coff_file_header {
+  ulittle16_t Machine;
+  ulittle16_t NumberOfSections;
+  ulittle32_t TimeDateStamp;
+  ulittle32_t PointerToSymbolTable;
+  ulittle32_t NumberOfSymbols;
+  ulittle16_t SizeOfOptionalHeader;
+  ulittle16_t Characteristics;
+};
+}
+
+extern char coff_file_header_layout_static_assert
+            [sizeof(coff_file_header) == 20 ? 1 : -1];
+
+namespace {
+struct coff_symbol {
+  struct StringTableOffset {
+    ulittle32_t Zeroes;
+    ulittle32_t Offset;
+  };
+
+  union {
+    char ShortName[8];
+    StringTableOffset Offset;
+  } Name;
+
+  ulittle32_t Value;
+  little16_t SectionNumber;
+
+  struct {
+    ulittle8_t BaseType;
+    ulittle8_t ComplexType;
+  } Type;
+
+  ulittle8_t  StorageClass;
+  ulittle8_t  NumberOfAuxSymbols;
+};
+}
+
+extern char coff_coff_symbol_layout_static_assert
+            [sizeof(coff_symbol) == 18 ? 1 : -1];
+
+namespace {
+struct coff_section {
+  char Name[8];
+  ulittle32_t VirtualSize;
+  ulittle32_t VirtualAddress;
+  ulittle32_t SizeOfRawData;
+  ulittle32_t PointerToRawData;
+  ulittle32_t PointerToRelocations;
+  ulittle32_t PointerToLinenumbers;
+  ulittle16_t NumberOfRelocations;
+  ulittle16_t NumberOfLinenumbers;
+  ulittle32_t Characteristics;
+};
+}
+
+extern char coff_coff_section_layout_static_assert
+            [sizeof(coff_section) == 40 ? 1 : -1];
+
+namespace {
+class COFFObjectFile : public ObjectFile {
+private:
+  const coff_file_header *Header;
+  const coff_section     *SectionTable;
+  const coff_symbol      *SymbolTable;
+  const char             *StringTable;
+
+  const coff_section     *getSection(std::size_t index) const;
+  const char             *getString(std::size_t offset) const;
+
+protected:
+  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
+  virtual StringRef getSymbolName(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
+  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
+  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
+
+  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
+  virtual StringRef  getSectionName(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
+  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
+  virtual bool       isSectionText(DataRefImpl Sec) const;
+
+public:
+  COFFObjectFile(MemoryBuffer *Object);
+  virtual symbol_iterator begin_symbols() const;
+  virtual symbol_iterator end_symbols() const;
+  virtual section_iterator begin_sections() const;
+  virtual section_iterator end_sections() const;
+
+  virtual uint8_t getBytesInAddress() const;
+  virtual StringRef getFileFormatName() const;
+  virtual unsigned getArch() const;
+};
+} // end namespace
+
+SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const {
+  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+  symb += 1 + symb->NumberOfAuxSymbols;
+  Symb.p = reinterpret_cast<intptr_t>(symb);
+  return SymbolRef(Symb, this);
+}
+
+StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+  // Check for string table entry. First 4 bytes are 0.
+  if (symb->Name.Offset.Zeroes == 0) {
+    uint32_t Offset = symb->Name.Offset.Offset;
+    return StringRef(getString(Offset));
+  }
+
+  if (symb->Name.ShortName[7] == 0)
+    // Null terminated, let ::strlen figure out the length.
+    return StringRef(symb->Name.ShortName);
+  // Not null terminated, use all 8 bytes.
+  return StringRef(symb->Name.ShortName, 8);
+}
+
+uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+  const coff_section *Section = getSection(symb->SectionNumber);
+  char Type = getSymbolNMTypeChar(Symb);
+  if (Type == 'U' || Type == 'w')
+    return UnknownAddressOrSize;
+  if (Section)
+    return Section->VirtualAddress + symb->Value;
+  return symb->Value;
+}
+
+uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+  // FIXME: Return the correct size. This requires looking at all the symbols
+  //        in the same section as this symbol, and looking for either the next
+  //        symbol, or the end of the section.
+  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+  const coff_section *Section = getSection(symb->SectionNumber);
+  char Type = getSymbolNMTypeChar(Symb);
+  if (Type == 'U' || Type == 'w')
+    return UnknownAddressOrSize;
+  if (Section)
+    return Section->SizeOfRawData - symb->Value;
+  return 0;
+}
+
+char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
+  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+  char ret = StringSwitch<char>(getSymbolName(Symb))
+    .StartsWith(".debug", 'N')
+    .StartsWith(".sxdata", 'N')
+    .Default('?');
+
+  if (ret != '?')
+    return ret;
+
+  uint32_t Characteristics = 0;
+  uint32_t PointerToRawData = 0;
+  const coff_section *Section = getSection(symb->SectionNumber);
+  if (Section) {
+    Characteristics = Section->Characteristics;
+    PointerToRawData = Section->PointerToRawData;
+  }
+
+  switch (symb->SectionNumber) {
+  case COFF::IMAGE_SYM_UNDEFINED:
+    // Check storage classes.
+    if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+      return 'w'; // Don't do ::toupper.
+    else
+      ret = 'u';
+    break;
+  case COFF::IMAGE_SYM_ABSOLUTE:
+    ret = 'a';
+    break;
+  case COFF::IMAGE_SYM_DEBUG:
+    ret = 'n';
+    break;
+  default:
+    // Check section type.
+    if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+      ret = 't';
+    else if (  Characteristics & COFF::IMAGE_SCN_MEM_READ
+            && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+      ret = 'r';
+    else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+      ret = 'd';
+    else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+      ret = 'b';
+    else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
+      ret = 'i';
+
+    // Check for section symbol.
+    else if (  symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+            && symb->Value == 0)
+       ret = 's';
+  }
+
+  if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+    ret = ::toupper(ret);
+
+  return ret;
+}
+
+bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const {
+  return false;
+}
+
+SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  sec += 1;
+  Sec.p = reinterpret_cast<intptr_t>(sec);
+  return SectionRef(Sec, this);
+}
+
+StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  StringRef name;
+  if (sec->Name[7] == 0)
+    // Null terminated, let ::strlen figure out the length.
+    name = sec->Name;
+  else
+    // Not null terminated, use all 8 bytes.
+    name = StringRef(sec->Name, 8);
+
+  // Check for string table entry. First byte is '/'.
+  if (name[0] == '/') {
+    uint32_t Offset;
+    name.getAsInteger(10, Offset);
+    return StringRef(getString(Offset));
+  }
+
+  // It's just a normal name.
+  return name;
+}
+
+uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  return sec->VirtualAddress;
+}
+
+uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  return sec->SizeOfRawData;
+}
+
+StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData),
+                   sec->SizeOfRawData);
+}
+
+bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
+  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+  return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
+  : ObjectFile(Object) {
+  Header = reinterpret_cast<const coff_file_header *>(base);
+  SectionTable =
+    reinterpret_cast<const coff_section *>( base
+                                          + sizeof(coff_file_header)
+                                          + Header->SizeOfOptionalHeader);
+  SymbolTable =
+    reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable);
+
+  // Find string table.
+  StringTable = reinterpret_cast<const char *>(base)
+              + Header->PointerToSymbolTable
+              + Header->NumberOfSymbols * 18;
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+  return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
+  // The symbol table ends where the string table begins.
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(StringTable);
+  return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(SectionTable);
+  return section_iterator(SectionRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::end_sections() const {
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+  return section_iterator(SectionRef(ret, this));
+}
+
+uint8_t COFFObjectFile::getBytesInAddress() const {
+  return getArch() == Triple::x86_64 ? 8 : 4;
+}
+
+StringRef COFFObjectFile::getFileFormatName() const {
+  switch(Header->Machine) {
+  case COFF::IMAGE_FILE_MACHINE_I386:
+    return "COFF-i386";
+  case COFF::IMAGE_FILE_MACHINE_AMD64:
+    return "COFF-x86-64";
+  default:
+    return "COFF-<unknown arch>";
+  }
+}
+
+unsigned COFFObjectFile::getArch() const {
+  switch(Header->Machine) {
+  case COFF::IMAGE_FILE_MACHINE_I386:
+    return Triple::x86;
+  case COFF::IMAGE_FILE_MACHINE_AMD64:
+    return Triple::x86_64;
+  default:
+    return Triple::UnknownArch;
+  }
+}
+
+const coff_section *COFFObjectFile::getSection(std::size_t index) const {
+  if (index > 0 && index <= Header->NumberOfSections)
+    return SectionTable + (index - 1);
+  return 0;
+}
+
+const char *COFFObjectFile::getString(std::size_t offset) const {
+  const ulittle32_t *StringTableSize =
+    reinterpret_cast<const ulittle32_t *>(StringTable);
+  if (offset < *StringTableSize)
+    return StringTable + offset;
+  return 0;
+}
+
+namespace llvm {
+
+  ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
+    return new COFFObjectFile(Object);
+  }
+
+} // end namespace llvm
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
new file mode 100644
index 000000000000..682be770f48f
--- /dev/null
+++ b/lib/Object/ELFObjectFile.cpp
@@ -0,0 +1,686 @@
+//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <limits>
+#include <utility>
+
+using namespace llvm;
+using namespace object;
+
+// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
+namespace {
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelperCommon {
+  typedef support::detail::packed_endian_specific_integral
+    <uint16_t, target_endianness, support::aligned> Elf_Half;
+  typedef support::detail::packed_endian_specific_integral
+    <uint32_t, target_endianness, support::aligned> Elf_Word;
+  typedef support::detail::packed_endian_specific_integral
+    <int32_t, target_endianness, support::aligned> Elf_Sword;
+  typedef support::detail::packed_endian_specific_integral
+    <uint64_t, target_endianness, support::aligned> Elf_Xword;
+  typedef support::detail::packed_endian_specific_integral
+    <int64_t, target_endianness, support::aligned> Elf_Sxword;
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct ELFDataTypeTypedefHelper;
+
+/// ELF 32bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, false>
+  : ELFDataTypeTypedefHelperCommon<target_endianness> {
+  typedef support::detail::packed_endian_specific_integral
+    <uint32_t, target_endianness, support::aligned> Elf_Addr;
+  typedef support::detail::packed_endian_specific_integral
+    <uint32_t, target_endianness, support::aligned> Elf_Off;
+};
+
+/// ELF 64bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, true>
+  : ELFDataTypeTypedefHelperCommon<target_endianness>{
+  typedef support::detail::packed_endian_specific_integral
+    <uint64_t, target_endianness, support::aligned> Elf_Addr;
+  typedef support::detail::packed_endian_specific_integral
+    <uint64_t, target_endianness, support::aligned> Elf_Off;
+};
+}
+
+// I really don't like doing this, but the alternative is copypasta.
+#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
+typedef typename \
+  ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+
+  // Section header.
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, false> {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+  Elf_Word sh_name;     // Section name (index into string table)
+  Elf_Word sh_type;     // Section type (SHT_*)
+  Elf_Word sh_flags;    // Section flags (SHF_*)
+  Elf_Addr sh_addr;     // Address where section is to be loaded
+  Elf_Off  sh_offset;   // File offset of section data, in bytes
+  Elf_Word sh_size;     // Size of section, in bytes
+  Elf_Word sh_link;     // Section type-specific header table index link
+  Elf_Word sh_info;     // Section type-specific extra information
+  Elf_Word sh_addralign;// Section address alignment
+  Elf_Word sh_entsize;  // Size of records contained within the section
+};
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, true> {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+  Elf_Word  sh_name;     // Section name (index into string table)
+  Elf_Word  sh_type;     // Section type (SHT_*)
+  Elf_Xword sh_flags;    // Section flags (SHF_*)
+  Elf_Addr  sh_addr;     // Address where section is to be loaded
+  Elf_Off   sh_offset;   // File offset of section data, in bytes
+  Elf_Xword sh_size;     // Size of section, in bytes
+  Elf_Word  sh_link;     // Section type-specific header table index link
+  Elf_Word  sh_info;     // Section type-specific extra information
+  Elf_Xword sh_addralign;// Section address alignment
+  Elf_Xword sh_entsize;  // Size of records contained within the section
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
+  using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
+  using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+
+  /// @brief Get the number of entities this section contains if it has any.
+  unsigned getEntityCount() const {
+    if (sh_entsize == 0)
+      return 0;
+    return sh_size / sh_entsize;
+  }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, false> {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+  Elf_Word      st_name;  // Symbol name (index into string table)
+  Elf_Addr      st_value; // Value or address associated with the symbol
+  Elf_Word      st_size;  // Size of the symbol
+  unsigned char st_info;  // Symbol's type and binding attributes
+  unsigned char st_other; // Must be zero; reserved
+  Elf_Half      st_shndx; // Which section (header table index) it's defined in
+};
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, true> {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+  Elf_Word      st_name;  // Symbol name (index into string table)
+  unsigned char st_info;  // Symbol's type and binding attributes
+  unsigned char st_other; // Must be zero; reserved
+  Elf_Half      st_shndx; // Which section (header table index) it's defined in
+  Elf_Addr      st_value; // Value or address associated with the symbol
+  Elf_Xword     st_size;  // Size of the symbol
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
+  using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+
+  // These accessors and mutators correspond to the ELF32_ST_BIND,
+  // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+  unsigned char getBinding() const { return st_info >> 4; }
+  unsigned char getType() const { return st_info & 0x0f; }
+  void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+  void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+  void setBindingAndType(unsigned char b, unsigned char t) {
+    st_info = (b << 4) + (t & 0x0f);
+  }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile : public ObjectFile {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+
+  typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
+  typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+
+  struct Elf_Ehdr {
+    unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
+    Elf_Half e_type;     // Type of file (see ET_*)
+    Elf_Half e_machine;  // Required architecture for this file (see EM_*)
+    Elf_Word e_version;  // Must be equal to 1
+    Elf_Addr e_entry;    // Address to jump to in order to start program
+    Elf_Off  e_phoff;    // Program header table's file offset, in bytes
+    Elf_Off  e_shoff;    // Section header table's file offset, in bytes
+    Elf_Word e_flags;    // Processor-specific flags
+    Elf_Half e_ehsize;   // Size of ELF header, in bytes
+    Elf_Half e_phentsize;// Size of an entry in the program header table
+    Elf_Half e_phnum;    // Number of entries in the program header table
+    Elf_Half e_shentsize;// Size of an entry in the section header table
+    Elf_Half e_shnum;    // Number of entries in the section header table
+    Elf_Half e_shstrndx; // Section header table index of section name
+                                  // string table
+    bool checkMagic() const {
+      return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+    }
+    unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
+    unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+  };
+
+  typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t;
+
+  const Elf_Ehdr *Header;
+  const Elf_Shdr *SectionHeaderTable;
+  const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+  const Elf_Shdr *dot_strtab_sec;   // Symbol header string table.
+  SymbolTableSections_t SymbolTableSections;
+
+  void            validateSymbol(DataRefImpl Symb) const;
+  const Elf_Sym  *getSymbol(DataRefImpl Symb) const;
+  const Elf_Shdr *getSection(DataRefImpl index) const;
+  const Elf_Shdr *getSection(uint16_t index) const;
+  const char     *getString(uint16_t section, uint32_t offset) const;
+  const char     *getString(const Elf_Shdr *section, uint32_t offset) const;
+
+protected:
+  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
+  virtual StringRef getSymbolName(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
+  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
+  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
+  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
+
+  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
+  virtual StringRef  getSectionName(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
+  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
+  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
+  virtual bool       isSectionText(DataRefImpl Sec) const;
+
+public:
+  ELFObjectFile(MemoryBuffer *Object);
+  virtual symbol_iterator begin_symbols() const;
+  virtual symbol_iterator end_symbols() const;
+  virtual section_iterator begin_sections() const;
+  virtual section_iterator end_sections() const;
+
+  virtual uint8_t getBytesInAddress() const;
+  virtual StringRef getFileFormatName() const;
+  virtual unsigned getArch() const;
+};
+} // end namespace
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+                  ::validateSymbol(DataRefImpl Symb) const {
+  const Elf_Sym  *symb = getSymbol(Symb);
+  const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+  // FIXME: We really need to do proper error handling in the case of an invalid
+  //        input file. Because we don't use exceptions, I think we'll just pass
+  //        an error object around.
+  if (!(  symb
+        && SymbolTableSection
+        && symb >= (const Elf_Sym*)(base
+                   + SymbolTableSection->sh_offset)
+        && symb <  (const Elf_Sym*)(base
+                   + SymbolTableSection->sh_offset
+                   + SymbolTableSection->sh_size)))
+    // FIXME: Proper error handling.
+    report_fatal_error("Symb must point to a valid symbol!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+SymbolRef ELFObjectFile<target_endianness, is64Bits>
+                       ::getSymbolNext(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+
+  ++Symb.d.a;
+  // Check to see if we are at the end of this symbol table.
+  if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
+    // We are at the end. If there are other symbol tables, jump to them.
+    ++Symb.d.b;
+    Symb.d.a = 1; // The 0th symbol in ELF is fake.
+    // Otherwise return the terminator.
+    if (Symb.d.b >= SymbolTableSections.size()) {
+      Symb.d.a = std::numeric_limits<uint32_t>::max();
+      Symb.d.b = std::numeric_limits<uint32_t>::max();
+    }
+  }
+
+  return SymbolRef(Symb, this);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+                       ::getSymbolName(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Sym  *symb = getSymbol(Symb);
+  if (symb->st_name == 0) {
+    const Elf_Shdr *section = getSection(symb->st_shndx);
+    if (!section)
+      return "";
+    return getString(dot_shstrtab_sec, section->sh_name);
+  }
+
+  // Use the default symbol table name section.
+  return getString(dot_strtab_sec, symb->st_name);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+                      ::getSymbolAddress(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Sym  *symb = getSymbol(Symb);
+  const Elf_Shdr *Section;
+  switch (symb->st_shndx) {
+  case ELF::SHN_COMMON:
+   // Undefined symbols have no address yet.
+  case ELF::SHN_UNDEF: return UnknownAddressOrSize;
+  case ELF::SHN_ABS: return symb->st_value;
+  default: Section = getSection(symb->st_shndx);
+  }
+
+  switch (symb->getType()) {
+  case ELF::STT_SECTION: return Section ? Section->sh_addr
+                                        : UnknownAddressOrSize;
+  case ELF::STT_FUNC:
+  case ELF::STT_OBJECT:
+  case ELF::STT_NOTYPE:
+    return symb->st_value;
+  default: return UnknownAddressOrSize;
+  }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+                      ::getSymbolSize(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Sym  *symb = getSymbol(Symb);
+  if (symb->st_size == 0)
+    return UnknownAddressOrSize;
+  return symb->st_size;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+char ELFObjectFile<target_endianness, is64Bits>
+                  ::getSymbolNMTypeChar(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Sym  *symb = getSymbol(Symb);
+  const Elf_Shdr *Section = getSection(symb->st_shndx);
+
+  char ret = '?';
+
+  if (Section) {
+    switch (Section->sh_type) {
+    case ELF::SHT_PROGBITS:
+    case ELF::SHT_DYNAMIC:
+      switch (Section->sh_flags) {
+      case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
+        ret = 't'; break;
+      case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
+        ret = 'd'; break;
+      case ELF::SHF_ALLOC:
+      case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
+      case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
+        ret = 'r'; break;
+      }
+      break;
+    case ELF::SHT_NOBITS: ret = 'b';
+    }
+  }
+
+  switch (symb->st_shndx) {
+  case ELF::SHN_UNDEF:
+    if (ret == '?')
+      ret = 'U';
+    break;
+  case ELF::SHN_ABS: ret = 'a'; break;
+  case ELF::SHN_COMMON: ret = 'c'; break;
+  }
+
+  switch (symb->getBinding()) {
+  case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
+  case ELF::STB_WEAK:
+    if (symb->st_shndx == ELF::SHN_UNDEF)
+      ret = 'w';
+    else
+      if (symb->getType() == ELF::STT_OBJECT)
+        ret = 'V';
+      else
+        ret = 'W';
+  }
+
+  if (ret == '?' && symb->getType() == ELF::STT_SECTION)
+    return StringSwitch<char>(getSymbolName(Symb))
+      .StartsWith(".debug", 'N')
+      .StartsWith(".note", 'n');
+
+  return ret;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+bool ELFObjectFile<target_endianness, is64Bits>
+                  ::isSymbolInternal(DataRefImpl Symb) const {
+  validateSymbol(Symb);
+  const Elf_Sym  *symb = getSymbol(Symb);
+
+  if (  symb->getType() == ELF::STT_FILE
+     || symb->getType() == ELF::STT_SECTION)
+    return true;
+  return false;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+SectionRef ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionNext(DataRefImpl Sec) const {
+  const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
+  sec += Header->e_shentsize;
+  Sec.p = reinterpret_cast<intptr_t>(sec);
+  return SectionRef(Sec, this);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+                       ::getSectionName(DataRefImpl Sec) const {
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  return StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+                      ::getSectionAddress(DataRefImpl Sec) const {
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  return sec->sh_addr;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+                      ::getSectionSize(DataRefImpl Sec) const {
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  return sec->sh_size;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+                       ::getSectionContents(DataRefImpl Sec) const {
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  const char *start = (char*)base + sec->sh_offset;
+  return StringRef(start, sec->sh_size);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+bool ELFObjectFile<target_endianness, is64Bits>
+                  ::isSectionText(DataRefImpl Sec) const {
+  const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  if (sec->sh_flags & ELF::SHF_EXECINSTR)
+    return true;
+  return false;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
+  : ObjectFile(Object)
+  , SectionHeaderTable(0)
+  , dot_shstrtab_sec(0)
+  , dot_strtab_sec(0) {
+  Header = reinterpret_cast<const Elf_Ehdr *>(base);
+
+  if (Header->e_shoff == 0)
+    return;
+
+  SectionHeaderTable =
+    reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff);
+  uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
+  if (!(  (const uint8_t *)SectionHeaderTable + SectionTableSize
+         <= base + MapFile->getBufferSize()))
+    // FIXME: Proper error handling.
+    report_fatal_error("Section table goes past end of file!");
+
+
+  // To find the symbol tables we walk the section table to find SHT_STMTAB.
+  for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+                  *e = i + Header->e_shnum * Header->e_shentsize;
+                   i != e; i += Header->e_shentsize) {
+    const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+    if (sh->sh_type == ELF::SHT_SYMTAB) {
+      SymbolTableSections.push_back(sh);
+    }
+  }
+
+  // Get string table sections.
+  dot_shstrtab_sec = getSection(Header->e_shstrndx);
+  if (dot_shstrtab_sec) {
+    // Verify that the last byte in the string table in a null.
+    if (((const char*)base + dot_shstrtab_sec->sh_offset)
+        [dot_shstrtab_sec->sh_size - 1] != 0)
+      // FIXME: Proper error handling.
+      report_fatal_error("String table must end with a null terminator!");
+  }
+
+  // Merge this into the above loop.
+  for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+                  *e = i + Header->e_shnum * Header->e_shentsize;
+                   i != e; i += Header->e_shentsize) {
+    const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+    if (sh->sh_type == ELF::SHT_STRTAB) {
+      StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
+      if (SectionName == ".strtab") {
+        if (dot_strtab_sec != 0)
+          // FIXME: Proper error handling.
+          report_fatal_error("Already found section named .strtab!");
+        dot_strtab_sec = sh;
+        const char *dot_strtab = (const char*)base + sh->sh_offset;
+          if (dot_strtab[sh->sh_size - 1] != 0)
+            // FIXME: Proper error handling.
+            report_fatal_error("String table must end with a null terminator!");
+      }
+    }
+  }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+                                         ::begin_symbols() const {
+  DataRefImpl SymbolData;
+  memset(&SymbolData, 0, sizeof(SymbolData));
+  if (SymbolTableSections.size() == 0) {
+    SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+    SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+  } else {
+    SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+    SymbolData.d.b = 0;
+  }
+  return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+                                         ::end_symbols() const {
+  DataRefImpl SymbolData;
+  memset(&SymbolData, 0, sizeof(SymbolData));
+  SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+  SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+  return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+                                          ::begin_sections() const {
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff);
+  return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+                                          ::end_sections() const {
+  DataRefImpl ret;
+  ret.p = reinterpret_cast<intptr_t>(base
+                                     + Header->e_shoff
+                                     + (Header->e_shentsize * Header->e_shnum));
+  return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
+  return is64Bits ? 8 : 4;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+                       ::getFileFormatName() const {
+  switch(Header->e_ident[ELF::EI_CLASS]) {
+  case ELF::ELFCLASS32:
+    switch(Header->e_machine) {
+    case ELF::EM_386:
+      return "ELF32-i386";
+    case ELF::EM_X86_64:
+      return "ELF32-x86-64";
+    default:
+      return "ELF32-unknown";
+    }
+  case ELF::ELFCLASS64:
+    switch(Header->e_machine) {
+    case ELF::EM_386:
+      return "ELF64-i386";
+    case ELF::EM_X86_64:
+      return "ELF64-x86-64";
+    default:
+      return "ELF64-unknown";
+    }
+  default:
+    // FIXME: Proper error handling.
+    report_fatal_error("Invalid ELFCLASS!");
+  }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+  switch(Header->e_machine) {
+  case ELF::EM_386:
+    return Triple::x86;
+  case ELF::EM_X86_64:
+    return Triple::x86_64;
+  default:
+    return Triple::UnknownArch;
+  }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
+ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+  const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
+  return reinterpret_cast<const Elf_Sym *>(
+           base
+           + sec->sh_offset
+           + (Symb.d.a * sec->sh_entsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+  const Elf_Shdr *sec = getSection(Symb.d.b);
+  if (sec->sh_type != ELF::SHT_SYMTAB)
+    // FIXME: Proper error handling.
+    report_fatal_error("Invalid symbol table section!");
+  return sec;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
+  if (index == 0 || index >= ELF::SHN_LORESERVE)
+    return 0;
+  if (!SectionHeaderTable || index >= Header->e_shnum)
+    // FIXME: Proper error handling.
+    report_fatal_error("Invalid section index!");
+
+  return reinterpret_cast<const Elf_Shdr *>(
+         reinterpret_cast<const char *>(SectionHeaderTable)
+         + (index * Header->e_shentsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+                         ::getString(uint16_t section,
+                                     ELF::Elf32_Word offset) const {
+  return getString(getSection(section), offset);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+                         ::getString(const Elf_Shdr *section,
+                                     ELF::Elf32_Word offset) const {
+  assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
+  if (offset >= section->sh_size)
+    // FIXME: Proper error handling.
+    report_fatal_error("Sybol name offset outside of string table!");
+  return (const char *)base + section->sh_offset + offset;
+}
+
+// EI_CLASS, EI_DATA.
+static std::pair<unsigned char, unsigned char>
+getElfArchType(MemoryBuffer *Object) {
+  if (Object->getBufferSize() < ELF::EI_NIDENT)
+    return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+  return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+                       , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
+}
+
+namespace llvm {
+
+  ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+    std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+    if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+      return new ELFObjectFile<support::little, false>(Object);
+    else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+      return new ELFObjectFile<support::big, false>(Object);
+    else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
+      return new ELFObjectFile<support::little, true>(Object);
+    else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+      return new ELFObjectFile<support::big, true>(Object);
+    // FIXME: Proper error handling.
+    report_fatal_error("Not an ELF object file!");
+  }
+
+} // end namespace llvm
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
new file mode 100644
index 000000000000..5e64d6323288
--- /dev/null
+++ b/lib/Object/MachOObject.cpp
@@ -0,0 +1,342 @@
+//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+/* Translation Utilities */
+
+template<typename T>
+static void SwapValue(T &Value) {
+  Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<typename T>
+static void ReadInMemoryStruct(const MachOObject &MOO,
+                               StringRef Buffer, uint64_t Base,
+                               InMemoryStruct<T> &Res) {
+  typedef T struct_type;
+  uint64_t Size = sizeof(struct_type);
+
+  // Check that the buffer contains the expected data.
+  if (Base + Size >  Buffer.size()) {
+    Res = 0;
+    return;
+  }
+
+  // Check whether we can return a direct pointer.
+  struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
+  if (!MOO.isSwappedEndian()) {
+    Res = Ptr;
+    return;
+  }
+
+  // Otherwise, copy the struct and translate the values.
+  Res = *Ptr;
+  SwapStruct(*Res);
+}
+
+/* *** */
+
+MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
+                         bool Is64Bit_)
+  : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
+    IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+    HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
+  // Load the common header.
+  memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
+  if (IsSwappedEndian) {
+    SwapValue(Header.Magic);
+    SwapValue(Header.CPUType);
+    SwapValue(Header.CPUSubtype);
+    SwapValue(Header.FileType);
+    SwapValue(Header.NumLoadCommands);
+    SwapValue(Header.SizeOfLoadCommands);
+    SwapValue(Header.Flags);
+  }
+
+  if (is64Bit()) {
+    memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
+           sizeof(Header64Ext));
+    if (IsSwappedEndian) {
+      SwapValue(Header64Ext.Reserved);
+    }
+  }
+
+  // Create the load command array if sane.
+  if (getHeader().NumLoadCommands < (1 << 20))
+    LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
+}
+
+MachOObject::~MachOObject() {
+  delete [] LoadCommands;
+}
+
+MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
+                                         std::string *ErrorStr) {
+  // First, check the magic value and initialize the basic object info.
+  bool IsLittleEndian = false, Is64Bit = false;
+  StringRef Magic = Buffer->getBuffer().slice(0, 4);
+  if (Magic == "\xFE\xED\xFA\xCE") {
+  }  else if (Magic == "\xCE\xFA\xED\xFE") {
+    IsLittleEndian = true;
+  } else if (Magic == "\xFE\xED\xFA\xCF") {
+    Is64Bit = true;
+  } else if (Magic == "\xCF\xFA\xED\xFE") {
+    IsLittleEndian = true;
+    Is64Bit = true;
+  } else {
+    if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
+    return 0;
+  }
+
+  // Ensure that the at least the full header is present.
+  unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
+  if (Buffer->getBufferSize() < HeaderSize) {
+    if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
+    return 0;
+  }
+
+  OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
+                                                Is64Bit));
+
+  // Check for bogus number of load commands.
+  if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
+    if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
+    return 0;
+  }
+
+  if (ErrorStr) *ErrorStr = "";
+  return Object.take();
+}
+
+StringRef MachOObject::getData(size_t Offset, size_t Size) const {
+  return Buffer->getBuffer().substr(Offset,Size);
+}
+
+void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
+  HasStringTable = true;
+  StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
+                                           SLC.StringTableSize);
+}
+
+const MachOObject::LoadCommandInfo &
+MachOObject::getLoadCommandInfo(unsigned Index) const {
+  assert(Index < getHeader().NumLoadCommands && "Invalid index!");
+
+  // Load the command, if necessary.
+  if (Index >= NumLoadedCommands) {
+    uint64_t Offset;
+    if (Index == 0) {
+      Offset = getHeaderSize();
+    } else {
+      const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
+      Offset = Prev.Offset + Prev.Command.Size;
+    }
+
+    LoadCommandInfo &Info = LoadCommands[Index];
+    memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
+           sizeof(macho::LoadCommand));
+    if (IsSwappedEndian) {
+      SwapValue(Info.Command.Type);
+      SwapValue(Info.Command.Size);
+    }
+    Info.Offset = Offset;
+    NumLoadedCommands = Index + 1;
+  }
+
+  return LoadCommands[Index];
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &Value) {
+  SwapValue(Value.Type);
+  SwapValue(Value.Size);
+  SwapValue(Value.VMAddress);
+  SwapValue(Value.VMSize);
+  SwapValue(Value.FileOffset);
+  SwapValue(Value.FileSize);
+  SwapValue(Value.MaxVMProtection);
+  SwapValue(Value.InitialVMProtection);
+  SwapValue(Value.NumSections);
+  SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
+                         InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &Value) {
+  SwapValue(Value.Type);
+  SwapValue(Value.Size);
+  SwapValue(Value.VMAddress);
+  SwapValue(Value.VMSize);
+  SwapValue(Value.FileOffset);
+  SwapValue(Value.FileSize);
+  SwapValue(Value.MaxVMProtection);
+  SwapValue(Value.InitialVMProtection);
+  SwapValue(Value.NumSections);
+  SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
+                       InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &Value) {
+  SwapValue(Value.Type);
+  SwapValue(Value.Size);
+  SwapValue(Value.SymbolTableOffset);
+  SwapValue(Value.NumSymbolTableEntries);
+  SwapValue(Value.StringTableOffset);
+  SwapValue(Value.StringTableSize);
+}
+void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
+                          InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &Value) {
+  SwapValue(Value.Type);
+  SwapValue(Value.Size);
+  SwapValue(Value.LocalSymbolsIndex);
+  SwapValue(Value.NumLocalSymbols);
+  SwapValue(Value.ExternalSymbolsIndex);
+  SwapValue(Value.NumExternalSymbols);
+  SwapValue(Value.UndefinedSymbolsIndex);
+  SwapValue(Value.NumUndefinedSymbols);
+  SwapValue(Value.TOCOffset);
+  SwapValue(Value.NumTOCEntries);
+  SwapValue(Value.ModuleTableOffset);
+  SwapValue(Value.NumModuleTableEntries);
+  SwapValue(Value.ReferenceSymbolTableOffset);
+  SwapValue(Value.NumReferencedSymbolTableEntries);
+  SwapValue(Value.IndirectSymbolTableOffset);
+  SwapValue(Value.NumIndirectSymbolTableEntries);
+  SwapValue(Value.ExternalRelocationTableOffset);
+  SwapValue(Value.NumExternalRelocationTableEntries);
+  SwapValue(Value.LocalRelocationTableOffset);
+  SwapValue(Value.NumLocalRelocationTableEntries);
+}
+void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
+                        InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
+  SwapValue(Value.Index);
+}
+void
+MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+                                          unsigned Index,
+                   InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
+  uint64_t Offset = (DLC.IndirectSymbolTableOffset +
+                     Index * sizeof(macho::IndirectSymbolTableEntry));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+
+template<>
+void SwapStruct(macho::Section &Value) {
+  SwapValue(Value.Address);
+  SwapValue(Value.Size);
+  SwapValue(Value.Offset);
+  SwapValue(Value.Align);
+  SwapValue(Value.RelocationTableOffset);
+  SwapValue(Value.NumRelocationTableEntries);
+  SwapValue(Value.Flags);
+  SwapValue(Value.Reserved1);
+  SwapValue(Value.Reserved2);
+}
+void MachOObject::ReadSection(const LoadCommandInfo &LCI,
+                              unsigned Index,
+                              InMemoryStruct<macho::Section> &Res) const {
+  assert(LCI.Command.Type == macho::LCT_Segment &&
+         "Unexpected load command info!");
+  uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+                     Index * sizeof(macho::Section));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Section64 &Value) {
+  SwapValue(Value.Address);
+  SwapValue(Value.Size);
+  SwapValue(Value.Offset);
+  SwapValue(Value.Align);
+  SwapValue(Value.RelocationTableOffset);
+  SwapValue(Value.NumRelocationTableEntries);
+  SwapValue(Value.Flags);
+  SwapValue(Value.Reserved1);
+  SwapValue(Value.Reserved2);
+  SwapValue(Value.Reserved3);
+}
+void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
+                                unsigned Index,
+                                InMemoryStruct<macho::Section64> &Res) const {
+  assert(LCI.Command.Type == macho::LCT_Segment64 &&
+         "Unexpected load command info!");
+  uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+                     Index * sizeof(macho::Section64));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::RelocationEntry &Value) {
+  SwapValue(Value.Word0);
+  SwapValue(Value.Word1);
+}
+void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
+                                      unsigned Index,
+                            InMemoryStruct<macho::RelocationEntry> &Res) const {
+  uint64_t Offset = (RelocationTableOffset +
+                     Index * sizeof(macho::RelocationEntry));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &Value) {
+  SwapValue(Value.StringIndex);
+  SwapValue(Value.Flags);
+  SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
+                                       unsigned Index,
+                           InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+  uint64_t Offset = (SymbolTableOffset +
+                     Index * sizeof(macho::SymbolTableEntry));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &Value) {
+  SwapValue(Value.StringIndex);
+  SwapValue(Value.Flags);
+  SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
+                                       unsigned Index,
+                         InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+  uint64_t Offset = (SymbolTableOffset +
+                     Index * sizeof(macho::Symbol64TableEntry));
+  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
diff --git a/lib/Object/Makefile b/lib/Object/Makefile
new file mode 100644
index 000000000000..79388dc97f1a
--- /dev/null
+++ b/lib/Object/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMObject
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
new file mode 100644
index 000000000000..161ae3a083f1
--- /dev/null
+++ b/lib/Object/ObjectFile.cpp
@@ -0,0 +1,71 @@
+//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+using namespace object;
+
+ObjectFile::ObjectFile(MemoryBuffer *Object)
+  : MapFile(Object) {
+  assert(MapFile && "Must be a valid MemoryBuffer!");
+  base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart());
+}
+
+ObjectFile::~ObjectFile() {
+  delete MapFile;
+}
+
+StringRef ObjectFile::getFilename() const {
+  return MapFile->getBufferIdentifier();
+}
+
+ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
+  if (!Object || Object->getBufferSize() < 64)
+    return 0;
+  sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
+                                static_cast<unsigned>(Object->getBufferSize()));
+  switch (type) {
+    case sys::ELF_Relocatable_FileType:
+    case sys::ELF_Executable_FileType:
+    case sys::ELF_SharedObject_FileType:
+    case sys::ELF_Core_FileType:
+      return createELFObjectFile(Object);
+    case sys::Mach_O_Object_FileType:
+    case sys::Mach_O_Executable_FileType:
+    case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+    case sys::Mach_O_Core_FileType:
+    case sys::Mach_O_PreloadExecutable_FileType:
+    case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+    case sys::Mach_O_DynamicLinker_FileType:
+    case sys::Mach_O_Bundle_FileType:
+    case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+      return 0;
+    case sys::COFF_FileType:
+      return createCOFFObjectFile(Object);
+    default:
+      llvm_unreachable("Unknown Object File Type");
+  }
+}
+
+ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+    return NULL;
+  return createObjectFile(File.take());
+}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index b87ddf9c95b5..e765ba0a27bb 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -175,7 +175,7 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
 {
   int unsignedExponent;
   bool negative, overflow;
-  int exponent;
+  int exponent = 0;
 
   assert(p != end && "Exponent has no digits");
 
@@ -194,11 +194,11 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
     assert(value < 10U && "Invalid character in exponent");
 
     unsignedExponent = unsignedExponent * 10 + value;
-    if (unsignedExponent > 65535)
+    if (unsignedExponent > 32767)
       overflow = true;
   }
 
-  if (exponentAdjustment > 65535 || exponentAdjustment < -65536)
+  if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
     overflow = true;
 
   if (!overflow) {
@@ -206,12 +206,12 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
     if (negative)
       exponent = -exponent;
     exponent += exponentAdjustment;
-    if (exponent > 65535 || exponent < -65536)
+    if (exponent > 32767 || exponent < -32768)
       overflow = true;
   }
 
   if (overflow)
-    exponent = negative ? -65536: 65535;
+    exponent = negative ? -32768: 32767;
 
   return exponent;
 }
@@ -3197,6 +3197,12 @@ APFloat::initFromAPInt(const APInt& api, bool isIEEE)
     llvm_unreachable(0);
 }
 
+APFloat
+APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
+{
+  return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
+}
+
 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
   APFloat Val(Sem, fcNormal, Negative);
 
@@ -3258,14 +3264,12 @@ APFloat::APFloat(const APInt& api, bool isIEEE)
 
 APFloat::APFloat(float f)
 {
-  APInt api = APInt(32, 0);
-  initFromAPInt(api.floatToBits(f));
+  initFromAPInt(APInt::floatToBits(f));
 }
 
 APFloat::APFloat(double d)
 {
-  APInt api = APInt(64, 0);
-  initFromAPInt(api.doubleToBits(d));
+  initFromAPInt(APInt::doubleToBits(d));
 }
 
 namespace {
@@ -3312,7 +3316,7 @@ namespace {
     // Truncate the significand down to its active bit count, but
     // don't try to drop below 32.
     unsigned newPrecision = std::max(32U, significand.getActiveBits());
-    significand.trunc(newPrecision);
+    significand = significand.trunc(newPrecision);
   }
 
 
@@ -3417,7 +3421,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
     // Nothing to do.
   } else if (exp > 0) {
     // Just shift left.
-    significand.zext(semantics->precision + exp);
+    significand = significand.zext(semantics->precision + exp);
     significand <<= exp;
     exp = 0;
   } else { /* exp < 0 */
@@ -3436,7 +3440,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
 
     // Multiply significand by 5^e.
     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
-    significand.zext(precision);
+    significand = significand.zext(precision);
     APInt five_to_the_i(precision, 5);
     while (true) {
       if (texp & 1) significand *= five_to_the_i;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 8a212a291f24..77033428b577 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -361,7 +361,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
   if (!rhsWords) {
     // X * 0 ===> 0
-    clear();
+    clearAllBits();
     return *this;
   }
 
@@ -373,7 +373,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 
   // Copy result back into *this
-  clear();
+  clearAllBits();
   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 
@@ -483,6 +483,7 @@ APInt APInt::operator-(const APInt& RHS) const {
 }
 
 bool APInt::operator[](unsigned bitPosition) const {
+  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
   return (maskBit(bitPosition) &
           (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
 }
@@ -561,12 +562,12 @@ bool APInt::slt(const APInt& RHS) const {
   bool rhsNeg = rhs.isNegative();
   if (lhsNeg) {
     // Sign bit is set so perform two's complement to make it positive
-    lhs.flip();
+    lhs.flipAllBits();
     lhs++;
   }
   if (rhsNeg) {
     // Sign bit is set so perform two's complement to make it positive
-    rhs.flip();
+    rhs.flipAllBits();
     rhs++;
   }
 
@@ -583,22 +584,20 @@ bool APInt::slt(const APInt& RHS) const {
     return lhs.ult(rhs);
 }
 
-APInt& APInt::set(unsigned bitPosition) {
+void APInt::setBit(unsigned bitPosition) {
   if (isSingleWord())
     VAL |= maskBit(bitPosition);
   else
     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
-  return *this;
 }
 
 /// Set the given bit to 0 whose position is given as "bitPosition".
 /// @brief Set a given bit to 0.
-APInt& APInt::clear(unsigned bitPosition) {
+void APInt::clearBit(unsigned bitPosition) {
   if (isSingleWord())
     VAL &= ~maskBit(bitPosition);
   else
     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
-  return *this;
 }
 
 /// @brief Toggle every bit to its opposite value.
@@ -606,11 +605,10 @@ APInt& APInt::clear(unsigned bitPosition) {
 /// Toggle a given bit to its opposite value whose position is given
 /// as "bitPosition".
 /// @brief Toggles a given bit to its opposite value.
-APInt& APInt::flip(unsigned bitPosition) {
+void APInt::flipBit(unsigned bitPosition) {
   assert(bitPosition < BitWidth && "Out of the bit-width range!");
-  if ((*this)[bitPosition]) clear(bitPosition);
-  else set(bitPosition);
-  return *this;
+  if ((*this)[bitPosition]) clearBit(bitPosition);
+  else setBit(bitPosition);
 }
 
 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
@@ -761,10 +759,6 @@ APInt APInt::getLoBits(unsigned numBits) const {
                         BitWidth - numBits);
 }
 
-bool APInt::isPowerOf2() const {
-  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
-}
-
 unsigned APInt::countLeadingZerosSlowCase() const {
   // Treat the most significand word differently because it might have
   // meaningless bits set beyond the precision.
@@ -1001,96 +995,90 @@ double APInt::roundToDouble(bool isSigned) const {
 }
 
 // Truncate to new width.
-APInt &APInt::trunc(unsigned width) {
+APInt APInt::trunc(unsigned width) const {
   assert(width < BitWidth && "Invalid APInt Truncate request");
   assert(width && "Can't truncate to 0 bits");
-  unsigned wordsBefore = getNumWords();
-  BitWidth = width;
-  unsigned wordsAfter = getNumWords();
-  if (wordsBefore != wordsAfter) {
-    if (wordsAfter == 1) {
-      uint64_t *tmp = pVal;
-      VAL = pVal[0];
-      delete [] tmp;
-    } else {
-      uint64_t *newVal = getClearedMemory(wordsAfter);
-      for (unsigned i = 0; i < wordsAfter; ++i)
-        newVal[i] = pVal[i];
-      delete [] pVal;
-      pVal = newVal;
-    }
-  }
-  return clearUnusedBits();
+
+  if (width <= APINT_BITS_PER_WORD)
+    return APInt(width, getRawData()[0]);
+
+  APInt Result(getMemory(getNumWords(width)), width);
+
+  // Copy full words.
+  unsigned i;
+  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+    Result.pVal[i] = pVal[i];
+
+  // Truncate and copy any partial word.
+  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+  if (bits != 0)
+    Result.pVal[i] = pVal[i] << bits >> bits;
+
+  return Result;
 }
 
 // Sign extend to a new width.
-APInt &APInt::sext(unsigned width) {
+APInt APInt::sext(unsigned width) const {
   assert(width > BitWidth && "Invalid APInt SignExtend request");
-  // If the sign bit isn't set, this is the same as zext.
-  if (!isNegative()) {
-    zext(width);
-    return *this;
+
+  if (width <= APINT_BITS_PER_WORD) {
+    uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+    val = (int64_t)val >> (width - BitWidth);
+    return APInt(width, val >> (APINT_BITS_PER_WORD - width));
   }
 
-  // The sign bit is set. First, get some facts
-  unsigned wordsBefore = getNumWords();
-  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
-  BitWidth = width;
-  unsigned wordsAfter = getNumWords();
-
-  // Mask the high order word appropriately
-  if (wordsBefore == wordsAfter) {
-    unsigned newWordBits = width % APINT_BITS_PER_WORD;
-    // The extension is contained to the wordsBefore-1th word.
-    uint64_t mask = ~0ULL;
-    if (newWordBits)
-      mask >>= APINT_BITS_PER_WORD - newWordBits;
-    mask <<= wordBits;
-    if (wordsBefore == 1)
-      VAL |= mask;
-    else
-      pVal[wordsBefore-1] |= mask;
-    return clearUnusedBits();
+  APInt Result(getMemory(getNumWords(width)), width);
+
+  // Copy full words.
+  unsigned i;
+  uint64_t word = 0;
+  for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
+    word = getRawData()[i];
+    Result.pVal[i] = word;
   }
 
-  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
-  uint64_t *newVal = getMemory(wordsAfter);
-  if (wordsBefore == 1)
-    newVal[0] = VAL | mask;
-  else {
-    for (unsigned i = 0; i < wordsBefore; ++i)
-      newVal[i] = pVal[i];
-    newVal[wordsBefore-1] |= mask;
+  // Read and sign-extend any partial word.
+  unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
+  if (bits != 0)
+    word = (int64_t)getRawData()[i] << bits >> bits;
+  else
+    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+
+  // Write remaining full words.
+  for (; i != width / APINT_BITS_PER_WORD; i++) {
+    Result.pVal[i] = word;
+    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
   }
-  for (unsigned i = wordsBefore; i < wordsAfter; i++)
-    newVal[i] = -1ULL;
-  if (wordsBefore != 1)
-    delete [] pVal;
-  pVal = newVal;
-  return clearUnusedBits();
+
+  // Write any partial word.
+  bits = (0 - width) % APINT_BITS_PER_WORD;
+  if (bits != 0)
+    Result.pVal[i] = word << bits >> bits;
+
+  return Result;
 }
 
 //  Zero extend to a new width.
-APInt &APInt::zext(unsigned width) {
+APInt APInt::zext(unsigned width) const {
   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
-  unsigned wordsBefore = getNumWords();
-  BitWidth = width;
-  unsigned wordsAfter = getNumWords();
-  if (wordsBefore != wordsAfter) {
-    uint64_t *newVal = getClearedMemory(wordsAfter);
-    if (wordsBefore == 1)
-      newVal[0] = VAL;
-    else
-      for (unsigned i = 0; i < wordsBefore; ++i)
-        newVal[i] = pVal[i];
-    if (wordsBefore != 1)
-      delete [] pVal;
-    pVal = newVal;
-  }
-  return *this;
+
+  if (width <= APINT_BITS_PER_WORD)
+    return APInt(width, VAL);
+
+  APInt Result(getMemory(getNumWords(width)), width);
+
+  // Copy words.
+  unsigned i;
+  for (i = 0; i != getNumWords(); i++)
+    Result.pVal[i] = getRawData()[i];
+
+  // Zero remaining words.
+  memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+
+  return Result;
 }
 
-APInt &APInt::zextOrTrunc(unsigned width) {
+APInt APInt::zextOrTrunc(unsigned width) const {
   if (BitWidth < width)
     return zext(width);
   if (BitWidth > width)
@@ -1098,7 +1086,7 @@ APInt &APInt::zextOrTrunc(unsigned width) {
   return *this;
 }
 
-APInt &APInt::sextOrTrunc(unsigned width) {
+APInt APInt::sextOrTrunc(unsigned width) const {
   if (BitWidth < width)
     return sext(width);
   if (BitWidth > width)
@@ -1873,7 +1861,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
       if (!Quotient->isSingleWord())
         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
     } else
-      Quotient->clear();
+      Quotient->clearAllBits();
 
     // The quotient is in Q. Reconstitute the quotient into Quotient's low
     // order words.
@@ -1904,7 +1892,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
       if (!Remainder->isSingleWord())
         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
     } else
-      Remainder->clear();
+      Remainder->clearAllBits();
 
     // The remainder is in R. Reconstitute the remainder into Remainder's low
     // order words.
@@ -2046,6 +2034,64 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
 }
 
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this+RHS;
+  Overflow = isNonNegative() == RHS.isNonNegative() &&
+             Res.isNonNegative() != isNonNegative();
+  return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this+RHS;
+  Overflow = Res.ult(RHS);
+  return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this - RHS;
+  Overflow = isNonNegative() != RHS.isNonNegative() &&
+             Res.isNonNegative() != isNonNegative();
+  return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this-RHS;
+  Overflow = Res.ugt(*this);
+  return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+  // MININT/-1  -->  overflow.
+  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+  return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+  APInt Res = *this * RHS;
+  
+  if (*this != 0 && RHS != 0)
+    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+  else
+    Overflow = false;
+  return Res;
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+  Overflow = ShAmt >= getBitWidth();
+  if (Overflow)
+    ShAmt = getBitWidth()-1;
+
+  if (isNonNegative()) // Don't allow sign change.
+    Overflow = ShAmt >= countLeadingZeros();
+  else
+    Overflow = ShAmt >= countLeadingOnes();
+  
+  return *this << ShAmt;
+}
+
+
+
+
 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   // Check our assumptions here
   assert(!str.empty() && "Invalid string length");
@@ -2101,7 +2147,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   // If its negative, put it in two's complement form
   if (isNeg) {
     (*this)--;
-    this->flip();
+    this->flipAllBits();
   }
 }
 
@@ -2149,7 +2195,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
     // They want to print the signed version and it is a negative value
     // Flip the bits and add one to turn it into the equivalent positive
     // value and put a '-' in the result.
-    Tmp.flip();
+    Tmp.flipAllBits();
     Tmp++;
     Str.push_back('-');
   }
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 90df262336c5..5e27df6628eb 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Allocator.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Recycler.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
 #include <cstring>
 
 namespace llvm {
@@ -44,6 +44,12 @@ char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
 /// StartNewSlab - Allocate a new slab and move the bump pointers over into
 /// the new slab.  Modifies CurPtr and End.
 void BumpPtrAllocator::StartNewSlab() {
+  // If we allocated a big number of slabs already it's likely that we're going
+  // to allocate more. Increase slab size to reduce mallocs and possibly memory
+  // overhead. The factors are chosen conservatively to avoid overallocation.
+  if (BytesAllocated >= SlabSize * 128)
+    SlabSize *= 2;
+
   MemSlab *NewSlab = Allocator.Allocate(SlabSize);
   NewSlab->NextPtr = CurSlab;
   CurSlab = NewSlab;
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
new file mode 100644
index 000000000000..c7b4bff27948
--- /dev/null
+++ b/lib/Support/Atomic.cpp
@@ -0,0 +1,112 @@
+//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Atomic.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#if defined(_MSC_VER)
+#include <windows.h>
+#undef MemoryFence
+#endif
+
+void sys::MemoryFence() {
+#if LLVM_MULTITHREADED==0
+  return;
+#else
+#  if defined(__GNUC__)
+  __sync_synchronize();
+#  elif defined(_MSC_VER)
+  MemoryBarrier();
+#  else
+# error No memory fence implementation for your platform!
+#  endif
+#endif
+}
+
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+                                  sys::cas_flag new_value,
+                                  sys::cas_flag old_value) {
+#if LLVM_MULTITHREADED==0
+  sys::cas_flag result = *ptr;
+  if (result == old_value)
+    *ptr = new_value;
+  return result;
+#elif defined(__GNUC__)
+  return __sync_val_compare_and_swap(ptr, old_value, new_value);
+#elif defined(_MSC_VER)
+  return InterlockedCompareExchange(ptr, new_value, old_value);
+#else
+#  error No compare-and-swap implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+  ++(*ptr);
+  return *ptr;
+#elif defined(__GNUC__)
+  return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+  return InterlockedIncrement(ptr);
+#else
+#  error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+  --(*ptr);
+  return *ptr;
+#elif defined(__GNUC__)
+  return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+  return InterlockedDecrement(ptr);
+#else
+#  error No atomic decrement implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+#if LLVM_MULTITHREADED==0
+  *ptr += val;
+  return *ptr;
+#elif defined(__GNUC__)
+  return __sync_add_and_fetch(ptr, val);
+#elif defined(_MSC_VER)
+  return InterlockedExchangeAdd(ptr, val) + val;
+#else
+#  error No atomic add implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original * val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
+}
+
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original / val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
+}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 0c70a402654e..a0e997d349f9 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -1,3 +1,9 @@
+## FIXME: This only requires RTTI because tblgen uses it.  Fix that.
+set(LLVM_REQUIRES_RTTI 1)
+if( MINGW )
+  set(LLVM_REQUIRES_EH 1)
+endif()
+
 add_llvm_library(LLVMSupport
   APFloat.cpp
   APInt.cpp
@@ -16,6 +22,8 @@ add_llvm_library(LLVMSupport
   FoldingSet.cpp
   FormattedStream.cpp
   GraphWriter.cpp
+  IntEqClasses.cpp
+  IntervalMap.cpp
   IsInf.cpp
   IsNAN.cpp
   ManagedStatic.cpp
@@ -35,6 +43,7 @@ add_llvm_library(LLVMSupport
   SystemUtils.cpp
   TargetRegistry.cpp
   Timer.cpp
+  ToolOutputFile.cpp
   Triple.cpp
   Twine.cpp
   raw_os_ostream.cpp
@@ -44,6 +53,51 @@ add_llvm_library(LLVMSupport
   regexec.c
   regfree.c
   regstrlcpy.c
-  )
 
-target_link_libraries (LLVMSupport LLVMSystem)
+# System
+  Atomic.cpp
+  Disassembler.cpp
+  DynamicLibrary.cpp
+  Errno.cpp
+  Host.cpp
+  IncludeFile.cpp
+  Memory.cpp
+  Mutex.cpp
+  Path.cpp
+  PathV2.cpp
+  Process.cpp
+  Program.cpp
+  RWMutex.cpp
+  SearchForAddressOfSpecialSymbol.cpp
+  Signals.cpp
+  system_error.cpp
+  ThreadLocal.cpp
+  Threading.cpp
+  TimeValue.cpp
+  Valgrind.cpp
+  Unix/Host.inc
+  Unix/Memory.inc
+  Unix/Mutex.inc
+  Unix/Path.inc
+  Unix/PathV2.inc
+  Unix/Process.inc
+  Unix/Program.inc
+  Unix/RWMutex.inc
+  Unix/Signals.inc
+  Unix/system_error.inc
+  Unix/ThreadLocal.inc
+  Unix/TimeValue.inc
+  Windows/DynamicLibrary.inc
+  Windows/Host.inc
+  Windows/Memory.inc
+  Windows/Mutex.inc
+  Windows/Path.inc
+  Windows/PathV2.inc
+  Windows/Process.inc
+  Windows/Program.inc
+  Windows/RWMutex.inc
+  Windows/Signals.inc
+  Windows/system_error.inc
+  Windows/ThreadLocal.inc
+  Windows/TimeValue.inc
+  )
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index ae66110ded61..7e744993a7cb 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -22,9 +22,10 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 #include "llvm/Target/TargetRegistry.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -179,6 +180,45 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
   return I->second;
 }
 
+/// LookupNearestOption - Lookup the closest match to the option specified by
+/// the specified option on the command line.  If there is a value specified
+/// (after an equal sign) return that as well.  This assumes that leading dashes
+/// have already been stripped.
+static Option *LookupNearestOption(StringRef Arg,
+                                   const StringMap<Option*> &OptionsMap,
+                                   const char *&NearestString) {
+  // Reject all dashes.
+  if (Arg.empty()) return 0;
+
+  // Split on any equal sign.
+  StringRef LHS = Arg.split('=').first;
+
+  // Find the closest match.
+  Option *Best = 0;
+  unsigned BestDistance = 0;
+  for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
+         ie = OptionsMap.end(); it != ie; ++it) {
+    Option *O = it->second;
+    SmallVector<const char*, 16> OptionNames;
+    O->getExtraOptionNames(OptionNames);
+    if (O->ArgStr[0])
+      OptionNames.push_back(O->ArgStr);
+
+    for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+      StringRef Name = OptionNames[i];
+      unsigned Distance = StringRef(Name).edit_distance(
+        Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+      if (!Best || Distance < BestDistance) {
+        Best = O;
+        NearestString = OptionNames[i];
+        BestDistance = Distance;
+      }
+    }
+  }
+
+  return Best;
+}
+
 /// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurence() that
 /// does special handling of cl::CommaSeparated options.
 static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
@@ -463,10 +503,6 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
       const sys::FileStatus *FileStat = respFile.getFileStatus();
       if (FileStat && FileStat->getSize() != 0) {
 
-        // Mmap the response file into memory.
-        OwningPtr<MemoryBuffer>
-          respFilePtr(MemoryBuffer::getFile(respFile.c_str()));
-
         // If we could open the file, parse its contents, otherwise
         // pass the @file option verbatim.
 
@@ -475,7 +511,9 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
         // itself contain additional @file options; any such options will be
         // processed recursively.")
 
-        if (respFilePtr != 0) {
+        // Mmap the response file into memory.
+        OwningPtr<MemoryBuffer> respFilePtr;
+        if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
           ParseCStringVector(newArgv, respFilePtr->getBufferStart());
           continue;
         }
@@ -506,7 +544,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
   }
 
   // Copy the program name into ProgName, making sure not to overflow it.
-  std::string ProgName = sys::Path(argv[0]).getLast();
+  std::string ProgName = sys::path::filename(argv[0]);
   size_t Len = std::min(ProgName.size(), size_t(79));
   memcpy(ProgramName, ProgName.data(), Len);
   ProgramName[Len] = '\0';
@@ -572,6 +610,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
   bool DashDashFound = false;  // Have we read '--'?
   for (int i = 1; i < argc; ++i) {
     Option *Handler = 0;
+    Option *NearestHandler = 0;
+    const char *NearestHandlerString = 0;
     StringRef Value;
     StringRef ArgName = "";
 
@@ -645,12 +685,25 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
       if (Handler == 0)
         Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
                                                 ErrorParsing, Opts);
+
+      // Otherwise, look for the closest available option to report to the user
+      // in the upcoming error.
+      if (Handler == 0 && SinkOpts.empty())
+        NearestHandler = LookupNearestOption(ArgName, Opts,
+                                             NearestHandlerString);
     }
 
     if (Handler == 0) {
       if (SinkOpts.empty()) {
         errs() << ProgramName << ": Unknown command line argument '"
              << argv[i] << "'.  Try: '" << argv[0] << " -help'\n";
+
+        if (NearestHandler) {
+          // If we know a near match, report it as well.
+          errs() << ProgramName << ": Did you mean '-"
+                 << NearestHandlerString << "'?\n";
+        }
+
         ErrorParsing = true;
       } else {
         for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
@@ -765,6 +818,15 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
     }
   }
 
+  // Now that we know if -debug is specified, we can use it.
+  // Note that if ReadResponseFiles == true, this must be done before the
+  // memory allocated for the expanded command line is free()d below.
+  DEBUG(dbgs() << "Args: ";
+        for (int i = 0; i < argc; ++i)
+          dbgs() << argv[i] << ' ';
+        dbgs() << '\n';
+       );
+
   // Free all of the memory allocated to the map.  Command line options may only
   // be processed once!
   Opts.clear();
@@ -779,12 +841,6 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
       free(*i);
   }
 
-  DEBUG(dbgs() << "Args: ";
-        for (int i = 0; i < argc; ++i)
-          dbgs() << argv[i] << ' ';
-        dbgs() << '\n';
-       );
-
   // If we had an error processing our arguments, don't let the program execute
   if (ErrorParsing) exit(1);
 }
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 8ef3785f5331..493f7083dbb3 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -51,6 +51,9 @@ ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
 
 ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
                                             const ConstantRange &CR) {
+  if (CR.isEmptySet())
+    return CR;
+
   uint32_t W = CR.getBitWidth();
   switch (Pred) {
     default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
@@ -60,10 +63,18 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
       if (CR.isSingleElement())
         return ConstantRange(CR.getUpper(), CR.getLower());
       return ConstantRange(W);
-    case ICmpInst::ICMP_ULT:
-      return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax());
-    case ICmpInst::ICMP_SLT:
-      return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax());
+    case ICmpInst::ICMP_ULT: {
+      APInt UMax(CR.getUnsignedMax());
+      if (UMax.isMinValue())
+        return ConstantRange(W, /* empty */ false);
+      return ConstantRange(APInt::getMinValue(W), UMax);
+    }
+    case ICmpInst::ICMP_SLT: {
+      APInt SMax(CR.getSignedMax());
+      if (SMax.isMinSignedValue())
+        return ConstantRange(W, /* empty */ false);
+      return ConstantRange(APInt::getSignedMinValue(W), SMax);
+    }
     case ICmpInst::ICMP_ULE: {
       APInt UMax(CR.getUnsignedMax());
       if (UMax.isMaxValue())
@@ -72,15 +83,22 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
     }
     case ICmpInst::ICMP_SLE: {
       APInt SMax(CR.getSignedMax());
-      if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue())
+      if (SMax.isMaxSignedValue())
         return ConstantRange(W);
       return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
     }
-    case ICmpInst::ICMP_UGT:
-      return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W));
-    case ICmpInst::ICMP_SGT:
-      return ConstantRange(CR.getSignedMin() + 1,
-                           APInt::getSignedMinValue(W));
+    case ICmpInst::ICMP_UGT: {
+      APInt UMin(CR.getUnsignedMin());
+      if (UMin.isMaxValue())
+        return ConstantRange(W, /* empty */ false);
+      return ConstantRange(UMin + 1, APInt::getNullValue(W));
+    }
+    case ICmpInst::ICMP_SGT: {
+      APInt SMin(CR.getSignedMin());
+      if (SMin.isMaxSignedValue())
+        return ConstantRange(W, /* empty */ false);
+      return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+    }
     case ICmpInst::ICMP_UGE: {
       APInt UMin(CR.getUnsignedMin());
       if (UMin.isMinValue())
@@ -115,6 +133,14 @@ bool ConstantRange::isWrappedSet() const {
   return Lower.ugt(Upper);
 }
 
+/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+/// its bitwidth, for example: i8 [120, 140).
+///
+bool ConstantRange::isSignWrappedSet() const {
+  return contains(APInt::getSignedMaxValue(getBitWidth())) &&
+         contains(APInt::getSignedMinValue(getBitWidth()));
+}
+
 /// getSetSize - Return the number of elements in this set.
 ///
 APInt ConstantRange::getSetSize() const {
@@ -408,15 +434,15 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
 /// correspond to the possible range of values as if the source range had been
 /// zero extended.
 ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+  if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
   unsigned SrcTySize = getBitWidth();
   assert(SrcTySize < DstTySize && "Not a value extension");
-  if (isFullSet())
-    // Change a source full set into [0, 1 << 8*numbytes)
+  if (isFullSet() || isWrappedSet())
+    // Change into [0, 1 << src bit width)
     return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize));
 
-  APInt L = Lower; L.zext(DstTySize);
-  APInt U = Upper; U.zext(DstTySize);
-  return ConstantRange(L, U);
+  return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
 }
 
 /// signExtend - Return a new range in the specified integer type, which must
@@ -424,16 +450,16 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
 /// correspond to the possible range of values as if the source range had been
 /// sign extended.
 ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+  if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
   unsigned SrcTySize = getBitWidth();
   assert(SrcTySize < DstTySize && "Not a value extension");
-  if (isFullSet()) {
+  if (isFullSet() || isSignWrappedSet()) {
     return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
                          APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
   }
 
-  APInt L = Lower; L.sext(DstTySize);
-  APInt U = Upper; U.sext(DstTySize);
-  return ConstantRange(L, U);
+  return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
 }
 
 /// truncate - Return a new range in the specified integer type, which must be
@@ -447,9 +473,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   if (isFullSet() || getSetSize().ugt(Size))
     return ConstantRange(DstTySize, /*isFullSet=*/true);
 
-  APInt L = Lower; L.trunc(DstTySize);
-  APInt U = Upper; U.trunc(DstTySize);
-  return ConstantRange(L, U);
+  return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
 }
 
 /// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
@@ -595,6 +619,32 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
   return ConstantRange(Lower, Upper);
 }
 
+ConstantRange
+ConstantRange::binaryAnd(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+  // TODO: replace this with something less conservative
+
+  APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
+  if (umin.isAllOnesValue())
+    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+  return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+}
+
+ConstantRange
+ConstantRange::binaryOr(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+  // TODO: replace this with something less conservative
+
+  APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+  if (umax.isMinValue())
+    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+  return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+}
+
 ConstantRange
 ConstantRange::shl(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 49258ede83c1..bf8ca3f844b4 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -10,8 +10,8 @@
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/ThreadLocal.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
 #include <setjmp.h>
 #include <cstdio>
 using namespace llvm;
@@ -128,6 +128,9 @@ static void CrashRecoverySignalHandler(int Signal) {
     // This call of Disable isn't thread safe, but it doesn't actually matter.
     CrashRecoveryContext::Disable();
     raise(Signal);
+
+    // The signal will be thrown once the signal mask is restored.
+    return;
   }
 
   // Unblock the signal we received.
@@ -202,3 +205,26 @@ const std::string &CrashRecoveryContext::getBacktrace() const {
   assert(CRC->Failed && "No crash was detected!");
   return CRC->Backtrace;
 }
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+  void (*UserFn)(void*);
+  void *UserData;
+  CrashRecoveryContext *CRC;
+  bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+  RunSafelyOnThreadInfo *Info =
+    reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+  Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+                                             unsigned RequestedStackSize) {
+  RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+  llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+  return Info.Result;
+}
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 7f48f8aae717..9fdb12ecfdcb 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -26,7 +26,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/circular_raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
 
 using namespace llvm;
 
diff --git a/lib/Support/Disassembler.cpp b/lib/Support/Disassembler.cpp
new file mode 100644
index 000000000000..6362aff43a9d
--- /dev/null
+++ b/lib/Support/Disassembler.cpp
@@ -0,0 +1,75 @@
+//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler()
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+  // We have option to enable udis86 library.
+# if USE_UDIS86
+  return true;
+#else
+  return false;
+#endif
+#else
+  return false;
+#endif
+}
+
+std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
+                                         uint64_t pc) {
+  std::stringstream res;
+
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+  && USE_UDIS86
+  unsigned bits;
+# if defined(__i386__)
+  bits = 32;
+# else
+  bits = 64;
+# endif
+
+  ud_t ud_obj;
+
+  ud_init(&ud_obj);
+  ud_set_input_buffer(&ud_obj, start, length);
+  ud_set_mode(&ud_obj, bits);
+  ud_set_pc(&ud_obj, pc);
+  ud_set_syntax(&ud_obj, UD_SYN_ATT);
+
+  res << std::setbase(16)
+      << std::setw(bits/4);
+
+  while (ud_disassemble(&ud_obj)) {
+    res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
+  }
+#else
+  res << "No disassembler available. See configure help for options.\n";
+#endif
+
+  return res.str();
+}
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 96ce9d395b38..9799ef54792b 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -78,6 +78,10 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
   case DW_TAG_shared_type:               return "DW_TAG_shared_type";
   case DW_TAG_lo_user:                   return "DW_TAG_lo_user";
   case DW_TAG_hi_user:                   return "DW_TAG_hi_user";
+  case DW_TAG_auto_variable:             return "DW_TAG_auto_variable";
+  case DW_TAG_arg_variable:              return "DW_TAG_arg_variable";
+  case DW_TAG_return_variable:           return "DW_TAG_return_variable";
+  case DW_TAG_vector_type:               return "DW_TAG_vector_type";
   }
   return 0;
 }
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
new file mode 100644
index 000000000000..455c3801cc68
--- /dev/null
+++ b/lib/Support/DynamicLibrary.cpp
@@ -0,0 +1,170 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements the operating system DynamicLibrary concept.
+//
+// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
+// not thread safe!
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <vector>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+static std::map<std::string, void*> *ExplicitSymbols = 0;
+
+namespace {
+
+struct ExplicitSymbolsDeleter {
+  ~ExplicitSymbolsDeleter() {
+    if (ExplicitSymbols)
+      delete ExplicitSymbols;
+  }
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
+
+void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+                                          void *symbolValue) {
+  if (ExplicitSymbols == 0)
+    ExplicitSymbols = new std::map<std::string, void*>();
+  (*ExplicitSymbols)[symbolName] = symbolValue;
+}
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
+
+#else
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<void *> *OpenedHandles = 0;
+
+
+static SmartMutex<true>& getMutex() {
+  static SmartMutex<true> HandlesMutex;
+  return HandlesMutex;
+}
+
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+                                            std::string *ErrMsg) {
+  void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
+  if (H == 0) {
+    if (ErrMsg) *ErrMsg = dlerror();
+    return true;
+  }
+#ifdef __CYGWIN__
+  // Cygwin searches symbols only in the main
+  // with the handle of dlopen(NULL, RTLD_GLOBAL).
+  if (Filename == NULL)
+    H = RTLD_DEFAULT;
+#endif
+  SmartScopedLock<true> Lock(getMutex());
+  if (OpenedHandles == 0)
+    OpenedHandles = new std::vector<void *>();
+  OpenedHandles->push_back(H);
+  return false;
+}
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+                                            std::string *ErrMsg) {
+  if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
+  return true;
+}
+#endif
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+  // First check symbols added via AddSymbol().
+  if (ExplicitSymbols) {
+    std::map<std::string, void *>::iterator I =
+      ExplicitSymbols->find(symbolName);
+    std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+
+    if (I != E)
+      return I->second;
+  }
+
+#if HAVE_DLFCN_H
+  // Now search the libraries.
+  SmartScopedLock<true> Lock(getMutex());
+  if (OpenedHandles) {
+    for (std::vector<void *>::iterator I = OpenedHandles->begin(),
+         E = OpenedHandles->end(); I != E; ++I) {
+      //lt_ptr ptr = lt_dlsym(*I, symbolName);
+      void *ptr = dlsym(*I, symbolName);
+      if (ptr) {
+        return ptr;
+      }
+    }
+  }
+#endif
+
+  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
+    return Result;
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+   if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__)
+  {
+    EXPLICIT_SYMBOL(stderr);
+    EXPLICIT_SYMBOL(stdout);
+    EXPLICIT_SYMBOL(stdin);
+  }
+#else
+  // For everything else, we want to check to make sure the symbol isn't defined
+  // as a macro before using EXPLICIT_SYMBOL.
+  {
+#ifndef stdin
+    EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+    EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+    EXPLICIT_SYMBOL(stderr);
+#endif
+  }
+#endif
+#undef EXPLICIT_SYMBOL
+
+  return 0;
+}
+
+#endif // LLVM_ON_WIN32
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
new file mode 100644
index 000000000000..18c658173a7a
--- /dev/null
+++ b/lib/Support/Errno.cpp
@@ -0,0 +1,74 @@
+//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the errno wrappers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+
+#if HAVE_STRING_H
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+#if HAVE_ERRNO_H
+std::string StrError() {
+  return StrError(errno);
+}
+#endif  // HAVE_ERRNO_H
+
+std::string StrError(int errnum) {
+  const int MaxErrStrLen = 2000;
+  char buffer[MaxErrStrLen];
+  buffer[0] = '\0';
+  char* str = buffer;
+#ifdef HAVE_STRERROR_R
+  // strerror_r is thread-safe.
+  if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+    // glibc defines its own incompatible version of strerror_r
+    // which may not use the buffer supplied.
+    str = strerror_r(errnum,buffer,MaxErrStrLen-1);
+# else
+    strerror_r(errnum,buffer,MaxErrStrLen-1);
+# endif
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
+    if (errnum)
+      strerror_s(buffer, errnum);
+#elif defined(HAVE_STRERROR)
+  // Copy the thread un-safe result of strerror into
+  // the buffer as fast as possible to minimize impact
+  // of collision of strerror in multiple threads.
+  if (errnum)
+    strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
+  buffer[MaxErrStrLen-1] = '\0';
+#else
+  // Strange that this system doesn't even have strerror
+  // but, oh well, just use a generic message
+  sprintf(buffer, "Error #%d", errnum);
+#endif
+  return str;
+}
+
+}  // namespace sys
+}  // namespace llvm
+
+#endif  // HAVE_STRING_H
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 0b7af3e5905b..3579546d757d 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -16,8 +16,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/config.h"
 #include <cassert>
@@ -58,6 +58,10 @@ void llvm::report_fatal_error(const std::string &Reason) {
   report_fatal_error(Twine(Reason));
 }
 
+void llvm::report_fatal_error(StringRef Reason) {
+  report_fatal_error(Twine(Reason));
+}
+
 void llvm::report_fatal_error(const Twine &Reason) {
   if (ErrorHandler) {
     ErrorHandler(ErrorHandlerUserData, Reason.str());
@@ -69,7 +73,8 @@ void llvm::report_fatal_error(const Twine &Reason) {
     raw_svector_ostream OS(Buffer);
     OS << "LLVM ERROR: " << Reason << "\n";
     StringRef MessageStr = OS.str();
-    (void)::write(2, MessageStr.data(), MessageStr.size());
+    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+    (void)written; // If something went wrong, we deliberately just give up.
   }
 
   // If we reached here, we are failing ungracefully. Run the interrupt handlers
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 1bde2fe8a871..5dbabee7a7ed 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -15,7 +15,8 @@
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include <cstdlib>
@@ -108,17 +109,17 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P,
       SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
       // Strange exponential notation!
       StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
-      
+
       V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
       F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
     }
-    
+
     if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
       // Copy string into tmp buffer to replace the 'D' with an 'e'.
       SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
       // Strange exponential notation!
       StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
-      
+
       V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
       F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
     }
@@ -199,11 +200,20 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
 
   // Now its safe to mmap the files into memory becasue both files
   // have a non-zero size.
-  OwningPtr<MemoryBuffer> F1(MemoryBuffer::getFile(FileA.c_str(), Error));
-  OwningPtr<MemoryBuffer> F2(MemoryBuffer::getFile(FileB.c_str(), Error));
-  if (F1 == 0 || F2 == 0)
+  error_code ec;
+  OwningPtr<MemoryBuffer> F1;
+  if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+    if (Error)
+      *Error = ec.message();
     return 2;
-  
+  }
+  OwningPtr<MemoryBuffer> F2;
+  if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+    if (Error)
+      *Error = ec.message();
+    return 2;
+  }
+
   // Okay, now that we opened the files, scan them for the first difference.
   const char *File1Start = F1->getBufferStart();
   const char *File2Start = F2->getBufferStart();
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 29b595220887..a4f80a90d6d0 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
 #include <cassert>
 #include <cstring>
 using namespace llvm;
@@ -110,18 +111,32 @@ void FoldingSetNodeID::AddString(StringRef String) {
     Pos = (Units + 1) * 4;
   } else {
     // Otherwise do it the hard way.
-    for (Pos += 4; Pos <= Size; Pos += 4) {
-      unsigned V = ((unsigned char)String[Pos - 4] << 24) |
-                   ((unsigned char)String[Pos - 3] << 16) |
-                   ((unsigned char)String[Pos - 2] << 8) |
-                    (unsigned char)String[Pos - 1];
-      Bits.push_back(V);
+    // To be compatible with above bulk transfer, we need to take endianness
+    // into account.
+    if (sys::isBigEndianHost()) {
+      for (Pos += 4; Pos <= Size; Pos += 4) {
+        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+                     ((unsigned char)String[Pos - 3] << 16) |
+                     ((unsigned char)String[Pos - 2] << 8) |
+                      (unsigned char)String[Pos - 1];
+        Bits.push_back(V);
+      }
+    } else {
+      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+      for (Pos += 4; Pos <= Size; Pos += 4) {
+        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+                     ((unsigned char)String[Pos - 2] << 16) |
+                     ((unsigned char)String[Pos - 3] << 8) |
+                      (unsigned char)String[Pos - 4];
+        Bits.push_back(V);
+      }
     }
   }
   
   // With the leftover bits.
   unsigned V = 0;
-  // Pos will have overshot size by 4 - #bytes left over. 
+  // Pos will have overshot size by 4 - #bytes left over.
+  // No need to take endianness into account here - this is always executed.
   switch (Pos - Size) {
   case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
   case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index c72b5a1751b4..231ae48759e2 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
+#include <algorithm>
 
 using namespace llvm;
 
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index fdd6285a8c5e..0dba28a2530c 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/GraphWriter.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Config/config.h"
 using namespace llvm;
 
@@ -63,11 +63,37 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   args.push_back(0);
   
   errs() << "Running 'Graphviz' program... ";
-  if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg))
-    errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg
-           << "\n";
-  else
-    Filename.eraseFromDisk();
+  if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return;
+  }
+  Filename.eraseFromDisk();
+  errs() << " done. \n";
+
+#elif HAVE_XDOT_PY
+  std::vector<const char*> args;
+  args.push_back(LLVM_PATH_XDOT_PY);
+  args.push_back(Filename.c_str());
+
+  switch (program) {
+  case GraphProgram::DOT:   args.push_back("-f"); args.push_back("dot"); break;
+  case GraphProgram::FDP:   args.push_back("-f"); args.push_back("fdp"); break;
+  case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+  case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+  case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+  default: errs() << "Unknown graph layout name; using default.\n";
+  }
+  
+  args.push_back(0);
+
+  errs() << "Running 'xdot.py' program... ";
+  if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
+                                   &args[0],0,0,0,0,&ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return;
+  }
+  Filename.eraseFromDisk();
+  errs() << " done. \n";
 
 #elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
                    HAVE_TWOPI || HAVE_CIRCO))
@@ -128,8 +154,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   errs() << "Running '" << prog.str() << "' program... ";
 
   if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
-     errs() << "Error viewing graph " << Filename.str() << ": '"
-            << ErrMsg << "\n";
+    errs() << "Error: " << ErrMsg << "\n";
     return;
   }
   errs() << " done. \n";
@@ -144,7 +169,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   ErrMsg.clear();
   if (wait) {
      if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
-        errs() << "Error viewing graph: " << ErrMsg << "\n";
+        errs() << "Error: " << ErrMsg << "\n";
      Filename.eraseFromDisk();
      PSFilename.eraseFromDisk();
   }
@@ -163,8 +188,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   
   errs() << "Running 'dotty' program... ";
   if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
-     errs() << "Error viewing graph " << Filename.str() << ": "
-            << ErrMsg << "\n";
+     errs() << "Error: " << ErrMsg << "\n";
   } else {
 // Dotty spawns another app and doesn't wait until it returns
 #if defined (__MINGW32__) || defined (_WINDOWS)
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
new file mode 100644
index 000000000000..4dacf9691d6e
--- /dev/null
+++ b/lib/Support/Host.cpp
@@ -0,0 +1,307 @@
+//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements the operating system Host concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Host.h"
+#include "llvm/Config/config.h"
+#include <string.h>
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Host.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Host.inc"
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//
+//  Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments.  If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+                            unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+  #if defined(__GNUC__)
+    // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+    asm ("movq\t%%rbx, %%rsi\n\t"
+         "cpuid\n\t"
+         "xchgq\t%%rbx, %%rsi\n\t"
+         : "=a" (*rEAX),
+           "=S" (*rEBX),
+           "=c" (*rECX),
+           "=d" (*rEDX)
+         :  "a" (value));
+    return false;
+  #elif defined(_MSC_VER)
+    int registers[4];
+    __cpuid(registers, value);
+    *rEAX = registers[0];
+    *rEBX = registers[1];
+    *rECX = registers[2];
+    *rEDX = registers[3];
+    return false;
+  #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+  #if defined(__GNUC__)
+    asm ("movl\t%%ebx, %%esi\n\t"
+         "cpuid\n\t"
+         "xchgl\t%%ebx, %%esi\n\t"
+         : "=a" (*rEAX),
+           "=S" (*rEBX),
+           "=c" (*rECX),
+           "=d" (*rEDX)
+         :  "a" (value));
+    return false;
+  #elif defined(_MSC_VER)
+    __asm {
+      mov   eax,value
+      cpuid
+      mov   esi,rEAX
+      mov   dword ptr [esi],eax
+      mov   esi,rEBX
+      mov   dword ptr [esi],ebx
+      mov   esi,rECX
+      mov   dword ptr [esi],ecx
+      mov   esi,rEDX
+      mov   dword ptr [esi],edx
+    }
+    return false;
+  #endif
+#endif
+  return true;
+}
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
+                                 unsigned &Model) {
+  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+  Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
+  if (Family == 6 || Family == 0xf) {
+    if (Family == 0xf)
+      // Examine extended family ID if family ID is F.
+      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
+    // Examine extended model ID if family ID is 6 or F.
+    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+  }
+}
+
+std::string sys::getHostCPUName() {
+  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+  if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+    return "generic";
+  unsigned Family = 0;
+  unsigned Model  = 0;
+  DetectX86FamilyModel(EAX, Family, Model);
+
+  bool HasSSE3 = (ECX & 0x1);
+  GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  bool Em64T = (EDX >> 29) & 0x1;
+
+  union {
+    unsigned u[3];
+    char     c[12];
+  } text;
+
+  GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+  if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+    switch (Family) {
+    case 3:
+      return "i386";
+    case 4:
+      switch (Model) {
+      case 0: // Intel486 DX processors
+      case 1: // Intel486 DX processors
+      case 2: // Intel486 SX processors
+      case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+              // IntelDX2 processors
+      case 4: // Intel486 SL processor
+      case 5: // IntelSX2 processors
+      case 7: // Write-Back Enhanced IntelDX2 processors
+      case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
+      default: return "i486";
+      }
+    case 5:
+      switch (Model) {
+      case  1: // Pentium OverDrive processor for Pentium processor (60, 66),
+               // Pentium processors (60, 66)
+      case  2: // Pentium OverDrive processor for Pentium processor (75, 90,
+               // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+               // 150, 166, 200)
+      case  3: // Pentium OverDrive processors for Intel486 processor-based
+               // systems
+        return "pentium";
+
+      case  4: // Pentium OverDrive processor with MMX technology for Pentium
+               // processor (75, 90, 100, 120, 133), Pentium processor with
+               // MMX technology (166, 200)
+        return "pentium-mmx";
+
+      default: return "pentium";
+      }
+    case 6:
+      switch (Model) {
+      case  1: // Pentium Pro processor
+        return "pentiumpro";
+
+      case  3: // Intel Pentium II OverDrive processor, Pentium II processor,
+               // model 03
+      case  5: // Pentium II processor, model 05, Pentium II Xeon processor,
+               // model 05, and Intel Celeron processor, model 05
+      case  6: // Celeron processor, model 06
+        return "pentium2";
+
+      case  7: // Pentium III processor, model 07, and Pentium III Xeon
+               // processor, model 07
+      case  8: // Pentium III processor, model 08, Pentium III Xeon processor,
+               // model 08, and Celeron processor, model 08
+      case 10: // Pentium III Xeon processor, model 0Ah
+      case 11: // Pentium III processor, model 0Bh
+        return "pentium3";
+
+      case  9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+      case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+               // 0Dh. All processors are manufactured using the 90 nm process.
+        return "pentium-m";
+
+      case 14: // Intel Core Duo processor, Intel Core Solo processor, model
+               // 0Eh. All processors are manufactured using the 65 nm process.
+        return "yonah";
+
+      case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+               // mobile processor, Intel Core 2 Extreme processor, Intel
+               // Pentium Dual-Core processor, Intel Xeon processor, model
+               // 0Fh. All processors are manufactured using the 65 nm process.
+      case 22: // Intel Celeron processor model 16h. All processors are
+               // manufactured using the 65 nm process
+        return "core2";
+
+      case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+               // Integrated Processor with Intel QuickAssist Technology
+        return "i686"; // FIXME: ???
+
+      case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+               // 17h. All processors are manufactured using the 45 nm process.
+               //
+               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+        return "penryn";
+
+      case 26: // Intel Core i7 processor and Intel Xeon processor. All
+               // processors are manufactured using the 45 nm process.
+      case 29: // Intel Xeon processor MP. All processors are manufactured using
+               // the 45 nm process.
+      case 30: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
+               // As found in a Summer 2010 model iMac.
+      case 37: // Intel Core i7, laptop version.
+        return "corei7";
+
+      case 28: // Intel Atom processor. All processors are manufactured using
+               // the 45 nm process
+        return "atom";
+
+      default: return "i686";
+      }
+    case 15: {
+      switch (Model) {
+      case  0: // Pentium 4 processor, Intel Xeon processor. All processors are
+               // model 00h and manufactured using the 0.18 micron process.
+      case  1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+               // processor MP, and Intel Celeron processor. All processors are
+               // model 01h and manufactured using the 0.18 micron process.
+      case  2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
+               // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+               // processor, and Mobile Intel Celeron processor. All processors
+               // are model 02h and manufactured using the 0.13 micron process.
+        return (Em64T) ? "x86-64" : "pentium4";
+
+      case  3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+               // processor. All processors are model 03h and manufactured using
+               // the 90 nm process.
+      case  4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+               // Pentium D processor, Intel Xeon processor, Intel Xeon
+               // processor MP, Intel Celeron D processor. All processors are
+               // model 04h and manufactured using the 90 nm process.
+      case  6: // Pentium 4 processor, Pentium D processor, Pentium processor
+               // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+               // MP, Intel Celeron D processor. All processors are model 06h
+               // and manufactured using the 65 nm process.
+        return (Em64T) ? "nocona" : "prescott";
+
+      default:
+        return (Em64T) ? "x86-64" : "pentium4";
+      }
+    }
+
+    default:
+      return "generic";
+    }
+  } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+    // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
+    // appears to be no way to generate the wide variety of AMD-specific targets
+    // from the information returned from CPUID.
+    switch (Family) {
+      case 4:
+        return "i486";
+      case 5:
+        switch (Model) {
+        case 6:
+        case 7:  return "k6";
+        case 8:  return "k6-2";
+        case 9:
+        case 13: return "k6-3";
+        default: return "pentium";
+        }
+      case 6:
+        switch (Model) {
+        case 4:  return "athlon-tbird";
+        case 6:
+        case 7:
+        case 8:  return "athlon-mp";
+        case 10: return "athlon-xp";
+        default: return "athlon";
+        }
+      case 15:
+        if (HasSSE3)
+          return "k8-sse3";
+        switch (Model) {
+        case 1:  return "opteron";
+        case 5:  return "athlon-fx"; // also opteron
+        default: return "athlon64";
+        }
+      case 16:
+        return "amdfam10";
+    default:
+      return "generic";
+    }
+  }
+  return "generic";
+}
+#else
+std::string sys::getHostCPUName() {
+  return "generic";
+}
+#endif
+
+bool sys::getHostCPUFeatures(StringMap<bool> &Features){
+  return false;
+}
diff --git a/lib/Support/IncludeFile.cpp b/lib/Support/IncludeFile.cpp
new file mode 100644
index 000000000000..5da88261ce53
--- /dev/null
+++ b/lib/Support/IncludeFile.cpp
@@ -0,0 +1,20 @@
+//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IncludeFile constructor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/IncludeFile.h"
+
+using namespace llvm;
+
+// This constructor is used to ensure linking of other modules. See the
+// llvm/Support/IncludeFile.h header for details.
+IncludeFile::IncludeFile(const void*) {}
diff --git a/lib/Support/IntEqClasses.cpp b/lib/Support/IntEqClasses.cpp
new file mode 100644
index 000000000000..11344956e4c9
--- /dev/null
+++ b/lib/Support/IntEqClasses.cpp
@@ -0,0 +1,70 @@
+//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+
+using namespace llvm;
+
+void IntEqClasses::grow(unsigned N) {
+  assert(NumClasses == 0 && "grow() called after compress().");
+  EC.reserve(N);
+  while (EC.size() < N)
+    EC.push_back(EC.size());
+}
+
+void IntEqClasses::join(unsigned a, unsigned b) {
+  assert(NumClasses == 0 && "join() called after compress().");
+  unsigned eca = EC[a];
+  unsigned ecb = EC[b];
+  // Update pointers while searching for the leaders, compressing the paths
+  // incrementally. The larger leader will eventually be updated, joining the
+  // classes.
+  while (eca != ecb)
+    if (eca < ecb)
+      EC[b] = eca, b = ecb, ecb = EC[b];
+    else
+      EC[a] = ecb, a = eca, eca = EC[a];
+}
+
+unsigned IntEqClasses::findLeader(unsigned a) const {
+  assert(NumClasses == 0 && "findLeader() called after compress().");
+  while (a != EC[a])
+    a = EC[a];
+  return a;
+}
+
+void IntEqClasses::compress() {
+  if (NumClasses)
+    return;
+  for (unsigned i = 0, e = EC.size(); i != e; ++i)
+    EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]];
+}
+
+void IntEqClasses::uncompress() {
+  if (!NumClasses)
+    return;
+  SmallVector<unsigned, 8> Leader;
+  for (unsigned i = 0, e = EC.size(); i != e; ++i)
+    if (EC[i] < Leader.size())
+      EC[i] = Leader[EC[i]];
+    else
+      Leader.push_back(EC[i] = i);
+  NumClasses = 0;
+}
diff --git a/lib/Support/IntervalMap.cpp b/lib/Support/IntervalMap.cpp
new file mode 100644
index 000000000000..4dfcc404ca42
--- /dev/null
+++ b/lib/Support/IntervalMap.cpp
@@ -0,0 +1,161 @@
+//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the few non-templated functions in IntervalMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+
+namespace llvm {
+namespace IntervalMapImpl {
+
+void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) {
+  assert(!path.empty() && "Can't replace missing root");
+  path.front() = Entry(Root, Size, Offsets.first);
+  path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second));
+}
+
+NodeRef Path::getLeftSibling(unsigned Level) const {
+  // The root has no siblings.
+  if (Level == 0)
+    return NodeRef();
+
+  // Go up the tree until we can go left.
+  unsigned l = Level - 1;
+  while (l && path[l].offset == 0)
+    --l;
+
+  // We can't go left.
+  if (path[l].offset == 0)
+    return NodeRef();
+
+  // NR is the subtree containing our left sibling.
+  NodeRef NR = path[l].subtree(path[l].offset - 1);
+
+  // Keep right all the way down.
+  for (++l; l != Level; ++l)
+    NR = NR.subtree(NR.size() - 1);
+  return NR;
+}
+
+void Path::moveLeft(unsigned Level) {
+  assert(Level != 0 && "Cannot move the root node");
+
+  // Go up the tree until we can go left.
+  unsigned l = 0;
+  if (valid()) {
+    l = Level - 1;
+    while (path[l].offset == 0) {
+      assert(l != 0 && "Cannot move beyond begin()");
+      --l;
+    }
+  } else if (height() < Level)
+    // end() may have created a height=0 path.
+    path.resize(Level + 1, Entry(0, 0, 0));
+
+  // NR is the subtree containing our left sibling.
+  --path[l].offset;
+  NodeRef NR = subtree(l);
+
+  // Get the rightmost node in the subtree.
+  for (++l; l != Level; ++l) {
+    path[l] = Entry(NR, NR.size() - 1);
+    NR = NR.subtree(NR.size() - 1);
+  }
+  path[l] = Entry(NR, NR.size() - 1);
+}
+
+NodeRef Path::getRightSibling(unsigned Level) const {
+  // The root has no siblings.
+  if (Level == 0)
+    return NodeRef();
+
+  // Go up the tree until we can go right.
+  unsigned l = Level - 1;
+  while (l && atLastEntry(l))
+    --l;
+
+  // We can't go right.
+  if (atLastEntry(l))
+    return NodeRef();
+
+  // NR is the subtree containing our right sibling.
+  NodeRef NR = path[l].subtree(path[l].offset + 1);
+
+  // Keep left all the way down.
+  for (++l; l != Level; ++l)
+    NR = NR.subtree(0);
+  return NR;
+}
+
+void Path::moveRight(unsigned Level) {
+  assert(Level != 0 && "Cannot move the root node");
+
+  // Go up the tree until we can go right.
+  unsigned l = Level - 1;
+  while (l && atLastEntry(l))
+    --l;
+
+  // NR is the subtree containing our right sibling. If we hit end(), we have
+  // offset(0) == node(0).size().
+  if (++path[l].offset == path[l].size)
+    return;
+  NodeRef NR = subtree(l);
+
+  for (++l; l != Level; ++l) {
+    path[l] = Entry(NR, 0);
+    NR = NR.subtree(0);
+  }
+  path[l] = Entry(NR, 0);
+}
+
+
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+                   const unsigned *CurSize, unsigned NewSize[],
+                   unsigned Position, bool Grow) {
+  assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
+  assert(Position <= Elements && "Invalid position");
+  if (!Nodes)
+    return IdxPair();
+
+  // Trivial algorithm: left-leaning even distribution.
+  const unsigned PerNode = (Elements + Grow) / Nodes;
+  const unsigned Extra = (Elements + Grow) % Nodes;
+  IdxPair PosPair = IdxPair(Nodes, 0);
+  unsigned Sum = 0;
+  for (unsigned n = 0; n != Nodes; ++n) {
+    Sum += NewSize[n] = PerNode + (n < Extra);
+    if (PosPair.first == Nodes && Sum > Position)
+      PosPair = IdxPair(n, Position - (Sum - NewSize[n]));
+  }
+  assert(Sum == Elements + Grow && "Bad distribution sum");
+
+  // Subtract the Grow element that was added.
+  if (Grow) {
+    assert(PosPair.first < Nodes && "Bad algebra");
+    assert(NewSize[PosPair.first] && "Too few elements to need Grow");
+    --NewSize[PosPair.first];
+  }
+
+#ifndef NDEBUG
+  Sum = 0;
+  for (unsigned n = 0; n != Nodes; ++n) {
+    assert(NewSize[n] <= Capacity && "Overallocated node");
+    Sum += NewSize[n];
+  }
+  assert(Sum == Elements && "Bad distribution sum");
+#endif
+
+  return PosPair;
+}
+
+} // namespace IntervalMapImpl
+} // namespace llvm
+
diff --git a/lib/Support/Makefile b/lib/Support/Makefile
index 48c21f4fd9e0..d68e500ca5f4 100644
--- a/lib/Support/Makefile
+++ b/lib/Support/Makefile
@@ -14,4 +14,9 @@ BUILD_ARCHIVE = 1
 ## FIXME: This only requires RTTI because tblgen uses it.  Fix that.
 REQUIRES_RTTI = 1
 
+EXTRA_DIST = Unix Win32 README.txt
+
 include $(LEVEL)/Makefile.common
+
+CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts))
+CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts))
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 4e655a0f9eec..c767c15e71c9 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -13,7 +13,7 @@
 
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Config/config.h"
-#include "llvm/System/Atomic.h"
+#include "llvm/Support/Atomic.h"
 #include <cassert>
 using namespace llvm;
 
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
new file mode 100644
index 000000000000..a9689b2c39f2
--- /dev/null
+++ b/lib/Support/Memory.cpp
@@ -0,0 +1,74 @@
+//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for allocating memory and dealing
+// with memory mapped files
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Memory.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Memory.inc"
+#endif
+
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
+                                                   size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+
+#  if (defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+  sys_icache_invalidate(Addr, Len);
+#  endif
+
+#else
+
+#  if (defined(__POWERPC__) || defined (__ppc__) || \
+       defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+  const size_t LineSize = 32;
+
+  const intptr_t Mask = ~(LineSize - 1);
+  const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+  const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("dcbf 0, %0" : : "r"(Line));
+  asm volatile("sync");
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("icbi 0, %0" : : "r"(Line));
+  asm volatile("isync");
+#  elif defined(__arm__) && defined(__GNUC__)
+  // FIXME: Can we safely always call this for __GNUC__ everywhere?
+  char *Start = (char*) Addr;
+  char *End = Start + Len;
+  __clear_cache(Start, End);
+#  endif
+
+#endif  // end apple
+
+  ValgrindDiscardTranslations(Addr, Len);
+}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 542162d513b9..a0c650d6820b 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -15,14 +15,16 @@
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/System/Errno.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
 #include <cassert>
 #include <cstdio>
 #include <cstring>
 #include <cerrno>
+#include <new>
 #include <sys/types.h>
 #include <sys/stat.h>
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -34,6 +36,8 @@
 #include <fcntl.h>
 using namespace llvm;
 
+namespace { const llvm::error_code success; }
+
 //===----------------------------------------------------------------------===//
 // MemoryBuffer implementation itself.
 //===----------------------------------------------------------------------===//
@@ -142,22 +146,20 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
 /// if the Filename is "-".  If an error occurs, this returns null and fills
 /// in *ErrStr with a reason.  If stdin is empty, this API (unlike getSTDIN)
 /// returns an empty buffer.
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
-                                           std::string *ErrStr,
-                                           int64_t FileSize,
-                                           struct stat *FileInfo) {
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
+                                        OwningPtr<MemoryBuffer> &result,
+                                        int64_t FileSize) {
   if (Filename == "-")
-    return getSTDIN(ErrStr);
-  return getFile(Filename, ErrStr, FileSize, FileInfo);
+    return getSTDIN(result);
+  return getFile(Filename, result, FileSize);
 }
 
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
-                                           std::string *ErrStr,
-                                           int64_t FileSize,
-                                           struct stat *FileInfo) {
+error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
+                                        OwningPtr<MemoryBuffer> &result,
+                                        int64_t FileSize) {
   if (strcmp(Filename, "-") == 0)
-    return getSTDIN(ErrStr);
-  return getFile(Filename, ErrStr, FileSize, FileInfo);
+    return getSTDIN(result);
+  return getFile(Filename, result, FileSize);
 }
 
 //===----------------------------------------------------------------------===//
@@ -177,50 +179,47 @@ public:
     sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
   }
 };
-
-/// FileCloser - RAII object to make sure an FD gets closed properly.
-class FileCloser {
-  int FD;
-public:
-  explicit FileCloser(int FD) : FD(FD) {}
-  ~FileCloser() { ::close(FD); }
-};
 }
 
-MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
-                                    int64_t FileSize, struct stat *FileInfo) {
+error_code MemoryBuffer::getFile(StringRef Filename,
+                                 OwningPtr<MemoryBuffer> &result,
+                                 int64_t FileSize) {
+  // Ensure the path is null terminated.
   SmallString<256> PathBuf(Filename.begin(), Filename.end());
-  return MemoryBuffer::getFile(PathBuf.c_str(), ErrStr, FileSize, FileInfo);
+  return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize);
 }
 
-MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
-                                    int64_t FileSize, struct stat *FileInfo) {
+error_code MemoryBuffer::getFile(const char *Filename,
+                                 OwningPtr<MemoryBuffer> &result,
+                                 int64_t FileSize) {
   int OpenFlags = O_RDONLY;
 #ifdef O_BINARY
   OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
 #endif
   int FD = ::open(Filename, OpenFlags);
   if (FD == -1) {
-    if (ErrStr) *ErrStr = sys::StrError();
-    return 0;
+    return error_code(errno, posix_category());
   }
-  FileCloser FC(FD); // Close FD on return.
-  
+  error_code ret = getOpenFile(FD, Filename, result, FileSize);
+  close(FD);
+  return ret;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+                                     OwningPtr<MemoryBuffer> &result,
+                                     int64_t FileSize) {
   // If we don't know the file size, use fstat to find out.  fstat on an open
   // file descriptor is cheaper than stat on a random path.
-  if (FileSize == -1 || FileInfo) {
-    struct stat MyFileInfo;
-    struct stat *FileInfoPtr = FileInfo? FileInfo : &MyFileInfo;
-    
+  if (FileSize == -1) {
+    struct stat FileInfo;
     // TODO: This should use fstat64 when available.
-    if (fstat(FD, FileInfoPtr) == -1) {
-      if (ErrStr) *ErrStr = sys::StrError();
-      return 0;
+    if (fstat(FD, &FileInfo) == -1) {
+      return error_code(errno, posix_category());
     }
-    FileSize = FileInfoPtr->st_size;
+    FileSize = FileInfo.st_size;
   }
-  
-  
+
+
   // If the file is large, try to use mmap to read it in.  We don't use mmap
   // for small files, because this can severely fragment our address space. Also
   // don't try to map files that are exactly a multiple of the system page size,
@@ -230,16 +229,17 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
   if (FileSize >= 4096*4 &&
       (FileSize & (sys::Process::GetPageSize()-1)) != 0) {
     if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
-      return GetNamedBuffer<MemoryBufferMMapFile>(StringRef(Pages, FileSize),
-                                                  Filename);
+      result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
+        StringRef(Pages, FileSize), Filename));
+      return success;
     }
   }
 
   MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename);
   if (!Buf) {
-    // Failed to create a buffer.
-    if (ErrStr) *ErrStr = "could not allocate buffer";
-    return 0;
+    // Failed to create a buffer. The only way it can fail is if
+    // new(std::nothrow) returns 0.
+    return make_error_code(errc::not_enough_memory);
   }
 
   OwningPtr<MemoryBuffer> SB(Buf);
@@ -252,26 +252,27 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
       if (errno == EINTR)
         continue;
       // Error while reading.
-      if (ErrStr) *ErrStr = sys::StrError();
-      return 0;
+      return error_code(errno, posix_category());
     } else if (NumRead == 0) {
       // We hit EOF early, truncate and terminate buffer.
       Buf->BufferEnd = BufPtr;
       *BufPtr = 0;
-      return SB.take();
+      result.swap(SB);
+      return success;
     }
     BytesLeft -= NumRead;
     BufPtr += NumRead;
   }
 
-  return SB.take();
+  result.swap(SB);
+  return success;
 }
 
 //===----------------------------------------------------------------------===//
 // MemoryBuffer::getSTDIN implementation.
 //===----------------------------------------------------------------------===//
 
-MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) {
+error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
   // Read in all of the data from stdin, we cannot mmap stdin.
   //
   // FIXME: That isn't necessarily true, we should try to mmap stdin and
@@ -287,11 +288,11 @@ MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) {
     ReadBytes = read(0, Buffer.end(), ChunkSize);
     if (ReadBytes == -1) {
       if (errno == EINTR) continue;
-      if (ErrStr) *ErrStr = sys::StrError();
-      return 0;
+      return error_code(errno, posix_category());
     }
     Buffer.set_size(Buffer.size() + ReadBytes);
   } while (ReadBytes != 0);
 
-  return getMemBufferCopy(Buffer, "<stdin>");
+  result.reset(getMemBufferCopy(Buffer, "<stdin>"));
+  return success;
 }
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
new file mode 100644
index 000000000000..b408973bbad1
--- /dev/null
+++ b/lib/Support/Mutex.cpp
@@ -0,0 +1,157 @@
+//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_mutex_init does have an address, then mutex support is enabled.
+// Note: all LLVM tools will link against -lpthread if its available since it
+//       is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_mutex_init is not
+//       declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a Mutex using pthread calls
+MutexImpl::MutexImpl( bool recursive)
+  : data_(0)
+{
+  if (pthread_enabled)
+  {
+    // Declare the pthread_mutex data structures
+    pthread_mutex_t* mutex =
+      static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+    pthread_mutexattr_t attr;
+
+    // Initialize the mutex attributes
+    int errorcode = pthread_mutexattr_init(&attr);
+    assert(errorcode == 0);
+
+    // Initialize the mutex as a recursive mutex, if requested, or normal
+    // otherwise.
+    int kind = ( recursive  ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+    errorcode = pthread_mutexattr_settype(&attr, kind);
+    assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+    // Make it a process local mutex
+    errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+    assert(errorcode == 0);
+#endif
+
+    // Initialize the mutex
+    errorcode = pthread_mutex_init(mutex, &attr);
+    assert(errorcode == 0);
+
+    // Destroy the attributes
+    errorcode = pthread_mutexattr_destroy(&attr);
+    assert(errorcode == 0);
+
+    // Assign the data member
+    data_ = mutex;
+  }
+}
+
+// Destruct a Mutex
+MutexImpl::~MutexImpl()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+    pthread_mutex_destroy(mutex);
+    free(mutex);
+  }
+}
+
+bool
+MutexImpl::acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_lock(mutex);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+MutexImpl::release()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_unlock(mutex);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+    assert(mutex != 0);
+
+    int errorcode = pthread_mutex_trylock(mutex);
+    return errorcode == 0;
+  } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/Mutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/Mutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
new file mode 100644
index 000000000000..e5e875bc54d7
--- /dev/null
+++ b/lib/Support/Path.cpp
@@ -0,0 +1,283 @@
+//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements the operating system Path concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Path.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/FileSystem.h"
+#include <cassert>
+#include <cstring>
+#include <ostream>
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+bool Path::operator==(const Path &that) const {
+  return path == that.path;
+}
+
+bool Path::operator<(const Path& that) const {
+  return path < that.path;
+}
+
+Path
+Path::GetLLVMConfigDir() {
+  Path result;
+#ifdef LLVM_ETCDIR
+  if (result.set(LLVM_ETCDIR))
+    return result;
+#endif
+  return GetLLVMDefaultConfigDir();
+}
+
+LLVMFileType
+sys::IdentifyFileType(const char *magic, unsigned length) {
+  assert(magic && "Invalid magic number string");
+  assert(length >=4 && "Invalid magic number length");
+  switch ((unsigned char)magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wraper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return Bitcode_FileType;
+      break;
+    case 'B':
+      if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+        return Bitcode_FileType;
+      break;
+    case '!':
+      if (length >= 8)
+        if (memcmp(magic,"!<arch>\n",8) == 0)
+          return Archive_FileType;
+      break;
+
+    case '\177':
+      if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+        if (length >= 18 && magic[17] == 0)
+          switch (magic[16]) {
+            default: break;
+            case 1: return ELF_Relocatable_FileType;
+            case 2: return ELF_Executable_FileType;
+            case 3: return ELF_SharedObject_FileType;
+            case 4: return ELF_Core_FileType;
+          }
+      }
+      break;
+
+    case 0xCA:
+      if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+          magic[3] == char(0xBE)) {
+        // This is complicated by an overlap with Java class files.
+        // See the Mach-O section in /usr/share/file/magic for details.
+        if (length >= 8 && magic[7] < 43)
+          // FIXME: Universal Binary of any type.
+          return Mach_O_DynamicallyLinkedSharedLib_FileType;
+      }
+      break;
+
+    case 0xFE:
+    case 0xCE: {
+      uint16_t type = 0;
+      if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+          magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
+        /* Native endian */
+        if (length >= 16) type = magic[14] << 8 | magic[15];
+      } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
+                 magic[2] == char(0xED) && magic[3] == char(0xFE)) {
+        /* Reverse endian */
+        if (length >= 14) type = magic[13] << 8 | magic[12];
+      }
+      switch (type) {
+        default: break;
+        case 1: return Mach_O_Object_FileType;
+        case 2: return Mach_O_Executable_FileType;
+        case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
+        case 4: return Mach_O_Core_FileType;
+        case 5: return Mach_O_PreloadExecutable_FileType;
+        case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
+        case 7: return Mach_O_DynamicLinker_FileType;
+        case 8: return Mach_O_Bundle_FileType;
+        case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
+        case 10: break; // FIXME: MH_DSYM companion file with only debug.
+      }
+      break;
+    }
+    case 0xF0: // PowerPC Windows
+    case 0x83: // Alpha 32-bit
+    case 0x84: // Alpha 64-bit
+    case 0x66: // MPS R4000 Windows
+    case 0x50: // mc68K
+    case 0x4c: // 80386 Windows
+      if (magic[1] == 0x01)
+        return COFF_FileType;
+
+    case 0x90: // PA-RISC Windows
+    case 0x68: // mc68K Windows
+      if (magic[1] == 0x02)
+        return COFF_FileType;
+      break;
+    case 0x64: // x86-64 Windows.
+      if (magic[1] == char(0x86))
+        return COFF_FileType;
+      break;
+
+    default:
+      break;
+  }
+  return Unknown_FileType;
+}
+
+bool
+Path::isArchive() const {
+  LLVMFileType type;
+  if (fs::identify_magic(str(), type))
+    return false;
+  return type == Archive_FileType;
+}
+
+bool
+Path::isDynamicLibrary() const {
+  LLVMFileType type;
+  if (fs::identify_magic(str(), type))
+    return false;
+  switch (type) {
+    default: return false;
+    case Mach_O_FixedVirtualMemorySharedLib_FileType:
+    case Mach_O_DynamicallyLinkedSharedLib_FileType:
+    case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+    case ELF_SharedObject_FileType:
+    case COFF_FileType:  return true;
+  }
+}
+
+bool
+Path::isObjectFile() const {
+  LLVMFileType type;
+  if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+    return false;
+  return true;
+}
+
+Path
+Path::FindLibrary(std::string& name) {
+  std::vector<sys::Path> LibPaths;
+  GetSystemLibraryPaths(LibPaths);
+  for (unsigned i = 0; i < LibPaths.size(); ++i) {
+    sys::Path FullPath(LibPaths[i]);
+    FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
+    if (FullPath.isDynamicLibrary())
+      return FullPath;
+    FullPath.eraseSuffix();
+    FullPath.appendSuffix("a");
+    if (FullPath.isArchive())
+      return FullPath;
+  }
+  return sys::Path();
+}
+
+StringRef Path::GetDLLSuffix() {
+  return &(LTDL_SHLIB_EXT[1]);
+}
+
+void
+Path::appendSuffix(StringRef suffix) {
+  if (!suffix.empty()) {
+    path.append(".");
+    path.append(suffix);
+  }
+}
+
+bool
+Path::isBitcodeFile() const {
+  LLVMFileType type;
+  if (fs::identify_magic(str(), type))
+    return false;
+  return type == Bitcode_FileType;
+}
+
+bool Path::hasMagicNumber(StringRef Magic) const {
+  std::string actualMagic;
+  if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
+    return Magic == actualMagic;
+  return false;
+}
+
+static void getPathList(const char*path, std::vector<Path>& Paths) {
+  const char* at = path;
+  const char* delim = strchr(at, PathSeparator);
+  Path tmpPath;
+  while (delim != 0) {
+    std::string tmp(at, size_t(delim-at));
+    if (tmpPath.set(tmp))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+    at = delim + 1;
+    delim = strchr(at, PathSeparator);
+  }
+
+  if (*at != 0)
+    if (tmpPath.set(std::string(at)))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+}
+
+static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
+  assert(Sep[0] != '\0' && Sep[1] == '\0' &&
+         "Sep must be a 1-character string literal.");
+  if (path.empty())
+    return ".";
+
+  // If the path is all slashes, return a single slash.
+  // Otherwise, remove all trailing slashes.
+
+  signed pos = static_cast<signed>(path.size()) - 1;
+
+  while (pos >= 0 && path[pos] == Sep[0])
+    --pos;
+
+  if (pos < 0)
+    return path[0] == Sep[0] ? Sep : ".";
+
+  // Any slashes left?
+  signed i = 0;
+
+  while (i < pos && path[i] != Sep[0])
+    ++i;
+
+  if (i == pos) // No slashes?  Return "."
+    return ".";
+
+  // There is at least one slash left.  Remove all trailing non-slashes.
+  while (pos >= 0 && path[pos] != Sep[0])
+    --pos;
+
+  // Remove any trailing slashes.
+  while (pos >= 0 && path[pos] == Sep[0])
+    --pos;
+
+  if (pos < 0)
+    return path[0] == Sep[0] ? Sep : ".";
+
+  return path.substr(0, pos+1);
+}
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/Path.inc"
+#endif
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
new file mode 100644
index 000000000000..896c94c071bc
--- /dev/null
+++ b/lib/Support/PathV2.cpp
@@ -0,0 +1,774 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+
+namespace {
+  using llvm::StringRef;
+  using llvm::sys::path::is_separator;
+
+#ifdef LLVM_ON_WIN32
+  const StringRef separators = "\\/";
+  const char      prefered_separator = '\\';
+#else
+  const StringRef separators = "/";
+  const char      prefered_separator = '/';
+#endif
+
+  const llvm::error_code success;
+
+  StringRef find_first_component(StringRef path) {
+    // Look for this first component in the following order.
+    // * empty (in this case we return an empty string)
+    // * either C: or {//,\\}net.
+    // * {/,\}
+    // * {.,..}
+    // * {file,directory}name
+
+    if (path.empty())
+      return path;
+
+#ifdef LLVM_ON_WIN32
+    // C:
+    if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':')
+      return path.substr(0, 2);
+#endif
+
+    // //net
+    if ((path.size() > 2) &&
+        is_separator(path[0]) &&
+        path[0] == path[1] &&
+        !is_separator(path[2])) {
+      // Find the next directory separator.
+      size_t end = path.find_first_of(separators, 2);
+      return path.substr(0, end);
+    }
+
+    // {/,\}
+    if (is_separator(path[0]))
+      return path.substr(0, 1);
+
+    if (path.startswith(".."))
+      return path.substr(0, 2);
+
+    if (path[0] == '.')
+      return path.substr(0, 1);
+
+    // * {file,directory}name
+    size_t end = path.find_first_of(separators, 2);
+    return path.substr(0, end);
+  }
+
+  size_t filename_pos(StringRef str) {
+    if (str.size() == 2 &&
+        is_separator(str[0]) &&
+        str[0] == str[1])
+      return 0;
+
+    if (str.size() > 0 && is_separator(str[str.size() - 1]))
+      return str.size() - 1;
+
+    size_t pos = str.find_last_of(separators, str.size() - 1);
+
+#ifdef LLVM_ON_WIN32
+    if (pos == StringRef::npos)
+      pos = str.find_last_of(':', str.size() - 2);
+#endif
+
+    if (pos == StringRef::npos ||
+        (pos == 1 && is_separator(str[0])))
+      return 0;
+
+    return pos + 1;
+  }
+
+  size_t root_dir_start(StringRef str) {
+    // case "c:/"
+#ifdef LLVM_ON_WIN32
+    if (str.size() > 2 &&
+        str[1] == ':' &&
+        is_separator(str[2]))
+      return 2;
+#endif
+
+    // case "//"
+    if (str.size() == 2 &&
+        is_separator(str[0]) &&
+        str[0] == str[1])
+      return StringRef::npos;
+
+    // case "//net"
+    if (str.size() > 3 &&
+        is_separator(str[0]) &&
+        str[0] == str[1] &&
+        !is_separator(str[2])) {
+      return str.find_first_of(separators, 2);
+    }
+
+    // case "/"
+    if (str.size() > 0 && is_separator(str[0]))
+      return 0;
+
+    return StringRef::npos;
+  }
+
+  size_t parent_path_end(StringRef path) {
+    size_t end_pos = filename_pos(path);
+
+    bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+
+    // Skip separators except for root dir.
+    size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+
+    while(end_pos > 0 &&
+          (end_pos - 1) != root_dir_pos &&
+          is_separator(path[end_pos - 1]))
+      --end_pos;
+
+    if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
+      return StringRef::npos;
+
+    return end_pos;
+  }
+} // end unnamed namespace
+
+namespace llvm {
+namespace sys  {
+namespace path {
+
+const_iterator begin(StringRef path) {
+  const_iterator i;
+  i.Path      = path;
+  i.Component = find_first_component(path);
+  i.Position  = 0;
+  return i;
+}
+
+const_iterator end(StringRef path) {
+  const_iterator i;
+  i.Path      = path;
+  i.Position  = path.size();
+  return i;
+}
+
+const_iterator &const_iterator::operator++() {
+  assert(Position < Path.size() && "Tried to increment past end!");
+
+  // Increment Position to past the current component
+  Position += Component.size();
+
+  // Check for end.
+  if (Position == Path.size()) {
+    Component = StringRef();
+    return *this;
+  }
+
+  // Both POSIX and Windows treat paths that begin with exactly two separators
+  // specially.
+  bool was_net = Component.size() > 2 &&
+    is_separator(Component[0]) &&
+    Component[1] == Component[0] &&
+    !is_separator(Component[2]);
+
+  // Handle separators.
+  if (is_separator(Path[Position])) {
+    // Root dir.
+    if (was_net
+#ifdef LLVM_ON_WIN32
+        // c:/
+        || Component.endswith(":")
+#endif
+        ) {
+      Component = Path.substr(Position, 1);
+      return *this;
+    }
+
+    // Skip extra separators.
+    while (Position != Path.size() &&
+           is_separator(Path[Position])) {
+      ++Position;
+    }
+
+    // Treat trailing '/' as a '.'.
+    if (Position == Path.size()) {
+      --Position;
+      Component = ".";
+      return *this;
+    }
+  }
+
+  // Find next component.
+  size_t end_pos = Path.find_first_of(separators, Position);
+  Component = Path.slice(Position, end_pos);
+
+  return *this;
+}
+
+const_iterator &const_iterator::operator--() {
+  // If we're at the end and the previous char was a '/', return '.'.
+  if (Position == Path.size() &&
+      Path.size() > 1 &&
+      is_separator(Path[Position - 1])
+#ifdef LLVM_ON_WIN32
+      && Path[Position - 2] != ':'
+#endif
+      ) {
+    --Position;
+    Component = ".";
+    return *this;
+  }
+
+  // Skip separators unless it's the root directory.
+  size_t root_dir_pos = root_dir_start(Path);
+  size_t end_pos = Position;
+
+  while(end_pos > 0 &&
+        (end_pos - 1) != root_dir_pos &&
+        is_separator(Path[end_pos - 1]))
+    --end_pos;
+
+  // Find next separator.
+  size_t start_pos = filename_pos(Path.substr(0, end_pos));
+  Component = Path.slice(start_pos, end_pos);
+  Position = start_pos;
+  return *this;
+}
+
+bool const_iterator::operator==(const const_iterator &RHS) const {
+  return Path.begin() == RHS.Path.begin() &&
+         Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+  return !(*this == RHS);
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+  return Position - RHS.Position;
+}
+
+const StringRef root_path(StringRef path) {
+  const_iterator b = begin(path),
+                 pos = b,
+                 e = end(path);
+  if (b != e) {
+    bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+    bool has_drive =
+#ifdef LLVM_ON_WIN32
+      b->endswith(":");
+#else
+      false;
+#endif
+
+    if (has_net || has_drive) {
+      if ((++pos != e) && is_separator((*pos)[0])) {
+        // {C:/,//net/}, so get the first two components.
+        return path.substr(0, b->size() + pos->size());
+      } else {
+        // just {C:,//net}, return the first component.
+        return *b;
+      }
+    }
+
+    // POSIX style root directory.
+    if (is_separator((*b)[0])) {
+      return *b;
+    }
+  }
+
+  return StringRef();
+}
+
+const StringRef root_name(StringRef path) {
+  const_iterator b = begin(path),
+                 e = end(path);
+  if (b != e) {
+    bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+    bool has_drive =
+#ifdef LLVM_ON_WIN32
+      b->endswith(":");
+#else
+      false;
+#endif
+
+    if (has_net || has_drive) {
+      // just {C:,//net}, return the first component.
+      return *b;
+    }
+  }
+
+  // No path or no name.
+  return StringRef();
+}
+
+const StringRef root_directory(StringRef path) {
+  const_iterator b = begin(path),
+                 pos = b,
+                 e = end(path);
+  if (b != e) {
+    bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+    bool has_drive =
+#ifdef LLVM_ON_WIN32
+      b->endswith(":");
+#else
+      false;
+#endif
+
+    if ((has_net || has_drive) &&
+        // {C:,//net}, skip to the next component.
+        (++pos != e) && is_separator((*pos)[0])) {
+      return *pos;
+    }
+
+    // POSIX style root directory.
+    if (!has_net && is_separator((*b)[0])) {
+      return *b;
+    }
+  }
+
+  // No path or no root.
+  return StringRef();
+}
+
+const StringRef relative_path(StringRef path) {
+  StringRef root = root_path(path);
+  return root.substr(root.size());
+}
+
+void append(SmallVectorImpl<char> &path, const Twine &a,
+                                         const Twine &b,
+                                         const Twine &c,
+                                         const Twine &d) {
+  SmallString<32> a_storage;
+  SmallString<32> b_storage;
+  SmallString<32> c_storage;
+  SmallString<32> d_storage;
+
+  SmallVector<StringRef, 4> components;
+  if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+  if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+  if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+  if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+  for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+                                                  e = components.end();
+                                                  i != e; ++i) {
+    bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+    bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+    bool is_root_name = has_root_name(*i);
+
+    if (path_has_sep) {
+      // Strip separators from beginning of component.
+      size_t loc = i->find_first_not_of(separators);
+      StringRef c = i->substr(loc);
+
+      // Append it.
+      path.append(c.begin(), c.end());
+      continue;
+    }
+
+    if (!component_has_sep && !(path.empty() || is_root_name)) {
+      // Add a separator.
+      path.push_back(prefered_separator);
+    }
+
+    path.append(i->begin(), i->end());
+  }
+}
+
+void append(SmallVectorImpl<char> &path,
+            const_iterator begin, const_iterator end) {
+  for (; begin != end; ++begin)
+    path::append(path, *begin);
+}
+
+const StringRef parent_path(StringRef path) {
+  size_t end_pos = parent_path_end(path);
+  if (end_pos == StringRef::npos)
+    return StringRef();
+  else
+    return path.substr(0, end_pos);
+}
+
+void remove_filename(SmallVectorImpl<char> &path) {
+  size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+  if (end_pos != StringRef::npos)
+    path.set_size(end_pos);
+}
+
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+  StringRef p(path.begin(), path.size());
+  SmallString<32> ext_storage;
+  StringRef ext = extension.toStringRef(ext_storage);
+
+  // Erase existing extension.
+  size_t pos = p.find_last_of('.');
+  if (pos != StringRef::npos && pos >= filename_pos(p))
+    path.set_size(pos);
+
+  // Append '.' if needed.
+  if (ext.size() > 0 && ext[0] != '.')
+    path.push_back('.');
+
+  // Append extension.
+  path.append(ext.begin(), ext.end());
+}
+
+void native(const Twine &path, SmallVectorImpl<char> &result) {
+  // Clear result.
+  result.clear();
+#ifdef LLVM_ON_WIN32
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+  result.reserve(p.size());
+  for (StringRef::const_iterator i = p.begin(),
+                                 e = p.end();
+                                 i != e;
+                                 ++i) {
+    if (*i == '/')
+      result.push_back('\\');
+    else
+      result.push_back(*i);
+  }
+#else
+  path.toVector(result);
+#endif
+}
+
+const StringRef filename(StringRef path) {
+  return *(--end(path));
+}
+
+const StringRef stem(StringRef path) {
+  StringRef fname = filename(path);
+  size_t pos = fname.find_last_of('.');
+  if (pos == StringRef::npos)
+    return fname;
+  else
+    if ((fname.size() == 1 && fname == ".") ||
+        (fname.size() == 2 && fname == ".."))
+      return fname;
+    else
+      return fname.substr(0, pos);
+}
+
+const StringRef extension(StringRef path) {
+  StringRef fname = filename(path);
+  size_t pos = fname.find_last_of('.');
+  if (pos == StringRef::npos)
+    return StringRef();
+  else
+    if ((fname.size() == 1 && fname == ".") ||
+        (fname.size() == 2 && fname == ".."))
+      return StringRef();
+    else
+      return fname.substr(pos);
+}
+
+bool is_separator(char value) {
+  switch(value) {
+#ifdef LLVM_ON_WIN32
+    case '\\': // fall through
+#endif
+    case '/': return true;
+    default: return false;
+  }
+}
+
+bool has_root_name(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !root_name(p).empty();
+}
+
+bool has_root_directory(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !root_directory(p).empty();
+}
+
+bool has_root_path(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !root_path(p).empty();
+}
+
+bool has_relative_path(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !relative_path(p).empty();
+}
+
+bool has_filename(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !filename(p).empty();
+}
+
+bool has_parent_path(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !parent_path(p).empty();
+}
+
+bool has_stem(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !stem(p).empty();
+}
+
+bool has_extension(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  return !extension(p).empty();
+}
+
+bool is_absolute(const Twine &path) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  bool rootDir = has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+       rootName = has_root_name(p);
+#else
+       rootName = true;
+#endif
+
+  return rootDir && rootName;
+}
+
+bool is_relative(const Twine &path) {
+  return !is_absolute(path);
+}
+
+} // end namespace path
+
+namespace fs {
+
+error_code make_absolute(SmallVectorImpl<char> &path) {
+  StringRef p(path.data(), path.size());
+
+  bool rootName      = path::has_root_name(p),
+       rootDirectory = path::has_root_directory(p);
+
+  // Already absolute.
+  if (rootName && rootDirectory)
+    return success;
+
+  // All of the following conditions will need the current directory.
+  SmallString<128> current_dir;
+  if (error_code ec = current_path(current_dir)) return ec;
+
+  // Relative path. Prepend the current directory.
+  if (!rootName && !rootDirectory) {
+    // Append path to the current directory.
+    path::append(current_dir, p);
+    // Set path to the result.
+    path.swap(current_dir);
+    return success;
+  }
+
+  if (!rootName && rootDirectory) {
+    StringRef cdrn = path::root_name(current_dir);
+    SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+    path::append(curDirRootName, p);
+    // Set path to the result.
+    path.swap(curDirRootName);
+    return success;
+  }
+
+  if (rootName && !rootDirectory) {
+    StringRef pRootName      = path::root_name(p);
+    StringRef bRootDirectory = path::root_directory(current_dir);
+    StringRef bRelativePath  = path::relative_path(current_dir);
+    StringRef pRelativePath  = path::relative_path(p);
+
+    SmallString<128> res;
+    path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+    path.swap(res);
+    return success;
+  }
+
+  llvm_unreachable("All rootName and rootDirectory combinations should have "
+                   "occurred above!");
+}
+
+error_code create_directories(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  StringRef parent = path::parent_path(p);
+  bool parent_exists;
+
+  if (error_code ec = fs::exists(parent, parent_exists)) return ec;
+
+  if (!parent_exists)
+    return create_directories(parent, existed);
+
+  return create_directory(p, existed);
+}
+
+bool exists(file_status status) {
+  return status_known(status) && status.type() != file_type::file_not_found;
+}
+
+bool status_known(file_status s) {
+  return s.type() != file_type::status_error;
+}
+
+bool is_directory(file_status status) {
+  return status.type() == file_type::directory_file;
+}
+
+error_code is_directory(const Twine &path, bool &result) {
+  file_status st;
+  if (error_code ec = status(path, st))
+    return ec;
+  result = is_directory(st);
+  return success;
+}
+
+bool is_regular_file(file_status status) {
+  return status.type() == file_type::regular_file;
+}
+
+error_code is_regular_file(const Twine &path, bool &result) {
+  file_status st;
+  if (error_code ec = status(path, st))
+    return ec;
+  result = is_regular_file(st);
+  return success;
+}
+
+bool is_symlink(file_status status) {
+  return status.type() == file_type::symlink_file;
+}
+
+error_code is_symlink(const Twine &path, bool &result) {
+  file_status st;
+  if (error_code ec = status(path, st))
+    return ec;
+  result = is_symlink(st);
+  return success;
+}
+
+bool is_other(file_status status) {
+  return exists(status) &&
+         !is_regular_file(status) &&
+         !is_directory(status) &&
+         !is_symlink(status);
+}
+
+void directory_entry::replace_filename(const Twine &filename, file_status st,
+                                       file_status symlink_st) {
+  SmallString<128> path(Path.begin(), Path.end());
+  path::remove_filename(path);
+  path::append(path, filename);
+  Path = path.str();
+  Status = st;
+  SymlinkStatus = symlink_st;
+}
+
+error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
+  SmallString<32>  MagicStorage;
+  StringRef Magic = magic.toStringRef(MagicStorage);
+  SmallString<32> Buffer;
+
+  if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
+    if (ec == errc::value_too_large) {
+      // Magic.size() > file_size(Path).
+      result = false;
+      return success;
+    }
+    return ec;
+  }
+
+  result = Magic == Buffer;
+  return success;
+}
+
+error_code identify_magic(const Twine &path, LLVMFileType &result) {
+  SmallString<32> Magic;
+  error_code ec = get_magic(path, Magic.capacity(), Magic);
+  if (ec && ec != errc::value_too_large)
+    return ec;
+
+  result = IdentifyFileType(Magic.data(), Magic.size());
+  return success;
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+  if (ft == file_type::directory_file) {
+    // This code would be a lot better with exceptions ;/.
+    error_code ec;
+    for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+      if (ec) return ec;
+      file_status st;
+      if (error_code ec = i->status(st)) return ec;
+      if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+    }
+    bool obviously_this_exists;
+    if (error_code ec = remove(path, obviously_this_exists)) return ec;
+    assert(obviously_this_exists);
+    ++count; // Include the directory itself in the items removed.
+  } else {
+    bool obviously_this_exists;
+    if (error_code ec = remove(path, obviously_this_exists)) return ec;
+    assert(obviously_this_exists);
+    ++count;
+  }
+
+  return success;
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  file_status fs;
+  if (error_code ec = status(path, fs))
+    return ec;
+  num_removed = 0;
+  return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+  return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index 36caecffeede..2924cfa38897 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -15,8 +15,8 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
 #include <vector>
 using namespace llvm;
 
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 3c8a10849d14..a9f4709e4b93 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -15,8 +15,8 @@
 #include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/ThreadLocal.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
 #include "llvm/ADT/SmallString.h"
 
 #ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -55,7 +55,7 @@ static void PrintCurStackTrace(raw_ostream &OS) {
 }
 
 // Integrate with crash reporter libraries.
-#if defined (__APPLE__) && defined (HAVE_CRASHREPORTERCLIENT_H)
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
 //  If any clients of llvm try to link to libCrashReporterClient.a themselves,
 //  only one crash info struct will be used.
 extern "C" {
@@ -64,7 +64,7 @@ struct crashreporter_annotations_t gCRAnnotations
         __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) 
         = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
 }
-#elif defined (__APPLE__)
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
 static const char *__crashreporter_info__ = 0;
 asm(".desc ___crashreporter_info__, 0x10");
 #endif
@@ -86,11 +86,11 @@ static void CrashHandler(void *) {
   }
   
   if (!TmpStr.empty()) {
-#ifndef HAVE_CRASHREPORTERCLIENT_H
-    __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
-#else
+#ifdef HAVE_CRASHREPORTERCLIENT_H
     // Cast to void to avoid warning.
     (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#elif HAVE_CRASHREPORTER_INFO 
+    __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
 #endif
     errs() << TmpStr.str();
   }
@@ -107,7 +107,7 @@ static bool RegisterCrashPrinter() {
 PrettyStackTraceEntry::PrettyStackTraceEntry() {
   // The first time this is called, we register the crash printer.
   static bool HandlerRegistered = RegisterCrashPrinter();
-  HandlerRegistered = HandlerRegistered;
+  (void)HandlerRegistered;
     
   // Link ourselves.
   NextEntry = PrettyStackTraceHead.get();
@@ -131,4 +131,3 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
     OS << ArgV[i] << ' ';
   OS << '\n';
 }
-
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
new file mode 100644
index 000000000000..88ca7c3f220f
--- /dev/null
+++ b/lib/Support/Process.cpp
@@ -0,0 +1,33 @@
+//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements the operating system Process concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Process.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Process.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Process.inc"
+#endif
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
new file mode 100644
index 000000000000..01860b082d62
--- /dev/null
+++ b/lib/Support/Program.cpp
@@ -0,0 +1,56 @@
+//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header file implements the operating system Program concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+int
+Program::ExecuteAndWait(const Path& path,
+                        const char** args,
+                        const char** envp,
+                        const Path** redirects,
+                        unsigned secondsToWait,
+                        unsigned memoryLimit,
+                        std::string* ErrMsg) {
+  Program prg;
+  if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+    return prg.Wait(path, secondsToWait, ErrMsg);
+  else
+    return -1;
+}
+
+void
+Program::ExecuteNoWait(const Path& path,
+                       const char** args,
+                       const char** envp,
+                       const Path** redirects,
+                       unsigned memoryLimit,
+                       std::string* ErrMsg) {
+  Program prg;
+  prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Program.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Program.inc"
+#endif
diff --git a/lib/Support/README.txt.system b/lib/Support/README.txt.system
new file mode 100644
index 000000000000..7a906b8dba4c
--- /dev/null
+++ b/lib/Support/README.txt.system
@@ -0,0 +1,43 @@
+Design Of lib/System
+====================
+
+The software in this directory is designed to completely shield LLVM from any
+and all operating system specific functionality. It is not intended to be a
+complete operating system wrapper (such as ACE), but only to provide the
+functionality necessary to support LLVM.
+
+The software located here, of necessity, has very specific and stringent design
+rules. Violation of these rules means that cracks in the shield could form and
+the primary goal of the library is defeated. By consistently using this library,
+LLVM becomes more easily ported to new platforms since the only thing requiring
+porting is this library.
+
+Complete documentation for the library can be found in the file:
+  llvm/docs/SystemLibrary.html
+or at this URL:
+  http://llvm.org/docs/SystemLibrary.html
+
+While we recommend that you read the more detailed documentation, for the
+impatient, here's a high level summary of the library's requirements.
+
+ 1. No system header files are to be exposed through the interface.
+ 2. Std C++ and Std C header files are okay to be exposed through the interface.
+ 3. No exposed system-specific functions.
+ 4. No exposed system-specific data.
+ 5. Data in lib/System classes must use only simple C++ intrinsic types.
+ 6. Errors are handled by returning "true" and setting an optional std::string
+ 7. Library must not throw any exceptions, period.
+ 8. Interface functions must not have throw() specifications.
+ 9. No duplicate function impementations are permitted within an operating
+    system class.
+
+To accomplish these requirements, the library has numerous design criteria that
+must be satisfied. Here's a high level summary of the library's design criteria:
+
+ 1. No unused functionality (only what LLVM needs)
+ 2. High-Level Interfaces
+ 3. Use Opaque Classes
+ 4. Common Implementations
+ 5. Multiple Implementations
+ 6. Minimize Memory Allocation
+ 7. No Virtual Methods
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
new file mode 100644
index 000000000000..fc02f9cf7c11
--- /dev/null
+++ b/lib/Support/RWMutex.cpp
@@ -0,0 +1,157 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if its available since it
+//       is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+//       declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+  : data_(0)
+{
+  if (pthread_enabled)
+  {
+    // Declare the pthread_rwlock data structures
+    pthread_rwlock_t* rwlock =
+      static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+    // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+    bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+    // Initialize the rwlock
+    int errorcode = pthread_rwlock_init(rwlock, NULL);
+    (void)errorcode;
+    assert(errorcode == 0);
+
+    // Assign the data member
+    data_ = rwlock;
+  }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+    pthread_rwlock_destroy(rwlock);
+    free(rwlock);
+  }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_rdlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_unlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_wrlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_unlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
+#endif
+#endif
diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 000000000000..d63830185c32
--- /dev/null
+++ b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,73 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file pulls the addresses of certain symbols out of the linker.  It must
+//  include as few header files as possible because it declares the symbols as
+//  void*, which would conflict with the actual symbol type if any header
+//  declared it.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* symbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+   extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+  // If this is darwin, it has some funky issues, try to solve them here.  Some
+  // important symbols are marked 'private external' which doesn't allow
+  // SearchForAddressOfSymbol to find them.  As such, we special case them here,
+  // there is only a small handful of them.
+
+#ifdef __APPLE__
+  {
+    EXPLICIT_SYMBOL(__ashldi3);
+    EXPLICIT_SYMBOL(__ashrdi3);
+    EXPLICIT_SYMBOL(__cmpdi2);
+    EXPLICIT_SYMBOL(__divdi3);
+    EXPLICIT_SYMBOL(__fixdfdi);
+    EXPLICIT_SYMBOL(__fixsfdi);
+    EXPLICIT_SYMBOL(__fixunsdfdi);
+    EXPLICIT_SYMBOL(__fixunssfdi);
+    EXPLICIT_SYMBOL(__floatdidf);
+    EXPLICIT_SYMBOL(__floatdisf);
+    EXPLICIT_SYMBOL(__lshrdi3);
+    EXPLICIT_SYMBOL(__moddi3);
+    EXPLICIT_SYMBOL(__udivdi3);
+    EXPLICIT_SYMBOL(__umoddi3);
+
+    // __eprintf is sometimes used for assert() handling on x86.
+    //
+    // FIXME: Currently disabled when using Clang, as we don't always have our
+    // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+    EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+  }
+#endif
+
+#ifdef __CYGWIN__
+  {
+    EXPLICIT_SYMBOL(_alloca);
+    EXPLICIT_SYMBOL(__main);
+  }
+#endif
+
+#undef EXPLICIT_SYMBOL
+  return 0;
+}
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
+  return DoSearch(symbolName);
+}
+}  // namespace llvm
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
new file mode 100644
index 000000000000..a3af37d5fe6a
--- /dev/null
+++ b/lib/Support/Signals.cpp
@@ -0,0 +1,34 @@
+//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occuring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signals.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Signals.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Signals.inc"
+#endif
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index da5681c5bc09..ef099163c221 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -13,9 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 namespace {
@@ -47,18 +50,18 @@ SourceMgr::~SourceMgr() {
 /// ~0, otherwise it returns the buffer ID of the stacked file.
 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
                                    SMLoc IncludeLoc) {
-
-  MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str());
+  OwningPtr<MemoryBuffer> NewBuf;
+  MemoryBuffer::getFile(Filename.c_str(), NewBuf);
 
   // If the file didn't exist directly, see if it's in an include path.
   for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
     std::string IncFile = IncludeDirectories[i] + "/" + Filename;
-    NewBuf = MemoryBuffer::getFile(IncFile.c_str());
+    MemoryBuffer::getFile(IncFile.c_str(), NewBuf);
   }
 
   if (NewBuf == 0) return ~0U;
 
-  return AddNewSourceBuffer(NewBuf, IncludeLoc);
+  return AddNewSourceBuffer(NewBuf.take(), IncludeLoc);
 }
 
 
@@ -135,7 +138,7 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
 ///
 /// @param Type - If non-null, the kind of message (e.g., "error") which is
 /// prefixed to the message.
-SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
                                    const char *Type, bool ShowLine) const {
 
   // First thing to do: find the current buffer containing the specified
@@ -162,27 +165,25 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
   }
 
   std::string PrintedMsg;
-  if (Type) {
-    PrintedMsg = Type;
-    PrintedMsg += ": ";
-  }
-  PrintedMsg += Msg;
+  raw_string_ostream OS(PrintedMsg);
+  if (Type)
+    OS << Type << ": ";
+  OS << Msg;
 
   return SMDiagnostic(*this, Loc,
                       CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
-                      Loc.getPointer()-LineStart, PrintedMsg,
+                      Loc.getPointer()-LineStart, OS.str(),
                       LineStr, ShowLine);
 }
 
-void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg,
+void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
                              const char *Type, bool ShowLine) const {
   // Report the message with the diagnostic handler if present.
   if (DiagHandler) {
-    DiagHandler(GetMessage(Loc, Msg, Type, ShowLine),
-                DiagContext, DiagLocCookie);
+    DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
     return;
   }
-  
+
   raw_ostream &OS = errs();
 
   int CurBuf = FindBufferContainingLoc(Loc);
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index e32ab74a2d4c..f0ed62690fd3 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -26,7 +26,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/ADT/StringExtras.h"
 #include <algorithm>
 #include <cstring>
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index 6f28277890e3..90ec29950262 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -155,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
   const char *VStr = (char*)V + ItemSize;
   StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
-  V2 = V2;
+  (void)V2;
   assert(V == V2 && "Didn't find key?");
 }
 
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 46f26b242aac..539805196450 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/OwningPtr.h"
 #include <bitset>
 
 using namespace llvm;
@@ -67,8 +68,9 @@ int StringRef::compare_numeric(StringRef RHS) const {
 }
 
 // Compute the edit distance between the two given strings.
-unsigned StringRef::edit_distance(llvm::StringRef Other, 
-                                  bool AllowReplacements) {
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+                                  bool AllowReplacements,
+                                  unsigned MaxEditDistance) {
   // The algorithm implemented below is the "classic"
   // dynamic-programming algorithm for computing the Levenshtein
   // distance, which is described here:
@@ -83,17 +85,21 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
 
   const unsigned SmallBufferSize = 64;
   unsigned SmallBuffer[SmallBufferSize];
-  unsigned *Allocated = 0;
+  llvm::OwningArrayPtr<unsigned> Allocated;
   unsigned *previous = SmallBuffer;
-  if (2*(n + 1) > SmallBufferSize)
-    Allocated = previous = new unsigned [2*(n+1)];
+  if (2*(n + 1) > SmallBufferSize) {
+    previous = new unsigned [2*(n+1)];
+    Allocated.reset(previous);
+  }
   unsigned *current = previous + (n + 1);
-  
-  for (unsigned i = 0; i <= n; ++i) 
+
+  for (unsigned i = 0; i <= n; ++i)
     previous[i] = i;
 
   for (size_type y = 1; y <= m; ++y) {
     current[0] = y;
+    unsigned BestThisRow = current[0];
+
     for (size_type x = 1; x <= n; ++x) {
       if (AllowReplacements) {
         current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
@@ -103,16 +109,18 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
         if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
         else current[x] = min(current[x-1], previous[x]) + 1;
       }
+      BestThisRow = min(BestThisRow, current[x]);
     }
-    
+
+    if (MaxEditDistance && BestThisRow > MaxEditDistance)
+      return MaxEditDistance + 1;
+
     unsigned *tmp = current;
     current = previous;
     previous = tmp;
   }
 
   unsigned Result = previous[n];
-  delete [] Allocated;
-  
   return Result;
 }
 
@@ -192,6 +200,21 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
   return npos;
 }
 
+/// find_last_of - Find the last character in the string that is in \arg C,
+/// or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_of(StringRef Chars,
+                                             size_t From) const {
+  std::bitset<1 << CHAR_BIT> CharBits;
+  for (size_type i = 0; i != Chars.size(); ++i)
+    CharBits.set((unsigned char)Chars[i]);
+
+  for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+    if (CharBits.test((unsigned char)Data[i]))
+      return i;
+  return npos;
+}
 
 //===----------------------------------------------------------------------===//
 // Helpful Algorithms
@@ -232,10 +255,10 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
   // Autosense radix if not specified.
   if (Radix == 0)
     Radix = GetAutoSenseRadix(Str);
-  
+
   // Empty strings (after the radix autosense) are invalid.
   if (Str.empty()) return true;
-  
+
   // Parse all the bytes of the string given this radix.  Watch for overflow.
   Result = 0;
   while (!Str.empty()) {
@@ -248,23 +271,23 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
       CharVal = Str[0]-'A'+10;
     else
       return true;
-    
+
     // If the parsed value is larger than the integer radix, the string is
     // invalid.
     if (CharVal >= Radix)
       return true;
-    
+
     // Add in this character.
     unsigned long long PrevResult = Result;
     Result = Result*Radix+CharVal;
-    
+
     // Check for overflow.
     if (Result < PrevResult)
       return true;
 
     Str = Str.substr(1);
   }
-  
+
   return false;
 }
 
@@ -275,7 +298,7 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
 
 bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
   unsigned long long ULLVal;
-  
+
   // Handle positive strings first.
   if (empty() || front() != '-') {
     if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
@@ -285,7 +308,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
     Result = ULLVal;
     return false;
   }
-  
+
   // Get the positive part of the value.
   if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
       // Reject values so large they'd overflow as negative signed, but allow
@@ -293,7 +316,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
       // on signed overflow.
       (long long)-ULLVal > 0)
     return true;
-  
+
   Result = -ULLVal;
   return false;
 }
@@ -314,7 +337,7 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
     return true;
   Result = Val;
   return false;
-}  
+}
 
 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
   StringRef Str = *this;
@@ -324,7 +347,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
     Radix = GetAutoSenseRadix(Str);
 
   assert(Radix > 1 && Radix <= 36);
-  
+
   // Empty strings (after the radix autosense) are invalid.
   if (Str.empty()) return true;
 
@@ -348,7 +371,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
   if (BitWidth < Result.getBitWidth())
     BitWidth = Result.getBitWidth(); // don't shrink the result
   else
-    Result.zext(BitWidth);
+    Result = Result.zext(BitWidth);
 
   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
   if (!IsPowerOf2Radix) {
@@ -369,12 +392,12 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
       CharVal = Str[0]-'A'+10;
     else
       return true;
-    
+
     // If the parsed value is larger than the integer radix, the string is
     // invalid.
     if (CharVal >= Radix)
       return true;
-    
+
     // Add in this character.
     if (IsPowerOf2Radix) {
       Result <<= Log2Radix;
@@ -387,6 +410,6 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
 
     Str = Str.substr(1);
   }
-  
+
   return false;
 }
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index c8b260c2e3dd..54b5e97bfe18 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -23,43 +23,33 @@ bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
   if (stream_to_check.is_displayed()) {
     if (print_warning) {
       errs() << "WARNING: You're attempting to print out a bitcode file.\n"
-             << "This is inadvisable as it may cause display problems. If\n"
-             << "you REALLY want to taste LLVM bitcode first-hand, you\n"
-             << "can force output with the `-f' option.\n\n";
+                "This is inadvisable as it may cause display problems. If\n"
+                "you REALLY want to taste LLVM bitcode first-hand, you\n"
+                "can force output with the `-f' option.\n\n";
     }
     return true;
   }
   return false;
 }
 
-/// FindExecutable - Find a named executable, giving the argv[0] of program
-/// being executed. This allows us to find another LLVM tool if it is built in
-/// the same directory.  If the executable cannot be found, return an
-/// empty string.
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just mainpulates the path and doesn't check for executability.
 /// @brief Find a named executable.
-#undef FindExecutable   // needed on windows :(
-sys::Path llvm::FindExecutable(const std::string &ExeName,
-                               const char *Argv0, void *MainAddr) {
+sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName,
+                                          const char *Argv0, void *MainAddr) {
   // Check the directory that the calling program is in.  We can do
   // this if ProgramPath contains at least one / character, indicating that it
   // is a relative path to the executable itself.
   sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
   Result.eraseComponent();
+
   if (!Result.isEmpty()) {
     Result.appendComponent(ExeName);
-    if (Result.canExecute())
-      return Result;
-    // If the path is absolute (and it usually is), call FindProgramByName to
-    // allow it to try platform-specific logic, such as appending a .exe suffix
-    // on Windows. Don't do this if we somehow have a relative path, because
-    // we don't want to go searching the PATH and accidentally find an unrelated
-    // version of the program.
-    if (Result.isAbsolute()) {
-      Result = sys::Program::FindProgramByName(Result.str());
-      if (!Result.empty())
-        return Result;
-    }
+    Result.appendSuffix(sys::Path::GetEXESuffix());
   }
 
-  return sys::Path();
+  return Result;
 }
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 5896447f5ea5..293a5d7a0168 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetRegistry.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
 #include <cassert>
 using namespace llvm;
 
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
new file mode 100644
index 000000000000..6b43048da155
--- /dev/null
+++ b/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,84 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+  pthread_key_t* key = new pthread_key_t;
+  int errorcode = pthread_key_create(key, NULL);
+  assert(errorcode == 0);
+  (void) errorcode;
+  data = (void*)key;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_key_delete(*key);
+  assert(errorcode == 0);
+  (void) errorcode;
+  delete key;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_setspecific(*key, d);
+  assert(errorcode == 0);
+  (void) errorcode;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  return pthread_getspecific(*key);
+}
+
+void ThreadLocalImpl::removeInstance() {
+  setInstance(0);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
+#endif
+#endif
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
new file mode 100644
index 000000000000..29579567ac6c
--- /dev/null
+++ b/lib/Support/Threading.cpp
@@ -0,0 +1,116 @@
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+  assert(!multithreaded_mode && "Already multithreaded!");
+  multithreaded_mode = true;
+  global_lock = new sys::Mutex(true);
+
+  // We fence here to ensure that all initialization is complete BEFORE we
+  // return from llvm_start_multithreaded().
+  sys::MemoryFence();
+  return true;
+#else
+  return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+  assert(multithreaded_mode && "Not currently multithreaded!");
+
+  // We fence here to insure that all threaded operations are complete BEFORE we
+  // return from llvm_stop_multithreaded().
+  sys::MemoryFence();
+
+  multithreaded_mode = false;
+  delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+  return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+  if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+  if (multithreaded_mode) global_lock->release();
+}
+
+#if defined(LLVM_MULTITHREADED) && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+
+struct ThreadInfo {
+  void (*UserFn)(void *);
+  void *UserData;
+};
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+  ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+  TI->UserFn(TI->UserData);
+  return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+                                  unsigned RequestedStackSize) {
+  ThreadInfo Info = { Fn, UserData };
+  pthread_attr_t Attr;
+  pthread_t Thread;
+
+  // Construct the attributes object.
+  if (::pthread_attr_init(&Attr) != 0)
+    return;
+
+  // Set the requested stack size, if given.
+  if (RequestedStackSize != 0) {
+    if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+      goto error;
+  }
+
+  // Construct and execute the thread.
+  if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+    goto error;
+
+  // Wait for the thread and clean up.
+  ::pthread_join(Thread, 0);
+
+ error:
+  ::pthread_attr_destroy(&Attr);
+}
+
+#else
+
+// No non-pthread implementation, currently.
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+                                  unsigned RequestedStackSize) {
+  (void) RequestedStackSize;
+  Fn(UserData);
+}
+
+#endif
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
new file mode 100644
index 000000000000..1a0f7bc36394
--- /dev/null
+++ b/lib/Support/TimeValue.cpp
@@ -0,0 +1,57 @@
+//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+using namespace sys;
+
+const TimeValue TimeValue::MinTime       = TimeValue ( INT64_MIN,0 );
+const TimeValue TimeValue::MaxTime       = TimeValue ( INT64_MAX,0 );
+const TimeValue TimeValue::ZeroTime      = TimeValue ( 0,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+
+void
+TimeValue::normalize( void ) {
+  if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
+    do {
+      seconds_++;
+      nanos_ -= NANOSECONDS_PER_SECOND;
+    } while ( nanos_ >= NANOSECONDS_PER_SECOND );
+  } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
+    do {
+      seconds_--;
+      nanos_ += NANOSECONDS_PER_SECOND;
+    } while (nanos_ <= -NANOSECONDS_PER_SECOND);
+  }
+
+  if (seconds_ >= 1 && nanos_ < 0) {
+    seconds_--;
+    nanos_ += NANOSECONDS_PER_SECOND;
+  } else if (seconds_ < 0 && nanos_ > 0) {
+    seconds_++;
+    nanos_ -= NANOSECONDS_PER_SECOND;
+  }
+}
+
+}
+
+/// Include the platform specific portion of TimeValue class
+#ifdef LLVM_ON_UNIX
+#include "Unix/TimeValue.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/TimeValue.inc"
+#endif
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 44ee1777cb57..a9ed5eecfa7e 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -17,8 +17,8 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Process.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringMap.h"
 using namespace llvm;
diff --git a/lib/Support/ToolOutputFile.cpp b/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 000000000000..e7ca927ea537
--- /dev/null
+++ b/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,43 @@
+//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+using namespace llvm;
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+  : Filename(filename), Keep(false) {
+  // Arrange for the file to be deleted if the process is killed.
+  if (Filename != "-")
+    sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+  // Delete the file if the client hasn't told us not to.
+  if (!Keep && Filename != "-")
+    sys::Path(Filename).eraseFromDisk();
+
+  // Ok, the file is successfully written and closed, or deleted. There's no
+  // further need to clean it up on signals.
+  if (Filename != "-")
+    sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+                                   unsigned Flags)
+  : Installer(filename),
+    OS(filename, ErrorInfo, Flags) {
+  // If open fails, no cleanup is needed.
+  if (!ErrorInfo.empty())
+    Installer.Keep = true;
+}
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 3a95b65e6900..36edf6eefa70 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/Triple.h"
 
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include <cassert>
 #include <cstring>
@@ -21,7 +22,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   switch (Kind) {
   case InvalidArch: return "<invalid>";
   case UnknownArch: return "unknown";
-    
+
   case alpha:   return "alpha";
   case arm:     return "arm";
   case bfin:    return "bfin";
@@ -29,7 +30,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case mips:    return "mips";
   case mipsel:  return "mipsel";
   case msp430:  return "msp430";
-  case pic16:   return "pic16";
   case ppc64:   return "powerpc64";
   case ppc:     return "powerpc";
   case sparc:   return "sparc";
@@ -41,6 +41,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case x86_64:  return "x86_64";
   case xcore:   return "xcore";
   case mblaze:  return "mblaze";
+  case ptx:     return "ptx";
   }
 
   return "<invalid>";
@@ -70,7 +71,10 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
 
   case x86:
   case x86_64:  return "x86";
+
   case xcore:   return "xcore";
+
+  case ptx:     return "ptx";
   }
 }
 
@@ -97,7 +101,6 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case Linux: return "linux";
   case Lv2: return "lv2";
   case MinGW32: return "mingw32";
-  case MinGW64: return "mingw64";
   case NetBSD: return "netbsd";
   case OpenBSD: return "openbsd";
   case Psp: return "psp";
@@ -110,6 +113,18 @@ const char *Triple::getOSTypeName(OSType Kind) {
   return "<invalid>";
 }
 
+const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+  switch (Kind) {
+  case UnknownEnvironment: return "unknown";
+  case GNU: return "gnu";
+  case GNUEABI: return "gnueabi";
+  case EABI: return "eabi";
+  case MachO: return "macho";
+  }
+
+  return "<invalid>";
+}
+
 Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
   if (Name == "alpha")
     return alpha;
@@ -125,8 +140,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     return mipsel;
   if (Name == "msp430")
     return msp430;
-  if (Name == "pic16")
-    return pic16;
   if (Name == "ppc64")
     return ppc64;
   if (Name == "ppc")
@@ -149,6 +162,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     return x86_64;
   if (Name == "xcore")
     return xcore;
+  if (Name == "ptx")
+    return ptx;
 
   return UnknownArch;
 }
@@ -187,6 +202,9 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
       Str == "armv6" || Str == "armv7")
     return Triple::arm;
 
+  if (Str == "ptx")
+    return Triple::ptx;
+
   return Triple::UnknownArch;
 }
 
@@ -210,28 +228,29 @@ const char *Triple::getArchNameForAssembler() {
     return "arm";
   if (Str == "armv4t" || Str == "thumbv4t")
     return "armv4t";
-  if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" || Str == "thumbv5e")
+  if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
+      || Str == "thumbv5e")
     return "armv5";
   if (Str == "armv6" || Str == "thumbv6")
     return "armv6";
   if (Str == "armv7" || Str == "thumbv7")
     return "armv7";
+  if (Str == "ptx")
+    return "ptx";
   return NULL;
 }
 
 //
 
 Triple::ArchType Triple::ParseArch(StringRef ArchName) {
-  if (ArchName.size() == 4 && ArchName[0] == 'i' && 
-      ArchName[2] == '8' && ArchName[3] == '6' && 
+  if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+      ArchName[2] == '8' && ArchName[3] == '6' &&
       ArchName[1] - '3' < 6) // i[3-9]86
     return x86;
   else if (ArchName == "amd64" || ArchName == "x86_64")
     return x86_64;
   else if (ArchName == "bfin")
     return bfin;
-  else if (ArchName == "pic16")
-    return pic16;
   else if (ArchName == "powerpc")
     return ppc;
   else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
@@ -266,6 +285,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
     return tce;
   else if (ArchName == "xcore")
     return xcore;
+  else if (ArchName == "ptx")
+    return ptx;
   else
     return UnknownArch;
 }
@@ -296,8 +317,6 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
     return Lv2;
   else if (OSName.startswith("mingw32"))
     return MinGW32;
-  else if (OSName.startswith("mingw64"))
-    return MinGW64;
   else if (OSName.startswith("netbsd"))
     return NetBSD;
   else if (OSName.startswith("openbsd"))
@@ -316,12 +335,26 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
     return UnknownOS;
 }
 
+Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
+  if (EnvironmentName.startswith("eabi"))
+    return EABI;
+  else if (EnvironmentName.startswith("gnueabi"))
+    return GNUEABI;
+  else if (EnvironmentName.startswith("gnu"))
+    return GNU;
+  else if (EnvironmentName.startswith("macho"))
+    return MachO;
+  else
+    return UnknownEnvironment;
+}
+
 void Triple::Parse() const {
   assert(!isInitialized() && "Invalid parse call.");
 
   Arch = ParseArch(getArchName());
   Vendor = ParseVendor(getVendorName());
   OS = ParseOS(getOSName());
+  Environment = ParseEnvironment(getEnvironmentName());
 
   assert(isInitialized() && "Failed to initialize!");
 }
@@ -348,24 +381,28 @@ std::string Triple::normalize(StringRef Str) {
   OSType OS = UnknownOS;
   if (Components.size() > 2)
     OS = ParseOS(Components[2]);
+  EnvironmentType Environment = UnknownEnvironment;
+  if (Components.size() > 3)
+    Environment = ParseEnvironment(Components[3]);
 
   // Note which components are already in their final position.  These will not
   // be moved.
-  bool Found[3];
+  bool Found[4];
   Found[0] = Arch != UnknownArch;
   Found[1] = Vendor != UnknownVendor;
   Found[2] = OS != UnknownOS;
+  Found[3] = Environment != UnknownEnvironment;
 
   // If they are not there already, permute the components into their canonical
   // positions by seeing if they parse as a valid architecture, and if so moving
   // the component to the architecture position etc.
-  for (unsigned Pos = 0; Pos != 3; ++Pos) {
+  for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
     if (Found[Pos])
       continue; // Already in the canonical position.
 
     for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
       // Do not reparse any components that already matched.
-      if (Idx < 3 && Found[Idx])
+      if (Idx < array_lengthof(Found) && Found[Idx])
         continue;
 
       // Does this component parse as valid for the target position?
@@ -386,6 +423,10 @@ std::string Triple::normalize(StringRef Str) {
         OS = ParseOS(Comp);
         Valid = OS != UnknownOS;
         break;
+      case 3:
+        Environment = ParseEnvironment(Comp);
+        Valid = Environment != UnknownEnvironment;
+        break;
       }
       if (!Valid)
         continue; // Nope, try the next component.
@@ -404,7 +445,7 @@ std::string Triple::normalize(StringRef Str) {
         // components to the right.
         for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
           // Skip over any fixed components.
-          while (i < 3 && Found[i]) ++i;
+          while (i < array_lengthof(Found) && Found[i]) ++i;
           // Place the component at the new position, getting the component
           // that was at this position - it will be moved right.
           std::swap(CurrentComponent, Components[i]);
@@ -416,22 +457,23 @@ std::string Triple::normalize(StringRef Str) {
         do {
           // Insert one empty component at Idx.
           StringRef CurrentComponent(""); // The empty component.
-          for (unsigned i = Idx; i < Components.size(); ++i) {
-            // Skip over any fixed components.
-            while (i < 3 && Found[i]) ++i;
+          for (unsigned i = Idx; i < Components.size();) {
             // Place the component at the new position, getting the component
             // that was at this position - it will be moved right.
             std::swap(CurrentComponent, Components[i]);
             // If it was placed on top of an empty component then we are done.
             if (CurrentComponent.empty())
               break;
+            // Advance to the next component, skipping any fixed components.
+            while (++i < array_lengthof(Found) && Found[i])
+              ;
           }
           // The last component was pushed off the end - append it.
           if (!CurrentComponent.empty())
             Components.push_back(CurrentComponent);
 
           // Advance Idx to the component's new position.
-          while (++Idx < 3 && Found[Idx]) {}
+          while (++Idx < array_lengthof(Found) && Found[Idx]) {}
         } while (Idx < Pos); // Add more until the final position is reached.
       }
       assert(Pos < Components.size() && Components[Pos] == Comp &&
@@ -482,17 +524,17 @@ StringRef Triple::getOSAndEnvironmentName() const {
 static unsigned EatNumber(StringRef &Str) {
   assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
   unsigned Result = Str[0]-'0';
-  
+
   // Eat the digit.
   Str = Str.substr(1);
-  
+
   // Handle "darwin11".
   if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') {
     Result = Result*10 + (Str[0] - '0');
     // Eat the digit.
     Str = Str.substr(1);
   }
-  
+
   return Result;
 }
 
@@ -505,10 +547,10 @@ void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
   assert(getOS() == Darwin && "Not a darwin target triple!");
   StringRef OSName = getOSName();
   assert(OSName.startswith("darwin") && "Unknown darwin target triple!");
-  
+
   // Strip off "darwin".
   OSName = OSName.substr(6);
-  
+
   Maj = Min = Revision = 0;
 
   if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
@@ -517,27 +559,27 @@ void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
   // The major version is the first digit.
   Maj = EatNumber(OSName);
   if (OSName.empty()) return;
-  
+
   // Handle minor version: 10.4.9 -> darwin8.9.
   if (OSName[0] != '.')
     return;
-  
+
   // Eat the '.'.
   OSName = OSName.substr(1);
 
   if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
     return;
-  
+
   Min = EatNumber(OSName);
   if (OSName.empty()) return;
 
   // Handle revision darwin8.9.1
   if (OSName[0] != '.')
     return;
-  
+
   // Eat the '.'.
   OSName = OSName.substr(1);
-  
+
   if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
     return;
 
@@ -561,6 +603,10 @@ void Triple::setOS(OSType Kind) {
   setOSName(getOSTypeName(Kind));
 }
 
+void Triple::setEnvironment(EnvironmentType Kind) {
+  setEnvironmentName(getEnvironmentTypeName(Kind));
+}
+
 void Triple::setArchName(StringRef Str) {
   // Work around a miscompilation bug for Twines in gcc 4.0.3.
   SmallString<64> Triple;
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index b3ea0132e4ac..75cea2961a9d 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -30,22 +30,42 @@ StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
   return StringRef(Out.data(), Out.size());
 }
 
-void Twine::printOneChild(raw_ostream &OS, const void *Ptr, 
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+  if (isUnary()) {
+    switch (getLHSKind()) {
+    case CStringKind:
+      // Already null terminated, yay!
+      return StringRef(static_cast<const char*>(LHS));
+    case StdStringKind: {
+        const std::string *str = static_cast<const std::string*>(LHS);
+        return StringRef(str->c_str(), str->size());
+      }
+    default:
+      break;
+    }
+  }
+  toVector(Out);
+  Out.push_back(0);
+  Out.pop_back();
+  return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
                           NodeKind Kind) const {
   switch (Kind) {
   case Twine::NullKind: break;
   case Twine::EmptyKind: break;
   case Twine::TwineKind:
-    static_cast<const Twine*>(Ptr)->print(OS); 
+    static_cast<const Twine*>(Ptr)->print(OS);
     break;
-  case Twine::CStringKind: 
-    OS << static_cast<const char*>(Ptr); 
+  case Twine::CStringKind:
+    OS << static_cast<const char*>(Ptr);
     break;
   case Twine::StdStringKind:
-    OS << *static_cast<const std::string*>(Ptr); 
+    OS << *static_cast<const std::string*>(Ptr);
     break;
   case Twine::StringRefKind:
-    OS << *static_cast<const StringRef*>(Ptr); 
+    OS << *static_cast<const StringRef*>(Ptr);
     break;
   case Twine::DecUIKind:
     OS << (unsigned)(uintptr_t)Ptr;
@@ -71,7 +91,7 @@ void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
   }
 }
 
-void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr, 
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
                               NodeKind Kind) const {
   switch (Kind) {
   case Twine::NullKind:
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
new file mode 100644
index 000000000000..ed74b6759901
--- /dev/null
+++ b/lib/Support/Unix/Host.inc
@@ -0,0 +1,97 @@
+ //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+  struct utsname info;
+
+  if (uname(&info))
+    return "";
+
+  return info.release;
+}
+
+std::string sys::getHostTriple() {
+  // FIXME: Derive directly instead of relying on the autoconf generated
+  // variable.
+
+  StringRef HostTripleString(LLVM_HOSTTRIPLE);
+  std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
+
+  // Normalize the arch, since the host triple may not actually match the host.
+  std::string Arch = ArchSplit.first;
+
+  // It would be nice to do this in terms of llvm::Triple, but that is in
+  // Support which is layered above us.
+#if defined(__x86_64__)
+  Arch = "x86_64";
+#elif defined(__i386__)
+  Arch = "i386";
+#elif defined(__ppc64__)
+  Arch = "powerpc64";
+#elif defined(__ppc__)
+  Arch = "powerpc";
+#elif defined(__arm__)
+
+  // FIXME: We need to pick the right ARM triple (which involves querying the
+  // chip). However, for now this is most important for LLVM arch selection, so
+  // we only need to make sure to distinguish ARM and Thumb.
+#  if defined(__thumb__)
+  Arch = "thumb";
+#  else
+  Arch = "arm";
+#  endif
+
+#else
+
+  // FIXME: When enough auto-detection is in place, this should just
+  // #error. Then at least the arch selection is done, and we only need the OS
+  // etc selection to kill off the use of LLVM_HOSTTRIPLE.
+
+#endif
+
+  std::string Triple(Arch);
+  Triple += '-';
+  Triple += ArchSplit.second;
+
+  // Force i<N>86 to i386.
+  if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+      Triple[2] == '8' && Triple[3] == '6')
+    Triple[1] = '3';
+
+  // On darwin, we want to update the version to match that of the
+  // host.
+  std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+  if (DarwinDashIdx != std::string::npos) {
+    Triple.resize(DarwinDashIdx + strlen("-darwin"));
+
+    // Only add the major part of the os version.
+    std::string Version = getOSVersion();
+    Triple += Version.substr(0, Version.find('.'));
+  }
+
+  return Triple;
+}
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
new file mode 100644
index 000000000000..4312d67183c4
--- /dev/null
+++ b/lib/Support/Unix/Memory.inc
@@ -0,0 +1,151 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions.  This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it.  Getting this type of memory
+/// is very OS specific.
+///
+llvm::sys::MemoryBlock
+llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
+                               std::string *ErrMsg) {
+  if (NumBytes == 0) return MemoryBlock();
+
+  size_t pageSize = Process::GetPageSize();
+  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+  int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+  static int zero_fd = open("/dev/zero", O_RDWR);
+  if (zero_fd == -1) {
+    MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
+    return MemoryBlock();
+  }
+  fd = zero_fd;
+#endif
+
+  int flags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+  MAP_ANONYMOUS
+#else
+  MAP_ANON
+#endif
+  ;
+
+  void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+                            NearBlock->size() : 0;
+
+#if defined(__APPLE__) && defined(__arm__)
+  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
+                    flags, fd, 0);
+#else
+  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+                    flags, fd, 0);
+#endif
+  if (pa == MAP_FAILED) {
+    if (NearBlock) //Try again without a near hint
+      return AllocateRWX(NumBytes, 0);
+
+    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
+    return MemoryBlock();
+  }
+
+#if defined(__APPLE__) && defined(__arm__)
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+                                (vm_size_t)(pageSize*NumPages), 0,
+                                VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+  if (KERN_SUCCESS != kr) {
+    MakeErrMsg(ErrMsg, "vm_protect max RX failed");
+    return sys::MemoryBlock();
+  }
+
+  kr = vm_protect(mach_task_self(), (vm_address_t)pa,
+                  (vm_size_t)(pageSize*NumPages), 0,
+                  VM_PROT_READ | VM_PROT_WRITE);
+  if (KERN_SUCCESS != kr) {
+    MakeErrMsg(ErrMsg, "vm_protect RW failed");
+    return sys::MemoryBlock();
+  }
+#endif
+
+  MemoryBlock result;
+  result.Address = pa;
+  result.Size = NumPages*pageSize;
+
+  return result;
+}
+
+bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+  if (M.Address == 0 || M.Size == 0) return false;
+  if (0 != ::munmap(M.Address, M.Size))
+    return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
+  return false;
+}
+
+bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+  if (M.Address == 0 || M.Size == 0) return false;
+  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+    (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
+  return KERN_SUCCESS == kr;
+#else
+  return true;
+#endif
+}
+
+bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+#if defined(__APPLE__) && defined(__arm__)
+  if (M.Address == 0 || M.Size == 0) return false;
+  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
+    (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+  return KERN_SUCCESS == kr;
+#else
+  return false;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+                                (vm_size_t)Size, 0,
+                                VM_PROT_READ | VM_PROT_WRITE);
+  return KERN_SUCCESS == kr;
+#else
+  return true;
+#endif
+}
+
+bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+#if defined(__APPLE__) && defined(__arm__)
+  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
+                                (vm_size_t)Size, 0,
+                                VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
+  return KERN_SUCCESS == kr;
+#else
+  return true;
+#endif
+}
diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc
new file mode 100644
index 000000000000..fe6b17041457
--- /dev/null
+++ b/lib/Support/Unix/Mutex.inc
@@ -0,0 +1,43 @@
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+MutexImpl::MutexImpl( bool recursive)
+{
+}
+
+MutexImpl::~MutexImpl()
+{
+}
+
+bool
+MutexImpl::release()
+{
+  return true;
+}
+
+bool
+MutexImpl::tryacquire( void )
+{
+  return true;
+}
+
+}
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
new file mode 100644
index 000000000000..0f6e800505e1
--- /dev/null
+++ b/lib/Support/Unix/Path.inc
@@ -0,0 +1,887 @@
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+  return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+const char sys::PathSeparator = ':';
+
+StringRef Path::GetEXESuffix() {
+  return StringRef();
+}
+
+Path::Path(StringRef p)
+  : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+  : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(StringRef that) {
+  path.assign(that.data(), that.size());
+  return *this;
+}
+
+bool
+Path::isValid() const {
+  // Empty paths are considered invalid here.
+  // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+  // LLVM manipulates Paths with fixed-sizes arrays, and if the OS can't
+  // handle names longer than some limit, it'll report this on demand using
+  // ENAMETOLONG.
+  return !path.empty();
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+  assert(NameStart);
+  if (NameLen == 0)
+    return false;
+  return NameStart[0] == '/';
+}
+
+bool
+Path::isAbsolute() const {
+  if (path.empty())
+    return false;
+  return path[0] == '/';
+}
+
+Path
+Path::GetRootDirectory() {
+  Path result;
+  result.set("/");
+  return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+  // The best way is with mkdtemp but that's not available on many systems,
+  // Linux and FreeBSD have it. Others probably won't.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  if (0 == mkdtemp(pathname)) {
+    MakeErrMsg(ErrMsg,
+               std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#elif defined(HAVE_MKSTEMP)
+  // If no mkdtemp is available, mkstemp can be used to create a temporary file
+  // which is then removed and created as a directory. We prefer this over
+  // mktemp because of mktemp's inherent security and threading risks. We still
+  // have a slight race condition from the time the temporary file is created to
+  // the time it is re-created as a directoy.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  int fd = 0;
+  if (-1 == (fd = mkstemp(pathname))) {
+    MakeErrMsg(ErrMsg,
+      std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  ::close(fd);
+  ::unlink(pathname); // start race condition, ignore errors
+  if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+    MakeErrMsg(ErrMsg,
+      std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#elif defined(HAVE_MKTEMP)
+  // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+  // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+  // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+  // the XXXXXX with the pid of the process and a letter. That leads to only
+  // twenty six temporary files that can be generated.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  char *TmpName = ::mktemp(pathname);
+  if (TmpName == 0) {
+    MakeErrMsg(ErrMsg,
+      std::string(TmpName) + ": can't create unique directory name");
+    return Path();
+  }
+  if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+    MakeErrMsg(ErrMsg,
+        std::string(TmpName) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(TmpName);
+#else
+  // This is the worst case implementation. tempnam(3) leaks memory unless its
+  // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+  // issues. The mktemp(3) function doesn't have enough variability in the
+  // temporary name generated. So, we provide our own implementation that
+  // increments an integer from a random number seeded by the current time. This
+  // should be sufficiently unique that we don't have many collisions between
+  // processes. Generally LLVM processes don't run very long and don't use very
+  // many temporary files so this shouldn't be a big issue for LLVM.
+  static time_t num = ::time(0);
+  char pathname[MAXPATHLEN];
+  do {
+    num++;
+    sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+  } while ( 0 == access(pathname, F_OK ) );
+  if (-1 == ::mkdir(pathname, S_IRWXU)) {
+    MakeErrMsg(ErrMsg,
+      std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#endif
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+  char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#endif
+  // FIXME: Should this look at LD_LIBRARY_PATH too?
+  Paths.push_back(sys::Path("/usr/local/lib/"));
+  Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+  Paths.push_back(sys::Path("/usr/lib/"));
+  Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  const char* home = getenv("HOME");
+  Path result;
+  if (home && result.set(home))
+    return result;
+  result.set("/");
+  return result;
+}
+
+Path
+Path::GetCurrentDirectory() {
+  char pathname[MAXPATHLEN];
+  if (!getcwd(pathname,MAXPATHLEN)) {
+    assert (false && "Could not query current working directory.");
+    return Path();
+  }
+
+  return Path(pathname);
+}
+
+#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+    defined(__OpenBSD__) || defined(__minix)
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+    const char *dir, const char *bin)
+{
+  struct stat sb;
+
+  snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
+  if (realpath(buf, ret) == NULL)
+    return (1);
+  if (stat(buf, &sb) != 0)
+    return (1);
+
+  return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+  char *pv, *s, *t, buf[PATH_MAX];
+
+  /* First approach: absolute path. */
+  if (bin[0] == '/') {
+    if (test_dir(buf, ret, "/", bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Second approach: relative path. */
+  if (strchr(bin, '/') != NULL) {
+    if (getcwd(buf, PATH_MAX) == NULL)
+      return (NULL);
+    if (test_dir(buf, ret, buf, bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Third approach: $PATH */
+  if ((pv = getenv("PATH")) == NULL)
+    return (NULL);
+  s = pv = strdup(pv);
+  if (pv == NULL)
+    return (NULL);
+  while ((t = strsep(&s, ":")) != NULL) {
+    if (test_dir(buf, ret, t, bin) == 0) {
+      free(pv);
+      return (ret);
+    }
+  }
+  free(pv);
+  return (NULL);
+}
+#endif // __FreeBSD__ || __NetBSD__
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__APPLE__)
+  // On OS X the executable path is saved to the stack by dyld. Reading it
+  // from there is much faster than calling dladdr, especially for large
+  // binaries with symbols.
+  char exe_path[MAXPATHLEN];
+  uint32_t size = sizeof(exe_path);
+  if (_NSGetExecutablePath(exe_path, &size) == 0) {
+    char link_path[MAXPATHLEN];
+    if (realpath(exe_path, link_path))
+      return Path(link_path);
+  }
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
+      defined(__OpenBSD__) || defined(__minix)
+  char exe_path[PATH_MAX];
+
+  if (getprogpath(exe_path, argv0) != NULL)
+    return Path(exe_path);
+#elif defined(__linux__) || defined(__CYGWIN__)
+  char exe_path[MAXPATHLEN];
+  ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
+  if (len >= 0)
+    return Path(StringRef(exe_path, len));
+#elif defined(HAVE_DLFCN_H)
+  // Use dladdr to get executable path if available.
+  Dl_info DLInfo;
+  int err = dladdr(MainAddr, &DLInfo);
+  if (err == 0)
+    return Path();
+
+  // If the filename is a symlink, we need to resolve and return the location of
+  // the actual executable.
+  char link_path[MAXPATHLEN];
+  if (realpath(DLInfo.dli_fname, link_path))
+    return Path(link_path);
+#else
+#error GetMainExecutable is not implemented on this host yet.
+#endif
+  return Path();
+}
+
+
+StringRef Path::getDirname() const {
+  return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+  // Find the last slash
+  std::string::size_type slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  std::string::size_type dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef(path).substr(slash);
+  else
+    return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+  // Find the last slash
+  std::string::size_type slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  std::string::size_type dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef();
+  else
+    return StringRef(path).substr(dot + 1);
+}
+
+bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
+  assert(len < 1024 && "Request for magic string too long");
+  char Buf[1025];
+  int fd = ::open(path.c_str(), O_RDONLY);
+  if (fd < 0)
+    return false;
+  ssize_t bytes_read = ::read(fd, Buf, len);
+  ::close(fd);
+  if (ssize_t(len) != bytes_read)
+    return false;
+  Magic.assign(Buf, len);
+  return true;
+}
+
+bool
+Path::exists() const {
+  return 0 == access(path.c_str(), F_OK );
+}
+
+bool
+Path::isDirectory() const {
+  struct stat buf;
+  if (0 != stat(path.c_str(), &buf))
+    return false;
+  return ((buf.st_mode & S_IFMT) == S_IFDIR) ? true : false;
+}
+
+bool
+Path::isSymLink() const {
+  struct stat buf;
+  if (0 != lstat(path.c_str(), &buf))
+    return false;
+  return S_ISLNK(buf.st_mode);
+}
+
+
+bool
+Path::canRead() const {
+  return 0 == access(path.c_str(), R_OK);
+}
+
+bool
+Path::canWrite() const {
+  return 0 == access(path.c_str(), W_OK);
+}
+
+bool
+Path::isRegularFile() const {
+  // Get the status so we can determine if it's a file or directory
+  struct stat buf;
+
+  if (0 != stat(path.c_str(), &buf))
+    return false;
+
+  if (S_ISREG(buf.st_mode))
+    return true;
+
+  return false;
+}
+
+bool
+Path::canExecute() const {
+  if (0 != access(path.c_str(), R_OK | X_OK ))
+    return false;
+  struct stat buf;
+  if (0 != stat(path.c_str(), &buf))
+    return false;
+  if (!S_ISREG(buf.st_mode))
+    return false;
+  return true;
+}
+
+StringRef
+Path::getLast() const {
+  // Find the last slash
+  size_t pos = path.rfind('/');
+
+  // Handle the corner cases
+  if (pos == std::string::npos)
+    return path;
+
+  // If the last character is a slash
+  if (pos == path.length()-1) {
+    // Find the second to last slash
+    size_t pos2 = path.rfind('/', pos-1);
+    if (pos2 == std::string::npos)
+      return StringRef(path).substr(0,pos);
+    else
+      return StringRef(path).substr(pos2+1,pos-pos2-1);
+  }
+  // Return everything after the last slash
+  return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+  if (!fsIsValid || update) {
+    struct stat buf;
+    if (0 != stat(path.c_str(), &buf)) {
+      MakeErrMsg(ErrStr, path + ": can't get status of file");
+      return 0;
+    }
+    status.fileSize = buf.st_size;
+    status.modTime.fromEpochTime(buf.st_mtime);
+    status.mode = buf.st_mode;
+    status.user = buf.st_uid;
+    status.group = buf.st_gid;
+    status.uniqueID = uint64_t(buf.st_ino);
+    status.isDir  = S_ISDIR(buf.st_mode);
+    status.isFile = S_ISREG(buf.st_mode);
+    fsIsValid = true;
+  }
+  return &status;
+}
+
+static bool AddPermissionBits(const Path &File, int bits) {
+  // Get the umask value from the operating system.  We want to use it
+  // when changing the file's permissions. Since calling umask() sets
+  // the umask and returns its old value, we must call it a second
+  // time to reset it to the user's preference.
+  int mask = umask(0777); // The arg. to umask is arbitrary.
+  umask(mask);            // Restore the umask.
+
+  // Get the file's current mode.
+  struct stat buf;
+  if (0 != stat(File.c_str(), &buf))
+    return false;
+  // Change the file to have whichever permissions bits from 'bits'
+  // that the umask would not disable.
+  if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+      return false;
+  return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0444))
+    return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+  return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0222))
+    return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+  return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0111))
+    return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+  return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+  DIR* direntries = ::opendir(path.c_str());
+  if (direntries == 0)
+    return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+  std::string dirPath = path;
+  if (!lastIsSlash(dirPath))
+    dirPath += '/';
+
+  result.clear();
+  struct dirent* de = ::readdir(direntries);
+  for ( ; de != 0; de = ::readdir(direntries)) {
+    if (de->d_name[0] != '.') {
+      Path aPath(dirPath + (const char*)de->d_name);
+      struct stat st;
+      if (0 != lstat(aPath.path.c_str(), &st)) {
+        if (S_ISLNK(st.st_mode))
+          continue; // dangling symlink -- ignore
+        return MakeErrMsg(ErrMsg,
+                          aPath.path +  ": can't determine file object type");
+      }
+      result.insert(aPath);
+    }
+  }
+
+  closedir(direntries);
+  return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+  if (a_path.empty())
+    return false;
+  path = a_path;
+  return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+  if (name.empty())
+    return false;
+  if (!lastIsSlash(path))
+    path += '/';
+  path += name;
+  return true;
+}
+
+bool
+Path::eraseComponent() {
+  size_t slashpos = path.rfind('/',path.size());
+  if (slashpos == 0 || slashpos == std::string::npos) {
+    path.erase();
+    return true;
+  }
+  if (slashpos == path.size() - 1)
+    slashpos = path.rfind('/',slashpos-1);
+  if (slashpos == std::string::npos) {
+    path.erase();
+    return true;
+  }
+  path.erase(slashpos);
+  return true;
+}
+
+bool
+Path::eraseSuffix() {
+  size_t dotpos = path.rfind('.',path.size());
+  size_t slashpos = path.rfind('/',path.size());
+  if (dotpos != std::string::npos) {
+    if (slashpos == std::string::npos || dotpos > slashpos+1) {
+      path.erase(dotpos, path.size()-dotpos);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+  if (access(beg, R_OK | W_OK) == 0)
+    return false;
+
+  if (create_parents) {
+
+    char* c = end;
+
+    for (; c != beg; --c)
+      if (*c == '/') {
+
+        // Recurse to handling the parent directory.
+        *c = '\0';
+        bool x = createDirectoryHelper(beg, c, create_parents);
+        *c = '/';
+
+        // Return if we encountered an error.
+        if (x)
+          return true;
+
+        break;
+      }
+  }
+
+  return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
+}
+
+bool
+Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
+  // Get a writeable copy of the path name
+  std::string pathname(path);
+
+  // Null-terminate the last component
+  size_t lastchar = path.length() - 1 ;
+
+  if (pathname[lastchar] != '/')
+    ++lastchar;
+
+  pathname[lastchar] = '\0';
+
+  if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents))
+    return MakeErrMsg(ErrMsg, pathname + ": can't create directory");
+
+  return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+  // Create the file
+  int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
+  if (fd < 0)
+    return MakeErrMsg(ErrMsg, path + ": can't create file");
+  ::close(fd);
+  return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+  // Make this into a unique file name
+  if (makeUnique( reuse_current, ErrMsg ))
+    return true;
+
+  // create the file
+  int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+  if (fd < 0)
+    return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
+  ::close(fd);
+  return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+  // Get the status so we can determine if it's a file or directory.
+  struct stat buf;
+  if (0 != stat(path.c_str(), &buf)) {
+    MakeErrMsg(ErrStr, path + ": can't get status of file");
+    return true;
+  }
+
+  // Note: this check catches strange situations. In all cases, LLVM should
+  // only be involved in the creation and deletion of regular files.  This
+  // check ensures that what we're trying to erase is a regular file. It
+  // effectively prevents LLVM from erasing things like /dev/null, any block
+  // special file, or other things that aren't "regular" files.
+  if (S_ISREG(buf.st_mode)) {
+    if (unlink(path.c_str()) != 0)
+      return MakeErrMsg(ErrStr, path + ": can't destroy file");
+    return false;
+  }
+
+  if (!S_ISDIR(buf.st_mode)) {
+    if (ErrStr) *ErrStr = "not a file or directory";
+    return true;
+  }
+
+  if (remove_contents) {
+    // Recursively descend the directory to remove its contents.
+    std::string cmd = "/bin/rm -rf " + path;
+    if (system(cmd.c_str()) != 0) {
+      MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
+      return true;
+    }
+    return false;
+  }
+
+  // Otherwise, try to just remove the one directory.
+  std::string pathname(path);
+  size_t lastchar = path.length() - 1;
+  if (pathname[lastchar] == '/')
+    pathname[lastchar] = '\0';
+  else
+    pathname[lastchar+1] = '\0';
+
+  if (rmdir(pathname.c_str()) != 0)
+    return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
+  return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+  if (0 != ::rename(path.c_str(), newName.c_str()))
+    return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+               newName.str() + "'");
+  return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+  struct utimbuf utb;
+  utb.actime = si.modTime.toPosixTime();
+  utb.modtime = utb.actime;
+  if (0 != ::utime(path.c_str(),&utb))
+    return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+  if (0 != ::chmod(path.c_str(),si.mode))
+    return MakeErrMsg(ErrStr, path + ": can't set mode");
+  return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+  int inFile = -1;
+  int outFile = -1;
+  inFile = ::open(Src.c_str(), O_RDONLY);
+  if (inFile == -1)
+    return MakeErrMsg(ErrMsg, Src.str() +
+      ": can't open source file to copy");
+
+  outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+  if (outFile == -1) {
+    ::close(inFile);
+    return MakeErrMsg(ErrMsg, Dest.str() +
+      ": can't create destination file for copy");
+  }
+
+  char Buffer[16*1024];
+  while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+    if (Amt == -1) {
+      if (errno != EINTR && errno != EAGAIN) {
+        ::close(inFile);
+        ::close(outFile);
+        return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
+      }
+    } else {
+      char *BufPtr = Buffer;
+      while (Amt) {
+        ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+        if (AmtWritten == -1) {
+          if (errno != EINTR && errno != EAGAIN) {
+            ::close(inFile);
+            ::close(outFile);
+            return MakeErrMsg(ErrMsg, Dest.str() +
+              ": can't write destination file");
+          }
+        } else {
+          Amt -= AmtWritten;
+          BufPtr += AmtWritten;
+        }
+      }
+    }
+  }
+  ::close(inFile);
+  ::close(outFile);
+  return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+  bool Exists;
+  if (reuse_current && (fs::exists(path, Exists) || !Exists))
+    return false; // File doesn't exist already, just use it!
+
+  // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+  // mktemp or our own implementation.
+  // This uses std::vector instead of SmallVector to avoid a dependence on
+  // libSupport. And performance isn't critical here.
+  std::vector<char> Buf;
+  Buf.resize(path.size()+8);
+  char *FNBuffer = &Buf[0];
+    path.copy(FNBuffer,path.size());
+  bool isdir;
+  if (!fs::is_directory(path, isdir) && isdir)
+    strcpy(FNBuffer+path.size(), "/XXXXXX");
+  else
+    strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+  int TempFD;
+  if ((TempFD = mkstemp(FNBuffer)) == -1)
+    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+  // We don't need to hold the temp file descriptor... we will trust that no one
+  // will overwrite/delete the file before we can open it again.
+  close(TempFD);
+
+  // Save the name
+  path = FNBuffer;
+#elif defined(HAVE_MKTEMP)
+  // If we don't have mkstemp, use the old and obsolete mktemp function.
+  if (mktemp(FNBuffer) == 0)
+    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+  // Save the name
+  path = FNBuffer;
+#else
+  // Okay, looks like we have to do it all by our lonesome.
+  static unsigned FCounter = 0;
+  // Try to initialize with unique value.
+  if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+  char* pos = strstr(FNBuffer, "XXXXXX");
+  do {
+    if (++FCounter > 0xFFFFFF) {
+      return MakeErrMsg(ErrMsg,
+        path + ": can't make unique filename: too many files");
+    }
+    sprintf(pos, "%06X", FCounter);
+    path = FNBuffer;
+  } while (exists());
+  // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+  // LLVM.
+#endif
+  return false;
+}
+
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+  Flags |= MAP_FILE;
+#endif
+  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
+  if (BasePtr == MAP_FAILED)
+    return 0;
+  return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
+  ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
new file mode 100644
index 000000000000..03ff28367e44
--- /dev/null
+++ b/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,507 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+using namespace llvm;
+
+namespace {
+  /// This class automatically closes the given file descriptor when it goes out
+  /// of scope. You can take back explicit ownership of the file descriptor by
+  /// calling take(). The destructor does not verify that close was successful.
+  /// Therefore, never allow this class to call close on a file descriptor that
+  /// has been read from or written to.
+  struct AutoFD {
+    int FileDescriptor;
+
+    AutoFD(int fd) : FileDescriptor(fd) {}
+    ~AutoFD() {
+      if (FileDescriptor >= 0)
+        ::close(FileDescriptor);
+    }
+
+    int take() {
+      int ret = FileDescriptor;
+      FileDescriptor = -1;
+      return ret;
+    }
+
+    operator int() const {return FileDescriptor;}
+  };
+
+  error_code TempDir(SmallVectorImpl<char> &result) {
+    // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+    const char *dir = 0;
+    (dir = std::getenv("TMPDIR" )) ||
+    (dir = std::getenv("TMP"    )) ||
+    (dir = std::getenv("TEMP"   )) ||
+    (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+    (dir = P_tmpdir) ||
+#endif
+    (dir = "/tmp");
+
+    result.clear();
+    StringRef d(dir);
+    result.append(d.begin(), d.end());
+    return success;
+  }
+}
+
+namespace llvm {
+namespace sys  {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+  result.reserve(MAXPATHLEN);
+
+  while (true) {
+    if (::getcwd(result.data(), result.capacity()) == 0) {
+      // See if there was a real error.
+      if (errno != errc::not_enough_memory)
+        return error_code(errno, system_category());
+      // Otherwise there just wasn't enough space.
+      result.reserve(result.capacity() * 2);
+    } else
+      break;
+  }
+
+  result.set_size(strlen(result.data()));
+  return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  const size_t buf_sz = 32768;
+  char buffer[buf_sz];
+  int from_file = -1, to_file = -1;
+
+  // Open from.
+  if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+    return error_code(errno, system_category());
+  AutoFD from_fd(from_file);
+
+  // Stat from.
+  struct stat from_stat;
+  if (::stat(f.begin(), &from_stat) != 0)
+    return error_code(errno, system_category());
+
+  // Setup to flags.
+  int to_flags = O_CREAT | O_WRONLY;
+  if (copt == copy_option::fail_if_exists)
+    to_flags |= O_EXCL;
+
+  // Open to.
+  if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+    return error_code(errno, system_category());
+  AutoFD to_fd(to_file);
+
+  // Copy!
+  ssize_t sz, sz_read = 1, sz_write;
+  while (sz_read > 0 &&
+         (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+    // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+    // Marc Rochkind, Addison-Wesley, 2004, page 94
+    sz_write = 0;
+    do {
+      if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+        sz_read = sz;  // cause read loop termination.
+        break;         // error.
+      }
+      sz_write += sz;
+    } while (sz_write < sz_read);
+  }
+
+  // After all the file operations above the return value of close actually
+  // matters.
+  if (::close(from_fd.take()) < 0) sz_read = -1;
+  if (::close(to_fd.take()) < 0) sz_read = -1;
+
+  // Check for errors.
+  if (sz_read < 0)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+    if (errno != errc::file_exists)
+      return error_code(errno, system_category());
+    existed = true;
+  } else
+    existed = false;
+
+  return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  if (::link(t.begin(), f.begin()) == -1)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  if (::symlink(t.begin(), f.begin()) == -1)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  if (::remove(p.begin()) == -1) {
+    if (errno != errc::no_such_file_or_directory)
+      return error_code(errno, system_category());
+    existed = false;
+  } else
+    existed = true;
+
+  return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  if (::rename(f.begin(), t.begin()) == -1) {
+    // If it's a cross device link, copy then delete, otherwise return the error
+    if (errno == EXDEV) {
+      if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists))
+        return ec;
+      bool Existed;
+      if (error_code ec = remove(from, Existed))
+        return ec;
+    } else
+      return error_code(errno, system_category());
+  }
+
+  return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  if (::truncate(p.begin(), size) == -1)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code exists(const Twine &path, bool &result) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  struct stat status;
+  if (::stat(p.begin(), &status) == -1) {
+    if (errno != errc::no_such_file_or_directory)
+      return error_code(errno, system_category());
+    result = false;
+  } else
+    result = true;
+
+  return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+  // Get arguments.
+  SmallString<128> a_storage;
+  SmallString<128> b_storage;
+  StringRef a = A.toNullTerminatedStringRef(a_storage);
+  StringRef b = B.toNullTerminatedStringRef(b_storage);
+
+  struct stat stat_a, stat_b;
+  int error_b = ::stat(b.begin(), &stat_b);
+  int error_a = ::stat(a.begin(), &stat_a);
+
+  // If both are invalid, it's an error. If only one is, the result is false.
+  if (error_a != 0 || error_b != 0) {
+    if (error_a == error_b)
+      return error_code(errno, system_category());
+    result = false;
+  } else {
+    result =
+      stat_a.st_dev == stat_b.st_dev &&
+      stat_a.st_ino == stat_b.st_ino;
+  }
+
+  return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  struct stat status;
+  if (::stat(p.begin(), &status) == -1)
+    return error_code(errno, system_category());
+  if (!S_ISREG(status.st_mode))
+    return make_error_code(errc::operation_not_permitted);
+
+  result = status.st_size;
+  return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  struct stat status;
+  if (::stat(p.begin(), &status) != 0) {
+    error_code ec(errno, system_category());
+    if (ec == errc::no_such_file_or_directory)
+      result = file_status(file_type::file_not_found);
+    else
+      result = file_status(file_type::status_error);
+    return ec;
+  }
+
+  if (S_ISDIR(status.st_mode))
+    result = file_status(file_type::directory_file);
+  else if (S_ISREG(status.st_mode))
+    result = file_status(file_type::regular_file);
+  else if (S_ISBLK(status.st_mode))
+    result = file_status(file_type::block_file);
+  else if (S_ISCHR(status.st_mode))
+    result = file_status(file_type::character_file);
+  else if (S_ISFIFO(status.st_mode))
+    result = file_status(file_type::fifo_file);
+  else if (S_ISSOCK(status.st_mode))
+    result = file_status(file_type::socket_file);
+  else
+    result = file_status(file_type::type_unknown);
+
+  return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+                             SmallVectorImpl<char> &result_path) {
+  SmallString<128> Model;
+  model.toVector(Model);
+  // Null terminate.
+  Model.c_str();
+
+  // Make model absolute by prepending a temp directory if it's not already.
+  bool absolute = path::is_absolute(Twine(Model));
+  if (!absolute) {
+    SmallString<128> TDir;
+    if (error_code ec = TempDir(TDir)) return ec;
+    path::append(TDir, Twine(Model));
+    Model.swap(TDir);
+  }
+
+  // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+  // needed if the randomly chosen path already exists.
+  SmallString<128> RandomPath;
+  RandomPath.reserve(Model.size() + 1);
+  ::srand(::time(NULL));
+
+retry_random_path:
+  // This is opened here instead of above to make it easier to track when to
+  // close it. Collisions should be rare enough for the possible extra syscalls
+  // not to matter.
+  FILE *RandomSource = ::fopen("/dev/urandom", "r");
+  RandomPath.set_size(0);
+  for (SmallVectorImpl<char>::const_iterator i = Model.begin(),
+                                             e = Model.end(); i != e; ++i) {
+    if (*i == '%') {
+      char val = 0;
+      if (RandomSource)
+        val = fgetc(RandomSource);
+      else
+        val = ::rand();
+      RandomPath.push_back("0123456789abcdef"[val & 15]);
+    } else
+      RandomPath.push_back(*i);
+  }
+
+  if (RandomSource)
+    ::fclose(RandomSource);
+
+  // Try to open + create the file.
+rety_open_create:
+  int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
+  if (RandomFD == -1) {
+    // If the file existed, try again, otherwise, error.
+    if (errno == errc::file_exists)
+      goto retry_random_path;
+    // The path prefix doesn't exist.
+    if (errno == errc::no_such_file_or_directory) {
+      StringRef p(RandomPath.begin(), RandomPath.size());
+      SmallString<64> dir_to_create;
+      for (path::const_iterator i = path::begin(p),
+                                e = --path::end(p); i != e; ++i) {
+        path::append(dir_to_create, *i);
+        bool Exists;
+        if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+        if (!Exists) {
+          // Don't try to create network paths.
+          if (i->size() > 2 && (*i)[0] == '/' &&
+                               (*i)[1] == '/' &&
+                               (*i)[2] != '/')
+            return make_error_code(errc::no_such_file_or_directory);
+          if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+            return error_code(errno, system_category());
+        }
+      }
+      goto rety_open_create;
+    }
+    return error_code(errno, system_category());
+  }
+
+   // Make the path absolute.
+  char real_path_buff[PATH_MAX + 1];
+  if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+    int error = errno;
+    ::close(RandomFD);
+    ::unlink(RandomPath.c_str());
+    return error_code(error, system_category());
+  }
+
+  result_path.clear();
+  StringRef d(real_path_buff);
+  result_path.append(d.begin(), d.end());
+
+  result_fd = RandomFD;
+  return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+  SmallString<128> path_null(path);
+  DIR *directory = ::opendir(path_null.c_str());
+  if (directory == 0)
+    return error_code(errno, system_category());
+
+  it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+  // Add something for replace_filename to replace.
+  path::append(path_null, ".");
+  it.CurrentEntry = directory_entry(path_null.str());
+  return directory_iterator_increment(it);
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+  if (it.IterationHandle)
+    ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+  it.IterationHandle = 0;
+  it.CurrentEntry = directory_entry();
+  return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+  errno = 0;
+  dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+  if (cur_dir == 0 && errno != 0) {
+    return error_code(errno, system_category());
+  } else if (cur_dir != 0) {
+    StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+    if ((name.size() == 1 && name[0] == '.') ||
+        (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+      return directory_iterator_increment(it);
+    it.CurrentEntry.replace_filename(name);
+  } else
+    return directory_iterator_destruct(it);
+
+  return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+                     SmallVectorImpl<char> &result) {
+  SmallString<128> PathStorage;
+  StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+  result.set_size(0);
+
+  // Open path.
+  std::FILE *file = std::fopen(Path.data(), "rb");
+  if (file == 0)
+    return error_code(errno, system_category());
+
+  // Reserve storage.
+  result.reserve(len);
+
+  // Read magic!
+  size_t size = std::fread(result.data(), 1, len, file);
+  if (std::ferror(file) != 0) {
+    std::fclose(file);
+    return error_code(errno, system_category());
+  } else if (size != result.size()) {
+    if (std::feof(file) != 0) {
+      std::fclose(file);
+      result.set_size(size);
+      return make_error_code(errc::value_too_large);
+    }
+  }
+  std::fclose(file);
+  result.set_size(len);
+  return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
new file mode 100644
index 000000000000..5cdb11ccebc4
--- /dev/null
+++ b/lib/Support/Unix/Process.inc
@@ -0,0 +1,295 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
+//  Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#  include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+#  include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+  // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+  // memory protection and mmap() is 4k.
+  // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+  const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+  const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+  long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+  return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi;
+  mi = ::mallinfo();
+  return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_in_use;   // darwin
+#elif defined(HAVE_SBRK)
+  // Note this is only an approximation and more closely resembles
+  // the value returned by mallinfo in the arena field.
+  static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+  char *EndOfMemory = (char*)sbrk(0);
+  if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+    return EndOfMemory - StartOfMemory;
+  else
+    return 0;
+#else
+#warning Cannot get malloc info on this platform
+  return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi = ::mallinfo();
+  return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_allocated;   // darwin
+#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+  return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+                      TimeValue& sys_time)
+{
+  elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  user_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+  sys_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+  user_time.seconds(0);
+  user_time.microseconds(0);
+  sys_time.seconds(0);
+  sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+  return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+  return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+  struct rlimit rlim;
+  rlim.rlim_cur = rlim.rlim_max = 0;
+  setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+  // Disable crash reporting on Mac OS X 10.0-10.4
+
+  // get information about the original set of exception ports for the task
+  mach_msg_type_number_t Count = 0;
+  exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+  exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+  exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+  thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+  kern_return_t err =
+    task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+                             &Count, OriginalPorts, OriginalBehaviors,
+                             OriginalFlavors);
+  if (err == KERN_SUCCESS) {
+    // replace each with MACH_PORT_NULL.
+    for (unsigned i = 0; i != Count; ++i)
+      task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+                               MACH_PORT_NULL, OriginalBehaviors[i],
+                               OriginalFlavors[i]);
+  }
+
+  // Disable crash reporting on Mac OS X 10.5
+  signal(SIGABRT, _exit);
+  signal(SIGILL,  _exit);
+  signal(SIGFPE,  _exit);
+  signal(SIGSEGV, _exit);
+  signal(SIGBUS,  _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+  return FileDescriptorIsDisplayed(STDIN_FILENO);
+}
+
+bool Process::StandardOutIsDisplayed() {
+  return FileDescriptorIsDisplayed(STDOUT_FILENO);
+}
+
+bool Process::StandardErrIsDisplayed() {
+  return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+#if HAVE_ISATTY
+  return isatty(fd);
+#else
+  // If we don't have isatty, just return false.
+  return false;
+#endif
+}
+
+static unsigned getColumns(int FileID) {
+  // If COLUMNS is defined in the environment, wrap to that many columns.
+  if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+    int Columns = std::atoi(ColumnsStr);
+    if (Columns > 0)
+      return Columns;
+  }
+
+  unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+  // Try to determine the width of the terminal.
+  struct winsize ws;
+  if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+    Columns = ws.ws_col;
+#endif
+
+  return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+  if (!StandardOutIsDisplayed())
+    return 0;
+
+  return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+  if (!StandardErrIsDisplayed())
+    return 0;
+
+  return getColumns(2);
+}
+
+static bool terminalHasColors() {
+  if (const char *term = std::getenv("TERM")) {
+    // Most modern terminals support ANSI escape sequences for colors.
+    // We could check terminfo, or have a list of known terms that support
+    // colors, but that would be overkill.
+    // The user can always ask for no colors by setting TERM to dumb, or
+    // using a commandline flag.
+    return strcmp(term, "dumb") != 0;
+  }
+  return false;
+}
+
+bool Process::StandardOutHasColors() {
+  if (!StandardOutIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+  if (!StandardErrIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+  // No, we use ANSI escape sequences.
+  return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+    COLOR(FGBG, "0", BOLD),\
+    COLOR(FGBG, "1", BOLD),\
+    COLOR(FGBG, "2", BOLD),\
+    COLOR(FGBG, "3", BOLD),\
+    COLOR(FGBG, "4", BOLD),\
+    COLOR(FGBG, "5", BOLD),\
+    COLOR(FGBG, "6", BOLD),\
+    COLOR(FGBG, "7", BOLD)\
+  }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+  return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+  return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+  return "\033[0m";
+}
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
new file mode 100644
index 000000000000..1104bc7503e1
--- /dev/null
+++ b/lib/Support/Unix/Program.inc
@@ -0,0 +1,424 @@
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "llvm/Support/FileSystem.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+  extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+unsigned Program::GetPid() const {
+  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+  return static_cast<unsigned>(pid);
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+  // Check some degenerate cases
+  if (progName.length() == 0) // no program
+    return Path();
+  Path temp;
+  if (!temp.set(progName)) // invalid name
+    return Path();
+  // Use the given path verbatim if it contains any slashes; this matches
+  // the behavior of sh(1) and friends.
+  if (progName.find('/') != std::string::npos)
+    return temp;
+
+  // At this point, the file name is valid and does not contain slashes. Search
+  // for it through the directories specified in the PATH environment variable.
+
+  // Get the path. If its empty, we can't do anything to find it.
+  const char *PathStr = getenv("PATH");
+  if (PathStr == 0)
+    return Path();
+
+  // Now we have a colon separated list of directories to search; try them.
+  size_t PathLen = strlen(PathStr);
+  while (PathLen) {
+    // Find the first colon...
+    const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+    // Check to see if this first directory contains the executable...
+    Path FilePath;
+    if (FilePath.set(std::string(PathStr,Colon))) {
+      FilePath.appendComponent(progName);
+      if (FilePath.canExecute())
+        return FilePath;                    // Found the executable!
+    }
+
+    // Nope it wasn't in this directory, check the next path in the list!
+    PathLen -= Colon-PathStr;
+    PathStr = Colon;
+
+    // Advance past duplicate colons
+    while (*PathStr == ':') {
+      PathStr++;
+      PathLen--;
+    }
+  }
+  return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+  if (Path == 0) // Noop
+    return false;
+  const char *File;
+  if (Path->isEmpty())
+    // Redirect empty paths to /dev/null
+    File = "/dev/null";
+  else
+    File = Path->c_str();
+
+  // Open the file
+  int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+  if (InFD == -1) {
+    MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+              + (FD == 0 ? "input" : "output"));
+    return true;
+  }
+
+  // Install it as the requested FD
+  if (dup2(InFD, FD) == -1) {
+    MakeErrMsg(ErrMsg, "Cannot dup2");
+    close(InFD);
+    return true;
+  }
+  close(InFD);      // Close the original FD
+  return false;
+}
+
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+                          posix_spawn_file_actions_t &FileActions) {
+  if (Path == 0) // Noop
+    return false;
+  const char *File;
+  if (Path->isEmpty())
+    // Redirect empty paths to /dev/null
+    File = "/dev/null";
+  else
+    File = Path->c_str();
+
+  if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD,
+                            File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
+    return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+  return false;
+}
+#endif
+
+static void TimeOutHandler(int Sig) {
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
+  struct rlimit r;
+  __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+  // Heap size
+  getrlimit (RLIMIT_DATA, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+  // Resident set size.
+  getrlimit (RLIMIT_RSS, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS  // e.g. NetBSD doesn't have it.
+  // Virtual memory.
+  getrlimit (RLIMIT_AS, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+}
+
+bool
+Program::Execute(const Path &path, const char **args, const char **envp,
+                 const Path **redirects, unsigned memoryLimit,
+                  std::string *ErrMsg) {
+  // If this OS has posix_spawn and there is no memory limit being implied, use
+  // posix_spawn.  It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+  if (memoryLimit == 0) {
+    posix_spawn_file_actions_t FileActions;
+    posix_spawn_file_actions_init(&FileActions);
+
+    if (redirects) {
+      // Redirect stdin/stdout.
+      if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+          RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+        return false;
+      if (redirects[1] == 0 || redirects[2] == 0 ||
+          *redirects[1] != *redirects[2]) {
+        // Just redirect stderr
+        if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+      } else {
+        // If stdout and stderr should go to the same place, redirect stderr
+        // to the FD already open for stdout.
+        if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
+          return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+      }
+    }
+
+    if (!envp)
+#if !defined(__APPLE__)
+      envp = const_cast<const char **>(environ);
+#else
+      // environ is missing in dylibs.
+      envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+    // Explicitly initialized to prevent what appears to be a valgrind false
+    // positive.
+    pid_t PID = 0;
+    int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
+                          const_cast<char **>(args), const_cast<char **>(envp));
+
+    posix_spawn_file_actions_destroy(&FileActions);
+
+    if (Err)
+     return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+    Data_ = reinterpret_cast<void*>(PID);
+    return true;
+  }
+#endif
+
+  // Create a child process.
+  int child = fork();
+  switch (child) {
+    // An error occured:  Return to the caller.
+    case -1:
+      MakeErrMsg(ErrMsg, "Couldn't fork");
+      return false;
+
+    // Child process: Execute the program.
+    case 0: {
+      // Redirect file descriptors...
+      if (redirects) {
+        // Redirect stdin
+        if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
+        // Redirect stdout
+        if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+        if (redirects[1] && redirects[2] &&
+            *(redirects[1]) == *(redirects[2])) {
+          // If stdout and stderr should go to the same place, redirect stderr
+          // to the FD already open for stdout.
+          if (-1 == dup2(1,2)) {
+            MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+            return false;
+          }
+        } else {
+          // Just redirect stderr
+          if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
+        }
+      }
+
+      // Set memory limits
+      if (memoryLimit!=0) {
+        SetMemoryLimits(memoryLimit);
+      }
+
+      // Execute!
+      if (envp != 0)
+        execve(path.c_str(),
+               const_cast<char **>(args),
+               const_cast<char **>(envp));
+      else
+        execv(path.c_str(),
+              const_cast<char **>(args));
+      // If the execve() failed, we should exit. Follow Unix protocol and
+      // return 127 if the executable was not found, and 126 otherwise.
+      // Use _exit rather than exit so that atexit functions and static
+      // object destructors cloned from the parent process aren't
+      // redundantly run, and so that any data buffered in stdio buffers
+      // cloned from the parent aren't redundantly written out.
+      _exit(errno == ENOENT ? 127 : 126);
+    }
+
+    // Parent process: Break out of the switch to do our processing.
+    default:
+      break;
+  }
+
+  Data_ = reinterpret_cast<void*>(child);
+
+  return true;
+}
+
+int
+Program::Wait(const sys::Path &path,
+              unsigned secondsToWait,
+              std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
+  struct sigaction Act, Old;
+
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return -1;
+  }
+
+  // Install a timeout handler.  The handler itself does nothing, but the simple
+  // fact of having a handler at all causes the wait below to return with EINTR,
+  // unlike if we used SIG_IGN.
+  if (secondsToWait) {
+    memset(&Act, 0, sizeof(Act));
+    Act.sa_handler = TimeOutHandler;
+    sigemptyset(&Act.sa_mask);
+    sigaction(SIGALRM, &Act, &Old);
+    alarm(secondsToWait);
+  }
+
+  // Parent process: Wait for the child process to terminate.
+  int status;
+  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+  pid_t child = static_cast<pid_t>(pid);
+  while (waitpid(pid, &status, 0) != child)
+    if (secondsToWait && errno == EINTR) {
+      // Kill the child.
+      kill(child, SIGKILL);
+
+      // Turn off the alarm and restore the signal handler
+      alarm(0);
+      sigaction(SIGALRM, &Old, 0);
+
+      // Wait for child to die
+      if (wait(&status) != child)
+        MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+      else
+        MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+      return -1;   // Timeout detected
+    } else if (errno != EINTR) {
+      MakeErrMsg(ErrMsg, "Error waiting for child process");
+      return -1;
+    }
+
+  // We exited normally without timeout, so turn off the timer.
+  if (secondsToWait) {
+    alarm(0);
+    sigaction(SIGALRM, &Old, 0);
+  }
+
+  // Return the proper exit status. Detect error conditions
+  // so we can return -1 for them and set ErrMsg informatively.
+  int result = 0;
+  if (WIFEXITED(status)) {
+    result = WEXITSTATUS(status);
+#ifdef HAVE_POSIX_SPAWN
+    // The posix_spawn child process returns 127 on any kind of error.
+    // Following the POSIX convention for command-line tools (which posix_spawn
+    // itself apparently does not), check to see if the failure was due to some
+    // reason other than the file not existing, and return 126 in this case.
+    bool Exists;
+    if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+      result = 126;
+#endif
+    if (result == 127) {
+      if (ErrMsg)
+        *ErrMsg = llvm::sys::StrError(ENOENT);
+      return -1;
+    }
+    if (result == 126) {
+      if (ErrMsg)
+        *ErrMsg = "Program could not be executed";
+      return -1;
+    }
+  } else if (WIFSIGNALED(status)) {
+    if (ErrMsg) {
+      *ErrMsg = strsignal(WTERMSIG(status));
+#ifdef WCOREDUMP
+      if (WCOREDUMP(status))
+        *ErrMsg += " (core dumped)";
+#endif
+    }
+    return -1;
+  }
+  return result;
+#else
+  if (ErrMsg)
+    *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+  return -1;
+#endif
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return true;
+  }
+
+  uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
+  pid_t pid = static_cast<pid_t>(pid64);
+
+  if (kill(pid, SIGKILL) != 0) {
+    MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+    return true;
+  }
+
+  return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+bool Program::ChangeStderrToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+}
diff --git a/lib/Support/Unix/README.txt b/lib/Support/Unix/README.txt
new file mode 100644
index 000000000000..3d547c2990d5
--- /dev/null
+++ b/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+===========================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix           - only code that is truly generic to all UNIX platforms
+  Posix        - code that is specific to Posix variants of UNIX
+  SUS          - code that is specific to the Single Unix Specification
+  SysV         - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc
new file mode 100644
index 000000000000..40e87ff13111
--- /dev/null
+++ b/lib/Support/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock  =//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+  return true;
+}
+
+bool RWMutexImpl::reader_release() {
+  return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+  return true;
+}
+
+bool RWMutexImpl::writer_release() {
+  return true;
+}
+
+}
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
new file mode 100644
index 000000000000..0a617591551d
--- /dev/null
+++ b/lib/Support/Unix/Signals.inc
@@ -0,0 +1,303 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occuring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h>         // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig);  // defined below.
+
+static SmartMutex<true> SignalsMutex;
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+  SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+  IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+  SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+#ifdef SIGSYS
+  , SIGSYS
+#endif
+#ifdef SIGXCPU
+  , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+  , SIGXFSZ
+#endif
+#ifdef SIGEMT
+  , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+  KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+  struct sigaction SA;
+  int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+  assert(NumRegisteredSignals <
+         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+         "Out of space for signal handlers!");
+
+  struct sigaction NewHandler;
+
+  NewHandler.sa_handler = SignalHandler;
+  NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+  sigemptyset(&NewHandler.sa_mask);
+
+  // Install the new handler, save the old one in RegisteredSignalInfo.
+  sigaction(Signal, &NewHandler,
+            &RegisteredSignalInfo[NumRegisteredSignals].SA);
+  RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+  ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+  // If the handlers are already registered, we're done.
+  if (NumRegisteredSignals != 0) return;
+
+  std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+  std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+  // Restore all of the signal handlers to how they were before we showed up.
+  for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+    sigaction(RegisteredSignalInfo[i].SigNo,
+              &RegisteredSignalInfo[i].SA, 0);
+  NumRegisteredSignals = 0;
+}
+
+
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+static void RemoveFilesToRemove() {
+  while (!FilesToRemove.empty()) {
+    FilesToRemove.back().eraseFromDisk(true);
+    FilesToRemove.pop_back();
+  }
+}
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+  // Restore the signal behavior to default, so that the program actually
+  // crashes when we return and the signal reissues.  This also ensures that if
+  // we crash in our signal handler that the program will terminate immediately
+  // instead of recursing in the signal handler.
+  UnregisterHandlers();
+
+  // Unmask all potentially blocked kill signals.
+  sigset_t SigMask;
+  sigfillset(&SigMask);
+  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+  SignalsMutex.acquire();
+  RemoveFilesToRemove();
+
+  if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+    if (InterruptFunction) {
+      void (*IF)() = InterruptFunction;
+      SignalsMutex.release();
+      InterruptFunction = 0;
+      IF();        // run the interrupt function.
+      return;
+    }
+
+    SignalsMutex.release();
+    raise(Sig);   // Execute the default handler.
+    return;
+  }
+
+  SignalsMutex.release();
+
+  // Otherwise if it is a fault (like SEGV) run any handler.
+  for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+    CallBacksToRun[i].first(CallBacksToRun[i].second);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+  SignalsMutex.acquire();
+  RemoveFilesToRemove();
+  SignalsMutex.release();
+}
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+  SignalsMutex.acquire();
+  InterruptFunction = IF;
+  SignalsMutex.release();
+  RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+                                   std::string* ErrMsg) {
+  SignalsMutex.acquire();
+  FilesToRemove.push_back(Filename);
+
+  SignalsMutex.release();
+
+  RegisterHandlers();
+  return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+  SignalsMutex.acquire();
+  std::vector<sys::Path>::reverse_iterator I =
+    std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+  if (I != FilesToRemove.rend())
+    FilesToRemove.erase(I.base()-1);
+  SignalsMutex.release();
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process.  The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+  static void* StackTrace[256];
+  // Use backtrace() to output a backtrace on Linux systems with glibc.
+  int depth = backtrace(StackTrace,
+                        static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+  int width = 0;
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+
+    int nwidth;
+    if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+    else              nwidth = strlen(name) - 1;
+
+    if (nwidth > width) width = nwidth;
+  }
+
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+
+    fprintf(stderr, "%-2d", i);
+
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+    if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+    else              fprintf(stderr, " %-*s", width, name+1);
+
+    fprintf(stderr, " %#0*lx",
+            (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+    if (dlinfo.dli_sname != NULL) {
+      int res;
+      fputc(' ', stderr);
+      char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+      if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+      else           fputs(d, stderr);
+      free(d);
+
+      fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+    }
+    fputc('\n', stderr);
+  }
+#else
+  backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+  AddSignalHandler(PrintStackTrace, 0);
+}
+
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+int raise(int sig) {
+  return pthread_kill(pthread_self(), sig);
+}
+
+void __assert_rtn(const char *func,
+                  const char *file,
+                  int line,
+                  const char *expr) {
+  if (func)
+    fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+            expr, func, file, line);
+  else
+    fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+            expr, file, line);
+  abort();
+}
+
+#include <signal.h>
+#include <pthread.h>
+
+void abort() {
+  raise(SIGABRT);
+  usleep(1000);
+  __builtin_trap();
+}
+
+#endif
diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc
new file mode 100644
index 000000000000..2b4c9017cd91
--- /dev/null
+++ b/lib/Support/Unix/ThreadLocal.inc
@@ -0,0 +1,26 @@
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
+}
diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc
new file mode 100644
index 000000000000..5cf5a9d44ed6
--- /dev/null
+++ b/lib/Support/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+  using namespace sys;
+
+std::string TimeValue::str() const {
+  char buffer[32];
+
+  time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+  asctime_r(localtime(&ourTime), buffer);
+#else
+  ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+  std::string result(buffer);
+  return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+  struct timeval the_time;
+  timerclear(&the_time);
+  if (0 != ::gettimeofday(&the_time,0)) {
+    // This is *really* unlikely to occur because the only gettimeofday
+    // errors concern the timezone parameter which we're passing in as 0.
+    // In the unlikely case it does happen, just return MinTime, no error
+    // message needed.
+    return MinTime;
+  }
+
+  return TimeValue(
+    static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
+    static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+      NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
new file mode 100644
index 000000000000..b7be3111d431
--- /dev/null
+++ b/lib/Support/Unix/Unix.h
@@ -0,0 +1,87 @@
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "llvm/Support/Errno.h"
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <string>
+#include <algorithm>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If errnum is -1, the
+/// default then the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+  std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+  if (!ErrMsg)
+    return true;
+  if (errnum == -1)
+    errnum = errno;
+  *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
+  return true;
+}
+
+#endif
diff --git a/lib/Support/Unix/system_error.inc b/lib/Support/Unix/system_error.inc
new file mode 100644
index 000000000000..681e919edb4e
--- /dev/null
+++ b/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+  return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+  if (ev > ELAST)
+    return error_condition(ev, system_category());
+#endif  // ELAST
+  return error_condition(ev, generic_category());
+}
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
new file mode 100644
index 000000000000..703448524ed9
--- /dev/null
+++ b/lib/Support/Valgrind.cpp
@@ -0,0 +1,54 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+//  defined.  If we have valgrind.h but valgrind isn't running, its macros are
+//  no-ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+  return !RUNNING_ON_VALGRIND;
+}
+
+// This bool is negated from what we'd expect because code may run before it
+// gets initialized.  If that happens, it will appear to be 0 (false), and we
+// want that to cause the rest of the code in this file to run the
+// Valgrind-provided macros.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+  if (NotUnderValgrind)
+    return false;
+  return RUNNING_ON_VALGRIND;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+  if (NotUnderValgrind)
+    return;
+
+  VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+}
+
+#else  // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+  return false;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}
+
+#endif  // !HAVE_VALGRIND_VALGRIND_H
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
new file mode 100644
index 000000000000..2c14366c0761
--- /dev/null
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -0,0 +1,166 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+  #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+#ifdef _WIN64
+  typedef DWORD64 ModuleBaseType;
+#else
+  typedef ULONG ModuleBaseType;
+#endif
+
+extern "C" {
+// Use old callback if:
+//  - Not using Visual Studio
+//  - Visual Studio 2005 or earlier but only if we are not using the Windows SDK
+//    or Windows SDK version is older than 6.0
+// Use new callback if:
+//  - Newer Visual Studio (comes with newer SDK).
+//  - Visual Studio 2005 with Windows SDK 6.0+
+#if defined(_MSC_VER)
+  #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+    #define OLD_ELM_CALLBACK_DECL 1
+  #endif
+#elif defined(__MINGW64__)
+  // Use new callback.
+#elif defined(__MINGW32__)
+  #define OLD_ELM_CALLBACK_DECL 1
+#endif
+
+#ifdef OLD_ELM_CALLBACK_DECL
+  static BOOL CALLBACK ELM_Callback(PSTR  ModuleName,
+                                    ModuleBaseType ModuleBase,
+                                    ULONG ModuleSize,
+                                    PVOID UserContext)
+#else
+  static BOOL CALLBACK ELM_Callback(PCSTR  ModuleName,
+                                    ModuleBaseType ModuleBase,
+                                    ULONG ModuleSize,
+                                    PVOID UserContext)
+#endif
+  {
+    // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
+    // into the process.
+    if (stricmp(ModuleName, "msvci70") != 0 &&
+        stricmp(ModuleName, "msvcirt") != 0 &&
+        stricmp(ModuleName, "msvcp50") != 0 &&
+        stricmp(ModuleName, "msvcp60") != 0 &&
+        stricmp(ModuleName, "msvcp70") != 0 &&
+        stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+        // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+        // Otherwise, user should be aware, what he's doing :)
+        stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+        stricmp(ModuleName, "msvcrt20") != 0 &&
+        stricmp(ModuleName, "msvcrt40") != 0) {
+      OpenedHandles.push_back((HMODULE)ModuleBase);
+    }
+    return TRUE;
+  }
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+                                            std::string *ErrMsg) {
+  if (filename) {
+    HMODULE a_handle = LoadLibrary(filename);
+
+    if (a_handle == 0)
+      return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+    OpenedHandles.push_back(a_handle);
+  } else {
+    // When no file is specified, enumerate all DLLs and EXEs in the
+    // process.
+    EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+  }
+
+  // Because we don't remember the handle, we will never free it; hence,
+  // it is loaded permanently.
+  return false;
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we don't
+// have dynamic linking on windows. Provide a hook.
+#define EXPLICIT_SYMBOL(SYM)                    \
+  extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+  // First check symbols added via AddSymbol().
+  if (ExplicitSymbols) {
+    std::map<std::string, void *>::iterator I =
+      ExplicitSymbols->find(symbolName);
+    std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+    if (I != E)
+      return I->second;
+  }
+
+  // Now search the libraries.
+  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+       E = OpenedHandles.end(); I != E; ++I) {
+    FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+    if (ptr) {
+      return (void *) ptr;
+    }
+  }
+
+  #define EXPLICIT_SYMBOL(SYM)                    \
+    if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+  #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)        \
+    if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
+  {
+    #include "explicit_symbols.inc"
+  }
+
+  #undef EXPLICIT_SYMBOL
+  #undef EXPLICIT_SYMBOL2
+
+  return 0;
+}
+
+}
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
new file mode 100644
index 000000000000..733830e82f08
--- /dev/null
+++ b/lib/Support/Windows/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+  // FIXME: Adapt to running version.
+  return LLVM_HOSTTRIPLE;
+}
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
new file mode 100644
index 000000000000..9f69e7367e6f
--- /dev/null
+++ b/lib/Support/Windows/Memory.inc
@@ -0,0 +1,73 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+                                const MemoryBlock *NearBlock,
+                                std::string *ErrMsg) {
+  if (NumBytes == 0) return MemoryBlock();
+
+  static const size_t pageSize = Process::GetPageSize();
+  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+  //FIXME: support NearBlock if ever needed on Win64.
+
+  void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+                  PAGE_EXECUTE_READWRITE);
+  if (pa == NULL) {
+    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+    return MemoryBlock();
+  }
+
+  MemoryBlock result;
+  result.Address = pa;
+  result.Size = NumPages*pageSize;
+  return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+  if (M.Address == 0 || M.Size == 0) return false;
+  if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+    return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
+  return false;
+}
+
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+  return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+  return false;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+  return true;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+  return false;
+}
+
+}
diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc
new file mode 100644
index 000000000000..583dc6359a16
--- /dev/null
+++ b/lib/Support/Windows/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+MutexImpl::MutexImpl(bool /*recursive*/)
+{
+  data_ = new CRITICAL_SECTION;
+  InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+MutexImpl::~MutexImpl()
+{
+  DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+  delete (LPCRITICAL_SECTION)data_;
+  data_ = 0;
+}
+
+bool
+MutexImpl::acquire()
+{
+  EnterCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+MutexImpl::release()
+{
+  LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+  return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
new file mode 100644
index 000000000000..625f67aa912a
--- /dev/null
+++ b/lib/Support/Windows/Path.inc
@@ -0,0 +1,921 @@
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes.  As LLVM code basically
+// assumes forward slashes are used, backward slashs are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory.  Any other use of a trailing slash is stripped.  Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simply the code.
+
+static void FlipBackSlashes(std::string& s) {
+  for (size_t i = 0; i < s.size(); i++)
+    if (s[i] == '\\')
+      s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+
+const char PathSeparator = ';';
+
+StringRef Path::GetEXESuffix() {
+  return "exe";
+}
+
+Path::Path(llvm::StringRef p)
+  : path(p) {
+  FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+  : path(StrStart, StrLen) {
+  FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(StringRef that) {
+  path.assign(that.data(), that.size());
+  FlipBackSlashes(path);
+  return *this;
+}
+
+// push_back 0 on create, and pop_back on delete.
+struct ScopedNullTerminator {
+  std::string &str;
+  ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
+  ~ScopedNullTerminator() {
+    // str.pop_back(); But wait, C++03 doesn't have this...
+    assert(!str.empty() && str[str.size() - 1] == 0
+      && "Null char not present!");
+    str.resize(str.size() - 1);
+  }
+};
+
+bool
+Path::isValid() const {
+  if (path.empty())
+    return false;
+
+  // If there is a colon, it must be the second character, preceded by a letter
+  // and followed by something.
+  size_t len = path.size();
+  // This code assumes that path is null terminated, so make sure it is.
+  ScopedNullTerminator snt(path);
+  size_t pos = path.rfind(':',len);
+  size_t rootslash = 0;
+  if (pos != std::string::npos) {
+    if (pos != 1 || !isalpha(path[0]) || len < 3)
+      return false;
+      rootslash = 2;
+  }
+
+  // Look for a UNC path, and if found adjust our notion of the root slash.
+  if (len > 3 && path[0] == '/' && path[1] == '/') {
+    rootslash = path.find('/', 2);
+    if (rootslash == std::string::npos)
+      rootslash = 0;
+  }
+
+  // Check for illegal characters.
+  if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+                         "\013\014\015\016\017\020\021\022\023\024\025\026"
+                         "\027\030\031\032\033\034\035\036\037")
+      != std::string::npos)
+    return false;
+
+  // Remove trailing slash, unless it's a root slash.
+  if (len > rootslash+1 && path[len-1] == '/')
+    path.erase(--len);
+
+  // Check each component for legality.
+  for (pos = 0; pos < len; ++pos) {
+    // A component may not end in a space.
+    if (path[pos] == ' ') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0')
+        return false;
+    }
+
+    // A component may not end in a period.
+    if (path[pos] == '.') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0') {
+        // Unless it is the pseudo-directory "."...
+        if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+          return true;
+        // or "..".
+        if (pos > 0 && path[pos-1] == '.') {
+          if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+            return true;
+        }
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void Path::makeAbsolute() {
+  TCHAR  FullPath[MAX_PATH + 1] = {0};
+  LPTSTR FilePart = NULL;
+
+  DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+                        sizeof(FullPath)/sizeof(FullPath[0]),
+                        FullPath, &FilePart);
+
+  if (0 == RetLength) {
+    // FIXME: Report the error GetLastError()
+    assert(0 && "Unable to make absolute path!");
+  } else if (RetLength > MAX_PATH) {
+    // FIXME: Report too small buffer (needed RetLength bytes).
+    assert(0 && "Unable to make absolute path!");
+  } else {
+    path = FullPath;
+  }
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+  assert(NameStart);
+  // FIXME: This does not handle correctly an absolute path starting from
+  // a drive letter or in UNC format.
+  switch (NameLen) {
+  case 0:
+    return false;
+  case 1:
+  case 2:
+    return NameStart[0] == '/';
+  default:
+    return
+      (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+      (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+  }
+}
+
+bool
+Path::isAbsolute() const {
+  // FIXME: This does not handle correctly an absolute path starting from
+  // a drive letter or in UNC format.
+  switch (path.length()) {
+    case 0:
+      return false;
+    case 1:
+    case 2:
+      return path[0] == '/';
+    default:
+      return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+  }
+}
+
+static Path *TempDirectory;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+  if (TempDirectory)
+    return *TempDirectory;
+
+  char pathname[MAX_PATH];
+  if (!GetTempPath(MAX_PATH, pathname)) {
+    if (ErrMsg)
+      *ErrMsg = "Can't determine temporary directory";
+    return Path();
+  }
+
+  Path result;
+  result.set(pathname);
+
+  // Append a subdirectory passed on our process id so multiple LLVMs don't
+  // step on each other's toes.
+#ifdef __MINGW32__
+  // Mingw's Win32 header files are broken.
+  sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+  sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+  result.appendComponent(pathname);
+
+  // If there's a directory left over from a previous LLVM execution that
+  // happened to have the same process id, get rid of it.
+  result.eraseFromDisk(true);
+
+  // And finally (re-)create the empty directory.
+  result.createDirectoryOnDisk(false);
+  TempDirectory = new Path(result);
+  return *TempDirectory;
+}
+
+// FIXME: the following set of functions don't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+  // This is the only notion that that Windows has of a root directory. Nothing
+  // is here except for drives.
+  return Path("file:///");
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+  char buff[MAX_PATH];
+  // Generic form of C:\Windows\System32
+  HRESULT res =  SHGetFolderPathA(NULL,
+                                  CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+                                  NULL,
+                                  SHGFP_TYPE_CURRENT,
+                                  buff);
+  if (res != S_OK) {
+    assert(0 && "Failed to get system directory");
+    return;
+  }
+  Paths.push_back(sys::Path(buff));
+
+  // Reset buff.
+  buff[0] = 0;
+  // Generic form of C:\Windows
+  res =  SHGetFolderPathA(NULL,
+                          CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+                          NULL,
+                          SHGFP_TYPE_CURRENT,
+                          buff);
+  if (res != S_OK) {
+    assert(0 && "Failed to get windows directory");
+    return;
+  }
+  Paths.push_back(sys::Path(buff));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  Path ret = GetUserHomeDirectory();
+  if (!ret.appendComponent(".llvm"))
+    assert(0 && "Failed to append .llvm");
+  return ret;
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  char buff[MAX_PATH];
+  HRESULT res = SHGetFolderPathA(NULL,
+                                 CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+                                 NULL,
+                                 SHGFP_TYPE_CURRENT,
+                                 buff);
+  if (res != S_OK)
+    assert(0 && "Failed to get user home directory");
+  return Path(buff);
+}
+
+Path
+Path::GetCurrentDirectory() {
+  char pathname[MAX_PATH];
+  ::GetCurrentDirectoryA(MAX_PATH,pathname);
+  return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+  char pathname[MAX_PATH];
+  DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+  return ret != MAX_PATH ? Path(pathname) : Path();
+}
+
+
+// FIXME: the above set of functions don't map to Windows very well.
+
+
+StringRef Path::getDirname() const {
+  return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+  // Find the last slash
+  size_t slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  size_t dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef(path).substr(slash);
+  else
+    return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+  // Find the last slash
+  size_t slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  size_t dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef("");
+  else
+    return StringRef(path).substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+  DWORD attr = GetFileAttributes(path.c_str());
+  return (attr != INVALID_FILE_ATTRIBUTES) &&
+         (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::isSymLink() const {
+  DWORD attributes = GetFileAttributes(path.c_str());
+
+  if (attributes == INVALID_FILE_ATTRIBUTES)
+    // There's no sane way to report this :(.
+    assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+  // This isn't exactly what defines a NTFS symlink, but it is only true for
+  // paths that act like a symlink.
+  return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
+Path::canRead() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isRegularFile() const {
+  bool res;
+  if (fs::is_regular_file(path, res))
+    return false;
+  return res;
+}
+
+StringRef
+Path::getLast() const {
+  // Find the last slash
+  size_t pos = path.rfind('/');
+
+  // Handle the corner cases
+  if (pos == std::string::npos)
+    return path;
+
+  // If the last character is a slash, we have a root directory
+  if (pos == path.length()-1)
+    return path;
+
+  // Return everything after the last slash
+  return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+  if (!fsIsValid || update) {
+    WIN32_FILE_ATTRIBUTE_DATA fi;
+    if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+      MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+                      ": Can't get status: ");
+      return 0;
+    }
+
+    status.fileSize = fi.nFileSizeHigh;
+    status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+    status.fileSize += fi.nFileSizeLow;
+
+    status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+    status.user = 9999;    // Not applicable to Windows, so...
+    status.group = 9999;   // Not applicable to Windows, so...
+
+    // FIXME: this is only unique if the file is accessed by the same file path.
+    // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+    // numbers, but the concept doesn't exist in Windows.
+    status.uniqueID = 0;
+    for (unsigned i = 0; i < path.length(); ++i)
+      status.uniqueID += path[i];
+
+    ULARGE_INTEGER ui;
+    ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+    ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+    status.modTime.fromWin32Time(ui.QuadPart);
+
+    status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+    fsIsValid = true;
+  }
+  return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+  // All files are readable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+  DWORD attr = GetFileAttributes(path.c_str());
+
+  // If it doesn't exist, we're done.
+  if (attr == INVALID_FILE_ATTRIBUTES)
+    return false;
+
+  if (attr & FILE_ATTRIBUTE_READONLY) {
+    if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+      MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+  // All files are executable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+    MakeErrMsg(ErrMsg, path + ": can't get status of file");
+    return true;
+  }
+
+  if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+    if (ErrMsg)
+      *ErrMsg = path + ": not a directory";
+    return true;
+  }
+
+  result.clear();
+  WIN32_FIND_DATA fd;
+  std::string searchpath = path;
+  if (path.size() == 0 || searchpath[path.size()-1] == '/')
+    searchpath += "*";
+  else
+    searchpath += "/*";
+
+  HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+  if (h == INVALID_HANDLE_VALUE) {
+    if (GetLastError() == ERROR_FILE_NOT_FOUND)
+      return true; // not really an error, now is it?
+    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+    return true;
+  }
+
+  do {
+    if (fd.cFileName[0] == '.')
+      continue;
+    Path aPath(path);
+    aPath.appendComponent(&fd.cFileName[0]);
+    result.insert(aPath);
+  } while (FindNextFile(h, &fd));
+
+  DWORD err = GetLastError();
+  FindClose(h);
+  if (err != ERROR_NO_MORE_FILES) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
+    return true;
+  }
+  return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+  if (a_path.empty())
+    return false;
+  std::string save(path);
+  path = a_path;
+  FlipBackSlashes(path);
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+  if (name.empty())
+    return false;
+  std::string save(path);
+  if (!path.empty()) {
+    size_t last = path.size() - 1;
+    if (path[last] != '/')
+      path += '/';
+  }
+  path += name;
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::eraseComponent() {
+  size_t slashpos = path.rfind('/',path.size());
+  if (slashpos == path.size() - 1 || slashpos == std::string::npos)
+    return false;
+  std::string save(path);
+  path.erase(slashpos);
+  if (!isValid()) {
+    path = save;
+    return false;
+  }
+  return true;
+}
+
+bool
+Path::eraseSuffix() {
+  size_t dotpos = path.rfind('.',path.size());
+  size_t slashpos = path.rfind('/',path.size());
+  if (dotpos != std::string::npos) {
+    if (slashpos == std::string::npos || dotpos > slashpos+1) {
+      std::string save(path);
+      path.erase(dotpos, path.size()-dotpos);
+      if (!isValid()) {
+        path = save;
+        return false;
+      }
+      return true;
+    }
+  }
+  return false;
+}
+
+inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char*msg) {
+  if (ErrMsg)
+    *ErrMsg = std::string(pathname) + ": " + std::string(msg);
+  return true;
+}
+
+bool
+Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
+  // Get a writeable copy of the path name
+  size_t len = path.length();
+  char *pathname = reinterpret_cast<char *>(_alloca(len+2));
+  path.copy(pathname, len);
+  pathname[len] = 0;
+
+  // Make sure it ends with a slash.
+  if (len == 0 || pathname[len - 1] != '/') {
+    pathname[len] = '/';
+    pathname[++len] = 0;
+  }
+
+  // Determine starting point for initial / search.
+  char *next = pathname;
+  if (pathname[0] == '/' && pathname[1] == '/') {
+    // Skip host name.
+    next = strchr(pathname+2, '/');
+    if (next == NULL)
+      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+    // Skip share name.
+    next = strchr(next+1, '/');
+    if (next == NULL)
+      return PathMsg(ErrMsg, pathname,"badly formed remote directory");
+
+    next++;
+    if (*next == 0)
+      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
+
+  } else {
+    if (pathname[1] == ':')
+      next += 2;    // skip drive letter
+    if (*next == '/')
+      next++;       // skip root directory
+  }
+
+  // If we're supposed to create intermediate directories
+  if (create_parents) {
+    // Loop through the directory components until we're done
+    while (*next) {
+      next = strchr(next, '/');
+      *next = 0;
+      if (!CreateDirectory(pathname, NULL) &&
+          GetLastError() != ERROR_ALREADY_EXISTS)
+          return MakeErrMsg(ErrMsg,
+            std::string(pathname) + ": Can't create directory: ");
+      *next++ = '/';
+    }
+  } else {
+    // Drop trailing slash.
+    pathname[len-1] = 0;
+    if (!CreateDirectory(pathname, NULL) &&
+        GetLastError() != ERROR_ALREADY_EXISTS) {
+      return MakeErrMsg(ErrMsg, std::string(pathname) +
+                        ": Can't create directory: ");
+    }
+  }
+  return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+  // Create the file
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+  CloseHandle(h);
+  return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+    return true;
+
+  if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+    // If it doesn't exist, we're done.
+    bool Exists;
+    if (fs::exists(path, Exists) || !Exists)
+      return false;
+
+    char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
+    int lastchar = path.length() - 1 ;
+    path.copy(pathname, lastchar+1);
+
+    // Make path end with '/*'.
+    if (pathname[lastchar] != '/')
+      pathname[++lastchar] = '/';
+    pathname[lastchar+1] = '*';
+    pathname[lastchar+2] = 0;
+
+    if (remove_contents) {
+      WIN32_FIND_DATA fd;
+      HANDLE h = FindFirstFile(pathname, &fd);
+
+      // It's a bad idea to alter the contents of a directory while enumerating
+      // its contents. So build a list of its contents first, then destroy them.
+
+      if (h != INVALID_HANDLE_VALUE) {
+        std::vector<Path> list;
+
+        do {
+          if (strcmp(fd.cFileName, ".") == 0)
+            continue;
+          if (strcmp(fd.cFileName, "..") == 0)
+            continue;
+
+          Path aPath(path);
+          aPath.appendComponent(&fd.cFileName[0]);
+          list.push_back(aPath);
+        } while (FindNextFile(h, &fd));
+
+        DWORD err = GetLastError();
+        FindClose(h);
+        if (err != ERROR_NO_MORE_FILES) {
+          SetLastError(err);
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+        }
+
+        for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+             ++I) {
+          Path &aPath = *I;
+          aPath.eraseFromDisk(true);
+        }
+      } else {
+        if (GetLastError() != ERROR_FILE_NOT_FOUND)
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+      }
+    }
+
+    pathname[lastchar] = 0;
+    if (!RemoveDirectory(pathname))
+      return MakeErrMsg(ErrStr,
+        std::string(pathname) + ": Can't destroy directory: ");
+    return false;
+  } else {
+    // Read-only files cannot be deleted on Windows.  Must remove the read-only
+    // attribute first.
+    if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+      if (!SetFileAttributes(path.c_str(),
+                             fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+        return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+    }
+
+    if (!DeleteFile(path.c_str()))
+      return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
+    return false;
+  }
+}
+
+bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
+  assert(len < 1024 && "Request for magic string too long");
+  char* buf = reinterpret_cast<char*>(alloca(len));
+
+  HANDLE h = CreateFile(path.c_str(),
+                        GENERIC_READ,
+                        FILE_SHARE_READ,
+                        NULL,
+                        OPEN_EXISTING,
+                        FILE_ATTRIBUTE_NORMAL,
+                        NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return false;
+
+  DWORD nRead = 0;
+  BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
+  CloseHandle(h);
+
+  if (!ret || nRead != len)
+    return false;
+
+  Magic = std::string(buf, len);
+  return true;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+  if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
+    return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
+        + "': ");
+  return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
+  // FIXME: should work on directories also.
+  if (!si.isFile) {
+    return true;
+  }
+
+  HANDLE h = CreateFile(path.c_str(),
+                        FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
+                        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                        NULL,
+                        OPEN_EXISTING,
+                        FILE_ATTRIBUTE_NORMAL,
+                        NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return true;
+
+  BY_HANDLE_FILE_INFORMATION bhfi;
+  if (!GetFileInformationByHandle(h, &bhfi)) {
+    DWORD err = GetLastError();
+    CloseHandle(h);
+    SetLastError(err);
+    return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
+  }
+
+  ULARGE_INTEGER ui;
+  ui.QuadPart = si.modTime.toWin32Time();
+  FILETIME ft;
+  ft.dwLowDateTime = ui.LowPart;
+  ft.dwHighDateTime = ui.HighPart;
+  BOOL ret = SetFileTime(h, NULL, &ft, &ft);
+  DWORD err = GetLastError();
+  CloseHandle(h);
+  if (!ret) {
+    SetLastError(err);
+    return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
+  }
+
+  // Best we can do with Unix permission bits is to interpret the owner
+  // writable bit.
+  if (si.mode & 0200) {
+    if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
+      if (!SetFileAttributes(path.c_str(),
+              bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
+        return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+    }
+  } else {
+    if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
+      if (!SetFileAttributes(path.c_str(),
+              bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
+        return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
+    }
+  }
+
+  return false;
+}
+
+bool
+CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
+  // Can't use CopyFile macro defined in Windows.h because it would mess up the
+  // above line.  We use the expansion it would have in a non-UNICODE build.
+  if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
+    return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
+               "' to '" + Dest.str() + "': ");
+  return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+  bool Exists;
+  if (reuse_current && (fs::exists(path, Exists) || !Exists))
+    return false; // File doesn't exist already, just use it!
+
+  // Reserve space for -XXXXXX at the end.
+  char *FNBuffer = (char*) alloca(path.size()+8);
+  unsigned offset = path.size();
+  path.copy(FNBuffer, offset);
+
+  // Find a numeric suffix that isn't used by an existing file.  Assume there
+  // won't be more than 1 million files with the same prefix.  Probably a safe
+  // bet.
+  static unsigned FCounter = 0;
+  do {
+    sprintf(FNBuffer+offset, "-%06u", FCounter);
+    if (++FCounter > 999999)
+      FCounter = 0;
+    path = FNBuffer;
+  } while (!fs::exists(path, Exists) && Exists);
+  return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+  // Make this into a unique file name
+  makeUnique(reuse_current, ErrMsg);
+
+  // Now go and create it
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+  CloseHandle(h);
+  return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  return 0;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
+  assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
new file mode 100644
index 000000000000..8effb0c737dd
--- /dev/null
+++ b/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,750 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//===          is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <wincrypt.h>
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+  typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+    /*__in*/ LPCWSTR lpSymlinkFileName,
+    /*__in*/ LPCWSTR lpTargetFileName,
+    /*__in*/ DWORD dwFlags);
+
+  PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+    ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+                     "CreateSymbolicLinkW"));
+
+  error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                    utf8.begin(), utf8.size(),
+                                    utf16.begin(), 0);
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    utf16.reserve(len + 1);
+    utf16.set_size(len);
+
+    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                    utf8.begin(), utf8.size(),
+                                    utf16.begin(), utf16.size());
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    // Make utf16 null terminated.
+    utf16.push_back(0);
+    utf16.pop_back();
+
+    return success;
+  }
+
+  error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+                               SmallVectorImpl<char> &utf8) {
+    // Get length.
+    int len = ::WideCharToMultiByte(CP_UTF8, 0,
+                                    utf16, utf16_len,
+                                    utf8.begin(), 0,
+                                    NULL, NULL);
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    utf8.reserve(len);
+    utf8.set_size(len);
+
+    // Now do the actual conversion.
+    len = ::WideCharToMultiByte(CP_UTF8, 0,
+                                utf16, utf16_len,
+                                utf8.data(), utf8.size(),
+                                NULL, NULL);
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    // Make utf8 null terminated.
+    utf8.push_back(0);
+    utf8.pop_back();
+
+    return success;
+  }
+
+  error_code TempDir(SmallVectorImpl<wchar_t> &result) {
+  retry_temp_dir:
+    DWORD len = ::GetTempPathW(result.capacity(), result.begin());
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    if (len > result.capacity()) {
+      result.reserve(len);
+      goto retry_temp_dir;
+    }
+
+    result.set_size(len);
+    return success;
+  }
+
+  // Forwarder for ScopedHandle.
+  BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
+    return ::CryptReleaseContext(Provider, 0);
+  }
+
+  typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
+                       BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
+    ScopedCryptContext;
+  bool is_separator(const wchar_t value) {
+    switch (value) {
+    case L'\\':
+    case L'/':
+      return true;
+    default:
+      return false;
+    }
+  }
+}
+
+namespace llvm {
+namespace sys  {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+  SmallVector<wchar_t, 128> cur_path;
+  cur_path.reserve(128);
+retry_cur_dir:
+  DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+  // A zero return value indicates a failure other than insufficient space.
+  if (len == 0)
+    return windows_error(::GetLastError());
+
+  // If there's insufficient space, the len returned is larger than the len
+  // given.
+  if (len > cur_path.capacity()) {
+    cur_path.reserve(len);
+    goto retry_cur_dir;
+  }
+
+  cur_path.set_size(len);
+  // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+  // Find out how much space we need. Sadly, this function doesn't return the
+  // size needed unless you tell it the result size is 0, which means you
+  // _always_ have to call it twice.
+  len = ::WideCharToMultiByte(CP_UTF8, 0,
+                              cur_path.data(), cur_path.size(),
+                              result.data(), 0,
+                              NULL, NULL);
+
+  if (len == 0)
+    return make_error_code(windows_error(::GetLastError()));
+
+  result.reserve(len);
+  result.set_size(len);
+  // Now do the actual conversion.
+  len = ::WideCharToMultiByte(CP_UTF8, 0,
+                              cur_path.data(), cur_path.size(),
+                              result.data(), result.size(),
+                              NULL, NULL);
+  if (len == 0)
+    return windows_error(::GetLastError());
+
+  return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toStringRef(from_storage);
+  StringRef t = to.toStringRef(to_storage);
+
+  // Convert to utf-16.
+  SmallVector<wchar_t, 128> wide_from;
+  SmallVector<wchar_t, 128> wide_to;
+  if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+  if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+  // Copy the file.
+  BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(),
+                         copt != copy_option::overwrite_if_exists);
+
+  if (res == 0)
+    return windows_error(::GetLastError());
+
+  return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
+    error_code ec = windows_error(::GetLastError());
+    if (ec == windows_error::already_exists)
+      existed = true;
+    else
+      return ec;
+  } else
+    existed = false;
+
+  return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toStringRef(from_storage);
+  StringRef t = to.toStringRef(to_storage);
+
+  // Convert to utf-16.
+  SmallVector<wchar_t, 128> wide_from;
+  SmallVector<wchar_t, 128> wide_to;
+  if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+  if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+  if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
+    return windows_error(::GetLastError());
+
+  return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+  // Only do it if the function is available at runtime.
+  if (!create_symbolic_link_api)
+    return make_error_code(errc::function_not_supported);
+
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toStringRef(from_storage);
+  StringRef t = to.toStringRef(to_storage);
+
+  // Convert to utf-16.
+  SmallVector<wchar_t, 128> wide_from;
+  SmallVector<wchar_t, 128> wide_to;
+  if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+  if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+  if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
+    return windows_error(::GetLastError());
+
+  return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  file_status st;
+  if (error_code ec = status(path, st))
+    return ec;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  if (st.type() == file_type::directory_file) {
+    if (!::RemoveDirectoryW(c_str(path_utf16))) {
+      error_code ec = windows_error(::GetLastError());
+      if (ec != windows_error::file_not_found)
+        return ec;
+      existed = false;
+    } else
+      existed = true;
+  } else {
+    if (!::DeleteFileW(c_str(path_utf16))) {
+      error_code ec = windows_error(::GetLastError());
+      if (ec != windows_error::file_not_found)
+        return ec;
+      existed = false;
+    } else
+      existed = true;
+  }
+
+  return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toStringRef(from_storage);
+  StringRef t = to.toStringRef(to_storage);
+
+  // Convert to utf-16.
+  SmallVector<wchar_t, 128> wide_from;
+  SmallVector<wchar_t, 128> wide_to;
+  if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+  if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+  if (!::MoveFileExW(wide_from.begin(), wide_to.begin(),
+                     MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
+    return windows_error(::GetLastError());
+
+  return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+  if (fd == -1)
+    return error_code(errno, generic_category());
+#ifdef HAVE__CHSIZE_S
+  errno_t error = ::_chsize_s(fd, size);
+#else
+  errno_t error = ::_chsize(fd, size);
+#endif
+  ::close(fd);
+  return error_code(error, generic_category());
+}
+
+error_code exists(const Twine &path, bool &result) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+  if (attributes == INVALID_FILE_ATTRIBUTES) {
+    // See if the file didn't actually exist.
+    error_code ec = make_error_code(windows_error(::GetLastError()));
+    if (ec != windows_error::file_not_found &&
+        ec != windows_error::path_not_found)
+      return ec;
+    result = false;
+  } else
+    result = true;
+  return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+  // Get arguments.
+  SmallString<128> a_storage;
+  SmallString<128> b_storage;
+  StringRef a = A.toStringRef(a_storage);
+  StringRef b = B.toStringRef(b_storage);
+
+  // Convert to utf-16.
+  SmallVector<wchar_t, 128> wide_a;
+  SmallVector<wchar_t, 128> wide_b;
+  if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
+  if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
+
+  AutoHandle HandleB(
+    ::CreateFileW(wide_b.begin(),
+                  0,
+                  FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+                  0,
+                  OPEN_EXISTING,
+                  FILE_FLAG_BACKUP_SEMANTICS,
+                  0));
+
+  AutoHandle HandleA(
+    ::CreateFileW(wide_a.begin(),
+                  0,
+                  FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+                  0,
+                  OPEN_EXISTING,
+                  FILE_FLAG_BACKUP_SEMANTICS,
+                  0));
+
+  // If both handles are invalid, it's an error.
+  if (HandleA == INVALID_HANDLE_VALUE &&
+      HandleB == INVALID_HANDLE_VALUE)
+    return windows_error(::GetLastError());
+
+  // If only one is invalid, it's false.
+  if (HandleA == INVALID_HANDLE_VALUE &&
+      HandleB == INVALID_HANDLE_VALUE) {
+    result = false;
+    return success;
+  }
+
+  // Get file information.
+  BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
+  if (!::GetFileInformationByHandle(HandleA, &InfoA))
+    return windows_error(::GetLastError());
+  if (!::GetFileInformationByHandle(HandleB, &InfoB))
+    return windows_error(::GetLastError());
+
+  // See if it's all the same.
+  result =
+    InfoA.dwVolumeSerialNumber           == InfoB.dwVolumeSerialNumber &&
+    InfoA.nFileIndexHigh                 == InfoB.nFileIndexHigh &&
+    InfoA.nFileIndexLow                  == InfoB.nFileIndexLow &&
+    InfoA.nFileSizeHigh                  == InfoB.nFileSizeHigh &&
+    InfoA.nFileSizeLow                   == InfoB.nFileSizeLow &&
+    InfoA.ftLastWriteTime.dwLowDateTime  ==
+      InfoB.ftLastWriteTime.dwLowDateTime &&
+    InfoA.ftLastWriteTime.dwHighDateTime ==
+      InfoB.ftLastWriteTime.dwHighDateTime;
+
+  return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  WIN32_FILE_ATTRIBUTE_DATA FileData;
+  if (!::GetFileAttributesExW(path_utf16.begin(),
+                              ::GetFileExInfoStandard,
+                              &FileData))
+    return windows_error(::GetLastError());
+
+  result =
+    (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+    + FileData.nFileSizeLow;
+
+  return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  DWORD attr = ::GetFileAttributesW(path_utf16.begin());
+  if (attr == INVALID_FILE_ATTRIBUTES)
+    goto handle_status_error;
+
+  // Handle reparse points.
+  if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
+    AutoHandle h(
+      ::CreateFileW(path_utf16.begin(),
+                    0, // Attributes only.
+                    FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+                    NULL,
+                    OPEN_EXISTING,
+                    FILE_FLAG_BACKUP_SEMANTICS,
+                    0));
+    if (h == INVALID_HANDLE_VALUE)
+      goto handle_status_error;
+  }
+
+  if (attr & FILE_ATTRIBUTE_DIRECTORY)
+    result = file_status(file_type::directory_file);
+  else
+    result = file_status(file_type::regular_file);
+
+  return success;
+
+handle_status_error:
+  error_code ec = windows_error(::GetLastError());
+  if (ec == windows_error::file_not_found ||
+      ec == windows_error::path_not_found)
+    result = file_status(file_type::file_not_found);
+  else if (ec == windows_error::sharing_violation)
+    result = file_status(file_type::type_unknown);
+  else {
+    result = file_status(file_type::status_error);
+    return ec;
+  }
+
+  return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+                             SmallVectorImpl<char> &result_path) {
+  // Use result_path as temp storage.
+  result_path.set_size(0);
+  StringRef m = model.toStringRef(result_path);
+
+  SmallVector<wchar_t, 128> model_utf16;
+  if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
+
+  // Make model absolute by prepending a temp directory if it's not already.
+  bool absolute = path::is_absolute(m);
+
+  if (!absolute) {
+    SmallVector<wchar_t, 64> temp_dir;
+    if (error_code ec = TempDir(temp_dir)) return ec;
+    // Handle c: by removing it.
+    if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+      model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+    }
+    model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
+  }
+
+  // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+  // needed if the randomly chosen path already exists.
+  SmallVector<wchar_t, 128> random_path_utf16;
+
+  // Get a Crypto Provider for CryptGenRandom.
+  HCRYPTPROV HCPC;
+  if (!::CryptAcquireContextW(&HCPC,
+                              NULL,
+                              NULL,
+                              PROV_RSA_FULL,
+                              CRYPT_VERIFYCONTEXT))
+    return windows_error(::GetLastError());
+  ScopedCryptContext CryptoProvider(HCPC);
+
+retry_random_path:
+  random_path_utf16.set_size(0);
+  for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(),
+                                                e = model_utf16.end();
+                                                i != e; ++i) {
+    if (*i == L'%') {
+      BYTE val = 0;
+      if (!::CryptGenRandom(CryptoProvider, 1, &val))
+          return windows_error(::GetLastError());
+      random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+    }
+    else
+      random_path_utf16.push_back(*i);
+  }
+  // Make random_path_utf16 null terminated.
+  random_path_utf16.push_back(0);
+  random_path_utf16.pop_back();
+
+  // Try to create + open the path.
+retry_create_file:
+  HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
+                                        GENERIC_READ | GENERIC_WRITE,
+                                        FILE_SHARE_READ,
+                                        NULL,
+                                        // Return ERROR_FILE_EXISTS if the file
+                                        // already exists.
+                                        CREATE_NEW,
+                                        FILE_ATTRIBUTE_TEMPORARY,
+                                        NULL);
+  if (TempFileHandle == INVALID_HANDLE_VALUE) {
+    // If the file existed, try again, otherwise, error.
+    error_code ec = windows_error(::GetLastError());
+    if (ec == windows_error::file_exists)
+      goto retry_random_path;
+    // Check for non-existing parent directories.
+    if (ec == windows_error::path_not_found) {
+      // Create the directories using result_path as temp storage.
+      if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+                                      random_path_utf16.size(), result_path))
+        return ec;
+      StringRef p(result_path.begin(), result_path.size());
+      SmallString<64> dir_to_create;
+      for (path::const_iterator i = path::begin(p),
+                                e = --path::end(p); i != e; ++i) {
+        path::append(dir_to_create, *i);
+        bool Exists;
+        if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+        if (!Exists) {
+          // If c: doesn't exist, bail.
+          if (i->endswith(":"))
+            return ec;
+
+          SmallVector<wchar_t, 64> dir_to_create_utf16;
+          if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16))
+            return ec;
+
+          // Create the directory.
+          if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL))
+            return windows_error(::GetLastError());
+        }
+      }
+      goto retry_create_file;
+    }
+    return ec;
+  }
+
+  // Set result_path to the utf-8 representation of the path.
+  if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+                                  random_path_utf16.size(), result_path)) {
+    ::CloseHandle(TempFileHandle);
+    ::DeleteFileW(random_path_utf16.begin());
+    return ec;
+  }
+
+  // Convert the Windows API file handle into a C-runtime handle.
+  int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0);
+  if (fd == -1) {
+    ::CloseHandle(TempFileHandle);
+    ::DeleteFileW(random_path_utf16.begin());
+    // MSDN doesn't say anything about _open_osfhandle setting errno or
+    // GetLastError(), so just return invalid_handle.
+    return windows_error::invalid_handle;
+  }
+
+  result_fd = fd;
+  return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+                     SmallVectorImpl<char> &result) {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+  result.set_size(0);
+
+  // Convert path to UTF-16.
+  if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+                                  path_utf16))
+    return ec;
+
+  // Open file.
+  HANDLE file = ::CreateFileW(c_str(path_utf16),
+                              GENERIC_READ,
+                              FILE_SHARE_READ,
+                              NULL,
+                              OPEN_EXISTING,
+                              FILE_ATTRIBUTE_READONLY,
+                              NULL);
+  if (file == INVALID_HANDLE_VALUE)
+    return windows_error(::GetLastError());
+
+  // Allocate buffer.
+  result.reserve(len);
+
+  // Get magic!
+  DWORD bytes_read = 0;
+  BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
+  error_code ec = windows_error(::GetLastError());
+  ::CloseHandle(file);
+  if (!read_success || (bytes_read != len)) {
+    // Set result size to the number of bytes read if it's valid.
+    if (bytes_read >= 0 && bytes_read <= len)
+      result.set_size(bytes_read);
+    // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
+    return ec;
+  }
+
+  result.set_size(len);
+  return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+  SmallVector<wchar_t, 128> path_utf16;
+
+  if (error_code ec = UTF8ToUTF16(path,
+                                  path_utf16))
+    return ec;
+
+  // Convert path to the format that Windows is happy with.
+  if (path_utf16.size() > 0 &&
+      !is_separator(path_utf16[path.size() - 1]) &&
+      path_utf16[path.size() - 1] != L':') {
+    path_utf16.push_back(L'\\');
+    path_utf16.push_back(L'*');
+  } else {
+    path_utf16.push_back(L'*');
+  }
+
+  //  Get the first directory entry.
+  WIN32_FIND_DATAW FirstFind;
+  ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
+  if (!FindHandle)
+    return windows_error(::GetLastError());
+
+  size_t FilenameLen = ::wcslen(FirstFind.cFileName);
+  while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
+         (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
+                              FirstFind.cFileName[1] == L'.'))
+    if (!::FindNextFileW(FindHandle, &FirstFind)) {
+      error_code ec = windows_error(::GetLastError());
+      // Check for end.
+      if (ec == windows_error::no_more_files)
+        return directory_iterator_destruct(it);
+      return ec;
+    } else
+      FilenameLen = ::wcslen(FirstFind.cFileName);
+
+  // Construct the current directory entry.
+  SmallString<128> directory_entry_name_utf8;
+  if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
+                                  ::wcslen(FirstFind.cFileName),
+                                  directory_entry_name_utf8))
+    return ec;
+
+  it.IterationHandle = intptr_t(FindHandle.take());
+  SmallString<128> directory_entry_path(path);
+  path::append(directory_entry_path, directory_entry_name_utf8.str());
+  it.CurrentEntry = directory_entry(directory_entry_path.str());
+
+  return success;
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+  if (it.IterationHandle != 0)
+    // Closes the handle if it's valid.
+    ScopedFindHandle close(HANDLE(it.IterationHandle));
+  it.IterationHandle = 0;
+  it.CurrentEntry = directory_entry();
+  return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+  WIN32_FIND_DATAW FindData;
+  if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+    error_code ec = windows_error(::GetLastError());
+    // Check for end.
+    if (ec == windows_error::no_more_files)
+      return directory_iterator_destruct(it);
+    return ec;
+  }
+
+  size_t FilenameLen = ::wcslen(FindData.cFileName);
+  if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
+      (FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
+                           FindData.cFileName[1] == L'.'))
+    return directory_iterator_increment(it);
+
+  SmallString<128> directory_entry_path_utf8;
+  if (error_code ec = UTF16ToUTF8(FindData.cFileName,
+                                  ::wcslen(FindData.cFileName),
+                                  directory_entry_path_utf8))
+    return ec;
+
+  it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
+  return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
new file mode 100644
index 000000000000..06a7f0054d50
--- /dev/null
+++ b/lib/Support/Windows/Process.inc
@@ -0,0 +1,222 @@
+//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <psapi.h>
+#include <malloc.h>
+#include <io.h>
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBPSAPI != 1)
+  #error "libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef __MINGW32__
+// This ban should be lifted when MinGW 1.0+ has defined this value.
+#  define _HEAPOK (-2)
+#endif
+
+namespace llvm {
+using namespace sys;
+
+// This function retrieves the page size using GetSystemInfo and is present
+// solely so it can be called once in Process::GetPageSize to initialize the
+// static variable PageSize.
+inline unsigned GetPageSizeOnce() {
+  // NOTE: A 32-bit application running under WOW64 is supposed to use
+  // GetNativeSystemInfo.  However, this interface is not present prior
+  // to Windows XP so to use it requires dynamic linking.  It is not clear
+  // how this affects the reported page size, if at all.  One could argue
+  // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
+  SYSTEM_INFO info;
+  GetSystemInfo(&info);
+  return static_cast<unsigned>(info.dwPageSize);
+}
+
+unsigned
+Process::GetPageSize() {
+  static const unsigned PageSize = GetPageSizeOnce();
+  return PageSize;
+}
+
+size_t
+Process::GetMallocUsage()
+{
+  _HEAPINFO hinfo;
+  hinfo._pentry = NULL;
+
+  size_t size = 0;
+
+  while (_heapwalk(&hinfo) == _HEAPOK)
+    size += hinfo._size;
+
+  return size;
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+  PROCESS_MEMORY_COUNTERS pmc;
+  GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
+  return pmc.PagefileUsage;
+}
+
+void
+Process::GetTimeUsage(
+  TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
+{
+  elapsed = TimeValue::now();
+
+  uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
+  GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
+                  (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
+                  (FILETIME*)&UserTime);
+
+  // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
+  user_time.seconds( UserTime / 10000000 );
+  user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
+  sys_time.seconds( KernelTime / 10000000 );
+  sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
+}
+
+int Process::GetCurrentUserId()
+{
+  return 65536;
+}
+
+int Process::GetCurrentGroupId()
+{
+  return 65536;
+}
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this configuration item
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+  // Windows doesn't do core files, but it does do modal pop-up message
+  // boxes.  As this method is used by bugpoint, preventing these pop-ups
+  // is the moral equivalent of suppressing core files.
+  SetErrorMode(SEM_FAILCRITICALERRORS |
+               SEM_NOGPFAULTERRORBOX |
+               SEM_NOOPENFILEERRORBOX);
+}
+
+bool Process::StandardInIsUserInput() {
+  return FileDescriptorIsDisplayed(0);
+}
+
+bool Process::StandardOutIsDisplayed() {
+  return FileDescriptorIsDisplayed(1);
+}
+
+bool Process::StandardErrIsDisplayed() {
+  return FileDescriptorIsDisplayed(2);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+  DWORD Mode;	// Unused
+  return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
+}
+
+unsigned Process::StandardOutColumns() {
+  unsigned Columns = 0;
+  CONSOLE_SCREEN_BUFFER_INFO csbi;
+  if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+    Columns = csbi.dwSize.X;
+  return Columns;
+}
+
+unsigned Process::StandardErrColumns() {
+  unsigned Columns = 0;
+  CONSOLE_SCREEN_BUFFER_INFO csbi;
+  if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
+    Columns = csbi.dwSize.X;
+  return Columns;
+}
+
+// It always has colors.
+bool Process::StandardErrHasColors() {
+  return StandardErrIsDisplayed();
+}
+
+bool Process::StandardOutHasColors() {
+  return StandardOutIsDisplayed();
+}
+
+namespace {
+class DefaultColors
+{
+  private:
+    WORD defaultColor;
+  public:
+    DefaultColors()
+     :defaultColor(GetCurrentColor()) {}
+    static unsigned GetCurrentColor() {
+      CONSOLE_SCREEN_BUFFER_INFO csbi;
+      if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
+        return csbi.wAttributes;
+      return 0;
+    }
+    WORD operator()() const { return defaultColor; }
+};
+
+DefaultColors defaultColors;
+}
+
+bool Process::ColorNeedsFlush() {
+  return true;
+}
+
+const char *Process::OutputBold(bool bg) {
+  WORD colors = DefaultColors::GetCurrentColor();
+  if (bg)
+    colors |= BACKGROUND_INTENSITY;
+  else
+    colors |= FOREGROUND_INTENSITY;
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+  return 0;
+}
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+  WORD colors;
+  if (bg) {
+    colors = ((code&1) ? BACKGROUND_RED : 0) |
+      ((code&2) ? BACKGROUND_GREEN : 0 ) |
+      ((code&4) ? BACKGROUND_BLUE : 0);
+    if (bold)
+      colors |= BACKGROUND_INTENSITY;
+  } else {
+    colors = ((code&1) ? FOREGROUND_RED : 0) |
+      ((code&2) ? FOREGROUND_GREEN : 0 ) |
+      ((code&4) ? FOREGROUND_BLUE : 0);
+    if (bold)
+      colors |= FOREGROUND_INTENSITY;
+  }
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
+  return 0;
+}
+
+const char *Process::ResetColor() {
+  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
+  return 0;
+}
+
+}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
new file mode 100644
index 000000000000..350363cf7107
--- /dev/null
+++ b/lib/Support/Windows/Program.inc
@@ -0,0 +1,403 @@
+//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <malloc.h>
+#include <io.h>
+#include <fcntl.h>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+namespace {
+  struct Win32ProcessInfo {
+    HANDLE hProcess;
+    DWORD  dwProcessId;
+  };
+}
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {
+  if (Data_) {
+    Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+    CloseHandle(wpi->hProcess);
+    delete wpi;
+    Data_ = 0;
+  }
+}
+
+unsigned Program::GetPid() const {
+  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+  return wpi->dwProcessId;
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+  // Check some degenerate cases
+  if (progName.length() == 0) // no program
+    return Path();
+  Path temp;
+  if (!temp.set(progName)) // invalid name
+    return Path();
+  // Return paths with slashes verbatim.
+  if (progName.find('\\') != std::string::npos ||
+      progName.find('/') != std::string::npos)
+    return temp;
+
+  // At this point, the file name is valid and does not contain slashes.
+  // Let Windows search for it.
+  char buffer[MAX_PATH];
+  char *dummy = NULL;
+  DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
+                         buffer, &dummy);
+
+  // See if it wasn't found.
+  if (len == 0)
+    return Path();
+
+  // See if we got the entire path.
+  if (len < MAX_PATH)
+    return Path(buffer);
+
+  // Buffer was too small; grow and retry.
+  while (true) {
+    char *b = reinterpret_cast<char *>(_alloca(len+1));
+    DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
+
+    // It is unlikely the search failed, but it's always possible some file
+    // was added or removed since the last search, so be paranoid...
+    if (len2 == 0)
+      return Path();
+    else if (len2 <= len)
+      return Path(b);
+
+    len = len2;
+  }
+}
+
+static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
+  HANDLE h;
+  if (path == 0) {
+    DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+                    GetCurrentProcess(), &h,
+                    0, TRUE, DUPLICATE_SAME_ACCESS);
+    return h;
+  }
+
+  const char *fname;
+  if (path->isEmpty())
+    fname = "NUL";
+  else
+    fname = path->c_str();
+
+  SECURITY_ATTRIBUTES sa;
+  sa.nLength = sizeof(sa);
+  sa.lpSecurityDescriptor = 0;
+  sa.bInheritHandle = TRUE;
+
+  h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
+                 &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+                 FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE) {
+    MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
+        (fd ? "input: " : "output: "));
+  }
+
+  return h;
+}
+
+/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess.
+static bool ArgNeedsQuotes(const char *Str) {
+  return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
+}
+
+
+/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
+/// CreateProcess and returns length of quoted arg with escaped quotes
+static unsigned int ArgLenWithQuotes(const char *Str) {
+  unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+
+  while (*Str != '\0') {
+    if (*Str == '\"')
+      ++len;
+
+    ++len;
+    ++Str;
+  }
+
+  return len;
+}
+
+
+bool
+Program::Execute(const Path& path,
+                 const char** args,
+                 const char** envp,
+                 const Path** redirects,
+                 unsigned memoryLimit,
+                 std::string* ErrMsg) {
+  if (Data_) {
+    Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+    CloseHandle(wpi->hProcess);
+    delete wpi;
+    Data_ = 0;
+  }
+
+  if (!path.canExecute()) {
+    if (ErrMsg)
+      *ErrMsg = "program not executable";
+    return false;
+  }
+
+  // Windows wants a command line, not an array of args, to pass to the new
+  // process.  We have to concatenate them all, while quoting the args that
+  // have embedded spaces (or are empty).
+
+  // First, determine the length of the command line.
+  unsigned len = 0;
+  for (unsigned i = 0; args[i]; i++) {
+    len += ArgLenWithQuotes(args[i]) + 1;
+  }
+
+  // Now build the command line.
+  char *command = reinterpret_cast<char *>(_alloca(len+1));
+  char *p = command;
+
+  for (unsigned i = 0; args[i]; i++) {
+    const char *arg = args[i];
+
+    bool needsQuoting = ArgNeedsQuotes(arg);
+    if (needsQuoting)
+      *p++ = '"';
+
+    while (*arg != '\0') {
+      if (*arg == '\"')
+        *p++ = '\\';
+
+      *p++ = *arg++;
+    }
+
+    if (needsQuoting)
+      *p++ = '"';
+    *p++ = ' ';
+  }
+
+  *p = 0;
+
+  // The pointer to the environment block for the new process.
+  char *envblock = 0;
+
+  if (envp) {
+    // An environment block consists of a null-terminated block of
+    // null-terminated strings. Convert the array of environment variables to
+    // an environment block by concatenating them.
+
+    // First, determine the length of the environment block.
+    len = 0;
+    for (unsigned i = 0; envp[i]; i++)
+      len += strlen(envp[i]) + 1;
+
+    // Now build the environment block.
+    envblock = reinterpret_cast<char *>(_alloca(len+1));
+    p = envblock;
+
+    for (unsigned i = 0; envp[i]; i++) {
+      const char *ev = envp[i];
+      size_t len = strlen(ev) + 1;
+      memcpy(p, ev, len);
+      p += len;
+    }
+
+    *p = 0;
+  }
+
+  // Create a child process.
+  STARTUPINFO si;
+  memset(&si, 0, sizeof(si));
+  si.cb = sizeof(si);
+  si.hStdInput = INVALID_HANDLE_VALUE;
+  si.hStdOutput = INVALID_HANDLE_VALUE;
+  si.hStdError = INVALID_HANDLE_VALUE;
+
+  if (redirects) {
+    si.dwFlags = STARTF_USESTDHANDLES;
+
+    si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
+    if (si.hStdInput == INVALID_HANDLE_VALUE) {
+      MakeErrMsg(ErrMsg, "can't redirect stdin");
+      return false;
+    }
+    si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
+    if (si.hStdOutput == INVALID_HANDLE_VALUE) {
+      CloseHandle(si.hStdInput);
+      MakeErrMsg(ErrMsg, "can't redirect stdout");
+      return false;
+    }
+    if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
+      // If stdout and stderr should go to the same place, redirect stderr
+      // to the handle already open for stdout.
+      DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+                      GetCurrentProcess(), &si.hStdError,
+                      0, TRUE, DUPLICATE_SAME_ACCESS);
+    } else {
+      // Just redirect stderr
+      si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
+      if (si.hStdError == INVALID_HANDLE_VALUE) {
+        CloseHandle(si.hStdInput);
+        CloseHandle(si.hStdOutput);
+        MakeErrMsg(ErrMsg, "can't redirect stderr");
+        return false;
+      }
+    }
+  }
+
+  PROCESS_INFORMATION pi;
+  memset(&pi, 0, sizeof(pi));
+
+  fflush(stdout);
+  fflush(stderr);
+  BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
+                          envblock, NULL, &si, &pi);
+  DWORD err = GetLastError();
+
+  // Regardless of whether the process got created or not, we are done with
+  // the handles we created for it to inherit.
+  CloseHandle(si.hStdInput);
+  CloseHandle(si.hStdOutput);
+  CloseHandle(si.hStdError);
+
+  // Now return an error if the process didn't get created.
+  if (!rc) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
+               path.str() + "'");
+    return false;
+  }
+  Win32ProcessInfo* wpi = new Win32ProcessInfo;
+  wpi->hProcess = pi.hProcess;
+  wpi->dwProcessId = pi.dwProcessId;
+  Data_ = wpi;
+
+  // Make sure these get closed no matter what.
+  AutoHandle hThread(pi.hThread);
+
+  // Assign the process to a job if a memory limit is defined.
+  AutoHandle hJob(0);
+  if (memoryLimit != 0) {
+    hJob = CreateJobObject(0, 0);
+    bool success = false;
+    if (hJob != 0) {
+      JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
+      memset(&jeli, 0, sizeof(jeli));
+      jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
+      jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
+      if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
+                                  &jeli, sizeof(jeli))) {
+        if (AssignProcessToJobObject(hJob, pi.hProcess))
+          success = true;
+      }
+    }
+    if (!success) {
+      SetLastError(GetLastError());
+      MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
+      TerminateProcess(pi.hProcess, 1);
+      WaitForSingleObject(pi.hProcess, INFINITE);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+int
+Program::Wait(const Path &path,
+              unsigned secondsToWait,
+              std::string* ErrMsg) {
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return -1;
+  }
+
+  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+  HANDLE hProcess = wpi->hProcess;
+
+  // Wait for the process to terminate.
+  DWORD millisecondsToWait = INFINITE;
+  if (secondsToWait > 0)
+    millisecondsToWait = secondsToWait * 1000;
+
+  if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
+    if (!TerminateProcess(hProcess, 1)) {
+      MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+      return -1;
+    }
+    WaitForSingleObject(hProcess, INFINITE);
+  }
+
+  // Get its exit status.
+  DWORD status;
+  BOOL rc = GetExitCodeProcess(hProcess, &status);
+  DWORD err = GetLastError();
+
+  if (!rc) {
+    SetLastError(err);
+    MakeErrMsg(ErrMsg, "Failed getting status for program.");
+    return -1;
+  }
+
+  return status;
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return true;
+  }
+
+  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
+  HANDLE hProcess = wpi->hProcess;
+  if (TerminateProcess(hProcess, 1) == 0) {
+    MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+    return true;
+  }
+
+  return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+  int result = _setmode( _fileno(stdin), _O_BINARY );
+  return result == -1;
+}
+
+bool Program::ChangeStdoutToBinary(){
+  int result = _setmode( _fileno(stdout), _O_BINARY );
+  return result == -1;
+}
+
+bool Program::ChangeStderrToBinary(){
+  int result = _setmode( _fileno(stderr), _O_BINARY );
+  return result == -1;
+}
+
+}
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
new file mode 100644
index 000000000000..471f8fa294be
--- /dev/null
+++ b/lib/Support/Windows/RWMutex.inc
@@ -0,0 +1,58 @@
+//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock  =//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+// FIXME: Windows does not have reader-writer locks pre-Vista.  If you want
+// real reader-writer locks, you a threads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+  data_ = calloc(1, sizeof(CRITICAL_SECTION));
+  InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+  DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::reader_release() {
+  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::writer_release() {
+  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+
+}
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
new file mode 100644
index 000000000000..14f3f21f02a1
--- /dev/null
+++ b/lib/Support/Windows/Signals.inc
@@ -0,0 +1,328 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+  #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static bool ExitOnUnhandledExceptions = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs.  This causes concurrency issues with the above
+// globals which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+  // Don't cause a DebugBreak() on return.
+  if (Return)
+    *Return = 0;
+
+  switch (ReportType) {
+  default:
+  case _CRT_ASSERT:
+    fprintf(stderr, "CRT assert: %s\n", Message);
+    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+    // exception code? Perhaps SetErrorMode() handles this.
+    _exit(3);
+    break;
+  case _CRT_ERROR:
+    fprintf(stderr, "CRT error: %s\n", Message);
+    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+    // exception code? Perhaps SetErrorMode() handles this.
+    _exit(3);
+    break;
+  case _CRT_WARN:
+    fprintf(stderr, "CRT warn: %s\n", Message);
+    break;
+  }
+
+  // Don't call _CrtDbgReport.
+  return TRUE;
+}
+#endif
+
+static void RegisterHandler() {
+  if (RegisteredUnhandledExceptionFilter) {
+    EnterCriticalSection(&CriticalSection);
+    return;
+  }
+
+  // Now's the time to create the critical section.  This is the first time
+  // through here, and there's only one thread.
+  InitializeCriticalSection(&CriticalSection);
+
+  // Enter it immediately.  Now if someone hits CTRL/C, the console handler
+  // can't proceed until the globals are updated.
+  EnterCriticalSection(&CriticalSection);
+
+  RegisteredUnhandledExceptionFilter = true;
+  OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+  SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+  // Environment variable to disable any kind of crash dialog.
+  if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+#ifdef _MSC_VER
+    _CrtSetReportHook(CRTReportHook);
+#endif
+    SetErrorMode(SEM_FAILCRITICALERRORS |
+                 SEM_NOGPFAULTERRORBOX |
+                 SEM_NOOPENFILEERRORBOX);
+    ExitOnUnhandledExceptions = true;
+  }
+
+  // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+  // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+  RegisterHandler();
+
+  if (CleanupExecuted) {
+    if (ErrMsg)
+      *ErrMsg = "Process terminating -- cannot register for removal";
+    return true;
+  }
+
+  if (FilesToRemove == NULL)
+    FilesToRemove = new std::vector<sys::Path>;
+
+  FilesToRemove->push_back(Filename);
+
+  LeaveCriticalSection(&CriticalSection);
+  return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+  if (FilesToRemove == NULL)
+    return;
+
+  RegisterHandler();
+
+  FilesToRemove->push_back(Filename);
+  std::vector<sys::Path>::reverse_iterator I =
+  std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+  if (I != FilesToRemove->rend())
+    FilesToRemove->erase(I.base()-1);
+
+  LeaveCriticalSection(&CriticalSection);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+  RegisterHandler();
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+  RegisterHandler();
+  InterruptFunction = IF;
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process.  The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  if (CallBacksToRun == 0)
+    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandler();
+  LeaveCriticalSection(&CriticalSection);
+}
+}
+
+static void Cleanup() {
+  EnterCriticalSection(&CriticalSection);
+
+  // Prevent other thread from registering new files and directories for
+  // removal, should we be executing because of the console handler callback.
+  CleanupExecuted = true;
+
+  // FIXME: open files cannot be deleted.
+
+  if (FilesToRemove != NULL)
+    while (!FilesToRemove->empty()) {
+      FilesToRemove->back().eraseFromDisk();
+      FilesToRemove->pop_back();
+    }
+
+  if (CallBacksToRun)
+    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+      (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
+
+  LeaveCriticalSection(&CriticalSection);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+  Cleanup();
+}
+
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
+  Cleanup();
+
+#ifdef _WIN64
+  // TODO: provide a x64 friendly version of the following
+#else
+
+  // Initialize the STACKFRAME structure.
+  STACKFRAME StackFrame;
+  memset(&StackFrame, 0, sizeof(StackFrame));
+
+  StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+  StackFrame.AddrPC.Mode = AddrModeFlat;
+  StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+  StackFrame.AddrStack.Mode = AddrModeFlat;
+  StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+  StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+  HANDLE hProcess = GetCurrentProcess();
+  HANDLE hThread = GetCurrentThread();
+
+  // Initialize the symbol handler.
+  SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+  SymInitialize(hProcess, NULL, TRUE);
+
+  while (true) {
+    if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+                   ep->ContextRecord, NULL, SymFunctionTableAccess,
+                   SymGetModuleBase, NULL)) {
+      break;
+    }
+
+    if (StackFrame.AddrFrame.Offset == 0)
+      break;
+
+    // Print the PC in hexadecimal.
+    DWORD PC = StackFrame.AddrPC.Offset;
+    fprintf(stderr, "%08lX", PC);
+
+    // Print the parameters.  Assume there are four.
+    fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+            StackFrame.Params[0],
+            StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+    // Verify the PC belongs to a module in this process.
+    if (!SymGetModuleBase(hProcess, PC)) {
+      fputs(" <unknown module>\n", stderr);
+      continue;
+    }
+
+    // Print the symbol name.
+    char buffer[512];
+    IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+    memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+    symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+    symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+    DWORD dwDisp;
+    if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
+      fputc('\n', stderr);
+      continue;
+    }
+
+    buffer[511] = 0;
+    if (dwDisp > 0)
+      fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+    else
+      fprintf(stderr, ", %s", symbol->Name);
+
+    // Print the source file and line number information.
+    IMAGEHLP_LINE line;
+    memset(&line, 0, sizeof(line));
+    line.SizeOfStruct = sizeof(line);
+    if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+      fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+      if (dwDisp > 0)
+        fprintf(stderr, "+%04lu byte(s)", dwDisp);
+    }
+
+    fputc('\n', stderr);
+  }
+
+#endif
+
+  if (ExitOnUnhandledExceptions)
+    _exit(-3);
+
+  // Allow dialog box to pop up allowing choice to start debugger.
+  if (OldFilter)
+    return (*OldFilter)(ep);
+  else
+    return EXCEPTION_CONTINUE_SEARCH;
+}
+
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
+  // We are running in our very own thread, courtesy of Windows.
+  EnterCriticalSection(&CriticalSection);
+  Cleanup();
+
+  // If an interrupt function has been set, go and run one it; otherwise,
+  // the process dies.
+  void (*IF)() = InterruptFunction;
+  InterruptFunction = 0;      // Don't run it on another CTRL-C.
+
+  if (IF) {
+    // Note: if the interrupt function throws an exception, there is nothing
+    // to catch it in this thread so it will kill the process.
+    IF();                     // Run it now.
+    LeaveCriticalSection(&CriticalSection);
+    return TRUE;              // Don't kill the process.
+  }
+
+  // Allow normal processing to take place; i.e., the process dies.
+  LeaveCriticalSection(&CriticalSection);
+  return FALSE;
+}
diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc
new file mode 100644
index 000000000000..512462d89005
--- /dev/null
+++ b/lib/Support/Windows/ThreadLocal.inc
@@ -0,0 +1,54 @@
+//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/ThreadLocal.h"
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() {
+  DWORD* tls = new DWORD;
+  *tls = TlsAlloc();
+  assert(*tls != TLS_OUT_OF_INDEXES);
+  data = tls;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+  DWORD* tls = static_cast<DWORD*>(data);
+  TlsFree(*tls);
+  delete tls;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+  DWORD* tls = static_cast<DWORD*>(data);
+  return TlsGetValue(*tls);
+}
+
+void ThreadLocalImpl::setInstance(const void* d){
+  DWORD* tls = static_cast<DWORD*>(data);
+  int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
+  assert(errorcode != 0);
+  (void)errorcode;
+}
+
+void ThreadLocalImpl::removeInstance() {
+  setInstance(0);
+}
+
+}
diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc
new file mode 100644
index 000000000000..12275526f1c8
--- /dev/null
+++ b/lib/Support/Windows/TimeValue.inc
@@ -0,0 +1,51 @@
+//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 implementation of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <time.h>
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code.
+//===----------------------------------------------------------------------===//
+
+TimeValue TimeValue::now() {
+  uint64_t ft;
+  GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
+
+  TimeValue t(0, 0);
+  t.fromWin32Time(ft);
+  return t;
+}
+
+std::string TimeValue::str() const {
+#ifdef __MINGW32__
+  // This ban may be lifted by either:
+  // (i) a future MinGW version other than 1.0 inherents the __time64_t type, or
+  // (ii) configure tests for either the time_t or __time64_t type.
+  time_t ourTime = time_t(this->toEpochTime());
+  struct tm *lt = ::localtime(&ourTime);
+#else
+  __time64_t ourTime = this->toEpochTime();
+  struct tm *lt = ::_localtime64(&ourTime);
+#endif
+
+  char buffer[25];
+  strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
+  return std::string(buffer);
+}
+
+
+}
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
new file mode 100644
index 000000000000..4a1553b599d7
--- /dev/null
+++ b/lib/Support/Windows/Windows.h
@@ -0,0 +1,120 @@
+//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// mingw-w64 tends to define it as 0x0502 in its headers.
+#undef _WIN32_WINNT
+
+// Require at least Windows 2000 API.
+#define _WIN32_WINNT 0x0500
+#define _WIN32_IE    0x0500 // MinGW at it again.
+#define WIN32_LEAN_AND_MEAN
+
+#include "llvm/Config/config.h" // Get build system configuration settings
+#include <windows.h>
+#include <shlobj.h>
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+  if (!ErrMsg)
+    return true;
+  char *buffer = NULL;
+  FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+      NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+  *ErrMsg = prefix + buffer;
+  LocalFree(buffer);
+  return true;
+}
+
+class AutoHandle {
+  HANDLE handle;
+
+public:
+  AutoHandle(HANDLE h) : handle(h) {}
+
+  ~AutoHandle() {
+    if (handle)
+      CloseHandle(handle);
+  }
+
+  operator HANDLE() {
+    return handle;
+  }
+
+  AutoHandle &operator=(HANDLE h) {
+    handle = h;
+    return *this;
+  }
+};
+
+template <class HandleType, uintptr_t InvalidHandle,
+          class DeleterType, DeleterType D>
+class ScopedHandle {
+  HandleType Handle;
+
+public:
+  ScopedHandle() : Handle(InvalidHandle) {}
+  ScopedHandle(HandleType handle) : Handle(handle) {}
+
+  ~ScopedHandle() {
+    if (Handle != HandleType(InvalidHandle))
+      D(Handle);
+  }
+
+  HandleType take() {
+    HandleType temp = Handle;
+    Handle = HandleType(InvalidHandle);
+    return temp;
+  }
+
+  operator HandleType() const { return Handle; }
+
+  ScopedHandle &operator=(HandleType handle) {
+    Handle = handle;
+    return *this;
+  }
+
+  typedef void (*unspecified_bool_type)();
+  static void unspecified_bool_true() {}
+
+  // True if Handle is valid.
+  operator unspecified_bool_type() const {
+    return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+  }
+
+  bool operator!() const {
+    return Handle == HandleType(InvalidHandle);
+  }
+};
+
+typedef ScopedHandle<HANDLE, uintptr_t(-1),
+                      BOOL (WINAPI*)(HANDLE), ::FindClose>
+  ScopedFindHandle;
+
+namespace llvm {
+template <class T>
+class SmallVectorImpl;
+
+template <class T>
+typename SmallVectorImpl<T>::const_pointer
+c_str(SmallVectorImpl<T> &str) {
+  str.push_back(0);
+  str.pop_back();
+  return str.data();
+}
+} // end namespace llvm.
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
new file mode 100644
index 000000000000..84862d69e2b5
--- /dev/null
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -0,0 +1,66 @@
+/* in libgcc.a */
+
+#ifdef HAVE__ALLOCA
+  EXPLICIT_SYMBOL(_alloca)
+  EXPLICIT_SYMBOL2(alloca, _alloca);
+#endif
+#ifdef HAVE___ALLOCA
+  EXPLICIT_SYMBOL(__alloca)
+#endif
+#ifdef HAVE___CHKSTK
+  EXPLICIT_SYMBOL(__chkstk)
+#endif
+#ifdef HAVE____CHKSTK
+  EXPLICIT_SYMBOL(___chkstk)
+#endif
+#ifdef HAVE___MAIN
+  EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
+#endif
+
+#ifdef HAVE___ASHLDI3
+  EXPLICIT_SYMBOL(__ashldi3)
+#endif
+#ifdef HAVE___ASHRDI3
+  EXPLICIT_SYMBOL(__ashrdi3)
+#endif
+#ifdef HAVE___CMPDI2 // FIXME: unused
+  EXPLICIT_SYMBOL(__cmpdi2)
+#endif
+#ifdef HAVE___DIVDI3
+  EXPLICIT_SYMBOL(__divdi3)
+#endif
+#ifdef HAVE___FIXDFDI
+  EXPLICIT_SYMBOL(__fixdfdi)
+#endif
+#ifdef HAVE___FIXSFDI
+  EXPLICIT_SYMBOL(__fixsfdi)
+#endif
+#ifdef HAVE___FIXUNSDFDI
+  EXPLICIT_SYMBOL(__fixunsdfdi)
+#endif
+#ifdef HAVE___FIXUNSSFDI
+  EXPLICIT_SYMBOL(__fixunssfdi)
+#endif
+#ifdef HAVE___FLOATDIDF
+  EXPLICIT_SYMBOL(__floatdidf)
+#endif
+#ifdef HAVE___FLOATDISF
+  EXPLICIT_SYMBOL(__floatdisf)
+#endif
+#ifdef HAVE___LSHRDI3
+  EXPLICIT_SYMBOL(__lshrdi3)
+#endif
+#ifdef HAVE___MODDI3
+  EXPLICIT_SYMBOL(__moddi3)
+#endif
+#ifdef HAVE___UDIVDI3
+  EXPLICIT_SYMBOL(__udivdi3)
+#endif
+#ifdef HAVE___UMODDI3
+  EXPLICIT_SYMBOL(__umoddi3)
+#endif
+
+/* msvcrt */
+#if defined(_MSC_VER)
+  EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+#endif
diff --git a/lib/Support/Windows/system_error.inc b/lib/Support/Windows/system_error.inc
new file mode 100644
index 000000000000..37ec81dd363c
--- /dev/null
+++ b/lib/Support/Windows/system_error.inc
@@ -0,0 +1,142 @@
+//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//===          is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include <windows.h>
+#include <winerror.h>
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+  LPVOID lpMsgBuf = 0;
+  DWORD retval = ::FormatMessageA(
+    FORMAT_MESSAGE_ALLOCATE_BUFFER |
+    FORMAT_MESSAGE_FROM_SYSTEM |
+    FORMAT_MESSAGE_IGNORE_INSERTS,
+    NULL,
+    ev,
+    MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+    (LPSTR) &lpMsgBuf,
+    0,
+    NULL);
+  if (retval == 0) {
+    ::LocalFree(lpMsgBuf);
+    return std::string("Unknown error");
+  }
+
+  std::string str( static_cast<LPCSTR>(lpMsgBuf) );
+  ::LocalFree(lpMsgBuf);
+
+  while (str.size()
+     && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
+    str.erase( str.size()-1 );
+  if (str.size() && str[str.size()-1] == '.')
+    str.erase( str.size()-1 );
+  return str;
+}
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+  switch (ev) {
+  MAP_ERR_TO_COND(0, success);
+  // Windows system -> posix_errno decode table  ---------------------------//
+  // see WinError.h comments for descriptions of errors
+  MAP_ERR_TO_COND(ERROR_ACCESS_DENIED,       permission_denied);
+  MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS,      file_exists);
+  MAP_ERR_TO_COND(ERROR_BAD_UNIT,            no_such_device);
+  MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW,     filename_too_long);
+  MAP_ERR_TO_COND(ERROR_BUSY,                device_or_resource_busy);
+  MAP_ERR_TO_COND(ERROR_BUSY_DRIVE,          device_or_resource_busy);
+  MAP_ERR_TO_COND(ERROR_CANNOT_MAKE,         permission_denied);
+  MAP_ERR_TO_COND(ERROR_CANTOPEN,            io_error);
+  MAP_ERR_TO_COND(ERROR_CANTREAD,            io_error);
+  MAP_ERR_TO_COND(ERROR_CANTWRITE,           io_error);
+  MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY,   permission_denied);
+  MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST,       no_such_device);
+  MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE,       device_or_resource_busy);
+  MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY,       directory_not_empty);
+  MAP_ERR_TO_COND(ERROR_DIRECTORY,           invalid_argument);
+  MAP_ERR_TO_COND(ERROR_DISK_FULL,           no_space_on_device);
+  MAP_ERR_TO_COND(ERROR_FILE_EXISTS,         file_exists);
+  MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND,      no_such_file_or_directory);
+  MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL,    no_space_on_device);
+  MAP_ERR_TO_COND(ERROR_HANDLE_EOF,          value_too_large);
+  MAP_ERR_TO_COND(ERROR_INVALID_ACCESS,      permission_denied);
+  MAP_ERR_TO_COND(ERROR_INVALID_DRIVE,       no_such_device);
+  MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION,    function_not_supported);
+  MAP_ERR_TO_COND(ERROR_INVALID_HANDLE,      invalid_argument);
+  MAP_ERR_TO_COND(ERROR_INVALID_NAME,        invalid_argument);
+  MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION,      no_lock_available);
+  MAP_ERR_TO_COND(ERROR_LOCKED,              no_lock_available);
+  MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK,       invalid_argument);
+  MAP_ERR_TO_COND(ERROR_NOACCESS,            permission_denied);
+  MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY,   not_enough_memory);
+  MAP_ERR_TO_COND(ERROR_NOT_READY,           resource_unavailable_try_again);
+  MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE,     cross_device_link);
+  MAP_ERR_TO_COND(ERROR_OPEN_FAILED,         io_error);
+  MAP_ERR_TO_COND(ERROR_OPEN_FILES,          device_or_resource_busy);
+  MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED,   operation_canceled);
+  MAP_ERR_TO_COND(ERROR_OUTOFMEMORY,         not_enough_memory);
+  MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND,      no_such_file_or_directory);
+  MAP_ERR_TO_COND(ERROR_BAD_NETPATH,         no_such_file_or_directory);
+  MAP_ERR_TO_COND(ERROR_READ_FAULT,          io_error);
+  MAP_ERR_TO_COND(ERROR_RETRY,               resource_unavailable_try_again);
+  MAP_ERR_TO_COND(ERROR_SEEK,                io_error);
+  MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION,   permission_denied);
+  MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+  MAP_ERR_TO_COND(ERROR_WRITE_FAULT,         io_error);
+  MAP_ERR_TO_COND(ERROR_WRITE_PROTECT,       permission_denied);
+  MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT,         timed_out);
+  MAP_ERR_TO_COND(WSAEACCES,                 permission_denied);
+  MAP_ERR_TO_COND(WSAEADDRINUSE,             address_in_use);
+  MAP_ERR_TO_COND(WSAEADDRNOTAVAIL,          address_not_available);
+  MAP_ERR_TO_COND(WSAEAFNOSUPPORT,           address_family_not_supported);
+  MAP_ERR_TO_COND(WSAEALREADY,               connection_already_in_progress);
+  MAP_ERR_TO_COND(WSAEBADF,                  bad_file_descriptor);
+  MAP_ERR_TO_COND(WSAECONNABORTED,           connection_aborted);
+  MAP_ERR_TO_COND(WSAECONNREFUSED,           connection_refused);
+  MAP_ERR_TO_COND(WSAECONNRESET,             connection_reset);
+  MAP_ERR_TO_COND(WSAEDESTADDRREQ,           destination_address_required);
+  MAP_ERR_TO_COND(WSAEFAULT,                 bad_address);
+  MAP_ERR_TO_COND(WSAEHOSTUNREACH,           host_unreachable);
+  MAP_ERR_TO_COND(WSAEINPROGRESS,            operation_in_progress);
+  MAP_ERR_TO_COND(WSAEINTR,                  interrupted);
+  MAP_ERR_TO_COND(WSAEINVAL,                 invalid_argument);
+  MAP_ERR_TO_COND(WSAEISCONN,                already_connected);
+  MAP_ERR_TO_COND(WSAEMFILE,                 too_many_files_open);
+  MAP_ERR_TO_COND(WSAEMSGSIZE,               message_size);
+  MAP_ERR_TO_COND(WSAENAMETOOLONG,           filename_too_long);
+  MAP_ERR_TO_COND(WSAENETDOWN,               network_down);
+  MAP_ERR_TO_COND(WSAENETRESET,              network_reset);
+  MAP_ERR_TO_COND(WSAENETUNREACH,            network_unreachable);
+  MAP_ERR_TO_COND(WSAENOBUFS,                no_buffer_space);
+  MAP_ERR_TO_COND(WSAENOPROTOOPT,            no_protocol_option);
+  MAP_ERR_TO_COND(WSAENOTCONN,               not_connected);
+  MAP_ERR_TO_COND(WSAENOTSOCK,               not_a_socket);
+  MAP_ERR_TO_COND(WSAEOPNOTSUPP,             operation_not_supported);
+  MAP_ERR_TO_COND(WSAEPROTONOSUPPORT,        protocol_not_supported);
+  MAP_ERR_TO_COND(WSAEPROTOTYPE,             wrong_protocol_type);
+  MAP_ERR_TO_COND(WSAETIMEDOUT,              timed_out);
+  MAP_ERR_TO_COND(WSAEWOULDBLOCK,            operation_would_block);
+  default: return error_condition(ev, system_category());
+  }
+}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index dba46df36256..80ea7407b44e 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -13,13 +13,13 @@
 
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Signals.h"
 #include "llvm/ADT/STLExtras.h"
 #include <cctype>
 #include <cerrno>
@@ -32,6 +32,13 @@
 #if defined(HAVE_FCNTL_H)
 # include <fcntl.h>
 #endif
+#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV)
+#  include <sys/uio.h>
+#endif
+
+#if defined(__CYGWIN__)
+#include <io.h>
+#endif
 
 #if defined(_MSC_VER)
 #include <io.h>
@@ -164,7 +171,8 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) {
   return write(CurPtr, EndPtr-CurPtr);
 }
 
-raw_ostream &raw_ostream::write_escaped(StringRef Str) {
+raw_ostream &raw_ostream::write_escaped(StringRef Str,
+                                        bool UseHexEscapes) {
   for (unsigned i = 0, e = Str.size(); i != e; ++i) {
     unsigned char c = Str[i];
 
@@ -187,11 +195,18 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str) {
         break;
       }
 
-      // Always expand to a 3-character octal escape.
-      *this << '\\';
-      *this << char('0' + ((c >> 6) & 7));
-      *this << char('0' + ((c >> 3) & 7));
-      *this << char('0' + ((c >> 0) & 7));
+      // Write out the escaped representation.
+      if (UseHexEscapes) {
+        *this << '\\' << 'x';
+        *this << hexdigit((c >> 4 & 0xF));
+        *this << hexdigit((c >> 0) & 0xF);
+      } else {
+        // Always use a full 3-character octal escape.
+        *this << '\\';
+        *this << char('0' + ((c >> 6) & 7));
+        *this << char('0' + ((c >> 3) & 7));
+        *this << char('0' + ((c >> 0) & 7));
+      }
     }
   }
 
@@ -363,7 +378,9 @@ void format_object_base::home() {
 /// stream should be immediately destroyed; the string will be empty
 /// if no error occurred.
 raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
-                               unsigned Flags) : Error(false), pos(0) {
+                               unsigned Flags)
+  : Error(false), UseAtomicWrites(false), pos(0)
+{
   assert(Filename != 0 && "Filename is null");
   // Verify that we don't have both "append" and "excl".
   assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
@@ -410,6 +427,26 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
   ShouldClose = true;
 }
 
+/// raw_fd_ostream ctor - FD is the file descriptor that this writes to.  If
+/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
+  : raw_ostream(unbuffered), FD(fd),
+    ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
+#ifdef O_BINARY
+  // Setting STDOUT and STDERR to binary mode is necessary in Win32
+  // to avoid undesirable linefeed conversion.
+  if (fd == STDOUT_FILENO || fd == STDERR_FILENO)
+    setmode(fd, O_BINARY);
+#endif
+
+  // Get the starting position.
+  off_t loc = ::lseek(FD, 0, SEEK_CUR);
+  if (loc == (off_t)-1)
+    pos = 0;
+  else
+    pos = static_cast<uint64_t>(loc);
+}
+
 raw_fd_ostream::~raw_fd_ostream() {
   if (FD >= 0) {
     flush();
@@ -435,7 +472,20 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
   pos += Size;
 
   do {
-    ssize_t ret = ::write(FD, Ptr, Size);
+    ssize_t ret;
+
+    // Check whether we should attempt to use atomic writes.
+    if (BUILTIN_EXPECT(!UseAtomicWrites, true)) {
+      ret = ::write(FD, Ptr, Size);
+    } else {
+      // Use ::writev() where available.
+#if defined(HAVE_WRITEV)
+      struct iovec IOV = { (void*) Ptr, Size };
+      ret = ::writev(FD, &IOV, 1);
+#else
+      ret = ::write(FD, Ptr, Size);
+#endif
+    }
 
     if (ret < 0) {
       // If it's a recoverable error, swallow it and retry the write.
@@ -665,34 +715,3 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
 uint64_t raw_null_ostream::current_pos() const {
   return 0;
 }
-
-//===----------------------------------------------------------------------===//
-//  tool_output_file
-//===----------------------------------------------------------------------===//
-
-tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
-  : Filename(filename), Keep(false) {
-  // Arrange for the file to be deleted if the process is killed.
-  if (Filename != "-")
-    sys::RemoveFileOnSignal(sys::Path(Filename));
-}
-
-tool_output_file::CleanupInstaller::~CleanupInstaller() {
-  // Delete the file if the client hasn't told us not to.
-  if (!Keep && Filename != "-")
-    sys::Path(Filename).eraseFromDisk();
-
-  // Ok, the file is successfully written and closed, or deleted. There's no
-  // further need to clean it up on signals.
-  if (Filename != "-")
-    sys::DontRemoveFileOnSignal(sys::Path(Filename));
-}
-
-tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
-                                   unsigned Flags)
-  : Installer(filename),
-    OS(filename, ErrorInfo, Flags) {
-  // If open fails, no cleanup is needed.
-  if (!ErrorInfo.empty())
-    Installer.Keep = true;
-}
diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c
index 41fb2ea46c9a..007861675ba1 100644
--- a/lib/Support/regexec.c
+++ b/lib/Support/regexec.c
@@ -54,8 +54,9 @@
 #include "regex2.h"
 
 /* macros for manipulating states, small version */
-#define	states	long
-#define	states1	states		/* for later use in llvm_regexec() decision */
+/* FIXME: 'states' is assumed as 'long' on small version. */
+#define	states1	long		/* for later use in llvm_regexec() decision */
+#define	states	states1
 #define	CLEAR(v)	((v) = 0)
 #define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
 #define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
diff --git a/lib/Support/system_error.cpp b/lib/Support/system_error.cpp
new file mode 100644
index 000000000000..56898de31520
--- /dev/null
+++ b/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <string>
+#include <cstring>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+  return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+  return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+  return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+  return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+  virtual const char* name() const;
+  virtual std::string message(int ev) const;
+};
+
+const char*
+_generic_error_category::name() const {
+  return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+  if (ev > ELAST)
+    return std::string("unspecified generic_category error");
+#endif  // ELAST
+  return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+  static _generic_error_category s;
+  return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+  virtual const char* name() const;
+  virtual std::string message(int ev) const;
+  virtual error_condition default_error_condition(int ev) const;
+};
+
+const char*
+_system_error_category::name() const {
+  return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+  static _system_error_category s;
+  return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+  return generic_category();
+#else
+  return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+  return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+  return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
diff --git a/lib/System/Alarm.cpp b/lib/System/Alarm.cpp
deleted file mode 100644
index 0014ca716b33..000000000000
--- a/lib/System/Alarm.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- Alarm.cpp - Alarm Generation Support ---------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Alarm functionality 
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Alarm.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Alarm.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Alarm.inc"
-#endif
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
deleted file mode 100644
index 7ba8b774d5e0..000000000000
--- a/lib/System/Atomic.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements atomic operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Atomic.h"
-#include "llvm/Config/config.h"
-
-using namespace llvm;
-
-#if defined(_MSC_VER)
-#include <windows.h>
-#undef MemoryFence
-#endif
-
-void sys::MemoryFence() {
-#if LLVM_MULTITHREADED==0
-  return;
-#else
-#  if defined(__GNUC__)
-  __sync_synchronize();
-#  elif defined(_MSC_VER)
-  MemoryBarrier();
-#  else
-# error No memory fence implementation for your platform!
-#  endif
-#endif
-}
-
-sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
-                                  sys::cas_flag new_value,
-                                  sys::cas_flag old_value) {
-#if LLVM_MULTITHREADED==0
-  sys::cas_flag result = *ptr;
-  if (result == old_value)
-    *ptr = new_value;
-  return result;
-#elif defined(__GNUC__)
-  return __sync_val_compare_and_swap(ptr, old_value, new_value);
-#elif defined(_MSC_VER)
-  return InterlockedCompareExchange(ptr, new_value, old_value);
-#else
-#  error No compare-and-swap implementation for your platform!
-#endif
-}
-
-sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
-#if LLVM_MULTITHREADED==0
-  ++(*ptr);
-  return *ptr;
-#elif defined(__GNUC__)
-  return __sync_add_and_fetch(ptr, 1);
-#elif defined(_MSC_VER)
-  return InterlockedIncrement(ptr);
-#else
-#  error No atomic increment implementation for your platform!
-#endif
-}
-
-sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
-#if LLVM_MULTITHREADED==0
-  --(*ptr);
-  return *ptr;
-#elif defined(__GNUC__)
-  return __sync_sub_and_fetch(ptr, 1);
-#elif defined(_MSC_VER)
-  return InterlockedDecrement(ptr);
-#else
-#  error No atomic decrement implementation for your platform!
-#endif
-}
-
-sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
-#if LLVM_MULTITHREADED==0
-  *ptr += val;
-  return *ptr;
-#elif defined(__GNUC__)
-  return __sync_add_and_fetch(ptr, val);
-#elif defined(_MSC_VER)
-  return InterlockedExchangeAdd(ptr, val) + val;
-#else
-#  error No atomic add implementation for your platform!
-#endif
-}
-
-sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
-  sys::cas_flag original, result;
-  do {
-    original = *ptr;
-    result = original * val;
-  } while (sys::CompareAndSwap(ptr, result, original) != original);
-
-  return result;
-}
-
-sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
-  sys::cas_flag original, result;
-  do {
-    original = *ptr;
-    result = original / val;
-  } while (sys::CompareAndSwap(ptr, result, original) != original);
-
-  return result;
-}
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
deleted file mode 100644
index b43c3afa5248..000000000000
--- a/lib/System/CMakeLists.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-add_llvm_library(LLVMSystem
-  Alarm.cpp
-  Atomic.cpp
-  Disassembler.cpp
-  DynamicLibrary.cpp
-  Errno.cpp
-  Host.cpp
-  IncludeFile.cpp
-  Memory.cpp
-  Mutex.cpp
-  Path.cpp
-  Process.cpp
-  Program.cpp
-  RWMutex.cpp
-  SearchForAddressOfSpecialSymbol.cpp
-  Signals.cpp
-  ThreadLocal.cpp
-  Threading.cpp
-  TimeValue.cpp
-  Valgrind.cpp
-  Unix/Alarm.inc
-  Unix/Host.inc
-  Unix/Memory.inc
-  Unix/Mutex.inc
-  Unix/Path.inc
-  Unix/Process.inc
-  Unix/Program.inc
-  Unix/RWMutex.inc
-  Unix/Signals.inc
-  Unix/ThreadLocal.inc
-  Unix/TimeValue.inc
-  Win32/Alarm.inc
-  Win32/DynamicLibrary.inc
-  Win32/Host.inc
-  Win32/Memory.inc
-  Win32/Mutex.inc
-  Win32/Path.inc
-  Win32/Process.inc
-  Win32/Program.inc
-  Win32/RWMutex.inc
-  Win32/Signals.inc
-  Win32/ThreadLocal.inc
-  Win32/TimeValue.inc
-  )
-
-if( BUILD_SHARED_LIBS AND NOT WIN32 )
-  target_link_libraries(LLVMSystem ${CMAKE_DL_LIBS})
-endif()
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
deleted file mode 100644
index 139e3be1aaee..000000000000
--- a/lib/System/Disassembler.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the necessary glue to call external disassembler
-// libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/System/Disassembler.h"
-
-#include <cassert>
-#include <iomanip>
-#include <string>
-#include <sstream>
-
-#if USE_UDIS86
-#include <udis86.h>
-#endif
-
-using namespace llvm;
-
-bool llvm::sys::hasDisassembler()
-{
-#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
-  // We have option to enable udis86 library.
-# if USE_UDIS86
-  return true;
-#else
-  return false;
-#endif
-#else
-  return false;
-#endif
-}
-
-std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
-                                         uint64_t pc) {
-  std::stringstream res;
-
-#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
-  && USE_UDIS86
-  unsigned bits;
-# if defined(__i386__)
-  bits = 32;
-# else
-  bits = 64;
-# endif
-
-  ud_t ud_obj;
-
-  ud_init(&ud_obj);
-  ud_set_input_buffer(&ud_obj, start, length);
-  ud_set_mode(&ud_obj, bits);
-  ud_set_pc(&ud_obj, pc);
-  ud_set_syntax(&ud_obj, UD_SYN_ATT);
-
-  res << std::setbase(16)
-      << std::setw(bits/4);
-
-  while (ud_disassemble(&ud_obj)) {
-    res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
-  }
-#else
-  res << "No disassembler available. See configure help for options.\n";
-#endif
-
-  return res.str();
-}
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
deleted file mode 100644
index 660db492d6b9..000000000000
--- a/lib/System/DynamicLibrary.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements the operating system DynamicLibrary concept.
-//
-// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
-// not thread safe!
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-#include <cstdio>
-#include <cstring>
-#include <map>
-#include <vector>
-
-// Collection of symbol name/value pairs to be searched prior to any libraries.
-static std::map<std::string, void*> *ExplicitSymbols = 0;
-
-namespace {
-
-struct ExplicitSymbolsDeleter {
-  ~ExplicitSymbolsDeleter() {
-    if (ExplicitSymbols)
-      delete ExplicitSymbols;
-  }
-};
-
-}
-
-static ExplicitSymbolsDeleter Dummy;
-
-void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
-                                          void *symbolValue) {
-  if (ExplicitSymbols == 0)
-    ExplicitSymbols = new std::map<std::string, void*>();
-  (*ExplicitSymbols)[symbolName] = symbolValue;
-}
-
-#ifdef LLVM_ON_WIN32
-
-#include "Win32/DynamicLibrary.inc"
-
-#else
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-using namespace llvm;
-using namespace llvm::sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-static std::vector<void *> *OpenedHandles = 0;
-
-
-bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
-                                            std::string *ErrMsg) {
-  void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
-  if (H == 0) {
-    if (ErrMsg) *ErrMsg = dlerror();
-    return true;
-  }
-#ifdef __CYGWIN__
-  // Cygwin searches symbols only in the main
-  // with the handle of dlopen(NULL, RTLD_GLOBAL).
-  if (Filename == NULL)
-    H = RTLD_DEFAULT;
-#endif
-  if (OpenedHandles == 0)
-    OpenedHandles = new std::vector<void *>();
-  OpenedHandles->push_back(H);
-  return false;
-}
-#else
-
-using namespace llvm;
-using namespace llvm::sys;
-
-bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
-                                            std::string *ErrMsg) {
-  if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
-  return true;
-}
-#endif
-
-namespace llvm {
-void *SearchForAddressOfSpecialSymbol(const char* symbolName);
-}
-
-void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols) {
-    std::map<std::string, void *>::iterator I =
-      ExplicitSymbols->find(symbolName);
-    std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
-  
-    if (I != E)
-      return I->second;
-  }
-
-#if HAVE_DLFCN_H
-  // Now search the libraries.
-  if (OpenedHandles) {
-    for (std::vector<void *>::iterator I = OpenedHandles->begin(),
-         E = OpenedHandles->end(); I != E; ++I) {
-      //lt_ptr ptr = lt_dlsym(*I, symbolName);
-      void *ptr = dlsym(*I, symbolName);
-      if (ptr) {
-        return ptr;
-      }
-    }
-  }
-#endif
-
-  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
-    return Result;
-
-// This macro returns the address of a well-known, explicit symbol
-#define EXPLICIT_SYMBOL(SYM) \
-   if (!strcmp(symbolName, #SYM)) return &SYM
-
-// On linux we have a weird situation. The stderr/out/in symbols are both
-// macros and global variables because of standards requirements. So, we 
-// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
-#if defined(__linux__)
-  {
-    EXPLICIT_SYMBOL(stderr);
-    EXPLICIT_SYMBOL(stdout);
-    EXPLICIT_SYMBOL(stdin);
-  }
-#else
-  // For everything else, we want to check to make sure the symbol isn't defined
-  // as a macro before using EXPLICIT_SYMBOL.
-  {
-#ifndef stdin
-    EXPLICIT_SYMBOL(stdin);
-#endif
-#ifndef stdout
-    EXPLICIT_SYMBOL(stdout);
-#endif
-#ifndef stderr
-    EXPLICIT_SYMBOL(stderr);
-#endif
-  }
-#endif
-#undef EXPLICIT_SYMBOL
-
-  return 0;
-}
-
-#endif // LLVM_ON_WIN32
diff --git a/lib/System/Errno.cpp b/lib/System/Errno.cpp
deleted file mode 100644
index 68f66f6e439b..000000000000
--- a/lib/System/Errno.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the errno wrappers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Errno.h"
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
-
-#if HAVE_STRING_H
-#include <string.h>
-
-#if HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-namespace sys {
-
-#if HAVE_ERRNO_H
-std::string StrError() {
-  return StrError(errno);
-}
-#endif  // HAVE_ERRNO_H
-
-std::string StrError(int errnum) {
-  const int MaxErrStrLen = 2000;
-  char buffer[MaxErrStrLen];
-  buffer[0] = '\0';
-  char* str = buffer;
-#ifdef HAVE_STRERROR_R
-  // strerror_r is thread-safe.
-  if (errnum)
-# if defined(__GLIBC__) && defined(_GNU_SOURCE)
-    // glibc defines its own incompatible version of strerror_r
-    // which may not use the buffer supplied.
-    str = strerror_r(errnum,buffer,MaxErrStrLen-1);
-# else
-    strerror_r(errnum,buffer,MaxErrStrLen-1);
-# endif
-#elif defined(HAVE_STRERROR_S)  // Windows.
-    if (errnum)
-      strerror_s(buffer, errnum);
-#elif defined(HAVE_STRERROR)
-  // Copy the thread un-safe result of strerror into
-  // the buffer as fast as possible to minimize impact
-  // of collision of strerror in multiple threads.
-  if (errnum)
-    strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
-  buffer[MaxErrStrLen-1] = '\0';
-#else
-  // Strange that this system doesn't even have strerror
-  // but, oh well, just use a generic message
-  sprintf(buffer, "Error #%d", errnum);
-#endif
-  return str;
-}
-
-}  // namespace sys
-}  // namespace llvm
-
-#endif  // HAVE_STRING_H
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
deleted file mode 100644
index e7193dbce92e..000000000000
--- a/lib/System/Host.cpp
+++ /dev/null
@@ -1,305 +0,0 @@
-//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements the operating system Host concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Host.h"
-#include "llvm/Config/config.h"
-#include <string.h>
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Host.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Host.inc"
-#endif
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
-//===----------------------------------------------------------------------===//
-//
-//  Implementations of the CPU detection routines
-//
-//===----------------------------------------------------------------------===//
-
-using namespace llvm;
-
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
- || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-
-/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments.  If we can't run cpuid on the host, return true.
-static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
-                            unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-  #if defined(__GNUC__)
-    // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
-    asm ("movq\t%%rbx, %%rsi\n\t"
-         "cpuid\n\t"
-         "xchgq\t%%rbx, %%rsi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         :  "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    int registers[4];
-    __cpuid(registers, value);
-    *rEAX = registers[0];
-    *rEBX = registers[1];
-    *rECX = registers[2];
-    *rEDX = registers[3];
-    return false;
-  #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
-  #if defined(__GNUC__)
-    asm ("movl\t%%ebx, %%esi\n\t"
-         "cpuid\n\t"
-         "xchgl\t%%ebx, %%esi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         :  "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    __asm {
-      mov   eax,value
-      cpuid
-      mov   esi,rEAX
-      mov   dword ptr [esi],eax
-      mov   esi,rEBX
-      mov   dword ptr [esi],ebx
-      mov   esi,rECX
-      mov   dword ptr [esi],ecx
-      mov   esi,rEDX
-      mov   dword ptr [esi],edx
-    }
-    return false;
-  #endif
-#endif
-  return true;
-}
-
-static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
-  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
-  Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
-  if (Family == 6 || Family == 0xf) {
-    if (Family == 0xf)
-      // Examine extended family ID if family ID is F.
-      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
-    // Examine extended model ID if family ID is 6 or F.
-    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
-  }
-}
-
-std::string sys::getHostCPUName() {
-  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
-  if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
-    return "generic";
-  unsigned Family = 0;
-  unsigned Model  = 0;
-  DetectX86FamilyModel(EAX, Family, Model);
-
-  GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-  bool Em64T = (EDX >> 29) & 0x1;
-  bool HasSSE3 = (ECX & 0x1);
-
-  union {
-    unsigned u[3];
-    char     c[12];
-  } text;
-
-  GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
-  if (memcmp(text.c, "GenuineIntel", 12) == 0) {
-    switch (Family) {
-    case 3:
-      return "i386";
-    case 4:
-      switch (Model) {
-      case 0: // Intel486TM DX processors
-      case 1: // Intel486TM DX processors
-      case 2: // Intel486 SX processors
-      case 3: // Intel487TM processors, IntelDX2 OverDrive® processors,
-              // IntelDX2TM processors
-      case 4: // Intel486 SL processor
-      case 5: // IntelSX2TM processors
-      case 7: // Write-Back Enhanced IntelDX2 processors
-      case 8: // IntelDX4 OverDrive processors, IntelDX4TM processors
-      default: return "i486";
-      }
-    case 5:
-      switch (Model) {
-      case  1: // Pentium OverDrive processor for Pentium processor (60, 66),
-               // Pentium® processors (60, 66)
-      case  2: // Pentium OverDrive processor for Pentium processor (75, 90,
-               // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
-               // 150, 166, 200)
-      case  3: // Pentium OverDrive processors for Intel486 processor-based
-               // systems
-        return "pentium";
-
-      case  4: // Pentium OverDrive processor with MMXTM technology for Pentium
-               // processor (75, 90, 100, 120, 133), Pentium processor with
-               // MMXTM technology (166, 200)
-        return "pentium-mmx";
-
-      default: return "pentium";
-      }
-    case 6:
-      switch (Model) {
-      case  1: // Pentium Pro processor
-        return "pentiumpro";
-
-      case  3: // Intel Pentium II OverDrive processor, Pentium II processor,
-               // model 03
-      case  5: // Pentium II processor, model 05, Pentium II Xeon processor,
-               // model 05, and Intel® Celeron® processor, model 05
-      case  6: // Celeron processor, model 06
-        return "pentium2";
-
-      case  7: // Pentium III processor, model 07, and Pentium III Xeon
-               // processor, model 07
-      case  8: // Pentium III processor, model 08, Pentium III Xeon processor,
-               // model 08, and Celeron processor, model 08
-      case 10: // Pentium III Xeon processor, model 0Ah
-      case 11: // Pentium III processor, model 0Bh
-        return "pentium3";
-
-      case  9: // Intel Pentium M processor, Intel Celeron M processor model 09.
-      case 13: // Intel Pentium M processor, Intel Celeron M processor, model
-               // 0Dh. All processors are manufactured using the 90 nm process.
-        return "pentium-m";
-
-      case 14: // Intel CoreTM Duo processor, Intel CoreTM Solo processor, model
-               // 0Eh. All processors are manufactured using the 65 nm process.
-        return "yonah";
-
-      case 15: // Intel CoreTM2 Duo processor, Intel CoreTM2 Duo mobile
-               // processor, Intel CoreTM2 Quad processor, Intel CoreTM2 Quad
-               // mobile processor, Intel CoreTM2 Extreme processor, Intel
-               // Pentium Dual-Core processor, Intel Xeon processor, model
-               // 0Fh. All processors are manufactured using the 65 nm process.
-      case 22: // Intel Celeron processor model 16h. All processors are
-               // manufactured using the 65 nm process
-        return "core2";
-
-      case 21: // Intel EP80579 Integrated Processor and Intel EP80579
-               // Integrated Processor with Intel QuickAssist Technology
-        return "i686"; // FIXME: ???
-
-      case 23: // Intel CoreTM2 Extreme processor, Intel Xeon processor, model
-               // 17h. All processors are manufactured using the 45 nm process.
-               //
-               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
-        return "penryn";
-
-      case 26: // Intel Core i7 processor and Intel Xeon processor. All
-               // processors are manufactured using the 45 nm process.
-      case 29: // Intel Xeon processor MP. All processors are manufactured using
-               // the 45 nm process.
-        return "corei7";
-
-      case 28: // Intel Atom processor. All processors are manufactured using
-               // the 45 nm process
-        return "atom";
-
-      default: return "i686";
-      }
-    case 15: {
-      switch (Model) {
-      case  0: // Pentium 4 processor, Intel Xeon processor. All processors are
-               // model 00h and manufactured using the 0.18 micron process.
-      case  1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
-               // processor MP, and Intel Celeron processor. All processors are
-               // model 01h and manufactured using the 0.18 micron process.
-      case  2: // Pentium 4 processor, Mobile Intel Pentium 4 processor – M,
-               // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
-               // processor, and Mobile Intel Celeron processor. All processors
-               // are model 02h and manufactured using the 0.13 micron process.
-        return (Em64T) ? "x86-64" : "pentium4";
-
-      case  3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
-               // processor. All processors are model 03h and manufactured using
-               // the 90 nm process.
-      case  4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
-               // Pentium D processor, Intel Xeon processor, Intel Xeon
-               // processor MP, Intel Celeron D processor. All processors are
-               // model 04h and manufactured using the 90 nm process.
-      case  6: // Pentium 4 processor, Pentium D processor, Pentium processor
-               // Extreme Edition, Intel Xeon processor, Intel Xeon processor
-               // MP, Intel Celeron D processor. All processors are model 06h
-               // and manufactured using the 65 nm process.
-        return (Em64T) ? "nocona" : "prescott";
-
-      default:
-        return (Em64T) ? "x86-64" : "pentium4";
-      }
-    }
-
-    default:
-      return "generic";
-    }
-  } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
-    // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
-    // appears to be no way to generate the wide variety of AMD-specific targets
-    // from the information returned from CPUID.
-    switch (Family) {
-      case 4:
-        return "i486";
-      case 5:
-        switch (Model) {
-        case 6:
-        case 7:  return "k6";
-        case 8:  return "k6-2";
-        case 9:
-        case 13: return "k6-3";
-        default: return "pentium";
-        }
-      case 6:
-        switch (Model) {
-        case 4:  return "athlon-tbird";
-        case 6:
-        case 7:
-        case 8:  return "athlon-mp";
-        case 10: return "athlon-xp";
-        default: return "athlon";
-        }
-      case 15:
-        if (HasSSE3) {
-          return "k8-sse3";
-        } else {
-          switch (Model) {
-          case 1:  return "opteron";
-          case 5:  return "athlon-fx"; // also opteron
-          default: return "athlon64";
-          }
-        }
-      case 16:
-        return "amdfam10";
-    default:
-      return "generic";
-    }
-  }
-  return "generic";
-}
-#else
-std::string sys::getHostCPUName() {
-  return "generic";
-}
-#endif
-
-bool sys::getHostCPUFeatures(StringMap<bool> &Features){
-  return false;
-}
diff --git a/lib/System/IncludeFile.cpp b/lib/System/IncludeFile.cpp
deleted file mode 100644
index 8258d40326f9..000000000000
--- a/lib/System/IncludeFile.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the IncludeFile constructor.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/IncludeFile.h"
-
-using namespace llvm;
-
-// This constructor is used to ensure linking of other modules. See the
-// llvm/System/IncludeFile.h header for details. 
-IncludeFile::IncludeFile(const void*) {}
diff --git a/lib/System/Makefile b/lib/System/Makefile
deleted file mode 100644
index bb013b9f1f16..000000000000
--- a/lib/System/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-##===- lib/System/Makefile ---------------------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../..
-LIBRARYNAME = LLVMSystem
-BUILD_ARCHIVE = 1
-REQUIRES_RTTI = 1
-include $(LEVEL)/Makefile.config
-
-ifeq ($(HOST_OS),MingW)
-  REQUIRES_EH := 1
-endif
-
-EXTRA_DIST = Unix Win32 README.txt
-
-include $(LEVEL)/Makefile.common
-
-CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts))
-CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts))
diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp
deleted file mode 100644
index ef23b8d12aab..000000000000
--- a/lib/System/Memory.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines some helpful functions for allocating memory and dealing
-// with memory mapped files
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Memory.h"
-#include "llvm/System/Valgrind.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Memory.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Memory.inc"
-#endif
-
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-
-/// InvalidateInstructionCache - Before the JIT can run a block of code
-/// that has been emitted it must invalidate the instruction cache on some
-/// platforms.
-void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
-                                                   size_t Len) {
-  
-// icache invalidation for PPC and ARM.
-#if defined(__APPLE__)
-
-#  if (defined(__POWERPC__) || defined (__ppc__) || \
-     defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
-  sys_icache_invalidate(Addr, Len);
-#  endif
-
-#else
-
-#  if (defined(__POWERPC__) || defined (__ppc__) || \
-       defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
-  const size_t LineSize = 32;
-
-  const intptr_t Mask = ~(LineSize - 1);
-  const intptr_t StartLine = ((intptr_t) Addr) & Mask;
-  const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
-
-  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
-    asm volatile("dcbf 0, %0" : : "r"(Line));
-  asm volatile("sync");
-
-  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
-    asm volatile("icbi 0, %0" : : "r"(Line));
-  asm volatile("isync");
-#  elif defined(__arm__) && defined(__GNUC__)
-  // FIXME: Can we safely always call this for __GNUC__ everywhere?
-  char *Start = (char*) Addr;
-  char *End = Start + Len;
-  __clear_cache(Start, End);
-#  endif
-
-#endif  // end apple
-
-  ValgrindDiscardTranslations(Addr, Len);
-}
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
deleted file mode 100644
index 8ccd6e52c4d5..000000000000
--- a/lib/System/Mutex.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the llvm::sys::Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/System/Mutex.h"
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
-// Define all methods as no-ops if threading is explicitly disabled
-namespace llvm {
-using namespace sys;
-MutexImpl::MutexImpl( bool recursive) { }
-MutexImpl::~MutexImpl() { }
-bool MutexImpl::acquire() { return true; }
-bool MutexImpl::release() { return true; }
-bool MutexImpl::tryacquire() { return true; }
-}
-#else
-
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK)
-
-#include <cassert>
-#include <pthread.h>
-#include <stdlib.h>
-
-namespace llvm {
-using namespace sys;
-
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_mutex_init does have an address, then mutex support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-//       is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_mutex_init is not
-//       declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
-// Construct a Mutex using pthread calls
-MutexImpl::MutexImpl( bool recursive)
-  : data_(0)
-{
-  if (pthread_enabled)
-  {
-    // Declare the pthread_mutex data structures
-    pthread_mutex_t* mutex =
-      static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
-    pthread_mutexattr_t attr;
-
-    // Initialize the mutex attributes
-    int errorcode = pthread_mutexattr_init(&attr);
-    assert(errorcode == 0);
-
-    // Initialize the mutex as a recursive mutex, if requested, or normal
-    // otherwise.
-    int kind = ( recursive  ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
-    errorcode = pthread_mutexattr_settype(&attr, kind);
-    assert(errorcode == 0);
-
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
-    // Make it a process local mutex
-    errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
-#endif
-
-    // Initialize the mutex
-    errorcode = pthread_mutex_init(mutex, &attr);
-    assert(errorcode == 0);
-
-    // Destroy the attributes
-    errorcode = pthread_mutexattr_destroy(&attr);
-    assert(errorcode == 0);
-
-    // Assign the data member
-    data_ = mutex;
-  }
-}
-
-// Destruct a Mutex
-MutexImpl::~MutexImpl()
-{
-  if (pthread_enabled)
-  {
-    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
-    assert(mutex != 0);
-    pthread_mutex_destroy(mutex);
-    free(mutex);
-  }
-}
-
-bool
-MutexImpl::acquire()
-{
-  if (pthread_enabled)
-  {
-    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
-    assert(mutex != 0);
-
-    int errorcode = pthread_mutex_lock(mutex);
-    return errorcode == 0;
-  } else return false;
-}
-
-bool
-MutexImpl::release()
-{
-  if (pthread_enabled)
-  {
-    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
-    assert(mutex != 0);
-
-    int errorcode = pthread_mutex_unlock(mutex);
-    return errorcode == 0;
-  } else return false;
-}
-
-bool
-MutexImpl::tryacquire()
-{
-  if (pthread_enabled)
-  {
-    pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
-    assert(mutex != 0);
-
-    int errorcode = pthread_mutex_trylock(mutex);
-    return errorcode == 0;
-  } else return false;
-}
-
-}
-
-#elif defined(LLVM_ON_UNIX)
-#include "Unix/Mutex.inc"
-#elif defined( LLVM_ON_WIN32)
-#include "Win32/Mutex.inc"
-#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
-#endif
-#endif
-
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
deleted file mode 100644
index 4445c667d86e..000000000000
--- a/lib/System/Path.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements the operating system Path concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Path.h"
-#include "llvm/Config/config.h"
-#include <cassert>
-#include <cstring>
-#include <ostream>
-using namespace llvm;
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-bool Path::operator==(const Path &that) const {
-  return path == that.path;
-}
-
-bool Path::operator<(const Path& that) const {
-  return path < that.path;
-}
-
-Path
-Path::GetLLVMConfigDir() {
-  Path result;
-#ifdef LLVM_ETCDIR
-  if (result.set(LLVM_ETCDIR))
-    return result;
-#endif
-  return GetLLVMDefaultConfigDir();
-}
-
-LLVMFileType
-sys::IdentifyFileType(const char *magic, unsigned length) {
-  assert(magic && "Invalid magic number string");
-  assert(length >=4 && "Invalid magic number length");
-  switch ((unsigned char)magic[0]) {
-    case 0xDE:  // 0x0B17C0DE = BC wraper
-      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
-          magic[3] == (char)0x0B)
-        return Bitcode_FileType;
-      break;
-    case 'B':
-      if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
-        return Bitcode_FileType;
-      break;
-    case '!':
-      if (length >= 8)
-        if (memcmp(magic,"!<arch>\n",8) == 0)
-          return Archive_FileType;
-      break;
-
-    case '\177':
-      if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
-        if (length >= 18 && magic[17] == 0)
-          switch (magic[16]) {
-            default: break;
-            case 1: return ELF_Relocatable_FileType;
-            case 2: return ELF_Executable_FileType;
-            case 3: return ELF_SharedObject_FileType;
-            case 4: return ELF_Core_FileType;
-          }
-      }
-      break;
-
-    case 0xCA:
-      if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
-          magic[3] == char(0xBE)) {
-        // This is complicated by an overlap with Java class files.
-        // See the Mach-O section in /usr/share/file/magic for details.
-        if (length >= 8 && magic[7] < 43)
-          // FIXME: Universal Binary of any type.
-          return Mach_O_DynamicallyLinkedSharedLib_FileType;
-      }
-      break;
-
-    case 0xFE:
-    case 0xCE: {
-      uint16_t type = 0;
-      if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
-          magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
-        /* Native endian */
-        if (length >= 16) type = magic[14] << 8 | magic[15];
-      } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
-                 magic[2] == char(0xED) && magic[3] == char(0xFE)) {
-        /* Reverse endian */
-        if (length >= 14) type = magic[13] << 8 | magic[12];
-      }
-      switch (type) {
-        default: break;
-        case 1: return Mach_O_Object_FileType;
-        case 2: return Mach_O_Executable_FileType;
-        case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
-        case 4: return Mach_O_Core_FileType;
-        case 5: return Mach_O_PreloadExectuable_FileType;
-        case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
-        case 7: return Mach_O_DynamicLinker_FileType;
-        case 8: return Mach_O_Bundle_FileType;
-        case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType;
-        case 10: break; // FIXME: MH_DSYM companion file with only debug.
-      }
-      break;
-    }
-    case 0xF0: // PowerPC Windows
-    case 0x83: // Alpha 32-bit
-    case 0x84: // Alpha 64-bit
-    case 0x66: // MPS R4000 Windows
-    case 0x50: // mc68K
-    case 0x4c: // 80386 Windows
-      if (magic[1] == 0x01)
-        return COFF_FileType;
-
-    case 0x90: // PA-RISC Windows
-    case 0x68: // mc68K Windows
-      if (magic[1] == 0x02)
-        return COFF_FileType;
-      break;
-
-    default:
-      break;
-  }
-  return Unknown_FileType;
-}
-
-bool
-Path::isArchive() const {
-  return hasMagicNumber("!<arch>\012");
-}
-
-bool
-Path::isDynamicLibrary() const {
-  std::string Magic;
-  if (getMagicNumber(Magic, 64))
-    switch (IdentifyFileType(Magic.c_str(),
-                             static_cast<unsigned>(Magic.length()))) {
-      default: return false;
-      case Mach_O_FixedVirtualMemorySharedLib_FileType:
-      case Mach_O_DynamicallyLinkedSharedLib_FileType:
-      case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-      case ELF_SharedObject_FileType:
-      case COFF_FileType:  return true;
-    }
-
-  return false;
-}
-
-Path
-Path::FindLibrary(std::string& name) {
-  std::vector<sys::Path> LibPaths;
-  GetSystemLibraryPaths(LibPaths);
-  for (unsigned i = 0; i < LibPaths.size(); ++i) {
-    sys::Path FullPath(LibPaths[i]);
-    FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT);
-    if (FullPath.isDynamicLibrary())
-      return FullPath;
-    FullPath.eraseSuffix();
-    FullPath.appendSuffix("a");
-    if (FullPath.isArchive())
-      return FullPath;
-  }
-  return sys::Path();
-}
-
-StringRef Path::GetDLLSuffix() {
-  return LTDL_SHLIB_EXT;
-}
-
-bool
-Path::isBitcodeFile() const {
-  std::string actualMagic;
-  if (!getMagicNumber(actualMagic, 4))
-    return false;
-  LLVMFileType FT =
-    IdentifyFileType(actualMagic.c_str(),
-                     static_cast<unsigned>(actualMagic.length()));
-  return FT == Bitcode_FileType;
-}
-
-bool Path::hasMagicNumber(StringRef Magic) const {
-  std::string actualMagic;
-  if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size())))
-    return Magic == actualMagic;
-  return false;
-}
-
-static void getPathList(const char*path, std::vector<Path>& Paths) {
-  const char* at = path;
-  const char* delim = strchr(at, PathSeparator);
-  Path tmpPath;
-  while (delim != 0) {
-    std::string tmp(at, size_t(delim-at));
-    if (tmpPath.set(tmp))
-      if (tmpPath.canRead())
-        Paths.push_back(tmpPath);
-    at = delim + 1;
-    delim = strchr(at, PathSeparator);
-  }
-
-  if (*at != 0)
-    if (tmpPath.set(std::string(at)))
-      if (tmpPath.canRead())
-        Paths.push_back(tmpPath);
-}
-
-static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
-  assert(Sep[0] != '\0' && Sep[1] == '\0' &&
-         "Sep must be a 1-character string literal.");
-  if (path.empty())
-    return ".";
-
-  // If the path is all slashes, return a single slash.
-  // Otherwise, remove all trailing slashes.
-
-  signed pos = static_cast<signed>(path.size()) - 1;
-
-  while (pos >= 0 && path[pos] == Sep[0])
-    --pos;
-
-  if (pos < 0)
-    return path[0] == Sep[0] ? Sep : ".";
-
-  // Any slashes left?
-  signed i = 0;
-
-  while (i < pos && path[i] != Sep[0])
-    ++i;
-
-  if (i == pos) // No slashes?  Return "."
-    return ".";
-
-  // There is at least one slash left.  Remove all trailing non-slashes.
-  while (pos >= 0 && path[pos] != Sep[0])
-    --pos;
-
-  // Remove any trailing slashes.
-  while (pos >= 0 && path[pos] == Sep[0])
-    --pos;
-
-  if (pos < 0)
-    return path[0] == Sep[0] ? Sep : ".";
-
-  return path.substr(0, pos+1);
-}
-
-// Include the truly platform-specific parts of this class.
-#if defined(LLVM_ON_UNIX)
-#include "Unix/Path.inc"
-#endif
-#if defined(LLVM_ON_WIN32)
-#include "Win32/Path.inc"
-#endif
-
diff --git a/lib/System/Process.cpp b/lib/System/Process.cpp
deleted file mode 100644
index e93b2af4c12b..000000000000
--- a/lib/System/Process.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements the operating system Process concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Process.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Process.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Process.inc"
-#endif
diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp
deleted file mode 100644
index cd58c2cc578c..000000000000
--- a/lib/System/Program.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This header file implements the operating system Program concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Program.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-int
-Program::ExecuteAndWait(const Path& path,
-                        const char** args,
-                        const char** envp,
-                        const Path** redirects,
-                        unsigned secondsToWait,
-                        unsigned memoryLimit,
-                        std::string* ErrMsg) {
-  Program prg;
-  if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
-    return prg.Wait(secondsToWait, ErrMsg);
-  else
-    return -1;
-}
-
-void
-Program::ExecuteNoWait(const Path& path,
-                       const char** args,
-                       const char** envp,
-                       const Path** redirects,
-                       unsigned memoryLimit,
-                       std::string* ErrMsg) {
-  Program prg;
-  prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg);
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Program.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Program.inc"
-#endif
diff --git a/lib/System/README.txt b/lib/System/README.txt
deleted file mode 100644
index eacb20094a61..000000000000
--- a/lib/System/README.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-Design Of lib/System
-====================
-
-The software in this directory is designed to completely shield LLVM from any
-and all operating system specific functionality. It is not intended to be a
-complete operating system wrapper (such as ACE), but only to provide the
-functionality necessary to support LLVM.
-
-The software located here, of necessity, has very specific and stringent design
-rules. Violation of these rules means that cracks in the shield could form and
-the primary goal of the library is defeated. By consistently using this library,
-LLVM becomes more easily ported to new platforms since the only thing requiring 
-porting is this library.
-
-Complete documentation for the library can be found in the file:
-  llvm/docs/SystemLibrary.html 
-or at this URL:
-  http://llvm.org/docs/SystemLibrary.html
-
-While we recommend that you read the more detailed documentation, for the 
-impatient, here's a high level summary of the library's requirements.
-
- 1. No system header files are to be exposed through the interface.
- 2. Std C++ and Std C header files are okay to be exposed through the interface.
- 3. No exposed system-specific functions.
- 4. No exposed system-specific data.
- 5. Data in lib/System classes must use only simple C++ intrinsic types.
- 6. Errors are handled by returning "true" and setting an optional std::string
- 7. Library must not throw any exceptions, period.
- 8. Interface functions must not have throw() specifications.
- 9. No duplicate function impementations are permitted within an operating
-    system class.
-
-To accomplish these requirements, the library has numerous design criteria that 
-must be satisfied. Here's a high level summary of the library's design criteria:
-
- 1. No unused functionality (only what LLVM needs)
- 2. High-Level Interfaces
- 3. Use Opaque Classes
- 4. Common Implementations</a></li>
- 5. Multiple Implementations</a></li>
- 6. Minimize Memory Allocation</a></li>
- 7. No Virtual Methods
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
deleted file mode 100644
index deb04709d829..000000000000
--- a/lib/System/RWMutex.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the llvm::sys::RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/System/RWMutex.h"
-#include <cstring>
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
-// Define all methods as no-ops if threading is explicitly disabled
-namespace llvm {
-using namespace sys;
-RWMutexImpl::RWMutexImpl() { }
-RWMutexImpl::~RWMutexImpl() { }
-bool RWMutexImpl::reader_acquire() { return true; }
-bool RWMutexImpl::reader_release() { return true; }
-bool RWMutexImpl::writer_acquire() { return true; }
-bool RWMutexImpl::writer_release() { return true; }
-}
-#else
-
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
-
-#include <cassert>
-#include <pthread.h>
-#include <stdlib.h>
-
-namespace llvm {
-using namespace sys;
-
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_rwlock_init does have an address, then rwlock support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-//       is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_rwlock_init is not
-//       declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
-// Construct a RWMutex using pthread calls
-RWMutexImpl::RWMutexImpl()
-  : data_(0)
-{
-  if (pthread_enabled)
-  {
-    // Declare the pthread_rwlock data structures
-    pthread_rwlock_t* rwlock =
-      static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
-
-#ifdef __APPLE__
-    // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
-    bzero(rwlock, sizeof(pthread_rwlock_t));
-#endif
-
-    // Initialize the rwlock
-    int errorcode = pthread_rwlock_init(rwlock, NULL);
-    (void)errorcode;
-    assert(errorcode == 0);
-
-    // Assign the data member
-    data_ = rwlock;
-  }
-}
-
-// Destruct a RWMutex
-RWMutexImpl::~RWMutexImpl()
-{
-  if (pthread_enabled)
-  {
-    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
-    assert(rwlock != 0);
-    pthread_rwlock_destroy(rwlock);
-    free(rwlock);
-  }
-}
-
-bool
-RWMutexImpl::reader_acquire()
-{
-  if (pthread_enabled)
-  {
-    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
-    assert(rwlock != 0);
-
-    int errorcode = pthread_rwlock_rdlock(rwlock);
-    return errorcode == 0;
-  } else return false;
-}
-
-bool
-RWMutexImpl::reader_release()
-{
-  if (pthread_enabled)
-  {
-    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
-    assert(rwlock != 0);
-
-    int errorcode = pthread_rwlock_unlock(rwlock);
-    return errorcode == 0;
-  } else return false;
-}
-
-bool
-RWMutexImpl::writer_acquire()
-{
-  if (pthread_enabled)
-  {
-    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
-    assert(rwlock != 0);
-
-    int errorcode = pthread_rwlock_wrlock(rwlock);
-    return errorcode == 0;
-  } else return false;
-}
-
-bool
-RWMutexImpl::writer_release()
-{
-  if (pthread_enabled)
-  {
-    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
-    assert(rwlock != 0);
-
-    int errorcode = pthread_rwlock_unlock(rwlock);
-    return errorcode == 0;
-  } else return false;
-}
-
-}
-
-#elif defined(LLVM_ON_UNIX)
-#include "Unix/RWMutex.inc"
-#elif defined( LLVM_ON_WIN32)
-#include "Win32/RWMutex.inc"
-#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
-#endif
-#endif
diff --git a/lib/System/SearchForAddressOfSpecialSymbol.cpp b/lib/System/SearchForAddressOfSpecialSymbol.cpp
deleted file mode 100644
index 73b484c2e917..000000000000
--- a/lib/System/SearchForAddressOfSpecialSymbol.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file pulls the addresses of certain symbols out of the linker.  It must
-//  include as few header files as possible because it declares the symbols as
-//  void*, which would conflict with the actual symbol type if any header
-//  declared it.
-//
-//===----------------------------------------------------------------------===//
-
-#include <string.h>
-
-// Must declare the symbols in the global namespace.
-static void *DoSearch(const char* symbolName) {
-#define EXPLICIT_SYMBOL(SYM) \
-   extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
-
-  // If this is darwin, it has some funky issues, try to solve them here.  Some
-  // important symbols are marked 'private external' which doesn't allow
-  // SearchForAddressOfSymbol to find them.  As such, we special case them here,
-  // there is only a small handful of them.
-
-#ifdef __APPLE__
-  {
-    EXPLICIT_SYMBOL(__ashldi3);
-    EXPLICIT_SYMBOL(__ashrdi3);
-    EXPLICIT_SYMBOL(__cmpdi2);
-    EXPLICIT_SYMBOL(__divdi3);
-    EXPLICIT_SYMBOL(__eprintf);
-    EXPLICIT_SYMBOL(__fixdfdi);
-    EXPLICIT_SYMBOL(__fixsfdi);
-    EXPLICIT_SYMBOL(__fixunsdfdi);
-    EXPLICIT_SYMBOL(__fixunssfdi);
-    EXPLICIT_SYMBOL(__floatdidf);
-    EXPLICIT_SYMBOL(__floatdisf);
-    EXPLICIT_SYMBOL(__lshrdi3);
-    EXPLICIT_SYMBOL(__moddi3);
-    EXPLICIT_SYMBOL(__udivdi3);
-    EXPLICIT_SYMBOL(__umoddi3);
-  }
-#endif
-
-#ifdef __CYGWIN__
-  {
-    EXPLICIT_SYMBOL(_alloca);
-    EXPLICIT_SYMBOL(__main);
-  }
-#endif
-
-#undef EXPLICIT_SYMBOL
-  return 0;
-}
-
-namespace llvm {
-void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
-  return DoSearch(symbolName);
-}
-}  // namespace llvm
diff --git a/lib/System/Signals.cpp b/lib/System/Signals.cpp
deleted file mode 100644
index d345b0a9aed4..000000000000
--- a/lib/System/Signals.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines some helpful functions for dealing with the possibility of
-// Unix signals occuring while your program is running.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Signals.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Signals.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Signals.inc"
-#endif
diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp
deleted file mode 100644
index f6a55a1c0b9b..000000000000
--- a/lib/System/ThreadLocal.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the llvm::sys::ThreadLocal class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/System/ThreadLocal.h"
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
-// Define all methods as no-ops if threading is explicitly disabled
-namespace llvm {
-using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() { }
-ThreadLocalImpl::~ThreadLocalImpl() { }
-void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
-const void* ThreadLocalImpl::getInstance() { return data; }
-void ThreadLocalImpl::removeInstance() { data = 0; }
-}
-#else
-
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
-
-#include <cassert>
-#include <pthread.h>
-#include <stdlib.h>
-
-namespace llvm {
-using namespace sys;
-
-ThreadLocalImpl::ThreadLocalImpl() : data(0) {
-  pthread_key_t* key = new pthread_key_t;
-  int errorcode = pthread_key_create(key, NULL);
-  assert(errorcode == 0);
-  (void) errorcode;
-  data = (void*)key;
-}
-
-ThreadLocalImpl::~ThreadLocalImpl() {
-  pthread_key_t* key = static_cast<pthread_key_t*>(data);
-  int errorcode = pthread_key_delete(*key);
-  assert(errorcode == 0);
-  (void) errorcode;
-  delete key;
-}
-
-void ThreadLocalImpl::setInstance(const void* d) {
-  pthread_key_t* key = static_cast<pthread_key_t*>(data);
-  int errorcode = pthread_setspecific(*key, d);
-  assert(errorcode == 0);
-  (void) errorcode;
-}
-
-const void* ThreadLocalImpl::getInstance() {
-  pthread_key_t* key = static_cast<pthread_key_t*>(data);
-  return pthread_getspecific(*key);
-}
-
-void ThreadLocalImpl::removeInstance() {
-  setInstance(0);
-}
-
-}
-
-#elif defined(LLVM_ON_UNIX)
-#include "Unix/ThreadLocal.inc"
-#elif defined( LLVM_ON_WIN32)
-#include "Win32/ThreadLocal.inc"
-#else
-#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
-#endif
-#endif
-
diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp
deleted file mode 100644
index 466c46802647..000000000000
--- a/lib/System/Threading.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements llvm_start_multithreaded() and friends.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Threading.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/Config/config.h"
-#include <cassert>
-
-using namespace llvm;
-
-static bool multithreaded_mode = false;
-
-static sys::Mutex* global_lock = 0;
-
-bool llvm::llvm_start_multithreaded() {
-#ifdef LLVM_MULTITHREADED
-  assert(!multithreaded_mode && "Already multithreaded!");
-  multithreaded_mode = true;
-  global_lock = new sys::Mutex(true);
-  
-  // We fence here to ensure that all initialization is complete BEFORE we
-  // return from llvm_start_multithreaded().
-  sys::MemoryFence();
-  return true;
-#else
-  return false;
-#endif
-}
-
-void llvm::llvm_stop_multithreaded() {
-#ifdef LLVM_MULTITHREADED
-  assert(multithreaded_mode && "Not currently multithreaded!");
-  
-  // We fence here to insure that all threaded operations are complete BEFORE we
-  // return from llvm_stop_multithreaded().
-  sys::MemoryFence();
-  
-  multithreaded_mode = false;
-  delete global_lock;
-#endif
-}
-
-bool llvm::llvm_is_multithreaded() {
-  return multithreaded_mode;
-}
-
-void llvm::llvm_acquire_global_lock() {
-  if (multithreaded_mode) global_lock->acquire();
-}
-
-void llvm::llvm_release_global_lock() {
-  if (multithreaded_mode) global_lock->release();
-}
diff --git a/lib/System/TimeValue.cpp b/lib/System/TimeValue.cpp
deleted file mode 100644
index cf4984cc4d1b..000000000000
--- a/lib/System/TimeValue.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file implements the operating system TimeValue concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/TimeValue.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-
-const TimeValue TimeValue::MinTime       = TimeValue ( INT64_MIN,0 );
-const TimeValue TimeValue::MaxTime       = TimeValue ( INT64_MAX,0 );
-const TimeValue TimeValue::ZeroTime      = TimeValue ( 0,0 );
-const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
-const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
-
-void
-TimeValue::normalize( void ) {
-  if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
-    do {
-      seconds_++;
-      nanos_ -= NANOSECONDS_PER_SECOND;
-    } while ( nanos_ >= NANOSECONDS_PER_SECOND );
-  } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
-    do {
-      seconds_--;
-      nanos_ += NANOSECONDS_PER_SECOND;
-    } while (nanos_ <= -NANOSECONDS_PER_SECOND);
-  }
-
-  if (seconds_ >= 1 && nanos_ < 0) {
-    seconds_--;
-    nanos_ += NANOSECONDS_PER_SECOND;
-  } else if (seconds_ < 0 && nanos_ > 0) {
-    seconds_++;
-    nanos_ -= NANOSECONDS_PER_SECOND;
-  }
-}
-
-}
-
-/// Include the platform specific portion of TimeValue class
-#ifdef LLVM_ON_UNIX
-#include "Unix/TimeValue.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/TimeValue.inc"
-#endif
-
diff --git a/lib/System/Unix/Alarm.inc b/lib/System/Unix/Alarm.inc
deleted file mode 100644
index fb42b6c65da1..000000000000
--- a/lib/System/Unix/Alarm.inc
+++ /dev/null
@@ -1,72 +0,0 @@
-//===-- Alarm.inc - Implement Unix Alarm Support ----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the UNIX Alarm support.
-//
-//===----------------------------------------------------------------------===//
-
-#include <signal.h>
-#include <unistd.h>
-#include <cassert>
-using namespace llvm;
-
-/// AlarmCancelled - This flag is set by the SIGINT signal handler if the
-/// user presses CTRL-C.
-static volatile bool AlarmCancelled = false;
-
-/// AlarmTriggered - This flag is set by the SIGALRM signal handler if the
-/// alarm was triggered.
-static volatile bool AlarmTriggered = false;
-
-/// NestedSOI - Sanity check.  Alarms cannot be nested or run in parallel.
-/// This ensures that they never do.
-static bool NestedSOI = false;
-
-static RETSIGTYPE SigIntHandler(int Sig) {
-  AlarmCancelled = true;
-  signal(SIGINT, SigIntHandler);
-}
-
-static RETSIGTYPE SigAlarmHandler(int Sig) {
-  AlarmTriggered = true;
-}
-
-static void (*OldSigIntHandler) (int);
-
-void sys::SetupAlarm(unsigned seconds) {
-  assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
-  NestedSOI = true;
-  AlarmCancelled = false;
-  AlarmTriggered = false;
-  ::signal(SIGALRM, SigAlarmHandler);
-  OldSigIntHandler = ::signal(SIGINT, SigIntHandler);
-  ::alarm(seconds);
-}
-
-void sys::TerminateAlarm() {
-  assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
-  ::alarm(0);
-  ::signal(SIGALRM, SIG_DFL);
-  ::signal(SIGINT, OldSigIntHandler);
-  AlarmCancelled = false;
-  AlarmTriggered = false;
-  NestedSOI = false;
-}
-
-int sys::AlarmStatus() {
-  if (AlarmCancelled)
-    return -1;
-  if (AlarmTriggered)
-    return 1;
-  return 0;
-}
-
-void sys::Sleep(unsigned n) {
-  ::sleep(n);
-}
diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc
deleted file mode 100644
index c76d6a4e18f1..000000000000
--- a/lib/System/Unix/Host.inc
+++ /dev/null
@@ -1,96 +0,0 @@
- //===- llvm/System/Unix/Host.inc -------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the UNIX Host support.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/ADT/StringRef.h"
-#include "Unix.h"
-#include <sys/utsname.h>
-#include <string>
-
-using namespace llvm;
-
-static std::string getOSVersion() {
-  struct utsname info;
-
-  if (uname(&info))
-    return "";
-
-  return info.release;
-}
-
-std::string sys::getHostTriple() {
-  // FIXME: Derive directly instead of relying on the autoconf generated
-  // variable.
-
-  StringRef HostTripleString(LLVM_HOSTTRIPLE);
-  std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
-  
-  // Normalize the arch, since the host triple may not actually match the host.
-  std::string Arch = ArchSplit.first;
-
-  // It would be nice to do this in terms of llvm::Triple, but that is in
-  // Support which is layered above us.
-#if defined(__x86_64__)
-  Arch = "x86_64";
-#elif defined(__i386__)
-  Arch = "i386";
-#elif defined(__ppc64__)
-  Arch = "powerpc64";
-#elif defined(__ppc__)
-  Arch = "powerpc";
-#elif defined(__arm__)
-
-  // FIXME: We need to pick the right ARM triple (which involves querying the
-  // chip). However, for now this is most important for LLVM arch selection, so
-  // we only need to make sure to distinguish ARM and Thumb.
-#  if defined(__thumb__)
-  Arch = "thumb";
-#  else
-  Arch = "arm";
-#  endif
-
-#else
-
-  // FIXME: When enough auto-detection is in place, this should just
-  // #error. Then at least the arch selection is done, and we only need the OS
-  // etc selection to kill off the use of LLVM_HOSTTRIPLE.
-
-#endif
-
-  std::string Triple(Arch);
-  Triple += '-';
-  Triple += ArchSplit.second;
-
-  // Force i<N>86 to i386.
-  if (Triple[0] == 'i' && isdigit(Triple[1]) && 
-      Triple[2] == '8' && Triple[3] == '6')
-    Triple[1] = '3';
-
-  // On darwin, we want to update the version to match that of the
-  // host.    
-  std::string::size_type DarwinDashIdx = Triple.find("-darwin");
-  if (DarwinDashIdx != std::string::npos) {
-    Triple.resize(DarwinDashIdx + strlen("-darwin"));
-    
-    // Only add the major part of the os version.
-    std::string Version = getOSVersion();
-    Triple += Version.substr(0, Version.find('.'));
-  }
-
-  return Triple;
-}
diff --git a/lib/System/Unix/Memory.inc b/lib/System/Unix/Memory.inc
deleted file mode 100644
index 1b038f9c6e0b..000000000000
--- a/lib/System/Unix/Memory.inc
+++ /dev/null
@@ -1,151 +0,0 @@
-//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file defines some functions for various memory management utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/System/Process.h"
-
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-
-#ifdef __APPLE__
-#include <mach/mach.h>
-#endif
-
-/// AllocateRWX - Allocate a slab of memory with read/write/execute
-/// permissions.  This is typically used for JIT applications where we want
-/// to emit code to the memory then jump to it.  Getting this type of memory
-/// is very OS specific.
-///
-llvm::sys::MemoryBlock 
-llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
-                               std::string *ErrMsg) {
-  if (NumBytes == 0) return MemoryBlock();
-
-  size_t pageSize = Process::GetPageSize();
-  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
-
-  int fd = -1;
-#ifdef NEED_DEV_ZERO_FOR_MMAP
-  static int zero_fd = open("/dev/zero", O_RDWR);
-  if (zero_fd == -1) {
-    MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
-    return MemoryBlock();
-  }
-  fd = zero_fd;
-#endif
-
-  int flags = MAP_PRIVATE |
-#ifdef HAVE_MMAP_ANONYMOUS
-  MAP_ANONYMOUS
-#else
-  MAP_ANON
-#endif
-  ;
-
-  void* start = NearBlock ? (unsigned char*)NearBlock->base() + 
-                            NearBlock->size() : 0;
-
-#if defined(__APPLE__) && defined(__arm__)
-  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
-                    flags, fd, 0);
-#else
-  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
-                    flags, fd, 0);
-#endif
-  if (pa == MAP_FAILED) {
-    if (NearBlock) //Try again without a near hint
-      return AllocateRWX(NumBytes, 0);
-
-    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
-    return MemoryBlock();
-  }
-
-#if defined(__APPLE__) && defined(__arm__)
-  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
-                                (vm_size_t)(pageSize*NumPages), 0,
-                                VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
-  if (KERN_SUCCESS != kr) {
-    MakeErrMsg(ErrMsg, "vm_protect max RX failed");
-    return sys::MemoryBlock();
-  }
-
-  kr = vm_protect(mach_task_self(), (vm_address_t)pa,
-                  (vm_size_t)(pageSize*NumPages), 0,
-                  VM_PROT_READ | VM_PROT_WRITE);
-  if (KERN_SUCCESS != kr) {
-    MakeErrMsg(ErrMsg, "vm_protect RW failed");
-    return sys::MemoryBlock();
-  }
-#endif
-
-  MemoryBlock result;
-  result.Address = pa;
-  result.Size = NumPages*pageSize;
-
-  return result;
-}
-
-bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
-  if (M.Address == 0 || M.Size == 0) return false;
-  if (0 != ::munmap(M.Address, M.Size))
-    return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
-  return false;
-}
-
-bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
-#if defined(__APPLE__) && defined(__arm__)
-  if (M.Address == 0 || M.Size == 0) return false;
-  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
-  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
-    (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
-  return KERN_SUCCESS == kr;
-#else
-  return true;
-#endif
-}
-
-bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
-#if defined(__APPLE__) && defined(__arm__)
-  if (M.Address == 0 || M.Size == 0) return false;
-  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
-  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
-    (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
-  return KERN_SUCCESS == kr;
-#else
-  return false;
-#endif
-}
-
-bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
-#if defined(__APPLE__) && defined(__arm__)
-  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
-                                (vm_size_t)Size, 0,
-                                VM_PROT_READ | VM_PROT_WRITE);
-  return KERN_SUCCESS == kr;
-#else
-  return true;
-#endif
-}
-
-bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
-#if defined(__APPLE__) && defined(__arm__)
-  kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
-                                (vm_size_t)Size, 0,
-                                VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
-  return KERN_SUCCESS == kr;
-#else
-  return true;
-#endif
-}
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
deleted file mode 100644
index 4a5e28de27b0..000000000000
--- a/lib/System/Unix/Mutex.inc
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- llvm/System/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific (non-pthread) Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-namespace llvm
-{
-using namespace sys;
-
-MutexImpl::MutexImpl( bool recursive)
-{
-}
-
-MutexImpl::~MutexImpl()
-{
-}
-
-bool 
-MutexImpl::release()
-{
-  return true;
-}
-
-bool 
-MutexImpl::tryacquire( void )
-{
-  return true;
-}
-
-}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
deleted file mode 100644
index 47e4d1ac3c6b..000000000000
--- a/lib/System/Unix/Path.inc
+++ /dev/null
@@ -1,923 +0,0 @@
-//===- llvm/System/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific portion of the Path class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-#if HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#if HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#if HAVE_UTIME_H
-#include <utime.h>
-#endif
-#if HAVE_TIME_H
-#include <time.h>
-#endif
-#if HAVE_DIRENT_H
-# include <dirent.h>
-# define NAMLEN(dirent) strlen((dirent)->d_name)
-#else
-# define dirent direct
-# define NAMLEN(dirent) (dirent)->d_namlen
-# if HAVE_SYS_NDIR_H
-#  include <sys/ndir.h>
-# endif
-# if HAVE_SYS_DIR_H
-#  include <sys/dir.h>
-# endif
-# if HAVE_NDIR_H
-#  include <ndir.h>
-# endif
-#endif
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-#endif
-
-#ifdef __APPLE__
-#include <mach-o/dyld.h>
-#endif
-
-// Put in a hack for Cygwin which falsely reports that the mkdtemp function
-// is available when it is not.
-#ifdef __CYGWIN__
-# undef HAVE_MKDTEMP
-#endif
-
-namespace {
-inline bool lastIsSlash(const std::string& path) {
-  return !path.empty() && path[path.length() - 1] == '/';
-}
-
-}
-
-namespace llvm {
-using namespace sys;
-
-const char sys::PathSeparator = ':';
-
-Path::Path(StringRef p)
-  : path(p) {}
-
-Path::Path(const char *StrStart, unsigned StrLen)
-  : path(StrStart, StrLen) {}
-
-Path&
-Path::operator=(StringRef that) {
-  path.assign(that.data(), that.size());
-  return *this;
-}
-
-bool
-Path::isValid() const {
-  // Check some obvious things
-  if (path.empty())
-    return false;
-  return path.length() < MAXPATHLEN;
-}
-
-bool
-Path::isAbsolute(const char *NameStart, unsigned NameLen) {
-  assert(NameStart);
-  if (NameLen == 0)
-    return false;
-  return NameStart[0] == '/';
-}
-
-bool
-Path::isAbsolute() const {
-  if (path.empty())
-    return false;
-  return path[0] == '/';
-}
-
-void Path::makeAbsolute() {
-  if (isAbsolute())
-    return;
-
-  Path CWD = Path::GetCurrentDirectory();
-  assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
-
-  CWD.appendComponent(path);
-
-  path = CWD.str();
-}
-
-Path
-Path::GetRootDirectory() {
-  Path result;
-  result.set("/");
-  return result;
-}
-
-Path
-Path::GetTemporaryDirectory(std::string *ErrMsg) {
-#if defined(HAVE_MKDTEMP)
-  // The best way is with mkdtemp but that's not available on many systems,
-  // Linux and FreeBSD have it. Others probably won't.
-  char pathname[MAXPATHLEN];
-  strcpy(pathname,"/tmp/llvm_XXXXXX");
-  if (0 == mkdtemp(pathname)) {
-    MakeErrMsg(ErrMsg,
-      std::string(pathname) + ": can't create temporary directory");
-    return Path();
-  }
-  Path result;
-  result.set(pathname);
-  assert(result.isValid() && "mkdtemp didn't create a valid pathname!");
-  return result;
-#elif defined(HAVE_MKSTEMP)
-  // If no mkdtemp is available, mkstemp can be used to create a temporary file
-  // which is then removed and created as a directory. We prefer this over
-  // mktemp because of mktemp's inherent security and threading risks. We still
-  // have a slight race condition from the time the temporary file is created to
-  // the time it is re-created as a directoy.
-  char pathname[MAXPATHLEN];
-  strcpy(pathname, "/tmp/llvm_XXXXXX");
-  int fd = 0;
-  if (-1 == (fd = mkstemp(pathname))) {
-    MakeErrMsg(ErrMsg,
-      std::string(pathname) + ": can't create temporary directory");
-    return Path();
-  }
-  ::close(fd);
-  ::unlink(pathname); // start race condition, ignore errors
-  if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
-    MakeErrMsg(ErrMsg,
-      std::string(pathname) + ": can't create temporary directory");
-    return Path();
-  }
-  Path result;
-  result.set(pathname);
-  assert(result.isValid() && "mkstemp didn't create a valid pathname!");
-  return result;
-#elif defined(HAVE_MKTEMP)
-  // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
-  // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
-  // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
-  // the XXXXXX with the pid of the process and a letter. That leads to only
-  // twenty six temporary files that can be generated.
-  char pathname[MAXPATHLEN];
-  strcpy(pathname, "/tmp/llvm_XXXXXX");
-  char *TmpName = ::mktemp(pathname);
-  if (TmpName == 0) {
-    MakeErrMsg(ErrMsg,
-      std::string(TmpName) + ": can't create unique directory name");
-    return Path();
-  }
-  if (-1 == ::mkdir(TmpName, S_IRWXU)) {
-    MakeErrMsg(ErrMsg,
-        std::string(TmpName) + ": can't create temporary directory");
-    return Path();
-  }
-  Path result;
-  result.set(TmpName);
-  assert(result.isValid() && "mktemp didn't create a valid pathname!");
-  return result;
-#else
-  // This is the worst case implementation. tempnam(3) leaks memory unless its
-  // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
-  // issues. The mktemp(3) function doesn't have enough variability in the
-  // temporary name generated. So, we provide our own implementation that
-  // increments an integer from a random number seeded by the current time. This
-  // should be sufficiently unique that we don't have many collisions between
-  // processes. Generally LLVM processes don't run very long and don't use very
-  // many temporary files so this shouldn't be a big issue for LLVM.
-  static time_t num = ::time(0);
-  char pathname[MAXPATHLEN];
-  do {
-    num++;
-    sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
-  } while ( 0 == access(pathname, F_OK ) );
-  if (-1 == ::mkdir(pathname, S_IRWXU)) {
-    MakeErrMsg(ErrMsg,
-      std::string(pathname) + ": can't create temporary directory");
-    return Path();
-  }
-  Path result;
-  result.set(pathname);
-  assert(result.isValid() && "mkstemp didn't create a valid pathname!");
-  return result;
-#endif
-}
-
-void
-Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
-#ifdef LTDL_SHLIBPATH_VAR
-  char* env_var = getenv(LTDL_SHLIBPATH_VAR);
-  if (env_var != 0) {
-    getPathList(env_var,Paths);
-  }
-#endif
-  // FIXME: Should this look at LD_LIBRARY_PATH too?
-  Paths.push_back(sys::Path("/usr/local/lib/"));
-  Paths.push_back(sys::Path("/usr/X11R6/lib/"));
-  Paths.push_back(sys::Path("/usr/lib/"));
-  Paths.push_back(sys::Path("/lib/"));
-}
-
-void
-Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
-  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
-  if (env_var != 0) {
-    getPathList(env_var,Paths);
-  }
-#ifdef LLVM_LIBDIR
-  {
-    Path tmpPath;
-    if (tmpPath.set(LLVM_LIBDIR))
-      if (tmpPath.canRead())
-        Paths.push_back(tmpPath);
-  }
-#endif
-  GetSystemLibraryPaths(Paths);
-}
-
-Path
-Path::GetLLVMDefaultConfigDir() {
-  return Path("/etc/llvm/");
-}
-
-Path
-Path::GetUserHomeDirectory() {
-  const char* home = getenv("HOME");
-  if (home) {
-    Path result;
-    if (result.set(home))
-      return result;
-  }
-  return GetRootDirectory();
-}
-
-Path
-Path::GetCurrentDirectory() {
-  char pathname[MAXPATHLEN];
-  if (!getcwd(pathname,MAXPATHLEN)) {
-    assert (false && "Could not query current working directory.");
-    return Path();
-  }
-
-  return Path(pathname);
-}
-
-#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
-static int
-test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
-    const char *dir, const char *bin)
-{
-  struct stat sb;
-
-  snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
-  if (realpath(buf, ret) == NULL)
-    return (1);
-  if (stat(buf, &sb) != 0)
-    return (1);
-
-  return (0);
-}
-
-static char *
-getprogpath(char ret[PATH_MAX], const char *bin)
-{
-  char *pv, *s, *t, buf[PATH_MAX];
-
-  /* First approach: absolute path. */
-  if (bin[0] == '/') {
-    if (test_dir(buf, ret, "/", bin) == 0)
-      return (ret);
-    return (NULL);
-  }
-
-  /* Second approach: relative path. */
-  if (strchr(bin, '/') != NULL) {
-    if (getcwd(buf, PATH_MAX) == NULL)
-      return (NULL);
-    if (test_dir(buf, ret, buf, bin) == 0)
-      return (ret);
-    return (NULL);
-  }
-
-  /* Third approach: $PATH */
-  if ((pv = getenv("PATH")) == NULL)
-    return (NULL);
-  s = pv = strdup(pv);
-  if (pv == NULL)
-    return (NULL);
-  while ((t = strsep(&s, ":")) != NULL) {
-    if (test_dir(buf, ret, t, bin) == 0) {
-      free(pv);
-      return (ret);
-    }
-  }
-  free(pv);
-  return (NULL);
-}
-#endif // __FreeBSD__ || __NetBSD__
-
-/// GetMainExecutable - Return the path to the main executable, given the
-/// value of argv[0] from program startup.
-Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
-#if defined(__APPLE__)
-  // On OS X the executable path is saved to the stack by dyld. Reading it
-  // from there is much faster than calling dladdr, especially for large
-  // binaries with symbols.
-  char exe_path[MAXPATHLEN];
-  uint32_t size = sizeof(exe_path);
-  if (_NSGetExecutablePath(exe_path, &size) == 0) {
-    char link_path[MAXPATHLEN];
-    if (realpath(exe_path, link_path))
-      return Path(std::string(link_path));
-  }
-#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
-  char exe_path[PATH_MAX];
-
-  if (getprogpath(exe_path, argv0) != NULL)
-    return Path(std::string(exe_path));
-#elif defined(__linux__) || defined(__CYGWIN__)
-  char exe_path[MAXPATHLEN];
-  ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
-  if (len >= 0)
-    return Path(std::string(exe_path, len));
-#elif defined(HAVE_DLFCN_H)
-  // Use dladdr to get executable path if available.
-  Dl_info DLInfo;
-  int err = dladdr(MainAddr, &DLInfo);
-  if (err == 0)
-    return Path();
-
-  // If the filename is a symlink, we need to resolve and return the location of
-  // the actual executable.
-  char link_path[MAXPATHLEN];
-  if (realpath(DLInfo.dli_fname, link_path))
-    return Path(std::string(link_path));
-#endif
-  return Path();
-}
-
-
-StringRef Path::getDirname() const {
-  return getDirnameCharSep(path, "/");
-}
-
-StringRef
-Path::getBasename() const {
-  // Find the last slash
-  std::string::size_type slash = path.rfind('/');
-  if (slash == std::string::npos)
-    slash = 0;
-  else
-    slash++;
-
-  std::string::size_type dot = path.rfind('.');
-  if (dot == std::string::npos || dot < slash)
-    return StringRef(path).substr(slash);
-  else
-    return StringRef(path).substr(slash, dot - slash);
-}
-
-StringRef
-Path::getSuffix() const {
-  // Find the last slash
-  std::string::size_type slash = path.rfind('/');
-  if (slash == std::string::npos)
-    slash = 0;
-  else
-    slash++;
-
-  std::string::size_type dot = path.rfind('.');
-  if (dot == std::string::npos || dot < slash)
-    return StringRef();
-  else
-    return StringRef(path).substr(dot + 1);
-}
-
-bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
-  assert(len < 1024 && "Request for magic string too long");
-  char Buf[1025];
-  int fd = ::open(path.c_str(), O_RDONLY);
-  if (fd < 0)
-    return false;
-  ssize_t bytes_read = ::read(fd, Buf, len);
-  ::close(fd);
-  if (ssize_t(len) != bytes_read)
-    return false;
-  Magic.assign(Buf, len);
-  return true;
-}
-
-bool
-Path::exists() const {
-  return 0 == access(path.c_str(), F_OK );
-}
-
-bool
-Path::isDirectory() const {
-  struct stat buf;
-  if (0 != stat(path.c_str(), &buf))
-    return false;
-  return buf.st_mode & S_IFDIR ? true : false;
-}
-
-bool
-Path::canRead() const {
-  return 0 == access(path.c_str(), R_OK);
-}
-
-bool
-Path::canWrite() const {
-  return 0 == access(path.c_str(), W_OK);
-}
-
-bool
-Path::isRegularFile() const {
-  // Get the status so we can determine if it's a file or directory
-  struct stat buf;
-
-  if (0 != stat(path.c_str(), &buf))
-    return false;
-
-  if (S_ISREG(buf.st_mode))
-    return true;
-
-  return false;
-}
-
-bool
-Path::canExecute() const {
-  if (0 != access(path.c_str(), R_OK | X_OK ))
-    return false;
-  struct stat buf;
-  if (0 != stat(path.c_str(), &buf))
-    return false;
-  if (!S_ISREG(buf.st_mode))
-    return false;
-  return true;
-}
-
-StringRef
-Path::getLast() const {
-  // Find the last slash
-  size_t pos = path.rfind('/');
-
-  // Handle the corner cases
-  if (pos == std::string::npos)
-    return path;
-
-  // If the last character is a slash
-  if (pos == path.length()-1) {
-    // Find the second to last slash
-    size_t pos2 = path.rfind('/', pos-1);
-    if (pos2 == std::string::npos)
-      return StringRef(path).substr(0,pos);
-    else
-      return StringRef(path).substr(pos2+1,pos-pos2-1);
-  }
-  // Return everything after the last slash
-  return StringRef(path).substr(pos+1);
-}
-
-const FileStatus *
-PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
-  if (!fsIsValid || update) {
-    struct stat buf;
-    if (0 != stat(path.c_str(), &buf)) {
-      MakeErrMsg(ErrStr, path + ": can't get status of file");
-      return 0;
-    }
-    status.fileSize = buf.st_size;
-    status.modTime.fromEpochTime(buf.st_mtime);
-    status.mode = buf.st_mode;
-    status.user = buf.st_uid;
-    status.group = buf.st_gid;
-    status.uniqueID = uint64_t(buf.st_ino);
-    status.isDir  = S_ISDIR(buf.st_mode);
-    status.isFile = S_ISREG(buf.st_mode);
-    fsIsValid = true;
-  }
-  return &status;
-}
-
-static bool AddPermissionBits(const Path &File, int bits) {
-  // Get the umask value from the operating system.  We want to use it
-  // when changing the file's permissions. Since calling umask() sets
-  // the umask and returns its old value, we must call it a second
-  // time to reset it to the user's preference.
-  int mask = umask(0777); // The arg. to umask is arbitrary.
-  umask(mask);            // Restore the umask.
-
-  // Get the file's current mode.
-  struct stat buf;
-  if (0 != stat(File.c_str(), &buf))
-    return false;
-  // Change the file to have whichever permissions bits from 'bits'
-  // that the umask would not disable.
-  if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
-      return false;
-  return true;
-}
-
-bool Path::makeReadableOnDisk(std::string* ErrMsg) {
-  if (!AddPermissionBits(*this, 0444))
-    return MakeErrMsg(ErrMsg, path + ": can't make file readable");
-  return false;
-}
-
-bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
-  if (!AddPermissionBits(*this, 0222))
-    return MakeErrMsg(ErrMsg, path + ": can't make file writable");
-  return false;
-}
-
-bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
-  if (!AddPermissionBits(*this, 0111))
-    return MakeErrMsg(ErrMsg, path + ": can't make file executable");
-  return false;
-}
-
-bool
-Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
-  DIR* direntries = ::opendir(path.c_str());
-  if (direntries == 0)
-    return MakeErrMsg(ErrMsg, path + ": can't open directory");
-
-  std::string dirPath = path;
-  if (!lastIsSlash(dirPath))
-    dirPath += '/';
-
-  result.clear();
-  struct dirent* de = ::readdir(direntries);
-  for ( ; de != 0; de = ::readdir(direntries)) {
-    if (de->d_name[0] != '.') {
-      Path aPath(dirPath + (const char*)de->d_name);
-      struct stat st;
-      if (0 != lstat(aPath.path.c_str(), &st)) {
-        if (S_ISLNK(st.st_mode))
-          continue; // dangling symlink -- ignore
-        return MakeErrMsg(ErrMsg,
-                          aPath.path +  ": can't determine file object type");
-      }
-      result.insert(aPath);
-    }
-  }
-
-  closedir(direntries);
-  return false;
-}
-
-bool
-Path::set(StringRef a_path) {
-  if (a_path.empty())
-    return false;
-  std::string save(path);
-  path = a_path;
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::appendComponent(StringRef name) {
-  if (name.empty())
-    return false;
-  std::string save(path);
-  if (!lastIsSlash(path))
-    path += '/';
-  path += name;
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::eraseComponent() {
-  size_t slashpos = path.rfind('/',path.size());
-  if (slashpos == 0 || slashpos == std::string::npos) {
-    path.erase();
-    return true;
-  }
-  if (slashpos == path.size() - 1)
-    slashpos = path.rfind('/',slashpos-1);
-  if (slashpos == std::string::npos) {
-    path.erase();
-    return true;
-  }
-  path.erase(slashpos);
-  return true;
-}
-
-bool
-Path::appendSuffix(StringRef suffix) {
-  std::string save(path);
-  path.append(".");
-  path.append(suffix);
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::eraseSuffix() {
-  std::string save = path;
-  size_t dotpos = path.rfind('.',path.size());
-  size_t slashpos = path.rfind('/',path.size());
-  if (dotpos != std::string::npos) {
-    if (slashpos == std::string::npos || dotpos > slashpos+1) {
-      path.erase(dotpos, path.size()-dotpos);
-      return true;
-    }
-  }
-  if (!isValid())
-    path = save;
-  return false;
-}
-
-static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
-
-  if (access(beg, R_OK | W_OK) == 0)
-    return false;
-
-  if (create_parents) {
-
-    char* c = end;
-
-    for (; c != beg; --c)
-      if (*c == '/') {
-
-        // Recurse to handling the parent directory.
-        *c = '\0';
-        bool x = createDirectoryHelper(beg, c, create_parents);
-        *c = '/';
-
-        // Return if we encountered an error.
-        if (x)
-          return true;
-
-        break;
-      }
-  }
-
-  return mkdir(beg, S_IRWXU | S_IRWXG) != 0;
-}
-
-bool
-Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
-  // Get a writeable copy of the path name
-  char pathname[MAXPATHLEN];
-  path.copy(pathname,MAXPATHLEN);
-
-  // Null-terminate the last component
-  size_t lastchar = path.length() - 1 ;
-
-  if (pathname[lastchar] != '/')
-    ++lastchar;
-
-  pathname[lastchar] = 0;
-
-  if (createDirectoryHelper(pathname, pathname+lastchar, create_parents))
-    return MakeErrMsg(ErrMsg,
-                      std::string(pathname) + ": can't create directory");
-
-  return false;
-}
-
-bool
-Path::createFileOnDisk(std::string* ErrMsg) {
-  // Create the file
-  int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR);
-  if (fd < 0)
-    return MakeErrMsg(ErrMsg, path + ": can't create file");
-  ::close(fd);
-  return false;
-}
-
-bool
-Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
-  // Make this into a unique file name
-  if (makeUnique( reuse_current, ErrMsg ))
-    return true;
-
-  // create the file
-  int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
-  if (fd < 0)
-    return MakeErrMsg(ErrMsg, path + ": can't create temporary file");
-  ::close(fd);
-  return false;
-}
-
-bool
-Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
-  // Get the status so we can determine if it's a file or directory.
-  struct stat buf;
-  if (0 != stat(path.c_str(), &buf)) {
-    MakeErrMsg(ErrStr, path + ": can't get status of file");
-    return true;
-  }
-
-  // Note: this check catches strange situations. In all cases, LLVM should
-  // only be involved in the creation and deletion of regular files.  This
-  // check ensures that what we're trying to erase is a regular file. It
-  // effectively prevents LLVM from erasing things like /dev/null, any block
-  // special file, or other things that aren't "regular" files.
-  if (S_ISREG(buf.st_mode)) {
-    if (unlink(path.c_str()) != 0)
-      return MakeErrMsg(ErrStr, path + ": can't destroy file");
-    return false;
-  }
-
-  if (!S_ISDIR(buf.st_mode)) {
-    if (ErrStr) *ErrStr = "not a file or directory";
-    return true;
-  }
-
-  if (remove_contents) {
-    // Recursively descend the directory to remove its contents.
-    std::string cmd = "/bin/rm -rf " + path;
-    if (system(cmd.c_str()) != 0) {
-      MakeErrMsg(ErrStr, path + ": failed to recursively remove directory.");
-      return true;
-    }
-    return false;
-  }
-
-  // Otherwise, try to just remove the one directory.
-  char pathname[MAXPATHLEN];
-  path.copy(pathname, MAXPATHLEN);
-  size_t lastchar = path.length() - 1;
-  if (pathname[lastchar] == '/')
-    pathname[lastchar] = 0;
-  else
-    pathname[lastchar+1] = 0;
-
-  if (rmdir(pathname) != 0)
-    return MakeErrMsg(ErrStr,
-      std::string(pathname) + ": can't erase directory");
-  return false;
-}
-
-bool
-Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
-  if (0 != ::rename(path.c_str(), newName.c_str()))
-    return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
-               newName.str() + "'");
-  return false;
-}
-
-bool
-Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
-  struct utimbuf utb;
-  utb.actime = si.modTime.toPosixTime();
-  utb.modtime = utb.actime;
-  if (0 != ::utime(path.c_str(),&utb))
-    return MakeErrMsg(ErrStr, path + ": can't set file modification time");
-  if (0 != ::chmod(path.c_str(),si.mode))
-    return MakeErrMsg(ErrStr, path + ": can't set mode");
-  return false;
-}
-
-bool
-sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
-  int inFile = -1;
-  int outFile = -1;
-  inFile = ::open(Src.c_str(), O_RDONLY);
-  if (inFile == -1)
-    return MakeErrMsg(ErrMsg, Src.str() +
-      ": can't open source file to copy");
-
-  outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
-  if (outFile == -1) {
-    ::close(inFile);
-    return MakeErrMsg(ErrMsg, Dest.str() +
-      ": can't create destination file for copy");
-  }
-
-  char Buffer[16*1024];
-  while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
-    if (Amt == -1) {
-      if (errno != EINTR && errno != EAGAIN) {
-        ::close(inFile);
-        ::close(outFile);
-        return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
-      }
-    } else {
-      char *BufPtr = Buffer;
-      while (Amt) {
-        ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
-        if (AmtWritten == -1) {
-          if (errno != EINTR && errno != EAGAIN) {
-            ::close(inFile);
-            ::close(outFile);
-            return MakeErrMsg(ErrMsg, Dest.str() +
-              ": can't write destination file");
-          }
-        } else {
-          Amt -= AmtWritten;
-          BufPtr += AmtWritten;
-        }
-      }
-    }
-  }
-  ::close(inFile);
-  ::close(outFile);
-  return false;
-}
-
-bool
-Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
-  if (reuse_current && !exists())
-    return false; // File doesn't exist already, just use it!
-
-  // Append an XXXXXX pattern to the end of the file for use with mkstemp,
-  // mktemp or our own implementation.
-  // This uses std::vector instead of SmallVector to avoid a dependence on
-  // libSupport. And performance isn't critical here.
-  std::vector<char> Buf;
-  Buf.resize(path.size()+8);
-  char *FNBuffer = &Buf[0];
-    path.copy(FNBuffer,path.size());
-  if (isDirectory())
-    strcpy(FNBuffer+path.size(), "/XXXXXX");
-  else
-    strcpy(FNBuffer+path.size(), "-XXXXXX");
-
-#if defined(HAVE_MKSTEMP)
-  int TempFD;
-  if ((TempFD = mkstemp(FNBuffer)) == -1)
-    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
-
-  // We don't need to hold the temp file descriptor... we will trust that no one
-  // will overwrite/delete the file before we can open it again.
-  close(TempFD);
-
-  // Save the name
-  path = FNBuffer;
-#elif defined(HAVE_MKTEMP)
-  // If we don't have mkstemp, use the old and obsolete mktemp function.
-  if (mktemp(FNBuffer) == 0)
-    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
-
-  // Save the name
-  path = FNBuffer;
-#else
-  // Okay, looks like we have to do it all by our lonesome.
-  static unsigned FCounter = 0;
-  // Try to initialize with unique value.
-  if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
-  char* pos = strstr(FNBuffer, "XXXXXX");
-  do {
-    if (++FCounter > 0xFFFFFF) {
-      return MakeErrMsg(ErrMsg,
-        path + ": can't make unique filename: too many files");
-    }
-    sprintf(pos, "%06X", FCounter);
-    path = FNBuffer;
-  } while (exists());
-  // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
-  // LLVM.
-#endif
-  return false;
-}
-
-const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
-  int Flags = MAP_PRIVATE;
-#ifdef MAP_FILE
-  Flags |= MAP_FILE;
-#endif
-  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
-  if (BasePtr == MAP_FAILED)
-    return 0;
-  return (const char*)BasePtr;
-}
-
-void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
-  ::munmap((void*)BasePtr, FileSize);
-}
-
-} // end llvm namespace
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
deleted file mode 100644
index cf6a47a31c80..000000000000
--- a/lib/System/Unix/Process.inc
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the generic Unix implementation of the Process class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_SYS_RESOURCE_H
-#include <sys/resource.h>
-#endif
-// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
-//  Unix.h includes this for us already.
-#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
-#include <malloc.h>
-#endif
-#ifdef HAVE_MALLOC_MALLOC_H
-#include <malloc/malloc.h>
-#endif
-#ifdef HAVE_SYS_IOCTL_H
-#  include <sys/ioctl.h>
-#endif
-#ifdef HAVE_TERMIOS_H
-#  include <termios.h>
-#endif
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-using namespace llvm;
-using namespace sys;
-
-unsigned 
-Process::GetPageSize() 
-{
-#if defined(__CYGWIN__)
-  // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
-  // memory protection and mmap() is 4k.
-  // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
-  const int page_size = 0x1000;
-#elif defined(HAVE_GETPAGESIZE)
-  const int page_size = ::getpagesize();
-#elif defined(HAVE_SYSCONF)
-  long page_size = ::sysconf(_SC_PAGE_SIZE);
-#else
-#warning Cannot get the page size on this machine
-#endif
-  return static_cast<unsigned>(page_size);
-}
-
-size_t Process::GetMallocUsage() {
-#if defined(HAVE_MALLINFO)
-  struct mallinfo mi;
-  mi = ::mallinfo();
-  return mi.uordblks;
-#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
-  malloc_statistics_t Stats;
-  malloc_zone_statistics(malloc_default_zone(), &Stats);
-  return Stats.size_in_use;   // darwin
-#elif defined(HAVE_SBRK)
-  // Note this is only an approximation and more closely resembles
-  // the value returned by mallinfo in the arena field.
-  static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
-  char *EndOfMemory = (char*)sbrk(0);
-  if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
-    return EndOfMemory - StartOfMemory;
-  else
-    return 0;
-#else
-#warning Cannot get malloc info on this platform
-  return 0;
-#endif
-}
-
-size_t
-Process::GetTotalMemoryUsage()
-{
-#if defined(HAVE_MALLINFO)
-  struct mallinfo mi = ::mallinfo();
-  return mi.uordblks + mi.hblkhd;
-#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
-  malloc_statistics_t Stats;
-  malloc_zone_statistics(malloc_default_zone(), &Stats);
-  return Stats.size_allocated;   // darwin
-#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
-  struct rusage usage;
-  ::getrusage(RUSAGE_SELF, &usage);
-  return usage.ru_maxrss;
-#else
-#warning Cannot get total memory size on this platform
-  return 0;
-#endif
-}
-
-void
-Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time, 
-                      TimeValue& sys_time)
-{
-  elapsed = TimeValue::now();
-#if defined(HAVE_GETRUSAGE)
-  struct rusage usage;
-  ::getrusage(RUSAGE_SELF, &usage);
-  user_time = TimeValue( 
-    static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ), 
-    static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec * 
-      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
-  sys_time = TimeValue( 
-    static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ), 
-    static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec * 
-      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
-#else
-#warning Cannot get usage times on this platform
-  user_time.seconds(0);
-  user_time.microseconds(0);
-  sys_time.seconds(0);
-  sys_time.microseconds(0);
-#endif
-}
-
-int Process::GetCurrentUserId() {
-  return getuid();
-}
-
-int Process::GetCurrentGroupId() {
-  return getgid();
-}
-
-#ifdef HAVE_MACH_MACH_H
-#include <mach/mach.h>
-#endif
-
-// Some LLVM programs such as bugpoint produce core files as a normal part of
-// their operation. To prevent the disk from filling up, this function
-// does what's necessary to prevent their generation.
-void Process::PreventCoreFiles() {
-#if HAVE_SETRLIMIT
-  struct rlimit rlim;
-  rlim.rlim_cur = rlim.rlim_max = 0;
-  setrlimit(RLIMIT_CORE, &rlim);
-#endif
-
-#ifdef HAVE_MACH_MACH_H
-  // Disable crash reporting on Mac OS X 10.0-10.4
-
-  // get information about the original set of exception ports for the task
-  mach_msg_type_number_t Count = 0;
-  exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
-  exception_port_t OriginalPorts[EXC_TYPES_COUNT];
-  exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
-  thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
-  kern_return_t err = 
-    task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
-                             &Count, OriginalPorts, OriginalBehaviors,
-                             OriginalFlavors);
-  if (err == KERN_SUCCESS) {
-    // replace each with MACH_PORT_NULL.
-    for (unsigned i = 0; i != Count; ++i)
-      task_set_exception_ports(mach_task_self(), OriginalMasks[i], 
-                               MACH_PORT_NULL, OriginalBehaviors[i],
-                               OriginalFlavors[i]);
-  }
-
-  // Disable crash reporting on Mac OS X 10.5
-  signal(SIGABRT, _exit);
-  signal(SIGILL,  _exit);
-  signal(SIGFPE,  _exit);
-  signal(SIGSEGV, _exit);
-  signal(SIGBUS,  _exit);
-#endif
-}
-
-bool Process::StandardInIsUserInput() {
-  return FileDescriptorIsDisplayed(STDIN_FILENO);
-}
-
-bool Process::StandardOutIsDisplayed() {
-  return FileDescriptorIsDisplayed(STDOUT_FILENO);
-}
-
-bool Process::StandardErrIsDisplayed() {
-  return FileDescriptorIsDisplayed(STDERR_FILENO);
-}
-
-bool Process::FileDescriptorIsDisplayed(int fd) {
-#if HAVE_ISATTY
-  return isatty(fd);
-#else
-  // If we don't have isatty, just return false.
-  return false;
-#endif
-}
-
-static unsigned getColumns(int FileID) {
-  // If COLUMNS is defined in the environment, wrap to that many columns.
-  if (const char *ColumnsStr = std::getenv("COLUMNS")) {
-    int Columns = std::atoi(ColumnsStr);
-    if (Columns > 0)
-      return Columns;
-  }
-
-  unsigned Columns = 0;
-
-#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
-  // Try to determine the width of the terminal.
-  struct winsize ws;
-  if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
-    Columns = ws.ws_col;
-#endif
-
-  return Columns;
-}
-
-unsigned Process::StandardOutColumns() {
-  if (!StandardOutIsDisplayed())
-    return 0;
-
-  return getColumns(1);
-}
-
-unsigned Process::StandardErrColumns() {
-  if (!StandardErrIsDisplayed())
-    return 0;
-
-  return getColumns(2);
-}
-
-static bool terminalHasColors() {
-  if (const char *term = std::getenv("TERM")) {
-    // Most modern terminals support ANSI escape sequences for colors.
-    // We could check terminfo, or have a list of known terms that support
-    // colors, but that would be overkill.
-    // The user can always ask for no colors by setting TERM to dumb, or
-    // using a commandline flag.
-    return strcmp(term, "dumb") != 0;
-  }
-  return false;
-}
-
-bool Process::StandardOutHasColors() {
-  if (!StandardOutIsDisplayed())
-    return false;
-  return terminalHasColors();
-}
-
-bool Process::StandardErrHasColors() {
-  if (!StandardErrIsDisplayed())
-    return false;
-  return terminalHasColors();
-}
-
-bool Process::ColorNeedsFlush() {
-  // No, we use ANSI escape sequences.
-  return false;
-}
-
-#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
-
-#define ALLCOLORS(FGBG,BOLD) {\
-    COLOR(FGBG, "0", BOLD),\
-    COLOR(FGBG, "1", BOLD),\
-    COLOR(FGBG, "2", BOLD),\
-    COLOR(FGBG, "3", BOLD),\
-    COLOR(FGBG, "4", BOLD),\
-    COLOR(FGBG, "5", BOLD),\
-    COLOR(FGBG, "6", BOLD),\
-    COLOR(FGBG, "7", BOLD)\
-  }
-
-static const char colorcodes[2][2][8][10] = {
- { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
- { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
-};
-
-const char *Process::OutputColor(char code, bool bold, bool bg) {
-  return colorcodes[bg?1:0][bold?1:0][code&7];
-}
-
-const char *Process::OutputBold(bool bg) {
-  return "\033[1m";
-}
-
-const char *Process::ResetColor() {
-  return "\033[0m";
-}
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
deleted file mode 100644
index 0209f5aaf832..000000000000
--- a/lib/System/Unix/Program.inc
+++ /dev/null
@@ -1,402 +0,0 @@
-//===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific portion of the Program class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include <llvm/Config/config.h>
-#include "Unix.h"
-#if HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#if HAVE_SYS_RESOURCE_H
-#include <sys/resource.h>
-#endif
-#if HAVE_SIGNAL_H
-#include <signal.h>
-#endif
-#if HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
-#ifdef HAVE_POSIX_SPAWN
-#include <spawn.h>
-#if !defined(__APPLE__)
-  extern char **environ;
-#else
-#include <crt_externs.h> // _NSGetEnviron
-#endif
-#endif
-
-namespace llvm {
-using namespace sys;
-
-Program::Program() : Data_(0) {}
-
-Program::~Program() {}
-
-unsigned Program::GetPid() const {
-  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
-  return static_cast<unsigned>(pid);
-}
-
-// This function just uses the PATH environment variable to find the program.
-Path
-Program::FindProgramByName(const std::string& progName) {
-
-  // Check some degenerate cases
-  if (progName.length() == 0) // no program
-    return Path();
-  Path temp;
-  if (!temp.set(progName)) // invalid name
-    return Path();
-  // Use the given path verbatim if it contains any slashes; this matches
-  // the behavior of sh(1) and friends.
-  if (progName.find('/') != std::string::npos)
-    return temp;
-
-  // At this point, the file name does not contain slashes. Search for it
-  // through the directories specified in the PATH environment variable.
-
-  // Get the path. If its empty, we can't do anything to find it.
-  const char *PathStr = getenv("PATH");
-  if (PathStr == 0)
-    return Path();
-
-  // Now we have a colon separated list of directories to search; try them.
-  size_t PathLen = strlen(PathStr);
-  while (PathLen) {
-    // Find the first colon...
-    const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
-
-    // Check to see if this first directory contains the executable...
-    Path FilePath;
-    if (FilePath.set(std::string(PathStr,Colon))) {
-      FilePath.appendComponent(progName);
-      if (FilePath.canExecute())
-        return FilePath;                    // Found the executable!
-    }
-
-    // Nope it wasn't in this directory, check the next path in the list!
-    PathLen -= Colon-PathStr;
-    PathStr = Colon;
-
-    // Advance past duplicate colons
-    while (*PathStr == ':') {
-      PathStr++;
-      PathLen--;
-    }
-  }
-  return Path();
-}
-
-static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
-  if (Path == 0) // Noop
-    return false;
-  const char *File;
-  if (Path->isEmpty())
-    // Redirect empty paths to /dev/null
-    File = "/dev/null";
-  else
-    File = Path->c_str();
-
-  // Open the file
-  int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
-  if (InFD == -1) {
-    MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
-              + (FD == 0 ? "input" : "output"));
-    return true;
-  }
-
-  // Install it as the requested FD
-  if (dup2(InFD, FD) == -1) {
-    MakeErrMsg(ErrMsg, "Cannot dup2");
-    close(InFD);
-    return true;
-  }
-  close(InFD);      // Close the original FD
-  return false;
-}
-
-#ifdef HAVE_POSIX_SPAWN
-static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
-                          posix_spawn_file_actions_t &FileActions) {
-  if (Path == 0) // Noop
-    return false;
-  const char *File;
-  if (Path->isEmpty())
-    // Redirect empty paths to /dev/null
-    File = "/dev/null";
-  else
-    File = Path->c_str();
-
-  if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD,
-                            File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666))
-    return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
-  return false;
-}
-#endif
-
-static void TimeOutHandler(int Sig) {
-}
-
-static void SetMemoryLimits (unsigned size)
-{
-#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
-  struct rlimit r;
-  __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
-
-  // Heap size
-  getrlimit (RLIMIT_DATA, &r);
-  r.rlim_cur = limit;
-  setrlimit (RLIMIT_DATA, &r);
-#ifdef RLIMIT_RSS
-  // Resident set size.
-  getrlimit (RLIMIT_RSS, &r);
-  r.rlim_cur = limit;
-  setrlimit (RLIMIT_RSS, &r);
-#endif
-#ifdef RLIMIT_AS  // e.g. NetBSD doesn't have it.
-  // Virtual memory.
-  getrlimit (RLIMIT_AS, &r);
-  r.rlim_cur = limit;
-  setrlimit (RLIMIT_AS, &r);
-#endif
-#endif
-}
-
-bool
-Program::Execute(const Path &path, const char **args, const char **envp,
-                 const Path **redirects, unsigned memoryLimit,
-                  std::string *ErrMsg) {
-  // If this OS has posix_spawn and there is no memory limit being implied, use
-  // posix_spawn.  It is more efficient than fork/exec.
-#ifdef HAVE_POSIX_SPAWN
-  if (memoryLimit == 0) {
-    posix_spawn_file_actions_t FileActions;
-    posix_spawn_file_actions_init(&FileActions);
-
-    if (redirects) {
-      // Redirect stdin/stdout.
-      if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
-          RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
-        return false;
-      if (redirects[1] == 0 || redirects[2] == 0 ||
-          *redirects[1] != *redirects[2]) {
-        // Just redirect stderr
-        if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
-      } else {       
-        // If stdout and stderr should go to the same place, redirect stderr
-        // to the FD already open for stdout.
-        if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
-          return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
-      }
-    }
-
-    if (!envp)
-#if !defined(__APPLE__)
-      envp = const_cast<const char **>(environ);
-#else
-      // environ is missing in dylibs.
-      envp = const_cast<const char **>(*_NSGetEnviron());
-#endif
-
-    pid_t PID;
-    int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
-                          const_cast<char **>(args), const_cast<char **>(envp));
-                          
-    posix_spawn_file_actions_destroy(&FileActions);
-
-    if (Err)
-     return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
-      
-    Data_ = reinterpret_cast<void*>(PID);
-    return true;
-  }
-#endif
-  
-  if (!path.canExecute()) {
-    if (ErrMsg)
-      *ErrMsg = path.str() + " is not executable";
-    return false;
-  }
-
-  // Create a child process.
-  int child = fork();
-  switch (child) {
-    // An error occured:  Return to the caller.
-    case -1:
-      MakeErrMsg(ErrMsg, "Couldn't fork");
-      return false;
-
-    // Child process: Execute the program.
-    case 0: {
-      // Redirect file descriptors...
-      if (redirects) {
-        // Redirect stdin
-        if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
-        // Redirect stdout
-        if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
-        if (redirects[1] && redirects[2] &&
-            *(redirects[1]) == *(redirects[2])) {
-          // If stdout and stderr should go to the same place, redirect stderr
-          // to the FD already open for stdout.
-          if (-1 == dup2(1,2)) {
-            MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
-            return false;
-          }
-        } else {
-          // Just redirect stderr
-          if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
-        }
-      }
-
-      // Set memory limits
-      if (memoryLimit!=0) {
-        SetMemoryLimits(memoryLimit);
-      }
-
-      // Execute!
-      if (envp != 0)
-        execve(path.c_str(),
-               const_cast<char **>(args),
-               const_cast<char **>(envp));
-      else
-        execv(path.c_str(),
-              const_cast<char **>(args));
-      // If the execve() failed, we should exit. Follow Unix protocol and
-      // return 127 if the executable was not found, and 126 otherwise.
-      // Use _exit rather than exit so that atexit functions and static
-      // object destructors cloned from the parent process aren't
-      // redundantly run, and so that any data buffered in stdio buffers
-      // cloned from the parent aren't redundantly written out.
-      _exit(errno == ENOENT ? 127 : 126);
-    }
-
-    // Parent process: Break out of the switch to do our processing.
-    default:
-      break;
-  }
-
-  Data_ = reinterpret_cast<void*>(child);
-
-  return true;
-}
-
-int
-Program::Wait(unsigned secondsToWait,
-              std::string* ErrMsg)
-{
-#ifdef HAVE_SYS_WAIT_H
-  struct sigaction Act, Old;
-
-  if (Data_ == 0) {
-    MakeErrMsg(ErrMsg, "Process not started!");
-    return -1;
-  }
-
-  // Install a timeout handler.  The handler itself does nothing, but the simple
-  // fact of having a handler at all causes the wait below to return with EINTR,
-  // unlike if we used SIG_IGN.
-  if (secondsToWait) {
-    memset(&Act, 0, sizeof(Act));
-    Act.sa_handler = TimeOutHandler;
-    sigemptyset(&Act.sa_mask);
-    sigaction(SIGALRM, &Act, &Old);
-    alarm(secondsToWait);
-  }
-
-  // Parent process: Wait for the child process to terminate.
-  int status;
-  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
-  pid_t child = static_cast<pid_t>(pid);
-  while (waitpid(pid, &status, 0) != child)
-    if (secondsToWait && errno == EINTR) {
-      // Kill the child.
-      kill(child, SIGKILL);
-
-      // Turn off the alarm and restore the signal handler
-      alarm(0);
-      sigaction(SIGALRM, &Old, 0);
-
-      // Wait for child to die
-      if (wait(&status) != child)
-        MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
-      else
-        MakeErrMsg(ErrMsg, "Child timed out", 0);
-
-      return -1;   // Timeout detected
-    } else if (errno != EINTR) {
-      MakeErrMsg(ErrMsg, "Error waiting for child process");
-      return -1;
-    }
-
-  // We exited normally without timeout, so turn off the timer.
-  if (secondsToWait) {
-    alarm(0);
-    sigaction(SIGALRM, &Old, 0);
-  }
-
-  // Return the proper exit status. 0=success, >0 is programs' exit status,
-  // <0 means a signal was returned, -9999999 means the program dumped core.
-  int result = 0;
-  if (WIFEXITED(status))
-    result = WEXITSTATUS(status);
-  else if (WIFSIGNALED(status))
-    result = 0 - WTERMSIG(status);
-#ifdef WCOREDUMP
-  else if (WCOREDUMP(status))
-    result |= 0x01000000;
-#endif
-  return result;
-#else
-  return -99;
-#endif
-
-}
-
-bool
-Program::Kill(std::string* ErrMsg) {
-  if (Data_ == 0) {
-    MakeErrMsg(ErrMsg, "Process not started!");
-    return true;
-  }
-
-  uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
-  pid_t pid = static_cast<pid_t>(pid64);
-
-  if (kill(pid, SIGKILL) != 0) {
-    MakeErrMsg(ErrMsg, "The process couldn't be killed!");
-    return true;
-  }
-
-  return false;
-}
-
-bool Program::ChangeStdinToBinary(){
-  // Do nothing, as Unix doesn't differentiate between text and binary.
-  return false;
-}
-
-bool Program::ChangeStdoutToBinary(){
-  // Do nothing, as Unix doesn't differentiate between text and binary.
-  return false;
-}
-
-bool Program::ChangeStderrToBinary(){
-  // Do nothing, as Unix doesn't differentiate between text and binary.
-  return false;
-}
-
-}
diff --git a/lib/System/Unix/README.txt b/lib/System/Unix/README.txt
deleted file mode 100644
index b3bace483e5d..000000000000
--- a/lib/System/Unix/README.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-llvm/lib/System/Unix README
-===========================
-
-This directory provides implementations of the lib/System classes that
-are common to two or more variants of UNIX. For example, the directory 
-structure underneath this directory could look like this:
-
-Unix           - only code that is truly generic to all UNIX platforms
-  Posix        - code that is specific to Posix variants of UNIX
-  SUS          - code that is specific to the Single Unix Specification 
-  SysV         - code that is specific to System V variants of UNIX
-
-As a rule, only those directories actually needing to be created should be
-created. Also, further subdirectories could be created to reflect versions of
-the various standards. For example, under SUS there could be v1, v2, and v3
-subdirectories to reflect the three major versions of SUS.
diff --git a/lib/System/Unix/RWMutex.inc b/lib/System/Unix/RWMutex.inc
deleted file mode 100644
index e83d41ef4cfe..000000000000
--- a/lib/System/Unix/RWMutex.inc
+++ /dev/null
@@ -1,43 +0,0 @@
-//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock  =//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific (non-pthread) RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-using namespace sys;
-
-RWMutexImpl::RWMutexImpl() { }
-
-RWMutexImpl::~RWMutexImpl() { }
-
-bool RWMutexImpl::reader_acquire() {
-  return true;
-}
-
-bool RWMutexImpl::reader_release() {
-  return true;
-}
-
-bool RWMutexImpl::writer_acquire() {
-  return true;
-}
-
-bool RWMutexImpl::writer_release() {
-  return true;
-}
-
-}
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
deleted file mode 100644
index 7b7c43efc786..000000000000
--- a/lib/System/Unix/Signals.inc
+++ /dev/null
@@ -1,299 +0,0 @@
-//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file defines some helpful functions for dealing with the possibility of
-// Unix signals occuring while your program is running.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/System/Mutex.h"
-#include <vector>
-#include <algorithm>
-#if HAVE_EXECINFO_H
-# include <execinfo.h>         // For backtrace().
-#endif
-#if HAVE_SIGNAL_H
-#include <signal.h>
-#endif
-#if HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#if HAVE_DLFCN_H && __GNUG__
-#include <dlfcn.h>
-#include <cxxabi.h> 
-#endif
-using namespace llvm;
-
-static RETSIGTYPE SignalHandler(int Sig);  // defined below.
-
-static SmartMutex<true> SignalsMutex;
-
-/// InterruptFunction - The function to call if ctrl-c is pressed.
-static void (*InterruptFunction)() = 0;
-
-static std::vector<sys::Path> FilesToRemove;
-static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
-
-// IntSigs - Signals that may interrupt the program at any time.
-static const int IntSigs[] = {
-  SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
-};
-static const int *const IntSigsEnd =
-  IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
-
-// KillSigs - Signals that are synchronous with the program that will cause it
-// to die.
-static const int KillSigs[] = {
-  SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
-#ifdef SIGSYS
-  , SIGSYS
-#endif
-#ifdef SIGXCPU
-  , SIGXCPU
-#endif
-#ifdef SIGXFSZ
-  , SIGXFSZ
-#endif
-#ifdef SIGEMT
-  , SIGEMT
-#endif
-};
-static const int *const KillSigsEnd =
-  KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
-
-static unsigned NumRegisteredSignals = 0;
-static struct {
-  struct sigaction SA;
-  int SigNo;
-} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
-
-
-static void RegisterHandler(int Signal) {
-  assert(NumRegisteredSignals <
-         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
-         "Out of space for signal handlers!");
-
-  struct sigaction NewHandler;
-  
-  NewHandler.sa_handler = SignalHandler;
-  NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
-  sigemptyset(&NewHandler.sa_mask); 
-  
-  // Install the new handler, save the old one in RegisteredSignalInfo.
-  sigaction(Signal, &NewHandler,
-            &RegisteredSignalInfo[NumRegisteredSignals].SA);
-  RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
-  ++NumRegisteredSignals;
-}
-
-static void RegisterHandlers() {
-  // If the handlers are already registered, we're done.
-  if (NumRegisteredSignals != 0) return;
-
-  std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
-  std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
-}
-
-static void UnregisterHandlers() {
-  // Restore all of the signal handlers to how they were before we showed up.
-  for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
-    sigaction(RegisteredSignalInfo[i].SigNo,
-              &RegisteredSignalInfo[i].SA, 0);
-  NumRegisteredSignals = 0;
-}
-
-
-/// RemoveFilesToRemove - Process the FilesToRemove list. This function
-/// should be called with the SignalsMutex lock held.
-static void RemoveFilesToRemove() {
-  while (!FilesToRemove.empty()) {
-    FilesToRemove.back().eraseFromDisk(true);
-    FilesToRemove.pop_back();
-  }
-}
-
-// SignalHandler - The signal handler that runs.
-static RETSIGTYPE SignalHandler(int Sig) {
-  // Restore the signal behavior to default, so that the program actually
-  // crashes when we return and the signal reissues.  This also ensures that if
-  // we crash in our signal handler that the program will terminate immediately
-  // instead of recursing in the signal handler.
-  UnregisterHandlers();
-
-  // Unmask all potentially blocked kill signals.
-  sigset_t SigMask;
-  sigfillset(&SigMask);
-  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
-
-  SignalsMutex.acquire();
-  RemoveFilesToRemove();
-
-  if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
-    if (InterruptFunction) {
-      void (*IF)() = InterruptFunction;
-      SignalsMutex.release();
-      InterruptFunction = 0;
-      IF();        // run the interrupt function.
-      return;
-    }
-    
-    SignalsMutex.release();
-    raise(Sig);   // Execute the default handler.
-    return;
-  }
-
-  SignalsMutex.release();
-
-  // Otherwise if it is a fault (like SEGV) run any handler.
-  for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
-    CallBacksToRun[i].first(CallBacksToRun[i].second);
-}
-
-void llvm::sys::RunInterruptHandlers() {
-  SignalsMutex.acquire();
-  RemoveFilesToRemove();
-  SignalsMutex.release();
-}
-
-void llvm::sys::SetInterruptFunction(void (*IF)()) {
-  SignalsMutex.acquire();
-  InterruptFunction = IF;
-  SignalsMutex.release();
-  RegisterHandlers();
-}
-
-// RemoveFileOnSignal - The public API
-bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
-                                   std::string* ErrMsg) {
-  SignalsMutex.acquire();
-  FilesToRemove.push_back(Filename);
-
-  SignalsMutex.release();
-
-  RegisterHandlers();
-  return false;
-}
-
-// DontRemoveFileOnSignal - The public API
-void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
-  SignalsMutex.acquire();
-  std::vector<sys::Path>::reverse_iterator I =
-    std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
-  if (I != FilesToRemove.rend())
-    FilesToRemove.erase(I.base()-1);
-  SignalsMutex.release();
-}
-
-/// AddSignalHandler - Add a function to be called when a signal is delivered
-/// to the process.  The handler can have a cookie passed to it to identify
-/// what instance of the handler it is.
-void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
-  CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
-  RegisterHandlers();
-}
-
-
-// PrintStackTrace - In the case of a program crash or fault, print out a stack
-// trace so that the user has an indication of why and where we died.
-//
-// On glibc systems we have the 'backtrace' function, which works nicely, but
-// doesn't demangle symbols.  
-static void PrintStackTrace(void *) {
-#ifdef HAVE_BACKTRACE
-  static void* StackTrace[256];
-  // Use backtrace() to output a backtrace on Linux systems with glibc.
-  int depth = backtrace(StackTrace,
-                        static_cast<int>(array_lengthof(StackTrace)));
-#if HAVE_DLFCN_H && __GNUG__
-  int width = 0;
-  for (int i = 0; i < depth; ++i) {
-    Dl_info dlinfo;
-    dladdr(StackTrace[i], &dlinfo);
-    const char* name = strrchr(dlinfo.dli_fname, '/');
-
-    int nwidth;
-    if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
-    else              nwidth = strlen(name) - 1;
-
-    if (nwidth > width) width = nwidth;
-  }
-
-  for (int i = 0; i < depth; ++i) {
-    Dl_info dlinfo;
-    dladdr(StackTrace[i], &dlinfo);
-
-    fprintf(stderr, "%-2d", i);
-
-    const char* name = strrchr(dlinfo.dli_fname, '/');
-    if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
-    else              fprintf(stderr, " %-*s", width, name+1);
-
-    fprintf(stderr, " %#0*lx",
-            (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
-
-    if (dlinfo.dli_sname != NULL) {
-      int res;
-      fputc(' ', stderr);
-      char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
-      if (d == NULL) fputs(dlinfo.dli_sname, stderr);
-      else           fputs(d, stderr);
-      free(d);
-
-      fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
-    }
-    fputc('\n', stderr);
-  }
-#else
-  backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
-#endif
-#endif
-}
-
-/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
-/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void llvm::sys::PrintStackTraceOnErrorSignal() {
-  AddSignalHandler(PrintStackTrace, 0);
-}
-
-
-/***/
-
-// On Darwin, raise sends a signal to the main thread instead of the current
-// thread. This has the unfortunate effect that assert() and abort() will end up
-// bypassing our crash recovery attempts. We work around this for anything in
-// the same linkage unit by just defining our own versions of the assert handler
-// and abort.
-
-#ifdef __APPLE__
-
-void __assert_rtn(const char *func,
-                  const char *file,
-                  int line,
-                  const char *expr) {
-  if (func)
-    fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
-            expr, func, file, line);
-  else
-    fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
-            expr, file, line);
-  abort();
-}
-
-#include <signal.h>
-#include <pthread.h>
-
-void abort() {
-  pthread_kill(pthread_self(), SIGABRT);
-  usleep(1000);
-  __builtin_trap();
-}
-
-#endif
diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc
deleted file mode 100644
index 6769520a6fb6..000000000000
--- a/lib/System/Unix/ThreadLocal.inc
+++ /dev/null
@@ -1,26 +0,0 @@
-//=== llvm/System/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific (non-pthread) ThreadLocal class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() { }
-ThreadLocalImpl::~ThreadLocalImpl() { }
-void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
-const void* ThreadLocalImpl::getInstance() { return data; }
-void ThreadLocalImpl::removeInstance() { setInstance(0); }
-}
diff --git a/lib/System/Unix/TimeValue.inc b/lib/System/Unix/TimeValue.inc
deleted file mode 100644
index d8cc8f55eecc..000000000000
--- a/lib/System/Unix/TimeValue.inc
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific portion of the TimeValue class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-
-namespace llvm {
-  using namespace sys;
-
-std::string TimeValue::str() const {
-  char buffer[32];
-
-  time_t ourTime = time_t(this->toEpochTime());
-#ifdef __hpux
-// note that the following line needs -D_REENTRANT on HP-UX to be picked up 
-  asctime_r(localtime(&ourTime), buffer);
-#else
-  ::asctime_r(::localtime(&ourTime), buffer);
-#endif
-
-  std::string result(buffer);
-  return result.substr(0,24);
-}
-
-TimeValue TimeValue::now() {
-  struct timeval the_time;
-  timerclear(&the_time);
-  if (0 != ::gettimeofday(&the_time,0)) {
-    // This is *really* unlikely to occur because the only gettimeofday
-    // errors concern the timezone parameter which we're passing in as 0.
-    // In the unlikely case it does happen, just return MinTime, no error
-    // message needed. 
-    return MinTime;
-  }
-
-  return TimeValue(
-    static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ), 
-    static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec * 
-      NANOSECONDS_PER_MICROSECOND ) );
-}
-
-}
diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h
deleted file mode 100644
index c15866f3d90a..000000000000
--- a/lib/System/Unix/Unix.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===- llvm/System/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines things specific to Unix implementations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYSTEM_UNIX_UNIX_H
-#define LLVM_SYSTEM_UNIX_UNIX_H
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//===          is guaranteed to work on all UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
-#include "llvm/System/Errno.h"
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
-#include <cerrno>
-#include <string>
-#include <algorithm>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#ifdef HAVE_SYS_PARAM_H
-#include <sys/param.h>
-#endif
-
-#ifdef HAVE_ASSERT_H
-#include <assert.h>
-#endif
-
-#ifdef TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# ifdef HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
-#ifdef HAVE_SYS_WAIT_H
-# include <sys/wait.h>
-#endif
-
-#ifndef WEXITSTATUS
-# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
-#endif
-
-#ifndef WIFEXITED
-# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
-#endif
-
-/// This function builds an error message into \p ErrMsg using the \p prefix
-/// string and the Unix error number given by \p errnum. If errnum is -1, the
-/// default then the value of errno is used.
-/// @brief Make an error message
-///
-/// If the error number can be converted to a string, it will be
-/// separated from prefix by ": ".
-static inline bool MakeErrMsg(
-  std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
-  if (!ErrMsg)
-    return true;
-  if (errnum == -1)
-    errnum = errno;
-  *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
-  return true;
-}
-
-#endif
diff --git a/lib/System/Valgrind.cpp b/lib/System/Valgrind.cpp
deleted file mode 100644
index c76cfe40d3e0..000000000000
--- a/lib/System/Valgrind.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
-//  defined.  If we have valgrind.h but valgrind isn't running, its macros are
-//  no-ops.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Valgrind.h"
-#include "llvm/Config/config.h"
-
-#if HAVE_VALGRIND_VALGRIND_H
-#include <valgrind/valgrind.h>
-
-static bool InitNotUnderValgrind() {
-  return !RUNNING_ON_VALGRIND;
-}
-
-// This bool is negated from what we'd expect because code may run before it
-// gets initialized.  If that happens, it will appear to be 0 (false), and we
-// want that to cause the rest of the code in this file to run the
-// Valgrind-provided macros.
-static const bool NotUnderValgrind = InitNotUnderValgrind();
-
-bool llvm::sys::RunningOnValgrind() {
-  if (NotUnderValgrind)
-    return false;
-  return RUNNING_ON_VALGRIND;
-}
-
-void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
-  if (NotUnderValgrind)
-    return;
-
-  VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
-}
-
-#else  // !HAVE_VALGRIND_VALGRIND_H
-
-bool llvm::sys::RunningOnValgrind() {
-  return false;
-}
-
-void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
-}
-
-#endif  // !HAVE_VALGRIND_VALGRIND_H
diff --git a/lib/System/Win32/Alarm.inc b/lib/System/Win32/Alarm.inc
deleted file mode 100644
index e0d00a0142bf..000000000000
--- a/lib/System/Win32/Alarm.inc
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Alarm.inc - Implement Win32 Alarm Support ---------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 Alarm support.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-using namespace llvm;
-
-/// NestedSOI - Sanity check.  Alarms cannot be nested or run in parallel.
-/// This ensures that they never do.
-static bool NestedSOI = false;
-
-void sys::SetupAlarm(unsigned seconds) {
-  assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
-  NestedSOI = true;
-  // FIXME: Implement for Win32
-}
-
-void sys::TerminateAlarm() {
-  assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
-  // FIXME: Implement for Win32
-  NestedSOI = false;
-}
-
-int sys::AlarmStatus() {
-  // FIXME: Implement for Win32
-  return 0;
-}
-
-// Don't pull in all of the Windows headers.
-extern "C"  void __stdcall Sleep(unsigned long);
-
-void sys::Sleep(unsigned n) {
-  ::Sleep(n*1000);
-}
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
deleted file mode 100644
index c9a89e5b8c49..000000000000
--- a/lib/System/Win32/DynamicLibrary.inc
+++ /dev/null
@@ -1,200 +0,0 @@
-//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of DynamicLibrary.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-
-#ifdef __MINGW32__
- #include <imagehlp.h>
-#else
- #include <dbghelp.h>
-#endif
-
-#ifdef _MSC_VER
- #include <ntverp.h>
-#endif
-
-#ifdef __MINGW32__
- #if (HAVE_LIBIMAGEHLP != 1)
-  #error "libimagehlp.a should be present"
- #endif
-#else
- #pragma comment(lib, "dbghelp.lib")
-#endif
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code 
-//===          and must not be UNIX code.
-//===----------------------------------------------------------------------===//
-
-static std::vector<HMODULE> OpenedHandles;
-
-#ifdef _WIN64
-  typedef DWORD64 ModuleBaseType;
-#else
-  typedef ULONG ModuleBaseType;
-#endif
-
-extern "C" {
-// Use old callback if:
-//  - Not using Visual Studio
-//  - Visual Studio 2005 or earlier but only if we are not using the Windows SDK 
-//    or Windows SDK version is older than 6.0
-// Use new callback if:
-//  - Newer Visual Studio (comes with newer SDK).
-//  - Visual Studio 2005 with Windows SDK 6.0+
-#if !defined(_MSC_VER) || _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
-  static BOOL CALLBACK ELM_Callback(PSTR  ModuleName,
-                                    ModuleBaseType ModuleBase,
-                                    ULONG ModuleSize,
-                                    PVOID UserContext)
-#else
-  static BOOL CALLBACK ELM_Callback(PCSTR  ModuleName,
-                                    ModuleBaseType ModuleBase,
-                                    ULONG ModuleSize,
-                                    PVOID UserContext)
-#endif
-  {
-    // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
-    // into the process.
-    if (stricmp(ModuleName, "msvci70") != 0 &&
-        stricmp(ModuleName, "msvcirt") != 0 &&
-        stricmp(ModuleName, "msvcp50") != 0 &&
-        stricmp(ModuleName, "msvcp60") != 0 &&
-        stricmp(ModuleName, "msvcp70") != 0 &&
-        stricmp(ModuleName, "msvcr70") != 0 &&
-#ifndef __MINGW32__
-        // Mingw32 uses msvcrt.dll by default. Don't ignore it.
-        // Otherwise, user should be aware, what he's doing :)
-        stricmp(ModuleName, "msvcrt") != 0 &&
-#endif
-        stricmp(ModuleName, "msvcrt20") != 0 &&
-        stricmp(ModuleName, "msvcrt40") != 0) {
-      OpenedHandles.push_back((HMODULE)ModuleBase);
-    }
-    return TRUE;
-  }
-}
-
-bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
-                                            std::string *ErrMsg) {                                            
-  if (filename) {
-    HMODULE a_handle = LoadLibrary(filename);
-
-    if (a_handle == 0)
-      return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
-
-    OpenedHandles.push_back(a_handle);
-  } else {
-    // When no file is specified, enumerate all DLLs and EXEs in the
-    // process.
-    EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
-  }
-
-  // Because we don't remember the handle, we will never free it; hence,
-  // it is loaded permanently.
-  return false;
-}
-
-// Stack probing routines are in the support library (e.g. libgcc), but we don't
-// have dynamic linking on windows. Provide a hook.
-#if defined(__MINGW32__) || defined (_MSC_VER)
-  #define EXPLICIT_SYMBOL(SYM)                    \
-    if (!strcmp(symbolName, #SYM)) return (void*)&SYM
-  #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)        \
-    if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO
-  #define EXPLICIT_SYMBOL_DEF(SYM)                \
-    extern "C" { extern void *SYM; }
-
-  #if defined(__MINGW32__)
-    EXPLICIT_SYMBOL_DEF(_alloca)
-    EXPLICIT_SYMBOL_DEF(__main)
-    EXPLICIT_SYMBOL_DEF(__ashldi3)
-    EXPLICIT_SYMBOL_DEF(__ashrdi3)
-    EXPLICIT_SYMBOL_DEF(__cmpdi2)
-    EXPLICIT_SYMBOL_DEF(__divdi3)
-    EXPLICIT_SYMBOL_DEF(__fixdfdi)
-    EXPLICIT_SYMBOL_DEF(__fixsfdi)
-    EXPLICIT_SYMBOL_DEF(__fixunsdfdi)
-    EXPLICIT_SYMBOL_DEF(__fixunssfdi)
-    EXPLICIT_SYMBOL_DEF(__floatdidf)
-    EXPLICIT_SYMBOL_DEF(__floatdisf)
-    EXPLICIT_SYMBOL_DEF(__lshrdi3)
-    EXPLICIT_SYMBOL_DEF(__moddi3)
-    EXPLICIT_SYMBOL_DEF(__udivdi3)
-    EXPLICIT_SYMBOL_DEF(__umoddi3)
-  #elif defined(_MSC_VER)
-    EXPLICIT_SYMBOL_DEF(_alloca_probe)
-  #endif
-#endif
-
-void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols) {
-    std::map<std::string, void *>::iterator I = 
-      ExplicitSymbols->find(symbolName);
-    std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
-    if (I != E)
-      return I->second;
-  }
-
-  // Now search the libraries.
-  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
-       E = OpenedHandles.end(); I != E; ++I) {
-    FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
-    if (ptr) {
-      return (void *) ptr;
-    }
-  }
-
-#if defined(__MINGW32__)
-  {
-    EXPLICIT_SYMBOL(_alloca);
-    EXPLICIT_SYMBOL(__main);
-    EXPLICIT_SYMBOL(__ashldi3);
-    EXPLICIT_SYMBOL(__ashrdi3);
-    EXPLICIT_SYMBOL(__cmpdi2);
-    EXPLICIT_SYMBOL(__divdi3);
-    EXPLICIT_SYMBOL(__fixdfdi);
-    EXPLICIT_SYMBOL(__fixsfdi);
-    EXPLICIT_SYMBOL(__fixunsdfdi);
-    EXPLICIT_SYMBOL(__fixunssfdi);
-    EXPLICIT_SYMBOL(__floatdidf);
-    EXPLICIT_SYMBOL(__floatdisf);
-    EXPLICIT_SYMBOL(__lshrdi3);
-    EXPLICIT_SYMBOL(__moddi3);
-    EXPLICIT_SYMBOL(__udivdi3);
-    EXPLICIT_SYMBOL(__umoddi3);
-
-    EXPLICIT_SYMBOL2(alloca, _alloca);
-#undef EXPLICIT_SYMBOL
-#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
-  }
-#elif defined(_MSC_VER)
-  {
-    EXPLICIT_SYMBOL2(alloca, _alloca_probe);
-    EXPLICIT_SYMBOL2(_alloca, _alloca_probe);
-#undef EXPLICIT_SYMBOL
-#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
-  }
-#endif
-
-  return 0;
-}
-
-}
-
diff --git a/lib/System/Win32/Host.inc b/lib/System/Win32/Host.inc
deleted file mode 100644
index 18f00f8bc07b..000000000000
--- a/lib/System/Win32/Host.inc
+++ /dev/null
@@ -1,23 +0,0 @@
-//===- llvm/System/Win32/Host.inc -------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 Host support.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <cstdio>
-#include <string>
-
-using namespace llvm;
-
-std::string sys::getHostTriple() {
-  // FIXME: Adapt to running version.
-  return LLVM_HOSTTRIPLE;
-}
diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc
deleted file mode 100644
index 19fccbddc26b..000000000000
--- a/lib/System/Win32/Memory.inc
+++ /dev/null
@@ -1,73 +0,0 @@
-//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of various Memory
-// management utilities
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/System/Process.h"
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code 
-//===          and must not be UNIX code
-//===----------------------------------------------------------------------===//
-
-MemoryBlock Memory::AllocateRWX(size_t NumBytes,
-                                const MemoryBlock *NearBlock,
-                                std::string *ErrMsg) {
-  if (NumBytes == 0) return MemoryBlock();
-
-  static const size_t pageSize = Process::GetPageSize();
-  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
-
-  //FIXME: support NearBlock if ever needed on Win64.
-
-  void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
-                  PAGE_EXECUTE_READWRITE);
-  if (pa == NULL) {
-    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
-    return MemoryBlock();
-  }
-
-  MemoryBlock result;
-  result.Address = pa;
-  result.Size = NumPages*pageSize;
-  return result;
-}
-
-bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
-  if (M.Address == 0 || M.Size == 0) return false;
-  if (!VirtualFree(M.Address, 0, MEM_RELEASE))
-    return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
-  return false;
-}
-
-bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
-  return true;
-}
-
-bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
-  return false;
-}
-
-bool Memory::setRangeWritable(const void *Addr, size_t Size) {
-  return true;
-}
-
-bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
-  return false;
-}
-
-}
diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc
deleted file mode 100644
index 75f01fefacbb..000000000000
--- a/lib/System/Win32/Mutex.inc
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- llvm/System/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 specific (non-pthread) Mutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//===          is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include "llvm/System/Mutex.h"
-
-namespace llvm {
-using namespace sys;
-
-MutexImpl::MutexImpl(bool /*recursive*/)
-{
-  data_ = new CRITICAL_SECTION;
-  InitializeCriticalSection((LPCRITICAL_SECTION)data_);
-}
-
-MutexImpl::~MutexImpl()
-{
-  DeleteCriticalSection((LPCRITICAL_SECTION)data_);
-  delete (LPCRITICAL_SECTION)data_;
-  data_ = 0;
-}
-
-bool 
-MutexImpl::acquire()
-{
-  EnterCriticalSection((LPCRITICAL_SECTION)data_);
-  return true;
-}
-
-bool 
-MutexImpl::release()
-{
-  LeaveCriticalSection((LPCRITICAL_SECTION)data_);
-  return true;
-}
-
-bool 
-MutexImpl::tryacquire()
-{
-  return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
-}
-
-}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
deleted file mode 100644
index 4a6dbd3ddf29..000000000000
--- a/lib/System/Win32/Path.inc
+++ /dev/null
@@ -1,872 +0,0 @@
-//===- llvm/System/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// Modified by Henrik Bach to comply with at least MinGW.
-// Ported to Win32 by Jeff Cohen.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of the Path class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//===          is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <malloc.h>
-#include <cstdio>
-
-// We need to undo a macro defined in Windows.h, otherwise we won't compile:
-#undef CopyFile
-#undef GetCurrentDirectory
-
-// Windows happily accepts either forward or backward slashes, though any path
-// returned by a Win32 API will have backward slashes.  As LLVM code basically
-// assumes forward slashes are used, backward slashs are converted where they
-// can be introduced into a path.
-//
-// Another invariant is that a path ends with a slash if and only if the path
-// is a root directory.  Any other use of a trailing slash is stripped.  Unlike
-// in Unix, Windows has a rather complicated notion of a root path and this
-// invariant helps simply the code.
-
-static void FlipBackSlashes(std::string& s) {
-  for (size_t i = 0; i < s.size(); i++)
-    if (s[i] == '\\')
-      s[i] = '/';
-}
-
-namespace llvm {
-namespace sys {
-const char PathSeparator = ';';
-
-Path::Path(llvm::StringRef p)
-  : path(p) {
-  FlipBackSlashes(path);
-}
-
-Path::Path(const char *StrStart, unsigned StrLen)
-  : path(StrStart, StrLen) {
-  FlipBackSlashes(path);
-}
-
-Path&
-Path::operator=(StringRef that) {
-  path.assign(that.data(), that.size());
-  FlipBackSlashes(path);
-  return *this;
-}
-
-bool
-Path::isValid() const {
-  if (path.empty())
-    return false;
-
-  // If there is a colon, it must be the second character, preceded by a letter
-  // and followed by something.
-  size_t len = path.size();
-  size_t pos = path.rfind(':',len);
-  size_t rootslash = 0;
-  if (pos != std::string::npos) {
-    if (pos != 1 || !isalpha(path[0]) || len < 3)
-      return false;
-      rootslash = 2;
-  }
-
-  // Look for a UNC path, and if found adjust our notion of the root slash.
-  if (len > 3 && path[0] == '/' && path[1] == '/') {
-    rootslash = path.find('/', 2);
-    if (rootslash == std::string::npos)
-      rootslash = 0;
-  }
-
-  // Check for illegal characters.
-  if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
-                         "\013\014\015\016\017\020\021\022\023\024\025\026"
-                         "\027\030\031\032\033\034\035\036\037")
-      != std::string::npos)
-    return false;
-
-  // Remove trailing slash, unless it's a root slash.
-  if (len > rootslash+1 && path[len-1] == '/')
-    path.erase(--len);
-
-  // Check each component for legality.
-  for (pos = 0; pos < len; ++pos) {
-    // A component may not end in a space.
-    if (path[pos] == ' ') {
-      if (path[pos+1] == '/' || path[pos+1] == '\0')
-        return false;
-    }
-
-    // A component may not end in a period.
-    if (path[pos] == '.') {
-      if (path[pos+1] == '/' || path[pos+1] == '\0') {
-        // Unless it is the pseudo-directory "."...
-        if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
-          return true;
-        // or "..".
-        if (pos > 0 && path[pos-1] == '.') {
-          if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
-            return true;
-        }
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-void Path::makeAbsolute() {
-  TCHAR  FullPath[MAX_PATH + 1] = {0};
-  LPTSTR FilePart = NULL;
-
-  DWORD RetLength = ::GetFullPathNameA(path.c_str(),
-                        sizeof(FullPath)/sizeof(FullPath[0]),
-                        FullPath, &FilePart);
-
-  if (0 == RetLength) {
-    // FIXME: Report the error GetLastError()
-    assert(0 && "Unable to make absolute path!");
-  } else if (RetLength > MAX_PATH) {
-    // FIXME: Report too small buffer (needed RetLength bytes).
-    assert(0 && "Unable to make absolute path!");
-  } else {
-    path = FullPath;
-  }
-}
-
-bool
-Path::isAbsolute(const char *NameStart, unsigned NameLen) {
-  assert(NameStart);
-  // FIXME: This does not handle correctly an absolute path starting from
-  // a drive letter or in UNC format.
-  switch (NameLen) {
-  case 0:
-    return false;
-  case 1:
-  case 2:
-    return NameStart[0] == '/';
-  default:
-    return (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
-           (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
-  }
-}
-
-bool
-Path::isAbsolute() const {
-  // FIXME: This does not handle correctly an absolute path starting from
-  // a drive letter or in UNC format.
-  switch (path.length()) {
-    case 0:
-      return false;
-    case 1:
-    case 2:
-      return path[0] == '/';
-    default:
-      return path[0] == '/' || (path[1] == ':' && path[2] == '/');
-  }
-}
-
-static Path *TempDirectory;
-
-Path
-Path::GetTemporaryDirectory(std::string* ErrMsg) {
-  if (TempDirectory)
-    return *TempDirectory;
-
-  char pathname[MAX_PATH];
-  if (!GetTempPath(MAX_PATH, pathname)) {
-    if (ErrMsg)
-      *ErrMsg = "Can't determine temporary directory";
-    return Path();
-  }
-
-  Path result;
-  result.set(pathname);
-
-  // Append a subdirectory passed on our process id so multiple LLVMs don't
-  // step on each other's toes.
-#ifdef __MINGW32__
-  // Mingw's Win32 header files are broken.
-  sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
-#else
-  sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
-#endif
-  result.appendComponent(pathname);
-
-  // If there's a directory left over from a previous LLVM execution that
-  // happened to have the same process id, get rid of it.
-  result.eraseFromDisk(true);
-
-  // And finally (re-)create the empty directory.
-  result.createDirectoryOnDisk(false);
-  TempDirectory = new Path(result);
-  return *TempDirectory;
-}
-
-// FIXME: the following set of functions don't map to Windows very well.
-Path
-Path::GetRootDirectory() {
-  Path result;
-  result.set("C:/");
-  return result;
-}
-
-void
-Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
-  Paths.push_back(sys::Path("C:/WINDOWS/SYSTEM32"));
-  Paths.push_back(sys::Path("C:/WINDOWS"));
-}
-
-void
-Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
-  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
-  if (env_var != 0) {
-    getPathList(env_var,Paths);
-  }
-#ifdef LLVM_LIBDIR
-  {
-    Path tmpPath;
-    if (tmpPath.set(LLVM_LIBDIR))
-      if (tmpPath.canRead())
-        Paths.push_back(tmpPath);
-  }
-#endif
-  GetSystemLibraryPaths(Paths);
-}
-
-Path
-Path::GetLLVMDefaultConfigDir() {
-  // TODO: this isn't going to fly on Windows
-  return Path("/etc/llvm");
-}
-
-Path
-Path::GetUserHomeDirectory() {
-  // TODO: Typical Windows setup doesn't define HOME.
-  const char* home = getenv("HOME");
-  if (home) {
-    Path result;
-    if (result.set(home))
-      return result;
-  }
-  return GetRootDirectory();
-}
-
-Path
-Path::GetCurrentDirectory() {
-  char pathname[MAX_PATH];
-  ::GetCurrentDirectoryA(MAX_PATH,pathname);
-  return Path(pathname);
-}
-
-/// GetMainExecutable - Return the path to the main executable, given the
-/// value of argv[0] from program startup.
-Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
-  char pathname[MAX_PATH];
-  DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
-  return ret != MAX_PATH ? Path(pathname) : Path();
-}
-
-
-// FIXME: the above set of functions don't map to Windows very well.
-
-
-StringRef Path::getDirname() const {
-  return getDirnameCharSep(path, "/");
-}
-
-StringRef
-Path::getBasename() const {
-  // Find the last slash
-  size_t slash = path.rfind('/');
-  if (slash == std::string::npos)
-    slash = 0;
-  else
-    slash++;
-
-  size_t dot = path.rfind('.');
-  if (dot == std::string::npos || dot < slash)
-    return StringRef(path).substr(slash);
-  else
-    return StringRef(path).substr(slash, dot - slash);
-}
-
-StringRef
-Path::getSuffix() const {
-  // Find the last slash
-  size_t slash = path.rfind('/');
-  if (slash == std::string::npos)
-    slash = 0;
-  else
-    slash++;
-
-  size_t dot = path.rfind('.');
-  if (dot == std::string::npos || dot < slash)
-    return StringRef("");
-  else
-    return StringRef(path).substr(dot + 1);
-}
-
-bool
-Path::exists() const {
-  DWORD attr = GetFileAttributes(path.c_str());
-  return attr != INVALID_FILE_ATTRIBUTES;
-}
-
-bool
-Path::isDirectory() const {
-  DWORD attr = GetFileAttributes(path.c_str());
-  return (attr != INVALID_FILE_ATTRIBUTES) &&
-         (attr & FILE_ATTRIBUTE_DIRECTORY);
-}
-
-bool
-Path::canRead() const {
-  // FIXME: take security attributes into account.
-  DWORD attr = GetFileAttributes(path.c_str());
-  return attr != INVALID_FILE_ATTRIBUTES;
-}
-
-bool
-Path::canWrite() const {
-  // FIXME: take security attributes into account.
-  DWORD attr = GetFileAttributes(path.c_str());
-  return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
-}
-
-bool
-Path::canExecute() const {
-  // FIXME: take security attributes into account.
-  DWORD attr = GetFileAttributes(path.c_str());
-  return attr != INVALID_FILE_ATTRIBUTES;
-}
-
-bool
-Path::isRegularFile() const {
-  if (isDirectory())
-    return false;
-  return true;
-}
-
-StringRef
-Path::getLast() const {
-  // Find the last slash
-  size_t pos = path.rfind('/');
-
-  // Handle the corner cases
-  if (pos == std::string::npos)
-    return path;
-
-  // If the last character is a slash, we have a root directory
-  if (pos == path.length()-1)
-    return path;
-
-  // Return everything after the last slash
-  return StringRef(path).substr(pos+1);
-}
-
-const FileStatus *
-PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
-  if (!fsIsValid || update) {
-    WIN32_FILE_ATTRIBUTE_DATA fi;
-    if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
-      MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
-                      ": Can't get status: ");
-      return 0;
-    }
-
-    status.fileSize = fi.nFileSizeHigh;
-    status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
-    status.fileSize += fi.nFileSizeLow;
-
-    status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
-    status.user = 9999;    // Not applicable to Windows, so...
-    status.group = 9999;   // Not applicable to Windows, so...
-
-    // FIXME: this is only unique if the file is accessed by the same file path.
-    // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
-    // numbers, but the concept doesn't exist in Windows.
-    status.uniqueID = 0;
-    for (unsigned i = 0; i < path.length(); ++i)
-      status.uniqueID += path[i];
-
-    ULARGE_INTEGER ui;
-    ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
-    ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
-    status.modTime.fromWin32Time(ui.QuadPart);
-
-    status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
-    fsIsValid = true;
-  }
-  return &status;
-}
-
-bool Path::makeReadableOnDisk(std::string* ErrMsg) {
-  // All files are readable on Windows (ignoring security attributes).
-  return false;
-}
-
-bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
-  DWORD attr = GetFileAttributes(path.c_str());
-
-  // If it doesn't exist, we're done.
-  if (attr == INVALID_FILE_ATTRIBUTES)
-    return false;
-
-  if (attr & FILE_ATTRIBUTE_READONLY) {
-    if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
-      MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
-      return true;
-    }
-  }
-  return false;
-}
-
-bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
-  // All files are executable on Windows (ignoring security attributes).
-  return false;
-}
-
-bool
-Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
-  WIN32_FILE_ATTRIBUTE_DATA fi;
-  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
-    MakeErrMsg(ErrMsg, path + ": can't get status of file");
-    return true;
-  }
-
-  if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
-    if (ErrMsg)
-      *ErrMsg = path + ": not a directory";
-    return true;
-  }
-
-  result.clear();
-  WIN32_FIND_DATA fd;
-  std::string searchpath = path;
-  if (path.size() == 0 || searchpath[path.size()-1] == '/')
-    searchpath += "*";
-  else
-    searchpath += "/*";
-
-  HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
-  if (h == INVALID_HANDLE_VALUE) {
-    if (GetLastError() == ERROR_FILE_NOT_FOUND)
-      return true; // not really an error, now is it?
-    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
-    return true;
-  }
-
-  do {
-    if (fd.cFileName[0] == '.')
-      continue;
-    Path aPath(path);
-    aPath.appendComponent(&fd.cFileName[0]);
-    result.insert(aPath);
-  } while (FindNextFile(h, &fd));
-
-  DWORD err = GetLastError();
-  FindClose(h);
-  if (err != ERROR_NO_MORE_FILES) {
-    SetLastError(err);
-    MakeErrMsg(ErrMsg, path + ": Can't read directory: ");
-    return true;
-  }
-  return false;
-}
-
-bool
-Path::set(StringRef a_path) {
-  if (a_path.empty())
-    return false;
-  std::string save(path);
-  path = a_path;
-  FlipBackSlashes(path);
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::appendComponent(StringRef name) {
-  if (name.empty())
-    return false;
-  std::string save(path);
-  if (!path.empty()) {
-    size_t last = path.size() - 1;
-    if (path[last] != '/')
-      path += '/';
-  }
-  path += name;
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::eraseComponent() {
-  size_t slashpos = path.rfind('/',path.size());
-  if (slashpos == path.size() - 1 || slashpos == std::string::npos)
-    return false;
-  std::string save(path);
-  path.erase(slashpos);
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::appendSuffix(StringRef suffix) {
-  std::string save(path);
-  path.append(".");
-  path.append(suffix);
-  if (!isValid()) {
-    path = save;
-    return false;
-  }
-  return true;
-}
-
-bool
-Path::eraseSuffix() {
-  size_t dotpos = path.rfind('.',path.size());
-  size_t slashpos = path.rfind('/',path.size());
-  if (dotpos != std::string::npos) {
-    if (slashpos == std::string::npos || dotpos > slashpos+1) {
-      std::string save(path);
-      path.erase(dotpos, path.size()-dotpos);
-      if (!isValid()) {
-        path = save;
-        return false;
-      }
-      return true;
-    }
-  }
-  return false;
-}
-
-inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char*msg) {
-  if (ErrMsg)
-    *ErrMsg = std::string(pathname) + ": " + std::string(msg);
-  return true;
-}
-
-bool
-Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
-  // Get a writeable copy of the path name
-  size_t len = path.length();
-  char *pathname = reinterpret_cast<char *>(_alloca(len+2));
-  path.copy(pathname, len);
-  pathname[len] = 0;
-
-  // Make sure it ends with a slash.
-  if (len == 0 || pathname[len - 1] != '/') {
-    pathname[len] = '/';
-    pathname[++len] = 0;
-  }
-
-  // Determine starting point for initial / search.
-  char *next = pathname;
-  if (pathname[0] == '/' && pathname[1] == '/') {
-    // Skip host name.
-    next = strchr(pathname+2, '/');
-    if (next == NULL)
-      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
-
-    // Skip share name.
-    next = strchr(next+1, '/');
-    if (next == NULL)
-      return PathMsg(ErrMsg, pathname,"badly formed remote directory");
-
-    next++;
-    if (*next == 0)
-      return PathMsg(ErrMsg, pathname, "badly formed remote directory");
-
-  } else {
-    if (pathname[1] == ':')
-      next += 2;    // skip drive letter
-    if (*next == '/')
-      next++;       // skip root directory
-  }
-
-  // If we're supposed to create intermediate directories
-  if (create_parents) {
-    // Loop through the directory components until we're done
-    while (*next) {
-      next = strchr(next, '/');
-      *next = 0;
-      if (!CreateDirectory(pathname, NULL) &&
-          GetLastError() != ERROR_ALREADY_EXISTS)
-          return MakeErrMsg(ErrMsg,
-            std::string(pathname) + ": Can't create directory: ");
-      *next++ = '/';
-    }
-  } else {
-    // Drop trailing slash.
-    pathname[len-1] = 0;
-    if (!CreateDirectory(pathname, NULL) &&
-        GetLastError() != ERROR_ALREADY_EXISTS) {
-      return MakeErrMsg(ErrMsg, std::string(pathname) + ": Can't create directory: ");
-    }
-  }
-  return false;
-}
-
-bool
-Path::createFileOnDisk(std::string* ErrMsg) {
-  // Create the file
-  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
-                        FILE_ATTRIBUTE_NORMAL, NULL);
-  if (h == INVALID_HANDLE_VALUE)
-    return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
-
-  CloseHandle(h);
-  return false;
-}
-
-bool
-Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
-  WIN32_FILE_ATTRIBUTE_DATA fi;
-  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
-    return true;
-
-  if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
-    // If it doesn't exist, we're done.
-    if (!exists())
-      return false;
-
-    char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
-    int lastchar = path.length() - 1 ;
-    path.copy(pathname, lastchar+1);
-
-    // Make path end with '/*'.
-    if (pathname[lastchar] != '/')
-      pathname[++lastchar] = '/';
-    pathname[lastchar+1] = '*';
-    pathname[lastchar+2] = 0;
-
-    if (remove_contents) {
-      WIN32_FIND_DATA fd;
-      HANDLE h = FindFirstFile(pathname, &fd);
-
-      // It's a bad idea to alter the contents of a directory while enumerating
-      // its contents. So build a list of its contents first, then destroy them.
-
-      if (h != INVALID_HANDLE_VALUE) {
-        std::vector<Path> list;
-
-        do {
-          if (strcmp(fd.cFileName, ".") == 0)
-            continue;
-          if (strcmp(fd.cFileName, "..") == 0)
-            continue;
-
-          Path aPath(path);
-          aPath.appendComponent(&fd.cFileName[0]);
-          list.push_back(aPath);
-        } while (FindNextFile(h, &fd));
-
-        DWORD err = GetLastError();
-        FindClose(h);
-        if (err != ERROR_NO_MORE_FILES) {
-          SetLastError(err);
-          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
-        }
-
-        for (std::vector<Path>::iterator I = list.begin(); I != list.end();
-             ++I) {
-          Path &aPath = *I;
-          aPath.eraseFromDisk(true);
-        }
-      } else {
-        if (GetLastError() != ERROR_FILE_NOT_FOUND)
-          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
-      }
-    }
-
-    pathname[lastchar] = 0;
-    if (!RemoveDirectory(pathname))
-      return MakeErrMsg(ErrStr,
-        std::string(pathname) + ": Can't destroy directory: ");
-    return false;
-  } else {
-    // Read-only files cannot be deleted on Windows.  Must remove the read-only
-    // attribute first.
-    if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
-      if (!SetFileAttributes(path.c_str(),
-                             fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
-        return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
-    }
-
-    if (!DeleteFile(path.c_str()))
-      return MakeErrMsg(ErrStr, path + ": Can't destroy file: ");
-    return false;
-  }
-}
-
-bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
-  assert(len < 1024 && "Request for magic string too long");
-  char* buf = reinterpret_cast<char*>(alloca(len));
-
-  HANDLE h = CreateFile(path.c_str(),
-                        GENERIC_READ,
-                        FILE_SHARE_READ,
-                        NULL,
-                        OPEN_EXISTING,
-                        FILE_ATTRIBUTE_NORMAL,
-                        NULL);
-  if (h == INVALID_HANDLE_VALUE)
-    return false;
-
-  DWORD nRead = 0;
-  BOOL ret = ReadFile(h, buf, len, &nRead, NULL);
-  CloseHandle(h);
-
-  if (!ret || nRead != len)
-    return false;
-
-  Magic = std::string(buf, len);
-  return true;
-}
-
-bool
-Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
-  if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
-    return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
-        + "': ");
-  return false;
-}
-
-bool
-Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
-  // FIXME: should work on directories also.
-  if (!si.isFile) {
-    return true;
-  }
-
-  HANDLE h = CreateFile(path.c_str(),
-                        FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
-                        FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
-                        NULL,
-                        OPEN_EXISTING,
-                        FILE_ATTRIBUTE_NORMAL,
-                        NULL);
-  if (h == INVALID_HANDLE_VALUE)
-    return true;
-
-  BY_HANDLE_FILE_INFORMATION bhfi;
-  if (!GetFileInformationByHandle(h, &bhfi)) {
-    DWORD err = GetLastError();
-    CloseHandle(h);
-    SetLastError(err);
-    return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
-  }
-
-  ULARGE_INTEGER ui;
-  ui.QuadPart = si.modTime.toWin32Time();
-  FILETIME ft;
-  ft.dwLowDateTime = ui.LowPart;
-  ft.dwHighDateTime = ui.HighPart;
-  BOOL ret = SetFileTime(h, NULL, &ft, &ft);
-  DWORD err = GetLastError();
-  CloseHandle(h);
-  if (!ret) {
-    SetLastError(err);
-    return MakeErrMsg(ErrMsg, path + ": SetFileTime: ");
-  }
-
-  // Best we can do with Unix permission bits is to interpret the owner
-  // writable bit.
-  if (si.mode & 0200) {
-    if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) {
-      if (!SetFileAttributes(path.c_str(),
-              bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY))
-        return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
-    }
-  } else {
-    if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) {
-      if (!SetFileAttributes(path.c_str(),
-              bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY))
-        return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: ");
-    }
-  }
-
-  return false;
-}
-
-bool
-CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
-  // Can't use CopyFile macro defined in Windows.h because it would mess up the
-  // above line.  We use the expansion it would have in a non-UNICODE build.
-  if (!::CopyFileA(Src.c_str(), Dest.c_str(), false))
-    return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() +
-               "' to '" + Dest.str() + "': ");
-  return false;
-}
-
-bool
-Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
-  if (reuse_current && !exists())
-    return false; // File doesn't exist already, just use it!
-
-  // Reserve space for -XXXXXX at the end.
-  char *FNBuffer = (char*) alloca(path.size()+8);
-  unsigned offset = path.size();
-  path.copy(FNBuffer, offset);
-
-  // Find a numeric suffix that isn't used by an existing file.  Assume there
-  // won't be more than 1 million files with the same prefix.  Probably a safe
-  // bet.
-  static unsigned FCounter = 0;
-  do {
-    sprintf(FNBuffer+offset, "-%06u", FCounter);
-    if (++FCounter > 999999)
-      FCounter = 0;
-    path = FNBuffer;
-  } while (exists());
-  return false;
-}
-
-bool
-Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
-  // Make this into a unique file name
-  makeUnique(reuse_current, ErrMsg);
-
-  // Now go and create it
-  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
-                        FILE_ATTRIBUTE_NORMAL, NULL);
-  if (h == INVALID_HANDLE_VALUE)
-    return MakeErrMsg(ErrMsg, path + ": can't create file");
-
-  CloseHandle(h);
-  return false;
-}
-
-/// MapInFilePages - Not yet implemented on win32.
-const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
-  return 0;
-}
-
-/// MapInFilePages - Not yet implemented on win32.
-void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
-  assert(0 && "NOT IMPLEMENTED");
-}
-
-}
-}
diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc
deleted file mode 100644
index feb0806116e4..000000000000
--- a/lib/System/Win32/Process.inc
+++ /dev/null
@@ -1,221 +0,0 @@
-//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of the Process class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <psapi.h>
-#include <malloc.h>
-#include <io.h>
-
-#ifdef __MINGW32__
- #if (HAVE_LIBPSAPI != 1)
-  #error "libpsapi.a should be present"
- #endif
-#else
- #pragma comment(lib, "psapi.lib")
-#endif
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code 
-//===          and must not be UNIX code
-//===----------------------------------------------------------------------===//
-
-#ifdef __MINGW32__
-// This ban should be lifted when MinGW 1.0+ has defined this value.
-#  define _HEAPOK (-2)
-#endif
-
-namespace llvm {
-using namespace sys;
-
-// This function retrieves the page size using GetSystemInfo and is present
-// solely so it can be called once in Process::GetPageSize to initialize the
-// static variable PageSize.
-inline unsigned GetPageSizeOnce() {
-  // NOTE: A 32-bit application running under WOW64 is supposed to use
-  // GetNativeSystemInfo.  However, this interface is not present prior
-  // to Windows XP so to use it requires dynamic linking.  It is not clear
-  // how this affects the reported page size, if at all.  One could argue
-  // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
-  SYSTEM_INFO info;
-  GetSystemInfo(&info);
-  return static_cast<unsigned>(info.dwPageSize);
-}
-
-unsigned 
-Process::GetPageSize() {
-  static const unsigned PageSize = GetPageSizeOnce();
-  return PageSize;
-}
-
-size_t 
-Process::GetMallocUsage()
-{
-  _HEAPINFO hinfo;
-  hinfo._pentry = NULL;
-
-  size_t size = 0;
-
-  while (_heapwalk(&hinfo) == _HEAPOK)
-    size += hinfo._size;
-
-  return size;
-}
-
-size_t
-Process::GetTotalMemoryUsage()
-{
-  PROCESS_MEMORY_COUNTERS pmc;
-  GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
-  return pmc.PagefileUsage;
-}
-
-void
-Process::GetTimeUsage(
-  TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
-{
-  elapsed = TimeValue::now();
-
-  uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
-  GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate, 
-                  (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
-                  (FILETIME*)&UserTime);
-
-  // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
-  user_time.seconds( UserTime / 10000000 );
-  user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
-  sys_time.seconds( KernelTime / 10000000 );
-  sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
-}
-
-int Process::GetCurrentUserId()
-{
-  return 65536;
-}
-
-int Process::GetCurrentGroupId()
-{
-  return 65536;
-}
-
-// Some LLVM programs such as bugpoint produce core files as a normal part of
-// their operation. To prevent the disk from filling up, this configuration item
-// does what's necessary to prevent their generation.
-void Process::PreventCoreFiles() {
-  // Windows doesn't do core files, but it does do modal pop-up message
-  // boxes.  As this method is used by bugpoint, preventing these pop-ups
-  // is the moral equivalent of suppressing core files.
-  SetErrorMode(SEM_FAILCRITICALERRORS |
-               SEM_NOGPFAULTERRORBOX |
-               SEM_NOOPENFILEERRORBOX);
-}
-
-bool Process::StandardInIsUserInput() {
-  return FileDescriptorIsDisplayed(0);
-}
-
-bool Process::StandardOutIsDisplayed() {
-  return FileDescriptorIsDisplayed(1);
-}
-
-bool Process::StandardErrIsDisplayed() {
-  return FileDescriptorIsDisplayed(2);
-}
-
-bool Process::FileDescriptorIsDisplayed(int fd) {
-  return GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_CHAR;
-}
-
-unsigned Process::StandardOutColumns() {
-  unsigned Columns = 0;
-  CONSOLE_SCREEN_BUFFER_INFO csbi;
-  if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
-    Columns = csbi.dwSize.X;
-  return Columns;
-}
-
-unsigned Process::StandardErrColumns() {
-  unsigned Columns = 0;
-  CONSOLE_SCREEN_BUFFER_INFO csbi;
-  if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi))
-    Columns = csbi.dwSize.X;
-  return Columns;
-}
-
-// It always has colors.
-bool Process::StandardErrHasColors() {
-  return StandardErrIsDisplayed();
-}
-
-bool Process::StandardOutHasColors() {
-  return StandardOutIsDisplayed();
-}
-
-namespace {
-class DefaultColors
-{
-  private:
-    WORD defaultColor;
-  public:
-    DefaultColors()
-     :defaultColor(GetCurrentColor()) {}
-    static unsigned GetCurrentColor() {
-      CONSOLE_SCREEN_BUFFER_INFO csbi;
-      if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
-        return csbi.wAttributes;
-      return 0;
-    }
-    WORD operator()() const { return defaultColor; }
-};
-
-DefaultColors defaultColors;
-}
-
-bool Process::ColorNeedsFlush() {
-  return true;
-}
-
-const char *Process::OutputBold(bool bg) {
-  WORD colors = DefaultColors::GetCurrentColor();
-  if (bg)
-    colors |= BACKGROUND_INTENSITY;
-  else
-    colors |= FOREGROUND_INTENSITY;
-  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
-  return 0;
-}
-
-const char *Process::OutputColor(char code, bool bold, bool bg) {
-  WORD colors;
-  if (bg) {
-    colors = ((code&1) ? BACKGROUND_RED : 0) |
-      ((code&2) ? BACKGROUND_GREEN : 0 ) |
-      ((code&4) ? BACKGROUND_BLUE : 0);
-    if (bold)
-      colors |= BACKGROUND_INTENSITY;
-  } else {
-    colors = ((code&1) ? FOREGROUND_RED : 0) |
-      ((code&2) ? FOREGROUND_GREEN : 0 ) |
-      ((code&4) ? FOREGROUND_BLUE : 0);
-    if (bold)
-      colors |= FOREGROUND_INTENSITY;
-  }
-  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
-  return 0;
-}
-
-const char *Process::ResetColor() {
-  SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
-  return 0;
-}
-
-}
diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc
deleted file mode 100644
index 16bb28e17a21..000000000000
--- a/lib/System/Win32/Program.inc
+++ /dev/null
@@ -1,409 +0,0 @@
-//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of the Program class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <cstdio>
-#include <malloc.h>
-#include <io.h>
-#include <fcntl.h>
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code
-//===          and must not be UNIX code
-//===----------------------------------------------------------------------===//
-
-namespace {
-  struct Win32ProcessInfo {
-    HANDLE hProcess;
-    DWORD  dwProcessId;
-  };
-}
-
-namespace llvm {
-using namespace sys;
-
-Program::Program() : Data_(0) {}
-
-Program::~Program() {
-  if (Data_) {
-    Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
-    CloseHandle(wpi->hProcess);
-    delete wpi;
-    Data_ = 0;
-  }
-}
-
-unsigned Program::GetPid() const {
-  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
-  return wpi->dwProcessId;
-}
-
-// This function just uses the PATH environment variable to find the program.
-Path
-Program::FindProgramByName(const std::string& progName) {
-
-  // Check some degenerate cases
-  if (progName.length() == 0) // no program
-    return Path();
-  Path temp;
-  if (!temp.set(progName)) // invalid name
-    return Path();
-  if (temp.canExecute()) // already executable as is
-    return temp;
-
-  // At this point, the file name is valid and its not executable.
-  // Let Windows search for it.
-  char buffer[MAX_PATH];
-  char *dummy = NULL;
-  DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
-                         buffer, &dummy);
-
-  // See if it wasn't found.
-  if (len == 0)
-    return Path();
-
-  // See if we got the entire path.
-  if (len < MAX_PATH)
-    return Path(buffer);
-
-  // Buffer was too small; grow and retry.
-  while (true) {
-    char *b = reinterpret_cast<char *>(_alloca(len+1));
-    DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy);
-
-    // It is unlikely the search failed, but it's always possible some file
-    // was added or removed since the last search, so be paranoid...
-    if (len2 == 0)
-      return Path();
-    else if (len2 <= len)
-      return Path(b);
-
-    len = len2;
-  }
-}
-
-static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
-  HANDLE h;
-  if (path == 0) {
-    DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
-                    GetCurrentProcess(), &h,
-                    0, TRUE, DUPLICATE_SAME_ACCESS);
-    return h;
-  }
-
-  const char *fname;
-  if (path->isEmpty())
-    fname = "NUL";
-  else
-    fname = path->c_str();
-
-  SECURITY_ATTRIBUTES sa;
-  sa.nLength = sizeof(sa);
-  sa.lpSecurityDescriptor = 0;
-  sa.bInheritHandle = TRUE;
-
-  h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
-                 &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
-                 FILE_ATTRIBUTE_NORMAL, NULL);
-  if (h == INVALID_HANDLE_VALUE) {
-    MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
-        (fd ? "input: " : "output: "));
-  }
-
-  return h;
-}
-
-#ifdef __MINGW32__
-  // Due to unknown reason, mingw32's w32api doesn't have this declaration.
-  extern "C"
-  BOOL WINAPI SetInformationJobObject(HANDLE hJob,
-                                      JOBOBJECTINFOCLASS JobObjectInfoClass,
-                                      LPVOID lpJobObjectInfo,
-                                      DWORD cbJobObjectInfoLength);
-#endif
-
-/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess.
-static bool ArgNeedsQuotes(const char *Str) {
-  return Str[0] == '\0' || strchr(Str, ' ') != 0;
-}
-
-
-/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
-/// CreateProcess and returns length of quoted arg with escaped quotes
-static unsigned int ArgLenWithQuotes(const char *Str) {
-  unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
-
-  while (*Str != '\0') {
-    if (*Str == '\"')
-      ++len;
-
-    ++len;
-    ++Str;
-  }
-
-  return len;
-}
-
-
-bool
-Program::Execute(const Path& path,
-                 const char** args,
-                 const char** envp,
-                 const Path** redirects,
-                 unsigned memoryLimit,
-                 std::string* ErrMsg) {
-  if (Data_) {
-    Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
-    CloseHandle(wpi->hProcess);
-    delete wpi;
-    Data_ = 0;
-  }
-
-  if (!path.canExecute()) {
-    if (ErrMsg)
-      *ErrMsg = "program not executable";
-    return false;
-  }
-
-  // Windows wants a command line, not an array of args, to pass to the new
-  // process.  We have to concatenate them all, while quoting the args that
-  // have embedded spaces (or are empty).
-
-  // First, determine the length of the command line.
-  unsigned len = 0;
-  for (unsigned i = 0; args[i]; i++) {
-    len += ArgLenWithQuotes(args[i]) + 1;
-  }
-
-  // Now build the command line.
-  char *command = reinterpret_cast<char *>(_alloca(len+1));
-  char *p = command;
-
-  for (unsigned i = 0; args[i]; i++) {
-    const char *arg = args[i];
-
-    bool needsQuoting = ArgNeedsQuotes(arg);
-    if (needsQuoting)
-      *p++ = '"';
-
-    while (*arg != '\0') {
-      if (*arg == '\"')
-        *p++ = '\\';
-
-      *p++ = *arg++;
-    }
-
-    if (needsQuoting)
-      *p++ = '"';
-    *p++ = ' ';
-  }
-
-  *p = 0;
-
-  // The pointer to the environment block for the new process.
-  char *envblock = 0;
-
-  if (envp) {
-    // An environment block consists of a null-terminated block of
-    // null-terminated strings. Convert the array of environment variables to
-    // an environment block by concatenating them.
-
-    // First, determine the length of the environment block.
-    len = 0;
-    for (unsigned i = 0; envp[i]; i++)
-      len += strlen(envp[i]) + 1;
-
-    // Now build the environment block.
-    envblock = reinterpret_cast<char *>(_alloca(len+1));
-    p = envblock;
-
-    for (unsigned i = 0; envp[i]; i++) {
-      const char *ev = envp[i];
-      size_t len = strlen(ev) + 1;
-      memcpy(p, ev, len);
-      p += len;
-    }
-
-    *p = 0;
-  }
-
-  // Create a child process.
-  STARTUPINFO si;
-  memset(&si, 0, sizeof(si));
-  si.cb = sizeof(si);
-  si.hStdInput = INVALID_HANDLE_VALUE;
-  si.hStdOutput = INVALID_HANDLE_VALUE;
-  si.hStdError = INVALID_HANDLE_VALUE;
-
-  if (redirects) {
-    si.dwFlags = STARTF_USESTDHANDLES;
-
-    si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg);
-    if (si.hStdInput == INVALID_HANDLE_VALUE) {
-      MakeErrMsg(ErrMsg, "can't redirect stdin");
-      return false;
-    }
-    si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg);
-    if (si.hStdOutput == INVALID_HANDLE_VALUE) {
-      CloseHandle(si.hStdInput);
-      MakeErrMsg(ErrMsg, "can't redirect stdout");
-      return false;
-    }
-    if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
-      // If stdout and stderr should go to the same place, redirect stderr
-      // to the handle already open for stdout.
-      DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
-                      GetCurrentProcess(), &si.hStdError,
-                      0, TRUE, DUPLICATE_SAME_ACCESS);
-    } else {
-      // Just redirect stderr
-      si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
-      if (si.hStdError == INVALID_HANDLE_VALUE) {
-        CloseHandle(si.hStdInput);
-        CloseHandle(si.hStdOutput);
-        MakeErrMsg(ErrMsg, "can't redirect stderr");
-        return false;
-      }
-    }
-  }
-
-  PROCESS_INFORMATION pi;
-  memset(&pi, 0, sizeof(pi));
-
-  fflush(stdout);
-  fflush(stderr);
-  BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0,
-                          envblock, NULL, &si, &pi);
-  DWORD err = GetLastError();
-
-  // Regardless of whether the process got created or not, we are done with
-  // the handles we created for it to inherit.
-  CloseHandle(si.hStdInput);
-  CloseHandle(si.hStdOutput);
-  CloseHandle(si.hStdError);
-
-  // Now return an error if the process didn't get created.
-  if (!rc) {
-    SetLastError(err);
-    MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
-               path.str() + "'");
-    return false;
-  }
-  Win32ProcessInfo* wpi = new Win32ProcessInfo;
-  wpi->hProcess = pi.hProcess;
-  wpi->dwProcessId = pi.dwProcessId;
-  Data_ = wpi;
-
-  // Make sure these get closed no matter what.
-  AutoHandle hThread(pi.hThread);
-
-  // Assign the process to a job if a memory limit is defined.
-  AutoHandle hJob(0);
-  if (memoryLimit != 0) {
-    hJob = CreateJobObject(0, 0);
-    bool success = false;
-    if (hJob != 0) {
-      JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
-      memset(&jeli, 0, sizeof(jeli));
-      jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
-      jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576;
-      if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation,
-                                  &jeli, sizeof(jeli))) {
-        if (AssignProcessToJobObject(hJob, pi.hProcess))
-          success = true;
-      }
-    }
-    if (!success) {
-      SetLastError(GetLastError());
-      MakeErrMsg(ErrMsg, std::string("Unable to set memory limit"));
-      TerminateProcess(pi.hProcess, 1);
-      WaitForSingleObject(pi.hProcess, INFINITE);
-      return false;
-    }
-  }
-
-  return true;
-}
-
-int
-Program::Wait(unsigned secondsToWait,
-              std::string* ErrMsg) {
-  if (Data_ == 0) {
-    MakeErrMsg(ErrMsg, "Process not started!");
-    return -1;
-  }
-
-  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
-  HANDLE hProcess = wpi->hProcess;
-
-  // Wait for the process to terminate.
-  DWORD millisecondsToWait = INFINITE;
-  if (secondsToWait > 0)
-    millisecondsToWait = secondsToWait * 1000;
-
-  if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
-    if (!TerminateProcess(hProcess, 1)) {
-      MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
-      return -1;
-    }
-    WaitForSingleObject(hProcess, INFINITE);
-  }
-
-  // Get its exit status.
-  DWORD status;
-  BOOL rc = GetExitCodeProcess(hProcess, &status);
-  DWORD err = GetLastError();
-
-  if (!rc) {
-    SetLastError(err);
-    MakeErrMsg(ErrMsg, "Failed getting status for program.");
-    return -1;
-  }
-
-  return status;
-}
-
-bool
-Program::Kill(std::string* ErrMsg) {
-  if (Data_ == 0) {
-    MakeErrMsg(ErrMsg, "Process not started!");
-    return true;
-  }
-
-  Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
-  HANDLE hProcess = wpi->hProcess;
-  if (TerminateProcess(hProcess, 1) == 0) {
-    MakeErrMsg(ErrMsg, "The process couldn't be killed!");
-    return true;
-  }
-
-  return false;
-}
-
-bool Program::ChangeStdinToBinary(){
-  int result = _setmode( _fileno(stdin), _O_BINARY );
-  return result == -1;
-}
-
-bool Program::ChangeStdoutToBinary(){
-  int result = _setmode( _fileno(stdout), _O_BINARY );
-  return result == -1;
-}
-
-bool Program::ChangeStderrToBinary(){
-  int result = _setmode( _fileno(stderr), _O_BINARY );
-  return result == -1;
-}
-
-}
diff --git a/lib/System/Win32/RWMutex.inc b/lib/System/Win32/RWMutex.inc
deleted file mode 100644
index e2692269e3a0..000000000000
--- a/lib/System/Win32/RWMutex.inc
+++ /dev/null
@@ -1,58 +0,0 @@
-//= llvm/System/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock  =//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 specific (non-pthread) RWMutex class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//===          is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-
-// FIXME: Windows does not have reader-writer locks pre-Vista.  If you want
-// real reader-writer locks, you a pthreads implementation for Windows.
-
-namespace llvm {
-using namespace sys;
-
-RWMutexImpl::RWMutexImpl() {
-  data_ = calloc(1, sizeof(CRITICAL_SECTION));
-  InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-}
-
-RWMutexImpl::~RWMutexImpl() {
-  DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-  free(data_);
-}
-
-bool RWMutexImpl::reader_acquire() {
-  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-  return true;
-}
-
-bool RWMutexImpl::reader_release() {
-  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-  return true;
-}
-
-bool RWMutexImpl::writer_acquire() {
-  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-  return true;
-}
-
-bool RWMutexImpl::writer_release() {
-  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
-  return true;
-}
-
-
-}
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
deleted file mode 100644
index 2498a26ea99c..000000000000
--- a/lib/System/Win32/Signals.inc
+++ /dev/null
@@ -1,332 +0,0 @@
-//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 specific implementation of the Signals class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <stdio.h>
-#include <vector>
-#include <algorithm>
-
-#ifdef __MINGW32__
- #include <imagehlp.h>
-#else
- #include <dbghelp.h>
-#endif
-#include <psapi.h>
-
-#ifdef __MINGW32__
- #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
-  #error "libimagehlp.a & libpsapi.a should be present"
- #endif
-#else
- #pragma comment(lib, "psapi.lib")
- #pragma comment(lib, "dbghelp.lib")
-#endif
-
-// Forward declare.
-static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
-static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
-
-// InterruptFunction - The function to call if ctrl-c is pressed.
-static void (*InterruptFunction)() = 0;
-
-static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
-static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
-static bool RegisteredUnhandledExceptionFilter = false;
-static bool CleanupExecuted = false;
-#ifdef _MSC_VER
-static bool ExitOnUnhandledExceptions = false;
-#endif
-static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
-
-// Windows creates a new thread to execute the console handler when an event
-// (such as CTRL/C) occurs.  This causes concurrency issues with the above
-// globals which this critical section addresses.
-static CRITICAL_SECTION CriticalSection;
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code 
-//===          and must not be UNIX code
-//===----------------------------------------------------------------------===//
-
-#ifdef _MSC_VER
-/// CRTReportHook - Function called on a CRT debugging event.
-static int CRTReportHook(int ReportType, char *Message, int *Return) {
-  // Don't cause a DebugBreak() on return.
-  if (Return)
-    *Return = 0;
-
-  switch (ReportType) {
-  default:
-  case _CRT_ASSERT:
-    fprintf(stderr, "CRT assert: %s\n", Message);
-    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
-    // exception code? Perhaps SetErrorMode() handles this.
-    _exit(3);
-    break;
-  case _CRT_ERROR:
-    fprintf(stderr, "CRT error: %s\n", Message);
-    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
-    // exception code? Perhaps SetErrorMode() handles this.
-    _exit(3);
-    break;
-  case _CRT_WARN:
-    fprintf(stderr, "CRT warn: %s\n", Message);
-    break;
-  }
-
-  // Don't call _CrtDbgReport.
-  return TRUE;
-}
-#endif
-
-static void RegisterHandler() {
-  if (RegisteredUnhandledExceptionFilter) {
-    EnterCriticalSection(&CriticalSection);
-    return;
-  }
-
-  // Now's the time to create the critical section.  This is the first time
-  // through here, and there's only one thread.
-  InitializeCriticalSection(&CriticalSection);
-
-  // Enter it immediately.  Now if someone hits CTRL/C, the console handler
-  // can't proceed until the globals are updated.
-  EnterCriticalSection(&CriticalSection);
-
-  RegisteredUnhandledExceptionFilter = true;
-  OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
-  SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
-
-  // Environment variable to disable any kind of crash dialog.
-#ifdef _MSC_VER
-  if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
-    _CrtSetReportHook(CRTReportHook);
-    ExitOnUnhandledExceptions = true;
-  }
-#endif
-
-  // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
-  // else multi-threading problems will ensue.
-}
-
-// RemoveFileOnSignal - The public API
-bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
-  RegisterHandler();
-
-  if (CleanupExecuted) {
-    if (ErrMsg)
-      *ErrMsg = "Process terminating -- cannot register for removal";
-    return true;
-  }
-
-  if (FilesToRemove == NULL)
-    FilesToRemove = new std::vector<sys::Path>;
-
-  FilesToRemove->push_back(Filename);
-
-  LeaveCriticalSection(&CriticalSection);
-  return false;
-}
-
-// DontRemoveFileOnSignal - The public API
-void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
-  if (FilesToRemove == NULL)
-    return;
-
-  FilesToRemove->push_back(Filename);
-  std::vector<sys::Path>::reverse_iterator I =
-  std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
-  if (I != FilesToRemove->rend())
-    FilesToRemove->erase(I.base()-1);
-
-  LeaveCriticalSection(&CriticalSection);
-}
-
-/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
-/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
-void sys::PrintStackTraceOnErrorSignal() {
-  RegisterHandler();
-  LeaveCriticalSection(&CriticalSection);
-}
-
-
-void sys::SetInterruptFunction(void (*IF)()) {
-  RegisterHandler();
-  InterruptFunction = IF;
-  LeaveCriticalSection(&CriticalSection);
-}
-
-
-/// AddSignalHandler - Add a function to be called when a signal is delivered
-/// to the process.  The handler can have a cookie passed to it to identify
-/// what instance of the handler it is.
-void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
-  if (CallBacksToRun == 0)
-    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
-  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
-  RegisterHandler();
-  LeaveCriticalSection(&CriticalSection);
-}
-}
-
-static void Cleanup() {
-  EnterCriticalSection(&CriticalSection);
-
-  // Prevent other thread from registering new files and directories for
-  // removal, should we be executing because of the console handler callback.
-  CleanupExecuted = true;
-
-  // FIXME: open files cannot be deleted.
-
-  if (FilesToRemove != NULL)
-    while (!FilesToRemove->empty()) {
-      FilesToRemove->back().eraseFromDisk();
-      FilesToRemove->pop_back();
-    }
-
-  if (CallBacksToRun)
-    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
-      (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second);
-
-  LeaveCriticalSection(&CriticalSection);
-}
-
-void llvm::sys::RunInterruptHandlers() {
-  Cleanup();
-}
-
-static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
-  try {
-    Cleanup();
-    
-#ifdef _WIN64
-  // TODO: provide a x64 friendly version of the following
-#else
-    
-    // Initialize the STACKFRAME structure.
-    STACKFRAME StackFrame;
-    memset(&StackFrame, 0, sizeof(StackFrame));
-
-    StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
-    StackFrame.AddrPC.Mode = AddrModeFlat;
-    StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
-    StackFrame.AddrStack.Mode = AddrModeFlat;
-    StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
-    StackFrame.AddrFrame.Mode = AddrModeFlat;
-
-    HANDLE hProcess = GetCurrentProcess();
-    HANDLE hThread = GetCurrentThread();
-
-    // Initialize the symbol handler.
-    SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
-    SymInitialize(hProcess, NULL, TRUE);
-
-    while (true) {
-      if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
-                     ep->ContextRecord, NULL, SymFunctionTableAccess,
-                     SymGetModuleBase, NULL)) {
-        break;
-      }
-
-      if (StackFrame.AddrFrame.Offset == 0)
-        break;
-
-      // Print the PC in hexadecimal.
-      DWORD PC = StackFrame.AddrPC.Offset;
-      fprintf(stderr, "%08lX", PC);
-
-      // Print the parameters.  Assume there are four.
-      fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", StackFrame.Params[0],
-              StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
-
-      // Verify the PC belongs to a module in this process.
-      if (!SymGetModuleBase(hProcess, PC)) {
-        fputs(" <unknown module>\n", stderr);
-        continue;
-      }
-
-      // Print the symbol name.
-      char buffer[512];
-      IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
-      memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
-      symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
-      symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
-
-      DWORD dwDisp;
-      if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
-        fputc('\n', stderr);
-        continue;
-      }
-
-      buffer[511] = 0;
-      if (dwDisp > 0)
-        fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
-      else
-        fprintf(stderr, ", %s", symbol->Name);
-
-      // Print the source file and line number information.
-      IMAGEHLP_LINE line;
-      memset(&line, 0, sizeof(line));
-      line.SizeOfStruct = sizeof(line);
-      if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
-        fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
-        if (dwDisp > 0)
-          fprintf(stderr, "+%04lu byte(s)", dwDisp);
-      }
-
-      fputc('\n', stderr);
-    }
-
-#endif
-
-  } catch (...) {
-      assert(0 && "Crashed in LLVMUnhandledExceptionFilter");
-  }
-
-#ifdef _MSC_VER
-  if (ExitOnUnhandledExceptions)
-    _exit(-3);
-#endif
-
-  // Allow dialog box to pop up allowing choice to start debugger.
-  if (OldFilter)
-    return (*OldFilter)(ep);
-  else
-    return EXCEPTION_CONTINUE_SEARCH;
-}
-
-static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
-  // We are running in our very own thread, courtesy of Windows.
-  EnterCriticalSection(&CriticalSection);
-  Cleanup();
-
-  // If an interrupt function has been set, go and run one it; otherwise,
-  // the process dies.
-  void (*IF)() = InterruptFunction;
-  InterruptFunction = 0;      // Don't run it on another CTRL-C.
-
-  if (IF) {
-    // Note: if the interrupt function throws an exception, there is nothing
-    // to catch it in this thread so it will kill the process.
-    IF();                     // Run it now.
-    LeaveCriticalSection(&CriticalSection);
-    return TRUE;              // Don't kill the process.
-  }
-
-  // Allow normal processing to take place; i.e., the process dies.
-  LeaveCriticalSection(&CriticalSection);
-  return FALSE;
-}
-
diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc
deleted file mode 100644
index b8b933c4d29d..000000000000
--- a/lib/System/Win32/ThreadLocal.inc
+++ /dev/null
@@ -1,53 +0,0 @@
-//= llvm/System/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 specific (non-pthread) ThreadLocal class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//===          is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include "llvm/System/ThreadLocal.h"
-
-namespace llvm {
-using namespace sys;
-
-ThreadLocalImpl::ThreadLocalImpl() {
-  DWORD* tls = new DWORD;
-  *tls = TlsAlloc();
-  assert(*tls != TLS_OUT_OF_INDEXES);
-  data = tls;
-}
-
-ThreadLocalImpl::~ThreadLocalImpl() {
-  DWORD* tls = static_cast<DWORD*>(data);
-  TlsFree(*tls);
-  delete tls;
-}
-
-const void* ThreadLocalImpl::getInstance() {
-  DWORD* tls = static_cast<DWORD*>(data);
-  return TlsGetValue(*tls);
-}
-
-void ThreadLocalImpl::setInstance(const void* d){
-  DWORD* tls = static_cast<DWORD*>(data);
-  int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
-  assert(errorcode != 0);
-}
-
-void ThreadLocalImpl::removeInstance() {
-  setInstance(0);
-}
-
-}
diff --git a/lib/System/Win32/TimeValue.inc b/lib/System/Win32/TimeValue.inc
deleted file mode 100644
index e37f111fc77c..000000000000
--- a/lib/System/Win32/TimeValue.inc
+++ /dev/null
@@ -1,51 +0,0 @@
-//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 implementation of the TimeValue class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Win32.h"
-#include <time.h>
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code.
-//===----------------------------------------------------------------------===//
-
-TimeValue TimeValue::now() {
-  uint64_t ft;
-  GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
-
-  TimeValue t(0, 0);
-  t.fromWin32Time(ft);
-  return t;
-}
-
-std::string TimeValue::str() const {
-#ifdef __MINGW32__
-  // This ban may be lifted by either:
-  // (i) a future MinGW version other than 1.0 inherents the __time64_t type, or
-  // (ii) configure tests for either the time_t or __time64_t type.
-  time_t ourTime = time_t(this->toEpochTime());
-  struct tm *lt = ::localtime(&ourTime);
-#else
-  __time64_t ourTime = this->toEpochTime();
-  struct tm *lt = ::_localtime64(&ourTime);
-#endif
-
-  char buffer[25];
-  strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt);
-  return std::string(buffer);
-}
-
-
-}
diff --git a/lib/System/Win32/Win32.h b/lib/System/Win32/Win32.h
deleted file mode 100644
index 8f505b1a6cdb..000000000000
--- a/lib/System/Win32/Win32.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines things specific to Win32 implementations.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//===          is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-// Require at least Windows 2000 API.
-#define _WIN32_WINNT 0x0500
-
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
-#include "windows.h"
-#include <cassert>
-#include <string>
-
-inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
-  if (!ErrMsg)
-    return true;
-  char *buffer = NULL;
-  FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
-      NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
-  *ErrMsg = prefix + buffer;
-  LocalFree(buffer);
-  return true;
-}
-
-class AutoHandle {
-  HANDLE handle;
-
-public:
-  AutoHandle(HANDLE h) : handle(h) {}
-
-  ~AutoHandle() {
-    if (handle)
-      CloseHandle(handle);
-  }
-
-  operator HANDLE() {
-    return handle;
-  }
-
-  AutoHandle &operator=(HANDLE h) {
-    handle = h;
-    return *this;
-  }
-};
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 271ca44c2b69..4679f7443bfc 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_ARM_H
 #define TARGET_ARM_H
 
+#include "ARMBaseInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
@@ -25,97 +26,17 @@ class ARMBaseTargetMachine;
 class FunctionPass;
 class JITCodeEmitter;
 class formatted_raw_ostream;
+class MCCodeEmitter;
+class TargetAsmBackend;
+class MachineInstr;
+class ARMAsmPrinter;
+class MCInst;
 
-// Enums corresponding to ARM condition codes
-namespace ARMCC {
-  // The CondCodes constants map directly to the 4-bit encoding of the
-  // condition field for predicated instructions.
-  enum CondCodes { // Meaning (integer)          Meaning (floating-point)
-    EQ,            // Equal                      Equal
-    NE,            // Not equal                  Not equal, or unordered
-    HS,            // Carry set                  >, ==, or unordered
-    LO,            // Carry clear                Less than
-    MI,            // Minus, negative            Less than
-    PL,            // Plus, positive or zero     >, ==, or unordered
-    VS,            // Overflow                   Unordered
-    VC,            // No overflow                Not unordered
-    HI,            // Unsigned higher            Greater than, or unordered
-    LS,            // Unsigned lower or same     Less than or equal
-    GE,            // Greater than or equal      Greater than or equal
-    LT,            // Less than                  Less than, or unordered
-    GT,            // Greater than               Greater than
-    LE,            // Less than or equal         <, ==, or unordered
-    AL             // Always (unconditional)     Always (unconditional)
-  };
+MCCodeEmitter *createARMMCCodeEmitter(const Target &,
+                                      TargetMachine &TM,
+                                      MCContext &Ctx);
 
-  inline static CondCodes getOppositeCondition(CondCodes CC) {
-    switch (CC) {
-    default: llvm_unreachable("Unknown condition code");
-    case EQ: return NE;
-    case NE: return EQ;
-    case HS: return LO;
-    case LO: return HS;
-    case MI: return PL;
-    case PL: return MI;
-    case VS: return VC;
-    case VC: return VS;
-    case HI: return LS;
-    case LS: return HI;
-    case GE: return LT;
-    case LT: return GE;
-    case GT: return LE;
-    case LE: return GT;
-    }
-  }
-} // namespace ARMCC
-
-inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown condition code");
-  case ARMCC::EQ:  return "eq";
-  case ARMCC::NE:  return "ne";
-  case ARMCC::HS:  return "hs";
-  case ARMCC::LO:  return "lo";
-  case ARMCC::MI:  return "mi";
-  case ARMCC::PL:  return "pl";
-  case ARMCC::VS:  return "vs";
-  case ARMCC::VC:  return "vc";
-  case ARMCC::HI:  return "hi";
-  case ARMCC::LS:  return "ls";
-  case ARMCC::GE:  return "ge";
-  case ARMCC::LT:  return "lt";
-  case ARMCC::GT:  return "gt";
-  case ARMCC::LE:  return "le";
-  case ARMCC::AL:  return "al";
-  }
-}
-
-namespace ARM_MB {
-  // The Memory Barrier Option constants map directly to the 4-bit encoding of
-  // the option field for memory barrier operations.
-  enum MemBOpt {
-    ST    = 14,
-    ISH   = 11,
-    ISHST = 10,
-    NSH   = 7,
-    NSHST = 6,
-    OSH   = 3,
-    OSHST = 2
-  };
-
-  inline static const char *MemBOptToString(unsigned val) {
-    switch (val) {
-    default: llvm_unreachable("Unknown memory opetion");
-    case ST:    return "st";
-    case ISH:   return "ish";
-    case ISHST: return "ishst";
-    case NSH:   return "nsh";
-    case NSHST: return "nshst";
-    case OSH:   return "osh";
-    case OSHST: return "oshst";
-    }
-  }
-} // namespace ARM_MB
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
 
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
@@ -127,23 +48,16 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
 FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
 FunctionPass *createARMConstantIslandPass();
-FunctionPass *createNEONPreAllocPass();
 FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createThumb2SizeReductionPass();
 
 extern Target TheARMTarget, TheThumbTarget;
 
-} // end namespace llvm;
-
-// Defines symbolic names for ARM registers.  This defines a mapping from
-// register name to register number.
-//
-#include "ARMGenRegisterNames.inc"
-
-// Defines symbolic names for the ARM instructions.
-//
-#include "ARMGenInstrNames.inc"
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                  ARMAsmPrinter &AP);
 
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index d6a8f19724dc..bf4315fc6c3e 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -16,6 +16,7 @@
 
 include "llvm/Target/Target.td"
 
+
 //===----------------------------------------------------------------------===//
 // ARM Subtarget features.
 //
@@ -32,6 +33,8 @@ def FeatureNoARM  : SubtargetFeature<"noarm", "NoARM", "true",
                                      "Does not support ARM mode execution">;
 def FeatureFP16   : SubtargetFeature<"fp16", "HasFP16", "true",
                                      "Enable half-precision floating point">;
+def FeatureD16    : SubtargetFeature<"d16", "HasD16", "true",
+                                     "Restrict VFP3 to 16 double registers">;
 def FeatureHWDiv  : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
                                      "Enable divide instructions">;
 def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
@@ -43,14 +46,11 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
 def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                           "Floating point unit supports single precision only">;
 
-// Some processors have multiply-accumulate instructions that don't
-// play nicely with other VFP instructions, and it's generally better
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
-// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
-// others as well. We should do more benchmarking and confirm one way or
-// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
-                                          "Disable VFP MAC instructions">;
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+                                         "Disable VFP / NEON MAC instructions">;
 // Some processors benefit from using NEON instructions for scalar
 // single-precision FP operations.
 def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -61,6 +61,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+                                 "Supports Multiprocessing extension">;
 
 // ARM architectures.
 def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
@@ -91,6 +94,18 @@ def ArchV7M     : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
 
 include "ARMSchedule.td"
 
+// ARM processor families.
+def ProcOthers  : SubtargetFeature<"others", "ARMProcFamily", "Others",
+                                   "One of the other ARM processor families">;
+def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+                                   "Cortex-A8 ARM processors",
+                                   [FeatureSlowFPBrcc, FeatureNEONForFP,
+                                    FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+def ProcA9      : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+                                   "Cortex-A9 ARM processors",
+                                   [FeatureHasSlowFPVMLx, FeatureT2XtPk,
+                                    FeatureFP16]>;
+
 class ProcNoItin<string Name, list<SubtargetFeature> Features>
  : Processor<Name, GenericItineraries, Features>;
 
@@ -135,25 +150,27 @@ def : ProcNoItin<"iwmmxt",          [ArchV5TE]>;
 // V6 Processors.
 def : Processor<"arm1136j-s",       ARMV6Itineraries, [ArchV6]>;
 def : Processor<"arm1136jf-s",      ARMV6Itineraries, [ArchV6, FeatureVFP2,
-                                                       FeatureHasSlowVMLx]>;
+                                                       FeatureHasSlowFPVMLx]>;
 def : Processor<"arm1176jz-s",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 def : Processor<"mpcorenovfp",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 
 // V6M Processors.
 def : Processor<"cortex-m0",        ARMV6Itineraries, [ArchV6M]>;
 
 // V6T2 Processors.
 def : Processor<"arm1156t2-s",      ARMV6Itineraries, [ArchV6T2]>;
-def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
+def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
-                [ArchV7A, FeatureHasSlowVMLx,
-                 FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
+                                    [ArchV7A, ProcA8]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
-                [ArchV7A, FeatureT2XtPk]>;
+                                    [ArchV7A, ProcA9]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
@@ -175,6 +192,17 @@ include "ARMInstrInfo.td"
 
 def ARMInstrInfo : InstrInfo;
 
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
 //===----------------------------------------------------------------------===//
@@ -182,4 +210,6 @@ def ARMInstrInfo : InstrInfo;
 def ARM : Target {
   // Pull in Instruction Info:
   let InstructionSet = ARMInstrInfo;
+
+  let AssemblyWriters = [ARMAsmWriter];
 }
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index db481005b3a4..19fbf0548b02 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -50,6 +50,16 @@ namespace ARM_AM {
     }
   }
 
+  static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+    switch (Op) {
+    default: assert(0 && "Unknown shift opc!");
+    case ARM_AM::asr: return 2;
+    case ARM_AM::lsl: return 0;
+    case ARM_AM::lsr: return 1;
+    case ARM_AM::ror: return 3;
+    }
+  }
+
   static inline ShiftOpc getShiftOpcForNode(SDValue N) {
     switch (N.getOpcode()) {
     default:          return ARM_AM::no_shift;
@@ -566,6 +576,8 @@ namespace ARM_AM {
     return Val;
   }
 
+  AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
 } // end namespace ARM_AM
 } // end namespace llvm
 
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 000000000000..ec23449d7d42
--- /dev/null
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,512 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+                               /*UseAggressiveSymbolFolding=*/true) {}
+};
+
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  ARMELFObjectWriter(Triple::OSType OSType)
+    : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+                              /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+  bool isThumbMode;  // Currently emitting Thumb code.
+public:
+  ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+  unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+    const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name                      Offset (bits) Size (bits)     Flags
+{ "fixup_arm_ldst_pcrel_12", 1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10",      1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10",       0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0,            8,   MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12",  1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12",   0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch",    0,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch",  0,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch",     0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch",   0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br",      0,            16,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl",      0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx",     7,            21,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb",      0,            16,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp",      1,             8,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc",     1,             8,  MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
+{ "fixup_arm_movt_hi16",     0,            20,  0 },
+{ "fixup_arm_movw_lo16",     0,            20,  0 },
+{ "fixup_t2_movt_hi16",      0,            20,  0 },
+{ "fixup_t2_movw_lo16",      0,            20,  0 },
+{ "fixup_arm_movt_hi16_pcrel", 0,          20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0,          20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0,           20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0,           20,  MCFixupKindInfo::FKF_IsPCRel },
+    };
+
+    if (Kind < FirstTargetFixupKind)
+      return TargetAsmBackend::getFixupKindInfo(Kind);
+
+    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+           "Invalid kind!");
+    return Infos[Kind - FirstTargetFixupKind];
+  }
+
+  bool MayNeedRelaxation(const MCInst &Inst) const;
+
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+  void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+    switch (Flag) {
+    default: break;
+    case MCAF_Code16:
+      setIsThumb(true);
+      break;
+    case MCAF_Code32:
+      setIsThumb(false);
+      break;
+    }
+  }
+
+  unsigned getPointerSize() const { return 4; }
+  bool isThumb() const { return isThumbMode; }
+  void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // FIXME: Thumb targets, different move constant targets..
+  return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+  return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+  if (isThumb()) {
+    // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+    // use 0x46c0 (which is a 'mov r8, r8' insn).
+    uint64_t NumNops = Count / 2;
+    for (uint64_t i = 0; i != NumNops; ++i)
+      OW->Write16(0xbf00);
+    if (Count & 1)
+      OW->Write8(0);
+    return true;
+  }
+  // ARM mode
+  uint64_t NumNops = Count / 4;
+  for (uint64_t i = 0; i != NumNops; ++i)
+    OW->Write32(0xe1a00000);
+  switch (Count % 4) {
+  default: break; // No leftover bytes to write
+  case 1: OW->Write8(0); break;
+  case 2: OW->Write16(0); break;
+  case 3: OW->Write16(0); OW->Write8(0xa0); break;
+  }
+
+  return true;
+}
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case FK_Data_1:
+  case FK_Data_2:
+  case FK_Data_4:
+    return Value;
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+    Value >>= 16;
+    // Fallthrough
+  case ARM::fixup_arm_movw_lo16:
+  case ARM::fixup_arm_movw_lo16_pcrel: {
+    unsigned Hi4 = (Value & 0xF000) >> 12;
+    unsigned Lo12 = Value & 0x0FFF;
+    // inst{19-16} = Hi4;
+    // inst{11-0} = Lo12;
+    Value = (Hi4 << 16) | (Lo12);
+    return Value;
+  }
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+    Value >>= 16;
+    // Fallthrough
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movw_lo16_pcrel: {
+    unsigned Hi4 = (Value & 0xF000) >> 12;
+    unsigned i = (Value & 0x800) >> 11;
+    unsigned Mid3 = (Value & 0x700) >> 8;
+    unsigned Lo8 = Value & 0x0FF;
+    // inst{19-16} = Hi4;
+    // inst{26} = i;
+    // inst{14-12} = Mid3;
+    // inst{7-0} = Lo8;
+    Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+
+    uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+    swapped |= (Value & 0x0000FFFF) << 16;
+    return swapped;
+  }
+  case ARM::fixup_arm_ldst_pcrel_12:
+    // ARM PC-relative values are offset by 8.
+    Value -= 4;
+    // FALLTHROUGH
+  case ARM::fixup_t2_ldst_pcrel_12: {
+    // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+    Value -= 4;
+    bool isAdd = true;
+    if ((int64_t)Value < 0) {
+      Value = -Value;
+      isAdd = false;
+    }
+    assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+    Value |= isAdd << 23;
+
+    // Same addressing mode as fixup_arm_pcrel_10,
+    // but with 16-bit halfwords swapped.
+    if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+      uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+      swapped |= (Value & 0x0000FFFF) << 16;
+      return swapped;
+    }
+
+    return Value;
+  }
+  case ARM::fixup_thumb_adr_pcrel_10:
+    return ((Value - 4) >> 2) & 0xff;
+  case ARM::fixup_arm_adr_pcrel_12: {
+    // ARM PC-relative values are offset by 8.
+    Value -= 8;
+    unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+    if ((int64_t)Value < 0) {
+      Value = -Value;
+      opc = 2; // 0b0010
+    }
+    assert(ARM_AM::getSOImmVal(Value) != -1 &&
+           "Out of range pc-relative fixup value!");
+    // Encode the immediate and shift the opcode into place.
+    return ARM_AM::getSOImmVal(Value) | (opc << 21);
+  }
+
+  case ARM::fixup_t2_adr_pcrel_12: {
+    Value -= 4;
+    unsigned opc = 0;
+    if ((int64_t)Value < 0) {
+      Value = -Value;
+      opc = 5;
+    }
+
+    uint32_t out = (opc << 21);
+    out |= (Value & 0x800) << 14;
+    out |= (Value & 0x700) << 4;
+    out |= (Value & 0x0FF);
+
+    uint64_t swapped = (out & 0xFFFF0000) >> 16;
+    swapped |= (out & 0x0000FFFF) << 16;
+    return swapped;
+  }
+
+  case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
+    // These values don't encode the low two bits since they're always zero.
+    // Offset by 8 just as above.
+    return 0xffffff & ((Value - 8) >> 2);
+  case ARM::fixup_t2_uncondbranch: {
+    Value = Value - 4;
+    Value >>= 1; // Low bit is not encoded.
+
+    uint32_t out = 0;
+    bool I =  Value & 0x800000;
+    bool J1 = Value & 0x400000;
+    bool J2 = Value & 0x200000;
+    J1 ^= I;
+    J2 ^= I;
+
+    out |= I  << 26; // S bit
+    out |= !J1 << 13; // J1 bit
+    out |= !J2 << 11; // J2 bit
+    out |= (Value & 0x1FF800)  << 5; // imm6 field
+    out |= (Value & 0x0007FF);        // imm11 field
+
+    uint64_t swapped = (out & 0xFFFF0000) >> 16;
+    swapped |= (out & 0x0000FFFF) << 16;
+    return swapped;
+  }
+  case ARM::fixup_t2_condbranch: {
+    Value = Value - 4;
+    Value >>= 1; // Low bit is not encoded.
+
+    uint64_t out = 0;
+    out |= (Value & 0x80000) << 7; // S bit
+    out |= (Value & 0x40000) >> 7; // J2 bit
+    out |= (Value & 0x20000) >> 4; // J1 bit
+    out |= (Value & 0x1F800) << 5; // imm6 field
+    out |= (Value & 0x007FF);      // imm11 field
+
+    uint32_t swapped = (out & 0xFFFF0000) >> 16;
+    swapped |= (out & 0x0000FFFF) << 16;
+    return swapped;
+  }
+  case ARM::fixup_arm_thumb_bl: {
+    // The value doesn't encode the low bit (always zero) and is offset by
+    // four. The value is encoded into disjoint bit positions in the destination
+    // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+    //
+    //   BL:  xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+    //
+    // Note that the halfwords are stored high first, low second; so we need
+    // to transpose the fixup value here to map properly.
+    unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+    uint32_t Binary = 0;
+    Value = 0x3fffff & ((Value - 4) >> 1);
+    Binary  = (Value & 0x7ff) << 16;    // Low imm11 value.
+    Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+    Binary |= isNeg << 10;              // Sign bit.
+    return Binary;
+  }
+  case ARM::fixup_arm_thumb_blx: {
+    // The value doesn't encode the low two bits (always zero) and is offset by
+    // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+    // positions in the destination opcode. x = unchanged, I = immediate value
+    // bit, S = sign extension bit, 0 = zero.
+    //
+    //   BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+    //
+    // Note that the halfwords are stored high first, low second; so we need
+    // to transpose the fixup value here to map properly.
+    unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+    uint32_t Binary = 0;
+    Value = 0xfffff & ((Value - 2) >> 2);
+    Binary  = (Value & 0x3ff) << 17;    // Low imm10L value.
+    Binary |= (Value & 0xffc00) >> 10;  // High imm10H value.
+    Binary |= isNeg << 10;              // Sign bit.
+    return Binary;
+  }
+  case ARM::fixup_arm_thumb_cp:
+    // Offset by 4, and don't encode the low two bits. Two bytes of that
+    // 'off by 4' is implicitly handled by the half-word ordering of the
+    // Thumb encoding, so we only need to adjust by 2 here.
+    return ((Value - 2) >> 2) & 0xff;
+  case ARM::fixup_arm_thumb_cb: {
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    uint32_t Binary = (Value - 4) >> 1;
+    return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+  }
+  case ARM::fixup_arm_thumb_br:
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    return ((Value - 4) >> 1) & 0x7ff;
+  case ARM::fixup_arm_thumb_bcc:
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    return ((Value - 4) >> 1) & 0xff;
+  case ARM::fixup_arm_pcrel_10:
+    Value = Value - 4; // ARM fixups offset by an additional word and don't
+                       // need to adjust for the half-word ordering.
+    // Fall through.
+  case ARM::fixup_t2_pcrel_10: {
+    // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+    Value = Value - 4;
+    bool isAdd = true;
+    if ((int64_t)Value < 0) {
+      Value = -Value;
+      isAdd = false;
+    }
+    // These values don't encode the low two bits since they're always zero.
+    Value >>= 2;
+    assert ((Value < 256) && "Out of range pc-relative fixup value!");
+    Value |= isAdd << 23;
+
+    // Same addressing mode as fixup_arm_pcrel_10,
+    // but with 16-bit halfwords swapped.
+    if (Kind == ARM::fixup_t2_pcrel_10) {
+      uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+      swapped |= (Value & 0x0000FFFF) << 16;
+      return swapped;
+    }
+
+    return Value;
+  }
+  }
+}
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+  Triple::OSType OSType;
+  ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
+    : ARMAsmBackend(T), OSType(_OSType) { }
+
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const;
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+                              /*IsLittleEndian*/ true);
+  }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+                                  unsigned DataSize, uint64_t Value) const {
+  unsigned NumBytes = 4;        // FIXME: 2 for Thumb
+  Value = adjustFixupValue(Fixup.getKind(), Value);
+  if (!Value) return;           // Doesn't change encoding.
+
+  unsigned Offset = Fixup.getOffset();
+  assert(Offset % NumBytes == 0 && "Offset mod NumBytes is nonzero!");
+
+  // For each byte of the fragment that the fixup touches, mask in the bits from
+  // the fixup value. The Value has been "split up" into the appropriate
+  // bitfields above.
+  for (unsigned i = 0; i != NumBytes; ++i)
+    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+  DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { }
+
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const;
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    // FIXME: Subtarget info should be derived. Force v7 for now.
+    return createMachObjectWriter(new ARMMachObjectWriter(
+                                    /*Is64Bit=*/false,
+                                    object::mach::CTM_ARM,
+                                    object::mach::CSARM_V7),
+                                  OS,
+                                  /*IsLittleEndian=*/true);
+  }
+
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+    return false;
+  }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+
+  case FK_Data_1:
+  case ARM::fixup_arm_thumb_bcc:
+  case ARM::fixup_arm_thumb_cp:
+  case ARM::fixup_thumb_adr_pcrel_10:
+    return 1;
+
+  case FK_Data_2:
+  case ARM::fixup_arm_thumb_br:
+  case ARM::fixup_arm_thumb_cb:
+    return 2;
+
+  case ARM::fixup_arm_ldst_pcrel_12:
+  case ARM::fixup_arm_pcrel_10:
+  case ARM::fixup_arm_adr_pcrel_12:
+  case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
+    return 3;
+
+  case FK_Data_4:
+  case ARM::fixup_t2_ldst_pcrel_12:
+  case ARM::fixup_t2_condbranch:
+  case ARM::fixup_t2_uncondbranch:
+  case ARM::fixup_t2_pcrel_10:
+  case ARM::fixup_t2_adr_pcrel_12:
+  case ARM::fixup_arm_thumb_bl:
+  case ARM::fixup_arm_thumb_blx:
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movw_lo16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+  case ARM::fixup_arm_movw_lo16_pcrel:
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+  case ARM::fixup_t2_movw_lo16_pcrel:
+    return 4;
+  }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+                                     unsigned DataSize, uint64_t Value) const {
+  unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+  Value = adjustFixupValue(Fixup.getKind(), Value);
+  if (!Value) return;           // Doesn't change encoding.
+
+  unsigned Offset = Fixup.getOffset();
+  assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+  // For each byte of the fragment that the fixup touches, mask in the
+  // bits from the fixup value.
+  for (unsigned i = 0; i != NumBytes; ++i)
+    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
+                                            const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return new DarwinARMAsmBackend(T);
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    assert(0 && "Windows not supported on ARM");
+  default:
+    return new ELFARMAsmBackend(T, Triple(TT).getOS());
+  }
+}
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 6cfd5961149f..db12b8e4fc2d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -14,28 +14,31 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "ARM.h"
-#include "ARMBuildAttrs.h"
+#include "ARMAsmPrinter.h"
 #include "ARMAddressingModes.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
 #include "ARMConstantPoolValue.h"
-#include "AsmPrinter/ARMInstPrinter.h"
 #include "ARMMachineFunctionInfo.h"
-#include "ARMMCInstLower.h"
+#include "ARMMCExpr.h"
 #include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
@@ -53,270 +56,127 @@
 #include <cctype>
 using namespace llvm;
 
-static cl::opt<bool>
-EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
-            cl::desc("enable experimental asmprinter gunk in the arm backend"));
-
-namespace llvm {
-  namespace ARM {
-    enum DW_ISA {
-      DW_ISA_ARM_thumb = 1,
-      DW_ISA_ARM_arm = 2
-    };
-  }
-}
-
 namespace {
-  class ARMAsmPrinter : public AsmPrinter {
 
-    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
-    /// make the right decision when printing asm code for different targets.
-    const ARMSubtarget *Subtarget;
-
-    /// AFI - Keep a pointer to ARMFunctionInfo for the current
-    /// MachineFunction.
-    ARMFunctionInfo *AFI;
+  // Per section and per symbol attributes are not supported.
+  // To implement them we would need the ability to delay this emission
+  // until the assembly file is fully parsed/generated as only then do we
+  // know the symbol and section numbers.
+  class AttributeEmitter {
+  public:
+    virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+    virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+    virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+    virtual void Finish() = 0;
+    virtual ~AttributeEmitter() {}
+  };
 
-    /// MCP - Keep a pointer to constantpool entries of the current
-    /// MachineFunction.
-    const MachineConstantPool *MCP;
+  class AsmAttributeEmitter : public AttributeEmitter {
+    MCStreamer &Streamer;
 
   public:
-    explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
-      Subtarget = &TM.getSubtarget<ARMSubtarget>();
-    }
+    AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+    void MaybeSwitchVendor(StringRef Vendor) { }
 
-    virtual const char *getPassName() const {
-      return "ARM Assembly Printer";
+    void EmitAttribute(unsigned Attribute, unsigned Value) {
+      Streamer.EmitRawText("\t.eabi_attribute " +
+                           Twine(Attribute) + ", " + Twine(Value));
     }
 
-    void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
-
-    void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
-                      const char *Modifier = 0);
-    void printSOImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
-    void printSOImm2PartOperand(const MachineInstr *MI, int OpNum,
-                                raw_ostream &O);
-    void printSORegOperand(const MachineInstr *MI, int OpNum,
-                           raw_ostream &O);
-    void printAddrMode2Operand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printAddrMode3Operand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printAddrMode4Operand(const MachineInstr *MI, int OpNum,raw_ostream &O,
-                               const char *Modifier = 0);
-    void printAddrMode5Operand(const MachineInstr *MI, int OpNum,raw_ostream &O,
-                               const char *Modifier = 0);
-    void printAddrMode6Operand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
-                                raw_ostream &O,
-                                const char *Modifier = 0);
-    void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
-                                        raw_ostream &O);
-    void printMemBOption(const MachineInstr *MI, int OpNum,
-                         raw_ostream &O);
-    void printShiftImmOperand(const MachineInstr *MI, int OpNum,
-                              raw_ostream &O);
-
-    void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
-                                raw_ostream &O);
-    void printThumbITMask(const MachineInstr *MI, int OpNum, raw_ostream &O);
-    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
-                                      raw_ostream &O,
-                                      unsigned Scale);
-    void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-
-    void printT2SOOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
-    void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-    void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum,
-                                    raw_ostream &O);
-    void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum,
-                                      raw_ostream &O);
-    void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O);
-    void printT2AddrModeImm8s4OffsetOperand(const MachineInstr *MI, int OpNum,
-                                            raw_ostream &O) {}
-    void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O);
-
-    void printCPSOptionOperand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O) {}
-    void printMSRMaskOperand(const MachineInstr *MI, int OpNum,
-                             raw_ostream &O) {}
-    void printNegZeroOperand(const MachineInstr *MI, int OpNum,
-                             raw_ostream &O) {}
-    void printPredicateOperand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum,
-                                        raw_ostream &O);
-    void printSBitModifierOperand(const MachineInstr *MI, int OpNum,
-                                  raw_ostream &O);
-    void printPCLabel(const MachineInstr *MI, int OpNum,
-                      raw_ostream &O);
-    void printRegisterList(const MachineInstr *MI, int OpNum,
-                           raw_ostream &O);
-    void printCPInstOperand(const MachineInstr *MI, int OpNum,
-                            raw_ostream &O,
-                            const char *Modifier);
-    void printJTBlockOperand(const MachineInstr *MI, int OpNum,
-                             raw_ostream &O);
-    void printJT2BlockOperand(const MachineInstr *MI, int OpNum,
-                              raw_ostream &O);
-    void printTBAddrMode(const MachineInstr *MI, int OpNum,
-                         raw_ostream &O);
-    void printNoHashImmediate(const MachineInstr *MI, int OpNum,
-                              raw_ostream &O);
-    void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O);
-    void printNEONModImmOperand(const MachineInstr *MI, int OpNum,
-                                raw_ostream &O);
-
-    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                                 unsigned AsmVariant, const char *ExtraCode,
-                                 raw_ostream &O);
-    virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                                       unsigned AsmVariant,
-                                       const char *ExtraCode, raw_ostream &O);
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen
-    static const char *getRegisterName(unsigned RegNo);
-
-    virtual void EmitInstruction(const MachineInstr *MI);
-    bool runOnMachineFunction(MachineFunction &F);
-
-    virtual void EmitConstantPool() {} // we emit constant pools customly!
-    virtual void EmitFunctionEntryLabel();
-    void EmitStartOfAsmFile(Module &M);
-    void EmitEndOfAsmFile(Module &M);
-
-    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
-      MachineLocation Location;
-      assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
-      // Frame address.  Currently handles register +- offset only.
-      if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
-        Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-      else {
-        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+    void EmitTextAttribute(unsigned Attribute, StringRef String) {
+      switch (Attribute) {
+      case ARMBuildAttrs::CPU_name:
+        Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+        break;
+      default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
       }
-      return Location;
+    }
+    void Finish() { }
+  };
+
+  class ObjectAttributeEmitter : public AttributeEmitter {
+    MCObjectStreamer &Streamer;
+    StringRef CurrentVendor;
+    SmallString<64> Contents;
+
+  public:
+    ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+      Streamer(Streamer_), CurrentVendor("") { }
+
+    void MaybeSwitchVendor(StringRef Vendor) {
+      assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+      if (CurrentVendor.empty())
+        CurrentVendor = Vendor;
+      else if (CurrentVendor == Vendor)
+        return;
+      else
+        Finish();
+
+      CurrentVendor = Vendor;
+
+      assert(Contents.size() == 0);
     }
 
-    virtual unsigned getISAEncoding() {
-      // ARM/Darwin adds ISA to the DWARF info for each function.
-      if (!Subtarget->isTargetDarwin())
-        return 0;
-      return Subtarget->isThumb() ?
-        llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+    void EmitAttribute(unsigned Attribute, unsigned Value) {
+      // FIXME: should be ULEB
+      Contents += Attribute;
+      Contents += Value;
     }
 
-    MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
-                                          const MachineBasicBlock *MBB) const;
-    MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
-
-    /// EmitMachineConstantPoolValue - Print a machine constantpool value to
-    /// the .s file.
-    virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      EmitMachineConstantPoolValue(MCPV, OS);
-      OutStreamer.EmitRawText(OS.str());
+    void EmitTextAttribute(unsigned Attribute, StringRef String) {
+      Contents += Attribute;
+      Contents += UppercaseString(String);
+      Contents += 0;
     }
 
-    void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
-                                      raw_ostream &O) {
-      switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
-      case 1: O << MAI->getData8bitsDirective(0); break;
-      case 2: O << MAI->getData16bitsDirective(0); break;
-      case 4: O << MAI->getData32bitsDirective(0); break;
-      default: assert(0 && "Unknown CPV size");
-      }
+    void Finish() {
+      const size_t ContentsSize = Contents.size();
 
-      ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
-
-      if (ACPV->isLSDA()) {
-        O << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
-      } else if (ACPV->isBlockAddress()) {
-        O << *GetBlockAddressSymbol(ACPV->getBlockAddress());
-      } else if (ACPV->isGlobalValue()) {
-        const GlobalValue *GV = ACPV->getGV();
-        bool isIndirect = Subtarget->isTargetDarwin() &&
-          Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
-        if (!isIndirect)
-          O << *Mang->getSymbol(GV);
-        else {
-          // FIXME: Remove this when Darwin transition to @GOT like syntax.
-          MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-          O << *Sym;
-
-          MachineModuleInfoMachO &MMIMachO =
-            MMI->getObjFileInfo<MachineModuleInfoMachO>();
-          MachineModuleInfoImpl::StubValueTy &StubSym =
-            GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) :
-                                        MMIMachO.getGVStubEntry(Sym);
-          if (StubSym.getPointer() == 0)
-            StubSym = MachineModuleInfoImpl::
-              StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
-        }
-      } else {
-        assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
-        O << *GetExternalSymbolSymbol(ACPV->getSymbol());
-      }
+      // Vendor size + Vendor name + '\0'
+      const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
 
-      if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
-      if (ACPV->getPCAdjustment() != 0) {
-        O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
-          << getFunctionNumber() << "_"  << ACPV->getLabelId()
-          << "+" << (unsigned)ACPV->getPCAdjustment();
-         if (ACPV->mustAddCurrentAddress())
-           O << "-.";
-         O << ')';
-      }
+      // Tag + Tag Size
+      const size_t TagHeaderSize = 1 + 4;
+
+      Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+      Streamer.EmitBytes(CurrentVendor, 0);
+      Streamer.EmitIntValue(0, 1); // '\0'
+
+      Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+      Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+      Streamer.EmitBytes(Contents, 0);
+
+      Contents.clear();
     }
   };
+
 } // end of anonymous namespace
 
-#include "ARMGenAsmWriter.inc"
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+  MachineLocation Location;
+  assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+  // Frame address.  Currently handles register +- offset only.
+  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+    Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+  else {
+    DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+  }
+  return Location;
+}
 
 void ARMAsmPrinter::EmitFunctionEntryLabel() {
   if (AFI->isThumbFunction()) {
-    OutStreamer.EmitRawText(StringRef("\t.code\t16"));
-    if (!Subtarget->isTargetDarwin())
-      OutStreamer.EmitRawText(StringRef("\t.thumb_func"));
-    else {
-      // This needs to emit to a temporary string to get properly quoted
-      // MCSymbols when they have spaces in them.
-      SmallString<128> Tmp;
-      raw_svector_ostream OS(Tmp);
-      OS << "\t.thumb_func\t" << *CurrentFnSym;
-      OutStreamer.EmitRawText(OS.str());
-    }
+    OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+    OutStreamer.EmitThumbFunc(Subtarget->isTargetDarwin()? CurrentFnSym : 0);
   }
 
   OutStreamer.EmitLabel(CurrentFnSym);
 }
 
-/// runOnMachineFunction - This uses the printInstruction()
+/// runOnMachineFunction - This uses the EmitInstruction()
 /// method to print assembly for each instruction.
 ///
 bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -337,32 +197,18 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   case MachineOperand::MO_Register: {
     unsigned Reg = MO.getReg();
     assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-    if (Modifier && strcmp(Modifier, "dregpair") == 0) {
-      unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
-      unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
-      O << '{'
-        << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi)
-        << '}';
-    } else if (Modifier && strcmp(Modifier, "lane") == 0) {
-      unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
-      unsigned DReg =
-        TM.getRegisterInfo()->getMatchingSuperReg(Reg,
-          RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
-      O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
-    } else {
-      assert(!MO.getSubReg() && "Subregs should be eliminated!");
-      O << getRegisterName(Reg);
-    }
+    assert(!MO.getSubReg() && "Subregs should be eliminated!");
+    O << ARMInstPrinter::getRegisterName(Reg);
     break;
   }
   case MachineOperand::MO_Immediate: {
     int64_t Imm = MO.getImm();
     O << '#';
     if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
-        (TF & ARMII::MO_LO16))
+        (TF == ARMII::MO_LO16))
       O << ":lower16:";
     else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
-             (TF & ARMII::MO_HI16))
+             (TF == ARMII::MO_HI16))
       O << ":upper16:";
     O << Imm;
     break;
@@ -371,9 +217,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
     const GlobalValue *GV = MO.getGlobal();
-
     if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
         (TF & ARMII::MO_LO16))
       O << ":lower16:";
@@ -383,18 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     O << *Mang->getSymbol(GV);
 
     printOffset(MO.getOffset(), O);
-
-    if (isCallOp && Subtarget->isTargetELF() &&
-        TM.getRelocationModel() == Reloc::PIC_)
+    if (TF == ARMII::MO_PLT)
       O << "(PLT)";
     break;
   }
   case MachineOperand::MO_ExternalSymbol: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
     O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
-    if (isCallOp && Subtarget->isTargetELF() &&
-        TM.getRelocationModel() == Reloc::PIC_)
+    if (TF == ARMII::MO_PLT)
       O << "(PLT)";
     break;
   }
@@ -407,538 +246,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   }
 }
 
-static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
-                       const MCAsmInfo *MAI) {
-  // Break it up into two parts that make up a shifter immediate.
-  V = ARM_AM::getSOImmVal(V);
-  assert(V != -1 && "Not a valid so_imm value!");
-
-  unsigned Imm = ARM_AM::getSOImmValImm(V);
-  unsigned Rot = ARM_AM::getSOImmValRot(V);
-
-  // Print low-level immediate formation info, per
-  // A5.1.3: "Data-processing operands - Immediate".
-  if (Rot) {
-    O << "#" << Imm << ", " << Rot;
-    // Pretty printed version.
-    if (VerboseAsm) {
-      O << "\t" << MAI->getCommentString() << ' ';
-      O << (int)ARM_AM::rotr32(Imm, Rot);
-    }
-  } else {
-    O << "#" << Imm;
-  }
-}
-
-/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
-/// immediate in bits 0-7.
-void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum,
-                                      raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  printSOImm(O, MO.getImm(), isVerbose(), MAI);
-}
-
-/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
-/// followed by an 'orr' to materialize.
-void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum,
-                                           raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm());
-  unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm());
-  printSOImm(O, V1, isVerbose(), MAI);
-  O << "\n\torr";
-  printPredicateOperand(MI, 2, O);
-  O << "\t";
-  printOperand(MI, 0, O);
-  O << ", ";
-  printOperand(MI, 0, O);
-  O << ", ";
-  printSOImm(O, V2, isVerbose(), MAI);
-}
-
-// so_reg is a 4-operand unit corresponding to register forms of the A5.1
-// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
-//    REG 0   0           - e.g. R5
-//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
-//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
-void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
-                                      raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  const MachineOperand &MO3 = MI->getOperand(Op+2);
-
-  O << getRegisterName(MO1.getReg());
-
-  // Print the shift opc.
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (MO2.getReg()) {
-    O << ' ' << getRegisterName(MO2.getReg());
-    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
-  } else if (ShOpc != ARM_AM::rrx) {
-    O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
-  }
-}
-
-void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op,
-                                          raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  const MachineOperand &MO3 = MI->getOperand(Op+2);
-
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
-      O << ", #"
-        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
-        << ARM_AM::getAM2Offset(MO3.getImm());
-    O << "]";
-    return;
-  }
-
-  O << ", "
-    << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
-    << getRegisterName(MO2.getReg());
-
-  if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
-    O << ", "
-      << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
-      << " #" << ShImm;
-  O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op,
-                                                raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-
-  if (!MO1.getReg()) {
-    unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
-    O << "#"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
-      << ImmOffs;
-    return;
-  }
-
-  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
-    << getRegisterName(MO1.getReg());
-
-  if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
-    O << ", "
-      << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
-      << " #" << ShImm;
-}
-
-void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op,
-                                          raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  const MachineOperand &MO3 = MI->getOperand(Op+2);
-
-  assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
-  O << "[" << getRegisterName(MO1.getReg());
-
-  if (MO2.getReg()) {
-    O << ", "
-      << (char)ARM_AM::getAM3Op(MO3.getImm())
-      << getRegisterName(MO2.getReg())
-      << "]";
-    return;
-  }
-
-  if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
-    O << ", #"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
-      << ImmOffs;
-  O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op,
-                                                raw_ostream &O){
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-
-  if (MO1.getReg()) {
-    O << (char)ARM_AM::getAM3Op(MO2.getImm())
-      << getRegisterName(MO1.getReg());
-    return;
-  }
-
-  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
-  O << "#"
-    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
-    << ImmOffs;
-}
-
-void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
-                                          raw_ostream &O,
-                                          const char *Modifier) {
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
-  if (Modifier && strcmp(Modifier, "submode") == 0) {
-    O << ARM_AM::getAMSubModeStr(Mode);
-  } else if (Modifier && strcmp(Modifier, "wide") == 0) {
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
-    if (Mode == ARM_AM::ia)
-      O << ".w";
-  } else {
-    printOperand(MI, Op, O);
-  }
-}
-
-void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
-                                          raw_ostream &O,
-                                          const char *Modifier) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-
-  assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
-    O << ", #"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
-      << ImmOffs*4;
-  }
-  O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
-                                          raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-  if (MO2.getImm()) {
-    // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << (MO2.getImm() << 3);
-  }
-  O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op,
-                                                raw_ostream &O){
-  const MachineOperand &MO = MI->getOperand(Op);
-  if (MO.getReg() == 0)
-    O << "!";
-  else
-    O << ", " << getRegisterName(MO.getReg());
-}
-
-void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  if (Modifier && strcmp(Modifier, "label") == 0) {
-    printPCLabel(MI, Op+1, O);
-    return;
-  }
-
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
-  O << "[pc, " << getRegisterName(MO1.getReg()) << "]";
-}
-
-void
-ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
-                                              raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(Op);
-  uint32_t v = ~MO.getImm();
-  int32_t lsb = CountTrailingZeros_32(v);
-  int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
-  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
-  O << "#" << lsb << ", #" << width;
-}
-
-void
-ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
-                               raw_ostream &O) {
-  unsigned val = MI->getOperand(OpNum).getImm();
-  O << ARM_MB::MemBOptToString(val);
-}
-
-void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
-                                         raw_ostream &O) {
-  unsigned ShiftOp = MI->getOperand(OpNum).getImm();
-  ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
-  switch (Opc) {
-  case ARM_AM::no_shift:
-    return;
-  case ARM_AM::lsl:
-    O << ", lsl #";
-    break;
-  case ARM_AM::asr:
-    O << ", asr #";
-    break;
-  default:
-    assert(0 && "unexpected shift opcode for shift immediate operand");
-  }
-  O << ARM_AM::getSORegOffset(ShiftOp);
-}
-
-//===--------------------------------------------------------------------===//
-
-void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O) {
-  O << "#" <<  MI->getOperand(Op).getImm() * 4;
-}
-
-void
-ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op,
-                                raw_ostream &O) {
-  // (3 - the number of trailing zeros) is the number of then / else.
-  unsigned Mask = MI->getOperand(Op).getImm();
-  unsigned CondBit0 = Mask >> 4 & 1;
-  unsigned NumTZ = CountTrailingZeros_32(Mask);
-  assert(NumTZ <= 3 && "Invalid IT mask!");
-  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
-    bool T = ((Mask >> Pos) & 1) == CondBit0;
-    if (T)
-      O << 't';
-    else
-      O << 'e';
-  }
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  O << "[" << getRegisterName(MO1.getReg());
-  O << ", " << getRegisterName(MO2.getReg()) << "]";
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
-                                            raw_ostream &O,
-                                            unsigned Scale) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  const MachineOperand &MO3 = MI->getOperand(Op+2);
-
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-
-  O << "[" << getRegisterName(MO1.getReg());
-  if (MO3.getReg())
-    O << ", " << getRegisterName(MO3.getReg());
-  else if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs * Scale;
-  O << "]";
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 1);
-}
-void
-ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 2);
-}
-void
-ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op,
-                                           raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 4);
-}
-
-void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op,
-                                                raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(Op);
-  const MachineOperand &MO2 = MI->getOperand(Op+1);
-  O << "[" << getRegisterName(MO1.getReg());
-  if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs*4;
-  O << "]";
-}
-
-//===--------------------------------------------------------------------===//
-
-// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
-// register with shift forms.
-// REG 0   0           - e.g. R5
-// REG IMM, SH_OPC     - e.g. R5, LSL #3
-void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  unsigned Reg = MO1.getReg();
-  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-  O << getRegisterName(Reg);
-
-  // Print the shift opc.
-  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (ShOpc != ARM_AM::rrx)
-    O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
-}
-
-void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
-                                                int OpNum,
-                                                raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  unsigned OffImm = MO2.getImm();
-  if (OffImm)  // Don't print +0.
-    O << ", #" << OffImm;
-  O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
-                                               int OpNum,
-                                               raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm();
-  // Don't print +0.
-  if (OffImm < 0)
-    O << ", #-" << -OffImm;
-  else if (OffImm > 0)
-    O << ", #" << OffImm;
-  O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
-                                                 int OpNum,
-                                                 raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm() / 4;
-  // Don't print +0.
-  if (OffImm < 0)
-    O << ", #-" << -OffImm * 4;
-  else if (OffImm > 0)
-    O << ", #" << OffImm * 4;
-  O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
-                                                     int OpNum,
-                                                     raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  int32_t OffImm = (int32_t)MO1.getImm();
-  // Don't print +0.
-  if (OffImm < 0)
-    O << "#-" << -OffImm;
-  else if (OffImm > 0)
-    O << "#" << OffImm;
-}
-
-void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
-                                                int OpNum,
-                                                raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-  const MachineOperand &MO3 = MI->getOperand(OpNum+2);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  assert(MO2.getReg() && "Invalid so_reg load / store address!");
-  O << ", " << getRegisterName(MO2.getReg());
-
-  unsigned ShAmt = MO3.getImm();
-  if (ShAmt) {
-    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
-    O << ", lsl #" << ShAmt;
-  }
-  O << "]";
-}
-
-
 //===--------------------------------------------------------------------===//
 
-void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  if (CC != ARMCC::AL)
-    O << ARMCondCodeToString(CC);
-}
-
-void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI,
-                                                   int OpNum,
-                                                   raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  O << ARMCondCodeToString(CC);
-}
-
-void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum,
-                                             raw_ostream &O){
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  if (Reg) {
-    assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
-    O << 's';
-  }
-}
-
-void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum,
-                                 raw_ostream &O) {
-  int Id = (int)MI->getOperand(OpNum).getImm();
-  O << MAI->getPrivateGlobalPrefix()
-    << "PC" << getFunctionNumber() << "_" << Id;
-}
-
-void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum,
-                                      raw_ostream &O) {
-  O << "{";
-  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
-    if (MI->getOperand(i).isImplicit())
-      continue;
-    if ((int)i != OpNum) O << ", ";
-    printOperand(MI, i, O);
-  }
-  O << "}";
-}
-
-void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
-                                       raw_ostream &O, const char *Modifier) {
-  assert(Modifier && "This operand only works with a modifier!");
-  // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
-  // data itself.
-  if (!strcmp(Modifier, "label")) {
-    unsigned ID = MI->getOperand(OpNum).getImm();
-    OutStreamer.EmitLabel(GetCPISymbol(ID));
-  } else {
-    assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
-    unsigned CPI = MI->getOperand(OpNum).getIndex();
-
-    const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
-
-    if (MCPE.isMachineConstantPoolEntry()) {
-      EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
-    } else {
-      EmitGlobalConstant(MCPE.Val.ConstVal);
-    }
-  }
-}
-
 MCSymbol *ARMAsmPrinter::
 GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
                             const MachineBasicBlock *MBB) const {
@@ -957,126 +266,12 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
   return OutContext.GetOrCreateSymbol(Name.str());
 }
 
-void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
-                                        raw_ostream &O) {
-  assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!");
-
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
-  unsigned JTI = MO1.getIndex();
-  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-  // Can't use EmitLabel until instprinter happens, label comes out in the wrong
-  // order.
-  O << "\n" << *JTISymbol << ":\n";
-
-  const char *JTEntryDirective = MAI->getData32bitsDirective();
-
-  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
-  bool UseSet= MAI->hasSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
-  SmallPtrSet<MachineBasicBlock*, 8> JTSets;
-  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = JTBBs[i];
-    bool isNew = JTSets.insert(MBB);
-
-    if (UseSet && isNew) {
-      O << "\t.set\t"
-        << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ','
-        << *MBB->getSymbol() << '-' << *JTISymbol << '\n';
-    }
-
-    O << JTEntryDirective << ' ';
-    if (UseSet)
-      O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB);
-    else if (TM.getRelocationModel() == Reloc::PIC_)
-      O << *MBB->getSymbol() << '-' << *JTISymbol;
-    else
-      O << *MBB->getSymbol();
-
-    if (i != e-1)
-      O << '\n';
-  }
-}
-
-void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
-                                         raw_ostream &O) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-  unsigned JTI = MO1.getIndex();
-
-  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
-  // Can't use EmitLabel until instprinter happens, label comes out in the wrong
-  // order.
-  O << "\n" << *JTISymbol << ":\n";
-
-  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
-  bool ByteOffset = false, HalfWordOffset = false;
-  if (MI->getOpcode() == ARM::t2TBB)
-    ByteOffset = true;
-  else if (MI->getOpcode() == ARM::t2TBH)
-    HalfWordOffset = true;
-
-  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = JTBBs[i];
-    if (ByteOffset)
-      O << MAI->getData8bitsDirective();
-    else if (HalfWordOffset)
-      O << MAI->getData16bitsDirective();
-
-    if (ByteOffset || HalfWordOffset)
-      O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
-    else
-      O << "\tb.w " << *MBB->getSymbol();
-
-    if (i != e-1)
-      O << '\n';
-  }
-}
-
-void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum,
-                                    raw_ostream &O) {
-  O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
-  if (MI->getOpcode() == ARM::t2TBH)
-    O << ", lsl #1";
-  O << ']';
-}
-
-void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum,
-                                         raw_ostream &O) {
-  O << MI->getOperand(OpNum).getImm();
-}
-
-void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O) {
-  const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
-  O << '#' << FP->getValueAPF().convertToFloat();
-  if (isVerbose()) {
-    O << "\t\t" << MAI->getCommentString() << ' ';
-    WriteAsOperand(O, FP, /*PrintType=*/false);
-  }
-}
-
-void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O) {
-  const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
-  O << '#' << FP->getValueAPF().convertToDouble();
-  if (isVerbose()) {
-    O << "\t\t" << MAI->getCommentString() << ' ';
-    WriteAsOperand(O, FP, /*PrintType=*/false);
-  }
-}
 
-void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum,
-                                           raw_ostream &O) {
-  unsigned EncodedImm = MI->getOperand(OpNum).getImm();
-  unsigned EltBits;
-  uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
-  O << "#0x" << utohexstr(Val);
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+  SmallString<60> Name;
+  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+    << getFunctionNumber();
+  return OutContext.GetOrCreateSymbol(Name.str());
 }
 
 bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -1090,14 +285,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     default: return true;  // Unknown modifier.
     case 'a': // Print as a memory address.
       if (MI->getOperand(OpNum).isReg()) {
-        O << "[" << getRegisterName(MI->getOperand(OpNum).getReg()) << "]";
+        O << "["
+          << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+          << "]";
         return false;
       }
       // Fallthrough
     case 'c': // Don't print "#" before an immediate operand.
       if (!MI->getOperand(OpNum).isImm())
         return true;
-      printNoHashImmediate(MI, OpNum, O);
+      O << MI->getOperand(OpNum).getImm();
       return false;
     case 'P': // Print a VFP double precision register.
     case 'q': // Print a NEON quad precision register.
@@ -1106,7 +303,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     case 'Q':
     case 'R':
     case 'H':
-      report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!");
+      // These modifiers are not yet supported.
       return true;
     }
   }
@@ -1124,48 +321,10 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 
   const MachineOperand &MO = MI->getOperand(OpNum);
   assert(MO.isReg() && "unexpected inline asm memory operand");
-  O << "[" << getRegisterName(MO.getReg()) << "]";
+  O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
   return false;
 }
 
-void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  if (EnableMCInst) {
-    printInstructionThroughMCStreamer(MI);
-    return;
-  }
-
-  if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
-    EmitAlignment(2);
-
-  SmallString<128> Str;
-  raw_svector_ostream OS(Str);
-  if (MI->getOpcode() == ARM::DBG_VALUE) {
-    unsigned NOps = MI->getNumOperands();
-    assert(NOps==4);
-    OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
-    // cast away const; DIetc do not take const operands for some reason.
-    DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
-    OS << V.getName();
-    OS << " <- ";
-    // Frame address.  Currently handles register +- offset only.
-    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
-    OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
-    OS << ']';
-    OS << "+";
-    printOperand(MI, NOps-2, OS);
-    OutStreamer.EmitRawText(OS.str());
-    return;
-  }
-
-  printInstruction(MI, OS);
-  OutStreamer.EmitRawText(OS.str());
-
-  // Make sure the instruction that follows TBB is 2-byte aligned.
-  // FIXME: Constant island pass should insert an "ALIGN" instruction instead.
-  if (MI->getOpcode() == ARM::t2TBB)
-    EmitAlignment(1);
-}
-
 void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
   if (Subtarget->isTargetDarwin()) {
     Reloc::Model RelocM = TM.getRelocationModel();
@@ -1205,49 +364,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
   }
 
   // Use unified assembler syntax.
-  OutStreamer.EmitRawText(StringRef("\t.syntax unified"));
+  OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
 
   // Emit ARM Build Attributes
   if (Subtarget->isTargetELF()) {
-    // CPU Type
-    std::string CPUString = Subtarget->getCPUString();
-    if (CPUString != "generic")
-      OutStreamer.EmitRawText("\t.cpu " + Twine(CPUString));
-
-    // FIXME: Emit FPU type
-    if (Subtarget->hasVFP2())
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::VFP_arch) + ", 2");
-
-    // Signal various FP modes.
-    if (!UnsafeFPMath) {
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_FP_denormal) + ", 1");
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
-    }
 
-    if (NoInfsFPMath && NoNaNsFPMath)
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
-    else
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 3");
-
-    // 8-bytes alignment stuff.
-    OutStreamer.EmitRawText("\t.eabi_attribute " +
-                            Twine(ARMBuildAttrs::ABI_align8_needed) + ", 1");
-    OutStreamer.EmitRawText("\t.eabi_attribute " +
-                            Twine(ARMBuildAttrs::ABI_align8_preserved) + ", 1");
-
-    // Hard float.  Use both S and D registers and conform to AAPCS-VFP.
-    if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_HardFP_use) + ", 3");
-      OutStreamer.EmitRawText("\t.eabi_attribute " +
-                              Twine(ARMBuildAttrs::ABI_VFP_args) + ", 1");
-    }
-    // FIXME: Should we signal R9 usage?
+    emitAttributes();
   }
 }
 
@@ -1280,10 +402,10 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
         else
           // Internal to current translation unit.
           //
-          // When we place the LSDA into the TEXT section, the type info pointers
-          // need to be indirect and pc-rel. We accomplish this by using NLPs.
-          // However, sometimes the types are local to the file. So we need to
-          // fill in the value for the NLP in those cases.
+          // When we place the LSDA into the TEXT section, the type info
+          // pointers need to be indirect and pc-rel. We accomplish this by
+          // using NLPs; however, sometimes the types are local to the file.
+          // We need to fill in the value for the NLP in those cases.
           OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
                                                         OutContext),
                                 4/*size*/, 0/*addrspace*/);
@@ -1321,38 +443,631 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
 }
 
 //===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+  emitARMAttributeSection();
+
+  AttributeEmitter *AttrEmitter;
+  if (OutStreamer.hasRawTextSupport())
+    AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+  else {
+    MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+    AttrEmitter = new ObjectAttributeEmitter(O);
+  }
+
+  AttrEmitter->MaybeSwitchVendor("aeabi");
+
+  std::string CPUString = Subtarget->getCPUString();
+
+  if (CPUString == "cortex-a8" ||
+      Subtarget->isCortexA8()) {
+    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+                               ARMBuildAttrs::ApplicationProfile);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+                               ARMBuildAttrs::Allowed);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+                               ARMBuildAttrs::AllowThumb32);
+    // Fixme: figure out when this is emitted.
+    //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+    //                           ARMBuildAttrs::AllowWMMXv1);
+    //
+
+    /// ADD additional Else-cases here!
+  } else if (CPUString == "generic") {
+    // FIXME: Why these defaults?
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+                               ARMBuildAttrs::Allowed);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+                               ARMBuildAttrs::Allowed);
+  }
+
+  // FIXME: Emit FPU type
+  if (Subtarget->hasVFP2())
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+                               ARMBuildAttrs::AllowFPv2);
+
+  // Signal various FP modes.
+  if (!UnsafeFPMath) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+                               ARMBuildAttrs::Allowed);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+                               ARMBuildAttrs::Allowed);
+  }
+
+  if (NoInfsFPMath && NoNaNsFPMath)
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+                               ARMBuildAttrs::Allowed);
+  else
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+                               ARMBuildAttrs::AllowIEE754);
+
+  // FIXME: add more flags to ARMBuildAttrs.h
+  // 8-bytes alignment stuff.
+  AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+  AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+  // Hard float.  Use both S and D registers and conform to AAPCS-VFP.
+  if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+  }
+  // FIXME: Should we signal R9 usage?
+
+  if (Subtarget->hasDivide())
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+  AttrEmitter->Finish();
+  delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+  // <format-version>
+  // [ <section-length> "vendor-name"
+  // [ <file-tag> <size> <attribute>*
+  //   | <section-tag> <size> <section-number>* 0 <attribute>*
+  //   | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+  //   ]+
+  // ]*
+
+  if (OutStreamer.hasRawTextSupport())
+    return;
+
+  const ARMElfTargetObjectFile &TLOFELF =
+    static_cast<const ARMElfTargetObjectFile &>
+    (getObjFileLowering());
+
+  OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+  // Format version
+  OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+                             unsigned LabelId, MCContext &Ctx) {
+
+  MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+                       + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+  return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+  switch (Modifier) {
+  default: llvm_unreachable("Unknown modifier!");
+  case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+  case ARMCP::TLSGD:       return MCSymbolRefExpr::VK_ARM_TLSGD;
+  case ARMCP::TPOFF:       return MCSymbolRefExpr::VK_ARM_TPOFF;
+  case ARMCP::GOTTPOFF:    return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+  case ARMCP::GOT:         return MCSymbolRefExpr::VK_ARM_GOT;
+  case ARMCP::GOTOFF:      return MCSymbolRefExpr::VK_ARM_GOTOFF;
+  }
+  return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+  bool isIndirect = Subtarget->isTargetDarwin() &&
+    Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+  if (!isIndirect)
+    return Mang->getSymbol(GV);
+
+  // FIXME: Remove this when Darwin transition to @GOT like syntax.
+  MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+  MachineModuleInfoMachO &MMIMachO =
+    MMI->getObjFileInfo<MachineModuleInfoMachO>();
+  MachineModuleInfoImpl::StubValueTy &StubSym =
+    GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+    MMIMachO.getGVStubEntry(MCSym);
+  if (StubSym.getPointer() == 0)
+    StubSym = MachineModuleInfoImpl::
+      StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+  return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+  ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+  MCSymbol *MCSym;
+  if (ACPV->isLSDA()) {
+    SmallString<128> Str;
+    raw_svector_ostream OS(Str);
+    OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+    MCSym = OutContext.GetOrCreateSymbol(OS.str());
+  } else if (ACPV->isBlockAddress()) {
+    MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+  } else if (ACPV->isGlobalValue()) {
+    const GlobalValue *GV = ACPV->getGV();
+    MCSym = GetARMGVSymbol(GV);
+  } else {
+    assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+    MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+  }
+
+  // Create an MCSymbol for the reference.
+  const MCExpr *Expr =
+    MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+                            OutContext);
+
+  if (ACPV->getPCAdjustment()) {
+    MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+                                    getFunctionNumber(),
+                                    ACPV->getLabelId(),
+                                    OutContext);
+    const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+    PCRelExpr =
+      MCBinaryExpr::CreateAdd(PCRelExpr,
+                              MCConstantExpr::Create(ACPV->getPCAdjustment(),
+                                                     OutContext),
+                              OutContext);
+    if (ACPV->mustAddCurrentAddress()) {
+      // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+      // label, so just emit a local label end reference that instead.
+      MCSymbol *DotSym = OutContext.CreateTempSymbol();
+      OutStreamer.EmitLabel(DotSym);
+      const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+      PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+    }
+    Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+  }
+  OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+  unsigned Opcode = MI->getOpcode();
+  int OpNum = 1;
+  if (Opcode == ARM::BR_JTadd)
+    OpNum = 2;
+  else if (Opcode == ARM::BR_JTm)
+    OpNum = 3;
+
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+  unsigned JTI = MO1.getIndex();
+
+  // Emit a label for the jump table.
+  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+  OutStreamer.EmitLabel(JTISymbol);
+
+  // Emit each entry of the table.
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    // Construct an MCExpr for the entry. We want a value of the form:
+    // (BasicBlockAddr - TableBeginAddr)
+    //
+    // For example, a table with entries jumping to basic blocks BB0 and BB1
+    // would look like:
+    // LJTI_0_0:
+    //    .word (LBB0 - LJTI_0_0)
+    //    .word (LBB1 - LJTI_0_0)
+    const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+    if (TM.getRelocationModel() == Reloc::PIC_)
+      Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+                                                                   OutContext),
+                                     OutContext);
+    OutStreamer.EmitValue(Expr, 4);
+  }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+  unsigned Opcode = MI->getOpcode();
+  int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+  unsigned JTI = MO1.getIndex();
+
+  // Emit a label for the jump table.
+  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+  OutStreamer.EmitLabel(JTISymbol);
+
+  // Emit each entry of the table.
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+  unsigned OffsetWidth = 4;
+  if (MI->getOpcode() == ARM::t2TBB_JT)
+    OffsetWidth = 1;
+  else if (MI->getOpcode() == ARM::t2TBH_JT)
+    OffsetWidth = 2;
+
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+                                                      OutContext);
+    // If this isn't a TBB or TBH, the entries are direct branch instructions.
+    if (OffsetWidth == 4) {
+      MCInst BrInst;
+      BrInst.setOpcode(ARM::t2B);
+      BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+      OutStreamer.EmitInstruction(BrInst);
+      continue;
+    }
+    // Otherwise it's an offset from the dispatch instruction. Construct an
+    // MCExpr for the entry. We want a value of the form:
+    // (BasicBlockAddr - TableBeginAddr) / 2
+    //
+    // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+    // would look like:
+    // LJTI_0_0:
+    //    .byte (LBB0 - LJTI_0_0) / 2
+    //    .byte (LBB1 - LJTI_0_0) / 2
+    const MCExpr *Expr =
+      MCBinaryExpr::CreateSub(MBBSymbolExpr,
+                              MCSymbolRefExpr::Create(JTISymbol, OutContext),
+                              OutContext);
+    Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+                                   OutContext);
+    OutStreamer.EmitValue(Expr, OffsetWidth);
+  }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+                                           raw_ostream &OS) {
+  unsigned NOps = MI->getNumOperands();
+  assert(NOps==4);
+  OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // cast away const; DIetc do not take const operands for some reason.
+  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+  OS << V.getName();
+  OS << " <- ";
+  // Frame address.  Currently handles register +- offset only.
+  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+  OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+  OS << ']';
+  OS << "+";
+  printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+                                const MCSymbol *Label,
+                                unsigned pred, unsigned ccreg,
+                                MCContext &Ctx) {
+  const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+  Inst.addOperand(MCOperand::CreateReg(Dest));
+  Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+  // Add predicate operands.
+  Inst.addOperand(MCOperand::CreateImm(pred));
+  Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+                                           unsigned Opcode) {
+  MCInst TmpInst;
 
-void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
-  ARMMCInstLower MCInstLowering(OutContext, *Mang, *this);
-  switch (MI->getOpcode()) {
-  case ARM::t2MOVi32imm:
-    assert(0 && "Should be lowered by thumb2it pass");
+  // Emit the instruction as usual, just patch the opcode.
+  LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+  TmpInst.setOpcode(Opcode);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
   default: break;
-  case ARM::PICADD: { // FIXME: Remove asm string from td file.
+  case ARM::t2ADDrSPi:
+  case ARM::t2ADDrSPi12:
+  case ARM::t2SUBrSPi:
+  case ARM::t2SUBrSPi12:
+    assert ((MI->getOperand(1).getReg() == ARM::SP) &&
+            "Unexpected source register!");
+    break;
+
+  case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+  case ARM::DBG_VALUE: {
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+      SmallString<128> TmpStr;
+      raw_svector_ostream OS(TmpStr);
+      PrintDebugValueComment(MI, OS);
+      OutStreamer.EmitRawText(StringRef(OS.str()));
+    }
+    return;
+  }
+  case ARM::tBfar: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::tBL);
+    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+          MI->getOperand(0).getMBB()->getSymbol(), OutContext)));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::LEApcrel:
+  case ARM::tLEApcrel:
+  case ARM::t2LEApcrel: {
+    // FIXME: Need to also handle globals and externals
+    MCInst TmpInst;
+    TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+                      : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+                         : ARM::ADR));
+    populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+                        GetCPISymbol(MI->getOperand(1).getIndex()),
+                        MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+                        OutContext);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::LEApcrelJT:
+  case ARM::tLEApcrelJT:
+  case ARM::t2LEApcrelJT: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+                      : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+                         : ARM::ADR));
+    populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+                      GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+                                                  MI->getOperand(2).getImm()),
+                      MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+                      OutContext);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::MOVPCRX: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::MOVr);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    // Add 's' bit operand (always reg0 for this)
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::BXr9_CALL:
+  case ARM::BX_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Add 's' bit operand (always reg0 for this)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::BX);
+      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
+  case ARM::BMOVPCRXr9_CALL:
+  case ARM::BMOVPCRX_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Add 's' bit operand (always reg0 for this)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Add 's' bit operand (always reg0 for this)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
+  case ARM::MOVi16_ga_pcrel:
+  case ARM::t2MOVi16_ga_pcrel: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+    unsigned TF = MI->getOperand(1).getTargetFlags();
+    bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+    const GlobalValue *GV = MI->getOperand(1).getGlobal();
+    MCSymbol *GVSym = GetARMGVSymbol(GV);
+    const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+    if (isPIC) {
+      MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+                                       getFunctionNumber(),
+                                       MI->getOperand(2).getImm(), OutContext);
+      const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+      unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+      const MCExpr *PCRelExpr =
+        ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+                                  MCBinaryExpr::CreateAdd(LabelSymExpr,
+                                      MCConstantExpr::Create(PCAdj, OutContext),
+                                          OutContext), OutContext), OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+    } else {
+      const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+    }
+
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    // Add 's' bit operand (always reg0 for this)
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::MOVTi16_ga_pcrel:
+  case ARM::t2MOVTi16_ga_pcrel: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+                      ? ARM::MOVTi16 : ARM::t2MOVTi16);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+    unsigned TF = MI->getOperand(2).getTargetFlags();
+    bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+    const GlobalValue *GV = MI->getOperand(2).getGlobal();
+    MCSymbol *GVSym = GetARMGVSymbol(GV);
+    const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+    if (isPIC) {
+      MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+                                       getFunctionNumber(),
+                                       MI->getOperand(3).getImm(), OutContext);
+      const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+      unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+      const MCExpr *PCRelExpr =
+        ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+                                   MCBinaryExpr::CreateAdd(LabelSymExpr,
+                                      MCConstantExpr::Create(PCAdj, OutContext),
+                                          OutContext), OutContext), OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+    } else {
+      const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+    }
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    // Add 's' bit operand (always reg0 for this)
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case ARM::tPICADD: {
     // This is a pseudo op for a label + instruction sequence, which looks like:
     // LPC0:
-    //     add r0, pc, r0
+    //     add r0, pc
     // This adds the address of LPC0 to r0.
 
     // Emit the label.
-    // FIXME: MOVE TO SHARED PLACE.
-    unsigned Id = (unsigned)MI->getOperand(2).getImm();
-    const char *Prefix = MAI->getPrivateGlobalPrefix();
-    MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
-                         + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
-    OutStreamer.EmitLabel(Label);
+    OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+                          getFunctionNumber(), MI->getOperand(2).getImm(),
+                          OutContext));
 
+    // Form and emit the add.
+    MCInst AddInst;
+    AddInst.setOpcode(ARM::tADDhirr);
+    AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    // Add predicate operands.
+    AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    AddInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(AddInst);
+    return;
+  }
+  case ARM::PICADD: {
+    // This is a pseudo op for a label + instruction sequence, which looks like:
+    // LPC0:
+    //     add r0, pc, r0
+    // This adds the address of LPC0 to r0.
 
-    // Form and emit tha dd.
+    // Emit the label.
+    OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+                          getFunctionNumber(), MI->getOperand(2).getImm(),
+                          OutContext));
+
+    // Form and emit the add.
     MCInst AddInst;
     AddInst.setOpcode(ARM::ADDrr);
     AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
     AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
     AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+    // Add predicate operands.
+    AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+    AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+    // Add 's' bit operand (always reg0 for this)
+    AddInst.addOperand(MCOperand::CreateReg(0));
     OutStreamer.EmitInstruction(AddInst);
     return;
   }
-  case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file.
+  case ARM::PICSTR:
+  case ARM::PICSTRB:
+  case ARM::PICSTRH:
+  case ARM::PICLDR:
+  case ARM::PICLDRB:
+  case ARM::PICLDRH:
+  case ARM::PICLDRSB:
+  case ARM::PICLDRSH: {
+    // This is a pseudo op for a label + instruction sequence, which looks like:
+    // LPC0:
+    //     OP r0, [pc, r0]
+    // The LCP0 label is referenced by a constant pool entry in order to get
+    // a PC-relative address at the ldr instruction.
+
+    // Emit the label.
+    OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+                          getFunctionNumber(), MI->getOperand(2).getImm(),
+                          OutContext));
+
+    // Form and emit the load
+    unsigned Opcode;
+    switch (MI->getOpcode()) {
+    default:
+      llvm_unreachable("Unexpected opcode!");
+    case ARM::PICSTR:   Opcode = ARM::STRrs; break;
+    case ARM::PICSTRB:  Opcode = ARM::STRBrs; break;
+    case ARM::PICSTRH:  Opcode = ARM::STRH; break;
+    case ARM::PICLDR:   Opcode = ARM::LDRrs; break;
+    case ARM::PICLDRB:  Opcode = ARM::LDRBrs; break;
+    case ARM::PICLDRH:  Opcode = ARM::LDRH; break;
+    case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+    case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+    }
+    MCInst LdStInst;
+    LdStInst.setOpcode(Opcode);
+    LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+    LdStInst.addOperand(MCOperand::CreateImm(0));
+    // Add predicate operands.
+    LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+    LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+    OutStreamer.EmitInstruction(LdStInst);
+
+    return;
+  }
+  case ARM::CONSTPOOL_ENTRY: {
     /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
     /// in the function.  The first operand is the ID# for this instruction, the
     /// second is the index into the MachineConstantPool that this is, the third
@@ -1371,100 +1086,450 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
 
     return;
   }
-  case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
-    // This is a hack that lowers as a two instruction sequence.
-    unsigned DstReg = MI->getOperand(0).getReg();
-    unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
+  case ARM::t2BR_JT: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::tMOVgpr2gpr);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    // Output the data for the jump table itself
+    EmitJump2Table(MI);
+    return;
+  }
+  case ARM::t2TBB_JT: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    MCInst TmpInst;
+
+    TmpInst.setOpcode(ARM::t2TBB);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    // Output the data for the jump table itself
+    EmitJump2Table(MI);
+    // Make sure the next instruction is 2-byte aligned.
+    EmitAlignment(1);
+    return;
+  }
+  case ARM::t2TBH_JT: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    MCInst TmpInst;
+
+    TmpInst.setOpcode(ARM::t2TBH);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+    // Output the data for the jump table itself
+    EmitJump2Table(MI);
+    return;
+  }
+  case ARM::tBR_JTr:
+  case ARM::BR_JTr: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    // mov pc, target
+    MCInst TmpInst;
+    unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+      ARM::MOVr : ARM::tMOVgpr2gpr;
+    TmpInst.setOpcode(Opc);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    // Add 's' bit operand (always reg0 for this)
+    if (Opc == ARM::MOVr)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+
+    // Make sure the Thumb jump table is 4-byte aligned.
+    if (Opc == ARM::tMOVgpr2gpr)
+      EmitAlignment(2);
 
-    unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
-    unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+    // Output the data for the jump table itself
+    EmitJumpTable(MI);
+    return;
+  }
+  case ARM::BR_JTm: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    // ldr pc, target
+    MCInst TmpInst;
+    if (MI->getOperand(1).getReg() == 0) {
+      // literal offset
+      TmpInst.setOpcode(ARM::LDRi12);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+      TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+    } else {
+      TmpInst.setOpcode(ARM::LDRrs);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
+    }
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
 
+    // Output the data for the jump table itself
+    EmitJumpTable(MI);
+    return;
+  }
+  case ARM::BR_JTadd: {
+    // Lower and emit the instruction itself, then the jump table following it.
+    // add pc, target, idx
+    MCInst TmpInst;
+    TmpInst.setOpcode(ARM::ADDrr);
+    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+    // Add predicate operands.
+    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    // Add 's' bit operand (always reg0 for this)
+    TmpInst.addOperand(MCOperand::CreateReg(0));
+    OutStreamer.EmitInstruction(TmpInst);
+
+    // Output the data for the jump table itself
+    EmitJumpTable(MI);
+    return;
+  }
+  case ARM::TRAP: {
+    // Non-Darwin binutils don't yet support the "trap" mnemonic.
+    // FIXME: Remove this special case when they do.
+    if (!Subtarget->isTargetDarwin()) {
+      //.long 0xe7ffdefe @ trap
+      uint32_t Val = 0xe7ffdefeUL;
+      OutStreamer.AddComment("trap");
+      OutStreamer.EmitIntValue(Val, 4);
+      return;
+    }
+    break;
+  }
+  case ARM::tTRAP: {
+    // Non-Darwin binutils don't yet support the "trap" mnemonic.
+    // FIXME: Remove this special case when they do.
+    if (!Subtarget->isTargetDarwin()) {
+      //.short 57086 @ trap
+      uint16_t Val = 0xdefe;
+      OutStreamer.AddComment("trap");
+      OutStreamer.EmitIntValue(Val, 2);
+      return;
+    }
+    break;
+  }
+  case ARM::t2Int_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp_nofp:
+  case ARM::tInt_eh_sjlj_setjmp: {
+    // Two incoming args: GPR:$src, GPR:$val
+    // mov $val, pc
+    // adds $val, #7
+    // str $val, [$src, #4]
+    // movs r0, #0
+    // b 1f
+    // movs r0, #1
+    // 1:
+    unsigned SrcReg = MI->getOperand(0).getReg();
+    unsigned ValReg = MI->getOperand(1).getReg();
+    MCSymbol *Label = GetARMSJLJEHLabel();
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::MOVi);
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));
-      TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+      TmpInst.setOpcode(ARM::tMOVgpr2tgpr);
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // 's' bit operand
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+      OutStreamer.AddComment("eh_setjmp begin");
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tADDi3);
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      // 's' bit operand
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      TmpInst.addOperand(MCOperand::CreateImm(7));
       // Predicate.
-      TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
-      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
-      TmpInst.addOperand(MCOperand::CreateReg(0));          // cc_out
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
       OutStreamer.EmitInstruction(TmpInst);
     }
-
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::ORRri);
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));     // dstreg
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));     // inreg
-      TmpInst.addOperand(MCOperand::CreateImm(SOImmValV2)); // so_imm
+      TmpInst.setOpcode(ARM::tSTRi);
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      // The offset immediate is #4. The operand value is scaled by 4 for the
+      // tSTR instruction.
+      TmpInst.addOperand(MCOperand::CreateImm(1));
       // Predicate.
-      TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
-      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tMOVi8);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tB);
+      TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tMOVi8);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+      TmpInst.addOperand(MCOperand::CreateImm(1));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.AddComment("eh_setjmp end");
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    OutStreamer.EmitLabel(Label);
+    return;
+  }
+
+  case ARM::Int_eh_sjlj_setjmp_nofp:
+  case ARM::Int_eh_sjlj_setjmp: {
+    // Two incoming args: GPR:$src, GPR:$val
+    // add $val, pc, #8
+    // str $val, [$src, #+4]
+    // mov r0, #0
+    // add pc, pc, #0
+    // mov r0, #1
+    unsigned SrcReg = MI->getOperand(0).getReg();
+    unsigned ValReg = MI->getOperand(1).getReg();
 
-      TmpInst.addOperand(MCOperand::CreateReg(0));          // cc_out
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::ADDri);
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateImm(8));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // 's' bit operand (always reg0 for this).
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.AddComment("eh_setjmp begin");
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::STRi12);
+      TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateImm(4));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVi);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // 's' bit operand (always reg0 for this).
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::ADDri);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // 's' bit operand (always reg0 for this).
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVi);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+      TmpInst.addOperand(MCOperand::CreateImm(1));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // 's' bit operand (always reg0 for this).
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.AddComment("eh_setjmp end");
       OutStreamer.EmitInstruction(TmpInst);
     }
     return;
   }
-  case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
-    // This is a hack that lowers as a two instruction sequence.
-    unsigned DstReg = MI->getOperand(0).getReg();
-    const MachineOperand &MO = MI->getOperand(1);
-    MCOperand V1, V2;
-    if (MO.isImm()) {
-      unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
-      V1 = MCOperand::CreateImm(ImmVal & 65535);
-      V2 = MCOperand::CreateImm(ImmVal >> 16);
-    } else if (MO.isGlobal()) {
-      MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
-      const MCSymbolRefExpr *SymRef1 =
-        MCSymbolRefExpr::Create(Symbol,
-                                MCSymbolRefExpr::VK_ARM_LO16, OutContext);
-      const MCSymbolRefExpr *SymRef2 =
-        MCSymbolRefExpr::Create(Symbol,
-                                MCSymbolRefExpr::VK_ARM_HI16, OutContext);
-      V1 = MCOperand::CreateExpr(SymRef1);
-      V2 = MCOperand::CreateExpr(SymRef2);
-    } else {
-      MI->dump();
-      llvm_unreachable("cannot handle this operand");
+  case ARM::Int_eh_sjlj_longjmp: {
+    // ldr sp, [$src, #8]
+    // ldr $scratch, [$src, #4]
+    // ldr r7, [$src]
+    // bx $scratch
+    unsigned SrcReg = MI->getOperand(0).getReg();
+    unsigned ScratchReg = MI->getOperand(1).getReg();
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::LDRi12);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateImm(8));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
     }
-
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::MOVi16);
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // dstreg
-      TmpInst.addOperand(V1); // lower16(imm)
-
+      TmpInst.setOpcode(ARM::LDRi12);
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateImm(4));
       // Predicate.
-      TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
-      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
       OutStreamer.EmitInstruction(TmpInst);
     }
-
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::MOVTi16);
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // dstreg
-      TmpInst.addOperand(MCOperand::CreateReg(DstReg));         // srcreg
-      TmpInst.addOperand(V2);   // upper16(imm)
-
+      TmpInst.setOpcode(ARM::LDRi12);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
       // Predicate.
-      TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
-      TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::BX);
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
       OutStreamer.EmitInstruction(TmpInst);
     }
-
     return;
   }
+  case ARM::tInt_eh_sjlj_longjmp: {
+    // ldr $scratch, [$src, #8]
+    // mov sp, $scratch
+    // ldr $scratch, [$src, #4]
+    // ldr r7, [$src]
+    // bx $scratch
+    unsigned SrcReg = MI->getOperand(0).getReg();
+    unsigned ScratchReg = MI->getOperand(1).getReg();
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tLDRi);
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      // The offset immediate is #8. The operand value is scaled by 4 for the
+      // tLDR instruction.
+      TmpInst.addOperand(MCOperand::CreateImm(2));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tMOVtgpr2gpr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tLDRi);
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateImm(1));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tLDRr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+      TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tBX_RET_vararg);
+      TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
+  // These are the pseudos created to comply with stricter operand restrictions
+  // on ARMv5. Lower them now to "normal" instructions, since all the
+  // restrictions are already satisfied.
+  case ARM::MULv5:
+    EmitPatchedInstruction(MI, ARM::MUL);
+    return;
+  case ARM::MLAv5:
+    EmitPatchedInstruction(MI, ARM::MLA);
+    return;
+  case ARM::SMULLv5:
+    EmitPatchedInstruction(MI, ARM::SMULL);
+    return;
+  case ARM::UMULLv5:
+    EmitPatchedInstruction(MI, ARM::UMULL);
+    return;
+  case ARM::SMLALv5:
+    EmitPatchedInstruction(MI, ARM::SMLAL);
+    return;
+  case ARM::UMLALv5:
+    EmitPatchedInstruction(MI, ARM::UMLAL);
+    return;
+  case ARM::UMAALv5:
+    EmitPatchedInstruction(MI, ARM::UMAAL);
+    return;
   }
 
   MCInst TmpInst;
-  MCInstLowering.Lower(MI, TmpInst);
+  LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
   OutStreamer.EmitInstruction(TmpInst);
 }
 
@@ -1476,7 +1541,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new ARMInstPrinter(MAI, false);
+    return new ARMInstPrinter(MAI);
   return 0;
 }
 
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 000000000000..585268442ce4
--- /dev/null
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,112 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace ARM {
+  enum DW_ISA {
+    DW_ISA_ARM_thumb = 1,
+    DW_ISA_ARM_arm = 2
+  };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when printing asm code for different targets.
+  const ARMSubtarget *Subtarget;
+
+  /// AFI - Keep a pointer to ARMFunctionInfo for the current
+  /// MachineFunction.
+  ARMFunctionInfo *AFI;
+
+  /// MCP - Keep a pointer to constantpool entries of the current
+  /// MachineFunction.
+  const MachineConstantPool *MCP;
+
+public:
+  explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+    : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+      Subtarget = &TM.getSubtarget<ARMSubtarget>();
+    }
+
+  virtual const char *getPassName() const {
+    return "ARM Assembly Printer";
+  }
+
+  void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = 0);
+
+  virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+  virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                                     unsigned AsmVariant,
+                                     const char *ExtraCode, raw_ostream &O);
+
+  void EmitJumpTable(const MachineInstr *MI);
+  void EmitJump2Table(const MachineInstr *MI);
+  virtual void EmitInstruction(const MachineInstr *MI);
+  bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void EmitConstantPool() {} // we emit constant pools customly!
+  virtual void EmitFunctionEntryLabel();
+  void EmitStartOfAsmFile(Module &M);
+  void EmitEndOfAsmFile(Module &M);
+
+private:
+  // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+  void emitAttributes();
+
+  // Helper for ELF .o only
+  void emitARMAttributeSection();
+
+  // Generic helper used to emit e.g. ARMv5 mul pseudos
+  void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+public:
+  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+  MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+  virtual unsigned getISAEncoding() {
+    // ARM/Darwin adds ISA to the DWARF info for each function.
+    if (!Subtarget->isTargetDarwin())
+      return 0;
+    return Subtarget->isThumb() ?
+      llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+  }
+
+  MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+                                        const MachineBasicBlock *MBB) const;
+  MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+  MCSymbol *GetARMSJLJEHLabel(void) const;
+
+  MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+  
+  /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+  /// the .s file.
+  virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 000000000000..a56cc1a9f249
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,249 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only defined enum types, and
+// so are safe to include here.
+
+// Defines symbolic names for ARM registers.  This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
+  enum CondCodes { // Meaning (integer)          Meaning (floating-point)
+    EQ,            // Equal                      Equal
+    NE,            // Not equal                  Not equal, or unordered
+    HS,            // Carry set                  >, ==, or unordered
+    LO,            // Carry clear                Less than
+    MI,            // Minus, negative            Less than
+    PL,            // Plus, positive or zero     >, ==, or unordered
+    VS,            // Overflow                   Unordered
+    VC,            // No overflow                Not unordered
+    HI,            // Unsigned higher            Greater than, or unordered
+    LS,            // Unsigned lower or same     Less than or equal
+    GE,            // Greater than or equal      Greater than or equal
+    LT,            // Less than                  Less than, or unordered
+    GT,            // Greater than               Greater than
+    LE,            // Less than or equal         <, ==, or unordered
+    AL             // Always (unconditional)     Always (unconditional)
+  };
+
+  inline static CondCodes getOppositeCondition(CondCodes CC) {
+    switch (CC) {
+    default: llvm_unreachable("Unknown condition code");
+    case EQ: return NE;
+    case NE: return EQ;
+    case HS: return LO;
+    case LO: return HS;
+    case MI: return PL;
+    case PL: return MI;
+    case VS: return VC;
+    case VC: return VS;
+    case HI: return LS;
+    case LS: return HI;
+    case GE: return LT;
+    case LT: return GE;
+    case GT: return LE;
+    case LE: return GT;
+    }
+  }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case ARMCC::EQ:  return "eq";
+  case ARMCC::NE:  return "ne";
+  case ARMCC::HS:  return "hs";
+  case ARMCC::LO:  return "lo";
+  case ARMCC::MI:  return "mi";
+  case ARMCC::PL:  return "pl";
+  case ARMCC::VS:  return "vs";
+  case ARMCC::VC:  return "vc";
+  case ARMCC::HI:  return "hi";
+  case ARMCC::LS:  return "ls";
+  case ARMCC::GE:  return "ge";
+  case ARMCC::LT:  return "lt";
+  case ARMCC::GT:  return "gt";
+  case ARMCC::LE:  return "le";
+  case ARMCC::AL:  return "al";
+  }
+}
+
+namespace ARM_PROC {
+  enum IMod {
+    IE = 2,
+    ID = 3
+  };
+
+  enum IFlags {
+    F = 1,
+    I = 2,
+    A = 4
+  };
+
+  inline static const char *IFlagsToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown iflags operand");
+    case F: return "f";
+    case I: return "i";
+    case A: return "a";
+    }
+  }
+
+  inline static const char *IModToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown imod operand");
+    case IE: return "ie";
+    case ID: return "id";
+    }
+  }
+}
+
+namespace ARM_MB {
+  // The Memory Barrier Option constants map directly to the 4-bit encoding of
+  // the option field for memory barrier operations.
+  enum MemBOpt {
+    SY    = 15,
+    ST    = 14,
+    ISH   = 11,
+    ISHST = 10,
+    NSH   = 7,
+    NSHST = 6,
+    OSH   = 3,
+    OSHST = 2
+  };
+
+  inline static const char *MemBOptToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown memory operation");
+    case SY:    return "sy";
+    case ST:    return "st";
+    case ISH:   return "ish";
+    case ISHST: return "ishst";
+    case NSH:   return "nsh";
+    case NSHST: return "nshst";
+    case OSH:   return "osh";
+    case OSHST: return "oshst";
+    }
+  }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+  using namespace ARM;
+  switch (Reg) {
+  default:
+    llvm_unreachable("Unknown ARM register!");
+  case R0:  case S0:  case D0:  case Q0:  return 0;
+  case R1:  case S1:  case D1:  case Q1:  return 1;
+  case R2:  case S2:  case D2:  case Q2:  return 2;
+  case R3:  case S3:  case D3:  case Q3:  return 3;
+  case R4:  case S4:  case D4:  case Q4:  return 4;
+  case R5:  case S5:  case D5:  case Q5:  return 5;
+  case R6:  case S6:  case D6:  case Q6:  return 6;
+  case R7:  case S7:  case D7:  case Q7:  return 7;
+  case R8:  case S8:  case D8:  case Q8:  return 8;
+  case R9:  case S9:  case D9:  case Q9:  return 9;
+  case R10: case S10: case D10: case Q10: return 10;
+  case R11: case S11: case D11: case Q11: return 11;
+  case R12: case S12: case D12: case Q12: return 12;
+  case SP:  case S13: case D13: case Q13: return 13;
+  case LR:  case S14: case D14: case Q14: return 14;
+  case PC:  case S15: case D15: case Q15: return 15;
+
+  case S16: case D16: return 16;
+  case S17: case D17: return 17;
+  case S18: case D18: return 18;
+  case S19: case D19: return 19;
+  case S20: case D20: return 20;
+  case S21: case D21: return 21;
+  case S22: case D22: return 22;
+  case S23: case D23: return 23;
+  case S24: case D24: return 24;
+  case S25: case D25: return 25;
+  case S26: case D26: return 26;
+  case S27: case D27: return 27;
+  case S28: case D28: return 28;
+  case S29: case D29: return 29;
+  case S30: case D30: return 30;
+  case S31: case D31: return 31;
+  }
+}
+
+namespace ARMII {
+  /// Target Operand Flag enum.
+  enum TOF {
+    //===------------------------------------------------------------------===//
+    // ARM Specific MachineOperand flags.
+
+    MO_NO_FLAG,
+
+    /// MO_LO16 - On a symbol operand, this represents a relocation containing
+    /// lower 16 bit of the address. Used only via movw instruction.
+    MO_LO16,
+
+    /// MO_HI16 - On a symbol operand, this represents a relocation containing
+    /// higher 16 bit of the address. Used only via movt instruction.
+    MO_HI16,
+
+    /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+    /// i.e. "FOO$non_lazy_ptr".
+    /// Used only via movw instruction.
+    MO_LO16_NONLAZY,
+
+    /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+    /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+    MO_HI16_NONLAZY,
+
+    /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the PC relative address of the
+    /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+    /// Used only via movw instruction.
+    MO_LO16_NONLAZY_PIC,
+
+    /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the PC relative address of the
+    /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+    /// Used only via movt instruction.
+    MO_HI16_NONLAZY_PIC,
+
+    /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+    /// call operand.
+    MO_PLT
+  };
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e4f10f93fb74..2268e59ea7b1 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,13 +15,13 @@
 #include "ARM.h"
 #include "ARMAddressingModes.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
 #include "ARMGenInstrInfo.inc"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -34,15 +34,75 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 static cl::opt<bool>
 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+  unsigned MLxOpc;     // MLA / MLS opcode
+  unsigned MulOpc;     // Expanded multiplication opcode
+  unsigned AddSubOpc;  // Expanded add / sub opcode
+  bool NegAcc;         // True if the acc is negated before the add / sub.
+  bool HasLane;        // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
+  // fp scalar ops
+  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
+  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
+  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
+  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
+  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
+  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
+  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
+  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },
+
+  // fp SIMD ops
+  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
+  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
+  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
+  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
+  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
+  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
+  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
+  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
+};
+
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
   : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
     Subtarget(STI) {
+  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+      assert(false && "Duplicated entries?");
+    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+  }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+                             const ScheduleDAG *DAG) const {
+  if (usePreRAHazardRecognizer()) {
+    const InstrItineraryData *II = TM->getInstrItineraryData();
+    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+  }
+  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                   const ScheduleDAG *DAG) const {
+  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+    return (ScheduleHazardRecognizer *)
+      new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+  return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
 
 MachineInstr *
@@ -140,7 +200,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (isLoad)
       MemMI = BuildMI(MF, MI->getDebugLoc(),
                       get(MemOpc), MI->getOperand(0).getReg())
-        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+        .addReg(WBReg).addImm(0).addImm(Pred);
     else
       MemMI = BuildMI(MF, MI->getDebugLoc(),
                       get(MemOpc)).addReg(MI->getOperand(1).getReg())
@@ -151,7 +211,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (isLoad)
       MemMI = BuildMI(MF, MI->getDebugLoc(),
                       get(MemOpc), MI->getOperand(0).getReg())
-        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+        .addReg(BaseReg).addImm(0).addImm(Pred);
     else
       MemMI = BuildMI(MF, MI->getDebugLoc(),
                       get(MemOpc)).addReg(MI->getOperand(1).getReg())
@@ -166,8 +226,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   if (LV) {
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
-      if (MO.isReg() && MO.getReg() &&
-          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
         unsigned Reg = MO.getReg();
 
         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
@@ -197,43 +256,6 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   return NewMIs[0];
 }
 
-bool
-ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    bool isKill = true;
-
-    // Add the callee-saved register as live-in unless it's LR and
-    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
-    // then it's already added to the function and entry block live-in sets.
-    if (Reg == ARM::LR) {
-      MachineFunction &MF = *MBB.getParent();
-      if (MF.getFrameInfo()->isReturnAddressTaken() &&
-          MF.getRegInfo().isLiveIn(Reg))
-        isKill = false;
-    }
-
-    if (isKill)
-      MBB.addLiveIn(Reg);
-
-    // Insert the spill to the stack frame. The register is killed at the spill
-    // 
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    storeRegToStackSlot(MBB, MI, Reg, isKill,
-                        CSI[i].getFrameIdx(), RC, TRI);
-  }
-  return true;
-}
-
 // Branch analysis.
 bool
 ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -275,13 +297,31 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
   // Get the instruction before it if it is a terminator.
   MachineInstr *SecondLastInst = I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If AllowModify is true and the block ends with two or more unconditional
+  // branches, delete all but the first unconditional branch.
+  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+    while (isUncondBranchOpcode(SecondLastOpc)) {
+      LastInst->eraseFromParent();
+      LastInst = SecondLastInst;
+      LastOpc = LastInst->getOpcode();
+      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now the only terminator is an unconditional branch.
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        SecondLastInst = I;
+        SecondLastOpc = SecondLastInst->getOpcode();
+      }
+    }
+  }
 
   // If there are three terminators, we don't know what sort of block this is.
   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
 
   // If the block ends with a B and a Bcc, handle it.
-  unsigned SecondLastOpc = SecondLastInst->getOpcode();
   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     TBB =  SecondLastInst->getOperand(0).getMBB();
     Cond.push_back(SecondLastInst->getOperand(1));
@@ -468,7 +508,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
 }
 
 /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
-DISABLE_INLINE
+LLVM_ATTRIBUTE_NOINLINE
 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                 unsigned JTI);
 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -513,6 +553,14 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARMII::Size2Bytes: return 2;          // Thumb1 instruction.
   case ARMII::SizeSpecial: {
     switch (Opc) {
+    case ARM::MOVi16_ga_pcrel:
+    case ARM::MOVTi16_ga_pcrel:
+    case ARM::t2MOVi16_ga_pcrel:
+    case ARM::t2MOVTi16_ga_pcrel:
+      return 4;
+    case ARM::MOVi32imm:
+    case ARM::t2MOVi32imm:
+      return 8;
     case ARM::CONSTPOOL_ENTRY:
       // If this machine instr is a constant pool entry, its size is recorded as
       // operand #2.
@@ -533,13 +581,13 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     case ARM::BR_JTadd:
     case ARM::tBR_JTr:
     case ARM::t2BR_JT:
-    case ARM::t2TBB:
-    case ARM::t2TBH: {
+    case ARM::t2TBB_JT:
+    case ARM::t2TBH_JT: {
       // These are jumptable branches, i.e. a branch followed by an inlined
       // jumptable. The size is 4 + 4 * number of entries. For TBB, each
       // entry is one byte; TBH two byte each.
-      unsigned EntrySize = (Opc == ARM::t2TBB)
-        ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
+      unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+        ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
       unsigned NumOps = TID.getNumOperands();
       MachineOperand JTOP =
         MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
@@ -557,7 +605,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // alignment issue.
       unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
       unsigned NumEntries = getNumJTEntries(JT, JTI);
-      if (Opc == ARM::t2TBB && (NumEntries & 1))
+      if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
         // Make sure the instruction that follows TBB is 2-byte aligned.
         // FIXME: Constant island pass should insert an "ALIGN" instruction
         // instead.
@@ -573,84 +621,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   return 0; // Not reached
 }
 
-unsigned
-ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                      int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case ARM::LDR:
-  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isReg() &&
-        MI->getOperand(3).isImm() &&
-        MI->getOperand(2).getReg() == 0 &&
-        MI->getOperand(3).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::t2LDRi12:
-  case ARM::tRestore:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::VLDRD:
-  case ARM::VLDRS:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-
-  return 0;
-}
-
-unsigned
-ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                     int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case ARM::STR:
-  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isReg() &&
-        MI->getOperand(3).isImm() &&
-        MI->getOperand(2).getReg() == 0 &&
-        MI->getOperand(3).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::t2STRi12:
-  case ARM::tSpill:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::VSTRD:
-  case ARM::VSTRS:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-
-  return 0;
-}
-
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DestReg, unsigned SrcReg,
@@ -715,8 +685,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   unsigned Align = MFI.getObjectAlignment(FI);
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            MachineMemOperand::MOStore, 0,
+    MF.getMachineMemOperand(MachinePointerInfo(
+                                         PseudoSourceValue::getFixedStack(FI)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FI),
                             Align);
 
@@ -728,9 +699,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 
   switch (RC->getID()) {
   case ARM::GPRRegClassID:
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                    .addReg(SrcReg, getKillRegState(isKill))
-                   .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     break;
   case ARM::SPRRegClassID:
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
@@ -747,17 +718,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   case ARM::QPRRegClassID:
   case ARM::QPR_VFP2RegClassID:
   case ARM::QPR_8RegClassID:
-    // FIXME: Neon instructions should support predicates
-    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
+    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
                      .addFrameIndex(FI).addImm(16)
                      .addReg(SrcReg, getKillRegState(isKill))
                      .addMemOperand(MMO));
     } else {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                      .addReg(SrcReg, getKillRegState(isKill))
                      .addFrameIndex(FI)
-                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                      .addMemOperand(MMO));
     }
     break;
@@ -766,18 +735,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
       // FIXME: It's possible to only store part of the QQ register if the
       // spilled def has a sub-register index.
-      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q))
-        .addFrameIndex(FI).addImm(16);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
-      AddDefaultPred(MIB.addMemOperand(MMO));
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+                     .addFrameIndex(FI).addImm(16)
+                     .addReg(SrcReg, getKillRegState(isKill))
+                     .addMemOperand(MMO));
     } else {
       MachineInstrBuilder MIB =
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
-                       .addFrameIndex(FI)
-                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+                       .addFrameIndex(FI))
         .addMemOperand(MMO);
       MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
       MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -787,9 +752,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     break;
   case ARM::QQQQPRRegClassID: {
     MachineInstrBuilder MIB =
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
-                     .addFrameIndex(FI)
-                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+                     .addFrameIndex(FI))
       .addMemOperand(MMO);
     MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
     MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -806,6 +770,53 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                     int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::STRrs:
+  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImm() &&
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::STRi12:
+  case ARM::t2STRi12:
+  case ARM::tSpill:
+  case ARM::VSTRD:
+  case ARM::VSTRS:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::VST1q64Pseudo:
+    if (MI->getOperand(0).isFI() &&
+        MI->getOperand(2).getSubReg() == 0) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+    break;
+  case ARM::VSTMQIA:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(0).getSubReg() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+
+  return 0;
+}
+
 void ARMBaseInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
@@ -817,8 +828,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            MachineMemOperand::MOLoad, 0,
+    MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FI),
                             Align);
 
@@ -830,8 +842,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 
   switch (RC->getID()) {
   case ARM::GPRRegClassID:
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
-                   .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     break;
   case ARM::SPRRegClassID:
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
@@ -846,31 +858,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   case ARM::QPRRegClassID:
   case ARM::QPR_VFP2RegClassID:
   case ARM::QPR_8RegClassID:
-    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
+    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
                      .addFrameIndex(FI).addImm(16)
                      .addMemOperand(MMO));
     } else {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                      .addFrameIndex(FI)
-                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                      .addMemOperand(MMO));
     }
     break;
   case ARM::QQPRRegClassID:
   case ARM::QQPR_VFP2RegClassID:
     if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
-      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q));
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
-      AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO));
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+                     .addFrameIndex(FI).addImm(16)
+                     .addMemOperand(MMO));
     } else {
       MachineInstrBuilder MIB =
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
-                       .addFrameIndex(FI)
-                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+                       .addFrameIndex(FI))
         .addMemOperand(MMO);
       MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
       MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -880,9 +887,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     break;
   case ARM::QQQQPRRegClassID: {
     MachineInstrBuilder MIB =
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
-                     .addFrameIndex(FI)
-                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+                     .addFrameIndex(FI))
       .addMemOperand(MMO);
     MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
     MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -899,6 +905,53 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::LDRrs:
+  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImm() &&
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::LDRi12:
+  case ARM::t2LDRi12:
+  case ARM::tRestore:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::VLD1q64Pseudo:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(0).getSubReg() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::VLDMQIA:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(0).getSubReg() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+
+  return 0;
+}
+
 MachineInstr*
 ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                            int FrameIx, uint64_t Offset,
@@ -921,7 +974,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
   ARMConstantPoolValue *ACPV =
     static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
 
-  unsigned PCLabelId = AFI->createConstPoolEntryUId();
+  unsigned PCLabelId = AFI->createPICLabelUId();
   ARMConstantPoolValue *NewCPV = 0;
   // FIXME: The below assumes PIC relocation model and that the function
   // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
@@ -991,12 +1044,18 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
 }
 
 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
-                                        const MachineInstr *MI1) const {
+                                        const MachineInstr *MI1,
+                                        const MachineRegisterInfo *MRI) const {
   int Opcode = MI0->getOpcode();
   if (Opcode == ARM::t2LDRpci ||
       Opcode == ARM::t2LDRpci_pic ||
       Opcode == ARM::tLDRpci ||
-      Opcode == ARM::tLDRpci_pic) {
+      Opcode == ARM::tLDRpci_pic ||
+      Opcode == ARM::MOV_ga_dyn ||
+      Opcode == ARM::MOV_ga_pcrel ||
+      Opcode == ARM::MOV_ga_pcrel_ldr ||
+      Opcode == ARM::t2MOV_ga_dyn ||
+      Opcode == ARM::t2MOV_ga_pcrel) {
     if (MI1->getOpcode() != Opcode)
       return false;
     if (MI0->getNumOperands() != MI1->getNumOperands())
@@ -1007,6 +1066,14 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
     if (MO0.getOffset() != MO1.getOffset())
       return false;
 
+    if (Opcode == ARM::MOV_ga_dyn ||
+        Opcode == ARM::MOV_ga_pcrel ||
+        Opcode == ARM::MOV_ga_pcrel_ldr ||
+        Opcode == ARM::t2MOV_ga_dyn ||
+        Opcode == ARM::t2MOV_ga_pcrel)
+      // Ignore the PC labels.
+      return MO0.getGlobal() == MO1.getGlobal();
+
     const MachineFunction *MF = MI0->getParent()->getParent();
     const MachineConstantPool *MCP = MF->getConstantPool();
     int CPI0 = MO0.getIndex();
@@ -1018,6 +1085,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
     ARMConstantPoolValue *ACPV1 =
       static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
     return ACPV0->hasSameValue(ACPV1);
+  } else if (Opcode == ARM::PICLDR) {
+    if (MI1->getOpcode() != Opcode)
+      return false;
+    if (MI0->getNumOperands() != MI1->getNumOperands())
+      return false;
+
+    unsigned Addr0 = MI0->getOperand(1).getReg();
+    unsigned Addr1 = MI1->getOperand(1).getReg();
+    if (Addr0 != Addr1) {
+      if (!MRI ||
+          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+          !TargetRegisterInfo::isVirtualRegister(Addr1))
+        return false;
+
+      // This assumes SSA form.
+      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+      // Check if the loaded value, e.g. a constantpool of a global address, are
+      // the same.
+      if (!produceSameValue(Def0, Def1, MRI))
+        return false;
+    }
+
+    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+      const MachineOperand &MO0 = MI0->getOperand(i);
+      const MachineOperand &MO1 = MI1->getOperand(i);
+      if (!MO0.isIdenticalTo(MO1))
+        return false;
+    }
+    return true;
   }
 
   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
@@ -1040,8 +1138,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   switch (Load1->getMachineOpcode()) {
   default:
     return false;
-  case ARM::LDR:
-  case ARM::LDRB:
+  case ARM::LDRi12:
+  case ARM::LDRBi12:
   case ARM::LDRD:
   case ARM::LDRH:
   case ARM::LDRSB:
@@ -1059,8 +1157,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   switch (Load2->getMachineOpcode()) {
   default:
     return false;
-  case ARM::LDR:
-  case ARM::LDRB:
+  case ARM::LDRi12:
+  case ARM::LDRBi12:
   case ARM::LDRD:
   case ARM::LDRH:
   case ARM::LDRSB:
@@ -1164,22 +1262,37 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
   return false;
 }
 
-bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
-  if (!NumInstrs)
+bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                           unsigned NumCyles,
+                                           unsigned ExtraPredCycles,
+                                           float Probability,
+                                           float Confidence) const {
+  if (!NumCyles)
     return false;
-  if (Subtarget.getCPUString() == "generic")
-    // Generic (and overly aggressive) if-conversion limits for testing.
-    return NumInstrs <= 10;
-  else if (Subtarget.hasV7Ops())
-    return NumInstrs <= 3;
-  return NumInstrs <= 2;
+
+  // Attempt to estimate the relative costs of predication versus branching.
+  float UnpredCost = Probability * NumCyles;
+  UnpredCost += 1.0; // The branch itself
+  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+  return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
 }
-  
+
 bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
-                    MachineBasicBlock &FMBB, unsigned NumF) const {
-  return NumT && NumF && NumT <= 2 && NumF <= 2;
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                    unsigned TCycles, unsigned TExtra,
+                    MachineBasicBlock &FMBB,
+                    unsigned FCycles, unsigned FExtra,
+                    float Probability, float Confidence) const {
+  if (!TCycles || !FCycles)
+    return false;
+
+  // Attempt to estimate the relative costs of predication versus branching.
+  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
+  UnpredCost += 1.0; // The branch itself
+  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1292,6 +1405,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     unsigned NumBits = 0;
     unsigned Scale = 1;
     switch (AddrMode) {
+    case ARMII::AddrMode_i12: {
+      ImmIdx = FrameRegIdx + 1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = 12;
+      break;
+    }
     case ARMII::AddrMode2: {
       ImmIdx = FrameRegIdx+2;
       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
@@ -1342,8 +1461,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       if ((unsigned)Offset <= Mask * Scale) {
         // Replace the FrameIndex with sp
         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
-        if (isSub)
-          ImmedOffset |= 1 << NumBits;
+        // FIXME: When addrmode2 goes away, this will simplify (like the
+        // T2 version), as the LDR.i12 versions don't need the encoding
+        // tricks for the offset value.
+        if (isSub) {
+          if (AddrMode == ARMII::AddrMode_i12)
+            ImmedOffset = -ImmedOffset;
+          else
+            ImmedOffset |= 1 << NumBits;
+        }
         ImmOp.ChangeToImmediate(ImmedOffset);
         Offset = 0;
         return true;
@@ -1351,8 +1477,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 
       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
       ImmedOffset = ImmedOffset & Mask;
-      if (isSub)
-        ImmedOffset |= 1 << NumBits;
+      if (isSub) {
+        if (AddrMode == ARMII::AddrMode_i12)
+          ImmedOffset = -ImmedOffset;
+        else
+          ImmedOffset |= 1 << NumBits;
+      }
       ImmOp.ChangeToImmediate(ImmedOffset);
       Offset &= ~(Mask*Scale);
     }
@@ -1363,25 +1493,88 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 }
 
 bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+               int &CmpValue) const {
   switch (MI->getOpcode()) {
   default: break;
   case ARM::CMPri:
-  case ARM::CMPzri:
   case ARM::t2CMPri:
-  case ARM::t2CMPzri:
     SrcReg = MI->getOperand(0).getReg();
+    CmpMask = ~0;
     CmpValue = MI->getOperand(1).getImm();
     return true;
+  case ARM::TSTri:
+  case ARM::t2TSTri:
+    SrcReg = MI->getOperand(0).getReg();
+    CmpMask = MI->getOperand(1).getImm();
+    CmpValue = 0;
+    return true;
   }
 
   return false;
 }
 
-/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
-/// that we can remove a "comparison with zero".
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+                              int CmpMask, bool CommonUse) {
+  switch (MI->getOpcode()) {
+    case ARM::ANDri:
+    case ARM::t2ANDri:
+      if (CmpMask != MI->getOperand(2).getImm())
+        return false;
+      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+        return true;
+      break;
+    case ARM::COPY: {
+      // Walk down one instruction which is potentially an 'and'.
+      const MachineInstr &Copy = *MI;
+      MachineBasicBlock::iterator AND(
+        llvm::next(MachineBasicBlock::iterator(MI)));
+      if (AND == MI->getParent()->end()) return false;
+      MI = AND;
+      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+                               CmpMask, true);
+    }
+  }
+
+  return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
 bool ARMBaseInstrInfo::
-ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+                     int CmpValue, const MachineRegisterInfo *MRI) const {
+  if (CmpValue != 0)
+    return false;
+
+  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+  if (llvm::next(DI) != MRI->def_end())
+    // Only support one definition.
+    return false;
+
+  MachineInstr *MI = &*DI;
+
+  // Masked compares sometimes use the same register as the corresponding 'and'.
+  if (CmpMask != ~0) {
+    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+      MI = 0;
+      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+           UE = MRI->use_end(); UI != UE; ++UI) {
+        if (UI->getParent() != CmpInstr->getParent()) continue;
+        MachineInstr *PotentialAND = &*UI;
+        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+          continue;
+        MI = PotentialAND;
+        break;
+      }
+      if (!MI) return false;
+    }
+  }
+
   // Conservatively refuse to convert an instruction which isn't in the same BB
   // as the comparison.
   if (MI->getParent() != CmpInstr->getParent())
@@ -1391,16 +1584,20 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
   // want to change.
   MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
     B = MI->getParent()->begin();
+
+  // Early exit if CmpInstr is at the beginning of the BB.
+  if (I == B) return false;
+
   --I;
   for (; I != E; --I) {
     const MachineInstr &Instr = *I;
 
     for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
       const MachineOperand &MO = Instr.getOperand(IO);
-      if (!MO.isReg() || !MO.isDef()) continue;
+      if (!MO.isReg()) continue;
 
-      // This instruction modifies CPSR before the one we want to change. We
-      // can't do this transformation.
+      // This instruction modifies or uses CPSR after the one we want to
+      // change. We can't do this transformation.
       if (MO.getReg() == ARM::CPSR)
         return false;
     }
@@ -1414,15 +1611,713 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
   switch (MI->getOpcode()) {
   default: break;
   case ARM::ADDri:
+  case ARM::ANDri:
+  case ARM::t2ANDri:
   case ARM::SUBri:
   case ARM::t2ADDri:
   case ARM::t2SUBri:
-    MI->RemoveOperand(5);
-    MachineInstrBuilder(MI)
-      .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+    // Toggle the optional operand to CPSR.
+    MI->getOperand(5).setReg(ARM::CPSR);
+    MI->getOperand(5).setIsDef(true);
     CmpInstr->eraseFromParent();
     return true;
   }
 
   return false;
 }
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+                                     MachineInstr *DefMI, unsigned Reg,
+                                     MachineRegisterInfo *MRI) const {
+  // Fold large immediates into add, sub, or, xor.
+  unsigned DefOpc = DefMI->getOpcode();
+  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+    return false;
+  if (!DefMI->getOperand(1).isImm())
+    // Could be t2MOVi32imm <ga:xx>
+    return false;
+
+  if (!MRI->hasOneNonDBGUse(Reg))
+    return false;
+
+  unsigned UseOpc = UseMI->getOpcode();
+  unsigned NewUseOpc = 0;
+  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+  bool Commute = false;
+  switch (UseOpc) {
+  default: return false;
+  case ARM::SUBrr:
+  case ARM::ADDrr:
+  case ARM::ORRrr:
+  case ARM::EORrr:
+  case ARM::t2SUBrr:
+  case ARM::t2ADDrr:
+  case ARM::t2ORRrr:
+  case ARM::t2EORrr: {
+    Commute = UseMI->getOperand(2).getReg() != Reg;
+    switch (UseOpc) {
+    default: break;
+    case ARM::SUBrr: {
+      if (Commute)
+        return false;
+      ImmVal = -ImmVal;
+      NewUseOpc = ARM::SUBri;
+      // Fallthrough
+    }
+    case ARM::ADDrr:
+    case ARM::ORRrr:
+    case ARM::EORrr: {
+      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+        return false;
+      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+      switch (UseOpc) {
+      default: break;
+      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+      }
+      break;
+    }
+    case ARM::t2SUBrr: {
+      if (Commute)
+        return false;
+      ImmVal = -ImmVal;
+      NewUseOpc = ARM::t2SUBri;
+      // Fallthrough
+    }
+    case ARM::t2ADDrr:
+    case ARM::t2ORRrr:
+    case ARM::t2EORrr: {
+      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+        return false;
+      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+      switch (UseOpc) {
+      default: break;
+      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+      }
+      break;
+    }
+    }
+  }
+  }
+
+  unsigned OpIdx = Commute ? 2 : 1;
+  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+  bool isKill = UseMI->getOperand(OpIdx).isKill();
+  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+                                      *UseMI, UseMI->getDebugLoc(),
+                                      get(NewUseOpc), NewReg)
+                              .addReg(Reg1, getKillRegState(isKill))
+                              .addImm(SOImmValV1)));
+  UseMI->setDesc(get(NewUseOpc));
+  UseMI->getOperand(1).setReg(NewReg);
+  UseMI->getOperand(1).setIsKill();
+  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+  DefMI->eraseFromParent();
+  return true;
+}
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                 const MachineInstr *MI) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  const TargetInstrDesc &Desc = MI->getDesc();
+  unsigned Class = Desc.getSchedClass();
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
+    return UOps;
+
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unexpected multi-uops instruction!");
+    break;
+  case ARM::VLDMQIA:
+  case ARM::VLDMQDB:
+  case ARM::VSTMQIA:
+  case ARM::VSTMQDB:
+    return 2;
+
+  // The number of uOps for load / store multiple are determined by the number
+  // registers.
+  //
+  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+  // same cycle. The scheduling for the first load / store must be done
+  // separately by assuming the the address is not 64-bit aligned.
+  //
+  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
+  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+  case ARM::VLDMDIA:
+  case ARM::VLDMDDB:
+  case ARM::VLDMDIA_UPD:
+  case ARM::VLDMDDB_UPD:
+  case ARM::VLDMSIA:
+  case ARM::VLDMSDB:
+  case ARM::VLDMSIA_UPD:
+  case ARM::VLDMSDB_UPD:
+  case ARM::VSTMDIA:
+  case ARM::VSTMDDB:
+  case ARM::VSTMDIA_UPD:
+  case ARM::VSTMDDB_UPD:
+  case ARM::VSTMSIA:
+  case ARM::VSTMSDB:
+  case ARM::VSTMSIA_UPD:
+  case ARM::VSTMSDB_UPD: {
+    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+    return (NumRegs / 2) + (NumRegs % 2) + 1;
+  }
+
+  case ARM::LDMIA_RET:
+  case ARM::LDMIA:
+  case ARM::LDMDA:
+  case ARM::LDMDB:
+  case ARM::LDMIB:
+  case ARM::LDMIA_UPD:
+  case ARM::LDMDA_UPD:
+  case ARM::LDMDB_UPD:
+  case ARM::LDMIB_UPD:
+  case ARM::STMIA:
+  case ARM::STMDA:
+  case ARM::STMDB:
+  case ARM::STMIB:
+  case ARM::STMIA_UPD:
+  case ARM::STMDA_UPD:
+  case ARM::STMDB_UPD:
+  case ARM::STMIB_UPD:
+  case ARM::tLDMIA:
+  case ARM::tLDMIA_UPD:
+  case ARM::tSTMIA:
+  case ARM::tSTMIA_UPD:
+  case ARM::tPOP_RET:
+  case ARM::tPOP:
+  case ARM::tPUSH:
+  case ARM::t2LDMIA_RET:
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2LDMDB_UPD:
+  case ARM::t2STMIA:
+  case ARM::t2STMDB:
+  case ARM::t2STMIA_UPD:
+  case ARM::t2STMDB_UPD: {
+    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+    if (Subtarget.isCortexA8()) {
+      if (NumRegs < 4)
+        return 2;
+      // 4 registers would be issued: 2, 2.
+      // 5 registers would be issued: 2, 2, 1.
+      UOps = (NumRegs / 2);
+      if (NumRegs % 2)
+        ++UOps;
+      return UOps;
+    } else if (Subtarget.isCortexA9()) {
+      UOps = (NumRegs / 2);
+      // If there are odd number of registers or if it's not 64-bit aligned,
+      // then it takes an extra AGU (Address Generation Unit) cycle.
+      if ((NumRegs % 2) ||
+          !MI->hasOneMemOperand() ||
+          (*MI->memoperands_begin())->getAlignment() < 8)
+        ++UOps;
+      return UOps;
+    } else {
+      // Assume the worst.
+      return NumRegs;
+    }
+  }
+  }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+                                  const TargetInstrDesc &DefTID,
+                                  unsigned DefClass,
+                                  unsigned DefIdx, unsigned DefAlign) const {
+  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    // Def is the address writeback.
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+
+  int DefCycle;
+  if (Subtarget.isCortexA8()) {
+    // (regno / 2) + (regno % 2) + 1
+    DefCycle = RegNo / 2 + 1;
+    if (RegNo % 2)
+      ++DefCycle;
+  } else if (Subtarget.isCortexA9()) {
+    DefCycle = RegNo;
+    bool isSLoad = false;
+
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::VLDMSIA:
+    case ARM::VLDMSDB:
+    case ARM::VLDMSIA_UPD:
+    case ARM::VLDMSDB_UPD:
+      isSLoad = true;
+      break;
+    }
+
+    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+    // then it takes an extra cycle.
+    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+      ++DefCycle;
+  } else {
+    // Assume the worst.
+    DefCycle = RegNo + 2;
+  }
+
+  return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+                                 const TargetInstrDesc &DefTID,
+                                 unsigned DefClass,
+                                 unsigned DefIdx, unsigned DefAlign) const {
+  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    // Def is the address writeback.
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+
+  int DefCycle;
+  if (Subtarget.isCortexA8()) {
+    // 4 registers would be issued: 1, 2, 1.
+    // 5 registers would be issued: 1, 2, 2.
+    DefCycle = RegNo / 2;
+    if (DefCycle < 1)
+      DefCycle = 1;
+    // Result latency is issue cycle + 2: E2.
+    DefCycle += 2;
+  } else if (Subtarget.isCortexA9()) {
+    DefCycle = (RegNo / 2);
+    // If there are odd number of registers or if it's not 64-bit aligned,
+    // then it takes an extra AGU (Address Generation Unit) cycle.
+    if ((RegNo % 2) || DefAlign < 8)
+      ++DefCycle;
+    // Result latency is AGU cycles + 2.
+    DefCycle += 2;
+  } else {
+    // Assume the worst.
+    DefCycle = RegNo + 2;
+  }
+
+  return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+                                  const TargetInstrDesc &UseTID,
+                                  unsigned UseClass,
+                                  unsigned UseIdx, unsigned UseAlign) const {
+  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    return ItinData->getOperandCycle(UseClass, UseIdx);
+
+  int UseCycle;
+  if (Subtarget.isCortexA8()) {
+    // (regno / 2) + (regno % 2) + 1
+    UseCycle = RegNo / 2 + 1;
+    if (RegNo % 2)
+      ++UseCycle;
+  } else if (Subtarget.isCortexA9()) {
+    UseCycle = RegNo;
+    bool isSStore = false;
+
+    switch (UseTID.getOpcode()) {
+    default: break;
+    case ARM::VSTMSIA:
+    case ARM::VSTMSDB:
+    case ARM::VSTMSIA_UPD:
+    case ARM::VSTMSDB_UPD:
+      isSStore = true;
+      break;
+    }
+
+    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
+    // then it takes an extra cycle.
+    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+      ++UseCycle;
+  } else {
+    // Assume the worst.
+    UseCycle = RegNo + 2;
+  }
+
+  return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+                                 const TargetInstrDesc &UseTID,
+                                 unsigned UseClass,
+                                 unsigned UseIdx, unsigned UseAlign) const {
+  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    return ItinData->getOperandCycle(UseClass, UseIdx);
+
+  int UseCycle;
+  if (Subtarget.isCortexA8()) {
+    UseCycle = RegNo / 2;
+    if (UseCycle < 2)
+      UseCycle = 2;
+    // Read in E3.
+    UseCycle += 2;
+  } else if (Subtarget.isCortexA9()) {
+    UseCycle = (RegNo / 2);
+    // If there are odd number of registers or if it's not 64-bit aligned,
+    // then it takes an extra AGU (Address Generation Unit) cycle.
+    if ((RegNo % 2) || UseAlign < 8)
+      ++UseCycle;
+  } else {
+    // Assume the worst.
+    UseCycle = 1;
+  }
+  return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                    const TargetInstrDesc &DefTID,
+                                    unsigned DefIdx, unsigned DefAlign,
+                                    const TargetInstrDesc &UseTID,
+                                    unsigned UseIdx, unsigned UseAlign) const {
+  unsigned DefClass = DefTID.getSchedClass();
+  unsigned UseClass = UseTID.getSchedClass();
+
+  if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+  // This may be a def / use of a variable_ops instruction, the operand
+  // latency might be determinable dynamically. Let the target try to
+  // figure it out.
+  int DefCycle = -1;
+  bool LdmBypass = false;
+  switch (DefTID.getOpcode()) {
+  default:
+    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+    break;
+
+  case ARM::VLDMDIA:
+  case ARM::VLDMDDB:
+  case ARM::VLDMDIA_UPD:
+  case ARM::VLDMDDB_UPD:
+  case ARM::VLDMSIA:
+  case ARM::VLDMSDB:
+  case ARM::VLDMSIA_UPD:
+  case ARM::VLDMSDB_UPD:
+    DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+    break;
+
+  case ARM::LDMIA_RET:
+  case ARM::LDMIA:
+  case ARM::LDMDA:
+  case ARM::LDMDB:
+  case ARM::LDMIB:
+  case ARM::LDMIA_UPD:
+  case ARM::LDMDA_UPD:
+  case ARM::LDMDB_UPD:
+  case ARM::LDMIB_UPD:
+  case ARM::tLDMIA:
+  case ARM::tLDMIA_UPD:
+  case ARM::tPUSH:
+  case ARM::t2LDMIA_RET:
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2LDMDB_UPD:
+    LdmBypass = 1;
+    DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+    break;
+  }
+
+  if (DefCycle == -1)
+    // We can't seem to determine the result latency of the def, assume it's 2.
+    DefCycle = 2;
+
+  int UseCycle = -1;
+  switch (UseTID.getOpcode()) {
+  default:
+    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+    break;
+
+  case ARM::VSTMDIA:
+  case ARM::VSTMDDB:
+  case ARM::VSTMDIA_UPD:
+  case ARM::VSTMDDB_UPD:
+  case ARM::VSTMSIA:
+  case ARM::VSTMSDB:
+  case ARM::VSTMSIA_UPD:
+  case ARM::VSTMSDB_UPD:
+    UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+    break;
+
+  case ARM::STMIA:
+  case ARM::STMDA:
+  case ARM::STMDB:
+  case ARM::STMIB:
+  case ARM::STMIA_UPD:
+  case ARM::STMDA_UPD:
+  case ARM::STMDB_UPD:
+  case ARM::STMIB_UPD:
+  case ARM::tSTMIA:
+  case ARM::tSTMIA_UPD:
+  case ARM::tPOP_RET:
+  case ARM::tPOP:
+  case ARM::t2STMIA:
+  case ARM::t2STMDB:
+  case ARM::t2STMIA_UPD:
+  case ARM::t2STMDB_UPD:
+    UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+    break;
+  }
+
+  if (UseCycle == -1)
+    // Assume it's read in the first stage.
+    UseCycle = 1;
+
+  UseCycle = DefCycle - UseCycle + 1;
+  if (UseCycle > 0) {
+    if (LdmBypass) {
+      // It's a variable_ops instruction so we can't use DefIdx here. Just use
+      // first def operand.
+      if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+                                          UseClass, UseIdx))
+        --UseCycle;
+    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+                                               UseClass, UseIdx)) {
+      --UseCycle;
+    }
+  }
+
+  return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const {
+  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+      DefMI->isRegSequence() || DefMI->isImplicitDef())
+    return 1;
+
+  const TargetInstrDesc &DefTID = DefMI->getDesc();
+  if (!ItinData || ItinData->isEmpty())
+    return DefTID.mayLoad() ? 3 : 1;
+
+  const TargetInstrDesc &UseTID = UseMI->getDesc();
+  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+  if (DefMO.getReg() == ARM::CPSR) {
+    if (DefMI->getOpcode() == ARM::FMSTAT) {
+      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+      return Subtarget.isCortexA9() ? 1 : 20;
+    }
+
+    // CPSR set and branch can be paired in the same cycle.
+    if (UseTID.isBranch())
+      return 0;
+  }
+
+  unsigned DefAlign = DefMI->hasOneMemOperand()
+    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+  unsigned UseAlign = UseMI->hasOneMemOperand()
+    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+                                  UseTID, UseIdx, UseAlign);
+
+  if (Latency > 1 &&
+      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+    // variants are one cycle cheaper.
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal = DefMI->getOperand(3).getImm();
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (ShImm == 0 ||
+          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+        --Latency;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl only.
+      unsigned ShAmt = DefMI->getOperand(3).getImm();
+      if (ShAmt == 0 || ShAmt == 2)
+        --Latency;
+      break;
+    }
+    }
+  }
+
+  return Latency;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                    SDNode *DefNode, unsigned DefIdx,
+                                    SDNode *UseNode, unsigned UseIdx) const {
+  if (!DefNode->isMachineOpcode())
+    return 1;
+
+  const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+
+  if (isZeroCost(DefTID.Opcode))
+    return 0;
+
+  if (!ItinData || ItinData->isEmpty())
+    return DefTID.mayLoad() ? 3 : 1;
+
+  if (!UseNode->isMachineOpcode()) {
+    int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+    if (Subtarget.isCortexA9())
+      return Latency <= 2 ? 1 : Latency - 1;
+    else
+      return Latency <= 3 ? 1 : Latency - 2;
+  }
+
+  const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+  unsigned DefAlign = !DefMN->memoperands_empty()
+    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+  unsigned UseAlign = !UseMN->memoperands_empty()
+    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+                                  UseTID, UseIdx, UseAlign);
+
+  if (Latency > 1 &&
+      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+    // variants are one cycle cheaper.
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal =
+        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (ShImm == 0 ||
+          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+        --Latency;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl only.
+      unsigned ShAmt =
+        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+      if (ShAmt == 0 || ShAmt == 2)
+        --Latency;
+      break;
+    }
+    }
+  }
+
+  return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                      const MachineInstr *MI,
+                                      unsigned *PredCost) const {
+  if (MI->isCopyLike() || MI->isInsertSubreg() ||
+      MI->isRegSequence() || MI->isImplicitDef())
+    return 1;
+
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned Class = TID.getSchedClass();
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+    // When predicated, CPSR is an additional source operand for CPSR updating
+    // instructions, this apparently increases their latencies.
+    *PredCost = 1;
+  if (UOps)
+    return ItinData->getStageLatency(Class);
+  return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                      SDNode *Node) const {
+  if (!Node->isMachineOpcode())
+    return 1;
+
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Opcode = Node->getMachineOpcode();
+  switch (Opcode) {
+  default:
+    return ItinData->getStageLatency(get(Opcode).getSchedClass());
+  case ARM::VLDMQIA:
+  case ARM::VLDMQDB:
+  case ARM::VSTMQIA:
+  case ARM::VSTMQDB:
+    return 2;
+  }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineRegisterInfo *MRI,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
+  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+  if (Subtarget.isCortexA8() &&
+      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+    // CortexA8 VFP instructions are not pipelined.
+    return true;
+
+  // Hoist VFP / NEON instructions with 4 or higher latency.
+  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  if (Latency <= 3)
+    return false;
+  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+                 const MachineInstr *DefMI, unsigned DefIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return false;
+
+  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+  if (DDomain == ARMII::DomainGeneral) {
+    unsigned DefClass = DefMI->getDesc().getSchedClass();
+    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+    return (DefCycle != -1 && DefCycle <= 2);
+  }
+  return false;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+                                     unsigned &AddSubOpc,
+                                     bool &NegAcc, bool &HasLane) const {
+  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+  if (I == MLxEntryMap.end())
+    return false;
+
+  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+  MulOpc = Entry.MulOpc;
+  AddSubOpc = Entry.AddSubOpc;
+  NegAcc = Entry.NegAcc;
+  HasLane = Entry.HasLane;
+  return true;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b4f4a33a70ad..1fb88726d0de 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -17,6 +17,8 @@
 #include "ARM.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
 
 namespace llvm {
   class ARMSubtarget;
@@ -33,7 +35,7 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // This four-bit field describes the addressing mode used.
 
-    AddrModeMask  = 0xf,
+    AddrModeMask  = 0x1f,
     AddrModeNone    = 0,
     AddrMode1       = 1,
     AddrMode2       = 2,
@@ -50,9 +52,10 @@ namespace ARMII {
     AddrModeT2_so   = 13,
     AddrModeT2_pc   = 14, // +/- i12 for pc relative data
     AddrModeT2_i8s4 = 15, // i8 * 4
+    AddrMode_i12    = 16,
 
     // Size* - Flags to keep track of the size of an instruction.
-    SizeShift     = 4,
+    SizeShift     = 5,
     SizeMask      = 7 << SizeShift,
     SizeSpecial   = 1,   // 0 byte pseudo or special case.
     Size8Bytes    = 2,
@@ -61,7 +64,7 @@ namespace ARMII {
 
     // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
     // and store ops only.  Generic "updating" flag is used for ld/st multiple.
-    IndexModeShift = 7,
+    IndexModeShift = 8,
     IndexModeMask  = 3 << IndexModeShift,
     IndexModePre   = 1,
     IndexModePost  = 2,
@@ -70,7 +73,7 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Instruction encoding formats.
     //
-    FormShift     = 9,
+    FormShift     = 10,
     FormMask      = 0x3f << FormShift,
 
     // Pseudo instructions
@@ -143,15 +146,15 @@ namespace ARMII {
 
     // UnaryDP - Indicates this is a unary data processing instruction, i.e.
     // it doesn't have a Rn operand.
-    UnaryDP       = 1 << 15,
+    UnaryDP       = 1 << 16,
 
     // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
     // a 16-bit Thumb instruction if certain conditions are met.
-    Xform16Bit    = 1 << 16,
+    Xform16Bit    = 1 << 17,
 
     //===------------------------------------------------------------------===//
     // Code domain.
-    DomainShift   = 17,
+    DomainShift   = 18,
     DomainMask    = 3 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
@@ -160,6 +163,11 @@ namespace ARMII {
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
     // machine instructions.
+    //
+    // FIXME: This list will need adjusting/fixing as the MC code emitter
+    // takes shape and the ARMCodeEmitter.cpp bits go away.
+    ShiftTypeShift = 4,
+
     M_BitShift     = 5,
     ShiftImmShift  = 5,
     ShiftShift     = 7,
@@ -181,29 +189,15 @@ namespace ARMII {
     I_BitShift     = 25,
     CondShift      = 28
   };
-
-  /// Target Operand Flag enum.
-  enum TOF {
-    //===------------------------------------------------------------------===//
-    // ARM Specific MachineOperand flags.
-
-    MO_NO_FLAG,
-
-    /// MO_LO16 - On a symbol operand, this represents a relocation containing
-    /// lower 16 bit of the address. Used only via movw instruction.
-    MO_LO16,
-
-    /// MO_HI16 - On a symbol operand, this represents a relocation containing
-    /// higher 16 bit of the address. Used only via movt instruction.
-    MO_HI16
-  };
 }
 
 class ARMBaseInstrInfo : public TargetInstrInfoImpl {
   const ARMSubtarget &Subtarget;
+
 protected:
   // Can be only subclassed.
   explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
 public:
   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
@@ -216,10 +210,13 @@ public:
   virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
   const ARMSubtarget &getSubtarget() const { return Subtarget; }
 
-  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI,
-                                 const TargetRegisterInfo *TRI) const;
+  ScheduleHazardRecognizer *
+  CreateTargetHazardRecognizer(const TargetMachine *TM,
+                               const ScheduleDAG *DAG) const;
+
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                     const ScheduleDAG *DAG) const;
 
   // Branch analysis.
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -301,7 +298,8 @@ public:
   MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
 
   virtual bool produceSameValue(const MachineInstr *MI0,
-                                const MachineInstr *MI1) const;
+                                const MachineInstr *MI1,
+                                const MachineRegisterInfo *MRI) const;
 
   /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
   /// determine if two loads are loading from the same base address. It should
@@ -328,26 +326,117 @@ public:
                                     const MachineFunction &MF) const;
 
   virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                   unsigned NumInstrs) const;
+                                   unsigned NumCyles, unsigned ExtraPredCycles,
+                                   float Prob, float Confidence) const;
 
-  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
-                                   MachineBasicBlock &FMBB,unsigned NumF) const;
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumT, unsigned ExtraT,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumF, unsigned ExtraF,
+                                   float Probability, float Confidence) const;
 
   virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
-                                         unsigned NumInstrs) const {
-    return NumInstrs && NumInstrs == 1;
+                                         unsigned NumCyles,
+                                         float Probability,
+                                         float Confidence) const {
+    return NumCyles == 1;
   }
 
   /// AnalyzeCompare - For a comparison instruction, return the source register
   /// in SrcReg and the value it compares against in CmpValue. Return true if
   /// the comparison instruction can be analyzed.
   virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
-                              int &CmpValue) const;
+                              int &CmpMask, int &CmpValue) const;
 
-  /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+  /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
   /// that we can remove a "comparison with zero".
-  virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
-                                    MachineInstr *CmpInstr) const;
+  virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+                                    int CmpMask, int CmpValue,
+                                    const MachineRegisterInfo *MRI) const;
+
+  /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+  /// instruction, try to fold the immediate into the use instruction.
+  virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const;
+
+  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+                                  const MachineInstr *MI) const;
+
+  virtual
+  int getOperandLatency(const InstrItineraryData *ItinData,
+                        const MachineInstr *DefMI, unsigned DefIdx,
+                        const MachineInstr *UseMI, unsigned UseIdx) const;
+  virtual
+  int getOperandLatency(const InstrItineraryData *ItinData,
+                        SDNode *DefNode, unsigned DefIdx,
+                        SDNode *UseNode, unsigned UseIdx) const;
+private:
+  int getVLDMDefCycle(const InstrItineraryData *ItinData,
+                      const TargetInstrDesc &DefTID,
+                      unsigned DefClass,
+                      unsigned DefIdx, unsigned DefAlign) const;
+  int getLDMDefCycle(const InstrItineraryData *ItinData,
+                     const TargetInstrDesc &DefTID,
+                     unsigned DefClass,
+                     unsigned DefIdx, unsigned DefAlign) const;
+  int getVSTMUseCycle(const InstrItineraryData *ItinData,
+                      const TargetInstrDesc &UseTID,
+                      unsigned UseClass,
+                      unsigned UseIdx, unsigned UseAlign) const;
+  int getSTMUseCycle(const InstrItineraryData *ItinData,
+                     const TargetInstrDesc &UseTID,
+                     unsigned UseClass,
+                     unsigned UseIdx, unsigned UseAlign) const;
+  int getOperandLatency(const InstrItineraryData *ItinData,
+                        const TargetInstrDesc &DefTID,
+                        unsigned DefIdx, unsigned DefAlign,
+                        const TargetInstrDesc &UseTID,
+                        unsigned UseIdx, unsigned UseAlign) const;
+
+  int getInstrLatency(const InstrItineraryData *ItinData,
+                      const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+  int getInstrLatency(const InstrItineraryData *ItinData,
+                      SDNode *Node) const;
+
+  bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineRegisterInfo *MRI,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const;
+  bool hasLowDefLatency(const InstrItineraryData *ItinData,
+                        const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+  /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+  /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+  /// MLx table.
+  DenseMap<unsigned, unsigned> MLxEntryMap;
+
+  /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+  /// stalls when scheduled together with fp MLA / MLS opcodes.
+  SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+  /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS
+  /// instruction.
+  bool isFpMLxInstruction(unsigned Opcode) const {
+    return MLxEntryMap.count(Opcode);
+  }
+
+  /// isFpMLxInstruction - This version also returns the multiply opcode and the
+  /// addition / subtraction opcode to expand to. Return true for 'HasLane' for
+  /// the MLX instructions with an extra lane operand.
+  bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+                          unsigned &AddSubOpc, bool &NegAcc,
+                          bool &HasLane) const;
+
+  /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+  /// will cause stalls when scheduled after (within 4-cycle window) a fp
+  /// MLA / MLS instruction.
+  bool canCauseFpMLxStall(unsigned Opcode) const {
+    return MLxHazardOpcodes.count(Opcode);
+  }
 };
 
 static inline
@@ -389,7 +478,7 @@ bool isJumpTableBranchOpcode(int Opc) {
 
 static inline
 bool isIndirectBranchOpcode(int Opc) {
-  return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+  return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -413,6 +502,12 @@ void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                             unsigned DestReg, unsigned BaseReg, int NumBytes,
                             ARMCC::CondCodes Pred, unsigned PredReg,
                             const ARMBaseInstrInfo &TII);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator &MBBI,
+                               unsigned DestReg, unsigned BaseReg,
+                               int NumBytes, const TargetInstrInfo &TII,
+                               const ARMBaseRegisterInfo& MRI,
+                               DebugLoc dl);
 
 
 /// rewriteARMFrameIndex / rewriteT2FrameIndex -
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index eceafad63f17..67a4b7d49398 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -15,6 +15,7 @@
 #include "ARMAddressingModes.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
 #include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
@@ -32,120 +33,25 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CommandLine.h"
 
-namespace llvm {
+using namespace llvm;
+
 static cl::opt<bool>
 ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
           cl::desc("Force use of virtual base registers for stack load/store"));
 static cl::opt<bool>
 EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
           cl::desc("Enable pre-regalloc stack frame index allocation"));
-}
-
-using namespace llvm;
-
 static cl::opt<bool>
 EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
           cl::desc("Enable use of a base pointer for complex stack frames"));
 
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
-                                                   bool *isSPVFP) {
-  if (isSPVFP)
-    *isSPVFP = false;
-
-  using namespace ARM;
-  switch (RegEnum) {
-  default:
-    llvm_unreachable("Unknown ARM register!");
-  case R0:  case D0:  case Q0:  return 0;
-  case R1:  case D1:  case Q1:  return 1;
-  case R2:  case D2:  case Q2:  return 2;
-  case R3:  case D3:  case Q3:  return 3;
-  case R4:  case D4:  case Q4:  return 4;
-  case R5:  case D5:  case Q5:  return 5;
-  case R6:  case D6:  case Q6:  return 6;
-  case R7:  case D7:  case Q7:  return 7;
-  case R8:  case D8:  case Q8:  return 8;
-  case R9:  case D9:  case Q9:  return 9;
-  case R10: case D10: case Q10: return 10;
-  case R11: case D11: case Q11: return 11;
-  case R12: case D12: case Q12: return 12;
-  case SP:  case D13: case Q13: return 13;
-  case LR:  case D14: case Q14: return 14;
-  case PC:  case D15: case Q15: return 15;
-
-  case D16: return 16;
-  case D17: return 17;
-  case D18: return 18;
-  case D19: return 19;
-  case D20: return 20;
-  case D21: return 21;
-  case D22: return 22;
-  case D23: return 23;
-  case D24: return 24;
-  case D25: return 25;
-  case D26: return 26;
-  case D27: return 27;
-  case D28: return 28;
-  case D29: return 29;
-  case D30: return 30;
-  case D31: return 31;
-
-  case S0: case S1: case S2: case S3:
-  case S4: case S5: case S6: case S7:
-  case S8: case S9: case S10: case S11:
-  case S12: case S13: case S14: case S15:
-  case S16: case S17: case S18: case S19:
-  case S20: case S21: case S22: case S23:
-  case S24: case S25: case S26: case S27:
-  case S28: case S29: case S30: case S31: {
-    if (isSPVFP)
-      *isSPVFP = true;
-    switch (RegEnum) {
-    default: return 0; // Avoid compile time warning.
-    case S0: return 0;
-    case S1: return 1;
-    case S2: return 2;
-    case S3: return 3;
-    case S4: return 4;
-    case S5: return 5;
-    case S6: return 6;
-    case S7: return 7;
-    case S8: return 8;
-    case S9: return 9;
-    case S10: return 10;
-    case S11: return 11;
-    case S12: return 12;
-    case S13: return 13;
-    case S14: return 14;
-    case S15: return 15;
-    case S16: return 16;
-    case S17: return 17;
-    case S18: return 18;
-    case S19: return 19;
-    case S20: return 20;
-    case S21: return 21;
-    case S22: return 22;
-    case S23: return 23;
-    case S24: return 24;
-    case S25: return 25;
-    case S26: return 26;
-    case S27: return 27;
-    case S28: return 28;
-    case S29: return 29;
-    case S30: return 30;
-    case S31: return 31;
-    }
-  }
-  }
-}
-
 ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
                                          const ARMSubtarget &sti)
   : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
@@ -180,12 +86,14 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector ARMBaseRegisterInfo::
 getReservedRegs(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   // FIXME: avoid re-calculating this everytime.
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
   Reserved.set(ARM::FPSCR);
-  if (hasFP(MF))
+  if (TFI->hasFP(MF))
     Reserved.set(FramePtr);
   if (hasBasePointer(MF))
     Reserved.set(BasePtr);
@@ -197,6 +105,8 @@ getReservedRegs(const MachineFunction &MF) const {
 
 bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
                                         unsigned Reg) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   switch (Reg) {
   default: break;
   case ARM::SP:
@@ -208,7 +118,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
     break;
   case ARM::R7:
   case ARM::R11:
-    if (FramePtr == Reg && hasFP(MF))
+    if (FramePtr == Reg && TFI->hasFP(MF))
       return true;
     break;
   case ARM::R9:
@@ -444,6 +354,7 @@ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
 ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
                                         unsigned HintType, unsigned HintReg,
                                         const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   // Alternative register allocation orders when favoring even / odd registers
   // of register pairs.
 
@@ -525,7 +436,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
       return std::make_pair(RC->allocation_order_begin(MF),
                             RC->allocation_order_end(MF));
 
-    if (!hasFP(MF)) {
+    if (!TFI->hasFP(MF)) {
       if (!STI.isR9Reserved())
         return std::make_pair(GPREven1,
                               GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -554,7 +465,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
       return std::make_pair(RC->allocation_order_begin(MF),
                             RC->allocation_order_end(MF));
 
-    if (!hasFP(MF)) {
+    if (!TFI->hasFP(MF)) {
       if (!STI.isR9Reserved())
         return std::make_pair(GPROdd1,
                               GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -606,7 +517,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
   std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
   if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
        Hint.first == (unsigned)ARMRI::RegPairEven) &&
-      Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+      TargetRegisterInfo::isVirtualRegister(Hint.second)) {
     // If 'Reg' is one of the even / odd register pair and it's now changed
     // (e.g. coalesced) into a different register. The other register of the
     // pair allocation hint must be updated to reflect the relationship
@@ -619,23 +530,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
   }
 }
 
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register.  This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-///
-bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
-  // Mac OS X requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetDarwin())
-    return true;
-
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  // Always eliminate non-leaf frame pointers.
-  return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
-          needsStackRealignment(MF) ||
-          MFI->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken());
-}
-
 bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -681,7 +575,7 @@ bool ARMBaseRegisterInfo::
 needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *F = MF.getFunction();
-  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
   bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
                                F->hasFnAttr(Attribute::StackAlignment));
 
@@ -697,417 +591,19 @@ cannotEliminateFrame(const MachineFunction &MF) const {
     || needsStackRealignment(MF);
 }
 
-/// estimateStackSize - Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
-    if (FixedOff > Offset) Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset+Align-1)/Align*Align;
-  }
-  return (unsigned)Offset;
-}
-
-/// estimateRSStackSizeLimit - Look at each instruction that references stack
-/// frames and return the stack size limit beyond which some of these
-/// instructions will require a scratch register during their expansion later.
-unsigned
-ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
-  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned Limit = (1 << 12) - 1;
-  for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
-    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
-         I != E; ++I) {
-      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-        if (!I->getOperand(i).isFI()) continue;
-
-        // When using ADDri to get the address of a stack object, 255 is the
-        // largest offset guaranteed to fit in the immediate offset.
-        if (I->getOpcode() == ARM::ADDri) {
-          Limit = std::min(Limit, (1U << 8) - 1);
-          break;
-        }
-
-        // Otherwise check the addressing mode.
-        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
-        case ARMII::AddrMode3:
-        case ARMII::AddrModeT2_i8:
-          Limit = std::min(Limit, (1U << 8) - 1);
-          break;
-        case ARMII::AddrMode5:
-        case ARMII::AddrModeT2_i8s4:
-          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
-          break;
-        case ARMII::AddrModeT2_i12:
-          // i12 supports only positive offset so these will be converted to
-          // i8 opcodes. See llvm::rewriteT2FrameIndex.
-          if (hasFP(MF) && AFI->hasStackFrame())
-            Limit = std::min(Limit, (1U << 8) - 1);
-          break;
-        case ARMII::AddrMode6:
-          // Addressing mode 6 (load/store) instructions can't encode an
-          // immediate offset for stack references.
-          return 0;
-        default:
-          break;
-        }
-        break; // At most one FI per instruction
-      }
-    }
-  }
-
-  return Limit;
-}
-
-static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
-                                       const ARMBaseInstrInfo &TII) {
-  unsigned FnSize = 0;
-  for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
-       MBBI != E; ++MBBI) {
-    const MachineBasicBlock &MBB = *MBBI;
-    for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
-         I != E; ++I)
-      FnSize += TII.GetInstSizeInBytes(I);
-  }
-  return FnSize;
-}
-
-void
-ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                       RegScavenger *RS) const {
-  // This tells PEI to spill the FP as if it is any other callee-save register
-  // to take advantage the eliminateFrameIndex machinery. This also ensures it
-  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
-  // to combine multiple loads / stores.
-  bool CanEliminateFrame = true;
-  bool CS1Spilled = false;
-  bool LRSpilled = false;
-  unsigned NumGPRSpills = 0;
-  SmallVector<unsigned, 4> UnspilledCS1GPRs;
-  SmallVector<unsigned, 4> UnspilledCS2GPRs;
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
-  // scratch register.
-  // FIXME: It will be better just to find spare register here.
-  if (needsStackRealignment(MF) &&
-      AFI->isThumb2Function())
-    MF.getRegInfo().setPhysRegUsed(ARM::R4);
-
-  // Spill LR if Thumb1 function uses variable length argument lists.
-  if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
-    MF.getRegInfo().setPhysRegUsed(ARM::LR);
-
-  // Spill the BasePtr if it's used.
-  if (hasBasePointer(MF))
-    MF.getRegInfo().setPhysRegUsed(BasePtr);
-
-  // Don't spill FP if the frame can be eliminated. This is determined
-  // by scanning the callee-save registers to see if any is used.
-  const unsigned *CSRegs = getCalleeSavedRegs();
-  for (unsigned i = 0; CSRegs[i]; ++i) {
-    unsigned Reg = CSRegs[i];
-    bool Spilled = false;
-    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
-      AFI->setCSRegisterIsSpilled(Reg);
-      Spilled = true;
-      CanEliminateFrame = false;
-    } else {
-      // Check alias registers too.
-      for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
-        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
-          Spilled = true;
-          CanEliminateFrame = false;
-        }
-      }
-    }
-
-    if (!ARM::GPRRegisterClass->contains(Reg))
-      continue;
-
-    if (Spilled) {
-      NumGPRSpills++;
-
-      if (!STI.isTargetDarwin()) {
-        if (Reg == ARM::LR)
-          LRSpilled = true;
-        CS1Spilled = true;
-        continue;
-      }
-
-      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
-      switch (Reg) {
-      case ARM::LR:
-        LRSpilled = true;
-        // Fallthrough
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-        CS1Spilled = true;
-        break;
-      default:
-        break;
-      }
-    } else {
-      if (!STI.isTargetDarwin()) {
-        UnspilledCS1GPRs.push_back(Reg);
-        continue;
-      }
-
-      switch (Reg) {
-      case ARM::R4:
-      case ARM::R5:
-      case ARM::R6:
-      case ARM::R7:
-      case ARM::LR:
-        UnspilledCS1GPRs.push_back(Reg);
-        break;
-      default:
-        UnspilledCS2GPRs.push_back(Reg);
-        break;
-      }
-    }
-  }
-
-  bool ForceLRSpill = false;
-  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
-    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
-    // Force LR to be spilled if the Thumb function size is > 2048. This enables
-    // use of BL to implement far jump. If it turns out that it's not needed
-    // then the branch fix up path will undo it.
-    if (FnSize >= (1 << 11)) {
-      CanEliminateFrame = false;
-      ForceLRSpill = true;
-    }
-  }
-
-  // If any of the stack slot references may be out of range of an immediate
-  // offset, make sure a register (or a spill slot) is available for the
-  // register scavenger. Note that if we're indexing off the frame pointer, the
-  // effective stack size is 4 bytes larger since the FP points to the stack
-  // slot of the previous FP. Also, if we have variable sized objects in the
-  // function, stack slot references will often be negative, and some of
-  // our instructions are positive-offset only, so conservatively consider
-  // that case to want a spill slot (or register) as well. Similarly, if
-  // the function adjusts the stack pointer during execution and the
-  // adjustments aren't already part of our stack size estimate, our offset
-  // calculations may be off, so be conservative.
-  // FIXME: We could add logic to be more precise about negative offsets
-  //        and which instructions will need a scratch register for them. Is it
-  //        worth the effort and added fragility?
-  bool BigStack =
-    (RS &&
-     (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
-      estimateRSStackSizeLimit(MF)))
-    || MFI->hasVarSizedObjects()
-    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
-
-  bool ExtraCSSpill = false;
-  if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
-    AFI->setHasStackFrame(true);
-
-    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
-    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
-    if (!LRSpilled && CS1Spilled) {
-      MF.getRegInfo().setPhysRegUsed(ARM::LR);
-      AFI->setCSRegisterIsSpilled(ARM::LR);
-      NumGPRSpills++;
-      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
-                                    UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
-      ForceLRSpill = false;
-      ExtraCSSpill = true;
-    }
-
-    if (hasFP(MF)) {
-      MF.getRegInfo().setPhysRegUsed(FramePtr);
-      NumGPRSpills++;
-    }
-
-    // If stack and double are 8-byte aligned and we are spilling an odd number
-    // of GPRs. Spill one extra callee save GPR so we won't have to pad between
-    // the integer and double callee save areas.
-    unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
-      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
-        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
-          unsigned Reg = UnspilledCS1GPRs[i];
-          // Don't spill high register if the function is thumb1
-          if (!AFI->isThumb1OnlyFunction() ||
-              isARMLowRegister(Reg) || Reg == ARM::LR) {
-            MF.getRegInfo().setPhysRegUsed(Reg);
-            AFI->setCSRegisterIsSpilled(Reg);
-            if (!isReservedReg(MF, Reg))
-              ExtraCSSpill = true;
-            break;
-          }
-        }
-      } else if (!UnspilledCS2GPRs.empty() &&
-                 !AFI->isThumb1OnlyFunction()) {
-        unsigned Reg = UnspilledCS2GPRs.front();
-        MF.getRegInfo().setPhysRegUsed(Reg);
-        AFI->setCSRegisterIsSpilled(Reg);
-        if (!isReservedReg(MF, Reg))
-          ExtraCSSpill = true;
-      }
-    }
-
-    // Estimate if we might need to scavenge a register at some point in order
-    // to materialize a stack offset. If so, either spill one additional
-    // callee-saved register or reserve a special spill slot to facilitate
-    // register scavenging. Thumb1 needs a spill slot for stack pointer
-    // adjustments also, even when the frame itself is small.
-    if (BigStack && !ExtraCSSpill) {
-      // If any non-reserved CS register isn't spilled, just spill one or two
-      // extra. That should take care of it!
-      unsigned NumExtras = TargetAlign / 4;
-      SmallVector<unsigned, 2> Extras;
-      while (NumExtras && !UnspilledCS1GPRs.empty()) {
-        unsigned Reg = UnspilledCS1GPRs.back();
-        UnspilledCS1GPRs.pop_back();
-        if (!isReservedReg(MF, Reg) &&
-            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
-             Reg == ARM::LR)) {
-          Extras.push_back(Reg);
-          NumExtras--;
-        }
-      }
-      // For non-Thumb1 functions, also check for hi-reg CS registers
-      if (!AFI->isThumb1OnlyFunction()) {
-        while (NumExtras && !UnspilledCS2GPRs.empty()) {
-          unsigned Reg = UnspilledCS2GPRs.back();
-          UnspilledCS2GPRs.pop_back();
-          if (!isReservedReg(MF, Reg)) {
-            Extras.push_back(Reg);
-            NumExtras--;
-          }
-        }
-      }
-      if (Extras.size() && NumExtras == 0) {
-        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
-          MF.getRegInfo().setPhysRegUsed(Extras[i]);
-          AFI->setCSRegisterIsSpilled(Extras[i]);
-        }
-      } else if (!AFI->isThumb1OnlyFunction()) {
-        // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
-        // closest to SP or frame pointer.
-        const TargetRegisterClass *RC = ARM::GPRRegisterClass;
-        RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
-                                                           RC->getAlignment(),
-                                                           false));
-      }
-    }
-  }
-
-  if (ForceLRSpill) {
-    MF.getRegInfo().setPhysRegUsed(ARM::LR);
-    AFI->setCSRegisterIsSpilled(ARM::LR);
-    AFI->setLRIsSpilledForFarJump(true);
-  }
-}
-
 unsigned ARMBaseRegisterInfo::getRARegister() const {
   return ARM::LR;
 }
 
 unsigned
 ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  if (hasFP(MF))
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (TFI->hasFP(MF))
     return FramePtr;
   return ARM::SP;
 }
 
-// Provide a base+offset reference to an FI slot for debug info. It's the
-// same as what we use for resolving the code-gen references for now.
-// FIXME: This can go wrong when references are SP-relative and simple call
-//        frames aren't used.
-int
-ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
-                                            unsigned &FrameReg) const {
-  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
-}
-
-int
-ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
-                                                int FI,
-                                                unsigned &FrameReg,
-                                                int SPAdj) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
-  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
-  bool isFixed = MFI->isFixedObjectIndex(FI);
-
-  FrameReg = ARM::SP;
-  Offset += SPAdj;
-  if (AFI->isGPRCalleeSavedArea1Frame(FI))
-    return Offset - AFI->getGPRCalleeSavedArea1Offset();
-  else if (AFI->isGPRCalleeSavedArea2Frame(FI))
-    return Offset - AFI->getGPRCalleeSavedArea2Offset();
-  else if (AFI->isDPRCalleeSavedAreaFrame(FI))
-    return Offset - AFI->getDPRCalleeSavedAreaOffset();
-
-  // When dynamically realigning the stack, use the frame pointer for
-  // parameters, and the stack/base pointer for locals.
-  if (needsStackRealignment(MF)) {
-    assert (hasFP(MF) && "dynamic stack realignment without a FP!");
-    if (isFixed) {
-      FrameReg = getFrameRegister(MF);
-      Offset = FPOffset;
-    } else if (MFI->hasVarSizedObjects()) {
-      assert(hasBasePointer(MF) &&
-             "VLAs and dynamic stack alignment, but missing base pointer!");
-      FrameReg = BasePtr;
-    }
-    return Offset;
-  }
-
-  // If there is a frame pointer, use it when we can.
-  if (hasFP(MF) && AFI->hasStackFrame()) {
-    // Use frame pointer to reference fixed objects. Use it for locals if
-    // there are VLAs (and thus the SP isn't reliable as a base).
-    if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
-      FrameReg = getFrameRegister(MF);
-      return FPOffset;
-    } else if (MFI->hasVarSizedObjects()) {
-      assert(hasBasePointer(MF) && "missing base pointer!");
-      // Use the base register since we have it.
-      FrameReg = BasePtr;
-    } else if (AFI->isThumb2Function()) {
-      // In Thumb2 mode, the negative offset is very limited. Try to avoid
-      // out of range references.
-      if (FPOffset >= -255 && FPOffset < 0) {
-        FrameReg = getFrameRegister(MF);
-        return FPOffset;
-      }
-    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
-      // Otherwise, use SP or FP, whichever is closer to the stack slot.
-      FrameReg = getFrameRegister(MF);
-      return FPOffset;
-    }
-  }
-  // Use the base pointer if we have one.
-  if (hasBasePointer(MF))
-    FrameReg = BasePtr;
-  return Offset;
-}
-
-int
-ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
-                                         int FI) const {
-  unsigned FrameReg;
-  return getFrameIndexReference(MF, FI, FrameReg);
-}
-
 unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
   llvm_unreachable("What is the exception register");
   return 0;
@@ -1320,7 +816,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
   BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
     .addReg(DestReg, getDefRegState(true), SubIdx)
     .addConstantPoolIndex(Idx)
-    .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+    .addImm(0).addImm(Pred).addReg(PredReg);
 }
 
 bool ARMBaseRegisterInfo::
@@ -1338,34 +834,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const {
   return EnableLocalStackAlloc;
 }
 
-// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-// not required, we reserve argument space for call sites in the function
-// immediately on entry to the current function. This eliminates the need for
-// add/sub sp brackets around call sites. Returns true if the call frame is
-// included as part of the stack frame.
-bool ARMBaseRegisterInfo::
-hasReservedCallFrame(const MachineFunction &MF) const {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  unsigned CFSize = FFI->getMaxCallFrameSize();
-  // It's not always a good idea to include the call frame as part of the
-  // stack frame. ARM (especially Thumb) has small immediate offset to
-  // address the stack frame. So a large call frame can cause poor codegen
-  // and may even makes it impossible to scavenge a register.
-  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
-    return false;
-
-  return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-// call frame pseudos can be simplified. Unlike most targets, having a FP
-// is not sufficient here since we still may reference some objects via SP
-// even when FP is available in Thumb2 mode.
-bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(const MachineFunction &MF) const {
-  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
-}
-
 static void
 emitSPUpdate(bool isARM,
              MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -1384,7 +852,8 @@ emitSPUpdate(bool isARM,
 void ARMBaseRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  if (!TFI->hasReservedCallFrame(MF)) {
     // If we have alloca, convert as follows:
     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
     // ADJCALLSTACKUP   -> add, sp, sp, amount
@@ -1395,7 +864,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = TFI->getStackAlignment();
       Amount = (Amount+Align-1)/Align*Align;
 
       ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1433,8 +902,7 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
   switch (AddrMode) {
   case ARMII::AddrModeT2_i8:
   case ARMII::AddrModeT2_i12:
-    // i8 supports only negative, and i12 supports only positive, so
-    // based on Offset sign, consider the appropriate instruction
+  case ARMII::AddrMode_i12:
     InstrOffs = MI->getOperand(Idx+1).getImm();
     Scale = 1;
     break;
@@ -1496,8 +964,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // return false for everything else.
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
-  case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
-  case ARM::STR: case ARM::STRH: case ARM::STRB:
+  case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+  case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
   case ARM::t2LDRi12: case ARM::t2LDRi8:
   case ARM::t2STRi12: case ARM::t2STRi8:
   case ARM::VLDRS: case ARM::VLDRD:
@@ -1516,6 +984,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // Note that the incoming offset is based on the SP value at function entry,
   // so it'll be negative.
   MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
@@ -1542,8 +1011,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   // The FP is only available if there is no dynamic realignment. We
   // don't know for sure yet whether we'll need that, so we guess based
   // on whether there are any local variables that would trigger it.
-  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  if (hasFP(MF) &&
+  unsigned StackAlign = TFI->getStackAlignment();
+  if (TFI->hasFP(MF) &&
       !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
     if (isFrameOffsetLegal(MI, FPOffset))
       return false;
@@ -1560,19 +1029,25 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   return true;
 }
 
-/// materializeFrameBaseRegister - Insert defining instruction(s) for
-/// BaseReg to be a pointer to FrameIdx before insertion point I.
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
 void ARMBaseRegisterInfo::
-materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
-                             int FrameIdx, int64_t Offset) const {
-  ARMFunctionInfo *AFI =
-    I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                             unsigned BaseReg, int FrameIdx,
+                             int64_t Offset) const {
+  ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
   unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
     (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
 
+  MachineBasicBlock::iterator Ins = MBB->begin();
+  DebugLoc DL;                  // Defaults to "unknown"
+  if (Ins != MBB->end())
+    DL = Ins->getDebugLoc();
+
   MachineInstrBuilder MIB =
-    BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+    BuildMI(*MBB, Ins, DL, TII.get(ADDriOpc), BaseReg)
     .addFrameIndex(FrameIdx).addImm(Offset);
+
   if (!AFI->isThumb1OnlyFunction())
     AddDefaultCC(AddDefaultPred(MIB));
 }
@@ -1640,6 +1115,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     NumBits = 8;
     Scale = 4;
     break;
+  case ARMII::AddrMode_i12:
   case ARMII::AddrMode2:
     NumBits = 12;
     break;
@@ -1679,6 +1155,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const ARMFrameLowering *TFI =
+    static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   assert(!AFI->isThumb1OnlyFunction() &&
          "This eliminateFrameIndex does not support Thumb1!");
@@ -1691,7 +1169,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int FrameIndex = MI.getOperand(i).getIndex();
   unsigned FrameReg;
 
-  int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+  int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
 
   // Special handling of dbg_value instructions.
   if (MI.isDebugValue()) {
@@ -1737,339 +1215,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
                              Offset, Pred, PredReg, TII);
     }
+    // Update the original instruction to use the scratch register.
     MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+    if (MI.getOpcode() == ARM::t2ADDrSPi)
+      MI.setDesc(TII.get(ARM::t2ADDri));
+    else if (MI.getOpcode() == ARM::t2SUBrSPi)
+      MI.setDesc(TII.get(ARM::t2SUBri));
   }
 }
 
-/// Move iterator past the next bunch of callee save load / store ops for
-/// the particular spill area (1: integer area 1, 2: integer area 2,
-/// 3: fp area, 0: don't care).
-static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator &MBBI,
-                                   int Opc1, int Opc2, unsigned Area,
-                                   const ARMSubtarget &STI) {
-  while (MBBI != MBB.end() &&
-         ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) &&
-         MBBI->getOperand(1).isFI()) {
-    if (Area != 0) {
-      bool Done = false;
-      unsigned Category = 0;
-      switch (MBBI->getOperand(0).getReg()) {
-      case ARM::R4:  case ARM::R5:  case ARM::R6: case ARM::R7:
-      case ARM::LR:
-        Category = 1;
-        break;
-      case ARM::R8:  case ARM::R9:  case ARM::R10: case ARM::R11:
-        Category = STI.isTargetDarwin() ? 2 : 1;
-        break;
-      case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
-      case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
-        Category = 3;
-        break;
-      default:
-        Done = true;
-        break;
-      }
-      if (Done || Category != Area)
-        break;
-    }
-
-    ++MBBI;
-  }
-}
-
-void ARMBaseRegisterInfo::
-emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo  *MFI = MF.getFrameInfo();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  assert(!AFI->isThumb1OnlyFunction() &&
-         "This emitPrologue does not support Thumb1!");
-  bool isARM = !AFI->isThumbFunction();
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
-  unsigned NumBytes = MFI->getStackSize();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Determine the sizes of each callee-save spill areas and record which frame
-  // belongs to which callee-save spill areas.
-  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
-  int FramePtrSpillFI = 0;
-
-  // Allocate the vararg register save area. This is not counted in NumBytes.
-  if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
-
-  if (!AFI->hasStackFrame()) {
-    if (NumBytes != 0)
-      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
-    return;
-  }
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    int FI = CSI[i].getFrameIdx();
-    switch (Reg) {
-    case ARM::R4:
-    case ARM::R5:
-    case ARM::R6:
-    case ARM::R7:
-    case ARM::LR:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      AFI->addGPRCalleeSavedArea1Frame(FI);
-      GPRCS1Size += 4;
-      break;
-    case ARM::R8:
-    case ARM::R9:
-    case ARM::R10:
-    case ARM::R11:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      if (STI.isTargetDarwin()) {
-        AFI->addGPRCalleeSavedArea2Frame(FI);
-        GPRCS2Size += 4;
-      } else {
-        AFI->addGPRCalleeSavedArea1Frame(FI);
-        GPRCS1Size += 4;
-      }
-      break;
-    default:
-      AFI->addDPRCalleeSavedAreaFrame(FI);
-      DPRCSSize += 8;
-    }
-  }
-
-  // Build the new SUBri to adjust SP for integer callee-save spill area 1.
-  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size);
-  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI);
-
-  // Set FP to point to the stack slot that contains the previous FP.
-  // For Darwin, FP is R7, which has now been stored in spill area 1.
-  // Otherwise, if this is not Darwin, all the callee-saved registers go
-  // into spill area 1, including the FP in R11.  In either case, it is
-  // now safe to emit this assignment.
-  bool HasFP = hasFP(MF);
-  if (HasFP) {
-    unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
-    MachineInstrBuilder MIB =
-      BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
-      .addFrameIndex(FramePtrSpillFI).addImm(0);
-    AddDefaultCC(AddDefaultPred(MIB));
-  }
-
-  // Build the new SUBri to adjust SP for integer callee-save spill area 2.
-  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size);
-
-  // Build the new SUBri to adjust SP for FP callee-save spill area.
-  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI);
-  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize);
-
-  // Determine starting offsets of spill areas.
-  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
-  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
-  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
-  if (HasFP)
-    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
-                                NumBytes);
-  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
-  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
-  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
-  movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI);
-  NumBytes = DPRCSOffset;
-  if (NumBytes) {
-    // Adjust SP after all the callee-save spills.
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
-    if (HasFP)
-      AFI->setShouldRestoreSPFromFP(true);
-  }
-
-  if (STI.isTargetELF() && hasFP(MF)) {
-    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
-                             AFI->getFramePtrSpillOffset());
-    AFI->setShouldRestoreSPFromFP(true);
-  }
-
-  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
-  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
-  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-
-  // If we need dynamic stack realignment, do it here. Be paranoid and make
-  // sure if we also have VLAs, we have a base pointer for frame access.
-  if (needsStackRealignment(MF)) {
-    unsigned MaxAlign = MFI->getMaxAlignment();
-    assert (!AFI->isThumb1OnlyFunction());
-    if (!AFI->isThumbFunction()) {
-      // Emit bic sp, sp, MaxAlign
-      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
-                                          TII.get(ARM::BICri), ARM::SP)
-                                  .addReg(ARM::SP, RegState::Kill)
-                                  .addImm(MaxAlign-1)));
-    } else {
-      // We cannot use sp as source/dest register here, thus we're emitting the
-      // following sequence:
-      // mov r4, sp
-      // bic r4, r4, MaxAlign
-      // mov sp, r4
-      // FIXME: It will be better just to find spare register here.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
-        .addReg(ARM::SP, RegState::Kill);
-      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
-                                          TII.get(ARM::t2BICri), ARM::R4)
-                                  .addReg(ARM::R4, RegState::Kill)
-                                  .addImm(MaxAlign-1)));
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-        .addReg(ARM::R4, RegState::Kill);
-    }
-
-    AFI->setShouldRestoreSPFromFP(true);
-  }
-
-  // If we need a base pointer, set it up here. It's whatever the value
-  // of the stack pointer is at this point. Any variable size objects
-  // will be allocated after this, so we can still use the base pointer
-  // to reference locals.
-  if (hasBasePointer(MF)) {
-    if (isARM)
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
-        .addReg(ARM::SP)
-        .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
-    else
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
-        .addReg(ARM::SP);
-  }
-
-  // If the frame has variable sized objects then the epilogue must restore
-  // the sp from fp.
-  if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
-    AFI->setShouldRestoreSPFromFP(true);
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
-static bool isCSRestore(MachineInstr *MI,
-                        const ARMBaseInstrInfo &TII,
-                        const unsigned *CSRegs) {
-  return ((MI->getOpcode() == (int)ARM::VLDRD ||
-           MI->getOpcode() == (int)ARM::LDR ||
-           MI->getOpcode() == (int)ARM::t2LDRi12) &&
-          MI->getOperand(1).isFI() &&
-          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
-}
-
-void ARMBaseRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert(MBBI->getDesc().isReturn() &&
-         "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc dl = MBBI->getDebugLoc();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  assert(!AFI->isThumb1OnlyFunction() &&
-         "This emitEpilogue does not support Thumb1!");
-  bool isARM = !AFI->isThumbFunction();
-
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
-  int NumBytes = (int)MFI->getStackSize();
-
-  if (!AFI->hasStackFrame()) {
-    if (NumBytes != 0)
-      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
-  } else {
-    // Unwind MBBI to point to first LDR / VLDRD.
-    const unsigned *CSRegs = getCalleeSavedRegs();
-    if (MBBI != MBB.begin()) {
-      do
-        --MBBI;
-      while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
-      if (!isCSRestore(MBBI, TII, CSRegs))
-        ++MBBI;
-    }
-
-    // Move SP to start of FP callee save spill area.
-    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
-                 AFI->getGPRCalleeSavedArea2Size() +
-                 AFI->getDPRCalleeSavedAreaSize());
-
-    // Reset SP based on frame pointer only if the stack frame extends beyond
-    // frame pointer stack slot or target is ELF and the function has FP.
-    if (AFI->shouldRestoreSPFromFP()) {
-      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
-      if (NumBytes) {
-        if (isARM)
-          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
-                                  ARMCC::AL, 0, TII);
-        else
-          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
-                                 ARMCC::AL, 0, TII);
-      } else {
-        // Thumb2 or ARM.
-        if (isARM)
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
-            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
-        else
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
-            .addReg(FramePtr);
-      }
-    } else if (NumBytes)
-      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
-
-    // Move SP to start of integer callee save spill area 2.
-    movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI);
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
-
-    // Move SP to start of integer callee save spill area 1.
-    movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI);
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size());
-
-    // Move SP to SP upon entry to the function.
-    movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI);
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
-  }
-
-  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
-      RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
-    // Tail call return: adjust the stack pointer and jump to callee.
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-    // Jump to label or value in register.
-    if (RetOpcode == ARM::TCRETURNdi) {
-      BuildMI(MBB, MBBI, dl,
-            TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
-        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                         JumpTarget.getTargetFlags());
-    } else if (RetOpcode == ARM::TCRETURNdiND) {
-      BuildMI(MBB, MBBI, dl,
-            TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
-        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                         JumpTarget.getTargetFlags());
-    } else if (RetOpcode == ARM::TCRETURNri) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    } else if (RetOpcode == ARM::TCRETURNriND) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    }
-
-    MachineInstr *NewMI = prior(MBBI);
-    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-      NewMI->addOperand(MBBI->getOperand(i));
-
-    // Delete the pseudo instruction TCRETURN.
-    MBB.erase(MBBI);
-  }
-
-  if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
-}
-
 #include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index fa2eb6c10498..ba6bd2b32082 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,6 +44,45 @@ static inline bool isARMLowRegister(unsigned Reg) {
   }
 }
 
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+  using namespace ARM;
+  switch (Reg) {
+    case R0:  case R1:  case R2:  case R3:
+    case R4:  case R5:  case R6:  case R7:
+    case LR:  case SP:  case PC:
+      return true;
+    case R8:  case R9:  case R10: case R11:
+      // For darwin we want r7 and lr to be next to each other.
+      return !isDarwin;
+    default:
+      return false;
+  }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+  using namespace ARM;
+  switch (Reg) {
+    case R8: case R9: case R10: case R11:
+      // Darwin has this second area.
+      return isDarwin;
+    default:
+      return false;
+  }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+  using namespace ARM;
+  switch (Reg) {
+    case D15: case D14: case D13: case D12:
+    case D11: case D10: case D9:  case D8:
+      return true;
+    default:
+      return false;
+  }
+}
+
 class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
 protected:
   const ARMBaseInstrInfo &TII;
@@ -65,12 +104,6 @@ protected:
   unsigned getOpcode(int Op) const;
 
 public:
-  /// getRegisterNumbering - Given the enum value for some register, e.g.
-  /// ARM::LR, return the number that it corresponds to (e.g. 14). It
-  /// also returns true in isSPVFP if the register is a single precision
-  /// VFP register.
-  static unsigned getRegisterNumbering(unsigned RegEnum, bool *isSPVFP = 0);
-
   /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
@@ -106,14 +139,13 @@ public:
   void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
                           MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
   bool hasBasePointer(const MachineFunction &MF) const;
 
   bool canRealignStack(const MachineFunction &MF) const;
   bool needsStackRealignment(const MachineFunction &MF) const;
   int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
   bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
-  void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+  void materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                     unsigned BaseReg, int FrameIdx,
                                     int64_t Offset) const;
   void resolveFrameIndex(MachineBasicBlock::iterator I,
@@ -122,17 +154,10 @@ public:
 
   bool cannotEliminateFrame(const MachineFunction &MF) const;
 
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS = NULL) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
-  int getFrameIndexReference(const MachineFunction &MF, int FI,
-                             unsigned &FrameReg) const;
-  int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
-                                 unsigned &FrameReg, int SPAdj) const;
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+  unsigned getBaseRegister() const { return BasePtr; }
 
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
@@ -162,9 +187,6 @@ public:
 
   virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
 
-  virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
-  virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
-
   virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I) const;
@@ -172,12 +194,7 @@ public:
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                    int SPAdj, RegScavenger *RS = NULL) const;
 
-  virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
 private:
-  unsigned estimateRSStackSizeLimit(MachineFunction &MF) const;
-
   unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
 
   unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index 3b38375fbc71..69eddf03ec94 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains enumerations and support routines for ARM build attributes
-// as defined in ARM ABI addenda document (ABI release 2.07).
+// as defined in ARM ABI addenda document (ABI release 2.08).
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,7 +16,14 @@
 #define __TARGET_ARMBUILDATTRS_H__
 
 namespace ARMBuildAttrs {
-  enum {
+  enum SpecialAttr {
+    // This is for the .cpu asm attr. It translates into one or more
+    // AttrType (below) entries in the .ARM.attributes section in the ELF.
+    SEL_CPU 
+  };
+
+  enum AttrType {
+    // Rest correspond to ELF/.ARM.attributes
     File                      = 1,
     Section                   = 2,
     Symbol                    = 3,
@@ -52,12 +59,72 @@ namespace ARMBuildAttrs {
     CPU_unaligned_access      = 34,
     VFP_HP_extension          = 36,
     ABI_FP_16bit_format       = 38,
+    MPextension_use           = 42, // was 70, 2.08 ABI
+    DIV_use                   = 44,
     nodefaults                = 64,
     also_compatible_with      = 65,
     T2EE_use                  = 66,
     conformance               = 67,
     Virtualization_use        = 68,
-    MPextension_use           = 70
+    MPextension_use_old       = 70
+  };
+
+  // Magic numbers for .ARM.attributes
+  enum AttrMagic {
+    Format_Version  = 0x41
+  };
+
+  // Legal Values for CPU_arch, (=6), uleb128
+  enum CPUArch {
+    Pre_v4       = 0,
+    v4       = 1,   // e.g. SA110
+    v4T      = 2,   // e.g. ARM7TDMI
+    v5T      = 3,   // e.g. ARM9TDMI
+    v5TE     = 4,   // e.g. ARM946E_S
+    v5TEJ    = 5,   // e.g. ARM926EJ_S
+    v6       = 6,   // e.g. ARM1136J_S
+    v6KZ     = 7,   // e.g. ARM1176JZ_S
+    v6T2     = 8,   // e.g. ARM1156T2F_S
+    v6K      = 9,   // e.g. ARM1136J_S
+    v7       = 10,  // e.g. Cortex A8, Cortex M3
+    v6_M     = 11,  // e.g. Cortex M1
+    v6S_M    = 12,  // v6_M with the System extensions
+    v7E_M    = 13   // v7_M with DSP extensions
+  };
+
+  enum CPUArchProfile { // (=7), uleb128 
+    Not_Applicable = 0, // pre v7, or cross-profile code
+    ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+    RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+    MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+    SystemProfile = (0x53) // 'S' Application or real-time profile
+  };
+
+  // The following have a lot of common use cases
+  enum { 
+    //ARMISAUse (=8), uleb128  and THUMBISAUse (=9), uleb128
+    Not_Allowed = 0,
+    Allowed = 1,
+
+    // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+    AllowFPv2  = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+    AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+    AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 
+    AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) 
+    AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+    // Tag_WMMX_arch, (=11), uleb128
+    AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+    
+    // Tag_WMMX_arch, (=11), uleb128
+    AllowWMMXv1 = 2,  // The user permitted this entity to use WMMX v2
+
+    // Tag_ABI_FP_denormal, (=20), uleb128 
+    PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+    // Tag_ABI_FP_number_model, (=23), uleb128
+    AllowRTABI = 2,  // numbers, infinities, and one quiet NaN (see [RTABI])
+    AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
   };
 }
 
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 000000000000..ff7db1ff62ed
--- /dev/null
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                          CCValAssign::LocInfo &LocInfo,
+                          CCState &State, bool CanFail) {
+  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  // Try to get the first register.
+  if (unsigned Reg = State.AllocateReg(RegList, 4))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else {
+    // For the 2nd half of a v2f64, do not fail.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 4),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  // Try to get the second register.
+  if (unsigned Reg = State.AllocateReg(RegList, 4))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(4, 4),
+                                           LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                   CCValAssign::LocInfo &LocInfo,
+                                   ISD::ArgFlagsTy &ArgFlags,
+                                   CCState &State) {
+  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;
+  return true;  // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                           CCValAssign::LocInfo &LocInfo,
+                           CCState &State, bool CanFail) {
+  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+  if (Reg == 0) {
+    // For the 2nd half of a v2f64, do not just fail.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 8),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  unsigned i;
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  unsigned T = State.AllocateReg(LoRegList[i]);
+  (void)T;
+  assert(T == LoRegList[i] && "Could not allocate register");
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags,
+                                    CCState &State) {
+  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;
+  return true;  // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                         CCValAssign::LocInfo &LocInfo, CCState &State) {
+  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+  if (Reg == 0)
+    return false; // we didn't handle it
+
+  unsigned i;
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
+  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;
+  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;
+  return true;  // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                       CCValAssign::LocInfo &LocInfo,
+                                       ISD::ArgFlagsTy &ArgFlags,
+                                       CCState &State) {
+  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                   State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 293e32aa5376..426ba13a8e11 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -52,6 +52,34 @@ def RetCC_ARM_APCS : CallingConv<[
   CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
 ]>;
 
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
 //===----------------------------------------------------------------------===//
 // ARM AAPCS (EABI) Calling Convention, common parts
 //===----------------------------------------------------------------------===//
@@ -105,6 +133,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
 
 //===----------------------------------------------------------------------===//
 // ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
 //===----------------------------------------------------------------------===//
 
 def CC_ARM_AAPCS_VFP : CallingConv<[
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index b1a702f90cfc..9bbf6a030687 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -74,7 +74,7 @@ namespace {
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
 
     bool runOnMachineFunction(MachineFunction &MF);
 
@@ -101,7 +101,6 @@ namespace {
                                     unsigned OpIdx);
 
     unsigned getMachineSoImmOpValue(unsigned SoImm);
-
     unsigned getAddrModeSBit(const MachineInstr &MI,
                              const TargetInstrDesc &TID) const;
 
@@ -140,8 +139,6 @@ namespace {
 
     void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
 
-    void emitMiscInstruction(const MachineInstr &MI);
-
     void emitNEONLaneInstruction(const MachineInstr &MI);
     void emitNEONDupInstruction(const MachineInstr &MI);
     void emitNEON1RegModImmInstruction(const MachineInstr &MI);
@@ -150,20 +147,176 @@ namespace {
 
     /// getMachineOpValue - Return binary encoding of operand. If the machine
     /// operand requires relocation, record the relocation and return zero.
-    unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
-    unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) {
+    unsigned getMachineOpValue(const MachineInstr &MI,
+                               const MachineOperand &MO) const;
+    unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
       return getMachineOpValue(MI, MI.getOperand(OpIdx));
     }
 
+    // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
+    //  TableGen'erated getBinaryCodeForInstr() function to encode any
+    //  operand values, instead querying getMachineOpValue() directly for
+    //  each operand it needs to encode. Thus, any of the new encoder
+    //  helper functions can simply return 0 as the values the return
+    //  are already handled elsewhere. They are placeholders to allow this
+    //  encoder to continue to function until the MC encoder is sufficiently
+    //  far along that this one can be eliminated entirely.
+    unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) 
+      const { return 0; }
+    unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) 
+      const { return 0; }
+    unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) 
+      const { return 0; }
+    unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+      const { return 0; }
+    unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+      unsigned Op) const { return 0; }
+    unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+                                            unsigned Op) const { return 0; }
+    unsigned getMsbOpValue(const MachineInstr &MI,
+                           unsigned Op) const { return 0; }
+    uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const {return 0; }
+    uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+
+    unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+      const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+      uint32_t Binary;
+      Binary = Imm12 & 0xfff;
+      if (Imm12 >= 0)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+
+    unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+      return 0;
+    }
+
+    uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+
+      // Special value for #-0
+      if (Imm12 == INT32_MIN)
+        Imm12 = 0;
+
+      // Immediate is always encoded as positive. The 'U' bit controls add vs
+      // sub.
+      bool isAdd = true;
+      if (Imm12 < 0) {
+        Imm12 = -Imm12;
+        isAdd = false;
+      }
+
+      uint32_t Binary = Imm12 & 0xfff;
+      if (isAdd)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+    unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
     /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
     /// machine operand requires relocation, record the relocation and return
     /// zero.
     unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
                             unsigned Reloc);
-    unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
-                            unsigned Reloc) {
-      return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
-    }
 
     /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
     ///
@@ -173,12 +326,12 @@ namespace {
     /// fixed up by the relocation stage.
     void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
                            bool MayNeedFarStub,  bool Indirect,
-                           intptr_t ACPV = 0);
-    void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
-    void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
-    void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
+                           intptr_t ACPV = 0) const;
+    void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+    void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+    void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
     void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
-                               intptr_t JTBase = 0);
+                               intptr_t JTBase = 0) const;
   };
 }
 
@@ -266,9 +419,9 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
 /// getMachineOpValue - Return binary encoding of operand. If the machine
 /// operand requires relocation, record the relocation and return zero.
 unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                           const MachineOperand &MO) {
+                                           const MachineOperand &MO) const {
   if (MO.isReg())
-    return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+    return getARMRegisterNumbering(MO.getReg());
   else if (MO.isImm())
     return static_cast<unsigned>(MO.getImm());
   else if (MO.isGlobal())
@@ -285,12 +438,8 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
     emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
   else if (MO.isMBB())
     emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
-  else {
-#ifndef NDEBUG
-    errs() << MO;
-#endif
-    llvm_unreachable(0);
-  }
+  else
+    llvm_unreachable("Unable to encode MachineOperand!");
   return 0;
 }
 
@@ -298,7 +447,7 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
 ///
 void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
                                        bool MayNeedFarStub, bool Indirect,
-                                       intptr_t ACPV) {
+                                       intptr_t ACPV) const {
   MachineRelocation MR = Indirect
     ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
                                            const_cast<GlobalValue *>(GV),
@@ -312,7 +461,8 @@ void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
 /// emitExternalSymbolAddress - Arrange for the address of an external symbol to
 /// be emitted to the current location in the function, and allow it to be PC
 /// relative.
-void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) {
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
   MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
                                                  Reloc, ES));
 }
@@ -320,7 +470,7 @@ void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) {
 /// emitConstPoolAddress - Arrange for the address of an constant pool
 /// to be emitted to the current location in the function, and allow it to be PC
 /// relative.
-void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) {
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
   // Tell JIT emitter we'll resolve the address.
   MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
                                                     Reloc, CPI, 0, true));
@@ -329,14 +479,16 @@ void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) {
 /// emitJumpTableAddress - Arrange for the address of a jump table to
 /// be emitted to the current location in the function, and allow it to be PC
 /// relative.
-void ARMCodeEmitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) {
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
   MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
                                                     Reloc, JTIndex, 0, true));
 }
 
 /// emitMachineBasicBlock - Emit the specified address basic block.
 void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
-                                           unsigned Reloc, intptr_t JTBase) {
+                                           unsigned Reloc,
+                                           intptr_t JTBase) const {
   MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
                                              Reloc, BB, JTBase));
 }
@@ -364,6 +516,14 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
     llvm_unreachable("Unhandled instruction encoding format!");
     break;
   }
+  case ARMII::MiscFrm:
+    if (MI.getOpcode() == ARM::LEApcrelJT) {
+      // Materialize jumptable address.
+      emitLEApcrelJTInstruction(MI);
+      break;
+    }
+    llvm_unreachable("Unhandled instruction encoding!");
+    break;
   case ARMII::Pseudo:
     emitPseudoInstruction(MI);
     break;
@@ -418,9 +578,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
   case ARMII::VFPLdStMulFrm:
     emitVFPLoadStoreMultipleInstruction(MI);
     break;
-  case ARMII::VFPMiscFrm:
-    emitMiscInstruction(MI);
-    break;
+
   // NEON instructions.
   case ARMII::NGetLnFrm:
   case ARMII::NSetLnFrm:
@@ -488,7 +646,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
       emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
       emitWordLE(0);
     } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
-      uint32_t Val = *(uint32_t*)CI->getValue().getRawData();
+      uint32_t Val = uint32_t(*CI->getValue().getRawData());
       emitWordLE(Val);
     } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
       if (CFP->getType()->isFloatTy())
@@ -588,7 +746,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
   const TargetInstrDesc &TID = MI.getDesc();
 
   // Emit the 'add' instruction.
-  unsigned Binary = 0x4 << 21;  // add: Insts{24-31} = 0b0100
+  unsigned Binary = 0x4 << 21;  // add: Insts{24-21} = 0b0100
 
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
@@ -600,7 +758,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
 
   // Encode Rn which is PC.
-  Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+  Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
 
   // Encode the displacement.
   Binary |= 1 << ARMII::I_BitShift;
@@ -628,7 +786,7 @@ void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
   // Encode the shift operation.
   switch (Opcode) {
   default: break;
-  case ARM::MOVrx:
+  case ARM::RRX:
     // rrx
     Binary |= 0x6 << 4;
     break;
@@ -659,10 +817,10 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   switch (Opcode) {
   default:
     llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
-  case ARM::BX:
-  case ARM::BMOVPCRX:
-  case ARM::BXr9:
-  case ARM::BMOVPCRXr9: {
+  case ARM::BX_CALL:
+  case ARM::BMOVPCRX_CALL:
+  case ARM::BXr9_CALL:
+  case ARM::BMOVPCRXr9_CALL: {
     // First emit mov lr, pc
     unsigned Binary = 0x01a0e00f;
     Binary |= II->getPredicate(&MI) << ARMII::CondShift;
@@ -720,18 +878,18 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   }
 
   case ARM::MOVi32imm:
-    emitMOVi32immInstruction(MI);
-    break;
-
-  case ARM::MOVi2pieces:
     // Two instructions to materialize a constant.
-    emitMOVi2piecesInstruction(MI);
+    if (Subtarget->hasV6T2Ops())
+      emitMOVi32immInstruction(MI);
+    else
+      emitMOVi2piecesInstruction(MI);
     break;
+
   case ARM::LEApcrelJT:
     // Materialize jumptable address.
     emitLEApcrelJTInstruction(MI);
     break;
-  case ARM::MOVrx:
+  case ARM::RRX:
   case ARM::MOVsrl_flag:
   case ARM::MOVsra_flag:
     emitPseudoMoveInstruction(MI);
@@ -789,8 +947,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
   if (Rs) {
     // Encode Rs bit[11:8].
     assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
-    return Binary |
-      (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift);
+    return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
   }
 
   // Encode shift_imm bit[11:7].
@@ -841,8 +998,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
   else if (ImplicitRd)
     // Special handling for implicit use (e.g. PC).
-    Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
-               << ARMII::RegRdShift);
+    Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
 
   if (TID.Opcode == ARM::MOVi16) {
       // Get immediate from MI.
@@ -892,8 +1048,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
   if (!isUnary) {
     if (ImplicitRn)
       // Special handling for implicit use (e.g. PC).
-      Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
-                 << ARMII::RegRnShift);
+      Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
     else {
       Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
       ++OpIdx;
@@ -910,7 +1065,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
 
   if (MO.isReg()) {
     // Encode register Rm.
-    emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg()));
+    emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
     return;
   }
 
@@ -930,6 +1085,13 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
 
+  // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+  if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+      MI.getOpcode() == ARM::STRi12) {
+    emitWordLE(Binary);
+    return;
+  }
+
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
@@ -946,16 +1108,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
   // Set first operand
   if (ImplicitRd)
     // Special handling for implicit use (e.g. PC).
-    Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
-               << ARMII::RegRdShift);
+    Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
   else
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
 
   // Set second operand
   if (ImplicitRn)
     // Special handling for implicit use (e.g. PC).
-    Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
-               << ARMII::RegRnShift);
+    Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
   else
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
@@ -978,11 +1138,11 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
     return;
   }
 
-  // Set bit I(25), because this is not in immediate enconding.
+  // Set bit I(25), because this is not in immediate encoding.
   Binary |= 1 << ARMII::I_BitShift;
   assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
   // Set bit[3:0] to the corresponding Rm register
-  Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+  Binary |= getARMRegisterNumbering(MO2.getReg());
 
   // If this instr is in scaled register offset/index instruction, set
   // shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1026,8 +1186,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
   // Set second operand
   if (ImplicitRn)
     // Special handling for implicit use (e.g. PC).
-    Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
-               << ARMII::RegRnShift);
+    Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
   else
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
@@ -1046,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
   // If this instr is in register offset/index encoding, set bit[3:0]
   // to the corresponding Rm register.
   if (MO2.getReg()) {
-    Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+    Binary |= getARMRegisterNumbering(MO2.getReg());
     emitWordLE(Binary);
     return;
   }
@@ -1100,8 +1259,8 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // Set addressing mode by modifying bits U(23) and P(24)
-  const MachineOperand &MO = MI.getOperand(OpIdx++);
-  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
 
   // Set bit W(21)
   if (IsUpdating)
@@ -1112,7 +1271,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || MO.isImplicit())
       break;
-    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+    unsigned RegNum = getARMRegisterNumbering(MO.getReg());
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            RegNum < 16);
     Binary |= 0x1 << RegNum;
@@ -1349,7 +1508,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
 
   if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
     // The return register is LR.
-    Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
+    Binary |= getARMRegisterNumbering(ARM::LR);
   else
     // otherwise, set the return register
     Binary |= getMachineOpValue(MI, 0);
@@ -1360,8 +1519,8 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
 static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegD = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  bool isSPVFP = false;
-  RegD = ARMRegisterInfo::getRegisterNumbering(RegD, &isSPVFP);
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+  RegD = getARMRegisterNumbering(RegD);
   if (!isSPVFP)
     Binary |=   RegD               << ARMII::RegRdShift;
   else {
@@ -1374,8 +1533,8 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
 static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegN = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  bool isSPVFP = false;
-  RegN = ARMRegisterInfo::getRegisterNumbering(RegN, &isSPVFP);
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+  RegN = getARMRegisterNumbering(RegN);
   if (!isSPVFP)
     Binary |=   RegN               << ARMII::RegRnShift;
   else {
@@ -1388,8 +1547,8 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
 static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegM = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  bool isSPVFP = false;
-  RegM = ARMRegisterInfo::getRegisterNumbering(RegM, &isSPVFP);
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+  RegM = getARMRegisterNumbering(RegM);
   if (!isSPVFP)
     Binary |=   RegM;
   else {
@@ -1548,8 +1707,8 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // Set addressing mode by modifying bits U(23) and P(24)
-  const MachineOperand &MO = MI.getOperand(OpIdx++);
-  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
 
   // Set bit W(21)
   if (IsUpdating)
@@ -1576,63 +1735,10 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
   emitWordLE(Binary);
 }
 
-void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
-  unsigned Opcode = MI.getDesc().Opcode;
-  // Part of binary is determined by TableGn.
-  unsigned Binary = getBinaryCodeForInstr(MI);
-
-  // Set the conditional execution predicate
-  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
-  switch(Opcode) {
-  default:
-    llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
-
-  case ARM::FMSTAT:
-    // No further encoding needed.
-    break;
-
-  case ARM::VMRS:
-  case ARM::VMSR: {
-    const MachineOperand &MO0 = MI.getOperand(0);
-    // Encode Rt.
-    Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
-                << ARMII::RegRdShift;
-    break;
-  }
-
-  case ARM::FCONSTD:
-  case ARM::FCONSTS: {
-    // Encode Dd / Sd.
-    Binary |= encodeVFPRd(MI, 0);
-
-    // Encode imm., Table A7-18 VFP modified immediate constants
-    const MachineOperand &MO1 = MI.getOperand(1);
-    unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
-                      .bitcastToAPInt().getHiBits(32).getLimitedValue());
-    unsigned ModifiedImm;
-
-    if(Opcode == ARM::FCONSTS)
-      ModifiedImm = (Imm & 0x80000000) >> 24 | // a
-                    (Imm & 0x03F80000) >> 19;  // bcdefgh
-    else // Opcode == ARM::FCONSTD
-      ModifiedImm = (Imm & 0x80000000) >> 24 | // a
-                    (Imm & 0x007F0000) >> 16;  // bcdefgh
-
-    // Insts{19-16} = abcd, Insts{3-0} = efgh
-    Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
-    Binary |= (ModifiedImm & 0xF);
-    break;
-  }
-  }
-
-  emitWordLE(Binary);
-}
-
 static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegD = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  RegD = ARMRegisterInfo::getRegisterNumbering(RegD);
+  RegD = getARMRegisterNumbering(RegD);
   Binary |= (RegD & 0xf) << ARMII::RegRdShift;
   Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
   return Binary;
@@ -1641,7 +1747,7 @@ static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
 static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegN = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  RegN = ARMRegisterInfo::getRegisterNumbering(RegN);
+  RegN = getARMRegisterNumbering(RegN);
   Binary |= (RegN & 0xf) << ARMII::RegRnShift;
   Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
   return Binary;
@@ -1650,7 +1756,7 @@ static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
 static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
   unsigned RegM = MI.getOperand(OpIdx).getReg();
   unsigned Binary = 0;
-  RegM = ARMRegisterInfo::getRegisterNumbering(RegM);
+  RegM = getARMRegisterNumbering(RegM);
   Binary |= (RegM & 0xf);
   Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
   return Binary;
@@ -1684,7 +1790,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
   Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
 
   unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
-  RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+  RegT = getARMRegisterNumbering(RegT);
   Binary |= (RegT << ARMII::RegRdShift);
   Binary |= encodeNEONRn(MI, RegNOpIdx);
 
@@ -1713,7 +1819,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
   Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
 
   unsigned RegT = MI.getOperand(1).getReg();
-  RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+  RegT = getARMRegisterNumbering(RegT);
   Binary |= (RegT << ARMII::RegRdShift);
   Binary |= encodeNEONRn(MI, 0);
   emitWordLE(Binary);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 60e923bd2c38..13d1b33d1165 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1,4 +1,4 @@
-//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -316,7 +316,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
   }
 
   /// The next UID to take is the first unused one.
-  AFI->initConstPoolEntryUId(CPEMIs.size());
+  AFI->initPICLabelUId(CPEMIs.size());
 
   // Do the initial scan of the function, building up information about the
   // sizes of each block, the location of all the water, and finding all of the
@@ -327,7 +327,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
 
 
   /// Remove dead constant pool entries.
-  RemoveUnusedCPEntries();
+  MadeChange |= RemoveUnusedCPEntries();
 
   // Iteratively place constant pool entries and fix up branches until there
   // is no change.
@@ -368,6 +368,14 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
   if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
     MadeChange |= UndoLRSpillRestore();
 
+  // Save the mapping between original and cloned constpool entries.
+  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+    for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+      const CPEntry & CPE = CPEntries[i][j];
+      AFI->recordCPEClone(i, CPE.CPI);
+    }
+  }
+
   DEBUG(errs() << '\n'; dumpBBs());
 
   BBSizes.clear();
@@ -482,7 +490,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
         HasInlineAsm = true;
   }
 
-  // Now go back through the instructions and build up our data structures
+  // Now go back through the instructions and build up our data structures.
   unsigned Offset = 0;
   for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
        MBBI != E; ++MBBI) {
@@ -603,7 +611,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
             Scale = 4;
             break;
 
-          case ARM::LDR:
+          case ARM::LDRi12:
           case ARM::LDRcp:
           case ARM::t2LDRpci:
             Bits = 12;  // +-offset_12
@@ -611,7 +619,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
             break;
 
           case ARM::tLDRpci:
-          case ARM::tLDRcp:
             Bits = 8;
             Scale = 4;  // +(offset_8*4)
             break;
@@ -692,7 +699,7 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
 /// machine function, it upsets all of the block numbers.  Renumber the blocks
 /// and update the arrays that parallel this numbering.
 void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
-  // Renumber the MBB's to keep them consequtive.
+  // Renumber the MBB's to keep them consecutive.
   NewBB->getParent()->RenumberBlocks(NewBB);
 
   // Insert a size into BBSizes to align it properly with the (newly
@@ -1242,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
 
   // No existing clone of this CPE is within range.
   // We will be generating a new clone.  Get a UID for it.
-  unsigned ID = AFI->createConstPoolEntryUId();
+  unsigned ID = AFI->createPICLabelUId();
 
   // Look for water where we can place this CPE.
   MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
@@ -1644,7 +1651,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
     unsigned DestOffset = BBOffsets[DestBB->getNumber()];
     if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
       MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
-      if (CmpMI->getOpcode() == ARM::tCMPzi8) {
+      if (CmpMI->getOpcode() == ARM::tCMPi8) {
         unsigned Reg = CmpMI->getOperand(0).getReg();
         Pred = llvm::getInstrPredicate(CmpMI, PredReg);
         if (Pred == ARMCC::AL &&
@@ -1766,7 +1773,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
       if (!OptOk)
         continue;
 
-      unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH;
+      unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
       MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
         .addReg(IdxReg, getKillRegState(IdxRegKill))
         .addJumpTableIndex(JTI, JTOP.getTargetFlags())
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index f13ccc638448..165a1d849ad5 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
                                            ARMCP::ARMCPKind K,
                                            unsigned char PCAdj,
-                                           const char *Modif,
+                                           ARMCP::ARMCPModifier Modif,
                                            bool AddCA)
   : MachineConstantPoolValue((const Type*)cval->getType()),
     CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
@@ -33,17 +33,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
 ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
                                            const char *s, unsigned id,
                                            unsigned char PCAdj,
-                                           const char *Modif,
+                                           ARMCP::ARMCPModifier Modif,
                                            bool AddCA)
   : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
     CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
     PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
 
 ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
-                                           const char *Modif)
+                                           ARMCP::ARMCPModifier Modif)
   : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
     CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
-    Modifier(Modif) {}
+    Modifier(Modif), AddCurrentAddress(false) {}
 
 const GlobalValue *ARMConstantPoolValue::getGV() const {
   return dyn_cast_or_null<GlobalValue>(CVal);
@@ -53,6 +53,14 @@ const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
   return dyn_cast_or_null<BlockAddress>(CVal);
 }
 
+static bool CPV_streq(const char *S1, const char *S2) {
+  if (S1 == S2)
+    return true;
+  if (S1 && S2 && strcmp(S1, S2) == 0)
+    return true;
+  return false;
+}
+
 int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
                                                     unsigned Alignment) {
   unsigned AlignMask = Alignment - 1;
@@ -65,8 +73,8 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
       if (CPV->CVal == CVal &&
           CPV->LabelId == LabelId &&
           CPV->PCAdjust == PCAdjust &&
-          (CPV->S == S || strcmp(CPV->S, S) == 0) &&
-          (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0))
+          CPV_streq(CPV->S, S) &&
+          CPV->Modifier == Modifier)
         return i;
     }
   }
@@ -91,8 +99,8 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
   if (ACPV->Kind == Kind &&
       ACPV->CVal == CVal &&
       ACPV->PCAdjust == PCAdjust &&
-      (ACPV->S == S || strcmp(ACPV->S, S) == 0) &&
-      (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) {
+      CPV_streq(ACPV->S, S) &&
+      ACPV->Modifier == Modifier) {
     if (ACPV->LabelId == LabelId)
       return true;
     // Two PC relative constpool entries containing the same GV address or
@@ -113,7 +121,7 @@ void ARMConstantPoolValue::print(raw_ostream &O) const {
     O << CVal->getName();
   else
     O << S;
-  if (Modifier) O << "(" << Modifier << ")";
+  if (Modifier) O << "(" << getModifierText() << ")";
   if (PCAdjust != 0) {
     O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
     if (AddCurrentAddress) O << "-.";
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 3119b54563de..d008811c40e4 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
 #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
 
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <cstddef>
 
 namespace llvm {
@@ -31,6 +32,15 @@ namespace ARMCP {
     CPBlockAddress,
     CPLSDA
   };
+
+  enum ARMCPModifier {
+    no_modifier,
+    TLSGD,
+    GOT,
+    GOTOFF,
+    GOTTPOFF,
+    TPOFF
+  };
 }
 
 /// ARMConstantPoolValue - ARM specific constantpool value. This is used to
@@ -43,26 +53,41 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
   ARMCP::ARMCPKind Kind;   // Kind of constant.
   unsigned char PCAdjust;  // Extra adjustment if constantpool is pc-relative.
                            // 8 for ARM, 4 for Thumb.
-  const char *Modifier;    // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+  ARMCP::ARMCPModifier Modifier;   // GV modifier i.e. (&GV(modifier)-(LPIC+8))
   bool AddCurrentAddress;
 
 public:
   ARMConstantPoolValue(const Constant *cval, unsigned id,
                        ARMCP::ARMCPKind Kind = ARMCP::CPValue,
-                       unsigned char PCAdj = 0, const char *Modifier = NULL,
+                       unsigned char PCAdj = 0,
+                       ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
                        bool AddCurrentAddress = false);
   ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
-                       unsigned char PCAdj = 0, const char *Modifier = NULL,
+                       unsigned char PCAdj = 0,
+                       ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
                        bool AddCurrentAddress = false);
-  ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier);
+  ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
   ARMConstantPoolValue();
   ~ARMConstantPoolValue();
 
   const GlobalValue *getGV() const;
   const char *getSymbol() const { return S; }
   const BlockAddress *getBlockAddress() const;
-  const char *getModifier() const { return Modifier; }
-  bool hasModifier() const { return Modifier != NULL; }
+  ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+  const char *getModifierText() const {
+    switch (Modifier) {
+    default: llvm_unreachable("Unknown modifier!");
+    // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+    // strings if that's legal.
+    case ARMCP::no_modifier: return "none";
+    case ARMCP::TLSGD:       return "tlsgd";
+    case ARMCP::GOT:         return "GOT";
+    case ARMCP::GOTOFF:      return "GOTOFF";
+    case ARMCP::GOTTPOFF:    return "gottpoff";
+    case ARMCP::TPOFF:       return "tpoff";
+    }
+  }
+  bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
   bool mustAddCurrentAddress() const { return AddCurrentAddress; }
   unsigned getLabelId() const { return LabelId; }
   unsigned char getPCAdjustment() const { return PCAdjust; }
@@ -71,11 +96,7 @@ public:
   bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
   bool isLSDA() { return Kind == ARMCP::CPLSDA; }
 
-  virtual unsigned getRelocationInfo() const {
-    // FIXME: This is conservatively claiming that these entries require a
-    // relocation, we may be able to do better than this.
-    return 2;
-  }
+  virtual unsigned getRelocationInfo() const { return 2; }
 
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
                                         unsigned Alignment);
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 000000000000..51e68b4553ff
--- /dev/null
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+  : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+                        TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+  switch (MachineRelTy) {
+  case ARM::reloc_arm_absolute:
+  case ARM::reloc_arm_relative:
+  case ARM::reloc_arm_cp_entry:
+  case ARM::reloc_arm_vfp_cp_entry:
+  case ARM::reloc_arm_machine_cp_entry:
+  case ARM::reloc_arm_jt_base:
+  case ARM::reloc_arm_pic_jt:
+    assert(0 && "unsupported ARM relocation type"); break;
+    
+  case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+  case ARM::reloc_arm_movt:   return ELF::R_ARM_MOVT_ABS; break;
+  case ARM::reloc_arm_movw:   return ELF::R_ARM_MOVW_ABS_NC; break;
+  default:
+    llvm_unreachable("unknown ARM relocation type"); break;
+  }
+  return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+                                                    long int Modifier) const {
+  assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+  return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+  assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+  return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+  assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+  return 1;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+  assert(0 &&
+         "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+  return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+                                             unsigned RelOffset,
+                                             unsigned RelTy) const {
+  assert(0 &&
+         "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+  return 0;
+}
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 000000000000..1c4e5329ac61
--- /dev/null
+++ b/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+  class ARMELFWriterInfo : public TargetELFWriterInfo {
+  public:
+    ARMELFWriterInfo(TargetMachine &TM);
+    virtual ~ARMELFWriterInfo();
+
+    /// getRelocationType - Returns the target specific ELF Relocation type.
+    /// 'MachineRelTy' contains the object code independent relocation type
+    virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+    /// hasRelocationAddend - True if the target uses an addend in the
+    /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return false; }
+
+    /// getDefaultAddendForRelTy - Gets the default addend value for a
+    /// relocation entry based on the target ELF relocation type.
+    virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+                                              long int Modifier = 0) const;
+
+    /// getRelTySize - Returns the size of relocatable field in bits
+    virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+    /// isPCRelativeRel - True if the relocation type is pc relative
+    virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getJumpTableRelocationTy - Returns the machine relocation type used
+    /// to reference a jumptable.
+    virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields could be relocated
+    /// directly, avoiding the relocation symbol emission, compute the
+    /// final relocation value for this symbol.
+    virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+                                       unsigned RelTy) const;
+  };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc2e3c3fadae..bd753d29abde 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -7,36 +7,38 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains a pass that expand pseudo instructions into target
+// This file contains a pass that expands pseudo instructions into target
 // instructions to allow proper scheduling, if-conversion, and other late
 // optimizations. This pass should be run after register allocation but before
-// post- regalloc scheduling pass.
+// the post-regalloc scheduling pass.
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "arm-pseudo"
 #include "ARM.h"
+#include "ARMAddressingModes.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
 using namespace llvm;
 
 namespace {
   class ARMExpandPseudo : public MachineFunctionPass {
-    // Constants for register spacing in NEON load/store instructions.
-    enum NEONRegSpacing {
-      SingleSpc,
-      EvenDblSpc,
-      OddDblSpc
-    };
-
   public:
     static char ID;
     ARMExpandPseudo() : MachineFunctionPass(ID) {}
 
-    const TargetInstrInfo *TII;
+    const ARMBaseInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const ARMSubtarget *STI;
+    ARMFunctionInfo *AFI;
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
 
@@ -47,11 +49,16 @@ namespace {
   private:
     void TransferImpOps(MachineInstr &OldMI,
                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+    bool ExpandMI(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MBBI);
     bool ExpandMBB(MachineBasicBlock &MBB);
-    void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
-                   bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
-    void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
-                   bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+    void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+    void ExpandVST(MachineBasicBlock::iterator &MBBI);
+    void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+    void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+                    unsigned Opc, bool IsExt, unsigned NumRegs);
+    void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator &MBBI);
   };
   char ARMExpandPseudo::ID = 0;
 }
@@ -67,44 +74,349 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
     const MachineOperand &MO = OldMI.getOperand(i);
     assert(MO.isReg() && MO.getReg());
     if (MO.isUse())
-      UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+      UseMI.addOperand(MO);
     else
-      DefMI.addReg(MO.getReg(),
-                   getDefRegState(true) | getDeadRegState(MO.isDead()));
+      DefMI.addOperand(MO);
+  }
+}
+
+namespace {
+  // Constants for register spacing in NEON load/store instructions.
+  // For quad-register load-lane and store-lane pseudo instructors, the
+  // spacing is initially assumed to be EvenDblSpc, and that is changed to
+  // OddDblSpc depending on the lane number operand.
+  enum NEONRegSpacing {
+    SingleSpc,
+    EvenDblSpc,
+    OddDblSpc
+  };
+
+  // Entries for NEON load/store information table.  The table is sorted by
+  // PseudoOpc for fast binary-search lookups.
+  struct NEONLdStTableEntry {
+    unsigned PseudoOpc;
+    unsigned RealOpc;
+    bool IsLoad;
+    bool HasWriteBack;
+    NEONRegSpacing RegSpacing;
+    unsigned char NumRegs; // D registers loaded or stored
+    unsigned char RegElts; // elements per D register; used for lane ops
+
+    // Comparison methods for binary search of the table.
+    bool operator<(const NEONLdStTableEntry &TE) const {
+      return PseudoOpc < TE.PseudoOpc;
+    }
+    friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+      return TE.PseudoOpc < PseudoOpc;
+    }
+    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+                                                const NEONLdStTableEntry &TE) {
+      return PseudoOpc < TE.PseudoOpc;
+    }
+  };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo,     ARM::VLD1DUPq16,     true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true,  SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo,     ARM::VLD1DUPq32,     true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true,  SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo,      ARM::VLD1DUPq8,      true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD,  ARM::VLD1DUPq8_UPD,  true, true,  SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true,  EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo,     ARM::VLD1LNd32,     true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true,  EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo,      ARM::VLD1LNd8,      true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD,  ARM::VLD1LNd8_UPD,  true, true,  EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, SingleSpc,  4, 1 },
+{ ARM::VLD1d64QPseudo_UPD,  ARM::VLD1d64Q_UPD, true,  true,  SingleSpc,  4, 1 },
+{ ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, SingleSpc,  3, 1 },
+{ ARM::VLD1d64TPseudo_UPD,  ARM::VLD1d64T_UPD, true,  true,  SingleSpc,  3, 1 },
+
+{ ARM::VLD1q16Pseudo,       ARM::VLD1q16,      true,  false, SingleSpc,  2, 4 },
+{ ARM::VLD1q16Pseudo_UPD,   ARM::VLD1q16_UPD,  true,  true,  SingleSpc,  2, 4 },
+{ ARM::VLD1q32Pseudo,       ARM::VLD1q32,      true,  false, SingleSpc,  2, 2 },
+{ ARM::VLD1q32Pseudo_UPD,   ARM::VLD1q32_UPD,  true,  true,  SingleSpc,  2, 2 },
+{ ARM::VLD1q64Pseudo,       ARM::VLD1q64,      true,  false, SingleSpc,  2, 1 },
+{ ARM::VLD1q64Pseudo_UPD,   ARM::VLD1q64_UPD,  true,  true,  SingleSpc,  2, 1 },
+{ ARM::VLD1q8Pseudo,        ARM::VLD1q8,       true,  false, SingleSpc,  2, 8 },
+{ ARM::VLD1q8Pseudo_UPD,    ARM::VLD1q8_UPD,   true,  true,  SingleSpc,  2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo,     ARM::VLD2DUPd16,     true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true,  SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo,     ARM::VLD2DUPd32,     true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true,  SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo,      ARM::VLD2DUPd8,      true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD,  ARM::VLD2DUPd8_UPD,  true, true,  SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, SingleSpc,  2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true,  SingleSpc,  2, 4 },
+{ ARM::VLD2LNd32Pseudo,     ARM::VLD2LNd32,     true, false, SingleSpc,  2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true,  SingleSpc,  2, 2 },
+{ ARM::VLD2LNd8Pseudo,      ARM::VLD2LNd8,      true, false, SingleSpc,  2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD,  ARM::VLD2LNd8_UPD,  true, true,  SingleSpc,  2, 8 },
+{ ARM::VLD2LNq16Pseudo,     ARM::VLD2LNq16,     true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true,  EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo,     ARM::VLD2LNq32,     true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true,  EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo,       ARM::VLD2d16,      true,  false, SingleSpc,  2, 4 },
+{ ARM::VLD2d16Pseudo_UPD,   ARM::VLD2d16_UPD,  true,  true,  SingleSpc,  2, 4 },
+{ ARM::VLD2d32Pseudo,       ARM::VLD2d32,      true,  false, SingleSpc,  2, 2 },
+{ ARM::VLD2d32Pseudo_UPD,   ARM::VLD2d32_UPD,  true,  true,  SingleSpc,  2, 2 },
+{ ARM::VLD2d8Pseudo,        ARM::VLD2d8,       true,  false, SingleSpc,  2, 8 },
+{ ARM::VLD2d8Pseudo_UPD,    ARM::VLD2d8_UPD,   true,  true,  SingleSpc,  2, 8 },
+
+{ ARM::VLD2q16Pseudo,       ARM::VLD2q16,      true,  false, SingleSpc,  4, 4 },
+{ ARM::VLD2q16Pseudo_UPD,   ARM::VLD2q16_UPD,  true,  true,  SingleSpc,  4, 4 },
+{ ARM::VLD2q32Pseudo,       ARM::VLD2q32,      true,  false, SingleSpc,  4, 2 },
+{ ARM::VLD2q32Pseudo_UPD,   ARM::VLD2q32_UPD,  true,  true,  SingleSpc,  4, 2 },
+{ ARM::VLD2q8Pseudo,        ARM::VLD2q8,       true,  false, SingleSpc,  4, 8 },
+{ ARM::VLD2q8Pseudo_UPD,    ARM::VLD2q8_UPD,   true,  true,  SingleSpc,  4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo,     ARM::VLD3DUPd16,     true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true,  SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo,     ARM::VLD3DUPd32,     true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true,  SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo,      ARM::VLD3DUPd8,      true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD,  ARM::VLD3DUPd8_UPD,  true, true,  SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo,     ARM::VLD3LNd16,     true, false, SingleSpc,  3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true,  SingleSpc,  3, 4 },
+{ ARM::VLD3LNd32Pseudo,     ARM::VLD3LNd32,     true, false, SingleSpc,  3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true,  SingleSpc,  3, 2 },
+{ ARM::VLD3LNd8Pseudo,      ARM::VLD3LNd8,      true, false, SingleSpc,  3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD,  ARM::VLD3LNd8_UPD,  true, true,  SingleSpc,  3, 8 },
+{ ARM::VLD3LNq16Pseudo,     ARM::VLD3LNq16,     true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true,  EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo,     ARM::VLD3LNq32,     true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true,  EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo,       ARM::VLD3d16,      true,  false, SingleSpc,  3, 4 },
+{ ARM::VLD3d16Pseudo_UPD,   ARM::VLD3d16_UPD,  true,  true,  SingleSpc,  3, 4 },
+{ ARM::VLD3d32Pseudo,       ARM::VLD3d32,      true,  false, SingleSpc,  3, 2 },
+{ ARM::VLD3d32Pseudo_UPD,   ARM::VLD3d32_UPD,  true,  true,  SingleSpc,  3, 2 },
+{ ARM::VLD3d8Pseudo,        ARM::VLD3d8,       true,  false, SingleSpc,  3, 8 },
+{ ARM::VLD3d8Pseudo_UPD,    ARM::VLD3d8_UPD,   true,  true,  SingleSpc,  3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD,    ARM::VLD3q16_UPD, true,  true,  EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo,     ARM::VLD3q16,     true,  false, OddDblSpc,  3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true,  true,  OddDblSpc,  3, 4 },
+{ ARM::VLD3q32Pseudo_UPD,    ARM::VLD3q32_UPD, true,  true,  EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo,     ARM::VLD3q32,     true,  false, OddDblSpc,  3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true,  true,  OddDblSpc,  3, 2 },
+{ ARM::VLD3q8Pseudo_UPD,     ARM::VLD3q8_UPD,  true,  true,  EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo,      ARM::VLD3q8,      true,  false, OddDblSpc,  3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD,  ARM::VLD3q8_UPD,  true,  true,  OddDblSpc,  3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo,     ARM::VLD4DUPd16,     true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true,  SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo,     ARM::VLD4DUPd32,     true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true,  SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo,      ARM::VLD4DUPd8,      true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD,  ARM::VLD4DUPd8_UPD,  true, true,  SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo,     ARM::VLD4LNd16,     true, false, SingleSpc,  4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true,  SingleSpc,  4, 4 },
+{ ARM::VLD4LNd32Pseudo,     ARM::VLD4LNd32,     true, false, SingleSpc,  4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true,  SingleSpc,  4, 2 },
+{ ARM::VLD4LNd8Pseudo,      ARM::VLD4LNd8,      true, false, SingleSpc,  4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD,  ARM::VLD4LNd8_UPD,  true, true,  SingleSpc,  4, 8 },
+{ ARM::VLD4LNq16Pseudo,     ARM::VLD4LNq16,     true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true,  EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo,     ARM::VLD4LNq32,     true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true,  EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo,       ARM::VLD4d16,      true,  false, SingleSpc,  4, 4 },
+{ ARM::VLD4d16Pseudo_UPD,   ARM::VLD4d16_UPD,  true,  true,  SingleSpc,  4, 4 },
+{ ARM::VLD4d32Pseudo,       ARM::VLD4d32,      true,  false, SingleSpc,  4, 2 },
+{ ARM::VLD4d32Pseudo_UPD,   ARM::VLD4d32_UPD,  true,  true,  SingleSpc,  4, 2 },
+{ ARM::VLD4d8Pseudo,        ARM::VLD4d8,       true,  false, SingleSpc,  4, 8 },
+{ ARM::VLD4d8Pseudo_UPD,    ARM::VLD4d8_UPD,   true,  true,  SingleSpc,  4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD,    ARM::VLD4q16_UPD, true,  true,  EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo,     ARM::VLD4q16,     true,  false, OddDblSpc,  4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true,  true,  OddDblSpc,  4, 4 },
+{ ARM::VLD4q32Pseudo_UPD,    ARM::VLD4q32_UPD, true,  true,  EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo,     ARM::VLD4q32,     true,  false, OddDblSpc,  4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true,  true,  OddDblSpc,  4, 2 },
+{ ARM::VLD4q8Pseudo_UPD,     ARM::VLD4q8_UPD,  true,  true,  EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo,      ARM::VLD4q8,      true,  false, OddDblSpc,  4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD,  ARM::VLD4q8_UPD,  true,  true,  OddDblSpc,  4, 8 },
+
+{ ARM::VST1LNq16Pseudo,     ARM::VST1LNd16,    false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true,  EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo,     ARM::VST1LNd32,    false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true,  EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo,      ARM::VST1LNd8,     false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD,  ARM::VST1LNd8_UPD, false, true,  EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo,      ARM::VST1d64Q,     false, false, SingleSpc,  4, 1 },
+{ ARM::VST1d64QPseudo_UPD,  ARM::VST1d64Q_UPD, false, true,  SingleSpc,  4, 1 },
+{ ARM::VST1d64TPseudo,      ARM::VST1d64T,     false, false, SingleSpc,  3, 1 },
+{ ARM::VST1d64TPseudo_UPD,  ARM::VST1d64T_UPD, false, true,  SingleSpc,  3, 1 },
+
+{ ARM::VST1q16Pseudo,       ARM::VST1q16,      false, false, SingleSpc,  2, 4 },
+{ ARM::VST1q16Pseudo_UPD,   ARM::VST1q16_UPD,  false, true,  SingleSpc,  2, 4 },
+{ ARM::VST1q32Pseudo,       ARM::VST1q32,      false, false, SingleSpc,  2, 2 },
+{ ARM::VST1q32Pseudo_UPD,   ARM::VST1q32_UPD,  false, true,  SingleSpc,  2, 2 },
+{ ARM::VST1q64Pseudo,       ARM::VST1q64,      false, false, SingleSpc,  2, 1 },
+{ ARM::VST1q64Pseudo_UPD,   ARM::VST1q64_UPD,  false, true,  SingleSpc,  2, 1 },
+{ ARM::VST1q8Pseudo,        ARM::VST1q8,       false, false, SingleSpc,  2, 8 },
+{ ARM::VST1q8Pseudo_UPD,    ARM::VST1q8_UPD,   false, true,  SingleSpc,  2, 8 },
+
+{ ARM::VST2LNd16Pseudo,     ARM::VST2LNd16,     false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true,  SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo,     ARM::VST2LNd32,     false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true,  SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo,      ARM::VST2LNd8,      false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD,  ARM::VST2LNd8_UPD,  false, true,  SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo,     ARM::VST2LNq16,     false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true,  EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo,     ARM::VST2LNq32,     false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true,  EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo,       ARM::VST2d16,      false, false, SingleSpc,  2, 4 },
+{ ARM::VST2d16Pseudo_UPD,   ARM::VST2d16_UPD,  false, true,  SingleSpc,  2, 4 },
+{ ARM::VST2d32Pseudo,       ARM::VST2d32,      false, false, SingleSpc,  2, 2 },
+{ ARM::VST2d32Pseudo_UPD,   ARM::VST2d32_UPD,  false, true,  SingleSpc,  2, 2 },
+{ ARM::VST2d8Pseudo,        ARM::VST2d8,       false, false, SingleSpc,  2, 8 },
+{ ARM::VST2d8Pseudo_UPD,    ARM::VST2d8_UPD,   false, true,  SingleSpc,  2, 8 },
+
+{ ARM::VST2q16Pseudo,       ARM::VST2q16,      false, false, SingleSpc,  4, 4 },
+{ ARM::VST2q16Pseudo_UPD,   ARM::VST2q16_UPD,  false, true,  SingleSpc,  4, 4 },
+{ ARM::VST2q32Pseudo,       ARM::VST2q32,      false, false, SingleSpc,  4, 2 },
+{ ARM::VST2q32Pseudo_UPD,   ARM::VST2q32_UPD,  false, true,  SingleSpc,  4, 2 },
+{ ARM::VST2q8Pseudo,        ARM::VST2q8,       false, false, SingleSpc,  4, 8 },
+{ ARM::VST2q8Pseudo_UPD,    ARM::VST2q8_UPD,   false, true,  SingleSpc,  4, 8 },
+
+{ ARM::VST3LNd16Pseudo,     ARM::VST3LNd16,     false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true,  SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo,     ARM::VST3LNd32,     false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true,  SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo,      ARM::VST3LNd8,      false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD,  ARM::VST3LNd8_UPD,  false, true,  SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo,     ARM::VST3LNq16,     false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true,  EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo,     ARM::VST3LNq32,     false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true,  EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo,       ARM::VST3d16,      false, false, SingleSpc,  3, 4 },
+{ ARM::VST3d16Pseudo_UPD,   ARM::VST3d16_UPD,  false, true,  SingleSpc,  3, 4 },
+{ ARM::VST3d32Pseudo,       ARM::VST3d32,      false, false, SingleSpc,  3, 2 },
+{ ARM::VST3d32Pseudo_UPD,   ARM::VST3d32_UPD,  false, true,  SingleSpc,  3, 2 },
+{ ARM::VST3d8Pseudo,        ARM::VST3d8,       false, false, SingleSpc,  3, 8 },
+{ ARM::VST3d8Pseudo_UPD,    ARM::VST3d8_UPD,   false, true,  SingleSpc,  3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD,    ARM::VST3q16_UPD, false, true,  EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo,     ARM::VST3q16,     false, false, OddDblSpc,  3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true,  OddDblSpc,  3, 4 },
+{ ARM::VST3q32Pseudo_UPD,    ARM::VST3q32_UPD, false, true,  EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo,     ARM::VST3q32,     false, false, OddDblSpc,  3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true,  OddDblSpc,  3, 2 },
+{ ARM::VST3q8Pseudo_UPD,     ARM::VST3q8_UPD,  false, true,  EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo,      ARM::VST3q8,      false, false, OddDblSpc,  3, 8 },
+{ ARM::VST3q8oddPseudo_UPD,  ARM::VST3q8_UPD,  false, true,  OddDblSpc,  3, 8 },
+
+{ ARM::VST4LNd16Pseudo,     ARM::VST4LNd16,     false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true,  SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo,     ARM::VST4LNd32,     false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true,  SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo,      ARM::VST4LNd8,      false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD,  ARM::VST4LNd8_UPD,  false, true,  SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo,     ARM::VST4LNq16,     false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true,  EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo,     ARM::VST4LNq32,     false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true,  EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo,       ARM::VST4d16,      false, false, SingleSpc,  4, 4 },
+{ ARM::VST4d16Pseudo_UPD,   ARM::VST4d16_UPD,  false, true,  SingleSpc,  4, 4 },
+{ ARM::VST4d32Pseudo,       ARM::VST4d32,      false, false, SingleSpc,  4, 2 },
+{ ARM::VST4d32Pseudo_UPD,   ARM::VST4d32_UPD,  false, true,  SingleSpc,  4, 2 },
+{ ARM::VST4d8Pseudo,        ARM::VST4d8,       false, false, SingleSpc,  4, 8 },
+{ ARM::VST4d8Pseudo_UPD,    ARM::VST4d8_UPD,   false, true,  SingleSpc,  4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD,    ARM::VST4q16_UPD, false, true,  EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo,     ARM::VST4q16,     false, false, OddDblSpc,  4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true,  OddDblSpc,  4, 4 },
+{ ARM::VST4q32Pseudo_UPD,    ARM::VST4q32_UPD, false, true,  EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo,     ARM::VST4q32,     false, false, OddDblSpc,  4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true,  OddDblSpc,  4, 2 },
+{ ARM::VST4q8Pseudo_UPD,     ARM::VST4q8_UPD,  false, true,  EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo,      ARM::VST4q8,      false, false, OddDblSpc,  4, 8 },
+{ ARM::VST4q8oddPseudo_UPD,  ARM::VST4q8_UPD,  false, true,  OddDblSpc,  4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+  unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+  // Make sure the table is sorted.
+  static bool TableChecked = false;
+  if (!TableChecked) {
+    for (unsigned i = 0; i != NumEntries-1; ++i)
+      assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+             "NEONLdStTable is not sorted!");
+    TableChecked = true;
+  }
+#endif
+
+  const NEONLdStTableEntry *I =
+    std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+  if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+    return I;
+  return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing.  Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+                        const TargetRegisterInfo *TRI, unsigned &D0,
+                        unsigned &D1, unsigned &D2, unsigned &D3) {
+  if (RegSpc == SingleSpc) {
+    D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+    D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+    D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+    D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+  } else if (RegSpc == EvenDblSpc) {
+    D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+    D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+    D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+    D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+  } else {
+    assert(RegSpc == OddDblSpc && "unknown register spacing");
+    D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+    D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+    D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+    D3 = TRI->getSubReg(Reg, ARM::dsub_7);
   }
 }
 
 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
 /// operands to real VLD instructions with D register operands.
-void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
-                                unsigned Opc, bool hasWriteBack,
-                                NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
 
-  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+  assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  unsigned NumRegs = TableEntry->NumRegs;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                    TII->get(TableEntry->RealOpc));
   unsigned OpIdx = 0;
 
   bool DstIsDead = MI.getOperand(OpIdx).isDead();
   unsigned DstReg = MI.getOperand(OpIdx++).getReg();
   unsigned D0, D1, D2, D3;
-  if (RegSpc == SingleSpc) {
-    D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
-    D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
-    D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
-    D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
-  } else if (RegSpc == EvenDblSpc) {
-    D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
-    D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
-    D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
-    D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
-  } else {
-    assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
-    D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
-    D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
-    D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
-    D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
-  } 
+  GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
   MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
     .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
   if (NumRegs > 2)
@@ -112,107 +424,373 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
   if (NumRegs > 3)
     MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
 
-  if (hasWriteBack) {
-    bool WBIsDead = MI.getOperand(OpIdx).isDead();
-    unsigned WBReg = MI.getOperand(OpIdx++).getReg();
-    MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
-  }
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
+
   // Copy the addrmode6 operands.
-  bool AddrIsKill = MI.getOperand(OpIdx).isKill();
-  MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
-  MIB.addImm(MI.getOperand(OpIdx++).getImm());
-  if (hasWriteBack) {
-    // Copy the am6offset operand.
-    bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
-    MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
-  }
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  // Copy the am6offset operand.
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
 
-  MIB = AddDefaultPred(MIB);
-  TransferImpOps(MI, MIB, MIB);
-  // For an instruction writing the odd subregs, add an implicit use of the
-  // super-register because the even subregs were loaded separately.
-  if (RegSpc == OddDblSpc)
-    MIB.addReg(DstReg, RegState::Implicit);
+  // For an instruction writing double-spaced subregs, the pseudo instruction
+  // has an extra operand that is a use of the super-register.  Record the
+  // operand index and skip over it.
+  unsigned SrcOpIdx = 0;
+  if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+    SrcOpIdx = OpIdx++;
+
+  // Copy the predicate operands.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+
+  // Copy the super-register source operand used for double-spaced subregs over
+  // to the new instruction as an implicit operand.
+  if (SrcOpIdx != 0) {
+    MachineOperand MO = MI.getOperand(SrcOpIdx);
+    MO.setImplicit(true);
+    MIB.addOperand(MO);
+  }
   // Add an implicit def for the super-register.
   MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+  TransferImpOps(MI, MIB, MIB);
   MI.eraseFromParent();
 }
 
 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
 /// operands to real VST instructions with D register operands.
-void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
-                                unsigned Opc, bool hasWriteBack,
-                                NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
 
-  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+  assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  unsigned NumRegs = TableEntry->NumRegs;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                    TII->get(TableEntry->RealOpc));
   unsigned OpIdx = 0;
-  if (hasWriteBack) {
-    bool DstIsDead = MI.getOperand(OpIdx).isDead();
-    unsigned DstReg = MI.getOperand(OpIdx++).getReg();
-    MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
-  }
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
+
   // Copy the addrmode6 operands.
-  bool AddrIsKill = MI.getOperand(OpIdx).isKill();
-  MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
-  MIB.addImm(MI.getOperand(OpIdx++).getImm());
-  if (hasWriteBack) {
-    // Copy the am6offset operand.
-    bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
-    MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
-  }
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  // Copy the am6offset operand.
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
 
   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
-  unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+  unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
   unsigned D0, D1, D2, D3;
-  if (RegSpc == SingleSpc) {
-    D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
-    D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
-    D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
-    D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
-  } else if (RegSpc == EvenDblSpc) {
-    D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
-    D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
-    D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
-    D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
-  } else {
-    assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
-    D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
-    D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
-    D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
-    D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
-  } 
-
+  GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
   MIB.addReg(D0).addReg(D1);
   if (NumRegs > 2)
     MIB.addReg(D2);
   if (NumRegs > 3)
     MIB.addReg(D3);
-  MIB = AddDefaultPred(MIB);
+
+  // Copy the predicate operands.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+
+  if (SrcIsKill)
+    // Add an implicit kill for the super-reg.
+    (*MIB).addRegisterKilled(SrcReg, TRI, true);
+  TransferImpOps(MI, MIB, MIB);
+  MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+  assert(TableEntry && "NEONLdStTable lookup failed");
+  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  unsigned NumRegs = TableEntry->NumRegs;
+  unsigned RegElts = TableEntry->RegElts;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                    TII->get(TableEntry->RealOpc));
+  unsigned OpIdx = 0;
+  // The lane operand is always the 3rd from last operand, before the 2
+  // predicate operands.
+  unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+  // Adjust the lane and spacing as needed for Q registers.
+  assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+  if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+    RegSpc = OddDblSpc;
+    Lane -= RegElts;
+  }
+  assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+  unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+  unsigned DstReg = 0;
+  bool DstIsDead = false;
+  if (TableEntry->IsLoad) {
+    DstIsDead = MI.getOperand(OpIdx).isDead();
+    DstReg = MI.getOperand(OpIdx++).getReg();
+    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+    if (NumRegs > 1)
+      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+    if (NumRegs > 2)
+      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+    if (NumRegs > 3)
+      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+  }
+
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
+
+  // Copy the addrmode6 operands.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  // Copy the am6offset operand.
+  if (TableEntry->HasWriteBack)
+    MIB.addOperand(MI.getOperand(OpIdx++));
+
+  // Grab the super-register source.
+  MachineOperand MO = MI.getOperand(OpIdx++);
+  if (!TableEntry->IsLoad)
+    GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+  // Add the subregs as sources of the new instruction.
+  unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+                       getKillRegState(MO.isKill()));
+  MIB.addReg(D0, SrcFlags);
+  if (NumRegs > 1)
+    MIB.addReg(D1, SrcFlags);
+  if (NumRegs > 2)
+    MIB.addReg(D2, SrcFlags);
+  if (NumRegs > 3)
+    MIB.addReg(D3, SrcFlags);
+
+  // Add the lane number operand.
+  MIB.addImm(Lane);
+  OpIdx += 1;
+
+  // Copy the predicate operands.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+
+  // Copy the super-register source to be an implicit source.
+  MO.setImplicit(true);
+  MIB.addOperand(MO);
+  if (TableEntry->IsLoad)
+    // Add an implicit def for the super-register.
+    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
   TransferImpOps(MI, MIB, MIB);
+  MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+                                 unsigned Opc, bool IsExt, unsigned NumRegs) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+  unsigned OpIdx = 0;
+
+  // Transfer the destination register operand.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  if (IsExt)
+    MIB.addOperand(MI.getOperand(OpIdx++));
+
+  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+  unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+  unsigned D0, D1, D2, D3;
+  GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+  MIB.addReg(D0).addReg(D1);
+  if (NumRegs > 2)
+    MIB.addReg(D2);
+  if (NumRegs > 3)
+    MIB.addReg(D3);
+
+  // Copy the other source register operand.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+
+  // Copy the predicate operands.
+  MIB.addOperand(MI.getOperand(OpIdx++));
+  MIB.addOperand(MI.getOperand(OpIdx++));
+
   if (SrcIsKill)
     // Add an implicit kill for the super-reg.
     (*MIB).addRegisterKilled(SrcReg, TRI, true);
+  TransferImpOps(MI, MIB, MIB);
   MI.eraseFromParent();
 }
 
-bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
-  bool Modified = false;
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+  unsigned PredReg = 0;
+  ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+  unsigned DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+  MachineInstrBuilder LO16, HI16;
 
-  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-  while (MBBI != E) {
-    MachineInstr &MI = *MBBI;
-    MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+  if (!STI->hasV6T2Ops() &&
+      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+    // Expand into a movi + orr.
+    LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+    HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+      .addReg(DstReg);
+
+    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+    unsigned ImmVal = (unsigned)MO.getImm();
+    unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+    unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+    LO16 = LO16.addImm(SOImmValV1);
+    HI16 = HI16.addImm(SOImmValV2);
+    (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+    (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+    LO16.addImm(Pred).addReg(PredReg).addReg(0);
+    HI16.addImm(Pred).addReg(PredReg).addReg(0);
+    TransferImpOps(MI, LO16, HI16);
+    MI.eraseFromParent();
+    return;
+  }
+
+  unsigned LO16Opc = 0;
+  unsigned HI16Opc = 0;
+  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+    LO16Opc = ARM::t2MOVi16;
+    HI16Opc = ARM::t2MOVTi16;
+  } else {
+    LO16Opc = ARM::MOVi16;
+    HI16Opc = ARM::MOVTi16;
+  }
 
-    bool ModifiedOp = true;
-    unsigned Opcode = MI.getOpcode();
-    switch (Opcode) {
+  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+  HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+    .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+    .addReg(DstReg);
+
+  if (MO.isImm()) {
+    unsigned Imm = MO.getImm();
+    unsigned Lo16 = Imm & 0xffff;
+    unsigned Hi16 = (Imm >> 16) & 0xffff;
+    LO16 = LO16.addImm(Lo16);
+    HI16 = HI16.addImm(Hi16);
+  } else {
+    const GlobalValue *GV = MO.getGlobal();
+    unsigned TF = MO.getTargetFlags();
+    LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+    HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+  }
+
+  (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  LO16.addImm(Pred).addReg(PredReg);
+  HI16.addImm(Pred).addReg(PredReg);
+
+  TransferImpOps(MI, LO16, HI16);
+  MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
     default:
-      ModifiedOp = false;
-      break;
+      return false;
+    case ARM::Int_eh_sjlj_dispatchsetup: {
+      MachineFunction &MF = *MI.getParent()->getParent();
+      const ARMBaseInstrInfo *AII =
+        static_cast<const ARMBaseInstrInfo*>(TII);
+      const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+      // For functions using a base pointer, we rematerialize it (via the frame
+      // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+      // for us. Otherwise, expand to nothing.
+      if (RI.hasBasePointer(MF)) {
+        int32_t NumBytes = AFI->getFramePtrSpillOffset();
+        unsigned FramePtr = RI.getFrameRegister(MF);
+        assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+               "base pointer without frame pointer?");
+
+        if (AFI->isThumb2Function()) {
+          llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                       FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+        } else if (AFI->isThumbFunction()) {
+          llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6,
+                                          FramePtr, -NumBytes,
+                                          *TII, RI, MI.getDebugLoc());
+        } else {
+          llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                        FramePtr, -NumBytes, ARMCC::AL, 0,
+                                        *TII);
+        }
+        // If there's dynamic realignment, adjust for it.
+        if (RI.needsStackRealignment(MF)) {
+          MachineFrameInfo  *MFI = MF.getFrameInfo();
+          unsigned MaxAlign = MFI->getMaxAlignment();
+          assert (!AFI->isThumb1OnlyFunction());
+          // Emit bic r6, r6, MaxAlign
+          unsigned bicOpc = AFI->isThumbFunction() ?
+            ARM::t2BICri : ARM::BICri;
+          AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                              TII->get(bicOpc), ARM::R6)
+                                      .addReg(ARM::R6, RegState::Kill)
+                                      .addImm(MaxAlign-1)));
+        }
+
+      }
+      MI.eraseFromParent();
+      return true;
+    }
 
-    case ARM::tLDRpci_pic: 
+    case ARM::MOVsrl_flag:
+    case ARM::MOVsra_flag: {
+      // These are just fancy MOVs insructions.
+      AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+                             MI.getOperand(0).getReg())
+                     .addOperand(MI.getOperand(1))
+                     .addReg(0)
+                     .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
+                                                  : ARM_AM::asr), 1)))
+        .addReg(ARM::CPSR, RegState::Define);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::RRX: {
+      // This encodes as "MOVs Rd, Rm, rrx
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+                               MI.getOperand(0).getReg())
+                       .addOperand(MI.getOperand(1))
+                       .addOperand(MI.getOperand(1))
+                       .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+        .addReg(0);
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::TPsoft: {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                TII->get(ARM::BL))
+        .addExternalSymbol("__aeabi_read_tp", 0);
+
+      (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::tLDRpci_pic:
     case ARM::t2LDRpci_pic: {
       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
         ? ARM::tLDRpci : ARM::t2LDRpci;
@@ -225,54 +803,73 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
       (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                          TII->get(ARM::tPICADD))
-        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
         .addReg(DstReg)
         .addOperand(MI.getOperand(2));
       TransferImpOps(MI, MIB1, MIB2);
       MI.eraseFromParent();
-      break;
+      return true;
     }
 
-    case ARM::MOVi32imm:
-    case ARM::t2MOVi32imm: {
-      unsigned PredReg = 0;
-      ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+    case ARM::MOV_ga_dyn:
+    case ARM::MOV_ga_pcrel:
+    case ARM::MOV_ga_pcrel_ldr:
+    case ARM::t2MOV_ga_dyn:
+    case ARM::t2MOV_ga_pcrel: {
+      // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
+      unsigned LabelId = AFI->createPICLabelUId();
       unsigned DstReg = MI.getOperand(0).getReg();
       bool DstIsDead = MI.getOperand(0).isDead();
-      const MachineOperand &MO = MI.getOperand(1);
-      MachineInstrBuilder LO16, HI16;
-
-      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                     TII->get(Opcode == ARM::MOVi32imm ?
-                              ARM::MOVi16 : ARM::t2MOVi16),
-                     DstReg);
-      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                     TII->get(Opcode == ARM::MOVi32imm ?
-                              ARM::MOVTi16 : ARM::t2MOVTi16))
-        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
-        .addReg(DstReg);
-
-      if (MO.isImm()) {
-        unsigned Imm = MO.getImm();
-        unsigned Lo16 = Imm & 0xffff;
-        unsigned Hi16 = (Imm >> 16) & 0xffff;
-        LO16 = LO16.addImm(Lo16);
-        HI16 = HI16.addImm(Hi16);
-      } else {
-        const GlobalValue *GV = MO.getGlobal();
-        unsigned TF = MO.getTargetFlags();
-        LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
-        HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+      const MachineOperand &MO1 = MI.getOperand(1);
+      const GlobalValue *GV = MO1.getGlobal();
+      unsigned TF = MO1.getTargetFlags();
+      bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+      bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+      unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+      unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+      unsigned LO16TF = isPIC
+        ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+      unsigned HI16TF = isPIC
+        ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+      unsigned PICAddOpc = isARM
+        ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+        : ARM::tPICADD;
+      MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(LO16Opc), DstReg)
+        .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+        .addImm(LabelId);
+      MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(HI16Opc), DstReg)
+        .addReg(DstReg)
+        .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+        .addImm(LabelId);
+      if (!isPIC) {
+        TransferImpOps(MI, MIB1, MIB2);
+        MI.eraseFromParent();
+        return true;
       }
-      (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-      (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-      LO16.addImm(Pred).addReg(PredReg);
-      HI16.addImm(Pred).addReg(PredReg);
-      TransferImpOps(MI, LO16, HI16);
+
+      MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(PICAddOpc))
+        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+        .addReg(DstReg).addImm(LabelId);
+      if (isARM) {
+        AddDefaultPred(MIB3);
+        if (Opcode == ARM::MOV_ga_pcrel_ldr)
+          (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      }
+      TransferImpOps(MI, MIB1, MIB3);
       MI.eraseFromParent();
-      break;
+      return true;
     }
 
+    case ARM::MOVi32imm:
+    case ARM::MOVCCi32imm:
+    case ARM::t2MOVi32imm:
+    case ARM::t2MOVCCi32imm:
+      ExpandMOV32BitImm(MBB, MBBI);
+      return true;
+
     case ARM::VMOVQQ: {
       unsigned DstReg = MI.getOperand(0).getReg();
       bool DstIsDead = MI.getOperand(0).isDead();
@@ -285,222 +882,339 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
       MachineInstrBuilder Even =
         AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                TII->get(ARM::VMOVQ))
-                     .addReg(EvenDst,
-                             getDefRegState(true) | getDeadRegState(DstIsDead))
-                     .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+                       .addReg(EvenDst,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(EvenSrc, getKillRegState(SrcIsKill)));
       MachineInstrBuilder Odd =
         AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                TII->get(ARM::VMOVQ))
-                     .addReg(OddDst,
-                             getDefRegState(true) | getDeadRegState(DstIsDead))
-                     .addReg(OddSrc, getKillRegState(SrcIsKill)));
+                       .addReg(OddDst,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(OddSrc, getKillRegState(SrcIsKill)));
       TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
+      return true;
+    }
+
+    case ARM::VLDMQIA:
+    case ARM::VLDMQDB: {
+      unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB;
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+      unsigned OpIdx = 0;
+
+      // Grab the Q register destination.
+      bool DstIsDead = MI.getOperand(OpIdx).isDead();
+      unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+      // Copy the source register.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+
+      // Copy the predicate operands.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+      MIB.addOperand(MI.getOperand(OpIdx++));
+
+      // Add the destination operands (D subregs).
+      unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+      unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+      MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+        .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+      // Add an implicit def for the super-register.
+      MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    case ARM::VSTMQIA:
+    case ARM::VSTMQDB: {
+      unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB;
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+      unsigned OpIdx = 0;
+
+      // Grab the Q register source.
+      bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+      unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+      // Copy the destination register.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+
+      // Copy the predicate operands.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+      MIB.addOperand(MI.getOperand(OpIdx++));
+
+      // Add the source operands (D subregs).
+      unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+      unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+      MIB.addReg(D0).addReg(D1);
+
+      if (SrcIsKill)
+        // Add an implicit kill for the Q register.
+        (*MIB).addRegisterKilled(SrcReg, TRI, true);
+
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::VDUPfqf:
+    case ARM::VDUPfdf:{
+      unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+      unsigned OpIdx = 0;
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+      unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+                                               Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+      // The lane is [0,1] for the containing DReg superregister.
+      // Copy the dst/src register operands.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+      MIB.addReg(DReg);
+      ++OpIdx;
+      // Add the lane select operand.
+      MIB.addImm(Lane);
+      // Add the predicate operands.
+      MIB.addOperand(MI.getOperand(OpIdx++));
+      MIB.addOperand(MI.getOperand(OpIdx++));
+
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
     }
 
     case ARM::VLD1q8Pseudo:
-      ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
     case ARM::VLD1q16Pseudo:
-      ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
     case ARM::VLD1q32Pseudo:
-      ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
     case ARM::VLD1q64Pseudo:
-      ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
     case ARM::VLD1q8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
     case ARM::VLD1q16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
     case ARM::VLD1q32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
     case ARM::VLD1q64Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
-
     case ARM::VLD2d8Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
     case ARM::VLD2d16Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
     case ARM::VLD2d32Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
     case ARM::VLD2q8Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
     case ARM::VLD2q16Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
     case ARM::VLD2q32Pseudo:
-      ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
     case ARM::VLD2d8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
     case ARM::VLD2d16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
     case ARM::VLD2d32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
     case ARM::VLD2q8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
     case ARM::VLD2q16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
     case ARM::VLD2q32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
-
     case ARM::VLD3d8Pseudo:
-      ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
     case ARM::VLD3d16Pseudo:
-      ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
     case ARM::VLD3d32Pseudo:
-      ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
     case ARM::VLD1d64TPseudo:
-      ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
     case ARM::VLD3d8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
     case ARM::VLD3d16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
     case ARM::VLD3d32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
     case ARM::VLD1d64TPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
     case ARM::VLD3q8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
     case ARM::VLD3q16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
     case ARM::VLD3q32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+    case ARM::VLD3q8oddPseudo:
+    case ARM::VLD3q16oddPseudo:
+    case ARM::VLD3q32oddPseudo:
     case ARM::VLD3q8oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
     case ARM::VLD3q16oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
     case ARM::VLD3q32oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
-
     case ARM::VLD4d8Pseudo:
-      ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
     case ARM::VLD4d16Pseudo:
-      ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
     case ARM::VLD4d32Pseudo:
-      ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
     case ARM::VLD1d64QPseudo:
-      ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
     case ARM::VLD4d8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
     case ARM::VLD4d16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
     case ARM::VLD4d32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
     case ARM::VLD1d64QPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
     case ARM::VLD4q8Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
     case ARM::VLD4q16Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
     case ARM::VLD4q32Pseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+    case ARM::VLD4q8oddPseudo:
+    case ARM::VLD4q16oddPseudo:
+    case ARM::VLD4q32oddPseudo:
     case ARM::VLD4q8oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
     case ARM::VLD4q16oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
     case ARM::VLD4q32oddPseudo_UPD:
-      ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+    case ARM::VLD1DUPq8Pseudo:
+    case ARM::VLD1DUPq16Pseudo:
+    case ARM::VLD1DUPq32Pseudo:
+    case ARM::VLD1DUPq8Pseudo_UPD:
+    case ARM::VLD1DUPq16Pseudo_UPD:
+    case ARM::VLD1DUPq32Pseudo_UPD:
+    case ARM::VLD2DUPd8Pseudo:
+    case ARM::VLD2DUPd16Pseudo:
+    case ARM::VLD2DUPd32Pseudo:
+    case ARM::VLD2DUPd8Pseudo_UPD:
+    case ARM::VLD2DUPd16Pseudo_UPD:
+    case ARM::VLD2DUPd32Pseudo_UPD:
+    case ARM::VLD3DUPd8Pseudo:
+    case ARM::VLD3DUPd16Pseudo:
+    case ARM::VLD3DUPd32Pseudo:
+    case ARM::VLD3DUPd8Pseudo_UPD:
+    case ARM::VLD3DUPd16Pseudo_UPD:
+    case ARM::VLD3DUPd32Pseudo_UPD:
+    case ARM::VLD4DUPd8Pseudo:
+    case ARM::VLD4DUPd16Pseudo:
+    case ARM::VLD4DUPd32Pseudo:
+    case ARM::VLD4DUPd8Pseudo_UPD:
+    case ARM::VLD4DUPd16Pseudo_UPD:
+    case ARM::VLD4DUPd32Pseudo_UPD:
+      ExpandVLD(MBBI);
+      return true;
 
     case ARM::VST1q8Pseudo:
-      ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
     case ARM::VST1q16Pseudo:
-      ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
     case ARM::VST1q32Pseudo:
-      ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
     case ARM::VST1q64Pseudo:
-      ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
     case ARM::VST1q8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
     case ARM::VST1q16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
     case ARM::VST1q32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
     case ARM::VST1q64Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
-
     case ARM::VST2d8Pseudo:
-      ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
     case ARM::VST2d16Pseudo:
-      ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
     case ARM::VST2d32Pseudo:
-      ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
     case ARM::VST2q8Pseudo:
-      ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
     case ARM::VST2q16Pseudo:
-      ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
     case ARM::VST2q32Pseudo:
-      ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
     case ARM::VST2d8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
     case ARM::VST2d16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
     case ARM::VST2d32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
     case ARM::VST2q8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
     case ARM::VST2q16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
     case ARM::VST2q32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
-
     case ARM::VST3d8Pseudo:
-      ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
     case ARM::VST3d16Pseudo:
-      ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
     case ARM::VST3d32Pseudo:
-      ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
     case ARM::VST1d64TPseudo:
-      ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
     case ARM::VST3d8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
     case ARM::VST3d16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
     case ARM::VST3d32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
     case ARM::VST1d64TPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
     case ARM::VST3q8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
     case ARM::VST3q16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
     case ARM::VST3q32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+    case ARM::VST3q8oddPseudo:
+    case ARM::VST3q16oddPseudo:
+    case ARM::VST3q32oddPseudo:
     case ARM::VST3q8oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
     case ARM::VST3q16oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
     case ARM::VST3q32oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
-
     case ARM::VST4d8Pseudo:
-      ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
     case ARM::VST4d16Pseudo:
-      ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
     case ARM::VST4d32Pseudo:
-      ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
     case ARM::VST1d64QPseudo:
-      ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
     case ARM::VST4d8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
     case ARM::VST4d16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
     case ARM::VST4d32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
     case ARM::VST1d64QPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
     case ARM::VST4q8Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
     case ARM::VST4q16Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
     case ARM::VST4q32Pseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+    case ARM::VST4q8oddPseudo:
+    case ARM::VST4q16oddPseudo:
+    case ARM::VST4q32oddPseudo:
     case ARM::VST4q8oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
     case ARM::VST4q16oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
     case ARM::VST4q32oddPseudo_UPD:
-      ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
-    }
+      ExpandVST(MBBI);
+      return true;
+
+    case ARM::VLD1LNq8Pseudo:
+    case ARM::VLD1LNq16Pseudo:
+    case ARM::VLD1LNq32Pseudo:
+    case ARM::VLD1LNq8Pseudo_UPD:
+    case ARM::VLD1LNq16Pseudo_UPD:
+    case ARM::VLD1LNq32Pseudo_UPD:
+    case ARM::VLD2LNd8Pseudo:
+    case ARM::VLD2LNd16Pseudo:
+    case ARM::VLD2LNd32Pseudo:
+    case ARM::VLD2LNq16Pseudo:
+    case ARM::VLD2LNq32Pseudo:
+    case ARM::VLD2LNd8Pseudo_UPD:
+    case ARM::VLD2LNd16Pseudo_UPD:
+    case ARM::VLD2LNd32Pseudo_UPD:
+    case ARM::VLD2LNq16Pseudo_UPD:
+    case ARM::VLD2LNq32Pseudo_UPD:
+    case ARM::VLD3LNd8Pseudo:
+    case ARM::VLD3LNd16Pseudo:
+    case ARM::VLD3LNd32Pseudo:
+    case ARM::VLD3LNq16Pseudo:
+    case ARM::VLD3LNq32Pseudo:
+    case ARM::VLD3LNd8Pseudo_UPD:
+    case ARM::VLD3LNd16Pseudo_UPD:
+    case ARM::VLD3LNd32Pseudo_UPD:
+    case ARM::VLD3LNq16Pseudo_UPD:
+    case ARM::VLD3LNq32Pseudo_UPD:
+    case ARM::VLD4LNd8Pseudo:
+    case ARM::VLD4LNd16Pseudo:
+    case ARM::VLD4LNd32Pseudo:
+    case ARM::VLD4LNq16Pseudo:
+    case ARM::VLD4LNq32Pseudo:
+    case ARM::VLD4LNd8Pseudo_UPD:
+    case ARM::VLD4LNd16Pseudo_UPD:
+    case ARM::VLD4LNd32Pseudo_UPD:
+    case ARM::VLD4LNq16Pseudo_UPD:
+    case ARM::VLD4LNq32Pseudo_UPD:
+    case ARM::VST1LNq8Pseudo:
+    case ARM::VST1LNq16Pseudo:
+    case ARM::VST1LNq32Pseudo:
+    case ARM::VST1LNq8Pseudo_UPD:
+    case ARM::VST1LNq16Pseudo_UPD:
+    case ARM::VST1LNq32Pseudo_UPD:
+    case ARM::VST2LNd8Pseudo:
+    case ARM::VST2LNd16Pseudo:
+    case ARM::VST2LNd32Pseudo:
+    case ARM::VST2LNq16Pseudo:
+    case ARM::VST2LNq32Pseudo:
+    case ARM::VST2LNd8Pseudo_UPD:
+    case ARM::VST2LNd16Pseudo_UPD:
+    case ARM::VST2LNd32Pseudo_UPD:
+    case ARM::VST2LNq16Pseudo_UPD:
+    case ARM::VST2LNq32Pseudo_UPD:
+    case ARM::VST3LNd8Pseudo:
+    case ARM::VST3LNd16Pseudo:
+    case ARM::VST3LNd32Pseudo:
+    case ARM::VST3LNq16Pseudo:
+    case ARM::VST3LNq32Pseudo:
+    case ARM::VST3LNd8Pseudo_UPD:
+    case ARM::VST3LNd16Pseudo_UPD:
+    case ARM::VST3LNd32Pseudo_UPD:
+    case ARM::VST3LNq16Pseudo_UPD:
+    case ARM::VST3LNq32Pseudo_UPD:
+    case ARM::VST4LNd8Pseudo:
+    case ARM::VST4LNd16Pseudo:
+    case ARM::VST4LNd32Pseudo:
+    case ARM::VST4LNq16Pseudo:
+    case ARM::VST4LNq32Pseudo:
+    case ARM::VST4LNd8Pseudo_UPD:
+    case ARM::VST4LNd16Pseudo_UPD:
+    case ARM::VST4LNd32Pseudo_UPD:
+    case ARM::VST4LNq16Pseudo_UPD:
+    case ARM::VST4LNq32Pseudo_UPD:
+      ExpandLaneOp(MBBI);
+      return true;
+
+    case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+    case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+    case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+    case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+    case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+    case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+  }
+
+  return false;
+}
 
-    if (ModifiedOp)
-      Modified = true;
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+    Modified |= ExpandMI(MBB, MBBI);
     MBBI = NMBBI;
   }
 
@@ -508,8 +1222,11 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
 }
 
 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  TRI = MF.getTarget().getRegisterInfo();
+  const TargetMachine &TM = MF.getTarget();
+  TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  TRI = TM.getRegisterInfo();
+  STI = &TM.getSubtarget<ARMSubtarget>();
+  AFI = MF.getInfo<ARMFunctionInfo>();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4892eae95833..9f295302db0e 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -15,14 +15,17 @@
 
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
 #include "ARMRegisterInfo.h"
 #include "ARMTargetMachine.h"
 #include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -30,7 +33,9 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -43,12 +48,37 @@
 using namespace llvm;
 
 static cl::opt<bool>
-EnableARMFastISel("arm-fast-isel",
-                  cl::desc("Turn on experimental ARM fast-isel support"),
-                  cl::init(false), cl::Hidden);
+DisableARMFastISel("disable-arm-fast-isel",
+                    cl::desc("Turn off experimental ARM fast-isel support"),
+                    cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
 
 namespace {
 
+  // All possible address modes, plus some.
+  typedef struct Address {
+    enum {
+      RegBase,
+      FrameIndexBase
+    } BaseType;
+
+    union {
+      unsigned Reg;
+      int FI;
+    } Base;
+
+    int Offset;
+    unsigned Scale;
+    unsigned PlusReg;
+
+    // Innocuous defaults for our address.
+    Address()
+     : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
+       Base.Reg = 0;
+     }
+  } Address;
+
 class ARMFastISel : public FastISel {
 
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -57,13 +87,14 @@ class ARMFastISel : public FastISel {
   const TargetMachine &TM;
   const TargetInstrInfo &TII;
   const TargetLowering &TLI;
-  const ARMFunctionInfo *AFI;
+  ARMFunctionInfo *AFI;
 
-  // Convenience variable to avoid checking all the time.
+  // Convenience variables to avoid some queries.
   bool isThumb;
+  LLVMContext *Context;
 
   public:
-    explicit ARMFastISel(FunctionLoweringInfo &funcInfo) 
+    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
     : FastISel(funcInfo),
       TM(funcInfo.MF->getTarget()),
       TII(*TM.getInstrInfo()),
@@ -71,6 +102,7 @@ class ARMFastISel : public FastISel {
       Subtarget = &TM.getSubtarget<ARMSubtarget>();
       AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
       isThumb = AFI->isThumbFunction();
+      Context = &funcInfo.Fn->getContext();
     }
 
     // Code from FastISel.cpp.
@@ -102,36 +134,73 @@ class ARMFastISel : public FastISel {
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
-                                                
+
     // Backend specific FastISel code.
     virtual bool TargetSelectInstruction(const Instruction *I);
     virtual unsigned TargetMaterializeConstant(const Constant *C);
+    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
 
   #include "ARMGenFastISel.inc"
-  
+
     // Instruction selection routines.
-    virtual bool ARMSelectLoad(const Instruction *I);
-    virtual bool ARMSelectStore(const Instruction *I);
-    virtual bool ARMSelectBranch(const Instruction *I);
+  private:
+    bool SelectLoad(const Instruction *I);
+    bool SelectStore(const Instruction *I);
+    bool SelectBranch(const Instruction *I);
+    bool SelectCmp(const Instruction *I);
+    bool SelectFPExt(const Instruction *I);
+    bool SelectFPTrunc(const Instruction *I);
+    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+    bool SelectSIToFP(const Instruction *I);
+    bool SelectFPToSI(const Instruction *I);
+    bool SelectSDiv(const Instruction *I);
+    bool SelectSRem(const Instruction *I);
+    bool SelectCall(const Instruction *I);
+    bool SelectSelect(const Instruction *I);
+    bool SelectRet(const Instruction *I);
 
     // Utility routines.
   private:
-    bool isTypeLegal(const Type *Ty, EVT &VT);
-    bool isLoadTypeLegal(const Type *Ty, EVT &VT);
-    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
-    bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
-    bool ARMLoadAlloca(const Instruction *I);
-    bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
-    bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
-    bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
-    
+    bool isTypeLegal(const Type *Ty, MVT &VT);
+    bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+    bool ARMComputeAddress(const Value *Obj, Address &Addr);
+    void ARMSimplifyAddress(Address &Addr, EVT VT);
+    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+
+    // Call handling routines.
+  private:
+    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+                        unsigned &ResultReg);
+    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+                         SmallVectorImpl<unsigned> &ArgRegs,
+                         SmallVectorImpl<MVT> &ArgVTs,
+                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+                         SmallVectorImpl<unsigned> &RegArgs,
+                         CallingConv::ID CC,
+                         unsigned &NumBytes);
+    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                    const Instruction *I, CallingConv::ID CC,
+                    unsigned &NumBytes);
+    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+    // OptionalDef handling routines.
+  private:
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+    void AddLoadStoreOperands(EVT VT, Address &Addr,
+                              const MachineInstrBuilder &MIB);
 };
 
 } // end anonymous namespace
 
-// #include "ARMGenCallingConv.inc"
+#include "ARMGenCallingConv.inc"
 
 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
 // we don't care about implicit defs here, just places we'll need to add a
@@ -153,6 +222,9 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
 const MachineInstrBuilder &
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
@@ -160,7 +232,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   // Do we use a predicate?
   if (TII.isPredicable(MI))
     AddDefaultPred(MIB);
-  
+
   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
   bool CPSR = false;
@@ -297,7 +369,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                      uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
   const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-  
+
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
@@ -323,16 +395,84 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
   return ResultReg;
 }
 
-unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
-  EVT VT = TLI.getValueType(C->getType(), true);
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+  if (VT == MVT::f64) return 0;
 
-  // Only handle simple types.
-  if (!VT.isSimple()) return 0;
-  
-  // TODO: This should be safe for fp because they're just bits from the
-  // Constant.
-  // TODO: Theoretically we could materialize fp constants with instructions
-  // from VFP3.
+  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(ARM::VMOVRS), MoveReg)
+                  .addReg(SrcReg));
+  return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+  if (VT == MVT::i64) return 0;
+
+  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(ARM::VMOVSR), MoveReg)
+                  .addReg(SrcReg));
+  return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+  const APFloat Val = CFP->getValueAPF();
+  bool is64bit = VT == MVT::f64;
+
+  // This checks to see if we can use VFP3 instructions to materialize
+  // a constant, otherwise we have to go through the constant pool.
+  if (TLI.isFPImmLegal(Val, VT)) {
+    unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                            DestReg)
+                    .addFPImm(CFP));
+    return DestReg;
+  }
+
+  // Require VFP2 for loading fp constants.
+  if (!Subtarget->hasVFP2()) return false;
+
+  // MachineConstantPool wants an explicit alignment.
+  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+  if (Align == 0) {
+    // TODO: Figure out if this is correct.
+    Align = TD.getTypeAllocSize(CFP->getType());
+  }
+  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+  // The extra reg is for addrmode5.
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                          DestReg)
+                  .addConstantPoolIndex(Idx)
+                  .addReg(0));
+  return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+  // For now 32-bit only.
+  if (VT != MVT::i32) return false;
+
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+  // If we can do this in a single instruction without a constant pool entry
+  // do so now.
+  const ConstantInt *CI = cast<ConstantInt>(C);
+  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(Opc), DestReg)
+                    .addImm(CI->getSExtValue()));
+    return DestReg;
+  }
 
   // MachineConstantPool wants an explicit alignment.
   unsigned Align = TD.getPrefTypeAlignment(C->getType());
@@ -342,58 +482,144 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
   }
   unsigned Idx = MCP.getConstantPoolIndex(C, Align);
 
-  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
-  // Different addressing modes between ARM/Thumb2 for constant pool loads.
   if (isThumb)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(ARM::t2LDRpci))
-                    .addReg(DestReg).addConstantPoolIndex(Idx));
+                            TII.get(ARM::t2LDRpci), DestReg)
+                    .addConstantPoolIndex(Idx));
   else
+    // The extra immediate is for addrmode2.
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(ARM::LDRcp))
-                    .addReg(DestReg).addConstantPoolIndex(Idx)
-                    .addReg(0).addImm(0));
-    
+                            TII.get(ARM::LDRcp), DestReg)
+                    .addConstantPoolIndex(Idx)
+                    .addImm(0));
+
   return DestReg;
 }
 
-bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
-  VT = TLI.getValueType(Ty, true);
-  
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+  // For now 32-bit only.
+  if (VT != MVT::i32) return 0;
+
+  Reloc::Model RelocM = TM.getRelocationModel();
+
+  // TODO: No external globals for now.
+  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
+
+  // TODO: Need more magic for ARM PIC.
+  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+  // MachineConstantPool wants an explicit alignment.
+  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+  if (Align == 0) {
+    // TODO: Figure out if this is correct.
+    Align = TD.getTypeAllocSize(GV->getType());
+  }
+
+  // Grab index.
+  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+  unsigned Id = AFI->createPICLabelUId();
+  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+                                                       ARMCP::CPValue, PCAdj);
+  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+  // Load value.
+  MachineInstrBuilder MIB;
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  if (isThumb) {
+    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+          .addConstantPoolIndex(Idx);
+    if (RelocM == Reloc::PIC_)
+      MIB.addImm(Id);
+  } else {
+    // The extra immediate is for addrmode2.
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+                  DestReg)
+          .addConstantPoolIndex(Idx)
+          .addImm(0);
+  }
+  AddOptionalDefs(MIB);
+  return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+  EVT VT = TLI.getValueType(C->getType(), true);
+
   // Only handle simple types.
-  if (VT == MVT::Other || !VT.isSimple()) return false;
-    
+  if (!VT.isSimple()) return 0;
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+    return ARMMaterializeFP(CFP, VT);
+  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return ARMMaterializeGV(GV, VT);
+  else if (isa<ConstantInt>(C))
+    return ARMMaterializeInt(C, VT);
+
+  return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+  // Don't handle dynamic allocas.
+  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+  MVT VT;
+  if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+  DenseMap<const AllocaInst*, int>::iterator SI =
+    FuncInfo.StaticAllocaMap.find(AI);
+
+  // This will get lowered later into the correct offsets and registers
+  // via rewriteXFrameIndex.
+  if (SI != FuncInfo.StaticAllocaMap.end()) {
+    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                            .addFrameIndex(SI->second)
+                            .addImm(0));
+    return ResultReg;
+  }
+
+  return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+  EVT evt = TLI.getValueType(Ty, true);
+
+  // Only handle simple types.
+  if (evt == MVT::Other || !evt.isSimple()) return false;
+  VT = evt.getSimpleVT();
+
   // Handle all legal types, i.e. a register that will directly hold this
   // value.
   return TLI.isTypeLegal(VT);
 }
 
-bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
   if (isTypeLegal(Ty, VT)) return true;
-  
+
   // If this is a type than can be sign or zero-extended to a basic operation
   // go ahead and accept it now.
   if (VT == MVT::i8 || VT == MVT::i16)
     return true;
-  
+
   return false;
 }
 
-// Computes the Reg+Offset to get to an object.
-bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
-                                      int &Offset) {
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
   // Some boilerplate from the X86 FastISel.
   const User *U = NULL;
   unsigned Opcode = Instruction::UserOp1;
   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
-    // Don't walk into other basic blocks; it's possible we haven't
-    // visited them yet, so the instructions may not yet be assigned
-    // virtual registers.
-    if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
-      return false;
-
-    Opcode = I->getOpcode();
-    U = I;
+    // Don't walk into other basic blocks unless the object is an alloca from
+    // another block, otherwise it may not have a virtual register assigned.
+    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+      Opcode = I->getOpcode();
+      U = I;
+    }
   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
     Opcode = C->getOpcode();
     U = C;
@@ -404,141 +630,282 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
       // Fast instruction selection doesn't support the special
       // address spaces.
       return false;
-  
+
   switch (Opcode) {
-    default: 
-    //errs() << "Failing Opcode is: " << *Op1 << "\n";
+    default:
     break;
+    case Instruction::BitCast: {
+      // Look through bitcasts.
+      return ARMComputeAddress(U->getOperand(0), Addr);
+    }
+    case Instruction::IntToPtr: {
+      // Look past no-op inttoptrs.
+      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+        return ARMComputeAddress(U->getOperand(0), Addr);
+      break;
+    }
+    case Instruction::PtrToInt: {
+      // Look past no-op ptrtoints.
+      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+        return ARMComputeAddress(U->getOperand(0), Addr);
+      break;
+    }
+    case Instruction::GetElementPtr: {
+      Address SavedAddr = Addr;
+      int TmpOffset = Addr.Offset;
+
+      // Iterate through the GEP folding the constants into offsets where
+      // we can.
+      gep_type_iterator GTI = gep_type_begin(U);
+      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+           i != e; ++i, ++GTI) {
+        const Value *Op = *i;
+        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+          const StructLayout *SL = TD.getStructLayout(STy);
+          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+          TmpOffset += SL->getElementOffset(Idx);
+        } else {
+          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+          SmallVector<const Value *, 4> Worklist;
+          Worklist.push_back(Op);
+          do {
+            Op = Worklist.pop_back_val();
+            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+              // Constant-offset addressing.
+              TmpOffset += CI->getSExtValue() * S;
+            } else if (isa<AddOperator>(Op) &&
+                       isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+              // An add with a constant operand. Fold the constant.
+              ConstantInt *CI =
+                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+              TmpOffset += CI->getSExtValue() * S;
+              // Add the other operand back to the work list.
+              Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+            } else
+              goto unsupported_gep;
+          } while (!Worklist.empty());
+        }
+      }
+
+      // Try to grab the base operand now.
+      Addr.Offset = TmpOffset;
+      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+      // We failed, restore everything and try the other options.
+      Addr = SavedAddr;
+
+      unsupported_gep:
+      break;
+    }
     case Instruction::Alloca: {
-      assert(false && "Alloca should have been handled earlier!");
-      return false;
+      const AllocaInst *AI = cast<AllocaInst>(Obj);
+      DenseMap<const AllocaInst*, int>::iterator SI =
+        FuncInfo.StaticAllocaMap.find(AI);
+      if (SI != FuncInfo.StaticAllocaMap.end()) {
+        Addr.BaseType = Address::FrameIndexBase;
+        Addr.Base.FI = SI->second;
+        return true;
+      }
+      break;
     }
   }
-  
+
+  // Materialize the global variable's address into a reg which can
+  // then be used later to load the variable.
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
-    //errs() << "Failing GV is: " << GV << "\n";
-    (void)GV;
-    return false;
+    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+    if (Tmp == 0) return false;
+
+    Addr.Base.Reg = Tmp;
+    return true;
   }
-  
+
   // Try to get this in a register if nothing else has worked.
-  Reg = getRegForValue(Obj);
-  if (Reg == 0) return false;
+  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+  return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
 
-  // Since the offset may be too large for the load instruction
+  assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+  bool needsLowering = false;
+  switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      assert(false && "Unhandled load/store type!");
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      // Integer loads/stores handle 12-bit offsets.
+      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      // Floating point operands handle 8-bit offsets.
+      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+      break;
+  }
+
+  // If this is a stack pointer and the offset needs to be simplified then
+  // put the alloca address into a register, set the base type back to
+  // register and continue. This should almost never happen.
+  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+                              ARM::GPRRegisterClass;
+    unsigned ResultReg = createResultReg(RC);
+    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                            .addFrameIndex(Addr.Base.FI)
+                            .addImm(0));
+    Addr.Base.Reg = ResultReg;
+    Addr.BaseType = Address::RegBase;
+  }
+
+  // Since the offset is too large for the load/store instruction
   // get the reg+offset into a register.
-  // TODO: Verify the additions work, otherwise we'll need to add the
-  // offset instead of 0 to the instructions and do all sorts of operand
-  // munging.
-  // TODO: Optimize this somewhat.
-  if (Offset != 0) {
+  if (needsLowering) {
     ARMCC::CondCodes Pred = ARMCC::AL;
     unsigned PredReg = 0;
 
+    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+      ARM::GPRRegisterClass;
+    unsigned BaseReg = createResultReg(RC);
+
     if (!isThumb)
       emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                              Reg, Reg, Offset, Pred, PredReg,
+                              BaseReg, Addr.Base.Reg, Addr.Offset,
+                              Pred, PredReg,
                               static_cast<const ARMBaseInstrInfo&>(TII));
     else {
       assert(AFI->isThumb2Function());
       emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                             Reg, Reg, Offset, Pred, PredReg,
+                             BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
                              static_cast<const ARMBaseInstrInfo&>(TII));
     }
+    Addr.Offset = 0;
+    Addr.Base.Reg = BaseReg;
   }
-  
-  return true;
 }
 
-bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
-  Value *Op0 = I->getOperand(0);
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+                                       const MachineInstrBuilder &MIB) {
+  // addrmode5 output depends on the selection dag addressing dividing the
+  // offset by 4 that it then later multiplies. Do this here as well.
+  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+      VT.getSimpleVT().SimpleTy == MVT::f64)
+    Addr.Offset /= 4;
+    
+  // Frame base works a bit differently. Handle it separately.
+  if (Addr.BaseType == Address::FrameIndexBase) {
+    int FI = Addr.Base.FI;
+    int Offset = Addr.Offset;
+    MachineMemOperand *MMO =
+          FuncInfo.MF->getMachineMemOperand(
+                                  MachinePointerInfo::getFixedStack(FI, Offset),
+                                  MachineMemOperand::MOLoad,
+                                  MFI.getObjectSize(FI),
+                                  MFI.getObjectAlignment(FI));
+    // Now add the rest of the operands.
+    MIB.addFrameIndex(FI);
 
-  // Verify it's an alloca.
-  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-
-    if (SI != FuncInfo.StaticAllocaMap.end()) {
-      TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
-      unsigned ResultReg = createResultReg(RC);
-      TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
-                               ResultReg, SI->second, RC,
-                               TM.getRegisterInfo());
-      UpdateValueMap(I, ResultReg);
-      return true;
-    }
+    // ARM halfword load/stores need an additional operand.
+    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+    MIB.addImm(Addr.Offset);
+    MIB.addMemOperand(MMO);
+  } else {
+    // Now add the rest of the operands.
+    MIB.addReg(Addr.Base.Reg);
+  
+    // ARM halfword load/stores need an additional operand.
+    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+    MIB.addImm(Addr.Offset);
   }
-  return false;
+  AddOptionalDefs(MIB);
 }
 
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
-                              unsigned Reg, int Offset) {
-  
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
   assert(VT.isSimple() && "Non-simple types are invalid here!");
   unsigned Opc;
-  
+  TargetRegisterClass *RC;
   switch (VT.getSimpleVT().SimpleTy) {
-    default: 
-      assert(false && "Trying to emit for an unhandled type!");
-      return false;
+    // This is mostly going to be Neon/vector support.
+    default: return false;
     case MVT::i16:
-      Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
-      VT = MVT::i32;
+      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+      RC = ARM::GPRRegisterClass;
       break;
     case MVT::i8:
-      Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
-      VT = MVT::i32;
+      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+      RC = ARM::GPRRegisterClass;
       break;
     case MVT::i32:
-      Opc = isThumb ? ARM::tLDR : ARM::LDR;
+      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+      RC = ARM::GPRRegisterClass;
+      break;
+    case MVT::f32:
+      Opc = ARM::VLDRS;
+      RC = TLI.getRegClassFor(VT);
+      break;
+    case MVT::f64:
+      Opc = ARM::VLDRD;
+      RC = TLI.getRegClassFor(VT);
       break;
   }
-  
-  ResultReg = createResultReg(TLI.getRegClassFor(VT));
-  
-  // TODO: Fix the Addressing modes so that these can share some code.
-  // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
-  if (isThumb)
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(Opc), ResultReg)
-                    .addReg(Reg).addImm(Offset).addReg(0));
-  else
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(Opc), ResultReg)
-                    .addReg(Reg).addReg(0).addImm(Offset));
-                    
+  // Simplify this down to something we can handle.
+  ARMSimplifyAddress(Addr, VT);
+
+  // Create the base instruction, then add the operands.
+  ResultReg = createResultReg(RC);
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(Opc), ResultReg);
+  AddLoadStoreOperands(VT, Addr, MIB);
   return true;
 }
 
-bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
-  Value *Op1 = I->getOperand(1);
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+  // Verify we have a legal type before going any further.
+  MVT VT;
+  if (!isLoadTypeLegal(I->getType(), VT))
+    return false;
 
-  // Verify it's an alloca.
-  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
+  // See if we can handle this address.
+  Address Addr;
+  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
 
-    if (SI != FuncInfo.StaticAllocaMap.end()) {
-      TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
-      assert(SrcReg != 0 && "Nothing to store!");
-      TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
-                              SrcReg, true /*isKill*/, SI->second, RC,
-                              TM.getRegisterInfo());
-      return true;
-    }
-  }
-  return false;
+  unsigned ResultReg;
+  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+  UpdateValueMap(I, ResultReg);
+  return true;
 }
 
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
-                               unsigned DstReg, int Offset) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
   unsigned StrOpc;
   switch (VT.getSimpleVT().SimpleTy) {
+    // This is mostly going to be Neon/vector support.
     default: return false;
-    case MVT::i1:
-    case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
-    case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
-    case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+    case MVT::i1: {
+      unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+                                               ARM::GPRRegisterClass);
+      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(Opc), Res)
+                      .addReg(SrcReg).addImm(1));
+      SrcReg = Res;
+    } // Fallthrough here.
+    case MVT::i8:
+      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+      break;
+    case MVT::i16:
+      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+      break;
+    case MVT::i32:
+      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+      break;
     case MVT::f32:
       if (!Subtarget->hasVFP2()) return false;
       StrOpc = ARM::VSTRS;
@@ -548,91 +915,162 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
       StrOpc = ARM::VSTRD;
       break;
   }
-  
-  if (isThumb)
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(StrOpc), SrcReg)
-                    .addReg(DstReg).addImm(Offset).addReg(0));
-  else
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(StrOpc), SrcReg)
-                    .addReg(DstReg).addReg(0).addImm(Offset));
-  
+  // Simplify this down to something we can handle.
+  ARMSimplifyAddress(Addr, VT);
+
+  // Create the base instruction, then add the operands.
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(StrOpc))
+                            .addReg(SrcReg, getKillRegState(true));
+  AddLoadStoreOperands(VT, Addr, MIB);
   return true;
 }
 
-bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+bool ARMFastISel::SelectStore(const Instruction *I) {
   Value *Op0 = I->getOperand(0);
   unsigned SrcReg = 0;
 
-  // Yay type legalization
-  EVT VT;
+  // Verify we have a legal type before going any further.
+  MVT VT;
   if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
     return false;
 
   // Get the value to be stored into a register.
   SrcReg = getRegForValue(Op0);
-  if (SrcReg == 0)
-    return false;
-    
-  // If we're an alloca we know we have a frame index and can emit the store
-  // quickly.
-  if (ARMStoreAlloca(I, SrcReg))
-    return true;
-    
-  // Our register and offset with innocuous defaults.
-  unsigned Reg = 0;
-  int Offset = 0;
-  
-  // See if we can handle this as Reg + Offset
-  if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
-    return false;
-    
-  if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
-    
-  return false;
-  
-}
+  if (SrcReg == 0) return false;
 
-bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
-  // If we're an alloca we know we have a frame index and can emit the load
-  // directly in short order.
-  if (ARMLoadAlloca(I))
-    return true;
-    
-  // Verify we have a legal type before going any further.
-  EVT VT;
-  if (!isLoadTypeLegal(I->getType(), VT))
-    return false;
-  
-  // Our register and offset with innocuous defaults.
-  unsigned Reg = 0;
-  int Offset = 0;
-  
-  // See if we can handle this as Reg + Offset
-  if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+  // See if we can handle this address.
+  Address Addr;
+  if (!ARMComputeAddress(I->getOperand(1), Addr))
     return false;
-  
-  unsigned ResultReg;
-  if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
-  
-  UpdateValueMap(I, ResultReg);
+
+  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
   return true;
 }
 
-bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+  switch (Pred) {
+    // Needs two compares...
+    case CmpInst::FCMP_ONE:
+    case CmpInst::FCMP_UEQ:
+    default:
+      // AL is our "false" for now. The other two need more compares.
+      return ARMCC::AL;
+    case CmpInst::ICMP_EQ:
+    case CmpInst::FCMP_OEQ:
+      return ARMCC::EQ;
+    case CmpInst::ICMP_SGT:
+    case CmpInst::FCMP_OGT:
+      return ARMCC::GT;
+    case CmpInst::ICMP_SGE:
+    case CmpInst::FCMP_OGE:
+      return ARMCC::GE;
+    case CmpInst::ICMP_UGT:
+    case CmpInst::FCMP_UGT:
+      return ARMCC::HI;
+    case CmpInst::FCMP_OLT:
+      return ARMCC::MI;
+    case CmpInst::ICMP_ULE:
+    case CmpInst::FCMP_OLE:
+      return ARMCC::LS;
+    case CmpInst::FCMP_ORD:
+      return ARMCC::VC;
+    case CmpInst::FCMP_UNO:
+      return ARMCC::VS;
+    case CmpInst::FCMP_UGE:
+      return ARMCC::PL;
+    case CmpInst::ICMP_SLT:
+    case CmpInst::FCMP_ULT:
+      return ARMCC::LT;
+    case CmpInst::ICMP_SLE:
+    case CmpInst::FCMP_ULE:
+      return ARMCC::LE;
+    case CmpInst::FCMP_UNE:
+    case CmpInst::ICMP_NE:
+      return ARMCC::NE;
+    case CmpInst::ICMP_UGE:
+      return ARMCC::HS;
+    case CmpInst::ICMP_ULT:
+      return ARMCC::LO;
+  }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
   const BranchInst *BI = cast<BranchInst>(I);
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
-  
+
   // Simple branch support.
-  unsigned CondReg = getRegForValue(BI->getCondition());
-  if (CondReg == 0) return false;
-  
-  unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
-  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+
+  // If we can, avoid recomputing the compare - redoing it could lead to wonky
+  // behavior.
+  // TODO: Factor this out.
+  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+      MVT VT;
+      const Type *Ty = CI->getOperand(0)->getType();
+      if (!isTypeLegal(Ty, VT))
+        return false;
+
+      bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+      if (isFloat && !Subtarget->hasVFP2())
+        return false;
+
+      unsigned CmpOpc;
+      switch (VT.SimpleTy) {
+        default: return false;
+        // TODO: Verify compares.
+        case MVT::f32:
+          CmpOpc = ARM::VCMPES;
+          break;
+        case MVT::f64:
+          CmpOpc = ARM::VCMPED;
+          break;
+        case MVT::i32:
+          CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+          break;
+      }
+
+      // Get the compare predicate.
+      ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+      // We may not handle every CC for now.
+      if (ARMPred == ARMCC::AL) return false;
+
+      unsigned Arg1 = getRegForValue(CI->getOperand(0));
+      if (Arg1 == 0) return false;
+
+      unsigned Arg2 = getRegForValue(CI->getOperand(1));
+      if (Arg2 == 0) return false;
+
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(CmpOpc))
+                      .addReg(Arg1).addReg(Arg2));
+
+      // For floating point we need to move the result to a comparison register
+      // that we can then use for branches.
+      if (isFloat)
+        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                TII.get(ARM::FMSTAT)));
+
+      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+      .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+      FastEmitBranch(FBB, DL);
+      FuncInfo.MBB->addSuccessor(TBB);
+      return true;
+    }
+  }
+
+  unsigned CmpReg = getRegForValue(BI->getCondition());
+  if (CmpReg == 0) return false;
+
+  // Re-set the flags just in case.
+  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
-                  .addReg(CondReg).addReg(CondReg));
+                  .addReg(CmpReg).addImm(0));
+
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
                   .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   FastEmitBranch(FBB, DL);
@@ -640,18 +1078,809 @@ bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
   return true;
 }
 
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+  const CmpInst *CI = cast<CmpInst>(I);
+
+  MVT VT;
+  const Type *Ty = CI->getOperand(0)->getType();
+  if (!isTypeLegal(Ty, VT))
+    return false;
+
+  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+  if (isFloat && !Subtarget->hasVFP2())
+    return false;
+
+  unsigned CmpOpc;
+  unsigned CondReg;
+  switch (VT.SimpleTy) {
+    default: return false;
+    // TODO: Verify compares.
+    case MVT::f32:
+      CmpOpc = ARM::VCMPES;
+      CondReg = ARM::FPSCR;
+      break;
+    case MVT::f64:
+      CmpOpc = ARM::VCMPED;
+      CondReg = ARM::FPSCR;
+      break;
+    case MVT::i32:
+      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+      CondReg = ARM::CPSR;
+      break;
+  }
+
+  // Get the compare predicate.
+  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+  // We may not handle every CC for now.
+  if (ARMPred == ARMCC::AL) return false;
+
+  unsigned Arg1 = getRegForValue(CI->getOperand(0));
+  if (Arg1 == 0) return false;
+
+  unsigned Arg2 = getRegForValue(CI->getOperand(1));
+  if (Arg2 == 0) return false;
+
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(Arg1).addReg(Arg2));
+
+  // For floating point we need to move the result to a comparison register
+  // that we can then use for branches.
+  if (isFloat)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::FMSTAT)));
+
+  // Now set a register based on the comparison. Explicitly set the predicates
+  // here.
+  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+  TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+                                    : ARM::GPRRegisterClass;
+  unsigned DestReg = createResultReg(RC);
+  Constant *Zero
+    = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+  unsigned ZeroReg = TargetMaterializeConstant(Zero);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+          .addReg(ZeroReg).addImm(1)
+          .addImm(ARMPred).addReg(CondReg);
+
+  UpdateValueMap(I, DestReg);
+  return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+  // Make sure we have VFP and that we're extending float to double.
+  if (!Subtarget->hasVFP2()) return false;
+
+  Value *V = I->getOperand(0);
+  if (!I->getType()->isDoubleTy() ||
+      !V->getType()->isFloatTy()) return false;
+
+  unsigned Op = getRegForValue(V);
+  if (Op == 0) return false;
+
+  unsigned Result = createResultReg(ARM::DPRRegisterClass);
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(ARM::VCVTDS), Result)
+                  .addReg(Op));
+  UpdateValueMap(I, Result);
+  return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+  // Make sure we have VFP and that we're truncating double to float.
+  if (!Subtarget->hasVFP2()) return false;
+
+  Value *V = I->getOperand(0);
+  if (!(I->getType()->isFloatTy() &&
+        V->getType()->isDoubleTy())) return false;
+
+  unsigned Op = getRegForValue(V);
+  if (Op == 0) return false;
+
+  unsigned Result = createResultReg(ARM::SPRRegisterClass);
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(ARM::VCVTSD), Result)
+                  .addReg(Op));
+  UpdateValueMap(I, Result);
+  return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+  // Make sure we have VFP.
+  if (!Subtarget->hasVFP2()) return false;
+
+  MVT DstVT;
+  const Type *Ty = I->getType();
+  if (!isTypeLegal(Ty, DstVT))
+    return false;
+
+  unsigned Op = getRegForValue(I->getOperand(0));
+  if (Op == 0) return false;
+
+  // The conversion routine works on fp-reg to fp-reg and the operand above
+  // was an integer, move it to the fp registers if possible.
+  unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+  if (FP == 0) return false;
+
+  unsigned Opc;
+  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+  else return 0;
+
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                          ResultReg)
+                  .addReg(FP));
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+  // Make sure we have VFP.
+  if (!Subtarget->hasVFP2()) return false;
+
+  MVT DstVT;
+  const Type *RetTy = I->getType();
+  if (!isTypeLegal(RetTy, DstVT))
+    return false;
+
+  unsigned Op = getRegForValue(I->getOperand(0));
+  if (Op == 0) return false;
+
+  unsigned Opc;
+  const Type *OpTy = I->getOperand(0)->getType();
+  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+  else return 0;
+
+  // f64->s32 or f32->s32 both need an intermediate f32 reg.
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                          ResultReg)
+                  .addReg(Op));
+
+  // This result needs to be in an integer register, but the conversion only
+  // takes place in fp-regs.
+  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+  if (IntReg == 0) return false;
+
+  UpdateValueMap(I, IntReg);
+  return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  // Things need to be register sized for register moves.
+  if (VT != MVT::i32) return false;
+  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+  unsigned CondReg = getRegForValue(I->getOperand(0));
+  if (CondReg == 0) return false;
+  unsigned Op1Reg = getRegForValue(I->getOperand(1));
+  if (Op1Reg == 0) return false;
+  unsigned Op2Reg = getRegForValue(I->getOperand(2));
+  if (Op2Reg == 0) return false;
+
+  unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CondReg).addImm(1));
+  unsigned ResultReg = createResultReg(RC);
+  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+    .addReg(Op1Reg).addReg(Op2Reg)
+    .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+  MVT VT;
+  const Type *Ty = I->getType();
+  if (!isTypeLegal(Ty, VT))
+    return false;
+
+  // If we have integer div support we should have selected this automagically.
+  // In case we have a real miss go ahead and return false and we'll pick
+  // it up later.
+  if (Subtarget->hasDivide()) return false;
+
+  // Otherwise emit a libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i8)
+    LC = RTLIB::SDIV_I8;
+  else if (VT == MVT::i16)
+    LC = RTLIB::SDIV_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::SDIV_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SDIV_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SDIV_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+  return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+  MVT VT;
+  const Type *Ty = I->getType();
+  if (!isTypeLegal(Ty, VT))
+    return false;
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i8)
+    LC = RTLIB::SREM_I8;
+  else if (VT == MVT::i16)
+    LC = RTLIB::SREM_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::SREM_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SREM_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SREM_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+  return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+  EVT VT  = TLI.getValueType(I->getType(), true);
+
+  // We can get here in the case when we want to use NEON for our fp
+  // operations, but can't figure out how to. Just use the vfp instructions
+  // if we have them.
+  // FIXME: It'd be nice to use NEON instructions.
+  const Type *Ty = I->getType();
+  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+  if (isFloat && !Subtarget->hasVFP2())
+    return false;
+
+  unsigned Op1 = getRegForValue(I->getOperand(0));
+  if (Op1 == 0) return false;
+
+  unsigned Op2 = getRegForValue(I->getOperand(1));
+  if (Op2 == 0) return false;
+
+  unsigned Opc;
+  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+  switch (ISDOpcode) {
+    default: return false;
+    case ISD::FADD:
+      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+      break;
+    case ISD::FSUB:
+      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+      break;
+    case ISD::FMUL:
+      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+      break;
+  }
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(Opc), ResultReg)
+                  .addReg(Op1).addReg(Op2));
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+                                 EVT SrcVT, unsigned &ResultReg) {
+  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+                           Src, /*TODO: Kill=*/false);
+
+  if (RR != 0) {
+    ResultReg = RR;
+    return true;
+  } else
+    return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+  switch (CC) {
+  default:
+    llvm_unreachable("Unsupported calling convention");
+  case CallingConv::Fast:
+    // Ignore fastcc. Silence compiler warnings.
+    (void)RetFastCC_ARM_APCS;
+    (void)FastCC_ARM_APCS;
+    // Fallthrough
+  case CallingConv::C:
+    // Use target triple & subtarget features to do actual dispatch.
+    if (Subtarget->isAAPCS_ABI()) {
+      if (Subtarget->hasVFP2() &&
+          FloatABIType == FloatABI::Hard)
+        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+      else
+        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    } else
+        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  case CallingConv::ARM_AAPCS_VFP:
+    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+  case CallingConv::ARM_AAPCS:
+    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+  case CallingConv::ARM_APCS:
+    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+                                  SmallVectorImpl<unsigned> &ArgRegs,
+                                  SmallVectorImpl<MVT> &ArgVTs,
+                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+                                  SmallVectorImpl<unsigned> &RegArgs,
+                                  CallingConv::ID CC,
+                                  unsigned &NumBytes) {
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
+  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  NumBytes = CCInfo.getNextStackOffset();
+
+  // Issue CALLSEQ_START
+  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(AdjStackDown))
+                  .addImm(NumBytes));
+
+  // Process the args.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    unsigned Arg = ArgRegs[VA.getValNo()];
+    MVT ArgVT = ArgVTs[VA.getValNo()];
+
+    // We don't handle NEON/vector parameters yet.
+    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+      return false;
+
+    // Handle arg promotion, etc.
+    switch (VA.getLocInfo()) {
+      case CCValAssign::Full: break;
+      case CCValAssign::SExt: {
+        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+                                         Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+        Emitted = true;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::ZExt: {
+        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+                                         Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+        Emitted = true;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::AExt: {
+        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+                                         Arg, ArgVT, Arg);
+        if (!Emitted)
+          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+                                      Arg, ArgVT, Arg);
+        if (!Emitted)
+          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+                                      Arg, ArgVT, Arg);
+
+        assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::BCvt: {
+        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+                                 /*TODO: Kill=*/false);
+        assert(BC != 0 && "Failed to emit a bitcast!");
+        Arg = BC;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      default: llvm_unreachable("Unknown arg promotion!");
+    }
+
+    // Now copy/store arg to correct locations.
+    if (VA.isRegLoc() && !VA.needsCustom()) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              VA.getLocReg())
+      .addReg(Arg);
+      RegArgs.push_back(VA.getLocReg());
+    } else if (VA.needsCustom()) {
+      // TODO: We need custom lowering for vector (v2f64) args.
+      if (VA.getLocVT() != MVT::f64) return false;
+
+      CCValAssign &NextVA = ArgLocs[++i];
+
+      // TODO: Only handle register args for now.
+      if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(ARM::VMOVRRD), VA.getLocReg())
+                      .addReg(NextVA.getLocReg(), RegState::Define)
+                      .addReg(Arg));
+      RegArgs.push_back(VA.getLocReg());
+      RegArgs.push_back(NextVA.getLocReg());
+    } else {
+      assert(VA.isMemLoc());
+      // Need to store on the stack.
+      Address Addr;
+      Addr.BaseType = Address::RegBase;
+      Addr.Base.Reg = ARM::SP;
+      Addr.Offset = VA.getLocMemOffset();
+
+      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+    }
+  }
+  return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                             const Instruction *I, CallingConv::ID CC,
+                             unsigned &NumBytes) {
+  // Issue CALLSEQ_END
+  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(AdjStackUp))
+                  .addImm(NumBytes).addImm(0));
+
+  // Now the return value.
+  if (RetVT != MVT::isVoid) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CC, false, TM, RVLocs, *Context);
+    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+    // Copy all of the result registers out of their specified physreg.
+    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+      // For this move we copy into two registers and then move into the
+      // double fp reg we want.
+      EVT DestVT = RVLocs[0].getValVT();
+      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+      unsigned ResultReg = createResultReg(DstRC);
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(ARM::VMOVDRR), ResultReg)
+                      .addReg(RVLocs[0].getLocReg())
+                      .addReg(RVLocs[1].getLocReg()));
+
+      UsedRegs.push_back(RVLocs[0].getLocReg());
+      UsedRegs.push_back(RVLocs[1].getLocReg());
+
+      // Finally update the result.
+      UpdateValueMap(I, ResultReg);
+    } else {
+      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+      EVT CopyVT = RVLocs[0].getValVT();
+      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+      unsigned ResultReg = createResultReg(DstRC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              ResultReg).addReg(RVLocs[0].getLocReg());
+      UsedRegs.push_back(RVLocs[0].getLocReg());
+
+      // Finally update the result.
+      UpdateValueMap(I, ResultReg);
+    }
+  }
+
+  return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+  const ReturnInst *Ret = cast<ReturnInst>(I);
+  const Function &F = *I->getParent()->getParent();
+
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+
+  if (F.isVarArg())
+    return false;
+
+  CallingConv::ID CC = F.getCallingConv();
+  if (Ret->getNumOperands() > 0) {
+    SmallVector<ISD::OutputArg, 4> Outs;
+    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                  Outs, TLI);
+
+    // Analyze operands of the call, assigning locations to each operand.
+    SmallVector<CCValAssign, 16> ValLocs;
+    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+    const Value *RV = Ret->getOperand(0);
+    unsigned Reg = getRegForValue(RV);
+    if (Reg == 0)
+      return false;
+
+    // Only handle a single return value for now.
+    if (ValLocs.size() != 1)
+      return false;
+
+    CCValAssign &VA = ValLocs[0];
+
+    // Don't bother handling odd stuff for now.
+    if (VA.getLocInfo() != CCValAssign::Full)
+      return false;
+    // Only handle register returns for now.
+    if (!VA.isRegLoc())
+      return false;
+    // TODO: For now, don't try to handle cases where getLocInfo()
+    // says Full but the types don't match.
+    if (TLI.getValueType(RV->getType()) != VA.getValVT())
+      return false;
+
+    // Make the copy.
+    unsigned SrcReg = Reg + VA.getValNo();
+    unsigned DstReg = VA.getLocReg();
+    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+    // Avoid a cross-class copy. This is very unlikely.
+    if (!SrcRC->contains(DstReg))
+      return false;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            DstReg).addReg(SrcReg);
+
+    // Mark the register as live out of the function.
+    MRI.addLiveOut(VA.getLocReg());
+  }
+
+  unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(RetOpc)));
+  return true;
+}
+
+// A quick function that will emit a call for a named libcall in F with the
+// vector of passed arguments for the Instruction in I. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+  // Handle *simple* calls for now.
+  const Type *RetTy = I->getType();
+  MVT RetVT;
+  if (RetTy->isVoidTy())
+    RetVT = MVT::isVoid;
+  else if (!isTypeLegal(RetTy, RetVT))
+    return false;
+
+  // For now we're using BLX etc on the assumption that we have v5t ops.
+  if (!Subtarget->hasV5TOps()) return false;
+
+  // TODO: For now if we have long calls specified we don't handle the call.
+  if (EnableARMLongCalls) return false;
+
+  // Set up the argument vectors.
+  SmallVector<Value*, 8> Args;
+  SmallVector<unsigned, 8> ArgRegs;
+  SmallVector<MVT, 8> ArgVTs;
+  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+  Args.reserve(I->getNumOperands());
+  ArgRegs.reserve(I->getNumOperands());
+  ArgVTs.reserve(I->getNumOperands());
+  ArgFlags.reserve(I->getNumOperands());
+  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+    Value *Op = I->getOperand(i);
+    unsigned Arg = getRegForValue(Op);
+    if (Arg == 0) return false;
+
+    const Type *ArgTy = Op->getType();
+    MVT ArgVT;
+    if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+    ISD::ArgFlagsTy Flags;
+    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+    Flags.setOrigAlign(OriginalAlignment);
+
+    Args.push_back(Op);
+    ArgRegs.push_back(Arg);
+    ArgVTs.push_back(ArgVT);
+    ArgFlags.push_back(Flags);
+  }
+
+  // Handle the arguments now that we've gotten them.
+  SmallVector<unsigned, 4> RegArgs;
+  unsigned NumBytes;
+  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+    return false;
+
+  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+  // TODO: Turn this into the table of arm call ops.
+  MachineInstrBuilder MIB;
+  unsigned CallOpc;
+  if(isThumb) {
+    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+    // Explicitly adding the predicate here.
+    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                         TII.get(CallOpc)))
+                         .addExternalSymbol(TLI.getLibcallName(Call));
+  } else {
+    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+    // Explicitly adding the predicate here.
+    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                         TII.get(CallOpc))
+          .addExternalSymbol(TLI.getLibcallName(Call)));
+  }
+
+  // Add implicit physical register uses to the call.
+  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+    MIB.addReg(RegArgs[i]);
+
+  // Finish off the call including any return values.
+  SmallVector<unsigned, 4> UsedRegs;
+  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+  // Set all unused physreg defs as dead.
+  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+  return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+  const CallInst *CI = cast<CallInst>(I);
+  const Value *Callee = CI->getCalledValue();
+
+  // Can't handle inline asm or worry about intrinsics yet.
+  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+  // Only handle global variable Callees that are direct calls.
+  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+  if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
+    return false;
+
+  // Check the calling convention.
+  ImmutableCallSite CS(CI);
+  CallingConv::ID CC = CS.getCallingConv();
+
+  // TODO: Avoid some calling conventions?
+
+  // Let SDISel handle vararg functions.
+  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  if (FTy->isVarArg())
+    return false;
+
+  // Handle *simple* calls for now.
+  const Type *RetTy = I->getType();
+  MVT RetVT;
+  if (RetTy->isVoidTy())
+    RetVT = MVT::isVoid;
+  else if (!isTypeLegal(RetTy, RetVT))
+    return false;
+
+  // For now we're using BLX etc on the assumption that we have v5t ops.
+  // TODO: Maybe?
+  if (!Subtarget->hasV5TOps()) return false;
+
+  // TODO: For now if we have long calls specified we don't handle the call.
+  if (EnableARMLongCalls) return false;
+  
+  // Set up the argument vectors.
+  SmallVector<Value*, 8> Args;
+  SmallVector<unsigned, 8> ArgRegs;
+  SmallVector<MVT, 8> ArgVTs;
+  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+  Args.reserve(CS.arg_size());
+  ArgRegs.reserve(CS.arg_size());
+  ArgVTs.reserve(CS.arg_size());
+  ArgFlags.reserve(CS.arg_size());
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    unsigned Arg = getRegForValue(*i);
+
+    if (Arg == 0)
+      return false;
+    ISD::ArgFlagsTy Flags;
+    unsigned AttrInd = i - CS.arg_begin() + 1;
+    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+      Flags.setSExt();
+    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+      Flags.setZExt();
+
+         // FIXME: Only handle *easy* calls for now.
+    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+        CS.paramHasAttr(AttrInd, Attribute::ByVal))
+      return false;
+
+    const Type *ArgTy = (*i)->getType();
+    MVT ArgVT;
+    if (!isTypeLegal(ArgTy, ArgVT))
+      return false;
+    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+    Flags.setOrigAlign(OriginalAlignment);
+
+    Args.push_back(*i);
+    ArgRegs.push_back(Arg);
+    ArgVTs.push_back(ArgVT);
+    ArgFlags.push_back(Flags);
+  }
+
+  // Handle the arguments now that we've gotten them.
+  SmallVector<unsigned, 4> RegArgs;
+  unsigned NumBytes;
+  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+    return false;
+
+  // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+  // TODO: Turn this into the table of arm call ops.
+  MachineInstrBuilder MIB;
+  unsigned CallOpc;
+  // Explicitly adding the predicate here.
+  if(isThumb) {
+    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+    // Explicitly adding the predicate here.
+    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                         TII.get(CallOpc)))
+          .addGlobalAddress(GV, 0, 0);
+  } else {
+    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+    // Explicitly adding the predicate here.
+    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                         TII.get(CallOpc))
+          .addGlobalAddress(GV, 0, 0));
+  }
+  
+  // Add implicit physical register uses to the call.
+  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+    MIB.addReg(RegArgs[i]);
+
+  // Finish off the call including any return values.
+  SmallVector<unsigned, 4> UsedRegs;
+  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+  // Set all unused physreg defs as dead.
+  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+  return true;
+
+}
+
 // TODO: SoftFP support.
 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
-  // No Thumb-1 for now.
-  if (isThumb && !AFI->isThumb2Function()) return false;
-  
+
   switch (I->getOpcode()) {
     case Instruction::Load:
-      return ARMSelectLoad(I);
+      return SelectLoad(I);
     case Instruction::Store:
-      return ARMSelectStore(I);
+      return SelectStore(I);
     case Instruction::Br:
-      return ARMSelectBranch(I);
+      return SelectBranch(I);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return SelectCmp(I);
+    case Instruction::FPExt:
+      return SelectFPExt(I);
+    case Instruction::FPTrunc:
+      return SelectFPTrunc(I);
+    case Instruction::SIToFP:
+      return SelectSIToFP(I);
+    case Instruction::FPToSI:
+      return SelectFPToSI(I);
+    case Instruction::FAdd:
+      return SelectBinaryOp(I, ISD::FADD);
+    case Instruction::FSub:
+      return SelectBinaryOp(I, ISD::FSUB);
+    case Instruction::FMul:
+      return SelectBinaryOp(I, ISD::FMUL);
+    case Instruction::SDiv:
+      return SelectSDiv(I);
+    case Instruction::SRem:
+      return SelectSRem(I);
+    case Instruction::Call:
+      return SelectCall(I);
+    case Instruction::Select:
+      return SelectSelect(I);
+    case Instruction::Ret:
+      return SelectRet(I);
     default: break;
   }
   return false;
@@ -659,7 +1888,14 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
 
 namespace llvm {
   llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
-    if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+    // Completely untested on non-darwin.
+    const TargetMachine &TM = funcInfo.MF->getTarget();
+
+    // Darwin and thumb1 only for now.
+    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+    if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+        !DisableARMFastISel)
+      return new ARMFastISel(funcInfo);
     return 0;
   }
 }
diff --git a/lib/Target/ARM/ARMFixupKinds.h b/lib/Target/ARM/ARMFixupKinds.h
new file mode 100644
index 000000000000..3d175e386901
--- /dev/null
+++ b/lib/Target/ARM/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+  // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+  // addresses
+  fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+  // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+  // the 16-bit halfwords reordered.
+  fixup_t2_ldst_pcrel_12,
+
+  // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+  // used in VFP instructions where the lower 2 bits are not encoded
+  // (so it's encoded as an 8-bit immediate).
+  fixup_arm_pcrel_10,
+  // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+  // the short-swapped encoding of Thumb2 instructions.
+  fixup_t2_pcrel_10,
+  // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+  // addresses where the lower 2 bits are not encoded (so it's encoded as an
+  // 8-bit immediate).
+  fixup_thumb_adr_pcrel_10,
+  // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+  // instruction.
+  fixup_arm_adr_pcrel_12,
+  // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+  // instruction.
+  fixup_t2_adr_pcrel_12,
+  // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+  // instructions. 
+  fixup_arm_condbranch,
+  // fixup_arm_uncondbranch - 24-bit PC relative relocation for 
+  // branch instructions. (unconditional)
+  fixup_arm_uncondbranch,
+  // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+  // uconditional branch instructions.
+  fixup_t2_condbranch,
+  // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+  // branch unconditional branch instructions.
+  fixup_t2_uncondbranch,
+
+  // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+  fixup_arm_thumb_br,
+
+  // fixup_arm_thumb_blx - Fixup for Thumb BL instructions.
+  fixup_arm_thumb_bl,
+
+  // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+  fixup_arm_thumb_blx,
+
+  // fixup_arm_thumb_cb - Fixup for Thumb branch instructions.
+  fixup_arm_thumb_cb,
+
+  // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+  fixup_arm_thumb_cp,
+
+  // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+  fixup_arm_thumb_bcc,
+
+  // The next two are for the movt/movw pair
+  // the 16bit imm field are split into imm{15-12} and imm{11-0}
+  fixup_arm_movt_hi16, // :upper16:
+  fixup_arm_movw_lo16, // :lower16:
+  fixup_t2_movt_hi16, // :upper16:
+  fixup_t2_movw_lo16, // :lower16:
+
+  // It is possible to create an "immediate" that happens to be pcrel.
+  // movw r0, :lower16:Foo-(Bar+8) and movt  r0, :upper16:Foo-(Bar+8)
+  // result in different reloc tags than the above two.
+  // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC
+  fixup_arm_movt_hi16_pcrel, // :upper16:
+  fixup_arm_movw_lo16_pcrel, // :lower16:
+  fixup_t2_movt_hi16_pcrel, // :upper16:
+  fixup_t2_movw_lo16_pcrel, // :lower16:
+
+  // Marker
+  LastTargetFixupKind,
+  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
deleted file mode 100644
index d5dae2442499..000000000000
--- a/lib/Target/ARM/ARMFrameInfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_FRAMEINFO_H
-#define ARM_FRAMEINFO_H
-
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/Target/TargetFrameInfo.h"
-
-namespace llvm {
-
-class ARMFrameInfo : public TargetFrameInfo {
-public:
-  explicit ARMFrameInfo(const ARMSubtarget &ST)
-    : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) {
-  }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 000000000000..f42c6db84fd3
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1021 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register.  This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+  // Mac OS X requires FP not to be clobbered for backtracing purpose.
+  if (STI.isTargetDarwin())
+    return true;
+
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Always eliminate non-leaf frame pointers.
+  return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+          RegInfo->needsStackRealignment(MF) ||
+          MFI->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function.  This eliminates the need for
+/// add/sub sp brackets around call sites.  Returns true if the call frame is
+/// included as part of the stack frame.
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has small immediate offset to
+  // address the stack frame. So a large call frame can cause poor codegen
+  // and may even makes it impossible to scavenge a register.
+  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified.  Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+                        const ARMBaseInstrInfo &TII,
+                        const unsigned *CSRegs) {
+  // Integer spill area is handled with "pop".
+  if (MI->getOpcode() == ARM::LDMIA_RET ||
+      MI->getOpcode() == ARM::t2LDMIA_RET ||
+      MI->getOpcode() == ARM::LDMIA_UPD ||
+      MI->getOpcode() == ARM::t2LDMIA_UPD ||
+      MI->getOpcode() == ARM::VLDMDIA_UPD) {
+    // The first two operands are predicates. The last two are
+    // imp-def and imp-use of SP. Check everything in between.
+    for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+        return false;
+    return true;
+  }
+  if ((MI->getOpcode() == ARM::LDR_POST ||
+       MI->getOpcode() == ARM::t2LDR_POST) &&
+      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+      MI->getOperand(1).getReg() == ARM::SP)
+    return true;
+
+  return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+             MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+             DebugLoc dl, const ARMBaseInstrInfo &TII,
+             int NumBytes,
+             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+  if (isARM)
+    emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                            Pred, PredReg, TII);
+  else
+    emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                           Pred, PredReg, TII);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo  *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const ARMBaseRegisterInfo *RegInfo =
+    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const ARMBaseInstrInfo &TII =
+    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This emitPrologue does not support Thumb1!");
+  bool isARM = !AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+  // Determine the sizes of each callee-save spill areas and record which frame
+  // belongs to which callee-save spill areas.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  // Allocate the vararg register save area. This is not counted in NumBytes.
+  if (VARegSaveSize)
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  // Move past area 1.
+  if (GPRCS1Size > 0) MBBI++;
+
+  // Set FP to point to the stack slot that contains the previous FP.
+  // For Darwin, FP is R7, which has now been stored in spill area 1.
+  // Otherwise, if this is not Darwin, all the callee-saved registers go
+  // into spill area 1, including the FP in R11.  In either case, it is
+  // now safe to emit this assignment.
+  bool HasFP = hasFP(MF);
+  if (HasFP) {
+    unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+    AddDefaultCC(AddDefaultPred(MIB));
+  }
+
+  // Move past area 2.
+  if (GPRCS2Size > 0) MBBI++;
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  if (HasFP)
+    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+                                NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+  // Move past area 3.
+  if (DPRCSSize > 0) MBBI++;
+
+  NumBytes = DPRCSOffset;
+  if (NumBytes) {
+    // Adjust SP after all the callee-save spills.
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+    if (HasFP && isARM)
+      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+      // Note it's not safe to do this in Thumb2 mode because it would have
+      // taken two instructions:
+      // mov sp, r7
+      // sub sp, #24
+      // If an interrupt is taken between the two instructions, then sp is in
+      // an inconsistent state (pointing to the middle of callee-saved area).
+      // The interrupt handler can end up clobbering the registers.
+      AFI->setShouldRestoreSPFromFP(true);
+  }
+
+  if (STI.isTargetELF() && hasFP(MF))
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+  // If we need dynamic stack realignment, do it here. Be paranoid and make
+  // sure if we also have VLAs, we have a base pointer for frame access.
+  if (RegInfo->needsStackRealignment(MF)) {
+    unsigned MaxAlign = MFI->getMaxAlignment();
+    assert (!AFI->isThumb1OnlyFunction());
+    if (!AFI->isThumbFunction()) {
+      // Emit bic sp, sp, MaxAlign
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+                                          TII.get(ARM::BICri), ARM::SP)
+                                  .addReg(ARM::SP, RegState::Kill)
+                                  .addImm(MaxAlign-1)));
+    } else {
+      // We cannot use sp as source/dest register here, thus we're emitting the
+      // following sequence:
+      // mov r4, sp
+      // bic r4, r4, MaxAlign
+      // mov sp, r4
+      // FIXME: It will be better just to find spare register here.
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
+        .addReg(ARM::SP, RegState::Kill);
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+                                          TII.get(ARM::t2BICri), ARM::R4)
+                                  .addReg(ARM::R4, RegState::Kill)
+                                  .addImm(MaxAlign-1)));
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+        .addReg(ARM::R4, RegState::Kill);
+    }
+
+    AFI->setShouldRestoreSPFromFP(true);
+  }
+
+  // If we need a base pointer, set it up here. It's whatever the value
+  // of the stack pointer is at this point. Any variable size objects
+  // will be allocated after this, so we can still use the base pointer
+  // to reference locals.
+  if (RegInfo->hasBasePointer(MF)) {
+    if (isARM)
+      BuildMI(MBB, MBBI, dl,
+              TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+        .addReg(ARM::SP)
+        .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+    else
+      BuildMI(MBB, MBBI, dl,
+              TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
+        .addReg(ARM::SP);
+  }
+
+  // If the frame has variable sized objects then the epilogue must restore
+  // the sp from fp. We can assume there's an FP here since hasFP already
+  // checks for hasVarSizedObjects.
+  if (MFI->hasVarSizedObjects())
+    AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+                                    MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->getDesc().isReturn() &&
+         "Can only insert epilog into returning blocks");
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  const ARMBaseInstrInfo &TII =
+    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This emitEpilogue does not support Thumb1!");
+  bool isARM = !AFI->isThumbFunction();
+
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+  } else {
+    // Unwind MBBI to point to first LDR / VLDRD.
+    const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+      if (!isCSRestore(MBBI, TII, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to start of FP callee save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+
+    // Reset SP based on frame pointer only if the stack frame extends beyond
+    // frame pointer stack slot or target is ELF and the function has FP.
+    if (AFI->shouldRestoreSPFromFP()) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      if (NumBytes) {
+        if (isARM)
+          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+                                  ARMCC::AL, 0, TII);
+        else {
+          // It's not possible to restore SP from FP in a single instruction.
+          // For Darwin, this looks like:
+          // mov sp, r7
+          // sub sp, #24
+          // This is bad, if an interrupt is taken after the mov, sp is in an
+          // inconsistent state.
+          // Use the first callee-saved register as a scratch register.
+          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+                 "No scratch register to restore SP from FP!");
+          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+                                 ARMCC::AL, 0, TII);
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+            .addReg(ARM::R4);
+        }
+      } else {
+        // Thumb2 or ARM.
+        if (isARM)
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+        else
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+            .addReg(FramePtr);
+      }
+    } else if (NumBytes)
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+    // Increment past our save areas.
+    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+  }
+
+  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+      RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+    // Tail call return: adjust the stack pointer and jump to callee.
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+    // Jump to label or value in register.
+    if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+      unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+        ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+        : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+      if (JumpTarget.isGlobal())
+        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                             JumpTarget.getTargetFlags());
+      else {
+        assert(JumpTarget.isSymbol());
+        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                              JumpTarget.getTargetFlags());
+      }
+    } else if (RetOpcode == ARM::TCRETURNri) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+        addReg(JumpTarget.getReg(), RegState::Kill);
+    } else if (RetOpcode == ARM::TCRETURNriND) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+        addReg(JumpTarget.getReg(), RegState::Kill);
+    }
+
+    MachineInstr *NewMI = prior(MBBI);
+    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+      NewMI->addOperand(MBBI->getOperand(i));
+
+    // Delete the pseudo instruction TCRETURN.
+    MBB.erase(MBBI);
+  }
+
+  if (VARegSaveSize)
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info.  It's the same as what we use for resolving the code-gen
+/// references for now.  FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+                                         unsigned &FrameReg) const {
+  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+                                             int FI,
+                                             unsigned &FrameReg,
+                                             int SPAdj) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const ARMBaseRegisterInfo *RegInfo =
+    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+  bool isFixed = MFI->isFixedObjectIndex(FI);
+
+  FrameReg = ARM::SP;
+  Offset += SPAdj;
+  if (AFI->isGPRCalleeSavedArea1Frame(FI))
+    return Offset - AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+    return Offset - AFI->getGPRCalleeSavedArea2Offset();
+  else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+    return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+  // When dynamically realigning the stack, use the frame pointer for
+  // parameters, and the stack/base pointer for locals.
+  if (RegInfo->needsStackRealignment(MF)) {
+    assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+    if (isFixed) {
+      FrameReg = RegInfo->getFrameRegister(MF);
+      Offset = FPOffset;
+    } else if (MFI->hasVarSizedObjects()) {
+      assert(RegInfo->hasBasePointer(MF) &&
+             "VLAs and dynamic stack alignment, but missing base pointer!");
+      FrameReg = RegInfo->getBaseRegister();
+    }
+    return Offset;
+  }
+
+  // If there is a frame pointer, use it when we can.
+  if (hasFP(MF) && AFI->hasStackFrame()) {
+    // Use frame pointer to reference fixed objects. Use it for locals if
+    // there are VLAs (and thus the SP isn't reliable as a base).
+    if (isFixed || (MFI->hasVarSizedObjects() &&
+                    !RegInfo->hasBasePointer(MF))) {
+      FrameReg = RegInfo->getFrameRegister(MF);
+      return FPOffset;
+    } else if (MFI->hasVarSizedObjects()) {
+      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+      // Try to use the frame pointer if we can, else use the base pointer
+      // since it's available. This is handy for the emergency spill slot, in
+      // particular.
+      if (AFI->isThumb2Function()) {
+        if (FPOffset >= -255 && FPOffset < 0) {
+          FrameReg = RegInfo->getFrameRegister(MF);
+          return FPOffset;
+        }
+      } else
+        FrameReg = RegInfo->getBaseRegister();
+    } else if (AFI->isThumb2Function()) {
+      // In Thumb2 mode, the negative offset is very limited. Try to avoid
+      // out of range references.
+      if (FPOffset >= -255 && FPOffset < 0) {
+        FrameReg = RegInfo->getFrameRegister(MF);
+        return FPOffset;
+      }
+    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+      // Otherwise, use SP or FP, whichever is closer to the stack slot.
+      FrameReg = RegInfo->getFrameRegister(MF);
+      return FPOffset;
+    }
+  }
+  // Use the base pointer if we have one.
+  if (RegInfo->hasBasePointer(MF))
+    FrameReg = RegInfo->getBaseRegister();
+  return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                          int FI) const {
+  unsigned FrameReg;
+  return getFrameIndexReference(MF, FI, FrameReg);
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    const std::vector<CalleeSavedInfo> &CSI,
+                                    unsigned StmOpc, unsigned StrOpc,
+                                    bool NoGap,
+                                    bool(*Func)(unsigned, bool)) const {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  SmallVector<std::pair<unsigned,bool>, 4> Regs;
+  unsigned i = CSI.size();
+  while (i != 0) {
+    unsigned LastReg = 0;
+    for (; i != 0; --i) {
+      unsigned Reg = CSI[i-1].getReg();
+      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+      // Add the callee-saved register as live-in unless it's LR and
+      // @llvm.returnaddress is called. If LR is returned for
+      // @llvm.returnaddress then it's already added to the function and
+      // entry block live-in sets.
+      bool isKill = true;
+      if (Reg == ARM::LR) {
+        if (MF.getFrameInfo()->isReturnAddressTaken() &&
+            MF.getRegInfo().isLiveIn(Reg))
+          isKill = false;
+      }
+
+      if (isKill)
+        MBB.addLiveIn(Reg);
+
+      // If NoGap is true, push consecutive registers and then leave the rest
+      // for other instructions. e.g.
+      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+      if (NoGap && LastReg && LastReg != Reg-1)
+        break;
+      LastReg = Reg;
+      Regs.push_back(std::make_pair(Reg, isKill));
+    }
+
+    if (Regs.empty())
+      continue;
+    if (Regs.size() > 1 || StrOpc== 0) {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+                       .addReg(ARM::SP));
+      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+    } else if (Regs.size() == 1) {
+      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+                                        ARM::SP)
+        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+        .addReg(ARM::SP);
+      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+      // that refactoring is complete (eventually).
+      if (StrOpc == ARM::STR_PRE) {
+        MIB.addReg(0);
+        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
+      } else
+        MIB.addImm(-4);
+      AddDefaultPred(MIB);
+    }
+    Regs.clear();
+  }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   unsigned LdmOpc, unsigned LdrOpc,
+                                   bool isVarArg, bool NoGap,
+                                   bool(*Func)(unsigned, bool)) const {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned RetOpcode = MI->getOpcode();
+  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+                     RetOpcode == ARM::TCRETURNdiND ||
+                     RetOpcode == ARM::TCRETURNri ||
+                     RetOpcode == ARM::TCRETURNriND);
+
+  SmallVector<unsigned, 4> Regs;
+  unsigned i = CSI.size();
+  while (i != 0) {
+    unsigned LastReg = 0;
+    bool DeleteRet = false;
+    for (; i != 0; --i) {
+      unsigned Reg = CSI[i-1].getReg();
+      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+      if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+        Reg = ARM::PC;
+        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+        // Fold the return instruction into the LDM.
+        DeleteRet = true;
+      }
+
+      // If NoGap is true, pop consecutive registers and then leave the rest
+      // for other instructions. e.g.
+      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+      if (NoGap && LastReg && LastReg != Reg-1)
+        break;
+
+      LastReg = Reg;
+      Regs.push_back(Reg);
+    }
+
+    if (Regs.empty())
+      continue;
+    if (Regs.size() > 1 || LdrOpc == 0) {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+                       .addReg(ARM::SP));
+      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+        MIB.addReg(Regs[i], getDefRegState(true));
+      if (DeleteRet)
+        MI->eraseFromParent();
+      MI = MIB;
+    } else if (Regs.size() == 1) {
+      // If we adjusted the reg to PC from LR above, switch it back here. We
+      // only do that for LDM.
+      if (Regs[0] == ARM::PC)
+        Regs[0] = ARM::LR;
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+          .addReg(ARM::SP, RegState::Define)
+          .addReg(ARM::SP);
+      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+      // that refactoring is complete (eventually).
+      if (LdrOpc == ARM::LDR_POST) {
+        MIB.addReg(0);
+        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+      } else
+        MIB.addImm(4);
+      AddDefaultPred(MIB);
+    }
+    Regs.clear();
+  }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+  unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+  unsigned FltOpc = ARM::VSTMDDB_UPD;
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
+  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+
+  return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+  unsigned FltOpc = ARM::VLDMDIA_UPD;
+  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+              &isARMArea2Register);
+  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+              &isARMArea1Register);
+
+  return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+                                       const ARMBaseInstrInfo &TII) {
+  unsigned FnSize = 0;
+  for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    const MachineBasicBlock &MBB = *MBBI;
+    for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+         I != E; ++I)
+      FnSize += TII.GetInstSizeInBytes(I);
+  }
+  return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  int Offset = 0;
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -FFI->getObjectOffset(i);
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (FFI->isDeadObjectIndex(i))
+      continue;
+    Offset += FFI->getObjectSize(i);
+    unsigned Align = FFI->getObjectAlignment(i);
+    // Adjust to alignment boundary
+    Offset = (Offset+Align-1)/Align*Align;
+  }
+  return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+                                         const TargetFrameLowering *TFI) {
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned Limit = (1 << 12) - 1;
+  for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (!I->getOperand(i).isFI()) continue;
+
+        // When using ADDri to get the address of a stack object, 255 is the
+        // largest offset guaranteed to fit in the immediate offset.
+        if (I->getOpcode() == ARM::ADDri) {
+          Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        }
+
+        // Otherwise check the addressing mode.
+        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+        case ARMII::AddrMode3:
+        case ARMII::AddrModeT2_i8:
+          Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode5:
+        case ARMII::AddrModeT2_i8s4:
+          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+          break;
+        case ARMII::AddrModeT2_i12:
+          // i12 supports only positive offset so these will be converted to
+          // i8 opcodes. See llvm::rewriteT2FrameIndex.
+          if (TFI->hasFP(MF) && AFI->hasStackFrame())
+            Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode4:
+        case ARMII::AddrMode6:
+          // Addressing modes 4 & 6 (load/store) instructions can't encode an
+          // immediate offset for stack references.
+          return 0;
+        default:
+          break;
+        }
+        break; // At most one FI per instruction
+      }
+    }
+  }
+
+  return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  // This tells PEI to spill the FP as if it is any other callee-save register
+  // to take advantage the eliminateFrameIndex machinery. This also ensures it
+  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+  // to combine multiple loads / stores.
+  bool CanEliminateFrame = true;
+  bool CS1Spilled = false;
+  bool LRSpilled = false;
+  unsigned NumGPRSpills = 0;
+  SmallVector<unsigned, 4> UnspilledCS1GPRs;
+  SmallVector<unsigned, 4> UnspilledCS2GPRs;
+  const ARMBaseRegisterInfo *RegInfo =
+    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const ARMBaseInstrInfo &TII =
+    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
+  // since it's not always possible to restore sp from fp in a single
+  // instruction.
+  // FIXME: It will be better just to find spare register here.
+  if (AFI->isThumb2Function() &&
+      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+    MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+  if (AFI->isThumb1OnlyFunction()) {
+    // Spill LR if Thumb1 function uses variable length argument lists.
+    if (AFI->getVarArgsRegSaveSize() > 0)
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+    // Spill R4 if Thumb1 epilogue has to restore SP from FP since 
+    // FIXME: It will be better just to find spare register here.
+    if (MFI->hasVarSizedObjects())
+      MF.getRegInfo().setPhysRegUsed(ARM::R4);
+  }
+
+  // Spill the BasePtr if it's used.
+  if (RegInfo->hasBasePointer(MF))
+    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+  // Don't spill FP if the frame can be eliminated. This is determined
+  // by scanning the callee-save registers to see if any is used.
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    bool Spilled = false;
+    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+      Spilled = true;
+      CanEliminateFrame = false;
+    } else {
+      // Check alias registers too.
+      for (const unsigned *Aliases =
+             RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+          Spilled = true;
+          CanEliminateFrame = false;
+        }
+      }
+    }
+
+    if (!ARM::GPRRegisterClass->contains(Reg))
+      continue;
+
+    if (Spilled) {
+      NumGPRSpills++;
+
+      if (!STI.isTargetDarwin()) {
+        if (Reg == ARM::LR)
+          LRSpilled = true;
+        CS1Spilled = true;
+        continue;
+      }
+
+      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+      switch (Reg) {
+      case ARM::LR:
+        LRSpilled = true;
+        // Fallthrough
+      case ARM::R4: case ARM::R5:
+      case ARM::R6: case ARM::R7:
+        CS1Spilled = true;
+        break;
+      default:
+        break;
+      }
+    } else {
+      if (!STI.isTargetDarwin()) {
+        UnspilledCS1GPRs.push_back(Reg);
+        continue;
+      }
+
+      switch (Reg) {
+      case ARM::R4: case ARM::R5:
+      case ARM::R6: case ARM::R7:
+      case ARM::LR:
+        UnspilledCS1GPRs.push_back(Reg);
+        break;
+      default:
+        UnspilledCS2GPRs.push_back(Reg);
+        break;
+      }
+    }
+  }
+
+  bool ForceLRSpill = false;
+  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+    // Force LR to be spilled if the Thumb function size is > 2048. This enables
+    // use of BL to implement far jump. If it turns out that it's not needed
+    // then the branch fix up path will undo it.
+    if (FnSize >= (1 << 11)) {
+      CanEliminateFrame = false;
+      ForceLRSpill = true;
+    }
+  }
+
+  // If any of the stack slot references may be out of range of an immediate
+  // offset, make sure a register (or a spill slot) is available for the
+  // register scavenger. Note that if we're indexing off the frame pointer, the
+  // effective stack size is 4 bytes larger since the FP points to the stack
+  // slot of the previous FP. Also, if we have variable sized objects in the
+  // function, stack slot references will often be negative, and some of
+  // our instructions are positive-offset only, so conservatively consider
+  // that case to want a spill slot (or register) as well. Similarly, if
+  // the function adjusts the stack pointer during execution and the
+  // adjustments aren't already part of our stack size estimate, our offset
+  // calculations may be off, so be conservative.
+  // FIXME: We could add logic to be more precise about negative offsets
+  //        and which instructions will need a scratch register for them. Is it
+  //        worth the effort and added fragility?
+  bool BigStack =
+    (RS &&
+     (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+      estimateRSStackSizeLimit(MF, this)))
+    || MFI->hasVarSizedObjects()
+    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+  bool ExtraCSSpill = false;
+  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+    AFI->setHasStackFrame(true);
+
+    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
+    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
+    if (!LRSpilled && CS1Spilled) {
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                    UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+      ForceLRSpill = false;
+      ExtraCSSpill = true;
+    }
+
+    if (hasFP(MF)) {
+      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      NumGPRSpills++;
+    }
+
+    // If stack and double are 8-byte aligned and we are spilling an odd number
+    // of GPRs, spill one extra callee save GPR so we won't have to pad between
+    // the integer and double callee save areas.
+    unsigned TargetAlign = getStackAlignment();
+    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+          unsigned Reg = UnspilledCS1GPRs[i];
+          // Don't spill high register if the function is thumb1
+          if (!AFI->isThumb1OnlyFunction() ||
+              isARMLowRegister(Reg) || Reg == ARM::LR) {
+            MF.getRegInfo().setPhysRegUsed(Reg);
+            if (!RegInfo->isReservedReg(MF, Reg))
+              ExtraCSSpill = true;
+            break;
+          }
+        }
+      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+        unsigned Reg = UnspilledCS2GPRs.front();
+        MF.getRegInfo().setPhysRegUsed(Reg);
+        if (!RegInfo->isReservedReg(MF, Reg))
+          ExtraCSSpill = true;
+      }
+    }
+
+    // Estimate if we might need to scavenge a register at some point in order
+    // to materialize a stack offset. If so, either spill one additional
+    // callee-saved register or reserve a special spill slot to facilitate
+    // register scavenging. Thumb1 needs a spill slot for stack pointer
+    // adjustments also, even when the frame itself is small.
+    if (BigStack && !ExtraCSSpill) {
+      // If any non-reserved CS register isn't spilled, just spill one or two
+      // extra. That should take care of it!
+      unsigned NumExtras = TargetAlign / 4;
+      SmallVector<unsigned, 2> Extras;
+      while (NumExtras && !UnspilledCS1GPRs.empty()) {
+        unsigned Reg = UnspilledCS1GPRs.back();
+        UnspilledCS1GPRs.pop_back();
+        if (!RegInfo->isReservedReg(MF, Reg) &&
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+             Reg == ARM::LR)) {
+          Extras.push_back(Reg);
+          NumExtras--;
+        }
+      }
+      // For non-Thumb1 functions, also check for hi-reg CS registers
+      if (!AFI->isThumb1OnlyFunction()) {
+        while (NumExtras && !UnspilledCS2GPRs.empty()) {
+          unsigned Reg = UnspilledCS2GPRs.back();
+          UnspilledCS2GPRs.pop_back();
+          if (!RegInfo->isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+      }
+      if (Extras.size() && NumExtras == 0) {
+        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+          MF.getRegInfo().setPhysRegUsed(Extras[i]);
+        }
+      } else if (!AFI->isThumb1OnlyFunction()) {
+        // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
+        // closest to SP or frame pointer.
+        const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+        RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                           RC->getAlignment(),
+                                                           false));
+      }
+    }
+  }
+
+  if (ForceLRSpill) {
+    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+    AFI->setLRIsSpilledForFarJump(true);
+  }
+}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 000000000000..1288b706c599
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,74 @@
+//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+  const ARMSubtarget &STI;
+
+public:
+  explicit ARMFrameLowering(const ARMSubtarget &sti)
+    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+      STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+  bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const;
+  int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+                                 unsigned &FrameReg, int SPAdj) const;
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const;
+
+ private:
+  void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                    const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+                    unsigned StrOpc, bool NoGap,
+                    bool(*Func)(unsigned, bool)) const;
+  void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                   const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+                   unsigned LdrOpc, bool isVarArg, bool NoGap,
+                   bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
index 85b0c6c248d0..3f0238387a2b 100644
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -12,7 +12,8 @@
 // global). Such a transformation can significantly reduce the register pressure
 // when many globals are involved.
 //
-// For example, consider the code which touches several global variables at once:
+// For example, consider the code which touches several global variables at 
+// once:
 //
 // static int foo[N], bar[N], baz[N];
 //
@@ -48,7 +49,7 @@
 //  str     r0, [r5], #4
 //
 //  note that we saved 2 registers here almostly "for free".
-// ===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "arm-global-merge"
 #include "ARM.h"
@@ -64,16 +65,17 @@
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;
 
 namespace {
-  class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+  class ARMGlobalMerge : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// target type sizes.
     const TargetLowering *TLI;
 
     bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
-                 Module &M, bool) const;
+                 Module &M, bool isConst) const;
 
   public:
     static char ID;             // Pass identification, replacement for typeid.
@@ -81,7 +83,7 @@ namespace {
       : FunctionPass(ID), TLI(tli) {}
 
     virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function& F);
+    virtual bool runOnFunction(Function &F);
 
     const char *getPassName() const {
       return "Merge internal globals";
@@ -95,13 +97,11 @@ namespace {
     struct GlobalCmp {
       const TargetData *TD;
 
-      GlobalCmp(const TargetData *td):
-        TD(td) { }
+      GlobalCmp(const TargetData *td) : TD(td) { }
 
-      bool operator() (const GlobalVariable* GV1,
-                       const GlobalVariable* GV2) {
-        const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
-        const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+      bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+        const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+        const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
 
         return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
       }
@@ -130,27 +130,27 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
     uint64_t MergedSize = 0;
     std::vector<const Type*> Tys;
     std::vector<Constant*> Inits;
-    for (j = i; MergedSize < MaxOffset && j != e; ++j) {
-      const Type* Ty = Globals[j]->getType()->getElementType();
+    for (j = i; j != e; ++j) {
+      const Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += TD->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
       Tys.push_back(Ty);
       Inits.push_back(Globals[j]->getInitializer());
-      MergedSize += TD->getTypeAllocSize(Ty);
     }
 
-    StructType* MergedTy = StructType::get(M.getContext(), Tys);
-    Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
-    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
                                                   GlobalValue::InternalLinkage,
-                                                  MergedInit, "merged");
+                                                  MergedInit, "_MergedGlobals");
     for (size_t k = i; k < j; ++k) {
-      SmallVector<Constant*, 2> Idx;
-      Idx.push_back(ConstantInt::get(Int32Ty, 0));
-      Idx.push_back(ConstantInt::get(Int32Ty, k-i));
-
-      Constant* GEP =
-        ConstantExpr::getInBoundsGetElementPtr(MergedGV,
-                                               &Idx[0], Idx.size());
-
+      Constant *Idx[2] = {
+        ConstantInt::get(Int32Ty, 0),
+        ConstantInt::get(Int32Ty, k-i)
+      };
+      Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
       Globals[k]->replaceAllUsesWith(GEP);
       Globals[k]->eraseFromParent();
     }
@@ -161,8 +161,8 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
 }
 
 
-bool ARMGlobalMerge::doInitialization(Module& M) {
-  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+bool ARMGlobalMerge::doInitialization(Module &M) {
+  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
   const TargetData *TD = TLI->getTargetData();
   unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
@@ -183,8 +183,11 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
         I->getName().startswith(".llvm."))
       continue;
 
-    if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
-      if (I->isConstant())
+    if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+      const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+      if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+        BSSGlobals.push_back(I);
+      else if (I->isConstant())
         ConstGlobals.push_back(I);
       else
         Globals.push_back(I);
@@ -193,17 +196,19 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
 
   if (Globals.size() > 1)
     Changed |= doMerge(Globals, M, false);
+  if (BSSGlobals.size() > 1)
+    Changed |= doMerge(BSSGlobals, M, false);
+
   // FIXME: This currently breaks the EH processing due to way how the 
   // typeinfo detection works. We might want to detect the TIs and ignore 
   // them in the future.
-  
   // if (ConstGlobals.size() > 1)
   //  Changed |= doMerge(ConstGlobals, M, true);
 
   return Changed;
 }
 
-bool ARMGlobalMerge::runOnFunction(Function& F) {
+bool ARMGlobalMerge::runOnFunction(Function &F) {
   return false;
 }
 
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 000000000000..676b01e91c53
--- /dev/null
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,121 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+                         const TargetRegisterInfo &TRI) {
+  // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+  if (Domain == ARMII::DomainVFP) {
+    unsigned Opcode = MI->getOpcode();
+    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return false;
+  } else if (Domain == ARMII::DomainNEON) {
+    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
+      return false;
+  } else
+    return false;
+  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+  MachineInstr *MI = SU->getInstr();
+
+  if (!MI->isDebugValue()) {
+    if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+      return Hazard;
+
+    // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+    // a VMLA / VMLS will cause 4 cycle stall.
+    const TargetInstrDesc &TID = MI->getDesc();
+    if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+      MachineInstr *DefMI = LastMI;
+      const TargetInstrDesc &LastTID = LastMI->getDesc();
+      // Skip over one non-VFP / NEON instruction.
+      if (!LastTID.isBarrier() &&
+          (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+        MachineBasicBlock::iterator I = LastMI;
+        if (I != LastMI->getParent()->begin()) {
+          I = llvm::prior(I);
+          DefMI = &*I;
+        }
+      }
+
+      if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+          (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+           hasRAWHazard(DefMI, MI, TRI))) {
+        // Try to schedule another instruction for the next 4 cycles.
+        if (FpMLxStalls == 0)
+          FpMLxStalls = 4;
+        return Hazard;
+      }
+    }
+  }
+
+  return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+  LastMI = 0;
+  FpMLxStalls = 0;
+  ITBlockSize = 0;
+  ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+  MachineInstr *MI = SU->getInstr();
+  unsigned Opcode = MI->getOpcode();
+  if (ITBlockSize) {
+    --ITBlockSize;
+  } else if (Opcode == ARM::t2IT) {
+    unsigned Mask = MI->getOperand(1).getImm();
+    unsigned NumTZ = CountTrailingZeros_32(Mask);
+    assert(NumTZ <= 3 && "Invalid IT mask!");
+    ITBlockSize = 4 - NumTZ;
+    MachineBasicBlock::iterator I = MI;
+    for (unsigned i = 0; i < ITBlockSize; ++i) {
+      // Advance to the next instruction, skipping any dbg_value instructions.
+      do {
+        ++I;
+      } while (I->isDebugValue());
+      ITBlockMIs[ITBlockSize-1-i] = &*I;
+    }
+  }
+
+  if (!MI->isDebugValue()) {
+    LastMI = MI;
+    FpMLxStalls = 0;
+  }
+
+  ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+  if (FpMLxStalls && --FpMLxStalls == 0)
+    // Stalled for 4 cycles but still can't schedule any other instructions.
+    LastMI = 0;
+  ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+  llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 000000000000..2bc218d8566b
--- /dev/null
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+  const ARMBaseInstrInfo &TII;
+  const ARMBaseRegisterInfo &TRI;
+  const ARMSubtarget &STI;
+
+  MachineInstr *LastMI;
+  unsigned FpMLxStalls;
+  unsigned ITBlockSize;  // No. of MIs in current IT block yet to be scheduled.
+  MachineInstr *ITBlockMIs[4];
+
+public:
+  ARMHazardRecognizer(const InstrItineraryData *ItinData,
+                      const ARMBaseInstrInfo &tii,
+                      const ARMBaseRegisterInfo &tri,
+                      const ARMSubtarget &sti,
+                      const ScheduleDAG *DAG) :
+    ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+    TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+  virtual HazardType getHazardType(SUnit *SU, int Stalls);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+  virtual void AdvanceCycle();
+  virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 51a30c158dd1..a506cffdba34 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -13,6 +13,7 @@
 
 #define DEBUG_TYPE "arm-isel"
 #include "ARM.h"
+#include "ARMBaseInstrInfo.h"
 #include "ARMAddressingModes.h"
 #include "ARMTargetMachine.h"
 #include "llvm/CallingConv.h"
@@ -41,13 +42,25 @@ DisableShifterOp("disable-shifter-op", cl::Hidden,
   cl::desc("Disable isel of shifter-op"),
   cl::init(false));
 
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+  cl::desc("Check fp vmla / vmls hazard at isel time"),
+  cl::init(false));
+
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
 /// instructions for SelectionDAG operations.
 ///
 namespace {
+
+enum AddrMode2Type {
+  AM2_BASE, // Simple AM2 (+-imm12)
+  AM2_SHOP  // Shifter-op AM2
+};
+
 class ARMDAGToDAGISel : public SelectionDAGISel {
   ARMBaseTargetMachine &TM;
+  const ARMBaseInstrInfo *TII;
 
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
   /// make the right decision when generating code for different targets.
@@ -57,7 +70,8 @@ public:
   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
                            CodeGenOpt::Level OptLevel)
     : SelectionDAGISel(tm, OptLevel), TM(tm),
-    Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+      TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+      Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
   }
 
   virtual const char *getPassName() const {
@@ -72,60 +86,101 @@ public:
 
   SDNode *Select(SDNode *N);
 
-  bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
+
+  bool hasNoVMLxHazardUse(SDNode *N) const;
+  bool isShifterOpProfitable(const SDValue &Shift,
+                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+  bool SelectShifterOperandReg(SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
-  bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
-                       SDValue &Offset, SDValue &Opc);
+  bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+                                    SDValue &B, SDValue &C);
+  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+  AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+                                      SDValue &Offset, SDValue &Opc);
+  bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+                           SDValue &Opc) {
+    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+  }
+
+  bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+                           SDValue &Opc) {
+    return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+  }
+
+  bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+                       SDValue &Opc) {
+    SelectAddrMode2Worker(N, Base, Offset, Opc);
+//    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+    // This always matches one way or another.
+    return true;
+  }
+
   bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
                              SDValue &Offset, SDValue &Opc);
-  bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base,
+  bool SelectAddrMode3(SDValue N, SDValue &Base,
                        SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                              SDValue &Offset, SDValue &Opc);
-  bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr,
-                       SDValue &Mode);
-  bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
+  bool SelectAddrMode5(SDValue N, SDValue &Base,
                        SDValue &Offset);
-  bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
-
-  bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
-                        SDValue &Label);
-
-  bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &Offset);
-  bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale,
-                              SDValue &Base, SDValue &OffImm,
-                              SDValue &Offset);
-  bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &OffImm, SDValue &Offset);
-  bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &OffImm, SDValue &Offset);
-  bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &OffImm, SDValue &Offset);
-  bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &OffImm);
-
-  bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
+  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+
+  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+  // Thumb Addressing Modes:
+  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+                             unsigned Scale);
+  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+                                SDValue &OffImm);
+  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+                                 SDValue &OffImm);
+  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+                                 SDValue &OffImm);
+  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+                                 SDValue &OffImm);
+  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+  // Thumb 2 Addressing Modes:
+  bool SelectT2ShifterOperandReg(SDValue N,
                                  SDValue &BaseReg, SDValue &Opc);
-  bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base,
-                             SDValue &OffImm);
-  bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base,
+  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                             SDValue &OffImm);
   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
-  bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base,
-                              SDValue &OffImm);
-  bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
+  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                              SDValue &OffReg, SDValue &ShImm);
 
+  inline bool is_so_imm(unsigned Imm) const {
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  }
+
+  inline bool is_so_imm_not(unsigned Imm) const {
+    return ARM_AM::getSOImmVal(~Imm) != -1;
+  }
+
+  inline bool is_t2_so_imm(unsigned Imm) const {
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  }
+
+  inline bool is_t2_so_imm_not(unsigned Imm) const {
+    return ARM_AM::getT2SOImmVal(~Imm) != -1;
+  }
+
   inline bool Pred_so_imm(SDNode *inN) const {
     ConstantSDNode *N = cast<ConstantSDNode>(inN);
-    return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+    return is_so_imm(N->getZExtValue());
   }
 
   inline bool Pred_t2_so_imm(SDNode *inN) const {
     ConstantSDNode *N = cast<ConstantSDNode>(inN);
-    return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+    return is_t2_so_imm(N->getZExtValue());
   }
 
   // Include the pieces autogenerated from the target description.
@@ -141,22 +196,30 @@ private:
   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
   /// loads of D registers and even subregs and odd subregs of Q registers.
   /// For NumVecs <= 2, QOpcodes1 is not used.
-  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+                    unsigned *DOpcodes,
                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 
   /// SelectVST - Select NEON store intrinsics.  NumVecs should
   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
   /// stores of D registers and even subregs and odd subregs of Q registers.
   /// For NumVecs <= 2, QOpcodes1 is not used.
-  SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+                    unsigned *DOpcodes,
                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 
   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
-  /// load/store of D registers and even subregs and odd subregs of Q registers.
-  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
-                          unsigned *DOpcodes, unsigned *QOpcodes0,
-                          unsigned *QOpcodes1);
+  /// load/store of D registers and Q registers.
+  SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+                          bool isUpdating, unsigned NumVecs,
+                          unsigned *DOpcodes, unsigned *QOpcodes);
+
+  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
+  /// should be 2, 3 or 4.  The opcode array specifies the instructions used
+  /// for loading D registers.  (Q registers are not supported.)
+  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+                       unsigned *Opcodes);
 
   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
@@ -174,10 +237,10 @@ private:
   SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                                ARMCC::CondCodes CCVal, SDValue CCR,
                                SDValue InFlag);
-  SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+  SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                               ARMCC::CondCodes CCVal, SDValue CCR,
                               SDValue InFlag);
-  SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+  SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                                ARMCC::CondCodes CCVal, SDValue CCR,
                                SDValue InFlag);
 
@@ -199,9 +262,8 @@ private:
   SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
   SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 
-  // Form sequences of 8 consecutive D registers.
-  SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
-                    SDValue V4, SDValue V5, SDValue V6, SDValue V7);
+  // Get the alignment operand for a NEON VLD or VST instruction.
+  SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
 };
 }
 
@@ -229,9 +291,85 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 }
 
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+                                    int RangeMin, int RangeMax,
+                                    int &ScaledConstant) {
+  assert(Scale && "Invalid scale!");
+
+  // Check that this is a constant.
+  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+  if (!C)
+    return false;
+
+  ScaledConstant = (int) C->getZExtValue();
+  if ((ScaledConstant % Scale) != 0)
+    return false;
+
+  ScaledConstant /= Scale;
+  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoidded.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+  if (OptLevel == CodeGenOpt::None)
+    return true;
+
+  if (!CheckVMLxHazard)
+    return true;
+
+  if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+    return true;
+
+  if (!N->hasOneUse())
+    return false;
+
+  SDNode *Use = *N->use_begin();
+  if (Use->getOpcode() == ISD::CopyToReg)
+    return true;
+  if (Use->isMachineOpcode()) {
+    const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
+    if (TID.mayStore())
+      return true;
+    unsigned Opcode = TID.getOpcode();
+    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return true;
+    // vmlx feeding into another vmlx. We actually want to unfold
+    // the use later in the MLxExpansion pass. e.g.
+    // vmla
+    // vmla (stall 8 cycles)
+    //
+    // vmul (5 cycles)
+    // vadd (5 cycles)
+    // vmla
+    // This adds up to about 18 - 19 cycles.
+    //
+    // vmla
+    // vmul (stall 4 cycles)
+    // vadd adds up to about 14 cycles.
+    return TII->isFpMLxInstruction(Opcode);
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+  return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
 
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
-                                              SDValue N,
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
                                               SDValue &Opc) {
@@ -251,16 +389,92 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
     ShImmVal = RHS->getZExtValue() & 31;
   } else {
     ShReg = N.getOperand(1);
+    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+      return false;
   }
   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                   MVT::i32);
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
-                                      SDValue &Base, SDValue &Offset,
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+                                                   SDValue &BaseReg,
+                                                   SDValue &ShReg,
+                                                   SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  // Do not check isShifterOpProfitable. This must return true.
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getZExtValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
+  }
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+                                          SDValue &Base,
+                                          SDValue &OffImm) {
+  // Match simple R + imm12 operands.
+
+  // Base only.
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+      !CurDAG->isBaseWithConstantOffset(N)) {
+    if (N.getOpcode() == ISD::FrameIndex) {
+      // Match frame index.
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
+      return true;
+    }
+    
+    if (N.getOpcode() == ARMISD::Wrapper &&
+        !(Subtarget->useMovt() &&
+                     N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+      Base = N.getOperand(0);
+    } else
+      Base = N;
+    OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getZExtValue();
+    if (N.getOpcode() == ISD::SUB)
+      RHSC = -RHSC;
+
+    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+      Base   = N.getOperand(0);
+      if (Base.getOpcode() == ISD::FrameIndex) {
+        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      }
+      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+      return true;
+    }
+  }
+
+  // Base only.
+  Base = N;
+  OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       // X * [3,5,9] -> X + X * [2,4,8] etc.
       int RHSC = (int)RHS->getZExtValue();
@@ -283,7 +497,114 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
     }
   }
 
-  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+      // ISD::OR that is equivalent to an ISD::ADD.
+      !CurDAG->isBaseWithConstantOffset(N))
+    return false;
+
+  // Leave simple R +/- imm12 operands for LDRi12
+  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+    int RHSC;
+    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
+      return false;
+  }
+
+  if (Subtarget->isCortexA9() && !N.hasOneUse())
+    // Compute R +/- (R << N) and reuse it.
+    return false;
+
+  // Otherwise this is R +/- [possibly shifted] R.
+  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+  unsigned ShAmt = 0;
+
+  Base   = N.getOperand(0);
+  Offset = N.getOperand(1);
+
+  if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant, if not, we can't fold
+    // it.
+    if (ConstantSDNode *Sh =
+           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+      ShAmt = Sh->getZExtValue();
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
+    } else {
+      ShOpcVal = ARM_AM::no_shift;
+    }
+  }
+
+  // Try matching (R shl C) + (R).
+  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+    if (ShOpcVal != ARM_AM::no_shift) {
+      // Check to see if the RHS of the shift is a constant, if not, we can't
+      // fold it.
+      if (ConstantSDNode *Sh =
+          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+        ShAmt = Sh->getZExtValue();
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
+      } else {
+        ShOpcVal = ARM_AM::no_shift;
+      }
+    }
+  }
+
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+                                  MVT::i32);
+  return true;
+}
+
+
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+                                                     SDValue &Base,
+                                                     SDValue &Offset,
+                                                     SDValue &Opc) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      // X * [3,5,9] -> X + X * [2,4,8] etc.
+      int RHSC = (int)RHS->getZExtValue();
+      if (RHSC & 1) {
+        RHSC = RHSC & ~1;
+        ARM_AM::AddrOpc AddSub = ARM_AM::add;
+        if (RHSC < 0) {
+          AddSub = ARM_AM::sub;
+          RHSC = - RHSC;
+        }
+        if (isPowerOf2_32(RHSC)) {
+          unsigned ShAmt = Log2_32(RHSC);
+          Base = Offset = N.getOperand(0);
+          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+                                                            ARM_AM::lsl),
+                                          MVT::i32);
+          return AM2_SHOP;
+        }
+      }
+    }
+  }
+
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+      // ISD::OR that is equivalent to an ADD.
+      !CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -297,36 +618,45 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
                                                       ARM_AM::no_shift),
                                     MVT::i32);
-    return true;
+    return AM2_BASE;
   }
 
   // Match simple R +/- imm12 operands.
-  if (N.getOpcode() == ISD::ADD)
-    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int RHSC = (int)RHS->getZExtValue();
-      if ((RHSC >= 0 && RHSC < 0x1000) ||
-          (RHSC < 0 && RHSC > -0x1000)) { // 12 bits.
-        Base = N.getOperand(0);
-        if (Base.getOpcode() == ISD::FrameIndex) {
-          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-          Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-        }
-        Offset = CurDAG->getRegister(0, MVT::i32);
+  if (N.getOpcode() != ISD::SUB) {
+    int RHSC;
+    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+                                -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+      Base = N.getOperand(0);
+      if (Base.getOpcode() == ISD::FrameIndex) {
+        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+      }
+      Offset = CurDAG->getRegister(0, MVT::i32);
 
-        ARM_AM::AddrOpc AddSub = ARM_AM::add;
-        if (RHSC < 0) {
-          AddSub = ARM_AM::sub;
-          RHSC = - RHSC;
-        }
-        Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
-                                                          ARM_AM::no_shift),
-                                        MVT::i32);
-        return true;
+      ARM_AM::AddrOpc AddSub = ARM_AM::add;
+      if (RHSC < 0) {
+        AddSub = ARM_AM::sub;
+        RHSC = - RHSC;
       }
+      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+                                                        ARM_AM::no_shift),
+                                      MVT::i32);
+      return AM2_BASE;
     }
+  }
+
+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R +/- (R << N) and reuse it.
+    Base = N;
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return AM2_BASE;
+  }
 
   // Otherwise this is R +/- [possibly shifted] R.
-  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+  ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
   unsigned ShAmt = 0;
 
@@ -339,14 +669,20 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
     if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
   }
 
   // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
     if (ShOpcVal != ARM_AM::no_shift) {
       // Check to see if the RHS of the shift is a constant, if not, we can't
@@ -354,8 +690,15 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
       if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -364,7 +707,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
 
   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                   MVT::i32);
-  return true;
+  return AM2_SHOP;
 }
 
 bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
@@ -375,15 +718,13 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
     : cast<StoreSDNode>(Op)->getAddressingMode();
   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
     ? ARM_AM::add : ARM_AM::sub;
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    int Val = (int)C->getZExtValue();
-    if (Val >= 0 && Val < 0x1000) { // 12 bits.
-      Offset = CurDAG->getRegister(0, MVT::i32);
-      Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
-                                                        ARM_AM::no_shift),
-                                      MVT::i32);
-      return true;
-    }
+  int Val;
+  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return true;
   }
 
   Offset = N;
@@ -394,7 +735,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
     // it.
     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(0);
+      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+        Offset = N.getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
@@ -406,7 +752,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
 }
 
 
-bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                       SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
   if (N.getOpcode() == ISD::SUB) {
@@ -417,7 +763,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
     return true;
   }
 
-  if (N.getOpcode() != ISD::ADD) {
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -429,25 +775,23 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
   }
 
   // If the RHS is +/- imm8, fold into addr mode.
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    int RHSC = (int)RHS->getZExtValue();
-    if ((RHSC >= 0 && RHSC < 256) ||
-        (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
-      Base = N.getOperand(0);
-      if (Base.getOpcode() == ISD::FrameIndex) {
-        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-      }
-      Offset = CurDAG->getRegister(0, MVT::i32);
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+                              -256 + 1, 256, RHSC)) { // 8 bits.
+    Base = N.getOperand(0);
+    if (Base.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    }
+    Offset = CurDAG->getRegister(0, MVT::i32);
 
-      ARM_AM::AddrOpc AddSub = ARM_AM::add;
-      if (RHSC < 0) {
-        AddSub = ARM_AM::sub;
-        RHSC = - RHSC;
-      }
-      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
-      return true;
+    ARM_AM::AddrOpc AddSub = ARM_AM::add;
+    if (RHSC < 0) {
+      AddSub = ARM_AM::sub;
+      RHSC = -RHSC;
     }
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+    return true;
   }
 
   Base = N.getOperand(0);
@@ -464,13 +808,11 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
     : cast<StoreSDNode>(Op)->getAddressingMode();
   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
     ? ARM_AM::add : ARM_AM::sub;
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    int Val = (int)C->getZExtValue();
-    if (Val >= 0 && Val < 256) {
-      Offset = CurDAG->getRegister(0, MVT::i32);
-      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
-      return true;
-    }
+  int Val;
+  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+    return true;
   }
 
   Offset = N;
@@ -478,16 +820,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
-                                      SDValue &Addr, SDValue &Mode) {
-  Addr = N;
-  Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
-  return true;
-}
-
-bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                       SDValue &Base, SDValue &Offset) {
-  if (N.getOpcode() != ISD::ADD) {
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -503,28 +838,23 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
   }
 
   // If the RHS is +/- imm8, fold into addr mode.
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    int RHSC = (int)RHS->getZExtValue();
-    if ((RHSC & 3) == 0) {  // The constant is implicitly multiplied by 4.
-      RHSC >>= 2;
-      if ((RHSC >= 0 && RHSC < 256) ||
-          (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
-        Base = N.getOperand(0);
-        if (Base.getOpcode() == ISD::FrameIndex) {
-          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-          Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-        }
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+                              -256 + 1, 256, RHSC)) {
+    Base = N.getOperand(0);
+    if (Base.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    }
 
-        ARM_AM::AddrOpc AddSub = ARM_AM::add;
-        if (RHSC < 0) {
-          AddSub = ARM_AM::sub;
-          RHSC = - RHSC;
-        }
-        Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
-                                           MVT::i32);
-        return true;
-      }
+    ARM_AM::AddrOpc AddSub = ARM_AM::add;
+    if (RHSC < 0) {
+      AddSub = ARM_AM::sub;
+      RHSC = -RHSC;
     }
+    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+                                       MVT::i32);
+    return true;
   }
 
   Base = N;
@@ -533,30 +863,50 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
-                                      SDValue &Addr, SDValue &Align) {
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+                                      SDValue &Align) {
   Addr = N;
-  // Default to no alignment.
-  Align = CurDAG->getTargetConstant(0, MVT::i32);
+
+  unsigned Alignment = 0;
+  if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+    // The maximum alignment is equal to the memory size being referenced.
+    unsigned LSNAlign = LSN->getAlignment();
+    unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+    if (LSNAlign > MemSize && MemSize > 1)
+      Alignment = MemSize;
+  } else {
+    // All other uses of addrmode6 are for intrinsics.  For now just record
+    // the raw alignment value; it will be refined later based on the legal
+    // alignment operands for the intrinsic.
+    Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+  }
+
+  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                        SDValue &Offset, SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
     Offset = N.getOperand(0);
     SDValue N1 = N.getOperand(1);
-    Label  = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
-                                       MVT::i32);
+    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                                      MVT::i32);
     return true;
   }
+
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
+
+//===----------------------------------------------------------------------===//
+//                         Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
                                             SDValue &Base, SDValue &Offset){
-  // FIXME dl should come from the parent load or store, not the address
-  if (N.getOpcode() != ISD::ADD) {
+  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
     if (!NC || !NC->isNullValue())
       return false;
@@ -571,82 +921,137 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
 }
 
 bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N,
-                                        unsigned Scale, SDValue &Base,
-                                        SDValue &OffImm, SDValue &Offset) {
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+                                       SDValue &Offset, unsigned Scale) {
+  if (Scale == 4) {
+    SDValue TmpBase, TmpOffImm;
+    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+      return false;  // We want to select tLDRspi / tSTRspi instead.
+
+    if (N.getOpcode() == ARMISD::Wrapper &&
+        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+      return false;  // We want to select tLDRpci instead.
+  }
+
+  if (!CurDAG->isBaseWithConstantOffset(N))
+    return false;
+
+  // Thumb does not have [sp, r] address mode.
+  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+  if ((LHSR && LHSR->getReg() == ARM::SP) ||
+      (RHSR && RHSR->getReg() == ARM::SP))
+    return false;
+
+  // FIXME: Why do we explicitly check for a match here and then return false?
+  // Presumably to allow something else to match, but shouldn't this be
+  // documented?
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+    return false;
+
+  Base = N.getOperand(0);
+  Offset = N.getOperand(1);
+  return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+                                          SDValue &Base,
+                                          SDValue &Offset) {
+  return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+                                          SDValue &Base,
+                                          SDValue &Offset) {
+  return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+                                          SDValue &Base,
+                                          SDValue &Offset) {
+  return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+                                          SDValue &Base, SDValue &OffImm) {
   if (Scale == 4) {
     SDValue TmpBase, TmpOffImm;
-    if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
       return false;  // We want to select tLDRspi / tSTRspi instead.
+
     if (N.getOpcode() == ARMISD::Wrapper &&
         N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
       return false;  // We want to select tLDRpci instead.
   }
 
-  if (N.getOpcode() != ISD::ADD) {
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
     if (N.getOpcode() == ARMISD::Wrapper &&
         !(Subtarget->useMovt() &&
           N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
       Base = N.getOperand(0);
-    } else
+    } else {
       Base = N;
+    }
 
-    Offset = CurDAG->getRegister(0, MVT::i32);
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
 
-  // Thumb does not have [sp, r] address mode.
   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
   RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
   if ((LHSR && LHSR->getReg() == ARM::SP) ||
       (RHSR && RHSR->getReg() == ARM::SP)) {
+    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+    unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+    // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+    if (LHSC != 0 || RHSC != 0) return false;
+
     Base = N;
-    Offset = CurDAG->getRegister(0, MVT::i32);
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
 
   // If the RHS is + imm5 * scale, fold into addr mode.
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    int RHSC = (int)RHS->getZExtValue();
-    if ((RHSC & (Scale-1)) == 0) {  // The constant is implicitly multiplied.
-      RHSC /= Scale;
-      if (RHSC >= 0 && RHSC < 32) {
-        Base = N.getOperand(0);
-        Offset = CurDAG->getRegister(0, MVT::i32);
-        OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
-        return true;
-      }
-    }
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+    Base = N.getOperand(0);
+    OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+    return true;
   }
 
   Base = N.getOperand(0);
-  Offset = N.getOperand(1);
   OffImm = CurDAG->getTargetConstant(0, MVT::i32);
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N,
-                                            SDValue &Base, SDValue &OffImm,
-                                            SDValue &Offset) {
-  return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+                                           SDValue &OffImm) {
+  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
 }
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N,
-                                            SDValue &Base, SDValue &OffImm,
-                                            SDValue &Offset) {
-  return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+                                           SDValue &OffImm) {
+  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
 }
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N,
-                                            SDValue &Base, SDValue &OffImm,
-                                            SDValue &Offset) {
-  return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+                                           SDValue &OffImm) {
+  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
 }
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
-                                           SDValue &Base, SDValue &OffImm) {
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+                                            SDValue &Base, SDValue &OffImm) {
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
@@ -654,35 +1059,35 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
     return true;
   }
 
-  if (N.getOpcode() != ISD::ADD)
+  if (!CurDAG->isBaseWithConstantOffset(N))
     return false;
 
   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
       (LHSR && LHSR->getReg() == ARM::SP)) {
     // If the RHS is + imm8 * scale, fold into addr mode.
-    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int RHSC = (int)RHS->getZExtValue();
-      if ((RHSC & 3) == 0) {  // The constant is implicitly multiplied.
-        RHSC >>= 2;
-        if (RHSC >= 0 && RHSC < 256) {
-          Base = N.getOperand(0);
-          if (Base.getOpcode() == ISD::FrameIndex) {
-            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-            Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-          }
-          OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
-          return true;
-        }
+    int RHSC;
+    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+      Base = N.getOperand(0);
+      if (Base.getOpcode() == ISD::FrameIndex) {
+        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
       }
+      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+      return true;
     }
   }
 
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
-                                                SDValue &BaseReg,
+
+//===----------------------------------------------------------------------===//
+//                        Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
                                                 SDValue &Opc) {
   if (DisableShifterOp)
     return false;
@@ -704,19 +1109,22 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                             SDValue &Base, SDValue &OffImm) {
   // Match simple R + imm12 operands.
 
   // Base only.
-  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+      !CurDAG->isBaseWithConstantOffset(N)) {
     if (N.getOpcode() == ISD::FrameIndex) {
-      // Match frame index...
+      // Match frame index.
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
       return true;
-    } else if (N.getOpcode() == ARMISD::Wrapper &&
+    }
+    
+    if (N.getOpcode() == ARMISD::Wrapper &&
                !(Subtarget->useMovt() &&
                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
       Base = N.getOperand(0);
@@ -729,7 +1137,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
   }
 
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    if (SelectT2AddrModeImm8(Op, N, Base, OffImm))
+    if (SelectT2AddrModeImm8(N, Base, OffImm))
       // Let t2LDRi8 handle (R - imm8).
       return false;
 
@@ -754,24 +1162,26 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
   // Match simple R - imm8 operands.
-  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
-    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int RHSC = (int)RHS->getSExtValue();
-      if (N.getOpcode() == ISD::SUB)
-        RHSC = -RHSC;
-
-      if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
-        Base = N.getOperand(0);
-        if (Base.getOpcode() == ISD::FrameIndex) {
-          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-          Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-        }
-        OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
-        return true;
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+      !CurDAG->isBaseWithConstantOffset(N))
+    return false;
+  
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getSExtValue();
+    if (N.getOpcode() == ISD::SUB)
+      RHSC = -RHSC;
+
+    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+      Base = N.getOperand(0);
+      if (Base.getOpcode() == ISD::FrameIndex) {
+        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
       }
+      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+      return true;
     }
   }
 
@@ -784,52 +1194,22 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
     ? cast<LoadSDNode>(Op)->getAddressingMode()
     : cast<StoreSDNode>(Op)->getAddressingMode();
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) {
-    int RHSC = (int)RHS->getZExtValue();
-    if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
-      OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
-        ? CurDAG->getTargetConstant(RHSC, MVT::i32)
-        : CurDAG->getTargetConstant(-RHSC, MVT::i32);
-      return true;
-    }
-  }
-
-  return false;
-}
-
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
-                                             SDValue &Base, SDValue &OffImm) {
-  if (N.getOpcode() == ISD::ADD) {
-    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int RHSC = (int)RHS->getZExtValue();
-      // 8 bits.
-      if (((RHSC & 0x3) == 0) &&
-          ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
-        Base   = N.getOperand(0);
-        OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
-        return true;
-      }
-    }
-  } else if (N.getOpcode() == ISD::SUB) {
-    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int RHSC = (int)RHS->getZExtValue();
-      // 8 bits.
-      if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
-        Base   = N.getOperand(0);
-        OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
-        return true;
-      }
-    }
+  int RHSC;
+  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+      ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+      : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+    return true;
   }
 
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                             SDValue &Base,
                                             SDValue &OffReg, SDValue &ShImm) {
   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
-  if (N.getOpcode() != ISD::ADD)
+  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
     return false;
 
   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
@@ -841,6 +1221,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
       return false;
   }
 
+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R + (R << [1,2,3]) and reuse it.
+    Base = N;
+    return false;
+  }
+
   // Look for (R + R) or (R + (R << [1,2,3])).
   unsigned ShAmt = 0;
   Base   = N.getOperand(0);
@@ -859,11 +1245,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
     // it.
     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      if (ShAmt >= 4) {
+      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+        OffReg = OffReg.getOperand(0);
+      else {
         ShAmt = 0;
         ShOpcVal = ARM_AM::no_shift;
-      } else
-        OffReg = OffReg.getOperand(0);
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
@@ -1045,52 +1432,43 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
 }
 
-/// OctoDRegs - Form 8 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
-                                   SDValue V2, SDValue V3,
-                                   SDValue V4, SDValue V5,
-                                   SDValue V6, SDValue V7) {
-  DebugLoc dl = V0.getNode()->getDebugLoc();
-  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
-  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
-  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
-  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
-  SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
-  SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
-  SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
-  SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
-  const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
-                         V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
-}
-
-/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
-/// for a 64-bit subregister of the vector.
-static EVT GetNEONSubregVT(EVT VT) {
-  switch (VT.getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("unhandled NEON type");
-  case MVT::v16i8: return MVT::v8i8;
-  case MVT::v8i16: return MVT::v4i16;
-  case MVT::v4f32: return MVT::v2f32;
-  case MVT::v4i32: return MVT::v2i32;
-  case MVT::v2i64: return MVT::v1i64;
-  }
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction.  The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+                                       bool is64BitVector) {
+  unsigned NumRegs = NumVecs;
+  if (!is64BitVector && NumVecs < 3)
+    NumRegs *= 2;
+
+  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+  if (Alignment >= 32 && NumRegs == 4)
+    Alignment = 32;
+  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+    Alignment = 16;
+  else if (Alignment >= 8)
+    Alignment = 8;
+  else
+    Alignment = 0;
+
+  return CurDAG->getTargetConstant(Alignment, MVT::i32);
 }
 
-SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                    unsigned *DOpcodes, unsigned *QOpcodes0,
                                    unsigned *QOpcodes1) {
   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
   SDValue MemAddr, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
   EVT VT = N->getValueType(0);
   bool is64BitVector = VT.is64BitVector();
+  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
 
   unsigned OpcodeIndex;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -1120,88 +1498,97 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
       ResTyElts *= 2;
     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
   }
+  std::vector<EVT> ResTys;
+  ResTys.push_back(ResTy);
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
 
   SDValue Pred = getAL(CurDAG);
   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-  SDValue SuperReg;
-  if (is64BitVector) {
-    unsigned Opc = DOpcodes[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
-    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
-    if (NumVecs == 1)
-      return VLd;
-
-    SuperReg = SDValue(VLd, 0);
-    assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
-                                                 dl, VT, SuperReg);
-      ReplaceUses(SDValue(N, Vec), D);
-    }
-    ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
-    return NULL;
-  }
-
-  if (NumVecs <= 2) {
-    // Quad registers are directly supported for VLD1 and VLD2,
-    // loading pairs of D regs.
-    unsigned Opc = QOpcodes0[OpcodeIndex];
-    const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
-    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
-    if (NumVecs == 1)
-      return VLd;
+  SDNode *VLd;
+  SmallVector<SDValue, 7> Ops;
 
-    SuperReg = SDValue(VLd, 0);
-    Chain = SDValue(VLd, 1);
+  // Double registers and VLD1/VLD2 quad registers are directly supported.
+  if (is64BitVector || NumVecs <= 2) {
+    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+                    QOpcodes0[OpcodeIndex]);
+    Ops.push_back(MemAddr);
+    Ops.push_back(Align);
+    if (isUpdating) {
+      SDValue Inc = N->getOperand(AddrOpIdx + 1);
+      Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+    }
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0);
+    Ops.push_back(Chain);
+    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
 
   } else {
     // Otherwise, quad registers are loaded with two separate instructions,
     // where one loads the even registers and the other loads the odd registers.
     EVT AddrTy = MemAddr.getValueType();
 
-    // Load the even subregs.
-    unsigned Opc = QOpcodes0[OpcodeIndex];
+    // Load the even subregs.  This is always an updating load, so that it
+    // provides the address to the second load for the odd subregs.
     SDValue ImplDef =
       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
-    SDNode *VLdA =
-      CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+                                          ResTy, AddrTy, MVT::Other, OpsA, 7);
     Chain = SDValue(VLdA, 2);
 
     // Load the odd subregs.
-    Opc = QOpcodes1[OpcodeIndex];
-    const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
-                             Pred, Reg0, Chain };
-    SDNode *VLdB =
-      CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
-    SuperReg = SDValue(VLdB, 0);
-    Chain = SDValue(VLdB, 2);
-  }
-
-  // Extract out the Q registers.
-  assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
-  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-    SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
-                                               dl, VT, SuperReg);
-    ReplaceUses(SDValue(N, Vec), Q);
-  }
-  ReplaceUses(SDValue(N, NumVecs), Chain);
+    Ops.push_back(SDValue(VLdA, 1));
+    Ops.push_back(Align);
+    if (isUpdating) {
+      SDValue Inc = N->getOperand(AddrOpIdx + 1);
+      assert(isa<ConstantSDNode>(Inc.getNode()) &&
+             "only constant post-increment update allowed for VLD3/4");
+      (void)Inc;
+      Ops.push_back(Reg0);
+    }
+    Ops.push_back(SDValue(VLdA, 0));
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0);
+    Ops.push_back(Chain);
+    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+                                 Ops.data(), Ops.size());
+  }
+
+  if (NumVecs == 1)
+    return VLd;
+
+  // Extract out the subregisters.
+  SDValue SuperReg = SDValue(VLd, 0);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    ReplaceUses(SDValue(N, Vec),
+                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
   return NULL;
 }
 
-SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                    unsigned *DOpcodes, unsigned *QOpcodes0,
                                    unsigned *QOpcodes1) {
   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
   SDValue MemAddr, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
-  EVT VT = N->getOperand(3).getValueType();
+  EVT VT = N->getOperand(Vec0Idx).getValueType();
   bool is64BitVector = VT.is64BitVector();
+  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
 
   unsigned OpcodeIndex;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -1222,119 +1609,128 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
     break;
   }
 
+  std::vector<EVT> ResTys;
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
+
   SDValue Pred = getAL(CurDAG);
   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-
   SmallVector<SDValue, 7> Ops;
-  Ops.push_back(MemAddr);
-  Ops.push_back(Align);
 
-  if (is64BitVector) {
+  // Double registers and VST1/VST2 quad registers are directly supported.
+  if (is64BitVector || NumVecs <= 2) {
+    SDValue SrcReg;
     if (NumVecs == 1) {
-      Ops.push_back(N->getOperand(3));
-    } else {
-      SDValue RegSeq;
-      SDValue V0 = N->getOperand(0+3);
-      SDValue V1 = N->getOperand(1+3);
-
+      SrcReg = N->getOperand(Vec0Idx);
+    } else if (is64BitVector) {
       // Form a REG_SEQUENCE to force register allocation.
+      SDValue V0 = N->getOperand(Vec0Idx + 0);
+      SDValue V1 = N->getOperand(Vec0Idx + 1);
       if (NumVecs == 2)
-        RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+        SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
       else {
-        SDValue V2 = N->getOperand(2+3);
-        // If it's a vld3, form a quad D-register and leave the last part as 
+        SDValue V2 = N->getOperand(Vec0Idx + 2);
+        // If it's a vst3, form a quad D-register and leave the last part as
         // an undef.
         SDValue V3 = (NumVecs == 3)
           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
-          : N->getOperand(3+3);
-        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+          : N->getOperand(Vec0Idx + 3);
+        SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
       }
-      Ops.push_back(RegSeq);
-    }
-    Ops.push_back(Pred);
-    Ops.push_back(Reg0); // predicate register
-    Ops.push_back(Chain);
-    unsigned Opc = DOpcodes[OpcodeIndex];
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
-  }
-
-  if (NumVecs <= 2) {
-    // Quad registers are directly supported for VST1 and VST2.
-    unsigned Opc = QOpcodes0[OpcodeIndex];
-    if (NumVecs == 1) {
-      Ops.push_back(N->getOperand(3));
     } else {
       // Form a QQ register.
-      SDValue Q0 = N->getOperand(3);
-      SDValue Q1 = N->getOperand(4);
-      Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
+      SDValue Q0 = N->getOperand(Vec0Idx);
+      SDValue Q1 = N->getOperand(Vec0Idx + 1);
+      SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+    }
+
+    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+                    QOpcodes0[OpcodeIndex]);
+    Ops.push_back(MemAddr);
+    Ops.push_back(Align);
+    if (isUpdating) {
+      SDValue Inc = N->getOperand(AddrOpIdx + 1);
+      Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
     }
+    Ops.push_back(SrcReg);
     Ops.push_back(Pred);
-    Ops.push_back(Reg0); // predicate register
+    Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
+    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
   }
 
   // Otherwise, quad registers are stored with two separate instructions,
   // where one stores the even registers and the other stores the odd registers.
 
   // Form the QQQQ REG_SEQUENCE.
-  SDValue V0 = N->getOperand(0+3);
-  SDValue V1 = N->getOperand(1+3);
-  SDValue V2 = N->getOperand(2+3);
+  SDValue V0 = N->getOperand(Vec0Idx + 0);
+  SDValue V1 = N->getOperand(Vec0Idx + 1);
+  SDValue V2 = N->getOperand(Vec0Idx + 2);
   SDValue V3 = (NumVecs == 3)
     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
-    : N->getOperand(3+3);
+    : N->getOperand(Vec0Idx + 3);
   SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
 
-  // Store the even D registers.
-  Ops.push_back(Reg0); // post-access address offset
-  Ops.push_back(RegSeq);
-  Ops.push_back(Pred);
-  Ops.push_back(Reg0); // predicate register
-  Ops.push_back(Chain);
-  unsigned Opc = QOpcodes0[OpcodeIndex];
-  SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), 7);
+  // Store the even D registers.  This is always an updating store, so that it
+  // provides the address to the second store for the odd subregs.
+  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+                                        MemAddr.getValueType(),
+                                        MVT::Other, OpsA, 7);
   Chain = SDValue(VStA, 1);
 
   // Store the odd D registers.
-  Ops[0] = SDValue(VStA, 0); // MemAddr
-  Ops[6] = Chain;
-  Opc = QOpcodes1[OpcodeIndex];
-  SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), 7);
-  Chain = SDValue(VStB, 1);
-  ReplaceUses(SDValue(N, 0), Chain);
-  return NULL;
+  Ops.push_back(SDValue(VStA, 0));
+  Ops.push_back(Align);
+  if (isUpdating) {
+    SDValue Inc = N->getOperand(AddrOpIdx + 1);
+    assert(isa<ConstantSDNode>(Inc.getNode()) &&
+           "only constant post-increment update allowed for VST3/4");
+    (void)Inc;
+    Ops.push_back(Reg0);
+  }
+  Ops.push_back(RegSeq);
+  Ops.push_back(Pred);
+  Ops.push_back(Reg0);
+  Ops.push_back(Chain);
+  return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+                                Ops.data(), Ops.size());
 }
 
 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
-                                         unsigned NumVecs, unsigned *DOpcodes,
-                                         unsigned *QOpcodes0,
-                                         unsigned *QOpcodes1) {
+                                         bool isUpdating, unsigned NumVecs,
+                                         unsigned *DOpcodes,
+                                         unsigned *QOpcodes) {
   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
   DebugLoc dl = N->getDebugLoc();
 
   SDValue MemAddr, Align;
-  if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
     return NULL;
 
   SDValue Chain = N->getOperand(0);
   unsigned Lane =
-    cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
-  EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
+    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+  EVT VT = N->getOperand(Vec0Idx).getValueType();
   bool is64BitVector = VT.is64BitVector();
 
-  // Quad registers are handled by load/store of subregs. Find the subreg info.
-  unsigned NumElts = 0;
-  bool Even = false;
-  EVT RegVT = VT;
-  if (!is64BitVector) {
-    RegVT = GetNEONSubregVT(VT);
-    NumElts = RegVT.getVectorNumElements();
-    Even = Lane < NumElts;
-  }
+  unsigned Alignment = 0;
+  if (NumVecs != 3) {
+    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+    if (Alignment > NumBytes)
+      Alignment = NumBytes;
+    if (Alignment < 8 && Alignment < NumBytes)
+      Alignment = 0;
+    // Alignment must be a power of two; make sure of that.
+    Alignment = (Alignment & -Alignment);
+    if (Alignment == 1)
+      Alignment = 0;
+  }
+  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
 
   unsigned OpcodeIndex;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -1350,124 +1746,144 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   case MVT::v4i32: OpcodeIndex = 1; break;
   }
 
+  std::vector<EVT> ResTys;
+  if (IsLoad) {
+    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+    if (!is64BitVector)
+      ResTyElts *= 2;
+    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+                                      MVT::i64, ResTyElts));
+  }
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
+
   SDValue Pred = getAL(CurDAG);
   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 
-  SmallVector<SDValue, 10> Ops;
+  SmallVector<SDValue, 8> Ops;
   Ops.push_back(MemAddr);
   Ops.push_back(Align);
+  if (isUpdating) {
+    SDValue Inc = N->getOperand(AddrOpIdx + 1);
+    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+  }
 
-  unsigned Opc = 0;
-  if (is64BitVector) {
-    Opc = DOpcodes[OpcodeIndex];
-    SDValue RegSeq;
-    SDValue V0 = N->getOperand(0+3);
-    SDValue V1 = N->getOperand(1+3);
-    if (NumVecs == 2) {
-      RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
-    } else {
-      SDValue V2 = N->getOperand(2+3);
-      SDValue V3 = (NumVecs == 3)
-        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
-        : N->getOperand(3+3);
-      RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
-    }
-
-    // Now extract the D registers back out.
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
-    if (NumVecs > 2)
-      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
-    if (NumVecs > 3)
-      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
+  SDValue SuperReg;
+  SDValue V0 = N->getOperand(Vec0Idx + 0);
+  SDValue V1 = N->getOperand(Vec0Idx + 1);
+  if (NumVecs == 2) {
+    if (is64BitVector)
+      SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+    else
+      SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
   } else {
-    // Check if this is loading the even or odd subreg of a Q register.
-    if (Lane < NumElts) {
-      Opc = QOpcodes0[OpcodeIndex];
-    } else {
-      Lane -= NumElts;
-      Opc = QOpcodes1[OpcodeIndex];
-    }
-
-    SDValue RegSeq;
-    SDValue V0 = N->getOperand(0+3);
-    SDValue V1 = N->getOperand(1+3);
-    if (NumVecs == 2) {
-      RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
-    } else {
-      SDValue V2 = N->getOperand(2+3);
-      SDValue V3 = (NumVecs == 3)
-        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
-        : N->getOperand(3+3);
-      RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
-    }
-
-    // Extract the subregs of the input vector.
-    unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-      Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
-                                                   RegSeq));
+    SDValue V2 = N->getOperand(Vec0Idx + 2);
+    SDValue V3 = (NumVecs == 3)
+      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+      : N->getOperand(Vec0Idx + 3);
+    if (is64BitVector)
+      SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+    else
+      SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
   }
+  Ops.push_back(SuperReg);
   Ops.push_back(getI32Imm(Lane));
   Ops.push_back(Pred);
   Ops.push_back(Reg0);
   Ops.push_back(Chain);
 
+  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+                                  QOpcodes[OpcodeIndex]);
+  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+                                         Ops.data(), Ops.size());
   if (!IsLoad)
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
+    return VLdLn;
 
-  std::vector<EVT> ResTys(NumVecs, RegVT);
-  ResTys.push_back(MVT::Other);
-  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+  // Extract the subregisters.
+  SuperReg = SDValue(VLdLn, 0);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    ReplaceUses(SDValue(N, Vec),
+                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+  return NULL;
+}
 
-  // Form a REG_SEQUENCE to force register allocation.
-  SDValue RegSeq;
-  if (is64BitVector) {
-    SDValue V0 = SDValue(VLdLn, 0);
-    SDValue V1 = SDValue(VLdLn, 1);
-    if (NumVecs == 2) {
-      RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
-    } else {
-      SDValue V2 = SDValue(VLdLn, 2);
-      // If it's a vld3, form a quad D-register but discard the last part.
-      SDValue V3 = (NumVecs == 3)
-        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
-        : SDValue(VLdLn, 3);
-      RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
-    }
-  } else {
-    // For 128-bit vectors, take the 64-bit results of the load and insert
-    // them as subregs into the result.
-    SDValue V[8];
-    for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
-      if (Even) {
-        V[i]   = SDValue(VLdLn, Vec);
-        V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                dl, RegVT), 0);
-      } else {
-        V[i]   = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                dl, RegVT), 0);
-        V[i+1] = SDValue(VLdLn, Vec);
-      }
-    }
-    if (NumVecs == 3)
-      V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                   dl, RegVT), 0);
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+                                      unsigned NumVecs, unsigned *Opcodes) {
+  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+  DebugLoc dl = N->getDebugLoc();
 
-    if (NumVecs == 2)
-      RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
-    else
-      RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
-                                 V[4], V[5], V[6], V[7]), 0);
+  SDValue MemAddr, Align;
+  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+    return NULL;
+
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  unsigned Alignment = 0;
+  if (NumVecs != 3) {
+    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+    if (Alignment > NumBytes)
+      Alignment = NumBytes;
+    if (Alignment < 8 && Alignment < NumBytes)
+      Alignment = 0;
+    // Alignment must be a power of two; make sure of that.
+    Alignment = (Alignment & -Alignment);
+    if (Alignment == 1)
+      Alignment = 0;
+  }
+  Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+  unsigned OpcodeIndex;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled vld-dup type");
+  case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4i16: OpcodeIndex = 1; break;
+  case MVT::v2f32:
+  case MVT::v2i32: OpcodeIndex = 2; break;
+  }
+
+  SDValue Pred = getAL(CurDAG);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+  SDValue SuperReg;
+  unsigned Opc = Opcodes[OpcodeIndex];
+  SmallVector<SDValue, 6> Ops;
+  Ops.push_back(MemAddr);
+  Ops.push_back(Align);
+  if (isUpdating) {
+    SDValue Inc = N->getOperand(2);
+    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
   }
+  Ops.push_back(Pred);
+  Ops.push_back(Reg0);
+  Ops.push_back(Chain);
 
+  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+  std::vector<EVT> ResTys;
+  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
+  SDNode *VLdDup =
+    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+  SuperReg = SDValue(VLdDup, 0);
+
+  // Extract the subregisters.
   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
-  assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
-  unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+  unsigned SubIdx = ARM::dsub_0;
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     ReplaceUses(SDValue(N, Vec),
-                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
-  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
   return NULL;
 }
 
@@ -1486,7 +1902,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
     RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
   else {
     SDValue V2 = N->getOperand(FirstTblReg + 2);
-    // If it's a vtbl3, form a quad D-register and leave the last part as 
+    // If it's a vtbl3, form a quad D-register and leave the last part as
     // an undef.
     SDValue V3 = (NumVecs == 3)
       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
@@ -1494,17 +1910,10 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
     RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
   }
 
-  // Now extract the D registers back out.
   SmallVector<SDValue, 6> Ops;
   if (IsExt)
     Ops.push_back(N->getOperand(1));
-  Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
-  Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
-  if (NumVecs > 2)
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
-  if (NumVecs > 3)
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
-
+  Ops.push_back(RegSeq);
   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
   Ops.push_back(getAL(CurDAG)); // predicate
   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
@@ -1574,7 +1983,7 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                     ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
   SDValue CPTmp0;
   SDValue CPTmp1;
-  if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) {
+  if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
     unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
     unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
     unsigned Opc = 0;
@@ -1602,7 +2011,7 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
   SDValue CPTmp0;
   SDValue CPTmp1;
   SDValue CPTmp2;
-  if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+  if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
     SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
     return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
@@ -1611,36 +2020,66 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
 }
 
 SDNode *ARMDAGToDAGISel::
-SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
-                    ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+                  ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
   ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
   if (!T)
     return 0;
 
-  if (Pred_t2_so_imm(TrueVal.getNode())) {
-    SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+  unsigned Opc = 0;
+  unsigned TrueImm = T->getZExtValue();
+  if (is_t2_so_imm(TrueImm)) {
+    Opc = ARM::t2MOVCCi;
+  } else if (TrueImm <= 0xffff) {
+    Opc = ARM::t2MOVCCi16;
+  } else if (is_t2_so_imm_not(TrueImm)) {
+    TrueImm = ~TrueImm;
+    Opc = ARM::t2MVNCCi;
+  } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+    // Large immediate.
+    Opc = ARM::t2MOVCCi32imm;
+  }
+
+  if (Opc) {
+    SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
     SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
-    return CurDAG->SelectNodeTo(N,
-                                ARM::t2MOVCCi, MVT::i32, Ops, 5);
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
   }
+
   return 0;
 }
 
 SDNode *ARMDAGToDAGISel::
-SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
-                     ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+                   ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
   ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
   if (!T)
     return 0;
 
-  if (Pred_so_imm(TrueVal.getNode())) {
-    SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+  unsigned Opc = 0;
+  unsigned TrueImm = T->getZExtValue();
+  bool isSoImm = is_so_imm(TrueImm);
+  if (isSoImm) {
+    Opc = ARM::MOVCCi;
+  } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+    Opc = ARM::MOVCCi16;
+  } else if (is_so_imm_not(TrueImm)) {
+    TrueImm = ~TrueImm;
+    Opc = ARM::MVNCCi;
+  } else if (TrueVal.getNode()->hasOneUse() &&
+             (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+    // Large immediate.
+    Opc = ARM::MOVCCi32imm;
+  }
+
+  if (Opc) {
+    SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
     SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
-    return CurDAG->SelectNodeTo(N,
-                                ARM::MOVCCi, MVT::i32, Ops, 5);
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
   }
+
   return 0;
 }
 
@@ -1688,18 +2127,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
     //           (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
     // Pattern complexity = 10  cost = 1  size = 0
     if (Subtarget->isThumb()) {
-      SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal,
+      SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
                                         CCVal, CCR, InFlag);
       if (!Res)
-        Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal,
+        Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
       if (Res)
         return Res;
     } else {
-      SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal,
+      SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
                                          CCVal, CCR, InFlag);
       if (!Res)
-        Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal,
+        Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
       if (Res)
         return Res;
@@ -1742,13 +2181,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   EVT VT = N->getValueType(0);
   if (!VT.is128BitVector() || N->getNumOperands() != 2)
     llvm_unreachable("unexpected CONCAT_VECTORS");
-  DebugLoc dl = N->getDebugLoc();
-  SDValue V0 = N->getOperand(0);
-  SDValue V1 = N->getOperand(1);
-  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
-  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+  return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
 }
 
 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
@@ -1788,19 +2221,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
         SDValue Pred = getAL(CurDAG);
         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
-        ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
+        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                          Ops, 4);
       } else {
         SDValue Ops[] = {
           CPIdx,
-          CurDAG->getRegister(0, MVT::i32),
           CurDAG->getTargetConstant(0, MVT::i32),
           getAL(CurDAG),
           CurDAG->getRegister(0, MVT::i32),
           CurDAG->getEntryNode()
         };
         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
-                                       Ops, 6);
+                                       Ops, 5);
       }
       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
       return NULL;
@@ -1930,7 +2362,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+                                    ARM::UMULL : ARM::UMULLv5,
+                                    dl, MVT::i32, MVT::i32, Ops, 5);
     }
   }
   case ISD::SMUL_LOHI: {
@@ -1944,7 +2378,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+                                    ARM::SMULL : ARM::SMULLv5,
+                                    dl, MVT::i32, MVT::i32, Ops, 5);
     }
   }
   case ISD::LOAD: {
@@ -1987,7 +2423,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                MVT::i32);
     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
-                                             MVT::Flag, Ops, 5);
+                                             MVT::Glue, Ops, 5);
     Chain = SDValue(ResNode, 0);
     if (N->getNumValues() == 2) {
       InFlag = SDValue(ResNode, 1);
@@ -2088,12 +2524,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     EVT VecVT = N->getValueType(0);
     EVT EltVT = VecVT.getVectorElementType();
     unsigned NumElts = VecVT.getVectorNumElements();
-    if (EltVT.getSimpleVT() == MVT::f64) {
+    if (EltVT == MVT::f64) {
       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
       return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
     }
-    assert(EltVT.getSimpleVT() == MVT::f32 &&
-           "unexpected type for BUILD_VECTOR");
+    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
     if (NumElts == 2)
       return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
@@ -2101,6 +2536,170 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                      N->getOperand(2), N->getOperand(3));
   }
 
+  case ARMISD::VLD2DUP: {
+    unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+                           ARM::VLD2DUPd32Pseudo };
+    return SelectVLDDup(N, false, 2, Opcodes);
+  }
+
+  case ARMISD::VLD3DUP: {
+    unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+                           ARM::VLD3DUPd32Pseudo };
+    return SelectVLDDup(N, false, 3, Opcodes);
+  }
+
+  case ARMISD::VLD4DUP: {
+    unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+                           ARM::VLD4DUPd32Pseudo };
+    return SelectVLDDup(N, false, 4, Opcodes);
+  }
+
+  case ARMISD::VLD2DUP_UPD: {
+    unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+                           ARM::VLD2DUPd32Pseudo_UPD };
+    return SelectVLDDup(N, true, 2, Opcodes);
+  }
+
+  case ARMISD::VLD3DUP_UPD: {
+    unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+                           ARM::VLD3DUPd32Pseudo_UPD };
+    return SelectVLDDup(N, true, 3, Opcodes);
+  }
+
+  case ARMISD::VLD4DUP_UPD: {
+    unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+                           ARM::VLD4DUPd32Pseudo_UPD };
+    return SelectVLDDup(N, true, 4, Opcodes);
+  }
+
+  case ARMISD::VLD1_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+                            ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+    unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+                            ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+    return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+  }
+
+  case ARMISD::VLD2_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+                            ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+                            ARM::VLD2q32Pseudo_UPD };
+    return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+  }
+
+  case ARMISD::VLD3_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+                            ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+    unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+                             ARM::VLD3q16Pseudo_UPD,
+                             ARM::VLD3q32Pseudo_UPD };
+    unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+                             ARM::VLD3q16oddPseudo_UPD,
+                             ARM::VLD3q32oddPseudo_UPD };
+    return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+  }
+
+  case ARMISD::VLD4_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+                            ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+    unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+                             ARM::VLD4q16Pseudo_UPD,
+                             ARM::VLD4q32Pseudo_UPD };
+    unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+                             ARM::VLD4q16oddPseudo_UPD,
+                             ARM::VLD4q32oddPseudo_UPD };
+    return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+  }
+
+  case ARMISD::VLD2LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+                            ARM::VLD2LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+                            ARM::VLD2LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+  }
+
+  case ARMISD::VLD3LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+                            ARM::VLD3LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+                            ARM::VLD3LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+  }
+
+  case ARMISD::VLD4LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+                            ARM::VLD4LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+                            ARM::VLD4LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+  }
+
+  case ARMISD::VST1_UPD: {
+    unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+                            ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+    unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+                            ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+    return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+  }
+
+  case ARMISD::VST2_UPD: {
+    unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+                            ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+                            ARM::VST2q32Pseudo_UPD };
+    return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+  }
+
+  case ARMISD::VST3_UPD: {
+    unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+                            ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+    unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+                             ARM::VST3q16Pseudo_UPD,
+                             ARM::VST3q32Pseudo_UPD };
+    unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+                             ARM::VST3q16oddPseudo_UPD,
+                             ARM::VST3q32oddPseudo_UPD };
+    return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+  }
+
+  case ARMISD::VST4_UPD: {
+    unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+                            ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+    unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+                             ARM::VST4q16Pseudo_UPD,
+                             ARM::VST4q32Pseudo_UPD };
+    unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+                             ARM::VST4q16oddPseudo_UPD,
+                             ARM::VST4q32oddPseudo_UPD };
+    return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+  }
+
+  case ARMISD::VST2LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+                            ARM::VST2LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+                            ARM::VST2LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+  }
+
+  case ARMISD::VST3LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+                            ARM::VST3LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+                            ARM::VST3LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+  }
+
+  case ARMISD::VST4LN_UPD: {
+    unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+                            ARM::VST4LNd32Pseudo_UPD };
+    unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+                            ARM::VST4LNq32Pseudo_UPD };
+    return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+  }
+
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -2113,7 +2712,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                               ARM::VLD1d32, ARM::VLD1d64 };
       unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
                               ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
-      return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+      return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
     }
 
     case Intrinsic::arm_neon_vld2: {
@@ -2121,7 +2720,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                               ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
       unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                               ARM::VLD2q32Pseudo };
-      return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
+      return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
     }
 
     case Intrinsic::arm_neon_vld3: {
@@ -2130,10 +2729,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                ARM::VLD3q16Pseudo_UPD,
                                ARM::VLD3q32Pseudo_UPD };
-      unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
-                               ARM::VLD3q16oddPseudo_UPD,
-                               ARM::VLD3q32oddPseudo_UPD };
-      return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+                               ARM::VLD3q16oddPseudo,
+                               ARM::VLD3q32oddPseudo };
+      return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld4: {
@@ -2142,31 +2741,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                ARM::VLD4q16Pseudo_UPD,
                                ARM::VLD4q32Pseudo_UPD };
-      unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
-                               ARM::VLD4q16oddPseudo_UPD,
-                               ARM::VLD4q32oddPseudo_UPD };
-      return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+                               ARM::VLD4q16oddPseudo,
+                               ARM::VLD4q32oddPseudo };
+      return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vld2lane: {
-      unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
-      unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
-      return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+                              ARM::VLD2LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+      return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
     }
 
     case Intrinsic::arm_neon_vld3lane: {
-      unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
-      unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
-      return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+                              ARM::VLD3LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+      return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
     }
 
     case Intrinsic::arm_neon_vld4lane: {
-      unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
-      unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
-      unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
-      return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+                              ARM::VLD4LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+      return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
     }
 
     case Intrinsic::arm_neon_vst1: {
@@ -2174,7 +2773,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                               ARM::VST1d32, ARM::VST1d64 };
       unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
                               ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
-      return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+      return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
     }
 
     case Intrinsic::arm_neon_vst2: {
@@ -2182,7 +2781,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                               ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
       unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                               ARM::VST2q32Pseudo };
-      return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
+      return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
     }
 
     case Intrinsic::arm_neon_vst3: {
@@ -2191,10 +2790,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                ARM::VST3q16Pseudo_UPD,
                                ARM::VST3q32Pseudo_UPD };
-      unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
-                               ARM::VST3q16oddPseudo_UPD,
-                               ARM::VST3q32oddPseudo_UPD };
-      return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+                               ARM::VST3q16oddPseudo,
+                               ARM::VST3q32oddPseudo };
+      return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst4: {
@@ -2203,31 +2802,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                ARM::VST4q16Pseudo_UPD,
                                ARM::VST4q32Pseudo_UPD };
-      unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
-                               ARM::VST4q16oddPseudo_UPD,
-                               ARM::VST4q32oddPseudo_UPD };
-      return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+                               ARM::VST4q16oddPseudo,
+                               ARM::VST4q32oddPseudo };
+      return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst2lane: {
-      unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
-      unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
-      return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+                              ARM::VST2LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+      return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
     }
 
     case Intrinsic::arm_neon_vst3lane: {
-      unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
-      unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
-      return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+                              ARM::VST3LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+      return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
     }
 
     case Intrinsic::arm_neon_vst4lane: {
-      unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
-      unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
-      unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
-      return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+      unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+                              ARM::VST4LNd32Pseudo };
+      unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+      return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
     }
     }
     break;
@@ -2240,18 +2839,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       break;
 
     case Intrinsic::arm_neon_vtbl2:
-      return SelectVTBL(N, false, 2, ARM::VTBL2);
+      return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
     case Intrinsic::arm_neon_vtbl3:
-      return SelectVTBL(N, false, 3, ARM::VTBL3);
+      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
     case Intrinsic::arm_neon_vtbl4:
-      return SelectVTBL(N, false, 4, ARM::VTBL4);
+      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
 
     case Intrinsic::arm_neon_vtbx2:
-      return SelectVTBL(N, true, 2, ARM::VTBX2);
+      return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
     case Intrinsic::arm_neon_vtbx3:
-      return SelectVTBL(N, true, 3, ARM::VTBX3);
+      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
     case Intrinsic::arm_neon_vtbx4:
-      return SelectVTBL(N, true, 4, ARM::VTBX4);
+      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
     }
     break;
   }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ce4a2c90689c..1835ec0f0054 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "arm-isel"
 #include "ARM.h"
 #include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMISelLowering.h"
 #include "ARMMachineFunctionInfo.h"
@@ -28,9 +29,11 @@
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -41,6 +44,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -50,6 +54,7 @@
 using namespace llvm;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
 
 // This option should go away when tail calls fully work.
 static cl::opt<bool>
@@ -57,14 +62,7 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden,
   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
   cl::init(false));
 
-// This option should go away when Machine LICM is smart enough to hoist a 
-// reg-to-reg VDUP.
-static cl::opt<bool>
-EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
-  cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
-  cl::init(false));
-
-static cl::opt<bool>
+cl::opt<bool>
 EnableARMLongCalls("arm-long-calls", cl::Hidden,
   cl::desc("Generate calls via indirect call instructions"),
   cl::init(false));
@@ -74,28 +72,6 @@ ARMInterworking("arm-interworking", cl::Hidden,
   cl::desc("Enable / disable ARM interworking (for debugging only)"),
   cl::init(true));
 
-static cl::opt<bool>
-EnableARMCodePlacement("arm-code-placement", cl::Hidden,
-  cl::desc("Enable code placement pass for ARM"),
-  cl::init(false));
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags,
-                                   CCState &State);
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags,
-                                    CCState &State);
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State);
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State);
-
 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                        EVT PromotedBitwiseVT) {
   if (VT != PromotedLdStVT) {
@@ -111,8 +87,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   EVT ElemTy = VT.getVectorElementType();
   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
     setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
-  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
   if (ElemTy != MVT::i32) {
     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
     setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
@@ -122,7 +97,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
-  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
   if (VT.isInteger()) {
@@ -131,6 +106,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
     setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
     setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
     setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction(VT.getSimpleVT(),
+                          (MVT::SimpleValueType)InnerVT, Expand);
   }
   setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
 
@@ -177,6 +156,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     : TargetLowering(TM, createTLOF(TM)) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
   RegInfo = TM.getRegisterInfo();
+  Itins = TM.getInstrItineraryData();
 
   if (Subtarget->isTargetDarwin()) {
     // Uses VFP for Thumb libfuncs if available.
@@ -260,13 +240,157 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setLibcallName(RTLIB::SRL_I128, 0);
   setLibcallName(RTLIB::SRA_I128, 0);
 
-  // Libcalls should use the AAPCS base standard ABI, even if hard float
-  // is in effect, as per the ARM RTABI specification, section 4.1.2.
   if (Subtarget->isAAPCS_ABI()) {
-    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
-      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
-                            CallingConv::ARM_AAPCS);
-    }
+    // Double-precision floating-point arithmetic helper functions
+    // RTABI chapter 4.1.2, Table 2
+    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+    // Double-precision floating-point comparison helper functions
+    // RTABI chapter 4.1.2, Table 3
+    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
+    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
+    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
+    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
+    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+    // Single-precision floating-point arithmetic helper functions
+    // RTABI chapter 4.1.2, Table 4
+    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+    // Single-precision floating-point comparison helper functions
+    // RTABI chapter 4.1.2, Table 5
+    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
+    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
+    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
+    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
+    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+    // Floating-point to integer conversions.
+    // RTABI chapter 4.1.2, Table 6
+    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+    // Conversions between floating types.
+    // RTABI chapter 4.1.2, Table 7
+    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
+    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+    // Integer to floating-point conversions.
+    // RTABI chapter 4.1.2, Table 8
+    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+    // Long long helper functions
+    // RTABI chapter 4.2, Table 9
+    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
+    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+    // Integer division functions
+    // RTABI chapter 4.3.1
+    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
+    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
+    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
   }
 
   if (Subtarget->isThumb1Only())
@@ -330,9 +454,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+    // Custom handling for some vector types to avoid expensive expansions
+    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
 
+    setTargetDAGCombine(ISD::INTRINSIC_VOID);
+    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
     setTargetDAGCombine(ISD::SHL);
     setTargetDAGCombine(ISD::SRL);
@@ -341,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::ANY_EXTEND);
     setTargetDAGCombine(ISD::SELECT_CC);
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
+    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+    setTargetDAGCombine(ISD::STORE);
   }
 
   computeRegisterProperties();
@@ -397,7 +532,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 
   // These are expanded into libcalls.
-  if (!Subtarget->hasDivide()) {
+  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
     // v7M has a hardware divider
     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
@@ -423,14 +558,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
   setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
-  // FIXME: Shouldn't need this, since no register is used, but the legalizer
-  // doesn't yet know how to not do that for SjLj.
-  setExceptionSelectorRegister(ARM::R0);
+  setOperationAction(ISD::EXCEPTIONADDR,      MVT::i32,   Expand);
+  setExceptionPointerRegister(ARM::R0);
+  setExceptionSelectorRegister(ARM::R1);
+
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
   // the default expansion.
   if (Subtarget->hasDataBarrier() ||
-      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
+      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
     // membarrier needs custom lowering; the rest are legal and handled
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -474,6 +610,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 
+  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
+
   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
   if (!Subtarget->hasV6Ops()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -484,7 +622,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
-    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
   }
 
@@ -493,6 +631,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (Subtarget->isTargetDarwin()) {
     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
   }
 
   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
@@ -547,8 +686,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);
 
-  if (Subtarget->hasV6T2Ops())
+  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
     setTargetDAGCombine(ISD::OR);
+  if (Subtarget->hasNEON())
+    setTargetDAGCombine(ISD::AND);
 
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
@@ -557,16 +698,26 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   else
     setSchedulingPreference(Sched::Hybrid);
 
-  maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
+  //// temporary - rewrite interface to use type
+  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
 
   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
   setMinStackArgumentAlignment(4);
 
-  if (EnableARMCodePlacement)
-    benefitFromCodePlacementOpt = true;
+  benefitFromCodePlacementOpt = true;
 }
 
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of it's super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurence of cross class copies and subregister insertions
+// and extractions.
 std::pair<const TargetRegisterClass*, uint8_t>
 ARMTargetLowering::findRepresentativeClass(EVT VT) const{
   const TargetRegisterClass *RRC = 0;
@@ -580,6 +731,12 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
     RRC = ARM::DPRRegisterClass;
+    // When NEON is used for SP, only half of the register file is available
+    // because operations that define both SP and DP results will be constrained
+    // to the VFP2 class (D0-D15). We currently model this constraint prior to
+    // coalescing by double-counting the SP regs. See the FIXME above.
+    if (Subtarget->useNEONForSinglePrecisionFP())
+      Cost = 2;
     break;
   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
   case MVT::v4f32: case MVT::v2f64:
@@ -602,6 +759,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   default: return 0;
   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
+  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
+  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
   case ARMISD::CALL:          return "ARMISD::CALL";
   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
@@ -612,7 +771,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
-  case ARMISD::AND:           return "ARMISD::AND";
   case ARMISD::CMP:           return "ARMISD::CMP";
   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
@@ -633,25 +791,33 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
   case ARMISD::RRX:           return "ARMISD::RRX";
 
-  case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
-  case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
+  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
+  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
 
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
 
   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
-  
+
   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
 
   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 
   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
-  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
+  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
 
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
+  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
   case ARMISD::VCGE:          return "ARMISD::VCGE";
+  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
+  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
   case ARMISD::VCGT:          return "ARMISD::VCGT";
+  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
+  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
   case ARMISD::VTST:          return "ARMISD::VTST";
 
@@ -693,6 +859,28 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::FMAX:          return "ARMISD::FMAX";
   case ARMISD::FMIN:          return "ARMISD::FMIN";
   case ARMISD::BFI:           return "ARMISD::BFI";
+  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
+  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
+  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
+  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
+  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
+  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
+  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
+  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
+  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
+  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
+  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
+  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
+  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
+  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
+  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
+  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
+  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
+  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
+  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
+  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
+  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
+  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
   }
 }
 
@@ -735,6 +923,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 
   for (unsigned i = 0; i != NumVals; ++i) {
     EVT VT = N->getValueType(i);
+    if (VT == MVT::Glue || VT == MVT::Other)
+      continue;
     if (VT.isFloatingPoint() || VT.isVector())
       return Sched::Latency;
   }
@@ -746,25 +936,29 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   // is not available.
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
-  if (TID.mayLoad())
-    return Sched::Latency;
 
-  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
-  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+  if (TID.getNumDefs() == 0)
+    return Sched::RegPressure;
+  if (!Itins->isEmpty() &&
+      Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
     return Sched::Latency;
+
   return Sched::RegPressure;
 }
 
+// FIXME: Move to RegInfo
 unsigned
 ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                        MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   switch (RC->getID()) {
   default:
     return 0;
   case ARM::tGPRRegClassID:
-    return RegInfo->hasFP(MF) ? 4 : 5;
+    return TFI->hasFP(MF) ? 4 : 5;
   case ARM::GPRRegClassID: {
-    unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+    unsigned FP = TFI->hasFP(MF) ? 1 : 0;
     return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
   }
   case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
@@ -829,136 +1023,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 
 #include "ARMGenCallingConv.inc"
 
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          CCState &State, bool CanFail) {
-  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-  // Try to get the first register.
-  if (unsigned Reg = State.AllocateReg(RegList, 4))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else {
-    // For the 2nd half of a v2f64, do not fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 4),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  // Try to get the second register.
-  if (unsigned Reg = State.AllocateReg(RegList, 4))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(4, 4),
-                                           LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags,
-                                   CCState &State) {
-  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                           CCValAssign::LocInfo &LocInfo,
-                           CCState &State, bool CanFail) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
-  if (Reg == 0) {
-    // For the 2nd half of a v2f64, do not just fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 8),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  unsigned T = State.AllocateReg(LoRegList[i]);
-  (void)T;
-  assert(T == LoRegList[i] && "Could not allocate register");
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags,
-                                    CCState &State) {
-  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                         CCValAssign::LocInfo &LocInfo, CCState &State) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
-  if (Reg == 0)
-    return false; // we didn't handle it
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  return true;  // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
-  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
-                                   State);
-}
-
 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 /// given CallingConvention value.
 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
@@ -967,23 +1031,29 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
   switch (CC) {
   default:
     llvm_unreachable("Unsupported calling convention");
-  case CallingConv::C:
   case CallingConv::Fast:
+    if (Subtarget->hasVFP2() && !isVarArg) {
+      if (!Subtarget->isAAPCS_ABI())
+        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+      // For AAPCS ABI targets, just use VFP variant of the calling convention.
+      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+    }
+    // Fallthrough
+  case CallingConv::C: {
     // Use target triple & subtarget features to do actual dispatch.
-    if (Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() &&
-          FloatABIType == FloatABI::Hard && !isVarArg)
-        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
-      else
-        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
-    } else
-        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    if (!Subtarget->isAAPCS_ABI())
+      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+    else if (Subtarget->hasVFP2() &&
+             FloatABIType == FloatABI::Hard && !isVarArg)
+      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+  }
   case CallingConv::ARM_AAPCS_VFP:
-    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   case CallingConv::ARM_AAPCS:
-    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   case CallingConv::ARM_APCS:
-    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   }
 }
 
@@ -1050,7 +1120,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
-      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
+      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
       break;
     }
 
@@ -1073,7 +1143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        /*isVolatile=*/false, /*AlwaysInline=*/false,
-                       NULL, 0, NULL, 0);
+                       MachinePointerInfo(0), MachinePointerInfo(0));
 }
 
 /// LowerMemOpCallTo - Store the argument to the stack.
@@ -1086,11 +1156,11 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
   unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-  if (Flags.isByVal()) {
+  if (Flags.isByVal())
     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-  }
+
   return DAG.getStore(Chain, dl, Arg, PtrOff,
-                      PseudoSourceValue::getStack(), LocMemOffset,
+                      MachinePointerInfo::getStack(LocMemOffset),
                       false, false, 0);
 }
 
@@ -1198,7 +1268,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
 
@@ -1289,7 +1359,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
       const GlobalValue *GV = G->getGlobal();
       // Create a constant pool entry for the callee address
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                            ARMPCLabelIndex,
                                                            ARMCP::CPValue, 0);
@@ -1298,13 +1368,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
       const char *Sym = S->getSymbol();
 
       // Create a constant pool entry for the callee address
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                        Sym, ARMPCLabelIndex, 0);
       // Get the address of the callee into a register
@@ -1312,7 +1382,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
     }
   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -1326,7 +1396,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
     // tBX takes a register source operand.
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                            ARMPCLabelIndex,
                                                            ARMCP::CPValue, 4);
@@ -1334,13 +1404,19 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
-    } else
-      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+    } else {
+      // On ELF targets for PIC code, direct calls should go through the PLT
+      unsigned OpFlags = 0;
+      if (Subtarget->isTargetELF() &&
+                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
+        OpFlags = ARMII::MO_PLT;
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+    }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     isDirect = true;
     bool isStub = Subtarget->isTargetDarwin() &&
@@ -1349,20 +1425,26 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // tBX takes a register source operand.
     const char *Sym = S->getSymbol();
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                        Sym, ARMPCLabelIndex, 4);
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
-    } else
-      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+    } else {
+      unsigned OpFlags = 0;
+      // On ELF targets for PIC code, direct calls should go through the PLT
+      if (Subtarget->isTargetELF() &&
+                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
+        OpFlags = ARMII::MO_PLT;
+      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+    }
   }
 
   // FIXME: handle tail calls differently.
@@ -1391,7 +1473,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   if (isTailCall)
     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
 
@@ -1421,7 +1503,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   int FI = INT_MAX;
   if (Arg.getOpcode() == ISD::CopyFromReg) {
     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
-    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+    if (!TargetRegisterInfo::isVirtualRegister(VR))
       return false;
     MachineInstr *Def = MRI->getVRegDef(VR);
     if (!Def)
@@ -1490,32 +1572,15 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   // LR.  This means if we need to reload LR, it takes an extra instructions,
   // which outweighs the value of the tail call; but here we don't know yet
   // whether LR is going to be used.  Probably the right approach is to
-  // generate the tail call here and turn it back into CALL/RET in 
+  // generate the tail call here and turn it back into CALL/RET in
   // emitEpilogue if LR is used.
-  if (Subtarget->isThumb1Only())
-    return false;
-
-  // For the moment, we can only do this to functions defined in this
-  // compilation, or to indirect calls.  A Thumb B to an ARM function,
-  // or vice versa, is not easily fixed up in the linker unlike BL.
-  // (We could do this by loading the address of the callee into a register;
-  // that is an extra instruction over the direct call and burns a register
-  // as well, so is not likely to be a win.)
-
-  // It might be safe to remove this restriction on non-Darwin.
 
   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   // but we need to make sure there are enough registers; the only valid
   // registers are the 4 used for parameters.  We don't currently do this
   // case.
-  if (isa<ExternalSymbolSDNode>(Callee))
-      return false;
-
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    const GlobalValue *GV = G->getGlobal();
-    if (GV->isDeclaration() || GV->isWeakForLinker())
-      return false;
-  }
+  if (Subtarget->isThumb1Only())
+    return false;
 
   // If the calling conventions do not match, then we'd better make sure the
   // results are returned in the same way as what the caller expects.
@@ -1583,7 +1648,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
           if (!VA.isRegLoc())
             return false;
           if (!ArgLocs[++i].isRegLoc())
-            return false; 
+            return false;
           if (RegVT == MVT::v2f64) {
             if (!ArgLocs[++i].isRegLoc())
               return false;
@@ -1643,7 +1708,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
 
@@ -1693,6 +1758,61 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
   return result;
 }
 
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  unsigned NumCopies = 0;
+  SDNode* Copies[2];
+  SDNode *Use = *N->use_begin();
+  if (Use->getOpcode() == ISD::CopyToReg) {
+    Copies[NumCopies++] = Use;
+  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+    // f64 returned in a pair of GPRs.
+    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+         UI != UE; ++UI) {
+      if (UI->getOpcode() != ISD::CopyToReg)
+        return false;
+      Copies[UI.getUse().getResNo()] = *UI;
+      ++NumCopies;
+    }
+  } else if (Use->getOpcode() == ISD::BITCAST) {
+    // f32 returned in a single GPR.
+    if (!Use->hasNUsesOfValue(1, 0))
+      return false;
+    Use = *Use->use_begin();
+    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+      return false;
+    Copies[NumCopies++] = Use;
+  } else {
+    return false;
+  }
+
+  if (NumCopies != 1 && NumCopies != 2)
+    return false;
+
+  bool HasRet = false;
+  for (unsigned i = 0; i < NumCopies; ++i) {
+    SDNode *Copy = Copies[i];
+    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+         UI != UE; ++UI) {
+      if (UI->getOpcode() == ISD::CopyToReg) {
+        SDNode *Use = *UI;
+        if (Use == Copies[0] || Use == Copies[1])
+          continue;
+        return false;
+      }
+      if (UI->getOpcode() != ARMISD::RET_FLAG)
+        return false;
+      HasRet = true;
+    }
+  }
+
+  return HasRet;
+}
+
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
 // one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -1732,7 +1852,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
   } else {
     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
-    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+    ARMPCLabelIndex = AFI->createPICLabelUId();
     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                          ARMCP::CPBlockAddress,
                                                          PCAdj);
@@ -1740,7 +1860,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   }
   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
-                               PseudoSourceValue::getConstantPool(), 0,
+                               MachinePointerInfo::getConstantPool(),
                                false, false, 0);
   if (RelocM == Reloc::Static)
     return Result;
@@ -1757,14 +1877,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   ARMConstantPoolValue *CPV =
     new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
-                             ARMCP::CPValue, PCAdj, "tlsgd", true);
+                             ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
-                         PseudoSourceValue::getConstantPool(), 0,
+                         MachinePointerInfo::getConstantPool(),
                          false, false, 0);
   SDValue Chain = Argument.getValue(1);
 
@@ -1802,16 +1922,16 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
   if (GV->isDeclaration()) {
     MachineFunction &MF = DAG.getMachineFunction();
     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
     // Initial exec model.
     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
     ARMConstantPoolValue *CPV =
       new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
-                               ARMCP::CPValue, PCAdj, "gottpoff", true);
+                               ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
-                         PseudoSourceValue::getConstantPool(), 0,
+                         MachinePointerInfo::getConstantPool(),
                          false, false, 0);
     Chain = Offset.getValue(1);
 
@@ -1819,15 +1939,15 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
 
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
-                         PseudoSourceValue::getConstantPool(), 0,
+                         MachinePointerInfo::getConstantPool(),
                          false, false, 0);
   } else {
     // local exec model
-    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
+    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
-                         PseudoSourceValue::getConstantPool(), 0,
+                         MachinePointerInfo::getConstantPool(),
                          false, false, 0);
   }
 
@@ -1859,51 +1979,72 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   if (RelocM == Reloc::PIC_) {
     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
     ARMConstantPoolValue *CPV =
-      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
+      new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                  CPAddr,
-                                 PseudoSourceValue::getConstantPool(), 0,
+                                 MachinePointerInfo::getConstantPool(),
                                  false, false, 0);
     SDValue Chain = Result.getValue(1);
     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
     if (!UseGOTOFF)
       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
-                           PseudoSourceValue::getGOT(), 0,
-                           false, false, 0);
+                           MachinePointerInfo::getGOT(), false, false, 0);
     return Result;
+  }
+
+  // If we have T2 ops, we can materialize the address directly via movt/movw
+  // pair. This is always cheaper.
+  if (Subtarget->useMovt()) {
+    ++NumMovwMovt;
+    // FIXME: Once remat is capable of dealing with instructions with register
+    // operands, expand this into two nodes.
+    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   } else {
-    // If we have T2 ops, we can materialize the address directly via movt/movw
-    // pair. This is always cheaper.
-    if (Subtarget->useMovt()) {
-      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
-                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));
-    } else {
-      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
-      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
-      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                         PseudoSourceValue::getConstantPool(), 0,
-                         false, false, 0);
-    }
+    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                       MachinePointerInfo::getConstantPool(),
+                       false, false, 0);
   }
 }
 
 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                     SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned ARMPCLabelIndex = 0;
   EVT PtrVT = getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  if (Subtarget->useMovt()) {
+    ++NumMovwMovt;
+    // FIXME: Once remat is capable of dealing with instructions with register
+    // operands, expand this into two nodes.
+    if (RelocM == Reloc::Static)
+      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+    unsigned Wrapper = (RelocM == Reloc::PIC_)
+      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+                           MachinePointerInfo::getGOT(), false, false, 0);
+    return Result;
+  }
+
+  unsigned ARMPCLabelIndex = 0;
   SDValue CPAddr;
-  if (RelocM == Reloc::Static)
+  if (RelocM == Reloc::Static) {
     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
-  else {
-    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+  } else {
+    ARMPCLabelIndex = AFI->createPICLabelUId();
     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
     ARMConstantPoolValue *CPV =
       new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
@@ -1912,7 +2053,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
 
   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                               PseudoSourceValue::getConstantPool(), 0,
+                               MachinePointerInfo::getConstantPool(),
                                false, false, 0);
   SDValue Chain = Result.getValue(1);
 
@@ -1922,8 +2063,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   }
 
   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
-    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
-                         PseudoSourceValue::getGOT(), 0,
+    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
                          false, false, 0);
 
   return Result;
@@ -1935,7 +2075,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
   MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   EVT PtrVT = getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1945,12 +2085,20 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                               PseudoSourceValue::getConstantPool(), 0,
+                               MachinePointerInfo::getConstantPool(),
                                false, false, 0);
   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
 }
 
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+  const {
+  DebugLoc dl = Op.getDebugLoc();
+  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
 SDValue
 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -1980,7 +2128,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   case Intrinsic::eh_sjlj_lsda: {
     MachineFunction &MF = DAG.getMachineFunction();
     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
     EVT PtrVT = getPointerTy();
     DebugLoc dl = Op.getDebugLoc();
     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -1994,7 +2142,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     SDValue Result =
       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                  PseudoSourceValue::getConstantPool(), 0,
+                  MachinePointerInfo::getConstantPool(),
                   false, false, 0);
 
     if (RelocM == Reloc::PIC_) {
@@ -2009,21 +2157,55 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                                const ARMSubtarget *Subtarget) {
   DebugLoc dl = Op.getDebugLoc();
-  SDValue Op5 = Op.getOperand(5);
-  unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
-  // Some subtargets which have dmb and dsb instructions can handle barriers
-  // directly. Some ARMv6 cpus can support them with the help of mcr
-  // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
-  // never get here.
-  unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
-  if (Subtarget->hasDataBarrier())
-    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
-  else {
-    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+  if (!Subtarget->hasDataBarrier()) {
+    // Some ARMv6 cpus can support data barriers with an mcr instruction.
+    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+    // here.
+    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
-    return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                        DAG.getConstant(0, MVT::i32));
   }
+
+  SDValue Op5 = Op.getOperand(5);
+  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+  ARM_MB::MemBOpt DMBOpt;
+  if (isDeviceBarrier)
+    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+  else
+    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+                     DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+                             const ARMSubtarget *Subtarget) {
+  // ARM pre v5TE and Thumb1 does not have preload instructions.
+  if (!(Subtarget->isThumb2() ||
+        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+    // Just preserve the chain.
+    return Op.getOperand(0);
+
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+  if (!isRead &&
+      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+    // ARMv7 with MP extension has PLDW.
+    return Op.getOperand(0);
+
+  if (Subtarget->isThumb())
+    // Invert the bits.
+    isRead = ~isRead & 1;
+  unsigned isData = Subtarget->isThumb() ? 0 : 1;
+
+  // Currently there is no intrinsic that matches pli.
+  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+                     DAG.getConstant(isData, MVT::i32));
 }
 
 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -2036,8 +2218,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
 }
 
 SDValue
@@ -2054,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     RC = ARM::GPRRegisterClass;
 
   // Transform the arguments stored in physical registers into virtual ones.
-  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
 
   SDValue ArgValue2;
@@ -2065,10 +2247,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     // Create load node to retrieve arguments from the stack.
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
-                            PseudoSourceValue::getFixedStack(FI), 0,
+                            MachinePointerInfo::getFixedStack(FI),
                             false, false, 0);
   } else {
-    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   }
 
@@ -2119,7 +2301,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
-                                    PseudoSourceValue::getFixedStack(FI), 0,
+                                    MachinePointerInfo::getFixedStack(FI),
                                     false, false, 0);
           } else {
             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
@@ -2149,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
         // Transform the arguments in physical registers into virtual ones.
-        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
       }
 
@@ -2160,7 +2342,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       default: llvm_unreachable("Unknown loc info!");
       case CCValAssign::Full: break;
       case CCValAssign::BCvt:
-        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
         break;
       case CCValAssign::SExt:
         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
@@ -2188,7 +2370,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       // Create load nodes to retrieve arguments from the stack.
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
-                                   PseudoSourceValue::getFixedStack(FI), 0,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -2202,7 +2384,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
     unsigned NumGPRs = CCInfo.getFirstUnallocated
       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
 
-    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+    unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
     unsigned VARegSize = (4 - NumGPRs) * 4;
     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
     unsigned ArgOffset = CCInfo.getNextStackOffset();
@@ -2214,7 +2396,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       AFI->setVarArgsFrameIndex(
         MFI->CreateFixedObject(VARegSaveSize,
                                ArgOffset + VARegSaveSize - VARegSize,
-                               true));
+                               false));
       SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                       getPointerTy());
 
@@ -2226,12 +2408,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
         else
           RC = ARM::GPRRegisterClass;
 
-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-               PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
-               0, false, false, 0);
+               MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+                       false, false, 0);
         MemOps.push_back(Store);
         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                           DAG.getConstant(4, getPointerTy()));
@@ -2320,7 +2502,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     break;
   }
   ARMcc = DAG.getConstant(CondCode, MVT::i32);
-  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
 }
 
 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -2329,10 +2511,10 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                              DebugLoc dl) const {
   SDValue Cmp;
   if (!isFloatingPointZero(RHS))
-    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
+    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
   else
-    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
-  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
+    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
 }
 
 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -2444,8 +2626,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
 
   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
-                       Ld->getChain(), Ld->getBasePtr(),
-                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                        Ld->isVolatile(), Ld->isNonTemporal(),
                        Ld->getAlignment());
 
@@ -2464,7 +2645,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
     SDValue Ptr = Ld->getBasePtr();
     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                           Ld->getChain(), Ptr,
-                          Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                          Ld->getPointerInfo(),
                           Ld->isVolatile(), Ld->isNonTemporal(),
                           Ld->getAlignment());
 
@@ -2474,7 +2655,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                           Ld->getChain(), NewPtr,
-                          Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+                          Ld->getPointerInfo().getWithOffset(4),
                           Ld->isVolatile(), Ld->isNonTemporal(),
                           NewAlign);
     return;
@@ -2524,7 +2705,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
     expandf64Toi32(RHS, DAG, RHS1, RHS2);
     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
     ARMcc = DAG.getConstant(CondCode, MVT::i32);
-    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
   }
@@ -2564,7 +2745,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   if (CondCode2 != ARMCC::AL) {
@@ -2599,14 +2780,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   }
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
-                       PseudoSourceValue::getJumpTable(), 0,
+                       MachinePointerInfo::getJumpTable(),
                        false, false, 0);
     Chain = Addr.getValue(1);
     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   } else {
     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
-                       PseudoSourceValue::getJumpTable(), 0, false, false, 0);
+                       MachinePointerInfo::getJumpTable(), false, false, 0);
     Chain = Addr.getValue(1);
     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   }
@@ -2627,7 +2808,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
     break;
   }
   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 }
 
 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -2646,7 +2827,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
     break;
   }
 
-  Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+  Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   return DAG.getNode(Opc, dl, VT, Op);
 }
 
@@ -2657,12 +2838,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
-  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
-  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
-  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
+  bool F2IisFast = Subtarget->isCortexA9() ||
+    Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+
+  // Bitcast operand 1 to i32.
+  if (SrcVT == MVT::f64)
+    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                       &Tmp1, 1).getValue(1);
+  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+  // If float to int conversion isn't going to be super expensive, then simply
+  // or in the signbit.
+  if (F2IisFast) {
+    SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+    SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+    Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+    if (VT == MVT::f32) {
+      Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                         DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+    }
+
+    // f64: Or the high part with signbit and then combine two parts.
+    Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+                       &Tmp0, 1);
+    SDValue Lo = Tmp0.getValue(0);
+    SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+    Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+  }
+
+  // Remove the signbit of operand 0.
+  Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+
+  // If operand 1 signbit is one, then negate operand 0.
+  SDValue ARMcc;
+  SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
+                          ISD::SETLT, ARMcc, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
+  return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
 }
 
 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2678,11 +2893,11 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
     SDValue Offset = DAG.getConstant(4, MVT::i32);
     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }
 
@@ -2697,17 +2912,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
     ? ARM::R7 : ARM::R11;
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }
 
-/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
+/// ExpandBITCAST - If the target supports VFP, this function is called to
 /// expand a bit convert where either the source or destination type is i64 to
 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
 /// vectors), since the legalizer won't know what to do with that.
-static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   DebugLoc dl = N->getDebugLoc();
   SDValue Op = N->getOperand(0);
@@ -2717,7 +2933,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
   EVT SrcVT = Op.getValueType();
   EVT DstVT = N->getValueType(0);
   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
-         "ExpandBIT_CONVERT called for non-i64 type");
+         "ExpandBITCAST called for non-i64 type");
 
   // Turn i64->f64 into VMOVDRR.
   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
@@ -2725,7 +2941,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
                              DAG.getConstant(0, MVT::i32));
     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                              DAG.getConstant(1, MVT::i32));
-    return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   }
 
@@ -2752,7 +2968,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
 }
 
 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -2825,7 +3041,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
-SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                             SelectionDAG &DAG) const {
   // The rounding mode is in bits 23:22 of the FPSCR.
   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
@@ -2835,11 +3051,11 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                               DAG.getConstant(Intrinsic::arm_get_fpscr,
                                               MVT::i32));
-  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                   DAG.getConstant(1U << 22, MVT::i32));
   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                               DAG.getConstant(22, MVT::i32));
-  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 
+  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                      DAG.getConstant(3, MVT::i32));
 }
 
@@ -2860,33 +3076,40 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
+  if (!VT.isVector())
+    return SDValue();
+
   // Lower vector shifts on NEON to use VSHL.
-  if (VT.isVector()) {
-    assert(ST->hasNEON() && "unexpected vector shift");
-
-    // Left shifts translate directly to the vshiftu intrinsic.
-    if (N->getOpcode() == ISD::SHL)
-      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                         DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
-                         N->getOperand(0), N->getOperand(1));
-
-    assert((N->getOpcode() == ISD::SRA ||
-            N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
-
-    // NEON uses the same intrinsics for both left and right shifts.  For
-    // right shifts, the shift amounts are negative, so negate the vector of
-    // shift amounts.
-    EVT ShiftVT = N->getOperand(1).getValueType();
-    SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
-                                       getZeroVector(ShiftVT, DAG, dl),
-                                       N->getOperand(1));
-    Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
-                               Intrinsic::arm_neon_vshifts :
-                               Intrinsic::arm_neon_vshiftu);
+  assert(ST->hasNEON() && "unexpected vector shift");
+
+  // Left shifts translate directly to the vshiftu intrinsic.
+  if (N->getOpcode() == ISD::SHL)
     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(vshiftInt, MVT::i32),
-                       N->getOperand(0), NegatedCount);
-  }
+                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+                       N->getOperand(0), N->getOperand(1));
+
+  assert((N->getOpcode() == ISD::SRA ||
+          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+  // NEON uses the same intrinsics for both left and right shifts.  For
+  // right shifts, the shift amounts are negative, so negate the vector of
+  // shift amounts.
+  EVT ShiftVT = N->getOperand(1).getValueType();
+  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+                                     getZeroVector(ShiftVT, DAG, dl),
+                                     N->getOperand(1));
+  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+                             Intrinsic::arm_neon_vshifts :
+                             Intrinsic::arm_neon_vshiftu);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                     DAG.getConstant(vshiftInt, MVT::i32),
+                     N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+                                const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
 
   // We can get here for a node like i32 = ISD::SHL i32, i64
   if (VT != MVT::i64)
@@ -2912,7 +3135,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
   // captures the result into a carry flag.
   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
-  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
 
   // The low part is an ARMISD::RRX operand, which shifts the carry in.
   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
@@ -2998,13 +3221,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
         AndOp = Op1;
 
       // Ignore bitconvert.
-      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
+      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
         AndOp = AndOp.getOperand(0);
 
       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
         Opc = ARMISD::VTST;
-        Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
-        Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
+        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
         Invert = !Invert;
       }
     }
@@ -3013,7 +3236,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   if (Swap)
     std::swap(Op0, Op1);
 
-  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+  // If one of the operands is a constant vector zero, attempt to fold the
+  // comparison to a specialized compare-against-zero form.
+  SDValue SingleOp;
+  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+    SingleOp = Op0;
+  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+    if (Opc == ARMISD::VCGE)
+      Opc = ARMISD::VCLEZ;
+    else if (Opc == ARMISD::VCGT)
+      Opc = ARMISD::VCLTZ;
+    SingleOp = Op1;
+  }
+
+  SDValue Result;
+  if (SingleOp.getNode()) {
+    switch (Opc) {
+    case ARMISD::VCEQ:
+      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGE:
+      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLEZ:
+      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGT:
+      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLTZ:
+      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+    default:
+      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+    }
+  } else {
+     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+  }
 
   if (Invert)
     Result = DAG.getNOT(dl, Result, VT);
@@ -3026,7 +3280,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
 /// operand (e.g., VMOV).  If so, return the encoded value.
 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
-                                 EVT &VT, bool is128Bits, bool isVMOV) {
+                                 EVT &VT, bool is128Bits, NEONModImmType type) {
   unsigned OpCmode, Imm;
 
   // SplatBitSize is set to the smallest size that splats the vector, so a
@@ -3039,7 +3293,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
 
   switch (SplatBitSize) {
   case 8:
-    if (!isVMOV)
+    if (type != VMOVModImm)
       return SDValue();
     // Any 1-byte value is OK.  Op=0, Cmode=1110.
     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
@@ -3096,6 +3350,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
       break;
     }
 
+    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+    if (type == OtherModImm) return SDValue();
+
     if ((SplatBits & ~0xffff) == 0 &&
         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
       // Value = 0x0000nnff: Op=x, Cmode=1100.
@@ -3122,7 +3379,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     return SDValue();
 
   case 64: {
-    if (!isVMOV)
+    if (type != VMOVModImm)
       return SDValue();
     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
     uint64_t BitMask = 0xff;
@@ -3376,8 +3633,8 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
 
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 
-                                 const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                             const ARMSubtarget *ST) const {
   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
@@ -3391,10 +3648,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       EVT VmovVT;
       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
                                       SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, VmovVT, VT.is128BitVector(), true);
+                                      DAG, VmovVT, VT.is128BitVector(),
+                                      VMOVModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
       }
 
       // Try an immediate VMVN.
@@ -3402,10 +3660,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                              ((1LL << SplatBitSize) - 1));
       Val = isNEONModifiedImm(NegatedImm,
                                       SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, VmovVT, VT.is128BitVector(), false);
+                                      DAG, VmovVT, VT.is128BitVector(),
+                                      VMVNModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
       }
     }
   }
@@ -3439,26 +3698,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
 
-  if (EnableARMVDUPsplat) {
-    // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
-    // i32 and try again.
-    if (usesOnlyOneValue && EltSize <= 32) {
-      if (!isConstant)
-        return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
-      if (VT.getVectorElementType().isFloatingPoint()) {
-        SmallVector<SDValue, 8> Ops;
-        for (unsigned i = 0; i < NumElts; ++i)
-          Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, 
-                                    Op.getOperand(i)));
-        SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
-                                  NumElts);
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, 
-                           LowerBUILD_VECTOR(Val, DAG, ST));
-      }
-      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
+  // i32 and try again.
+  if (usesOnlyOneValue && EltSize <= 32) {
+    if (!isConstant)
+      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+    if (VT.getVectorElementType().isFloatingPoint()) {
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i < NumElts; ++i)
+        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                                  Op.getOperand(i)));
+      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+      Val = LowerBUILD_VECTOR(Val, DAG, ST);
       if (Val.getNode())
-        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
     }
+    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+    if (Val.getNode())
+      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
   }
 
   // If all elements are constants and the case above didn't get hit, fall back
@@ -3467,10 +3725,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   if (isConstant)
     return SDValue();
 
-  if (!EnableARMVDUPsplat) {
-    // Use VDUP for non-constant splats.
-    if (usesOnlyOneValue && EltSize <= 32)
-      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+  if (NumElts >= 4) {
+    SDValue shuffle = ReconstructShuffle(Op, DAG);
+    if (shuffle != SDValue())
+      return shuffle;
   }
 
   // Vectors with 32- or 64-bit elements can be built by directly assigning
@@ -3483,14 +3742,144 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0; i < NumElts; ++i)
-      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
+      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   }
 
   return SDValue();
 }
 
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SmallVector<SDValue, 2> SourceVecs;
+  SmallVector<unsigned, 2> MinElts;
+  SmallVector<unsigned, 2> MaxElts;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = Op.getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+      // A shuffle can only come from building a vector from various
+      // elements of other vectors.
+      return SDValue();
+    }
+
+    // Record this extraction against the appropriate vector if possible...
+    SDValue SourceVec = V.getOperand(0);
+    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+    bool FoundSource = false;
+    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+      if (SourceVecs[j] == SourceVec) {
+        if (MinElts[j] > EltNo)
+          MinElts[j] = EltNo;
+        if (MaxElts[j] < EltNo)
+          MaxElts[j] = EltNo;
+        FoundSource = true;
+        break;
+      }
+    }
+
+    // Or record a new source if not...
+    if (!FoundSource) {
+      SourceVecs.push_back(SourceVec);
+      MinElts.push_back(EltNo);
+      MaxElts.push_back(EltNo);
+    }
+  }
+
+  // Currently only do something sane when at most two source vectors
+  // involved.
+  if (SourceVecs.size() > 2)
+    return SDValue();
+
+  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+  int VEXTOffsets[2] = {0, 0};
+
+  // This loop extracts the usage patterns of the source vectors
+  // and prepares appropriate SDValues for a shuffle if possible.
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    if (SourceVecs[i].getValueType() == VT) {
+      // No VEXT necessary
+      ShuffleSrcs[i] = SourceVecs[i];
+      VEXTOffsets[i] = 0;
+      continue;
+    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+      // It probably isn't worth padding out a smaller vector just to
+      // break it down again in a shuffle.
+      return SDValue();
+    }
+
+    // Since only 64-bit and 128-bit vectors are legal on ARM and
+    // we've eliminated the other cases...
+    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+           "unexpected vector sizes in ReconstructShuffle");
+
+    if (MaxElts[i] - MinElts[i] >= NumElts) {
+      // Span too large for a VEXT to cope
+      return SDValue();
+    }
+
+    if (MinElts[i] >= NumElts) {
+      // The extraction can just take the second half
+      VEXTOffsets[i] = NumElts;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                   SourceVecs[i],
+                                   DAG.getIntPtrConstant(NumElts));
+    } else if (MaxElts[i] < NumElts) {
+      // The extraction can just take the first half
+      VEXTOffsets[i] = 0;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                   SourceVecs[i],
+                                   DAG.getIntPtrConstant(0));
+    } else {
+      // An actual VEXT is needed
+      VEXTOffsets[i] = MinElts[i];
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                     SourceVecs[i],
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                     SourceVecs[i],
+                                     DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
+    }
+  }
+
+  SmallVector<int, 8> Mask;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Entry = Op.getOperand(i);
+    if (Entry.getOpcode() == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    SDValue ExtractVec = Entry.getOperand(0);
+    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+                                          .getOperand(1))->getSExtValue();
+    if (ExtractVec == SourceVecs[0]) {
+      Mask.push_back(ExtractElt - VEXTOffsets[0]);
+    } else {
+      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+    }
+  }
+
+  // Final check before we try to produce nonsense...
+  if (isShuffleMaskLegal(Mask, VT))
+    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+                                &Mask[0]);
+
+  return SDValue();
+}
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -3706,8 +4095,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     // registers are defined to use, and since i64 is not legal.
     EVT EltVT = EVT::getFloatingPointVT(EltSize);
     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
-    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
-    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0; i < NumElts; ++i) {
       if (ShuffleMask[i] < 0)
@@ -3719,21 +4108,26 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
                                                   MVT::i32)));
     }
     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   }
 
   return SDValue();
 }
 
 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
-  EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue Vec = Op.getOperand(0);
+  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
   SDValue Lane = Op.getOperand(1);
-  assert(VT == MVT::i32 &&
-         Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
-         "unexpected type for custom-lowering vector extract");
-  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+  if (!isa<ConstantSDNode>(Lane))
+    return SDValue();
+
+  SDValue Vec = Op.getOperand(0);
+  if (Op.getValueType() == MVT::i32 &&
+      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+    DebugLoc dl = Op.getDebugLoc();
+    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+  }
+
+  return Op;
 }
 
 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
@@ -3747,25 +4141,123 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   SDValue Op1 = Op.getOperand(1);
   if (Op0.getOpcode() != ISD::UNDEF)
     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
-                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
+                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
                       DAG.getIntPtrConstant(0));
   if (Op1.getOpcode() != ISD::UNDEF)
     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
-                      DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
+                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
                       DAG.getIntPtrConstant(1));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
+  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+                                   bool isSigned) {
+  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+  EVT VT = N->getValueType(0);
+  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+    SDNode *BVN = N->getOperand(0).getNode();
+    if (BVN->getValueType(0) != MVT::v4i32 ||
+        BVN->getOpcode() != ISD::BUILD_VECTOR)
+      return false;
+    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+    unsigned HiElt = 1 - LoElt;
+    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+      return false;
+    if (isSigned) {
+      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+        return true;
+    } else {
+      if (Hi0->isNullValue() && Hi1->isNullValue())
+        return true;
+    }
+    return false;
+  }
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDNode *Elt = N->getOperand(i).getNode();
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+      unsigned HalfSize = EltSize / 2;
+      if (isSigned) {
+        int64_t SExtVal = C->getSExtValue();
+        if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+          return false;
+      } else {
+        if ((C->getZExtValue() >> HalfSize) != 0)
+          return false;
+      }
+      continue;
+    }
+    return false;
+  }
+
+  return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+    return true;
+  if (isExtendedBUILD_VECTOR(N, DAG, true))
+    return true;
+  return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+    return true;
+  if (isExtendedBUILD_VECTOR(N, DAG, false))
+    return true;
+  return false;
 }
 
-/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
-/// an extending load, return the unextended value.
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
 static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
   if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
     return N->getOperand(0);
-  LoadSDNode *LD = cast<LoadSDNode>(N);
-  return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
-                     LD->getBasePtr(), LD->getSrcValue(),
-                     LD->getSrcValueOffset(), LD->isVolatile(),
-                     LD->isNonTemporal(), LD->getAlignment());
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+                       LD->isNonTemporal(), LD->getAlignment());
+  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
+  // have been legalized as a BITCAST from v4i32.
+  if (N->getOpcode() == ISD::BITCAST) {
+    SDNode *BVN = N->getOperand(0).getNode();
+    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+  }
+  // Construct a new BUILD_VECTOR with elements truncated to half the size.
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+  EVT VT = N->getValueType(0);
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+  unsigned NumElts = VT.getVectorNumElements();
+  MVT TruncVT = MVT::getIntegerVT(EltSize);
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+    const APInt &CInt = C->getAPIntValue();
+    Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
 }
 
 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
@@ -3776,19 +4268,16 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   SDNode *N0 = Op.getOperand(0).getNode();
   SDNode *N1 = Op.getOperand(1).getNode();
   unsigned NewOpc = 0;
-  if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
-      (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+  if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
     NewOpc = ARMISD::VMULLs;
-  } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
-             (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+  else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
     NewOpc = ARMISD::VMULLu;
-  } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+  else if (VT == MVT::v2i64)
     // Fall through to expand this.  It is not legal.
     return SDValue();
-  } else {
+  else
     // Other vector multiplications are legal.
     return Op;
-  }
 
   // Legalize to a VMULL instruction.
   DebugLoc DL = Op.getDebugLoc();
@@ -3801,6 +4290,181 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
 }
 
+static SDValue 
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+  // Convert to float
+  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+  // Get reciprocal estimate.
+  // float4 recip = vrecpeq_f32(yf);
+  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+  // Because char has a smaller range than uchar, we can actually get away
+  // without any newton steps.  This requires that we use a weird bias
+  // of 0xb000, however (again, this has been exhaustively tested).
+  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+  Y = DAG.getConstant(0xb000, MVT::i32);
+  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+  // Convert back to short.
+  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+  return X;
+}
+
+static SDValue 
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+  SDValue N2;
+  // Convert to float.
+  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+  
+  // Use reciprocal estimate and one refinement step.
+  // float4 recip = vrecpeq_f32(yf);
+  // recip *= vrecpsq_f32(yf, recip);
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+                   N1, N2);
+  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+  // Because short has a smaller range than ushort, we can actually get away
+  // with only a single newton step.  This requires that we use a weird bias
+  // of 89, however (again, this has been exhaustively tested).
+  // float4 result = as_float4(as_int4(xf*recip) + 89);
+  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+  N1 = DAG.getConstant(89, MVT::i32);
+  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+  // Convert back to integer and return.
+  // return vmovn_s32(vcvt_s32_f32(result));
+  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+  return N0;
+}
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+         "unexpected type for custom-lowering ISD::SDIV");
+
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue N0 = Op.getOperand(0);
+  SDValue N1 = Op.getOperand(1);
+  SDValue N2, N3;
+  
+  if (VT == MVT::v8i8) {
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+    
+    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+                     DAG.getIntPtrConstant(4));
+    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+                     DAG.getIntPtrConstant(4)); 
+    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+                     DAG.getIntPtrConstant(0));
+    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+                     DAG.getIntPtrConstant(0));
+
+    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+    N0 = LowerCONCAT_VECTORS(N0, DAG);
+    
+    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+    return N0;
+  }
+  return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+         "unexpected type for custom-lowering ISD::UDIV");
+
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue N0 = Op.getOperand(0);
+  SDValue N1 = Op.getOperand(1);
+  SDValue N2, N3;
+  
+  if (VT == MVT::v8i8) {
+    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+    
+    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+                     DAG.getIntPtrConstant(4));
+    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+                     DAG.getIntPtrConstant(4)); 
+    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+                     DAG.getIntPtrConstant(0));
+    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+                     DAG.getIntPtrConstant(0));
+    
+    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+    
+    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+    N0 = LowerCONCAT_VECTORS(N0, DAG);
+    
+    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 
+                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+                     N0);
+    return N0;
+  }
+  
+  // v4i16 sdiv ... Convert to float.
+  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+  // Use reciprocal estimate and two refinement steps.
+  // float4 recip = vrecpeq_f32(yf);
+  // recip *= vrecpsq_f32(yf, recip);
+  // recip *= vrecpsq_f32(yf, recip);
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+                   N1, N2);
+  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+                   N1, N2);
+  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+  // Simply multiplying by the reciprocal estimate can leave us a few ulps
+  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+  // and that it will never cause us to return an answer too large).
+  // float4 result = as_float4(as_int4(xf*recip) + 89);
+  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+  N1 = DAG.getConstant(2, MVT::i32);
+  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+  // Convert back to integer and return.
+  // return vmovn_u32(vcvt_s32_f32(result));
+  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+  return N0;
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3816,6 +4480,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
+  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:
@@ -3826,9 +4491,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
-  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
+  case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG);
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
@@ -3843,6 +4509,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:           return LowerMUL(Op, DAG);
+  case ISD::SDIV:          return LowerSDIV(Op, DAG);
+  case ISD::UDIV:          return LowerUDIV(Op, DAG);
   }
   return SDValue();
 }
@@ -3857,12 +4525,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   default:
     llvm_unreachable("Don't know how to custom expand this!");
     break;
-  case ISD::BIT_CONVERT:
-    Res = ExpandBIT_CONVERT(N, DAG);
+  case ISD::BITCAST:
+    Res = ExpandBITCAST(N, DAG);
     break;
   case ISD::SRL:
   case ISD::SRA:
-    Res = LowerShift(N, DAG, Subtarget);
+    Res = Expand64BitShift(N, DAG, Subtarget);
     break;
   }
   if (Res.getNode())
@@ -3892,7 +4560,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
   case 1:
     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
-    strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
     break;
   case 2:
     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
@@ -4183,6 +4851,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   case ARM::BCCi64:
   case ARM::BCCZi64: {
+    // If there is an unconditional branch to the other successor, remove it.
+    BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
     // Compare both parts that make up the double comparison separately for
     // equality.
     bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
@@ -4341,10 +5012,6 @@ static SDValue PerformMULCombine(SDNode *N,
   if (Subtarget->isThumb1Only())
     return SDValue();
 
-  if (DAG.getMachineFunction().
-      getFunction()->hasFnAttr(Attribute::OptimizeForSize))
-    return SDValue();
-
   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
     return SDValue();
 
@@ -4389,10 +5056,67 @@ static SDValue PerformMULCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformANDCombine(SDNode *N,
+                                TargetLowering::DAGCombinerInfo &DCI) {
+  // Attempt to use immediate-form VBIC
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (BVN &&
+      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+    if (SplatBitSize <= 64) {
+      EVT VbicVT;
+      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VbicVT, VT.is128BitVector(),
+                                      OtherModImm);
+      if (Val.getNode()) {
+        SDValue Input =
+          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
 static SDValue PerformORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
+  // Attempt to use immediate-form VORR
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (BVN && Subtarget->hasNEON() &&
+      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+    if (SplatBitSize <= 64) {
+      EVT VorrVT;
+      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VorrVT, VT.is128BitVector(),
+                                      OtherModImm);
+      if (Val.getNode()) {
+        SDValue Input =
+          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+      }
+    }
+  }
+
   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
   // reasonable.
 
@@ -4400,7 +5124,6 @@ static SDValue PerformORCombine(SDNode *N,
   if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
     return SDValue();
 
-  SelectionDAG &DAG = DCI.DAG;
   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   DebugLoc DL = N->getDebugLoc();
   // 1) or (and A, mask), val => ARMbfi A, val, mask
@@ -4415,40 +5138,46 @@ static SDValue PerformORCombine(SDNode *N,
   if (N0.getOpcode() != ISD::AND)
     return SDValue();
 
-  EVT VT = N->getValueType(0);
   if (VT != MVT::i32)
     return SDValue();
 
+  SDValue N00 = N0.getOperand(0);
 
   // The value and the mask need to be constants so we can verify this is
   // actually a bitfield set. If the mask is 0xffff, we can do better
   // via a movt instruction, so don't use BFI in that case.
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-  if (!C)
+  SDValue MaskOp = N0.getOperand(1);
+  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+  if (!MaskC)
     return SDValue();
-  unsigned Mask = C->getZExtValue();
+  unsigned Mask = MaskC->getZExtValue();
   if (Mask == 0xffff)
     return SDValue();
   SDValue Res;
   // Case (1): or (and A, mask), val => ARMbfi A, val, mask
-  if ((C = dyn_cast<ConstantSDNode>(N1))) {
-    unsigned Val = C->getZExtValue();
-    if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N1C) {
+    unsigned Val = N1C->getZExtValue();
+    if ((Val & ~Mask) != Val)
       return SDValue();
-    Val >>= CountTrailingZeros_32(~Mask);
 
-    Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
-                      DAG.getConstant(Val, MVT::i32),
-                      DAG.getConstant(Mask, MVT::i32));
+    if (ARM::isBitFieldInvertedMask(Mask)) {
+      Val >>= CountTrailingZeros_32(~Mask);
 
-    // Do not add new nodes to DAG combiner worklist.
-    DCI.CombineTo(N, Res, false);
+      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+                        DAG.getConstant(Val, MVT::i32),
+                        DAG.getConstant(Mask, MVT::i32));
+
+      // Do not add new nodes to DAG combiner worklist.
+      DCI.CombineTo(N, Res, false);
+      return SDValue();
+    }
   } else if (N1.getOpcode() == ISD::AND) {
     // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
-    C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
-    if (!C)
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C)
       return SDValue();
-    unsigned Mask2 = C->getZExtValue();
+    unsigned Mask2 = N11C->getZExtValue();
 
     if (ARM::isBitFieldInvertedMask(Mask) &&
         ARM::isBitFieldInvertedMask(~Mask2) &&
@@ -4462,10 +5191,11 @@ static SDValue PerformORCombine(SDNode *N,
       unsigned lsb = CountTrailingZeros_32(Mask2);
       Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                         DAG.getConstant(lsb, MVT::i32));
-      Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                         DAG.getConstant(Mask, MVT::i32));
       // Do not add new nodes to DAG combiner worklist.
       DCI.CombineTo(N, Res, false);
+      return SDValue();
     } else if (ARM::isBitFieldInvertedMask(~Mask) &&
                ARM::isBitFieldInvertedMask(Mask2) &&
                (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
@@ -4476,40 +5206,472 @@ static SDValue PerformORCombine(SDNode *N,
         return SDValue();
       // 2b
       unsigned lsb = CountTrailingZeros_32(Mask);
-      Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                         DAG.getConstant(lsb, MVT::i32));
       Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                                 DAG.getConstant(Mask2, MVT::i32));
       // Do not add new nodes to DAG combiner worklist.
       DCI.CombineTo(N, Res, false);
+      return SDValue();
     }
   }
 
+  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+      ARM::isBitFieldInvertedMask(~Mask)) {
+    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+    // where lsb(mask) == #shamt and masked bits of B are known zero.
+    SDValue ShAmt = N00.getOperand(1);
+    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+    unsigned LSB = CountTrailingZeros_32(Mask);
+    if (ShAmtC != LSB)
+      return SDValue();
+
+    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+                      DAG.getConstant(~Mask, MVT::i32));
+
+    // Do not add new nodes to DAG combiner worklist.
+    DCI.CombineTo(N, Res, false);
+  }
+
+  return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
+/// C1 & C2 == C1.
+static SDValue PerformBFICombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::AND) {
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C)
+      return SDValue();
+    unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+    unsigned Mask2 = N11C->getZExtValue();
+    if ((Mask & Mask2) == Mask2)
+      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+                             N->getOperand(0), N1.getOperand(0),
+                             N->getOperand(2));
+  }
   return SDValue();
 }
 
 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
 /// ARMISD::VMOVRRD.
 static SDValue PerformVMOVRRDCombine(SDNode *N,
-                                   TargetLowering::DAGCombinerInfo &DCI) {
-  // fmrrd(fmdrr x, y) -> x,y
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  // vmovrrd(vmovdrr x, y) -> x,y
   SDValue InDouble = N->getOperand(0);
   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
   return SDValue();
 }
 
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() == ISD::BITCAST)
+    Op0 = Op0.getOperand(0);
+  if (Op1.getOpcode() == ISD::BITCAST)
+    Op1 = Op1.getOperand(0);
+  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+      Op0.getNode() == Op1.getNode() &&
+      Op0.getResNo() == 0 && Op1.getResNo() == 1)
+    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                       N->getValueType(0), Op0.getOperand(0));
+  return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI) {
+  // Bitcast an i64 store extracted from a vector to f64.
+  // Otherwise, the i64 value will be legalized to a pair of i32 values.
+  StoreSDNode *St = cast<StoreSDNode>(N);
+  SDValue StVal = St->getValue();
+  if (!ISD::isNormalStore(St) || St->isVolatile() ||
+      StVal.getValueType() != MVT::i64 ||
+      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc dl = StVal.getDebugLoc();
+  SDValue IntVec = StVal.getOperand(0);
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+                                 IntVec.getValueType().getVectorNumElements());
+  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+                               Vec, StVal.getOperand(1));
+  dl = N->getDebugLoc();
+  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+  // Make the DAGCombiner fold the bitcasts.
+  DCI.AddToWorklist(Vec.getNode());
+  DCI.AddToWorklist(ExtElt.getNode());
+  DCI.AddToWorklist(V.getNode());
+  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+                      St->getPointerInfo(), St->isVolatile(),
+                      St->isNonTemporal(), St->getAlignment(),
+                      St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+  unsigned NumElts = N->getValueType(0).getVectorNumElements();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDNode *Elt = N->getOperand(i).getNode();
+    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+      return true;
+  }
+  return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI){
+  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
+  // into a pair of GPRs, which is fine when the value is used as a scalar,
+  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+  SelectionDAG &DAG = DCI.DAG;
+  if (N->getNumOperands() == 2) {
+    SDValue RV = PerformVMOVDRRCombine(N, DAG);
+    if (RV.getNode())
+      return RV;
+  }
+
+  // Load i64 elements as f64 values so that type legalization does not split
+  // them up into i32 values.
+  EVT VT = N->getValueType(0);
+  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+    return SDValue();
+  DebugLoc dl = N->getDebugLoc();
+  SmallVector<SDValue, 8> Ops;
+  unsigned NumElts = VT.getVectorNumElements();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+    Ops.push_back(V);
+    // Make the DAGCombiner fold the bitcast.
+    DCI.AddToWorklist(V.getNode());
+  }
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI) {
+  // Bitcast an i64 load inserted into a vector to f64.
+  // Otherwise, the i64 value will be legalized to a pair of i32 values.
+  EVT VT = N->getValueType(0);
+  SDNode *Elt = N->getOperand(1).getNode();
+  if (VT.getVectorElementType() != MVT::i64 ||
+      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc dl = N->getDebugLoc();
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+                                 VT.getVectorNumElements());
+  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+  // Make the DAGCombiner fold the bitcasts.
+  DCI.AddToWorklist(Vec.getNode());
+  DCI.AddToWorklist(V.getNode());
+  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+                               Vec, V, N->getOperand(2));
+  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+  // The LLVM shufflevector instruction does not require the shuffle mask
+  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
+  // operands do not match the mask length, they are extended by concatenating
+  // them with undef vectors.  That is probably the right thing for other
+  // targets, but for NEON it is better to concatenate two double-register
+  // size vector operands into a single quad-register size vector.  Do that
+  // transformation here:
+  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
+  //   shuffle(concat(v1, v2), undef)
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+      Op0.getNumOperands() != 2 ||
+      Op1.getNumOperands() != 2)
+    return SDValue();
+  SDValue Concat0Op1 = Op0.getOperand(1);
+  SDValue Concat1Op1 = Op1.getOperand(1);
+  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+      Concat1Op1.getOpcode() != ISD::UNDEF)
+    return SDValue();
+  // Skip the transformation if any of the types are illegal.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = N->getValueType(0);
+  if (!TLI.isTypeLegal(VT) ||
+      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+      !TLI.isTypeLegal(Concat1Op1.getValueType()))
+    return SDValue();
+
+  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                                  Op0.getOperand(0), Op1.getOperand(0));
+  // Translate the shuffle mask.
+  SmallVector<int, 16> NewMask;
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfElts = NumElts/2;
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  for (unsigned n = 0; n < NumElts; ++n) {
+    int MaskElt = SVN->getMaskElt(n);
+    int NewElt = -1;
+    if (MaskElt < (int)HalfElts)
+      NewElt = MaskElt;
+    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+      NewElt = HalfElts + MaskElt - NumElts;
+    NewMask.push_back(NewElt);
+  }
+  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+                              DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+  SDValue Addr = N->getOperand(AddrOpIdx);
+
+  // Search for a use of the address operand that is an increment.
+  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() != ISD::ADD ||
+        UI.getUse().getResNo() != Addr.getResNo())
+      continue;
+
+    // Check that the add is independent of the load/store.  Otherwise, folding
+    // it would create a cycle.
+    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+      continue;
+
+    // Find the new opcode for the updating load/store.
+    bool isLoad = true;
+    bool isLaneOp = false;
+    unsigned NewOpc = 0;
+    unsigned NumVecs = 0;
+    if (isIntrinsic) {
+      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+      switch (IntNo) {
+      default: assert(0 && "unexpected intrinsic for Neon base update");
+      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
+        NumVecs = 1; break;
+      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
+        NumVecs = 2; break;
+      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
+        NumVecs = 3; break;
+      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
+        NumVecs = 4; break;
+      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+        NumVecs = 2; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+        NumVecs = 3; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+        NumVecs = 4; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
+        NumVecs = 1; isLoad = false; break;
+      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
+        NumVecs = 2; isLoad = false; break;
+      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
+        NumVecs = 3; isLoad = false; break;
+      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
+        NumVecs = 4; isLoad = false; break;
+      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+        NumVecs = 2; isLoad = false; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+        NumVecs = 3; isLoad = false; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+        NumVecs = 4; isLoad = false; isLaneOp = true; break;
+      }
+    } else {
+      isLaneOp = true;
+      switch (N->getOpcode()) {
+      default: assert(0 && "unexpected opcode for Neon base update");
+      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+      }
+    }
+
+    // Find the size of memory referenced by the load/store.
+    EVT VecTy;
+    if (isLoad)
+      VecTy = N->getValueType(0);
+    else 
+      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+    if (isLaneOp)
+      NumBytes /= VecTy.getVectorNumElements();
+
+    // If the increment is a constant, it must match the memory ref size.
+    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+      uint64_t IncVal = CInc->getZExtValue();
+      if (IncVal != NumBytes)
+        continue;
+    } else if (NumBytes >= 3 * 16) {
+      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+      // separate instructions that make it harder to use a non-constant update.
+      continue;
+    }
+
+    // Create the new updating load/store node.
+    EVT Tys[6];
+    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+    unsigned n;
+    for (n = 0; n < NumResultVecs; ++n)
+      Tys[n] = VecTy;
+    Tys[n++] = MVT::i32;
+    Tys[n] = MVT::Other;
+    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(N->getOperand(0)); // incoming chain
+    Ops.push_back(N->getOperand(AddrOpIdx));
+    Ops.push_back(Inc);
+    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+      Ops.push_back(N->getOperand(i));
+    }
+    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+                                           Ops.data(), Ops.size(),
+                                           MemInt->getMemoryVT(),
+                                           MemInt->getMemOperand());
+
+    // Update the uses.
+    std::vector<SDValue> NewResults;
+    for (unsigned i = 0; i < NumResultVecs; ++i) {
+      NewResults.push_back(SDValue(UpdN.getNode(), i));
+    }
+    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+    DCI.CombineTo(N, NewResults);
+    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+    break;
+  } 
+  return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  // vldN-dup instructions only support 64-bit vectors for N > 1.
+  if (!VT.is64BitVector())
+    return false;
+
+  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+  SDNode *VLD = N->getOperand(0).getNode();
+  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return false;
+  unsigned NumVecs = 0;
+  unsigned NewOpc = 0;
+  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::arm_neon_vld2lane) {
+    NumVecs = 2;
+    NewOpc = ARMISD::VLD2DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+    NumVecs = 3;
+    NewOpc = ARMISD::VLD3DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+    NumVecs = 4;
+    NewOpc = ARMISD::VLD4DUP;
+  } else {
+    return false;
+  }
+
+  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+  // numbers match the load.
+  unsigned VLDLaneNo =
+    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    // Ignore uses of the chain result.
+    if (UI.getUse().getResNo() == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    if (User->getOpcode() != ARMISD::VDUPLANE ||
+        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+      return false;
+  }
+
+  // Create the vldN-dup node.
+  EVT Tys[5];
+  unsigned n;
+  for (n = 0; n < NumVecs; ++n)
+    Tys[n] = VT;
+  Tys[n] = MVT::Other;
+  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+                                           Ops, 2, VLDMemInt->getMemoryVT(),
+                                           VLDMemInt->getMemOperand());
+
+  // Update the uses.
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    unsigned ResNo = UI.getUse().getResNo();
+    // Ignore uses of the chain result.
+    if (ResNo == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+  }
+
+  // Now the vldN-lane intrinsic is dead except for its chain result.
+  // Update uses of the chain.
+  std::vector<SDValue> VLDDupResults;
+  for (unsigned n = 0; n < NumVecs; ++n)
+    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+  DCI.CombineTo(VLD, VLDDupResults);
+
+  return true;
+}
+
 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
 /// ARMISD::VDUPLANE.
 static SDValue PerformVDUPLANECombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
-  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
-  // redundant.
   SDValue Op = N->getOperand(0);
-  EVT VT = N->getValueType(0);
 
-  // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BIT_CONVERT)
+  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+  if (CombineVLDDUP(N, DCI))
+    return SDValue(N, 0);
+
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.  Ignore bit_converts for now; element sizes are checked below.
+  while (Op.getOpcode() == ISD::BITCAST)
     Op = Op.getOperand(0);
   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
     return SDValue();
@@ -4521,11 +5683,11 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
   unsigned EltBits;
   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
     EltSize = 8;
+  EVT VT = N->getValueType(0);
   if (EltSize > VT.getVectorElementType().getSizeInBits())
     return SDValue();
 
-  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
-  return DCI.CombineTo(N, Res, false);
+  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
 }
 
 /// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -4533,7 +5695,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
 /// build_vector must have the same constant integer value.
 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
   // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BIT_CONVERT)
+  while (Op.getOpcode() == ISD::BITCAST)
     Op = Op.getOperand(0);
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   APInt SplatBits, SplatUndef;
@@ -4747,7 +5909,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
 
   // Nothing to be done for scalar shifts.
-  if (! VT.isVector())
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!VT.isVector() || !TLI.isTypeLegal(VT))
     return SDValue();
 
   assert(ST->hasNEON() && "unexpected vector shift");
@@ -4793,7 +5956,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
 
     if (VT == MVT::i32 &&
         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
-        TLI.isTypeLegal(Vec.getValueType())) {
+        TLI.isTypeLegal(Vec.getValueType()) &&
+        isa<ConstantSDNode>(Lane)) {
 
       unsigned Opc = 0;
       switch (N->getOpcode()) {
@@ -4906,7 +6070,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
+  case ISD::AND:        return PerformANDCombine(N, DCI);
+  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+  case ISD::STORE:      return PerformSTORECombine(N, DCI);
+  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
@@ -4916,20 +6087,42 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::VLD2DUP:
+  case ARMISD::VLD3DUP:
+  case ARMISD::VLD4DUP:
+    return CombineBaseUpdate(N, DCI);
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    case Intrinsic::arm_neon_vld1:
+    case Intrinsic::arm_neon_vld2:
+    case Intrinsic::arm_neon_vld3:
+    case Intrinsic::arm_neon_vld4:
+    case Intrinsic::arm_neon_vld2lane:
+    case Intrinsic::arm_neon_vld3lane:
+    case Intrinsic::arm_neon_vld4lane:
+    case Intrinsic::arm_neon_vst1:
+    case Intrinsic::arm_neon_vst2:
+    case Intrinsic::arm_neon_vst3:
+    case Intrinsic::arm_neon_vst4:
+    case Intrinsic::arm_neon_vst2lane:
+    case Intrinsic::arm_neon_vst3lane:
+    case Intrinsic::arm_neon_vst4lane:
+      return CombineBaseUpdate(N, DCI);
+    default: break;
+    }
+    break;
   }
   return SDValue();
 }
 
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
-  if (!Subtarget->hasV6Ops())
-    // Pre-v6 does not support unaligned mem access.
-    return false;
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+                                                          EVT VT) const {
+  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
 
-  // v6+ may or may not support unaligned mem access depending on the system
-  // configuration.
-  // FIXME: This is pretty conservative. Should we provide cmdline option to
-  // control the behaviour?
-  if (!Subtarget->isTargetDarwin())
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->allowsUnalignedMem())
     return false;
 
   switch (VT.getSimpleVT().SimpleTy) {
@@ -5143,7 +6336,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   if (!Subtarget->isThumb())
     return ARM_AM::getSOImmVal(Imm) != -1;
   if (Subtarget->isThumb2())
-    return ARM_AM::getT2SOImmVal(Imm) != -1; 
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
   return Imm >= 0 && Imm <= 255;
 }
 
@@ -5348,6 +6541,37 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
 //                           ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+  // Looking for "rev" which is V6+.
+  if (!Subtarget->hasV6Ops())
+    return false;
+
+  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+  std::string AsmStr = IA->getAsmString();
+  SmallVector<StringRef, 4> AsmPieces;
+  SplitString(AsmStr, AsmPieces, ";\n");
+
+  switch (AsmPieces.size()) {
+  default: return false;
+  case 1:
+    AsmStr = AsmPieces[0];
+    AsmPieces.clear();
+    SplitString(AsmStr, AsmPieces, " \t,");
+
+    // rev $0, $1
+    if (AsmPieces.size() == 3 &&
+        AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+        IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+      const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+      if (Ty && Ty->getBitWidth() == 32)
+        return IntrinsicLowering::LowerToByteSwap(CI);
+    }
+    break;
+  }
+
+  return false;
+}
+
 /// getConstraintType - Given a constraint letter, return the type of
 /// constraint it is for this target.
 ARMTargetLowering::ConstraintType
@@ -5362,6 +6586,40 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'l':
+    if (type->isIntegerTy()) {
+      if (Subtarget->isThumb())
+        weight = CW_SpecificReg;
+      else
+        weight = CW_Register;
+    }
+    break;
+  case 'w':
+    if (type->isFloatingPointTy())
+      weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 std::pair<unsigned, const TargetRegisterClass*>
 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const {
@@ -5664,3 +6922,63 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
     return ARM::getVFPf64Imm(Imm) != -1;
   return false;
 }
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                           const CallInst &I,
+                                           unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    // Conservatively set memVT to the entire set of vectors loaded.
+    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile loads with NEON intrinsics not supported
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    // Conservatively set memVT to the entire set of vectors stored.
+    unsigned NumElts = 0;
+    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+      if (!ArgTy->isVectorTy())
+        break;
+      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+    }
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile stores with NEON intrinsics not supported
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
+  default:
+    break;
+  }
+
+  return false;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ba9ea7f15e7b..dc400c485ec6 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -34,6 +34,10 @@ namespace llvm {
 
       Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
                     // TargetExternalSymbol, and TargetGlobalAddress.
+      WrapperDYN,   // WrapperDYN - A wrapper node for TargetGlobalAddress in
+                    // DYN mode.
+      WrapperPIC,   // WrapperPIC - A wrapper node for TargetGlobalAddress in
+                    // PIC mode.
       WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
 
       CALL,         // Function call.
@@ -47,8 +51,6 @@ namespace llvm {
 
       PIC_ADD,      // Add with a PC operand and a PIC label.
 
-      AND,          // ARM "and" instruction that sets the 's' flag in CPSR.
-
       CMP,          // ARM compare instructions.
       CMPZ,         // ARM compare that sets only Z flag.
       CMPFP,        // ARM VFP compare instruction, sets FPSCR.
@@ -73,8 +75,9 @@ namespace llvm {
       VMOVRRD,      // double to two gprs.
       VMOVDRR,      // Two gprs to double.
 
-      EH_SJLJ_SETJMP,    // SjLj exception handling setjmp.
-      EH_SJLJ_LONGJMP,   // SjLj exception handling longjmp.
+      EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
+      EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
+      EH_SJLJ_DISPATCHSETUP,  // SjLj exception handling dispatch setup.
 
       TC_RETURN,    // Tail call return pseudo.
 
@@ -82,13 +85,20 @@ namespace llvm {
 
       DYN_ALLOC,    // Dynamic allocation on the stack.
 
-      MEMBARRIER,   // Memory barrier
-      SYNCBARRIER,  // Memory sync barrier
+      MEMBARRIER,   // Memory barrier (DMB)
+      MEMBARRIER_MCR, // Memory barrier (MCR)
+
+      PRELOAD,      // Preload
       
       VCEQ,         // Vector compare equal.
+      VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
+      VCGEZ,        // Vector compare greater than or equal to zero.
+      VCLEZ,        // Vector compare less than or equal to zero.
       VCGEU,        // Vector compare unsigned greater than or equal.
       VCGT,         // Vector compare greater than.
+      VCGTZ,        // Vector compare greater than zero.
+      VCLTZ,        // Vector compare less than zero.
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
 
@@ -161,7 +171,38 @@ namespace llvm {
       FMIN,
 
       // Bit-field insert
-      BFI
+      BFI,
+      
+      // Vector OR with immediate
+      VORRIMM,
+      // Vector AND with NOT of immediate
+      VBICIMM,
+
+      // Vector load N-element structure to all lanes:
+      VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+      VLD3DUP,
+      VLD4DUP,
+
+      // NEON loads with post-increment base updates:
+      VLD1_UPD,
+      VLD2_UPD,
+      VLD3_UPD,
+      VLD4_UPD,
+      VLD2LN_UPD,
+      VLD3LN_UPD,
+      VLD4LN_UPD,
+      VLD2DUP_UPD,
+      VLD3DUP_UPD,
+      VLD4DUP_UPD,
+
+      // NEON stores with post-increment base updates:
+      VST1_UPD,
+      VST2_UPD,
+      VST3_UPD,
+      VST4_UPD,
+      VST2LN_UPD,
+      VST3LN_UPD,
+      VST4LN_UPD
     };
   }
 
@@ -193,14 +234,16 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                     SelectionDAG &DAG) const;
 
-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
     virtual const char *getTargetNodeName(unsigned Opcode) const;
 
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
 
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type.
     /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
@@ -241,7 +284,15 @@ namespace llvm {
                                                 unsigned Depth) const;
 
 
+    virtual bool ExpandInlineAsm(CallInst *CI) const;
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -290,6 +341,9 @@ namespace llvm {
     /// materialize the FP immediate as a load from a constant pool.
     virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
 
+    virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                    const CallInst &I,
+                                    unsigned Intrinsic) const;
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
     findRepresentativeClass(EVT VT) const;
@@ -301,6 +355,8 @@ namespace llvm {
 
     const TargetRegisterInfo *RegInfo;
 
+    const InstrItineraryData *Itins;
+
     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     ///
     unsigned ARMPCLabelIndex;
@@ -329,6 +385,7 @@ namespace llvm {
                              ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -350,6 +407,10 @@ namespace llvm {
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 
+                              const ARMSubtarget *ST) const;
+
+    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,
@@ -393,6 +454,8 @@ namespace llvm {
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
 
+    virtual bool isUsedByReturnOnly(SDNode *N) const;
+
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                       SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
     SDValue getVFPCmp(SDValue LHS, SDValue RHS,
@@ -410,6 +473,13 @@ namespace llvm {
 
   };
   
+  enum NEONModImmType {
+    VMOVModImm,
+    VMVNModImm,
+    OtherModImm
+  };
+  
+  
   namespace ARM {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
   }
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 113cfffe61f9..765cba42d0bd 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,4 +1,4 @@
-//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=//
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -71,7 +71,7 @@ def NVTBLFrm      : Format<41>;
 
 // Misc flags.
 
-// the instruction has a Rn register operand.
+// The instruction has an Rn register operand.
 // UnaryDP - Indicates this is a unary data processing instruction, i.e.
 // it doesn't have a Rn operand.
 class UnaryDP    { bit isUnaryDataProc = 1; }
@@ -84,9 +84,10 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
 // ARM Instruction flags.  These need to match ARMBaseInstrInfo.h.
 //
 
+// FIXME: Once the JIT is MC-ized, these can go away.
 // Addressing mode.
-class AddrMode<bits<4> val> {
-  bits<4> Value = val;
+class AddrMode<bits<5> val> {
+  bits<5> Value = val;
 }
 def AddrModeNone    : AddrMode<0>;
 def AddrMode1       : AddrMode<1>;
@@ -104,6 +105,7 @@ def AddrModeT2_i8   : AddrMode<12>;
 def AddrModeT2_so   : AddrMode<13>;
 def AddrModeT2_pc   : AddrMode<14>;
 def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12    : AddrMode<16>;
 
 // Instruction size.
 class SizeFlagVal<bits<3> val> {
@@ -134,7 +136,6 @@ def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
 
 //===----------------------------------------------------------------------===//
-
 // ARM special operands.
 //
 
@@ -143,6 +144,39 @@ def CondCodeOperand : AsmOperandClass {
   let SuperClasses = [];
 }
 
+def CCOutOperand : AsmOperandClass {
+  let Name = "CCOut";
+  let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+  let Name = "MemBarrierOpt";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+  let Name = "ProcIFlags";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+  let Name = "MSRMask";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+  let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+  let PrintMethod = "printCPSIFlag";
+  let ParserMatchClass = ProcIFlagsOperand;
+}
+
 // ARM Predicate operand. Default to 14 = always (AL). Second part is CC
 // register whose default is 0 (no register).
 def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
@@ -153,16 +187,23 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
 
 // Conditional code result for instructions whose 's' bit is set, e.g. subs.
 def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+  let EncoderMethod = "getCCOutOpValue";
   let PrintMethod = "printSBitModifierOperand";
+  let ParserMatchClass = CCOutOperand;
 }
 
 // Same as cc_out except it defaults to setting CPSR.
 def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+  let EncoderMethod = "getCCOutOpValue";
   let PrintMethod = "printSBitModifierOperand";
+  let ParserMatchClass = CCOutOperand;
 }
 
 // ARM special operands for disassembly only.
 //
+def setend_op : Operand<i32> {
+  let PrintMethod = "printSetendOperand";
+}
 
 def cps_opt : Operand<i32> {
   let PrintMethod = "printCPSOptionOperand";
@@ -170,6 +211,7 @@ def cps_opt : Operand<i32> {
 
 def msr_mask : Operand<i32> {
   let PrintMethod = "printMSRMaskOperand";
+  let ParserMatchClass = MSRMaskOperand;
 }
 
 // A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
@@ -179,7 +221,6 @@ def neg_zero : Operand<i32> {
 }
 
 //===----------------------------------------------------------------------===//
-
 // ARM Instruction templates.
 //
 
@@ -198,14 +239,17 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
   bit isUnaryDataProc = 0;
   bit canXformTo16Bit = 0;
 
+  // If this is a pseudo instruction, mark it isCodeGenOnly.
+  let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
   // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
-  let TSFlags{3-0}   = AM.Value;
-  let TSFlags{6-4}   = SZ.Value;
-  let TSFlags{8-7}   = IndexModeBits;
-  let TSFlags{14-9}  = Form;
-  let TSFlags{15}    = isUnaryDataProc;
-  let TSFlags{16}    = canXformTo16Bit;
-  let TSFlags{18-17} = D.Value;
+  let TSFlags{4-0}   = AM.Value;
+  let TSFlags{7-5}   = SZ.Value;
+  let TSFlags{9-8}   = IndexModeBits;
+  let TSFlags{15-10} = Form;
+  let TSFlags{16}    = isUnaryDataProc;
+  let TSFlags{17}    = canXformTo16Bit;
+  let TSFlags{19-18} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;
@@ -225,25 +269,51 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
                 Format f, Domain d, string cstr, InstrItinClass itin>
   : InstTemplate<am, sz, im, f, d, cstr, itin>;
 
-class PseudoInst<dag oops, dag iops, InstrItinClass itin,
-                 string asm, list<dag> pattern>
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+  // FIXME: This really should derive from InstTemplate instead, as pseudos
+  //        don't need encoding information. TableGen doesn't like that
+  //        currently. Need to figure out why and fix it.
   : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
             "", itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
-  let AsmString = asm;
   let Pattern = pattern;
 }
 
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                    list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                    list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                    list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsThumb2];
+}
 // Almost all ARM instructions are predicable.
 class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
         IndexMode im, Format f, InstrItinClass itin,
         string opc, string asm, string cstr,
         list<dag> pattern>
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
@@ -270,9 +340,14 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
          string opc, string asm, string cstr,
          list<dag> pattern>
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  bits<4> p; // Predicate operand
+  bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+  let Inst{31-28} = p;
+  let Inst{20} = s;
+
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
-  let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
@@ -319,10 +394,6 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
        asm, "", pattern> {
   let Inst{27-24} = opcod;
 }
-class ABXIx2<dag oops, dag iops, InstrItinClass itin,
-             string asm, list<dag> pattern>
-  : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
-       asm, "", pattern>;
 
 // BR_JT instructions
 class JTI<dag oops, dag iops, InstrItinClass itin,
@@ -335,19 +406,42 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
       opc, asm, "", pattern> {
+  bits<4> Rt;
+  bits<4> Rn;
   let Inst{27-23} = 0b00011;
   let Inst{22-21} = opcod;
   let Inst{20}    = 1;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
   let Inst{11-0}  = 0b111110011111;
 }
 class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
       opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rt;
+  bits<4> Rn;
   let Inst{27-23} = 0b00011;
   let Inst{22-21} = opcod;
   let Inst{20}    = 0;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
   let Inst{11-4}  = 0b11111001;
+  let Inst{3-0}   = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+  : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> Rn;
+  let Inst{27-23} = 0b00010;
+  let Inst{22} = b;
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
+  let Inst{11-4} = 0b00001001;
+  let Inst{3-0} = Rt2;
 }
 
 // addrmode1 instructions
@@ -372,387 +466,125 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
   let Inst{24-21} = opcod;
   let Inst{27-26} = 0b00;
 }
-class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern>;
-
-
-// addrmode2 loads and stores
-class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
-          string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{27-26} = 0b01;
-}
 
 // loads
-class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-
-// stores
-class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
 
-// Pre-indexed loads
-class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-
-// Pre-indexed stores
-class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+             Format f, InstrItinClass itin, string opc, string asm,
+             list<dag> pattern>
+  : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+      "", pattern> {
+  let Inst{27-25} = op;
+  let Inst{24} = 1;  // 24 == P
+  // 23 == U
+  let Inst{22} = isByte;
+  let Inst{21} = 0;  // 21 == W
+  let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
       opc, asm, cstr, pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-26} = 0b01;
-}
-
-// Post-indexed loads
-class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-26} = 0b01;
-}
-
-// Post-indexed stores
-class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 0; // B bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-26} = 0b01;
-}
-class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{22}    = 1; // B bit
-  let Inst{24}    = 0; // P bit
+  bits<4> Rt;
   let Inst{27-26} = 0b01;
+  let Inst{24}    = isPre; // P bit
+  let Inst{22}    = isByte; // B bit
+  let Inst{21}    = isPre; // W bit
+  let Inst{20}    = isLd; // L bit
+  let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM2 store w/ two operands: (GPR, am2offset)
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<14> offset;
+  bits<4> Rn;
+  let Inst{25} = offset{13};
+  let Inst{23} = offset{12};
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = offset{11-0};
 }
 
 // addrmode3 instructions
-class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
-          string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern>;
-class AXI3<dag oops, dag iops, Format f, InstrItinClass itin,
-           string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern>;
-
-// loads
-class AI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AXI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-}
-class AI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
-              string opc, string asm, list<dag> pattern>
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+            InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
       opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
+  bits<14> addr;
+  bits<4> Rt;
   let Inst{27-25} = 0b000;
-}
-class AXI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
-               string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-}
-class AI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
-              string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
+  let Inst{24}    = 1;            // P bit
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{21}    = 0;            // W bit
+  let Inst{20}    = op20;         // L bit
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+      opc, asm, cstr, pattern> {
+  bits<4> Rt;
   let Inst{27-25} = 0b000;
-}
-class AXI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
-               string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-}
-class AI3ldd<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
+  let Inst{24}    = isPre;        // P bit
+  let Inst{21}    = isPre;        // W bit
+  let Inst{20}    = op20;         // L bit
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{7-4}   = op;
+}
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM3 store w/ two operands: (GPR, am3offset)
+  bits<14> offset;
+  bits<4> Rt;
+  bits<4> Rn;
   let Inst{27-25} = 0b000;
+  let Inst{23}    = offset{8};
+  let Inst{22}    = offset{9};
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = offset{7-4};  // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = offset{3-0};  // imm3_0/Rm
 }
 
 // stores
-class AI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
       opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AXI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
-              string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
-}
-class AI3std<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
-      opc, asm, "", pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 1; // P bit
+  bits<14> addr;
+  bits<4> Rt;
   let Inst{27-25} = 0b000;
+  let Inst{24}    = 1;            // P bit
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{21}    = 0;            // W bit
+  let Inst{20}    = 0;            // L bit
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
 }
 
-// Pre-indexed loads
-class AI3ldhpr<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3ldshpr<dag oops, dag iops, Format f, InstrItinClass itin,
-                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin,
-                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 1; // W bit
-  let Inst{24}    = 1; // P bit
-  let Inst{27-25} = 0b000;
-}
-
-
 // Pre-indexed stores
 class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
@@ -781,60 +613,6 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
   let Inst{27-25} = 0b000;
 }
 
-// Post-indexed loads
-class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
-               string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 0; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin,
-                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 1; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin,
-                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr,pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 1; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-25} = 0b000;
-}
-class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin,
-             string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
-      opc, asm, cstr, pattern> {
-  let Inst{4}     = 1;
-  let Inst{5}     = 0; // H bit
-  let Inst{6}     = 1; // S bit
-  let Inst{7}     = 1;
-  let Inst{20}    = 0; // L bit
-  let Inst{21}    = 0; // W bit
-  let Inst{24}    = 0; // P bit
-  let Inst{27-25} = 0b000;
-}
-
 // Post-indexed stores
 class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
@@ -864,21 +642,17 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
 }
 
 // addrmode4 instructions
-class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
-             string asm, string cstr, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
-       asm, cstr, pattern> {
-  let Inst{20}    = 1; // L bit
-  let Inst{22}    = 0; // S bit
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+           string asm, string cstr, list<dag> pattern>
+  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+  bits<4>  p;
+  bits<16> regs;
+  bits<4>  Rn;
+  let Inst{31-28} = p;
   let Inst{27-25} = 0b100;
-}
-class AXI4st<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
-             string asm, string cstr, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
-       asm, cstr, pattern> {
-  let Inst{20}    = 0; // L bit
   let Inst{22}    = 0; // S bit
-  let Inst{27-25} = 0b100;
+  let Inst{19-16} = Rn;
+  let Inst{15-0}  = regs;
 }
 
 // Unsigned multiply, multiply-accumulate instructions.
@@ -899,24 +673,65 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
 }
 
 // Most significant word multiply
-class AMul2I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+             InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
-  let Inst{7-4}   = 0b1001;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{7-4}   = opc7_4;
   let Inst{20}    = 1;
   let Inst{27-21} = opcod;
+  let Inst{19-16} = Rd;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
 }
 
 // SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
-class AMulxyI<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
-              string opc, string asm, list<dag> pattern>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{4}     = 0;
   let Inst{7}     = 1;
   let Inst{20}    = 0;
   let Inst{27-21} = opcod;
+  let Inst{6-5}   = bit6_5;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
 }
 
 // Extend instructions.
@@ -924,16 +739,47 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
       opc, asm, "", pattern> {
+  // All AExtI instructions have Rd and Rm register operands.
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{15-12} = Rd;
+  let Inst{3-0}   = Rm;
   let Inst{7-4}   = 0b0111;
+  let Inst{9-8}   = 0b00;
   let Inst{27-20} = opcod;
 }
 
 // Misc Arithmetic instructions.
-class AMiscA1I<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
-               string opc, string asm, list<dag> pattern>
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+               InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
       opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{27-20} = opcod;
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rd;
+  let Inst{11-8}  = 0b1111;
+  let Inst{7-4}   = opc7_4;
+  let Inst{3-0}   = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  bits<8> sh;
+  let Inst{27-20} = opcod;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = sh{7-3};
+  let Inst{6}     = tb;
+  let Inst{5-4}   = 0b01;
+  let Inst{3-0}   = Rm;
 }
 
 //===----------------------------------------------------------------------===//
@@ -950,12 +796,9 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
 }
 
 //===----------------------------------------------------------------------===//
-//
 // Thumb Instruction Format Definitions.
 //
 
-// TI - Thumb instruction.
-
 class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
              InstrItinClass itin, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -966,6 +809,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   list<Predicate> Predicates = [IsThumb];
 }
 
+// TI - Thumb instruction.
 class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
   : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
 
@@ -986,6 +830,13 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
   let Inst{12}    = opcod3;
 }
 
+// Move to/from coprocessor instructions
+class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>,
+    Encoding, Requires<[IsThumb, HasV6]> {
+  let Inst{31-28} = 0b1110;
+}
+
 // BR_JT instructions
 class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
            list<dag> pattern>
@@ -999,7 +850,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = iops;
   let AsmString = asm;
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }
 
 class T1I<dag oops, dag iops, InstrItinClass itin,
@@ -1008,9 +859,6 @@ class T1I<dag oops, dag iops, InstrItinClass itin,
 class T1Ix2<dag oops, dag iops, InstrItinClass itin,
             string asm, list<dag> pattern>
   : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
-class T1JTI<dag oops, dag iops, InstrItinClass itin,
-            string asm, list<dag> pattern>
-  : Thumb1I<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
 
 // Two-address instructions
 class T1It<dag oops, dag iops, InstrItinClass itin,
@@ -1025,9 +873,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = !con(oops, (outs s_cc_out:$s));
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }
 
 class T1sI<dag oops, dag iops, InstrItinClass itin,
@@ -1038,7 +886,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin,
 class T1sIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
-             "$lhs = $dst", pattern>;
+             "$Rn = $Rdn", pattern>;
 
 // Thumb1 instruction that can be predicated.
 class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1047,9 +895,9 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }
 
 class T1pI<dag oops, dag iops, InstrItinClass itin,
@@ -1060,17 +908,8 @@ class T1pI<dag oops, dag iops, InstrItinClass itin,
 class T1pIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
-             "$lhs = $dst", pattern>;
+             "$Rn = $Rdn", pattern>;
 
-class T1pI1<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_1, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI2<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_2, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI4<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
 class T1pIs<dag oops, dag iops,
             InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
@@ -1099,7 +938,7 @@ class T1DataProcessing<bits<4> opcode> : Encoding16 {
 // A6.2.3 Special data instructions and branch and exchange encoding.
 class T1Special<bits<4> opcode> : Encoding16 {
   let Inst{15-10} = 0b010001;
-  let Inst{9-6} = opcode;
+  let Inst{9-6}   = opcode;
 }
 
 // A6.2.4 Load/store single data item encoding.
@@ -1107,12 +946,37 @@ class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
   let Inst{15-12} = opA;
   let Inst{11-9}  = opB;
 }
-class T1LdSt<bits<3> opB>     : T1LoadStore<0b0101, opB>;
-class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
-class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
-class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
 class T1LdStSP<bits<3> opB>   : T1LoadStore<0b1001, opB>; // SP relative
 
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+//   0b0110 => Immediate, 4 bytes
+//   0b1000 => Immediate, 2 bytes
+//   0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+                     InstrItinClass itin, string opc, string asm,
+                     list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<0b0101, opcode> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{8-6} = addr{5-3};    // Rm
+  let Inst{5-3} = addr{2-0};    // Rn
+  let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+                        InstrItinClass itin, string opc, string asm,
+                        list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<opA, {opB,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-6} = addr{7-3};   // imm5
+  let Inst{5-3}  = addr{2-0};   // Rn
+  let Inst{2-0}  = Rt;
+}
+
 // A6.2.5 Miscellaneous 16-bit instructions encoding.
 class T1Misc<bits<7> opcode> : Encoding16 {
   let Inst{15-12} = 0b1011;
@@ -1126,7 +990,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1134,16 +998,19 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 // Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
 // input operand since by default it's a zero register. It will become an
 // implicit def once it's "flipped".
-// 
+//
 // FIXME: This uses unified syntax so {s} comes before {p}. We should make it
 // more consistent.
 class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+  let Inst{20} = s;
+
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
-  let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1168,7 +1035,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = iops;
   let AsmString = asm;
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }
 
 class T2I<dag oops, dag iops, InstrItinClass itin,
@@ -1186,17 +1053,23 @@ class T2Iso<dag oops, dag iops, InstrItinClass itin,
 class T2Ipc<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
-class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
             pattern> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<13> addr;
+  let Inst{31-25} = 0b1110100;
   let Inst{24}    = P;
-  let Inst{23}    = ?; // The U bit.
+  let Inst{23}    = addr{8};
   let Inst{22}    = 1;
   let Inst{21}    = W;
-  let Inst{20}    = load;
+  let Inst{20}    = isLoad;
+  let Inst{19-16} = addr{12-9};
+  let Inst{15-12} = Rt{3-0};
+  let Inst{11-8}  = Rt2{3-0};
+  let Inst{7-0}   = addr{7-0};
 }
 
 class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1210,9 +1083,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
             string asm, list<dag> pattern>
   : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
 
-class T2Ix2<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
+// Move to/from coprocessor instructions
+class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
+  : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
+  let Inst{31-28} = 0b1111;
+}
 
 // Two-address instructions
 class T2XIt<dag oops, dag iops, InstrItinClass itin,
@@ -1227,7 +1102,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
   : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
   let Inst{31-27} = 0b11111;
@@ -1240,29 +1115,25 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
   // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
   let Inst{10}    = pre; // The P bit.
   let Inst{8}     = 1; // The W bit.
-}
 
-// Helper class for disassembly only
-// A6.3.16 & A6.3.17
-// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
-class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
-             InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : T2I<oops, iops, itin, opc, asm, pattern> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-24} = 0b011;
-  let Inst{23}    = long;
-  let Inst{22-20} = op22_20;
-  let Inst{7-4}   = op7_4;
+  bits<9> addr;
+  let Inst{7-0} = addr{7-0};
+  let Inst{9}   = addr{8}; // Sign bit
+
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{15-12} = Rt{3-0};
+  let Inst{19-16} = Rn{3-0};
 }
 
 // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
 class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsThumb1Only, HasV5T];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
 }
 
 // T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
 class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }
 
 // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
@@ -1281,10 +1152,13 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
            IndexMode im, Format f, InstrItinClass itin,
            string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
   list<Predicate> Predicates = [HasVFP2];
 }
 
@@ -1293,17 +1167,22 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
             IndexMode im, Format f, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
   let OutOperandList = oops;
   let InOperandList = iops;
   let AsmString = asm;
   let Pattern = pattern;
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
   list<Predicate> Predicates = [HasVFP2];
 }
 
 class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
-         opc, asm, "", pattern>;
+         opc, asm, "", pattern> {
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
 
 // ARM VFP addrmode5 loads and stores
 class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
@@ -1311,12 +1190,24 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
+  // Instruction operands.
+  bits<5>  Dd;
+  bits<13> addr;
+
+  // Encode instruction operands.
+  let Inst{23}    = addr{8};      // U (add = (U == '1'))
+  let Inst{22}    = Dd{4};
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Dd{3-0};
+  let Inst{7-0}   = addr{7-0};    // imm8
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-24} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8}  = 0b1011;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 1;          // Double precision
 
-  // 64-bit loads & stores operate on both NEON and VFP pipelines.
+  // Loads & stores operate on both NEON and VFP pipelines.
   let D = VFPNeonDomain;
 }
 
@@ -1325,10 +1216,36 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
+  // Instruction operands.
+  bits<5>  Sd;
+  bits<13> addr;
+
+  // Encode instruction operands.
+  let Inst{23}    = addr{8};      // U (add = (U == '1'))
+  let Inst{22}    = Sd{0};
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Sd{4-1};
+  let Inst{7-0}   = addr{7-0};    // imm8
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-24} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8}  = 0b1010;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 0;          // Single precision
+
+  // Loads & stores operate on both NEON and VFP pipelines.
+  let D = VFPNeonDomain;
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+                     list<dag> pattern>
+  : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+            cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [HasVFP2];
 }
 
 // Load / store multiple
@@ -1336,21 +1253,40 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
+  // Instruction operands.
+  bits<4>  Rn;
+  bits<13> regs;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Rn;
+  let Inst{22}    = regs{12};
+  let Inst{15-12} = regs{11-8};
+  let Inst{7-0}   = regs{7-0};
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
-  let Inst{11-8}  = 0b1011;
-
-  // 64-bit loads & stores operate on both NEON and VFP pipelines.
-  let D = VFPNeonDomain;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 1;          // Double precision
 }
 
 class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
+  // Instruction operands.
+  bits<4> Rn;
+  bits<13> regs;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Rn;
+  let Inst{22}    = regs{8};
+  let Inst{15-12} = regs{12-9};
+  let Inst{7-0}   = regs{7-0};
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
-  let Inst{11-8}  = 0b1010;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 0;          // Single precision
 }
 
 // Double precision, unary
@@ -1358,10 +1294,21 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
            string asm, list<dag> pattern>
   : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
   let Inst{19-16} = opcod3;
-  let Inst{11-8}  = 0b1011;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 1;          // Double precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
 }
@@ -1371,24 +1318,25 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
            dag iops, InstrItinClass itin, string opc, string asm,
            list<dag> pattern>
   : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
-  let Inst{27-23} = opcod1;
-  let Inst{21-20} = opcod2;
-  let Inst{11-8}  = 0b1011;
-  let Inst{6}     = op6;
-  let Inst{4}     = op4;
-}
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Dn;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{19-16} = Dn{3-0};
+  let Inst{7}     = Dn{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
 
-// Double precision, binary, VML[AS] (for additional predicate)
-class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
-           dag iops, InstrItinClass itin, string opc, string asm,
-           list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8}  = 0b1011;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 1;          // Double precision
   let Inst{6}     = op6;
   let Inst{4}     = op4;
-  list<Predicate> Predicates = [HasVFP2, UseVMLx];
 }
 
 // Single precision, unary
@@ -1396,16 +1344,27 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
            string asm, list<dag> pattern>
   : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
   let Inst{19-16} = opcod3;
-  let Inst{11-8}  = 0b1010;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 0;          // Single precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
 }
 
-// Single precision unary, if no NEON
-// Same as ASuI except not available if NEON is enabled
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
 class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
             bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
             string asm, list<dag> pattern>
@@ -1418,20 +1377,47 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
 class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
            InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sn;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7}     = Sn{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8}  = 0b1010;
+  let Inst{11-9}  = 0b101;
+  let Inst{8}     = 0;          // Single precision
   let Inst{6}     = op6;
   let Inst{4}     = op4;
 }
 
-// Single precision binary, if no NEON
-// Same as ASbI except not available if NEON is enabled
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
 class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
             dag iops, InstrItinClass itin, string opc, string asm,
             list<dag> pattern>
   : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
   list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sn;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7}     = Sn{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
 }
 
 // VFP conversion instructions
@@ -1502,9 +1488,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
   : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(
-                     !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
-                     !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
 }
@@ -1516,7 +1500,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
   : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
 }
@@ -1531,6 +1515,25 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
   let Inst{21-20} = op21_20;
   let Inst{11-8}  = op11_8;
   let Inst{7-4}   = op7_4;
+
+  let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+  bits<5> Vd;
+  bits<6> Rn;
+  bits<4> Rm;
+
+  let Inst{22}    = Vd{4};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{19-16} = Rn{3-0};
+  let Inst{3-0}   = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+            dag oops, dag iops, InstrItinClass itin,
+            string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+          dt, asm, cstr, pattern> {
+  bits<3> lane;
 }
 
 class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
@@ -1541,11 +1544,22 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
   list<Predicate> Predicates = [HasNEON];
 }
 
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+                  list<dag> pattern>
+  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+            itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [HasNEON];
+}
+
 class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string dt, string asm, string cstr, list<dag> pattern>
   : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
           pattern> {
   let Inst{31-25} = 0b1111001;
+  let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
 }
 
 class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1553,6 +1567,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
   : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
            cstr, pattern> {
   let Inst{31-25} = 0b1111001;
+  let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
 }
 
 // NEON "one register and a modified immediate" format.
@@ -1569,6 +1584,16 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
   let Inst{6}     = op6;
   let Inst{5}     = op5;
   let Inst{4}     = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<13> SIMM;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{24}    = SIMM{7};
+  let Inst{18-16} = SIMM{6-4};
+  let Inst{3-0}   = SIMM{3-0};
 }
 
 // NEON 2 vector register format.
@@ -1584,6 +1609,15 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
   let Inst{11-7}  = op11_7;
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = Vm{4};
 }
 
 // Same as N2V except it doesn't have a datatype suffix.
@@ -1599,6 +1633,15 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
   let Inst{11-7}  = op11_7;
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = Vm{4};
 }
 
 // NEON 2 vector register with immediate.
@@ -1612,6 +1655,17 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
   let Inst{7}    = op7;
   let Inst{6}    = op6;
   let Inst{4}    = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+  bits<6> SIMM;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = Vm{4};
+  let Inst{21-16} = SIMM{5-0};
 }
 
 // NEON 3 vector register format.
@@ -1625,6 +1679,18 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
   let Inst{11-8}  = op11_8;
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7}     = Vn{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = Vm{4};
 }
 
 // Same as N3V except it doesn't have a data type suffix.
@@ -1639,13 +1705,25 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
   let Inst{11-8}  = op11_8;
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22}    = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7}     = Vn{4};
+  let Inst{3-0}   = Vm{3-0};
+  let Inst{5}     = Vm{4};
 }
 
 // NEON VMOVs between scalar and core registers.
 class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string dt, string asm, list<dag> pattern>
-  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
             "", itin> {
   let Inst{27-20} = opcod1;
   let Inst{11-8}  = opcod2;
@@ -1654,11 +1732,21 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
 
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(
-                     !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
-                     !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
+
+  let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+  bits<5> V;
+  bits<4> R;
+  bits<4> p;
+  bits<4> lane;
+
+  let Inst{31-28} = p{3-0};
+  let Inst{7}     = V{4};
+  let Inst{19-16} = V{3-0};
+  let Inst{15-12} = R{3-0};
 }
 class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                 dag oops, dag iops, InstrItinClass itin,
@@ -1687,6 +1775,15 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
   let Inst{11-7}  = 0b11000;
   let Inst{6}     = op6;
   let Inst{4}     = 0;
+
+  bits<5> Vd;
+  bits<5> Vm;
+  bits<4> lane;
+
+  let Inst{22}     = Vd{4};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{5}     = Vm{4};
+  let Inst{3-0} = Vm{3-0};
 }
 
 // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index ba228ffac8ed..6f48d967f919 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -33,13 +33,13 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
   default: break;
   case ARM::LDR_PRE:
   case ARM::LDR_POST:
-    return ARM::LDR;
+    return ARM::LDRi12;
   case ARM::LDRH_PRE:
   case ARM::LDRH_POST:
     return ARM::LDRH;
   case ARM::LDRB_PRE:
   case ARM::LDRB_POST:
-    return ARM::LDRB;
+    return ARM::LDRBi12;
   case ARM::LDRSH_PRE:
   case ARM::LDRSH_POST:
     return ARM::LDRSH;
@@ -48,39 +48,14 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
     return ARM::LDRSB;
   case ARM::STR_PRE:
   case ARM::STR_POST:
-    return ARM::STR;
+    return ARM::STRi12;
   case ARM::STRH_PRE:
   case ARM::STRH_POST:
     return ARM::STRH;
   case ARM::STRB_PRE:
   case ARM::STRB_POST:
-    return ARM::STRB;
+    return ARM::STRBi12;
   }
 
   return 0;
 }
-
-void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-              unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
-              const TargetRegisterInfo &TRI) const {
-  DebugLoc dl = Orig->getDebugLoc();
-  unsigned Opcode = Orig->getOpcode();
-  switch (Opcode) {
-  default:
-    break;
-  case ARM::MOVi2pieces: {
-    RI.emitLoadConstPool(MBB, I, dl,
-                         DestReg, SubIdx,
-                         Orig->getOperand(1).getImm(),
-                         (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
-                         Orig->getOperand(3).getReg());
-    MachineInstr *NewMI = prior(I);
-    NewMI->getOperand(0).setSubReg(SubIdx);
-    return;
-  }
-  }
-
-  return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
-}
-
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4563ffea7b9c..f2c7bdc31be9 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -32,11 +32,6 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;
 
-  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                     unsigned DestReg, unsigned SubIdx,
-                     const MachineInstr *Orig,
-                     const TargetRegisterInfo &TRI) const;
-
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e66f9b9ad0ac..c827ce3da97c 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,10 +58,9 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
 
-def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIER    : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERMCR  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -70,33 +69,35 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
 
 // Node definitions.
 def ARMWrapper       : SDNode<"ARMISD::Wrapper",     SDTIntUnaryOp>;
+def ARMWrapperDYN    : SDNode<"ARMISD::WrapperDYN",  SDTIntUnaryOp>;
+def ARMWrapperPIC    : SDNode<"ARMISD::WrapperPIC",  SDTIntUnaryOp>;
 def ARMWrapperJT     : SDNode<"ARMISD::WrapperJT",   SDTIntBinOp>;
 
 def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
-                              [SDNPHasChain, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOutGlue]>;
 def ARMcallseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_ARMCallSeqEnd,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def ARMcall          : SDNode<"ARMISD::CALL", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
 def ARMcall_pred    : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
 def ARMcall_nolink   : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
 
 def ARMretflag       : SDNode<"ARMISD::RET_FLAG", SDTNone,
-                              [SDNPHasChain, SDNPOptInFlag]>;
+                              [SDNPHasChain, SDNPOptInGlue]>;
 
 def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
-                              [SDNPInFlag]>;
+                              [SDNPInGlue]>;
 def ARMcneg          : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
-                              [SDNPInFlag]>;
+                              [SDNPInGlue]>;
 
 def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
-                              [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 
 def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
                               [SDNPHasChain]>;
@@ -106,40 +107,38 @@ def ARMbr2jt         : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
 def ARMBcci64        : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
                               [SDNPHasChain]>;
 
-def ARMand           : SDNode<"ARMISD::AND", SDT_ARMAnd,
-                              [SDNPOutFlag]>;
-
 def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
-                              [SDNPOutFlag]>;
+                              [SDNPOutGlue]>;
 
 def ARMcmpZ          : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
-                              [SDNPOutFlag, SDNPCommutative]>;
+                              [SDNPOutGlue, SDNPCommutative]>;
 
 def ARMpic_add       : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
 
-def ARMsrl_flag      : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMsra_flag      : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInFlag ]>;
+def ARMsrl_flag      : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag      : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInGlue ]>;
 
 def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
 def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
                                SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
-                                SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+                               SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+                               SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
 
 def ARMMemBarrier     : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
                                [SDNPHasChain]>;
-def ARMSyncBarrier    : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
-                               [SDNPHasChain]>;
-def ARMMemBarrierMCR  : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
-                               [SDNPHasChain]>;
-def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+def ARMMemBarrierMCR  : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                                [SDNPHasChain]>;
+def ARMPreload        : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+                               [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
 
 def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
 
-def ARMtcret         : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, 
-                        [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
+def ARMtcret         : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+                        [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
 
 
 def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
@@ -147,34 +146,40 @@ def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
-def HasV4T           : Predicate<"Subtarget->hasV4TOps()">;
+def HasV4T           : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
 def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
 def HasV5T           : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6            : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">;
+def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
+def HasV6            : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7            : Predicate<"Subtarget->hasV7Ops()">;
+def HasV7            : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
 def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2          : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3          : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON          : Predicate<"Subtarget->hasNEON()">;
-def HasDivide        : Predicate<"Subtarget->hasDivide()">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def HasDB            : Predicate<"Subtarget->hasDataBarrier()">;
+def HasVFP2          : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
+def HasVFP3          : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
+def HasNEON          : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
+def HasFP16          : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
+def HasDivide        : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+                                 AssemblerPredicate;
+def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
+                                 AssemblerPredicate;
+def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
+                                 AssemblerPredicate;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb          : Predicate<"Subtarget->isThumb()">;
+def IsThumb          : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
 def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2         : Predicate<"Subtarget->isThumb2()">;
-def IsARM            : Predicate<"!Subtarget->isThumb()">;
+def IsThumb2         : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
+def IsARM            : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
 def IsDarwin         : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin      : Predicate<"!Subtarget->isTargetDarwin()">;
 
 // FIXME: Eventually this will be just "hasV6T2Ops".
 def UseMovt          : Predicate<"Subtarget->useMovt()">;
 def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
-def UseVMLx          : Predicate<"Subtarget->useVMLx()">;
+def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -199,12 +204,6 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
 }]>;
 
-// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
-def rot_imm : PatLeaf<(i32 imm), [{
-  int32_t v = (int32_t)N->getZExtValue();
-  return v == 8 || v == 16 || v == 24;
-}]>;
-
 /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
 def imm1_15 : PatLeaf<(i32 imm), [{
   return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
@@ -217,12 +216,12 @@ def imm16_31 : PatLeaf<(i32 imm), [{
 
 def so_imm_neg :
   PatLeaf<(imm), [{
-    return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+    return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
   }], so_imm_neg_XFORM>;
 
 def so_imm_not :
   PatLeaf<(imm), [{
-    return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+    return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
   }], so_imm_not_XFORM>;
 
 // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
@@ -230,15 +229,6 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
   return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
 }]>;
 
-/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
-/// e.g., 0xf000ffff
-def bf_inv_mask_imm : Operand<i32>,
-                      PatLeaf<(imm), [{
-  return ARM::isBitFieldInvertedMask(N->getZExtValue());
-}] > {
-  let PrintMethod = "printBitfieldInvMaskImmOperand";
-}
-
 /// Split a 32-bit immediate into two 16 bit parts.
 def hi16 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
@@ -273,28 +263,103 @@ def sube_live_carry :
   PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
   [{return N->hasAnyUseOfValue(1);}]>;
 
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+  return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
 //
 
 // Branch target.
-def brtarget : Operand<OtherVT>;
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+  let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
 
 // A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+  let Name = "RegList";
+  let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+  let Name = "DPRRegList";
+  let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+  let Name = "SPRRegList";
+  let SuperClasses = [];
+}
+
 def reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = RegListAsmOperand;
   let PrintMethod = "printRegisterList";
 }
 
-// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
-def cpinst_operand : Operand<i32> {
-  let PrintMethod = "printCPInstOperand";
+def dpr_reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = DPRRegListAsmOperand;
+  let PrintMethod = "printRegisterList";
 }
 
-def jtblock_operand : Operand<i32> {
-  let PrintMethod = "printJTBlockOperand";
+def spr_reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = SPRRegListAsmOperand;
+  let PrintMethod = "printRegisterList";
 }
-def jt2block_operand : Operand<i32> {
-  let PrintMethod = "printJT2BlockOperand";
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+  let PrintMethod = "printCPInstOperand";
 }
 
 // Local PC labels.
@@ -302,6 +367,22 @@ def pclabel : Operand<i32> {
   let PrintMethod = "printPCLabel";
 }
 
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+  let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+  let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+    int32_t v = (int32_t)N->getZExtValue();
+    return v == 8 || v == 16 || v == 24; }]> {
+  let EncoderMethod = "getRotImmOpValue";
+}
+
 // shift_imm: An integer that encodes a shift amount and the type of shift
 // (currently either asr or lsl) using the same encoding used for the
 // immediates in so_reg operands.
@@ -313,73 +394,120 @@ def shift_imm : Operand<i32> {
 def so_reg : Operand<i32>,    // reg reg imm
              ComplexPattern<i32, 3, "SelectShifterOperandReg",
                             [shl,srl,sra,rotr]> {
+  let EncoderMethod = "getSORegOpValue";
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+def shift_so_reg : Operand<i32>,    // reg reg imm
+                   ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+                                  [shl,srl,sra,rotr]> {
+  let EncoderMethod = "getSORegOpValue";
   let PrintMethod = "printSORegOperand";
   let MIOperandInfo = (ops GPR, GPR, i32imm);
 }
 
 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits.  so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+// 8-bit immediate rotated by an arbitrary number of bits.
 def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+  let EncoderMethod = "getSOImmOpValue";
   let PrintMethod = "printSOImmOperand";
 }
 
 // Break so_imm's up into two pieces.  This handles immediates with up to 16
 // bits set in them.  This uses so_imm2part to match and so_imm2part_[12] to
 // get the first/second pieces.
-def so_imm2part : Operand<i32>,
-                  PatLeaf<(imm), [{
+def so_imm2part : PatLeaf<(imm), [{
       return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-    }]> {
-  let PrintMethod = "printSOImm2PartOperand";
-}
+}]>;
 
-def so_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+  if (Subtarget->hasV6T2Ops())
+    return true;
+  return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]>;
 
-def so_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
+  return (int32_t)N->getZExtValue() < 32;
 }]>;
 
-def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
-      return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue());
-    }]> {
-  let PrintMethod = "printSOImm2PartOperand";
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
+  return (int32_t)N->getZExtValue() < 32;
+}]> {
+  let EncoderMethod = "getImmMinusOneOpValue";
 }
 
-def so_neg_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+  let EncoderMethod = "getHiLo16ImmOpValue";
+}
 
-def so_neg_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+                      PatLeaf<(imm), [{
+  return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+  let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+  let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
 
-/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
-  return (int32_t)N->getZExtValue() < 32;
+/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
+  return isInt<5>(N->getSExtValue());
 }]>;
 
+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, PatLeaf<(imm), [{
+  return N->getSExtValue() > 0 &&  N->getSExtValue() <= 32;
+}] > {
+  let EncoderMethod = "getMsbOpValue";
+}
+
 // Define ARM specific addressing modes.
 
-// addrmode2 := reg +/- reg shop imm
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+  // 12-bit immediate operand. Note that instructions using this encode
+  // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+  // immediate values are as normal.
+
+  let EncoderMethod = "getAddrModeImm12OpValue";
+  let PrintMethod = "printAddrModeImm12Operand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+                  ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+  let EncoderMethod = "getLdStSORegOpValue";
+  // FIXME: Simplify the printer
+  let PrintMethod = "printAddrMode2Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
 // addrmode2 := reg +/- imm12
+//           := reg +/- reg shop imm
 //
 def addrmode2 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+  let EncoderMethod = "getAddrMode2OpValue";
   let PrintMethod = "printAddrMode2Operand";
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
 def am2offset : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+                [], [SDNPWantRoot]> {
+  let EncoderMethod = "getAddrMode2OffsetOpValue";
   let PrintMethod = "printAddrMode2OffsetOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }
@@ -389,22 +517,29 @@ def am2offset : Operand<i32>,
 //
 def addrmode3 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+  let EncoderMethod = "getAddrMode3OpValue";
   let PrintMethod = "printAddrMode3Operand";
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
 def am3offset : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+                               [], [SDNPWantRoot]> {
+  let EncoderMethod = "getAddrMode3OffsetOpValue";
   let PrintMethod = "printAddrMode3OffsetOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
-// addrmode4 := reg, <mode|W>
+// ldstm_mode := {ia, ib, da, db}
 //
-def addrmode4 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode4", []> {
-  let PrintMethod = "printAddrMode4Operand";
-  let MIOperandInfo = (ops GPR:$addr, i32imm);
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+  let EncoderMethod = "getLdStmModeOpValue";
+  let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+  let Name = "MemMode5";
+  let SuperClasses = [];
 }
 
 // addrmode5 := reg +/- imm8*4
@@ -413,19 +548,32 @@ def addrmode5 : Operand<i32>,
                 ComplexPattern<i32, 2, "SelectAddrMode5", []> {
   let PrintMethod = "printAddrMode5Operand";
   let MIOperandInfo = (ops GPR:$base, i32imm);
+  let ParserMatchClass = MemMode5AsmOperand;
+  let EncoderMethod = "getAddrMode5OpValue";
 }
 
-// addrmode6 := reg with optional writeback
+// addrmode6 := reg with optional alignment
 //
 def addrmode6 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode6", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
   let PrintMethod = "printAddrMode6Operand";
   let MIOperandInfo = (ops GPR:$addr, i32imm);
+  let EncoderMethod = "getAddrMode6AddressOpValue";
 }
 
 def am6offset : Operand<i32> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
+  let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+                ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+  let PrintMethod = "printAddrMode6Operand";
+  let MIOperandInfo = (ops GPR:$addr, i32imm);
+  let EncoderMethod = "getAddrMode6DupAddressOpValue";
 }
 
 // addrmodepc := pc + reg
@@ -440,6 +588,28 @@ def nohash_imm : Operand<i32> {
   let PrintMethod = "printNoHashImmediate";
 }
 
+def CoprocNumAsmOperand : AsmOperandClass {
+  let Name = "CoprocNum";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+  let Name = "CoprocReg";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+  let PrintMethod = "printPImmediate";
+  let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+  let PrintMethod = "printCImmediate";
+  let ParserMatchClass = CoprocRegAsmOperand;
+}
+
 //===----------------------------------------------------------------------===//
 
 include "ARMInstrFormats.td"
@@ -450,55 +620,93 @@ include "ARMInstrFormats.td"
 
 /// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
 /// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
-                        bit Commutable = 0> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                        PatFrag opnode, bit Commutable = 0> {
   // The register-immediate version is re-materializable. This is useful
   // in particular for taking the address of a local.
   let isReMaterializable = 1 in {
-  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-               IIC_iALUi, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+  def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+               iii, opc, "\t$Rd, $Rn, $imm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = imm;
   }
   }
-  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-               IIC_iALUr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
-    let Inst{11-4} = 0b00000000;
+  def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+               iir, opc, "\t$Rd, $Rn, $Rm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{25} = 0;
     let isCommutable = Commutable;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
   }
-  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-               IIC_iALUsr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+  def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+               iis, opc, "\t$Rd, $Rn, $shift",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = shift;
   }
 }
 
 /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
-let Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
-                         bit Commutable = 0> {
-  def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-               IIC_iALUi, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
-    let Inst{20} = 1;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                         PatFrag opnode, bit Commutable = 0> {
+  def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+               iii, opc, "\t$Rd, $Rn, $imm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = imm;
   }
-  def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-               IIC_iALUr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+  def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+               iir, opc, "\t$Rd, $Rn, $Rm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let isCommutable = Commutable;
-    let Inst{11-4} = 0b00000000;
-    let Inst{20} = 1;
     let Inst{25} = 0;
-  }
-  def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-               IIC_iALUsr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
     let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
+  }
+  def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+               iis, opc, "\t$Rd, $Rn, $shift",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = shift;
   }
 }
 }
@@ -507,146 +715,233 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 /// patterns. Similar to AsI1_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let isCompare = 1, Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
-                       bit Commutable = 0> {
-  def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, so_imm:$b)]> {
-    let Inst{20} = 1;
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                       PatFrag opnode, bit Commutable = 0> {
+  def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+               opc, "\t$Rn, $imm",
+               [(opnode GPR:$Rn, so_imm:$imm)]> {
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
-  }
-  def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, GPR:$b)]> {
-    let Inst{11-4} = 0b00000000;
     let Inst{20} = 1;
-    let Inst{25} = 0;
-    let isCommutable = Commutable;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-0} = imm;
   }
-  def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, so_reg:$b)]> {
+  def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+               opc, "\t$Rn, $Rm",
+               [(opnode GPR:$Rn, GPR:$Rm)]> {
+    bits<4> Rn;
+    bits<4> Rm;
+    let isCommutable = Commutable;
+    let Inst{25} = 0;
     let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
+  }
+  def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+               opc, "\t$Rn, $shift",
+               [(opnode GPR:$Rn, so_reg:$shift)]> {
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-0} = shift;
   }
 }
 }
 
-/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
 /// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
-  def r     : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
-                 IIC_iUNAr, opc, "\t$dst, $src",
-                 [(set GPR:$dst, (opnode GPR:$src))]>,
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def r     : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+                 IIC_iEXTr, opc, "\t$Rd, $Rm",
+                 [(set GPR:$Rd, (opnode GPR:$Rm))]>,
               Requires<[IsARM, HasV6]> {
-    let Inst{11-10} = 0b00;
+    bits<4> Rd;
+    bits<4> Rm;
     let Inst{19-16} = 0b1111;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = 0b00;
+    let Inst{3-0}   = Rm;
   }
-  def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
-                 IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
-                 [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+  def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+                 IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+                 [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
               Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<2> rot;
     let Inst{19-16} = 0b1111;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = rot;
+    let Inst{3-0}   = Rm;
   }
 }
 
-multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
-  def r     : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
-                 IIC_iUNAr, opc, "\t$dst, $src",
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+  def r     : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+                 IIC_iEXTr, opc, "\t$Rd, $Rm",
                  [/* For disassembly only; pattern left blank */]>,
               Requires<[IsARM, HasV6]> {
-    let Inst{11-10} = 0b00;
     let Inst{19-16} = 0b1111;
+    let Inst{11-10} = 0b00;
   }
-  def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
-                 IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+  def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+                 IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
                  [/* For disassembly only; pattern left blank */]>,
               Requires<[IsARM, HasV6]> {
+    bits<2> rot;
     let Inst{19-16} = 0b1111;
+    let Inst{11-10} = rot;
   }
 }
 
-/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
-  def rr     : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
-                  IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
-                  [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def rr     : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+                  [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
                Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<4> Rn;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
     let Inst{11-10} = 0b00;
+    let Inst{9-4}   = 0b000111;
+    let Inst{3-0}   = Rm;
+  }
+  def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                             rot_imm:$rot),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+                  [(set GPR:$Rd, (opnode GPR:$Rn,
+                                          (rotr GPR:$Rm, rot_imm:$rot)))]>,
+                  Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<4> Rn;
+    bits<2> rot;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = rot;
+    let Inst{9-4}   = 0b000111;
+    let Inst{3-0}   = Rm;
   }
-  def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
-                                              i32imm:$rot),
-                  IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
-                  [(set GPR:$dst, (opnode GPR:$LHS,
-                                          (rotr GPR:$RHS, rot_imm:$rot)))]>,
-                  Requires<[IsARM, HasV6]>;
 }
 
 // For disassembly only.
-multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
-  def rr     : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
-                  IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+  def rr     : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
                   [/* For disassembly only; pattern left blank */]>,
                Requires<[IsARM, HasV6]> {
     let Inst{11-10} = 0b00;
   }
-  def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
-                                              i32imm:$rot),
-                  IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+  def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                             rot_imm:$rot),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
                   [/* For disassembly only; pattern left blank */]>,
-                  Requires<[IsARM, HasV6]>;
+                  Requires<[IsARM, HasV6]> {
+    bits<4> Rn;
+    bits<2> rot;
+    let Inst{19-16} = Rn;
+    let Inst{11-10} = rot;
+  }
 }
 
 /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
 let Uses = [CPSR] in {
 multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                              bit Commutable = 0> {
-  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+  def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = imm;
   }
-  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+  def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
                Requires<[IsARM]> {
-    let isCommutable = Commutable;
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{11-4} = 0b00000000;
     let Inst{25} = 0;
+    let isCommutable = Commutable;
+    let Inst{3-0} = Rm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
   }
-  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+  def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{11-0} = shift;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
   }
 }
 // Carry setting variants
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
 multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
                              bit Commutable = 0> {
-  def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+  def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = imm;
     let Inst{20} = 1;
     let Inst{25} = 1;
   }
-  def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+  def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{11-4} = 0b00000000;
+    let isCommutable = Commutable;
+    let Inst{3-0} = Rm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
     let Inst{20} = 1;
     let Inst{25} = 0;
   }
-  def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+  def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
+    let Inst{11-0} = shift;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
     let Inst{20} = 1;
     let Inst{25} = 0;
   }
@@ -654,6 +949,62 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 }
 }
 
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+           InstrItinClass iir, PatFrag opnode> {
+  // Note: We use the complex addrmode_imm12 rather than just an input
+  // GPR and a constrained immediate so that we can use this to match
+  // frame index references and avoid matching constant pool references.
+  def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+                   AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+                  [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+    bits<4>  Rt;
+    bits<17> addr;
+    let Inst{23}    = addr{12};     // U (add = ('U' == 1))
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0}  = addr{11-0};   // imm12
+  }
+  def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+                  AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+                 [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+    bits<4>  Rt;
+    bits<17> shift;
+    let Inst{23}    = shift{12};    // U (add = ('U' == 1))
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0}  = shift{11-0};
+  }
+}
+}
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+           InstrItinClass iir, PatFrag opnode> {
+  // Note: We use the complex addrmode_imm12 rather than just an input
+  // GPR and a constrained immediate so that we can use this to match
+  // frame index references and avoid matching constant pool references.
+  def i12 : AI2ldst<0b010, 0, isByte, (outs),
+                   (ins GPR:$Rt, addrmode_imm12:$addr),
+                   AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+                  [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+    bits<4> Rt;
+    bits<17> addr;
+    let Inst{23}    = addr{12};     // U (add = ('U' == 1))
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0}  = addr{11-0};   // imm12
+  }
+  def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+                  AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+                 [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+    bits<4> Rt;
+    bits<17> shift;
+    let Inst{23}    = shift{12};    // U (add = ('U' == 1))
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0}  = shift{11-0};
+  }
+}
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -669,8 +1020,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 let neverHasSideEffects = 1, isNotDuplicable = 1 in
 def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
-                    i32imm:$size), NoItinerary,
-           "${instid:label} ${cpidx:cpentry}", []>;
+                    i32imm:$size), NoItinerary, []>;
 
 // FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
 // from removing one half of the matched pairs. That breaks PEI, which assumes
@@ -678,12 +1028,10 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
 let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
 def ADJCALLSTACKUP :
 PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
-           "${:comment} ADJCALLSTACKUP $amt1",
            [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
 
 def ADJCALLSTACKDOWN :
 PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
-           "${:comment} ADJCALLSTACKDOWN $amt",
            [(ARMcallseq_start timm:$amt)]>;
 }
 
@@ -691,6 +1039,7 @@ def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000000;
 }
 
@@ -698,6 +1047,7 @@ def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000001;
 }
 
@@ -705,6 +1055,7 @@ def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000010;
 }
 
@@ -712,6 +1063,7 @@ def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000011;
 }
 
@@ -719,14 +1071,22 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
              "\t$dst, $a, $b",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
   let Inst{27-20} = 0b01101000;
   let Inst{7-4} = 0b1011;
+  let Inst{11-8} = 0b1111;
 }
 
 def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000100;
 }
 
@@ -735,154 +1095,174 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
 def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
               [/* For disassembly only; pattern left blank */]>,
            Requires<[IsARM]> {
+  bits<16> val;
+  let Inst{3-0} = val{3-0};
+  let Inst{19-8} = val{15-4};
   let Inst{27-20} = 0b00010010;
   let Inst{7-4} = 0b0111;
 }
 
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode from Inst{4-0}
-// opt{5} = changemode from Inst{17}
-// opt{8-6} = AIF from Inst{8-6}
-// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
-              [/* For disassembly only; pattern left blank */]>,
-          Requires<[IsARM]> {
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+  : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+        [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+  bits<2> imod;
+  bits<3> iflags;
+  bits<5> mode;
+  bit M;
+
   let Inst{31-28} = 0b1111;
   let Inst{27-20} = 0b00010000;
-  let Inst{16} = 0;
-  let Inst{5} = 0;
+  let Inst{19-18} = imod;
+  let Inst{17}    = M; // Enabled if mode is set;
+  let Inst{16}    = 0;
+  let Inst{8-6}   = iflags;
+  let Inst{5}     = 0;
+  let Inst{4-0}   = mode;
 }
 
+let M = 1 in
+  def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+                  "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+  def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+  def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
 // Preload signals the memory system of possible future data/instruction access.
 // These are for disassembly only.
-//
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass APreLoad<bit data, bit read, string opc> {
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
 
-  def i : AXI<(outs), (ins GPR:$base, neg_zero:$imm), MiscFrm, NoItinerary,
-               !strconcat(opc, "\t[$base, $imm]"), []> {
+  def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+                !strconcat(opc, "\t$addr"),
+                [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+    bits<4> Rt;
+    bits<17> addr;
     let Inst{31-26} = 0b111101;
     let Inst{25} = 0; // 0 for immediate form
     let Inst{24} = data;
+    let Inst{23} = addr{12};        // U (add = ('U' == 1))
     let Inst{22} = read;
     let Inst{21-20} = 0b01;
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = 0b1111;
+    let Inst{11-0}  = addr{11-0};   // imm12
   }
 
-  def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary,
-               !strconcat(opc, "\t$addr"), []> {
+  def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+               !strconcat(opc, "\t$shift"),
+               [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+    bits<17> shift;
     let Inst{31-26} = 0b111101;
     let Inst{25} = 1; // 1 for register form
     let Inst{24} = data;
+    let Inst{23} = shift{12};    // U (add = ('U' == 1))
     let Inst{22} = read;
     let Inst{21-20} = 0b01;
-    let Inst{4} = 0;
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = 0b1111;
+    let Inst{11-0}  = shift{11-0};
   }
 }
 
-defm PLD  : APreLoad<1, 1, "pld">;
-defm PLDW : APreLoad<1, 0, "pldw">;
-defm PLI  : APreLoad<0, 1, "pli">;
-
-def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
-                   [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{27-20} = 0b00010000;
-  let Inst{16} = 1;
-  let Inst{9} = 1;
-  let Inst{7-4} = 0b0000;
-}
+defm PLD  : APreLoad<1, 1, "pld">,  Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI  : APreLoad<1, 0, "pli">,  Requires<[IsARM,HasV7]>;
 
-def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle",
-                   [/* For disassembly only; pattern left blank */]>,
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+                 "setend\t$end",
+                 [/* For disassembly only; pattern left blank */]>,
                Requires<[IsARM]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{27-20} = 0b00010000;
-  let Inst{16} = 1;
-  let Inst{9} = 0;
-  let Inst{7-4} = 0b0000;
+  bits<1> end;
+  let Inst{31-10} = 0b1111000100000001000000;
+  let Inst{9} = end;
+  let Inst{8-0} = 0;
 }
 
 def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV7]> {
-  let Inst{27-16} = 0b001100100000;
-  let Inst{7-4} = 0b1111;
+  bits<4> opt;
+  let Inst{27-4} = 0b001100100000111100001111;
+  let Inst{3-0} = opt;
 }
 
 // A5.4 Permanently UNDEFINED instructions.
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
 let isBarrier = 1, isTerminator = 1 in
-def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, 
-               ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+               "trap", [(trap)]>,
            Requires<[IsARM]> {
-  let Inst{27-25} = 0b011;
-  let Inst{24-20} = 0b11111;
-  let Inst{7-5} = 0b111;
-  let Inst{4} = 0b1;
+  let Inst = 0xe7ffdefe;
 }
 
 // Address computation and loads and stores in PIC mode.
 let isNotDuplicable = 1 in {
-def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
-                  Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a",
-                   [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+def PICADD  : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+                            Size4Bytes, IIC_iALUr,
+                            [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
 
 let AddedComplexity = 10 in {
-def PICLDR  : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                  Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
-                  [(set GPR:$dst, (load addrmodepc:$addr))]>;
+def PICLDR  : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_r,
+                            [(set GPR:$dst, (load addrmodepc:$addr))]>;
 
-def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
 
-def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
 
-def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
-                  [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
 
-def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
-                  [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
 }
 let AddedComplexity = 10 in {
-def PICSTR  : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr",
-               [(store GPR:$src, addrmodepc:$addr)]>;
+def PICSTR  : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+      Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
 
-def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
-               [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+      Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+                                                   addrmodepc:$addr)]>;
 
-def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
-               [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+      Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
 }
 } // isNotDuplicable = 1
 
 
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
-                    Pseudo, IIC_iALUi,
-                    "adr$p\t$dst, #$label", []>;
-
-} // neverHasSideEffects
-def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
-                           (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                      Pseudo, IIC_iALUi,
-                      "adr$p\t$dst, #${label}_${id}", []> {
-    let Inst{25} = 1;
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+                 MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+  bits<4> Rd;
+  bits<12> label;
+  let Inst{27-25} = 0b001;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = label;
 }
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+                    Size4Bytes, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+                      (ins i32imm:$label, nohash_imm:$id, pred:$p),
+                      Size4Bytes, IIC_iALUi, []>;
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
@@ -893,159 +1273,139 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
                   "bx", "\tlr", [(ARMretflag)]>,
                Requires<[IsARM, HasV4T]> {
-    let Inst{3-0}   = 0b1110;
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    let Inst{27-0}  = 0b0001001011111111111100011110;
   }
 
   // ARMV4 only
-  def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, 
+  def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
                   "mov", "\tpc, lr", [(ARMretflag)]>,
                Requires<[IsARM, NoV4T]> {
-    let Inst{11-0}  = 0b000000001110;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
+    let Inst{27-0} = 0b0001101000001111000000001110;
   }
 }
 
 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
   // ARMV4T and above
-  def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+  def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
                   [(brind GPR:$dst)]>,
               Requires<[IsARM, HasV4T]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-    let Inst{31-28} = 0b1110;
+    bits<4> dst;
+    let Inst{31-4} = 0b1110000100101111111111110001;
+    let Inst{3-0}  = dst;
   }
 
   // ARMV4 only
-  def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst",
-                  [(brind GPR:$dst)]>,
-              Requires<[IsARM, NoV4T]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-    let Inst{31-28} = 0b1110;
-  }
+  // FIXME: We would really like to define this as a vanilla ARMPat like:
+  // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
+  // With that, however, we can't set isBranch, isTerminator, etc..
+  def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
+                    Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
+                    Requires<[IsARM, NoV4T]>;
 }
 
-// FIXME: remove when we have a way to marking a MI with these properties.
-// FIXME: Should pc be an implicit operand like PICADD, etc?
-let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
-    hasExtraDefRegAllocReq = 1 in
-  def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                        reglist:$dsts, variable_ops),
-                       IndexModeUpd, LdStMulFrm, IIC_Br,
-                       "ldm${addr:submode}${p}\t$addr!, $dsts",
-                       "$addr.addr = $wb", []>;
-
-// On non-Darwin platforms R9 is callee-saved.
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
 let isCall = 1,
+  // On non-Darwin platforms R9 is callee-saved.
   Defs = [R0,  R1,  R2,  R3,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
-  def BL  : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                IIC_Br, "bl\t${func:call}",
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [SP] in {
+  def BL  : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+                IIC_Br, "bl\t$func",
                 [(ARMcall tglobaladdr:$func)]>,
             Requires<[IsARM, IsNotDarwin]> {
     let Inst{31-28} = 0b1110;
+    bits<24> func;
+    let Inst{23-0} = func;
   }
 
-  def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                   IIC_Br, "bl", "\t${func:call}",
+  def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+                   IIC_Br, "bl", "\t$func",
                    [(ARMcall_pred tglobaladdr:$func)]>,
-                Requires<[IsARM, IsNotDarwin]>;
+                Requires<[IsARM, IsNotDarwin]> {
+    bits<24> func;
+    let Inst{23-0} = func;
+  }
 
   // ARMv5T and above
   def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
                 IIC_Br, "blx\t$func",
                 [(ARMcall GPR:$func)]>,
             Requires<[IsARM, HasV5T, IsNotDarwin]> {
-    let Inst{7-4}   = 0b0011;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    bits<4> func;
+    let Inst{31-4} = 0b1110000100101111111111110011;
+    let Inst{3-0}   = func;
   }
 
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
-  def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tbx\t$func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-           Requires<[IsARM, HasV4T, IsNotDarwin]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-  }
+  def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                   Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                   Requires<[IsARM, HasV4T, IsNotDarwin]>;
 
   // ARMv4
-  def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                 IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
-                 [(ARMcall_nolink tGPR:$func)]>,
-           Requires<[IsARM, NoV4T, IsNotDarwin]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-  }
+  def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                   Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                   Requires<[IsARM, NoV4T, IsNotDarwin]>;
 }
 
-// On Darwin R9 is call-clobbered.
 let isCall = 1,
+  // On Darwin R9 is call-clobbered.
+  // R7 is marked as a use to prevent frame-pointer assignments from being
+  // moved above / below calls.
   Defs = [R0,  R1,  R2,  R3,  R9,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
-  def BLr9  : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                IIC_Br, "bl\t${func:call}",
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [R7, SP] in {
+  def BLr9  : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+                IIC_Br, "bl\t$func",
                 [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
     let Inst{31-28} = 0b1110;
+    bits<24> func;
+    let Inst{23-0} = func;
   }
 
-  def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                   IIC_Br, "bl", "\t${func:call}",
+  def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+                   IIC_Br, "bl", "\t$func",
                    [(ARMcall_pred tglobaladdr:$func)]>,
-                  Requires<[IsARM, IsDarwin]>;
+                  Requires<[IsARM, IsDarwin]> {
+    bits<24> func;
+    let Inst{23-0} = func;
+  }
 
   // ARMv5T and above
   def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
                 IIC_Br, "blx\t$func",
                 [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
-    let Inst{7-4}   = 0b0011;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    bits<4> func;
+    let Inst{31-4} = 0b1110000100101111111111110011;
+    let Inst{3-0}   = func;
   }
 
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
-  def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tbx\t$func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-             Requires<[IsARM, HasV4T, IsDarwin]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-  }
+  def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                  Requires<[IsARM, HasV4T, IsDarwin]>;
 
   // ARMv4
-  def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                 IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
-                 [(ARMcall_nolink tGPR:$func)]>,
-           Requires<[IsARM, NoV4T, IsDarwin]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-  }
+  def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                  Requires<[IsARM, NoV4T, IsDarwin]>;
 }
 
 // Tail calls.
 
+// FIXME: These should probably be xformed into the non-TC versions of the
+// instructions as part of MC lowering.
+// FIXME: These seem to be used for both Thumb and ARM instruction selection.
+// Thumb should have its own version since the instruction is actually
+// different, even though the mnemonic is the same.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
   // Darwin versions.
   let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1053,29 +1413,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
               D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
               D27, D28, D29, D30, D31, PC],
       Uses = [SP] in {
-    def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+    def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
 
-    def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+    def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
 
     def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b\t$dst  @ TAILCALL",
-                   []>, Requires<[IsDarwin]>;
+                   []>, Requires<[IsARM, IsDarwin]>;
 
     def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b.w\t$dst  @ TAILCALL",
-                   []>, Requires<[IsDarwin]>;
+                   []>, Requires<[IsThumb, IsDarwin]>;
 
     def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
                      BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
                    []>, Requires<[IsDarwin]> {
-                   let Inst{7-4}   = 0b0001;
-                   let Inst{19-8}  = 0b111111111111;
-                   let Inst{27-20} = 0b00010010;
-                   let Inst{31-28} = 0b1110;
+      bits<4> dst;
+      let Inst{31-4} = 0b1110000100101111111111110001;
+      let Inst{3-0}  = dst;
     }
   }
 
@@ -1085,13 +1442,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
               D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
               D27, D28, D29, D30, D31, PC],
       Uses = [SP] in {
-    def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+    def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
 
-    def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+    def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
 
     def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b\t$dst  @ TAILCALL",
@@ -1104,10 +1459,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
     def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
                      BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
                    []>, Requires<[IsNotDarwin]> {
-                   let Inst{7-4}   = 0b0001;
-                   let Inst{19-8}  = 0b111111111111;
-                   let Inst{27-20} = 0b00010010;
-                   let Inst{31-28} = 0b1110;
+      bits<4> dst;
+      let Inst{31-4} = 0b1110000100101111111111110001;
+      let Inst{3-0}  = dst;
     }
   }
 }
@@ -1117,48 +1471,40 @@ let isBranch = 1, isTerminator = 1 in {
   let isBarrier = 1 in {
     let isPredicable = 1 in
     def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
-                "b\t$target", [(br bb:$target)]>;
+                "b\t$target", [(br bb:$target)]> {
+      bits<24> target;
+      let Inst{31-28} = 0b1110;
+      let Inst{23-0} = target;
+    }
 
-  let isNotDuplicable = 1, isIndirectBranch = 1 in {
-  def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
-                    IIC_Br, "mov\tpc, $target$jt",
-                    [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{20}    = 0; // S Bit
-    let Inst{24-21} = 0b1101;
-    let Inst{27-25} = 0b000;
-  }
-  def BR_JTm : JTI<(outs),
-                   (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
-                   IIC_Br, "ldr\tpc, $target$jt",
-                   [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
-                     imm:$id)]> {
-    let Inst{15-12} = 0b1111;
-    let Inst{20}    = 1; // L bit
-    let Inst{21}    = 0; // W bit
-    let Inst{22}    = 0; // B bit
-    let Inst{24}    = 1; // P bit
-    let Inst{27-25} = 0b011;
-  }
-  def BR_JTadd : JTI<(outs),
-                   (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
-                    IIC_Br, "add\tpc, $target, $idx$jt",
-                    [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
-                      imm:$id)]> {
-    let Inst{15-12} = 0b1111;
-    let Inst{20}    = 0; // S bit
-    let Inst{24-21} = 0b0100;
-    let Inst{27-25} = 0b000;
-  }
-  } // isNotDuplicable = 1, isIndirectBranch = 1
+    let isNotDuplicable = 1, isIndirectBranch = 1 in {
+    def BR_JTr : ARMPseudoInst<(outs),
+                      (ins GPR:$target, i32imm:$jt, i32imm:$id),
+                      SizeSpecial, IIC_Br,
+                      [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+    // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+    // into i12 and rs suffixed versions.
+    def BR_JTm : ARMPseudoInst<(outs),
+                     (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+                     SizeSpecial, IIC_Br,
+                     [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+                       imm:$id)]>;
+    def BR_JTadd : ARMPseudoInst<(outs),
+                   (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+                   SizeSpecial, IIC_Br,
+                   [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+                     imm:$id)]>;
+    } // isNotDuplicable = 1, isIndirectBranch = 1
   } // isBarrier = 1
 
   // FIXME: should be able to write a pattern for ARMBrcond, but can't use
   // a two-value operand where a dag node expects two operands. :(
-  def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
+  def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
                IIC_Br, "b", "\t$target",
-               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
+               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+    bits<24> target;
+    let Inst{23-0} = target;
+  }
 }
 
 // Branch and Exchange Jazelle -- for disassembly only
@@ -1172,271 +1518,303 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
 // Secure Monitor Call is a system instruction -- for disassembly only
 def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0110;
-  let Inst{7-4} = 0b0111;
+  bits<4> opt;
+  let Inst{23-4} = 0b01100000000000000111;
+  let Inst{3-0} = opt;
 }
 
 // Supervisor Call (Software Interrupt) -- for disassembly only
-let isCall = 1 in {
+let isCall = 1, Uses = [SP] in {
 def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
-              [/* For disassembly only; pattern left blank */]>;
+              [/* For disassembly only; pattern left blank */]> {
+  bits<24> svc;
+  let Inst{23-0} = svc;
+}
 }
 
 // Store Return State is a system instruction -- for disassembly only
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
-                NoItinerary, "srs${addr:submode}\tsp!, $mode",
+let isCodeGenOnly = 1 in {  // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+                NoItinerary, "srs${amode}\tsp!, $mode",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b110; // W = 1
 }
 
-def SRS  : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
-                NoItinerary, "srs${addr:submode}\tsp, $mode",
+def SRS  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+                NoItinerary, "srs${amode}\tsp, $mode",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b100; // W = 0
 }
 
 // Return From Exception is a system instruction -- for disassembly only
-def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
-                NoItinerary, "rfe${addr:submode}\t$base!",
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+                NoItinerary, "rfe${amode}\t$base!",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b011; // W = 1
 }
 
-def RFE  : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
-                NoItinerary, "rfe${addr:submode}\t$base",
+def RFE  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+                NoItinerary, "rfe${amode}\t$base",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b001; // W = 0
 }
+} // isCodeGenOnly = 1
 
 //===----------------------------------------------------------------------===//
 //  Load / store Instructions.
 //
 
 // Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def LDR  : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
-               "ldr", "\t$dst, $addr",
-               [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+
+defm LDR  : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+                    UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+                    UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR  : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+                   BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+                   BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
 
 // Special LDR for loads from non-pc-relative constpools.
 let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
     isReMaterializable = 1 in
-def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
-                 "ldr", "\t$dst, $addr", []>;
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+                 AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+                 []> {
+  bits<4> Rt;
+  bits<17> addr;
+  let Inst{23}    = addr{12};     // U (add = ('U' == 1))
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rt;
+  let Inst{11-0}  = addr{11-0};   // imm12
+}
 
 // Loads with zero extension
-def LDRH  : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                  IIC_iLoadr, "ldrh", "\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-
-def LDRB  : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
-                  IIC_iLoadr, "ldrb", "\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+def LDRH  : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                  IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+                  [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
 
 // Loads with sign extension
-def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                   IIC_iLoadr, "ldrsh", "\t$dst, $addr",
-                   [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
-
-def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                   IIC_iLoadr, "ldrsb", "\t$dst, $addr",
-                   [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                   IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+                   [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                   IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+                   [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+    isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring?
+// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
+//        how to represent that such that tblgen is happy and we don't
+//        mark this codegen only?
 // Load doubleword
-def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
-                 IIC_iLoadr, "ldrd", "\t$dst1, $addr",
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+                 (ins addrmode3:$addr), LdMiscFrm,
+                 IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
                  []>, Requires<[IsARM, HasV5TE]>;
+}
 
 // Indexed loads
-def LDR_PRE  : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
-                     "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
-                     (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
-                     "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRH_PRE  : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                     "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
-                     (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                    "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRB_PRE  : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
-                     "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
-                     (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
-                    "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
-                      (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                      "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
-                      (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                   "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
-                      (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                      "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
-                      (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                   "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+  def _PRE  : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                      (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+                      opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    // {17-14}  Rn
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<18> addr;
+    let Inst{25} = addr{13};
+    let Inst{23} = addr{12};
+    let Inst{19-16} = addr{17-14};
+    let Inst{11-0} = addr{11-0};
+  }
+  def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                      (ins GPR:$Rn, am2offset:$offset),
+                      IndexModePost, LdFrm, itin,
+                      opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<14> offset;
+    bits<4> Rn;
+    let Inst{25} = offset{13};
+    let Inst{23} = offset{12};
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = offset{11-0};
+  }
+}
 
-// For disassembly only
-def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
-                        (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
-                 "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
-                Requires<[IsARM, HasV5TE]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR  : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
 
-// For disassembly only
-def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
-                       (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
-            "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
-                Requires<[IsARM, HasV5TE]>;
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+  def _PRE  : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins addrmode3:$addr), IndexModePre,
+                        LdMiscFrm, itin,
+                        opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    bits<14> addr;
+    let Inst{23}    = addr{8};      // U bit
+    let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+    let Inst{19-16} = addr{12-9};   // Rn
+    let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+    let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+  }
+  def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+                        LdMiscFrm, itin,
+                        opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    bits<10> offset;
+    bits<4> Rn;
+    let Inst{23}    = offset{8};      // U bit
+    let Inst{22}    = offset{9};      // 1 == imm8, 0 == Rm
+    let Inst{19-16} = Rn;
+    let Inst{11-8}  = offset{7-4};    // imm7_4/zero
+    let Inst{3-0}   = offset{3-0};    // imm3_0/Rm
+  }
+}
 
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH  : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+defm LDRD :  AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+} // mayLoad = 1, neverHasSideEffects = 1
 
 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
-
-def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
-                   (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
+                   (ins GPR:$base, am2offset:$offset), IndexModeNone,
+                   LdFrm, IIC_iLoad_ru,
                    "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                  (ins GPR:$base, am2offset:$offset), IndexModeNone,
+                  LdFrm, IIC_iLoad_bh_ru,
                   "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                 (ins GPR:$base, am3offset:$offset), IndexModePost,
+                 LdMiscFrm, IIC_iLoad_bh_ru,
                  "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                  "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                 (ins GPR:$base, am3offset:$offset), IndexModePost,
+                 LdMiscFrm, IIC_iLoad_bh_ru,
+                 "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                 (ins GPR:$base, am3offset:$offset), IndexModePost,
+                 LdMiscFrm, IIC_iLoad_bh_ru,
                  "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
+}
 
 // Store
-def STR  : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
-               "str", "\t$src, $addr",
-               [(store GPR:$src, addrmode2:$addr)]>;
 
 // Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
-               IIC_iStorer, "strh", "\t$src, $addr",
-               [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
-
-def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
-               "strb", "\t$src, $addr",
-               [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+               IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+               [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
 
 // Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
-               StMiscFrm, IIC_iStorer,
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
+    isCodeGenOnly = 1 in  // $src2 doesn't exist in asm string
+def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+               StMiscFrm, IIC_iStore_d_r,
                "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
 
 // Indexed stores
-def STR_PRE  : AI2stwpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base, am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                    "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb,
-                      (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STR_POST : AI2stwpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                    "str", "\t$src, [$base], $offset", "$base = $base_wb",
-                    [(set GPR:$base_wb,
-                      (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
-                     "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb,
-                      (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
-
-def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
-                     "strh", "\t$src, [$base], $offset", "$base = $base_wb",
-                    [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
-                                         GPR:$base, am3offset:$offset))]>;
-
-def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                     "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
-                                         GPR:$base, am2offset:$offset))]>;
-
-def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                     "strb", "\t$src, [$base], $offset", "$base = $base_wb",
-                    [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
-                                         GPR:$base, am2offset:$offset))]>;
+def STR_PRE  : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                     IndexModePre, StFrm, IIC_iStore_ru,
+                     "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb,
+                      (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                     IndexModePost, StFrm, IIC_iStore_ru,
+                     "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb,
+                      (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                     IndexModePre, StFrm, IIC_iStore_bh_ru,
+                     "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+                                        GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                     IndexModePost, StFrm, IIC_iStore_bh_ru,
+                     "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
+                                        GPR:$Rn, am2offset:$offset))]>;
+
+def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+                     IndexModePre, StMiscFrm, IIC_iStore_ru,
+                     "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb,
+                      (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+
+def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+                     IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+                     "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+                                        GPR:$Rn, am3offset:$offset))]>;
 
 // For disassembly only
 def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
                      (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
+                     StMiscFrm, IIC_iStore_d_ru,
                      "strd", "\t$src1, $src2, [$base, $offset]!",
                      "$base = $base_wb", []>;
 
 // For disassembly only
 def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
                      (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
+                     StMiscFrm, IIC_iStore_d_ru,
                      "strd", "\t$src1, $src2, [$base], $offset",
                      "$base = $base_wb", []>;
 
 // STRT, STRBT, and STRHT are for disassembly only.
 
-def STRT : AI2stwpo<(outs GPR:$base_wb),
-                    (ins GPR:$src, GPR:$base,am2offset:$offset),
-                    StFrm, IIC_iStoreru,
-                    "strt", "\t$src, [$base], $offset", "$base = $base_wb",
+def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+                    (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
+                    IndexModeNone, StFrm, IIC_iStore_ru,
+                    "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
                     [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
 }
 
-def STRBT : AI2stbpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                     "strbt", "\t$src, [$base], $offset", "$base = $base_wb",
+def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                     IndexModeNone, StFrm, IIC_iStore_bh_ru,
+                     "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
                      [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
 }
 
 def STRHT: AI3sthpo<(outs GPR:$base_wb),
                     (ins GPR:$src, GPR:$base,am3offset:$offset),
-                    StMiscFrm, IIC_iStoreru,
+                    StMiscFrm, IIC_iStore_bh_ru,
                     "strht", "\t$src, [$base], $offset", "$base = $base_wb",
                     [/* For disassembly only; pattern left blank */]> {
   let Inst{21} = 1; // overwrite
@@ -1446,103 +1824,212 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$dsts, variable_ops),
-                 IndexModeNone, LdStMulFrm, IIC_iLoadm,
-                 "ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
-
-def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                      reglist:$dsts, variable_ops),
-                     IndexModeUpd, LdStMulFrm, IIC_iLoadm,
-                     "ldm${addr:submode}${p}\t$addr!, $dsts",
-                     "$addr.addr = $wb", []>;
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$srcs, variable_ops),
-                 IndexModeNone, LdStMulFrm, IIC_iStorem,
-                 "stm${addr:submode}${p}\t$addr, $srcs", "", []>;
-
-def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                      reglist:$srcs, variable_ops),
-                     IndexModeUpd, LdStMulFrm, IIC_iStorem,
-                     "stm${addr:submode}${p}\t$addr!, $srcs",
-                     "$addr.addr = $wb", []>;
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+                         InstrItinClass itin, InstrItinClass itin_upd> {
+  def IA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def IA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DB :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DB_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def IB :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b11;       // Increment Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def IB_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b11;       // Increment Before
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+} 
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// Load / Store Multiple Mnemonic Aliases
+def : MnemonicAlias<"ldm", "ldmia">;
+def : MnemonicAlias<"stm", "stmia">;
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+    hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+// FIXME: Should be a pseudo-instruction.
+def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                      reglist:$regs, variable_ops),
+                     IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
+                     "ldmia${p}\t$Rn!, $regs",
+                     "$Rn = $wb", []> {
+  let Inst{24-23} = 0b01;       // Increment After
+  let Inst{21}    = 1;          // Writeback
+  let Inst{20}    = 1;          // Load
+}
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
 
 let neverHasSideEffects = 1 in
-def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
-                "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+                "mov", "\t$Rd, $Rm", []>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
+
   let Inst{11-4} = 0b00000000;
   let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
 }
 
 // A version for the smaller set of tail call registers.
 let neverHasSideEffects = 1 in
-def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm, 
-                IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+                IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
+
   let Inst{11-4} = 0b00000000;
   let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
 }
 
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
                 DPSoRegFrm, IIC_iMOVsr,
-                "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+                "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+                UnaryDP {
+  bits<4> Rd;
+  bits<12> src;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = src;
   let Inst{25} = 0;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
-                "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+                "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
   let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = 0b0000;
+  let Inst{11-0} = imm;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
                  DPFrm, IIC_iMOVi,
-                 "movw", "\t$dst, $src",
-                 [(set GPR:$dst, imm0_65535:$src)]>,
+                 "movw", "\t$Rd, $imm",
+                 [(set GPR:$Rd, imm0_65535:$imm)]>,
                  Requires<[IsARM, HasV6T2]>, UnaryDP {
+  bits<4> Rd;
+  bits<16> imm;
+  let Inst{15-12} = Rd;
+  let Inst{11-0}  = imm{11-0};
+  let Inst{19-16} = imm{15-12};
   let Inst{20} = 0;
   let Inst{25} = 1;
 }
 
-let Constraints = "$src = $dst" in
-def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+                                (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
                   DPFrm, IIC_iMOVi,
-                  "movt", "\t$dst, $imm",
-                  [(set GPR:$dst,
+                  "movt", "\t$Rd, $imm",
+                  [(set GPR:$Rd,
                         (or (and GPR:$src, 0xffff),
                             lo16AllZero:$imm))]>, UnaryDP,
                   Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<16> imm;
+  let Inst{15-12} = Rd;
+  let Inst{11-0}  = imm{11-0};
+  let Inst{19-16} = imm{15-12};
   let Inst{20} = 0;
   let Inst{25} = 1;
 }
 
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+                      (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
 def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
       Requires<[IsARM, HasV6T2]>;
 
 let Uses = [CPSR] in
-def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
-                 "mov", "\t$dst, $src, rrx",
-                 [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+                    [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+                    Requires<[IsARM]>;
 
 // These aren't really mov instructions, but we have to define them this way
 // due to flag operands.
 
 let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
-                      IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
-                      [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
-def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
-                      IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
-                      [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+                      [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+                      Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+                      [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+                      Requires<[IsARM]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1551,31 +2038,31 @@ def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
 
 // Sign extenders
 
-defm SXTB  : AI_unary_rrot<0b01101010,
-                           "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm SXTH  : AI_unary_rrot<0b01101011,
-                           "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm SXTB  : AI_ext_rrot<0b01101010,
+                         "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH  : AI_ext_rrot<0b01101011,
+                         "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
 
-defm SXTAB : AI_bin_rrot<0b01101010,
+defm SXTAB : AI_exta_rrot<0b01101010,
                "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm SXTAH : AI_bin_rrot<0b01101011,
+defm SXTAH : AI_exta_rrot<0b01101011,
                "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
 
 // For disassembly only
-defm SXTB16  : AI_unary_rrot_np<0b01101000, "sxtb16">;
+defm SXTB16  : AI_ext_rrot_np<0b01101000, "sxtb16">;
 
 // For disassembly only
-defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">;
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
 
 // Zero extenders
 
 let AddedComplexity = 16 in {
-defm UXTB   : AI_unary_rrot<0b01101110,
-                            "uxtb"  , UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm UXTH   : AI_unary_rrot<0b01101111,
-                            "uxth"  , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm UXTB16 : AI_unary_rrot<0b01101100,
-                            "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+defm UXTB   : AI_ext_rrot<0b01101110,
+                          "uxtb"  , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH   : AI_ext_rrot<0b01101111,
+                          "uxth"  , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+                          "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
 
 // FIXME: This pattern incorrectly assumes the shl operator is a rotate.
 //        The transformation should probably be done as a combiner action
@@ -1586,33 +2073,49 @@ defm UXTB16 : AI_unary_rrot<0b01101100,
 def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
                (UXTB16r_rot GPR:$Src, 8)>;
 
-defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
                         BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
                         BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
 }
 
 // This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
 // For disassembly only
-defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
 
 
-def SBFX  : I<(outs GPR:$dst),
-              (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
-               "sbfx", "\t$dst, $src, $lsb, $width", "", []>,
+def SBFX  : I<(outs GPR:$Rd),
+              (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111101;
   let Inst{6-4}   = 0b101;
+  let Inst{20-16} = width;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{3-0}   = Rn;
 }
 
-def UBFX  : I<(outs GPR:$dst),
-              (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
-               "ubfx", "\t$dst, $src, $lsb, $width", "", []>,
+def UBFX  : I<(outs GPR:$Rd),
+              (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111111;
   let Inst{6-4}   = 0b101;
+  let Inst{20-16} = width;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{3-0}   = Rn;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1620,100 +2123,166 @@ def UBFX  : I<(outs GPR:$dst),
 //
 
 defm ADD  : AsI1_bin_irs<0b0100, "add",
+                         IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                          BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
 defm SUB  : AsI1_bin_irs<0b0010, "sub",
+                         IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                          BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set.
 defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                           BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
 defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                           BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
 defm ADC : AI1_adde_sube_irs<0b0101, "adc",
                           BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
 defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
                           BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+
+// ADC and SUBC with 's' bit set.
 defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
                           BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
 defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
                           BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
 
-def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-                 IIC_iALUi, "rsb", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
-    let Inst{25} = 1;
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+                 IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+                 [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
 
 // The reg/reg form is only defined for the disassembler; for codegen it is
 // equivalent to SUBrr.
-def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-                 IIC_iALUr, "rsb", "\t$dst, $a, $b",
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+                 IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
                  [/* For disassembly only; pattern left blank */]> {
-    let Inst{25} = 0;
-    let Inst{11-4} = 0b00000000;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{11-4} = 0b00000000;
+  let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 
-def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-                 IIC_iALUsr, "rsb", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
-    let Inst{25} = 0;
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+                 [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 
 // RSB with 's' bit set.
-let Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-                 IIC_iALUi, "rsbs", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
-    let Inst{20} = 1;
-    let Inst{25} = 1;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-                 IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
-    let Inst{20} = 1;
-    let Inst{25} = 0;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+                 IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
+                 [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
+}
+def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
+                 [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{20} = 1;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
 
 let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                 DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                 DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+                 [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-    let Inst{25} = 1;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
 // The reg/reg form is only defined for the disassembler; for codegen it is
 // equivalent to SUBrr.
-def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                 DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                 DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
                  [/* For disassembly only; pattern left blank */]> {
-    let Inst{25} = 0;
-    let Inst{11-4} = 0b00000000;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{11-4} = 0b00000000;
+  let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
-def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                 DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+                 [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-    let Inst{25} = 0;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
 
 // FIXME: Allow these to be predicated.
-let Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                  DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
-                  [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                  DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
+                  [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
                   Requires<[IsARM]> {
-    let Inst{20} = 1;
-    let Inst{25} = 1;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
-def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                  DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
-                  [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                  DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
+                  [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
                   Requires<[IsARM]> {
-    let Inst{20} = 1;
-    let Inst{25} = 0;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{20} = 1;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
 
@@ -1740,111 +2309,166 @@ def : ARMPat<(adde   GPR:$src, so_imm_not:$imm),
 
 // ARM Arithmetic Instruction -- for disassembly only
 // GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
-          list<dag> pattern = [/* For disassembly only; pattern left blank */]>
-  : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
-       opc, "\t$dst, $a, $b", pattern> {
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+          list<dag> pattern = [/* For disassembly only; pattern left blank */],
+          dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+  : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{27-20} = op27_20;
-  let Inst{7-4} = op7_4;
+  let Inst{11-4} = op11_4;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{3-0}   = Rm;
 }
 
 // Saturating add/subtract -- for disassembly only
 
-def QADD    : AAI<0b00010000, 0b0101, "qadd",
-                  [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
-def QADD16  : AAI<0b01100010, 0b0001, "qadd16">;
-def QADD8   : AAI<0b01100010, 0b1001, "qadd8">;
-def QASX    : AAI<0b01100010, 0b0011, "qasx">;
-def QDADD   : AAI<0b00010100, 0b0101, "qdadd">;
-def QDSUB   : AAI<0b00010110, 0b0101, "qdsub">;
-def QSAX    : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB    : AAI<0b00010010, 0b0101, "qsub",
-                  [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
-def QSUB16  : AAI<0b01100010, 0b0111, "qsub16">;
-def QSUB8   : AAI<0b01100010, 0b1111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
-def UQADD8  : AAI<0b01100110, 0b1001, "uqadd8">;
-def UQASX   : AAI<0b01100110, 0b0011, "uqasx">;
-def UQSAX   : AAI<0b01100110, 0b0101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
-def UQSUB8  : AAI<0b01100110, 0b1111, "uqsub8">;
+def QADD    : AAI<0b00010000, 0b00000101, "qadd",
+                  [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+                  (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB    : AAI<0b00010010, 0b00000101, "qsub",
+                  [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+                  (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD   : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+                  "\t$Rd, $Rm, $Rn">;
+def QDSUB   : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+                  "\t$Rd, $Rm, $Rn">;
+
+def QADD16  : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8   : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX    : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX    : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16  : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8   : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8  : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX   : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX   : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8  : AAI<0b01100110, 0b11111111, "uqsub8">;
 
 // Signed/Unsigned add/subtract -- for disassembly only
 
-def SASX   : AAI<0b01100001, 0b0011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
-def SADD8  : AAI<0b01100001, 0b1001, "sadd8">;
-def SSAX   : AAI<0b01100001, 0b0101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
-def SSUB8  : AAI<0b01100001, 0b1111, "ssub8">;
-def UASX   : AAI<0b01100101, 0b0011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
-def UADD8  : AAI<0b01100101, 0b1001, "uadd8">;
-def USAX   : AAI<0b01100101, 0b0101, "usax">;
-def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
-def USUB8  : AAI<0b01100101, 0b1111, "usub8">;
+def SASX   : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8  : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX   : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8  : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX   : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8  : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX   : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8  : AAI<0b01100101, 0b11111111, "usub8">;
 
 // Signed/Unsigned halving add/subtract -- for disassembly only
 
-def SHASX   : AAI<0b01100011, 0b0011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
-def SHADD8  : AAI<0b01100011, 0b1001, "shadd8">;
-def SHSAX   : AAI<0b01100011, 0b0101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
-def SHSUB8  : AAI<0b01100011, 0b1111, "shsub8">;
-def UHASX   : AAI<0b01100111, 0b0011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
-def UHADD8  : AAI<0b01100111, 0b1001, "uhadd8">;
-def UHSAX   : AAI<0b01100111, 0b0101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
-def UHSUB8  : AAI<0b01100111, 0b1111, "uhsub8">;
+def SHASX   : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8  : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX   : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8  : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX   : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8  : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX   : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8  : AAI<0b01100111, 0b11111111, "uhsub8">;
 
 // Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
 
-def USAD8  : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def USAD8  : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                 MulFrm /* for convenience */, NoItinerary, "usad8",
-                "\t$dst, $a, $b", []>,
+                "\t$Rd, $Rn, $Rm", []>,
              Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{27-20} = 0b01111000;
   let Inst{15-12} = 0b1111;
   let Inst{7-4} = 0b0001;
+  let Inst{19-16} = Rd;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
 }
-def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                 MulFrm /* for convenience */, NoItinerary, "usada8",
-                "\t$dst, $a, $b, $acc", []>,
+                "\t$Rd, $Rn, $Rm, $Ra", []>,
              Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  bits<4> Ra;
   let Inst{27-20} = 0b01111000;
   let Inst{7-4} = 0b0001;
+  let Inst{19-16} = Rd;
+  let Inst{15-12} = Ra;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
 }
 
 // Signed/Unsigned saturate -- for disassembly only
 
-def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
-              SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+              SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
               [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<5> sat_imm;
+  bits<4> Rn;
+  bits<8> sh;
   let Inst{27-21} = 0b0110101;
   let Inst{5-4} = 0b01;
+  let Inst{20-16} = sat_imm;
+  let Inst{15-12} = Rd;
+  let Inst{11-7} = sh{7-3};
+  let Inst{6} = sh{0};
+  let Inst{3-0} = Rn;
 }
 
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
-                NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
+                NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
                 [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<4> sat_imm;
+  bits<4> Rn;
   let Inst{27-20} = 0b01101010;
-  let Inst{7-4} = 0b0011;
+  let Inst{11-4} = 0b11110011;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
-def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
-              SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+              SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
               [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<5> sat_imm;
+  bits<4> Rn;
+  bits<8> sh;
   let Inst{27-21} = 0b0110111;
   let Inst{5-4} = 0b01;
+  let Inst{15-12} = Rd;
+  let Inst{11-7} = sh{7-3};
+  let Inst{6} = sh{0};
+  let Inst{20-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
-                NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
+                NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
                 [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<4> sat_imm;
+  bits<4> Rn;
   let Inst{27-20} = 0b01101110;
-  let Inst{7-4} = 0b0011;
+  let Inst{11-4} = 0b11110011;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
 def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
@@ -1855,52 +2479,100 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
 //
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
-defm ANDS  : AI1_bin_s_irs<0b0000, "and",
-                           BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
-def BFC    : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
                AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
-               "bfc", "\t$dst, $imm", "$src = $dst",
-               [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+               "bfc", "\t$Rd, $imm", "$src = $Rd",
+               [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<10> imm;
   let Inst{27-21} = 0b0111110;
   let Inst{6-0}   = 0b0011111;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = imm{4-0}; // lsb
+  let Inst{20-16} = imm{9-5}; // width
 }
 
 // A8.6.18  BFI - Bitfield insert (Encoding A1)
-def BFI    : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
                AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
-               "bfi", "\t$dst, $val, $imm", "$src = $dst",
-               [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+               "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+               [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
                                 bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<10> imm;
+  let Inst{27-21} = 0b0111110;
+  let Inst{6-4}   = 0b001; // Rn: Inst{3-0} != 15
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = imm{4-0}; // lsb
+  let Inst{20-16} = imm{9-5}; // width
+  let Inst{3-0}   = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+                                   lsb_pos_imm:$lsb, width_imm:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+               []>, Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111110;
   let Inst{6-4}   = 0b001; // Rn: Inst{3-0} != 15
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{20-16} = width; // Custom encoder => lsb+width-1
+  let Inst{3-0}   = Rn;
 }
 
-def  MVNr  : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
-                  "mvn", "\t$dst, $src",
-                  [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+def  MVNr  : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+                  "mvn", "\t$Rd, $Rm",
+                  [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{25} = 0;
+  let Inst{19-16} = 0b0000;
   let Inst{11-4} = 0b00000000;
-}
-def  MVNs  : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
-                  IIC_iMOVsr, "mvn", "\t$dst, $src",
-                  [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+  let Inst{15-12} = Rd;
+  let Inst{3-0} = Rm;
+}
+def  MVNs  : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+                  IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+                  [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+  bits<4> Rd;
+  bits<12> shift;
   let Inst{25} = 0;
-}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def  MVNi  : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
-                  IIC_iMOVi, "mvn", "\t$dst, $imm",
-                  [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
-    let Inst{25} = 1;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def  MVNi  : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+                  IIC_iMVNi, "mvn", "\t$Rd, $imm",
+                  [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
 }
 
 def : ARMPat<(and   GPR:$src, so_imm_not:$imm),
@@ -1909,247 +2581,299 @@ def : ARMPat<(and   GPR:$src, so_imm_not:$imm),
 //===----------------------------------------------------------------------===//
 //  Multiply Instructions.
 //
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+             string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = Rd;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+             string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 
-let isCommutable = 1 in
-def MUL   : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                   IIC_iMUL32, "mul", "\t$dst, $a, $b",
-                   [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-
-def MLA   : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "mla", "\t$dst, $a, $b, $c",
-                   [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-
-def MLS   : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                   IIC_iMAC32, "mls", "\t$dst, $a, $b, $c",
-                   [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
-                   Requires<[IsARM, HasV6T2]>;
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                          pred:$p, cc_out:$s),
+                          Size4Bytes, IIC_iMUL32,
+                         [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+                        Requires<[IsARM, NoV6]>;
+
+def MUL  : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+                   Requires<[IsARM, HasV6]>;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
+                         (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+                         Size4Bytes, IIC_iMAC32, 
+                         [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, 
+                        Requires<[IsARM, NoV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+def MLA  : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+                   [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+                   Requires<[IsARM, HasV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+
+def MLS  : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                   IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+                   [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+                   Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<4> Ra;
+  let Inst{19-16} = Rd;
+  let Inst{15-12} = Ra;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 
 // Extra precision multiplies with low / high results
+
 let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
-                               (ins GPR:$a, GPR:$b), IIC_iMUL64,
-                    "smull", "\t$ldst, $hdst, $a, $b", []>;
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            Size4Bytes, IIC_iMUL64, []>,
+                           Requires<[IsARM, NoV6]>;
 
-def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
-                               (ins GPR:$a, GPR:$b), IIC_iMUL64,
-                    "umull", "\t$ldst, $hdst, $a, $b", []>;
+def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMUL64, []>,
+                           Requires<[IsARM, NoV6]>;
 }
 
-// Multiply + accumulate
-def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
-                               (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "smlal", "\t$ldst, $hdst, $a, $b", []>;
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+                               (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                    "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]>;
 
-def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
-                               (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "umlal", "\t$ldst, $hdst, $a, $b", []>;
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+                               (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                    "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]>;
+}
 
-def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
-                               (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "umaal", "\t$ldst, $hdst, $a, $b", []>,
+// Multiply + accumulate
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+
+}
+
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+                               (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                    "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+                               (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                    "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
                     Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+                               (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                    "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdLo;
+  let Inst{15-12} = RdHi;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 } // neverHasSideEffects
 
 // Most significant word multiply
-def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-               IIC_iMUL32, "smmul", "\t$dst, $a, $b",
-               [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+               IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+               [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
             Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0001;
   let Inst{15-12} = 0b1111;
 }
 
-def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-               IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+               IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
                [/* For disassembly only; pattern left blank */]>,
             Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011; // R = 1
   let Inst{15-12} = 0b1111;
 }
 
-def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-               IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
-               [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0001;
-}
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+               [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+            Requires<[IsARM, HasV6]>;
 
-def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-               IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
                [/* For disassembly only; pattern left blank */]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011; // R = 1
-}
+            Requires<[IsARM, HasV6]>;
 
-def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-               IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
-               [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1101;
-}
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+               [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+            Requires<[IsARM, HasV6]>;
 
-def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-               IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
                [/* For disassembly only; pattern left blank */]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1111; // R = 1
-}
+            Requires<[IsARM, HasV6]>;
 
 multiclass AI_smul<string opc, PatFrag opnode> {
-  def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
-                                      (sext_inreg GPR:$b, i16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
-                                      (sra GPR:$b, (i32 16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
-
-  def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
-                                      (sext_inreg GPR:$b, i16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
-                                      (sra GPR:$b, (i32 16))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
-
-  def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (sra (opnode GPR:$a,
-                                    (sext_inreg GPR:$b, i16)), (i32 16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (sra (opnode GPR:$a,
-                                    (sra GPR:$b, (i32 16))), (i32 16)))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
+  def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                      (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                      (sra GPR:$Rm, (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                      (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                      (sra GPR:$Rm, (i32 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                    (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                    (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
 }
 
 
 multiclass AI_smla<string opc, PatFrag opnode> {
-  def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc,
-                               (opnode (sext_inreg GPR:$a, i16),
-                                       (sext_inreg GPR:$b, i16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
-                                                    (sra GPR:$b, (i32 16)))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
-
-  def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
-                                                 (sext_inreg GPR:$b, i16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
-             [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
-                                                    (sra GPR:$b, (i32 16)))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
-
-  def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
-                                       (sext_inreg GPR:$b, i16)), (i32 16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
-                                         (sra GPR:$b, (i32 16))), (i32 16))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
+  def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra,
+                               (opnode (sext_inreg GPR:$Rn, i16),
+                                       (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+                                                   (sra GPR:$Rm, (i32 16)))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+             [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                   (sra GPR:$Rm, (i32 16)))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                      (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                        (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
 }
 
 defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 
 // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
                       [/* For disassembly only; pattern left blank */]>,
-              Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 0;
-  let Inst{6} = 0;
-}
+              Requires<[IsARM, HasV5TE]>;
 
-def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
                       [/* For disassembly only; pattern left blank */]>,
-              Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 0;
-  let Inst{6} = 1;
-}
+              Requires<[IsARM, HasV5TE]>;
 
-def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
                       [/* For disassembly only; pattern left blank */]>,
-              Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 1;
-  let Inst{6} = 0;
-}
+              Requires<[IsARM, HasV5TE]>;
 
-def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
                       [/* For disassembly only; pattern left blank */]>,
-              Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 1;
-  let Inst{6} = 1;
-}
+              Requires<[IsARM, HasV5TE]>;
 
 // Helper class for AI_smld -- for disassembly only
-class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
-                InstrItinClass itin, string opc, string asm>
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+                    InstrItinClass itin, string opc, string asm>
   : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{4}     = 1;
   let Inst{5}     = swap;
   let Inst{6}     = sub;
@@ -2157,21 +2881,46 @@ class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
   let Inst{21-20} = 0b00;
   let Inst{22}    = long;
   let Inst{27-23} = 0b01110;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+                InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Rd;
+  let Inst{15-12} = 0b1111;
+  let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+                InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+                  InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
 }
 
 multiclass AI_smld<bit sub, string opc> {
 
-  def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-                  NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+  def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
 
-  def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-                  NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+  def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
 
-  def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
-                  NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
+  def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+                  (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                  !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
 
-  def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                  NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+  def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+                  (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                  !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
 
 }
 
@@ -2180,16 +2929,10 @@ defm SMLS : AI_smld<1, "smls">;
 
 multiclass AI_sdml<bit sub, string opc> {
 
-  def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
-    let Inst{15-12} = 0b1111;
-  }
-
-  def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
-    let Inst{15-12} = 0b1111;
-  }
-
+  def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+  def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
 }
 
 defm SMUA : AI_sdml<0, "smua">;
@@ -2199,55 +2942,35 @@ defm SMUS : AI_sdml<1, "smus">;
 //  Misc. Arithmetic Instructions.
 //
 
-def CLZ  : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "clz", "\t$dst, $src",
-              [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
-  let Inst{7-4}   = 0b0001;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def RBIT : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "rbit", "\t$dst, $src",
-              [(set GPR:$dst, (ARMrbit GPR:$src))]>,
-           Requires<[IsARM, HasV6T2]> {
-  let Inst{7-4}   = 0b0011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REV  : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "rev", "\t$dst, $src",
-              [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-               "rev16", "\t$dst, $src",
-               [(set GPR:$dst,
-                   (or (and (srl GPR:$src, (i32 8)), 0xFF),
-                       (or (and (shl GPR:$src, (i32 8)), 0xFF00),
-                           (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
-                               (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-               "revsh", "\t$dst, $src",
-               [(set GPR:$dst,
+def CLZ  : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "clz", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+           Requires<[IsARM, HasV6T2]>;
+
+def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "rev", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+               IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+               [(set GPR:$Rd,
+                   (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+                       (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+                           (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+                               (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+               Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+               IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+               [(set GPR:$Rd,
                   (sext_inreg
-                    (or (srl (and GPR:$src, 0xFF00), (i32 8)),
-                        (shl GPR:$src, (i32 8))), i16))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
+                    (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+                        (shl GPR:$Rm, (i32 8))), i16))]>,
+               Requires<[IsARM, HasV6]>;
 
 def lsl_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
@@ -2258,21 +2981,19 @@ def lsl_amt : PatLeaf<(i32 imm), [{
   return (N->getZExtValue() < 32);
 }], lsl_shift_imm>;
 
-def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
-                                 (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
-               IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
-               [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
-                                   (and (shl GPR:$src2, lsl_amt:$sh),
-                                        0xFFFF0000)))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{6-4} = 0b001;
-}
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+                              (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+               IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+               [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+                                  (and (shl GPR:$Rm, lsl_amt:$sh),
+                                       0xFFFF0000)))]>,
+               Requires<[IsARM, HasV6]>;
 
 // Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
-               (PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
-               (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+               (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+               (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
 
 def asr_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
@@ -2285,15 +3006,13 @@ def asr_amt : PatLeaf<(i32 imm), [{
 
 // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
 // will match the pattern below.
-def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
-                                 (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
-               IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
-               [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
-                                   (and (sra GPR:$src2, asr_amt:$sh),
-                                        0xFFFF)))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{6-4} = 0b101;
-}
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+                              (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+               IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+               [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+                                  (and (sra GPR:$Rm, asr_amt:$sh),
+                                       0xFFFF)))]>,
+               Requires<[IsARM, HasV6]>;
 
 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -2308,10 +3027,19 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
 //
 
 defm CMP  : AI1_cmp_irs<0b1010, "cmp",
+                        IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
                         BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
 
-// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
-// and comparison with 0. These two pieces of code should give identical
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+             (CMPri   GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+             (CMPrr   GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+             (CMPrs   GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction and comparison
+// with 0. One would expect these two pieces of code should give identical
 // results:
 //
 //   rsbs r1, r1, 0
@@ -2321,7 +3049,7 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 //   mov  r0, #1
 //
 // and:
-// 
+//
 //   cmn  r0, r1
 //   mov  r0, #0
 //   it   ls
@@ -2336,20 +3064,16 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 // never a "carry" when this AddWithCarry is performed (because the "carry bit"
 // parameter to AddWithCarry is defined as 0).
 //
-// The AddWithCarry in the CMP case seems to be relying upon the identity:
-// 
-//   ~x + 1 = -x
-//
-// However when x is 0 and unsigned, this doesn't hold:
+// When x is 0 and unsigned:
 //
 //    x = 0
 //   ~x = 0xFFFF FFFF
 //   ~x + 1 = 0x1 0000 0000
 //   (-x = 0) != (0x1 0000 0000 = ~x + 1)
 //
-// Therefore, we should disable *all* versions of CMN, especially when comparing
-// against zero, until we can limit when the CMN instruction is used (when we
-// know that the RHS is not 0) or when we have a hardware fix for this.
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
 //
 // (See the ARM docs for the "AddWithCarry" pseudo-code.)
 //
@@ -2360,13 +3084,14 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 
 // Note that TST/TEQ don't set all the same flags that CMP does!
 defm TST  : AI1_cmp_irs<0b1000, "tst",
-                        BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                      BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
 defm TEQ  : AI1_cmp_irs<0b1001, "teq",
-                        BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                      BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
 
-defm CMPz  : AI1_cmp_irs<0b1010, "cmp",
-                         BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
 defm CMNz  : AI1_cmp_irs<0b1011, "cmn",
+                         IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
                          BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
 
 //def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
@@ -2381,13 +3106,10 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
 def BCCi64 : PseudoInst<(outs),
     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
      IIC_Br,
-     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
     [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
 
 def BCCZi64 : PseudoInst<(outs),
-     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
-      IIC_Br,
-     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
     [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
 } // usesCustomInserter
 
@@ -2395,29 +3117,87 @@ def BCCZi64 : PseudoInst<(outs),
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
+// FIXME: These should all be pseudo-instructions that get expanded to
+//        the normal MOV instructions. That would fix the dependency on
+//        special casing them in tblgen.
 let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
-                IIC_iCMOVr, "mov", "\t$dst, $true",
-      [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst">, UnaryDP {
-  let Inst{11-4} = 0b00000000;
+def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
+                IIC_iCMOVr, "mov", "\t$Rd, $Rm",
+      [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{25} = 0;
+  let Inst{20} = 0;
+  let Inst{15-12} = Rd;
+  let Inst{11-4} = 0b00000000;
+  let Inst{3-0} = Rm;
 }
 
-def MOVCCs : AI1<0b1101, (outs GPR:$dst),
-                        (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
-                "mov", "\t$dst, $true",
-   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst">, UnaryDP {
+def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
+                 (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
+                "mov", "\t$Rd, $shift",
+   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> shift;
   let Inst{25} = 0;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = shift;
 }
 
-def MOVCCi : AI1<0b1101, (outs GPR:$dst),
-                        (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
-                "mov", "\t$dst, $true",
-   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst">, UnaryDP {
+let isMoveImm = 1 in
+def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
+                 DPFrm, IIC_iMOVi,
+                 "movw", "\t$Rd, $imm",
+                 []>,
+                 RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+                 UnaryDP {
+  bits<4> Rd;
+  bits<16> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = imm{15-12};
+  let Inst{15-12} = Rd;
+  let Inst{11-0}  = imm{11-0};
+}
+
+let isMoveImm = 1 in
+def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
+                         (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+                "mov", "\t$Rd, $imm",
+   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
   let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
+}
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
+                             (ins GPR:$false, i32imm:$src, pred:$p),
+                  IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
+                         (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+                "mvn", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
 }
 } // neverHasSideEffects
 
@@ -2425,64 +3205,41 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
 // Atomic operations intrinsics
 //
 
+def memb_opt : Operand<i32> {
+  let PrintMethod = "printMemBOption";
+  let ParserMatchClass = MemBarrierOptOperand;
+}
+
 // memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
-def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
-                  [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
   let Inst{31-4} = 0xf57ff05;
-  // FIXME: add support for options other than a full system DMB
-  // See DMB disassembly-only variants below.
-  let Inst{3-0} = 0b1111;
-}
-
-def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
-                  [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
-  let Inst{31-4} = 0xf57ff04;
-  // FIXME: add support for options other than a full system DSB
-  // See DSB disassembly-only variants below.
-  let Inst{3-0} = 0b1111;
+  let Inst{3-0} = opt;
 }
 
 def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
                        "mcr", "\tp15, 0, $zero, c7, c10, 5",
                        [(ARMMemBarrierMCR GPR:$zero)]>,
                        Requires<[IsARM, HasV6]> {
-  // FIXME: add support for options other than a full system DMB
   // FIXME: add encoding
 }
-
-def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
-                        "mcr", "\tp15, 0, $zero, c7, c10, 4",
-                        [(ARMSyncBarrierMCR GPR:$zero)]>,
-                        Requires<[IsARM, HasV6]> {
-  // FIXME: add support for options other than a full system DSB
-  // FIXME: add encoding
-}
-}
-
-// Memory Barrier Operations Variants -- for disassembly only
-
-def memb_opt : Operand<i32> {
-  let PrintMethod = "printMemBOption";
 }
 
-class AMBI<bits<4> op7_4, string opc>
-  : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
-          [/* For disassembly only; pattern left blank */]>,
-    Requires<[IsARM, HasDB]> {
-  let Inst{31-8} = 0xf57ff0;
-  let Inst{7-4} = op7_4;
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dsb", "\t$opt",
+                [/* For disassembly only; pattern left blank */]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf57ff04;
+  let Inst{3-0} = opt;
 }
 
-// These DMB variants are for disassembly only.
-def DMBvar : AMBI<0b0101, "dmb">;
-
-// These DSB variants are for disassembly only.
-def DSBvar : AMBI<0b0100, "dsb">;
-
 // ISB has only full system option -- for disassembly only
-def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
-            Requires<[IsARM, HasDB]> {
+def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+                Requires<[IsARM, HasDB]> {
   let Inst{31-4} = 0xf57ff06;
   let Inst{3-0} = 0b1111;
 }
@@ -2491,138 +3248,114 @@ let usesCustomInserter = 1 in {
   let Uses = [CPSR] in {
     def ATOMIC_LOAD_ADD_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_SUB_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_AND_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_OR_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_XOR_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_NAND_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_SUB_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_AND_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_OR_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_XOR_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_NAND_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_SUB_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_AND_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_OR_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_XOR_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
     def ATOMIC_LOAD_NAND_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-      "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
 
     def ATOMIC_SWAP_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
     def ATOMIC_SWAP_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
     def ATOMIC_SWAP_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
 
     def ATOMIC_CMP_SWAP_I8 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
       [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
     def ATOMIC_CMP_SWAP_I16 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
       [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
     def ATOMIC_CMP_SWAP_I32 : PseudoInst<
       (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-      "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
       [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
 }
 }
 
 let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
-                    "ldrexb", "\t$dest, [$ptr]",
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrexb", "\t$Rt, [$Rn]",
                     []>;
-def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
-                    "ldrexh", "\t$dest, [$ptr]",
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrexh", "\t$Rt, [$Rn]",
                     []>;
-def LDREX  : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
-                    "ldrex", "\t$dest, [$ptr]",
+def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrex", "\t$Rt, [$Rn]",
                     []>;
-def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
                     NoItinerary,
-                    "ldrexd", "\t$dest, $dest2, [$ptr]",
+                    "ldrexd", "\t$Rt, $Rt2, [$Rn]",
                     []>;
 }
 
-let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
                     NoItinerary,
-                    "strexb", "\t$success, $src, [$ptr]",
+                    "strexb", "\t$Rd, $src, [$Rn]",
                     []>;
-def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
                     NoItinerary,
-                    "strexh", "\t$success, $src, [$ptr]",
+                    "strexh", "\t$Rd, $Rt, [$Rn]",
                     []>;
-def STREX  : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def STREX  : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
                     NoItinerary,
-                    "strex", "\t$success, $src, [$ptr]",
+                    "strex", "\t$Rd, $Rt, [$Rn]",
                     []>;
-def STREXD : AIstrex<0b01, (outs GPR:$success),
-                    (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+                    (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
                     NoItinerary,
-                    "strexd", "\t$success, $src, $src2, [$ptr]",
+                    "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
                     []>;
 }
 
@@ -2630,29 +3363,15 @@ def STREXD : AIstrex<0b01, (outs GPR:$success),
 def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
                 [/* For disassembly only; pattern left blank */]>,
             Requires<[IsARM, HasV7]>  {
-  let Inst{31-20} = 0xf57;
-  let Inst{7-4} = 0b0001;
+  let Inst{31-0} = 0b11110101011111111111000000011111;
 }
 
 // SWP/SWPB are deprecated in V6/V7 and for disassembly only.
 let mayLoad = 1 in {
-def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
-             "swp", "\t$dst, $src, [$ptr]",
-             [/* For disassembly only; pattern left blank */]> {
-  let Inst{27-23} = 0b00010;
-  let Inst{22} = 0; // B = 0
-  let Inst{21-20} = 0b00;
-  let Inst{7-4} = 0b1001;
-}
-
-def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
-             "swpb", "\t$dst, $src, [$ptr]",
-             [/* For disassembly only; pattern left blank */]> {
-  let Inst{27-23} = 0b00010;
-  let Inst{22} = 1; // B = 1
-  let Inst{21-20} = 0b00;
-  let Inst{7-4} = 0b1001;
-}
+def SWP  : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+             [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+             [/* For disassembly only; pattern left blank */]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2660,10 +3379,11 @@ def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
 //
 
 // __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right, 
+// complete with fixup for the aeabi_read_tp function.
 let isCall = 1,
-  Defs = [R0, R12, LR, CPSR] in {
-  def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br,
-               "bl\t__aeabi_read_tp",
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
                [(set R0, ARMthread_pointer)]>;
 }
 
@@ -2680,19 +3400,16 @@ let isCall = 1,
 //   doing so, we also cause the prologue/epilogue code to actively preserve
 //   all of the callee-saved resgisters, which is exactly what we want.
 //   A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
 let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
     D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
     D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
     D31 ], hasSideEffects = 1, isBarrier = 1 in {
-  def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
-                               AddrModeNone, SizeSpecial, IndexModeNone,
-                               Pseudo, NoItinerary,
-                           "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
-                           "str\t$val, [$src, #+4]\n\t"
-                           "mov\tr0, #0\n\t"
-                           "add\tpc, pc, #0\n\t"
-                           "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                               NoItinerary,
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
                            Requires<[IsARM, HasVFP2]>;
 }
@@ -2700,14 +3417,8 @@ let Defs =
 let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ],
   hasSideEffects = 1, isBarrier = 1 in {
-  def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
-                                   AddrModeNone, SizeSpecial, IndexModeNone,
-                                   Pseudo, NoItinerary,
-                           "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t"
-                           "str\t$val, [$src, #+4]\n\t"
-                           "mov\tr0, #0\n\t"
-                           "add\tpc, pc, #0\n\t"
-                           "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                                   NoItinerary,
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
                                 Requires<[IsARM, NoVFP]>;
 }
@@ -2715,53 +3426,58 @@ let Defs =
 // FIXME: Non-Darwin version(s)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
     Defs = [ R7, LR, SP ] in {
-def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
-                             AddrModeNone, SizeSpecial, IndexModeNone,
-                             Pseudo, NoItinerary,
-                             "ldr\tsp, [$src, #8]\n\t"
-                             "ldr\t$scratch, [$src, #4]\n\t"
-                             "ldr\tr7, [$src]\n\t"
-                             "bx\t$scratch", "",
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+                             NoItinerary,
                          [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
                                 Requires<[IsARM, IsDarwin]>;
 }
 
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+            [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+              Requires<[IsDarwin]>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
 
 // Large immediate handling.
 
-// Two piece so_imms.
-let isReMaterializable = 1 in
-def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
-                         Pseudo, IIC_iMOVi,
-                         "mov", "\t$dst, $src",
-                         [(set GPR:$dst, so_imm2part:$src)]>,
-                  Requires<[IsARM, NoV6T2]>;
-
-def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
-             (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
-                    (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
-             (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
-                    (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS),
-             (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
-                    (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
-             (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)),
-                    (so_neg_imm2part_2 imm:$RHS))>;
-
-// 32-bit immediate using movw + movt.
+// 32-bit immediate using two piece so_imms or movw + movt.
 // This is a single pseudo instruction, the benefit is that it can be remat'd
 // as a single unit instead of having to handle reg inputs.
 // FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1 in
-def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
-                   "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
-                     [(set GPR:$dst, (i32 imm:$src))]>,
-               Requires<[IsARM, HasV6T2]>;
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                           [(set GPR:$dst, (arm_i32imm:$src))]>,
+                           Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2addpc,
+                        [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                             IIC_iMOVix2,
+                        [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                                IIC_iMOVix2ld,
+                    [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+                    Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
 
 // ConstantPool, GlobalAddress, and JumpTable
 def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
@@ -2800,11 +3516,15 @@ def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
       Requires<[IsARM, IsDarwin]>;
 
 // zextload i1 -> zextload i8
-def : ARMPat<(zextloadi1 addrmode2:$addr),  (LDRB addrmode2:$addr)>;
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr),    (LDRBrs ldst_so_reg:$addr)>;
 
 // extload -> zextload
-def : ARMPat<(extloadi1  addrmode2:$addr),  (LDRB addrmode2:$addr)>;
-def : ARMPat<(extloadi8  addrmode2:$addr),  (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi1 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr),  (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr),     (LDRBrs ldst_so_reg:$addr)>;
+
 def : ARMPat<(extloadi16 addrmode3:$addr),  (LDRH addrmode3:$addr)>;
 
 def : ARMPat<(extloadi8  addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
@@ -2889,19 +3609,45 @@ include "ARMInstrNEON.td"
 // Coprocessor Instructions.  For disassembly only.
 //
 
-def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-            nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-            NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{4} = 0;
-}
-
-def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-               nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-               NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+            c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+            NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+            [/* For disassembly only; pattern left blank */]> {
+  bits<4> opc1;
+  bits<4> CRn;
+  bits<4> CRd;
+  bits<4> cop;
+  bits<3> opc2;
+  bits<4> CRm;
+
+  let Inst{3-0}   = CRm;
+  let Inst{4}     = 0;
+  let Inst{7-5}   = opc2;
+  let Inst{11-8}  = cop;
+  let Inst{15-12} = CRd;
+  let Inst{19-16} = CRn;
+  let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+               c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+               NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
                [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
-  let Inst{4} = 0;
+  bits<4> opc1;
+  bits<4> CRn;
+  bits<4> CRd;
+  bits<4> cop;
+  bits<3> opc2;
+  bits<4> CRm;
+
+  let Inst{3-0}   = CRm;
+  let Inst{4}     = 0;
+  let Inst{7-5}   = opc2;
+  let Inst{11-8}  = cop;
+  let Inst{15-12} = CRd;
+  let Inst{19-16} = CRn;
+  let Inst{23-20} = opc1;
 }
 
 class ACI<dag oops, dag iops, string opc, string asm>
@@ -3000,110 +3746,164 @@ defm LDC2 : LdStCop<0b1111,    1, "ldc2">;
 defm STC  : LdStCop<{?,?,?,?}, 0, "stc">;
 defm STC2 : LdStCop<0b1111,    0, "stc2">;
 
-def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-              GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-              NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{20} = 0;
-  let Inst{4} = 1;
-}
-
-def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-                GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-                NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
-                [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{20} = 0;
-  let Inst{4} = 1;
-}
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
 
-def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-              GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-              NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{20} = 1;
+class MovRCopro<string opc, bit direction>
+  : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+        GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+        NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+        [/* For disassembly only; pattern left blank */]> {
+  let Inst{20} = direction;
   let Inst{4} = 1;
-}
 
-def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
-                GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
-                NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
-                [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rt;
+  bits<4> cop;
+  bits<3> opc1;
+  bits<3> opc2;
+  bits<4> CRm;
+  bits<4> CRn;
+
+  let Inst{15-12} = Rt;
+  let Inst{11-8}  = cop;
+  let Inst{23-21} = opc1;
+  let Inst{7-5}   = opc2;
+  let Inst{3-0}   = CRm;
+  let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRCopro2<string opc, bit direction>
+  : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+         GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+         NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+         [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
-  let Inst{20} = 1;
+  let Inst{20} = direction;
   let Inst{4} = 1;
-}
 
-def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
-               GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
-               NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
-               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0100;
-}
-
-def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
-                 GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
-                 NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
-                 [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rt;
+  bits<4> cop;
+  bits<3> opc1;
+  bits<3> opc2;
+  bits<4> CRm;
+  bits<4> CRn;
+
+  let Inst{15-12} = Rt;
+  let Inst{11-8}  = cop;
+  let Inst{23-21} = opc1;
+  let Inst{7-5}   = opc2;
+  let Inst{3-0}   = CRm;
+  let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro<string opc, bit direction>
+  : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+        GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+        NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+        [/* For disassembly only; pattern left blank */]> {
+  let Inst{23-21} = 0b010;
+  let Inst{20} = direction;
+
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> cop;
+  bits<4> opc1;
+  bits<4> CRm;
+
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+  let Inst{11-8}  = cop;
+  let Inst{7-4}   = opc1;
+  let Inst{3-0}   = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction>
+  : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+         GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+         NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+         [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
-  let Inst{23-20} = 0b0100;
-}
+  let Inst{23-21} = 0b010;
+  let Inst{20} = direction;
 
-def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
-               GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
-               NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
-               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0101;
-}
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> cop;
+  bits<4> opc1;
+  bits<4> CRm;
 
-def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
-                 GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
-                 NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
-                 [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{23-20} = 0b0101;
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+  let Inst{11-8}  = cop;
+  let Inst{7-4}   = opc1;
+  let Inst{3-0}   = CRm;
 }
 
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
 //===----------------------------------------------------------------------===//
 // Move between special register and ARM core register -- for disassembly only
 //
 
-def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr",
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0000;
+  bits<4> Rd;
+  let Inst{23-16} = 0b00001111;
+  let Inst{15-12} = Rd;
   let Inst{7-4} = 0b0000;
 }
 
-def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr",
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0100;
+  bits<4> Rd;
+  let Inst{23-16} = 0b01001111;
+  let Inst{15-12} = Rd;
   let Inst{7-4} = 0b0000;
 }
 
-def MSR : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
-              "msr", "\tcpsr$mask, $src",
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+              "msr", "\t$mask, $Rn",
               [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0010;
-  let Inst{7-4} = 0b0000;
-}
+  bits<5> mask;
+  bits<4> Rn;
 
-def MSRi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
-              "msr", "\tcpsr$mask, $a",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0010;
-  let Inst{7-4} = 0b0000;
+  let Inst{23} = 0;
+  let Inst{22} = mask{4}; // R bit
+  let Inst{21-20} = 0b10;
+  let Inst{19-16} = mask{3-0};
+  let Inst{15-12} = 0b1111;
+  let Inst{11-4} = 0b00000000;
+  let Inst{3-0} = Rn;
 }
 
-def MSRsys : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
-              "msr", "\tspsr$mask, $src",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0110;
-  let Inst{7-4} = 0b0000;
-}
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask,  so_imm:$a), NoItinerary,
+               "msr", "\t$mask, $a",
+               [/* For disassembly only; pattern left blank */]> {
+  bits<5> mask;
+  bits<12> a;
 
-def MSRsysi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
-              "msr", "\tspsr$mask, $a",
-              [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0110;
-  let Inst{7-4} = 0b0000;
+  let Inst{23} = 0;
+  let Inst{22} = mask{4}; // R bit
+  let Inst{21-20} = 0b10;
+  let Inst{19-16} = mask{3-0};
+  let Inst{15-12} = 0b1111;
+  let Inst{11-0} = a;
 }
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 4d2f1169061f..1e2e5504e662 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -16,11 +16,17 @@
 //===----------------------------------------------------------------------===//
 
 def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;
 
 def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
 def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
 def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
 def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
 def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
 
@@ -69,6 +75,11 @@ def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
 def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
 def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
 
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                           SDTCisVT<2, i32>]>;
+def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
 def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
 
 // VDUPLANE can produce a quad-register result from a double-register source,
@@ -129,830 +140,1506 @@ def nModImm : Operand<i32> {
 // NEON load / store instructions
 //===----------------------------------------------------------------------===//
 
-// Use vldmia to load a Q register as a D register pair.
-// This is equivalent to VLDMD except that it has a Q register operand
-// instead of a pair of D registers.
-def VLDMQ
-  : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
-          IndexModeNone, IIC_fpLoadm,
-          "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
-          [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-// Use vld1 to load a Q register as a D register pair.
-// This alternative to VLDMQ allows an alignment to be specified.
-// This is equivalent to VLD1q64 except that it has a Q register operand.
-def VLD1q
-  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
-          IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-} // mayLoad = 1, neverHasSideEffects = 1
-
-// Use vstmia to store a Q register as a D register pair.
-// This is equivalent to VSTMD except that it has a Q register operand
-// instead of a pair of D registers.
-def VSTMQ
-  : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
-          IndexModeNone, IIC_fpStorem,
-          "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
-          [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-// Use vst1 to store a Q register as a D register pair.
-// This alternative to VSTMQ allows an alignment to be specified.
-// This is equivalent to VST1q64 except that it has a Q register operand.
-def VST1q
-  : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
-          IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-} // mayStore = 1, neverHasSideEffects = 1
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+                    IIC_fpLoad_m, "",
+                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+def VLDMQDB
+  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+                    IIC_fpLoad_m, "",
+                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+                    IIC_fpStore_m, "",
+                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+def VSTMQDB
+  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+                    IIC_fpStore_m, "",
+                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
 
 // Classes for VLD* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
-class VLDQPseudo
-  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
-class VLDQWBPseudo
+class VLDQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset), itin,
                 "$addr.addr = $wb">;
-class VLDQQPseudo
-  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
-class VLDQQWBPseudo
+class VLDQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset), itin,
                 "$addr.addr = $wb">;
-class VLDQQQQWBPseudo
+class VLDQQQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                 "$addr.addr = $wb, $src = $dst">;
 
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
 //   VLD1     : Vector Load (multiple single elements)
 class VLD1D<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
-          (ins addrmode6:$addr), IIC_VLD1,
-          "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
+  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+          (ins addrmode6:$Rn), IIC_VLD1,
+          "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 class VLD1Q<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr), IIC_VLD1,
-          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6:$Rn), IIC_VLD1x2,
+          "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VLD1d8   : VLD1D<0b0000, "8">;
-def  VLD1d16  : VLD1D<0b0100, "16">;
-def  VLD1d32  : VLD1D<0b1000, "32">;
-def  VLD1d64  : VLD1D<0b1100, "64">;
+def  VLD1d8   : VLD1D<{0,0,0,?}, "8">;
+def  VLD1d16  : VLD1D<{0,1,0,?}, "16">;
+def  VLD1d32  : VLD1D<{1,0,0,?}, "32">;
+def  VLD1d64  : VLD1D<{1,1,0,?}, "64">;
 
-def  VLD1q8   : VLD1Q<0b0000, "8">;
-def  VLD1q16  : VLD1Q<0b0100, "16">;
-def  VLD1q32  : VLD1Q<0b1000, "32">;
-def  VLD1q64  : VLD1Q<0b1100, "64">;
+def  VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
+def  VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
+def  VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
+def  VLD1q64  : VLD1Q<{1,1,?,?}, "64">;
 
-def  VLD1q8Pseudo  : VLDQPseudo;
-def  VLD1q16Pseudo : VLDQPseudo;
-def  VLD1q32Pseudo : VLDQPseudo;
-def  VLD1q64Pseudo : VLDQPseudo;
+def  VLD1q8Pseudo  : VLDQPseudo<IIC_VLD1x2>;
+def  VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def  VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def  VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
 
 // ...with address register writeback:
 class VLD1DWB<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
-          "vld1", Dt, "\\{$dst\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+          "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
 class VLD1QWB<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
-          "vld1", Dt, "${dst:dregpair}, $addr$offset",
-          "$addr.addr = $wb", []>;
-
-def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
-def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
-def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
-def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
-
-def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
-def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
-def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
-def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
-
-def VLD1q8Pseudo_UPD  : VLDQWBPseudo;
-def VLD1q16Pseudo_UPD : VLDQWBPseudo;
-def VLD1q32Pseudo_UPD : VLDQWBPseudo;
-def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+          "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8_UPD  : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD  : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
 
 // ...with 3 registers (some of these are only for the disassembler):
 class VLD1D3<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
-          "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
+  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+          "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 class VLD1D3WB<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
-          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
+  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+          "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
 
-def VLD1d8T      : VLD1D3<0b0000, "8">;
-def VLD1d16T     : VLD1D3<0b0100, "16">;
-def VLD1d32T     : VLD1D3<0b1000, "32">;
-def VLD1d64T     : VLD1D3<0b1100, "64">;
+def VLD1d8T      : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T     : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T     : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T     : VLD1D3<{1,1,0,?}, "64">;
 
-def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
-def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
-def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
-def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d8T_UPD  : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
 
-def VLD1d64TPseudo     : VLDQQPseudo;
-def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+def VLD1d64TPseudo     : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
 
 // ...with 4 registers (some of these are only for the disassembler):
 class VLD1D4<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
-          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 class VLD1D4WB<bits<4> op7_4, string Dt>
   : NLdSt<0,0b10,0b0010,op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
-          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
-          []>;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+          []> {
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VLD1d8Q      : VLD1D4<0b0000, "8">;
-def VLD1d16Q     : VLD1D4<0b0100, "16">;
-def VLD1d32Q     : VLD1D4<0b1000, "32">;
-def VLD1d64Q     : VLD1D4<0b1100, "64">;
+def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64">;
 
-def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d8Q_UPD  : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
 
-def VLD1d64QPseudo     : VLDQQPseudo;
-def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+def VLD1d64QPseudo     : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
 
 //   VLD2     : Vector Load (multiple 2-element structures)
 class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr), IIC_VLD2,
-          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6:$Rn), IIC_VLD2,
+          "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 class VLD2Q<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b10, 0b0011, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr), IIC_VLD2,
-          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$Rn), IIC_VLD2x2,
+          "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
-def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
-def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;
+def  VLD2d8   : VLD2D<0b1000, {0,0,?,?}, "8">;
+def  VLD2d16  : VLD2D<0b1000, {0,1,?,?}, "16">;
+def  VLD2d32  : VLD2D<0b1000, {1,0,?,?}, "32">;
 
-def  VLD2q8   : VLD2Q<0b0000, "8">;
-def  VLD2q16  : VLD2Q<0b0100, "16">;
-def  VLD2q32  : VLD2Q<0b1000, "32">;
+def  VLD2q8   : VLD2Q<{0,0,?,?}, "8">;
+def  VLD2q16  : VLD2Q<{0,1,?,?}, "16">;
+def  VLD2q32  : VLD2Q<{1,0,?,?}, "32">;
 
-def  VLD2d8Pseudo  : VLDQPseudo;
-def  VLD2d16Pseudo : VLDQPseudo;
-def  VLD2d32Pseudo : VLDQPseudo;
+def  VLD2d8Pseudo  : VLDQPseudo<IIC_VLD2>;
+def  VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def  VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
 
-def  VLD2q8Pseudo  : VLDQQPseudo;
-def  VLD2q16Pseudo : VLDQQPseudo;
-def  VLD2q32Pseudo : VLDQQPseudo;
+def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
+def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
 
 // ...with address register writeback:
 class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
-          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+          "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 class VLD2QWB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b10, 0b0011, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
-          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+          "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
+def VLD2d8_UPD  : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
 
-def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
-def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
-def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2q8_UPD  : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
 
-def VLD2d8Pseudo_UPD  : VLDQWBPseudo;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+def VLD2d8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
 
-def VLD2q8Pseudo_UPD  : VLDQQWBPseudo;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
 
 // ...with double-spaced registers (for disassembly only):
-def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
-def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
-def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
-def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
+def VLD2b8      : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16     : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32     : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD  : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
 
 //   VLD3     : Vector Load (multiple 3-element structures)
 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr), IIC_VLD3,
-          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
+  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$Rn), IIC_VLD3,
+          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 
-def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
-def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
-def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;
+def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
+def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
+def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
 
-def  VLD3d8Pseudo  : VLDQQPseudo;
-def  VLD3d16Pseudo : VLDQQPseudo;
-def  VLD3d32Pseudo : VLDQQPseudo;
+def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
+def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
 
 // ...with address register writeback:
 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
-          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
-
-def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
-def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
-def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
-
-def VLD3d8Pseudo_UPD  : VLDQQWBPseudo;
-def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
-def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
-def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
-def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
-def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
-
-def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo;
-def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
 
 // ...alternate versions to be allocated odd register numbers:
-def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo;
-def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
 
 //   VLD4     : Vector Load (multiple 4-element structures)
 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr), IIC_VLD4,
-          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$Rn), IIC_VLD4,
+          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
-def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
-def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;
+def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
+def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
+def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
 
-def  VLD4d8Pseudo  : VLDQQPseudo;
-def  VLD4d16Pseudo : VLDQQPseudo;
-def  VLD4d32Pseudo : VLDQQPseudo;
+def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
+def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
 
 // ...with address register writeback:
 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
-          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
-
-def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
-def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
-def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
-
-def VLD4d8Pseudo_UPD  : VLDQQWBPseudo;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
-def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
-def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
-def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
-
-def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
 
 // ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QPR:$dst),
+                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+                itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQPR:$dst),
+                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+                itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQQQPR:$dst),
+                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+                itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
 
 //   VLD1LN   : Vector Load (single element to one lane)
-//   FIXME: Not yet implemented.
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+             PatFrag LoadOp>
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+          "$src = $Vd",
+          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+                                         (i32 (LoadOp addrmode6:$Rn)),
+                                         imm:$lane))]> {
+  let Rm = 0b1111;
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+                                               (i32 (LoadOp addrmode6:$addr)),
+                                               imm:$lane))];
+}
+
+def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{4};
+}
+def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{4};
+  let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+                         (f32 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+                         (f32 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+          "\\{$Vd[$lane]\\}, $Rn$Rm",
+          "$src = $Vd, $Rn.addr = $wb", []>;
+
+def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{4};
+  let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
 
 //   VLD2LN   : Vector Load (single 2-element structure to one lane)
 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2", []> {
+  let Rm = 0b1111;
+  let Inst{4}   = Rn{4};
+}
+
+def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
 
-def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
 
 // ...with double-spaced registers:
-def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
 
 // ...with address register writeback:
 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
-          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
-          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+  let Inst{4}   = Rn{4};
+}
 
-def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
 
-def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
+def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
 
 //   VLD3LN   : Vector Load (single 3-element structure to one lane)
 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-          nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+  let Rm = 0b1111;
+}
+
+def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
 
 // ...with double-spaced registers:
-def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
 
 // ...with address register writeback:
 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
-          IIC_VLD3, "vld3", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
+          IIC_VLD3lnu, "vld3", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
           []>;
 
-def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
 
 //   VLD4LN   : Vector Load (single 4-element structure to one lane)
 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-          nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+  let Rm = 0b1111;
+  let Inst{4}   = Rn{4};
+}
 
-def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
 
 // ...with double-spaced registers:
-def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
 
 // ...with address register writeback:
 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
-          IIC_VLD4, "vld4", Dt,
-"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
-"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
-          []>;
+          IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+          []> {
+  let Inst{4}   = Rn{4};
+}
+
+def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
 
-def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
 
-def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
 //   VLD1DUP  : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+  let Pattern = [(set QPR:$dst,
+                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD1dup,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD2dup,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
 //   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6dup:$Rn), IIC_VLD3dup,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
 //   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
-//   FIXME: Not yet implemented.
+class VLD4DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6dup:$Rn), IIC_VLD4dup,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 // Classes for VST* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
-class VSTQPseudo
-  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
-class VSTQWBPseudo
+class VSTQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                 "$addr.addr = $wb">;
-class VSTQQPseudo
-  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
-class VSTQQWBPseudo
+class VSTQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                 "$addr.addr = $wb">;
-class VSTQQQQWBPseudo
+class VSTQQQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                 "$addr.addr = $wb">;
 
 //   VST1     : Vector Store (multiple single elements)
 class VST1D<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
-          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+          IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 class VST1Q<bits<4> op7_4, string Dt>
   : NLdSt<0,0b00,0b1010,op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+          "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VST1d8   : VST1D<0b0000, "8">;
-def  VST1d16  : VST1D<0b0100, "16">;
-def  VST1d32  : VST1D<0b1000, "32">;
-def  VST1d64  : VST1D<0b1100, "64">;
+def  VST1d8   : VST1D<{0,0,0,?}, "8">;
+def  VST1d16  : VST1D<{0,1,0,?}, "16">;
+def  VST1d32  : VST1D<{1,0,0,?}, "32">;
+def  VST1d64  : VST1D<{1,1,0,?}, "64">;
 
-def  VST1q8   : VST1Q<0b0000, "8">;
-def  VST1q16  : VST1Q<0b0100, "16">;
-def  VST1q32  : VST1Q<0b1000, "32">;
-def  VST1q64  : VST1Q<0b1100, "64">;
+def  VST1q8   : VST1Q<{0,0,?,?}, "8">;
+def  VST1q16  : VST1Q<{0,1,?,?}, "16">;
+def  VST1q32  : VST1Q<{1,0,?,?}, "32">;
+def  VST1q64  : VST1Q<{1,1,?,?}, "64">;
 
-def  VST1q8Pseudo  : VSTQPseudo;
-def  VST1q16Pseudo : VSTQPseudo;
-def  VST1q32Pseudo : VSTQPseudo;
-def  VST1q64Pseudo : VSTQPseudo;
+def  VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
+def  VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def  VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def  VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
 
 // ...with address register writeback:
 class VST1DWB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
-          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+          "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
 class VST1QWB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
-          "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+          IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VST1d8_UPD  : VST1DWB<0b0000, "8">;
-def VST1d16_UPD : VST1DWB<0b0100, "16">;
-def VST1d32_UPD : VST1DWB<0b1000, "32">;
-def VST1d64_UPD : VST1DWB<0b1100, "64">;
+def VST1d8_UPD  : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
 
-def VST1q8_UPD  : VST1QWB<0b0000, "8">;
-def VST1q16_UPD : VST1QWB<0b0100, "16">;
-def VST1q32_UPD : VST1QWB<0b1000, "32">;
-def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8_UPD  : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
 
-def VST1q8Pseudo_UPD  : VSTQWBPseudo;
-def VST1q16Pseudo_UPD : VSTQWBPseudo;
-def VST1q32Pseudo_UPD : VSTQWBPseudo;
-def VST1q64Pseudo_UPD : VSTQWBPseudo;
+def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
 
 // ...with 3 registers (some of these are only for the disassembler):
 class VST1D3<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
-          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+          IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 class VST1D3WB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3),
-          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3),
+          IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
 
-def VST1d8T      : VST1D3<0b0000, "8">;
-def VST1d16T     : VST1D3<0b0100, "16">;
-def VST1d32T     : VST1D3<0b1000, "32">;
-def VST1d64T     : VST1D3<0b1100, "64">;
+def VST1d8T      : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T     : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T     : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T     : VST1D3<{1,1,0,?}, "64">;
 
-def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
-def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
-def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
-def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d8T_UPD  : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
 
-def VST1d64TPseudo     : VSTQQPseudo;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
 
 // ...with 4 registers (some of these are only for the disassembler):
 class VST1D4<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
-          []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+          []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 class VST1D4WB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+          "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VST1d8Q      : VST1D4<0b0000, "8">;
-def VST1d16Q     : VST1D4<0b0100, "16">;
-def VST1d32Q     : VST1D4<0b1000, "32">;
-def VST1d64Q     : VST1D4<0b1100, "64">;
+def VST1d8Q      : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q     : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q     : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q     : VST1D4<{1,1,?,?}, "64">;
 
-def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
-def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
-def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
-def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d8Q_UPD  : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
 
-def VST1d64QPseudo     : VSTQQPseudo;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
 
 //   VST2     : Vector Store (multiple 2-element structures)
 class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
-          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 class VST2Q<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
-          "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
-def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
-def  VST2d32  : VST2D<0b1000, 0b1000, "32">;
+def  VST2d8   : VST2D<0b1000, {0,0,?,?}, "8">;
+def  VST2d16  : VST2D<0b1000, {0,1,?,?}, "16">;
+def  VST2d32  : VST2D<0b1000, {1,0,?,?}, "32">;
 
-def  VST2q8   : VST2Q<0b0000, "8">;
-def  VST2q16  : VST2Q<0b0100, "16">;
-def  VST2q32  : VST2Q<0b1000, "32">;
+def  VST2q8   : VST2Q<{0,0,?,?}, "8">;
+def  VST2q16  : VST2Q<{0,1,?,?}, "16">;
+def  VST2q32  : VST2Q<{1,0,?,?}, "32">;
 
-def  VST2d8Pseudo  : VSTQPseudo;
-def  VST2d16Pseudo : VSTQPseudo;
-def  VST2d32Pseudo : VSTQPseudo;
+def  VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
+def  VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def  VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
 
-def  VST2q8Pseudo  : VSTQQPseudo;
-def  VST2q16Pseudo : VSTQQPseudo;
-def  VST2q32Pseudo : VSTQQPseudo;
+def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
+def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
 
 // ...with address register writeback:
 class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
-          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 class VST2QWB<bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+          "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
+def VST2d8_UPD  : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
 
-def VST2q8_UPD  : VST2QWB<0b0000, "8">;
-def VST2q16_UPD : VST2QWB<0b0100, "16">;
-def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2q8_UPD  : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
 
-def VST2d8Pseudo_UPD  : VSTQWBPseudo;
-def VST2d16Pseudo_UPD : VSTQWBPseudo;
-def VST2d32Pseudo_UPD : VSTQWBPseudo;
+def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
 
-def VST2q8Pseudo_UPD  : VSTQQWBPseudo;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
 
 // ...with double-spaced registers (for disassembly only):
-def VST2b8      : VST2D<0b1001, 0b0000, "8">;
-def VST2b16     : VST2D<0b1001, 0b0100, "16">;
-def VST2b32     : VST2D<0b1001, 0b1000, "32">;
-def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
+def VST2b8      : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16     : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32     : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD  : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
 
 //   VST3     : Vector Store (multiple 3-element structures)
 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 
-def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
-def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
-def  VST3d32  : VST3D<0b0100, 0b1000, "32">;
+def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
+def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
+def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;
 
-def  VST3d8Pseudo  : VSTQQPseudo;
-def  VST3d16Pseudo : VSTQQPseudo;
-def  VST3d32Pseudo : VSTQQPseudo;
+def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
+def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
 
 // ...with address register writeback:
 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
-          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
-
-def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
-def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
-def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
-
-def VST3d8Pseudo_UPD  : VSTQQWBPseudo;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VST3q8      : VST3D<0b0101, 0b0000, "8">;
-def VST3q16     : VST3D<0b0101, 0b0100, "16">;
-def VST3q32     : VST3D<0b0101, 0b1000, "32">;
-def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
-
-def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
 
 // ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
 
 //   VST4     : Vector Store (multiple 4-element structures)
 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
-          IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
-          "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def  VST4d8   : VST4D<0b0000, 0b0000, "8">;
-def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
-def  VST4d32  : VST4D<0b0000, 0b1000, "32">;
+def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
+def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
+def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;
 
-def  VST4d8Pseudo  : VSTQQPseudo;
-def  VST4d16Pseudo : VSTQQPseudo;
-def  VST4d32Pseudo : VSTQQPseudo;
+def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
+def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
 
 // ...with address register writeback:
 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
-           "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
-
-def VST4d8_UPD  : VST4DWB<0b0000, 0b0000, "8">;
-def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
-def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
-
-def VST4d8Pseudo_UPD  : VSTQQWBPseudo;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VST4q8      : VST4D<0b0001, 0b0000, "8">;
-def VST4q16     : VST4D<0b0001, 0b0100, "16">;
-def VST4q32     : VST4D<0b0001, 0b1000, "32">;
-def VST4q8_UPD  : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
-
-def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
 
 // ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
 
 //   VST1LN   : Vector Store (single element from one lane)
-//   FIXME: Not yet implemented.
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+             PatFrag StoreOp, SDNode ExtractOp>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+  let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNPseudo<IIC_VST1ln> {
+  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                          addrmode6:$addr)];
+}
+
+def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+                       NEONvgetlaneu> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+                       NEONvgetlaneu> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{5};
+}
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+  let Inst{7}   = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+          "\\{$Vd[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []>;
+
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
 
 //   VST2LN   : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
-          "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{4}   = Rn{4};
+}
+
+def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
 
 // ...with double-spaced registers:
-def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{4}   = Rn{4};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
 
 // ...with address register writeback:
 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
           "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          "$addr.addr = $wb", []> {
+  let Inst{4}   = Rn{4};
+}
+
+def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7}   = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
 
-def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
 
 //   VST3LN   : Vector Store (single 3-element structure from one lane)
 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VST, "vst3", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+}
 
-def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
+
+def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
 
 // ...with double-spaced registers:
-def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
 
 // ...with address register writeback:
 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
-          IIC_VST, "vst3", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+          IIC_VST3lnu, "vst3", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
 
-def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7}   = lane{0};
+}
 
-def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
 
 //   VST4LN   : Vector Store (single 4-element structure from one lane)
 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VST, "vst4", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
-          "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
 
-def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
 
 // ...with double-spaced registers:
-def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
 
-// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
 
 // ...with address register writeback:
 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
-          IIC_VST, "vst4", Dt,
-  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+          IIC_VST4lnu, "vst4", Dt,
+  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
 
-def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{5} = Rn{5};
+}
 
-def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
 
 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
@@ -1000,98 +1687,92 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
 // Instruction Classes
 //===----------------------------------------------------------------------===//
 
-// Basic 2-register operations: single-, double- and quad-register.
-class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
-           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
-        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
-        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
+// Basic 2-register operations: double- and quad-register.
 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
-        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
-        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
 
 // Basic 2-register intrinsics, both double- and quad-register.
 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
 // Narrow 2-register operations.
 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
            InstrItinClass itin, string OpcodeStr, string Dt,
            ValueType TyD, ValueType TyQ, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
 
 // Narrow 2-register intrinsics.
 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyD, ValueType TyQ, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
 
 // Long 2-register operations (currently only used for VMOVL).
 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
            InstrItinClass itin, string OpcodeStr, string Dt,
            ValueType TyQ, ValueType TyD, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
-        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
 
 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
-  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
-        (ins DPR:$src1, DPR:$src2), IIC_VPERMD, 
-        OpcodeStr, Dt, "$dst1, $dst2",
-        "$src1 = $dst1, $src2 = $dst2", []>;
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+        OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                   InstrItinClass itin, string OpcodeStr, string Dt>
-  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
-        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
-        "$src1 = $dst1, $src2 = $dst2", []>;
-
-// Basic 3-register operations: single-, double- and quad-register.
-class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
-           SDNode OpNode, bit Commutable>
-  : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
-        IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
-  let isCommutable = Commutable;
-}
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
 
+// Basic 3-register operations: double- and quad-register.
 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr, string Dt,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 // Same as N3VD but no data type.
@@ -1100,31 +1781,31 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
   : N3VX<op24, op23, op21_20, op11_8, 0, op4,
-         (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, 
-         OpcodeStr, "$dst, $src1, $src2", "",
-         [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
+         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+         OpcodeStr, "$Vd, $Vn, $Vm", "",
+         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
   let isCommutable = Commutable;
 }
 
-class N3VDSL<bits<2> op21_20, bits<4> op11_8, 
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType Ty, SDNode ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (Ty DPR:$dst),
-              (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$Vn),
+                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
   let isCommutable = 0;
 }
-class N3VDSL16<bits<2> op21_20, bits<4> op11_8, 
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
-        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
-        [(set (Ty DPR:$dst),
-              (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$Vn),
+                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
   let isCommutable = 0;
 }
 
@@ -1132,40 +1813,40 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr, string Dt,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, 
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
   : N3VX<op24, op23, op21_20, op11_8, 1, op4,
-         (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, 
-         OpcodeStr, "$dst, $src1, $src2", "",
-         [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
+         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+         OpcodeStr, "$Vd, $Vn, $Vm", "",
+         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
   let isCommutable = Commutable;
 }
-class N3VQSL<bits<2> op21_20, bits<4> op11_8, 
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDNode ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (ResTy QPR:$dst),
-              (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$Vn),
+                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
   let isCommutable = 0;
 }
 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDNode ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
-        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
-        [(set (ResTy QPR:$dst),
-              (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$Vn),
+                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
   let isCommutable = 0;
 }
@@ -1175,30 +1856,39 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               Format f, InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
-class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (Ty DPR:$dst),
-              (Ty (IntOp (Ty DPR:$src1),
-                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (IntOp (Ty DPR:$Vn),
+                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                            imm:$lane)))))]> {
   let isCommutable = 0;
 }
 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (Ty DPR:$dst),
-              (Ty (IntOp (Ty DPR:$src1),
-                         (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (IntOp (Ty DPR:$Vn),
+                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<op24, op23, op21_20, op11_8, 0, op4,
+        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
   let isCommutable = 0;
 }
 
@@ -1206,20 +1896,20 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               Format f, InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
-class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, 
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (ResTy QPR:$dst),
-              (ResTy (IntOp (ResTy QPR:$src1),
-                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (ResTy QPR:$Vn),
+                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                  imm:$lane)))))]> {
   let isCommutable = 0;
 }
@@ -1227,93 +1917,95 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (ResTy QPR:$dst),
-              (ResTy (IntOp (ResTy QPR:$src1),
-                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (ResTy QPR:$Vn),
+                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                  imm:$lane)))))]> {
   let isCommutable = 0;
 }
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<op24, op23, op21_20, op11_8, 1, op4,
+        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+  let isCommutable = 0;
+}
 
-// Multiply-Add/Sub operations: single-, double- and quad-register.
-class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                InstrItinClass itin, string OpcodeStr, string Dt,
-                ValueType Ty, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR_VFP2:$dst),
-        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
-
+// Multiply-Add/Sub operations: double- and quad-register.
 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
-                ValueType Ty, SDNode MulOp, SDNode OpNode>
+                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
-                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt,
-                  ValueType Ty, SDNode MulOp, SDNode ShOp>
+                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst),
-        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (Ty DPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (MulOp DPR:$src2,
-                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                      imm:$lane)))))))]>;
 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType Ty, SDNode MulOp, SDNode ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst),
-        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (Ty DPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (MulOp DPR:$src2,
-                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                      imm:$lane)))))))]>;
 
 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
-                SDNode MulOp, SDNode OpNode>
+                SDPatternOperator MulOp, SDPatternOperator OpNode>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
-                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
-                  SDNode MulOp, SDNode ShOp>
+                  SDPatternOperator MulOp, SDPatternOperator ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (MulOp QPR:$src2,
-                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+                           (ResTy (MulOp QPR:$Vn,
+                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                         imm:$lane)))))))]>;
 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType ResTy, ValueType OpTy,
                     SDNode MulOp, SDNode ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (MulOp QPR:$src2,
-                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+                           (ResTy (MulOp QPR:$Vn,
+                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                         imm:$lane)))))))]>;
 
 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
@@ -1321,18 +2013,18 @@ class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
-                             (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
-                             (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
 
 // Neon 3-argument intrinsics, both double- and quad-register.
 // The destination register is also used as the first source operand register.
@@ -1340,52 +2032,52 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
-                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
-                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
 
 // Long Multiply-Add/Sub operations.
 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
-                                (TyQ (MulOp (TyD DPR:$src2),
-                                            (TyD DPR:$src3)))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+                                (TyQ (MulOp (TyD DPR:$Vn),
+                                            (TyD DPR:$Vm)))))]>;
 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set QPR:$dst,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
-                  (TyQ (MulOp (TyD DPR:$src2),
-                              (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+                  (TyQ (MulOp (TyD DPR:$Vn),
+                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                  imm:$lane))))))]>;
 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set QPR:$dst,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
-                  (TyQ (MulOp (TyD DPR:$src2),
-                              (TyD (NEONvduplane (TyD DPR_8:$src3),
+                  (TyQ (MulOp (TyD DPR:$Vn),
+                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                  imm:$lane))))))]>;
 
 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
@@ -1394,11 +2086,11 @@ class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                    ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                    SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
-                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
-                                                        (TyD DPR:$src3)))))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+                                                        (TyD DPR:$Vm)))))))]>;
 
 // Neon Long 3-argument intrinsic.  The destination register is
 // a quad-register and is also used as the first source operand register.
@@ -1406,35 +2098,35 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst,
-          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd,
+          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
-                            (OpTy DPR:$src2),
-                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+                            (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]>;
 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
-                            (OpTy DPR:$src2),
-                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
+                            (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]>;
 
 // Narrowing 3-register intrinsics.
@@ -1442,9 +2134,9 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
               Intrinsic IntOp, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
+        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 
@@ -1453,29 +2145,29 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr, string Dt,
            ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set QPR:$dst,
-          (TyQ (OpNode (TyD DPR:$src1),
-                       (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set QPR:$Vd,
+          (TyQ (OpNode (TyD DPR:$Vn),
+                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode OpNode>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), 
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set QPR:$dst,
-          (TyQ (OpNode (TyD DPR:$src1),
-                       (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set QPR:$Vd,
+          (TyQ (OpNode (TyD DPR:$Vn),
+                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
 
 // Long 3-register operations with explicitly extended operands.
 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1483,10 +2175,10 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
               bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
-                                (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
   let isCommutable = Commutable;
 }
 
@@ -1496,10 +2188,10 @@ class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                  bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
-                                                (TyD DPR:$src2))))))]> {
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+                                                (TyD DPR:$Vm))))))]> {
   let isCommutable = Commutable;
 }
 
@@ -1508,30 +2200,30 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (ResTy QPR:$dst),
-              (ResTy (IntOp (OpTy DPR:$src1),
-                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]>;
 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), 
-        NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
-        [(set (ResTy QPR:$dst),
-              (ResTy (IntOp (OpTy DPR:$src1),
-                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
+        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]>;
 
 // Wide 3-register operations.
@@ -1539,10 +2231,10 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
            SDNode OpNode, SDNode ExtOp, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
-                                (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
   let isCommutable = Commutable;
 }
 
@@ -1551,16 +2243,16 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
 // Pairwise long 2-register accumulate intrinsics,
 // both double- and quad-register.
@@ -1570,17 +2262,17 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
-        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
-        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                  bits<2> op17_16, bits<5> op11_7, bit op4,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
-        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
-        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
 
 // Shift by immediate,
 // both double- and quad-register.
@@ -1588,25 +2280,25 @@ class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType Ty, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, 0, op4,
-           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
-           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
+           (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType Ty, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, 1, op4,
-           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
-           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
+           (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
 
 // Long shift by immediate.
 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, op6, op4,
-           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
-           IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
+           (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                           (i32 imm:$SIMM))))]>;
 
 // Narrow shift by immediate.
@@ -1614,42 +2306,42 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDNode OpNode>
   : N2VImm<op24, op23, op11_8, op7, op6, op4,
-           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
-           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
+           (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                           (i32 imm:$SIMM))))]>;
 
 // Shift right by immediate and accumulate,
 // both double- and quad-register.
 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
-  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
-           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
-           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
-           [(set DPR:$dst, (Ty (add DPR:$src1,
-                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
+  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+           [(set DPR:$Vd, (Ty (add DPR:$src1,
+                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
-  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
-           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
-           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
-           [(set QPR:$dst, (Ty (add QPR:$src1,
-                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
+  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+           [(set QPR:$Vd, (Ty (add QPR:$src1,
+                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
 
 // Shift by immediate and insert,
 // both double- and quad-register.
 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                 Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
-  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
-           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
-           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
-           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
+  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+           (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                 Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
-  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
-           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
-           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
-           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
+  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+           (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
+           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
 
 // Convert, with fractional bits immediate,
 // both double- and quad-register.
@@ -1657,16 +2349,16 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
               string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
   : N2VImm<op24, op23, op11_8, op7, 0, op4,
-           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
-           IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
+           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
               string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
   : N2VImm<op24, op23, op11_8, op7, 1, op4,
-           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
-           IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
-           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
+           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
 
 //===----------------------------------------------------------------------===//
 // Multiclasses
@@ -1678,45 +2370,127 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
 //   S = single int (32 bit) elements
 //   D = double int (64 bit) elements
 
-// Neon 2-register vector operations -- for disassembly only.
+// Neon 2-register vector operations and intrinsics.
 
-// First with only element sizes of 8, 16 and 32 bits:
+// Neon 2-register comparisons.
+//   source operand element sizes of 8, 16 and 32 bits:
 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4, string opc, string Dt,
-                       string asm> {
+                       string asm, SDNode OpNode> {
   // 64-bit vector types.
   def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
-                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "8"), asm, "", []>;
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "8"), asm, "",
+                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
   def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
-                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "16"), asm, "", []>;
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "16"), asm, "",
+                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
   def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
-                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "32"), asm, "", []>;
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "32"), asm, "",
+                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
   def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
-                  (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, "f32", asm, "", []> {
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, "f32", asm, "",
+                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
 
   // 128-bit vector types.
   def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
-                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "8"), asm, "", []>;
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "8"), asm, "",
+                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
   def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
-                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "16"), asm, "", []>;
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "16"), asm, "",
+                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
   def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
-                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "32"), asm, "", []>;
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, !strconcat(Dt, "32"), asm, "",
+                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
   def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
-                  (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, "f32", asm, "", []> {
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, "f32", asm, "",
+                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
 }
 
+
+// Neon 2-register vector intrinsics,
+//   element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                      bits<5> op11_7, bit op4,
+                      InstrItinClass itinD, InstrItinClass itinQ,
+                      string OpcodeStr, string Dt, Intrinsic IntOp> {
+  // 64-bit vector types.
+  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+  // 128-bit vector types.
+  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+//   source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                    bits<5> op11_7, bit op6, bit op4,
+                    InstrItinClass itin, string OpcodeStr, string Dt,
+                    SDNode OpNode> {
+  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "16"),
+                   v8i8, v8i16, OpNode>;
+  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "32"),
+                   v4i16, v4i32, OpNode>;
+  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "64"),
+                   v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+//   source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                       bits<5> op11_7, bit op6, bit op4,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
+                       Intrinsic IntOp> {
+  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+                      itin, OpcodeStr, !strconcat(Dt, "16"),
+                      v8i8, v8i16, IntOp>;
+  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+                      itin, OpcodeStr, !strconcat(Dt, "32"),
+                      v4i16, v4i32, IntOp>;
+  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+                      itin, OpcodeStr, !strconcat(Dt, "64"),
+                      v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+//   source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+                    string OpcodeStr, string Dt, SDNode OpNode> {
+  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
 // Neon 3-register vector operations.
 
 // First with only element sizes of 8, 16 and 32 bits:
@@ -1726,7 +2500,7 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
   // 64-bit vector types.
-  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, 
+  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                    OpcodeStr, !strconcat(Dt, "8"),
                    v8i8, v8i8, OpNode, Commutable>;
   def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
@@ -1775,54 +2549,6 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 }
 
 
-// Neon Narrowing 2-register vector operations,
-//   source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
-                    bits<5> op11_7, bit op6, bit op4, 
-                    InstrItinClass itin, string OpcodeStr, string Dt,
-                    SDNode OpNode> {
-  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
-                   itin, OpcodeStr, !strconcat(Dt, "16"),
-                   v8i8, v8i16, OpNode>;
-  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
-                   itin, OpcodeStr, !strconcat(Dt, "32"),
-                   v4i16, v4i32, OpNode>;
-  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
-                   itin, OpcodeStr, !strconcat(Dt, "64"),
-                   v2i32, v2i64, OpNode>;
-}
-
-// Neon Narrowing 2-register vector intrinsics,
-//   source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
-                       bits<5> op11_7, bit op6, bit op4, 
-                       InstrItinClass itin, string OpcodeStr, string Dt,
-                       Intrinsic IntOp> {
-  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
-                      itin, OpcodeStr, !strconcat(Dt, "16"),
-                      v8i8, v8i16, IntOp>;
-  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
-                      itin, OpcodeStr, !strconcat(Dt, "32"),
-                      v4i16, v4i32, IntOp>;
-  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
-                      itin, OpcodeStr, !strconcat(Dt, "64"),
-                      v2i32, v2i64, IntOp>;
-}
-
-
-// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
-//   source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
-                    string OpcodeStr, string Dt, SDNode OpNode> {
-  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
-  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
-  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
-                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
-}
-
-
 // Neon 3-register vector intrinsics.
 
 // First with only element sizes of 16 and 32 bits:
@@ -1847,8 +2573,29 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, IntOp, Commutable>;
 }
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                     InstrItinClass itinD16, InstrItinClass itinD32,
+                     InstrItinClass itinQ16, InstrItinClass itinQ32,
+                     string OpcodeStr, string Dt,
+                     Intrinsic IntOp> {
+  // 64-bit vector types.
+  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+                      OpcodeStr, !strconcat(Dt, "16"),
+                      v4i16, v4i16, IntOp>;
+  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+                      OpcodeStr, !strconcat(Dt, "32"),
+                      v2i32, v2i32, IntOp>;
+
+  // 128-bit vector types.
+  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+                      OpcodeStr, !strconcat(Dt, "16"),
+                      v8i16, v8i16, IntOp>;
+  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+                      OpcodeStr, !strconcat(Dt, "32"),
+                      v4i32, v4i32, IntOp>;
+}
 
-multiclass N3VIntSL_HS<bits<4> op11_8, 
+multiclass N3VIntSL_HS<bits<4> op11_8,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
@@ -1877,6 +2624,21 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, IntOp, Commutable>;
 }
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                      InstrItinClass itinD16, InstrItinClass itinD32,
+                      InstrItinClass itinQ16, InstrItinClass itinQ32,
+                      string OpcodeStr, string Dt,
+                      Intrinsic IntOp>
+  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+              OpcodeStr, Dt, IntOp> {
+  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+                      OpcodeStr, !strconcat(Dt, "8"),
+                      v8i8, v8i8, IntOp>;
+  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+                      OpcodeStr, !strconcat(Dt, "8"),
+                      v16i8, v16i8, IntOp>;
+}
+
 
 // ....then also with element size of 64 bits:
 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
@@ -1893,6 +2655,20 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       OpcodeStr, !strconcat(Dt, "64"),
                       v2i64, v2i64, IntOp, Commutable>;
 }
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp>
+  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+               OpcodeStr, Dt, IntOp> {
+  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+                      OpcodeStr, !strconcat(Dt, "64"),
+                      v1i64, v1i64, IntOp>;
+  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+                      OpcodeStr, !strconcat(Dt, "64"),
+                      v2i64, v2i64, IntOp>;
+}
 
 // Neon Narrowing 3-register vector intrinsics,
 //   source operand element sizes of 16, 32 and 64 bits:
@@ -1920,7 +2696,7 @@ multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
   def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                    OpcodeStr, !strconcat(Dt, "8"),
                    v8i16, v8i8, OpNode, Commutable>;
-  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, 
+  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v4i32, v4i16, OpNode, Commutable>;
   def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1944,7 +2720,7 @@ multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
   def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i16, v8i8, OpNode, ExtOp, Commutable>;
-  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, 
+  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, OpNode, ExtOp, Commutable>;
   def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1959,7 +2735,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
-  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, 
+  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, IntOp, Commutable>;
   def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1970,7 +2746,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                         InstrItinClass itin, string OpcodeStr, string Dt,
                         Intrinsic IntOp> {
-  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, 
+  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
   def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
@@ -1995,7 +2771,7 @@ multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
   def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                          OpcodeStr, !strconcat(Dt, "8"),
                          v8i16, v8i8, IntOp, ExtOp, Commutable>;
-  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, 
+  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i32, v4i16, IntOp, ExtOp, Commutable>;
   def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
@@ -2044,7 +2820,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
 }
 
-multiclass N3VMulOpSL_HS<bits<4> op11_8, 
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
                          InstrItinClass itinD16, InstrItinClass itinD32,
                          InstrItinClass itinQ16, InstrItinClass itinQ32,
                          string OpcodeStr, string Dt, SDNode ShOp> {
@@ -2174,30 +2950,6 @@ multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 }
 
 
-// Neon 2-register vector intrinsics,
-//   element sizes of 8, 16 and 32 bits:
-multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
-                      bits<5> op11_7, bit op4,
-                      InstrItinClass itinD, InstrItinClass itinQ,
-                      string OpcodeStr, string Dt, Intrinsic IntOp> {
-  // 64-bit vector types.
-  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
-  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
-  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
-
-  // 128-bit vector types.
-  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
-  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
-  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
-}
-
-
 // Neon Pairwise long 2-register intrinsics,
 //   element sizes of 8, 16 and 32 bits:
 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -2461,9 +3213,9 @@ def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                         "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
 def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                         "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
-def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                      v2f32, v2f32, fmul, 1>;
-def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                      v4f32, v4f32, fmul, 1>;
 defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
 def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
@@ -2491,7 +3243,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
 
 //   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
 defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
-                          IIC_VMULi16Q, IIC_VMULi32Q, 
+                          IIC_VMULi16Q, IIC_VMULi32Q,
                           "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
@@ -2555,15 +3307,19 @@ defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
 defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
 def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
-                          v2f32, fmul, fadd>;
+                          v2f32, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
-                          v4f32, fmul, fadd>;
+                          v4f32, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
 def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
-                            v2f32, fmul, fadd>;
+                            v2f32, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
-                            v4f32, v2f32, fmul, fadd>;
+                            v4f32, v2f32, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 
 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -2581,14 +3337,15 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
 
-def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
-                  (fmul (v4f32 QPR:$src2),
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+                  (fmul_su (v4f32 QPR:$src2),
                         (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
           (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                            (v4f32 QPR:$src2),
                            (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                    (DSubReg_i32_reg imm:$lane))),
-                           (SubReg_i32_lane imm:$lane)))>;
+                           (SubReg_i32_lane imm:$lane)))>,
+          Requires<[HasNEON, UseFPVMLx]>;
 
 //   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
 defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2608,15 +3365,19 @@ defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
 defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
-                          v2f32, fmul, fsub>;
+                          v2f32, fmul_su, fsub_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
-                          v4f32, fmul, fsub>;
+                          v4f32, fmul_su, fsub_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
-                            v2f32, fmul, fsub>;
+                            v2f32, fmul_su, fsub_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
-                            v4f32, v2f32, fmul, fsub>;
+                            v4f32, v2f32, fmul_su, fsub_mlx>,
+                Requires<[HasNEON, UseFPVMLx]>;
 
 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -2634,13 +3395,14 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
 
-def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
-                  (fmul (v4f32 QPR:$src2),
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+                  (fmul_su (v4f32 QPR:$src2),
                         (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
           (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                            (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                    (DSubReg_i32_reg imm:$lane))),
-                           (SubReg_i32_lane imm:$lane)))>;
+                           (SubReg_i32_lane imm:$lane)))>,
+          Requires<[HasNEON, UseFPVMLx]>;
 
 //   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
 defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2703,25 +3465,24 @@ def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      NEONvceq, 1>;
-// For disassembly only.
+
 defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
-                            "$dst, $src, #0">;
+                            "$Vd, $Vm, #0", NEONvceqz>;
 
 //   VCGE     : Vector Compare Greater Than or Equal
 defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
-defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 
+defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                         IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
 def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      NEONvcge, 0>;
-// For disassembly only.
+
 defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
-                            "$dst, $src, #0">;
-// For disassembly only.
+                            "$Vd, $Vm, #0", NEONvcgez>;
 defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
-                            "$dst, $src, #0">;
+                            "$Vd, $Vm, #0", NEONvclez>;
 
 //   VCGT     : Vector Compare Greater Than
 defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -2732,12 +3493,11 @@ def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      NEONvcgt, 0>;
-// For disassembly only.
+
 defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
-                            "$dst, $src, #0">;
-// For disassembly only.
+                            "$Vd, $Vm, #0", NEONvcgtz>;
 defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
-                            "$dst, $src, #0">;
+                            "$Vd, $Vm, #0", NEONvcltz>;
 
 //   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
 def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
@@ -2750,7 +3510,7 @@ def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
 def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                         "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
 //   VTST     : Vector Test Bits
-defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, 
+defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
 
 // Vector Bitwise Operations.
@@ -2779,104 +3539,190 @@ def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
 def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                       v4i32, v4i32, or, 1>;
 
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+                          IIC_VMOVImm,
+                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+                          [(set DPR:$Vd,
+                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+                          IIC_VMOVImm,
+                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+                          [(set DPR:$Vd,
+                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+  let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+                          IIC_VMOVImm,
+                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+                          [(set QPR:$Vd,
+                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+                          IIC_VMOVImm,
+                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+                          [(set QPR:$Vd,
+                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+  let Inst{10-9} = SIMM{10-9};
+}
+
+
 //   VBIC     : Vector Bitwise Bit Clear (AND NOT)
-def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
-                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
-                     "vbic", "$dst, $src1, $src2", "",
-                     [(set DPR:$dst, (v2i32 (and DPR:$src1,
-                                                 (vnotd DPR:$src2))))]>;
-def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
-                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
-                     "vbic", "$dst, $src1, $src2", "",
-                     [(set QPR:$dst, (v4i32 (and QPR:$src1,
-                                                 (vnotq QPR:$src2))))]>;
+def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+                     "vbic", "$Vd, $Vn, $Vm", "",
+                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+                                                 (vnotd DPR:$Vm))))]>;
+def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+                     "vbic", "$Vd, $Vn, $Vm", "",
+                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+                                                 (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+                          IIC_VMOVImm,
+                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+                          [(set DPR:$Vd,
+                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+                          IIC_VMOVImm,
+                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+                          [(set DPR:$Vd,
+                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+  let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+                          IIC_VMOVImm,
+                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+                          [(set QPR:$Vd,
+                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+                          IIC_VMOVImm,
+                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+                          [(set QPR:$Vd,
+                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+  let Inst{10-9} = SIMM{10-9};
+}
 
 //   VORN     : Vector Bitwise OR NOT
-def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
-                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
-                     "vorn", "$dst, $src1, $src2", "",
-                     [(set DPR:$dst, (v2i32 (or DPR:$src1,
-                                                (vnotd DPR:$src2))))]>;
-def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
-                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
-                     "vorn", "$dst, $src1, $src2", "",
-                     [(set QPR:$dst, (v4i32 (or QPR:$src1,
-                                                (vnotq QPR:$src2))))]>;
+def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+                     "vorn", "$Vd, $Vn, $Vm", "",
+                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+                                                (vnotd DPR:$Vm))))]>;
+def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+                     "vorn", "$Vd, $Vn, $Vm", "",
+                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+                                                (vnotq QPR:$Vm))))]>;
 
 //   VMVN     : Vector Bitwise NOT (Immediate)
 
 let isReMaterializable = 1 in {
-def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmvn", "i16", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
-def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+                         "vmvn", "i16", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmvn", "i16", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+                         "vmvn", "i16", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
 
-def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmvn", "i32", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
-def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+                         "vmvn", "i32", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+  let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmvn", "i32", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+                         "vmvn", "i32", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+  let Inst{11-8} = SIMM{11-8};
+}
 }
 
 //   VMVN     : Vector Bitwise NOT
 def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
-                     (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
-                     "vmvn", "$dst, $src", "",
-                     [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
+                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+                     "vmvn", "$Vd, $Vm", "",
+                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
 def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
-                     (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
-                     "vmvn", "$dst, $src", "",
-                     [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
+                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+                     "vmvn", "$Vd, $Vm", "",
+                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
 
 //   VBSL     : Vector Bitwise Select
-def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
-                     (ins DPR:$src1, DPR:$src2, DPR:$src3),
+def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                      N3RegFrm, IIC_VCNTiD,
-                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
-                     [(set DPR:$dst,
-                       (v2i32 (or (and DPR:$src2, DPR:$src1),
-                                  (and DPR:$src3, (vnotd DPR:$src1)))))]>;
-def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
-                     (ins QPR:$src1, QPR:$src2, QPR:$src3),
+                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+                     [(set DPR:$Vd,
+                       (v2i32 (or (and DPR:$Vn, DPR:$src1),
+                                  (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                      N3RegFrm, IIC_VCNTiQ,
-                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
-                     [(set QPR:$dst,
-                       (v4i32 (or (and QPR:$src2, QPR:$src1),
-                                  (and QPR:$src3, (vnotq QPR:$src1)))))]>;
+                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+                     [(set QPR:$Vd,
+                       (v4i32 (or (and QPR:$Vn, QPR:$src1),
+                                  (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
 
 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
 def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
-                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                      N3RegFrm, IIC_VBINiD,
-                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [/* For disassembly only; pattern left blank */]>;
 def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
-                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                      N3RegFrm, IIC_VBINiQ,
-                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [/* For disassembly only; pattern left blank */]>;
 
 //   VBIT     : Vector Bitwise Insert if True
 //              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
 def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
-                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                      N3RegFrm, IIC_VBINiD,
-                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [/* For disassembly only; pattern left blank */]>;
 def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
-                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                      N3RegFrm, IIC_VBINiQ,
-                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [/* For disassembly only; pattern left blank */]>;
 
 // VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
@@ -2957,8 +3803,8 @@ def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
 def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                         "vpadd", "i32",
                         v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 
-                        IIC_VBIND, "vpadd", "f32",
+def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+                        IIC_VPBIND, "vpadd", "f32",
                         v2f32, v2f32, int_arm_neon_vpadd, 0>;
 
 //   VPADDL   : Vector Pairwise Add Long
@@ -2986,7 +3832,7 @@ def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
 def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                         "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                         "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
 
 //   VPMIN    : Vector Pairwise Minimum
@@ -3002,16 +3848,16 @@ def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
 def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                         "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
+def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                         "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
 
 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
 
 //   VRECPE   : Vector Reciprocal Estimate
-def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 
+def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                         IIC_VUNAD, "vrecpe", "u32",
                         v2i32, v2i32, int_arm_neon_vrecpe>;
-def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 
+def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                         IIC_VUNAQ, "vrecpe", "u32",
                         v4i32, v4i32, int_arm_neon_vrecpe>;
 def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
@@ -3039,7 +3885,7 @@ def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
 def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                          IIC_VUNAD, "vrsqrte", "f32",
                          v2f32, v2f32, int_arm_neon_vrsqrte>;
-def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 
+def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                          IIC_VUNAQ, "vrsqrte", "f32",
                          v4f32, v4f32, int_arm_neon_vrsqrte>;
 
@@ -3054,12 +3900,12 @@ def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
 // Vector Shifts.
 
 //   VSHL     : Vector Shift
-defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
-                            "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+                            "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
-                            "vshl", "u", int_arm_neon_vshiftu, 0>;
+                            "vshl", "u", int_arm_neon_vshiftu>;
 //   VSHL     : Vector Shift Left (Immediate)
 defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
                            N2RegVShLFrm>;
@@ -3093,12 +3939,12 @@ defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                            NEONvshrn>;
 
 //   VRSHL    : Vector Rounding Shift
-defm VRSHLs   : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
+defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vrshl", "s", int_arm_neon_vrshifts, 0>;
-defm VRSHLu   : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
+                            "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vrshl", "u", int_arm_neon_vrshiftu, 0>;
+                            "vrshl", "u", int_arm_neon_vrshiftu>;
 //   VRSHR    : Vector Rounding Shift Right
 defm VRSHRs   : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
                            N2RegVShRFrm>;
@@ -3110,12 +3956,12 @@ defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                            NEONvrshrn>;
 
 //   VQSHL    : Vector Saturating Shift
-defm VQSHLs   : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
+defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vqshl", "s", int_arm_neon_vqshifts, 0>;
-defm VQSHLu   : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
+                            "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vqshl", "u", int_arm_neon_vqshiftu, 0>;
+                            "vqshl", "u", int_arm_neon_vqshiftu>;
 //   VQSHL    : Vector Saturating Shift Left (Immediate)
 defm VQSHLsi  : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
                            N2RegVShLFrm>;
@@ -3136,12 +3982,12 @@ defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                            NEONvqshrnsu>;
 
 //   VQRSHL   : Vector Saturating Rounding Shift
-defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
+defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
+                            "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
-                            "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
+                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
 
 //   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
@@ -3168,7 +4014,7 @@ defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
 // Vector Absolute and Saturating Absolute.
 
 //   VABS     : Vector Absolute Value
-defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 
+defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                            IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                            int_arm_neon_vabs>;
 def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
@@ -3179,7 +4025,7 @@ def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                         v4f32, v4f32, int_arm_neon_vabs>;
 
 //   VQABS    : Vector Saturating Absolute Value
-defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 
+defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                            IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                            int_arm_neon_vqabs>;
 
@@ -3191,13 +4037,13 @@ def vnegq  : PatFrag<(ops node:$in),
                      (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
 
 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
-        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
+  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
-        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
+  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
 
 //   VNEG     : Vector Negate (integer)
 def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
@@ -3209,13 +4055,13 @@ def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
 
 //   VNEG     : Vector Negate (floating-point)
 def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
-                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
-                    "vneg", "f32", "$dst, $src", "",
-                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
+                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+                    "vneg", "f32", "$Vd, $Vm", "",
+                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
 def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
-                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
-                    "vneg", "f32", "$dst, $src", "",
-                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
+                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+                    "vneg", "f32", "$Vd, $Vm", "",
+                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
 
 def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -3225,22 +4071,22 @@ def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
 
 //   VQNEG    : Vector Saturating Negate
-defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 
+defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                            IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                            int_arm_neon_vqneg>;
 
 // Vector Bit Counting Operations.
 
 //   VCLS     : Vector Count Leading Sign Bits
-defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, 
+defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                            IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                            int_arm_neon_vcls>;
 //   VCLZ     : Vector Count Leading Zeros
-defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, 
+defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                            IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                            int_arm_neon_vclz>;
 //   VCNT     : Vector Count One Bits
-def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 
+def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                         IIC_VCNTiD, "vcnt", "8",
                         v8i8, v8i8, int_arm_neon_vcnt>;
 def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
@@ -3249,98 +4095,126 @@ def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
 
 // Vector Swap -- for disassembly only.
 def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
-                     (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                     "vswp", "$dst, $src", "", []>;
+                     (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                     "vswp", "$Vd, $Vm", "", []>;
 def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
-                     (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                     "vswp", "$dst, $src", "", []>;
+                     (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                     "vswp", "$Vd, $Vm", "", []>;
 
 // Vector Move Operations.
 
 //   VMOV     : Vector Move (Register)
 
 let neverHasSideEffects = 1 in {
-def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
-                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
-def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
-                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
+                     N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+  let Vn{4-0} = Vm{4-0};
+}
+def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+                     N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+  let Vn{4-0} = Vm{4-0};
+}
 
 // Pseudo vector move instructions for QQ and QQQQ registers. This should
 // be expanded after register allocation is completed.
 def  VMOVQQ   : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
-                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+                NoItinerary, []>;
 
 def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
-                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+                NoItinerary, []>;
 } // neverHasSideEffects
 
 //   VMOV     : Vector Move (Immediate)
 
 let isReMaterializable = 1 in {
-def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i8", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
+                         "vmov", "i8", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i8", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+                         "vmov", "i8", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
 
-def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i16", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+                         "vmov", "i16", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+  let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i16", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
+                         "vmov", "i16", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
 
-def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i32", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
+                         "vmov", "i32", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+  let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i32", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
+                         "vmov", "i32", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+  let Inst{11-8} = SIMM{11-8};
+}
 
-def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i64", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
+                         "vmov", "i64", "$Vd, $SIMM", "",
+                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
-                         "vmov", "i64", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+                         "vmov", "i64", "$Vd, $SIMM", "",
+                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
 } // isReMaterializable
 
 //   VMOV     : Vector Get Lane (move scalar to ARM core register)
 
 def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+                                           imm:$lane))]> {
+  let Inst{21}  = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+                                           imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6}  = lane{0};
+}
 def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+                                           imm:$lane))]> {
+  let Inst{21}  = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+                                           imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6}  = lane{0};
+}
 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+                                           imm:$lane))]> {
+  let Inst{21} = lane{0};
+}
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
           (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3376,37 +4250,45 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
 
 //   VMOV     : Vector Set Lane (move ARM core register to scalar)
 
-let Constraints = "$src1 = $dst" in {
-def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
-                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                          IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
-                          [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
-                                           GPR:$src2, imm:$lane))]>;
-def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
-                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                          IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
-                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
-                                           GPR:$src2, imm:$lane))]>;
-def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
-                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                          IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
-                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
-                                           GPR:$src2, imm:$lane))]>;
+let Constraints = "$src1 = $V" in {
+def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                          IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+                                           GPR:$R, imm:$lane))]> {
+  let Inst{21}  = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                          IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+                                           GPR:$R, imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6}  = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                          IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+                                           GPR:$R, imm:$lane))]> {
+  let Inst{21} = lane{0};
+}
 }
 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
-          (v16i8 (INSERT_SUBREG QPR:$src1, 
+          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                    (DSubReg_i8_reg imm:$lane))),
                             GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
-          (v8i16 (INSERT_SUBREG QPR:$src1, 
+          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                      (DSubReg_i16_reg imm:$lane))),
                              GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
-          (v4i32 (INSERT_SUBREG QPR:$src1, 
+          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                      (DSubReg_i32_reg imm:$lane))),
                              GPR:$src2, (SubReg_i32_lane imm:$lane))),
@@ -3454,13 +4336,13 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
 //   VDUP     : Vector Duplicate (from ARM core register to all elements)
 
 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
-  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
-          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
-          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
+  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+          IIC_VMOVIS, "vdup", Dt, "$V, $R",
+          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
-  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
-          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
-          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
+  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+          IIC_VMOVIS, "vdup", Dt, "$V, $R",
+          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
 
 def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
 def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
@@ -3469,40 +4351,56 @@ def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
 def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
 def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
 
-def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
-                      IIC_VMOVIS, "vdup", "32", "$dst, $src",
-                      [(set DPR:$dst, (v2f32 (NEONvdup
-                                              (f32 (bitconvert GPR:$src)))))]>;
-def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
-                      IIC_VMOVIS, "vdup", "32", "$dst, $src",
-                      [(set QPR:$dst, (v4f32 (NEONvdup
-                                              (f32 (bitconvert GPR:$src)))))]>;
+def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
+                      IIC_VMOVIS, "vdup", "32", "$V, $R",
+                      [(set DPR:$V, (v2f32 (NEONvdup
+                                              (f32 (bitconvert GPR:$R)))))]>;
+def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
+                      IIC_VMOVIS, "vdup", "32", "$V, $R",
+                      [(set QPR:$V, (v4f32 (NEONvdup
+                                              (f32 (bitconvert GPR:$R)))))]>;
 
 //   VDUP     : Vector Duplicate Lane (from scalar to all elements)
 
 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
               ValueType Ty>
-  : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
-              [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
+  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
 
 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy>
-  : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
-              [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
+  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                       imm:$lane)))]>;
 
 // Inst{19-16} is partially specified depending on the element size.
 
-def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
-def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
-def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
-def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
+def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+  let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+  let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+  let Inst{19} = lane{0};
+}
+def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
+  let Inst{19} = lane{0};
+}
+def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+  let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+  let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+  let Inst{19} = lane{0};
+}
+def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
+  let Inst{19} = lane{0};
+}
 
 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
           (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3521,18 +4419,13 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
 
-def  VDUPfdf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
-                    (outs DPR:$dst), (ins SPR:$src),
-                    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def  VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                     [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
-
-def  VDUPfqf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
-                    (outs QPR:$dst), (ins SPR:$src),
-                    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def  VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                     [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
 
 //   VMOVN    : Vector Narrowing Move
-defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                          "vmovn", "i", trunc>;
 //   VQMOVN   : Vector Saturating Narrowing Move
 defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
@@ -3585,20 +4478,30 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
 
+//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
+def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+                        IIC_VUNAQ, "vcvt", "f16.f32",
+                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+                Requires<[HasNEON, HasFP16]>;
+def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+                        IIC_VUNAQ, "vcvt", "f32.f16",
+                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+                Requires<[HasNEON, HasFP16]>;
+
 // Vector Reverse.
 
 //   VREV64   : Vector Reverse elements within 64-bit doublewords
 
 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
 
 def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
@@ -3613,15 +4516,15 @@ def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;
 //   VREV32   : Vector Reverse elements within 32-bit words
 
 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
 
 def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
@@ -3632,46 +4535,91 @@ def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
 //   VREV16   : Vector Reverse elements within 16-bit halfwords
 
 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
-  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
-        OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
 
 def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
 def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
 
 // Other Vector Shuffles.
 
+//  Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
 //   VEXT     : Vector Extract
 
 class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
-  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
-        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
-        IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
-        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
-                                      (Ty DPR:$rhs), imm:$index)))]>;
+  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+                                      (Ty DPR:$Vm), imm:$index)))]> {
+  bits<4> index;
+  let Inst{11-8} = index{3-0};
+}
 
 class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
-  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
-        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
-        IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
-        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
-                                      (Ty QPR:$rhs), imm:$index)))]>;
-
-def VEXTd8  : VEXTd<"vext", "8",  v8i8>;
-def VEXTd16 : VEXTd<"vext", "16", v4i16>;
-def VEXTd32 : VEXTd<"vext", "32", v2i32>;
-def VEXTdf  : VEXTd<"vext", "32", v2f32>;
-
-def VEXTq8  : VEXTq<"vext", "8",  v16i8>;
-def VEXTq16 : VEXTq<"vext", "16", v8i16>;
-def VEXTq32 : VEXTq<"vext", "32", v4i32>;
-def VEXTqf  : VEXTq<"vext", "32", v4f32>;
+  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+        (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+                                      (Ty QPR:$Vm), imm:$index)))]> {
+  bits<4> index;
+  let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8  : VEXTd<"vext", "8",  v8i8> {
+  let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+  let Inst{11-9} = index{2-0};
+  let Inst{8}    = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8}    = 0b00;
+}
+def VEXTdf  : VEXTd<"vext", "32", v2f32> {
+  let Inst{11}    = index{0};
+  let Inst{10-8}  = 0b000;
+}
+
+def VEXTq8  : VEXTq<"vext", "8",  v16i8> {
+  let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+  let Inst{11-9} = index{2-0};
+  let Inst{8}    = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8}    = 0b00;
+}
+def VEXTqf  : VEXTq<"vext", "32", v4f32> {
+  let Inst{11}    = index{0};
+  let Inst{10-8}  = 0b000;
+}
 
 //   VTRN     : Vector Transpose
 
@@ -3707,160 +4655,120 @@ def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
 
 //   VTBL     : Vector Table Lookup
 def  VTBL1
-  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
-        "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
-        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
+  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+        "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
 let hasExtraSrcRegAllocReq = 1 in {
 def  VTBL2
-  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
-        "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
+  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
 def  VTBL3
-  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
-        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
+  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
 def  VTBL4
-  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
+  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
         NVTBLFrm, IIC_VTB4,
-        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
 } // hasExtraSrcRegAllocReq = 1
 
+def  VTBL2Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def  VTBL3Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def  VTBL4Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
 //   VTBX     : Vector Table Extension
 def  VTBX1
-  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
-        "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
-        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
-                               DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
+  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+        "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+                               DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
 let hasExtraSrcRegAllocReq = 1 in {
 def  VTBX2
-  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
-        "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
+  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
 def  VTBX3
-  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
+  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
         NVTBLFrm, IIC_VTBX3,
-        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
-        "$orig = $dst", []>;
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+        "$orig = $Vd", []>;
 def  VTBX4
-  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
-        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
-        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
-        "$orig = $dst", []>;
+  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+        "$orig = $Vd", []>;
 } // hasExtraSrcRegAllocReq = 1
 
+def  VTBX2Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+                IIC_VTBX2, "$orig = $dst", []>;
+def  VTBX3Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+                IIC_VTBX3, "$orig = $dst", []>;
+def  VTBX4Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+                IIC_VTBX4, "$orig = $dst", []>;
+
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math
 //===----------------------------------------------------------------------===//
 
-class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
-  : NEONFPPat<(ResTy (OpNode SPR:$a)),
-              (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
-                                                       SPR:$a, ssub_0))),
-                              ssub_0)>;
+class N2VSPat<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a)),
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
 
 class N3VSPat<SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
-              (EXTRACT_SUBREG (v2f32
-                                 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$a, ssub_0),
-                                       (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$b, ssub_0))),
-                              ssub_0)>;
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
 
 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
-              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$acc, ssub_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$a, ssub_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$b, ssub_0)),
-                              ssub_0)>;
-
-// These need separate instructions because they must use DPR_VFP2 register
-// class which have SPR sub-registers.
-
-// Vector Add Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
-def : N3VSPat<fadd, VADDfd_sfp>;
-
-// Vector Sub Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
-def : N3VSPat<fsub, VSUBfd_sfp>;
-
-// Vector Multiply Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
-def : N3VSPat<fmul, VMULfd_sfp>;
-
-// Vector Multiply-Accumulate/Subtract used for single-precision FP
-// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
-// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
-
-//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
-//                           v2f32, fmul, fadd>;
-//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
-
-//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
-//                           v2f32, fmul, fsub>;
-//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
-
-// Vector Absolute used for single-precision FP
-let neverHasSideEffects = 1 in
-def  VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
-                      (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
-                      "vabs", "f32", "$dst, $src", "", []>;
-def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
-
-// Vector Negate used for single-precision FP
-let neverHasSideEffects = 1 in
-def  VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
-                      (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
-                      "vneg", "f32", "$dst, $src", "", []>;
-def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
-
-// Vector Maximum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
-                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
-                     "vmax", "f32", "$dst, $src1, $src2", "", []>;
-def : N3VSPat<NEONfmax, VMAXfd_sfp>;
-
-// Vector Minimum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
-                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
-                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
-def : N3VSPat<NEONfmin, VMINfd_sfp>;
-
-// Vector Convert between single-precision FP and integer
-let neverHasSideEffects = 1 in
-def  VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
-                         v2i32, v2f32, fp_to_sint>;
-def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
-
-let neverHasSideEffects = 1 in
-def  VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
-                         v2i32, v2f32, fp_to_uint>;
-def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
-
-let neverHasSideEffects = 1 in
-def  VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
-                         v2f32, v2i32, sint_to_fp>;
-def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
-
-let neverHasSideEffects = 1 in
-def  VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
-                         v2f32, v2i32, uint_to_fp>;
-def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$acc, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index a13ff1232749..826ef46bcdb5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,7 +16,7 @@
 //
 
 def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
-                      [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                        SDNPVariadic]>;
 
 def imm_neg_XFORM : SDNodeXForm<imm, [{
@@ -26,7 +26,6 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
 }]>;
 
-
 /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
 def imm0_7 : PatLeaf<(i32 imm), [{
   return (uint32_t)N->getZExtValue() < 8;
@@ -50,9 +49,9 @@ def imm8_255_neg : PatLeaf<(i32 imm), [{
   return Val >= 8 && Val < 256;
 }], imm_neg_XFORM>;
 
-// Break imm's up into two pieces: an immediate + a left shift.
-// This uses thumb_immshifted to match and thumb_immshifted_val and
-// thumb_immshifted_shamt to get the val/shift pieces.
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
 def thumb_immshifted : PatLeaf<(imm), [{
   return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
 }]>;
@@ -67,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(V, MVT::i32);
 }]>;
 
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+  let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
 // Scaled 4 immediate.
 def t_imm_s4 : Operand<i32> {
   let PrintMethod = "printThumbS4ImmOperand";
@@ -74,47 +78,114 @@ def t_imm_s4 : Operand<i32> {
 
 // Define Thumb specific addressing modes.
 
+def t_brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+  let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+  let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+  let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+  let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+  let Name = "MemModeRegThumb";
+  let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+  let Name = "MemModeImmThumb";
+  let SuperClasses = [];
+}
+
 // t_addrmode_rr := reg + reg
 //
 def t_addrmode_rr : Operand<i32>,
                     ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
   let PrintMethod = "printThumbAddrModeRROperand";
   let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
 }
 
-// t_addrmode_s4 := reg + reg
-//                  reg + imm5 * 4
+// t_addrmode_rrs := reg + reg
 //
-def t_addrmode_s4 : Operand<i32>,
-                    ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> {
-  let PrintMethod = "printThumbAddrModeS4Operand";
-  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_rrs1 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S4Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
 }
 
-// t_addrmode_s2 := reg + reg
-//                  reg + imm5 * 2
+// t_addrmode_is2 := reg + imm5 * 2
 //
-def t_addrmode_s2 : Operand<i32>,
-                    ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> {
-  let PrintMethod = "printThumbAddrModeS2Operand";
-  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_is2 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S2Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
 }
 
-// t_addrmode_s1 := reg + reg
-//                  reg + imm5
+// t_addrmode_is1 := reg + imm5
 //
-def t_addrmode_s1 : Operand<i32>,
-                    ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> {
-  let PrintMethod = "printThumbAddrModeS1Operand";
-  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_is1 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S1Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
 }
 
 // t_addrmode_sp := sp + imm8 * 4
 //
 def t_addrmode_sp : Operand<i32>,
                     ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+  let EncoderMethod = "getAddrModeThumbSPOpValue";
   let PrintMethod = "printThumbAddrModeSPOperand";
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+  let EncoderMethod = "getAddrModePCOpValue";
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
 }
 
 //===----------------------------------------------------------------------===//
@@ -126,132 +197,162 @@ def t_addrmode_sp : Operand<i32>,
 // these will always be in pairs, and asserts if it finds otherwise. Better way?
 let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
 def tADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
-           "${:comment} tADJCALLSTACKUP $amt1",
-           [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
+  PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+             [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+            Requires<[IsThumb, IsThumb1Only]>;
 
 def tADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
-           "${:comment} tADJCALLSTACKDOWN $amt",
-           [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
+  PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+             [(ARMcallseq_start imm:$amt)]>,
+            Requires<[IsThumb, IsThumb1Only]>;
+}
+
+// T1Disassembly - A simple class to make encoding some disassembly patterns
+// easier and less verbose.
+class T1Disassembly<bits<2> op1, bits<8> op2>
+  : T1Encoding<0b101111> {
+  let Inst{9-8} = op1;
+  let Inst{7-0} = op2;
 }
 
 def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
                 [/* For disassembly only; pattern left blank */]>,
-           T1Encoding<0b101111> {
-  let Inst{9-8} = 0b11;
-  let Inst{7-0} = 0b00000000;
-} 
+           T1Disassembly<0b11, 0x00>; // A8.6.110
 
 def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
                   [/* For disassembly only; pattern left blank */]>,
-             T1Encoding<0b101111> {
-  let Inst{9-8} = 0b11;
-  let Inst{7-0} = 0b00010000;
-} 
+           T1Disassembly<0b11, 0x10>; // A8.6.410
 
 def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
                 [/* For disassembly only; pattern left blank */]>,
-           T1Encoding<0b101111> {
-  let Inst{9-8} = 0b11;
-  let Inst{7-0} = 0b00100000;
-} 
+           T1Disassembly<0b11, 0x20>; // A8.6.408
 
 def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
                 [/* For disassembly only; pattern left blank */]>,
-           T1Encoding<0b101111> {
-  let Inst{9-8} = 0b11;
-  let Inst{7-0} = 0b00110000;
-} 
+           T1Disassembly<0b11, 0x30>; // A8.6.409
 
 def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
                 [/* For disassembly only; pattern left blank */]>,
-           T1Encoding<0b101111> {
-  let Inst{9-8} = 0b11;
-  let Inst{7-0} = 0b01000000;
-} 
+           T1Disassembly<0b11, 0x40>; // A8.6.157
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+                [/* For disassembly only; pattern left blank */]>,
+           T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+  // A8.6.22
+  bits<8> val;
+  let Inst{7-0} = val;
+}
 
 def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
                     [/* For disassembly only; pattern left blank */]>,
                 T1Encoding<0b101101> {
+  // A8.6.156
   let Inst{9-5} = 0b10010;
-  let Inst{3} = 1;
+  let Inst{4}   = 1;
+  let Inst{3}   = 1;            // Big-Endian
+  let Inst{2-0} = 0b000;
 }
 
 def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
                     [/* For disassembly only; pattern left blank */]>,
                 T1Encoding<0b101101> {
+  // A8.6.156
   let Inst{9-5} = 0b10010;
-  let Inst{3} = 0;
+  let Inst{4}   = 1;
+  let Inst{3}   = 0;            // Little-Endian
+  let Inst{2-0} = 0b000;
 }
 
-// The i32imm operand $val can be used by a debugger to store more information
-// about the breakpoint.
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+                NoItinerary, "cps$imod $iflags",
                 [/* For disassembly only; pattern left blank */]>,
-            T1Encoding<0b101111> {
-  let Inst{9-8} = 0b10;
+           T1Misc<0b0110011> {
+  // A8.6.38 & B6.1.1
+  bit imod;
+  bits<3> iflags;
+
+  let Inst{4}   = imod;
+  let Inst{3}   = 0;
+  let Inst{2-0} = iflags;
 }
 
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode ==> don't care
-// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr)
-// opt{8-6} = AIF from Inst{2-0}
-// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
-//
-// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM
-// CPS which has more options.
-def tCPS : T1I<(outs), (ins cps_opt:$opt), NoItinerary, "cps$opt",
-              [/* For disassembly only; pattern left blank */]>,
-           T1Misc<0b0110011>;
-
 // For both thumb1 and thumb2.
-let isNotDuplicable = 1 in
-def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
-                 "\n$cp:\n\tadd\t$dst, pc",
-                 [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+                  [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
               T1Special<{0,0,?,?}> {
-  let Inst{6-3} = 0b1111; // A8.6.6 Rm = pc
+  // A8.6.6
+  bits<3> dst;
+  let Inst{6-3} = 0b1111; // Rm = pc
+  let Inst{2-0} = dst;
 }
 
-// PC relative add.
+// PC relative add (ADR).
 def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
-                  "add\t$dst, pc, $rhs", []>,
-               T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+                   "add\t$dst, pc, $rhs", []>,
+               T1Encoding<{1,0,1,0,0,?}> {
+  // A6.2 & A8.6.10
+  bits<3> dst;
+  bits<8> rhs;
+  let Inst{10-8} = dst;
+  let Inst{7-0}  = rhs;
+}
 
-// ADD rd, sp, #imm8
+// ADD <Rd>, sp, #<imm8>
 // This is rematerializable, which is particularly useful for taking the
 // address of locals.
-let isReMaterializable = 1 in {
+let isReMaterializable = 1 in
 def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
-                  "add\t$dst, $sp, $rhs", []>,
-               T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+                   "add\t$dst, $sp, $rhs", []>,
+               T1Encoding<{1,0,1,0,1,?}> {
+  // A6.2 & A8.6.8
+  bits<3> dst;
+  bits<8> rhs;
+  let Inst{10-8} = dst;
+  let Inst{7-0}  = rhs;
 }
 
-// ADD sp, sp, #imm7
+// ADD sp, sp, #<imm7>
 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
                   "add\t$dst, $rhs", []>,
-              T1Misc<{0,0,0,0,0,?,?}>; // A6.2.5 & A8.6.8
+              T1Misc<{0,0,0,0,0,?,?}> {
+  // A6.2.5 & A8.6.8
+  bits<7> rhs;
+  let Inst{6-0} = rhs;
+}
 
-// SUB sp, sp, #imm7
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
 def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
                   "sub\t$dst, $rhs", []>,
-              T1Misc<{0,0,0,0,1,?,?}>; // A6.2.5 & A8.6.215
+              T1Misc<{0,0,0,0,1,?,?}> {
+  // A6.2.5 & A8.6.214
+  bits<7> rhs;
+  let Inst{6-0} = rhs;
+}
 
-// ADD rm, sp
+// ADD <Rm>, sp
 def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
                   "add\t$dst, $rhs", []>,
               T1Special<{0,0,?,?}> {
-  let Inst{6-3} = 0b1101; // A8.6.9 Encoding T1
+  // A8.6.9 Encoding T1
+  bits<4> dst;
+  let Inst{7}   = dst{3};
+  let Inst{6-3} = 0b1101;
+  let Inst{2-0} = dst{2-0};
 }
 
-// ADD sp, rm
+// ADD sp, <Rm>
 def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
                   "add\t$dst, $rhs", []>,
               T1Special<{0,0,?,?}> {
   // A8.6.9 Encoding T2
+  bits<4> dst;
   let Inst{7} = 1;
+  let Inst{6-3} = dst;
   let Inst{2-0} = 0b101;
 }
 
@@ -260,21 +361,37 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
 //
 
 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-  def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>,
-                T1Special<{1,1,0,?}> { // A6.2.3 & A8.6.25
+  def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr",
+                   [(ARMretflag)]>,
+                T1Special<{1,1,0,?}> {
+    // A6.2.3 & A8.6.25
     let Inst{6-3} = 0b1110; // Rm = lr
+    let Inst{2-0} = 0b000;
   }
+
   // Alternative return instruction used by vararg functions.
-  def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>,
-                       T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25
+  def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
+                          IIC_Br, "bx\t$Rm",
+                          []>,
+                       T1Special<{1,1,0,?}> {
+    // A6.2.3 & A8.6.25
+    bits<4> Rm;
+    let Inst{6-3} = Rm;
+    let Inst{2-0} = 0b000;
+  }
 }
 
 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-  def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst",
-                  [(brind GPR:$dst)]>,
-               T1Special<{1,0,1,?}> {
-    // <Rd> = Inst{7:2-0} = pc
+  def tBRIND : TI<(outs), (ins GPR:$Rm),
+                  IIC_Br,
+                  "mov\tpc, $Rm",
+                  [(brind GPR:$Rm)]>,
+               T1Special<{1,0,?,?}> {
+    // A8.6.97
+    bits<4> Rm;
+    let Inst{7}   = 1;          // <Rd> = Inst{7:2-0} = pc
+    let Inst{6-3} = Rm;
     let Inst{2-0} = 0b111;
   }
 }
@@ -282,28 +399,52 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 // FIXME: remove when we have a way to marking a MI with these properties.
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
-                   "pop${p}\t$dsts", []>,
-               T1Misc<{1,1,0,?,?,?,?}>;
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+                   IIC_iPop_Br,
+                   "pop${p}\t$regs", []>,
+               T1Misc<{1,1,0,?,?,?,?}> {
+  // A8.6.121
+  bits<16> regs;
+  let Inst{8}   = regs{15};     // registers = P:'0000000':register_list
+  let Inst{7-0} = regs{7-0};
+}
 
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
 let isCall = 1,
+  // On non-Darwin platforms R9 is callee-saved.
   Defs = [R0,  R1,  R2,  R3,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [SP] in {
   // Also used for Thumb2
   def tBL  : TIx2<0b11110, 0b11, 1,
-                  (outs), (ins i32imm:$func, variable_ops), IIC_Br,
-                  "bl\t${func:call}",
+                  (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
+                  "bl\t$func",
                   [(ARMtcall tglobaladdr:$func)]>,
-             Requires<[IsThumb, IsNotDarwin]>;
+             Requires<[IsThumb, IsNotDarwin]> {
+    bits<21> func;
+    let Inst{25-16} = func{20-11};
+    let Inst{13} = 1;
+    let Inst{11} = 1;
+    let Inst{10-0} = func{10-0};
+  }
 
   // ARMv5T and above, also used for Thumb2
   def tBLXi : TIx2<0b11110, 0b11, 0,
-                   (outs), (ins i32imm:$func, variable_ops), IIC_Br,
-                   "blx\t${func:call}",
+                   (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
+                   "blx\t$func",
                    [(ARMcall tglobaladdr:$func)]>,
-              Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+              Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+    bits<21> func;
+    let Inst{25-16} = func{20-11};
+    let Inst{13} = 1;
+    let Inst{11} = 1;
+    let Inst{10-1} = func{10-1};
+    let Inst{0} = 0; // func{0} is assumed zero
+  }
 
   // Also used for Thumb2
   def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
@@ -313,642 +454,1002 @@ let isCall = 1,
               T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24;
 
   // ARMv4T
+  // FIXME: Should be a pseudo.
+  let isCodeGenOnly = 1 in
   def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?,
                   (outs), (ins tGPR:$func, variable_ops), IIC_Br,
                   "mov\tlr, pc\n\tbx\t$func",
                   [(ARMcall_nolink tGPR:$func)]>,
-            Requires<[IsThumb1Only, IsNotDarwin]>;
+            Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
 }
 
-// On Darwin R9 is call-clobbered.
 let isCall = 1,
+  // On Darwin R9 is call-clobbered.
+  // R7 is marked as a use to prevent frame-pointer assignments from being
+  // moved above / below calls.
   Defs = [R0,  R1,  R2,  R3,  R9,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [R7, SP] in {
   // Also used for Thumb2
   def tBLr9 : TIx2<0b11110, 0b11, 1,
-                   (outs), (ins i32imm:$func, variable_ops), IIC_Br,
-                   "bl\t${func:call}",
+                   (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+                   IIC_Br, "bl${p}\t$func",
                    [(ARMtcall tglobaladdr:$func)]>,
-              Requires<[IsThumb, IsDarwin]>;
+              Requires<[IsThumb, IsDarwin]> {
+    bits<21> func;
+    let Inst{25-16} = func{20-11};
+    let Inst{13} = 1;
+    let Inst{11} = 1;
+    let Inst{10-0} = func{10-0};
+  }
 
   // ARMv5T and above, also used for Thumb2
   def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
-                      (outs), (ins i32imm:$func, variable_ops), IIC_Br,
-                      "blx\t${func:call}",
+                      (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+                      IIC_Br, "blx${p}\t$func",
                       [(ARMcall tglobaladdr:$func)]>,
-                 Requires<[IsThumb, HasV5T, IsDarwin]>;
+                 Requires<[IsThumb, HasV5T, IsDarwin]> {
+    bits<21> func;
+    let Inst{25-16} = func{20-11};
+    let Inst{13} = 1;
+    let Inst{11} = 1;
+    let Inst{10-1} = func{10-1};
+    let Inst{0} = 0; // func{0} is assumed zero
+  }
 
   // Also used for Thumb2
-  def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
-                    "blx\t$func",
+  def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+                    "blx${p}\t$func",
                     [(ARMtcall GPR:$func)]>,
                  Requires<[IsThumb, HasV5T, IsDarwin]>,
-                 T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24
+                 T1Special<{1,1,1,?}> {
+    // A6.2.3 & A8.6.24
+    bits<4> func;
+    let Inst{6-3} = func;
+    let Inst{2-0} = 0b000;
+  }
 
   // ARMv4T
+  let isCodeGenOnly = 1 in
+  // FIXME: Should be a pseudo.
   def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?,
                    (outs), (ins tGPR:$func, variable_ops), IIC_Br,
                    "mov\tlr, pc\n\tbx\t$func",
                    [(ARMcall_nolink tGPR:$func)]>,
-              Requires<[IsThumb1Only, IsDarwin]>;
+              Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
 }
 
-let isBranch = 1, isTerminator = 1 in {
-  let isBarrier = 1 in {
-    let isPredicable = 1 in
-    def tB   : T1I<(outs), (ins brtarget:$target), IIC_Br,
-                   "b\t$target", [(br bb:$target)]>,
-               T1Encoding<{1,1,1,0,0,?}>;
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+  let isPredicable = 1 in
+  def tB   : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
+                 "b\t$target", [(br bb:$target)]>,
+             T1Encoding<{1,1,1,0,0,?}> {
+    bits<11> target;
+    let Inst{10-0} = target;
+  }
 
   // Far jump
+  // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+  // the clobber of LR.
   let Defs = [LR] in
-  def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
-                    "bl\t$target\t${:comment} far jump",[]>;
-
-  def tBR_JTr : T1JTI<(outs),
-                      (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
-                      IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
-                      [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
-                Encoding16 {
-    let Inst{15-7} = 0b010001101;
-    let Inst{2-0} = 0b111;
-  }
+  def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target),
+                          Size4Bytes, IIC_Br, []>;
+
+  def tBR_JTr : tPseudoInst<(outs),
+                      (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+                      SizeSpecial, IIC_Br,
+                      [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+    list<Predicate> Predicates = [IsThumb, IsThumb1Only];
   }
 }
 
 // FIXME: should be able to write a pattern for ARMBrcond, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let isBranch = 1, isTerminator = 1 in
-  def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br,
-                 "b$cc\t$target",
+  def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+                 "b${p}\t$target",
                  [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
-             T1Encoding<{1,1,0,1,?,?}>;
+             T1Encoding<{1,1,0,1,?,?}> {
+  bits<4> p;
+  bits<8> target;
+  let Inst{11-8} = p;
+  let Inst{7-0} = target;
+}
 
 // Compare and branch on zero / non-zero
 let isBranch = 1, isTerminator = 1 in {
-  def tCBZ  : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
-                  "cbz\t$cmp, $target", []>,
-              T1Misc<{0,0,?,1,?,?,?}>;
+  def tCBZ  : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+                  "cbz\t$Rn, $target", []>,
+              T1Misc<{0,0,?,1,?,?,?}> {
+    // A8.6.27
+    bits<6> target;
+    bits<3> Rn;
+    let Inst{9}   = target{5};
+    let Inst{7-3} = target{4-0};
+    let Inst{2-0} = Rn;
+  }
 
-  def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
+  def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br,
                   "cbnz\t$cmp, $target", []>,
-              T1Misc<{1,0,?,1,?,?,?}>;
+              T1Misc<{1,0,?,1,?,?,?}> {
+    // A8.6.27
+    bits<6> target;
+    bits<3> Rn;
+    let Inst{9}   = target{5};
+    let Inst{7-3} = target{4-0};
+    let Inst{2-0} = Rn;
+  }
 }
 
 // A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
 // A8.6.16 B: Encoding T1
 // If Inst{11-8} == 0b1111 then SEE SVC
-let isCall = 1 in {
-def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
-           Encoding16 {
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+                "svc", "\t$imm", []>, Encoding16 {
+  bits<8> imm;
   let Inst{15-12} = 0b1101;
-  let Inst{11-8} = 0b1111;
-}
+  let Inst{11-8}  = 0b1111;
+  let Inst{7-0}   = imm;
 }
 
-// A8.6.16 B: Encoding T1
-// If Inst{11-8} == 0b1110 then UNDEFINED
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
+// The assembler uses 0xDEFE for a trap instruction.
 let isBarrier = 1, isTerminator = 1 in
 def tTRAP : TI<(outs), (ins), IIC_Br, 
-               ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
-  let Inst{15-12} = 0b1101;
-  let Inst{11-8} = 0b1110;
+               "trap", [(trap)]>, Encoding16 {
+  let Inst = 0xdefe;
 }
 
 //===----------------------------------------------------------------------===//
 //  Load Store Instructions.
 //
 
+// Loads: reg/reg and reg/imm5
 let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
-               "ldr", "\t$dst, $addr",
-               [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
-           T1LdSt<0b100>;
-def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
-               "ldr", "\t$dst, $addr",
-               []>,
-           T1LdSt4Imm<{1,?,?}>;
-
-def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
-                "ldrb", "\t$dst, $addr",
-                [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>,
-            T1LdSt<0b110>;
-def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
-                "ldrb", "\t$dst, $addr",
-                []>,
-            T1LdSt1Imm<{1,?,?}>;
-
-def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
-                "ldrh", "\t$dst, $addr",
-                [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>,
-            T1LdSt<0b101>;
-def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
-                "ldrh", "\t$dst, $addr",
-                []>,
-            T1LdSt2Imm<{1,?,?}>;
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+                              Operand AddrMode_r, Operand AddrMode_i,
+                              AddrMode am, InstrItinClass itin_r,
+                              InstrItinClass itin_i, string asm,
+                              PatFrag opnode> {
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+  def i : // reg/imm5
+    T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+                      (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+                      am, itin_i, asm, "\t$Rt, $addr",
+                      [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+                              Operand AddrMode_r, Operand AddrMode_i,
+                              AddrMode am, InstrItinClass itin_r,
+                              InstrItinClass itin_i, string asm,
+                              PatFrag opnode> {
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+  def i : // reg/imm5
+    T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+                      (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+                      am, itin_i, asm, "\t$Rt, $addr",
+                      [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+                                t_addrmode_is4, AddrModeT1_4,
+                                IIC_iLoad_r, IIC_iLoad_i, "ldr",
+                                UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+                                t_addrmode_is1, AddrModeT1_1,
+                                IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+                                UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+                                t_addrmode_is2, AddrModeT1_2,
+                                IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+                                UnOpFrag<(zextloadi16 node:$Src)>>;
 
 let AddedComplexity = 10 in
-def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSB :                    // A8.6.80
+  T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+                 AddrModeT1_1, IIC_iLoad_bh_r,
                  "ldrsb", "\t$dst, $addr",
-                 [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>,
-             T1LdSt<0b011>;
+                 [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
 
 let AddedComplexity = 10 in
-def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSH :                    // A8.6.84
+  T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+                 AddrModeT1_2, IIC_iLoad_bh_r,
                  "ldrsh", "\t$dst, $addr",
-                 [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>,
-             T1LdSt<0b111>;
+                 [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
 
 let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
-                  "ldr", "\t$dst, $addr",
-                  [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>,
-              T1LdStSP<{1,?,?}>;
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+                    "ldr", "\t$Rt, $addr",
+                    [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+              T1LdStSP<{1,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0} = addr;
+}
 
 // Special instruction for restore. It cannot clobber condition register
 // when it's expanded by eliminateCallFramePseudoInstr().
 let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
-def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
-                    "ldr", "\t$dst, $addr", []>,
-               T1LdStSP<{1,?,?}>;
+// FIXME: Pseudo for tLDRspi
+def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+                     "ldr", "\t$dst, $addr", []>,
+               T1LdStSP<{1,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0} = addr;
+}
 
 // Load tconstpool
 // FIXME: Use ldr.n to work around a Darwin assembler bug.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
-                  "ldr", ".n\t$dst, $addr",
-                  [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>,
-              T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
-
-// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
-    isReMaterializable = 1 in
-def tLDRcp  : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
-                  "ldr", "\t$dst, $addr", []>,
-              T1LdStSP<{1,?,?}>;
-
-def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
-               "str", "\t$src, $addr",
-               [(store tGPR:$src, t_addrmode_s4:$addr)]>,
-           T1LdSt<0b000>;
-def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
-               "str", "\t$src, $addr",
-               []>,
-           T1LdSt4Imm<{0,?,?}>;
-
-def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
-                 "strb", "\t$src, $addr",
-                 [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>,
-            T1LdSt<0b010>;
-def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
-                 "strb", "\t$src, $addr",
-                 []>,
-            T1LdSt1Imm<{0,?,?}>;
-
-def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
-                 "strh", "\t$src, $addr",
-                 [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>,
-            T1LdSt<0b001>;
-def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
-                 "strh", "\t$src, $addr",
-                 []>,
-            T1LdSt2Imm<{0,?,?}>;
-
-def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
-                   "str", "\t$src, $addr",
-                   [(store tGPR:$src, t_addrmode_sp:$addr)]>,
-              T1LdStSP<{0,?,?}>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-// Special instruction for spill. It cannot clobber condition register
-// when it's expanded by eliminateCallFramePseudoInstr().
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+                  "ldr", ".n\t$Rt, $addr",
+                  [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+              T1Encoding<{0,1,0,0,1,?}> {
+  // A6.2 & A8.6.59
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0}  = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+                                t_addrmode_is4, AddrModeT1_4,
+                                IIC_iStore_r, IIC_iStore_i, "str",
+                                BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+                                t_addrmode_is1, AddrModeT1_1,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+                                BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+                                t_addrmode_is2, AddrModeT1_2,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+                    "str", "\t$Rt, $addr",
+                    [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+              T1LdStSP<{0,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0} = addr;
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in
+// Special instruction for spill. It cannot clobber condition register when it's
+// expanded by eliminateCallFramePseudoInstr().
+// FIXME: Pseudo for tSTRspi
+def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
                   "str", "\t$src, $addr", []>,
-             T1LdStSP<{0,?,?}>;
+             T1LdStSP<{0,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-8} = Rt;
+  let Inst{7-0} = addr;
 }
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //
 
-// These requires base address to be written back or one of the loaded regs.
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def tLDM : T1I<(outs),
-               (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
-               IIC_iLoadm,
-               "ldm${addr:submode}${p}\t$addr, $dsts", []>,
-           T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-
-def tLDM_UPD : T1It<(outs tGPR:$wb),
-                    (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
-                    IIC_iLoadm,
-                    "ldm${addr:submode}${p}\t$addr!, $dsts",
-                    "$addr.addr = $wb", []>,
-               T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq
-
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
-def tSTM_UPD : T1It<(outs tGPR:$wb),
-                    (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
-                    IIC_iStorem,
-                    "stm${addr:submode}${p}\t$addr!, $srcs",
-                    "$addr.addr = $wb", []>,
-           T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
+multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
+                           InstrItinClass itin_upd, bits<6> T1Enc,
+                           bit L_bit> {
+  def IA :
+    T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+        itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
+       T1Encoding<T1Enc> {
+    bits<3> Rn;
+    bits<8> regs;
+    let Inst{10-8} = Rn;
+    let Inst{7-0}  = regs;
+  }
+  def IA_UPD :
+    T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
+        T1Encoding<T1Enc> {
+    bits<3> Rn;
+    bits<8> regs;
+    let Inst{10-8} = Rn;
+    let Inst{7-0}  = regs;
+  }
+}
+
+// These require base address to be written back or one of the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
+                            {1,1,0,0,1,?}, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
+                            {1,1,0,0,0,?}, 0>;
+ 
+} // neverHasSideEffects
 
 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
-def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
-               "pop${p}\t$dsts", []>,
-           T1Misc<{1,1,0,?,?,?,?}>;
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+               IIC_iPop,
+               "pop${p}\t$regs", []>,
+           T1Misc<{1,1,0,?,?,?,?}> {
+  bits<16> regs;
+  let Inst{8}   = regs{15};
+  let Inst{7-0} = regs{7-0};
+}
 
 let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
-def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br,
-                "push${p}\t$srcs", []>,
-            T1Misc<{0,1,0,?,?,?,?}>;
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+                IIC_iStore_m,
+                "push${p}\t$regs", []>,
+            T1Misc<{0,1,0,?,?,?,?}> {
+  bits<16> regs;
+  let Inst{8}   = regs{14};
+  let Inst{7-0} = regs{7-0};
+}
 
 //===----------------------------------------------------------------------===//
 //  Arithmetic Instructions.
 //
 
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+                   string opc, string asm, list<dag> pattern>
+    : T1pI<oops, iops, itin, opc, asm, pattern>,
+      T1DataProcessing<opA> {
+  bits<3> Rm;
+  bits<3> Rn;
+  let Inst{5-3} = Rm;
+  let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+                     string opc, string asm, list<dag> pattern>
+    : T1pI<oops, iops, itin, opc, asm, pattern>,
+      T1Misc<opA> {
+  bits<3> Rm;
+  bits<3> Rd;
+  let Inst{5-3} = Rm;
+  let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+                   string opc, string asm, list<dag> pattern>
+    : T1sI<oops, iops, itin, opc, asm, pattern>,
+      T1DataProcessing<opA> {
+  bits<3> Rd;
+  bits<3> Rn;
+  let Inst{5-3} = Rn;
+  let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+                    string opc, string asm, list<dag> pattern>
+    : T1sI<oops, iops, itin, opc, asm, pattern>,
+      T1General<opA> {
+  bits<3> Rm;
+  bits<3> Rn;
+  bits<3> Rd;
+  let Inst{8-6} = Rm;
+  let Inst{5-3} = Rn;
+  let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+                       string opc, string asm, list<dag> pattern>
+    : T1sI<oops, iops, itin, opc, asm, pattern>,
+      T1General<opA> {
+  bits<3> Rd;
+  bits<3> Rm;
+  let Inst{5-3} = Rm;
+  let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+                    string opc, string asm, list<dag> pattern>
+    : T1sIt<oops, iops, itin, opc, asm, pattern>,
+      T1DataProcessing<opA> {
+  bits<3> Rdn;
+  bits<3> Rm;
+  let Inst{5-3} = Rm;
+  let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+                        string opc, string asm, list<dag> pattern>
+    : T1sIt<oops, iops, itin, opc, asm, pattern>,
+      T1General<opA> {
+  bits<3> Rdn;
+  bits<8> imm8;
+  let Inst{10-8} = Rdn;
+  let Inst{7-0}  = imm8;
+}
+
 // Add with carry register
 let isCommutable = 1, Uses = [CPSR] in
-def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                 "adc", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b0101>;
+def tADC :                      // A8.6.2
+  T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+                "adc", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
 
 // Add immediate
-def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                   "add", "\t$dst, $lhs, $rhs",
-                   [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>,
-             T1General<0b01110>;
+def tADDi3 :                    // A8.6.4 T1
+  T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi,
+                   "add", "\t$Rd, $Rm, $imm3",
+                   [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+  bits<3> imm3;
+  let Inst{8-6} = imm3;
+}
 
-def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                   "add", "\t$dst, $rhs",
-                   [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>,
-             T1General<{1,1,0,?,?}>;
+def tADDi8 :                    // A8.6.4 T2
+  T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+                    IIC_iALUi,
+                    "add", "\t$Rdn, $imm8",
+                    [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
 
 // Add register
 let isCommutable = 1 in
-def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                   "add", "\t$dst, $lhs, $rhs",
-                   [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>,
-             T1General<0b01100>;
+def tADDrr :                    // A8.6.6 T1
+  T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iALUr,
+                "add", "\t$Rd, $Rn, $Rm",
+                [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
 
 let neverHasSideEffects = 1 in
-def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
-                     "add", "\t$dst, $rhs", []>,
-               T1Special<{0,0,?,?}>;
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+                     "add", "\t$Rdn, $Rm", []>,
+               T1Special<{0,0,?,?}> {
+  // A8.6.6 T2
+  bits<4> Rdn;
+  bits<4> Rm;
+  let Inst{7}   = Rdn{3};
+  let Inst{6-3} = Rm;
+  let Inst{2-0} = Rdn{2-0};
+}
 
-// And register
+// AND register
 let isCommutable = 1 in
-def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                 "and", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b0000>;
+def tAND :                      // A8.6.12
+  T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iBITr,
+                "and", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
 
 // ASR immediate
-def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
-                  "asr", "\t$dst, $lhs, $rhs",
-                  [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>,
-             T1General<{0,1,0,?,?}>;
+def tASRri :                    // A8.6.14
+  T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+                   IIC_iMOVsi,
+                   "asr", "\t$Rd, $Rm, $imm5",
+                   [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+  bits<5> imm5;
+  let Inst{10-6} = imm5;
+}
 
 // ASR register
-def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
-                   "asr", "\t$dst, $rhs",
-                   [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>,
-             T1DataProcessing<0b0100>;
+def tASRrr :                    // A8.6.15
+  T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iMOVsr,
+                "asr", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
 
 // BIC register
-def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                 "bic", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>,
-           T1DataProcessing<0b1110>;
+def tBIC :                      // A8.6.20
+  T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iBITr,
+                "bic", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
 
 // CMN register
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
 //FIXME: Disable CMN, as CCodes are backwards from compare expectations
 //       Compare-to-zero still works out, just not the relationals
-//def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-//                "cmn", "\t$lhs, $rhs",
-//                [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>,
-//           T1DataProcessing<0b1011>;
-def tCMNz : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-                 "cmn", "\t$lhs, $rhs",
-                 [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>,
-            T1DataProcessing<0b1011>;
-}
+//def tCMN :                     // A8.6.33
+//  T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+//               IIC_iCMPr,
+//               "cmn", "\t$lhs, $rhs",
+//               [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz :                     // A8.6.33
+  T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+               IIC_iCMPr,
+               "cmn", "\t$Rn, $Rm",
+               [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
 
 // CMP immediate
-let Defs = [CPSR] in {
-def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
-                  "cmp", "\t$lhs, $rhs",
-                  [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>,
-             T1General<{1,0,1,?,?}>;
-def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
-                  "cmp", "\t$lhs, $rhs",
-                  [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>,
-              T1General<{1,0,1,?,?}>;
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+                  "cmp", "\t$Rn, $imm8",
+                  [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+             T1General<{1,0,1,?,?}> {
+  // A8.6.35
+  bits<3> Rn;
+  bits<8> imm8;
+  let Inst{10-8} = Rn;
+  let Inst{7-0}  = imm8;
 }
 
 // CMP register
-let Defs = [CPSR] in {
-def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-                 "cmp", "\t$lhs, $rhs",
-                 [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>,
-            T1DataProcessing<0b1010>;
-def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-                  "cmp", "\t$lhs, $rhs",
-                  [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>,
-             T1DataProcessing<0b1010>;
-
-def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
-                   "cmp", "\t$lhs, $rhs", []>,
-              T1Special<{0,1,?,?}>;
-def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
-                    "cmp", "\t$lhs, $rhs", []>,
-               T1Special<{0,1,?,?}>;
+def tCMPr :                     // A8.6.36 T1
+  T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+               IIC_iCMPr,
+               "cmp", "\t$Rn, $Rm",
+               [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+                   "cmp", "\t$Rn, $Rm", []>,
+              T1Special<{0,1,?,?}> {
+  // A8.6.36 T2
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{7}   = Rn{3};
+  let Inst{6-3} = Rm;
+  let Inst{2-0} = Rn{2-0};
 }
+} // isCompare = 1, Defs = [CPSR]
 
 
 // XOR register
 let isCommutable = 1 in
-def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                 "eor", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b0001>;
+def tEOR :                      // A8.6.45
+  T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iBITr,
+                "eor", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
 
 // LSL immediate
-def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
-                  "lsl", "\t$dst, $lhs, $rhs",
-                  [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>,
-             T1General<{0,0,0,?,?}>;
+def tLSLri :                    // A8.6.88
+  T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+                   IIC_iMOVsi,
+                   "lsl", "\t$Rd, $Rm, $imm5",
+                   [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+  bits<5> imm5;
+  let Inst{10-6} = imm5;
+}
 
 // LSL register
-def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
-                   "lsl", "\t$dst, $rhs",
-                   [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>,
-             T1DataProcessing<0b0010>;
+def tLSLrr :                    // A8.6.89
+  T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iMOVsr,
+                "lsl", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
 
 // LSR immediate
-def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
-                  "lsr", "\t$dst, $lhs, $rhs",
-                  [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>,
-             T1General<{0,0,1,?,?}>;
+def tLSRri :                    // A8.6.90
+  T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+                   IIC_iMOVsi,
+                   "lsr", "\t$Rd, $Rm, $imm5",
+                   [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+  bits<5> imm5;
+  let Inst{10-6} = imm5;
+}
 
 // LSR register
-def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
-                   "lsr", "\t$dst, $rhs",
-                   [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>,
-             T1DataProcessing<0b0011>;
-
-// move register
-def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
-                  "mov", "\t$dst, $src",
-                  [(set tGPR:$dst, imm0_255:$src)]>,
-             T1General<{1,0,0,?,?}>;
+def tLSRrr :                    // A8.6.91
+  T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iMOVsr,
+                "lsr", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
+                  "mov", "\t$Rd, $imm8",
+                  [(set tGPR:$Rd, imm0_255:$imm8)]>,
+             T1General<{1,0,0,?,?}> {
+  // A8.6.96
+  bits<3> Rd;
+  bits<8> imm8;
+  let Inst{10-8} = Rd;
+  let Inst{7-0}  = imm8;
+}
 
 // TODO: A7-73: MOV(2) - mov setting flag.
 
-
 let neverHasSideEffects = 1 in {
 // FIXME: Make this predicable.
-def tMOVr       : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
-                      "mov\t$dst, $src", []>,
-                  T1Special<0b1000>;
+def tMOVr       : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+                      "mov\t$Rd, $Rm", []>,
+                  T1Special<0b1000> {
+  // A8.6.97
+  bits<4> Rd;
+  bits<4> Rm;
+  // Bits {7-6} are encoded by the T1Special value.
+  let Inst{5-3} = Rm{2-0};
+  let Inst{2-0} = Rd{2-0};
+}
 let Defs = [CPSR] in
-def tMOVSr      : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
-                       "movs\t$dst, $src", []>, Encoding16 {
+def tMOVSr      : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+                      "movs\t$Rd, $Rm", []>, Encoding16 {
+  // A8.6.97
+  bits<3> Rd;
+  bits<3> Rm;
   let Inst{15-6} = 0b0000000000;
+  let Inst{5-3}  = Rm;
+  let Inst{2-0}  = Rd;
 }
 
 // FIXME: Make these predicable.
-def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr,
-                       "mov\t$dst, $src", []>,
-                   T1Special<{1,0,0,?}>;
-def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr,
-                       "mov\t$dst, $src", []>,
-                   T1Special<{1,0,?,0}>;
-def tMOVgpr2gpr  : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
-                       "mov\t$dst, $src", []>,
-                   T1Special<{1,0,?,?}>;
+def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+                       "mov\t$Rd, $Rm", []>,
+                   T1Special<{1,0,0,?}> {
+  // A8.6.97
+  bits<4> Rd;
+  bits<4> Rm;
+  // Bit {7} is encoded by the T1Special value.
+  let Inst{6-3} = Rm;
+  let Inst{2-0} = Rd{2-0};
+}
+def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+                       "mov\t$Rd, $Rm", []>,
+                   T1Special<{1,0,?,0}> {
+  // A8.6.97
+  bits<4> Rd;
+  bits<4> Rm;
+  // Bit {6} is encoded by the T1Special value.
+  let Inst{7}   = Rd{3};
+  let Inst{5-3} = Rm{2-0};
+  let Inst{2-0} = Rd{2-0};
+}
+def tMOVgpr2gpr  : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+                       "mov\t$Rd, $Rm", []>,
+                   T1Special<{1,0,?,?}> {
+  // A8.6.97
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{7}   = Rd{3};
+  let Inst{6-3} = Rm;
+  let Inst{2-0} = Rd{2-0};
+}
 } // neverHasSideEffects
 
-// multiply register
+// Multiply register
 let isCommutable = 1 in
-def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
-                 "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */
-                 [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b1101>;
-
-// move inverse register
-def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
-                "mvn", "\t$dst, $src",
-                [(set tGPR:$dst, (not tGPR:$src))]>,
-           T1DataProcessing<0b1111>;
-
-// bitwise or register
+def tMUL :                      // A8.6.105 T1
+  T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iMUL32,
+                "mul", "\t$Rdn, $Rm, $Rdn",
+                [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move inverse register
+def tMVN :                      // A8.6.107
+  T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+               "mvn", "\t$Rd, $Rn",
+               [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
 let isCommutable = 1 in
-def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),  IIC_iALUr,
-                 "orr", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b1100>;
-
-// swaps
-def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                "rev", "\t$dst, $src",
-                [(set tGPR:$dst, (bswap tGPR:$src))]>,
-                Requires<[IsThumb1Only, HasV6]>,
-           T1Misc<{1,0,1,0,0,0,?}>;
-
-def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "rev16", "\t$dst, $src",
-             [(set tGPR:$dst,
-                   (or (and (srl tGPR:$src, (i32 8)), 0xFF),
-                       (or (and (shl tGPR:$src, (i32 8)), 0xFF00),
-                           (or (and (srl tGPR:$src, (i32 8)), 0xFF0000),
-                               (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
-                Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{1,0,1,0,0,1,?}>;
-
-def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "revsh", "\t$dst, $src",
-                  [(set tGPR:$dst,
-                        (sext_inreg
-                          (or (srl (and tGPR:$src, 0xFF00), (i32 8)),
-                              (shl tGPR:$src, (i32 8))), i16))]>,
-                  Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{1,0,1,0,1,1,?}>;
-
-// rotate right register
-def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
-                 "ror", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b0111>;
-
-// negate register
-def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi,
-                "rsb", "\t$dst, $src, #0",
-                [(set tGPR:$dst, (ineg tGPR:$src))]>,
-           T1DataProcessing<0b1001>;
+def tORR :                      // A8.6.114
+  T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iBITr,
+                "orr", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV :                      // A8.6.134
+  T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "rev", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 :                    // A8.6.135
+  T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "rev16", "\t$Rd, $Rm",
+             [(set tGPR:$Rd,
+                   (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
+                       (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
+                           (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
+                               (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+                Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH :                    // A8.6.136
+  T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "revsh", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd,
+                       (sext_inreg
+                         (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+                             (shl tGPR:$Rm, (i32 8))), i16))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR :                      // A8.6.139
+  T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iMOVsr,
+                "ror", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB :                      // A8.6.141
+  T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+               IIC_iALUi,
+               "rsb", "\t$Rd, $Rn, #0",
+               [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
 
 // Subtract with carry register
 let Uses = [CPSR] in
-def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                 "sbc", "\t$dst, $rhs",
-                 [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>,
-           T1DataProcessing<0b0110>;
+def tSBC :                      // A8.6.151
+  T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iALUr,
+                "sbc", "\t$Rdn, $Rm",
+                [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
 
 // Subtract immediate
-def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                  "sub", "\t$dst, $lhs, $rhs",
-                  [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>,
-             T1General<0b01111>;
-
-def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
-                   "sub", "\t$dst, $rhs",
-                   [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>,
-             T1General<{1,1,1,?,?}>;
-
-// subtract register
-def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
-                  "sub", "\t$dst, $lhs, $rhs",
-                  [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>,
-             T1General<0b01101>;
+def tSUBi3 :                    // A8.6.210 T1
+  T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+                   IIC_iALUi,
+                   "sub", "\t$Rd, $Rm, $imm3",
+                   [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+  bits<3> imm3;
+  let Inst{8-6} = imm3;
+}
 
-// TODO: A7-96: STMIA - store multiple.
+def tSUBi8 :                    // A8.6.210 T2
+  T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+                    IIC_iALUi,
+                    "sub", "\t$Rdn, $imm8",
+                    [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr :                    // A8.6.212
+  T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+                IIC_iALUr,
+                "sub", "\t$Rd, $Rn, $Rm",
+                [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
 
-// sign-extend byte
-def tSXTB  : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "sxtb", "\t$dst, $src",
-                  [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
-                  Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{0,0,1,0,0,1,?}>;
-
-// sign-extend short
-def tSXTH  : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "sxth", "\t$dst, $src",
-                  [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
-                  Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{0,0,1,0,0,0,?}>;
-
-// test
-let isCommutable = 1, Defs = [CPSR] in
-def tTST  : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-                 "tst", "\t$lhs, $rhs",
-                 [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>,
-            T1DataProcessing<0b1000>;
-
-// zero-extend byte
-def tUXTB  : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "uxtb", "\t$dst, $src",
-                  [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
-                  Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{0,0,1,0,1,1,?}>;
-
-// zero-extend short
-def tUXTH  : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
-                  "uxth", "\t$dst, $src",
-                  [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
-                  Requires<[IsThumb1Only, HasV6]>,
-             T1Misc<{0,0,1,0,1,0,?}>;
+// TODO: A7-96: STMIA - store multiple.
 
+// Sign-extend byte
+def tSXTB :                     // A8.6.222
+  T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "sxtb", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH :                     // A8.6.224
+  T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "sxth", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST :                      // A8.6.230
+  T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+               "tst", "\t$Rn, $Rm",
+               [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB :                     // A8.6.262
+  T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "uxtb", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH :                     // A8.6.264
+  T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+                 IIC_iUNAr,
+                 "uxth", "\t$Rd, $Rm",
+                 [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
 // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
 // Expanded after instruction selection into a branch sequence.
 let usesCustomInserter = 1 in  // Expanded after instruction selection.
   def tMOVCCr_pseudo :
   PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
-              NoItinerary, "${:comment} tMOVCCr $cc",
+              NoItinerary,
              [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
 
 
 // 16-bit movcc in IT blocks for Thumb2.
 let neverHasSideEffects = 1 in {
-def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
-                    "mov", "\t$dst, $rhs", []>,
-              T1Special<{1,0,?,?}>;
+def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr,
+                    "mov", "\t$Rdn, $Rm", []>,
+              T1Special<{1,0,?,?}> {
+  bits<4> Rdn;
+  bits<4> Rm;
+  let Inst{7}   = Rdn{3};
+  let Inst{6-3} = Rm;
+  let Inst{2-0} = Rdn{2-0};
+}
+
+let isMoveImm = 1 in
+def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi,
+                    "mov", "\t$Rdn, $Rm", []>,
+              T1General<{1,0,0,?,?}> {
+  bits<3> Rdn;
+  bits<8> Rm;
+  let Inst{10-8} = Rdn;
+  let Inst{7-0}  = Rm;
+}
 
-def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
-                    "mov", "\t$dst, $rhs", []>,
-              T1General<{1,0,0,?,?}>;
 } // neverHasSideEffects
 
 // tLEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
-                    "adr$p\t$dst, #$label", []>,
-                T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
 
-} // neverHasSideEffects
-def tLEApcrelJT : T1I<(outs tGPR:$dst),
-                      (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                      IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
-                  T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+               IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+               T1Encoding<{1,0,1,0,0,?}> {
+  bits<3> Rd;
+  bits<8> addr;
+  let Inst{10-8} = Rd;
+  let Inst{7-0} = addr;
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel   : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+                              Size2Bytes, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+                              (ins i32imm:$label, nohash_imm:$id, pred:$p),
+                              Size2Bytes, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class tMovRCopro<string opc, bit direction>
+  : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+                       GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+          [/* For disassembly only; pattern left blank */]> {
+  let Inst{27-24} = 0b1110;
+  let Inst{20} = direction;
+  let Inst{4} = 1;
+
+  bits<4> Rt;
+  bits<4> cop;
+  bits<3> opc1;
+  bits<3> opc2;
+  bits<4> CRm;
+  bits<4> CRn;
+
+  let Inst{15-12} = Rt;
+  let Inst{11-8}  = cop;
+  let Inst{23-21} = opc1;
+  let Inst{7-5}   = opc2;
+  let Inst{3-0}   = CRm;
+  let Inst{19-16} = CRn;
+}
+
+def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+
+class tMovRRCopro<string opc, bit direction>
+  : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+          !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+          [/* For disassembly only; pattern left blank */]> {
+  let Inst{27-24} = 0b1100;
+  let Inst{23-21} = 0b010;
+  let Inst{20} = direction;
+
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> cop;
+  bits<4> opc1;
+  bits<4> CRm;
+
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+  let Inst{11-8}  = cop;
+  let Inst{7-4}   = opc1;
+  let Inst{3-0}   = CRm;
+}
+
+def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.  For disassembly only.
+//
+def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+                 c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+                 "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+                 [/* For disassembly only; pattern left blank */]> {
+  let Inst{27-24} = 0b1110;
+
+  bits<4> opc1;
+  bits<4> CRn;
+  bits<4> CRd;
+  bits<4> cop;
+  bits<3> opc2;
+  bits<4> CRm;
+
+  let Inst{3-0}   = CRm;
+  let Inst{4}     = 0;
+  let Inst{7-5}   = opc2;
+  let Inst{11-8}  = cop;
+  let Inst{15-12} = CRd;
+  let Inst{19-16} = CRn;
+  let Inst{23-20} = opc1;
+}
 
 //===----------------------------------------------------------------------===//
 // TLS Instructions
 //
 
 // __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1,
-  Defs = [R0, LR] in {
-  def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
-                     "bl\t__aeabi_read_tp",
-                     [(set R0, ARMthread_pointer)]>;
+let isCall = 1, Defs = [R0, LR], Uses = [SP] in
+def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
+                   "bl\t__aeabi_read_tp",
+                   [(set R0, ARMthread_pointer)]> {
+  // Encoding is 0xf7fffffe.
+  let Inst = 0xf7fffffe;
 }
 
+//===----------------------------------------------------------------------===//
 // SJLJ Exception handling intrinsics
-//   eh_sjlj_setjmp() is an instruction sequence to store the return
-//   address and save #0 in R0 for the non-longjmp case.
-//   Since by its nature we may be coming from some other function to get
-//   here, and we're using the stack frame for the containing function to
-//   save/restore registers, we can't keep anything live in regs across
-//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-//   when we get here from a longjmp(). We force everthing out of registers
-//   except for our own input by listing the relevant registers in Defs. By
-//   doing so, we also cause the prologue/epilogue code to actively preserve
-//   all of the callee-saved resgisters, which is exactly what we want.
-//   $val is a scratch register for our use.
-let Defs =
-  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12 ], hasSideEffects = 1,
-   isBarrier = 1  in {
-  def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
-                              AddrModeNone, SizeSpecial, NoItinerary,
-                              "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
-                              "adds\t$val, #7\n\t"
-                              "str\t$val, [$src, #4]\n\t"
-                              "movs\tr0, #0\n\t"
-                              "b\t1f\n\t"
-                              "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
-                              "1:", "",
-                   [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-}
+// 
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case.  Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everthing out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved resgisters, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12 ],
+    hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+                                  AddrModeNone, SizeSpecial, NoItinerary, "","",
+                          [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
 
 // FIXME: Non-Darwin version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
-    Defs = [ R7, LR, SP ] in {
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+    Defs = [ R7, LR, SP ] in
 def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
-                             AddrModeNone, SizeSpecial, IndexModeNone,
-                             Pseudo, NoItinerary,
-                             "ldr\t$scratch, [$src, #8]\n\t"
-                             "mov\tsp, $scratch\n\t"
-                             "ldr\t$scratch, [$src, #4]\n\t"
-                             "ldr\tr7, [$src]\n\t"
-                             "bx\t$scratch", "",
-                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
-                                Requires<[IsThumb, IsDarwin]>;
-}
+                              AddrModeNone, SizeSpecial, IndexModeNone,
+                              Pseudo, NoItinerary, "", "",
+                              [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                             Requires<[IsThumb, IsDarwin]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
 
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+            (tCMPi8  tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+            (tCMPr   tGPR:$Rn, tGPR:$Rm)>;
+
 // Add with carry
 def : T1Pat<(addc   tGPR:$lhs, imm0_7:$rhs),
             (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
@@ -991,27 +1492,42 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
       Requires<[IsThumb, HasV5T, IsDarwin]>;
 
 // zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_s1:$addr),
-            (tLDRB t_addrmode_s1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+            (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+            (tLDRBi t_addrmode_is1:$addr)>;
 
 // extload -> zextload
-def : T1Pat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : T1Pat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr),  (tLDRHi t_addrmode_is2:$addr)>;
 
 // If it's impossible to use [r,r] address mode for sextload, select to
 // ldr{b|h} + sxt{b|h} instead.
-def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
-            (tSXTB (tLDRB t_addrmode_s1:$addr))>,
-      Requires<[IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi16 t_addrmode_s2:$addr),
-            (tSXTH (tLDRH t_addrmode_s2:$addr))>,
-      Requires<[IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
-            (tASRri (tLSLri (tLDRB t_addrmode_s1:$addr), 24), 24)>;
-def : T1Pat<(sextloadi16 t_addrmode_s1:$addr),
-            (tASRri (tLSLri (tLDRH t_addrmode_s1:$addr), 16), 16)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+            (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+            (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+            (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+            (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+      Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+            (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+            (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+            (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+            (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
 
 // Large immediate handling.
 
@@ -1028,8 +1544,7 @@ def : T1Pat<(i32 imm0_255_comp:$src),
 // scheduling.
 let isReMaterializable = 1 in
 def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
-                   NoItinerary,
-                   "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+                             NoItinerary,
                [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
-               Requires<[IsThumb1Only]>;
+               Requires<[IsThumb, IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6ba0a44be470..0e01be59c7e8 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -21,16 +21,12 @@ def it_mask : Operand<i32> {
   let PrintMethod = "printThumbITMask";
 }
 
-// Table branch address
-def tb_addrmode : Operand<i32> {
-  let PrintMethod = "printTBAddrMode";
-}
-
 // Shifted operands. No register controlled shifts for Thumb2.
 // Note: We do not support rrx shifted operands yet.
 def t2_so_reg : Operand<i32>,    // reg imm
                 ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
                                [shl,srl,sra,rotr]> {
+  let EncoderMethod = "getT2SORegOpValue";
   let PrintMethod = "printT2SOOperand";
   let MIOperandInfo = (ops rGPR, i32imm);
 }
@@ -47,11 +43,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
 
 // t2_so_imm - Match a 32-bit immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
-// immediate splatted into multiple bytes of the word. t2_so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into t2_so_imm instructions: the 8-bit immediate is the least significant
-// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
+  let EncoderMethod = "getT2SOImmOpValue";
+}
 
 // t2_so_imm_not - Match an immediate that is a complement
 // of a t2_so_imm.
@@ -63,7 +58,7 @@ def t2_so_imm_not : Operand<i32>,
 // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
 def t2_so_imm_neg : Operand<i32>,
                     PatLeaf<(imm), [{
-  return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1;
+  return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
 }], t2_so_imm_neg_XFORM>;
 
 // Break t2_so_imm's up into two pieces.  This handles immediates with up to 16
@@ -128,27 +123,41 @@ def imm0_255_not : PatLeaf<(i32 imm), [{
 // t2addrmode_imm12  := reg + imm12
 def t2addrmode_imm12 : Operand<i32>,
                        ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
-  let PrintMethod = "printT2AddrModeImm12Operand";
+  let PrintMethod = "printAddrModeImm12Operand";
+  let EncoderMethod = "getAddrModeImm12OpValue";
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemMode5AsmOperand;
 }
 
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+  let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
 // t2addrmode_imm8  := reg +/- imm8
 def t2addrmode_imm8 : Operand<i32>,
                       ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
   let PrintMethod = "printT2AddrModeImm8Operand";
+  let EncoderMethod = "getT2AddrModeImm8OpValue";
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemMode5AsmOperand;
 }
 
 def t2am_imm8_offset : Operand<i32>,
-                       ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{
+                       ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+                                      [], [SDNPWantRoot]> {
   let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+  let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+  let ParserMatchClass = MemMode5AsmOperand;
 }
 
 // t2addrmode_imm8s4  := reg +/- (imm8 << 2)
-def t2addrmode_imm8s4 : Operand<i32>,
-                        ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> {
+def t2addrmode_imm8s4 : Operand<i32> {
   let PrintMethod = "printT2AddrModeImm8s4Operand";
+  let EncoderMethod = "getT2AddrModeImm8s4OpValue";
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemMode5AsmOperand;
 }
 
 def t2am_imm8s4_offset : Operand<i32> {
@@ -159,7 +168,9 @@ def t2am_imm8s4_offset : Operand<i32> {
 def t2addrmode_so_reg : Operand<i32>,
                         ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
   let PrintMethod = "printT2AddrModeSoRegOperand";
+  let EncoderMethod = "getT2AddrModeSORegOpValue";
   let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+  let ParserMatchClass = MemMode5AsmOperand;
 }
 
 
@@ -167,45 +178,294 @@ def t2addrmode_so_reg : Operand<i32>,
 // Multiclass helpers...
 //
 
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<12> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<12> imm;
+
+  let Inst{19-16}  = Rn;
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<12> ShiftedRm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = ShiftedRm{3-0};
+  let Inst{5-4}   = ShiftedRm{6-5};
+  let Inst{14-12} = ShiftedRm{11-9};
+  let Inst{7-6}   = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<12> ShiftedRm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = ShiftedRm{3-0};
+  let Inst{5-4}   = ShiftedRm{6-5};
+  let Inst{14-12} = ShiftedRm{11-9};
+  let Inst{7-6}   = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<12> ShiftedRm;
+
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = ShiftedRm{3-0};
+  let Inst{5-4}   = ShiftedRm{6-5};
+  let Inst{14-12} = ShiftedRm{11-9};
+  let Inst{7-6}   = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<5> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = Rm;
+  let Inst{14-12} = imm{4-2};
+  let Inst{7-6}   = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<5> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = Rm;
+  let Inst{14-12} = imm{4-2};
+  let Inst{7-6}   = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> ShiftedRm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = ShiftedRm{3-0};
+  let Inst{5-4}   = ShiftedRm{6-5};
+  let Inst{14-12} = ShiftedRm{11-9};
+  let Inst{7-6}   = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2sI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> ShiftedRm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = ShiftedRm{3-0};
+  let Inst{5-4}   = ShiftedRm{6-5};
+  let Inst{14-12} = ShiftedRm{11-9};
+  let Inst{7-6}   = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  bits<4> Ra;
+
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Ra;
+  let Inst{11-8}  = Rd;
+  let Inst{3-0}   = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+                dag oops, dag iops, InstrItinClass itin,
+                string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{31-23} = 0b111110111;
+  let Inst{22-20} = opc22_20;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = RdLo;
+  let Inst{11-8}  = RdHi;
+  let Inst{7-4}   = opc7_4;
+  let Inst{3-0}   = Rm;
+}
+
+
 /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
 /// unary operation that produces a value. These are predicable and can be
 /// changed to modify CPSR.
-multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
-                      bit Cheap = 0, bit ReMat = 0> {
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                      PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
    // shifted imm
-   def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
-                opc, "\t$dst, $src",
-                [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
+   def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+                opc, "\t$Rd, $imm",
+                [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
      let isAsCheapAsAMove = Cheap;
      let isReMaterializable = ReMat;
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{19-16} = 0b1111; // Rn
      let Inst{15} = 0;
    }
    // register
-   def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
-                opc, ".w\t$dst, $src",
-                [(set rGPR:$dst, (opnode rGPR:$src))]> {
+   def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+                opc, ".w\t$Rd, $Rm",
+                [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{19-16} = 0b1111; // Rn
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
-                opc, ".w\t$dst, $src",
-                [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
+   def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+                opc, ".w\t$Rd, $ShiftedRm",
+                [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{19-16} = 0b1111; // Rn
    }
 }
@@ -213,94 +473,97 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
 /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
 /// binary operation that produces a value. These are predicable and can be
 /// changed to modify CPSR.
-multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
-                       bit Commutable = 0, string wide = ""> {
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                       PatFrag opnode, bit Commutable = 0, string wide = ""> {
    // shifted imm
-   def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
-                 opc, "\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
+   def ri : T2sTwoRegImm<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+                 opc, "\t$Rd, $Rn, $imm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{15} = 0;
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
-                 opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
+   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+                 opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
      let isCommutable = Commutable;
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
-                 opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
+   def rs : T2sTwoRegShiftedReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+                 opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
    }
 }
 
 /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
 //  the ".w" prefix to indicate that they are wide.
-multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
-                         bit Commutable = 0> :
-    T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                         PatFrag opnode, bit Commutable = 0> :
+    T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
 
 /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
 /// reversed.  The 'rr' form is only defined for the disassembler; for codegen
 /// it is equivalent to the T2I_bin_irs counterpart.
 multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
-                 opc, ".w\t$dst, $rhs, $lhs",
-                 [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
+   def ri : T2sTwoRegImm<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+                 opc, ".w\t$Rd, $Rn, $imm",
+                 [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{15} = 0;
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
-                 opc, "\t$dst, $rhs, $lhs",
+   def rr : T2sThreeReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+                 opc, "\t$Rd, $Rn, $Rm",
                  [/* For disassembly only; pattern left blank */]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
-                 opc, "\t$dst, $rhs, $lhs",
-                 [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
+   def rs : T2sTwoRegShiftedReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+                 IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+                 [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = ?; // The S bit.
    }
 }
 
 /// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
-let Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
-                         bit Commutable = 0> {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                         PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
-                !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
-                [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+   def ri : T2TwoRegImm<
+                (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+                !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+                [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -308,9 +571,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{15} = 0;
    }
    // register
-   def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
-                !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
-                [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
+   def rr : T2ThreeReg<
+                (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+                !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
+                [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
      let isCommutable = Commutable;
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -321,9 +585,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
-                !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
-                [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+   def rs : T2TwoRegShiftedReg<
+                (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+                !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
+                [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -340,51 +605,58 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
    // The register-immediate version is re-materializable. This is useful
    // in particular for taking the address of a local.
    let isReMaterializable = 1 in {
-   def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+   def ri : T2sTwoRegImm<
+                 (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+                 opc, ".w\t$Rd, $Rn, $imm",
+                 [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24} = 1;
      let Inst{23-21} = op23_21;
-     let Inst{20} = 0; // The S bit.
      let Inst{15} = 0;
    }
    }
    // 12-bit imm
-   def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
-                  !strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
-                  [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+   def ri12 : T2I<
+                  (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+                  !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+                  [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+     bits<4> Rd;
+     bits<4> Rn;
+     bits<12> imm;
      let Inst{31-27} = 0b11110;
-     let Inst{25} = 1;
-     let Inst{24} = 0;
+     let Inst{26} = imm{11};
+     let Inst{25-24} = 0b10;
      let Inst{23-21} = op23_21;
      let Inst{20} = 0; // The S bit.
+     let Inst{19-16} = Rn;
      let Inst{15} = 0;
+     let Inst{14-12} = imm{10-8};
+     let Inst{11-8} = Rd;
+     let Inst{7-0} = imm{7-0};
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
+   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
+                 opc, ".w\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
      let isCommutable = Commutable;
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24} = 1;
      let Inst{23-21} = op23_21;
-     let Inst{20} = 0; // The S bit.
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+   def rs : T2sTwoRegShiftedReg<
+                 (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+                 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+                 [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24} = 1;
      let Inst{23-21} = op23_21;
-     let Inst{20} = 0; // The S bit.
    }
 }
 
@@ -395,50 +667,49 @@ let Uses = [CPSR] in {
 multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                              bit Commutable = 0> {
    // shifted imm
-   def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
-                 opc, "\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
+   def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+                 IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
                  Requires<[IsThumb2]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
-     let Inst{20} = 0; // The S bit.
      let Inst{15} = 0;
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
+   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+                 opc, ".w\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
                  Requires<[IsThumb2]> {
      let isCommutable = Commutable;
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = 0; // The S bit.
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
+   def rs : T2sTwoRegShiftedReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+                 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
                  Requires<[IsThumb2]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
-     let Inst{20} = 0; // The S bit.
    }
 }
 
 // Carry setting variants
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
 multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
                                bit Commutable = 0> {
    // shifted imm
-   def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
-                 opc, "\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
+   def ri : T2sTwoRegImm<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+                 opc, "\t$Rd, $Rn, $imm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
                  Requires<[IsThumb2]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
@@ -447,9 +718,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{15} = 0;
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
+   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+                 opc, ".w\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
                  Requires<[IsThumb2]> {
      let isCommutable = Commutable;
      let Inst{31-27} = 0b11101;
@@ -461,9 +732,10 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
+   def rs : T2sTwoRegShiftedReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+                 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
                  Requires<[IsThumb2]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -476,12 +748,13 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 
 /// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
 /// version is not needed since this is only for codegen.
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
 multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
-                !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
-                [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
+   def ri : T2TwoRegImm<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+                !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+                [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -489,9 +762,10 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
      let Inst{15} = 0;
    }
    // shifted register
-   def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
-                !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
-                [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
+   def rs : T2TwoRegShiftedReg<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+                IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
+                [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -504,18 +778,20 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
 //  rotate operation that produces a value.
 multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
    // 5-bit imm
-   def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
+   def ri : T2sTwoRegShiftImm<
+                 (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+                 opc, ".w\t$Rd, $Rm, $imm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-21} = 0b010010;
      let Inst{19-16} = 0b1111; // Rn
      let Inst{5-4} = opcod;
    }
    // register
-   def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
-                 opc, ".w\t$dst, $lhs, $rhs",
-                 [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
+   def rr : T2sThreeReg<
+                 (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+                 opc, ".w\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-21} = opcod;
@@ -528,11 +804,14 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
 /// patterns. Similar to T2I_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let isCompare = 1, Defs = [CPSR] in {
-multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+                     InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                       PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
-                opc, ".w\t$lhs, $rhs",
-                [(opnode GPR:$lhs, t2_so_imm:$rhs)]> {
+   def ri : T2OneRegCmpImm<
+                (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+                opc, ".w\t$Rn, $imm",
+                [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -541,7 +820,8 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
      let Inst{11-8} = 0b1111; // Rd
    }
    // register
-   def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
+   def rr : T2TwoRegCmp<
+                (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
                 opc, ".w\t$lhs, $rhs",
                 [(opnode GPR:$lhs, rGPR:$rhs)]> {
      let Inst{31-27} = 0b11101;
@@ -554,9 +834,10 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
      let Inst{5-4} = 0b00; // type
    }
    // shifted register
-   def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi,
-                opc, ".w\t$lhs, $rhs",
-                [(opnode GPR:$lhs, t2_so_reg:$rhs)]> {
+   def rs : T2OneRegCmpShiftedReg<
+                (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+                opc, ".w\t$Rn, $ShiftedRm",
+                [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -567,20 +848,29 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
 }
 
 /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
-multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
-  def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi,
-                   opc, ".w\t$dst, $addr",
-                   [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> {
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+                  InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+  def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+                   opc, ".w\t$Rt, $addr",
+                   [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-25} = 0b00;
     let Inst{24} = signed;
     let Inst{23} = 1;
     let Inst{22-21} = opcod;
     let Inst{20} = 1; // load
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<17> addr;
+    let Inst{19-16} = addr{16-13}; // Rn
+    let Inst{23}    = addr{12};    // U
+    let Inst{11-0}  = addr{11-0};  // imm
   }
-  def i8  : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi,
-                   opc, "\t$dst, $addr",
-                   [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> {
+  def i8  : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+                   opc, "\t$Rt, $addr",
+                   [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-25} = 0b00;
     let Inst{24} = signed;
@@ -591,10 +881,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
     // Offset: index==TRUE, wback==FALSE
     let Inst{10} = 1; // The P bit.
     let Inst{8} = 0; // The W bit.
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<13> addr;
+    let Inst{19-16} = addr{12-9}; // Rn
+    let Inst{9}     = addr{8};    // U
+    let Inst{7-0}   = addr{7-0};  // imm
   }
-  def s   : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr,
-                   opc, ".w\t$dst, $addr",
-                   [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> {
+  def s   : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+                   opc, ".w\t$Rt, $addr",
+                   [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-25} = 0b00;
     let Inst{24} = signed;
@@ -602,10 +900,20 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
     let Inst{22-21} = opcod;
     let Inst{20} = 1; // load
     let Inst{11-6} = 0b000000;
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<10> addr;
+    let Inst{19-16} = addr{9-6}; // Rn
+    let Inst{3-0}   = addr{5-2}; // Rm
+    let Inst{5-4}   = addr{1-0}; // imm
   }
-  def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
-                   opc, ".w\t$dst, $addr",
-                   [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> {
+
+  // FIXME: Is the pci variant actually needed?
+  def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii,
+                   opc, ".w\t$Rt, $addr",
+                   [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
     let isReMaterializable = 1;
     let Inst{31-27} = 0b11111;
     let Inst{26-25} = 0b00;
@@ -614,22 +922,35 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
     let Inst{22-21} = opcod;
     let Inst{20} = 1; // load
     let Inst{19-16} = 0b1111; // Rn
+    bits<4> Rt;
+    bits<12> addr;
+    let Inst{15-12} = Rt{3-0};
+    let Inst{11-0}  = addr{11-0};
   }
 }
 
 /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
-multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
-  def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei,
-                   opc, ".w\t$src, $addr",
-                   [(opnode GPR:$src, t2addrmode_imm12:$addr)]> {
+multiclass T2I_st<bits<2> opcod, string opc,
+                  InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+  def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+                   opc, ".w\t$Rt, $addr",
+                   [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0001;
     let Inst{22-21} = opcod;
     let Inst{20} = 0; // !load
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<17> addr;
+    let Inst{19-16} = addr{16-13}; // Rn
+    let Inst{23}    = addr{12};    // U
+    let Inst{11-0}  = addr{11-0};  // imm
   }
-  def i8  : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei,
-                   opc, "\t$src, $addr",
-                   [(opnode GPR:$src, t2addrmode_imm8:$addr)]> {
+  def i8  : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+                   opc, "\t$Rt, $addr",
+                   [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0000;
     let Inst{22-21} = opcod;
@@ -638,24 +959,40 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
     // Offset: index==TRUE, wback==FALSE
     let Inst{10} = 1; // The P bit.
     let Inst{8} = 0; // The W bit.
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<13> addr;
+    let Inst{19-16} = addr{12-9}; // Rn
+    let Inst{9}     = addr{8};    // U
+    let Inst{7-0}   = addr{7-0};  // imm
   }
-  def s   : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer,
-                   opc, ".w\t$src, $addr",
-                   [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> {
+  def s   : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+                   opc, ".w\t$Rt, $addr",
+                   [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0000;
     let Inst{22-21} = opcod;
     let Inst{20} = 0; // !load
     let Inst{11-6} = 0b000000;
+
+    bits<4> Rt;
+    let Inst{15-12} = Rt;
+
+    bits<10> addr;
+    let Inst{19-16}   = addr{9-6}; // Rn
+    let Inst{3-0} = addr{5-2}; // Rm
+    let Inst{5-4}   = addr{1-0}; // imm
   }
 }
 
-/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
-  def r     : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                  opc, ".w\t$dst, $src",
-                 [(set rGPR:$dst, (opnode rGPR:$src))]> {
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+  def r     : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+                  opc, ".w\t$Rd, $Rm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -664,25 +1001,27 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
-                  opc, ".w\t$dst, $src, ror $rot",
-                 [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
+  def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+                  opc, ".w\t$Rd, $Rm, ror $rot",
+                 [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
      let Inst{19-16} = 0b1111; // Rn
      let Inst{15-12} = 0b1111;
      let Inst{7} = 1;
-     let Inst{5-4} = {?,?}; // rotate
+
+     bits<2> rot;
+     let Inst{5-4} = rot{1-0}; // rotate
    }
 }
 
 // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
-  def r     : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                  opc, "\t$dst, $src",
-                 [(set rGPR:$dst, (opnode rGPR:$src))]>,
-                 Requires<[HasT2ExtractPack]> {
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+  def r     : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+                  opc, "\t$Rd, $Rm",
+                 [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
+                 Requires<[HasT2ExtractPack, IsThumb2]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -691,25 +1030,27 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
-                  opc, "\t$dst, $src, ror $rot",
-                 [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
-                 Requires<[HasT2ExtractPack]> {
+  def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
+                  IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
+                 [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+                 Requires<[HasT2ExtractPack, IsThumb2]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
      let Inst{19-16} = 0b1111; // Rn
      let Inst{15-12} = 0b1111;
      let Inst{7} = 1;
-     let Inst{5-4} = {?,?}; // rotate
+
+     bits<2> rot;
+     let Inst{5-4} = rot{1-0}; // rotate
    }
 }
 
 // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
 // supported yet.
-multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
-  def r     : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                  opc, "\t$dst, $src", []> {
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+  def r     : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+                  opc, "\t$Rd, $Rm", []> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -718,25 +1059,27 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
-                  opc, "\t$dst, $src, ror $rot", []> {
+  def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
+                  opc, "\t$Rd, $Rm, ror $rot", []> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
      let Inst{19-16} = 0b1111; // Rn
      let Inst{15-12} = 0b1111;
      let Inst{7} = 1;
-     let Inst{5-4} = {?,?}; // rotate
+
+      bits<2> rot;
+      let Inst{5-4} = rot{1-0}; // rotate
    }
 }
 
-/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
-  def rr     : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
-                  opc, "\t$dst, $LHS, $RHS",
-                  [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
-                  Requires<[HasT2ExtractPack]> {
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+  def rr     : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+                  opc, "\t$Rd, $Rn, $Rm",
+                  [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+                  Requires<[HasT2ExtractPack, IsThumb2]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -744,25 +1087,28 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
-                  IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
-                  [(set rGPR:$dst, (opnode rGPR:$LHS,
-                                          (rotr rGPR:$RHS, rot_imm:$rot)))]>,
-                  Requires<[HasT2ExtractPack]> {
+  def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
+                  (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+                  IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+                  [(set rGPR:$Rd, (opnode rGPR:$Rn,
+                                          (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+                  Requires<[HasT2ExtractPack, IsThumb2]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
      let Inst{15-12} = 0b1111;
      let Inst{7} = 1;
-     let Inst{5-4} = {?,?}; // rotate
+
+     bits<2> rot;
+     let Inst{5-4} = rot{1-0}; // rotate
    }
 }
 
 // DO variant - disassembly only, no pattern
 
-multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
-  def rr     : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
-                  opc, "\t$dst, $LHS, $RHS", []> {
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+  def rr     : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+                  opc, "\t$Rd, $Rn, $Rm", []> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -770,14 +1116,16 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
-                  IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
+  def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+                  IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
      let Inst{15-12} = 0b1111;
      let Inst{7} = 1;
-     let Inst{5-4} = {?,?}; // rotate
+
+     bits<2> rot;
+     let Inst{5-4} = rot{1-0}; // rotate
    }
 }
 
@@ -789,24 +1137,23 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
 //  Miscellaneous Instructions.
 //
 
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+           string asm, list<dag> pattern>
+  : T2XI<oops, iops, itin, asm, pattern> {
+  bits<4> Rd;
+  bits<12> label;
+
+  let Inst{11-8}  = Rd;
+  let Inst{26}    = label{11};
+  let Inst{14-12} = label{10-8};
+  let Inst{7-0}   = label{7-0};
+}
+
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
-                      "adr${p}.w\t$dst, #$label", []> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25-24} = 0b10;
-  // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
-  let Inst{22} = 0;
-  let Inst{20} = 0;
-  let Inst{19-16} = 0b1111; // Rn
-  let Inst{15} = 0;
-}
-} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
-                        (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
-                        "adr${p}.w\t$dst, #${label}_${id}", []> {
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+              (ins t2adrlabel:$addr, pred:$p),
+              IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
   let Inst{31-27} = 0b11110;
   let Inst{25-24} = 0b10;
   // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -814,76 +1161,88 @@ def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
   let Inst{20} = 0;
   let Inst{19-16} = 0b1111; // Rn
   let Inst{15} = 0;
-}
 
+  bits<4> Rd;
+  bits<13> addr;
+  let Inst{11-8} = Rd;
+  let Inst{23}    = addr{12};
+  let Inst{21}    = addr{12};
+  let Inst{26}    = addr{11};
+  let Inst{14-12} = addr{10-8};
+  let Inst{7-0}   = addr{7-0};
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel   : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+                                Size4Bytes, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+                                (ins i32imm:$label, nohash_imm:$id, pred:$p),
+                                Size4Bytes, IIC_iALUi,
+                                []>;
+
+
+// FIXME: None of these add/sub SP special instructions should be necessary
+// at all for thumb2 since they use the same encodings as the generic
+// add/sub instructions. In thumb1 we need them since they have dedicated
+// encodings. At the least, they should be pseudo instructions.
 // ADD r, sp, {so_imm|i12}
-def t2ADDrSPi   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
-                        IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []> {
+let isCodeGenOnly = 1 in {
+def t2ADDrSPi   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+                        IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 0;
   let Inst{24-21} = 0b1000;
-  let Inst{20} = ?; // The S bit.
-  let Inst{19-16} = 0b1101; // Rn = sp
   let Inst{15} = 0;
 }
-def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
-                       IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> {
+def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+                       IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> {
   let Inst{31-27} = 0b11110;
-  let Inst{25} = 1;
-  let Inst{24-21} = 0b0000;
-  let Inst{20} = 0; // The S bit.
-  let Inst{19-16} = 0b1101; // Rn = sp
+  let Inst{25-20} = 0b100000;
   let Inst{15} = 0;
 }
 
 // ADD r, sp, so_reg
-def t2ADDrSPs   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
-                        IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []> {
+def t2ADDrSPs   : T2sTwoRegShiftedReg<
+                        (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+                        IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b1000;
-  let Inst{20} = ?; // The S bit.
-  let Inst{19-16} = 0b1101; // Rn = sp
   let Inst{15} = 0;
 }
 
 // SUB r, sp, {so_imm|i12}
-def t2SUBrSPi   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
-                        IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []> {
+def t2SUBrSPi   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+                        IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 0;
   let Inst{24-21} = 0b1101;
-  let Inst{20} = ?; // The S bit.
-  let Inst{19-16} = 0b1101; // Rn = sp
   let Inst{15} = 0;
 }
-def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
-                       IIC_iALUi, "subw", "\t$dst, $sp, $imm", []> {
+def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+                       IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> {
   let Inst{31-27} = 0b11110;
-  let Inst{25} = 1;
-  let Inst{24-21} = 0b0101;
-  let Inst{20} = 0; // The S bit.
-  let Inst{19-16} = 0b1101; // Rn = sp
+  let Inst{25-20} = 0b101010;
   let Inst{15} = 0;
 }
 
 // SUB r, sp, so_reg
-def t2SUBrSPs   : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+def t2SUBrSPs   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
                        IIC_iALUsi,
-                       "sub", "\t$dst, $sp, $rhs", []> {
+                       "sub", "\t$Rd, $Rn, $imm", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b1101;
-  let Inst{20} = ?; // The S bit.
   let Inst{19-16} = 0b1101; // Rn = sp
   let Inst{15} = 0;
 }
+} // end isCodeGenOnly = 1
 
 // Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, 
-                 "sdiv", "\t$dst, $a, $b",
-                 [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
-                 Requires<[HasDivide]> {
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "sdiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-21} = 0b011100;
   let Inst{20} = 0b1;
@@ -891,10 +1250,10 @@ def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
   let Inst{7-4} = 0b1111;
 }
 
-def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, 
-                 "udiv", "\t$dst, $a, $b",
-                 [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
-                 Requires<[HasDivide]> {
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "udiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-21} = 0b011101;
   let Inst{20} = 0b1;
@@ -908,26 +1267,26 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
 
 // Load
 let canFoldAsLoad = 1, isReMaterializable = 1  in
-defm t2LDR   : T2I_ld<0, 0b10, "ldr",  UnOpFrag<(load node:$Src)>>;
+defm t2LDR   : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+                      UnOpFrag<(load node:$Src)>>;
 
 // Loads with zero extension
-defm t2LDRH  : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
-defm t2LDRB  : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8  node:$Src)>>;
+defm t2LDRH  : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+                      UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB  : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+                      UnOpFrag<(zextloadi8  node:$Src)>>;
 
 // Loads with sign extension
-defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
-defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8  node:$Src)>>;
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+                      UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+                      UnOpFrag<(sextloadi8  node:$Src)>>;
 
 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
-def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
+def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                         (ins t2addrmode_imm8s4:$addr),
-                        IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
-                        (ins i32imm:$addr), IIC_iLoadi,
-                       "ldrd", "\t$dst1, $addr", []> {
-  let Inst{19-16} = 0b1111; // Rn
-}
+                        IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
 
 // zextload i1 -> zextload i8
@@ -976,70 +1335,71 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
 //        not via pattern.
 
 // Indexed loads
+
 let mayLoad = 1, neverHasSideEffects = 1 in {
-def t2LDR_PRE  : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDR_PRE  : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
                             (ins t2addrmode_imm8:$addr),
-                            AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
-                            "ldr", "\t$dst, $addr!", "$addr.base = $base_wb",
+                            AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+                            "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
                             []>;
 
-def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                            (ins GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
-                          "ldr", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+                            (ins GPR:$base, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+                          "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
                             []>;
 
-def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
                             (ins t2addrmode_imm8:$addr),
-                            AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
-                            "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb",
+                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+                            "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
                             []>;
-def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                            (ins GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
-                         "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+                            (ins GPR:$base, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+                         "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
                             []>;
 
-def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
                             (ins t2addrmode_imm8:$addr),
-                            AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
-                            "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb",
+                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+                            "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
                             []>;
-def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                            (ins GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
-                         "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+                            (ins GPR:$base, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+                         "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
                             []>;
 
-def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
                             (ins t2addrmode_imm8:$addr),
-                            AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
-                            "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb",
+                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+                            "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
                             []>;
-def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                            (ins GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
-                        "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+                            (ins GPR:$base, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+                        "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
                             []>;
 
-def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
                             (ins t2addrmode_imm8:$addr),
-                            AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
-                            "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb",
+                            AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+                            "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
                             []>;
-def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
-                            (ins GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
-                        "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
+                            (ins GPR:$base, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+                        "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
                             []>;
-} // mayLoad = 1, neverHasSideEffects = 1 
+} // mayLoad = 1, neverHasSideEffects = 1
 
 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
 // for disassembly only.
 // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
-class T2IldT<bit signed, bits<2> type, string opc>
-  : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
-          "\t$dst, $addr", []> {
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+  : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+          "\t$Rt, $addr", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-25} = 0b00;
   let Inst{24} = signed;
@@ -1048,74 +1408,83 @@ class T2IldT<bit signed, bits<2> type, string opc>
   let Inst{20} = 1; // load
   let Inst{11} = 1;
   let Inst{10-8} = 0b110; // PUW.
+
+  bits<4> Rt;
+  bits<13> addr;
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = addr{12-9};
+  let Inst{7-0}   = addr{7-0};
 }
 
-def t2LDRT   : T2IldT<0, 0b10, "ldrt">;
-def t2LDRBT  : T2IldT<0, 0b00, "ldrbt">;
-def t2LDRHT  : T2IldT<0, 0b01, "ldrht">;
-def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">;
-def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">;
+def t2LDRT   : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT  : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT  : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
 
 // Store
-defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+                   BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+                   BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+                   BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
 
 // Store doubleword
 let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
 def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
-                       (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
-               IIC_iStorer, "strd", "\t$src1, $addr", []>;
+                       (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+               IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
 
 // Indexed stores
 def t2STR_PRE  : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
-                         "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+                         "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
              [(set GPR:$base_wb,
-                   (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+                   (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
-                          "str", "\t$src, [$base], $offset", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+                          "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
              [(set GPR:$base_wb,
-                  (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+                  (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRH_PRE  : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
-                        "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+                        "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
         [(set GPR:$base_wb,
-              (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+              (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
-                         "strh", "\t$src, [$base], $offset", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+                         "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
        [(set GPR:$base_wb,
-             (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+             (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRB_PRE  : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
-                        "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+                        "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
          [(set GPR:$base_wb,
-               (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+               (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
-                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
-                            AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
-                         "strb", "\t$src, [$base], $offset", "$base = $base_wb",
+                            (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+                            AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+                         "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
         [(set GPR:$base_wb,
-              (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+              (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
 
 // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
 // only.
 // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
-class T2IstT<bits<2> type, string opc>
-  : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc,
-          "\t$src, $addr", []> {
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+  : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+          "\t$Rt, $addr", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-25} = 0b00;
   let Inst{24} = 0; // not signed
@@ -1124,51 +1493,62 @@ class T2IstT<bits<2> type, string opc>
   let Inst{20} = 0; // store
   let Inst{11} = 1;
   let Inst{10-8} = 0b110; // PUW
+
+  bits<4> Rt;
+  bits<13> addr;
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = addr{12-9};
+  let Inst{7-0}   = addr{7-0};
 }
 
-def t2STRT   : T2IstT<0b10, "strt">;
-def t2STRBT  : T2IstT<0b00, "strbt">;
-def t2STRHT  : T2IstT<0b01, "strht">;
+def t2STRT   : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT  : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT  : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
 
 // ldrd / strd pre / post variants
 // For disassembly only.
 
-def t2LDRD_PRE  : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2),
-                 (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
-                 "ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>;
+def t2LDRD_PRE  : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+                 (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+                 "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
 
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2),
-                 (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
-                 "ldrd", "\t$dst1, $dst2, [$base], $imm", []>;
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+                 (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+                 "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
 
 def t2STRD_PRE  : T2Ii8s4<1, 1, 0, (outs),
-                 (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
-                 NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>;
+                 (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
 
 def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
-                 (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
-                 NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>;
+                 (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
 
 // T2Ipl (Preload Data/Instruction) signals the memory system of possible future
 // data/instruction access.  These are for disassembly only.
-//
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass T2Ipl<bit instr, bit write, string opc> {
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2),  (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
 
-  def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
-                "\t[$base, $imm]", []> {
+  def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+                "\t$addr",
+              [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
-    let Inst{23} = 1; // U = 1
     let Inst{22} = 0;
     let Inst{21} = write;
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
+
+    bits<17> addr;
+    let Inst{19-16} = addr{16-13}; // Rn
+    let Inst{23}    = addr{12};    // U
+    let Inst{11-0}  = addr{11-0};  // imm12
   }
 
-  def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
-                "\t[$base, $imm]", []> {
+  def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+                "\t$addr",
+               [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
     let Inst{23} = 0; // U = 0
@@ -1177,22 +1557,15 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
     let Inst{11-8} = 0b1100;
-  }
 
-  def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
-                "\t[pc, $imm]", []> {
-    let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
-    let Inst{23} = ?; // add = (U == 1)
-    let Inst{22} = 0;
-    let Inst{21} = write;
-    let Inst{20} = 1;
-    let Inst{19-16} = 0b1111; // Rn = 0b1111
-    let Inst{15-12} = 0b1111;
+    bits<13> addr;
+    let Inst{19-16} = addr{12-9}; // Rn
+    let Inst{7-0}   = addr{7-0};  // imm8
   }
 
-  def r   : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc,
-                "\t[$base, $a]", []> {
+  def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+               "\t$addr",
+             [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
     let Inst{23} = 0; // add = TRUE for T1
@@ -1201,133 +1574,174 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
     let Inst{20} = 1;
     let Inst{15-12} = 0b1111;
     let Inst{11-6} = 0000000;
-    let Inst{5-4} = 0b00; // no shift is applied
-  }
 
-  def s   : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc,
-                "\t[$base, $a, lsl $shamt]", []> {
-    let Inst{31-25} = 0b1111100;
-    let Inst{24} = instr;
-    let Inst{23} = 0; // add = TRUE for T1
-    let Inst{22} = 0;
-    let Inst{21} = write;
-    let Inst{20} = 1;
-    let Inst{15-12} = 0b1111;
-    let Inst{11-6} = 0000000;
+    bits<10> addr;
+    let Inst{19-16} = addr{9-6}; // Rn
+    let Inst{3-0}   = addr{5-2}; // Rm
+    let Inst{5-4}   = addr{1-0}; // imm2
   }
 }
 
-defm t2PLD  : T2Ipl<0, 0, "pld">;
-defm t2PLDW : T2Ipl<0, 1, "pldw">;
-defm t2PLI  : T2Ipl<1, 0, "pli">;
+defm t2PLD  : T2Ipl<0, 0, "pld">,  Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI  : T2Ipl<0, 1, "pli">,  Requires<[IsThumb2,HasV7]>;
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$dsts, variable_ops), IIC_iLoadm,
-                 "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
-  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
-  let Inst{22} = 0;
-  let Inst{21} = 0; // The W bit.
-  let Inst{20} = 1; // Load
-}
+multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+                            InstrItinClass itin_upd, bit L_bit> {
+  def IA :
+    T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+    bits<4>  Rn;
+    bits<16> regs;
 
-def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$dsts, variable_ops), IIC_iLoadm,
-                      "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
-                      "$addr.addr = $wb", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
-  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
-  let Inst{22} = 0;
-  let Inst{21} = 1; // The W bit.
-  let Inst{20} = 1; // Load
-}
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
+    let Inst{31-27} = 0b11101;
+    let Inst{26-25} = 0b00;
+    let Inst{24-23} = 0b01;     // Increment After
+    let Inst{22}    = 0;
+    let Inst{21}    = 0;        // No writeback
+    let Inst{20}    = L_bit;
+    let Inst{19-16} = Rn;
+    let Inst{15-0}  = regs;
+  }
+  def IA_UPD :
+    T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+          itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+    bits<4>  Rn;
+    bits<16> regs;
 
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$srcs, variable_ops), IIC_iStorem,
-                 "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
-  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
-  let Inst{22} = 0;
-  let Inst{21} = 0; // The W bit.
-  let Inst{20} = 0; // Store
-}
+    let Inst{31-27} = 0b11101;
+    let Inst{26-25} = 0b00;
+    let Inst{24-23} = 0b01;     // Increment After
+    let Inst{22}    = 0;
+    let Inst{21}    = 1;        // Writeback
+    let Inst{20}    = L_bit;
+    let Inst{19-16} = Rn;
+    let Inst{15-0}  = regs;
+  }
+  def DB :
+    T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+    bits<4>  Rn;
+    bits<16> regs;
 
-def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$srcs, variable_ops),
-                      IIC_iStorem,
-                      "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
-                      "$addr.addr = $wb", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
-  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
-  let Inst{22} = 0;
-  let Inst{21} = 1; // The W bit.
-  let Inst{20} = 0; // Store
+    let Inst{31-27} = 0b11101;
+    let Inst{26-25} = 0b00;
+    let Inst{24-23} = 0b10;     // Decrement Before
+    let Inst{22}    = 0;
+    let Inst{21}    = 0;        // No writeback
+    let Inst{20}    = L_bit;
+    let Inst{19-16} = Rn;
+    let Inst{15-0}  = regs;
+  }
+  def DB_UPD :
+    T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+          itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+    bits<4>  Rn;
+    bits<16> regs;
+
+    let Inst{31-27} = 0b11101;
+    let Inst{26-25} = 0b00;
+    let Inst{24-23} = 0b10;     // Decrement Before
+    let Inst{22}    = 0;
+    let Inst{21}    = 1;        // Writeback
+    let Inst{20}    = L_bit;
+    let Inst{19-16} = Rn;
+    let Inst{15-0}  = regs;
+  }
 }
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
 
 let neverHasSideEffects = 1 in
-def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
-                   "mov", ".w\t$dst, $src", []> {
+def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+                   "mov", ".w\t$Rd, $Rm", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
-  let Inst{20} = ?; // The S bit.
   let Inst{19-16} = 0b1111; // Rn
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0000;
 }
 
 // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
-                   "mov", ".w\t$dst, $src",
-                   [(set rGPR:$dst, t2_so_imm:$src)]> {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+    AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+                   "mov", ".w\t$Rd, $imm",
+                   [(set rGPR:$Rd, t2_so_imm:$imm)]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 0;
   let Inst{24-21} = 0b0010;
-  let Inst{20} = ?; // The S bit.
   let Inst{19-16} = 0b1111; // Rn
   let Inst{15} = 0;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
-                   "movw", "\t$dst, $src",
-                   [(set rGPR:$dst, imm0_65535:$src)]> {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+                   "movw", "\t$Rd, $imm",
+                   [(set rGPR:$Rd, imm0_65535:$imm)]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-21} = 0b0010;
   let Inst{20} = 0; // The S bit.
   let Inst{15} = 0;
+
+  bits<4> Rd;
+  bits<16> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = imm{15-12};
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
 }
 
-let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
-                    "movt", "\t$dst, $imm",
-                    [(set rGPR:$dst,
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+                                (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+                    (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+                    "movt", "\t$Rd, $imm",
+                    [(set rGPR:$Rd,
                           (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-21} = 0b0110;
   let Inst{20} = 0; // The S bit.
   let Inst{15} = 0;
+
+  bits<4> Rd;
+  bits<16> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = imm{15-12};
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
 }
 
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+                     (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
 def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
 
 //===----------------------------------------------------------------------===//
@@ -1336,28 +1750,28 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
 
 // Sign extenders
 
-defm t2SXTB  : T2I_unary_rrot<0b100, "sxtb",
+defm t2SXTB  : T2I_ext_rrot<0b100, "sxtb",
                               UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm t2SXTH  : T2I_unary_rrot<0b000, "sxth",
+defm t2SXTH  : T2I_ext_rrot<0b000, "sxth",
                               UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
 
-defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
                         BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah",
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
                         BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
 
 // TODO: SXT(A){B|H}16 - done for disassembly only
 
 // Zero extenders
 
 let AddedComplexity = 16 in {
-defm t2UXTB   : T2I_unary_rrot<0b101, "uxtb",
+defm t2UXTB   : T2I_ext_rrot<0b101, "uxtb",
                                UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm t2UXTH   : T2I_unary_rrot<0b001, "uxth",
+defm t2UXTH   : T2I_ext_rrot<0b001, "uxth",
                                UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
                                UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
 
 // FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -1365,15 +1779,17 @@ defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
 //        instead so we can include a check for masking back in the upper
 //        eight bits of the source into the lower eight bits of the result.
 //def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
-//            (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+//            (t2UXTB16r_rot rGPR:$Src, 24)>,
+//          Requires<[HasT2ExtractPack, IsThumb2]>;
 def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
-            (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+            (t2UXTB16r_rot rGPR:$Src, 8)>,
+        Requires<[HasT2ExtractPack, IsThumb2]>;
 
-defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
                            BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah",
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
                            BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1387,8 +1803,10 @@ defm t2SUB  : T2I_bin_ii12rs<0b101, "sub",
 
 // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
 defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+                             IIC_iALUi, IIC_iALUr, IIC_iALUsi,
                              BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
 defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+                             IIC_iALUi, IIC_iALUr, IIC_iALUsi,
                              BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
 defm t2ADC  : T2I_adde_sube_irs<0b1010, "adc",
@@ -1436,8 +1854,8 @@ def : T2Pat<(adde       rGPR:$src, t2_so_imm_not:$imm),
 
 // Select Bytes -- for disassembly only
 
-def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
-                "\t$dst, $a, $b", []> {
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-24} = 0b010;
   let Inst{23} = 0b1;
@@ -1450,28 +1868,41 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
 // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
 // And Miscellaneous operations -- for disassembly only
 class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
-              list<dag> pat = [/* For disassembly only; pattern left blank */]>
-  : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
-        "\t$dst, $a, $b", pat> {
+              list<dag> pat = [/* For disassembly only; pattern left blank */],
+              dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+              string asm = "\t$Rd, $Rn, $Rm">
+  : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0101;
   let Inst{22-20} = op22_20;
   let Inst{15-12} = 0b1111;
   let Inst{7-4} = op7_4;
+
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{3-0}   = Rm;
 }
 
 // Saturating add/subtract -- for disassembly only
 
 def t2QADD    : T2I_pam<0b000, 0b1000, "qadd",
-                        [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
+                        [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+                        (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
 def t2QADD16  : T2I_pam<0b001, 0b0001, "qadd16">;
 def t2QADD8   : T2I_pam<0b000, 0b0001, "qadd8">;
 def t2QASX    : T2I_pam<0b010, 0b0001, "qasx">;
-def t2QDADD   : T2I_pam<0b000, 0b1001, "qdadd">;
-def t2QDSUB   : T2I_pam<0b000, 0b1011, "qdsub">;
+def t2QDADD   : T2I_pam<0b000, 0b1001, "qdadd", [],
+                        (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB   : T2I_pam<0b000, 0b1011, "qdsub", [],
+                        (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
 def t2QSAX    : T2I_pam<0b110, 0b0001, "qsax">;
 def t2QSUB    : T2I_pam<0b000, 0b1010, "qsub",
-                        [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
+                        [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+                        (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
 def t2QSUB16  : T2I_pam<0b101, 0b0001, "qsub16">;
 def t2QSUB8   : T2I_pam<0b100, 0b0001, "qsub8">;
 def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
@@ -1511,21 +1942,61 @@ def t2UHSAX   : T2I_pam<0b110, 0b0110, "uhsax">;
 def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
 def t2UHSUB8  : T2I_pam<0b100, 0b0110, "uhsub8">;
 
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+  dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-24} = 0b011;
+  let Inst{23}    = long;
+  let Inst{22-20} = op22_20;
+  let Inst{7-4}   = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+  dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-24} = 0b011;
+  let Inst{23}    = long;
+  let Inst{22-20} = op22_20;
+  let Inst{7-4}   = op7_4;
+}
+
 // Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
 
-def t2USAD8   : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
-                                           (ins rGPR:$a, rGPR:$b),
-                        NoItinerary, "usad8", "\t$dst, $a, $b", []> {
+def t2USAD8   : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+                                           (ins rGPR:$Rn, rGPR:$Rm),
+                        NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> {
   let Inst{15-12} = 0b1111;
 }
-def t2USADA8  : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
-                       (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
-                        "\t$dst, $a, $b, $acc", []>;
+def t2USADA8  : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+                       (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+                        "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>;
 
 // Signed/Unsigned saturate -- for disassembly only
 
-def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
-                NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> sat_imm;
+  bits<7> sh;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{4-0}   = sat_imm{4-0};
+  let Inst{21}    = sh{6};
+  let Inst{14-12} = sh{4-2};
+  let Inst{7-6}   = sh{1-0};
+}
+
+def t2SSAT: T2SatI<
+                (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+                NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1100;
@@ -1533,8 +2004,9 @@ def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
   let Inst{15} = 0;
 }
 
-def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
-                   "ssat16", "\t$dst, $bit_pos, $a",
+def t2SSAT16: T2SatI<
+                   (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+                   "ssat16", "\t$Rd, $sat_imm, $Rn",
                    [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1100;
@@ -1545,8 +2017,9 @@ def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
   let Inst{7-6} = 0b00;    // imm2 = '00'
 }
 
-def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
-                NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+def t2USAT: T2SatI<
+                (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+                NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1110;
@@ -1554,8 +2027,9 @@ def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
   let Inst{15} = 0;
 }
 
-def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
-                   "usat16", "\t$dst, $bit_pos, $a",
+def t2USAT16: T2SatI<
+                    (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+                   "usat16", "\t$dst, $sat_imm, $Rn",
                    [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1110;
@@ -1579,23 +2053,23 @@ defm t2ASR  : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra  node:$LHS, node:$RHS)>>;
 defm t2ROR  : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
 
 let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
-                   "rrx", "\t$dst, $src",
-                   [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+                   "rrx", "\t$Rd, $Rm",
+                   [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
-  let Inst{20} = ?; // The S bit.
   let Inst{19-16} = 0b1111; // Rn
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0011;
 }
 }
 
-let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
-                        "lsrs", ".w\t$dst, $src, #1",
-                        [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+                        (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+                        "lsrs", ".w\t$Rd, $Rm, #1",
+                        [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -1606,9 +2080,10 @@ def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
   let Inst{14-12} = 0b000;
   let Inst{7-6} = 0b01;
 }
-def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
-                        "asrs", ".w\t$dst, $src, #1",
-                        [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
+def t2MOVsra_flag : T2TwoRegShiftImm<
+                        (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+                        "asrs", ".w\t$Rd, $Rm, #1",
+                        [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -1626,39 +2101,67 @@ def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
 //
 
 defm t2AND  : T2I_bin_w_irs<0b0000, "and",
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsi,
                             BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
 defm t2ORR  : T2I_bin_w_irs<0b0010, "orr",
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsi,
                             BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
 defm t2EOR  : T2I_bin_w_irs<0b0100, "eor",
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsi,
                             BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 
 defm t2BIC  : T2I_bin_w_irs<0b0001, "bic",
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsi,
                             BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
-defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
-                            BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+    : T2I<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<5> msb;
+  bits<5> lsb;
+
+  let Inst{11-8}  = Rd;
+  let Inst{4-0}   = msb{4-0};
+  let Inst{14-12} = lsb{4-2};
+  let Inst{7-6}   = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+    : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rn;
 
-let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
-                IIC_iUNAsi, "bfc", "\t$dst, $imm",
-                [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+  let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+                IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+                [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b10110;
   let Inst{19-16} = 0b1111; // Rn
   let Inst{15} = 0;
+
+  bits<10> imm;
+  let msb{4-0} = imm{9-5};
+  let lsb{4-0} = imm{4-0};
 }
 
-def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
-                 IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
+def t2SBFX: T2TwoRegBitFI<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+                 IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b10100;
   let Inst{15} = 0;
 }
 
-def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
-                 IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
+def t2UBFX: T2TwoRegBitFI<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+                 IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b11100;
@@ -1666,24 +2169,50 @@ def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
 }
 
 // A8.6.18  BFI - Bitfield insert (Encoding T1)
-let Constraints = "$src = $dst" in
-def t2BFI : T2I<(outs rGPR:$dst),
-                (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
-                IIC_iALUi, "bfi", "\t$dst, $val, $imm",
-                [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
-                                 bf_inv_mask_imm:$imm))]> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25} = 1;
-  let Inst{24-20} = 0b10110;
-  let Inst{15} = 0;
+let Constraints = "$src = $Rd" in {
+  def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+                  (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+                  IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+                  [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+                                   bf_inv_mask_imm:$imm))]> {
+    let Inst{31-27} = 0b11110;
+    let Inst{25} = 1;
+    let Inst{24-20} = 0b10110;
+    let Inst{15} = 0;
+
+    bits<10> imm;
+    let msb{4-0} = imm{9-5};
+    let lsb{4-0} = imm{4-0};
+  }
+
+  // GNU as only supports this form of bfi (w/ 4 arguments)
+  let isAsmParserOnly = 1 in
+  def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
+                  (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
+                       width_imm:$width),
+                  IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
+                  []> {
+    let Inst{31-27} = 0b11110;
+    let Inst{25} = 1;
+    let Inst{24-20} = 0b10110;
+    let Inst{15} = 0;
+
+    bits<5> lsbit;
+    bits<5> width;
+    let msb{4-0} = width; // Custom encoder => lsb+width-1
+    let lsb{4-0} = lsbit;
+  }
 }
 
-defm t2ORN  : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or  node:$LHS,
-                          (not node:$RHS))>, 0, "">;
+defm t2ORN  : T2I_bin_irs<0b0011, "orn",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+                          BinOpFrag<(or  node:$LHS, (not node:$RHS))>, 0, "">;
 
 // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
 let AddedComplexity = 1 in
-defm t2MVN  : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
+defm t2MVN  : T2I_un_irs <0b0011, "mvn",
+                          IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+                          UnOpFrag<(not node:$Src)>, 1, 1>;
 
 
 let AddedComplexity = 1 in
@@ -1702,9 +2231,9 @@ def : T2Pat<(t2_so_imm_not:$src),
 //  Multiply Instructions.
 //
 let isCommutable = 1 in
-def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-                "mul", "\t$dst, $a, $b",
-                [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+                "mul", "\t$Rd, $Rn, $Rm",
+                [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b000;
@@ -1712,83 +2241,63 @@ def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
   let Inst{7-4} = 0b0000; // Multiply
 }
 
-def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-		"mla", "\t$dst, $a, $b, $c",
-		[(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
+def t2MLA: T2FourReg<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                "mla", "\t$Rd, $Rn, $Rm, $Ra",
+                [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b000;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0000; // Multiply
 }
 
-def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-		"mls", "\t$dst, $a, $b, $c",
-                [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
+def t2MLS: T2FourReg<
+                (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                "mls", "\t$Rd, $Rn, $Rm, $Ra",
+                [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b000;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0001; // Multiply and Subtract
 }
 
 // Extra precision multiplies with low / high results
 let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
-                  (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
-                   "smull", "\t$ldst, $hdst, $a, $b", []> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0111;
-  let Inst{22-20} = 0b000;
-  let Inst{7-4} = 0b0000;
-}
-
-def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
-                  (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
-                   "umull", "\t$ldst, $hdst, $a, $b", []> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0111;
-  let Inst{22-20} = 0b010;
-  let Inst{7-4} = 0b0000;
-}
+def t2SMULL : T2MulLong<0b000, 0b0000,
+                  (outs rGPR:$Rd, rGPR:$Ra),
+                  (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+                   "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+                  (outs rGPR:$RdLo, rGPR:$RdHi),
+                  (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+                   "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
 } // isCommutable
 
 // Multiply + accumulate
-def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
-                  (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
-                  "smlal", "\t$ldst, $hdst, $a, $b", []>{
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0111;
-  let Inst{22-20} = 0b100;
-  let Inst{7-4} = 0b0000;
-}
-
-def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
-                  (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
-                  "umlal", "\t$ldst, $hdst, $a, $b", []>{
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0111;
-  let Inst{22-20} = 0b110;
-  let Inst{7-4} = 0b0000;
-}
-
-def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
-                  (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
-                  "umaal", "\t$ldst, $hdst, $a, $b", []>{
-  let Inst{31-27} = 0b11111;
-  let Inst{26-23} = 0b0111;
-  let Inst{22-20} = 0b110;
-  let Inst{7-4} = 0b0110;
-}
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+                  (outs rGPR:$RdLo, rGPR:$RdHi),
+                  (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+                  "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+                  (outs rGPR:$RdLo, rGPR:$RdHi),
+                  (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+                  "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+                  (outs rGPR:$RdLo, rGPR:$RdHi),
+                  (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+                  "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
 } // neverHasSideEffects
 
 // Rounding variants of the below included for disassembly only
 
 // Most significant word multiply
-def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-                  "smmul", "\t$dst, $a, $b",
-                  [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+                  "smmul", "\t$Rd, $Rn, $Rm",
+                  [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -1796,8 +2305,8 @@ def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
   let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
 }
 
-def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-                  "smmulr", "\t$dst, $a, $b", []> {
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+                  "smmulr", "\t$Rd, $Rn, $Rm", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -1805,49 +2314,49 @@ def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
   let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
 }
 
-def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-                  "smmla", "\t$dst, $a, $b, $c",
-                  [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
+def t2SMMLA : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+                [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
 }
 
-def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-                  "smmlar", "\t$dst, $a, $b, $c", []> {
+def t2SMMLAR: T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                  "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
 }
 
-def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-                   "smmls", "\t$dst, $a, $b, $c",
-                   [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
+def t2SMMLS: T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+                [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b110;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
 }
 
-def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
-                   "smmlsr", "\t$dst, $a, $b, $c", []> {
+def t2SMMLSR:T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+                "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b110;
-  let Inst{15-12} = {?, ?, ?, ?}; // Ra
   let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
 }
 
 multiclass T2I_smul<string opc, PatFrag opnode> {
-  def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-              !strconcat(opc, "bb"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
-                                      (sext_inreg rGPR:$b, i16)))]> {
+  def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+                                      (sext_inreg rGPR:$Rm, i16)))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -1856,10 +2365,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
     let Inst{5-4} = 0b00;
   }
 
-  def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-              !strconcat(opc, "bt"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
-                                      (sra rGPR:$b, (i32 16))))]> {
+  def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+                                      (sra rGPR:$Rm, (i32 16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -1868,10 +2377,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
     let Inst{5-4} = 0b01;
   }
 
-  def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-              !strconcat(opc, "tb"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
-                                      (sext_inreg rGPR:$b, i16)))]> {
+  def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+                                      (sext_inreg rGPR:$Rm, i16)))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -1880,10 +2389,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
     let Inst{5-4} = 0b10;
   }
 
-  def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
-              !strconcat(opc, "tt"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
-                                      (sra rGPR:$b, (i32 16))))]> {
+  def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+                                      (sra rGPR:$Rm, (i32 16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -1892,10 +2401,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
     let Inst{5-4} = 0b11;
   }
 
-  def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
-              !strconcat(opc, "wb"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (sra (opnode rGPR:$a,
-                                    (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
+  def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+                                    (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -1904,10 +2413,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
     let Inst{5-4} = 0b00;
   }
 
-  def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
-              !strconcat(opc, "wt"), "\t$dst, $a, $b",
-              [(set rGPR:$dst, (sra (opnode rGPR:$a,
-                                    (sra rGPR:$b, (i32 16))), (i32 16)))]> {
+  def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+              !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+              [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+                                    (sra rGPR:$Rm, (i32 16))), (i32 16)))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -1919,75 +2428,75 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
 
 
 multiclass T2I_smla<string opc, PatFrag opnode> {
-  def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-              !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
-              [(set rGPR:$dst, (add rGPR:$acc,
-                               (opnode (sext_inreg rGPR:$a, i16),
-                                       (sext_inreg rGPR:$b, i16))))]> {
+  def BB : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+              !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set rGPR:$Rd, (add rGPR:$Ra,
+                               (opnode (sext_inreg rGPR:$Rn, i16),
+                                       (sext_inreg rGPR:$Rm, i16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b00;
   }
 
-  def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-             !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
-             [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
-                                                  (sra rGPR:$b, (i32 16)))))]> {
+  def BT : T2FourReg<
+       (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+             !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+             [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+                                                 (sra rGPR:$Rm, (i32 16)))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b01;
   }
 
-  def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-              !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
-              [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
-                                                (sext_inreg rGPR:$b, i16))))]> {
+  def TB : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+              !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+                                               (sext_inreg rGPR:$Rm, i16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b10;
   }
 
-  def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-              !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
-             [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
-                                                  (sra rGPR:$b, (i32 16)))))]> {
+  def TT : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+              !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+             [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+                                                 (sra rGPR:$Rm, (i32 16)))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b11;
   }
 
-  def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-              !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
-              [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
-                                     (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
+  def WB : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+              !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+                                    (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b00;
   }
 
-  def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
-              !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
-              [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
-                                       (sra rGPR:$b, (i32 16))), (i32 16))))]> {
+  def WT : T2FourReg<
+        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+              !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+                                      (sra rGPR:$Rm, (i32 16))), (i32 16))))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
-    let Inst{15-12} = {?, ?, ?, ?}; // Ra
     let Inst{7-6} = 0b00;
     let Inst{5-4} = 0b01;
   }
@@ -1997,62 +2506,68 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 
 // Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
-         (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+         (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
            [/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
-         (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+         (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
            [/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
-         (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+         (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
            [/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
-         (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+         (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
            [/* For disassembly only; pattern left blank */]>;
 
 // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
 // These are for disassembly only.
 
-def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
-                     IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2ThreeReg_mac<
+            0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+            IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> {
   let Inst{15-12} = 0b1111;
 }
-def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
-                     IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2ThreeReg_mac<
+            0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+            IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> {
   let Inst{15-12} = 0b1111;
 }
-def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
-                     IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2ThreeReg_mac<
+            0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+            IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> {
   let Inst{15-12} = 0b1111;
 }
-def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
-                     IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2ThreeReg_mac<
+            0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+            IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> {
   let Inst{15-12} = 0b1111;
 }
-def t2SMLAD   : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
-                        (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
-                        "\t$dst, $a, $b, $acc", []>;
-def t2SMLADX  : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
-                        (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
-                        "\t$dst, $a, $b, $acc", []>;
-def t2SMLSD   : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
-                        (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
-                        "\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX  : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
-                        (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
-                        "\t$dst, $a, $b, $acc", []>;
-def t2SMLALD  : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
-                        (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
-                        "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
-                        (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
-                        "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD  : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
-                        (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
-                        "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
-                        (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
-                        "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLAD   : T2ThreeReg_mac<
+            0, 0b010, 0b0000, (outs rGPR:$Rd),
+            (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+            "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLADX  : T2FourReg_mac<
+            0, 0b010, 0b0001, (outs rGPR:$Rd),
+            (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+            "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSD   : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+            (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+            "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSDX  : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+            (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+            "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLALD  : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+                        (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
+                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+                        (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
+                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLD  : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+                        (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
+                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+                        (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+                        "\t$Ra, $Rd, $Rm, $Rn", []>;
 
 //===----------------------------------------------------------------------===//
 //  Misc. Arithmetic Instructions.
@@ -2060,99 +2575,117 @@ def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
 
 class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
       InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : T2I<oops, iops, itin, opc, asm, pattern> {
+  : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
   let Inst{31-27} = 0b11111;
   let Inst{26-22} = 0b01010;
   let Inst{21-20} = op1;
   let Inst{15-12} = 0b1111;
   let Inst{7-6} = 0b10;
   let Inst{5-4} = op2;
+  let Rn{3-0} = Rm;
 }
 
-def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                    "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+                    "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
 
-def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                      "rbit", "\t$dst, $src",
-                      [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+                      "rbit", "\t$Rd, $Rm",
+                      [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
 
-def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                 "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+                 "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
 
-def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                       "rev16", ".w\t$dst, $src",
-                [(set rGPR:$dst,
-                    (or (and (srl rGPR:$src, (i32 8)), 0xFF),
-                        (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
-                            (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
-                               (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+                       "rev16", ".w\t$Rd, $Rm",
+                [(set rGPR:$Rd,
+                    (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
+                        (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
+                            (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
+                               (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
 
-def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
-                       "revsh", ".w\t$dst, $src",
-                 [(set rGPR:$dst,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+                       "revsh", ".w\t$Rd, $Rm",
+                 [(set rGPR:$Rd,
                     (sext_inreg
-                      (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
-                          (shl rGPR:$src, (i32 8))), i16))]>;
-
-def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
-                  IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
-                  [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
-                                      (and (shl rGPR:$src2, lsl_amt:$sh),
+                      (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+                          (shl rGPR:$Rm, (i32 8))), i16))]>;
+
+def t2PKHBT : T2ThreeReg<
+            (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+                  IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+                  [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+                                      (and (shl rGPR:$Rm, lsl_amt:$sh),
                                            0xFFFF0000)))]>,
-                  Requires<[HasT2ExtractPack]> {
+                  Requires<[HasT2ExtractPack, IsThumb2]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-20} = 0b01100;
   let Inst{5} = 0; // BT form
   let Inst{4} = 0;
+
+  bits<8> sh;
+  let Inst{14-12} = sh{7-5};
+  let Inst{7-6}   = sh{4-3};
 }
 
 // Alternate cases for PKHBT where identities eliminate some nodes.
 def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
             (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
-            Requires<[HasT2ExtractPack]>;
+            Requires<[HasT2ExtractPack, IsThumb2]>;
 def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
             (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
-            Requires<[HasT2ExtractPack]>;
+            Requires<[HasT2ExtractPack, IsThumb2]>;
 
 // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
 // will match the pattern below.
-def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
-                  IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
-                  [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
-                                       (and (sra rGPR:$src2, asr_amt:$sh),
+def t2PKHTB : T2ThreeReg<
+                  (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+                  IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+                  [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+                                       (and (sra rGPR:$Rm, asr_amt:$sh),
                                             0xFFFF)))]>,
-                  Requires<[HasT2ExtractPack]> {
+                  Requires<[HasT2ExtractPack, IsThumb2]> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-20} = 0b01100;
   let Inst{5} = 1; // TB form
   let Inst{4} = 0;
+
+  bits<8> sh;
+  let Inst{14-12} = sh{7-5};
+  let Inst{7-6}   = sh{4-3};
 }
 
 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
 def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
             (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
-            Requires<[HasT2ExtractPack]>;
+            Requires<[HasT2ExtractPack, IsThumb2]>;
 def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
                 (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
             (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
-            Requires<[HasT2ExtractPack]>;
+            Requires<[HasT2ExtractPack, IsThumb2]>;
 
 //===----------------------------------------------------------------------===//
 //  Comparison Instructions...
 //
 defm t2CMP  : T2I_cmp_irs<0b1101, "cmp",
+                          IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
                           BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
-                          BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ  GPR:$lhs, t2_so_imm:$imm),
+            (t2CMPri  GPR:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ  GPR:$lhs, rGPR:$rhs),
+            (t2CMPrr  GPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ  GPR:$lhs, t2_so_reg:$rhs),
+            (t2CMPrs  GPR:$lhs, t2_so_reg:$rhs)>;
 
 //FIXME: Disable CMN, as CCodes are backwards from compare expectations
 //       Compare-to-zero still works out, just not the relationals
 //defm t2CMN  : T2I_cmp_irs<0b1000, "cmn",
 //                          BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
 defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+                          IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
                           BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
 
 //def : T2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
@@ -2162,18 +2695,21 @@ def : T2Pat<(ARMcmpZ  GPR:$src, t2_so_imm_neg:$imm),
             (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
 
 defm t2TST  : T2I_cmp_irs<0b0000, "tst",
-                          BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>;
+                          IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+                         BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
 defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
-                          BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
+                          IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+                         BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
 
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
-                   "mov", ".w\t$dst, $true",
-   [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst"> {
+def t2MOVCCr : T2TwoReg<
+                   (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr,
+                   "mov", ".w\t$Rd, $Rm",
+   [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd"> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -2183,10 +2719,11 @@ def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
   let Inst{7-4} = 0b0000;
 }
 
-def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
-                   IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
-                   RegConstraint<"$false = $dst"> {
+let isMoveImm = 1 in
+def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+                   IIC_iCMOVi, "mov", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+                   RegConstraint<"$false = $Rd"> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 0;
   let Inst{24-21} = 0b0010;
@@ -2195,9 +2732,49 @@ def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
   let Inst{15} = 0;
 }
 
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+                      IIC_iCMOVi,
+                      "movw", "\t$Rd, $imm", []>,
+                      RegConstraint<"$false = $Rd"> {
+  let Inst{31-27} = 0b11110;
+  let Inst{25} = 1;
+  let Inst{24-21} = 0b0010;
+  let Inst{20} = 0; // The S bit.
+  let Inst{15} = 0;
+
+  bits<4> Rd;
+  bits<16> imm;
+
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = imm{15-12};
+  let Inst{26}    = imm{11};
+  let Inst{14-12} = imm{10-8};
+  let Inst{7-0}   = imm{7-0};
+}
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+                               (ins rGPR:$false, i32imm:$src, pred:$p),
+                    IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+                   IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+                   imm:$cc, CCR:$ccr))*/]>,
+                   RegConstraint<"$false = $Rd"> {
+  let Inst{31-27} = 0b11110;
+  let Inst{25} = 0;
+  let Inst{24-21} = 0b0011;
+  let Inst{20} = 0; // The S bit.
+  let Inst{19-16} = 0b1111; // Rn
+  let Inst{15} = 0;
+}
+
 class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
                    string opc, string asm, list<dag> pattern>
-  : T2I<oops, iops, itin, opc, asm, pattern> {
+  : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
@@ -2205,22 +2782,22 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
   let Inst{19-16} = 0b1111; // Rn
   let Inst{5-4} = opcod; // Shift type.
 }
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
-                             (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
-                             IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
-                 RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
-                             (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
-                             IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
-                 RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
-                             (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
-                             IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
-                 RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
-                             (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
-                             IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
-                 RegConstraint<"$false = $dst">;
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+                             (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+                             IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+                 RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+                             (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+                             IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+                 RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+                             (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+                             IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+                 RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+                             (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+                             IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+                 RegConstraint<"$false = $Rd">;
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
@@ -2229,78 +2806,29 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
 
 // memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
-def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
-                    [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
-  let Inst{31-4} = 0xF3BF8F5;
-  // FIXME: add support for options other than a full system DMB
-  let Inst{3-0} = 0b1111;
-}
-
-def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
-                    [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
-  let Inst{31-4} = 0xF3BF8F4;
-  // FIXME: add support for options other than a full system DSB
-  let Inst{3-0} = 0b1111;
-}
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+                  "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+                  Requires<[IsThumb, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf3bf8f5;
+  let Inst{3-0} = opt;
 }
-
-// Helper class for multiclass T2MemB -- for disassembly only
-class T2I_memb<string opc, string asm>
-  : T2I<(outs), (ins), NoItinerary, opc, asm,
-        [/* For disassembly only; pattern left blank */]>,
-    Requires<[IsThumb2, HasV7]> {
-  let Inst{31-20} = 0xf3b;
-  let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
 }
 
-multiclass T2MemB<bits<4> op7_4, string opc> {
-
-  def st : T2I_memb<opc, "\tst"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b1110;
-  }
-
-  def ish : T2I_memb<opc, "\tish"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b1011;
-  }
-
-  def ishst : T2I_memb<opc, "\tishst"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b1010;
-  }
-
-  def nsh : T2I_memb<opc, "\tnsh"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b0111;
-  }
-
-  def nshst : T2I_memb<opc, "\tnshst"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b0110;
-  }
-
-  def osh : T2I_memb<opc, "\tosh"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b0011;
-  }
-
-  def oshst : T2I_memb<opc, "\toshst"> {
-    let Inst{7-4} = op7_4;
-    let Inst{3-0} = 0b0010;
-  }
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+                  "dsb", "\t$opt",
+                  [/* For disassembly only; pattern left blank */]>,
+                  Requires<[IsThumb, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf3bf8f4;
+  let Inst{3-0} = opt;
 }
 
-// These DMB variants are for disassembly only.
-defm t2DMB : T2MemB<0b0101, "dmb">;
-
-// These DSB variants are for disassembly only.
-defm t2DSB : T2MemB<0b0100, "dsb">;
-
 // ISB has only full system option -- for disassembly only
-def t2ISBsy : T2I_memb<"isb", ""> {
-  let Inst{7-4} = 0b0110;
+def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
+                  [/* For disassembly only; pattern left blank */]>,
+                  Requires<[IsThumb2, HasV7]> {
+  let Inst{31-4} = 0xf3bf8f6;
   let Inst{3-0} = 0b1111;
 }
 
@@ -2314,6 +2842,11 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Inst{7-6} = 0b01;
   let Inst{5-4} = opcod;
   let Inst{3-0} = 0b1111;
+
+  bits<4> Rn;
+  bits<4> Rt;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
 }
 class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                 InstrItinClass itin, string opc, string asm, string cstr,
@@ -2324,60 +2857,88 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Inst{11-8} = rt2;
   let Inst{7-6} = 0b01;
   let Inst{5-4} = opcod;
+
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rt;
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
 }
 
 let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+                         Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]",
                          "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+                         Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]",
                          "", []>;
-def t2LDREX  : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
+def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
                        Size4Bytes, NoItinerary,
-                       "ldrex", "\t$dest, [$ptr]", "",
+                       "ldrex", "\t$Rt, [$Rn]", "",
                       []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-20} = 0b0000101;
   let Inst{11-8} = 0b1111;
   let Inst{7-0} = 0b00000000; // imm8 = 0
+
+  bits<4> Rn;
+  bits<4> Rt;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
 }
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "ldrexd", "\t$dest, $dest2, [$ptr]", "",
-                         [], {?, ?, ?, ?}>;
+                         "ldrexd", "\t$Rt, $Rt2, [$Rn]", "",
+                         [], {?, ?, ?, ?}> {
+  bits<4> Rt2;
+  let Inst{11-8} = Rt2;
+}
 }
 
-let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+                         "strexb", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX  : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+                         "strexh", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
                        AddrModeNone, Size4Bytes, NoItinerary,
-                       "strex", "\t$success, $src, [$ptr]", "",
+                       "strex", "\t$Rd, $Rt, [$Rn]", "",
                       []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-20} = 0b0000100;
   let Inst{7-0} = 0b00000000; // imm8 = 0
+
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rt;
+  let Inst{11-8}  = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
 }
-def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
-                         (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+                         (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn),
                          AddrModeNone, Size4Bytes, NoItinerary,
-                         "strexd", "\t$success, $src, $src2, [$ptr]", "", [],
-                         {?, ?, ?, ?}>;
+                         "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [],
+                         {?, ?, ?, ?}> {
+  bits<4> Rt2;
+  let Inst{11-8} = Rt2;
+}
 }
 
 // Clear-Exclusive is for disassembly only.
-def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
-                  [/* For disassembly only; pattern left blank */]>,
-            Requires<[IsARM, HasV7]>  {
-  let Inst{31-20} = 0xf3b;
+def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
+                   [/* For disassembly only; pattern left blank */]>,
+            Requires<[IsThumb2, HasV7]>  {
+  let Inst{31-16} = 0xf3bf;
   let Inst{15-14} = 0b10;
+  let Inst{13} = 0;
   let Inst{12} = 0;
+  let Inst{11-8} = 0b1111;
   let Inst{7-4} = 0b0010;
+  let Inst{3-0} = 0b1111;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2386,7 +2947,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
 
 // __aeabi_read_tp preserves the registers r1-r3.
 let isCall = 1,
-  Defs = [R0, R12, LR, CPSR] in {
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
   def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
                      "bl\t__aeabi_read_tp",
                      [(set R0, ARMthread_pointer)]> {
@@ -2413,32 +2974,18 @@ let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
     D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
     D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
-    D31 ], hasSideEffects = 1, isBarrier = 1 in {
+    D31 ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
   def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
-                               AddrModeNone, SizeSpecial, NoItinerary,
-                               "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
-                               "adds\t$val, #7\n\t"
-                               "str\t$val, [$src, #4]\n\t"
-                               "movs\tr0, #0\n\t"
-                               "b\t1f\n\t"
-                               "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
-                               "1:", "",
+                               AddrModeNone, SizeSpecial, NoItinerary, "", "",
                           [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
                              Requires<[IsThumb2, HasVFP2]>;
 }
 
 let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ],
-  hasSideEffects = 1, isBarrier = 1 in {
+  hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
   def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
-                               AddrModeNone, SizeSpecial, NoItinerary,
-                               "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
-                               "adds\t$val, #7\n\t"
-                               "str\t$val, [$src, #4]\n\t"
-                               "movs\tr0, #0\n\t"
-                               "b\t1f\n\t"
-                               "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
-                               "1:", "",
+                               AddrModeNone, SizeSpecial, NoItinerary, "", "",
                           [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
                                   Requires<[IsThumb2, NoVFP]>;
 }
@@ -2453,82 +3000,77 @@ let Defs =
 // operand list.
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
-    hasExtraDefRegAllocReq = 1 in
-  def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                         reglist:$dsts, variable_ops), IIC_Br,
-                        "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
-                        "$addr.addr = $wb", []> {
+    hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                        reglist:$regs, variable_ops),
+                        IIC_iLoad_mBr,
+                        "ldmia${p}.w\t$Rn!, $regs",
+                        "$Rn = $wb", []> {
+  bits<4>  Rn;
+  bits<16> regs;
+
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
-  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
-  let Inst{22} = 0;
-  let Inst{21} = 1; // The W bit.
-  let Inst{20} = 1; // Load
+  let Inst{24-23} = 0b01;     // Increment After
+  let Inst{22}    = 0;
+  let Inst{21}    = 1;        // Writeback
+  let Inst{20}    = 1;
+  let Inst{19-16} = Rn;
+  let Inst{15-0}  = regs;
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
 let isPredicable = 1 in
-def t2B   : T2XI<(outs), (ins brtarget:$target), IIC_Br,
+def t2B   : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
                  "b.w\t$target",
                  [(br bb:$target)]> {
   let Inst{31-27} = 0b11110;
   let Inst{15-14} = 0b10;
   let Inst{12} = 1;
+
+  bits<20> target;
+  let Inst{26} = target{19};
+  let Inst{11} = target{18};
+  let Inst{13} = target{17};
+  let Inst{21-16} = target{16-11};
+  let Inst{10-0} = target{10-0};
 }
 
 let isNotDuplicable = 1, isIndirectBranch = 1 in {
-def t2BR_JT :
-    T2JTI<(outs),
-          (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
-           IIC_Br, "mov\tpc, $target$jt",
-          [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0100100;
-  let Inst{19-16} = 0b1111;
-  let Inst{14-12} = 0b000;
-  let Inst{11-8} = 0b1111; // Rd = pc
-  let Inst{7-4} = 0b0000;
-}
+def t2BR_JT : t2PseudoInst<(outs),
+          (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+           SizeSpecial, IIC_Br,
+          [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
 
 // FIXME: Add a non-pc based case that can be predicated.
-def t2TBB :
-    T2JTI<(outs),
-        (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
-         IIC_Br, "tbb\t$index$jt", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0001101;
-  let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
-  let Inst{15-8} = 0b11110000;
-  let Inst{7-4} = 0b0000; // B form
-}
-
-def t2TBH :
-    T2JTI<(outs),
-        (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
-         IIC_Br, "tbh\t$index$jt", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0001101;
-  let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
-  let Inst{15-8} = 0b11110000;
-  let Inst{7-4} = 0b0001; // H form
-}
-
-// Generic versions of the above two instructions, for disassembly only
-
-def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
-                    "tbb", "\t[$a, $b]", []>{
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0001101;
-  let Inst{15-8} = 0b11110000;
-  let Inst{7-4} = 0b0000; // B form
-}
-
-def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
-                   "tbh", "\t[$a, $b, lsl #1]", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0001101;
-  let Inst{15-8} = 0b11110000;
-  let Inst{7-4} = 0b0001; // H form
+def t2TBB_JT : t2PseudoInst<(outs),
+        (ins GPR:$index, i32imm:$jt, i32imm:$id),
+         SizeSpecial, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+        (ins GPR:$index, i32imm:$jt, i32imm:$id),
+         SizeSpecial, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+                    "tbb", "\t[$Rn, $Rm]", []> {
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{31-20} = 0b111010001101;
+  let Inst{19-16} = Rn;
+  let Inst{15-5} = 0b11110000000;
+  let Inst{4} = 0; // B form
+  let Inst{3-0} = Rm;
+}
+
+def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+                   "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{31-20} = 0b111010001101;
+  let Inst{19-16} = Rn;
+  let Inst{15-5} = 0b11110000000;
+  let Inst{4} = 1; // H form
+  let Inst{3-0} = Rm;
 }
 } // isNotDuplicable, isIndirectBranch
 
@@ -2543,6 +3085,16 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
   let Inst{31-27} = 0b11110;
   let Inst{15-14} = 0b10;
   let Inst{12} = 0;
+
+  bits<4> p;
+  let Inst{25-22} = p;
+
+  bits<21> target;
+  let Inst{26} = target{20};
+  let Inst{11} = target{19};
+  let Inst{13} = target{18};
+  let Inst{21-16} = target{17-12};
+  let Inst{10-0} = target{11-1};
 }
 
 
@@ -2554,6 +3106,11 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
   // 16-bit instruction.
   let Inst{31-16} = 0x0000;
   let Inst{15-8} = 0b10111111;
+
+  bits<4> cc;
+  bits<4> mask;
+  let Inst{7-4} = cc;
+  let Inst{3-0} = mask;
 }
 
 // Branch and Exchange Jazelle -- for disassembly only
@@ -2565,22 +3122,44 @@ def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
   let Inst{25-20} = 0b111100;
   let Inst{15-14} = 0b10;
   let Inst{12} = 0;
+
+  bits<4> func;
+  let Inst{19-16} = func;
 }
 
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode from Inst{4-0}
-// opt{5} = changemode from Inst{17}
-// opt{8-6} = AIF from Inst{8-6}
-// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def t2CPS : T2XI<(outs),(ins cps_opt:$opt), NoItinerary, "cps$opt",
-                 [/* For disassembly only; pattern left blank */]> {
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+            !strconcat("cps", asm_op),
+            [/* For disassembly only; pattern left blank */]> {
+  bits<2> imod;
+  bits<3> iflags;
+  bits<5> mode;
+  bit M;
+
   let Inst{31-27} = 0b11110;
-  let Inst{26} = 0;
+  let Inst{26}    = 0;
   let Inst{25-20} = 0b111010;
+  let Inst{19-16} = 0b1111;
   let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
-}
+  let Inst{12}    = 0;
+  let Inst{10-9}  = imod;
+  let Inst{8}     = M;
+  let Inst{7-5}   = iflags;
+  let Inst{4-0}   = mode;
+}
+
+let M = 1 in
+  def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+                      "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+  def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+                      "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+  def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
 
 // A6.3.4 Branches and miscellaneous control
 // Table A6-14 Change Processor State, and hint instructions
@@ -2589,6 +3168,7 @@ class T2I_hint<bits<8> op7_0, string opc, string asm>
   : T2I<(outs), (ins), NoItinerary, opc, asm,
         [/* For disassembly only; pattern left blank */]> {
   let Inst{31-20} = 0xf3a;
+  let Inst{19-16} = 0b1111;
   let Inst{15-14} = 0b10;
   let Inst{12} = 0;
   let Inst{10-8} = 0b000;
@@ -2608,6 +3188,9 @@ def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
   let Inst{12} = 0;
   let Inst{10-8} = 0b000;
   let Inst{7-4} = 0b1111;
+
+  bits<4> opt;
+  let Inst{3-0} = opt;
 }
 
 // Secure Monitor Call is a system instruction -- for disassembly only
@@ -2617,83 +3200,86 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
   let Inst{31-27} = 0b11110;
   let Inst{26-20} = 0b1111111;
   let Inst{15-12} = 0b1000;
-}
 
-// Store Return State is a system instruction -- for disassembly only
-def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0000010; // W = 1
+  bits<4> opt;
+  let Inst{19-16} = opt;
 }
 
-def t2SRSDB  : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0000000; // W = 0
-}
+class T2SRS<bits<12> op31_20,
+           dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  let Inst{31-20} = op31_20{11-0};
 
-def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0011010; // W = 1
+  bits<5> mode;
+  let Inst{4-0} = mode{4-0};
 }
 
-def t2SRSIA  : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0011000; // W = 0
-}
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2SRS<0b111010000010,
+                   (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2SRSDB  : T2SRS<0b111010000000,
+                   (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2SRSIAW : T2SRS<0b111010011010,
+                   (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2SRSIA  : T2SRS<0b111010011000,
+                   (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+                   [/* For disassembly only; pattern left blank */]>;
 
 // Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0000011; // W = 1
-}
 
-def t2RFEDB  : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0000001; // W = 0
-}
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  let Inst{31-20} = op31_20{11-0};
 
-def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0011011; // W = 1
+  bits<4> Rn;
+  let Inst{19-16} = Rn;
 }
 
-def t2RFEIA  : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-20} = 0b0011001; // W = 0
-}
+def t2RFEDBW : T2RFE<0b111010000011,
+                   (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB  : T2RFE<0b111010000001,
+                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+                   [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA  : T2RFE<0b111010011001,
+                   (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+                   [/* For disassembly only; pattern left blank */]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
 
-// Two piece so_imms.
-def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
-             (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
-                    (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
-             (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
-                    (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
-             (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
-                    (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
-             (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
-                    (t2_so_neg_imm2part_2 imm:$RHS))>;
-
 // 32-bit immediate using movw + movt.
-// This is a single pseudo instruction to make it re-materializable. Remove
-// when we can do generalized remat.
-let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
-                   "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
-                     [(set rGPR:$dst, (i32 imm:$src))]>;
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                            [(set rGPR:$dst, (i32 imm:$src))]>,
+                            Requires<[IsThumb, HasV6T2]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+                                IIC_iMOVix2addpc,
+                          [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                          Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2,
+                          [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                          Requires<[IsThumb2, UseMovt]>;
+}
 
 // ConstantPool, GlobalAddress, and JumpTable
 def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2709,10 +3295,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
 // be expanded into two instructions late to allow if-conversion and
 // scheduling.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
-def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
-                   NoItinerary,
-                   "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
-               [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+                   IIC_iLoadiALU,
+              [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb2]>;
 
@@ -2720,48 +3305,128 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
 // Move between special register and ARM core register -- for disassembly only
 //
 
-// Rd = Instr{11-8}
-def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
-                [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11110;
-  let Inst{26} = 0;
-  let Inst{25-21} = 0b11111;
-  let Inst{20} = 0; // The R bit.
-  let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
+class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+          dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : T2I<oops, iops, itin, opc, asm, pattern> {
+  let Inst{31-20} = op31_20{11-0};
+  let Inst{15-14} = op15_14{1-0};
+  let Inst{12} = op12{0};
 }
 
-// Rd = Instr{11-8}
-def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
-                   [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11110;
-  let Inst{26} = 0;
-  let Inst{25-21} = 0b11111;
-  let Inst{20} = 1; // The R bit.
-  let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
+class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+          dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  let Inst{11-8} = Rd;
+  let Inst{19-16} = 0b1111;
 }
 
-// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
-                "\tcpsr$mask, $src",
-                [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11110;
-  let Inst{26} = 0;
-  let Inst{25-21} = 0b11100;
-  let Inst{20} = 0; // The R bit.
-  let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
+def t2MRS : T2MRS<0b111100111110, 0b10, 0,
+                (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+                [/* For disassembly only; pattern left blank */]>;
+def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
+                   (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+                   [/* For disassembly only; pattern left blank */]>;
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions, the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
+// the mask with the fields to be accessed in the special register.
+def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
+                         0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
+                         NoItinerary, "msr", "\t$mask, $Rn",
+                         [/* For disassembly only; pattern left blank */]> {
+  bits<5> mask;
+  bits<4> Rn;
+  let Inst{19-16} = Rn;
+  let Inst{20}    = mask{4}; // R Bit
+  let Inst{13}    = 0b0;
+  let Inst{11-8}  = mask{3-0};
 }
 
-// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
-                   "\tspsr$mask, $src",
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class t2MovRCopro<string opc, bit direction>
+  : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+                       GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+          [/* For disassembly only; pattern left blank */]> {
+  let Inst{27-24} = 0b1110;
+  let Inst{20} = direction;
+  let Inst{4} = 1;
+
+  bits<4> Rt;
+  bits<4> cop;
+  bits<3> opc1;
+  bits<3> opc2;
+  bits<4> CRm;
+  bits<4> CRn;
+
+  let Inst{15-12} = Rt;
+  let Inst{11-8}  = cop;
+  let Inst{23-21} = opc1;
+  let Inst{7-5}   = opc2;
+  let Inst{3-0}   = CRm;
+  let Inst{19-16} = CRn;
+}
+
+def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class t2MovRRCopro<string opc, bit direction>
+  : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+          !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+          [/* For disassembly only; pattern left blank */]> {
+  let Inst{27-24} = 0b1100;
+  let Inst{23-21} = 0b010;
+  let Inst{20} = direction;
+
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> cop;
+  bits<4> opc1;
+  bits<4> CRm;
+
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+  let Inst{11-8}  = cop;
+  let Inst{7-4}   = opc1;
+  let Inst{3-0}   = CRm;
+}
+
+def t2MCRR2 : t2MovRRCopro<"mcrr2",
+                           0 /* from ARM core register to coprocessor */>;
+def t2MRRC2 : t2MovRRCopro<"mrrc2",
+                           1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions.  For disassembly only.
+//
+
+def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+                   c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+                   "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
                    [/* For disassembly only; pattern left blank */]> {
-  let Inst{31-27} = 0b11110;
-  let Inst{26} = 0;
-  let Inst{25-21} = 0b11100;
-  let Inst{20} = 1; // The R bit.
-  let Inst{15-14} = 0b10;
-  let Inst{12} = 0;
+  let Inst{27-24} = 0b1110;
+
+  bits<4> opc1;
+  bits<4> CRn;
+  bits<4> CRd;
+  bits<4> cop;
+  bits<3> opc2;
+  bits<4> CRm;
+
+  let Inst{3-0}   = CRm;
+  let Inst{4}     = 0;
+  let Inst{7-5}   = opc2;
+  let Inst{11-8}  = cop;
+  let Inst{15-12} = CRd;
+  let Inst{19-16} = CRn;
+  let Inst{23-20} = opc1;
 }
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index c29e09606bd4..920c5c98002a 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,30 +11,26 @@
 //
 //===----------------------------------------------------------------------===//
 
-def SDT_FTOI :
-SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF :
-SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
-def SDT_CMPFP0 :
-SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_VMOVDRR :
-SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
-                     SDTCisSameAs<1, 2>]>;
-
-def arm_ftoui  : SDNode<"ARMISD::FTOUI",  SDT_FTOI>;
-def arm_ftosi  : SDNode<"ARMISD::FTOSI",  SDT_FTOI>;
-def arm_sitof  : SDNode<"ARMISD::SITOF",  SDT_ITOF>;
-def arm_uitof  : SDNode<"ARMISD::UITOF",  SDT_ITOF>;
-def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
-def arm_cmpfp  : SDNode<"ARMISD::CMPFP",  SDT_ARMCmp, [SDNPOutFlag]>;
-def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR",  SDT_VMOVDRR>;
+def SDT_FTOI    : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF    : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0  : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+                                       SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui  : SDNode<"ARMISD::FTOUI",   SDT_FTOI>;
+def arm_ftosi  : SDNode<"ARMISD::FTOSI",   SDT_FTOI>;
+def arm_sitof  : SDNode<"ARMISD::SITOF",   SDT_ITOF>;
+def arm_uitof  : SDNode<"ARMISD::UITOF",   SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT",  SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp  : SDNode<"ARMISD::CMPFP",   SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
 
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
 //
 
-
 def vfp_f32imm : Operand<f32>,
                  PatLeaf<(f32 fpimm), [{
       return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
@@ -55,86 +51,136 @@ def vfp_f64imm : Operand<f64>,
 //
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
-                 IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
-                 [(set DPR:$dst, (f64 (load addrmode5:$addr)))]>;
 
-def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
-                 IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
-                 [(set SPR:$dst, (load addrmode5:$addr))]>;
-} // canFoldAsLoad
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+                 IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+                 [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
 
-def VSTRD  : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
-                 IIC_fpStore64, "vstr", ".64\t$src, $addr",
-                 [(store (f64 DPR:$src), addrmode5:$addr)]>;
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+                 IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+                 [(set SPR:$Sd, (load addrmode5:$addr))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VSTRS  : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
-                 IIC_fpStore32, "vstr", ".32\t$src, $addr",
-                 [(store SPR:$src, addrmode5:$addr)]>;
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 
-//===----------------------------------------------------------------------===//
-//  Load / store multiple Instructions.
-//
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+                 IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+                 [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
-                           variable_ops), IndexModeNone, IIC_fpLoadm,
-                  "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
-  let Inst{20} = 1;
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+                 IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+                 [(store SPR:$Sd, addrmode5:$addr)]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
-def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
-                           variable_ops), IndexModeNone, IIC_fpLoadm,
-                  "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
-  let Inst{20} = 1;
-}
+//===----------------------------------------------------------------------===//
+//  Load / store multiple Instructions.
+//
 
-def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$dsts, variable_ops),
-                      IndexModeUpd, IIC_fpLoadm,
-                      "vldm${addr:submode}${p}\t$addr!, $dsts",
-                      "$addr.addr = $wb", []> {
-  let Inst{20} = 1;
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+                         InstrItinClass itin, InstrItinClass itin_upd> {
+  // Double Precision
+  def DIA :
+    AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+          IndexModeNone, itin,
+          !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DIA_UPD :
+    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+          IndexModeUpd, itin_upd,
+          !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DDB :
+    AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+          IndexModeNone, itin,
+          !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DDB_UPD :
+    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+          IndexModeUpd, itin_upd,
+          !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+
+  // Single Precision
+  def SIA :
+    AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+          IndexModeNone, itin,
+          !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+
+    // Some single precision VFP instructions may be executed on both NEON and
+    // VFP pipelines.
+    let D = VFPNeonDomain;
+  }
+  def SIA_UPD :
+    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+          IndexModeUpd, itin_upd,
+          !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+
+    // Some single precision VFP instructions may be executed on both NEON and
+    // VFP pipelines.
+    let D = VFPNeonDomain;
+  }
+  def SDB :
+    AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+          IndexModeNone, itin,
+          !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+
+    // Some single precision VFP instructions may be executed on both NEON and
+    // VFP pipelines.
+    let D = VFPNeonDomain;
+  }
+  def SDB_UPD :
+    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+          IndexModeUpd, itin_upd,
+          !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+
+    // Some single precision VFP instructions may be executed on both NEON and
+    // VFP pipelines.
+    let D = VFPNeonDomain;
+  }
 }
 
-def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$dsts, variable_ops),
-                      IndexModeUpd, IIC_fpLoadm, 
-                      "vldm${addr:submode}${p}\t$addr!, $dsts",
-                      "$addr.addr = $wb", []> {
-  let Inst{20} = 1;
-}
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
+let neverHasSideEffects = 1 in {
 
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
-                           variable_ops), IndexModeNone, IIC_fpStorem,
-                  "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
-  let Inst{20} = 0;
-}
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
 
-def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
-                           variable_ops), IndexModeNone, IIC_fpStorem,
-                  "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
-  let Inst{20} = 0;
-}
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
 
-def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$srcs, variable_ops),
-                      IndexModeUpd, IIC_fpStorem,
-                      "vstm${addr:submode}${p}\t$addr!, $srcs",
-                      "$addr.addr = $wb", []> {
-  let Inst{20} = 0;
-}
+} // neverHasSideEffects
 
-def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                       reglist:$srcs, variable_ops),
-                      IndexModeUpd, IIC_fpStorem,
-                      "vstm${addr:submode}${p}\t$addr!, $srcs",
-                      "$addr.addr = $wb", []> {
-  let Inst{20} = 0;
-}
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
 
 // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
 
@@ -142,56 +188,71 @@ def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
 // FP Binary Operations.
 //
 
-def VADDD  : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
-                 IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
-                 [(set DPR:$dst, (fadd DPR:$a, (f64 DPR:$b)))]>;
-
-def VADDS  : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                  IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
-                  [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
-
-// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
-def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b),
-                 IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
-                 [(arm_cmpfp DPR:$a, (f64 DPR:$b))]>;
-
-def VCMPD  : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b),
-                 IIC_fpCMP64, "vcmp", ".f64\t$a, $b",
-                 [/* For disassembly only; pattern left blank */]>;
-
-def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b),
-                 IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
-                 [(arm_cmpfp SPR:$a, SPR:$b)]>;
-
-def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b),
-                 IIC_fpCMP32, "vcmp", ".f32\t$a, $b",
-                 [/* For disassembly only; pattern left blank */]>;
+def VADDD  : ADbI<0b11100, 0b11, 0, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                  IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
+                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                   IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+                   [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
-def VDIVD  : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
-                 IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
-                 [(set DPR:$dst, (fdiv DPR:$a, (f64 DPR:$b)))]>;
-
-def VDIVS  : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                 IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
-                 [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-
-def VMULD  : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
-                 IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
-                 [(set DPR:$dst, (fmul DPR:$a, (f64 DPR:$b)))]>;
-
-def VMULS  : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                  IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
-                  [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+def VSUBD  : ADbI<0b11100, 0b11, 1, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                  IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS  : ASbIn<0b11100, 0b11, 1, 0,
+                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                   IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+                   [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VNMULD  : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
-                  IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
-                  [(set DPR:$dst, (fneg (fmul DPR:$a, (f64 DPR:$b))))]>;
+def VDIVD  : ADbI<0b11101, 0b00, 0, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                  IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS  : ASbI<0b11101, 0b00, 0, 0,
+                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+                  [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD  : ADbI<0b11100, 0b10, 0, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                  IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS  : ASbIn<0b11100, 0b10, 0, 0,
+                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                   IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+                   [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VNMULS  : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                  IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
-                  [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                  IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+                  [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 // Match reassociated forms only if not sign dependent rounding.
 def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -199,53 +260,128 @@ def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
 def : Pat<(fmul (fneg SPR:$a), SPR:$b),
           (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
 
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+                  (outs), (ins DPR:$Dd, DPR:$Dm),
+                  IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+                  [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+                  (outs), (ins SPR:$Sd, SPR:$Sm),
+                  IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+                  [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VSUBD  : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
-                 IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
-                 [(set DPR:$dst, (fsub DPR:$a, (f64 DPR:$b)))]>;
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD  : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+                  (outs), (ins DPR:$Dd, DPR:$Dm),
+                  IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+                  [/* For disassembly only; pattern left blank */]>;
 
-def VSUBS  : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
-                  IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
-                  [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+                  (outs), (ins SPR:$Sd, SPR:$Sm),
+                  IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+                  [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
 
 //===----------------------------------------------------------------------===//
 // FP Unary Operations.
 //
 
-def VABSD  : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
-                 IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
-                 [(set DPR:$dst, (fabs (f64 DPR:$a)))]>;
-
-def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a),
-                  IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
-                  [(set SPR:$dst, (fabs SPR:$a))]>;
+def VABSD  : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dm),
+                  IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+                  [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+                   [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 let Defs = [FPSCR] in {
-def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a),
-                  IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
-                  [(arm_cmpfp0 (f64 DPR:$a))]>;
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+                   (outs), (ins DPR:$Dd),
+                   IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+                   [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+  let Inst{3-0} = 0b0000;
+  let Inst{5}   = 0;
+}
 
-def VCMPZD  : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a),
-                  IIC_fpCMP64, "vcmp", ".f64\t$a, #0",
-                  [/* For disassembly only; pattern left blank */]>;
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+                   (outs), (ins SPR:$Sd),
+                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+                   [(arm_cmpfp0 SPR:$Sd)]> {
+  let Inst{3-0} = 0b0000;
+  let Inst{5}   = 0;
 
-def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a),
-                  IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
-                  [(arm_cmpfp0 SPR:$a)]>;
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a),
-                  IIC_fpCMP32, "vcmp", ".f32\t$a, #0",
-                  [/* For disassembly only; pattern left blank */]>;
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD  : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+                   (outs), (ins DPR:$Dd),
+                   IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+                   [/* For disassembly only; pattern left blank */]> {
+  let Inst{3-0} = 0b0000;
+  let Inst{5}   = 0;
 }
 
-def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
-                 [(set DPR:$dst, (fextend SPR:$a))]>;
+def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+                   (outs), (ins SPR:$Sd),
+                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+                   [/* For disassembly only; pattern left blank */]> {
+  let Inst{3-0} = 0b0000;
+  let Inst{5}   = 0;
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS  : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+                   (outs DPR:$Dd), (ins SPR:$Sm),
+                   IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+                   [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+}
 
 // Special case encoding: bits 11-8 is 0b1011.
-def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
-                   IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
-                   [(set SPR:$dst, (fround DPR:$a))]> {
+def VCVTSD  : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+                    IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+                    [(set SPR:$Sd, (fround DPR:$Dm))]> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+
   let Inst{27-23} = 0b11101;
   let Inst{21-16} = 0b110111;
   let Inst{11-8}  = 0b1011;
@@ -255,6 +391,7 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
 
 // Between half-precision and single-precision.  For disassembly only.
 
+// FIXME: Verify encoding after integrated assembler is working.
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
@@ -277,47 +414,94 @@ def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
                  [/* For disassembly only; pattern left blank */]>;
 
-let neverHasSideEffects = 1 in {
-def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
-                 IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
-
-def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
-} // neverHasSideEffects
+def VNEGD  : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dm),
+                  IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+                  [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+                   [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VNEGD  : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
-                 IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
-                 [(set DPR:$dst, (fneg (f64 DPR:$a)))]>;
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dm),
+                  IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+                  [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
 
-def VNEGS  : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a),
-                  IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
-                  [(set SPR:$dst, (fneg SPR:$a))]>;
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+                  (outs SPR:$Sd), (ins SPR:$Sm),
+                  IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+                  [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
 
-def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
-                 IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
-                 [(set DPR:$dst, (fsqrt (f64 DPR:$a)))]>;
+let neverHasSideEffects = 1 in {
+def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+                  (outs DPR:$Dd), (ins DPR:$Dm),
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
 
-def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
-                 [(set SPR:$dst, (fsqrt SPR:$a))]>;
+def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+                  (outs SPR:$Sd), (ins SPR:$Sm),
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
 // FP <-> GPR Copies.  Int <-> FP Conversions.
 //
 
-def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
-                 IIC_fpMOVSI, "vmov", "\t$dst, $src",
-                 [(set GPR:$dst, (bitconvert SPR:$src))]>;
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+                      (outs GPR:$Rt), (ins SPR:$Sn),
+                      IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+                      [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+  // Instruction operands.
+  bits<4> Rt;
+  bits<5> Sn;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7}     = Sn{0};
+  let Inst{15-12} = Rt;
+
+  let Inst{6-5}   = 0b00;
+  let Inst{3-0}   = 0b0000;
+}
 
-def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
-                 IIC_fpMOVIS, "vmov", "\t$dst, $src",
-                 [(set SPR:$dst, (bitconvert GPR:$src))]>;
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+                      (outs SPR:$Sn), (ins GPR:$Rt),
+                      IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+                      [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+  // Instruction operands.
+  bits<5> Sn;
+  bits<4> Rt;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7}     = Sn{0};
+  let Inst{15-12} = Rt;
+
+  let Inst{6-5}   = 0b00;
+  let Inst{3-0}   = 0b0000;
+}
 
 let neverHasSideEffects = 1 in {
 def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
-                      (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
-                 IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
+                        (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+                        IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                  [/* FIXME: Can't write pattern for multiple result instr*/]> {
+  // Instruction operands.
+  bits<5> Dm;
+  bits<4> Rt;
+  bits<4> Rt2;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+
   let Inst{7-6} = 0b00;
 }
 
@@ -333,10 +517,21 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
 // FMDLR: GPR -> SPR
 
 def VMOVDRR : AVConv5I<0b11000100, 0b1011,
-                     (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
-                IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
-                [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
-  let Inst{7-6} = 0b00;
+                      (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+                      IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+                      [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+  // Instruction operands.
+  bits<5> Dm;
+  bits<4> Rt;
+  bits<4> Rt2;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{15-12} = Rt;
+  let Inst{19-16} = Rt2;
+
+  let Inst{7-6}   = 0b00;
 }
 
 let neverHasSideEffects = 1 in
@@ -350,102 +545,183 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
 // FMRDH: SPR -> GPR
 // FMRDL: SPR -> GPR
 // FMRRS: SPR -> GPR
-// FMRX : SPR system reg -> GPR
-
+// FMRX:  SPR system reg -> GPR
 // FMSRR: GPR -> SPR
+// FMXR:  GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+                        bits<4> opcod4, dag oops, dag iops,
+                        InstrItinClass itin, string opc, string asm,
+                        list<dag> pattern>
+  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+             pattern> {
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+}
 
-// FMXR: GPR -> VFP Sstem reg
-
-
-// Int to FP:
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+                         bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+                         string opc, string asm, list<dag> pattern>
+  : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+              pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+}
 
-def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
-                 (outs DPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
-                 [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> {
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+                               (outs DPR:$Dd), (ins SPR:$Sm),
+                               IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+                               [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
   let Inst{7} = 1; // s32
 }
 
-def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
-                 (outs SPR:$dst),(ins SPR:$a),
-                 IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
-                 [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+                                (outs SPR:$Sd),(ins SPR:$Sm),
+                                IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+                                [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
-def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
-                 (outs DPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
-                 [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> {
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+                               (outs DPR:$Dd), (ins SPR:$Sm),
+                               IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+                               [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
   let Inst{7} = 0; // u32
 }
 
-def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
-                 (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
-                 [(set SPR:$dst, (arm_uitof SPR:$a))]> {
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+                                (outs SPR:$Sd), (ins SPR:$Sm),
+                                IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+                                [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
-// FP to Int:
-// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+                        bits<4> opcod4, dag oops, dag iops,
+                        InstrItinClass itin, string opc, string asm,
+                        list<dag> pattern>
+  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+             pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Dm{3-0};
+  let Inst{5}     = Dm{4};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+                         bits<4> opcod4, dag oops, dag iops,
+                         InstrItinClass itin, string opc, string asm,
+                         list<dag> pattern>
+  : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+              pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0}   = Sm{4-1};
+  let Inst{5}     = Sm{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+}
 
-def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
-                       (outs SPR:$dst), (ins DPR:$a),
-                 IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
-                 [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> {
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+                                (outs SPR:$Sd), (ins DPR:$Dm),
+                                IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+                                [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
   let Inst{7} = 1; // Z bit
 }
 
-def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
-                        (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
-                 [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+                                 (outs SPR:$Sd), (ins SPR:$Sm),
+                                 IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+                                 [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
-def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
-                       (outs SPR:$dst), (ins DPR:$a),
-                 IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
-                 [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> {
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+                               (outs SPR:$Sd), (ins DPR:$Dm),
+                               IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+                               [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
   let Inst{7} = 1; // Z bit
 }
 
-def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
-                        (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
-                 [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+                                 (outs SPR:$Sd), (ins SPR:$Sm),
+                                 IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+                                 [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
+
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
 }
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
-// For disassembly only.
 let Uses = [FPSCR] in {
-def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
-                       (outs SPR:$dst), (ins DPR:$a),
-                 IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
-                 [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+                                (outs SPR:$Sd), (ins DPR:$Dm),
+                                IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+                                [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
   let Inst{7} = 0; // Z bit
 }
 
-def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
-                        (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
-                 [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+                                 (outs SPR:$Sd), (ins SPR:$Sm),
+                                 IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+                                 [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
   let Inst{7} = 0; // Z bit
 }
 
-def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
-                       (outs SPR:$dst), (ins DPR:$a),
-                 IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
-                 [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+                                (outs SPR:$Sd), (ins DPR:$Dm),
+                                IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+                                [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
   let Inst{7} = 0; // Z bit
 }
 
-def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
-                        (outs SPR:$dst), (ins SPR:$a),
-                 IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
-                 [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+                                 (outs SPR:$Sd), (ins SPR:$Sm),
+                                 IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+                                 [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
   let Inst{7} = 0; // Z bit
 }
 }
@@ -457,30 +733,47 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
 //   S32 (U=0, sx=1) -> SL
 //   U32 (U=1, sx=1) -> UL
 
-let Constraints = "$a = $dst" in {
+// FIXME: Marking these as codegen only seems wrong. They are real
+//        instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
 
 // FP to Fixed-Point:
 
-let isCodeGenOnly = 1 in {
 def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
@@ -501,30 +794,44 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
                  IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]>;
-}
 
 // Fixed-Point to FP:
 
-let isCodeGenOnly = 1 in {
 def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
@@ -545,70 +852,120 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]>;
-}
 
-} // End of 'let Constraints = "$src = $dst" in'
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
 
 //===----------------------------------------------------------------------===//
 // FP FMA Operations.
 //
 
-def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
-                (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
-                [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
-                                      (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                 IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+                 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+                                          (f64 DPR:$Ddin)))]>,
+              RegConstraint<"$Ddin = $Dd">,
+              Requires<[HasVFP2,UseFPVMLx]>;
 
 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
-                 (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                 IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
-                 [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
-                 RegConstraint<"$dstin = $dst">;
-
-def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
-                (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
-                [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
-                                (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                  IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+                  [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+                                           SPR:$Sdin))]>,
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 
-def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
-                (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
-                [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
-
-def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
-                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                 IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
-             [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
-                             (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+          (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+          (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+                 (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                 IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+                 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+                                          (f64 DPR:$Ddin)))]>,
+              RegConstraint<"$Ddin = $Dd">,
+              Requires<[HasVFP2,UseFPVMLx]>;
 
 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
-                  (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                  IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
-             [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
-
-def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
-          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
-def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
-          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-
-def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
-                 (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
-                 IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
-             [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
-                             (f64 DPR:$dstin)))]>,
-                RegConstraint<"$dstin = $dst">;
+                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                  IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+                  [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+                                           SPR:$Sdin))]>,
+              RegConstraint<"$Sdin = $Sd">,
+              Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+          (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+          (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                  IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+                                          (f64 DPR:$Ddin)))]>,
+                RegConstraint<"$Ddin = $Dd">,
+                Requires<[HasVFP2,UseFPVMLx]>;
 
 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
-                (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
-                IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
-             [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
-                RegConstraint<"$dstin = $dst">;
+                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                  IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+                  [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+                                           SPR:$Sdin))]>,
+                RegConstraint<"$Sdin = $Sd">,
+                Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+                  IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+                  [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+                                           (f64 DPR:$Ddin)))]>,
+               RegConstraint<"$Ddin = $Dd">,
+               Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+                  (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+                  IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+             [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+                         RegConstraint<"$Sdin = $Sd">,
+                  Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+          (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+          (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
 
 //===----------------------------------------------------------------------===//
 // FP Conditional moves.
@@ -616,92 +973,157 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
 
 let neverHasSideEffects = 1 in {
 def VMOVDcc  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
-                    (outs DPR:$dst), (ins DPR:$false, DPR:$true),
-                    IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
-                [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
-                    RegConstraint<"$false = $dst">;
+                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                    IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+                    [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+                 RegConstraint<"$Dn = $Dd">;
 
 def VMOVScc  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
-                    (outs SPR:$dst), (ins SPR:$false, SPR:$true),
-                    IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
-                [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
-                    RegConstraint<"$false = $dst">;
+                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                    IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+                    [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+                 RegConstraint<"$Sn = $Sd">;
 
 def VNEGDcc  : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
-                    (outs DPR:$dst), (ins DPR:$false, DPR:$true),
-                    IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
-                [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
-                    RegConstraint<"$false = $dst">;
+                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+                    IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+                    [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+                 RegConstraint<"$Dn = $Dd">;
 
 def VNEGScc  : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
-                    (outs SPR:$dst), (ins SPR:$false, SPR:$true),
-                    IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
-                [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
-                    RegConstraint<"$false = $dst">;
+                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+                    [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+                 RegConstraint<"$Sn = $Sd"> {
+  // Some single precision VFP instructions may be executed on both NEON and VFP
+  // pipelines.
+  let D = VFPNeonDomain;
+}
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
-// Misc.
+// Move from VFP System Register to ARM core register.
 //
 
-// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
-// to APSR.
-let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
-                   "\tapsr_nzcv, fpscr",
-             [(arm_fmstat)]> {
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+                 list<dag> pattern>:
+  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+  // Instruction operand.
+  bits<4> Rt;
+
   let Inst{27-20} = 0b11101111;
-  let Inst{19-16} = 0b0001;
-  let Inst{15-12} = 0b1111;
+  let Inst{19-16} = opc19_16;
+  let Inst{15-12} = Rt;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
+  let Inst{6-5}   = 0b00;
   let Inst{4}     = 1;
+  let Inst{3-0}   = 0b0000;
 }
 
-// FPSCR <-> GPR (for disassembly only)
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+                        "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
 let hasSideEffects = 1, Uses = [FPSCR] in
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
-                 "vmrs", "\t$dst, fpscr",
-             [(set GPR:$dst, (int_arm_get_fpscr))]> {
-  let Inst{27-20} = 0b11101111;
-  let Inst{19-16} = 0b0001;
-  let Inst{11-8}  = 0b1010;
-  let Inst{7}     = 0;
-  let Inst{4}     = 1;
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+                      "vmrs", "\t$Rt, fpscr",
+                      [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+  def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+                              "vmrs", "\t$Rt, fpexc", []>;
+  def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+                              "vmrs", "\t$Rt, fpsid", []>;
 }
 
-let Defs = [FPSCR] in 
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, 
-                 "vmsr", "\tfpscr, $src",
-             [(int_arm_set_fpscr GPR:$src)]> {
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+               list<dag> pattern>:
+  VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+  // Instruction operand.
+  bits<4> src;
+
+  // Encode instruction operand.
+  let Inst{15-12} = src;
+
   let Inst{27-20} = 0b11101110;
-  let Inst{19-16} = 0b0001;
+  let Inst{19-16} = opc19_16;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
 }
 
+let Defs = [FPSCR] in {
+  // Application level GPR -> FPSCR
+  def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+                      "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+  // System level GPR -> FPEXC
+  def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+                      "vmsr", "\tfpexc, $src", []>;
+  // System level GPR -> FPSID
+  def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+                      "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
 // Materialize FP immediates. VFP3 only.
 let isReMaterializable = 1 in {
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
                     VFPMiscFrm, IIC_fpUNA64,
-                    "vmov", ".f64\t$dst, $imm",
-                    [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+                    "vmov", ".f64\t$Dd, $imm",
+                    [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+  // Instruction operands.
+  bits<5>  Dd;
+  bits<32> imm;
+
+  // Encode instruction operands.
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22}    = Dd{4};
+  let Inst{19}    = imm{31};
+  let Inst{18-16} = imm{22-20};
+  let Inst{3-0}   = imm{19-16};
+
+  // Encode remaining instruction bits.
   let Inst{27-23} = 0b11101;
   let Inst{21-20} = 0b11;
   let Inst{11-9}  = 0b101;
-  let Inst{8}     = 1;
+  let Inst{8}     = 1;          // Double precision.
   let Inst{7-4}   = 0b0000;
 }
 
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
-                    VFPMiscFrm, IIC_fpUNA32,
-                    "vmov", ".f32\t$dst, $imm",
-                    [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+                     VFPMiscFrm, IIC_fpUNA32,
+                     "vmov", ".f32\t$Sd, $imm",
+                     [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+  // Instruction operands.
+  bits<5>  Sd;
+  bits<32> imm;
+
+  // Encode instruction operands.
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22}    = Sd{0};
+  let Inst{19}    = imm{31};    // The immediate is handled as a double.
+  let Inst{18-16} = imm{22-20};
+  let Inst{3-0}   = imm{19-16};
+
+  // Encode remaining instruction bits.
   let Inst{27-23} = 0b11101;
   let Inst{21-20} = 0b11;
   let Inst{11-9}  = 0b101;
-  let Inst{8}     = 0;
+  let Inst{8}     = 0;          // Single precision.
   let Inst{7-4}   = 0b0000;
 }
 }
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 5f6d7eef4b5a..45b7e48d0cfb 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -22,7 +22,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
 #include <cstdlib>
 using namespace llvm;
 
@@ -43,7 +43,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
 #define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
 
 // CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us (we need
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
 // to preserve more context and manipulate the stack directly).  Instead,
 // write our own wrapper, which does things our way, so we have complete
 // control over register saving and restoring.
@@ -97,9 +97,10 @@ extern "C" {
     "str  r0, [sp,#16]\n"
     // Return to the (newly modified) stub to invoke the real function.
     // The above twiddling of the saved return addresses allows us to
-    // deallocate everything, including the LR the stub saved, all in one
-    // pop instruction.
-    "ldmia  sp!, {r0, r1, r2, r3, lr, pc}\n"
+    // deallocate everything, including the LR the stub saved, with two
+    // updating load instructions.
+    "ldmia  sp!, {r0, r1, r2, r3, lr}\n"
+    "ldr    pc, [sp], #4\n"
       );
 #else  // Not an ARM host
   void ARMCompilationCallback() {
@@ -290,7 +291,7 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
       *((intptr_t*)RelocPos) |= ResultPtr;
       // Set register Rn to PC.
       *((intptr_t*)RelocPos) |=
-        ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+        getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
       break;
     }
     case ARM::reloc_arm_pic_jt:
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index f5d9effeb9a5..2f9792813d32 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -105,7 +105,7 @@ namespace llvm {
     /// model is PIC.
     void Initialize(const MachineFunction &MF, bool isPIC) {
       const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-      ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries());
+      ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
       JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
       IsPIC = isPIC;
     }
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 2b7645a42119..d9dc5cdedb30 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -128,45 +128,153 @@ namespace {
   char ARMLoadStoreOpt::ID = 0;
 }
 
-static int getLoadStoreMultipleOpcode(int Opcode) {
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
   switch (Opcode) {
-  case ARM::LDR:
+  default: llvm_unreachable("Unhandled opcode!");
+  case ARM::LDRi12:
     ++NumLDMGened;
-    return ARM::LDM;
-  case ARM::STR:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::LDMIA;
+    case ARM_AM::da: return ARM::LDMDA;
+    case ARM_AM::db: return ARM::LDMDB;
+    case ARM_AM::ib: return ARM::LDMIB;
+    }
+    break;
+  case ARM::STRi12:
     ++NumSTMGened;
-    return ARM::STM;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::STMIA;
+    case ARM_AM::da: return ARM::STMDA;
+    case ARM_AM::db: return ARM::STMDB;
+    case ARM_AM::ib: return ARM::STMIB;
+    }
+    break;
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     ++NumLDMGened;
-    return ARM::t2LDM;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::t2LDMIA;
+    case ARM_AM::db: return ARM::t2LDMDB;
+    }
+    break;
   case ARM::t2STRi8:
   case ARM::t2STRi12:
     ++NumSTMGened;
-    return ARM::t2STM;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::t2STMIA;
+    case ARM_AM::db: return ARM::t2STMDB;
+    }
+    break;
   case ARM::VLDRS:
     ++NumVLDMGened;
-    return ARM::VLDMS;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VLDMSIA;
+    case ARM_AM::db: return ARM::VLDMSDB;
+    }
+    break;
   case ARM::VSTRS:
     ++NumVSTMGened;
-    return ARM::VSTMS;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VSTMSIA;
+    case ARM_AM::db: return ARM::VSTMSDB;
+    }
+    break;
   case ARM::VLDRD:
     ++NumVLDMGened;
-    return ARM::VLDMD;
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VLDMDIA;
+    case ARM_AM::db: return ARM::VLDMDDB;
+    }
+    break;
   case ARM::VSTRD:
     ++NumVSTMGened;
-    return ARM::VSTMD;
-  default: llvm_unreachable("Unhandled opcode!");
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VSTMDIA;
+    case ARM_AM::db: return ARM::VSTMDDB;
+    }
+    break;
   }
+
   return 0;
 }
 
+namespace llvm {
+  namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Unhandled opcode!");
+  case ARM::LDMIA_RET:
+  case ARM::LDMIA:
+  case ARM::LDMIA_UPD:
+  case ARM::STMIA:
+  case ARM::STMIA_UPD:
+  case ARM::t2LDMIA_RET:
+  case ARM::t2LDMIA:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2STMIA:
+  case ARM::t2STMIA_UPD:
+  case ARM::VLDMSIA:
+  case ARM::VLDMSIA_UPD:
+  case ARM::VSTMSIA:
+  case ARM::VSTMSIA_UPD:
+  case ARM::VLDMDIA:
+  case ARM::VLDMDIA_UPD:
+  case ARM::VSTMDIA:
+  case ARM::VSTMDIA_UPD:
+    return ARM_AM::ia;
+
+  case ARM::LDMDA:
+  case ARM::LDMDA_UPD:
+  case ARM::STMDA:
+  case ARM::STMDA_UPD:
+    return ARM_AM::da;
+
+  case ARM::LDMDB:
+  case ARM::LDMDB_UPD:
+  case ARM::STMDB:
+  case ARM::STMDB_UPD:
+  case ARM::t2LDMDB:
+  case ARM::t2LDMDB_UPD:
+  case ARM::t2STMDB:
+  case ARM::t2STMDB_UPD:
+  case ARM::VLDMSDB:
+  case ARM::VLDMSDB_UPD:
+  case ARM::VSTMSDB:
+  case ARM::VSTMSDB_UPD:
+  case ARM::VLDMDDB:
+  case ARM::VLDMDDB_UPD:
+  case ARM::VSTMDDB:
+  case ARM::VSTMDDB_UPD:
+    return ARM_AM::db;
+
+  case ARM::LDMIB:
+  case ARM::LDMIB_UPD:
+  case ARM::STMIB:
+  case ARM::STMIB_UPD:
+    return ARM_AM::ib;
+  }
+
+  return ARM_AM::bad_am_submode;
+}
+
+  } // end namespace ARM_AM
+} // end namespace llvm
+
 static bool isT2i32Load(unsigned Opc) {
   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
 }
 
 static bool isi32Load(unsigned Opc) {
-  return Opc == ARM::LDR || isT2i32Load(Opc);
+  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
 }
 
 static bool isT2i32Store(unsigned Opc) {
@@ -174,7 +282,7 @@ static bool isT2i32Store(unsigned Opc) {
 }
 
 static bool isi32Store(unsigned Opc) {
-  return Opc == ARM::STR || isT2i32Store(Opc);
+  return Opc == ARM::STRi12 || isT2i32Store(Opc);
 }
 
 /// MergeOps - Create and insert a LDM or STM with Base as base register and
@@ -245,10 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
 
   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                 Opcode == ARM::VLDRD);
-  Opcode = getLoadStoreMultipleOpcode(Opcode);
+  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
     .addReg(Base, getKillRegState(BaseKill))
-    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
+    .addImm(Pred).addReg(PredReg);
   for (unsigned i = 0; i != NumRegs; ++i)
     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                      | getKillRegState(Regs[i].second));
@@ -271,22 +379,14 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
   // First calculate which of the registers should be killed by the merged
   // instruction.
   const unsigned insertPos = memOps[insertAfter].Position;
-
-  SmallSet<unsigned, 4> UnavailRegs;
   SmallSet<unsigned, 4> KilledRegs;
   DenseMap<unsigned, unsigned> Killer;
-  for (unsigned i = 0; i < memOpsBegin; ++i) {
-    if (memOps[i].Position < insertPos && memOps[i].isKill) {
-      unsigned Reg = memOps[i].Reg;
-      if (memOps[i].Merged)
-        UnavailRegs.insert(Reg);
-      else {
-        KilledRegs.insert(Reg);
-        Killer[Reg] = i;
-      }
+  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+    if (i == memOpsBegin) {
+      i = memOpsEnd;
+      if (i == e)
+        break;
     }
-  }
-  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
     if (memOps[i].Position < insertPos && memOps[i].isKill) {
       unsigned Reg = memOps[i].Reg;
       KilledRegs.insert(Reg);
@@ -297,12 +397,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     unsigned Reg = memOps[i].Reg;
-    if (UnavailRegs.count(Reg))
-      // Register is killed before and it's not easy / possible to update the
-      // kill marker on already merged instructions. Abort.
-      return;
-
-    // If we are inserting the merged operation after an unmerged operation that
+    // If we are inserting the merged operation after an operation that
     // uses the same register, make sure to transfer any kill flag.
     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
     Regs.push_back(std::make_pair(Reg, isKill));
@@ -318,17 +413,24 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
   // Merge succeeded, update records.
   Merges.push_back(prior(Loc));
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
-    // Remove kill flags from any unmerged memops that come before insertPos.
+    // Remove kill flags from any memops that come before insertPos.
     if (Regs[i-memOpsBegin].second) {
       unsigned Reg = Regs[i-memOpsBegin].first;
       if (KilledRegs.count(Reg)) {
         unsigned j = Killer[Reg];
-        memOps[j].MBBI->getOperand(0).setIsKill(false);
+        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+        assert(Idx >= 0 && "Cannot find killing operand");
+        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
         memOps[j].isKill = false;
       }
+      memOps[i].isKill = true;
     }
     MBB.erase(memOps[i].MBBI);
+    // Update this memop to refer to the merged instruction.
+    // We may need to move kill flags again.
     memOps[i].Merged = true;
+    memOps[i].MBBI = Merges.back();
+    memOps[i].Position = insertPos;
   }
 }
 
@@ -349,7 +451,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
   const MachineOperand &PMO = Loc->getOperand(0);
   unsigned PReg = PMO.getReg();
   unsigned PRegNum = PMO.isUndef() ? UINT_MAX
-    : ARMRegisterInfo::getRegisterNumbering(PReg);
+    : getARMRegisterNumbering(PReg);
   unsigned Count = 1;
 
   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
@@ -357,7 +459,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
     unsigned Reg = MO.getReg();
     unsigned RegNum = MO.isUndef() ? UINT_MAX
-      : ARMRegisterInfo::getRegisterNumbering(Reg);
+      : getARMRegisterNumbering(Reg);
     // Register numbers must be in ascending order.  For VFP, the registers
     // must also be consecutive and there is a limit of 16 double-word
     // registers per instruction.
@@ -440,8 +542,8 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
   switch (MI->getOpcode()) {
   default: return 0;
-  case ARM::LDR:
-  case ARM::STR:
+  case ARM::LDRi12:
+  case ARM::STRi12:
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
   case ARM::t2STRi8:
@@ -452,31 +554,109 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
   case ARM::VLDRD:
   case ARM::VSTRD:
     return 8;
-  case ARM::LDM:
-  case ARM::STM:
-  case ARM::t2LDM:
-  case ARM::t2STM:
-  case ARM::VLDMS:
-  case ARM::VSTMS:
-    return (MI->getNumOperands() - 4) * 4;
-  case ARM::VLDMD:
-  case ARM::VSTMD:
-    return (MI->getNumOperands() - 4) * 8;
+  case ARM::LDMIA:
+  case ARM::LDMDA:
+  case ARM::LDMDB:
+  case ARM::LDMIB:
+  case ARM::STMIA:
+  case ARM::STMDA:
+  case ARM::STMDB:
+  case ARM::STMIB:
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB:
+  case ARM::t2STMIA:
+  case ARM::t2STMDB:
+  case ARM::VLDMSIA:
+  case ARM::VLDMSDB:
+  case ARM::VSTMSIA:
+  case ARM::VSTMSDB:
+    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+  case ARM::VLDMDIA:
+  case ARM::VLDMDDB:
+  case ARM::VSTMDIA:
+  case ARM::VSTMDDB:
+    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
   }
 }
 
-static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+                                            ARM_AM::AMSubMode Mode) {
   switch (Opc) {
-  case ARM::LDM: return ARM::LDM_UPD;
-  case ARM::STM: return ARM::STM_UPD;
-  case ARM::t2LDM: return ARM::t2LDM_UPD;
-  case ARM::t2STM: return ARM::t2STM_UPD;
-  case ARM::VLDMS: return ARM::VLDMS_UPD;
-  case ARM::VLDMD: return ARM::VLDMD_UPD;
-  case ARM::VSTMS: return ARM::VSTMS_UPD;
-  case ARM::VSTMD: return ARM::VSTMD_UPD;
   default: llvm_unreachable("Unhandled opcode!");
+  case ARM::LDMIA:
+  case ARM::LDMDA:
+  case ARM::LDMDB:
+  case ARM::LDMIB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::LDMIA_UPD;
+    case ARM_AM::ib: return ARM::LDMIB_UPD;
+    case ARM_AM::da: return ARM::LDMDA_UPD;
+    case ARM_AM::db: return ARM::LDMDB_UPD;
+    }
+    break;
+  case ARM::STMIA:
+  case ARM::STMDA:
+  case ARM::STMDB:
+  case ARM::STMIB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::STMIA_UPD;
+    case ARM_AM::ib: return ARM::STMIB_UPD;
+    case ARM_AM::da: return ARM::STMDA_UPD;
+    case ARM_AM::db: return ARM::STMDB_UPD;
+    }
+    break;
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+    case ARM_AM::db: return ARM::t2LDMDB_UPD;
+    }
+    break;
+  case ARM::t2STMIA:
+  case ARM::t2STMDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::t2STMIA_UPD;
+    case ARM_AM::db: return ARM::t2STMDB_UPD;
+    }
+    break;
+  case ARM::VLDMSIA:
+  case ARM::VLDMSDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+    case ARM_AM::db: return ARM::VLDMSDB_UPD;
+    }
+    break;
+  case ARM::VLDMDIA:
+  case ARM::VLDMDDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+    case ARM_AM::db: return ARM::VLDMDDB_UPD;
+    }
+    break;
+  case ARM::VSTMSIA:
+  case ARM::VSTMSDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+    case ARM_AM::db: return ARM::VSTMSDB_UPD;
+    }
+    break;
+  case ARM::VSTMDIA:
+  case ARM::VSTMDDB:
+    switch (Mode) {
+    default: llvm_unreachable("Unhandled submode!");
+    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+    case ARM_AM::db: return ARM::VSTMDDB_UPD;
+    }
+    break;
   }
+
   return 0;
 }
 
@@ -505,16 +685,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
   int Opcode = MI->getOpcode();
   DebugLoc dl = MI->getDebugLoc();
 
-  bool DoMerge = false;
-  ARM_AM::AMSubMode Mode = ARM_AM::ia;
-
   // Can't use an updating ld/st if the base register is also a dest
   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
-  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
     if (MI->getOperand(i).getReg() == Base)
       return false;
-  }
-  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+
+  bool DoMerge = false;
+  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
 
   // Try merging with the previous instruction.
   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -560,15 +738,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
   if (!DoMerge)
     return false;
 
-  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
+  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
     .addReg(Base, getDefRegState(true)) // WB base register
     .addReg(Base, getKillRegState(BaseKill))
-    .addImm(ARM_AM::getAM4ModeImm(Mode))
     .addImm(Pred).addReg(PredReg);
+
   // Transfer the rest of operands.
-  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
     MIB.addOperand(MI->getOperand(OpNum));
+
   // Transfer memoperands.
   (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
 
@@ -576,14 +755,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
   return true;
 }
 
-static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+                                             ARM_AM::AddrOpc Mode) {
   switch (Opc) {
-  case ARM::LDR: return ARM::LDR_PRE;
-  case ARM::STR: return ARM::STR_PRE;
-  case ARM::VLDRS: return ARM::VLDMS_UPD;
-  case ARM::VLDRD: return ARM::VLDMD_UPD;
-  case ARM::VSTRS: return ARM::VSTMS_UPD;
-  case ARM::VSTRD: return ARM::VSTMD_UPD;
+  case ARM::LDRi12:
+    return ARM::LDR_PRE;
+  case ARM::STRi12:
+    return ARM::STR_PRE;
+  case ARM::VLDRS:
+    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+  case ARM::VLDRD:
+    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+  case ARM::VSTRS:
+    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+  case ARM::VSTRD:
+    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     return ARM::t2LDR_PRE;
@@ -595,14 +781,21 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
   return 0;
 }
 
-static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+                                              ARM_AM::AddrOpc Mode) {
   switch (Opc) {
-  case ARM::LDR: return ARM::LDR_POST;
-  case ARM::STR: return ARM::STR_POST;
-  case ARM::VLDRS: return ARM::VLDMS_UPD;
-  case ARM::VLDRD: return ARM::VLDMD_UPD;
-  case ARM::VSTRS: return ARM::VSTMS_UPD;
-  case ARM::VSTRD: return ARM::VSTMD_UPD;
+  case ARM::LDRi12:
+    return ARM::LDR_POST;
+  case ARM::STRi12:
+    return ARM::STR_POST;
+  case ARM::VLDRS:
+    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+  case ARM::VLDRD:
+    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+  case ARM::VSTRS:
+    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+  case ARM::VSTRD:
+    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     return ARM::t2LDR_POST;
@@ -629,14 +822,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   DebugLoc dl = MI->getDebugLoc();
   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
-  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
-  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
-    return false;
-  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
-    return false;
-  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
+  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+  if (isi32Load(Opcode) || isi32Store(Opcode))
     if (MI->getOperand(2).getImm() != 0)
       return false;
+  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+    return false;
 
   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
   // Can't do the merge if the destination register is the same as the would-be
@@ -666,7 +857,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
       DoMerge = true;
     }
     if (DoMerge) {
-      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
       MBB.erase(PrevMBBI);
     }
   }
@@ -685,7 +876,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
       DoMerge = true;
     }
     if (DoMerge) {
-      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
       if (NextMBBI == I) {
         Advance = true;
         ++I;
@@ -698,12 +889,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
     return false;
 
   unsigned Offset = 0;
-  if (isAM5)
-    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
-                                   ARM_AM::db : ARM_AM::ia);
-  else if (isAM2)
+  if (isAM2)
     Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
-  else
+  else if (!isAM5)
     Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
 
   if (isAM5) {
@@ -715,7 +903,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
     BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
       .addReg(Base, getDefRegState(true)) // WB base register
       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
-      .addImm(Offset)
       .addImm(Pred).addReg(PredReg)
       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                             getKillRegState(MO.isKill())));
@@ -782,15 +969,14 @@ static bool isMemoryOp(const MachineInstr *MI) {
   int Opcode = MI->getOpcode();
   switch (Opcode) {
   default: break;
-  case ARM::LDR:
-  case ARM::STR:
-    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
   case ARM::VLDRS:
   case ARM::VSTRS:
     return MI->getOperand(1).isReg();
   case ARM::VLDRD:
   case ARM::VSTRD:
     return MI->getOperand(1).isReg();
+  case ARM::LDRi12:
+  case ARM::STRi12:
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
   case ARM::t2STRi8:
@@ -818,24 +1004,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
 
 static int getMemoryOpOffset(const MachineInstr *MI) {
   int Opcode = MI->getOpcode();
-  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
   unsigned NumOperands = MI->getDesc().getNumOperands();
   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
 
   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
-      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
+      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
     return OffField;
 
-  int Offset = isAM2
-    ? ARM_AM::getAM2Offset(OffField)
-    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
-             : ARM_AM::getAM5Offset(OffField) * 4);
-  if (isAM2) {
-    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
-      Offset = -Offset;
-  } else if (isAM3) {
+  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+    : ARM_AM::getAM5Offset(OffField) * 4;
+  if (isAM3) {
     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
       Offset = -Offset;
   } else {
@@ -847,35 +1028,24 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
 
 static void InsertLDR_STR(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI,
-                          int OffImm, bool isDef,
+                          int Offset, bool isDef,
                           DebugLoc dl, unsigned NewOpc,
                           unsigned Reg, bool RegDeadKill, bool RegUndef,
                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
-                          unsigned OffReg, bool OffKill, bool OffUndef,
+                          bool OffKill, bool OffUndef,
                           ARMCC::CondCodes Pred, unsigned PredReg,
                           const TargetInstrInfo *TII, bool isT2) {
-  int Offset = OffImm;
-  if (!isT2) {
-    if (OffImm < 0)
-      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
-    else
-      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
-  }
   if (isDef) {
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                       TII->get(NewOpc))
       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
-    if (!isT2)
-      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   } else {
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                       TII->get(NewOpc))
       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
-    if (!isT2)
-      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   }
 }
@@ -906,23 +1076,21 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
     unsigned BaseReg = BaseOp.getReg();
     bool BaseKill = BaseOp.isKill();
     bool BaseUndef = BaseOp.isUndef();
-    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
     bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
     bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
     int OffImm = getMemoryOpOffset(MI);
     unsigned PredReg = 0;
     ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
 
-    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
+    if (OddRegNum > EvenRegNum && OffImm == 0) {
       // Ascending register numbers and no offset. It's safe to change it to a
       // ldm or stm.
       unsigned NewOpc = (isLd)
-        ? (isT2 ? ARM::t2LDM : ARM::LDM)
-        : (isT2 ? ARM::t2STM : ARM::STM);
+        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
       if (isLd) {
         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
           .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
           .addImm(Pred).addReg(PredReg)
           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
@@ -930,7 +1098,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
       } else {
         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
           .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
           .addImm(Pred).addReg(PredReg)
           .addReg(EvenReg,
                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
@@ -941,28 +1108,24 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
       NewBBI = llvm::prior(MBBI);
     } else {
       // Split into two instructions.
-      assert((!isT2 || !OffReg) &&
-             "Thumb2 ldrd / strd does not encode offset register!");
       unsigned NewOpc = (isLd)
-        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
-        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
+        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
       DebugLoc dl = MBBI->getDebugLoc();
       // If this is a load and base register is killed, it may have been
       // re-defed by the load, make sure the first load does not clobber it.
       if (isLd &&
           (BaseKill || OffKill) &&
-          (TRI->regsOverlap(EvenReg, BaseReg) ||
-           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
-        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
-               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
+          (TRI->regsOverlap(EvenReg, BaseReg))) {
+        assert(!TRI->regsOverlap(OddReg, BaseReg));
         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                       OddReg, OddDeadKill, false,
-                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+                      BaseReg, false, BaseUndef, false, OffUndef,
                       Pred, PredReg, TII, isT2);
         NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                       EvenReg, EvenDeadKill, false,
-                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                       Pred, PredReg, TII, isT2);
       } else {
         if (OddReg == EvenReg && EvenDeadKill) {
@@ -974,12 +1137,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
         }
         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                       EvenReg, EvenDeadKill, EvenUndef,
-                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+                      BaseReg, false, BaseUndef, false, OffUndef,
                       Pred, PredReg, TII, isT2);
         NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                       OddReg, OddDeadKill, OddUndef,
-                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                       Pred, PredReg, TII, isT2);
       }
       if (isLd)
@@ -1158,17 +1321,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   return NumMerges > 0;
 }
 
-namespace {
-  struct OffsetCompare {
-    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
-      int LOffset = getMemoryOpOffset(LHS);
-      int ROffset = getMemoryOpOffset(RHS);
-      assert(LHS == RHS || LOffset != ROffset);
-      return LOffset > ROffset;
-    }
-  };
-}
-
 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
 /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
 /// directly restore the value of LR into pc.
@@ -1182,20 +1334,25 @@ namespace {
 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   if (MBB.empty()) return false;
 
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   if (MBBI != MBB.begin() &&
       (MBBI->getOpcode() == ARM::BX_RET ||
        MBBI->getOpcode() == ARM::tBX_RET ||
        MBBI->getOpcode() == ARM::MOVPCLR)) {
     MachineInstr *PrevMI = prior(MBBI);
-    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
-        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
+    unsigned Opcode = PrevMI->getOpcode();
+    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
       if (MO.getReg() != ARM::LR)
         return false;
-      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
+      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
       PrevMI->setDesc(TII->get(NewOpc));
       MO.setReg(ARM::PC);
+      PrevMI->copyImplicitOps(&*MBBI);
       MBB.erase(MBBI);
       return true;
     }
@@ -1216,7 +1373,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
        ++MFI) {
     MachineBasicBlock &MBB = *MFI;
     Modified |= LoadStoreMultipleOpti(MBB);
-    Modified |= MergeReturnIntoLDM(MBB);
+    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+      Modified |= MergeReturnIntoLDM(MBB);
   }
 
   delete RS;
@@ -1250,7 +1408,7 @@ namespace {
     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                           unsigned &NewOpc, unsigned &EvenReg,
                           unsigned &OddReg, unsigned &BaseReg,
-                          unsigned &OffReg, int &Offset,
+                          int &Offset,
                           unsigned &PredReg, ARMCC::CondCodes &Pred,
                           bool &isT2);
     bool RescheduleOps(MachineBasicBlock *MBB,
@@ -1292,7 +1450,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
     if (I->isDebugValue() || MemOps.count(&*I))
       continue;
     const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+    if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
       return false;
     if (isLd && TID.mayStore())
       return false;
@@ -1330,8 +1488,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                           DebugLoc &dl,
                                           unsigned &NewOpc, unsigned &EvenReg,
                                           unsigned &OddReg, unsigned &BaseReg,
-                                          unsigned &OffReg, int &Offset,
-                                          unsigned &PredReg,
+                                          int &Offset, unsigned &PredReg,
                                           ARMCC::CondCodes &Pred,
                                           bool &isT2) {
   // Make sure we're allowed to generate LDRD/STRD.
@@ -1341,9 +1498,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
   unsigned Scale = 1;
   unsigned Opcode = Op0->getOpcode();
-  if (Opcode == ARM::LDR)
+  if (Opcode == ARM::LDRi12)
     NewOpc = ARM::LDRD;
-  else if (Opcode == ARM::STR)
+  else if (Opcode == ARM::STRi12)
     NewOpc = ARM::STRD;
   else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
     NewOpc = ARM::t2LDRDi8;
@@ -1356,12 +1513,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   } else
     return false;
 
-  // Make sure the offset registers match.
-  if (!isT2 &&
-      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
-      return false;
-
-  // Must sure the base address satisfies i64 ld / st alignment requirement.
+  // Make sure the base address satisfies i64 ld / st alignment requirement.
   if (!Op0->hasOneMemOperand() ||
       !(*Op0->memoperands_begin())->getValue() ||
       (*Op0->memoperands_begin())->isVolatile())
@@ -1370,7 +1522,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
   const Function *Func = MF->getFunction();
   unsigned ReqAlign = STI->hasV6Ops()
-    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) 
+    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
     : 8;  // Pre-v6 need 8-byte align
   if (Align < ReqAlign)
     return false;
@@ -1404,13 +1556,22 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   if (EvenReg == OddReg)
     return false;
   BaseReg = Op0->getOperand(1).getReg();
-  if (!isT2)
-    OffReg = Op0->getOperand(2).getReg();
   Pred = llvm::getInstrPredicate(Op0, PredReg);
   dl = Op0->getDebugLoc();
   return true;
 }
 
+namespace {
+  struct OffsetCompare {
+    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+      int LOffset = getMemoryOpOffset(LHS);
+      int ROffset = getMemoryOpOffset(RHS);
+      assert(LHS == RHS || LOffset != ROffset);
+      return LOffset > ROffset;
+    }
+  };
+}
+
 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                  SmallVector<MachineInstr*, 4> &Ops,
                                  unsigned Base, bool isLd,
@@ -1493,14 +1654,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
         MachineInstr *Op0 = Ops.back();
         MachineInstr *Op1 = Ops[Ops.size()-2];
         unsigned EvenReg = 0, OddReg = 0;
-        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
+        unsigned BaseReg = 0, PredReg = 0;
         ARMCC::CondCodes Pred = ARMCC::AL;
         bool isT2 = false;
         unsigned NewOpc = 0;
         int Offset = 0;
         DebugLoc dl;
         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
-                                             EvenReg, OddReg, BaseReg, OffReg,
+                                             EvenReg, OddReg, BaseReg,
                                              Offset, PredReg, Pred, isT2)) {
           Ops.pop_back();
           Ops.pop_back();
@@ -1512,8 +1673,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
               .addReg(EvenReg, RegState::Define)
               .addReg(OddReg, RegState::Define)
               .addReg(BaseReg);
+            // FIXME: We're converting from LDRi12 to an insn that still
+            // uses addrmode2, so we need an explicit offset reg. It should
+            // always by reg0 since we're transforming LDRi12s.
             if (!isT2)
-              MIB.addReg(OffReg);
+              MIB.addReg(0);
             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
             ++NumLDRDFormed;
           } else {
@@ -1522,8 +1686,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
               .addReg(EvenReg)
               .addReg(OddReg)
               .addReg(BaseReg);
+            // FIXME: We're converting from LDRi12 to an insn that still
+            // uses addrmode2, so we need an explicit offset reg. It should
+            // always by reg0 since we're transforming STRi12s.
             if (!isT2)
-              MIB.addReg(OffReg);
+              MIB.addReg(0);
             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
             ++NumSTRDFormed;
           }
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
new file mode 100644
index 000000000000..6d7b48587d19
--- /dev/null
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1230 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "ARMInstrInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+  ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+  const TargetMachine &TM;
+  const TargetInstrInfo &TII;
+  const ARMSubtarget *Subtarget;
+  MCContext &Ctx;
+
+public:
+  ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+    : TM(tm), TII(*TM.getInstrInfo()),
+      Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) {
+  }
+
+  ~ARMMCCodeEmitter() {}
+
+  unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  unsigned getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return binary encoding of operand. If the machine
+  /// operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+  /// the specified operand. This is used for operands with :lower16: and
+  /// :upper16: prefixes.
+  uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+                              unsigned &Reg, unsigned &Imm,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+  /// BL branch target.
+  uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+  /// BLX branch target.
+  uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                    SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+  uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+  uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                    SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+  uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+  /// branch target.
+  uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                  SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+  /// immediate Thumb2 direct branch target.
+  uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                  SmallVectorImpl<MCFixup> &Fixups) const;
+  
+  /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+  /// branch target.
+  uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+  /// ADR label target.
+  uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+  uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+  /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+  /// operand.
+  uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+  uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+                                         SmallVectorImpl<MCFixup> &Fixups)const;
+
+  /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+  /// operand.
+  uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+  /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+  /// operand as needed by load/store instructions.
+  uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+  uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const {
+    ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+    switch (Mode) {
+    default: assert(0 && "Unknown addressing sub-mode!");
+    case ARM_AM::da: return 0;
+    case ARM_AM::ia: return 1;
+    case ARM_AM::db: return 2;
+    case ARM_AM::ib: return 3;
+    }
+  }
+  /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+  ///
+  unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+    switch (ShOpc) {
+    default: llvm_unreachable("Unknown shift opc!");
+    case ARM_AM::no_shift:
+    case ARM_AM::lsl: return 0;
+    case ARM_AM::lsr: return 1;
+    case ARM_AM::asr: return 2;
+    case ARM_AM::ror:
+    case ARM_AM::rrx: return 3;
+    }
+    return 0;
+  }
+
+  /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+  uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+  uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+  uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+  uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+  /// operand.
+  uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+  uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+                                SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+  uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+                                SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+  uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getCCOutOpValue - Return encoding of the 's' bit.
+  unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+                           SmallVectorImpl<MCFixup> &Fixups) const {
+    // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+    // '1' respectively.
+    return MI.getOperand(Op).getReg() == ARM::CPSR;
+  }
+
+  /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+  unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+                           SmallVectorImpl<MCFixup> &Fixups) const {
+    unsigned SoImm = MI.getOperand(Op).getImm();
+    int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+    assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+    // Encode rotate_imm.
+    unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+      << ARMII::SoRotImmShift;
+
+    // Encode immed_8.
+    Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+    return Binary;
+  }
+
+  /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+  unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+                           SmallVectorImpl<MCFixup> &Fixups) const {
+    unsigned SoImm = MI.getOperand(Op).getImm();
+    unsigned Encoded =  ARM_AM::getT2SOImmVal(SoImm);
+    assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+    return Encoded;
+  }
+
+  unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+    SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+    SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+    SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+    SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getSORegOpValue - Return an encoded so_reg shifted register value.
+  unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+                           SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+
+  unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
+                            SmallVectorImpl<MCFixup> &Fixups) const {
+    switch (MI.getOperand(Op).getImm()) {
+    default: assert (0 && "Not a valid rot_imm value!");
+    case 0:  return 0;
+    case 8:  return 1;
+    case 16: return 2;
+    case 24: return 3;
+    }
+  }
+
+  unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
+                                 SmallVectorImpl<MCFixup> &Fixups) const {
+    return MI.getOperand(Op).getImm() - 1;
+  }
+
+  unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+                                   SmallVectorImpl<MCFixup> &Fixups) const {
+    return 64 - MI.getOperand(Op).getImm();
+  }
+
+  unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+                                      SmallVectorImpl<MCFixup> &Fixups) const;
+
+  unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
+                         SmallVectorImpl<MCFixup> &Fixups) const;
+
+  unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+                                  SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+                                      SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+                                        SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+                                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+                                      unsigned EncodedValue) const;
+  unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+                                          unsigned EncodedValue) const;
+  unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+                                    unsigned EncodedValue) const;
+
+  unsigned VFPThumb2PostEncoder(const MCInst &MI,
+                                unsigned EncodedValue) const;
+
+  void EmitByte(unsigned char C, raw_ostream &OS) const {
+    OS << (char)C;
+  }
+
+  void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+    // Output the constant in little endian byte order.
+    for (unsigned i = 0; i != Size; ++i) {
+      EmitByte(Val & 255, OS);
+      Val >>= 8;
+    }
+  }
+
+  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
+                                            MCContext &Ctx) {
+  return new ARMMCCodeEmitter(TM, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+                                                 unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
+    // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+    // set to 1111.
+    unsigned Bit24 = EncodedValue & 0x01000000;
+    unsigned Bit28 = Bit24 << 4;
+    EncodedValue &= 0xEFFFFFFF;
+    EncodedValue |= Bit28;
+    EncodedValue |= 0x0F000000;
+  }
+
+  return EncodedValue;
+}
+
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+                                                 unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0xF0FFFFFF;
+    EncodedValue |= 0x09000000;
+  }
+
+  return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+                                                 unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0x00FFFFFF;
+    EncodedValue |= 0xEE000000;
+  }
+
+  return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0x0FFFFFFF;
+    EncodedValue |= 0xE0000000;
+  }
+  return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+    unsigned RegNo = getARMRegisterNumbering(Reg);
+
+    // Q registers are encoded as 2x their register number.
+    switch (Reg) {
+    default:
+      return RegNo;
+    case ARM::Q0:  case ARM::Q1:  case ARM::Q2:  case ARM::Q3:
+    case ARM::Q4:  case ARM::Q5:  case ARM::Q6:  case ARM::Q7:
+    case ARM::Q8:  case ARM::Q9:  case ARM::Q10: case ARM::Q11:
+    case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+      return 2 * RegNo;
+    }
+  } else if (MO.isImm()) {
+    return static_cast<unsigned>(MO.getImm());
+  } else if (MO.isFPImm()) {
+    return static_cast<unsigned>(APFloat(MO.getFPImm())
+                     .bitcastToAPInt().getHiBits(32).getLimitedValue());
+  }
+
+  llvm_unreachable("Unable to encode MCOperand!");
+  return 0;
+}
+
+/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+                       unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+  Reg = getARMRegisterNumbering(MO.getReg());
+
+  int32_t SImm = MO1.getImm();
+  bool isAdd = true;
+
+  // Special value for #-0
+  if (SImm == INT32_MIN)
+    SImm = 0;
+
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (SImm < 0) {
+    SImm = -SImm;
+    isAdd = false;
+  }
+
+  Imm = SImm;
+  return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                       unsigned FixupKind,
+                                       SmallVectorImpl<MCFixup> &Fixups) {
+  const MCOperand &MO = MI.getOperand(OpIdx);
+
+  // If the destination is an immediate, we have nothing to do.
+  if (MO.isImm()) return MO.getImm();
+  assert(MO.isExpr() && "Unexpected branch target type!");
+  const MCExpr *Expr = MO.getExpr();
+  MCFixupKind Kind = MCFixupKind(FixupKind);
+  Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+  // All of the information is in the fixup.
+  return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+  int NumOp = MI.getNumOperands();
+  if (NumOp >= 2) {
+    for (int i = 0; i < NumOp-1; ++i) {
+      const MCOperand &MCOp1 = MI.getOperand(i);
+      const MCOperand &MCOp2 = MI.getOperand(i + 1);
+      if (MCOp1.isImm() && MCOp2.isReg() && 
+          (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+        if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL) 
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  // FIXME: This really, really shouldn't use TargetMachine. We don't want
+  // coupling between MC and TM anywhere we can help it.
+  if (Subtarget->isThumb2())
+    return
+      ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+  return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  if (HasConditionalBranch(MI)) 
+    return ::getBranchTargetOpValue(MI, OpIdx,
+                                    ARM::fixup_arm_condbranch, Fixups);
+  return ::getBranchTargetOpValue(MI, OpIdx, 
+                                  ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+
+
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Val =
+    ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+  bool I  = (Val & 0x800000);
+  bool J1 = (Val & 0x400000);
+  bool J2 = (Val & 0x200000);
+  if (I ^ J1)
+    Val &= ~0x400000;
+  else
+    Val |= 0x400000;
+
+  if (I ^ J2)
+    Val &= ~0x200000;
+  else
+    Val |= 0x200000;
+
+  return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+                                  Fixups);
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+                                  Fixups);
+}
+
+/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+                                  Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &) const {
+  // [Rn, Rm]
+  //   {5-3} = Rm
+  //   {2-0} = Rn
+  const MCOperand &MO1 = MI.getOperand(OpIdx);
+  const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+  unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+  unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+  return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  // {17-13} = reg
+  // {12}    = (U)nsigned (add == '1', sub == '0')
+  // {11-0}  = imm12
+  unsigned Reg, Imm12;
+  bool isAdd = true;
+  // If The first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm12 = 0;
+    isAdd = false ; // 'U' bit is set as part of the fixup.
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+
+    MCFixupKind Kind;
+    if (Subtarget->isThumb2())
+      Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+    else
+      Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else
+    isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+  uint32_t Binary = Imm12 & 0xfff;
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 12);
+  Binary |= (Reg << 13);
+  return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  // {12-9} = reg
+  // {8}    = (U)nsigned (add == '1', sub == '0')
+  // {7-0}  = imm8
+  unsigned Reg, Imm8;
+  bool isAdd = true;
+  // If The first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm8 = 0;
+    isAdd = false ; // 'U' bit is set as part of the fixup.
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+    MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else
+    isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+  uint32_t Binary = (Imm8 >> 2) & 0xff;
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 8);
+  Binary |= (Reg << 9);
+  return Binary;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression
+// In reality, its only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct)
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+  switch (Expr->getKind()) {
+  default: assert(0 && "Unexpected expression type");
+  case MCExpr::SymbolRef: return false;
+  case MCExpr::Binary: return true;
+  }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                      SmallVectorImpl<MCFixup> &Fixups) const {
+  // {20-16} = imm{15-12}
+  // {11-0}  = imm{11-0}
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (MO.isImm())
+    // Hi / lo 16 bits already extracted during earlier passes.
+    return static_cast<unsigned>(MO.getImm());
+
+  // Handle :upper16: and :lower16: assembly prefixes.
+  const MCExpr *E = MO.getExpr();
+  if (E->getKind() == MCExpr::Target) {
+    const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+    E = ARM16Expr->getSubExpr();
+
+    MCFixupKind Kind;
+    switch (ARM16Expr->getKind()) {
+    default: assert(0 && "Unsupported ARMFixup");
+    case ARMMCExpr::VK_ARM_HI16:
+      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movt_hi16_pcrel
+                           : ARM::fixup_arm_movt_hi16_pcrel);
+      else
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movt_hi16
+                           : ARM::fixup_arm_movt_hi16);
+      break;
+    case ARMMCExpr::VK_ARM_LO16:
+      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movw_lo16_pcrel
+                           : ARM::fixup_arm_movw_lo16_pcrel);
+      else
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movw_lo16
+                           : ARM::fixup_arm_movw_lo16);
+      break;
+    }
+    Fixups.push_back(MCFixup::Create(0, E, Kind));
+    return 0;
+  };
+
+  llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+  return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+  unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+  bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+  ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+  unsigned SBits = getShiftOp(ShOp);
+
+  // {16-13} = Rn
+  // {12}    = isAdd
+  // {11-0}  = shifter
+  //  {3-0}  = Rm
+  //  {4}    = 0
+  //  {6-5}  = type
+  //  {11-7} = imm
+  uint32_t Binary = Rm;
+  Binary |= Rn << 13;
+  Binary |= SBits << 5;
+  Binary |= ShImm << 7;
+  if (isAdd)
+    Binary |= 1 << 12;
+  return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {17-14}  Rn
+  // {13}     1 == imm12, 0 == Rm
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+  Binary |= Rn << 14;
+  return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // {13}     1 == imm12, 0 == Rm
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  unsigned Imm = MO1.getImm();
+  bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+  bool isReg = MO.getReg() != 0;
+  uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+  if (isReg) {
+    ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+    Binary <<= 7;                    // Shift amount is bits [11:7]
+    Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+    Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+  }
+  return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // {9}      1 == imm8, 0 == Rm
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  unsigned Imm = MO1.getImm();
+  bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+  bool isImm = MO.getReg() == 0;
+  uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+  if (!isImm)
+    Imm8 = getARMRegisterNumbering(MO.getReg());
+  return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Imm = MO2.getImm();
+  bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+  bool isImm = MO1.getReg() == 0;
+  uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+  if (!isImm)
+    Imm8 = getARMRegisterNumbering(MO1.getReg());
+  return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // [SP, #imm]
+  //   {7-0} = imm8
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+         "Unexpected base register!");
+
+  // The immediate is already shifted for the implicit zeroes, so no change
+  // here.
+  return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+                     SmallVectorImpl<MCFixup> &Fixups) const {
+  // [Rn, #imm]
+  //   {7-3} = imm5
+  //   {2-0} = Rn
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Imm5 = MO1.getImm();
+  return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+                     SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {12-9} = reg
+  // {8}    = (U)nsigned (add == '1', sub == '0')
+  // {7-0}  = imm8
+  unsigned Reg, Imm8;
+  bool isAdd;
+  // If The first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm8 = 0;
+    isAdd = false; // 'U' bit is handled as part of the fixup.
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+    MCFixupKind Kind;
+    if (Subtarget->isThumb2())
+      Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+    else
+      Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else {
+    EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+    isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+  }
+
+  uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 8);
+  Binary |= (Reg << 9);
+  return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+                SmallVectorImpl<MCFixup> &Fixups) const {
+  // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+  // shifted. The second is either Rs, the amount to shift by, or reg0 in which
+  // case the imm contains the amount to shift by.
+  //
+  // {3-0} = Rm.
+  // {4}   = 1 if reg shift, 0 if imm shift
+  // {6-5} = type
+  //    If reg shift:
+  //      {11-8} = Rs
+  //      {7}    = 0
+  //    else (imm shift)
+  //      {11-7} = imm
+
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+  ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+  // Encode Rm.
+  unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+  // Encode the shift opcode.
+  unsigned SBits = 0;
+  unsigned Rs = MO1.getReg();
+  if (Rs) {
+    // Set shift operand (bit[7:4]).
+    // LSL - 0001
+    // LSR - 0011
+    // ASR - 0101
+    // ROR - 0111
+    // RRX - 0110 and bit[11:8] clear.
+    switch (SOpc) {
+    default: llvm_unreachable("Unknown shift opc!");
+    case ARM_AM::lsl: SBits = 0x1; break;
+    case ARM_AM::lsr: SBits = 0x3; break;
+    case ARM_AM::asr: SBits = 0x5; break;
+    case ARM_AM::ror: SBits = 0x7; break;
+    case ARM_AM::rrx: SBits = 0x6; break;
+    }
+  } else {
+    // Set shift operand (bit[6:4]).
+    // LSL - 000
+    // LSR - 010
+    // ASR - 100
+    // ROR - 110
+    switch (SOpc) {
+    default: llvm_unreachable("Unknown shift opc!");
+    case ARM_AM::lsl: SBits = 0x0; break;
+    case ARM_AM::lsr: SBits = 0x2; break;
+    case ARM_AM::asr: SBits = 0x4; break;
+    case ARM_AM::ror: SBits = 0x6; break;
+    }
+  }
+
+  Binary |= SBits << 4;
+  if (SOpc == ARM_AM::rrx)
+    return Binary;
+
+  // Encode the shift operation Rs or shift_imm (except rrx).
+  if (Rs) {
+    // Encode Rs bit[11:8].
+    assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+    return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+  }
+
+  // Encode shift_imm bit[11:7].
+  return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+                SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO1 = MI.getOperand(OpNum);
+  const MCOperand &MO2 = MI.getOperand(OpNum+1);
+  const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+  // Encoded as [Rn, Rm, imm].
+  // FIXME: Needs fixup support.
+  unsigned Value = getARMRegisterNumbering(MO1.getReg());
+  Value <<= 4;
+  Value |= getARMRegisterNumbering(MO2.getReg());
+  Value <<= 2;
+  Value |= MO3.getImm();
+
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO1 = MI.getOperand(OpNum);
+  const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+  // FIXME: Needs fixup support.
+  unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+  // Even though the immediate is 8 bits long, we need 9 bits in order
+  // to represent the (inverse of the) sign bit.
+  Value <<= 9;
+  int32_t tmp = (int32_t)MO2.getImm();
+  if (tmp < 0)
+    tmp = abs(tmp);
+  else
+    Value |= 256; // Set the ADD bit
+  Value |= tmp & 255;
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO1 = MI.getOperand(OpNum);
+
+  // FIXME: Needs fixup support.
+  unsigned Value = 0;
+  int32_t tmp = (int32_t)MO1.getImm();
+  if (tmp < 0)
+    tmp = abs(tmp);
+  else
+    Value |= 256; // Set the ADD bit
+  Value |= tmp & 255;
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO1 = MI.getOperand(OpNum);
+
+  // FIXME: Needs fixup support.
+  unsigned Value = 0;
+  int32_t tmp = (int32_t)MO1.getImm();
+  if (tmp < 0)
+    tmp = abs(tmp);
+  else
+    Value |= 4096; // Set the ADD bit
+  Value |= tmp & 4095;
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+                SmallVectorImpl<MCFixup> &Fixups) const {
+  // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+  // shifted. The second is the amount to shift by.
+  //
+  // {3-0} = Rm.
+  // {4}   = 0
+  // {6-5} = type
+  // {11-7} = imm
+
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+  // Encode Rm.
+  unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+  // Encode the shift opcode.
+  unsigned SBits = 0;
+  // Set shift operand (bit[6:4]).
+  // LSL - 000
+  // LSR - 010
+  // ASR - 100
+  // ROR - 110
+  switch (SOpc) {
+  default: llvm_unreachable("Unknown shift opc!");
+  case ARM_AM::lsl: SBits = 0x0; break;
+  case ARM_AM::lsr: SBits = 0x2; break;
+  case ARM_AM::asr: SBits = 0x4; break;
+  case ARM_AM::ror: SBits = 0x6; break;
+  }
+
+  Binary |= SBits << 4;
+  if (SOpc == ARM_AM::rrx)
+    return Binary;
+
+  // Encode shift_imm bit[11:7].
+  return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+                               SmallVectorImpl<MCFixup> &Fixups) const {
+  // 10 bits. lower 5 bits are are the lsb of the mask, high five bits are the
+  // msb of the mask.
+  const MCOperand &MO = MI.getOperand(Op);
+  uint32_t v = ~MO.getImm();
+  uint32_t lsb = CountTrailingZeros_32(v);
+  uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+  assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+  return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+              SmallVectorImpl<MCFixup> &Fixups) const {
+  // MSB - 5 bits.
+  uint32_t lsb = MI.getOperand(Op-1).getImm();
+  uint32_t width = MI.getOperand(Op).getImm();
+  uint32_t msb = lsb+width-1;
+  assert (width != 0 && msb < 32 && "Illegal bit width!");
+  return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  // VLDM/VSTM:
+  //   {12-8} = Vd
+  //   {7-0}  = Number of registers
+  //
+  // LDM/STM:
+  //   {15-0}  = Bitfield of GPRs.
+  unsigned Reg = MI.getOperand(Op).getReg();
+  bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+  bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+  unsigned Binary = 0;
+
+  if (SPRRegs || DPRRegs) {
+    // VLDM/VSTM
+    unsigned RegNo = getARMRegisterNumbering(Reg);
+    unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+    Binary |= (RegNo & 0x1f) << 8;
+    if (SPRRegs)
+      Binary |= NumRegs;
+    else
+      Binary |= NumRegs * 2;
+  } else {
+    for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+      unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+      Binary |= 1 << RegNo;
+    }
+  }
+
+  return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+                           SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &Reg = MI.getOperand(Op);
+  const MCOperand &Imm = MI.getOperand(Op + 1);
+
+  unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+  unsigned Align = 0;
+
+  switch (Imm.getImm()) {
+  default: break;
+  case 2:
+  case 4:
+  case 8:  Align = 0x01; break;
+  case 16: Align = 0x02; break;
+  case 32: Align = 0x03; break;
+  }
+
+  return RegNo | (Align << 4);
+}
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions.  This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+                              SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &Reg = MI.getOperand(Op);
+  const MCOperand &Imm = MI.getOperand(Op + 1);
+
+  unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+  unsigned Align = 0;
+
+  switch (Imm.getImm()) {
+  default: break;
+  case 2:
+  case 4:
+  case 8:  Align = 0x01; break;
+  case 16: Align = 0x03; break;
+  }
+
+  return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(Op);
+  if (MO.getReg() == 0) return 0x0D;
+  return MO.getReg();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  // Pseudo instructions don't get encoded.
+  const TargetInstrDesc &Desc = TII.get(MI.getOpcode());
+  uint64_t TSFlags = Desc.TSFlags;
+  if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+    return;
+  int Size;
+  // Basic size info comes from the TSFlags field.
+  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+  default: llvm_unreachable("Unexpected instruction size!");
+  case ARMII::Size2Bytes: Size = 2; break;
+  case ARMII::Size4Bytes: Size = 4; break;
+  }
+  uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+  // Thumb 32-bit wide instructions need to emit the high order halfword
+  // first.
+  if (Subtarget->isThumb() && Size == 4) {
+    EmitConstant(Binary >> 16, 2, OS);
+    EmitConstant(Binary & 0xffff, 2, OS);
+  } else
+    EmitConstant(Binary, Size, OS);
+  ++MCNumEmitted;  // Keep track of the # of mi's emitted.
+}
+
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/lib/Target/ARM/ARMMCExpr.cpp b/lib/Target/ARM/ARMMCExpr.cpp
new file mode 100644
index 000000000000..2727ba8c8aa5
--- /dev/null
+++ b/lib/Target/ARM/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+                       MCContext &Ctx) {
+  return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+  switch (Kind) {
+  default: assert(0 && "Invalid kind!");
+  case VK_ARM_HI16: OS << ":upper16:"; break;
+  case VK_ARM_LO16: OS << ":lower16:"; break;
+  }
+
+  const MCExpr *Expr = getSubExpr();
+  if (Expr->getKind() != MCExpr::SymbolRef)
+    OS << '(';
+  Expr->print(OS);
+  if (Expr->getKind() != MCExpr::SymbolRef)
+    OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+                                     const MCAsmLayout *Layout) const {
+  return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+  switch (Value->getKind()) {
+  case MCExpr::Target:
+    assert(0 && "Can't handle nested target expr!");
+    break;
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+    AddValueSymbols_(BE->getLHS(), Asm);
+    AddValueSymbols_(BE->getRHS(), Asm);
+    break;
+  }
+
+  case MCExpr::SymbolRef:
+    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+    break;
+
+  case MCExpr::Unary:
+    AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+    break;
+  }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+  AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h
new file mode 100644
index 000000000000..d42f766ca91f
--- /dev/null
+++ b/lib/Target/ARM/ARMMCExpr.h
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+  enum VariantKind {
+    VK_ARM_None,
+    VK_ARM_HI16,  // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+    VK_ARM_LO16   // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+  };
+
+private:
+  const VariantKind Kind;
+  const MCExpr *Expr;
+
+  explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+    : Kind(_Kind), Expr(_Expr) {}
+  
+public:
+  /// @name Construction
+  /// @{
+
+  static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+                                      MCContext &Ctx);
+
+  static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+    return Create(VK_ARM_HI16, Expr, Ctx);
+  }
+
+  static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+    return Create(VK_ARM_LO16, Expr, Ctx);
+  }
+
+  /// @}
+  /// @name Accessors
+  /// @{
+
+  /// getOpcode - Get the kind of this expression.
+  VariantKind getKind() const { return Kind; }
+
+  /// getSubExpr - Get the child of this expression.
+  const MCExpr *getSubExpr() const { return Expr; }
+
+  /// @}
+
+  void PrintImpl(raw_ostream &OS) const;
+  bool EvaluateAsRelocatableImpl(MCValue &Res,
+                                 const MCAsmLayout *Layout) const;
+  void AddValueSymbols(MCAssembler *) const;
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Target;
+  }
+  
+  static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b60783..59d60506fc0f 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -12,122 +12,69 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARMMCInstLower.h"
-//#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMMCExpr.h"
+#include "llvm/Constants.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-//#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
 
-#if 0
-const ARMSubtarget &ARMMCInstLower::getSubtarget() const {
-  return AsmPrinter.getSubtarget();
-}
-
-MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const {
-  assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
-  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); 
-}
-#endif
-
-MCSymbol *ARMMCInstLower::
-GetGlobalAddressSymbol(const MachineOperand &MO) const {
-  // FIXME: HANDLE PLT references how??
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
-  }
-  
-  return Printer.Mang->getSymbol(MO.getGlobal());
-}
-
-MCSymbol *ARMMCInstLower::
-GetExternalSymbolSymbol(const MachineOperand &MO) const {
-  // FIXME: HANDLE PLT references how??
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+                              ARMAsmPrinter &Printer) {
+  MCContext &Ctx = Printer.OutContext;
+  const MCExpr *Expr;
   switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
+  default: {
+    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+    switch (MO.getTargetFlags()) {
+    default:
+      assert(0 && "Unknown target flag on symbol operand");
+    case 0:
+      break;
+    case ARMII::MO_LO16:
+      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+      Expr = ARMMCExpr::CreateLower16(Expr, Ctx);
+      break;
+    case ARMII::MO_HI16:
+      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+      Expr = ARMMCExpr::CreateUpper16(Expr, Ctx);
+      break;
+    }
+    break;
   }
-  
-  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
-}
-
 
-
-MCSymbol *ARMMCInstLower::
-GetJumpTableSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
-    << Printer.getFunctionNumber() << '_' << MO.getIndex();
-  
-#if 0
-  switch (MO.getTargetFlags()) {
-    default: llvm_unreachable("Unknown target flag on GV operand");
+  case ARMII::MO_PLT:
+    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+    break;
   }
-#endif
-  
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
 
-MCSymbol *ARMMCInstLower::
-GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
-    << Printer.getFunctionNumber() << '_' << MO.getIndex();
-  
-#if 0
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  }
-#endif
-  
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
-  
-MCOperand ARMMCInstLower::
-LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
-  // FIXME: We would like an efficient form for this, so we don't have to do a
-  // lot of extra uniquing.
-  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-  
-#if 0
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  }
-#endif
-  
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::CreateAdd(Expr,
                                    MCConstantExpr::Create(MO.getOffset(), Ctx),
                                    Ctx);
   return MCOperand::CreateExpr(Expr);
-}
 
+}
 
-void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                        ARMAsmPrinter &AP) {
   OutMI.setOpcode(MI->getOpcode());
-  
+
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    
+
     MCOperand MCOp;
     switch (MO.getType()) {
     default:
       MI->dump();
       assert(0 && "unknown operand type");
     case MachineOperand::MO_Register:
-      // Ignore all implicit register operands.
-      if (MO.isImplicit()) continue;
+      // Ignore all non-CPSR implicit register operands.
+      if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
       assert(!MO.getSubReg() && "Subregs should be eliminated!");
       MCOp = MCOperand::CreateReg(MO.getReg());
       break;
@@ -136,27 +83,33 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       break;
     case MachineOperand::MO_MachineBasicBlock:
       MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                       MO.getMBB()->getSymbol(), Ctx));
+                       MO.getMBB()->getSymbol(), AP.OutContext));
       break;
     case MachineOperand::MO_GlobalAddress:
-      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+      MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
       break;
     case MachineOperand::MO_ExternalSymbol:
-      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+      MCOp = GetSymbolRef(MO,
+                          AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
       break;
     case MachineOperand::MO_JumpTableIndex:
-      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+      MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
       break;
     case MachineOperand::MO_ConstantPoolIndex:
-      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+      MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
       break;
     case MachineOperand::MO_BlockAddress:
-      MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol(
-                                              MO.getBlockAddress()));
+      MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
       break;
+    case MachineOperand::MO_FPImmediate: {
+      APFloat Val = MO.getFPImm()->getValueAPF();
+      bool ignored;
+      Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+      MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+      break;
+    }
     }
-    
+
     OutMI.addOperand(MCOp);
   }
-  
 }
diff --git a/lib/Target/ARM/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h
deleted file mode 100644
index b81a30690ce2..000000000000
--- a/lib/Target/ARM/ARMMCInstLower.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- ARMMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_MCINSTLOWER_H
-#define ARM_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-  class AsmPrinter;
-  class MCAsmInfo;
-  class MCContext;
-  class MCInst;
-  class MCOperand;
-  class MCSymbol;
-  class MachineInstr;
-  class MachineModuleInfoMachO;
-  class MachineOperand;
-  class Mangler;
-  //class ARMSubtarget;
-  
-/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
-  MCContext &Ctx;
-  Mangler &Mang;
-  AsmPrinter &Printer;
-
-  //const ARMSubtarget &getSubtarget() const;
-public:
-  ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
-    : Ctx(ctx), Mang(mang), Printer(printer) {}
-  
-  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-  //MCSymbol *GetPICBaseSymbol() const;
-  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
-  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-  
-/*
-private:
-  MachineModuleInfoMachO &getMachOMMI() const;
- */
-};
-
-}
-
-#endif
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 514c26b4daf0..138f0c262271 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -22,8 +22,8 @@
 
 namespace llvm {
 
-/// ARMFunctionInfo - This class is derived from MachineFunction private
-/// ARM target-specific information for each MachineFunction.
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
 class ARMFunctionInfo : public MachineFunctionInfo {
 
   /// isThumb - True if this function is compiled under Thumb mode.
@@ -79,15 +79,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   BitVector GPRCS2Frames;
   BitVector DPRCSFrames;
 
-  /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
-  ///
-  BitVector SpilledCSRegs;
-
   /// JumpTableUId - Unique id for jumptables.
   ///
   unsigned JumpTableUId;
 
-  unsigned ConstPoolEntryUId;
+  unsigned PICLabelUId;
 
   /// VarArgsFrameIndex - FrameIndex for start of varargs area.
   int VarArgsFrameIndex;
@@ -95,6 +91,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// HasITBlocks - True if IT blocks have been inserted.
   bool HasITBlocks;
 
+  /// CPEClones - Track constant pool entries clones created by Constant Island
+  /// pass.
+  DenseMap<unsigned, unsigned> CPEClones;
+
 public:
   ARMFunctionInfo() :
     isThumb(false),
@@ -104,8 +104,8 @@ public:
     FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
     GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
     GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
-    JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
-    HasITBlocks(false) {}
+    JumpTableUId(0), PICLabelUId(0),
+    VarArgsFrameIndex(0), HasITBlocks(false) {}
 
   explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -115,9 +115,8 @@ public:
     FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
     GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
     GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
-    SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
-    JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
-    HasITBlocks(false) {}
+    JumpTableUId(0), PICLabelUId(0),
+    VarArgsFrameIndex(0), HasITBlocks(false) {}
 
   bool isThumbFunction() const { return isThumb; }
   bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -207,18 +206,6 @@ public:
     }
   }
 
-  void setCSRegisterIsSpilled(unsigned Reg) {
-    SpilledCSRegs.set(Reg);
-  }
-
-  bool isCSRegisterSpilled(unsigned Reg) const {
-    return SpilledCSRegs[Reg];
-  }
-
-  const BitVector &getSpilledCSRegisters() const {
-    return SpilledCSRegs;
-  }
-
   unsigned createJumpTableUId() {
     return JumpTableUId++;
   }
@@ -227,16 +214,16 @@ public:
     return JumpTableUId;
   }
 
-  void initConstPoolEntryUId(unsigned UId) {
-    ConstPoolEntryUId = UId;
+  void initPICLabelUId(unsigned UId) {
+    PICLabelUId = UId;
   }
 
-  unsigned getNumConstPoolEntries() const {
-    return ConstPoolEntryUId;
+  unsigned getNumPICLabels() const {
+    return PICLabelUId;
   }
 
-  unsigned createConstPoolEntryUId() {
-    return ConstPoolEntryUId++;
+  unsigned createPICLabelUId() {
+    return PICLabelUId++;
   }
 
   int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
@@ -244,6 +231,19 @@ public:
 
   bool hasITBlocks() const { return HasITBlocks; }
   void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+  void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+    if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+      assert(0 && "Duplicate entries!");
+  }
+
+  unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+    DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+    if (I != CPEClones.end())
+      return I->second;
+    else
+      return -1U;
+  }
 };
 } // End llvm namespace
 
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 5ff7c381bc51..edecc4b0240a 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -21,6566 +21,6566 @@
 
 // This table is 6561*4 = 26244 bytes in size.
 static const unsigned PerfectShuffleTable[6561+1] = {
-  135053414U,	// <0,0,0,0>: Cost 1 vdup0 LHS
-  1543503974U,	// <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
-  2618572962U,	// <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
-  2568054923U,	// <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
-  1476398390U,	// <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
-  2550140624U,	// <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
-  2550141434U,	// <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
-  2591945711U,	// <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
-  135053414U,	// <0,0,0,u>: Cost 1 vdup0 LHS
-  2886516736U,	// <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
-  1812775014U,	// <0,0,1,1>: Cost 2 vzipl LHS, LHS
-  1618133094U,	// <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
-  2625209292U,	// <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
-  2886558034U,	// <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
-  2617246864U,	// <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
-  3659723031U,	// <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
-  2591953904U,	// <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
-  1812775581U,	// <0,0,1,u>: Cost 2 vzipl LHS, LHS
-  3020734464U,	// <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
-  3020734474U,	// <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
-  1946992742U,	// <0,0,2,2>: Cost 2 vtrnl LHS, LHS
-  2631181989U,	// <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
-  3020734668U,	// <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
-  3826550569U,	// <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
-  2617247674U,	// <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
-  2591962097U,	// <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
-  1946992796U,	// <0,0,2,u>: Cost 2 vtrnl LHS, LHS
-  2635163787U,	// <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
-  2686419196U,	// <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
-  2686492933U,	// <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
-  2617248156U,	// <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
-  2617248258U,	// <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
-  3826551298U,	// <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
-  3690990200U,	// <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
-  3713551042U,	// <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
-  2635163787U,	// <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
-  2617248658U,	// <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
-  2888450150U,	// <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
-  3021570150U,	// <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
-  3641829519U,	// <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
-  3021570252U,	// <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
-  1543507254U,	// <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
-  2752810294U,	// <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
-  3786998152U,	// <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
-  1543507497U,	// <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
-  2684354972U,	// <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
-  2617249488U,	// <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
-  3765617070U,	// <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
-  3635865780U,	// <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
-  2617249734U,	// <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
-  2617249796U,	// <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
-  2718712274U,	// <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
-  2617249960U,	// <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
-  2720039396U,	// <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
-  2684355053U,	// <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
-  3963609190U,	// <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
-  2617250298U,	// <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
-  3796435464U,	// <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
-  3659762998U,	// <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
-  3659763810U,	// <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
-  2617250616U,	// <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
-  2657727309U,	// <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
-  2658390942U,	// <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
-  2659054575U,	// <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
-  3635880854U,	// <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
-  3635881401U,	// <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
-  3734787298U,	// <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
-  2617251174U,	// <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
-  3659772002U,	// <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
-  3659772189U,	// <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
-  2617251436U,	// <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
-  2659054575U,	// <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
-  135053414U,	// <0,0,u,0>: Cost 1 vdup0 LHS
-  1817419878U,	// <0,0,u,1>: Cost 2 vzipl LHS, LHS
-  1947435110U,	// <0,0,u,2>: Cost 2 vtrnl LHS, LHS
-  2568120467U,	// <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
-  1476463926U,	// <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
-  1543510170U,	// <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
-  2752813210U,	// <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
-  2592011255U,	// <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
-  135053414U,	// <0,0,u,u>: Cost 1 vdup0 LHS
-  2618581002U,	// <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
-  1557446758U,	// <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
-  2618581155U,	// <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
-  2690548468U,	// <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
-  2626543954U,	// <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
-  4094985216U,	// <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
-  2592019278U,	// <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
-  2592019448U,	// <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
-  1557447325U,	// <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
-  1476476938U,	// <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
-  2886517556U,	// <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
-  2886517654U,	// <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
-  2886517720U,	// <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
-  1476480310U,	// <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
-  2886558864U,	// <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
-  2550223354U,	// <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
-  2550223856U,	// <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
-  1476482862U,	// <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
-  1494401126U,	// <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
-  3020735284U,	// <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
-  2562172349U,	// <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
-  835584U,	// <0,1,2,3>: Cost 0 copy LHS
-  1494404406U,	// <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
-  3020735488U,	// <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
-  2631190458U,	// <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
-  1518294010U,	// <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
-  835584U,	// <0,1,2,u>: Cost 0 copy LHS
-  2692318156U,	// <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
-  2691875800U,	// <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
-  2691875806U,	// <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
-  2692539367U,	// <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
-  2562182454U,	// <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
-  2691875840U,	// <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
-  2692760578U,	// <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
-  2639817411U,	// <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
-  2691875863U,	// <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
-  2568159334U,	// <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
-  4095312692U,	// <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
-  2568160934U,	// <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
-  2568161432U,	// <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
-  2568162614U,	// <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
-  1557450038U,	// <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
-  2754235702U,	// <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
-  2592052220U,	// <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
-  1557450281U,	// <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
-  3765617775U,	// <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
-  2647781007U,	// <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
-  3704934138U,	// <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
-  2691875984U,	// <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
-  2657734598U,	// <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
-  2650435539U,	// <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
-  2651099172U,	// <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
-  2651762805U,	// <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
-  2691876029U,	// <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
-  2592063590U,	// <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
-  3765617871U,	// <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
-  2654417337U,	// <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
-  3765617889U,	// <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
-  2592066870U,	// <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
-  3765617907U,	// <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
-  2657071869U,	// <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
-  1583993678U,	// <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
-  1584657311U,	// <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
-  2657735672U,	// <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
-  2657735808U,	// <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
-  2631193772U,	// <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
-  2661053667U,	// <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
-  2657736038U,	// <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
-  3721524621U,	// <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
-  2657736158U,	// <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
-  2657736300U,	// <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
-  2657736322U,	// <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
-  1494450278U,	// <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
-  1557452590U,	// <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
-  2754238254U,	// <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
-  835584U,	// <0,1,u,3>: Cost 0 copy LHS
-  1494453558U,	// <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
-  1557452954U,	// <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
-  2754238618U,	// <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
-  1518343168U,	// <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
-  835584U,	// <0,1,u,u>: Cost 0 copy LHS
-  2752299008U,	// <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
-  1544847462U,	// <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
-  1678557286U,	// <0,2,0,2>: Cost 2 vuzpl LHS, LHS
-  2696521165U,	// <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
-  2752340172U,	// <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
-  2691876326U,	// <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
-  2618589695U,	// <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
-  2592093185U,	// <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
-  1678557340U,	// <0,2,0,u>: Cost 2 vuzpl LHS, LHS
-  2618589942U,	// <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
-  2752299828U,	// <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
-  2886518376U,	// <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
-  2752299766U,	// <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
-  2550295862U,	// <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
-  2752340992U,	// <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
-  2886559674U,	// <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
-  3934208106U,	// <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
-  2752340771U,	// <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
-  1476558868U,	// <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
-  2226628029U,	// <0,2,2,1>: Cost 3 vrev <2,0,1,2>
-  2752300648U,	// <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
-  3020736114U,	// <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
-  1476562230U,	// <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
-  2550304464U,	// <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
-  2618591162U,	// <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
-  2550305777U,	// <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
-  1476564782U,	// <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
-  2618591382U,	// <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
-  2752301206U,	// <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
-  3826043121U,	// <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
-  2752301468U,	// <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
-  2618591746U,	// <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
-  2752301570U,	// <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
-  3830688102U,	// <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
-  2698807012U,	// <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
-  2752301269U,	// <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
-  2562261094U,	// <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
-  4095313828U,	// <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
-  2226718152U,	// <0,2,4,2>: Cost 3 vrev <2,0,2,4>
-  2568235169U,	// <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
-  2562264374U,	// <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
-  1544850742U,	// <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
-  1678560566U,	// <0,2,4,6>: Cost 2 vuzpl LHS, RHS
-  2592125957U,	// <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
-  1678560584U,	// <0,2,4,u>: Cost 2 vuzpl LHS, RHS
-  2691876686U,	// <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
-  2618592976U,	// <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
-  3765618528U,	// <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
-  3765618536U,	// <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
-  2618593222U,	// <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
-  2752303108U,	// <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
-  2618593378U,	// <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
-  2824785206U,	// <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
-  2824785207U,	// <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
-  2752303950U,	// <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
-  3830690081U,	// <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
-  2618593786U,	// <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
-  2691876794U,	// <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
-  2752303990U,	// <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
-  3830690445U,	// <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
-  2752303928U,	// <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
-  2657743695U,	// <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
-  2691876839U,	// <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
-  2659070961U,	// <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
-  2659734594U,	// <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
-  3734140051U,	// <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
-  2701166596U,	// <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
-  2662389094U,	// <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
-  2662389126U,	// <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
-  3736794583U,	// <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
-  2752304748U,	// <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
-  2659070961U,	// <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
-  1476608026U,	// <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
-  1544853294U,	// <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
-  1678563118U,	// <0,2,u,2>: Cost 2 vuzpl LHS, LHS
-  3021178482U,	// <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
-  1476611382U,	// <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
-  1544853658U,	// <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
-  1678563482U,	// <0,2,u,6>: Cost 2 vuzpl LHS, RHS
-  2824785449U,	// <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
-  1678563172U,	// <0,2,u,u>: Cost 2 vuzpl LHS, LHS
-  2556329984U,	// <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
-  2686421142U,	// <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
-  2562303437U,	// <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
-  4094986652U,	// <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
-  2556333366U,	// <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
-  4094986754U,	// <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
-  3798796488U,	// <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
-  3776530634U,	// <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
-  2556335918U,	// <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
-  2886518934U,	// <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
-  2556338933U,	// <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
-  2691877105U,	// <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
-  2886519196U,	// <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
-  2886519298U,	// <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
-  4095740418U,	// <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
-  3659944242U,	// <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
-  3769600286U,	// <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
-  2886519582U,	// <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
-  1482604646U,	// <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
-  1482605302U,	// <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
-  2556348008U,	// <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
-  3020736924U,	// <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
-  1482607926U,	// <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
-  3020737026U,	// <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
-  2598154746U,	// <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
-  2598155258U,	// <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
-  1482610478U,	// <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
-  3692341398U,	// <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
-  2635851999U,	// <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
-  3636069840U,	// <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
-  2691877276U,	// <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
-  3961522690U,	// <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
-  3826797058U,	// <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
-  3703622282U,	// <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
-  3769600452U,	// <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
-  2640497430U,	// <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
-  3962194070U,	// <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
-  2232617112U,	// <0,3,4,1>: Cost 3 vrev <3,0,1,4>
-  2232690849U,	// <0,3,4,2>: Cost 3 vrev <3,0,2,4>
-  4095314332U,	// <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
-  3962194434U,	// <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
-  2691877378U,	// <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
-  3826765110U,	// <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
-  3665941518U,	// <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
-  2691877405U,	// <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
-  3630112870U,	// <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
-  3630113526U,	// <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
-  4035199734U,	// <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
-  3769600578U,	// <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
-  2232846516U,	// <0,3,5,4>: Cost 3 vrev <3,0,4,5>
-  3779037780U,	// <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
-  2718714461U,	// <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
-  2706106975U,	// <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
-  2233141464U,	// <0,3,5,u>: Cost 3 vrev <3,0,u,5>
-  2691877496U,	// <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
-  3727511914U,	// <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
-  3765619338U,	// <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
-  3765619347U,	// <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
-  3765987996U,	// <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
-  3306670270U,	// <0,3,6,5>: Cost 4 vrev <3,0,5,6>
-  3792456365U,	// <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
-  2706770608U,	// <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
-  2706844345U,	// <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
-  3769600707U,	// <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
-  2659742787U,	// <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
-  3636102612U,	// <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
-  3769600740U,	// <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
-  3769600747U,	// <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
-  3769600758U,	// <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
-  3659993400U,	// <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
-  3781176065U,	// <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
-  2664388218U,	// <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
-  1482653798U,	// <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
-  1482654460U,	// <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
-  2556397160U,	// <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
-  3021179292U,	// <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
-  1482657078U,	// <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
-  3021179394U,	// <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
-  2598203898U,	// <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
-  2708097874U,	// <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
-  1482659630U,	// <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
-  2617278468U,	// <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
-  2618605670U,	// <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
-  2618605734U,	// <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
-  3642091695U,	// <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
-  2753134796U,	// <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
-  2718714770U,	// <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
-  3021245750U,	// <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
-  3665982483U,	// <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
-  3021245768U,	// <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
-  2568355942U,	// <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
-  3692348212U,	// <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
-  3692348310U,	// <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
-  2568358064U,	// <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
-  2568359222U,	// <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
-  1812778294U,	// <0,4,1,5>: Cost 2 vzipl LHS, RHS
-  3022671158U,	// <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
-  2592248852U,	// <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
-  1812778537U,	// <0,4,1,u>: Cost 2 vzipl LHS, RHS
-  2568364134U,	// <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
-  2238573423U,	// <0,4,2,1>: Cost 3 vrev <4,0,1,2>
-  3692349032U,	// <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
-  2631214761U,	// <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
-  2568367414U,	// <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
-  2887028022U,	// <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
-  1946996022U,	// <0,4,2,6>: Cost 2 vtrnl LHS, RHS
-  2592257045U,	// <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
-  1946996040U,	// <0,4,2,u>: Cost 2 vtrnl LHS, RHS
-  3692349590U,	// <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
-  3826878614U,	// <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
-  3826878625U,	// <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
-  3692349852U,	// <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
-  3692349954U,	// <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
-  3826878978U,	// <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
-  4095200566U,	// <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
-  3713583814U,	// <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
-  3692350238U,	// <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
-  2550464552U,	// <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
-  3962194914U,	// <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
-  3693677631U,	// <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
-  3642124467U,	// <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
-  2718715088U,	// <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
-  2618608950U,	// <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
-  2753137974U,	// <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
-  3666015255U,	// <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
-  2618609193U,	// <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
-  2568388710U,	// <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
-  2568389526U,	// <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
-  3636159963U,	// <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
-  2568390836U,	// <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
-  2568391990U,	// <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
-  2718715180U,	// <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
-  1618136374U,	// <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
-  2592281624U,	// <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
-  1618136392U,	// <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
-  2550480938U,	// <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
-  3826880801U,	// <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
-  2562426332U,	// <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
-  3786190181U,	// <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
-  2718715252U,	// <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
-  3826881165U,	// <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
-  2712669568U,	// <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
-  2657760081U,	// <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
-  2718715284U,	// <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
-  3654090854U,	// <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
-  3934229326U,	// <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
-  3734156437U,	// <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
-  3734820070U,	// <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
-  3654094134U,	// <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
-  2713259464U,	// <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
-  2713333201U,	// <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
-  3654095866U,	// <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
-  2713259464U,	// <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
-  2568413286U,	// <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
-  2618611502U,	// <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
-  2753140526U,	// <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
-  2568415415U,	// <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
-  2568416566U,	// <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
-  1817423158U,	// <0,4,u,5>: Cost 2 vzipl LHS, RHS
-  1947438390U,	// <0,4,u,6>: Cost 2 vtrnl LHS, RHS
-  2592306203U,	// <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
-  1947438408U,	// <0,4,u,u>: Cost 2 vtrnl LHS, RHS
-  3630219264U,	// <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
-  2625912934U,	// <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
-  3692355748U,	// <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
-  3693019384U,	// <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
-  3630222646U,	// <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
-  3699655062U,	// <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
-  2718715508U,	// <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
-  3087011126U,	// <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
-  2625913501U,	// <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
-  1500659814U,	// <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
-  2886520528U,	// <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
-  2574403176U,	// <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
-  2574403734U,	// <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
-  1500662674U,	// <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
-  2886520836U,	// <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
-  2886520930U,	// <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
-  2718715600U,	// <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
-  1500665646U,	// <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
-  2556493926U,	// <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
-  2244546120U,	// <0,5,2,1>: Cost 3 vrev <5,0,1,2>
-  3692357256U,	// <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
-  2568439994U,	// <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
-  2556497206U,	// <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
-  3020738564U,	// <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
-  4027877161U,	// <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
-  3093220662U,	// <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
-  3093220663U,	// <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
-  3699656854U,	// <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
-  3699656927U,	// <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
-  3699657006U,	// <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
-  3699657116U,	// <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
-  2637859284U,	// <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
-  3790319453U,	// <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
-  3699657354U,	// <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
-  2716725103U,	// <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
-  2716798840U,	// <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
-  2661747602U,	// <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
-  3630252810U,	// <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
-  3636225507U,	// <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
-  3716910172U,	// <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
-  3962195892U,	// <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
-  2625916214U,	// <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
-  3718901071U,	// <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
-  2718715846U,	// <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
-  2625916457U,	// <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
-  3791278034U,	// <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
-  3791351771U,	// <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
-  3318386260U,	// <0,5,5,2>: Cost 4 vrev <5,0,2,5>
-  3791499245U,	// <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
-  3318533734U,	// <0,5,5,4>: Cost 4 vrev <5,0,4,5>
-  2718715908U,	// <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
-  2657767522U,	// <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
-  2718715928U,	// <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
-  2718715937U,	// <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
-  2592358502U,	// <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
-  3792015404U,	// <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
-  3731509754U,	// <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
-  3785748546U,	// <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
-  2592361782U,	// <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
-  2592362594U,	// <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
-  3785748576U,	// <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
-  1644974178U,	// <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
-  1645047915U,	// <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
-  2562506854U,	// <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
-  2562507670U,	// <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
-  2562508262U,	// <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
-  3636250774U,	// <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
-  2562510134U,	// <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
-  2718716072U,	// <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
-  2718716074U,	// <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
-  2719379635U,	// <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
-  2562512686U,	// <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
-  1500717158U,	// <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
-  2625918766U,	// <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
-  2719674583U,	// <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
-  2568489152U,	// <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
-  1500720025U,	// <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
-  2625919130U,	// <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
-  2586407243U,	// <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
-  1646301444U,	// <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
-  1646375181U,	// <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
-  2586411110U,	// <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
-  2619949158U,	// <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
-  2619949220U,	// <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
-  3785748789U,	// <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
-  2619949386U,	// <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
-  2586415202U,	// <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
-  2586415436U,	// <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
-  2952793398U,	// <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
-  2619949725U,	// <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
-  2562531430U,	// <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
-  3693691700U,	// <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
-  2886521338U,	// <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
-  3693691864U,	// <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
-  2562534710U,	// <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
-  2580450932U,	// <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
-  2886521656U,	// <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
-  2966736182U,	// <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
-  2966736183U,	// <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
-  1500741734U,	// <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
-  2250518817U,	// <0,6,2,1>: Cost 3 vrev <6,0,1,2>
-  2574485096U,	// <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
-  2631894694U,	// <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
-  1500744604U,	// <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
-  2574487248U,	// <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
-  3020739384U,	// <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
-  2954136886U,	// <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
-  1500747566U,	// <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
-  3693693078U,	// <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
-  3705637136U,	// <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
-  3705637192U,	// <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
-  3693693340U,	// <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
-  2637867477U,	// <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
-  3705637424U,	// <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
-  3666154056U,	// <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
-  2722697800U,	// <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
-  2722771537U,	// <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
-  2562556006U,	// <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
-  4095316257U,	// <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
-  2562557420U,	// <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
-  3636299926U,	// <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
-  2562559286U,	// <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
-  2619952438U,	// <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
-  2723287696U,	// <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
-  4027895094U,	// <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
-  2619952681U,	// <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
-  2718716594U,	// <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
-  3648250774U,	// <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
-  3792458436U,	// <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
-  3705638767U,	// <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
-  3648252831U,	// <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
-  3797619416U,	// <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
-  3792458472U,	// <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
-  4035202358U,	// <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
-  2718716594U,	// <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
-  3786412796U,	// <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
-  3792458504U,	// <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
-  3728200126U,	// <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
-  3798135575U,	// <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
-  3786412836U,	// <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
-  3792458543U,	// <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
-  2718716728U,	// <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
-  2718716738U,	// <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
-  2718716747U,	// <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
-  2718716750U,	// <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
-  2724909910U,	// <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
-  3636323823U,	// <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
-  2725057384U,	// <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
-  2718716790U,	// <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
-  2718716800U,	// <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
-  3792458629U,	// <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
-  2725352332U,	// <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
-  2718716822U,	// <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
-  1500790886U,	// <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
-  2619954990U,	// <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
-  2562590192U,	// <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
-  2725721017U,	// <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
-  1500793762U,	// <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
-  2619955354U,	// <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
-  2725942228U,	// <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
-  2954186038U,	// <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
-  1500796718U,	// <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
-  2256401391U,	// <0,7,0,0>: Cost 3 vrev <7,0,0,0>
-  2632564838U,	// <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
-  2256548865U,	// <0,7,0,2>: Cost 3 vrev <7,0,2,0>
-  3700998396U,	// <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
-  2718716952U,	// <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
-  2718716962U,	// <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
-  2621284845U,	// <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
-  3904685542U,	// <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
-  2632565405U,	// <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
-  2256409584U,	// <0,7,1,0>: Cost 3 vrev <7,0,0,1>
-  3706307380U,	// <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
-  2632565654U,	// <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
-  3769603168U,	// <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
-  2256704532U,	// <0,7,1,4>: Cost 3 vrev <7,0,4,1>
-  3769603184U,	// <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
-  3700999366U,	// <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
-  2886522476U,	// <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
-  2256999480U,	// <0,7,1,u>: Cost 3 vrev <7,0,u,1>
-  2586501222U,	// <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
-  1182749690U,	// <0,7,2,1>: Cost 2 vrev <7,0,1,2>
-  3636356595U,	// <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
-  2727711916U,	// <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
-  2586504502U,	// <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
-  2632566606U,	// <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
-  2586505559U,	// <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
-  3020740204U,	// <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
-  1183265849U,	// <0,7,2,u>: Cost 2 vrev <7,0,u,2>
-  3701000342U,	// <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
-  3706308849U,	// <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
-  3330315268U,	// <0,7,3,2>: Cost 4 vrev <7,0,2,3>
-  3706309020U,	// <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
-  3706309122U,	// <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
-  3712281127U,	// <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
-  2639202936U,	// <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
-  3802412321U,	// <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
-  2640530202U,	// <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
-  3654287462U,	// <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
-  2256507900U,	// <0,7,4,1>: Cost 3 vrev <7,0,1,4>
-  2256581637U,	// <0,7,4,2>: Cost 3 vrev <7,0,2,4>
-  3660262008U,	// <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
-  3786413405U,	// <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
-  2632568118U,	// <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
-  3718917457U,	// <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
-  3787003255U,	// <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
-  2632568361U,	// <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
-  3706310268U,	// <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
-  3792459156U,	// <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
-  3330331654U,	// <0,7,5,2>: Cost 4 vrev <7,0,2,5>
-  3722899255U,	// <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
-  2256737304U,	// <0,7,5,4>: Cost 3 vrev <7,0,4,5>
-  3724226521U,	// <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
-  2718717377U,	// <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
-  2729997763U,	// <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
-  2720044499U,	// <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
-  3712946517U,	// <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
-  2256524286U,	// <0,7,6,1>: Cost 3 vrev <7,0,1,6>
-  3792459246U,	// <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
-  3796440567U,	// <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
-  3654307126U,	// <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
-  2656457394U,	// <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
-  3792459281U,	// <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
-  2730661396U,	// <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
-  2658448293U,	// <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
-  3787003431U,	// <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
-  3654312854U,	// <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
-  3654313446U,	// <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
-  3804771905U,	// <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
-  3654315318U,	// <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
-  3654315651U,	// <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
-  3660288348U,	// <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
-  2718717548U,	// <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
-  2664420990U,	// <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
-  2256466935U,	// <0,7,u,0>: Cost 3 vrev <7,0,0,u>
-  1182798848U,	// <0,7,u,1>: Cost 2 vrev <7,0,1,u>
-  2256614409U,	// <0,7,u,2>: Cost 3 vrev <7,0,2,u>
-  2731693714U,	// <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
-  2256761883U,	// <0,7,u,4>: Cost 3 vrev <7,0,4,u>
-  2632571034U,	// <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
-  2669066421U,	// <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
-  2731988662U,	// <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
-  1183315007U,	// <0,7,u,u>: Cost 2 vrev <7,0,u,u>
-  135053414U,	// <0,u,0,0>: Cost 1 vdup0 LHS
-  1544896614U,	// <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
-  1678999654U,	// <0,u,0,2>: Cost 2 vuzpl LHS, LHS
-  2691880677U,	// <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
-  1476988214U,	// <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
-  2718791419U,	// <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
-  3021248666U,	// <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
-  2592535607U,	// <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
-  135053414U,	// <0,u,0,u>: Cost 1 vdup0 LHS
-  1476993097U,	// <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
-  1812780846U,	// <0,u,1,1>: Cost 2 vzipl LHS, LHS
-  1618138926U,	// <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
-  2752742134U,	// <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
-  1476996406U,	// <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
-  1812781210U,	// <0,u,1,5>: Cost 2 vzipl LHS, RHS
-  2887006416U,	// <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
-  2966736200U,	// <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
-  1812781413U,	// <0,u,1,u>: Cost 2 vzipl LHS, LHS
-  1482973286U,	// <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
-  1482973987U,	// <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
-  1946998574U,	// <0,u,2,2>: Cost 2 vtrnl LHS, LHS
-  835584U,	// <0,u,2,3>: Cost 0 copy LHS
-  1482976566U,	// <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
-  3020781631U,	// <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
-  1946998938U,	// <0,u,2,6>: Cost 2 vtrnl LHS, RHS
-  1518810169U,	// <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
-  835584U,	// <0,u,2,u>: Cost 0 copy LHS
-  2618640534U,	// <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
-  2752743574U,	// <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
-  2636556597U,	// <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
-  2752743836U,	// <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
-  2618640898U,	// <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
-  2752743938U,	// <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
-  2639202936U,	// <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
-  2639874762U,	// <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
-  2752743637U,	// <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
-  2562703462U,	// <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
-  2888455982U,	// <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
-  3021575982U,	// <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
-  2568677591U,	// <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
-  2562706742U,	// <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
-  1544899894U,	// <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
-  1679002934U,	// <0,u,4,6>: Cost 2 vuzpl LHS, RHS
-  2718718033U,	// <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
-  1679002952U,	// <0,u,4,u>: Cost 2 vuzpl LHS, RHS
-  2568683622U,	// <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
-  2568684438U,	// <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
-  3765622902U,	// <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
-  2691881087U,	// <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
-  2568686902U,	// <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
-  2650492890U,	// <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
-  1618139290U,	// <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
-  2824834358U,	// <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
-  1618139308U,	// <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
-  2592579686U,	// <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
-  2262496983U,	// <0,u,6,1>: Cost 3 vrev <u,0,1,6>
-  2654474688U,	// <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
-  2691881168U,	// <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
-  2592582966U,	// <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
-  2656465587U,	// <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
-  2657129220U,	// <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
-  1584051029U,	// <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
-  1584714662U,	// <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
-  2562728038U,	// <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
-  2562728854U,	// <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
-  2562729473U,	// <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
-  2661111018U,	// <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
-  2562731318U,	// <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
-  2718718258U,	// <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
-  2586620261U,	// <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
-  2657793644U,	// <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
-  2562733870U,	// <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
-  135053414U,	// <0,u,u,0>: Cost 1 vdup0 LHS
-  1544902446U,	// <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
-  1679005486U,	// <0,u,u,2>: Cost 2 vuzpl LHS, LHS
-  835584U,	// <0,u,u,3>: Cost 0 copy LHS
-  1483025718U,	// <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
-  1544902810U,	// <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
-  1679005850U,	// <0,u,u,6>: Cost 2 vuzpl LHS, RHS
-  1518859327U,	// <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
-  835584U,	// <0,u,u,u>: Cost 0 copy LHS
-  2689744896U,	// <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
-  1610694666U,	// <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
-  2689744916U,	// <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
-  2619310332U,	// <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
-  2684657701U,	// <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
-  2620637598U,	// <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
-  3708977654U,	// <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
-  3666351168U,	// <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
-  1611210825U,	// <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
-  2556780646U,	// <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
-  2556781355U,	// <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
-  1616003174U,	// <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
-  3693052888U,	// <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
-  2556783926U,	// <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
-  2580672143U,	// <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
-  2724839566U,	// <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
-  3654415354U,	// <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
-  1616003228U,	// <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
-  2685690019U,	// <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
-  2685763756U,	// <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
-  2698297524U,	// <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
-  2685911230U,	// <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
-  2689745100U,	// <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
-  3764814038U,	// <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
-  2724839640U,	// <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
-  2592625658U,	// <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
-  2686279915U,	// <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
-  3087843328U,	// <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
-  3087843338U,	// <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
-  67944550U,	// <1,0,3,2>: Cost 1 vrev LHS
-  2568743135U,	// <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
-  2562772278U,	// <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
-  4099850454U,	// <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
-  3704998538U,	// <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
-  2592633923U,	// <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
-  68386972U,	// <1,0,3,u>: Cost 1 vrev LHS
-  2620640146U,	// <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
-  2689745234U,	// <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
-  2689745244U,	// <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
-  3760980320U,	// <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
-  3761054057U,	// <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
-  2619313462U,	// <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
-  3761201531U,	// <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
-  3666383940U,	// <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
-  2619313705U,	// <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
-  4029300736U,	// <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
-  2895249510U,	// <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
-  3028287590U,	// <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
-  3642501345U,	// <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
-  2215592058U,	// <1,0,5,4>: Cost 3 vrev <0,1,4,5>
-  3724242907U,	// <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
-  3724906540U,	// <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
-  3911118134U,	// <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
-  3028287644U,	// <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
-  3762086375U,	// <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
-  2698297846U,	// <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
-  3760022015U,	// <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
-  3642509538U,	// <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
-  3762381323U,	// <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
-  3730215604U,	// <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
-  3730879237U,	// <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
-  2657801046U,	// <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
-  2658464679U,	// <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
-  2659128312U,	// <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
-  4047898278U,	// <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
-  2215460970U,	// <1,0,7,2>: Cost 3 vrev <0,1,2,7>
-  3734861035U,	// <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
-  3731543398U,	// <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
-  3736188301U,	// <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
-  2663110110U,	// <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
-  3731543660U,	// <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
-  2664437376U,	// <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
-  3087884288U,	// <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
-  1616003730U,	// <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
-  67985515U,	// <1,0,u,2>: Cost 1 vrev LHS
-  2689893028U,	// <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
-  2689745586U,	// <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
-  2619316378U,	// <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
-  2669082807U,	// <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
-  2592674888U,	// <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
-  68427937U,	// <1,0,u,u>: Cost 1 vrev LHS
-  1543585802U,	// <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
-  1548894310U,	// <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
-  2618654892U,	// <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
-  2689745654U,	// <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
-  2622636370U,	// <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
-  2620645791U,	// <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
-  3696378367U,	// <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
-  3666424905U,	// <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
-  1548894866U,	// <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
-  1483112550U,	// <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U,	// <1,1,1,1>: Cost 1 vdup1 LHS
-  2622636950U,	// <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
-  2622637016U,	// <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
-  1483115830U,	// <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  2622637200U,	// <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
-  2622637263U,	// <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
-  2592691274U,	// <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
-  202162278U,	// <1,1,1,u>: Cost 1 vdup1 LHS
-  2550890588U,	// <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
-  2617329183U,	// <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
-  2622637672U,	// <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
-  2622637734U,	// <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
-  2550893878U,	// <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
-  3696379744U,	// <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
-  2622638010U,	// <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
-  3804554170U,	// <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
-  2622638139U,	// <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
-  2622638230U,	// <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
-  3087844148U,	// <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
-  4161585244U,	// <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
-  2014101606U,	// <1,1,3,3>: Cost 2 vtrnr LHS, LHS
-  2622638594U,	// <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
-  2689745920U,	// <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
-  3763487753U,	// <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
-  2592707660U,	// <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
-  2014101611U,	// <1,1,3,u>: Cost 2 vtrnr LHS, LHS
-  2556878950U,	// <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
-  2221335351U,	// <1,1,4,1>: Cost 3 vrev <1,1,1,4>
-  3696380988U,	// <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
-  3763487805U,	// <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
-  2556882230U,	// <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
-  1548897590U,	// <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
-  2758184246U,	// <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
-  3666457677U,	// <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
-  1548897833U,	// <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
-  2693653615U,	// <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
-  2617331408U,	// <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
-  4029302934U,	// <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
-  2689746064U,	// <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
-  2221564755U,	// <1,1,5,4>: Cost 3 vrev <1,1,4,5>
-  2955559250U,	// <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
-  2617331810U,	// <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
-  2825293110U,	// <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
-  2689746109U,	// <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
-  3696382241U,	// <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
-  2689746127U,	// <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
-  2617332218U,	// <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
-  3763487969U,	// <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
-  3696382605U,	// <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
-  4029309266U,	// <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
-  2617332536U,	// <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
-  2724840702U,	// <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
-  2725504263U,	// <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
-  2617332720U,	// <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
-  2659800138U,	// <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
-  3691074717U,	// <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
-  4167811174U,	// <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
-  2617333094U,	// <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
-  3295396702U,	// <1,1,7,5>: Cost 4 vrev <1,1,5,7>
-  3803891014U,	// <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
-  2617333356U,	// <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
-  2659800138U,	// <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
-  1483112550U,	// <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U,	// <1,1,u,1>: Cost 1 vdup1 LHS
-  2622642056U,	// <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
-  2014142566U,	// <1,1,u,3>: Cost 2 vtrnr LHS, LHS
-  1483115830U,	// <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  1548900506U,	// <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
-  2622642384U,	// <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
-  2825293353U,	// <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
-  202162278U,	// <1,1,u,u>: Cost 1 vdup1 LHS
-  2635251712U,	// <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
-  1561509990U,	// <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
-  2618663085U,	// <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
-  2696529358U,	// <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
-  2635252050U,	// <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
-  3769533926U,	// <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
-  2621317617U,	// <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
-  2659140170U,	// <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
-  1561510557U,	// <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
-  2623308516U,	// <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
-  2635252532U,	// <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
-  2631271318U,	// <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
-  2958180454U,	// <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
-  2550959414U,	// <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
-  2635252880U,	// <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
-  2635252952U,	// <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
-  3732882731U,	// <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
-  2958180459U,	// <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
-  2629281213U,	// <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
-  2635253280U,	// <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
-  2618664552U,	// <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
-  2689746546U,	// <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
-  3764815485U,	// <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
-  3760023176U,	// <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
-  2635253690U,	// <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
-  2659141610U,	// <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
-  2689746591U,	// <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
-  403488870U,	// <1,2,3,0>: Cost 1 vext1 LHS, LHS
-  1477231350U,	// <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  1477232232U,	// <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
-  1477233052U,	// <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
-  403492150U,	// <1,2,3,4>: Cost 1 vext1 LHS, RHS
-  1525010128U,	// <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
-  1525010938U,	// <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1525011450U,	// <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  403494702U,	// <1,2,3,u>: Cost 1 vext1 LHS, LHS
-  2641226607U,	// <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
-  3624723446U,	// <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
-  3301123609U,	// <1,2,4,2>: Cost 4 vrev <2,1,2,4>
-  2598759198U,	// <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
-  2659142864U,	// <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
-  1561513270U,	// <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
-  2659143028U,	// <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
-  2659143112U,	// <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
-  1561513513U,	// <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
-  2550988902U,	// <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
-  2550989824U,	// <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
-  3624732264U,	// <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
-  2955559014U,	// <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
-  2550992182U,	// <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
-  2659143684U,	// <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
-  2659143778U,	// <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
-  2659143848U,	// <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
-  2550994734U,	// <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
-  2700289945U,	// <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
-  2635256232U,	// <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
-  2659144186U,	// <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
-  2689746874U,	// <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
-  3763488705U,	// <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
-  3763488716U,	// <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
-  2659144504U,	// <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
-  2657817432U,	// <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
-  2689746919U,	// <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
-  1585402874U,	// <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
-  2659144770U,	// <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
-  3708998858U,	// <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
-  2635257059U,	// <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
-  2659145062U,	// <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
-  3732886916U,	// <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
-  3732886998U,	// <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
-  2659145255U,	// <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
-  1590711938U,	// <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
-  403529835U,	// <1,2,u,0>: Cost 1 vext1 LHS, LHS
-  1477272310U,	// <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  1477273192U,	// <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
-  1477273750U,	// <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
-  403533110U,	// <1,2,u,4>: Cost 1 vext1 LHS, RHS
-  1561516186U,	// <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
-  1525051898U,	// <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1525052410U,	// <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  403535662U,	// <1,2,u,u>: Cost 1 vext1 LHS, LHS
-  2819407872U,	// <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
-  1551564902U,	// <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
-  2819408630U,	// <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
-  2619334911U,	// <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
-  2625306962U,	// <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
-  3832725879U,	// <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
-  3699048959U,	// <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
-  3776538827U,	// <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
-  1551565469U,	// <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
-  2618671862U,	// <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
-  2819408692U,	// <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
-  2624643975U,	// <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
-  1745666150U,	// <1,3,1,3>: Cost 2 vuzpr LHS, LHS
-  2557005110U,	// <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
-  2625307792U,	// <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
-  3698386127U,	// <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
-  2592838748U,	// <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
-  1745666155U,	// <1,3,1,u>: Cost 2 vuzpr LHS, LHS
-  2819408790U,	// <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
-  2625308193U,	// <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
-  2819408036U,	// <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
-  2819851890U,	// <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
-  2819408794U,	// <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
-  3893149890U,	// <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
-  2819408076U,	// <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
-  3772041583U,	// <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
-  2819408042U,	// <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
-  1483276390U,	// <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
-  1483277128U,	// <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
-  2557019752U,	// <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
-  2819408856U,	// <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
-  1483279670U,	// <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
-  2819409614U,	// <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
-  2598826490U,	// <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
-  3087844352U,	// <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
-  1483282222U,	// <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
-  2568970342U,	// <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
-  2568971224U,	// <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
-  3832761290U,	// <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
-  2233428219U,	// <1,3,4,3>: Cost 3 vrev <3,1,3,4>
-  2568973622U,	// <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
-  1551568182U,	// <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
-  2819410434U,	// <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
-  3666605151U,	// <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
-  1551568425U,	// <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
-  2563006566U,	// <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
-  2568979456U,	// <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
-  2563008035U,	// <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
-  2233436412U,	// <1,3,5,3>: Cost 3 vrev <3,1,3,5>
-  2563009846U,	// <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
-  2867187716U,	// <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
-  2655834214U,	// <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
-  1745669430U,	// <1,3,5,7>: Cost 2 vuzpr LHS, RHS
-  1745669431U,	// <1,3,5,u>: Cost 2 vuzpr LHS, RHS
-  2867187810U,	// <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
-  3699052931U,	// <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
-  2654507460U,	// <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
-  3766291091U,	// <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
-  2655834726U,	// <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
-  3923384562U,	// <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
-  2657161992U,	// <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
-  2819852218U,	// <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
-  2819852219U,	// <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
-  2706926275U,	// <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
-  2659816524U,	// <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
-  3636766245U,	// <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
-  2867187903U,	// <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
-  2625312102U,	// <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
-  2867188598U,	// <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
-  3728250344U,	// <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
-  2867187880U,	// <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
-  2707516171U,	// <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
-  1483317350U,	// <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
-  1483318093U,	// <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
-  2819410718U,	// <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
-  1745666717U,	// <1,3,u,3>: Cost 2 vuzpr LHS, LHS
-  1483320630U,	// <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
-  1551571098U,	// <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
-  2819410758U,	// <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
-  1745669673U,	// <1,3,u,7>: Cost 2 vuzpr LHS, RHS
-  1745666722U,	// <1,3,u,u>: Cost 2 vuzpr LHS, LHS
-  2617352205U,	// <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
-  2619342950U,	// <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
-  3692421295U,	// <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
-  2619343104U,	// <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
-  2617352530U,	// <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
-  1634880402U,	// <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
-  2713930652U,	// <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
-  3732898396U,	// <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
-  1635101613U,	// <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
-  3693085430U,	// <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
-  2623988535U,	// <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
-  3693085590U,	// <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
-  3692422134U,	// <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
-  3693085726U,	// <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
-  2892401974U,	// <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
-  3026619702U,	// <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
-  3800206324U,	// <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
-  2892402217U,	// <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
-  3966978927U,	// <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
-  3966979018U,	// <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
-  3693086312U,	// <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
-  2635269798U,	// <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
-  3966979280U,	// <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
-  2893204790U,	// <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
-  3693086650U,	// <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
-  3666662502U,	// <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
-  2893205033U,	// <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
-  2563063910U,	// <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
-  2563064730U,	// <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
-  2563065386U,	// <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
-  3693087132U,	// <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
-  2619345410U,	// <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
-  3087843666U,	// <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
-  3087843676U,	// <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
-  3666670695U,	// <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
-  3087843669U,	// <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
-  2620672914U,	// <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
-  3630842706U,	// <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
-  3313069003U,	// <1,4,4,2>: Cost 4 vrev <4,1,2,4>
-  3642788100U,	// <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
-  2713930960U,	// <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
-  2619346230U,	// <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
-  2713930980U,	// <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
-  3736882642U,	// <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
-  2619346473U,	// <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
-  2557108326U,	// <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
-  2557109075U,	// <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
-  2598913774U,	// <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
-  3630852246U,	// <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
-  2557111606U,	// <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
-  2895252790U,	// <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
-  1616006454U,	// <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  3899059510U,	// <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
-  1616006472U,	// <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
-  2557116518U,	// <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
-  2557117236U,	// <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
-  3630859880U,	// <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
-  2569062550U,	// <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
-  2557119798U,	// <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
-  3763490174U,	// <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
-  3763490183U,	// <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
-  2712751498U,	// <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
-  2557122350U,	// <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
-  2659161084U,	// <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
-  3732903040U,	// <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
-  3734230174U,	// <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
-  3734893807U,	// <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
-  3660729654U,	// <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
-  3786493384U,	// <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
-  2713341394U,	// <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
-  3660731386U,	// <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
-  2664470148U,	// <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
-  2557132902U,	// <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
-  2619348782U,	// <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
-  2563106351U,	// <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
-  2713783816U,	// <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
-  2622666815U,	// <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
-  1640189466U,	// <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
-  1616006697U,	// <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  2712751498U,	// <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
-  1616006715U,	// <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
-  2620014592U,	// <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
-  1546272870U,	// <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
-  2618687664U,	// <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
-  3693093120U,	// <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
-  1546273106U,	// <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
-  2620678563U,	// <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
-  2714668660U,	// <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
-  3772042877U,	// <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
-  1546273437U,	// <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
-  2620015350U,	// <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
-  2620015412U,	// <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
-  2620015510U,	// <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
-  2618688512U,	// <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
-  2620015677U,	// <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
-  2620015727U,	// <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
-  2620015859U,	// <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
-  3093728566U,	// <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
-  2620015981U,	// <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
-  3692430816U,	// <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
-  2620016163U,	// <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
-  2620016232U,	// <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
-  2620016294U,	// <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
-  3693758221U,	// <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
-  3692431209U,	// <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
-  2620016570U,	// <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
-  4173598006U,	// <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
-  2620016699U,	// <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
-  2620016790U,	// <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
-  2569110672U,	// <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
-  3693758785U,	// <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
-  2620017052U,	// <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
-  2620017154U,	// <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
-  3135623172U,	// <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
-  4161587048U,	// <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
-  2014104886U,	// <1,5,3,7>: Cost 2 vtrnr LHS, RHS
-  2014104887U,	// <1,5,3,u>: Cost 2 vtrnr LHS, RHS
-  2620017554U,	// <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
-  2620017634U,	// <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
-  3693759551U,	// <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
-  3642861837U,	// <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
-  2575092710U,	// <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
-  1546276150U,	// <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
-  2759855414U,	// <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
-  2713931718U,	// <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
-  1546276393U,	// <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
-  2557182054U,	// <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
-  2557182812U,	// <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
-  3630925347U,	// <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
-  4029301675U,	// <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
-  2557185334U,	// <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
-  2713931780U,	// <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
-  2667794530U,	// <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
-  2713931800U,	// <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
-  2557187886U,	// <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
-  2718208036U,	// <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
-  2620019115U,	// <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
-  2667794938U,	// <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
-  3787673666U,	// <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
-  3693761165U,	// <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
-  3319279297U,	// <1,5,6,5>: Cost 4 vrev <5,1,5,6>
-  2667795256U,	// <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
-  2713931874U,	// <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
-  2713931883U,	// <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
-  2557198438U,	// <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
-  2557199156U,	// <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
-  2569143974U,	// <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
-  2569144592U,	// <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
-  2557201718U,	// <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
-  2713931944U,	// <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
-  3787673770U,	// <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
-  2719387828U,	// <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
-  2557204270U,	// <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
-  2620020435U,	// <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
-  1546278702U,	// <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
-  2620020616U,	// <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
-  2620020668U,	// <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
-  1594054682U,	// <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
-  1546279066U,	// <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
-  2620020944U,	// <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
-  2014145846U,	// <1,5,u,7>: Cost 2 vtrnr LHS, RHS
-  2014145847U,	// <1,5,u,u>: Cost 2 vtrnr LHS, RHS
-  3692437504U,	// <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
-  2618695782U,	// <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
-  2618695857U,	// <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
-  3794161970U,	// <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
-  2620023122U,	// <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
-  2620686756U,	// <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
-  2621350389U,	// <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
-  4028599606U,	// <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
-  2618696349U,	// <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
-  3692438262U,	// <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
-  2625995572U,	// <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
-  3692438422U,	// <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
-  3692438488U,	// <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
-  2625995820U,	// <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
-  3692438672U,	// <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
-  3692438720U,	// <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
-  2958183734U,	// <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
-  2958183735U,	// <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
-  2721526201U,	// <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
-  3692439097U,	// <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
-  3692439144U,	// <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
-  3692439206U,	// <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
-  3636948278U,	// <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
-  3787674092U,	// <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
-  2618697658U,	// <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
-  2970799414U,	// <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
-  2970799415U,	// <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
-  2563211366U,	// <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
-  3699738854U,	// <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
-  2563212860U,	// <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
-  3692439964U,	// <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
-  2563214646U,	// <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
-  4191820018U,	// <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
-  2587103648U,	// <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
-  3087845306U,	// <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
-  3087845307U,	// <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
-  3693767570U,	// <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
-  3693767650U,	// <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
-  3636962877U,	// <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
-  3325088134U,	// <1,6,4,3>: Cost 4 vrev <6,1,3,4>
-  3693767898U,	// <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
-  2618699062U,	// <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
-  3833670966U,	// <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
-  4028632374U,	// <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
-  2618699305U,	// <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
-  3693768264U,	// <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
-  3630998373U,	// <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
-  3636971070U,	// <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
-  3642943767U,	// <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
-  3693768628U,	// <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
-  3732918276U,	// <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
-  2620690530U,	// <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
-  2955562294U,	// <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
-  2955562295U,	// <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
-  2724180733U,	// <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
-  3631006566U,	// <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
-  3631007674U,	// <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
-  3692442184U,	// <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
-  3631009078U,	// <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
-  3787674416U,	// <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
-  2713932600U,	// <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
-  2713932610U,	// <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
-  2713932619U,	// <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
-  1651102542U,	// <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
-  2724918103U,	// <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
-  2698302306U,	// <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
-  3642960153U,	// <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
-  2713932662U,	// <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
-  2725213051U,	// <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
-  2724844426U,	// <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
-  4035956022U,	// <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
-  1651692438U,	// <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
-  1651766175U,	// <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
-  2618701614U,	// <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
-  3135663508U,	// <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
-  3692443580U,	// <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
-  2713932743U,	// <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
-  2618701978U,	// <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
-  2622683344U,	// <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
-  3087886266U,	// <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
-  1652356071U,	// <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
-  2726171632U,	// <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
-  2626666598U,	// <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
-  3695100067U,	// <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
-  3707044102U,	// <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
-  2726466580U,	// <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
-  3654921933U,	// <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
-  2621358582U,	// <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
-  2622022215U,	// <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
-  2626667165U,	// <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
-  2593128550U,	// <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
-  2626667316U,	// <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
-  3700409238U,	// <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
-  2257294428U,	// <1,7,1,3>: Cost 3 vrev <7,1,3,1>
-  2593131830U,	// <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
-  2626667646U,	// <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
-  2627331279U,	// <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
-  2593133696U,	// <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
-  2628658545U,	// <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
-  2587164774U,	// <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
-  3701073445U,	// <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
-  3700409960U,	// <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
-  2638612134U,	// <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
-  2587168054U,	// <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
-  3706382167U,	// <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
-  2587169192U,	// <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
-  3660911610U,	// <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
-  2587170606U,	// <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
-  1507459174U,	// <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
-  2569257984U,	// <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
-  2581202536U,	// <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
-  2569259294U,	// <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
-  1507462454U,	// <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
-  1507462864U,	// <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
-  2581205498U,	// <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
-  2581206010U,	// <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
-  1507465006U,	// <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
-  2728826164U,	// <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
-  3654951732U,	// <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
-  3330987094U,	// <1,7,4,2>: Cost 4 vrev <7,1,2,4>
-  3331060831U,	// <1,7,4,3>: Cost 4 vrev <7,1,3,4>
-  3787674971U,	// <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
-  2626669878U,	// <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
-  3785979241U,	// <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
-  3787085176U,	// <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
-  2626670121U,	// <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
-  2569273446U,	// <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
-  2569274368U,	// <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
-  3643016808U,	// <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
-  2569275680U,	// <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
-  2569276726U,	// <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
-  4102034790U,	// <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
-  2651222067U,	// <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
-  3899378998U,	// <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
-  2569279278U,	// <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
-  2730153430U,	// <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
-  2724845022U,	// <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
-  3643025338U,	// <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
-  3643025697U,	// <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
-  3643026742U,	// <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
-  3654971091U,	// <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
-  3787675153U,	// <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
-  2724845076U,	// <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
-  2725508637U,	// <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
-  2730817063U,	// <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
-  3631088436U,	// <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
-  3660949158U,	// <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
-  3801904705U,	// <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
-  3631090998U,	// <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
-  2662503828U,	// <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
-  3660951981U,	// <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
-  2713933420U,	// <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
-  2731406959U,	// <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
-  1507500134U,	// <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
-  2626672430U,	// <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
-  2581243496U,	// <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
-  2569300259U,	// <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
-  1507503414U,	// <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
-  1507503829U,	// <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
-  2581246458U,	// <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
-  2581246970U,	// <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
-  1507505966U,	// <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
-  1543643153U,	// <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
-  1546297446U,	// <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
-  2819448852U,	// <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
-  2619375876U,	// <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
-  1546297685U,	// <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
-  1658771190U,	// <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
-  2736789248U,	// <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
-  2659189376U,	// <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
-  1546298013U,	// <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
-  1483112550U,	// <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U,	// <1,u,1,1>: Cost 1 vdup1 LHS
-  1616009006U,	// <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
-  1745707110U,	// <1,u,1,3>: Cost 2 vuzpr LHS, LHS
-  1483115830U,	// <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  2620040336U,	// <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
-  3026622618U,	// <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
-  2958183752U,	// <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
-  202162278U,	// <1,u,1,u>: Cost 1 vdup1 LHS
-  2819449750U,	// <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
-  2893207342U,	// <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
-  2819448996U,	// <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
-  2819450482U,	// <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
-  2819449754U,	// <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
-  2893207706U,	// <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
-  2819449036U,	// <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
-  2970799432U,	// <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
-  2819449002U,	// <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
-  403931292U,	// <1,u,3,0>: Cost 1 vext1 LHS, LHS
-  1477673718U,	// <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  115726126U,	// <1,u,3,2>: Cost 1 vrev LHS
-  2014102173U,	// <1,u,3,3>: Cost 2 vtrnr LHS, LHS
-  403934518U,	// <1,u,3,4>: Cost 1 vext1 LHS, RHS
-  1507536601U,	// <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
-  1525453306U,	// <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  2014105129U,	// <1,u,3,7>: Cost 2 vtrnr LHS, RHS
-  403937070U,	// <1,u,3,u>: Cost 1 vext1 LHS, LHS
-  2620042157U,	// <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
-  2620042237U,	// <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
-  2263217967U,	// <1,u,4,2>: Cost 3 vrev <u,1,2,4>
-  2569341224U,	// <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
-  2569342262U,	// <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
-  1546300726U,	// <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
-  2819449180U,	// <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
-  2724845649U,	// <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
-  1546300969U,	// <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
-  2551431270U,	// <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
-  2551432192U,	// <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
-  3028293422U,	// <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
-  2955559068U,	// <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
-  2551434550U,	// <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
-  2895255706U,	// <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
-  1616009370U,	// <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  1745710390U,	// <1,u,5,7>: Cost 2 vuzpr LHS, RHS
-  1745710391U,	// <1,u,5,u>: Cost 2 vuzpr LHS, RHS
-  2653221159U,	// <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
-  2725509303U,	// <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
-  2659193338U,	// <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
-  2689751248U,	// <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
-  2867228774U,	// <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
-  3764820194U,	// <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
-  2657202957U,	// <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
-  2819450810U,	// <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
-  2819450811U,	// <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
-  1585452032U,	// <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
-  2557420340U,	// <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
-  2569365158U,	// <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
-  2569365803U,	// <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
-  2557422902U,	// <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
-  2662512021U,	// <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
-  2724845884U,	// <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
-  2659194476U,	// <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
-  1590761096U,	// <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
-  403972257U,	// <1,u,u,0>: Cost 1 vext1 LHS, LHS
-  202162278U,	// <1,u,u,1>: Cost 1 vdup1 LHS
-  115767091U,	// <1,u,u,2>: Cost 1 vrev LHS
-  1745707677U,	// <1,u,u,3>: Cost 2 vuzpr LHS, LHS
-  403975478U,	// <1,u,u,4>: Cost 1 vext1 LHS, RHS
-  1546303642U,	// <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
-  1616009613U,	// <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  1745710633U,	// <1,u,u,7>: Cost 2 vuzpr LHS, RHS
-  403978030U,	// <1,u,u,u>: Cost 1 vext1 LHS, LHS
-  2551463936U,	// <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
-  2685698058U,	// <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
-  1610776596U,	// <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
-  2619384069U,	// <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
-  2551467318U,	// <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
-  3899836596U,	// <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
-  2621374968U,	// <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
-  4168271334U,	// <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
-  1611219018U,	// <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
-  2551472138U,	// <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
-  2690564186U,	// <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
-  1611956326U,	// <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
-  2826092646U,	// <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
-  2551475510U,	// <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
-  3692463248U,	// <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
-  2587308473U,	// <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
-  3661050874U,	// <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
-  1611956380U,	// <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
-  1477738598U,	// <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
-  2551481078U,	// <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
-  2551481796U,	// <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
-  2551482518U,	// <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
-  1477741878U,	// <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
-  2551484112U,	// <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
-  2551484759U,	// <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
-  2551485434U,	// <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
-  1477744430U,	// <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
-  2953625600U,	// <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
-  2953627302U,	// <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
-  2953625764U,	// <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
-  4027369695U,	// <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
-  3625233718U,	// <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
-  3899836110U,	// <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
-  4032012618U,	// <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
-  3899835392U,	// <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
-  2953625770U,	// <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
-  2551496806U,	// <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
-  2685698386U,	// <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
-  2685698396U,	// <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
-  3625240726U,	// <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
-  2551500086U,	// <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
-  2618723638U,	// <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
-  2765409590U,	// <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
-  3799990664U,	// <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
-  2685698450U,	// <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
-  3625246822U,	// <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
-  3289776304U,	// <2,0,5,1>: Cost 4 vrev <0,2,1,5>
-  2690564526U,	// <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
-  3289923778U,	// <2,0,5,3>: Cost 4 vrev <0,2,3,5>
-  2216255691U,	// <2,0,5,4>: Cost 3 vrev <0,2,4,5>
-  3726307332U,	// <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
-  3726307426U,	// <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
-  2826095926U,	// <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
-  2216550639U,	// <2,0,5,u>: Cost 3 vrev <0,2,u,5>
-  4162420736U,	// <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
-  2901885030U,	// <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
-  2685698559U,	// <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
-  3643173171U,	// <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
-  2216263884U,	// <2,0,6,4>: Cost 3 vrev <0,2,4,6>
-  3730289341U,	// <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
-  3726308152U,	// <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
-  3899836346U,	// <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
-  2216558832U,	// <2,0,6,u>: Cost 3 vrev <0,2,u,6>
-  2659202049U,	// <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
-  3726308437U,	// <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
-  2726249034U,	// <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
-  3734934772U,	// <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
-  3726308710U,	// <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
-  3726308814U,	// <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
-  3736925671U,	// <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
-  3726308972U,	// <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
-  2659202049U,	// <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
-  1477787750U,	// <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
-  2953668262U,	// <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
-  1611956893U,	// <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
-  2551531670U,	// <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
-  1477791030U,	// <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
-  2618726554U,	// <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
-  2765412506U,	// <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
-  2826096169U,	// <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
-  1611956947U,	// <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
-  2569453670U,	// <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
-  2619392102U,	// <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
-  3759440619U,	// <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
-  1616823030U,	// <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
-  2569456950U,	// <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
-  2690712328U,	// <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
-  3661115841U,	// <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
-  2622046794U,	// <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
-  1617191715U,	// <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
-  2551545958U,	// <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
-  2685698868U,	// <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
-  2628682646U,	// <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
-  2685698888U,	// <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
-  2551549238U,	// <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
-  3693134992U,	// <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
-  3661124034U,	// <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
-  3625292794U,	// <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
-  2685698933U,	// <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
-  2551554150U,	// <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
-  3893649571U,	// <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
-  2551555688U,	// <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
-  2685698966U,	// <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
-  2551557430U,	// <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
-  3763422123U,	// <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
-  3693135802U,	// <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
-  2726249402U,	// <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
-  2685699011U,	// <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
-  2551562342U,	// <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
-  2953625610U,	// <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
-  2953627798U,	// <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
-  2953626584U,	// <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
-  2551565622U,	// <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
-  2953625938U,	// <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
-  2587398596U,	// <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
-  4032013519U,	// <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
-  2953625617U,	// <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
-  2690565154U,	// <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
-  3625313270U,	// <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
-  3771532340U,	// <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
-  1148404634U,	// <2,1,4,3>: Cost 2 vrev <1,2,3,4>
-  3625315638U,	// <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
-  2619395382U,	// <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
-  3837242678U,	// <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
-  3799991394U,	// <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
-  1148773319U,	// <2,1,4,u>: Cost 2 vrev <1,2,u,4>
-  2551578726U,	// <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
-  2551579648U,	// <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
-  3625321952U,	// <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
-  2685699216U,	// <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
-  2551582006U,	// <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
-  3740913668U,	// <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
-  3661156806U,	// <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
-  3893652790U,	// <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
-  2685699261U,	// <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
-  2551586918U,	// <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
-  3625329398U,	// <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
-  2551588794U,	// <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
-  3088679014U,	// <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
-  2551590198U,	// <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
-  4029382994U,	// <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
-  3625333560U,	// <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
-  3731624800U,	// <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
-  2551592750U,	// <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
-  2622051322U,	// <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
-  3733615699U,	// <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
-  3795125538U,	// <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
-  2222171037U,	// <2,1,7,3>: Cost 3 vrev <1,2,3,7>
-  3740915046U,	// <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
-  3296060335U,	// <2,1,7,5>: Cost 4 vrev <1,2,5,7>
-  3736933864U,	// <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
-  3805300055U,	// <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
-  2669827714U,	// <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
-  2551603302U,	// <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
-  2953666570U,	// <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
-  2953668758U,	// <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
-  1148437406U,	// <2,1,u,3>: Cost 2 vrev <1,2,3,u>
-  2551606582U,	// <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
-  2953666898U,	// <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
-  2587398596U,	// <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
-  2669828370U,	// <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
-  1148806091U,	// <2,1,u,u>: Cost 2 vrev <1,2,u,u>
-  1543667732U,	// <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
-  1548976230U,	// <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
-  2685699524U,	// <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
-  2685699535U,	// <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
-  2551614774U,	// <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
-  3704422830U,	// <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
-  3893657642U,	// <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
-  3770574323U,	// <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
-  1548976796U,	// <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
-  2622718710U,	// <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
-  2622718772U,	// <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
-  2622718870U,	// <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
-  2819915878U,	// <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
-  3625364790U,	// <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
-  2622719120U,	// <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
-  3760031292U,	// <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
-  3667170468U,	// <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
-  2819915883U,	// <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
-  1489829990U,	// <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
-  2563572470U,	// <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
-  269271142U,	// <2,2,2,2>: Cost 1 vdup2 LHS
-  2685699698U,	// <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
-  1489833270U,	// <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
-  2685699720U,	// <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
-  2622719930U,	// <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
-  2593436837U,	// <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
-  269271142U,	// <2,2,2,u>: Cost 1 vdup2 LHS
-  2685699750U,	// <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
-  2690565806U,	// <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
-  2953627240U,	// <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
-  1879883878U,	// <2,2,3,3>: Cost 2 vzipr LHS, LHS
-  2685699790U,	// <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
-  3893659342U,	// <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
-  2958270812U,	// <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
-  2593445030U,	// <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
-  1879883883U,	// <2,2,3,u>: Cost 2 vzipr LHS, LHS
-  2551644262U,	// <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
-  3625386742U,	// <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
-  2551645902U,	// <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
-  3759441686U,	// <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
-  2551647542U,	// <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
-  1548979510U,	// <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
-  2764901686U,	// <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
-  3667195047U,	// <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
-  1548979753U,	// <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
-  3696463432U,	// <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
-  2617413328U,	// <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
-  2685699936U,	// <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
-  4027383910U,	// <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
-  2228201085U,	// <2,2,5,4>: Cost 3 vrev <2,2,4,5>
-  2617413636U,	// <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
-  2617413730U,	// <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
-  2819919158U,	// <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
-  2819919159U,	// <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
-  3625402554U,	// <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
-  3760031652U,	// <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
-  2617414138U,	// <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
-  2685700026U,	// <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
-  3625405750U,	// <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
-  3760031692U,	// <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
-  3088679116U,	// <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
-  2657891169U,	// <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
-  2685700071U,	// <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
-  2726250474U,	// <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
-  3704427616U,	// <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
-  2660545701U,	// <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
-  4030718054U,	// <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
-  2617415014U,	// <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
-  3302033032U,	// <2,2,7,5>: Cost 4 vrev <2,2,5,7>
-  3661246929U,	// <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
-  2617415276U,	// <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
-  2731558962U,	// <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
-  1489829990U,	// <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
-  1548982062U,	// <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
-  269271142U,	// <2,2,u,2>: Cost 1 vdup2 LHS
-  1879924838U,	// <2,2,u,3>: Cost 2 vzipr LHS, LHS
-  1489833270U,	// <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
-  1548982426U,	// <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
-  2953666908U,	// <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
-  2819919401U,	// <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
-  269271142U,	// <2,2,u,u>: Cost 1 vdup2 LHS
-  1544339456U,	// <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
-  470597734U,	// <2,3,0,1>: Cost 1 vext2 LHS, LHS
-  1548984484U,	// <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
-  2619408648U,	// <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
-  1548984658U,	// <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
-  2665857454U,	// <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
-  2622726655U,	// <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
-  2593494188U,	// <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
-  470598301U,	// <2,3,0,u>: Cost 1 vext2 LHS, LHS
-  1544340214U,	// <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
-  1544340276U,	// <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
-  1544340374U,	// <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
-  1548985304U,	// <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
-  2551696694U,	// <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
-  1548985488U,	// <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
-  2622727375U,	// <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
-  2665858347U,	// <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
-  1548985709U,	// <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
-  2622727613U,	// <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
-  2622727711U,	// <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
-  1544341096U,	// <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
-  1544341158U,	// <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
-  2622727958U,	// <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
-  2622728032U,	// <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
-  1548986298U,	// <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
-  2665859050U,	// <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
-  1548986427U,	// <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
-  1548986518U,	// <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
-  2622728415U,	// <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
-  1489913458U,	// <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
-  1544341916U,	// <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
-  1548986882U,	// <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
-  2665859632U,	// <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
-  2234304870U,	// <2,3,3,6>: Cost 3 vrev <3,2,6,3>
-  2958271632U,	// <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
-  1548987166U,	// <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
-  1483948134U,	// <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
-  1483948954U,	// <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
-  2622729276U,	// <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
-  2557692054U,	// <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
-  1483951414U,	// <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
-  470601014U,	// <2,3,4,5>: Cost 1 vext2 LHS, RHS
-  1592118644U,	// <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
-  2593526960U,	// <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
-  470601257U,	// <2,3,4,u>: Cost 1 vext2 LHS, RHS
-  2551726182U,	// <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
-  1592118992U,	// <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
-  2665860862U,	// <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
-  2551728642U,	// <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
-  1592119238U,	// <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
-  1592119300U,	// <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
-  1592119394U,	// <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
-  1592119464U,	// <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
-  1592119545U,	// <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
-  2622730529U,	// <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
-  2557707164U,	// <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
-  1592119802U,	// <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
-  2665861682U,	// <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
-  2622730893U,	// <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
-  2665861810U,	// <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
-  1592120120U,	// <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
-  1592120142U,	// <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
-  1592120223U,	// <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
-  1592120314U,	// <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
-  2659890261U,	// <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
-  2660553894U,	// <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
-  2665862371U,	// <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
-  1592120678U,	// <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
-  2665862534U,	// <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
-  2665862614U,	// <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
-  1592120940U,	// <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
-  1592120962U,	// <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
-  1548990163U,	// <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
-  470603566U,	// <2,3,u,1>: Cost 1 vext2 LHS, LHS
-  1548990341U,	// <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
-  1548990396U,	// <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
-  1548990527U,	// <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
-  470603930U,	// <2,3,u,5>: Cost 1 vext2 LHS, RHS
-  1548990672U,	// <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
-  1592121600U,	// <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
-  470604133U,	// <2,3,u,u>: Cost 1 vext2 LHS, LHS
-  2617425942U,	// <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
-  2618753126U,	// <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
-  2618753208U,	// <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
-  2619416841U,	// <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
-  2587593628U,	// <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
-  2712832914U,	// <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
-  1634962332U,	// <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
-  3799993252U,	// <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
-  1634962332U,	// <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
-  2619417334U,	// <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
-  3692495668U,	// <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
-  2625389466U,	// <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
-  2826125414U,	// <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
-  3699794995U,	// <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
-  3692496016U,	// <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
-  3763424238U,	// <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
-  3667317942U,	// <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
-  2826125419U,	// <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
-  2629371336U,	// <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
-  3699131946U,	// <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
-  2630698602U,	// <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
-  2618754766U,	// <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
-  2826126234U,	// <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
-  2899119414U,	// <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
-  3033337142U,	// <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
-  3800214597U,	// <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
-  2899119657U,	// <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
-  2635344033U,	// <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
-  4032012325U,	// <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
-  3692497228U,	// <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
-  3692497308U,	// <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
-  3001404624U,	// <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
-  2953627342U,	// <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
-  2953625804U,	// <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
-  3899868160U,	// <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
-  2953625806U,	// <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
-  2710916266U,	// <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
-  3899869648U,	// <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
-  3899869658U,	// <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
-  3899868930U,	// <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
-  2712833232U,	// <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
-  2618756406U,	// <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
-  2765737270U,	// <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
-  4168304426U,	// <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
-  2618756649U,	// <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
-  2551800011U,	// <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
-  2569716470U,	// <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
-  2563745405U,	// <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
-  2569718102U,	// <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
-  2551803190U,	// <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
-  3625545732U,	// <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
-  1611959606U,	// <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
-  2826128694U,	// <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
-  1611959624U,	// <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
-  1478066278U,	// <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
-  2551808758U,	// <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
-  2551809516U,	// <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
-  2551810198U,	// <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
-  1478069558U,	// <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
-  2901888310U,	// <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
-  2551812920U,	// <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
-  2726251914U,	// <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
-  1478072110U,	// <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
-  2659234821U,	// <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
-  3786722726U,	// <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
-  3734303911U,	// <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
-  3734967544U,	// <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
-  3727005030U,	// <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
-  2726251976U,	// <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
-  2726251986U,	// <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
-  3727005292U,	// <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
-  2659234821U,	// <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
-  1478082662U,	// <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
-  2618758958U,	// <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
-  2551826024U,	// <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
-  2551826582U,	// <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
-  1478085942U,	// <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
-  2953668302U,	// <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
-  1611959849U,	// <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
-  2826128937U,	// <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
-  1611959867U,	// <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
-  3691839488U,	// <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
-  2618097766U,	// <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
-  2620088484U,	// <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
-  2619425034U,	// <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
-  2620088667U,	// <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
-  2620752300U,	// <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
-  3693830655U,	// <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
-  3094531382U,	// <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
-  2618098333U,	// <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
-  3691840246U,	// <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
-  3691840308U,	// <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
-  2626061206U,	// <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
-  2618098688U,	// <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
-  2626061364U,	// <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
-  3691840656U,	// <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
-  3789082310U,	// <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
-  2712833744U,	// <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
-  2628715896U,	// <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
-  3693831613U,	// <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
-  4026698642U,	// <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
-  2632033896U,	// <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
-  3691841190U,	// <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
-  2632034061U,	// <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
-  3691841352U,	// <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
-  3691841466U,	// <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
-  3088354614U,	// <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
-  3088354615U,	// <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
-  2557829222U,	// <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
-  2557830059U,	// <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
-  2575746766U,	// <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
-  3691841948U,	// <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
-  2619427330U,	// <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
-  2581720847U,	// <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
-  2953628162U,	// <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
-  2953626624U,	// <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
-  2953626625U,	// <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
-  2569781350U,	// <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
-  3631580076U,	// <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
-  2569782990U,	// <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
-  2569783646U,	// <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
-  2569784630U,	// <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
-  2618101046U,	// <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
-  3893905922U,	// <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
-  3094564150U,	// <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
-  2618101289U,	// <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
-  2551873638U,	// <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
-  3637560320U,	// <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
-  3637560966U,	// <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
-  3723030343U,	// <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
-  2551876918U,	// <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
-  2712834052U,	// <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
-  4028713474U,	// <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
-  2712834072U,	// <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
-  2712834081U,	// <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
-  2575769702U,	// <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
-  3631596462U,	// <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
-  2655924730U,	// <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
-  3643541856U,	// <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
-  2655924849U,	// <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
-  3787755607U,	// <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
-  4029385218U,	// <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
-  3088682294U,	// <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
-  3088682295U,	// <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
-  2563833958U,	// <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
-  2551890678U,	// <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
-  2563835528U,	// <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
-  3637577878U,	// <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
-  2563837238U,	// <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
-  2712834216U,	// <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
-  2712834220U,	// <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
-  4174449974U,	// <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
-  2563839790U,	// <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
-  2563842150U,	// <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
-  2618103598U,	// <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
-  2563843721U,	// <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
-  2569816418U,	// <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
-  2622748735U,	// <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
-  2618103962U,	// <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
-  2953669122U,	// <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
-  2953667584U,	// <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
-  2618104165U,	// <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
-  2620096512U,	// <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
-  1546354790U,	// <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
-  2620096676U,	// <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
-  3693838588U,	// <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
-  1546355036U,	// <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
-  3694502317U,	// <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
-  2551911246U,	// <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
-  2720723287U,	// <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
-  1546355357U,	// <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
-  2620097270U,	// <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
-  2620097332U,	// <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
-  2620097430U,	// <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
-  2820243558U,	// <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
-  2620097598U,	// <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
-  2620097680U,	// <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
-  3693839585U,	// <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
-  2721386920U,	// <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
-  2820243563U,	// <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
-  2714014137U,	// <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
-  2712834500U,	// <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
-  2620098152U,	// <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
-  2620098214U,	// <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
-  2632042254U,	// <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
-  2712834540U,	// <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
-  2820243660U,	// <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
-  2958265654U,	// <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
-  2620098619U,	// <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
-  2620098710U,	// <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
-  3893986982U,	// <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
-  2569848762U,	// <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
-  2620098972U,	// <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
-  2620099074U,	// <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
-  3893987022U,	// <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
-  3001404644U,	// <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
-  1879887158U,	// <2,6,3,7>: Cost 2 vzipr LHS, RHS
-  1879887159U,	// <2,6,3,u>: Cost 2 vzipr LHS, RHS
-  2620099484U,	// <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
-  2620099566U,	// <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
-  2620099644U,	// <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
-  3643599207U,	// <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
-  2575830080U,	// <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
-  1546358070U,	// <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
-  2667875700U,	// <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
-  4028042550U,	// <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
-  1546358313U,	// <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
-  3693841992U,	// <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
-  2667876048U,	// <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
-  2712834756U,	// <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
-  3643607400U,	// <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
-  2252091873U,	// <2,6,5,4>: Cost 3 vrev <6,2,4,5>
-  2667876356U,	// <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
-  2667876450U,	// <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
-  2820246838U,	// <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
-  2820246839U,	// <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
-  2563899494U,	// <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
-  3893988683U,	// <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
-  2563901072U,	// <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
-  3893987236U,	// <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
-  2563902774U,	// <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
-  3893988723U,	// <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
-  2712834872U,	// <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
-  2955644214U,	// <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
-  2955644215U,	// <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
-  2712834894U,	// <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
-  2724926296U,	// <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
-  2725000033U,	// <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
-  2702365544U,	// <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
-  2712834934U,	// <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
-  3776107393U,	// <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
-  2725294981U,	// <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
-  2726253452U,	// <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
-  2712834966U,	// <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
-  2620102355U,	// <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
-  1546360622U,	// <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
-  2620102536U,	// <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
-  2820244125U,	// <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
-  1594136612U,	// <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
-  1546360986U,	// <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
-  2620102864U,	// <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
-  1879928118U,	// <2,6,u,7>: Cost 2 vzipr LHS, RHS
-  1879928119U,	// <2,6,u,u>: Cost 2 vzipr LHS, RHS
-  2726179825U,	// <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
-  1652511738U,	// <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
-  2621431972U,	// <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
-  2257949868U,	// <2,7,0,3>: Cost 3 vrev <7,2,3,0>
-  2726474773U,	// <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
-  2620768686U,	// <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
-  2621432319U,	// <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
-  2599760953U,	// <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
-  1653027897U,	// <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
-  2639348470U,	// <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
-  3695174452U,	// <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
-  3695174550U,	// <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
-  3694511104U,	// <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
-  3713090594U,	// <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
-  3693184144U,	// <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
-  2627405016U,	// <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
-  3799995519U,	// <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
-  2639348470U,	// <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
-  3695175101U,	// <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
-  3643655168U,	// <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
-  2257892517U,	// <2,7,2,2>: Cost 3 vrev <7,2,2,2>
-  3695175334U,	// <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
-  3695175465U,	// <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
-  2632714080U,	// <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
-  2633377713U,	// <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
-  3695175658U,	// <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
-  2634704979U,	// <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
-  1514094694U,	// <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
-  2569921680U,	// <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
-  2587838056U,	// <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
-  2569922927U,	// <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
-  1514097974U,	// <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
-  2581868321U,	// <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
-  1514099194U,	// <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
-  2587841530U,	// <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
-  1514100526U,	// <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
-  2708706617U,	// <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
-  3649643418U,	// <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
-  3649644330U,	// <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
-  2257982640U,	// <2,7,4,3>: Cost 3 vrev <7,2,3,4>
-  3649645641U,	// <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
-  2621435190U,	// <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
-  2712835441U,	// <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
-  3799995762U,	// <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
-  2621435433U,	// <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
-  2729497990U,	// <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
-  3643679744U,	// <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
-  3637708424U,	// <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
-  3643681137U,	// <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
-  2599800118U,	// <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
-  3786577334U,	// <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
-  3786577345U,	// <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
-  2599802214U,	// <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
-  2599802670U,	// <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
-  2581889126U,	// <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
-  3643687936U,	// <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
-  2663240186U,	// <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
-  3643689330U,	// <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
-  2581892406U,	// <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
-  2581892900U,	// <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
-  2587865597U,	// <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
-  3786577428U,	// <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
-  2581894958U,	// <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
-  2726254119U,	// <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
-  3804640817U,	// <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
-  3637724826U,	// <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
-  3734992123U,	// <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
-  2552040758U,	// <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
-  3799995992U,	// <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
-  2663241198U,	// <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
-  2712835692U,	// <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
-  2731562607U,	// <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
-  1514135654U,	// <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
-  1657820802U,	// <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
-  2587879016U,	// <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
-  2569963892U,	// <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
-  1514138934U,	// <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
-  2621438106U,	// <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
-  1514140159U,	// <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
-  2587882490U,	// <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
-  1514141486U,	// <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
-  1544380416U,	// <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
-  470638699U,	// <2,u,0,1>: Cost 1 vext2 LHS, LHS
-  1544380580U,	// <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
-  1658631909U,	// <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
-  1544380754U,	// <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
-  2665898414U,	// <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
-  1658853120U,	// <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
-  3094531625U,	// <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
-  470639261U,	// <2,u,0,u>: Cost 1 vext2 LHS, LHS
-  1544381174U,	// <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
-  1544381236U,	// <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
-  1544381334U,	// <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
-  1544381400U,	// <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
-  2618123325U,	// <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
-  1544381584U,	// <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
-  2618123489U,	// <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
-  2726254427U,	// <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
-  1544381823U,	// <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
-  1478328422U,	// <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
-  2618123807U,	// <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
-  269271142U,	// <2,u,2,2>: Cost 1 vdup2 LHS
-  1544382118U,	// <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
-  1478331702U,	// <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
-  2618124136U,	// <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
-  1544382394U,	// <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
-  3088354857U,	// <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
-  269271142U,	// <2,u,2,u>: Cost 1 vdup2 LHS
-  1544382614U,	// <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
-  2953627374U,	// <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
-  1490282143U,	// <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
-  1879883932U,	// <2,u,3,3>: Cost 2 vzipr LHS, LHS
-  1544382978U,	// <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
-  2953627378U,	// <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
-  1514172931U,	// <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
-  1879887176U,	// <2,u,3,7>: Cost 2 vzipr LHS, RHS
-  1879883937U,	// <2,u,3,u>: Cost 2 vzipr LHS, LHS
-  1484316774U,	// <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
-  1484317639U,	// <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
-  2552088270U,	// <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
-  1190213513U,	// <2,u,4,3>: Cost 2 vrev <u,2,3,4>
-  1484320054U,	// <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
-  470641974U,	// <2,u,4,5>: Cost 1 vext2 LHS, RHS
-  1592159604U,	// <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
-  3094564393U,	// <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
-  470642217U,	// <2,u,4,u>: Cost 1 vext2 LHS, RHS
-  2552094959U,	// <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
-  1592159952U,	// <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
-  2564040353U,	// <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
-  2690275455U,	// <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
-  1592160198U,	// <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
-  1592160260U,	// <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
-  1611962522U,	// <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
-  1592160424U,	// <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
-  1611962540U,	// <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
-  1478361190U,	// <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
-  2552103670U,	// <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
-  1592160762U,	// <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
-  2685704400U,	// <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
-  1478364470U,	// <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
-  2901891226U,	// <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
-  1592161080U,	// <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
-  1592161102U,	// <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
-  1478367022U,	// <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
-  1592161274U,	// <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
-  2659931226U,	// <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
-  2564056739U,	// <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
-  2665903331U,	// <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
-  1592161638U,	// <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
-  2665903494U,	// <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
-  2587947527U,	// <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
-  1592161900U,	// <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
-  1592161922U,	// <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
-  1478377574U,	// <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
-  470644526U,	// <2,u,u,1>: Cost 1 vext2 LHS, LHS
-  269271142U,	// <2,u,u,2>: Cost 1 vdup2 LHS
-  1879924892U,	// <2,u,u,3>: Cost 2 vzipr LHS, LHS
-  1478380854U,	// <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
-  470644890U,	// <2,u,u,5>: Cost 1 vext2 LHS, RHS
-  1611962765U,	// <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
-  1879928136U,	// <2,u,u,7>: Cost 2 vzipr LHS, RHS
-  470645093U,	// <2,u,u,u>: Cost 1 vext2 LHS, LHS
-  1611448320U,	// <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
-  1611890698U,	// <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
-  1611890708U,	// <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
-  3763576860U,	// <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
-  2689835045U,	// <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
-  3698508206U,	// <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
-  3763576887U,	// <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
-  3667678434U,	// <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
-  1616093258U,	// <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
-  1490337894U,	// <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
-  2685632602U,	// <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
-  537706598U,	// <3,0,1,2>: Cost 1 vext3 LHS, LHS
-  2624766936U,	// <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
-  1490341174U,	// <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
-  2624767120U,	// <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
-  2732966030U,	// <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
-  2593944803U,	// <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
-  537706652U,	// <3,0,1,u>: Cost 1 vext3 LHS, LHS
-  1611890852U,	// <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
-  2685632684U,	// <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
-  2685632692U,	// <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
-  2685632702U,	// <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
-  1611890892U,	// <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
-  2732966102U,	// <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
-  2624767930U,	// <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
-  2685632744U,	// <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
-  1611890924U,	// <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
-  2624768150U,	// <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
-  2685632764U,	// <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
-  2685632774U,	// <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
-  2624768412U,	// <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
-  2624768514U,	// <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
-  3702491714U,	// <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
-  2624768632U,	// <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
-  3702491843U,	// <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
-  2686959934U,	// <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
-  2689835336U,	// <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
-  1611891026U,	// <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
-  1611891036U,	// <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
-  3763577184U,	// <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
-  2689835374U,	// <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
-  1551027510U,	// <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
-  2666573172U,	// <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
-  3667711206U,	// <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
-  1616093586U,	// <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
-  2685190556U,	// <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
-  2666573520U,	// <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
-  3040886886U,	// <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
-  3625912834U,	// <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
-  2666573766U,	// <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
-  2666573828U,	// <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
-  2732966354U,	// <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
-  2666573992U,	// <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
-  3040886940U,	// <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
-  2685190637U,	// <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
-  2732966390U,	// <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
-  2689835519U,	// <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
-  3667724438U,	// <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
-  3763577355U,	// <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
-  3806708243U,	// <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
-  2666574648U,	// <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
-  2657948520U,	// <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
-  2689835573U,	// <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
-  2666574842U,	// <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
-  2685633095U,	// <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
-  2660603052U,	// <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
-  3643844997U,	// <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
-  2666575206U,	// <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
-  3655790391U,	// <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
-  3731690968U,	// <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
-  2666575468U,	// <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
-  2664584850U,	// <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
-  1616093834U,	// <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
-  1611891346U,	// <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
-  537707165U,	// <3,0,u,2>: Cost 1 vext3 LHS, LHS
-  2689835684U,	// <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
-  1616093874U,	// <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
-  1551030426U,	// <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
-  2624772304U,	// <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
-  2594002154U,	// <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
-  537707219U,	// <3,0,u,u>: Cost 1 vext3 LHS, LHS
-  2552201318U,	// <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
-  2618802278U,	// <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
-  2618802366U,	// <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
-  1611449078U,	// <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
-  2552204598U,	// <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
-  2732966663U,	// <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
-  3906258396U,	// <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
-  3667752171U,	// <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
-  1611891491U,	// <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
-  2689835819U,	// <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
-  1611449140U,	// <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
-  2624775063U,	// <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
-  1611891528U,	// <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
-  2689835859U,	// <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
-  2689835868U,	// <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
-  3763577701U,	// <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
-  3765273452U,	// <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
-  1611891573U,	// <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
-  2629420494U,	// <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
-  2689835911U,	// <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
-  2564163248U,	// <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
-  1611449238U,	// <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
-  2564164918U,	// <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
-  2689835947U,	// <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
-  3692545978U,	// <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
-  2732966842U,	// <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
-  1611891651U,	// <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
-  1484456038U,	// <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
-  1611891672U,	// <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
-  2685633502U,	// <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
-  2685633512U,	// <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
-  1484459318U,	// <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
-  1611891712U,	// <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
-  2689836041U,	// <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
-  2733409294U,	// <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
-  1611891735U,	// <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
-  2552234086U,	// <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
-  2732966955U,	// <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
-  2732966964U,	// <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
-  2685633597U,	// <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
-  2552237366U,	// <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
-  2618805558U,	// <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
-  2769472822U,	// <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
-  3667784943U,	// <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
-  2685633642U,	// <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
-  2689836143U,	// <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
-  2564187280U,	// <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
-  2564187827U,	// <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
-  1611891856U,	// <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
-  2689836183U,	// <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
-  3759375522U,	// <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
-  3720417378U,	// <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
-  2832518454U,	// <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
-  1611891901U,	// <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
-  3763578048U,	// <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
-  2689836239U,	// <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
-  2732967128U,	// <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
-  2685633761U,	// <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
-  3763578088U,	// <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
-  2689836275U,	// <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
-  3763578108U,	// <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
-  2732967166U,	// <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
-  2685633806U,	// <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
-  3631972454U,	// <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
-  2659947612U,	// <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
-  4036102294U,	// <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
-  3095396454U,	// <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
-  3631975734U,	// <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
-  2222982144U,	// <3,1,7,5>: Cost 3 vrev <1,3,5,7>
-  3296797705U,	// <3,1,7,6>: Cost 4 vrev <1,3,6,7>
-  3720418924U,	// <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
-  3095396459U,	// <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
-  1484496998U,	// <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
-  1611892077U,	// <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
-  2685633907U,	// <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
-  1611892092U,	// <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
-  1484500278U,	// <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
-  1611892117U,	// <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
-  2685633950U,	// <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
-  2832518697U,	// <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
-  1611892140U,	// <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
-  2623455232U,	// <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
-  1549713510U,	// <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
-  2689836484U,	// <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
-  2685633997U,	// <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
-  2623455570U,	// <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
-  2732967398U,	// <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
-  2689836524U,	// <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
-  2229044964U,	// <3,2,0,7>: Cost 3 vrev <2,3,7,0>
-  1549714077U,	// <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
-  1549714166U,	// <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
-  2623456052U,	// <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
-  2623456150U,	// <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
-  2685634079U,	// <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
-  2552286518U,	// <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
-  2623456400U,	// <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
-  2689836604U,	// <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
-  3667834101U,	// <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
-  1155385070U,	// <3,2,1,u>: Cost 2 vrev <2,3,u,1>
-  2689836629U,	// <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
-  2689836640U,	// <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
-  1611449960U,	// <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
-  1611892338U,	// <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
-  2689836669U,	// <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
-  2689836680U,	// <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
-  2689836688U,	// <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
-  3763578518U,	// <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
-  1611892383U,	// <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
-  1611450022U,	// <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
-  2685191854U,	// <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
-  2685191865U,	// <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
-  2685191875U,	// <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
-  1611450062U,	// <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
-  2732967635U,	// <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
-  2732967645U,	// <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
-  2732967652U,	// <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
-  1611450094U,	// <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
-  2558279782U,	// <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
-  2558280602U,	// <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
-  2732967692U,	// <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
-  2685634326U,	// <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
-  2558283062U,	// <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
-  1549716790U,	// <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
-  2689836844U,	// <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
-  2229077736U,	// <3,2,4,7>: Cost 3 vrev <2,3,7,4>
-  1549717033U,	// <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
-  2552316006U,	// <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
-  2228643507U,	// <3,2,5,1>: Cost 3 vrev <2,3,1,5>
-  2689836896U,	// <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
-  2685634408U,	// <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
-  1155122894U,	// <3,2,5,4>: Cost 2 vrev <2,3,4,5>
-  2665263108U,	// <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
-  2689836932U,	// <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
-  2665263272U,	// <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
-  1155417842U,	// <3,2,5,u>: Cost 2 vrev <2,3,u,5>
-  2689836953U,	// <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
-  2689836964U,	// <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
-  2689836976U,	// <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
-  1611892666U,	// <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
-  2689836993U,	// <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
-  2689837004U,	// <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
-  2689837013U,	// <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
-  2665263950U,	// <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
-  1611892711U,	// <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
-  2665264122U,	// <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
-  2623460419U,	// <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
-  4169138340U,	// <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
-  2962358374U,	// <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
-  2665264486U,	// <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
-  2228954841U,	// <3,2,7,5>: Cost 3 vrev <2,3,5,7>
-  2229028578U,	// <3,2,7,6>: Cost 3 vrev <2,3,6,7>
-  2665264748U,	// <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
-  2962358379U,	// <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
-  1611892795U,	// <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
-  1549719342U,	// <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
-  1611449960U,	// <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
-  1611892824U,	// <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
-  1611892835U,	// <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
-  1549719706U,	// <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
-  2689837168U,	// <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
-  2665265408U,	// <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
-  1611892867U,	// <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
-  2685192331U,	// <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
-  1611450518U,	// <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
-  2685634717U,	// <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
-  2564294806U,	// <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
-  2685634736U,	// <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
-  2732968122U,	// <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
-  3763579075U,	// <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
-  4034053264U,	// <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
-  1611450581U,	// <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
-  2685192415U,	// <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
-  1550385992U,	// <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
-  2685192433U,	// <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
-  2685634808U,	// <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
-  2558332214U,	// <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
-  2685634828U,	// <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
-  3759376661U,	// <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
-  2703477022U,	// <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
-  1555031423U,	// <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
-  2564309094U,	// <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
-  2630100513U,	// <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
-  1557022322U,	// <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
-  2685192520U,	// <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
-  2564312374U,	// <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
-  2732968286U,	// <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
-  2685634918U,	// <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
-  2704140655U,	// <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
-  1561004120U,	// <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
-  1496547430U,	// <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
-  2624129256U,	// <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
-  2630764866U,	// <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
-  336380006U,	// <3,3,3,3>: Cost 1 vdup3 LHS
-  1496550710U,	// <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
-  2732968368U,	// <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
-  2624129683U,	// <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
-  2594182400U,	// <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
-  336380006U,	// <3,3,3,u>: Cost 1 vdup3 LHS
-  2558353510U,	// <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
-  2558354411U,	// <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
-  2564327108U,	// <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
-  2564327938U,	// <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
-  2960343962U,	// <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
-  1611893250U,	// <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
-  2771619126U,	// <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
-  4034086032U,	// <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
-  1611893277U,	// <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
-  2558361702U,	// <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
-  2558362604U,	// <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
-  2558363342U,	// <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
-  2732968512U,	// <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
-  2558364982U,	// <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
-  3101279950U,	// <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
-  2665934946U,	// <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
-  2826636598U,	// <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
-  2826636599U,	// <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
-  2732968568U,	// <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
-  3763579521U,	// <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
-  2732968586U,	// <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
-  2732968595U,	// <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
-  2732968604U,	// <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
-  3763579557U,	// <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
-  2732968621U,	// <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
-  2657973099U,	// <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
-  2658636732U,	// <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
-  2558378086U,	// <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
-  2558378990U,	// <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
-  2564351687U,	// <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
-  2661291264U,	// <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
-  2558381366U,	// <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
-  2732968694U,	// <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
-  3781126907U,	// <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
-  3095397376U,	// <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
-  2558383918U,	// <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
-  1496547430U,	// <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
-  1611893534U,	// <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
-  1592858504U,	// <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
-  336380006U,	// <3,3,u,3>: Cost 1 vdup3 LHS
-  1496550710U,	// <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
-  1611893574U,	// <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
-  2690280268U,	// <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
-  2826636841U,	// <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
-  336380006U,	// <3,3,u,u>: Cost 1 vdup3 LHS
-  2624798720U,	// <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
-  1551056998U,	// <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
-  2624798884U,	// <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
-  3693232384U,	// <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
-  2624799058U,	// <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
-  1659227026U,	// <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
-  1659227036U,	// <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
-  3667973382U,	// <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
-  1551057565U,	// <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
-  2624799478U,	// <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
-  2624799540U,	// <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
-  1551057818U,	// <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
-  2624799704U,	// <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
-  2564377910U,	// <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
-  2689838050U,	// <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
-  2689838062U,	// <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
-  2628117807U,	// <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
-  1555039616U,	// <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
-  3626180710U,	// <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
-  2624800298U,	// <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
-  2624800360U,	// <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
-  2624800422U,	// <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
-  2624800514U,	// <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
-  2709965878U,	// <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
-  2689838140U,	// <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
-  2634090504U,	// <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
-  2689838158U,	// <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
-  2624800918U,	// <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
-  2636081403U,	// <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
-  2636745036U,	// <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
-  2624801180U,	// <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
-  2624801232U,	// <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
-  2905836854U,	// <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
-  3040054582U,	// <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
-  3702524611U,	// <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
-  2624801566U,	// <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
-  2564399206U,	// <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
-  2564400026U,	// <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
-  2564400845U,	// <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
-  2570373542U,	// <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
-  1659227344U,	// <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
-  1551060278U,	// <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
-  1659227364U,	// <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
-  3668006154U,	// <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
-  1551060521U,	// <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
-  1490665574U,	// <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
-  2689838341U,	// <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
-  1490667214U,	// <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
-  2564409494U,	// <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
-  1490668854U,	// <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
-  2689838381U,	// <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
-  537709878U,	// <3,4,5,6>: Cost 1 vext3 LHS, RHS
-  2594272523U,	// <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
-  537709896U,	// <3,4,5,u>: Cost 1 vext3 LHS, RHS
-  2689838411U,	// <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
-  2558444534U,	// <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
-  2666607098U,	// <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
-  2558446082U,	// <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
-  1659227508U,	// <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
-  2689838462U,	// <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
-  2689838471U,	// <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
-  2657981292U,	// <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
-  1659227540U,	// <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
-  2666607610U,	// <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
-  3702527072U,	// <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
-  2660635824U,	// <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
-  3644139945U,	// <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
-  2666607974U,	// <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
-  2732969416U,	// <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
-  2732969425U,	// <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
-  2666608236U,	// <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
-  2664617622U,	// <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
-  1490690150U,	// <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
-  1551062830U,	// <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
-  1490691793U,	// <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
-  2624804796U,	// <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
-  1490693430U,	// <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
-  1551063194U,	// <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
-  537710121U,	// <3,4,u,6>: Cost 1 vext3 LHS, RHS
-  2594297102U,	// <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
-  537710139U,	// <3,4,u,u>: Cost 1 vext3 LHS, RHS
-  3692576768U,	// <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
-  2618835046U,	// <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
-  2618835138U,	// <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
-  3692577024U,	// <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
-  2689838690U,	// <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
-  2732969579U,	// <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
-  2732969588U,	// <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
-  2246963055U,	// <3,5,0,7>: Cost 3 vrev <5,3,7,0>
-  2618835613U,	// <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
-  2594308198U,	// <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
-  3692577588U,	// <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
-  2624807835U,	// <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
-  2625471468U,	// <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
-  2626135101U,	// <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
-  2594311888U,	// <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
-  3699877107U,	// <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
-  1641680592U,	// <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
-  1641754329U,	// <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
-  3692578274U,	// <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
-  2630116899U,	// <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
-  3692578408U,	// <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
-  2625472206U,	// <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
-  2632107798U,	// <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
-  2715938575U,	// <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
-  3692578746U,	// <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
-  2716086049U,	// <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
-  2634762330U,	// <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
-  3692578966U,	// <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
-  2636089596U,	// <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
-  3699214668U,	// <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
-  2638080412U,	// <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
-  2618837506U,	// <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
-  2832844494U,	// <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
-  4033415682U,	// <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
-  3095072054U,	// <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
-  3095072055U,	// <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
-  2600304742U,	// <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
-  3763580815U,	// <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
-  2564474582U,	// <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
-  3699879044U,	// <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
-  2600308022U,	// <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
-  2618838326U,	// <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
-  2772454710U,	// <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
-  1659228102U,	// <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
-  1659228111U,	// <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
-  2570453094U,	// <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
-  2624810704U,	// <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
-  2570454734U,	// <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
-  2570455472U,	// <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
-  2570456374U,	// <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
-  1659228164U,	// <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
-  2732969998U,	// <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
-  1659228184U,	// <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
-  1659228193U,	// <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
-  2732970020U,	// <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
-  2732970035U,	// <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
-  2564490968U,	// <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
-  2732970050U,	// <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
-  2732970060U,	// <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
-  2732970071U,	// <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
-  2732970080U,	// <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
-  1659228258U,	// <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
-  1659228267U,	// <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
-  1484783718U,	// <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
-  1484784640U,	// <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
-  2558527080U,	// <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
-  2558527638U,	// <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
-  1484786998U,	// <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
-  1659228328U,	// <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
-  2732970154U,	// <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
-  2558531180U,	// <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
-  1484789550U,	// <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
-  1484791910U,	// <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
-  1484792833U,	// <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
-  2558535272U,	// <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
-  2558535830U,	// <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
-  1484795190U,	// <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
-  1659228409U,	// <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
-  2772457626U,	// <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
-  1646326023U,	// <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
-  1484797742U,	// <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
-  2558541926U,	// <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
-  2689839393U,	// <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
-  2689839404U,	// <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
-  3706519808U,	// <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
-  2689839420U,	// <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
-  2732970314U,	// <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
-  2732970316U,	// <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
-  2960313654U,	// <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
-  2689839456U,	// <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
-  3763581290U,	// <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
-  3763581297U,	// <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
-  2624816028U,	// <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
-  3763581315U,	// <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
-  2626143294U,	// <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
-  3763581335U,	// <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
-  2721321376U,	// <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
-  2721395113U,	// <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
-  2628797826U,	// <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
-  2594390118U,	// <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
-  2721616324U,	// <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
-  2630788725U,	// <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
-  3763581395U,	// <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
-  2632115991U,	// <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
-  2632779624U,	// <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
-  2594394618U,	// <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
-  1648316922U,	// <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
-  1648390659U,	// <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
-  3693914262U,	// <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
-  3638281176U,	// <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
-  3696568678U,	// <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
-  2638088604U,	// <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
-  2632780290U,	// <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
-  3712494145U,	// <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
-  3698559612U,	// <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
-  2959674678U,	// <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
-  2959674679U,	// <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
-  3763581536U,	// <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
-  2722943590U,	// <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
-  2732970609U,	// <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
-  3698560147U,	// <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
-  2732970628U,	// <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
-  2689839757U,	// <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
-  2732970640U,	// <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
-  2960346422U,	// <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
-  2689839784U,	// <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
-  2576498790U,	// <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
-  3650241270U,	// <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
-  2732970692U,	// <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
-  2576501250U,	// <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
-  2576501906U,	// <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
-  3650244622U,	// <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
-  4114633528U,	// <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
-  2732970735U,	// <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
-  2576504622U,	// <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
-  2732970749U,	// <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
-  2724270856U,	// <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
-  2624819706U,	// <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
-  3656223234U,	// <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
-  2732970788U,	// <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
-  2732970800U,	// <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
-  1659228984U,	// <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
-  1659228994U,	// <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
-  1659229003U,	// <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
-  1659229006U,	// <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
-  2558600201U,	// <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
-  2558601146U,	// <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
-  2725081963U,	// <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
-  1659229046U,	// <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
-  2715423611U,	// <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
-  2722059141U,	// <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
-  2962361654U,	// <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
-  1659229078U,	// <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
-  1659229087U,	// <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
-  2689840041U,	// <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
-  2558609339U,	// <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
-  2576525853U,	// <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
-  1659229127U,	// <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
-  2689840081U,	// <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
-  1659228984U,	// <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
-  1652298720U,	// <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
-  1659229159U,	// <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
-  2626813952U,	// <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
-  1553072230U,	// <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
-  2626814116U,	// <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
-  3700556028U,	// <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
-  2626814290U,	// <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
-  2582507375U,	// <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
-  2588480072U,	// <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
-  2732971055U,	// <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
-  1553072797U,	// <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
-  2626814710U,	// <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
-  2626814772U,	// <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
-  2626814870U,	// <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
-  2625487854U,	// <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
-  2582514998U,	// <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
-  1553073296U,	// <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
-  2627478753U,	// <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
-  2727367810U,	// <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
-  1555064195U,	// <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
-  2588491878U,	// <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
-  3700557318U,	// <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
-  2626815592U,	// <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
-  2626815654U,	// <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
-  2588495158U,	// <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
-  2632787817U,	// <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
-  1559709626U,	// <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
-  2728031443U,	// <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
-  1561036892U,	// <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
-  2626816150U,	// <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
-  2626816268U,	// <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
-  2633451878U,	// <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
-  2626816412U,	// <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
-  2626816514U,	// <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
-  2638760514U,	// <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
-  2639424147U,	// <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
-  2826961920U,	// <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
-  2626816798U,	// <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
-  2582536294U,	// <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
-  2582537360U,	// <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
-  2588510138U,	// <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
-  3700558996U,	// <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
-  2582539574U,	// <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
-  1553075510U,	// <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
-  2588512844U,	// <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
-  2564625766U,	// <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
-  1553075753U,	// <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
-  2732971398U,	// <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
-  2626817744U,	// <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
-  3700559649U,	// <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
-  2626817903U,	// <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
-  2258728203U,	// <3,7,5,4>: Cost 3 vrev <7,3,4,5>
-  2732971446U,	// <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
-  2732971457U,	// <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
-  2826964278U,	// <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
-  2826964279U,	// <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
-  2732971478U,	// <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
-  2732971486U,	// <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
-  2633454074U,	// <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
-  2633454152U,	// <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
-  2732971518U,	// <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
-  2732971526U,	// <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
-  2732971537U,	// <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
-  2732971540U,	// <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
-  2726041124U,	// <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
-  2570616934U,	// <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
-  2570617856U,	// <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
-  2564646635U,	// <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
-  2570619332U,	// <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
-  2570620214U,	// <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
-  2582564726U,	// <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
-  2588537423U,	// <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
-  1659229804U,	// <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
-  1659229804U,	// <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
-  2626819795U,	// <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
-  1553078062U,	// <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
-  2626819973U,	// <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
-  2826961565U,	// <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
-  2626820159U,	// <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
-  1553078426U,	// <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
-  1595545808U,	// <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
-  1659229804U,	// <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
-  1553078629U,	// <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
-  1611448320U,	// <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
-  1611896531U,	// <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
-  1659672284U,	// <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
-  1616099045U,	// <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
-  2685638381U,	// <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
-  1663874806U,	// <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
-  1663874816U,	// <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
-  2960313672U,	// <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
-  1611896594U,	// <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
-  1549763324U,	// <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
-  1550426957U,	// <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
-  537712430U,	// <3,u,1,2>: Cost 1 vext3 LHS, LHS
-  1616541495U,	// <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
-  1490930998U,	// <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
-  1553081489U,	// <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
-  2627486946U,	// <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
-  1659230043U,	// <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
-  537712484U,	// <3,u,1,u>: Cost 1 vext3 LHS, LHS
-  1611890852U,	// <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
-  2624833102U,	// <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
-  1557063287U,	// <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
-  1616099205U,	// <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
-  1611890892U,	// <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
-  2689841054U,	// <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
-  1559717819U,	// <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
-  1659230124U,	// <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
-  1616541618U,	// <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
-  1611896764U,	// <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
-  1484973079U,	// <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
-  2685638607U,	// <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
-  336380006U,	// <3,u,3,3>: Cost 1 vdup3 LHS
-  1611896804U,	// <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
-  1616541679U,	// <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
-  2690283512U,	// <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
-  2959674696U,	// <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
-  336380006U,	// <3,u,3,u>: Cost 1 vdup3 LHS
-  2558722150U,	// <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
-  1659672602U,	// <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
-  1659672612U,	// <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
-  2689841196U,	// <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
-  1659227344U,	// <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
-  1611896895U,	// <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
-  1663875144U,	// <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
-  1659230289U,	// <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
-  1611896922U,	// <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
-  1490960486U,	// <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
-  2689841261U,	// <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
-  1490962162U,	// <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
-  1616541823U,	// <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
-  1490963766U,	// <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
-  1659228164U,	// <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
-  537712794U,	// <3,u,5,6>: Cost 1 vext3 LHS, RHS
-  1659230371U,	// <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
-  537712812U,	// <3,u,5,u>: Cost 1 vext3 LHS, RHS
-  2689841327U,	// <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
-  2558739482U,	// <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
-  2689841351U,	// <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
-  1616099536U,	// <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
-  1659227508U,	// <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
-  2690283746U,	// <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
-  1659228984U,	// <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
-  1659230445U,	// <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
-  1616099581U,	// <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
-  1485004902U,	// <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
-  1485005851U,	// <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
-  2558748264U,	// <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
-  3095397021U,	// <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
-  1485008182U,	// <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
-  1659228328U,	// <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
-  2722060599U,	// <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
-  1659229804U,	// <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
-  1485010734U,	// <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
-  1616099665U,	// <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
-  1611897179U,	// <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
-  537712997U,	// <3,u,u,2>: Cost 1 vext3 LHS, LHS
-  336380006U,	// <3,u,u,3>: Cost 1 vdup3 LHS
-  1616099705U,	// <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
-  1611897219U,	// <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
-  537713037U,	// <3,u,u,6>: Cost 1 vext3 LHS, RHS
-  1659230607U,	// <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
-  537713051U,	// <3,u,u,u>: Cost 1 vext3 LHS, LHS
-  2691907584U,	// <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
-  2691907594U,	// <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
-  2691907604U,	// <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
-  3709862144U,	// <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
-  2684682280U,	// <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
-  3694600633U,	// <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
-  3291431290U,	// <4,0,0,6>: Cost 4 vrev <0,4,6,0>
-  3668342067U,	// <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
-  2691907657U,	// <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
-  2570715238U,	// <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
-  2570716058U,	// <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
-  1618165862U,	// <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
-  2570717648U,	// <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
-  2570718518U,	// <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
-  2594607206U,	// <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
-  3662377563U,	// <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
-  2594608436U,	// <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
-  1618165916U,	// <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
-  2685714598U,	// <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
-  3759530159U,	// <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
-  2685862072U,	// <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
-  2631476937U,	// <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
-  2685714636U,	// <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
-  3765649622U,	// <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
-  2686157020U,	// <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
-  3668358453U,	// <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
-  2686304494U,	// <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
-  3632529510U,	// <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
-  2686451968U,	// <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
-  2686525705U,	// <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
-  3760341266U,	// <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
-  3632532790U,	// <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
-  3913254606U,	// <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
-  3705219740U,	// <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
-  3713845990U,	// <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
-  2686451968U,	// <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
-  2552823910U,	// <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
-  2691907922U,	// <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
-  2691907932U,	// <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
-  3626567830U,	// <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
-  2552827190U,	// <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
-  2631478582U,	// <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
-  3626570017U,	// <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
-  3668374839U,	// <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
-  2552829742U,	// <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
-  2558804070U,	// <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
-  1839644774U,	// <4,0,5,1>: Cost 2 vzipl RHS, LHS
-  2913386660U,	// <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
-  2570750420U,	// <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
-  2558807350U,	// <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
-  3987128750U,	// <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
-  3987128822U,	// <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
-  2594641208U,	// <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
-  1839645341U,	// <4,0,5,u>: Cost 2 vzipl RHS, LHS
-  2552840294U,	// <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
-  3047604234U,	// <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
-  1973862502U,	// <4,0,6,2>: Cost 2 vtrnl RHS, LHS
-  2570758613U,	// <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
-  2552843574U,	// <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
-  2217664887U,	// <4,0,6,5>: Cost 3 vrev <0,4,5,6>
-  3662418528U,	// <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
-  2658022257U,	// <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
-  1973862556U,	// <4,0,6,u>: Cost 2 vtrnl RHS, LHS
-  3731764218U,	// <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
-  3988324454U,	// <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
-  4122034278U,	// <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
-  3735082246U,	// <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
-  3731764536U,	// <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
-  3937145718U,	// <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
-  3737073145U,	// <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
-  3731764844U,	// <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
-  4122034332U,	// <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
-  2552856678U,	// <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
-  1841635430U,	// <4,0,u,1>: Cost 2 vzipl RHS, LHS
-  1618166429U,	// <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
-  2570774999U,	// <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
-  2552859958U,	// <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
-  2631481498U,	// <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
-  2686157020U,	// <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
-  2594665787U,	// <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
-  1618166483U,	// <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
-  2617548837U,	// <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
-  2622857318U,	// <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
-  3693281484U,	// <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
-  2691908342U,	// <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
-  2622857554U,	// <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
-  3764470538U,	// <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
-  3695272459U,	// <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
-  3733094980U,	// <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
-  2622857885U,	// <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
-  3696599798U,	// <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
-  2691097399U,	// <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
-  2631484314U,	// <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
-  2691908424U,	// <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
-  3696600125U,	// <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
-  3696600175U,	// <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
-  3696600307U,	// <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
-  3668423997U,	// <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
-  2691908469U,	// <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
-  2570797158U,	// <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
-  2570797978U,	// <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
-  3696600680U,	// <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
-  1618166682U,	// <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
-  2570800438U,	// <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
-  3765650347U,	// <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
-  3696601018U,	// <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
-  3668432190U,	// <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
-  1618535367U,	// <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
-  2564833382U,	// <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
-  2691908568U,	// <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
-  2691908578U,	// <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
-  2692572139U,	// <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
-  2564836662U,	// <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
-  2691908608U,	// <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
-  2588725862U,	// <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
-  3662468090U,	// <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
-  2691908631U,	// <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
-  3760194590U,	// <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
-  3693947874U,	// <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
-  3765650484U,	// <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
-  3113877606U,	// <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
-  3760194630U,	// <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
-  2622860598U,	// <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
-  3297436759U,	// <4,1,4,6>: Cost 4 vrev <1,4,6,4>
-  3800007772U,	// <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
-  2622860841U,	// <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
-  1479164006U,	// <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
-  2552906486U,	// <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
-  2552907299U,	// <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
-  2552907926U,	// <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
-  1479167286U,	// <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
-  2913387664U,	// <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
-  2600686074U,	// <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
-  2600686586U,	// <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
-  1479169838U,	// <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
-  2552914022U,	// <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
-  2558886708U,	// <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
-  4028205206U,	// <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
-  3089858662U,	// <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
-  2552917302U,	// <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
-  2223637584U,	// <4,1,6,5>: Cost 3 vrev <1,4,5,6>
-  4121347081U,	// <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
-  3721155406U,	// <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
-  2552919854U,	// <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
-  2659357716U,	// <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
-  3733763173U,	// <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
-  3734426806U,	// <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
-  2695226671U,	// <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
-  3721155942U,	// <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
-  3721155976U,	// <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
-  3662500458U,	// <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
-  3721156204U,	// <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
-  2659357716U,	// <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
-  1479188582U,	// <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
-  2552931062U,	// <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
-  2552931944U,	// <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
-  1622148480U,	// <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
-  1479191862U,	// <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
-  2622863514U,	// <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
-  2588725862U,	// <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
-  2600686586U,	// <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
-  1479194414U,	// <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
-  2617557030U,	// <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
-  2622865510U,	// <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
-  2622865612U,	// <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
-  3693289753U,	// <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
-  2635473244U,	// <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
-  3765650918U,	// <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
-  2696775148U,	// <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
-  3695944285U,	// <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
-  2622866077U,	// <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
-  3696607990U,	// <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
-  3696608052U,	// <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
-  3696608150U,	// <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
-  3895574630U,	// <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
-  2691909162U,	// <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
-  3696608400U,	// <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
-  3760784956U,	// <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
-  3773908549U,	// <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
-  2691909162U,	// <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
-  3696608748U,	// <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
-  3696608828U,	// <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
-  2691909224U,	// <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
-  2691909234U,	// <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
-  3759605368U,	// <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
-  3696609156U,	// <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
-  3760785040U,	// <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
-  3668505927U,	// <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
-  2691909279U,	// <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
-  2691909286U,	// <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
-  3764840111U,	// <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
-  3765651129U,	// <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
-  2698544836U,	// <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
-  2685863630U,	// <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
-  2698692310U,	// <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
-  3772507871U,	// <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
-  2698839784U,	// <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
-  2691909358U,	// <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
-  2564915302U,	// <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
-  2564916122U,	// <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
-  2564917004U,	// <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
-  2699208469U,	// <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
-  2564918582U,	// <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
-  2622868790U,	// <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
-  2229667632U,	// <4,2,4,6>: Cost 3 vrev <2,4,6,4>
-  3800082229U,	// <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
-  2622869033U,	// <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
-  2552979558U,	// <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
-  2558952342U,	// <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
-  2564925032U,	// <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
-  2967060582U,	// <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
-  2552982838U,	// <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
-  3987130190U,	// <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
-  2913388474U,	// <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
-  3895577910U,	// <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
-  2552985390U,	// <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
-  1479245926U,	// <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
-  2552988406U,	// <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
-  2552989288U,	// <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
-  2954461286U,	// <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
-  1479249206U,	// <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
-  2229610281U,	// <4,2,6,5>: Cost 3 vrev <2,4,5,6>
-  2600767994U,	// <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
-  2600768506U,	// <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
-  1479251758U,	// <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
-  2659365909U,	// <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
-  3733771366U,	// <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
-  3734434999U,	// <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
-  2701199368U,	// <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
-  4175774618U,	// <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
-  3303360298U,	// <4,2,7,5>: Cost 4 vrev <2,4,5,7>
-  3727136217U,	// <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
-  3727136364U,	// <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
-  2659365909U,	// <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
-  1479262310U,	// <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
-  2553004790U,	// <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
-  2553005672U,	// <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
-  2954477670U,	// <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
-  1479265590U,	// <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
-  2622871706U,	// <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
-  2229700404U,	// <4,2,u,6>: Cost 3 vrev <2,4,6,u>
-  2600784890U,	// <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
-  1479268142U,	// <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
-  3765651595U,	// <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
-  2691909782U,	// <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
-  2702452897U,	// <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
-  3693297946U,	// <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
-  3760711856U,	// <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
-  2235533820U,	// <4,3,0,5>: Cost 3 vrev <3,4,5,0>
-  3309349381U,	// <4,3,0,6>: Cost 4 vrev <3,4,6,0>
-  3668563278U,	// <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
-  2691909845U,	// <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
-  2235173328U,	// <4,3,1,0>: Cost 3 vrev <3,4,0,1>
-  3764840678U,	// <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
-  2630173594U,	// <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
-  2703190267U,	// <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
-  3760195840U,	// <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
-  3765651724U,	// <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
-  3309357574U,	// <4,3,1,6>: Cost 4 vrev <3,4,6,1>
-  3769633054U,	// <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
-  2703558952U,	// <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
-  3626770534U,	// <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
-  2630174250U,	// <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
-  3765651777U,	// <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
-  2703853900U,	// <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
-  3626773814U,	// <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
-  2704001374U,	// <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
-  3765651814U,	// <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
-  3769633135U,	// <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
-  2634819681U,	// <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
-  3765651839U,	// <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
-  3765651848U,	// <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
-  3710552404U,	// <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
-  2691910044U,	// <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
-  2704591270U,	// <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
-  3769633202U,	// <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
-  3703917212U,	// <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
-  3769633220U,	// <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
-  2691910044U,	// <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
-  2691910096U,	// <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
-  2691910106U,	// <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
-  2564990741U,	// <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
-  3765651946U,	// <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
-  2691910136U,	// <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
-  2686454274U,	// <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
-  2235640329U,	// <4,3,4,6>: Cost 3 vrev <3,4,6,4>
-  3801483792U,	// <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
-  2691910168U,	// <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
-  2559025254U,	// <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
-  2559026237U,	// <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
-  2564998862U,	// <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
-  2570971548U,	// <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
-  2559028534U,	// <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
-  4163519477U,	// <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
-  3309390346U,	// <4,3,5,6>: Cost 4 vrev <3,4,6,5>
-  2706139747U,	// <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
-  2559031086U,	// <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
-  2559033446U,	// <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
-  2559034430U,	// <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
-  2565007127U,	// <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
-  2570979740U,	// <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
-  2559036726U,	// <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
-  1161841154U,	// <4,3,6,5>: Cost 2 vrev <3,4,5,6>
-  4028203932U,	// <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
-  2706803380U,	// <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
-  1162062365U,	// <4,3,6,u>: Cost 2 vrev <3,4,u,6>
-  3769633475U,	// <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
-  3769633488U,	// <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
-  3638757144U,	// <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
-  3769633508U,	// <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
-  3769633515U,	// <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
-  3769633526U,	// <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
-  3662647932U,	// <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
-  3781208837U,	// <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
-  3769633547U,	// <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
-  2559049830U,	// <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
-  2691910430U,	// <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
-  2565023513U,	// <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
-  2707835698U,	// <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
-  2559053110U,	// <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
-  1161857540U,	// <4,3,u,5>: Cost 2 vrev <3,4,5,u>
-  2235673101U,	// <4,3,u,6>: Cost 3 vrev <3,4,6,u>
-  2708130646U,	// <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
-  1162078751U,	// <4,3,u,u>: Cost 2 vrev <3,4,u,u>
-  2617573416U,	// <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
-  1570373734U,	// <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
-  2779676774U,	// <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
-  3760196480U,	// <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
-  2576977100U,	// <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
-  2718747538U,	// <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
-  2718747548U,	// <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
-  3668637015U,	// <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
-  1570374301U,	// <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
-  2644116214U,	// <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
-  2644116276U,	// <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
-  2691910602U,	// <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
-  2644116440U,	// <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
-  2711227356U,	// <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
-  2709310438U,	// <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
-  3765652462U,	// <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
-  3768970231U,	// <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
-  2695891968U,	// <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
-  3703260634U,	// <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
-  3765652499U,	// <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
-  2644117096U,	// <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
-  2631509709U,	// <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
-  2644117269U,	// <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
-  3705251698U,	// <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
-  2710047808U,	// <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
-  3783863369U,	// <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
-  2634827874U,	// <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
-  2644117654U,	// <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
-  3638797210U,	// <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
-  3638798082U,	// <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
-  2637482406U,	// <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
-  2638146039U,	// <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
-  3913287374U,	// <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
-  3765652625U,	// <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
-  3713878762U,	// <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
-  2637482406U,	// <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
-  1503264870U,	// <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
-  2577007514U,	// <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
-  2577008232U,	// <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
-  2571037175U,	// <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
-  161926454U,	// <4,4,4,4>: Cost 1 vdup0 RHS
-  1570377014U,	// <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
-  2779680054U,	// <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
-  2594927963U,	// <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
-  161926454U,	// <4,4,4,u>: Cost 1 vdup0 RHS
-  2571042918U,	// <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
-  2571043738U,	// <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
-  3638814495U,	// <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
-  2571045368U,	// <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
-  2571046198U,	// <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
-  1839648054U,	// <4,4,5,5>: Cost 2 vzipl RHS, RHS
-  1618169142U,	// <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
-  2594936156U,	// <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
-  1618169160U,	// <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
-  2553135206U,	// <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
-  3626877686U,	// <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
-  2565080782U,	// <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
-  2571053561U,	// <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
-  2553138486U,	// <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
-  2241555675U,	// <4,4,6,5>: Cost 3 vrev <4,4,5,6>
-  1973865782U,	// <4,4,6,6>: Cost 2 vtrnl RHS, RHS
-  2658055029U,	// <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
-  1973865800U,	// <4,4,6,u>: Cost 2 vtrnl RHS, RHS
-  2644120570U,	// <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
-  3638829978U,	// <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
-  3638830881U,	// <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
-  3735115018U,	// <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
-  2662036827U,	// <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
-  2713292236U,	// <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
-  2713365973U,	// <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
-  2644121196U,	// <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
-  2662036827U,	// <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
-  1503297638U,	// <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
-  1570379566U,	// <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
-  2779682606U,	// <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
-  2571069947U,	// <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
-  161926454U,	// <4,4,u,4>: Cost 1 vdup0 RHS
-  1841638710U,	// <4,4,u,5>: Cost 2 vzipl RHS, RHS
-  1618169385U,	// <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
-  2594960735U,	// <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
-  161926454U,	// <4,4,u,u>: Cost 1 vdup0 RHS
-  2631516160U,	// <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
-  1557774438U,	// <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
-  2618908875U,	// <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
-  2571078140U,	// <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
-  2626871634U,	// <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
-  3705258414U,	// <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
-  2594968438U,	// <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
-  2594968928U,	// <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
-  1557775005U,	// <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
-  2631516918U,	// <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
-  2624217939U,	// <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
-  2631517078U,	// <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
-  2821341286U,	// <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
-  3895086054U,	// <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
-  2626872471U,	// <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
-  3895083131U,	// <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
-  2718748368U,	// <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
-  2821341291U,	// <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
-  2571092070U,	// <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
-  3699287585U,	// <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
-  2630854269U,	// <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
-  1557776078U,	// <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
-  2631517974U,	// <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
-  3692652384U,	// <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
-  2631518138U,	// <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
-  4164013366U,	// <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
-  1561094243U,	// <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
-  2631518358U,	// <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
-  3895084710U,	// <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
-  2631518540U,	// <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
-  2631518620U,	// <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
-  2631518716U,	// <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
-  2631518784U,	// <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
-  2658060980U,	// <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
-  2640145131U,	// <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
-  2631519006U,	// <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
-  2571108454U,	// <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
-  3632907342U,	// <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
-  2571110094U,	// <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
-  2571110912U,	// <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
-  2571111734U,	// <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
-  1557777718U,	// <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
-  2645454195U,	// <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
-  2718748614U,	// <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
-  1557777961U,	// <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
-  1503346790U,	// <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
-  2913398480U,	// <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
-  2631519998U,	// <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
-  2577090710U,	// <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
-  1503349978U,	// <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
-  2631520260U,	// <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
-  2913390690U,	// <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
-  2821344566U,	// <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
-  1503352622U,	// <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
-  1497383014U,	// <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
-  2559181904U,	// <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
-  2565154601U,	// <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
-  1497385474U,	// <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
-  1497386294U,	// <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
-  3047608324U,	// <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
-  2571129656U,	// <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
-  27705344U,	// <4,5,6,7>: Cost 0 copy RHS
-  27705344U,	// <4,5,6,u>: Cost 0 copy RHS
-  2565161062U,	// <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
-  2565161882U,	// <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
-  2565162794U,	// <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
-  2661381387U,	// <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
-  2565164342U,	// <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
-  2718748840U,	// <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
-  2718748846U,	// <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
-  2719412407U,	// <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
-  2565166894U,	// <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
-  1497399398U,	// <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
-  1557780270U,	// <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
-  2631522181U,	// <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
-  1497401860U,	// <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
-  1497402678U,	// <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
-  1557780634U,	// <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
-  2631522512U,	// <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
-  27705344U,	// <4,5,u,7>: Cost 0 copy RHS
-  27705344U,	// <4,5,u,u>: Cost 0 copy RHS
-  2618916864U,	// <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
-  1545175142U,	// <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
-  1545175244U,	// <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
-  3692658940U,	// <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
-  2618917202U,	// <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
-  3852910806U,	// <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
-  2253525648U,	// <4,6,0,6>: Cost 3 vrev <6,4,6,0>
-  4040764726U,	// <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
-  1545175709U,	// <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
-  2618917622U,	// <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
-  2618917684U,	// <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
-  2618917782U,	// <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
-  2618917848U,	// <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
-  3692659773U,	// <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
-  2618918032U,	// <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
-  3692659937U,	// <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
-  4032146742U,	// <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
-  2618918253U,	// <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
-  2618918380U,	// <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
-  2618918460U,	// <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
-  2618918504U,	// <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
-  2618918566U,	// <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
-  2618918679U,	// <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
-  2618918788U,	// <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
-  2618918842U,	// <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
-  2718749178U,	// <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
-  2618918971U,	// <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
-  2618919062U,	// <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
-  2636171526U,	// <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
-  3692661057U,	// <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
-  2618919324U,	// <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
-  2618919426U,	// <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
-  2638826058U,	// <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
-  3913303030U,	// <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
-  2722730572U,	// <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
-  2618919710U,	// <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
-  2565210214U,	// <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
-  2718749286U,	// <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
-  2565211952U,	// <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
-  2571184649U,	// <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
-  2565213494U,	// <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
-  1545178422U,	// <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
-  1705430326U,	// <4,6,4,6>: Cost 2 vuzpl RHS, RHS
-  2595075437U,	// <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
-  1545178665U,	// <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
-  2565218406U,	// <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
-  2645462736U,	// <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
-  2913399290U,	// <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
-  3913305394U,	// <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
-  2645462982U,	// <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
-  2779172868U,	// <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
-  2913391416U,	// <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
-  2821426486U,	// <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
-  2821426487U,	// <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
-  1503428710U,	// <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
-  2577171190U,	// <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
-  2645463546U,	// <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
-  2577172630U,	// <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
-  1503431908U,	// <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
-  2253501069U,	// <4,6,6,5>: Cost 3 vrev <6,4,5,6>
-  2618921784U,	// <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
-  2954464566U,	// <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
-  1503434542U,	// <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
-  2645464058U,	// <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
-  2779173882U,	// <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
-  3638978355U,	// <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
-  2725090156U,	// <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
-  2645464422U,	// <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
-  2779174246U,	// <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
-  3852915914U,	// <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
-  2779174508U,	// <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
-  2779173945U,	// <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
-  1503445094U,	// <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
-  1545180974U,	// <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
-  1705432878U,	// <4,6,u,2>: Cost 2 vuzpl RHS, LHS
-  2618922940U,	// <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
-  1503448294U,	// <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
-  1545181338U,	// <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
-  1705433242U,	// <4,6,u,6>: Cost 2 vuzpl RHS, RHS
-  2954480950U,	// <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
-  1545181541U,	// <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
-  3706601472U,	// <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
-  2632859750U,	// <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
-  2726343685U,	// <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
-  3701293312U,	// <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
-  3706601810U,	// <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
-  2259424608U,	// <4,7,0,5>: Cost 3 vrev <7,4,5,0>
-  3695321617U,	// <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
-  3800454194U,	// <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
-  2632860317U,	// <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
-  2259064116U,	// <4,7,1,0>: Cost 3 vrev <7,4,0,1>
-  3700630324U,	// <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
-  2632860570U,	// <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
-  3769635936U,	// <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
-  3656920374U,	// <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
-  3700630681U,	// <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
-  3701294314U,	// <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
-  3793818754U,	// <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
-  2259654012U,	// <4,7,1,u>: Cost 3 vrev <7,4,u,1>
-  3656925286U,	// <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
-  3706603050U,	// <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
-  3706603112U,	// <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
-  2727744688U,	// <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
-  3705939745U,	// <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
-  2632861554U,	// <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
-  3706603450U,	// <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
-  3792491731U,	// <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
-  2634852453U,	// <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
-  3706603670U,	// <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
-  3662906266U,	// <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
-  3725183326U,	// <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
-  3706603932U,	// <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
-  3701295618U,	// <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
-  2638834251U,	// <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
-  2639497884U,	// <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
-  3802445093U,	// <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
-  2640825150U,	// <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
-  2718750004U,	// <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
-  3706604490U,	// <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
-  3656943474U,	// <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
-  3779884371U,	// <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
-  2259383643U,	// <4,7,4,4>: Cost 3 vrev <7,4,4,4>
-  2632863030U,	// <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
-  2259531117U,	// <4,7,4,6>: Cost 3 vrev <7,4,6,4>
-  3907340074U,	// <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
-  2632863273U,	// <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
-  2913391610U,	// <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
-  3645006848U,	// <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
-  2589181646U,	// <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
-  3645008403U,	// <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
-  2913391974U,	// <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
-  2583211973U,	// <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
-  2589184670U,	// <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
-  2913392236U,	// <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
-  2913392258U,	// <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
-  1509474406U,	// <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
-  3047609338U,	// <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
-  2583217768U,	// <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
-  2583218326U,	// <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
-  1509477686U,	// <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
-  1509478342U,	// <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
-  2583220730U,	// <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
-  3047609964U,	// <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
-  1509480238U,	// <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
-  3650994278U,	// <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
-  3650995098U,	// <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
-  3650996010U,	// <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
-  3804804677U,	// <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
-  3650997486U,	// <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
-  2662725039U,	// <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
-  3662942880U,	// <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
-  2718750316U,	// <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
-  2664715938U,	// <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
-  1509490790U,	// <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
-  2632865582U,	// <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
-  2583234152U,	// <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
-  2583234710U,	// <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
-  1509494070U,	// <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
-  1509494728U,	// <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
-  2583237114U,	// <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
-  3047757420U,	// <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
-  1509496622U,	// <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
-  2618933248U,	// <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
-  1545191526U,	// <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
-  1545191630U,	// <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
-  2691913445U,	// <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
-  2618933586U,	// <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
-  2265397305U,	// <4,u,0,5>: Cost 3 vrev <u,4,5,0>
-  2595189625U,	// <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
-  2595190139U,	// <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
-  1545192093U,	// <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
-  2618934006U,	// <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
-  2618934068U,	// <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
-  1618171694U,	// <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
-  2618934232U,	// <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
-  2695894848U,	// <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
-  2618934416U,	// <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
-  3692676321U,	// <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
-  2718750555U,	// <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
-  1618171748U,	// <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
-  2553397350U,	// <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
-  2630215215U,	// <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
-  2618934888U,	// <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
-  1557800657U,	// <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
-  2618935065U,	// <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
-  2733864859U,	// <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
-  2618935226U,	// <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
-  2718750636U,	// <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
-  1561118822U,	// <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
-  2618935446U,	// <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
-  2779318422U,	// <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
-  2636851545U,	// <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
-  2618935708U,	// <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
-  2618935810U,	// <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
-  2691913711U,	// <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
-  2588725862U,	// <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
-  2640169710U,	// <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
-  2618936094U,	// <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
-  1503559782U,	// <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
-  2692282391U,	// <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
-  2565359426U,	// <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
-  2571332123U,	// <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
-  161926454U,	// <4,u,4,4>: Cost 1 vdup0 RHS
-  1545194806U,	// <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
-  1705577782U,	// <4,u,4,6>: Cost 2 vuzpl RHS, RHS
-  2718750801U,	// <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
-  161926454U,	// <4,u,4,u>: Cost 1 vdup0 RHS
-  1479164006U,	// <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
-  1839650606U,	// <4,u,5,1>: Cost 2 vzipl RHS, LHS
-  2565367502U,	// <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
-  3089777309U,	// <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
-  1479167286U,	// <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
-  1839650970U,	// <4,u,5,5>: Cost 2 vzipl RHS, RHS
-  1618172058U,	// <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
-  3089780265U,	// <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
-  1618172076U,	// <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
-  1479688294U,	// <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
-  2553430774U,	// <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
-  1973868334U,	// <4,u,6,2>: Cost 2 vtrnl RHS, LHS
-  1497606685U,	// <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
-  1479691574U,	// <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
-  1509552079U,	// <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
-  1973868698U,	// <4,u,6,6>: Cost 2 vtrnl RHS, RHS
-  27705344U,	// <4,u,6,7>: Cost 0 copy RHS
-  27705344U,	// <4,u,6,u>: Cost 0 copy RHS
-  2565382246U,	// <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
-  2565383066U,	// <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
-  2565384005U,	// <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
-  2661405966U,	// <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
-  2565385526U,	// <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
-  2779321702U,	// <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
-  2589274793U,	// <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
-  2779321964U,	// <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
-  2565388078U,	// <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
-  1479704678U,	// <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
-  1545197358U,	// <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
-  1618172261U,	// <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
-  1497623071U,	// <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
-  161926454U,	// <4,u,u,4>: Cost 1 vdup0 RHS
-  1545197722U,	// <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
-  1618172301U,	// <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
-  27705344U,	// <4,u,u,7>: Cost 0 copy RHS
-  27705344U,	// <4,u,u,u>: Cost 0 copy RHS
-  2687123456U,	// <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
-  2687123466U,	// <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
-  2687123476U,	// <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
-  3710599434U,	// <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
-  2642166098U,	// <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
-  3657060306U,	// <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
-  3292094923U,	// <5,0,0,6>: Cost 4 vrev <0,5,6,0>
-  3669005700U,	// <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
-  2687123530U,	// <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
-  2559434854U,	// <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
-  2559435887U,	// <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
-  1613381734U,	// <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
-  3698656256U,	// <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
-  2559438134U,	// <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
-  2583326675U,	// <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
-  3715908851U,	// <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
-  3657069562U,	// <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
-  1613381788U,	// <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
-  2686017700U,	// <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
-  2685796528U,	// <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
-  2698625208U,	// <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
-  2685944002U,	// <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
-  2686017739U,	// <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
-  2686091476U,	// <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
-  2725167324U,	// <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
-  2595280230U,	// <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
-  2686312687U,	// <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
-  3760128248U,	// <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
-  3759685888U,	// <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
-  2686533898U,	// <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
-  3760349459U,	// <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
-  2638187004U,	// <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
-  3776348452U,	// <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
-  3713256094U,	// <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
-  3914064896U,	// <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
-  2686976320U,	// <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
-  2559459430U,	// <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
-  1613381970U,	// <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
-  2687123804U,	// <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
-  3761013092U,	// <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
-  2559462710U,	// <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
-  2638187830U,	// <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
-  3761234303U,	// <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
-  2646150600U,	// <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
-  1613381970U,	// <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
-  3766763926U,	// <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
-  2919268454U,	// <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
-  3053486182U,	// <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
-  3723210589U,	// <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
-  3766763966U,	// <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
-  2650796031U,	// <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
-  3719893090U,	// <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
-  3914067254U,	// <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
-  2919269021U,	// <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
-  4047519744U,	// <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
-  2920038502U,	// <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
-  3759759871U,	// <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
-  3645164070U,	// <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
-  3762414095U,	// <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
-  3993780690U,	// <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
-  3719893816U,	// <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
-  2662077302U,	// <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
-  2920039069U,	// <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
-  2565455974U,	// <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
-  2565456790U,	// <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
-  2565457742U,	// <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
-  3639199894U,	// <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
-  2565459254U,	// <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
-  2589347938U,	// <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
-  2589348530U,	// <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
-  4188456422U,	// <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
-  2565461806U,	// <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
-  2687124106U,	// <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
-  1616036502U,	// <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
-  1613382301U,	// <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
-  2689925800U,	// <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
-  2687124146U,	// <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
-  2638190746U,	// <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
-  2589356723U,	// <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
-  2595280230U,	// <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
-  1613382355U,	// <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
-  2646818816U,	// <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
-  1573077094U,	// <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
-  2646818980U,	// <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
-  2687124214U,	// <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
-  2641510738U,	// <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
-  2641510814U,	// <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
-  3720561142U,	// <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
-  3298141357U,	// <5,1,0,7>: Cost 4 vrev <1,5,7,0>
-  1573077661U,	// <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
-  2223891567U,	// <5,1,1,0>: Cost 3 vrev <1,5,0,1>
-  2687124276U,	// <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
-  2646819734U,	// <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
-  2687124296U,	// <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
-  2691326803U,	// <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
-  2691400540U,	// <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
-  3765216101U,	// <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
-  3765289838U,	// <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
-  2687124341U,	// <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
-  3297641584U,	// <5,1,2,0>: Cost 4 vrev <1,5,0,2>
-  3763520391U,	// <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
-  2646820456U,	// <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
-  2687124374U,	// <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
-  2691990436U,	// <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
-  2687124395U,	// <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
-  2646820794U,	// <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
-  3808199610U,	// <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
-  2687124419U,	// <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
-  2577440870U,	// <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
-  2687124440U,	// <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
-  3759686627U,	// <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
-  2692580332U,	// <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
-  2687124469U,	// <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
-  2685207552U,	// <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
-  3760866313U,	// <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
-  2692875280U,	// <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
-  2687124503U,	// <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
-  1567771538U,	// <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
-  2693096491U,	// <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
-  2693170228U,	// <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
-  2687124541U,	// <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
-  2646822096U,	// <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
-  1573080374U,	// <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
-  2646822260U,	// <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
-  3298174129U,	// <5,1,4,7>: Cost 4 vrev <1,5,7,4>
-  1573080602U,	// <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
-  2687124591U,	// <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
-  2646822543U,	// <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
-  3760866433U,	// <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
-  2687124624U,	// <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
-  2687124631U,	// <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
-  2646822916U,	// <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
-  2646823010U,	// <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
-  2646823080U,	// <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
-  2687124663U,	// <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
-  2553577574U,	// <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
-  3763520719U,	// <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
-  2646823418U,	// <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
-  3760866529U,	// <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
-  2553580854U,	// <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
-  2687124723U,	// <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
-  2646823736U,	// <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
-  2646823758U,	// <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
-  2646823839U,	// <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
-  2559557734U,	// <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
-  2559558452U,	// <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
-  2571503270U,	// <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
-  2040971366U,	// <5,1,7,3>: Cost 2 vtrnr RHS, LHS
-  2559561014U,	// <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
-  2595393232U,	// <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
-  4188455035U,	// <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
-  2646824556U,	// <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
-  2040971371U,	// <5,1,7,u>: Cost 2 vtrnr RHS, LHS
-  1591662326U,	// <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
-  1573082926U,	// <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
-  2695824760U,	// <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
-  2040979558U,	// <5,1,u,3>: Cost 2 vtrnr RHS, LHS
-  2687124874U,	// <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
-  1573083290U,	// <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
-  2646825168U,	// <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
-  2646825216U,	// <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
-  2040979563U,	// <5,1,u,u>: Cost 2 vtrnr RHS, LHS
-  3702652928U,	// <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
-  2628911206U,	// <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
-  2641518756U,	// <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
-  3759760847U,	// <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
-  3760866775U,	// <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
-  3759539680U,	// <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
-  3760866796U,	// <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
-  3304114054U,	// <5,2,0,7>: Cost 4 vrev <2,5,7,0>
-  2628911773U,	// <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
-  2623603464U,	// <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
-  3698008921U,	// <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
-  3633325603U,	// <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
-  2687125027U,	// <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
-  3633327414U,	// <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
-  3759539760U,	// <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
-  3760866876U,	// <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
-  3304122247U,	// <5,2,1,7>: Cost 4 vrev <2,5,7,1>
-  2687125072U,	// <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
-  3633332326U,	// <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
-  3759760992U,	// <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
-  2687125096U,	// <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
-  2687125106U,	// <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
-  2697963133U,	// <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
-  3759466120U,	// <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
-  3760866960U,	// <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
-  3771926168U,	// <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
-  2687125151U,	// <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
-  2687125158U,	// <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
-  2698405555U,	// <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
-  2577516238U,	// <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
-  3759687365U,	// <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
-  1624884942U,	// <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
-  2698700503U,	// <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
-  3772368608U,	// <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
-  3702655716U,	// <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
-  1625179890U,	// <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
-  2641521555U,	// <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
-  3772368642U,	// <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
-  2699142925U,	// <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
-  2698626838U,	// <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
-  2698626848U,	// <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
-  2628914486U,	// <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
-  2645503353U,	// <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
-  3304146826U,	// <5,2,4,7>: Cost 4 vrev <2,5,7,4>
-  2628914729U,	// <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
-  2553643110U,	// <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
-  3758950227U,	// <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
-  3759761248U,	// <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
-  2982396006U,	// <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
-  2553646390U,	// <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
-  2553647108U,	// <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
-  3760867204U,	// <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
-  3702657141U,	// <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
-  2982396011U,	// <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
-  3627393126U,	// <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
-  3760867236U,	// <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
-  2645504506U,	// <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
-  2687125434U,	// <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
-  2700617665U,	// <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
-  3760867276U,	// <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
-  3763521493U,	// <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
-  3719246670U,	// <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
-  2687125479U,	// <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
-  2565603430U,	// <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
-  2553660150U,	// <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
-  2565605216U,	// <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
-  2961178726U,	// <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
-  2565606710U,	// <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
-  4034920552U,	// <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
-  3114713292U,	// <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
-  3702658668U,	// <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
-  2961178731U,	// <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
-  2687125563U,	// <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
-  2628917038U,	// <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
-  2565613409U,	// <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
-  2687125592U,	// <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
-  1628203107U,	// <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
-  2628917402U,	// <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
-  2702092405U,	// <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
-  3304179598U,	// <5,2,u,7>: Cost 4 vrev <2,5,7,u>
-  1628498055U,	// <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
-  3760867467U,	// <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
-  2687125654U,	// <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
-  3759761565U,	// <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
-  3633391766U,	// <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
-  2687125680U,	// <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
-  3760277690U,	// <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
-  3310013014U,	// <5,3,0,6>: Cost 4 vrev <3,5,6,0>
-  2236344927U,	// <5,3,0,7>: Cost 3 vrev <3,5,7,0>
-  2687125717U,	// <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
-  3760867551U,	// <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
-  3760867558U,	// <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
-  2624938923U,	// <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
-  2703198460U,	// <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
-  3760867587U,	// <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
-  2636219536U,	// <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
-  3698681075U,	// <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
-  2703493408U,	// <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
-  2628920721U,	// <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
-  3766765870U,	// <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
-  3698681379U,	// <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
-  3760867649U,	// <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
-  2698627404U,	// <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
-  2703935830U,	// <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
-  2698627422U,	// <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
-  3760867686U,	// <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
-  3769788783U,	// <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
-  2701945209U,	// <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
-  3760867711U,	// <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
-  2636220684U,	// <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
-  3772369298U,	// <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
-  2687125916U,	// <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
-  2704599463U,	// <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
-  2704673200U,	// <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
-  3709962935U,	// <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
-  3772369346U,	// <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
-  2704894411U,	// <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
-  2704968148U,	// <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
-  3698682850U,	// <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
-  2642857014U,	// <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
-  2705189359U,	// <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
-  2705263096U,	// <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
-  2685946370U,	// <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
-  3779152394U,	// <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
-  2236377699U,	// <5,3,4,7>: Cost 3 vrev <3,5,7,4>
-  2687126045U,	// <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
-  2571632742U,	// <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
-  2559689870U,	// <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
-  2571634382U,	// <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
-  2571635264U,	// <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
-  2571636022U,	// <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
-  2559692804U,	// <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
-  3720581218U,	// <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
-  2236385892U,	// <5,3,5,7>: Cost 3 vrev <3,5,7,5>
-  2571638574U,	// <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
-  2565668966U,	// <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
-  3633439887U,	// <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
-  2565670760U,	// <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
-  2565671426U,	// <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
-  2565672246U,	// <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
-  3639414630U,	// <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
-  4047521640U,	// <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
-  2725169844U,	// <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
-  2565674798U,	// <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
-  1485963366U,	// <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
-  1485964432U,	// <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
-  2559706728U,	// <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
-  2559707286U,	// <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
-  1485966646U,	// <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
-  2559708880U,	// <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
-  2601513466U,	// <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
-  3114714112U,	// <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
-  1485969198U,	// <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
-  1485971558U,	// <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
-  1485972625U,	// <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
-  2559714920U,	// <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
-  2559715478U,	// <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
-  1485974838U,	// <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
-  2687126342U,	// <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
-  2601521658U,	// <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
-  2236410471U,	// <5,3,u,7>: Cost 3 vrev <3,5,7,u>
-  1485977390U,	// <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
-  3627491430U,	// <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
-  2636890214U,	// <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
-  3703333028U,	// <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
-  3782249348U,	// <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
-  2642198866U,	// <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
-  2687126418U,	// <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
-  2242243887U,	// <5,4,0,6>: Cost 3 vrev <4,5,6,0>
-  3316059448U,	// <5,4,0,7>: Cost 4 vrev <4,5,7,0>
-  2636890781U,	// <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
-  2241809658U,	// <5,4,1,0>: Cost 3 vrev <4,5,0,1>
-  3698025307U,	// <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
-  3698688940U,	// <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
-  3698689024U,	// <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
-  3700016206U,	// <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
-  2687126498U,	// <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
-  3760868336U,	// <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
-  3316067641U,	// <5,4,1,7>: Cost 4 vrev <4,5,7,1>
-  2242399554U,	// <5,4,1,u>: Cost 3 vrev <4,5,u,1>
-  3703334371U,	// <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
-  3703998004U,	// <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
-  3704661637U,	// <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
-  2636891854U,	// <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
-  3705988903U,	// <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
-  2698628150U,	// <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
-  3760868415U,	// <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
-  3783871562U,	// <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
-  2666752099U,	// <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
-  3639459942U,	// <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
-  3709970701U,	// <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
-  2636892510U,	// <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
-  3710634396U,	// <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
-  2638219776U,	// <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
-  3766987908U,	// <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
-  2710719634U,	// <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
-  3914097664U,	// <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
-  2640874308U,	// <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
-  2583642214U,	// <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
-  2642201574U,	// <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
-  3710635062U,	// <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
-  3717270664U,	// <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
-  2713963728U,	// <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
-  1637567706U,	// <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
-  2242276659U,	// <5,4,4,6>: Cost 3 vrev <4,5,6,4>
-  2646183372U,	// <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
-  1637788917U,	// <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
-  2559762534U,	// <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
-  2559763607U,	// <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
-  2698628366U,	// <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
-  3633506454U,	// <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
-  2559765814U,	// <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
-  2583654395U,	// <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
-  1613385014U,	// <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
-  3901639990U,	// <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
-  1613385032U,	// <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
-  2559770726U,	// <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
-  2559771648U,	// <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
-  3633514088U,	// <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
-  2571717122U,	// <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
-  2559774006U,	// <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
-  2712636796U,	// <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
-  3760868743U,	// <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
-  2712784270U,	// <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
-  2559776558U,	// <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
-  2565750886U,	// <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
-  2565751706U,	// <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
-  2565752690U,	// <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
-  2571725387U,	// <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
-  2565754166U,	// <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
-  3114713426U,	// <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
-  94817590U,	// <5,4,7,6>: Cost 1 vrev RHS
-  2595616175U,	// <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
-  94965064U,	// <5,4,7,u>: Cost 1 vrev RHS
-  2559787110U,	// <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
-  2559788186U,	// <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
-  2242014483U,	// <5,4,u,2>: Cost 3 vrev <4,5,2,u>
-  2667419628U,	// <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
-  2559790390U,	// <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
-  1640222238U,	// <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
-  94825783U,	// <5,4,u,6>: Cost 1 vrev RHS
-  2714111536U,	// <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
-  94973257U,	// <5,4,u,u>: Cost 1 vrev RHS
-  2646851584U,	// <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
-  1573109862U,	// <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
-  2646851748U,	// <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
-  3760279130U,	// <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
-  2687127138U,	// <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
-  2248142847U,	// <5,5,0,5>: Cost 3 vrev <5,5,5,0>
-  3720593910U,	// <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
-  4182502710U,	// <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
-  1573110429U,	// <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
-  2646852342U,	// <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
-  2624291676U,	// <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
-  2646852502U,	// <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
-  2646852568U,	// <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
-  2715217591U,	// <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
-  2628936848U,	// <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
-  3698033907U,	// <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
-  2713964240U,	// <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
-  2628937107U,	// <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
-  3645497446U,	// <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
-  3760869099U,	// <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
-  2646853224U,	// <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
-  2698628862U,	// <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
-  3772370694U,	// <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
-  2713964303U,	// <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
-  2646853562U,	// <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
-  4038198272U,	// <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
-  2701946667U,	// <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
-  2646853782U,	// <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
-  3698034922U,	// <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
-  3702679919U,	// <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
-  2637564336U,	// <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
-  2646854146U,	// <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
-  2638891602U,	// <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
-  3702680247U,	// <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
-  3702680259U,	// <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
-  2646854430U,	// <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
-  2646854546U,	// <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
-  2642209767U,	// <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
-  3711306806U,	// <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
-  3645516369U,	// <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
-  1570458842U,	// <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
-  1573113142U,	// <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
-  2645527932U,	// <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
-  2713964486U,	// <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
-  1573113374U,	// <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
-  1509982310U,	// <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
-  2646855376U,	// <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
-  2583725672U,	// <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
-  2583726230U,	// <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
-  1509985590U,	// <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
-  229035318U,	// <5,5,5,5>: Cost 1 vdup1 RHS
-  2646855778U,	// <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
-  2646855848U,	// <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
-  229035318U,	// <5,5,5,u>: Cost 1 vdup1 RHS
-  2577760358U,	// <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
-  3633587361U,	// <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
-  2646856186U,	// <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
-  3633588738U,	// <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
-  2718535756U,	// <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
-  2644202223U,	// <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
-  2973780482U,	// <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
-  2646856526U,	// <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
-  2646856607U,	// <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
-  2571796582U,	// <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
-  3633595392U,	// <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
-  2571798222U,	// <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
-  2571799124U,	// <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
-  2571799862U,	// <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
-  3114717188U,	// <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
-  4034923010U,	// <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
-  2040974646U,	// <5,5,7,7>: Cost 2 vtrnr RHS, RHS
-  2040974647U,	// <5,5,7,u>: Cost 2 vtrnr RHS, RHS
-  1509982310U,	// <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
-  1573115694U,	// <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
-  2571806414U,	// <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
-  2571807317U,	// <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
-  1509985590U,	// <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
-  229035318U,	// <5,5,u,5>: Cost 1 vdup1 RHS
-  2646857936U,	// <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
-  2040982838U,	// <5,5,u,7>: Cost 2 vtrnr RHS, RHS
-  229035318U,	// <5,5,u,u>: Cost 1 vdup1 RHS
-  2638233600U,	// <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
-  1564491878U,	// <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
-  2632261796U,	// <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
-  2638233856U,	// <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
-  2638233938U,	// <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
-  3706003885U,	// <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
-  3706003967U,	// <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
-  4047473974U,	// <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
-  1564492445U,	// <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
-  2638234358U,	// <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
-  2638234420U,	// <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
-  2638234518U,	// <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
-  2638234584U,	// <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
-  2626290768U,	// <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
-  2638234768U,	// <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
-  3700032719U,	// <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
-  2982366518U,	// <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
-  2628945300U,	// <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
-  3706004925U,	// <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
-  3711976966U,	// <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
-  2638235240U,	// <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
-  2638235302U,	// <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
-  2632263465U,	// <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
-  2638235496U,	// <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
-  2638235578U,	// <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
-  2713965050U,	// <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
-  2634917997U,	// <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
-  2638235798U,	// <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
-  3711977695U,	// <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
-  3710650720U,	// <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
-  2638236060U,	// <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
-  1564494338U,	// <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
-  2638236234U,	// <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
-  3711978104U,	// <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
-  4034227510U,	// <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
-  1567148870U,	// <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
-  2577817702U,	// <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
-  3700034544U,	// <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
-  2723033713U,	// <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
-  2638236818U,	// <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
-  2644208859U,	// <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
-  1564495158U,	// <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
-  2645536125U,	// <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
-  2723402398U,	// <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
-  1564495401U,	// <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
-  2577825894U,	// <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
-  2662125264U,	// <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
-  3775836867U,	// <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
-  3711979343U,	// <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
-  2650181556U,	// <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
-  2662125572U,	// <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
-  2638237732U,	// <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
-  2982399286U,	// <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
-  2982399287U,	// <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
-  2583806054U,	// <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
-  3711979910U,	// <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
-  2662126074U,	// <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
-  2583808514U,	// <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
-  2583809334U,	// <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
-  2583810062U,	// <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
-  2638238520U,	// <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
-  2973781302U,	// <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
-  2973781303U,	// <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
-  430358630U,	// <5,6,7,0>: Cost 1 vext1 RHS, LHS
-  1504101110U,	// <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
-  1504101992U,	// <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
-  1504102550U,	// <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
-  430361910U,	// <5,6,7,4>: Cost 1 vext1 RHS, RHS
-  1504104390U,	// <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
-  1504105272U,	// <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
-  1504106092U,	// <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
-  430364462U,	// <5,6,7,u>: Cost 1 vext1 RHS, LHS
-  430366822U,	// <5,6,u,0>: Cost 1 vext1 RHS, LHS
-  1564497710U,	// <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
-  1504110184U,	// <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
-  1504110742U,	// <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
-  430370103U,	// <5,6,u,4>: Cost 1 vext1 RHS, RHS
-  1564498074U,	// <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
-  1504113146U,	// <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
-  1504113658U,	// <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
-  430372654U,	// <5,6,u,u>: Cost 1 vext1 RHS, LHS
-  2625634304U,	// <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
-  1551892582U,	// <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
-  2625634468U,	// <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
-  2571889247U,	// <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
-  2625634642U,	// <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
-  2595778728U,	// <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
-  3699376639U,	// <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
-  2260235715U,	// <5,7,0,7>: Cost 3 vrev <7,5,7,0>
-  1551893149U,	// <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
-  2625635062U,	// <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
-  2624308020U,	// <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
-  2625635222U,	// <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
-  1551893504U,	// <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
-  2571898166U,	// <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
-  2625635472U,	// <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
-  2627626227U,	// <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
-  3702031684U,	// <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
-  1555211669U,	// <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
-  2629617126U,	// <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
-  3699377670U,	// <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
-  2625635944U,	// <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
-  2625636006U,	// <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
-  2632271658U,	// <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
-  2625636201U,	// <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
-  2625636282U,	// <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
-  3708004381U,	// <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
-  2625636411U,	// <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
-  2625636502U,	// <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
-  2625636604U,	// <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
-  3699378478U,	// <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
-  2625636764U,	// <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
-  2625636866U,	// <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
-  2625636959U,	// <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
-  3699378808U,	// <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
-  2640235254U,	// <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
-  2625637150U,	// <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
-  2571919462U,	// <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
-  2571920384U,	// <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
-  3699379260U,	// <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
-  2571922019U,	// <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
-  2571922742U,	// <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
-  1551895862U,	// <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
-  2846277980U,	// <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
-  2646207951U,	// <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
-  1551896105U,	// <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
-  2583871590U,	// <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
-  2652180176U,	// <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
-  2625638177U,	// <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
-  2625638262U,	// <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
-  2583874870U,	// <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
-  2846281732U,	// <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
-  2651517015U,	// <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
-  1772539190U,	// <5,7,5,7>: Cost 2 vuzpr RHS, RHS
-  1772539191U,	// <5,7,5,u>: Cost 2 vuzpr RHS, RHS
-  2846281826U,	// <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
-  3699380615U,	// <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
-  2846281108U,	// <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
-  2589854210U,	// <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
-  2846281830U,	// <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
-  2725467658U,	// <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
-  2846281076U,	// <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
-  2846279610U,	// <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
-  2846279611U,	// <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
-  1510146150U,	// <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
-  2846282574U,	// <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
-  2583889512U,	// <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
-  2846281919U,	// <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
-  1510149430U,	// <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
-  1510150168U,	// <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
-  2583892474U,	// <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
-  2625640044U,	// <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
-  1510151982U,	// <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
-  1510154342U,	// <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
-  1551898414U,	// <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
-  2625640325U,	// <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
-  1772536477U,	// <5,7,u,3>: Cost 2 vuzpr RHS, LHS
-  1510157622U,	// <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
-  1551898778U,	// <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
-  2625640656U,	// <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
-  1772539433U,	// <5,7,u,7>: Cost 2 vuzpr RHS, RHS
-  1551898981U,	// <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
-  2625642496U,	// <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
-  1551900774U,	// <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
-  2625642660U,	// <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
-  2698630885U,	// <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
-  2687129325U,	// <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
-  2689783542U,	// <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
-  2266134675U,	// <5,u,0,6>: Cost 3 vrev <u,5,6,0>
-  2595853772U,	// <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
-  1551901341U,	// <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
-  2625643254U,	// <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
-  2625643316U,	// <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
-  1613387566U,	// <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
-  1551901697U,	// <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
-  2626307154U,	// <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
-  2689783622U,	// <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
-  2627634420U,	// <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
-  2982366536U,	// <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
-  1613387620U,	// <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
-  2846286742U,	// <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
-  2685796528U,	// <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
-  2625644136U,	// <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
-  2687129480U,	// <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
-  2632279851U,	// <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
-  2625644394U,	// <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
-  2625644474U,	// <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
-  2713966508U,	// <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
-  2625644603U,	// <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
-  2687129532U,	// <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
-  2636261649U,	// <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
-  2636925282U,	// <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
-  2625644956U,	// <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
-  1564510724U,	// <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
-  2625645160U,	// <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
-  2734610422U,	// <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
-  2640243447U,	// <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
-  1567165256U,	// <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
-  1567828889U,	// <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
-  1661163546U,	// <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
-  2734463012U,	// <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
-  2698631212U,	// <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
-  1570458842U,	// <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
-  1551904054U,	// <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
-  2846286172U,	// <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
-  2646216144U,	// <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
-  1551904297U,	// <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
-  1509982310U,	// <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
-  2560058555U,	// <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
-  2698926194U,	// <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
-  2698631295U,	// <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
-  1509985590U,	// <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
-  229035318U,	// <5,u,5,5>: Cost 1 vdup1 RHS
-  1613387930U,	// <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
-  1772547382U,	// <5,u,5,7>: Cost 2 vuzpr RHS, RHS
-  229035318U,	// <5,u,5,u>: Cost 1 vdup1 RHS
-  2566037606U,	// <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
-  2920044334U,	// <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
-  2566039445U,	// <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
-  2687129808U,	// <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
-  2566040886U,	// <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
-  2920044698U,	// <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
-  2846289268U,	// <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
-  2973781320U,	// <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
-  2687129853U,	// <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
-  430506086U,	// <5,u,7,0>: Cost 1 vext1 RHS, LHS
-  1486333117U,	// <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
-  1504249448U,	// <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
-  2040971933U,	// <5,u,7,3>: Cost 2 vtrnr RHS, LHS
-  430509384U,	// <5,u,7,4>: Cost 1 vext1 RHS, RHS
-  1504251600U,	// <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
-  118708378U,	// <5,u,7,6>: Cost 1 vrev RHS
-  2040974889U,	// <5,u,7,7>: Cost 2 vtrnr RHS, RHS
-  430511918U,	// <5,u,7,u>: Cost 1 vext1 RHS, LHS
-  430514278U,	// <5,u,u,0>: Cost 1 vext1 RHS, LHS
-  1551906606U,	// <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
-  1613388133U,	// <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
-  1772544669U,	// <5,u,u,3>: Cost 2 vuzpr RHS, LHS
-  430517577U,	// <5,u,u,4>: Cost 1 vext1 RHS, RHS
-  229035318U,	// <5,u,u,5>: Cost 1 vdup1 RHS
-  118716571U,	// <5,u,u,6>: Cost 1 vrev RHS
-  1772547625U,	// <5,u,u,7>: Cost 2 vuzpr RHS, RHS
-  430520110U,	// <5,u,u,u>: Cost 1 vext1 RHS, LHS
-  2686025728U,	// <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
-  2686025738U,	// <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
-  2686025748U,	// <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
-  3779084320U,	// <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
-  2642903388U,	// <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
-  3657723939U,	// <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
-  3926676514U,	// <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
-  3926675786U,	// <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
-  2686025802U,	// <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
-  2566070374U,	// <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
-  3759767642U,	// <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
-  1612284006U,	// <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
-  2583988738U,	// <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
-  2566073654U,	// <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
-  2583990308U,	// <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
-  2589963005U,	// <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
-  2595935702U,	// <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
-  1612284060U,	// <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
-  2686025892U,	// <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
-  2685804721U,	// <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
-  3759620282U,	// <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
-  2705342658U,	// <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
-  1612284108U,	// <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
-  3706029956U,	// <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
-  2686173406U,	// <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
-  3651769338U,	// <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
-  1612579056U,	// <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
-  3706030230U,	// <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
-  2705342720U,	// <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
-  2705342730U,	// <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
-  3706030492U,	// <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
-  2644896258U,	// <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
-  3718638154U,	// <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
-  3729918619U,	// <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
-  3926672384U,	// <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
-  2705342784U,	// <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
-  2687058250U,	// <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
-  2686026066U,	// <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
-  1613463900U,	// <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
-  3761021285U,	// <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
-  2687353198U,	// <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
-  2632289590U,	// <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
-  2645560704U,	// <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
-  2646224337U,	// <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
-  1613906322U,	// <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
-  3651788902U,	// <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
-  2687795620U,	// <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
-  3761611181U,	// <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
-  3723284326U,	// <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
-  2646224838U,	// <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
-  3718639630U,	// <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
-  2652196962U,	// <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
-  2852932918U,	// <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
-  2852932919U,	// <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
-  2852933730U,	// <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
-  2925985894U,	// <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
-  3060203622U,	// <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
-  3718640178U,	// <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
-  2656178832U,	// <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
-  3725939378U,	// <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
-  2657506098U,	// <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
-  2619020110U,	// <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
-  2925986461U,	// <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
-  2572091494U,	// <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
-  2572092310U,	// <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
-  2980495524U,	// <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
-  2572094072U,	// <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
-  2572094774U,	// <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
-  4054238242U,	// <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
-  3645837653U,	// <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
-  4054239054U,	// <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
-  2572097326U,	// <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
-  2686026378U,	// <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
-  2686026386U,	// <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
-  1612284573U,	// <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
-  2705343144U,	// <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
-  1616265906U,	// <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
-  2632292506U,	// <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
-  2590020356U,	// <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
-  2852933161U,	// <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
-  1612284627U,	// <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
-  2595995750U,	// <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
-  2646229094U,	// <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
-  3694092492U,	// <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
-  2686026486U,	// <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
-  2595999030U,	// <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
-  3767730952U,	// <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
-  2596000590U,	// <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
-  2596001246U,	// <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
-  2686026531U,	// <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
-  3763602219U,	// <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
-  2686026548U,	// <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
-  3764929346U,	// <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
-  2686026568U,	// <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
-  2691334996U,	// <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
-  3760874332U,	// <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
-  3765224294U,	// <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
-  3669751263U,	// <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
-  2686026613U,	// <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
-  2554208358U,	// <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
-  3763602311U,	// <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
-  3639895971U,	// <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
-  2686026646U,	// <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
-  2554211638U,	// <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
-  3760874411U,	// <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
-  2554212858U,	// <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
-  3802973114U,	// <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
-  2686026691U,	// <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
-  2566160486U,	// <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
-  2686026712U,	// <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
-  2686026724U,	// <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
-  3759768552U,	// <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
-  2692662262U,	// <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
-  2686026752U,	// <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
-  2590053128U,	// <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
-  3663795194U,	// <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
-  2686026775U,	// <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
-  2641587099U,	// <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
-  2693104684U,	// <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
-  3639912357U,	// <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
-  2687206462U,	// <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
-  3633941814U,	// <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
-  2693399632U,	// <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
-  3765077075U,	// <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
-  2646232530U,	// <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
-  2687206507U,	// <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
-  2647559796U,	// <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
-  3765077118U,	// <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
-  3767583878U,	// <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
-  2686026896U,	// <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
-  2693989528U,	// <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
-  3767805089U,	// <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
-  2652868706U,	// <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
-  3908250934U,	// <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
-  2686026941U,	// <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
-  2554241126U,	// <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
-  3763602639U,	// <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
-  3759547607U,	// <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
-  3115221094U,	// <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
-  2554244406U,	// <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
-  3760874739U,	// <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
-  2554245944U,	// <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
-  3719975758U,	// <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
-  3115221099U,	// <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
-  2560221286U,	// <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
-  2560222415U,	// <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
-  2980497558U,	// <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
-  3103211622U,	// <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
-  2560224566U,	// <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
-  2980495698U,	// <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
-  3633967526U,	// <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
-  4054237686U,	// <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
-  2560227118U,	// <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
-  2560229478U,	// <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
-  2686027117U,	// <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
-  2686027129U,	// <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
-  2686027132U,	// <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
-  2687206795U,	// <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
-  2686027157U,	// <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
-  2590094093U,	// <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
-  2596066790U,	// <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
-  2686027177U,	// <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
-  2646900736U,	// <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
-  1573159014U,	// <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
-  2646900900U,	// <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
-  3759769037U,	// <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
-  2641592668U,	// <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
-  3779085794U,	// <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
-  2686027244U,	// <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
-  3669816807U,	// <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
-  1573159581U,	// <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
-  2230527897U,	// <6,2,1,0>: Cost 3 vrev <2,6,0,1>
-  2646901556U,	// <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
-  2646901654U,	// <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
-  2847047782U,	// <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
-  3771049517U,	// <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
-  2646901904U,	// <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
-  2686027324U,	// <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
-  3669825000U,	// <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
-  2231117793U,	// <6,2,1,u>: Cost 3 vrev <2,6,u,1>
-  3763603029U,	// <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
-  3759769184U,	// <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
-  2686027368U,	// <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
-  2686027378U,	// <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
-  2697971326U,	// <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
-  3759769224U,	// <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
-  2698118800U,	// <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
-  3920794092U,	// <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
-  2686027423U,	// <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
-  2686027430U,	// <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
-  3759769262U,	// <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
-  2698487485U,	// <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
-  2705344196U,	// <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
-  2686027470U,	// <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
-  2698708696U,	// <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
-  2724660961U,	// <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
-  2729232104U,	// <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
-  2686027502U,	// <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
-  1567853468U,	// <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
-  3759769351U,	// <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
-  2699151118U,	// <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
-  2686027543U,	// <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
-  2699298592U,	// <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
-  1573162294U,	// <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
-  2686027564U,	// <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
-  3719982547U,	// <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
-  1573162532U,	// <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
-  3779086154U,	// <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
-  2646904528U,	// <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
-  3759769440U,	// <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
-  2699888488U,	// <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
-  2230855617U,	// <6,2,5,4>: Cost 3 vrev <2,6,4,5>
-  2646904836U,	// <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
-  2646904930U,	// <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
-  2847051062U,	// <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
-  2700257173U,	// <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
-  2687207321U,	// <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
-  2686027684U,	// <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
-  2566260656U,	// <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
-  2685806522U,	// <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
-  2687207361U,	// <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
-  2686027724U,	// <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
-  2646905656U,	// <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
-  2646905678U,	// <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
-  2686027751U,	// <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
-  2554323046U,	// <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
-  2572239606U,	// <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
-  2566268849U,	// <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
-  1906753638U,	// <6,2,7,3>: Cost 2 vzipr RHS, LHS
-  2554326326U,	// <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
-  3304687564U,	// <6,2,7,5>: Cost 4 vrev <2,6,5,7>
-  2980495708U,	// <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
-  2646906476U,	// <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
-  1906753643U,	// <6,2,7,u>: Cost 2 vzipr RHS, LHS
-  1591744256U,	// <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
-  1573164846U,	// <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
-  2701805650U,	// <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
-  1906761830U,	// <6,2,u,3>: Cost 2 vzipr RHS, LHS
-  2686027875U,	// <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
-  1573165210U,	// <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
-  2686322800U,	// <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
-  2847051305U,	// <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
-  1906761835U,	// <6,2,u,u>: Cost 2 vzipr RHS, LHS
-  3759769739U,	// <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
-  2686027926U,	// <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
-  2686027937U,	// <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
-  3640027286U,	// <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
-  2687207601U,	// <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
-  2705344698U,	// <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
-  3663917847U,	// <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
-  2237008560U,	// <6,3,0,7>: Cost 3 vrev <3,6,7,0>
-  2686027989U,	// <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
-  3759769823U,	// <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
-  3759769830U,	// <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
-  3759769841U,	// <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
-  3759769848U,	// <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
-  2703280390U,	// <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
-  3759769868U,	// <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
-  3704063194U,	// <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
-  3767732510U,	// <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
-  2703280390U,	// <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
-  3704063468U,	// <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
-  2630321724U,	// <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
-  3759769921U,	// <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
-  3759769928U,	// <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
-  3704063767U,	// <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
-  3704063876U,	// <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
-  2636957626U,	// <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
-  3777907058U,	// <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
-  2630321724U,	// <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
-  3759769983U,	// <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
-  3710036245U,	// <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
-  2636958054U,	// <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
-  2686028188U,	// <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
-  2704607656U,	// <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
-  3773041072U,	// <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
-  3711363731U,	// <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
-  3767732676U,	// <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
-  2707999179U,	// <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
-  2584232038U,	// <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
-  2642267118U,	// <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
-  2642930751U,	// <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
-  2705197552U,	// <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
-  2584235318U,	// <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
-  1631603202U,	// <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
-  2654211444U,	// <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
-  2237041332U,	// <6,3,4,7>: Cost 3 vrev <3,6,7,4>
-  1631824413U,	// <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
-  3640066150U,	// <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
-  3772746288U,	// <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
-  3640067790U,	// <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
-  3773041216U,	// <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
-  2705934922U,	// <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
-  3773041236U,	// <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
-  3779086940U,	// <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
-  3767732831U,	// <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
-  2706229870U,	// <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
-  2602164326U,	// <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
-  2654212512U,	// <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
-  2566334393U,	// <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
-  3704066588U,	// <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
-  2602167524U,	// <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
-  3710702321U,	// <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
-  2724661933U,	// <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
-  3710702465U,	// <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
-  2602170158U,	// <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
-  1492598886U,	// <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
-  2560369889U,	// <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
-  1492600762U,	// <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
-  2566342806U,	// <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
-  1492602166U,	// <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
-  2602176208U,	// <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
-  2566345210U,	// <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
-  2980496528U,	// <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
-  1492604718U,	// <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
-  1492607078U,	// <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
-  2686028574U,	// <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
-  1492608955U,	// <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
-  2566350998U,	// <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
-  1492610358U,	// <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
-  1634257734U,	// <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
-  2566353489U,	// <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
-  2980504720U,	// <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
-  1492612910U,	// <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
-  3703406592U,	// <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
-  2629664870U,	// <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
-  2629664972U,	// <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
-  3779087232U,	// <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
-  2642936156U,	// <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
-  2712570770U,	// <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
-  2687208348U,	// <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
-  3316723081U,	// <6,4,0,7>: Cost 4 vrev <4,6,7,0>
-  2629665437U,	// <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
-  2242473291U,	// <6,4,1,0>: Cost 3 vrev <4,6,0,1>
-  3700089652U,	// <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
-  3703407510U,	// <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
-  2852962406U,	// <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
-  3628166454U,	// <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
-  3760876514U,	// <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
-  2687208430U,	// <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
-  3316731274U,	// <6,4,1,7>: Cost 4 vrev <4,6,7,1>
-  2243063187U,	// <6,4,1,u>: Cost 3 vrev <4,6,u,1>
-  2629666284U,	// <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
-  3703408188U,	// <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
-  3703408232U,	// <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
-  3703408294U,	// <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
-  2632320816U,	// <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
-  2923384118U,	// <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
-  2687208508U,	// <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
-  3760950341U,	// <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
-  2634975348U,	// <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
-  3703408790U,	// <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
-  3316305238U,	// <6,4,3,1>: Cost 4 vrev <4,6,1,3>
-  3703408947U,	// <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
-  3703409052U,	// <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
-  2644929026U,	// <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
-  3718670922U,	// <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
-  2705345682U,	// <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
-  3926705152U,	// <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
-  2668817222U,	// <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
-  2590277734U,	// <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
-  3716017135U,	// <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
-  2642938944U,	// <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
-  3717344401U,	// <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
-  2712571088U,	// <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
-  2629668150U,	// <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
-  1637649636U,	// <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
-  2646257109U,	// <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
-  1637649636U,	// <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
-  2566398054U,	// <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
-  3760876805U,	// <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
-  2566399937U,	// <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
-  2584316418U,	// <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
-  2566401334U,	// <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
-  2584318028U,	// <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
-  1612287286U,	// <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
-  2852965686U,	// <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
-  1612287304U,	// <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
-  1504608358U,	// <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
-  2578350838U,	// <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
-  2578351720U,	// <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
-  2578352278U,	// <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
-  1504611638U,	// <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
-  2578353872U,	// <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
-  2578354682U,	// <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
-  2578355194U,	// <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
-  1504614190U,	// <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
-  2572386406U,	// <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
-  2572387226U,	// <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
-  3640157902U,	// <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
-  2572389020U,	// <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
-  2572389686U,	// <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
-  2980497102U,	// <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
-  2980495564U,	// <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
-  4054239090U,	// <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
-  2572392238U,	// <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
-  1504608358U,	// <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
-  2629670702U,	// <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
-  2566424516U,	// <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
-  2584340994U,	// <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
-  1640156694U,	// <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
-  2629671066U,	// <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
-  1612287529U,	// <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
-  2852965929U,	// <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
-  1612287547U,	// <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
-  3708723200U,	// <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
-  2634981478U,	// <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
-  3694125260U,	// <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
-  3779087962U,	// <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
-  3760877154U,	// <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
-  4195110916U,	// <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
-  3696779775U,	// <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
-  1175212130U,	// <6,5,0,7>: Cost 2 vrev <5,6,7,0>
-  1175285867U,	// <6,5,0,u>: Cost 2 vrev <5,6,u,0>
-  2248445988U,	// <6,5,1,0>: Cost 3 vrev <5,6,0,1>
-  3698107237U,	// <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
-  3708724118U,	// <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
-  3908575334U,	// <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
-  3716023376U,	// <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
-  3708724368U,	// <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
-  3767733960U,	// <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
-  2712571600U,	// <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
-  2712571609U,	// <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
-  2578391142U,	// <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
-  3704079934U,	// <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
-  3708724840U,	// <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
-  3705407182U,	// <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
-  2578394422U,	// <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
-  3717351272U,	// <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
-  2634983354U,	// <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
-  3115486518U,	// <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
-  2634983541U,	// <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
-  3708725398U,	// <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
-  3710052631U,	// <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
-  3708725606U,	// <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
-  3708725660U,	// <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
-  2643610114U,	// <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
-  3717352010U,	// <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
-  3773632358U,	// <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
-  2248978533U,	// <6,5,3,7>: Cost 3 vrev <5,6,7,3>
-  2249052270U,	// <6,5,3,u>: Cost 3 vrev <5,6,u,3>
-  2596323430U,	// <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
-  3716025328U,	// <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
-  3716688961U,	// <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
-  2643610770U,	// <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
-  2596326710U,	// <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
-  2634984758U,	// <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
-  3767734199U,	// <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
-  1643696070U,	// <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
-  1643769807U,	// <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
-  2578415718U,	// <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
-  3652158198U,	// <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
-  3652159080U,	// <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
-  3652159638U,	// <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
-  2578418998U,	// <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
-  2712571908U,	// <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
-  2718027790U,	// <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
-  2712571928U,	// <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
-  2712571937U,	// <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
-  2705346596U,	// <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
-  3767144496U,	// <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
-  3773116473U,	// <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
-  2705346626U,	// <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
-  2705346636U,	// <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
-  3908577217U,	// <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
-  2578428728U,	// <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
-  2712572002U,	// <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
-  2705346668U,	// <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
-  2560516198U,	// <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
-  2560517363U,	// <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
-  2566490060U,	// <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
-  3634260118U,	// <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
-  2560519478U,	// <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
-  2980498650U,	// <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
-  2980497922U,	// <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
-  3103214902U,	// <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
-  2560522030U,	// <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
-  2560524390U,	// <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
-  2560525556U,	// <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
-  2566498253U,	// <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
-  2646931439U,	// <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
-  2560527670U,	// <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
-  2634987674U,	// <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
-  2980506114U,	// <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
-  1175277674U,	// <6,5,u,7>: Cost 2 vrev <5,6,7,u>
-  1175351411U,	// <6,5,u,u>: Cost 2 vrev <5,6,u,u>
-  2578448486U,	// <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
-  1573191782U,	// <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
-  2686030124U,	// <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
-  3779088690U,	// <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
-  2687209788U,	// <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
-  3652194000U,	// <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
-  2254852914U,	// <6,6,0,6>: Cost 3 vrev <6,6,6,0>
-  4041575734U,	// <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
-  1573192349U,	// <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
-  2646934262U,	// <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
-  2646934324U,	// <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
-  2646934422U,	// <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
-  2846785638U,	// <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
-  3760951694U,	// <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
-  2646934672U,	// <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
-  2712572320U,	// <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
-  3775549865U,	// <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
-  2846785643U,	// <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
-  3759772094U,	// <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
-  3704751676U,	// <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
-  2631009936U,	// <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
-  2646935206U,	// <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
-  3759772127U,	// <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
-  3704752004U,	// <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
-  2646935482U,	// <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
-  2712572410U,	// <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
-  2712572419U,	// <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
-  2646935702U,	// <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
-  3777024534U,	// <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
-  3704752453U,	// <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
-  2646935964U,	// <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
-  2705347122U,	// <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
-  3779678778U,	// <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
-  2657553069U,	// <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
-  4039609654U,	// <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
-  2708001366U,	// <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
-  2578481254U,	// <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
-  3652223734U,	// <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
-  3760951922U,	// <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
-  3779089019U,	// <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
-  1570540772U,	// <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
-  1573195062U,	// <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
-  2712572560U,	// <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
-  2723410591U,	// <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
-  1573195304U,	// <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
-  3640287334U,	// <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
-  2646937296U,	// <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
-  3640289235U,	// <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
-  3720679279U,	// <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
-  2646937542U,	// <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
-  2646937604U,	// <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
-  2646937698U,	// <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
-  2846788918U,	// <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
-  2846788919U,	// <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
-  1516699750U,	// <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
-  2590442230U,	// <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
-  2646938106U,	// <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
-  2590443670U,	// <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
-  1516703030U,	// <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
-  2590445264U,	// <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
-  296144182U,	// <6,6,6,6>: Cost 1 vdup2 RHS
-  2712572738U,	// <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
-  296144182U,	// <6,6,6,u>: Cost 1 vdup2 RHS
-  2566561894U,	// <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
-  3634332924U,	// <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
-  2566563797U,	// <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
-  2584480258U,	// <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
-  2566565174U,	// <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
-  2717438846U,	// <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
-  2980500280U,	// <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
-  1906756918U,	// <6,6,7,7>: Cost 2 vzipr RHS, RHS
-  1906756919U,	// <6,6,7,u>: Cost 2 vzipr RHS, RHS
-  1516699750U,	// <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
-  1573197614U,	// <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
-  2566571990U,	// <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
-  2846786205U,	// <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
-  1516703030U,	// <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
-  1573197978U,	// <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
-  296144182U,	// <6,6,u,6>: Cost 1 vdup2 RHS
-  1906765110U,	// <6,6,u,7>: Cost 2 vzipr RHS, RHS
-  296144182U,	// <6,6,u,u>: Cost 1 vdup2 RHS
-  1571209216U,	// <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
-  497467494U,	// <6,7,0,1>: Cost 1 vext2 RHS, LHS
-  1571209380U,	// <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
-  2644951292U,	// <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
-  1571209554U,	// <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
-  1510756450U,	// <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
-  2644951542U,	// <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
-  2584499194U,	// <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
-  497468061U,	// <6,7,0,u>: Cost 1 vext2 RHS, LHS
-  1571209974U,	// <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
-  1571210036U,	// <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
-  1571210134U,	// <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
-  1571210200U,	// <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
-  2644952098U,	// <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
-  1571210384U,	// <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
-  2644952271U,	// <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
-  2578535418U,	// <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
-  1571210605U,	// <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
-  2644952509U,	// <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
-  2644952582U,	// <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
-  1571210856U,	// <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
-  1571210918U,	// <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
-  2644952828U,	// <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
-  2633009028U,	// <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
-  1571211194U,	// <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
-  2668840938U,	// <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
-  1571211323U,	// <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
-  1571211414U,	// <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
-  2644953311U,	// <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
-  2644953390U,	// <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
-  1571211676U,	// <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
-  1571211778U,	// <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
-  2644953648U,	// <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
-  2644953720U,	// <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
-  2644953795U,	// <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
-  1571212062U,	// <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
-  1573202834U,	// <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
-  2644954058U,	// <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
-  2644954166U,	// <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
-  2644954258U,	// <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
-  1571212496U,	// <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
-  497470774U,	// <6,7,4,5>: Cost 1 vext2 RHS, RHS
-  1573203316U,	// <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
-  2646281688U,	// <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
-  497471017U,	// <6,7,4,u>: Cost 1 vext2 RHS, RHS
-  2644954696U,	// <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
-  1573203664U,	// <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
-  2644954878U,	// <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
-  2644954991U,	// <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
-  1571213254U,	// <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
-  1571213316U,	// <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
-  1571213410U,	// <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
-  1573204136U,	// <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
-  1573204217U,	// <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
-  2644955425U,	// <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
-  2644955561U,	// <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
-  1573204474U,	// <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
-  2644955698U,	// <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
-  2644955789U,	// <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
-  2644955889U,	// <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
-  1571214136U,	// <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
-  1571214158U,	// <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
-  1573204895U,	// <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
-  1573204986U,	// <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
-  2572608656U,	// <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
-  2644956362U,	// <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
-  2572610231U,	// <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
-  1573205350U,	// <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
-  2646947220U,	// <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
-  1516786498U,	// <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
-  1571214956U,	// <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
-  1573205634U,	// <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
-  1571215059U,	// <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
-  497473326U,	// <6,7,u,1>: Cost 1 vext2 RHS, LHS
-  1571215237U,	// <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
-  1571215292U,	// <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
-  1571215423U,	// <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
-  497473690U,	// <6,7,u,5>: Cost 1 vext2 RHS, RHS
-  1571215568U,	// <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
-  1573206272U,	// <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
-  497473893U,	// <6,7,u,u>: Cost 1 vext2 RHS, LHS
-  1571217408U,	// <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
-  497475686U,	// <6,u,0,1>: Cost 1 vext2 RHS, LHS
-  1571217572U,	// <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
-  2689865445U,	// <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
-  1571217746U,	// <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
-  1510830187U,	// <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
-  2644959734U,	// <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
-  1193130221U,	// <6,u,0,7>: Cost 2 vrev <u,6,7,0>
-  497476253U,	// <6,u,0,u>: Cost 1 vext2 RHS, LHS
-  1571218166U,	// <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
-  1571218228U,	// <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
-  1612289838U,	// <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
-  1571218392U,	// <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
-  2566663478U,	// <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
-  1571218576U,	// <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
-  2644960463U,	// <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
-  2717439835U,	// <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
-  1612289892U,	// <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
-  1504870502U,	// <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
-  2644960774U,	// <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
-  1571219048U,	// <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
-  1571219110U,	// <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
-  1504873782U,	// <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
-  2633017221U,	// <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
-  1571219386U,	// <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
-  2712573868U,	// <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
-  1571219515U,	// <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
-  1571219606U,	// <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
-  2644961503U,	// <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
-  2566678499U,	// <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
-  1571219868U,	// <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
-  1571219970U,	// <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
-  2689865711U,	// <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
-  2708002806U,	// <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
-  2644961987U,	// <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
-  1571220254U,	// <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
-  1571220370U,	// <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
-  2644962250U,	// <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
-  1661245476U,	// <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
-  2686031917U,	// <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
-  1571220688U,	// <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
-  497478967U,	// <6,u,4,5>: Cost 1 vext2 RHS, RHS
-  1571220852U,	// <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
-  1661614161U,	// <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
-  497479209U,	// <6,u,4,u>: Cost 1 vext2 RHS, RHS
-  2566692966U,	// <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
-  1571221200U,	// <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
-  2566694885U,	// <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
-  2689865855U,	// <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
-  1571221446U,	// <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
-  1571221508U,	// <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
-  1612290202U,	// <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
-  1571221672U,	// <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
-  1612290220U,	// <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
-  1504903270U,	// <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
-  2644963752U,	// <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
-  1571222010U,	// <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
-  2686032080U,	// <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
-  1504906550U,	// <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
-  2644964079U,	// <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
-  296144182U,	// <6,u,6,6>: Cost 1 vdup2 RHS
-  1571222350U,	// <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
-  296144182U,	// <6,u,6,u>: Cost 1 vdup2 RHS
-  1492967526U,	// <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
-  2560738574U,	// <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
-  1492969447U,	// <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
-  1906753692U,	// <6,u,7,3>: Cost 2 vzipr RHS, LHS
-  1492970806U,	// <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
-  2980495761U,	// <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
-  1516860235U,	// <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
-  1906756936U,	// <6,u,7,7>: Cost 2 vzipr RHS, RHS
-  1492973358U,	// <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
-  1492975718U,	// <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
-  497481518U,	// <6,u,u,1>: Cost 1 vext2 RHS, LHS
-  1612290405U,	// <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
-  1571223484U,	// <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
-  1492978998U,	// <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
-  497481882U,	// <6,u,u,5>: Cost 1 vext2 RHS, RHS
-  296144182U,	// <6,u,u,6>: Cost 1 vdup2 RHS
-  1906765128U,	// <6,u,u,7>: Cost 2 vzipr RHS, RHS
-  497482085U,	// <6,u,u,u>: Cost 1 vext2 RHS, LHS
-  1638318080U,	// <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
-  1638318090U,	// <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
-  1638318100U,	// <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
-  3646442178U,	// <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
-  2712059941U,	// <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
-  2651603364U,	// <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
-  2590618445U,	// <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
-  3785801798U,	// <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
-  1638318153U,	// <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
-  1516879974U,	// <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
-  2693922911U,	// <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
-  564576358U,	// <7,0,1,2>: Cost 1 vext3 RHS, LHS
-  2638996480U,	// <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
-  1516883254U,	// <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
-  2649613456U,	// <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
-  1516884814U,	// <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
-  2590626808U,	// <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
-  564576412U,	// <7,0,1,u>: Cost 1 vext3 RHS, LHS
-  1638318244U,	// <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
-  2692743344U,	// <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
-  2712060084U,	// <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
-  2712060094U,	// <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
-  1638318284U,	// <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
-  2712060118U,	// <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
-  2651604922U,	// <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
-  2686255336U,	// <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
-  1638318316U,	// <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
-  2651605142U,	// <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
-  2712060156U,	// <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
-  2712060165U,	// <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
-  2651605404U,	// <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
-  2651605506U,	// <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
-  2638998111U,	// <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
-  2639661744U,	// <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
-  3712740068U,	// <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
-  2640989010U,	// <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
-  2712060232U,	// <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
-  1638318418U,	// <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
-  1638318428U,	// <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
-  3646474950U,	// <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
-  2712060270U,	// <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
-  1577864502U,	// <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
-  2651606388U,	// <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
-  3787792776U,	// <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
-  1638318481U,	// <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
-  2590654566U,	// <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
-  2651606736U,	// <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
-  2712060334U,	// <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
-  2649616239U,	// <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
-  2651606982U,	// <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
-  2651607044U,	// <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
-  1577865314U,	// <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
-  2651607208U,	// <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
-  1579192580U,	// <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
-  2688393709U,	// <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
-  2712060406U,	// <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
-  2688541183U,	// <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
-  2655588936U,	// <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
-  3762430481U,	// <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
-  2651607730U,	// <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
-  2651607864U,	// <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
-  2651607886U,	// <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
-  2688983605U,	// <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
-  2651608058U,	// <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
-  2932703334U,	// <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
-  3066921062U,	// <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
-  3712742678U,	// <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
-  2651608422U,	// <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
-  2651608513U,	// <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
-  2663552532U,	// <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
-  2651608684U,	// <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
-  2651608706U,	// <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
-  1638318730U,	// <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
-  1638318738U,	// <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
-  564576925U,	// <7,0,u,2>: Cost 1 vext3 RHS, LHS
-  2572765898U,	// <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
-  1638318770U,	// <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
-  1577867418U,	// <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
-  1516942165U,	// <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
-  2651609344U,	// <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
-  564576979U,	// <7,0,u,u>: Cost 1 vext3 RHS, LHS
-  2590687334U,	// <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
-  2639003750U,	// <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
-  2793357414U,	// <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
-  1638318838U,	// <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
-  2590690614U,	// <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
-  2712060679U,	// <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
-  2590692182U,	// <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
-  3785802521U,	// <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
-  1638318883U,	// <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
-  2712060715U,	// <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
-  1638318900U,	// <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
-  3774300994U,	// <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
-  1638318920U,	// <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
-  2712060755U,	// <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
-  2691416926U,	// <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
-  2590700375U,	// <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
-  3765158766U,	// <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
-  1638318965U,	// <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
-  2712060796U,	// <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
-  2712060807U,	// <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
-  3712747112U,	// <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
-  1638318998U,	// <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
-  2712060836U,	// <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
-  2712060843U,	// <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
-  2590708568U,	// <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
-  2735948730U,	// <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
-  1638319043U,	// <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
-  2712060876U,	// <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
-  1638319064U,	// <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
-  2712060894U,	// <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
-  2692596718U,	// <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
-  2712060917U,	// <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
-  1619002368U,	// <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
-  2692817929U,	// <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
-  2735948814U,	// <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
-  1619223579U,	// <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
-  2712060962U,	// <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
-  2712060971U,	// <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
-  2712060980U,	// <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
-  2712060989U,	// <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
-  3785802822U,	// <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
-  2639007030U,	// <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
-  2645642634U,	// <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
-  3719384520U,	// <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
-  2639007273U,	// <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
-  2572812390U,	// <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
-  2693776510U,	// <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
-  3774301318U,	// <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
-  1620182160U,	// <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
-  2572815670U,	// <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
-  3766486178U,	// <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
-  2651615331U,	// <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
-  2652278964U,	// <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
-  1620550845U,	// <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
-  3768108230U,	// <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
-  2694440143U,	// <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
-  2712061144U,	// <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
-  2694587617U,	// <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
-  3768403178U,	// <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
-  2694735091U,	// <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
-  3768550652U,	// <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
-  2652279630U,	// <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
-  2694956302U,	// <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
-  2645644282U,	// <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
-  2859062094U,	// <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
-  3779462437U,	// <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
-  3121938534U,	// <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
-  2554916150U,	// <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
-  3769140548U,	// <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
-  3726022164U,	// <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
-  2554918508U,	// <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
-  3121938539U,	// <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
-  2572836966U,	// <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
-  1638319469U,	// <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
-  2712061299U,	// <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
-  1622173059U,	// <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
-  2572840246U,	// <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
-  1622320533U,	// <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
-  2696136094U,	// <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
-  2859060777U,	// <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
-  1622541744U,	// <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
-  2712061364U,	// <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
-  2712061373U,	// <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
-  2712061380U,	// <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
-  2712061389U,	// <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
-  2712061404U,	// <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
-  2696725990U,	// <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
-  2712061417U,	// <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
-  3785803251U,	// <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
-  2696947201U,	// <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
-  2712061446U,	// <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
-  3785803276U,	// <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
-  3785803285U,	// <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
-  2712061471U,	// <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
-  2712061482U,	// <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
-  3766486576U,	// <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
-  2712061500U,	// <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
-  2602718850U,	// <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
-  2712061516U,	// <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
-  2712061525U,	// <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
-  2712061536U,	// <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
-  1638319720U,	// <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
-  1638319730U,	// <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
-  2712061565U,	// <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
-  2698053256U,	// <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
-  2712061584U,	// <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
-  3771795096U,	// <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
-  1638319775U,	// <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
-  1638319782U,	// <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
-  2693924531U,	// <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
-  2700560061U,	// <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
-  2693924551U,	// <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
-  1638319822U,	// <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
-  2698716889U,	// <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
-  2712061665U,	// <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
-  2735949540U,	// <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
-  1638319854U,	// <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
-  2712061692U,	// <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
-  2712061698U,	// <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
-  2712061708U,	// <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
-  2712061718U,	// <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
-  2712061728U,	// <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
-  2699380522U,	// <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
-  2712061740U,	// <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
-  3809691445U,	// <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
-  2699601733U,	// <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
-  2699675470U,	// <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
-  3766486867U,	// <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
-  2699822944U,	// <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
-  2692745065U,	// <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
-  2699970418U,	// <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
-  3766486907U,	// <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
-  2700117892U,	// <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
-  3771795334U,	// <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
-  2692745110U,	// <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
-  2572894310U,	// <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
-  2712061860U,	// <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
-  2700486577U,	// <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
-  1626818490U,	// <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
-  2572897590U,	// <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
-  2700707788U,	// <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
-  2700781525U,	// <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
-  3774597086U,	// <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
-  1627187175U,	// <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
-  2735949802U,	// <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
-  3780200434U,	// <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
-  3773564928U,	// <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
-  2986541158U,	// <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
-  2554989878U,	// <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
-  3775113245U,	// <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
-  4060283228U,	// <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
-  2554992236U,	// <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
-  2986541163U,	// <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
-  1638320187U,	// <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
-  2693924936U,	// <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
-  1638319720U,	// <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
-  1628145756U,	// <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
-  1638320227U,	// <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
-  2702035054U,	// <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
-  2702108791U,	// <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
-  2735949945U,	// <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
-  1628514441U,	// <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
-  2712062091U,	// <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
-  1638320278U,	// <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
-  2712062109U,	// <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
-  2590836886U,	// <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
-  2712062128U,	// <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
-  2712062138U,	// <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
-  2590839656U,	// <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
-  3311414017U,	// <7,3,0,7>: Cost 4 vrev <3,7,7,0>
-  1638320341U,	// <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
-  2237164227U,	// <7,3,1,0>: Cost 3 vrev <3,7,0,1>
-  2712062182U,	// <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
-  2712062193U,	// <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
-  2692745468U,	// <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
-  2712062214U,	// <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
-  2693925132U,	// <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
-  3768183059U,	// <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
-  2692745504U,	// <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
-  2696063273U,	// <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
-  2712062254U,	// <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
-  2712062262U,	// <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
-  2712062273U,	// <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
-  2712062280U,	// <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
-  2712062294U,	// <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
-  2712062302U,	// <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
-  2700560742U,	// <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
-  2712062319U,	// <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
-  2712062325U,	// <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
-  2712062335U,	// <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
-  2636368158U,	// <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
-  2637031791U,	// <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
-  1638320540U,	// <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
-  2712062374U,	// <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
-  2704689586U,	// <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
-  2590864235U,	// <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
-  2704837060U,	// <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
-  1638320540U,	// <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
-  2712062416U,	// <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
-  2712062426U,	// <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
-  2566981640U,	// <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
-  2712062447U,	// <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
-  2712062456U,	// <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
-  1638320642U,	// <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
-  2648313204U,	// <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
-  3311446789U,	// <7,3,4,7>: Cost 4 vrev <3,7,7,4>
-  1638320669U,	// <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
-  2602819686U,	// <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
-  1574571728U,	// <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
-  2648977185U,	// <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
-  2705869378U,	// <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
-  2237491947U,	// <7,3,5,4>: Cost 3 vrev <3,7,4,5>
-  2706016852U,	// <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
-  2648313954U,	// <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
-  2692745823U,	// <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
-  1579217159U,	// <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
-  2706311800U,	// <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
-  2654286249U,	// <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
-  1581208058U,	// <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
-  2706533011U,	// <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
-  2706606748U,	// <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
-  3780422309U,	// <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
-  2712062637U,	// <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
-  2706827959U,	// <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
-  1585189856U,	// <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
-  2693925571U,	// <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
-  2693925584U,	// <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
-  2700561114U,	// <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
-  2572978916U,	// <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
-  2693925611U,	// <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
-  2707344118U,	// <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
-  2654950894U,	// <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
-  2648315500U,	// <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
-  2693925643U,	// <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
-  2237221578U,	// <7,3,u,0>: Cost 3 vrev <3,7,0,u>
-  1638320926U,	// <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
-  1593153452U,	// <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
-  1638320540U,	// <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
-  2237516526U,	// <7,3,u,4>: Cost 3 vrev <3,7,4,u>
-  1638320966U,	// <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
-  2712062796U,	// <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
-  2692967250U,	// <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
-  1638320989U,	// <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
-  2651635712U,	// <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
-  1577893990U,	// <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
-  2651635876U,	// <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
-  3785804672U,	// <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
-  2651636050U,	// <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
-  1638468498U,	// <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
-  1638468508U,	// <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
-  3787795364U,	// <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
-  1640459181U,	// <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
-  2651636470U,	// <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
-  2651636532U,	// <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
-  2712062922U,	// <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
-  2639029248U,	// <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
-  2712062940U,	// <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
-  2712062946U,	// <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
-  2712062958U,	// <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
-  3785804791U,	// <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
-  2712062973U,	// <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
-  3785804807U,	// <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
-  3785804818U,	// <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
-  2651637352U,	// <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
-  2651637414U,	// <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
-  3716753194U,	// <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
-  2712063030U,	// <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
-  2712063036U,	// <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
-  3773123658U,	// <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
-  2712063054U,	// <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
-  2651637910U,	// <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
-  3712772348U,	// <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
-  3785804906U,	// <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
-  2651638172U,	// <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
-  2651638274U,	// <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
-  2639030883U,	// <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
-  2712063122U,	// <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
-  3712772836U,	// <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
-  2641021782U,	// <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
-  2714053802U,	// <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
-  3785804978U,	// <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
-  3716754505U,	// <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
-  3785804998U,	// <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
-  1638321360U,	// <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
-  1638468826U,	// <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
-  1638468836U,	// <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
-  3785215214U,	// <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
-  1640459509U,	// <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
-  1517207654U,	// <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
-  2573034640U,	// <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
-  2712063246U,	// <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
-  2573036267U,	// <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
-  1517210934U,	// <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
-  2711989549U,	// <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
-  564579638U,	// <7,4,5,6>: Cost 1 vext3 RHS, RHS
-  2651639976U,	// <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
-  564579656U,	// <7,4,5,u>: Cost 1 vext3 RHS, RHS
-  2712063307U,	// <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
-  3767668056U,	// <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
-  2651640314U,	// <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
-  2655621708U,	// <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
-  1638468980U,	// <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
-  2712063358U,	// <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
-  2712063367U,	// <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
-  2712210826U,	// <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
-  1638469012U,	// <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
-  2651640826U,	// <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
-  3773713830U,	// <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
-  3773713842U,	// <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
-  3780349372U,	// <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
-  2651641140U,	// <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
-  2712210888U,	// <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
-  2712210898U,	// <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
-  2651641452U,	// <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
-  2713538026U,	// <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
-  1517232230U,	// <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
-  1577899822U,	// <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
-  2712063489U,	// <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
-  2573060846U,	// <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
-  1640312342U,	// <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
-  1638469146U,	// <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
-  564579881U,	// <7,4,u,6>: Cost 1 vext3 RHS, RHS
-  2714054192U,	// <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
-  564579899U,	// <7,4,u,u>: Cost 1 vext3 RHS, RHS
-  2579038310U,	// <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
-  2636382310U,	// <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
-  2796339302U,	// <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
-  3646810719U,	// <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
-  2712063586U,	// <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
-  2735951467U,	// <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
-  2735951476U,	// <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
-  2579043322U,	// <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
-  2636382877U,	// <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
-  2712211087U,	// <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
-  3698180916U,	// <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
-  3710124950U,	// <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
-  2636383232U,	// <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
-  2712211127U,	// <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
-  2590994128U,	// <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
-  2590995323U,	// <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
-  1638469328U,	// <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
-  1638469337U,	// <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
-  3785805536U,	// <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
-  3785805544U,	// <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
-  3704817288U,	// <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
-  2712063742U,	// <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
-  3716761386U,	// <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
-  2714054415U,	// <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
-  3774304024U,	// <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
-  2712063777U,	// <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
-  2712063787U,	// <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
-  3634888806U,	// <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
-  2636384544U,	// <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
-  3710790001U,	// <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
-  3710126492U,	// <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
-  3634892086U,	// <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
-  2639039076U,	// <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
-  3713444533U,	// <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
-  2693926767U,	// <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
-  2712063864U,	// <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
-  2579071078U,	// <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
-  3646841856U,	// <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
-  3716762698U,	// <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
-  3646843491U,	// <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
-  2579074358U,	// <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
-  2636385590U,	// <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
-  2645675406U,	// <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
-  1638322118U,	// <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
-  1638469583U,	// <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
-  2714054611U,	// <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
-  2652974800U,	// <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
-  3710127905U,	// <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
-  3785805808U,	// <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
-  2712211450U,	// <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
-  1638322180U,	// <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
-  2712064014U,	// <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
-  1638469656U,	// <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
-  1638469665U,	// <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
-  2712064036U,	// <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
-  2714054707U,	// <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
-  3785805879U,	// <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
-  2712064066U,	// <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
-  2712064076U,	// <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
-  2714054743U,	// <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
-  2712064096U,	// <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
-  1638322274U,	// <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
-  1638469739U,	// <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
-  1511325798U,	// <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
-  2692747392U,	// <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
-  2585069160U,	// <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
-  2573126390U,	// <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
-  1511329078U,	// <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
-  1638469800U,	// <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
-  2712211626U,	// <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
-  2712211636U,	// <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
-  1638469823U,	// <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
-  1511333990U,	// <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
-  2636388142U,	// <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
-  2712211671U,	// <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
-  2573134583U,	// <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
-  1511337270U,	// <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
-  1638469881U,	// <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
-  2712064258U,	// <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
-  1638469892U,	// <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
-  1638469904U,	// <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
-  2650324992U,	// <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
-  1576583270U,	// <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
-  2712064300U,	// <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
-  2255295336U,	// <7,6,0,3>: Cost 3 vrev <6,7,3,0>
-  2712064316U,	// <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
-  2585088098U,	// <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
-  2735952204U,	// <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
-  2712211799U,	// <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
-  1576583837U,	// <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
-  1181340494U,	// <7,6,1,0>: Cost 2 vrev <6,7,0,1>
-  2650325812U,	// <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
-  2650325910U,	// <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
-  2650325976U,	// <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
-  2579123510U,	// <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
-  2650326160U,	// <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
-  2714055072U,	// <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
-  2712064425U,	// <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
-  1181930390U,	// <7,6,1,u>: Cost 2 vrev <6,7,u,1>
-  2712211897U,	// <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
-  2714055108U,	// <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
-  2650326632U,	// <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
-  2650326694U,	// <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
-  2714055137U,	// <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
-  2714055148U,	// <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
-  2650326970U,	// <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
-  1638470138U,	// <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
-  1638470147U,	// <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
-  2650327190U,	// <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
-  2255172441U,	// <7,6,3,1>: Cost 3 vrev <6,7,1,3>
-  2255246178U,	// <7,6,3,2>: Cost 3 vrev <6,7,2,3>
-  2650327452U,	// <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
-  2712064562U,	// <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
-  2650327627U,	// <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
-  3713452726U,	// <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
-  2700563016U,	// <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
-  2712064593U,	// <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
-  2650327954U,	// <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
-  2735952486U,	// <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
-  2735952497U,	// <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
-  2255328108U,	// <7,6,4,3>: Cost 3 vrev <6,7,3,4>
-  2712212100U,	// <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
-  1576586550U,	// <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
-  2714055312U,	// <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
-  2712212126U,	// <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
-  1576586793U,	// <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
-  2579152998U,	// <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
-  2650328784U,	// <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
-  2714055364U,	// <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
-  3785806538U,	// <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
-  1576587206U,	// <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
-  2650329092U,	// <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
-  2650329186U,	// <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
-  2712064753U,	// <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
-  1181963162U,	// <7,6,5,u>: Cost 2 vrev <6,7,u,5>
-  2714055421U,	// <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
-  2714055432U,	// <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
-  2650329594U,	// <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
-  3785806619U,	// <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
-  2712212260U,	// <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
-  2714055472U,	// <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
-  1638323000U,	// <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
-  1638470466U,	// <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
-  1638470475U,	// <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
-  1638323022U,	// <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
-  2712064854U,	// <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
-  2712064865U,	// <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
-  2712064872U,	// <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
-  1638323062U,	// <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
-  2712064894U,	// <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
-  2712064905U,	// <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
-  2712064915U,	// <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
-  1638323094U,	// <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
-  1638470559U,	// <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
-  1576589102U,	// <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
-  2712212402U,	// <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
-  2712212409U,	// <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
-  1638470599U,	// <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
-  1576589466U,	// <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
-  1638323000U,	// <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
-  1638470624U,	// <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
-  1638470631U,	// <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
-  2712065007U,	// <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
-  1638323194U,	// <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
-  2712065025U,	// <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
-  3646958337U,	// <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
-  2712065044U,	// <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
-  2585161907U,	// <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
-  2591134604U,	// <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
-  2591134714U,	// <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
-  1638323257U,	// <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
-  2712065091U,	// <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
-  2712065098U,	// <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
-  2712065109U,	// <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
-  2692748384U,	// <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
-  2585169206U,	// <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
-  2693928048U,	// <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
-  2585170766U,	// <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
-  2735953024U,	// <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
-  2695918731U,	// <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
-  3770471574U,	// <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
-  3785807002U,	// <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
-  2712065189U,	// <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
-  2712065196U,	// <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
-  3773125818U,	// <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
-  3766490305U,	// <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
-  2700563658U,	// <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
-  2735953107U,	// <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
-  2701890780U,	// <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
-  2712065251U,	// <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
-  3766490350U,	// <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
-  3774305530U,	// <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
-  2637728196U,	// <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
-  2712065291U,	// <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
-  2585186486U,	// <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
-  2639719095U,	// <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
-  2640382728U,	// <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
-  2641046361U,	// <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
-  2712212792U,	// <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
-  3646989312U,	// <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
-  3785807176U,	// <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
-  3646991109U,	// <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
-  2712065371U,	// <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
-  1638323558U,	// <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
-  2712212845U,	// <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
-  2591167846U,	// <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
-  1638323585U,	// <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
-  2585198694U,	// <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
-  2712212884U,	// <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
-  3711471393U,	// <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
-  2649673590U,	// <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
-  2712065455U,	// <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
-  1577259032U,	// <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
-  2712065473U,	// <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
-  2712212936U,	// <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
-  1579249931U,	// <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
-  2591178854U,	// <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
-  2735953374U,	// <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
-  2712212974U,	// <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
-  2655646287U,	// <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
-  2591182134U,	// <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
-  2656973553U,	// <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
-  1583895362U,	// <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
-  2712065556U,	// <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
-  1585222628U,	// <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
-  1523417190U,	// <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
-  2597159670U,	// <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
-  2597160552U,	// <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
-  2597161110U,	// <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
-  1523420470U,	// <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
-  2651002296U,	// <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
-  2657637906U,	// <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
-  363253046U,	// <7,7,7,7>: Cost 1 vdup3 RHS
-  363253046U,	// <7,7,7,u>: Cost 1 vdup3 RHS
-  1523417190U,	// <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
-  1638471298U,	// <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
-  2712213132U,	// <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
-  2712213138U,	// <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
-  1523420470U,	// <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
-  1638471338U,	// <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
-  1595840756U,	// <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
-  363253046U,	// <7,7,u,7>: Cost 1 vdup3 RHS
-  363253046U,	// <7,7,u,u>: Cost 1 vdup3 RHS
-  1638318080U,	// <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
-  1638323923U,	// <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
-  1662211804U,	// <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
-  1638323941U,	// <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
-  2712065773U,	// <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
-  1662359286U,	// <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
-  1662359296U,	// <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
-  2987150664U,	// <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
-  1638323986U,	// <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
-  1517469798U,	// <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
-  1638318900U,	// <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
-  564582190U,	// <7,u,1,2>: Cost 1 vext3 RHS, LHS
-  1638324023U,	// <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
-  1517473078U,	// <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
-  2693928777U,	// <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
-  1517474710U,	// <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
-  1640462171U,	// <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
-  564582244U,	// <7,u,1,u>: Cost 1 vext3 RHS, LHS
-  1638318244U,	// <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
-  2712065907U,	// <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
-  1638319720U,	// <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
-  1638324101U,	// <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
-  1638318284U,	// <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
-  2712065947U,	// <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
-  2700564387U,	// <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
-  1640314796U,	// <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
-  1638324146U,	// <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
-  1638324156U,	// <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
-  1638319064U,	// <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
-  2700564435U,	// <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
-  1638320540U,	// <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
-  1638324196U,	// <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
-  1638324207U,	// <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
-  2700564472U,	// <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
-  2695919610U,	// <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
-  1638324228U,	// <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
-  2712066061U,	// <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
-  1662212122U,	// <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
-  1662212132U,	// <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
-  2712066092U,	// <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
-  1638321360U,	// <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
-  1638324287U,	// <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
-  1662359624U,	// <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
-  1640314961U,	// <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
-  1638324314U,	// <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
-  1517502566U,	// <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
-  1574612693U,	// <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
-  2712066162U,	// <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
-  1638324351U,	// <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
-  1576603592U,	// <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
-  1577267225U,	// <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
-  564582554U,	// <7,u,5,6>: Cost 1 vext3 RHS, RHS
-  1640462499U,	// <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
-  564582572U,	// <7,u,5,u>: Cost 1 vext3 RHS, RHS
-  2712066223U,	// <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
-  2712066238U,	// <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
-  1581249023U,	// <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
-  1638324432U,	// <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
-  1638468980U,	// <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
-  2712066274U,	// <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
-  1583903555U,	// <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
-  1640315117U,	// <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
-  1638324477U,	// <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
-  1638471936U,	// <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
-  2692970763U,	// <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
-  2700933399U,	// <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
-  2573347601U,	// <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
-  1638471976U,	// <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
-  1511551171U,	// <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
-  2712213815U,	// <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
-  363253046U,	// <7,u,7,7>: Cost 1 vdup3 RHS
-  363253046U,	// <7,u,7,u>: Cost 1 vdup3 RHS
-  1638324561U,	// <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
-  1638324571U,	// <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
-  564582757U,	// <7,u,u,2>: Cost 1 vext3 RHS, LHS
-  1638324587U,	// <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
-  1638324601U,	// <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
-  1638324611U,	// <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
-  564582797U,	// <7,u,u,6>: Cost 1 vext3 RHS, RHS
-  363253046U,	// <7,u,u,7>: Cost 1 vdup3 RHS
-  564582811U,	// <7,u,u,u>: Cost 1 vext3 RHS, LHS
-  135053414U,	// <u,0,0,0>: Cost 1 vdup0 LHS
-  1611489290U,	// <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
-  1611489300U,	// <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
-  2568054923U,	// <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
-  1481706806U,	// <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
-  2555449040U,	// <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
-  2591282078U,	// <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
-  2591945711U,	// <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
-  135053414U,	// <u,0,0,u>: Cost 1 vdup0 LHS
-  1493655654U,	// <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
-  1860550758U,	// <u,0,1,1>: Cost 2 vzipl LHS, LHS
-  537747563U,	// <u,0,1,2>: Cost 1 vext3 LHS, LHS
-  2625135576U,	// <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
-  1493658934U,	// <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
-  2625135760U,	// <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
-  1517548447U,	// <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
-  2591290362U,	// <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
-  537747612U,	// <u,0,1,u>: Cost 1 vext3 LHS, LHS
-  1611489444U,	// <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
-  2685231276U,	// <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
-  1994768486U,	// <u,0,2,2>: Cost 2 vtrnl LHS, LHS
-  2685231294U,	// <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
-  1611489484U,	// <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
-  2712068310U,	// <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
-  2625136570U,	// <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
-  2591962097U,	// <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
-  1611489516U,	// <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
-  2954067968U,	// <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
-  2685231356U,	// <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
-  72589981U,	// <u,0,3,2>: Cost 1 vrev LHS
-  2625137052U,	// <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
-  2625137154U,	// <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
-  2639071848U,	// <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
-  2639735481U,	// <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
-  2597279354U,	// <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
-  73032403U,	// <u,0,3,u>: Cost 1 vrev LHS
-  2687074636U,	// <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
-  1611489618U,	// <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
-  1611489628U,	// <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
-  3629222038U,	// <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
-  2555481398U,	// <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
-  1551396150U,	// <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
-  2651680116U,	// <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
-  2646150600U,	// <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
-  1611932050U,	// <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
-  2561458278U,	// <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
-  1863532646U,	// <u,0,5,1>: Cost 2 vzipl RHS, LHS
-  2712068526U,	// <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
-  2649689976U,	// <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
-  2220237489U,	// <u,0,5,4>: Cost 3 vrev <0,u,4,5>
-  2651680772U,	// <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
-  1577939051U,	// <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
-  2830077238U,	// <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
-  1579266317U,	// <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
-  2555494502U,	// <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
-  2712068598U,	// <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
-  1997750374U,	// <u,0,6,2>: Cost 2 vtrnl RHS, LHS
-  2655662673U,	// <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
-  2555497782U,	// <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
-  2651681459U,	// <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
-  2651681592U,	// <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
-  2651681614U,	// <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
-  1997750428U,	// <u,0,6,u>: Cost 2 vtrnl RHS, LHS
-  2567446630U,	// <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
-  2567447446U,	// <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
-  2567448641U,	// <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
-  2573421338U,	// <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
-  2567449910U,	// <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
-  2651682242U,	// <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
-  2591339429U,	// <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
-  2651682412U,	// <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
-  2567452462U,	// <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
-  135053414U,	// <u,0,u,0>: Cost 1 vdup0 LHS
-  1611489938U,	// <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
-  537748125U,	// <u,0,u,2>: Cost 1 vext3 LHS, LHS
-  2685674148U,	// <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
-  1611932338U,	// <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
-  1551399066U,	// <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
-  1517605798U,	// <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
-  2830077481U,	// <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
-  537748179U,	// <u,0,u,u>: Cost 1 vext3 LHS, LHS
-  1544101961U,	// <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
-  1558036582U,	// <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
-  2619171051U,	// <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
-  1611490038U,	// <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
-  2555522358U,	// <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
-  2712068871U,	// <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
-  2591355815U,	// <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
-  2597328512U,	// <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
-  1611490083U,	// <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
-  1481785446U,	// <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
-  202162278U,	// <u,1,1,1>: Cost 1 vdup1 LHS
-  2555528808U,	// <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
-  1611490120U,	// <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
-  1481788726U,	// <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
-  2689876828U,	// <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
-  2591364008U,	// <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
-  2592691274U,	// <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
-  202162278U,	// <u,1,1,u>: Cost 1 vdup1 LHS
-  1499709542U,	// <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
-  2689876871U,	// <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
-  2631116445U,	// <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
-  835584U,	// <u,1,2,3>: Cost 0 copy LHS
-  1499712822U,	// <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
-  2689876907U,	// <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
-  2631780282U,	// <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
-  1523603074U,	// <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
-  835584U,	// <u,1,2,u>: Cost 0 copy LHS
-  1487773798U,	// <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
-  1611490264U,	// <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
-  2685232094U,	// <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
-  2018746470U,	// <u,1,3,3>: Cost 2 vtrnr LHS, LHS
-  1487777078U,	// <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
-  1611490304U,	// <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
-  2685674505U,	// <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
-  2640407307U,	// <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
-  1611490327U,	// <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
-  1567992749U,	// <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
-  2693121070U,	// <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
-  2693194807U,	// <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
-  1152386432U,	// <u,1,4,3>: Cost 2 vrev <1,u,3,4>
-  2555555126U,	// <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
-  1558039862U,	// <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
-  2645716371U,	// <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
-  2597361284U,	// <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
-  1152755117U,	// <u,1,4,u>: Cost 2 vrev <1,u,u,4>
-  1481818214U,	// <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
-  2555560694U,	// <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
-  2555561576U,	// <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
-  1611490448U,	// <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
-  1481821494U,	// <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
-  2651025435U,	// <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
-  2651689068U,	// <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
-  2823966006U,	// <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
-  1611932861U,	// <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
-  2555568230U,	// <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
-  2689877199U,	// <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
-  2712069336U,	// <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
-  2685232353U,	// <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
-  2555571510U,	// <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
-  2689877235U,	// <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
-  2657661765U,	// <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
-  1584583574U,	// <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
-  1585247207U,	// <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
-  2561548390U,	// <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
-  2561549681U,	// <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
-  2573493926U,	// <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
-  2042962022U,	// <u,1,7,3>: Cost 2 vtrnr RHS, LHS
-  2561551670U,	// <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
-  2226300309U,	// <u,1,7,5>: Cost 3 vrev <1,u,5,7>
-  2658325990U,	// <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
-  2658326124U,	// <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
-  2042962027U,	// <u,1,7,u>: Cost 2 vtrnr RHS, LHS
-  1481842790U,	// <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
-  202162278U,	// <u,1,u,1>: Cost 1 vdup1 LHS
-  2685674867U,	// <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
-  835584U,	// <u,1,u,3>: Cost 0 copy LHS
-  1481846070U,	// <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
-  1611933077U,	// <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
-  2685674910U,	// <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
-  1523652232U,	// <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
-  835584U,	// <u,1,u,u>: Cost 0 copy LHS
-  1544110154U,	// <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
-  1545437286U,	// <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
-  1545437420U,	// <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
-  2685232589U,	// <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
-  2619179346U,	// <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
-  2712069606U,	// <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
-  2689877484U,	// <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
-  2659656273U,	// <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
-  1545437853U,	// <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
-  1550082851U,	// <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
-  2619179828U,	// <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
-  2619179926U,	// <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
-  2685232671U,	// <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
-  2555604278U,	// <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
-  2619180176U,	// <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
-  2689877564U,	// <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
-  2602718850U,	// <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
-  1158703235U,	// <u,2,1,u>: Cost 2 vrev <2,u,u,1>
-  1481867366U,	// <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
-  2555609846U,	// <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
-  269271142U,	// <u,2,2,2>: Cost 1 vdup2 LHS
-  1611490930U,	// <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
-  1481870646U,	// <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
-  2689877640U,	// <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
-  2619180986U,	// <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
-  2593436837U,	// <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
-  269271142U,	// <u,2,2,u>: Cost 1 vdup2 LHS
-  408134301U,	// <u,2,3,0>: Cost 1 vext1 LHS, LHS
-  1481876214U,	// <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  1481877096U,	// <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
-  1880326246U,	// <u,2,3,3>: Cost 2 vzipr LHS, LHS
-  408137014U,	// <u,2,3,4>: Cost 1 vext1 LHS, RHS
-  1529654992U,	// <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
-  1529655802U,	// <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1529656314U,	// <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  408139566U,	// <u,2,3,u>: Cost 1 vext1 LHS, LHS
-  1567853468U,	// <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
-  2561598362U,	// <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
-  2555627214U,	// <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
-  2685232918U,	// <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
-  2555628854U,	// <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
-  1545440566U,	// <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
-  1571982740U,	// <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
-  2592125957U,	// <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
-  1545440809U,	// <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
-  2555633766U,	// <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
-  2561606550U,	// <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
-  2689877856U,	// <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
-  2685233000U,	// <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
-  1158441059U,	// <u,2,5,4>: Cost 2 vrev <2,u,4,5>
-  2645725188U,	// <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
-  2689877892U,	// <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
-  2823900470U,	// <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
-  1158736007U,	// <u,2,5,u>: Cost 2 vrev <2,u,u,5>
-  1481900134U,	// <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
-  2555642614U,	// <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
-  2555643496U,	// <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
-  1611491258U,	// <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
-  1481903414U,	// <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
-  2689877964U,	// <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
-  2689877973U,	// <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
-  2645726030U,	// <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
-  1611933671U,	// <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
-  1585919033U,	// <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
-  2573566710U,	// <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
-  2567596115U,	// <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
-  1906901094U,	// <u,2,7,3>: Cost 2 vzipr RHS, LHS
-  2555653430U,	// <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
-  2800080230U,	// <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
-  2980643164U,	// <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
-  2645726828U,	// <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
-  1906901099U,	// <u,2,7,u>: Cost 2 vzipr RHS, LHS
-  408175266U,	// <u,2,u,0>: Cost 1 vext1 LHS, LHS
-  1545443118U,	// <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
-  269271142U,	// <u,2,u,2>: Cost 1 vdup2 LHS
-  1611491416U,	// <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
-  408177974U,	// <u,2,u,4>: Cost 1 vext1 LHS, RHS
-  1545443482U,	// <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
-  1726339226U,	// <u,2,u,6>: Cost 2 vuzpl LHS, RHS
-  1529697274U,	// <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  408180526U,	// <u,2,u,u>: Cost 1 vext1 LHS, LHS
-  1544781824U,	// <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
-  471040156U,	// <u,3,0,1>: Cost 1 vext2 LHS, LHS
-  1544781988U,	// <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
-  2618523900U,	// <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
-  1544782162U,	// <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
-  2238188352U,	// <u,3,0,5>: Cost 3 vrev <3,u,5,0>
-  2623169023U,	// <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
-  2238335826U,	// <u,3,0,7>: Cost 3 vrev <3,u,7,0>
-  471040669U,	// <u,3,0,u>: Cost 1 vext2 LHS, LHS
-  1544782582U,	// <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
-  1544782644U,	// <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
-  1544782742U,	// <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
-  1544782808U,	// <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
-  2618524733U,	// <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
-  1544782992U,	// <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
-  2618524897U,	// <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
-  2703517987U,	// <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
-  1544783213U,	// <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
-  1529716838U,	// <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
-  1164167966U,	// <u,3,2,1>: Cost 2 vrev <3,u,1,2>
-  1544783464U,	// <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
-  1544783526U,	// <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
-  1529720118U,	// <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
-  2618525544U,	// <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
-  1544783802U,	// <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
-  2704181620U,	// <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
-  1544783931U,	// <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
-  1544784022U,	// <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
-  1487922559U,	// <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
-  1493895256U,	// <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
-  336380006U,	// <u,3,3,3>: Cost 1 vdup3 LHS
-  1544784386U,	// <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
-  2824054478U,	// <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
-  2238286668U,	// <u,3,3,6>: Cost 3 vrev <3,u,6,3>
-  2954069136U,	// <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
-  336380006U,	// <u,3,3,u>: Cost 1 vdup3 LHS
-  1487929446U,	// <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
-  1487930752U,	// <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
-  2623171644U,	// <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
-  2561673366U,	// <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
-  1487932726U,	// <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
-  471043382U,	// <u,3,4,5>: Cost 1 vext2 LHS, RHS
-  1592561012U,	// <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
-  2238368598U,	// <u,3,4,7>: Cost 3 vrev <3,u,7,4>
-  471043625U,	// <u,3,4,u>: Cost 1 vext2 LHS, RHS
-  2555707494U,	// <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
-  1574645465U,	// <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
-  2567653106U,	// <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
-  2555709954U,	// <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
-  1592561606U,	// <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
-  1592561668U,	// <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
-  1592561762U,	// <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
-  1750314294U,	// <u,3,5,7>: Cost 2 vuzpr LHS, RHS
-  1750314295U,	// <u,3,5,u>: Cost 2 vuzpr LHS, RHS
-  2623172897U,	// <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
-  2561688962U,	// <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
-  1581281795U,	// <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
-  2706541204U,	// <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
-  2623173261U,	// <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
-  1164495686U,	// <u,3,6,5>: Cost 2 vrev <3,u,5,6>
-  1592562488U,	// <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
-  1592562510U,	// <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
-  1164716897U,	// <u,3,6,u>: Cost 2 vrev <3,u,u,6>
-  1487954022U,	// <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
-  1487955331U,	// <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
-  1493928028U,	// <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
-  2561697942U,	// <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
-  1487957302U,	// <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
-  2707352311U,	// <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
-  2655024623U,	// <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
-  1592563308U,	// <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
-  1487959854U,	// <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
-  1544787667U,	// <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
-  471045934U,	// <u,3,u,1>: Cost 1 vext2 LHS, LHS
-  1549432709U,	// <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
-  336380006U,	// <u,3,u,3>: Cost 1 vdup3 LHS
-  1544788031U,	// <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
-  471046298U,	// <u,3,u,5>: Cost 1 vext2 LHS, RHS
-  1549433040U,	// <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
-  1750314537U,	// <u,3,u,7>: Cost 2 vuzpr LHS, RHS
-  471046501U,	// <u,3,u,u>: Cost 1 vext2 LHS, LHS
-  2625167360U,	// <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
-  1551425638U,	// <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
-  2619195630U,	// <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
-  2619343104U,	// <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
-  2625167698U,	// <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
-  1638329234U,	// <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
-  1638329244U,	// <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
-  3787803556U,	// <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
-  1551426205U,	// <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
-  2555748454U,	// <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
-  2625168180U,	// <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
-  1551426503U,	// <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
-  2625168344U,	// <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
-  2555751734U,	// <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
-  1860554038U,	// <u,4,1,5>: Cost 2 vzipl LHS, RHS
-  2689879022U,	// <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
-  2592248852U,	// <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
-  1555408301U,	// <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
-  2555756646U,	// <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
-  2625168943U,	// <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
-  2625169000U,	// <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
-  2619197134U,	// <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
-  2555759926U,	// <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
-  2712071222U,	// <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
-  1994771766U,	// <u,4,2,6>: Cost 2 vtrnl LHS, RHS
-  2592257045U,	// <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
-  1994771784U,	// <u,4,2,u>: Cost 2 vtrnl LHS, RHS
-  2625169558U,	// <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
-  2567709594U,	// <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
-  2567710817U,	// <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
-  2625169820U,	// <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
-  2625169922U,	// <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
-  2954069710U,	// <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
-  2954068172U,	// <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
-  3903849472U,	// <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
-  2954068174U,	// <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
-  1505919078U,	// <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
-  2567717831U,	// <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
-  2567719010U,	// <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
-  2570373542U,	// <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
-  161926454U,	// <u,4,4,4>: Cost 1 vdup0 RHS
-  1551428918U,	// <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
-  1638329572U,	// <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
-  2594927963U,	// <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
-  161926454U,	// <u,4,4,u>: Cost 1 vdup0 RHS
-  1493983334U,	// <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
-  2689879301U,	// <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
-  1493985379U,	// <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
-  2567727254U,	// <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
-  1493986614U,	// <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
-  1863535926U,	// <u,4,5,5>: Cost 2 vzipl RHS, RHS
-  537750838U,	// <u,4,5,6>: Cost 1 vext3 LHS, RHS
-  2830110006U,	// <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
-  537750856U,	// <u,4,5,u>: Cost 1 vext3 LHS, RHS
-  1482047590U,	// <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
-  2555790070U,	// <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
-  2555790952U,	// <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
-  2555791510U,	// <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
-  1482050870U,	// <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
-  2689879422U,	// <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
-  1997753654U,	// <u,4,6,6>: Cost 2 vtrnl RHS, RHS
-  2712071562U,	// <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
-  1482053422U,	// <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
-  2567741542U,	// <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
-  2567742362U,	// <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
-  2567743589U,	// <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
-  2573716286U,	// <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
-  2567744822U,	// <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
-  2712071624U,	// <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
-  96808489U,	// <u,4,7,6>: Cost 1 vrev RHS
-  2651715180U,	// <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
-  96955963U,	// <u,4,7,u>: Cost 1 vrev RHS
-  1482063974U,	// <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
-  1551431470U,	// <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
-  1494009958U,	// <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
-  2555807894U,	// <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
-  161926454U,	// <u,4,u,4>: Cost 1 vdup0 RHS
-  1551431834U,	// <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
-  537751081U,	// <u,4,u,6>: Cost 1 vext3 LHS, RHS
-  2830110249U,	// <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
-  537751099U,	// <u,4,u,u>: Cost 1 vext3 LHS, RHS
-  2631811072U,	// <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
-  1558069350U,	// <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
-  2619203823U,	// <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
-  2619867456U,	// <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
-  1546273106U,	// <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
-  2733010539U,	// <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
-  2597622682U,	// <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
-  1176539396U,	// <u,5,0,7>: Cost 2 vrev <5,u,7,0>
-  1558069917U,	// <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
-  1505968230U,	// <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
-  2624512887U,	// <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
-  2631811990U,	// <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
-  2618541056U,	// <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
-  1505971510U,	// <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
-  2627167419U,	// <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
-  2579714554U,	// <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
-  1638330064U,	// <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
-  1638477529U,	// <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
-  2561802342U,	// <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
-  2561803264U,	// <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
-  2631149217U,	// <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
-  1558071026U,	// <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
-  2561805622U,	// <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
-  2714062607U,	// <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
-  2631813050U,	// <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
-  3092335926U,	// <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
-  1561389191U,	// <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
-  2561810534U,	// <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
-  2561811857U,	// <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
-  2631813474U,	// <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
-  2631813532U,	// <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
-  2619869698U,	// <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
-  3001847002U,	// <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
-  2954070530U,	// <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
-  2018749750U,	// <u,5,3,7>: Cost 2 vtrnr LHS, RHS
-  2018749751U,	// <u,5,3,u>: Cost 2 vtrnr LHS, RHS
-  2573762662U,	// <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
-  2620017634U,	// <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
-  2573764338U,	// <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
-  2573765444U,	// <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
-  1570680053U,	// <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
-  1558072630U,	// <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
-  2645749143U,	// <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
-  1638330310U,	// <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
-  1558072873U,	// <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
-  1506000998U,	// <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
-  2561827984U,	// <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
-  2579744360U,	// <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
-  2579744918U,	// <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
-  1506004278U,	// <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
-  229035318U,	// <u,5,5,5>: Cost 1 vdup1 RHS
-  2712072206U,	// <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
-  1638330392U,	// <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
-  229035318U,	// <u,5,5,u>: Cost 1 vdup1 RHS
-  1500037222U,	// <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
-  2561836436U,	// <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
-  2567809133U,	// <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
-  1500040006U,	// <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
-  1500040502U,	// <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
-  2714062935U,	// <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
-  2712072288U,	// <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
-  27705344U,	// <u,5,6,7>: Cost 0 copy RHS
-  27705344U,	// <u,5,6,u>: Cost 0 copy RHS
-  1488101478U,	// <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
-  1488102805U,	// <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
-  2561844840U,	// <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
-  2561845398U,	// <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
-  1488104758U,	// <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
-  1638330536U,	// <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
-  2712072362U,	// <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
-  2042965302U,	// <u,5,7,7>: Cost 2 vtrnr RHS, RHS
-  1488107310U,	// <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
-  1488109670U,	// <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
-  1488110998U,	// <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
-  2561853032U,	// <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
-  1500056392U,	// <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
-  1488112950U,	// <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
-  229035318U,	// <u,5,u,5>: Cost 1 vdup1 RHS
-  2954111490U,	// <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
-  27705344U,	// <u,5,u,7>: Cost 0 copy RHS
-  27705344U,	// <u,5,u,u>: Cost 0 copy RHS
-  2619211776U,	// <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
-  1545470054U,	// <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
-  1545470192U,	// <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
-  2255958969U,	// <u,6,0,3>: Cost 3 vrev <6,u,3,0>
-  1546797458U,	// <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
-  2720624971U,	// <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
-  2256180180U,	// <u,6,0,6>: Cost 3 vrev <6,u,6,0>
-  2960682294U,	// <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
-  1545470621U,	// <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
-  1182004127U,	// <u,6,1,0>: Cost 2 vrev <6,u,0,1>
-  2619212596U,	// <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
-  2619212694U,	// <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
-  2619212760U,	// <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
-  2626511979U,	// <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
-  2619212944U,	// <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
-  2714063264U,	// <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
-  2967326006U,	// <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
-  1182594023U,	// <u,6,1,u>: Cost 2 vrev <6,u,u,1>
-  1506050150U,	// <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
-  2579792630U,	// <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
-  2619213416U,	// <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
-  2619213478U,	// <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
-  1506053430U,	// <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
-  2633148309U,	// <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
-  2619213754U,	// <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
-  1638330874U,	// <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
-  1638478339U,	// <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
-  2619213974U,	// <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
-  2255836074U,	// <u,6,3,1>: Cost 3 vrev <6,u,1,3>
-  2255909811U,	// <u,6,3,2>: Cost 3 vrev <6,u,2,3>
-  2619214236U,	// <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
-  1564715549U,	// <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
-  2639121006U,	// <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
-  3001847012U,	// <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
-  1880329526U,	// <u,6,3,7>: Cost 2 vzipr LHS, RHS
-  1880329527U,	// <u,6,3,u>: Cost 2 vzipr LHS, RHS
-  2567864422U,	// <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
-  2733011558U,	// <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
-  2567866484U,	// <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
-  2638458005U,	// <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
-  1570540772U,	// <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
-  1545473334U,	// <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
-  1572015512U,	// <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
-  2960715062U,	// <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
-  1545473577U,	// <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
-  2567872614U,	// <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
-  2645757648U,	// <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
-  2567874490U,	// <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
-  2576501250U,	// <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
-  1576660943U,	// <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
-  2645757956U,	// <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
-  2645758050U,	// <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
-  2824080694U,	// <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
-  1182626795U,	// <u,6,5,u>: Cost 2 vrev <6,u,u,5>
-  1506082918U,	// <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
-  2579825398U,	// <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
-  2645758458U,	// <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
-  2579826838U,	// <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
-  1506086198U,	// <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
-  2579828432U,	// <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
-  296144182U,	// <u,6,6,6>: Cost 1 vdup2 RHS
-  1638331202U,	// <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
-  296144182U,	// <u,6,6,u>: Cost 1 vdup2 RHS
-  432349286U,	// <u,6,7,0>: Cost 1 vext1 RHS, LHS
-  1506091766U,	// <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
-  1506092648U,	// <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
-  1506093206U,	// <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
-  432352809U,	// <u,6,7,4>: Cost 1 vext1 RHS, RHS
-  1506094800U,	// <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
-  1506095610U,	// <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
-  1906904374U,	// <u,6,7,7>: Cost 2 vzipr RHS, RHS
-  432355118U,	// <u,6,7,u>: Cost 1 vext1 RHS, LHS
-  432357478U,	// <u,6,u,0>: Cost 1 vext1 RHS, LHS
-  1545475886U,	// <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
-  1506100840U,	// <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
-  1506101398U,	// <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
-  432361002U,	// <u,6,u,4>: Cost 1 vext1 RHS, RHS
-  1545476250U,	// <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
-  296144182U,	// <u,6,u,6>: Cost 1 vdup2 RHS
-  1880370486U,	// <u,6,u,7>: Cost 2 vzipr LHS, RHS
-  432363310U,	// <u,6,u,u>: Cost 1 vext1 RHS, LHS
-  1571356672U,	// <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
-  497614950U,	// <u,7,0,1>: Cost 1 vext2 RHS, LHS
-  1571356836U,	// <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
-  2573880146U,	// <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
-  1571357010U,	// <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
-  1512083716U,	// <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
-  2621874741U,	// <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
-  2585826298U,	// <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
-  497615517U,	// <u,7,0,u>: Cost 1 vext2 RHS, LHS
-  1571357430U,	// <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
-  1571357492U,	// <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
-  1571357590U,	// <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
-  1552114715U,	// <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
-  2573888822U,	// <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
-  1553441981U,	// <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
-  2627847438U,	// <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
-  2727408775U,	// <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
-  1555432880U,	// <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
-  2629838337U,	// <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
-  1188058754U,	// <u,7,2,1>: Cost 2 vrev <7,u,1,2>
-  1571358312U,	// <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
-  1571358374U,	// <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
-  2632492869U,	// <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
-  2633156502U,	// <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
-  1560078311U,	// <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
-  2728072408U,	// <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
-  1561405577U,	// <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
-  1571358870U,	// <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
-  2627184913U,	// <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
-  2633820523U,	// <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
-  1571359132U,	// <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
-  1571359234U,	// <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
-  1512108295U,	// <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
-  1518080992U,	// <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
-  2640456465U,	// <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
-  1571359518U,	// <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
-  1571359634U,	// <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
-  2573911067U,	// <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
-  2645101622U,	// <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
-  2573912918U,	// <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
-  1571359952U,	// <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
-  497618248U,	// <u,7,4,5>: Cost 1 vext2 RHS, RHS
-  1571360116U,	// <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
-  2645102024U,	// <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
-  497618473U,	// <u,7,4,u>: Cost 1 vext2 RHS, RHS
-  2645102152U,	// <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
-  1571360464U,	// <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
-  2645102334U,	// <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
-  2645102447U,	// <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
-  1571360710U,	// <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
-  1571360772U,	// <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
-  1571360866U,	// <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
-  1571360936U,	// <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
-  1571361017U,	// <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
-  1530044518U,	// <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
-  2645103016U,	// <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
-  1571361274U,	// <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
-  2645103154U,	// <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
-  1530047798U,	// <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
-  1188386474U,	// <u,7,6,5>: Cost 2 vrev <7,u,5,6>
-  1571361592U,	// <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
-  1571361614U,	// <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
-  1571361695U,	// <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
-  1571361786U,	// <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
-  2573935616U,	// <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
-  2645103781U,	// <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
-  2573937497U,	// <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
-  1571362150U,	// <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
-  1512141067U,	// <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
-  1518113764U,	// <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
-  363253046U,	// <u,7,7,7>: Cost 1 vdup3 RHS
-  363253046U,	// <u,7,7,u>: Cost 1 vdup3 RHS
-  1571362515U,	// <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
-  497620782U,	// <u,7,u,1>: Cost 1 vext2 RHS, LHS
-  1571362693U,	// <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
-  1571362748U,	// <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
-  1571362879U,	// <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
-  497621146U,	// <u,7,u,5>: Cost 1 vext2 RHS, RHS
-  1571363024U,	// <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
-  363253046U,	// <u,7,u,7>: Cost 1 vdup3 RHS
-  497621349U,	// <u,7,u,u>: Cost 1 vext2 RHS, LHS
-  135053414U,	// <u,u,0,0>: Cost 1 vdup0 LHS
-  471081121U,	// <u,u,0,1>: Cost 1 vext2 LHS, LHS
-  1544822948U,	// <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
-  1616140005U,	// <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
-  1544823122U,	// <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
-  1512157453U,	// <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
-  1662220032U,	// <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
-  1194457487U,	// <u,u,0,7>: Cost 2 vrev <u,u,7,0>
-  471081629U,	// <u,u,0,u>: Cost 1 vext2 LHS, LHS
-  1544823542U,	// <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
-  202162278U,	// <u,u,1,1>: Cost 1 vdup1 LHS
-  537753390U,	// <u,u,1,2>: Cost 1 vext3 LHS, LHS
-  1544823768U,	// <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
-  1494248758U,	// <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
-  1544823952U,	// <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
-  1518138343U,	// <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
-  1640322907U,	// <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
-  537753444U,	// <u,u,1,u>: Cost 1 vext3 LHS, LHS
-  1482309734U,	// <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
-  1194031451U,	// <u,u,2,1>: Cost 2 vrev <u,u,1,2>
-  269271142U,	// <u,u,2,2>: Cost 1 vdup2 LHS
-  835584U,	// <u,u,2,3>: Cost 0 copy LHS
-  1482313014U,	// <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
-  2618566504U,	// <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
-  1544824762U,	// <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
-  1638479788U,	// <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
-  835584U,	// <u,u,2,u>: Cost 0 copy LHS
-  408576723U,	// <u,u,3,0>: Cost 1 vext1 LHS, LHS
-  1482318582U,	// <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  120371557U,	// <u,u,3,2>: Cost 1 vrev LHS
-  336380006U,	// <u,u,3,3>: Cost 1 vdup3 LHS
-  408579382U,	// <u,u,3,4>: Cost 1 vext1 LHS, RHS
-  1616140271U,	// <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
-  1530098170U,	// <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1880329544U,	// <u,u,3,7>: Cost 2 vzipr LHS, RHS
-  408581934U,	// <u,u,3,u>: Cost 1 vext1 LHS, LHS
-  1488298086U,	// <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
-  1488299437U,	// <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
-  1659271204U,	// <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
-  1194195311U,	// <u,u,4,3>: Cost 2 vrev <u,u,3,4>
-  161926454U,	// <u,u,4,4>: Cost 1 vdup0 RHS
-  471084342U,	// <u,u,4,5>: Cost 1 vext2 LHS, RHS
-  1571368308U,	// <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
-  1640323153U,	// <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
-  471084585U,	// <u,u,4,u>: Cost 1 vext2 LHS, RHS
-  1494278246U,	// <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
-  1571368656U,	// <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
-  1494280327U,	// <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
-  1616140415U,	// <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
-  1494281526U,	// <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
-  229035318U,	// <u,u,5,5>: Cost 1 vdup1 RHS
-  537753754U,	// <u,u,5,6>: Cost 1 vext3 LHS, RHS
-  1750355254U,	// <u,u,5,7>: Cost 2 vuzpr LHS, RHS
-  537753772U,	// <u,u,5,u>: Cost 1 vext3 LHS, RHS
-  1482342502U,	// <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
-  2556084982U,	// <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
-  1571369466U,	// <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
-  1611938000U,	// <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
-  1482345782U,	// <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
-  1194359171U,	// <u,u,6,5>: Cost 2 vrev <u,u,5,6>
-  296144182U,	// <u,u,6,6>: Cost 1 vdup2 RHS
-  27705344U,	// <u,u,6,7>: Cost 0 copy RHS
-  27705344U,	// <u,u,6,u>: Cost 0 copy RHS
-  432496742U,	// <u,u,7,0>: Cost 1 vext1 RHS, LHS
-  1488324016U,	// <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
-  1494296713U,	// <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
-  1906901148U,	// <u,u,7,3>: Cost 2 vzipr RHS, LHS
-  432500283U,	// <u,u,7,4>: Cost 1 vext1 RHS, RHS
-  1506242256U,	// <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
-  120699277U,	// <u,u,7,6>: Cost 1 vrev RHS
-  363253046U,	// <u,u,7,7>: Cost 1 vdup3 RHS
-  432502574U,	// <u,u,7,u>: Cost 1 vext1 RHS, LHS
-  408617688U,	// <u,u,u,0>: Cost 1 vext1 LHS, LHS
-  471086894U,	// <u,u,u,1>: Cost 1 vext2 LHS, LHS
-  537753957U,	// <u,u,u,2>: Cost 1 vext3 LHS, LHS
-  835584U,	// <u,u,u,3>: Cost 0 copy LHS
-  408620342U,	// <u,u,u,4>: Cost 1 vext1 LHS, RHS
-  471087258U,	// <u,u,u,5>: Cost 1 vext2 LHS, RHS
-  537753997U,	// <u,u,u,6>: Cost 1 vext3 LHS, RHS
-  27705344U,	// <u,u,u,7>: Cost 0 copy RHS
-  835584U,	// <u,u,u,u>: Cost 0 copy LHS
+   135053414U,  // <0,0,0,0>: Cost 1 vdup0 LHS
+  1543503974U,  // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+  2618572962U,  // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+  2568054923U,  // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+  1476398390U,  // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+  2550140624U,  // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+  2550141434U,  // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+  2591945711U,  // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+   135053414U,  // <0,0,0,u>: Cost 1 vdup0 LHS
+  2886516736U,  // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+  1812775014U,  // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+  1618133094U,  // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+  2625209292U,  // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+  2886558034U,  // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+  2617246864U,  // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+  3659723031U,  // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+  2591953904U,  // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+  1812775581U,  // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+  3020734464U,  // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+  3020734474U,  // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+  1946992742U,  // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+  2631181989U,  // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+  3020734668U,  // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+  3826550569U,  // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+  2617247674U,  // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+  2591962097U,  // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+  1946992796U,  // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+  2635163787U,  // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+  2686419196U,  // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+  2686492933U,  // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+  2617248156U,  // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+  2617248258U,  // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+  3826551298U,  // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+  3690990200U,  // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+  3713551042U,  // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+  2635163787U,  // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+  2617248658U,  // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+  2888450150U,  // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+  3021570150U,  // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+  3641829519U,  // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+  3021570252U,  // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+  1543507254U,  // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+  2752810294U,  // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+  3786998152U,  // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+  1543507497U,  // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+  2684354972U,  // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+  2617249488U,  // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+  3765617070U,  // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+  3635865780U,  // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+  2617249734U,  // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+  2617249796U,  // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+  2718712274U,  // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+  2617249960U,  // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+  2720039396U,  // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+  2684355053U,  // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+  3963609190U,  // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+  2617250298U,  // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+  3796435464U,  // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+  3659762998U,  // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+  3659763810U,  // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+  2617250616U,  // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+  2657727309U,  // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+  2658390942U,  // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+  2659054575U,  // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+  3635880854U,  // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+  3635881401U,  // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+  3734787298U,  // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+  2617251174U,  // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+  3659772002U,  // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+  3659772189U,  // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+  2617251436U,  // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+  2659054575U,  // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+   135053414U,  // <0,0,u,0>: Cost 1 vdup0 LHS
+  1817419878U,  // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+  1947435110U,  // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+  2568120467U,  // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+  1476463926U,  // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+  1543510170U,  // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+  2752813210U,  // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+  2592011255U,  // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+   135053414U,  // <0,0,u,u>: Cost 1 vdup0 LHS
+  2618581002U,  // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+  1557446758U,  // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+  2618581155U,  // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+  2690548468U,  // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+  2626543954U,  // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+  4094985216U,  // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+  2592019278U,  // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+  2592019448U,  // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+  1557447325U,  // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+  1476476938U,  // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+  2886517556U,  // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+  2886517654U,  // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+  2886517720U,  // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+  1476480310U,  // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+  2886558864U,  // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+  2550223354U,  // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+  2550223856U,  // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+  1476482862U,  // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+  1494401126U,  // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+  3020735284U,  // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+  2562172349U,  // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+      835584U,  // <0,1,2,3>: Cost 0 copy LHS
+  1494404406U,  // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+  3020735488U,  // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+  2631190458U,  // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+  1518294010U,  // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+      835584U,  // <0,1,2,u>: Cost 0 copy LHS
+  2692318156U,  // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+  2691875800U,  // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+  2691875806U,  // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+  2692539367U,  // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+  2562182454U,  // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+  2691875840U,  // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+  2692760578U,  // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+  2639817411U,  // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+  2691875863U,  // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+  2568159334U,  // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+  4095312692U,  // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+  2568160934U,  // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+  2568161432U,  // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+  2568162614U,  // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+  1557450038U,  // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+  2754235702U,  // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+  2592052220U,  // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+  1557450281U,  // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+  3765617775U,  // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+  2647781007U,  // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+  3704934138U,  // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+  2691875984U,  // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+  2657734598U,  // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+  2650435539U,  // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+  2651099172U,  // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+  2651762805U,  // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+  2691876029U,  // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+  2592063590U,  // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+  3765617871U,  // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+  2654417337U,  // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+  3765617889U,  // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+  2592066870U,  // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+  3765617907U,  // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+  2657071869U,  // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+  1583993678U,  // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+  1584657311U,  // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+  2657735672U,  // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+  2657735808U,  // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+  2631193772U,  // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+  2661053667U,  // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+  2657736038U,  // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+  3721524621U,  // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+  2657736158U,  // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+  2657736300U,  // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+  2657736322U,  // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+  1494450278U,  // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+  1557452590U,  // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+  2754238254U,  // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+      835584U,  // <0,1,u,3>: Cost 0 copy LHS
+  1494453558U,  // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+  1557452954U,  // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+  2754238618U,  // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+  1518343168U,  // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+      835584U,  // <0,1,u,u>: Cost 0 copy LHS
+  2752299008U,  // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+  1544847462U,  // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+  1678557286U,  // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+  2696521165U,  // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+  2752340172U,  // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+  2691876326U,  // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+  2618589695U,  // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+  2592093185U,  // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+  1678557340U,  // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+  2618589942U,  // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+  2752299828U,  // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+  2886518376U,  // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+  2752299766U,  // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+  2550295862U,  // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+  2752340992U,  // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+  2886559674U,  // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+  3934208106U,  // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+  2752340771U,  // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+  1476558868U,  // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+  2226628029U,  // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+  2752300648U,  // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+  3020736114U,  // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+  1476562230U,  // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+  2550304464U,  // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+  2618591162U,  // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+  2550305777U,  // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+  1476564782U,  // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+  2618591382U,  // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+  2752301206U,  // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+  3826043121U,  // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+  2752301468U,  // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+  2618591746U,  // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+  2752301570U,  // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+  3830688102U,  // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+  2698807012U,  // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+  2752301269U,  // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+  2562261094U,  // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+  4095313828U,  // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+  2226718152U,  // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+  2568235169U,  // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+  2562264374U,  // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+  1544850742U,  // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+  1678560566U,  // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+  2592125957U,  // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+  1678560584U,  // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+  2691876686U,  // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+  2618592976U,  // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+  3765618528U,  // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+  3765618536U,  // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+  2618593222U,  // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+  2752303108U,  // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+  2618593378U,  // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+  2824785206U,  // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+  2824785207U,  // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+  2752303950U,  // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+  3830690081U,  // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+  2618593786U,  // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+  2691876794U,  // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+  2752303990U,  // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+  3830690445U,  // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+  2752303928U,  // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+  2657743695U,  // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+  2691876839U,  // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+  2659070961U,  // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+  2659734594U,  // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+  3734140051U,  // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+  2701166596U,  // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+  2662389094U,  // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+  2662389126U,  // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+  3736794583U,  // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+  2752304748U,  // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+  2659070961U,  // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+  1476608026U,  // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+  1544853294U,  // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+  1678563118U,  // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+  3021178482U,  // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+  1476611382U,  // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+  1544853658U,  // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+  1678563482U,  // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+  2824785449U,  // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+  1678563172U,  // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+  2556329984U,  // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+  2686421142U,  // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+  2562303437U,  // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+  4094986652U,  // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+  2556333366U,  // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+  4094986754U,  // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+  3798796488U,  // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+  3776530634U,  // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+  2556335918U,  // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+  2886518934U,  // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+  2556338933U,  // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+  2691877105U,  // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+  2886519196U,  // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+  2886519298U,  // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+  4095740418U,  // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+  3659944242U,  // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+  3769600286U,  // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+  2886519582U,  // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+  1482604646U,  // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+  1482605302U,  // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+  2556348008U,  // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+  3020736924U,  // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+  1482607926U,  // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+  3020737026U,  // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+  2598154746U,  // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+  2598155258U,  // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+  1482610478U,  // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+  3692341398U,  // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+  2635851999U,  // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+  3636069840U,  // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+  2691877276U,  // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+  3961522690U,  // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+  3826797058U,  // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+  3703622282U,  // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+  3769600452U,  // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+  2640497430U,  // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+  3962194070U,  // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+  2232617112U,  // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+  2232690849U,  // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+  4095314332U,  // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+  3962194434U,  // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+  2691877378U,  // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+  3826765110U,  // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+  3665941518U,  // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+  2691877405U,  // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+  3630112870U,  // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+  3630113526U,  // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+  4035199734U,  // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+  3769600578U,  // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+  2232846516U,  // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+  3779037780U,  // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+  2718714461U,  // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+  2706106975U,  // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+  2233141464U,  // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+  2691877496U,  // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+  3727511914U,  // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+  3765619338U,  // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+  3765619347U,  // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+  3765987996U,  // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+  3306670270U,  // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+  3792456365U,  // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+  2706770608U,  // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+  2706844345U,  // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+  3769600707U,  // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+  2659742787U,  // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+  3636102612U,  // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+  3769600740U,  // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+  3769600747U,  // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+  3769600758U,  // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+  3659993400U,  // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+  3781176065U,  // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+  2664388218U,  // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+  1482653798U,  // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+  1482654460U,  // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+  2556397160U,  // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+  3021179292U,  // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+  1482657078U,  // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+  3021179394U,  // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+  2598203898U,  // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+  2708097874U,  // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+  1482659630U,  // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+  2617278468U,  // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+  2618605670U,  // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+  2618605734U,  // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+  3642091695U,  // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+  2753134796U,  // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+  2718714770U,  // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+  3021245750U,  // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+  3665982483U,  // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+  3021245768U,  // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+  2568355942U,  // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+  3692348212U,  // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+  3692348310U,  // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+  2568358064U,  // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+  2568359222U,  // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+  1812778294U,  // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+  3022671158U,  // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+  2592248852U,  // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+  1812778537U,  // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+  2568364134U,  // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+  2238573423U,  // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+  3692349032U,  // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+  2631214761U,  // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+  2568367414U,  // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+  2887028022U,  // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+  1946996022U,  // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+  2592257045U,  // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+  1946996040U,  // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+  3692349590U,  // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+  3826878614U,  // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+  3826878625U,  // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+  3692349852U,  // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+  3692349954U,  // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+  3826878978U,  // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+  4095200566U,  // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+  3713583814U,  // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+  3692350238U,  // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+  2550464552U,  // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+  3962194914U,  // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+  3693677631U,  // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+  3642124467U,  // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+  2718715088U,  // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+  2618608950U,  // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+  2753137974U,  // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+  3666015255U,  // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+  2618609193U,  // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+  2568388710U,  // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+  2568389526U,  // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+  3636159963U,  // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+  2568390836U,  // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+  2568391990U,  // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+  2718715180U,  // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+  1618136374U,  // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+  2592281624U,  // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+  1618136392U,  // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+  2550480938U,  // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+  3826880801U,  // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+  2562426332U,  // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+  3786190181U,  // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+  2718715252U,  // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+  3826881165U,  // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+  2712669568U,  // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+  2657760081U,  // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+  2718715284U,  // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+  3654090854U,  // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+  3934229326U,  // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+  3734156437U,  // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+  3734820070U,  // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+  3654094134U,  // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+  2713259464U,  // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+  2713333201U,  // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+  3654095866U,  // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+  2713259464U,  // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+  2568413286U,  // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+  2618611502U,  // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+  2753140526U,  // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+  2568415415U,  // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+  2568416566U,  // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+  1817423158U,  // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+  1947438390U,  // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+  2592306203U,  // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+  1947438408U,  // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+  3630219264U,  // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+  2625912934U,  // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+  3692355748U,  // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+  3693019384U,  // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+  3630222646U,  // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+  3699655062U,  // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+  2718715508U,  // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+  3087011126U,  // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+  2625913501U,  // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+  1500659814U,  // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+  2886520528U,  // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+  2574403176U,  // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+  2574403734U,  // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+  1500662674U,  // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+  2886520836U,  // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+  2886520930U,  // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+  2718715600U,  // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+  1500665646U,  // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+  2556493926U,  // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+  2244546120U,  // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+  3692357256U,  // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+  2568439994U,  // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+  2556497206U,  // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+  3020738564U,  // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+  4027877161U,  // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+  3093220662U,  // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+  3093220663U,  // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+  3699656854U,  // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+  3699656927U,  // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+  3699657006U,  // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+  3699657116U,  // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+  2637859284U,  // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+  3790319453U,  // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+  3699657354U,  // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+  2716725103U,  // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+  2716798840U,  // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+  2661747602U,  // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+  3630252810U,  // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+  3636225507U,  // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+  3716910172U,  // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+  3962195892U,  // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+  2625916214U,  // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+  3718901071U,  // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+  2718715846U,  // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+  2625916457U,  // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+  3791278034U,  // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+  3791351771U,  // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+  3318386260U,  // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+  3791499245U,  // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+  3318533734U,  // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+  2718715908U,  // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+  2657767522U,  // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+  2718715928U,  // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+  2718715937U,  // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+  2592358502U,  // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+  3792015404U,  // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+  3731509754U,  // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+  3785748546U,  // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+  2592361782U,  // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+  2592362594U,  // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+  3785748576U,  // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+  1644974178U,  // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+  1645047915U,  // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+  2562506854U,  // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+  2562507670U,  // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+  2562508262U,  // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+  3636250774U,  // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+  2562510134U,  // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+  2718716072U,  // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+  2718716074U,  // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+  2719379635U,  // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+  2562512686U,  // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+  1500717158U,  // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+  2625918766U,  // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+  2719674583U,  // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+  2568489152U,  // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+  1500720025U,  // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+  2625919130U,  // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+  2586407243U,  // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+  1646301444U,  // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+  1646375181U,  // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+  2586411110U,  // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+  2619949158U,  // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+  2619949220U,  // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+  3785748789U,  // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+  2619949386U,  // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+  2586415202U,  // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+  2586415436U,  // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+  2952793398U,  // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+  2619949725U,  // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+  2562531430U,  // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+  3693691700U,  // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+  2886521338U,  // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+  3693691864U,  // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+  2562534710U,  // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+  2580450932U,  // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+  2886521656U,  // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+  2966736182U,  // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+  2966736183U,  // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+  1500741734U,  // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+  2250518817U,  // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+  2574485096U,  // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+  2631894694U,  // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+  1500744604U,  // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+  2574487248U,  // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+  3020739384U,  // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+  2954136886U,  // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+  1500747566U,  // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+  3693693078U,  // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+  3705637136U,  // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+  3705637192U,  // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+  3693693340U,  // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+  2637867477U,  // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+  3705637424U,  // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+  3666154056U,  // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+  2722697800U,  // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+  2722771537U,  // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+  2562556006U,  // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+  4095316257U,  // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+  2562557420U,  // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+  3636299926U,  // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+  2562559286U,  // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+  2619952438U,  // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+  2723287696U,  // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+  4027895094U,  // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+  2619952681U,  // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+  2718716594U,  // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+  3648250774U,  // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+  3792458436U,  // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+  3705638767U,  // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+  3648252831U,  // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+  3797619416U,  // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+  3792458472U,  // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+  4035202358U,  // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+  2718716594U,  // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+  3786412796U,  // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+  3792458504U,  // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+  3728200126U,  // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+  3798135575U,  // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+  3786412836U,  // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+  3792458543U,  // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+  2718716728U,  // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+  2718716738U,  // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+  2718716747U,  // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+  2718716750U,  // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+  2724909910U,  // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+  3636323823U,  // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+  2725057384U,  // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+  2718716790U,  // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+  2718716800U,  // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+  3792458629U,  // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+  2725352332U,  // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+  2718716822U,  // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+  1500790886U,  // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+  2619954990U,  // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+  2562590192U,  // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+  2725721017U,  // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+  1500793762U,  // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+  2619955354U,  // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+  2725942228U,  // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+  2954186038U,  // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+  1500796718U,  // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+  2256401391U,  // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+  2632564838U,  // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+  2256548865U,  // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+  3700998396U,  // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+  2718716952U,  // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+  2718716962U,  // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+  2621284845U,  // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+  3904685542U,  // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+  2632565405U,  // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+  2256409584U,  // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+  3706307380U,  // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+  2632565654U,  // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+  3769603168U,  // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+  2256704532U,  // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+  3769603184U,  // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+  3700999366U,  // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+  2886522476U,  // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+  2256999480U,  // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+  2586501222U,  // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+  1182749690U,  // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+  3636356595U,  // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+  2727711916U,  // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+  2586504502U,  // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+  2632566606U,  // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+  2586505559U,  // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+  3020740204U,  // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+  1183265849U,  // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+  3701000342U,  // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+  3706308849U,  // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+  3330315268U,  // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+  3706309020U,  // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+  3706309122U,  // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+  3712281127U,  // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+  2639202936U,  // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+  3802412321U,  // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+  2640530202U,  // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+  3654287462U,  // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+  2256507900U,  // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+  2256581637U,  // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+  3660262008U,  // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+  3786413405U,  // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+  2632568118U,  // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+  3718917457U,  // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+  3787003255U,  // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+  2632568361U,  // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+  3706310268U,  // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+  3792459156U,  // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+  3330331654U,  // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+  3722899255U,  // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+  2256737304U,  // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+  3724226521U,  // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+  2718717377U,  // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+  2729997763U,  // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+  2720044499U,  // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+  3712946517U,  // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+  2256524286U,  // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+  3792459246U,  // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+  3796440567U,  // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+  3654307126U,  // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+  2656457394U,  // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+  3792459281U,  // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+  2730661396U,  // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+  2658448293U,  // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+  3787003431U,  // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+  3654312854U,  // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+  3654313446U,  // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+  3804771905U,  // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+  3654315318U,  // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+  3654315651U,  // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+  3660288348U,  // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+  2718717548U,  // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+  2664420990U,  // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+  2256466935U,  // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+  1182798848U,  // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+  2256614409U,  // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+  2731693714U,  // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+  2256761883U,  // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+  2632571034U,  // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+  2669066421U,  // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+  2731988662U,  // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+  1183315007U,  // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+   135053414U,  // <0,u,0,0>: Cost 1 vdup0 LHS
+  1544896614U,  // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+  1678999654U,  // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+  2691880677U,  // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+  1476988214U,  // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+  2718791419U,  // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+  3021248666U,  // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+  2592535607U,  // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+   135053414U,  // <0,u,0,u>: Cost 1 vdup0 LHS
+  1476993097U,  // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+  1812780846U,  // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+  1618138926U,  // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+  2752742134U,  // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+  1476996406U,  // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+  1812781210U,  // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+  2887006416U,  // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+  2966736200U,  // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+  1812781413U,  // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+  1482973286U,  // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+  1482973987U,  // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+  1946998574U,  // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+      835584U,  // <0,u,2,3>: Cost 0 copy LHS
+  1482976566U,  // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+  3020781631U,  // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+  1946998938U,  // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+  1518810169U,  // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+      835584U,  // <0,u,2,u>: Cost 0 copy LHS
+  2618640534U,  // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+  2752743574U,  // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+  2636556597U,  // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+  2752743836U,  // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+  2618640898U,  // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+  2752743938U,  // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+  2639202936U,  // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+  2639874762U,  // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+  2752743637U,  // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+  2562703462U,  // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+  2888455982U,  // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+  3021575982U,  // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+  2568677591U,  // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+  2562706742U,  // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+  1544899894U,  // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+  1679002934U,  // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+  2718718033U,  // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+  1679002952U,  // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+  2568683622U,  // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+  2568684438U,  // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+  3765622902U,  // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+  2691881087U,  // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+  2568686902U,  // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+  2650492890U,  // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+  1618139290U,  // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+  2824834358U,  // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+  1618139308U,  // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+  2592579686U,  // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+  2262496983U,  // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+  2654474688U,  // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+  2691881168U,  // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+  2592582966U,  // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+  2656465587U,  // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+  2657129220U,  // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+  1584051029U,  // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+  1584714662U,  // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+  2562728038U,  // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+  2562728854U,  // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+  2562729473U,  // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+  2661111018U,  // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+  2562731318U,  // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+  2718718258U,  // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+  2586620261U,  // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+  2657793644U,  // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+  2562733870U,  // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+   135053414U,  // <0,u,u,0>: Cost 1 vdup0 LHS
+  1544902446U,  // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+  1679005486U,  // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+      835584U,  // <0,u,u,3>: Cost 0 copy LHS
+  1483025718U,  // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+  1544902810U,  // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+  1679005850U,  // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+  1518859327U,  // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+      835584U,  // <0,u,u,u>: Cost 0 copy LHS
+  2689744896U,  // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+  1610694666U,  // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+  2689744916U,  // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+  2619310332U,  // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+  2684657701U,  // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+  2620637598U,  // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+  3708977654U,  // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+  3666351168U,  // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+  1611210825U,  // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+  2556780646U,  // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+  2556781355U,  // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+  1616003174U,  // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+  3693052888U,  // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+  2556783926U,  // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+  2580672143U,  // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+  2724839566U,  // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+  3654415354U,  // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+  1616003228U,  // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+  2685690019U,  // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+  2685763756U,  // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+  2698297524U,  // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+  2685911230U,  // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+  2689745100U,  // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+  3764814038U,  // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+  2724839640U,  // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+  2592625658U,  // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+  2686279915U,  // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+  3087843328U,  // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+  3087843338U,  // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+    67944550U,  // <1,0,3,2>: Cost 1 vrev LHS
+  2568743135U,  // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+  2562772278U,  // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+  4099850454U,  // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+  3704998538U,  // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+  2592633923U,  // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+    68386972U,  // <1,0,3,u>: Cost 1 vrev LHS
+  2620640146U,  // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+  2689745234U,  // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+  2689745244U,  // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+  3760980320U,  // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+  3761054057U,  // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+  2619313462U,  // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+  3761201531U,  // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+  3666383940U,  // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+  2619313705U,  // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+  4029300736U,  // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+  2895249510U,  // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+  3028287590U,  // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+  3642501345U,  // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+  2215592058U,  // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+  3724242907U,  // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+  3724906540U,  // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+  3911118134U,  // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+  3028287644U,  // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+  3762086375U,  // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+  2698297846U,  // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+  3760022015U,  // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+  3642509538U,  // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+  3762381323U,  // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+  3730215604U,  // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+  3730879237U,  // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+  2657801046U,  // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+  2658464679U,  // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+  2659128312U,  // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+  4047898278U,  // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+  2215460970U,  // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+  3734861035U,  // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+  3731543398U,  // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+  3736188301U,  // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+  2663110110U,  // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+  3731543660U,  // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+  2664437376U,  // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+  3087884288U,  // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+  1616003730U,  // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+    67985515U,  // <1,0,u,2>: Cost 1 vrev LHS
+  2689893028U,  // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+  2689745586U,  // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+  2619316378U,  // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+  2669082807U,  // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+  2592674888U,  // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+    68427937U,  // <1,0,u,u>: Cost 1 vrev LHS
+  1543585802U,  // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+  1548894310U,  // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+  2618654892U,  // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+  2689745654U,  // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+  2622636370U,  // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+  2620645791U,  // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+  3696378367U,  // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+  3666424905U,  // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+  1548894866U,  // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+  1483112550U,  // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+   202162278U,  // <1,1,1,1>: Cost 1 vdup1 LHS
+  2622636950U,  // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+  2622637016U,  // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+  1483115830U,  // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  2622637200U,  // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+  2622637263U,  // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+  2592691274U,  // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+   202162278U,  // <1,1,1,u>: Cost 1 vdup1 LHS
+  2550890588U,  // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+  2617329183U,  // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+  2622637672U,  // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+  2622637734U,  // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+  2550893878U,  // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+  3696379744U,  // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+  2622638010U,  // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+  3804554170U,  // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+  2622638139U,  // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+  2622638230U,  // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+  3087844148U,  // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+  4161585244U,  // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+  2014101606U,  // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+  2622638594U,  // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+  2689745920U,  // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+  3763487753U,  // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+  2592707660U,  // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+  2014101611U,  // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+  2556878950U,  // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+  2221335351U,  // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+  3696380988U,  // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+  3763487805U,  // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+  2556882230U,  // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+  1548897590U,  // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+  2758184246U,  // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+  3666457677U,  // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+  1548897833U,  // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+  2693653615U,  // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+  2617331408U,  // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+  4029302934U,  // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+  2689746064U,  // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+  2221564755U,  // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+  2955559250U,  // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+  2617331810U,  // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+  2825293110U,  // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+  2689746109U,  // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+  3696382241U,  // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+  2689746127U,  // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+  2617332218U,  // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+  3763487969U,  // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+  3696382605U,  // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+  4029309266U,  // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+  2617332536U,  // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+  2724840702U,  // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+  2725504263U,  // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+  2617332720U,  // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+  2659800138U,  // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+  3691074717U,  // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+  4167811174U,  // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+  2617333094U,  // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+  3295396702U,  // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+  3803891014U,  // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+  2617333356U,  // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+  2659800138U,  // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+  1483112550U,  // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+   202162278U,  // <1,1,u,1>: Cost 1 vdup1 LHS
+  2622642056U,  // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+  2014142566U,  // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+  1483115830U,  // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  1548900506U,  // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+  2622642384U,  // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+  2825293353U,  // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+   202162278U,  // <1,1,u,u>: Cost 1 vdup1 LHS
+  2635251712U,  // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+  1561509990U,  // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+  2618663085U,  // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+  2696529358U,  // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+  2635252050U,  // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+  3769533926U,  // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+  2621317617U,  // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+  2659140170U,  // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+  1561510557U,  // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+  2623308516U,  // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+  2635252532U,  // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+  2631271318U,  // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+  2958180454U,  // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+  2550959414U,  // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+  2635252880U,  // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+  2635252952U,  // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+  3732882731U,  // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+  2958180459U,  // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+  2629281213U,  // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+  2635253280U,  // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+  2618664552U,  // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+  2689746546U,  // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+  3764815485U,  // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+  3760023176U,  // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+  2635253690U,  // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+  2659141610U,  // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+  2689746591U,  // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+   403488870U,  // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+  1477231350U,  // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1477232232U,  // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1477233052U,  // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+   403492150U,  // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+  1525010128U,  // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+  1525010938U,  // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1525011450U,  // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+   403494702U,  // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+  2641226607U,  // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+  3624723446U,  // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+  3301123609U,  // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+  2598759198U,  // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+  2659142864U,  // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+  1561513270U,  // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+  2659143028U,  // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+  2659143112U,  // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+  1561513513U,  // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+  2550988902U,  // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+  2550989824U,  // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+  3624732264U,  // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+  2955559014U,  // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+  2550992182U,  // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+  2659143684U,  // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+  2659143778U,  // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+  2659143848U,  // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+  2550994734U,  // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+  2700289945U,  // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+  2635256232U,  // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+  2659144186U,  // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+  2689746874U,  // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+  3763488705U,  // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+  3763488716U,  // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+  2659144504U,  // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+  2657817432U,  // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+  2689746919U,  // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+  1585402874U,  // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+  2659144770U,  // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+  3708998858U,  // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+  2635257059U,  // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+  2659145062U,  // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+  3732886916U,  // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+  3732886998U,  // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+  2659145255U,  // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+  1590711938U,  // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+   403529835U,  // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+  1477272310U,  // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1477273192U,  // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1477273750U,  // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+   403533110U,  // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+  1561516186U,  // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+  1525051898U,  // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1525052410U,  // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+   403535662U,  // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+  2819407872U,  // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+  1551564902U,  // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+  2819408630U,  // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+  2619334911U,  // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+  2625306962U,  // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+  3832725879U,  // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+  3699048959U,  // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+  3776538827U,  // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+  1551565469U,  // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+  2618671862U,  // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+  2819408692U,  // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+  2624643975U,  // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+  1745666150U,  // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+  2557005110U,  // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+  2625307792U,  // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+  3698386127U,  // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+  2592838748U,  // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+  1745666155U,  // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+  2819408790U,  // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+  2625308193U,  // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+  2819408036U,  // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+  2819851890U,  // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+  2819408794U,  // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+  3893149890U,  // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+  2819408076U,  // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+  3772041583U,  // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+  2819408042U,  // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+  1483276390U,  // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+  1483277128U,  // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+  2557019752U,  // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+  2819408856U,  // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+  1483279670U,  // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+  2819409614U,  // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+  2598826490U,  // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+  3087844352U,  // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+  1483282222U,  // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+  2568970342U,  // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+  2568971224U,  // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+  3832761290U,  // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+  2233428219U,  // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+  2568973622U,  // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+  1551568182U,  // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+  2819410434U,  // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+  3666605151U,  // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+  1551568425U,  // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+  2563006566U,  // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+  2568979456U,  // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+  2563008035U,  // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+  2233436412U,  // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+  2563009846U,  // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+  2867187716U,  // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+  2655834214U,  // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+  1745669430U,  // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+  1745669431U,  // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+  2867187810U,  // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+  3699052931U,  // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+  2654507460U,  // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+  3766291091U,  // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+  2655834726U,  // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+  3923384562U,  // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+  2657161992U,  // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+  2819852218U,  // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+  2819852219U,  // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+  2706926275U,  // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+  2659816524U,  // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+  3636766245U,  // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+  2867187903U,  // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+  2625312102U,  // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+  2867188598U,  // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+  3728250344U,  // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+  2867187880U,  // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+  2707516171U,  // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+  1483317350U,  // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+  1483318093U,  // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+  2819410718U,  // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+  1745666717U,  // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+  1483320630U,  // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+  1551571098U,  // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+  2819410758U,  // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+  1745669673U,  // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+  1745666722U,  // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+  2617352205U,  // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+  2619342950U,  // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+  3692421295U,  // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+  2619343104U,  // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+  2617352530U,  // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+  1634880402U,  // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+  2713930652U,  // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+  3732898396U,  // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+  1635101613U,  // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+  3693085430U,  // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+  2623988535U,  // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+  3693085590U,  // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+  3692422134U,  // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+  3693085726U,  // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+  2892401974U,  // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+  3026619702U,  // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+  3800206324U,  // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+  2892402217U,  // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+  3966978927U,  // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+  3966979018U,  // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+  3693086312U,  // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+  2635269798U,  // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+  3966979280U,  // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+  2893204790U,  // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+  3693086650U,  // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+  3666662502U,  // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+  2893205033U,  // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+  2563063910U,  // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+  2563064730U,  // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+  2563065386U,  // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+  3693087132U,  // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+  2619345410U,  // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+  3087843666U,  // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+  3087843676U,  // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+  3666670695U,  // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+  3087843669U,  // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+  2620672914U,  // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+  3630842706U,  // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+  3313069003U,  // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+  3642788100U,  // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+  2713930960U,  // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+  2619346230U,  // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+  2713930980U,  // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+  3736882642U,  // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+  2619346473U,  // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+  2557108326U,  // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+  2557109075U,  // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+  2598913774U,  // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+  3630852246U,  // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+  2557111606U,  // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+  2895252790U,  // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+  1616006454U,  // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  3899059510U,  // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+  1616006472U,  // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+  2557116518U,  // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+  2557117236U,  // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+  3630859880U,  // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+  2569062550U,  // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+  2557119798U,  // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+  3763490174U,  // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+  3763490183U,  // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+  2712751498U,  // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+  2557122350U,  // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+  2659161084U,  // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+  3732903040U,  // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+  3734230174U,  // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+  3734893807U,  // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+  3660729654U,  // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+  3786493384U,  // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+  2713341394U,  // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+  3660731386U,  // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+  2664470148U,  // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+  2557132902U,  // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+  2619348782U,  // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+  2563106351U,  // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+  2713783816U,  // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+  2622666815U,  // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+  1640189466U,  // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+  1616006697U,  // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  2712751498U,  // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+  1616006715U,  // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+  2620014592U,  // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+  1546272870U,  // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+  2618687664U,  // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+  3693093120U,  // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+  1546273106U,  // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+  2620678563U,  // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+  2714668660U,  // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+  3772042877U,  // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+  1546273437U,  // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+  2620015350U,  // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+  2620015412U,  // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+  2620015510U,  // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+  2618688512U,  // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+  2620015677U,  // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+  2620015727U,  // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+  2620015859U,  // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+  3093728566U,  // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+  2620015981U,  // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+  3692430816U,  // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+  2620016163U,  // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+  2620016232U,  // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+  2620016294U,  // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+  3693758221U,  // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+  3692431209U,  // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+  2620016570U,  // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+  4173598006U,  // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+  2620016699U,  // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+  2620016790U,  // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+  2569110672U,  // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+  3693758785U,  // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+  2620017052U,  // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+  2620017154U,  // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+  3135623172U,  // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+  4161587048U,  // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+  2014104886U,  // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+  2014104887U,  // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+  2620017554U,  // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+  2620017634U,  // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+  3693759551U,  // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+  3642861837U,  // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+  2575092710U,  // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+  1546276150U,  // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+  2759855414U,  // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+  2713931718U,  // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+  1546276393U,  // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+  2557182054U,  // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+  2557182812U,  // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+  3630925347U,  // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+  4029301675U,  // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+  2557185334U,  // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+  2713931780U,  // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+  2667794530U,  // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+  2713931800U,  // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+  2557187886U,  // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+  2718208036U,  // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+  2620019115U,  // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+  2667794938U,  // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+  3787673666U,  // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+  3693761165U,  // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+  3319279297U,  // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+  2667795256U,  // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+  2713931874U,  // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+  2713931883U,  // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+  2557198438U,  // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+  2557199156U,  // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+  2569143974U,  // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+  2569144592U,  // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+  2557201718U,  // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+  2713931944U,  // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+  3787673770U,  // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+  2719387828U,  // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+  2557204270U,  // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+  2620020435U,  // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+  1546278702U,  // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+  2620020616U,  // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+  2620020668U,  // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+  1594054682U,  // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+  1546279066U,  // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+  2620020944U,  // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+  2014145846U,  // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+  2014145847U,  // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+  3692437504U,  // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+  2618695782U,  // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+  2618695857U,  // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+  3794161970U,  // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+  2620023122U,  // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+  2620686756U,  // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+  2621350389U,  // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+  4028599606U,  // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+  2618696349U,  // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+  3692438262U,  // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+  2625995572U,  // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+  3692438422U,  // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+  3692438488U,  // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+  2625995820U,  // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+  3692438672U,  // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+  3692438720U,  // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+  2958183734U,  // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+  2958183735U,  // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+  2721526201U,  // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+  3692439097U,  // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+  3692439144U,  // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+  3692439206U,  // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+  3636948278U,  // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+  3787674092U,  // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+  2618697658U,  // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+  2970799414U,  // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+  2970799415U,  // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+  2563211366U,  // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+  3699738854U,  // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+  2563212860U,  // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+  3692439964U,  // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+  2563214646U,  // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+  4191820018U,  // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+  2587103648U,  // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+  3087845306U,  // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+  3087845307U,  // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+  3693767570U,  // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+  3693767650U,  // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+  3636962877U,  // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+  3325088134U,  // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+  3693767898U,  // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+  2618699062U,  // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+  3833670966U,  // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+  4028632374U,  // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+  2618699305U,  // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+  3693768264U,  // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+  3630998373U,  // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+  3636971070U,  // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+  3642943767U,  // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+  3693768628U,  // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+  3732918276U,  // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+  2620690530U,  // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+  2955562294U,  // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+  2955562295U,  // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+  2724180733U,  // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+  3631006566U,  // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+  3631007674U,  // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+  3692442184U,  // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+  3631009078U,  // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+  3787674416U,  // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+  2713932600U,  // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+  2713932610U,  // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+  2713932619U,  // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+  1651102542U,  // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+  2724918103U,  // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+  2698302306U,  // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+  3642960153U,  // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+  2713932662U,  // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+  2725213051U,  // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+  2724844426U,  // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+  4035956022U,  // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+  1651692438U,  // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+  1651766175U,  // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+  2618701614U,  // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+  3135663508U,  // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+  3692443580U,  // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+  2713932743U,  // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+  2618701978U,  // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+  2622683344U,  // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+  3087886266U,  // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+  1652356071U,  // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+  2726171632U,  // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+  2626666598U,  // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+  3695100067U,  // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+  3707044102U,  // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+  2726466580U,  // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+  3654921933U,  // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+  2621358582U,  // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+  2622022215U,  // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+  2626667165U,  // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+  2593128550U,  // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+  2626667316U,  // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+  3700409238U,  // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+  2257294428U,  // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+  2593131830U,  // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+  2626667646U,  // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+  2627331279U,  // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+  2593133696U,  // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+  2628658545U,  // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+  2587164774U,  // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+  3701073445U,  // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+  3700409960U,  // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+  2638612134U,  // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+  2587168054U,  // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+  3706382167U,  // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+  2587169192U,  // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+  3660911610U,  // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+  2587170606U,  // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+  1507459174U,  // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+  2569257984U,  // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+  2581202536U,  // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+  2569259294U,  // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+  1507462454U,  // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+  1507462864U,  // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+  2581205498U,  // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+  2581206010U,  // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+  1507465006U,  // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+  2728826164U,  // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+  3654951732U,  // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+  3330987094U,  // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+  3331060831U,  // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+  3787674971U,  // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+  2626669878U,  // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+  3785979241U,  // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+  3787085176U,  // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+  2626670121U,  // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+  2569273446U,  // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+  2569274368U,  // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+  3643016808U,  // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+  2569275680U,  // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+  2569276726U,  // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+  4102034790U,  // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+  2651222067U,  // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+  3899378998U,  // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+  2569279278U,  // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+  2730153430U,  // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+  2724845022U,  // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+  3643025338U,  // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+  3643025697U,  // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+  3643026742U,  // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+  3654971091U,  // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+  3787675153U,  // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+  2724845076U,  // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+  2725508637U,  // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+  2730817063U,  // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+  3631088436U,  // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+  3660949158U,  // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+  3801904705U,  // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+  3631090998U,  // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+  2662503828U,  // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+  3660951981U,  // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+  2713933420U,  // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+  2731406959U,  // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+  1507500134U,  // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+  2626672430U,  // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+  2581243496U,  // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+  2569300259U,  // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+  1507503414U,  // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+  1507503829U,  // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+  2581246458U,  // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+  2581246970U,  // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+  1507505966U,  // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+  1543643153U,  // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+  1546297446U,  // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+  2819448852U,  // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+  2619375876U,  // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+  1546297685U,  // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+  1658771190U,  // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+  2736789248U,  // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+  2659189376U,  // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+  1546298013U,  // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+  1483112550U,  // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+   202162278U,  // <1,u,1,1>: Cost 1 vdup1 LHS
+  1616009006U,  // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+  1745707110U,  // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+  1483115830U,  // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+  2620040336U,  // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+  3026622618U,  // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+  2958183752U,  // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+   202162278U,  // <1,u,1,u>: Cost 1 vdup1 LHS
+  2819449750U,  // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+  2893207342U,  // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+  2819448996U,  // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+  2819450482U,  // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+  2819449754U,  // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+  2893207706U,  // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+  2819449036U,  // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+  2970799432U,  // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+  2819449002U,  // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+   403931292U,  // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+  1477673718U,  // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+   115726126U,  // <1,u,3,2>: Cost 1 vrev LHS
+  2014102173U,  // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+   403934518U,  // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+  1507536601U,  // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+  1525453306U,  // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  2014105129U,  // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+   403937070U,  // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+  2620042157U,  // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+  2620042237U,  // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+  2263217967U,  // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+  2569341224U,  // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+  2569342262U,  // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+  1546300726U,  // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+  2819449180U,  // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+  2724845649U,  // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+  1546300969U,  // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+  2551431270U,  // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+  2551432192U,  // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+  3028293422U,  // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+  2955559068U,  // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+  2551434550U,  // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+  2895255706U,  // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+  1616009370U,  // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  1745710390U,  // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+  1745710391U,  // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+  2653221159U,  // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+  2725509303U,  // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+  2659193338U,  // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+  2689751248U,  // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+  2867228774U,  // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+  3764820194U,  // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+  2657202957U,  // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+  2819450810U,  // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+  2819450811U,  // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+  1585452032U,  // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+  2557420340U,  // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+  2569365158U,  // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+  2569365803U,  // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+  2557422902U,  // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+  2662512021U,  // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+  2724845884U,  // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+  2659194476U,  // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+  1590761096U,  // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+   403972257U,  // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+   202162278U,  // <1,u,u,1>: Cost 1 vdup1 LHS
+   115767091U,  // <1,u,u,2>: Cost 1 vrev LHS
+  1745707677U,  // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+   403975478U,  // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+  1546303642U,  // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+  1616009613U,  // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+  1745710633U,  // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+   403978030U,  // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+  2551463936U,  // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+  2685698058U,  // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+  1610776596U,  // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+  2619384069U,  // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+  2551467318U,  // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+  3899836596U,  // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+  2621374968U,  // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+  4168271334U,  // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+  1611219018U,  // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+  2551472138U,  // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+  2690564186U,  // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+  1611956326U,  // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+  2826092646U,  // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+  2551475510U,  // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+  3692463248U,  // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+  2587308473U,  // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+  3661050874U,  // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+  1611956380U,  // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+  1477738598U,  // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+  2551481078U,  // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+  2551481796U,  // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+  2551482518U,  // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+  1477741878U,  // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+  2551484112U,  // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+  2551484759U,  // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+  2551485434U,  // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+  1477744430U,  // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+  2953625600U,  // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+  2953627302U,  // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+  2953625764U,  // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+  4027369695U,  // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+  3625233718U,  // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+  3899836110U,  // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+  4032012618U,  // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+  3899835392U,  // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+  2953625770U,  // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+  2551496806U,  // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+  2685698386U,  // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+  2685698396U,  // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+  3625240726U,  // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+  2551500086U,  // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+  2618723638U,  // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+  2765409590U,  // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+  3799990664U,  // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+  2685698450U,  // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+  3625246822U,  // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+  3289776304U,  // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+  2690564526U,  // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+  3289923778U,  // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+  2216255691U,  // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+  3726307332U,  // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+  3726307426U,  // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+  2826095926U,  // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+  2216550639U,  // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+  4162420736U,  // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+  2901885030U,  // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+  2685698559U,  // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+  3643173171U,  // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+  2216263884U,  // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+  3730289341U,  // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+  3726308152U,  // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+  3899836346U,  // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+  2216558832U,  // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+  2659202049U,  // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+  3726308437U,  // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+  2726249034U,  // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+  3734934772U,  // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+  3726308710U,  // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+  3726308814U,  // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+  3736925671U,  // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+  3726308972U,  // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+  2659202049U,  // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+  1477787750U,  // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+  2953668262U,  // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+  1611956893U,  // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+  2551531670U,  // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+  1477791030U,  // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+  2618726554U,  // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+  2765412506U,  // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+  2826096169U,  // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+  1611956947U,  // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+  2569453670U,  // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+  2619392102U,  // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+  3759440619U,  // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+  1616823030U,  // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+  2569456950U,  // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+  2690712328U,  // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+  3661115841U,  // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+  2622046794U,  // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+  1617191715U,  // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+  2551545958U,  // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+  2685698868U,  // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+  2628682646U,  // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+  2685698888U,  // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+  2551549238U,  // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+  3693134992U,  // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+  3661124034U,  // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+  3625292794U,  // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+  2685698933U,  // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+  2551554150U,  // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+  3893649571U,  // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+  2551555688U,  // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+  2685698966U,  // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+  2551557430U,  // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+  3763422123U,  // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+  3693135802U,  // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+  2726249402U,  // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+  2685699011U,  // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+  2551562342U,  // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+  2953625610U,  // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+  2953627798U,  // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+  2953626584U,  // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+  2551565622U,  // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+  2953625938U,  // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+  2587398596U,  // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+  4032013519U,  // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+  2953625617U,  // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+  2690565154U,  // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+  3625313270U,  // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+  3771532340U,  // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+  1148404634U,  // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+  3625315638U,  // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+  2619395382U,  // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+  3837242678U,  // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+  3799991394U,  // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+  1148773319U,  // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+  2551578726U,  // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+  2551579648U,  // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+  3625321952U,  // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+  2685699216U,  // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+  2551582006U,  // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+  3740913668U,  // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+  3661156806U,  // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+  3893652790U,  // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+  2685699261U,  // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+  2551586918U,  // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+  3625329398U,  // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+  2551588794U,  // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+  3088679014U,  // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+  2551590198U,  // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+  4029382994U,  // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+  3625333560U,  // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+  3731624800U,  // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+  2551592750U,  // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+  2622051322U,  // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+  3733615699U,  // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+  3795125538U,  // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+  2222171037U,  // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+  3740915046U,  // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+  3296060335U,  // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+  3736933864U,  // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+  3805300055U,  // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+  2669827714U,  // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+  2551603302U,  // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+  2953666570U,  // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+  2953668758U,  // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+  1148437406U,  // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+  2551606582U,  // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+  2953666898U,  // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+  2587398596U,  // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+  2669828370U,  // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+  1148806091U,  // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+  1543667732U,  // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+  1548976230U,  // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+  2685699524U,  // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+  2685699535U,  // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+  2551614774U,  // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+  3704422830U,  // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+  3893657642U,  // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+  3770574323U,  // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+  1548976796U,  // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+  2622718710U,  // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+  2622718772U,  // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+  2622718870U,  // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+  2819915878U,  // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+  3625364790U,  // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+  2622719120U,  // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+  3760031292U,  // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+  3667170468U,  // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+  2819915883U,  // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+  1489829990U,  // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+  2563572470U,  // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+   269271142U,  // <2,2,2,2>: Cost 1 vdup2 LHS
+  2685699698U,  // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+  1489833270U,  // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+  2685699720U,  // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+  2622719930U,  // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+  2593436837U,  // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+   269271142U,  // <2,2,2,u>: Cost 1 vdup2 LHS
+  2685699750U,  // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+  2690565806U,  // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+  2953627240U,  // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+  1879883878U,  // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+  2685699790U,  // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+  3893659342U,  // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+  2958270812U,  // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+  2593445030U,  // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+  1879883883U,  // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+  2551644262U,  // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+  3625386742U,  // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+  2551645902U,  // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+  3759441686U,  // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+  2551647542U,  // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+  1548979510U,  // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+  2764901686U,  // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+  3667195047U,  // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+  1548979753U,  // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+  3696463432U,  // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+  2617413328U,  // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+  2685699936U,  // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+  4027383910U,  // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+  2228201085U,  // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+  2617413636U,  // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+  2617413730U,  // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+  2819919158U,  // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+  2819919159U,  // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+  3625402554U,  // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+  3760031652U,  // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+  2617414138U,  // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+  2685700026U,  // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+  3625405750U,  // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+  3760031692U,  // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+  3088679116U,  // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+  2657891169U,  // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+  2685700071U,  // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+  2726250474U,  // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+  3704427616U,  // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+  2660545701U,  // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+  4030718054U,  // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+  2617415014U,  // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+  3302033032U,  // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+  3661246929U,  // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+  2617415276U,  // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+  2731558962U,  // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+  1489829990U,  // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+  1548982062U,  // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+   269271142U,  // <2,2,u,2>: Cost 1 vdup2 LHS
+  1879924838U,  // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+  1489833270U,  // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+  1548982426U,  // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+  2953666908U,  // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+  2819919401U,  // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+   269271142U,  // <2,2,u,u>: Cost 1 vdup2 LHS
+  1544339456U,  // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+   470597734U,  // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+  1548984484U,  // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  2619408648U,  // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+  1548984658U,  // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2665857454U,  // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+  2622726655U,  // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+  2593494188U,  // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+   470598301U,  // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+  1544340214U,  // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544340276U,  // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544340374U,  // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1548985304U,  // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2551696694U,  // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+  1548985488U,  // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2622727375U,  // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+  2665858347U,  // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+  1548985709U,  // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+  2622727613U,  // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+  2622727711U,  // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+  1544341096U,  // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+  1544341158U,  // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  2622727958U,  // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+  2622728032U,  // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+  1548986298U,  // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  2665859050U,  // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+  1548986427U,  // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+  1548986518U,  // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  2622728415U,  // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+  1489913458U,  // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+  1544341916U,  // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+  1548986882U,  // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2665859632U,  // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+  2234304870U,  // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+  2958271632U,  // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+  1548987166U,  // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+  1483948134U,  // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+  1483948954U,  // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+  2622729276U,  // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+  2557692054U,  // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+  1483951414U,  // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+   470601014U,  // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+  1592118644U,  // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  2593526960U,  // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+   470601257U,  // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+  2551726182U,  // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+  1592118992U,  // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+  2665860862U,  // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+  2551728642U,  // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+  1592119238U,  // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592119300U,  // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1592119394U,  // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+  1592119464U,  // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+  1592119545U,  // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+  2622730529U,  // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+  2557707164U,  // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+  1592119802U,  // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+  2665861682U,  // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+  2622730893U,  // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+  2665861810U,  // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+  1592120120U,  // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592120142U,  // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1592120223U,  // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+  1592120314U,  // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+  2659890261U,  // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+  2660553894U,  // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+  2665862371U,  // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+  1592120678U,  // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+  2665862534U,  // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+  2665862614U,  // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+  1592120940U,  // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1592120962U,  // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+  1548990163U,  // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+   470603566U,  // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+  1548990341U,  // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+  1548990396U,  // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+  1548990527U,  // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+   470603930U,  // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+  1548990672U,  // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+  1592121600U,  // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+   470604133U,  // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+  2617425942U,  // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+  2618753126U,  // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+  2618753208U,  // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+  2619416841U,  // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+  2587593628U,  // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+  2712832914U,  // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+  1634962332U,  // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+  3799993252U,  // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+  1634962332U,  // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+  2619417334U,  // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+  3692495668U,  // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+  2625389466U,  // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+  2826125414U,  // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+  3699794995U,  // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+  3692496016U,  // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+  3763424238U,  // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+  3667317942U,  // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+  2826125419U,  // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+  2629371336U,  // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+  3699131946U,  // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+  2630698602U,  // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+  2618754766U,  // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+  2826126234U,  // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+  2899119414U,  // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+  3033337142U,  // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+  3800214597U,  // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+  2899119657U,  // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+  2635344033U,  // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+  4032012325U,  // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+  3692497228U,  // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+  3692497308U,  // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+  3001404624U,  // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+  2953627342U,  // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  2953625804U,  // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+  3899868160U,  // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+  2953625806U,  // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+  2710916266U,  // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+  3899869648U,  // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+  3899869658U,  // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+  3899868930U,  // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+  2712833232U,  // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+  2618756406U,  // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+  2765737270U,  // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+  4168304426U,  // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+  2618756649U,  // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+  2551800011U,  // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+  2569716470U,  // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+  2563745405U,  // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+  2569718102U,  // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+  2551803190U,  // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+  3625545732U,  // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+  1611959606U,  // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  2826128694U,  // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+  1611959624U,  // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  1478066278U,  // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+  2551808758U,  // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+  2551809516U,  // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+  2551810198U,  // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+  1478069558U,  // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+  2901888310U,  // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+  2551812920U,  // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+  2726251914U,  // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+  1478072110U,  // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+  2659234821U,  // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+  3786722726U,  // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+  3734303911U,  // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+  3734967544U,  // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+  3727005030U,  // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+  2726251976U,  // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+  2726251986U,  // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+  3727005292U,  // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+  2659234821U,  // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+  1478082662U,  // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+  2618758958U,  // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+  2551826024U,  // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+  2551826582U,  // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+  1478085942U,  // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+  2953668302U,  // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  1611959849U,  // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  2826128937U,  // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+  1611959867U,  // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  3691839488U,  // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+  2618097766U,  // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+  2620088484U,  // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+  2619425034U,  // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+  2620088667U,  // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+  2620752300U,  // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+  3693830655U,  // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+  3094531382U,  // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+  2618098333U,  // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+  3691840246U,  // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+  3691840308U,  // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+  2626061206U,  // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+  2618098688U,  // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+  2626061364U,  // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+  3691840656U,  // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+  3789082310U,  // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+  2712833744U,  // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+  2628715896U,  // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+  3693831613U,  // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+  4026698642U,  // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+  2632033896U,  // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+  3691841190U,  // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+  2632034061U,  // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+  3691841352U,  // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+  3691841466U,  // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+  3088354614U,  // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+  3088354615U,  // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+  2557829222U,  // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+  2557830059U,  // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+  2575746766U,  // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+  3691841948U,  // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+  2619427330U,  // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+  2581720847U,  // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+  2953628162U,  // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2953626624U,  // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+  2953626625U,  // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+  2569781350U,  // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+  3631580076U,  // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+  2569782990U,  // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+  2569783646U,  // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+  2569784630U,  // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+  2618101046U,  // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+  3893905922U,  // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+  3094564150U,  // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+  2618101289U,  // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+  2551873638U,  // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+  3637560320U,  // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+  3637560966U,  // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+  3723030343U,  // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+  2551876918U,  // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+  2712834052U,  // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+  4028713474U,  // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+  2712834072U,  // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+  2712834081U,  // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+  2575769702U,  // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+  3631596462U,  // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+  2655924730U,  // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+  3643541856U,  // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+  2655924849U,  // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+  3787755607U,  // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+  4029385218U,  // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+  3088682294U,  // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+  3088682295U,  // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+  2563833958U,  // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+  2551890678U,  // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+  2563835528U,  // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+  3637577878U,  // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+  2563837238U,  // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+  2712834216U,  // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+  2712834220U,  // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+  4174449974U,  // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+  2563839790U,  // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+  2563842150U,  // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+  2618103598U,  // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+  2563843721U,  // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+  2569816418U,  // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+  2622748735U,  // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+  2618103962U,  // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+  2953669122U,  // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2953667584U,  // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+  2618104165U,  // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+  2620096512U,  // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+  1546354790U,  // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620096676U,  // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+  3693838588U,  // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+  1546355036U,  // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+  3694502317U,  // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+  2551911246U,  // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+  2720723287U,  // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+  1546355357U,  // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620097270U,  // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+  2620097332U,  // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+  2620097430U,  // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+  2820243558U,  // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+  2620097598U,  // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+  2620097680U,  // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+  3693839585U,  // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+  2721386920U,  // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+  2820243563U,  // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+  2714014137U,  // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+  2712834500U,  // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+  2620098152U,  // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+  2620098214U,  // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+  2632042254U,  // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+  2712834540U,  // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+  2820243660U,  // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+  2958265654U,  // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+  2620098619U,  // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+  2620098710U,  // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+  3893986982U,  // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+  2569848762U,  // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+  2620098972U,  // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+  2620099074U,  // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+  3893987022U,  // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+  3001404644U,  // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+  1879887158U,  // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+  1879887159U,  // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+  2620099484U,  // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+  2620099566U,  // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+  2620099644U,  // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+  3643599207U,  // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+  2575830080U,  // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+  1546358070U,  // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+  2667875700U,  // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+  4028042550U,  // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+  1546358313U,  // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+  3693841992U,  // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+  2667876048U,  // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+  2712834756U,  // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+  3643607400U,  // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+  2252091873U,  // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+  2667876356U,  // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+  2667876450U,  // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+  2820246838U,  // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+  2820246839U,  // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+  2563899494U,  // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+  3893988683U,  // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+  2563901072U,  // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+  3893987236U,  // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+  2563902774U,  // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+  3893988723U,  // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+  2712834872U,  // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+  2955644214U,  // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+  2955644215U,  // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+  2712834894U,  // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+  2724926296U,  // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+  2725000033U,  // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+  2702365544U,  // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+  2712834934U,  // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+  3776107393U,  // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+  2725294981U,  // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+  2726253452U,  // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+  2712834966U,  // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+  2620102355U,  // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+  1546360622U,  // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+  2620102536U,  // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+  2820244125U,  // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+  1594136612U,  // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+  1546360986U,  // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+  2620102864U,  // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+  1879928118U,  // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+  1879928119U,  // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+  2726179825U,  // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+  1652511738U,  // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+  2621431972U,  // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+  2257949868U,  // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+  2726474773U,  // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+  2620768686U,  // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+  2621432319U,  // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+  2599760953U,  // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+  1653027897U,  // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+  2639348470U,  // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+  3695174452U,  // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+  3695174550U,  // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+  3694511104U,  // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+  3713090594U,  // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+  3693184144U,  // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+  2627405016U,  // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+  3799995519U,  // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+  2639348470U,  // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+  3695175101U,  // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+  3643655168U,  // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+  2257892517U,  // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+  3695175334U,  // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+  3695175465U,  // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+  2632714080U,  // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+  2633377713U,  // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+  3695175658U,  // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+  2634704979U,  // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+  1514094694U,  // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+  2569921680U,  // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+  2587838056U,  // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+  2569922927U,  // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+  1514097974U,  // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+  2581868321U,  // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+  1514099194U,  // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+  2587841530U,  // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+  1514100526U,  // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+  2708706617U,  // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+  3649643418U,  // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+  3649644330U,  // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+  2257982640U,  // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+  3649645641U,  // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+  2621435190U,  // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+  2712835441U,  // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+  3799995762U,  // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+  2621435433U,  // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+  2729497990U,  // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+  3643679744U,  // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+  3637708424U,  // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+  3643681137U,  // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+  2599800118U,  // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+  3786577334U,  // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+  3786577345U,  // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+  2599802214U,  // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+  2599802670U,  // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+  2581889126U,  // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+  3643687936U,  // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+  2663240186U,  // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+  3643689330U,  // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+  2581892406U,  // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+  2581892900U,  // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+  2587865597U,  // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+  3786577428U,  // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+  2581894958U,  // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+  2726254119U,  // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+  3804640817U,  // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+  3637724826U,  // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+  3734992123U,  // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+  2552040758U,  // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+  3799995992U,  // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+  2663241198U,  // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+  2712835692U,  // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+  2731562607U,  // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+  1514135654U,  // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+  1657820802U,  // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+  2587879016U,  // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+  2569963892U,  // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+  1514138934U,  // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+  2621438106U,  // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+  1514140159U,  // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+  2587882490U,  // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+  1514141486U,  // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+  1544380416U,  // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+   470638699U,  // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+  1544380580U,  // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  1658631909U,  // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+  1544380754U,  // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2665898414U,  // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+  1658853120U,  // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+  3094531625U,  // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+   470639261U,  // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+  1544381174U,  // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544381236U,  // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544381334U,  // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1544381400U,  // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2618123325U,  // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+  1544381584U,  // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2618123489U,  // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+  2726254427U,  // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+  1544381823U,  // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+  1478328422U,  // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+  2618123807U,  // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+   269271142U,  // <2,u,2,2>: Cost 1 vdup2 LHS
+  1544382118U,  // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  1478331702U,  // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+  2618124136U,  // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544382394U,  // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  3088354857U,  // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+   269271142U,  // <2,u,2,u>: Cost 1 vdup2 LHS
+  1544382614U,  // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  2953627374U,  // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+  1490282143U,  // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+  1879883932U,  // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+  1544382978U,  // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2953627378U,  // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+  1514172931U,  // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+  1879887176U,  // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+  1879883937U,  // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+  1484316774U,  // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+  1484317639U,  // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+  2552088270U,  // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+  1190213513U,  // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+  1484320054U,  // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+   470641974U,  // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+  1592159604U,  // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  3094564393U,  // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+   470642217U,  // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+  2552094959U,  // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+  1592159952U,  // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+  2564040353U,  // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+  2690275455U,  // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+  1592160198U,  // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592160260U,  // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1611962522U,  // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  1592160424U,  // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+  1611962540U,  // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+  1478361190U,  // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+  2552103670U,  // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+  1592160762U,  // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+  2685704400U,  // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+  1478364470U,  // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+  2901891226U,  // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+  1592161080U,  // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592161102U,  // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1478367022U,  // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+  1592161274U,  // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+  2659931226U,  // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+  2564056739U,  // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+  2665903331U,  // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+  1592161638U,  // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+  2665903494U,  // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+  2587947527U,  // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+  1592161900U,  // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1592161922U,  // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+  1478377574U,  // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+   470644526U,  // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+   269271142U,  // <2,u,u,2>: Cost 1 vdup2 LHS
+  1879924892U,  // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+  1478380854U,  // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+   470644890U,  // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+  1611962765U,  // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+  1879928136U,  // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+   470645093U,  // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+  1611448320U,  // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+  1611890698U,  // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+  1611890708U,  // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+  3763576860U,  // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+  2689835045U,  // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+  3698508206U,  // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+  3763576887U,  // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+  3667678434U,  // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+  1616093258U,  // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+  1490337894U,  // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+  2685632602U,  // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+   537706598U,  // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+  2624766936U,  // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+  1490341174U,  // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+  2624767120U,  // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+  2732966030U,  // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+  2593944803U,  // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+   537706652U,  // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+  1611890852U,  // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2685632684U,  // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+  2685632692U,  // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+  2685632702U,  // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+  1611890892U,  // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2732966102U,  // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+  2624767930U,  // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+  2685632744U,  // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+  1611890924U,  // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+  2624768150U,  // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+  2685632764U,  // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+  2685632774U,  // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+  2624768412U,  // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+  2624768514U,  // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+  3702491714U,  // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+  2624768632U,  // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+  3702491843U,  // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+  2686959934U,  // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+  2689835336U,  // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+  1611891026U,  // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+  1611891036U,  // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+  3763577184U,  // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+  2689835374U,  // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+  1551027510U,  // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+  2666573172U,  // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+  3667711206U,  // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+  1616093586U,  // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+  2685190556U,  // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+  2666573520U,  // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+  3040886886U,  // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+  3625912834U,  // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+  2666573766U,  // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+  2666573828U,  // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+  2732966354U,  // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+  2666573992U,  // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+  3040886940U,  // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+  2685190637U,  // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+  2732966390U,  // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+  2689835519U,  // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+  3667724438U,  // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+  3763577355U,  // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+  3806708243U,  // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+  2666574648U,  // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+  2657948520U,  // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+  2689835573U,  // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+  2666574842U,  // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+  2685633095U,  // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+  2660603052U,  // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+  3643844997U,  // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+  2666575206U,  // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+  3655790391U,  // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+  3731690968U,  // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+  2666575468U,  // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+  2664584850U,  // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+  1616093834U,  // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+  1611891346U,  // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+   537707165U,  // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+  2689835684U,  // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+  1616093874U,  // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+  1551030426U,  // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+  2624772304U,  // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+  2594002154U,  // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+   537707219U,  // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+  2552201318U,  // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+  2618802278U,  // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+  2618802366U,  // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+  1611449078U,  // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+  2552204598U,  // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+  2732966663U,  // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+  3906258396U,  // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+  3667752171U,  // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+  1611891491U,  // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+  2689835819U,  // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+  1611449140U,  // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+  2624775063U,  // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+  1611891528U,  // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+  2689835859U,  // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+  2689835868U,  // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+  3763577701U,  // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+  3765273452U,  // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+  1611891573U,  // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+  2629420494U,  // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+  2689835911U,  // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+  2564163248U,  // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+  1611449238U,  // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+  2564164918U,  // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+  2689835947U,  // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+  3692545978U,  // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+  2732966842U,  // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+  1611891651U,  // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+  1484456038U,  // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+  1611891672U,  // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+  2685633502U,  // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+  2685633512U,  // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+  1484459318U,  // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+  1611891712U,  // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+  2689836041U,  // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+  2733409294U,  // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+  1611891735U,  // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+  2552234086U,  // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+  2732966955U,  // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+  2732966964U,  // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+  2685633597U,  // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+  2552237366U,  // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+  2618805558U,  // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+  2769472822U,  // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+  3667784943U,  // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+  2685633642U,  // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+  2689836143U,  // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+  2564187280U,  // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+  2564187827U,  // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+  1611891856U,  // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+  2689836183U,  // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+  3759375522U,  // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+  3720417378U,  // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+  2832518454U,  // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+  1611891901U,  // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+  3763578048U,  // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+  2689836239U,  // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+  2732967128U,  // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+  2685633761U,  // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+  3763578088U,  // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+  2689836275U,  // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+  3763578108U,  // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+  2732967166U,  // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+  2685633806U,  // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+  3631972454U,  // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+  2659947612U,  // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+  4036102294U,  // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+  3095396454U,  // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+  3631975734U,  // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+  2222982144U,  // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+  3296797705U,  // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+  3720418924U,  // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+  3095396459U,  // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+  1484496998U,  // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+  1611892077U,  // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+  2685633907U,  // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+  1611892092U,  // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+  1484500278U,  // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+  1611892117U,  // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+  2685633950U,  // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+  2832518697U,  // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+  1611892140U,  // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+  2623455232U,  // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+  1549713510U,  // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+  2689836484U,  // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+  2685633997U,  // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+  2623455570U,  // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+  2732967398U,  // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+  2689836524U,  // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+  2229044964U,  // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+  1549714077U,  // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+  1549714166U,  // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+  2623456052U,  // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+  2623456150U,  // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+  2685634079U,  // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+  2552286518U,  // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+  2623456400U,  // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+  2689836604U,  // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+  3667834101U,  // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+  1155385070U,  // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+  2689836629U,  // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+  2689836640U,  // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+  1611449960U,  // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+  1611892338U,  // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+  2689836669U,  // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+  2689836680U,  // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+  2689836688U,  // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+  3763578518U,  // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+  1611892383U,  // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+  1611450022U,  // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+  2685191854U,  // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+  2685191865U,  // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+  2685191875U,  // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+  1611450062U,  // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+  2732967635U,  // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+  2732967645U,  // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+  2732967652U,  // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+  1611450094U,  // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+  2558279782U,  // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+  2558280602U,  // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+  2732967692U,  // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+  2685634326U,  // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+  2558283062U,  // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+  1549716790U,  // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+  2689836844U,  // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+  2229077736U,  // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+  1549717033U,  // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+  2552316006U,  // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+  2228643507U,  // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+  2689836896U,  // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+  2685634408U,  // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+  1155122894U,  // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+  2665263108U,  // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+  2689836932U,  // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+  2665263272U,  // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+  1155417842U,  // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+  2689836953U,  // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+  2689836964U,  // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+  2689836976U,  // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+  1611892666U,  // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+  2689836993U,  // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+  2689837004U,  // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+  2689837013U,  // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+  2665263950U,  // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+  1611892711U,  // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+  2665264122U,  // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+  2623460419U,  // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+  4169138340U,  // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+  2962358374U,  // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+  2665264486U,  // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+  2228954841U,  // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+  2229028578U,  // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+  2665264748U,  // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+  2962358379U,  // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+  1611892795U,  // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+  1549719342U,  // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+  1611449960U,  // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+  1611892824U,  // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+  1611892835U,  // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+  1549719706U,  // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+  2689837168U,  // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+  2665265408U,  // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+  1611892867U,  // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+  2685192331U,  // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+  1611450518U,  // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+  2685634717U,  // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+  2564294806U,  // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+  2685634736U,  // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+  2732968122U,  // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+  3763579075U,  // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+  4034053264U,  // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+  1611450581U,  // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+  2685192415U,  // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+  1550385992U,  // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+  2685192433U,  // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+  2685634808U,  // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+  2558332214U,  // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+  2685634828U,  // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+  3759376661U,  // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+  2703477022U,  // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+  1555031423U,  // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+  2564309094U,  // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+  2630100513U,  // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+  1557022322U,  // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+  2685192520U,  // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+  2564312374U,  // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+  2732968286U,  // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+  2685634918U,  // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+  2704140655U,  // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+  1561004120U,  // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+  1496547430U,  // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+  2624129256U,  // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+  2630764866U,  // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+   336380006U,  // <3,3,3,3>: Cost 1 vdup3 LHS
+  1496550710U,  // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+  2732968368U,  // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+  2624129683U,  // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+  2594182400U,  // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+   336380006U,  // <3,3,3,u>: Cost 1 vdup3 LHS
+  2558353510U,  // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+  2558354411U,  // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+  2564327108U,  // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+  2564327938U,  // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+  2960343962U,  // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+  1611893250U,  // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+  2771619126U,  // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+  4034086032U,  // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+  1611893277U,  // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+  2558361702U,  // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+  2558362604U,  // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+  2558363342U,  // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+  2732968512U,  // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+  2558364982U,  // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+  3101279950U,  // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+  2665934946U,  // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+  2826636598U,  // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+  2826636599U,  // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+  2732968568U,  // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+  3763579521U,  // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+  2732968586U,  // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+  2732968595U,  // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+  2732968604U,  // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+  3763579557U,  // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+  2732968621U,  // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+  2657973099U,  // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+  2658636732U,  // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+  2558378086U,  // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+  2558378990U,  // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+  2564351687U,  // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+  2661291264U,  // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+  2558381366U,  // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+  2732968694U,  // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+  3781126907U,  // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+  3095397376U,  // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+  2558383918U,  // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+  1496547430U,  // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+  1611893534U,  // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+  1592858504U,  // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+   336380006U,  // <3,3,u,3>: Cost 1 vdup3 LHS
+  1496550710U,  // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+  1611893574U,  // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+  2690280268U,  // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+  2826636841U,  // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+   336380006U,  // <3,3,u,u>: Cost 1 vdup3 LHS
+  2624798720U,  // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+  1551056998U,  // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+  2624798884U,  // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+  3693232384U,  // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+  2624799058U,  // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+  1659227026U,  // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+  1659227036U,  // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+  3667973382U,  // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+  1551057565U,  // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+  2624799478U,  // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+  2624799540U,  // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+  1551057818U,  // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+  2624799704U,  // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+  2564377910U,  // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+  2689838050U,  // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+  2689838062U,  // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+  2628117807U,  // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+  1555039616U,  // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+  3626180710U,  // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+  2624800298U,  // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+  2624800360U,  // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+  2624800422U,  // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+  2624800514U,  // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+  2709965878U,  // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+  2689838140U,  // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+  2634090504U,  // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+  2689838158U,  // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+  2624800918U,  // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+  2636081403U,  // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+  2636745036U,  // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+  2624801180U,  // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+  2624801232U,  // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+  2905836854U,  // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+  3040054582U,  // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+  3702524611U,  // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+  2624801566U,  // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+  2564399206U,  // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+  2564400026U,  // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+  2564400845U,  // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+  2570373542U,  // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+  1659227344U,  // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+  1551060278U,  // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+  1659227364U,  // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+  3668006154U,  // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+  1551060521U,  // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+  1490665574U,  // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+  2689838341U,  // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+  1490667214U,  // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+  2564409494U,  // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+  1490668854U,  // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+  2689838381U,  // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+   537709878U,  // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+  2594272523U,  // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+   537709896U,  // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+  2689838411U,  // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+  2558444534U,  // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+  2666607098U,  // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+  2558446082U,  // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+  1659227508U,  // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+  2689838462U,  // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+  2689838471U,  // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+  2657981292U,  // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+  1659227540U,  // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+  2666607610U,  // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+  3702527072U,  // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+  2660635824U,  // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+  3644139945U,  // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+  2666607974U,  // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+  2732969416U,  // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+  2732969425U,  // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+  2666608236U,  // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+  2664617622U,  // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+  1490690150U,  // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+  1551062830U,  // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+  1490691793U,  // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+  2624804796U,  // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+  1490693430U,  // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+  1551063194U,  // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+   537710121U,  // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+  2594297102U,  // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+   537710139U,  // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+  3692576768U,  // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+  2618835046U,  // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+  2618835138U,  // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+  3692577024U,  // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+  2689838690U,  // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+  2732969579U,  // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+  2732969588U,  // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+  2246963055U,  // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+  2618835613U,  // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+  2594308198U,  // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+  3692577588U,  // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+  2624807835U,  // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+  2625471468U,  // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+  2626135101U,  // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+  2594311888U,  // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+  3699877107U,  // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+  1641680592U,  // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+  1641754329U,  // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+  3692578274U,  // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+  2630116899U,  // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+  3692578408U,  // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+  2625472206U,  // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+  2632107798U,  // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+  2715938575U,  // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+  3692578746U,  // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+  2716086049U,  // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+  2634762330U,  // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+  3692578966U,  // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+  2636089596U,  // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+  3699214668U,  // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+  2638080412U,  // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+  2618837506U,  // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+  2832844494U,  // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+  4033415682U,  // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+  3095072054U,  // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+  3095072055U,  // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+  2600304742U,  // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+  3763580815U,  // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+  2564474582U,  // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+  3699879044U,  // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+  2600308022U,  // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+  2618838326U,  // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+  2772454710U,  // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+  1659228102U,  // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+  1659228111U,  // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+  2570453094U,  // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+  2624810704U,  // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+  2570454734U,  // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+  2570455472U,  // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+  2570456374U,  // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+  1659228164U,  // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+  2732969998U,  // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+  1659228184U,  // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+  1659228193U,  // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+  2732970020U,  // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+  2732970035U,  // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+  2564490968U,  // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+  2732970050U,  // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+  2732970060U,  // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+  2732970071U,  // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+  2732970080U,  // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+  1659228258U,  // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+  1659228267U,  // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+  1484783718U,  // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+  1484784640U,  // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+  2558527080U,  // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+  2558527638U,  // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+  1484786998U,  // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+  1659228328U,  // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+  2732970154U,  // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+  2558531180U,  // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+  1484789550U,  // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+  1484791910U,  // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+  1484792833U,  // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+  2558535272U,  // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+  2558535830U,  // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+  1484795190U,  // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+  1659228409U,  // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+  2772457626U,  // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+  1646326023U,  // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+  1484797742U,  // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+  2558541926U,  // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+  2689839393U,  // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+  2689839404U,  // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+  3706519808U,  // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+  2689839420U,  // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+  2732970314U,  // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+  2732970316U,  // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+  2960313654U,  // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+  2689839456U,  // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+  3763581290U,  // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+  3763581297U,  // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+  2624816028U,  // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+  3763581315U,  // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+  2626143294U,  // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+  3763581335U,  // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+  2721321376U,  // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+  2721395113U,  // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+  2628797826U,  // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+  2594390118U,  // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+  2721616324U,  // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+  2630788725U,  // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+  3763581395U,  // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+  2632115991U,  // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+  2632779624U,  // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+  2594394618U,  // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+  1648316922U,  // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+  1648390659U,  // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+  3693914262U,  // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+  3638281176U,  // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+  3696568678U,  // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+  2638088604U,  // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+  2632780290U,  // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+  3712494145U,  // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+  3698559612U,  // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+  2959674678U,  // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+  2959674679U,  // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+  3763581536U,  // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+  2722943590U,  // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+  2732970609U,  // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+  3698560147U,  // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+  2732970628U,  // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+  2689839757U,  // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+  2732970640U,  // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+  2960346422U,  // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+  2689839784U,  // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+  2576498790U,  // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+  3650241270U,  // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+  2732970692U,  // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+  2576501250U,  // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+  2576501906U,  // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+  3650244622U,  // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+  4114633528U,  // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+  2732970735U,  // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+  2576504622U,  // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+  2732970749U,  // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+  2724270856U,  // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+  2624819706U,  // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+  3656223234U,  // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+  2732970788U,  // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+  2732970800U,  // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+  1659228984U,  // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1659228994U,  // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+  1659229003U,  // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+  1659229006U,  // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+  2558600201U,  // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+  2558601146U,  // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+  2725081963U,  // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+  1659229046U,  // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+  2715423611U,  // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+  2722059141U,  // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+  2962361654U,  // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+  1659229078U,  // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+  1659229087U,  // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+  2689840041U,  // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+  2558609339U,  // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+  2576525853U,  // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+  1659229127U,  // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+  2689840081U,  // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+  1659228984U,  // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1652298720U,  // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+  1659229159U,  // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+  2626813952U,  // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+  1553072230U,  // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626814116U,  // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+  3700556028U,  // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+  2626814290U,  // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+  2582507375U,  // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+  2588480072U,  // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+  2732971055U,  // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+  1553072797U,  // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626814710U,  // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+  2626814772U,  // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+  2626814870U,  // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+  2625487854U,  // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+  2582514998U,  // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+  1553073296U,  // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+  2627478753U,  // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+  2727367810U,  // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+  1555064195U,  // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+  2588491878U,  // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+  3700557318U,  // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+  2626815592U,  // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+  2626815654U,  // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+  2588495158U,  // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+  2632787817U,  // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+  1559709626U,  // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+  2728031443U,  // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+  1561036892U,  // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+  2626816150U,  // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+  2626816268U,  // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+  2633451878U,  // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+  2626816412U,  // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+  2626816514U,  // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+  2638760514U,  // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+  2639424147U,  // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+  2826961920U,  // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+  2626816798U,  // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+  2582536294U,  // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+  2582537360U,  // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+  2588510138U,  // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+  3700558996U,  // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+  2582539574U,  // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+  1553075510U,  // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+  2588512844U,  // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+  2564625766U,  // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+  1553075753U,  // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+  2732971398U,  // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+  2626817744U,  // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+  3700559649U,  // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+  2626817903U,  // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+  2258728203U,  // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+  2732971446U,  // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+  2732971457U,  // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+  2826964278U,  // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+  2826964279U,  // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+  2732971478U,  // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+  2732971486U,  // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+  2633454074U,  // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+  2633454152U,  // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+  2732971518U,  // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+  2732971526U,  // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+  2732971537U,  // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+  2732971540U,  // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+  2726041124U,  // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+  2570616934U,  // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+  2570617856U,  // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+  2564646635U,  // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+  2570619332U,  // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+  2570620214U,  // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+  2582564726U,  // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+  2588537423U,  // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+  1659229804U,  // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1659229804U,  // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+  2626819795U,  // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+  1553078062U,  // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+  2626819973U,  // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+  2826961565U,  // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+  2626820159U,  // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+  1553078426U,  // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+  1595545808U,  // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+  1659229804U,  // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1553078629U,  // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+  1611448320U,  // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+  1611896531U,  // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+  1659672284U,  // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+  1616099045U,  // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+  2685638381U,  // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+  1663874806U,  // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+  1663874816U,  // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+  2960313672U,  // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+  1611896594U,  // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+  1549763324U,  // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+  1550426957U,  // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+   537712430U,  // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+  1616541495U,  // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+  1490930998U,  // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+  1553081489U,  // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+  2627486946U,  // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+  1659230043U,  // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+   537712484U,  // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+  1611890852U,  // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2624833102U,  // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+  1557063287U,  // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+  1616099205U,  // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+  1611890892U,  // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2689841054U,  // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+  1559717819U,  // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+  1659230124U,  // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+  1616541618U,  // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+  1611896764U,  // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+  1484973079U,  // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+  2685638607U,  // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+   336380006U,  // <3,u,3,3>: Cost 1 vdup3 LHS
+  1611896804U,  // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+  1616541679U,  // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+  2690283512U,  // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+  2959674696U,  // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+   336380006U,  // <3,u,3,u>: Cost 1 vdup3 LHS
+  2558722150U,  // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+  1659672602U,  // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+  1659672612U,  // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+  2689841196U,  // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+  1659227344U,  // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+  1611896895U,  // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+  1663875144U,  // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+  1659230289U,  // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+  1611896922U,  // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+  1490960486U,  // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+  2689841261U,  // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+  1490962162U,  // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+  1616541823U,  // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+  1490963766U,  // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+  1659228164U,  // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+   537712794U,  // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+  1659230371U,  // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+   537712812U,  // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+  2689841327U,  // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+  2558739482U,  // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+  2689841351U,  // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+  1616099536U,  // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+  1659227508U,  // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+  2690283746U,  // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+  1659228984U,  // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+  1659230445U,  // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+  1616099581U,  // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+  1485004902U,  // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+  1485005851U,  // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+  2558748264U,  // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+  3095397021U,  // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+  1485008182U,  // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+  1659228328U,  // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+  2722060599U,  // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+  1659229804U,  // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+  1485010734U,  // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+  1616099665U,  // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+  1611897179U,  // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+   537712997U,  // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+   336380006U,  // <3,u,u,3>: Cost 1 vdup3 LHS
+  1616099705U,  // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+  1611897219U,  // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+   537713037U,  // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+  1659230607U,  // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+   537713051U,  // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+  2691907584U,  // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+  2691907594U,  // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+  2691907604U,  // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+  3709862144U,  // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+  2684682280U,  // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+  3694600633U,  // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+  3291431290U,  // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+  3668342067U,  // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+  2691907657U,  // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+  2570715238U,  // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+  2570716058U,  // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+  1618165862U,  // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2570717648U,  // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+  2570718518U,  // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+  2594607206U,  // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+  3662377563U,  // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+  2594608436U,  // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+  1618165916U,  // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2685714598U,  // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+  3759530159U,  // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+  2685862072U,  // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+  2631476937U,  // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+  2685714636U,  // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+  3765649622U,  // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+  2686157020U,  // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+  3668358453U,  // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+  2686304494U,  // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+  3632529510U,  // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+  2686451968U,  // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+  2686525705U,  // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+  3760341266U,  // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+  3632532790U,  // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+  3913254606U,  // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+  3705219740U,  // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+  3713845990U,  // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+  2686451968U,  // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+  2552823910U,  // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+  2691907922U,  // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+  2691907932U,  // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+  3626567830U,  // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+  2552827190U,  // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+  2631478582U,  // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+  3626570017U,  // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+  3668374839U,  // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+  2552829742U,  // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+  2558804070U,  // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+  1839644774U,  // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+  2913386660U,  // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+  2570750420U,  // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+  2558807350U,  // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+  3987128750U,  // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+  3987128822U,  // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+  2594641208U,  // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+  1839645341U,  // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+  2552840294U,  // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+  3047604234U,  // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+  1973862502U,  // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+  2570758613U,  // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+  2552843574U,  // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+  2217664887U,  // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+  3662418528U,  // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+  2658022257U,  // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+  1973862556U,  // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+  3731764218U,  // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+  3988324454U,  // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+  4122034278U,  // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+  3735082246U,  // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+  3731764536U,  // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+  3937145718U,  // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+  3737073145U,  // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+  3731764844U,  // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+  4122034332U,  // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+  2552856678U,  // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+  1841635430U,  // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+  1618166429U,  // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2570774999U,  // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+  2552859958U,  // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+  2631481498U,  // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+  2686157020U,  // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+  2594665787U,  // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+  1618166483U,  // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2617548837U,  // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+  2622857318U,  // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+  3693281484U,  // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+  2691908342U,  // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+  2622857554U,  // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+  3764470538U,  // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+  3695272459U,  // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+  3733094980U,  // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+  2622857885U,  // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+  3696599798U,  // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+  2691097399U,  // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+  2631484314U,  // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+  2691908424U,  // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+  3696600125U,  // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+  3696600175U,  // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+  3696600307U,  // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+  3668423997U,  // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+  2691908469U,  // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+  2570797158U,  // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+  2570797978U,  // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+  3696600680U,  // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+  1618166682U,  // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+  2570800438U,  // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+  3765650347U,  // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+  3696601018U,  // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+  3668432190U,  // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+  1618535367U,  // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+  2564833382U,  // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+  2691908568U,  // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+  2691908578U,  // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+  2692572139U,  // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+  2564836662U,  // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+  2691908608U,  // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+  2588725862U,  // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  3662468090U,  // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+  2691908631U,  // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+  3760194590U,  // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+  3693947874U,  // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+  3765650484U,  // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+  3113877606U,  // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+  3760194630U,  // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+  2622860598U,  // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+  3297436759U,  // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+  3800007772U,  // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+  2622860841U,  // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+  1479164006U,  // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+  2552906486U,  // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+  2552907299U,  // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+  2552907926U,  // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+  1479167286U,  // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+  2913387664U,  // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+  2600686074U,  // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+  2600686586U,  // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+  1479169838U,  // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+  2552914022U,  // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+  2558886708U,  // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+  4028205206U,  // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+  3089858662U,  // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+  2552917302U,  // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+  2223637584U,  // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+  4121347081U,  // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+  3721155406U,  // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+  2552919854U,  // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+  2659357716U,  // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+  3733763173U,  // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+  3734426806U,  // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+  2695226671U,  // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+  3721155942U,  // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+  3721155976U,  // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+  3662500458U,  // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+  3721156204U,  // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+  2659357716U,  // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+  1479188582U,  // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+  2552931062U,  // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+  2552931944U,  // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+  1622148480U,  // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+  1479191862U,  // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+  2622863514U,  // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+  2588725862U,  // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  2600686586U,  // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+  1479194414U,  // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+  2617557030U,  // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+  2622865510U,  // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+  2622865612U,  // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+  3693289753U,  // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+  2635473244U,  // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+  3765650918U,  // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+  2696775148U,  // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+  3695944285U,  // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+  2622866077U,  // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+  3696607990U,  // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+  3696608052U,  // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+  3696608150U,  // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+  3895574630U,  // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+  2691909162U,  // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+  3696608400U,  // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+  3760784956U,  // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+  3773908549U,  // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+  2691909162U,  // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+  3696608748U,  // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+  3696608828U,  // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+  2691909224U,  // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+  2691909234U,  // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+  3759605368U,  // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+  3696609156U,  // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+  3760785040U,  // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+  3668505927U,  // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+  2691909279U,  // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+  2691909286U,  // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+  3764840111U,  // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+  3765651129U,  // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+  2698544836U,  // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+  2685863630U,  // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+  2698692310U,  // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+  3772507871U,  // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+  2698839784U,  // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+  2691909358U,  // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+  2564915302U,  // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+  2564916122U,  // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+  2564917004U,  // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+  2699208469U,  // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+  2564918582U,  // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+  2622868790U,  // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+  2229667632U,  // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+  3800082229U,  // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+  2622869033U,  // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+  2552979558U,  // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+  2558952342U,  // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+  2564925032U,  // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+  2967060582U,  // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+  2552982838U,  // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+  3987130190U,  // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+  2913388474U,  // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+  3895577910U,  // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+  2552985390U,  // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+  1479245926U,  // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+  2552988406U,  // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+  2552989288U,  // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+  2954461286U,  // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+  1479249206U,  // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+  2229610281U,  // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+  2600767994U,  // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+  2600768506U,  // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+  1479251758U,  // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+  2659365909U,  // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+  3733771366U,  // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+  3734434999U,  // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+  2701199368U,  // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+  4175774618U,  // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+  3303360298U,  // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+  3727136217U,  // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+  3727136364U,  // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+  2659365909U,  // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+  1479262310U,  // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+  2553004790U,  // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+  2553005672U,  // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+  2954477670U,  // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+  1479265590U,  // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+  2622871706U,  // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+  2229700404U,  // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+  2600784890U,  // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+  1479268142U,  // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+  3765651595U,  // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+  2691909782U,  // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+  2702452897U,  // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+  3693297946U,  // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+  3760711856U,  // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+  2235533820U,  // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+  3309349381U,  // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+  3668563278U,  // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+  2691909845U,  // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+  2235173328U,  // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+  3764840678U,  // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+  2630173594U,  // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+  2703190267U,  // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+  3760195840U,  // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+  3765651724U,  // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+  3309357574U,  // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+  3769633054U,  // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+  2703558952U,  // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+  3626770534U,  // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+  2630174250U,  // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+  3765651777U,  // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+  2703853900U,  // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+  3626773814U,  // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+  2704001374U,  // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+  3765651814U,  // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+  3769633135U,  // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+  2634819681U,  // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+  3765651839U,  // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+  3765651848U,  // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+  3710552404U,  // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+  2691910044U,  // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+  2704591270U,  // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+  3769633202U,  // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+  3703917212U,  // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+  3769633220U,  // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+  2691910044U,  // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+  2691910096U,  // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+  2691910106U,  // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+  2564990741U,  // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+  3765651946U,  // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+  2691910136U,  // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+  2686454274U,  // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+  2235640329U,  // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+  3801483792U,  // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+  2691910168U,  // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+  2559025254U,  // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+  2559026237U,  // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+  2564998862U,  // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+  2570971548U,  // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+  2559028534U,  // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+  4163519477U,  // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+  3309390346U,  // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+  2706139747U,  // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+  2559031086U,  // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+  2559033446U,  // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+  2559034430U,  // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+  2565007127U,  // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+  2570979740U,  // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+  2559036726U,  // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+  1161841154U,  // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+  4028203932U,  // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+  2706803380U,  // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+  1162062365U,  // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+  3769633475U,  // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+  3769633488U,  // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+  3638757144U,  // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+  3769633508U,  // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+  3769633515U,  // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+  3769633526U,  // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+  3662647932U,  // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+  3781208837U,  // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+  3769633547U,  // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+  2559049830U,  // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+  2691910430U,  // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+  2565023513U,  // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+  2707835698U,  // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+  2559053110U,  // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+  1161857540U,  // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+  2235673101U,  // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+  2708130646U,  // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+  1162078751U,  // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+  2617573416U,  // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+  1570373734U,  // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+  2779676774U,  // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+  3760196480U,  // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+  2576977100U,  // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+  2718747538U,  // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+  2718747548U,  // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+  3668637015U,  // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+  1570374301U,  // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+  2644116214U,  // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+  2644116276U,  // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+  2691910602U,  // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+  2644116440U,  // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+  2711227356U,  // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+  2709310438U,  // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+  3765652462U,  // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+  3768970231U,  // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+  2695891968U,  // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+  3703260634U,  // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+  3765652499U,  // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+  2644117096U,  // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+  2631509709U,  // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+  2644117269U,  // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+  3705251698U,  // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+  2710047808U,  // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+  3783863369U,  // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+  2634827874U,  // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+  2644117654U,  // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+  3638797210U,  // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+  3638798082U,  // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+  2637482406U,  // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+  2638146039U,  // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+  3913287374U,  // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+  3765652625U,  // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+  3713878762U,  // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+  2637482406U,  // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+  1503264870U,  // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+  2577007514U,  // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+  2577008232U,  // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+  2571037175U,  // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+   161926454U,  // <4,4,4,4>: Cost 1 vdup0 RHS
+  1570377014U,  // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+  2779680054U,  // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+  2594927963U,  // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+   161926454U,  // <4,4,4,u>: Cost 1 vdup0 RHS
+  2571042918U,  // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+  2571043738U,  // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+  3638814495U,  // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+  2571045368U,  // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+  2571046198U,  // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+  1839648054U,  // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+  1618169142U,  // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  2594936156U,  // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+  1618169160U,  // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+  2553135206U,  // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+  3626877686U,  // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+  2565080782U,  // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+  2571053561U,  // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+  2553138486U,  // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+  2241555675U,  // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+  1973865782U,  // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+  2658055029U,  // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+  1973865800U,  // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+  2644120570U,  // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+  3638829978U,  // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+  3638830881U,  // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+  3735115018U,  // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+  2662036827U,  // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+  2713292236U,  // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+  2713365973U,  // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+  2644121196U,  // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+  2662036827U,  // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+  1503297638U,  // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+  1570379566U,  // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+  2779682606U,  // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+  2571069947U,  // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+   161926454U,  // <4,4,u,4>: Cost 1 vdup0 RHS
+  1841638710U,  // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+  1618169385U,  // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  2594960735U,  // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+   161926454U,  // <4,4,u,u>: Cost 1 vdup0 RHS
+  2631516160U,  // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+  1557774438U,  // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+  2618908875U,  // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+  2571078140U,  // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+  2626871634U,  // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+  3705258414U,  // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+  2594968438U,  // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+  2594968928U,  // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+  1557775005U,  // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+  2631516918U,  // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+  2624217939U,  // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+  2631517078U,  // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+  2821341286U,  // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+  3895086054U,  // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+  2626872471U,  // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+  3895083131U,  // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+  2718748368U,  // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+  2821341291U,  // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+  2571092070U,  // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+  3699287585U,  // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+  2630854269U,  // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+  1557776078U,  // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+  2631517974U,  // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+  3692652384U,  // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+  2631518138U,  // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+  4164013366U,  // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+  1561094243U,  // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+  2631518358U,  // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+  3895084710U,  // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+  2631518540U,  // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+  2631518620U,  // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+  2631518716U,  // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+  2631518784U,  // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+  2658060980U,  // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+  2640145131U,  // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+  2631519006U,  // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+  2571108454U,  // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+  3632907342U,  // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+  2571110094U,  // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+  2571110912U,  // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+  2571111734U,  // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+  1557777718U,  // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+  2645454195U,  // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+  2718748614U,  // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+  1557777961U,  // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+  1503346790U,  // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+  2913398480U,  // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+  2631519998U,  // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+  2577090710U,  // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+  1503349978U,  // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+  2631520260U,  // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+  2913390690U,  // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+  2821344566U,  // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+  1503352622U,  // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+  1497383014U,  // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+  2559181904U,  // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+  2565154601U,  // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+  1497385474U,  // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+  1497386294U,  // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+  3047608324U,  // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+  2571129656U,  // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+    27705344U,  // <4,5,6,7>: Cost 0 copy RHS
+    27705344U,  // <4,5,6,u>: Cost 0 copy RHS
+  2565161062U,  // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+  2565161882U,  // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+  2565162794U,  // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+  2661381387U,  // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+  2565164342U,  // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+  2718748840U,  // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+  2718748846U,  // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+  2719412407U,  // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+  2565166894U,  // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+  1497399398U,  // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+  1557780270U,  // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+  2631522181U,  // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+  1497401860U,  // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+  1497402678U,  // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+  1557780634U,  // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+  2631522512U,  // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+    27705344U,  // <4,5,u,7>: Cost 0 copy RHS
+    27705344U,  // <4,5,u,u>: Cost 0 copy RHS
+  2618916864U,  // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+  1545175142U,  // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+  1545175244U,  // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+  3692658940U,  // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+  2618917202U,  // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+  3852910806U,  // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+  2253525648U,  // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+  4040764726U,  // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+  1545175709U,  // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+  2618917622U,  // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+  2618917684U,  // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+  2618917782U,  // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+  2618917848U,  // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+  3692659773U,  // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+  2618918032U,  // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+  3692659937U,  // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+  4032146742U,  // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+  2618918253U,  // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+  2618918380U,  // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+  2618918460U,  // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+  2618918504U,  // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+  2618918566U,  // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+  2618918679U,  // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+  2618918788U,  // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+  2618918842U,  // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+  2718749178U,  // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+  2618918971U,  // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+  2618919062U,  // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+  2636171526U,  // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+  3692661057U,  // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+  2618919324U,  // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+  2618919426U,  // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+  2638826058U,  // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+  3913303030U,  // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+  2722730572U,  // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+  2618919710U,  // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+  2565210214U,  // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+  2718749286U,  // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+  2565211952U,  // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+  2571184649U,  // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+  2565213494U,  // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+  1545178422U,  // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+  1705430326U,  // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+  2595075437U,  // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+  1545178665U,  // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+  2565218406U,  // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+  2645462736U,  // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+  2913399290U,  // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+  3913305394U,  // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+  2645462982U,  // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+  2779172868U,  // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+  2913391416U,  // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+  2821426486U,  // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+  2821426487U,  // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+  1503428710U,  // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+  2577171190U,  // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+  2645463546U,  // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+  2577172630U,  // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+  1503431908U,  // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+  2253501069U,  // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+  2618921784U,  // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+  2954464566U,  // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+  1503434542U,  // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+  2645464058U,  // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+  2779173882U,  // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+  3638978355U,  // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+  2725090156U,  // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+  2645464422U,  // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+  2779174246U,  // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+  3852915914U,  // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+  2779174508U,  // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+  2779173945U,  // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+  1503445094U,  // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+  1545180974U,  // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+  1705432878U,  // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+  2618922940U,  // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+  1503448294U,  // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+  1545181338U,  // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+  1705433242U,  // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+  2954480950U,  // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+  1545181541U,  // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+  3706601472U,  // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+  2632859750U,  // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+  2726343685U,  // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+  3701293312U,  // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+  3706601810U,  // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+  2259424608U,  // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+  3695321617U,  // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+  3800454194U,  // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+  2632860317U,  // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+  2259064116U,  // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+  3700630324U,  // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+  2632860570U,  // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+  3769635936U,  // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+  3656920374U,  // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+  3700630681U,  // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+  3701294314U,  // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+  3793818754U,  // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+  2259654012U,  // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+  3656925286U,  // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+  3706603050U,  // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+  3706603112U,  // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+  2727744688U,  // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+  3705939745U,  // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+  2632861554U,  // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+  3706603450U,  // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+  3792491731U,  // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+  2634852453U,  // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+  3706603670U,  // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+  3662906266U,  // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+  3725183326U,  // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+  3706603932U,  // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+  3701295618U,  // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+  2638834251U,  // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+  2639497884U,  // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+  3802445093U,  // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+  2640825150U,  // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+  2718750004U,  // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+  3706604490U,  // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+  3656943474U,  // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+  3779884371U,  // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+  2259383643U,  // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+  2632863030U,  // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+  2259531117U,  // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+  3907340074U,  // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+  2632863273U,  // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+  2913391610U,  // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+  3645006848U,  // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+  2589181646U,  // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+  3645008403U,  // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+  2913391974U,  // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+  2583211973U,  // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+  2589184670U,  // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+  2913392236U,  // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+  2913392258U,  // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+  1509474406U,  // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+  3047609338U,  // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+  2583217768U,  // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+  2583218326U,  // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+  1509477686U,  // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+  1509478342U,  // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+  2583220730U,  // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+  3047609964U,  // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+  1509480238U,  // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+  3650994278U,  // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+  3650995098U,  // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+  3650996010U,  // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+  3804804677U,  // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+  3650997486U,  // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+  2662725039U,  // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+  3662942880U,  // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+  2718750316U,  // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+  2664715938U,  // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+  1509490790U,  // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+  2632865582U,  // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+  2583234152U,  // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+  2583234710U,  // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+  1509494070U,  // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+  1509494728U,  // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+  2583237114U,  // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+  3047757420U,  // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+  1509496622U,  // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+  2618933248U,  // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+  1545191526U,  // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+  1545191630U,  // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+  2691913445U,  // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+  2618933586U,  // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+  2265397305U,  // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+  2595189625U,  // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+  2595190139U,  // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+  1545192093U,  // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+  2618934006U,  // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+  2618934068U,  // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+  1618171694U,  // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  2618934232U,  // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+  2695894848U,  // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+  2618934416U,  // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+  3692676321U,  // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+  2718750555U,  // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+  1618171748U,  // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+  2553397350U,  // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+  2630215215U,  // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+  2618934888U,  // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+  1557800657U,  // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+  2618935065U,  // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+  2733864859U,  // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+  2618935226U,  // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+  2718750636U,  // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+  1561118822U,  // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+  2618935446U,  // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+  2779318422U,  // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+  2636851545U,  // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+  2618935708U,  // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+  2618935810U,  // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+  2691913711U,  // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+  2588725862U,  // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+  2640169710U,  // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+  2618936094U,  // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+  1503559782U,  // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+  2692282391U,  // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+  2565359426U,  // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+  2571332123U,  // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+   161926454U,  // <4,u,4,4>: Cost 1 vdup0 RHS
+  1545194806U,  // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+  1705577782U,  // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+  2718750801U,  // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+   161926454U,  // <4,u,4,u>: Cost 1 vdup0 RHS
+  1479164006U,  // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+  1839650606U,  // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+  2565367502U,  // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+  3089777309U,  // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+  1479167286U,  // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+  1839650970U,  // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+  1618172058U,  // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+  3089780265U,  // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+  1618172076U,  // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+  1479688294U,  // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+  2553430774U,  // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+  1973868334U,  // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+  1497606685U,  // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+  1479691574U,  // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+  1509552079U,  // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+  1973868698U,  // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+    27705344U,  // <4,u,6,7>: Cost 0 copy RHS
+    27705344U,  // <4,u,6,u>: Cost 0 copy RHS
+  2565382246U,  // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+  2565383066U,  // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+  2565384005U,  // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+  2661405966U,  // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+  2565385526U,  // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+  2779321702U,  // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+  2589274793U,  // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+  2779321964U,  // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+  2565388078U,  // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+  1479704678U,  // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+  1545197358U,  // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+  1618172261U,  // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+  1497623071U,  // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+   161926454U,  // <4,u,u,4>: Cost 1 vdup0 RHS
+  1545197722U,  // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+  1618172301U,  // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+    27705344U,  // <4,u,u,7>: Cost 0 copy RHS
+    27705344U,  // <4,u,u,u>: Cost 0 copy RHS
+  2687123456U,  // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+  2687123466U,  // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+  2687123476U,  // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+  3710599434U,  // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+  2642166098U,  // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+  3657060306U,  // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+  3292094923U,  // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+  3669005700U,  // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+  2687123530U,  // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+  2559434854U,  // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+  2559435887U,  // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+  1613381734U,  // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  3698656256U,  // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+  2559438134U,  // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+  2583326675U,  // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+  3715908851U,  // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+  3657069562U,  // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+  1613381788U,  // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2686017700U,  // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+  2685796528U,  // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+  2698625208U,  // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+  2685944002U,  // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+  2686017739U,  // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+  2686091476U,  // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+  2725167324U,  // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+  2595280230U,  // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+  2686312687U,  // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+  3760128248U,  // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+  3759685888U,  // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+  2686533898U,  // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+  3760349459U,  // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+  2638187004U,  // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+  3776348452U,  // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+  3713256094U,  // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+  3914064896U,  // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+  2686976320U,  // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+  2559459430U,  // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+  1613381970U,  // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+  2687123804U,  // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+  3761013092U,  // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+  2559462710U,  // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+  2638187830U,  // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+  3761234303U,  // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+  2646150600U,  // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+  1613381970U,  // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+  3766763926U,  // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+  2919268454U,  // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+  3053486182U,  // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+  3723210589U,  // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+  3766763966U,  // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+  2650796031U,  // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+  3719893090U,  // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+  3914067254U,  // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+  2919269021U,  // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+  4047519744U,  // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+  2920038502U,  // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+  3759759871U,  // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+  3645164070U,  // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+  3762414095U,  // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+  3993780690U,  // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+  3719893816U,  // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+  2662077302U,  // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+  2920039069U,  // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+  2565455974U,  // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+  2565456790U,  // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+  2565457742U,  // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+  3639199894U,  // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+  2565459254U,  // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+  2589347938U,  // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+  2589348530U,  // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+  4188456422U,  // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+  2565461806U,  // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+  2687124106U,  // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+  1616036502U,  // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+  1613382301U,  // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  2689925800U,  // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+  2687124146U,  // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+  2638190746U,  // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+  2589356723U,  // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+  2595280230U,  // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+  1613382355U,  // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2646818816U,  // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+  1573077094U,  // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+  2646818980U,  // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+  2687124214U,  // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+  2641510738U,  // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+  2641510814U,  // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+  3720561142U,  // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+  3298141357U,  // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+  1573077661U,  // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+  2223891567U,  // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+  2687124276U,  // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+  2646819734U,  // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+  2687124296U,  // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+  2691326803U,  // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+  2691400540U,  // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+  3765216101U,  // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+  3765289838U,  // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+  2687124341U,  // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+  3297641584U,  // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+  3763520391U,  // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+  2646820456U,  // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+  2687124374U,  // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+  2691990436U,  // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+  2687124395U,  // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+  2646820794U,  // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+  3808199610U,  // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+  2687124419U,  // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+  2577440870U,  // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+  2687124440U,  // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+  3759686627U,  // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+  2692580332U,  // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+  2687124469U,  // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+  2685207552U,  // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+  3760866313U,  // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+  2692875280U,  // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+  2687124503U,  // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+  1567771538U,  // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+  2693096491U,  // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+  2693170228U,  // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+  2687124541U,  // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+  2646822096U,  // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+  1573080374U,  // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+  2646822260U,  // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+  3298174129U,  // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+  1573080602U,  // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+  2687124591U,  // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+  2646822543U,  // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+  3760866433U,  // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+  2687124624U,  // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+  2687124631U,  // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+  2646822916U,  // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+  2646823010U,  // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+  2646823080U,  // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+  2687124663U,  // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+  2553577574U,  // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+  3763520719U,  // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+  2646823418U,  // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+  3760866529U,  // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+  2553580854U,  // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+  2687124723U,  // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+  2646823736U,  // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+  2646823758U,  // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+  2646823839U,  // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+  2559557734U,  // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+  2559558452U,  // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+  2571503270U,  // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+  2040971366U,  // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+  2559561014U,  // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+  2595393232U,  // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+  4188455035U,  // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+  2646824556U,  // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+  2040971371U,  // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+  1591662326U,  // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+  1573082926U,  // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+  2695824760U,  // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+  2040979558U,  // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+  2687124874U,  // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+  1573083290U,  // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+  2646825168U,  // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+  2646825216U,  // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+  2040979563U,  // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+  3702652928U,  // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+  2628911206U,  // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+  2641518756U,  // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+  3759760847U,  // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+  3760866775U,  // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+  3759539680U,  // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+  3760866796U,  // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+  3304114054U,  // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+  2628911773U,  // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+  2623603464U,  // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+  3698008921U,  // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+  3633325603U,  // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+  2687125027U,  // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+  3633327414U,  // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+  3759539760U,  // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+  3760866876U,  // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+  3304122247U,  // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+  2687125072U,  // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+  3633332326U,  // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+  3759760992U,  // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+  2687125096U,  // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+  2687125106U,  // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+  2697963133U,  // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+  3759466120U,  // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+  3760866960U,  // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+  3771926168U,  // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+  2687125151U,  // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+  2687125158U,  // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+  2698405555U,  // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+  2577516238U,  // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+  3759687365U,  // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+  1624884942U,  // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+  2698700503U,  // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+  3772368608U,  // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+  3702655716U,  // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+  1625179890U,  // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+  2641521555U,  // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+  3772368642U,  // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+  2699142925U,  // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+  2698626838U,  // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+  2698626848U,  // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+  2628914486U,  // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+  2645503353U,  // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+  3304146826U,  // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+  2628914729U,  // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+  2553643110U,  // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+  3758950227U,  // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+  3759761248U,  // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+  2982396006U,  // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+  2553646390U,  // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+  2553647108U,  // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+  3760867204U,  // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+  3702657141U,  // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+  2982396011U,  // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+  3627393126U,  // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+  3760867236U,  // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+  2645504506U,  // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+  2687125434U,  // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+  2700617665U,  // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+  3760867276U,  // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+  3763521493U,  // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+  3719246670U,  // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+  2687125479U,  // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+  2565603430U,  // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+  2553660150U,  // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+  2565605216U,  // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+  2961178726U,  // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+  2565606710U,  // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+  4034920552U,  // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+  3114713292U,  // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+  3702658668U,  // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+  2961178731U,  // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+  2687125563U,  // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+  2628917038U,  // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+  2565613409U,  // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+  2687125592U,  // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+  1628203107U,  // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+  2628917402U,  // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+  2702092405U,  // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+  3304179598U,  // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+  1628498055U,  // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+  3760867467U,  // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+  2687125654U,  // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+  3759761565U,  // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+  3633391766U,  // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+  2687125680U,  // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+  3760277690U,  // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+  3310013014U,  // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+  2236344927U,  // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+  2687125717U,  // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+  3760867551U,  // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+  3760867558U,  // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+  2624938923U,  // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+  2703198460U,  // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+  3760867587U,  // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+  2636219536U,  // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+  3698681075U,  // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+  2703493408U,  // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+  2628920721U,  // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+  3766765870U,  // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+  3698681379U,  // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+  3760867649U,  // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+  2698627404U,  // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+  2703935830U,  // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+  2698627422U,  // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+  3760867686U,  // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+  3769788783U,  // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+  2701945209U,  // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+  3760867711U,  // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+  2636220684U,  // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+  3772369298U,  // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+  2687125916U,  // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+  2704599463U,  // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+  2704673200U,  // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+  3709962935U,  // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+  3772369346U,  // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+  2704894411U,  // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+  2704968148U,  // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+  3698682850U,  // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+  2642857014U,  // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+  2705189359U,  // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+  2705263096U,  // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+  2685946370U,  // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+  3779152394U,  // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+  2236377699U,  // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+  2687126045U,  // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+  2571632742U,  // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+  2559689870U,  // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+  2571634382U,  // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+  2571635264U,  // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+  2571636022U,  // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+  2559692804U,  // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+  3720581218U,  // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+  2236385892U,  // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+  2571638574U,  // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+  2565668966U,  // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+  3633439887U,  // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+  2565670760U,  // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+  2565671426U,  // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+  2565672246U,  // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+  3639414630U,  // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+  4047521640U,  // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+  2725169844U,  // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+  2565674798U,  // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+  1485963366U,  // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+  1485964432U,  // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+  2559706728U,  // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+  2559707286U,  // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+  1485966646U,  // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+  2559708880U,  // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+  2601513466U,  // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+  3114714112U,  // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+  1485969198U,  // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+  1485971558U,  // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+  1485972625U,  // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+  2559714920U,  // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+  2559715478U,  // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+  1485974838U,  // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+  2687126342U,  // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+  2601521658U,  // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+  2236410471U,  // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+  1485977390U,  // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+  3627491430U,  // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+  2636890214U,  // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+  3703333028U,  // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+  3782249348U,  // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+  2642198866U,  // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+  2687126418U,  // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+  2242243887U,  // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+  3316059448U,  // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+  2636890781U,  // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+  2241809658U,  // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+  3698025307U,  // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+  3698688940U,  // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+  3698689024U,  // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+  3700016206U,  // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+  2687126498U,  // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+  3760868336U,  // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+  3316067641U,  // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+  2242399554U,  // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+  3703334371U,  // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+  3703998004U,  // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+  3704661637U,  // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+  2636891854U,  // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+  3705988903U,  // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+  2698628150U,  // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+  3760868415U,  // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+  3783871562U,  // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+  2666752099U,  // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+  3639459942U,  // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+  3709970701U,  // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+  2636892510U,  // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+  3710634396U,  // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+  2638219776U,  // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+  3766987908U,  // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+  2710719634U,  // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+  3914097664U,  // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+  2640874308U,  // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+  2583642214U,  // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+  2642201574U,  // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+  3710635062U,  // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+  3717270664U,  // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+  2713963728U,  // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+  1637567706U,  // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+  2242276659U,  // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+  2646183372U,  // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+  1637788917U,  // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+  2559762534U,  // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+  2559763607U,  // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+  2698628366U,  // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+  3633506454U,  // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+  2559765814U,  // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+  2583654395U,  // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+  1613385014U,  // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+  3901639990U,  // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+  1613385032U,  // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+  2559770726U,  // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+  2559771648U,  // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+  3633514088U,  // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+  2571717122U,  // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+  2559774006U,  // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+  2712636796U,  // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+  3760868743U,  // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+  2712784270U,  // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+  2559776558U,  // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+  2565750886U,  // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+  2565751706U,  // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+  2565752690U,  // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+  2571725387U,  // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+  2565754166U,  // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+  3114713426U,  // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+    94817590U,  // <5,4,7,6>: Cost 1 vrev RHS
+  2595616175U,  // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+    94965064U,  // <5,4,7,u>: Cost 1 vrev RHS
+  2559787110U,  // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+  2559788186U,  // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+  2242014483U,  // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+  2667419628U,  // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+  2559790390U,  // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+  1640222238U,  // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+    94825783U,  // <5,4,u,6>: Cost 1 vrev RHS
+  2714111536U,  // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+    94973257U,  // <5,4,u,u>: Cost 1 vrev RHS
+  2646851584U,  // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+  1573109862U,  // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+  2646851748U,  // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+  3760279130U,  // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+  2687127138U,  // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+  2248142847U,  // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+  3720593910U,  // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+  4182502710U,  // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+  1573110429U,  // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+  2646852342U,  // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+  2624291676U,  // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+  2646852502U,  // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+  2646852568U,  // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+  2715217591U,  // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+  2628936848U,  // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+  3698033907U,  // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+  2713964240U,  // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+  2628937107U,  // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+  3645497446U,  // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+  3760869099U,  // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+  2646853224U,  // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+  2698628862U,  // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+  3772370694U,  // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+  2713964303U,  // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+  2646853562U,  // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+  4038198272U,  // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+  2701946667U,  // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+  2646853782U,  // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+  3698034922U,  // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+  3702679919U,  // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+  2637564336U,  // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+  2646854146U,  // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+  2638891602U,  // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+  3702680247U,  // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+  3702680259U,  // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+  2646854430U,  // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+  2646854546U,  // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+  2642209767U,  // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+  3711306806U,  // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+  3645516369U,  // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+  1570458842U,  // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+  1573113142U,  // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+  2645527932U,  // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+  2713964486U,  // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+  1573113374U,  // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+  1509982310U,  // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  2646855376U,  // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+  2583725672U,  // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+  2583726230U,  // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+  1509985590U,  // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+   229035318U,  // <5,5,5,5>: Cost 1 vdup1 RHS
+  2646855778U,  // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+  2646855848U,  // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+   229035318U,  // <5,5,5,u>: Cost 1 vdup1 RHS
+  2577760358U,  // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+  3633587361U,  // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+  2646856186U,  // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+  3633588738U,  // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+  2718535756U,  // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+  2644202223U,  // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+  2973780482U,  // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+  2646856526U,  // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+  2646856607U,  // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+  2571796582U,  // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+  3633595392U,  // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+  2571798222U,  // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+  2571799124U,  // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+  2571799862U,  // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+  3114717188U,  // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+  4034923010U,  // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+  2040974646U,  // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+  2040974647U,  // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+  1509982310U,  // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  1573115694U,  // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+  2571806414U,  // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+  2571807317U,  // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+  1509985590U,  // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+   229035318U,  // <5,5,u,5>: Cost 1 vdup1 RHS
+  2646857936U,  // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+  2040982838U,  // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+   229035318U,  // <5,5,u,u>: Cost 1 vdup1 RHS
+  2638233600U,  // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+  1564491878U,  // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+  2632261796U,  // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+  2638233856U,  // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+  2638233938U,  // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+  3706003885U,  // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+  3706003967U,  // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+  4047473974U,  // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+  1564492445U,  // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+  2638234358U,  // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+  2638234420U,  // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+  2638234518U,  // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+  2638234584U,  // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+  2626290768U,  // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+  2638234768U,  // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+  3700032719U,  // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+  2982366518U,  // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+  2628945300U,  // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+  3706004925U,  // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+  3711976966U,  // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+  2638235240U,  // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+  2638235302U,  // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+  2632263465U,  // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+  2638235496U,  // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+  2638235578U,  // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+  2713965050U,  // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+  2634917997U,  // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+  2638235798U,  // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+  3711977695U,  // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+  3710650720U,  // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+  2638236060U,  // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+  1564494338U,  // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+  2638236234U,  // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+  3711978104U,  // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+  4034227510U,  // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+  1567148870U,  // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+  2577817702U,  // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+  3700034544U,  // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+  2723033713U,  // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+  2638236818U,  // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+  2644208859U,  // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+  1564495158U,  // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+  2645536125U,  // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+  2723402398U,  // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+  1564495401U,  // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+  2577825894U,  // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+  2662125264U,  // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+  3775836867U,  // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+  3711979343U,  // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+  2650181556U,  // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+  2662125572U,  // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+  2638237732U,  // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+  2982399286U,  // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+  2982399287U,  // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+  2583806054U,  // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+  3711979910U,  // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+  2662126074U,  // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+  2583808514U,  // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+  2583809334U,  // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+  2583810062U,  // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+  2638238520U,  // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+  2973781302U,  // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+  2973781303U,  // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+   430358630U,  // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+  1504101110U,  // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+  1504101992U,  // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1504102550U,  // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+   430361910U,  // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+  1504104390U,  // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+  1504105272U,  // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+  1504106092U,  // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+   430364462U,  // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+   430366822U,  // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+  1564497710U,  // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+  1504110184U,  // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1504110742U,  // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+   430370103U,  // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+  1564498074U,  // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+  1504113146U,  // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+  1504113658U,  // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+   430372654U,  // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+  2625634304U,  // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+  1551892582U,  // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625634468U,  // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+  2571889247U,  // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+  2625634642U,  // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+  2595778728U,  // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+  3699376639U,  // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+  2260235715U,  // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+  1551893149U,  // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625635062U,  // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+  2624308020U,  // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+  2625635222U,  // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+  1551893504U,  // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+  2571898166U,  // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+  2625635472U,  // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+  2627626227U,  // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+  3702031684U,  // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+  1555211669U,  // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+  2629617126U,  // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+  3699377670U,  // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+  2625635944U,  // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+  2625636006U,  // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+  2632271658U,  // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+  2625636201U,  // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+  2625636282U,  // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+  3708004381U,  // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+  2625636411U,  // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+  2625636502U,  // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+  2625636604U,  // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+  3699378478U,  // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+  2625636764U,  // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+  2625636866U,  // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+  2625636959U,  // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+  3699378808U,  // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+  2640235254U,  // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+  2625637150U,  // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+  2571919462U,  // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+  2571920384U,  // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+  3699379260U,  // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+  2571922019U,  // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+  2571922742U,  // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+  1551895862U,  // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+  2846277980U,  // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+  2646207951U,  // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+  1551896105U,  // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+  2583871590U,  // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+  2652180176U,  // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+  2625638177U,  // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+  2625638262U,  // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+  2583874870U,  // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+  2846281732U,  // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+  2651517015U,  // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+  1772539190U,  // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+  1772539191U,  // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+  2846281826U,  // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+  3699380615U,  // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+  2846281108U,  // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+  2589854210U,  // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+  2846281830U,  // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+  2725467658U,  // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+  2846281076U,  // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+  2846279610U,  // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+  2846279611U,  // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+  1510146150U,  // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+  2846282574U,  // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+  2583889512U,  // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+  2846281919U,  // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+  1510149430U,  // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+  1510150168U,  // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+  2583892474U,  // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+  2625640044U,  // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+  1510151982U,  // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+  1510154342U,  // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+  1551898414U,  // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625640325U,  // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+  1772536477U,  // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+  1510157622U,  // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+  1551898778U,  // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+  2625640656U,  // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+  1772539433U,  // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+  1551898981U,  // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+  2625642496U,  // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+  1551900774U,  // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+  2625642660U,  // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+  2698630885U,  // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+  2687129325U,  // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+  2689783542U,  // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+  2266134675U,  // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+  2595853772U,  // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+  1551901341U,  // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+  2625643254U,  // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+  2625643316U,  // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+  1613387566U,  // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  1551901697U,  // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+  2626307154U,  // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+  2689783622U,  // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+  2627634420U,  // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+  2982366536U,  // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+  1613387620U,  // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+  2846286742U,  // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+  2685796528U,  // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+  2625644136U,  // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+  2687129480U,  // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+  2632279851U,  // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+  2625644394U,  // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+  2625644474U,  // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+  2713966508U,  // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+  2625644603U,  // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+  2687129532U,  // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+  2636261649U,  // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+  2636925282U,  // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+  2625644956U,  // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+  1564510724U,  // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+  2625645160U,  // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+  2734610422U,  // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+  2640243447U,  // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+  1567165256U,  // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+  1567828889U,  // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+  1661163546U,  // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+  2734463012U,  // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+  2698631212U,  // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+  1570458842U,  // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+  1551904054U,  // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+  2846286172U,  // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+  2646216144U,  // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+  1551904297U,  // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+  1509982310U,  // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+  2560058555U,  // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+  2698926194U,  // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+  2698631295U,  // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+  1509985590U,  // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+   229035318U,  // <5,u,5,5>: Cost 1 vdup1 RHS
+  1613387930U,  // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+  1772547382U,  // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+   229035318U,  // <5,u,5,u>: Cost 1 vdup1 RHS
+  2566037606U,  // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+  2920044334U,  // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+  2566039445U,  // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+  2687129808U,  // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+  2566040886U,  // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+  2920044698U,  // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+  2846289268U,  // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+  2973781320U,  // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+  2687129853U,  // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+   430506086U,  // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+  1486333117U,  // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+  1504249448U,  // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  2040971933U,  // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+   430509384U,  // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+  1504251600U,  // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+   118708378U,  // <5,u,7,6>: Cost 1 vrev RHS
+  2040974889U,  // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+   430511918U,  // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+   430514278U,  // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+  1551906606U,  // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+  1613388133U,  // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+  1772544669U,  // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+   430517577U,  // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+   229035318U,  // <5,u,u,5>: Cost 1 vdup1 RHS
+   118716571U,  // <5,u,u,6>: Cost 1 vrev RHS
+  1772547625U,  // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+   430520110U,  // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+  2686025728U,  // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+  2686025738U,  // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+  2686025748U,  // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+  3779084320U,  // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+  2642903388U,  // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+  3657723939U,  // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+  3926676514U,  // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+  3926675786U,  // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+  2686025802U,  // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+  2566070374U,  // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+  3759767642U,  // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+  1612284006U,  // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  2583988738U,  // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+  2566073654U,  // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+  2583990308U,  // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+  2589963005U,  // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+  2595935702U,  // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+  1612284060U,  // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  2686025892U,  // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+  2685804721U,  // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+  3759620282U,  // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+  2705342658U,  // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+  1612284108U,  // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+  3706029956U,  // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+  2686173406U,  // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+  3651769338U,  // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+  1612579056U,  // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+  3706030230U,  // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+  2705342720U,  // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+  2705342730U,  // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+  3706030492U,  // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+  2644896258U,  // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+  3718638154U,  // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+  3729918619U,  // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+  3926672384U,  // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+  2705342784U,  // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+  2687058250U,  // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+  2686026066U,  // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+  1613463900U,  // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+  3761021285U,  // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+  2687353198U,  // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+  2632289590U,  // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+  2645560704U,  // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+  2646224337U,  // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+  1613906322U,  // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+  3651788902U,  // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+  2687795620U,  // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+  3761611181U,  // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+  3723284326U,  // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+  2646224838U,  // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+  3718639630U,  // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+  2652196962U,  // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+  2852932918U,  // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+  2852932919U,  // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+  2852933730U,  // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+  2925985894U,  // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+  3060203622U,  // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+  3718640178U,  // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+  2656178832U,  // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+  3725939378U,  // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+  2657506098U,  // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+  2619020110U,  // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+  2925986461U,  // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+  2572091494U,  // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+  2572092310U,  // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+  2980495524U,  // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+  2572094072U,  // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+  2572094774U,  // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+  4054238242U,  // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+  3645837653U,  // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+  4054239054U,  // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+  2572097326U,  // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+  2686026378U,  // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+  2686026386U,  // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+  1612284573U,  // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  2705343144U,  // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+  1616265906U,  // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+  2632292506U,  // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+  2590020356U,  // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+  2852933161U,  // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+  1612284627U,  // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  2595995750U,  // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+  2646229094U,  // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+  3694092492U,  // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+  2686026486U,  // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+  2595999030U,  // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+  3767730952U,  // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+  2596000590U,  // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+  2596001246U,  // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+  2686026531U,  // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+  3763602219U,  // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+  2686026548U,  // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+  3764929346U,  // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+  2686026568U,  // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+  2691334996U,  // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+  3760874332U,  // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+  3765224294U,  // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+  3669751263U,  // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+  2686026613U,  // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+  2554208358U,  // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+  3763602311U,  // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+  3639895971U,  // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+  2686026646U,  // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+  2554211638U,  // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+  3760874411U,  // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+  2554212858U,  // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+  3802973114U,  // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+  2686026691U,  // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+  2566160486U,  // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+  2686026712U,  // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+  2686026724U,  // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+  3759768552U,  // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+  2692662262U,  // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+  2686026752U,  // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+  2590053128U,  // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+  3663795194U,  // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+  2686026775U,  // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+  2641587099U,  // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+  2693104684U,  // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+  3639912357U,  // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+  2687206462U,  // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+  3633941814U,  // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+  2693399632U,  // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+  3765077075U,  // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+  2646232530U,  // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+  2687206507U,  // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+  2647559796U,  // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+  3765077118U,  // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+  3767583878U,  // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+  2686026896U,  // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+  2693989528U,  // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+  3767805089U,  // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+  2652868706U,  // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+  3908250934U,  // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+  2686026941U,  // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+  2554241126U,  // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+  3763602639U,  // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+  3759547607U,  // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+  3115221094U,  // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+  2554244406U,  // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+  3760874739U,  // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+  2554245944U,  // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+  3719975758U,  // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+  3115221099U,  // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+  2560221286U,  // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+  2560222415U,  // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+  2980497558U,  // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+  3103211622U,  // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+  2560224566U,  // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+  2980495698U,  // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+  3633967526U,  // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+  4054237686U,  // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+  2560227118U,  // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+  2560229478U,  // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+  2686027117U,  // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+  2686027129U,  // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+  2686027132U,  // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+  2687206795U,  // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+  2686027157U,  // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+  2590094093U,  // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+  2596066790U,  // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+  2686027177U,  // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+  2646900736U,  // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+  1573159014U,  // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+  2646900900U,  // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+  3759769037U,  // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+  2641592668U,  // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+  3779085794U,  // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+  2686027244U,  // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+  3669816807U,  // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+  1573159581U,  // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+  2230527897U,  // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+  2646901556U,  // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+  2646901654U,  // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+  2847047782U,  // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+  3771049517U,  // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+  2646901904U,  // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+  2686027324U,  // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+  3669825000U,  // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+  2231117793U,  // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+  3763603029U,  // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+  3759769184U,  // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+  2686027368U,  // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+  2686027378U,  // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+  2697971326U,  // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+  3759769224U,  // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+  2698118800U,  // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+  3920794092U,  // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+  2686027423U,  // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+  2686027430U,  // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+  3759769262U,  // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+  2698487485U,  // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+  2705344196U,  // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+  2686027470U,  // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+  2698708696U,  // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+  2724660961U,  // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+  2729232104U,  // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+  2686027502U,  // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+  1567853468U,  // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+  3759769351U,  // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+  2699151118U,  // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+  2686027543U,  // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+  2699298592U,  // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+  1573162294U,  // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+  2686027564U,  // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+  3719982547U,  // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+  1573162532U,  // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+  3779086154U,  // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+  2646904528U,  // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+  3759769440U,  // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+  2699888488U,  // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+  2230855617U,  // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+  2646904836U,  // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+  2646904930U,  // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+  2847051062U,  // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+  2700257173U,  // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+  2687207321U,  // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+  2686027684U,  // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+  2566260656U,  // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+  2685806522U,  // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+  2687207361U,  // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+  2686027724U,  // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+  2646905656U,  // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+  2646905678U,  // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+  2686027751U,  // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+  2554323046U,  // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+  2572239606U,  // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+  2566268849U,  // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+  1906753638U,  // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+  2554326326U,  // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+  3304687564U,  // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+  2980495708U,  // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+  2646906476U,  // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+  1906753643U,  // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+  1591744256U,  // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+  1573164846U,  // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+  2701805650U,  // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+  1906761830U,  // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+  2686027875U,  // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+  1573165210U,  // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+  2686322800U,  // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+  2847051305U,  // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+  1906761835U,  // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+  3759769739U,  // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+  2686027926U,  // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+  2686027937U,  // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+  3640027286U,  // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+  2687207601U,  // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+  2705344698U,  // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+  3663917847U,  // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+  2237008560U,  // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+  2686027989U,  // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+  3759769823U,  // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+  3759769830U,  // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+  3759769841U,  // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+  3759769848U,  // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+  2703280390U,  // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+  3759769868U,  // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+  3704063194U,  // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+  3767732510U,  // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+  2703280390U,  // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+  3704063468U,  // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+  2630321724U,  // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+  3759769921U,  // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+  3759769928U,  // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+  3704063767U,  // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+  3704063876U,  // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+  2636957626U,  // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+  3777907058U,  // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+  2630321724U,  // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+  3759769983U,  // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+  3710036245U,  // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+  2636958054U,  // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+  2686028188U,  // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+  2704607656U,  // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+  3773041072U,  // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+  3711363731U,  // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+  3767732676U,  // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+  2707999179U,  // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+  2584232038U,  // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+  2642267118U,  // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+  2642930751U,  // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+  2705197552U,  // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+  2584235318U,  // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+  1631603202U,  // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+  2654211444U,  // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+  2237041332U,  // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+  1631824413U,  // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+  3640066150U,  // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+  3772746288U,  // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+  3640067790U,  // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+  3773041216U,  // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+  2705934922U,  // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+  3773041236U,  // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+  3779086940U,  // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+  3767732831U,  // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+  2706229870U,  // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+  2602164326U,  // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+  2654212512U,  // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+  2566334393U,  // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+  3704066588U,  // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+  2602167524U,  // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+  3710702321U,  // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+  2724661933U,  // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+  3710702465U,  // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+  2602170158U,  // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+  1492598886U,  // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+  2560369889U,  // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+  1492600762U,  // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+  2566342806U,  // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+  1492602166U,  // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+  2602176208U,  // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+  2566345210U,  // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+  2980496528U,  // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+  1492604718U,  // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+  1492607078U,  // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+  2686028574U,  // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+  1492608955U,  // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+  2566350998U,  // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+  1492610358U,  // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+  1634257734U,  // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+  2566353489U,  // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+  2980504720U,  // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+  1492612910U,  // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+  3703406592U,  // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+  2629664870U,  // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+  2629664972U,  // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+  3779087232U,  // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+  2642936156U,  // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+  2712570770U,  // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+  2687208348U,  // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+  3316723081U,  // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+  2629665437U,  // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+  2242473291U,  // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+  3700089652U,  // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+  3703407510U,  // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+  2852962406U,  // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+  3628166454U,  // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+  3760876514U,  // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+  2687208430U,  // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+  3316731274U,  // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+  2243063187U,  // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+  2629666284U,  // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+  3703408188U,  // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+  3703408232U,  // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+  3703408294U,  // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+  2632320816U,  // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+  2923384118U,  // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+  2687208508U,  // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+  3760950341U,  // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+  2634975348U,  // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+  3703408790U,  // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+  3316305238U,  // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+  3703408947U,  // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+  3703409052U,  // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+  2644929026U,  // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+  3718670922U,  // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+  2705345682U,  // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+  3926705152U,  // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+  2668817222U,  // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+  2590277734U,  // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+  3716017135U,  // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+  2642938944U,  // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+  3717344401U,  // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+  2712571088U,  // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+  2629668150U,  // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+  1637649636U,  // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+  2646257109U,  // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+  1637649636U,  // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+  2566398054U,  // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+  3760876805U,  // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+  2566399937U,  // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+  2584316418U,  // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+  2566401334U,  // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+  2584318028U,  // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+  1612287286U,  // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  2852965686U,  // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+  1612287304U,  // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  1504608358U,  // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+  2578350838U,  // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+  2578351720U,  // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+  2578352278U,  // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+  1504611638U,  // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+  2578353872U,  // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+  2578354682U,  // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+  2578355194U,  // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+  1504614190U,  // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+  2572386406U,  // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+  2572387226U,  // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+  3640157902U,  // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+  2572389020U,  // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+  2572389686U,  // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+  2980497102U,  // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+  2980495564U,  // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+  4054239090U,  // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+  2572392238U,  // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+  1504608358U,  // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+  2629670702U,  // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+  2566424516U,  // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+  2584340994U,  // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+  1640156694U,  // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+  2629671066U,  // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+  1612287529U,  // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  2852965929U,  // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+  1612287547U,  // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  3708723200U,  // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+  2634981478U,  // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+  3694125260U,  // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+  3779087962U,  // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+  3760877154U,  // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+  4195110916U,  // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+  3696779775U,  // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+  1175212130U,  // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+  1175285867U,  // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+  2248445988U,  // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+  3698107237U,  // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+  3708724118U,  // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+  3908575334U,  // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+  3716023376U,  // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+  3708724368U,  // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+  3767733960U,  // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+  2712571600U,  // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+  2712571609U,  // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+  2578391142U,  // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+  3704079934U,  // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+  3708724840U,  // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+  3705407182U,  // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+  2578394422U,  // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+  3717351272U,  // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+  2634983354U,  // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+  3115486518U,  // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+  2634983541U,  // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+  3708725398U,  // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+  3710052631U,  // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+  3708725606U,  // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+  3708725660U,  // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+  2643610114U,  // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+  3717352010U,  // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+  3773632358U,  // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+  2248978533U,  // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+  2249052270U,  // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+  2596323430U,  // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+  3716025328U,  // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+  3716688961U,  // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+  2643610770U,  // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+  2596326710U,  // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+  2634984758U,  // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+  3767734199U,  // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+  1643696070U,  // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+  1643769807U,  // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+  2578415718U,  // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+  3652158198U,  // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+  3652159080U,  // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+  3652159638U,  // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+  2578418998U,  // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+  2712571908U,  // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+  2718027790U,  // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+  2712571928U,  // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+  2712571937U,  // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+  2705346596U,  // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+  3767144496U,  // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+  3773116473U,  // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+  2705346626U,  // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+  2705346636U,  // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+  3908577217U,  // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+  2578428728U,  // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+  2712572002U,  // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+  2705346668U,  // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+  2560516198U,  // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+  2560517363U,  // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+  2566490060U,  // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+  3634260118U,  // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+  2560519478U,  // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+  2980498650U,  // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+  2980497922U,  // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+  3103214902U,  // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+  2560522030U,  // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+  2560524390U,  // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+  2560525556U,  // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+  2566498253U,  // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+  2646931439U,  // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+  2560527670U,  // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+  2634987674U,  // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+  2980506114U,  // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+  1175277674U,  // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+  1175351411U,  // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+  2578448486U,  // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+  1573191782U,  // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+  2686030124U,  // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+  3779088690U,  // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+  2687209788U,  // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+  3652194000U,  // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+  2254852914U,  // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+  4041575734U,  // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+  1573192349U,  // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+  2646934262U,  // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+  2646934324U,  // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+  2646934422U,  // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+  2846785638U,  // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+  3760951694U,  // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+  2646934672U,  // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+  2712572320U,  // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+  3775549865U,  // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+  2846785643U,  // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+  3759772094U,  // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+  3704751676U,  // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+  2631009936U,  // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+  2646935206U,  // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+  3759772127U,  // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+  3704752004U,  // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+  2646935482U,  // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+  2712572410U,  // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+  2712572419U,  // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+  2646935702U,  // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+  3777024534U,  // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+  3704752453U,  // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+  2646935964U,  // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+  2705347122U,  // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+  3779678778U,  // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+  2657553069U,  // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+  4039609654U,  // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+  2708001366U,  // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+  2578481254U,  // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+  3652223734U,  // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+  3760951922U,  // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+  3779089019U,  // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+  1570540772U,  // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+  1573195062U,  // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+  2712572560U,  // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+  2723410591U,  // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+  1573195304U,  // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+  3640287334U,  // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+  2646937296U,  // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+  3640289235U,  // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+  3720679279U,  // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+  2646937542U,  // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+  2646937604U,  // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+  2646937698U,  // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+  2846788918U,  // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+  2846788919U,  // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+  1516699750U,  // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+  2590442230U,  // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+  2646938106U,  // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+  2590443670U,  // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+  1516703030U,  // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+  2590445264U,  // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+   296144182U,  // <6,6,6,6>: Cost 1 vdup2 RHS
+  2712572738U,  // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+   296144182U,  // <6,6,6,u>: Cost 1 vdup2 RHS
+  2566561894U,  // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+  3634332924U,  // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+  2566563797U,  // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+  2584480258U,  // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+  2566565174U,  // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+  2717438846U,  // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+  2980500280U,  // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+  1906756918U,  // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+  1906756919U,  // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+  1516699750U,  // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+  1573197614U,  // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+  2566571990U,  // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+  2846786205U,  // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+  1516703030U,  // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+  1573197978U,  // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+   296144182U,  // <6,6,u,6>: Cost 1 vdup2 RHS
+  1906765110U,  // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+   296144182U,  // <6,6,u,u>: Cost 1 vdup2 RHS
+  1571209216U,  // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+   497467494U,  // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+  1571209380U,  // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2644951292U,  // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+  1571209554U,  // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1510756450U,  // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+  2644951542U,  // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+  2584499194U,  // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+   497468061U,  // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+  1571209974U,  // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571210036U,  // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1571210134U,  // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+  1571210200U,  // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+  2644952098U,  // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+  1571210384U,  // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+  2644952271U,  // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+  2578535418U,  // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+  1571210605U,  // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+  2644952509U,  // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+  2644952582U,  // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+  1571210856U,  // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571210918U,  // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  2644952828U,  // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+  2633009028U,  // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+  1571211194U,  // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+  2668840938U,  // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+  1571211323U,  // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+  1571211414U,  // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2644953311U,  // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+  2644953390U,  // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+  1571211676U,  // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571211778U,  // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  2644953648U,  // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+  2644953720U,  // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+  2644953795U,  // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+  1571212062U,  // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1573202834U,  // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2644954058U,  // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+  2644954166U,  // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+  2644954258U,  // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+  1571212496U,  // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+   497470774U,  // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+  1573203316U,  // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  2646281688U,  // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+   497471017U,  // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+  2644954696U,  // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+  1573203664U,  // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2644954878U,  // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+  2644954991U,  // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+  1571213254U,  // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571213316U,  // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1571213410U,  // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+  1573204136U,  // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1573204217U,  // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+  2644955425U,  // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+  2644955561U,  // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+  1573204474U,  // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2644955698U,  // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+  2644955789U,  // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+  2644955889U,  // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+  1571214136U,  // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+  1571214158U,  // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+  1573204895U,  // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+  1573204986U,  // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+  2572608656U,  // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+  2644956362U,  // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+  2572610231U,  // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+  1573205350U,  // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+  2646947220U,  // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+  1516786498U,  // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+  1571214956U,  // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+  1573205634U,  // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+  1571215059U,  // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+   497473326U,  // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+  1571215237U,  // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+  1571215292U,  // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1571215423U,  // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+   497473690U,  // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+  1571215568U,  // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+  1573206272U,  // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+   497473893U,  // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+  1571217408U,  // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+   497475686U,  // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+  1571217572U,  // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2689865445U,  // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+  1571217746U,  // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1510830187U,  // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+  2644959734U,  // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+  1193130221U,  // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+   497476253U,  // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+  1571218166U,  // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571218228U,  // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1612289838U,  // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  1571218392U,  // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+  2566663478U,  // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+  1571218576U,  // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+  2644960463U,  // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+  2717439835U,  // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+  1612289892U,  // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+  1504870502U,  // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+  2644960774U,  // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+  1571219048U,  // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571219110U,  // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  1504873782U,  // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+  2633017221U,  // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+  1571219386U,  // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+  2712573868U,  // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+  1571219515U,  // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+  1571219606U,  // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2644961503U,  // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+  2566678499U,  // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+  1571219868U,  // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571219970U,  // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  2689865711U,  // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+  2708002806U,  // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+  2644961987U,  // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+  1571220254U,  // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1571220370U,  // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2644962250U,  // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+  1661245476U,  // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+  2686031917U,  // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+  1571220688U,  // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+   497478967U,  // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+  1571220852U,  // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  1661614161U,  // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+   497479209U,  // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+  2566692966U,  // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+  1571221200U,  // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2566694885U,  // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+  2689865855U,  // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+  1571221446U,  // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571221508U,  // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1612290202U,  // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+  1571221672U,  // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1612290220U,  // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+  1504903270U,  // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+  2644963752U,  // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+  1571222010U,  // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2686032080U,  // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+  1504906550U,  // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+  2644964079U,  // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+   296144182U,  // <6,u,6,6>: Cost 1 vdup2 RHS
+  1571222350U,  // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+   296144182U,  // <6,u,6,u>: Cost 1 vdup2 RHS
+  1492967526U,  // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+  2560738574U,  // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+  1492969447U,  // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+  1906753692U,  // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+  1492970806U,  // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+  2980495761U,  // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+  1516860235U,  // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+  1906756936U,  // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+  1492973358U,  // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+  1492975718U,  // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+   497481518U,  // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+  1612290405U,  // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+  1571223484U,  // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1492978998U,  // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+   497481882U,  // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+   296144182U,  // <6,u,u,6>: Cost 1 vdup2 RHS
+  1906765128U,  // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+   497482085U,  // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+  1638318080U,  // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+  1638318090U,  // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+  1638318100U,  // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+  3646442178U,  // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+  2712059941U,  // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+  2651603364U,  // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+  2590618445U,  // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+  3785801798U,  // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+  1638318153U,  // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+  1516879974U,  // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+  2693922911U,  // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+   564576358U,  // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+  2638996480U,  // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+  1516883254U,  // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+  2649613456U,  // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+  1516884814U,  // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+  2590626808U,  // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+   564576412U,  // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+  1638318244U,  // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+  2692743344U,  // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+  2712060084U,  // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+  2712060094U,  // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+  1638318284U,  // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+  2712060118U,  // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+  2651604922U,  // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+  2686255336U,  // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+  1638318316U,  // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+  2651605142U,  // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+  2712060156U,  // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+  2712060165U,  // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+  2651605404U,  // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+  2651605506U,  // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+  2638998111U,  // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+  2639661744U,  // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+  3712740068U,  // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+  2640989010U,  // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+  2712060232U,  // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+  1638318418U,  // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+  1638318428U,  // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+  3646474950U,  // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+  2712060270U,  // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+  1577864502U,  // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+  2651606388U,  // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+  3787792776U,  // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+  1638318481U,  // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+  2590654566U,  // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+  2651606736U,  // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+  2712060334U,  // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+  2649616239U,  // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+  2651606982U,  // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+  2651607044U,  // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+  1577865314U,  // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+  2651607208U,  // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+  1579192580U,  // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+  2688393709U,  // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+  2712060406U,  // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+  2688541183U,  // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+  2655588936U,  // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+  3762430481U,  // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+  2651607730U,  // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+  2651607864U,  // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+  2651607886U,  // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+  2688983605U,  // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+  2651608058U,  // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+  2932703334U,  // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+  3066921062U,  // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+  3712742678U,  // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+  2651608422U,  // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+  2651608513U,  // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+  2663552532U,  // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+  2651608684U,  // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+  2651608706U,  // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+  1638318730U,  // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+  1638318738U,  // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+   564576925U,  // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+  2572765898U,  // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+  1638318770U,  // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+  1577867418U,  // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+  1516942165U,  // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+  2651609344U,  // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+   564576979U,  // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+  2590687334U,  // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+  2639003750U,  // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+  2793357414U,  // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+  1638318838U,  // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+  2590690614U,  // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+  2712060679U,  // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+  2590692182U,  // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+  3785802521U,  // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+  1638318883U,  // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+  2712060715U,  // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+  1638318900U,  // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+  3774300994U,  // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+  1638318920U,  // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+  2712060755U,  // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+  2691416926U,  // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+  2590700375U,  // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+  3765158766U,  // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+  1638318965U,  // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+  2712060796U,  // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+  2712060807U,  // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+  3712747112U,  // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+  1638318998U,  // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+  2712060836U,  // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+  2712060843U,  // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+  2590708568U,  // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+  2735948730U,  // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+  1638319043U,  // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+  2712060876U,  // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+  1638319064U,  // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+  2712060894U,  // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+  2692596718U,  // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+  2712060917U,  // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+  1619002368U,  // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+  2692817929U,  // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+  2735948814U,  // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+  1619223579U,  // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+  2712060962U,  // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+  2712060971U,  // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+  2712060980U,  // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+  2712060989U,  // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+  3785802822U,  // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+  2639007030U,  // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+  2645642634U,  // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+  3719384520U,  // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+  2639007273U,  // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+  2572812390U,  // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+  2693776510U,  // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+  3774301318U,  // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+  1620182160U,  // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+  2572815670U,  // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+  3766486178U,  // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+  2651615331U,  // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+  2652278964U,  // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+  1620550845U,  // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+  3768108230U,  // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+  2694440143U,  // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+  2712061144U,  // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+  2694587617U,  // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+  3768403178U,  // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+  2694735091U,  // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+  3768550652U,  // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+  2652279630U,  // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+  2694956302U,  // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+  2645644282U,  // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+  2859062094U,  // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+  3779462437U,  // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+  3121938534U,  // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+  2554916150U,  // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+  3769140548U,  // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+  3726022164U,  // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+  2554918508U,  // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+  3121938539U,  // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+  2572836966U,  // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+  1638319469U,  // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+  2712061299U,  // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+  1622173059U,  // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+  2572840246U,  // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+  1622320533U,  // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+  2696136094U,  // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+  2859060777U,  // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+  1622541744U,  // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+  2712061364U,  // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+  2712061373U,  // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+  2712061380U,  // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+  2712061389U,  // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+  2712061404U,  // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+  2696725990U,  // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+  2712061417U,  // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+  3785803251U,  // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+  2696947201U,  // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+  2712061446U,  // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+  3785803276U,  // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+  3785803285U,  // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+  2712061471U,  // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+  2712061482U,  // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+  3766486576U,  // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+  2712061500U,  // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+  2602718850U,  // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+  2712061516U,  // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+  2712061525U,  // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+  2712061536U,  // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+  1638319720U,  // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1638319730U,  // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+  2712061565U,  // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+  2698053256U,  // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+  2712061584U,  // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+  3771795096U,  // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+  1638319775U,  // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+  1638319782U,  // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+  2693924531U,  // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+  2700560061U,  // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+  2693924551U,  // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+  1638319822U,  // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+  2698716889U,  // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+  2712061665U,  // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+  2735949540U,  // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+  1638319854U,  // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+  2712061692U,  // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+  2712061698U,  // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+  2712061708U,  // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+  2712061718U,  // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+  2712061728U,  // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+  2699380522U,  // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+  2712061740U,  // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+  3809691445U,  // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+  2699601733U,  // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+  2699675470U,  // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+  3766486867U,  // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+  2699822944U,  // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+  2692745065U,  // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+  2699970418U,  // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+  3766486907U,  // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+  2700117892U,  // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+  3771795334U,  // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+  2692745110U,  // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+  2572894310U,  // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+  2712061860U,  // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+  2700486577U,  // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+  1626818490U,  // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+  2572897590U,  // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+  2700707788U,  // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+  2700781525U,  // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+  3774597086U,  // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+  1627187175U,  // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+  2735949802U,  // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+  3780200434U,  // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+  3773564928U,  // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+  2986541158U,  // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+  2554989878U,  // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+  3775113245U,  // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+  4060283228U,  // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+  2554992236U,  // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+  2986541163U,  // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+  1638320187U,  // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+  2693924936U,  // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+  1638319720U,  // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1628145756U,  // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+  1638320227U,  // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+  2702035054U,  // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+  2702108791U,  // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+  2735949945U,  // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+  1628514441U,  // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+  2712062091U,  // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+  1638320278U,  // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+  2712062109U,  // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+  2590836886U,  // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+  2712062128U,  // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+  2712062138U,  // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+  2590839656U,  // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+  3311414017U,  // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+  1638320341U,  // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+  2237164227U,  // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+  2712062182U,  // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+  2712062193U,  // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+  2692745468U,  // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+  2712062214U,  // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+  2693925132U,  // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+  3768183059U,  // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+  2692745504U,  // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+  2696063273U,  // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+  2712062254U,  // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+  2712062262U,  // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+  2712062273U,  // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+  2712062280U,  // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+  2712062294U,  // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+  2712062302U,  // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+  2700560742U,  // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+  2712062319U,  // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+  2712062325U,  // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+  2712062335U,  // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+  2636368158U,  // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+  2637031791U,  // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+  1638320540U,  // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  2712062374U,  // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+  2704689586U,  // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+  2590864235U,  // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+  2704837060U,  // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+  1638320540U,  // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+  2712062416U,  // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+  2712062426U,  // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+  2566981640U,  // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+  2712062447U,  // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+  2712062456U,  // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+  1638320642U,  // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+  2648313204U,  // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+  3311446789U,  // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+  1638320669U,  // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+  2602819686U,  // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+  1574571728U,  // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+  2648977185U,  // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+  2705869378U,  // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+  2237491947U,  // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+  2706016852U,  // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+  2648313954U,  // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+  2692745823U,  // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+  1579217159U,  // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+  2706311800U,  // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+  2654286249U,  // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+  1581208058U,  // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+  2706533011U,  // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+  2706606748U,  // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+  3780422309U,  // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+  2712062637U,  // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+  2706827959U,  // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+  1585189856U,  // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+  2693925571U,  // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+  2693925584U,  // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+  2700561114U,  // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+  2572978916U,  // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+  2693925611U,  // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+  2707344118U,  // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+  2654950894U,  // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+  2648315500U,  // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+  2693925643U,  // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+  2237221578U,  // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+  1638320926U,  // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+  1593153452U,  // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+  1638320540U,  // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  2237516526U,  // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+  1638320966U,  // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+  2712062796U,  // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+  2692967250U,  // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+  1638320989U,  // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+  2651635712U,  // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+  1577893990U,  // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+  2651635876U,  // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+  3785804672U,  // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+  2651636050U,  // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+  1638468498U,  // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+  1638468508U,  // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+  3787795364U,  // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+  1640459181U,  // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+  2651636470U,  // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+  2651636532U,  // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+  2712062922U,  // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+  2639029248U,  // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+  2712062940U,  // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+  2712062946U,  // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+  2712062958U,  // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+  3785804791U,  // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+  2712062973U,  // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+  3785804807U,  // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+  3785804818U,  // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+  2651637352U,  // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+  2651637414U,  // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+  3716753194U,  // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+  2712063030U,  // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+  2712063036U,  // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+  3773123658U,  // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+  2712063054U,  // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+  2651637910U,  // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+  3712772348U,  // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+  3785804906U,  // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+  2651638172U,  // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+  2651638274U,  // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+  2639030883U,  // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+  2712063122U,  // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+  3712772836U,  // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+  2641021782U,  // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+  2714053802U,  // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+  3785804978U,  // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+  3716754505U,  // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+  3785804998U,  // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+  1638321360U,  // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+  1638468826U,  // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+  1638468836U,  // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+  3785215214U,  // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+  1640459509U,  // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+  1517207654U,  // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+  2573034640U,  // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+  2712063246U,  // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+  2573036267U,  // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+  1517210934U,  // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+  2711989549U,  // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+   564579638U,  // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+  2651639976U,  // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+   564579656U,  // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+  2712063307U,  // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+  3767668056U,  // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+  2651640314U,  // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+  2655621708U,  // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+  1638468980U,  // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+  2712063358U,  // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+  2712063367U,  // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+  2712210826U,  // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+  1638469012U,  // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+  2651640826U,  // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+  3773713830U,  // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+  3773713842U,  // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+  3780349372U,  // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+  2651641140U,  // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+  2712210888U,  // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+  2712210898U,  // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+  2651641452U,  // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+  2713538026U,  // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+  1517232230U,  // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+  1577899822U,  // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+  2712063489U,  // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+  2573060846U,  // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+  1640312342U,  // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+  1638469146U,  // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+   564579881U,  // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+  2714054192U,  // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+   564579899U,  // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+  2579038310U,  // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+  2636382310U,  // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+  2796339302U,  // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+  3646810719U,  // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+  2712063586U,  // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+  2735951467U,  // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+  2735951476U,  // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+  2579043322U,  // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+  2636382877U,  // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+  2712211087U,  // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+  3698180916U,  // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+  3710124950U,  // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+  2636383232U,  // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+  2712211127U,  // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+  2590994128U,  // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+  2590995323U,  // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+  1638469328U,  // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+  1638469337U,  // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+  3785805536U,  // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+  3785805544U,  // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+  3704817288U,  // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+  2712063742U,  // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+  3716761386U,  // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+  2714054415U,  // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+  3774304024U,  // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+  2712063777U,  // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+  2712063787U,  // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+  3634888806U,  // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+  2636384544U,  // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+  3710790001U,  // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+  3710126492U,  // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+  3634892086U,  // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+  2639039076U,  // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+  3713444533U,  // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+  2693926767U,  // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+  2712063864U,  // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+  2579071078U,  // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+  3646841856U,  // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+  3716762698U,  // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+  3646843491U,  // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+  2579074358U,  // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+  2636385590U,  // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+  2645675406U,  // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+  1638322118U,  // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+  1638469583U,  // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+  2714054611U,  // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+  2652974800U,  // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+  3710127905U,  // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+  3785805808U,  // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+  2712211450U,  // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+  1638322180U,  // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+  2712064014U,  // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+  1638469656U,  // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+  1638469665U,  // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+  2712064036U,  // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+  2714054707U,  // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+  3785805879U,  // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+  2712064066U,  // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+  2712064076U,  // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+  2714054743U,  // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+  2712064096U,  // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+  1638322274U,  // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+  1638469739U,  // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+  1511325798U,  // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+  2692747392U,  // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+  2585069160U,  // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+  2573126390U,  // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+  1511329078U,  // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+  1638469800U,  // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+  2712211626U,  // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+  2712211636U,  // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+  1638469823U,  // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+  1511333990U,  // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+  2636388142U,  // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+  2712211671U,  // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+  2573134583U,  // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+  1511337270U,  // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+  1638469881U,  // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+  2712064258U,  // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+  1638469892U,  // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+  1638469904U,  // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+  2650324992U,  // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+  1576583270U,  // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+  2712064300U,  // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+  2255295336U,  // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+  2712064316U,  // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+  2585088098U,  // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+  2735952204U,  // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+  2712211799U,  // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+  1576583837U,  // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+  1181340494U,  // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+  2650325812U,  // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+  2650325910U,  // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+  2650325976U,  // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+  2579123510U,  // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+  2650326160U,  // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+  2714055072U,  // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+  2712064425U,  // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+  1181930390U,  // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+  2712211897U,  // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+  2714055108U,  // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+  2650326632U,  // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+  2650326694U,  // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+  2714055137U,  // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+  2714055148U,  // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+  2650326970U,  // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+  1638470138U,  // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+  1638470147U,  // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+  2650327190U,  // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+  2255172441U,  // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+  2255246178U,  // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+  2650327452U,  // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+  2712064562U,  // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+  2650327627U,  // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+  3713452726U,  // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+  2700563016U,  // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+  2712064593U,  // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+  2650327954U,  // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+  2735952486U,  // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+  2735952497U,  // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+  2255328108U,  // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+  2712212100U,  // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+  1576586550U,  // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+  2714055312U,  // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+  2712212126U,  // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+  1576586793U,  // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+  2579152998U,  // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+  2650328784U,  // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+  2714055364U,  // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+  3785806538U,  // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+  1576587206U,  // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+  2650329092U,  // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+  2650329186U,  // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+  2712064753U,  // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+  1181963162U,  // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+  2714055421U,  // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+  2714055432U,  // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+  2650329594U,  // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+  3785806619U,  // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+  2712212260U,  // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+  2714055472U,  // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+  1638323000U,  // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+  1638470466U,  // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+  1638470475U,  // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+  1638323022U,  // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+  2712064854U,  // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+  2712064865U,  // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+  2712064872U,  // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+  1638323062U,  // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+  2712064894U,  // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+  2712064905U,  // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+  2712064915U,  // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+  1638323094U,  // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+  1638470559U,  // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+  1576589102U,  // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+  2712212402U,  // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+  2712212409U,  // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+  1638470599U,  // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+  1576589466U,  // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+  1638323000U,  // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+  1638470624U,  // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+  1638470631U,  // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+  2712065007U,  // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+  1638323194U,  // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+  2712065025U,  // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+  3646958337U,  // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+  2712065044U,  // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+  2585161907U,  // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+  2591134604U,  // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+  2591134714U,  // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+  1638323257U,  // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+  2712065091U,  // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+  2712065098U,  // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+  2712065109U,  // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+  2692748384U,  // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+  2585169206U,  // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+  2693928048U,  // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+  2585170766U,  // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+  2735953024U,  // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+  2695918731U,  // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+  3770471574U,  // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+  3785807002U,  // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+  2712065189U,  // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+  2712065196U,  // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+  3773125818U,  // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+  3766490305U,  // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+  2700563658U,  // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+  2735953107U,  // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+  2701890780U,  // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+  2712065251U,  // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+  3766490350U,  // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+  3774305530U,  // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+  2637728196U,  // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+  2712065291U,  // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+  2585186486U,  // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+  2639719095U,  // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+  2640382728U,  // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+  2641046361U,  // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+  2712212792U,  // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+  3646989312U,  // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+  3785807176U,  // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+  3646991109U,  // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+  2712065371U,  // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+  1638323558U,  // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+  2712212845U,  // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+  2591167846U,  // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+  1638323585U,  // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+  2585198694U,  // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+  2712212884U,  // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+  3711471393U,  // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+  2649673590U,  // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+  2712065455U,  // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+  1577259032U,  // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+  2712065473U,  // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+  2712212936U,  // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+  1579249931U,  // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+  2591178854U,  // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+  2735953374U,  // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+  2712212974U,  // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+  2655646287U,  // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+  2591182134U,  // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+  2656973553U,  // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+  1583895362U,  // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+  2712065556U,  // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+  1585222628U,  // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+  1523417190U,  // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+  2597159670U,  // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+  2597160552U,  // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+  2597161110U,  // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+  1523420470U,  // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+  2651002296U,  // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+  2657637906U,  // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+   363253046U,  // <7,7,7,7>: Cost 1 vdup3 RHS
+   363253046U,  // <7,7,7,u>: Cost 1 vdup3 RHS
+  1523417190U,  // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+  1638471298U,  // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+  2712213132U,  // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+  2712213138U,  // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+  1523420470U,  // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+  1638471338U,  // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+  1595840756U,  // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+   363253046U,  // <7,7,u,7>: Cost 1 vdup3 RHS
+   363253046U,  // <7,7,u,u>: Cost 1 vdup3 RHS
+  1638318080U,  // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+  1638323923U,  // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+  1662211804U,  // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+  1638323941U,  // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+  2712065773U,  // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+  1662359286U,  // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+  1662359296U,  // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+  2987150664U,  // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+  1638323986U,  // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+  1517469798U,  // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+  1638318900U,  // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+   564582190U,  // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+  1638324023U,  // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+  1517473078U,  // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+  2693928777U,  // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+  1517474710U,  // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+  1640462171U,  // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+   564582244U,  // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+  1638318244U,  // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+  2712065907U,  // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+  1638319720U,  // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+  1638324101U,  // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+  1638318284U,  // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+  2712065947U,  // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+  2700564387U,  // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+  1640314796U,  // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+  1638324146U,  // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+  1638324156U,  // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+  1638319064U,  // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+  2700564435U,  // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+  1638320540U,  // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+  1638324196U,  // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+  1638324207U,  // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+  2700564472U,  // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+  2695919610U,  // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+  1638324228U,  // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+  2712066061U,  // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+  1662212122U,  // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+  1662212132U,  // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+  2712066092U,  // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+  1638321360U,  // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+  1638324287U,  // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+  1662359624U,  // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+  1640314961U,  // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+  1638324314U,  // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+  1517502566U,  // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+  1574612693U,  // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+  2712066162U,  // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+  1638324351U,  // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+  1576603592U,  // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+  1577267225U,  // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+   564582554U,  // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+  1640462499U,  // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+   564582572U,  // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+  2712066223U,  // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+  2712066238U,  // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+  1581249023U,  // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+  1638324432U,  // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+  1638468980U,  // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+  2712066274U,  // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+  1583903555U,  // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+  1640315117U,  // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+  1638324477U,  // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+  1638471936U,  // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+  2692970763U,  // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+  2700933399U,  // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+  2573347601U,  // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+  1638471976U,  // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+  1511551171U,  // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+  2712213815U,  // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+   363253046U,  // <7,u,7,7>: Cost 1 vdup3 RHS
+   363253046U,  // <7,u,7,u>: Cost 1 vdup3 RHS
+  1638324561U,  // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+  1638324571U,  // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+   564582757U,  // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+  1638324587U,  // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+  1638324601U,  // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+  1638324611U,  // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+   564582797U,  // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+   363253046U,  // <7,u,u,7>: Cost 1 vdup3 RHS
+   564582811U,  // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+   135053414U,  // <u,0,0,0>: Cost 1 vdup0 LHS
+  1611489290U,  // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+  1611489300U,  // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+  2568054923U,  // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+  1481706806U,  // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+  2555449040U,  // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+  2591282078U,  // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+  2591945711U,  // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+   135053414U,  // <u,0,0,u>: Cost 1 vdup0 LHS
+  1493655654U,  // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+  1860550758U,  // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+   537747563U,  // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+  2625135576U,  // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+  1493658934U,  // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+  2625135760U,  // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+  1517548447U,  // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+  2591290362U,  // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+   537747612U,  // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+  1611489444U,  // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+  2685231276U,  // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+  1994768486U,  // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+  2685231294U,  // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+  1611489484U,  // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+  2712068310U,  // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+  2625136570U,  // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+  2591962097U,  // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+  1611489516U,  // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+  2954067968U,  // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+  2685231356U,  // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+    72589981U,  // <u,0,3,2>: Cost 1 vrev LHS
+  2625137052U,  // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+  2625137154U,  // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+  2639071848U,  // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+  2639735481U,  // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+  2597279354U,  // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+    73032403U,  // <u,0,3,u>: Cost 1 vrev LHS
+  2687074636U,  // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+  1611489618U,  // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+  1611489628U,  // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+  3629222038U,  // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+  2555481398U,  // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+  1551396150U,  // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+  2651680116U,  // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+  2646150600U,  // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+  1611932050U,  // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+  2561458278U,  // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+  1863532646U,  // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+  2712068526U,  // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+  2649689976U,  // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+  2220237489U,  // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+  2651680772U,  // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+  1577939051U,  // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+  2830077238U,  // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+  1579266317U,  // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+  2555494502U,  // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+  2712068598U,  // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+  1997750374U,  // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+  2655662673U,  // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+  2555497782U,  // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+  2651681459U,  // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+  2651681592U,  // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+  2651681614U,  // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+  1997750428U,  // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+  2567446630U,  // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+  2567447446U,  // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+  2567448641U,  // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+  2573421338U,  // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+  2567449910U,  // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+  2651682242U,  // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+  2591339429U,  // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+  2651682412U,  // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+  2567452462U,  // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+   135053414U,  // <u,0,u,0>: Cost 1 vdup0 LHS
+  1611489938U,  // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+   537748125U,  // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+  2685674148U,  // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+  1611932338U,  // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+  1551399066U,  // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+  1517605798U,  // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+  2830077481U,  // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+   537748179U,  // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+  1544101961U,  // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+  1558036582U,  // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+  2619171051U,  // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+  1611490038U,  // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+  2555522358U,  // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+  2712068871U,  // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+  2591355815U,  // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+  2597328512U,  // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+  1611490083U,  // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+  1481785446U,  // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+   202162278U,  // <u,1,1,1>: Cost 1 vdup1 LHS
+  2555528808U,  // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+  1611490120U,  // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+  1481788726U,  // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+  2689876828U,  // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+  2591364008U,  // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+  2592691274U,  // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+   202162278U,  // <u,1,1,u>: Cost 1 vdup1 LHS
+  1499709542U,  // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+  2689876871U,  // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+  2631116445U,  // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+      835584U,  // <u,1,2,3>: Cost 0 copy LHS
+  1499712822U,  // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+  2689876907U,  // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+  2631780282U,  // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+  1523603074U,  // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+      835584U,  // <u,1,2,u>: Cost 0 copy LHS
+  1487773798U,  // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+  1611490264U,  // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+  2685232094U,  // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+  2018746470U,  // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+  1487777078U,  // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+  1611490304U,  // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+  2685674505U,  // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+  2640407307U,  // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+  1611490327U,  // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+  1567992749U,  // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+  2693121070U,  // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+  2693194807U,  // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+  1152386432U,  // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+  2555555126U,  // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+  1558039862U,  // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+  2645716371U,  // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+  2597361284U,  // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+  1152755117U,  // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+  1481818214U,  // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+  2555560694U,  // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+  2555561576U,  // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+  1611490448U,  // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+  1481821494U,  // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+  2651025435U,  // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+  2651689068U,  // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+  2823966006U,  // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+  1611932861U,  // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+  2555568230U,  // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+  2689877199U,  // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+  2712069336U,  // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+  2685232353U,  // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+  2555571510U,  // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+  2689877235U,  // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+  2657661765U,  // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+  1584583574U,  // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+  1585247207U,  // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+  2561548390U,  // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+  2561549681U,  // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+  2573493926U,  // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+  2042962022U,  // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+  2561551670U,  // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+  2226300309U,  // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+  2658325990U,  // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+  2658326124U,  // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+  2042962027U,  // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+  1481842790U,  // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+   202162278U,  // <u,1,u,1>: Cost 1 vdup1 LHS
+  2685674867U,  // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+      835584U,  // <u,1,u,3>: Cost 0 copy LHS
+  1481846070U,  // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+  1611933077U,  // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+  2685674910U,  // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+  1523652232U,  // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+      835584U,  // <u,1,u,u>: Cost 0 copy LHS
+  1544110154U,  // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+  1545437286U,  // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+  1545437420U,  // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+  2685232589U,  // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+  2619179346U,  // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+  2712069606U,  // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+  2689877484U,  // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+  2659656273U,  // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+  1545437853U,  // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+  1550082851U,  // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+  2619179828U,  // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+  2619179926U,  // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+  2685232671U,  // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+  2555604278U,  // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+  2619180176U,  // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+  2689877564U,  // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+  2602718850U,  // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+  1158703235U,  // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+  1481867366U,  // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+  2555609846U,  // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+   269271142U,  // <u,2,2,2>: Cost 1 vdup2 LHS
+  1611490930U,  // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+  1481870646U,  // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+  2689877640U,  // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+  2619180986U,  // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+  2593436837U,  // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+   269271142U,  // <u,2,2,u>: Cost 1 vdup2 LHS
+   408134301U,  // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+  1481876214U,  // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+  1481877096U,  // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+  1880326246U,  // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+   408137014U,  // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+  1529654992U,  // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+  1529655802U,  // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1529656314U,  // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+   408139566U,  // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+  1567853468U,  // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+  2561598362U,  // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+  2555627214U,  // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+  2685232918U,  // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+  2555628854U,  // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+  1545440566U,  // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+  1571982740U,  // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+  2592125957U,  // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+  1545440809U,  // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+  2555633766U,  // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+  2561606550U,  // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+  2689877856U,  // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+  2685233000U,  // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+  1158441059U,  // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+  2645725188U,  // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+  2689877892U,  // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+  2823900470U,  // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+  1158736007U,  // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+  1481900134U,  // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+  2555642614U,  // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+  2555643496U,  // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+  1611491258U,  // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+  1481903414U,  // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+  2689877964U,  // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+  2689877973U,  // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+  2645726030U,  // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+  1611933671U,  // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+  1585919033U,  // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+  2573566710U,  // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+  2567596115U,  // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+  1906901094U,  // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+  2555653430U,  // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+  2800080230U,  // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+  2980643164U,  // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+  2645726828U,  // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+  1906901099U,  // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+   408175266U,  // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+  1545443118U,  // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+   269271142U,  // <u,2,u,2>: Cost 1 vdup2 LHS
+  1611491416U,  // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+   408177974U,  // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+  1545443482U,  // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+  1726339226U,  // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+  1529697274U,  // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+   408180526U,  // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+  1544781824U,  // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+   471040156U,  // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+  1544781988U,  // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  2618523900U,  // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+  1544782162U,  // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  2238188352U,  // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+  2623169023U,  // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+  2238335826U,  // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+   471040669U,  // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+  1544782582U,  // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+  1544782644U,  // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+  1544782742U,  // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+  1544782808U,  // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  2618524733U,  // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+  1544782992U,  // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  2618524897U,  // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+  2703517987U,  // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+  1544783213U,  // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+  1529716838U,  // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+  1164167966U,  // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+  1544783464U,  // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+  1544783526U,  // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+  1529720118U,  // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+  2618525544U,  // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544783802U,  // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  2704181620U,  // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+  1544783931U,  // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+  1544784022U,  // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+  1487922559U,  // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+  1493895256U,  // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+   336380006U,  // <u,3,3,3>: Cost 1 vdup3 LHS
+  1544784386U,  // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+  2824054478U,  // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+  2238286668U,  // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+  2954069136U,  // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+   336380006U,  // <u,3,3,u>: Cost 1 vdup3 LHS
+  1487929446U,  // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+  1487930752U,  // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+  2623171644U,  // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+  2561673366U,  // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+  1487932726U,  // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+   471043382U,  // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+  1592561012U,  // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+  2238368598U,  // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+   471043625U,  // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+  2555707494U,  // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+  1574645465U,  // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+  2567653106U,  // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+  2555709954U,  // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+  1592561606U,  // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+  1592561668U,  // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+  1592561762U,  // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+  1750314294U,  // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+  1750314295U,  // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+  2623172897U,  // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+  2561688962U,  // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+  1581281795U,  // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+  2706541204U,  // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+  2623173261U,  // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+  1164495686U,  // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+  1592562488U,  // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+  1592562510U,  // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+  1164716897U,  // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+  1487954022U,  // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+  1487955331U,  // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+  1493928028U,  // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+  2561697942U,  // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+  1487957302U,  // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+  2707352311U,  // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+  2655024623U,  // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+  1592563308U,  // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+  1487959854U,  // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+  1544787667U,  // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+   471045934U,  // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+  1549432709U,  // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+   336380006U,  // <u,3,u,3>: Cost 1 vdup3 LHS
+  1544788031U,  // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+   471046298U,  // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+  1549433040U,  // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+  1750314537U,  // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+   471046501U,  // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+  2625167360U,  // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+  1551425638U,  // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+  2619195630U,  // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+  2619343104U,  // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+  2625167698U,  // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+  1638329234U,  // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+  1638329244U,  // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+  3787803556U,  // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+  1551426205U,  // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+  2555748454U,  // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+  2625168180U,  // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+  1551426503U,  // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+  2625168344U,  // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+  2555751734U,  // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+  1860554038U,  // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+  2689879022U,  // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+  2592248852U,  // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+  1555408301U,  // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+  2555756646U,  // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+  2625168943U,  // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+  2625169000U,  // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+  2619197134U,  // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+  2555759926U,  // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+  2712071222U,  // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+  1994771766U,  // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+  2592257045U,  // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+  1994771784U,  // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+  2625169558U,  // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+  2567709594U,  // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+  2567710817U,  // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+  2625169820U,  // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+  2625169922U,  // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+  2954069710U,  // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+  2954068172U,  // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+  3903849472U,  // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+  2954068174U,  // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+  1505919078U,  // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+  2567717831U,  // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+  2567719010U,  // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+  2570373542U,  // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+   161926454U,  // <u,4,4,4>: Cost 1 vdup0 RHS
+  1551428918U,  // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+  1638329572U,  // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+  2594927963U,  // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+   161926454U,  // <u,4,4,u>: Cost 1 vdup0 RHS
+  1493983334U,  // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+  2689879301U,  // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+  1493985379U,  // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+  2567727254U,  // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+  1493986614U,  // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+  1863535926U,  // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+   537750838U,  // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+  2830110006U,  // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+   537750856U,  // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+  1482047590U,  // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+  2555790070U,  // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+  2555790952U,  // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+  2555791510U,  // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+  1482050870U,  // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+  2689879422U,  // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+  1997753654U,  // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+  2712071562U,  // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+  1482053422U,  // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+  2567741542U,  // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+  2567742362U,  // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+  2567743589U,  // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+  2573716286U,  // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+  2567744822U,  // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+  2712071624U,  // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+    96808489U,  // <u,4,7,6>: Cost 1 vrev RHS
+  2651715180U,  // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+    96955963U,  // <u,4,7,u>: Cost 1 vrev RHS
+  1482063974U,  // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+  1551431470U,  // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+  1494009958U,  // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+  2555807894U,  // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+   161926454U,  // <u,4,u,4>: Cost 1 vdup0 RHS
+  1551431834U,  // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+   537751081U,  // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+  2830110249U,  // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+   537751099U,  // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+  2631811072U,  // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+  1558069350U,  // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+  2619203823U,  // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+  2619867456U,  // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+  1546273106U,  // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+  2733010539U,  // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+  2597622682U,  // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+  1176539396U,  // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+  1558069917U,  // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+  1505968230U,  // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+  2624512887U,  // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+  2631811990U,  // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+  2618541056U,  // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+  1505971510U,  // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+  2627167419U,  // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+  2579714554U,  // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+  1638330064U,  // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+  1638477529U,  // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+  2561802342U,  // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+  2561803264U,  // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+  2631149217U,  // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+  1558071026U,  // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+  2561805622U,  // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+  2714062607U,  // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+  2631813050U,  // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+  3092335926U,  // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+  1561389191U,  // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+  2561810534U,  // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+  2561811857U,  // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+  2631813474U,  // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+  2631813532U,  // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+  2619869698U,  // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+  3001847002U,  // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+  2954070530U,  // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+  2018749750U,  // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+  2018749751U,  // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+  2573762662U,  // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+  2620017634U,  // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+  2573764338U,  // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+  2573765444U,  // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+  1570680053U,  // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+  1558072630U,  // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+  2645749143U,  // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+  1638330310U,  // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+  1558072873U,  // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+  1506000998U,  // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+  2561827984U,  // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+  2579744360U,  // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+  2579744918U,  // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+  1506004278U,  // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+   229035318U,  // <u,5,5,5>: Cost 1 vdup1 RHS
+  2712072206U,  // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+  1638330392U,  // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+   229035318U,  // <u,5,5,u>: Cost 1 vdup1 RHS
+  1500037222U,  // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+  2561836436U,  // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+  2567809133U,  // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+  1500040006U,  // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+  1500040502U,  // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+  2714062935U,  // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+  2712072288U,  // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+    27705344U,  // <u,5,6,7>: Cost 0 copy RHS
+    27705344U,  // <u,5,6,u>: Cost 0 copy RHS
+  1488101478U,  // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+  1488102805U,  // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+  2561844840U,  // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+  2561845398U,  // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+  1488104758U,  // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+  1638330536U,  // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+  2712072362U,  // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+  2042965302U,  // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+  1488107310U,  // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+  1488109670U,  // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+  1488110998U,  // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+  2561853032U,  // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+  1500056392U,  // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+  1488112950U,  // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+   229035318U,  // <u,5,u,5>: Cost 1 vdup1 RHS
+  2954111490U,  // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+    27705344U,  // <u,5,u,7>: Cost 0 copy RHS
+    27705344U,  // <u,5,u,u>: Cost 0 copy RHS
+  2619211776U,  // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+  1545470054U,  // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+  1545470192U,  // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+  2255958969U,  // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+  1546797458U,  // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+  2720624971U,  // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+  2256180180U,  // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+  2960682294U,  // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+  1545470621U,  // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+  1182004127U,  // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+  2619212596U,  // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+  2619212694U,  // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+  2619212760U,  // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+  2626511979U,  // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+  2619212944U,  // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+  2714063264U,  // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+  2967326006U,  // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+  1182594023U,  // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+  1506050150U,  // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+  2579792630U,  // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+  2619213416U,  // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+  2619213478U,  // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+  1506053430U,  // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+  2633148309U,  // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+  2619213754U,  // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+  1638330874U,  // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+  1638478339U,  // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+  2619213974U,  // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+  2255836074U,  // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+  2255909811U,  // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+  2619214236U,  // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+  1564715549U,  // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+  2639121006U,  // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+  3001847012U,  // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+  1880329526U,  // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+  1880329527U,  // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+  2567864422U,  // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+  2733011558U,  // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+  2567866484U,  // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+  2638458005U,  // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+  1570540772U,  // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+  1545473334U,  // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+  1572015512U,  // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+  2960715062U,  // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+  1545473577U,  // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+  2567872614U,  // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+  2645757648U,  // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+  2567874490U,  // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+  2576501250U,  // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+  1576660943U,  // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+  2645757956U,  // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+  2645758050U,  // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+  2824080694U,  // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+  1182626795U,  // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+  1506082918U,  // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+  2579825398U,  // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+  2645758458U,  // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+  2579826838U,  // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+  1506086198U,  // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+  2579828432U,  // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+   296144182U,  // <u,6,6,6>: Cost 1 vdup2 RHS
+  1638331202U,  // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+   296144182U,  // <u,6,6,u>: Cost 1 vdup2 RHS
+   432349286U,  // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+  1506091766U,  // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+  1506092648U,  // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1506093206U,  // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+   432352809U,  // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+  1506094800U,  // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  1506095610U,  // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+  1906904374U,  // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+   432355118U,  // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+   432357478U,  // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+  1545475886U,  // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+  1506100840U,  // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+  1506101398U,  // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+   432361002U,  // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+  1545476250U,  // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+   296144182U,  // <u,6,u,6>: Cost 1 vdup2 RHS
+  1880370486U,  // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+   432363310U,  // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+  1571356672U,  // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+   497614950U,  // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+  1571356836U,  // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+  2573880146U,  // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+  1571357010U,  // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+  1512083716U,  // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+  2621874741U,  // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+  2585826298U,  // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+   497615517U,  // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+  1571357430U,  // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+  1571357492U,  // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+  1571357590U,  // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+  1552114715U,  // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+  2573888822U,  // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+  1553441981U,  // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+  2627847438U,  // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+  2727408775U,  // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+  1555432880U,  // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+  2629838337U,  // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+  1188058754U,  // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+  1571358312U,  // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+  1571358374U,  // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+  2632492869U,  // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+  2633156502U,  // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+  1560078311U,  // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+  2728072408U,  // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+  1561405577U,  // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+  1571358870U,  // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+  2627184913U,  // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+  2633820523U,  // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+  1571359132U,  // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+  1571359234U,  // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+  1512108295U,  // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+  1518080992U,  // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+  2640456465U,  // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+  1571359518U,  // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+  1571359634U,  // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+  2573911067U,  // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+  2645101622U,  // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+  2573912918U,  // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+  1571359952U,  // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+   497618248U,  // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+  1571360116U,  // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  2645102024U,  // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+   497618473U,  // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+  2645102152U,  // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+  1571360464U,  // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  2645102334U,  // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+  2645102447U,  // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+  1571360710U,  // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+  1571360772U,  // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+  1571360866U,  // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+  1571360936U,  // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+  1571361017U,  // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+  1530044518U,  // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+  2645103016U,  // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+  1571361274U,  // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  2645103154U,  // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+  1530047798U,  // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+  1188386474U,  // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+  1571361592U,  // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+  1571361614U,  // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+  1571361695U,  // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+  1571361786U,  // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+  2573935616U,  // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+  2645103781U,  // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+  2573937497U,  // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+  1571362150U,  // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+  1512141067U,  // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+  1518113764U,  // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+   363253046U,  // <u,7,7,7>: Cost 1 vdup3 RHS
+   363253046U,  // <u,7,7,u>: Cost 1 vdup3 RHS
+  1571362515U,  // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+   497620782U,  // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+  1571362693U,  // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+  1571362748U,  // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+  1571362879U,  // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+   497621146U,  // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+  1571363024U,  // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+   363253046U,  // <u,7,u,7>: Cost 1 vdup3 RHS
+   497621349U,  // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+   135053414U,  // <u,u,0,0>: Cost 1 vdup0 LHS
+   471081121U,  // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+  1544822948U,  // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+  1616140005U,  // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+  1544823122U,  // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+  1512157453U,  // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+  1662220032U,  // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+  1194457487U,  // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+   471081629U,  // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+  1544823542U,  // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+   202162278U,  // <u,u,1,1>: Cost 1 vdup1 LHS
+   537753390U,  // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+  1544823768U,  // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+  1494248758U,  // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+  1544823952U,  // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+  1518138343U,  // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+  1640322907U,  // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+   537753444U,  // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+  1482309734U,  // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+  1194031451U,  // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+   269271142U,  // <u,u,2,2>: Cost 1 vdup2 LHS
+      835584U,  // <u,u,2,3>: Cost 0 copy LHS
+  1482313014U,  // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+  2618566504U,  // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+  1544824762U,  // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+  1638479788U,  // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+      835584U,  // <u,u,2,u>: Cost 0 copy LHS
+   408576723U,  // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+  1482318582U,  // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+   120371557U,  // <u,u,3,2>: Cost 1 vrev LHS
+   336380006U,  // <u,u,3,3>: Cost 1 vdup3 LHS
+   408579382U,  // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+  1616140271U,  // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+  1530098170U,  // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+  1880329544U,  // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+   408581934U,  // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+  1488298086U,  // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+  1488299437U,  // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+  1659271204U,  // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+  1194195311U,  // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+   161926454U,  // <u,u,4,4>: Cost 1 vdup0 RHS
+   471084342U,  // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+  1571368308U,  // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+  1640323153U,  // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+   471084585U,  // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+  1494278246U,  // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+  1571368656U,  // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+  1494280327U,  // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+  1616140415U,  // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+  1494281526U,  // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+   229035318U,  // <u,u,5,5>: Cost 1 vdup1 RHS
+   537753754U,  // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+  1750355254U,  // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+   537753772U,  // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+  1482342502U,  // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+  2556084982U,  // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+  1571369466U,  // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+  1611938000U,  // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+  1482345782U,  // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+  1194359171U,  // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+   296144182U,  // <u,u,6,6>: Cost 1 vdup2 RHS
+    27705344U,  // <u,u,6,7>: Cost 0 copy RHS
+    27705344U,  // <u,u,6,u>: Cost 0 copy RHS
+   432496742U,  // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+  1488324016U,  // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+  1494296713U,  // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+  1906901148U,  // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+   432500283U,  // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+  1506242256U,  // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+   120699277U,  // <u,u,7,6>: Cost 1 vrev RHS
+   363253046U,  // <u,u,7,7>: Cost 1 vdup3 RHS
+   432502574U,  // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+   408617688U,  // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+   471086894U,  // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+   537753957U,  // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+      835584U,  // <u,u,u,3>: Cost 0 copy LHS
+   408620342U,  // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+   471087258U,  // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+   537753997U,  // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+    27705344U,  // <u,u,u,7>: Cost 0 copy RHS
+      835584U,  // <u,u,u,u>: Cost 0 copy LHS
   0
 };
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index d5bc3f60b01a..ad51bc13edf0 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -28,7 +28,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 305b232e6a99..22d15b572ddd 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -201,6 +201,10 @@ def CPSR    : ARMReg<0, "cpsr">;
 def FPSCR   : ARMReg<1, "fpscr">;
 def ITSTATE : ARMReg<2, "itstate">;
 
+// Special Registers - only available in privileged mode.
+def FPSID   : ARMReg<0, "fpsid">;
+def FPEXC   : ARMReg<8, "fpexc">;
+
 // Register classes.
 //
 // pc  == Program Counter
@@ -256,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
 
 // restricted GPR register class. Many Thumb2 instructions allow the full
 // register range for operands, but have undefined behaviours when PC
-// or SP (R13 or R15) are used. The ARM ARM refers to these operands
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
 // via the BadReg() pseudo-code description.
 def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
                                             R7, R8, R9, R10, R11, R12, LR]> {
@@ -381,27 +385,29 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
   let MethodBodies = [{
-    // VFP2
+    // VFP2 / VFPv3-D16
     static const unsigned ARM_DPR_VFP2[] = {
       ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
       ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
       ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
       ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-    // VFP3
+    // VFP3: D8-D15 are callee saved and should be allocated last.
+    // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
     static const unsigned ARM_DPR_VFP3[] = {
-      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
-      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
-      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
-      ARM::D12, ARM::D13, ARM::D14, ARM::D15,
       ARM::D16, ARM::D17, ARM::D18, ARM::D19,
       ARM::D20, ARM::D21, ARM::D22, ARM::D23,
       ARM::D24, ARM::D25, ARM::D26, ARM::D27,
-      ARM::D28, ARM::D29, ARM::D30, ARM::D31 };
+      ARM::D28, ARM::D29, ARM::D30, ARM::D31,
+      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
+      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
+      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
+      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
+
     DPRClass::iterator
     DPRClass::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3())
+      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
         return ARM_DPR_VFP3;
       return ARM_DPR_VFP2;
     }
@@ -410,7 +416,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
     DPRClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3())
+      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
         return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
       else
         return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
@@ -438,6 +444,29 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                         [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
                          Q8,  Q9,  Q10, Q11, Q12, Q13, Q14, Q15]> {
   let SubRegClasses = [(DPR dsub_0, dsub_1)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // Q4-Q7 are callee saved and should be allocated last.
+    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QPR[] = {
+      ARM::Q8,  ARM::Q9,  ARM::Q10, ARM::Q11,
+      ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
+      ARM::Q0,  ARM::Q1,  ARM::Q2,  ARM::Q3,
+      ARM::Q4,  ARM::Q5,  ARM::Q6,  ARM::Q7 };
+
+    QPRClass::iterator
+    QPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QPR;
+    }
+
+    QPRClass::iterator
+    QPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
+    }
+  }];
 }
 
 // Subset of QPR that have 32-bit SPR subregs.
@@ -463,6 +492,27 @@ def QQPR : RegisterClass<"ARM", [v4i64],
                          [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
                        (QPR qsub_0, qsub_1)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // QQ2-QQ3 are callee saved and should be allocated last.
+    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QQPR[] = {
+      ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
+      ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
+
+    QQPRClass::iterator
+    QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QQPR;
+    }
+
+    QQPRClass::iterator
+    QQPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
+    }
+  }];
 }
 
 // Subset of QQPR that have 32-bit SPR subregs.
@@ -483,6 +533,26 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
                             dsub_4, dsub_5, dsub_6, dsub_7),
                        (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // QQQQ1 is callee saved and should be allocated last.
+    // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QQQQPR[] = {
+      ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
+
+    QQQQPRClass::iterator
+    QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QQQQPR;
+    }
+
+    QQQQPRClass::iterator
+    QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
+    }
+  }];
 }
 
 // Condition code registers.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index b60ccca46867..958c5c647013 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -14,42 +14,86 @@ def IIC_iALUx      : InstrItinClass;
 def IIC_iALUi      : InstrItinClass;
 def IIC_iALUr      : InstrItinClass;
 def IIC_iALUsi     : InstrItinClass;
+def IIC_iALUsir    : InstrItinClass;
 def IIC_iALUsr     : InstrItinClass;
+def IIC_iBITi      : InstrItinClass;
+def IIC_iBITr      : InstrItinClass;
+def IIC_iBITsi     : InstrItinClass;
+def IIC_iBITsr     : InstrItinClass;
 def IIC_iUNAr      : InstrItinClass;
 def IIC_iUNAsi     : InstrItinClass;
-def IIC_iUNAsr     : InstrItinClass;
+def IIC_iEXTr      : InstrItinClass;
+def IIC_iEXTAr     : InstrItinClass;
+def IIC_iEXTAsr    : InstrItinClass;
 def IIC_iCMPi      : InstrItinClass;
 def IIC_iCMPr      : InstrItinClass;
 def IIC_iCMPsi     : InstrItinClass;
 def IIC_iCMPsr     : InstrItinClass;
+def IIC_iTSTi      : InstrItinClass;
+def IIC_iTSTr      : InstrItinClass;
+def IIC_iTSTsi     : InstrItinClass;
+def IIC_iTSTsr     : InstrItinClass;
 def IIC_iMOVi      : InstrItinClass;
 def IIC_iMOVr      : InstrItinClass;
 def IIC_iMOVsi     : InstrItinClass;
 def IIC_iMOVsr     : InstrItinClass;
+def IIC_iMOVix2    : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld  : InstrItinClass;
+def IIC_iMVNi      : InstrItinClass;
+def IIC_iMVNr      : InstrItinClass;
+def IIC_iMVNsi     : InstrItinClass;
+def IIC_iMVNsr     : InstrItinClass;
 def IIC_iCMOVi     : InstrItinClass;
 def IIC_iCMOVr     : InstrItinClass;
 def IIC_iCMOVsi    : InstrItinClass;
 def IIC_iCMOVsr    : InstrItinClass;
+def IIC_iCMOVix2   : InstrItinClass;
 def IIC_iMUL16     : InstrItinClass;
 def IIC_iMAC16     : InstrItinClass;
 def IIC_iMUL32     : InstrItinClass;
 def IIC_iMAC32     : InstrItinClass;
 def IIC_iMUL64     : InstrItinClass;
 def IIC_iMAC64     : InstrItinClass;
-def IIC_iLoadi     : InstrItinClass;
-def IIC_iLoadr     : InstrItinClass;
-def IIC_iLoadsi    : InstrItinClass;
-def IIC_iLoadiu    : InstrItinClass;
-def IIC_iLoadru    : InstrItinClass;
-def IIC_iLoadsiu   : InstrItinClass;
-def IIC_iLoadm     : InstrItinClass;
-def IIC_iStorei    : InstrItinClass;
-def IIC_iStorer    : InstrItinClass;
-def IIC_iStoresi   : InstrItinClass;
-def IIC_iStoreiu   : InstrItinClass;
-def IIC_iStoreru   : InstrItinClass;
-def IIC_iStoresiu  : InstrItinClass;
-def IIC_iStorem    : InstrItinClass;
+def IIC_iLoad_i    : InstrItinClass;
+def IIC_iLoad_r    : InstrItinClass;
+def IIC_iLoad_si   : InstrItinClass;
+def IIC_iLoad_iu   : InstrItinClass;
+def IIC_iLoad_ru   : InstrItinClass;
+def IIC_iLoad_siu  : InstrItinClass;
+def IIC_iLoad_bh_i   : InstrItinClass;
+def IIC_iLoad_bh_r   : InstrItinClass;
+def IIC_iLoad_bh_si  : InstrItinClass;
+def IIC_iLoad_bh_iu  : InstrItinClass;
+def IIC_iLoad_bh_ru  : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i  : InstrItinClass;
+def IIC_iLoad_d_r  : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m    : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mu   : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mBr  : InstrItinClass<0>;  // micro-coded
+def IIC_iPop       : InstrItinClass<0>;  // micro-coded
+def IIC_iPop_Br    : InstrItinClass<0>;  // micro-coded
+def IIC_iLoadiALU  : InstrItinClass;
+def IIC_iStore_i   : InstrItinClass;
+def IIC_iStore_r   : InstrItinClass;
+def IIC_iStore_si  : InstrItinClass;
+def IIC_iStore_iu  : InstrItinClass;
+def IIC_iStore_ru  : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i   : InstrItinClass;
+def IIC_iStore_bh_r   : InstrItinClass;
+def IIC_iStore_bh_si  : InstrItinClass;
+def IIC_iStore_bh_iu  : InstrItinClass;
+def IIC_iStore_bh_ru  : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i   : InstrItinClass;
+def IIC_iStore_d_r   : InstrItinClass;
+def IIC_iStore_d_ru  : InstrItinClass;
+def IIC_iStore_m   : InstrItinClass<0>;  // micro-coded
+def IIC_iStore_mu  : InstrItinClass<0>;  // micro-coded
+def IIC_Preload    : InstrItinClass;
 def IIC_Br         : InstrItinClass;
 def IIC_fpSTAT     : InstrItinClass;
 def IIC_fpUNA32    : InstrItinClass;
@@ -80,19 +124,76 @@ def IIC_fpSQRT32   : InstrItinClass;
 def IIC_fpSQRT64   : InstrItinClass;
 def IIC_fpLoad32   : InstrItinClass;
 def IIC_fpLoad64   : InstrItinClass;
-def IIC_fpLoadm    : InstrItinClass;
+def IIC_fpLoad_m   : InstrItinClass<0>;  // micro-coded
+def IIC_fpLoad_mu  : InstrItinClass<0>;  // micro-coded
 def IIC_fpStore32  : InstrItinClass;
 def IIC_fpStore64  : InstrItinClass;
-def IIC_fpStorem   : InstrItinClass;
+def IIC_fpStore_m  : InstrItinClass<0>;  // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>;  // micro-coded
 def IIC_VLD1       : InstrItinClass;
+def IIC_VLD1x2     : InstrItinClass;
+def IIC_VLD1x3     : InstrItinClass;
+def IIC_VLD1x4     : InstrItinClass;
+def IIC_VLD1u      : InstrItinClass;
+def IIC_VLD1x2u    : InstrItinClass;
+def IIC_VLD1x3u    : InstrItinClass;
+def IIC_VLD1x4u    : InstrItinClass;
+def IIC_VLD1ln     : InstrItinClass;
+def IIC_VLD1lnu    : InstrItinClass;
+def IIC_VLD1dup    : InstrItinClass;
+def IIC_VLD1dupu   : InstrItinClass;
 def IIC_VLD2       : InstrItinClass;
+def IIC_VLD2x2     : InstrItinClass;
+def IIC_VLD2u      : InstrItinClass;
+def IIC_VLD2x2u    : InstrItinClass;
+def IIC_VLD2ln     : InstrItinClass;
+def IIC_VLD2lnu    : InstrItinClass;
+def IIC_VLD2dup    : InstrItinClass;
+def IIC_VLD2dupu   : InstrItinClass;
 def IIC_VLD3       : InstrItinClass;
+def IIC_VLD3ln     : InstrItinClass;
+def IIC_VLD3u      : InstrItinClass;
+def IIC_VLD3lnu    : InstrItinClass;
+def IIC_VLD3dup    : InstrItinClass;
+def IIC_VLD3dupu   : InstrItinClass;
 def IIC_VLD4       : InstrItinClass;
-def IIC_VST        : InstrItinClass;
+def IIC_VLD4ln     : InstrItinClass;
+def IIC_VLD4u      : InstrItinClass;
+def IIC_VLD4lnu    : InstrItinClass;
+def IIC_VLD4dup    : InstrItinClass;
+def IIC_VLD4dupu   : InstrItinClass;
+def IIC_VST1       : InstrItinClass;
+def IIC_VST1x2     : InstrItinClass;
+def IIC_VST1x3     : InstrItinClass;
+def IIC_VST1x4     : InstrItinClass;
+def IIC_VST1u      : InstrItinClass;
+def IIC_VST1x2u    : InstrItinClass;
+def IIC_VST1x3u    : InstrItinClass;
+def IIC_VST1x4u    : InstrItinClass;
+def IIC_VST1ln     : InstrItinClass;
+def IIC_VST1lnu    : InstrItinClass;
+def IIC_VST2       : InstrItinClass;
+def IIC_VST2x2     : InstrItinClass;
+def IIC_VST2u      : InstrItinClass;
+def IIC_VST2x2u    : InstrItinClass;
+def IIC_VST2ln     : InstrItinClass;
+def IIC_VST2lnu    : InstrItinClass;
+def IIC_VST3       : InstrItinClass;
+def IIC_VST3u      : InstrItinClass;
+def IIC_VST3ln     : InstrItinClass;
+def IIC_VST3lnu    : InstrItinClass;
+def IIC_VST4       : InstrItinClass;
+def IIC_VST4u      : InstrItinClass;
+def IIC_VST4ln     : InstrItinClass;
+def IIC_VST4lnu    : InstrItinClass;
 def IIC_VUNAD      : InstrItinClass;
 def IIC_VUNAQ      : InstrItinClass;
 def IIC_VBIND      : InstrItinClass;
 def IIC_VBINQ      : InstrItinClass;
+def IIC_VPBIND     : InstrItinClass;
+def IIC_VFMULD     : InstrItinClass;
+def IIC_VFMULQ     : InstrItinClass;
+def IIC_VMOV       : InstrItinClass;
 def IIC_VMOVImm    : InstrItinClass;
 def IIC_VMOVD      : InstrItinClass;
 def IIC_VMOVQ      : InstrItinClass;
@@ -101,6 +202,7 @@ def IIC_VMOVID     : InstrItinClass;
 def IIC_VMOVISL    : InstrItinClass;
 def IIC_VMOVSI     : InstrItinClass;
 def IIC_VMOVDI     : InstrItinClass;
+def IIC_VMOVN      : InstrItinClass;
 def IIC_VPERMD     : InstrItinClass;
 def IIC_VPERMQ     : InstrItinClass;
 def IIC_VPERMQ3    : InstrItinClass;
@@ -152,7 +254,7 @@ def IIC_VTBX4      : InstrItinClass;
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
 
-def GenericItineraries : ProcessorItineraries<[], []>;
+def GenericItineraries : ProcessorItineraries<[], [], []>;
 
 include "ARMScheduleV6.td"
 include "ARMScheduleA8.td"
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 282abca98803..8d86c01dc741 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -14,18 +14,17 @@
 //
 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
 // Functional Units.
-def A8_Issue   : FuncUnit; // issue
 def A8_Pipe0   : FuncUnit; // pipeline 0
 def A8_Pipe1   : FuncUnit; // pipeline 1
-def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
-def A8_LdSt1   : FuncUnit; // pipeline 1 load/store
+def A8_LSPipe  : FuncUnit; // Load / store pipeline
 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
 def A8_NLSPipe : FuncUnit; // NEON LS pipe
 //
 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
 //
 def CortexA8Itineraries : ProcessorItineraries<
-  [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
+  [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+  [], [
   // Two fully-pipelined integer ALU pipelines
   //
   // No operand cycles
@@ -35,12 +34,23 @@ def CortexA8Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
   //
+  // Bitwise Instructions that produce a result
+  InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+  InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+  //
   // Unary Instructions that produce a result
   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+  //
+  // Zero and sign extension instructions
+  InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
   //
   // Compare instructions
   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
@@ -48,124 +58,184 @@ def CortexA8Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
   //
+  // Test instructions
+  InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+  InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+  //
   // Move instructions, unconditional
   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+  InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                             InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<1, [A8_LSPipe]>], [5]>,
   //
   // Move instructions, conditional
   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+  //
+  // MVN instructions
+  InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+  InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
 
   // Integer multiply pipeline
   // Result written in E5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
   //
   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
-  InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
-                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
-  InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
-                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
-  InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
-                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
-  InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
-                                InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
-  InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
-                                InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+  InstrItinData<IIC_iMAC16   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData<IIC_iMUL32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+  InstrItinData<IIC_iMAC32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData<IIC_iMUL64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+  InstrItinData<IIC_iMAC64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
 
   // Integer load pipeline
   //
-  // loads have an extra cycle of latency, but are fully pipelined
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  //
   // Immediate offset
-  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+  // FIXME: lsl by 2 takes 1 cycle.
+  InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
-  //
-  // Load multiple
-  InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>]>,
+  InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                  InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+  //
+  // Load multiple, def is the 5th operand. Pipeline 0 only.
+  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+  InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
+                                InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+  //
+  // Load multiple + update, defs are the 1st and 5th operands.
+  InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+                                InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+  //
+  // Load multiple plus branch
+  InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+                                InstrStage<3, [A8_LSPipe]>,
+                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+                               [1, 2, 1, 1, 3]>,
+  //
+  // Pop, def is the 3rd operand.
+  InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
+                                InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+  //
+  // Push, def is the 3th operand.
+  InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
+                                InstrStage<3, [A8_LSPipe]>,
+                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+                               [1, 1, 3]>,
 
-  // Integer store pipeline
   //
-  // use A8_Issue to enforce the 1 load/store per cycle limit
+  // iLoadi + iALUr for t2LDRpci_pic.
+  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                InstrStage<1, [A8_LSPipe]>,
+                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+  // Integer store pipeline
   //
   // Immediate offset
-  InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                  InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
-  //
-  // Store multiple
-  InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>]>,
+  InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+                                   InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+  //
+  // Store multiple. Pipeline 0 only.
+  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+  InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+                                InstrStage<2, [A8_LSPipe]>]>,
+  //
+  // Store multiple + update
+  InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+                                InstrStage<2, [A8_LSPipe]>], [2]>,
+
+  //
+  // Preload
+  InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
 
   // Branch
   //
@@ -178,440 +248,786 @@ def CortexA8Itineraries : ProcessorItineraries<
   // possible.
   //
   // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                              InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                              InstrStage<1, [A8_NLSPipe]>], [20]>,
   //
   // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<4, [A8_NPipe], 0>,
                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
   //
   // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<4, [A8_NPipe], 0>,
                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<7, [A8_NPipe], 0>,
                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<5, [A8_NPipe], 0>,
                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<8, [A8_NPipe], 0>,
                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<8, [A8_NPipe], 0>,
                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<9, [A8_NPipe], 0>,
                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<11, [A8_NPipe], 0>,
                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<19, [A8_NPipe], 0>,
                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<20, [A8_NPipe], 0>,
                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<29, [A8_NPipe], 0>,
                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<19, [A8_NPipe], 0>,
                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<29, [A8_NPipe], 0>,
                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+  //
+  // Integer to Single-precision Move
+  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>],
+                              [2, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>],
+                              [2, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>],
+                              [20, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>],
+                              [20, 20, 1]>,
+
   //
   // Single-precision FP Load
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>],
+                              [2, 1]>,
   //
   // Double-precision FP Load
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0], 0>,
-                               InstrStage<1, [A8_Pipe1]>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>],
+                              [2, 1]>,
   //
   // FP Load Multiple
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
-                               InstrStage<2, [A8_Pipe0], 0>,
-                               InstrStage<2, [A8_Pipe1]>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+  //
+  // FP Load Multiple + update
+  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
   //
   // Single-precision FP Store
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>],
+                              [1, 1]>,
   //
   // Double-precision FP Store
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0], 0>,
-                               InstrStage<1, [A8_Pipe1]>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>],
+                              [1, 1]>,
   //
   // FP Store Multiple
-  // use A8_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
-                               InstrStage<2, [A8_Pipe0], 0>,
-                               InstrStage<2, [A8_Pipe1]>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>,
+                               InstrStage<1, [A8_NLSPipe], 0>,
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+  //
+  // FP Store Multiple + update
+  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                                InstrStage<1, [A8_NLSPipe], 0>,
+                                InstrStage<1, [A8_LSPipe]>,
+                                InstrStage<1, [A8_NLSPipe], 0>,
+                                InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
 
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
   //
   // VLD1
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1]>,
+  // VLD1x2
+  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 1]>,
+  //
+  // VLD1x3
+  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 1]>,
+  //
+  // VLD1x4
+  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 1]>,
+  //
+  // VLD1u
+  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 1]>,
+  //
+  // VLD1x2u
+  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 2, 1]>,
+  //
+  // VLD1x3u
+  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 2, 1]>,
+  //
+  // VLD1x4u
+  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 2, 1]>,
+  //
+  // VLD1ln
+  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [3, 1, 1, 1]>,
+  //
+  // VLD1lnu
+  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [3, 2, 1, 1, 1, 1]>,
+  //
+  // VLD1dup
+  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1]>,
+  //
+  // VLD1dupu
+  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 1, 1]>,
   //
   // VLD2
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 1]>,
+  //
+  // VLD2x2
+  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 1]>,
+  //
+  // VLD2ln
+  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [3, 3, 1, 1, 1, 1]>,
+  //
+  // VLD2u
+  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 2, 1, 1, 1]>,
+  //
+  // VLD2x2u
+  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 2, 1]>,
+  //
+  // VLD2lnu
+  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [3, 3, 2, 1, 1, 1, 1, 1]>,
+  //
+  // VLD2dup
+  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 1]>,
+  //
+  // VLD2dupu
+  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 2, 2, 1, 1]>,
   //
   // VLD3
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 1]>,
+  //
+  // VLD3ln
+  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<5, [A8_NLSPipe], 0>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3u
+  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 2, 1]>,
+  //
+  // VLD3lnu
+  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<5, [A8_NLSPipe], 0>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3dup
+  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 1]>,
+  //
+  // VLD3dupu
+  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 2, 1, 1]>,
   //
   // VLD4
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
+  InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 4, 1]>,
+  //
+  // VLD4ln
+  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<5, [A8_NLSPipe], 0>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4u
+  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [3, 3, 4, 4, 2, 1]>,
+  //
+  // VLD4lnu
+  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<5, [A8_NLSPipe], 0>,
+                               InstrStage<5, [A8_LSPipe]>],
+                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4dup
+  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 1]>,
+  //
+  // VLD4dupu
+  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 2, 3, 3, 2, 1, 1]>,
+  //
+  // VST1
+  InstrItinData<IIC_VST1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [1, 1, 1]>,
   //
-  // VST
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
-                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                               InstrStage<1, [A8_LdSt0], 0>,
-                               InstrStage<1, [A8_NLSPipe]>]>,
+  // VST1x2
+  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST1x3
+  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST1x4
+  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1u
+  InstrItinData<IIC_VST1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1]>,
+  //
+  // VST1x2u
+  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST1x3u
+  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST1x4u
+  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1ln
+  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [1, 1, 1]>,
+  //
+  // VST1lnu
+  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1]>,
+  //
+  // VST2
+  InstrItinData<IIC_VST2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST2x2
+  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2u
+  InstrItinData<IIC_VST2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST2x2u
+  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2ln
+  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST2lnu
+  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<2, [A8_NLSPipe], 0>,
+                               InstrStage<2, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST3
+  InstrItinData<IIC_VST3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST3u
+  InstrItinData<IIC_VST3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST3ln
+  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST3lnu
+  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<3, [A8_NLSPipe], 0>,
+                               InstrStage<3, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST4
+  InstrItinData<IIC_VST4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4u
+  InstrItinData<IIC_VST4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4ln
+  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4lnu
+  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<4, [A8_NLSPipe], 0>,
+                               InstrStage<4, [A8_LSPipe]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
   //
   // Double-register FP Unary
-  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
   //
   // Quad-register FP Unary
   // Result written in N5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
-  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
   //
   // Double-register FP Binary
-  InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
   //
+  // VPADD, etc.
+  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+  //
+  // Double-register FP VMUL
+  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+  //
   // Quad-register FP Binary
   // Result written in N5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
-  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
   //
+  // Quad-register FP VMUL
+  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+  //
+  // Move
+  InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
+  //
   // Move Immediate
-  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3]>,
   //
   // Double-register Permute Move
-  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
   //
   // Quad-register Permute Move
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 3 for those cases
-  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
   //
   // Integer to Single-precision Move
-  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
   //
   // Integer to Lane Move
-  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
   //
+  // Vector narrow move
+  InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+                               InstrStage<1, [A8_NPipe]>], [2, 1]>,
+  //
   // Double-register Permute
-  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
   //
   // Quad-register Permute
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 3 for those cases
-  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
   //
   // Quad-register Permute (3 cycle issue)
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>,
                                InstrStage<1, [A8_NPipe], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
   //
   // Double-register FP Multiple-Accumulate
-  InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
   //
   // Quad-register FP Multiple-Accumulate
   // Result written in N9, but that is relative to the last cycle of multicycle,
   // so we use 10 for those cases
-  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
   //
   // Double-register Reciprical Step
-  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
   //
   // Quad-register Reciprical Step
-  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
   //
   // Double-register Integer Count
-  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
   //
   // Quad-register Integer Count
   // Result written in N3, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
   //
   // Double-register Integer Unary
-  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
   //
   // Quad-register Integer Unary
-  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
   //
   // Double-register Integer Q-Unary
-  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
   //
   // Quad-register Integer CountQ-Unary
-  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
   //
   // Double-register Integer Binary
-  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
   //
   // Quad-register Integer Binary
-  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
   //
   // Double-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
   //
   // Quad-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
 
   //
   // Double-register Integer Subtract
-  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
   //
   // Quad-register Integer Subtract
-  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
   //
   // Double-register Integer Subtract
-  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
   //
   // Quad-register Integer Subtract
-  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
   //
   // Double-register Integer Shift
-  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
   //
   // Quad-register Integer Shift
-  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
   //
   // Double-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
   //
   // Quad-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
   //
   // Double-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
   //
   // Double-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
   //
   // Quad-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
 
   //
   // Double-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
   //
   // Double-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
   //
   // Quad-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
   //
   // Quad-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>,
                                InstrStage<2, [A8_NLSPipe], 0>,
                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NPipe]>,
                                InstrStage<2, [A8_NLSPipe], 0>,
                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
   //
   // Double-register VEXT
-  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
   //
   // Quad-register VEXT
-  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
   //
   // VTB
-  InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
-  InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
-  InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>,
                                InstrStage<1, [A8_NPipe], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>,
                                InstrStage<1, [A8_NPipe], 0>,
                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
   //
   // VTBX
-  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
-  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>,
                                InstrStage<1, [A8_NPipe], 0>,
                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe]>,
                                InstrStage<1, [A8_NPipe], 0>,
                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index df2f896a8d4b..82c6735f1b14 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -16,130 +16,417 @@
 // Reference Manual".
 //
 // Functional units
-def A9_Pipe0   : FuncUnit; // pipeline 0
-def A9_Pipe1   : FuncUnit; // pipeline 1
-def A9_LSPipe  : FuncUnit; // LS pipe
-def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe
+def A9_Issue0  : FuncUnit; // Issue 0
+def A9_Issue1  : FuncUnit; // Issue 1
+def A9_Branch  : FuncUnit; // Branch
+def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1    : FuncUnit; // ALU pipeline 1
+def A9_AGU     : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe   : FuncUnit; // NEON pipeline
+def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit  : FuncUnit; // L/S Unit
 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
 def A9_DRegsN  : FuncUnit; // FP register set, NEON side
 
-// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
-//
+// Bypasses
+def A9_LdBypass : Bypass;
+
 def CortexA9Itineraries : ProcessorItineraries<
-  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
+  [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+   A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+  [A9_LdBypass], [
   // Two fully-pipelined integer ALU pipelines
-  // FIXME: There are no operand latencies for these instructions at all!
+
   //
   // Move instructions, unconditional
-  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
-  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
-  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
-  InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                                  InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_AGU], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [5]>,
+  //
+  // MVN instructions
+  InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                              [1]>,
+  InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                              [1, 1], [NoBypass, A9_LdBypass]>,
+  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
+                              [2, 1]>,
+  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
+                              [3, 1, 1]>,
   //
   // No operand cycles
-  InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+  InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
   //
   // Binary Instructions that produce a result
-  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
-  InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
-  InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                            [1, 1], [NoBypass, A9_LdBypass]>,
+  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                            [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+  InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
+                            [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+  InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
+                            [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+  InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<3, [A9_ALU0, A9_ALU1]>],
+                            [3, 1, 1, 1],
+                            [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+  //
+  // Bitwise Instructions that produce a result
+  InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+  InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
   //
   // Unary Instructions that produce a result
-  InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iUNAsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+
+  // CLZ, RBIT, etc.
+  InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+  // BFC, BFI, UBFX, SBFX
+  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+  //
+  // Zero and sign extension instructions
+  InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
   //
   // Compare instructions
-  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
-  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                               [1], [A9_LdBypass]>,
+  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
+  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
+                                [1, 1], [A9_LdBypass, NoBypass]>,
+  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
+                              [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+  //
+  // Test instructions
+  InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+  InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
   //
   // Move instructions, conditional
-  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
-  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+  // FIXME: Correctly model the extra input dep on the destination.
+  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
+                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
 
   // Integer multiply pipeline
   //
-  InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
-                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
-  InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
-                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
-  InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
-                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
-  InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
-                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
-  InstrItinData<IIC_iMUL64   , [InstrStage<2, [A9_Pipe1], 0>,
-                                InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
-  InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
-                                InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+  InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0]>],
+                              [3, 1, 1, 1]>,
+  InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+  InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<2, [A9_ALU0]>],
+                              [4, 1, 1, 1]>,
+  InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+  InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<3, [A9_ALU0]>],
+                              [4, 5, 1, 1]>,
   // Integer load pipeline
   // FIXME: The timings are some rough approximations
   //
   // Immediate offset
-  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [4, 1], [A9_LdBypass]>,
+  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 3, 1], [A9_LdBypass]>,
   //
   // Register offset
-  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [4, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 3, 1, 1], [A9_LdBypass]>,
   //
   // Scaled register offset
-  InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit], 0>],
+                                [4, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [5, 1, 1], [A9_LdBypass]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 2, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [4, 3, 1], [A9_LdBypass]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 2, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [4, 3, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [3, 3, 1, 1], [A9_LdBypass]>,
   //
   // Scaled register offset with update
-  InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [4, 3, 1, 1], [A9_LdBypass]>,
+  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 0>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [5, 4, 1, 1], [A9_LdBypass]>,
+  //
+  // Load multiple, def is the 5th operand.
+  // FIXME: This assumes 3 to 4 registers.
+  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>],
+                               [1, 1, 1, 1, 3],
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Load multiple + update, defs are the 1st and 5th operands.
+  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>],
+                               [2, 1, 1, 1, 3],
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Load multiple plus branch
+  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>,
+                                InstrStage<1, [A9_Branch]>],
+                               [1, 2, 1, 1, 3],
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Pop, def is the 3rd operand.
+  InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>],
+                               [1, 1, 3],
+                               [NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Pop + branch, def is the 3rd operand.
+  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>,
+                                InstrStage<1, [A9_Branch]>],
+                               [1, 1, 3],
+                               [NoBypass, NoBypass, A9_LdBypass]>,
+
   //
-  // Load multiple
-  InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>]>,
+  // iLoadi + iALUr for t2LDRpci_pic.
+  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<1, [A9_LSUnit]>,
+                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                               [2, 1]>,
 
   // Integer store pipeline
   ///
   // Immediate offset
-  InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
   //
   // Scaled register offset
-  InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<1, [A9_AGU], 0>,
+                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<1, [A9_AGU], 0>,
+                                  InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<1, [A9_AGU], 0>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [2, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [3, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [3, 1, 1, 1]>,
   //
   // Scaled register offset with update
-  InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                    InstrStage<1, [A9_MUX0], 0>,
+                                    InstrStage<1, [A9_AGU], 0>,
+                                    InstrStage<1, [A9_LSUnit]>],
+                                   [2, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                    InstrStage<1, [A9_MUX0], 0>,
+                                    InstrStage<2, [A9_AGU], 1>,
+                                    InstrStage<1, [A9_LSUnit]>],
+                                   [3, 1, 1, 1]>,
   //
   // Store multiple
-  InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>]>,
+  InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<2, [A9_LSUnit]>]>,
+  //
+  // Store multiple + update
+  InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<2, [A9_LSUnit]>], [2]>,
+
+  //
+  // Preload
+  InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
-  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
+                                InstrStage<1, [A9_Issue1], 0>,
+                                InstrStage<1, [A9_Branch]>]>,
 
   // VFP and NEON shares the same register file. This means that every VFP
   // instruction should wait for full completion of the consecutive NEON
@@ -159,687 +446,1379 @@ def CortexA9Itineraries : ProcessorItineraries<
   // Issue through integer pipeline, and execute in NEON unit.
 
   // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1]>,
   //
   // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
 
   //
   // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
 
   //
   // Single to Half FP Convert
-  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Half to Single FP Convert
-  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
 
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [8, 1, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
+                               InstrStage<2,  [A9_NPipe]>],
+                              [9, 1, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
+                               InstrStage<10, [A9_NPipe]>],
+                              [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
-                               InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
+                               InstrStage<20, [A9_NPipe]>],
+                              [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<13, [A9_NPipe]>], [17, 1]>,
+                               InstrStage<13, [A9_NPipe]>],
+                              [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1,  [A9_MUX0], 0>,
+                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1,  [A9_Pipe1]>,
-                               InstrStage<28, [A9_NPipe]>], [32, 1]>,
+                               InstrStage<28, [A9_NPipe]>],
+                              [32, 1]>,
 
   //
   // Integer to Single-precision Move
-  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra 1 latency cycle since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra 1 latency cycle since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1, 1]>,
   //
   // Single-precision FP Load
-  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // Double-precision FP Load
-  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  // FIXME: Result latency is 1 if address is 64-bit aligned.
+  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 1]>,
   //
   // FP Load Multiple
-  InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
+                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+  //
+  // FP Load Multiple + update
+  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
   //
   // Single-precision FP Store
-  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // Double-precision FP Store
-  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // FP Store Multiple
-  InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+  //
+  // FP Store Multiple + update
+  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
+                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
+                                InstrStage<1, [A9_NPipe], 0>,
+                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
   // NEON
-  // Issue through integer pipeline, and execute in NEON unit.
-  // FIXME: Neon pipeline and LdSt unit are multiplexed.
-  //        Add some syntactic sugar to model this!
   // VLD1
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+  // FIXME: Conservatively assume insufficent alignment.
+  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 1]>,
+  // VLD1x2
+  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 2, 1]>,
+  // VLD1x3
+  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 2, 3, 1]>,
+  // VLD1x4
+  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 2, 3, 3, 1]>,
+  // VLD1u
+  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 2, 1]>,
+  // VLD1x2u
+  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 2, 2, 1]>,
+  // VLD1x3u
+  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 2, 3, 2, 1]>,
+  // VLD1x4u
+  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 2, 3, 3, 2, 1]>,
+  //
+  // VLD1ln
+  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [4, 1, 1, 1]>,
+  //
+  // VLD1lnu
+  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [4, 2, 1, 1, 1, 1]>,
+  //
+  // VLD1dup
+  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 1]>,
+  //
+  // VLD1dupu
+  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 2, 1, 1]>,
   //
   // VLD2
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 1]>,
+  //
+  // VLD2x2
+  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 4, 3, 4, 1]>,
+  //
+  // VLD2ln
+  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [4, 4, 1, 1, 1, 1]>,
+  //
+  // VLD2u
+  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 2, 1, 1, 1]>,
+  //
+  // VLD2x2u
+  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 4, 3, 4, 2, 1]>,
+  //
+  // VLD2lnu
+  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [4, 4, 2, 1, 1, 1, 1, 1]>,
+  //
+  // VLD2dup
+  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 1]>,
+  //
+  // VLD2dupu
+  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 2, 1, 1]>,
   //
   // VLD3
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 1]>,
+  //
+  // VLD3ln
+  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<5, [A9_NPipe], 0>,
+                               InstrStage<5, [A9_LSUnit]>],
+                              [5, 5, 6, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3u
+  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 2, 1]>,
+  //
+  // VLD3lnu
+  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<5, [A9_NPipe], 0>,
+                               InstrStage<5, [A9_LSUnit]>],
+                              [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3dup
+  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 1]>,
+  //
+  // VLD3dupu
+  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 2, 1, 1]>,
   //
   // VLD4
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
-  //
-  // VST
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 6 cycles
-                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 5, 1]>,
+  //
+  // VLD4ln
+  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<5, [A9_NPipe], 0>,
+                               InstrStage<5, [A9_LSUnit]>],
+                              [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4u
+  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<4, [A9_NPipe], 0>,
+                               InstrStage<4, [A9_LSUnit]>],
+                              [4, 4, 5, 5, 2, 1]>,
+  //
+  // VLD4lnu
+  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<5, [A9_NPipe], 0>,
+                               InstrStage<5, [A9_LSUnit]>],
+                              [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4dup
+  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 4, 1]>,
+  //
+  // VLD4dupu
+  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 4, 2, 1, 1]>,
+  //
+  // VST1
+  InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1]>,
+  //
+  // VST1x2
+  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST1x3
+  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST1x4
+  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1u
+  InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1]>,
+  //
+  // VST1x2u
+  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST1x3u
+  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST1x4u
+  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1ln
+  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1]>,
+  //
+  // VST1lnu
+  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1]>,
+  //
+  // VST2
+  InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST2x2
+  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2u
+  InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST2x2u
+  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2ln
+  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1, 1]>,
+  //
+  // VST2lnu
+  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST3
+  InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST3u
+  InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST3ln
+  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2]>,
+  //
+  // VST3lnu
+  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST4
+  InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4u
+  InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4ln
+  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4lnu
+  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [2, 1, 1, 1, 1, 1, 2, 2]>,
+
   //
   // Double-register Integer Unary
-  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2]>,
   //
   // Quad-register Integer Unary
-  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2]>,
   //
   // Double-register Integer Q-Unary
-  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Quad-register Integer CountQ-Unary
-  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Double-register Integer Binary
-  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 2, 2]>,
   //
   // Quad-register Integer Binary
-  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 2, 2]>,
   //
   // Double-register Integer Subtract
-  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 2, 1]>,
   //
   // Quad-register Integer Subtract
-  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 2, 1]>,
   //
   // Double-register Integer Shift
-  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 1, 1]>,
   //
   // Quad-register Integer Shift
-  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 1, 1]>,
   //
   // Double-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Quad-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Double-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2, 2]>,
   //
   // Quad-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2, 2]>,
   //
   // Double-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2, 1]>,
   //
   // Quad-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 2, 1]>,
 
   //
   // Double-register Integer Count
-  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 2, 2]>,
   //
   // Quad-register Integer Count
   // Result written in N3, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [4, 2, 2]>,
   //
   // Double-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 3, 2, 1]>,
   //
   // Quad-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 3, 2, 1]>,
   //
   // Double-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 3, 1]>,
 
   //
   // Double-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 2, 2]>,
   //
   // Quad-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 2, 2]>,
 
   //
   // Double-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 2, 1]>,
   //
   // Quad-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [9, 2, 1]>,
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [9, 3, 2, 1]>,
+
+  //
+  // Move
+  InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1,1]>,
   //
   // Move Immediate
-  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [3]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3]>,
   //
   // Double-register Permute Move
-  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe]>], [2, 1]>,
+  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
   //
   // Quad-register Permute Move
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1]>,
+  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
   //
   // Integer to Single-precision Move
-  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 2, 1]>,
   //
   // Integer to Lane Move
-  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
-  // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 1]>,
 
   //
+  // Vector narrow move
+  InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [3, 1]>,
+  //
   // Double-register FP Unary
-  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [5, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 2]>,
   //
   // Quad-register FP Unary
   // Result written in N5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
-  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 2]>,
   //
   // Double-register FP Binary
   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   // optimistic.
-  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 7 cycles
+  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 2, 2]>,
+
+  //
+  // VPADD, etc.
+  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 1, 1]>,
+  //
+  // Double-register FP VMUL
+  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 2, 1]>,
   //
   // Quad-register FP Binary
   // Result written in N5, but that is relative to the last cycle of multicycle,
   // so we use 6 for those cases
   // FIXME: We're using this itin for many instructions and [2, 2] here is too
   // optimistic.
-  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 8 cycles
+  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 2, 2]>,
+  //
+  // Quad-register FP VMUL
+  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 2, 1]>,
   //
   // Double-register FP Multiple-Accumulate
-  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 3, 2, 1]>,
   //
   // Quad-register FP Multiple-Accumulate
   // Result written in N9, but that is relative to the last cycle of multicycle,
   // so we use 10 for those cases
-  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [8, 4, 2, 1]>,
   //
   // Double-register Reciprical Step
-  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 10 cycles
+                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [9, 2, 2]>,
   //
   // Quad-register Reciprical Step
-  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
+  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 11 cycles
+                               InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [10, 2, 2]>,
   //
   // Double-register Permute
-  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 2, 1, 1]>,
   //
   // Quad-register Permute
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 3 for those cases
-  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 3, 1, 1]>,
   //
   // Quad-register Permute (3 cycle issue)
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 4, 1, 1]>,
 
   //
   // Double-register VEXT
-  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 7 cycles
+  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1, 1]>,
   //
   // Quad-register VEXT
-  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 9 cycles
+  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2]>,
   //
   // VTB
-  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
-  InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 2, 1]>,
+  InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
-  InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 2, 2, 1]>,
+  InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 2, 2, 3, 3, 1]>,
   //
   // VTBX
-  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
-  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2, 1]>,
+  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2, 2, 1]>,
+  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 1, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+                               InstrStage<2, [A9_NPipe]>],
+                              [4, 1, 2, 2, 3, 3, 1]>
 ]>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 08b560cc0c2f..c1880a72fff3 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -19,7 +19,7 @@ def V6_Pipe : FuncUnit; // pipeline
 // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
 //
 def ARMV6Itineraries : ProcessorItineraries<
-  [V6_Pipe], [
+  [V6_Pipe], [], [
   //
   // No operand cycles
   InstrItinData<IIC_iALUx    , [InstrStage<1, [V6_Pipe]>]>,
@@ -30,10 +30,20 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iALUsi   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
   InstrItinData<IIC_iALUsr   , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
   //
+  // Bitwise Instructions that produce a result
+  InstrItinData<IIC_iBITi    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iBITr    , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+  InstrItinData<IIC_iBITsi   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iBITsr   , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+  //
   // Unary Instructions that produce a result
   InstrItinData<IIC_iUNAr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
   InstrItinData<IIC_iUNAsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
-  InstrItinData<IIC_iUNAsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  //
+  // Zero and sign extension instructions
+  InstrItinData<IIC_iEXTr    , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+  InstrItinData<IIC_iEXTAr   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iEXTAsr  , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
   //
   // Compare instructions
   InstrItinData<IIC_iCMPi    , [InstrStage<1, [V6_Pipe]>], [2]>,
@@ -41,17 +51,39 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iCMPsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   InstrItinData<IIC_iCMPsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
   //
+  // Test instructions
+  InstrItinData<IIC_iTSTi    , [InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iTSTr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iTSTsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iTSTsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  //
   // Move instructions, unconditional
   InstrItinData<IIC_iMOVi    , [InstrStage<1, [V6_Pipe]>], [2]>,
   InstrItinData<IIC_iMOVr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
   InstrItinData<IIC_iMOVsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   InstrItinData<IIC_iMOVsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  InstrItinData<IIC_iMOVix2  , [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+                                  InstrStage<1, [V6_Pipe]>,
+                                  InstrStage<1, [V6_Pipe]>], [3]>,
+  InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+                                 InstrStage<1, [V6_Pipe]>,
+                                 InstrStage<1, [V6_Pipe]>], [5]>,
   //
   // Move instructions, conditional
   InstrItinData<IIC_iCMOVi   , [InstrStage<1, [V6_Pipe]>], [3]>,
   InstrItinData<IIC_iCMOVr   , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
   InstrItinData<IIC_iCMOVsi  , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
   InstrItinData<IIC_iCMOVsr  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [4]>,
+  //
+  // MVN instructions
+  InstrItinData<IIC_iMVNi    , [InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iMVNr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iMVNsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iMVNsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
 
   // Integer multiply pipeline
   //
@@ -65,50 +97,90 @@ def ARMV6Itineraries : ProcessorItineraries<
   // Integer load pipeline
   //
   // Immediate offset
-  InstrItinData<IIC_iLoadi   , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iLoadr   , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+  InstrItinData<IIC_iLoad_si   , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iLoad_iu   , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iLoadru  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_ru   , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+  InstrItinData<IIC_iLoad_siu,   [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+  //
+  // Load multiple, def is the 5th operand.
+  InstrItinData<IIC_iLoad_m  , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+  //
+  // Load multiple + update, defs are the 1st and 5th operands.
+  InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+  //
+  // Load multiple plus branch
+  InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+  //
+  // iLoadi + iALUr for t2LDRpci_pic.
+  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [3, 1]>,
 
   //
-  // Load multiple
-  InstrItinData<IIC_iLoadm   , [InstrStage<3, [V6_Pipe]>]>,
+  // Pop, def is the 3rd operand.
+  InstrItinData<IIC_iPop     , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+  //
+  // Pop + branch, def is the 3rd operand.
+  InstrItinData<IIC_iPop_Br,   [InstrStage<3, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
 
   // Integer store pipeline
   //
   // Immediate offset
-  InstrItinData<IIC_iStorei  , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_i   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer  , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
-
+  InstrItinData<IIC_iStore_r   , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_si   , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_iu   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_ru,   [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_iStore_siu,   [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
   //
   // Store multiple
-  InstrItinData<IIC_iStorem   , [InstrStage<3, [V6_Pipe]>]>,
+  InstrItinData<IIC_iStore_m  , [InstrStage<3, [V6_Pipe]>]>,
+  //
+  // Store multiple + update
+  InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
   
   // Branch
   //
@@ -183,6 +255,18 @@ def ARMV6Itineraries : ProcessorItineraries<
   // Double-precision FP SQRT
   InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
   //
+  // Integer to Single-precision Move
+  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+  //
   // Single-precision FP Load
   InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
   //
@@ -190,7 +274,10 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
   //
   // FP Load Multiple
-  InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>,
+  InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+  //
+  // FP Load Multiple + update
+  InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
@@ -200,5 +287,8 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
   //
   // FP Store Multiple
-  InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]>
+  InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+  //
+  // FP Store Multiple + update
+  InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
 ]>;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a28940754d0e..2b9202bff01c 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -29,10 +29,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                              SDValue Dst, SDValue Src,
                                              SDValue Size, unsigned Align,
                                              bool isVolatile, bool AlwaysInline,
-                                             const Value *DstSV,
-                                             uint64_t DstSVOff,
-                                             const Value *SrcSV,
-                                             uint64_t SrcSVOff) const {
+                                             MachinePointerInfo DstPtrInfo,
+                                          MachinePointerInfo SrcPtrInfo) const {
   // Do repeated 4-byte loads and stores. To be improved.
   // This requires 4-byte alignment.
   if ((Align & 3) != 0)
@@ -66,7 +64,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
       Loads[i] = DAG.getLoad(VT, dl, Chain,
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                          DAG.getConstant(SrcOff, MVT::i32)),
-                             SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+                             false, 0);
       TFOps[i] = Loads[i].getValue(1);
       SrcOff += VTSize;
     }
@@ -77,7 +76,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                               DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                           DAG.getConstant(DstOff, MVT::i32)),
-                              DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+                              DstPtrInfo.getWithOffset(DstOff),
+                              isVolatile, false, 0);
       DstOff += VTSize;
     }
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
@@ -103,7 +103,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
     Loads[i] = DAG.getLoad(VT, dl, Chain,
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                        DAG.getConstant(SrcOff, MVT::i32)),
-                           SrcSV, SrcSVOff + SrcOff, false, false, 0);
+                           SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
     TFOps[i] = Loads[i].getValue(1);
     ++i;
     SrcOff += VTSize;
@@ -125,7 +125,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                         DAG.getConstant(DstOff, MVT::i32)),
-                            DstSV, DstSVOff + DstOff, false, false, 0);
+                            DstPtrInfo.getWithOffset(DstOff), false, false, 0);
     ++i;
     DstOff += VTSize;
     BytesLeft -= VTSize;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index d7d00c23f270..753369037a11 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -33,10 +33,8 @@ public:
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile, bool AlwaysInline,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff,
-                                  const Value *SrcSV,
-                                  uint64_t SrcSVOff) const;
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
 };
 
 }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index cb539f4c01ec..0bd740cfb28c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,6 +13,7 @@
 
 #include "ARMSubtarget.h"
 #include "ARMGenSubtarget.inc"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
@@ -24,45 +25,52 @@ ReserveR9("arm-reserve-r9", cl::Hidden,
           cl::desc("Reserve R9, making it unavailable as GPR"));
 
 static cl::opt<bool>
-UseMOVT("arm-use-movt",
-        cl::init(true), cl::Hidden);
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+            cl::desc("Disallow all unaligned memory accesses"));
 
 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                            bool isT)
   : ARMArchVersion(V4)
+  , ARMProcFamily(Others)
   , ARMFPUType(None)
   , UseNEONForSinglePrecisionFP(false)
-  , SlowVMLx(false)
+  , SlowFPVMLx(false)
   , SlowFPBrcc(false)
   , IsThumb(isT)
   , ThumbMode(Thumb1)
   , NoARM(false)
   , PostRAScheduler(false)
   , IsR9Reserved(ReserveR9)
-  , UseMovt(UseMOVT)
+  , UseMovt(false)
   , HasFP16(false)
+  , HasD16(false)
   , HasHardwareDivide(false)
   , HasT2ExtractPack(false)
   , HasDataBarrier(false)
   , Pref32BitThumb(false)
+  , HasMPExtension(false)
   , FPOnlySP(false)
+  , AllowsUnalignedMem(false)
   , stackAlignment(4)
   , CPUString("generic")
-  , TargetType(isELF) // Default to ELF unless otherwise specified.
+  , TargetTriple(TT)
   , TargetABI(ARM_ABI_APCS) {
-  // default to soft float ABI
+  // Default to soft float ABI
   if (FloatABIType == FloatABI::Default)
     FloatABIType = FloatABI::Soft;
 
   // Determine default and user specified characteristics
 
-  // Parse features string.
-  CPUString = ParseSubtargetFeatures(FS, CPUString);
-
   // When no arch is specified either by CPU or by attributes, make the default
   // ARMv4T.
-  if (CPUString == "generic" && (FS.empty() || FS == "generic"))
+  const char *ARMArchFeature = "";
+  if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
     ARMArchVersion = V4T;
+    ARMArchFeature = ",+v4t";
+  }
 
   // Set the boolean corresponding to the current target triple, or the default
   // if one cannot be determined, to true.
@@ -80,47 +88,78 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
     unsigned SubVer = TT[Idx];
     if (SubVer >= '7' && SubVer <= '9') {
       ARMArchVersion = V7A;
-      if (Len >= Idx+2 && TT[Idx+1] == 'm')
+      ARMArchFeature = ",+v7a";
+      if (Len >= Idx+2 && TT[Idx+1] == 'm') {
         ARMArchVersion = V7M;
+        ARMArchFeature = ",+v7m";
+      }
     } else if (SubVer == '6') {
       ARMArchVersion = V6;
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+      ARMArchFeature = ",+v6";
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
         ARMArchVersion = V6T2;
+        ARMArchFeature = ",+v6t2";
+      }
     } else if (SubVer == '5') {
       ARMArchVersion = V5T;
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+      ARMArchFeature = ",+v5t";
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
         ARMArchVersion = V5TE;
+        ARMArchFeature = ",+v5te";
+      }
     } else if (SubVer == '4') {
-      if (Len >= Idx+2 && TT[Idx+1] == 't')
+      if (Len >= Idx+2 && TT[Idx+1] == 't') {
         ARMArchVersion = V4T;
-      else
+        ARMArchFeature = ",+v4t";
+      } else {
         ARMArchVersion = V4;
+        ARMArchFeature = "";
+      }
     }
   }
 
+  if (TT.find("eabi") != std::string::npos)
+    TargetABI = ARM_ABI_AAPCS;
+
+  // Parse features string.  If the first entry in FS (the CPU) is missing,
+  // insert the architecture feature derived from the target triple.  This is
+  // important for setting features that are implied based on the architecture
+  // version.
+  std::string FSWithArch;
+  if (FS.empty())
+    FSWithArch = std::string(ARMArchFeature);
+  else if (FS.find(',') == 0)
+    FSWithArch = std::string(ARMArchFeature) + FS;
+  else
+    FSWithArch = FS;
+  CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+
+  // After parsing Itineraries, set ItinData.IssueWidth.
+  computeIssueWidth();
+
   // Thumb2 implies at least V6T2.
   if (ARMArchVersion >= V6T2)
     ThumbMode = Thumb2;
   else if (ThumbMode >= Thumb2)
     ARMArchVersion = V6T2;
 
-  if (Len >= 10) {
-    if (TT.find("-darwin") != std::string::npos)
-      // arm-darwin
-      TargetType = isDarwin;
-  }
-
-  if (TT.find("eabi") != std::string::npos)
-    TargetABI = ARM_ABI_AAPCS;
-
   if (isAAPCS_ABI())
     stackAlignment = 8;
 
-  if (isTargetDarwin())
+  if (!isTargetDarwin())
+    UseMovt = hasV6T2Ops();
+  else {
     IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+    UseMovt = DarwinUseMOVT && hasV6T2Ops();
+  }
 
   if (!isThumb() || hasThumb2())
     PostRAScheduler = true;
+
+  // v6+ may or may not support unaligned mem access depending on the system
+  // configuration.
+  if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+    AllowsUnalignedMem = true;
 }
 
 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -163,7 +202,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
       // through a stub.
       if (!isDecl && !GV->isWeakForLinker())
         return false;
-    
+
       // Unless we have a symbol with hidden visibility, we have to go through a
       // normal $non_lazy_ptr stub because this symbol might be resolved late.
       if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
@@ -174,6 +213,34 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
   return false;
 }
 
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+  // If we have a reasonable estimate of the pipeline depth, then we can
+  // estimate the penalty of a misprediction based on that.
+  if (isCortexA8())
+    return 13;
+  else if (isCortexA9())
+    return 8;
+
+  // Otherwise, just return a sensible default.
+  return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+  unsigned allStage1Units = 0;
+  for (const InstrItinerary *itin = InstrItins.Itineraries;
+       itin->FirstStage != ~0U; ++itin) {
+    const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+    allStage1Units |= IS->getUnits();
+  }
+  InstrItins.IssueWidth = 0;
+  while (allStage1Units) {
+    ++InstrItins.IssueWidth;
+    // clear the lowest bit
+    allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+  }
+  assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 67e58038ee77..76c1c3fb41b1 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -17,7 +17,7 @@
 #include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
+#include "llvm/ADT/Triple.h"
 #include <string>
 
 namespace llvm {
@@ -29,6 +29,10 @@ protected:
     V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
   };
 
+  enum ARMProcFamilyEnum {
+    Others, CortexA8, CortexA9
+  };
+
   enum ARMFPEnum {
     None, VFPv2, VFPv3, NEON
   };
@@ -42,6 +46,9 @@ protected:
   /// V6, V6T2, V7A, V7M.
   ARMArchEnum ARMArchVersion;
 
+  /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+  ARMProcFamilyEnum ARMProcFamily;
+
   /// ARMFPUType - Floating Point Unit type.
   ARMFPEnum ARMFPUType;
 
@@ -50,9 +57,9 @@ protected:
   /// determine if NEON should actually be used.
   bool UseNEONForSinglePrecisionFP;
 
-  /// SlowVMLx - If the VFP2 instructions are available, indicates whether
-  /// the VML[AS] instructions are slow (if so, don't use them).
-  bool SlowVMLx;
+  /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+  /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+  bool SlowFPVMLx;
 
   /// SlowFPBrcc - True if floating point compare + branch is slow.
   bool SlowFPBrcc;
@@ -80,6 +87,10 @@ protected:
   /// only so far)
   bool HasFP16;
 
+  /// HasD16 - True if subtarget is limited to 16 double precision
+  /// FP registers for VFPv3.
+  bool HasD16;
+
   /// HasHardwareDivide - True if subtarget supports [su]div
   bool HasHardwareDivide;
 
@@ -95,10 +106,19 @@ protected:
   /// over 16-bit ones.
   bool Pref32BitThumb;
 
+  /// HasMPExtension - True if the subtarget supports Multiprocessing
+  /// extension (ARMv7 only).
+  bool HasMPExtension;
+
   /// FPOnlySP - If true, the floating point unit only supports single
   /// precision.
   bool FPOnlySP;
 
+  /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+  /// accesses for some types.  For details, see
+  /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+  bool AllowsUnalignedMem;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -106,6 +126,9 @@ protected:
   /// CPUString - String name of used CPU.
   std::string CPUString;
 
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
+
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
 
@@ -136,6 +159,8 @@ protected:
   std::string ParseSubtargetFeatures(const std::string &FS,
                                      const std::string &CPU);
 
+  void computeIssueWidth();
+
   bool hasV4TOps()  const { return ARMArchVersion >= V4T;  }
   bool hasV5TOps()  const { return ARMArchVersion >= V5T;  }
   bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
@@ -143,6 +168,9 @@ protected:
   bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }
 
+  bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+  bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
   bool hasARMOps() const { return !NoARM; }
 
   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -153,15 +181,17 @@ protected:
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool hasDataBarrier() const { return HasDataBarrier; }
-  bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+  bool useFPVMLx() const { return !SlowFPVMLx; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
+  bool hasMPExtension() const { return HasMPExtension; }
 
   bool hasFP16() const { return HasFP16; }
+  bool hasD16() const { return HasD16; }
 
-  bool isTargetDarwin() const { return TargetType == isDarwin; }
-  bool isTargetELF() const { return TargetType == isELF; }
+  bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+  bool isTargetELF() const { return !isTargetDarwin(); }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
@@ -175,8 +205,12 @@ protected:
 
   bool useMovt() const { return UseMovt && hasV6T2Ops(); }
 
+  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
   const std::string & getCPUString() const { return CPUString; }
 
+  unsigned getMispredictionPenalty() const;
+
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 30ff8276cdaa..0ee773b165fb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -12,15 +12,18 @@
 
 #include "ARMTargetMachine.h"
 #include "ARMMCAsmInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMFrameLowering.h"
 #include "ARM.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
+static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
+
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
@@ -31,6 +34,26 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   }
 }
 
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &OS,
+                                    MCCodeEmitter *Emitter,
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    llvm_unreachable("ARM does not support Windows COFF format");
+    return NULL;
+  default:
+    return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+  }
+}
+
 extern "C" void LLVMInitializeARMTarget() {
   // Register the target.
   RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -39,6 +62,19 @@ extern "C" void LLVMInitializeARMTarget() {
   // Register the target asm info.
   RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
   RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
+
+  // Register the MC Code Emitter
+  TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+  TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+  // Register the asm backend.
+  TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+  TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
 }
 
 /// TargetMachine ctor - Create an ARM architecture model.
@@ -49,9 +85,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
                                            bool isThumb)
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, isThumb),
-    FrameInfo(Subtarget),
     JITInfo(),
-    InstrItins(Subtarget.getInstrItineraryData()) {
+    InstrItins(Subtarget.getInstrItineraryData())
+{
   DefRelocModel = getRelocationModel();
 }
 
@@ -59,12 +95,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
                                    const std::string &FS)
   : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "v128:32:128-v64:32:64-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "v128:64:128-v64:64:64-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
-    TSInfo(*this) {
+    TSInfo(*this),
+    FrameLowering(Subtarget) {
   if (!Subtarget.hasARMOps())
     report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
                        "support ARM mode execution!");
@@ -77,14 +115,18 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
               ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
               : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:32:128-v64:32:64-a:0:32-n32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:64:128-v64:64:64-a:0:32-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
-    TSInfo(*this) {
+    TSInfo(*this),
+    FrameLowering(Subtarget.hasThumb2()
+              ? new ARMFrameLowering(Subtarget)
+              : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
 }
 
 // Pass Pipeline Configuration
@@ -104,12 +146,12 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
 
 bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
-  if (Subtarget.hasNEON())
-    PM.add(createNEONPreAllocPass());
-
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass(true));
+  if (ExpandMLx &&
+      OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+    PM.add(createMLxExpansionPass());
 
   return true;
 }
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 17e5425a9d37..e0aa149c4cc2 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,16 +14,19 @@
 #ifndef ARMTARGETMACHINE_H
 #define ARMTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
 #include "ARMInstrInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
 #include "ARMSelectionDAGInfo.h"
 #include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
 #include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/ADT/OwningPtr.h"
 
 namespace llvm {
@@ -31,9 +34,7 @@ namespace llvm {
 class ARMBaseTargetMachine : public LLVMTargetMachine {
 protected:
   ARMSubtarget        Subtarget;
-
 private:
-  ARMFrameInfo        FrameInfo;
   ARMJITInfo          JITInfo;
   InstrItineraryData  InstrItins;
   Reloc::Model        DefRelocModel;    // Reloc model before it's overridden.
@@ -42,11 +43,10 @@ public:
   ARMBaseTargetMachine(const Target &T, const std::string &TT,
                        const std::string &FS, bool isThumb);
 
-  virtual const ARMFrameInfo     *getFrameInfo() const { return &FrameInfo; }
   virtual       ARMJITInfo       *getJITInfo()         { return &JITInfo; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }
 
   // Pass Pipeline Configuration
@@ -64,9 +64,11 @@ public:
 class ARMTargetMachine : public ARMBaseTargetMachine {
   ARMInstrInfo        InstrInfo;
   const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMELFWriterInfo    ELFWriterInfo;
   ARMTargetLowering   TLInfo;
   ARMSelectionDAGInfo TSInfo;
-public:
+  ARMFrameLowering    FrameLowering;
+ public:
   ARMTargetMachine(const Target &T, const std::string &TT,
                    const std::string &FS);
 
@@ -81,9 +83,15 @@ public:
   virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
     return &TSInfo;
   }
+  virtual const ARMFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
 
   virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };
 
 /// ThumbTargetMachine - Thumb target machine.
@@ -94,8 +102,11 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
   // Either Thumb1InstrInfo or Thumb2InstrInfo.
   OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const TargetData    DataLayout;   // Calculates type size & alignment
+  ARMELFWriterInfo    ELFWriterInfo;
   ARMTargetLowering   TLInfo;
   ARMSelectionDAGInfo TSInfo;
+  // Either Thumb1FrameLowering or ARMFrameLowering.
+  OwningPtr<ARMFrameLowering> FrameLowering;
 public:
   ThumbTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS);
@@ -117,7 +128,14 @@ public:
   virtual const ARMBaseInstrInfo *getInstrInfo() const {
     return InstrInfo.get();
   }
+  /// returns either Thumb1FrameLowering or ARMFrameLowering
+  virtual const ARMFrameLowering *getFrameLowering() const {
+    return FrameLowering.get();
+  }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 091a3b3d8497..7535da54a95f 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 using namespace dwarf;
@@ -26,14 +27,20 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
 
   if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
     StaticCtorSection =
-      getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
-                                 MCSectionELF::SHF_WRITE |
-                                 MCSectionELF::SHF_ALLOC,
+      getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+                                 ELF::SHF_WRITE |
+                                 ELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
     StaticDtorSection =
-      getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
-                                 MCSectionELF::SHF_WRITE |
-                                 MCSectionELF::SHF_ALLOC,
+      getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+                                 ELF::SHF_WRITE |
+                                 ELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
   }
+  
+  AttributesSection =
+    getContext().getELFSection(".ARM.attributes",
+                               ELF::SHT_ARM_ATTRIBUTES,
+                               0,
+                               SectionKind::getMetadata());
 }
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 097fc2cceba3..c6a7261439d7 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -18,10 +18,19 @@ class MCContext;
 class TargetMachine;
 
 class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+  const MCSection *AttributesSection;
 public:
-  ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {}
+  ARMElfTargetObjectFile() :
+    TargetLoweringObjectFileELF(),
+    AttributesSection(NULL)
+  {}
 
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+  virtual const MCSection *getAttributesSection() const {
+    return AttributesSection;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index f859d1b1c95f..2428ce16d3d5 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -10,10 +10,6 @@
 #include "ARM.h"
 #include "ARMTargetMachine.h"
 
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -22,119 +18,135 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
 
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
 #include <string>
 #include <map>
 
 using namespace llvm;
 
 namespace {
-  
-  class ARMBaseAsmLexer : public TargetAsmLexer {
-    const MCAsmInfo &AsmInfo;
-    
-    const AsmToken &lexDefinite() {
-      return getLexer()->Lex();
-    }
-    
-    AsmToken LexTokenUAL();
-  protected:
-    typedef std::map <std::string, unsigned> rmap_ty;
-    
-    rmap_ty RegisterMap;
-    
-    void InitRegisterMap(const TargetRegisterInfo *info) {
-      unsigned numRegs = info->getNumRegs();
-
-      for (unsigned i = 0; i < numRegs; ++i) {
-        const char *regName = info->getName(i);
-        if (regName)
-          RegisterMap[regName] = i;
-      }
-    }
-    
-    unsigned MatchRegisterName(StringRef Name) {
-      rmap_ty::iterator iter = RegisterMap.find(Name.str());
-      if (iter != RegisterMap.end())
-        return iter->second;
-      else
-        return 0;
-    }
-    
-    AsmToken LexToken() {
-      if (!Lexer) {
-        SetError(SMLoc(), "No MCAsmLexer installed");
-        return AsmToken(AsmToken::Error, "", 0);
-      }
-      
-      switch (AsmInfo.getAssemblerDialect()) {
-      default:
-        SetError(SMLoc(), "Unhandled dialect");
-        return AsmToken(AsmToken::Error, "", 0);
-      case 0:
-        return LexTokenUAL();
-      }
-    }
-  public:
-    ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : TargetAsmLexer(T), AsmInfo(MAI) {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+  const MCAsmInfo &AsmInfo;
+
+  const AsmToken &lexDefinite() {
+    return getLexer()->Lex();
+  }
+
+  AsmToken LexTokenUAL();
+protected:
+  typedef std::map <std::string, unsigned> rmap_ty;
+
+  rmap_ty RegisterMap;
+
+  void InitRegisterMap(const TargetRegisterInfo *info) {
+    unsigned numRegs = info->getNumRegs();
+
+    for (unsigned i = 0; i < numRegs; ++i) {
+      const char *regName = info->getName(i);
+      if (regName)
+        RegisterMap[regName] = i;
     }
-  };
-  
-  class ARMAsmLexer : public ARMBaseAsmLexer {
-  public:
-    ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : ARMBaseAsmLexer(T, MAI) {
-      std::string tripleString("arm-unknown-unknown");
-      std::string featureString;
-      OwningPtr<const TargetMachine> 
-        targetMachine(T.createTargetMachine(tripleString, featureString));
-      InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+
+  unsigned MatchRegisterName(StringRef Name) {
+    rmap_ty::iterator iter = RegisterMap.find(Name.str());
+    if (iter != RegisterMap.end())
+      return iter->second;
+    else
+      return 0;
+  }
+
+  AsmToken LexToken() {
+    if (!Lexer) {
+      SetError(SMLoc(), "No MCAsmLexer installed");
+      return AsmToken(AsmToken::Error, "", 0);
     }
-  };
-  
-  class ThumbAsmLexer : public ARMBaseAsmLexer {
-  public:
-    ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : ARMBaseAsmLexer(T, MAI) {
-      std::string tripleString("thumb-unknown-unknown");
-      std::string featureString;
-      OwningPtr<const TargetMachine> 
-        targetMachine(T.createTargetMachine(tripleString, featureString));
-      InitRegisterMap(targetMachine->getRegisterInfo());
+
+    switch (AsmInfo.getAssemblerDialect()) {
+    default:
+      SetError(SMLoc(), "Unhandled dialect");
+      return AsmToken(AsmToken::Error, "", 0);
+    case 0:
+      return LexTokenUAL();
     }
-  };
-}
+  }
+public:
+  ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : TargetAsmLexer(T), AsmInfo(MAI) {
+  }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+  ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : ARMBaseAsmLexer(T, MAI) {
+    std::string tripleString("arm-unknown-unknown");
+    std::string featureString;
+    OwningPtr<const TargetMachine>
+      targetMachine(T.createTargetMachine(tripleString, featureString));
+    InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+  ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : ARMBaseAsmLexer(T, MAI) {
+    std::string tripleString("thumb-unknown-unknown");
+    std::string featureString;
+    OwningPtr<const TargetMachine>
+      targetMachine(T.createTargetMachine(tripleString, featureString));
+    InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+};
+
+} // end anonymous namespace
 
 AsmToken ARMBaseAsmLexer::LexTokenUAL() {
   const AsmToken &lexedToken = lexDefinite();
-  
+
   switch (lexedToken.getKind()) {
-  default:
-    return AsmToken(lexedToken);
+  default: break;
   case AsmToken::Error:
     SetError(Lexer->getErrLoc(), Lexer->getErr());
-    return AsmToken(lexedToken);
-  case AsmToken::Identifier:
-  {
+    break;
+  case AsmToken::Identifier: {
     std::string upperCase = lexedToken.getString().str();
     std::string lowerCase = LowercaseString(upperCase);
     StringRef lowerRef(lowerCase);
-    
+
     unsigned regID = MatchRegisterName(lowerRef);
-    
-    if (regID) {
+    // Check for register aliases.
+    //   r13 -> sp
+    //   r14 -> lr
+    //   r15 -> pc
+    //   ip  -> r12
+    //   FIXME: Some assemblers support lots of others. Do we want them all?
+    if (!regID) {
+      regID = StringSwitch<unsigned>(lowerCase)
+        .Case("r13", ARM::SP)
+        .Case("r14", ARM::LR)
+        .Case("r15", ARM::PC)
+        .Case("ip", ARM::R12)
+        .Default(0);
+    }
+
+    if (regID)
       return AsmToken(AsmToken::Register,
                       lexedToken.getString(),
                       static_cast<int64_t>(regID));
-    } else {
-      return AsmToken(lexedToken);
-    }
   }
   }
+
+  return AsmToken(lexedToken);
 }
 
 extern "C" void LLVMInitializeARMAsmLexer() {
   RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
   RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
 }
-
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 75e2a739bf1f..129af206e1d9 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,28 +8,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-namespace {
-struct ARMOperand;
-
-// The shift types for register controlled shifts in arm memory addressing
+/// Shift types used for register controlled shifts in ARM memory addressing.
 enum ShiftType {
   Lsl,
   Lsr,
@@ -38,24 +38,30 @@ enum ShiftType {
   Rrx
 };
 
+namespace {
+
+class ARMOperand;
+
 class ARMAsmParser : public TargetAsmParser {
   MCAsmParser &Parser;
   TargetMachine &TM;
 
-private:
   MCAsmParser &getParser() const { return Parser; }
-
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
 
   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
 
-  bool MaybeParseRegister(OwningPtr<ARMOperand> &Op, bool ParseWriteBack);
+  int TryParseRegister();
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+  bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+  bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+  const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+                                  MCSymbolRefExpr::VariantKind Variant);
 
-  bool ParseRegisterList(OwningPtr<ARMOperand> &Op);
-
-  bool ParseMemory(OwningPtr<ARMOperand> &Op);
 
   bool ParseMemoryOffsetReg(bool &Negative,
                             bool &OffsetRegShifted,
@@ -65,76 +71,98 @@ private:
                             bool &OffsetIsReg,
                             int &OffsetRegNum,
                             SMLoc &E);
-
   bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
-
-  bool ParseOperand(OwningPtr<ARMOperand> &Op);
-
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
-
   bool ParseDirectiveThumb(SMLoc L);
-
   bool ParseDirectiveThumbFunc(SMLoc L);
-
   bool ParseDirectiveCode(SMLoc L);
-
   bool ParseDirectiveSyntax(SMLoc L);
 
-  bool MatchInstruction(SMLoc IDLoc,
-                        const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCInst &Inst) {
-    if (!MatchInstructionImpl(Operands, Inst))
-      return false;
-
-    // FIXME: We should give nicer diagnostics about the exact failure.
-    Error(IDLoc, "unrecognized instruction");
-
-    return true;
-  }
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out);
+  void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+                             bool &CanAcceptPredicationCode);
 
   /// @name Auto-generated Match Functions
   /// {
 
-  unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
-
-  bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
-                              &Operands,
-                            MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
 
   /// }
 
+  OperandMatchResultTy tryParseCoprocNumOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseCoprocRegOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMemBarrierOptOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseProcIFlagsOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMSRMaskOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
 
 public:
   ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
-    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+      // Initialize the set of available features.
+      setAvailableFeatures(ComputeAvailableFeatures(
+          &TM.getSubtarget<ARMSubtarget>()));
+    }
 
   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
   virtual bool ParseDirective(AsmToken DirectiveID);
 };
-  
+} // end anonymous namespace
+
+namespace {
+
 /// ARMOperand - Instances of this class represent a parsed ARM machine
 /// instruction.
-struct ARMOperand : public MCParsedAsmOperand {
-private:
-  ARMOperand() {}
-public:
+class ARMOperand : public MCParsedAsmOperand {
   enum KindTy {
     CondCode,
+    CCOut,
+    CoprocNum,
+    CoprocReg,
     Immediate,
+    MemBarrierOpt,
     Memory,
+    MSRMask,
+    ProcIFlags,
     Register,
+    RegisterList,
+    DPRRegisterList,
+    SPRRegisterList,
     Token
   } Kind;
 
   SMLoc StartLoc, EndLoc;
+  SmallVector<unsigned, 8> Registers;
 
   union {
     struct {
       ARMCC::CondCodes Val;
     } CC;
 
+    struct {
+      ARM_MB::MemBOpt Val;
+    } MBOpt;
+
+    struct {
+      unsigned Val;
+    } Cop;
+
+    struct {
+      ARM_PROC::IFlags Val;
+    } IFlags;
+
+    struct {
+      unsigned Val;
+    } MMask;
+
     struct {
       const char *Data;
       unsigned Length;
@@ -142,34 +170,32 @@ public:
 
     struct {
       unsigned RegNum;
-      bool Writeback;
     } Reg;
 
     struct {
       const MCExpr *Val;
     } Imm;
-    
-    // This is for all forms of ARM address expressions
+
+    /// Combined record for all forms of ARM address expressions.
     struct {
       unsigned BaseRegNum;
-      unsigned OffsetRegNum; // used when OffsetIsReg is true
-      const MCExpr *Offset; // used when OffsetIsReg is false
-      const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
-      enum ShiftType ShiftType;  // used when OffsetRegShifted is true
-      unsigned
-        OffsetRegShifted : 1, // only used when OffsetIsReg is true
-        Preindexed : 1,
-        Postindexed : 1,
-        OffsetIsReg : 1,
-        Negative : 1, // only used when OffsetIsReg is true
-        Writeback : 1;
+      union {
+        unsigned RegNum;     ///< Offset register num, when OffsetIsReg.
+        const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+      } Offset;
+      const MCExpr *ShiftAmount;     // used when OffsetRegShifted is true
+      enum ShiftType ShiftType;      // used when OffsetRegShifted is true
+      unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+      unsigned Preindexed       : 1;
+      unsigned Postindexed      : 1;
+      unsigned OffsetIsReg      : 1;
+      unsigned Negative         : 1; // only used when OffsetIsReg is true
+      unsigned Writeback        : 1;
     } Mem;
-
   };
-  
-  //ARMOperand(KindTy K, SMLoc S, SMLoc E)
-  //  : Kind(K), StartLoc(S), EndLoc(E) {}
-  
+
+  ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
   ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
     Kind = o.Kind;
     StartLoc = o.StartLoc;
@@ -181,18 +207,36 @@ public:
     case Token:
       Tok = o.Tok;
       break;
+    case CCOut:
     case Register:
       Reg = o.Reg;
       break;
+    case RegisterList:
+    case DPRRegisterList:
+    case SPRRegisterList:
+      Registers = o.Registers;
+      break;
+    case CoprocNum:
+    case CoprocReg:
+      Cop = o.Cop;
+      break;
     case Immediate:
       Imm = o.Imm;
       break;
+    case MemBarrierOpt:
+      MBOpt = o.MBOpt;
+      break;
     case Memory:
       Mem = o.Mem;
       break;
+    case MSRMask:
+      MMask = o.MMask;
+      break;
+    case ProcIFlags:
+      IFlags = o.IFlags;
     }
   }
-  
+
   /// getStartLoc - Get the location of the first token of this operand.
   SMLoc getStartLoc() const { return StartLoc; }
   /// getEndLoc - Get the location of the last token of this operand.
@@ -203,32 +247,129 @@ public:
     return CC.Val;
   }
 
+  unsigned getCoproc() const {
+    assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+    return Cop.Val;
+  }
+
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
     return StringRef(Tok.Data, Tok.Length);
   }
 
   unsigned getReg() const {
-    assert(Kind == Register && "Invalid access!");
+    assert((Kind == Register || Kind == CCOut) && "Invalid access!");
     return Reg.RegNum;
   }
 
+  const SmallVectorImpl<unsigned> &getRegList() const {
+    assert((Kind == RegisterList || Kind == DPRRegisterList ||
+            Kind == SPRRegisterList) && "Invalid access!");
+    return Registers;
+  }
+
   const MCExpr *getImm() const {
     assert(Kind == Immediate && "Invalid access!");
     return Imm.Val;
   }
 
-  bool isCondCode() const { return Kind == CondCode; }
+  ARM_MB::MemBOpt getMemBarrierOpt() const {
+    assert(Kind == MemBarrierOpt && "Invalid access!");
+    return MBOpt.Val;
+  }
 
-  bool isImm() const { return Kind == Immediate; }
+  ARM_PROC::IFlags getProcIFlags() const {
+    assert(Kind == ProcIFlags && "Invalid access!");
+    return IFlags.Val;
+  }
+
+  unsigned getMSRMask() const {
+    assert(Kind == MSRMask && "Invalid access!");
+    return MMask.Val;
+  }
+
+  /// @name Memory Operand Accessors
+  /// @{
+
+  unsigned getMemBaseRegNum() const {
+    return Mem.BaseRegNum;
+  }
+  unsigned getMemOffsetRegNum() const {
+    assert(Mem.OffsetIsReg && "Invalid access!");
+    return Mem.Offset.RegNum;
+  }
+  const MCExpr *getMemOffset() const {
+    assert(!Mem.OffsetIsReg && "Invalid access!");
+    return Mem.Offset.Value;
+  }
+  unsigned getMemOffsetRegShifted() const {
+    assert(Mem.OffsetIsReg && "Invalid access!");
+    return Mem.OffsetRegShifted;
+  }
+  const MCExpr *getMemShiftAmount() const {
+    assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+    return Mem.ShiftAmount;
+  }
+  enum ShiftType getMemShiftType() const {
+    assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+    return Mem.ShiftType;
+  }
+  bool getMemPreindexed() const { return Mem.Preindexed; }
+  bool getMemPostindexed() const { return Mem.Postindexed; }
+  bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
+  bool getMemNegative() const { return Mem.Negative; }
+  bool getMemWriteback() const { return Mem.Writeback; }
+
+  /// @}
 
+  bool isCoprocNum() const { return Kind == CoprocNum; }
+  bool isCoprocReg() const { return Kind == CoprocReg; }
+  bool isCondCode() const { return Kind == CondCode; }
+  bool isCCOut() const { return Kind == CCOut; }
+  bool isImm() const { return Kind == Immediate; }
   bool isReg() const { return Kind == Register; }
+  bool isRegList() const { return Kind == RegisterList; }
+  bool isDPRRegList() const { return Kind == DPRRegisterList; }
+  bool isSPRRegList() const { return Kind == SPRRegisterList; }
+  bool isToken() const { return Kind == Token; }
+  bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
+  bool isMemory() const { return Kind == Memory; }
+  bool isMemMode5() const {
+    if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
+        getMemNegative())
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    if (!CE) return false;
+
+    // The offset must be a multiple of 4 in the range 0-1020.
+    int64_t Value = CE->getValue();
+    return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
+  }
+  bool isMemModeRegThumb() const {
+    if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+      return false;
+    return true;
+  }
+  bool isMemModeImmThumb() const {
+    if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    if (!CE) return false;
 
-  bool isToken() const {return Kind == Token; }
+    // The offset must be a multiple of 4 in the range 0-124.
+    uint64_t Value = CE->getValue();
+    return ((Value & 0x3) == 0 && Value <= 124);
+  }
+  bool isMSRMask() const { return Kind == MSRMask; }
+  bool isProcIFlags() const { return Kind == ProcIFlags; }
 
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
-    // Add as immediates when possible.
-    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+    // Add as immediates when possible.  Null MCExpr = 0.
+    if (Expr == 0)
+      Inst.addOperand(MCOperand::CreateImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
     else
       Inst.addOperand(MCOperand::CreateExpr(Expr));
@@ -237,8 +378,23 @@ public:
   void addCondCodeOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
-    // FIXME: What belongs here?
-    Inst.addOperand(MCOperand::CreateReg(0));
+    unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+    Inst.addOperand(MCOperand::CreateReg(RegNum));
+  }
+
+  void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+  }
+
+  void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+  }
+
+  void addCCOutOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
   }
 
   void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -246,66 +402,181 @@ public:
     Inst.addOperand(MCOperand::CreateReg(getReg()));
   }
 
+  void addRegListOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const SmallVectorImpl<unsigned> &RegList = getRegList();
+    for (SmallVectorImpl<unsigned>::const_iterator
+           I = RegList.begin(), E = RegList.end(); I != E; ++I)
+      Inst.addOperand(MCOperand::CreateReg(*I));
+  }
+
+  void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
+  void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     addExpr(Inst, getImm());
   }
 
+  void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+  }
+
+  void addMemMode5Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+    assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+
+    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+    // the difference?
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    assert(CE && "Non-constant mode 5 offset operand!");
+
+    // The MCInst offset operand doesn't include the low two bits (like
+    // the instruction encoding).
+    int64_t Offset = CE->getValue() / 4;
+    if (Offset >= 0)
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
+                                                             Offset)));
+    else
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
+                                                             -Offset)));
+  }
+
+  void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+    Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+  }
+
+  void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    assert(CE && "Non-constant mode offset operand!");
+    Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+  }
+
+  void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+  }
+
+  void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+  }
+
   virtual void dump(raw_ostream &OS) const;
 
-  static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
-                             SMLoc S) {
-    Op.reset(new ARMOperand);
-    Op->Kind = CondCode;
+  static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(CondCode);
     Op->CC.Val = CC;
     Op->StartLoc = S;
     Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(CoprocNum);
+    Op->Cop.Val = CopVal;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
   }
 
-  static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
-                          SMLoc S) {
-    Op.reset(new ARMOperand);
-    Op->Kind = Token;
+  static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(CoprocReg);
+    Op->Cop.Val = CopVal;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(CCOut);
+    Op->Reg.RegNum = RegNum;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(Token);
     Op->Tok.Data = Str.data();
     Op->Tok.Length = Str.size();
     Op->StartLoc = S;
     Op->EndLoc = S;
+    return Op;
   }
 
-  static void CreateReg(OwningPtr<ARMOperand> &Op, unsigned RegNum, 
-                        bool Writeback, SMLoc S, SMLoc E) {
-    Op.reset(new ARMOperand);
-    Op->Kind = Register;
+  static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+    ARMOperand *Op = new ARMOperand(Register);
     Op->Reg.RegNum = RegNum;
-    Op->Reg.Writeback = Writeback;
-    
     Op->StartLoc = S;
     Op->EndLoc = E;
+    return Op;
   }
 
-  static void CreateImm(OwningPtr<ARMOperand> &Op, const MCExpr *Val,
-                        SMLoc S, SMLoc E) {
-    Op.reset(new ARMOperand);
-    Op->Kind = Immediate;
+  static ARMOperand *
+  CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+                SMLoc StartLoc, SMLoc EndLoc) {
+    KindTy Kind = RegisterList;
+
+    if (ARM::DPRRegClass.contains(Regs.front().first))
+      Kind = DPRRegisterList;
+    else if (ARM::SPRRegClass.contains(Regs.front().first))
+      Kind = SPRRegisterList;
+
+    ARMOperand *Op = new ARMOperand(Kind);
+    for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+           I = Regs.begin(), E = Regs.end(); I != E; ++I)
+      Op->Registers.push_back(I->first);
+    array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+    Op->StartLoc = StartLoc;
+    Op->EndLoc = EndLoc;
+    return Op;
+  }
+
+  static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+    ARMOperand *Op = new ARMOperand(Immediate);
     Op->Imm.Val = Val;
-    
     Op->StartLoc = S;
     Op->EndLoc = E;
+    return Op;
   }
 
-  static void CreateMem(OwningPtr<ARMOperand> &Op,
-                        unsigned BaseRegNum, bool OffsetIsReg,
-                        const MCExpr *Offset, unsigned OffsetRegNum,
-                        bool OffsetRegShifted, enum ShiftType ShiftType,
-                        const MCExpr *ShiftAmount, bool Preindexed,
-                        bool Postindexed, bool Negative, bool Writeback,
-                        SMLoc S, SMLoc E) {
-    Op.reset(new ARMOperand);
-    Op->Kind = Memory;
+  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
+                               const MCExpr *Offset, int OffsetRegNum,
+                               bool OffsetRegShifted, enum ShiftType ShiftType,
+                               const MCExpr *ShiftAmount, bool Preindexed,
+                               bool Postindexed, bool Negative, bool Writeback,
+                               SMLoc S, SMLoc E) {
+    assert((OffsetRegNum == -1 || OffsetIsReg) &&
+           "OffsetRegNum must imply OffsetIsReg!");
+    assert((!OffsetRegShifted || OffsetIsReg) &&
+           "OffsetRegShifted must imply OffsetIsReg!");
+    assert((Offset || OffsetIsReg) &&
+           "Offset must exists unless register offset is used!");
+    assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
+           "Cannot have shift amount without shifted register offset!");
+    assert((!Offset || !OffsetIsReg) &&
+           "Cannot have expression offset and register offset!");
+
+    ARMOperand *Op = new ARMOperand(Memory);
     Op->Mem.BaseRegNum = BaseRegNum;
     Op->Mem.OffsetIsReg = OffsetIsReg;
-    Op->Mem.Offset = Offset;
-    Op->Mem.OffsetRegNum = OffsetRegNum;
+    if (OffsetIsReg)
+      Op->Mem.Offset.RegNum = OffsetRegNum;
+    else
+      Op->Mem.Offset.Value = Offset;
     Op->Mem.OffsetRegShifted = OffsetRegShifted;
     Op->Mem.ShiftType = ShiftType;
     Op->Mem.ShiftAmount = ShiftAmount;
@@ -313,9 +584,34 @@ public:
     Op->Mem.Postindexed = Postindexed;
     Op->Mem.Negative = Negative;
     Op->Mem.Writeback = Writeback;
-    
+
     Op->StartLoc = S;
     Op->EndLoc = E;
+    return Op;
+  }
+
+  static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+    Op->MBOpt.Val = Opt;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(ProcIFlags);
+    Op->IFlags.Val = IFlags;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+    ARMOperand *Op = new ARMOperand(MSRMask);
+    Op->MMask.Val = MMask;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
   }
 };
 
@@ -324,17 +620,77 @@ public:
 void ARMOperand::dump(raw_ostream &OS) const {
   switch (Kind) {
   case CondCode:
-    OS << ARMCondCodeToString(getCondCode());
+    OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+    break;
+  case CCOut:
+    OS << "<ccout " << getReg() << ">";
+    break;
+  case CoprocNum:
+    OS << "<coprocessor number: " << getCoproc() << ">";
+    break;
+  case CoprocReg:
+    OS << "<coprocessor register: " << getCoproc() << ">";
+    break;
+  case MSRMask:
+    OS << "<mask: " << getMSRMask() << ">";
     break;
   case Immediate:
     getImm()->print(OS);
     break;
+  case MemBarrierOpt:
+    OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+    break;
   case Memory:
-    OS << "<memory>";
+    OS << "<memory "
+       << "base:" << getMemBaseRegNum();
+    if (getMemOffsetIsReg()) {
+      OS << " offset:<register " << getMemOffsetRegNum();
+      if (getMemOffsetRegShifted()) {
+        OS << " offset-shift-type:" << getMemShiftType();
+        OS << " offset-shift-amount:" << *getMemShiftAmount();
+      }
+    } else {
+      OS << " offset:" << *getMemOffset();
+    }
+    if (getMemOffsetIsReg())
+      OS << " (offset-is-reg)";
+    if (getMemPreindexed())
+      OS << " (pre-indexed)";
+    if (getMemPostindexed())
+      OS << " (post-indexed)";
+    if (getMemNegative())
+      OS << " (negative)";
+    if (getMemWriteback())
+      OS << " (writeback)";
+    OS << ">";
+    break;
+  case ProcIFlags: {
+    OS << "<ARM_PROC::";
+    unsigned IFlags = getProcIFlags();
+    for (int i=2; i >= 0; --i)
+      if (IFlags & (1 << i))
+        OS << ARM_PROC::IFlagsToString(1 << i);
+    OS << ">";
     break;
+  }
   case Register:
     OS << "<register " << getReg() << ">";
     break;
+  case RegisterList:
+  case DPRRegisterList:
+  case SPRRegisterList: {
+    OS << "<register_list ";
+
+    const SmallVectorImpl<unsigned> &RegList = getRegList();
+    for (SmallVectorImpl<unsigned>::const_iterator
+           I = RegList.begin(), E = RegList.end(); I != E; ) {
+      OS << *I;
+      if (++I < E) OS << ", ";
+    }
+
+    OS << ">";
+    break;
+  }
   case Token:
     OS << "'" << getToken() << "'";
     break;
@@ -348,184 +704,456 @@ static unsigned MatchRegisterName(StringRef Name);
 
 /// }
 
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+                                 SMLoc &StartLoc, SMLoc &EndLoc) {
+  RegNo = TryParseRegister();
+
+  return (RegNo == (unsigned)-1);
+}
+
 /// Try to parse a register name.  The token must be an Identifier when called,
-/// and if it is a register name a Reg operand is created, the token is eaten
-/// and false is returned.  Else true is returned and no token is eaten.
-/// TODO this is likely to change to allow different register types and or to
-/// parse for a specific register type.
-bool ARMAsmParser::MaybeParseRegister
-  (OwningPtr<ARMOperand> &Op, bool ParseWriteBack) {
-  SMLoc S, E;
+/// and if it is a register name the token is eaten and the register number is
+/// returned.  Otherwise return -1.
+///
+int ARMAsmParser::TryParseRegister() {
   const AsmToken &Tok = Parser.getTok();
   assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
 
   // FIXME: Validate register for the current architecture; we have to do
   // validation later, so maybe there is no need for this here.
-  int RegNum;
+  std::string upperCase = Tok.getString().str();
+  std::string lowerCase = LowercaseString(upperCase);
+  unsigned RegNum = MatchRegisterName(lowerCase);
+  if (!RegNum) {
+    RegNum = StringSwitch<unsigned>(lowerCase)
+      .Case("r13", ARM::SP)
+      .Case("r14", ARM::LR)
+      .Case("r15", ARM::PC)
+      .Case("ip", ARM::R12)
+      .Default(0);
+  }
+  if (!RegNum) return -1;
 
-  RegNum = MatchRegisterName(Tok.getString());
-  if (RegNum == -1)
-    return true;
-  
-  S = Tok.getLoc();
-  
   Parser.Lex(); // Eat identifier token.
-    
-  E = Parser.getTok().getLoc();
+  return RegNum;
+}
 
-  bool Writeback = false;
-  if (ParseWriteBack) {
-    const AsmToken &ExclaimTok = Parser.getTok();
-    if (ExclaimTok.is(AsmToken::Exclaim)) {
-      E = ExclaimTok.getLoc();
-      Writeback = true;
-      Parser.Lex(); // Eat exclaim token
+/// Try to parse a register name.  The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  int RegNo = TryParseRegister();
+  if (RegNo == -1)
+    return true;
+
+  Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+  const AsmToken &ExclaimTok = Parser.getTok();
+  if (ExclaimTok.is(AsmToken::Exclaim)) {
+    Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+                                               ExclaimTok.getLoc()));
+    Parser.Lex(); // Eat exclaim token
+  }
+
+  return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse an coprocessor related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+  // Use the same layout as the tablegen'erated register name matcher. Ugly,
+  // but efficient.
+  switch (Name.size()) {
+  default: break;
+  case 2:
+    if (Name[0] != CoprocOp)
+      return -1;
+    switch (Name[1]) {
+    default:  return -1;
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    }
+    break;
+  case 3:
+    if (Name[0] != CoprocOp || Name[1] != '1')
+      return -1;
+    switch (Name[2]) {
+    default:  return -1;
+    case '0': return 10;
+    case '1': return 11;
+    case '2': return 12;
+    case '3': return 13;
+    case '4': return 14;
+    case '5': return 15;
     }
+    break;
   }
 
-  ARMOperand::CreateReg(Op, RegNum, Writeback, S, E);
+  return -1;
+}
 
-  return false;
+/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+  if (Num == -1)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+  return MatchOperand_Success;
 }
 
-/// Parse a register list, return false if successful else return true or an 
-/// error.  The first token must be a '{' when called.
-bool ARMAsmParser::ParseRegisterList(OwningPtr<ARMOperand> &Op) {
-  SMLoc S, E;
-  assert(Parser.getTok().is(AsmToken::LCurly) &&
-         "Token is not an Left Curly Brace");
-  S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat left curly brace token.
-
-  const AsmToken &RegTok = Parser.getTok();
-  SMLoc RegLoc = RegTok.getLoc();
-  if (RegTok.isNot(AsmToken::Identifier))
-    return Error(RegLoc, "register expected");
-  int RegNum = MatchRegisterName(RegTok.getString());
-  if (RegNum == -1)
-    return Error(RegLoc, "register expected");
+/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+  if (Reg == -1)
+    return MatchOperand_NoMatch;
+
   Parser.Lex(); // Eat identifier token.
-  unsigned RegList = 1 << RegNum;
+  Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+  return MatchOperand_Success;
+}
 
-  int HighRegNum = RegNum;
-  // TODO ranges like "{Rn-Rm}"
-  while (Parser.getTok().is(AsmToken::Comma)) {
-    Parser.Lex(); // Eat comma token.
+/// Parse a register list, return it if successful else return null.  The first
+/// token must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LCurly) &&
+         "Token is not a Left Curly Brace");
+  SMLoc S = Parser.getTok().getLoc();
+
+  // Read the rest of the registers in the list.
+  unsigned PrevRegNum = 0;
+  SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+  do {
+    bool IsRange = Parser.getTok().is(AsmToken::Minus);
+    Parser.Lex(); // Eat non-identifier token.
 
     const AsmToken &RegTok = Parser.getTok();
     SMLoc RegLoc = RegTok.getLoc();
-    if (RegTok.isNot(AsmToken::Identifier))
-      return Error(RegLoc, "register expected");
-    int RegNum = MatchRegisterName(RegTok.getString());
-    if (RegNum == -1)
-      return Error(RegLoc, "register expected");
+    if (RegTok.isNot(AsmToken::Identifier)) {
+      Error(RegLoc, "register expected");
+      return true;
+    }
 
-    if (RegList & (1 << RegNum))
-      Warning(RegLoc, "register duplicated in register list");
-    else if (RegNum <= HighRegNum)
-      Warning(RegLoc, "register not in ascending order in register list");
-    RegList |= 1 << RegNum;
-    HighRegNum = RegNum;
+    int RegNum = TryParseRegister();
+    if (RegNum == -1) {
+      Error(RegLoc, "register expected");
+      return true;
+    }
 
-    Parser.Lex(); // Eat identifier token.
-  }
+    if (IsRange) {
+      int Reg = PrevRegNum;
+      do {
+        ++Reg;
+        Registers.push_back(std::make_pair(Reg, RegLoc));
+      } while (Reg != RegNum);
+    } else {
+      Registers.push_back(std::make_pair(RegNum, RegLoc));
+    }
+
+    PrevRegNum = RegNum;
+  } while (Parser.getTok().is(AsmToken::Comma) ||
+           Parser.getTok().is(AsmToken::Minus));
+
+  // Process the right curly brace of the list.
   const AsmToken &RCurlyTok = Parser.getTok();
-  if (RCurlyTok.isNot(AsmToken::RCurly))
-    return Error(RCurlyTok.getLoc(), "'}' expected");
-  E = RCurlyTok.getLoc();
-  Parser.Lex(); // Eat left curly brace token.
+  if (RCurlyTok.isNot(AsmToken::RCurly)) {
+    Error(RCurlyTok.getLoc(), "'}' expected");
+    return true;
+  }
+
+  SMLoc E = RCurlyTok.getLoc();
+  Parser.Lex(); // Eat right curly brace token.
+
+  // Verify the register list.
+  SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+    RI = Registers.begin(), RE = Registers.end();
+
+  unsigned HighRegNum = getARMRegisterNumbering(RI->first);
+  bool EmittedWarning = false;
+
+  DenseMap<unsigned, bool> RegMap;
+  RegMap[HighRegNum] = true;
 
+  for (++RI; RI != RE; ++RI) {
+    const std::pair<unsigned, SMLoc> &RegInfo = *RI;
+    unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+
+    if (RegMap[Reg]) {
+      Error(RegInfo.second, "register duplicated in register list");
+      return true;
+    }
+
+    if (!EmittedWarning && Reg < HighRegNum)
+      Warning(RegInfo.second,
+              "register not in ascending order in register list");
+
+    RegMap[Reg] = true;
+    HighRegNum = std::max(Reg, HighRegNum);
+  }
+
+  Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
   return false;
 }
 
-/// Parse an arm memory expression, return false if successful else return true
+/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  StringRef OptStr = Tok.getString();
+
+  unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+    .Case("sy",    ARM_MB::SY)
+    .Case("st",    ARM_MB::ST)
+    .Case("ish",   ARM_MB::ISH)
+    .Case("ishst", ARM_MB::ISHST)
+    .Case("nsh",   ARM_MB::NSH)
+    .Case("nshst", ARM_MB::NSHST)
+    .Case("osh",   ARM_MB::OSH)
+    .Case("oshst", ARM_MB::OSHST)
+    .Default(~0U);
+
+  if (Opt == ~0U)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+  return MatchOperand_Success;
+}
+
+/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  StringRef IFlagsStr = Tok.getString();
+
+  unsigned IFlags = 0;
+  for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+    unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+    .Case("a", ARM_PROC::A)
+    .Case("i", ARM_PROC::I)
+    .Case("f", ARM_PROC::F)
+    .Default(~0U);
+
+    // If some specific iflag is already set, it means that some letter is
+    // present more than once, this is not acceptable.
+    if (Flag == ~0U || (IFlags & Flag))
+      return MatchOperand_NoMatch;
+
+    IFlags |= Flag;
+  }
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+  return MatchOperand_Success;
+}
+
+/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  StringRef Mask = Tok.getString();
+
+  // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+  size_t Start = 0, Next = Mask.find('_');
+  StringRef Flags = "";
+  StringRef SpecReg = Mask.slice(Start, Next);
+  if (Next != StringRef::npos)
+    Flags = Mask.slice(Next+1, Mask.size());
+
+  // FlagsVal contains the complete mask:
+  // 3-0: Mask
+  // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+  unsigned FlagsVal = 0;
+
+  if (SpecReg == "apsr") {
+    FlagsVal = StringSwitch<unsigned>(Flags)
+    .Case("nzcvq",  0x8) // same as CPSR_c
+    .Case("g",      0x4) // same as CPSR_s
+    .Case("nzcvqg", 0xc) // same as CPSR_fs
+    .Default(~0U);
+
+    if (FlagsVal == ~0U) {
+      if (!Flags.empty())
+        return MatchOperand_NoMatch;
+      else
+        FlagsVal = 0; // No flag
+    }
+  } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+    for (int i = 0, e = Flags.size(); i != e; ++i) {
+      unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+      .Case("c", 1)
+      .Case("x", 2)
+      .Case("s", 4)
+      .Case("f", 8)
+      .Default(~0U);
+
+      // If some specific flag is already set, it means that some letter is
+      // present more than once, this is not acceptable.
+      if (FlagsVal == ~0U || (FlagsVal & Flag))
+        return MatchOperand_NoMatch;
+      FlagsVal |= Flag;
+    }
+  } else // No match for special register.
+    return MatchOperand_NoMatch;
+
+  // Special register without flags are equivalent to "fc" flags.
+  if (!FlagsVal)
+    FlagsVal = 0x9;
+
+  // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+  if (SpecReg == "spsr")
+    FlagsVal |= 16;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+  return MatchOperand_Success;
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
 /// or an error.  The first token must be a '[' when called.
+///
 /// TODO Only preindexing and postindexing addressing are started, unindexed
 /// with option, etc are still to do.
-bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S, E;
   assert(Parser.getTok().is(AsmToken::LBrac) &&
-         "Token is not an Left Bracket");
+         "Token is not a Left Bracket");
   S = Parser.getTok().getLoc();
   Parser.Lex(); // Eat left bracket token.
 
   const AsmToken &BaseRegTok = Parser.getTok();
-  if (BaseRegTok.isNot(AsmToken::Identifier))
-    return Error(BaseRegTok.getLoc(), "register expected");
-  if (MaybeParseRegister(Op, false))
-    return Error(BaseRegTok.getLoc(), "register expected");
-  int BaseRegNum = Op->getReg();
+  if (BaseRegTok.isNot(AsmToken::Identifier)) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return true;
+  }
+  int BaseRegNum = TryParseRegister();
+  if (BaseRegNum == -1) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return true;
+  }
+
+  // The next token must either be a comma or a closing bracket.
+  const AsmToken &Tok = Parser.getTok();
+  if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+    return true;
 
   bool Preindexed = false;
   bool Postindexed = false;
   bool OffsetIsReg = false;
   bool Negative = false;
   bool Writeback = false;
+  ARMOperand *WBOp = 0;
+  int OffsetRegNum = -1;
+  bool OffsetRegShifted = false;
+  enum ShiftType ShiftType = Lsl;
+  const MCExpr *ShiftAmount = 0;
+  const MCExpr *Offset = 0;
 
   // First look for preindexed address forms, that is after the "[Rn" we now
   // have to see if the next token is a comma.
-  const AsmToken &Tok = Parser.getTok();
   if (Tok.is(AsmToken::Comma)) {
     Preindexed = true;
     Parser.Lex(); // Eat comma token.
-    int OffsetRegNum;
-    bool OffsetRegShifted;
-    enum ShiftType ShiftType;
-    const MCExpr *ShiftAmount;
-    const MCExpr *Offset;
-    if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
-                            Offset, OffsetIsReg, OffsetRegNum, E))
+
+    if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+                             Offset, OffsetIsReg, OffsetRegNum, E))
       return true;
     const AsmToken &RBracTok = Parser.getTok();
-    if (RBracTok.isNot(AsmToken::RBrac))
-      return Error(RBracTok.getLoc(), "']' expected");
+    if (RBracTok.isNot(AsmToken::RBrac)) {
+      Error(RBracTok.getLoc(), "']' expected");
+      return true;
+    }
     E = RBracTok.getLoc();
     Parser.Lex(); // Eat right bracket token.
 
     const AsmToken &ExclaimTok = Parser.getTok();
     if (ExclaimTok.is(AsmToken::Exclaim)) {
-      E = ExclaimTok.getLoc();
+      WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
+                                     ExclaimTok.getLoc());
       Writeback = true;
       Parser.Lex(); // Eat exclaim token
     }
-    ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
-                          OffsetRegShifted, ShiftType, ShiftAmount,
-                          Preindexed, Postindexed, Negative, Writeback, S, E);
-    return false;
-  }
-  // The "[Rn" we have so far was not followed by a comma.
-  else if (Tok.is(AsmToken::RBrac)) {
-    // This is a post indexing addressing forms, that is a ']' follows after
-    // the "[Rn".
-    Postindexed = true;
-    Writeback = true;
+  } else {
+    // The "[Rn" we have so far was not followed by a comma.
+
+    // If there's anything other than the right brace, this is a post indexing
+    // addressing form.
     E = Tok.getLoc();
     Parser.Lex(); // Eat right bracket token.
 
-    int OffsetRegNum = 0;
-    bool OffsetRegShifted = false;
-    enum ShiftType ShiftType;
-    const MCExpr *ShiftAmount;
-    const MCExpr *Offset;
-
     const AsmToken &NextTok = Parser.getTok();
+
     if (NextTok.isNot(AsmToken::EndOfStatement)) {
-      if (NextTok.isNot(AsmToken::Comma))
-        return Error(NextTok.getLoc(), "',' expected");
+      Postindexed = true;
+      Writeback = true;
+
+      if (NextTok.isNot(AsmToken::Comma)) {
+        Error(NextTok.getLoc(), "',' expected");
+        return true;
+      }
+
       Parser.Lex(); // Eat comma token.
-      if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
-                              ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, 
-                              E))
+
+      if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+                               ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
+                               E))
         return true;
     }
+  }
 
-    ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
-                          OffsetRegShifted, ShiftType, ShiftAmount,
-                          Preindexed, Postindexed, Negative, Writeback, S, E);
-    return false;
+  // Force Offset to exist if used.
+  if (!OffsetIsReg) {
+    if (!Offset)
+      Offset = MCConstantExpr::Create(0, getContext());
   }
 
-  return true;
+  Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
+                                           OffsetRegNum, OffsetRegShifted,
+                                           ShiftType, ShiftAmount, Preindexed,
+                                           Postindexed, Negative, Writeback,
+                                           S, E));
+  if (WBOp)
+    Operands.push_back(WBOp);
+
+  return false;
 }
 
 /// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
@@ -543,7 +1171,6 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
                                         bool &OffsetIsReg,
                                         int &OffsetRegNum,
                                         SMLoc &E) {
-  OwningPtr<ARMOperand> Op;
   Negative = false;
   OffsetRegShifted = false;
   OffsetIsReg = false;
@@ -559,13 +1186,15 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
   // See if there is a register following the "[Rn," or "[Rn]," we have so far.
   const AsmToken &OffsetRegTok = Parser.getTok();
   if (OffsetRegTok.is(AsmToken::Identifier)) {
-    OffsetIsReg = !MaybeParseRegister(Op, false);
-    if (OffsetIsReg) {
-      E = Op->getEndLoc();
-      OffsetRegNum = Op->getReg();
+    SMLoc CurLoc = OffsetRegTok.getLoc();
+    OffsetRegNum = TryParseRegister();
+    if (OffsetRegNum != -1) {
+      OffsetIsReg = true;
+      E = CurLoc;
     }
   }
-  // If we parsed a register as the offset then their can be a shift after that
+
+  // If we parsed a register as the offset then there can be a shift after that.
   if (OffsetRegNum != -1) {
     // Look for a comma then a shift
     const AsmToken &Tok = Parser.getTok();
@@ -583,7 +1212,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
     const AsmToken &HashTok = Parser.getTok();
     if (HashTok.isNot(AsmToken::Hash))
       return Error(HashTok.getLoc(), "'#' expected");
-    
+
     Parser.Lex(); // Eat hash token.
 
     if (getParser().ParseExpression(Offset))
@@ -597,8 +1226,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
 ///   ( lsl | lsr | asr | ror ) , # shift_amount
 ///   rrx
 /// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType &St, 
-                              const MCExpr *&ShiftAmount, 
+bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
                               SMLoc &E) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
@@ -636,13 +1264,33 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
 
 /// Parse a arm instruction operand.  For now this parses the operand regardless
 /// of the mnemonic.
-bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
+bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                                StringRef Mnemonic) {
   SMLoc S, E;
-  
+
+  // Check if the current operand has a custom associated parser, if so, try to
+  // custom parse the operand, or fallback to the general approach.
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+  if (ResTy == MatchOperand_Success)
+    return false;
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+  if (ResTy == MatchOperand_ParseFail)
+    return true;
+
   switch (getLexer().getKind()) {
+  default:
+    Error(Parser.getTok().getLoc(), "unexpected token in operand");
+    return true;
   case AsmToken::Identifier:
-    if (!MaybeParseRegister(Op, true))
+    if (!TryParseRegisterWithWriteBack(Operands))
       return false;
+
+    // Fall though for the Identifier case that is not a register or a
+    // special name.
+  case AsmToken::Integer: // things like 1f and 2b as a branch targets
+  case AsmToken::Dot: {   // . as a branch target
     // This was not a register so parse other operands that start with an
     // identifier (like labels) as expressions and create them as immediates.
     const MCExpr *IdVal;
@@ -650,12 +1298,13 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
     if (getParser().ParseExpression(IdVal))
       return true;
     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-    ARMOperand::CreateImm(Op, IdVal, S, E);
+    Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
     return false;
+  }
   case AsmToken::LBrac:
-    return ParseMemory(Op);
+    return ParseMemory(Operands);
   case AsmToken::LCurly:
-    return ParseRegisterList(Op);
+    return ParseRegisterList(Operands);
   case AsmToken::Hash:
     // #42 -> immediate.
     // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -665,28 +1314,134 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
     if (getParser().ParseExpression(ImmVal))
       return true;
     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-    ARMOperand::CreateImm(Op, ImmVal, S, E);
+    Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
     return false;
-  default:
-    return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+  case AsmToken::Colon: {
+    // ":lower16:" and ":upper16:" expression prefixes
+    // FIXME: Check it's an expression prefix,
+    // e.g. (FOO - :lower16:BAR) isn't legal.
+    ARMMCExpr::VariantKind RefKind;
+    if (ParsePrefix(RefKind))
+      return true;
+
+    const MCExpr *SubExprVal;
+    if (getParser().ParseExpression(SubExprVal))
+      return true;
+
+    const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+                                                   getContext());
+    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+    Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+    return false;
+  }
   }
 }
 
-/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
-                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  OwningPtr<ARMOperand> Op;
+// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+//  :lower16: and :upper16:.
+bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+  RefKind = ARMMCExpr::VK_ARM_None;
 
-  // Create the leading tokens for the mnemonic, split by '.' characters.
-  size_t Start = 0, Next = Name.find('.');
-  StringRef Head = Name.slice(Start, Next);
+  // :lower16: and :upper16: modifiers
+  assert(getLexer().is(AsmToken::Colon) && "expected a :");
+  Parser.Lex(); // Eat ':'
+
+  if (getLexer().isNot(AsmToken::Identifier)) {
+    Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+    return true;
+  }
+
+  StringRef IDVal = Parser.getTok().getIdentifier();
+  if (IDVal == "lower16") {
+    RefKind = ARMMCExpr::VK_ARM_LO16;
+  } else if (IDVal == "upper16") {
+    RefKind = ARMMCExpr::VK_ARM_HI16;
+  } else {
+    Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+    return true;
+  }
+  Parser.Lex();
+
+  if (getLexer().isNot(AsmToken::Colon)) {
+    Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+    return true;
+  }
+  Parser.Lex(); // Eat the last ':'
+  return false;
+}
+
+const MCExpr *
+ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
+                                MCSymbolRefExpr::VariantKind Variant) {
+  // Recurse over the given expression, rebuilding it to apply the given variant
+  // to the leftmost symbol.
+  if (Variant == MCSymbolRefExpr::VK_None)
+    return E;
+
+  switch (E->getKind()) {
+  case MCExpr::Target:
+    llvm_unreachable("Can't handle target expr yet");
+  case MCExpr::Constant:
+    llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
 
-  // Determine the predicate, if any.
+    if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+      return 0;
+
+    return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+  }
+
+  case MCExpr::Unary:
+    llvm_unreachable("Can't handle unary expressions yet");
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+    const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+    const MCExpr *RHS = BE->getRHS();
+    if (!LHS)
+      return 0;
+
+    return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+  return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+                               unsigned &PredicationCode,
+                               bool &CarrySetting,
+                               unsigned &ProcessorIMod) {
+  PredicationCode = ARMCC::AL;
+  CarrySetting = false;
+  ProcessorIMod = 0;
+
+  // Ignore some mnemonics we know aren't predicated forms.
   //
-  // FIXME: We need a way to check whether a prefix supports predication,
-  // otherwise we will end up with an ambiguity for instructions that happen to
-  // end with a predicate name.
-  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+  // FIXME: Would be nice to autogen this.
+  if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+      Mnemonic == "movs" ||
+      Mnemonic == "svc" ||
+      (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+       Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+      Mnemonic == "vacge" || Mnemonic == "vcge" ||
+      Mnemonic == "vclt" ||
+      Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+      Mnemonic == "vcle" ||
+      (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+       Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+       Mnemonic == "vqdmlal"))
+    return Mnemonic;
+
+  // First, split out any predication code.
+  unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
     .Case("eq", ARMCC::EQ)
     .Case("ne", ARMCC::NE)
     .Case("hs", ARMCC::HS)
@@ -704,44 +1459,268 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
     .Case("al", ARMCC::AL)
     .Default(~0U);
   if (CC != ~0U) {
-    Head = Head.slice(0, Head.size() - 2);
-  } else
-    CC = ARMCC::AL;
+    Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+    PredicationCode = CC;
+  }
+
+  // Next, determine if we have a carry setting bit. We explicitly ignore all
+  // the instructions we know end in 's'.
+  if (Mnemonic.endswith("s") &&
+      !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+        Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+        Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+        Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+        Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+    Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+    CarrySetting = true;
+  }
+
+  // The "cps" instruction can have a interrupt mode operand which is glued into
+  // the mnemonic. Check if this is the case, split it and parse the imod op
+  if (Mnemonic.startswith("cps")) {
+    // Split out any imod code.
+    unsigned IMod =
+      StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+      .Case("ie", ARM_PROC::IE)
+      .Case("id", ARM_PROC::ID)
+      .Default(~0U);
+    if (IMod != ~0U) {
+      Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+      ProcessorIMod = IMod;
+    }
+  }
+
+  return Mnemonic;
+}
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+                      bool &CanAcceptPredicationCode) {
+  bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb();
+
+  if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+      Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+      Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+      Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+      Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mov" ||
+      Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+      Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
+      Mnemonic == "eor" || Mnemonic == "smlal" || Mnemonic == "mvn") {
+    CanAcceptCarrySet = true;
+  } else {
+    CanAcceptCarrySet = false;
+  }
+
+  if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+      Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+      Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+      Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+      Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
+      Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+    CanAcceptPredicationCode = false;
+  } else {
+    CanAcceptPredicationCode = true;
+  }
 
-  ARMOperand::CreateToken(Op, Head, NameLoc);
-  Operands.push_back(Op.take());
+  if (isThumb)
+    if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+        Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+      CanAcceptPredicationCode = false;
+}
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create the leading tokens for the mnemonic, split by '.' characters.
+  size_t Start = 0, Next = Name.find('.');
+  StringRef Head = Name.slice(Start, Next);
+
+  // Split out the predication code and carry setting flag from the mnemonic.
+  unsigned PredicationCode;
+  unsigned ProcessorIMod;
+  bool CarrySetting;
+  Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
+                       ProcessorIMod);
+
+  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+
+  // Next, add the CCOut and ConditionCode operands, if needed.
+  //
+  // For mnemonics which can ever incorporate a carry setting bit or predication
+  // code, our matching model involves us always generating CCOut and
+  // ConditionCode operands to match the mnemonic "as written" and then we let
+  // the matcher deal with finding the right instruction or generating an
+  // appropriate error.
+  bool CanAcceptCarrySet, CanAcceptPredicationCode;
+  GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+  // Add the carry setting operand, if necessary.
+  //
+  // FIXME: It would be awesome if we could somehow invent a location such that
+  // match errors on this operand would print a nice diagnostic about how the
+  // 's' character in the mnemonic resulted in a CCOut operand.
+  if (CanAcceptCarrySet) {
+    Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+                                               NameLoc));
+  } else {
+    // This mnemonic can't ever accept a carry set, but the user wrote one (or
+    // misspelled another mnemonic).
+
+    // FIXME: Issue a nice error.
+  }
+
+  // Add the predication code operand, if necessary.
+  if (CanAcceptPredicationCode) {
+    Operands.push_back(ARMOperand::CreateCondCode(
+                         ARMCC::CondCodes(PredicationCode), NameLoc));
+  } else {
+    // This mnemonic can't ever accept a predication code, but the user wrote
+    // one (or misspelled another mnemonic).
+
+    // FIXME: Issue a nice error.
+  }
+
+  // Add the processor imod operand, if necessary.
+  if (ProcessorIMod) {
+    Operands.push_back(ARMOperand::CreateImm(
+          MCConstantExpr::Create(ProcessorIMod, getContext()),
+                                 NameLoc, NameLoc));
+  } else {
+    // This mnemonic can't ever accept a imod, but the user wrote
+    // one (or misspelled another mnemonic).
 
-  ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
-  Operands.push_back(Op.take());
+    // FIXME: Issue a nice error.
+  }
 
   // Add the remaining tokens in the mnemonic.
   while (Next != StringRef::npos) {
     Start = Next;
     Next = Name.find('.', Start + 1);
-    Head = Name.slice(Start, Next);
+    StringRef ExtraToken = Name.slice(Start, Next);
 
-    ARMOperand::CreateToken(Op, Head, NameLoc);
-    Operands.push_back(Op.take());
+    Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
   }
 
   // Read the remaining operands.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     // Read the first operand.
-    OwningPtr<ARMOperand> Op;
-    if (ParseOperand(Op)) return true;
-    Operands.push_back(Op.take());
+    if (ParseOperand(Operands, Head)) {
+      Parser.EatToEndOfStatement();
+      return true;
+    }
 
     while (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();  // Eat the comma.
 
       // Parse and remember the operand.
-      if (ParseOperand(Op)) return true;
-      Operands.push_back(Op.take());
+      if (ParseOperand(Operands, Head)) {
+        Parser.EatToEndOfStatement();
+        return true;
+      }
     }
   }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Parser.EatToEndOfStatement();
+    return TokError("unexpected token in argument list");
+  }
+
+  Parser.Lex(); // Consume the EndOfStatement
   return false;
 }
 
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out) {
+  MCInst Inst;
+  unsigned ErrorInfo;
+  MatchResultTy MatchResult, MatchResult2;
+  MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+  if (MatchResult != Match_Success) {
+    // If we get a Match_InvalidOperand it might be some arithmetic instruction
+    // that does not update the condition codes.  So try adding a CCOut operand
+    // with a value of reg0.
+    if (MatchResult == Match_InvalidOperand) {
+      Operands.insert(Operands.begin() + 1,
+                      ARMOperand::CreateCCOut(0,
+                                  ((ARMOperand*)Operands[0])->getStartLoc()));
+      MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+      if (MatchResult2 == Match_Success)
+        MatchResult = Match_Success;
+      else {
+        ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+        Operands.erase(Operands.begin() + 1);
+        delete CCOut;
+      }
+    }
+    // If we get a Match_MnemonicFail it might be some arithmetic instruction
+    // that updates the condition codes if it ends in 's'.  So see if the
+    // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+    // operand with a value of CPSR.
+    else if(MatchResult == Match_MnemonicFail) {
+      // Get the instruction mnemonic, which is the first token.
+      StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+      if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+        // removed the 's' from the mnemonic for matching.
+        StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+        SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+        ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+        Operands.erase(Operands.begin());
+        delete OldMnemonic;
+        Operands.insert(Operands.begin(),
+                        ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+        Operands.insert(Operands.begin() + 1,
+                        ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+        MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+        if (MatchResult2 == Match_Success)
+          MatchResult = Match_Success;
+        else {
+          ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+          Operands.erase(Operands.begin());
+          delete OldMnemonic;
+          Operands.insert(Operands.begin(),
+                          ARMOperand::CreateToken(Mnemonic, NameLoc));
+          ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+          Operands.erase(Operands.begin() + 1);
+          delete CCOut;
+        }
+      }
+    }
+  }
+  switch (MatchResult) {
+  case Match_Success:
+    Out.EmitInstruction(Inst);
+    return false;
+  case Match_MissingFeature:
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+    return true;
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  case Match_MnemonicFail:
+    return Error(IDLoc, "unrecognized instruction mnemonic");
+  case Match_ConversionFail:
+    return Error(IDLoc, "unable to convert operands to instruction");
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+  return true;
+}
+
 /// ParseDirective parses the arm specific directives
 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getIdentifier();
@@ -771,7 +1750,7 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 
       if (getLexer().is(AsmToken::EndOfStatement))
         break;
-      
+
       // FIXME: Improve diagnostic.
       if (getLexer().isNot(AsmToken::Comma))
         return Error(L, "unexpected token in directive");
@@ -801,16 +1780,16 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
 bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
-    return Error(L, "unexpected token in .syntax directive");
-  StringRef ATTRIBUTE_UNUSED SymbolName = Parser.getTok().getIdentifier();
+    return Error(L, "unexpected token in .thumb_func directive");
+  StringRef Name = Tok.getString();
   Parser.Lex(); // Consume the identifier token.
-
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return Error(L, "unexpected token in directive");
   Parser.Lex();
 
-  // TODO: mark symbol as a thumb symbol
-  // getParser().getStreamer().Emit???();
+  // Mark symbol as a thumb symbol.
+  MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+  getParser().getStreamer().EmitThumbFunc(Func);
   return false;
 }
 
@@ -824,7 +1803,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
   if (Mode == "unified" || Mode == "UNIFIED")
     Parser.Lex();
   else if (Mode == "divided" || Mode == "DIVIDED")
-    Parser.Lex();
+    return Error(L, "'.syntax divided' arm asssembly not supported");
   else
     return Error(L, "unrecognized syntax mode in .syntax directive");
 
@@ -855,8 +1834,21 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
     return Error(Parser.getTok().getLoc(), "unexpected token in directive");
   Parser.Lex();
 
-  // TODO tell the MC streamer the mode
-  // getParser().getStreamer().Emit???();
+  // FIXME: We need to be able switch subtargets at this point so that
+  // MatchInstructionImpl() will work when it gets the AvailableFeatures which
+  // includes Feature_IsThumb or not to match the right instructions.  This is
+  // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine.
+  if (Val == 16){
+    assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
+	   "switching between arm/thumb not yet suppported via .code 16)");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+  }
+  else{
+    assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
+           "switching between thumb/arm not yet suppported via .code 32)");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+   }
+
   return false;
 }
 
@@ -869,4 +1861,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
   LLVMInitializeARMAsmLexer();
 }
 
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
 #include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
deleted file mode 100644
index 8026e7718ca9..000000000000
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ /dev/null
@@ -1,800 +0,0 @@
-//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "ARM.h" // FIXME: FACTOR ENUMS BETTER.
-#include "ARMInstPrinter.h"
-#include "ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#define ARMAsmPrinter ARMInstPrinter  // FIXME: REMOVE.
-#include "ARMGenAsmWriter.inc"
-#undef MachineInstr
-#undef ARMAsmPrinter
-
-static unsigned NextReg(unsigned Reg) {
-  switch (Reg) {
-  default:
-    assert(0 && "Unexpected register enum");
-
-  case ARM::D0:
-    return ARM::D1;
-  case ARM::D1:
-    return ARM::D2;
-  case ARM::D2:
-    return ARM::D3;
-  case ARM::D3:
-    return ARM::D4;
-  case ARM::D4:
-    return ARM::D5;
-  case ARM::D5:
-    return ARM::D6;
-  case ARM::D6:
-    return ARM::D7;
-  case ARM::D7:
-    return ARM::D8;
-  case ARM::D8:
-    return ARM::D9;
-  case ARM::D9:
-    return ARM::D10;
-  case ARM::D10:
-    return ARM::D11;
-  case ARM::D11:
-    return ARM::D12;
-  case ARM::D12:
-    return ARM::D13;
-  case ARM::D13:
-    return ARM::D14;
-  case ARM::D14:
-    return ARM::D15;
-  case ARM::D15:
-    return ARM::D16;
-  case ARM::D16:
-    return ARM::D17;
-  case ARM::D17:
-    return ARM::D18;
-  case ARM::D18:
-    return ARM::D19;
-  case ARM::D19:
-    return ARM::D20;
-  case ARM::D20:
-    return ARM::D21;
-  case ARM::D21:
-    return ARM::D22;
-  case ARM::D22:
-    return ARM::D23;
-  case ARM::D23:
-    return ARM::D24;
-  case ARM::D24:
-    return ARM::D25;
-  case ARM::D25:
-    return ARM::D26;
-  case ARM::D26:
-    return ARM::D27;
-  case ARM::D27:
-    return ARM::D28;
-  case ARM::D28:
-    return ARM::D29;
-  case ARM::D29:
-    return ARM::D30;
-  case ARM::D30:
-    return ARM::D31;
-  }
-}
-
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
-  // Check for MOVs and print canonical forms, instead.
-  if (MI->getOpcode() == ARM::MOVs) {
-    const MCOperand &Dst = MI->getOperand(0);
-    const MCOperand &MO1 = MI->getOperand(1);
-    const MCOperand &MO2 = MI->getOperand(2);
-    const MCOperand &MO3 = MI->getOperand(3);
-
-    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
-    printSBitModifierOperand(MI, 6, O);
-    printPredicateOperand(MI, 4, O);
-
-    O << '\t' << getRegisterName(Dst.getReg())
-      << ", " << getRegisterName(MO1.getReg());
-
-    if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
-      return;
-
-    O << ", ";
-
-    if (MO2.getReg()) {
-      O << getRegisterName(MO2.getReg());
-      assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
-    } else {
-      O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
-    }
-    return;
-  }
-
-  // A8.6.123 PUSH
-  if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) &&
-      MI->getOperand(0).getReg() == ARM::SP) {
-    const MCOperand &MO1 = MI->getOperand(2);
-    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
-      O << '\t' << "push";
-      printPredicateOperand(MI, 3, O);
-      O << '\t';
-      printRegisterList(MI, 5, O);
-      return;
-    }
-  }
-
-  // A8.6.122 POP
-  if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) &&
-      MI->getOperand(0).getReg() == ARM::SP) {
-    const MCOperand &MO1 = MI->getOperand(2);
-    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
-      O << '\t' << "pop";
-      printPredicateOperand(MI, 3, O);
-      O << '\t';
-      printRegisterList(MI, 5, O);
-      return;
-    }
-  }
-
-  // A8.6.355 VPUSH
-  if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
-      MI->getOperand(0).getReg() == ARM::SP) {
-    const MCOperand &MO1 = MI->getOperand(2);
-    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
-      O << '\t' << "vpush";
-      printPredicateOperand(MI, 3, O);
-      O << '\t';
-      printRegisterList(MI, 5, O);
-      return;
-    }
-  }
-
-  // A8.6.354 VPOP
-  if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
-      MI->getOperand(0).getReg() == ARM::SP) {
-    const MCOperand &MO1 = MI->getOperand(2);
-    if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
-      O << '\t' << "vpop";
-      printPredicateOperand(MI, 3, O);
-      O << '\t';
-      printRegisterList(MI, 5, O);
-      return;
-    }
-  }
-
-  printInstruction(MI, O);
- }
-
-void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O, const char *Modifier) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    if (Modifier && strcmp(Modifier, "dregpair") == 0) {
-      O << '{' << getRegisterName(Reg) << ", "
-               << getRegisterName(NextReg(Reg)) << '}';
-#if 0
-      // FIXME: Breaks e.g. ARM/vmul.ll.
-      assert(0);
-      /*
-      unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0);
-      unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1);
-      O << '{'
-      << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
-      << '}';*/
-#endif
-    } else if (Modifier && strcmp(Modifier, "lane") == 0) {
-      assert(0);
-      /*
-      unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
-      unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
-                                               &ARM::DPR_VFP2RegClass);
-      O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
-       */
-    } else {
-      O << getRegisterName(Reg);
-    }
-  } else if (Op.isImm()) {
-    assert((Modifier && !strcmp(Modifier, "call")) ||
-           ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
-    O << '#' << Op.getImm();
-  } else {
-    if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0)
-      llvm_unreachable("Unsupported modifier");
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << *Op.getExpr();
-  }
-}
-
-static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
-                       const MCAsmInfo *MAI) {
-  // Break it up into two parts that make up a shifter immediate.
-  V = ARM_AM::getSOImmVal(V);
-  assert(V != -1 && "Not a valid so_imm value!");
-  
-  unsigned Imm = ARM_AM::getSOImmValImm(V);
-  unsigned Rot = ARM_AM::getSOImmValRot(V);
-  
-  // Print low-level immediate formation info, per
-  // A5.1.3: "Data-processing operands - Immediate".
-  if (Rot) {
-    O << "#" << Imm << ", " << Rot;
-    // Pretty printed version.
-    if (VerboseAsm)
-      O << ' ' << MAI->getCommentString()
-      << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
-  } else {
-    O << "#" << Imm;
-  }
-}
-
-
-/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
-/// immediate in bits 0-7.
-void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  printSOImm(O, MO.getImm(), VerboseAsm, &MAI);
-}
-
-/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
-/// followed by an 'orr' to materialize.
-void ARMInstPrinter::printSOImm2PartOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  // FIXME: REMOVE this method.
-  abort();
-}
-
-// so_reg is a 4-operand unit corresponding to register forms of the A5.1
-// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
-//    REG 0   0           - e.g. R5
-//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
-//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
-void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  const MCOperand &MO3 = MI->getOperand(OpNum+2);
-  
-  O << getRegisterName(MO1.getReg());
-  
-  // Print the shift opc.
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (MO2.getReg()) {
-    O << ' ' << getRegisterName(MO2.getReg());
-    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
-  } else if (ShOpc != ARM_AM::rrx) {
-    O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
-  }
-}
-
-
-void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op+1);
-  const MCOperand &MO3 = MI->getOperand(Op+2);
-  
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-  
-  O << "[" << getRegisterName(MO1.getReg());
-  
-  if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
-      O << ", #"
-        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
-        << ARM_AM::getAM2Offset(MO3.getImm());
-    O << "]";
-    return;
-  }
-  
-  O << ", "
-    << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
-    << getRegisterName(MO2.getReg());
-  
-  if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
-    O << ", "
-    << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
-    << " #" << ShImm;
-  O << "]";
-}  
-
-void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  
-  if (!MO1.getReg()) {
-    unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
-    O << '#'
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
-      << ImmOffs;
-    return;
-  }
-  
-  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
-    << getRegisterName(MO1.getReg());
-  
-  if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
-    O << ", "
-    << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
-    << " #" << ShImm;
-}
-
-void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  const MCOperand &MO3 = MI->getOperand(OpNum+2);
-  
-  O << '[' << getRegisterName(MO1.getReg());
-  
-  if (MO2.getReg()) {
-    O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
-      << getRegisterName(MO2.getReg()) << ']';
-    return;
-  }
-  
-  if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
-    O << ", #"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
-      << ImmOffs;
-  O << ']';
-}
-
-void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  
-  if (MO1.getReg()) {
-    O << (char)ARM_AM::getAM3Op(MO2.getImm())
-    << getRegisterName(MO1.getReg());
-    return;
-  }
-  
-  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
-  O << '#'
-    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
-    << ImmOffs;
-}
-
-
-void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
-  if (Modifier && strcmp(Modifier, "submode") == 0) {
-    O << ARM_AM::getAMSubModeStr(Mode);
-  } else if (Modifier && strcmp(Modifier, "wide") == 0) {
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
-    if (Mode == ARM_AM::ia)
-      O << ".w";
-  } else {
-    printOperand(MI, OpNum, O);
-  }
-}
-
-void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, OpNum, O);
-    return;
-  }
-  
-  O << "[" << getRegisterName(MO1.getReg());
-  
-  if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
-    O << ", #"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
-      << ImmOffs*4;
-  }
-  O << "]";
-}
-
-void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  
-  O << "[" << getRegisterName(MO1.getReg());
-  if (MO2.getImm()) {
-    // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << (MO2.getImm() << 3);
-  }
-  O << "]";
-}
-
-void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  if (MO.getReg() == 0)
-    O << "!";
-  else
-    O << ", " << getRegisterName(MO.getReg());
-}
-
-void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O,
-                                            const char *Modifier) {
-  assert(0 && "FIXME: Implement printAddrModePCOperand");
-}
-
-void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
-                                                    unsigned OpNum,
-                                                    raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  uint32_t v = ~MO.getImm();
-  int32_t lsb = CountTrailingZeros_32(v);
-  int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
-  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
-  O << '#' << lsb << ", #" << width;
-}
-
-void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
-                                     raw_ostream &O) {
-  unsigned val = MI->getOperand(OpNum).getImm();
-  O << ARM_MB::MemBOptToString(val);
-}
-
-void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
-                                          raw_ostream &O) {
-  unsigned ShiftOp = MI->getOperand(OpNum).getImm();
-  ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
-  switch (Opc) {
-  case ARM_AM::no_shift:
-    return;
-  case ARM_AM::lsl:
-    O << ", lsl #";
-    break;
-  case ARM_AM::asr:
-    O << ", asr #";
-    break;
-  default:
-    assert(0 && "unexpected shift opcode for shift immediate operand");
-  }
-  O << ARM_AM::getSORegOffset(ShiftOp);
-}
-
-void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  O << "{";
-  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
-    if (i != OpNum) O << ", ";
-    O << getRegisterName(MI->getOperand(i).getReg());
-  }
-  O << "}";
-}
-
-void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  unsigned option = Op.getImm();
-  unsigned mode = option & 31;
-  bool changemode = option >> 5 & 1;
-  unsigned AIF = option >> 6 & 7;
-  unsigned imod = option >> 9 & 3;
-  if (imod == 2)
-    O << "ie";
-  else if (imod == 3)
-    O << "id";
-  O << '\t';
-  if (imod > 1) {
-    if (AIF & 4) O << 'a';
-    if (AIF & 2) O << 'i';
-    if (AIF & 1) O << 'f';
-    if (AIF > 0 && changemode) O << ", ";
-  }
-  if (changemode)
-    O << '#' << mode;
-}
-
-void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  unsigned Mask = Op.getImm();
-  if (Mask) {
-    O << '_';
-    if (Mask & 8) O << 'f';
-    if (Mask & 4) O << 's';
-    if (Mask & 2) O << 'x';
-    if (Mask & 1) O << 'c';
-  }
-}
-
-void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  O << '#';
-  if (Op.getImm() < 0)
-    O << '-' << (-Op.getImm() - 1);
-  else
-    O << Op.getImm();
-}
-
-void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  if (CC != ARMCC::AL)
-    O << ARMCondCodeToString(CC);
-}
-
-void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, 
-                                                    unsigned OpNum,
-                                                    raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  O << ARMCondCodeToString(CC);
-}
-
-void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
-                                              raw_ostream &O) {
-  if (MI->getOperand(OpNum).getReg()) {
-    assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
-           "Expect ARM CPSR register!");
-    O << 's';
-  }
-}
-
-
-
-void ARMInstPrinter::printCPInstOperand(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O,
-                                        const char *Modifier) {
-  // FIXME: remove this.
-  abort();
-}
-
-void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
-                                          raw_ostream &O) {
-  O << MI->getOperand(OpNum).getImm();
-}
-
-
-void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O) {
-  // FIXME: remove this.
-  abort();
-}
-
-void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  O << "#" <<  MI->getOperand(OpNum).getImm() * 4;
-}
-
-void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O) {
-  // (3 - the number of trailing zeros) is the number of then / else.
-  unsigned Mask = MI->getOperand(OpNum).getImm();
-  unsigned CondBit0 = Mask >> 4 & 1;
-  unsigned NumTZ = CountTrailingZeros_32(Mask);
-  assert(NumTZ <= 3 && "Invalid IT mask!");
-  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
-    bool T = ((Mask >> Pos) & 1) == CondBit0;
-    if (T)
-      O << 't';
-    else
-      O << 'e';
-  }
-}
-
-void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op+1);
-  O << "[" << getRegisterName(MO1.getReg());
-  O << ", " << getRegisterName(MO2.getReg()) << "]";
-}
-
-void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op,
-                                                  raw_ostream &O,
-                                                  unsigned Scale) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op+1);
-  const MCOperand &MO3 = MI->getOperand(Op+2);
-
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, O);
-    return;
-  }
-
-  O << "[" << getRegisterName(MO1.getReg());
-  if (MO3.getReg())
-    O << ", " << getRegisterName(MO3.getReg());
-  else if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs * Scale;
-  O << "]";
-}
-
-void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op,
-                                                 raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 1);
-}
-
-void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op,
-                                                 raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 2);
-}
-
-void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op,
-                                                 raw_ostream &O) {
-  printThumbAddrModeRI5Operand(MI, Op, O, 4);
-}
-
-void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op+1);
-  O << "[" << getRegisterName(MO1.getReg());
-  if (unsigned ImmOffs = MO2.getImm())
-    O << ", #" << ImmOffs*4;
-  O << "]";
-}
-
-void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum,
-                                     raw_ostream &O) {
-  O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
-  if (MI->getOpcode() == ARM::t2TBH)
-    O << ", lsl #1";
-  O << ']';
-}
-
-// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
-// register with shift forms.
-// REG 0   0           - e.g. R5
-// REG IMM, SH_OPC     - e.g. R5, LSL #3
-void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
-  unsigned Reg = MO1.getReg();
-  O << getRegisterName(Reg);
-
-  // Print the shift opc.
-  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (ShOpc != ARM_AM::rrx)
-    O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
-}
-
-void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  unsigned OffImm = MO2.getImm();
-  if (OffImm)  // Don't print +0.
-    O << ", #" << OffImm;
-  O << "]";
-}
-
-void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
-                                                unsigned OpNum,
-                                                raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm();
-  // Don't print +0.
-  if (OffImm < 0)
-    O << ", #-" << -OffImm;
-  else if (OffImm > 0)
-    O << ", #" << OffImm;
-  O << "]";
-}
-
-void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
-                                                  unsigned OpNum,
-                                                  raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm() / 4;
-  // Don't print +0.
-  if (OffImm < 0)
-    O << ", #-" << -OffImm * 4;
-  else if (OffImm > 0)
-    O << ", #" << OffImm * 4;
-  O << "]";
-}
-
-void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
-                                                      unsigned OpNum,
-                                                      raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  int32_t OffImm = (int32_t)MO1.getImm();
-  // Don't print +0.
-  if (OffImm < 0)
-    O << "#-" << -OffImm;
-  else if (OffImm > 0)
-    O << "#" << OffImm;
-}
-
-void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
-                                                        unsigned OpNum,
-                                                        raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  int32_t OffImm = (int32_t)MO1.getImm() / 4;
-  // Don't print +0.
-  if (OffImm < 0)
-    O << "#-" << -OffImm * 4;
-  else if (OffImm > 0)
-    O << "#" << OffImm * 4;
-}
-
-void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-  const MCOperand &MO3 = MI->getOperand(OpNum+2);
-
-  O << "[" << getRegisterName(MO1.getReg());
-
-  assert(MO2.getReg() && "Invalid so_reg load / store address!");
-  O << ", " << getRegisterName(MO2.getReg());
-
-  unsigned ShAmt = MO3.getImm();
-  if (ShAmt) {
-    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
-    O << ", lsl #" << ShAmt;
-  }
-  O << "]";
-}
-
-void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  O << '#' << MI->getOperand(OpNum).getImm();
-}
-
-void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-  O << '#' << MI->getOperand(OpNum).getImm();
-}
-
-void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  unsigned EncodedImm = MI->getOperand(OpNum).getImm();
-  unsigned EltBits;
-  uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
-  O << "#0x" << utohexstr(Val);
-}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
deleted file mode 100644
index e5ad0d07e9ba..000000000000
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ /dev/null
@@ -1,118 +0,0 @@
-//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMINSTPRINTER_H
-#define ARMINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-  class MCOperand;
-  
-class ARMInstPrinter : public MCInstPrinter {
-  bool VerboseAsm;
-public:
-  ARMInstPrinter(const MCAsmInfo &MAI, bool verboseAsm)
-    : MCInstPrinter(MAI), VerboseAsm(verboseAsm) {}
-
-  virtual void printInst(const MCInst *MI, raw_ostream &O);
-  
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                    const char *Modifier = 0);
-    
-  void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printSOImm2PartOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  
-  void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printAddrMode4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
-                             const char *Modifier = 0);
-  void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
-                             const char *Modifier = 0);
-  void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
-                              const char *Modifier = 0);
-
-  void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O);
-  void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
-  void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
-                                    raw_ostream &O, unsigned Scale);
-  void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  
-  void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O);
-  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
-                                    raw_ostream &O);
-  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O);
-  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                          raw_ostream &O);
-  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
-                                   raw_ostream &O);
-  
-  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O);
-  void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
-                                raw_ostream &O);
-  void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printCPInstOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
-                          const char *Modifier);
-  void printJTBlockOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {}
-  void printJT2BlockOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {}
-  void printTBAddrMode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
-  void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);  
-  // FIXME: Implement.
-  void PrintSpecial(const MCInst *MI, raw_ostream &O, const char *Kind) {}
-};
-  
-}
-
-#endif
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 18645c0864a3..000000000000
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARMAsmPrinter
-  ARMInstPrinter.cpp
-  )
-add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile
deleted file mode 100644
index 65d372e44b88..000000000000
--- a/lib/Target/ARM/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 6b4dee5965d2..d3b8b54e76b8 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
 tablegen(ARMGenInstrNames.inc -gen-instr-enums)
 tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
 tablegen(ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
 tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
 tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(ARMGenDAGISel.inc -gen-dag-isel)
@@ -13,21 +14,28 @@ tablegen(ARMGenFastISel.inc -gen-fast-isel)
 tablegen(ARMGenCallingConv.inc -gen-callingconv)
 tablegen(ARMGenSubtarget.inc -gen-subtarget)
 tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
 
 add_llvm_target(ARMCodeGen
+  ARMAsmBackend.cpp
   ARMAsmPrinter.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
   ARMCodeEmitter.cpp
   ARMConstantIslandPass.cpp
   ARMConstantPoolValue.cpp
+  ARMELFWriterInfo.cpp
   ARMExpandPseudoInsts.cpp
   ARMFastISel.cpp
+  ARMFrameLowering.cpp
   ARMGlobalMerge.cpp
+  ARMHazardRecognizer.cpp
   ARMISelDAGToDAG.cpp
   ARMISelLowering.cpp
   ARMInstrInfo.cpp
   ARMJITInfo.cpp
+  ARMMCCodeEmitter.cpp
+  ARMMCExpr.cpp
   ARMLoadStoreOptimizer.cpp
   ARMMCAsmInfo.cpp
   ARMMCInstLower.cpp
@@ -36,15 +44,26 @@ add_llvm_target(ARMCodeGen
   ARMSubtarget.cpp
   ARMTargetMachine.cpp
   ARMTargetObjectFile.cpp
+  MLxExpansionPass.cpp
   NEONMoveFix.cpp
-  NEONPreAllocPass.cpp
   Thumb1InstrInfo.cpp
+  Thumb1FrameLowering.cpp
   Thumb1RegisterInfo.cpp
-  Thumb2HazardRecognizer.cpp
   Thumb2ITBlockPass.cpp
   Thumb2InstrInfo.cpp
   Thumb2RegisterInfo.cpp
   Thumb2SizeReduction.cpp
   )
 
-target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
+# workaround for hanging compilation on MSVC10
+if( MSVC_VERSION EQUAL 1600 )
+set_property(
+  SOURCE ARMISelLowering.cpp
+  PROPERTY COMPILE_FLAGS "/Od"
+  )
+endif()
+
+add_subdirectory(TargetInfo)
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e22028985b46..78d73d3a272b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -39,9 +39,9 @@
 /// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
 /// function for a Thumb instruction.
 ///
-#include "../ARMGenDecoderTables.inc"
+#include "ARMGenDecoderTables.inc"
 
-#include "../ARMGenEDInfo.inc"
+#include "ARMGenEDInfo.inc"
 
 using namespace llvm;
 
@@ -89,7 +89,8 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
       return ARM::BFI;
   }
 
-  // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+  // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+  // A1 & A2.
   // As a result, the decoder fails to deocode USAT properly.
   if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
     return ARM::USAT;
@@ -252,9 +253,6 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
   default:
     return Opcode; // Return unmorphed opcode.
 
-  case ARM::t2LDRDi8:
-    return ARM::t2LDRDpci;
-
   case ARM::t2LDR_POST:   case ARM::t2LDR_PRE:
   case ARM::t2LDRi12:     case ARM::t2LDRi8:
   case ARM::t2LDRs:       case ARM::t2LDRT:
@@ -349,36 +347,6 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
   return decodeThumbInstruction(insn);
 }
 
-static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case ARM::t2PLDi12:   case ARM::t2PLDi8:
-  case ARM::t2PLDr:     case ARM::t2PLDs:
-  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
-  case ARM::t2PLDWr:    case ARM::t2PLDWs:
-  case ARM::t2PLIi12:   case ARM::t2PLIi8:
-  case ARM::t2PLIr:     case ARM::t2PLIs:
-    return true;
-  }
-}
-
-static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return 0;
-  case ARM::t2PLDi12:   case ARM::t2PLDi8:
-  case ARM::t2PLDr:     case ARM::t2PLDs:
-    return ARM::t2PLDpci;
-  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
-  case ARM::t2PLDWr:    case ARM::t2PLDWs:
-    return ARM::t2PLDWpci;
-  case ARM::t2PLIi12:   case ARM::t2PLIi8:
-  case ARM::t2PLIr:     case ARM::t2PLIs:
-    return ARM::t2PLIpci;
-  }
-}
-
 //
 // Public interface for the disassembler
 //
@@ -485,11 +453,6 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
   // instructions as well.
   unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
 
-  // A8.6.117/119/120/121.
-  // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant.
-  if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
-    Opcode = T2Morph2Preload2PCI(Opcode);
-
   ARMFormat Format = ARMFormats[Opcode];
   Size = IsThumb2 ? 4 : 2;
 
@@ -568,9 +531,9 @@ static MCDisassembler *createThumbDisassembler(const Target &T) {
   return new ThumbDisassembler;
 }
 
-extern "C" void LLVMInitializeARMDisassembler() { 
+extern "C" void LLVMInitializeARMDisassembler() {
   // Register the disassembler.
-  TargetRegistry::RegisterMCDisassembler(TheARMTarget, 
+  TargetRegistry::RegisterMCDisassembler(TheARMTarget,
                                          createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
                                          createThumbDisassembler);
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index 9f493b9aee02..bac68dd9ead0 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -79,22 +79,9 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
 }
 
 // Return the register enum Based on RegClass and the raw register number.
-// For DRegPair, see comments below.
 // FIXME: Auto-gened?
-static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
-                                bool DRegPair = false) {
-
-  if (DRegPair && RegClassID == ARM::QPRRegClassID) {
-    // LLVM expects { Dd, Dd+1 } to form a super register; this is not specified
-    // in the ARM Architecture Manual as far as I understand it (A8.6.307).
-    // Therefore, we morph the RegClassID to be the sub register class and don't
-    // subsequently transform the RawRegister encoding when calculating RegNum.
-    //
-    // See also ARMinstPrinter::printOperand() wrt "dregpair" modifier part
-    // where this workaround is meant for.
-    RegClassID = ARM::DPRRegClassID;
-  }
-
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
   // For this purpose, we can treat rGPR as if it were GPR.
   if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
 
@@ -704,8 +691,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
 // MSR/MSRsys: Rm mask=Inst{19-16}
 // BXJ:        Rm
 // MSRi/MSRsysi: so_imm
-// SRSW/SRS: addrmode4:$addr mode_imm
-// RFEW/RFE: addrmode4:$addr Rn
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
 static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -733,35 +720,34 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     NumOpsAdded = 1;
     return true;
   }
-  // MSR and MSRsys take one GPR reg Rm, followed by the mask.
-  if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) {
-    assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+  // MSR take a mask, followed by one GPR reg Rm. The mask contains the R Bit in
+  // bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::MSR) {
+    assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
            "Reg operand expected");
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+                                       slice(insn, 19, 16) /* Special Reg */ ));
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
     NumOpsAdded = 2;
     return true;
   }
-  // MSRi and MSRsysi take one so_imm operand, followed by the mask.
-  if (Opcode == ARM::MSRi || Opcode == ARM::MSRsysi) {
+  // MSRi take a mask, followed by one so_imm operand. The mask contains the
+  // R Bit in bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::MSRi) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+                                       slice(insn, 19, 16) /* Special Reg */ ));
     // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
     // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
     // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
     unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
     unsigned Imm = insn & 0xFF;
     MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
     NumOpsAdded = 2;
     return true;
   }
-  // SRSW and SRS requires addrmode4:$addr for ${addr:submode}, followed by the
-  // mode immediate (Inst{4-0}).
   if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
       Opcode == ARM::RFEW || Opcode == ARM::RFE) {
-    // ARMInstPrinter::printAddrMode4Operand() prints special mode string
-    // if the base register is SP; so don't set ARM::SP.
-    MI.addOperand(MCOperand::CreateReg(0));
     ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
     MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
 
@@ -807,9 +793,8 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // Misc. Branch Instructions.
-// BR_JTadd, BR_JTr, BR_JTm
 // BLXr9, BXr9
-// BRIND, BX_RET
+// BX, BX_RET
 static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -820,12 +805,12 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   OpIdx = 0;
 
-  // BX_RET has only two predicate operands, do an early return.
-  if (Opcode == ARM::BX_RET)
+  // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+  if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
     return true;
 
-  // BLXr9 and BRIND take one GPR reg.
-  if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) {
+  // BLXr9 and BX take one GPR reg.
+  if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) {
     assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
            "Reg operand expected");
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -834,72 +819,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     return true;
   }
 
-  // BR_JTadd is an ADD with Rd = PC, (Rn, Rm) as the target and index regs.
-  if (Opcode == ARM::BR_JTadd) {
-    // InOperandList with GPR:$target and GPR:$idx regs.
-
-    assert(NumOps == 4 && "Expect 4 operands");
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRn(insn))));
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-
-    // Fill in the two remaining imm operands to signify build completion.
-    MI.addOperand(MCOperand::CreateImm(0));
-    MI.addOperand(MCOperand::CreateImm(0));
-
-    OpIdx = 4;
-    return true;
-  }
-
-  // BR_JTr is a MOV with Rd = PC, and Rm as the source register.
-  if (Opcode == ARM::BR_JTr) {
-    // InOperandList with GPR::$target reg.
-
-    assert(NumOps == 3 && "Expect 3 operands");
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-
-    // Fill in the two remaining imm operands to signify build completion.
-    MI.addOperand(MCOperand::CreateImm(0));
-    MI.addOperand(MCOperand::CreateImm(0));
-
-    OpIdx = 3;
-    return true;
-  }
-
-  // BR_JTm is an LDR with Rt = PC.
-  if (Opcode == ARM::BR_JTm) {
-    // This is the reg/reg form, with base reg followed by +/- reg shop imm.
-    // See also ARMAddressingModes.h (Addressing Mode #2).
-
-    assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1");
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRn(insn))));
-
-    ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-
-    // Disassemble the offset reg (Rm), shift type, and immediate shift length.
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-    // Inst{6-5} encodes the shift opcode.
-    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
-    // Inst{11-7} encodes the imm5 shift amount.
-    unsigned ShImm = slice(insn, 11, 7);
-
-    // A8.4.1.  Possible rrx or shift amount of 32...
-    getImmShiftSE(ShOp, ShImm);
-    MI.addOperand(MCOperand::CreateImm(
-                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
-
-    // Fill in the two remaining imm operands to signify build completion.
-    MI.addOperand(MCOperand::CreateImm(0));
-    MI.addOperand(MCOperand::CreateImm(0));
-
-    OpIdx = 5;
-    return true;
-  }
-
   return false;
 }
 
@@ -1324,30 +1243,28 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
   assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
-
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
+  NumOpsAdded = 0;
 
   unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
 
   // Writeback to base, if necessary.
-  if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) {
+  if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
+      Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
+      Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
+      Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
     MI.addOperand(MCOperand::CreateReg(Base));
-    ++OpIdx;
+    ++NumOpsAdded;
   }
 
+  // Add the base register operand.
   MI.addOperand(MCOperand::CreateReg(Base));
 
-  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
   // Handling the two predicate operands before the reglist.
   int64_t CondVal = insn >> ARMII::CondShift;
   MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
   MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
 
-  OpIdx += 4;
+  NumOpsAdded += 3;
 
   // Fill the variadic part of reglist.
   unsigned RegListBits = insn & ((1 << 16) - 1);
@@ -1355,7 +1272,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     if ((RegListBits >> i) & 1) {
       MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                          i)));
-      ++OpIdx;
+      ++NumOpsAdded;
     }
   }
 
@@ -1586,8 +1503,7 @@ static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
 }
 
 // A7.5.1
-#if 0
-static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
   assert(N == 32 || N == 64);
 
   uint64_t Result;
@@ -1602,13 +1518,12 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
     Result = (uint64_t)slice(byte, 7, 7) << 63 |
              (uint64_t)slice(byte, 5, 0) << 48;
     if (bit6)
-      Result |= 0xffL << 54;
+      Result |= 0xffULL << 54;
     else
-      Result |= 0x1L << 62;
+      Result |= 0x1ULL << 62;
   }
-  return Result;
+  return APInt(N, Result);
 }
-#endif
 
 // VFP Unary Format Instructions:
 //
@@ -1902,8 +1817,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
 
   // Writeback to base, if necessary.
-  if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD ||
-      Opcode == ARM::VSTMD_UPD || Opcode == ARM::VSTMS_UPD) {
+  if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
+      Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
+      Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
+      Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
     MI.addOperand(MCOperand::CreateReg(Base));
     ++OpIdx;
   }
@@ -1926,8 +1843,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   OpIdx += 4;
 
-  bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
-                  Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
+  bool isSPVFP = (Opcode == ARM::VLDMSIA     || Opcode == ARM::VLDMSDB     ||
+                  Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
+                  Opcode == ARM::VSTMSIA     || Opcode == ARM::VSTMSDB     ||
+                  Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
   unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
 
   // Extract Dd/Sd.
@@ -1985,10 +1904,14 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   // Extract/decode the f64/f32 immediate.
   if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
         && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-    // The asm syntax specifies the before-expanded <imm>.
-    // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
-    //                  Opcode == ARM::FCONSTD ? 64 : 32)
-    MI.addOperand(MCOperand::CreateImm(slice(insn,19,16)<<4 | slice(insn,3,0)));
+    // The asm syntax specifies the floating point value, not the 8-bit literal.
+    APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+                             Opcode == ARM::FCONSTD ? 64 : 32);
+    APFloat immFP = APFloat(immRaw, true);
+    double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+      immFP.convertToFloat();
+    MI.addOperand(MCOperand::CreateFPImm(imm));
+
     ++OpIdx;
   }
 
@@ -2201,22 +2124,6 @@ static unsigned decodeN3VImm(uint32_t insn) {
   return (insn >> 8) & 0xF;
 }
 
-static bool UseDRegPair(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case ARM::VLD1q8_UPD:
-  case ARM::VLD1q16_UPD:
-  case ARM::VLD1q32_UPD:
-  case ARM::VLD1q64_UPD:
-  case ARM::VST1q8_UPD:
-  case ARM::VST1q16_UPD:
-  case ARM::VST1q32_UPD:
-  case ARM::VST1q64_UPD:
-    return true;
-  }
-}
-
 // VLD*
 //   D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
 // VLD*LN*
@@ -2243,10 +2150,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   // We have homogeneous NEON registers for Load/Store.
   unsigned RegClass = 0;
-  bool DRegPair = UseDRegPair(Opcode);
 
   // Double-spaced registers have increments of 2.
-  unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
+  unsigned Inc = DblSpaced ? 2 : 1;
 
   unsigned Rn = decodeRn(insn);
   unsigned Rm = decodeRm(insn);
@@ -2292,7 +2198,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
     RegClass = OpInfo[OpIdx].RegClass;
     while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
       MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd, DRegPair)));
+                      getRegisterEnum(B, RegClass, Rd)));
       Rd += Inc;
       ++OpIdx;
     }
@@ -2311,7 +2217,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
 
     while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
       MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd, DRegPair)));
+                      getRegisterEnum(B, RegClass, Rd)));
       Rd += Inc;
       ++OpIdx;
     }
@@ -2771,8 +2677,8 @@ static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
   return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
                                   N3V_VectorShift, B);
 }
-static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
   return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
                                   N3V_VectorExtract, B);
@@ -2959,9 +2865,9 @@ static inline bool MemBarrierInstr(uint32_t insn) {
 
 static inline bool PreLoadOpcode(unsigned Opcode) {
   switch(Opcode) {
-  case ARM::PLDi:  case ARM::PLDr:
-  case ARM::PLDWi: case ARM::PLDWr:
-  case ARM::PLIi:  case ARM::PLIr:
+  case ARM::PLDi12:  case ARM::PLDrs:
+  case ARM::PLDWi12: case ARM::PLDWrs:
+  case ARM::PLIi12:  case ARM::PLIrs:
     return true;
   default:
     return false;
@@ -2971,18 +2877,21 @@ static inline bool PreLoadOpcode(unsigned Opcode) {
 static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  // Preload Data/Instruction requires either 2 or 4 operands.
-  // PLDi, PLDWi, PLIi:                Rn [+/-]imm12 add = (U == '1')
-  // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc
+  // Preload Data/Instruction requires either 2 or 3 operands.
+  // PLDi, PLDWi, PLIi:                addrmode_imm12
+  // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg
 
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRn(insn))));
 
-  if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) {
+  if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
+      || Opcode == ARM::PLIi12) {
     unsigned Imm12 = slice(insn, 11, 0);
     bool Negative = getUBit(insn) == 0;
-    int Offset = Negative ? -1 - Imm12 : 1 * Imm12;
-    MI.addOperand(MCOperand::CreateImm(Offset));
+    // -0 is represented specially. All other values are as normal.
+    if (Imm12 == 0 && Negative)
+      Imm12 = INT32_MIN;
+    MI.addOperand(MCOperand::CreateImm(Imm12));
     NumOpsAdded = 2;
   } else {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -3026,22 +2935,36 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   case ARM::WFE:
   case ARM::WFI:
   case ARM::SEV:
-  case ARM::SETENDBE:
-  case ARM::SETENDLE:
     return true;
   default:
     break;
   }
 
-  // CPS has a singleton $opt operand that contains the following information:
-  // opt{4-0} = mode from Inst{4-0}
-  // opt{5} = changemode from Inst{17}
-  // opt{8-6} = AIF from Inst{8-6}
-  // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-  if (Opcode == ARM::CPS) {
-    unsigned Option = slice(insn, 4, 0) | slice(insn, 17, 17) << 5 |
-      slice(insn, 8, 6) << 6 | slice(insn, 19, 18) << 9;
-    MI.addOperand(MCOperand::CreateImm(Option));
+  if (Opcode == ARM::SETEND) {
+    NumOpsAdded = 1;
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+    return true;
+  }
+
+  // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+  // opcodes which match the same real instruction. This is needed since there's
+  // no current handling of optional arguments. Fix here when a better handling
+  // of optional arguments is implemented.
+  if (Opcode == ARM::CPS3p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6)));   // iflags
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));   // mode
+    NumOpsAdded = 3;
+    return true;
+  }
+  if (Opcode == ARM::CPS2p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6)));   // iflags
+    NumOpsAdded = 2;
+    return true;
+  }
+  if (Opcode == ARM::CPS1p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
     NumOpsAdded = 1;
     return true;
   }
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..b23dd6ba57ef
--- /dev/null
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -0,0 +1,14 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMDisassembler
+  ARMDisassembler.cpp
+  ARMDisassemblerCore.cpp
+  )
+# workaround for hanging compilation on MSVC8, 9 and 10
+if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+set_property(
+  SOURCE ARMDisassembler.cpp
+  PROPERTY COMPILE_FLAGS "/Od"
+  )
+endif()
+add_dependencies(LLVMARMDisassembler ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 112817b13cf9..23372e022414 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -564,6 +564,38 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
 // t_addrmode_sp := sp + imm8 * 4
 //
 
+// A8.6.63 LDRB (literal)
+// A8.6.79 LDRSB (literal)
+// A8.6.75 LDRH (literal)
+// A8.6.83 LDRSH (literal)
+// A8.6.59 LDR (literal)
+//
+// These instrs calculate an address from the PC value and an immediate offset.
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
+  assert(NumOps >= 2 &&
+         OpInfo[0].RegClass == ARM::GPRRegClassID &&
+         OpInfo[1].RegClass < 0 &&
+         "Expect >= 2 operands, first as reg, and second as imm operand");
+
+  // Build the register operand, followed by the (+/-)imm12 immediate.
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                     decodeRd(insn))));
+
+  MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+  NumOpsAdded = 2;
+
+  return true;
+}
+
+
 // A6.2.4 Load/store single data item
 //
 // Load/Store Register (reg|imm):      tRd tRn imm5 tRm
@@ -796,14 +828,13 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
   }
 
   // CPS has a singleton $opt operand that contains the following information:
-  // opt{4-0} = don't care
-  // opt{5} = 0 (false)
-  // opt{8-6} = AIF from Inst{2-0}
-  // opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
+  // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+  // but in Thumb it's is 0 as enable and 1 as disable. So map it to ARM's
+  // default one. The second get the AIF flags from Inst{2-0}.
   if (Opcode == ARM::tCPS) {
-    unsigned Option = slice(insn, 2, 0) << 6 | slice(insn, 4, 4) << 9 | 1 << 10;
-    MI.addOperand(MCOperand::CreateImm(Option));
-    NumOpsAdded = 1;
+    MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+    NumOpsAdded = 2;
     return true;
   }
 
@@ -833,40 +864,32 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
 // A8.6.53  LDM / LDMIA
 // A8.6.189 STM / STMIA
 //
-// tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDM:              tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA:                tRt AM4ModeImm Pred-Imm Pred-CCR register_list
 static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD ||
-          Opcode == ARM::tSTM_UPD) && "Unexpected opcode");
-
-  unsigned &OpIdx = NumOpsAdded;
+                                     uint32_t insn, unsigned short NumOps,
+                                     unsigned &NumOpsAdded, BO B) {
+  assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+          Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
 
   unsigned tRt = getT1tRt(insn);
-
-  OpIdx = 0;
+  NumOpsAdded = 0;
 
   // WB register, if necessary.
-  if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) {
+  if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        tRt)));
-    ++OpIdx;
+    ++NumOpsAdded;
   }
 
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      tRt)));
-  ++OpIdx;
-
-  // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1
-  // A8.6.53 STM / STMIA / STMEA - Encoding T1
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)));
-  ++OpIdx;
+  ++NumOpsAdded;
 
   // Handling the two predicate operands before the reglist.
-  if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
-    OpIdx += 2;
-  else {
+  if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+    NumOpsAdded += 2;
+  } else {
     DEBUG(errs() << "Expected predicate operands not found.\n");
     return false;
   }
@@ -874,13 +897,12 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
   unsigned RegListBits = slice(insn, 7, 0);
 
   // Fill the variadic part of reglist.
-  for (unsigned i = 0; i < 8; ++i) {
+  for (unsigned i = 0; i < 8; ++i)
     if ((RegListBits >> i) & 1) {
       MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
                                                          i)));
-      ++OpIdx;
+      ++NumOpsAdded;
     }
-  }
 
   return true;
 }
@@ -959,22 +981,23 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
 // corresponding to op.
 //
 // Table A6-1 16-bit Thumb instruction encoding (abridged)
-// op		Instruction or instruction class
-// ------	--------------------------------------------------------------------
-// 00xxxx	Shift (immediate), add, subtract, move, and compare on page A6-7
-// 010000	Data-processing on page A6-8
-// 010001	Special data instructions and branch and exchange on page A6-9
-// 01001x	Load from Literal Pool, see LDR (literal) on page A8-122
-// 0101xx	Load/store single data item on page A6-10
+// op    Instruction or instruction class
+// ------  --------------------------------------------------------------------
+// 00xxxx  Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000  Data-processing on page A6-8
+// 010001  Special data instructions and branch and exchange on page A6-9
+// 01001x  Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx  Load/store single data item on page A6-10
 // 011xxx
 // 100xxx
-// 10100x	Generate PC-relative address, see ADR on page A8-32
-// 10101x	Generate SP-relative address, see ADD (SP plus immediate) on page A8-28
-// 1011xx	Miscellaneous 16-bit instructions on page A6-11
-// 11000x	Store multiple registers, see STM / STMIA / STMEA on page A8-374
-// 11001x	Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
-// 1101xx	Conditional branch, and Supervisor Call on page A6-13
-// 11100x	Unconditional Branch, see B on page A8-44
+// 10100x  Generate PC-relative address, see ADR on page A8-32
+// 10101x  Generate SP-relative address, see ADD (SP plus immediate) on
+//         page A8-28
+// 1011xx  Miscellaneous 16-bit instructions on page A6-11
+// 11000x  Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x  Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
+// 1101xx  Conditional branch, and Supervisor Call on page A6-13
+// 11100x  Unconditional Branch, see B on page A8-44
 //
 static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1121,34 +1144,31 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (Thumb2RFEOpcode(Opcode))
     return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
 
-  assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD ||
-          Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD)
+  assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
+          Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
+          Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
+          Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
          && "Unexpected opcode");
   assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
 
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
+  NumOpsAdded = 0;
 
   unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
 
   // Writeback to base.
-  if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) {
+  if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
+      Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
     MI.addOperand(MCOperand::CreateReg(Base));
-    ++OpIdx;
+    ++NumOpsAdded;
   }
 
   MI.addOperand(MCOperand::CreateReg(Base));
-  ++OpIdx;
-
-  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-  ++OpIdx;
+  ++NumOpsAdded;
 
   // Handling the two predicate operands before the reglist.
-  if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
-    OpIdx += 2;
-  else {
+  if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+    NumOpsAdded += 2;
+  } else {
     DEBUG(errs() << "Expected predicate operands not found.\n");
     return false;
   }
@@ -1156,13 +1176,12 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
   unsigned RegListBits = insn & ((1 << 16) - 1);
 
   // Fill the variadic part of reglist.
-  for (unsigned i = 0; i < 16; ++i) {
+  for (unsigned i = 0; i < 16; ++i)
     if ((RegListBits >> i) & 1) {
       MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                          i)));
-      ++OpIdx;
+      ++NumOpsAdded;
     }
-  }
 
   return true;
 }
@@ -1260,13 +1279,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
   return true;
 }
 
-// PC-based defined for Codegen, which do not get decoded by design:
-//
-// t2TBB, t2TBH: Rm immDontCare immDontCare
-//
-// Generic version defined for disassembly:
-//
-// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR
+// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
 static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
@@ -1401,7 +1414,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
 //
 // Two register operands: Rs Rn ModImm
 // One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm
-// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - {t2MOVi, t2MVNi}
+// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
 //
 // ModImm = ThumbExpandImm(i:imm3:imm8)
 static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
@@ -1644,15 +1658,25 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
     break;
   }
 
-  // CPS has a singleton $opt operand that contains the following information:
-  // opt{4-0} = mode from Inst{4-0}
-  // opt{5} = changemode from Inst{8}
-  // opt{8-6} = AIF from Inst{7-5}
-  // opt{10-9} = imod from Inst{10-9} with 0b10 as enable and 0b11 as disable
-  if (Opcode == ARM::t2CPS) {
-    unsigned Option = slice(insn, 4, 0) | slice(insn, 8, 8) << 5 |
-      slice(insn, 7, 5) << 6 | slice(insn, 10, 9) << 9;
-    MI.addOperand(MCOperand::CreateImm(Option));
+  // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+  // opcodes which match the same real instruction. This is needed since there's
+  // no current handling of optional arguments. Fix here when a better handling
+  // of optional arguments is implemented.
+  if (Opcode == ARM::t2CPS3p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5)));  // iflags
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));  // mode
+    NumOpsAdded = 3;
+    return true;
+  }
+  if (Opcode == ARM::t2CPS2p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5)));  // iflags
+    NumOpsAdded = 2;
+    return true;
+  }
+  if (Opcode == ARM::t2CPS1p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
     NumOpsAdded = 1;
     return true;
   }
@@ -1678,11 +1702,13 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
     NumOpsAdded = 1;
     return true;
   }
-  // MSR and MSRsys take one GPR reg Rn, followed by the mask.
-  if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) {
+  // MSR take a mask, followed by one GPR reg Rn. The mask contains the R Bit in
+  // bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::t2MSR) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+                                       slice(insn, 11, 8) /* Special Reg */));
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRn(insn))));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
     NumOpsAdded = 2;
     return true;
   }
@@ -1728,12 +1754,12 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     return false;
-  case ARM::t2PLDi12:   case ARM::t2PLDi8:   case ARM::t2PLDpci:
-  case ARM::t2PLDr:     case ARM::t2PLDs:
-  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:  case ARM::t2PLDWpci:
-  case ARM::t2PLDWr:    case ARM::t2PLDWs:
-  case ARM::t2PLIi12:   case ARM::t2PLIi8:   case ARM::t2PLIpci:
-  case ARM::t2PLIr:     case ARM::t2PLIs:
+  case ARM::t2PLDi12:   case ARM::t2PLDi8:
+  case ARM::t2PLDs:
+  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
+  case ARM::t2PLDWs:
+  case ARM::t2PLIi12:   case ARM::t2PLIi8:
+  case ARM::t2PLIs:
     return true;
   }
 }
@@ -1769,11 +1795,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
            && !OpInfo[OpIdx].isOptionalDef()
            && "Pure imm operand expected");
     int Offset = 0;
-    if (Opcode == ARM::t2PLDpci || Opcode == ARM::t2PLDWpci ||
-             Opcode == ARM::t2PLIpci) {
+    if (slice(insn, 19, 16) == 0xFF) {
       bool Negative = slice(insn, 23, 23) == 0;
       unsigned Imm12 = getImm12(insn);
-      Offset = Negative ? -1 - Imm12 : 1 * Imm12;      
+      Offset = Negative ? -1 - Imm12 : 1 * Imm12;
     } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
                Opcode == ARM::t2PLIi8) {
       // A8.6.117 Encoding T2: add = FALSE
@@ -1795,37 +1820,6 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
   return true;
 }
 
-// A8.6.63 LDRB (literal)
-// A8.6.79 LDRSB (literal)
-// A8.6.75 LDRH (literal)
-// A8.6.83 LDRSH (literal)
-// A8.6.59 LDR (literal)
-//
-// These instrs calculate an address from the PC value and an immediate offset.
-// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
-static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  if (!OpInfo) return false;
-
-  assert(NumOps >= 2 &&
-         OpInfo[0].RegClass == ARM::GPRRegClassID &&
-         OpInfo[1].RegClass < 0 &&
-         "Expect >= 2 operands, first as reg, and second as imm operand");
-
-  // Build the register operand, followed by the (+/-)imm12 immediate.
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
-
-  MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
-
-  NumOpsAdded = 2;
-
-  return true;
-}
-
 // A6.3.10 Store single data item
 // A6.3.9 Load byte, memory hints
 // A6.3.8 Load halfword, memory hints
@@ -1835,13 +1829,15 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
 //
 // t2LDRi12:   Rd Rn (+)imm12
 // t2LDRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2LDRs:     Rd Rn Rm ConstantShiftSpecifier (see also
+//             DisassembleThumb2DPSoReg)
 // t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 // t2LDR_PRE:  Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 //
 // t2STRi12:   Rd Rn (+)imm12
 // t2STRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2STRs:     Rd Rn Rm ConstantShiftSpecifier (see also
+//             DisassembleThumb2DPSoReg)
 // t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 // t2STR_PRE:  Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
 //
@@ -1862,7 +1858,6 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
   // See, for example, A6.3.7 Load word: Table A6-18 Load word.
   if (Load && Rn == 15)
     return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
   const TargetInstrDesc &TID = ARMInsts[Opcode];
   const TargetOperandInfo *OpInfo = TID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
@@ -1909,7 +1904,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
     else
       Imm = decodeImm8(insn);
   }
-  
+
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      R0)));
   ++OpIdx;
@@ -2081,25 +2076,29 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
 // corresponding to (op1, op2, op).
 //
 // Table A6-9 32-bit Thumb instruction encoding
-// op1	op2		op	Instruction class, see
-// ---	-------	--	------------------------------------------------------------
-// 01	00xx0xx	-	Load/store multiple on page A6-23
-// 		00xx1xx	-	Load/store dual, load/store exclusive, table branch on page A6-24
-// 		01xxxxx	-	Data-processing (shifted register) on page A6-31
-// 		1xxxxxx	-	Coprocessor instructions on page A6-40
-// 10	x0xxxxx	0	Data-processing (modified immediate) on page A6-15
-// 		x1xxxxx	0	Data-processing (plain binary immediate) on page A6-19
-// 		-		1	Branches and miscellaneous control on page A6-20
-// 11	000xxx0	-	Store single data item on page A6-30
-// 		001xxx0	-	Advanced SIMD element or structure load/store instructions on page A7-27
-// 		00xx001 -	Load byte, memory hints on page A6-28
-// 		00xx011	-	Load halfword, memory hints on page A6-26
-// 		00xx101	-	Load word on page A6-25
-// 		00xx111	-	UNDEFINED
-// 		010xxxx	-	Data-processing (register) on page A6-33
-// 		0110xxx	-	Multiply, multiply accumulate, and absolute difference on page A6-38
-// 		0111xxx	-	Long multiply, long multiply accumulate, and divide on page A6-39
-// 		1xxxxxx	-	Coprocessor instructions on page A6-40
+// op1  op2    op  Instruction class, see
+// ---  -------  --  -----------------------------------------------------------
+// 01  00xx0xx  -  Load/store multiple on page A6-23
+//     00xx1xx  -  Load/store dual, load/store exclusive, table branch on
+//                 page A6-24
+//     01xxxxx  -  Data-processing (shifted register) on page A6-31
+//     1xxxxxx  -  Coprocessor instructions on page A6-40
+// 10  x0xxxxx  0  Data-processing (modified immediate) on page A6-15
+//     x1xxxxx  0  Data-processing (plain binary immediate) on page A6-19
+//         -    1  Branches and miscellaneous control on page A6-20
+// 11  000xxx0  -  Store single data item on page A6-30
+//     001xxx0  -  Advanced SIMD element or structure load/store instructions
+//                 on page A7-27
+//     00xx001  - Load byte, memory hints on page A6-28
+//     00xx011  -  Load halfword, memory hints on page A6-26
+//     00xx101  -  Load word on page A6-25
+//     00xx111  -  UNDEFINED
+//     010xxxx  -  Data-processing (register) on page A6-33
+//     0110xxx  -  Multiply, multiply accumulate, and absolute difference on
+//                 page A6-38
+//     0111xxx  -  Long multiply, long multiply accumulate, and divide on
+//                 page A6-39
+//     1xxxxxx  -  Coprocessor instructions on page A6-40
 //
 static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
     MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
@@ -2130,7 +2129,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
         return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
                                          B);
       }
-      if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) {
+      if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
         // Table branch.
         return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
       }
@@ -2175,7 +2174,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
         }
       } else {
         // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
-        return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B);
+        return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+                                     NumOpsAdded, B);
       }
       break;
     case 1:
@@ -2229,7 +2229,7 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   }
 
   // A6.3 32-bit Thumb instruction encoding
-  
+
   uint16_t op1 = slice(HalfWord, 12, 11);
   uint16_t op2 = slice(HalfWord, 10, 4);
   uint16_t op = slice(insn, 15, 15);
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
new file mode 100644
index 000000000000..1499da00ae1c
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -0,0 +1,711 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
+#include "ARMAddressingModes.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "ARMGenAsmWriter.inc"
+
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  unsigned Opcode = MI->getOpcode();
+
+  // Check for MOVs and print canonical forms, instead.
+  if (Opcode == ARM::MOVs) {
+    // FIXME: Thumb variants?
+    const MCOperand &Dst = MI->getOperand(0);
+    const MCOperand &MO1 = MI->getOperand(1);
+    const MCOperand &MO2 = MI->getOperand(2);
+    const MCOperand &MO3 = MI->getOperand(3);
+
+    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+    printSBitModifierOperand(MI, 6, O);
+    printPredicateOperand(MI, 4, O);
+
+    O << '\t' << getRegisterName(Dst.getReg())
+      << ", " << getRegisterName(MO1.getReg());
+
+    if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
+      return;
+
+    O << ", ";
+
+    if (MO2.getReg()) {
+      O << getRegisterName(MO2.getReg());
+      assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+    } else {
+      O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+    }
+    return;
+  }
+
+  // A8.6.123 PUSH
+  if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    O << '\t' << "push";
+    printPredicateOperand(MI, 2, O);
+    if (Opcode == ARM::t2STMDB_UPD)
+      O << ".w";
+    O << '\t';
+    printRegisterList(MI, 4, O);
+    return;
+  }
+
+  // A8.6.122 POP
+  if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    O << '\t' << "pop";
+    printPredicateOperand(MI, 2, O);
+    if (Opcode == ARM::t2LDMIA_UPD)
+      O << ".w";
+    O << '\t';
+    printRegisterList(MI, 4, O);
+    return;
+  }
+
+  // A8.6.355 VPUSH
+  if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    O << '\t' << "vpush";
+    printPredicateOperand(MI, 2, O);
+    O << '\t';
+    printRegisterList(MI, 4, O);
+    return;
+  }
+
+  // A8.6.354 VPOP
+  if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
+      MI->getOperand(0).getReg() == ARM::SP) {
+    O << '\t' << "vpop";
+    printPredicateOperand(MI, 2, O);
+    O << '\t';
+    printRegisterList(MI, 4, O);
+    return;
+  }
+
+  printInstruction(MI, O);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    unsigned Reg = Op.getReg();
+    O << getRegisterName(Reg);
+  } else if (Op.isImm()) {
+    O << '#' << Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << *Op.getExpr();
+  }
+}
+
+static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
+                       const MCAsmInfo *MAI) {
+  // Break it up into two parts that make up a shifter immediate.
+  V = ARM_AM::getSOImmVal(V);
+  assert(V != -1 && "Not a valid so_imm value!");
+
+  unsigned Imm = ARM_AM::getSOImmValImm(V);
+  unsigned Rot = ARM_AM::getSOImmValRot(V);
+
+  // Print low-level immediate formation info, per
+  // A5.1.3: "Data-processing operands - Immediate".
+  if (Rot) {
+    O << "#" << Imm << ", " << Rot;
+    // Pretty printed version.
+    if (CommentStream)
+      *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
+  } else {
+    O << "#" << Imm;
+  }
+}
+
+
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid so_imm value!");
+  printSOImm(O, MO.getImm(), CommentStream, &MAI);
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
+//    REG 0   0           - e.g. R5
+//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
+//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+  const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+  O << getRegisterName(MO1.getReg());
+
+  // Print the shift opc.
+  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+  if (MO2.getReg()) {
+    O << ' ' << getRegisterName(MO2.getReg());
+    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+  } else if (ShOpc != ARM_AM::rrx) {
+    O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
+  }
+}
+
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op+1);
+  const MCOperand &MO3 = MI->getOperand(Op+2);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, O);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  if (!MO2.getReg()) {
+    if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+      O << ", #"
+        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+        << ARM_AM::getAM2Offset(MO3.getImm());
+    O << "]";
+    return;
+  }
+
+  O << ", "
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+    << getRegisterName(MO2.getReg());
+
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+    O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+    << " #" << ShImm;
+  O << "]";
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  if (!MO1.getReg()) {
+    unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+    O << '#'
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+      << ImmOffs;
+    return;
+  }
+
+  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
+    << getRegisterName(MO1.getReg());
+
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+    O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+    << " #" << ShImm;
+}
+
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+  const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+  O << '[' << getRegisterName(MO1.getReg());
+
+  if (MO2.getReg()) {
+    O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << getRegisterName(MO2.getReg()) << ']';
+    return;
+  }
+
+  if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+    O << ", #"
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+      << ImmOffs;
+  O << ']';
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  if (MO1.getReg()) {
+    O << (char)ARM_AM::getAM3Op(MO2.getImm())
+    << getRegisterName(MO1.getReg());
+    return;
+  }
+
+  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+  O << '#'
+    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
+    << ImmOffs;
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+                                                 .getImm());
+  O << ARM_AM::getAMSubModeStr(Mode);
+}
+
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, OpNum, O);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+    O << ", #"
+      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+      << ImmOffs * 4;
+  }
+  O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+  if (MO2.getImm()) {
+    // FIXME: Both darwin as and GNU as violate ARM docs here.
+    O << ", :" << (MO2.getImm() << 3);
+  }
+  O << "]";
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (MO.getReg() == 0)
+    O << "!";
+  else
+    O << ", " << getRegisterName(MO.getReg());
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  uint32_t v = ~MO.getImm();
+  int32_t lsb = CountTrailingZeros_32(v);
+  int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
+  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
+  O << '#' << lsb << ", #" << width;
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+                                     raw_ostream &O) {
+  unsigned val = MI->getOperand(OpNum).getImm();
+  O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O) {
+  unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+  ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+  switch (Opc) {
+  case ARM_AM::no_shift:
+    return;
+  case ARM_AM::lsl:
+    O << ", lsl #";
+    break;
+  case ARM_AM::asr:
+    O << ", asr #";
+    break;
+  default:
+    assert(0 && "unexpected shift opcode for shift immediate operand");
+  }
+  O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  O << "{";
+  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
+    if (i != OpNum) O << ", ";
+    O << getRegisterName(MI->getOperand(i).getReg());
+  }
+  O << "}";
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  if (Op.getImm())
+    O << "be";
+  else
+    O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  unsigned IFlags = Op.getImm();
+  for (int i=2; i >= 0; --i)
+    if (IFlags & (1 << i))
+      O << ARM_PROC::IFlagsToString(1 << i);
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  unsigned SpecRegRBit = Op.getImm() >> 4;
+  unsigned Mask = Op.getImm() & 0xf;
+
+  if (SpecRegRBit)
+    O << "spsr";
+  else
+    O << "cpsr";
+
+  if (Mask) {
+    O << '_';
+    if (Mask & 8) O << 'f';
+    if (Mask & 4) O << 's';
+    if (Mask & 2) O << 'x';
+    if (Mask & 1) O << 'c';
+  }
+}
+
+void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  O << '#';
+  if (Op.getImm() < 0)
+    O << '-' << (-Op.getImm() - 1);
+  else
+    O << Op.getImm();
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  if (CC != ARMCC::AL)
+    O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    raw_ostream &O) {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  O << ARMCondCodeToString(CC);
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  if (MI->getOperand(OpNum).getReg()) {
+    assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+           "Expect ARM CPSR register!");
+    O << 's';
+  }
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O) {
+  O << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O) {
+  O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O) {
+  O << "c" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O) {
+  llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+                                            raw_ostream &O) {
+  O << "#" <<  MI->getOperand(OpNum).getImm() * 4;
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  // (3 - the number of trailing zeros) is the number of then / else.
+  unsigned Mask = MI->getOperand(OpNum).getImm();
+  unsigned CondBit0 = Mask >> 4 & 1;
+  unsigned NumTZ = CountTrailingZeros_32(Mask);
+  assert(NumTZ <= 3 && "Invalid IT mask!");
+  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+    bool T = ((Mask >> Pos) & 1) == CondBit0;
+    if (T)
+      O << 't';
+    else
+      O << 'e';
+  }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+                                                 raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, O);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+  if (unsigned RegNum = MO2.getReg())
+    O << ", " << getRegisterName(RegNum);
+  O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+                                                    unsigned Op,
+                                                    raw_ostream &O,
+                                                    unsigned Scale) {
+  const MCOperand &MO1 = MI->getOperand(Op);
+  const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, O);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+  if (unsigned ImmOffs = MO2.getImm())
+    O << ", #" << ImmOffs * Scale;
+  O << "]";
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+                                                 raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0   0           - e.g. R5
+// REG IMM, SH_OPC     - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  unsigned Reg = MO1.getReg();
+  O << getRegisterName(Reg);
+
+  // Print the shift opc.
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+  if (ShOpc != ARM_AM::rrx)
+    O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, OpNum, O);
+    return;
+  }
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  int32_t OffImm = (int32_t)MO2.getImm();
+  bool isSub = OffImm < 0;
+  // Special value for #-0. All others are normal.
+  if (OffImm == INT32_MIN)
+    OffImm = 0;
+  if (isSub)
+    O << ", #-" << -OffImm;
+  else if (OffImm > 0)
+    O << ", #" << OffImm;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+                                                unsigned OpNum,
+                                                raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  int32_t OffImm = (int32_t)MO2.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << ", #-" << -OffImm;
+  else if (OffImm > 0)
+    O << ", #" << OffImm;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+                                                  unsigned OpNum,
+                                                  raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  int32_t OffImm = (int32_t)MO2.getImm() / 4;
+  // Don't print +0.
+  if (OffImm < 0)
+    O << ", #-" << -OffImm * 4;
+  else if (OffImm > 0)
+    O << ", #" << OffImm * 4;
+  O << "]";
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
+                                                      unsigned OpNum,
+                                                      raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  int32_t OffImm = (int32_t)MO1.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << "#-" << -OffImm;
+  else if (OffImm > 0)
+    O << "#" << OffImm;
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
+                                                        unsigned OpNum,
+                                                        raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  int32_t OffImm = (int32_t)MO1.getImm() / 4;
+  // Don't print +0.
+  if (OffImm < 0)
+    O << "#-" << -OffImm * 4;
+  else if (OffImm > 0)
+    O << "#" << OffImm * 4;
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum+1);
+  const MCOperand &MO3 = MI->getOperand(OpNum+2);
+
+  O << "[" << getRegisterName(MO1.getReg());
+
+  assert(MO2.getReg() && "Invalid so_reg load / store address!");
+  O << ", " << getRegisterName(MO2.getReg());
+
+  unsigned ShAmt = MO3.getImm();
+  if (ShAmt) {
+    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+    O << ", lsl #" << ShAmt;
+  }
+  O << "]";
+}
+
+void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  O << '#';
+  if (MO.isFPImm()) {
+    O << (float)MO.getFPImm();
+  } else {
+    union {
+      uint32_t I;
+      float F;
+    } FPUnion;
+
+    FPUnion.I = MO.getImm();
+    O << FPUnion.F;
+  }
+}
+
+void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  O << '#';
+  if (MO.isFPImm()) {
+    O << MO.getFPImm();
+  } else {
+    // We expect the binary encoding of a floating point number here.
+    union {
+      uint64_t I;
+      double D;
+    } FPUnion;
+
+    FPUnion.I = MO.getImm();
+    O << FPUnion.D;
+  }
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+                                            raw_ostream &O) {
+  unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+  unsigned EltBits;
+  uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+  O << "#0x" << utohexstr(Val);
+}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
new file mode 100644
index 000000000000..679d3135ea6d
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -0,0 +1,111 @@
+//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTPRINTER_H
+#define ARMINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+  ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+  static const char *getInstructionName(unsigned Opcode);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+  void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+  void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+
+  void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O);
+  void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+  void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O, unsigned Scale);
+  void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O);
+  void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O);
+  void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O);
+  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+
+  void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+                                 raw_ostream &O);
+  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O);
+  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+                                    raw_ostream &O);
+  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O);
+  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O);
+  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+
+  void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O);
+  void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+                                raw_ostream &O);
+  void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..18645c0864a3
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+  ARMInstPrinter.cpp
+  )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/InstPrinter/Makefile b/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 000000000000..65d372e44b88
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 000000000000..f9e86eb36e04
--- /dev/null
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,321 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiple and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+  struct MLxExpansion : public MachineFunctionPass {
+    static char ID;
+    MLxExpansion() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM MLA / MLS expansion pass";
+    }
+
+  private:
+    const ARMBaseInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+
+    unsigned MIIdx;
+    MachineInstr* LastMIs[4];
+
+    void clearStack();
+    void pushStack(MachineInstr *MI);
+    MachineInstr *getAccDefMI(MachineInstr *MI) const;
+    unsigned getDefReg(MachineInstr *MI) const;
+    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+    bool FindMLxHazard(MachineInstr *MI) const;
+    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+                                unsigned MulOpc, unsigned AddSubOpc,
+                                bool NegAcc, bool HasLane);
+    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+  };
+  char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+  std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+  MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+  LastMIs[MIIdx] = MI;
+  if (++MIIdx == 4)
+    MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+  // Look past COPY and INSERT_SUBREG instructions to find the
+  // real definition MI. This is important for _sfp instructions.
+  unsigned Reg = MI->getOperand(1).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return 0;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *DefMI = MRI->getVRegDef(Reg);
+  while (true) {
+    if (DefMI->getParent() != MBB)
+      break;
+    if (DefMI->isCopyLike()) {
+      Reg = DefMI->getOperand(1).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        DefMI = MRI->getVRegDef(Reg);
+        continue;
+      }
+    } else if (DefMI->isInsertSubreg()) {
+      Reg = DefMI->getOperand(2).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        DefMI = MRI->getVRegDef(Reg);
+        continue;
+      }
+    }
+    break;
+  }
+  return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+  unsigned Reg = MI->getOperand(0).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+      !MRI->hasOneNonDBGUse(Reg))
+    return Reg;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+  if (UseMI->getParent() != MBB)
+    return Reg;
+
+  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+    Reg = UseMI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+        !MRI->hasOneNonDBGUse(Reg))
+      return Reg;
+    UseMI = &*MRI->use_nodbg_begin(Reg);
+    if (UseMI->getParent() != MBB)
+      return Reg;
+  }
+
+  return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  // FIXME: Detect integer instructions properly.
+  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+  if (Domain == ARMII::DomainVFP) {
+    unsigned Opcode = TID.getOpcode();
+    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return false;
+  } else if (Domain == ARMII::DomainNEON) {
+    if (TID.mayStore() || TID.mayLoad())
+      return false;
+  } else {
+    return false;
+  }
+
+  return MI->readsRegister(Reg, TRI);
+  return false;
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+  if (NumExpand >= ExpandLimit)
+    return false;
+
+  if (ForceExapnd)
+    return true;
+
+  MachineInstr *DefMI = getAccDefMI(MI);
+  if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+    // r0 = vmla
+    // r3 = vmla r0, r1, r2
+    // takes 16 - 17 cycles
+    //
+    // r0 = vmla
+    // r4 = vmul r1, r2
+    // r3 = vadd r0, r4
+    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+    return true;
+
+  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
+  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+  // preserves the in-order retirement of the instructions.
+  // Look at the next few instructions, if *most* of them can cause hazards,
+  // then the scheduler can't *fix* this, we'd better break up the VMLA.
+  for (unsigned i = 1; i <= 4; ++i) {
+    int Idx = ((int)MIIdx - i + 4) % 4;
+    MachineInstr *NextMI = LastMIs[Idx];
+    if (!NextMI)
+      continue;
+
+    if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
+      return true;
+
+    // Look for VMLx RAW hazard.
+    if (hasRAWHazard(getDefReg(MI), NextMI))
+      return true;
+  }
+
+  return false;
+}
+
+/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+                                     unsigned MulOpc, unsigned AddSubOpc,
+                                     bool NegAcc, bool HasLane) {
+  unsigned DstReg = MI->getOperand(0).getReg();
+  bool DstDead = MI->getOperand(0).isDead();
+  unsigned AccReg = MI->getOperand(1).getReg();
+  unsigned Src1Reg = MI->getOperand(2).getReg();
+  unsigned Src2Reg = MI->getOperand(3).getReg();
+  bool Src1Kill = MI->getOperand(2).isKill();
+  bool Src2Kill = MI->getOperand(3).isKill();
+  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+  unsigned NextOp = HasLane ? 5 : 4;
+  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+  unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+  const TargetInstrDesc &TID1 = TII->get(MulOpc);
+  const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
+  unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
+
+  MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
+    .addReg(Src1Reg, getKillRegState(Src1Kill))
+    .addReg(Src2Reg, getKillRegState(Src2Kill));
+  if (HasLane)
+    MIB.addImm(LaneImm);
+  MIB.addImm(Pred).addReg(PredReg);
+
+  MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
+    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+  if (NegAcc) {
+    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+    MIB.addReg(TmpReg, getKillRegState(true))
+       .addReg(AccReg, getKillRegState(AccKill));
+  } else {
+    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+  }
+  MIB.addImm(Pred).addReg(PredReg);
+
+  DEBUG({
+      dbgs() << "Expanding: " << *MI;
+      dbgs() << "  to:\n";
+      MachineBasicBlock::iterator MII = MI;
+      MII = llvm::prior(MII);
+      MachineInstr &MI2 = *MII;
+      MII = llvm::prior(MII);
+      MachineInstr &MI1 = *MII;
+      dbgs() << "    " << MI1;
+      dbgs() << "    " << MI2;
+   });
+
+  MI->eraseFromParent();
+  ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+  bool Changed = false;
+
+  clearStack();
+
+  unsigned Skip = 0;
+  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+  while (MII != E) {
+    MachineInstr *MI = &*MII;
+
+    if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+      ++MII;
+      continue;
+    }
+
+    const TargetInstrDesc &TID = MI->getDesc();
+    if (TID.isBarrier()) {
+      clearStack();
+      Skip = 0;
+      ++MII;
+      continue;
+    }
+
+    unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+    if (Domain == ARMII::DomainGeneral) {
+      if (++Skip == 2)
+        // Assume dual issues of non-VFP / NEON instructions.
+        pushStack(0);
+    } else {
+      Skip = 0;
+
+      unsigned MulOpc, AddSubOpc;
+      bool NegAcc, HasLane;
+      if (!TII->isFpMLxInstruction(TID.getOpcode(),
+                                   MulOpc, AddSubOpc, NegAcc, HasLane) ||
+          !FindMLxHazard(MI))
+        pushStack(MI);
+      else {
+        ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+        E = MBB.rend(); // May have changed if MI was the 1st instruction.
+        Changed = true;
+        continue;
+      }
+    }
+
+    ++MII;
+  }
+
+  return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+  TRI = Fn.getTarget().getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    Modified |= ExpandFPMLxInstructions(MBB);
+  }
+
+  return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+  return new MLxExpansion();
+}
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index b3fcfaf6bda7..65a6494986fe 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -18,8 +18,8 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
                 ARMGenDAGISel.inc ARMGenSubtarget.inc \
                 ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
                 ARMGenDecoderTables.inc ARMGenEDInfo.inc \
-                ARMGenFastISel.inc
+                ARMGenFastISel.inc ARMGenMCCodeEmitter.inc
 
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
deleted file mode 100644
index 3407ac6fe08e..000000000000
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "neon-prealloc"
-#include "ARM.h"
-#include "ARMInstrInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-using namespace llvm;
-
-namespace {
-  class NEONPreAllocPass : public MachineFunctionPass {
-    const TargetInstrInfo *TII;
-    MachineRegisterInfo *MRI;
-
-  public:
-    static char ID;
-    NEONPreAllocPass() : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const {
-      return "NEON register pre-allocation pass";
-    }
-
-  private:
-    bool FormsRegSequence(MachineInstr *MI,
-                          unsigned FirstOpnd, unsigned NumRegs,
-                          unsigned Offset, unsigned Stride) const;
-    bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
-  };
-
-  char NEONPreAllocPass::ID = 0;
-}
-
-static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
-                             unsigned &Offset, unsigned &Stride) {
-  // Default to unit stride with no offset.
-  Stride = 1;
-  Offset = 0;
-
-  switch (Opcode) {
-  default:
-    break;
-
-  case ARM::VLD2LNd8:
-  case ARM::VLD2LNd16:
-  case ARM::VLD2LNd32:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VLD2LNq16:
-  case ARM::VLD2LNq32:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD2LNq16odd:
-  case ARM::VLD2LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 2;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3LNd8:
-  case ARM::VLD3LNd16:
-  case ARM::VLD3LNd32:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VLD3LNq16:
-  case ARM::VLD3LNq32:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD3LNq16odd:
-  case ARM::VLD3LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4LNd8:
-  case ARM::VLD4LNd16:
-  case ARM::VLD4LNd32:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VLD4LNq16:
-  case ARM::VLD4LNq32:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VLD4LNq16odd:
-  case ARM::VLD4LNq32odd:
-    FirstOpnd = 0;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST2LNd8:
-  case ARM::VST2LNd16:
-  case ARM::VST2LNd32:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VST2LNq16:
-  case ARM::VST2LNq32:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST2LNq16odd:
-  case ARM::VST2LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3LNd8:
-  case ARM::VST3LNd16:
-  case ARM::VST3LNd32:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VST3LNq16:
-  case ARM::VST3LNq32:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST3LNq16odd:
-  case ARM::VST3LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4LNd8:
-  case ARM::VST4LNd16:
-  case ARM::VST4LNd32:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VST4LNq16:
-  case ARM::VST4LNq32:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4LNq16odd:
-  case ARM::VST4LNq32odd:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
-  case ARM::VTBL2:
-    FirstOpnd = 1;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VTBL3:
-    FirstOpnd = 1;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VTBL4:
-    FirstOpnd = 1;
-    NumRegs = 4;
-    return true;
-
-  case ARM::VTBX2:
-    FirstOpnd = 2;
-    NumRegs = 2;
-    return true;
-
-  case ARM::VTBX3:
-    FirstOpnd = 2;
-    NumRegs = 3;
-    return true;
-
-  case ARM::VTBX4:
-    FirstOpnd = 2;
-    NumRegs = 4;
-    return true;
-  }
-
-  return false;
-}
-
-bool
-NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
-                                   unsigned FirstOpnd, unsigned NumRegs,
-                                   unsigned Offset, unsigned Stride) const {
-  MachineOperand &FMO = MI->getOperand(FirstOpnd);
-  assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
-  unsigned VirtReg = FMO.getReg();
-  (void)VirtReg;
-  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-         "expected a virtual register");
-
-  unsigned LastSubIdx = 0;
-  if (FMO.isDef()) {
-    MachineInstr *RegSeq = 0;
-    for (unsigned R = 0; R < NumRegs; ++R) {
-      const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-      assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
-      unsigned VirtReg = MO.getReg();
-      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-             "expected a virtual register");
-      // Feeding into a REG_SEQUENCE.
-      if (!MRI->hasOneNonDBGUse(VirtReg))
-        return false;
-      MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
-      if (!UseMI->isRegSequence())
-        return false;
-      if (RegSeq && RegSeq != UseMI)
-        return false;
-      unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
-      if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
-        llvm_unreachable("Malformed REG_SEQUENCE instruction!");
-      unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
-      if (LastSubIdx) {
-        if (LastSubIdx != SubIdx-Stride)
-          return false;
-      } else {
-        // Must start from dsub_0 or qsub_0.
-        if (SubIdx != (ARM::dsub_0+Offset) &&
-            SubIdx != (ARM::qsub_0+Offset))
-          return false;
-      }
-      RegSeq = UseMI;
-      LastSubIdx = SubIdx;
-    }
-
-    // In the case of vld3, etc., make sure the trailing operand of
-    // REG_SEQUENCE is an undef.
-    if (NumRegs == 3) {
-      unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
-      const MachineOperand &MO = RegSeq->getOperand(OpIdx);
-      unsigned VirtReg = MO.getReg();
-      MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
-      if (!DefMI || !DefMI->isImplicitDef())
-        return false;
-    }
-    return true;
-  }
-
-  unsigned LastSrcReg = 0;
-  SmallVector<unsigned, 4> SubIds;
-  for (unsigned R = 0; R < NumRegs; ++R) {
-    const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-    assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
-    unsigned VirtReg = MO.getReg();
-    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-           "expected a virtual register");
-    // Extracting from a Q or QQ register.
-    MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
-    if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
-      return false;
-    VirtReg = DefMI->getOperand(1).getReg();
-    if (LastSrcReg && LastSrcReg != VirtReg)
-      return false;
-    LastSrcReg = VirtReg;
-    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-    if (RC != ARM::QPRRegisterClass &&
-        RC != ARM::QQPRRegisterClass &&
-        RC != ARM::QQQQPRRegisterClass)
-      return false;
-    unsigned SubIdx = DefMI->getOperand(1).getSubReg();
-    if (LastSubIdx) {
-      if (LastSubIdx != SubIdx-Stride)
-        return false;
-    } else {
-      // Must start from dsub_0 or qsub_0.
-      if (SubIdx != (ARM::dsub_0+Offset) &&
-          SubIdx != (ARM::qsub_0+Offset))
-        return false;
-    }
-    SubIds.push_back(SubIdx);
-    LastSubIdx = SubIdx;
-  }
-
-  // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
-  // currently required for correctness. e.g.
-  //  %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
-  //  %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
-  //  %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
-  //  VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
-  // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
-  // respectively.
-  // We need to change how we model uses of REG_SEQUENCE.
-  for (unsigned R = 0; R < NumRegs; ++R) {
-    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-    unsigned OldReg = MO.getReg();
-    MachineInstr *DefMI = MRI->getVRegDef(OldReg);
-    assert(DefMI->isCopy());
-    MO.setReg(LastSrcReg);
-    MO.setSubReg(SubIds[R]);
-    MO.setIsKill(false);
-    // Delete the EXTRACT_SUBREG if its result is now dead.
-    if (MRI->use_empty(OldReg))
-      DefMI->eraseFromParent();
-  }
-
-  return true;
-}
-
-bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
-  bool Modified = false;
-
-  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-  for (; MBBI != E; ++MBBI) {
-    MachineInstr *MI = &*MBBI;
-    unsigned FirstOpnd, NumRegs, Offset, Stride;
-    if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
-      continue;
-    if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
-      continue;
-
-    MachineBasicBlock::iterator NextI = llvm::next(MBBI);
-    for (unsigned R = 0; R < NumRegs; ++R) {
-      MachineOperand &MO = MI->getOperand(FirstOpnd + R);
-      assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
-      unsigned VirtReg = MO.getReg();
-      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-             "expected a virtual register");
-
-      // For now, just assign a fixed set of adjacent registers.
-      // This leaves plenty of room for future improvements.
-      static const unsigned NEONDRegs[] = {
-        ARM::D0, ARM::D1, ARM::D2, ARM::D3,
-        ARM::D4, ARM::D5, ARM::D6, ARM::D7
-      };
-      MO.setReg(NEONDRegs[Offset + R * Stride]);
-
-      if (MO.isUse()) {
-        // Insert a copy from VirtReg.
-        BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg())
-          .addReg(VirtReg, getKillRegState(MO.isKill()));
-        MO.setIsKill();
-      } else if (MO.isDef() && !MO.isDead()) {
-        // Add a copy to VirtReg.
-        BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
-          .addReg(MO.getReg());
-      }
-    }
-  }
-
-  return Modified;
-}
-
-bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  MRI = &MF.getRegInfo();
-
-  bool Modified = false;
-  for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
-       ++MFI) {
-    MachineBasicBlock &MBB = *MFI;
-    Modified |= PreAllocNEONRegisters(MBB);
-  }
-
-  return Modified;
-}
-
-/// createNEONPreAllocPass - returns an instance of the NEON register
-/// pre-allocation pass.
-FunctionPass *llvm::createNEONPreAllocPass() {
-  return new NEONPreAllocPass();
-}
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 6b605bb0a7cf..463c440852f5 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -68,7 +68,7 @@ LPCRELL0:
 
 //===---------------------------------------------------------------------===//
 
-We compiles the following:
+We compile the following:
 
 define i16 @func_entry_2E_ce(i32 %i) {
         switch i32 %i, label %bb12.exitStub [
@@ -246,3 +246,22 @@ Thumb2.
 Rather than having tBR_JTr print a ".align 2" and constant island pass pad it,
 add a target specific ALIGN instruction instead. That way, GetInstSizeInBytes
 won't have to over-estimate. It can also be used for loop alignment pass.
+
+//===---------------------------------------------------------------------===//
+
+We generate conditional code for icmp when we don't need to. This code:
+
+  int foo(int s) {
+    return s == 1;
+  }
+
+produces:
+
+foo:
+        cmp     r0, #1
+        mov.w   r0, #0
+        it      eq
+        moveq   r0, #1
+        bx      lr
+
+when it could use subs + adcs. This is GCC PR46975.
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 000000000000..233e16538771
--- /dev/null
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,352 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has small immediate offset to
+  // address the stack frame. So a large call frame can cause poor codegen
+  // and may even makes it impossible to scavenge a register.
+  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb1RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo  *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const Thumb1RegisterInfo *RegInfo =
+    static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const Thumb1InstrInfo &TII =
+    *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned BasePtr = RegInfo->getBaseRegister();
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the sizes of each callee-save spill areas and record which frame
+  // belongs to which callee-save spill areas.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    ++MBBI;
+    if (MBBI != MBB.end())
+      dl = MBBI->getDebugLoc();
+  }
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+  NumBytes = DPRCSOffset;
+
+  // Adjust FP so it point to the stack slot that contains the previous FP.
+  if (hasFP(MF)) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+    if (NumBytes > 7)
+      // If offset is > 7 then sp cannot be adjusted in a single instruction,
+      // try restoring from fp instead.
+      AFI->setShouldRestoreSPFromFP(true);
+  }
+
+  if (NumBytes)
+    // Insert it after all the callee-save spills.
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+
+  if (STI.isTargetELF() && hasFP(MF))
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+  // If we need a base pointer, set it up here. It's whatever the value
+  // of the stack pointer is at this point. Any variable size objects
+  // will be allocated after this, so we can still use the base pointer
+  // to reference locals.
+  if (RegInfo->hasBasePointer(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+    
+  // If the frame has variable sized objects then the epilogue must restore
+  // the sp from fp. We can assume there's an FP here since hasFP already
+  // checks for hasVarSizedObjects.
+  if (MFI->hasVarSizedObjects())
+    AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  if (MI->getOpcode() == ARM::tRestore &&
+      MI->getOperand(1).isFI() &&
+      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+    return true;
+  else if (MI->getOpcode() == ARM::tPOP) {
+    // The first two operands are predicates. The last two are
+    // imp-def and imp-use of SP. Check everything in between.
+    for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+        return false;
+    return true;
+  }
+  return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert((MBBI->getOpcode() == ARM::tBX_RET ||
+          MBBI->getOpcode() == ARM::tPOP_RET) &&
+         "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const Thumb1RegisterInfo *RegInfo =
+    static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const Thumb1InstrInfo &TII =
+    *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+  } else {
+    // Unwind MBBI to point to first LDR / VLDRD.
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+      if (!isCSRestore(MBBI, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to start of FP callee save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+
+    if (AFI->shouldRestoreSPFromFP()) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on frame pointer only if the stack frame extends beyond
+      // frame pointer stack slot, the target is ELF and the function has FP, or
+      // the target uses var sized objects.
+      if (NumBytes) {
+        assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+               "No scratch register to restore SP from FP!");
+        emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
+                                  TII, *RegInfo, dl);
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+          .addReg(ARM::R4);
+      } else
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+          .addReg(FramePtr);
+    } else {
+      if (MBBI->getOpcode() == ARM::tBX_RET &&
+          &MBB.front() != MBBI &&
+          prior(MBBI)->getOpcode() == ARM::tPOP) {
+        MachineBasicBlock::iterator PMBBI = prior(MBBI);
+        emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+      } else
+        emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+    }
+  }
+
+  if (VARegSaveSize) {
+    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+    // to LR, and we can't pop the value directly to the PC since
+    // we need to update the SP after popping the value. Therefore, we
+    // pop the old LR into R3 as a temporary.
+
+    // Move back past the callee-saved register restoration
+    while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+      ++MBBI;
+    // Epilogue for vararg functions: pop LR to R3 and branch off it.
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+      .addReg(ARM::R3, RegState::Define);
+
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+      .addReg(ARM::R3, RegState::Kill);
+    // erase the old tBX_RET instruction
+    MBB.erase(MBBI);
+  }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          const std::vector<CalleeSavedInfo> &CSI,
+                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+  AddDefaultPred(MIB);
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill)
+      MBB.addLiveIn(Reg);
+
+    MIB.addReg(Reg, getKillRegState(isKill));
+  }
+  return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  DebugLoc DL = MI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+  AddDefaultPred(MIB);
+
+  bool NumRegs = false;
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {
+      // Special epilogue for vararg functions. See emitEpilogue
+      if (isVarArg)
+        continue;
+      Reg = ARM::PC;
+      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+      MI = MBB.erase(MI);
+    }
+    MIB.addReg(Reg, getDefRegState(true));
+    NumRegs = true;
+  }
+
+  // It's illegal to emit pop instruction without operands.
+  if (NumRegs)
+    MBB.insert(MI, &*MIB);
+  else
+    MF.DeleteMachineInstr(MIB);
+
+  return true;
+}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 000000000000..c592e125de17
--- /dev/null
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMM_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+  explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+    : ARMFrameLowering(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index af630ac797c5..3fbb43340c3f 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -71,8 +71,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOStore, 0,
+      MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
@@ -99,85 +100,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOLoad, 0,
+      MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   }
 }
-
-bool Thumb1InstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI,
-                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
-  AddDefaultPred(MIB);
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    bool isKill = true;
-
-    // Add the callee-saved register as live-in unless it's LR and
-    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
-    // then it's already added to the function and entry block live-in sets.
-    if (Reg == ARM::LR) {
-      MachineFunction &MF = *MBB.getParent();
-      if (MF.getFrameInfo()->isReturnAddressTaken() &&
-          MF.getRegInfo().isLiveIn(Reg))
-        isKill = false;
-    }
-
-    if (isKill)
-      MBB.addLiveIn(Reg);
-
-    MIB.addReg(Reg, getKillRegState(isKill));
-  }
-  return true;
-}
-
-bool Thumb1InstrInfo::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI,
-                            const std::vector<CalleeSavedInfo> &CSI,
-                            const TargetRegisterInfo *TRI) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  if (CSI.empty())
-    return false;
-
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
-  DebugLoc DL = MI->getDebugLoc();
-  MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP));
-  AddDefaultPred(MIB);
-
-  bool NumRegs = false;
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    if (Reg == ARM::LR) {
-      // Special epilogue for vararg functions. See emitEpilogue
-      if (isVarArg)
-        continue;
-      Reg = ARM::PC;
-      (*MIB).setDesc(get(ARM::tPOP_RET));
-      MI = MBB.erase(MI);
-    }
-    MIB.addReg(Reg, getDefRegState(true));
-    NumRegs = true;
-  }
-
-  // It's illegal to emit pop instruction without operands.
-  if (NumRegs)
-    MBB.insert(MI, &*MIB);
-  else
-    MF.DeleteMachineInstr(MIB);
-
-  return true;
-}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 555135a8b76c..17ef2f758ef4 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -37,28 +37,19 @@ public:
   ///
   const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
 
-  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI,
-                                 const TargetRegisterInfo *TRI) const;
-  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MI,
-                                   const std::vector<CalleeSavedInfo> &CSI,
-                                   const TargetRegisterInfo *TRI) const;
-
   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
                    unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                           MachineBasicBlock::iterator MBBI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
+                            MachineBasicBlock::iterator MBBI,
+                            unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
 
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a21a3da10bda..f62a13e3e288 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -29,7 +29,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
@@ -63,24 +63,11 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
           Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
   unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
 
-  BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRcp))
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
           .addReg(DestReg, getDefRegState(true), SubIdx)
           .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
 }
 
-bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  unsigned CFSize = FFI->getMaxCallFrameSize();
-  // It's not always a good idea to include the call frame as part of the
-  // stack frame. ARM (especially Thumb) has small immediate offset to
-  // address the stack frame. So a large call frame can cause poor codegen
-  // and may even makes it impossible to scavenge a register.
-  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
-    return false;
-
-  return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
 
 /// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
 /// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -92,7 +79,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
                               unsigned DestReg, unsigned BaseReg,
                               int NumBytes, bool CanChangeCC,
                               const TargetInstrInfo &TII,
-                              const Thumb1RegisterInfo& MRI,
+                              const ARMBaseRegisterInfo& MRI,
                               DebugLoc dl) {
     MachineFunction &MF = *MBB.getParent();
     bool isHigh = !isARMLowRegister(DestReg) ||
@@ -162,13 +149,12 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
 
 /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
 /// a destreg = basereg + immediate in Thumb code.
-static
-void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator &MBBI,
-                               unsigned DestReg, unsigned BaseReg,
-                               int NumBytes, const TargetInstrInfo &TII,
-                               const Thumb1RegisterInfo& MRI,
-                               DebugLoc dl) {
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator &MBBI,
+                                     unsigned DestReg, unsigned BaseReg,
+                                     int NumBytes, const TargetInstrInfo &TII,
+                                     const ARMBaseRegisterInfo& MRI,
+                                     DebugLoc dl) {
   bool isSub = NumBytes < 0;
   unsigned Bytes = (unsigned)NumBytes;
   if (isSub) Bytes = -NumBytes;
@@ -304,7 +290,9 @@ static void emitSPUpdate(MachineBasicBlock &MBB,
 void Thumb1RegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
     // If we have alloca, convert as follows:
     // ADJCALLSTACKDOWN -> sub, sp, sp, amount
     // ADJCALLSTACKUP   -> add, sp, sp, amount
@@ -315,7 +303,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = TFI->getStackAlignment();
       Amount = (Amount+Align-1)/Align*Align;
 
       // Replace the pseudo instruction with a new instruction...
@@ -363,6 +351,22 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
     MI.RemoveOperand(Op);
 }
 
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  case ARM::tLDRspi:
+  case ARM::tRestore:           // FIXME: Should this opcode be here?
+    return ARM::tLDRi;
+
+  case ARM::tSTRspi:
+  case ARM::tSpill:             // FIXME: Should this opcode be here?
+    return ARM::tSTRi;
+  }
+
+  return Opcode;
+}
+
 bool Thumb1RegisterInfo::
 rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
                   unsigned FrameReg, int &Offset,
@@ -464,55 +468,51 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
     }
     return true;
   } else {
-    unsigned ImmIdx = 0;
-    int InstrOffs = 0;
-    unsigned NumBits = 0;
-    unsigned Scale = 1;
-    switch (AddrMode) {
-    case ARMII::AddrModeT1_s: {
-      ImmIdx = FrameRegIdx+1;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
-      Scale = 4;
-      break;
-    }
-    default:
+    if (AddrMode != ARMII::AddrModeT1_s)
       llvm_unreachable("Unsupported addressing mode!");
-      break;
-    }
+
+    unsigned ImmIdx = FrameRegIdx + 1;
+    int InstrOffs = MI.getOperand(ImmIdx).getImm();
+    unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+    unsigned Scale = 4;
 
     Offset += InstrOffs * Scale;
-    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+    assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
 
     // Common case: small offset, fits into instruction.
     MachineOperand &ImmOp = MI.getOperand(ImmIdx);
     int ImmedOffset = Offset / Scale;
     unsigned Mask = (1 << NumBits) - 1;
+
     if ((unsigned)Offset <= Mask * Scale) {
-      // Replace the FrameIndex with sp
+      // Replace the FrameIndex with the frame register (e.g., sp).
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       ImmOp.ChangeToImmediate(ImmedOffset);
+
+      // If we're using a register where sp was stored, convert the instruction
+      // to the non-SP version.
+      unsigned NewOpc = convertToNonSPOpcode(Opcode);
+      if (NewOpc != Opcode && FrameReg != ARM::SP)
+        MI.setDesc(TII.get(NewOpc));
+
       return true;
     }
 
-    bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
-    if (AddrMode == ARMII::AddrModeT1_s) {
-      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
-      // a different base register.
-      NumBits = 5;
-      Mask = (1 << NumBits) - 1;
-    }
+    NumBits = 5;
+    Mask = (1 << NumBits) - 1;
+
     // If this is a thumb spill / restore, we will be using a constpool load to
     // materialize the offset.
-    if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
+    if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) {
       ImmOp.ChangeToImmediate(0);
-    else {
+    } else {
       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
       ImmedOffset = ImmedOffset & Mask;
       ImmOp.ChangeToImmediate(ImmedOffset);
-      Offset &= ~(Mask*Scale);
+      Offset &= ~(Mask * Scale);
     }
   }
+
   return Offset == 0;
 }
 
@@ -602,7 +602,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
     Offset -= AFI->getGPRCalleeSavedArea2Offset();
   else if (MF.getFrameInfo()->hasVarSizedObjects()) {
-    assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+    assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+           "Unexpected");
     // There are alloca()'s in this function, must reference off the frame
     // pointer or base pointer instead.
     if (!hasBasePointer(MF)) {
@@ -655,13 +656,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                 *this, dl);
     }
 
-    MI.setDesc(TII.get(ARM::tLDR));
+    MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
     MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
     if (UseRR)
-      // Use [reg, reg] addrmode.
-      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-    else  // tLDR has an extra register operand.
-      MI.addOperand(MachineOperand::CreateReg(0, false));
+      // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+      // register. The offset is already handled in the vreg value.
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
   } else if (Desc.mayStore()) {
       VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
       bool UseRR = false;
@@ -677,14 +677,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       } else
         emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
                                   *this, dl);
-      MI.setDesc(TII.get(ARM::tSTR));
+      MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
       MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
-      if (UseRR)  // Use [reg, reg] addrmode.
-        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-      else // tSTR has an extra register operand.
-        MI.addOperand(MachineOperand::CreateReg(0, false));
-  } else
+      if (UseRR)
+        // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+        // register. The offset is already handled in the vreg value.
+        MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+  } else {
     assert(false && "Unexpected opcode!");
+  }
 
   // Add predicate back if it's needed.
   if (MI.getDesc().isPredicable()) {
@@ -692,206 +693,3 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     AddDefaultPred(MIB);
   }
 }
-
-void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo  *MFI = MF.getFrameInfo();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
-  unsigned NumBytes = MFI->getStackSize();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
-  NumBytes = (NumBytes + 3) & ~3;
-  MFI->setStackSize(NumBytes);
-
-  // Determine the sizes of each callee-save spill areas and record which frame
-  // belongs to which callee-save spill areas.
-  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
-  int FramePtrSpillFI = 0;
-
-  if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize);
-
-  if (!AFI->hasStackFrame()) {
-    if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
-    return;
-  }
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    int FI = CSI[i].getFrameIdx();
-    switch (Reg) {
-    case ARM::R4:
-    case ARM::R5:
-    case ARM::R6:
-    case ARM::R7:
-    case ARM::LR:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      AFI->addGPRCalleeSavedArea1Frame(FI);
-      GPRCS1Size += 4;
-      break;
-    case ARM::R8:
-    case ARM::R9:
-    case ARM::R10:
-    case ARM::R11:
-      if (Reg == FramePtr)
-        FramePtrSpillFI = FI;
-      if (STI.isTargetDarwin()) {
-        AFI->addGPRCalleeSavedArea2Frame(FI);
-        GPRCS2Size += 4;
-      } else {
-        AFI->addGPRCalleeSavedArea1Frame(FI);
-        GPRCS1Size += 4;
-      }
-      break;
-    default:
-      AFI->addDPRCalleeSavedAreaFrame(FI);
-      DPRCSSize += 8;
-    }
-  }
-
-  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
-    ++MBBI;
-    if (MBBI != MBB.end())
-      dl = MBBI->getDebugLoc();
-  }
-
-  // Adjust FP so it point to the stack slot that contains the previous FP.
-  if (hasFP(MF)) {
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
-      .addFrameIndex(FramePtrSpillFI).addImm(0);
-    AFI->setShouldRestoreSPFromFP(true);
-  }
-
-  // Determine starting offsets of spill areas.
-  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
-  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
-  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
-  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
-  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
-  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
-  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
-  NumBytes = DPRCSOffset;
-  if (NumBytes) {
-    // Insert it after all the callee-save spills.
-    emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
-  }
-
-  if (STI.isTargetELF() && hasFP(MF))
-    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
-                             AFI->getFramePtrSpillOffset());
-
-  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
-  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
-  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-
-  // If we need a base pointer, set it up here. It's whatever the value
-  // of the stack pointer is at this point. Any variable size objects
-  // will be allocated after this, so we can still use the base pointer
-  // to reference locals.
-  if (hasBasePointer(MF))
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
-  if (MI->getOpcode() == ARM::tRestore &&
-      MI->getOperand(1).isFI() &&
-      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
-    return true;
-  else if (MI->getOpcode() == ARM::tPOP) {
-    // The first two operands are predicates. The last two are
-    // imp-def and imp-use of SP. Check everything in between.
-    for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
-      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
-        return false;
-    return true;
-  }
-  return false;
-}
-
-void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
-                                      MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert((MBBI->getOpcode() == ARM::tBX_RET ||
-          MBBI->getOpcode() == ARM::tPOP_RET) &&
-         "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
-  int NumBytes = (int)MFI->getStackSize();
-  const unsigned *CSRegs = getCalleeSavedRegs();
-
-  if (!AFI->hasStackFrame()) {
-    if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
-  } else {
-    // Unwind MBBI to point to first LDR / VLDRD.
-    if (MBBI != MBB.begin()) {
-      do
-        --MBBI;
-      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
-      if (!isCSRestore(MBBI, CSRegs))
-        ++MBBI;
-    }
-
-    // Move SP to start of FP callee save spill area.
-    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
-                 AFI->getGPRCalleeSavedArea2Size() +
-                 AFI->getDPRCalleeSavedAreaSize());
-
-    if (AFI->shouldRestoreSPFromFP()) {
-      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
-      // Reset SP based on frame pointer only if the stack frame extends beyond
-      // frame pointer stack slot or target is ELF and the function has FP.
-      if (NumBytes)
-        emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
-                                  TII, *this, dl);
-      else
-        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-          .addReg(FramePtr);
-    } else {
-      if (MBBI->getOpcode() == ARM::tBX_RET &&
-          &MBB.front() != MBBI &&
-          prior(MBBI)->getOpcode() == ARM::tPOP) {
-        MachineBasicBlock::iterator PMBBI = prior(MBBI);
-        emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes);
-      } else
-        emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
-    }
-  }
-
-  if (VARegSaveSize) {
-    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
-    // to LR, and we can't pop the value directly to the PC since
-    // we need to update the SP after popping the value. Therefore, we
-    // pop the old LR into R3 as a temporary.
-
-    // Move back past the callee-saved register restoration
-    while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
-      ++MBBI;
-    // Epilogue for vararg functions: pop LR to R3 and branch off it.
-    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
-      .addReg(ARM::R3, RegState::Define);
-
-    emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
-
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
-      .addReg(ARM::R3, RegState::Kill);
-    // erase the old tBX_RET instruction
-    MBB.erase(MBBI);
-  }
-}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index c578054a5d71..8a87cc55c829 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,8 +38,6 @@ public:
                         unsigned PredReg = 0) const;
 
   /// Code Generation virtual methods...
-  bool hasReservedCallFrame(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -59,9 +57,6 @@ public:
                              unsigned Reg) const;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
-
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 };
 }
 
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
deleted file mode 100644
index 172908da228a..000000000000
--- a/lib/Target/ARM/Thumb2HazardRecognizer.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "Thumb2HazardRecognizer.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-using namespace llvm;
-
-ScheduleHazardRecognizer::HazardType
-Thumb2HazardRecognizer::getHazardType(SUnit *SU) {
-  if (ITBlockSize) {
-    MachineInstr *MI = SU->getInstr();
-    if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1])
-      return Hazard;
-  }
-
-  return PostRAHazardRecognizer::getHazardType(SU);
-}
-
-void Thumb2HazardRecognizer::Reset() {
-  ITBlockSize = 0;
-  PostRAHazardRecognizer::Reset();
-}
-
-void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) {
-  MachineInstr *MI = SU->getInstr();
-  unsigned Opcode = MI->getOpcode();
-  if (ITBlockSize) {
-    --ITBlockSize;
-  } else if (Opcode == ARM::t2IT) {
-    unsigned Mask = MI->getOperand(1).getImm();
-    unsigned NumTZ = CountTrailingZeros_32(Mask);
-    assert(NumTZ <= 3 && "Invalid IT mask!");
-    ITBlockSize = 4 - NumTZ;
-    MachineBasicBlock::iterator I = MI;
-    for (unsigned i = 0; i < ITBlockSize; ++i) {
-      // Advance to the next instruction, skipping any dbg_value instructions.
-      do {
-        ++I;
-      } while (I->isDebugValue());
-      ITBlockMIs[ITBlockSize-1-i] = &*I;
-    }
-  }
-
-  PostRAHazardRecognizer::EmitInstruction(SU);
-}
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h
deleted file mode 100644
index 472665862e41..000000000000
--- a/lib/Target/ARM/Thumb2HazardRecognizer.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines hazard recognizers for scheduling Thumb2 functions on
-// ARM processors.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef THUMB2HAZARDRECOGNIZER_H
-#define THUMB2HAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
-
-namespace llvm {
-
-class MachineInstr;
-
-class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
-  unsigned ITBlockSize;  // No. of MIs in current IT block yet to be scheduled.
-  MachineInstr *ITBlockMIs[4];
-
-public:
-  Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
-    PostRAHazardRecognizer(ItinData) {}
-
-  virtual HazardType getHazardType(SUnit *SU);
-  virtual void Reset();
-  virtual void EmitInstruction(SUnit *SU);
-};
-
-
-} // end namespace llvm
-
-#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 442f41da8a2d..2f67257f8fa1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,7 +17,6 @@
 #include "ARMAddressingModes.h"
 #include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
-#include "Thumb2HazardRecognizer.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,15 +27,10 @@
 
 using namespace llvm;
 
-static cl::opt<unsigned>
-IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
-           cl::desc("Thumb2 if-conversion limit (default 3)"),
-           cl::init(3));
-
-static cl::opt<unsigned>
-IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
-                  cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
-                  cl::init(3));
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+           cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+           cl::init(false));
 
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
   : ARMBaseInstrInfo(STI), RI(*this, STI) {
@@ -105,21 +99,6 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
   return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
 }
 
-bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                          unsigned NumInstrs) const {
-  return NumInstrs && NumInstrs <= IfCvtLimit;
-}
-  
-bool Thumb2InstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
-                    MachineBasicBlock &FMBB, unsigned NumF) const {
-  // FIXME: Catch optimization such as:
-  //        r0 = movne
-  //        r0 = moveq
-  return NumT && NumF &&
-    NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
-}
-
 void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
@@ -155,8 +134,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOStore, 0,
+      MF.getMachineMemOperand(
+                      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
@@ -181,8 +161,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                              MachineMemOperand::MOLoad, 0,
+      MF.getMachineMemOperand(
+                      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                              MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
@@ -193,11 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
-ScheduleHazardRecognizer *Thumb2InstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
-  return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
-}
-
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 3a9f8b194d3c..f2637d7fbcab 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -38,11 +38,6 @@ public:
   bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI) const;
 
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
-  
-  bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
-                           MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
-
   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
                    unsigned DestReg, unsigned SrcReg,
@@ -70,9 +65,6 @@ public:
   /// always be able to get register info as well (through this method).
   ///
   const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
-
-  ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
 };
 
 /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 07dd0be078d7..099b8f724140 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -29,7 +29,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0c3962dd123d..cc8f61cd72a4 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -58,7 +58,7 @@ namespace {
     { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
     { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
     // Note: immediate scale is 4.
-    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0 },
+    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 1 },
     { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 1 },
     { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 1 },
     { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
@@ -68,9 +68,7 @@ namespace {
     //FIXME: Disable CMN, as CCodes are backwards from compare expectations
     //{ ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  2,0, 0 },
     { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0 },
-    { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  2,0, 0 },
+    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 1 },
     { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
     // FIXME: adr.n immediate offset must be multiple of 4.
     //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,     0,   0,    1,   0,  1,0, 0 },
@@ -106,26 +104,27 @@ namespace {
 
     // FIXME: Clean this up after splitting each Thumb load / store opcode
     // into multiple ones.
-    { ARM::t2LDRi12,ARM::tLDR,    ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2LDRs,  ARM::tLDR,    0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBi12,ARM::tLDRB,  0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBs, ARM::tLDRB,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHi12,ARM::tLDRH,  0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHs, ARM::tLDRH,   0,             0,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
+    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
     { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRi12,ARM::tSTR,    ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2STRs,  ARM::tSTR,    0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBi12,ARM::tSTRB,  0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBs, ARM::tSTRB,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHi12,ARM::tSTRH,  0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHs, ARM::tSTRH,   0,             0,   0,    1,   0,  0,0, 1 },
-
-    { ARM::t2LDM,   ARM::tLDM,    0,             0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDM_RET,0,           ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP,    0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
+    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 1 },
+    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 1 },
+    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 1 },
+    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 1 },
+
+    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 1 },
     // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
-    { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH,   0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 1 },
   };
 
   class Thumb2SizeReduce : public MachineFunctionPass {
@@ -217,8 +216,8 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
     /// Old opcode has an optional def of CPSR.
     if (HasCC)
       return true;
-    // If both old opcode does not implicit CPSR def, then it's not ok since
-    // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP.
+    // If old opcode does not implicitly define CPSR, then it's not ok since
+    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
     if (!HasImplicitCPSRDef(MI->getDesc()))
       return false;
     HasCC = true;
@@ -233,9 +232,10 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
 
 static bool VerifyLowRegs(MachineInstr *MI) {
   unsigned Opc = MI->getOpcode();
-  bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM ||
-                 Opc == ARM::t2LDM_UPD);
-  bool isLROk = (Opc == ARM::t2STM_UPD);
+  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA     ||
+                 Opc == ARM::t2LDMDB     || Opc == ARM::t2LDMIA_UPD ||
+                 Opc == ARM::t2LDMDB_UPD);
+  bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
   bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
@@ -275,29 +275,32 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   unsigned Opc = Entry.NarrowOpc1;
   unsigned OpNum = 3; // First 'rest' of operands.
   uint8_t  ImmLimit = Entry.Imm1Limit;
+
   switch (Entry.WideOpc) {
   default:
     llvm_unreachable("Unexpected Thumb2 load / store opcode!");
   case ARM::t2LDRi12:
-  case ARM::t2STRi12: {
-    unsigned BaseReg = MI->getOperand(1).getReg();
-    if (BaseReg == ARM::SP) {
+  case ARM::t2STRi12:
+    if (MI->getOperand(1).getReg() == ARM::SP) {
       Opc = Entry.NarrowOpc2;
       ImmLimit = Entry.Imm2Limit;
       HasOffReg = false;
     }
+
     Scale = 4;
     HasImmOffset = true;
+    HasOffReg = false;
     break;
-  }
   case ARM::t2LDRBi12:
   case ARM::t2STRBi12:
     HasImmOffset = true;
+    HasOffReg = false;
     break;
   case ARM::t2LDRHi12:
   case ARM::t2STRHi12:
     Scale = 2;
     HasImmOffset = true;
+    HasOffReg = false;
     break;
   case ARM::t2LDRs:
   case ARM::t2LDRBs:
@@ -310,11 +313,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     HasShift = true;
     OpNum = 4;
     break;
-  case ARM::t2LDM: {
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB: {
     unsigned BaseReg = MI->getOperand(0).getReg();
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
-    if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
+    if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
       return false;
+
     // For the non-writeback version (this one), the base register must be
     // one of the registers being loaded.
     bool isOK = false;
@@ -324,6 +328,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
         break;
       }
     }
+
     if (!isOK)
       return false;
 
@@ -331,28 +336,33 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     isLdStMul = true;
     break;
   }
-  case ARM::t2LDM_RET: {
+  case ARM::t2LDMIA_RET: {
     unsigned BaseReg = MI->getOperand(1).getReg();
     if (BaseReg != ARM::SP)
       return false;
     Opc = Entry.NarrowOpc2; // tPOP_RET
-    OpNum = 3;
+    OpNum = 2;
     isLdStMul = true;
     break;
   }
-  case ARM::t2LDM_UPD:
-  case ARM::t2STM_UPD: {
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2LDMDB_UPD:
+  case ARM::t2STMIA_UPD:
+  case ARM::t2STMDB_UPD: {
     OpNum = 0;
+
     unsigned BaseReg = MI->getOperand(1).getReg();
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm());
     if (BaseReg == ARM::SP &&
-        ((Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) ||
-         (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db))) {
+        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+         Entry.WideOpc == ARM::t2STMDB_UPD)) {
       Opc = Entry.NarrowOpc2; // tPOP or tPUSH
-      OpNum = 3;
-    } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) {
+      OpNum = 2;
+    } else if (!isARMLowRegister(BaseReg) ||
+               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+                Entry.WideOpc != ARM::t2STMIA_UPD)) {
       return false;
     }
+
     isLdStMul = true;
     break;
   }
@@ -363,6 +373,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   if (HasShift) {
     OffsetReg  = MI->getOperand(2).getReg();
     OffsetKill = MI->getOperand(2).isKill();
+
     if (MI->getOperand(3).getImm())
       // Thumb1 addressing mode doesn't support shift.
       return false;
@@ -372,23 +383,22 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
   if (HasImmOffset) {
     OffsetImm = MI->getOperand(2).getImm();
     unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
-    if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
+
+    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
       // Make sure the immediate field fits.
       return false;
   }
 
   // Add the 16-bit load / store instruction.
-  // FIXME: Thumb1 addressing mode encode both immediate and register offset.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
   if (!isLdStMul) {
-    MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
-    if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) {
-      // tLDRSB and tLDRSH do not have an immediate offset field. On the other
-      // hand, it must have an offset register.
-      // FIXME: Remove this special case.
-      MIB.addImm(OffsetImm/Scale);
-    }
+    MIB.addOperand(MI->getOperand(0));
+    MIB.addOperand(MI->getOperand(1));
+
+    if (HasImmOffset)
+      MIB.addImm(OffsetImm / Scale);
+
     assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
 
     if (HasOffReg)
@@ -423,7 +433,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
   default: break;
-  case ARM::t2ADDSri: 
+  case ARM::t2ADDSri:
   case ARM::t2ADDSrr: {
     unsigned PredReg = 0;
     if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
@@ -451,6 +461,25 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     if (MI->getOperand(1).isImm())
       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
     break;
+  case ARM::t2CMPrr: {
+    // Try to reduce to the lo-reg only version first. Why there are two
+    // versions of the instruction is a mystery.
+    // It would be nice to just have two entries in the master table that
+    // are prioritized, but the table assumes a unique entry for each
+    // source insn opcode. So for now, we hack a local entry record to use.
+    static const ReduceEntry NarrowEntry =
+      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+      return true;
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+  }
+  case ARM::t2ADDrSPi: {
+    static const ReduceEntry NarrowEntry =
+      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+    if (MI->getOperand(0).getReg() == ARM::SP)
+      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+  }
   }
   return false;
 }
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
index 5cf48662038c..2c359dade29b 100644
--- a/lib/Target/Alpha/Alpha.h
+++ b/lib/Target/Alpha/Alpha.h
@@ -18,6 +18,13 @@
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
+  namespace Alpha {
+    // These describe LDAx
+
+    static const int IMM_LOW  = -32768;
+    static const int IMM_HIGH = 32767;
+    static const int IMM_MULT = 65536;
+  }
 
   class AlphaTargetMachine;
   class FunctionPass;
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
new file mode 100644
index 000000000000..46ae286895a9
--- /dev/null
+++ b/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  struct AlphaAsmPrinter : public AsmPrinter {
+    /// Unique incrementer for label values for referencing Global values.
+    ///
+
+    explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+      : AsmPrinter(tm, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Alpha Assembly Printer";
+    }
+    void printInstruction(const MachineInstr *MI, raw_ostream &O);
+    void EmitInstruction(const MachineInstr *MI) {
+      SmallString<128> Str;
+      raw_svector_ostream OS(Str);
+      printInstruction(MI, OS);
+      OutStreamer.EmitRawText(OS.str());
+    }
+    static const char *getRegisterName(unsigned RegNo);
+
+    void printOp(const MachineOperand &MO, raw_ostream &O);
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    virtual void EmitFunctionBodyStart();
+    virtual void EmitFunctionBodyEnd(); 
+    void EmitStartOfAsmFile(Module &M);
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI,
+                               unsigned OpNo, unsigned AsmVariant,
+                               const char *ExtraCode, raw_ostream &O);
+  };
+} // end of anonymous namespace
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isReg()) {
+    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           "Not physreg??");
+    O << getRegisterName(MO.getReg());
+  } else if (MO.isImm()) {
+    O << MO.getImm();
+    assert(MO.getImm() < (1 << 30));
+  } else {
+    printOp(MO, O);
+  }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << getRegisterName(MO.getReg());
+    return;
+
+  case MachineOperand::MO_Immediate:
+    assert(0 && "printOp() does not handle immediate values");
+    return;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+      << MO.getIndex();
+    return;
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << MO.getSymbolName();
+    return;
+
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    return;
+
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    return;
+
+  default:
+    O << "<unknown operand type: " << MO.getType() << ">";
+    return;
+  }
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyStart() {
+  OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void AlphaAsmPrinter::EmitFunctionBodyEnd() {
+  OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
+}
+
+void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
+  OutStreamer.EmitRawText(StringRef("\t.set noat"));
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      unsigned AsmVariant,
+                                      const char *ExtraCode, raw_ostream &O) {
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                            unsigned OpNo, unsigned AsmVariant,
+                                            const char *ExtraCode,
+                                            raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+  O << "0(";
+  printOperand(MI, OpNo, O);
+  O << ")";
+  return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() { 
+  RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
+}
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
deleted file mode 100644
index 3aec07035d74..000000000000
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Alpha machine instructions
-// into relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-emitter"
-#include "AlphaTargetMachine.h"
-#include "AlphaRelocations.h"
-#include "Alpha.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class AlphaCodeEmitter : public MachineFunctionPass {
-    JITCodeEmitter &MCE;
-    const AlphaInstrInfo *II;
-  public:
-    static char ID;
-
-    AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
-    MCE(mce) {}
-
-    /// getBinaryCodeForInstr - This function, generated by the
-    /// CodeEmitterGenerator using TableGen, produces the binary encoding for
-    /// machine instructions.
-
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
-
-    /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-
-    unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO);
-    
-    bool runOnMachineFunction(MachineFunction &MF);
-    
-    virtual const char *getPassName() const {
-      return "Alpha Machine Code Emitter";
-    }
-    
-  private:
-    void emitBasicBlock(MachineBasicBlock &MBB);
-  };
-}
-
-char AlphaCodeEmitter::ID = 0;
-
-
-/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha
-/// code to the specified MCE object.
-
-FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
-                                                  JITCodeEmitter &JCE) {
-  return new AlphaCodeEmitter(JCE);
-}
-
-bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
-  II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
-
-  do {
-    MCE.startFunction(MF);
-    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-      emitBasicBlock(*I);
-  } while (MCE.finishFunction(MF));
-
-  return false;
-}
-
-void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
-  MCE.StartMachineBasicBlock(&MBB);
-  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-       I != E; ++I) {
-    const MachineInstr &MI = *I;
-    MCE.processDebugLoc(MI.getDebugLoc(), true);
-    switch(MI.getOpcode()) {
-    default:
-      MCE.emitWordLE(getBinaryCodeForInstr(*I));
-      break;
-    case Alpha::ALTENT:
-    case Alpha::PCLABEL:
-    case Alpha::MEMLABEL:
-    case TargetOpcode::IMPLICIT_DEF:
-    case TargetOpcode::KILL:
-      break; //skip these
-    }
-    MCE.processDebugLoc(MI.getDebugLoc(), false);
-  }
-}
-
-static unsigned getAlphaRegNumber(unsigned Reg) {
-  switch (Reg) {
-  case Alpha::R0  : case Alpha::F0  : return 0;
-  case Alpha::R1  : case Alpha::F1  : return 1;
-  case Alpha::R2  : case Alpha::F2  : return 2;
-  case Alpha::R3  : case Alpha::F3  : return 3;
-  case Alpha::R4  : case Alpha::F4  : return 4;
-  case Alpha::R5  : case Alpha::F5  : return 5;
-  case Alpha::R6  : case Alpha::F6  : return 6;
-  case Alpha::R7  : case Alpha::F7  : return 7;
-  case Alpha::R8  : case Alpha::F8  : return 8;
-  case Alpha::R9  : case Alpha::F9  : return 9;
-  case Alpha::R10 : case Alpha::F10 : return 10;
-  case Alpha::R11 : case Alpha::F11 : return 11;
-  case Alpha::R12 : case Alpha::F12 : return 12;
-  case Alpha::R13 : case Alpha::F13 : return 13;
-  case Alpha::R14 : case Alpha::F14 : return 14;
-  case Alpha::R15 : case Alpha::F15 : return 15;
-  case Alpha::R16 : case Alpha::F16 : return 16;
-  case Alpha::R17 : case Alpha::F17 : return 17;
-  case Alpha::R18 : case Alpha::F18 : return 18;
-  case Alpha::R19 : case Alpha::F19 : return 19;
-  case Alpha::R20 : case Alpha::F20 : return 20;
-  case Alpha::R21 : case Alpha::F21 : return 21;
-  case Alpha::R22 : case Alpha::F22 : return 22;
-  case Alpha::R23 : case Alpha::F23 : return 23;
-  case Alpha::R24 : case Alpha::F24 : return 24;
-  case Alpha::R25 : case Alpha::F25 : return 25;
-  case Alpha::R26 : case Alpha::F26 : return 26;
-  case Alpha::R27 : case Alpha::F27 : return 27;
-  case Alpha::R28 : case Alpha::F28 : return 28;
-  case Alpha::R29 : case Alpha::F29 : return 29;
-  case Alpha::R30 : case Alpha::F30 : return 30;
-  case Alpha::R31 : case Alpha::F31 : return 31;
-  default:
-    llvm_unreachable("Unhandled reg");
-  }
-}
-
-unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                             const MachineOperand &MO) {
-
-  unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
-                   // or things that get fixed up later by the JIT.
-
-  if (MO.isReg()) {
-    rv = getAlphaRegNumber(MO.getReg());
-  } else if (MO.isImm()) {
-    rv = MO.getImm();
-  } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
-    DEBUG(errs() << MO << " is a relocated op for " << MI << "\n");
-    unsigned Reloc = 0;
-    int Offset = 0;
-    bool useGOT = false;
-    switch (MI.getOpcode()) {
-    case Alpha::BSR:
-      Reloc = Alpha::reloc_bsr;
-      break;
-    case Alpha::LDLr:
-    case Alpha::LDQr:
-    case Alpha::LDBUr:
-    case Alpha::LDWUr:
-    case Alpha::LDSr:
-    case Alpha::LDTr:
-    case Alpha::LDAr:
-    case Alpha::STQr:
-    case Alpha::STLr:
-    case Alpha::STWr:
-    case Alpha::STBr:
-    case Alpha::STSr:
-    case Alpha::STTr:
-      Reloc = Alpha::reloc_gprellow;
-      break;
-    case Alpha::LDAHr:
-      Reloc = Alpha::reloc_gprelhigh;
-      break;
-    case Alpha::LDQl:
-      Reloc = Alpha::reloc_literal;
-      useGOT = true;
-      break;
-    case Alpha::LDAg:
-    case Alpha::LDAHg:
-      Reloc = Alpha::reloc_gpdist;
-      Offset = MI.getOperand(3).getImm();
-      break;
-    default:
-      llvm_unreachable("unknown relocatable instruction");
-    }
-    if (MO.isGlobal())
-      MCE.addRelocation(MachineRelocation::getGV(
-            MCE.getCurrentPCOffset(),
-            Reloc,
-            const_cast<GlobalValue *>(MO.getGlobal()),
-            Offset,
-            isa<Function>(MO.getGlobal()),
-            useGOT));
-    else if (MO.isSymbol())
-      MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
-                                                     Reloc, MO.getSymbolName(),
-                                                     Offset, true));
-    else
-     MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
-                                          Reloc, MO.getIndex(), Offset));
-  } else if (MO.isMBB()) {
-    MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
-                                               Alpha::reloc_bsr, MO.getMBB()));
-  } else {
-#ifndef NDEBUG
-    errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
-#endif
-    llvm_unreachable(0);
-  }
-
-  return rv;
-}
-
-#include "AlphaGenCodeEmitter.inc"
diff --git a/lib/Target/Alpha/AlphaFrameLowering.cpp b/lib/Target/Alpha/AlphaFrameLowering.cpp
new file mode 100644
index 000000000000..690cd1da9c1d
--- /dev/null
+++ b/lib/Target/Alpha/AlphaFrameLowering.cpp
@@ -0,0 +1,143 @@
+//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaFrameLowering.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+static long getUpper16(long l) {
+  long y = l / Alpha::IMM_MULT;
+  if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+    ++y;
+  return y;
+}
+
+static long getLower16(long l) {
+  long h = getUpper16(l);
+  return l - h * Alpha::IMM_MULT;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->hasVarSizedObjects();
+}
+
+void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
+  bool FP = hasFP(MF);
+
+  // Handle GOP offset
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+    .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+    .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
+
+  BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+    .addGlobalAddress(MF.getFunction());
+
+  // Get the number of bytes to allocate from the FrameInfo
+  long NumBytes = MFI->getStackSize();
+
+  if (FP)
+    NumBytes += 8; //reserve space for the old FP
+
+  // Do we need to allocate space on the stack?
+  if (NumBytes == 0) return;
+
+  unsigned Align = getStackAlignment();
+  NumBytes = (NumBytes+Align-1)/Align*Align;
+
+  // Update frame info to pretend that this is part of the stack...
+  MFI->setStackSize(NumBytes);
+
+  // adjust stack pointer: r30 -= numbytes
+  NumBytes = -NumBytes;
+  if (NumBytes >= Alpha::IMM_LOW) {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+      .addReg(Alpha::R30);
+  } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+      .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+      .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+  } else {
+    report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+  }
+
+  // Now if we need to, save the old FP and set the new
+  if (FP) {
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+      .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+    // This must be the last instr in the prolog
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+      .addReg(Alpha::R30).addReg(Alpha::R30);
+  }
+
+}
+
+void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
+                                  MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  assert((MBBI->getOpcode() == Alpha::RETDAG ||
+          MBBI->getOpcode() == Alpha::RETDAGp)
+         && "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  bool FP = hasFP(MF);
+
+  // Get the number of bytes allocated from the FrameInfo...
+  long NumBytes = MFI->getStackSize();
+
+  //now if we need to, restore the old FP
+  if (FP) {
+    //copy the FP into the SP (discards allocas)
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+      .addReg(Alpha::R15);
+    //restore the FP
+    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+      .addImm(0).addReg(Alpha::R15);
+  }
+
+  if (NumBytes != 0) {
+    if (NumBytes <= Alpha::IMM_HIGH) {
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+        .addReg(Alpha::R30);
+    } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+        .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+        .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+    } else {
+      report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+    }
+  }
+}
diff --git a/lib/Target/Alpha/AlphaFrameLowering.h b/lib/Target/Alpha/AlphaFrameLowering.h
new file mode 100644
index 000000000000..ebd9e1bac190
--- /dev/null
+++ b/lib/Target/Alpha/AlphaFrameLowering.h
@@ -0,0 +1,43 @@
+//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Alpha.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class AlphaSubtarget;
+
+class AlphaFrameLowering : public TargetFrameLowering {
+  const AlphaSubtarget &STI;
+  // FIXME: This should end in MachineFunctionInfo, not here!
+  mutable int curgpdist;
+public:
+  explicit AlphaFrameLowering(const AlphaSubtarget &sti)
+    : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d197bd15ef9c..7b91fea54af4 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -130,19 +130,6 @@ namespace {
         return (x - y) == r;
     }
 
-    static bool isFPZ(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && (CN->getValueAPF().isZero()));
-    }
-    static bool isFPZn(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && CN->getValueAPF().isNegZero());
-    }
-    static bool isFPZp(SDValue N) {
-      ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
-      return (CN && CN->getValueAPF().isPosZero());
-    }
-
   public:
     explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
       : SelectionDAGISel(TM)
@@ -253,7 +240,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
     Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0, 
                                  Chain.getValue(1));
     SDNode *CNode =
-      CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag, 
+      CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue, 
                              Chain, Chain.getValue(1));
     Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64, 
                                    SDValue(CNode, 1));
@@ -416,13 +403,13 @@ void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
      Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
      InFlag = Chain.getValue(1);
      Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other, 
-                                            MVT::Flag, Addr.getOperand(0),
+                                            MVT::Glue, Addr.getOperand(0),
                                             Chain, InFlag), 0);
    } else {
      Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
      InFlag = Chain.getValue(1);
      Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
-                                            MVT::Flag, Chain, InFlag), 0);
+                                            MVT::Glue, Chain, InFlag), 0);
    }
    InFlag = Chain.getValue(1);
 
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index ea78bf374200..9137d654edba 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Function.h"
 #include "llvm/Module.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -124,7 +125,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
 
   setOperationAction(ISD::SETCC, MVT::f32, Promote);
 
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
+  setOperationAction(ISD::BITCAST, MVT::f32, Promote);
 
   setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
 
@@ -284,8 +285,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                    DAG.getIntPtrConstant(VA.getLocMemOffset()));
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(), 0,
-                                         false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 
@@ -306,7 +306,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   }
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -431,7 +431,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                            false, false, 0);
     }
     InVals.push_back(ArgVal);
@@ -448,7 +448,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
       int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
       if (i == 0) FuncInfo->setVarArgsBase(FI);
       SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
                                 false, false, 0));
 
       if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
@@ -456,7 +456,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
       argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
       FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
       SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
                                 false, false, 0));
     }
 
@@ -537,12 +537,14 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
   const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0,
+  SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+                             MachinePointerInfo(VAListS),
                              false, false, 0);
   SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
                               DAG.getConstant(8, MVT::i64));
-  SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1),
-                                  Tmp, NULL, 0, MVT::i32, false, false, 0);
+  SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+                                  Tmp, MachinePointerInfo(),
+                                  MVT::i32, false, false, 0);
   DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
   if (N->getValueType(0).isFloatingPoint())
   {
@@ -556,7 +558,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
 
   SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
                                     DAG.getConstant(8, MVT::i64));
-  Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
+  Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+                            MachinePointerInfo(),
                             MVT::i32, false, false, 0);
 }
 
@@ -613,7 +616,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
            "Unhandled SINT_TO_FP type in custom expander!");
     SDValue LD;
     bool isDouble = Op.getValueType() == MVT::f64;
-    LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0));
+    LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
     SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
                                isDouble?MVT::f64:MVT::f32, LD);
     return FP;
@@ -627,7 +630,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
 
     src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
 
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
   }
   case ISD::ConstantPool: {
     ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -645,11 +648,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
   case ISD::GlobalAddress: {
     GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
     const GlobalValue *GV = GSDN->getGlobal();
-    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, 
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
                                             GSDN->getOffset());
     // FIXME there isn't really any debug info here
 
-    //    if (!GV->hasWeakLinkage() && !GV->isDeclaration() 
+    //    if (!GV->hasWeakLinkage() && !GV->isDeclaration()
     //        && !GV->hasLinkOnceLinkage()) {
     if (GV->hasLocalLinkage()) {
       SDValue Hi = DAG.getNode(AlphaISD::GPRelHi,  dl, MVT::i64, GA,
@@ -706,10 +709,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
 
     SDValue Result;
     if (Op.getValueType() == MVT::i32)
-      Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr,
-                              NULL, 0, MVT::i32, false, false, 0);
+      Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+                              MachinePointerInfo(), MVT::i32, false, false, 0);
     else
-      Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
+      Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+                           MachinePointerInfo(),
                            false, false, 0);
     return Result;
   }
@@ -720,17 +724,20 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
     const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
     const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
 
-    SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0,
+    SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+                              MachinePointerInfo(SrcS),
                               false, false, 0);
-    SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0,
+    SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
+                                  MachinePointerInfo(DestS),
                                   false, false, 0);
     SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
                                DAG.getConstant(8, MVT::i64));
-    Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result,
-                         NP, NULL,0, MVT::i32, false, false, 0);
+    Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+                         NP, MachinePointerInfo(), MVT::i32, false, false, 0);
     SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
                                 DAG.getConstant(8, MVT::i64));
-    return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32,
+    return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+                             MachinePointerInfo(), MVT::i32,
                              false, false, 0);
   }
   case ISD::VASTART: {
@@ -743,14 +750,15 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
 
     // vastart stores the address of the VarArgsBase and VarArgsOffset
     SDValue FR  = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
-    SDValue S1  = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0,
-                               false, false, 0);
+    SDValue S1  = DAG.getStore(Chain, dl, FR, VAListP,
+                               MachinePointerInfo(VAListS), false, false, 0);
     SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
                                 DAG.getConstant(8, MVT::i64));
     return DAG.getTruncStore(S1, dl,
                              DAG.getConstant(FuncInfo->getVarArgsOffset(),
                                              MVT::i64),
-                             SA2, NULL, 0, MVT::i32, false, false, 0);
+                             SA2, MachinePointerInfo(),
+                             MVT::i32, false, false, 0);
   }
   case ISD::RETURNADDR:
     return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
@@ -771,7 +779,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
 
   SDValue Chain, DataPtr;
   LowerVAARG(N, Chain, DataPtr, DAG);
-  SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0,
+  SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
+                            MachinePointerInfo(),
                             false, false, 0);
   Results.push_back(Res);
   Results.push_back(SDValue(Res.getNode(), 1));
@@ -795,6 +804,30 @@ AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AlphaTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'f':
+    weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 std::vector<unsigned> AlphaTargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                   EVT VT) const {
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 46e0c7dc9f80..b429e9fc1390 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -87,6 +87,11 @@ namespace llvm {
 
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::vector<unsigned> 
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                         EVT VT) const;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 92de78a364ba..099d7157ca2b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -27,7 +27,7 @@ def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi",   SDTIntBinOp, []>;
 def Alpha_rellit  : SDNode<"AlphaISD::RelLit",    SDTIntBinOp, [SDNPMayLoad]>;
 
 def retflag       : SDNode<"AlphaISD::RET_FLAG", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue]>;
 
 // These are target-independent nodes, but have target-specific formats.
 def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
@@ -35,9 +35,9 @@ def SDT_AlphaCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i64>,
                                            SDTCisVT<1, i64> ]>;
 
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_AlphaCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 //********************
 //Paterns for matching
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
deleted file mode 100644
index 12685ed17e3c..000000000000
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Alpha target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jit"
-#include "AlphaJITInfo.h"
-#include "AlphaRelocations.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
-  ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
-#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
-  ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
-
-#define BUILD_LDA(RD, RS, IMM16) \
-  ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_LDAH(RD, RS, IMM16) \
-  ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-
-#define BUILD_LDQ(RD, RS, IMM16) \
-  ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
-
-#define BUILD_JMP(RD, RS, IMM16) \
-  ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
-#define BUILD_JSR(RD, RS, IMM16) \
-  ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
-
-#define BUILD_SLLi(RD, RS, IMM8) \
-  (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
-
-#define BUILD_ORi(RD, RS, IMM8) \
-  (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
-
-#define BUILD_OR(RD, RS, RT) \
-  (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
-
-
-
-static void EmitBranchToAt(void *At, void *To) {
-  unsigned long Fn = (unsigned long)To;
-
-  unsigned *AtI = (unsigned*)At;
-
-  AtI[0] = BUILD_OR(0, 27, 27);
-
-  DEBUG(errs() << "Stub targeting " << To << "\n");
-
-  for (int x = 1; x <= 8; ++x) {
-    AtI[2*x - 1] = BUILD_SLLi(27,27,8);
-    unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
-    //DEBUG(errs() << "outputing " << hex << d << dec << "\n");
-    AtI[2*x] = BUILD_ORi(27, 27, d);
-  }
-  AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
-  AtI[18] = 0x00FFFFFF; //mark this as a stub
-}
-
-void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
-  //FIXME
-  llvm_unreachable(0);
-}
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-//static AlphaJITInfo* AlphaJTI;
-
-extern "C" {
-#ifdef __alpha
-
-  void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
-  {
-    void* Target = JITCompilerFunction(CameFromStub);
-
-    //rewrite the stub to an unconditional branch
-    if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
-      DEBUG(errs() << "Came from a stub, rewriting\n");
-      EmitBranchToAt(CameFromStub, Target);
-    } else {
-      DEBUG(errs() << "confused, didn't come from stub at " << CameFromStub
-                   << " old jump vector " << oldpv
-                   << " new jump vector " << Target << "\n");
-    }
-
-    //Change pv to new Target
-    *oldpv = (long)Target;
-  }
-
-  void AlphaCompilationCallback(void);
-
-  asm(
-      ".text\n"
-      ".globl AlphaCompilationCallbackC\n"
-      ".align 4\n"
-      ".globl AlphaCompilationCallback\n"
-      ".ent AlphaCompilationCallback\n"
-"AlphaCompilationCallback:\n"
-      //      //get JIT's GOT
-      "ldgp $29, 0($27)\n"
-      //Save args, callee saved, and perhaps others?
-      //args: $16-$21 $f16-$f21     (12)
-      //callee: $9-$14 $f2-$f9      (14)
-      //others: fp:$15 ra:$26 pv:$27 (3)
-      "lda $30, -232($30)\n"
-      "stq $16,   0($30)\n"
-      "stq $17,   8($30)\n"
-      "stq $18,  16($30)\n"
-      "stq $19,  24($30)\n"
-      "stq $20,  32($30)\n"
-      "stq $21,  40($30)\n"
-      "stt $f16, 48($30)\n"
-      "stt $f17, 56($30)\n"
-      "stt $f18, 64($30)\n"
-      "stt $f19, 72($30)\n"
-      "stt $f20, 80($30)\n"
-      "stt $f21, 88($30)\n"
-      "stq $9,   96($30)\n"
-      "stq $10, 104($30)\n"
-      "stq $11, 112($30)\n"
-      "stq $12, 120($30)\n"
-      "stq $13, 128($30)\n"
-      "stq $14, 136($30)\n"
-      "stt $f2, 144($30)\n"
-      "stt $f3, 152($30)\n"
-      "stt $f4, 160($30)\n"
-      "stt $f5, 168($30)\n"
-      "stt $f6, 176($30)\n"
-      "stt $f7, 184($30)\n"
-      "stt $f8, 192($30)\n"
-      "stt $f9, 200($30)\n"
-      "stq $15, 208($30)\n"
-      "stq $26, 216($30)\n"
-      "stq $27, 224($30)\n"
-
-      "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
-      "bis $0, $0, $17\n" //pass the roughly stub addr in second arg
-      "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
-
-      "ldq $16,   0($30)\n"
-      "ldq $17,   8($30)\n"
-      "ldq $18,  16($30)\n"
-      "ldq $19,  24($30)\n"
-      "ldq $20,  32($30)\n"
-      "ldq $21,  40($30)\n"
-      "ldt $f16, 48($30)\n"
-      "ldt $f17, 56($30)\n"
-      "ldt $f18, 64($30)\n"
-      "ldt $f19, 72($30)\n"
-      "ldt $f20, 80($30)\n"
-      "ldt $f21, 88($30)\n"
-      "ldq $9,   96($30)\n"
-      "ldq $10, 104($30)\n"
-      "ldq $11, 112($30)\n"
-      "ldq $12, 120($30)\n"
-      "ldq $13, 128($30)\n"
-      "ldq $14, 136($30)\n"
-      "ldt $f2, 144($30)\n"
-      "ldt $f3, 152($30)\n"
-      "ldt $f4, 160($30)\n"
-      "ldt $f5, 168($30)\n"
-      "ldt $f6, 176($30)\n"
-      "ldt $f7, 184($30)\n"
-      "ldt $f8, 192($30)\n"
-      "ldt $f9, 200($30)\n"
-      "ldq $15, 208($30)\n"
-      "ldq $26, 216($30)\n"
-      "ldq $27, 224($30)\n" //this was updated in the callback with the target
-
-      "lda $30, 232($30)\n" //restore sp
-      "jmp $31, ($27)\n" //jump to the new function
-      ".end AlphaCompilationCallback\n"
-      );
-#else
-  void AlphaCompilationCallback() {
-    llvm_unreachable("Cannot call AlphaCompilationCallback() on a non-Alpha arch!");
-  }
-#endif
-}
-
-TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() {
-  // The stub contains 19 4-byte instructions, aligned at 4 bytes:
-  // R0 = R27
-  // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target"  == 16 instructions
-  // JMP R27
-  // Magic number so the compilation callback can recognize the stub.
-  StubLayout Result = {19 * 4, 4};
-  return Result;
-}
-
-void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
-                                     JITCodeEmitter &JCE) {
-  //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
-  //Do things in a stupid slow way!
-  void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
-  for (int x = 0; x < 19; ++ x)
-    JCE.emitWordLE(0);
-  EmitBranchToAt(Addr, Fn);
-  DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
-  return Addr;
-}
-
-TargetJITInfo::LazyResolverFn
-AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
-  JITCompilerFunction = F;
-  //  setZerothGOTEntry((void*)AlphaCompilationCallback);
-  return AlphaCompilationCallback;
-}
-
-//These describe LDAx
-static const int IMM_LOW  = -32768;
-static const int IMM_HIGH = 32767;
-static const int IMM_MULT = 65536;
-
-static long getUpper16(long l)
-{
-  long y = l / IMM_MULT;
-  if (l % IMM_MULT > IMM_HIGH)
-    ++y;
-  if (l % IMM_MULT < IMM_LOW)
-    --y;
-  assert((short)y == y && "displacement out of range");
-  return y;
-}
-
-static long getLower16(long l)
-{
-  long h = getUpper16(l);
-  long y = l - h * IMM_MULT;
-  assert(y == (short)y && "Displacement out of range");
-  return y;
-}
-
-void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
-                            unsigned NumRelocs, unsigned char* GOTBase) {
-  for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
-    unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
-    long idx = 0;
-    bool doCommon = true;
-    switch ((Alpha::RelocationType)MR->getRelocationType()) {
-    default: llvm_unreachable("Unknown relocation type!");
-    case Alpha::reloc_literal:
-      //This is a LDQl
-      idx = MR->getGOTIndex();
-      DEBUG(errs() << "Literal relocation to slot " << idx);
-      idx = (idx - GOToffset) * 8;
-      DEBUG(errs() << " offset " << idx << "\n");
-      break;
-    case Alpha::reloc_gprellow:
-      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
-      idx = getLower16(idx);
-      DEBUG(errs() << "gprellow relocation offset " << idx << "\n");
-      DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
-           << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
-      break;
-    case Alpha::reloc_gprelhigh:
-      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
-      idx = getUpper16(idx);
-      DEBUG(errs() << "gprelhigh relocation offset " << idx << "\n");
-      DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
-            << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
-      break;
-    case Alpha::reloc_gpdist:
-      switch (*RelocPos >> 26) {
-      case 0x09: //LDAH
-        idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
-        idx = getUpper16(idx);
-        DEBUG(errs() << "LDAH: " << idx << "\n");
-        //add the relocation to the map
-        gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
-        break;
-      case 0x08: //LDA
-        assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
-               "LDAg without seeing LDAHg");
-        idx = &GOTBase[GOToffset * 8] -
-          (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
-        idx = getLower16(idx);
-        DEBUG(errs() << "LDA: " << idx << "\n");
-        break;
-      default:
-        llvm_unreachable("Cannot handle gpdist yet");
-      }
-      break;
-    case Alpha::reloc_bsr: {
-      idx = (((unsigned char*)MR->getResultPointer() -
-             (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
-      *RelocPos |= (idx & ((1 << 21)-1));
-      doCommon = false;
-      break;
-    }
-    }
-    if (doCommon) {
-      short x = (short)idx;
-      assert(x == idx);
-      *(short*)RelocPos = x;
-    }
-  }
-}
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
deleted file mode 100644
index bd358a413128..000000000000
--- a/lib/Target/Alpha/AlphaJITInfo.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_JITINFO_H
-#define ALPHA_JITINFO_H
-
-#include "llvm/Target/TargetJITInfo.h"
-#include <map>
-
-namespace llvm {
-  class TargetMachine;
-
-  class AlphaJITInfo : public TargetJITInfo {
-  protected:
-    TargetMachine &TM;
-    
-    //because gpdist are paired and relative to the pc of the first inst,
-    //we need to have some state
-    std::map<std::pair<void*, int>, void*> gpdistmap;
-  public:
-    explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
-    { useGOT = true; }
-
-    virtual StubLayout getStubLayout();
-    virtual void *emitFunctionStub(const Function* F, void *Fn,
-                                   JITCodeEmitter &JCE);
-    virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
-    virtual void relocate(void *Function, MachineRelocation *MR,
-                          unsigned NumRelocs, unsigned char* GOTBase);
-
-    /// replaceMachineCodeForFunction - Make it so that calling the function
-    /// whose machine code is at OLD turns into a call to NEW, perhaps by
-    /// overwriting OLD with a branch to NEW.  This is used for self-modifying
-    /// code.
-    ///
-    virtual void replaceMachineCodeForFunction(void *Old, void *New);
-  private:
-    static const unsigned GOToffset = 4096;
-
-  };
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 327ddb4d9a72..7667fd831d1a 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -22,7 +22,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -35,29 +35,21 @@
 #include <cstdlib>
 using namespace llvm;
 
-//These describe LDAx
-static const int IMM_LOW  = -32768;
-static const int IMM_HIGH = 32767;
-static const int IMM_MULT = 65536;
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+  : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+    TII(tii) {
+}
 
-static long getUpper16(long l)
-{
-  long y = l / IMM_MULT;
-  if (l % IMM_MULT > IMM_HIGH)
+static long getUpper16(long l) {
+  long y = l / Alpha::IMM_MULT;
+  if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
     ++y;
   return y;
 }
 
-static long getLower16(long l)
-{
+static long getLower16(long l) {
   long h = getUpper16(l);
-  return l - h * IMM_MULT;
-}
-
-AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
-  : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
-    TII(tii), curgpdist(0)
-{
+  return l - h * Alpha::IMM_MULT;
 }
 
 const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -86,19 +78,12 @@ BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return MFI->hasVarSizedObjects();
-}
-
 void AlphaRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (hasFP(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (TFI->hasFP(MF)) {
     // If we have a frame pointer, turn the adjcallstackup instruction into a
     // 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
     // <amt>'
@@ -108,7 +93,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = TFI->getStackAlignment();
       Amount = (Amount+Align-1)/Align*Align;
 
       MachineInstr *New;
@@ -146,7 +131,9 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  bool FP = hasFP(MF);
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  bool FP = TFI->hasFP(MF);
 
   while (!MI.getOperand(i).isFI()) {
     ++i;
@@ -168,7 +155,7 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   DEBUG(errs() << "Corrected Offset " << Offset
        << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
 
-  if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+  if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
     DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
           << Offset << "\n");
     //so in this case, we need to use a temporary register, and move the
@@ -186,111 +173,14 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 }
 
-
-void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
-  bool FP = hasFP(MF);
-
-  //handle GOP offset
-  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
-    .addGlobalAddress(MF.getFunction())
-    .addReg(Alpha::R27).addImm(++curgpdist);
-  BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
-    .addGlobalAddress(MF.getFunction())
-    .addReg(Alpha::R29).addImm(curgpdist);
-
-  BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
-    .addGlobalAddress(MF.getFunction());
-
-  // Get the number of bytes to allocate from the FrameInfo
-  long NumBytes = MFI->getStackSize();
-
-  if (FP)
-    NumBytes += 8; //reserve space for the old FP
-
-  // Do we need to allocate space on the stack?
-  if (NumBytes == 0) return;
-
-  unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
-  NumBytes = (NumBytes+Align-1)/Align*Align;
-
-  // Update frame info to pretend that this is part of the stack...
-  MFI->setStackSize(NumBytes);
-
-  // adjust stack pointer: r30 -= numbytes
-  NumBytes = -NumBytes;
-  if (NumBytes >= IMM_LOW) {
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
-      .addReg(Alpha::R30);
-  } else if (getUpper16(NumBytes) >= IMM_LOW) {
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
-      .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
-      .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
-  } else {
-    report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
-  }
-
-  //now if we need to, save the old FP and set the new
-  if (FP)
-  {
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
-      .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
-    //this must be the last instr in the prolog
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
-      .addReg(Alpha::R30).addReg(Alpha::R30);
-  }
-
-}
-
-void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                     MachineBasicBlock &MBB) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert((MBBI->getOpcode() == Alpha::RETDAG ||
-          MBBI->getOpcode() == Alpha::RETDAGp)
-         && "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  bool FP = hasFP(MF);
-
-  // Get the number of bytes allocated from the FrameInfo...
-  long NumBytes = MFI->getStackSize();
-
-  //now if we need to, restore the old FP
-  if (FP) {
-    //copy the FP into the SP (discards allocas)
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
-      .addReg(Alpha::R15);
-    //restore the FP
-    BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
-      .addImm(0).addReg(Alpha::R15);
-  }
-
-  if (NumBytes != 0) {
-    if (NumBytes <= IMM_HIGH) {
-      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
-        .addReg(Alpha::R30);
-    } else if (getUpper16(NumBytes) <= IMM_HIGH) {
-      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
-        .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
-      BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
-        .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
-    } else {
-      report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
-    }
-  }
-}
-
 unsigned AlphaRegisterInfo::getRARegister() const {
   return Alpha::R26;
 }
 
 unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? Alpha::R15 : Alpha::R30;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
 }
 
 unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index b164979a6311..b0d4dd03b33c 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -32,8 +32,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -41,11 +39,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -57,9 +50,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
 
   static std::string getPrettyName(unsigned reg);
-  
-private:
-  mutable int curgpdist;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
index 4dc04b88a70b..3703dd4fa9f6 100644
--- a/lib/Target/Alpha/AlphaSchedule.td
+++ b/lib/Target/Alpha/AlphaSchedule.td
@@ -50,11 +50,11 @@ def s_ftoi  : InstrItinClass;
 def s_itof  : InstrItinClass;
 def s_pseudo : InstrItinClass;
 
-//Table 2�4 Instruction Class Latency in Cycles
+//Table 2-4 Instruction Class Latency in Cycles
 //modified some
 
 def Alpha21264Itineraries : ProcessorItineraries<
-  [L0, L1, FST0, FST1, U0, U1, FA, FM], [
+  [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
   InstrItinData<s_ild    , [InstrStage<3, [L0, L1]>]>,
   InstrItinData<s_fld    , [InstrStage<4, [L0, L1]>]>,
   InstrItinData<s_ist    , [InstrStage<0, [L0, L1]>]>,
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index fc9be03d2f30..b53533b8ebcb 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Alpha.h"
-#include "AlphaJITInfo.h"
 #include "AlphaMCAsmInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/PassManager.h"
@@ -29,8 +28,7 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
                                        const std::string &FS)
   : LLVMTargetMachine(T, TT),
     DataLayout("e-f128:128:128-n64"),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
-    JITInfo(*this),
+    FrameLowering(Subtarget),
     Subtarget(TT, FS),
     TLInfo(*this),
     TSInfo(*this) {
@@ -54,9 +52,3 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
   PM.add(createAlphaLLRPPass(*this));
   return false;
 }
-bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                        CodeGenOpt::Level OptLevel,
-                                        JITCodeEmitter &JCE) {
-  PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
-  return false;
-}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 153944e4bbbd..26238fbbc431 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -14,14 +14,14 @@
 #ifndef ALPHA_TARGETMACHINE_H
 #define ALPHA_TARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "AlphaInstrInfo.h"
-#include "AlphaJITInfo.h"
 #include "AlphaISelLowering.h"
+#include "AlphaFrameLowering.h"
 #include "AlphaSelectionDAGInfo.h"
 #include "AlphaSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
 
@@ -30,8 +30,7 @@ class GlobalValue;
 class AlphaTargetMachine : public LLVMTargetMachine {
   const TargetData DataLayout;       // Calculates type size & alignment
   AlphaInstrInfo InstrInfo;
-  TargetFrameInfo FrameInfo;
-  AlphaJITInfo JITInfo;
+  AlphaFrameLowering FrameLowering;
   AlphaSubtarget Subtarget;
   AlphaTargetLowering TLInfo;
   AlphaSelectionDAGInfo TSInfo;
@@ -41,7 +40,9 @@ public:
                      const std::string &FS);
 
   virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  virtual const TargetFrameInfo  *getFrameInfo() const { return &FrameInfo; }
+  virtual const TargetFrameLowering  *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const AlphaSubtarget   *getSubtargetImpl() const{ return &Subtarget; }
   virtual const AlphaRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
@@ -53,15 +54,10 @@ public:
     return &TSInfo;
   }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
-  virtual AlphaJITInfo* getJITInfo() {
-    return &JITInfo;
-  }
 
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              JITCodeEmitter &JCE);
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
deleted file mode 100644
index 5428cb96173b..000000000000
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format Alpha assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  struct AlphaAsmPrinter : public AsmPrinter {
-    /// Unique incrementer for label values for referencing Global values.
-    ///
-
-    explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
-      : AsmPrinter(tm, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "Alpha Assembly Printer";
-    }
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);
-    void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    static const char *getRegisterName(unsigned RegNo);
-
-    void printOp(const MachineOperand &MO, raw_ostream &O);
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    virtual void EmitFunctionBodyStart();
-    virtual void EmitFunctionBodyEnd(); 
-    void EmitStartOfAsmFile(Module &M);
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI,
-                               unsigned OpNo, unsigned AsmVariant,
-                               const char *ExtraCode, raw_ostream &O);
-  };
-} // end of anonymous namespace
-
-#include "AlphaGenAsmWriter.inc"
-
-void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  if (MO.isReg()) {
-    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
-           "Not physreg??");
-    O << getRegisterName(MO.getReg());
-  } else if (MO.isImm()) {
-    O << MO.getImm();
-    assert(MO.getImm() < (1 << 30));
-  } else {
-    printOp(MO, O);
-  }
-}
-
-
-void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    O << getRegisterName(MO.getReg());
-    return;
-
-  case MachineOperand::MO_Immediate:
-    llvm_unreachable("printOp() does not handle immediate values");
-    return;
-
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
-      << MO.getIndex();
-    return;
-
-  case MachineOperand::MO_ExternalSymbol:
-    O << MO.getSymbolName();
-    return;
-
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    return;
-
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    return;
-
-  default:
-    O << "<unknown operand type: " << MO.getType() << ">";
-    return;
-  }
-}
-
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyStart() {
-  OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyEnd() {
-  OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
-}
-
-void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
-  OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
-  OutStreamer.EmitRawText(StringRef("\t.set noat"));
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,
-                                      const char *ExtraCode, raw_ostream &O) {
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo, unsigned AsmVariant,
-                                            const char *ExtraCode,
-                                            raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-  O << "0(";
-  printOperand(MI, OpNo, O);
-  O << ")";
-  return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaAsmPrinter() { 
-  RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
-}
diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 992c21813149..000000000000
--- a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMAlphaAsmPrinter
-  AlphaAsmPrinter.cpp
-  )
-add_dependencies(LLVMAlphaAsmPrinter AlphaCodeGenTable_gen)
diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile
deleted file mode 100644
index ea13c38df4aa..000000000000
--- a/lib/Target/Alpha/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaAsmPrinter
-
-# Hack: we need to include 'main' alpha target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index fbf7f3ab6b30..454262ad631d 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -5,19 +5,18 @@ tablegen(AlphaGenRegisterNames.inc -gen-register-enums)
 tablegen(AlphaGenRegisterInfo.inc -gen-register-desc)
 tablegen(AlphaGenInstrNames.inc -gen-instr-enums)
 tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
-tablegen(AlphaGenCodeEmitter.inc -gen-emitter)
 tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
 tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
 tablegen(AlphaGenCallingConv.inc -gen-callingconv)
 tablegen(AlphaGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(AlphaCodeGen
+  AlphaAsmPrinter.cpp
   AlphaBranchSelector.cpp
-  AlphaCodeEmitter.cpp
   AlphaInstrInfo.cpp
   AlphaISelDAGToDAG.cpp
   AlphaISelLowering.cpp
-  AlphaJITInfo.cpp
+  AlphaFrameLowering.cpp
   AlphaLLRP.cpp
   AlphaMCAsmInfo.cpp
   AlphaRegisterInfo.cpp
@@ -26,4 +25,4 @@ add_llvm_target(AlphaCodeGen
   AlphaSelectionDAGInfo.cpp
   )
 
-target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
index 54d53abea5f1..9564be680e51 100644
--- a/lib/Target/Alpha/Makefile
+++ b/lib/Target/Alpha/Makefile
@@ -14,10 +14,10 @@ TARGET = Alpha
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
                 AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
-                AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \
+                AlphaGenInstrInfo.inc \
                 AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
                 AlphaGenCallingConv.inc AlphaGenSubtarget.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
deleted file mode 100644
index 6ba258beb2b8..000000000000
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Blackfin.h"
-#include "BlackfinInstrInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class BlackfinAsmPrinter : public AsmPrinter {
-  public:
-    BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "Blackfin Assembly Printer";
-    }
-
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
-    static const char *getRegisterName(unsigned RegNo);
-
-    void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O);
-  };
-} // end of anonymous namespace
-
-#include "BlackfinGenAsmWriter.inc"
-
-extern "C" void LLVMInitializeBlackfinAsmPrinter() {
-  RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
-}
-
-void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                      raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
-           "Virtual registers should be already mapped!");
-    O << getRegisterName(MO.getReg());
-    break;
-
-  case MachineOperand::MO_Immediate:
-    O << MO.getImm();
-    break;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    printOffset(MO.getOffset(), O);
-    break;
-  case MachineOperand::MO_ExternalSymbol:
-    O << *GetExternalSymbolSymbol(MO.getSymbolName());
-    break;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
-      << MO.getIndex();
-    break;
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    break;
-  default:
-    llvm_unreachable("<unknown operand type>");
-    break;
-  }
-}
-
-void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
-                                            raw_ostream &O) {
-  printOperand(MI, opNum, O);
-
-  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
-    return;
-
-  O << " + ";
-  printOperand(MI, opNum+1, O);
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
-                                         unsigned OpNo, unsigned AsmVariant,
-                                         const char *ExtraCode,
-                                         raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
-    switch (ExtraCode[0]) {
-    default: return true;  // Unknown modifier.
-    case 'r':
-      break;
-    }
-  }
-
-  printOperand(MI, OpNo, O);
-
-  return false;
-}
-
-bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                               unsigned OpNo,
-                                               unsigned AsmVariant,
-                                               const char *ExtraCode,
-                                               raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true;  // Unknown modifier
-
-  O << '[';
-  printOperand(MI, OpNo, O);
-  O << ']';
-
-  return false;
-}
diff --git a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 795aebfe2b8e..000000000000
--- a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMBlackfinAsmPrinter
-  BlackfinAsmPrinter.cpp
-  )
-add_dependencies(LLVMBlackfinAsmPrinter BlackfinCodeGenTable_gen)
diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile
deleted file mode 100644
index a106a2382a14..000000000000
--- a/lib/Target/Blackfin/AsmPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Blackfin/AsmPrinter/Makefile -------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinAsmPrinter
-
-# Hack: we need to include 'main' Blackfin target directory to grab private
-# headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
new file mode 100644
index 000000000000..6ba258beb2b8
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
@@ -0,0 +1,156 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Blackfin.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class BlackfinAsmPrinter : public AsmPrinter {
+  public:
+    BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Blackfin Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
+    static const char *getRegisterName(unsigned RegNo);
+
+    void EmitInstruction(const MachineInstr *MI) {
+      SmallString<128> Str;
+      raw_svector_ostream OS(Str);
+      printInstruction(MI, OS);
+      OutStreamer.EmitRawText(OS.str());
+    }
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+  };
+} // end of anonymous namespace
+
+#include "BlackfinGenAsmWriter.inc"
+
+extern "C" void LLVMInitializeBlackfinAsmPrinter() {
+  RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
+}
+
+void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           "Virtual registers should be already mapped!");
+    O << getRegisterName(MO.getReg());
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    printOffset(MO.getOffset(), O);
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+      << MO.getIndex();
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+  default:
+    llvm_unreachable("<unknown operand type>");
+    break;
+  }
+}
+
+void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
+                                            raw_ostream &O) {
+  printOperand(MI, opNum, O);
+
+  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+    return;
+
+  O << " + ";
+  printOperand(MI, opNum+1, O);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+                                         unsigned OpNo, unsigned AsmVariant,
+                                         const char *ExtraCode,
+                                         raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'r':
+      break;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+
+  return false;
+}
+
+bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                               unsigned OpNo,
+                                               unsigned AsmVariant,
+                                               const char *ExtraCode,
+                                               raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true;  // Unknown modifier
+
+  O << '[';
+  printOperand(MI, OpNo, O);
+  O << ']';
+
+  return false;
+}
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/lib/Target/Blackfin/BlackfinFrameLowering.cpp
new file mode 100644
index 000000000000..08bb9522b7c3
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinFrameLowering.cpp
@@ -0,0 +1,124 @@
+//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinFrameLowering.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return DisableFramePointerElim(MF) ||
+    MFI->adjustsStack() || MFI->hasVarSizedObjects();
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const BlackfinRegisterInfo *RegInfo =
+    static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const BlackfinInstrInfo &TII =
+    *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  int FrameSize = MFI->getStackSize();
+  if (FrameSize%4) {
+    FrameSize = (FrameSize+3) & ~3;
+    MFI->setStackSize(FrameSize);
+  }
+
+  if (!hasFP(MF)) {
+    assert(!MFI->adjustsStack() &&
+           "FP elimination on a non-leaf function is not supported");
+    RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+    return;
+  }
+
+  // emit a LINK instruction
+  if (FrameSize <= 0x3ffff) {
+    BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+    return;
+  }
+
+  // Frame is too big, do a manual LINK:
+  // [--SP] = RETS;
+  // [--SP] = FP;
+  // FP = SP;
+  // P1 = -FrameSize;
+  // SP = SP + P1;
+  BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+    .addReg(BF::RETS, RegState::Kill);
+  BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+    .addReg(BF::FP, RegState::Kill);
+  BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+    .addReg(BF::SP);
+  RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+  BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+    .addReg(BF::SP, RegState::Kill)
+    .addReg(BF::P1, RegState::Kill);
+
+}
+
+void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
+                                     MachineBasicBlock &MBB) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const BlackfinRegisterInfo *RegInfo =
+    static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const BlackfinInstrInfo &TII =
+    *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  int FrameSize = MFI->getStackSize();
+  assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+  if (!hasFP(MF)) {
+    assert(!MFI->adjustsStack() &&
+           "FP elimination on a non-leaf function is not supported");
+    RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+    return;
+  }
+
+  // emit an UNLINK instruction
+  BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+void BlackfinFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                     RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const BlackfinRegisterInfo *RegInfo =
+    static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const TargetRegisterClass *RC = BF::DPRegisterClass;
+
+  if (RegInfo->requiresRegisterScavenging(MF)) {
+    // Reserve a slot close to SP or frame pointer.
+    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment(),
+                                                       false));
+  }
+}
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h
new file mode 100644
index 000000000000..3d2ee251d3bd
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -0,0 +1,46 @@
+//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Blackfin.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class BlackfinSubtarget;
+
+class BlackfinFrameLowering : public TargetFrameLowering {
+protected:
+  const BlackfinSubtarget &STI;
+
+public:
+  explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 80ee1075aada..9df2aeeecbc9 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -51,8 +51,7 @@ namespace {
 
   private:
     SDNode *Select(SDNode *N);
-    bool SelectADDRspii(SDNode *Op, SDValue Addr,
-                        SDValue &Base, SDValue &Offset);
+    bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
 
     // Walk the DAG after instruction selection, fixing register class issues.
     void FixRegisterClasses(SelectionDAG &DAG);
@@ -94,8 +93,7 @@ SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
   return SelectCode(N);
 }
 
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op,
-                                          SDValue Addr,
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
                                           SDValue &Base,
                                           SDValue &Offset) {
   FrameIndexSDNode *FIN = 0;
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 6e828e1b36b3..dd27d0a0ff36 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "BlackfinISelLowering.h"
 #include "BlackfinTargetMachine.h"
 #include "llvm/Function.h"
+#include "llvm/Type.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -207,7 +208,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
       unsigned ObjSize = VA.getLocVT().getStoreSize();
       int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo(),
                                    false, false, 0));
     }
   }
@@ -332,8 +334,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       SDValue OffsetN = DAG.getIntPtrConstant(Offset);
       OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
-                                         PseudoSourceValue::getStack(),
-                                         Offset, false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 
@@ -364,7 +365,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   std::vector<EVT> NodeTys;
   NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
   SDValue Ops[] = { Chain, Callee, InFlag };
   Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
                       InFlag.getNode() ? 3 : 2);
@@ -431,7 +432,7 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
                                SDValue(CarryIn, 0));
 
   // Add operands, produce sum and carry flag
-  SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+  SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
                                    Op.getOperand(0), Op.getOperand(1));
 
   // Store intermediate carry from Sum
@@ -439,11 +440,11 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
                                       /* flag= */ SDValue(Sum, 1));
 
   // Add incoming carry, again producing an output flag
-  Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+  Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
                            SDValue(Sum, 0), SDValue(CarryIn, 0));
 
   // Update AC0 with the intermediate carry, producing a flag.
-  SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Flag,
+  SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
                                         SDValue(Carry1, 0));
 
   // Compose (i32, flag) pair
@@ -549,6 +550,52 @@ BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+BlackfinTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+
+    // Blackfin-specific constraints
+  case 'a':
+  case 'd':
+  case 'z':
+  case 'D':
+  case 'W':
+  case 'e':
+  case 'b':
+  case 'v':
+  case 'f':
+  case 'c':
+  case 't':
+  case 'u':
+  case 'k':
+  case 'x':
+  case 'y':
+  case 'w':
+    return CW_Register;
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'Z':
+  case 'Y':
+    return CW_SpecificReg;
+  }
+  return weight;
+}
+
 /// getRegForInlineAsmConstraint - Return register no and class for a C_Register
 /// constraint.
 std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 6bebcc320ce9..15a745fa8724 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -39,6 +39,12 @@ namespace llvm {
                                     SelectionDAG &DAG) const;
 
     ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
     getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
     std::vector<unsigned>
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 8034a7fd0fc2..5b59d7769c7e 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -23,17 +23,17 @@ def SDT_BfinCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                         SDTCisVT<1, i32> ]>;
 
 def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
-                              [SDNPHasChain, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOutGlue]>;
 def BfinCallseqEnd   : SDNode<"ISD::CALLSEQ_END",   SDT_BfinCallSeqEnd,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def SDT_BfinCall  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def BfinCall      : SDNode<"BFISD::CALL", SDT_BfinCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
 
 def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
-                    [SDNPHasChain, SDNPOptInFlag]>;
+                    [SDNPHasChain, SDNPOptInGlue]>;
 
 def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
 
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index a51831263e90..b4a9b84f9e43 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -22,7 +22,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -50,6 +50,8 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector
 BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   using namespace BF;
   BitVector Reserved(getNumRegs());
   Reserved.set(AZ);
@@ -70,20 +72,11 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(L3);
   Reserved.set(SP);
   Reserved.set(RETS);
-  if (hasFP(MF))
+  if (TFI->hasFP(MF))
     Reserved.set(FP);
   return Reserved;
 }
 
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) ||
-    MFI->adjustsStack() || MFI->hasVarSizedObjects();
-}
-
 bool BlackfinRegisterInfo::
 requiresRegisterScavenging(const MachineFunction &MF) const {
   return true;
@@ -161,7 +154,9 @@ void BlackfinRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
     int64_t Amount = I->getOperand(0).getImm();
     if (Amount != 0) {
       assert(Amount%4 == 0 && "Unaligned call frame size");
@@ -196,6 +191,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc DL = MI.getDebugLoc();
 
   unsigned FIPos;
@@ -208,7 +204,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
     + MI.getOperand(FIPos+1).getImm();
   unsigned BaseReg = BF::FP;
-  if (hasFP(MF)) {
+  if (TFI->hasFP(MF)) {
     assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
   } else {
     BaseReg = BF::SP;
@@ -329,93 +325,15 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 }
 
-void BlackfinRegisterInfo::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                     RegScavenger *RS) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  const TargetRegisterClass *RC = BF::DPRegisterClass;
-  if (requiresRegisterScavenging(MF)) {
-    // Reserve a slot close to SP or frame pointer.
-    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
-                                                       RC->getAlignment(),
-                                                       false));
-  }
-}
-
-// Emit a prologue that sets up a stack frame.
-// On function entry, R0-R2 and P0 may hold arguments.
-// R3, P1, and P2 may be used as scratch registers
-void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  int FrameSize = MFI->getStackSize();
-  if (FrameSize%4) {
-    FrameSize = (FrameSize+3) & ~3;
-    MFI->setStackSize(FrameSize);
-  }
-
-  if (!hasFP(MF)) {
-    assert(!MFI->adjustsStack() &&
-           "FP elimination on a non-leaf function is not supported");
-    adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
-    return;
-  }
-
-  // emit a LINK instruction
-  if (FrameSize <= 0x3ffff) {
-    BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
-    return;
-  }
-
-  // Frame is too big, do a manual LINK:
-  // [--SP] = RETS;
-  // [--SP] = FP;
-  // FP = SP;
-  // P1 = -FrameSize;
-  // SP = SP + P1;
-  BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
-    .addReg(BF::RETS, RegState::Kill);
-  BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
-    .addReg(BF::FP, RegState::Kill);
-  BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
-    .addReg(BF::SP);
-  loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
-  BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
-    .addReg(BF::SP, RegState::Kill)
-    .addReg(BF::P1, RegState::Kill);
-
-}
-
-void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                        MachineBasicBlock &MBB) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  int FrameSize = MFI->getStackSize();
-  assert(FrameSize%4 == 0 && "Misaligned frame size");
-
-  if (!hasFP(MF)) {
-    assert(!MFI->adjustsStack() &&
-           "FP elimination on a non-leaf function is not supported");
-    adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
-    return;
-  }
-
-  // emit an UNLINK instruction
-  BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
-}
-
 unsigned BlackfinRegisterInfo::getRARegister() const {
   return BF::RETS;
 }
 
 unsigned
 BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? BF::FP : BF::SP;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? BF::FP : BF::SP;
 }
 
 unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index bb83c34f8003..642b8adaf9b5 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -41,8 +41,6 @@ namespace llvm {
       return &BF::PRegClass;
     }
 
-    bool hasFP(const MachineFunction &MF) const;
-
     // bool hasReservedCallFrame(MachineFunction &MF) const;
 
     bool requiresRegisterScavenging(const MachineFunction &MF) const;
@@ -54,12 +52,6 @@ namespace llvm {
     void eliminateFrameIndex(MachineBasicBlock::iterator II,
                              int SPAdj, RegScavenger *RS = NULL) const;
 
-    void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                              RegScavenger *RS) const;
-
-    void emitPrologue(MachineFunction &MF) const;
-    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
     unsigned getFrameRegister(const MachineFunction &MF) const;
     unsigned getRARegister() const;
 
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index e1cfae904401..f5dd439a8111 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -252,9 +252,9 @@ def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
     PClass::iterator
     PClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       return allocation_order_begin(MF)
-             + (RI->hasFP(MF) ? 7 : 6);
+             + (TFI->hasFP(MF) ? 7 : 6);
     }
   }];
 }
@@ -275,9 +275,9 @@ def DP : RegisterClass<"BF", [i32], 32,
     DPClass::iterator
     DPClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       return allocation_order_begin(MF)
-             + (RI->hasFP(MF) ? 15 : 14);
+             + (TFI->hasFP(MF) ? 15 : 14);
     }
   }];
 }
@@ -295,9 +295,9 @@ def GR : RegisterClass<"BF", [i32], 32,
     GRClass::iterator
     GRClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       return allocation_order_begin(MF)
-             + (RI->hasFP(MF) ? 31 : 30);
+             + (TFI->hasFP(MF) ? 31 : 30);
     }
   }];
 }
@@ -318,9 +318,9 @@ def ALL : RegisterClass<"BF", [i32], 32,
     ALLClass::iterator
     ALLClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       return allocation_order_begin(MF)
-             + (RI->hasFP(MF) ? 31 : 30);
+             + (TFI->hasFP(MF) ? 31 : 30);
     }
   }];
 }
@@ -334,9 +334,9 @@ def PI : RegisterClass<"BF", [i32], 32,
     PIClass::iterator
     PIClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       return allocation_order_begin(MF)
-             + (RI->hasFP(MF) ? 11 : 10);
+             + (TFI->hasFP(MF) ? 11 : 10);
     }
   }];
 }
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 66a2f689bf10..e11920f568a2 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -33,7 +33,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
     TLInfo(*this),
     TSInfo(*this),
     InstrInfo(Subtarget),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
+    FrameLowering(Subtarget) {
 }
 
 bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index a63aa54b9f55..29b2b177fc3c 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -14,14 +14,15 @@
 #ifndef BLACKFINTARGETMACHINE_H
 #define BLACKFINTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "BlackfinInstrInfo.h"
-#include "BlackfinSubtarget.h"
+#include "BlackfinIntrinsicInfo.h"
 #include "BlackfinISelLowering.h"
+#include "BlackfinFrameLowering.h"
+#include "BlackfinSubtarget.h"
 #include "BlackfinSelectionDAGInfo.h"
-#include "BlackfinIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
 
@@ -31,14 +32,16 @@ namespace llvm {
     BlackfinTargetLowering TLInfo;
     BlackfinSelectionDAGInfo TSInfo;
     BlackfinInstrInfo InstrInfo;
-    TargetFrameInfo FrameInfo;
+    BlackfinFrameLowering FrameLowering;
     BlackfinIntrinsicInfo IntrinsicInfo;
   public:
     BlackfinTargetMachine(const Target &T, const std::string &TT,
                           const std::string &FS);
 
     virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
-    virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+    virtual const TargetFrameLowering *getFrameLowering() const {
+      return &FrameLowering;
+    }
     virtual const BlackfinSubtarget *getSubtargetImpl() const {
       return &Subtarget;
     }
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
index f8847d057da6..a47299ff1611 100644
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -12,13 +12,17 @@ tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
 tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
 
 add_llvm_target(BlackfinCodeGen
+  BlackfinAsmPrinter.cpp
   BlackfinInstrInfo.cpp
   BlackfinIntrinsicInfo.cpp
   BlackfinISelDAGToDAG.cpp
   BlackfinISelLowering.cpp
+  BlackfinFrameLowering.cpp
   BlackfinMCAsmInfo.cpp
   BlackfinRegisterInfo.cpp
   BlackfinSubtarget.cpp
   BlackfinTargetMachine.cpp
   BlackfinSelectionDAGInfo.cpp
   )
+
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
index 339bef9981f4..5eb8e9a992b9 100644
--- a/lib/Target/Blackfin/Makefile
+++ b/lib/Target/Blackfin/Makefile
@@ -18,7 +18,7 @@ BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \
                 BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \
 		BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 270fff6064ad..6c555a3e9d1f 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -47,12 +47,16 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Config/config.h"
 #include <algorithm>
+// Some ms header decided to define setjmp as _setjmp, undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
 using namespace llvm;
 
-extern "C" void LLVMInitializeCBackendTarget() { 
+extern "C" void LLVMInitializeCBackendTarget() {
   // Register the target.
   RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
 }
@@ -72,8 +76,10 @@ namespace {
   class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
   public:
     static char ID;
-    CBackendNameAllUsedStructsAndMergeFunctions() 
-      : ModulePass(ID) {}
+    CBackendNameAllUsedStructsAndMergeFunctions()
+        : ModulePass(ID) {
+          initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
+        }
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<FindUsedTypes>();
     }
@@ -110,9 +116,10 @@ namespace {
   public:
     static char ID;
     explicit CWriter(formatted_raw_ostream &o)
-      : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), 
+      : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
         TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
         NextAnonValueNumber(0) {
+      initializeLoopInfoPass(*PassRegistry::getPassRegistry());
       FPCounter = 0;
     }
 
@@ -183,7 +190,7 @@ namespace {
         Out << ")";
       }
     }
-    
+
     void writeOperand(Value *Operand, bool Static = false);
     void writeInstComputationInline(Instruction &I);
     void writeOperandInternal(Value *Operand, bool Static = false);
@@ -224,7 +231,7 @@ namespace {
         return ByValParams.count(A);
       return isa<GlobalVariable>(V) || isDirectAlloca(V);
     }
-    
+
     // isInlinableInst - Attempt to inline instructions into their uses to build
     // trees as much as possible.  To do this, we have to consistently decide
     // what is acceptable to inline, so that variable declarations don't get
@@ -233,7 +240,7 @@ namespace {
     static bool isInlinableInst(const Instruction &I) {
       // Always inline cmp instructions, even if they are shared by multiple
       // expressions.  GCC generates horrible code if we don't.
-      if (isa<CmpInst>(I)) 
+      if (isa<CmpInst>(I))
         return true;
 
       // Must be an expression, must be used exactly once.  If it is dead, we
@@ -270,14 +277,14 @@ namespace {
         return 0;
       return AI;
     }
-    
+
     // isInlineAsm - Check if the instruction is a call to an inline asm chunk
     static bool isInlineAsm(const Instruction& I) {
       if (const CallInst *CI = dyn_cast<CallInst>(&I))
         return isa<InlineAsm>(CI->getCalledValue());
       return false;
     }
-    
+
     // Instruction visitation functions
     friend class InstVisitor<CWriter>;
 
@@ -310,7 +317,7 @@ namespace {
     void visitStoreInst (StoreInst  &I);
     void visitGetElementPtrInst(GetElementPtrInst &I);
     void visitVAArgInst (VAArgInst &I);
-    
+
     void visitInsertElementInst(InsertElementInst &I);
     void visitExtractElementInst(ExtractElementInst &I);
     void visitShuffleVectorInst(ShuffleVectorInst &SVI);
@@ -346,7 +353,7 @@ char CWriter::ID = 0;
 
 static std::string CBEMangle(const std::string &S) {
   std::string Result;
-  
+
   for (unsigned i = 0, e = S.size(); i != e; ++i)
     if (isalnum(S[i]) || S[i] == '_') {
       Result += S[i];
@@ -375,7 +382,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
   for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
        TI != TE; ) {
     TypeSymbolTable::iterator I = TI++;
-    
+
     // If this isn't a struct or array type, remove it from our set of types
     // to name. This simplifies emission later.
     if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
@@ -403,8 +410,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
         ++RenameCounter;
       Changed = true;
     }
-      
-      
+
+
   // Loop over all external functions and globals.  If we have two with
   // identical names, merge them.
   // FIXME: This code should disappear when we don't allow values with the same
@@ -440,7 +447,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
       }
     }
   }
-  
+
   return Changed;
 }
 
@@ -479,20 +486,20 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
     FunctionInnards << "void";
   }
   FunctionInnards << ')';
-  printType(Out, RetTy, 
+  printType(Out, RetTy,
       /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
 }
 
 raw_ostream &
 CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
                          const std::string &NameSoFar) {
-  assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && 
+  assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
          "Invalid type for printSimpleType");
   switch (Ty->getTypeID()) {
   case Type::VoidTyID:   return Out << "void " << NameSoFar;
   case Type::IntegerTyID: {
     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
-    if (NumBits == 1) 
+    if (NumBits == 1)
       return Out << "bool " << NameSoFar;
     else if (NumBits <= 8)
       return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
@@ -502,7 +509,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
       return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
     else if (NumBits <= 64)
       return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
-    else { 
+    else {
       assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
       return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
     }
@@ -514,14 +521,18 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
   case Type::X86_FP80TyID:
   case Type::PPC_FP128TyID:
   case Type::FP128TyID:  return Out << "long double " << NameSoFar;
-      
+
+  case Type::X86_MMXTyID:
+    return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+                     " __attribute__((vector_size(64))) " + NameSoFar);
+
   case Type::VectorTyID: {
     const VectorType *VTy = cast<VectorType>(Ty);
     return printSimpleType(Out, VTy->getElementType(), isSigned,
                      " __attribute__((vector_size(" +
                      utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
   }
-    
+
   default:
 #ifndef NDEBUG
     errs() << "Unknown primitive type: " << *Ty << "\n";
@@ -575,7 +586,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
       FunctionInnards << "void";
     }
     FunctionInnards << ')';
-    printType(Out, FTy->getReturnType(), 
+    printType(Out, FTy->getReturnType(),
       /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
     return Out;
   }
@@ -759,7 +770,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) {
 }
 
 /// Print out the casting for a cast operation. This does the double casting
-/// necessary for conversion to the destination type, if necessary. 
+/// necessary for conversion to the destination type, if necessary.
 /// @brief Print a cast
 void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
   // Print the destination type cast
@@ -782,7 +793,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
       printSimpleType(Out, DstTy, false);
       Out << ')';
       break;
-    case Instruction::SExt: 
+    case Instruction::SExt:
     case Instruction::FPToSI: // For these, make sure we get a signed dest
       Out << '(';
       printSimpleType(Out, DstTy, true);
@@ -803,7 +814,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
     case Instruction::SIToFP:
     case Instruction::SExt:
       Out << '(';
-      printSimpleType(Out, SrcTy, true); 
+      printSimpleType(Out, SrcTy, true);
       Out << ')';
       break;
     case Instruction::IntToPtr:
@@ -895,7 +906,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     case Instruction::AShr:
     {
       Out << '(';
-      bool NeedsClosingParens = printConstExprCast(CE, Static); 
+      bool NeedsClosingParens = printConstExprCast(CE, Static);
       printConstantWithCast(CE->getOperand(0), CE->getOpcode());
       switch (CE->getOpcode()) {
       case Instruction::Add:
@@ -905,10 +916,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       case Instruction::Mul:
       case Instruction::FMul: Out << " * "; break;
       case Instruction::URem:
-      case Instruction::SRem: 
+      case Instruction::SRem:
       case Instruction::FRem: Out << " % "; break;
-      case Instruction::UDiv: 
-      case Instruction::SDiv: 
+      case Instruction::UDiv:
+      case Instruction::SDiv:
       case Instruction::FDiv: Out << " / "; break;
       case Instruction::And: Out << " & "; break;
       case Instruction::Or:  Out << " | "; break;
@@ -920,7 +931,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
         switch (CE->getPredicate()) {
           case ICmpInst::ICMP_EQ: Out << " == "; break;
           case ICmpInst::ICMP_NE: Out << " != "; break;
-          case ICmpInst::ICMP_SLT: 
+          case ICmpInst::ICMP_SLT:
           case ICmpInst::ICMP_ULT: Out << " < "; break;
           case ICmpInst::ICMP_SLE:
           case ICmpInst::ICMP_ULE: Out << " <= "; break;
@@ -940,8 +951,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       return;
     }
     case Instruction::FCmp: {
-      Out << '('; 
-      bool NeedsClosingParens = printConstExprCast(CE, Static); 
+      Out << '(';
+      bool NeedsClosingParens = printConstExprCast(CE, Static);
       if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
         Out << "0";
       else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
@@ -1006,18 +1017,18 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     else {
       Out << "((";
       printSimpleType(Out, Ty, false) << ')';
-      if (CI->isMinValue(true)) 
+      if (CI->isMinValue(true))
         Out << CI->getZExtValue() << 'u';
       else
         Out << CI->getSExtValue();
       Out << ')';
     }
     return;
-  } 
+  }
 
   switch (CPV->getType()->getTypeID()) {
   case Type::FloatTyID:
-  case Type::DoubleTyID: 
+  case Type::DoubleTyID:
   case Type::X86_FP80TyID:
   case Type::PPC_FP128TyID:
   case Type::FP128TyID: {
@@ -1027,8 +1038,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       // Because of FP precision problems we must load from a stack allocated
       // value that holds the value in hex.
       Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
-                       "float" : 
-                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? 
+                       "float" :
+                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
                        "double" :
                        "long double")
           << "*)&FPConstant" << I->second << ')';
@@ -1047,7 +1058,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
         Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
         V = Tmp.convertToDouble();
       }
-      
+
       if (IsNAN(V)) {
         // The value is NaN
 
@@ -1211,10 +1222,10 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
   case Instruction::LShr:
-  case Instruction::URem: 
+  case Instruction::URem:
   case Instruction::UDiv: NeedsExplicitCast = true; break;
   case Instruction::AShr:
-  case Instruction::SRem: 
+  case Instruction::SRem:
   case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
   case Instruction::SExt:
     Ty = CE->getType();
@@ -1267,7 +1278,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
   switch (Opcode) {
     default:
       // for most instructions, it doesn't matter
-      break; 
+      break;
     case Instruction::Add:
     case Instruction::Sub:
     case Instruction::Mul:
@@ -1294,7 +1305,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
     Out << ")";
     printConstant(CPV, false);
     Out << ")";
-  } else 
+  } else
     printConstant(CPV, false);
 }
 
@@ -1312,16 +1323,16 @@ std::string CWriter::GetValueName(const Value *Operand) {
     Mang->getNameWithPrefix(Str, GV, false);
     return CBEMangle(Str.str().str());
   }
-    
+
   std::string Name = Operand->getName();
-    
+
   if (Name.empty()) { // Assign unique names to local temporaries.
     unsigned &No = AnonValueNumbers[Operand];
     if (No == 0)
       No = ++NextAnonValueNumber;
     Name = "tmp__" + utostr(No);
   }
-    
+
   std::string VarName;
   VarName.reserve(Name.capacity());
 
@@ -1348,7 +1359,7 @@ void CWriter::writeInstComputationInline(Instruction &I) {
   // Validate this.
   const Type *Ty = I.getType();
   if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
-        Ty!=Type::getInt8Ty(I.getContext()) && 
+        Ty!=Type::getInt8Ty(I.getContext()) &&
         Ty!=Type::getInt16Ty(I.getContext()) &&
         Ty!=Type::getInt32Ty(I.getContext()) &&
         Ty!=Type::getInt64Ty(I.getContext()))) {
@@ -1364,12 +1375,12 @@ void CWriter::writeInstComputationInline(Instruction &I) {
   if (I.getType() == Type::getInt1Ty(I.getContext()) &&
       !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
     NeedBoolTrunc = true;
-  
+
   if (NeedBoolTrunc)
     Out << "((";
-  
+
   visit(I);
-  
+
   if (NeedBoolTrunc)
     Out << ")&1)";
 }
@@ -1404,9 +1415,9 @@ void CWriter::writeOperand(Value *Operand, bool Static) {
     Out << ')';
 }
 
-// Some instructions need to have their result value casted back to the 
-// original types because their operands were casted to the expected type. 
-// This function takes care of detecting that case and printing the cast 
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
 // for the Instruction.
 bool CWriter::writeInstructionCast(const Instruction &I) {
   const Type *Ty = I.getOperand(0)->getType();
@@ -1417,15 +1428,15 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
   case Instruction::LShr:
-  case Instruction::URem: 
-  case Instruction::UDiv: 
+  case Instruction::URem:
+  case Instruction::UDiv:
     Out << "((";
     printSimpleType(Out, Ty, false);
     Out << ")(";
     return true;
   case Instruction::AShr:
-  case Instruction::SRem: 
-  case Instruction::SDiv: 
+  case Instruction::SRem:
+  case Instruction::SDiv:
     Out << "((";
     printSimpleType(Out, Ty, true);
     Out << ")(";
@@ -1437,7 +1448,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
 
 // Write the operand with a cast to another type based on the Opcode being used.
 // This will be used in cases where an instruction has specific type
-// requirements (usually signedness) for its operands. 
+// requirements (usually signedness) for its operands.
 void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
 
   // Extract the operand's type, we'll need it.
@@ -1455,7 +1466,7 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
   switch (Opcode) {
     default:
       // for most instructions, it doesn't matter
-      break; 
+      break;
     case Instruction::Add:
     case Instruction::Sub:
     case Instruction::Mul:
@@ -1484,14 +1495,14 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
     Out << ")";
     writeOperand(Operand);
     Out << ")";
-  } else 
+  } else
     writeOperand(Operand);
 }
 
-// Write the operand with a cast to another type based on the icmp predicate 
-// being used. 
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
 void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
-  // This has to do a cast to ensure the operand has the right signedness. 
+  // This has to do a cast to ensure the operand has the right signedness.
   // Also, if the operand is a pointer, we make sure to cast to an integer when
   // doing the comparison both for signedness and so that the C compiler doesn't
   // optimize things like "p < NULL" to false (p may contain an integer value
@@ -1504,7 +1515,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
     writeOperand(Operand);
     return;
   }
-  
+
   // Should this be a signed comparison?  If so, convert to signed.
   bool castIsSigned = Cmp.isSigned();
 
@@ -1512,7 +1523,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
   const Type* OpTy = Operand->getType();
   if (OpTy->isPointerTy())
     OpTy = TD->getIntPtrType(Operand->getContext());
-  
+
   Out << "((";
   printSimpleType(Out, OpTy, castIsSigned);
   Out << ")";
@@ -1579,7 +1590,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
   Out << "#if defined(__GNUC__)\n"
       << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
       << "#endif\n\n";
-    
+
   // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
   // From the GCC documentation:
   //
@@ -1635,7 +1646,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
       << "#define __ATTRIBUTE_DTOR__\n"
       << "#define LLVM_ASM(X)\n"
       << "#endif\n\n";
-  
+
   Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
       << "#define __builtin_stack_save() 0   /* not implemented */\n"
       << "#define __builtin_stack_restore(X) /* noop */\n"
@@ -1658,11 +1669,11 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
 static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
   ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
   if (!InitList) return;
-  
+
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
       if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
-      
+
       if (CS->getOperand(1)->isNullValue())
         return;  // Found a null terminator, exit printing.
       Constant *FP = CS->getOperand(1);
@@ -1690,12 +1701,12 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
     else if (GV->getName() == "llvm.global_dtors")
       return GlobalDtors;
   }
-  
+
   // Otherwise, if it is other metadata, don't print it.  This catches things
   // like debug information.
   if (GV->getSection() == "llvm.metadata")
     return NotPrinted;
-  
+
   return NotSpecial;
 }
 
@@ -1726,7 +1737,7 @@ static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
 
 bool CWriter::doInitialization(Module &M) {
   FunctionPass::doInitialization(M);
-  
+
   // Initialize
   TheModule = &M;
 
@@ -1738,13 +1749,13 @@ bool CWriter::doInitialization(Module &M) {
   std::string Triple = TheModule->getTargetTriple();
   if (Triple.empty())
     Triple = llvm::sys::getHostTriple();
-  
+
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
     TAsm = Match->createAsmInfo(Triple);
-#endif    
+#endif
   TAsm = new CBEMCAsmInfo();
-  TCtx = new MCContext(*TAsm);
+  TCtx = new MCContext(*TAsm, NULL);
   Mang = new Mangler(*TCtx, *TD);
 
   // Keep track of which functions are static ctors/dtors so they can have
@@ -1762,7 +1773,7 @@ bool CWriter::doInitialization(Module &M) {
       break;
     }
   }
-  
+
   // get declaration for alloca
   Out << "/* Provide Declarations */\n";
   Out << "#include <stdarg.h>\n";      // Varargs support
@@ -1819,7 +1830,7 @@ bool CWriter::doInitialization(Module &M) {
     for (Module::global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
 
-      if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || 
+      if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
           I->hasCommonLinkage())
         Out << "extern ";
       else if (I->hasDLLImportLinkage())
@@ -1844,7 +1855,7 @@ bool CWriter::doInitialization(Module &M) {
   Out << "double fmod(double, double);\n";   // Support for FP rem
   Out << "float fmodf(float, float);\n";
   Out << "long double fmodl(long double, long double);\n";
-  
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     // Don't print declarations for intrinsic functions.
     if (!I->isIntrinsic() && I->getName() != "setjmp" &&
@@ -1852,7 +1863,7 @@ bool CWriter::doInitialization(Module &M) {
       if (I->hasExternalWeakLinkage())
         Out << "extern ";
       printFunctionSignature(I, true);
-      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) 
+      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
         Out << " __ATTRIBUTE_WEAK__";
       if (I->hasExternalWeakLinkage())
         Out << " __EXTERNAL_WEAK__";
@@ -1862,10 +1873,10 @@ bool CWriter::doInitialization(Module &M) {
         Out << " __ATTRIBUTE_DTOR__";
       if (I->hasHiddenVisibility())
         Out << " __HIDDEN__";
-      
+
       if (I->hasName() && I->getName()[0] == 1)
         Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-          
+
       Out << ";\n";
     }
   }
@@ -1889,7 +1900,7 @@ bool CWriter::doInitialization(Module &M) {
         if (I->isThreadLocal())
           Out << "__thread ";
 
-        printType(Out, I->getType()->getElementType(), false, 
+        printType(Out, I->getType()->getElementType(), false,
                   GetValueName(I));
 
         if (I->hasLinkOnceLinkage())
@@ -1909,7 +1920,7 @@ bool CWriter::doInitialization(Module &M) {
   // Output the global variable definitions and contents...
   if (!M.global_empty()) {
     Out << "\n\n/* Global Variable Definitions and Initialization */\n";
-    for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
+    for (Module::global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I)
       if (!I->isDeclaration()) {
         // Ignore special globals, such as debug info.
@@ -1927,7 +1938,7 @@ bool CWriter::doInitialization(Module &M) {
         if (I->isThreadLocal())
           Out << "__thread ";
 
-        printType(Out, I->getType()->getElementType(), false, 
+        printType(Out, I->getType()->getElementType(), false,
                   GetValueName(I));
         if (I->hasLinkOnceLinkage())
           Out << " __attribute__((common))";
@@ -1938,7 +1949,7 @@ bool CWriter::doInitialization(Module &M) {
 
         if (I->hasHiddenVisibility())
           Out << " __HIDDEN__";
-        
+
         // If the initializer is not null, emit the initializer.  If it is null,
         // we try to avoid emitting large amounts of zeros.  The problem with
         // this, however, occurs when the variable has weak linkage.  In this
@@ -1972,7 +1983,7 @@ bool CWriter::doInitialization(Module &M) {
   if (!M.empty())
     Out << "\n\n/* Function Bodies */\n";
 
-  // Emit some helper functions for dealing with FCMP instruction's 
+  // Emit some helper functions for dealing with FCMP instruction's
   // predicates
   Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
   Out << "return X == X && Y == Y; }\n";
@@ -2027,7 +2038,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
       printFloatingPointConstants(CE->getOperand(i));
     return;
   }
-    
+
   // Otherwise, check for a FP constant that we need to print.
   const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
   if (FPC == 0 ||
@@ -2038,7 +2049,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
     return;
 
   FPConstantMap[FPC] = FPCounter;  // Number the FP constants
-  
+
   if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
     double Val = FPC->getValueAPF().convertToDouble();
     uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
@@ -2057,7 +2068,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
     APInt api = FPC->getValueAPF().bitcastToAPInt();
     const uint64_t *p = api.getRawData();
     Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
-    << " = { 0x" << utohexstr(p[0]) 
+    << " = { 0x" << utohexstr(p[0])
     << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
     << "}; /* Long double constant */\n";
   } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
@@ -2068,7 +2079,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
     << " = { 0x"
     << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
     << "}; /* Long double constant */\n";
-    
+
   } else {
     llvm_unreachable("Unknown float type!");
   }
@@ -2140,12 +2151,12 @@ void CWriter::printContainedStructs(const Type *Ty,
   // Don't walk through pointers.
   if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
     return;
-  
+
   // Print all contained types first.
   for (Type::subtype_iterator I = Ty->subtype_begin(),
        E = Ty->subtype_end(); I != E; ++I)
     printContainedStructs(*I, StructPrinted);
-  
+
   if (Ty->isStructTy() || Ty->isArrayTy()) {
     // Check to see if we have already printed this struct.
     if (StructPrinted.insert(Ty).second) {
@@ -2160,10 +2171,10 @@ void CWriter::printContainedStructs(const Type *Ty,
 void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
   /// isStructReturn - Should this function actually return a struct by-value?
   bool isStructReturn = F->hasStructRetAttr();
-  
+
   if (F->hasLocalLinkage()) Out << "static ";
   if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
-  if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";  
+  if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
   switch (F->getCallingConv()) {
    case CallingConv::X86_StdCall:
     Out << "__attribute__((stdcall)) ";
@@ -2177,7 +2188,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
    default:
     break;
   }
-  
+
   // Loop over the arguments, printing them...
   const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
   const AttrListPtr &PAL = F->getAttributes();
@@ -2193,7 +2204,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
     if (!F->arg_empty()) {
       Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       unsigned Idx = 1;
-      
+
       // If this is a struct-return function, don't print the hidden
       // struct-return argument.
       if (isStructReturn) {
@@ -2201,7 +2212,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
         ++I;
         ++Idx;
       }
-      
+
       std::string ArgName;
       for (; I != E; ++I) {
         if (PrintedArg) FunctionInnards << ", ";
@@ -2225,7 +2236,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
     // Loop over the arguments, printing them.
     FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
     unsigned Idx = 1;
-    
+
     // If this is a struct-return function, don't print the hidden
     // struct-return argument.
     if (isStructReturn) {
@@ -2233,7 +2244,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
       ++I;
       ++Idx;
     }
-    
+
     for (; I != E; ++I) {
       if (PrintedArg) FunctionInnards << ", ";
       const Type *ArgTy = *I;
@@ -2262,7 +2273,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
     FunctionInnards << "void"; // ret() -> ret(void) in C.
   }
   FunctionInnards << ')';
-  
+
   // Get the return tpe for the function.
   const Type *RetTy;
   if (!isStructReturn)
@@ -2271,9 +2282,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
     // If this is a struct-return function, print the struct-return type.
     RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
   }
-    
+
   // Print out the return type and the signature built above.
-  printType(Out, RetTy, 
+  printType(Out, RetTy,
             /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
             FunctionInnards.str());
 }
@@ -2293,7 +2304,7 @@ void CWriter::printFunction(Function &F) {
 
   printFunctionSignature(&F, false);
   Out << " {\n";
-  
+
   // If this is a struct return function, handle the result with magic.
   if (isStructReturn) {
     const Type *StructTy =
@@ -2303,13 +2314,13 @@ void CWriter::printFunction(Function &F) {
     Out << ";  /* Struct return temporary */\n";
 
     Out << "  ";
-    printType(Out, F.arg_begin()->getType(), false, 
+    printType(Out, F.arg_begin()->getType(), false,
               GetValueName(F.arg_begin()));
     Out << " = &StructReturn;\n";
   }
 
   bool PrintedVar = false;
-  
+
   // print local variable information for the function
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
     if (const AllocaInst *AI = isDirectAlloca(&*I)) {
@@ -2317,7 +2328,7 @@ void CWriter::printFunction(Function &F) {
       printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
       Out << ";    /* Address-exposed local */\n";
       PrintedVar = true;
-    } else if (I->getType() != Type::getVoidTy(F.getContext()) && 
+    } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
                !isInlinableInst(*I)) {
       Out << "  ";
       printType(Out, I->getType(), false, GetValueName(&*I));
@@ -2333,7 +2344,7 @@ void CWriter::printFunction(Function &F) {
     }
     // We need a temporary for the BitCast to use so it can pluck a value out
     // of a union to do the BitCast. This is separate from the need for a
-    // variable to hold the result of the BitCast. 
+    // variable to hold the result of the BitCast.
     if (isFPIntBitCast(*I)) {
       Out << "  llvmBitCastUnion " << GetValueName(&*I)
           << "__BITCAST_TEMPORARY;\n";
@@ -2421,7 +2432,7 @@ void CWriter::visitReturnInst(ReturnInst &I) {
     Out << "  return StructReturn;\n";
     return;
   }
-  
+
   // Don't output a void return if this is the last basic block in the function
   if (I.getNumOperands() == 0 &&
       &*--I.getParent()->getParent()->end() == I.getParent() &&
@@ -2578,7 +2589,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
   // We must cast the results of binary operations which might be promoted.
   bool needsCast = false;
   if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
-      (I.getType() == Type::getInt16Ty(I.getContext())) 
+      (I.getType() == Type::getInt16Ty(I.getContext()))
       || (I.getType() == Type::getFloatTy(I.getContext()))) {
     needsCast = true;
     Out << "((";
@@ -2630,7 +2641,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     case Instruction::SRem:
     case Instruction::FRem: Out << " % "; break;
     case Instruction::UDiv:
-    case Instruction::SDiv: 
+    case Instruction::SDiv:
     case Instruction::FDiv: Out << " / "; break;
     case Instruction::And:  Out << " & "; break;
     case Instruction::Or:   Out << " | "; break;
@@ -2638,7 +2649,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     case Instruction::Shl : Out << " << "; break;
     case Instruction::LShr:
     case Instruction::AShr: Out << " >> "; break;
-    default: 
+    default:
 #ifndef NDEBUG
        errs() << "Invalid operator type!" << I;
 #endif
@@ -2681,7 +2692,7 @@ void CWriter::visitICmpInst(ICmpInst &I) {
   case ICmpInst::ICMP_SGT: Out << " > "; break;
   default:
 #ifndef NDEBUG
-    errs() << "Invalid icmp predicate!" << I; 
+    errs() << "Invalid icmp predicate!" << I;
 #endif
     llvm_unreachable(0);
   }
@@ -2754,7 +2765,7 @@ void CWriter::visitCastInst(CastInst &I) {
   if (isFPIntBitCast(I)) {
     Out << '(';
     // These int<->float and long<->double casts need to be handled specially
-    Out << GetValueName(&I) << "__BITCAST_TEMPORARY." 
+    Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
         << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
     writeOperand(I.getOperand(0));
     Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
@@ -2762,7 +2773,7 @@ void CWriter::visitCastInst(CastInst &I) {
     Out << ')';
     return;
   }
-  
+
   Out << '(';
   printCast(I.getOpcode(), SrcTy, DstTy);
 
@@ -2770,15 +2781,15 @@ void CWriter::visitCastInst(CastInst &I) {
   if (SrcTy == Type::getInt1Ty(I.getContext()) &&
       I.getOpcode() == Instruction::SExt)
     Out << "0-";
-  
+
   writeOperand(I.getOperand(0));
-    
-  if (DstTy == Type::getInt1Ty(I.getContext()) && 
+
+  if (DstTy == Type::getInt1Ty(I.getContext()) &&
       (I.getOpcode() == Instruction::Trunc ||
        I.getOpcode() == Instruction::FPToUI ||
        I.getOpcode() == Instruction::FPToSI ||
        I.getOpcode() == Instruction::PtrToInt)) {
-    // Make sure we really get a trunc to bool by anding the operand with 1 
+    // Make sure we really get a trunc to bool by anding the operand with 1
     Out << "&1u";
   }
   Out << ')';
@@ -2835,7 +2846,7 @@ void CWriter::lowerIntrinsics(Function &F) {
 #undef GET_GCC_BUILTIN_NAME
             // If we handle it, don't lower it.
             if (BuiltinName[0]) break;
-            
+
             // All other intrinsic calls we must lower.
             Instruction *Before = 0;
             if (CI != &BB->front())
@@ -2858,7 +2869,7 @@ void CWriter::lowerIntrinsics(Function &F) {
             break;
           }
 
-  // We may have collected some prototypes to emit in the loop above. 
+  // We may have collected some prototypes to emit in the loop above.
   // Emit them now, before the function that uses them is emitted. But,
   // be careful not to emit them twice.
   std::vector<Function*>::iterator I = prototypesToGen.begin();
@@ -2898,9 +2909,9 @@ void CWriter::visitCallInst(CallInst &I) {
     writeOperandDeref(I.getArgOperand(0));
     Out << " = ";
   }
-  
+
   if (I.isTailCall()) Out << " /*tail*/ ";
-  
+
   if (!WroteCallee) {
     // If this is an indirect call to a struct return function, we need to cast
     // the pointer. Ditto for indirect calls with byval arguments.
@@ -2924,7 +2935,7 @@ void CWriter::visitCallInst(CallInst &I) {
           NeedsCast = true;
           Callee = RF;
         }
-  
+
     if (NeedsCast) {
       // Ok, just cast the pointer type.
       Out << "((";
@@ -2957,14 +2968,14 @@ void CWriter::visitCallInst(CallInst &I) {
     ++AI;
     ++ArgNo;
   }
-      
+
 
   for (; AI != AE; ++AI, ++ArgNo) {
     if (PrintedArg) Out << ", ";
     if (ArgNo < NumDeclaredParams &&
         (*AI)->getType() != FTy->getParamType(ArgNo)) {
       Out << '(';
-      printType(Out, FTy->getParamType(ArgNo), 
+      printType(Out, FTy->getParamType(ArgNo),
             /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
       Out << ')';
     }
@@ -2993,7 +3004,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
 #include "llvm/Intrinsics.gen"
 #undef GET_GCC_BUILTIN_NAME
     assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
-    
+
     Out << BuiltinName;
     WroteCallee = true;
     return false;
@@ -3003,7 +3014,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
     return true;
   case Intrinsic::vastart:
     Out << "0; ";
-      
+
     Out << "va_start(*(va_list*)";
     writeOperand(I.getArgOperand(0));
     Out << ", ";
@@ -3081,7 +3092,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
   case Intrinsic::x86_sse2_cmp_pd:
     Out << '(';
     printType(Out, I.getType());
-    Out << ')';  
+    Out << ')';
     // Multiple GCC builtins multiplex onto this intrinsic.
     switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
     default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
@@ -3102,7 +3113,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
       Out << 's';
     else
       Out << 'd';
-      
+
     Out << "(";
     writeOperand(I.getArgOperand(0));
     Out << ", ";
@@ -3112,7 +3123,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
   case Intrinsic::ppc_altivec_lvsl:
     Out << '(';
     printType(Out, I.getType());
-    Out << ')';  
+    Out << ')';
     Out << "__builtin_altivec_lvsl(0, (void*)";
     writeOperand(I.getArgOperand(0));
     Out << ")";
@@ -3132,13 +3143,13 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
   std::string Triple = TheModule->getTargetTriple();
   if (Triple.empty())
     Triple = llvm::sys::getHostTriple();
-  
+
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
     TargetAsm = Match->createAsmInfo(Triple);
   else
     return c.Codes[0];
-  
+
   const char *const *table = TargetAsm->getAsmCBE();
 
   // Search the translation table if it exists.
@@ -3164,7 +3175,7 @@ static std::string gccifyAsm(std::string asmstr) {
       if (asmstr[i + 1] == '{') {
         std::string::size_type a = asmstr.find_first_of(':', i + 1);
         std::string::size_type b = asmstr.find_first_of('}', i + 1);
-        std::string n = "%" + 
+        std::string n = "%" +
           asmstr.substr(a + 1, b - a - 1) +
           asmstr.substr(i + 2, a - i - 2);
         asmstr.replace(i, b - i + 1, n);
@@ -3174,7 +3185,7 @@ static std::string gccifyAsm(std::string asmstr) {
     }
     else if (asmstr[i] == '%')//grr
       { asmstr.replace(i, 1, "%%"); ++i;}
-  
+
   return asmstr;
 }
 
@@ -3182,8 +3193,8 @@ static std::string gccifyAsm(std::string asmstr) {
 //      handle communitivity
 void CWriter::visitInlineAsm(CallInst &CI) {
   InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
-  std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
-  
+  InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
+
   std::vector<std::pair<Value*, int> > ResultVals;
   if (CI.getType() == Type::getVoidTy(CI.getContext()))
     ;
@@ -3193,27 +3204,27 @@ void CWriter::visitInlineAsm(CallInst &CI) {
   } else {
     ResultVals.push_back(std::make_pair(&CI, -1));
   }
-  
+
   // Fix up the asm string for gcc and emit it.
   Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
   Out << "        :";
 
   unsigned ValueCount = 0;
   bool IsFirst = true;
-  
+
   // Convert over all the output constraints.
-  for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
        E = Constraints.end(); I != E; ++I) {
-    
+
     if (I->Type != InlineAsm::isOutput) {
       ++ValueCount;
       continue;  // Ignore non-output constraints.
     }
-    
+
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
@@ -3222,7 +3233,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
     // Unpack the dest.
     Value *DestVal;
     int DestValNo = -1;
-    
+
     if (ValueCount < ResultVals.size()) {
       DestVal = ResultVals[ValueCount].first;
       DestValNo = ResultVals[ValueCount].second;
@@ -3231,38 +3242,38 @@ void CWriter::visitInlineAsm(CallInst &CI) {
 
     if (I->isEarlyClobber)
       C = "&"+C;
-      
+
     Out << "\"=" << C << "\"(" << GetValueName(DestVal);
     if (DestValNo != -1)
       Out << ".field" << DestValNo; // Multiple retvals.
     Out << ")";
     ++ValueCount;
   }
-  
-  
+
+
   // Convert over all the input constraints.
   Out << "\n        :";
   IsFirst = true;
   ValueCount = 0;
-  for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
        E = Constraints.end(); I != E; ++I) {
     if (I->Type != InlineAsm::isInput) {
       ++ValueCount;
       continue;  // Ignore non-input constraints.
     }
-    
+
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
     }
-    
+
     assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
     Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
-    
+
     Out << "\"" << C << "\"(";
     if (!I->isIndirect)
       writeOperand(SrcVal);
@@ -3270,10 +3281,10 @@ void CWriter::visitInlineAsm(CallInst &CI) {
       writeOperandDeref(SrcVal);
     Out << ")";
   }
-  
+
   // Convert over the clobber constraints.
   IsFirst = true;
-  for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
        E = Constraints.end(); I != E; ++I) {
     if (I->Type != InlineAsm::isClobber)
       continue;  // Ignore non-input constraints.
@@ -3281,15 +3292,15 @@ void CWriter::visitInlineAsm(CallInst &CI) {
     assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
     std::string C = InterpretASMConstraint(*I);
     if (C.empty()) continue;
-    
+
     if (!IsFirst) {
       Out << ", ";
       IsFirst = false;
     }
-    
+
     Out << '\"' << C << '"';
   }
-  
+
   Out << ")";
 }
 
@@ -3308,13 +3319,13 @@ void CWriter::visitAllocaInst(AllocaInst &I) {
 
 void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
                                  gep_type_iterator E, bool Static) {
-  
+
   // If there are no indices, just print out the pointer.
   if (I == E) {
     writeOperand(Ptr);
     return;
   }
-    
+
   // Find out if the last index is into a vector.  If so, we have to print this
   // specially.  Since vectors can't have elements of indexable type, only the
   // last index could possibly be of a vector element.
@@ -3323,9 +3334,9 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
     for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
       LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
   }
-  
+
   Out << "(";
-  
+
   // If the last index is into a vector, we can't print it as &a[i][j] because
   // we can't index into a vector with j in GCC.  Instead, emit this as
   // (((float*)&a[i])+j)
@@ -3334,7 +3345,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
     printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
     Out << ")(";
   }
-  
+
   Out << '&';
 
   // If the first index is 0 (very typical) we can do a number of
@@ -3444,7 +3455,7 @@ void CWriter::visitStoreInst(StoreInst &I) {
   if (BitMask) {
     Out << ") & ";
     printConstant(BitMask, false);
-    Out << ")"; 
+    Out << ")";
   }
 }
 
@@ -3477,7 +3488,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) {
 void CWriter::visitExtractElementInst(ExtractElementInst &I) {
   // We know that our operand is not inlined.
   Out << "((";
-  const Type *EltTy = 
+  const Type *EltTy =
     cast<VectorType>(I.getOperand(0)->getType())->getElementType();
   printType(Out, PointerType::getUnqual(EltTy));
   Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
index be243366d50e..a23ff8529703 100644
--- a/lib/Target/CBackend/CMakeLists.txt
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_llvm_target(CBackend
   CBackend.cpp
   )
+
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 43ebdac528ca..09b48ce632f2 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -2,14 +2,56 @@ add_llvm_library(LLVMTarget
   Mangler.cpp
   SubtargetFeature.cpp
   Target.cpp
+  TargetAsmInfo.cpp
   TargetAsmLexer.cpp
   TargetData.cpp
   TargetELFWriterInfo.cpp
-  TargetFrameInfo.cpp
+  TargetFrameLowering.cpp
   TargetInstrInfo.cpp
   TargetIntrinsicInfo.cpp
+  TargetLibraryInfo.cpp
   TargetLoweringObjectFile.cpp
   TargetMachine.cpp
   TargetRegisterInfo.cpp
   TargetSubtarget.cpp
   )
+
+set(LLVM_ENUM_ASM_PRINTERS "")
+set(LLVM_ENUM_ASM_PARSERS "")
+set(LLVM_ENUM_DISASSEMBLERS "")
+foreach(t ${LLVM_TARGETS_TO_BUILD})
+  message(STATUS "Targeting ${t}")
+  add_subdirectory(${t})
+  set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} )
+  file(GLOB asmp_file "${td}/*AsmPrinter.cpp")
+  if( asmp_file )
+    set(LLVM_ENUM_ASM_PRINTERS
+      "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
+  endif()
+  if( EXISTS ${td}/AsmParser/CMakeLists.txt )
+    set(LLVM_ENUM_ASM_PARSERS
+      "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
+  endif()
+  if( EXISTS ${td}/Disassembler/CMakeLists.txt )
+    set(LLVM_ENUM_DISASSEMBLERS
+      "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n")
+  endif()
+endforeach(t)
+
+# Produce llvm/Config/AsmPrinters.def
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
+  ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
+  )
+
+# Produce llvm/Config/AsmParsers.def
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
+  ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
+  )
+
+# Produce llvm/Config/Disassemblers.def
+configure_file(
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in
+  ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def
+  )
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 8a2b59a88a68..000000000000
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
-  ${CMAKE_CURRENT_BINARY_DIR}/..
-  ${CMAKE_CURRENT_SOURCE_DIR}/..
-  )
-
-add_llvm_library(LLVMCellSPUAsmPrinter
-  SPUAsmPrinter.cpp
-  )
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
deleted file mode 100644
index 4ec9d04c0020..000000000000
--- a/lib/Target/CellSPU/AsmPrinter/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUAsmPrinter
-
-# Hack: we need to include 'main' CellSPU target directory to grab
-# private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
deleted file mode 100644
index 3e955310b513..000000000000
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ /dev/null
@@ -1,364 +0,0 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to Cell SPU assembly language. This printer
-// is the output mechanism used by `llc'.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asmprinter"
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class SPUAsmPrinter : public AsmPrinter {
-  public:
-    explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
-      AsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "STI CBEA SPU Assembly Printer";
-    }
-
-    SPUTargetMachine &getTM() {
-      return static_cast<SPUTargetMachine&>(TM);
-    }
-
-    /// printInstruction - This method is automatically generated by tablegen
-    /// from the instruction set description.
-    void printInstruction(const MachineInstr *MI, raw_ostream &OS);
-    static const char *getRegisterName(unsigned RegNo);
-
-
-    void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    void printOp(const MachineOperand &MO, raw_ostream &OS);
-
-    /// printRegister - Print register according to target requirements.
-    ///
-    void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
-      unsigned RegNo = MO.getReg();
-      assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
-             "Not physreg??");
-      O << getRegisterName(RegNo);
-    }
-
-    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      if (MO.isReg()) {
-        O << getRegisterName(MO.getReg());
-      } else if (MO.isImm()) {
-        O << MO.getImm();
-      } else {
-        printOp(MO, O);
-      }
-    }
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O);
-
-
-    void
-    printS7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      int value = MI->getOperand(OpNo).getImm();
-      value = (value << (32 - 7)) >> (32 - 7);
-
-      assert((value >= -(1 << 8) && value <= (1 << 7) - 1)
-             && "Invalid s7 argument");
-      O << value;
-    }
-
-    void
-    printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      unsigned int value = MI->getOperand(OpNo).getImm();
-      assert(value < (1 << 8) && "Invalid u7 argument");
-      O << value;
-    }
-
-    void
-    printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      char value = MI->getOperand(OpNo).getImm();
-      O << (int) value;
-      O << "(";
-      printOperand(MI, OpNo+1, O);
-      O << ")";
-    }
-
-    void
-    printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      O << (short) MI->getOperand(OpNo).getImm();
-    }
-
-    void
-    printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      O << (unsigned short)MI->getOperand(OpNo).getImm();
-    }
-
-    void
-    printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      O << (unsigned)MI->getOperand(OpNo).getImm();
-    }
-
-    void
-    printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      // When used as the base register, r0 reads constant zero rather than
-      // the value contained in the register.  For this reason, the darwin
-      // assembler requires that we print r0 as 0 (no r) when used as the base.
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      O << getRegisterName(MO.getReg()) << ", ";
-      printOperand(MI, OpNo+1, O);
-    }
-
-    void
-    printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      unsigned int value = MI->getOperand(OpNo).getImm();
-      assert(value <= (1 << 19) - 1 && "Invalid u18 argument");
-      O << value;
-    }
-
-    void
-    printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
-                             >> 16);
-      assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
-             && "Invalid s10 argument");
-      O << value;
-    }
-
-    void
-    printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
-                             >> 16);
-      assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
-      O << value;
-    }
-
-    void
-    printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      assert(MI->getOperand(OpNo).isImm() &&
-             "printDFormAddr first operand is not immediate");
-      int64_t value = int64_t(MI->getOperand(OpNo).getImm());
-      int16_t value16 = int16_t(value);
-      assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
-             && "Invalid dform s10 offset argument");
-      O << (value16 & ~0xf) << "(";
-      printOperand(MI, OpNo+1, O);
-      O << ")";
-    }
-
-    void
-    printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
-    {
-      /* Note: operand 1 is an offset or symbol name. */
-      if (MI->getOperand(OpNo).isImm()) {
-        printS16ImmOperand(MI, OpNo, O);
-      } else {
-        printOp(MI->getOperand(OpNo), O);
-        if (MI->getOperand(OpNo+1).isImm()) {
-          int displ = int(MI->getOperand(OpNo+1).getImm());
-          if (displ > 0)
-            O << "+" << displ;
-          else if (displ < 0)
-            O << displ;
-        }
-      }
-    }
-
-    void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      printOp(MI->getOperand(OpNo), O);
-    }
-
-    void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      // Used to generate a ".-<target>", but it turns out that the assembler
-      // really wants the target.
-      //
-      // N.B.: This operand is used for call targets. Branch hints are another
-      // animal entirely.
-      printOp(MI->getOperand(OpNo), O);
-    }
-
-    void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      // HBR operands are generated in front of branches, hence, the
-      // program counter plus the target.
-      O << ".+";
-      printOp(MI->getOperand(OpNo), O);
-    }
-
-    void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        printS16ImmOperand(MI, OpNo, O);
-      } else {
-        printOp(MI->getOperand(OpNo), O);
-        O << "@h";
-      }
-    }
-
-    void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        printS16ImmOperand(MI, OpNo, O);
-      } else {
-        printOp(MI->getOperand(OpNo), O);
-        O << "@l";
-      }
-    }
-
-    /// Print local store address
-    void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      printOp(MI->getOperand(OpNo), O);
-    }
-
-    void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
-                          raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        int value = (int) MI->getOperand(OpNo).getImm();
-        assert((value >= 0 && value < 16)
-               && "Invalid negated immediate rotate 7-bit argument");
-        O << -value;
-      } else {
-        llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
-      }
-    }
-
-    void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
-      assert(MI->getOperand(OpNo).isImm() &&
-             "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
-      int value = (int) MI->getOperand(OpNo).getImm();
-      assert((value >= 0 && value <= 32)
-             && "Invalid negated immediate rotate 7-bit argument");
-      O << -value;
-    }
-  };
-} // end of anonymous namespace
-
-// Include the auto-generated portion of the assembly writer
-#include "SPUGenAsmWriter.inc"
-
-void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
-  switch (MO.getType()) {
-  case MachineOperand::MO_Immediate:
-    report_fatal_error("printOp() does not handle immediate values");
-    return;
-
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    return;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    return;
-  case MachineOperand::MO_ExternalSymbol:
-    // Computing the address of an external symbol, not calling it.
-    if (TM.getRelocationModel() != Reloc::Static) {
-      O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
-        << "$non_lazy_ptr";
-      return;
-    }
-    O << *GetExternalSymbolSymbol(MO.getSymbolName());
-    return;
-  case MachineOperand::MO_GlobalAddress:
-    // External or weakly linked global variables need non-lazily-resolved
-    // stubs
-    if (TM.getRelocationModel() != Reloc::Static) {
-      const GlobalValue *GV = MO.getGlobal();
-      if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
-            GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
-        O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        return;
-      }
-    }
-    O << *Mang->getSymbol(MO.getGlobal());
-    return;
-  default:
-    O << "<unknown operand type: " << MO.getType() << ">";
-    return;
-  }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned AsmVariant,
-                                    const char *ExtraCode, raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
-    switch (ExtraCode[0]) {
-    default: return true;  // Unknown modifier.
-    case 'L': // Write second word of DImode reference.
-      // Verify that this operand has two consecutive registers.
-      if (!MI->getOperand(OpNo).isReg() ||
-          OpNo+1 == MI->getNumOperands() ||
-          !MI->getOperand(OpNo+1).isReg())
-        return true;
-      ++OpNo;   // Return the high-part.
-      break;
-    }
-  }
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNo, unsigned AsmVariant,
-                                          const char *ExtraCode,
-                                          raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-  printMemRegReg(MI, OpNo, O);
-  return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUAsmPrinter() { 
-  RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
-}
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index ddfca37d23e3..a2a2ef1aa9af 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -12,16 +12,18 @@ tablegen(SPUGenSubtarget.inc -gen-subtarget)
 tablegen(SPUGenCallingConv.inc -gen-callingconv)
 
 add_llvm_target(CellSPUCodeGen
-  SPUFrameInfo.cpp
+  SPUAsmPrinter.cpp
   SPUHazardRecognizers.cpp
   SPUInstrInfo.cpp
   SPUISelDAGToDAG.cpp
   SPUISelLowering.cpp
+  SPUFrameLowering.cpp
   SPUMCAsmInfo.cpp
   SPURegisterInfo.cpp
   SPUSubtarget.cpp
   SPUTargetMachine.cpp
   SPUSelectionDAGInfo.cpp
+  SPUNopFiller.cpp
   )
 
-target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
index cbdbd3c3a5ba..77c66be9e857 100644
--- a/lib/Target/CellSPU/Makefile
+++ b/lib/Target/CellSPU/Makefile
@@ -16,6 +16,6 @@ BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
 		SPUGenInstrInfo.inc SPUGenDAGISel.inc \
 		SPUGenSubtarget.inc SPUGenCallingConv.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 0e7ad3533e91..3e7e0b68e8e6 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -55,7 +55,7 @@ TODO:
 * i128 support:
 
   * zero extension, any extension: done
-  * sign extension: needed
+  * sign extension: done
   * arithmetic operators (add, sub, mul, div): needed
   * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
 
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 1f215113b405..72f84300b2c3 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -23,6 +23,7 @@ namespace llvm {
   class formatted_raw_ostream;
 
   FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+  FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
 
   extern Target TheCellSPUTarget;
 }
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 069a182c26df..5ef5716bd8cf 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -54,8 +54,8 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
 // The i64 seteq fragment that does the scalar->vector conversion and
 // comparison:
 def CEQr64compare:
-    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
-                                           (ORv2i64_i64 R64C:$rB))), 0xb)>;
+    CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                           (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
 
 // The i64 seteq fragment that does the vector comparison
 def CEQv2i64compare:
@@ -67,12 +67,14 @@ def CEQv2i64compare:
 // v2i64 seteq (equality): the setcc result is v4i32
 multiclass CompareEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
-  def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+  def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
 }
 
 defm I64EQ: CompareEqual64;
@@ -89,10 +91,12 @@ def : I64SELECTNegCond<setne, I64EQr64>;
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 def CLGTr64ugt:
-    CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
 
 def CLGTr64eq:
-    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
     
 def CLGTr64compare:
     CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
@@ -112,12 +116,14 @@ def CLGTv2i64compare:
 
 multiclass CompareLogicalGreaterThan64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
 }
 
 defm I64LGT: CompareLogicalGreaterThan64;
@@ -144,12 +150,14 @@ def CLGEv2i64compare:
 
 multiclass CompareLogicalGreaterEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                           (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                           (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
 }
 
 defm I64LGE: CompareLogicalGreaterEqual64;
@@ -168,10 +176,12 @@ def : I64SELECTNegCond<setult, I64LGEr64>;
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 def CGTr64sgt:
-    CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
 
 def CGTr64eq:
-    CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), 
+                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
     
 def CGTr64compare:
     CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
@@ -191,12 +201,14 @@ def CGTv2i64compare:
 
 multiclass CompareGreaterThan64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                             (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS 
+                               (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
 }
 
 defm I64GT: CompareLogicalGreaterThan64;
@@ -223,12 +235,12 @@ def CGEv2i64compare:
 
 multiclass CompareGreaterEqual64 {
   // Plain old comparison, converts back to i32 scalar
-  def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
   def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
 
   // SELB mask from FSM:
-  def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
-  def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
 }
 
 defm I64GE: CompareGreaterEqual64;
@@ -255,9 +267,9 @@ class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
     v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
 
 def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-           (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
-                                  (ORv2i64_i64 R64C:$rB),
-                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+           (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                  (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                                  (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),
@@ -275,11 +287,12 @@ class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
     CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
 
 def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-           (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
-                                  (ORv2i64_i64 R64C:$rB),
-                                  v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
-                                               (ORv2i64_i64 R64C:$rB)>.Fragment,
-                                  (v4i32 VECREG:$rCGmask)>.Fragment)>;
+           (COPY_TO_REGCLASS 
+               v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                         (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                         v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                      (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+                                  (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),
@@ -374,9 +387,9 @@ class v2i64_mul<dag rA, dag rB, dag rCGmask>:
               rCGmask>;
 
 def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
-          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
-                                 (ORv2i64_i64 R64C:$rB),
-                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+          (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+                                 (COPY_TO_REGCLASS R64C:$rB, VECREG),
+                                 (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
 
 def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)),
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 000000000000..40404614b703
--- /dev/null
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,327 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class SPUAsmPrinter : public AsmPrinter {
+  public:
+    explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
+      AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "STI CBEA SPU Assembly Printer";
+    }
+
+    /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description.
+    void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+    static const char *getRegisterName(unsigned RegNo);
+
+
+    void EmitInstruction(const MachineInstr *MI) {
+      SmallString<128> Str;
+      raw_svector_ostream OS(Str);
+      printInstruction(MI, OS);
+      OutStreamer.EmitRawText(OS.str());
+    }
+    void printOp(const MachineOperand &MO, raw_ostream &OS);
+
+    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      if (MO.isReg()) {
+        O << getRegisterName(MO.getReg());
+      } else if (MO.isImm()) {
+        O << MO.getImm();
+      } else {
+        printOp(MO, O);
+      }
+    }
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+
+
+    void
+    printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      unsigned int value = MI->getOperand(OpNo).getImm();
+      assert(value < (1 << 8) && "Invalid u7 argument");
+      O << value;
+    }
+
+    void
+    printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      char value = MI->getOperand(OpNo).getImm();
+      O << (int) value;
+      O << "(";
+      printOperand(MI, OpNo+1, O);
+      O << ")";
+    }
+
+    void
+    printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      O << (short) MI->getOperand(OpNo).getImm();
+    }
+
+    void
+    printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      O << (unsigned short)MI->getOperand(OpNo).getImm();
+    }
+
+    void
+    printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      // When used as the base register, r0 reads constant zero rather than
+      // the value contained in the register.  For this reason, the darwin
+      // assembler requires that we print r0 as 0 (no r) when used as the base.
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      O << getRegisterName(MO.getReg()) << ", ";
+      printOperand(MI, OpNo+1, O);
+    }
+
+    void
+    printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      unsigned int value = MI->getOperand(OpNo).getImm();
+      assert(value <= (1 << 19) - 1 && "Invalid u18 argument");
+      O << value;
+    }
+
+    void
+    printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+                             >> 16);
+      assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+             && "Invalid s10 argument");
+      O << value;
+    }
+
+    void
+    printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+                             >> 16);
+      assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+      O << value;
+    }
+
+    void
+    printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      assert(MI->getOperand(OpNo).isImm() &&
+             "printDFormAddr first operand is not immediate");
+      int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+      int16_t value16 = int16_t(value);
+      assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+             && "Invalid dform s10 offset argument");
+      O << (value16 & ~0xf) << "(";
+      printOperand(MI, OpNo+1, O);
+      O << ")";
+    }
+
+    void
+    printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+    {
+      /* Note: operand 1 is an offset or symbol name. */
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo, O);
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+        if (MI->getOperand(OpNo+1).isImm()) {
+          int displ = int(MI->getOperand(OpNo+1).getImm());
+          if (displ > 0)
+            O << "+" << displ;
+          else if (displ < 0)
+            O << displ;
+        }
+      }
+    }
+
+    void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      printOp(MI->getOperand(OpNo), O);
+    }
+
+    void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      // Used to generate a ".-<target>", but it turns out that the assembler
+      // really wants the target.
+      //
+      // N.B.: This operand is used for call targets. Branch hints are another
+      // animal entirely.
+      printOp(MI->getOperand(OpNo), O);
+    }
+
+    void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo, O);
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+        O << "@h";
+      }
+    }
+
+    void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      if (MI->getOperand(OpNo).isImm()) {
+        printS16ImmOperand(MI, OpNo, O);
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+        O << "@l";
+      }
+    }
+
+    /// Print local store address
+    void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      printOp(MI->getOperand(OpNo), O);
+    }
+
+    void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
+                          raw_ostream &O) {
+      if (MI->getOperand(OpNo).isImm()) {
+        int value = (int) MI->getOperand(OpNo).getImm();
+        assert((value >= 0 && value < 16)
+               && "Invalid negated immediate rotate 7-bit argument");
+        O << -value;
+      } else {
+        llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+      }
+    }
+
+    void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
+      assert(MI->getOperand(OpNo).isImm() &&
+             "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+      int value = (int) MI->getOperand(OpNo).getImm();
+      assert((value >= 0 && value <= 32)
+             && "Invalid negated immediate rotate 7-bit argument");
+      O << -value;
+    }
+  };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate:
+    report_fatal_error("printOp() does not handle immediate values");
+    return;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    // Computing the address of an external symbol, not calling it.
+    if (TM.getRelocationModel() != Reloc::Static) {
+      O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
+        << "$non_lazy_ptr";
+      return;
+    }
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    return;
+  case MachineOperand::MO_GlobalAddress:
+    // External or weakly linked global variables need non-lazily-resolved
+    // stubs
+    if (TM.getRelocationModel() != Reloc::Static) {
+      const GlobalValue *GV = MO.getGlobal();
+      if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+            GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+        O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+        return;
+      }
+    }
+    O << *Mang->getSymbol(MO.getGlobal());
+    return;
+  default:
+    O << "<unknown operand type: " << MO.getType() << ">";
+    return;
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                    unsigned AsmVariant,
+                                    const char *ExtraCode, raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'L': // Write second word of DImode reference.
+      // Verify that this operand has two consecutive registers.
+      if (!MI->getOperand(OpNo).isReg() ||
+          OpNo+1 == MI->getNumOperands() ||
+          !MI->getOperand(OpNo+1).isReg())
+        return true;
+      ++OpNo;   // Return the high-part.
+      break;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                          unsigned OpNo, unsigned AsmVariant,
+                                          const char *ExtraCode,
+                                          raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+  printMemRegReg(MI, OpNo, O);
+  return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() { 
+  RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
+}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
deleted file mode 100644
index 60d7ba736ac6..000000000000
--- a/lib/Target/CellSPU/SPUFrameInfo.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUFrameInfo.h"
-#include "SPURegisterNames.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameInfo:
-//===----------------------------------------------------------------------===//
-
-SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
-  TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
-  TM(tm)
-{
-  LR[0].first = SPU::R0;
-  LR[0].second = 16;
-}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
deleted file mode 100644
index f511acd64954..000000000000
--- a/lib/Target/CellSPU/SPUFrameInfo.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains CellSPU frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#if !defined(SPUFRAMEINFO_H)
-
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "SPURegisterInfo.h"
-
-namespace llvm {
-  class SPUFrameInfo: public TargetFrameInfo {
-    const TargetMachine &TM;
-    std::pair<unsigned, int> LR[1];
-
-  public:
-    SPUFrameInfo(const TargetMachine &tm);
-
-    //! Return a function's saved spill slots
-    /*!
-      For CellSPU, a function's saved spill slots is just the link register.
-     */
-    const std::pair<unsigned, int> *
-    getCalleeSaveSpillSlots(unsigned &NumEntries) const;
-
-    //! Stack slot size (16 bytes)
-    static int stackSlotSize() {
-      return 16;
-    }
-    //! Maximum frame offset representable by a signed 10-bit integer
-    /*!
-      This is the maximum frame offset that can be expressed as a 10-bit
-      integer, used in D-form addresses.
-     */
-    static int maxFrameOffset() {
-      return ((1 << 9) - 1) * stackSlotSize();
-    }
-    //! Minimum frame offset representable by a signed 10-bit integer
-    static int minFrameOffset() {
-      return -(1 << 9) * stackSlotSize();
-    }
-    //! Minimum frame size (enough to spill LR + SP)
-    static int minStackSize() {
-      return (2 * stackSlotSize());
-    }
-    //! Convert frame index to stack offset
-    static int FItoStackOffset(int frame_index) {
-      return frame_index * stackSlotSize();
-    }
-    //! Number of instructions required to overcome hint-for-branch latency
-    /*!
-      HBR (hint-for-branch) instructions can be inserted when, for example,
-      we know that a given function is going to be called, such as printf(),
-      in the control flow graph. HBRs are only inserted if a sufficient number
-      of instructions occurs between the HBR and the target. Currently, HBRs
-      take 6 cycles, ergo, the magic number 6.
-     */
-    static int branchHintPenalty() {
-      return 6;
-    }
-  };
-}
-
-#define SPUFRAMEINFO_H 1
-#endif
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 000000000000..432f4a1b59e2
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,276 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+    Subtarget(sti) {
+  LR[0].first = SPU::R0;
+  LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.  This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  return MFI->getStackSize() &&
+    (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned FrameSize = MFI->getStackSize();
+
+  // Get the alignments provided by the target, and the maximum alignment
+  // (if any) of the fixed frame objects.
+  unsigned TargetAlign = getStackAlignment();
+  unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+  assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+  unsigned AlignMask = Align - 1;
+
+  // Get the maximum call frame size of all the calls.
+  unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+  // that allocations will be aligned.
+  if (MFI->hasVarSizedObjects())
+    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+  // Update maximum call frame size.
+  MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+  // Include call frame size in total.
+  FrameSize += maxCallFrameSize;
+
+  // Make sure the frame is aligned.
+  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+  // Update frame info.
+  MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SPUInstrInfo &TII =
+    *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineModuleInfo &MMI = MF.getMMI();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Prepare for debug frame info.
+  bool hasDebugInfo = MMI.hasDebugInfo();
+  MCSymbol *FrameLabel = 0;
+
+  // Move MBBI back to the beginning of the function.
+  MBBI = MBB.begin();
+
+  // Work out frame sizes.
+  determineFrameLayout(MF);
+  int FrameSize = MFI->getStackSize();
+
+  assert((FrameSize & 0xf) == 0
+         && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+  // the "empty" frame size is 16 - just the register scavenger spill slot
+  if (FrameSize > 16 || MFI->adjustsStack()) {
+    FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+    if (hasDebugInfo) {
+      // Mark effective beginning of when frame pointer becomes valid.
+      FrameLabel = MMI.getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+    }
+
+    // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+    // for the ABI
+    BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+      .addReg(SPU::R1);
+    if (isInt<10>(FrameSize)) {
+      // Spill $sp to adjusted $sp
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+        .addReg(SPU::R1);
+      // Adjust $sp by required amout
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+        .addImm(FrameSize);
+    } else if (isInt<16>(FrameSize)) {
+      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+      // $r2 to adjust $sp:
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+        .addImm(-16)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+        .addImm(FrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+        .addReg(SPU::R1)
+        .addReg(SPU::R2);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+        .addReg(SPU::R2)
+        .addImm(16);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+    } else {
+      report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+    }
+
+    if (hasDebugInfo) {
+      std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+      // Show update of SP.
+      MachineLocation SPDst(MachineLocation::VirtualFP);
+      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+      // Add callee saved registers to move list.
+      const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+        int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+        unsigned Reg = CSI[I].getReg();
+        if (Reg == SPU::R0) continue;
+        MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+        MachineLocation CSSrc(Reg);
+        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+      }
+
+      // Mark effective beginning of when frame pointer is ready.
+      MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+      MachineLocation FPDst(SPU::R1);
+      MachineLocation FPSrc(MachineLocation::VirtualFP);
+      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+    }
+  } else {
+    // This is a leaf function -- insert a branch hint iff there are
+    // sufficient number instructions in the basic block. Note that
+    // this is just a best guess based on the basic block's size.
+    if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
+      MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+      dl = MBBI->getDebugLoc();
+
+      // Insert terminator label
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
+        .addSym(MMI.getContext().CreateTempSymbol());
+    }
+  }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+                                MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const SPUInstrInfo &TII =
+    *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int FrameSize = MFI->getStackSize();
+  int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  assert(MBBI->getOpcode() == SPU::RET &&
+         "Can only insert epilog into returning blocks");
+  assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+  // the "empty" frame size is 16 - just the register scavenger spill slot
+  if (FrameSize > 16 || MFI->adjustsStack()) {
+    FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+    if (isInt<10>(FrameSize + LinkSlotOffset)) {
+      // Reload $lr, adjust $sp by required amount
+      // Note: We do this to slightly improve dual issue -- not by much, but it
+      // is an opportunity for dual issue.
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+        .addImm(FrameSize + LinkSlotOffset)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+        .addReg(SPU::R1)
+        .addImm(FrameSize);
+    } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+      // $r2 to adjust $sp:
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+        .addImm(16)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+        .addImm(FrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+        .addReg(SPU::R1)
+        .addReg(SPU::R2);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+        .addImm(16)
+        .addReg(SPU::R1);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+        addReg(SPU::R2)
+        .addImm(16);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+        .addReg(SPU::R2)
+        .addReg(SPU::R1);
+    } else {
+      report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+    }
+  }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+                                                                         const {
+  // Initial state of the frame pointer is R1.
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(SPU::R1, 0);
+  Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                        RegScavenger *RS) const{
+  // Mark LR and SP unused, since the prolog spills them to stack and
+  // we don't want anyone else to spill them for us.
+  //
+  // Also, unless R2 is really used someday, don't spill it automatically.
+  MF.getRegInfo().setPhysRegUnused(SPU::R0);
+  MF.getRegInfo().setPhysRegUnused(SPU::R1);
+  MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetRegisterClass *RC = &SPU::R32CRegClass;
+  RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                     RC->getAlignment(),
+                                                     false));
+}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 000000000000..4fee72d946a2
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,94 @@
+//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+  class SPUSubtarget;
+
+  class SPUFrameLowering: public TargetFrameLowering {
+    const SPUSubtarget &Subtarget;
+    std::pair<unsigned, int> LR[1];
+
+  public:
+    SPUFrameLowering(const SPUSubtarget &sti);
+
+    //! Determine the frame's layour
+    void determineFrameLayout(MachineFunction &MF) const;
+
+    /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+    /// the function.
+    void emitPrologue(MachineFunction &MF) const;
+    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+    //! Prediate: Target has dedicated frame pointer
+    bool hasFP(const MachineFunction &MF) const;
+
+    void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                              RegScavenger *RS = NULL) const;
+
+    //! Perform target-specific stack frame setup.
+    void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+    //! Return a function's saved spill slots
+    /*!
+      For CellSPU, a function's saved spill slots is just the link register.
+     */
+    const std::pair<unsigned, int> *
+    getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+    //! Stack slot size (16 bytes)
+    static int stackSlotSize() {
+      return 16;
+    }
+    //! Maximum frame offset representable by a signed 10-bit integer
+    /*!
+      This is the maximum frame offset that can be expressed as a 10-bit
+      integer, used in D-form addresses.
+     */
+    static int maxFrameOffset() {
+      return ((1 << 9) - 1) * stackSlotSize();
+    }
+    //! Minimum frame offset representable by a signed 10-bit integer
+    static int minFrameOffset() {
+      return -(1 << 9) * stackSlotSize();
+    }
+    //! Minimum frame size (enough to spill LR + SP)
+    static int minStackSize() {
+      return (2 * stackSlotSize());
+    }
+    //! Convert frame index to stack offset
+    static int FItoStackOffset(int frame_index) {
+      return frame_index * stackSlotSize();
+    }
+    //! Number of instructions required to overcome hint-for-branch latency
+    /*!
+      HBR (hint-for-branch) instructions can be inserted when, for example,
+      we know that a given function is going to be called, such as printf(),
+      in the control flow graph. HBRs are only inserted if a sufficient number
+      of instructions occurs between the HBR and the target. Currently, HBRs
+      take 6 cycles, ergo, the magic number 6.
+     */
+    static int branchHintPenalty() {
+      return 6;
+    }
+  };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index 9dbab1da9902..403d7ef1fd9e 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
 ///
 /// \return NoHazard
 ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU)
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
 {
   // Initial thoughts on how to do this, but this code cannot work unless the
   // function's prolog and epilog code are also being scheduled so that we can
   // accurately determine which pipeline is being scheduled.
 #if 0
+  assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
   const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
   ScheduleHazardRecognizer::HazardType retval = NoHazard;
   bool mustBeOdd = false;
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
index d0ae2d8e71c8..675632cc7f13 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -20,7 +20,7 @@
 namespace llvm {
 
 class TargetInstrInfo;
-  
+
 /// SPUHazardRecognizer
 class SPUHazardRecognizer : public ScheduleHazardRecognizer
 {
@@ -30,7 +30,7 @@ private:
 
 public:
   SPUHazardRecognizer(const TargetInstrInfo &TII);
-  virtual HazardType getHazardType(SUnit *SU);
+  virtual HazardType getHazardType(SUnit *SU, int Stalls);
   virtual void EmitInstruction(SUnit *SU);
   virtual void AdvanceCycle();
   virtual void EmitNoop();
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 2f1598441f5a..d2261562e721 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -15,7 +15,7 @@
 #include "SPU.h"
 #include "SPUTargetMachine.h"
 #include "SPUHazardRecognizers.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "SPURegisterNames.h"
 #include "SPUTargetMachine.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -111,55 +111,6 @@ namespace {
     return false;
   }
 
-  //===------------------------------------------------------------------===//
-  //! EVT to "useful stuff" mapping structure:
-
-  struct valtype_map_s {
-    EVT VT;
-    unsigned ldresult_ins;      /// LDRESULT instruction (0 = undefined)
-    bool ldresult_imm;          /// LDRESULT instruction requires immediate?
-    unsigned lrinst;            /// LR instruction
-  };
-
-  const valtype_map_s valtype_map[] = {
-    { MVT::i8,    SPU::ORBIr8,  true,  SPU::LRr8 },
-    { MVT::i16,   SPU::ORHIr16, true,  SPU::LRr16 },
-    { MVT::i32,   SPU::ORIr32,  true,  SPU::LRr32 },
-    { MVT::i64,   SPU::ORr64,   false, SPU::LRr64 },
-    { MVT::f32,   SPU::ORf32,   false, SPU::LRf32 },
-    { MVT::f64,   SPU::ORf64,   false, SPU::LRf64 },
-    // vector types... (sigh!)
-    { MVT::v16i8, 0,            false, SPU::LRv16i8 },
-    { MVT::v8i16, 0,            false, SPU::LRv8i16 },
-    { MVT::v4i32, 0,            false, SPU::LRv4i32 },
-    { MVT::v2i64, 0,            false, SPU::LRv2i64 },
-    { MVT::v4f32, 0,            false, SPU::LRv4f32 },
-    { MVT::v2f64, 0,            false, SPU::LRv2f64 }
-  };
-
-  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
-  const valtype_map_s *getValueTypeMapEntry(EVT VT)
-  {
-    const valtype_map_s *retval = 0;
-    for (size_t i = 0; i < n_valtype_map; ++i) {
-      if (valtype_map[i].VT == VT) {
-        retval = valtype_map + i;
-        break;
-      }
-    }
-
-
-#ifndef NDEBUG
-    if (retval == 0) {
-      report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns"
-                         "NULL for " + Twine(VT.getEVTString()));
-    }
-#endif
-
-    return retval;
-  }
-
   //! Generate the carry-generate shuffle mask.
   SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
     SmallVector<SDValue, 16 > ShufBytes;
@@ -221,16 +172,10 @@ namespace {
       return CurDAG->getTargetConstant(Imm, MVT::i32);
     }
 
-    /// getI64Imm - Return a target constant with the specified value, of type
-    /// i64.
-    inline SDValue getI64Imm(uint64_t Imm) {
-      return CurDAG->getTargetConstant(Imm, MVT::i64);
-    }
-
     /// getSmallIPtrImm - Return a target constant of pointer type.
     inline SDValue getSmallIPtrImm(unsigned Imm) {
       return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
-      }
+    }
 
     SDNode *emitBuildVector(SDNode *bvNode) {
       EVT vecVT = bvNode->getValueType(0);
@@ -268,10 +213,10 @@ namespace {
       unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
       SDValue CGPoolOffset =
               SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-      
+
       HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
                                          CurDAG->getEntryNode(), CGPoolOffset,
-                                         PseudoSourceValue::getConstantPool(),0,
+                                         MachinePointerInfo::getConstantPool(),
                                          false, false, Alignment));
       CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
       if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
@@ -356,13 +301,8 @@ namespace {
       return "Cell SPU DAG->DAG Pattern Instruction Selection";
     }
 
-    /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
-    /// this target when scheduling the DAG.
-    virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
-      const TargetInstrInfo *II = TM.getInstrInfo();
-      assert(II && "No InstrInfo?");
-      return new SPUHazardRecognizer(*II);
-    }
+  private:
+    SDValue getRC( MVT );
 
     // Include the pieces autogenerated from the target description.
 #include "SPUGenDAGISel.inc"
@@ -450,8 +390,8 @@ bool
 SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
                                  SDValue &Index) {
   return DFormAddressPredicate(Op, N, Base, Index,
-                               SPUFrameInfo::minFrameOffset(),
-                               SPUFrameInfo::maxFrameOffset());
+                               SPUFrameLowering::minFrameOffset(),
+                               SPUFrameLowering::maxFrameOffset());
 }
 
 bool
@@ -467,7 +407,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
     int FI = int(FIN->getIndex());
     DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
                << FI << "\n");
-    if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+    if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
       Base = CurDAG->getTargetConstant(0, PtrTy);
       Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
       return true;
@@ -493,7 +433,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
         DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
                    << " frame index = " << FI << "\n");
 
-        if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+        if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
           Base = CurDAG->getTargetConstant(offset, PtrTy);
           Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
           return true;
@@ -514,7 +454,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
         DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
                    << " frame index = " << FI << "\n");
 
-        if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+        if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
           Base = CurDAG->getTargetConstant(offset, PtrTy);
           Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
           return true;
@@ -564,8 +504,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
     Base = CurDAG->getTargetConstant(0, N.getValueType());
     Index = N;
     return true;
-  } else if (Opc == ISD::Register 
-           ||Opc == ISD::CopyFromReg 
+  } else if (Opc == ISD::Register
+           ||Opc == ISD::CopyFromReg
            ||Opc == ISD::UNDEF
            ||Opc == ISD::Constant) {
     unsigned OpOpc = Op->getOpcode();
@@ -625,6 +565,46 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
   return false;
 }
 
+/*!
+ Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue
+ to be used as the last parameter of a
+CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+  switch( VT.SimpleTy ) {
+  case MVT::i8:
+    return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+    break;
+  case MVT::i16:
+    return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+    break;
+  case MVT::i32:
+    return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+    break;
+  case MVT::f32:
+    return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+    break;
+  case MVT::i64:
+    return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+    break;
+  case MVT::i128:
+    return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+    break;
+  case MVT::v16i8:
+  case MVT::v8i16:
+  case MVT::v4i32:
+  case MVT::v4f32:
+  case MVT::v2i64:
+  case MVT::v2f64:
+    return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+    break;
+  default:
+    assert( false && "add a new case here" );
+  }
+  return SDValue();
+}
+
 //! Convert the operand from a target-independent to a target-specific node
 /*!
  */
@@ -632,7 +612,7 @@ SDNode *
 SPUDAGToDAGISel::Select(SDNode *N) {
   unsigned Opc = N->getOpcode();
   int n_ops = -1;
-  unsigned NewOpc;
+  unsigned NewOpc = 0;
   EVT OpVT = N->getValueType(0);
   SDValue Ops[8];
   DebugLoc dl = N->getDebugLoc();
@@ -654,7 +634,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
       NewOpc = SPU::Ar32;
       Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
       Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
-                                              N->getValueType(0), TFI, Imm0),
+                                              N->getValueType(0), TFI),
                        0);
       n_ops = 2;
     }
@@ -669,7 +649,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
     EVT Op0VT = Op0.getValueType();
     EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                     Op0VT, (128 / Op0VT.getSizeInBits()));
-    EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), 
+    EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                    OpVT, (128 / OpVT.getSizeInBits()));
     SDValue shufMask;
 
@@ -703,19 +683,19 @@ SPUDAGToDAGISel::Select(SDNode *N) {
     }
 
     SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-    
+
     HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
                                                Op0VecVT, Op0));
-    
+
     SDValue PromScalar;
     if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
       PromScalar = SDValue(N, 0);
     else
       PromScalar = PromoteScalar.getValue();
-    
+
     SDValue zextShuffle =
             CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
-                            PromScalar, PromScalar, 
+                            PromScalar, PromScalar,
                             SDValue(shufMaskLoad, 0));
 
     HandleSDNode Dummy2(zextShuffle);
@@ -725,7 +705,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
       zextShuffle = Dummy2.getValue();
     HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
                                        zextShuffle));
-    
+
     CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
     SelectCode(Dummy.getValue().getNode());
     return Dummy.getValue().getNode();
@@ -736,7 +716,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
     HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
                                        N->getOperand(0), N->getOperand(1),
                                        SDValue(CGLoad, 0)));
-    
+
     CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
     if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
       return N;
@@ -748,7 +728,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
     HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
                                        N->getOperand(0), N->getOperand(1),
                                        SDValue(CGLoad, 0)));
-    
+
     CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
     if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
       return N;
@@ -779,8 +759,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
 
         if (shift_amt >= 32) {
           SDNode *hi32 =
-                  CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT,
-                                         Op0.getOperand(0));
+                  CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                         Op0.getOperand(0), getRC(MVT::i32));
 
           shift_amt -= 32;
           if (shift_amt > 0) {
@@ -862,23 +842,12 @@ SPUDAGToDAGISel::Select(SDNode *N) {
     SDValue Arg = N->getOperand(0);
     SDValue Chain = N->getOperand(1);
     SDNode *Result;
-    const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-
-    if (vtm->ldresult_ins == 0) {
-      report_fatal_error("LDRESULT for unsupported type: " +
-                         Twine(VT.getEVTString()));
-    }
-
-    Opc = vtm->ldresult_ins;
-    if (vtm->ldresult_imm) {
-      SDValue Zero = CurDAG->getTargetConstant(0, VT);
-
-      Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
-    } else {
-      Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
-    }
 
+    Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+                                    MVT::Other, Arg,
+                                    getRC( VT.getSimpleVT()), Chain);
     return Result;
+
   } else if (Opc == SPUISD::IndirectAddr) {
     // Look at the operands: SelectCode() will catch the cases that aren't
     // specifically handled here.
@@ -904,10 +873,10 @@ SPUDAGToDAGISel::Select(SDNode *N) {
           NewOpc = SPU::AIr32;
           Ops[1] = Op1;
         } else {
-          Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, 
-                                                  N->getValueType(0), 
+          Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+                                                  N->getValueType(0),
                                                   Op1),
-                           0); 
+                           0);
         }
       }
       Ops[0] = Op0;
@@ -939,7 +908,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
 SDNode *
 SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
   SDValue Op0 = N->getOperand(0);
-  EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), 
+  EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                OpVT, (128 / OpVT.getSizeInBits()));
   SDValue ShiftAmt = N->getOperand(1);
   EVT ShiftAmtVT = ShiftAmt.getValueType();
@@ -947,7 +916,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
   SDValue SelMaskVal;
   DebugLoc dl = N->getDebugLoc();
 
-  VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+  VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+                                  Op0, getRC(MVT::v2i64) );
   SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
   SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
   ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
@@ -991,7 +961,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
                              SDValue(Shift, 0), SDValue(Bits, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1012,7 +983,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
   SDNode *VecOp0, *Shift = 0;
   DebugLoc dl = N->getDebugLoc();
 
-  VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+  VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+                                  Op0, getRC(MVT::v2i64) );
 
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
     unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -1058,7 +1030,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
                              SDValue(Shift, 0), SDValue(Bits, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1072,21 +1045,23 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
 SDNode *
 SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
   // Promote Op0 to vector
-  EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), 
+  EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
                                OpVT, (128 / OpVT.getSizeInBits()));
   SDValue ShiftAmt = N->getOperand(1);
   EVT ShiftAmtVT = ShiftAmt.getValueType();
   DebugLoc dl = N->getDebugLoc();
 
   SDNode *VecOp0 =
-    CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0));
+    CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                           VecVT, N->getOperand(0), getRC(MVT::v2i64));
 
   SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
   SDNode *SignRot =
     CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
                            SDValue(VecOp0, 0), SignRotAmt);
   SDNode *UpperHalfSign =
-    CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+    CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                           MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
 
   SDNode *UpperHalfSignMask =
     CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
@@ -1133,7 +1108,8 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
                              SDValue(Shift, 0), SDValue(NegShift, 0));
   }
 
-  return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+  return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                                OpVT, SDValue(Shift, 0), getRC(MVT::i64));
 }
 
 /*!
@@ -1154,20 +1130,21 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
   // Here's where it gets interesting, because we have to parse out the
   // subtree handed back in i64vec:
 
-  if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+  if (i64vec.getOpcode() == ISD::BITCAST) {
     // The degenerate case where the upper and lower bits in the splat are
     // identical:
     SDValue Op0 = i64vec.getOperand(0);
 
     ReplaceUses(i64vec, Op0);
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
-                                  SDValue(emitBuildVector(Op0.getNode()), 0));
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                  SDValue(emitBuildVector(Op0.getNode()), 0),
+                                  getRC(MVT::i64));
   } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
     SDValue lhs = i64vec.getOperand(0);
     SDValue rhs = i64vec.getOperand(1);
     SDValue shufmask = i64vec.getOperand(2);
 
-    if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+    if (lhs.getOpcode() == ISD::BITCAST) {
       ReplaceUses(lhs, lhs.getOperand(0));
       lhs = lhs.getOperand(0);
     }
@@ -1176,7 +1153,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
                        ? lhs.getNode()
                        : emitBuildVector(lhs.getNode()));
 
-    if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+    if (rhs.getOpcode() == ISD::BITCAST) {
       ReplaceUses(rhs, rhs.getOperand(0));
       rhs = rhs.getOperand(0);
     }
@@ -1185,7 +1162,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
                        ? rhs.getNode()
                        : emitBuildVector(rhs.getNode()));
 
-    if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+    if (shufmask.getOpcode() == ISD::BITCAST) {
       ReplaceUses(shufmask, shufmask.getOperand(0));
       shufmask = shufmask.getOperand(0);
     }
@@ -1201,11 +1178,13 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
     HandleSDNode Dummy(shufNode);
     SDNode *SN = SelectCode(Dummy.getValue().getNode());
     if (SN == 0) SN = Dummy.getValue().getNode();
-    
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
+
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+                                  OpVT, SDValue(SN, 0), getRC(MVT::i64));
   } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
-    return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
-                                  SDValue(emitBuildVector(i64vec.getNode()), 0));
+    return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+                                  SDValue(emitBuildVector(i64vec.getNode()), 0),
+                                  getRC(MVT::i64));
   } else {
     report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
                       "condition");
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 46f31899be0c..e6511d008c2b 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1,4 +1,3 @@
-//
 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,12 +13,13 @@
 #include "SPURegisterNames.h"
 #include "SPUISelLowering.h"
 #include "SPUTargetMachine.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "SPUMachineFunction.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
+#include "llvm/Type.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -41,41 +41,12 @@ using namespace llvm;
 namespace {
   std::map<unsigned, const char *> node_names;
 
-  //! EVT mapping to useful data for Cell SPU
-  struct valtype_map_s {
-    EVT   valtype;
-    int   prefslot_byte;
-  };
-
-  const valtype_map_s valtype_map[] = {
-    { MVT::i1,   3 },
-    { MVT::i8,   3 },
-    { MVT::i16,  2 },
-    { MVT::i32,  0 },
-    { MVT::f32,  0 },
-    { MVT::i64,  0 },
-    { MVT::f64,  0 },
-    { MVT::i128, 0 }
-  };
-
-  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
-  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
-    const valtype_map_s *retval = 0;
-
-    for (size_t i = 0; i < n_valtype_map; ++i) {
-      if (valtype_map[i].valtype == VT) {
-        retval = valtype_map + i;
-        break;
-      }
-    }
-
-#ifndef NDEBUG
-    if (retval == 0) {
-      report_fatal_error("getValueTypeMapEntry returns NULL for " +
-                         Twine(VT.getEVTString()));
-    }
-#endif
+  // Byte offset of the preferred slot (counted from the MSB)
+  int prefslotOffset(EVT VT) {
+    int retval=0;
+    if (VT==MVT::i1) retval=3;
+    if (VT==MVT::i8) retval=3;
+    if (VT==MVT::i16) retval=2;
 
     return retval;
   }
@@ -125,8 +96,6 @@ namespace {
 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
     SPUTM(TM) {
-  // Fold away setcc operations if possible.
-  setPow2DivIsCheap();
 
   // Use _setjmp/_longjmp instead of setjmp/longjmp.
   setUseUnderscoreSetJmp(true);
@@ -376,10 +345,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 
-  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
-  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+  setOperationAction(ISD::BITCAST, MVT::f64, Legal);
 
   // We cannot sextinreg(i1).  Expand to shifts.
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -439,9 +408,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
     setOperationAction(ISD::AND,     VT, Legal);
     setOperationAction(ISD::OR,      VT, Legal);
     setOperationAction(ISD::XOR,     VT, Legal);
-    setOperationAction(ISD::LOAD,    VT, Legal);
+    setOperationAction(ISD::LOAD,    VT, Custom);
     setOperationAction(ISD::SELECT,  VT, Legal);
-    setOperationAction(ISD::STORE,   VT, Legal);
+    setOperationAction(ISD::STORE,   VT, Custom);
 
     // These operations need to be expanded:
     setOperationAction(ISD::SDIV,    VT, Expand);
@@ -502,8 +471,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
-    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
-    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -531,10 +500,20 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 //===----------------------------------------------------------------------===//
 
 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
-  // i16 and i32 are valid SETCC result types
-  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
-    VT.getSimpleVT().SimpleTy :
-    MVT::i32);
+  // i8, i16 and i32 are valid SETCC result types
+  MVT::SimpleValueType retval;
+
+  switch(VT.getSimpleVT().SimpleTy){
+    case MVT::i1:
+    case MVT::i8:
+      retval = MVT::i8; break;
+    case MVT::i16:
+      retval = MVT::i16; break;
+    case MVT::i32:
+    default:
+      retval = MVT::i32;
+  }
+  return retval;
 }
 
 //===----------------------------------------------------------------------===//
@@ -572,113 +551,174 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   EVT OutVT = Op.getValueType();
   ISD::LoadExtType ExtType = LN->getExtensionType();
   unsigned alignment = LN->getAlignment();
-  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+  int pso = prefslotOffset(InVT);
   DebugLoc dl = Op.getDebugLoc();
-
-  switch (LN->getAddressingMode()) {
-  case ISD::UNINDEXED: {
-    SDValue result;
-    SDValue basePtr = LN->getBasePtr();
-    SDValue rotate;
-
-    if (alignment == 16) {
-      ConstantSDNode *CN;
-
-      // Special cases for a known aligned load to simplify the base pointer
-      // and the rotation amount:
-      if (basePtr.getOpcode() == ISD::ADD
-          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
-        // Known offset into basePtr
-        int64_t offset = CN->getSExtValue();
-        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
-
-        if (rotamt < 0)
-          rotamt += 16;
-
-        rotate = DAG.getConstant(rotamt, MVT::i16);
-
-        // Simplify the base pointer for this case:
-        basePtr = basePtr.getOperand(0);
-        if ((offset & ~0xf) > 0) {
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
-                                basePtr,
-                                DAG.getConstant((offset & ~0xf), PtrVT));
-        }
-      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
-                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
-                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
-                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
-        // Plain aligned a-form address: rotate into preferred slot
-        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
-        int64_t rotamt = -vtm->prefslot_byte;
-        if (rotamt < 0)
-          rotamt += 16;
-        rotate = DAG.getConstant(rotamt, MVT::i16);
-      } else {
-        // Offset the rotate amount by the basePtr and the preferred slot
-        // byte offset
-        int64_t rotamt = -vtm->prefslot_byte;
-        if (rotamt < 0)
-          rotamt += 16;
-        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
-                             basePtr,
-                             DAG.getConstant(rotamt, PtrVT));
-      }
-    } else {
-      // Unaligned load: must be more pessimistic about addressing modes:
-      if (basePtr.getOpcode() == ISD::ADD) {
-        MachineFunction &MF = DAG.getMachineFunction();
-        MachineRegisterInfo &RegInfo = MF.getRegInfo();
-        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-        SDValue Flag;
-
-        SDValue Op0 = basePtr.getOperand(0);
-        SDValue Op1 = basePtr.getOperand(1);
-
-        if (isa<ConstantSDNode>(Op1)) {
-          // Convert the (add <ptr>, <const>) to an indirect address contained
-          // in a register. Note that this is done because we need to avoid
-          // creating a 0(reg) d-form address due to the SPU's block loads.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
-          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
-          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
-        } else {
-          // Convert the (add <arg1>, <arg2>) to an indirect address, which
-          // will likely be lowered as a reg(reg) x-form address.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
-        }
-      } else {
+  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+                                                  (128 / InVT.getSizeInBits()));
+
+  // two sanity checks
+  assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED adresses");
+  // clean aligned loads can be selected as-is
+  if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+    return SDValue();
+
+  // Get pointerinfos to the memory chunk(s) that contain the data to load
+  uint64_t mpi_offset = LN->getPointerInfo().Offset;
+  mpi_offset -= mpi_offset%16;
+  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+  SDValue result;
+  SDValue basePtr = LN->getBasePtr();
+  SDValue rotate;
+
+  if ((alignment%16) == 0) {
+    ConstantSDNode *CN;
+
+    // Special cases for a known aligned load to simplify the base pointer
+    // and the rotation amount:
+    if (basePtr.getOpcode() == ISD::ADD
+        && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+      // Known offset into basePtr
+      int64_t offset = CN->getSExtValue();
+      int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+      if (rotamt < 0)
+        rotamt += 16;
+
+      rotate = DAG.getConstant(rotamt, MVT::i16);
+
+      // Simplify the base pointer for this case:
+      basePtr = basePtr.getOperand(0);
+      if ((offset & ~0xf) > 0) {
         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                               basePtr,
-                              DAG.getConstant(0, PtrVT));
+                              DAG.getConstant((offset & ~0xf), PtrVT));
       }
-
+    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+               || (basePtr.getOpcode() == SPUISD::IndirectAddr
+                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+      // Plain aligned a-form address: rotate into preferred slot
+      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+      int64_t rotamt = -pso;
+      if (rotamt < 0)
+        rotamt += 16;
+      rotate = DAG.getConstant(rotamt, MVT::i16);
+    } else {
       // Offset the rotate amount by the basePtr and the preferred slot
       // byte offset
+      int64_t rotamt = -pso;
+      if (rotamt < 0)
+        rotamt += 16;
       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                            basePtr,
-                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+                           DAG.getConstant(rotamt, PtrVT));
     }
+  } else {
+    // Unaligned load: must be more pessimistic about addressing modes:
+    if (basePtr.getOpcode() == ISD::ADD) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachineRegisterInfo &RegInfo = MF.getRegInfo();
+      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+      SDValue Flag;
+
+      SDValue Op0 = basePtr.getOperand(0);
+      SDValue Op1 = basePtr.getOperand(1);
+
+      if (isa<ConstantSDNode>(Op1)) {
+        // Convert the (add <ptr>, <const>) to an indirect address contained
+        // in a register. Note that this is done because we need to avoid
+        // creating a 0(reg) d-form address due to the SPU's block loads.
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+      } else {
+        // Convert the (add <arg1>, <arg2>) to an indirect address, which
+        // will likely be lowered as a reg(reg) x-form address.
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+      }
+    } else {
+      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                            basePtr,
+                            DAG.getConstant(0, PtrVT));
+   }
+
+    // Offset the rotate amount by the basePtr and the preferred slot
+    // byte offset
+    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+                         basePtr,
+                         DAG.getConstant(-pso, PtrVT));
+  }
 
-    // Re-emit as a v16i8 vector load
-    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
-                         LN->getSrcValue(), LN->getSrcValueOffset(),
-                         LN->isVolatile(), LN->isNonTemporal(), 16);
+  // Do the load as a i128 to allow possible shifting
+  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+                       lowMemPtr,
+                       LN->isVolatile(), LN->isNonTemporal(), 16);
 
+  // When the size is not greater than alignment we get all data with just
+  // one load
+  if (alignment >= InVT.getSizeInBits()/8) {
     // Update the chain
-    the_chain = result.getValue(1);
+    the_chain = low.getValue(1);
 
     // Rotate into the preferred slot:
-    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
-                         result.getValue(0), rotate);
+    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+                         low.getValue(0), rotate);
 
     // Convert the loaded v16i8 vector to the appropriate vector type
     // specified by the operand:
-    EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                  InVT, (128 / InVT.getSizeInBits()));
     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
-                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+  }
+  // When alignment is less than the size, we might need (known only at
+  // run-time) two loads
+  // TODO: if the memory address is composed only from constants, we have
+  // extra kowledge, and might avoid the second load
+  else {
+    // storage position offset from lower 16 byte aligned memory chunk
+    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+                                  basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // get a registerfull of ones. (this implementation is a workaround: LLVM
+    // cannot handle 128 bit signed int constants)
+    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+                               DAG.getNode(ISD::ADD, dl, PtrVT,
+                                           basePtr,
+                                           DAG.getConstant(16, PtrVT)),
+                               highMemPtr,
+                               LN->isVolatile(), LN->isNonTemporal(), 16);
+
+    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+                                                              high.getValue(1));
+
+    // Shift the (possible) high part right to compensate the misalignemnt.
+    // if there is no highpart (i.e. value is i64 and offset is 4), this
+    // will zero out the high value.
+    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+                                     DAG.getNode(ISD::SUB, dl, MVT::i32,
+                                                 DAG.getConstant( 16, MVT::i32),
+                                                 offset
+                                                ));
+
+    // Shift the low similarily
+    // TODO: add SPUISD::SHL_BYTES
+    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+    // Merge the two parts
+    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+                          DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+    if (!InVT.isVector()) {
+      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+     }
 
+  }
     // Handle extending loads by extending the scalar result:
     if (ExtType == ISD::SEXTLOAD) {
       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
@@ -702,21 +742,6 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                          retops, sizeof(retops) / sizeof(retops[0]));
     return result;
-  }
-  case ISD::PRE_INC:
-  case ISD::PRE_DEC:
-  case ISD::POST_INC:
-  case ISD::POST_DEC:
-  case ISD::LAST_INDEXED_MODE:
-    {
-      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
-                         "than UNINDEXED\n" +
-                         Twine((unsigned)LN->getAddressingMode()));
-      /*NOTREACHED*/
-    }
-  }
-
-  return SDValue();
 }
 
 /// Custom lower stores for CellSPU
@@ -734,93 +759,103 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   unsigned alignment = SN->getAlignment();
+  SDValue result;
+  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+                                                 (128 / StVT.getSizeInBits()));
+  // Get pointerinfos to the memory chunk(s) that contain the data to load
+  uint64_t mpi_offset = SN->getPointerInfo().Offset;
+  mpi_offset -= mpi_offset%16;
+  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+  // two sanity checks
+  assert( SN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED adresses");
+  // clean aligned loads can be selected as-is
+  if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+    return SDValue();
+
+  SDValue alignLoadVec;
+  SDValue basePtr = SN->getBasePtr();
+  SDValue the_chain = SN->getChain();
+  SDValue insertEltOffs;
+
+  if ((alignment%16) == 0) {
+    ConstantSDNode *CN;
+    // Special cases for a known aligned load to simplify the base pointer
+    // and insertion byte:
+    if (basePtr.getOpcode() == ISD::ADD
+        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+      // Known offset into basePtr
+      int64_t offset = CN->getSExtValue();
+
+      // Simplify the base pointer for this case:
+      basePtr = basePtr.getOperand(0);
+      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant((offset & 0xf), PtrVT));
 
-  switch (SN->getAddressingMode()) {
-  case ISD::UNINDEXED: {
-    // The vector type we really want to load from the 16-byte chunk.
-    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
-                                 VT, (128 / VT.getSizeInBits()));
-
-    SDValue alignLoadVec;
-    SDValue basePtr = SN->getBasePtr();
-    SDValue the_chain = SN->getChain();
-    SDValue insertEltOffs;
-
-    if (alignment == 16) {
-      ConstantSDNode *CN;
-      // Special cases for a known aligned load to simplify the base pointer
-      // and insertion byte:
-      if (basePtr.getOpcode() == ISD::ADD
-          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
-        // Known offset into basePtr
-        int64_t offset = CN->getSExtValue();
-
-        // Simplify the base pointer for this case:
-        basePtr = basePtr.getOperand(0);
-        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
-                                    basePtr,
-                                    DAG.getConstant((offset & 0xf), PtrVT));
-
-        if ((offset & ~0xf) > 0) {
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
-                                basePtr,
-                                DAG.getConstant((offset & ~0xf), PtrVT));
-        }
-      } else {
-        // Otherwise, assume it's at byte 0 of basePtr
-        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
-                                    basePtr,
-                                    DAG.getConstant(0, PtrVT));
-        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
-                                    basePtr,
-                                    DAG.getConstant(0, PtrVT));
-      }
-    } else {
-      // Unaligned load: must be more pessimistic about addressing modes:
-      if (basePtr.getOpcode() == ISD::ADD) {
-        MachineFunction &MF = DAG.getMachineFunction();
-        MachineRegisterInfo &RegInfo = MF.getRegInfo();
-        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-        SDValue Flag;
-
-        SDValue Op0 = basePtr.getOperand(0);
-        SDValue Op1 = basePtr.getOperand(1);
-
-        if (isa<ConstantSDNode>(Op1)) {
-          // Convert the (add <ptr>, <const>) to an indirect address contained
-          // in a register. Note that this is done because we need to avoid
-          // creating a 0(reg) d-form address due to the SPU's block loads.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
-          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
-          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
-        } else {
-          // Convert the (add <arg1>, <arg2>) to an indirect address, which
-          // will likely be lowered as a reg(reg) x-form address.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
-        }
-      } else {
+      if ((offset & ~0xf) > 0) {
         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                               basePtr,
-                              DAG.getConstant(0, PtrVT));
+                              DAG.getConstant((offset & ~0xf), PtrVT));
       }
-
-      // Insertion point is solely determined by basePtr's contents
-      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+    } else {
+      // Otherwise, assume it's at byte 0 of basePtr
+      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant(0, PtrVT));
+      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                   basePtr,
                                   DAG.getConstant(0, PtrVT));
     }
+  } else {
+    // Unaligned load: must be more pessimistic about addressing modes:
+    if (basePtr.getOpcode() == ISD::ADD) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachineRegisterInfo &RegInfo = MF.getRegInfo();
+      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+      SDValue Flag;
+
+      SDValue Op0 = basePtr.getOperand(0);
+      SDValue Op1 = basePtr.getOperand(1);
+
+      if (isa<ConstantSDNode>(Op1)) {
+        // Convert the (add <ptr>, <const>) to an indirect address contained
+        // in a register. Note that this is done because we need to avoid
+        // creating a 0(reg) d-form address due to the SPU's block loads.
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+      } else {
+        // Convert the (add <arg1>, <arg2>) to an indirect address, which
+        // will likely be lowered as a reg(reg) x-form address.
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+      }
+    } else {
+      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                            basePtr,
+                            DAG.getConstant(0, PtrVT));
+    }
 
-    // Load the memory to which to store.
-    alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
-                               SN->getSrcValue(), SN->getSrcValueOffset(),
-                               SN->isVolatile(), SN->isNonTemporal(), 16);
+    // Insertion point is solely determined by basePtr's contents
+    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+                                basePtr,
+                                DAG.getConstant(0, PtrVT));
+  }
 
+  // Load the lower part of the memory to which to store.
+  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+                          lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+  // if we don't need to store over the 16 byte boundary, one store suffices
+  if (alignment >= StVT.getSizeInBits()/8) {
     // Update the chain
-    the_chain = alignLoadVec.getValue(1);
+    the_chain = low.getValue(1);
 
-    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+    LoadSDNode *LN = cast<LoadSDNode>(low);
     SDValue theValue = SN->getValue();
-    SDValue result;
 
     if (StVT != VT
         && (theValue.getOpcode() == ISD::AssertZext
@@ -844,48 +879,114 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 
     SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                       insertEltOffs);
-    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, 
+    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                       theValue);
 
     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
-                         vectorizeOp, alignLoadVec,
-                         DAG.getNode(ISD::BIT_CONVERT, dl,
+                         vectorizeOp, low,
+                         DAG.getNode(ISD::BITCAST, dl,
                                      MVT::v4i32, insertEltOp));
 
     result = DAG.getStore(the_chain, dl, result, basePtr,
-                          LN->getSrcValue(), LN->getSrcValueOffset(),
+                          lowMemPtr,
                           LN->isVolatile(), LN->isNonTemporal(),
-                          LN->getAlignment());
-
-#if 0 && !defined(NDEBUG)
-    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-      const SDValue &currentRoot = DAG.getRoot();
-
-      DAG.setRoot(result);
-      errs() << "------- CellSPU:LowerStore result:\n";
-      DAG.dump();
-      errs() << "-------\n";
-      DAG.setRoot(currentRoot);
-    }
-#endif
-
-    return result;
-    /*UNREACHED*/
-  }
-  case ISD::PRE_INC:
-  case ISD::PRE_DEC:
-  case ISD::POST_INC:
-  case ISD::POST_DEC:
-  case ISD::LAST_INDEXED_MODE:
-    {
-      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
-                         "than UNINDEXED\n" +
-                         Twine((unsigned)SN->getAddressingMode()));
-      /*NOTREACHED*/
-    }
+                          16);
+
+  }
+  // do the store when it might cross the 16 byte memory access boundary.
+  else {
+    // TODO issue a warning if SN->isVolatile()== true? This is likely not
+    // what the user wanted.
+
+    // address offset from nearest lower 16byte alinged address
+    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+                                    SN->getBasePtr(),
+                                    DAG.getConstant(0xf, MVT::i32));
+    // 16 - offset
+    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                                           DAG.getConstant( 16, MVT::i32),
+                                           offset);
+    // 16 - sizeof(Value)
+    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                                     DAG.getConstant( 16, MVT::i32),
+                                     DAG.getConstant( VT.getSizeInBits()/8,
+                                                      MVT::i32));
+    // get a registerfull of ones
+    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+    // Create the 128 bit masks that have ones where the data to store is
+    // located.
+    SDValue lowmask, himask;
+    // if the value to store don't fill up the an entire 128 bits, zero
+    // out the last bits of the mask so that only the value we want to store
+    // is masked.
+    // this is e.g. in the case of store i32, align 2
+    if (!VT.isVector()){
+      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+                                                               surplus);
+      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+    }
+    else {
+      lowmask = ones;
+      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+    }
+    // this will zero, if there are no data that goes to the high quad
+    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+                                                            offset_compl);
+    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+                                                             offset);
+
+    // Load in the old data and zero out the parts that will be overwritten with
+    // the new data to store.
+    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+                               DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+                                           DAG.getConstant( 16, PtrVT)),
+                               highMemPtr,
+                               SN->isVolatile(), SN->isNonTemporal(), 16);
+    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+                                                              hi.getValue(1));
+
+    low = DAG.getNode(ISD::AND, dl, MVT::i128,
+                        DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+                        DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+                        DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+                        DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+    // Shift the Value to store into place. rlow contains the parts that go to
+    // the lower memory chunk, rhi has the parts that go to the upper one.
+    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+                                                            offset_compl);
+
+    // Merge the old data and the new data and store the results
+    // Need to convert vectors here to integer as 'OR'ing floats assert
+    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+                          DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+                          DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+                         DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+                         DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+    low = DAG.getStore(the_chain, dl, rlow, basePtr,
+                          lowMemPtr,
+                          SN->isVolatile(), SN->isNonTemporal(), 16);
+    hi  = DAG.getStore(the_chain, dl, rhi,
+                            DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+                                        DAG.getConstant( 16, PtrVT)),
+                            highMemPtr,
+                            SN->isVolatile(), SN->isNonTemporal(), 16);
+    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+                                                           hi.getValue(0));
   }
 
-  return SDValue();
+  return result;
 }
 
 //! Generate the address of a constant pool entry.
@@ -993,7 +1094,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
     SDValue T = DAG.getConstant(dbits, MVT::i64);
     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
-                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
   }
 
   return SDValue();
@@ -1013,9 +1114,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
 
-  unsigned ArgOffset = SPUFrameInfo::minStackSize();
+  unsigned ArgOffset = SPUFrameLowering::minStackSize();
   unsigned ArgRegIdx = 0;
-  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 
@@ -1080,7 +1181,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       // or we're forced to do vararg
       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+                           false, false, 0);
       ArgOffset += StackSlotSize;
     }
 
@@ -1091,8 +1193,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
 
   // vararg handling:
   if (isVarArg) {
-    // FIXME: we should be able to query the argument registers from 
-    //        tablegen generated code. 
+    // FIXME: we should be able to query the argument registers from
+    //        tablegen generated code.
     static const unsigned ArgRegs[] = {
       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
@@ -1117,9 +1219,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       FuncInfo->setVarArgsFrameIndex(
         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
-      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
+      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
-      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                    false, false, 0);
       Chain = Store.getOperand(0);
       MemOps.push_back(Store);
@@ -1163,14 +1265,14 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
   unsigned NumOps     = Outs.size();
-  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
 
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
-                 *DAG.getContext()); 
+                 *DAG.getContext());
   // FIXME: allow for other calling conventions
   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-  
+
   const unsigned NumArgRegs = ArgLocs.size();
 
 
@@ -1184,7 +1286,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Figure out which arguments are going to go in registers, and which in
   // memory.
-  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+  unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
   unsigned ArgRegIdx = 0;
 
   // Keep track of registers passing arguments
@@ -1219,7 +1321,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       if (ArgRegIdx != NumArgRegs) {
         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
       } else {
-        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                           MachinePointerInfo(),
                                            false, false, 0));
         ArgOffset += StackSlotSize;
       }
@@ -1230,7 +1333,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // Accumulate how many bytes are to be pushed on the stack, including the
   // linkage area, and parameter passing area.  According to the SPU ABI,
   // we minimally need space for [LR] and [SP].
-  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+  unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
 
   // Insert a call sequence start
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
@@ -1311,7 +1414,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (InFlag.getNode())
     Ops.push_back(InFlag);
   // Returns a chain and a flag for retval copy to use.
-  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                       &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
 
@@ -1334,7 +1437,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // If the call has results, copy the values out of the ret val registers.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign VA = RVLocs[i];
-    
+
     SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                      InFlag);
     Chain = Val.getValue(1);
@@ -1567,7 +1670,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
     SDValue T = DAG.getConstant(Value32, MVT::i32);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
     break;
   }
@@ -1577,7 +1680,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
     SDValue T = DAG.getConstant(f64val, MVT::i64);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
     break;
   }
@@ -1587,7 +1690,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
    SmallVector<SDValue, 8> Ops;
 
    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
-   return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+   return DAG.getNode(ISD::BITCAST, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
   }
   case MVT::v8i16: {
@@ -1621,7 +1724,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
   if (upper == lower) {
     // Magic constant that can be matched by IL, ILA, et. al.
     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+    return DAG.getNode(ISD::BITCAST, dl, OpVT,
                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                    Val, Val, Val, Val));
   } else {
@@ -1650,7 +1753,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
     // Create lower vector if not a special pattern
     if (!lower_special) {
       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
-      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+      LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                      LO32C, LO32C, LO32C, LO32C));
     }
@@ -1658,7 +1761,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
     // Create upper vector if not a special pattern
     if (!upper_special) {
       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
-      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+      HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                      HI32C, HI32C, HI32C, HI32C));
     }
@@ -1735,14 +1838,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   unsigned CurrElt = 0;
   unsigned MaxElts = VecVT.getVectorNumElements();
   unsigned PrevElt = 0;
-  unsigned V0Elt = 0;
   bool monotonic = true;
   bool rotate = true;
+  int rotamt=0;
   EVT maskVT;             // which of the c?d instructions to use
 
   if (EltVT == MVT::i8) {
     V2EltIdx0 = 16;
-    maskVT = MVT::v16i8; 
+    maskVT = MVT::v16i8;
   } else if (EltVT == MVT::i16) {
     V2EltIdx0 = 8;
     maskVT = MVT::v8i16;
@@ -1758,7 +1861,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   for (unsigned i = 0; i != MaxElts; ++i) {
     if (SVN->getMaskElt(i) < 0)
       continue;
-    
+
     unsigned SrcElt = SVN->getMaskElt(i);
 
     if (monotonic) {
@@ -1782,13 +1885,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
         if ((PrevElt == SrcElt - 1)
             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
           PrevElt = SrcElt;
-          if (SrcElt == 0)
-            V0Elt = i;
         } else {
           rotate = false;
         }
-      } else if (i == 0) {
-        // First time through, need to keep track of previous element
+      } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
+        // First time or after a "wrap around"
+        rotamt = SrcElt-i;
         PrevElt = SrcElt;
       } else {
         // This isn't a rotation, takes elements from vector 2
@@ -1806,15 +1908,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),
                                 DAG.getConstant(V2EltOffset, MVT::i32));
-    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 
+    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
                                      maskVT, Pointer);
 
     // Use shuffle mask in SHUFB synthetic instruction:
     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                        ShufMaskOp);
   } else if (rotate) {
-    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+    if (rotamt < 0)
+      rotamt +=MaxElts;
+    rotamt *= EltVT.getSizeInBits()/8;
     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                        V1, DAG.getConstant(rotamt, MVT::i16));
   } else {
@@ -1999,7 +2102,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
                         DAG.getConstant(scaleShift, MVT::i32));
     }
 
-    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+    vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
 
     // Replicate the bytes starting at byte 0 across the entire vector (for
     // consistency with the notion of a unified register set)
@@ -2069,7 +2172,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
                                 DAG.getRegister(SPU::R1, PtrVT),
                                 DAG.getConstant(Offset, PtrVT));
   // widen the mask when dealing with half vectors
-  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), 
+  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
                                 128/ VT.getVectorElementType().getSizeInBits());
   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
 
@@ -2077,7 +2180,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     DAG.getNode(SPUISD::SHUFB, dl, VT,
                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                 VecOp,
-                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+                DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
 
   return result;
 }
@@ -2197,12 +2300,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
   ConstVec = Op.getOperand(0);
   Arg = Op.getOperand(1);
   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
-    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+    if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
       ConstVec = ConstVec.getOperand(0);
     } else {
       ConstVec = Op.getOperand(1);
       Arg = Op.getOperand(0);
-      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+      if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
         ConstVec = ConstVec.getOperand(0);
       }
     }
@@ -2243,7 +2346,7 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
 */
 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
-  EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                VT, (128 / VT.getSizeInBits()));
   DebugLoc dl = Op.getDebugLoc();
 
@@ -2419,7 +2522,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
 
   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
   // selected to a NOP:
-  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+  SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
   SDValue lhsHi32 =
           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                       DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2453,7 +2556,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                                     ISD::SETGT));
   }
 
-  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+  SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
   SDValue rhsHi32 =
           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                       DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2567,7 +2670,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
   // Type to truncate to
   EVT VT = Op.getValueType();
   MVT simpleVT = VT.getSimpleVT();
-  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
                                VT, (128 / VT.getSizeInBits()));
   DebugLoc dl = Op.getDebugLoc();
 
@@ -2575,7 +2678,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
   SDValue Op0 = Op.getOperand(0);
   EVT Op0VT = Op0.getValueType();
 
-  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+  if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
     // Create shuffle mask, least significant doubleword of quadword
     unsigned maskHigh = 0x08090a0b;
     unsigned maskLow = 0x0c0d0e0f;
@@ -2616,6 +2719,12 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
   SDValue Op0 = Op.getOperand(0);
   MVT Op0VT = Op0.getValueType().getSimpleVT();
 
+  // extend i8 & i16 via i32
+  if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+    Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+    Op0VT = MVT::i32;
+  }
+
   // The type to extend to needs to be a i128 and
   // the type to extend from needs to be i64 or i32.
   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
@@ -2640,12 +2749,17 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
                  DAG.getConstant(31, MVT::i32));
 
+  // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
+  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                        dl, Op0VT, Op0,
+                                        DAG.getTargetConstant(
+                                                  SPU::GPRCRegClass.getID(),
+                                                  MVT::i32)), 0);
   // Shuffle bytes - Copy the sign bits into the upper 64 bits
   // and the input value into the lower 64 bits.
   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
-      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+        extended, sraVal, shufMask);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
 }
 
 //! Custom (target-specific) lowering entry point
@@ -2903,8 +3017,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     }
     break;
   }
-  case SPUISD::SHLQUAD_L_BITS:
-  case SPUISD::SHLQUAD_L_BYTES:
+  case SPUISD::SHL_BITS:
+  case SPUISD::SHL_BYTES:
   case SPUISD::ROTBYTES_LEFT: {
     SDValue Op1 = N->getOperand(1);
 
@@ -2982,6 +3096,38 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
   return TargetLowering::getConstraintType(ConstraintLetter);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+    //FIXME: Seems like the supported constraint letters were just copied
+    // from PPC, as the following doesn't correspond to the GCC docs.
+    // I'm leaving it so until someone adds the corresponding lowering support.
+  case 'b':
+  case 'r':
+  case 'f':
+  case 'd':
+  case 'v':
+  case 'y':
+    weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 std::pair<unsigned, const TargetRegisterClass*>
 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const
@@ -3086,3 +3232,28 @@ SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The SPU target isn't yet aware of offsets.
   return false;
 }
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  //ceqi, cgti, etc. all take s10 operand
+  return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                         const Type * ) const{
+
+  // A-form: 18bit absolute address.
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14bit offset
+  if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg+reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
+    return true;
+
+  return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 6d3c90b7512c..95d44afe37c8 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -41,8 +41,9 @@ namespace llvm {
       CNTB,                     ///< Count leading ones in bytes
       PREFSLOT2VEC,             ///< Promote scalar->vector
       VEC2PREFSLOT,             ///< Extract element 0
-      SHLQUAD_L_BITS,           ///< Rotate quad left, by bits
-      SHLQUAD_L_BYTES,          ///< Rotate quad left, by bytes
+      SHL_BITS,                 ///< Shift quad left, by bits
+      SHL_BYTES,                ///< Shift quad left, by bytes
+      SRL_BYTES,                ///< Shift quad right, by bytes. Insert zeros.
       VEC_ROTL,                 ///< Vector rotate left
       VEC_ROTR,                 ///< Vector rotate right
       ROTBYTES_LEFT,            ///< Rotate bytes (loads -> ROTQBYI)
@@ -129,6 +130,11 @@ namespace llvm {
 
     ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
 
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -170,6 +176,19 @@ namespace llvm {
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
+
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+    virtual bool isLegalAddressingMode(const AddrMode &AM, 
+                                       const Type *Ty) const;
+ 
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non callee saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+                                          MachineFunction &MF) const{
+      return 50;
+    }
   };
 }
 
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 26d6b4f25ef1..f9e6c72ef310 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -16,6 +16,7 @@
 #include "SPUInstrBuilder.h"
 #include "SPUTargetMachine.h"
 #include "SPUGenInstrInfo.inc"
+#include "SPUHazardRecognizers.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
     RI(*TM.getSubtargetImpl(), *this)
 { /* NOP */ }
 
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+  const TargetMachine *TM,
+  const ScheduleDAG *DAG) const {
+  const TargetInstrInfo *TII = TM->getInstrInfo();
+  assert(TII && "No InstrInfo?");
+  return new SPUHazardRecognizer(*TII);
+}
+
 unsigned
 SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                   int &FrameIndex) const {
@@ -129,7 +140,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                   const TargetRegisterInfo *TRI) const
 {
   unsigned opc;
-  bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+  bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
   if (RC == SPU::GPRCRegisterClass) {
     opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
   } else if (RC == SPU::R64CRegisterClass) {
@@ -164,7 +175,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                    const TargetRegisterInfo *TRI) const
 {
   unsigned opc;
-  bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+  bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
   if (RC == SPU::GPRCRegisterClass) {
     opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
   } else if (RC == SPU::R64CRegisterClass) {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 191e55d0ca61..e5e91481419a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,6 +32,10 @@ namespace llvm {
     ///
     virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
 
+    ScheduleHazardRecognizer *
+    CreateTargetHazardRecognizer(const TargetMachine *TM,
+                                 const ScheduleDAG *DAG) const;
+
     unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                  int &FrameIndex) const;
     unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index ca0fe00e37f8..25f6fd000b8b 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -416,7 +416,7 @@ multiclass ImmLoadAddress
   def lo: ILARegInst<R32C, symbolLo, imm18>;
 
   def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
-                   [/* no pattern */]>;
+                   [(set R32C:$rT, imm18:$val)]>;
 }
 
 defm ILA : ImmLoadAddress;
@@ -1167,10 +1167,10 @@ class XSHWRegInst<RegisterClass rclass>:
              [(set rclass:$rDest, (sext R16C:$rSrc))]>;
 
 multiclass ExtendHalfwordWord {
-  def v4i32: XSHWVecInst<v4i32, v8i16>;
-  
+  def v4i32: XSHWVecInst<v8i16, v4i32>;
+
   def r16:   XSHWRegInst<R32C>;
-  
+
   def r32:   XSHWInRegInst<R32C,
                           [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
   def r64:   XSHWInRegInst<R64C, [/* no pattern */]>;
@@ -1385,59 +1385,6 @@ class ORRegInst<RegisterClass rclass>:
     ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
            [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
 
-// ORCvtForm: OR conversion form
-//
-// This is used to "convert" the preferred slot to its vector equivalent, as
-// well as convert a vector back to its preferred slot.
-//
-// These are effectively no-ops, but need to exist for proper type conversion
-// and type coercion.
-
-class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
-          : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
-  bits<7> RA;
-  bits<7> RT;
-
-  let Pattern = pattern;
-
-  let Inst{0-10} = 0b10000010000;
-  let Inst{11-17} = RA;
-  let Inst{18-24} = RA;
-  let Inst{25-31} = RT;
-}
-
-class ORPromoteScalar<RegisterClass rclass>:
-    ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
-
-class ORExtractElt<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
-
-/* class ORCvtRegGPRC<RegisterClass rclass>:
-    ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
-
-/* class ORCvtGPRCReg<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
-    
-class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
-    
-class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
-    
-class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
-    ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtGPRCVec:
-    ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
-
-class ORCvtVecGPRC:
-    ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
-
-class ORCvtVecVec:
-    ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
 
 multiclass BitwiseOr
 {
@@ -1468,119 +1415,48 @@ multiclass BitwiseOr
 
   def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
                   [/* no pattern */]>;
-
-  // scalar->vector promotion, prefslot2vec:
-  def v16i8_i8:  ORPromoteScalar<R8C>;
-  def v8i16_i16: ORPromoteScalar<R16C>;
-  def v4i32_i32: ORPromoteScalar<R32C>;
-  def v2i64_i64: ORPromoteScalar<R64C>;
-  def v4f32_f32: ORPromoteScalar<R32FP>;
-  def v2f64_f64: ORPromoteScalar<R64FP>;
-
-  // vector->scalar demotion, vec2prefslot:
-  def i8_v16i8:  ORExtractElt<R8C>;
-  def i16_v8i16: ORExtractElt<R16C>;
-  def i32_v4i32: ORExtractElt<R32C>;
-  def i64_v2i64: ORExtractElt<R64C>;
-  def f32_v4f32: ORExtractElt<R32FP>;
-  def f64_v2f64: ORExtractElt<R64FP>;
-
-  // Conversion from vector to GPRC
-  def i128_vec:  ORCvtVecGPRC;
-
-  // Conversion from GPRC to vector
-  def vec_i128:  ORCvtGPRCVec;
-
-/*
-  // Conversion from register to GPRC
-  def i128_r64:  ORCvtRegGPRC<R64C>;
-  def i128_f64:  ORCvtRegGPRC<R64FP>;
-  def i128_r32:  ORCvtRegGPRC<R32C>;
-  def i128_f32:  ORCvtRegGPRC<R32FP>;
-  def i128_r16:  ORCvtRegGPRC<R16C>;
-  def i128_r8:   ORCvtRegGPRC<R8C>;
-
-  // Conversion from GPRC to register
-  def r64_i128:  ORCvtGPRCReg<R64C>;
-  def f64_i128:  ORCvtGPRCReg<R64FP>;
-  def r32_i128:  ORCvtGPRCReg<R32C>;
-  def f32_i128:  ORCvtGPRCReg<R32FP>;
-  def r16_i128:  ORCvtGPRCReg<R16C>;
-  def r8_i128:   ORCvtGPRCReg<R8C>;
-*/
-/*
-  // Conversion from register to R32C:
-  def r32_r16:   ORCvtFormRegR32<R16C>;
-  def r32_r8:    ORCvtFormRegR32<R8C>;
-  
-  // Conversion from R32C to register
-  def r32_r16:   ORCvtFormR32Reg<R16C>;
-  def r32_r8:    ORCvtFormR32Reg<R8C>;
-*/
-  
-  // Conversion from R64C to register:
-  def r32_r64:   ORCvtFormR64Reg<R32C>;
-  // def r16_r64:   ORCvtFormR64Reg<R16C>;
-  // def r8_r64:    ORCvtFormR64Reg<R8C>;
-  
-  // Conversion to R64C from register:
-  def r64_r32:   ORCvtFormRegR64<R32C>;
-  // def r64_r16:   ORCvtFormRegR64<R16C>;
-  // def r64_r8:    ORCvtFormRegR64<R8C>;
-
-  // bitconvert patterns:
-  def r32_f32:   ORCvtFormR32Reg<R32FP,
-                                 [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
-  def f32_r32:   ORCvtFormRegR32<R32FP,
-                                 [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
-
-  def r64_f64:   ORCvtFormR64Reg<R64FP,
-                                 [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
-  def f64_r64:   ORCvtFormRegR64<R64FP,
-                                 [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
 }
 
 defm OR : BitwiseOr;
 
-// scalar->vector promotion patterns (preferred slot to vector):
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
 def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
-          (ORv16i8_i8 R8C:$rA)>;
+          (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
 
 def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
-          (ORv8i16_i16 R16C:$rA)>;
+          (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
 
 def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
-          (ORv4i32_i32 R32C:$rA)>;
+          (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
 
 def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
-          (ORv2i64_i64 R64C:$rA)>;
+          (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
 
 def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
-          (ORv4f32_f32 R32FP:$rA)>;
+          (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
 
 def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
-          (ORv2f64_f64 R64FP:$rA)>;
-
-// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
-// known as converting the vector back to its preferred slot
-
-def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
-          (ORi8_v16i8 VECREG:$rA)>;
+          (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+ 
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
 
-def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
-          (ORi16_v8i16 VECREG:$rA)>;
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
 
-def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
-          (ORi32_v4i32 VECREG:$rA)>;
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
 
-def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
-          (ORi64_v2i64 VECREG:$rA)>;
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
 
-def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
-          (ORf32_v4f32 VECREG:$rA)>;
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
 
-def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
-          (ORf64_v2f64 VECREG:$rA)>;
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+          (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
 
 // Load Register: This is an assembler alias for a bitwise OR of a register
 // against itself. It's here because it brings some clarity to assembly
@@ -2093,7 +1969,7 @@ defm EQV: BitEquivalence;
 
 class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
     RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
-            IntegerOp, pattern>;
+            ShuffleOp, pattern>;
 
 class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
     SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
@@ -2134,7 +2010,7 @@ defm SHUFB : ShuffleBytes;
 
 class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 class SHLHVecInst<ValueType vectype>:
     SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
@@ -2156,7 +2032,7 @@ defm SHLH : ShiftLeftHalfword;
 
 class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftVec, pattern>;
 
 class SHLHIVecInst<ValueType vectype>:
     SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2182,7 +2058,7 @@ def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
 
 class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 multiclass ShiftLeftWord
 {
@@ -2201,7 +2077,7 @@ defm SHL: ShiftLeftWord;
 
 class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftVec, pattern>;
 
 multiclass ShiftLeftWordImm
 {
@@ -2230,7 +2106,7 @@ defm SHLI : ShiftLeftWordImm;
 
 class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class SHLQBIVecInst<ValueType vectype>:
     SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2259,7 +2135,7 @@ defm SHLQBI : ShiftLeftQuadByBits;
 // enforcement, whereas with SHLQBI, we have to "take it on faith."
 class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class SHLQBIIVecInst<ValueType vectype>:
     SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2283,7 +2159,7 @@ defm SHLQBII : ShiftLeftQuadByBitsImm;
 
 class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class SHLQBYVecInst<ValueType vectype>:
     SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2306,7 +2182,7 @@ defm SHLQBY: ShiftLeftQuadBytes;
 
 class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class SHLQBYIVecInst<ValueType vectype>:
     SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2330,7 +2206,7 @@ defm SHLQBYI : ShiftLeftQuadBytesImm;
 
 class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class SHLQBYBIVecInst<ValueType vectype>:
     SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2359,7 +2235,7 @@ defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 class ROTHVecInst<ValueType vectype>:
     ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
@@ -2386,7 +2262,7 @@ def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftVec, pattern>;
 
 class ROTHIVecInst<ValueType vectype>:
     ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2413,7 +2289,7 @@ def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
 
 class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 class ROTVecInst<ValueType vectype>:
     ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2461,7 +2337,7 @@ def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
 
 class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftVec, pattern>;
 
 class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
     ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
@@ -2491,12 +2367,15 @@ defm ROTI : RotateLeftWordImm;
 
 class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
-class ROTQBYVecInst<ValueType vectype>:
-    ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
-               [(set (vectype VECREG:$rT),
-                     (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+    ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+               [(set (type rc:$rT),
+                     (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+    ROTQBYGenInst<type, VECREG>;
 
 multiclass RotateQuadLeftByBytes
 {
@@ -2506,6 +2385,7 @@ multiclass RotateQuadLeftByBytes
   def v4f32: ROTQBYVecInst<v4f32>;
   def v2i64: ROTQBYVecInst<v2i64>;
   def v2f64: ROTQBYVecInst<v2f64>;
+  def i128:  ROTQBYGenInst<i128, GPRC>;
 }
 
 defm ROTQBY: RotateQuadLeftByBytes;
@@ -2516,12 +2396,15 @@ defm ROTQBY: RotateQuadLeftByBytes;
 
 class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+    ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+                [(set (type rclass:$rT),
+                      (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
 
 class ROTQBYIVecInst<ValueType vectype>:
-    ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
-                [(set (vectype VECREG:$rT),
-                      (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+    ROTQBYIGenInst<vectype, VECREG>;
 
 multiclass RotateQuadByBytesImm
 {
@@ -2531,6 +2414,7 @@ multiclass RotateQuadByBytesImm
   def v4f32: ROTQBYIVecInst<v4f32>;
   def v2i64: ROTQBYIVecInst<v2i64>;
   def vfi64: ROTQBYIVecInst<v2f64>;
+  def i128:  ROTQBYIGenInst<i128, GPRC>;
 }
 
 defm ROTQBYI: RotateQuadByBytesImm;
@@ -2539,7 +2423,7 @@ defm ROTQBYI: RotateQuadByBytesImm;
 class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b00110011100, OOL, IOL,
       "rotqbybi\t$rT, $rA, $shift",
-      RotateShift, pattern>;
+      RotShiftQuad, pattern>;
 
 class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
     ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
@@ -2564,7 +2448,7 @@ defm ROTQBYBI : RotateQuadByBytesByBitshift;
 
 class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class ROTQBIVecInst<ValueType vectype>:
     ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2589,7 +2473,7 @@ defm ROTQBI: RotateQuadByBitCount;
 
 class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
                      PatLeaf pred>:
@@ -2624,7 +2508,7 @@ defm ROTQBII : RotateQuadByBitCountImm;
 
 class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 def ROTHMv8i16:
     ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2666,7 +2550,7 @@ def : Pat<(srl R16C:$rA, R8C:$rB),
 
 class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftVec, pattern>;
 
 def ROTHMIv8i16:
     ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
@@ -2697,7 +2581,7 @@ def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
 // ROTM v4i32 form: See the ROTHM v8i16 comments.
 class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftVec, pattern>;
 
 def ROTMv4i32:
     ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2732,7 +2616,7 @@ def : Pat<(srl R32C:$rA, R8C:$rB),
 // ROTMI v4i32 form: See the comment for ROTHM v8i16.
 def ROTMIv4i32:
     RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
-      "rotmi\t$rT, $rA, $val", RotateShift,
+      "rotmi\t$rT, $rA, $val", RotShiftVec,
       [(set (v4i32 VECREG:$rT),
             (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
 
@@ -2745,7 +2629,7 @@ def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
 // ROTMI r32 form: know how to complement the immediate value.
 def ROTMIr32:
     RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
-      "rotmi\t$rT, $rA, $val", RotateShift,
+      "rotmi\t$rT, $rA, $val", RotShiftVec,
       [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
 
 def : Pat<(srl R32C:$rA, (i16 imm:$val)),
@@ -2762,7 +2646,7 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)),
 
 class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class ROTQMBYVecInst<ValueType vectype>:
     ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2785,9 +2669,13 @@ multiclass RotateQuadBytes
 
 defm ROTQMBY : RotateQuadBytes;
 
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+          (ROTQMBYr128  GPRC:$rA, 
+                        (SFIr32 R32C:$rB, 0))>;
+
 class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class ROTQMBYIVecInst<ValueType vectype>:
     ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2827,7 +2715,7 @@ defm ROTQMBYI : RotateQuadBytesImm;
 
 class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class ROTQMBYBIVecInst<ValueType vectype>:
     ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2839,6 +2727,8 @@ multiclass RotateMaskQuadByBitCount
   def v8i16: ROTQMBYBIVecInst<v8i16>;
   def v4i32: ROTQMBYBIVecInst<v4i32>;
   def v2i64: ROTQMBYBIVecInst<v2i64>;
+  def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+                           [/*no pattern*/]>;
 }
 
 defm ROTQMBYBI: RotateMaskQuadByBitCount;
@@ -2850,7 +2740,7 @@ defm ROTQMBYBI: RotateMaskQuadByBitCount;
 
 class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
-           RotateShift, pattern>;
+           RotShiftQuad, pattern>;
 
 class ROTQMBIVecInst<ValueType vectype>:
     ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2873,13 +2763,19 @@ multiclass RotateMaskQuadByBits
 
 defm ROTQMBI: RotateMaskQuadByBits;
 
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+          (ROTQMBYBIr128 (ROTQMBIr128  GPRC:$rA, 
+                                       (SFIr32 R32C:$rB, 0)),
+                         (SFIr32 R32C:$rB, 0))>;
+
+
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // Rotate quad and mask by bits, immediate
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 
 class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
     RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
-            RotateShift, pattern>;
+            RotShiftQuad, pattern>;
 
 class ROTQMBIIVecInst<ValueType vectype>:
    ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2907,7 +2803,7 @@ defm ROTQMBII: RotateMaskQuadByBitsImm;
 
 def ROTMAHv8i16:
     RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
-      "rotmah\t$rT, $rA, $rB", RotateShift,
+      "rotmah\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
 def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
@@ -2923,7 +2819,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
 
 def ROTMAHr16:
     RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
-      "rotmah\t$rT, $rA, $rB", RotateShift,
+      "rotmah\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
 def : Pat<(sra R16C:$rA, R32C:$rB),
@@ -2939,7 +2835,7 @@ def : Pat<(sra R16C:$rA, R8C:$rB),
 
 def ROTMAHIv8i16:
     RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
-      "rotmahi\t$rT, $rA, $val", RotateShift,
+      "rotmahi\t$rT, $rA, $val", RotShiftVec,
       [(set (v8i16 VECREG:$rT),
             (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
 
@@ -2951,7 +2847,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
 
 def ROTMAHIr16:
     RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
-      "rotmahi\t$rT, $rA, $val", RotateShift,
+      "rotmahi\t$rT, $rA, $val", RotShiftVec,
       [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
 
 def : Pat<(sra R16C:$rA, (i32 imm:$val)),
@@ -2962,7 +2858,7 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)),
 
 def ROTMAv4i32:
     RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
-      "rotma\t$rT, $rA, $rB", RotateShift,
+      "rotma\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
 def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
@@ -2978,7 +2874,7 @@ def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
 
 def ROTMAr32:
     RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
-      "rotma\t$rT, $rA, $rB", RotateShift,
+      "rotma\t$rT, $rA, $rB", RotShiftVec,
       [/* see patterns below - $rB must be negated */]>;
 
 def : Pat<(sra R32C:$rA, R32C:$rB),
@@ -2995,7 +2891,7 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
 class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
     RRForm<0b01011110000, OOL, IOL,
       "rotmai\t$rT, $rA, $val",
-      RotateShift, pattern>;
+      RotShiftVec, pattern>;
 
 class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
     ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
@@ -4010,7 +3906,7 @@ def FCGTf32 :
       "fcgt\t$rT, $rA, $rB", SPrecFP,
       [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
 
-def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
           (FCGTf32 R32FP:$rA, R32FP:$rB)>;
 
 def FCMGTf32 :
@@ -4018,7 +3914,7 @@ def FCMGTf32 :
       "fcmgt\t$rT, $rA, $rB", SPrecFP,
       [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
 
-def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
           (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
 
 //--------------------------------------------------------------------------
@@ -4320,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
 // in the odd pipeline)
 //===----------------------------------------------------------------------===//
 
-def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
   let Pattern = [];
 
   let Inst{0-10} = 0b10000000010;
@@ -4379,30 +4275,43 @@ def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
 def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
 
 def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
-          (ORi128_vec VECREG:$src)>;
+          (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
 
 def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
-          (v16i8 (ORvec_i128 GPRC:$src))>;
+          (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
-          (v8i16 (ORvec_i128 GPRC:$src))>;
+          (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
-          (v4i32 (ORvec_i128 GPRC:$src))>;
+          (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
-          (v2i64 (ORvec_i128 GPRC:$src))>;
+          (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
-          (v4f32 (ORvec_i128 GPRC:$src))>;
+          (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
 def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
-          (v2f64 (ORvec_i128 GPRC:$src))>;
+          (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+          (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+          (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+          (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+          (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
 
 //===----------------------------------------------------------------------===//
 // Instruction patterns:
@@ -4453,11 +4362,12 @@ def : Pat<(i32 (zext R8C:$rSrc)),
 
 // zext 8->64: Zero extend bytes to double words
 def : Pat<(i64 (zext R8C:$rSrc)),
-          (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
-                                    (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+          (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+                                    (COPY_TO_REGCLASS 
+                                       (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
                                     0x4),
                                   (ILv4i32 0x0),
-                                  (FSMBIv4i32 0x0f0f)))>;
+                                  (FSMBIv4i32 0x0f0f)), R64C)>;
 
 // anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
 def : Pat<(i16 (anyext R8C:$rSrc)),
@@ -4465,7 +4375,7 @@ def : Pat<(i16 (anyext R8C:$rSrc)),
 
 // anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
 def : Pat<(i32 (anyext R8C:$rSrc)),
-          (ORIi8i32 R8C:$rSrc, 0)>;
+          (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
 
 // sext 16->64: Sign extend halfword to double word
 def : Pat<(sext_inreg R64C:$rSrc, i16),
@@ -4489,7 +4399,7 @@ def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
 
 // anyext 16->32: Extend 16->32 bits, irrespective of sign
 def : Pat<(i32 (anyext R16C:$rSrc)),
-          (ORIi16i32 R16C:$rSrc, 0)>;
+          (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
 
 //===----------------------------------------------------------------------===//
 // Truncates:
@@ -4498,61 +4408,61 @@ def : Pat<(i32 (anyext R16C:$rSrc)),
 //===----------------------------------------------------------------------===//
 
 def : Pat<(i8 (trunc GPRC:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
 
 def : Pat<(i8 (trunc R64C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
 
 def : Pat<(i8 (trunc R32C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv4i32_i32 R32C:$src),
-               (ORv4i32_i32 R32C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
 
 def : Pat<(i8 (trunc R16C:$src)),
-          (ORi8_v16i8
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv8i16_i16 R16C:$src),
-               (ORv8i16_i16 R16C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+               (COPY_TO_REGCLASS R16C:$src, VECREG),
+               (COPY_TO_REGCLASS R16C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
 
 def : Pat<(i16 (trunc GPRC:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
 
 def : Pat<(i16 (trunc R64C:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
 
 def : Pat<(i16 (trunc R32C:$src)),
-          (ORi16_v8i16
+          (COPY_TO_REGCLASS
             (SHUFBv4i32_m32
-               (ORv4i32_i32 R32C:$src),
-               (ORv4i32_i32 R32C:$src),
-               (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (COPY_TO_REGCLASS R32C:$src, VECREG),
+               (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
 
 def : Pat<(i32 (trunc GPRC:$src)),
-          (ORi32_v4i32
+          (COPY_TO_REGCLASS
             (SHUFBgprc GPRC:$src, GPRC:$src,
-                       (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+                       (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
 
 def : Pat<(i32 (trunc R64C:$src)),
-          (ORi32_v4i32
+          (COPY_TO_REGCLASS
             (SHUFBv2i64_m32
-              (ORv2i64_i64 R64C:$src),
-              (ORv2i64_i64 R64C:$src),
-              (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (COPY_TO_REGCLASS R64C:$src, VECREG),
+              (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
 
 //===----------------------------------------------------------------------===//
 // Address generation: SPU, like PPC, has to split addresses into high and
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 25ba88acc8ba..99aaeb006a0b 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -24,9 +24,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
   GlobalPrefix = "";
   PrivateGlobalPrefix = ".L";
 
-  // Has leb128, .loc and .file
+  // Has leb128
   HasLEB128 = true;
-  HasDotLocAndDotFile = true;
 
   SupportsDebugInformation = true;
 
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 647da3051d3d..a6e621f36b35 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -19,16 +19,16 @@ def SPU_GenControl : SDTypeProfile<1, 1, []>;
 def SPUshufmask    : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
 
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPUCallSeq,
-                           [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 //===----------------------------------------------------------------------===//
 // Operand constraints:
 //===----------------------------------------------------------------------===//
 
 def SDT_SPUCall   : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
 def SPUcall       : SDNode<"SPUISD::CALL", SDT_SPUCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
 
 // Operand type constraints for vector shuffle/permute operations
@@ -83,10 +83,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
 // SPUISelLowering.h):
 def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
 
-// Shift left quadword by bits and bytes
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
-
 // Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
 def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
 def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
@@ -105,6 +101,12 @@ def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
 def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
                                    SPUvecshift_type>;
 
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
+// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
 // SPU form select mask for bytes, immediate
 def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
 
@@ -154,4 +156,4 @@ class NoEncode<string E> {
 //===----------------------------------------------------------------------===//
 
 def retflag     : SDNode<"SPUISD::RET_FLAG", SDTNone,
-                         [SDNPHasChain, SDNPOptInFlag]>;
+                         [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 000000000000..e2bd2d7f4100
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to 
+// satisfy the dual issue requirements. The actual dual issue scheduling is 
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+  struct SPUNopFiller : public MachineFunctionPass {
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+    const InstrItineraryData *IID;
+    bool isEvenPlace;  // the instruction slot (mem address) at hand is even/odd
+
+    static char ID;
+    SPUNopFiller(TargetMachine &tm) 
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()), 
+        IID(tm.getInstrItineraryData()) 
+    {
+      DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+    }
+
+    virtual const char *getPassName() const {
+      return "SPU nop/lnop Filler";
+    }
+
+    void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+    bool runOnMachineFunction(MachineFunction &F) {
+      isEvenPlace = true; //all functions get an .align 3 directive at start 
+      for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+           FI != FE; ++FI)
+        runOnMachineBasicBlock(*FI);
+      return true; //never-ever do any more modifications, just print it!
+    }
+
+    typedef enum { none   = 0, // no more instructions in this function / BB
+                   pseudo = 1, // this does not get executed
+                   even   = 2, 
+                   odd    = 3 } SPUOpPlace;
+    SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+  };
+  char SPUNopFiller::ID = 0;
+
+} 
+
+// Fill a BasicBlock to alignment. 
+// In the assebly we align the functions to 'even' adresses, but
+// basic blocks have an implicit alignmnet. We hereby define 
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB) 
+{
+  assert( isEvenPlace && "basic block start from odd address");
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+  {
+    SPUOpPlace this_optype, next_optype;
+    MachineBasicBlock::iterator J = I;
+    J++;
+
+    this_optype = getOpPlacement( *I );
+    next_optype = none;
+    while (J!=MBB.end()){
+      next_optype = getOpPlacement( *J );
+      ++J;
+      if (next_optype != pseudo ) 
+        break;
+    }
+
+    // padd: odd(wrong), even(wrong), ...
+    // to:   nop(corr), odd(corr), even(corr)...
+    if( isEvenPlace && this_optype == odd && next_optype == even ) {
+      DEBUG( dbgs() <<"Adding NOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+      isEvenPlace=false;
+    }
+    
+    // padd: even(wrong), odd(wrong), ...
+    // to:   lnop(corr), even(corr), odd(corr)...
+    else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+      DEBUG( dbgs() <<"Adding LNOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+      isEvenPlace=true;
+    }
+      
+    // now go to next mem slot
+    if( this_optype != pseudo )
+      isEvenPlace = !isEvenPlace;    
+
+  }
+
+  // padd basicblock end
+  if( !isEvenPlace ){
+    MachineBasicBlock::iterator J = MBB.end();
+    J--;
+    if (getOpPlacement( *J ) == odd) {
+      DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+      BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+    }  
+    else {
+      J++;
+      DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+      BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+    }
+    isEvenPlace=true;
+  }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+  return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace 
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+  int sc = instr.getDesc().getSchedClass();
+  const InstrStage *stage = IID->beginStage(sc);
+  unsigned FUs = stage->getUnits();
+  SPUOpPlace retval;
+
+  switch( FUs ) {
+    case 0: retval = pseudo; break;
+    case 1: retval = odd;    break;
+    case 2: retval = even;   break;
+    default: retval= pseudo; 
+             assert( false && "got unknown FuncUnit\n");
+             break;
+  };
+  return retval;
+}
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index e1a0358abc46..96cde51709ec 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -143,7 +143,7 @@ def immU16 : PatLeaf<(imm), [{
 def imm18  : PatLeaf<(imm), [{
   // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
   int Value = (int) N->getZExtValue();
-  return ((Value & ((1 << 19) - 1)) == Value);
+  return isUInt<18>(Value); 
 }]>;
 
 def lo16 : PatLeaf<(imm), [{
@@ -203,7 +203,7 @@ def FPimm_sext16 : SDNodeXForm<fpimm, [{
 
 def FPimm_u18 : SDNodeXForm<fpimm, [{
   float fval = N->getValueAPF().convertToFloat();
-  return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+  return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
 }]>;
 
 def fpimmSExt16 : PatLeaf<(fpimm), [{
@@ -225,7 +225,7 @@ def hi16_f32 : PatLeaf<(fpimm), [{
 def fpimm18  : PatLeaf<(fpimm), [{
   if (N->getValueType(0) == MVT::f32) {
     uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
-    return ((Value & ((1 << 19) - 1)) == Value);
+    return isUInt<18>(Value);
   }
 
   return false;
@@ -654,7 +654,11 @@ def memrr : Operand<iPTR> {
 // A-form   : abs     (256K LSA offset)
 // D-form(2): [r+I7]  (7-bit signed offset + reg)
 
-def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",     [], []>;
-def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",     [], []>;
-def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",     [], []>;
-def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr",    [], []>;
+def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+                                  [], [SDNPWantRoot]>;
+def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+                                  [], [SDNPWantRoot]>;
+def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+                                  [], [SDNPWantRoot]>;
+def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+                                  [], [SDNPWantRoot]>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index cf718917a561..0bdd50ac79f5 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -18,7 +18,7 @@
 #include "SPUInstrBuilder.h"
 #include "SPUSubtarget.h"
 #include "SPUMachineFunction.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "llvm/Constants.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -30,7 +30,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -240,25 +240,6 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register.  This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool
-SPURegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return MFI->getStackSize() && needsFP(MF);
-}
-
 //--------------------------------------------------------------------------
 void
 SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -302,7 +283,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   MachineOperand &MO = MI.getOperand(OpNo);
 
   // Offset is biased by $lr's slot at the bottom.
-  Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+  Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
   assert((Offset & 0xf) == 0
          && "16-byte alignment violated in eliminateFrameIndex");
 
@@ -329,225 +310,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   }
 }
 
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void
-SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
-{
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Get the number of bytes to allocate from the FrameInfo
-  unsigned FrameSize = MFI->getStackSize();
-
-  // Get the alignments provided by the target, and the maximum alignment
-  // (if any) of the fixed frame objects.
-  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
-  assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
-  unsigned AlignMask = Align - 1;
-
-  // Get the maximum call frame size of all the calls.
-  unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
-  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
-  // that allocations will be aligned.
-  if (MFI->hasVarSizedObjects())
-    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
-  // Update maximum call frame size.
-  MFI->setMaxCallFrameSize(maxCallFrameSize);
-
-  // Include call frame size in total.
-  FrameSize += maxCallFrameSize;
-
-  // Make sure the frame is aligned.
-  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
-  // Update frame info.
-  MFI->setStackSize(FrameSize);
-}
-
-void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                           RegScavenger *RS)
-  const {
-  // Mark LR and SP unused, since the prolog spills them to stack and
-  // we don't want anyone else to spill them for us.
-  //
-  // Also, unless R2 is really used someday, don't spill it automatically.
-  MF.getRegInfo().setPhysRegUnused(SPU::R0);
-  MF.getRegInfo().setPhysRegUnused(SPU::R1);
-  MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
-  MachineFrameInfo *MFI = MF.getFrameInfo(); 
-  const TargetRegisterClass *RC = &SPU::R32CRegClass;
-  RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
-                                                     RC->getAlignment(),
-                                                     false));
-  
-  
-}
-
-void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
-{
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineModuleInfo &MMI = MF.getMMI();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Prepare for debug frame info.
-  bool hasDebugInfo = MMI.hasDebugInfo();
-  MCSymbol *FrameLabel = 0;
-
-  // Move MBBI back to the beginning of the function.
-  MBBI = MBB.begin();
-
-  // Work out frame sizes.
-  determineFrameLayout(MF);
-  int FrameSize = MFI->getStackSize();
-
-  assert((FrameSize & 0xf) == 0
-         && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
-  // the "empty" frame size is 16 - just the register scavenger spill slot
-  if (FrameSize > 16 || MFI->adjustsStack()) {
-    FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
-    if (hasDebugInfo) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      FrameLabel = MMI.getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
-    }
-
-    // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
-    // for the ABI
-    BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
-      .addReg(SPU::R1);
-    if (isInt<10>(FrameSize)) {
-      // Spill $sp to adjusted $sp
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
-        .addReg(SPU::R1);
-      // Adjust $sp by required amout
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
-        .addImm(FrameSize);
-    } else if (isInt<16>(FrameSize)) {
-      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
-      // $r2 to adjust $sp:
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
-        .addImm(-16)
-        .addReg(SPU::R1);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
-        .addImm(FrameSize);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
-        .addReg(SPU::R2)
-        .addReg(SPU::R1);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
-        .addReg(SPU::R1)
-        .addReg(SPU::R2);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
-        .addReg(SPU::R2)
-        .addImm(16);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
-        .addReg(SPU::R2)
-        .addReg(SPU::R1);
-    } else {
-      report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
-    }
-
-    if (hasDebugInfo) {
-      std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
-      // Show update of SP.
-      MachineLocation SPDst(MachineLocation::VirtualFP);
-      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
-      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
-      // Add callee saved registers to move list.
-      const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-      for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-        int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
-        unsigned Reg = CSI[I].getReg();
-        if (Reg == SPU::R0) continue;
-        MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
-        MachineLocation CSSrc(Reg);
-        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
-      }
-
-      // Mark effective beginning of when frame pointer is ready.
-      MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
-      MachineLocation FPDst(SPU::R1);
-      MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
-    }
-  } else {
-    // This is a leaf function -- insert a branch hint iff there are
-    // sufficient number instructions in the basic block. Note that
-    // this is just a best guess based on the basic block's size.
-    if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
-      MachineBasicBlock::iterator MBBI = prior(MBB.end());
-      dl = MBBI->getDebugLoc();
-
-      // Insert terminator label
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
-        .addSym(MMI.getContext().CreateTempSymbol());
-    }
-  }
-}
-
-void
-SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  int FrameSize = MFI->getStackSize();
-  int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  assert(MBBI->getOpcode() == SPU::RET &&
-         "Can only insert epilog into returning blocks");
-  assert((FrameSize & 0xf) == 0
-         && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
-
-  // the "empty" frame size is 16 - just the register scavenger spill slot
-  if (FrameSize > 16 || MFI->adjustsStack()) {
-    FrameSize = FrameSize + SPUFrameInfo::minStackSize();
-    if (isInt<10>(FrameSize + LinkSlotOffset)) {
-      // Reload $lr, adjust $sp by required amount
-      // Note: We do this to slightly improve dual issue -- not by much, but it
-      // is an opportunity for dual issue.
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
-        .addImm(FrameSize + LinkSlotOffset)
-        .addReg(SPU::R1);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
-        .addReg(SPU::R1)
-        .addImm(FrameSize);
-    } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
-      // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
-      // $r2 to adjust $sp:
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
-        .addImm(16)
-        .addReg(SPU::R1);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
-        .addImm(FrameSize);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
-        .addReg(SPU::R1)
-        .addReg(SPU::R2);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
-        .addImm(16)
-        .addReg(SPU::R1);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
-        addReg(SPU::R2)
-        .addImm(16);
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
-        .addReg(SPU::R2)
-        .addReg(SPU::R1);
-    } else {
-      report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
-    }
-   }
-}
-
 unsigned
 SPURegisterInfo::getRARegister() const
 {
@@ -560,26 +322,16 @@ SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
   return SPU::R1;
 }
 
-void
-SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
-{
-  // Initial state of the frame pointer is R1.
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(SPU::R1, 0);
-  Moves.push_back(MachineMove(0, Dst, Src));
-}
-
-
 int
 SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   // FIXME: Most probably dwarf numbers differs for Linux and Darwin
   return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
 }
 
-int 
+int
 SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
 {
-  switch(dFormOpcode) 
+  switch(dFormOpcode)
   {
     case SPU::AIr32:     return SPU::Ar32;
     case SPU::LQDr32:    return SPU::LQXr32;
@@ -602,10 +354,10 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
 
 // TODO this is already copied from PPC. Could this convenience function
 // be moved to the RegScavenger class?
-unsigned  
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, 
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
                                      RegScavenger *RS,
-                                     const TargetRegisterClass *RC, 
+                                     const TargetRegisterClass *RC,
                                      int SPAdj) const
 {
   assert(RS && "Register scavenging must be on");
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index aedb769cb4fc..641da0480a8d 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -33,7 +33,7 @@ namespace llvm {
 
   public:
     SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-    
+ 
     //! Translate a register's enum value to a register number
     /*!
       This method translates a register's enum value to it's regiser number,
@@ -56,8 +56,6 @@ namespace llvm {
     //! Return the reserved registers
     BitVector getReservedRegs(const MachineFunction &MF) const;
 
-    //! Prediate: Target has dedicated frame pointer
-    bool hasFP(const MachineFunction &MF) const;
     //! Eliminate the call frame setup pseudo-instructions
     void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                        MachineBasicBlock &MBB,
@@ -65,21 +63,11 @@ namespace llvm {
     //! Convert frame indicies into machine operands
     void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                              RegScavenger *RS = NULL) const;
-    //! Determine the frame's layour
-    void determineFrameLayout(MachineFunction &MF) const;
-
-    void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                              RegScavenger *RS = NULL) const;
-    //! Emit the function prologue
-    void emitPrologue(MachineFunction &MF) const;
-    //! Emit the function epilogue
-    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
     //! Get return address register (LR, aka R0)
     unsigned getRARegister() const;
     //! Get the stack frame register (SP, aka R1)
     unsigned getFrameRegister(const MachineFunction &MF) const;
-    //! Perform target-specific stack frame setup.
-    void getInitialFrameState(std::vector<MachineMove> &Moves) const;
 
     //------------------------------------------------------------------------
     // New methods added:
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
index a0b581f1632b..9cd3c2327df0 100644
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -32,11 +32,12 @@ def FPInt        : InstrItinClass;              // EVEN_UNIT (FP<->integer)
 def ByteOp       : InstrItinClass;              // EVEN_UNIT
 def IntegerOp    : InstrItinClass;              // EVEN_UNIT
 def IntegerMulDiv: InstrItinClass;              // EVEN_UNIT
-def RotateShift  : InstrItinClass;              // EVEN_UNIT
+def RotShiftVec  : InstrItinClass;              // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass;              // ODD_UNIT Entire quad
 def ImmLoad      : InstrItinClass;              // EVEN_UNIT
 
 /* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
   InstrItinData<LoadStore   , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchHints , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchResolv, [InstrStage<4,  [ODD_UNIT]>]>,
@@ -51,7 +52,8 @@ def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
   InstrItinData<FPInt       , [InstrStage<2,  [EVEN_UNIT]>]>,
   InstrItinData<ByteOp      , [InstrStage<4,  [EVEN_UNIT]>]>,
   InstrItinData<IntegerOp   , [InstrStage<2,  [EVEN_UNIT]>]>,
-  InstrItinData<RotateShift , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<RotShiftVec , [InstrStage<4,  [EVEN_UNIT]>]>, 
+  InstrItinData<RotShiftQuad, [InstrStage<4,  [ODD_UNIT]>]>,
   InstrItinData<IntegerMulDiv,[InstrStage<7,  [EVEN_UNIT]>]>,
   InstrItinData<ImmLoad     , [InstrStage<2,  [EVEN_UNIT]>]>
   ]>;
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 0f18b7fa8b26..07c8352fba9f 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,6 +14,8 @@
 #include "SPUSubtarget.h"
 #include "SPU.h"
 #include "SPUGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "SPURegisterInfo.h"
 
 using namespace llvm;
 
@@ -34,3 +36,22 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
 /// producing code for the JIT.
 void SPUSubtarget::SetJITMode() {
 }
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+                       CodeGenOpt::Level OptLevel,
+                       TargetSubtarget::AntiDepBreakMode& Mode,
+                       RegClassVector& CriticalPathRCs) const {
+  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  // CriticalPathsRCs seems to be the set of
+  // RegisterClasses that antidep breakings are performed for.
+  // Do it for all register classes 
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(&SPU::R8CRegClass);
+  CriticalPathRCs.push_back(&SPU::R16CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+  CriticalPathRCs.push_back(&SPU::R64CRegClass);
+  CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+  return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index 88201c653b9b..d7929302f080 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -81,9 +81,13 @@ namespace llvm {
     /// properties of this subtarget.
     const char *getTargetDataString() const {
       return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
-             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
              "-s:128:128-n32:64";
     }
+
+    bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                               TargetSubtarget::AntiDepBreakMode& Mode,
+                               RegClassVector& CriticalPathRCs) const;
   };
 } // End llvm namespace
 
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 480ec3f7f885..3ed73613a31d 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeCellSPUTarget() {
 }
 
 const std::pair<unsigned, int> *
-SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
   NumEntries = 1;
   return &LR[0];
 }
@@ -40,7 +40,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
     Subtarget(TT, FS),
     DataLayout(Subtarget.getTargetDataString()),
     InstrInfo(*this),
-    FrameInfo(*this),
+    FrameLowering(Subtarget),
     TLInfo(*this),
     TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
@@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
   PM.add(createSPUISelDag(*this));
   return false;
 }
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) 
+{
+  //align instructions with nops/lnops for dual issue
+  PM.add(createSPUNopFillerPass(*this));
+  return true;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 7e0270159a84..75abd5eb3fca 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -18,14 +18,14 @@
 #include "SPUInstrInfo.h"
 #include "SPUISelLowering.h"
 #include "SPUSelectionDAGInfo.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
 
 namespace llvm {
 class PassManager;
 class GlobalValue;
-class TargetFrameInfo;
+class TargetFrameLowering;
 
 /// SPUTargetMachine
 ///
@@ -33,7 +33,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
   SPUSubtarget        Subtarget;
   const TargetData    DataLayout;
   SPUInstrInfo        InstrInfo;
-  SPUFrameInfo        FrameInfo;
+  SPUFrameLowering    FrameLowering;
   SPUTargetLowering   TLInfo;
   SPUSelectionDAGInfo TSInfo;
   InstrItineraryData  InstrItins;
@@ -48,8 +48,8 @@ public:
   virtual const SPUInstrInfo     *getInstrInfo() const {
     return &InstrInfo;
   }
-  virtual const SPUFrameInfo     *getFrameInfo() const {
-    return &FrameInfo;
+  virtual const SPUFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
   }
   /*!
     \note Cell SPU does not support JIT today. It could support JIT at some
@@ -75,13 +75,14 @@ public:
     return &DataLayout;
   }
 
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }
   
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM,
                                CodeGenOpt::Level OptLevel);
+  virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);	
 };
 
 } // end namespace llvm
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index f8182b80c94c..e9375599511c 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_llvm_target(CppBackend
   CPPBackend.cpp
   )
+
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index f08559f6e9f2..71d6049c8a1b 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -358,6 +358,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
     case Type::FloatTyID:    return "Type::getFloatTy(mod->getContext())";
     case Type::DoubleTyID:   return "Type::getDoubleTy(mod->getContext())";
     case Type::LabelTyID:    return "Type::getLabelTy(mod->getContext())";
+    case Type::X86_MMXTyID:  return "Type::getX86_MMXTy(mod->getContext())";
     default:
       error("Invalid primitive type");
       break;
@@ -1563,11 +1564,25 @@ void CppWriter::printFunctionUses(const Function* F) {
         // If the operand references a GVal or Constant, make a note of it
         if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
           gvs.insert(GV);
-          if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
-            if (GVar->hasInitializer())
-              consts.insert(GVar->getInitializer());
-        } else if (Constant* C = dyn_cast<Constant>(operand))
+          if (GenerationType != GenFunction)
+            if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+              if (GVar->hasInitializer())
+                consts.insert(GVar->getInitializer());
+        } else if (Constant* C = dyn_cast<Constant>(operand)) {
           consts.insert(C);
+          for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+            // If the operand references a GVal or Constant, make a note of it
+            Value* operand = C->getOperand(j);
+            printType(operand->getType());
+            if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+              gvs.insert(GV);
+              if (GenerationType != GenFunction)
+                if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+                  if (GVar->hasInitializer())
+                    consts.insert(GVar->getInitializer());
+            }
+          }
+        }
       }
     }
   }
@@ -1590,7 +1605,7 @@ void CppWriter::printFunctionUses(const Function* F) {
       printVariableHead(F);
   }
 
-// Print the constants found
+  // Print the constants found
   nl(Out) << "// Constant Definitions"; nl(Out);
   for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
          E = consts.end(); I != E; ++I) {
@@ -1600,11 +1615,13 @@ void CppWriter::printFunctionUses(const Function* F) {
   // Process the global variables definitions now that all the constants have
   // been emitted. These definitions just couple the gvars with their constant
   // initializers.
-  nl(Out) << "// Global Variable Definitions"; nl(Out);
-  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-       I != E; ++I) {
-    if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
-      printVariableBody(GV);
+  if (GenerationType != GenFunction) {
+    nl(Out) << "// Global Variable Definitions"; nl(Out);
+    for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+         I != E; ++I) {
+      if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+        printVariableBody(GV);
+    }
   }
 }
 
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..87e7cb5da561
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmParser
+  MBlazeAsmLexer.cpp
+  MBlazeAsmParser.cpp
+  )
+
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
new file mode 100644
index 000000000000..190379657f42
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -0,0 +1,127 @@
+//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+  
+  class MBlazeBaseAsmLexer : public TargetAsmLexer {
+    const MCAsmInfo &AsmInfo;
+    
+    const AsmToken &lexDefinite() {
+      return getLexer()->Lex();
+    }
+    
+    AsmToken LexTokenUAL();
+  protected:
+    typedef std::map <std::string, unsigned> rmap_ty;
+    
+    rmap_ty RegisterMap;
+    
+    void InitRegisterMap(const TargetRegisterInfo *info) {
+      unsigned numRegs = info->getNumRegs();
+
+      for (unsigned i = 0; i < numRegs; ++i) {
+        const char *regName = info->getName(i);
+        if (regName)
+          RegisterMap[regName] = i;
+      }
+    }
+    
+    unsigned MatchRegisterName(StringRef Name) {
+      rmap_ty::iterator iter = RegisterMap.find(Name.str());
+      if (iter != RegisterMap.end())
+        return iter->second;
+      else
+        return 0;
+    }
+    
+    AsmToken LexToken() {
+      if (!Lexer) {
+        SetError(SMLoc(), "No MCAsmLexer installed");
+        return AsmToken(AsmToken::Error, "", 0);
+      }
+      
+      switch (AsmInfo.getAssemblerDialect()) {
+      default:
+        SetError(SMLoc(), "Unhandled dialect");
+        return AsmToken(AsmToken::Error, "", 0);
+      case 0:
+        return LexTokenUAL();
+      }
+    }
+  public:
+    MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+      : TargetAsmLexer(T), AsmInfo(MAI) {
+    }
+  };
+  
+  class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
+  public:
+    MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+      : MBlazeBaseAsmLexer(T, MAI) {
+      std::string tripleString("mblaze-unknown-unknown");
+      std::string featureString;
+      OwningPtr<const TargetMachine> 
+        targetMachine(T.createTargetMachine(tripleString, featureString));
+      InitRegisterMap(targetMachine->getRegisterInfo());
+    }
+  };
+}
+
+AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
+  const AsmToken &lexedToken = lexDefinite();
+  
+  switch (lexedToken.getKind()) {
+  default:
+    return AsmToken(lexedToken);
+  case AsmToken::Error:
+    SetError(Lexer->getErrLoc(), Lexer->getErr());
+    return AsmToken(lexedToken);
+  case AsmToken::Identifier:
+  {
+    std::string upperCase = lexedToken.getString().str();
+    std::string lowerCase = LowercaseString(upperCase);
+    StringRef lowerRef(lowerCase);
+    
+    unsigned regID = MatchRegisterName(lowerRef);
+    
+    if (regID) {
+      return AsmToken(AsmToken::Register,
+                      lexedToken.getString(),
+                      static_cast<int64_t>(regID));
+    } else {
+      return AsmToken(lexedToken);
+    }
+  }
+  }
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer() {
+  RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+}
+
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 000000000000..524f33d19335
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,568 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeISelLowering.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public TargetAsmParser {
+  MCAsmParser &Parser;
+  TargetMachine &TM;
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+  MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+  MBlazeOperand *ParseRegister(unsigned &RegNo);
+  MBlazeOperand *ParseImmediate();
+  MBlazeOperand *ParseFsl();
+  MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+  bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out);
+
+  /// @name Auto-generated Match Functions
+  /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+  /// }
+
+
+public:
+  MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+  enum KindTy {
+    Token,
+    Immediate,
+    Register,
+    Memory,
+    Fsl
+  } Kind;
+
+  SMLoc StartLoc, EndLoc;
+
+  union {
+    struct {
+      const char *Data;
+      unsigned Length;
+    } Tok;
+
+    struct {
+      unsigned RegNum;
+    } Reg;
+
+    struct {
+      const MCExpr *Val;
+    } Imm;
+
+    struct {
+      unsigned Base;
+      unsigned OffReg;
+      const MCExpr *Off;
+    } Mem;
+
+    struct {
+      const MCExpr *Val;
+    } FslImm;
+  };
+
+  MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+  MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+    Kind = o.Kind;
+    StartLoc = o.StartLoc;
+    EndLoc = o.EndLoc;
+    switch (Kind) {
+    case Register:
+      Reg = o.Reg;
+      break;
+    case Immediate:
+      Imm = o.Imm;
+      break;
+    case Token:
+      Tok = o.Tok;
+      break;
+    case Memory:
+      Mem = o.Mem;
+      break;
+    case Fsl:
+      FslImm = o.FslImm;
+      break;
+    }
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const { return StartLoc; }
+
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const { return EndLoc; }
+
+  unsigned getReg() const {
+    assert(Kind == Register && "Invalid access!");
+    return Reg.RegNum;
+  }
+
+  const MCExpr *getImm() const {
+    assert(Kind == Immediate && "Invalid access!");
+    return Imm.Val;
+  }
+
+  const MCExpr *getFslImm() const {
+    assert(Kind == Fsl && "Invalid access!");
+    return FslImm.Val;
+  }
+
+  unsigned getMemBase() const {
+    assert(Kind == Memory && "Invalid access!");
+    return Mem.Base;
+  }
+
+  const MCExpr* getMemOff() const {
+    assert(Kind == Memory && "Invalid access!");
+    return Mem.Off;
+  }
+
+  unsigned getMemOffReg() const {
+    assert(Kind == Memory && "Invalid access!");
+    return Mem.OffReg;
+  }
+
+  bool isToken() const { return Kind == Token; }
+  bool isImm() const { return Kind == Immediate; }
+  bool isMem() const { return Kind == Memory; }
+  bool isFsl() const { return Kind == Fsl; }
+  bool isReg() const { return Kind == Register; }
+
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediates when possible.  Null MCExpr = 0.
+    if (Expr == 0)
+      Inst.addOperand(MCOperand::CreateImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  void addFslOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getFslImm());
+  }
+
+  void addMemOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+    unsigned RegOff = getMemOffReg();
+    if (RegOff)
+      Inst.addOperand(MCOperand::CreateReg(RegOff));
+    else
+      addExpr(Inst, getMemOff());
+  }
+
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  virtual void dump(raw_ostream &OS) const;
+
+  static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+    MBlazeOperand *Op = new MBlazeOperand(Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+    MBlazeOperand *Op = new MBlazeOperand(Register);
+    Op->Reg.RegNum = RegNum;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+    MBlazeOperand *Op = new MBlazeOperand(Immediate);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+    MBlazeOperand *Op = new MBlazeOperand(Fsl);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+                                  SMLoc E) {
+    MBlazeOperand *Op = new MBlazeOperand(Memory);
+    Op->Mem.Base = Base;
+    Op->Mem.Off = Off;
+    Op->Mem.OffReg = 0;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+                                  SMLoc E) {
+    MBlazeOperand *Op = new MBlazeOperand(Memory);
+    Op->Mem.Base = Base;
+    Op->Mem.OffReg = Off;
+    Op->Mem.Off = 0;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::dump(raw_ostream &OS) const {
+  switch (Kind) {
+  case Immediate:
+    getImm()->print(OS);
+    break;
+  case Register:
+    OS << "<register R";
+    OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+    break;
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  case Memory: {
+    OS << "<memory R";
+    OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+    OS << ", ";
+
+    unsigned RegOff = getMemOffReg();
+    if (RegOff)
+      OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+    else
+      OS << getMemOff();
+    OS << ">";
+    }
+    break;
+  case Fsl:
+    getFslImm()->print(OS);
+    break;
+  }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+//
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out) {
+  MCInst Inst;
+  SMLoc ErrorLoc;
+  unsigned ErrorInfo;
+
+  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+  case Match_Success:
+    Out.EmitInstruction(Inst);
+    return false;
+  case Match_MissingFeature:
+    return Error(IDLoc, "instruction use requires an option to be enabled");
+  case Match_MnemonicFail:
+      return Error(IDLoc, "unrecognized instruction mnemonic");
+  case Match_ConversionFail:
+    return Error(IDLoc, "unable to convert operands to instruction");
+  case Match_InvalidOperand:
+    ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+  return true;
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  if (Operands.size() != 4)
+    return 0;
+
+  MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+  MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+  SMLoc S = Base.getStartLoc();
+  SMLoc O = Offset.getStartLoc();
+  SMLoc E = Offset.getEndLoc();
+
+  if (!Base.isReg()) {
+    Error(S, "base address must be a register");
+    return 0;
+  }
+
+  if (!Offset.isReg() && !Offset.isImm()) {
+    Error(O, "offset must be a register or immediate");
+    return 0;
+  }
+
+  MBlazeOperand *Op;
+  if (Offset.isReg())
+    Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+  else
+    Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+  delete Operands.pop_back_val();
+  delete Operands.pop_back_val();
+  Operands.push_back(Op);
+
+  return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+                                    SMLoc &StartLoc, SMLoc &EndLoc) {
+  return (ParseRegister(RegNo) == 0);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  switch (getLexer().getKind()) {
+  default: return 0;
+  case AsmToken::Identifier:
+    RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+    if (RegNo == 0)
+      return 0;
+
+    getLexer().Lex();
+    return MBlazeOperand::CreateReg(RegNo, S, E);
+  }
+}
+
+static unsigned MatchFslRegister(StringRef String) {
+  if (!String.startswith("rfsl"))
+    return -1;
+
+  unsigned regNum;
+  if (String.substr(4).getAsInteger(10,regNum))
+    return -1;
+
+  return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  switch (getLexer().getKind()) {
+  default: return 0;
+  case AsmToken::Identifier:
+    unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+    if (reg >= 16)
+      return 0;
+
+    getLexer().Lex();
+    const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+    return MBlazeOperand::CreateFslImm(EVal,S,E);
+  }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  const MCExpr *EVal;
+  switch (getLexer().getKind()) {
+  default: return 0;
+  case AsmToken::LParen:
+  case AsmToken::Plus:
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::Identifier:
+    if (getParser().ParseExpression(EVal))
+      return 0;
+
+    return MBlazeOperand::CreateImm(EVal, S, E);
+  }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  MBlazeOperand *Op;
+
+  // Attempt to parse the next token as a register name
+  unsigned RegNo;
+  Op = ParseRegister(RegNo);
+
+  // Attempt to parse the next token as an FSL immediate
+  if (!Op)
+    Op = ParseFsl();
+
+  // Attempt to parse the next token as an immediate
+  if (!Op)
+    Op = ParseImmediate();
+
+  // If the token could not be parsed then fail
+  if (!Op) {
+    Error(Parser.getTok().getLoc(), "unknown operand");
+    return 0;
+  }
+
+  // Push the parsed operand into the list of operands
+  Operands.push_back(Op);
+  return Op;
+}
+
+/// Parse an mblaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The first operands is the token for the instruction name
+  size_t dotLoc = Name.find('.');
+  Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+  if (dotLoc < Name.size())
+    Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+  // If there are no more operands then finish
+  if (getLexer().is(AsmToken::EndOfStatement))
+    return false;
+
+  // Parse the first operand
+  if (!ParseOperand(Operands))
+    return true;
+
+  while (getLexer().isNot(AsmToken::EndOfStatement) &&
+         getLexer().is(AsmToken::Comma)) {
+    // Consume the comma token
+    getLexer().Lex();
+
+    // Parse the next operand
+    if (!ParseOperand(Operands))
+      return true;
+  }
+
+  // If the instruction requires a memory operand then we need to
+  // replace the last two operands (base+offset) with a single
+  // memory operand.
+  if (Name.startswith("lw") || Name.startswith("sw") ||
+      Name.startswith("lh") || Name.startswith("sh") ||
+      Name.startswith("lb") || Name.startswith("sb"))
+    return (ParseMemory(Operands) == NULL);
+
+  return false;
+}
+
+/// ParseDirective parses the arm specific directives
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if (IDVal == ".word")
+    return ParseDirectiveWord(2, DirectiveID.getLoc());
+  return true;
+}
+
+/// ParseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      const MCExpr *Value;
+      if (getParser().ParseExpression(Value))
+        return true;
+
+      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+
+  Parser.Lex();
+  return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+  RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+  LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/lib/Target/MBlaze/AsmParser/Makefile b/lib/Target/MBlaze/AsmParser/Makefile
new file mode 100644
index 000000000000..611a0f473f73
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmParser
+
+# Hack: we need to include 'main' MBlaze target directory for private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index fac2c1959d7a..000000000000
--- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
-  ${CMAKE_CURRENT_BINARY_DIR}/..
-  ${CMAKE_CURRENT_SOURCE_DIR}/..
-  )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
-  MBlazeAsmPrinter.cpp
-  )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
deleted file mode 100644
index f4b30ad271f1..000000000000
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mblaze-asm-printer"
-
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
-#include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
-#include "MBlazeMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cctype>
-
-using namespace llvm;
-
-namespace {
-  class MBlazeAsmPrinter : public AsmPrinter {
-    const MBlazeSubtarget *Subtarget;
-  public:
-    explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {
-      Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
-    }
-
-    virtual const char *getPassName() const {
-      return "MBlaze Assembly Printer";
-    }
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                         const char *Modifier = 0);
-    void printSavedRegsBitmask(raw_ostream &OS);
-
-    void emitFrameDirective();
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);
-    void EmitInstruction(const MachineInstr *MI) { 
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    virtual void EmitFunctionBodyStart();
-    virtual void EmitFunctionBodyEnd();
-    static const char *getRegisterName(unsigned RegNo);
-
-    virtual void EmitFunctionEntryLabel();
-  };
-} // end of anonymous namespace
-
-#include "MBlazeGenAsmWriter.inc"
-
-//===----------------------------------------------------------------------===//
-//
-//  MBlaze Asm Directives
-//
-//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
-//  Describe the stack frame.
-//
-//  -- Mask directives "mask  bitmask, offset"
-//  Tells the assembler which registers are saved and where.
-//  bitmask - contain a little endian bitset indicating which registers are
-//            saved on function prologue (e.g. with a 0x80000000 mask, the
-//            assembler knows the register 31 (RA) is saved at prologue.
-//  offset  - the position before stack pointer subtraction indicating where
-//            the first saved register on prologue is located. (e.g. with a
-//
-//  Consider the following function prologue:
-//
-//    .frame  R19,48,R15
-//    .mask   0xc0000000,-8
-//       addiu R1, R1, -48
-//       sw R15, 40(R1)
-//       sw R19, 36(R1)
-//
-//    With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
-//    19 (R19) are saved at prologue. As the save order on prologue is from
-//    left to right, R15 is saved first. A -8 offset means that after the
-//    stack pointer subtration, the first register in the mask (R15) will be
-//    saved at address 48-8=40.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Mask directives
-//===----------------------------------------------------------------------===//
-
-// Print a 32 bit hex number with all numbers.
-static void printHex32(unsigned int Value, raw_ostream &O) {
-  O << "0x";
-  for (int i = 7; i >= 0; i--)
-    O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
-}
-
-
-// Create a bitmask with all callee saved registers for CPU or Floating Point
-// registers. For CPU registers consider RA, GP and FP for saving if necessary.
-void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
-  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-  const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
-
-  // CPU Saved Registers Bitmasks
-  unsigned int CPUBitmask = 0;
-
-  // Set the CPU Bitmasks
-  const MachineFrameInfo *MFI = MF->getFrameInfo();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
-    if (MBlaze::CPURegsRegisterClass->contains(Reg))
-      CPUBitmask |= (1 << RegNum);
-  }
-
-  // Return Address and Frame registers must also be set in CPUBitmask.
-  if (RI.hasFP(*MF))
-    CPUBitmask |= (1 << MBlazeRegisterInfo::
-                getRegisterNumbering(RI.getFrameRegister(*MF)));
-
-  if (MFI->adjustsStack())
-    CPUBitmask |= (1 << MBlazeRegisterInfo::
-                getRegisterNumbering(RI.getRARegister()));
-
-  // Print CPUBitmask
-  O << "\t.mask \t"; printHex32(CPUBitmask, O);
-  O << ',' << MBlazeFI->getCPUTopSavedRegOff() << '\n';
-}
-
-//===----------------------------------------------------------------------===//
-// Frame and Set directives
-//===----------------------------------------------------------------------===//
-
-/// Frame Directive
-void MBlazeAsmPrinter::emitFrameDirective() {
-  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-
-  unsigned stackReg  = RI.getFrameRegister(*MF);
-  unsigned returnReg = RI.getRARegister();
-  unsigned stackSize = MF->getFrameInfo()->getStackSize();
-
-
-  OutStreamer.EmitRawText("\t.frame\t" + Twine(getRegisterName(stackReg)) +
-                          "," + Twine(stackSize) + "," +
-                          Twine(getRegisterName(returnReg)));
-}
-
-void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
-  OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
-  OutStreamer.EmitLabel(CurrentFnSym);
-}
-
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
-void MBlazeAsmPrinter::EmitFunctionBodyStart() {
-  emitFrameDirective();
-  
-  SmallString<128> Str;
-  raw_svector_ostream OS(Str);
-  printSavedRegsBitmask(OS);
-  OutStreamer.EmitRawText(OS.str());
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
-  OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
-}
-
-// Print out an operand for an inline asm expression.
-bool MBlazeAsmPrinter::
-PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                    raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    O << getRegisterName(MO.getReg());
-    break;
-
-  case MachineOperand::MO_Immediate:
-    O << (int)MO.getImm();
-    break;
-
-  case MachineOperand::MO_FPImmediate: {
-    const ConstantFP *fp = MO.getFPImm();
-    printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
-    O << ";\t# immediate = " << *fp;
-    break;
-  }
-
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    break;
-
-  case MachineOperand::MO_ExternalSymbol:
-    O << *GetExternalSymbolSymbol(MO.getSymbolName());
-    break;
-
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-    << '_' << MO.getIndex();
-    break;
-
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI"
-      << getFunctionNumber() << "_" << MO.getIndex();
-    if (MO.getOffset())
-      O << "+" << MO.getOffset();
-    break;
-
-  default:
-    llvm_unreachable("<unknown operand type>");
-  }
-}
-
-void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
-                                        raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  if (MO.isImm())
-    O << (unsigned int)MO.getImm();
-  else
-    printOperand(MI, opNum, O);
-}
-
-void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  if (MO.isImm())
-    O << "rfsl" << (unsigned int)MO.getImm();
-  else
-    printOperand(MI, opNum, O);
-}
-
-void MBlazeAsmPrinter::
-printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                const char *Modifier) {
-  printOperand(MI, opNum+1, O);
-  O << ", ";
-  printOperand(MI, opNum, O);
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeMBlazeAsmPrinter() {
-  RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
-}
diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile
deleted file mode 100644
index c44651cc93bd..000000000000
--- a/lib/Target/MBlaze/AsmPrinter/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMBlazeAsmPrinter
-
-# Hack: we need to include 'main' MBlaze target directory to grab
-# private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 7f85bf82518d..004057ad4ae3 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -5,17 +5,21 @@ tablegen(MBlazeGenRegisterNames.inc -gen-register-enums)
 tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
 tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
 tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
 tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
 tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
 tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
 tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
 
 add_llvm_target(MBlazeCodeGen
   MBlazeDelaySlotFiller.cpp
   MBlazeInstrInfo.cpp
   MBlazeISelDAGToDAG.cpp
   MBlazeISelLowering.cpp
+  MBlazeFrameLowering.cpp
   MBlazeMCAsmInfo.cpp
   MBlazeRegisterInfo.cpp
   MBlazeSubtarget.cpp
@@ -23,6 +27,14 @@ add_llvm_target(MBlazeCodeGen
   MBlazeTargetObjectFile.cpp
   MBlazeIntrinsicInfo.cpp
   MBlazeSelectionDAGInfo.cpp
+  MBlazeAsmPrinter.cpp
+  MBlazeAsmBackend.cpp
+  MBlazeMCInstLower.cpp
+  MBlazeELFWriterInfo.cpp
+  MBlazeMCCodeEmitter.cpp
   )
 
-target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG)
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..9376e68a35cf
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeDisassembler
+  MBlazeDisassembler.cpp
+  )
+
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+set_property(
+  SOURCE MBlazeDisassembler.cpp
+  PROPERTY COMPILE_FLAGS "/Od"
+  )
+endif()
+
+add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 000000000000..3379ac216972
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,647 @@
+//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze  ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeDisassembler.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+#include "MBlazeGenInstrInfo.inc"
+#include "MBlazeGenEDInfo.inc"
+
+using namespace llvm;
+
+const unsigned UNSUPPORTED = -1;
+
+static unsigned mblazeBinary2Opcode[] = {
+  MBlaze::ADD,   MBlaze::RSUB,   MBlaze::ADDC,   MBlaze::RSUBC,   //00,01,02,03
+  MBlaze::ADDK,  MBlaze::RSUBK,  MBlaze::ADDKC,  MBlaze::RSUBKC,  //04,05,06,07
+  MBlaze::ADDI,  MBlaze::RSUBI,  MBlaze::ADDIC,  MBlaze::RSUBIC,  //08,09,0A,0B
+  MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+  MBlaze::MUL,   MBlaze::BSRL,   MBlaze::IDIV,   MBlaze::GETD,    //10,11,12,13
+  UNSUPPORTED,   UNSUPPORTED,    MBlaze::FADD,   UNSUPPORTED,     //14,15,16,17
+  MBlaze::MULI,  MBlaze::BSRLI,  UNSUPPORTED,    MBlaze::GET,     //18,19,1A,1B
+  UNSUPPORTED,   UNSUPPORTED,    UNSUPPORTED,    UNSUPPORTED,     //1C,1D,1E,1F
+
+  MBlaze::OR,    MBlaze::AND,    MBlaze::XOR,    MBlaze::ANDN,    //20,21,22,23
+  MBlaze::SEXT8, MBlaze::MFS,    MBlaze::BR,     MBlaze::BEQ,     //24,25,26,27
+  MBlaze::ORI,   MBlaze::ANDI,   MBlaze::XORI,   MBlaze::ANDNI,   //28,29,2A,2B
+  MBlaze::IMM,   MBlaze::RTSD,   MBlaze::BRI,    MBlaze::BEQI,    //2C,2D,2E,2F
+
+  MBlaze::LBU,   MBlaze::LHU,    MBlaze::LW,     UNSUPPORTED,     //30,31,32,33
+  MBlaze::SB,    MBlaze::SH,     MBlaze::SW,     UNSUPPORTED,     //34,35,36,37
+  MBlaze::LBUI,  MBlaze::LHUI,   MBlaze::LWI,    UNSUPPORTED,     //38,39,3A,3B
+  MBlaze::SBI,   MBlaze::SHI,    MBlaze::SWI,    UNSUPPORTED,     //3C,3D,3E,3F
+};
+
+static unsigned getRD(uint32_t insn) {
+  return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+  return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+  return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+  return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+    int16_t val = (insn & 0xFFFF);
+    return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+    int16_t val = (insn & 0x1F);
+    return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+    return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+    int16_t val = (insn & 0xF);
+    return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default: return UNSUPPORTED;
+    case 0:  return MBlaze::MUL;
+    case 1:  return MBlaze::MULH;
+    case 2:  return MBlaze::MULHSU;
+    case 3:  return MBlaze::MULHU;
+    }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+    switch (insn&0x7FF) {
+    default:   return UNSUPPORTED;
+    case 0x60: return MBlaze::SEXT8;
+    case 0x68: return MBlaze::WIC;
+    case 0x64: return MBlaze::WDC;
+    case 0x66: return MBlaze::WDCC;
+    case 0x74: return MBlaze::WDCF;
+    case 0x61: return MBlaze::SEXT16;
+    case 0x41: return MBlaze::SRL;
+    case 0x21: return MBlaze::SRC;
+    case 0x01: return MBlaze::SRA;
+    }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+    switch ((insn>>21)&0x1F) {
+    default:    return UNSUPPORTED;
+    case 0x00:  return MBlaze::BEQ;
+    case 0x10:  return MBlaze::BEQD;
+    case 0x05:  return MBlaze::BGE;
+    case 0x15:  return MBlaze::BGED;
+    case 0x04:  return MBlaze::BGT;
+    case 0x14:  return MBlaze::BGTD;
+    case 0x03:  return MBlaze::BLE;
+    case 0x13:  return MBlaze::BLED;
+    case 0x02:  return MBlaze::BLT;
+    case 0x12:  return MBlaze::BLTD;
+    case 0x01:  return MBlaze::BNE;
+    case 0x11:  return MBlaze::BNED;
+    }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+    switch ((insn>>21)&0x1F) {
+    default:    return UNSUPPORTED;
+    case 0x00:  return MBlaze::BEQI;
+    case 0x10:  return MBlaze::BEQID;
+    case 0x05:  return MBlaze::BGEI;
+    case 0x15:  return MBlaze::BGEID;
+    case 0x04:  return MBlaze::BGTI;
+    case 0x14:  return MBlaze::BGTID;
+    case 0x03:  return MBlaze::BLEI;
+    case 0x13:  return MBlaze::BLEID;
+    case 0x02:  return MBlaze::BLTI;
+    case 0x12:  return MBlaze::BLTID;
+    case 0x01:  return MBlaze::BNEI;
+    case 0x11:  return MBlaze::BNEID;
+    }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+    switch ((insn>>16)&0x1F) {
+    default:   return UNSUPPORTED;
+    case 0x00: return MBlaze::BR;
+    case 0x08: return MBlaze::BRA;
+    case 0x0C: return MBlaze::BRK;
+    case 0x10: return MBlaze::BRD;
+    case 0x14: return MBlaze::BRLD;
+    case 0x18: return MBlaze::BRAD;
+    case 0x1C: return MBlaze::BRALD;
+    }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+    switch ((insn>>16)&0x1F) {
+    default:   return UNSUPPORTED;
+    case 0x00: return MBlaze::BRI;
+    case 0x08: return MBlaze::BRAI;
+    case 0x0C: return MBlaze::BRKI;
+    case 0x10: return MBlaze::BRID;
+    case 0x14: return MBlaze::BRLID;
+    case 0x18: return MBlaze::BRAID;
+    case 0x1C: return MBlaze::BRALID;
+    }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+    switch ((insn>>9)&0x3) {
+    default:  return UNSUPPORTED;
+    case 0x2: return MBlaze::BSLL;
+    case 0x1: return MBlaze::BSRA;
+    case 0x0: return MBlaze::BSRL;
+    }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+    switch ((insn>>9)&0x3) {
+    default:  return UNSUPPORTED;
+    case 0x2: return MBlaze::BSLLI;
+    case 0x1: return MBlaze::BSRAI;
+    case 0x0: return MBlaze::BSRLI;
+    }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::RSUBK;
+    case 0x1: return MBlaze::CMP;
+    case 0x3: return MBlaze::CMPU;
+    }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default:    return UNSUPPORTED;
+    case 0x000: return MBlaze::FADD;
+    case 0x080: return MBlaze::FRSUB;
+    case 0x100: return MBlaze::FMUL;
+    case 0x180: return MBlaze::FDIV;
+    case 0x200: return MBlaze::FCMP_UN;
+    case 0x210: return MBlaze::FCMP_LT;
+    case 0x220: return MBlaze::FCMP_EQ;
+    case 0x230: return MBlaze::FCMP_LE;
+    case 0x240: return MBlaze::FCMP_GT;
+    case 0x250: return MBlaze::FCMP_NE;
+    case 0x260: return MBlaze::FCMP_GE;
+    case 0x280: return MBlaze::FLT;
+    case 0x300: return MBlaze::FINT;
+    case 0x380: return MBlaze::FSQRT;
+    }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+    switch ((insn>>10)&0x3F) {
+    default:   return UNSUPPORTED;
+    case 0x00: return MBlaze::GET;
+    case 0x01: return MBlaze::EGET;
+    case 0x02: return MBlaze::AGET;
+    case 0x03: return MBlaze::EAGET;
+    case 0x04: return MBlaze::TGET;
+    case 0x05: return MBlaze::TEGET;
+    case 0x06: return MBlaze::TAGET;
+    case 0x07: return MBlaze::TEAGET;
+    case 0x08: return MBlaze::CGET;
+    case 0x09: return MBlaze::ECGET;
+    case 0x0A: return MBlaze::CAGET;
+    case 0x0B: return MBlaze::ECAGET;
+    case 0x0C: return MBlaze::TCGET;
+    case 0x0D: return MBlaze::TECGET;
+    case 0x0E: return MBlaze::TCAGET;
+    case 0x0F: return MBlaze::TECAGET;
+    case 0x10: return MBlaze::NGET;
+    case 0x11: return MBlaze::NEGET;
+    case 0x12: return MBlaze::NAGET;
+    case 0x13: return MBlaze::NEAGET;
+    case 0x14: return MBlaze::TNGET;
+    case 0x15: return MBlaze::TNEGET;
+    case 0x16: return MBlaze::TNAGET;
+    case 0x17: return MBlaze::TNEAGET;
+    case 0x18: return MBlaze::NCGET;
+    case 0x19: return MBlaze::NECGET;
+    case 0x1A: return MBlaze::NCAGET;
+    case 0x1B: return MBlaze::NECAGET;
+    case 0x1C: return MBlaze::TNCGET;
+    case 0x1D: return MBlaze::TNECGET;
+    case 0x1E: return MBlaze::TNCAGET;
+    case 0x1F: return MBlaze::TNECAGET;
+    case 0x20: return MBlaze::PUT;
+    case 0x22: return MBlaze::APUT;
+    case 0x24: return MBlaze::TPUT;
+    case 0x26: return MBlaze::TAPUT;
+    case 0x28: return MBlaze::CPUT;
+    case 0x2A: return MBlaze::CAPUT;
+    case 0x2C: return MBlaze::TCPUT;
+    case 0x2E: return MBlaze::TCAPUT;
+    case 0x30: return MBlaze::NPUT;
+    case 0x32: return MBlaze::NAPUT;
+    case 0x34: return MBlaze::TNPUT;
+    case 0x36: return MBlaze::TNAPUT;
+    case 0x38: return MBlaze::NCPUT;
+    case 0x3A: return MBlaze::NCAPUT;
+    case 0x3C: return MBlaze::TNCPUT;
+    case 0x3E: return MBlaze::TNCAPUT;
+    }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+    switch ((insn>>5)&0x3F) {
+    default:   return UNSUPPORTED;
+    case 0x00: return MBlaze::GETD;
+    case 0x01: return MBlaze::EGETD;
+    case 0x02: return MBlaze::AGETD;
+    case 0x03: return MBlaze::EAGETD;
+    case 0x04: return MBlaze::TGETD;
+    case 0x05: return MBlaze::TEGETD;
+    case 0x06: return MBlaze::TAGETD;
+    case 0x07: return MBlaze::TEAGETD;
+    case 0x08: return MBlaze::CGETD;
+    case 0x09: return MBlaze::ECGETD;
+    case 0x0A: return MBlaze::CAGETD;
+    case 0x0B: return MBlaze::ECAGETD;
+    case 0x0C: return MBlaze::TCGETD;
+    case 0x0D: return MBlaze::TECGETD;
+    case 0x0E: return MBlaze::TCAGETD;
+    case 0x0F: return MBlaze::TECAGETD;
+    case 0x10: return MBlaze::NGETD;
+    case 0x11: return MBlaze::NEGETD;
+    case 0x12: return MBlaze::NAGETD;
+    case 0x13: return MBlaze::NEAGETD;
+    case 0x14: return MBlaze::TNGETD;
+    case 0x15: return MBlaze::TNEGETD;
+    case 0x16: return MBlaze::TNAGETD;
+    case 0x17: return MBlaze::TNEAGETD;
+    case 0x18: return MBlaze::NCGETD;
+    case 0x19: return MBlaze::NECGETD;
+    case 0x1A: return MBlaze::NCAGETD;
+    case 0x1B: return MBlaze::NECAGETD;
+    case 0x1C: return MBlaze::TNCGETD;
+    case 0x1D: return MBlaze::TNECGETD;
+    case 0x1E: return MBlaze::TNCAGETD;
+    case 0x1F: return MBlaze::TNECAGETD;
+    case 0x20: return MBlaze::PUTD;
+    case 0x22: return MBlaze::APUTD;
+    case 0x24: return MBlaze::TPUTD;
+    case 0x26: return MBlaze::TAPUTD;
+    case 0x28: return MBlaze::CPUTD;
+    case 0x2A: return MBlaze::CAPUTD;
+    case 0x2C: return MBlaze::TCPUTD;
+    case 0x2E: return MBlaze::TCAPUTD;
+    case 0x30: return MBlaze::NPUTD;
+    case 0x32: return MBlaze::NAPUTD;
+    case 0x34: return MBlaze::TNPUTD;
+    case 0x36: return MBlaze::TNAPUTD;
+    case 0x38: return MBlaze::NCPUTD;
+    case 0x3A: return MBlaze::NCAPUTD;
+    case 0x3C: return MBlaze::TNCPUTD;
+    case 0x3E: return MBlaze::TNCAPUTD;
+    }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+    switch (insn&0x3) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::IDIV;
+    case 0x2: return MBlaze::IDIVU;
+    }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+    switch ((insn>>9)&0x1) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::LBU;
+    case 0x1: return MBlaze::LBUR;
+    }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+    switch ((insn>>9)&0x1) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::LHU;
+    case 0x1: return MBlaze::LHUR;
+    }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+    switch ((insn>>9)&0x3) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::LW;
+    case 0x1: return MBlaze::LWR;
+    case 0x2: return MBlaze::LWX;
+    }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+    switch ((insn>>9)&0x1) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::SB;
+    case 0x1: return MBlaze::SBR;
+    }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+    switch ((insn>>9)&0x1) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::SH;
+    case 0x1: return MBlaze::SHR;
+    }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+    switch ((insn>>9)&0x3) {
+    default:  return UNSUPPORTED;
+    case 0x0: return MBlaze::SW;
+    case 0x1: return MBlaze::SWR;
+    case 0x2: return MBlaze::SWX;
+    }
+}
+
+static unsigned decodeMFS(uint32_t insn) {
+    switch ((insn>>15)&0x1) {
+    default:   return UNSUPPORTED;
+    case 0x0:
+      switch ((insn>>16)&0x1) {
+      default:   return UNSUPPORTED;
+      case 0x0: return MBlaze::MSRSET;
+      case 0x1: return MBlaze::MSRCLR;
+      }
+    case 0x1:
+      switch ((insn>>14)&0x1) {
+      default:   return UNSUPPORTED;
+      case 0x0: return MBlaze::MFS;
+      case 0x1: return MBlaze::MTS;
+      }
+    }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default:    return UNSUPPORTED;
+    case 0x000: return MBlaze::OR;
+    case 0x400: return MBlaze::PCMPBF;
+    }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default:    return UNSUPPORTED;
+    case 0x000: return MBlaze::XOR;
+    case 0x400: return MBlaze::PCMPEQ;
+    }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+    switch (getFLAGS(insn)) {
+    default:    return UNSUPPORTED;
+    case 0x000: return MBlaze::ANDN;
+    case 0x400: return MBlaze::PCMPNE;
+    }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+    switch ((insn>>21)&0x1F) {
+    default:   return UNSUPPORTED;
+    case 0x10: return MBlaze::RTSD;
+    case 0x11: return MBlaze::RTID;
+    case 0x12: return MBlaze::RTBD;
+    case 0x14: return MBlaze::RTED;
+    }
+}
+
+static unsigned getOPCODE(uint32_t insn) {
+  unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+  switch (opcode) {
+  case MBlaze::MUL:     return decodeMUL(insn);
+  case MBlaze::SEXT8:   return decodeSEXT(insn);
+  case MBlaze::BEQ:     return decodeBEQ(insn);
+  case MBlaze::BEQI:    return decodeBEQI(insn);
+  case MBlaze::BR:      return decodeBR(insn);
+  case MBlaze::BRI:     return decodeBRI(insn);
+  case MBlaze::BSRL:    return decodeBSRL(insn);
+  case MBlaze::BSRLI:   return decodeBSRLI(insn);
+  case MBlaze::RSUBK:   return decodeRSUBK(insn);
+  case MBlaze::FADD:    return decodeFADD(insn);
+  case MBlaze::GET:     return decodeGET(insn);
+  case MBlaze::GETD:    return decodeGETD(insn);
+  case MBlaze::IDIV:    return decodeIDIV(insn);
+  case MBlaze::LBU:     return decodeLBU(insn);
+  case MBlaze::LHU:     return decodeLHU(insn);
+  case MBlaze::LW:      return decodeLW(insn);
+  case MBlaze::SB:      return decodeSB(insn);
+  case MBlaze::SH:      return decodeSH(insn);
+  case MBlaze::SW:      return decodeSW(insn);
+  case MBlaze::MFS:     return decodeMFS(insn);
+  case MBlaze::OR:      return decodeOR(insn);
+  case MBlaze::XOR:     return decodeXOR(insn);
+  case MBlaze::ANDN:    return decodeANDN(insn);
+  case MBlaze::RTSD:    return decodeRTSD(insn);
+  default:              return opcode;
+  }
+}
+
+EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+  return instInfoMBlaze;
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool MBlazeDisassembler::getInstruction(MCInst &instr,
+                                        uint64_t &size,
+                                        const MemoryObject &region,
+                                        uint64_t address,
+                                        raw_ostream &vStream) const {
+  // The machine instruction.
+  uint32_t insn;
+  uint8_t bytes[4];
+
+  // We always consume 4 bytes of data
+  size = 4;
+
+  // We want to read exactly 4 bytes of data.
+  if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1)
+    return false;
+
+  // Encoded as a big-endian 32-bit word in the stream.
+  insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+  // Get the MCInst opcode from the binary instruction and make sure
+  // that it is a valid instruction.
+  unsigned opcode = getOPCODE(insn);
+  if (opcode == UNSUPPORTED)
+    return false;
+
+  instr.setOpcode(opcode);
+
+  uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
+  switch ((tsFlags & MBlazeII::FormMask)) {
+  default: llvm_unreachable("unknown instruction encoding");
+
+  case MBlazeII::FRRRR:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    break;
+
+  case MBlazeII::FRRR:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FRI:
+    switch (opcode) {
+    default: llvm_unreachable("unknown instruction encoding");
+    case MBlaze::MFS:
+      instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+      instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+      break;
+    case MBlaze::MTS:
+      instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+      instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+      break;
+    case MBlaze::MSRSET:
+    case MBlaze::MSRCLR:
+      instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+      instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+      break;
+    }
+    break;
+
+  case MBlazeII::FRRI:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    switch (opcode) {
+    default:
+      instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+      break;
+    case MBlaze::BSRLI:
+    case MBlaze::BSRAI:
+    case MBlaze::BSLLI:
+      instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+      break;
+    }
+    break;
+
+  case MBlazeII::FCRR:
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FCRI:
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+    break;
+
+  case MBlazeII::FRCR:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FRCI:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+    break;
+
+  case MBlazeII::FCCR:
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FCCI:
+    instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+    break;
+
+  case MBlazeII::FRRCI:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+    break;
+
+  case MBlazeII::FRRC:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    break;
+
+  case MBlazeII::FRCX:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+    break;
+
+  case MBlazeII::FRCS:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+    break;
+
+  case MBlazeII::FCRCS:
+    instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    break;
+
+  case MBlazeII::FCRCX:
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+    break;
+
+  case MBlazeII::FCX:
+    instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+    break;
+
+  case MBlazeII::FCR:
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FRIR:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    break;
+  }
+
+  return true;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T) {
+  return new MBlazeDisassembler;
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+  // Register the disassembler.
+  TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+                                         createMBlazeDisassembler);
+}
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 000000000000..d05eced0bacf
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,55 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze  ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It it the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+  
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+  
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+  /// Constructor     - Initializes the disassembler.
+  ///
+  MBlazeDisassembler() :
+    MCDisassembler() {
+  }
+
+  ~MBlazeDisassembler() {
+  }
+
+  /// getInstruction - See MCDisassembler.
+  bool getInstruction(MCInst &instr,
+                      uint64_t &size,
+                      const MemoryObject &region,
+                      uint64_t address,
+                      raw_ostream &vStream) const;
+
+  /// getEDInfo - See MCDisassembler.
+  EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+  
+#endif
diff --git a/lib/Target/MBlaze/Disassembler/Makefile b/lib/Target/MBlaze/Disassembler/Makefile
new file mode 100644
index 000000000000..0530b3286bc4
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDisassembler
+
+# Hack: we need to include 'main' MBlaze target directory to grab headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..242a573036e6
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+    MBlazeInstPrinter.cpp
+  )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 000000000000..a7fd287990b7
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,69 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  printInstruction(MI, O);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << (int32_t)Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << *Op.getExpr();
+  }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNo);
+  if (MO.isImm())
+    O << "rfsl" << MO.getImm();
+  else
+    printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+                                        raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNo);
+  if (MO.isImm())
+    O << (uint32_t)MO.getImm();
+  else
+    printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+                                        raw_ostream &O, const char *Modifier) {
+  printOperand(MI, OpNo, O, NULL);
+  O << ", ";
+  printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 000000000000..bebc6c83d544
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+  class MBlazeInstPrinter : public MCInstPrinter {
+  public:
+    MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
+    }
+
+    virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+    // Autogenerated by tblgen.
+    void printInstruction(const MCInst *MI, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+    static const char *getInstructionName(unsigned Opcode);
+
+    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                      const char *Modifier = 0);
+    void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+    void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+    void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+                         const char *Modifier = 0);
+  };
+}
+
+#endif
diff --git a/lib/Target/MBlaze/InstPrinter/Makefile b/lib/Target/MBlaze/InstPrinter/Makefile
new file mode 100644
index 000000000000..9fb6e869d945
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+#       private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
index f9d828b26616..00c73f06fe10 100644
--- a/lib/Target/MBlaze/MBlaze.h
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -21,8 +21,16 @@ namespace llvm {
   class MBlazeTargetMachine;
   class FunctionPass;
   class MachineCodeEmitter;
+  class MCCodeEmitter;
+  class TargetAsmBackend;
   class formatted_raw_ostream;
 
+  MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
+                                           TargetMachine &TM,
+                                           MCContext &Ctx);
+
+  TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
   FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
   FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
 
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 3815b6d0a398..1fa1e4dd5776 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -32,35 +32,35 @@ def MBlazeInstrInfo : InstrInfo;
 //===----------------------------------------------------------------------===//
 
 def FeaturePipe3       : SubtargetFeature<"pipe3", "HasPipe3", "true",
-                                "Implements 3-stage pipeline.">;
+                                "Implements 3-stage pipeline">;
 def FeatureBarrel      : SubtargetFeature<"barrel", "HasBarrel", "true",
-                                "Implements barrel shifter.">;
+                                "Implements barrel shifter">;
 def FeatureDiv         : SubtargetFeature<"div", "HasDiv", "true",
-                                "Implements hardware divider.">;
+                                "Implements hardware divider">;
 def FeatureMul         : SubtargetFeature<"mul", "HasMul", "true",
-                                "Implements hardware multiplier.">;
+                                "Implements hardware multiplier">;
 def FeatureFSL         : SubtargetFeature<"fsl", "HasFSL", "true",
-                                "Implements FSL instructions.">;
+                                "Implements FSL instructions">;
 def FeatureEFSL        : SubtargetFeature<"efsl", "HasEFSL", "true",
-                                "Implements extended FSL instructions.">;
+                                "Implements extended FSL instructions">;
 def FeatureMSRSet      : SubtargetFeature<"msrset", "HasMSRSet", "true",
-                                "Implements MSR register set and clear.">;
+                                "Implements MSR register set and clear">;
 def FeatureException   : SubtargetFeature<"exception", "HasException", "true",
-                                "Implements hardware exception support.">;
+                                "Implements hardware exception support">;
 def FeaturePatCmp      : SubtargetFeature<"patcmp", "HasPatCmp", "true",
-                                "Implements pattern compare instruction.">;
+                                "Implements pattern compare instruction">;
 def FeatureFPU         : SubtargetFeature<"fpu", "HasFPU", "true",
-                                "Implements floating point unit.">;
+                                "Implements floating point unit">;
 def FeatureESR         : SubtargetFeature<"esr", "HasESR", "true",
                                 "Implements ESR and EAR registers">;
 def FeaturePVR         : SubtargetFeature<"pvr", "HasPVR", "true",
-                                "Implements processor version register.">;
+                                "Implements processor version register">;
 def FeatureMul64       : SubtargetFeature<"mul64", "HasMul64", "true",
                                 "Implements multiplier with 64-bit result">;
 def FeatureSqrt        : SubtargetFeature<"sqrt", "HasSqrt", "true",
-                                "Implements sqrt and floating point convert.">;
+                                "Implements sqrt and floating point convert">;
 def FeatureMMU         : SubtargetFeature<"mmu", "HasMMU", "true",
-                                "Implements memory management unit.">;
+                                "Implements memory management unit">;
 
 //===----------------------------------------------------------------------===//
 // MBlaze processors supported.
@@ -69,13 +69,26 @@ def FeatureMMU         : SubtargetFeature<"mmu", "HasMMU", "true",
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, MBlazeGenericItineraries, Features>;
 
-
 def : Proc<"v400", []>;
 def : Proc<"v500", []>;
 def : Proc<"v600", []>;
 def : Proc<"v700", []>;
 def : Proc<"v710", []>;
 
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
 def MBlaze : Target {
   let InstructionSet = MBlazeInstrInfo;
+  let AssemblyWriters = [MBlazeAsmWriter];
 }
diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
new file mode 100644
index 000000000000..a4b21afa599e
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -0,0 +1,163 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "MBlaze.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+  switch (Kind) {
+  default: assert(0 && "invalid fixup kind!");
+  case FK_Data_1: return 1;
+  case FK_PCRel_2:
+  case FK_Data_2: return 2;
+  case FK_PCRel_4:
+  case FK_Data_4: return 4;
+  case FK_Data_8: return 8;
+  }
+}
+
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  MBlazeELFObjectWriter(Triple::OSType OSType)
+    : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+                              /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public TargetAsmBackend {
+public:
+  MBlazeAsmBackend(const Target &T)
+    : TargetAsmBackend() {
+  }
+
+  unsigned getNumFixupKinds() const {
+    return 2;
+  }
+
+  bool MayNeedRelaxation(const MCInst &Inst) const;
+
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+  unsigned getPointerSize() const {
+    return 4;
+  }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+    switch (Op) {
+    default:            return Op;
+    case MBlaze::ADDIK: return MBlaze::ADDIK32;
+    case MBlaze::ORI:   return MBlaze::ORI32;
+    case MBlaze::BRLID: return MBlaze::BRLID32;
+    }
+}
+
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+    return false;
+
+  bool hasExprOrImm = false;
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+    hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+  return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  Res = Inst;
+  Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+  if ((Count % 4) != 0)
+    return false;
+
+  for (uint64_t i = 0; i < Count; i += 4)
+      OW->Write32(0x00000000);
+
+  return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+  Triple::OSType OSType;
+  ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+    : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const;
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+                                 /*IsLittleEndian*/ false);
+  }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+                                     unsigned DataSize, uint64_t Value) const {
+  unsigned Size = getFixupKindSize(Fixup.getKind());
+
+  assert(Fixup.getOffset() + Size <= DataSize &&
+         "Invalid fixup offset!");
+
+  char *data = Data + Fixup.getOffset();
+  switch (Size) {
+  default: llvm_unreachable("Cannot fixup unknown value.");
+  case 1:  llvm_unreachable("Cannot fixup 1 byte value.");
+  case 8:  llvm_unreachable("Cannot fixup 8 byte value.");
+
+  case 4:
+    *(data+7) = uint8_t(Value);
+    *(data+6) = uint8_t(Value >> 8);
+    *(data+3) = uint8_t(Value >> 16);
+    *(data+2) = uint8_t(Value >> 24);
+    break;
+
+  case 2:
+    *(data+3) = uint8_t(Value >> 0);
+    *(data+2) = uint8_t(Value >> 8);
+  }
+}
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
+                                            const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    assert(0 && "Mac not supported on MBlaze");
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    assert(0 && "Windows not supported on MBlaze");
+  default:
+    return new ELFMBlazeAsmBackend(T, Triple(TT).getOS());
+  }
+}
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 000000000000..0016df569b93
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,335 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+  class MBlazeAsmPrinter : public AsmPrinter {
+    const MBlazeSubtarget *Subtarget;
+  public:
+    explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {
+      Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+    }
+
+    virtual const char *getPassName() const {
+      return "MBlaze Assembly Printer";
+    }
+
+    void printSavedRegsBitmask();
+    void emitFrameDirective();
+    virtual void EmitFunctionBodyStart();
+    virtual void EmitFunctionBodyEnd();
+    virtual void EmitFunctionEntryLabel();
+
+    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+      const;
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                         const char *Modifier = 0);
+
+    void EmitInstruction(const MachineInstr *MI);
+  };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+//  MBlaze Asm Directives
+//
+//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+//  Describe the stack frame.
+//
+//  -- Mask directives "mask  bitmask, offset"
+//  Tells the assembler which registers are saved and where.
+//  bitmask - contain a little endian bitset indicating which registers are
+//            saved on function prologue (e.g. with a 0x80000000 mask, the
+//            assembler knows the register 31 (RA) is saved at prologue.
+//  offset  - the position before stack pointer subtraction indicating where
+//            the first saved register on prologue is located. (e.g. with a
+//
+//  Consider the following function prologue:
+//
+//    .frame  R19,48,R15
+//    .mask   0xc0000000,-8
+//       addiu R1, R1, -48
+//       sw R15, 40(R1)
+//       sw R19, 36(R1)
+//
+//    With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+//    19 (R19) are saved at prologue. As the save order on prologue is from
+//    left to right, R15 is saved first. A -8 offset means that after the
+//    stack pointer subtration, the first register in the mask (R15) will be
+//    saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32 bit hex number with all numbers.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+  O << "0x";
+  for (int i = 7; i >= 0; i--)
+    O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+  const TargetFrameLowering *TFI = TM.getFrameLowering();
+  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+  // CPU Saved Registers Bitmasks
+  unsigned int CPUBitmask = 0;
+
+  // Set the CPU Bitmasks
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+    if (MBlaze::GPRRegisterClass->contains(Reg))
+      CPUBitmask |= (1 << RegNum);
+  }
+
+  // Return Address and Frame registers must also be set in CPUBitmask.
+  if (TFI->hasFP(*MF))
+    CPUBitmask |= (1 << MBlazeRegisterInfo::
+                getRegisterNumbering(RI.getFrameRegister(*MF)));
+
+  if (MFI->adjustsStack())
+    CPUBitmask |= (1 << MBlazeRegisterInfo::
+                getRegisterNumbering(RI.getRARegister()));
+
+  // Print CPUBitmask
+  OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
+}
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+  if (!OutStreamer.hasRawTextSupport())
+    return;
+
+  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+  unsigned stkReg = RI.getFrameRegister(*MF);
+  unsigned retReg = RI.getRARegister();
+  unsigned stkSze = MF->getFrameInfo()->getStackSize();
+
+  OutStreamer.EmitRawText("\t.frame\t" +
+                          Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+                          "," + Twine(stkSze) + "," +
+                          Twine(MBlazeInstPrinter::getRegisterName(retReg)));
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+  AsmPrinter::EmitFunctionEntryLabel();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+  if (!OutStreamer.hasRawTextSupport())
+    return;
+
+  emitFrameDirective();
+  printSavedRegsBitmask();
+}
+
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                    raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << (int32_t)MO.getImm();
+    break;
+
+  case MachineOperand::MO_FPImmediate: {
+    const ConstantFP *fp = MO.getFPImm();
+    printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+    O << ";\t# immediate = " << *fp;
+    break;
+  }
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI"
+      << getFunctionNumber() << "_" << MO.getIndex();
+    if (MO.getOffset())
+      O << "+" << MO.getOffset();
+    break;
+
+  default:
+    llvm_unreachable("<unknown operand type>");
+  }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+                                        raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (uint32_t)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << "rfsl" << (unsigned int)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                const char *Modifier) {
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+  // If this is a landing pad, it isn't a fall through.  If it has no preds,
+  // then nothing falls through to it.
+  if (MBB->isLandingPad() || MBB->pred_empty())
+    return false;
+
+  // If there isn't exactly one predecessor, it can't be a fall through.
+  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+  ++PI2;
+  if (PI2 != MBB->pred_end())
+    return false;
+
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *PI;
+
+  if (!Pred->isLayoutSuccessor(MBB))
+    return false;
+
+  // If the block is completely empty, then it definitely does fall through.
+  if (Pred->empty())
+    return true;
+
+  // Check if the last terminator is an unconditional branch.
+  MachineBasicBlock::const_iterator I = Pred->end();
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+    ; // Noop
+  return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+                                                unsigned SyntaxVariant,
+                                                const MCAsmInfo &MAI) {
+  if (SyntaxVariant == 0)
+    return new MBlazeInstPrinter(MAI);
+  return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+  RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+  TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+                                        createMBlazeMCInstPrinter);
+
+}
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
index 8622e0d74bcd..4962573f96ab 100644
--- a/lib/Target/MBlaze/MBlazeCallingConv.td
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -1,16 +1,16 @@
 //===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for MBlaze architecture.
 //===----------------------------------------------------------------------===//
 
 /// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>: 
+class CCIfSubtarget<string F, CCAction A>:
   CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
 
 //===----------------------------------------------------------------------===//
@@ -19,8 +19,10 @@ class CCIfSubtarget<string F, CCAction A>:
 
 def RetCC_MBlaze : CallingConv<[
   // i32 are returned in registers R3, R4
-  CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
+  CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
 
-  // f32 are returned in registers F3, F4
-  CCIfType<[f32], CCAssignToReg<[F3, F4]>>
+def CC_MBlaze : CallingConv<[
+  CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+  CCIfType<[i32,f32], CCAssignToStack<4, 4>>
 ]>;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index b551b79b291e..4399ee280098 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Simple pass to fills delay slots with NOPs.
+// A pass that attempts to fill instructions with delay slots. If no
+// instructions can be moved into the delay slot then a NOP is placed there.
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,11 +20,23 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 STATISTIC(FilledSlots, "Number of delay slots filled");
 
+namespace llvm {
+cl::opt<bool> DisableDelaySlotFiller(
+  "disable-mblaze-delay-filler",
+  cl::init(false),
+  cl::desc("Disable the MBlaze delay slot filter."),
+  cl::Hidden);
+}
+
 namespace {
   struct Filler : public MachineFunctionPass {
 
@@ -31,7 +44,7 @@ namespace {
     const TargetInstrInfo *TII;
 
     static char ID;
-    Filler(TargetMachine &tm) 
+    Filler(TargetMachine &tm)
       : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
 
     virtual const char *getPassName() const {
@@ -51,6 +64,168 @@ namespace {
   char Filler::ID = 0;
 } // end of anonymous namespace
 
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+    // Any instruction with an immediate mode operand greater than
+    // 16-bits requires an implicit IMM instruction.
+    unsigned numOper = candidate->getNumOperands();
+    for (unsigned op = 0; op < numOper; ++op) {
+        MachineOperand &mop = candidate->getOperand(op);
+
+        // The operand requires more than 16-bits to represent.
+        if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+          return true;
+
+        // We must assume that unknown immediate values require more than
+        // 16-bits to represent.
+        if (mop.isGlobal() || mop.isSymbol())
+          return true;
+
+        // FIXME: we could probably check to see if the FP value happens
+        //        to not need an IMM instruction. For now we just always
+        //        assume that FP values do.
+        if (mop.isFPImm())
+          return true;
+    }
+
+    return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+  switch (instr->getOpcode()) {
+  default: return instr->getNumOperands();
+
+  // These instructions have a variable number of operands but the first two
+  // are the "real" operands that we care about during hazard detection.
+  case MBlaze::BRLID:
+  case MBlaze::BRALID:
+  case MBlaze::BRLD:
+  case MBlaze::BRALD:
+    return 2;
+  }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+                           MachineBasicBlock::iterator &slot) {
+  // Hazard check
+  MachineBasicBlock::iterator a = candidate;
+  MachineBasicBlock::iterator b = slot;
+  TargetInstrDesc desc = candidate->getDesc();
+
+  // MBB layout:-
+  //    candidate := a0 = operation(a1, a2)
+  //    ...middle bit...
+  //    slot := b0 = operation(b1, b2)
+
+  // Possible hazards:-/
+  // 1. a1 or a2 was written during the middle bit
+  // 2. a0 was read or written during the middle bit
+  // 3. a0 is one or more of {b0, b1, b2}
+  // 4. b0 is one or more of {a1, a2}
+  // 5. a accesses memory, and the middle bit
+  //    contains a store operation.
+  bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+  // Determine the number of operands in the slot instruction and in the
+  // candidate instruction.
+  const unsigned aend = getLastRealOperand(a);
+  const unsigned bend = getLastRealOperand(b);
+
+  // Check hazards type 1, 2 and 5 by scanning the middle bit
+  MachineBasicBlock::iterator m = a;
+  for (++m; m != b; ++m) {
+    for (unsigned aop = 0; aop<aend; ++aop) {
+      bool aop_is_reg = a->getOperand(aop).isReg();
+      if (!aop_is_reg) continue;
+
+      bool aop_is_def = a->getOperand(aop).isDef();
+      unsigned aop_reg = a->getOperand(aop).getReg();
+
+      const unsigned mend = getLastRealOperand(m);
+      for (unsigned mop = 0; mop<mend; ++mop) {
+        bool mop_is_reg = m->getOperand(mop).isReg();
+        if (!mop_is_reg) continue;
+
+        bool mop_is_def = m->getOperand(mop).isDef();
+        unsigned mop_reg = m->getOperand(mop).getReg();
+
+        if (aop_is_def && (mop_reg == aop_reg))
+            return true; // Hazard type 2, because aop = a0
+        else if (mop_is_def && (mop_reg == aop_reg))
+            return true; // Hazard type 1, because aop in {a1, a2}
+      }
+    }
+
+    // Check hazard type 5
+    if (a_is_memory && m->getDesc().mayStore())
+      return true;
+  }
+
+  // Check hazard type 3 & 4
+  for (unsigned aop = 0; aop<aend; ++aop) {
+    if (a->getOperand(aop).isReg()) {
+      unsigned aop_reg = a->getOperand(aop).getReg();
+
+      for (unsigned bop = 0; bop<bend; ++bop) {
+        if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+          unsigned bop_reg = b->getOperand(bop).getReg();
+          if (aop_reg == bop_reg)
+            return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator candidate) {
+  if (candidate == MBB.begin())
+    return false;
+
+  TargetInstrDesc brdesc = (--candidate)->getDesc();
+  return (brdesc.hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+  if (!I->hasUnmodeledSideEffects())
+    return false;
+
+  unsigned op = I->getOpcode();
+  if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+      op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+      op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+      op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+      op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+      op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+    return false;
+
+  return true;
+}
+
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+  MachineBasicBlock::iterator I = slot;
+  while (true) {
+    if (I == MBB.begin())
+      break;
+
+    --I;
+    TargetInstrDesc desc = I->getDesc();
+    if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
+        desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+        hasUnknownSideEffects(I))
+      break;
+
+    if (hasImmInstruction(I) || delayHasHazard(I,slot))
+      continue;
+
+    return I;
+  }
+
+  return MBB.end();
+}
+
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
 /// Currently, we fill delay slots with NOPs. We assume there is only one
 /// delay slot per delayed instruction.
@@ -58,11 +233,19 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
     if (I->getDesc().hasDelaySlot()) {
+      MachineBasicBlock::iterator D = MBB.end();
       MachineBasicBlock::iterator J = I;
-      ++J;
-      BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+
+      if (!DisableDelaySlotFiller)
+        D = findDelayInstr(MBB,I);
+
       ++FilledSlots;
       Changed = true;
+
+      if (D == MBB.end())
+        BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+      else
+        MBB.splice(++J, &MBB, D);
     }
   return Changed;
 }
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
new file mode 100644
index 000000000000..3f26ed15b284
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeELFWriterInfo.h"
+#include "MBlazeRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Implementation of the MBlazeELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
+  : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+                        TM.getTargetData()->isLittleEndian()) {
+}
+
+MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {}
+
+unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+  switch (MachineRelTy) {
+  case MBlaze::reloc_pcrel_word:
+    return ELF::R_MICROBLAZE_64_PCREL;
+  case MBlaze::reloc_absolute_word:
+    return ELF::R_MICROBLAZE_NONE;
+  default:
+    llvm_unreachable("unknown mblaze machine relocation type");
+  }
+  return 0;
+}
+
+long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+                                                    long int Modifier) const {
+  switch (RelTy) {
+  case ELF::R_MICROBLAZE_32_PCREL:
+    return Modifier - 4;
+  case ELF::R_MICROBLAZE_32:
+    return Modifier;
+  default:
+    llvm_unreachable("unknown mblaze relocation type");
+  }
+  return 0;
+}
+
+unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+  // FIXME: Most of these sizes are guesses based on the name
+  switch (RelTy) {
+  case ELF::R_MICROBLAZE_32:
+  case ELF::R_MICROBLAZE_32_PCREL:
+  case ELF::R_MICROBLAZE_32_PCREL_LO:
+  case ELF::R_MICROBLAZE_32_LO:
+  case ELF::R_MICROBLAZE_SRO32:
+  case ELF::R_MICROBLAZE_SRW32:
+  case ELF::R_MICROBLAZE_32_SYM_OP_SYM:
+  case ELF::R_MICROBLAZE_GOTOFF_32:
+    return 32;
+
+  case ELF::R_MICROBLAZE_64_PCREL:
+  case ELF::R_MICROBLAZE_64:
+  case ELF::R_MICROBLAZE_GOTPC_64:
+  case ELF::R_MICROBLAZE_GOT_64:
+  case ELF::R_MICROBLAZE_PLT_64:
+  case ELF::R_MICROBLAZE_GOTOFF_64:
+    return 64;
+  }
+
+  return 0;
+}
+
+bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+  // FIXME: Most of these are guesses based on the name
+  switch (RelTy) {
+  case ELF::R_MICROBLAZE_32_PCREL:
+  case ELF::R_MICROBLAZE_64_PCREL:
+  case ELF::R_MICROBLAZE_32_PCREL_LO:
+  case ELF::R_MICROBLAZE_GOTPC_64:
+    return true;
+  }
+
+  return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+  return MBlaze::reloc_absolute_word;
+}
+
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+                                                unsigned RelOffset,
+                                                unsigned RelTy) const {
+  if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL)
+    return SymOffset - (RelOffset + 4);
+  else
+    assert("computeRelocation unknown for this relocation type");
+
+  return 0;
+}
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 000000000000..63bfc0da745a
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+  class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+  public:
+    MBlazeELFWriterInfo(TargetMachine &TM);
+    virtual ~MBlazeELFWriterInfo();
+
+    /// getRelocationType - Returns the target specific ELF Relocation type.
+    /// 'MachineRelTy' contains the object code independent relocation type
+    virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+    /// hasRelocationAddend - True if the target uses an addend in the
+    /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return false; }
+
+    /// getDefaultAddendForRelTy - Gets the default addend value for a
+    /// relocation entry based on the target ELF relocation type.
+    virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+                                              long int Modifier = 0) const;
+
+    /// getRelTySize - Returns the size of relocatable field in bits
+    virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+    /// isPCRelativeRel - True if the relocation type is pc relative
+    virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getJumpTableRelocationTy - Returns the machine relocation type used
+    /// to reference a jumptable.
+    virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields could be relocated
+    /// directly, avoiding the relocation symbol emission, compute the
+    /// final relocation value for this symbol.
+    virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+                                       unsigned RelTy) const;
+  };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 000000000000..e7639025cf1a
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//=======- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+  cl::opt<bool> DisableStackAdjust(
+    "disable-mblaze-stack-adjust",
+    cl::init(false),
+    cl::desc("Disable MBlaze stack layout adjustment."),
+    cl::Hidden);
+}
+
+static void replaceFrameIndexes(MachineFunction &MF, 
+                                SmallVector<std::pair<int,int64_t>, 16> &FR) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+  const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+  SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+  for (; FRI != FRE; ++FRI) {
+    MFI->RemoveStackObject(FRI->first);
+    int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+    MBlazeFI->recordReplacement(FRI->first, NFI);
+
+    for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+      MachineBasicBlock::iterator MBB = MB->begin();
+      const MachineBasicBlock::iterator MBE = MB->end();
+
+      for (; MBB != MBE; ++MBB) {
+        MachineInstr::mop_iterator MIB = MBB->operands_begin();
+        const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+        for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+          if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+          DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+          MII->setIndex(NFI);
+        }
+      }
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+  if (DisableStackAdjust) return;
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+  MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+  const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+  SmallVector<MachineInstr*, 16> EraseInstr;
+  SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+  MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+  MachineBasicBlock::iterator MIB = MBB->begin();
+  MachineBasicBlock::iterator MIE = MBB->end();
+
+  int StackAdjust = 0;
+  int StackOffset = -28;
+
+  // In this loop we are searching frame indexes that corrospond to incoming
+  // arguments that are already in the stack. We look for instruction sequences
+  // like the following:
+  //    
+  //    LWI REG, FI1, 0
+  //    ...
+  //    SWI REG, FI2, 0
+  //
+  // As long as there are no defs of REG in the ... part, we can eliminate
+  // the SWI instruction because the value has already been stored to the
+  // stack by the caller. All we need to do is locate FI at the correct
+  // stack location according to the calling convensions.
+  //
+  // Additionally, if the SWI operation kills the def of REG then we don't
+  // need the LWI operation so we can erase it as well.
+  for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+      if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+          I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+      unsigned FIReg = I->getOperand(0).getReg();
+      MachineBasicBlock::iterator SI = I;
+      for (SI++; SI != MIE; ++SI) {
+        if (!SI->getOperand(0).isReg() ||
+            !SI->getOperand(1).isFI() ||
+            SI->getOpcode() != MBlaze::SWI) continue;
+
+        int FI = SI->getOperand(1).getIndex();
+        if (SI->getOperand(0).getReg() != FIReg ||
+            MFI->isFixedObjectIndex(FI) ||
+            MFI->getObjectSize(FI) != 4) continue;
+
+        if (SI->getOperand(0).isDef()) break;
+
+        if (SI->getOperand(0).isKill()) {
+          DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex() 
+                       << " removed\n");
+          EraseInstr.push_back(I);
+        }
+
+        EraseInstr.push_back(SI);
+        DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+        FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+        DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+        StackOffset -= 4;
+        StackAdjust += 4;
+        break;
+      }
+    }
+  }
+
+  // In this loop we are searching for frame indexes that corrospond to
+  // incoming arguments that are in registers. We look for instruction
+  // sequences like the following:
+  //    
+  //    ...  SWI REG, FI, 0
+  // 
+  // As long as the ... part does not define REG and if REG is an incoming
+  // parameter register then we know that, according to ABI convensions, the
+  // caller has allocated stack space for it already.  Instead of allocating
+  // stack space on our frame, we record the correct location in the callers
+  // frame.
+  for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+      if (I->definesRegister(LI->first))
+        break;
+
+      if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+          I->getOperand(1).getIndex() < 0) continue;
+
+      if (I->getOperand(0).getReg() == LI->first) {
+        int FI = I->getOperand(1).getIndex();
+        MBlazeFI->recordLiveIn(FI);
+
+        int FILoc = 0;
+        switch (LI->first) {
+        default: llvm_unreachable("invalid incoming parameter!");
+        case MBlaze::R5:  FILoc = -4; break;
+        case MBlaze::R6:  FILoc = -8; break;
+        case MBlaze::R7:  FILoc = -12; break;
+        case MBlaze::R8:  FILoc = -16; break;
+        case MBlaze::R9:  FILoc = -20; break;
+        case MBlaze::R10: FILoc = -24; break;
+        }
+
+        StackAdjust += 4;
+        FrameRelocate.push_back(std::make_pair(FI,FILoc));
+        DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+        break;
+      }
+    }
+  }
+
+  // Go ahead and erase all of the instructions that we determined were
+  // no longer needed.
+  for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+    MBB->erase(EraseInstr[i]);
+
+  // Replace all of the frame indexes that we have relocated with new
+  // fixed object frame indexes.
+  replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+  // If this function is not using either the interrupt_handler
+  // calling convention or the save_volatiles calling convention
+  // then we don't need to do any additional frame layout.
+  if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+      CallConv != llvm::CallingConv::MBLAZE_SVOL)
+      return;
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MBlazeInstrInfo &TII =
+    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  // Determine if the calling convention is the interrupt_handler
+  // calling convention. Some pieces of the prologue and epilogue
+  // only need to be emitted if we are lowering and interrupt handler.
+  bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+  // Determine where to put prologue and epilogue additions
+  MachineBasicBlock &MENT   = MF.front();
+  MachineBasicBlock &MEXT   = MF.back();
+
+  MachineBasicBlock::iterator MENTI = MENT.begin();
+  MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+  DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+  DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+  // Store the frame indexes generated during prologue additions for use
+  // when we are generating the epilogue additions.
+  SmallVector<int, 10> VFI;
+
+  // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+  // always have a SWI because it is used when processing RMSR.
+  for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+    
+    int FI = MFI->CreateStackObject(4,4,false,false);
+    VFI.push_back(FI);
+
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+      .addFrameIndex(FI).addImm(0);
+  }
+    
+  // Build the prologue SWI for R17, R18
+  int R17FI = MFI->CreateStackObject(4,4,false,false);
+  int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+    .addFrameIndex(R17FI).addImm(0);
+    
+  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+    .addFrameIndex(R18FI).addImm(0);
+
+  // Buid the prologue SWI and the epilogue LWI for RMSR if needed
+  if (isIntr) {
+    int MSRFI = MFI->CreateStackObject(4,4,false,false);
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+      .addReg(MBlaze::RMSR);
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+      .addFrameIndex(MSRFI).addImm(0);
+
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+      .addFrameIndex(MSRFI).addImm(0);
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+      .addReg(MBlaze::R11);
+  }
+
+  // Build the epilogue LWI for R17, R18
+  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+    .addFrameIndex(R18FI).addImm(0);
+
+  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+    .addFrameIndex(R17FI).addImm(0);
+
+  // Build the epilogue LWI for R3 - R12 if needed
+  for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+    if (!MRI.isPhysRegUsed(r)) continue;
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+      .addFrameIndex(VFI[--i]).addImm(0);
+  }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
+  // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
+  // the approach done by calculateFrameObjectOffsets to the stack frame.
+  MBlazeFI->adjustLoadArgsFI(MFI);
+  MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned FrameSize = MFI->getStackSize();
+  DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+  // Get the alignments provided by the target, and the maximum alignment
+  // (if any) of the fixed frame objects.
+  // unsigned MaxAlign = MFI->getMaxAlignment();
+  unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1;
+
+  // Make sure the frame is aligned.
+  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+  MFI->setStackSize(FrameSize);
+  DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) 
+  const {
+  const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  if (MBlazeFI->hasReplacement(FI))
+    FI = MBlazeFI->getReplacement(FI);
+  return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB   = MF.front();
+  MachineFrameInfo *MFI    = MF.getFrameInfo();
+  const MBlazeInstrInfo &TII =
+    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+  bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+  // Determine the correct frame layout
+  determineFrameLayout(MF);
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  unsigned StackSize = MFI->getStackSize();
+
+  // No need to allocate space on the stack.
+  if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+  int FPOffset = MBlazeFI->getFPStackOffset();
+  int RAOffset = MBlazeFI->getRAStackOffset();
+
+  // Adjust stack : addi R1, R1, -imm
+  BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+      .addReg(MBlaze::R1).addImm(-StackSize);
+
+  // swi  R15, R1, stack_loc
+  if (MFI->adjustsStack() || requiresRA) {
+    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+        .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+  }
+
+  if (hasFP(MF)) {
+    // swi  R19, R1, stack_loc
+    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+      .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+    // add R19, R1, R0
+    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+      .addReg(MBlaze::R1).addReg(MBlaze::R0);
+  }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+                                   MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  MachineFrameInfo *MFI            = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI     = MF.getInfo<MBlazeFunctionInfo>();
+  const MBlazeInstrInfo &TII =
+    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+  bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+  // Get the FI's where RA and FP are saved.
+  int FPOffset = MBlazeFI->getFPStackOffset();
+  int RAOffset = MBlazeFI->getRAStackOffset();
+
+  if (hasFP(MF)) {
+    // add R1, R19, R0
+    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+      .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+    // lwi  R19, R1, stack_loc
+    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+      .addReg(MBlaze::R1).addImm(FPOffset);
+  }
+
+  // lwi R15, R1, stack_loc
+  if (MFI->adjustsStack() || requiresRA) {
+    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+      .addReg(MBlaze::R1).addImm(RAOffset);
+  }
+
+  // Get the number of bytes from FrameInfo
+  int StackSize = (int) MFI->getStackSize();
+
+  // addi R1, R1, imm
+  if (StackSize) {
+    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+      .addReg(MBlaze::R1).addImm(StackSize);
+  }
+}
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                     RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+  bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+  if (MFI->adjustsStack() || requiresRA) {
+    MBlazeFI->setRAStackOffset(0);
+    MFI->CreateFixedObject(4,0,true);
+  }
+
+  if (hasFP(MF)) {
+    MBlazeFI->setFPStackOffset(4);
+    MFI->CreateFixedObject(4,4,true);
+  }
+
+  interruptFrameLayout(MF);
+  analyzeFrameIndexes(MF);
+}
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 000000000000..8be15bfb857d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,53 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+  const MBlazeSubtarget &STI;
+
+public:
+  explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+  }
+
+  /// targetHandlesStackFrameRounding - Returns true if the target is
+  /// responsible for rounding up the stack frame (probably at emitPrologue
+  /// time).
+  bool targetHandlesStackFrameRounding() const { return true; }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                    RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index e64dd0e3e2c3..6b4349766f37 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -81,13 +81,9 @@ private:
   SDNode *getGlobalBaseReg();
   SDNode *Select(SDNode *N);
 
-  // Complex Pattern.
-  bool SelectAddr(SDNode *Op, SDValue N,
-                  SDValue &Base, SDValue &Offset);
-
   // Address Selection
-  bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index);
-  bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base);
+  bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+  bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
 
   // getI32Imm - Return a target constant with the specified value, of type i32.
   inline SDValue getI32Imm(unsigned Imm) {
@@ -122,7 +118,7 @@ static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
 /// can be represented as an indexed [r+r] operation.  Returns false if it
 /// can be more efficiently represented with [r+imm].
 bool MBlazeDAGToDAGISel::
-SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
   if (N.getOpcode() == ISD::FrameIndex) return false;
   if (N.getOpcode() == ISD::TargetExternalSymbol ||
       N.getOpcode() == ISD::TargetGlobalAddress)
@@ -137,8 +133,8 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
         N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
       return false; // jump tables.
 
-    Base = N.getOperand(1);
-    Index = N.getOperand(0);
+    Base = N.getOperand(0);
+    Index = N.getOperand(1);
     return true;
   }
 
@@ -149,9 +145,9 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
 /// a signed 32-bit displacement [r+imm], and if it is not better
 /// represented as reg+reg.
 bool MBlazeDAGToDAGISel::
-SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
   // If this can be more profitably realized as r+r, fail.
-  if (SelectAddrRegReg(Op, N, Disp, Base))
+  if (SelectAddrRegReg(N, Base, Disp))
     return false;
 
   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
@@ -163,7 +159,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
       } else {
         Base = N.getOperand(0);
       }
-      DEBUG( errs() << "WESLEY: Using Operand Immediate\n" );
       return true; // [r+i]
     }
   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
@@ -171,7 +166,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
     uint32_t Imm = CN->getZExtValue();
     Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
     Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
-    DEBUG( errs() << "WESLEY: Using Constant Node\n" );
     return true;
   }
 
@@ -190,76 +184,21 @@ SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-/// ComplexPattern used on MBlazeInstrInfo
-/// Used on MBlaze Load/Store instructions
-bool MBlazeDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
-  // if Address is FI, get the TargetFrameIndex.
-  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
-    return true;
-  }
-
-  // on PIC code Load GA
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
-        (Addr.getOpcode() == ISD::TargetConstantPool) ||
-        (Addr.getOpcode() == ISD::TargetJumpTable)){
-      Base   = CurDAG->getRegister(MBlaze::R15, MVT::i32);
-      Offset = Addr;
-      return true;
-    }
-  } else {
-    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
-        Addr.getOpcode() == ISD::TargetGlobalAddress))
-      return false;
-  }
-
-  // Operand is a result from an ADD.
-  if (Addr.getOpcode() == ISD::ADD) {
-    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      if (isUInt<16>(CN->getZExtValue())) {
-
-        // If the first operand is a FI, get the TargetFI Node
-        if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
-                                    (Addr.getOperand(0))) {
-          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
-        } else {
-          Base = Addr.getOperand(0);
-        }
-
-        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
-        return true;
-      }
-    }
-  }
-
-  Base   = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-  return true;
-}
-
 /// Select instructions not customized! Used for
 /// expanded, promoted and normal instructions
 SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
   DebugLoc dl = Node->getDebugLoc();
 
-  // Dump information about the Node being selected
-  DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
-
   // If we have a custom node, we already have selected!
-  if (Node->isMachineOpcode()) {
-    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+  if (Node->isMachineOpcode())
     return NULL;
-  }
 
   ///
   // Instruction Selection not handled by the auto-generated
   // tablegen selection should be handled here.
   ///
-  switch(Opcode) {
+  switch (Opcode) {
     default: break;
 
     // Get target GOT address.
@@ -271,7 +210,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
         int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
         EVT VT = Node->getValueType(0);
         SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
-        unsigned Opc = MBlaze::ADDI;
+        unsigned Opc = MBlaze::ADDIK;
         if (Node->hasOneUse())
           return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
         return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
@@ -289,8 +228,8 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
         SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
         SDValue InFlag(0, 0);
 
-        if ( (isa<GlobalAddressSDNode>(Callee)) ||
-             (isa<ExternalSymbolSDNode>(Callee)) )
+        if ((isa<GlobalAddressSDNode>(Callee)) ||
+            (isa<ExternalSymbolSDNode>(Callee)))
         {
           /// Direct call for global addresses and external symbols
           SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
@@ -309,7 +248,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
 
         // Emit Jump and Link Register
         SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
-                                                 MVT::Flag, R20Reg, Chain);
+                                                 MVT::Glue, R20Reg, Chain);
         Chain  = SDValue(ResNode, 0);
         InFlag = SDValue(ResNode, 1);
         ReplaceUses(SDValue(Node, 0), Chain);
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 1730b689d361..2f40bfc89601 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -35,6 +35,11 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                CCValAssign::LocInfo &LocInfo,
+                                ISD::ArgFlagsTy &ArgFlags,
+                                CCState &State);
+
 const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
     case MBlazeISD::JmpLink    : return "MBlazeISD::JmpLink";
@@ -56,9 +61,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Set up the register classes
-  addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+  addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
   if (Subtarget->hasFPU()) {
-    addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+    addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
     setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
   }
 
@@ -86,6 +91,10 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
 
+  // Sign extended loads must be expanded
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
   // MBlaze has no REM or DIVREM operations.
   setOperationAction(ISD::UREM,    MVT::i32, Expand);
   setOperationAction(ISD::SREM,    MVT::i32, Expand);
@@ -112,8 +121,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
   }
 
   // Expand unsupported conversions
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 
   // Expand SELECT_CC
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
@@ -166,7 +175,6 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
   // Use the default for now
   setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
-  setOperationAction(ISD::MEMBARRIER,        MVT::Other, Expand);
 
   // MBlaze doesn't have extending float->double load/store
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -204,172 +212,353 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
 //===----------------------------------------------------------------------===//
 MachineBasicBlock*
 MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  DebugLoc dl = MI->getDebugLoc();
-
+                                                  MachineBasicBlock *MBB)
+                                                  const {
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
+
   case MBlaze::ShiftRL:
   case MBlaze::ShiftRA:
-  case MBlaze::ShiftL: {
-    // To "insert" a shift left instruction, we actually have to insert a
-    // simple loop.  The incoming instruction knows the destination vreg to
-    // set, the source vreg to operate over and the shift amount.
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
-
-    // start:
-    //   andi     samt, samt, 31
-    //   beqid    samt, finish
-    //   add      dst, src, r0
-    // loop:
-    //   addik    samt, samt, -1
-    //   sra      dst, dst
-    //   bneid    samt, loop
-    //   nop
-    // finish:
-    MachineFunction *F = BB->getParent();
-    MachineRegisterInfo &R = F->getRegInfo();
-    MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
-    F->insert(It, loop);
-    F->insert(It, finish);
-
-    // Update machine-CFG edges by transfering adding all successors and
-    // remaining instructions from the current block to the new block which
-    // will contain the Phi node for the select.
-    finish->splice(finish->begin(), BB,
-                   llvm::next(MachineBasicBlock::iterator(MI)),
-                   BB->end());
-    finish->transferSuccessorsAndUpdatePHIs(BB);
-
-    // Add the true and fallthrough blocks as its successors.
-    BB->addSuccessor(loop);
-    BB->addSuccessor(finish);
-
-    // Next, add the finish block as a successor of the loop block
-    loop->addSuccessor(finish);
-    loop->addSuccessor(loop);
-
-    unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
-      .addReg(MI->getOperand(2).getReg())
-      .addImm(31);
-
-    unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
-      .addReg(MI->getOperand(1).getReg())
-      .addImm(0);
-
-    BuildMI(BB, dl, TII->get(MBlaze::BEQID))
-      .addReg(IAMT)
-      .addMBB(finish);
-
-    unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
-      .addReg(IVAL).addMBB(BB)
-      .addReg(NDST).addMBB(loop);
-
-    unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-    BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
-      .addReg(IAMT).addMBB(BB)
-      .addReg(NAMT).addMBB(loop);
-
-    if (MI->getOpcode() == MBlaze::ShiftL)
-      BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
-    else if (MI->getOpcode() == MBlaze::ShiftRA)
-      BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
-    else if (MI->getOpcode() == MBlaze::ShiftRL)
-      BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
-    else
-        llvm_unreachable( "Cannot lower unknown shift instruction" );
-
-    BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT)
-      .addReg(SAMT)
-      .addImm(-1);
-
-    BuildMI(loop, dl, TII->get(MBlaze::BNEID))
-      .addReg(NAMT)
-      .addMBB(loop);
-
-    BuildMI(*finish, finish->begin(), dl,
-            TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
-      .addReg(IVAL).addMBB(BB)
-      .addReg(NDST).addMBB(loop);
-
-    // The pseudo instruction is no longer needed so remove it
+  case MBlaze::ShiftL:
+    return EmitCustomShift(MI, MBB);
+
+  case MBlaze::Select_FCC:
+  case MBlaze::Select_CC:
+    return EmitCustomSelect(MI, MBB);
+
+  case MBlaze::CAS32:
+  case MBlaze::SWP32:
+  case MBlaze::LAA32:
+  case MBlaze::LAS32:
+  case MBlaze::LAD32:
+  case MBlaze::LAO32:
+  case MBlaze::LAX32:
+  case MBlaze::LAN32:
+    return EmitCustomAtomic(MI, MBB);
+
+  case MBlaze::MEMBARRIER:
+    // The Microblaze does not need memory barriers. Just delete the pseudo
+    // instruction and finish.
     MI->eraseFromParent();
-    return finish;
+    return MBB;
+  }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+                                      MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // To "insert" a shift left instruction, we actually have to insert a
+  // simple loop.  The incoming instruction knows the destination vreg to
+  // set, the source vreg to operate over and the shift amount.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  // start:
+  //   andi     samt, samt, 31
+  //   beqid    samt, finish
+  //   add      dst, src, r0
+  // loop:
+  //   addik    samt, samt, -1
+  //   sra      dst, dst
+  //   bneid    samt, loop
+  //   nop
+  // finish:
+  MachineFunction *F = MBB->getParent();
+  MachineRegisterInfo &R = F->getRegInfo();
+  MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, loop);
+  F->insert(It, finish);
+
+  // Update machine-CFG edges by transfering adding all successors and
+  // remaining instructions from the current block to the new block which
+  // will contain the Phi node for the select.
+  finish->splice(finish->begin(), MBB,
+                 llvm::next(MachineBasicBlock::iterator(MI)),
+                 MBB->end());
+  finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Add the true and fallthrough blocks as its successors.
+  MBB->addSuccessor(loop);
+  MBB->addSuccessor(finish);
+
+  // Next, add the finish block as a successor of the loop block
+  loop->addSuccessor(finish);
+  loop->addSuccessor(loop);
+
+  unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+    .addReg(MI->getOperand(2).getReg())
+    .addImm(31);
+
+  unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(0);
+
+  BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+    .addReg(IAMT)
+    .addMBB(finish);
+
+  unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+    .addReg(IVAL).addMBB(MBB)
+    .addReg(NDST).addMBB(loop);
+
+  unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+    .addReg(IAMT).addMBB(MBB)
+    .addReg(NAMT).addMBB(loop);
+
+  if (MI->getOpcode() == MBlaze::ShiftL)
+    BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+  else if (MI->getOpcode() == MBlaze::ShiftRA)
+    BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+  else if (MI->getOpcode() == MBlaze::ShiftRL)
+    BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+  else
+    llvm_unreachable("Cannot lower unknown shift instruction");
+
+  BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+    .addReg(SAMT)
+    .addImm(-1);
+
+  BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+    .addReg(NAMT)
+    .addMBB(loop);
+
+  BuildMI(*finish, finish->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+    .addReg(IVAL).addMBB(MBB)
+    .addReg(NDST).addMBB(loop);
+
+  // The pseudo instruction is no longer needed so remove it
+  MI->eraseFromParent();
+  return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern.  The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  //  thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   setcc r1, r2, r3
+  //   bNE   r1, r0, copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineFunction *F = MBB->getParent();
+  MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+  unsigned Opc;
+  switch (MI->getOperand(4).getImm()) {
+  default: llvm_unreachable("Unknown branch condition");
+  case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+  case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+  case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+  case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+  case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+  case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+  }
+
+  F->insert(It, flsBB);
+  F->insert(It, dneBB);
+
+  // Transfer the remainder of MBB and its successor edges to dneBB.
+  dneBB->splice(dneBB->begin(), MBB,
+                llvm::next(MachineBasicBlock::iterator(MI)),
+                MBB->end());
+  dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  MBB->addSuccessor(flsBB);
+  MBB->addSuccessor(dneBB);
+  flsBB->addSuccessor(dneBB);
+
+  BuildMI(MBB, dl, TII->get(Opc))
+    .addReg(MI->getOperand(3).getReg())
+    .addMBB(dneBB);
+
+  //  sinkMBB:
+  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  //  ...
+  //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+  //  .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+  //  .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+  BuildMI(*dneBB, dneBB->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+    .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // All atomic instructions on the Microblaze are implemented using the
+  // load-linked / store-conditional style atomic instruction sequences.
+  // Thus, all operations will look something like the following:
+  // 
+  //  start:
+  //    lwx     RV, RP, 0
+  //    <do stuff>
+  //    swx     RV, RP, 0
+  //    addic   RC, R0, 0
+  //    bneid   RC, start
+  //
+  //  exit:
+  //
+  // To "insert" a shift left instruction, we actually have to insert a
+  // simple loop.  The incoming instruction knows the destination vreg to
+  // set, the source vreg to operate over and the shift amount.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  // start:
+  //   andi     samt, samt, 31
+  //   beqid    samt, finish
+  //   add      dst, src, r0
+  // loop:
+  //   addik    samt, samt, -1
+  //   sra      dst, dst
+  //   bneid    samt, loop
+  //   nop
+  // finish:
+  MachineFunction *F = MBB->getParent();
+  MachineRegisterInfo &R = F->getRegInfo();
+
+  // Create the start and exit basic blocks for the atomic operation
+  MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, start);
+  F->insert(It, exit);
+
+  // Update machine-CFG edges by transfering adding all successors and
+  // remaining instructions from the current block to the new block which
+  // will contain the Phi node for the select.
+  exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+               MBB->end());
+  exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Add the fallthrough block as its successors.
+  MBB->addSuccessor(start);
+
+  BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(1).getReg())
+    .addReg(MBlaze::R0);
+
+  MachineBasicBlock *final = start;
+  unsigned finalReg = 0;
+
+  switch (MI->getOpcode()) {
+  default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+  case MBlaze::SWP32:
+    finalReg = MI->getOperand(2).getReg();
+    start->addSuccessor(exit);
+    start->addSuccessor(start);
+    break;
+
+  case MBlaze::LAN32:
+  case MBlaze::LAX32:
+  case MBlaze::LAO32:
+  case MBlaze::LAD32:
+  case MBlaze::LAS32:
+  case MBlaze::LAA32: {
+    unsigned opcode = 0;
+    switch (MI->getOpcode()) {
+    default: llvm_unreachable("Cannot lower unknown atomic load!");
+    case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+    case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+    case MBlaze::LAD32: opcode = MBlaze::AND; break;
+    case MBlaze::LAO32: opcode = MBlaze::OR; break;
+    case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+    case MBlaze::LAN32: opcode = MBlaze::AND; break;
     }
 
-  case MBlaze::Select_FCC:
-  case MBlaze::Select_CC: {
-    // To "insert" a SELECT_CC instruction, we actually have to insert the
-    // diamond control-flow pattern.  The incoming instruction knows the
-    // destination vreg to set, the condition code register to branch on, the
-    // true/false values to select between, and a branch opcode to use.
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator It = BB;
-    ++It;
-
-    //  thisMBB:
-    //  ...
-    //   TrueVal = ...
-    //   setcc r1, r2, r3
-    //   bNE   r1, r0, copy1MBB
-    //   fallthrough --> copy0MBB
-    MachineFunction *F = BB->getParent();
-    MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
-
-    unsigned Opc;
-    switch (MI->getOperand(4).getImm()) {
-    default: llvm_unreachable( "Unknown branch condition" );
-    case MBlazeCC::EQ: Opc = MBlaze::BNEID; break;
-    case MBlazeCC::NE: Opc = MBlaze::BEQID; break;
-    case MBlazeCC::GT: Opc = MBlaze::BLEID; break;
-    case MBlazeCC::LT: Opc = MBlaze::BGEID; break;
-    case MBlazeCC::GE: Opc = MBlaze::BLTID; break;
-    case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
+    finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+    start->addSuccessor(exit);
+    start->addSuccessor(start);
+
+    BuildMI(start, dl, TII->get(opcode), finalReg)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(2).getReg());
+
+    if (MI->getOpcode() == MBlaze::LAN32) {
+      unsigned tmp = finalReg;
+      finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+      BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+        .addReg(tmp)
+        .addImm(-1);
     }
+    break;
+  }
+
+  case MBlaze::CAS32: {
+    finalReg = MI->getOperand(3).getReg();
+    final = F->CreateMachineBasicBlock(LLVM_BB);
+
+    F->insert(It, final);
+    start->addSuccessor(exit);
+    start->addSuccessor(final);
+    final->addSuccessor(exit);
+    final->addSuccessor(start);
+
+    unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+    BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(2).getReg());
 
-    F->insert(It, flsBB);
-    F->insert(It, dneBB);
-
-    // Transfer the remainder of BB and its successor edges to dneBB.
-    dneBB->splice(dneBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
-    dneBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    BB->addSuccessor(flsBB);
-    BB->addSuccessor(dneBB);
-    flsBB->addSuccessor(dneBB);
-
-    BuildMI(BB, dl, TII->get(Opc))
-      .addReg(MI->getOperand(3).getReg())
-      .addMBB(dneBB);
-
-    //  sinkMBB:
-    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-    //  ...
-    //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
-    //  .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
-    //  .addReg(MI->getOperand(2).getReg()).addMBB(BB);
-
-    BuildMI(*dneBB, dneBB->begin(), dl,
-            TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
-      .addReg(MI->getOperand(1).getReg()).addMBB(BB);
-
-    MI->eraseFromParent();   // The pseudo instruction is gone now.
-    return dneBB;
+    BuildMI(start, dl, TII->get(MBlaze::BNEID))
+      .addReg(CMP)
+      .addMBB(exit);
+
+    final->moveAfter(start);
+    exit->moveAfter(final);
+    break;
   }
   }
+
+  unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(final, dl, TII->get(MBlaze::SWX))
+    .addReg(finalReg)
+    .addReg(MI->getOperand(1).getReg())
+    .addReg(MBlaze::R0);
+
+  BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+    .addReg(MBlaze::R0)
+    .addImm(0);
+
+  BuildMI(final, dl, TII->get(MBlaze::BNEID))
+    .addReg(CHK)
+    .addMBB(start);
+
+  // The pseudo instruction is no longer needed so remove it
+  MI->eraseFromParent();
+  return exit;
 }
 
 //===----------------------------------------------------------------------===//
@@ -392,9 +581,9 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
     CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
                     .getValue(1);
   } else {
-    llvm_unreachable( "Cannot lower select_cc with unknown type" );
+    llvm_unreachable("Cannot lower select_cc with unknown type");
   }
- 
+
   return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
                      CompareFlag);
 }
@@ -421,15 +610,12 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   SDValue HiPart;
   // FIXME there isn't actually debug info here
   DebugLoc dl = Op.getDebugLoc();
-  bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
 
   EVT PtrVT = Op.getValueType();
   JumpTableSDNode *JT  = cast<JumpTableSDNode>(Op);
 
-  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
   return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
-  //return JTI;
 }
 
 SDValue MBlazeTargetLowering::
@@ -440,7 +626,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
 
   SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
-                                         N->getOffset(), MBlazeII::MO_ABS_HILO);
+                                         N->getOffset(), 0);
   return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
 }
 
@@ -456,7 +642,8 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV),
                       false, false, 0);
 }
 
@@ -466,52 +653,24 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
 
 #include "MBlazeGenCallingConv.inc"
 
-static bool CC_MBlaze2(unsigned ValNo, EVT ValVT,
-                       EVT LocVT, CCValAssign::LocInfo LocInfo,
-                       ISD::ArgFlagsTy ArgFlags, CCState &State) {
-  static const unsigned RegsSize=6;
-  static const unsigned IntRegs[] = {
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                CCValAssign::LocInfo &LocInfo,
+                                ISD::ArgFlagsTy &ArgFlags,
+                                CCState &State) {
+  static const unsigned ArgRegs[] = {
     MBlaze::R5, MBlaze::R6, MBlaze::R7,
     MBlaze::R8, MBlaze::R9, MBlaze::R10
   };
 
-  static const unsigned FltRegs[] = {
-    MBlaze::F5, MBlaze::F6, MBlaze::F7,
-    MBlaze::F8, MBlaze::F9, MBlaze::F10
-  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+  if (!Reg) return false;
 
-  unsigned Reg=0;
-
-  // Promote i8 and i16
-  if (LocVT == MVT::i8 || LocVT == MVT::i16) {
-    LocVT = MVT::i32;
-    if (ArgFlags.isSExt())
-      LocInfo = CCValAssign::SExt;
-    else if (ArgFlags.isZExt())
-      LocInfo = CCValAssign::ZExt;
-    else
-      LocInfo = CCValAssign::AExt;
-  }
-
-  if (ValVT == MVT::i32) {
-    Reg = State.AllocateReg(IntRegs, RegsSize);
-    LocVT = MVT::i32;
-  } else if (ValVT == MVT::f32) {
-    Reg = State.AllocateReg(FltRegs, RegsSize);
-    LocVT = MVT::f32;
-  }
+  unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+  State.AllocateStack(SizeInBytes, SizeInBytes);
+  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 
-  if (!Reg) {
-    unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
-    unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
-    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
-  } else {
-    unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
-    State.AllocateStack(SizeInBytes, SizeInBytes);
-    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  }
-
-  return false; // CC must always match
+  return true;
 }
 
 //===----------------------------------------------------------------------===//
@@ -532,31 +691,35 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
   // MBlaze does not yet support tail call optimization
   isTailCall = false;
 
+  // The MBlaze requires stack slots for arguments passed to var arg
+  // functions even if they are passed in registers.
+  bool needsRegArgSlots = isVarArg;
+
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
-  CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2);
+  CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  // Variable argument function calls require a minimum of 24-bytes of stack
+  if (isVarArg && NumBytes < 24) NumBytes = 24;
+
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
 
-  // First/LastArgStackLoc contains the first/last
-  // "at stack" argument location.
-  int LastArgStackLoc = 0;
-  unsigned FirstStackArgLoc = 0;
-
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    EVT RegVT = VA.getLocVT();
+    MVT RegVT = VA.getLocVT();
     SDValue Arg = OutVals[i];
 
     // Promote the value if needed.
@@ -582,20 +745,31 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
       // Register can't get to this point...
       assert(VA.isMemLoc());
 
+      // Since we are alread passing values on the stack we don't
+      // need to worry about creating additional slots for the
+      // values passed via registers.
+      needsRegArgSlots = false;
+
       // Create the frame index object for this incoming parameter
-      LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
-      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                      LastArgStackLoc, true);
+      unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+      unsigned StackLoc = VA.getLocMemOffset() + 4;
+      int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
 
       SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
 
       // emit ISD::STORE whichs stores the
       // parameter value to a stack Location
-      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         MachinePointerInfo(),
                                          false, false, 0));
     }
   }
 
+  // If we need to reserve stack space for the arguments passed via registers
+  // then create a fixed stack object at the beginning of the stack.
+  if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+    MFI->CreateFixedObject(28,0,true);
+
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
   if (!MemOpChains.empty())
@@ -616,19 +790,18 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
-  unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                getPointerTy(), 0, OpFlag);
+                                getPointerTy(), 0, 0);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
-                                getPointerTy(), OpFlag);
+                                getPointerTy(), 0);
 
   // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
   //             = Chain, Callee, Reg#1, Reg#2, ...
   //
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -678,7 +851,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
                                RVLocs[i].getValVT(), InFlag).getValue(1);
     InFlag = Chain.getValue(2);
     InVals.push_back(Chain.getValue(0));
-  } 
+  }
 
   return Chain;
 }
@@ -713,30 +886,28 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
 
-  CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2);
+  CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
   SDValue StackPtr;
 
-  unsigned FirstStackArgLoc = 0;
-
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
 
     // Arguments stored on registers
     if (VA.isRegLoc()) {
-      EVT RegVT = VA.getLocVT();
+      MVT RegVT = VA.getLocVT();
       ArgRegEnd = VA.getLocReg();
       TargetRegisterClass *RC = 0;
 
       if (RegVT == MVT::i32)
-        RC = MBlaze::CPURegsRegisterClass;
+        RC = MBlaze::GPRRegisterClass;
       else if (RegVT == MVT::f32)
-        RC = MBlaze::FGR32RegisterClass;
+        RC = MBlaze::GPRRegisterClass;
       else
         llvm_unreachable("RegVT not supported by LowerFormalArguments");
 
       // Transform the arguments stored on
       // physical registers into virtual ones
-      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+      unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it has been passed promoted
@@ -756,9 +927,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
       }
 
       InVals.push_back(ArgValue);
-
     } else { // VA.isRegLoc()
-
       // sanity check
       assert(VA.isMemLoc());
 
@@ -774,41 +943,44 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
       // offset on PEI::calculateFrameObjectOffsets.
       // Arguments are always 32-bit.
       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+      unsigned StackLoc = VA.getLocMemOffset() + 4;
       int FI = MFI->CreateFixedObject(ArgSize, 0, true);
-      MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
-        (FirstStackArgLoc + VA.getLocMemOffset())));
+      MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+      MBlazeFI->recordLiveIn(FI);
 
       // Create load nodes to retrieve arguments from the stack
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
 
   // To meet ABI, when VARARGS are passed on registers, the registers
   // must have their values written to the caller stack frame. If the last
-  // argument was placed in the stack, there's no need to save any register. 
+  // argument was placed in the stack, there's no need to save any register.
   if ((isVarArg) && ArgRegEnd) {
     if (StackPtr.getNode() == 0)
       StackPtr = DAG.getRegister(StackReg, getPointerTy());
 
     // The last register argument that must be saved is MBlaze::R10
-    TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass;
+    TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
 
     unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
     unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
     unsigned End   = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
-    unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin);
+    unsigned StackLoc = Start - Begin + 1;
 
     for (; Start <= End; ++Start, ++StackLoc) {
       unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
-      unsigned LiveReg = MF.addLiveIn(Reg, RC);
+      unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
 
       int FI = MFI->CreateFixedObject(4, 0, true);
-      MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
+      MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
 
       // Record the frame index of the first variable argument
@@ -818,7 +990,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
     }
   }
 
-  // All stores are grouped in one node to allow the matching between 
+  // All stores are grouped in one node to allow the matching between
   // the size of Ins and InVals. This only happens when on varg functions
   if (!OutChains.empty()) {
     OutChains.push_back(Chain);
@@ -872,13 +1044,18 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
     Flag = Chain.getValue(1);
   }
 
-  // Return on MBlaze is always a "rtsd R15, 8"
+  // If this function is using the interrupt_handler calling convention
+  // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+  unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet 
+                                                              : MBlazeISD::Ret;
+  unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14 
+                                                              : MBlaze::R15;
+  SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+
   if (Flag.getNode())
-    return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
-                       Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag);
-  else // Return Void
-    return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
-                       Chain, DAG.getRegister(MBlaze::R15, MVT::i32));
+    return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+
+  return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
 }
 
 //===----------------------------------------------------------------------===//
@@ -909,6 +1086,37 @@ getConstraintType(const std::string &Constraint) const
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'd':
+  case 'y':
+    if (type->isIntegerTy())
+      weight = CW_Register;
+    break;
+  case 'f':
+    if (type->isFloatTy())
+      weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
 /// return a list of registers that can be used to satisfy the constraint.
 /// This should only be used for C_RegisterClass constraints.
@@ -917,10 +1125,10 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     case 'r':
-      return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+      return std::make_pair(0U, MBlaze::GPRRegisterClass);
     case 'f':
       if (VT == MVT::f32)
-        return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+        return std::make_pair(0U, MBlaze::GPRRegisterClass);
     }
   }
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -940,6 +1148,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
     // GCC MBlaze Constraint Letters
     case 'd':
     case 'y':
+    case 'f':
       return make_vector<unsigned>(
         MBlaze::R3,  MBlaze::R4,  MBlaze::R5,  MBlaze::R6,
         MBlaze::R7,  MBlaze::R9,  MBlaze::R10, MBlaze::R11,
@@ -947,15 +1156,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
         MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
         MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
         MBlaze::R30, MBlaze::R31, 0);
-
-    case 'f':
-      return make_vector<unsigned>(
-        MBlaze::F3,  MBlaze::F4,  MBlaze::F5,  MBlaze::F6,
-        MBlaze::F7,  MBlaze::F9,  MBlaze::F10, MBlaze::F11,
-        MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
-        MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
-        MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
-        MBlaze::F30, MBlaze::F31, 0);
   }
   return std::vector<unsigned>();
 }
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 5ec2563c555c..91649bc6db08 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,6 +15,7 @@
 #ifndef MBlazeISELLOWERING_H
 #define MBlazeISELLOWERING_H
 
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "MBlaze.h"
@@ -31,6 +32,30 @@ namespace llvm {
       GE,
       LE
     };
+
+    inline static CC getOppositeCondition(CC cc) {
+      switch (cc) {
+      default: llvm_unreachable("Unknown condition code");
+      case EQ: return NE;
+      case NE: return EQ;
+      case GT: return LE;
+      case LT: return GE;
+      case GE: return LT;
+      case LE: return GE;
+      }
+    }
+
+    inline static const char *MBlazeCCToString(CC cc) {
+      switch (cc) {
+      default: llvm_unreachable("Unknown condition code");
+      case EQ: return "eq";
+      case NE: return "ne";
+      case GT: return "gt";
+      case LT: return "lt";
+      case GE: return "ge";
+      case LE: return "le";
+      }
+    }
   }
 
   namespace MBlazeISD {
@@ -53,8 +78,11 @@ namespace llvm {
       // Integer Compare
       ICmp,
 
-      // Return
-      Ret
+      // Return from subroutine
+      Ret,
+
+      // Return from interrupt
+      IRet
     };
   }
 
@@ -121,6 +149,15 @@ namespace llvm {
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
 
+    virtual MachineBasicBlock*
+      EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+    virtual MachineBasicBlock*
+      EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+    virtual MachineBasicBlock*
+            EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
@@ -128,6 +165,11 @@ namespace llvm {
     // Inline asm support
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
               getRegForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index 657b1d4940a7..094de5c0c1a8 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -19,72 +19,72 @@
 // Memory Access Instructions
 //===----------------------------------------------------------------------===//
 class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr),
+             TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>;
+                [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
 
 class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TAI<op, (outs FGR32:$dst), (ins memri:$addr),
-                  !strconcat(instr_asm, "   $dst, $addr"),
-                  [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>;
+              TB<op, (outs GPR:$dst), (ins memri:$addr),
+                 !strconcat(instr_asm, "   $dst, $addr"),
+                 [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
 
 class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr),
+              TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
                  !strconcat(instr_asm, "   $dst, $addr"),
-                 [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>;
+                 [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>;
 
 class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-               TAI<op, (outs), (ins FGR32:$dst, memrr:$addr),
-                   !strconcat(instr_asm, "   $dst, $addr"),
-                   [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>;
+               TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+                  !strconcat(instr_asm, "   $dst, $addr"),
+                  [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>;
 
 class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
              InstrItinClass itin> :
-             TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+             TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+                [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
                [], itin>;
 
 class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
              InstrItinClass itin> :
-             TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
-                !strconcat(instr_asm, "   $dst, $c, $b"),
-                [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
-
-class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
-
-class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
-
-class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
-              InstrItinClass itin> :
-              TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b),
-                 !strconcat(instr_asm, "   $dst, $b"),
-                 [], itin>;
+             TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+                 !strconcat(instr_asm, "   $dst, $c, $b"),
+                 [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class LogicF<bits<6> op, string instr_asm> :
-             TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [],
-                 IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
 
 class LogicFI<bits<6> op, string instr_asm> :
-             TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [],
-                 IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
+
+let rb=0 in {
+  class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+
+  class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+
+  class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+                InstrItinClass itin> :
+                TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+                   !strconcat(instr_asm, "   $dst, $b"),
+                   [], itin>;
+}
 
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
@@ -94,24 +94,25 @@ class LogicFI<bits<6> op, string instr_asm> :
 // FPU Arithmetic Instructions
 //===----------------------------------------------------------------------===//
 let Predicates=[HasFPU] in {
-  def FOR    :  LogicF<0x28, "or     ">;
   def FORI   : LogicFI<0x28, "ori    ">;
   def FADD   :  ArithF<0x16, 0x000, "fadd   ", fadd, IIAlu>;
   def FRSUB  : ArithFR<0x16, 0x080, "frsub  ", fsub, IIAlu>;
   def FMUL   :  ArithF<0x16, 0x100, "fmul   ", fmul, IIAlu>;
   def FDIV   :  ArithF<0x16, 0x180, "fdiv   ", fdiv, IIAlu>;
+}
 
-  def LWF    :   LoadFM<0x32, "lw     ", load>;
-  def LWFI   :  LoadFMI<0x32, "lwi    ", load>;
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+  def LWF    :   LoadFM<0x32, "lw      ", load>;
+  def LWFI   :  LoadFMI<0x3A, "lwi     ", load>;
 
-  def SWF    :  StoreFM<0x32, "sw     ", store>;
-  def SWFI   : StoreFMI<0x32, "swi    ", store>;
+  def SWF    :  StoreFM<0x36, "sw      ", store>;
+  def SWFI   : StoreFMI<0x3E, "swi     ", store>;
 }
 
 let Predicates=[HasFPU,HasSqrt] in {
   def FLT    : ArithIF<0x16, 0x280, "flt    ", IIAlu>;
   def FINT   : ArithFI<0x16, 0x300, "fint   ", IIAlu>;
-  def FSQRT  : ArithF2<0x16, 0x300, "fsqrt  ", IIAlu>;
+  def FSQRT  : ArithF2<0x16, 0x380, "fsqrt  ", IIAlu>;
 }
 
 let isAsCheapAsAMove = 1 in {
@@ -126,98 +127,98 @@ let isAsCheapAsAMove = 1 in {
 
 
 let usesCustomInserter = 1 in {
-  def Select_FCC : MBlazePseudo<(outs FGR32:$dst),
-    (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC),
+  def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
     "; SELECT_FCC PSEUDO!",
     []>;
 }
 
 // Floating point conversions
 let Predicates=[HasFPU] in {
-  def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>;
-  def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>;
-  def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>;
+  def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+  def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+  def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
 }
 
 // SET_CC operations
 let Predicates=[HasFPU] in {
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 1)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (XOR (FCMP_UN FGR32:$L, FGR32:$R),
-                            (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_NE FGR32:$L, FGR32:$R), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_GT FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_LT FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_GE FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
-                           (FCMP_LE FGR32:$L, FGR32:$R)), 2)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETO),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_UN FGR32:$L, FGR32:$R), 1)>;
-  def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO),
-            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
-                       (FCMP_UN FGR32:$L, FGR32:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (XOR (FCMP_UN GPR:$L, GPR:$R),
+                            (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_GT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_LT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_GE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_LE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_GT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_LT GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_GE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_LE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_NE GPR:$L, GPR:$R), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (OR (FCMP_UN GPR:$L, GPR:$R),
+                           (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_UN GPR:$L, GPR:$R), 1)>;
+  def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+            (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                       (FCMP_UN GPR:$L, GPR:$R), 2)>;
 }
 
 // SELECT operations
-def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F),
-          (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>;
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+          (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
 
 //===----------------------------------------------------------------------===//
 // Patterns for Floating Point Instructions
 //===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>;
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index 51584111e666..32098452416b 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -10,144 +10,220 @@
 //===----------------------------------------------------------------------===//
 // FSL Instruction Formats
 //===----------------------------------------------------------------------===//
-class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-              TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b),
-                 !strconcat(instr_asm, " $dst, $b"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>;
-
-class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins fslimm:$b),
-                 !strconcat(instr_asm, " $dst, $b"),
-                 [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>;
-
-class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-              TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b),
-                 !strconcat(instr_asm, " $v, $b"),
-                 [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>;
-
-class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> :
-             TAI<op, (outs), (ins CPURegs:$v, fslimm:$b),
-                 !strconcat(instr_asm, " $v, $b"),
-                 [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>;
-
-class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
-               TA<op, flags, (outs), (ins CPURegs:$b),
-                  !strconcat(instr_asm, " $b"),
-                  [(OpNode CPURegs:$b)], IIAlu>;
-
-class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> :
-              TAI<op, (outs), (ins fslimm:$b),
-                  !strconcat(instr_asm, " $b"),
-                  [(OpNode immZExt4:$b)], IIAlu>;
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+             MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+                        !strconcat(instr_asm, " $dst, $b"),
+                        [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu>
+{
+    bits<5> rd;
+    bits<4> fslno;
+
+    let Inst{6-10}  = rd;
+    let Inst{11-15} = 0x0;
+    let Inst{16}    = 0x0;
+    let Inst{17-21} = flags; // NCTAE
+    let Inst{22-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+                         !strconcat(instr_asm, " $dst, $b"),
+                         [(set GPR:$dst, (OpNode GPR:$b))], IIAlu>
+{
+    bits<5> rd;
+    bits<5> rb;
+
+    let Inst{6-10}  = rd;
+    let Inst{11-15} = 0x0;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x0;
+    let Inst{22-26} = flags; // NCTAE
+    let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+             MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+                        !strconcat(instr_asm, " $v, $b"),
+                        [(OpNode GPR:$v, immZExt4:$b)], IIAlu>
+{
+    bits<5> ra;
+    bits<4> fslno;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = ra;
+    let Inst{16}    = 0x1;
+    let Inst{17-20} = flags; // NCTA
+    let Inst{21-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+                         !strconcat(instr_asm, " $v, $b"),
+                         [(OpNode GPR:$v, GPR:$b)], IIAlu>
+{
+    bits<5> ra;
+    bits<5> rb;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = ra;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x1;
+    let Inst{22-25} = flags; // NCTA
+    let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+              MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+                         !strconcat(instr_asm, " $b"),
+                         [(OpNode immZExt4:$b)], IIAlu>
+{
+    bits<4> fslno;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = 0x0;
+    let Inst{16}    = 0x1;
+    let Inst{17-20} = flags; // NCTA
+    let Inst{21-27} = 0x0;
+    let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+               MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+                          !strconcat(instr_asm, " $b"),
+                          [(OpNode GPR:$b)], IIAlu>
+{
+    bits<5> rb;
+
+    let Inst{6-10}  = 0x0;
+    let Inst{11-15} = 0x0;
+    let Inst{16-20} = rb;
+    let Inst{21}    = 0x1;
+    let Inst{22-25} = flags; // NCTA
+    let Inst{26-31} = 0x0;
+}
 
 //===----------------------------------------------------------------------===//
 // FSL Get Instructions
 //===----------------------------------------------------------------------===//
-def GET      : FSLGet<0x1B, "get      ", int_mblaze_fsl_get>;
-def AGET     : FSLGet<0x1B, "aget     ", int_mblaze_fsl_aget>;
-def CGET     : FSLGet<0x1B, "cget     ", int_mblaze_fsl_cget>;
-def CAGET    : FSLGet<0x1B, "caget    ", int_mblaze_fsl_caget>;
-def EGET     : FSLGet<0x1B, "eget     ", int_mblaze_fsl_eget>;
-def EAGET    : FSLGet<0x1B, "eaget    ", int_mblaze_fsl_eaget>;
-def ECGET    : FSLGet<0x1B, "ecget    ", int_mblaze_fsl_ecget>;
-def ECAGET   : FSLGet<0x1B, "ecaget   ", int_mblaze_fsl_ecaget>;
-def NGET     : FSLGet<0x1B, "nget     ", int_mblaze_fsl_nget>;
-def NAGET    : FSLGet<0x1B, "naget    ", int_mblaze_fsl_naget>;
-def NCGET    : FSLGet<0x1B, "ncget    ", int_mblaze_fsl_ncget>;
-def NCAGET   : FSLGet<0x1B, "ncaget   ", int_mblaze_fsl_ncaget>;
-def NEGET    : FSLGet<0x1B, "neget    ", int_mblaze_fsl_neget>;
-def NEAGET   : FSLGet<0x1B, "neaget   ", int_mblaze_fsl_neaget>;
-def NECGET   : FSLGet<0x1B, "necget   ", int_mblaze_fsl_necget>;
-def NECAGET  : FSLGet<0x1B, "necaget  ", int_mblaze_fsl_necaget>;
-def TGET     : FSLGet<0x1B, "tget     ", int_mblaze_fsl_tget>;
-def TAGET    : FSLGet<0x1B, "taget    ", int_mblaze_fsl_taget>;
-def TCGET    : FSLGet<0x1B, "tcget    ", int_mblaze_fsl_tcget>;
-def TCAGET   : FSLGet<0x1B, "tcaget   ", int_mblaze_fsl_tcaget>;
-def TEGET    : FSLGet<0x1B, "teget    ", int_mblaze_fsl_teget>;
-def TEAGET   : FSLGet<0x1B, "teaget   ", int_mblaze_fsl_teaget>;
-def TECGET   : FSLGet<0x1B, "tecget   ", int_mblaze_fsl_tecget>;
-def TECAGET  : FSLGet<0x1B, "tecaget  ", int_mblaze_fsl_tecaget>;
-def TNGET    : FSLGet<0x1B, "tnget    ", int_mblaze_fsl_tnget>;
-def TNAGET   : FSLGet<0x1B, "tnaget   ", int_mblaze_fsl_tnaget>;
-def TNCGET   : FSLGet<0x1B, "tncget   ", int_mblaze_fsl_tncget>;
-def TNCAGET  : FSLGet<0x1B, "tncaget  ", int_mblaze_fsl_tncaget>;
-def TNEGET   : FSLGet<0x1B, "tneget   ", int_mblaze_fsl_tneget>;
-def TNEAGET  : FSLGet<0x1B, "tneaget  ", int_mblaze_fsl_tneaget>;
-def TNECGET  : FSLGet<0x1B, "tnecget  ", int_mblaze_fsl_tnecget>;
-def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>;
+def GET      : FSLGet<0x1B, 0x00, "get      ", int_mblaze_fsl_get>;
+def AGET     : FSLGet<0x1B, 0x02, "aget     ", int_mblaze_fsl_aget>;
+def CGET     : FSLGet<0x1B, 0x08, "cget     ", int_mblaze_fsl_cget>;
+def CAGET    : FSLGet<0x1B, 0x0A, "caget    ", int_mblaze_fsl_caget>;
+def EGET     : FSLGet<0x1B, 0x01, "eget     ", int_mblaze_fsl_eget>;
+def EAGET    : FSLGet<0x1B, 0x03, "eaget    ", int_mblaze_fsl_eaget>;
+def ECGET    : FSLGet<0x1B, 0x09, "ecget    ", int_mblaze_fsl_ecget>;
+def ECAGET   : FSLGet<0x1B, 0x0B, "ecaget   ", int_mblaze_fsl_ecaget>;
+def TGET     : FSLGet<0x1B, 0x04, "tget     ", int_mblaze_fsl_tget>;
+def TAGET    : FSLGet<0x1B, 0x06, "taget    ", int_mblaze_fsl_taget>;
+def TCGET    : FSLGet<0x1B, 0x0C, "tcget    ", int_mblaze_fsl_tcget>;
+def TCAGET   : FSLGet<0x1B, 0x0E, "tcaget   ", int_mblaze_fsl_tcaget>;
+def TEGET    : FSLGet<0x1B, 0x05, "teget    ", int_mblaze_fsl_teget>;
+def TEAGET   : FSLGet<0x1B, 0x07, "teaget   ", int_mblaze_fsl_teaget>;
+def TECGET   : FSLGet<0x1B, 0x0D, "tecget   ", int_mblaze_fsl_tecget>;
+def TECAGET  : FSLGet<0x1B, 0x0F, "tecaget  ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+  def NGET     : FSLGet<0x1B, 0x10, "nget     ", int_mblaze_fsl_nget>;
+  def NAGET    : FSLGet<0x1B, 0x12, "naget    ", int_mblaze_fsl_naget>;
+  def NCGET    : FSLGet<0x1B, 0x18, "ncget    ", int_mblaze_fsl_ncget>;
+  def NCAGET   : FSLGet<0x1B, 0x1A, "ncaget   ", int_mblaze_fsl_ncaget>;
+  def NEGET    : FSLGet<0x1B, 0x11, "neget    ", int_mblaze_fsl_neget>;
+  def NEAGET   : FSLGet<0x1B, 0x13, "neaget   ", int_mblaze_fsl_neaget>;
+  def NECGET   : FSLGet<0x1B, 0x19, "necget   ", int_mblaze_fsl_necget>;
+  def NECAGET  : FSLGet<0x1B, 0x1B, "necaget  ", int_mblaze_fsl_necaget>;
+  def TNGET    : FSLGet<0x1B, 0x14, "tnget    ", int_mblaze_fsl_tnget>;
+  def TNAGET   : FSLGet<0x1B, 0x16, "tnaget   ", int_mblaze_fsl_tnaget>;
+  def TNCGET   : FSLGet<0x1B, 0x1C, "tncget   ", int_mblaze_fsl_tncget>;
+  def TNCAGET  : FSLGet<0x1B, 0x1E, "tncaget  ", int_mblaze_fsl_tncaget>;
+  def TNEGET   : FSLGet<0x1B, 0x15, "tneget   ", int_mblaze_fsl_tneget>;
+  def TNEAGET  : FSLGet<0x1B, 0x17, "tneaget  ", int_mblaze_fsl_tneaget>;
+  def TNECGET  : FSLGet<0x1B, 0x1D, "tnecget  ", int_mblaze_fsl_tnecget>;
+  def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
 
 //===----------------------------------------------------------------------===//
 // FSL Dynamic Get Instructions
 //===----------------------------------------------------------------------===//
-def GETD      : FSLGetD<0x1B, 0x00, "getd     ", int_mblaze_fsl_get>;
-def AGETD     : FSLGetD<0x1B, 0x00, "agetd    ", int_mblaze_fsl_aget>;
-def CGETD     : FSLGetD<0x1B, 0x00, "cgetd    ", int_mblaze_fsl_cget>;
-def CAGETD    : FSLGetD<0x1B, 0x00, "cagetd   ", int_mblaze_fsl_caget>;
-def EGETD     : FSLGetD<0x1B, 0x00, "egetd    ", int_mblaze_fsl_eget>;
-def EAGETD    : FSLGetD<0x1B, 0x00, "eagetd   ", int_mblaze_fsl_eaget>;
-def ECGETD    : FSLGetD<0x1B, 0x00, "ecgetd   ", int_mblaze_fsl_ecget>;
-def ECAGETD   : FSLGetD<0x1B, 0x00, "ecagetd  ", int_mblaze_fsl_ecaget>;
-def NGETD     : FSLGetD<0x1B, 0x00, "ngetd    ", int_mblaze_fsl_nget>;
-def NAGETD    : FSLGetD<0x1B, 0x00, "nagetd   ", int_mblaze_fsl_naget>;
-def NCGETD    : FSLGetD<0x1B, 0x00, "ncgetd   ", int_mblaze_fsl_ncget>;
-def NCAGETD   : FSLGetD<0x1B, 0x00, "ncagetd  ", int_mblaze_fsl_ncaget>;
-def NEGETD    : FSLGetD<0x1B, 0x00, "negetd   ", int_mblaze_fsl_neget>;
-def NEAGETD   : FSLGetD<0x1B, 0x00, "neagetd  ", int_mblaze_fsl_neaget>;
-def NECGETD   : FSLGetD<0x1B, 0x00, "necgetd  ", int_mblaze_fsl_necget>;
-def NECAGETD  : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>;
-def TGETD     : FSLGetD<0x1B, 0x00, "tgetd    ", int_mblaze_fsl_tget>;
-def TAGETD    : FSLGetD<0x1B, 0x00, "tagetd   ", int_mblaze_fsl_taget>;
-def TCGETD    : FSLGetD<0x1B, 0x00, "tcgetd   ", int_mblaze_fsl_tcget>;
-def TCAGETD   : FSLGetD<0x1B, 0x00, "tcagetd  ", int_mblaze_fsl_tcaget>;
-def TEGETD    : FSLGetD<0x1B, 0x00, "tegetd   ", int_mblaze_fsl_teget>;
-def TEAGETD   : FSLGetD<0x1B, 0x00, "teagetd  ", int_mblaze_fsl_teaget>;
-def TECGETD   : FSLGetD<0x1B, 0x00, "tecgetd  ", int_mblaze_fsl_tecget>;
-def TECAGETD  : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>;
-def TNGETD    : FSLGetD<0x1B, 0x00, "tngetd   ", int_mblaze_fsl_tnget>;
-def TNAGETD   : FSLGetD<0x1B, 0x00, "tnagetd  ", int_mblaze_fsl_tnaget>;
-def TNCGETD   : FSLGetD<0x1B, 0x00, "tncgetd  ", int_mblaze_fsl_tncget>;
-def TNCAGETD  : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>;
-def TNEGETD   : FSLGetD<0x1B, 0x00, "tnegetd  ", int_mblaze_fsl_tneget>;
-def TNEAGETD  : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>;
-def TNECGETD  : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>;
-def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>;
+def GETD      : FSLGetD<0x13, 0x00, "getd     ", int_mblaze_fsl_get>;
+def AGETD     : FSLGetD<0x13, 0x02, "agetd    ", int_mblaze_fsl_aget>;
+def CGETD     : FSLGetD<0x13, 0x08, "cgetd    ", int_mblaze_fsl_cget>;
+def CAGETD    : FSLGetD<0x13, 0x0A, "cagetd   ", int_mblaze_fsl_caget>;
+def EGETD     : FSLGetD<0x13, 0x01, "egetd    ", int_mblaze_fsl_eget>;
+def EAGETD    : FSLGetD<0x13, 0x03, "eagetd   ", int_mblaze_fsl_eaget>;
+def ECGETD    : FSLGetD<0x13, 0x09, "ecgetd   ", int_mblaze_fsl_ecget>;
+def ECAGETD   : FSLGetD<0x13, 0x0B, "ecagetd  ", int_mblaze_fsl_ecaget>;
+def TGETD     : FSLGetD<0x13, 0x04, "tgetd    ", int_mblaze_fsl_tget>;
+def TAGETD    : FSLGetD<0x13, 0x06, "tagetd   ", int_mblaze_fsl_taget>;
+def TCGETD    : FSLGetD<0x13, 0x0C, "tcgetd   ", int_mblaze_fsl_tcget>;
+def TCAGETD   : FSLGetD<0x13, 0x0E, "tcagetd  ", int_mblaze_fsl_tcaget>;
+def TEGETD    : FSLGetD<0x13, 0x05, "tegetd   ", int_mblaze_fsl_teget>;
+def TEAGETD   : FSLGetD<0x13, 0x07, "teagetd  ", int_mblaze_fsl_teaget>;
+def TECGETD   : FSLGetD<0x13, 0x0D, "tecgetd  ", int_mblaze_fsl_tecget>;
+def TECAGETD  : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+  def NGETD     : FSLGetD<0x13, 0x10, "ngetd    ", int_mblaze_fsl_nget>;
+  def NAGETD    : FSLGetD<0x13, 0x12, "nagetd   ", int_mblaze_fsl_naget>;
+  def NCGETD    : FSLGetD<0x13, 0x18, "ncgetd   ", int_mblaze_fsl_ncget>;
+  def NCAGETD   : FSLGetD<0x13, 0x1A, "ncagetd  ", int_mblaze_fsl_ncaget>;
+  def NEGETD    : FSLGetD<0x13, 0x11, "negetd   ", int_mblaze_fsl_neget>;
+  def NEAGETD   : FSLGetD<0x13, 0x13, "neagetd  ", int_mblaze_fsl_neaget>;
+  def NECGETD   : FSLGetD<0x13, 0x19, "necgetd  ", int_mblaze_fsl_necget>;
+  def NECAGETD  : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+  def TNGETD    : FSLGetD<0x13, 0x14, "tngetd   ", int_mblaze_fsl_tnget>;
+  def TNAGETD   : FSLGetD<0x13, 0x16, "tnagetd  ", int_mblaze_fsl_tnaget>;
+  def TNCGETD   : FSLGetD<0x13, 0x1C, "tncgetd  ", int_mblaze_fsl_tncget>;
+  def TNCAGETD  : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+  def TNEGETD   : FSLGetD<0x13, 0x15, "tnegetd  ", int_mblaze_fsl_tneget>;
+  def TNEAGETD  : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+  def TNECGETD  : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+  def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
 
 //===----------------------------------------------------------------------===//
 // FSL Put Instructions
 //===----------------------------------------------------------------------===//
-def PUT     :  FSLPut<0x1B, "put      ", int_mblaze_fsl_put>;
-def APUT    :  FSLPut<0x1B, "aput     ", int_mblaze_fsl_aput>;
-def CPUT    :  FSLPut<0x1B, "cput     ", int_mblaze_fsl_cput>;
-def CAPUT   :  FSLPut<0x1B, "caput    ", int_mblaze_fsl_caput>;
-def NPUT    :  FSLPut<0x1B, "nput     ", int_mblaze_fsl_nput>;
-def NAPUT   :  FSLPut<0x1B, "naput    ", int_mblaze_fsl_naput>;
-def NCPUT   :  FSLPut<0x1B, "ncput    ", int_mblaze_fsl_ncput>;
-def NCAPUT  :  FSLPut<0x1B, "ncaput   ", int_mblaze_fsl_ncaput>;
-def TPUT    : FSLPutT<0x1B, "tput     ", int_mblaze_fsl_tput>;
-def TAPUT   : FSLPutT<0x1B, "taput    ", int_mblaze_fsl_taput>;
-def TCPUT   : FSLPutT<0x1B, "tcput    ", int_mblaze_fsl_tcput>;
-def TCAPUT  : FSLPutT<0x1B, "tcaput   ", int_mblaze_fsl_tcaput>;
-def TNPUT   : FSLPutT<0x1B, "tnput    ", int_mblaze_fsl_tnput>;
-def TNAPUT  : FSLPutT<0x1B, "tnaput   ", int_mblaze_fsl_tnaput>;
-def TNCPUT  : FSLPutT<0x1B, "tncput   ", int_mblaze_fsl_tncput>;
-def TNCAPUT : FSLPutT<0x1B, "tncaput  ", int_mblaze_fsl_tncaput>;
+def PUT     :  FSLPut<0x1B, 0x0, "put      ", int_mblaze_fsl_put>;
+def APUT    :  FSLPut<0x1B, 0x1, "aput     ", int_mblaze_fsl_aput>;
+def CPUT    :  FSLPut<0x1B, 0x4, "cput     ", int_mblaze_fsl_cput>;
+def CAPUT   :  FSLPut<0x1B, 0x5, "caput    ", int_mblaze_fsl_caput>;
+def TPUT    : FSLPutT<0x1B, 0x2, "tput     ", int_mblaze_fsl_tput>;
+def TAPUT   : FSLPutT<0x1B, 0x3, "taput    ", int_mblaze_fsl_taput>;
+def TCPUT   : FSLPutT<0x1B, 0x6, "tcput    ", int_mblaze_fsl_tcput>;
+def TCAPUT  : FSLPutT<0x1B, 0x7, "tcaput   ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+  def NPUT    :  FSLPut<0x1B, 0x8, "nput     ", int_mblaze_fsl_nput>;
+  def NAPUT   :  FSLPut<0x1B, 0x9, "naput    ", int_mblaze_fsl_naput>;
+  def NCPUT   :  FSLPut<0x1B, 0xC, "ncput    ", int_mblaze_fsl_ncput>;
+  def NCAPUT  :  FSLPut<0x1B, 0xD, "ncaput   ", int_mblaze_fsl_ncaput>;
+  def TNPUT   : FSLPutT<0x1B, 0xA, "tnput    ", int_mblaze_fsl_tnput>;
+  def TNAPUT  : FSLPutT<0x1B, 0xB, "tnaput   ", int_mblaze_fsl_tnaput>;
+  def TNCPUT  : FSLPutT<0x1B, 0xE, "tncput   ", int_mblaze_fsl_tncput>;
+  def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput  ", int_mblaze_fsl_tncaput>;
+}
 
 //===----------------------------------------------------------------------===//
 // FSL Dynamic Put Instructions
 //===----------------------------------------------------------------------===//
-def PUTD     :  FSLPutD<0x1B, 0x00, "putd     ", int_mblaze_fsl_put>;
-def APUTD    :  FSLPutD<0x1B, 0x00, "aputd    ", int_mblaze_fsl_aput>;
-def CPUTD    :  FSLPutD<0x1B, 0x00, "cputd    ", int_mblaze_fsl_cput>;
-def CAPUTD   :  FSLPutD<0x1B, 0x00, "caputd   ", int_mblaze_fsl_caput>;
-def NPUTD    :  FSLPutD<0x1B, 0x00, "nputd    ", int_mblaze_fsl_nput>;
-def NAPUTD   :  FSLPutD<0x1B, 0x00, "naputd   ", int_mblaze_fsl_naput>;
-def NCPUTD   :  FSLPutD<0x1B, 0x00, "ncputd   ", int_mblaze_fsl_ncput>;
-def NCAPUTD  :  FSLPutD<0x1B, 0x00, "ncaputd  ", int_mblaze_fsl_ncaput>;
-def TPUTD    : FSLPutTD<0x1B, 0x00, "tputd    ", int_mblaze_fsl_tput>;
-def TAPUTD   : FSLPutTD<0x1B, 0x00, "taputd   ", int_mblaze_fsl_taput>;
-def TCPUTD   : FSLPutTD<0x1B, 0x00, "tcputd   ", int_mblaze_fsl_tcput>;
-def TCAPUTD  : FSLPutTD<0x1B, 0x00, "tcaputd  ", int_mblaze_fsl_tcaput>;
-def TNPUTD   : FSLPutTD<0x1B, 0x00, "tnputd   ", int_mblaze_fsl_tnput>;
-def TNAPUTD  : FSLPutTD<0x1B, 0x00, "tnaputd  ", int_mblaze_fsl_tnaput>;
-def TNCPUTD  : FSLPutTD<0x1B, 0x00, "tncputd  ", int_mblaze_fsl_tncput>;
-def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>;
+def PUTD     :  FSLPutD<0x13, 0x0, "putd     ", int_mblaze_fsl_put>;
+def APUTD    :  FSLPutD<0x13, 0x1, "aputd    ", int_mblaze_fsl_aput>;
+def CPUTD    :  FSLPutD<0x13, 0x4, "cputd    ", int_mblaze_fsl_cput>;
+def CAPUTD   :  FSLPutD<0x13, 0x5, "caputd   ", int_mblaze_fsl_caput>;
+def TPUTD    : FSLPutTD<0x13, 0x2, "tputd    ", int_mblaze_fsl_tput>;
+def TAPUTD   : FSLPutTD<0x13, 0x3, "taputd   ", int_mblaze_fsl_taput>;
+def TCPUTD   : FSLPutTD<0x13, 0x6, "tcputd   ", int_mblaze_fsl_tcput>;
+def TCAPUTD  : FSLPutTD<0x13, 0x7, "tcaputd  ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+  def NPUTD    :  FSLPutD<0x13, 0x8, "nputd    ", int_mblaze_fsl_nput>;
+  def NAPUTD   :  FSLPutD<0x13, 0x9, "naputd   ", int_mblaze_fsl_naput>;
+  def NCPUTD   :  FSLPutD<0x13, 0xC, "ncputd   ", int_mblaze_fsl_ncput>;
+  def NCAPUTD  :  FSLPutD<0x13, 0xD, "ncaputd  ", int_mblaze_fsl_ncaput>;
+  def TNPUTD   : FSLPutTD<0x13, 0xA, "tnputd   ", int_mblaze_fsl_tnput>;
+  def TNAPUTD  : FSLPutTD<0x13, 0xB, "tnaputd  ", int_mblaze_fsl_tnaput>;
+  def TNCPUTD  : FSLPutTD<0x13, 0xE, "tncputd  ", int_mblaze_fsl_tncput>;
+  def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 28e8e4402225..d62574d0edee 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -7,6 +7,35 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Format specifies the encoding used by the instruction.  This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+      bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR    : Format<1>;  // ADD, OR, etc.
+def FRRI    : Format<2>;  // ADDI, ORI, etc.
+def FCRR    : Format<3>;  // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI    : Format<4>;  // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR    : Format<5>;  // BRLD, BRALD, GETD
+def FRCI    : Format<6>;  // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR    : Format<7>;  // BR, BRA, BRD, etc.
+def FCCI    : Format<8>;  // IMM, BRI, BRAI, BRID, etc.
+def FRRCI   : Format<9>;  // BSRLI, BSRAI, BSLLI
+def FRRC    : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX    : Format<11>; // GET
+def FRCS    : Format<12>; // MFS
+def FCRCS   : Format<13>; // MTS
+def FCRCX   : Format<14>; // PUT
+def FCX     : Format<15>; // TPUT
+def FCR     : Format<16>; // TPUTD
+def FRIR    : Format<17>; // RSUBI
+def FRRRR   : Format<18>; // RSUB, FRSUB
+def FRI     : Format<19>; // RSUB, FRSUB
+def FC      : Format<20>; // NOP
+
 //===----------------------------------------------------------------------===//
 //  Describe MBlaze instructions format
 //
@@ -21,226 +50,155 @@
 //===----------------------------------------------------------------------===//
 
 // Generic MBlaze Format
-class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern, 
-               InstrItinClass itin> : Instruction 
-{
-  field bits<32> Inst;
-
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+                 list<dag> pattern, InstrItinClass itin> : Instruction {
   let Namespace = "MBlaze";
+  field bits<32> Inst;
 
-  bits<6> opcode;
+  bits<6> opcode = op;
+  Format Form = form;
+  bits<6> FormBits = Form.Value;
 
   // Top 6 bits are the 'opcode' field
-  let Inst{0-5} = opcode;   
-  
+  let Inst{0-5} = opcode;
+
+  // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+  // assembler and disassmbler ignore it.
+  let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
   dag OutOperandList = outs;
   dag InOperandList  = ins;
 
   let AsmString   = asmstr;
   let Pattern     = pattern;
   let Itinerary   = itin;
+
+  // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+  let TSFlags{5-0}   = FormBits;
 }
 
 //===----------------------------------------------------------------------===//
 // Pseudo instruction class
 //===----------------------------------------------------------------------===//
 class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
-      MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>;
+      MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>;
 
 //===----------------------------------------------------------------------===//
 // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
 //===----------------------------------------------------------------------===//
 
 class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
-         list<dag> pattern, InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
+         list<dag> pattern, InstrItinClass itin> :
+         MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
 {
   bits<5> rd;
   bits<5> ra;
   bits<5> rb;
 
-  let opcode = op;
-
   let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
+  let Inst{11-15} = ra;
   let Inst{16-20} = rb;
   let Inst{21-31} = flags;
 }
 
-class TAI<bits<6> op, dag outs, dag ins, string asmstr,
-          list<dag> pattern, InstrItinClass itin> :
-          MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> rd;
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
-}
-
-class TIMM<bits<6> op, dag outs, dag ins, string asmstr,
-           list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-15}  = 0;
-  let Inst{16-31} = imm16;
-}
-
-class TADDR<bits<6> op, dag outs, dag ins, string asmstr,
-            list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<26> addr;
-
-  let opcode = op;
-
-  let Inst{6-31} = addr;
-}
-
 //===----------------------------------------------------------------------===//
 // Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
 //===----------------------------------------------------------------------===//
 
 class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-         InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
+         InstrItinClass itin> :
+         MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
 {
   bits<5>  rd;
   bits<5>  ra;
   bits<16> imm16;
 
-  let opcode = op;
-
   let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
+  let Inst{11-15} = ra;
   let Inst{16-31} = imm16;
 }
 
 //===----------------------------------------------------------------------===//
-// Float instruction class in MBlaze : <|opcode|rd|ra|flags|>
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
 //===----------------------------------------------------------------------===//
 
-class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
-         list<dag> pattern, InstrItinClass itin> : 
-         MBlazeInst<outs, ins, asmstr, pattern, itin> 
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+          list<dag> pattern, InstrItinClass itin> :
+          TA<op, flags, outs, ins, asmstr, pattern, itin>
 {
-  bits<5>  rd;
-  bits<5>  ra;
+  bits<5> rrd;
+  bits<5> rrb;
+  bits<5> rra;
 
-  let opcode = op;
+  let Form = FRRRR;
 
-  let Inst{6-10}  = rd;
-  let Inst{11-15} = ra; 
-  let Inst{16-20} = 0;
-  let Inst{21-31} = flags;
+  let rd = rrd;
+  let ra = rra;
+  let rb = rrb;
 }
 
 //===----------------------------------------------------------------------===//
-// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|>
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
 //===----------------------------------------------------------------------===//
-
-class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-          string asmstr, list<dag> pattern, InstrItinClass itin> :
-          MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0;
-  let Inst{11-15} = br; 
-  let Inst{16-20} = ra;
-  let Inst{21-31} = flags;
-}
-
-class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<5> rb;
-
-  let opcode = op;
-
-  let Inst{6-10}  = br;
-  let Inst{11-15} = ra; 
-  let Inst{16-20} = rb;
-  let Inst{21-31} = flags;
-}
-
-class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0xF;
-  let Inst{11-15} = br; 
-  let Inst{16-20} = ra;
-  let Inst{21-31} = flags;
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+         InstrItinClass itin> :
+         TB<op, outs, ins, asmstr, pattern, itin> {
+  bits<5>  rrd;
+  bits<16> rimm16;
+  bits<5>  rra;
+
+  let Form = FRIR;
+
+  let rd = rrd;
+  let ra = rra;
+  let imm16 = rimm16;
 }
 
-class TBRI<bits<6> op, bits<5> br, dag outs, dag ins,
-           string asmstr, list<dag> pattern, InstrItinClass itin> :
-           MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<16> imm16;
-
-  let opcode = op;
-
-  let Inst{6-10}  = 0;
-  let Inst{11-15} = br; 
-  let Inst{16-31} = imm16;
-}
-
-class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<16> imm16;
-
-  let opcode = op;
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+          list<dag> pattern, InstrItinClass itin> :
+          MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+  bits<5>  rd;
+  bits<5>  ra;
+  bits<5>  imm5;
 
-  let Inst{6-10}  = 0xF;
-  let Inst{11-15} = br; 
-  let Inst{16-31} = imm16;
+  let Inst{6-10}  = rd;
+  let Inst{11-15} = ra;
+  let Inst{16-20} = 0x0;
+  let Inst{21-22} = flags;
+  let Inst{23-26} = 0x0;
+  let Inst{27-31} = imm5;
 }
 
-class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5> ra;
-  bits<16> imm16;
-
-  let opcode = op;
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+          list<dag> pattern, InstrItinClass itin> :
+          MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+  bits<5>  rd;
+  bits<14> imm14;
 
-  let Inst{6-10}  = br;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
+  let Inst{6-10}  = rd;
+  let Inst{11-15} = 0x0;
+  let Inst{16-17} = flags;
+  let Inst{18-31} = imm14;
 }
 
-class TRET<bits<6> op, dag outs, dag ins,
-            string asmstr, list<dag> pattern, InstrItinClass itin> :
-            MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
-  bits<5>  ra;
-  bits<16> imm16;
-
-  let opcode = op;
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+          list<dag> pattern, InstrItinClass itin> :
+          MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+  bits<5>  rd;
+  bits<15> imm15;
 
-  let Inst{6-10}  = 0x10;
-  let Inst{11-15} = ra; 
-  let Inst{16-31} = imm16;
+  let Inst{6-10}  = rd;
+  let Inst{11-16} = flags;
+  let Inst{17-31} = imm15;
 }
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b590c090e095..b353dcdef05b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -38,10 +38,10 @@ static bool isZeroImm(const MachineOperand &op) {
 unsigned MBlazeInstrInfo::
 isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
   if (MI->getOpcode() == MBlaze::LWI) {
-    if ((MI->getOperand(2).isFI()) && // is a stack slot
-        (MI->getOperand(1).isImm()) &&  // the imm is zero
-        (isZeroImm(MI->getOperand(1)))) {
-      FrameIndex = MI->getOperand(2).getIndex();
+    if ((MI->getOperand(1).isFI()) && // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the imm is zero
+        (isZeroImm(MI->getOperand(2)))) {
+      FrameIndex = MI->getOperand(1).getIndex();
       return MI->getOperand(0).getReg();
     }
   }
@@ -57,10 +57,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
 unsigned MBlazeInstrInfo::
 isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
   if (MI->getOpcode() == MBlaze::SWI) {
-    if ((MI->getOperand(2).isFI()) && // is a stack slot
-        (MI->getOperand(1).isImm()) &&  // the imm is zero
-        (isZeroImm(MI->getOperand(1)))) {
-      FrameIndex = MI->getOperand(2).getIndex();
+    if ((MI->getOperand(1).isFI()) && // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the imm is zero
+        (isZeroImm(MI->getOperand(2)))) {
+      FrameIndex = MI->getOperand(1).getIndex();
       return MI->getOperand(0).getReg();
     }
   }
@@ -80,7 +80,7 @@ copyPhysReg(MachineBasicBlock &MBB,
             MachineBasicBlock::iterator I, DebugLoc DL,
             unsigned DestReg, unsigned SrcReg,
             bool KillSrc) const {
-  llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
+  llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
     .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
 }
 
@@ -91,7 +91,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
-    .addImm(0).addFrameIndex(FI);
+    .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
 }
 
 void MBlazeInstrInfo::
@@ -101,21 +101,168 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
-      .addImm(0).addFrameIndex(FI);
+      .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI);
 }
 
 //===----------------------------------------------------------------------===//
 // Branch Analysis
 //===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock *&TBB,
+                                    MachineBasicBlock *&FBB,
+                                    SmallVectorImpl<MachineOperand> &Cond,
+                                    bool AllowModify) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (MBlaze::isCondBranchOpcode(LastOpc)) {
+      // Block ends with fall-through condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    // Otherwise, don't know what this is.
+    return true;
+  }
+
+  // Get the instruction before it if it's a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with something like BEQID then BRID, handle it.
+  if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+      MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two unconditional branches, handle it.
+  // The second one is not executed, so remove it.
+  if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+      MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
 unsigned MBlazeInstrInfo::
 InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
              MachineBasicBlock *FBB,
              const SmallVectorImpl<MachineOperand> &Cond,
              DebugLoc DL) const {
-  // Can only insert uncond branches so far.
-  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB);
-  return 1;
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 2 || Cond.size() == 0) &&
+         "MBlaze branch conditions have two components!");
+
+  unsigned Opc = MBlaze::BRID;
+  if (!Cond.empty())
+    Opc = (unsigned)Cond[0].getImm();
+
+  if (FBB == 0) {
+    if (Cond.empty()) // Unconditional branch
+      BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+    else              // Conditional branch
+      BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+    return 1;
+  }
+
+  BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+  return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return 0;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return 0;
+    --I;
+  }
+
+  if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+      !MBlaze::isCondBranchOpcode(I->getOpcode()))
+    return 0;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return 1;
+  --I;
+  if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+    return 1;
+
+  // Remove the branch.
+  I->eraseFromParent();
+  return 2;
+}
+
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+  switch (Cond[0].getImm()) {
+  default:            return true;
+  case MBlaze::BEQ:   Cond[0].setImm(MBlaze::BNE); return false;
+  case MBlaze::BNE:   Cond[0].setImm(MBlaze::BEQ); return false;
+  case MBlaze::BGT:   Cond[0].setImm(MBlaze::BLE); return false;
+  case MBlaze::BGE:   Cond[0].setImm(MBlaze::BLT); return false;
+  case MBlaze::BLT:   Cond[0].setImm(MBlaze::BGE); return false;
+  case MBlaze::BLE:   Cond[0].setImm(MBlaze::BGT); return false;
+  case MBlaze::BEQI:  Cond[0].setImm(MBlaze::BNEI); return false;
+  case MBlaze::BNEI:  Cond[0].setImm(MBlaze::BEQI); return false;
+  case MBlaze::BGTI:  Cond[0].setImm(MBlaze::BLEI); return false;
+  case MBlaze::BGEI:  Cond[0].setImm(MBlaze::BLTI); return false;
+  case MBlaze::BLTI:  Cond[0].setImm(MBlaze::BGEI); return false;
+  case MBlaze::BLEI:  Cond[0].setImm(MBlaze::BGTI); return false;
+  case MBlaze::BEQD:  Cond[0].setImm(MBlaze::BNED); return false;
+  case MBlaze::BNED:  Cond[0].setImm(MBlaze::BEQD); return false;
+  case MBlaze::BGTD:  Cond[0].setImm(MBlaze::BLED); return false;
+  case MBlaze::BGED:  Cond[0].setImm(MBlaze::BLTD); return false;
+  case MBlaze::BLTD:  Cond[0].setImm(MBlaze::BGED); return false;
+  case MBlaze::BLED:  Cond[0].setImm(MBlaze::BGTD); return false;
+  case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+  case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+  case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+  case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+  case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+  case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+  }
 }
 
 /// getGlobalBaseReg - Return a virtual register initialized with the
@@ -134,7 +281,7 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
-  GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+  GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
   BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
           GlobalBaseReg).addReg(MBlaze::R20);
   RegInfo.addLiveIn(MBlaze::R20);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index b3dba0ec768c..b7300c14080d 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -73,59 +73,92 @@ namespace MBlaze {
     FCOND_GT,
 
     // Only integer conditions
-    COND_E,
-    COND_GZ,
-    COND_GEZ,
-    COND_LZ,
-    COND_LEZ,
+    COND_EQ,
+    COND_GT,
+    COND_GE,
+    COND_LT,
+    COND_LE,
     COND_NE,
     COND_INVALID
   };
 
   // Turn condition code into conditional branch opcode.
-  unsigned GetCondBranchFromCond(CondCode CC);
+  inline static unsigned GetCondBranchFromCond(CondCode CC) {
+    switch (CC) {
+    default: llvm_unreachable("Unknown condition code");
+    case COND_EQ: return MBlaze::BEQID;
+    case COND_NE: return MBlaze::BNEID;
+    case COND_GT: return MBlaze::BGTID;
+    case COND_GE: return MBlaze::BGEID;
+    case COND_LT: return MBlaze::BLTID;
+    case COND_LE: return MBlaze::BLEID;
+    }
+  }
 
   /// GetOppositeBranchCondition - Return the inverse of the specified cond,
   /// e.g. turning COND_E to COND_NE.
-  CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+  // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
 
   /// MBlazeCCToString - Map each FP condition code to its string
-  inline static const char *MBlazeFCCToString(MBlaze::CondCode CC)
-  {
+  inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
     switch (CC) {
-      default: llvm_unreachable("Unknown condition code");
-      case FCOND_F:
-      case FCOND_T:   return "f";
-      case FCOND_UN:
-      case FCOND_OR:  return "un";
-      case FCOND_EQ:
-      case FCOND_NEQ: return "eq";
-      case FCOND_UEQ:
-      case FCOND_OGL: return "ueq";
-      case FCOND_OLT:
-      case FCOND_UGE: return "olt";
-      case FCOND_ULT:
-      case FCOND_OGE: return "ult";
-      case FCOND_OLE:
-      case FCOND_UGT: return "ole";
-      case FCOND_ULE:
-      case FCOND_OGT: return "ule";
-      case FCOND_SF:
-      case FCOND_ST:  return "sf";
-      case FCOND_NGLE:
-      case FCOND_GLE: return "ngle";
-      case FCOND_SEQ:
-      case FCOND_SNE: return "seq";
-      case FCOND_NGL:
-      case FCOND_GL:  return "ngl";
-      case FCOND_LT:
-      case FCOND_NLT: return "lt";
-      case FCOND_NGE:
-      case FCOND_GE:  return "ge";
-      case FCOND_LE:
-      case FCOND_NLE: return "nle";
-      case FCOND_NGT:
-      case FCOND_GT:  return "gt";
+    default: llvm_unreachable("Unknown condition code");
+    case FCOND_F:
+    case FCOND_T:   return "f";
+    case FCOND_UN:
+    case FCOND_OR:  return "un";
+    case FCOND_EQ:
+    case FCOND_NEQ: return "eq";
+    case FCOND_UEQ:
+    case FCOND_OGL: return "ueq";
+    case FCOND_OLT:
+    case FCOND_UGE: return "olt";
+    case FCOND_ULT:
+    case FCOND_OGE: return "ult";
+    case FCOND_OLE:
+    case FCOND_UGT: return "ole";
+    case FCOND_ULE:
+    case FCOND_OGT: return "ule";
+    case FCOND_SF:
+    case FCOND_ST:  return "sf";
+    case FCOND_NGLE:
+    case FCOND_GLE: return "ngle";
+    case FCOND_SEQ:
+    case FCOND_SNE: return "seq";
+    case FCOND_NGL:
+    case FCOND_GL:  return "ngl";
+    case FCOND_LT:
+    case FCOND_NLT: return "lt";
+    case FCOND_NGE:
+    case FCOND_GE:  return "ge";
+    case FCOND_LE:
+    case FCOND_NLE: return "nle";
+    case FCOND_NGT:
+    case FCOND_GT:  return "gt";
+    }
+  }
+
+  inline static bool isUncondBranchOpcode(int Opc) {
+    switch (Opc) {
+    default: return false;
+    case MBlaze::BRI:
+    case MBlaze::BRAI:
+    case MBlaze::BRID:
+    case MBlaze::BRAID:
+      return true;
+    }
+  }
+
+  inline static bool isCondBranchOpcode(int Opc) {
+    switch (Opc) {
+    default: return false;
+    case MBlaze::BEQI: case MBlaze::BEQID:
+    case MBlaze::BNEI: case MBlaze::BNEID:
+    case MBlaze::BGTI: case MBlaze::BGTID:
+    case MBlaze::BGEI: case MBlaze::BGEID:
+    case MBlaze::BLTI: case MBlaze::BLTID:
+    case MBlaze::BLEI: case MBlaze::BLEID:
+      return true;
     }
   }
 }
@@ -134,29 +167,54 @@ namespace MBlaze {
 /// instruction info tracks.
 ///
 namespace MBlazeII {
-  /// Target Operand Flag enum.
-  enum TOF {
+  enum {
+    // PseudoFrm - This represents an instruction that is a pseudo instruction
+    // or one that has not been implemented yet.  It is illegal to code generate
+    // it, but tolerated for intermediate implementation stages.
+    FPseudo = 0,
+    FRRR,
+    FRRI,
+    FCRR,
+    FCRI,
+    FRCR,
+    FRCI,
+    FCCR,
+    FCCI,
+    FRRCI,
+    FRRC,
+    FRCX,
+    FRCS,
+    FCRCS,
+    FCRCX,
+    FCX,
+    FCR,
+    FRIR,
+    FRRRR,
+    FRI,
+    FC,
+    FormMask = 63
+
     //===------------------------------------------------------------------===//
     // MBlaze Specific MachineOperand flags.
-    MO_NO_FLAG,
+    // MO_NO_FLAG,
 
     /// MO_GOT - Represents the offset into the global offset table at which
     /// the address the relocation entry symbol resides during execution.
-    MO_GOT,
+    // MO_GOT,
 
     /// MO_GOT_CALL - Represents the offset into the global offset table at
     /// which the address of a call site relocation entry symbol resides
     /// during execution. This is different from the above since this flag
     /// can only be present in call instructions.
-    MO_GOT_CALL,
+    // MO_GOT_CALL,
 
     /// MO_GPREL - Represents the offset from the current gp value to be used
     /// for the relocatable object file being produced.
-    MO_GPREL,
+    // MO_GPREL,
 
     /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
     /// address.
-    MO_ABS_HILO
+    // MO_ABS_HILO
 
   };
 }
@@ -190,10 +248,20 @@ public:
                                       int &FrameIndex) const;
 
   /// Branch Analysis
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+    const;
+
+
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index e5d153474a7e..7b8f70a30434 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -13,35 +13,36 @@
 include "MBlazeInstrFormats.td"
 
 //===----------------------------------------------------------------------===//
-// MBlaze profiles and nodes
+// MBlaze type profiles
 //===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
 def SDT_MBlazeRet     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_MBlazeIRet    : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
 
-// Call
-def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
-                               [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
 
-// Return
-def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+def MBlazeRet     : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+                           [SDNPHasChain, SDNPOptInGlue]>;
+def MBlazeIRet    : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+                           [SDNPHasChain, SDNPOptInGlue]>;
 
-// Hi and Lo nodes are used to handle global addresses. Used on 
-// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol 
-// static model.
-def MBWrapper   : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
-def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>;
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+                           [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+                            SDNPVariadic]>;
 
-def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
-def SDT_MBCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def MBWrapper   : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
 
-// These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 
-def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 //===----------------------------------------------------------------------===//
 // MBlaze Instruction Predicate Definitions.
@@ -67,11 +68,22 @@ def HasMMU       : Predicate<"Subtarget.hasMMU()">;
 // MBlaze Operand, Complex Patterns and Transformations Definitions.
 //===----------------------------------------------------------------------===//
 
+def MBlazeMemAsmOperand : AsmOperandClass {
+  let Name = "Mem";
+  let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+  let Name = "Fsl";
+  let SuperClasses = [];
+}
+
 // Instruction operand types
 def brtarget    : Operand<OtherVT>;
 def calltarget  : Operand<i32>;
 def simm16      : Operand<i32>;
 def uimm5       : Operand<i32>;
+def uimm15      : Operand<i32>;
 def fimm        : Operand<f32>;
 
 // Unsigned Operand
@@ -82,31 +94,23 @@ def uimm16      : Operand<i32> {
 // FSL Operand
 def fslimm      : Operand<i32> {
   let PrintMethod = "printFSLImm";
+  let ParserMatchClass = MBlazeFslAsmOperand;
 }
 
 // Address operand
 def memri : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops simm16, CPURegs);
+  let MIOperandInfo = (ops GPR, simm16);
+  let ParserMatchClass = MBlazeMemAsmOperand;
 }
 
 def memrr : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops CPURegs, CPURegs);
+  let MIOperandInfo = (ops GPR, GPR);
+  let ParserMatchClass = MBlazeMemAsmOperand;
 }
 
-// Transformation Function - get the lower 16 bits.
-def LO16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
-}]>;
-
-// Transformation Function - get the higher 16 bits.
-def HI16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
 // Node immediate fits as 16-bit sign extended on target immediate.
-// e.g. addi, andi
 def immSExt16  : PatLeaf<(imm), [{
   return (N->getZExtValue() >> 16) == 0;
 }]>;
@@ -117,19 +121,19 @@ def immSExt16  : PatLeaf<(imm), [{
 // e.g. addiu, sltiu
 def immZExt16  : PatLeaf<(imm), [{
   return (N->getZExtValue() >> 16) == 0;
-}], LO16>;
+}]>;
 
 // FSL immediate field must fit in 4 bits.
 def immZExt4 : PatLeaf<(imm), [{
-      return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+  return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
 }]>;
 
 // shamt field must fit in 5 bits.
 def immZExt5 : PatLeaf<(imm), [{
-      return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+  return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
 }]>;
 
-// MBlaze Address Mode! SDNode frameindex could possibily be a match
+// MBlaze Address Mode. SDNode frameindex could possibily be a match
 // since load and store instructions from stack used it.
 def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
 def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
@@ -141,28 +145,14 @@ def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
 // As stack alignment is always done with addiu, we need a 16-bit immediate
 let Defs = [R1], Uses = [R1] in {
 def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
-                                  "${:comment} ADJCALLSTACKDOWN $amt",
+                                  "#ADJCALLSTACKDOWN $amt",
                                   [(callseq_start timm:$amt)]>;
 def ADJCALLSTACKUP   : MBlazePseudo<(outs),
                                   (ins uimm16:$amt1, simm16:$amt2),
-                                  "${:comment} ADJCALLSTACKUP $amt1",
+                                  "#ADJCALLSTACKUP $amt1",
                                   [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
-// Some assembly macros need to avoid pseudoinstructions and assembler
-// automatic reodering, we should reorder ourselves.
-def MACRO     : MBlazePseudo<(outs), (ins), ".set macro",     []>;
-def REORDER   : MBlazePseudo<(outs), (ins), ".set reorder",   []>;
-def NOMACRO   : MBlazePseudo<(outs), (ins), ".set nomacro",   []>;
-def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>;
-
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>;
-def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
-
 //===----------------------------------------------------------------------===//
 // Instructions specific format
 //===----------------------------------------------------------------------===//
@@ -172,47 +162,58 @@ def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
 //===----------------------------------------------------------------------===//
 class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+             TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+               TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+                  !strconcat(instr_asm, "   $dst, $b, $c"),
+                  [], IIAlu>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+             Operand Od, PatLeaf imm_type> :
+             SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
                  !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+                 [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
 
 class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
-               !strconcat(instr_asm, "   $dst, $c, $b"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+            TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+                !strconcat(instr_asm, "   $dst, $c, $b"),
+                [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
 
 class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c),
+             TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
                  !strconcat(instr_asm, "   $dst, $c, $b"),
-                 [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>;
+                 [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>;
 
 class ArithN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
                [], itin>;
 
 class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], IIAlu>;
 
 class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
             InstrItinClass itin> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
-               !strconcat(instr_asm, "   $dst, $b, $c"),
-               [], itin>;
+            TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [], itin>;
 
 class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
-             TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b),
+             TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
                  !strconcat(instr_asm, "   $dst, $b, $c"),
                  [], IIAlu>;
 
@@ -221,135 +222,179 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
 //===----------------------------------------------------------------------===//
 
 class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
-            TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+            TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
                !strconcat(instr_asm, "   $dst, $b, $c"),
-               [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+               [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>;
 
 class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
-                 !strconcat(instr_asm, "   $dst, $b, $c"),
-                 [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))],
-                 IIAlu>;
-
-class EffectiveAddress<string instr_asm> :
-          TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr),
-              instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>;
+             TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+                IIAlu>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+               TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+                  !strconcat(instr_asm, "   $dst, $b, $c"),
+                  [], IIAlu>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+             TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+                !strconcat(instr_asm, "   $dst, $b, $c"),
+                 [], IIAlu>;
 
 //===----------------------------------------------------------------------===//
 // Memory Access Instructions
 //===----------------------------------------------------------------------===//
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> :
-            TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr),
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+            TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
                !strconcat(instr_asm, "   $dst, $addr"),
-               [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>;
+               [], IILoad>;
 
 class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TAI<op, (outs CPURegs:$dst), (ins memri:$addr),
-                 !strconcat(instr_asm, "   $dst, $addr"),
-                 [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>;
+             TB<op, (outs GPR:$dst), (ins memri:$addr),
+                !strconcat(instr_asm, "   $dst, $addr"),
+                [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
 
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> :
-             TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr),
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+             TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
                 !strconcat(instr_asm, "   $dst, $addr"),
-                [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>;
+                [], IIStore>;
 
 class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
-              TAI<op, (outs), (ins CPURegs:$dst, memri:$addr),
-                  !strconcat(instr_asm, "   $dst, $addr"),
-                  [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>;
+              TB<op, (outs), (ins GPR:$dst, memri:$addr),
+                 !strconcat(instr_asm, "   $dst, $addr"),
+                 [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>;
 
 //===----------------------------------------------------------------------===//
 // Branch Instructions
 //===----------------------------------------------------------------------===//
 class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
-             TBR<op, br, flags, (outs), (ins CPURegs:$target),
-                 !strconcat(instr_asm, "   $target"),
-                 [(brind CPURegs:$target)], IIBranch>;
+             TA<op, flags, (outs), (ins GPR:$target),
+                !strconcat(instr_asm, "   $target"),
+                [], IIBranch> {
+  let rd = 0x0;
+  let ra = br;
+  let Form = FCCR;
+}
 
-class BranchI<bits<6> op, bits<5> brf, string instr_asm> :
-              TBRI<op, brf, (outs), (ins brtarget:$target),
-                   !strconcat(instr_asm, "   $target"),
-                   [(br bb:$target)], IIBranch>;
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+              TB<op, (outs), (ins brtarget:$target),
+                 !strconcat(instr_asm, "   $target"),
+                 [], IIBranch> {
+  let rd = 0;
+  let ra = br;
+  let Form = FCCI;
+}
 
 //===----------------------------------------------------------------------===//
 // Branch and Link Instructions
 //===----------------------------------------------------------------------===//
 class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
-              TBRL<op, br, flags, (outs), (ins CPURegs:$target),
-                   !strconcat(instr_asm, "   r15, $target"),
-                   [], IIBranch>;
+              TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+                 !strconcat(instr_asm, "   $link, $target"),
+                 [], IIBranch> {
+  let ra = br;
+  let Form = FRCR;
+}
 
 class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
-               TBRLI<op, br, (outs), (ins calltarget:$target),
-                     !strconcat(instr_asm, "   r15, $target"),
-                     [], IIBranch>;
+               TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+                  !strconcat(instr_asm, "   $link, $target"),
+                  [], IIBranch> {
+  let ra = br;
+  let Form = FRCI;
+}
 
 //===----------------------------------------------------------------------===//
 // Conditional Branch Instructions
 //===----------------------------------------------------------------------===//
-class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm,
-              PatFrag cond_op> :
-              TBRC<op, br, flags, (outs),
-                   (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
-                   !strconcat(instr_asm, "   $a, $b, $offset"),
-                   [], IIBranch>; 
-                   //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
-                   //IIBranch>;
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+              TA<op, flags, (outs),
+                 (ins GPR:$a, GPR:$b),
+                 !strconcat(instr_asm, "   $a, $b"),
+                 [], IIBranch> {
+  let rd = br;
+  let Form = FCRR;
+}
 
-class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> :
-               TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset),
-                     !strconcat(instr_asm, "   $a, $offset"),
-                     [], IIBranch>;
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+               TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+                  !strconcat(instr_asm, "   $a, $offset"),
+                  [], IIBranch> {
+  let rd = br;
+  let Form = FCRI;
+}
 
 //===----------------------------------------------------------------------===//
 // MBlaze arithmetic instructions
 //===----------------------------------------------------------------------===//
 
 let isCommutable = 1, isAsCheapAsAMove = 1 in {
-    def ADD    :  Arith<0x00, 0x000, "add    ", add,  IIAlu>;
-    def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIAlu>;
-    def ADDK   :  Arith<0x04, 0x000, "addk   ", addc, IIAlu>;
+  def ADDK   :  Arith<0x04, 0x000, "addk   ", add,  IIAlu>;
+  def AND    :  Logic<0x21, 0x000, "and    ", and>;
+  def OR     :  Logic<0x20, 0x000, "or     ", or>;
+  def XOR    :  Logic<0x22, 0x000, "xor    ", xor>;
+  def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+  def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+  def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+
+  let Defs = [CARRY] in {
+    def ADD    :  Arith<0x00, 0x000, "add    ", addc, IIAlu>;
+
+    let Uses = [CARRY] in {
+      def ADDC   :  Arith<0x02, 0x000, "addc   ", adde, IIAlu>;
+    }
+  }
+
+  let Uses = [CARRY] in {
     def ADDKC  : ArithN<0x06, 0x000, "addkc  ", IIAlu>;
-    def AND    :  Logic<0x21, 0x000, "and    ", and>;
-    def OR     :  Logic<0x20, 0x000, "or     ", or>;
-    def XOR    :  Logic<0x22, 0x000, "xor    ", xor>;
+  }
 }
 
 let isAsCheapAsAMove = 1 in {
-    def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIAlu>;
-    def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIAlu>;
-    def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIAlu>;
-    def RSUB   :  ArithR<0x01, 0x000, "rsub   ", sub,  IIAlu>;
-    def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIAlu>;
-    def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", subc, IIAlu>;
+  def ANDN   :  ArithN<0x23, 0x000, "andn   ", IIAlu>;
+  def CMP    :  ArithN<0x05, 0x001, "cmp    ", IIAlu>;
+  def CMPU   :  ArithN<0x05, 0x003, "cmpu   ", IIAlu>;
+  def RSUBK  :  ArithR<0x05, 0x000, "rsubk  ", sub,  IIAlu>;
+
+  let Defs = [CARRY] in {
+    def RSUB   :  ArithR<0x01, 0x000, "rsub   ", subc, IIAlu>;
+
+    let Uses = [CARRY] in {
+      def RSUBC  :  ArithR<0x03, 0x000, "rsubc  ", sube, IIAlu>;
+    }
+  }
+
+  let Uses = [CARRY] in {
     def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+  }
 }
 
 let isCommutable = 1, Predicates=[HasMul] in {
-    def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIAlu>;
+  def MUL    : Arith<0x10, 0x000, "mul    ", mul,   IIAlu>;
 }
 
 let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
-    def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIAlu>;
-    def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIAlu>;
+  def MULH   : Arith<0x10, 0x001, "mulh   ", mulhs, IIAlu>;
+  def MULHU  : Arith<0x10, 0x003, "mulhu  ", mulhu, IIAlu>;
 }
 
 let Predicates=[HasMul,HasMul64] in {
-    def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+  def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
 }
 
 let Predicates=[HasBarrel] in {
-    def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIAlu>;
-    def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIAlu>;
-    def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIAlu>;
-    def BSRLI  :  ArithI<0x11, "bsrli  ", srl, uimm5, immZExt5>;
-    def BSRAI  :  ArithI<0x11, "bsrai  ", sra, uimm5, immZExt5>;
-    def BSLLI  :  ArithI<0x11, "bslli  ", shl, uimm5, immZExt5>;
+  def BSRL   :   Arith<0x11, 0x000, "bsrl   ", srl, IIAlu>;
+  def BSRA   :   Arith<0x11, 0x200, "bsra   ", sra, IIAlu>;
+  def BSLL   :   Arith<0x11, 0x400, "bsll   ", shl, IIAlu>;
+  def BSRLI  :  ShiftI<0x19, 0x0, "bsrli  ", srl, uimm5, immZExt5>;
+  def BSRAI  :  ShiftI<0x19, 0x1, "bsrai  ", sra, uimm5, immZExt5>;
+  def BSLLI  :  ShiftI<0x19, 0x2, "bslli  ", shl, uimm5, immZExt5>;
 }
 
 let Predicates=[HasDiv] in {
-    def IDIV   :  Arith<0x12, 0x000, "idiv   ", sdiv, IIAlu>;
-    def IDIVU  :  Arith<0x12, 0x002, "idivu  ", udiv, IIAlu>;
+  def IDIV   :  ArithR<0x12, 0x000, "idiv   ", sdiv, IIAlu>;
+  def IDIVU  :  ArithR<0x12, 0x002, "idivu  ", udiv, IIAlu>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -357,22 +402,31 @@ let Predicates=[HasDiv] in {
 //===----------------------------------------------------------------------===//
 
 let isAsCheapAsAMove = 1 in {
-    def ADDI    :   ArithI<0x08, "addi   ", add,  simm16, immSExt16>;
-    def ADDIC   :  ArithNI<0x0A, "addic  ", simm16, immSExt16>;
-    def ADDIK   :  ArithNI<0x0C, "addik  ", simm16, immSExt16>;
-    def ADDIKC  :   ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
-    def RSUBI   :   ArithRI<0x09, "rsubi  ", sub,  simm16, immSExt16>;
-    def RSUBIC  :  ArithRNI<0x0B, "rsubi  ", simm16, immSExt16>;
-    def RSUBIK  :  ArithRNI<0x0E, "rsubic ", simm16, immSExt16>;
-    def RSUBIKC :   ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
-    def ANDNI   :  ArithNI<0x2B, "andni  ", uimm16, immZExt16>;
-    def ANDI    :   LogicI<0x29, "andi   ", and>;
-    def ORI     :   LogicI<0x28, "ori    ", or>;
-    def XORI    :   LogicI<0x2A, "xori   ", xor>;
+  def ADDIK   :   ArithI<0x0C, "addik  ", add,  simm16, immSExt16>;
+  def RSUBIK  :  ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+  def ANDNI   :  ArithNI<0x2B, "andni  ", uimm16, immZExt16>;
+  def ANDI    :   LogicI<0x29, "andi   ", and>;
+  def ORI     :   LogicI<0x28, "ori    ", or>;
+  def XORI    :   LogicI<0x2A, "xori   ", xor>;
+
+  let Defs = [CARRY] in {
+    def ADDI    :   ArithI<0x08, "addi   ", addc, simm16, immSExt16>;
+    def RSUBI   :  ArithRI<0x09, "rsubi  ", subc,  simm16, immSExt16>;
+
+    let Uses = [CARRY] in {
+      def ADDIC   :   ArithI<0x0A, "addic  ", adde, simm16, immSExt16>;
+      def RSUBIC  :  ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+    }
+  }
+
+  let Uses = [CARRY] in {
+    def ADDIKC  :  ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+    def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+  }
 }
 
 let Predicates=[HasMul] in {
-    def MULI    :   ArithI<0x18, "muli   ", mul, simm16, immSExt16>;
+  def MULI    :   ArithI<0x18, "muli   ", mul, simm16, immSExt16>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -380,290 +434,445 @@ let Predicates=[HasMul] in {
 //===----------------------------------------------------------------------===//
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-    def LBU  :  LoadM<0x30, "lbu    ", zextloadi8>;
-    def LHU  :  LoadM<0x31, "lhu    ", zextloadi16>;
-    def LW   :  LoadM<0x32, "lw     ", load>;
+  def LBU  :  LoadM<0x30, 0x000, "lbu    ">;
+  def LBUR :  LoadM<0x30, 0x200, "lbur   ">;
+
+  def LHU  :  LoadM<0x31, 0x000, "lhu    ">;
+  def LHUR :  LoadM<0x31, 0x200, "lhur   ">;
+
+  def LW   :  LoadM<0x32, 0x000, "lw     ">;
+  def LWR  :  LoadM<0x32, 0x200, "lwr    ">;
 
-    def LBUI : LoadMI<0x30, "lbui   ", zextloadi8>;
-    def LHUI : LoadMI<0x31, "lhui   ", zextloadi16>;
-    def LWI  : LoadMI<0x32, "lwi    ", load>;
+  let Defs = [CARRY] in {
+    def LWX  :  LoadM<0x32, 0x400, "lwx    ">;
+  }
+
+  def LBUI : LoadMI<0x38, "lbui   ", zextloadi8>;
+  def LHUI : LoadMI<0x39, "lhui   ", zextloadi16>;
+  def LWI  : LoadMI<0x3A, "lwi    ", load>;
 }
 
-    def SB  :  StoreM<0x34, "sb     ", truncstorei8>;
-    def SH  :  StoreM<0x35, "sh     ", truncstorei16>;
-    def SW  :  StoreM<0x36, "sw     ", store>;
+def SB  :  StoreM<0x34, 0x000, "sb     ">;
+def SBR :  StoreM<0x34, 0x200, "sbr    ">;
+
+def SH  :  StoreM<0x35, 0x000, "sh     ">;
+def SHR :  StoreM<0x35, 0x200, "shr    ">;
+
+def SW  :  StoreM<0x36, 0x000, "sw     ">;
+def SWR :  StoreM<0x36, 0x200, "swr    ">;
 
-    def SBI : StoreMI<0x34, "sbi    ", truncstorei8>;
-    def SHI : StoreMI<0x35, "shi    ", truncstorei16>;
-    def SWI : StoreMI<0x36, "swi    ", store>;
+let Defs = [CARRY] in {
+  def SWX :  StoreM<0x36, 0x400, "swx    ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi    ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi    ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi    ", store>;
 
 //===----------------------------------------------------------------------===//
 // MBlaze branch instructions
 //===----------------------------------------------------------------------===//
 
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+  def BRI    :  BranchI<0x2E, 0x00, "bri    ">;
+  def BRAI   :  BranchI<0x2E, 0x08, "brai   ">;
+}
+
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-    def BRI    :  BranchI<0x2E, 0x00, "bri    ">;
-    def BRAI   :  BranchI<0x2E, 0x08, "brai   ">;
-    def BEQI   : BranchCI<0x2F, 0x00, "beqi   ", seteq>;
-    def BNEI   : BranchCI<0x2F, 0x01, "bnei   ", setne>;
-    def BLTI   : BranchCI<0x2F, 0x02, "blti   ", setlt>;
-    def BLEI   : BranchCI<0x2F, 0x03, "blei   ", setle>;
-    def BGTI   : BranchCI<0x2F, 0x04, "bgti   ", setgt>;
-    def BGEI   : BranchCI<0x2F, 0x05, "bgei   ", setge>;
+  def BEQI   : BranchCI<0x2F, 0x00, "beqi   ">;
+  def BNEI   : BranchCI<0x2F, 0x01, "bnei   ">;
+  def BLTI   : BranchCI<0x2F, 0x02, "blti   ">;
+  def BLEI   : BranchCI<0x2F, 0x03, "blei   ">;
+  def BGTI   : BranchCI<0x2F, 0x04, "bgti   ">;
+  def BGEI   : BranchCI<0x2F, 0x05, "bgei   ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+    isBarrier = 1 in {
+  def BR     :   Branch<0x26, 0x00, 0x000, "br     ">;
+  def BRA    :   Branch<0x26, 0x08, 0x000, "bra    ">;
 }
 
 let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-    def BR     :   Branch<0x26, 0x00, 0x000, "br     ">;
-    def BRA    :   Branch<0x26, 0x08, 0x000, "bra    ">;
-    def BEQ    :  BranchC<0x27, 0x00, 0x000, "beq    ", seteq>;
-    def BNE    :  BranchC<0x27, 0x01, 0x000, "bne    ", setne>;
-    def BLT    :  BranchC<0x27, 0x02, 0x000, "blt    ", setlt>;
-    def BLE    :  BranchC<0x27, 0x03, 0x000, "ble    ", setle>;
-    def BGT    :  BranchC<0x27, 0x04, 0x000, "bgt    ", setgt>;
-    def BGE    :  BranchC<0x27, 0x05, 0x000, "bge    ", setge>;
+  def BEQ    :  BranchC<0x27, 0x00, 0x000, "beq    ">;
+  def BNE    :  BranchC<0x27, 0x01, 0x000, "bne    ">;
+  def BLT    :  BranchC<0x27, 0x02, 0x000, "blt    ">;
+  def BLE    :  BranchC<0x27, 0x03, 0x000, "ble    ">;
+  def BGT    :  BranchC<0x27, 0x04, 0x000, "bgt    ">;
+  def BGE    :  BranchC<0x27, 0x05, 0x000, "bge    ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+    isBarrier = 1 in {
+  def BRID   :  BranchI<0x2E, 0x10, "brid   ">;
+  def BRAID  :  BranchI<0x2E, 0x18, "braid  ">;
 }
 
 let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
-    def BRID   :  BranchI<0x2E, 0x10, "brid   ">;
-    def BRAID  :  BranchI<0x2E, 0x18, "braid  ">;
-    def BEQID  : BranchCI<0x2F, 0x10, "beqid  ", seteq>;
-    def BNEID  : BranchCI<0x2F, 0x11, "bneid  ", setne>;
-    def BLTID  : BranchCI<0x2F, 0x12, "bltid  ", setlt>;
-    def BLEID  : BranchCI<0x2F, 0x13, "bleid  ", setle>;
-    def BGTID  : BranchCI<0x2F, 0x14, "bgtid  ", setgt>;
-    def BGEID  : BranchCI<0x2F, 0x15, "bgeid  ", setge>;
+  def BEQID  : BranchCI<0x2F, 0x10, "beqid  ">;
+  def BNEID  : BranchCI<0x2F, 0x11, "bneid  ">;
+  def BLTID  : BranchCI<0x2F, 0x12, "bltid  ">;
+  def BLEID  : BranchCI<0x2F, 0x13, "bleid  ">;
+  def BGTID  : BranchCI<0x2F, 0x14, "bgtid  ">;
+  def BGEID  : BranchCI<0x2F, 0x15, "bgeid  ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+    hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+  def BRD    :   Branch<0x26, 0x10, 0x000, "brd    ">;
+  def BRAD   :   Branch<0x26, 0x18, 0x000, "brad   ">;
 }
 
 let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
     hasDelaySlot = 1, hasCtrlDep = 1 in {
-    def BRD    :   Branch<0x26, 0x10, 0x000, "brd    ">;
-    def BRAD   :   Branch<0x26, 0x18, 0x000, "brad   ">;
-    def BEQD   :  BranchC<0x27, 0x10, 0x000, "beqd   ", seteq>;
-    def BNED   :  BranchC<0x27, 0x11, 0x000, "bned   ", setne>;
-    def BLTD   :  BranchC<0x27, 0x12, 0x000, "bltd   ", setlt>;
-    def BLED   :  BranchC<0x27, 0x13, 0x000, "bled   ", setle>;
-    def BGTD   :  BranchC<0x27, 0x14, 0x000, "bgtd   ", setgt>;
-    def BGED   :  BranchC<0x27, 0x15, 0x000, "bged   ", setge>;
+  def BEQD   :  BranchC<0x27, 0x10, 0x000, "beqd   ">;
+  def BNED   :  BranchC<0x27, 0x11, 0x000, "bned   ">;
+  def BLTD   :  BranchC<0x27, 0x12, 0x000, "bltd   ">;
+  def BLED   :  BranchC<0x27, 0x13, 0x000, "bled   ">;
+  def BGTD   :  BranchC<0x27, 0x14, 0x000, "bgtd   ">;
+  def BGED   :  BranchC<0x27, 0x15, 0x000, "bged   ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+    Uses = [R1] in {
+  def BRLID  : BranchLI<0x2E, 0x14, "brlid  ">;
+  def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+    Uses = [R1] in {
+  def BRLD   : BranchL<0x26, 0x14, 0x000, "brld   ">;
+  def BRALD  : BranchL<0x26, 0x1C, 0x000, "brald  ">;
 }
 
-let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1,
-    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
-    Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRL    : BranchL<0x26, 0x04, 0x000, "brl    ">;
-    def BRAL   : BranchL<0x26, 0x0C, 0x000, "bral   ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+    rd=0x10, Form=FCRI in {
+  def RTSD   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+                  "rtsd      $target, $imm",
+                  [],
+                  IIBranch>;
 }
 
-let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1,
-    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
-    Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRLID  : BranchLI<0x2E, 0x14, "brlid  ">;
-    def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+    rd=0x11, Form=FCRI in {
+  def RTID   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+                  "rtid      $target, $imm",
+                  [],
+                  IIBranch>;
 }
 
-let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1,
-    Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
-    Uses = [R1,R5,R6,R7,R8,R9,R10] in {
-    def BRLD   : BranchL<0x26, 0x14, 0x000, "brld   ">;
-    def BRALD  : BranchL<0x26, 0x1C, 0x000, "brald  ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+    rd=0x12, Form=FCRI in {
+  def RTBD   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+                  "rtbd      $target, $imm",
+                  [],
+                  IIBranch>;
 }
 
-let isReturn=1, isTerminator=1, hasDelaySlot=1,
-    isBarrier=1, hasCtrlDep=1, imm16=0x8 in {
-    def RTSD   : TRET<0x2D, (outs), (ins CPURegs:$target),
-                      "rtsd      $target, 8",
-                      [(MBlazeRet CPURegs:$target)],
-                      IIBranch>;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+    rd=0x14, Form=FCRI in {
+  def RTED   : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+                  "rted      $target, $imm",
+                  [],
+                  IIBranch>;
 }
 
 //===----------------------------------------------------------------------===//
 // MBlaze misc instructions
 //===----------------------------------------------------------------------===//
 
-let addr = 0 in {
-    def NOP :  TADDR<0x00, (outs), (ins), "nop    ", [], IIAlu>;
+let neverHasSideEffects = 1 in {
+  def NOP :  MBlazeInst< 0x20, FC, (outs), (ins), "nop    ", [], IIAlu>;
 }
 
 let usesCustomInserter = 1 in {
-  //class PseudoSelCC<RegisterClass RC, string asmstr>:
-  //  MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr,
-  //  [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>;
-  //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">;
-
-  def Select_CC : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC),
+  def Select_CC : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
     "; SELECT_CC PSEUDO!",
     []>;
 
-  def ShiftL : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftL : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftL PSEUDO!",
     []>;
 
-  def ShiftRA : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftRA PSEUDO!",
     []>;
 
-  def ShiftRL : MBlazePseudo<(outs CPURegs:$dst),
-    (ins CPURegs:$L, CPURegs:$R),
+  def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+    (ins GPR:$L, GPR:$R),
     "; ShiftRL PSEUDO!",
     []>;
 }
 
-
 let rb = 0 in {
-    def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sext16  $dst, $src", [], IIAlu>;
-    def SEXT8  : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sext8   $dst, $src", [], IIAlu>;
-    def SRL    : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "srl     $dst, $src", [], IIAlu>;
-    def SRA    : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "sra     $dst, $src", [], IIAlu>;
-    def SRC    : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src),
-                    "src     $dst, $src", [], IIAlu>;
+  def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+                  "sext16    $dst, $src", [], IIAlu>;
+  def SEXT8  : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+                  "sext8     $dst, $src", [], IIAlu>;
+  let Defs = [CARRY] in {
+    def SRL    : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+                    "srl       $dst, $src", [], IIAlu>;
+    def SRA    : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+                    "sra       $dst, $src", [], IIAlu>;
+    let Uses = [CARRY] in {
+      def SRC    : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+                      "src       $dst, $src", [], IIAlu>;
+    }
+  }
+}
+
+let isCodeGenOnly=1 in {
+  def ADDIK32 : ArithI32<0x08, "addik  ", simm16, immSExt16>;
+  def ORI32   : LogicI32<0x28, "ori    ">;
+  def BRLID32 : BranchLI<0x2E, 0x14, "brlid  ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+  def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+                "mfs       $dst, $src", [], IIAlu>;
+}
+
+let Form=FCRCS in {
+  def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+                "mts       $dst, $src", [], IIAlu>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+                 "msrset    $dst, $set", [], IIAlu>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+                 "msrclr    $dst, $clr", [], IIAlu>;
+
+let rd=0x0, Form=FCRR in {
+  def WDC  : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+                "wdc       $a, $b", [], IIAlu>;
+  def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+                "wdc.flush $a, $b", [], IIAlu>;
+  def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+                "wdc.clear $a, $b", [], IIAlu>;
+  def WIC  : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+                "wic       $a, $b", [], IIAlu>;
 }
 
-def LEA_ADDI : EffectiveAddress<"addi    $dst, ${addr:stackloc}">;
+def BRK  :  BranchL<0x26, 0x0C, 0x000, "brk    ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki   ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+                     "imm       $imm", [], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+  def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+    "# atomic compare and swap",
+    [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+  def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+    "# atomic swap",
+    [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+  def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and add",
+    [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+  def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and sub",
+    [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+  def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and and",
+    [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+  def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and or",
+    [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+  def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and xor",
+    [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+  def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+    "# atomic load and nand",
+    [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+  def MEMBARRIER : MBlazePseudo<(outs), (ins),
+    "# memory barrier",
+    [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
 
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
 
 // Small immediates
-def : Pat<(i32 0), (ADD R0, R0)>;
-def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>;
-def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>;
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
 
 // Arbitrary immediates
-def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>;
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
 
 // In register sign extension
-def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>;
-def : Pat<(sext_inreg CPURegs:$src, i8),  (SEXT8 CPURegs:$src)>;
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8),  (SEXT8 GPR:$src)>;
 
 // Call
-def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>;
-def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>;
-def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>;
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+          (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+          (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+          (BRALD (i32 R15), GPR:$dst)>;
 
 // Shift Instructions
-def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>;
-def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>;
-def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>;
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
 
 // SET_CC operations
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE),
-          (Select_CC (ADDI R0, 1), (ADDI R0, 0), 
-                     (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+          (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+                     (CMPU GPR:$R, GPR:$L), 6)>;
 
 // SELECT operations
-def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F),
-          (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>;
-
-// SELECT_CC 
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE),
-          (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+          (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETNE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETGT),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETLT),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETGE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETLE),
+          (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETULT),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+                    (i32 GPR:$T), (i32 GPR:$F), SETULE),
+          (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
 
 // BRCOND instructions
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T),
-          (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T),
-          (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T),
-          (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T),
-          (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T),
-          (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T),
-          (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T),
-          (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T),
-          (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T),
-          (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T),
-          (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond CPURegs:$C, bb:$T),
-          (BNEID CPURegs:$C, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+          (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+          (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+          (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+          (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+          (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+          (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+          (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+          (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+          (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+          (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+          (BNEID GPR:$C, bb:$T)>;
 
 // Jump tables, global addresses, and constant pools
-def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
-def : Pat<(MBWrapper tjumptable:$in),  (ORI R0, tjumptable:$in)>;
-def : Pat<(MBWrapper tconstpool:$in),  (ORI R0, tconstpool:$in)>;
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in),  (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in),  (ORI (i32 R0), tconstpool:$in)>;
 
 // Misc instructions
-def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>;
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
 
 // Arithmetic with immediates
-def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>;
-def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>;
-def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>;
-
-// extended load and stores
-def : Pat<(extloadi1  iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi8  iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>;
-def : Pat<(extloadi1  xaddr:$src), (LBU  xaddr:$src)>;
-def : Pat<(extloadi8  xaddr:$src), (LBU  xaddr:$src)>;
-def : Pat<(extloadi16 xaddr:$src), (LHU  xaddr:$src)>;
-
-def : Pat<(sextloadi1  iaddr:$src), (SEXT8  (LBUI iaddr:$src))>;
-def : Pat<(sextloadi8  iaddr:$src), (SEXT8  (LBUI iaddr:$src))>;
-def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>;
-def : Pat<(sextloadi1  xaddr:$src), (SEXT8  (LBU xaddr:$src))>;
-def : Pat<(sextloadi8  xaddr:$src), (SEXT8  (LBU xaddr:$src))>;
-def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>;
-
-// peepholes
-def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>;
+def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>;
+def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>;
+def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8  iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8  xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
 
 //===----------------------------------------------------------------------===//
 // Floating Point Support
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 4931860912a1..7e4a2f5c945e 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -48,7 +48,7 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
   assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
   if (IntrID < Intrinsic::num_intrinsics)
     return 0;
-  assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && 
+  assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
          "Invalid intrinsic ID");
 
   std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
@@ -94,12 +94,12 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
   const Type *ResultTy = NULL;
   std::vector<const Type*> ArgTys;
   bool IsVarArg = false;
-  
+
 #define GET_INTRINSIC_GENERATOR
 #include "MBlazeGenIntrinsics.inc"
 #undef GET_INTRINSIC_GENERATOR
 
-  return FunctionType::get(ResultTy, ArgTys, IsVarArg); 
+  return FunctionType::get(ResultTy, ArgTys, IsVarArg);
 }
 
 Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index a27cb5ba0dc4..278afbefc165 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -1,10 +1,10 @@
 //===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file defines all of the MicroBlaze-specific intrinsics.
@@ -16,7 +16,7 @@
 //
 
 // MBlaze intrinsic classes.
-let TargetPrefix = "mblaze", isTarget = 1 in { 
+let TargetPrefix = "mblaze", isTarget = 1 in {
   class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
 
   class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
index 4abeb2ed5d6b..1467141d34ae 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -14,14 +14,9 @@
 #include "MBlazeMCAsmInfo.h"
 using namespace llvm;
 
-MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) {
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+  SupportsDebugInformation    = true;
   AlignmentIsInBytes          = false;
-  Data16bitsDirective         = "\t.half\t";
-  Data32bitsDirective         = "\t.word\t";
-  Data64bitsDirective         = 0;
   PrivateGlobalPrefix         = "$";
-  CommentString               = "#";
-  ZeroDirective               = "\t.space\t";
   GPRel32Directive            = "\t.gpword\t";
-  HasSetDirective             = false;
 }
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
index 9d6ff3a11e78..e68dd58b016b 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -19,10 +19,10 @@
 
 namespace llvm {
   class Target;
-  
+
   class MBlazeMCAsmInfo : public MCAsmInfo {
   public:
-    explicit MBlazeMCAsmInfo(const Target &T, StringRef TT);
+    explicit MBlazeMCAsmInfo();
   };
 
 } // namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 000000000000..3ece1a8a340d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,223 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+  MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+  const TargetMachine &TM;
+  const TargetInstrInfo &TII;
+  MCContext &Ctx;
+
+public:
+  MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+    : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
+  }
+
+  ~MBlazeMCCodeEmitter() {}
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+
+  /// getMachineOpValue - Return binary encoding of operand. If the machine
+  /// operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+  unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+    return getMachineOpValue(MI, MI.getOperand(OpIdx));
+  }
+
+  static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+    // FIXME: getMBlazeRegisterNumbering() is sufficient?
+    assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
+    return 0;
+  }
+
+  void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+    // The MicroBlaze uses a bit reversed format so we need to reverse the
+    // order of the bits. Taken from:
+    // http://graphics.stanford.edu/~seander/bithacks.html
+    C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
+
+    OS << (char)C;
+    ++CurByte;
+  }
+
+  void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+    OS << (char)C;
+    ++CurByte;
+  }
+
+  void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+                    raw_ostream &OS) const {
+    assert(Size <= 8 && "size too big in emit constant");
+
+    for (unsigned i = 0; i != Size; ++i) {
+      EmitByte(Val & 255, CurByte, OS);
+      Val >>= 8;
+    }
+  }
+
+  void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+  void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+  void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+                     unsigned &CurByte, raw_ostream &OS,
+                     SmallVectorImpl<MCFixup> &Fixups) const;
+
+  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &,
+                                               TargetMachine &TM,
+                                               MCContext &Ctx) {
+  return new MBlazeMCCodeEmitter(TM, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                             const MCOperand &MO) const {
+  if (MO.isReg())
+    return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+  else if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isExpr())
+      return 0; // The relocation has already been recorded at this point.
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable(0);
+  }
+  return 0;
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+  int32_t val = (int32_t)imm.getImm();
+  if (val > 32767 || val < -32768) {
+    EmitByte(0x0D, CurByte, OS);
+    EmitByte(0x00, CurByte, OS);
+    EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+    EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+  }
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
+  switch (MI.getOpcode()) {
+  default: break;
+
+  case MBlaze::ADDIK32:
+  case MBlaze::ORI32:
+  case MBlaze::BRLID32:
+    EmitByte(0x0D, CurByte, OS);
+    EmitByte(0x00, CurByte, OS);
+    EmitRawByte(0, CurByte, OS);
+    EmitRawByte(0, CurByte, OS);
+  }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+              raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getNumOperands()>opNo && "Not enought operands for instruction");
+
+  MCOperand oper = MI.getOperand(opNo);
+
+  if (oper.isImm()) {
+    EmitIMM(oper, CurByte, OS);
+  } else if (oper.isExpr()) {
+    MCFixupKind FixupKind;
+    switch (MI.getOpcode()) {
+    default:
+      FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+      Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+      break;
+    case MBlaze::ORI32:
+    case MBlaze::ADDIK32:
+    case MBlaze::BRLID32:
+      FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+      Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+      break;
+    }
+  }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = TII.get(Opcode);
+  uint64_t TSFlags = Desc.TSFlags;
+  // Keep track of the current byte being emitted.
+  unsigned CurByte = 0;
+
+  // Emit an IMM instruction if the instruction we are encoding requires it
+  EmitIMM(MI,CurByte,OS);
+
+  switch ((TSFlags & MBlazeII::FormMask)) {
+  default: break;
+  case MBlazeII::FPseudo:
+    // Pseudo instructions don't get encoded.
+    return;
+  case MBlazeII::FRRI:
+    EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FRIR:
+    EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FCRI:
+    EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FRCI:
+    EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+  case MBlazeII::FCCI:
+    EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+    break;
+  }
+
+  ++MCNumEmitted;  // Keep track of the # of mi's emitted
+  unsigned Value = getBinaryCodeForInstr(MI);
+  EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 000000000000..a7e400b1d1a4
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0:  break;
+  }
+
+  return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0:  break;
+  }
+
+  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0:  break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default:
+      llvm_unreachable("Unknown target flag on GV operand");
+
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default:
+      assert(0 && "Unknown target flag on GV operand");
+
+  case 0: break;
+  }
+
+  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+  // FIXME: We would like an efficient form for this, so we don't have to do a
+  // lot of extra uniquing.
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+  switch (MO.getTargetFlags()) {
+  default:
+      llvm_unreachable("Unknown target flag on GV operand");
+
+  case 0: break;
+  }
+
+  if (!MO.isJTI() && MO.getOffset())
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
+                                   Ctx);
+  return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+
+    MCOperand MCOp;
+    switch (MO.getType()) {
+    default: llvm_unreachable("unknown operand type");
+    case MachineOperand::MO_Register:
+      // Ignore all implicit register operands.
+      if (MO.isImplicit()) continue;
+      MCOp = MCOperand::CreateReg(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      MCOp = MCOperand::CreateImm(MO.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+                         MO.getMBB()->getSymbol(), Ctx));
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_FPImmediate:
+      bool ignored;
+      APFloat FVal = MO.getFPImm()->getValueAPF();
+      FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+      APInt IVal = FVal.bitcastToAPInt();
+      uint64_t Val = *IVal.getRawData();
+      MCOp = MCOperand::CreateImm(Val);
+      break;
+    }
+
+    OutMI.addOperand(MCOp);
+  }
+}
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 000000000000..92196f220225
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class AsmPrinter;
+  class MCAsmInfo;
+  class MCContext;
+  class MCInst;
+  class MCOperand;
+  class MCSymbol;
+  class MachineInstr;
+  class MachineModuleInfoMachO;
+  class MachineOperand;
+  class Mangler;
+
+  /// MBlazeMCInstLower - This class is used to lower an MachineInstr
+  /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+  MCContext &Ctx;
+  Mangler &Mang;
+
+  AsmPrinter &Printer;
+public:
+  MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+    : Ctx(ctx), Mang(mang), Printer(printer) {}
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 1f956c1f90fb..df395094282f 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -14,6 +14,7 @@
 #ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
 #define MBLAZE_MACHINE_FUNCTION_INFO_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -26,20 +27,14 @@ namespace llvm {
 class MBlazeFunctionInfo : public MachineFunctionInfo {
 
 private:
-  /// Holds for each function where on the stack the Frame Pointer must be 
+  /// Holds for each function where on the stack the Frame Pointer must be
   /// saved. This is used on Prologue and Epilogue to emit FP save/restore
   int FPStackOffset;
 
-  /// Holds for each function where on the stack the Return Address must be 
+  /// Holds for each function where on the stack the Return Address must be
   /// saved. This is used on Prologue and Epilogue to emit RA save/restore
   int RAStackOffset;
 
-  /// At each function entry a special bitmask directive must be emitted
-  /// to help in debugging CPU callee saved registers. It needs a negative
-  /// offset from the final stack size and its higher register location on
-  /// the stack.
-  int CPUTopSavedRegOff;
-
   /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset
   struct MBlazeFIHolder {
 
@@ -50,25 +45,30 @@ private:
       : FI(FrameIndex), SPOffset(StackPointerOffset) {}
   };
 
-  /// When PIC is used the GP must be saved on the stack on the function 
-  /// prologue and must be reloaded from this stack location after every 
-  /// call. A reference to its stack location and frame index must be kept 
+  /// When PIC is used the GP must be saved on the stack on the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
   /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
   MBlazeFIHolder GPHolder;
 
   /// On LowerFormalArguments the stack size is unknown, so the Stack
-  /// Pointer Offset calculation of "not in register arguments" must be 
-  /// postponed to emitPrologue. 
+  /// Pointer Offset calculation of "not in register arguments" must be
+  /// postponed to emitPrologue.
   SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
   bool HasLoadArgs;
 
-  // When VarArgs, we must write registers back to caller stack, preserving 
-  // on register arguments. Since the stack size is unknown on 
+  // When VarArgs, we must write registers back to caller stack, preserving
+  // on register arguments. Since the stack size is unknown on
   // LowerFormalArguments, the Stack Pointer Offset calculation must be
-  // postponed to emitPrologue. 
+  // postponed to emitPrologue.
   SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
   bool HasStoreVarArgs;
 
+  // When determining the final stack layout some of the frame indexes may
+  // be replaced by new frame indexes that reside in the caller's stack
+  // frame. The replacements are recorded in this structure.
+  DenseMap<int,int> FIReplacements;
+
   /// SRetReturnReg - Some subtargets require that sret lowering includes
   /// returning the value of the returned struct in a register. This field
   /// holds the virtual register into which the sret argument is passed.
@@ -82,11 +82,15 @@ private:
   // VarArgsFrameIndex - FrameIndex for start of varargs area.
   int VarArgsFrameIndex;
 
+  /// LiveInFI - keeps track of the frame indexes in a callers stack
+  /// frame that are live into a function.
+  SmallVector<int, 16> LiveInFI;
+
 public:
-  MBlazeFunctionInfo(MachineFunction& MF) 
-  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), 
-    GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
-    SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0)
+  MBlazeFunctionInfo(MachineFunction& MF)
+  : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+    HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+    VarArgsFrameIndex(0), LiveInFI()
   {}
 
   int getFPStackOffset() const { return FPStackOffset; }
@@ -95,9 +99,6 @@ public:
   int getRAStackOffset() const { return RAStackOffset; }
   void setRAStackOffset(int Off) { RAStackOffset = Off; }
 
-  int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
-  void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
-
   int getGPStackOffset() const { return GPHolder.SPOffset; }
   int getGPFI() const { return GPHolder.FI; }
   void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
@@ -105,12 +106,38 @@ public:
   bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
 
   bool hasLoadArgs() const { return HasLoadArgs; }
-  bool hasStoreVarArgs() const { return HasStoreVarArgs; } 
+  bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+  void recordLiveIn(int FI) {
+    LiveInFI.push_back(FI);
+  }
+
+  bool isLiveIn(int FI) {
+    for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+      if (FI == LiveInFI[i]) return true;
+
+    return false;
+  }
+
+  const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+  void recordReplacement(int OFI, int NFI) {
+    FIReplacements.insert(std::make_pair(OFI,NFI));
+  }
+
+  bool hasReplacement(int OFI) const {
+    return FIReplacements.find(OFI) != FIReplacements.end();
+  }
+
+  int getReplacement(int OFI) const {
+    return FIReplacements.lookup(OFI);
+  }
 
   void recordLoadArgsFI(int FI, int SPOffset) {
     if (!HasLoadArgs) HasLoadArgs=true;
     FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
   }
+
   void recordStoreVarArgsFI(int FI, int SPOffset) {
     if (!HasStoreVarArgs) HasStoreVarArgs=true;
     FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
@@ -118,13 +145,14 @@ public:
 
   void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
     if (!hasLoadArgs()) return;
-    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) 
-      MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+      MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
   }
+
   void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
-    if (!hasStoreVarArgs()) return; 
-    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) 
-      MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+    if (!hasStoreVarArgs()) return;
+    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+      MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
   }
 
   unsigned getSRetReturnReg() const { return SRetReturnReg; }
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 22b6a30470d1..fa9140d7922f 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mblaze-reg-info"
+#define DEBUG_TYPE "mblaze-frame-info"
 
 #include "MBlaze.h"
 #include "MBlazeSubtarget.h"
@@ -26,7 +26,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -48,38 +48,62 @@ MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
 /// MBlaze::R0, return the number that it corresponds to (e.g. 0).
 unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
   switch (RegEnum) {
-    case MBlaze::R0  : case MBlaze::F0  : return 0;
-    case MBlaze::R1  : case MBlaze::F1  : return 1;
-    case MBlaze::R2  : case MBlaze::F2  : return 2;
-    case MBlaze::R3  : case MBlaze::F3  : return 3;
-    case MBlaze::R4  : case MBlaze::F4  : return 4;
-    case MBlaze::R5  : case MBlaze::F5  : return 5;
-    case MBlaze::R6  : case MBlaze::F6  : return 6;
-    case MBlaze::R7  : case MBlaze::F7  : return 7;
-    case MBlaze::R8  : case MBlaze::F8  : return 8;
-    case MBlaze::R9  : case MBlaze::F9  : return 9;
-    case MBlaze::R10 : case MBlaze::F10 : return 10;
-    case MBlaze::R11 : case MBlaze::F11 : return 11;
-    case MBlaze::R12 : case MBlaze::F12 : return 12;
-    case MBlaze::R13 : case MBlaze::F13 : return 13;
-    case MBlaze::R14 : case MBlaze::F14 : return 14;
-    case MBlaze::R15 : case MBlaze::F15 : return 15;
-    case MBlaze::R16 : case MBlaze::F16 : return 16;
-    case MBlaze::R17 : case MBlaze::F17 : return 17;
-    case MBlaze::R18 : case MBlaze::F18 : return 18;
-    case MBlaze::R19 : case MBlaze::F19 : return 19;
-    case MBlaze::R20 : case MBlaze::F20 : return 20;
-    case MBlaze::R21 : case MBlaze::F21 : return 21;
-    case MBlaze::R22 : case MBlaze::F22 : return 22;
-    case MBlaze::R23 : case MBlaze::F23 : return 23;
-    case MBlaze::R24 : case MBlaze::F24 : return 24;
-    case MBlaze::R25 : case MBlaze::F25 : return 25;
-    case MBlaze::R26 : case MBlaze::F26 : return 26;
-    case MBlaze::R27 : case MBlaze::F27 : return 27;
-    case MBlaze::R28 : case MBlaze::F28 : return 28;
-    case MBlaze::R29 : case MBlaze::F29 : return 29;
-    case MBlaze::R30 : case MBlaze::F30 : return 30;
-    case MBlaze::R31 : case MBlaze::F31 : return 31;
+    case MBlaze::R0     : return 0;
+    case MBlaze::R1     : return 1;
+    case MBlaze::R2     : return 2;
+    case MBlaze::R3     : return 3;
+    case MBlaze::R4     : return 4;
+    case MBlaze::R5     : return 5;
+    case MBlaze::R6     : return 6;
+    case MBlaze::R7     : return 7;
+    case MBlaze::R8     : return 8;
+    case MBlaze::R9     : return 9;
+    case MBlaze::R10    : return 10;
+    case MBlaze::R11    : return 11;
+    case MBlaze::R12    : return 12;
+    case MBlaze::R13    : return 13;
+    case MBlaze::R14    : return 14;
+    case MBlaze::R15    : return 15;
+    case MBlaze::R16    : return 16;
+    case MBlaze::R17    : return 17;
+    case MBlaze::R18    : return 18;
+    case MBlaze::R19    : return 19;
+    case MBlaze::R20    : return 20;
+    case MBlaze::R21    : return 21;
+    case MBlaze::R22    : return 22;
+    case MBlaze::R23    : return 23;
+    case MBlaze::R24    : return 24;
+    case MBlaze::R25    : return 25;
+    case MBlaze::R26    : return 26;
+    case MBlaze::R27    : return 27;
+    case MBlaze::R28    : return 28;
+    case MBlaze::R29    : return 29;
+    case MBlaze::R30    : return 30;
+    case MBlaze::R31    : return 31;
+    case MBlaze::RPC    : return 0x0000;
+    case MBlaze::RMSR   : return 0x0001;
+    case MBlaze::REAR   : return 0x0003;
+    case MBlaze::RESR   : return 0x0005;
+    case MBlaze::RFSR   : return 0x0007;
+    case MBlaze::RBTR   : return 0x000B;
+    case MBlaze::REDR   : return 0x000D;
+    case MBlaze::RPID   : return 0x1000;
+    case MBlaze::RZPR   : return 0x1001;
+    case MBlaze::RTLBX  : return 0x1002;
+    case MBlaze::RTLBLO : return 0x1003;
+    case MBlaze::RTLBHI : return 0x1004;
+    case MBlaze::RPVR0  : return 0x2000;
+    case MBlaze::RPVR1  : return 0x2001;
+    case MBlaze::RPVR2  : return 0x2002;
+    case MBlaze::RPVR3  : return 0x2003;
+    case MBlaze::RPVR4  : return 0x2004;
+    case MBlaze::RPVR5  : return 0x2005;
+    case MBlaze::RPVR6  : return 0x2006;
+    case MBlaze::RPVR7  : return 0x2007;
+    case MBlaze::RPVR8  : return 0x2008;
+    case MBlaze::RPVR9  : return 0x2009;
+    case MBlaze::RPVR10 : return 0x200A;
+    case MBlaze::RPVR11 : return 0x200B;
     default: llvm_unreachable("Unknown register number!");
   }
   return 0; // Not reached
@@ -126,6 +150,37 @@ unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
   return 0; // Not reached
 }
 
+unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
+  switch (Reg) {
+    case 0x0000 : return MBlaze::RPC;
+    case 0x0001 : return MBlaze::RMSR;
+    case 0x0003 : return MBlaze::REAR;
+    case 0x0005 : return MBlaze::RESR;
+    case 0x0007 : return MBlaze::RFSR;
+    case 0x000B : return MBlaze::RBTR;
+    case 0x000D : return MBlaze::REDR;
+    case 0x1000 : return MBlaze::RPID;
+    case 0x1001 : return MBlaze::RZPR;
+    case 0x1002 : return MBlaze::RTLBX;
+    case 0x1003 : return MBlaze::RTLBLO;
+    case 0x1004 : return MBlaze::RTLBHI;
+    case 0x2000 : return MBlaze::RPVR0;
+    case 0x2001 : return MBlaze::RPVR1;
+    case 0x2002 : return MBlaze::RPVR2;
+    case 0x2003 : return MBlaze::RPVR3;
+    case 0x2004 : return MBlaze::RPVR4;
+    case 0x2005 : return MBlaze::RPVR5;
+    case 0x2006 : return MBlaze::RPVR6;
+    case 0x2007 : return MBlaze::RPVR7;
+    case 0x2008 : return MBlaze::RPVR8;
+    case 0x2009 : return MBlaze::RPVR9;
+    case 0x200A : return MBlaze::RPVR10;
+    case 0x200B : return MBlaze::RPVR11;
+    default: llvm_unreachable("Unknown register number!");
+  }
+  return 0; // Not reached
+}
+
 unsigned MBlazeRegisterInfo::getPICCallReg() {
   return MBlaze::R20;
 }
@@ -164,77 +219,40 @@ getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-//===----------------------------------------------------------------------===//
-//
-// Stack Frame Processing methods
-// +----------------------------+
-//
-// The stack is allocated decrementing the stack pointer on
-// the first instruction of a function prologue. Once decremented,
-// all stack references are are done through a positive offset
-// from the stack/frame pointer, so the stack is considered
-// to grow up.
-//
-//===----------------------------------------------------------------------===//
-
-void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
-
-  // See the description at MicroBlazeMachineFunction.h
-  int TopCPUSavedRegOff = -1;
-
-  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
-  // be saved in this CPU Area there is the need. This whole Area must
-  // be aligned to the default Stack Alignment requirements.
-  unsigned StackOffset = MFI->getStackSize();
-  unsigned RegSize = 4;
-
-  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
-  // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
-  // the approach done by calculateFrameObjectOffsets to the stack frame.
-  MBlazeFI->adjustLoadArgsFI(MFI);
-  MBlazeFI->adjustStoreVarArgsFI(MFI);
-
-  if (hasFP(MF)) {
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
-                         StackOffset);
-    MBlazeFI->setFPStackOffset(StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += RegSize;
-  }
-
-  if (MFI->adjustsStack()) {
-    MBlazeFI->setRAStackOffset(0);
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
-                         StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += RegSize;
-  }
-
-  // Update frame info
-  MFI->setStackSize(StackOffset);
-
-  // Recalculate the final tops offset. The final values must be '0'
-  // if there isn't a callee saved register for CPU or FPU, otherwise
-  // a negative offset is needed.
-  if (TopCPUSavedRegOff >= 0)
-    MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
+// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
 void MBlazeRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
+    // If we have a frame pointer, turn the adjcallstackup instruction into a
+    // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
+    // 'addi r1, r1, <amt>'
+    MachineInstr *Old = I;
+    int Amount = Old->getOperand(0).getImm() + 4;
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly.  To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = TFI->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      MachineInstr *New;
+      if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+        New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+                .addReg(MBlaze::R1).addImm(-Amount);
+      } else {
+        assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+        New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+                .addReg(MBlaze::R1).addImm(Amount);
+      }
+
+      // Replace the pseudo instruction with a new instruction...
+      MBB.insert(I, New);
+    }
+  }
+
   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
   MBB.erase(I);
 }
@@ -247,6 +265,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                     RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
 
   unsigned i = 0;
   while (!MI.getOperand(i).isFI()) {
@@ -257,116 +276,33 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
 
   unsigned oi = i == 2 ? 1 : 2;
 
-  DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
-        errs() << "<--------->\n" << MI);
+  DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+        dbgs() << "<--------->\n" << MI);
 
   int FrameIndex = MI.getOperand(i).getIndex();
-  int stackSize  = MF.getFrameInfo()->getStackSize();
-  int spOffset   = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+  int stackSize  = MFI->getStackSize();
+  int spOffset   = MFI->getObjectOffset(FrameIndex);
 
-  DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+  DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+        dbgs() << "FrameIndex : " << FrameIndex << "\n"
                << "spOffset   : " << spOffset << "\n"
-               << "stackSize  : " << stackSize << "\n");
+               << "stackSize  : " << stackSize << "\n"
+               << "isFixed    : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+               << "isLiveIn   : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+               << "isSpill    : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+               << "\n" );
 
   // as explained on LowerFormalArguments, detect negative offsets
   // and adjust SPOffsets considering the final stack size.
-  int Offset = (spOffset < 0) ? (stackSize - spOffset) : (spOffset + 4);
-  Offset    += MI.getOperand(oi).getImm();
+  int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+  Offset += MI.getOperand(oi).getImm();
 
-  DEBUG(errs() << "Offset     : " << Offset << "\n" << "<--------->\n");
+  DEBUG(dbgs() << "Offset     : " << Offset << "\n" << "<--------->\n");
 
   MI.getOperand(oi).ChangeToImmediate(Offset);
   MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
 }
 
-void MBlazeRegisterInfo::
-emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB   = MF.front();
-  MachineFrameInfo *MFI    = MF.getFrameInfo();
-  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the right frame order for MBlaze.
-  adjustMBlazeStackFrame(MF);
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  unsigned StackSize = MFI->getStackSize();
-
-  // No need to allocate space on the stack.
-  if (StackSize == 0 && !MFI->adjustsStack()) return;
-  if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
-
-  int FPOffset = MBlazeFI->getFPStackOffset();
-  int RAOffset = MBlazeFI->getRAStackOffset();
-
-  // Adjust stack : addi R1, R1, -imm
-  BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDI), MBlaze::R1)
-      .addReg(MBlaze::R1).addImm(-StackSize);
-
-  // Save the return address only if the function isnt a leaf one.
-  // swi  R15, R1, stack_loc
-  if (MFI->adjustsStack()) {
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
-        .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
-  }
-
-  // if framepointer enabled, save it and set it
-  // to point to the stack pointer
-  if (hasFP(MF)) {
-    // swi  R19, R1, stack_loc
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
-      .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);
-
-    // add R19, R1, R0
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
-      .addReg(MBlaze::R1).addReg(MBlaze::R0);
-  }
-}
-
-void MBlazeRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  MachineFrameInfo *MFI            = MF.getFrameInfo();
-  MBlazeFunctionInfo *MBlazeFI         = MF.getInfo<MBlazeFunctionInfo>();
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  // Get the FI's where RA and FP are saved.
-  int FPOffset = MBlazeFI->getFPStackOffset();
-  int RAOffset = MBlazeFI->getRAStackOffset();
-
-  // if framepointer enabled, restore it and restore the
-  // stack pointer
-  if (hasFP(MF)) {
-    // add R1, R19, R0
-    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
-      .addReg(MBlaze::R19).addReg(MBlaze::R0);
-
-    // lwi  R19, R1, stack_loc
-    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
-      .addImm(FPOffset).addReg(MBlaze::R1);
-  }
-
-  // Restore the return address only if the function isnt a leaf one.
-  // lwi R15, R1, stack_loc
-  if (MFI->adjustsStack()) {
-    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
-      .addImm(RAOffset).addReg(MBlaze::R1);
-  }
-
-  // Get the number of bytes from FrameInfo
-  int StackSize = (int) MFI->getStackSize();
-  if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
-
-  // adjust stack.
-  // addi R1, R1, imm
-  if (StackSize) {
-    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
-      .addReg(MBlaze::R1).addImm(StackSize);
-  }
-}
-
-
 void MBlazeRegisterInfo::
 processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
   // Set the stack offset where GP must be saved/loaded from.
@@ -381,7 +317,9 @@ unsigned MBlazeRegisterInfo::getRARegister() const {
 }
 
 unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
 }
 
 unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
@@ -394,9 +332,8 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
   return 0;
 }
 
-int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
-  llvm_unreachable("What is the dwarf register number");
-  return -1;
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+  return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
 }
 
 #include "MBlazeGenRegisterInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 1e1fde14ab7b..839536d4e7b5 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -25,8 +25,8 @@ class TargetInstrInfo;
 class Type;
 
 namespace MBlaze {
-  /// SubregIndex - The index of various sized subregister classes. Note that 
-  /// these indices must be kept in sync with the class indices in the 
+  /// SubregIndex - The index of various sized subregister classes. Note that
+  /// these indices must be kept in sync with the class indices in the
   /// MBlazeRegisterInfo.td file.
   enum SubregIndex {
     SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
@@ -36,7 +36,7 @@ namespace MBlaze {
 struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
   const MBlazeSubtarget &Subtarget;
   const TargetInstrInfo &TII;
-  
+
   MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
                      const TargetInstrInfo &tii);
 
@@ -44,20 +44,16 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
   /// MBlaze::RA, return the number that it corresponds to (e.g. 31).
   static unsigned getRegisterNumbering(unsigned RegEnum);
   static unsigned getRegisterFromNumbering(unsigned RegEnum);
+  static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
 
   /// Get PIC indirect call register
   static unsigned getPICCallReg();
 
-  /// Adjust the MBlaze stack frame.
-  void adjustMBlazeStackFrame(MachineFunction &MF) const;
-
   /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -68,9 +64,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  
   /// Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -79,11 +72,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
   unsigned getEHExceptionRegister() const;
   unsigned getEHHandlerRegister() const;
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  bool targetHandlesStackFrameRounding() const { return true; }
-
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
 };
 
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 5e935103389e..fbefb22e9f25 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -17,15 +17,20 @@ class MBlazeReg<string n> : Register<n> {
   let Namespace = "MBlaze";
 }
 
-// MBlaze CPU Registers
+// Special purpose registers have 15-bit values
+class MBlazeSReg<string n> : Register<n> {
+  field bits<15> Num;
+  let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
 class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
   let Num = num;
 }
 
-// MBlaze 32-bit (aliased) FPU Registers
-class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
   let Num = num;
-  let Aliases = aliases;
 }
 
 //===----------------------------------------------------------------------===//
@@ -33,7 +38,6 @@ class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
 //===----------------------------------------------------------------------===//
 
 let Namespace = "MBlaze" in {
-
   // General Purpose Registers
   def R0  : MBlazeGPRReg< 0,  "r0">,   DwarfRegNum<[0]>;
   def R1  : MBlazeGPRReg< 1,  "r1">,   DwarfRegNum<[1]>;
@@ -68,46 +72,43 @@ let Namespace = "MBlaze" in {
   def R30 : MBlazeGPRReg< 30, "r30">,  DwarfRegNum<[30]>;
   def R31 : MBlazeGPRReg< 31, "r31">,  DwarfRegNum<[31]>;
 
-  /// MBlaze Single point precision FPU Registers
-  def F0  : FPR< 0,  "r0", [R0]>,  DwarfRegNum<[32]>;
-  def F1  : FPR< 1,  "r1", [R1]>,  DwarfRegNum<[33]>;
-  def F2  : FPR< 2,  "r2", [R2]>,  DwarfRegNum<[34]>;
-  def F3  : FPR< 3,  "r3", [R3]>,  DwarfRegNum<[35]>;
-  def F4  : FPR< 4,  "r4", [R4]>,  DwarfRegNum<[36]>;
-  def F5  : FPR< 5,  "r5", [R5]>,  DwarfRegNum<[37]>;
-  def F6  : FPR< 6,  "r6", [R6]>,  DwarfRegNum<[38]>;
-  def F7  : FPR< 7,  "r7", [R7]>,  DwarfRegNum<[39]>;
-  def F8  : FPR< 8,  "r8", [R8]>,  DwarfRegNum<[40]>;
-  def F9  : FPR< 9,  "r9", [R9]>,  DwarfRegNum<[41]>;
-  def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>;
-  def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>;
-  def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>;
-  def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>;
-  def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>;
-  def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>;
-  def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>;
-  def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>;
-  def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
-  def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
-  def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
-  def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
-  def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
-  def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
-  def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
-  def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
-  def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
-  def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
-  def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
-  def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
-  def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
-  def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+  // Special Purpose Registers
+  def RPC    : MBlazeSPRReg<0x0000, "rpc">,    DwarfRegNum<[32]>;
+  def RMSR   : MBlazeSPRReg<0x0001, "rmsr">,   DwarfRegNum<[33]>;
+  def REAR   : MBlazeSPRReg<0x0003, "rear">,   DwarfRegNum<[34]>;
+  def RESR   : MBlazeSPRReg<0x0005, "resr">,   DwarfRegNum<[35]>;
+  def RFSR   : MBlazeSPRReg<0x0007, "rfsr">,   DwarfRegNum<[36]>;
+  def RBTR   : MBlazeSPRReg<0x000B, "rbtr">,   DwarfRegNum<[37]>;
+  def REDR   : MBlazeSPRReg<0x000D, "redr">,   DwarfRegNum<[38]>;
+  def RPID   : MBlazeSPRReg<0x1000, "rpid">,   DwarfRegNum<[39]>;
+  def RZPR   : MBlazeSPRReg<0x1001, "rzpr">,   DwarfRegNum<[40]>;
+  def RTLBX  : MBlazeSPRReg<0x1002, "rtlbx">,  DwarfRegNum<[41]>;
+  def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+  def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+  def RPVR0  : MBlazeSPRReg<0x2000, "rpvr0">,  DwarfRegNum<[44]>;
+  def RPVR1  : MBlazeSPRReg<0x2001, "rpvr1">,  DwarfRegNum<[45]>;
+  def RPVR2  : MBlazeSPRReg<0x2002, "rpvr2">,  DwarfRegNum<[46]>;
+  def RPVR3  : MBlazeSPRReg<0x2003, "rpvr3">,  DwarfRegNum<[47]>;
+  def RPVR4  : MBlazeSPRReg<0x2004, "rpvr4">,  DwarfRegNum<[48]>;
+  def RPVR5  : MBlazeSPRReg<0x2005, "rpvr5">,  DwarfRegNum<[49]>;
+  def RPVR6  : MBlazeSPRReg<0x2006, "rpvr6">,  DwarfRegNum<[50]>;
+  def RPVR7  : MBlazeSPRReg<0x2007, "rpvr7">,  DwarfRegNum<[51]>;
+  def RPVR8  : MBlazeSPRReg<0x2008, "rpvr8">,  DwarfRegNum<[52]>;
+  def RPVR9  : MBlazeSPRReg<0x2009, "rpvr9">,  DwarfRegNum<[53]>;
+  def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>;
+  def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>;
+
+  // The carry bit. In the Microblaze this is really bit 29 of the
+  // MSR register but this is the only bit of that register that we
+  // are interested in modeling.
+  def CARRY  : MBlazeSPRReg<0x0000, "rmsr[c]">, DwarfRegNum<[33]>;
 }
 
 //===----------------------------------------------------------------------===//
 // Register Classes
 //===----------------------------------------------------------------------===//
 
-def CPURegs : RegisterClass<"MBlaze", [i32], 32,
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32,
   [
   // Return Values and Arguments
   R3, R4, R5, R6, R7, R8, R9, R10,
@@ -135,46 +136,55 @@ def CPURegs : RegisterClass<"MBlaze", [i32], 32,
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
   let MethodBodies = [{
-    CPURegsClass::iterator
-    CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+    GPRClass::iterator
+    GPRClass::allocation_order_end(const MachineFunction &MF) const {
       // The last 10 registers on the list above are reserved
       return end()-10;
     }
   }];
 }
 
-def FGR32 : RegisterClass<"MBlaze", [f32], 32,
+def SPR : RegisterClass<"MBlaze", [i32], 32,
   [
-  // Return Values and Arguments
-  F3, F4, F5, F6, F7, F8, F9, F10,
-
-  // Not preserved across procedure calls
-  F11, F12,
-
-  // Callee save
-  F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,
-
   // Reserved
-  F0,  // Always zero
-  F1,  // The stack pointer
-  F2,  // Read-only small data area anchor
-  F13, // Read-write small data area anchor
-  F14, // Return address for interrupts
-  F15, // Return address for sub-routines
-  F16, // Return address for trap
-  F17, // Return address for exceptions
-  F18, // Reserved for assembler
-  F19  // The frame pointer
+  RPC,
+  RMSR,
+  REAR,
+  RESR,
+  RFSR,
+  RBTR,
+  REDR,
+  RPID,
+  RZPR,
+  RTLBX,
+  RTLBLO,
+  RTLBHI,
+  RPVR0,
+  RPVR1,
+  RPVR2,
+  RPVR3,
+  RPVR4,
+  RPVR5,
+  RPVR6,
+  RPVR7,
+  RPVR8,
+  RPVR9,
+  RPVR10,
+  RPVR11
   ]>
 {
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
   let MethodBodies = [{
-    FGR32Class::iterator
-    FGR32Class::allocation_order_end(const MachineFunction &MF) const {
-      // The last 10 registers on the list above are reserved
-      return end()-10;
+    SPRClass::iterator
+    SPRClass::allocation_order_end(const MachineFunction &MF) const {
+      // None of the special purpose registers are allocatable.
+      return end()-24;
     }
   }];
 }
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> {
+  let CopyCost = -1;
+}
diff --git a/lib/Target/MBlaze/MBlazeRelocations.h b/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 000000000000..c298eda2195f
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+  namespace MBlaze {
+    enum RelocationType {
+      /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+      /// the value already in memory, after we adjust it for where the PC is.
+      reloc_pcrel_word = 0,
+
+      /// reloc_picrel_word - PIC base relative relocation, add the relocated
+      /// value to the value already in memory, after we adjust it for where the
+      /// PIC base is.
+      reloc_picrel_word = 1,
+
+      /// reloc_absolute_word - absolute relocation, just add the relocated
+      /// value to the value already in memory.
+      reloc_absolute_word = 2,
+
+      /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+      /// value to the value already in memory. In object files, it represents a
+      /// value which must be sign-extended when resolving the relocation.
+      reloc_absolute_word_sext = 3,
+
+      /// reloc_absolute_dword - absolute relocation, just add the relocated
+      /// value to the value already in memory.
+      reloc_absolute_dword = 4
+    };
+  }
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4a65542a447c..ac4d98c9240e 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -14,7 +14,7 @@ def ALU     : FuncUnit;
 def IMULDIV : FuncUnit;
 
 //===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for MBlaze 
+// Instruction Itinerary classes used for MBlaze
 //===----------------------------------------------------------------------===//
 def IIAlu              : InstrItinClass;
 def IILoad             : InstrItinClass;
@@ -41,7 +41,7 @@ def IIPseudo           : InstrItinClass;
 // MBlaze Generic instruction itineraries.
 //===----------------------------------------------------------------------===//
 def MBlazeGenericItineraries : ProcessorItineraries<
-  [ALU, IMULDIV], [
+  [ALU, IMULDIV], [], [
   InstrItinData<IIAlu              , [InstrStage<1,  [ALU]>]>,
   InstrItinData<IILoad             , [InstrStage<3,  [ALU]>]>,
   InstrItinData<IIStore            , [InstrStage<1,  [ALU]>]>,
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 425295340d4f..cd949e1998de 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -15,13 +15,62 @@
 #include "MBlazeMCAsmInfo.h"
 #include "MBlazeTargetMachine.h"
 #include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  default:
+    return new MBlazeMCAsmInfo();
+  }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &_OS,
+                                    MCCodeEmitter *_Emitter,
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  case Triple::Darwin:
+    llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+    return NULL;
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    llvm_unreachable("MBlaze does not support Windows COFF format");
+    return NULL;
+  default:
+    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll,
+                             NoExecStack);
+  }
+}
+
+
 extern "C" void LLVMInitializeMBlazeTarget() {
   // Register the target.
   RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
-  RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget);
+
+  // Register the target asm info.
+  RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo);
+
+  // Register the MC code emitter
+  TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
+                                      llvm::createMBlazeMCCodeEmitter);
+
+  // Register the asm backend
+  TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
+                                     createMBlazeAsmBackend);
+
+  // Register the object streamer
+  TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
+                                         createMCStreamer);
+
 }
 
 // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -35,11 +84,10 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
                     const std::string &FS):
   LLVMTargetMachine(T, TT),
   Subtarget(TT, FS),
-  DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-"
-             "f64:32:32-v64:32:32-v128:32:32-n32"),
+  DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
   InstrInfo(*this),
-  FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
-  TLInfo(*this), TSInfo(*this) {
+  FrameLowering(Subtarget),
+  TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) {
   if (getRelocationModel() == Reloc::Default) {
       setRelocationModel(Reloc::Static);
   }
@@ -50,8 +98,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
 
 // Install an instruction selector pass using
 // the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
   PM.add(createMBlazeISelDag(*this));
   return false;
 }
@@ -59,8 +107,8 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
 // Implemented by targets that want to run passes immediately before
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
-bool MBlazeTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                         CodeGenOpt::Level OptLevel) {
   PM.add(createMBlazeDelaySlotFillerPass(*this));
   return true;
 }
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 6a57e5890aa4..45ad07858887 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -19,21 +19,25 @@
 #include "MBlazeISelLowering.h"
 #include "MBlazeSelectionDAGInfo.h"
 #include "MBlazeIntrinsicInfo.h"
+#include "MBlazeFrameLowering.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
   class formatted_raw_ostream;
 
   class MBlazeTargetMachine : public LLVMTargetMachine {
-    MBlazeSubtarget       Subtarget;
-    const TargetData    DataLayout; // Calculates type size & alignment
-    MBlazeInstrInfo       InstrInfo;
-    TargetFrameInfo     FrameInfo;
-    MBlazeTargetLowering  TLInfo;
+    MBlazeSubtarget        Subtarget;
+    const TargetData       DataLayout; // Calculates type size & alignment
+    MBlazeInstrInfo        InstrInfo;
+    MBlazeFrameLowering    FrameLowering;
+    MBlazeTargetLowering   TLInfo;
     MBlazeSelectionDAGInfo TSInfo;
-    MBlazeIntrinsicInfo IntrinsicInfo;
+    MBlazeIntrinsicInfo    IntrinsicInfo;
+    MBlazeELFWriterInfo    ELFWriterInfo;
   public:
     MBlazeTargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS);
@@ -41,8 +45,8 @@ namespace llvm {
     virtual const MBlazeInstrInfo *getInstrInfo() const
     { return &InstrInfo; }
 
-    virtual const TargetFrameInfo *getFrameInfo() const
-    { return &FrameInfo; }
+    virtual const TargetFrameLowering *getFrameLowering() const
+    { return &FrameLowering; }
 
     virtual const MBlazeSubtarget *getSubtargetImpl() const
     { return &Subtarget; }
@@ -62,12 +66,13 @@ namespace llvm {
     const TargetIntrinsicInfo *getIntrinsicInfo() const
     { return &IntrinsicInfo; }
 
-    // Pass Pipeline Configuration
-    virtual bool addInstSelector(PassManagerBase &PM,
-                                 CodeGenOpt::Level OptLevel);
+    virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+      return &ELFWriterInfo;
+    }
 
-    virtual bool addPreEmitPass(PassManagerBase &PM,
-                                CodeGenOpt::Level OptLevel);
+    // Pass Pipeline Configuration
+    virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
+    virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
   };
 } // End llvm namespace
 
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 05c01ef7a5d9..abd1b0b62c7d 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 void MBlazeTargetObjectFile::
@@ -23,13 +24,13 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
 
   SmallDataSection =
-    getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getDataRel());
 
   SmallBSSSection =
-    getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getBSS());
 
 }
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
index 20e77026c687..c313722427db 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.h
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -18,10 +18,9 @@ namespace llvm {
     const MCSection *SmallDataSection;
     const MCSection *SmallBSSSection;
   public:
-    
+
     void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
-    
     /// IsGlobalInSmallSection - Return true if this global address should be
     /// placed into small data/bss section.
     bool IsGlobalInSmallSection(const GlobalValue *GV,
@@ -29,8 +28,8 @@ namespace llvm {
                                 SectionKind Kind) const;
 
     bool IsGlobalInSmallSection(const GlobalValue *GV,
-                                const TargetMachine &TM) const;  
-    
+                                const TargetMachine &TM) const;
+
     const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
                                             SectionKind Kind,
                                             Mangler *Mang,
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index 19e508c532a6..e01c60bb8c65 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -12,12 +12,14 @@ TARGET = MBlaze
 
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
-                MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
-                MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
-                MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \
-                MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc
+								MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
+								MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
+								MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
+								MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
+								MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \
+								MBlazeGenEDInfo.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO
new file mode 100644
index 000000000000..2e613eb0ca0f
--- /dev/null
+++ b/lib/Target/MBlaze/TODO
@@ -0,0 +1,26 @@
+* Writing out ELF files is close to working but the following needs to
+  be examined more closely:
+    - Relocations use 2-byte / 4-byte to terminology in reference to
+      the size of the immediate value being changed. The Xilinx
+      terminology seems to be (???) 4-byte / 8-byte in reference
+      to the number of bytes of instructions that are being changed.
+
+* Code generation seems to work relatively well now but the following
+  needs to be examined more closely:
+    - The stack layout needs to be examined to make sure it meets
+      the standard, especially in regards to var arg functions.
+    - The processor itineraries are copied from a different backend
+      and need to be updated to model the MicroBlaze correctly.
+    - Look at the MBlazeGenFastISel.inc stuff and make use of it
+      if appropriate.
+
+* A basic assembly parser is present now and seems to parse most things.
+  There are a few things that need to be looked at:
+    - There are some instructions that are not generated by the backend
+      and have not been tested as far as the parser is concerned.
+    - The assembly parser does not use any MicroBlaze specific directives.
+      I should investigate if there are MicroBlaze specific directive and,
+      if there are, add them.
+    - The instruction MFS and MTS use special names for some of the
+      special registers that can be accessed. These special register
+      names should be parsed by the assembly parser.
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 5afb14d09a55..40696f63c462 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -1,4 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
 
 add_llvm_library(LLVMMBlazeInfo
   MBlazeTargetInfo.cpp
diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 4b1f4e6ff269..000000000000
--- a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMSP430AsmPrinter
-  MSP430AsmPrinter.cpp
-  MSP430InstPrinter.cpp
-  MSP430MCInstLower.cpp
-  )
-add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
deleted file mode 100644
index 56f72bbc1474..000000000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the MSP430 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
-#include "MSP430InstrInfo.h"
-#include "MSP430InstPrinter.h"
-#include "MSP430MCAsmInfo.h"
-#include "MSP430MCInstLower.h"
-#include "MSP430TargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class MSP430AsmPrinter : public AsmPrinter {
-  public:
-    MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "MSP430 Assembly Printer";
-    }
-
-    void printOperand(const MachineInstr *MI, int OpNum,
-                      raw_ostream &O, const char* Modifier = 0);
-    void printSrcMemOperand(const MachineInstr *MI, int OpNum,
-                            raw_ostream &O);
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI,
-                               unsigned OpNo, unsigned AsmVariant,
-                               const char *ExtraCode, raw_ostream &O);
-    void EmitInstruction(const MachineInstr *MI);
-  };
-} // end of anonymous namespace
-
-
-void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
-                                    raw_ostream &O, const char *Modifier) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  switch (MO.getType()) {
-  default: assert(0 && "Not implemented yet!");
-  case MachineOperand::MO_Register:
-    O << MSP430InstPrinter::getRegisterName(MO.getReg());
-    return;
-  case MachineOperand::MO_Immediate:
-    if (!Modifier || strcmp(Modifier, "nohash"))
-      O << '#';
-    O << MO.getImm();
-    return;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_GlobalAddress: {
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
-    uint64_t Offset = MO.getOffset();
-
-    // If the global address expression is a part of displacement field with a
-    // register base, we should not emit any prefix symbol here, e.g.
-    //   mov.w &foo, r1
-    // vs
-    //   mov.w glb(r1), r2
-    // Otherwise (!) msp430-as will silently miscompile the output :(
-    if (!Modifier || strcmp(Modifier, "nohash"))
-      O << (isMemOp ? '&' : '#');
-    if (Offset)
-      O << '(' << Offset << '+';
-
-    O << *Mang->getSymbol(MO.getGlobal());
-
-    if (Offset)
-      O << ')';
-
-    return;
-  }
-  case MachineOperand::MO_ExternalSymbol: {
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
-    O << (isMemOp ? '&' : '#');
-    O << MAI->getGlobalPrefix() << MO.getSymbolName();
-    return;
-  }
-  }
-}
-
-void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O) {
-  const MachineOperand &Base = MI->getOperand(OpNum);
-  const MachineOperand &Disp = MI->getOperand(OpNum+1);
-
-  // Print displacement first
-
-  // Imm here is in fact global address - print extra modifier.
-  if (Disp.isImm() && !Base.getReg())
-    O << '&';
-  printOperand(MI, OpNum+1, O, "nohash");
-
-  // Print register base field
-  if (Base.getReg()) {
-    O << '(';
-    printOperand(MI, OpNum, O);
-    O << ')';
-  }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
-                                       const char *ExtraCode, raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                             unsigned OpNo, unsigned AsmVariant,
-                                             const char *ExtraCode,
-                                             raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0]) {
-    return true; // Unknown modifier.
-  }
-  printSrcMemOperand(MI, OpNo, O);
-  return false;
-}
-
-//===----------------------------------------------------------------------===//
-void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
-
-  MCInst TmpInst;
-  MCInstLowering.Lower(MI, TmpInst);
-  OutStreamer.EmitInstruction(TmpInst);
-}
-
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
-                                                unsigned SyntaxVariant,
-                                                const MCAsmInfo &MAI) {
-  if (SyntaxVariant == 0)
-    return new MSP430InstPrinter(MAI);
-  return 0;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeMSP430AsmPrinter() {
-  RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
-  TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
-                                        createMSP430MCInstPrinter);
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
deleted file mode 100644
index c15d4085bc8b..000000000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
-#include "MSP430InstrInfo.h"
-#include "MSP430InstPrinter.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#include "MSP430GenAsmWriter.inc"
-#undef MachineInstr
-
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
-  printInstruction(MI, O);
-}
-
-void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    O << Op.getImm();
-  else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    O << *Op.getExpr();
-  }
-}
-
-void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O, const char *Modifier) {
-  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    O << getRegisterName(Op.getReg());
-  } else if (Op.isImm()) {
-    O << '#' << Op.getImm();
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << '#' << *Op.getExpr();
-  }
-}
-
-void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MCOperand &Base = MI->getOperand(OpNo);
-  const MCOperand &Disp = MI->getOperand(OpNo+1);
-
-  // Print displacement first
-
-  // If the global address expression is a part of displacement field with a
-  // register base, we should not emit any prefix symbol here, e.g.
-  //   mov.w &foo, r1
-  // vs
-  //   mov.w glb(r1), r2
-  // Otherwise (!) msp430-as will silently miscompile the output :(
-  if (!Base.getReg())
-    O << '&';
-
-  if (Disp.isExpr())
-    O << *Disp.getExpr();
-  else {
-    assert(Disp.isImm() && "Expected immediate in displacement field");
-    O << Disp.getImm();
-  }
-
-  // Print register base field
-  if (Base.getReg())
-    O << '(' << getRegisterName(Base.getReg()) << ')';
-}
-
-void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned CC = MI->getOperand(OpNo).getImm();
-
-  switch (CC) {
-  default:
-   llvm_unreachable("Unsupported CC code");
-   break;
-  case MSP430CC::COND_E:
-   O << "eq";
-   break;
-  case MSP430CC::COND_NE:
-   O << "ne";
-   break;
-  case MSP430CC::COND_HS:
-   O << "hs";
-   break;
-  case MSP430CC::COND_LO:
-   O << "lo";
-   break;
-  case MSP430CC::COND_GE:
-   O << "ge";
-   break;
-  case MSP430CC::COND_L:
-   O << 'l';
-   break;
-  }
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
deleted file mode 100644
index f0e1ce22841b..000000000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MSP430INSTPRINTER_H
-#define MSP430INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-  class MCOperand;
-
-  class MSP430InstPrinter : public MCInstPrinter {
-  public:
-    MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
-    }
-
-    virtual void printInst(const MCInst *MI, raw_ostream &O);
-
-    // Autogenerated by tblgen.
-    void printInstruction(const MCInst *MI, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                      const char *Modifier = 0);
-    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                            const char *Modifier = 0);
-    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  };
-}
-
-#endif
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
deleted file mode 100644
index d1d9a1158635..000000000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower MSP430 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSP430MCInstLower.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-MCSymbol *MSP430MCInstLower::
-GetGlobalAddressSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.Mang->getSymbol(MO.getGlobal());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetExternalSymbolSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetJumpTableSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
-                            << Printer.getFunctionNumber() << '_'
-                            << MO.getIndex();
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
-                            << Printer.getFunctionNumber() << '_'
-                            << MO.getIndex();
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetBlockAddressSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
-}
-
-MCOperand MSP430MCInstLower::
-LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
-  // FIXME: We would like an efficient form for this, so we don't have to do a
-  // lot of extra uniquing.
-  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  if (!MO.isJTI() && MO.getOffset())
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
-                                   Ctx);
-  return MCOperand::CreateExpr(Expr);
-}
-
-void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
-  OutMI.setOpcode(MI->getOpcode());
-
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-
-    MCOperand MCOp;
-    switch (MO.getType()) {
-    default:
-      MI->dump();
-      assert(0 && "unknown operand type");
-    case MachineOperand::MO_Register:
-      // Ignore all implicit register operands.
-      if (MO.isImplicit()) continue;
-      MCOp = MCOperand::CreateReg(MO.getReg());
-      break;
-    case MachineOperand::MO_Immediate:
-      MCOp = MCOperand::CreateImm(MO.getImm());
-      break;
-    case MachineOperand::MO_MachineBasicBlock:
-      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                         MO.getMBB()->getSymbol(), Ctx));
-      break;
-    case MachineOperand::MO_GlobalAddress:
-      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
-      break;
-    case MachineOperand::MO_ExternalSymbol:
-      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
-      break;
-    case MachineOperand::MO_JumpTableIndex:
-      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
-      break;
-    case MachineOperand::MO_ConstantPoolIndex:
-      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
-      break;
-    case MachineOperand::MO_BlockAddress:
-      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
-    }
-
-    OutMI.addOperand(MCOp);
-  }
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
deleted file mode 100644
index e937696406fe..000000000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MSP430_MCINSTLOWER_H
-#define MSP430_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-  class AsmPrinter;
-  class MCAsmInfo;
-  class MCContext;
-  class MCInst;
-  class MCOperand;
-  class MCSymbol;
-  class MachineInstr;
-  class MachineModuleInfoMachO;
-  class MachineOperand;
-  class Mangler;
-
-  /// MSP430MCInstLower - This class is used to lower an MachineInstr
-  /// into an MCInst.
-class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
-  MCContext &Ctx;
-  Mangler &Mang;
-
-  AsmPrinter &Printer;
-public:
-  MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
-    : Ctx(ctx), Mang(mang), Printer(printer) {}
-  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile
deleted file mode 100644
index a5293ab8a234..000000000000
--- a/lib/Target/MSP430/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSP430AsmPrinter
-
-# Hack: we need to include 'main' MSP430 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a3f60d2a44f1..2c7cbb64418f 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -15,11 +15,15 @@ add_llvm_target(MSP430CodeGen
   MSP430ISelDAGToDAG.cpp
   MSP430ISelLowering.cpp
   MSP430InstrInfo.cpp
+  MSP430FrameLowering.cpp
   MSP430MCAsmInfo.cpp
   MSP430RegisterInfo.cpp
   MSP430Subtarget.cpp
   MSP430TargetMachine.cpp
   MSP430SelectionDAGInfo.cpp
+  MSP430AsmPrinter.cpp
+  MSP430MCInstLower.cpp
   )
 
-target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..f5458d59a821
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+  MSP430InstPrinter.cpp
+  )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 000000000000..e10d4fe7ca16
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  printInstruction(MI, O);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << Op.getImm();
+  else {
+    assert(Op.isExpr() && "unknown pcrel immediate operand");
+    O << *Op.getExpr();
+  }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << '#' << Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << '#' << *Op.getExpr();
+  }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  const MCOperand &Base = MI->getOperand(OpNo);
+  const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+  // Print displacement first
+
+  // If the global address expression is a part of displacement field with a
+  // register base, we should not emit any prefix symbol here, e.g.
+  //   mov.w &foo, r1
+  // vs
+  //   mov.w glb(r1), r2
+  // Otherwise (!) msp430-as will silently miscompile the output :(
+  if (!Base.getReg())
+    O << '&';
+
+  if (Disp.isExpr())
+    O << *Disp.getExpr();
+  else {
+    assert(Disp.isImm() && "Expected immediate in displacement field");
+    O << Disp.getImm();
+  }
+
+  // Print register base field
+  if (Base.getReg())
+    O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned CC = MI->getOperand(OpNo).getImm();
+
+  switch (CC) {
+  default:
+   llvm_unreachable("Unsupported CC code");
+   break;
+  case MSP430CC::COND_E:
+   O << "eq";
+   break;
+  case MSP430CC::COND_NE:
+   O << "ne";
+   break;
+  case MSP430CC::COND_HS:
+   O << "hs";
+   break;
+  case MSP430CC::COND_LO:
+   O << "lo";
+   break;
+  case MSP430CC::COND_GE:
+   O << "ge";
+   break;
+  case MSP430CC::COND_L:
+   O << 'l';
+   break;
+  }
+}
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 000000000000..f0e1ce22841b
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+  class MSP430InstPrinter : public MCInstPrinter {
+  public:
+    MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
+    }
+
+    virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+    // Autogenerated by tblgen.
+    void printInstruction(const MCInst *MI, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+
+    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                      const char *Modifier = 0);
+    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                            const char *Modifier = 0);
+    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  };
+}
+
+#endif
diff --git a/lib/Target/MSP430/InstPrinter/Makefile b/lib/Target/MSP430/InstPrinter/Makefile
new file mode 100644
index 000000000000..a5293ab8a234
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 0f08e3d8ca0f..5cc5e6e3d7c9 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -52,6 +52,7 @@ def MSP430InstrInfo : InstrInfo;
 
 def MSP430InstPrinter : AsmWriter {
   string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..a1a7f44c19c4
--- /dev/null
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,179 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCAsmInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class MSP430AsmPrinter : public AsmPrinter {
+  public:
+    MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "MSP430 Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int OpNum,
+                      raw_ostream &O, const char* Modifier = 0);
+    void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+                            raw_ostream &O);
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI,
+                               unsigned OpNo, unsigned AsmVariant,
+                               const char *ExtraCode, raw_ostream &O);
+    void EmitInstruction(const MachineInstr *MI);
+  };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+                                    raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  switch (MO.getType()) {
+  default: assert(0 && "Not implemented yet!");
+  case MachineOperand::MO_Register:
+    O << MSP430InstPrinter::getRegisterName(MO.getReg());
+    return;
+  case MachineOperand::MO_Immediate:
+    if (!Modifier || strcmp(Modifier, "nohash"))
+      O << '#';
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_GlobalAddress: {
+    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
+    uint64_t Offset = MO.getOffset();
+
+    // If the global address expression is a part of displacement field with a
+    // register base, we should not emit any prefix symbol here, e.g.
+    //   mov.w &foo, r1
+    // vs
+    //   mov.w glb(r1), r2
+    // Otherwise (!) msp430-as will silently miscompile the output :(
+    if (!Modifier || strcmp(Modifier, "nohash"))
+      O << (isMemOp ? '&' : '#');
+    if (Offset)
+      O << '(' << Offset << '+';
+
+    O << *Mang->getSymbol(MO.getGlobal());
+
+    if (Offset)
+      O << ')';
+
+    return;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
+    O << (isMemOp ? '&' : '#');
+    O << MAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+  }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+                                          raw_ostream &O) {
+  const MachineOperand &Base = MI->getOperand(OpNum);
+  const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+  // Print displacement first
+
+  // Imm here is in fact global address - print extra modifier.
+  if (Disp.isImm() && !Base.getReg())
+    O << '&';
+  printOperand(MI, OpNum+1, O, "nohash");
+
+  // Print register base field
+  if (Base.getReg()) {
+    O << '(';
+    printOperand(MI, OpNum, O);
+    O << ')';
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode, raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                             unsigned OpNo, unsigned AsmVariant,
+                                             const char *ExtraCode,
+                                             raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    return true; // Unknown modifier.
+  }
+  printSrcMemOperand(MI, OpNo, O);
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+                                                unsigned SyntaxVariant,
+                                                const MCAsmInfo &MAI) {
+  if (SyntaxVariant == 0)
+    return new MSP430InstPrinter(MAI);
+  return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+  RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+  TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+                                        createMSP430MCInstPrinter);
+}
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 000000000000..c99f4ab6c2f9
--- /dev/null
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  return (DisableFramePointerElim(MF) ||
+          MF.getFrameInfo()->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+  const MSP430InstrInfo &TII =
+    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  uint64_t StackSize = MFI->getStackSize();
+
+  uint64_t NumBytes = 0;
+  if (hasFP(MF)) {
+    // Calculate required stack adjustment
+    uint64_t FrameSize = StackSize - 2;
+    NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+    // Get the offset of the stack slot for the EBP register... which is
+    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+    // Update the frame offset adjustment.
+    MFI->setOffsetAdjustment(-NumBytes);
+
+    // Save FPW into the appropriate stack slot...
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+      .addReg(MSP430::FPW, RegState::Kill);
+
+    // Update FPW with the new base value...
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+      .addReg(MSP430::SPW);
+
+    // Mark the FramePtr as live-in in every block except the entry.
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+         I != E; ++I)
+      I->addLiveIn(MSP430::FPW);
+
+  } else
+    NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+  // Skip the callee-saved push instructions.
+  while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+    ++MBBI;
+
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+    // If there is an SUB16ri of SPW immediately before this instruction, merge
+    // the two.
+    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+    // If there is an ADD16ri or SUB16ri of SPW immediately after this
+    // instruction, merge the two instructions.
+    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+    if (NumBytes) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(NumBytes);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+  const MSP430InstrInfo &TII =
+    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  switch (RetOpcode) {
+  case MSP430::RET:
+  case MSP430::RETI: break;  // These are ok
+  default:
+    llvm_unreachable("Can only insert epilog into returning blocks");
+  }
+
+  // Get the number of bytes to allocate from the FrameInfo
+  uint64_t StackSize = MFI->getStackSize();
+  unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+  uint64_t NumBytes = 0;
+
+  if (hasFP(MF)) {
+    // Calculate required stack adjustment
+    uint64_t FrameSize = StackSize - 2;
+    NumBytes = FrameSize - CSSize;
+
+    // pop FPW.
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+  } else
+    NumBytes = StackSize - CSSize;
+
+  // Skip the callee-saved pop instructions.
+  while (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PI = prior(MBBI);
+    unsigned Opc = PI->getOpcode();
+    if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+      break;
+    --MBBI;
+  }
+
+  DL = MBBI->getDebugLoc();
+
+  // If there is an ADD16ri or SUB16ri of SPW immediately before this
+  // instruction, merge the two instructions.
+  //if (NumBytes || MFI->hasVarSizedObjects())
+  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+  if (MFI->hasVarSizedObjects()) {
+    BuildMI(MBB, MBBI, DL,
+            TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+    if (CSSize) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL,
+                TII.get(MSP430::SUB16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(CSSize);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  } else {
+    // adjust stack pointer back: SPW += numbytes
+    if (NumBytes) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(NumBytes);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  }
+}
+
+// FIXME: Can we eleminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+  MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+      .addReg(Reg, RegState::Kill);
+  }
+  return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                 MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+  return true;
+}
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 000000000000..b636827da7b0
--- /dev/null
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+  const MSP430Subtarget &STI;
+
+public:
+  explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 3395e9fc3437..5430d433b650 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -60,15 +60,6 @@ namespace {
       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
     }
 
-    bool hasBaseReg() const {
-      return Base.Reg.getNode() != 0;
-    }
-
-    void setBaseReg(SDValue Reg) {
-      BaseType = RegBase;
-      Base.Reg = Reg;
-    }
-
     void dump() {
       errs() << "MSP430ISelAddressMode " << this << '\n';
       if (BaseType == RegBase && Base.Reg.getNode() != 0) {
@@ -129,7 +120,7 @@ namespace {
     SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
                                unsigned Opc8, unsigned Opc16);
 
-    bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+    bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
   };
 }  // end anonymous namespace
 
@@ -254,7 +245,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
 /// SelectAddr - returns true if it is able pattern match an addressing mode.
 /// It returns the operands which make up the maximal addressing mode it can
 /// match by reference.
-bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
                                     SDValue &Base, SDValue &Disp) {
   MSP430ISelAddressMode AM;
 
@@ -272,7 +263,7 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
     AM.Base.Reg;
 
   if (AM.GV)
-    Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+    Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
                                           MVT::i16, AM.Disp,
                                           0/*AM.SymbolFlags*/);
   else if (AM.CP)
@@ -298,7 +289,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+    if (!SelectAddr(Op, Op0, Op1))
       return true;
     break;
   }
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index a1703a3e78bf..30ef4f5da08e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -366,7 +366,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
       if (ObjSize > 2) {
         errs() << "LowerFormalArguments Unhandled argument type: "
-             << VA.getLocVT().getSimpleVT().SimpleTy
+             << EVT(VA.getLocVT()).getEVTString()
              << "\n";
       }
       // Create the frame index object for this incoming parameter...
@@ -376,7 +376,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
       InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
-                                   PseudoSourceValue::getFixedStack(FI), 0,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -507,8 +507,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(),
-                                         VA.getLocMemOffset(), false, false, 0));
+                                         MachinePointerInfo(),false, false, 0));
     }
   }
 
@@ -537,7 +536,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -748,7 +747,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
   }
 
   TargetCC = DAG.getConstant(TCC, MVT::i8);
-  return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+  return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
 }
 
 
@@ -837,7 +836,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     return SR;
   } else {
     SDValue Zero = DAG.getConstant(0, VT);
-    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
     SmallVector<SDValue, 4> Ops;
     Ops.push_back(One);
     Ops.push_back(Zero);
@@ -859,7 +858,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
   SDValue TargetCC;
   SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
 
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SmallVector<SDValue, 4> Ops;
   Ops.push_back(TrueV);
   Ops.push_back(FalseV);
@@ -914,13 +913,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -934,7 +933,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                          MSP430::FPW, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index bfab844f5b1a..424df136cc16 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -40,8 +40,9 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 0,
+    MF.getMachineMemOperand(
+              MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
 
@@ -68,8 +69,9 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOLoad, 0,
+    MF.getMachineMemOperand(
+              MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
 
@@ -99,48 +101,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     .addReg(SrcReg, getKillRegState(KillSrc));
 }
 
-bool
-MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineFunction &MF = *MBB.getParent();
-  MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
-  MFI->setCalleeSavedFrameSize(CSI.size() * 2);
-
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
-      .addReg(Reg, RegState::Kill);
-  }
-  return true;
-}
-
-bool
-MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
-    BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
-
-  return true;
-}
-
 unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
   unsigned Count = 0;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 49ccc032bf29..e885cd36a041 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -66,15 +66,6 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
-
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
 
   // Branch folding goodness
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 8792b2236855..59cb59873ab7 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,28 +40,28 @@ def SDT_MSP430Shift        : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
 // MSP430 Specific Node Definitions.
 //===----------------------------------------------------------------------===//
 def MSP430retflag  : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
-                       [SDNPHasChain, SDNPOptInFlag]>;
+                       [SDNPHasChain, SDNPOptInGlue]>;
 def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
-                       [SDNPHasChain, SDNPOptInFlag]>;
+                       [SDNPHasChain, SDNPOptInGlue]>;
 
 def MSP430rra     : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
 def MSP430rla     : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
 def MSP430rrc     : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
 
 def MSP430call    : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
-                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
+                     [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
 def MSP430callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
-                        [SDNPHasChain, SDNPOutFlag]>;
+                        [SDNPHasChain, SDNPOutGlue]>;
 def MSP430callseq_end :
                  SDNode<"ISD::CALLSEQ_END",   SDT_MSP430CallSeqEnd,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
-def MSP430cmp     : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430cmp     : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
 def MSP430brcc    : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
-                            [SDNPHasChain, SDNPInFlag]>;
+                            [SDNPHasChain, SDNPInGlue]>;
 def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
-                            [SDNPInFlag]>;
+                            [SDNPInGlue]>;
 def MSP430shl     : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
 def MSP430sra     : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
 def MSP430srl     : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 000000000000..d1d9a1158635
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,150 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: assert(0 && "Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: assert(0 && "Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+  // FIXME: We would like an efficient form for this, so we don't have to do a
+  // lot of extra uniquing.
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  if (!MO.isJTI() && MO.getOffset())
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
+                                   Ctx);
+  return MCOperand::CreateExpr(Expr);
+}
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+
+    MCOperand MCOp;
+    switch (MO.getType()) {
+    default:
+      MI->dump();
+      assert(0 && "unknown operand type");
+    case MachineOperand::MO_Register:
+      // Ignore all implicit register operands.
+      if (MO.isImplicit()) continue;
+      MCOp = MCOperand::CreateReg(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      MCOp = MCOperand::CreateImm(MO.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+                         MO.getMBB()->getSymbol(), Ctx));
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+    }
+
+    OutMI.addOperand(MCOp);
+  }
+}
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 000000000000..e937696406fe
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class AsmPrinter;
+  class MCAsmInfo;
+  class MCContext;
+  class MCInst;
+  class MCOperand;
+  class MCSymbol;
+  class MachineInstr;
+  class MachineModuleInfoMachO;
+  class MachineOperand;
+  class Mangler;
+
+  /// MSP430MCInstLower - This class is used to lower an MachineInstr
+  /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+  MCContext &Ctx;
+  Mangler &Mang;
+
+  AsmPrinter &Printer;
+public:
+  MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+    : Ctx(ctx), Mang(mang), Printer(printer) {}
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 3c3fa73477a5..1da6d8da1f25 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -33,11 +33,12 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
                                        const TargetInstrInfo &tii)
   : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
     TM(tm), TII(tii) {
-  StackAlign = TM.getFrameInfo()->getStackAlignment();
+  StackAlign = TM.getFrameLowering()->getStackAlignment();
 }
 
 const unsigned*
 MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
   const Function* F = MF->getFunction();
   static const unsigned CalleeSavedRegs[] = {
     MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
@@ -62,7 +63,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     0
   };
 
-  if (hasFP(*MF))
+  if (TFI->hasFP(*MF))
     return (F->getCallingConv() == CallingConv::MSP430_INTR ?
             CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
   else
@@ -73,6 +74,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
   // Mark 4 special registers as reserved.
   Reserved.set(MSP430::PCW);
@@ -81,7 +83,7 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(MSP430::CGW);
 
   // Mark frame pointer as reserved if needed.
-  if (hasFP(MF))
+  if (TFI->hasFP(MF))
     Reserved.set(MSP430::FPW);
 
   return Reserved;
@@ -92,23 +94,12 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
   return &MSP430::GR16RegClass;
 }
 
-
-bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  return (DisableFramePointerElim(MF) ||
-          MF.getFrameInfo()->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken());
-}
-
-bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
 void MSP430RegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub SPW, <amt>' and the
     // adjcallstackdown instruction into 'add SPW, <amt>'
@@ -172,6 +163,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc dl = MI.getDebugLoc();
   while (!MI.getOperand(i).isFI()) {
     ++i;
@@ -180,13 +172,13 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   int FrameIndex = MI.getOperand(i).getIndex();
 
-  unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+  unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
 
   // Skip the saved PC
   Offset += 2;
 
-  if (!hasFP(MF))
+  if (!TFI->hasFP(MF))
     Offset += MF.getFrameInfo()->getStackSize();
   else
     Offset += 2; // Skip the saved FPW
@@ -224,8 +216,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 void
 MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
                                                                          const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   // Create a frame entry for the FPW register that must be saved.
-  if (hasFP(MF)) {
+  if (TFI->hasFP(MF)) {
     int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
     (void)FrameIdx;
     assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
@@ -233,144 +227,14 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
   }
 }
 
-
-void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  uint64_t StackSize = MFI->getStackSize();
-
-  uint64_t NumBytes = 0;
-  if (hasFP(MF)) {
-    // Calculate required stack adjustment
-    uint64_t FrameSize = StackSize - 2;
-    NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
-
-    // Get the offset of the stack slot for the EBP register... which is
-    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
-    // Update the frame offset adjustment.
-    MFI->setOffsetAdjustment(-NumBytes);
-
-    // Save FPW into the appropriate stack slot...
-    BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
-      .addReg(MSP430::FPW, RegState::Kill);
-
-    // Update FPW with the new base value...
-    BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
-      .addReg(MSP430::SPW);
-
-    // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
-         I != E; ++I)
-      I->addLiveIn(MSP430::FPW);
-
-  } else
-    NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
-
-  // Skip the callee-saved push instructions.
-  while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
-    ++MBBI;
-
-  if (MBBI != MBB.end())
-    DL = MBBI->getDebugLoc();
-
-  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
-    // If there is an SUB16ri of SPW immediately before this instruction, merge
-    // the two.
-    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
-    // If there is an ADD16ri or SUB16ri of SPW immediately after this
-    // instruction, merge the two instructions.
-    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
-
-    if (NumBytes) {
-      MachineInstr *MI =
-        BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
-        .addReg(MSP430::SPW).addImm(NumBytes);
-      // The SRW implicit def is dead.
-      MI->getOperand(3).setIsDead();
-    }
-  }
-}
-
-void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
-                                      MachineBasicBlock &MBB) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc DL = MBBI->getDebugLoc();
-
-  switch (RetOpcode) {
-  case MSP430::RET:
-  case MSP430::RETI: break;  // These are ok
-  default:
-    llvm_unreachable("Can only insert epilog into returning blocks");
-  }
-
-  // Get the number of bytes to allocate from the FrameInfo
-  uint64_t StackSize = MFI->getStackSize();
-  unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
-  uint64_t NumBytes = 0;
-
-  if (hasFP(MF)) {
-    // Calculate required stack adjustment
-    uint64_t FrameSize = StackSize - 2;
-    NumBytes = FrameSize - CSSize;
-
-    // pop FPW.
-    BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
-  } else
-    NumBytes = StackSize - CSSize;
-
-  // Skip the callee-saved pop instructions.
-  while (MBBI != MBB.begin()) {
-    MachineBasicBlock::iterator PI = prior(MBBI);
-    unsigned Opc = PI->getOpcode();
-    if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
-      break;
-    --MBBI;
-  }
-
-  DL = MBBI->getDebugLoc();
-
-  // If there is an ADD16ri or SUB16ri of SPW immediately before this
-  // instruction, merge the two instructions.
-  //if (NumBytes || MFI->hasVarSizedObjects())
-  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
-  if (MFI->hasVarSizedObjects()) {
-    BuildMI(MBB, MBBI, DL,
-            TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
-    if (CSSize) {
-      MachineInstr *MI =
-        BuildMI(MBB, MBBI, DL,
-                TII.get(MSP430::SUB16ri), MSP430::SPW)
-        .addReg(MSP430::SPW).addImm(CSSize);
-      // The SRW implicit def is dead.
-      MI->getOperand(3).setIsDead();
-    }
-  } else {
-    // adjust stack pointer back: SPW += numbytes
-    if (NumBytes) {
-      MachineInstr *MI =
-        BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
-        .addReg(MSP430::SPW).addImm(NumBytes);
-      // The SRW implicit def is dead.
-      MI->getOperand(3).setIsDead();
-    }
-  }
-}
-
 unsigned MSP430RegisterInfo::getRARegister() const {
   return MSP430::PCW;
 }
 
 unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
 }
 
 int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 4d2795bb4020..56744fa64d32 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -39,9 +39,6 @@ public:
   BitVector getReservedRegs(const MachineFunction &MF) const;
   const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-  bool hasReservedCallFrame(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -49,9 +46,6 @@ public:
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
   // Debug information queries.
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index f8aec66a7d98..ab7b59b4eafe 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -79,10 +79,10 @@ def GR8 : RegisterClass<"MSP430", [i8], 8,
     GR8Class::iterator
     GR8Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       // Depending on whether the function uses frame pointer or not, last 5 or 4
       // registers on the list above are reserved
-      if (RI->hasFP(MF))
+      if (TFI->hasFP(MF))
         return end()-5;
       else
         return end()-4;
@@ -106,10 +106,10 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
     GR16Class::iterator
     GR16Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       // Depending on whether the function uses frame pointer or not, last 5 or 4
       // registers on the list above are reserved
-      if (RI->hasFP(MF))
+      if (TFI->hasFP(MF))
         return end()-5;
       else
         return end()-4;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 99877c8c54c8..fba95365a6a4 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,13 +28,13 @@ extern "C" void LLVMInitializeMSP430Target() {
 
 MSP430TargetMachine::MSP430TargetMachine(const Target &T,
                                          const std::string &TT,
-                                         const std::string &FS) :
-  LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS),
-  // FIXME: Check TargetData string.
-  DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
-  InstrInfo(*this), TLInfo(*this), TSInfo(*this),
-  FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
+                                         const std::string &FS)
+  : LLVMTargetMachine(T, TT),
+    Subtarget(TT, FS),
+    // FIXME: Check TargetData string.
+    DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+    InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+    FrameLowering(Subtarget) { }
 
 
 bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index b93edfde6c59..cee3b0480596 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -17,11 +17,12 @@
 
 #include "MSP430InstrInfo.h"
 #include "MSP430ISelLowering.h"
+#include "MSP430FrameLowering.h"
 #include "MSP430SelectionDAGInfo.h"
 #include "MSP430RegisterInfo.h"
 #include "MSP430Subtarget.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -34,16 +35,15 @@ class MSP430TargetMachine : public LLVMTargetMachine {
   MSP430InstrInfo        InstrInfo;
   MSP430TargetLowering   TLInfo;
   MSP430SelectionDAGInfo TSInfo;
-
-  // MSP430 does not have any call stack frame, therefore not having
-  // any MSP430 specific FrameInfo class.
-  TargetFrameInfo       FrameInfo;
+  MSP430FrameLowering    FrameLowering;
 
 public:
   MSP430TargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS);
 
-  virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual const TargetFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const MSP430InstrInfo *getInstrInfo() const  { return &InstrInfo; }
   virtual const TargetData *getTargetData() const     { return &DataLayout;}
   virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
index b1f33d6d0c11..fa4e80b0ff37 100644
--- a/lib/Target/MSP430/Makefile
+++ b/lib/Target/MSP430/Makefile
@@ -18,7 +18,7 @@ BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
 		MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
 		MSP430GenSubtarget.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1d408d0cb5be..2d1aa9d4e5e7 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMMSP430Info
   MSP430TargetInfo.cpp
   )
 
-add_dependencies(LLVMMSP430Info MSP430Table_gen)
+add_dependencies(LLVMMSP430Info MSP430CodeGenTable_gen)
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 49efe75d79d8..46c687b64001 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -224,16 +224,6 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   }
 }
 
-/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
-/// and the specified global variable's name.  If the global variable doesn't
-/// have a name, this fills in a unique name for the global.
-std::string Mangler::getNameWithPrefix(const GlobalValue *GV,
-                                       bool isImplicitlyPrivate) {
-  SmallString<64> Buf;
-  getNameWithPrefix(Buf, GV, isImplicitlyPrivate);
-  return std::string(Buf.begin(), Buf.end());
-}
-
 /// getSymbol - Return the MCSymbol for the specified global value.  This
 /// symbol is the main label that is the address of the global.
 MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index d3099d2a4e3e..000000000000
--- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
-  ${CMAKE_CURRENT_BINARY_DIR}/..
-  ${CMAKE_CURRENT_SOURCE_DIR}/..
-  )
-
-add_llvm_library(LLVMMipsAsmPrinter
-  MipsAsmPrinter.cpp
-  )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile
deleted file mode 100644
index b1efe9b94479..000000000000
--- a/lib/Target/Mips/AsmPrinter/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMipsAsmPrinter
-
-# Hack: we need to include 'main' Mips target directory to grab
-# private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
deleted file mode 100644
index 6660f6b62430..000000000000
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ /dev/null
@@ -1,386 +0,0 @@
-//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format MIPS assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mips-asm-printer"
-#include "Mips.h"
-#include "MipsSubtarget.h"
-#include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h" 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class MipsAsmPrinter : public AsmPrinter {
-    const MipsSubtarget *Subtarget;
-  public:
-    explicit MipsAsmPrinter(TargetMachine &TM,  MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {
-      Subtarget = &TM.getSubtarget<MipsSubtarget>();
-    }
-
-    virtual const char *getPassName() const {
-      return "Mips Assembly Printer";
-    }
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
-                         const char *Modifier = 0);
-    void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
-                         const char *Modifier = 0);
-    void printSavedRegsBitmask(raw_ostream &O);
-    void printHex32(unsigned int Value, raw_ostream &O);
-
-    const char *getCurrentABIString() const;
-    void emitFrameDirective();
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
-    void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    virtual void EmitFunctionBodyStart();
-    virtual void EmitFunctionBodyEnd();
-    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
-    static const char *getRegisterName(unsigned RegNo);
-
-    virtual void EmitFunctionEntryLabel();
-    void EmitStartOfAsmFile(Module &M);
-  };
-} // end of anonymous namespace
-
-#include "MipsGenAsmWriter.inc"
-
-//===----------------------------------------------------------------------===//
-//
-//  Mips Asm Directives
-//
-//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
-//  Describe the stack frame.
-//
-//  -- Mask directives "(f)mask  bitmask, offset" 
-//  Tells the assembler which registers are saved and where.
-//  bitmask - contain a little endian bitset indicating which registers are 
-//            saved on function prologue (e.g. with a 0x80000000 mask, the 
-//            assembler knows the register 31 (RA) is saved at prologue.
-//  offset  - the position before stack pointer subtraction indicating where 
-//            the first saved register on prologue is located. (e.g. with a
-//
-//  Consider the following function prologue:
-//
-//    .frame  $fp,48,$ra
-//    .mask   0xc0000000,-8
-//       addiu $sp, $sp, -48
-//       sw $ra, 40($sp)
-//       sw $fp, 36($sp)
-//
-//    With a 0xc0000000 mask, the assembler knows the register 31 (RA) and 
-//    30 (FP) are saved at prologue. As the save order on prologue is from 
-//    left to right, RA is saved first. A -8 offset means that after the 
-//    stack pointer subtration, the first register in the mask (RA) will be
-//    saved at address 48-8=40.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Mask directives
-//===----------------------------------------------------------------------===//
-
-// Create a bitmask with all callee saved registers for CPU or Floating Point 
-// registers. For CPU registers consider RA, GP and FP for saving if necessary.
-void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
-  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-  const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
-             
-  // CPU and FPU Saved Registers Bitmasks
-  unsigned int CPUBitmask = 0;
-  unsigned int FPUBitmask = 0;
-
-  // Set the CPU and FPU Bitmasks
-  const MachineFrameInfo *MFI = MF->getFrameInfo();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
-    if (Mips::CPURegsRegisterClass->contains(Reg))
-      CPUBitmask |= (1 << RegNum);
-    else
-      FPUBitmask |= (1 << RegNum);
-  }
-
-  // Return Address and Frame registers must also be set in CPUBitmask.
-  if (RI.hasFP(*MF)) 
-    CPUBitmask |= (1 << MipsRegisterInfo::
-                getRegisterNumbering(RI.getFrameRegister(*MF)));
-  
-  if (MFI->adjustsStack()) 
-    CPUBitmask |= (1 << MipsRegisterInfo::
-                getRegisterNumbering(RI.getRARegister()));
-
-  // Print CPUBitmask
-  O << "\t.mask \t"; printHex32(CPUBitmask, O);
-  O << ',' << MipsFI->getCPUTopSavedRegOff() << '\n';
-
-  // Print FPUBitmask
-  O << "\t.fmask\t"; printHex32(FPUBitmask, O); O << ","
-    << MipsFI->getFPUTopSavedRegOff() << '\n';
-}
-
-// Print a 32 bit hex number with all numbers.
-void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
-  O << "0x";
-  for (int i = 7; i >= 0; i--) 
-    O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
-}
-
-//===----------------------------------------------------------------------===//
-// Frame and Set directives
-//===----------------------------------------------------------------------===//
-
-/// Frame Directive
-void MipsAsmPrinter::emitFrameDirective() {
-  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-
-  unsigned stackReg  = RI.getFrameRegister(*MF);
-  unsigned returnReg = RI.getRARegister();
-  unsigned stackSize = MF->getFrameInfo()->getStackSize();
-
-  OutStreamer.EmitRawText("\t.frame\t$" +
-                          Twine(LowercaseString(getRegisterName(stackReg))) +
-                          "," + Twine(stackSize) + ",$" +
-                          Twine(LowercaseString(getRegisterName(returnReg))));
-}
-
-/// Emit Set directives.
-const char *MipsAsmPrinter::getCurrentABIString() const { 
-  switch (Subtarget->getTargetABI()) {
-  case MipsSubtarget::O32:  return "abi32";  
-  case MipsSubtarget::O64:  return "abiO64";
-  case MipsSubtarget::N32:  return "abiN32";
-  case MipsSubtarget::N64:  return "abi64";
-  case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
-  default: break;
-  }
-
-  llvm_unreachable("Unknown Mips ABI");
-  return NULL;
-}  
-
-void MipsAsmPrinter::EmitFunctionEntryLabel() {
-  OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
-  OutStreamer.EmitLabel(CurrentFnSym);
-}
-
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
-void MipsAsmPrinter::EmitFunctionBodyStart() {
-  emitFrameDirective();
-  
-  SmallString<128> Str;
-  raw_svector_ostream OS(Str);
-  printSavedRegsBitmask(OS);
-  OutStreamer.EmitRawText(OS.str());
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void MipsAsmPrinter::EmitFunctionBodyEnd() {
-  // There are instruction for this macros, but they must
-  // always be at the function end, and we can't emit and
-  // break with BB logic. 
-  OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
-  OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
-  OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
-}
-
-
-/// isBlockOnlyReachableByFallthough - Return true if the basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) 
-    const {
-  // The predecessor has to be immediately before this block.
-  const MachineBasicBlock *Pred = *MBB->pred_begin();
-
-  // If the predecessor is a switch statement, assume a jump table
-  // implementation, so it is not a fall through.
-  if (const BasicBlock *bb = Pred->getBasicBlock())
-    if (isa<SwitchInst>(bb->getTerminator()))
-      return false;
-  
-  return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
-}
-
-// Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
-                                     unsigned AsmVariant,const char *ExtraCode,
-                                     raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0]) 
-    return true; // Unknown modifier.
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                  raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  bool closeP = false;
-
-  if (MO.getTargetFlags())
-    closeP = true;
-
-  switch(MO.getTargetFlags()) {
-  case MipsII::MO_GPREL:    O << "%gp_rel("; break;
-  case MipsII::MO_GOT_CALL: O << "%call16("; break;
-  case MipsII::MO_GOT:
-    if (MI->getOpcode() == Mips::LW)
-      O << "%got(";
-    else
-      O << "%lo(";
-    break;
-  case MipsII::MO_ABS_HILO:
-    if (MI->getOpcode() == Mips::LUi)
-      O << "%hi(";
-    else
-      O << "%lo(";     
-    break;
-  }
-
-  switch (MO.getType()) {
-    case MachineOperand::MO_Register:
-      O << '$' << LowercaseString(getRegisterName(MO.getReg()));
-      break;
-
-    case MachineOperand::MO_Immediate:
-      O << (short int)MO.getImm();
-      break;
-
-    case MachineOperand::MO_MachineBasicBlock:
-      O << *MO.getMBB()->getSymbol();
-      return;
-
-    case MachineOperand::MO_GlobalAddress:
-      O << *Mang->getSymbol(MO.getGlobal());
-      break;
-
-    case MachineOperand::MO_ExternalSymbol:
-      O << *GetExternalSymbolSymbol(MO.getSymbolName());
-      break;
-
-    case MachineOperand::MO_JumpTableIndex:
-      O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-        << '_' << MO.getIndex();
-      break;
-
-    case MachineOperand::MO_ConstantPoolIndex:
-      O << MAI->getPrivateGlobalPrefix() << "CPI"
-        << getFunctionNumber() << "_" << MO.getIndex();
-      if (MO.getOffset())
-        O << "+" << MO.getOffset();
-      break;
-  
-    default:
-      llvm_unreachable("<unknown operand type>");
-  }
-
-  if (closeP) O << ")";
-}
-
-void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
-                                      raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  if (MO.isImm())
-    O << (unsigned short int)MO.getImm();
-  else 
-    printOperand(MI, opNum, O);
-}
-
-void MipsAsmPrinter::
-printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                const char *Modifier) {
-  // when using stack locations for not load/store instructions
-  // print the same way as all normal 3 operand instructions.
-  if (Modifier && !strcmp(Modifier, "stackloc")) {
-    printOperand(MI, opNum+1, O);
-    O << ", ";
-    printOperand(MI, opNum, O);
-    return;
-  }
-
-  // Load/Store memory operands -- imm($reg) 
-  // If PIC target the target is loaded as the 
-  // pattern lw $25,%call16($28)
-  printOperand(MI, opNum, O);
-  O << "(";
-  printOperand(MI, opNum+1, O);
-  O << ")";
-}
-
-void MipsAsmPrinter::
-printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                const char *Modifier) {
-  const MachineOperand& MO = MI->getOperand(opNum);
-  O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); 
-}
-
-void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
-  // FIXME: Use SwitchSection.
-  
-  // Tell the assembler which ABI we are using
-  OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
-
-  // TODO: handle O64 ABI
-  if (Subtarget->isABI_EABI()) {
-    if (Subtarget->isGP32bit())
-      OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
-    else
-      OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
-  }
-
-  // return to previous section
-  OutStreamer.EmitRawText(StringRef("\t.previous")); 
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeMipsAsmPrinter() { 
-  RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
-  RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
-}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index a77802aec52a..26df1a05295e 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -11,10 +11,12 @@ tablegen(MipsGenCallingConv.inc -gen-callingconv)
 tablegen(MipsGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(MipsCodeGen
+  MipsAsmPrinter.cpp
   MipsDelaySlotFiller.cpp
   MipsInstrInfo.cpp
   MipsISelDAGToDAG.cpp
   MipsISelLowering.cpp
+  MipsFrameLowering.cpp
   MipsMCAsmInfo.cpp
   MipsRegisterInfo.cpp
   MipsSubtarget.cpp
@@ -23,4 +25,4 @@ add_llvm_target(MipsCodeGen
   MipsSelectionDAGInfo.cpp
   )
 
-target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 2ed8d77b85b2..d16b066a624e 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -18,7 +18,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
                 MipsGenDAGISel.inc MipsGenCallingConv.inc \
                 MipsGenSubtarget.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index a51c3779c7f4..3e6437b93ccf 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -36,19 +36,15 @@ def FeatureFP64Bit     : SubtargetFeature<"fp64", "IsFP64bit", "true",
                                 "Support 64-bit FP registers.">;
 def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
                                 "true", "Only supports single precision float">;
-def FeatureMips1       : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
-                                "Mips1 ISA Support">;
-def FeatureMips2       : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
-                                "Mips2 ISA Support">;
 def FeatureO32         : SubtargetFeature<"o32", "MipsABI", "O32",
                                 "Enable o32 ABI">;
 def FeatureEABI        : SubtargetFeature<"eabi", "MipsABI", "EABI",
                                 "Enable eabi ABI">;
-def FeatureVFPU        : SubtargetFeature<"vfpu", "HasVFPU", 
+def FeatureVFPU        : SubtargetFeature<"vfpu", "HasVFPU",
                                 "true", "Enable vector FPU instructions.">;
-def FeatureSEInReg     : SubtargetFeature<"seinreg", "HasSEInReg", "true", 
+def FeatureSEInReg     : SubtargetFeature<"seinreg", "HasSEInReg", "true",
                                 "Enable 'signext in register' instructions.">;
-def FeatureCondMov     : SubtargetFeature<"condmov", "HasCondMov", "true", 
+def FeatureCondMov     : SubtargetFeature<"condmov", "HasCondMov", "true",
                                 "Enable 'conditional move' instructions.">;
 def FeatureMulDivAdd   : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
                                 "Enable 'multiply add/sub' instructions.">;
@@ -58,6 +54,16 @@ def FeatureSwap        : SubtargetFeature<"swap", "HasSwap", "true",
                                 "Enable 'byte/half swap' instructions.">;
 def FeatureBitCount    : SubtargetFeature<"bitcount", "HasBitCount", "true",
                                 "Enable 'count leading bits' instructions.">;
+def FeatureMips1       : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+                                "Mips1 ISA Support">;
+def FeatureMips2       : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+                                "Mips2 ISA Support">;
+def FeatureMips32      : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+                                "Mips32 ISA Support", 
+                                [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2    : SubtargetFeature<"mips32r2", "MipsArchVersion",
+                                "Mips32r2", "Mips32r2 ISA Support",
+                                [FeatureMips32, FeatureSEInReg]>;
 
 //===----------------------------------------------------------------------===//
 // Mips processors supported.
@@ -73,10 +79,12 @@ def : Proc<"r3000", [FeatureMips1]>;
 def : Proc<"mips2", [FeatureMips2]>;
 def : Proc<"r6000", [FeatureMips2]>;
 
-// Allegrex is a 32bit subset of r4000, both for interger and fp registers, 
-// but much more similar to Mips2 than Mips3. It also contains some of 
-// Mips32/Mips32r2 instructions and a custom vector fpu processor. 
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, 
+def : Proc<"4ke", [FeatureMips32r2]>;
+
+// Allegrex is a 32bit subset of r4000, both for interger and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some of
+// Mips32/Mips32r2 instructions and a custom vector fpu processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
       FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
       FeatureMinMax, FeatureSwap, FeatureBitCount]>;
 
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
new file mode 100644
index 000000000000..bd28a9bd073b
--- /dev/null
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -0,0 +1,393 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h" 
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class MipsAsmPrinter : public AsmPrinter {
+    const MipsSubtarget *Subtarget;
+  public:
+    explicit MipsAsmPrinter(TargetMachine &TM,  MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {
+      Subtarget = &TM.getSubtarget<MipsSubtarget>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Mips Assembly Printer";
+    }
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
+                         const char *Modifier = 0);
+    void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, 
+                         const char *Modifier = 0);
+    void printSavedRegsBitmask(raw_ostream &O);
+    void printHex32(unsigned int Value, raw_ostream &O);
+
+    const char *getCurrentABIString() const;
+    void emitFrameDirective();
+
+    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
+    void EmitInstruction(const MachineInstr *MI) {
+      SmallString<128> Str;
+      raw_svector_ostream OS(Str);
+      printInstruction(MI, OS);
+      OutStreamer.EmitRawText(OS.str());
+    }
+    virtual void EmitFunctionBodyStart();
+    virtual void EmitFunctionBodyEnd();
+    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+    static const char *getRegisterName(unsigned RegNo);
+
+    virtual void EmitFunctionEntryLabel();
+    void EmitStartOfAsmFile(Module &M);
+  };
+} // end of anonymous namespace
+
+#include "MipsGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+//  Mips Asm Directives
+//
+//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+//  Describe the stack frame.
+//
+//  -- Mask directives "(f)mask  bitmask, offset" 
+//  Tells the assembler which registers are saved and where.
+//  bitmask - contain a little endian bitset indicating which registers are 
+//            saved on function prologue (e.g. with a 0x80000000 mask, the 
+//            assembler knows the register 31 (RA) is saved at prologue.
+//  offset  - the position before stack pointer subtraction indicating where 
+//            the first saved register on prologue is located. (e.g. with a
+//
+//  Consider the following function prologue:
+//
+//    .frame  $fp,48,$ra
+//    .mask   0xc0000000,-8
+//       addiu $sp, $sp, -48
+//       sw $ra, 40($sp)
+//       sw $fp, 36($sp)
+//
+//    With a 0xc0000000 mask, the assembler knows the register 31 (RA) and 
+//    30 (FP) are saved at prologue. As the save order on prologue is from 
+//    left to right, RA is saved first. A -8 offset means that after the 
+//    stack pointer subtration, the first register in the mask (RA) will be
+//    saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point 
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+  const TargetFrameLowering *TFI = TM.getFrameLowering();
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
+
+  // CPU and FPU Saved Registers Bitmasks
+  unsigned int CPUBitmask = 0;
+  unsigned int FPUBitmask = 0;
+
+  // Set the CPU and FPU Bitmasks
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+    if (Mips::CPURegsRegisterClass->contains(Reg))
+      CPUBitmask |= (1 << RegNum);
+    else
+      FPUBitmask |= (1 << RegNum);
+  }
+
+  // Return Address and Frame registers must also be set in CPUBitmask.
+  // FIXME: Do we really need hasFP() call here? When no FP is present SP is
+  // just returned -- will it be ok?
+  if (TFI->hasFP(*MF))
+    CPUBitmask |= (1 << MipsRegisterInfo::
+                getRegisterNumbering(RI->getFrameRegister(*MF)));
+
+  if (MFI->adjustsStack())
+    CPUBitmask |= (1 << MipsRegisterInfo::
+                getRegisterNumbering(RI->getRARegister()));
+
+  // Print CPUBitmask
+  O << "\t.mask \t"; printHex32(CPUBitmask, O);
+  O << ',' << MipsFI->getCPUTopSavedRegOff() << '\n';
+
+  // Print FPUBitmask
+  O << "\t.fmask\t"; printHex32(FPUBitmask, O); O << ","
+    << MipsFI->getFPUTopSavedRegOff() << '\n';
+}
+
+// Print a 32 bit hex number with all numbers.
+void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
+  O << "0x";
+  for (int i = 7; i >= 0; i--) 
+    O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::emitFrameDirective() {
+  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+  unsigned stackReg  = RI.getFrameRegister(*MF);
+  unsigned returnReg = RI.getRARegister();
+  unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+  OutStreamer.EmitRawText("\t.frame\t$" +
+                          Twine(LowercaseString(getRegisterName(stackReg))) +
+                          "," + Twine(stackSize) + ",$" +
+                          Twine(LowercaseString(getRegisterName(returnReg))));
+}
+
+/// Emit Set directives.
+const char *MipsAsmPrinter::getCurrentABIString() const { 
+  switch (Subtarget->getTargetABI()) {
+  case MipsSubtarget::O32:  return "abi32";  
+  case MipsSubtarget::O64:  return "abiO64";
+  case MipsSubtarget::N32:  return "abiN32";
+  case MipsSubtarget::N64:  return "abi64";
+  case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+  default: break;
+  }
+
+  llvm_unreachable("Unknown Mips ABI");
+  return NULL;
+}  
+
+void MipsAsmPrinter::EmitFunctionEntryLabel() {
+  OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+  OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyStart() {
+  emitFrameDirective();
+  
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  printSavedRegsBitmask(OS);
+  OutStreamer.EmitRawText(OS.str());
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MipsAsmPrinter::EmitFunctionBodyEnd() {
+  // There are instruction for this macros, but they must
+  // always be at the function end, and we can't emit and
+  // break with BB logic. 
+  OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+  OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+  OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) 
+    const {
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+  // If the predecessor is a switch statement, assume a jump table
+  // implementation, so it is not a fall through.
+  if (const BasicBlock *bb = Pred->getBasicBlock())
+    if (isa<SwitchInst>(bb->getTerminator()))
+      return false;
+  
+  return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 
+                                     unsigned AsmVariant,const char *ExtraCode,
+                                     raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) 
+    return true; // Unknown modifier.
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                  raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  bool closeP = false;
+
+  if (MO.getTargetFlags())
+    closeP = true;
+
+  switch(MO.getTargetFlags()) {
+  case MipsII::MO_GPREL:    O << "%gp_rel("; break;
+  case MipsII::MO_GOT_CALL: O << "%call16("; break;
+  case MipsII::MO_GOT: {
+    const MachineOperand &LastMO = MI->getOperand(opNum-1);
+    bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register
+                      && LastMO.getReg() == Mips::GP;
+    if (MI->getOpcode() == Mips::LW || LastMOIsGP)
+      O << "%got(";
+    else
+      O << "%lo(";
+    break;
+  }
+  case MipsII::MO_ABS_HILO:
+    if (MI->getOpcode() == Mips::LUi)
+      O << "%hi(";
+    else
+      O << "%lo(";     
+    break;
+  }
+
+  switch (MO.getType()) {
+    case MachineOperand::MO_Register:
+      O << '$' << LowercaseString(getRegisterName(MO.getReg()));
+      break;
+
+    case MachineOperand::MO_Immediate:
+      O << (short int)MO.getImm();
+      break;
+
+    case MachineOperand::MO_MachineBasicBlock:
+      O << *MO.getMBB()->getSymbol();
+      return;
+
+    case MachineOperand::MO_GlobalAddress:
+      O << *Mang->getSymbol(MO.getGlobal());
+      break;
+
+    case MachineOperand::MO_ExternalSymbol:
+      O << *GetExternalSymbolSymbol(MO.getSymbolName());
+      break;
+
+    case MachineOperand::MO_JumpTableIndex:
+      O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+        << '_' << MO.getIndex();
+      break;
+
+    case MachineOperand::MO_ConstantPoolIndex:
+      O << MAI->getPrivateGlobalPrefix() << "CPI"
+        << getFunctionNumber() << "_" << MO.getIndex();
+      if (MO.getOffset())
+        O << "+" << MO.getOffset();
+      break;
+  
+    default:
+      llvm_unreachable("<unknown operand type>");
+  }
+
+  if (closeP) O << ")";
+}
+
+void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (unsigned short int)MO.getImm();
+  else 
+    printOperand(MI, opNum, O);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                const char *Modifier) {
+  // when using stack locations for not load/store instructions
+  // print the same way as all normal 3 operand instructions.
+  if (Modifier && !strcmp(Modifier, "stackloc")) {
+    printOperand(MI, opNum+1, O);
+    O << ", ";
+    printOperand(MI, opNum, O);
+    return;
+  }
+
+  // Load/Store memory operands -- imm($reg) 
+  // If PIC target the target is loaded as the 
+  // pattern lw $25,%call16($28)
+  printOperand(MI, opNum, O);
+  O << "(";
+  printOperand(MI, opNum+1, O);
+  O << ")";
+}
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                const char *Modifier) {
+  const MachineOperand& MO = MI->getOperand(opNum);
+  O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); 
+}
+
+void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  // FIXME: Use SwitchSection.
+  
+  // Tell the assembler which ABI we are using
+  OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+
+  // TODO: handle O64 ABI
+  if (Subtarget->isABI_EABI()) {
+    if (Subtarget->isGP32bit())
+      OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+    else
+      OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+  }
+
+  // return to previous section
+  OutStreamer.EmitRawText(StringRef("\t.previous")); 
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMipsAsmPrinter() { 
+  RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
+  RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 597ea0d6c207..b44a0af2d436 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -31,7 +31,7 @@ namespace {
     const TargetInstrInfo *TII;
 
     static char ID;
-    Filler(TargetMachine &tm) 
+    Filler(TargetMachine &tm)
       : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
 
     virtual const char *getPassName() const {
@@ -55,17 +55,22 @@ namespace {
 /// Currently, we fill delay slots with NOPs. We assume there is only one
 /// delay slot per delayed instruction.
 bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB) 
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
 {
   bool Changed = false;
-  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
-    if (I->getDesc().hasDelaySlot()) {
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+    const TargetInstrDesc& Tid = I->getDesc();
+    if (Tid.hasDelaySlot() &&
+        (TM.getSubtarget<MipsSubtarget>().isMips1() ||
+         Tid.isCall() || Tid.isBranch() || Tid.isReturn())) {
       MachineBasicBlock::iterator J = I;
       ++J;
       BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
       ++FilledSlots;
       Changed = true;
     }
+  }
+
   return Changed;
 }
 
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 000000000000..87a097a5d590
--- /dev/null
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,314 @@
+//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done thought a positive offset
+// from the stack/frame pointer, so the stack is considering
+// to grow up! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+//  The stack frame required by the ABI (after call):
+//  Offset
+//
+//  0                 ----------
+//  4                 Args to pass
+//  .                 saved $GP  (used in PIC)
+//  .                 Alloca allocations
+//  .                 Local Area
+//  .                 CPU "Callee Saved" Registers
+//  .                 saved FP
+//  .                 saved RA
+//  .                 FPU "Callee Saved" Registers
+//  StackSize         -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer subtracted/added from the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1))
+//
+// Examples:
+// - reference to the actual stack frame
+//   for any local area var there is smt like : FI >= 0, StackOffset: 4
+//     sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+//   suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16.
+//   The emitted instruction will be something like:
+//     lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown on LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments, are negative numbers. This way, on eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  unsigned StackAlign = getStackAlignment();
+  unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+  bool HasGP = MipsFI->needGPSaveRestore();
+
+  // Min and Max CSI FrameIndex.
+  int MinCSFI = -1, MaxCSFI = -1;
+
+  // See the description at MipsMachineFunction.h
+  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
+
+  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
+  // LowerFormalArguments. Leaving '0' for while is necessary to avoid the
+  // approach done by calculateFrameObjectOffsets to the stack frame.
+  MipsFI->adjustLoadArgsFI(MFI);
+  MipsFI->adjustStoreVarArgsFI(MFI);
+
+  // It happens that the default stack frame allocation order does not directly
+  // map to the convention used for mips. So we must fix it. We move the callee
+  // save register slots after the local variables area, as described in the
+  // stack frame above.
+  unsigned CalleeSavedAreaSize = 0;
+  if (!CSI.empty()) {
+    MinCSFI = CSI[0].getFrameIdx();
+    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
+  }
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+
+  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
+                : (STI.isABI_O32() ? 16 : 0);
+
+  // Adjust local variables. They should come on the stack right
+  // after the arguments.
+  int LastOffsetFI = -1;
+  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (i >= MinCSFI && i <= MaxCSFI)
+      continue;
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    unsigned Offset =
+      StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
+    if (LastOffsetFI == -1)
+      LastOffsetFI = i;
+    if (Offset > MFI->getObjectOffset(LastOffsetFI))
+      LastOffsetFI = i;
+    MFI->setObjectOffset(i, Offset);
+  }
+
+  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
+  // be saved in this CPU Area. This whole area must be aligned to the
+  // default Stack Alignment requirements.
+  if (LastOffsetFI >= 0)
+    StackOffset = MFI->getObjectOffset(LastOffsetFI)+
+                  MFI->getObjectSize(LastOffsetFI);
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (!Mips::CPURegsRegisterClass->contains(Reg))
+      break;
+    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+  }
+
+  // Stack locations for FP and RA. If only one of them is used,
+  // the space must be allocated for both, otherwise no space at all.
+  if (hasFP(MF) || MFI->adjustsStack()) {
+    // FP stack location
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+                         StackOffset);
+    MipsFI->setFPStackOffset(StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += RegSize;
+
+    // SP stack location
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+                         StackOffset);
+    MipsFI->setRAStackOffset(StackOffset);
+    StackOffset += RegSize;
+
+    if (MFI->adjustsStack())
+      TopCPUSavedRegOff += RegSize;
+  }
+
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  // Adjust FPU Callee Saved Registers Area. This Area must be
+  // aligned to the default Stack Alignment requirements.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (Mips::CPURegsRegisterClass->contains(Reg))
+      continue;
+    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+    TopFPUSavedRegOff = StackOffset;
+    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+  }
+  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+  // Update frame info
+  MFI->setStackSize(StackOffset);
+
+  // Recalculate the final tops offset. The final values must be '0'
+  // if there isn't a callee saved register for CPU or FPU, otherwise
+  // a negative offset is needed.
+  if (TopCPUSavedRegOff >= 0)
+    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+
+  if (TopFPUSavedRegOff >= 0)
+    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
+}
+
+void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB   = MF.front();
+  MachineFrameInfo *MFI    = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  const MipsRegisterInfo *RegInfo =
+    static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const MipsInstrInfo &TII =
+    *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+
+  // Get the right frame order for Mips.
+  adjustMipsStackFrame(MF);
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  unsigned StackSize = MFI->getStackSize();
+
+  // No need to allocate space on the stack.
+  if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+  int FPOffset = MipsFI->getFPStackOffset();
+  int RAOffset = MipsFI->getRAStackOffset();
+
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+  // TODO: check need from GP here.
+  if (isPIC && STI.isABI_O32())
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+      .addReg(RegInfo->getPICCallReg());
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+  // Adjust stack : addi sp, sp, (-imm)
+  BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+      .addReg(Mips::SP).addImm(-StackSize);
+
+  // Save the return address only if the function isnt a leaf one.
+  // sw  $ra, stack_loc($sp)
+  if (MFI->adjustsStack()) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+        .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+  }
+
+  // if framepointer enabled, save it and set it
+  // to point to the stack pointer
+  if (hasFP(MF)) {
+    // sw  $fp,stack_loc($sp)
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+      .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+    // move $fp, $sp
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+      .addReg(Mips::SP).addReg(Mips::ZERO);
+  }
+
+  // Restore GP from the saved stack location
+  if (MipsFI->needGPSaveRestore())
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+      .addImm(MipsFI->getGPStackOffset());
+}
+
+void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
+                                 MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  MachineFrameInfo *MFI            = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI         = MF.getInfo<MipsFunctionInfo>();
+  const MipsInstrInfo &TII =
+    *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  // Get the number of bytes from FrameInfo
+  int NumBytes = (int) MFI->getStackSize();
+
+  // Get the FI's where RA and FP are saved.
+  int FPOffset = MipsFI->getFPStackOffset();
+  int RAOffset = MipsFI->getRAStackOffset();
+
+  // if framepointer enabled, restore it and restore the
+  // stack pointer
+  if (hasFP(MF)) {
+    // move $sp, $fp
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
+      .addReg(Mips::FP).addReg(Mips::ZERO);
+
+    // lw  $fp,stack_loc($sp)
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
+      .addImm(FPOffset).addReg(Mips::SP);
+  }
+
+  // Restore the return address only if the function isnt a leaf one.
+  // lw  $ra, stack_loc($sp)
+  if (MFI->adjustsStack()) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
+      .addImm(RAOffset).addReg(Mips::SP);
+  }
+
+  // adjust stack  : insert addi sp, sp, (imm)
+  if (NumBytes) {
+    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+      .addReg(Mips::SP).addImm(NumBytes);
+  }
+}
+
+void MipsFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+  const MipsRegisterInfo *RegInfo =
+    static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  RegInfo->processFunctionBeforeFrameFinalized(MF);
+}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 000000000000..a8426c1b70fd
--- /dev/null
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,48 @@
+//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+  const MipsSubtarget &STI;
+
+public:
+  explicit MipsFrameLowering(const MipsSubtarget &sti)
+    // FIXME: Is this correct at all?
+    : TargetFrameLowering(StackGrowsUp, 8, 0), STI(sti) {
+  }
+
+  void adjustMipsStackFrame(MachineFunction &MF) const;
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index a47cf7b4f201..755e04df63be 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -84,8 +84,7 @@ private:
   SDNode *Select(SDNode *N);
 
   // Complex Pattern.
-  bool SelectAddr(SDNode *Op, SDValue N, 
-                  SDValue &Base, SDValue &Offset);
+  bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
 
   SDNode *SelectLoadFp64(SDNode *N);
   SDNode *SelectStoreFp64(SDNode *N);
@@ -110,8 +109,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
 /// ComplexPattern used on MipsInstrInfo
 /// Used on Mips Load/Store instructions
 bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
-{
+SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
   // if Address is FI, get the TargetFrameIndex.
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -193,7 +191,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N, N1, Offset0, Base) ||
+  if (!SelectAddr(N1, Offset0, Base) ||
       N1.getValueType() != MVT::i32)
     return NULL;
 
@@ -257,7 +255,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
   SDValue N2 = N->getOperand(2);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N, N2, Offset0, Base) ||
+  if (!SelectAddr(N2, Offset0, Base) ||
       N1.getValueType() != MVT::f64 ||
       N2.getValueType() != MVT::i32)
     return NULL;
@@ -327,7 +325,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     case ISD::SUBE: 
     case ISD::ADDE: {
       SDValue InFlag = Node->getOperand(2), CmpLHS;
-      unsigned Opc = InFlag.getOpcode(); Opc=Opc;
+      unsigned Opc = InFlag.getOpcode(); (void)Opc;
       assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || 
               (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&  
              "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
@@ -351,7 +349,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
       SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, 
                                                 SDValue(Carry,0), RHS);
 
-      return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag,
+      return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
                                   LHS, SDValue(AddCarry,0));
     }
 
@@ -369,11 +367,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
       else
         Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
 
-      SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
+      SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
 
       SDValue InFlag = SDValue(MulDiv, 0);
       SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, 
-                                          MVT::Flag, InFlag);
+                                          MVT::Glue, InFlag);
       InFlag = SDValue(Lo,1);
       SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
 
@@ -388,6 +386,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 
     /// Special Muls
     case ISD::MUL: 
+      if (Subtarget.isMips32())
+        break;
     case ISD::MULHS:
     case ISD::MULHU: {
       SDValue MulOp1 = Node->getOperand(0);
@@ -395,7 +395,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 
       unsigned MulOp  = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
       SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, 
-                                               MVT::Flag, MulOp1, MulOp2);
+                                               MVT::Glue, MulOp1, MulOp2);
 
       SDValue InFlag = SDValue(MulNode, 0);
 
@@ -421,7 +421,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
         Op  = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
         MOp = Mips::MFHI;
       }
-      SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
+      SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
 
       SDValue InFlag = SDValue(Node, 0);
       return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag);
@@ -474,7 +474,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
         SDValue InFlag;
 
         // Skip the incomming flag if present
-        if (Node->getOperand(LastOpNum).getValueType() == MVT::Flag)
+        if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue)
           LastOpNum--;
 
         if ( (isa<GlobalAddressSDNode>(Callee)) ||
@@ -496,7 +496,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
           Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag);
 
         // Map the JmpLink operands to JALR
-        SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Flag);
+        SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
         SmallVector<SDValue, 8> Ops;
         Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32));
 
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b0b99bad1607..1d7a1c0ae8c7 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -41,12 +41,15 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
     case MipsISD::Lo         : return "MipsISD::Lo";
     case MipsISD::GPRel      : return "MipsISD::GPRel";
     case MipsISD::Ret        : return "MipsISD::Ret";
-    case MipsISD::CMov       : return "MipsISD::CMov";
     case MipsISD::SelectCC   : return "MipsISD::SelectCC";
     case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
     case MipsISD::FPBrcond   : return "MipsISD::FPBrcond";
     case MipsISD::FPCmp      : return "MipsISD::FPCmp";
     case MipsISD::FPRound    : return "MipsISD::FPRound";
+    case MipsISD::MAdd       : return "MipsISD::MAdd";
+    case MipsISD::MAddu      : return "MipsISD::MAddu";
+    case MipsISD::MSub       : return "MipsISD::MSub";
+    case MipsISD::MSubu      : return "MipsISD::MSubu";
     default                  : return NULL;
   }
 }
@@ -57,7 +60,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
   Subtarget = &TM.getSubtarget<MipsSubtarget>();
 
   // Mips does not have i1 type, so use i32 for
-  // setcc operations results (slt, sgt, ...). 
+  // setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Set up the register classes
@@ -69,7 +72,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
     if (!Subtarget->isFP64bit())
       addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
 
-  // Load extented operations for i1 types must be promoted 
+  // Load extented operations for i1 types must be promoted
   setLoadExtAction(ISD::EXTLOAD,  MVT::i1,  Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
@@ -78,9 +81,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
-  // Used by legalize types to correctly generate the setcc result. 
-  // Without this, every float setcc comes with a AND/OR with the result, 
-  // we don't want this, since the fpcmp result goes to a flag register, 
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with a AND/OR with the result,
+  // we don't want this, since the fpcmp result goes to a flag register,
   // which is used implicitly by brcond and select operations.
   AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
 
@@ -100,8 +103,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 
 
-  // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors' 
-  // with operands comming from setcc fp comparions. This is necessary since 
+  // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
+  // with operands comming from setcc fp comparions. This is necessary since
   // the result from these setcc are in a flag registers (FCR31).
   setOperationAction(ISD::AND,              MVT::i32,   Custom);
   setOperationAction(ISD::OR,               MVT::i32,   Custom);
@@ -116,7 +119,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::CTPOP,             MVT::i32,   Expand);
   setOperationAction(ISD::CTTZ,              MVT::i32,   Expand);
   setOperationAction(ISD::ROTL,              MVT::i32,   Expand);
-  setOperationAction(ISD::ROTR,              MVT::i32,   Expand);
+
+  if (!Subtarget->isMips32r2())
+    setOperationAction(ISD::ROTR, MVT::i32,   Expand);
+
   setOperationAction(ISD::SHL_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::SRA_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::SRL_PARTS,         MVT::i32,   Expand);
@@ -152,6 +158,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   if (!Subtarget->hasSwap())
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 
+  setTargetDAGCombine(ISD::ADDE);
+  setTargetDAGCombine(ISD::SUBE);
+
   setStackPointerRegisterToSaveRestore(Mips::SP);
   computeRegisterProperties();
 }
@@ -165,10 +174,198 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
   return 2;
 }
 
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//  (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+//  multHi/Lo: product of multiplication
+//  Lo0: initial value of Lo register
+//  Hi0: initial value of Hi register
+// Return true if mattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+  // ADDENode's second operand must be a flag output of an ADDC node in order
+  // for the matching to be successful.
+  SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+  if (ADDCNode->getOpcode() != ISD::ADDC)
+    return false;
+
+  SDValue MultHi = ADDENode->getOperand(0);
+  SDValue MultLo = ADDCNode->getOperand(0);
+  SDNode* MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo amd MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+  // If there exist users other than ADDENode or ADDCNode, this function returns
+  // here, which will result in MultNode being mapped to a single MULT
+  // instruction node rather than a pair of MULT and MADD instructions being
+  // produced.
+  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+    return false;
+
+  SDValue Chain = CurDAG->getEntryNode();
+  DebugLoc dl = ADDENode->getDebugLoc();
+
+  // create MipsMAdd(u) node
+  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+  SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+                                 MVT::Glue,
+                                 MultNode->getOperand(0),// Factor 0
+                                 MultNode->getOperand(1),// Factor 1
+                                 ADDCNode->getOperand(1),// Lo0
+                                 ADDENode->getOperand(1));// Hi0
+
+  // create CopyFromReg nodes
+  SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+                                              MAdd);
+  SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+                                              Mips::HI, MVT::i32,
+                                              CopyFromLo.getValue(2));
+
+  // replace uses of adde and addc here
+  if (!SDValue(ADDCNode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+  if (!SDValue(ADDENode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+  return true;
+}
+
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//  (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+//  multHi/Lo: product of multiplication
+//  Lo0: initial value of Lo register
+//  Hi0: initial value of Hi register
+// Return true if mattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+  // SUBENode's second operand must be a flag output of an SUBC node in order
+  // for the matching to be successful.
+  SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+  if (SUBCNode->getOpcode() != ISD::SUBC)
+    return false;
+
+  SDValue MultHi = SUBENode->getOperand(1);
+  SDValue MultLo = SUBCNode->getOperand(1);
+  SDNode* MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo amd MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+  // If there exist users other than SUBENode or SUBCNode, this function returns
+  // here, which will result in MultNode being mapped to a single MULT
+  // instruction node rather than a pair of MULT and MSUB instructions being
+  // produced.
+  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+    return false;
+
+  SDValue Chain = CurDAG->getEntryNode();
+  DebugLoc dl = SUBENode->getDebugLoc();
+
+  // create MipsSub(u) node
+  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+  SDValue MSub = CurDAG->getNode(MultOpc, dl,
+                                 MVT::Glue,
+                                 MultNode->getOperand(0),// Factor 0
+                                 MultNode->getOperand(1),// Factor 1
+                                 SUBCNode->getOperand(0),// Lo0
+                                 SUBENode->getOperand(0));// Hi0
+
+  // create CopyFromReg nodes
+  SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+                                              MSub);
+  SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+                                              Mips::HI, MVT::i32,
+                                              CopyFromLo.getValue(2));
+
+  // replace uses of sube and subc here
+  if (!SDValue(SUBCNode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
+
+  if (!SDValue(SUBENode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
+
+  return true;
+}
+
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const MipsSubtarget* Subtarget) {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const MipsSubtarget* Subtarget) {
+  if (DCI.isBeforeLegalize())
+    return SDValue();
+
+  if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+  const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned opc = N->getOpcode();
+
+  switch (opc) {
+  default: break;
+  case ISD::ADDE:
+    return PerformADDECombine(N, DAG, DCI, Subtarget);
+  case ISD::SUBE:
+    return PerformSUBECombine(N, DAG, DCI, Subtarget);
+  }
+
+  return SDValue();
+}
+
 SDValue MipsTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const
 {
-  switch (Op.getOpcode()) 
+  switch (Op.getOpcode())
   {
     case ISD::AND:                return LowerANDOR(Op, DAG);
     case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
@@ -194,7 +391,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
 // MachineFunction as a live in value.  It also creates a corresponding
 // virtual register for it.
 static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC) 
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
 {
   assert(RC->contains(PReg) && "Not the correct regclass!");
   unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -212,7 +409,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
 
   return Mips::BRANCH_INVALID;
 }
-  
+
 static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
   switch(BC) {
     default:
@@ -227,24 +424,24 @@ static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
 static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
   switch (CC) {
   default: llvm_unreachable("Unknown fp condition code!");
-  case ISD::SETEQ:  
+  case ISD::SETEQ:
   case ISD::SETOEQ: return Mips::FCOND_EQ;
   case ISD::SETUNE: return Mips::FCOND_OGL;
-  case ISD::SETLT:  
+  case ISD::SETLT:
   case ISD::SETOLT: return Mips::FCOND_OLT;
-  case ISD::SETGT:  
+  case ISD::SETGT:
   case ISD::SETOGT: return Mips::FCOND_OGT;
-  case ISD::SETLE:  
-  case ISD::SETOLE: return Mips::FCOND_OLE; 
+  case ISD::SETLE:
+  case ISD::SETOLE: return Mips::FCOND_OLE;
   case ISD::SETGE:
   case ISD::SETOGE: return Mips::FCOND_OGE;
   case ISD::SETULT: return Mips::FCOND_ULT;
-  case ISD::SETULE: return Mips::FCOND_ULE; 
+  case ISD::SETULE: return Mips::FCOND_ULE;
   case ISD::SETUGT: return Mips::FCOND_UGT;
   case ISD::SETUGE: return Mips::FCOND_UGE;
-  case ISD::SETUO:  return Mips::FCOND_UN; 
+  case ISD::SETUO:  return Mips::FCOND_UN;
   case ISD::SETO:   return Mips::FCOND_OR;
-  case ISD::SETNE:  
+  case ISD::SETNE:
   case ISD::SETONE: return Mips::FCOND_NEQ;
   case ISD::SETUEQ: return Mips::FCOND_UEQ;
   }
@@ -364,7 +561,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
   // Emit the round instruction and bit convert to integer
   SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32,
                               Src, CondReg.getValue(1));
-  SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc);
+  SDValue BitCvt = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Trunc);
   return BitCvt;
 }
 
@@ -382,11 +579,11 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
   // obtain the new stack size.
   SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
 
-  // The Sub result contains the new stack start address, so it 
+  // The Sub result contains the new stack start address, so it
   // must be placed in the stack pointer register.
   Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub);
-  
-  // This node always has two return values: a new stack pointer 
+
+  // This node always has two return values: a new stack pointer
   // value and a chain
   SDValue Ops[2] = { Sub, Chain };
   return DAG.getMergeValues(Ops, 2, dl);
@@ -405,9 +602,9 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const
   SDValue True  = DAG.getConstant(1, MVT::i32);
   SDValue False = DAG.getConstant(0, MVT::i32);
 
-  SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), 
+  SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
                              LHS, True, False, LHS.getOperand(2));
-  SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), 
+  SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
                              RHS, True, False, RHS.getOperand(2));
 
   return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
@@ -416,7 +613,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const
 SDValue MipsTargetLowering::
 LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
 {
-  // The first operand is the chain, the second is the condition, the third is 
+  // The first operand is the chain, the second is the condition, the third is
   // the block to branch to if the condition is true.
   SDValue Chain = Op.getOperand(0);
   SDValue Dest = Op.getOperand(2);
@@ -424,55 +621,55 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
 
   if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
     return Op;
-  
+
   SDValue CondRes = Op.getOperand(1);
   SDValue CCNode  = CondRes.getOperand(2);
   Mips::CondCode CC =
     (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
-  SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); 
+  SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
 
-  return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, 
+  return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
              Dest, CondRes);
 }
 
 SDValue MipsTargetLowering::
 LowerSETCC(SDValue Op, SelectionDAG &DAG) const
 {
-  // The operands to this are the left and right operands to compare (ops #0, 
-  // and #1) and the condition code to compare them with (op #2) as a 
+  // The operands to this are the left and right operands to compare (ops #0,
+  // and #1) and the condition code to compare them with (op #2) as a
   // CondCodeSDNode.
-  SDValue LHS = Op.getOperand(0); 
+  SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   DebugLoc dl = Op.getDebugLoc();
 
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-  
-  return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, 
+
+  return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
                  DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
 }
 
 SDValue MipsTargetLowering::
 LowerSELECT(SDValue Op, SelectionDAG &DAG) const
 {
-  SDValue Cond  = Op.getOperand(0); 
+  SDValue Cond  = Op.getOperand(0);
   SDValue True  = Op.getOperand(1);
   SDValue False = Op.getOperand(2);
   DebugLoc dl = Op.getDebugLoc();
 
-  // if the incomming condition comes from a integer compare, the select 
-  // operation must be SelectCC or a conditional move if the subtarget 
+  // if the incomming condition comes from a integer compare, the select
+  // operation must be SelectCC or a conditional move if the subtarget
   // supports it.
   if (Cond.getOpcode() != MipsISD::FPCmp) {
     if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
       return Op;
-    return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), 
+    return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
                        Cond, True, False);
   }
 
   // if the incomming condition comes from fpcmp, the select
   // operation must use FPSelectCC.
   SDValue CCNode = Cond.getOperand(2);
-  return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), 
+  return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
                      Cond, True, False, CCNode);
 }
 
@@ -484,16 +681,16 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
 
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
     SDVTList VTs = DAG.getVTList(MVT::i32);
-    
+
     MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
-    
+
     // %gp_rel relocation
-    if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { 
-      SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, 
+    if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+      SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                               MipsII::MO_GPREL);
       SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
       SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
-      return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); 
+      return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
     }
     // %hi/%lo relocation
     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
@@ -505,8 +702,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
   } else {
     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_GOT);
-    SDValue ResNode = DAG.getLoad(MVT::i32, dl, 
-                                  DAG.getEntryNode(), GA, NULL, 0,
+    SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+                                  DAG.getEntryNode(), GA, MachinePointerInfo(),
                                   false, false, 0);
     // On functions and global targets not internal linked only
     // a load from got/GP is necessary for PIC to work.
@@ -531,7 +728,7 @@ SDValue MipsTargetLowering::
 LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
 {
   SDValue ResNode;
-  SDValue HiPart; 
+  SDValue HiPart;
   // FIXME there isn't actually debug info here
   DebugLoc dl = Op.getDebugLoc();
   bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
@@ -546,7 +743,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
     SDValue Ops[] = { JTI };
     HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
   } else // Emit Load from Global Pointer
-    HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0,
+    HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+                         MachinePointerInfo(),
                          false, false, 0);
 
   SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
@@ -565,26 +763,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
   DebugLoc dl = Op.getDebugLoc();
 
   // gp_rel relocation
-  // FIXME: we should reference the constant pool using small data sections, 
+  // FIXME: we should reference the constant pool using small data sections,
   // but the asm printer currently doens't support this feature without
-  // hacking it. This feature should come soon so we can uncomment the 
+  // hacking it. This feature should come soon so we can uncomment the
   // stuff below.
   //if (IsInSmallSection(C->getType())) {
   //  SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
   //  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
-  //  ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); 
+  //  ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
 
   if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
-    SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), 
+    SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
                                       N->getOffset(), MipsII::MO_ABS_HILO);
     SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
     SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
     ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
   } else {
-    SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), 
+    SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
                                       N->getOffset(), MipsII::MO_GOT);
-    SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), 
-                               CP, NULL, 0, false, false, 0);
+    SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+                               CP, MachinePointerInfo::getConstantPool(),
+                               false, false, 0);
     SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
     ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
   }
@@ -603,7 +802,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV),
                       false, false, 0);
 }
 
@@ -614,23 +814,23 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
 #include "MipsGenCallingConv.inc"
 
 //===----------------------------------------------------------------------===//
-// TODO: Implement a generic logic using tblgen that can support this. 
+// TODO: Implement a generic logic using tblgen that can support this.
 // Mips O32 ABI rules:
 // ---
 // i32 - Passed in A0, A1, A2, A3 and stack
-// f32 - Only passed in f32 registers if no int reg has been used yet to hold 
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
 //       an argument. Otherwise, passed in A1, A2, A3 and stack.
-// f64 - Only passed in two aliased f32 registers if no int reg has been used 
-//       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is 
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+//       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
 //       not used, it must be shadowed. If only A3 is avaiable, shadow it and
 //       go to stack.
 //===----------------------------------------------------------------------===//
 
-static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
-                       EVT LocVT, CCValAssign::LocInfo LocInfo,
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+                       MVT LocVT, CCValAssign::LocInfo LocInfo,
                        ISD::ArgFlagsTy ArgFlags, CCState &State) {
 
-  static const unsigned IntRegsSize=4, FloatRegsSize=2; 
+  static const unsigned IntRegsSize=4, FloatRegsSize=2;
 
   static const unsigned IntRegs[] = {
       Mips::A0, Mips::A1, Mips::A2, Mips::A3
@@ -642,9 +842,15 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
       Mips::D6, Mips::D7
   };
 
-  unsigned Reg=0;
-  unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
-  bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0)));
+  unsigned Reg = 0;
+  static bool IntRegUsed = false;
+
+  // This must be the first arg of the call if no regs have been allocated.
+  // Initialize IntRegUsed in that case.
+  if (IntRegs[State.getFirstUnallocated(IntRegs, IntRegsSize)] == Mips::A0 &&
+      F32Regs[State.getFirstUnallocated(F32Regs, FloatRegsSize)] == Mips::F12 &&
+      F64Regs[State.getFirstUnallocated(F64Regs, FloatRegsSize)] == Mips::D6)
+    IntRegUsed = false;
 
   // Promote i8 and i16
   if (LocVT == MVT::i8 || LocVT == MVT::i16) {
@@ -657,30 +863,48 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
       LocInfo = CCValAssign::AExt;
   }
 
-  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) {
+  if (ValVT == MVT::i32) {
     Reg = State.AllocateReg(IntRegs, IntRegsSize);
     IntRegUsed = true;
-    LocVT = MVT::i32;
-  }
-
-  if (ValVT.isFloatingPoint() && !IntRegUsed) {
-    if (ValVT == MVT::f32)
-      Reg = State.AllocateReg(F32Regs, FloatRegsSize);
-    else
-      Reg = State.AllocateReg(F64Regs, FloatRegsSize);
-  }
+  } else if (ValVT == MVT::f32) {
+    // An int reg has to be marked allocated regardless of whether or not
+    // IntRegUsed is true.
+    Reg = State.AllocateReg(IntRegs, IntRegsSize);
 
-  if (ValVT == MVT::f64 && IntRegUsed) {
-    if (UnallocIntReg != IntRegsSize) {
-      // If we hit register A3 as the first not allocated, we must
-      // mark it as allocated (shadow) and use the stack instead.
-      if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3)))
-        Reg = Mips::A2;
-      for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg)
-        State.AllocateReg(UnallocIntReg);
-    } 
-    LocVT = MVT::i32;
-  }
+    if (IntRegUsed) {
+      if (Reg) // Int reg is available
+        LocVT = MVT::i32;
+    } else {
+      unsigned FReg = State.AllocateReg(F32Regs, FloatRegsSize);
+      if (FReg) // F32 reg is available
+        Reg = FReg;
+      else if (Reg) // No F32 regs are available, but an int reg is available.
+        LocVT = MVT::i32;
+    }
+  } else if (ValVT == MVT::f64) {
+    // Int regs have to be marked allocated regardless of whether or not
+    // IntRegUsed is true.
+    Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    if (Reg == Mips::A1)
+      Reg = State.AllocateReg(IntRegs, IntRegsSize);
+    else if (Reg == Mips::A3)
+      Reg = 0;
+    State.AllocateReg(IntRegs, IntRegsSize);
+
+    // At this point, Reg is A0, A2 or 0, and all the unavailable integer regs
+    // are marked as allocated.
+    if (IntRegUsed) {
+      if (Reg)// if int reg is available
+        LocVT = MVT::i32;
+    } else {
+      unsigned FReg = State.AllocateReg(F64Regs, FloatRegsSize);
+      if (FReg) // F64 reg is available.
+        Reg = FReg;
+      else if (Reg) // No F64 regs are available, but an int reg is available.
+        LocVT = MVT::i32;
+    }
+  } else
+    assert(false && "cannot handle this ValVT");
 
   if (!Reg) {
     unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
@@ -692,8 +916,8 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
   return false; // CC must always match
 }
 
-static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT,
-                       EVT LocVT, CCValAssign::LocInfo LocInfo,
+static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT,
+                       MVT LocVT, CCValAssign::LocInfo LocInfo,
                        ISD::ArgFlagsTy ArgFlags, CCState &State) {
 
   static const unsigned IntRegsSize=4;
@@ -736,7 +960,7 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT,
         IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) {
       unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize);
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
-      // Shadow the next register so it can be used 
+      // Shadow the next register so it can be used
       // later to get the other 32bit part.
       State.AllocateReg(IntRegs, IntRegsSize);
       return false;
@@ -786,13 +1010,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // To meet O32 ABI, Mips must always allocate 16 bytes on
   // the stack (even if less than 4 are used as arguments)
   if (Subtarget->isABI_O32()) {
-    int VTsize = EVT(MVT::i32).getSizeInBits()/8;
+    int VTsize = MVT(MVT::i32).getSizeInBits()/8;
     MFI->CreateFixedObject(VTsize, (VTsize*3), true);
-    CCInfo.AnalyzeCallOperands(Outs, 
+    CCInfo.AnalyzeCallOperands(Outs,
                      isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
   } else
     CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
-  
+
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
@@ -801,7 +1025,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
 
-  // First/LastArgStackLoc contains the first/last 
+  // First/LastArgStackLoc contains the first/last
   // "at stack" argument location.
   int LastArgStackLoc = 0;
   unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
@@ -814,12 +1038,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full: 
+    case CCValAssign::Full:
       if (Subtarget->isABI_O32() && VA.isRegLoc()) {
         if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
-          Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+          Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
         if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
-          Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+          Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
           SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
                                    DAG.getConstant(0, getPointerTy()));
           SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
@@ -827,7 +1051,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
           RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
           RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
           continue;
-        }  
+        }
       }
       break;
     case CCValAssign::SExt:
@@ -840,17 +1064,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     }
-    
-    // Arguments that can be passed on register must be kept at 
+
+    // Arguments that can be passed on register must be kept at
     // RegsToPass vector
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
       continue;
     }
-    
+
     // Register can't get to this point...
     assert(VA.isMemLoc());
-    
+
     // Create the frame index object for this incoming parameter
     // This guarantees that when allocating Local Area the firsts
     // 16 bytes which are alwayes reserved won't be overwritten
@@ -861,50 +1085,51 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
     SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
 
-    // emit ISD::STORE whichs stores the 
+    // emit ISD::STORE whichs stores the
     // parameter value to a stack Location
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
   }
 
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
-  if (!MemOpChains.empty())     
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
 
-  // Build a sequence of copy-to-reg nodes chained together with token 
+  // Build a sequence of copy-to-reg nodes chained together with token
   // chain and flag operands which copy the outgoing args into registers.
   // The InFlag in necessary since all emited instructions must be
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                              RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
 
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 
-  // node so that legalize doesn't hack it. 
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
   unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                 getPointerTy(), 0, OpFlag);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), 
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
                                 getPointerTy(), OpFlag);
 
   // MipsJmpLink = #chain, #target_address, #opt_in_flags...
-  //             = Chain, Callee, Reg#1, Reg#2, ...  
+  //             = Chain, Callee, Reg#1, Reg#2, ...
   //
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
 
-  // Add argument registers to the end of the list so that they are 
+  // Add argument registers to the end of the list so that they are
   // known live into the call.
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
@@ -916,17 +1141,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   Chain  = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
 
-  // Create a stack location to hold GP when PIC is used. This stack 
-  // location is used on function prologue to save GP and also after all 
-  // emited CALL's to restore GP. 
+  // Create a stack location to hold GP when PIC is used. This stack
+  // location is used on function prologue to save GP and also after all
+  // emited CALL's to restore GP.
   if (IsPIC) {
-      // Function can have an arbitrary number of calls, so 
+      // Function can have an arbitrary number of calls, so
       // hold the LastArgStackLoc with the biggest offset.
       int FI;
       MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
       if (LastArgStackLoc >= MipsFI->getGPStackOffset()) {
         LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4);
-        // Create the frame index only once. SPOffset here can be anything 
+        // Create the frame index only once. SPOffset here can be anything
         // (this will be fixed on processFunctionBeforeFrameFinalized)
         if (MipsFI->getGPStackOffset() == -1) {
           FI = MFI->CreateFixedObject(4, 0, true);
@@ -937,14 +1162,15 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
       // Reload GP value.
       FI = MipsFI->getGPFI();
-      SDValue FIN = DAG.getFrameIndex(FI,getPointerTy());
-      SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0,
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0);
       Chain = GPLoad.getValue(1);
-      Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), 
+      Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
                                GPLoad, SDValue(0,0));
       InFlag = Chain.getValue(1);
-  }      
+  }
 
   // Create the CALLSEQ_END node.
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -988,7 +1214,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 //             Formal Arguments Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-/// LowerFormalArguments - transform physical registers into virtual registers 
+/// LowerFormalArguments - transform physical registers into virtual registers
 /// and generate load operations for arguments places on the stack.
 SDValue
 MipsTargetLowering::LowerFormalArguments(SDValue Chain,
@@ -1018,7 +1244,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
                  ArgLocs, *DAG.getContext());
 
   if (Subtarget->isABI_O32())
-    CCInfo.AnalyzeFormalArguments(Ins, 
+    CCInfo.AnalyzeFormalArguments(Ins,
                         isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
   else
     CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
@@ -1037,22 +1263,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       TargetRegisterClass *RC = 0;
 
       if (RegVT == MVT::i32)
-        RC = Mips::CPURegsRegisterClass; 
-      else if (RegVT == MVT::f32) 
+        RC = Mips::CPURegsRegisterClass;
+      else if (RegVT == MVT::f32)
         RC = Mips::FGR32RegisterClass;
       else if (RegVT == MVT::f64) {
-        if (!Subtarget->isSingleFloat()) 
+        if (!Subtarget->isSingleFloat())
           RC = Mips::AFGR64RegisterClass;
-      } else  
+      } else
         llvm_unreachable("RegVT not supported by FormalArguments Lowering");
 
-      // Transform the arguments stored on 
+      // Transform the arguments stored on
       // physical registers into virtual ones
       unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
-      
-      // If this is an 8 or 16-bit value, it has been passed promoted 
-      // to 32 bits.  Insert an assert[sz]ext to capture this, then 
+
+      // If this is an 8 or 16-bit value, it has been passed promoted
+      // to 32 bits.  Insert an assert[sz]ext to capture this, then
       // truncate to the right size.
       if (VA.getLocInfo() != CCValAssign::Full) {
         unsigned Opcode = 0;
@@ -1061,22 +1287,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
         else if (VA.getLocInfo() == CCValAssign::ZExt)
           Opcode = ISD::AssertZext;
         if (Opcode)
-          ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, 
+          ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
                                  DAG.getValueType(VA.getValVT()));
         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
 
-      // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64 
+      // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
       if (Subtarget->isABI_O32()) {
-        if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32) 
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+        if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+          ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
         if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
-          unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), 
+          unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
                                     VA.getLocReg()+1, RC);
           SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
-          SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
-          SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
-          ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
+          SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue);
+          ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair);
         }
       }
 
@@ -1088,13 +1313,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
 
       // The last argument is not a register anymore
       ArgRegEnd = 0;
-      
-      // The stack pointer offset is relative to the caller stack frame. 
-      // Since the real stack size is unknown here, a negative SPOffset 
+
+      // The stack pointer offset is relative to the caller stack frame.
+      // Since the real stack size is unknown here, a negative SPOffset
       // is used so there's a way to adjust these offsets when the stack
-      // size get known (on EliminateFrameIndex). A dummy SPOffset is 
+      // size get known (on EliminateFrameIndex). A dummy SPOffset is
       // used instead of a direct negative address (which is recorded to
-      // be used on emitPrologue) to avoid mis-calc of the first stack 
+      // be used on emitPrologue) to avoid mis-calc of the first stack
       // offset on PEI::calculateFrameObjectOffsets.
       // Arguments are always 32-bit.
       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
@@ -1104,7 +1329,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
 
       // Create load nodes to retrieve arguments from the stack
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -1124,11 +1350,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
 
   // To meet ABI, when VARARGS are passed on registers, the registers
   // must have their values written to the caller stack frame. If the last
-  // argument was placed in the stack, there's no need to save any register. 
+  // argument was placed in the stack, there's no need to save any register.
   if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) {
     if (StackPtr.getNode() == 0)
       StackPtr = DAG.getRegister(StackReg, getPointerTy());
-  
+
     // The last register argument that must be saved is Mips::A3
     TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
     unsigned StackLoc = ArgLocs.size()-1;
@@ -1140,7 +1366,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       int FI = MFI->CreateFixedObject(4, 0, true);
       MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
 
       // Record the frame index of the first variable argument
@@ -1150,7 +1377,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
     }
   }
 
-  // All stores are grouped in one node to allow the matching between 
+  // All stores are grouped in one node to allow the matching between
   // the size of Ins and InVals. This only happens when on varg functions
   if (!OutChains.empty()) {
     OutChains.push_back(Chain);
@@ -1183,7 +1410,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
   // Analize return values.
   CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
 
-  // If this is the first return lowered for this function, add 
+  // If this is the first return lowered for this function, add
   // the regs to the liveout set for the function.
   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
     for (unsigned i = 0; i != RVLocs.size(); ++i)
@@ -1198,7 +1425,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // guarantee that all emitted copies are
@@ -1215,7 +1442,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
     unsigned Reg = MipsFI->getSRetReturnReg();
 
-    if (!Reg) 
+    if (!Reg)
       llvm_unreachable("sret virtual register not created in the entry block");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
@@ -1225,10 +1452,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
 
   // Return on Mips is always a "jr $ra"
   if (Flag.getNode())
-    return DAG.getNode(MipsISD::Ret, dl, MVT::Other, 
+    return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
                        Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
   else // Return Void
-    return DAG.getNode(MipsISD::Ret, dl, MVT::Other, 
+    return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
                        Chain, DAG.getRegister(Mips::RA, MVT::i32));
 }
 
@@ -1239,21 +1466,21 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
 /// getConstraintType - Given a constraint letter, return the type of
 /// constraint it is for this target.
 MipsTargetLowering::ConstraintType MipsTargetLowering::
-getConstraintType(const std::string &Constraint) const 
+getConstraintType(const std::string &Constraint) const
 {
-  // Mips specific constrainy 
+  // Mips specific constrainy
   // GCC config/mips/constraints.md
   //
-  // 'd' : An address register. Equivalent to r 
-  //       unless generating MIPS16 code. 
-  // 'y' : Equivalent to r; retained for 
-  //       backwards compatibility. 
-  // 'f' : Floating Point registers.      
+  // 'd' : An address register. Equivalent to r
+  //       unless generating MIPS16 code.
+  // 'y' : Equivalent to r; retained for
+  //       backwards compatibility.
+  // 'f' : Floating Point registers.
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
       default : break;
-      case 'd':     
-      case 'y': 
+      case 'd':
+      case 'y':
       case 'f':
         return C_RegisterClass;
         break;
@@ -1262,6 +1489,37 @@ getConstraintType(const std::string &Constraint) const
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'd':
+  case 'y':
+    if (type->isIntegerTy())
+      weight = CW_Register;
+    break;
+  case 'f':
+    if (type->isFloatTy())
+      weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
 /// return a list of registers that can be used to satisfy the constraint.
 /// This should only be used for C_RegisterClass constraints.
@@ -1275,7 +1533,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
     case 'f':
       if (VT == MVT::f32)
         return std::make_pair(0U, Mips::FGR32RegisterClass);
-      if (VT == MVT::f64)    
+      if (VT == MVT::f64)
         if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
           return std::make_pair(0U, Mips::AFGR64RegisterClass);
     }
@@ -1293,15 +1551,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
   if (Constraint.size() != 1)
     return std::vector<unsigned>();
 
-  switch (Constraint[0]) {         
+  switch (Constraint[0]) {
     default : break;
     case 'r':
     // GCC Mips Constraint Letters
-    case 'd':     
-    case 'y': 
-      return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3, 
-             Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1, 
-             Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, 
+    case 'd':
+    case 'y':
+      return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+             Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
+             Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
              Mips::T8, 0);
 
     case 'f':
@@ -1313,15 +1571,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
                  Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
                  Mips::F30, Mips::F31, 0);
         else
-          return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8, 
-                 Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26, 
+          return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
+                 Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
                  Mips::F28, Mips::F30, 0);
       }
 
-      if (VT == MVT::f64)    
+      if (VT == MVT::f64)
         if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
-          return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4, 
-                 Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13, 
+          return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
+                 Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
                  Mips::D14, Mips::D15, 0);
   }
   return std::vector<unsigned>();
@@ -1336,5 +1594,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   if (VT != MVT::f32 && VT != MVT::f64)
     return false;
+  if (Imm.isNegZero())
+    return false;
   return Imm.isZero();
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 460747bf5438..9d6b9f3daf87 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -40,9 +40,6 @@ namespace llvm {
       // Handle gp_rel (small data/bss sections) relocation.
       GPRel,
 
-      // Conditional Move
-      CMov,
-
       // Select CC Pseudo Instruction
       SelectCC,
 
@@ -59,7 +56,13 @@ namespace llvm {
       FPRound,
 
       // Return 
-      Ret
+      Ret,
+
+      // MAdd/Sub nodes
+      MAdd,
+      MAddu,
+      MSub,
+      MSubu
     };
   }
 
@@ -83,6 +86,8 @@ namespace llvm {
 
     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;
+
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 
   private:
     // Subtarget Info
     const MipsSubtarget *Subtarget;
@@ -139,6 +144,11 @@ namespace llvm {
     // Inline asm support
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*> 
               getRegForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index cff79966dcd3..977e0dfa145a 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -32,7 +32,7 @@ def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
 def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
                                   SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
 
-def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>;
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
 def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, 
                           [SDNPHasChain]>; 
 def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 320c5b883483..b70266ac3e80 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -19,41 +19,53 @@ include "MipsInstrFormats.td"
 
 def SDT_MipsRet          : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def SDT_MipsJmpLink      : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
-def SDT_MipsSelectCC     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, 
+def SDT_MipsSelectCC     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
                                          SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
-def SDT_MipsCMov         : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, 
+def SDT_MipsCMov         : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
                                          SDTCisInt<4>]>;
 def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
 def SDT_MipsCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_MipsMAddMSub     : SDTypeProfile<0, 4, 
+                                         [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+                                          SDTCisSameAs<1, 2>, 
+                                          SDTCisSameAs<2, 3>]>;
+
 
 // Call
-def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, 
-                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+                         [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
                           SDNPVariadic]>;
 
-// Hi and Lo nodes are used to handle global addresses. Used on 
-// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol 
+// Hi and Lo nodes are used to handle global addresses. Used on
+// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
 // static model. (nothing to do with Mips Registers Hi and Lo)
 def MipsHi    : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
 def MipsLo    : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
 def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
 
 // Return
-def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain, 
-                     SDNPOptInFlag]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+                     SDNPOptInGlue]>;
 
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 // Select Condition Code
 def MipsSelectCC  : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
 
-// Conditional Move
-def MipsCMov      : SDNode<"MipsISD::CMov", SDT_MipsCMov>;
+// MAdd*/MSub* nodes
+def MipsMAdd      : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
+                           [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAddu     : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
+                           [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSub      : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
+                           [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSubu     : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
+                           [SDNPOptInGlue, SDNPOutGlue]>;
 
 //===----------------------------------------------------------------------===//
 // Mips Instruction Predicate Definitions.
@@ -62,6 +74,8 @@ def HasSEInReg  : Predicate<"Subtarget.hasSEInReg()">;
 def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
 def HasSwap     : Predicate<"Subtarget.hasSwap()">;
 def HasCondMov  : Predicate<"Subtarget.hasCondMov()">;
+def IsMips32    : Predicate<"Subtarget.isMips32()">;
+def IsMips32r2  : Predicate<"Subtarget.isMips32r2()">;
 
 //===----------------------------------------------------------------------===//
 // Mips Operand, Complex Patterns and Transformations Definitions.
@@ -126,90 +140,66 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
 let isCommutable = 1 in
 class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
              InstrItinClass itin>:
-  FR< op,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+  FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
 
 let isCommutable = 1 in
 class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>:
-  FR< op,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [], IIAlu>;
+  FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
 
 // Arithmetic 2 register operands
 class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, Od:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+  FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
 
 class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
              Operand Od, PatLeaf imm_type> :
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, Od:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [], IIAlu>;
+  FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
 
 // Arithmetic Multiply ADD/SUB
-let rd=0 in
-class MArithR<bits<6> func, string instr_asm> :
-  FR< 0x1c,
-      func,
-      (outs CPURegs:$rs),
-      (ins CPURegs:$rt),
-      !strconcat(instr_asm, "\t$rs, $rt"),
-      [], IIImul>;
+let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
+class MArithR<bits<6> func, string instr_asm, SDNode op> :
+  FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+     !strconcat(instr_asm, "\t$rs, $rt"), 
+     [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>;
 
 //  Logical
 class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
-  FR< 0x00,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
 
 class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, uimm16:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+  FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
 
 class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
-  FR< op,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+  FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
 
 // Shifts
-let rt = 0 in
-class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
-  FR< 0x00,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, shamt:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, 
+                              SDNode OpNode>:
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+  let rs = _rs;
+}
 
-class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
-  FR< 0x00,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm, 
+                              SDNode OpNode>:
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
+  let shamt = _shamt;
+}
 
 // Load Upper Imediate
 class LoadUpper<bits<6> op, string instr_asm>:
@@ -222,76 +212,55 @@ class LoadUpper<bits<6> op, string instr_asm>:
 // Memory Load/Store
 let canFoldAsLoad = 1, hasDelaySlot = 1 in
 class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins mem:$addr),
-      !strconcat(instr_asm, "\t$dst, $addr"),
-      [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+  FI<op, (outs CPURegs:$dst), (ins mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
 
 class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$dst, mem:$addr),
-      !strconcat(instr_asm, "\t$dst, $addr"),
-      [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+  FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
 
 // Conditional Branch
 let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
 class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
-      !strconcat(instr_asm, "\t$a, $b, $offset"),
-      [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
-      IIBranch>;
-
+  FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+     !strconcat(instr_asm, "\t$a, $b, $offset"),
+     [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+     IIBranch>;
 
 class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$src, brtarget:$offset),
-      !strconcat(instr_asm, "\t$src, $offset"),
-      [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
-      IIBranch>;
+  FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
+     !strconcat(instr_asm, "\t$src, $offset"),
+     [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+     IIBranch>;
 }
 
 // SetCC
 class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
       PatFrag cond_op>:
-  FR< op,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
-      IIAlu>;
+  FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+     IIAlu>;
 
 class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
       Operand Od, PatLeaf imm_type>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, Od:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
-      IIAlu>;
+  FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+     IIAlu>;
 
 // Unconditional branch
 let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
 class JumpFJ<bits<6> op, string instr_asm>:
-  FJ< op,
-      (outs),
-      (ins brtarget:$target),
-      !strconcat(instr_asm, "\t$target"),
-      [(br bb:$target)], IIBranch>;
+  FJ<op, (outs), (ins brtarget:$target),
+     !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
 
 let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
 class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
-  FR< op,
-      func,
-      (outs),
-      (ins CPURegs:$target),
-      !strconcat(instr_asm, "\t$target"),
-      [(brind CPURegs:$target)], IIBranch>;
+  FR<op, func, (outs), (ins CPURegs:$target),
+     !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
 
 // Jump and Link (Call)
 let isCall=1, hasDelaySlot=1,
@@ -299,86 +268,64 @@ let isCall=1, hasDelaySlot=1,
   Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
           K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
   class JumpLink<bits<6> op, string instr_asm>:
-    FJ< op,
-        (outs),
-        (ins calltarget:$target, variable_ops),
-        !strconcat(instr_asm, "\t$target"),
-        [(MipsJmpLink imm:$target)], IIBranch>;
+    FJ<op, (outs), (ins calltarget:$target, variable_ops),
+       !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
+       IIBranch>;
 
   let rd=31 in
   class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
-    FR< op,
-        func,
-        (outs),
-        (ins CPURegs:$rs, variable_ops),
-        !strconcat(instr_asm, "\t$rs"),
-        [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+    FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
+       !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
 
   class BranchLink<string instr_asm>:
-    FI< 0x1,
-        (outs),
-        (ins CPURegs:$rs, brtarget:$target, variable_ops),
-        !strconcat(instr_asm, "\t$rs, $target"),
-        [], IIBranch>;
+    FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
+       !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
 }
 
 // Mul, Div
 class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
-  FR< 0x00,
-      func,
-      (outs),
-      (ins CPURegs:$a, CPURegs:$b),
-      !strconcat(instr_asm, "\t$a, $b"),
-      [], itin>;
+  FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+     !strconcat(instr_asm, "\t$a, $b"), [], itin>;
 
 // Move from Hi/Lo
 class MoveFromLOHI<bits<6> func, string instr_asm>:
-  FR< 0x00,
-      func,
-      (outs CPURegs:$dst),
-      (ins),
-      !strconcat(instr_asm, "\t$dst"),
-      [], IIHiLo>;
+  FR<0x00, func, (outs CPURegs:$dst), (ins),
+     !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
 
 class MoveToLOHI<bits<6> func, string instr_asm>:
-  FR< 0x00,
-      func,
-      (outs),
-      (ins CPURegs:$src),
-      !strconcat(instr_asm, "\t$src"),
-      [], IIHiLo>;
+  FR<0x00, func, (outs), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
 
 class EffectiveAddress<string instr_asm> :
-  FI<0x09,
-     (outs CPURegs:$dst),
-     (ins mem:$addr),
-     instr_asm,
-     [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+  FI<0x09, (outs CPURegs:$dst), (ins mem:$addr),
+     instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
 
 // Count Leading Ones/Zeros in Word
-class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>:
-  FR< 0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
-      !strconcat(instr_asm, "\t$dst, $src"), 
-      [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>;
+class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
+  FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+     Requires<[HasBitCount]> {
+  let shamt = 0;
+  let rt = rd;
+}
 
 // Sign Extend in Register.
 class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
-  FR< 0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
-      !strconcat(instr_asm, "\t$dst, $src"),
-      [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+  FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"),
+     [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
 
 // Byte Swap
 class ByteSwap<bits<6> func, string instr_asm>:
-  FR< 0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
-      !strconcat(instr_asm, "\t$dst, $src"),
-      [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+  FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+     !strconcat(instr_asm, "\t$dst, $src"),
+     [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
 
 // Conditional Move
 class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
-  FR< 0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T, 
-      CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"), 
-      [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T, 
-                           CPURegs:$cond, MovCode))], NoItinerary>;
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+     CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+     [], NoItinerary>;
 
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
@@ -408,13 +355,13 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
 def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
 def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
 
-// The supported Mips ISAs dont have any instruction close to the SELECT_CC 
+// The supported Mips ISAs dont have any instruction close to the SELECT_CC
 // operation. The solution is to create a Mips pseudo SELECT_CC instruction
-// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally 
+// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
 // replace it for real supported nodes into EmitInstrWithCustomInserter
 let usesCustomInserter = 1 in {
-  class PseudoSelCC<RegisterClass RC, string asmstr>: 
-    MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr, 
+  class PseudoSelCC<RegisterClass RC, string asmstr>:
+    MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
     [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
 }
 
@@ -451,12 +398,18 @@ def XOR     : LogicR<0x26, "xor", xor>;
 def NOR     : LogicNOR<0x00, 0x27, "nor">;
 
 /// Shift Instructions
-def SLL     : LogicR_shift_imm<0x00, "sll", shl>;
-def SRL     : LogicR_shift_imm<0x02, "srl", srl>;
-def SRA     : LogicR_shift_imm<0x03, "sra", sra>;
-def SLLV    : LogicR_shift_reg<0x04, "sllv", shl>;
-def SRLV    : LogicR_shift_reg<0x06, "srlv", srl>;
-def SRAV    : LogicR_shift_reg<0x07, "srav", sra>;
+def SLL     : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
+def SRL     : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
+def SRA     : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
+def SLLV    : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
+def SRLV    : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
+def SRAV    : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+
+// Rotate Instructions
+let Predicates = [IsMips32r2] in {
+    def ROTR    : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
+    def ROTRV   : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+}
 
 /// Load and Store Instructions
 def LB      : LoadM<0x20, "lb",  sextloadi8>;
@@ -493,7 +446,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
   def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
                 "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
 
-/// Multiply and Divide Instructions. 
+/// Multiply and Divide Instructions.
 let Defs = [HI, LO] in {
   def MULT    : MulDiv<0x18, "mult", IIImul>;
   def MULTu   : MulDiv<0x19, "multu", IIImul>;
@@ -521,10 +474,10 @@ let Predicates = [HasSEInReg] in {
 }
 
 /// Count Leading
-let Predicates = [HasBitCount] in {
-  let rt = 0 in
-    def CLZ : CountLeading<0b010110, "clz", ctlz>;
-}
+def CLZ : CountLeading<0b100000, "clz",
+                       [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
+def CLO : CountLeading<0b100001, "clo",
+                       [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
 
 /// Byte Swap
 let Predicates = [HasSwap] in {
@@ -551,15 +504,15 @@ let addr=0 in
 // can be matched. It's similar to Sparc LEA_ADDRi
 def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
 
-// MADD*/MSUB* are not part of MipsI either.
-//def MADD    : MArithR<0x00, "madd">;
-//def MADDU   : MArithR<0x01, "maddu">;
-//def MSUB    : MArithR<0x04, "msub">;
-//def MSUBU   : MArithR<0x05, "msubu">;
+// MADD*/MSUB*
+def MADD  : MArithR<0, "madd", MipsMAdd>;
+def MADDU : MArithR<1, "maddu", MipsMAddu>;
+def MSUB  : MArithR<4, "msub", MipsMSub>;
+def MSUBU : MArithR<5, "msubu", MipsMSubu>;
 
 // MUL is a assembly macro in the current used ISAs. In recent ISA's
 // it is a real instruction.
-//def MUL   : ArithR<0x1c, 0x02, "mul", mul, IIImul>;
+def MUL   : ArithR<0x1c, 0x02, "mul", mul, IIImul>, Requires<[IsMips32]>;
 
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
@@ -605,9 +558,9 @@ def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
           (ADDiu CPURegs:$hi, tconstpool:$lo)>;
 
 // gp_rel relocs
-def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), 
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
           (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
-def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), 
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
           (ADDiu CPURegs:$gp, tconstpool:$in)>;
 
 // Mips does not have "not", so we expand our way
@@ -665,9 +618,15 @@ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
 def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
           (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
 
-def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F), 
+def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
           (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
 
+// select patterns with got access
+def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
+                  (i32 tglobaladdr:$T), CPURegs:$F),
+          (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
+                (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
 // setcc patterns
 def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
           (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 5723f9ea1517..1e8e4feedd03 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -26,11 +26,11 @@ namespace llvm {
 class MipsFunctionInfo : public MachineFunctionInfo {
 
 private:
-  /// Holds for each function where on the stack the Frame Pointer must be 
+  /// Holds for each function where on the stack the Frame Pointer must be
   /// saved. This is used on Prologue and Epilogue to emit FP save/restore
   int FPStackOffset;
 
-  /// Holds for each function where on the stack the Return Address must be 
+  /// Holds for each function where on the stack the Return Address must be
   /// saved. This is used on Prologue and Epilogue to emit RA save/restore
   int RAStackOffset;
 
@@ -51,22 +51,22 @@ private:
       : FI(FrameIndex), SPOffset(StackPointerOffset) {}
   };
 
-  /// When PIC is used the GP must be saved on the stack on the function 
-  /// prologue and must be reloaded from this stack location after every 
-  /// call. A reference to its stack location and frame index must be kept 
+  /// When PIC is used the GP must be saved on the stack on the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
   /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
   MipsFIHolder GPHolder;
 
   /// On LowerFormalArguments the stack size is unknown, so the Stack
-  /// Pointer Offset calculation of "not in register arguments" must be 
-  /// postponed to emitPrologue. 
+  /// Pointer Offset calculation of "not in register arguments" must be
+  /// postponed to emitPrologue.
   SmallVector<MipsFIHolder, 16> FnLoadArgs;
   bool HasLoadArgs;
 
-  // When VarArgs, we must write registers back to caller stack, preserving 
-  // on register arguments. Since the stack size is unknown on 
+  // When VarArgs, we must write registers back to caller stack, preserving
+  // on register arguments. Since the stack size is unknown on
   // LowerFormalArguments, the Stack Pointer Offset calculation must be
-  // postponed to emitPrologue. 
+  // postponed to emitPrologue.
   SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
   bool HasStoreVarArgs;
 
@@ -84,9 +84,9 @@ private:
   int VarArgsFrameIndex;
 
 public:
-  MipsFunctionInfo(MachineFunction& MF) 
-  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), 
-    FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), 
+  MipsFunctionInfo(MachineFunction& MF)
+  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+    FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
     HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
     VarArgsFrameIndex(0)
   {}
@@ -110,7 +110,7 @@ public:
   bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
 
   bool hasLoadArgs() const { return HasLoadArgs; }
-  bool hasStoreVarArgs() const { return HasStoreVarArgs; } 
+  bool hasStoreVarArgs() const { return HasStoreVarArgs; }
 
   void recordLoadArgsFI(int FI, int SPOffset) {
     if (!HasLoadArgs) HasLoadArgs=true;
@@ -123,12 +123,12 @@ public:
 
   void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
     if (!hasLoadArgs()) return;
-    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) 
+    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
       MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
   }
   void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
-    if (!hasStoreVarArgs()) return; 
-    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) 
+    if (!hasStoreVarArgs()) return;
+    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
       MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
   }
 
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 69436d2acb54..3719e580425f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -25,7 +25,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -117,8 +117,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
 }
 
 BitVector MipsRegisterInfo::
-getReservedRegs(const MachineFunction &MF) const
-{
+getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(Mips::ZERO);
   Reserved.set(Mips::AT);
@@ -137,184 +136,6 @@ getReservedRegs(const MachineFunction &MF) const
   return Reserved;
 }
 
-//===----------------------------------------------------------------------===//
-//
-// Stack Frame Processing methods
-// +----------------------------+
-//
-// The stack is allocated decrementing the stack pointer on
-// the first instruction of a function prologue. Once decremented,
-// all stack references are done thought a positive offset
-// from the stack/frame pointer, so the stack is considering
-// to grow up! Otherwise terrible hacks would have to be made
-// to get this stack ABI compliant :)
-//
-//  The stack frame required by the ABI (after call):
-//  Offset
-//
-//  0                 ----------
-//  4                 Args to pass 
-//  .                 saved $GP  (used in PIC)
-//  .                 Alloca allocations
-//  .                 Local Area
-//  .                 CPU "Callee Saved" Registers
-//  .                 saved FP
-//  .                 saved RA
-//  .                 FPU "Callee Saved" Registers
-//  StackSize         -----------
-//
-// Offset - offset from sp after stack allocation on function prologue
-//
-// The sp is the stack pointer subtracted/added from the stack size
-// at the Prologue/Epilogue
-//
-// References to the previous stack (to obtain arguments) are done
-// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1))
-//
-// Examples:
-// - reference to the actual stack frame
-//   for any local area var there is smt like : FI >= 0, StackOffset: 4
-//     sw REGX, 4(SP)
-//
-// - reference to previous stack frame
-//   suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16.
-//   The emitted instruction will be something like:
-//     lw REGX, 16+StackSize(SP)
-//
-// Since the total stack size is unknown on LowerFormalArguments, all
-// stack references (ObjectOffset) created to reference the function 
-// arguments, are negative numbers. This way, on eliminateFrameIndex it's
-// possible to detect those references and the offsets are adjusted to
-// their real location.
-//
-//===----------------------------------------------------------------------===//
-
-void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
-{
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
-  bool HasGP = MipsFI->needGPSaveRestore();
-
-  // Min and Max CSI FrameIndex.
-  int MinCSFI = -1, MaxCSFI = -1; 
-
-  // See the description at MipsMachineFunction.h
-  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
-
-  // Replace the dummy '0' SPOffset by the negative offsets, as explained on 
-  // LowerFormalArguments. Leaving '0' for while is necessary to avoid
-  // the approach done by calculateFrameObjectOffsets to the stack frame.
-  MipsFI->adjustLoadArgsFI(MFI);
-  MipsFI->adjustStoreVarArgsFI(MFI); 
-
-  // It happens that the default stack frame allocation order does not directly 
-  // map to the convention used for mips. So we must fix it. We move the callee 
-  // save register slots after the local variables area, as described in the
-  // stack frame above.
-  unsigned CalleeSavedAreaSize = 0;
-  if (!CSI.empty()) {
-    MinCSFI = CSI[0].getFrameIdx();
-    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
-  }
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
-    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
-
-  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
-                : (Subtarget.isABI_O32() ? 16 : 0);
-
-  // Adjust local variables. They should come on the stack right
-  // after the arguments.
-  int LastOffsetFI = -1;
-  for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
-    if (i >= MinCSFI && i <= MaxCSFI)
-      continue;
-    if (MFI->isDeadObjectIndex(i))
-      continue;
-    unsigned Offset = 
-      StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
-    if (LastOffsetFI == -1)
-      LastOffsetFI = i;
-    if (Offset > MFI->getObjectOffset(LastOffsetFI))
-      LastOffsetFI = i;
-    MFI->setObjectOffset(i, Offset);
-  }
-
-  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
-  // be saved in this CPU Area. This whole area must be aligned to the 
-  // default Stack Alignment requirements.
-  if (LastOffsetFI >= 0)
-    StackOffset = MFI->getObjectOffset(LastOffsetFI)+ 
-                  MFI->getObjectSize(LastOffsetFI);
-  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-
-  for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (!Mips::CPURegsRegisterClass->contains(Reg))
-      break;
-    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
-  }
-
-  // Stack locations for FP and RA. If only one of them is used, 
-  // the space must be allocated for both, otherwise no space at all.
-  if (hasFP(MF) || MFI->adjustsStack()) {
-    // FP stack location
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), 
-                         StackOffset);
-    MipsFI->setFPStackOffset(StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += RegSize;
-
-    // SP stack location
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
-                         StackOffset);
-    MipsFI->setRAStackOffset(StackOffset);
-    StackOffset += RegSize;
-
-    if (MFI->adjustsStack())
-      TopCPUSavedRegOff += RegSize;
-  }
-
-  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-  
-  // Adjust FPU Callee Saved Registers Area. This Area must be 
-  // aligned to the default Stack Alignment requirements.
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (Mips::CPURegsRegisterClass->contains(Reg))
-      continue;
-    MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
-    TopFPUSavedRegOff = StackOffset;
-    StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
-  }
-  StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-
-  // Update frame info
-  MFI->setStackSize(StackOffset);
-
-  // Recalculate the final tops offset. The final values must be '0'
-  // if there isn't a callee saved register for CPU or FPU, otherwise
-  // a negative offset is needed.
-  if (TopCPUSavedRegOff >= 0)
-    MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
-
-  if (TopFPUSavedRegOff >= 0)
-    MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool MipsRegisterInfo::
-hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
 // This function eliminate ADJCALLSTACKDOWN, 
 // ADJCALLSTACKUP pseudo instructions
 void MipsRegisterInfo::
@@ -362,106 +183,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
 }
 
-void MipsRegisterInfo::
-emitPrologue(MachineFunction &MF) const 
-{
-  MachineBasicBlock &MBB   = MF.front();
-  MachineFrameInfo *MFI    = MF.getFrameInfo();
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-  bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
-
-  // Get the right frame order for Mips.
-  adjustMipsStackFrame(MF);
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  unsigned StackSize = MFI->getStackSize();
-
-  // No need to allocate space on the stack.
-  if (StackSize == 0 && !MFI->adjustsStack()) return;
-
-  int FPOffset = MipsFI->getFPStackOffset();
-  int RAOffset = MipsFI->getRAStackOffset();
-
-  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
-  
-  // TODO: check need from GP here.
-  if (isPIC && Subtarget.isABI_O32()) 
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg());
-  BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
-
-  // Adjust stack : addi sp, sp, (-imm)
-  BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
-      .addReg(Mips::SP).addImm(-StackSize);
-
-  // Save the return address only if the function isnt a leaf one.
-  // sw  $ra, stack_loc($sp)
-  if (MFI->adjustsStack()) { 
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
-        .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
-  }
-
-  // if framepointer enabled, save it and set it
-  // to point to the stack pointer
-  if (hasFP(MF)) {
-    // sw  $fp,stack_loc($sp)
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
-      .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
-
-    // move $fp, $sp
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
-      .addReg(Mips::SP).addReg(Mips::ZERO);
-  }
-
-  // Restore GP from the saved stack location
-  if (MipsFI->needGPSaveRestore())
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
-      .addImm(MipsFI->getGPStackOffset());
-}
-
-void MipsRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const 
-{
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  MachineFrameInfo *MFI            = MF.getFrameInfo();
-  MipsFunctionInfo *MipsFI         = MF.getInfo<MipsFunctionInfo>();
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  // Get the number of bytes from FrameInfo
-  int NumBytes = (int) MFI->getStackSize();
-
-  // Get the FI's where RA and FP are saved.
-  int FPOffset = MipsFI->getFPStackOffset();
-  int RAOffset = MipsFI->getRAStackOffset();
-
-  // if framepointer enabled, restore it and restore the
-  // stack pointer
-  if (hasFP(MF)) {
-    // move $sp, $fp
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
-      .addReg(Mips::FP).addReg(Mips::ZERO);
-
-    // lw  $fp,stack_loc($sp)
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
-      .addImm(FPOffset).addReg(Mips::SP);
-  }
-
-  // Restore the return address only if the function isnt a leaf one.
-  // lw  $ra, stack_loc($sp)
-  if (MFI->adjustsStack()) { 
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
-      .addImm(RAOffset).addReg(Mips::SP);
-  }
-
-  // adjust stack  : insert addi sp, sp, (imm)
-  if (NumBytes) {
-    BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
-      .addReg(Mips::SP).addImm(NumBytes);
-  }
-}
-
-
 void MipsRegisterInfo::
 processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
   // Set the stack offset where GP must be saved/loaded from.
@@ -478,7 +199,9 @@ getRARegister() const {
 
 unsigned MipsRegisterInfo::
 getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? Mips::FP : Mips::SP;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
 }
 
 unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 89282f8fa146..a7f4bf987ae9 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -44,8 +44,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -56,9 +54,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  
   /// Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 055ff3237218..49ca5d19c9cf 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -40,7 +40,7 @@ def IIPseudo           : InstrItinClass;
 //===----------------------------------------------------------------------===//
 // Mips Generic instruction itineraries.
 //===----------------------------------------------------------------------===//
-def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
   InstrItinData<IIAlu              , [InstrStage<1,  [ALU]>]>,
   InstrItinData<IILoad             , [InstrStage<3,  [ALU]>]>,
   InstrItinData<IIStore            , [InstrStage<1,  [ALU]>]>,
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 2d5fd226e6f9..e4f4b334e13a 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -31,7 +31,7 @@ public:
 protected:
 
   enum MipsArchEnum {
-    Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2
+    Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
   };
 
   // Mips architecture version 
@@ -100,6 +100,8 @@ public:
                                      const std::string &CPU);
 
   bool isMips1() const { return MipsArchVersion == Mips1; }
+  bool isMips32() const { return MipsArchVersion >= Mips32; } 
+  bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
 
   bool isLittle() const { return IsLittle; }
   bool isFP64bit() const { return IsFP64bit; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ad3eb9e77da7..7a2dd1f651d2 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -30,18 +30,18 @@ extern "C" void LLVMInitializeMipsTarget() {
 // The stack is always 8 byte aligned
 // On function prologue, the stack is created by decrementing
 // its pointer. Once decremented, all references are done with positive
-// offset from the stack/frame pointer, using StackGrowsUp enables 
+// offset from the stack/frame pointer, using StackGrowsUp enables
 // an easier handling.
 // Using CodeModel::Large enables different CALL behavior.
 MipsTargetMachine::
 MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
                   bool isLittle=false):
   LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS, isLittle), 
+  Subtarget(TT, FS, isLittle),
   DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") :
-                        std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), 
-  InstrInfo(*this), 
-  FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+                        std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
+  InstrInfo(*this),
+  FrameLowering(Subtarget),
   TLInfo(*this), TSInfo(*this) {
   // Abicall enables PIC by default
   if (getRelocationModel() == Reloc::Default) {
@@ -57,20 +57,20 @@ MipselTargetMachine(const Target &T, const std::string &TT,
                     const std::string &FS) :
   MipsTargetMachine(T, TT, FS, true) {}
 
-// Install an instruction selector pass using 
+// Install an instruction selector pass using
 // the ISelDag to gen Mips code.
 bool MipsTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) 
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
 {
   PM.add(createMipsISelDag(*this));
   return false;
 }
 
-// Implemented by targets that want to run passes immediately before 
-// machine code is emitted. return true if -print-machineinstrs should 
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 bool MipsTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) 
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
 {
   PM.add(createMipsDelaySlotFillerPass(*this));
   return true;
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index d63976fcfc3e..43ab7984520e 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -17,39 +17,40 @@
 #include "MipsSubtarget.h"
 #include "MipsInstrInfo.h"
 #include "MipsISelLowering.h"
+#include "MipsFrameLowering.h"
 #include "MipsSelectionDAGInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
   class formatted_raw_ostream;
-  
+
   class MipsTargetMachine : public LLVMTargetMachine {
     MipsSubtarget       Subtarget;
     const TargetData    DataLayout; // Calculates type size & alignment
     MipsInstrInfo       InstrInfo;
-    TargetFrameInfo     FrameInfo;
+    MipsFrameLowering   FrameLowering;
     MipsTargetLowering  TLInfo;
     MipsSelectionDAGInfo TSInfo;
   public:
     MipsTargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS, bool isLittle);
-    
-    virtual const MipsInstrInfo   *getInstrInfo()     const 
+
+    virtual const MipsInstrInfo   *getInstrInfo()     const
     { return &InstrInfo; }
-    virtual const TargetFrameInfo *getFrameInfo()     const 
-    { return &FrameInfo; }
-    virtual const MipsSubtarget   *getSubtargetImpl() const 
+    virtual const TargetFrameLowering *getFrameLowering()     const
+    { return &FrameLowering; }
+    virtual const MipsSubtarget   *getSubtargetImpl() const
     { return &Subtarget; }
-    virtual const TargetData      *getTargetData()    const 
+    virtual const TargetData      *getTargetData()    const
     { return &DataLayout;}
 
     virtual const MipsRegisterInfo *getRegisterInfo()  const {
       return &InstrInfo.getRegisterInfo();
     }
 
-    virtual const MipsTargetLowering *getTargetLowering() const { 
+    virtual const MipsTargetLowering *getTargetLowering() const {
       return &TLInfo;
     }
 
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 405f41981fa3..cf5d1b58addd 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 static cl::opt<unsigned>
@@ -25,21 +26,21 @@ SSThreshold("mips-ssection-threshold", cl::Hidden,
 
 void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- 
+
   SmallDataSection =
-    getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getDataRel());
-  
+
   SmallBSSSection =
-    getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
-                               MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+    getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+                               ELF::SHF_WRITE |ELF::SHF_ALLOC,
                                SectionKind::getBSS());
-  
+
 }
 
-// A address must be loaded from a small section if its size is less than the 
-// small section size threshold. Data in this section must be addressed using 
+// A address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
 // gp_rel operator.
 static bool IsInSmallSection(uint64_t Size) {
   return Size > 0 && Size <= SSThreshold;
@@ -49,7 +50,7 @@ bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
                                                 const TargetMachine &TM) const {
   if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
     return false;
-  
+
   return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
 }
 
@@ -68,11 +69,11 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
   const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
   if (!GVA)
     return false;
-  
+
   // We can only do this for datarel or BSS objects for now.
   if (!Kind.isBSS() && !Kind.isDataRel())
     return false;
-  
+
   // If this is a internal constant string, there is a special
   // section for it, but not in small data/bss.
   if (Kind.isMergeable1ByteCString())
@@ -89,13 +90,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
   // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
   // sections?
-  
+
   // Handle Small Section classification here.
   if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
     return SmallBSSSection;
   if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
     return SmallDataSection;
-  
+
   // Otherwise, we work the same as ELF.
   return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
 }
diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index d36bb8eb4a5f..000000000000
--- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
-  ${CMAKE_CURRENT_BINARY_DIR}/..
-  ${CMAKE_CURRENT_SOURCE_DIR}/..
-  )
-
-add_llvm_library(LLVMPIC16AsmPrinter
-  PIC16AsmPrinter.cpp
-  )
-add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen)
diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile
deleted file mode 100644
index e3c0684ebcf8..000000000000
--- a/lib/Target/PIC16/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PIC16/AsmPrinter/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPIC16AsmPrinter
-
-# Hack: we need to include 'main' pic16 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
deleted file mode 100644
index b665817e614e..000000000000
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
-//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PIC16 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16ABINames.h"
-#include "PIC16AsmPrinter.h"
-#include "PIC16Section.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include <cstring>
-using namespace llvm;
-
-#include "PIC16GenAsmWriter.inc"
-
-PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-: AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) {
-  PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo());
-  PTOF = &getObjFileLowering();
-}
-
-void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  SmallString<128> Str;
-  raw_svector_ostream OS(Str);
-  printInstruction(MI, OS);
-  
-  OutStreamer.EmitRawText(OS.str());
-}
-
-static int getFunctionColor(const Function *F) {
-  if (F->hasSection()) {
-    std::string Sectn = F->getSection();
-    std::string StrToFind = "Overlay=";
-    std::string::size_type Pos = Sectn.find(StrToFind);
-
-    // Retreive the color number if the key is found.
-    if (Pos != std::string::npos) {
-      Pos += StrToFind.length();
-      std::string Color = "";
-      char c = Sectn.at(Pos);
-      // A Color can only consist of digits.
-      while (c >= '0' && c<= '9') {
-        Color.append(1,c);
-        Pos++;
-        if (Pos >= Sectn.length())
-          break;
-        c = Sectn.at(Pos);
-      }
-      return atoi(Color.c_str());
-    }
-  }
-
-  // Color was not set for function, so return -1.
-  return -1;
-}
-
-// Color the Auto section of the given function. 
-void PIC16AsmPrinter::ColorAutoSection(const Function *F) {
-  std::string SectionName = PAN::getAutosSectionName(CurrentFnSym->getName());
-  PIC16Section* Section = PTOF->findPIC16Section(SectionName);
-  if (Section != NULL) {
-    int Color = getFunctionColor(F);
-    if (Color >= 0)
-      Section->setColor(Color);
-  }
-}
-
-
-/// runOnMachineFunction - This emits the frame section, autos section and 
-/// assembly for each instruction. Also takes care of function begin debug
-/// directive and file begin debug directive (if required) for the function.
-///
-bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-  // This calls the base class function required to be called at beginning
-  // of runOnMachineFunction.
-  SetupMachineFunction(MF);
-
-  // Put the color information from function to its auto section.
-  const Function *F = MF.getFunction();
-  ColorAutoSection(F);
-
-  // Emit the function frame (args and temps).
-  EmitFunctionFrame(MF);
-
-  DbgInfo.BeginFunction(MF);
-
-  // Now emit the instructions of function in its code section.
-  const MCSection *fCodeSection = 
-    getObjFileLowering().SectionForCode(CurrentFnSym->getName(), 
-                                        PAN::isISR(F->getSection()));
-
-  // Start the Code Section.
-  OutStreamer.SwitchSection(fCodeSection);
-
-  // Emit the frame address of the function at the beginning of code.
-  OutStreamer.EmitRawText("\tretlw  low(" + 
-                          Twine(PAN::getFrameLabel(CurrentFnSym->getName())) +
-                          ")");
-  OutStreamer.EmitRawText("\tretlw  high(" +
-                          Twine(PAN::getFrameLabel(CurrentFnSym->getName())) +
-                          ")");
-
-  // Emit function start label.
-  OutStreamer.EmitLabel(CurrentFnSym);
-
-  DebugLoc CurDL;
-  // Print out code for the function.
-  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-
-    // Print a label for the basic block.
-    if (I != MF.begin())
-      EmitBasicBlockStart(I);
-    
-    // Print a basic block.
-    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
-         II != E; ++II) {
-      // Emit the line directive if source line changed.
-      DebugLoc DL = II->getDebugLoc();
-      if (!DL.isUnknown() && DL != CurDL) {
-        DbgInfo.ChangeDebugLoc(MF, DL);
-        CurDL = DL;
-      }
-        
-      // Print the assembly for the instruction.
-      EmitInstruction(II);
-    }
-  }
-  
-  // Emit function end debug directives.
-  DbgInfo.EndFunction(MF);
-
-  return false;  // we didn't modify anything.
-}
-
-
-// printOperand - print operand of insn.
-void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  const Function *F = MI->getParent()->getParent()->getFunction();
-
-  switch (MO.getType()) {
-    case MachineOperand::MO_Register:
-      {
-        // For indirect load/store insns, the fsr name is printed as INDF.
-        std::string RegName = getRegisterName(MO.getReg());
-        if ((MI->getOpcode() == PIC16::load_indirect) ||
-            (MI->getOpcode() == PIC16::store_indirect))
-          RegName.replace (0, 3, "INDF");
-        O << RegName;
-      }
-      return;
-
-    case MachineOperand::MO_Immediate:
-      O << (int)MO.getImm();
-      return;
-
-    case MachineOperand::MO_GlobalAddress: {
-      MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
-      // FIXME: currently we do not have a memcpy def coming in the module
-      // by any chance, as we do not link in those as .bc lib. So these calls
-      // are always external and it is safe to emit an extern.
-      if (PAN::isMemIntrinsic(Sym->getName()))
-        LibcallDecls.insert(Sym->getName());
-
-      O << *Sym;
-      break;
-    }
-    case MachineOperand::MO_ExternalSymbol: {
-       const char *Sname = MO.getSymbolName();
-       std::string Printname = Sname;
-
-      // Intrinsic stuff needs to be renamed if we are printing IL fn. 
-      if (PAN::isIntrinsicStuff(Printname)) {
-        if (PAN::isISR(F->getSection())) {
-          Printname = PAN::Rename(Sname);
-        }
-        // Record these decls, we need to print them in asm as extern.
-        LibcallDecls.insert(Printname);
-      }
-
-      O << Printname;
-      break;
-    }
-    case MachineOperand::MO_MachineBasicBlock:
-      O << *MO.getMBB()->getSymbol();
-      return;
-
-    default:
-      llvm_unreachable(" Operand type not supported.");
-  }
-}
-
-/// printCCOperand - Print the cond code operand.
-///
-void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
-                                     raw_ostream &O) {
-  int CC = (int)MI->getOperand(opNum).getImm();
-  O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
-}
-
-/// printLibcallDecls - print the extern declarations for compiler 
-/// intrinsics.
-///
-void PIC16AsmPrinter::printLibcallDecls() {
-  // If no libcalls used, return.
-  if (LibcallDecls.empty()) return;
-
-  OutStreamer.AddComment("External decls for libcalls - BEGIN");
-  OutStreamer.AddBlankLine();
-
-  for (std::set<std::string>::const_iterator I = LibcallDecls.begin(),
-       E = LibcallDecls.end(); I != E; I++)
-    OutStreamer.EmitRawText(MAI->getExternDirective() + Twine(*I));
-
-  OutStreamer.AddComment("External decls for libcalls - END");
-  OutStreamer.AddBlankLine();
-}
-
-/// doInitialization - Perform Module level initializations here.
-/// One task that we do here is to sectionize all global variables.
-/// The MemSelOptimizer pass depends on the sectionizing.
-///
-bool PIC16AsmPrinter::doInitialization(Module &M) {
-  bool Result = AsmPrinter::doInitialization(M);
-
-  // Every asmbly contains these std headers. 
-  OutStreamer.EmitRawText(StringRef("\n#include p16f1xxx.inc"));
-  OutStreamer.EmitRawText(StringRef("#include stdmacros.inc"));
-
-  // Set the section names for all globals.
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
-
-    // Record External Var Decls.
-    if (I->isDeclaration()) {
-      ExternalVarDecls.push_back(I);
-      continue;
-    }
-
-    // Record Exteranl Var Defs.
-    if (I->hasExternalLinkage() || I->hasCommonLinkage()) {
-      ExternalVarDefs.push_back(I);
-    }
-
-    // Sectionify actual data.
-    if (!I->hasAvailableExternallyLinkage()) {
-      const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM);
-      
-      I->setSection(((const PIC16Section *)S)->getName());
-    }
-  }
-
-  DbgInfo.BeginModule(M);
-  EmitFunctionDecls(M);
-  EmitUndefinedVars(M);
-  EmitDefinedVars(M);
-  EmitIData(M);
-  EmitUData(M);
-  EmitRomData(M);
-  EmitSharedUdata(M);
-  EmitUserSections(M);
-  return Result;
-}
-
-/// Emit extern decls for functions imported from other modules, and emit
-/// global declarations for function defined in this module and which are
-/// available to other modules.
-///
-void PIC16AsmPrinter::EmitFunctionDecls(Module &M) {
- // Emit declarations for external functions.
-  OutStreamer.AddComment("Function Declarations - BEGIN");
-  OutStreamer.AddBlankLine();
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
-    if (I->isIntrinsic() || I->getName() == "@abort")
-      continue;
-    
-    if (!I->isDeclaration() && !I->hasExternalLinkage())
-      continue;
-
-    MCSymbol *Sym = Mang->getSymbol(I);
-    
-    // Do not emit memcpy, memset, and memmove here.
-    // Calls to these routines can be generated in two ways,
-    // 1. User calling the standard lib function
-    // 2. Codegen generating these calls for llvm intrinsics.
-    // In the first case a prototype is alread availale, while in
-    // second case the call is via and externalsym and the prototype is missing.
-    // So declarations for these are currently always getting printing by
-    // tracking both kind of references in printInstrunction.
-    if (I->isDeclaration() && PAN::isMemIntrinsic(Sym->getName())) continue;
-
-    const char *directive = I->isDeclaration() ? MAI->getExternDirective() :
-                                                 MAI->getGlobalDirective();
-      
-    OutStreamer.EmitRawText(directive + Twine(Sym->getName()));
-    OutStreamer.EmitRawText(directive +
-                            Twine(PAN::getRetvalLabel(Sym->getName())));
-    OutStreamer.EmitRawText(directive +
-                            Twine(PAN::getArgsLabel(Sym->getName())));
-  }
-
-  OutStreamer.AddComment("Function Declarations - END");
-  OutStreamer.AddBlankLine();
-
-}
-
-// Emit variables imported from other Modules.
-void PIC16AsmPrinter::EmitUndefinedVars(Module &M) {
-  std::vector<const GlobalVariable*> Items = ExternalVarDecls;
-  if (!Items.size()) return;
-
-  OutStreamer.AddComment("Imported Variables - BEGIN");
-  OutStreamer.AddBlankLine();
-  for (unsigned j = 0; j < Items.size(); j++)
-    OutStreamer.EmitRawText(MAI->getExternDirective() +
-                            Twine(Mang->getSymbol(Items[j])->getName()));
-  
-  OutStreamer.AddComment("Imported Variables - END");
-  OutStreamer.AddBlankLine();
-}
-
-// Emit variables defined in this module and are available to other modules.
-void PIC16AsmPrinter::EmitDefinedVars(Module &M) {
-  std::vector<const GlobalVariable*> Items = ExternalVarDefs;
-  if (!Items.size()) return;
-
-  OutStreamer.AddComment("Exported Variables - BEGIN");
-  OutStreamer.AddBlankLine();
-
-  for (unsigned j = 0; j < Items.size(); j++)
-    OutStreamer.EmitRawText(MAI->getGlobalDirective() +
-                            Twine(Mang->getSymbol(Items[j])->getName()));
-  OutStreamer.AddComment("Exported Variables - END");
-  OutStreamer.AddBlankLine();
-}
-
-// Emit initialized data placed in ROM.
-void PIC16AsmPrinter::EmitRomData(Module &M) {
-  EmitSingleSection(PTOF->ROMDATASection());
-}
-
-// Emit Shared section udata.
-void PIC16AsmPrinter::EmitSharedUdata(Module &M) {
-  EmitSingleSection(PTOF->SHAREDUDATASection());
-}
-
-bool PIC16AsmPrinter::doFinalization(Module &M) {
-  EmitAllAutos(M);
-  printLibcallDecls();
-  DbgInfo.EndModule(M);
-  OutStreamer.EmitRawText(StringRef("\tEND"));
-  return AsmPrinter::doFinalization(M);
-}
-
-void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
-  const Function *F = MF.getFunction();
-  const TargetData *TD = TM.getTargetData();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-
-  // Emit the data section name.
-  
-  PIC16Section *fPDataSection =
-    const_cast<PIC16Section *>(getObjFileLowering().
-                                SectionForFrame(CurrentFnSym->getName()));
- 
-  fPDataSection->setColor(getFunctionColor(F)); 
-  OutStreamer.SwitchSection(fPDataSection);
-  
-  // Emit function frame label
-  OutStreamer.EmitRawText(PAN::getFrameLabel(CurrentFnSym->getName()) +
-                          Twine(":"));
-
-  const Type *RetType = F->getReturnType();
-  unsigned RetSize = 0; 
-  if (RetType->getTypeID() != Type::VoidTyID) 
-    RetSize = TD->getTypeAllocSize(RetType);
-  
-  //Emit function return value space
-  // FIXME: Do not emit RetvalLable when retsize is zero. To do this
-  // we will need to avoid printing a global directive for Retval label
-  // in emitExternandGloblas.
-  if(RetSize > 0)
-     OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) +
-                             Twine(" RES ") + Twine(RetSize));
-  else
-     OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) +
-                             Twine(":"));
-   
-  // Emit variable to hold the space for function arguments 
-  unsigned ArgSize = 0;
-  for (Function::const_arg_iterator argi = F->arg_begin(),
-           arge = F->arg_end(); argi != arge ; ++argi) {
-    const Type *Ty = argi->getType();
-    ArgSize += TD->getTypeAllocSize(Ty);
-   }
-
-  OutStreamer.EmitRawText(PAN::getArgsLabel(CurrentFnSym->getName()) +
-                          Twine(" RES ") + Twine(ArgSize));
-
-  // Emit temporary space
-  int TempSize = FuncInfo->getTmpSize();
-  if (TempSize > 0)
-    OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) +
-                            Twine(" RES  ") + Twine(TempSize));
-}
-
-
-void PIC16AsmPrinter::EmitInitializedDataSection(const PIC16Section *S) {
-  /// Emit Section header.
-  OutStreamer.SwitchSection(S);
-
-    std::vector<const GlobalVariable*> Items = S->Items;
-    for (unsigned j = 0; j < Items.size(); j++) {
-      Constant *C = Items[j]->getInitializer();
-      int AddrSpace = Items[j]->getType()->getAddressSpace();
-      OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName());
-      EmitGlobalConstant(C, AddrSpace);
-   }
-}
-
-// Print all IDATA sections.
-void PIC16AsmPrinter::EmitIData(Module &M) {
-  EmitSectionList (M, PTOF->IDATASections());
-}
-
-void PIC16AsmPrinter::
-EmitUninitializedDataSection(const PIC16Section *S) {
-    const TargetData *TD = TM.getTargetData();
-    OutStreamer.SwitchSection(S);
-    std::vector<const GlobalVariable*> Items = S->Items;
-    for (unsigned j = 0; j < Items.size(); j++) {
-      Constant *C = Items[j]->getInitializer();
-      const Type *Ty = C->getType();
-      unsigned Size = TD->getTypeAllocSize(Ty);
-      OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName() +
-                              Twine(" RES ") + Twine(Size));
-    }
-}
-
-// Print all UDATA sections.
-void PIC16AsmPrinter::EmitUData(Module &M) {
-  EmitSectionList (M, PTOF->UDATASections());
-}
-
-// Print all USER sections.
-void PIC16AsmPrinter::EmitUserSections(Module &M) {
-  EmitSectionList (M, PTOF->USERSections());
-}
-
-// Print all AUTO sections.
-void PIC16AsmPrinter::EmitAllAutos(Module &M) {
-  EmitSectionList (M, PTOF->AUTOSections());
-}
-
-extern "C" void LLVMInitializePIC16AsmPrinter() { 
-  RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target);
-}
-
-// Emit one data section using correct section emitter based on section type.
-void PIC16AsmPrinter::EmitSingleSection(const PIC16Section *S) {
-  if (S == NULL) return;
-
-  switch (S->getType()) {
-    default: llvm_unreachable ("unknow user section type");
-    case UDATA:
-    case UDATA_SHR:
-    case UDATA_OVR:
-      EmitUninitializedDataSection(S);
-      break;
-    case IDATA:
-    case ROMDATA:
-      EmitInitializedDataSection(S);
-      break;
-  }
-}
-
-// Emit a list of sections.
-void PIC16AsmPrinter::
-EmitSectionList(Module &M, const std::vector<PIC16Section *> &SList) {
-  for (unsigned i = 0; i < SList.size(); i++) {
-    // Exclude llvm specific metadata sections.
-    if (SList[i]->getName().find("llvm.") != std::string::npos)
-      continue;
-    OutStreamer.AddBlankLine();
-    EmitSingleSection(SList[i]);
-  }
-}
-
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
deleted file mode 100644
index aa2e1f4f486e..000000000000
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PIC16 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16ASMPRINTER_H
-#define PIC16ASMPRINTER_H
-
-#include "PIC16.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16DebugInfo.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetObjectFile.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
-#include <list>
-#include <set>
-#include <string>
-
-namespace llvm {
-  class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter {
-  public:
-    explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
-  private:
-    virtual const char *getPassName() const {
-      return "PIC16 Assembly Printer";
-    }
-    
-    const PIC16TargetObjectFile &getObjFileLowering() const {
-      return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering();
-    }
-
-    bool runOnMachineFunction(MachineFunction &F);
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-    void EmitInstruction(const MachineInstr *MI);
-    void EmitFunctionDecls (Module &M);
-    void EmitUndefinedVars (Module &M);
-    void EmitDefinedVars (Module &M);
-    void EmitIData (Module &M);
-    void EmitUData (Module &M);
-    void EmitAllAutos (Module &M);
-    void EmitRomData (Module &M);
-    void EmitSharedUdata(Module &M);
-    void EmitUserSections (Module &M);
-    void EmitFunctionFrame(MachineFunction &MF);
-    void printLibcallDecls();
-    void EmitUninitializedDataSection(const PIC16Section *S);
-    void EmitInitializedDataSection(const PIC16Section *S);
-    void EmitSingleSection(const PIC16Section *S);
-    void EmitSectionList(Module &M, 
-                         const std::vector< PIC16Section *> &SList);
-    void ColorAutoSection(const Function *F);
-  protected:
-    bool doInitialization(Module &M);
-    bool doFinalization(Module &M);
-
-    /// EmitGlobalVariable - Emit the specified global variable and its
-    /// initializer to the output stream.
-    virtual void EmitGlobalVariable(const GlobalVariable *GV) {
-      // PIC16 doesn't use normal hooks for this.
-    }
-    
-  private:
-    const PIC16TargetObjectFile *PTOF;
-    PIC16DbgInfo DbgInfo;
-    const PIC16MCAsmInfo *PMAI;
-    std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls.
-    std::vector<const GlobalVariable *> ExternalVarDecls;
-    std::vector<const GlobalVariable *> ExternalVarDefs;
-  };
-} // end of namespace
-
-#endif
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
deleted file mode 100644
index 2b6cb9e4e461..000000000000
--- a/lib/Target/PIC16/CMakeLists.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PIC16.td)
-
-tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PIC16GenRegisterNames.inc -gen-register-enums)
-tablegen(PIC16GenRegisterInfo.inc -gen-register-desc)
-tablegen(PIC16GenInstrNames.inc -gen-instr-enums)
-tablegen(PIC16GenInstrInfo.inc -gen-instr-desc)
-tablegen(PIC16GenAsmWriter.inc -gen-asm-writer)
-tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
-tablegen(PIC16GenCallingConv.inc -gen-callingconv)
-tablegen(PIC16GenSubtarget.inc -gen-subtarget)
-
-add_llvm_target(PIC16CodeGen
-  PIC16DebugInfo.cpp
-  PIC16InstrInfo.cpp
-  PIC16ISelDAGToDAG.cpp
-  PIC16ISelLowering.cpp
-  PIC16MemSelOpt.cpp
-  PIC16MCAsmInfo.cpp
-  PIC16RegisterInfo.cpp
-  PIC16Section.cpp
-  PIC16Subtarget.cpp
-  PIC16TargetMachine.cpp
-  PIC16TargetObjectFile.cpp
-  PIC16SelectionDAGInfo.cpp
-  )
diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile
deleted file mode 100644
index 9e784d1d7262..000000000000
--- a/lib/Target/PIC16/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===##
-# 
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source 
-# License. See LICENSE.TXT for details.
-# 
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMPIC16CodeGen
-TARGET = PIC16
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \
-		PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \
-		PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \
-		PIC16GenDAGISel.inc PIC16GenCallingConv.inc \
-		PIC16GenSubtarget.inc
-
-DIRS = AsmPrinter TargetInfo PIC16Passes
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
deleted file mode 100644
index 08bb3e6f055b..000000000000
--- a/lib/Target/PIC16/PIC16.h
+++ /dev/null
@@ -1,134 +0,0 @@
-//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in 
-// the LLVM PIC16 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16_H
-#define LLVM_TARGET_PIC16_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <sstream>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace llvm {
-  class PIC16TargetMachine;
-  class FunctionPass;
-  class MachineCodeEmitter;
-  class formatted_raw_ostream;
-
-namespace PIC16CC {
-  enum CondCodes {
-    EQ,
-    NE,
-    LT,
-    LE,
-    GT,
-    GE,
-    ULT,
-    UGT,
-    ULE,
-    UGE
-  };
-}
-
-  enum PIC16SectionType {
-      CODE,
-      UDATA,
-      IDATA,
-      ROMDATA,
-      UDATA_OVR,
-      UDATA_SHR
-    };
-
-  class ESNames {
-    std::vector<char*> stk;
-    ESNames() {}
-    public:
-    ~ESNames() {
-      while (!stk.empty())
-        {
-        char* p = stk.back();
-        delete [] p;
-        stk.pop_back();
-        }
-    }
-
-    // External symbol names require memory to live till the program end.
-    // So we have to allocate it and keep. Push all such allocations into a 
-    // vector so that they get freed up on termination.
-    inline static const char *createESName (const std::string &name) {
-      static ESNames esn;
-      char *tmpName = new char[name.size() + 1];
-      memcpy(tmpName, name.c_str(), name.size() + 1);
-      esn.stk.push_back(tmpName);
-      return tmpName;
-    }
-
- };
-
-  inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
-    switch (CC) {
-    default: llvm_unreachable("Unknown condition code");
-    case PIC16CC::NE:  return "ne";
-    case PIC16CC::EQ:   return "eq";
-    case PIC16CC::LT:   return "lt";
-    case PIC16CC::ULT:   return "lt";
-    case PIC16CC::LE:  return "le";
-    case PIC16CC::ULE:  return "le";
-    case PIC16CC::GT:  return "gt";
-    case PIC16CC::UGT:  return "gt";
-    case PIC16CC::GE:   return "ge";
-    case PIC16CC::UGE:   return "ge";
-    }
-  }
-
-  inline static bool isSignedComparison(PIC16CC::CondCodes CC) {
-    switch (CC) {
-    default: llvm_unreachable("Unknown condition code");
-    case PIC16CC::NE:  
-    case PIC16CC::EQ: 
-    case PIC16CC::LT:
-    case PIC16CC::LE:
-    case PIC16CC::GE:
-    case PIC16CC::GT:
-      return true;
-    case PIC16CC::ULT:
-    case PIC16CC::UGT:
-    case PIC16CC::ULE:
-    case PIC16CC::UGE:
-      return false;   // condition codes for unsigned comparison. 
-    }
-  }
-
-
-
-  FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
-  // Banksel optimizer pass.
-  FunctionPass *createPIC16MemSelOptimizerPass();
-
-  extern Target ThePIC16Target;
-  extern Target TheCooperTarget;
-  
-} // end namespace llvm;
-
-// Defines symbolic names for PIC16 registers.  This defines a mapping from
-// register name to register number.
-#include "PIC16GenRegisterNames.inc"
-
-// Defines symbolic names for the PIC16 instructions.
-#include "PIC16GenInstrNames.inc"
-
-#endif
diff --git a/lib/Target/PIC16/PIC16.td b/lib/Target/PIC16/PIC16.td
deleted file mode 100644
index b2b9b1cd171e..000000000000
--- a/lib/Target/PIC16/PIC16.td
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is the top level entry point for the PIC16 target.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-include "PIC16RegisterInfo.td"
-include "PIC16InstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features. 
-//===----------------------------------------------------------------------===//
-def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true",
-                                     "PIC16 Cooper ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// PIC16 supported processors.
-//===----------------------------------------------------------------------===//
-
-def : Processor<"generic", NoItineraries, []>;
-def : Processor<"cooper", NoItineraries, [FeatureCooper]>;
-
-
-def PIC16InstrInfo : InstrInfo {} 
-
-def PIC16 : Target {
-  let InstructionSet = PIC16InstrInfo;
-}
-
diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h
deleted file mode 100644
index 4c1a8da286c2..000000000000
--- a/lib/Target/PIC16/PIC16ABINames.h
+++ /dev/null
@@ -1,399 +0,0 @@
-//===-- PIC16ABINames.h - PIC16 Naming conventios for ABI----- --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions to manage ABI Naming conventions for PIC16. 
-// 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16ABINAMES_H
-#define LLVM_TARGET_PIC16ABINAMES_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <sstream>
-#include <cstring>
-#include <string>
-
-namespace llvm {
-  class PIC16TargetMachine;
-  class FunctionPass;
-  class MachineCodeEmitter;
-  class formatted_raw_ostream;
-
-  // A Central class to manage all ABI naming conventions.
-  // PAN - [P]ic16 [A]BI [N]ames
-  class PAN {
-    public:
-    // Map the name of the symbol to its section name.
-    // Current ABI:
-    // -----------------------------------------------------
-    // ALL Names are prefixed with the symobl '@'.
-    // ------------------------------------------------------
-    // Global variables do not have any '.' in their names.
-    // These are maily function names and global variable names.
-    // Example - @foo,  @i
-    // Static local variables - @<func>.<var>
-    // -------------------------------------------------------
-    // Functions and auto variables.
-    // Names are mangled as <prefix><funcname>.<tag>.<varname>
-    // Where <prefix> is '@' and <tag> is any one of
-    // the following
-    // .auto. - an automatic var of a function.
-    // .temp. - temproray data of a function.
-    // .ret.  - return value label for a function.
-    // .frame. - Frame label for a function where retval, args
-    //           and temps are stored.
-    // .args. - Label used to pass arguments to a direct call.
-    // Example - Function name:   @foo
-    //           Its frame:       @foo.frame.
-    //           Its retval:      @foo.ret.
-    //           Its local vars:  @foo.auto.a
-    //           Its temp data:   @foo.temp.
-    //           Its arg passing: @foo.args.
-    //----------------------------------------------
-    // Libcall - compiler generated libcall names must start with .lib.
-    //           This id will be used to emit extern decls for libcalls.
-    // Example - libcall name:   @.lib.sra.i8
-    //           To pass args:   @.lib.sra.i8.args.
-    //           To return val:  @.lib.sra.i8.ret.
-    //----------------------------------------------
-    // SECTION Names
-    // uninitialized globals - @udata.<num>.#
-    // initialized globals - @idata.<num>.#
-    // Program memory data - @romdata.#
-    // Variables with user defined section name - <user_defined_section>
-    // Variables with user defined address - @<var>.user_section.<address>.#
-    // Function frame - @<func>.frame_section.
-    // Function autos - @<func>.autos_section.
-    // Overlay sections - @<color>.##
-    // Declarations - Enclosed in comments. No section for them.
-    //----------------------------------------------------------
-    
-    // Tags used to mangle different names. 
-    enum TAGS {
-      PREFIX_SYMBOL,
-      GLOBAL,
-      STATIC_LOCAL,
-      AUTOS_LABEL,
-      FRAME_LABEL,
-      RET_LABEL,
-      ARGS_LABEL,
-      TEMPS_LABEL,
-      
-      LIBCALL,
-      
-      FRAME_SECTION,
-      AUTOS_SECTION,
-      CODE_SECTION,
-      USER_SECTION
-    };
-
-    // Textual names of the tags.
-    inline static const char *getTagName(TAGS tag) {
-      switch (tag) {
-      default: return "";
-      case PREFIX_SYMBOL:    return "@";
-      case AUTOS_LABEL:       return ".auto.";
-      case FRAME_LABEL:       return ".frame.";
-      case TEMPS_LABEL:       return ".temp.";
-      case ARGS_LABEL:       return ".args.";
-      case RET_LABEL:       return ".ret.";
-      case LIBCALL:       return ".lib.";
-      case FRAME_SECTION:       return ".frame_section.";
-      case AUTOS_SECTION:       return ".autos_section.";
-      case CODE_SECTION:       return ".code_section.";
-      case USER_SECTION:       return ".user_section.";
-      }
-    }
-
-    // Get tag type for the Symbol.
-    inline static TAGS getSymbolTag(const std::string &Sym) {
-      if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos)
-        return TEMPS_LABEL;
-
-      if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos)
-        return FRAME_LABEL;
-
-      if (Sym.find(getTagName(RET_LABEL)) != std::string::npos)
-        return RET_LABEL;
-
-      if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos)
-        return ARGS_LABEL;
-
-      if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos)
-        return AUTOS_LABEL;
-
-      if (Sym.find(getTagName(LIBCALL)) != std::string::npos)
-        return LIBCALL;
-
-      // It does not have any Tag. So its a true global or static local.
-      if (Sym.find(".") == std::string::npos) 
-        return GLOBAL;
-      
-      // If a . is there, then it may be static local.
-      // We should mangle these as well in clang.
-      if (Sym.find(".") != std::string::npos) 
-        return STATIC_LOCAL;
- 
-      assert (0 && "Could not determine Symbol's tag");
-      return PREFIX_SYMBOL; // Silence warning when assertions are turned off.
-    }
-
-    // addPrefix - add prefix symbol to a name if there isn't one already.
-    inline static std::string addPrefix (const std::string &Name) {
-      std::string prefix = getTagName (PREFIX_SYMBOL);
-
-      // If this name already has a prefix, nothing to do.
-      if (Name.compare(0, prefix.size(), prefix) == 0)
-        return Name;
-
-      return prefix + Name;
-    }
-
-    // Get mangled func name from a mangled sym name.
-    // In all cases func name is the first component before a '.'.
-    static inline std::string getFuncNameForSym(const std::string &Sym1) {
-      assert (getSymbolTag(Sym1) != GLOBAL && "not belongs to a function");
-
-      std::string Sym = addPrefix(Sym1);
-
-      // Position of the . after func name. That's where func name ends.
-      size_t func_name_end = Sym.find ('.');
-
-      return Sym.substr (0, func_name_end);
-    }
-
-    // Get Frame start label for a func.
-    static std::string getFrameLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(FRAME_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the retval label for the given function.
-    static std::string getRetvalLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(RET_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the argument label for the given function.
-    static std::string getArgsLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(ARGS_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the tempdata label for the given function.
-    static std::string getTempdataLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(TEMPS_LABEL);
-      return Func1 + tag;
-    }
-
-    static std::string getFrameSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(FRAME_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getAutosSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(AUTOS_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getCodeSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(CODE_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getUserSectionName(const std::string &Name) {
-      std::string sname = addPrefix(Name);;
-      std::string tag = getTagName(USER_SECTION);
-      return sname + tag + "#";
-    }
-
-    // udata, romdata and idata section names are generated by a given number.
-    // @udata.<num>.# 
-    static std::string getUdataSectionName(unsigned num, 
-                                           std::string prefix = "") {
-       std::ostringstream o;
-       o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num 
-         << ".#"; 
-       return o.str(); 
-    }
-
-    static std::string getRomdataSectionName() {
-      return "romdata.#";
-    }
-
-    static std::string getSharedUDataSectionName() {
-       std::ostringstream o;
-       o << getTagName(PREFIX_SYMBOL)  << "udata_shr" << ".#";
-       return o.str();
-    }
-
-    static std::string getRomdataSectionName(unsigned num,
-                                             std::string prefix = "") {
-       std::ostringstream o;
-       o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num 
-         << ".#";
-       return o.str();
-    }
-
-    static std::string getIdataSectionName(unsigned num,
-                                           std::string prefix = "") {
-       std::ostringstream o;
-       o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num 
-         << ".#"; 
-       return o.str(); 
-    }
-
-    inline static bool isLocalName (const std::string &Name) {
-      if (getSymbolTag(Name) == AUTOS_LABEL)
-        return true;
-
-      return false;
-    }
-
-
-    inline static bool isMemIntrinsic (const std::string &Name) {
-      if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
-          Name.compare("@memmove") == 0) {
-        return true;
-      }
-      
-      return false;
-    }
-
-    // Currently names of libcalls are assigned during TargetLowering
-    // object construction. There is no provision to change the when the 
-    // code for a function IL function being generated. 
-    // So we have to change these names while printing assembly.
-    // We need to do that mainly for names related to intrinsics. This
-    // function returns true if a name needs to be cloned. 
-    inline static bool isIntrinsicStuff(const std::string &Name) {
-      // Return true if the name contains LIBCALL marker, or a MemIntrinisc.
-      // these are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself.
-      if ((Name.find(getTagName(LIBCALL)) != std::string::npos) 
-          || isMemIntrinsic(Name))
-        return true;
- 
-      return false;
-    }
-
-    // Rename the name for IL.
-    inline static std::string Rename(const std::string &Name) {
-      std::string Newname;
-      // If its a label (LIBCALL+Func+LABEL), change it to
-      // (LIBCALL+Func+IL+LABEL).
-      TAGS id = getSymbolTag(Name);
-      if (id == ARGS_LABEL || id == RET_LABEL) {
-        std::size_t pos = Name.find(getTagName(id));
-        Newname = Name.substr(0, pos) + ".IL" + getTagName(id);
-        return Newname;
-      }
- 
-      // Else, just append IL to name. 
-      return Name + ".IL";
-   }
-    
-    
-   
-
-    inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
-      if (! isLocalName(Var)) return false;
-
-      std::string Func1 = addPrefix(Func);
-      // Extract func name of the varilable.
-      const std::string &fname = getFuncNameForSym(Var);
-
-      if (fname.compare(Func1) == 0)
-        return true;
-
-      return false;
-    }
-
-
-    // Get the section for the given external symbol names.
-    // This tries to find the type (Tag) of the symbol from its mangled name
-    // and return appropriate section name for it.
-    static inline std::string getSectionNameForSym(const std::string &Sym1) {
-      std::string Sym = addPrefix(Sym1);
-
-      std::string SectionName;
- 
-      std::string Fname = getFuncNameForSym (Sym);
-      TAGS id = getSymbolTag (Sym);
-
-      switch (id) {
-        default : assert (0 && "Could not determine external symbol type");
-        case FRAME_LABEL:
-        case RET_LABEL:
-        case TEMPS_LABEL:
-        case ARGS_LABEL:  {
-          return getFrameSectionName(Fname);
-        }
-        case AUTOS_LABEL: {
-          return getAutosSectionName(Fname);
-        }
-      }
-    }
-
-    /// Return Overlay Name for the section.
-    /// The ABI Convention is: @<Color>.##.<section_tag>
-    /// The section_tag is retrieved from the SectName parameter and
-    /// and Color is passed in parameter.
-    static inline std::string  getOverlayName(std::string SectName, int Color) {
-      // FIXME: Only autos_section and frame_section are colored.
-      // So check and assert if the passed SectName does not have AUTOS_SECTION
-      // or FRAME_SECTION tag in it.
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << Color << ".##" 
-        << SectName.substr(SectName.find("."));
-
-      return o.str();
-    } 
-
-    // Return true if the current function is an ISR
-    inline static bool isISR(const std::string SectName) {
-       if (SectName.find("interrupt") != std::string::npos)
-         return true;
-
-       return false;
-    }
-
-    // Return the address for ISR starts in rom.
-    inline static std::string getISRAddr(void) {
-      return "0x4";
-    }
-
-    // Returns the name of clone of a function.
-    static std::string getCloneFnName(const std::string &Func) {
-       return (Func + ".IL");
-    }
-
-    // Returns the name of clone of a variable.
-    static std::string getCloneVarName(const std::string &Fn, 
-                                       const std::string &Var) {
-      std::string cloneVarName = Var;
-      // These vars are named like fun.auto.var.
-      // Just replace the function name, with clone function name.
-      std::string cloneFnName = getCloneFnName(Fn);
-      cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName);
-      return cloneVarName;
-    }
-  }; // class PAN.
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
deleted file mode 100644
index 7a948def3cfe..000000000000
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ /dev/null
@@ -1,490 +0,0 @@
-
-//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the helper functions for representing debug information.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16DebugInfo.h" 
-#include "llvm/GlobalVariable.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-using namespace llvm;
-
-/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty.
-///
-void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
-                                      bool &HasAux, int Aux[], 
-                                      std::string &TagName) {
-  if (Ty.isBasicType())
-    PopulateBasicTypeInfo (Ty, TypeNo);
-  else if (Ty.isCompositeType())
-    PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-  else if (Ty.isDerivedType())
-    PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-  else {
-    TypeNo = PIC16Dbg::T_NULL;
-    HasAux = false;
-  }
-  return;
-}
-
-/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty.
-///
-void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
-  std::string Name = Ty.getName();
-  unsigned short BaseTy = GetTypeDebugNumber(Name);
-  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-  TypeNo = TypeNo | (0xffff & BaseTy);
-}
-
-/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type 
-/// from Ty. Derived types are mostly pointers.
-///
-void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                            bool &HasAux, int Aux[],
-                                            std::string &TagName) {
-
-  switch(Ty.getTag())
-  {
-    case dwarf::DW_TAG_pointer_type:
-      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-      TypeNo = TypeNo | PIC16Dbg::DT_PTR;
-      break;
-    default:
-      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-  }
-  
-  // We also need to encode the information about the base type of
-  // pointer in TypeNo.
-  DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom();
-  PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
-}
-
-/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty.
-void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                          bool &HasAux, int Aux[],
-                                          std::string &TagName) {
-
-  DICompositeType CTy = DICompositeType(Ty);
-  DIArray Elements = CTy.getTypeArray();
-  unsigned short size = 1;
-  unsigned short Dimension[4]={0,0,0,0};
-  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
-    DIDescriptor Element = Elements.getElement(i);
-    if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
-      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-      TypeNo = TypeNo | PIC16Dbg::DT_ARY;
-      DISubrange SubRange = DISubrange(Element);
-      Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
-      // Each dimension is represented by 2 bytes starting at byte 9.
-      Aux[8+i*2+0] = Dimension[i];
-      Aux[8+i*2+1] = Dimension[i] >> 8;
-      size = size * Dimension[i];
-    }
-  }
-  HasAux = true;
-  // In auxillary entry for array, 7th and 8th byte represent array size.
-  Aux[6] = size & 0xff;
-  Aux[7] = size >> 8;
-  DIType BaseType = CTy.getTypeDerivedFrom();
-  PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
-}
-
-/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[] , TagName for 
-/// structure or union.
-///
-void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, 
-                                                  unsigned short &TypeNo,
-                                                  bool &HasAux, int Aux[],
-                                                  std::string &TagName) {
-  DICompositeType CTy = DICompositeType(Ty);
-  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-  if (Ty.getTag() == dwarf::DW_TAG_structure_type)
-    TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
-  else
-    TypeNo = TypeNo | PIC16Dbg::T_UNION;
-  TagName = CTy.getName();
-  // UniqueSuffix is .number where number is obtained from
-  // llvm.dbg.composite<number>.
-  // FIXME: This will break when composite type is not represented by
-  // llvm.dbg.composite* global variable. Since we need to revisit 
-  // PIC16DebugInfo implementation anyways after the MDNodes based 
-  // framework is done, let us continue with the way it is.
-  std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
-  TagName += UniqueSuffix;
-  unsigned short size = CTy.getSizeInBits()/8;
-  // 7th and 8th byte represent size.
-  HasAux = true;
-  Aux[6] = size & 0xff;
-  Aux[7] = size >> 8;
-}
-
-/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
-void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
-  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-  TypeNo = TypeNo | PIC16Dbg::T_ENUM;
-}
-
-/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for 
-/// composite types from Ty.
-///
-void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                              bool &HasAux, int Aux[],
-                                              std::string &TagName) {
-  switch (Ty.getTag()) {
-    case dwarf::DW_TAG_array_type: {
-      PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-      break;
-    }
-    case dwarf:: DW_TAG_union_type:
-    case dwarf::DW_TAG_structure_type: {
-      PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-      break;
-    }
-    case dwarf::DW_TAG_enumeration_type: {
-      PopulateEnumTypeInfo (Ty, TypeNo);
-      break;
-    }
-    default:
-      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-  }
-}
-
-/// GetTypeDebugNumber - Get debug type number for given type.
-///
-unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type)  {
-  if (type == "char")
-    return PIC16Dbg::T_CHAR;
-  else if (type == "short")
-    return PIC16Dbg::T_SHORT;
-  else if (type == "int")
-    return PIC16Dbg::T_INT;
-  else if (type == "long")
-    return PIC16Dbg::T_LONG;
-  else if (type == "unsigned char")
-    return PIC16Dbg::T_UCHAR;
-  else if (type == "unsigned short")
-    return PIC16Dbg::T_USHORT;
-  else if (type == "unsigned int")
-    return PIC16Dbg::T_UINT;
-  else if (type == "unsigned long")
-    return PIC16Dbg::T_ULONG;
-  else
-    return 0;
-}
- 
-/// GetStorageClass - Get storage class for give debug variable.
-///
-short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
-  short ClassNo;
-  if (PAN::isLocalName(DIGV.getName())) {
-    // Generating C_AUTO here fails due to error in linker. Change it once
-    // linker is fixed.
-    ClassNo = PIC16Dbg::C_STAT;
-  }
-  else if (DIGV.isLocalToUnit())
-    ClassNo = PIC16Dbg::C_STAT;
-  else
-    ClassNo = PIC16Dbg::C_EXT;
-  return ClassNo;
-}
-
-/// BeginModule - Emit necessary debug info to start a Module and do other
-/// required initializations.
-void PIC16DbgInfo::BeginModule(Module &M) {
-  // Emit file directive for module.
-  DebugInfoFinder DbgFinder;
-  DbgFinder.processModule(M);
-  if (DbgFinder.compile_unit_count() != 0) {
-    // FIXME : What if more then one CUs are present in a module ?
-    MDNode *CU = *DbgFinder.compile_unit_begin();
-    EmitDebugDirectives = true;
-    SwitchToCU(CU);
-  }
-  // Emit debug info for decls of composite types.
-  EmitCompositeTypeDecls(M);
-}
-
-/// Helper to find first valid debug loc for a function.
-///
-static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
-  DebugLoc DL;
-  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
-         II != E; ++II) {
-      DL = II->getDebugLoc();
-      if (!DL.isUnknown())
-        return DL;
-    }
-  }
-  return DL;
-}
-
-/// BeginFunction - Emit necessary debug info to start a function.
-///
-void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
-  if (! EmitDebugDirectives) return;
-  
-  // Retreive the first valid debug Loc and process it.
-  const DebugLoc &DL = GetDebugLocForFunction(MF);
-  // Emit debug info only if valid debug info is available.
-  if (!DL.isUnknown()) {
-    ChangeDebugLoc(MF, DL, true);
-    EmitFunctBeginDI(MF.getFunction());
-  } 
-  // Set current line to 0 so that.line directive is genearted after .bf.
-  CurLine = 0;
-}
-
-/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
-/// CurFile and CurLine may change as a result of this.
-///
-void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,  
-                                  const DebugLoc &DL, bool IsInBeginFunction) {
-  if (!EmitDebugDirectives) return;
-  assert(!DL.isUnknown() && "can't change to invalid debug loc");
-
-  SwitchToCU(DL.getScope(MF.getFunction()->getContext()));
-  SwitchToLine(DL.getLine(), IsInBeginFunction);
-}
-
-/// SwitchToLine - Emit line directive for a new line.
-///
-void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
-  if (CurLine == Line) return;
-  if (!IsInBeginFunction)
-    OS.EmitRawText("\n\t.line " + Twine(Line));
-  CurLine = Line;
-}
-
-/// EndFunction - Emit .ef for end of function.
-///
-void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
-  if (! EmitDebugDirectives) return;
-  const DebugLoc &DL = GetDebugLocForFunction(MF);
-  // Emit debug info only if valid debug info is available.
-  if (!DL.isUnknown())
-    EmitFunctEndDI(MF.getFunction(), CurLine);
-}
-
-/// EndModule - Emit .eof for end of module.
-///
-void PIC16DbgInfo::EndModule(Module &M) {
-  if (! EmitDebugDirectives) return;
-  EmitVarDebugInfo(M);
-  if (CurFile != "") OS.EmitRawText(StringRef("\n\t.eof"));
-}
- 
-/// EmitCompositeTypeElements - Emit debug information for members of a 
-/// composite type.
-/// 
-void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
-                                              std::string SuffixNo) {
-  unsigned long Value = 0;
-  DIArray Elements = CTy.getTypeArray();
-  for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
-    DIDescriptor Element = Elements.getElement(i);
-    unsigned short TypeNo = 0;
-    bool HasAux = false;
-    int ElementAux[PIC16Dbg::AuxSize] = { 0 };
-    std::string TagName = "";
-    DIDerivedType DITy(Element);
-    unsigned short ElementSize = DITy.getSizeInBits()/8;
-    // Get mangleddd name for this structure/union  element.
-    std::string MangMemName = DITy.getName().str() + SuffixNo;
-    PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
-    short Class = 0;
-    if( CTy.getTag() == dwarf::DW_TAG_union_type)
-      Class = PIC16Dbg::C_MOU;
-    else if  (CTy.getTag() == dwarf::DW_TAG_structure_type)
-      Class = PIC16Dbg::C_MOS;
-    EmitSymbol(MangMemName.c_str(), Class, TypeNo, Value);
-    if (CTy.getTag() == dwarf::DW_TAG_structure_type)
-      Value += ElementSize;
-    if (HasAux)
-      EmitAuxEntry(MangMemName.c_str(), ElementAux, PIC16Dbg::AuxSize, TagName);
-  }
-}
-
-/// EmitCompositeTypeDecls - Emit composite type declarations like structure 
-/// and union declarations.
-///
-void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
-  DebugInfoFinder DbgFinder;
-  DbgFinder.processModule(M);
-  for (DebugInfoFinder::iterator I = DbgFinder.type_begin(),
-         E = DbgFinder.type_end(); I != E; ++I) {
-    DICompositeType CTy(*I);
-    if (!CTy.Verify())
-      continue;
-    if (CTy.getTag() == dwarf::DW_TAG_union_type ||
-        CTy.getTag() == dwarf::DW_TAG_structure_type ) {
-      // Get the number after llvm.dbg.composite and make UniqueSuffix from 
-      // it.
-      std::string DIVar = CTy->getNameStr();
-      std::string UniqueSuffix = "." + DIVar.substr(18);
-      std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
-      unsigned short size = CTy.getSizeInBits()/8;
-      int Aux[PIC16Dbg::AuxSize] = {0};
-      // 7th and 8th byte represent size of structure/union.
-      Aux[6] = size & 0xff;
-      Aux[7] = size >> 8;
-      // Emit .def for structure/union tag.
-      if( CTy.getTag() == dwarf::DW_TAG_union_type)
-        EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_UNTAG);
-      else if  (CTy.getTag() == dwarf::DW_TAG_structure_type) 
-        EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_STRTAG);
-      
-      // Emit auxiliary debug information for structure/union tag. 
-      EmitAuxEntry(MangledCTyName.c_str(), Aux, PIC16Dbg::AuxSize);
-      
-      // Emit members.
-      EmitCompositeTypeElements (CTy, UniqueSuffix);
-      
-      // Emit mangled Symbol for end of structure/union.
-      std::string EOSSymbol = ".eos" + UniqueSuffix;
-      EmitSymbol(EOSSymbol.c_str(), PIC16Dbg::C_EOS);
-      EmitAuxEntry(EOSSymbol.c_str(), Aux, PIC16Dbg::AuxSize, 
-                   MangledCTyName.c_str());
-    }
-  }
-}
-
-
-/// EmitFunctBeginDI - Emit .bf for function.
-///
-void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
-  std::string FunctName = F->getName();
-  if (EmitDebugDirectives) {
-    std::string FunctBeginSym = ".bf." + FunctName;
-    std::string BlockBeginSym = ".bb." + FunctName;
-
-    int BFAux[PIC16Dbg::AuxSize] = {0};
-    BFAux[4] = CurLine;
-    BFAux[5] = CurLine >> 8;
-
-    // Emit debug directives for beginning of function.
-    EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
-    EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
-
-    EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
-    EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
-  }
-}
-
-/// EmitFunctEndDI - Emit .ef for function end.
-///
-void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
-  std::string FunctName = F->getName();
-  if (EmitDebugDirectives) {
-    std::string FunctEndSym = ".ef." + FunctName;
-    std::string BlockEndSym = ".eb." + FunctName;
-
-    // Emit debug directives for end of function.
-    EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
-    int EFAux[PIC16Dbg::AuxSize] = {0};
-    // 5th and 6th byte stand for line number.
-    EFAux[4] = CurLine;
-    EFAux[5] = CurLine >> 8;
-    EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
-    EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
-    EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
-  }
-}
-
-/// EmitAuxEntry - Emit Auxiliary debug information.
-///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
-                                std::string TagName) {
-  std::string Tmp;
-  // TagName is emitted in case of structure/union objects.
-  if (!TagName.empty()) Tmp += ", " + TagName;
-  
-  for (int i = 0; i<Num; i++)
-    Tmp += "," + utostr(Aux[i] & 0xff);
-  
-  OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp);
-}
-
-/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
-///
-void PIC16DbgInfo::EmitSymbol(std::string Name, short Class,
-                              unsigned short Type, unsigned long Value) {
-  std::string Tmp;
-  if (Value > 0)
-    Tmp = ", value = " + utostr(Value);
-  
-  OS.EmitRawText("\n\t.def " + Twine(Name) + ", type = " + utostr(Type) +
-                 ", class = " + utostr(Class) + Tmp);
-}
-
-/// EmitVarDebugInfo - Emit debug information for all variables.
-///
-void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
-  DebugInfoFinder DbgFinder;
-  DbgFinder.processModule(M);
-  
-  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
-         E = DbgFinder.global_variable_end(); I != E; ++I) {
-    DIGlobalVariable DIGV(*I);
-    DIType Ty = DIGV.getType();
-    unsigned short TypeNo = 0;
-    bool HasAux = false;
-    int Aux[PIC16Dbg::AuxSize] = { 0 };
-    std::string TagName = "";
-    std::string VarName = DIGV.getName();
-    VarName = MAI->getGlobalPrefix() + VarName;
-    PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
-    // Emit debug info only if type information is availaible.
-    if (TypeNo != PIC16Dbg::T_NULL) {
-      OS.EmitRawText("\t.type " + Twine(VarName) + ", " + Twine(TypeNo));
-      short ClassNo = getStorageClass(DIGV);
-      OS.EmitRawText("\t.class " + Twine(VarName) + ", " + Twine(ClassNo));
-      if (HasAux)
-        EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
-    }
-  }
-}
-
-/// SwitchToCU - Switch to a new compilation unit.
-///
-void PIC16DbgInfo::SwitchToCU(MDNode *CU) {
-  // Get the file path from CU.
-  DICompileUnit cu(CU);
-  std::string DirName = cu.getDirectory();
-  std::string FileName = cu.getFilename();
-  std::string FilePath = DirName + "/" + FileName;
-
-  // Nothing to do if source file is still same.
-  if ( FilePath == CurFile ) return;
-
-  // Else, close the current one and start a new.
-  if (CurFile != "")
-    OS.EmitRawText(StringRef("\t.eof"));
-  OS.EmitRawText("\n\t.file\t\"" + Twine(FilePath) + "\"");
-  CurFile = FilePath;
-  CurLine = 0;
-}
-
-/// EmitEOF - Emit .eof for end of file.
-///
-void PIC16DbgInfo::EmitEOF() {
-  if (CurFile != "")
-    OS.EmitRawText(StringRef("\t.EOF"));
-}
-
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
deleted file mode 100644
index 031dcf092f04..000000000000
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- PIC16DebugInfo.h - Interfaces for PIC16 Debug Information ============//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the helper functions for representing debug information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16DBG_H
-#define PIC16DBG_H
-
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Module.h"
-
-namespace llvm {
-  class MachineFunction;
-  class DebugLoc;
-  class MCStreamer;
-  
-  namespace PIC16Dbg {
-    enum VarType {
-      T_NULL,
-      T_VOID,
-      T_CHAR,
-      T_SHORT,
-      T_INT,
-      T_LONG,
-      T_FLOAT,
-      T_DOUBLE,
-      T_STRUCT,
-      T_UNION,
-      T_ENUM,
-      T_MOE,
-      T_UCHAR,
-      T_USHORT,
-      T_UINT,
-      T_ULONG
-    };
-    enum DerivedType {
-      DT_NONE,
-      DT_PTR,
-      DT_FCN,
-      DT_ARY
-    };
-    enum TypeSize {
-      S_BASIC = 5,
-      S_DERIVED = 3
-    };
-    enum DbgClass {
-      C_NULL,
-      C_AUTO,
-      C_EXT,
-      C_STAT,
-      C_REG,
-      C_EXTDEF,
-      C_LABEL,
-      C_ULABEL,
-      C_MOS,
-      C_ARG,
-      C_STRTAG,
-      C_MOU,
-      C_UNTAG,
-      C_TPDEF,
-      C_USTATIC,
-      C_ENTAG,
-      C_MOE,
-      C_REGPARM,
-      C_FIELD,
-      C_AUTOARG,
-      C_LASTENT,
-      C_BLOCK = 100,
-      C_FCN,
-      C_EOS,
-      C_FILE,
-      C_LINE,
-      C_ALIAS,
-      C_HIDDEN,
-      C_EOF,
-      C_LIST,
-      C_SECTION,
-      C_EFCN = 255
-    };
-    enum SymbolSize {
-      AuxSize =20
-    };
-  }
-
-  class PIC16DbgInfo {
-    MCStreamer &OS;
-    const MCAsmInfo *MAI;
-    std::string CurFile;
-    unsigned CurLine;
-
-    // EmitDebugDirectives is set if debug information is available. Default
-    // value for it is false.
-    bool EmitDebugDirectives;
-
-  public:
-    PIC16DbgInfo(MCStreamer &os, const MCAsmInfo *T) : OS(os), MAI(T) {
-      CurFile = "";
-      CurLine = 0;
-      EmitDebugDirectives = false; 
-    }
-
-    void BeginModule (Module &M);
-    void BeginFunction (const MachineFunction &MF);
-    void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL,
-                         bool IsInBeginFunction = false);
-    void EndFunction (const MachineFunction &MF);
-    void EndModule (Module &M);
-
-
-    private:
-    void SwitchToCU (MDNode *CU);
-    void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
-
-    void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
-                           int Aux[], std::string &TypeName);
-    void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo);
-    void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, 
-                                  bool &HasAux, int Aux[],
-                                  std::string &TypeName);
-
-    void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                    bool &HasAux, int Aux[],
-                                    std::string &TypeName);
-    void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                bool &HasAux, int Aux[],
-                                std::string &TypeName);
-
-    void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                        bool &HasAux, int Aux[],
-                                        std::string &TypeName);
-    void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo);
-
-    unsigned GetTypeDebugNumber(std::string &Type);
-    short getStorageClass(DIGlobalVariable DIGV);
-    void EmitFunctBeginDI(const Function *F);
-    void EmitCompositeTypeDecls(Module &M);
-    void EmitCompositeTypeElements (DICompositeType CTy, std::string Suffix);
-    void EmitFunctEndDI(const Function *F, unsigned Line);
-    void EmitAuxEntry(const std::string VarName, int Aux[], 
-                      int num = PIC16Dbg::AuxSize, std::string TagName = "");
-    inline void EmitSymbol(std::string Name, short Class, 
-                           unsigned short Type = PIC16Dbg::T_NULL, 
-                           unsigned long Value = 0);
-    void EmitVarDebugInfo(Module &M);
-    void EmitEOF();
-  };
-} // end namespace llvm;
-#endif
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
deleted file mode 100644
index 6cbd00262b7f..000000000000
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-isel"
-
-#include "llvm/Support/ErrorHandling.h"
-#include "PIC16ISelDAGToDAG.h"
-using namespace llvm;
-
-/// createPIC16ISelDag - This pass converts a legalized DAG into a
-/// PIC16-specific DAG, ready for instruction scheduling.
-FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
-  return new PIC16DAGToDAGISel(TM);
-}
-
-
-/// Select - Select instructions not customized! Used for
-/// expanded, promoted and normal instructions.
-SDNode* PIC16DAGToDAGISel::Select(SDNode *N) {
-
-  // Select the default instruction.
-  SDNode *ResNode = SelectCode(N);
-
-  return ResNode;
-}
-
-
-// SelectDirectAddr - Match a direct address for DAG. 
-// A direct address could be a globaladdress or externalsymbol.
-bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N, 
-                                      SDValue &Address) {
-  // Return true if TGA or ES.
-  if (N.getOpcode() == ISD::TargetGlobalAddress
-      || N.getOpcode() == ISD::TargetExternalSymbol) {
-    Address = N;
-    return true;
-  }
-
-  return false;
-}
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
deleted file mode 100644
index ecaddd3cff8d..000000000000
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-isel"
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Intrinsics.h"
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel {
-
-  /// TM - Keep a reference to PIC16TargetMachine.
-  const PIC16TargetMachine &TM;
-
-  /// PIC16Lowering - This object fully describes how to lower LLVM code to an
-  /// PIC16-specific SelectionDAG.
-  const PIC16TargetLowering &PIC16Lowering;
-
-public:
-  explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : 
-        SelectionDAGISel(tm),
-        TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
-  
-  // Pass Name
-  virtual const char *getPassName() const {
-    return "PIC16 DAG->DAG Pattern Instruction Selection";
-  } 
-
-private:
-  // Include the pieces autogenerated from the target description.
-#include "PIC16GenDAGISel.inc"
-
-  SDNode *Select(SDNode *N);
-
-  // Match direct address complex pattern.
-  bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address);
-
-};
-
-}
-
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
deleted file mode 100644
index 527b31d0cc9f..000000000000
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ /dev/null
@@ -1,2000 +0,0 @@
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PIC16 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-lower"
-#include "PIC16ABINames.h"
-#include "PIC16ISelLowering.h"
-#include "PIC16TargetObjectFile.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Function.h"
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/ErrorHandling.h"
-
-
-using namespace llvm;
-
-static const char *getIntrinsicName(unsigned opcode) {
-  std::string Basename;
-  switch(opcode) {
-  default: llvm_unreachable("do not know intrinsic name");
-  // Arithmetic Right shift for integer types.
-  case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
-  case RTLIB::SRA_I16: Basename = "sra.i16"; break;
-  case RTLIB::SRA_I32: Basename = "sra.i32"; break;
-
-  // Left shift for integer types.
-  case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
-  case RTLIB::SHL_I16: Basename = "sll.i16"; break;
-  case RTLIB::SHL_I32: Basename = "sll.i32"; break;
-
-  // Logical Right Shift for integer types.
-  case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
-  case RTLIB::SRL_I16: Basename = "srl.i16"; break;
-  case RTLIB::SRL_I32: Basename = "srl.i32"; break;
-
-  // Multiply for integer types.
-  case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
-  case RTLIB::MUL_I16: Basename = "mul.i16"; break;
-  case RTLIB::MUL_I32: Basename = "mul.i32"; break;
-
-  // Signed division for integers.
-  case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
-  case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
-
-  // Unsigned division for integers.
-  case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
-  case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
-
-  // Signed Modulas for integers.
-  case RTLIB::SREM_I16: Basename = "srem.i16"; break;
-  case RTLIB::SREM_I32: Basename = "srem.i32"; break;
-
-  // Unsigned Modulas for integers.
-  case RTLIB::UREM_I16: Basename = "urem.i16"; break;
-  case RTLIB::UREM_I32: Basename = "urem.i32"; break;
-
-  //////////////////////
-  // LIBCALLS FOR FLOATS
-  //////////////////////
-
-  // Float to signed integrals
-  case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
-  case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
-  case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
-
-  // Signed integrals to float. char and int are first sign extended to i32 
-  // before being converted to float, so an I8_F32 or I16_F32 isn't required.
-  case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
-
-  // Float to Unsigned conversions.
-  // Signed conversion can be used for unsigned conversion as well.
-  // In signed and unsigned versions only the interpretation of the 
-  // MSB is different. Bit representation remains the same. 
-  case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
-  case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
-  case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
-
-  // Unsigned to Float conversions. char and int are first zero extended 
-  // before being converted to float.
-  case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
-               
-  // Floating point add, sub, mul, div.
-  case RTLIB::ADD_F32: Basename = "add.f32"; break;
-  case RTLIB::SUB_F32: Basename = "sub.f32"; break;
-  case RTLIB::MUL_F32: Basename = "mul.f32"; break;
-  case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
-  // Floating point comparison
-  case RTLIB::O_F32: Basename = "unordered.f32"; break;
-  case RTLIB::UO_F32: Basename = "unordered.f32"; break;
-  case RTLIB::OLE_F32: Basename = "le.f32"; break;
-  case RTLIB::OGE_F32: Basename = "ge.f32"; break;
-  case RTLIB::OLT_F32: Basename = "lt.f32"; break;
-  case RTLIB::OGT_F32: Basename = "gt.f32"; break;
-  case RTLIB::OEQ_F32: Basename = "eq.f32"; break;
-  case RTLIB::UNE_F32: Basename = "neq.f32"; break;
-  }
-  
-  std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
-  std::string tagname = PAN::getTagName(PAN::LIBCALL);
-  std::string Fullname = prefix + tagname + Basename; 
-
-  // The name has to live through program life.
-  return ESNames::createESName(Fullname);
-}
-
-// getStdLibCallName - Get the name for the standard library function.
-static const char *getStdLibCallName(unsigned opcode) {
-  std::string BaseName;
-  switch(opcode) {
-    case RTLIB::COS_F32: BaseName = "cos";
-      break;
-    case RTLIB::SIN_F32: BaseName = "sin";
-      break;
-    case RTLIB::MEMCPY: BaseName = "memcpy";
-      break;
-    case RTLIB::MEMSET: BaseName = "memset";
-      break;
-    case RTLIB::MEMMOVE: BaseName = "memmove";
-      break;
-    default: llvm_unreachable("do not know std lib call name");
-  }
-  std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
-  std::string LibCallName = prefix + BaseName;
-
-  // The name has to live through program life.
-  return ESNames::createESName(LibCallName);
-}
-
-// PIC16TargetLowering Constructor.
-PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
-  : TargetLowering(TM, new PIC16TargetObjectFile()) {
- 
-  Subtarget = &TM.getSubtarget<PIC16Subtarget>();
-
-  addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
-
-  setShiftAmountType(MVT::i8);
-  
-  // Std lib call names
-  setLibcallName(RTLIB::COS_F32, getStdLibCallName(RTLIB::COS_F32));
-  setLibcallName(RTLIB::SIN_F32, getStdLibCallName(RTLIB::SIN_F32));
-  setLibcallName(RTLIB::MEMCPY, getStdLibCallName(RTLIB::MEMCPY));
-  setLibcallName(RTLIB::MEMSET, getStdLibCallName(RTLIB::MEMSET));
-  setLibcallName(RTLIB::MEMMOVE, getStdLibCallName(RTLIB::MEMMOVE));
-
-  // SRA library call names
-  setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8));
-  setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16));
-  setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32));
-
-  // SHL library call names
-  setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8));
-  setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16));
-  setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32));
-
-  // SRL library call names
-  setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8));
-  setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16));
-  setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32));
-
-  // MUL Library call names
-  setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8));
-  setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
-  setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
-
-  // Signed division lib call names
-  setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
-  setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
-
-  // Unsigned division lib call names
-  setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
-  setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
-
-  // Signed remainder lib call names
-  setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
-  setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
-
-  // Unsigned remainder lib call names
-  setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
-  setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
- 
-  // Floating point to signed int conversions.
-  setLibcallName(RTLIB::FPTOSINT_F32_I8, 
-                 getIntrinsicName(RTLIB::FPTOSINT_F32_I8));
-  setLibcallName(RTLIB::FPTOSINT_F32_I16, 
-                 getIntrinsicName(RTLIB::FPTOSINT_F32_I16));
-  setLibcallName(RTLIB::FPTOSINT_F32_I32, 
-                 getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
-
-  // Signed int to floats.
-  setLibcallName(RTLIB::SINTTOFP_I32_F32, 
-                 getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
-
-  // Floating points to unsigned ints.
-  setLibcallName(RTLIB::FPTOUINT_F32_I8, 
-                 getIntrinsicName(RTLIB::FPTOUINT_F32_I8));
-  setLibcallName(RTLIB::FPTOUINT_F32_I16, 
-                 getIntrinsicName(RTLIB::FPTOUINT_F32_I16));
-  setLibcallName(RTLIB::FPTOUINT_F32_I32, 
-                 getIntrinsicName(RTLIB::FPTOUINT_F32_I32));
-
-  // Unsigned int to floats.
-  setLibcallName(RTLIB::UINTTOFP_I32_F32, 
-                 getIntrinsicName(RTLIB::UINTTOFP_I32_F32));
-
-  // Floating point add, sub, mul ,div.
-  setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
-  setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
-  setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
-  setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
-
-  // Floationg point comparison
-  setLibcallName(RTLIB::O_F32, getIntrinsicName(RTLIB::O_F32));
-  setLibcallName(RTLIB::UO_F32, getIntrinsicName(RTLIB::UO_F32));
-  setLibcallName(RTLIB::OLE_F32, getIntrinsicName(RTLIB::OLE_F32));
-  setLibcallName(RTLIB::OGE_F32, getIntrinsicName(RTLIB::OGE_F32));
-  setLibcallName(RTLIB::OLT_F32, getIntrinsicName(RTLIB::OLT_F32));
-  setLibcallName(RTLIB::OGT_F32, getIntrinsicName(RTLIB::OGT_F32));
-  setLibcallName(RTLIB::OEQ_F32, getIntrinsicName(RTLIB::OEQ_F32));
-  setLibcallName(RTLIB::UNE_F32, getIntrinsicName(RTLIB::UNE_F32));
-
-  // Return value comparisons of floating point calls. 
-  setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
-  setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
-
-  setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
-  setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
-
-  setOperationAction(ISD::LOAD,   MVT::i8,  Legal);
-  setOperationAction(ISD::LOAD,   MVT::i16, Custom);
-  setOperationAction(ISD::LOAD,   MVT::i32, Custom);
-
-  setOperationAction(ISD::STORE,  MVT::i8,  Legal);
-  setOperationAction(ISD::STORE,  MVT::i16, Custom);
-  setOperationAction(ISD::STORE,  MVT::i32, Custom);
-  setOperationAction(ISD::STORE,  MVT::i64, Custom);
-
-  setOperationAction(ISD::ADDE,    MVT::i8,  Custom);
-  setOperationAction(ISD::ADDC,    MVT::i8,  Custom);
-  setOperationAction(ISD::SUBE,    MVT::i8,  Custom);
-  setOperationAction(ISD::SUBC,    MVT::i8,  Custom);
-  setOperationAction(ISD::SUB,    MVT::i8,  Custom);
-  setOperationAction(ISD::ADD,    MVT::i8,  Custom);
-  setOperationAction(ISD::ADD,    MVT::i16, Custom);
-
-  setOperationAction(ISD::OR,     MVT::i8,  Custom);
-  setOperationAction(ISD::AND,    MVT::i8,  Custom);
-  setOperationAction(ISD::XOR,    MVT::i8,  Custom);
-
-  setOperationAction(ISD::FrameIndex, MVT::i16, Custom);
-
-  setOperationAction(ISD::MUL,    MVT::i8,  Custom);
-
-  setOperationAction(ISD::SMUL_LOHI,    MVT::i8,  Expand);
-  setOperationAction(ISD::UMUL_LOHI,    MVT::i8,  Expand);
-  setOperationAction(ISD::MULHU,        MVT::i8, Expand);
-  setOperationAction(ISD::MULHS,        MVT::i8, Expand);
-
-  setOperationAction(ISD::SRA,    MVT::i8,  Custom);
-  setOperationAction(ISD::SHL,    MVT::i8,  Custom);
-  setOperationAction(ISD::SRL,    MVT::i8,  Custom);
-
-  setOperationAction(ISD::ROTL,    MVT::i8,  Expand);
-  setOperationAction(ISD::ROTR,    MVT::i8,  Expand);
-
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
-  // PIC16 does not support shift parts
-  setOperationAction(ISD::SRA_PARTS,    MVT::i8, Expand);
-  setOperationAction(ISD::SHL_PARTS,    MVT::i8, Expand);
-  setOperationAction(ISD::SRL_PARTS,    MVT::i8, Expand);
-
-
-  // PIC16 does not have a SETCC, expand it to SELECT_CC.
-  setOperationAction(ISD::SETCC,  MVT::i8, Expand);
-  setOperationAction(ISD::SELECT,  MVT::i8, Expand);
-  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
-
-  setOperationAction(ISD::SELECT_CC,  MVT::i8, Custom);
-  setOperationAction(ISD::BR_CC,  MVT::i8, Custom);
-
-  //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
-  setTruncStoreAction(MVT::i16,   MVT::i8,  Custom);
-
-  // Now deduce the information based on the above mentioned 
-  // actions
-  computeRegisterProperties();
-}
-
-std::pair<const TargetRegisterClass*, uint8_t>
-PIC16TargetLowering::findRepresentativeClass(EVT VT) const {
-  switch (VT.getSimpleVT().SimpleTy) {
-  default:
-    return TargetLowering::findRepresentativeClass(VT);
-  case MVT::i16:
-    return std::make_pair(PIC16::FSR16RegisterClass, 1);
-  }
-}
-
-// getOutFlag - Extract the flag result if the Op has it.
-static SDValue getOutFlag(SDValue &Op) {
-  // Flag is the last value of the node.
-  SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1);
-
-  assert (Flag.getValueType() == MVT::Flag 
-          && "Node does not have an out Flag");
-
-  return Flag;
-}
-// Get the TmpOffset for FrameIndex
-unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size,
-                                                MachineFunction &MF) const {
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-  std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap();
-
-  std::map<unsigned, unsigned>::iterator 
-            MapIt = FiTmpOffsetMap.find(FI);
-  if (MapIt != FiTmpOffsetMap.end())
-      return MapIt->second;
-
-  // This FI (FrameIndex) is not yet mapped, so map it
-  FiTmpOffsetMap[FI] = FuncInfo->getTmpSize(); 
-  FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size);
-  return FiTmpOffsetMap[FI];
-}
-
-void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-  FuncInfo->getFiTmpOffsetMap().clear();
-  FuncInfo->setTmpSize(0);
-}
-
-// To extract chain value from the SDValue Nodes
-// This function will help to maintain the chain extracting
-// code at one place. In case of any change in future it will
-// help maintain the code.
-static SDValue getChain(SDValue &Op) { 
-  SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1);
-
-  // If the last value returned in Flag then the chain is
-  // second last value returned.
-  if (Chain.getValueType() == MVT::Flag)
-    Chain = Op.getValue(Op.getNode()->getNumValues() - 2);
-  
-  // All nodes may not produce a chain. Therefore following assert
-  // verifies that the node is returning a chain only.
-  assert (Chain.getValueType() == MVT::Other 
-          && "Node does not have a chain");
-
-  return Chain;
-}
-
-/// PopulateResults - Helper function to LowerOperation.
-/// If a node wants to return multiple results after lowering,
-/// it stuffs them into an array of SDValue called Results.
-
-static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
-  if (N.getOpcode() == ISD::MERGE_VALUES) {
-    int NumResults = N.getNumOperands();
-    for( int i = 0; i < NumResults; i++)
-      Results.push_back(N.getOperand(i));
-  }
-  else
-    Results.push_back(N);
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getSetCCResultType(EVT ValType) const {
-  return MVT::i8;
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getCmpLibcallReturnType() const {
-  return MVT::i8; 
-}
-
-/// The type legalizer framework of generating legalizer can generate libcalls
-/// only when the operand/result types are illegal.
-/// PIC16 needs to generate libcalls even for the legal types (i8) for some ops.
-/// For example an arithmetic right shift. These functions are used to lower
-/// such operations that generate libcall for legal types.
-
-void 
-PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
-                                         const char *Name) {
-  PIC16LibcallNames[Call] = Name; 
-}
-
-const char *
-PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const {
-  return PIC16LibcallNames[Call];
-}
-
-SDValue
-PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
-                                      EVT RetVT, const SDValue *Ops,
-                                      unsigned NumOps, bool isSigned,
-                                      SelectionDAG &DAG, DebugLoc dl) const {
-
-  TargetLowering::ArgListTy Args;
-  Args.reserve(NumOps);
-
-  TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0; i != NumOps; ++i) {
-    Entry.Node = Ops[i];
-    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
-    Entry.isSExt = isSigned;
-    Entry.isZExt = !isSigned;
-    Args.push_back(Entry);
-  }
-
-  SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i16);
-
-   const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-   std::pair<SDValue,SDValue> CallInfo = 
-     LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
-                 false, 0, CallingConv::C, false,
-                 /*isReturnValueUsed=*/true,
-                 Callee, Args, DAG, dl);
-
-  return CallInfo.first;
-}
-
-const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch (Opcode) {
-  default:                         return NULL;
-  case PIC16ISD::Lo:               return "PIC16ISD::Lo";
-  case PIC16ISD::Hi:               return "PIC16ISD::Hi";
-  case PIC16ISD::MTLO:             return "PIC16ISD::MTLO";
-  case PIC16ISD::MTHI:             return "PIC16ISD::MTHI";
-  case PIC16ISD::MTPCLATH:         return "PIC16ISD::MTPCLATH";
-  case PIC16ISD::PIC16Connect:     return "PIC16ISD::PIC16Connect";
-  case PIC16ISD::Banksel:          return "PIC16ISD::Banksel";
-  case PIC16ISD::PIC16Load:        return "PIC16ISD::PIC16Load";
-  case PIC16ISD::PIC16LdArg:       return "PIC16ISD::PIC16LdArg";
-  case PIC16ISD::PIC16LdWF:        return "PIC16ISD::PIC16LdWF";
-  case PIC16ISD::PIC16Store:       return "PIC16ISD::PIC16Store";
-  case PIC16ISD::PIC16StWF:        return "PIC16ISD::PIC16StWF";
-  case PIC16ISD::BCF:              return "PIC16ISD::BCF";
-  case PIC16ISD::LSLF:             return "PIC16ISD::LSLF";
-  case PIC16ISD::LRLF:             return "PIC16ISD::LRLF";
-  case PIC16ISD::RLF:              return "PIC16ISD::RLF";
-  case PIC16ISD::RRF:              return "PIC16ISD::RRF";
-  case PIC16ISD::CALL:             return "PIC16ISD::CALL";
-  case PIC16ISD::CALLW:            return "PIC16ISD::CALLW";
-  case PIC16ISD::SUBCC:            return "PIC16ISD::SUBCC";
-  case PIC16ISD::SELECT_ICC:       return "PIC16ISD::SELECT_ICC";
-  case PIC16ISD::BRCOND:           return "PIC16ISD::BRCOND";
-  case PIC16ISD::RET:              return "PIC16ISD::RET";
-  case PIC16ISD::Dummy:            return "PIC16ISD::Dummy";
-  }
-}
-
-void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
-                                             SmallVectorImpl<SDValue>&Results,
-                                             SelectionDAG &DAG) const {
-
-  switch (N->getOpcode()) {
-    case ISD::GlobalAddress:
-      Results.push_back(ExpandGlobalAddress(N, DAG));
-      return;
-    case ISD::ExternalSymbol:
-      Results.push_back(ExpandExternalSymbol(N, DAG));
-      return;
-    case ISD::STORE:
-      Results.push_back(ExpandStore(N, DAG));
-      return;
-    case ISD::LOAD:
-      PopulateResults(ExpandLoad(N, DAG), Results);
-      return;
-    case ISD::ADD:
-      // Results.push_back(ExpandAdd(N, DAG));
-      return;
-    case ISD::FrameIndex:
-      Results.push_back(ExpandFrameIndex(N, DAG));
-      return;
-    default:
-      assert (0 && "not implemented");
-      return;
-  }
-}
-
-SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N,
-                                              SelectionDAG &DAG) const {
-
-  // Currently handling FrameIndex of size MVT::i16 only
-  // One example of this scenario is when return value is written on
-  // FrameIndex#0
-
-  if (N->getValueType(0) != MVT::i16)
-    return SDValue();
-
-  // Expand the FrameIndex into ExternalSymbol and a Constant node
-  // The constant will represent the frame index number
-  // Get the current function frame
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  const std::string Name = Func->getName();
-  
-  FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = FR->getDebugLoc();
-
-  // Expand FrameIndex like GlobalAddress and ExternalSymbol
-  // Also use Offset field for lo and hi parts. The default 
-  // offset is zero.
-
-  SDValue ES;
-  int FrameOffset;
-  SDValue FI = SDValue(N,0);
-  LegalizeFrameIndex(FI, DAG, ES, FrameOffset);
-  SDValue Offset = DAG.getConstant(FrameOffset, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, ES, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, ES, Offset);
-  return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
-}
-
-
-SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const { 
-  StoreSDNode *St = cast<StoreSDNode>(N);
-  SDValue Chain = St->getChain();
-  SDValue Src = St->getValue();
-  SDValue Ptr = St->getBasePtr();
-  EVT ValueType = Src.getValueType();
-  unsigned StoreOffset = 0;
-  DebugLoc dl = N->getDebugLoc();
-
-  SDValue PtrLo, PtrHi;
-  LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl);
- 
-  if (ValueType == MVT::i8) {
-    return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src,
-                        PtrLo, PtrHi, 
-                        DAG.getConstant (0 + StoreOffset, MVT::i8));
-  }
-  else if (ValueType == MVT::i16) {
-    // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions. The Chain may be a TokenFactor
-    // created for an unrelated purpose, in which case it may not have
-    // exactly two operands. Also, even if it does have two operands, they
-    // may not be the low and high parts of an aligned load that was split.
-    if (Chain.getOpcode() == ISD::TokenFactor) {
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
-                                 ChainLo,
-                                 SrcLo, PtrLo, PtrHi,
-                                 DAG.getConstant (0 + StoreOffset, MVT::i8));
-
-    SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi, 
-                                 SrcHi, PtrLo, PtrHi,
-                                 DAG.getConstant (1 + StoreOffset, MVT::i8));
-
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1),
-                       getChain(Store2));
-  }
-  else if (ValueType == MVT::i32) {
-    // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-
-    // Get the expanded parts of each of SrcLo and SrcHi.
-    SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2;
-    GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2);
-    GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
-
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (Chain.getOpcode() == ISD::TokenFactor) {  
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
-            ChainHi2 = ChainHi;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (ChainLo.getOpcode() == ISD::TokenFactor) {
-      ChainLo1 = ChainLo.getOperand(0);
-      ChainLo2 = ChainLo.getOperand(1);
-    }
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (ChainHi.getOpcode() == ISD::TokenFactor) {
-      ChainHi1 = ChainHi.getOperand(0);
-      ChainHi2 = ChainHi.getOperand(1);
-    }
-    SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
-                                 ChainLo1,
-                                 SrcLo1, PtrLo, PtrHi,
-                                 DAG.getConstant (0 + StoreOffset, MVT::i8));
-
-    SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2,
-                                 SrcLo2, PtrLo, PtrHi,
-                                 DAG.getConstant (1 + StoreOffset, MVT::i8));
-
-    SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1,
-                                 SrcHi1, PtrLo, PtrHi,
-                                 DAG.getConstant (2 + StoreOffset, MVT::i8));
-
-    SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2,
-                                 SrcHi2, PtrLo, PtrHi,
-                                 DAG.getConstant (3 + StoreOffset, MVT::i8));
-
-    SDValue RetLo =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
-                                 getChain(Store1), getChain(Store2));
-    SDValue RetHi =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
-                                 getChain(Store3), getChain(Store4));
-    return  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
-
-  } else if (ValueType == MVT::i64) {
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (Chain.getOpcode() == ISD::TokenFactor) {
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL,
-                                  0 + StoreOffset, false, false, 0);
-
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(4, Ptr.getValueType()));
-    SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL,
-                                  1 + StoreOffset, false, false, 0);
-
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1,
-                       Store2);
-  } else {
-    assert (0 && "value type not supported");
-    return SDValue();
-  }
-}
-
-SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N,
-                                                  SelectionDAG &DAG)
- const {
-  ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = ES->getDebugLoc();
-
-  SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset);
-
-  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-// ExpandGlobalAddress - 
-SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
-                                                 SelectionDAG &DAG) const {
-  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = G->getDebugLoc();
-  
-  SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(),
-                                           MVT::i8,
-                                           G->getOffset());
-
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset);
-
-  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const {
-  assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
-
-  if (Op.getOpcode() == ISD::BUILD_PAIR) {
-   if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo) 
-     return true;
-  }
-  return false;
-}
-
-// Return true if DirectAddress is in ROM_SPACE
-bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const {
-
-  // RomAddress is a GlobalAddress in ROM_SPACE_
-  // If the Op is not a GlobalAddress return NULL without checking
-  // anything further.
-  if (!isDirectAddress(Op))
-    return false; 
-
-  // Its a GlobalAddress.
-  // It is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)) and Op is BUILD_PAIR
-  SDValue TGA = Op.getOperand(0).getOperand(0);
-  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(TGA);
-
-  if (GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
-    return true;
-
-  // Any other address space return it false
-  return false;
-}
-
-
-// GetExpandedParts - This function is on the similiar lines as
-// the GetExpandedInteger in type legalizer is. This returns expanded
-// parts of Op in Lo and Hi. 
-
-void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
-                                           SDValue &Lo, SDValue &Hi) const {
-  SDNode *N = Op.getNode();
-  DebugLoc dl = N->getDebugLoc();
-  EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
-  // Extract the lo component.
-  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
-                   DAG.getConstant(0, MVT::i8));
-
-  // extract the hi component
-  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
-                   DAG.getConstant(1, MVT::i8));
-}
-
-// Legalize FrameIndex into ExternalSymbol and offset.
-void 
-PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
-                                        SDValue &ES, int &Offset) const {
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-  const std::string Name = Func->getName();
-
-  FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
-
-  // FrameIndices are not stack offsets. But they represent the request
-  // for space on stack. That space requested may be more than one byte. 
-  // Therefore, to calculate the stack offset that a FrameIndex aligns
-  // with, we need to traverse all the FrameIndices available earlier in 
-  // the list and add their requested size.
-  unsigned FIndex = FR->getIndex();
-  const char *tmpName;
-  if (FIndex < FuncInfo->getReservedFrameCount()) {
-    tmpName = ESNames::createESName(PAN::getFrameLabel(Name));
-    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-    Offset = 0;
-    for (unsigned i=0; i<FIndex ; ++i) {
-      Offset += MFI->getObjectSize(i);
-    }
-  } else {
-   // FrameIndex has been made for some temporary storage 
-    tmpName = ESNames::createESName(PAN::getTempdataLabel(Name));
-    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-    Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF);
-  }
-
-  return;
-}
-
-// This function legalizes the PIC16 Addresses. If the Pointer is  
-//  -- Direct address variable residing 
-//     --> then a Banksel for that variable will be created.
-//  -- Rom variable            
-//     --> then it will be treated as an indirect address.
-//  -- Indirect address 
-//     --> then the address will be loaded into FSR
-//  -- ADD with constant operand
-//     --> then constant operand of ADD will be returned as Offset
-//         and non-constant operand of ADD will be treated as pointer.
-// Returns the high and lo part of the address, and the offset(in case of ADD).
-
-void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, 
-                                          SDValue &Lo, SDValue &Hi,
-                                          unsigned &Offset, DebugLoc dl) const {
-
-  // Offset, by default, should be 0
-  Offset = 0;
-
-  // If the pointer is ADD with constant,
-  // return the constant value as the offset  
-  if (Ptr.getOpcode() == ISD::ADD) {
-    SDValue OperLeft = Ptr.getOperand(0);
-    SDValue OperRight = Ptr.getOperand(1);
-    if ((OperLeft.getOpcode() == ISD::Constant) &&
-        (dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue() < 32 )) {
-      Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
-      Ptr = OperRight;
-    } else if ((OperRight.getOpcode() == ISD::Constant)  &&
-               (dyn_cast<ConstantSDNode>(OperRight)->getZExtValue() < 32 )){
-      Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
-      Ptr = OperLeft;
-    }
-  }
-
-  // If the pointer is Type i8 and an external symbol
-  // then treat it as direct address.
-  // One example for such case is storing and loading
-  // from function frame during a call
-  if (Ptr.getValueType() == MVT::i8) {
-    switch (Ptr.getOpcode()) {
-    case ISD::TargetExternalSymbol:
-      Lo = Ptr;
-      Hi = DAG.getConstant(1, MVT::i8);
-      return;
-    }
-  }
-
-  // Expansion of FrameIndex has Lo/Hi parts
-  if (isDirectAddress(Ptr)) { 
-      SDValue TFI = Ptr.getOperand(0).getOperand(0); 
-      int FrameOffset;
-      if (TFI.getOpcode() == ISD::TargetFrameIndex) {
-        LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset);
-        Hi = DAG.getConstant(1, MVT::i8);
-        Offset += FrameOffset; 
-        return;
-      } else if (TFI.getOpcode() == ISD::TargetExternalSymbol) {
-        // FrameIndex has already been expanded.
-        // Now just make use of its expansion
-        Lo = TFI;
-        Hi = DAG.getConstant(1, MVT::i8);
-        SDValue FOffset = Ptr.getOperand(0).getOperand(1);
-        assert (FOffset.getOpcode() == ISD::Constant && 
-                          "Invalid operand of PIC16ISD::Lo");
-        Offset += dyn_cast<ConstantSDNode>(FOffset)->getZExtValue();
-        return;
-      }
-  }
-
-  if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) {
-    // Direct addressing case for RAM variables. The Hi part is constant
-    // and the Lo part is the TGA itself.
-    Lo = Ptr.getOperand(0).getOperand(0);
-
-    // For direct addresses Hi is a constant. Value 1 for the constant
-    // signifies that banksel needs to generated for it. Value 0 for
-    // the constant signifies that banksel does not need to be generated 
-    // for it. Mark it as 1 now and optimize later. 
-    Hi = DAG.getConstant(1, MVT::i8);
-    return; 
-  }
-
-  // Indirect addresses. Get the hi and lo parts of ptr. 
-  GetExpandedParts(Ptr, DAG, Lo, Hi);
-
-  // Put the hi and lo parts into FSR.
-  Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo);
-  Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi);
-
-  return;
-}
-
-SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const {
-  LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0));
-  SDValue Chain = LD->getChain();
-  SDValue Ptr = LD->getBasePtr();
-  DebugLoc dl = LD->getDebugLoc();
-
-  SDValue Load, Offset;
-  SDVTList Tys; 
-  EVT VT, NewVT;
-  SDValue PtrLo, PtrHi;
-  unsigned LoadOffset;
-
-  // Legalize direct/indirect addresses. This will give the lo and hi parts
-  // of the address and the offset.
-  LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl);
-
-  // Load from the pointer (direct address or FSR) 
-  VT = N->getValueType(0);
-  unsigned NumLoads = VT.getSizeInBits() / 8; 
-  std::vector<SDValue> PICLoads;
-  unsigned iter;
-  EVT MemVT = LD->getMemoryVT();
-  if(ISD::isNON_EXTLoad(N)) {
-    for (iter=0; iter<NumLoads ; ++iter) {
-      // Add the pointer offset if any
-      Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
-      Tys = DAG.getVTList(MVT::i8, MVT::Other); 
-      Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
-                         Offset); 
-      PICLoads.push_back(Load);
-    }
-  } else {
-    // If it is extended load then use PIC16Load for Memory Bytes
-    // and for all extended bytes perform action based on type of
-    // extention - i.e. SignExtendedLoad or ZeroExtendedLoad
-
-    
-    // For extended loads this is the memory value type
-    // i.e. without any extension
-    EVT MemVT = LD->getMemoryVT();
-    unsigned MemBytes = MemVT.getSizeInBits() / 8;
-    // if MVT::i1 is extended to MVT::i8 then MemBytes will be zero
-    // So set it to one
-    if (MemBytes == 0) MemBytes = 1;
-    
-    unsigned ExtdBytes = VT.getSizeInBits() / 8;
-    Offset = DAG.getConstant(LoadOffset, MVT::i8);
-
-    Tys = DAG.getVTList(MVT::i8, MVT::Other); 
-    // For MemBytes generate PIC16Load with proper offset
-    for (iter=0; iter < MemBytes; ++iter) {
-      // Add the pointer offset if any
-      Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
-      Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
-                         Offset); 
-      PICLoads.push_back(Load);
-    }
-
-    // For SignExtendedLoad
-    if (ISD::isSEXTLoad(N)) {
-      // For all ExtdBytes use the Right Shifted(Arithmetic) Value of the 
-      // highest MemByte
-      SDValue SRA = DAG.getNode(ISD::SRA, dl, MVT::i8, Load, 
-                                DAG.getConstant(7, MVT::i8));
-      for (iter=MemBytes; iter<ExtdBytes; ++iter) { 
-        PICLoads.push_back(SRA);
-      }
-    } else if (ISD::isZEXTLoad(N) || ISD::isEXTLoad(N)) {
-    //} else if (ISD::isZEXTLoad(N)) {
-      // ZeroExtendedLoad -- For all ExtdBytes use constant 0
-      SDValue ConstZero = DAG.getConstant(0, MVT::i8);
-      for (iter=MemBytes; iter<ExtdBytes; ++iter) { 
-        PICLoads.push_back(ConstZero);
-      }
-    }
-  }
-  SDValue BP;
-
-  if (VT == MVT::i8) {
-    // Operand of Load is illegal -- Load itself is legal
-    return PICLoads[0];
-  }
-  else if (VT == MVT::i16) {
-    BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, PICLoads[0], PICLoads[1]);
-    if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
-      Chain = getChain(PICLoads[0]);
-    else
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
-                          getChain(PICLoads[0]), getChain(PICLoads[1]));
-  } else if (VT == MVT::i32) {
-    SDValue BPs[2];
-    BPs[0] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, 
-                         PICLoads[0], PICLoads[1]);
-    BPs[1] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
-                         PICLoads[2], PICLoads[3]);
-    BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, BPs[0], BPs[1]);
-    if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
-      Chain = getChain(PICLoads[0]);
-    else if (MemVT == MVT::i16)
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
-                          getChain(PICLoads[0]), getChain(PICLoads[1]));
-    else {
-      SDValue Chains[2];
-      Chains[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                              getChain(PICLoads[0]), getChain(PICLoads[1]));
-      Chains[1] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                              getChain(PICLoads[2]), getChain(PICLoads[3]));
-      Chain =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                           Chains[0], Chains[1]);
-    }
-  }
-  Tys = DAG.getVTList(VT, MVT::Other); 
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
-}
-
-SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
- 
-  SDNode *N = Op.getNode();
-  SDValue Value = N->getOperand(0);
-  SDValue Amt = N->getOperand(1);
-  PIC16ISD::PIC16Libcall CallCode;
-  switch (N->getOpcode()) {
-  case ISD::SRA:
-    CallCode = PIC16ISD::SRA_I8;
-    break;
-  case ISD::SHL:
-    CallCode = PIC16ISD::SLL_I8;
-    break;
-  case ISD::SRL:
-    CallCode = PIC16ISD::SRL_I8;
-    break;
-  default:
-    assert ( 0 && "This shift is not implemented yet.");
-    return SDValue();
-  }
-  SmallVector<SDValue, 2> Ops(2);
-  Ops[0] = Value;
-  Ops[1] = Amt;
-  SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2, 
-                                  true, DAG, N->getDebugLoc());
-  return Call;
-}
-
-SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower");
-
-  SDNode *N = Op.getNode();
-  SmallVector<SDValue, 2> Ops(2);
-  Ops[0] = N->getOperand(0);
-  Ops[1] = N->getOperand(1);
-  SDValue Call = MakePIC16Libcall(PIC16ISD::MUL_I8, N->getValueType(0), 
-                                  &Ops[0], 2, true, DAG, N->getDebugLoc());
-  return Call;
-}
-
-void
-PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
-                                           SmallVectorImpl<SDValue>&Results,
-                                           SelectionDAG &DAG) const {
-  SDValue Op = SDValue(N, 0);
-  SDValue Res;
-  unsigned i;
-  switch (Op.getOpcode()) {
-    case ISD::LOAD:
-      Res = ExpandLoad(Op.getNode(), DAG); break;
-    default: {
-      // All other operations are handled in LowerOperation.
-      Res = LowerOperation(Op, DAG);
-      if (Res.getNode())
-        Results.push_back(Res);
-        
-      return; 
-    }
-  }
-
-  N = Res.getNode();
-  unsigned NumValues = N->getNumValues(); 
-  for (i = 0; i < NumValues ; i++) {
-    Results.push_back(SDValue(N, i)); 
-  }
-}
-
-SDValue PIC16TargetLowering::LowerOperation(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) {
-    case ISD::ADD:
-    case ISD::ADDC:
-    case ISD::ADDE:
-      return LowerADD(Op, DAG);
-    case ISD::SUB:
-    case ISD::SUBC:
-    case ISD::SUBE:
-      return LowerSUB(Op, DAG);
-    case ISD::LOAD:
-      return ExpandLoad(Op.getNode(), DAG);
-    case ISD::STORE:
-      return ExpandStore(Op.getNode(), DAG);
-    case ISD::MUL:
-      return LowerMUL(Op, DAG);
-    case ISD::SHL:
-    case ISD::SRA:
-    case ISD::SRL:
-      return LowerShift(Op, DAG);
-    case ISD::OR:
-    case ISD::AND:
-    case ISD::XOR:
-      return LowerBinOp(Op, DAG);
-    case ISD::BR_CC:
-      return LowerBR_CC(Op, DAG);
-    case ISD::SELECT_CC:
-      return LowerSELECT_CC(Op, DAG);
-  }
-  return SDValue();
-}
-
-SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
-                                                 SelectionDAG &DAG,
-                                                 DebugLoc dl) const {
-  assert (Op.getValueType() == MVT::i8 
-          && "illegal value type to store on stack.");
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  const std::string FuncName = Func->getName();
-
-
-  // Put the value on stack.
-  // Get a stack slot index and convert to es.
-  int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
-  const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
-  // Store the value to ES.
-  SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other,
-                               DAG.getEntryNode(),
-                               Op, ES, 
-                               DAG.getConstant (1, MVT::i8), // Banksel.
-                               DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), 
-                                                MVT::i8));
-
-  // Load the value from ES.
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
-  SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
-                             ES, DAG.getConstant (1, MVT::i8),
-                             DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), 
-                             MVT::i8));
-    
-  return Load.getValue(0);
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
-                           SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                           const SmallVectorImpl<ISD::OutputArg> &Outs,
-                           const SmallVectorImpl<SDValue> &OutVals,
-                           const SmallVectorImpl<ISD::InputArg> &Ins,
-                           DebugLoc dl, SelectionDAG &DAG) const {
-  unsigned NumOps = Outs.size();
-
-  // If call has no arguments then do nothing and return.
-  if (NumOps == 0)
-    return Chain;
-
-  std::vector<SDValue> Ops;
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Arg, StoreRet;
-
-  // For PIC16 ABI the arguments come after the return value. 
-  unsigned RetVals = Ins.size();
-  for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
-    // Get the arguments
-    Arg = OutVals[i];
-    
-    Ops.clear();
-    Ops.push_back(Chain);
-    Ops.push_back(Arg);
-    Ops.push_back(DataAddr_Lo);
-    Ops.push_back(DataAddr_Hi);
-    Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8));
-    Ops.push_back(InFlag);
-
-    StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
-    Chain = getChain(StoreRet);
-    InFlag = getOutFlag(StoreRet);
-    ArgOffset++;
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
-                         const SmallVectorImpl<ISD::OutputArg> &Outs,
-                         const SmallVectorImpl<SDValue> &OutVals,
-                         DebugLoc dl, SelectionDAG &DAG) const {
-  unsigned NumOps = Outs.size();
-  std::string Name;
-  SDValue Arg, StoreAt;
-  EVT ArgVT;
-  unsigned Size=0;
-
-  // If call has no arguments then do nothing and return.
-  if (NumOps == 0)
-    return Chain; 
-
-  // FIXME: This portion of code currently assumes only
-  // primitive types being passed as arguments.
-
-  // Legalize the address before use
-  SDValue PtrLo, PtrHi;
-  unsigned AddressOffset;
-  int StoreOffset = 0;
-  LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl);
-  SDValue StoreRet;
-
-  std::vector<SDValue> Ops;
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  for (unsigned i=0, Offset = 0; i<NumOps; i++) {
-    // Get the argument
-    Arg = OutVals[i];
-    StoreOffset = (Offset + AddressOffset);
-   
-    // Store the argument on frame
-
-    Ops.clear();
-    Ops.push_back(Chain);
-    Ops.push_back(Arg);
-    Ops.push_back(PtrLo);
-    Ops.push_back(PtrHi);
-    Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8));
-    Ops.push_back(InFlag);
-
-    StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
-    Chain = getChain(StoreRet);
-    InFlag = getOutFlag(StoreRet);
-
-    // Update the frame offset to be used for next argument
-    ArgVT = Arg.getValueType();
-    Size = ArgVT.getSizeInBits();
-    Size = Size/8;    // Calculate size in bytes
-    Offset += Size;   // Increase the frame offset
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
-                        SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                        DebugLoc dl, SelectionDAG &DAG,
-                        SmallVectorImpl<SDValue> &InVals) const {
-  unsigned RetVals = Ins.size();
-
-  // If call does not have anything to return
-  // then do nothing and go back.
-  if (RetVals == 0)
-    return Chain;
-
-  // Call has something to return
-  SDValue LoadRet;
-
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-  for(unsigned i=0;i<RetVals;i++) {
-    LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, DataAddr_Lo,
-                          DataAddr_Hi, DAG.getConstant(i, MVT::i8),
-                          InFlag);
-    InFlag = getOutFlag(LoadRet);
-    Chain = getChain(LoadRet);
-    InVals.push_back(LoadRet);
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
-                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                      DebugLoc dl, SelectionDAG &DAG,
-                      SmallVectorImpl<SDValue> &InVals) const {
-
-  // Currently handling primitive types only. They will come in
-  // i8 parts
-  unsigned RetVals = Ins.size();
-
-  // Return immediately if the return type is void
-  if (RetVals == 0)
-    return Chain;
-
-  // Call has something to return
-  
-  // Legalize the address before use
-  SDValue LdLo, LdHi;
-  unsigned LdOffset;
-  LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl);
-
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-  SDValue LoadRet;
- 
-  for(unsigned i=0, Offset=0;i<RetVals;i++) {
-
-    LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, LdLo, LdHi,
-                          DAG.getConstant(LdOffset + Offset, MVT::i8),
-                          InFlag);
-
-    InFlag = getOutFlag(LoadRet);
-
-    Chain = getChain(LoadRet);
-    Offset++;
-    InVals.push_back(LoadRet);
-  }
-
-  return Chain;
-}
-
-SDValue
-PIC16TargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
-
-  // Number of values to return 
-  unsigned NumRet = Outs.size();
-
-  // Function returns value always on stack with the offset starting
-  // from 0 
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *F = MF.getFunction();
-  std::string FuncName = F->getName();
-
-  const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-  SDValue BS = DAG.getConstant(1, MVT::i8);
-  SDValue RetVal;
-  for(unsigned i=0;i<NumRet; ++i) {
-    RetVal = OutVals[i];
-    Chain =  DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
-                        ES, BS,
-                        DAG.getConstant (i, MVT::i8));
-      
-  }
-  return DAG.getNode(PIC16ISD::RET, dl, MVT::Other, Chain);
-}
-
-void PIC16TargetLowering::
-GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, 
-               SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
-               SelectionDAG &DAG) const {
-   assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
-           && "Don't know what to do of such callee!!");
-   SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
-   SDValue SeqStart  = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-   Chain = getChain(SeqStart);
-   SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
-
-   // Get the Lo and Hi part of code address
-   SDValue Lo = Callee.getOperand(0);
-   SDValue Hi = Callee.getOperand(1);
-
-   SDValue Data_Lo, Data_Hi;
-   SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-   // Subtract 2 from Address to get the Lower part of DataAddress.
-   SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag);
-   Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo, 
-                         DAG.getConstant(2, MVT::i8));
-   SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
-   Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3);
-   SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
-   Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
-   SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee,
-                              OperFlag);
-   Chain = getChain(Call);
-   OperFlag = getOutFlag(Call);
-   SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                                       OperFlag);
-   Chain = getChain(SeqEnd);
-   OperFlag = getOutFlag(SeqEnd);
-
-   // Low part of Data Address 
-   DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag);
-
-   // Make the second call.
-   SeqStart  = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-   Chain = getChain(SeqStart);
-   OperFlag = getOutFlag(SeqStart); // To manage the data dependency
-
-   // Subtract 1 from Address to get high part of data address.
-   Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo, 
-                         DAG.getConstant(1, MVT::i8));
-   SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
-   Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
-   PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
-
-   // Use new Lo to make another CALLW
-   Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
-   Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
-   Chain = getChain(Call);
-   OperFlag = getOutFlag(Call);
-   SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                                        OperFlag);
-   Chain = getChain(SeqEnd);
-   OperFlag = getOutFlag(SeqEnd);
-   // Hi part of Data Address
-   DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
-}
-
-SDValue
-PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
-                               CallingConv::ID CallConv, bool isVarArg,
-                               bool &isTailCall,
-                               const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               const SmallVectorImpl<SDValue> &OutVals,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               DebugLoc dl, SelectionDAG &DAG,
-                               SmallVectorImpl<SDValue> &InVals) const {
-    // PIC16 target does not yet support tail call optimization.
-    isTailCall = false;
-
-    assert(Callee.getValueType() == MVT::i16 &&
-           "Don't know how to legalize this call node!!!");
-
-    // The flag to track if this is a direct or indirect call.
-    bool IsDirectCall = true;    
-    unsigned RetVals = Ins.size();
-    unsigned NumArgs = Outs.size();
-
-    SDValue DataAddr_Lo, DataAddr_Hi; 
-    if (!isa<GlobalAddressSDNode>(Callee) &&
-        !isa<ExternalSymbolSDNode>(Callee)) {
-       IsDirectCall = false;    // This is indirect call
-
-       // If this is an indirect call then to pass the arguments
-       // and read the return value back, we need the data address
-       // of the function being called.
-       // To get the data address two more calls need to be made.
-
-       // Come here for indirect calls
-       SDValue Lo, Hi;
-       // Indirect addresses. Get the hi and lo parts of ptr.
-       GetExpandedParts(Callee, DAG, Lo, Hi);
-       // Connect Lo and Hi parts of the callee with the PIC16Connect
-       Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
-
-       // Read DataAddress only if we have to pass arguments or 
-       // read return value. 
-       if ((RetVals > 0) || (NumArgs > 0)) 
-         GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
-    }
-
-    SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
-
-    // Start the call sequence.
-    // Carring the Constant 0 along the CALLSEQSTART
-    // because there is nothing else to carry.
-    SDValue SeqStart  = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-    Chain = getChain(SeqStart);
-    SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
-    std::string Name;
-
-    // For any direct call - callee will be GlobalAddressNode or
-    // ExternalSymbol
-    SDValue ArgLabel, RetLabel;
-    if (IsDirectCall) { 
-       // Considering the GlobalAddressNode case here.
-       if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-          const GlobalValue *GV = G->getGlobal();
-          Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8);
-          Name = G->getGlobal()->getName();
-       } else {// Considering the ExternalSymbol case here
-          ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
-          Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8); 
-          Name = ES->getSymbol();
-       }
-
-       // Label for argument passing
-       const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name));
-       ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
-
-       // Label for reading return value
-       const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name));
-       RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
-    } else {
-       // if indirect call
-       SDValue CodeAddr_Lo = Callee.getOperand(0);
-       SDValue CodeAddr_Hi = Callee.getOperand(1);
-
-       /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
-                                 DAG.getConstant(2, MVT::i8));*/
-
-       // move Hi part in PCLATH
-       CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
-       Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
-                            CodeAddr_Hi);
-    } 
-
-    // Pass the argument to function before making the call.
-    SDValue CallArgs;
-    if (IsDirectCall) {
-      CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
-                                          Outs, OutVals, dl, DAG);
-      Chain = getChain(CallArgs);
-      OperFlag = getOutFlag(CallArgs);
-    } else {
-      CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
-                                            DataAddr_Hi, Outs, OutVals, Ins,
-                                            dl, DAG);
-      Chain = getChain(CallArgs);
-      OperFlag = getOutFlag(CallArgs);
-    }
-
-    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
-                                  OperFlag);
-    Chain = getChain(PICCall);
-    OperFlag = getOutFlag(PICCall);
-
-
-    // Carrying the Constant 0 along the CALLSEQSTART
-    // because there is nothing else to carry.
-    SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                                        OperFlag);
-    Chain = getChain(SeqEnd);
-    OperFlag = getOutFlag(SeqEnd);
-
-    // Lower the return value reading after the call.
-    if (IsDirectCall)
-      return LowerDirectCallReturn(RetLabel, Chain, OperFlag,
-                                   Ins, dl, DAG, InVals);
-    else
-      return LowerIndirectCallReturn(Chain, OperFlag, DataAddr_Lo,
-                                     DataAddr_Hi, Ins, dl, DAG, InVals);
-}
-
-bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const {
-  if (Op.getOpcode() == PIC16ISD::PIC16Load)
-    if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
-     || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
-      return true;
-  return false;
-}
-
-// NeedToConvertToMemOp - Returns true if one of the operands of the
-// operation 'Op' needs to be put into memory. Also returns the
-// operand no. of the operand to be converted in 'MemOp'. Remember, PIC16 has 
-// no instruction that can operation on two registers. Most insns take
-// one register and one memory operand (addwf) / Constant (addlw).
-bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, 
-                      SelectionDAG &DAG) const {
-  // If one of the operand is a constant, return false.
-  if (Op.getOperand(0).getOpcode() == ISD::Constant ||
-      Op.getOperand(1).getOpcode() == ISD::Constant)
-    return false;    
-
-  // Return false if one of the operands is already a direct
-  // load and that operand has only one use.
-  if (isDirectLoad(Op.getOperand(0))) {
-    if (Op.getOperand(0).hasOneUse()) {  
-      // Legal and profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned by topological order. Therefore first 
-      // assign topological order then perform legal and profitable check.
-      // Note:- Though this ordering is done before begining with legalization,
-      // newly added node during legalization process have NodeId=-1 (NewNode)
-      // therefore before performing any check proper ordering of the node is
-      // required.
-      DAG.AssignTopologicalOrder();
-
-      // Direct load operands are folded in binary operations. But before folding
-      // verify if this folding is legal. Fold only if it is legal otherwise
-      // convert this direct load to a separate memory operation.
-      if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0),
-                                          Op.getNode(), Op.getNode(),
-                                          CodeGenOpt::Default))
-        return false;
-      else 
-        MemOp = 0;
-    }
-  }
-
-  // For operations that are non-cummutative there is no need to check 
-  // for right operand because folding right operand may result in 
-  // incorrect operation. 
-  if (! SelectionDAG::isCommutativeBinOp(Op.getOpcode()))
-    return true;
-
-  if (isDirectLoad(Op.getOperand(1))) {
-    if (Op.getOperand(1).hasOneUse()) {
-      // Legal and profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned by topological order. Therefore first 
-      // assign topological order then perform legal and profitable check.
-      // Note:- Though this ordering is done before begining with legalization,
-      // newly added node during legalization process have NodeId=-1 (NewNode)
-      // therefore before performing any check proper ordering of the node is
-      // required.
-      DAG.AssignTopologicalOrder();
-
-      // Direct load operands are folded in binary operations. But before folding
-      // verify if this folding is legal. Fold only if it is legal otherwise
-      // convert this direct load to a separate memory operation.
-      if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1),
-                                          Op.getNode(), Op.getNode(),
-                                          CodeGenOpt::Default))
-         return false;
-      else 
-         MemOp = 1; 
-    }
-  }
-  return true;
-}  
-
-// LowerBinOp - Lower a commutative binary operation that does not
-// affect status flag carry.
-SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
-
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal Op to lower");
-
-  unsigned MemOp = 1;
-  if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
-    // Put one value on stack.
-    SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-
-    return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
-    NewVal);
-  }
-  else {
-    return Op;
-  }
-}
-
-// LowerADD - Lower all types of ADD operations including the ones
-// that affects carry.
-SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
-  DebugLoc dl = Op.getDebugLoc();
-  unsigned MemOp = 1;
-  if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
-    // Put one value on stack.
-    SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-    
-    // ADDC and ADDE produce two results.
-    SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
-    // ADDE has three operands, the last one is the carry bit.
-    if (Op.getOpcode() == ISD::ADDE)
-      return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
-                         NewVal, Op.getOperand(2));
-    // ADDC has two operands.
-    else if (Op.getOpcode() == ISD::ADDC)
-      return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
-                         NewVal);
-    // ADD it is. It produces only one result.
-    else
-      return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
-                         NewVal);
-  }
-  else
-    return Op;
-}
-
-SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
-  unsigned MemOp = 1;
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
-  // Since we don't have an instruction for X - c , 
-  // we can change it to X + (-c)
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-  if (C && (Op.getOpcode() == ISD::SUB))
-    {
-      return DAG.getNode(ISD::ADD, 
-                         dl, MVT::i8, Op.getOperand(0), 
-                         DAG.getConstant(0-(C->getZExtValue()), MVT::i8));
-    }
-
-  if (NeedToConvertToMemOp(Op, MemOp, DAG) ||
-      (isDirectLoad(Op.getOperand(1)) && 
-       (!isDirectLoad(Op.getOperand(0))) &&
-       (Op.getOperand(0).getOpcode() != ISD::Constant)))
-    {
-      // Put first operand on stack.
-      SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
-      
-      switch (Op.getOpcode()) {
-      default:
-        assert (0 && "Opcode unknown."); 
-      case ISD::SUBE:
-        return DAG.getNode(Op.getOpcode(), 
-                           dl, Tys, NewVal, Op.getOperand(1),
-                           Op.getOperand(2));
-        break;
-      case ISD::SUBC:
-        return DAG.getNode(Op.getOpcode(), 
-                           dl, Tys, NewVal, Op.getOperand(1));
-        break;
-      case ISD::SUB:
-        return DAG.getNode(Op.getOpcode(), 
-                           dl, MVT::i8, NewVal, Op.getOperand(1));
-        break;
-      }
-    }
-  else 
-    return Op;
-}
-
-void PIC16TargetLowering::InitReservedFrameCount(const Function *F,
-                                                 SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-
-  unsigned NumArgs = F->arg_size();
-
-  bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
-
-  if (isVoidFunc)
-    FuncInfo->setReservedFrameCount(NumArgs);
-  else
-    FuncInfo->setReservedFrameCount(NumArgs + 1);
-}
-
-// LowerFormalArguments - Argument values are loaded from the
-// <fname>.args + offset. All arguments are already broken to leaglized
-// types, so the offset just runs from 0 to NumArgVals - 1.
-
-SDValue
-PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
-                                          CallingConv::ID CallConv,
-                                          bool isVarArg,
-                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                          DebugLoc dl,
-                                          SelectionDAG &DAG,
-                                          SmallVectorImpl<SDValue> &InVals)
-                                            const {
-  unsigned NumArgVals = Ins.size();
-
-  // Get the callee's name to create the <fname>.args label to pass args.
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *F = MF.getFunction();
-  std::string FuncName = F->getName();
-
-  // Reset the map of FI and TmpOffset 
-  ResetTmpOffsetMap(DAG);
-  // Initialize the ReserveFrameCount
-  InitReservedFrameCount(F, DAG);
-
-  // Create the <fname>.args external symbol.
-  const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
-  // Load arg values from the label + offset.
-  SDVTList VTs  = DAG.getVTList (MVT::i8, MVT::Other);
-  SDValue BS = DAG.getConstant(1, MVT::i8);
-  for (unsigned i = 0; i < NumArgVals ; ++i) {
-    SDValue Offset = DAG.getConstant(i, MVT::i8);
-    SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
-                                  Offset);
-    Chain = getChain(PICLoad);
-    InVals.push_back(PICLoad);
-  }
-
-  return Chain;
-}
-
-// Perform DAGCombine of PIC16Load.
-// FIXME - Need a more elaborate comment here.
-SDValue PIC16TargetLowering::
-PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue Chain = N->getOperand(0); 
-  if (N->hasNUsesOfValue(0, 0)) {
-    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain);
-  }
-  return SDValue();
-}
-
-// For all the functions with arguments some STORE nodes are generated 
-// that store the argument on the frameindex. However in PIC16 the arguments
-// are passed on stack only. Therefore these STORE nodes are redundant. 
-// To remove these STORE nodes will be removed in PerformStoreCombine 
-//
-// Currently this function is doint nothing and will be updated for removing
-// unwanted store operations
-SDValue PIC16TargetLowering::
-PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const {
-  return SDValue(N, 0);
-  /*
-  // Storing an undef value is of no use, so remove it
-  if (isStoringUndef(N, Chain, DAG)) {
-    return Chain; // remove the store and return the chain
-  }
-  //else everything is ok.
-  return SDValue(N, 0);
-  */
-}
-
-SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N, 
-                                               DAGCombinerInfo &DCI) const {
-  switch (N->getOpcode()) {
-  case ISD::STORE:   
-   return PerformStoreCombine(N, DCI); 
-  case PIC16ISD::PIC16Load:   
-    return PerformPIC16LoadCombine(N, DCI);
-  }
-  return SDValue();
-}
-
-static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown condition code!");
-  case ISD::SETNE:  return PIC16CC::NE;
-  case ISD::SETEQ:  return PIC16CC::EQ;
-  case ISD::SETGT:  return PIC16CC::GT;
-  case ISD::SETGE:  return PIC16CC::GE;
-  case ISD::SETLT:  return PIC16CC::LT;
-  case ISD::SETLE:  return PIC16CC::LE;
-  case ISD::SETULT: return PIC16CC::ULT;
-  case ISD::SETULE: return PIC16CC::ULE;
-  case ISD::SETUGE: return PIC16CC::UGE;
-  case ISD::SETUGT: return PIC16CC::UGT;
-  }
-}
-
-// Look at LHS/RHS/CC and see if they are a lowered setcc instruction.  If so
-// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition.
-static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
-                             ISD::CondCode CC, unsigned &SPCC) {
-  if (isa<ConstantSDNode>(RHS) &&
-      cast<ConstantSDNode>(RHS)->isNullValue() &&
-      CC == ISD::SETNE &&
-      (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
-        LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
-      isa<ConstantSDNode>(LHS.getOperand(0)) &&
-      isa<ConstantSDNode>(LHS.getOperand(1)) &&
-      cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
-      cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
-    SDValue CMPCC = LHS.getOperand(3);
-    SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
-    LHS = CMPCC.getOperand(0);
-    RHS = CMPCC.getOperand(1);
-  }
-}
-
-// Returns appropriate CMP insn and corresponding condition code in PIC16CC
-SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, 
-                                         unsigned CC, SDValue &PIC16CC, 
-                                         SelectionDAG &DAG, DebugLoc dl) const {
-  PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
-
-  // PIC16 sub is literal - W. So Swap the operands and condition if needed.
-  // i.e. a < 12 can be rewritten as 12 > a.
-  if (RHS.getOpcode() == ISD::Constant) {
-
-    SDValue Tmp = LHS;
-    LHS = RHS;
-    RHS = Tmp;
-
-    switch (CondCode) {
-    default: break;
-    case PIC16CC::LT:
-      CondCode = PIC16CC::GT; 
-      break;
-    case PIC16CC::GT:
-      CondCode = PIC16CC::LT; 
-      break;
-    case PIC16CC::ULT:
-      CondCode = PIC16CC::UGT; 
-      break;
-    case PIC16CC::UGT:
-      CondCode = PIC16CC::ULT; 
-      break;
-    case PIC16CC::GE:
-      CondCode = PIC16CC::LE; 
-      break;
-    case PIC16CC::LE:
-      CondCode = PIC16CC::GE;
-      break;
-    case PIC16CC::ULE:
-      CondCode = PIC16CC::UGE;
-      break;
-    case PIC16CC::UGE:
-      CondCode = PIC16CC::ULE;
-      break;
-    }
-  }
-
-  PIC16CC = DAG.getConstant(CondCode, MVT::i8);
-
-  // These are signed comparisons. 
-  SDValue Mask = DAG.getConstant(128, MVT::i8);
-  if (isSignedComparison(CondCode)) {
-    LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
-    RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask); 
-  }
-
-  SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
-  // We can use a subtract operation to set the condition codes. But
-  // we need to put one operand in memory if required.
-  // Nothing to do if the first operand is already a valid type (direct load 
-  // for subwf and literal for sublw) and it is used by this operation only. 
-  if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS)) 
-      && LHS.hasOneUse())
-    return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-
-  // else convert the first operand to mem.
-  LHS = ConvertToMemOperand (LHS, DAG, dl);
-  return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-}
-
-
-SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDValue TrueVal = Op.getOperand(2);
-  SDValue FalseVal = Op.getOperand(3);
-  unsigned ORIGCC = ~0;
-  DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a select_cc of a "setcc", and if the setcc got lowered into
-  // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
-  // i.e.
-  // A setcc: lhs, rhs, cc is expanded by llvm to 
-  // select_cc: result of setcc, 0, 1, 0, setne
-  // We can think of it as:
-  // select_cc: lhs, rhs, 1, 0, cc
-  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
-  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
-  SDValue PIC16CC;
-  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
-  return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
-                      FalseVal, PIC16CC, Cmp.getValue(1)); 
-}
-
-MachineBasicBlock *
-PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                 MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
-  DebugLoc dl = MI->getDebugLoc();
-
-  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
-  // control-flow pattern.  The incoming instruction knows the destination vreg
-  // to set, the condition code register to branch on, the true/false values to
-  // select between, and a branch opcode to use.
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  //  thisMBB:
-  //  ...
-  //   TrueVal = ...
-  //   [f]bCC copy1MBB
-  //   fallthrough --> copy0MBB
-  MachineBasicBlock *thisMBB = BB;
-  MachineFunction *F = BB->getParent();
-  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
-
-  // Transfer the remainder of BB and its successor edges to sinkMBB.
-  sinkMBB->splice(sinkMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
-  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  // Next, add the true and fallthrough blocks as its successors.
-  BB->addSuccessor(copy0MBB);
-  BB->addSuccessor(sinkMBB);
-
-  //  copy0MBB:
-  //   %FalseValue = ...
-  //   # fallthrough to sinkMBB
-  BB = copy0MBB;
-
-  // Update machine-CFG edges
-  BB->addSuccessor(sinkMBB);
-
-  //  sinkMBB:
-  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-  //  ...
-  BB = sinkMBB;
-  BuildMI(*BB, BB->begin(), dl,
-          TII.get(PIC16::PHI), MI->getOperand(0).getReg())
-    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
-    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
-
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
-  return BB;
-}
-
-
-SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Chain = Op.getOperand(0);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-  SDValue LHS = Op.getOperand(2);   // LHS of the condition.
-  SDValue RHS = Op.getOperand(3);   // RHS of the condition.
-  SDValue Dest = Op.getOperand(4);  // BB to jump to
-  unsigned ORIGCC = ~0;
-  DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a br_cc of a "setcc", and if the setcc got lowered into
-  // an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
-  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
-  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
-  // Get the Compare insn and condition code.
-  SDValue PIC16CC;
-  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
-  return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC, 
-                     Cmp.getValue(1));
-}
-
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
deleted file mode 100644
index d942af46a9e9..000000000000
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ /dev/null
@@ -1,253 +0,0 @@
-//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PIC16 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16ISELLOWERING_H
-#define PIC16ISELLOWERING_H
-
-#include "PIC16.h"
-#include "PIC16Subtarget.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-#include <map>
-
-namespace llvm {
-  namespace PIC16ISD {
-    enum NodeType {
-      // Start the numbering from where ISD NodeType finishes.
-      FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-      Lo,            // Low 8-bits of GlobalAddress.
-      Hi,            // High 8-bits of GlobalAddress.
-      PIC16Load,
-      PIC16LdArg,   // This is replica of PIC16Load but used to load function 
-                    // arguments and is being used for facilitating for some 
-                    // store removal optimizations. 
-
-      PIC16LdWF,
-      PIC16Store,
-      PIC16StWF,
-      Banksel,
-      MTLO,          // Move to low part of FSR
-      MTHI,          // Move to high part of FSR
-      MTPCLATH,      // Move to PCLATCH
-      PIC16Connect,  // General connector for PIC16 nodes
-      BCF,
-      LSLF,          // PIC16 Logical shift left
-      LRLF,          // PIC16 Logical shift right
-      RLF,           // Rotate left through carry
-      RRF,           // Rotate right through carry
-      CALL,          // PIC16 Call instruction 
-      CALLW,         // PIC16 CALLW instruction 
-      SUBCC,         // Compare for equality or inequality.
-      SELECT_ICC,    // Pseudo to be caught in scheduler and expanded to brcond.
-      BRCOND,        // Conditional branch.
-      RET,           // Return.
-      Dummy
-    };
-
-    // Keep track of different address spaces. 
-    enum AddressSpace {
-      RAM_SPACE = 0,   // RAM address space
-      ROM_SPACE = 1    // ROM address space number is 1
-    };
-    enum PIC16Libcall {
-      MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
-      SRA_I8,
-      SLL_I8,
-      SRL_I8,
-      PIC16UnknownCall
-    };
-  }
-
-
-  //===--------------------------------------------------------------------===//
-  // TargetLowering Implementation
-  //===--------------------------------------------------------------------===//
-  class PIC16TargetLowering : public TargetLowering {
-  public:
-    explicit PIC16TargetLowering(PIC16TargetMachine &TM);
-
-    /// getTargetNodeName - This method returns the name of a target specific
-    /// DAG node.
-    virtual const char *getTargetNodeName(unsigned Opcode) const;
-    /// getSetCCResultType - Return the ISD::SETCC ValueType
-    virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const;
-    virtual MVT::SimpleValueType getCmpLibcallReturnType() const;
-    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const;
-    // Call returns
-    SDValue 
-    LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
-                          const SmallVectorImpl<ISD::InputArg> &Ins,
-                          DebugLoc dl, SelectionDAG &DAG,
-                          SmallVectorImpl<SDValue> &InVals) const;
-    SDValue 
-    LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
-                             SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                            const SmallVectorImpl<ISD::InputArg> &Ins,
-                            DebugLoc dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals) const;
-
-    // Call arguments
-    SDValue 
-    LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
-                             const SmallVectorImpl<ISD::OutputArg> &Outs,
-                             const SmallVectorImpl<SDValue> &OutVals,
-                             DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue 
-    LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
-                               SDValue DataAddr_Lo, SDValue DataAddr_Hi, 
-                               const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               const SmallVectorImpl<SDValue> &OutVals,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-    SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
-                        SelectionDAG &DAG, DebugLoc dl) const;
-    virtual MachineBasicBlock *
-      EmitInstrWithCustomInserter(MachineInstr *MI,
-                                  MachineBasicBlock *MBB) const;
-
-    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-    virtual void ReplaceNodeResults(SDNode *N,
-                                    SmallVectorImpl<SDValue> &Results,
-                                    SelectionDAG &DAG) const;
-    virtual void LowerOperationWrapper(SDNode *N,
-                                       SmallVectorImpl<SDValue> &Results,
-                                       SelectionDAG &DAG) const;
-
-    virtual SDValue
-    LowerFormalArguments(SDValue Chain,
-                         CallingConv::ID CallConv,
-                         bool isVarArg,
-                         const SmallVectorImpl<ISD::InputArg> &Ins,
-                         DebugLoc dl, SelectionDAG &DAG,
-                         SmallVectorImpl<SDValue> &InVals) const;
-
-    virtual SDValue
-      LowerCall(SDValue Chain, SDValue Callee,
-                CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
-                const SmallVectorImpl<ISD::OutputArg> &Outs,
-                const SmallVectorImpl<SDValue> &OutVals,
-                const SmallVectorImpl<ISD::InputArg> &Ins,
-                DebugLoc dl, SelectionDAG &DAG,
-                SmallVectorImpl<SDValue> &InVals) const;
-
-    virtual SDValue
-      LowerReturn(SDValue Chain,
-                  CallingConv::ID CallConv, bool isVarArg,
-                  const SmallVectorImpl<ISD::OutputArg> &Outs,
-                  const SmallVectorImpl<SDValue> &OutVals,
-                  DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const;
-
-    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 
-    SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; 
-    SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; 
-
-    // This function returns the Tmp Offset for FrameIndex. If any TmpOffset 
-    // already exists for the FI then it returns the same else it creates the 
-    // new offset and returns.
-    unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size,
-                               MachineFunction &MF) const;
-    void ResetTmpOffsetMap(SelectionDAG &DAG) const;
-    void InitReservedFrameCount(const Function *F,
-                                SelectionDAG &DAG) const;
-
-    /// getFunctionAlignment - Return the Log2 alignment of this function.
-    virtual unsigned getFunctionAlignment(const Function *) const {
-      // FIXME: The function never seems to be aligned.
-      return 1;
-    }
-  protected:
-    std::pair<const TargetRegisterClass*, uint8_t>
-    findRepresentativeClass(EVT VT) const;
-  private:
-    // If the Node is a BUILD_PAIR representing a direct Address,
-    // then this function will return true.
-    bool isDirectAddress(const SDValue &Op) const;
-
-    // If the Node is a DirectAddress in ROM_SPACE then this 
-    // function will return true
-    bool isRomAddress(const SDValue &Op) const;
-
-    // Extract the Lo and Hi component of Op. 
-    void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, 
-                          SDValue &Hi) const;
-
-
-    // Load pointer can be a direct or indirect address. In PIC16 direct
-    // addresses need Banksel and Indirect addresses need to be loaded to
-    // FSR first. Handle address specific cases here.
-    void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain, 
-                         SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const;
-
-    // FrameIndex should be broken down into ExternalSymbol and FrameOffset. 
-    void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, 
-                            int &Offset) const;
-
-    // For indirect calls data address of the callee frame need to be
-    // extracted. This function fills the arguments DataAddr_Lo and 
-    // DataAddr_Hi with the address of the callee frame.
-    void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
-                        SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
-                        SelectionDAG &DAG) const;
-
-    // We can not have both operands of a binary operation in W.
-    // This function is used to put one operand on stack and generate a load.
-    SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG,
-                                DebugLoc dl) const; 
-
-    // This function checks if we need to put an operand of an operation on
-    // stack and generate a load or not.
-    // DAG parameter is required to access DAG information during
-    // analysis.
-    bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
-                              SelectionDAG &DAG) const;
-
-    /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
-    /// make the right decision when generating code for different targets.
-    const PIC16Subtarget *Subtarget;
-
-
-    // Extending the LIB Call framework of LLVM
-    // to hold the names of PIC16Libcalls.
-    const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall]; 
-
-    // To set and retrieve the lib call names.
-    void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
-    const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const;
-
-    // Make PIC16 Libcall.
-    SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, 
-                             const SDValue *Ops, unsigned NumOps, bool isSigned,
-                             SelectionDAG &DAG, DebugLoc dl) const;
-
-    // Check if operation has a direct load operand.
-    inline bool isDirectLoad(const SDValue Op) const;
-  };
-} // namespace llvm
-
-#endif // PIC16ISELLOWERING_H
diff --git a/lib/Target/PIC16/PIC16InstrFormats.td b/lib/Target/PIC16/PIC16InstrFormats.td
deleted file mode 100644
index e213ea847fc8..000000000000
--- a/lib/Target/PIC16/PIC16InstrFormats.td
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- PIC16InstrFormats.td - PIC16 Instruction Formats-------*- tblgen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//  Describe PIC16 instructions format
-//
-//  All the possible PIC16 fields are:
-//
-//  opcode  - operation code.
-//  f       - 7-bit register file address.
-//  d       - 1-bit direction specifier
-//  k       - 8/11 bit literals
-//  b       - 3 bits bit num specifier
-//
-//===----------------------------------------------------------------------===//
-
-// Generic PIC16 Format
-// PIC16 Instructions are 14-bit wide.
-
-// FIXME: Add Cooper Specific Formats if any.
-
-class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
-  : Instruction {
-  field bits<14> Inst;
-
-  let Namespace = "PIC16";
-  dag OutOperandList = outs;
-  dag InOperandList = ins;
-  let AsmString = asmstr;
-  let Pattern = pattern;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Byte Oriented instruction class in PIC16 : <|opcode|d|f|>
-// opcode = 6 bits.
-// d = direction = 1 bit.
-// f = file register address = 7 bits.
-//===----------------------------------------------------------------------===//
-
-class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
-                 list<dag> pattern>
-  :PIC16Inst<outs, ins, asmstr, pattern> {
-  bits<1>  d;
-  bits<7>  f;
-
-  let Inst{13-8} = opcode;
-
-  let Inst{7} = d;
-  let Inst{6-0} = f; 
-}
-
-//===----------------------------------------------------------------------===//
-// Bit Oriented instruction class in PIC16 : <|opcode|b|f|>
-// opcode = 4 bits.
-// b = bit specifier = 3 bits.
-// f = file register address = 7 bits.
-//===----------------------------------------------------------------------===//
-
-class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr, 
-                list<dag> pattern>
-  : PIC16Inst<outs, ins, asmstr, pattern> {
-  bits<3>  b;
-  bits<7>  f;
-
-  let Inst{13-10} = opcode;
-
-  let Inst{9-7} = b;
-  let Inst{6-0} = f; 
-}
-
-//===----------------------------------------------------------------------===//
-// Literal Format instruction class in PIC16 : <|opcode|k|>
-// opcode = 6 bits
-// k = literal = 8 bits
-//===----------------------------------------------------------------------===//
-
-class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr, 
-                    list<dag> pattern>
-  : PIC16Inst<outs, ins, asmstr, pattern> {
-  bits<8> k;
-  
-  let Inst{13-8} = opcode;
-
-  let Inst{7-0} = k; 
-}
-
-//===----------------------------------------------------------------------===//
-// Control Format instruction class in PIC16 : <|opcode|k|>
-// opcode = 3 bits.
-// k = jump address = 11 bits.
-//===----------------------------------------------------------------------===//
-
-class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr, 
-                    list<dag> pattern>
-  : PIC16Inst<outs, ins, asmstr, pattern> {
-  bits<11> k;
-
-  let Inst{13-11} = opcode;
-
-  let Inst{10-0} = k; 
-}
-
-//===----------------------------------------------------------------------===//
-// Pseudo instruction class in PIC16
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
-  : PIC16Inst<outs, ins, asmstr, pattern> {
-   let Inst{13-6} = 0;
-}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
deleted file mode 100644
index 81257f3c4108..000000000000
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16InstrInfo.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16GenInstrInfo.inc"
-#include "llvm/Function.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstdio>
-
-
-using namespace llvm;
-
-// FIXME: Add the subtarget support on this constructor.
-PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm)
-  : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)),
-    TM(tm), 
-    RegInfo(*this, *TM.getSubtargetImpl()) {}
-
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot.  
-/// If not, return 0.  This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                            int &FrameIndex) const {
-  if (MI->getOpcode() == PIC16::movwf 
-      && MI->getOperand(0).isReg()
-      && MI->getOperand(1).isSymbol()) {
-    FrameIndex = MI->getOperand(1).getIndex();
-    return MI->getOperand(0).getReg();
-  }
-  return 0;
-}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the dest reg along with the FrameIndex of the stack slot.  
-/// If not, return 0.  This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                            int &FrameIndex) const {
-  if (MI->getOpcode() == PIC16::movf 
-      && MI->getOperand(0).isReg()
-      && MI->getOperand(1).isSymbol()) {
-    FrameIndex = MI->getOperand(1).getIndex();
-    return MI->getOperand(0).getReg();
-  }
-  return 0;
-}
-
-
-void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 
-                                         MachineBasicBlock::iterator I,
-                                         unsigned SrcReg, bool isKill, int FI,
-                                         const TargetRegisterClass *RC,
-                                         const TargetRegisterInfo *TRI) const {
-  const PIC16TargetLowering *PTLI = TM.getTargetLowering();
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
-  const Function *Func = MBB.getParent()->getFunction();
-  const std::string FuncName = Func->getName();
-
-  const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-
-  // On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
-  if (RC == PIC16::GPRRegisterClass) {
-    //MachineFunction &MF = *MBB.getParent();
-    //MachineRegisterInfo &RI = MF.getRegInfo();
-    BuildMI(MBB, I, DL, get(PIC16::movwf))
-      .addReg(SrcReg, getKillRegState(isKill))
-      .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
-      .addExternalSymbol(tmpName)
-      .addImm(1); // Emit banksel for it.
-  }
-  else if (RC == PIC16::FSR16RegisterClass) {
-    // This is a 16-bit register and the frameindex given by llvm is of
-    // size two here. Break this index N into two zero based indexes and 
-    // put one into the map. The second one is always obtained by adding 1
-    // to the first zero based index. In fact it is going to use 3 slots
-    // as saving FSRs corrupts W also and hence we need to save/restore W also.
-
-    unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0 
-                                                 : PIC16::save_fsr1;
-    BuildMI(MBB, I, DL, get(opcode))
-      .addReg(SrcReg, getKillRegState(isKill))
-      .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
-      .addExternalSymbol(tmpName)
-      .addImm(1); // Emit banksel for it.
-  }
-  else
-    llvm_unreachable("Can't store this register to stack slot");
-}
-
-void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 
-                                          MachineBasicBlock::iterator I,
-                                          unsigned DestReg, int FI,
-                                          const TargetRegisterClass *RC,
-                                          const TargetRegisterInfo *TRI) const {
-  const PIC16TargetLowering *PTLI = TM.getTargetLowering();
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
-  const Function *Func = MBB.getParent()->getFunction();
-  const std::string FuncName = Func->getName();
-
-  const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-
-  // On the order of operands here: think "movf FrameIndex, W".
-  if (RC == PIC16::GPRRegisterClass) {
-    //MachineFunction &MF = *MBB.getParent();
-    //MachineRegisterInfo &RI = MF.getRegInfo();
-    BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
-      .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
-      .addExternalSymbol(tmpName)
-      .addImm(1); // Emit banksel for it.
-  }
-  else if (RC == PIC16::FSR16RegisterClass) {
-    // This is a 16-bit register and the frameindex given by llvm is of
-    // size two here. Break this index N into two zero based indexes and 
-    // put one into the map. The second one is always obtained by adding 1
-    // to the first zero based index. In fact it is going to use 3 slots
-    // as saving FSRs corrupts W also and hence we need to save/restore W also.
-
-    unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0 
-                                                 : PIC16::restore_fsr1;
-    BuildMI(MBB, I, DL, get(opcode), DestReg)
-      .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
-      .addExternalSymbol(tmpName)
-      .addImm(1); // Emit banksel for it.
-  }
-  else
-    llvm_unreachable("Can't load this register from stack slot");
-}
-
-void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator I, DebugLoc DL,
-                                 unsigned DestReg, unsigned SrcReg,
-                                 bool KillSrc) const {
-  unsigned Opc;
-  if (PIC16::FSR16RegClass.contains(DestReg, SrcReg))
-    Opc = PIC16::copy_fsr;
-  else if (PIC16::GPRRegClass.contains(DestReg, SrcReg))
-    Opc = PIC16::copy_w;
-  else
-    llvm_unreachable("Impossible reg-to-reg copy");
-
-  BuildMI(MBB, I, DL, get(Opc), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-/// InsertBranch - Insert a branch into the end of the specified
-/// MachineBasicBlock.  This operands to this method are the same as those
-/// returned by AnalyzeBranch.  This is invoked in cases where AnalyzeBranch
-/// returns success and when an unconditional branch (TBB is non-null, FBB is
-/// null, Cond is empty) needs to be inserted. It returns the number of
-/// instructions inserted.
-unsigned PIC16InstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 
-             MachineBasicBlock *FBB,
-             const SmallVectorImpl<MachineOperand> &Cond,
-             DebugLoc DL) const {
-  // Shouldn't be a fall through.
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
-  if (FBB == 0) { // One way branch.
-    if (Cond.empty()) {
-      // Unconditional branch?
-      BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB);
-    }
-    return 1;
-  }
-
-  // FIXME: If the there are some conditions specified then conditional branch
-  // should be generated.   
-  // For the time being no instruction is being generated therefore
-  // returning NULL.
-  return 0;
-}
-
-bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
-    return true;
-
-  // Get the terminator instruction.
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return true;
-    --I;
-  }
-  // Handle unconditional branches. If the unconditional branch's target is
-  // successor basic block then remove the unconditional branch. 
-  if (I->getOpcode() == PIC16::br_uncond  && AllowModify) {
-    if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
-      TBB = 0;
-      I->eraseFromParent();
-    }
-  }
-  return true;
-}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
deleted file mode 100644
index 661b335d3b6c..000000000000
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ /dev/null
@@ -1,76 +0,0 @@
-//===- PIC16InstrInfo.h - PIC16 Instruction Information----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the niversity of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16INSTRUCTIONINFO_H
-#define PIC16INSTRUCTIONINFO_H
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
-
-
-class PIC16InstrInfo : public TargetInstrInfoImpl 
-{
-  PIC16TargetMachine &TM;
-  const PIC16RegisterInfo RegInfo;
-public:
-  explicit PIC16InstrInfo(PIC16TargetMachine &TM);
-
-  virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; }
-
-  /// isLoadFromStackSlot - If the specified machine instruction is a direct
-  /// load from a stack slot, return the virtual or physical register number of
-  /// the destination along with the FrameIndex of the loaded stack slot.  If
-  /// not, return 0.  This predicate must return 0 if the instruction has
-  /// any side effects other than loading from the stack slot.
-  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, 
-                                       int &FrameIndex) const;
-                                                                               
-  /// isStoreToStackSlot - If the specified machine instruction is a direct
-  /// store to a stack slot, return the virtual or physical register number of
-  /// the source reg along with the FrameIndex of the loaded stack slot.  If
-  /// not, return 0.  This predicate must return 0 if the instruction has
-  /// any side effects other than storing to the stack slot.
-  virtual unsigned isStoreToStackSlot(const MachineInstr *MI, 
-                                      int &FrameIndex) const;
-
-  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC,
-                                   const TargetRegisterInfo *TRI) const;
-                                                                               
-  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC,
-                                    const TargetRegisterInfo *TRI) const;
-  virtual void copyPhysReg(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I, DebugLoc DL,
-                           unsigned DestReg, unsigned SrcReg,
-                           bool KillSrc) const;
-  virtual 
-  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                        MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond,
-                        DebugLoc DL) const; 
-  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                             MachineBasicBlock *&FBB,
-                             SmallVectorImpl<MachineOperand> &Cond,
-                             bool AllowModify) const;
-  };
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
deleted file mode 100644
index 86d36cb76ee4..000000000000
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ /dev/null
@@ -1,540 +0,0 @@
-//===- PIC16InstrInfo.td - PIC16 Instruction defs -------------*- tblgen-*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the PIC16 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Type Constraints.
-//===----------------------------------------------------------------------===//
-class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
-class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Type Profiles.
-//===----------------------------------------------------------------------===//
-
-// Generic type profiles for i8/i16 unary/binary operations.
-// Taking one i8 or i16 and producing void.
-def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
-def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
-
-// Taking one value and producing an output of same type.
-def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
-def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
-
-// Taking two values and producing an output of same type.
-def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
-def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>, 
-                                       SDTCisI16<2>]>;
-
-// Node specific type profiles.
-def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>, 
-                                          SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>, 
-                                          SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
-                                            SDTCisI8<2>]>;
-
-// PIC16ISD::CALL type prorile
-def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
-
-// PIC16ISD::BRCOND
-def SDT_PIC16Brcond: SDTypeProfile<0, 2, 
-                                   [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
-
-// PIC16ISD::BRCOND
-def SDT_PIC16Selecticc: SDTypeProfile<1, 3, 
-                                   [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
-                                    SDTCisI8<3>]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 addressing modes matching via DAG.
-//===----------------------------------------------------------------------===//
-def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Node Definitions.
-//===----------------------------------------------------------------------===//
-def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
-                                [SDNPHasChain, SDNPOutFlag]>;
-def PIC16callseq_end   : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp, 
-                                [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-
-// Low 8-bits of GlobalAddress.
-def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;  
-
-// High 8-bits of GlobalAddress.
-def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
-
-// The MTHI and MTLO nodes are used only to match them in the incoming 
-// DAG for replacement by corresponding set_fsrhi, set_fsrlo insntructions.
-// These nodes are not used for defining any instructions.
-def MTLO     : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
-def MTHI     : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
-def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
-
-// Node to generate Bank Select for a GlobalAddress.
-def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
-
-// Node to match a direct store operation.
-def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
-def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store, 
-                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
-// Node to match a direct load operation.
-def PIC16Load  : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>;
-def PIC16LdArg  : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>;
-def PIC16LdWF  : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load, 
-                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>;
-
-// Node to match PIC16 call
-def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call,
-                              [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
-def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw,
-                              [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
-
-// Node to match a comparison instruction.
-def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>;
-
-// Node to match a conditional branch.
-def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond, 
-                         [SDNPHasChain, SDNPInFlag]>;
-
-def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc, 
-                         [SDNPInFlag]>;
-
-def PIC16ret       : SDNode<"PIC16ISD::RET", SDTNone, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Operand Definitions.
-//===----------------------------------------------------------------------===//
-def i8mem : Operand<i8>;
-def brtarget: Operand<OtherVT>;
-
-// Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
-  def CCOp : Operand<i8>;
-
-include "PIC16InstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// PIC16 Common Classes.
-//===----------------------------------------------------------------------===//
-
-// W = W Op F : Load the value from F and do Op to W.
-let Constraints = "$src = $dst", mayLoad = 1 in
-class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
-  ByteFormat<OpCode, (outs GPR:$dst),
-             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-              !strconcat(OpcStr, " $ptrlo + $offset, W"),
-             [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
-                                             (i8 imm:$ptrhi),
-                                             (i8 imm:$offset))))]>;
-
-// F = F Op W : Load the value from F, do op with W and store in F.
-// This insn class is not marked as TwoAddress because the reg is
-// being used as a source operand only. (Remember a TwoAddress insn
-// needs a copy.)
-let mayStore = 1 in
-class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
-  ByteFormat<OpCode, (outs),
-             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-              !strconcat(OpcStr, " $ptrlo + $offset, F"),
-             [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
-                                             (i8 imm:$ptrhi),
-                                             (i8 imm:$offset))),
-                                             diraddr:$ptrlo,
-                                             (i8 imm:$ptrhi), (i8 imm:$offset)
-                                             )]>;
-
-// W = W Op L : Do Op of L with W and place result in W.
-let Constraints = "$src = $dst" in
-class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
-  LiteralFormat<opcode, (outs GPR:$dst),
-                (ins GPR:$src, i8imm:$literal),
-                !strconcat(OpcStr, " $literal"),
-                [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Instructions.
-//===----------------------------------------------------------------------===//
-
-// Pseudo-instructions.
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt),
-                       "!ADJCALLSTACKDOWN $amt",
-                       [(PIC16callseq_start imm:$amt)]>;
-
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt),
-                       "!ADJCALLSTACKUP $amt", 
-                       [(PIC16callseq_end imm:$amt)]>;
-
-//-----------------------------------
-// Vaious movlw insn patterns.
-//-----------------------------------
-let isReMaterializable = 1 in {
-// Move 8-bit literal to W.
-def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src),
-                      "movlw $src",
-                      [(set GPR:$dst, (i8 imm:$src))]>;
-
-// Move a Lo(TGA) to W.
-def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
-                      "movlw LOW(${src} + ${src2})",
-                      [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>;
-
-// Move a Lo(TES) to W.
-def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
-                      "movlw LOW(${src} + ${src2})",
-                      [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>;
-
-// Move a Hi(TGA) to W.
-def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
-                      "movlw HIGH(${src} + ${src2})",
-                      [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>;
-
-// Move a Hi(TES) to W.
-def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
-                      "movlw HIGH(${src} + ${src2})",
-                      [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>;
-}
-
-//-------------------
-// FSR setting insns. 
-//-------------------
-// These insns are matched via a DAG replacement pattern.
-def set_fsrlo:
-  ByteFormat<0, (outs FSR16:$fsr), 
-             (ins GPR:$val),
-             "movwf ${fsr}L",
-             []>;
-
-let Constraints = "$src = $dst" in
-def set_fsrhi:
-  ByteFormat<0, (outs FSR16:$dst), 
-             (ins FSR16:$src, GPR:$val),
-             "movwf ${dst}H",
-             []>;
-
-def set_pclath:
-  ByteFormat<0, (outs PCLATHR:$dst), 
-             (ins GPR:$val),
-             "movwf ${dst}",
-             [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
-
-//----------------------------
-// copyPhysReg 
-// copyPhysReg insns. These are dummy. They should always be deleted
-// by the optimizer and never be present in the final generated code.
-// if they are, then we have to write correct macros for these insns.
-//----------------------------
-def copy_fsr:
-  Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>;
-
-def copy_w:
-  Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>;
-
-class SAVE_FSR<string OpcStr>:
-  Pseudo<(outs), 
-         (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), 
-         !strconcat(OpcStr, " $ptrlo, $offset"),
-         []>; 
- 
-def save_fsr0: SAVE_FSR<"save_fsr0">;
-def save_fsr1: SAVE_FSR<"save_fsr1">;
-
-class RESTORE_FSR<string OpcStr>:
-  Pseudo<(outs FSR16:$dst), 
-         (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), 
-         !strconcat(OpcStr, " $ptrlo, $offset"),
-         []>; 
-
-def restore_fsr0: RESTORE_FSR<"restore_fsr0">;
-def restore_fsr1: RESTORE_FSR<"restore_fsr1">;
-
-//--------------------------
-// Store to memory
-//-------------------------
-
-// Direct store.
-// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel.
-let mayStore = 1 in
-class MOVWF_INSN<bits<6> OpCode, SDNode OpNodeDest, SDNode Op>:
-  ByteFormat<0, (outs), 
-             (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-             "movwf ${ptrlo} + ${offset}",
-             [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi), 
-               (i8 imm:$offset))]>;
-
-// Store W to a Global Address.
-def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>;
-
-// Store W to an External Symobol.
-def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>;
-
-// Store with InFlag and OutFlag
-// This is same as movwf_1 but has a flag. A flag is required to 
-// order the stores while passing the params to function.
-def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>;
-
-// Indirect store. Matched via a DAG replacement pattern.
-def store_indirect : 
-  ByteFormat<0, (outs), 
-             (ins GPR:$val, FSR16:$fsr, i8imm:$offset),
-             "movwi $offset[$fsr]",
-             []>;
-
-//----------------------------
-// Load from memory
-//----------------------------
-// Direct load.
-// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
-// Output: dst = W
-let Defs = [STATUS], mayLoad = 1 in
-class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>:
-  ByteFormat<0, (outs GPR:$dst), 
-             (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-             "movf ${ptrlo} + ${offset}, W",
-             [(set GPR:$dst, 
-               (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi),
-               (i8 imm:$offset)))]>;
-
-// Load from a GA.
-def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>;
-
-// Load from an ES.
-def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>;
-def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>;
-
-// Load with InFlag and OutFlag
-// This is same as movf_1 but has a flag. A flag is required to 
-// order the loads while copying the return value of a function.
-def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>;
-
-// Indirect load. Matched via a DAG replacement pattern.
-def load_indirect : 
-  ByteFormat<0, (outs GPR:$dst), 
-             (ins FSR16:$fsr, i8imm:$offset),
-             "moviw $offset[$fsr]",
-             []>;
-
-//-------------------------
-// Bitwise operations patterns
-//--------------------------
-// W = W op [F]
-let Defs = [STATUS] in {
-def OrFW :  BinOpFW<0, "iorwf", or>;
-def XOrFW : BinOpFW<0, "xorwf", xor>;
-def AndFW : BinOpFW<0, "andwf", and>;
-
-// F = W op [F]
-def OrWF :  BinOpWF<0, "iorwf", or>;
-def XOrWF : BinOpWF<0, "xorwf", xor>;
-def AndWF : BinOpWF<0, "andwf", and>;
-
-//-------------------------
-// Various add/sub patterns.
-//-------------------------
-
-// W = W + [F]
-def addfw_1: BinOpFW<0, "addwf", add>;
-def addfw_2: BinOpFW<0, "addwf", addc>;
-
-let Uses = [STATUS] in
-def addfwc: BinOpFW<0, "addwfc", adde>;  // With Carry.
-
-// F = W + [F]
-def addwf_1: BinOpWF<0, "addwf", add>;
-def addwf_2: BinOpWF<0, "addwf", addc>;
-let Uses = [STATUS] in
-def addwfc: BinOpWF<0, "addwfc", adde>;  // With Carry.
-}
-
-// W -= [F] ; load from F and sub the value from W.
-let Constraints = "$src = $dst", mayLoad = 1 in
-class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
-  ByteFormat<OpCode, (outs GPR:$dst),
-             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-              !strconcat(OpcStr, " $ptrlo + $offset, W"),
-             [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo,
-                                      (i8 imm:$ptrhi), (i8 imm:$offset)),
-                                      GPR:$src))]>;
-let Defs = [STATUS] in {
-def subfw_1: SUBFW<0, "subwf", sub>;
-def subfw_2: SUBFW<0, "subwf", subc>;
-
-let Uses = [STATUS] in
-def subfwb: SUBFW<0, "subwfb", sube>;  // With Borrow.
-
-}
-let Defs = [STATUS], isTerminator = 1 in
-def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
-
-// [F] -= W ; 
-let mayStore = 1 in
-class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
-  ByteFormat<OpCode, (outs),
-             (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
-              !strconcat(OpcStr, " $ptrlo + $offset"),
-             [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo,
-                                      (i8 imm:$ptrhi), (i8 imm:$offset)),
-                                      GPR:$src), diraddr:$ptrlo,
-                                      (i8 imm:$ptrhi), (i8 imm:$offset))]>;
-
-let Defs = [STATUS] in {
-def subwf_1: SUBWF<0, "subwf", sub>;
-def subwf_2: SUBWF<0, "subwf", subc>;
-
-let Uses = [STATUS] in
-  def subwfb: SUBWF<0, "subwfb", sube>;  // With Borrow.
-
-def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>;
-}
-
-// addlw 
-let Defs = [STATUS] in {
-def addlw_1 : BinOpWL<0, "addlw", add>;
-def addlw_2 : BinOpWL<0, "addlw", addc>;
-
-let Uses = [STATUS] in
-def addlwc : BinOpWL<0, "addlwc", adde>; // With Carry. (Assembler macro).
-
-// bitwise operations involving a literal and w.
-def andlw : BinOpWL<0, "andlw", and>;
-def xorlw : BinOpWL<0, "xorlw", xor>;
-def orlw  : BinOpWL<0, "iorlw", or>;
-}
-
-// sublw 
-// W = C - W ; sub W from literal. (Without borrow).
-let Constraints = "$src = $dst" in
-class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
-  LiteralFormat<opcode, (outs GPR:$dst),
-                (ins GPR:$src, i8imm:$literal),
-                !strconcat(OpcStr, " $literal"),
-                [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
-// subwl 
-// W = W - C ; sub literal from W  (Without borrow).
-let Constraints = "$src = $dst" in
-class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
-  LiteralFormat<opcode, (outs GPR:$dst),
-                (ins GPR:$src, i8imm:$literal),
-                !strconcat(OpcStr, " $literal"),
-                [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
-
-let Defs = [STATUS] in {
-def sublw_1 : SUBLW<0, "sublw", sub>;
-def sublw_2 : SUBLW<0, "sublw", subc>;
-def sublw_3 : SUBLW<0, "sublwb", sube>; // With borrow (Assembler macro).
-
-def sublw_4 : SUBWL<0, "subwl", sub>;   // Assembler macro replace with addlw
-def sublw_5 : SUBWL<0, "subwl", subc>;  // Assembler macro replace with addlw
-def sublw_6 : SUBWL<0, "subwlb", sube>; // With borrow (Assembler macro).
-}
-let Defs = [STATUS], isTerminator = 1 in 
-def sublw_cc : SUBLW<0, "sublw", PIC16Subcc>;
-
-// Call instruction.
-let isCall = 1,
-    Defs = [W, FSR0, FSR1] in {
-    def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func),
-            //"call ${func} + 2",
-            "call ${func}",
-            [(PIC16call diraddr:$func)]>;
-}
-
-let isCall = 1,
-    Defs = [W, FSR0, FSR1] in {
-    def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc),
-            "callw",
-            [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>;
-}
-
-let isCall = 1,
-    Defs = [FSR0, FSR1] in {
-    def CALLW: LiteralFormat<0x1, (outs GPR:$dest), 
-                                  (ins GPR:$func, PCLATHR:$pc),
-            "callw",
-            [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>;
-}
-
-let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
-def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc),
-                          "b$cc $dst",
-                          [(PIC16Brcond bb:$dst, imm:$cc)]>;
-
-// Unconditional branch.
-let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
-def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
-                          "goto $dst",
-                          [(br bb:$dst)]>;
-
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
-// instruction selection into a branch sequence.
-let usesCustomInserter = 1 in {   // Expanded after instruction selection.
-  def SELECT_CC_Int_ICC
-   : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
-            "; SELECT_CC_Int_ICC PSEUDO!",
-            [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F,
-                                             imm:$Cond))]>;
-}
-
-
-// Banksel.
-def banksel : 
-  Pseudo<(outs),
-         (ins i8mem:$ptr),
-         "banksel $ptr",
-         []>;
-
-def pagesel : 
-  Pseudo<(outs),
-         (ins i8mem:$ptr),
-         "movlp $ptr",
-         []>;
-
-
-// Return insn.
-let isTerminator = 1, isBarrier = 1, isReturn = 1 in
-def Return : 
-  ControlFormat<0, (outs), (ins), "return", [(PIC16ret)]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Replacment Patterns.
-//===----------------------------------------------------------------------===//
-
-// Identify an indirect store and select insns for it.
-def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), 
-           imm:$offset),
-          (store_indirect GPR:$val, 
-           (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
-           imm:$offset)>;
-
-def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), 
-           imm:$offset),
-          (store_indirect GPR:$val, 
-           (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
-           imm:$offset)>;
-
-// Identify an indirect load and select insns for it.
-def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), 
-           imm:$offset),
-          (load_indirect  (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
-           imm:$offset)>;
-
-def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), 
-           imm:$offset),
-          (load_indirect  (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
-           imm:$offset)>;
-
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
deleted file mode 100644
index 1bcc4971ebb7..000000000000
--- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- PIC16MCAsmInfo.cpp - PIC16 asm properties -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the PIC16MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16MCAsmInfo.h"
-
-// FIXME: Layering violation to get enums and static function, should be moved
-// to separate headers.
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16ISelLowering.h"
-using namespace llvm;
-
-PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) {
-  CommentString = ";";
-  GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
-  GlobalDirective = "\tglobal\t";
-  ExternDirective = "\textern\t";
-
-  Data8bitsDirective = " db ";
-  Data16bitsDirective = " dw ";
-  Data32bitsDirective = " dl ";
-  Data64bitsDirective = NULL;
-  ZeroDirective = NULL;
-  AsciiDirective = " dt ";
-  AscizDirective = NULL;
-    
-  RomData8bitsDirective = " dw ";
-  RomData16bitsDirective = " rom_di ";
-  RomData32bitsDirective = " rom_dl ";
-  HasSetDirective = false;  
-    
-  // Set it to false because we weed to generate c file name and not bc file
-  // name.
-  HasSingleParameterDotFile = false;
-}
-
-const char *PIC16MCAsmInfo::getDataASDirective(unsigned Size,
-                                               unsigned AS) const {
-  if (AS != PIC16ISD::ROM_SPACE)
-    return 0;
-  
-  switch (Size) {
-  case  8: return RomData8bitsDirective;
-  case 16: return RomData16bitsDirective;
-  case 32: return RomData32bitsDirective;
-  default: return NULL;
-  }
-}
-
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h
deleted file mode 100644
index 6e1c111078ce..000000000000
--- a/lib/Target/PIC16/PIC16MCAsmInfo.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//=====-- PIC16MCAsmInfo.h - PIC16 asm properties -------------*- C++ -*--====//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the PIC16MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16TARGETASMINFO_H
-#define PIC16TARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
-  class Target;
-  class StringRef;
-
-  class PIC16MCAsmInfo : public MCAsmInfo {
-    const char *RomData8bitsDirective;
-    const char *RomData16bitsDirective;
-    const char *RomData32bitsDirective;
-  public:    
-    PIC16MCAsmInfo(const Target &T, StringRef TT);
-    
-    virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
-  };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h
deleted file mode 100644
index bdf50867f2e1..000000000000
--- a/lib/Target/PIC16/PIC16MachineFunctionInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//====- PIC16MachineFuctionInfo.h - PIC16 machine function info -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares PIC16-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16MACHINEFUNCTIONINFO_H
-#define PIC16MACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// PIC16MachineFunctionInfo - This class is derived from MachineFunction
-/// private PIC16 target-specific information for each MachineFunction.
-class PIC16MachineFunctionInfo : public MachineFunctionInfo {
-  // The frameindexes generated for spill/reload are stack based.
-  // This maps maintain zero based indexes for these FIs.
-  std::map<unsigned, unsigned> FiTmpOffsetMap;
-  unsigned TmpSize;
-
-  // These are the frames for return value and argument passing 
-  // These FrameIndices will be expanded to foo.frame external symbol
-  // and all others will be expanded to foo.tmp external symbol.
-  unsigned ReservedFrameCount;
-
-public:
-  PIC16MachineFunctionInfo()
-    : TmpSize(0), ReservedFrameCount(0) {}
-
-  explicit PIC16MachineFunctionInfo(MachineFunction &MF)
-    : TmpSize(0), ReservedFrameCount(0) {}
-
-  std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; }
-
-  unsigned getTmpSize() const { return TmpSize; }
-  void setTmpSize(unsigned Size) { TmpSize = Size; }
-
-  unsigned getReservedFrameCount() const { return ReservedFrameCount; }
-  void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
deleted file mode 100644
index b6aa38f765ea..000000000000
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer  --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which optimizes the emitting of banksel 
-// instructions before accessing data memory. This currently works within
-// a basic block only and keep tracks of the last accessed memory bank.
-// If memory access continues to be in the same bank it just makes banksel
-// immediate, which is a part of the insn accessing the data memory, from 1
-// to zero. The asm printer emits a banksel only if that immediate is 1. 
-//
-// FIXME: this is not implemented yet.  The banksel pass only works on local
-// basic blocks.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-codegen"
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16InstrInfo.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/DerivedTypes.h"
-
-using namespace llvm;
-
-namespace {
-  struct MemSelOpt : public MachineFunctionPass {
-    static char ID;
-    MemSelOpt() : MachineFunctionPass(ID) {}
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreservedID(MachineLoopInfoID);
-      AU.addPreservedID(MachineDominatorsID);
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const { 
-      return "PIC16 Memsel Optimizer"; 
-    }
-
-   bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
-   bool processInstruction(MachineInstr *MI);
-
-  private:
-    const TargetInstrInfo *TII; // Machine instruction info.
-    MachineBasicBlock *MBB;     // Current basic block
-    std::string CurBank;
-    int PageChanged;
-
-  };
-  char MemSelOpt::ID = 0;
-}
-
-FunctionPass *llvm::createPIC16MemSelOptimizerPass() { 
-  return new MemSelOpt(); 
-}
-
-
-/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
-/// register references into FP stack references.
-///
-bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  bool Changed = false;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    Changed |= processBasicBlock(MF, *I);
-  }
-
-  return Changed;
-}
-
-/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// transforming FP instructions into their stack form.
-///
-bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
-  bool Changed = false;
-  MBB = &BB;
-
-  // Let us assume that when entering a basic block now bank is selected.
-  // Ideally we should look at the predecessors for this information.
-  CurBank=""; 
-  PageChanged=0;
-
-  MachineBasicBlock::iterator I;
-  for (I = BB.begin(); I != BB.end(); ++I) {
-    Changed |= processInstruction(I);
-
-    // if the page has changed insert a page sel before 
-    // any instruction that needs one
-    if (PageChanged == 1)
-    {
-      // Restore the page if it was changed, before leaving the basic block,
-      // because it may be required by the goto terminator or the fall thru
-      // basic blcok.
-      // If the terminator is return, we don't need to restore since there
-      // is no goto or fall thru basic block.
-      if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto
-          (I->getOpcode() == PIC16::sublw_6) || //macro has goto
-          (I->getOpcode() == PIC16::addlwc)  || //macro has goto
-          (TII->get(I->getOpcode()).isBranch()))
-      {
-        DebugLoc dl = I->getDebugLoc();
-        BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
-        Changed = true;
-        PageChanged = 0;            
-      }
-    }
-  }
-
-   // The basic block is over, but if we did not find any goto yet,
-   // we haven't restored the page.
-   // Restore the page if it was changed, before leaving the basic block,
-   // because it may be required by fall thru basic blcok.
-   // If the terminator is return, we don't need to restore since there
-   // is fall thru basic block.
-   if (PageChanged == 1) {
-      // save the end pointer before we move back to last insn.
-     MachineBasicBlock::iterator J = I;
-     I--;
-     const TargetInstrDesc &TID = TII->get(I->getOpcode());
-     if (! TID.isReturn())
-     {
-       DebugLoc dl = I->getDebugLoc();
-       BuildMI(*MBB, J, dl, 
-               TII->get(PIC16::pagesel)).addExternalSymbol("$");
-       Changed = true;
-       PageChanged = 0;
-     }
-   }
-
-
-  return Changed;
-}
-
-bool MemSelOpt::processInstruction(MachineInstr *MI) {
-  bool Changed = false;
-
-  unsigned NumOperands = MI->getNumOperands();
-  if (NumOperands == 0) return false;
-
-
-  // If this insn is not going to access any memory, return.
-  const TargetInstrDesc &TID = TII->get(MI->getOpcode());
-  if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
-    return false;
-
-  // The first thing we should do is that record if banksel/pagesel are
-  // changed in an unknown way. This can happend via any type of call. 
-  // We do it here first before scanning of MemOp / BBOp as the indirect
-  // call insns do not have any operands, but they still may change bank/page.
-  if (TID.isCall()) {
-    // Record that we have changed the page, so that we can restore it
-    // before basic block ends.
-    // We require to signal that a page anc bank change happened even for
-    // indirect calls. 
-    PageChanged = 1;
-
-    // When a call is made, there may be banksel for variables in callee.
-    // Hence the banksel in caller needs to be reset.
-    CurBank = "";
-  }
-
-  // Scan for the memory address operand.
-  // FIXME: Should we use standard interfaces like memoperands_iterator,
-  // hasMemOperand() etc ?
-  int MemOpPos = -1;
-  int BBOpPos = -1;
-  for (unsigned i = 0; i < NumOperands; i++) {
-    MachineOperand Op = MI->getOperand(i);
-    if (Op.getType() ==  MachineOperand::MO_GlobalAddress ||
-        Op.getType() ==  MachineOperand::MO_ExternalSymbol) { 
-      // We found one mem operand. Next one may be BS.
-      MemOpPos = i;
-    }
-    if (Op.getType() ==  MachineOperand::MO_MachineBasicBlock) {
-      // We found one BB operand. Next one may be pagesel.
-      BBOpPos = i;
-    }
-  }
-
-  // If we did not find an insn accessing memory. Continue.
-  if ((MemOpPos == -1) &&
-      (BBOpPos == -1))
-    return false;
-  assert ((BBOpPos != MemOpPos) && "operand can only be of one type");
- 
-
-  // If this is a pagesel material, handle it first.
-  // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp.
-  // Pagesel is required only for a direct call.
-  if ((MI->getOpcode() == PIC16::CALL)) {
-    // Get the BBOp.
-    MachineOperand &MemOp = MI->getOperand(MemOpPos);
-    DebugLoc dl = MI->getDebugLoc();
-    BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp);   
-
-    // CALL and br_ucond needs only pagesel. so we are done.
-    return true; 
-  }
-
-  // Pagesel is handled. Now, add a Banksel if needed.
-  if (MemOpPos == -1) return Changed;
-  // Get the MemOp.
-  MachineOperand &Op = MI->getOperand(MemOpPos);
-
-  // Get the section name(NewBank) for MemOp.
-  // This assumes that the section names for globals are already set by
-  // AsmPrinter->doInitialization.
-  std::string NewBank = CurBank;
-  bool hasExternalLinkage = false;
-  if (Op.getType() ==  MachineOperand::MO_GlobalAddress &&
-      Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
-    if (Op.getGlobal()->hasExternalLinkage())
-      hasExternalLinkage= true;
-    NewBank = Op.getGlobal()->getSection();
-  } else if (Op.getType() ==  MachineOperand::MO_ExternalSymbol) {
-    // External Symbol is generated for temp data and arguments. They are
-    // in fpdata.<functionname>.# section.
-    std::string Sym = Op.getSymbolName();
-    NewBank = PAN::getSectionNameForSym(Sym);
-  }
-
-  // If the section is shared section, do not emit banksel.
-  if (NewBank == PAN::getSharedUDataSectionName())
-    return Changed;
-
-  // If the previous and new section names are same, we don't need to
-  // emit banksel. 
-  if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) {
-    DebugLoc dl = MI->getDebugLoc();
-    BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
-      addOperand(Op);
-    Changed = true;
-    CurBank = NewBank;
-  }
-
-  return Changed;
-}
-
diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile
deleted file mode 100644
index 9684b8d2cae4..000000000000
--- a/lib/Target/PIC16/PIC16Passes/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PIC16/PIC16Passes/Makefile -----------*- Makefile -*-===##
-# 
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source 
-# License. See LICENSE.TXT for details.
-# 
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-TARGET = PIC16
-LIBRARYNAME = LLVMpic16passes
-BUILD_ARCHIVE = 1
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
deleted file mode 100644
index 56f021157092..000000000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to clone all functions that are shared between
-// the main line code (ML) and interrupt line code (IL). It clones all such
-// shared functions and their automatic global vars by adding the .IL suffix. 
-//
-// This pass is supposed to be run on the linked .bc module.
-// It traveses the module call graph twice. Once starting from the main function
-// and marking each reached function as "ML". Again, starting from the ISR
-// and cloning any reachable function that was marked as "ML". After cloning
-// the function, it remaps all the call sites in IL functions to call the
-// cloned functions. 
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "PIC16Cloner.h"
-#include "../PIC16ABINames.h"
-#include <vector>
-
-using namespace llvm;
-using std::vector;
-using std::string;
-using std::map;
-
-namespace llvm {
-  char PIC16Cloner::ID = 0;
-
-  ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); }
-}
-
-// We currently intend to run these passes in opt, which does not have any
-// diagnostic support. So use these functions for now. In future
-// we will probably write our own driver tool.
-//
-void PIC16Cloner::reportError(string ErrorString) {
-  errs() << "ERROR : " << ErrorString << "\n";
-  exit(1);
-}
-
-void PIC16Cloner::
-reportError (string ErrorString, vector<string> &Values) {
-  unsigned ValCount = Values.size();
-  string TargetString;
-  for (unsigned i=0; i<ValCount; ++i) {
-    TargetString = "%";
-    TargetString += ((char)i + '0');
-    ErrorString.replace(ErrorString.find(TargetString), TargetString.length(), 
-                        Values[i]);
-  }
-  errs() << "ERROR : " << ErrorString << "\n";
-  exit(1);
-}
-
-
-// Entry point
-//
-bool PIC16Cloner::runOnModule(Module &M) {
-   CallGraph &CG = getAnalysis<CallGraph>();
-
-   // Search for the "main" and "ISR" functions.
-   CallGraphNode *mainCGN = NULL, *isrCGN = NULL;
-   for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++)
-   {
-     // External calling node doesn't have any function associated with it.
-     if (! it->first)
-       continue;
-     
-     if (it->first->getName().str() == "main") {
-       mainCGN = it->second;
-     }
-
-     if (PAN::isISR(it->first->getSection())) {
-       isrCGN = it->second;
-     }
- 
-     // Don't search further if we've found both.
-     if (mainCGN && isrCGN)
-       break;
-   }
-
-   // We have nothing to do if any of the main or ISR is missing.
-   if (! mainCGN || ! isrCGN) return false;
-       
-   // Time for some diagnostics.
-   // See if the main itself is interrupt function then report an error.
-   if (PAN::isISR(mainCGN->getFunction()->getSection())) {
-     reportError("Function 'main' can't be interrupt function");
-   }
-
-    
-   // Mark all reachable functions from main as ML.
-   markCallGraph(mainCGN, "ML");
-
-   // And then all the functions reachable from ISR will be cloned.
-   cloneSharedFunctions(isrCGN);
-
-   return true;
-}
-
-// Mark all reachable functions from the given node, with the given mark.
-//
-void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
-  // Mark the top node first.
-  Function *thisF = CGN->getFunction();
-
-  thisF->setSection(StringMark);
-
-  // Mark all the called functions
-  for(CallGraphNode::iterator cgn_it = CGN->begin();
-              cgn_it != CGN->end(); ++cgn_it) {
-     Function *CalledF = cgn_it->second->getFunction();
-
-     // If calling an external function then CallGraphNode
-     // will not be associated with any function.
-     if (! CalledF)
-       continue;
-  
-     // Issue diagnostic if interrupt function is being called.
-     if (PAN::isISR(CalledF->getSection())) {
-       vector<string> Values;
-       Values.push_back(CalledF->getName().str());
-       reportError("Interrupt function (%0) can't be called", Values); 
-     }
-
-     // Has already been mark 
-     if (CalledF->getSection().find(StringMark) != string::npos) {
-       // Should we do anything here?
-     } else {
-       // Mark now
-       CalledF->setSection(StringMark);
-     }
-
-     // Before going any further mark all the called function by current
-     // function.
-     markCallGraph(cgn_it->second ,StringMark);
-  } // end of loop of all called functions.
-}
-
-
-// For PIC16, automatic variables of a function are emitted as globals.
-// Clone the auto variables of a function  and put them in VMap, 
-// this VMap will be used while
-// Cloning the code of function itself.
-//
-void PIC16Cloner::CloneAutos(Function *F) {
-  // We'll need to update module's globals list as well. So keep a reference
-  // handy.
-  Module *M = F->getParent();
-  Module::GlobalListType &Globals = M->getGlobalList();
-
-  // Clear the leftovers in VMap by any previous cloning.
-  VMap.clear();
-
-  // Find the auto globls for this function and clone them, and put them
-  // in VMap.
-  std::string FnName = F->getName().str();
-  std::string VarName, ClonedVarName;
-  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
-       I != E; ++I) {
-    VarName = I->getName().str();
-    if (PAN::isLocalToFunc(FnName, VarName)) {
-      // Auto variable for current function found. Clone it.
-      const GlobalVariable *GV = I;
-
-      const Type *InitTy = GV->getInitializer()->getType();
-      GlobalVariable *ClonedGV = 
-        new GlobalVariable(InitTy, false, GV->getLinkage(), 
-                           GV->getInitializer());
-      ClonedGV->setName(PAN::getCloneVarName(FnName, VarName));
-      // Add these new globals to module's globals list.
-      Globals.push_back(ClonedGV);
- 
-      // Update VMap.
-      VMap[GV] = ClonedGV;
-     }
-  }
-}
-
-
-// Clone all functions that are reachable from ISR and are already 
-// marked as ML.
-//
-void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
-
-  // Check all the called functions from ISR.
-  for(CallGraphNode::iterator cgn_it = CGN->begin(); 
-              cgn_it != CGN->end(); ++cgn_it) {
-     Function *CalledF = cgn_it->second->getFunction();
-
-     // If calling an external function then CallGraphNode
-     // will not be associated with any function.
-     if (!CalledF)
-       continue;
-  
-     // Issue diagnostic if interrupt function is being called.
-     if (PAN::isISR(CalledF->getSection())) {
-       vector<string> Values;
-       Values.push_back(CalledF->getName().str());
-       reportError("Interrupt function (%0) can't be called", Values); 
-     }
-
-     if (CalledF->getSection().find("ML") != string::npos) {
-       // Function is alternatively marked. It should be a shared one.
-       // Create IL copy. Passing called function as first argument
-       // and the caller as the second argument.
-
-       // Before making IL copy, first ensure that this function has a 
-       // body. If the function does have a body. It can't be cloned.
-       // Such a case may occur when the function has been declarated
-       // in the C source code but its body exists in assembly file.
-       if (!CalledF->isDeclaration()) {
-         Function *cf = cloneFunction(CalledF);
-         remapAllSites(CGN->getFunction(), CalledF, cf);
-       }  else {
-         // It is called only from ISR. Still mark it as we need this info
-         // in code gen while calling intrinsics.Function is not marked.
-         CalledF->setSection("IL");
-       }
-     }
-     // Before going any further clone all the shared function reachaable 
-     // by current function.
-     cloneSharedFunctions(cgn_it->second);
-  } // end of loop of all called functions.
-}
-
-// Clone the given function and return it.
-// Note: it uses the VMap member of the class, which is already populated
-// by cloneAutos by the time we reach here. 
-// FIXME: Should we just pass VMap's ref as a parameter here? rather
-// than keeping the VMap as a member.
-Function *
-PIC16Cloner::cloneFunction(Function *OrgF) {
-   Function *ClonedF;
-
-   // See if we already cloned it. Return that. 
-   cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF);
-   if(cm_it != ClonedFunctionMap.end()) {
-     ClonedF = cm_it->second;
-     return ClonedF;
-   }
-
-   // Clone does not exist. 
-   // First clone the autos, and populate VMap.
-   CloneAutos(OrgF);
-
-   // Now create the clone.
-   ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
-
-   // The new function should be for interrupt line. Therefore should have 
-   // the name suffixed with IL and section attribute marked with IL. 
-   ClonedF->setName(PAN::getCloneFnName(OrgF->getName()));
-   ClonedF->setSection("IL");
-
-   // Add the newly created function to the module.
-   OrgF->getParent()->getFunctionList().push_back(ClonedF);
-
-   // Update the ClonedFunctionMap to record this cloning activity.
-   ClonedFunctionMap[OrgF] = ClonedF;
-
-   return ClonedF;
-}
-
-
-// Remap the call sites of shared functions, that are in IL.
-// Change the IL call site of a shared function to its clone.
-//
-void PIC16Cloner::
-remapAllSites(Function *Caller, Function *OrgF, Function *Clone) {
-  // First find the caller to update. If the caller itself is cloned
-  // then use the cloned caller. Otherwise use it.
-  cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller);
-  if (cm_it != ClonedFunctionMap.end())
-    Caller = cm_it->second;
-
-  // For the lack of a better call site finding mechanism, iterate over 
-  // all insns to find the uses of original fn.
-  for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) {
-    BasicBlock &BB = *BI;
-    for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
-      if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF)
-          II->setOperand(0, Clone);
-    }
-  }
-}
-
-
-
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
deleted file mode 100644
index e7d67ce09629..000000000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ /dev/null
@@ -1,83 +0,0 @@
-//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains declaration of a cloner class clone all functions that 
-// are shared between the main line code (ML) and interrupt line code (IL).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16CLONER_H
-#define PIC16CLONER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-using namespace llvm;
-using std::vector;
-using std::string;
-using std::map;
-
-namespace llvm {
-  // forward classes.
-  class Value;
-  class Function;
-  class Module;
-  class ModulePass;
-  class CallGraph;
-  class CallGraphNode;
-  class AnalysisUsage;
-
-  class PIC16Cloner : public ModulePass { 
-  public:
-    static char ID; // Class identification 
-    PIC16Cloner() : ModulePass(ID)  {}
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<CallGraph>();
-    }
-    virtual bool runOnModule(Module &M);
-
-  private: // Functions
-    // Mark reachable functions for the MainLine or InterruptLine.
-    void markCallGraph(CallGraphNode *CGN, string StringMark);
-
-    // Clone auto variables of function specified.
-    void CloneAutos(Function *F);
-   
-    // Clone the body of a function.
-    Function *cloneFunction(Function *F);
-
-    // Clone all shared functions.
-    void cloneSharedFunctions(CallGraphNode *isrCGN);
-
-    // Remap all call sites to the shared function.
-    void remapAllSites(Function *Caller, Function *OrgF, Function *Clone);
-
-    // Error reporting for PIC16Pass
-    void reportError(string ErrorString, vector<string> &Values);
-    void reportError(string ErrorString);
-
-  private:  //data
-    // Records if the interrupt function has already been found.
-    // If more than one interrupt function is found then an error
-    // should be thrown.
-    bool foundISR;
-
-    // This ValueMap maps the auto variables of the original functions with
-    // the corresponding cloned auto variable of the cloned function. 
-    // This value map is passed during the function cloning so that all the
-    // uses of auto variables be updated properly. 
-    ValueMap<const Value*, Value*> VMap;
-
-    // Map of a already cloned functions. 
-    map<Function *, Function *> ClonedFunctionMap;
-    typedef map<Function *, Function *>::iterator cloned_map_iterator;
-  };
-}  // End of anonymous namespace
-
-#endif
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
deleted file mode 100644
index 0f8928a4b5f5..000000000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===-- PIC16Overlay.cpp - Implementation for PIC16 Frame Overlay===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Frame Overlay implementation.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Value.h"
-#include "PIC16Overlay.h"
-#include "llvm/Function.h"
-#include <cstdlib>
-#include <sstream>
-using namespace llvm;
-
-namespace llvm {
-  char PIC16Overlay::ID = 0;
-  ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); }
-}
-
-void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<CallGraph>();
-}
-
-void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
-  // Do not set any color for external calling node.
-  if (Depth != 0 && CGN->getFunction()) {
-    unsigned Color = getColor(CGN->getFunction());
-
-    // Handle indirectly called functions
-    if (Color >= PIC16OVERLAY::StartIndirectCallColor || 
-        Depth >= PIC16OVERLAY::StartIndirectCallColor) {
-      // All functions called from an indirectly called function are given
-      // an unique color.
-      if (Color < PIC16OVERLAY::StartIndirectCallColor &&
-          Depth >= PIC16OVERLAY::StartIndirectCallColor)
-        setColor(CGN->getFunction(), Depth);
-
-      for (unsigned int i = 0; i < CGN->size(); i++)
-        DFSTraverse((*CGN)[i], ++IndirectCallColor);
-      return;
-    }
-    // Just return if the node already has a color greater than the current 
-    // depth. A node must be colored with the maximum depth that it has.
-    if (Color >= Depth)
-      return;
-    
-    Depth = ModifyDepthForInterrupt(CGN, Depth);  
-    setColor(CGN->getFunction(), Depth);
-  }
-  
-  // Color all children of this node with color depth+1.
-  for (unsigned int i = 0; i < CGN->size(); i++)
-    DFSTraverse((*CGN)[i], Depth+1);
-}
-
-unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
-                                                    unsigned Depth) {
-  Function *Fn = CGN->getFunction();
-
-  // Return original Depth if function or section for function do not exist.
-  if (!Fn || !Fn->hasSection())
-    return Depth;
-
-  // Return original Depth if this function is not marked as interrupt.
-  if (Fn->getSection().find("interrupt") == string::npos)
-    return Depth;
-
-  Depth = Depth + InterruptDepth;
-  return Depth;
-}
-
-void PIC16Overlay::setColor(Function *Fn, unsigned Color) {
-  std::string Section = "";
-  if (Fn->hasSection())
-    Section = Fn->getSection();
-
-  size_t Pos = Section.find(OverlayStr);
-
-  // Convert Color to string.
-  std::stringstream ss;
-  ss << Color;
-  std::string ColorString = ss.str();
-
-  // If color is already set then reset it with the new value. Else append 
-  // the Color string to section.
-  if (Pos != std::string::npos) {
-    Pos += OverlayStr.length();
-    char c = Section.at(Pos);
-    unsigned OldColorLength = 0;  
-    while (c >= '0' && c<= '9') {
-      OldColorLength++;    
-      if (Pos < Section.length() - 1)
-        Pos++;
-      else
-        break;
-      c = Section.at(Pos);
-    }
-    // Replace old color with new one.
-    Section.replace(Pos-OldColorLength +1, OldColorLength, ColorString); 
-  }
-  else {
-    // Append Color information to section string.
-    if (Fn->hasSection())
-      Section.append(" ");
-    Section.append(OverlayStr + ColorString);
-  }
-  Fn->setSection(Section);
-}
-
-unsigned PIC16Overlay::getColor(Function *Fn) {
-  int Color = 0;
-  if (!Fn->hasSection())
-    return 0;
-
-  std::string Section = Fn->getSection();
-  size_t Pos = Section.find(OverlayStr);
-  
-  // Return 0 if Color is not set.
-  if (Pos == std::string::npos)
-    return 0;
-
-  // Set Pos to after "Overlay=".
-  Pos += OverlayStr.length();
-  char c = Section.at(Pos);
-  std::string ColorString = "";
-
-  // Find the string representing Color. A Color can only consist of digits.
-  while (c >= '0' && c<= '9') { 
-    ColorString.append(1,c);
-    if (Pos < Section.length() - 1)
-      Pos++;
-    else
-      break;
-    c = Section.at(Pos);
-  }
-  Color = atoi(ColorString.c_str());
-  
-  return Color;    
-}
-
-bool PIC16Overlay::runOnModule(Module &M) {
-  CallGraph &CG = getAnalysis<CallGraph>();
-  CallGraphNode *ECN = CG.getExternalCallingNode();
-
-  MarkIndirectlyCalledFunctions(M); 
-  // Since External Calling Node is the base function, do a depth first 
-  // traversal of CallGraph with ECN as root. Each node with be marked with 
-  // a color that is max(color(callers)) + 1.
-  if(ECN) {
-    DFSTraverse(ECN, 0);
-  }
-  return false;
-}
-
-void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
-  // If the use of a function is not a call instruction then this
-  // function might be called indirectly. In that case give it
-  // an unique color.
-  for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
-    for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
-         ++I) {
-      User *U = *I;
-      if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
-          || !CallSite(cast<Instruction>(U)).isCallee(I)) {
-        setColor(MI, ++IndirectCallColor);
-        break;
-      }
-    }
-  }
-}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
deleted file mode 100644
index 2f611e65de1f..000000000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Overlay infrastructure.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16FRAMEOVERLAY_H
-#define PIC16FRAMEOVERLAY_H
- 
-
-using std::string;
-using namespace llvm;
-
-namespace  llvm {
-  // Forward declarations.
-  class Function;
-  class Module;
-  class ModulePass;
-  class AnalysisUsage;
-  class CallGraphNode;
-  class CallGraph;
-
-  namespace PIC16OVERLAY {
-    enum OverlayConsts {
-      StartInterruptColor = 200,
-      StartIndirectCallColor = 300
-    }; 
-  }
-  class PIC16Overlay : public ModulePass {
-    std::string OverlayStr;
-    unsigned InterruptDepth;
-    unsigned IndirectCallColor;
-  public:
-    static char ID; // Class identification 
-    PIC16Overlay() : ModulePass(ID) {
-      OverlayStr = "Overlay=";
-      InterruptDepth = PIC16OVERLAY::StartInterruptColor;
-      IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const; 
-    virtual bool runOnModule(Module &M);
-
-  private: 
-    unsigned getColor(Function *Fn);
-    void setColor(Function *Fn, unsigned Color);
-    unsigned ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth);
-    void MarkIndirectlyCalledFunctions(Module &M);
-    void DFSTraverse(CallGraphNode *CGN, unsigned Depth);
-  };
-}  // End of  namespace
-
-#endif
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
deleted file mode 100644
index 76de47fdf0f4..000000000000
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-reg-info"
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii,
-                                     const PIC16Subtarget &st)
-  : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP),
-    TII(tii),
-    ST(st) {}
-
-#include "PIC16GenRegisterInfo.inc"
-
-/// PIC16 Callee Saved Registers
-const unsigned* PIC16RegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
-  static const unsigned CalleeSavedRegs[] = { 0 };
-  return CalleeSavedRegs;
-}
-
-BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-  BitVector Reserved(getNumRegs());
-  return Reserved;
-}
-
-bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
-  return false;
-}
-
-void PIC16RegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
-                    RegScavenger *RS) const
-{ /* NOT YET IMPLEMENTED */ }
-
-void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
-{    /* NOT YET IMPLEMENTED */  }
-
-void PIC16RegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{    /* NOT YET IMPLEMENTED */  }
-
-int PIC16RegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
-  llvm_unreachable("Not keeping track of debug information yet!!");
-  return -1;
-}
-
-unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  llvm_unreachable("PIC16 Does not have any frame register");
-  return 0;
-}
-
-unsigned PIC16RegisterInfo::getRARegister() const {
-  llvm_unreachable("PIC16 Does not have any return address register");
-  return 0;
-}
-
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void PIC16RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator I) const {
-  // Simply discard ADJCALLSTACKDOWN,
-  // ADJCALLSTACKUP instructions.
-  MBB.erase(I);
-}
-
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
deleted file mode 100644
index 20052b003442..000000000000
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ /dev/null
@@ -1,64 +0,0 @@
-//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16REGISTERINFO_H
-#define PIC16REGISTERINFO_H
-
-#include "PIC16GenRegisterInfo.h.inc"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
-
-// Forward Declarations.
-  class PIC16Subtarget;
-  class TargetInstrInfo;
-
-class PIC16RegisterInfo : public PIC16GenRegisterInfo {
-  private:
-    const TargetInstrInfo &TII;
-    const PIC16Subtarget &ST;
-  
-  public:
-    PIC16RegisterInfo(const TargetInstrInfo &tii, 
-                      const PIC16Subtarget &st);
-
-
-  //------------------------------------------------------
-  // Pure virtual functions from TargetRegisterInfo
-  //------------------------------------------------------
-
-  // PIC16 callee saved registers
-  virtual const unsigned* 
-  getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
-  virtual BitVector getReservedRegs(const MachineFunction &MF) const;
-  virtual bool hasFP(const MachineFunction &MF) const;
-
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
-                                   int SPAdj, RegScavenger *RS=NULL) const;
-
-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I) const;
-
-  virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
-  virtual unsigned getFrameRegister(const MachineFunction &MF) const;
-  virtual unsigned getRARegister() const;
-
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.td b/lib/Target/PIC16/PIC16RegisterInfo.td
deleted file mode 100644
index 2959d912ec32..000000000000
--- a/lib/Target/PIC16/PIC16RegisterInfo.td
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//  Declarations that describe the PIC16 register file
-//===----------------------------------------------------------------------===//
-
-class PIC16Reg<string n> : Register<n> {
-  let Namespace = "PIC16";
-}
-
-// PIC16 Registers.
-def W   : PIC16Reg<"W">;
-def FSR0   : PIC16Reg<"FSR0">;
-def FSR1   : PIC16Reg<"FSR1">;
-def BS     : PIC16Reg<"BS">;
-def PCLATH : PIC16Reg<"PCLATH">;
-
-def STATUS : PIC16Reg<"STATUS">;
-
-// PIC16 Register classes.
-def GPR     : RegisterClass<"PIC16", [i8],  8, [W]>;
-def FSR16   : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>;
-def BSR     : RegisterClass<"PIC16", [i8],  8, [BS]>;
-def PCLATHR : RegisterClass<"PIC16", [i8],  8, [PCLATH]>;
-def STATUSR : RegisterClass<"PIC16", [i8],  8, [STATUS]>;
-
diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp
deleted file mode 100644
index 2505b111f1e8..000000000000
--- a/lib/Target/PIC16/PIC16Section.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- PIC16Section.cpp - PIC16 Section ----------- --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16Section.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-
-// This is the only way to create a PIC16Section. Sections created here
-// do not need to be explicitly deleted as they are managed by auto_ptrs.
-PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty,
-                                   StringRef Address, int Color,
-                                   MCContext &Ctx) {
-
-  /// Determine the internal SectionKind info.
-  /// Users of PIC16Section class should not need to know the internal
-  /// SectionKind. They should work only with PIC16SectionType.
-  ///
-  /// PIC16 Terminology for section kinds is as below.
-  /// UDATA - BSS
-  /// IDATA - initialized data (equiv to Metadata) 
-  /// ROMDATA - ReadOnly.
-  /// UDATA_OVR - Sections that can be overlaid. Section of such type is
-  ///             used to contain function autos an frame. We can think of
-  ///             it as equiv to llvm ThreadBSS)
-  /// UDATA_SHR - Shared RAM. Memory area that is mapped to all banks.
-
-  SectionKind K;
-  switch (Ty) {
-    default: llvm_unreachable ("can not create unknown section type");
-    case UDATA_OVR: {
-      K = SectionKind::getThreadBSS();
-      break;
-    }
-    case UDATA_SHR:
-    case UDATA: {
-      K = SectionKind::getBSS();
-      break;
-    }
-    case ROMDATA:
-    case IDATA: {
-      K = SectionKind::getMetadata();
-      break;
-    }
-    case CODE: {
-      K = SectionKind::getText();
-      break;
-    }
-      
-  }
-
-  // Copy strings into context allocated memory so they get free'd when the
-  // context is destroyed.
-  char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1));
-  memcpy(NameCopy, Name.data(), Name.size());
-  char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1));
-  memcpy(AddressCopy, Address.data(), Address.size());
-
-  // Create the Section.
-  PIC16Section *S =
-    new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K,
-                           StringRef(AddressCopy, Address.size()), Color);
-  S->T = Ty;
-  return S;
-}
-
-// A generic way to print all types of sections.
-void PIC16Section::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                          raw_ostream &OS) const {
- 
-  // If the section is overlaid(i.e. it has a color), print overlay name for 
-  // it. Otherwise print its normal name.
-  if (Color != -1)
-    OS << PAN::getOverlayName(getName(), Color) << '\t';
-  else
-    OS << getName() << '\t';
-
-  // Print type.
-  switch (getType()) {
-  default : llvm_unreachable ("unknown section type"); 
-  case UDATA: OS << "UDATA"; break;
-  case IDATA: OS << "IDATA"; break;
-  case ROMDATA: OS << "ROMDATA"; break;
-  case UDATA_SHR: OS << "UDATA_SHR"; break;
-  case UDATA_OVR: OS << "UDATA_OVR"; break;
-  case CODE: OS << "CODE"; break;
-  }
-
-  OS << '\t';
-
-  // Print Address.
-  OS << Address;
-
-  OS << '\n';
-}
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
deleted file mode 100644
index 5b33b51a3866..000000000000
--- a/lib/Target/PIC16/PIC16Section.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- PIC16Section.h - PIC16-specific section representation -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16Section class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_PIC16SECTION_H
-#define LLVM_PIC16SECTION_H
-
-#include "llvm/MC/MCSection.h"
-#include "llvm/GlobalVariable.h"
-#include <vector>
-
-namespace llvm {
-  /// PIC16Section - Represents a physical section in PIC16 COFF.
-  /// Contains data objects.
-  ///
-  class PIC16Section : public MCSection {
-    /// PIC16 Sections does not really use the SectionKind class to
-    /// to distinguish between various types of sections. PIC16 maintain
-    /// its own Section Type info. See the PIC16SectionType enum in PIC16.h 
-    /// for various section types.
-    PIC16SectionType T;
-
-    /// Name of the section to uniquely identify it.
-    StringRef Name;
-
-    /// User can specify an address at which a section should be placed. 
-    /// Negative value here means user hasn't specified any. 
-    StringRef Address; 
-
-    /// Overlay information - Sections with same color can be overlaid on
-    /// one another.
-    int Color; 
-
-    /// Total size of all data objects contained here.
-    unsigned Size;
-    
-    PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
-      : MCSection(SV_PIC16, K), Name(name), Address(addr),
-        Color(color), Size(0) {
-    }
-    
-  public:
-    /// Return the name of the section.
-    StringRef getName() const { return Name; }
-
-    /// Return the Address of the section.
-    StringRef getAddress() const { return Address; }
-
-    /// Return the Color of the section.
-    int getColor() const { return Color; }
-    void setColor(int color) { Color = color; }
-
-    /// Return the size of the section.
-    unsigned getSize() const { return Size; }
-    void setSize(unsigned size) { Size = size; }
-
-    /// Conatined data objects.
-    // FIXME: This vector is leaked because sections are allocated with a
-    //        BumpPtrAllocator.
-    std::vector<const GlobalVariable *>Items;
-
-    /// Check section type. 
-    bool isUDATA_Type() const { return T == UDATA; }
-    bool isIDATA_Type() const { return T == IDATA; }
-    bool isROMDATA_Type() const { return T == ROMDATA; }
-    bool isUDATA_OVR_Type() const { return T == UDATA_OVR; }
-    bool isUDATA_SHR_Type() const { return T == UDATA_SHR; }
-    bool isCODE_Type() const { return T == CODE; }
-
-    PIC16SectionType getType() const { return T; }
-
-    /// This would be the only way to create a section. 
-    static PIC16Section *Create(StringRef Name, PIC16SectionType Ty, 
-                                StringRef Address, int Color, 
-                                MCContext &Ctx);
-    
-    /// Override this as PIC16 has its own way of printing switching
-    /// to a section.
-    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const;
-
-    static bool classof(const MCSection *S) {
-      return S->getVariant() == SV_PIC16;
-    }
-    static bool classof(const PIC16Section *) { return true; }
-  };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
deleted file mode 100644
index 995955a0c897..000000000000
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PIC16SelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-selectiondag-info"
-#include "PIC16TargetMachine.h"
-using namespace llvm;
-
-PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM)
-  : TargetSelectionDAGInfo(TM) {
-}
-
-PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
-}
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
deleted file mode 100644
index c67fd8bfce24..000000000000
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PIC16 subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16SELECTIONDAGINFO_H
-#define PIC16SELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class PIC16TargetMachine;
-
-class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
-  explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM);
-  ~PIC16SelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp
deleted file mode 100644
index 33fc3fb16994..000000000000
--- a/lib/Target/PIC16/PIC16Subtarget.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PIC16 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16Subtarget.h"
-#include "PIC16GenSubtarget.inc"
-
-using namespace llvm;
-
-PIC16Subtarget::PIC16Subtarget(const std::string &TT, const std::string &FS, 
-                               bool Cooper)
-  :IsCooper(Cooper)
-{
-  std::string CPU = "generic";
-
-  // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
-}
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
deleted file mode 100644
index 81e3783d7299..000000000000
--- a/lib/Target/PIC16/PIC16Subtarget.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16SUBTARGET_H
-#define PIC16SUBTARGET_H
-
-#include "llvm/Target/TargetSubtarget.h"
-
-#include <string>
-
-namespace llvm {
-
-class PIC16Subtarget : public TargetSubtarget {
-
-  // IsCooper - Target ISA is Cooper.
-  bool IsCooper;
-
-public:
-  /// This constructor initializes the data members to match that
-  /// of the specified triple.
-  ///
-  PIC16Subtarget(const std::string &TT, const std::string &FS, bool Cooper);
-  
-  /// isCooper - Returns true if the target ISA is Cooper.
-  bool isCooper() const { return IsCooper; }
-
-  /// ParseSubtargetFeatures - Parses features string setting specified 
-  /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
-};
-} // End llvm namespace
-
-#endif  // PIC16SUBTARGET_H
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
deleted file mode 100644
index 82b69be8d13a..000000000000
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializePIC16Target() {
-  // Register the target. Curretnly the codegen works for
-  // enhanced pic16 mid-range.
-  RegisterTargetMachine<PIC16TargetMachine> X(ThePIC16Target);
-  RegisterAsmInfo<PIC16MCAsmInfo> A(ThePIC16Target);
-}
-
-
-// PIC16TargetMachine - Enhanced PIC16 mid-range Machine. May also represent
-// a Traditional Machine if 'Trad' is true.
-PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
-                                       const std::string &FS, bool Trad)
-: LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS, Trad),
-  DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), 
-  InstrInfo(*this), TLInfo(*this), TSInfo(*this),
-  FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
-
-
-bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
-                                         CodeGenOpt::Level OptLevel) {
-  // Install an instruction selector.
-  PM.add(createPIC16ISelDag(*this));
-  return false;
-}
-
-bool PIC16TargetMachine::addPreEmitPass(PassManagerBase &PM, 
-                                         CodeGenOpt::Level OptLevel) {
-  PM.add(createPIC16MemSelOptimizerPass());
-  return true;  // -print-machineinstr should print after this.
-}
-
-
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
deleted file mode 100644
index dae5d3129c63..000000000000
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PIC16TargetMachine.h - Define TargetMachine for PIC16 ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef PIC16_TARGETMACHINE_H
-#define PIC16_TARGETMACHINE_H
-
-#include "PIC16InstrInfo.h"
-#include "PIC16ISelLowering.h"
-#include "PIC16SelectionDAGInfo.h"
-#include "PIC16RegisterInfo.h"
-#include "PIC16Subtarget.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-/// PIC16TargetMachine
-///
-class PIC16TargetMachine : public LLVMTargetMachine {
-  PIC16Subtarget        Subtarget;
-  const TargetData      DataLayout;       // Calculates type size & alignment
-  PIC16InstrInfo        InstrInfo;
-  PIC16TargetLowering   TLInfo;
-  PIC16SelectionDAGInfo TSInfo;
-
-  // PIC16 does not have any call stack frame, therefore not having 
-  // any PIC16 specific FrameInfo class.
-  TargetFrameInfo       FrameInfo;
-
-public:
-  PIC16TargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS, bool Cooper = false);
-
-  virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
-  virtual const PIC16InstrInfo *getInstrInfo() const  { return &InstrInfo; }
-  virtual const TargetData *getTargetData() const     { return &DataLayout;}
-  virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; }
- 
-  virtual const PIC16RegisterInfo *getRegisterInfo() const { 
-    return &(InstrInfo.getRegisterInfo()); 
-  }
-
-  virtual const PIC16TargetLowering *getTargetLowering() const { 
-    return &TLInfo;
-  }
-
-  virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const {
-    return &TSInfo;
-  }
-
-  virtual bool addInstSelector(PassManagerBase &PM,
-                               CodeGenOpt::Level OptLevel);
-  virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-}; // PIC16TargetMachine.
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp
deleted file mode 100644
index ff0f971cc382..000000000000
--- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp
+++ /dev/null
@@ -1,384 +0,0 @@
-//===-- PIC16TargetObjectFile.cpp - PIC16 object files --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16TargetObjectFile.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16Section.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-
-PIC16TargetObjectFile::PIC16TargetObjectFile() {
-}
-
-PIC16TargetObjectFile::~PIC16TargetObjectFile() {
-}
-
-/// Find a pic16 section. Return null if not found. Do not create one.
-PIC16Section *PIC16TargetObjectFile::
-findPIC16Section(const std::string &Name) const {
-  /// Return if we have an already existing one.
-  PIC16Section *Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-  return NULL;
-}
-
-
-/// Find a pic16 section. If not found, create one.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16Section(const std::string &Name, PIC16SectionType Ty, 
-                const std::string &Address, int Color) const {
-
-  /// Return if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-  return Entry;
-}
-
-/// Find a standard pic16 data section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16DataSection(const std::string &Name, PIC16SectionType Ty, 
-                    const std::string &Address, int Color) const {
-
-  /// Return if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  switch (Ty) {
-  default: llvm_unreachable ("unknow standard section type.");
-  case UDATA: UDATASections_.push_back(Entry); break;
-  case IDATA: IDATASections_.push_back(Entry); break;
-  case ROMDATA: ROMDATASection_ = Entry; break;
-  case UDATA_SHR: SHAREDUDATASection_ = Entry; break;
-  }
-
-  return Entry;
-}
-    
-
-/// Find a standard pic16 autos section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16AutoSection(const std::string &Name, PIC16SectionType Ty, 
-                    const std::string &Address, int Color) const {
-
-  /// Return if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  assert (Ty == UDATA_OVR && "incorrect section type for autos");
-  AUTOSections_.push_back(Entry);
-
-  return Entry;
-}
-    
-/// Find a pic16 user section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16UserSection(const std::string &Name, PIC16SectionType Ty, 
-                    const std::string &Address, int Color) const {
-
-  /// Return if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  USERSections_.push_back(Entry);
-
-  return Entry;
-}
-
-/// Do some standard initialization.
-void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
-  TargetLoweringObjectFile::Initialize(Ctx, tm);
-  TM = &tm;
-  
-  ROMDATASection_ = NULL;
-  SHAREDUDATASection_ = NULL;
-}
-
-/// allocateUDATA - Allocate a un-initialized global to an existing or new UDATA
-/// section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const {
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  const Constant *C = GV->getInitializer();
-  assert(C->isNullValue() && "Unitialized globals has non-zero initializer");
-
-  // Find how much space this global needs.
-  const TargetData *TD = TM->getTargetData();
-  const Type *Ty = C->getType(); 
-  unsigned ValSize = TD->getTypeAllocSize(Ty);
- 
-  // Go through all UDATA Sections and assign this variable
-  // to the first available section having enough space.
-  PIC16Section *Found = NULL;
-  for (unsigned i = 0; i < UDATASections_.size(); i++) {
-    if (DataBankSize - UDATASections_[i]->getSize() >= ValSize) {
-      Found = UDATASections_[i];
-      break;
-    }
-  }
-
-  // No UDATA section spacious enough was found. Crate a new one.
-  if (!Found) {
-    std::string name = PAN::getUdataSectionName(UDATASections_.size());
-    Found = getPIC16DataSection(name.c_str(), UDATA);
-  }
-  
-  // Insert the GV into this UDATA section.
-  Found->Items.push_back(GV);
-  Found->setSize(Found->getSize() + ValSize);
-  return Found;
-} 
-
-/// allocateIDATA - allocate an initialized global into an existing
-/// or new section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  const Constant *C = GV->getInitializer();
-  assert(!C->isNullValue() && "initialized globals has zero initializer");
-  assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
-         "can allocate initialized RAM data only");
-
-  // Find how much space this global needs.
-  const TargetData *TD = TM->getTargetData();
-  const Type *Ty = C->getType(); 
-  unsigned ValSize = TD->getTypeAllocSize(Ty);
- 
-  // Go through all IDATA Sections and assign this variable
-  // to the first available section having enough space.
-  PIC16Section *Found = NULL;
-  for (unsigned i = 0; i < IDATASections_.size(); i++) {
-    if (DataBankSize - IDATASections_[i]->getSize() >= ValSize) {
-      Found = IDATASections_[i]; 
-      break;
-    }
-  }
-
-  // No IDATA section spacious enough was found. Crate a new one.
-  if (!Found) {
-    std::string name = PAN::getIdataSectionName(IDATASections_.size());
-    Found = getPIC16DataSection(name.c_str(), IDATA);
-  }
-  
-  // Insert the GV into this IDATA.
-  Found->Items.push_back(GV);
-  Found->setSize(Found->getSize() + ValSize);
-  return Found;
-} 
-
-// Allocate a program memory variable into ROMDATA section.
-const MCSection *
-PIC16TargetObjectFile::allocateROMDATA(const GlobalVariable *GV) const {
-
-  std::string name = PAN::getRomdataSectionName();
-  PIC16Section *S = getPIC16DataSection(name.c_str(), ROMDATA);
-
-  S->Items.push_back(GV);
-  return S;
-}
-
-// Get the section for an automatic variable of a function.
-// For PIC16 they are globals only with mangled names.
-const MCSection *
-PIC16TargetObjectFile::allocateAUTO(const GlobalVariable *GV) const {
-
-  const std::string name = PAN::getSectionNameForSym(GV->getName());
-  PIC16Section *S = getPIC16AutoSection(name.c_str());
-
-  S->Items.push_back(GV);
-  return S;
-}
-
-
-// Override default implementation to put the true globals into
-// multiple data sections if required.
-const MCSection *
-PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1,
-                                              SectionKind Kind,
-                                              Mangler *Mang,
-                                              const TargetMachine &TM) const {
-  // We select the section based on the initializer here, so it really
-  // has to be a GlobalVariable.
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1); 
-  if (!GV)
-    return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang,TM);
-
-  assert(GV->hasInitializer() && "A def without initializer?");
-
-  // First, if this is an automatic variable for a function, get the section
-  // name for it and return.
-  std::string name = GV->getName();
-  if (PAN::isLocalName(name))
-    return allocateAUTO(GV);
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue()) 
-    return allocateUDATA(GV);
-
-  // If this is initialized data in RAM. Put it in the correct IDATA section.
-  if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) 
-    return allocateIDATA(GV);
-
-  // This is initialized data in rom, put it in the readonly section.
-  if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) 
-    return allocateROMDATA(GV);
-
-  // Else let the default implementation take care of it.
-  return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang,TM);
-}
-
-
-
-
-/// getExplicitSectionGlobal - Allow the target to completely override
-/// section assignment of a global.
-const MCSection *PIC16TargetObjectFile::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, 
-                         Mangler *Mang, const TargetMachine &TM) const {
-  assert(GV->hasSection());
-  
-  if (const GlobalVariable *GVar = cast<GlobalVariable>(GV)) {
-    std::string SectName = GVar->getSection();
-    // If address for a variable is specified, get the address and create
-    // section.
-    // FIXME: move this attribute checking in PAN.
-    std::string AddrStr = "Address=";
-    if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) {
-      std::string SectAddr = SectName.substr(AddrStr.length());
-      if (SectAddr.compare("NEAR") == 0)
-        return allocateSHARED(GVar, Mang);
-      else
-        return allocateAtGivenAddress(GVar, SectAddr);
-    }
-     
-    // Create the section specified with section attribute. 
-    return allocateInGivenSection(GVar);
-  }
-
-  return getPIC16DataSection(GV->getSection().c_str(), UDATA);
-}
-
-const MCSection *
-PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV,
-                                      Mangler *Mang) const {
-  // Make sure that this is an uninitialized global.
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  if (!GV->getInitializer()->isNullValue()) {
-    // FIXME: Generate a warning in this case that near qualifier will be 
-    // ignored.
-    return SelectSectionForGlobal(GV, SectionKind::getDataRel(), Mang, *TM); 
-  } 
-  std::string Name = PAN::getSharedUDataSectionName(); 
-
-  PIC16Section *SharedUDataSect = getPIC16DataSection(Name.c_str(), UDATA_SHR); 
-  // Insert the GV into shared section.
-  SharedUDataSect->Items.push_back(GV);
-  return SharedUDataSect;
-}
-
-
-// Interface used by AsmPrinter to get a code section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForCode(const std::string &FnName,
-                                      bool isISR) const {
-  const std::string &sec_name = PAN::getCodeSectionName(FnName);
-  // If it is ISR, its code section starts at a specific address.
-  if (isISR)
-    return getPIC16Section(sec_name, CODE, PAN::getISRAddr());
-  return getPIC16Section(sec_name, CODE);
-}
-
-// Interface used by AsmPrinter to get a frame section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForFrame(const std::string &FnName) const {
-  const std::string &sec_name = PAN::getFrameSectionName(FnName);
-  return getPIC16Section(sec_name, UDATA_OVR);
-}
-
-// Allocate a global var in existing or new section of given name.
-const MCSection *
-PIC16TargetObjectFile::allocateInGivenSection(const GlobalVariable *GV) const {
-  // Determine the type of section that we need to create.
-  PIC16SectionType SecTy;
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue())
-    SecTy = UDATA;
-  // If this is initialized data in RAM. Put it in the correct IDATA section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
-    SecTy = IDATA;
-  // This is initialized data in rom, put it in the readonly section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) 
-    SecTy = ROMDATA;
-  else
-    llvm_unreachable ("Could not determine section type for global");
-
-  PIC16Section *S = getPIC16UserSection(GV->getSection().c_str(), SecTy);
-  S->Items.push_back(GV);
-  return S;
-}
-
-// Allocate a global var in a new absolute sections at given address.
-const MCSection *
-PIC16TargetObjectFile::allocateAtGivenAddress(const GlobalVariable *GV,
-                                               const std::string &Addr) const {
-  // Determine the type of section that we need to create.
-  PIC16SectionType SecTy;
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue())
-    SecTy = UDATA;
-  // If this is initialized data in RAM. Put it in the correct IDATA section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
-    SecTy = IDATA;
-  // This is initialized data in rom, put it in the readonly section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) 
-    SecTy = ROMDATA;
-  else
-    llvm_unreachable ("Could not determine section type for global");
-
-  std::string Prefix = GV->getNameStr() + "." + Addr + ".";
-  std::string SName = PAN::getUserSectionName(Prefix);
-  PIC16Section *S = getPIC16UserSection(SName.c_str(), SecTy, Addr.c_str());
-  S->Items.push_back(GV);
-  return S;
-}
-
-
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
deleted file mode 100644
index b1eb9f9d854a..000000000000
--- a/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- PIC16TargetObjectFile.h - PIC16 Object Info -------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/StringMap.h"
-#include <vector>
-#include <string>
-
-namespace llvm {
-  class GlobalVariable;
-  class Module;
-  class PIC16TargetMachine;
-  class PIC16Section;
-  
-  enum { DataBankSize = 80 };
-
-  /// PIC16 Splits the global data into mulitple udata and idata sections.
-  /// Each udata and idata section needs to contain a list of globals that
-  /// they contain, in order to avoid scanning over all the global values 
-  /// again and printing only those that match the current section. 
-  /// Keeping values inside the sections make printing a section much easier.
-  ///
-  /// FIXME: MOVE ALL THIS STUFF TO PIC16Section.
-  ///
-
-  /// PIC16TargetObjectFile - PIC16 Object file. Contains data and code
-  /// sections. 
-  // PIC16 Object File has two types of sections.
-  // 1. Standard Sections
-  //    1.1 un-initialized global data 
-  //    1.2 initialized global data
-  //    1.3 program memory data
-  //    1.4 local variables of functions.
-  // 2. User defined sections
-  //    2.1 Objects placed in a specific section. (By _Section() macro)
-  //    2.2 Objects placed at a specific address. (By _Address() macro)
-  class PIC16TargetObjectFile : public TargetLoweringObjectFile {
-    /// SectionsByName - Bindings of names to allocated sections.
-    mutable StringMap<PIC16Section*> SectionsByName;
-
-    const TargetMachine *TM;
-    
-    /// Lists of sections.
-    /// Standard Data Sections.
-    mutable std::vector<PIC16Section *> UDATASections_;
-    mutable std::vector<PIC16Section *> IDATASections_;
-    mutable PIC16Section * ROMDATASection_;
-    mutable PIC16Section * SHAREDUDATASection_;
-
-    /// Standard Auto Sections.
-    mutable std::vector<PIC16Section *> AUTOSections_;
- 
-    /// User specified sections.
-    mutable std::vector<PIC16Section *> USERSections_;
-
-    
-    /// Find or Create a PIC16 Section, without adding it to any
-    /// section list.
-    PIC16Section *getPIC16Section(const std::string &Name,
-                                   PIC16SectionType Ty, 
-                                   const std::string &Address = "", 
-                                   int Color = -1) const;
-
-    /// Convenience functions. These wrappers also take care of adding 
-    /// the newly created section to the appropriate sections list.
-
-    /// Find or Create PIC16 Standard Data Section.
-    PIC16Section *getPIC16DataSection(const std::string &Name,
-                                       PIC16SectionType Ty, 
-                                       const std::string &Address = "", 
-                                       int Color = -1) const;
-
-    /// Find or Create PIC16 Standard Auto Section.
-    PIC16Section *getPIC16AutoSection(const std::string &Name,
-                                       PIC16SectionType Ty = UDATA_OVR,
-                                       const std::string &Address = "", 
-                                       int Color = -1) const;
-
-    /// Find or Create PIC16 Standard Auto Section.
-    PIC16Section *getPIC16UserSection(const std::string &Name,
-                                       PIC16SectionType Ty, 
-                                       const std::string &Address = "", 
-                                       int Color = -1) const;
-
-    /// Allocate Un-initialized data to a standard UDATA section. 
-    const MCSection *allocateUDATA(const GlobalVariable *GV) const;
-
-    /// Allocate Initialized data to a standard IDATA section. 
-    const MCSection *allocateIDATA(const GlobalVariable *GV) const;
-
-    /// Allocate ROM data to the standard ROMDATA section. 
-    const MCSection *allocateROMDATA(const GlobalVariable *GV) const;
-
-    /// Allocate an AUTO variable to an AUTO section.
-    const MCSection *allocateAUTO(const GlobalVariable *GV) const;
-    
-    /// Allocate DATA in user specified section.
-    const MCSection *allocateInGivenSection(const GlobalVariable *GV) const;
-
-    /// Allocate DATA at user specified address.
-    const MCSection *allocateAtGivenAddress(const GlobalVariable *GV,
-                                            const std::string &Addr) const;
-
-    /// Allocate a shared variable to SHARED section.
-    const MCSection *allocateSHARED(const GlobalVariable *GV,
-                                    Mangler *Mang) const;
-   
-    public:
-    PIC16TargetObjectFile();
-    ~PIC16TargetObjectFile();
-    void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
-    /// Return the section with the given Name. Null if not found.
-    PIC16Section *findPIC16Section(const std::string &Name) const;
-
-    /// Override section allocations for user specified sections.
-    virtual const MCSection *
-    getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, 
-                             Mangler *Mang, const TargetMachine &TM) const;
-    
-    /// Select sections for Data and Auto variables(globals).
-    virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
-                                                    SectionKind Kind,
-                                                    Mangler *Mang,
-                                                    const TargetMachine&) const;
-
-
-    /// Return a code section for a function.
-    const PIC16Section *SectionForCode (const std::string &FnName,
-                                        bool isISR) const;
-
-    /// Return a frame section for a function.
-    const PIC16Section *SectionForFrame (const std::string &FnName) const;
-
-    /// Accessors for various section lists.
-    const std::vector<PIC16Section *> &UDATASections() const {
-      return UDATASections_;
-    }
-    const std::vector<PIC16Section *> &IDATASections() const {
-      return IDATASections_;
-    }
-    const PIC16Section *ROMDATASection() const {
-      return ROMDATASection_;
-    }
-    const PIC16Section *SHAREDUDATASection() const {
-      return SHAREDUDATASection_;
-    }
-    const std::vector<PIC16Section *> &AUTOSections() const {
-      return AUTOSections_;
-    }
-    const std::vector<PIC16Section *> &USERSections() const {
-      return USERSections_;
-    }
-  };
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/TargetInfo/CMakeLists.txt b/lib/Target/PIC16/TargetInfo/CMakeLists.txt
deleted file mode 100644
index bfc6ff4e8e2e..000000000000
--- a/lib/Target/PIC16/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPIC16Info
-  PIC16TargetInfo.cpp
-  )
-
-add_dependencies(LLVMPIC16Info PIC16Table_gen)
diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile
deleted file mode 100644
index 76609f66d652..000000000000
--- a/lib/Target/PIC16/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PIC16/TargetInfo/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPIC16Info
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
deleted file mode 100644
index f1bdb1210fc6..000000000000
--- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- PIC16TargetInfo.cpp - PIC16 Target Implementation -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::ThePIC16Target, llvm::TheCooperTarget;
-
-extern "C" void LLVMInitializePIC16TargetInfo() { 
-  RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16",
-                                  "PIC16 14-bit [experimental]");
-
-  RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]");
-}
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
new file mode 100644
index 000000000000..331266da30b3
--- /dev/null
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_TARGET_DEFINITIONS PTX.td)
+
+tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
+tablegen(PTXGenInstrNames.inc -gen-instr-enums)
+tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
+tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PTXGenRegisterNames.inc -gen-register-enums)
+tablegen(PTXGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PTXCodeGen
+  PTXAsmPrinter.cpp
+  PTXISelDAGToDAG.cpp
+  PTXISelLowering.cpp
+  PTXInstrInfo.cpp
+  PTXFrameLowering.cpp
+  PTXMCAsmInfo.cpp
+  PTXMCAsmStreamer.cpp
+  PTXMFInfoExtract.cpp
+  PTXRegisterInfo.cpp
+  PTXSubtarget.cpp
+  PTXTargetMachine.cpp
+  )
+
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile
new file mode 100644
index 000000000000..2c40d6994094
--- /dev/null
+++ b/lib/Target/PTX/Makefile
@@ -0,0 +1,26 @@
+##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMPTXCodeGen
+TARGET = PTX
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PTXGenAsmWriter.inc \
+		PTXGenDAGISel.inc \
+		PTXGenInstrInfo.inc \
+		PTXGenInstrNames.inc \
+		PTXGenRegisterInfo.inc \
+		PTXGenRegisterInfo.h.inc \
+		PTXGenRegisterNames.inc \
+		PTXGenSubtarget.inc
+
+DIRS = TargetInfo
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
new file mode 100644
index 000000000000..19385ba1ff8c
--- /dev/null
+++ b/lib/Target/PTX/PTX.h
@@ -0,0 +1,49 @@
+//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+  class PTXTargetMachine;
+  class FunctionPass;
+
+  namespace PTX {
+    enum StateSpace {
+      GLOBAL = 0, // default to global state space
+      CONSTANT = 1,
+      LOCAL = 2,
+      PARAMETER = 3,
+      SHARED = 4
+    };
+  } // namespace PTX
+
+  FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+                                 CodeGenOpt::Level OptLevel);
+
+  FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel);
+
+  extern Target ThePTXTarget;
+} // namespace llvm;
+
+// Defines symbolic names for PTX registers.
+#include "PTXGenRegisterNames.inc"
+
+// Defines symbolic names for the PTX instructions.
+#include "PTXGenInstrNames.inc"
+
+#endif // PTX_H
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
new file mode 100644
index 000000000000..8b1a1b18da54
--- /dev/null
+++ b/lib/Target/PTX/PTX.td
@@ -0,0 +1,54 @@
+//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
+                                   "Enable sm_20 target architecture">;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+  let InstructionSet = PTXInstrInfo;
+}
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 000000000000..a6059974ab3d
--- /dev/null
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
+
+static cl::opt<std::string>
+OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
+             cl::init("sm_10"));
+
+namespace {
+class PTXAsmPrinter : public AsmPrinter {
+public:
+  explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+    : AsmPrinter(TM, Streamer) {}
+
+  const char *getPassName() const { return "PTX Assembly Printer"; }
+
+  bool doFinalization(Module &M);
+
+  virtual void EmitStartOfAsmFile(Module &M);
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual void EmitFunctionBodyStart();
+  virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
+
+  virtual void EmitInstruction(const MachineInstr *MI);
+
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                       const char *Modifier = 0);
+  void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                         const char *Modifier = 0);
+
+  // autogen'd.
+  void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+  static const char *getRegisterName(unsigned RegNo);
+
+private:
+  void EmitVariableDeclaration(const GlobalVariable *gv);
+  void EmitFunctionDeclaration();
+}; // class PTXAsmPrinter
+} // namespace
+
+static const char PARAM_PREFIX[] = "__param_";
+
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+  if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+  TEST_REGCLS(RRegs32, s32);
+  TEST_REGCLS(Preds, pred);
+#undef TEST_REGCLS
+
+  llvm_unreachable("Not in any register class!");
+  return NULL;
+}
+
+static const char *getInstructionTypeName(const MachineInstr *MI) {
+  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.getType() == MachineOperand::MO_Register)
+      return getRegisterTypeName(MO.getReg());
+  }
+
+  llvm_unreachable("No reg operand found in instruction!");
+  return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+  switch (addressSpace) {
+  default: llvm_unreachable("Unknown state space");
+  case PTX::GLOBAL:    return "global";
+  case PTX::CONSTANT:  return "const";
+  case PTX::LOCAL:     return "local";
+  case PTX::PARAMETER: return "param";
+  case PTX::SHARED:    return "shared";
+  }
+  return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temproarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at beginning).
+
+  Module::GlobalListType &global_list = M.getGlobalList();
+  int i, n = global_list.size();
+  GlobalVariable **gv_array = new GlobalVariable* [n];
+
+  // first, back-up GlobalVariable in gv_array
+  i = 0;
+  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+       I != E; ++I)
+    gv_array[i++] = &*I;
+
+  // second, empty global_list
+  while (!global_list.empty())
+    global_list.remove(global_list.begin());
+
+  // call doFinalization
+  bool ret = AsmPrinter::doFinalization(M);
+
+  // now we restore global variables
+  for (i = 0; i < n; i ++)
+    global_list.insert(global_list.end(), gv_array[i]);
+
+  delete[] gv_array;
+  return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+  OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
+  OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+  OutStreamer.AddBlankLine();
+
+  // declare global variables
+  for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+       i != e; ++i)
+    EmitVariableDeclaration(i);
+}
+
+bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  SetupMachineFunction(MF);
+  EmitFunctionDeclaration();
+  EmitFunctionBody();
+  return false;
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+  OutStreamer.EmitRawText(Twine("{"));
+
+  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+  // Print local variable definition
+  for (PTXMachineFunctionInfo::reg_iterator
+       i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
+    unsigned reg = *i;
+
+    std::string def = "\t.reg .";
+    def += getRegisterTypeName(reg);
+    def += ' ';
+    def += getRegisterName(reg);
+    def += ';';
+    OutStreamer.EmitRawText(Twine(def));
+  }
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  std::string str;
+  str.reserve(64);
+
+  // Write instruction to str
+  raw_string_ostream OS(str);
+  printInstruction(MI, OS);
+  OS << ';';
+  OS.flush();
+
+  // Replace "%type" if found
+  size_t pos;
+  if ((pos = str.find("%type")) != std::string::npos)
+    str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI));
+
+  StringRef strref = StringRef(str);
+  OutStreamer.EmitRawText(strref);
+}
+
+void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                 raw_ostream &OS) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+
+  switch (MO.getType()) {
+    default:
+      llvm_unreachable("<unknown operand type>");
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      OS << *Mang->getSymbol(MO.getGlobal());
+      break;
+    case MachineOperand::MO_Immediate:
+      OS << (int) MO.getImm();
+      break;
+    case MachineOperand::MO_Register:
+      OS << getRegisterName(MO.getReg());
+      break;
+  }
+}
+
+void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+                                    raw_ostream &OS, const char *Modifier) {
+  printOperand(MI, opNum, OS);
+
+  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+    return; // don't print "+0"
+
+  OS << "+";
+  printOperand(MI, opNum+1, OS);
+}
+
+void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &OS, const char *Modifier) {
+  OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+  // Check to see if this is a special global used by LLVM, if so, emit it.
+  if (EmitSpecialLLVMGlobal(gv))
+    return;
+
+  MCSymbol *gvsym = Mang->getSymbol(gv);
+
+  assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+  std::string decl;
+
+  // check if it is defined in some other translation unit
+  if (gv->isDeclaration())
+    decl += ".extern ";
+
+  // state space: e.g., .global
+  decl += ".";
+  decl += getStateSpaceName(gv->getType()->getAddressSpace());
+  decl += " ";
+
+  // alignment (optional)
+  unsigned alignment = gv->getAlignment();
+  if (alignment != 0) {
+    decl += ".align ";
+    decl += utostr(Log2_32(gv->getAlignment()));
+    decl += " ";
+  }
+
+  // TODO: add types
+  decl += ".s32 ";
+
+  decl += gvsym->getName();
+
+  if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType()))
+    decl += "[]";
+
+  decl += ";";
+
+  OutStreamer.EmitRawText(Twine(decl));
+
+  OutStreamer.AddBlankLine();
+}
+
+void PTXAsmPrinter::EmitFunctionDeclaration() {
+  // The function label could have already been emitted if two symbols end up
+  // conflicting due to asm renaming.  Detect this and emit an error.
+  if (!CurrentFnSym->isUndefined()) {
+    report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+                       "' label emitted multiple times to assembly file");
+    return;
+  }
+
+  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+  const bool isKernel = MFI->isKernel();
+  unsigned reg;
+
+  std::string decl = isKernel ? ".entry" : ".func";
+
+  // Print return register
+  reg = MFI->retReg();
+  if (!isKernel && reg != PTX::NoRegister) {
+    decl += " (.reg ."; // FIXME: could it return in .param space?
+    decl += getRegisterTypeName(reg);
+    decl += " ";
+    decl += getRegisterName(reg);
+    decl += ")";
+  }
+
+  // Print function name
+  decl += " ";
+  decl += CurrentFnSym->getName().str();
+
+  // Print parameter list
+  if (!MFI->argRegEmpty()) {
+    decl += " (";
+    if (isKernel) {
+      for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
+        if (i != 0)
+          decl += ", ";
+        decl += ".param .s32 "; // TODO: add types
+        decl += PARAM_PREFIX;
+        decl += utostr(i + 1);
+      }
+    } else {
+      for (PTXMachineFunctionInfo::reg_iterator
+           i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+        reg = *i;
+        assert(reg != PTX::NoRegister && "Not a valid register!");
+        if (i != b)
+          decl += ", ";
+        decl += ".reg .";
+        decl += getRegisterTypeName(reg);
+        decl += " ";
+        decl += getRegisterName(reg);
+      }
+    }
+    decl += ")";
+  }
+
+  OutStreamer.EmitRawText(Twine(decl));
+}
+
+#include "PTXGenAsmWriter.inc"
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+  RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget);
+}
diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 000000000000..b621b9d634d2
--- /dev/null
+++ b/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+                                    MachineBasicBlock &MBB) const {
+}
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 000000000000..574ae7a19dc2
--- /dev/null
+++ b/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,43 @@
+//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+  const PTXSubtarget &STI;
+
+public:
+  explicit PTXFrameLowering(const PTXSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 000000000000..efb0e8b1af77
--- /dev/null
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,151 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+  public:
+    PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+    virtual const char *getPassName() const {
+      return "PTX DAG->DAG Pattern Instruction Selection";
+    }
+
+    SDNode *Select(SDNode *Node);
+
+    // Complex Pattern Selectors.
+    bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+    bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+    bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+    // Include the pieces auto'gened from the target description
+#include "PTXGenDAGISel.inc"
+
+  private:
+    SDNode *SelectREAD_PARAM(SDNode *Node);
+
+    bool isImm(const SDValue &operand);
+    bool SelectImm(const SDValue &operand, SDValue &imm);
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+                                     CodeGenOpt::Level OptLevel) {
+  return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+                                 CodeGenOpt::Level OptLevel)
+  : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+  if (Node->getOpcode() == PTXISD::READ_PARAM)
+    return SelectREAD_PARAM(Node);
+  else
+    return SelectCode(Node);
+}
+
+SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
+  SDValue index = Node->getOperand(1);
+  DebugLoc dl = Node->getDebugLoc();
+
+  if (index.getOpcode() != ISD::TargetConstant)
+    llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
+
+  return PTXInstrInfo::
+    GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+  if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+      isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+    return false;
+
+  R1 = Addr;
+  R2 = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+                                   SDValue &Offset) {
+  if (Addr.getOpcode() != ISD::ADD) {
+    // let SelectADDRii handle the [imm] case
+    if (isImm(Addr))
+      return false;
+    // it is [reg]
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  if (Addr.getNumOperands() < 2)
+    return false;
+
+  // let SelectADDRii handle the [imm+imm] case
+  if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+    return false;
+
+  // try [reg+imm] and [imm+reg]
+  for (int i = 0; i < 2; i ++)
+    if (SelectImm(Addr.getOperand(1-i), Offset)) {
+      Base = Addr.getOperand(i);
+      return true;
+    }
+
+  // neither [reg+imm] nor [imm+reg]
+  return false;
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+                                   SDValue &Offset) {
+  // is [imm+imm]?
+  if (Addr.getOpcode() == ISD::ADD) {
+    return SelectImm(Addr.getOperand(0), Base) &&
+           SelectImm(Addr.getOperand(1), Offset);
+  }
+
+  // is [imm]?
+  if (SelectImm(Addr, Base)) {
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  return false;
+}
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+  return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+  SDNode *node = operand.getNode();
+  if (!ConstantSDNode::classof(node))
+    return false;
+
+  ConstantSDNode *CN = cast<ConstantSDNode>(node);
+  imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32);
+  return true;
+}
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 000000000000..e6d44907ed37
--- /dev/null
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,210 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXISelLowering.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+  // Set up the register classes.
+  addRegisterClass(MVT::i1,  PTX::PredsRegisterClass);
+  addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
+
+  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+
+  // Customize translation of memory addresses
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+
+  // Compute derived properties from the register classes
+  computeRegisterProperties();
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+    default:                 llvm_unreachable("Unimplemented operand");
+    case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+  }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (Opcode) {
+    default:
+      llvm_unreachable("Unknown opcode");
+    case PTXISD::READ_PARAM:
+      return "PTXISD::READ_PARAM";
+    case PTXISD::EXIT:
+      return "PTXISD::EXIT";
+    case PTXISD::RET:
+      return "PTXISD::RET";
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                      Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  return DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+}
+
+//===----------------------------------------------------------------------===//
+//                      Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct argmap_entry {
+  MVT::SimpleValueType VT;
+  TargetRegisterClass *RC;
+  TargetRegisterClass::iterator loc;
+
+  argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC)
+    : VT(_VT), RC(_RC), loc(_RC->begin()) {}
+
+  void reset() { loc = RC->begin(); }
+  bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
+} argmap[] = {
+  argmap_entry(MVT::i1,  PTX::PredsRegisterClass),
+  argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
+};
+} // end anonymous namespace
+
+SDValue PTXTargetLowering::
+  LowerFormalArguments(SDValue Chain,
+                       CallingConv::ID CallConv,
+                       bool isVarArg,
+                       const SmallVectorImpl<ISD::InputArg> &Ins,
+                       DebugLoc dl,
+                       SelectionDAG &DAG,
+                       SmallVectorImpl<SDValue> &InVals) const {
+  if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+  switch (CallConv) {
+    default:
+      llvm_unreachable("Unsupported calling convention");
+      break;
+    case CallingConv::PTX_Kernel:
+      MFI->setKernel(true);
+      break;
+    case CallingConv::PTX_Device:
+      MFI->setKernel(false);
+      break;
+  }
+
+  // Make sure we don't add argument registers twice
+  if (MFI->isDoneAddArg())
+    llvm_unreachable("cannot add argument registers twice");
+
+  // Reset argmap before allocation
+  for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
+       i != e; ++ i)
+    i->reset();
+
+  for (int i = 0, e = Ins.size(); i != e; ++ i) {
+    MVT::SimpleValueType VT = Ins[i].VT.SimpleTy;
+
+    struct argmap_entry *entry = std::find(argmap,
+                                           argmap + array_lengthof(argmap), VT);
+    if (entry == argmap + array_lengthof(argmap))
+      llvm_unreachable("Type of argument is not supported");
+
+    if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass)
+      llvm_unreachable("cannot pass preds to kernel");
+
+    MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+
+    unsigned preg = *++(entry->loc); // allocate start from register 1
+    unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
+    RegInfo.addLiveIn(preg, vreg);
+
+    MFI->addArgReg(preg);
+
+    SDValue inval;
+    if (MFI->isKernel())
+      inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain,
+                          DAG.getTargetConstant(i, MVT::i32));
+    else
+      inval = DAG.getCopyFromReg(Chain, dl, vreg, VT);
+    InVals.push_back(inval);
+  }
+
+  MFI->doneAddArg();
+
+  return Chain;
+}
+
+SDValue PTXTargetLowering::
+  LowerReturn(SDValue Chain,
+              CallingConv::ID CallConv,
+              bool isVarArg,
+              const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
+              DebugLoc dl,
+              SelectionDAG &DAG) const {
+  if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+  switch (CallConv) {
+    default:
+      llvm_unreachable("Unsupported calling convention.");
+    case CallingConv::PTX_Kernel:
+      assert(Outs.size() == 0 && "Kernel must return void.");
+      return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+    case CallingConv::PTX_Device:
+      assert(Outs.size() <= 1 && "Can at most return one value.");
+      break;
+  }
+
+  // PTX_Device
+
+  // return void
+  if (Outs.size() == 0)
+    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+
+  assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
+
+  SDValue Flag;
+  unsigned reg = PTX::R0;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+  MFI->setRetReg(reg);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+    DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
+
+  // Copy the result values into the output registers
+  Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
+
+  // Guarantee that all emitted copies are stuck together,
+  // avoiding something bad
+  Flag = Chain.getValue(1);
+
+  return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+}
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 000000000000..b03a9f66630f
--- /dev/null
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,67 @@
+//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class PTXSubtarget;
+class PTXTargetMachine;
+
+namespace PTXISD {
+  enum NodeType {
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+    READ_PARAM,
+    EXIT,
+    RET
+  };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+  public:
+    explicit PTXTargetLowering(TargetMachine &TM);
+
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    virtual unsigned getFunctionAlignment(const Function *F) const {
+      return 2; }
+
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+    virtual SDValue
+      LowerFormalArguments(SDValue Chain,
+                           CallingConv::ID CallConv,
+                           bool isVarArg,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl,
+                           SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals) const;
+
+    virtual SDValue
+      LowerReturn(SDValue Chain,
+                  CallingConv::ID CallConv,
+                  bool isVarArg,
+                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  const SmallVectorImpl<SDValue> &OutVals,
+                  DebugLoc dl,
+                  SelectionDAG &DAG) const;
+
+  private:
+    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 000000000000..e4e099987e8d
--- /dev/null
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,24 @@
+//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops Preds, i32imm),
+                                     (ops (i1 zero_reg), (i32 0))>;
+
+let Namespace = "PTX" in {
+  class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+    : Instruction {
+      dag OutOperandList = oops;
+      dag InOperandList = !con(iops, (ins pred:$_p));
+      let AsmString = asmstr; // Predicate printing is defined elsewhere.
+      let Pattern = pattern;
+      let isPredicable = 1;
+  }
+}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 000000000000..805759bcab1e
--- /dev/null
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,87 @@
+//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#include "PTXGenInstrInfo.inc"
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+  : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)),
+    RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+  const TargetRegisterClass *cls;
+  const int opcode;
+} map[] = {
+  { &PTX::RRegs32RegClass, PTX::MOVrr },
+  { &PTX::PredsRegClass,   PTX::MOVpp }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator I, DebugLoc DL,
+                               unsigned DstReg, unsigned SrcReg,
+                               bool KillSrc) const {
+  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+    if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
+      BuildMI(MBB, I, DL,
+              get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
+      return;
+    }
+
+  llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I,
+                                unsigned DstReg, unsigned SrcReg,
+                                const TargetRegisterClass *DstRC,
+                                const TargetRegisterClass *SrcRC,
+                                DebugLoc DL) const {
+  if (DstRC != SrcRC)
+    return false;
+
+  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+    if (DstRC == map[i].cls) {
+      MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode),
+                                 DstReg).addReg(SrcReg);
+      if (MI->findFirstPredOperandIdx() == -1) {
+        MI->addOperand(MachineOperand::CreateReg(0, false));
+        MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0));
+      }
+      return true;
+    }
+
+  return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+                               unsigned &SrcReg, unsigned &DstReg,
+                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+  switch (MI.getOpcode()) {
+    default:
+      return false;
+    case PTX::MOVpp:
+    case PTX::MOVrr:
+      assert(MI.getNumOperands() >= 2 &&
+             MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+             "Invalid register-register move instruction");
+      SrcSubIdx = DstSubIdx = 0; // No sub-registers
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(1).getReg();
+      return true;
+  }
+}
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 000000000000..e7f00f09c2f1
--- /dev/null
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,75 @@
+//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class PTXInstrInfo : public TargetInstrInfoImpl {
+  private:
+    const PTXRegisterInfo RI;
+    PTXTargetMachine &TM;
+
+  public:
+    explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+    virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+    virtual void copyPhysReg(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I, DebugLoc DL,
+                             unsigned DstReg, unsigned SrcReg,
+                             bool KillSrc) const;
+
+    virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DstReg, unsigned SrcReg,
+                              const TargetRegisterClass *DstRC,
+                              const TargetRegisterClass *SrcRC,
+                              DebugLoc DL) const;
+
+    virtual bool isMoveInstr(const MachineInstr& MI,
+                             unsigned &SrcReg, unsigned &DstReg,
+                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+    // static helper routines
+
+    static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                            DebugLoc dl, EVT VT,
+                                            SDValue Op1) {
+      SDValue pred_reg = DAG->getRegister(0, MVT::i1);
+      SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
+      SDValue ops[] = { Op1, pred_reg, pred_imm };
+      return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+    }
+
+    static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                            DebugLoc dl, EVT VT,
+                                            SDValue Op1,
+                                            SDValue Op2) {
+      SDValue pred_reg = DAG->getRegister(0, MVT::i1);
+      SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
+      SDValue ops[] = { Op1, Op2, pred_reg, pred_imm };
+      return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+    }
+
+  }; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 000000000000..9a747788f6a1
--- /dev/null
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,257 @@
+//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::GLOBAL;
+  return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::CONSTANT;
+  return false;
+}]>;
+
+def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::LOCAL;
+  return false;
+}]>;
+
+def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::PARAMETER;
+  return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::SHARED;
+  return false;
+}]>;
+
+def store_global
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::GLOBAL;
+  return false;
+}]>;
+
+def store_local
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::LOCAL;
+  return false;
+}]>;
+
+def store_parameter
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::PARAMETER;
+  return false;
+}]>;
+
+def store_shared
+  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+  const Value *Src;
+  const PointerType *PT;
+  if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+      (PT = dyn_cast<PointerType>(Src->getType())))
+    return PT->getAddressSpace() == PTX::SHARED;
+  return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+
+// Address operands
+def MEMri : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops RRegs32, i32imm);
+}
+def MEMii : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMpi : Operand<i32> {
+  let PrintMethod = "printParamOperand";
+  let MIOperandInfo = (ops i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allow generic 3-reg shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+  : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+  : SDNode<"PTXISD::RET",  SDTNone, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass INT3<string opcstr, SDNode opnode> {
+  def rr : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, RRegs32:$b),
+                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+  def ri : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, i32imm:$b),
+                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+}
+
+// no %type directive, non-communtable
+multiclass INT3ntnc<string opcstr, SDNode opnode> {
+  def rr : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, RRegs32:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+  def ri : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, i32imm:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+  def ir : InstPTX<(outs RRegs32:$d),
+                   (ins i32imm:$a, RRegs32:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+}
+
+multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
+  def rr : InstPTX<(outs RC:$d),
+                   (ins MEMri:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRrr:$a))]>;
+  def ri : InstPTX<(outs RC:$d),
+                   (ins MEMri:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRri:$a))]>;
+  def ii : InstPTX<(outs RC:$d),
+                   (ins MEMii:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRii:$a))]>;
+}
+
+multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+  def rr : InstPTX<(outs),
+                   (ins RC:$d, MEMri:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRrr:$a)]>;
+  def ri : InstPTX<(outs),
+                   (ins RC:$d, MEMri:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRri:$a)]>;
+  def ii : InstPTX<(outs),
+                   (ins RC:$d, MEMii:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRii:$a)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : INT3<"add", add>;
+defm SUB : INT3<"sub", sub>;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : INT3ntnc<"shl.b32", PTXshl>;
+defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s32", PTXsra>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+let neverHasSideEffects = 1 in {
+  // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
+  def MOVpp
+    : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
+  def MOVrr
+    : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+  def MOVpi
+    : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+              [(set Preds:$d, imm:$a)]>;
+  def MOVri
+    : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
+              [(set RRegs32:$d, imm:$a)]>;
+}
+
+defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
+defm LDc : PTX_LD<"ld.const",  RRegs32, load_constant>;
+defm LDl : PTX_LD<"ld.local",  RRegs32, load_local>;
+defm LDp : PTX_LD<"ld.param",  RRegs32, load_parameter>;
+defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
+
+def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
+                   "ld.param.%type\t$d, [$a]", []>;
+
+defm STg : PTX_ST<"st.global", RRegs32, store_global>;
+defm STl : PTX_ST<"st.local",  RRegs32, store_local>;
+// Store to parameter state space requires PTX 2.0 or higher?
+// defm STp : PTX_ST<"st.param",  RRegs32, store_parameter>;
+defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
+
+///===- Control Flow Instructions -----------------------------------------===//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+  def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+  def RET  : InstPTX<(outs), (ins), "ret",  [(PTXret)]>;
+}
diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/PTXMCAsmInfo.cpp
new file mode 100644
index 000000000000..b670abdbe095
--- /dev/null
+++ b/lib/Target/PTX/PTXMCAsmInfo.cpp
@@ -0,0 +1,30 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+
+using namespace llvm;
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+  CommentString = "//";
+
+  PrivateGlobalPrefix = "$L__";
+
+  AllowPeriodsInName = false;
+
+  HasSetDirective = false;
+
+  HasDotTypeDotSizeDirective = false;
+
+  HasSingleParameterDotFile = false;
+}
diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/PTXMCAsmInfo.h
new file mode 100644
index 000000000000..03f5d66b3d60
--- /dev/null
+++ b/lib/Target/PTX/PTXMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class Target;
+  class StringRef;
+
+  struct PTXMCAsmInfo : public MCAsmInfo {
+    explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+  };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 000000000000..0886ba8008f3
--- /dev/null
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,542 @@
+//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+  formatted_raw_ostream &OS;
+  const MCAsmInfo &MAI;
+  OwningPtr<MCInstPrinter> InstPrinter;
+  OwningPtr<MCCodeEmitter> Emitter;
+
+  SmallString<128> CommentToEmit;
+  raw_svector_ostream CommentStream;
+
+  unsigned IsVerboseAsm : 1;
+  unsigned ShowInst : 1;
+
+public:
+  PTXMCAsmStreamer(MCContext &Context,
+                   formatted_raw_ostream &os,
+                   bool isVerboseAsm, bool useLoc,
+                   MCInstPrinter *printer,
+                   MCCodeEmitter *emitter,
+                   bool showInst)
+    : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+      InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+      IsVerboseAsm(isVerboseAsm),
+      ShowInst(showInst) {
+    if (InstPrinter && IsVerboseAsm)
+      InstPrinter->setCommentStream(CommentStream);
+  }
+
+  ~PTXMCAsmStreamer() {}
+
+  inline void EmitEOL() {
+    // If we don't have any comments, just emit a \n.
+    if (!IsVerboseAsm) {
+      OS << '\n';
+      return;
+    }
+    EmitCommentsAndEOL();
+  }
+  void EmitCommentsAndEOL();
+
+  /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+  /// all.
+  virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+  /// hasRawTextSupport - We support EmitRawText.
+  virtual bool hasRawTextSupport() const { return true; }
+
+  /// AddComment - Add a comment that can be emitted to the generated .s
+  /// file if applicable as a QoI issue to make the output of the compiler
+  /// more readable.  This only affects the MCAsmStreamer, and only when
+  /// verbose assembly output is enabled.
+  virtual void AddComment(const Twine &T);
+
+  /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+  virtual void AddEncodingComment(const MCInst &Inst);
+
+  /// GetCommentOS - Return a raw_ostream that comments can be written to.
+  /// Unlike AddComment, you are required to terminate comments with \n if you
+  /// use this method.
+  virtual raw_ostream &GetCommentOS() {
+    if (!IsVerboseAsm)
+      return nulls();  // Discard comments unless in verbose asm mode.
+    return CommentStream;
+  }
+
+  /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+  virtual void AddBlankLine() {
+    EmitEOL();
+  }
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void InitSections() {}
+
+  virtual void EmitLabel(MCSymbol *Symbol);
+
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+  virtual void EmitThumbFunc(MCSymbol *Func);
+
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                        const MCSymbol *LastLabel,
+                                        const MCSymbol *Label);
+
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+  virtual void EmitCOFFSymbolType(int Type);
+  virtual void EndCOFFSymbolDef();
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment);
+
+  /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+  ///
+  /// @param Symbol - The common symbol to emit.
+  /// @param Size - The size of the common symbol.
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0);
+
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0);
+
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             bool isPCRel, unsigned AddrSpace);
+  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+  virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+                        unsigned AddrSpace);
+
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0);
+
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit = 0);
+
+  virtual void EmitValueToOffset(const MCExpr *Offset,
+                                 unsigned char Value = 0);
+
+  virtual void EmitFileDirective(StringRef Filename);
+  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+  virtual void EmitInstruction(const MCInst &Inst);
+
+  /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+  /// the specified string in the output .s file.  This capability is
+  /// indicated by the hasRawTextSupport() predicate.
+  virtual void EmitRawText(StringRef String);
+
+  virtual void Finish();
+
+  /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+  if (!IsVerboseAsm) return;
+
+  // Make sure that CommentStream is flushed.
+  CommentStream.flush();
+
+  T.toVector(CommentToEmit);
+  // Each comment goes on its own line.
+  CommentToEmit.push_back('\n');
+
+  // Tell the comment stream that the vector changed underneath it.
+  CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+  if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+    OS << '\n';
+    return;
+  }
+
+  CommentStream.flush();
+  StringRef Comments = CommentToEmit.str();
+
+  assert(Comments.back() == '\n' &&
+         "Comment array not newline terminated");
+  do {
+    // Emit a line of comments.
+    OS.PadToColumn(MAI.getCommentColumn());
+    size_t Position = Comments.find('\n');
+    OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+    Comments = Comments.substr(Position+1);
+  } while (!Comments.empty());
+
+  CommentToEmit.clear();
+  // Tell the comment stream that the vector changed underneath it.
+  CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+  assert(Bytes && "Invalid size!");
+  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+  assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
+
+  OS << *Symbol << MAI.getLabelSuffix();
+  EmitEOL();
+  Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  OS << *Symbol << " = " << *Value;
+  EmitEOL();
+
+  // FIXME: Lift context changes into super class.
+  Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+                                         const MCSymbol *Symbol) {
+  OS << ".weakref " << *Alias << ", " << *Symbol;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                                const MCSymbol *LastLabel,
+                                                const MCSymbol *Label) {
+  report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                           MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                        unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                                    unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+                                      MCSymbol *Symbol,
+                                      uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+  OS << '"';
+
+  for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+    unsigned char C = Data[i];
+    if (C == '"' || C == '\\') {
+      OS << '\\' << (char)C;
+      continue;
+    }
+
+    if (isprint((unsigned char)C)) {
+      OS << (char)C;
+      continue;
+    }
+
+    switch (C) {
+      case '\b': OS << "\\b"; break;
+      case '\f': OS << "\\f"; break;
+      case '\n': OS << "\\n"; break;
+      case '\r': OS << "\\r"; break;
+      case '\t': OS << "\\t"; break;
+      default:
+        OS << '\\';
+        OS << toOctal(C >> 6);
+        OS << toOctal(C >> 3);
+        OS << toOctal(C >> 0);
+        break;
+    }
+  }
+
+  OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  if (Data.empty()) return;
+
+  if (Data.size() == 1) {
+    OS << MAI.getData8bitsDirective(AddrSpace);
+    OS << (unsigned)(unsigned char)Data[0];
+    EmitEOL();
+    return;
+  }
+
+  // If the data ends with 0 and the target supports .asciz, use it, otherwise
+  // use .ascii
+  if (MAI.getAscizDirective() && Data.back() == 0) {
+    OS << MAI.getAscizDirective();
+    Data = Data.substr(0, Data.size()-1);
+  } else {
+    OS << MAI.getAsciiDirective();
+  }
+
+  OS << ' ';
+  PrintQuotedString(Data, OS);
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+                                     bool isPCRel, unsigned AddrSpace) {
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(!isPCRel && "Cannot emit pc relative relocations!");
+  const char *Directive = 0;
+  switch (Size) {
+  default: break;
+  case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+  case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+  case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+  case 8:
+    Directive = MAI.getData64bitsDirective(AddrSpace);
+    // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+    if (Directive) break;
+    int64_t IntValue;
+    if (!Value->EvaluateAsAbsolute(IntValue))
+      report_fatal_error("Don't know how to emit this value.");
+    if (getContext().getTargetAsmInfo().isLittleEndian()) {
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+    } else {
+      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+    }
+    return;
+  }
+
+  assert(Directive && "Invalid size for machine code value!");
+  OS << Directive << *Value;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value,
+                                        unsigned AddrSpace) {
+  assert(MAI.hasLEB128() && "Cannot print a .uleb");
+  OS << ".uleb128 " << *Value;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value,
+                                        unsigned AddrSpace) {
+  assert(MAI.hasLEB128() && "Cannot print a .sleb");
+  OS << ".sleb128 " << *Value;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+  assert(MAI.getGPRel32Directive() != 0);
+  OS << MAI.getGPRel32Directive() << *Value;
+  EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue.  This implements directives such as '.space'.
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+                                unsigned AddrSpace) {
+  if (NumBytes == 0) return;
+
+  if (AddrSpace == 0)
+    if (const char *ZeroDirective = MAI.getZeroDirective()) {
+      OS << ZeroDirective << NumBytes;
+      if (FillValue != 0)
+        OS << ',' << (int)FillValue;
+      EmitEOL();
+      return;
+    }
+
+  // Emit a byte at a time.
+  MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+                                            unsigned ValueSize,
+                                            unsigned MaxBytesToEmit) {
+  // Some assemblers don't support non-power of two alignments, so we always
+  // emit alignments as a power of two if possible.
+  if (isPowerOf2_32(ByteAlignment)) {
+    switch (ValueSize) {
+    default: llvm_unreachable("Invalid size for machine code value!");
+    case 1: OS << MAI.getAlignDirective(); break;
+    // FIXME: use MAI for this!
+    case 2: OS << ".p2alignw "; break;
+    case 4: OS << ".p2alignl "; break;
+    case 8: llvm_unreachable("Unsupported alignment size!");
+    }
+
+    if (MAI.getAlignmentIsInBytes())
+      OS << ByteAlignment;
+    else
+      OS << Log2_32(ByteAlignment);
+
+    if (Value || MaxBytesToEmit) {
+      OS << ", 0x";
+      OS.write_hex(truncateToSize(Value, ValueSize));
+
+      if (MaxBytesToEmit)
+        OS << ", " << MaxBytesToEmit;
+    }
+    EmitEOL();
+    return;
+  }
+
+  // Non-power of two alignment.  This is not widely supported by assemblers.
+  // FIXME: Parameterize this based on MAI.
+  switch (ValueSize) {
+  default: llvm_unreachable("Invalid size for machine code value!");
+  case 1: OS << ".balign";  break;
+  case 2: OS << ".balignw"; break;
+  case 4: OS << ".balignl"; break;
+  case 8: llvm_unreachable("Unsupported alignment size!");
+  }
+
+  OS << ' ' << ByteAlignment;
+  OS << ", " << truncateToSize(Value, ValueSize);
+  if (MaxBytesToEmit)
+    OS << ", " << MaxBytesToEmit;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+                                         unsigned MaxBytesToEmit) {}
+
+void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+                                         unsigned char Value) {}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+  assert(MAI.hasSingleParameterDotFile());
+  OS << "\t.file\t";
+  PrintQuotedString(Filename, OS);
+  EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+                                              StringRef Filename){
+  OS << "\t.file\t" << FileNo << ' ';
+  PrintQuotedString(Filename, OS);
+  EmitEOL();
+  return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+  // Show the encoding in a comment if we have a code emitter.
+  if (Emitter)
+    AddEncodingComment(Inst);
+
+  // Show the MCInst if enabled.
+  if (ShowInst) {
+    Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+    GetCommentOS() << "\n";
+  }
+
+  // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+  if (InstPrinter)
+    InstPrinter->printInst(&Inst, OS);
+  else
+    Inst.print(OS, &MAI);
+  EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file.  This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+  if (!String.empty() && String.back() == '\n')
+    String = String.substr(0, String.size()-1);
+  OS << String;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+  MCStreamer *createPTXAsmStreamer(MCContext &Context,
+                                   formatted_raw_ostream &OS,
+                                   bool isVerboseAsm, bool useLoc,
+                                   MCInstPrinter *IP,
+                                   MCCodeEmitter *CE, TargetAsmBackend *TAB,
+                                   bool ShowInst) {
+    return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+                                IP, CE, ShowInst);
+  }
+}
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 000000000000..b37c740006f9
--- /dev/null
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,96 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must after register allocation!
+
+namespace llvm {
+  /// PTXMFInfoExtract - PTX specific code to extract of PTX machine
+  /// function information for PTXAsmPrinter
+  ///
+  class PTXMFInfoExtract : public MachineFunctionPass {
+    private:
+      static char ID;
+
+    public:
+      PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+        : MachineFunctionPass(ID) {}
+
+      virtual bool runOnMachineFunction(MachineFunction &MF);
+
+      virtual const char *getPassName() const {
+        return "PTX Machine Function Info Extractor";
+      }
+  }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
+
+  unsigned retreg = MFI->retReg();
+
+  DEBUG(dbgs()
+        << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+        << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+  DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+             reg < PTX::NUM_TARGET_REGS; ++reg)
+          if (MRI.isPhysRegUsed(reg))
+            dbgs() << "Used Reg: " << reg << "\n";);
+
+  // FIXME: This is a slow linear scanning
+  for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
+    if (MRI.isPhysRegUsed(reg) &&
+        reg != retreg &&
+        (MFI->isKernel() || !MFI->isArgReg(reg)))
+      MFI->addLocalVarReg(reg);
+
+  // Notify MachineFunctionInfo that I've done adding local var reg
+  MFI->doneAddLocalVar();
+
+  DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
+
+  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+             i = MFI->argRegBegin(), e = MFI->argRegEnd();
+	     i != e; ++i)
+        dbgs() << "Arg Reg: " << *i << "\n";);
+
+  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+             i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
+	     i != e; ++i)
+        dbgs() << "Local Var Reg: " << *i << "\n";);
+
+  return false;
+}
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+                                           CodeGenOpt::Level OptLevel) {
+  return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 000000000000..56d044b5fc0d
--- /dev/null
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,79 @@
+//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private PTX target-specific information for each MachineFunction.
+///
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+  bool is_kernel;
+  std::vector<unsigned> reg_arg, reg_local_var;
+  unsigned reg_ret;
+  bool _isDoneAddArg;
+
+public:
+  PTXMachineFunctionInfo(MachineFunction &MF)
+    : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
+      reg_arg.reserve(8);
+      reg_local_var.reserve(32);
+    }
+
+  void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
+
+  void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
+  void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
+  void setRetReg(unsigned reg) { reg_ret = reg; }
+
+  void doneAddArg(void) {
+    std::sort(reg_arg.begin(), reg_arg.end());
+    _isDoneAddArg = true;
+  }
+  void doneAddLocalVar(void) {
+    std::sort(reg_local_var.begin(), reg_local_var.end());
+  }
+
+  bool isDoneAddArg(void) { return _isDoneAddArg; }
+
+  bool isKernel() const { return is_kernel; }
+
+  typedef std::vector<unsigned>::const_iterator reg_iterator;
+
+  bool argRegEmpty() const { return reg_arg.empty(); }
+  int getNumArg() const { return reg_arg.size(); }
+  reg_iterator argRegBegin() const { return reg_arg.begin(); }
+  reg_iterator argRegEnd()   const { return reg_arg.end(); }
+
+  bool localVarRegEmpty() const { return reg_local_var.empty(); }
+  reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
+  reg_iterator localVarRegEnd()   const { return reg_local_var.end(); }
+
+  unsigned retReg() const { return reg_ret; }
+
+  bool isArgReg(unsigned reg) const {
+    return std::binary_search(reg_arg.begin(), reg_arg.end(), reg);
+  }
+
+  bool isLocalVarReg(unsigned reg) const {
+    return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg);
+  }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 000000000000..0f3e7bc2c3a7
--- /dev/null
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,19 @@
+//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+
+using namespace llvm;
+
+#include "PTXGenRegisterInfo.inc"
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 000000000000..67e130f153d5
--- /dev/null
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,63 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#include "PTXGenRegisterInfo.h.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+  PTXRegisterInfo(PTXTargetMachine &TM,
+                  const TargetInstrInfo &TII) {}
+
+  virtual const unsigned
+    *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+    static const unsigned CalleeSavedRegs[] = { 0 };
+    return CalleeSavedRegs; // save nothing
+  }
+
+  virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+    BitVector Reserved(getNumRegs());
+    return Reserved; // reserve no regs
+  }
+
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                   int SPAdj,
+                                   RegScavenger *RS = NULL) const {
+    llvm_unreachable("PTX does not support general function call");
+  }
+
+  virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+    llvm_unreachable("PTX does not have a frame register");
+    return 0;
+  }
+
+  virtual unsigned getRARegister() const {
+    llvm_unreachable("PTX does not have a return address register");
+    return 0;
+  }
+
+  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+    return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+  }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 000000000000..22e2b343a0e5
--- /dev/null
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,102 @@
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+  let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+//  Registers
+//===----------------------------------------------------------------------===//
+
+def P0  : PTXReg<"p0">;
+def P1  : PTXReg<"p1">;
+def P2  : PTXReg<"p2">;
+def P3  : PTXReg<"p3">;
+def P4  : PTXReg<"p4">;
+def P5  : PTXReg<"p5">;
+def P6  : PTXReg<"p6">;
+def P7  : PTXReg<"p7">;
+def P8  : PTXReg<"p8">;
+def P9  : PTXReg<"p9">;
+def P10 : PTXReg<"p10">;
+def P11 : PTXReg<"p11">;
+def P12 : PTXReg<"p12">;
+def P13 : PTXReg<"p13">;
+def P14 : PTXReg<"p14">;
+def P15 : PTXReg<"p15">;
+def P16 : PTXReg<"p16">;
+def P17 : PTXReg<"p17">;
+def P18 : PTXReg<"p18">;
+def P19 : PTXReg<"p19">;
+def P20 : PTXReg<"p20">;
+def P21 : PTXReg<"p21">;
+def P22 : PTXReg<"p22">;
+def P23 : PTXReg<"p23">;
+def P24 : PTXReg<"p24">;
+def P25 : PTXReg<"p25">;
+def P26 : PTXReg<"p26">;
+def P27 : PTXReg<"p27">;
+def P28 : PTXReg<"p28">;
+def P29 : PTXReg<"p29">;
+def P30 : PTXReg<"p30">;
+def P31 : PTXReg<"p31">;
+
+def R0  : PTXReg<"r0">;
+def R1  : PTXReg<"r1">;
+def R2  : PTXReg<"r2">;
+def R3  : PTXReg<"r3">;
+def R4  : PTXReg<"r4">;
+def R5  : PTXReg<"r5">;
+def R6  : PTXReg<"r6">;
+def R7  : PTXReg<"r7">;
+def R8  : PTXReg<"r8">;
+def R9  : PTXReg<"r9">;
+def R10 : PTXReg<"r10">;
+def R11 : PTXReg<"r11">;
+def R12 : PTXReg<"r12">;
+def R13 : PTXReg<"r13">;
+def R14 : PTXReg<"r14">;
+def R15 : PTXReg<"r15">;
+def R16 : PTXReg<"r16">;
+def R17 : PTXReg<"r17">;
+def R18 : PTXReg<"r18">;
+def R19 : PTXReg<"r19">;
+def R20 : PTXReg<"r20">;
+def R21 : PTXReg<"r21">;
+def R22 : PTXReg<"r22">;
+def R23 : PTXReg<"r23">;
+def R24 : PTXReg<"r24">;
+def R25 : PTXReg<"r25">;
+def R26 : PTXReg<"r26">;
+def R27 : PTXReg<"r27">;
+def R28 : PTXReg<"r28">;
+def R29 : PTXReg<"r29">;
+def R30 : PTXReg<"r30">;
+def R31 : PTXReg<"r31">;
+
+//===----------------------------------------------------------------------===//
+//  Register classes
+//===----------------------------------------------------------------------===//
+
+def Preds : RegisterClass<"PTX", [i1], 8,
+                          [P0, P1, P2, P3, P4, P5, P6, P7,
+                           P8, P9, P10, P11, P12, P13, P14, P15,
+                           P16, P17, P18, P19, P20, P21, P22, P23,
+                           P24, P25, P26, P27, P28, P29, P30, P31]>;
+
+def RRegs32 : RegisterClass<"PTX", [i32], 32,
+                            [R0, R1, R2, R3, R4, R5, R6, R7,
+                             R8, R9, R10, R11, R12, R13, R14, R15,
+                             R16, R17, R18, R19, R20, R21, R22, R23,
+                             R24, R25, R26, R27, R28, R29, R30, R31]>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 000000000000..00e2c882a5ca
--- /dev/null
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,23 @@
+//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
+  std::string TARGET = "sm_20";
+  // TODO: call ParseSubtargetFeatures(FS, TARGET);
+}
+
+#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 000000000000..7fd85f873ae4
--- /dev/null
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,32 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+namespace llvm {
+  class PTXSubtarget : public TargetSubtarget {
+    private:
+      bool is_sm20;
+
+    public:
+      PTXSubtarget(const std::string &TT, const std::string &FS);
+
+      std::string ParseSubtargetFeatures(const std::string &FS,
+                                         const std::string &CPU);
+  }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 000000000000..b263813cb4e7
--- /dev/null
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,60 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXMCAsmInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace llvm {
+  MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+                                   bool isVerboseAsm, bool useLoc,
+                                   MCInstPrinter *InstPrint,
+                                   MCCodeEmitter *CE,
+                                   TargetAsmBackend *TAB,
+                                   bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+  RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
+  RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
+  TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+                                   const std::string &TT,
+                                   const std::string &FS)
+  : LLVMTargetMachine(T, TT),
+    DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"),
+    FrameLowering(Subtarget),
+    InstrInfo(*this),
+    TLInfo(*this),
+    Subtarget(TT, FS) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  PM.add(createPTXISelDag(*this, OptLevel));
+  return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must after register allocation!
+  PM.add(createPTXMFInfoExtract(*this, OptLevel));
+  return false;
+}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 000000000000..728e36f56f01
--- /dev/null
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,60 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+  private:
+    const TargetData DataLayout;
+    PTXFrameLowering FrameLowering;
+    PTXInstrInfo InstrInfo;
+    PTXTargetLowering TLInfo;
+    PTXSubtarget Subtarget;
+
+  public:
+    PTXTargetMachine(const Target &T, const std::string &TT,
+                     const std::string &FS);
+
+    virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+    virtual const TargetFrameLowering *getFrameLowering() const {
+      return &FrameLowering;
+    }
+
+    virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+    virtual const TargetRegisterInfo *getRegisterInfo() const {
+      return &InstrInfo.getRegisterInfo(); }
+
+    virtual const PTXTargetLowering *getTargetLowering() const {
+      return &TLInfo; }
+
+    virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+    virtual bool addInstSelector(PassManagerBase &PM,
+                                 CodeGenOpt::Level OptLevel);
+    virtual bool addPostRegAlloc(PassManagerBase &PM,
+                                 CodeGenOpt::Level OptLevel);
+}; // class PTXTargetMachine
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..4b09cf5ce099
--- /dev/null
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPTXInfo
+  PTXTargetInfo.cpp
+  )
+
+add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/lib/Target/PTX/TargetInfo/Makefile b/lib/Target/PTX/TargetInfo/Makefile
new file mode 100644
index 000000000000..8619785889aa
--- /dev/null
+++ b/lib/Target/PTX/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 000000000000..a577d7755af5
--- /dev/null
+++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTXTarget;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+  // see llvm/ADT/Triple.h
+  RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
+}
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 42cd4862a98f..000000000000
--- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPowerPCAsmPrinter
-  PPCAsmPrinter.cpp
-  )
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile
deleted file mode 100644
index bd5dce12dc43..000000000000
--- a/lib/Target/PowerPC/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPowerPCAsmPrinter
-
-# Hack: we need to include 'main' PowerPC target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
deleted file mode 100644
index c1a5663be931..000000000000
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ /dev/null
@@ -1,922 +0,0 @@
-//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PowerPC assembly language. This printer is
-// the output mechanism used by `llc'.
-//
-// Documentation at http://developer.apple.com/documentation/DeveloperTools/
-// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asmprinter"
-#include "PPC.h"
-#include "PPCPredicates.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-namespace {
-  class PPCAsmPrinter : public AsmPrinter {
-  protected:
-    DenseMap<MCSymbol*, MCSymbol*> TOC;
-    const PPCSubtarget &Subtarget;
-    uint64_t LabelID;
-  public:
-    explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer),
-        Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {}
-
-    virtual const char *getPassName() const {
-      return "PowerPC Assembly Printer";
-    }
-
-    PPCTargetMachine &getTM() {
-      return static_cast<PPCTargetMachine&>(TM);
-    }
-
-    unsigned enumRegToMachineReg(unsigned enumReg) {
-      switch (enumReg) {
-      default: llvm_unreachable("Unhandled register!");
-      case PPC::CR0:  return  0;
-      case PPC::CR1:  return  1;
-      case PPC::CR2:  return  2;
-      case PPC::CR3:  return  3;
-      case PPC::CR4:  return  4;
-      case PPC::CR5:  return  5;
-      case PPC::CR6:  return  6;
-      case PPC::CR7:  return  7;
-      }
-      llvm_unreachable(0);
-    }
-
-    /// printInstruction - This method is automatically generated by tablegen
-    /// from the instruction set description.  This method returns true if the
-    /// machine instruction was sufficiently described to print it, otherwise it
-    /// returns false.
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-
-    virtual void EmitInstruction(const MachineInstr *MI);
-    void printOp(const MachineOperand &MO, raw_ostream &O);
-
-    /// stripRegisterPrefix - This method strips the character prefix from a
-    /// register name so that only the number is left.  Used by for linux asm.
-    const char *stripRegisterPrefix(const char *RegName) {
-      switch (RegName[0]) {
-      case 'r':
-      case 'f':
-      case 'v': return RegName + 1;
-      case 'c': if (RegName[1] == 'r') return RegName + 2;
-      }
-
-      return RegName;
-    }
-
-    /// printRegister - Print register according to target requirements.
-    ///
-    void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
-      unsigned RegNo = MO.getReg();
-      assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
-
-      // If we should use 0 for R0.
-      if (R0AsZero && RegNo == PPC::R0) {
-        O << "0";
-        return;
-      }
-
-      const char *RegName = getRegisterName(RegNo);
-      // Linux assembler (Others?) does not take register mnemonics.
-      // FIXME - What about special registers used in mfspr/mtspr?
-      if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
-      O << RegName;
-    }
-
-    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      if (MO.isReg()) {
-        printRegister(MO, false, O);
-      } else if (MO.isImm()) {
-        O << MO.getImm();
-      } else {
-        printOp(MO, O);
-      }
-    }
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O);
-
-
-    void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo,
-                           raw_ostream &O) {
-      char value = MI->getOperand(OpNo).getImm();
-      value = (value << (32-5)) >> (32-5);
-      O << (int)value;
-    }
-    void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo,
-                           raw_ostream &O) {
-      unsigned char value = MI->getOperand(OpNo).getImm();
-      assert(value <= 31 && "Invalid u5imm argument!");
-      O << (unsigned int)value;
-    }
-    void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo,
-                           raw_ostream &O) {
-      unsigned char value = MI->getOperand(OpNo).getImm();
-      assert(value <= 63 && "Invalid u6imm argument!");
-      O << (unsigned int)value;
-    }
-    void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, 
-                            raw_ostream &O) {
-      O << (short)MI->getOperand(OpNo).getImm();
-    }
-    void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo,
-                            raw_ostream &O) {
-      O << (unsigned short)MI->getOperand(OpNo).getImm();
-    }
-    void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo,
-                              raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        O << (short)(MI->getOperand(OpNo).getImm()*4);
-      } else {
-        O << "lo16(";
-        printOp(MI->getOperand(OpNo), O);
-        if (TM.getRelocationModel() == Reloc::PIC_)
-          O << "-\"L" << getFunctionNumber() << "$pb\")";
-        else
-          O << ')';
-      }
-    }
-    void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
-                            raw_ostream &O) {
-      // Branches can take an immediate operand.  This is used by the branch
-      // selection pass to print $+8, an eight byte displacement from the PC.
-      if (MI->getOperand(OpNo).isImm()) {
-        O << "$+" << MI->getOperand(OpNo).getImm()*4;
-      } else {
-        printOp(MI->getOperand(OpNo), O);
-      }
-    }
-    void printCallOperand(const MachineInstr *MI, unsigned OpNo,
-                          raw_ostream &O) {
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      if (TM.getRelocationModel() != Reloc::Static) {
-        if (MO.isGlobal()) {
-          const GlobalValue *GV = MO.getGlobal();
-          if (GV->isDeclaration() || GV->isWeakForLinker()) {
-            // Dynamically-resolved functions need a stub for the function.
-            MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
-            MachineModuleInfoImpl::StubValueTy &StubSym =
-              MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-            if (StubSym.getPointer() == 0)
-              StubSym = MachineModuleInfoImpl::
-                StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
-            O << *Sym;
-            return;
-          }
-        }
-        if (MO.isSymbol()) {
-          SmallString<128> TempNameStr;
-          TempNameStr += StringRef(MO.getSymbolName());
-          TempNameStr += StringRef("$stub");
-          
-          MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
-          MachineModuleInfoImpl::StubValueTy &StubSym =
-            MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-          if (StubSym.getPointer() == 0)
-            StubSym = MachineModuleInfoImpl::
-              StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
-          O << *Sym;
-          return;
-        }
-      }
-
-      printOp(MI->getOperand(OpNo), O);
-    }
-    void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
-                             raw_ostream &O) {
-     O << (int)MI->getOperand(OpNo).getImm()*4;
-    }
-    void printPICLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      O << "\"L" << getFunctionNumber() << "$pb\"\n";
-      O << "\"L" << getFunctionNumber() << "$pb\":";
-    }
-    void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        printS16ImmOperand(MI, OpNo, O);
-      } else {
-        if (Subtarget.isDarwin()) O << "ha16(";
-        printOp(MI->getOperand(OpNo), O);
-        if (TM.getRelocationModel() == Reloc::PIC_)
-          O << "-\"L" << getFunctionNumber() << "$pb\"";
-        if (Subtarget.isDarwin())
-          O << ')';
-        else
-          O << "@ha";
-      }
-    }
-    void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm()) {
-        printS16ImmOperand(MI, OpNo, O);
-      } else {
-        if (Subtarget.isDarwin()) O << "lo16(";
-        printOp(MI->getOperand(OpNo), O);
-        if (TM.getRelocationModel() == Reloc::PIC_)
-          O << "-\"L" << getFunctionNumber() << "$pb\"";
-        if (Subtarget.isDarwin())
-          O << ')';
-        else
-          O << "@l";
-      }
-    }
-    void printcrbitm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      unsigned CCReg = MI->getOperand(OpNo).getReg();
-      unsigned RegNo = enumRegToMachineReg(CCReg);
-      O << (0x80 >> RegNo);
-    }
-    // The new addressing mode printers.
-    void printMemRegImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      printSymbolLo(MI, OpNo, O);
-      O << '(';
-      if (MI->getOperand(OpNo+1).isReg() &&
-          MI->getOperand(OpNo+1).getReg() == PPC::R0)
-        O << "0";
-      else
-        printOperand(MI, OpNo+1, O);
-      O << ')';
-    }
-    void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo,
-                               raw_ostream &O) {
-      if (MI->getOperand(OpNo).isImm())
-        printS16X4ImmOperand(MI, OpNo, O);
-      else
-        printSymbolLo(MI, OpNo, O);
-      O << '(';
-      if (MI->getOperand(OpNo+1).isReg() &&
-          MI->getOperand(OpNo+1).getReg() == PPC::R0)
-        O << "0";
-      else
-        printOperand(MI, OpNo+1, O);
-      O << ')';
-    }
-
-    void printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
-      // When used as the base register, r0 reads constant zero rather than
-      // the value contained in the register.  For this reason, the darwin
-      // assembler requires that we print r0 as 0 (no r) when used as the base.
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      printRegister(MO, true, O);
-      O << ", ";
-      printOperand(MI, OpNo+1, O);
-    }
-
-    void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo,
-                            raw_ostream &O) {
-      const MachineOperand &MO = MI->getOperand(OpNo);
-      assert(MO.isGlobal());
-      MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
-
-      // Map symbol -> label of TOC entry.
-      MCSymbol *&TOCEntry = TOC[Sym];
-      if (TOCEntry == 0)
-        TOCEntry = OutContext.
-          GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
-                            "C" + Twine(LabelID++));
-
-      O << *TOCEntry << "@toc";
-    }
-
-    void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
-                               raw_ostream &O, const char *Modifier);
-
-    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
-
-      MachineLocation Location;
-      assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
-      // Frame address.  Currently handles register +- offset only.
-      if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
-        Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
-      else {
-        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
-      }
-      return Location;
-    }
-  };
-
-  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
-  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : PPCAsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "Linux PPC Assembly Printer";
-    }
-
-    bool doFinalization(Module &M);
-
-    virtual void EmitFunctionEntryLabel();
-  };
-
-  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
-  /// OS X
-  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : PPCAsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "Darwin PPC Assembly Printer";
-    }
-
-    bool doFinalization(Module &M);
-    void EmitStartOfAsmFile(Module &M);
-
-    void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
-  };
-} // end of anonymous namespace
-
-// Include the auto-generated portion of the assembly writer
-#include "PPCGenAsmWriter.inc"
-
-void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
-  switch (MO.getType()) {
-  case MachineOperand::MO_Immediate:
-    llvm_unreachable("printOp() does not handle immediate values");
-
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    // FIXME: PIC relocation model
-    return;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    return;
-  case MachineOperand::MO_BlockAddress:
-    O << *GetBlockAddressSymbol(MO.getBlockAddress());
-    return;
-  case MachineOperand::MO_ExternalSymbol: {
-    // Computing the address of an external symbol, not calling it.
-    if (TM.getRelocationModel() == Reloc::Static) {
-      O << *GetExternalSymbolSymbol(MO.getSymbolName());
-      return;
-    }
-
-    MCSymbol *NLPSym = 
-      OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
-                                   MO.getSymbolName()+"$non_lazy_ptr");
-    MachineModuleInfoImpl::StubValueTy &StubSym = 
-      MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
-    if (StubSym.getPointer() == 0)
-      StubSym = MachineModuleInfoImpl::
-        StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
-    
-    O << *NLPSym;
-    return;
-  }
-  case MachineOperand::MO_GlobalAddress: {
-    // Computing the address of a global symbol, not calling it.
-    const GlobalValue *GV = MO.getGlobal();
-    MCSymbol *SymToPrint;
-
-    // External or weakly linked global variables need non-lazily-resolved stubs
-    if (TM.getRelocationModel() != Reloc::Static &&
-        (GV->isDeclaration() || GV->isWeakForLinker())) {
-      if (!GV->hasHiddenVisibility()) {
-        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        MachineModuleInfoImpl::StubValueTy &StubSym = 
-          MMI->getObjFileInfo<MachineModuleInfoMachO>()
-            .getGVStubEntry(SymToPrint);
-        if (StubSym.getPointer() == 0)
-          StubSym = MachineModuleInfoImpl::
-            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
-      } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
-                 GV->hasAvailableExternallyLinkage()) {
-        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        
-        MachineModuleInfoImpl::StubValueTy &StubSym = 
-          MMI->getObjFileInfo<MachineModuleInfoMachO>().
-                    getHiddenGVStubEntry(SymToPrint);
-        if (StubSym.getPointer() == 0)
-          StubSym = MachineModuleInfoImpl::
-            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
-      } else {
-        SymToPrint = Mang->getSymbol(GV);
-      }
-    } else {
-      SymToPrint = Mang->getSymbol(GV);
-    }
-    
-    O << *SymToPrint;
-
-    printOffset(MO.getOffset(), O);
-    return;
-  }
-
-  default:
-    O << "<unknown operand type: " << MO.getType() << ">";
-    return;
-  }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned AsmVariant,
-                                    const char *ExtraCode, raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
-    switch (ExtraCode[0]) {
-    default: return true;  // Unknown modifier.
-    case 'c': // Don't print "$" before a global var name or constant.
-      // PPC never has a prefix.
-      printOperand(MI, OpNo, O);
-      return false;
-    case 'L': // Write second word of DImode reference.
-      // Verify that this operand has two consecutive registers.
-      if (!MI->getOperand(OpNo).isReg() ||
-          OpNo+1 == MI->getNumOperands() ||
-          !MI->getOperand(OpNo+1).isReg())
-        return true;
-      ++OpNo;   // Return the high-part.
-      break;
-    case 'I':
-      // Write 'i' if an integer constant, otherwise nothing.  Used to print
-      // addi vs add, etc.
-      if (MI->getOperand(OpNo).isImm())
-        O << "i";
-      return false;
-    }
-  }
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-// At the moment, all inline asm memory operands are a single register.
-// In any case, the output of this routine should always be just one
-// assembler operand.
-
-bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                          unsigned AsmVariant,
-                                          const char *ExtraCode,
-                                          raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-  assert (MI->getOperand(OpNo).isReg());
-  O << "0(";
-  printOperand(MI, OpNo, O);
-  O << ")";
-  return false;
-}
-
-void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
-                                          raw_ostream &O, const char *Modifier){
-  assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
-  unsigned Code = MI->getOperand(OpNo).getImm();
-  if (!strcmp(Modifier, "cc")) {
-    switch ((PPC::Predicate)Code) {
-    case PPC::PRED_ALWAYS: return; // Don't print anything for always.
-    case PPC::PRED_LT: O << "lt"; return;
-    case PPC::PRED_LE: O << "le"; return;
-    case PPC::PRED_EQ: O << "eq"; return;
-    case PPC::PRED_GE: O << "ge"; return;
-    case PPC::PRED_GT: O << "gt"; return;
-    case PPC::PRED_NE: O << "ne"; return;
-    case PPC::PRED_UN: O << "un"; return;
-    case PPC::PRED_NU: O << "nu"; return;
-    }
-
-  } else {
-    assert(!strcmp(Modifier, "reg") &&
-           "Need to specify 'cc' or 'reg' as predicate op modifier!");
-    // Don't print the register for 'always'.
-    if (Code == PPC::PRED_ALWAYS) return;
-    printOperand(MI, OpNo+1, O);
-  }
-}
-
-
-/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
-/// the current output stream.
-///
-void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  SmallString<128> Str;
-  raw_svector_ostream O(Str);
-
-  if (MI->getOpcode() == TargetOpcode::DBG_VALUE) {
-    unsigned NOps = MI->getNumOperands();
-    assert(NOps==4);
-    O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
-    // cast away const; DIetc do not take const operands for some reason.
-    DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
-    O << V.getName();
-    O << " <- ";
-    // Frame address.  Currently handles register +- offset only.
-    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
-    O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
-    O << ']';
-    O << "+";
-    printOperand(MI, NOps-2, O);
-    OutStreamer.EmitRawText(O.str());
-    return;
-  }
-  // Check for slwi/srwi mnemonics.
-  if (MI->getOpcode() == PPC::RLWINM) {
-    unsigned char SH = MI->getOperand(2).getImm();
-    unsigned char MB = MI->getOperand(3).getImm();
-    unsigned char ME = MI->getOperand(4).getImm();
-    bool useSubstituteMnemonic = false;
-    if (SH <= 31 && MB == 0 && ME == (31-SH)) {
-      O << "\tslwi "; useSubstituteMnemonic = true;
-    }
-    if (SH <= 31 && MB == (32-SH) && ME == 31) {
-      O << "\tsrwi "; useSubstituteMnemonic = true;
-      SH = 32-SH;
-    }
-    if (useSubstituteMnemonic) {
-      printOperand(MI, 0, O);
-      O << ", ";
-      printOperand(MI, 1, O);
-      O << ", " << (unsigned int)SH;
-      OutStreamer.EmitRawText(O.str());
-      return;
-    }
-  }
-  
-  if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
-      MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
-    O << "\tmr ";
-    printOperand(MI, 0, O);
-    O << ", ";
-    printOperand(MI, 1, O);
-    OutStreamer.EmitRawText(O.str());
-    return;
-  }
-  
-  if (MI->getOpcode() == PPC::RLDICR) {
-    unsigned char SH = MI->getOperand(2).getImm();
-    unsigned char ME = MI->getOperand(3).getImm();
-    // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
-    if (63-SH == ME) {
-      O << "\tsldi ";
-      printOperand(MI, 0, O);
-      O << ", ";
-      printOperand(MI, 1, O);
-      O << ", " << (unsigned int)SH;
-      OutStreamer.EmitRawText(O.str());
-      return;
-    }
-  }
-
-  printInstruction(MI, O);
-  OutStreamer.EmitRawText(O.str());
-}
-
-void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
-  if (!Subtarget.isPPC64())  // linux/ppc32 - Normal entry label.
-    return AsmPrinter::EmitFunctionEntryLabel();
-    
-  // Emit an official procedure descriptor.
-  // FIXME 64-bit SVR4: Use MCSection here!
-  OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
-  OutStreamer.EmitRawText(StringRef("\t.align 3"));
-  OutStreamer.EmitLabel(CurrentFnSym);
-  OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
-                          ",.TOC.@tocbase");
-  OutStreamer.EmitRawText(StringRef("\t.previous"));
-  OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
-}
-
-
-bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
-  const TargetData *TD = TM.getTargetData();
-
-  bool isPPC64 = TD->getPointerSizeInBits() == 64;
-
-  if (isPPC64 && !TOC.empty()) {
-    // FIXME 64-bit SVR4: Use MCSection here?
-    OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
-
-    // FIXME: This is nondeterminstic!
-    for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
-         E = TOC.end(); I != E; ++I) {
-      OutStreamer.EmitLabel(I->second);
-      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
-                              "[TC]," + I->first->getName());
-    }
-  }
-
-  return AsmPrinter::doFinalization(M);
-}
-
-void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
-  static const char *const CPUDirectives[] = {
-    "",
-    "ppc",
-    "ppc601",
-    "ppc602",
-    "ppc603",
-    "ppc7400",
-    "ppc750",
-    "ppc970",
-    "ppc64"
-  };
-
-  unsigned Directive = Subtarget.getDarwinDirective();
-  if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
-    Directive = PPC::DIR_970;
-  if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
-    Directive = PPC::DIR_7400;
-  if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
-    Directive = PPC::DIR_64;
-  assert(Directive <= PPC::DIR_64 && "Directive out of range.");
-  OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
-
-  // Prime text sections so they are adjacent.  This reduces the likelihood a
-  // large data or debug section causes a branch to exceed 16M limit.
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
-  OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    OutStreamer.SwitchSection(
-           OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
-                                      MCSectionMachO::S_SYMBOL_STUBS |
-                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                      32, SectionKind::getText()));
-  } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
-    OutStreamer.SwitchSection(
-           OutContext.getMachOSection("__TEXT","__symbol_stub1",
-                                      MCSectionMachO::S_SYMBOL_STUBS |
-                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                      16, SectionKind::getText()));
-  }
-  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-}
-
-static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
-  // Remove $stub suffix, add $lazy_ptr.
-  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
-  TmpStr += "$lazy_ptr";
-  return Ctx.GetOrCreateSymbol(TmpStr.str());
-}
-
-static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
-  // Add $tmp suffix to $stub, yielding $stub$tmp.
-  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
-  TmpStr += "$tmp";
-  return Ctx.GetOrCreateSymbol(TmpStr.str());
-}
-
-void PPCDarwinAsmPrinter::
-EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
-  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
-  
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
-
-  // .lazy_symbol_pointer
-  const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
-  
-  // Output stubs for dynamically-linked functions
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    const MCSection *StubSection = 
-    OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
-                               MCSectionMachO::S_SYMBOL_STUBS |
-                               MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                               32, SectionKind::getText());
-    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-      OutStreamer.SwitchSection(StubSection);
-      EmitAlignment(4);
-      
-      MCSymbol *Stub = Stubs[i].first;
-      MCSymbol *RawSym = Stubs[i].second.getPointer();
-      MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
-      MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
-                                           
-      OutStreamer.EmitLabel(Stub);
-      OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-      // FIXME: MCize this.
-      OutStreamer.EmitRawText(StringRef("\tmflr r0"));
-      OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
-      OutStreamer.EmitLabel(AnonSymbol);
-      OutStreamer.EmitRawText(StringRef("\tmflr r11"));
-      OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
-                              "-" + AnonSymbol->getName() + ")");
-      OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-      
-      if (isPPC64)
-        OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
-                                "-" + AnonSymbol->getName() + ")(r11)");
-      else
-        OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
-                                "-" + AnonSymbol->getName() + ")(r11)");
-      OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
-      OutStreamer.EmitRawText(StringRef("\tbctr"));
-      
-      OutStreamer.SwitchSection(LSPSection);
-      OutStreamer.EmitLabel(LazyPtr);
-      OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-      
-      if (isPPC64)
-        OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
-      else
-        OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
-    }
-    OutStreamer.AddBlankLine();
-    return;
-  }
-  
-  const MCSection *StubSection =
-    OutContext.getMachOSection("__TEXT","__symbol_stub1",
-                               MCSectionMachO::S_SYMBOL_STUBS |
-                               MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                               16, SectionKind::getText());
-  for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-    MCSymbol *Stub = Stubs[i].first;
-    MCSymbol *RawSym = Stubs[i].second.getPointer();
-    MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
-
-    OutStreamer.SwitchSection(StubSection);
-    EmitAlignment(4);
-    OutStreamer.EmitLabel(Stub);
-    OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-    OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
-    if (isPPC64)
-      OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
-                              ")(r11)");
-    else
-      OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
-                              ")(r11)");
-    OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
-    OutStreamer.EmitRawText(StringRef("\tbctr"));
-    OutStreamer.SwitchSection(LSPSection);
-    OutStreamer.EmitLabel(LazyPtr);
-    OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-    
-    if (isPPC64)
-      OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
-    else
-      OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
-  }
-  
-  OutStreamer.AddBlankLine();
-}
-
-
-bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
-  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
-
-  // Darwin/PPC always uses mach-o.
-  const TargetLoweringObjectFileMachO &TLOFMacho = 
-    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
-  MachineModuleInfoMachO &MMIMacho =
-    MMI->getObjFileInfo<MachineModuleInfoMachO>();
-  
-  MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
-  if (!Stubs.empty())
-    EmitFunctionStubs(Stubs);
-
-  if (MAI->doesSupportExceptionHandling() && MMI) {
-    // Add the (possibly multiple) personalities to the set of global values.
-    // Only referenced functions get into the Personalities list.
-    const std::vector<const Function*> &Personalities = MMI->getPersonalities();
-    for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
-         E = Personalities.end(); I != E; ++I) {
-      if (*I) {
-        MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
-        MachineModuleInfoImpl::StubValueTy &StubSym =
-          MMIMacho.getGVStubEntry(NLPSym);
-        StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
-      }
-    }
-  }
-
-  // Output stubs for dynamically-linked functions.
-  Stubs = MMIMacho.GetGVStubList();
-  
-  // Output macho stubs for external and common global variables.
-  if (!Stubs.empty()) {
-    // Switch with ".non_lazy_symbol_pointer" directive.
-    OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
-    EmitAlignment(isPPC64 ? 3 : 2);
-    
-    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-      // L_foo$stub:
-      OutStreamer.EmitLabel(Stubs[i].first);
-      //   .indirect_symbol _foo
-      MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
-      OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
-
-      if (MCSym.getInt())
-        // External to current translation unit.
-        OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
-      else
-        // Internal to current translation unit.
-        //
-        // When we place the LSDA into the TEXT section, the type info pointers
-        // need to be indirect and pc-rel. We accomplish this by using NLPs.
-        // However, sometimes the types are local to the file. So we need to
-        // fill in the value for the NLP in those cases.
-        OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
-                                                      OutContext),
-                              isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
-    }
-
-    Stubs.clear();
-    OutStreamer.AddBlankLine();
-  }
-
-  Stubs = MMIMacho.GetHiddenGVStubList();
-  if (!Stubs.empty()) {
-    OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
-    EmitAlignment(isPPC64 ? 3 : 2);
-    
-    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-      // L_foo$stub:
-      OutStreamer.EmitLabel(Stubs[i].first);
-      //   .long _foo
-      OutStreamer.EmitValue(MCSymbolRefExpr::
-                            Create(Stubs[i].second.getPointer(),
-                                   OutContext),
-                            isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
-    }
-
-    Stubs.clear();
-    OutStreamer.AddBlankLine();
-  }
-
-  // Funny Darwin hack: This flag tells the linker that no global symbols
-  // contain code that falls through to other global symbols (e.g. the obvious
-  // implementation of multiple entry points).  If this doesn't occur, the
-  // linker can safely perform dead code stripping.  Since LLVM never generates
-  // code that does this, it is always safe to set.
-  OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
-
-  return AsmPrinter::doFinalization(M);
-}
-
-/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
-/// for a MachineFunction to the given output stream, in a format that the
-/// Darwin assembler can deal with.
-///
-static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
-                                           MCStreamer &Streamer) {
-  const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
-
-  if (Subtarget->isDarwin())
-    return new PPCDarwinAsmPrinter(tm, Streamer);
-  return new PPCLinuxAsmPrinter(tm, Streamer);
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { 
-  TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
-  TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
-}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 7ffc5eb5f311..f28257999d1b 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -4,6 +4,7 @@ tablegen(PPCGenInstrNames.inc -gen-instr-enums)
 tablegen(PPCGenRegisterNames.inc -gen-register-enums)
 tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
 tablegen(PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
 tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header)
 tablegen(PPCGenRegisterInfo.inc -gen-register-desc)
 tablegen(PPCGenInstrInfo.inc -gen-instr-desc)
@@ -12,14 +13,19 @@ tablegen(PPCGenCallingConv.inc -gen-callingconv)
 tablegen(PPCGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(PowerPCCodeGen
+  PPCAsmBackend.cpp
+  PPCAsmPrinter.cpp
   PPCBranchSelector.cpp
   PPCCodeEmitter.cpp
   PPCHazardRecognizers.cpp
   PPCInstrInfo.cpp
   PPCISelDAGToDAG.cpp
   PPCISelLowering.cpp
+  PPCFrameLowering.cpp
   PPCJITInfo.cpp
   PPCMCAsmInfo.cpp
+  PPCMCCodeEmitter.cpp
+  PPCMCInstLower.cpp
   PPCPredicates.cpp
   PPCRegisterInfo.cpp
   PPCSubtarget.cpp
@@ -27,4 +33,5 @@ add_llvm_target(PowerPCCodeGen
   PPCSelectionDAGInfo.cpp
   )
 
-target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..389ea7742b06
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmPrinter
+  PPCInstPrinter.cpp
+  )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/lib/Target/PowerPC/InstPrinter/Makefile b/lib/Target/PowerPC/InstPrinter/Makefile
new file mode 100644
index 000000000000..f097e84248ff
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include 'main' powerpc target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..c8db0c404765
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,292 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  // Check for slwi/srwi mnemonics.
+  if (MI->getOpcode() == PPC::RLWINM) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char MB = MI->getOperand(3).getImm();
+    unsigned char ME = MI->getOperand(4).getImm();
+    bool useSubstituteMnemonic = false;
+    if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+      O << "\tslwi "; useSubstituteMnemonic = true;
+    }
+    if (SH <= 31 && MB == (32-SH) && ME == 31) {
+      O << "\tsrwi "; useSubstituteMnemonic = true;
+      SH = 32-SH;
+    }
+    if (useSubstituteMnemonic) {
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+      return;
+    }
+  }
+  
+  if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+      MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+    O << "\tmr ";
+    printOperand(MI, 0, O);
+    O << ", ";
+    printOperand(MI, 1, O);
+    return;
+  }
+  
+  if (MI->getOpcode() == PPC::RLDICR) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char ME = MI->getOperand(3).getImm();
+    // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+    if (63-SH == ME) {
+      O << "\tsldi ";
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+      return;
+    }
+  }
+  
+  printInstruction(MI, O);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O, 
+                                           const char *Modifier) {
+  assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+  unsigned Code = MI->getOperand(OpNo).getImm();
+  if (StringRef(Modifier) == "cc") {
+    switch ((PPC::Predicate)Code) {
+    default: assert(0 && "Invalid predicate");
+    case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+    case PPC::PRED_LT: O << "lt"; return;
+    case PPC::PRED_LE: O << "le"; return;
+    case PPC::PRED_EQ: O << "eq"; return;
+    case PPC::PRED_GE: O << "ge"; return;
+    case PPC::PRED_GT: O << "gt"; return;
+    case PPC::PRED_NE: O << "ne"; return;
+    case PPC::PRED_UN: O << "un"; return;
+    case PPC::PRED_NU: O << "nu"; return;
+    }
+  }
+  
+  assert(StringRef(Modifier) == "reg" &&
+         "Need to specify 'cc' or 'reg' as predicate op modifier!");
+  // Don't print the register for 'always'.
+  if (Code == PPC::PRED_ALWAYS) return;
+  printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  char Value = MI->getOperand(OpNo).getImm();
+  Value = (Value << (32-5)) >> (32-5);
+  O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned char Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 31 && "Invalid u5imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned char Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 63 && "Invalid u6imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    O << (short)(MI->getOperand(OpNo).getImm()*4);
+  else
+    printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (!MI->getOperand(OpNo).isImm())
+    return printOperand(MI, OpNo, O);
+
+  // Branches can take an immediate operand.  This is used by the branch
+  // selection pass to print $+8, an eight byte displacement from the PC.
+  O << "$+";
+  printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  unsigned CCReg = MI->getOperand(OpNo).getReg();
+  unsigned RegNo;
+  switch (CCReg) {
+  default: assert(0 && "Unknown CR register");
+  case PPC::CR0: RegNo = 0; break;
+  case PPC::CR1: RegNo = 1; break;
+  case PPC::CR2: RegNo = 2; break;
+  case PPC::CR3: RegNo = 3; break;
+  case PPC::CR4: RegNo = 4; break;
+  case PPC::CR5: RegNo = 5; break;
+  case PPC::CR6: RegNo = 6; break;
+  case PPC::CR7: RegNo = 7; break;
+  }
+  O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  printSymbolLo(MI, OpNo, O);
+  O << '(';
+  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo+1, O);
+  O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    printS16X4ImmOperand(MI, OpNo, O);
+  else
+    printSymbolLo(MI, OpNo, O);
+  O << '(';
+  
+  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo+1, O);
+  O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  // When used as the base register, r0 reads constant zero rather than
+  // the value contained in the register.  For this reason, the darwin
+  // assembler requires that we print r0 as 0 (no r) when used as the base.
+  if (MI->getOperand(OpNo).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo, O);
+  O << ", ";
+  printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+  switch (RegName[0]) {
+  case 'r':
+  case 'f':
+  case 'v': return RegName + 1;
+  case 'c': if (RegName[1] == 'r') return RegName + 2;
+  }
+  
+  return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    const char *RegName = getRegisterName(Op.getReg());
+    // The linux and AIX assembler does not take register prefixes.
+    if (!isDarwinSyntax())
+      RegName = stripRegisterPrefix(RegName);
+    
+    O << RegName;
+    return;
+  }
+  
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+  
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  O << *Op.getExpr();
+}
+  
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    return printS16ImmOperand(MI, OpNo, O);
+  
+  // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
+  if (MI->getOperand(OpNo).isExpr() &&
+      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+    O << "lo16(";
+    printOperand(MI, OpNo, O);
+    O << ')';
+  } else {
+    printOperand(MI, OpNo, O);
+  }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
+  if (MI->getOperand(OpNo).isExpr() &&
+      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+    O << "ha16(";
+    printOperand(MI, OpNo, O);
+    O << ')';
+  } else {
+    printOperand(MI, OpNo, O);
+  }
+}
+
+
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 000000000000..ebc10daa5f16
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,69 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+  // 0 -> AIX, 1 -> Darwin.
+  unsigned SyntaxVariant;
+public:
+  PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+    : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+  
+  bool isDarwinSyntax() const {
+    return SyntaxVariant == 1;
+  }
+  
+  virtual void printInst(const MCInst *MI, raw_ostream &O);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+  
+  static const char *getInstructionName(unsigned Opcode);
+  
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+  
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                             raw_ostream &O, const char *Modifier);
+
+
+  void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  
+  // FIXME: Remove
+  void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 1265f1d36910..030defe212c0 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
                 PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
                 PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
                 PPCGenInstrInfo.inc PPCGenDAGISel.inc \
-                PPCGenSubtarget.inc PPCGenCallingConv.inc
+                PPCGenSubtarget.inc PPCGenCallingConv.inc \
+                PPCGenMCCodeEmitter.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter TargetInfo
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 67e3a4ad677c..7242f3aa8458 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,24 +15,70 @@
 #ifndef LLVM_TARGET_POWERPC_H
 #define LLVM_TARGET_POWERPC_H
 
+#include <string>
+
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
 
-#include "llvm/Target/TargetMachine.h"
-
 namespace llvm {
   class PPCTargetMachine;
   class FunctionPass;
   class formatted_raw_ostream;
+  class JITCodeEmitter;
+  class Target;
+  class MachineInstr;
+  class AsmPrinter;
+  class MCInst;
+  class MCCodeEmitter;
+  class MCContext;
+  class TargetMachine;
+  class TargetAsmBackend;
+  
+  FunctionPass *createPPCBranchSelectionPass();
+  FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+  FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+                                            JITCodeEmitter &MCE);
+  MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+                                        MCContext &Ctx);
+  TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
   
-FunctionPass *createPPCBranchSelectionPass();
-FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
-                                          JITCodeEmitter &MCE);
+  void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                    AsmPrinter &AP);
+  
+  extern Target ThePPC32Target;
+  extern Target ThePPC64Target;
+  
+  namespace PPCII {
+    
+  /// Target Operand Flag enum.
+  enum TOF {
+    //===------------------------------------------------------------------===//
+    // PPC Specific MachineOperand flags.
+    MO_NO_FLAG,
+    
+    /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+    /// reference is actually to the "FOO$stub" symbol.  This is used for calls
+    /// and jumps to external functions on Tiger and earlier.
+    MO_DARWIN_STUB = 1,
+    
+    /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+    MO_LO16 = 4, MO_HA16 = 8,
 
-extern Target ThePPC32Target;
-extern Target ThePPC64Target;
+    /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+    /// the function's picbase, e.g. lo16(symbol-picbase).
+    MO_PIC_FLAG = 16,
 
+    /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+    /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+    MO_NLP_FLAG = 32,
+    
+    /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+    /// symbol with hidden visibility.  This causes a different kind of
+    /// non-lazy-pointer to be generated.
+    MO_NLP_HIDDEN_FLAG = 64
+  };
+  } // end namespace PPCII
+  
 } // end namespace llvm;
 
 // Defines symbolic names for PowerPC registers.  This defines a mapping from
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 27644b2daca8..aabf494012e2 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -99,8 +99,14 @@ def PPCInstrInfo : InstrInfo {
   let isLittleEndianEncoding = 1;
 }
 
+def PPCAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
 
 def PPC : Target {
   // Information about the instructions.
   let InstructionSet = PPCInstrInfo;
+  
+  let AssemblyWriters = [PPCAsmWriter];
 }
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp
new file mode 100644
index 000000000000..c4d4ac9b3eb9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -0,0 +1,119 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "PPC.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+  PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+};
+
+class PPCAsmBackend : public TargetAsmBackend {
+const Target &TheTarget;
+public:
+  PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+
+  unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+    const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+      // name                    offset  bits  flags
+      { "fixup_ppc_br24",        6,      24,   MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_ppc_brcond14",    16,     14,   MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_ppc_lo16",        16,     16,   0 },
+      { "fixup_ppc_ha16",        16,     16,   0 },
+      { "fixup_ppc_lo14",        16,     14,   0 }
+    };
+  
+    if (Kind < FirstTargetFixupKind)
+      return TargetAsmBackend::getFixupKindInfo(Kind);
+  
+    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+           "Invalid kind!");
+    return Infos[Kind - FirstTargetFixupKind];
+  }
+  
+  bool MayNeedRelaxation(const MCInst &Inst) const {
+    // FIXME.
+    return false;
+  }
+  
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+    // FIXME.
+    assert(0 && "RelaxInstruction() unimplemented");
+  }
+  
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+    // FIXME: Zero fill for now. That's not right, but at least will get the
+    // section size right.
+    for (uint64_t i = 0; i != Count; ++i)
+      OW->Write8(0);
+    return true;
+  }      
+  
+  unsigned getPointerSize() const {
+    StringRef Name = TheTarget.getName();
+    if (Name == "ppc64") return 8;
+    assert(Name == "ppc32" && "Unknown target name!");
+    return 4;
+  }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+  class DarwinPPCAsmBackend : public PPCAsmBackend {
+  public:
+    DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+    
+    void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                    uint64_t Value) const {
+      assert(0 && "UNIMP");
+    }
+    
+    MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+      bool is64 = getPointerSize() == 8;
+      return createMachObjectWriter(new PPCMachObjectWriter(
+                                      /*Is64Bit=*/is64,
+                                      (is64 ? object::mach::CTM_PowerPC64 :
+                                       object::mach::CTM_PowerPC),
+                                      object::mach::CSPPC_ALL),
+                                    OS, /*IsLittleEndian=*/false);
+    }
+    
+    virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+      return false;
+    }
+  };
+} // end anonymous namespace
+
+
+
+
+TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+                                            const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return new DarwinPPCAsmBackend(T);
+  default:
+    return 0;
+  }
+}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..8ed5d7f0ee71
--- /dev/null
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,696 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "InstPrinter/PPCInstPrinter.h"
+using namespace llvm;
+
+namespace {
+  class PPCAsmPrinter : public AsmPrinter {
+  protected:
+    DenseMap<MCSymbol*, MCSymbol*> TOC;
+    const PPCSubtarget &Subtarget;
+    uint64_t TOCLabelID;
+  public:
+    explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer),
+        Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+    virtual const char *getPassName() const {
+      return "PowerPC Assembly Printer";
+    }
+
+
+    virtual void EmitInstruction(const MachineInstr *MI);
+
+    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+
+    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+      MachineLocation Location;
+      assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+      // Frame address.  Currently handles register +- offset only.
+      if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+        Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+      else {
+        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+      }
+      return Location;
+    }
+  };
+
+  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+  public:
+    explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : PPCAsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Linux PPC Assembly Printer";
+    }
+
+    bool doFinalization(Module &M);
+
+    virtual void EmitFunctionEntryLabel();
+  };
+
+  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+  /// OS X
+  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+  public:
+    explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : PPCAsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Darwin PPC Assembly Printer";
+    }
+
+    bool doFinalization(Module &M);
+    void EmitStartOfAsmFile(Module &M);
+
+    void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+  };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+  switch (RegName[0]) {
+    case 'r':
+    case 'f':
+    case 'v': return RegName + 1;
+    case 'c': if (RegName[1] == 'r') return RegName + 2;
+  }
+  
+  return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register: {
+    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    // Linux assembler (Others?) does not take register mnemonics.
+    // FIXME - What about special registers used in mfspr/mtspr?
+    if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+    O << RegName;
+    return;
+  }
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    // FIXME: PIC relocation model
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    return;
+  case MachineOperand::MO_BlockAddress:
+    O << *GetBlockAddressSymbol(MO.getBlockAddress());
+    return;
+  case MachineOperand::MO_ExternalSymbol: {
+    // Computing the address of an external symbol, not calling it.
+    if (TM.getRelocationModel() == Reloc::Static) {
+      O << *GetExternalSymbolSymbol(MO.getSymbolName());
+      return;
+    }
+
+    MCSymbol *NLPSym = 
+      OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
+                                   MO.getSymbolName()+"$non_lazy_ptr");
+    MachineModuleInfoImpl::StubValueTy &StubSym = 
+      MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
+    if (StubSym.getPointer() == 0)
+      StubSym = MachineModuleInfoImpl::
+        StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
+    
+    O << *NLPSym;
+    return;
+  }
+  case MachineOperand::MO_GlobalAddress: {
+    // Computing the address of a global symbol, not calling it.
+    const GlobalValue *GV = MO.getGlobal();
+    MCSymbol *SymToPrint;
+
+    // External or weakly linked global variables need non-lazily-resolved stubs
+    if (TM.getRelocationModel() != Reloc::Static &&
+        (GV->isDeclaration() || GV->isWeakForLinker())) {
+      if (!GV->hasHiddenVisibility()) {
+        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+        MachineModuleInfoImpl::StubValueTy &StubSym = 
+          MMI->getObjFileInfo<MachineModuleInfoMachO>()
+            .getGVStubEntry(SymToPrint);
+        if (StubSym.getPointer() == 0)
+          StubSym = MachineModuleInfoImpl::
+            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+      } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+                 GV->hasAvailableExternallyLinkage()) {
+        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+        
+        MachineModuleInfoImpl::StubValueTy &StubSym = 
+          MMI->getObjFileInfo<MachineModuleInfoMachO>().
+                    getHiddenGVStubEntry(SymToPrint);
+        if (StubSym.getPointer() == 0)
+          StubSym = MachineModuleInfoImpl::
+            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+      } else {
+        SymToPrint = Mang->getSymbol(GV);
+      }
+    } else {
+      SymToPrint = Mang->getSymbol(GV);
+    }
+    
+    O << *SymToPrint;
+
+    printOffset(MO.getOffset(), O);
+    return;
+  }
+
+  default:
+    O << "<unknown operand type: " << MO.getType() << ">";
+    return;
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                    unsigned AsmVariant,
+                                    const char *ExtraCode, raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'c': // Don't print "$" before a global var name or constant.
+      break; // PPC never has a prefix.
+    case 'L': // Write second word of DImode reference.
+      // Verify that this operand has two consecutive registers.
+      if (!MI->getOperand(OpNo).isReg() ||
+          OpNo+1 == MI->getNumOperands() ||
+          !MI->getOperand(OpNo+1).isReg())
+        return true;
+      ++OpNo;   // Return the high-part.
+      break;
+    case 'I':
+      // Write 'i' if an integer constant, otherwise nothing.  Used to print
+      // addi vs add, etc.
+      if (MI->getOperand(OpNo).isImm())
+        O << "i";
+      return false;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                          unsigned AsmVariant,
+                                          const char *ExtraCode,
+                                          raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+  assert(MI->getOperand(OpNo).isReg());
+  O << "0(";
+  printOperand(MI, OpNo, O);
+  O << ")";
+  return false;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MCInst TmpInst;
+  
+  // Lower multi-instruction pseudo operations.
+  switch (MI->getOpcode()) {
+  default: break;
+  case TargetOpcode::DBG_VALUE: {
+    if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+      
+    SmallString<32> Str;
+    raw_svector_ostream O(Str);
+    unsigned NOps = MI->getNumOperands();
+    assert(NOps==4);
+    O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+    // cast away const; DIetc do not take const operands for some reason.
+    DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+    O << V.getName();
+    O << " <- ";
+    // Frame address.  Currently handles register +- offset only.
+    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+    O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+    O << ']';
+    O << "+";
+    printOperand(MI, NOps-2, O);
+    OutStreamer.EmitRawText(O.str());
+    return;
+  }
+      
+  case PPC::MovePCtoLR:
+  case PPC::MovePCtoLR8: {
+    // Transform %LR = MovePCtoLR
+    // Into this, where the label is the PIC base: 
+    //     bl L1$pb
+    // L1$pb:
+    MCSymbol *PICBase = MF->getPICBaseSymbol();
+    
+    // Emit the 'bl'.
+    TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+    
+    
+    // FIXME: We would like an efficient form for this, so we don't have to do
+    // a lot of extra uniquing.
+    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+                                             Create(PICBase, OutContext)));
+    OutStreamer.EmitInstruction(TmpInst);
+    
+    // Emit the label.
+    OutStreamer.EmitLabel(PICBase);
+    return;
+  }
+  case PPC::LDtoc: {
+    // Transform %X3 = LDtoc <ga:@min1>, %X2
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+      
+    // Change the opcode to LD, and the global address operand to be a
+    // reference to the TOC entry we will synthesize later.
+    TmpInst.setOpcode(PPC::LD);
+    const MachineOperand &MO = MI->getOperand(1);
+    assert(MO.isGlobal());
+      
+    // Map symbol -> label of TOC entry.
+    MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+    if (TOCEntry == 0)
+      TOCEntry = GetTempSymbol("C", TOCLabelID++);
+      
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+      
+  case PPC::MFCRpseud:
+    // Transform: %R3 = MFCRpseud %CR7
+    // Into:      %R3 = MFCR      ;; cr7
+    OutStreamer.AddComment(PPCInstPrinter::
+                           getRegisterName(MI->getOperand(1).getReg()));
+    TmpInst.setOpcode(PPC::MFCR);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+
+  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+  if (!Subtarget.isPPC64())  // linux/ppc32 - Normal entry label.
+    return AsmPrinter::EmitFunctionEntryLabel();
+    
+  // Emit an official procedure descriptor.
+  // FIXME 64-bit SVR4: Use MCSection here!
+  OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
+  OutStreamer.EmitRawText(StringRef("\t.align 3"));
+  OutStreamer.EmitLabel(CurrentFnSym);
+  OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
+                          ",.TOC.@tocbase");
+  OutStreamer.EmitRawText(StringRef("\t.previous"));
+  OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+  const TargetData *TD = TM.getTargetData();
+
+  bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+  if (isPPC64 && !TOC.empty()) {
+    // FIXME 64-bit SVR4: Use MCSection here?
+    OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+
+    // FIXME: This is nondeterminstic!
+    for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+         E = TOC.end(); I != E; ++I) {
+      OutStreamer.EmitLabel(I->second);
+      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
+                              "[TC]," + I->first->getName());
+    }
+  }
+
+  return AsmPrinter::doFinalization(M);
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  static const char *const CPUDirectives[] = {
+    "",
+    "ppc",
+    "ppc601",
+    "ppc602",
+    "ppc603",
+    "ppc7400",
+    "ppc750",
+    "ppc970",
+    "ppc64"
+  };
+
+  unsigned Directive = Subtarget.getDarwinDirective();
+  if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+    Directive = PPC::DIR_970;
+  if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+    Directive = PPC::DIR_7400;
+  if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+    Directive = PPC::DIR_64;
+  assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+  
+  // FIXME: This is a total hack, finish mc'izing the PPC backend.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+  // Prime text sections so they are adjacent.  This reduces the likelihood a
+  // large data or debug section causes a branch to exceed 16M limit.
+  const TargetLoweringObjectFileMachO &TLOFMacho = 
+    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    OutStreamer.SwitchSection(
+           OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+                                      MCSectionMachO::S_SYMBOL_STUBS |
+                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                      32, SectionKind::getText()));
+  } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+    OutStreamer.SwitchSection(
+           OutContext.getMachOSection("__TEXT","__symbol_stub1",
+                                      MCSectionMachO::S_SYMBOL_STUBS |
+                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                      16, SectionKind::getText()));
+  }
+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+  // Remove $stub suffix, add $lazy_ptr.
+  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
+  TmpStr += "$lazy_ptr";
+  return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+  // Add $tmp suffix to $stub, yielding $stub$tmp.
+  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
+  TmpStr += "$tmp";
+  return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+  
+  const TargetLoweringObjectFileMachO &TLOFMacho = 
+    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+  // .lazy_symbol_pointer
+  const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+  
+  // Output stubs for dynamically-linked functions
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    const MCSection *StubSection = 
+    OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+                               MCSectionMachO::S_SYMBOL_STUBS |
+                               MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                               32, SectionKind::getText());
+    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+      OutStreamer.SwitchSection(StubSection);
+      EmitAlignment(4);
+      
+      MCSymbol *Stub = Stubs[i].first;
+      MCSymbol *RawSym = Stubs[i].second.getPointer();
+      MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+      MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+                                           
+      OutStreamer.EmitLabel(Stub);
+      OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+      // FIXME: MCize this.
+      OutStreamer.EmitRawText(StringRef("\tmflr r0"));
+      OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+      OutStreamer.EmitLabel(AnonSymbol);
+      OutStreamer.EmitRawText(StringRef("\tmflr r11"));
+      OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
+                              "-" + AnonSymbol->getName() + ")");
+      OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
+      
+      if (isPPC64)
+        OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+                                "-" + AnonSymbol->getName() + ")(r11)");
+      else
+        OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+                                "-" + AnonSymbol->getName() + ")(r11)");
+      OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+      OutStreamer.EmitRawText(StringRef("\tbctr"));
+      
+      OutStreamer.SwitchSection(LSPSection);
+      OutStreamer.EmitLabel(LazyPtr);
+      OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+      
+      if (isPPC64)
+        OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+      else
+        OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+    }
+    OutStreamer.AddBlankLine();
+    return;
+  }
+  
+  const MCSection *StubSection =
+    OutContext.getMachOSection("__TEXT","__symbol_stub1",
+                               MCSectionMachO::S_SYMBOL_STUBS |
+                               MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                               16, SectionKind::getText());
+  for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+    MCSymbol *Stub = Stubs[i].first;
+    MCSymbol *RawSym = Stubs[i].second.getPointer();
+    MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+
+    OutStreamer.SwitchSection(StubSection);
+    EmitAlignment(4);
+    OutStreamer.EmitLabel(Stub);
+    OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+    OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
+    if (isPPC64)
+      OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
+                              ")(r11)");
+    else
+      OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
+                              ")(r11)");
+    OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
+    OutStreamer.EmitRawText(StringRef("\tbctr"));
+    OutStreamer.SwitchSection(LSPSection);
+    OutStreamer.EmitLabel(LazyPtr);
+    OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+    
+    if (isPPC64)
+      OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
+    else
+      OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+  }
+  
+  OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;
+
+  // Darwin/PPC always uses mach-o.
+  const TargetLoweringObjectFileMachO &TLOFMacho = 
+    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  MachineModuleInfoMachO &MMIMacho =
+    MMI->getObjFileInfo<MachineModuleInfoMachO>();
+  
+  MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+  if (!Stubs.empty())
+    EmitFunctionStubs(Stubs);
+
+  if (MAI->doesSupportExceptionHandling() && MMI) {
+    // Add the (possibly multiple) personalities to the set of global values.
+    // Only referenced functions get into the Personalities list.
+    const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+    for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+         E = Personalities.end(); I != E; ++I) {
+      if (*I) {
+        MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+        MachineModuleInfoImpl::StubValueTy &StubSym =
+          MMIMacho.getGVStubEntry(NLPSym);
+        StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+      }
+    }
+  }
+
+  // Output stubs for dynamically-linked functions.
+  Stubs = MMIMacho.GetGVStubList();
+  
+  // Output macho stubs for external and common global variables.
+  if (!Stubs.empty()) {
+    // Switch with ".non_lazy_symbol_pointer" directive.
+    OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+    EmitAlignment(isPPC64 ? 3 : 2);
+    
+    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+      // L_foo$stub:
+      OutStreamer.EmitLabel(Stubs[i].first);
+      //   .indirect_symbol _foo
+      MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+      OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+      if (MCSym.getInt())
+        // External to current translation unit.
+        OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+      else
+        // Internal to current translation unit.
+        //
+        // When we place the LSDA into the TEXT section, the type info pointers
+        // need to be indirect and pc-rel. We accomplish this by using NLPs.
+        // However, sometimes the types are local to the file. So we need to
+        // fill in the value for the NLP in those cases.
+        OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+                                                      OutContext),
+                              isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+    }
+
+    Stubs.clear();
+    OutStreamer.AddBlankLine();
+  }
+
+  Stubs = MMIMacho.GetHiddenGVStubList();
+  if (!Stubs.empty()) {
+    OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+    EmitAlignment(isPPC64 ? 3 : 2);
+    
+    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+      // L_foo$stub:
+      OutStreamer.EmitLabel(Stubs[i].first);
+      //   .long _foo
+      OutStreamer.EmitValue(MCSymbolRefExpr::
+                            Create(Stubs[i].second.getPointer(),
+                                   OutContext),
+                            isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+    }
+
+    Stubs.clear();
+    OutStreamer.AddBlankLine();
+  }
+
+  // Funny Darwin hack: This flag tells the linker that no global symbols
+  // contain code that falls through to other global symbols (e.g. the obvious
+  // implementation of multiple entry points).  If this doesn't occur, the
+  // linker can safely perform dead code stripping.  Since LLVM never generates
+  // code that does this, it is always safe to set.
+  OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+  return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+                                           MCStreamer &Streamer) {
+  const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+  if (Subtarget->isDarwin())
+    return new PPCDarwinAsmPrinter(tm, Streamer);
+  return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+                                             unsigned SyntaxVariant,
+                                             const MCAsmInfo &MAI) {
+  return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() { 
+  TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+  TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+  
+  TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+  TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index df9ab52389ba..42232a07535b 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -50,13 +50,24 @@ namespace {
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
 
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
-
+    
+    MachineRelocation GetRelocation(const MachineOperand &MO,
+                                    unsigned RelocID) const;
+    
     /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-
     unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO);
+                               const MachineOperand &MO) const;
+
+    unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+    unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
     const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
 
@@ -67,10 +78,6 @@ namespace {
     /// emitBasicBlock - emits the given MachineBasicBlock to memory
     ///
     void emitBasicBlock(MachineBasicBlock &MBB);
-
-    /// getValueBit - return the particular bit of Val
-    ///
-    unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
   };
 }
 
@@ -128,125 +135,127 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
   }
 }
 
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                           const MachineOperand &MO) {
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+                                             unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+         (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+  return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
 
-  unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
-                   // or things that get fixed up later by the JIT.
-  if (MO.isReg()) {
-    rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, 
+                                                unsigned RelocID) const {
+  // If in PIC mode, we need to encode the negated address of the
+  // 'movepctolr' into the unrelocated field.  After relocation, we'll have
+  // &gv-&movepctolr-4 in the imm field.  Once &movepctolr is added to the imm
+  // field, we get &gv.  This doesn't happen for branch relocations, which are
+  // always implicitly pc relative.
+  intptr_t Cst = 0;
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+    Cst = -(intptr_t)MovePCtoLROffset - 4;
+  }
+  
+  if (MO.isGlobal())
+    return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+                                    const_cast<GlobalValue *>(MO.getGlobal()),
+                                    Cst, isa<Function>(MO.getGlobal()));
+  if (MO.isSymbol())
+    return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+                                        RelocID, MO.getSymbolName(), Cst);
+  if (MO.isCPI())
+    return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+                                           RelocID, MO.getIndex(), Cst);
 
-    // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the
-    // register, not the register number directly.
-    if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
-        (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) {
-      rv = 0x80 >> rv;
-    }
-  } else if (MO.isImm()) {
-    rv = MO.getImm();
-  } else if (MO.isGlobal() || MO.isSymbol() ||
-             MO.isCPI() || MO.isJTI()) {
-    unsigned Reloc = 0;
-    if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin ||
-        MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF ||
-        MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
-      Reloc = PPC::reloc_pcrel_bx;
-    else {
-      if (TM.getRelocationModel() == Reloc::PIC_) {
-        assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
-      }
-      switch (MI.getOpcode()) {
-      default: MI.dump(); llvm_unreachable("Unknown instruction for relocation!");
-      case PPC::LIS:
-      case PPC::LIS8:
-      case PPC::ADDIS:
-      case PPC::ADDIS8:
-        Reloc = PPC::reloc_absolute_high;       // Pointer to symbol
-        break;
-      case PPC::LI:
-      case PPC::LI8:
-      case PPC::LA:
-      // Loads.
-      case PPC::LBZ:
-      case PPC::LBZ8:
-      case PPC::LHA:
-      case PPC::LHA8:
-      case PPC::LHZ:
-      case PPC::LHZ8:
-      case PPC::LWZ:
-      case PPC::LWZ8:
-      case PPC::LFS:
-      case PPC::LFD:
-
-      // Stores.
-      case PPC::STB:
-      case PPC::STB8:
-      case PPC::STH:
-      case PPC::STH8:
-      case PPC::STW:
-      case PPC::STW8:
-      case PPC::STFS:
-      case PPC::STFD:
-        Reloc = PPC::reloc_absolute_low;
-        break;
-
-      case PPC::LWA:
-      case PPC::LD:
-      case PPC::STD:
-      case PPC::STD_32:
-        Reloc = PPC::reloc_absolute_low_ix;
-        break;
-      }
-    }
+  if (MO.isMBB())
+    return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+                                    RelocID, MO.getMBB());
+  
+  assert(MO.isJTI());
+  return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+                                         RelocID, MO.getIndex(), Cst);
+}
 
-    MachineRelocation R;
-    if (MO.isGlobal()) {
-      R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
-                                   const_cast<GlobalValue *>(MO.getGlobal()), 0,
-                                   isa<Function>(MO.getGlobal()));
-    } else if (MO.isSymbol()) {
-      R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
-                                       Reloc, MO.getSymbolName(), 0);
-    } else if (MO.isCPI()) {
-      R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
-                                          Reloc, MO.getIndex(), 0);
-    } else {
-      assert(MO.isJTI());
-      R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
-                                          Reloc, MO.getIndex(), 0);
-    }
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+                                             unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+  
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+  return 0;
+}
 
-    // If in PIC mode, we need to encode the negated address of the
-    // 'movepctolr' into the unrelocated field.  After relocation, we'll have
-    // &gv-&movepctolr-4 in the imm field.  Once &movepctolr is added to the imm
-    // field, we get &gv.  This doesn't happen for branch relocations, which are
-    // always implicitly pc relative.
-    if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){
-      assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
-      R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
-    }
-    MCE.addRelocation(R);
-
-  } else if (MO.isMBB()) {
-    unsigned Reloc = 0;
-    unsigned Opcode = MI.getOpcode();
-    if (Opcode == PPC::B || Opcode == PPC::BL_Darwin ||
-        Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 ||
-        Opcode == PPC::BLA_SVR4)
-      Reloc = PPC::reloc_pcrel_bx;
-    else // BCC instruction
-      Reloc = PPC::reloc_pcrel_bcx;
-
-    MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
-                                               Reloc, MO.getMBB()));
-  } else {
-#ifndef NDEBUG
-    errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
-#endif
-    llvm_unreachable(0);
-  }
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+  return 0;
+}
 
-  return rv;
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+                                         unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+  return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+                                         unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+  
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+  return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+                                          unsigned OpNo) const {
+  // Encode (imm, reg) as a memri, which has the low 16-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+  
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+  
+  // Add a fixup for the displacement field.
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+  return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
+  // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+  
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+  
+  MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+  return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+                                           const MachineOperand &MO) const {
+
+  if (MO.isReg()) {
+    // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+    // The GPR operand should come through here though.
+    assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+           MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+    return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+  }
+  
+  assert(MO.isImm() &&
+         "Relocation required in an instruction that we cannot encode!");
+  return MO.getImm();
 }
 
 #include "PPCGenCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCFixupKinds.h b/lib/Target/PowerPC/PPCFixupKinds.h
new file mode 100644
index 000000000000..b3c889e3f8da
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFixupKinds.h
@@ -0,0 +1,45 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+  // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
+  // and 'bl'.
+  fixup_ppc_br24 = FirstTargetFixupKind,
+  
+  /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+  /// branches.
+  fixup_ppc_brcond14,
+  
+  /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+  /// like 'li'.
+  fixup_ppc_lo16,
+  
+  /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+  /// like 'lis'.
+  fixup_ppc_ha16,
+  
+  /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
+  /// like 'std'.
+  fixup_ppc_lo14,
+  
+  // Marker
+  LastTargetFixupKind,
+  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
deleted file mode 100644
index 7587b0359816..000000000000
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ /dev/null
@@ -1,300 +0,0 @@
-//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef POWERPC_FRAMEINFO_H
-#define POWERPC_FRAMEINFO_H
-
-#include "PPC.h"
-#include "PPCSubtarget.h"
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
-
-namespace llvm {
-
-class PPCFrameInfo: public TargetFrameInfo {
-  const TargetMachine &TM;
-
-public:
-  PPCFrameInfo(const TargetMachine &tm, bool LP64)
-    : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) {
-  }
-
-  /// getReturnSaveOffset - Return the previous frame offset to save the
-  /// return address.
-  static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
-    if (isDarwinABI)
-      return isPPC64 ? 16 : 8;
-    // SVR4 ABI:
-    return isPPC64 ? 16 : 4;
-  }
-
-  /// getFramePointerSaveOffset - Return the previous frame offset to save the
-  /// frame pointer.
-  static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
-    // For the Darwin ABI:
-    // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
-    // for saving the frame pointer (if needed.)  While the published ABI has
-    // not used this slot since at least MacOSX 10.2, there is older code
-    // around that does use it, and that needs to continue to work.
-    if (isDarwinABI)
-      return isPPC64 ? -8U : -4U;
-    
-    // SVR4 ABI: First slot in the general register save area.
-    return isPPC64 ? -8U : -4U;
-  }
-  
-  /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
-  ///
-  static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
-    if (isDarwinABI || isPPC64)
-      return 6 * (isPPC64 ? 8 : 4);
-    
-    // SVR4 ABI:
-    return 8;
-  }
-
-  /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
-  /// argument area.
-  static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
-    // For the Darwin ABI / 64-bit SVR4 ABI:
-    // The prolog code of the callee may store up to 8 GPR argument registers to
-    // the stack, allowing va_start to index over them in memory if its varargs.
-    // Because we cannot tell if this is needed on the caller side, we have to
-    // conservatively assume that it is needed.  As such, make sure we have at
-    // least enough stack space for the caller to store the 8 GPRs.
-    if (isDarwinABI || isPPC64)
-      return 8 * (isPPC64 ? 8 : 4);
-    
-    // 32-bit SVR4 ABI:
-    // There is no default stack allocated for the 8 first GPR arguments.
-    return 0;
-  }
-
-  /// getMinCallFrameSize - Return the minimum size a call frame can be using
-  /// the PowerPC ABI.
-  static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
-    // The call frame needs to be at least big enough for linkage and 8 args.
-    return getLinkageSize(isPPC64, isDarwinABI) +
-           getMinCallArgumentsSize(isPPC64, isDarwinABI);
-  }
-
-  // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
-  const SpillSlot *
-  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-    if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) {
-      NumEntries = 1;
-      if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
-        static const SpillSlot darwin64Offsets = {PPC::X31, -8};
-        return &darwin64Offsets;
-      } else {
-        static const SpillSlot darwinOffsets = {PPC::R31, -4};
-        return &darwinOffsets;
-      }
-    }
-
-    // Early exit if not using the SVR4 ABI.
-    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
-      NumEntries = 0;
-      return 0;
-    }
-
-    static const SpillSlot Offsets[] = {
-      // Floating-point register save area offsets.
-      {PPC::F31, -8},
-      {PPC::F30, -16},
-      {PPC::F29, -24},
-      {PPC::F28, -32},
-      {PPC::F27, -40},
-      {PPC::F26, -48},
-      {PPC::F25, -56},
-      {PPC::F24, -64},
-      {PPC::F23, -72},
-      {PPC::F22, -80},
-      {PPC::F21, -88},
-      {PPC::F20, -96},
-      {PPC::F19, -104},
-      {PPC::F18, -112},
-      {PPC::F17, -120},
-      {PPC::F16, -128},
-      {PPC::F15, -136},
-      {PPC::F14, -144},
-
-      // General register save area offsets.
-      {PPC::R31, -4},
-      {PPC::R30, -8},
-      {PPC::R29, -12},
-      {PPC::R28, -16},
-      {PPC::R27, -20},
-      {PPC::R26, -24},
-      {PPC::R25, -28},
-      {PPC::R24, -32},
-      {PPC::R23, -36},
-      {PPC::R22, -40},
-      {PPC::R21, -44},
-      {PPC::R20, -48},
-      {PPC::R19, -52},
-      {PPC::R18, -56},
-      {PPC::R17, -60},
-      {PPC::R16, -64},
-      {PPC::R15, -68},
-      {PPC::R14, -72},
-
-      // CR save area offset.
-      // FIXME SVR4: Disable CR save area for now.
-//      {PPC::CR2, -4},
-//      {PPC::CR3, -4},
-//      {PPC::CR4, -4},
-//      {PPC::CR2LT, -4},
-//      {PPC::CR2GT, -4},
-//      {PPC::CR2EQ, -4},
-//      {PPC::CR2UN, -4},
-//      {PPC::CR3LT, -4},
-//      {PPC::CR3GT, -4},
-//      {PPC::CR3EQ, -4},
-//      {PPC::CR3UN, -4},
-//      {PPC::CR4LT, -4},
-//      {PPC::CR4GT, -4},
-//      {PPC::CR4EQ, -4},
-//      {PPC::CR4UN, -4},
-
-      // VRSAVE save area offset.
-      {PPC::VRSAVE, -4},
-
-      // Vector register save area
-      {PPC::V31, -16},
-      {PPC::V30, -32},
-      {PPC::V29, -48},
-      {PPC::V28, -64},
-      {PPC::V27, -80},
-      {PPC::V26, -96},
-      {PPC::V25, -112},
-      {PPC::V24, -128},
-      {PPC::V23, -144},
-      {PPC::V22, -160},
-      {PPC::V21, -176},
-      {PPC::V20, -192}
-    };
-
-    static const SpillSlot Offsets64[] = {
-      // Floating-point register save area offsets.
-      {PPC::F31, -8},
-      {PPC::F30, -16},
-      {PPC::F29, -24},
-      {PPC::F28, -32},
-      {PPC::F27, -40},
-      {PPC::F26, -48},
-      {PPC::F25, -56},
-      {PPC::F24, -64},
-      {PPC::F23, -72},
-      {PPC::F22, -80},
-      {PPC::F21, -88},
-      {PPC::F20, -96},
-      {PPC::F19, -104},
-      {PPC::F18, -112},
-      {PPC::F17, -120},
-      {PPC::F16, -128},
-      {PPC::F15, -136},
-      {PPC::F14, -144},
-
-      // General register save area offsets.
-      // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
-      //                    mode?
-      {PPC::R31, -4},
-      {PPC::R30, -12},
-      {PPC::R29, -20},
-      {PPC::R28, -28},
-      {PPC::R27, -36},
-      {PPC::R26, -44},
-      {PPC::R25, -52},
-      {PPC::R24, -60},
-      {PPC::R23, -68},
-      {PPC::R22, -76},
-      {PPC::R21, -84},
-      {PPC::R20, -92},
-      {PPC::R19, -100},
-      {PPC::R18, -108},
-      {PPC::R17, -116},
-      {PPC::R16, -124},
-      {PPC::R15, -132},
-      {PPC::R14, -140},
-
-      {PPC::X31, -8},
-      {PPC::X30, -16},
-      {PPC::X29, -24},
-      {PPC::X28, -32},
-      {PPC::X27, -40},
-      {PPC::X26, -48},
-      {PPC::X25, -56},
-      {PPC::X24, -64},
-      {PPC::X23, -72},
-      {PPC::X22, -80},
-      {PPC::X21, -88},
-      {PPC::X20, -96},
-      {PPC::X19, -104},
-      {PPC::X18, -112},
-      {PPC::X17, -120},
-      {PPC::X16, -128},
-      {PPC::X15, -136},
-      {PPC::X14, -144},
-
-      // CR save area offset.
-      // FIXME SVR4: Disable CR save area for now.
-//      {PPC::CR2, -4},
-//      {PPC::CR3, -4},
-//      {PPC::CR4, -4},
-//      {PPC::CR2LT, -4},
-//      {PPC::CR2GT, -4},
-//      {PPC::CR2EQ, -4},
-//      {PPC::CR2UN, -4},
-//      {PPC::CR3LT, -4},
-//      {PPC::CR3GT, -4},
-//      {PPC::CR3EQ, -4},
-//      {PPC::CR3UN, -4},
-//      {PPC::CR4LT, -4},
-//      {PPC::CR4GT, -4},
-//      {PPC::CR4EQ, -4},
-//      {PPC::CR4UN, -4},
-
-      // VRSAVE save area offset.
-      {PPC::VRSAVE, -4},
-
-      // Vector register save area
-      {PPC::V31, -16},
-      {PPC::V30, -32},
-      {PPC::V29, -48},
-      {PPC::V28, -64},
-      {PPC::V27, -80},
-      {PPC::V26, -96},
-      {PPC::V25, -112},
-      {PPC::V24, -128},
-      {PPC::V23, -144},
-      {PPC::V22, -160},
-      {PPC::V21, -176},
-      {PPC::V20, -192}
-    };
-
-    if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
-      NumEntries = array_lengthof(Offsets64);
-
-      return Offsets64;
-    } else {
-      NumEntries = array_lengthof(Offsets);
-
-      return Offsets;
-    }
-  }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 000000000000..6aca6b00a06c
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,971 @@
+//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment.  This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta.  You don't want to do that anyway; a better approach is to
+// reserve another register that retains to the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function.  Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+  MachineBasicBlock *Entry = MI->getParent();
+  MachineFunction *MF = Entry->getParent();
+
+  // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
+  MachineBasicBlock::iterator MBBI = MI;
+  ++MBBI;
+  assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+  MBBI->eraseFromParent();
+
+  bool RemovedAllMTVRSAVEs = true;
+  // See if we can find and remove the MTVRSAVE instruction from all of the
+  // epilog blocks.
+  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+    // If last instruction is a return instruction, add an epilogue
+    if (!I->empty() && I->back().getDesc().isReturn()) {
+      bool FoundIt = false;
+      for (MBBI = I->end(); MBBI != I->begin(); ) {
+        --MBBI;
+        if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+          MBBI->eraseFromParent();  // remove it.
+          FoundIt = true;
+          break;
+        }
+      }
+      RemovedAllMTVRSAVEs &= FoundIt;
+    }
+  }
+
+  // If we found and removed all MTVRSAVE instructions, remove the read of
+  // VRSAVE as well.
+  if (RemovedAllMTVRSAVEs) {
+    MBBI = MI;
+    assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+    --MBBI;
+    assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+    MBBI->eraseFromParent();
+  }
+
+  // Finally, nuke the UPDATE_VRSAVE.
+  MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector.  Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+  MachineFunction *MF = MI->getParent()->getParent();
+  DebugLoc dl = MI->getDebugLoc();
+
+  unsigned UsedRegMask = 0;
+  for (unsigned i = 0; i != 32; ++i)
+    if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+      UsedRegMask |= 1 << (31-i);
+
+  // Live in and live out values already must be in the mask, so don't bother
+  // marking them.
+  for (MachineRegisterInfo::livein_iterator
+       I = MF->getRegInfo().livein_begin(),
+       E = MF->getRegInfo().livein_end(); I != E; ++I) {
+    unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+    if (VRRegNo[RegNo] == I->first)        // If this really is a vector reg.
+      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
+  }
+  for (MachineRegisterInfo::liveout_iterator
+       I = MF->getRegInfo().liveout_begin(),
+       E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+    unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+    if (VRRegNo[RegNo] == *I)              // If this really is a vector reg.
+      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
+  }
+
+  // If no registers are used, turn this into a copy.
+  if (UsedRegMask == 0) {
+    // Remove all VRSAVE code.
+    RemoveVRSaveCode(MI);
+    return;
+  }
+
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  unsigned DstReg = MI->getOperand(0).getReg();
+
+  if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+    if (DstReg != SrcReg)
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+        .addReg(SrcReg)
+        .addImm(UsedRegMask);
+    else
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+        .addReg(SrcReg, RegState::Kill)
+        .addImm(UsedRegMask);
+  } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+    if (DstReg != SrcReg)
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+        .addReg(SrcReg)
+        .addImm(UsedRegMask >> 16);
+    else
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+        .addReg(SrcReg, RegState::Kill)
+        .addImm(UsedRegMask >> 16);
+  } else {
+    if (DstReg != SrcReg)
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+        .addReg(SrcReg)
+        .addImm(UsedRegMask >> 16);
+    else
+      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+        .addReg(SrcReg, RegState::Kill)
+        .addImm(UsedRegMask >> 16);
+
+    BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+      .addReg(DstReg, RegState::Kill)
+      .addImm(UsedRegMask & 0xFFFF);
+  }
+
+  // Remove the old UPDATE_VRSAVE instruction.
+  MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned FrameSize = MFI->getStackSize();
+
+  // Get the alignments provided by the target, and the maximum alignment
+  // (if any) of the fixed frame objects.
+  unsigned MaxAlign = MFI->getMaxAlignment();
+  unsigned TargetAlign = getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1;  //
+
+  // If we are a leaf function, and use up to 224 bytes of stack space,
+  // don't have a frame pointer, calls, or dynamic alloca then we do not need
+  // to adjust the stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+  // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+  if (!DisableRedZone &&
+      FrameSize <= 224 &&                          // Fits in red zone.
+      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
+      !MFI->adjustsStack() &&                      // No calls.
+      (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+    // No need for frame
+    MFI->setStackSize(0);
+    return;
+  }
+
+  // Get the maximum call frame size of all the calls.
+  unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+  // Maximum call frame needs to be at least big enough for linkage and 8 args.
+  unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+                                                  Subtarget.isDarwinABI());
+  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+  // that allocations will be aligned.
+  if (MFI->hasVarSizedObjects())
+    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+  // Update maximum call frame size.
+  MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+  // Include call frame size in total.
+  FrameSize += maxCallFrameSize;
+
+  // Make sure the frame is aligned.
+  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+  // Update frame info.
+  MFI->setStackSize(FrameSize);
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // FIXME: This is pretty much broken by design: hasFP() might be called really
+  // early, before the stack layout was calculated and thus hasFP() might return
+  // true or false here depending on the time of call.
+  return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Naked functions have no stack frame pushed, so we don't have a frame
+  // pointer.
+  if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+    return false;
+
+  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
+    (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const PPCInstrInfo &TII =
+    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  MachineModuleInfo &MMI = MF.getMMI();
+  DebugLoc dl;
+  bool needsFrameMoves = MMI.hasDebugInfo() ||
+       !MF.getFunction()->doesNotThrow() ||
+       UnwindTablesMandatory;
+
+  // Prepare for frame info.
+  MCSymbol *FrameLabel = 0;
+
+  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
+  // process it.
+  for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+    if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+      HandleVRSaveUpdate(MBBI, TII);
+      break;
+    }
+  }
+
+  // Move MBBI back to the beginning of the function.
+  MBBI = MBB.begin();
+
+  // Work out frame sizes.
+  // FIXME: determineFrameLayout() may change the frame size. This should be
+  // moved upper, to some hook.
+  determineFrameLayout(MF);
+  unsigned FrameSize = MFI->getStackSize();
+
+  int NegFrameSize = -FrameSize;
+
+  // Get processor type.
+  bool isPPC64 = Subtarget.isPPC64();
+  // Get operating system
+  bool isDarwinABI = Subtarget.isDarwinABI();
+  // Check if the link register (LR) must be saved.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  bool MustSaveLR = FI->mustSaveLR();
+  // Do we have a frame pointer for this function?
+  bool HasFP = hasFP(MF);
+
+  int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+    }
+  }
+
+  if (isPPC64) {
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+    if (HasFP)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+        .addReg(PPC::X31)
+        .addImm(FPOffset/4)
+        .addReg(PPC::X1);
+
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+        .addReg(PPC::X0)
+        .addImm(LROffset / 4)
+        .addReg(PPC::X1);
+  } else {
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+    if (HasFP)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+        .addReg(PPC::R31)
+        .addImm(FPOffset)
+        .addReg(PPC::R1);
+
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+        .addReg(PPC::R0)
+        .addImm(LROffset)
+        .addReg(PPC::R1);
+  }
+
+  // Skip if a leaf routine.
+  if (!FrameSize) return;
+
+  // Get stack alignments.
+  unsigned TargetAlign = getStackAlignment();
+  unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Adjust stack pointer: r1 += NegFrameSize.
+  // If there is a preferred stack alignment, align R1 now
+  if (!isPPC64) {
+    // PPC32.
+    if (ALIGN_STACK && MaxAlign > TargetAlign) {
+      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+             "Invalid alignment!");
+      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+        .addReg(PPC::R1)
+        .addImm(0)
+        .addImm(32 - Log2_32(MaxAlign))
+        .addImm(31);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+        .addReg(PPC::R0, RegState::Kill)
+        .addImm(NegFrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+        .addReg(PPC::R1)
+        .addReg(PPC::R1)
+        .addReg(PPC::R0);
+    } else if (isInt<16>(NegFrameSize)) {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+        .addReg(PPC::R1)
+        .addImm(NegFrameSize)
+        .addReg(PPC::R1);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+        .addImm(NegFrameSize >> 16);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+        .addReg(PPC::R0, RegState::Kill)
+        .addImm(NegFrameSize & 0xFFFF);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+        .addReg(PPC::R1)
+        .addReg(PPC::R1)
+        .addReg(PPC::R0);
+    }
+  } else {    // PPC64.
+    if (ALIGN_STACK && MaxAlign > TargetAlign) {
+      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+             "Invalid alignment!");
+      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+        .addReg(PPC::X1)
+        .addImm(0)
+        .addImm(64 - Log2_32(MaxAlign));
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+        .addReg(PPC::X0)
+        .addImm(NegFrameSize);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+        .addReg(PPC::X1)
+        .addReg(PPC::X1)
+        .addReg(PPC::X0);
+    } else if (isInt<16>(NegFrameSize)) {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+        .addReg(PPC::X1)
+        .addImm(NegFrameSize / 4)
+        .addReg(PPC::X1);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+        .addImm(NegFrameSize >> 16);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+        .addReg(PPC::X0, RegState::Kill)
+        .addImm(NegFrameSize & 0xFFFF);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+        .addReg(PPC::X1)
+        .addReg(PPC::X1)
+        .addReg(PPC::X0);
+    }
+  }
+
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+  // Add the "machine moves" for the instructions we generated above, but in
+  // reverse order.
+  if (needsFrameMoves) {
+    // Mark effective beginning of when frame pointer becomes valid.
+    FrameLabel = MMI.getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+    // Show update of SP.
+    if (NegFrameSize) {
+      MachineLocation SPDst(MachineLocation::VirtualFP);
+      MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+    } else {
+      MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+      Moves.push_back(MachineMove(FrameLabel, SP, SP));
+    }
+
+    if (HasFP) {
+      MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+      MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+    }
+
+    if (MustSaveLR) {
+      MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+      MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+      Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+    }
+  }
+
+  MCSymbol *ReadyLabel = 0;
+
+  // If there is a frame pointer, copy R1 into R31
+  if (HasFP) {
+    if (!isPPC64) {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+        .addReg(PPC::R1)
+        .addReg(PPC::R1);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+        .addReg(PPC::X1)
+        .addReg(PPC::X1);
+    }
+
+    if (needsFrameMoves) {
+      ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+      // Mark effective beginning of when frame pointer is ready.
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+      MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+                                    (isPPC64 ? PPC::X1 : PPC::R1));
+      MachineLocation FPSrc(MachineLocation::VirtualFP);
+      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+    }
+  }
+
+  if (needsFrameMoves) {
+    MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+    // Add callee saved registers to move list.
+    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+      int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+      unsigned Reg = CSI[I].getReg();
+      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+      MachineLocation CSSrc(Reg);
+      Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+    }
+  }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+                                MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI != MBB.end() && "Returning block has no terminator");
+  const PPCInstrInfo &TII =
+    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc dl;
+
+  assert((RetOpcode == PPC::BLR ||
+          RetOpcode == PPC::TCRETURNri ||
+          RetOpcode == PPC::TCRETURNdi ||
+          RetOpcode == PPC::TCRETURNai ||
+          RetOpcode == PPC::TCRETURNri8 ||
+          RetOpcode == PPC::TCRETURNdi8 ||
+          RetOpcode == PPC::TCRETURNai8) &&
+         "Can only insert epilog into returning blocks");
+
+  // Get alignment info so we know how to restore r1
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned TargetAlign = getStackAlignment();
+  unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Get the number of bytes allocated from the FrameInfo.
+  int FrameSize = MFI->getStackSize();
+
+  // Get processor type.
+  bool isPPC64 = Subtarget.isPPC64();
+  // Get operating system
+  bool isDarwinABI = Subtarget.isDarwinABI();
+  // Check if the link register (LR) has been saved.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  bool MustSaveLR = FI->mustSaveLR();
+  // Do we have a frame pointer for this function?
+  bool HasFP = hasFP(MF);
+
+  int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+    }
+  }
+
+  bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
+    RetOpcode == PPC::TCRETURNdi ||
+    RetOpcode == PPC::TCRETURNai ||
+    RetOpcode == PPC::TCRETURNri8 ||
+    RetOpcode == PPC::TCRETURNdi8 ||
+    RetOpcode == PPC::TCRETURNai8;
+
+  if (UsesTCRet) {
+    int MaxTCRetDelta = FI->getTailCallSPDelta();
+    MachineOperand &StackAdjust = MBBI->getOperand(1);
+    assert(StackAdjust.isImm() && "Expecting immediate value.");
+    // Adjust stack pointer.
+    int StackAdj = StackAdjust.getImm();
+    int Delta = StackAdj - MaxTCRetDelta;
+    assert((Delta >= 0) && "Delta must be positive");
+    if (MaxTCRetDelta>0)
+      FrameSize += (StackAdj +Delta);
+    else
+      FrameSize += StackAdj;
+  }
+
+  if (FrameSize) {
+    // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+    // on entry to the function.  Add this offset back now.
+    if (!isPPC64) {
+      // If this function contained a fastcc call and GuaranteedTailCallOpt is
+      // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+      // call which invalidates the stack pointer value in SP(0). So we use the
+      // value of R31 in this case.
+      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+          .addReg(PPC::R31).addImm(FrameSize);
+      } else if(FI->hasFastCall()) {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+          .addImm(FrameSize >> 16);
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+          .addReg(PPC::R0, RegState::Kill)
+          .addImm(FrameSize & 0xFFFF);
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+          .addReg(PPC::R1)
+          .addReg(PPC::R31)
+          .addReg(PPC::R0);
+      } else if (isInt<16>(FrameSize) &&
+                 (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+                 !MFI->hasVarSizedObjects()) {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+          .addReg(PPC::R1).addImm(FrameSize);
+      } else {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+          .addImm(0).addReg(PPC::R1);
+      }
+    } else {
+      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+          .addReg(PPC::X31).addImm(FrameSize);
+      } else if(FI->hasFastCall()) {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+          .addImm(FrameSize >> 16);
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+          .addReg(PPC::X0, RegState::Kill)
+          .addImm(FrameSize & 0xFFFF);
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+          .addReg(PPC::X1)
+          .addReg(PPC::X31)
+          .addReg(PPC::X0);
+      } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+            !MFI->hasVarSizedObjects()) {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+           .addReg(PPC::X1).addImm(FrameSize);
+      } else {
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+           .addImm(0).addReg(PPC::X1);
+      }
+    }
+  }
+
+  if (isPPC64) {
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+        .addImm(LROffset/4).addReg(PPC::X1);
+
+    if (HasFP)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+        .addImm(FPOffset/4).addReg(PPC::X1);
+
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+  } else {
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+          .addImm(LROffset).addReg(PPC::R1);
+
+    if (HasFP)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+          .addImm(FPOffset).addReg(PPC::R1);
+
+    if (MustSaveLR)
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+  }
+
+  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+  // call optimization
+  if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+      MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+     PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+     unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+     unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+     unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+     unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+     unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+     unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+     unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+     unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+     if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+       BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+         .addReg(StackReg).addImm(CallerAllocatedAmt);
+     } else {
+       BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+          .addImm(CallerAllocatedAmt >> 16);
+       BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+          .addReg(TmpReg, RegState::Kill)
+          .addImm(CallerAllocatedAmt & 0xFFFF);
+       BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+          .addReg(StackReg)
+          .addReg(FPReg)
+          .addReg(TmpReg);
+     }
+  } else if (RetOpcode == PPC::TCRETURNdi) {
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+  } else if (RetOpcode == PPC::TCRETURNri) {
+    MBBI = MBB.getLastNonDebugInstr();
+    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+  } else if (RetOpcode == PPC::TCRETURNai) {
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+  } else if (RetOpcode == PPC::TCRETURNdi8) {
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+  } else if (RetOpcode == PPC::TCRETURNri8) {
+    MBBI = MBB.getLastNonDebugInstr();
+    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+  } else if (RetOpcode == PPC::TCRETURNai8) {
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+  }
+}
+
+void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+  // Initial state of the frame pointer is R1.
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(PPC::R1, 0);
+  Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  return FuncInfo->isCRSpilled();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+  // We need a save/restore of LR if there is any def of LR (which is
+  // defined by calls, including the PIC setup sequence), or if there is
+  // some use of the LR stack slot (e.g. for builtin_return_address).
+  // (LR comes in 32 and 64 bit versions.)
+  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+  return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                   RegScavenger *RS) const {
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+  //  Save and clear the LR state.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+  unsigned LR = RegInfo->getRARegister();
+  FI->setMustSaveLR(MustSaveLR(MF, LR));
+  MF.getRegInfo().setPhysRegUnused(LR);
+
+  //  Save R31 if necessary
+  int FPSI = FI->getFramePointerSaveIndex();
+  bool isPPC64 = Subtarget.isPPC64();
+  bool isDarwinABI  = Subtarget.isDarwinABI();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // If the frame pointer save index hasn't been defined yet.
+  if (!FPSI && needsFP(MF)) {
+    // Find out what the fix offset of the frame pointer save area.
+    int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+    // Allocate the frame index for frame pointer save area.
+    FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+    // Save the result.
+    FI->setFramePointerSaveIndex(FPSI);
+  }
+
+  // Reserve stack space to move the linkage area to in case of a tail call.
+  int TCSPDelta = 0;
+  if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+    MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+  }
+
+  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+  // a large stack, which will require scavenging a register to materialize a
+  // large offset.
+  // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+  // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+  //        r0 for now.
+
+  if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+    if (needsFP(MF) || spillsCR(MF)) {
+      const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+      const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+      const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+      RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                         RC->getAlignment(),
+                                                         false));
+    }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+                                                                        const {
+  // Early exit if not using the SVR4 ABI.
+  if (!Subtarget.isSVR4ABI())
+    return;
+
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty() && !needsFP(MF)) {
+    return;
+  }
+
+  unsigned MinGPR = PPC::R31;
+  unsigned MinG8R = PPC::X31;
+  unsigned MinFPR = PPC::F31;
+  unsigned MinVR = PPC::V31;
+
+  bool HasGPSaveArea = false;
+  bool HasG8SaveArea = false;
+  bool HasFPSaveArea = false;
+  bool HasCRSaveArea = false;
+  bool HasVRSAVESaveArea = false;
+  bool HasVRSaveArea = false;
+
+  SmallVector<CalleeSavedInfo, 18> GPRegs;
+  SmallVector<CalleeSavedInfo, 18> G8Regs;
+  SmallVector<CalleeSavedInfo, 18> FPRegs;
+  SmallVector<CalleeSavedInfo, 18> VRegs;
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (PPC::GPRCRegisterClass->contains(Reg)) {
+      HasGPSaveArea = true;
+
+      GPRegs.push_back(CSI[i]);
+
+      if (Reg < MinGPR) {
+        MinGPR = Reg;
+      }
+    } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+      HasG8SaveArea = true;
+
+      G8Regs.push_back(CSI[i]);
+
+      if (Reg < MinG8R) {
+        MinG8R = Reg;
+      }
+    } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+      HasFPSaveArea = true;
+
+      FPRegs.push_back(CSI[i]);
+
+      if (Reg < MinFPR) {
+        MinFPR = Reg;
+      }
+// FIXME SVR4: Disable CR save area for now.
+    } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+               || PPC::CRRCRegisterClass->contains(Reg)) {
+//      HasCRSaveArea = true;
+    } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+      HasVRSAVESaveArea = true;
+    } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+      HasVRSaveArea = true;
+
+      VRegs.push_back(CSI[i]);
+
+      if (Reg < MinVR) {
+        MinVR = Reg;
+      }
+    } else {
+      llvm_unreachable("Unknown RegisterClass!");
+    }
+  }
+
+  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+
+  int64_t LowerBound = 0;
+
+  // Take into account stack space reserved for tail calls.
+  int TCSPDelta = 0;
+  if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+    LowerBound = TCSPDelta;
+  }
+
+  // The Floating-point register save area is right below the back chain word
+  // of the previous stack frame.
+  if (HasFPSaveArea) {
+    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+      int FI = FPRegs[i].getFrameIdx();
+
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+
+    LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+  }
+
+  // Check whether the frame pointer register is allocated. If so, make sure it
+  // is spilled to the correct offset.
+  if (needsFP(MF)) {
+    HasGPSaveArea = true;
+
+    int FI = PFI->getFramePointerSaveIndex();
+    assert(FI && "No Frame Pointer Save Slot!");
+
+    FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+  }
+
+  // General register save area starts right below the Floating-point
+  // register save area.
+  if (HasGPSaveArea || HasG8SaveArea) {
+    // Move general register save area spill slots down, taking into account
+    // the size of the Floating-point register save area.
+    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+      int FI = GPRegs[i].getFrameIdx();
+
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+
+    // Move general register save area spill slots down, taking into account
+    // the size of the Floating-point register save area.
+    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+      int FI = G8Regs[i].getFrameIdx();
+
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+
+    unsigned MinReg =
+      std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
+                         PPCRegisterInfo::getRegisterNumbering(MinG8R));
+
+    if (Subtarget.isPPC64()) {
+      LowerBound -= (31 - MinReg + 1) * 8;
+    } else {
+      LowerBound -= (31 - MinReg + 1) * 4;
+    }
+  }
+
+  // The CR save area is below the general register save area.
+  if (HasCRSaveArea) {
+    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+    //             which have the CR/CRBIT register class?
+    // Adjust the frame index of the CR spill slot.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+
+      if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+          PPC::CRRCRegisterClass->contains(Reg)) {
+        int FI = CSI[i].getFrameIdx();
+
+        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+      }
+    }
+
+    LowerBound -= 4; // The CR save area is always 4 bytes long.
+  }
+
+  if (HasVRSAVESaveArea) {
+    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+    //             which have the VRSAVE register class?
+    // Adjust the frame index of the VRSAVE spill slot.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+
+      if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+        int FI = CSI[i].getFrameIdx();
+
+        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+      }
+    }
+
+    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+  }
+
+  if (HasVRSaveArea) {
+    // Insert alignment padding, we need 16-byte alignment.
+    LowerBound = (LowerBound - 15) & ~(15);
+
+    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+      int FI = VRegs[i].getFrameIdx();
+
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+  }
+}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 000000000000..0c18de1e2e26
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,322 @@
+//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+  class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+  const PPCSubtarget &Subtarget;
+
+public:
+  PPCFrameLowering(const PPCSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+      Subtarget(sti) {
+  }
+
+  void determineFrameLayout(MachineFunction &MF) const;
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool needsFP(const MachineFunction &MF) const;
+  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+  /// targetHandlesStackFrameRounding - Returns true if the target is
+  /// responsible for rounding up the stack frame (probably at emitPrologue
+  /// time).
+  bool targetHandlesStackFrameRounding() const { return true; }
+
+  /// getReturnSaveOffset - Return the previous frame offset to save the
+  /// return address.
+  static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+    if (isDarwinABI)
+      return isPPC64 ? 16 : 8;
+    // SVR4 ABI:
+    return isPPC64 ? 16 : 4;
+  }
+
+  /// getFramePointerSaveOffset - Return the previous frame offset to save the
+  /// frame pointer.
+  static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+    // For the Darwin ABI:
+    // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+    // for saving the frame pointer (if needed.)  While the published ABI has
+    // not used this slot since at least MacOSX 10.2, there is older code
+    // around that does use it, and that needs to continue to work.
+    if (isDarwinABI)
+      return isPPC64 ? -8U : -4U;
+
+    // SVR4 ABI: First slot in the general register save area.
+    return isPPC64 ? -8U : -4U;
+  }
+
+  /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+  ///
+  static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+    if (isDarwinABI || isPPC64)
+      return 6 * (isPPC64 ? 8 : 4);
+
+    // SVR4 ABI:
+    return 8;
+  }
+
+  /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
+  /// argument area.
+  static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+    // For the Darwin ABI / 64-bit SVR4 ABI:
+    // The prolog code of the callee may store up to 8 GPR argument registers to
+    // the stack, allowing va_start to index over them in memory if its varargs.
+    // Because we cannot tell if this is needed on the caller side, we have to
+    // conservatively assume that it is needed.  As such, make sure we have at
+    // least enough stack space for the caller to store the 8 GPRs.
+    if (isDarwinABI || isPPC64)
+      return 8 * (isPPC64 ? 8 : 4);
+
+    // 32-bit SVR4 ABI:
+    // There is no default stack allocated for the 8 first GPR arguments.
+    return 0;
+  }
+
+  /// getMinCallFrameSize - Return the minimum size a call frame can be using
+  /// the PowerPC ABI.
+  static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+    // The call frame needs to be at least big enough for linkage and 8 args.
+    return getLinkageSize(isPPC64, isDarwinABI) +
+           getMinCallArgumentsSize(isPPC64, isDarwinABI);
+  }
+
+  // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+  const SpillSlot *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+    if (Subtarget.isDarwinABI()) {
+      NumEntries = 1;
+      if (Subtarget.isPPC64()) {
+        static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+        return &darwin64Offsets;
+      } else {
+        static const SpillSlot darwinOffsets = {PPC::R31, -4};
+        return &darwinOffsets;
+      }
+    }
+
+    // Early exit if not using the SVR4 ABI.
+    if (!Subtarget.isSVR4ABI()) {
+      NumEntries = 0;
+      return 0;
+    }
+
+    static const SpillSlot Offsets[] = {
+      // Floating-point register save area offsets.
+      {PPC::F31, -8},
+      {PPC::F30, -16},
+      {PPC::F29, -24},
+      {PPC::F28, -32},
+      {PPC::F27, -40},
+      {PPC::F26, -48},
+      {PPC::F25, -56},
+      {PPC::F24, -64},
+      {PPC::F23, -72},
+      {PPC::F22, -80},
+      {PPC::F21, -88},
+      {PPC::F20, -96},
+      {PPC::F19, -104},
+      {PPC::F18, -112},
+      {PPC::F17, -120},
+      {PPC::F16, -128},
+      {PPC::F15, -136},
+      {PPC::F14, -144},
+
+      // General register save area offsets.
+      {PPC::R31, -4},
+      {PPC::R30, -8},
+      {PPC::R29, -12},
+      {PPC::R28, -16},
+      {PPC::R27, -20},
+      {PPC::R26, -24},
+      {PPC::R25, -28},
+      {PPC::R24, -32},
+      {PPC::R23, -36},
+      {PPC::R22, -40},
+      {PPC::R21, -44},
+      {PPC::R20, -48},
+      {PPC::R19, -52},
+      {PPC::R18, -56},
+      {PPC::R17, -60},
+      {PPC::R16, -64},
+      {PPC::R15, -68},
+      {PPC::R14, -72},
+
+      // CR save area offset.
+      // FIXME SVR4: Disable CR save area for now.
+//      {PPC::CR2, -4},
+//      {PPC::CR3, -4},
+//      {PPC::CR4, -4},
+//      {PPC::CR2LT, -4},
+//      {PPC::CR2GT, -4},
+//      {PPC::CR2EQ, -4},
+//      {PPC::CR2UN, -4},
+//      {PPC::CR3LT, -4},
+//      {PPC::CR3GT, -4},
+//      {PPC::CR3EQ, -4},
+//      {PPC::CR3UN, -4},
+//      {PPC::CR4LT, -4},
+//      {PPC::CR4GT, -4},
+//      {PPC::CR4EQ, -4},
+//      {PPC::CR4UN, -4},
+
+      // VRSAVE save area offset.
+      {PPC::VRSAVE, -4},
+
+      // Vector register save area
+      {PPC::V31, -16},
+      {PPC::V30, -32},
+      {PPC::V29, -48},
+      {PPC::V28, -64},
+      {PPC::V27, -80},
+      {PPC::V26, -96},
+      {PPC::V25, -112},
+      {PPC::V24, -128},
+      {PPC::V23, -144},
+      {PPC::V22, -160},
+      {PPC::V21, -176},
+      {PPC::V20, -192}
+    };
+
+    static const SpillSlot Offsets64[] = {
+      // Floating-point register save area offsets.
+      {PPC::F31, -8},
+      {PPC::F30, -16},
+      {PPC::F29, -24},
+      {PPC::F28, -32},
+      {PPC::F27, -40},
+      {PPC::F26, -48},
+      {PPC::F25, -56},
+      {PPC::F24, -64},
+      {PPC::F23, -72},
+      {PPC::F22, -80},
+      {PPC::F21, -88},
+      {PPC::F20, -96},
+      {PPC::F19, -104},
+      {PPC::F18, -112},
+      {PPC::F17, -120},
+      {PPC::F16, -128},
+      {PPC::F15, -136},
+      {PPC::F14, -144},
+
+      // General register save area offsets.
+      // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+      //                    mode?
+      {PPC::R31, -4},
+      {PPC::R30, -12},
+      {PPC::R29, -20},
+      {PPC::R28, -28},
+      {PPC::R27, -36},
+      {PPC::R26, -44},
+      {PPC::R25, -52},
+      {PPC::R24, -60},
+      {PPC::R23, -68},
+      {PPC::R22, -76},
+      {PPC::R21, -84},
+      {PPC::R20, -92},
+      {PPC::R19, -100},
+      {PPC::R18, -108},
+      {PPC::R17, -116},
+      {PPC::R16, -124},
+      {PPC::R15, -132},
+      {PPC::R14, -140},
+
+      {PPC::X31, -8},
+      {PPC::X30, -16},
+      {PPC::X29, -24},
+      {PPC::X28, -32},
+      {PPC::X27, -40},
+      {PPC::X26, -48},
+      {PPC::X25, -56},
+      {PPC::X24, -64},
+      {PPC::X23, -72},
+      {PPC::X22, -80},
+      {PPC::X21, -88},
+      {PPC::X20, -96},
+      {PPC::X19, -104},
+      {PPC::X18, -112},
+      {PPC::X17, -120},
+      {PPC::X16, -128},
+      {PPC::X15, -136},
+      {PPC::X14, -144},
+
+      // CR save area offset.
+      // FIXME SVR4: Disable CR save area for now.
+//      {PPC::CR2, -4},
+//      {PPC::CR3, -4},
+//      {PPC::CR4, -4},
+//      {PPC::CR2LT, -4},
+//      {PPC::CR2GT, -4},
+//      {PPC::CR2EQ, -4},
+//      {PPC::CR2UN, -4},
+//      {PPC::CR3LT, -4},
+//      {PPC::CR3GT, -4},
+//      {PPC::CR3EQ, -4},
+//      {PPC::CR3UN, -4},
+//      {PPC::CR4LT, -4},
+//      {PPC::CR4GT, -4},
+//      {PPC::CR4EQ, -4},
+//      {PPC::CR4UN, -4},
+
+      // VRSAVE save area offset.
+      {PPC::VRSAVE, -4},
+
+      // Vector register save area
+      {PPC::V31, -16},
+      {PPC::V30, -32},
+      {PPC::V29, -48},
+      {PPC::V28, -64},
+      {PPC::V27, -80},
+      {PPC::V26, -96},
+      {PPC::V25, -112},
+      {PPC::V24, -128},
+      {PPC::V23, -144},
+      {PPC::V22, -160},
+      {PPC::V21, -176},
+      {PPC::V20, -192}
+    };
+
+    if (Subtarget.isPPC64()) {
+      NumEntries = array_lengthof(Offsets64);
+
+      return Offsets64;
+    } else {
+      NumEntries = array_lengthof(Offsets);
+
+      return Offsets;
+    }
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index db11fdeb7c1e..0de5844d1c28 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 //
 // This models the dispatch group formation of the PPC970 processor.  Dispatch
 // groups are bundles of up to five instructions that can contain various mixes
-// of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one 
+// of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
 // branch instruction per-cycle.
 //
 // There are a number of restrictions to dispatch group formation: some
@@ -55,14 +55,14 @@ PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
 void PPCHazardRecognizer970::EndDispatchGroup() {
   DEBUG(errs() << "=== Start of dispatch group\n");
   NumIssued = 0;
-  
+
   // Structural hazard info.
   HasCTRSet = false;
   NumStores = 0;
 }
 
 
-PPCII::PPC970_Unit 
+PPCII::PPC970_Unit
 PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
                                      bool &isFirst, bool &isSingle,
                                      bool &isCracked,
@@ -72,14 +72,14 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
     return PPCII::PPC970_Pseudo;
   }
   Opcode = ~Opcode;
-  
+
   const TargetInstrDesc &TID = TII.get(Opcode);
-  
+
   isLoad  = TID.mayLoad();
   isStore = TID.mayStore();
-  
+
   uint64_t TSFlags = TID.TSFlags;
-  
+
   isFirst   = TSFlags & PPCII::PPC970_First;
   isSingle  = TSFlags & PPCII::PPC970_Single;
   isCracked = TSFlags & PPCII::PPC970_Cracked;
@@ -96,7 +96,7 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
       return true;
     if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
       return true;
-    
+
     // Okay, we don't have an exact match, if this is an indexed offset, see if
     // we have overlap (which happens during fp->int conversion for example).
     if (StorePtr2[i] == Ptr2) {
@@ -122,26 +122,28 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
 /// instructions that wouldn't terminate the dispatch group that would cause a
 /// pipeline flush.
 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
-getHazardType(SUnit *SU) {
-  const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+getHazardType(SUnit *SU, int Stalls) {
+  assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+  const SDNode *Node = SU->getNode()->getGluedMachineNode();
   bool isFirst, isSingle, isCracked, isLoad, isStore;
-  PPCII::PPC970_Unit InstrType = 
+  PPCII::PPC970_Unit InstrType =
     GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
                  isLoad, isStore);
-  if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;  
+  if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
   unsigned Opcode = Node->getMachineOpcode();
 
   // We can only issue a PPC970_First/PPC970_Single instruction (such as
   // crand/mtspr/etc) if this is the first cycle of the dispatch group.
   if (NumIssued != 0 && (isFirst || isSingle))
     return Hazard;
-  
+
   // If this instruction is cracked into two ops by the decoder, we know that
   // it is not a branch and that it cannot issue if 3 other instructions are
   // already in the dispatch group.
   if (isCracked && NumIssued > 2)
     return Hazard;
-      
+
   switch (InstrType) {
   default: llvm_unreachable("Unknown instruction type!");
   case PPCII::PPC970_FXU:
@@ -159,11 +161,11 @@ getHazardType(SUnit *SU) {
   case PPCII::PPC970_BRU:
     break;
   }
-  
+
   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
   if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
     return NoopHazard;
-  
+
   // If this is a load following a store, make sure it's not to the same or
   // overlapping address.
   if (isLoad && NumStores) {
@@ -212,27 +214,27 @@ getHazardType(SUnit *SU) {
       LoadSize = 16;
       break;
     }
-    
-    if (isLoadOfStoredAddress(LoadSize, 
+
+    if (isLoadOfStoredAddress(LoadSize,
                               Node->getOperand(0), Node->getOperand(1)))
       return NoopHazard;
   }
-  
+
   return NoHazard;
 }
 
 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
-  const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+  const SDNode *Node = SU->getNode()->getGluedMachineNode();
   bool isFirst, isSingle, isCracked, isLoad, isStore;
-  PPCII::PPC970_Unit InstrType = 
+  PPCII::PPC970_Unit InstrType =
     GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
                  isLoad, isStore);
-  if (InstrType == PPCII::PPC970_Pseudo) return;  
+  if (InstrType == PPCII::PPC970_Pseudo) return;
   unsigned Opcode = Node->getMachineOpcode();
 
   // Update structural hazard information.
   if (Opcode == PPC::MTCTR) HasCTRSet = true;
-  
+
   // Track the address stored to.
   if (isStore) {
     unsigned ThisStoreSize;
@@ -278,22 +280,22 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
       ThisStoreSize = 16;
       break;
     }
-    
+
     StoreSize[NumStores] = ThisStoreSize;
     StorePtr1[NumStores] = Node->getOperand(1);
     StorePtr2[NumStores] = Node->getOperand(2);
     ++NumStores;
   }
-  
+
   if (InstrType == PPCII::PPC970_BRU || isSingle)
     NumIssued = 4;  // Terminate a d-group.
   ++NumIssued;
-  
+
   // If this instruction is cracked into two ops by the decoder, remember that
   // we issued two pieces.
   if (isCracked)
     ++NumIssued;
-  
+
   if (NumIssued == 5)
     EndDispatchGroup();
 }
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 74bf8e52d8fa..2f81f0f7c7f1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -19,7 +19,7 @@
 #include "PPCInstrInfo.h"
 
 namespace llvm {
-  
+
 /// PPCHazardRecognizer970 - This class defines a finite state automata that
 /// models the dispatch logic on the PowerPC 970 (aka G5) processor.  This
 /// promotes good dispatch group formation and implements noop insertion to
@@ -28,14 +28,14 @@ namespace llvm {
 /// or storing then loading from the same address within a dispatch group.
 class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
   const TargetInstrInfo &TII;
-  
+
   unsigned NumIssued;  // Number of insts issued, including advanced cycles.
-  
+
   // Various things that can cause a structural hazard.
-  
+
   // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
   bool HasCTRSet;
-  
+
   // StoredPtr - Keep track of the address of any store.  If we see a load from
   // the same address (or one that aliases it), disallow the store.  We can have
   // up to four stores in one dispatch group, hence we track up to 4.
@@ -45,24 +45,24 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
   SDValue StorePtr1[4], StorePtr2[4];
   unsigned  StoreSize[4];
   unsigned NumStores;
-  
+
 public:
   PPCHazardRecognizer970(const TargetInstrInfo &TII);
-  virtual HazardType getHazardType(SUnit *SU);
+  virtual HazardType getHazardType(SUnit *SU, int Stalls);
   virtual void EmitInstruction(SUnit *SU);
   virtual void AdvanceCycle();
-  
+
 private:
   /// EndDispatchGroup - Called when we are finishing a new dispatch group.
   ///
   void EndDispatchGroup();
-  
+
   /// GetInstrType - Classify the specified powerpc opcode according to its
   /// pipeline.
   PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
                                   bool &isFirst, bool &isSingle,bool &isCracked,
                                   bool &isLoad, bool &isStore);
-  
+
   bool isLoadOfStoredAddress(unsigned LoadSize,
                              SDValue Ptr1, SDValue Ptr2) const;
 };
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 00eebb83f12d..faae9b2f22ae 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
 #include "PPC.h"
 #include "PPCPredicates.h"
 #include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -49,16 +48,16 @@ namespace {
       : SelectionDAGISel(tm), TM(tm),
         PPCLowering(*TM.getTargetLowering()),
         PPCSubTarget(*TM.getSubtargetImpl()) {}
-    
+
     virtual bool runOnMachineFunction(MachineFunction &MF) {
       // Make sure we re-emit a set of the global base reg if necessary
       GlobalBaseReg = 0;
       SelectionDAGISel::runOnMachineFunction(MF);
-      
+
       InsertVRSaveCode(MF);
       return true;
     }
-   
+
     /// getI32Imm - Return a target constant with the specified value, of type
     /// i32.
     inline SDValue getI32Imm(unsigned Imm) {
@@ -70,13 +69,13 @@ namespace {
     inline SDValue getI64Imm(uint64_t Imm) {
       return CurDAG->getTargetConstant(Imm, MVT::i64);
     }
-    
+
     /// getSmallIPtrImm - Return a target constant of pointer type.
     inline SDValue getSmallIPtrImm(unsigned Imm) {
       return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
     }
-    
-    /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s 
+
+    /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
     /// with any number of 0s on either side.  The 1s are allowed to wrap from
     /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
     /// 0x0F0F0000 is not, since all 1s are not contiguous.
@@ -87,15 +86,15 @@ namespace {
     /// rotate and mask opcode and mask operation.
     static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                 unsigned &SH, unsigned &MB, unsigned &ME);
-    
+
     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
     /// base register.  Return the virtual register that holds this value.
     SDNode *getGlobalBaseReg();
-    
+
     // Select - Convert the specified operand from a target-independent to a
     // target-specific node if it hasn't already been changed.
     SDNode *Select(SDNode *N);
-    
+
     SDNode *SelectBitfieldInsert(SDNode *N);
 
     /// SelectCC - Select a comparison of the specified values with the
@@ -104,42 +103,39 @@ namespace {
 
     /// SelectAddrImm - Returns true if the address N can be represented by
     /// a base register plus a signed 16-bit displacement [r+imm].
-    bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp,
+    bool SelectAddrImm(SDValue N, SDValue &Disp,
                        SDValue &Base) {
       return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
     }
-    
+
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
     /// immediate field.  Because preinc imms have already been validated, just
     /// accept it.
-    bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const {
+    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
       Out = N;
       return true;
     }
-      
+
     /// SelectAddrIdx - Given the specified addressed, check to see if it can be
     /// represented as an indexed [r+r] operation.  Returns false if it can
     /// be represented by [r+imm], which are preferred.
-    bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base,
-                       SDValue &Index) {
+    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
     }
-    
+
     /// SelectAddrIdxOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
-    bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base,
-                           SDValue &Index) {
+    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
     }
 
     /// SelectAddrImmShift - Returns true if the address N can be represented by
     /// a base register plus a signed 14-bit displacement [r+imm*4].  Suitable
     /// for use by STD and friends.
-    bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp,
-                            SDValue &Base) {
+    bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
     }
-      
+
     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     /// inline asm expressions.  It is always correct to compute the value into
     /// a register.  The case of adding a (possibly relocatable) constant to a
@@ -151,29 +147,16 @@ namespace {
       OutOps.push_back(Op);
       return false;
     }
-    
-    SDValue BuildSDIVSequence(SDNode *N);
-    SDValue BuildUDIVSequence(SDNode *N);
-    
+
     void InsertVRSaveCode(MachineFunction &MF);
 
     virtual const char *getPassName() const {
       return "PowerPC DAG->DAG Pattern Instruction Selection";
-    } 
-    
-    /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
-    /// this target when scheduling the DAG.
-    virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
-      // Should use subtarget info to pick the right hazard recognizer.  For
-      // now, always return a PPC970 recognizer.
-      const TargetInstrInfo *II = TM.getInstrInfo();
-      assert(II && "No InstrInfo?");
-      return new PPCHazardRecognizer970(*II); 
     }
 
 // Include the pieces autogenerated from the target description.
 #include "PPCGenDAGISel.inc"
-    
+
 private:
     SDNode *SelectSETCC(SDNode *N);
   };
@@ -184,19 +167,20 @@ private:
 /// check to see if we need to save/restore VRSAVE.  If so, do it.
 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   // Check to see if this function uses vector registers, which means we have to
-  // save and restore the VRSAVE register and update it with the regs we use.  
+  // save and restore the VRSAVE register and update it with the regs we use.
   //
   // In this case, there will be virtual registers of vector type created
   // by the scheduler.  Detect them now.
   bool HasVectorVReg = false;
-  for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, 
-       e = RegInfo->getLastVirtReg()+1; i != e; ++i)
-    if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) {
+  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
       HasVectorVReg = true;
       break;
     }
+  }
   if (!HasVectorVReg) return;  // nothing to do.
-      
+
   // If we have a vector register, we want to emit code into the entry and exit
   // blocks to save and restore the VRSAVE register.  We do this here (instead
   // of marking all vector instructions as clobbering VRSAVE) for two reasons:
@@ -211,7 +195,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   // function and one for the value after having bits or'd into it.
   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
-  
+
   const TargetInstrInfo &TII = *TM.getInstrInfo();
   MachineBasicBlock &EntryBB = *Fn.begin();
   DebugLoc dl;
@@ -224,21 +208,21 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
           UpdatedVRSAVE).addReg(InVRSAVE);
   BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
-  
+
   // Find all return blocks, outputting a restore in each epilog.
   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
     if (!BB->empty() && BB->back().getDesc().isReturn()) {
       IP = BB->end(); --IP;
-      
+
       // Skip over all terminator instructions, which are part of the return
       // sequence.
       MachineBasicBlock::iterator I2 = IP;
       while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
         IP = I2;
-      
+
       // Emit: MTVRSAVE InVRSave
       BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
-    }        
+    }
   }
 }
 
@@ -344,8 +328,8 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
   return false;
 }
 
-bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, 
-                                      bool isShiftMask, unsigned &SH, 
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+                                      bool isShiftMask, unsigned &SH,
                                       unsigned &MB, unsigned &ME) {
   // Don't even go down this path for i64, since different logic will be
   // necessary for rldicl/rldicr/rldimi.
@@ -358,13 +342,13 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
   if (N->getNumOperands() != 2 ||
       !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
     return false;
-  
+
   if (Opcode == ISD::SHL) {
     // apply shift left to mask if it comes first
     if (isShiftMask) Mask = Mask << Shift;
     // determine which bits are made indeterminant by shift
     Indeterminant = ~(0xFFFFFFFFu << Shift);
-  } else if (Opcode == ISD::SRL) { 
+  } else if (Opcode == ISD::SRL) {
     // apply shift right to mask if it comes first
     if (isShiftMask) Mask = Mask >> Shift;
     // determine which bits are made indeterminant by shift
@@ -376,7 +360,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
   } else {
     return false;
   }
-  
+
   // if the mask doesn't intersect any Indeterminant bits
   if (Mask && !(Mask & Indeterminant)) {
     SH = Shift & 31;
@@ -392,14 +376,14 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   DebugLoc dl = N->getDebugLoc();
-  
+
   APInt LKZ, LKO, RKZ, RKO;
   CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
   CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
-  
+
   unsigned TargetMask = LKZ.getZExtValue();
   unsigned InsertMask = RKZ.getZExtValue();
-  
+
   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
     unsigned Op0Opc = Op0.getOpcode();
     unsigned Op1Opc = Op1.getOpcode();
@@ -427,7 +411,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
         std::swap(TargetMask, InsertMask);
       }
     }
-    
+
     unsigned MB, ME;
     if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
       SDValue Tmp1, Tmp2;
@@ -463,7 +447,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
                                     ISD::CondCode CC, DebugLoc dl) {
   // Always select the LHS.
   unsigned Opc;
-  
+
   if (LHS.getValueType() == MVT::i32) {
     unsigned Imm;
     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
@@ -476,11 +460,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
         if (isInt<16>((int)Imm))
           return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                 getI32Imm(Imm & 0xFFFF)), 0);
-        
+
         // For non-equality comparisons, the default code would materialize the
         // constant, then compare against it, like this:
         //   lis r2, 4660
-        //   ori r2, r2, 22136 
+        //   ori r2, r2, 22136
         //   cmpw cr0, r3, r2
         // Since we are just comparing for equality, we can emit this instead:
         //   xoris r0,r3,0x1234
@@ -517,11 +501,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
         if (isInt<16>(Imm))
           return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                 getI32Imm(Imm & 0xFFFF)), 0);
-        
+
         // For non-equality comparisons, the default code would materialize the
         // constant, then compare against it, like this:
         //   lis r2, 4660
-        //   ori r2, r2, 22136 
+        //   ori r2, r2, 22136
         //   cmpd cr0, r3, r2
         // Since we are just comparing for equality, we can emit this instead:
         //   xoris r0,r3,0x1234
@@ -610,9 +594,9 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
   case ISD::SETUNE:
   case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
   case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
-  case ISD::SETUEQ: 
-  case ISD::SETOGE: 
-  case ISD::SETOLE: 
+  case ISD::SETUEQ:
+  case ISD::SETOGE:
+  case ISD::SETOLE:
   case ISD::SETONE:
     llvm_unreachable("Invalid branch code: should be expanded by legalize");
   // These are invalid for floating point.  Assume integer.
@@ -641,9 +625,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       }
       case ISD::SETNE: {
         SDValue AD =
-          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                          Op, getI32Imm(~0U)), 0);
-        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, 
+        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
                                     AD.getValue(1));
       }
       case ISD::SETLT: {
@@ -663,16 +647,16 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       switch (CC) {
       default: break;
       case ISD::SETEQ:
-        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(1)), 0);
-        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 
-                              SDValue(CurDAG->getMachineNode(PPC::LI, dl, 
+        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                              MVT::i32,
                                                              getI32Imm(0)), 0),
                                       Op.getValue(1));
       case ISD::SETNE: {
         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
-        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(~0U));
         return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
                                     Op, SDValue(AD, 1));
@@ -687,35 +671,35 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       }
       case ISD::SETGT: {
         SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
-        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 
+        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
                      0);
-        return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, 
+        return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
                                     getI32Imm(1));
       }
       }
     }
   }
-  
+
   bool Inv;
   int OtherCondIdx;
   unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
   SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
   SDValue IntCR;
-  
+
   // Force the ccreg into CR7.
   SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
-  
+
   SDValue InFlag(0, 0);  // Null incoming flag value.
-  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 
+  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                                InFlag).getValue(1);
-  
+
   if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
     IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                            CCReg), 0);
  else
     IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
                                            CR7Reg, CCReg), 0);
-  
+
   SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
                       getI32Imm(31), getI32Imm(31) };
   if (OtherCondIdx == -1 && !Inv)
@@ -734,7 +718,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
 
   // Get the other bit of the comparison.
   Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
-  SDValue OtherCond = 
+  SDValue OtherCond =
     SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
 
   return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
@@ -750,7 +734,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
-  
+
   case ISD::Constant: {
     if (N->getValueType(0) == MVT::i64) {
       // Get 64 bit value.
@@ -759,12 +743,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       unsigned Remainder = 0;
       // Assume no shift required.
       unsigned Shift = 0;
-      
+
       // If it can't be represented as a 32 bit value.
       if (!isInt<32>(Imm)) {
         Shift = CountTrailingZeros_64(Imm);
         int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-        
+
         // If the shifted value fits 32 bits.
         if (isInt<32>(ImmSh)) {
           // Go with the shifted value.
@@ -776,14 +760,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
           Imm >>= 32;
         }
       }
-      
+
       // Intermediate operand.
       SDNode *Result;
 
       // Handle first 32 bits.
       unsigned Lo = Imm & 0xFFFF;
       unsigned Hi = (Imm >> 16) & 0xFFFF;
-      
+
       // Simple value.
       if (isInt<16>(Imm)) {
        // Just the Lo bits.
@@ -799,7 +783,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
        // Just the Hi bits.
         Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
       }
-      
+
       // If no shift, we're done.
       if (!Shift) return Result;
 
@@ -815,22 +799,22 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       if ((Hi = (Remainder >> 16) & 0xFFFF)) {
         Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                         SDValue(Result, 0), getI32Imm(Hi));
-      } 
+      }
       if ((Lo = Remainder & 0xFFFF)) {
         Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                         SDValue(Result, 0), getI32Imm(Lo));
       }
-      
+
       return Result;
     }
     break;
   }
-  
+
   case ISD::SETCC:
     return SelectSETCC(N);
   case PPCISD::GlobalBaseReg:
     return getGlobalBaseReg();
-    
+
   case ISD::FrameIndex: {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
@@ -852,11 +836,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
                                     N->getOperand(0), InFlag);
   }
-    
+
   case ISD::SDIV: {
     // FIXME: since this depends on the setting of the carry flag from the srawi
     //        we should really be making notes about that for the scheduler.
-    // FIXME: It sure would be nice if we could cheaply recognize the 
+    // FIXME: It sure would be nice if we could cheaply recognize the
     //        srl/add/sra pattern the dag combiner will generate for this as
     //        sra/addze rather than having to handle sdiv ourselves.  oh well.
     unsigned Imm;
@@ -864,13 +848,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       SDValue N0 = N->getOperand(0);
       if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
         SDNode *Op =
-          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                                  N0, getI32Imm(Log2_32(Imm)));
-        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 
+        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                                     SDValue(Op, 0), SDValue(Op, 1));
       } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
         SDNode *Op =
-          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                                  N0, getI32Imm(Log2_32(-Imm)));
         SDValue PT =
           SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
@@ -879,24 +863,24 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
       }
     }
-    
+
     // Other cases are autogenerated.
     break;
   }
-    
+
   case ISD::LOAD: {
     // Handle preincrement loads.
     LoadSDNode *LD = cast<LoadSDNode>(N);
     EVT LoadedVT = LD->getMemoryVT();
-    
+
     // Normal loads are handled by code generated from the .td file.
     if (LD->getAddressingMode() != ISD::PRE_INC)
       break;
-    
+
     SDValue Offset = LD->getOffset();
     if (isa<ConstantSDNode>(Offset) ||
         Offset.getOpcode() == ISD::TargetGlobalAddress) {
-      
+
       unsigned Opcode;
       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
       if (LD->getValueType(0) != MVT::i64) {
@@ -923,7 +907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
           case MVT::i8:  Opcode = PPC::LBZU8; break;
         }
       }
-      
+
       SDValue Chain = LD->getChain();
       SDValue Base = LD->getBasePtr();
       SDValue Ops[] = { Offset, Base, Chain };
@@ -935,7 +919,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       llvm_unreachable("R+R preindex loads not supported yet!");
     }
   }
-    
+
   case ISD::AND: {
     unsigned Imm, Imm2, SH, MB, ME;
 
@@ -950,7 +934,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     // If this is just a masked value where the input is not handled above, and
     // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
     if (isInt32Immediate(N->getOperand(1), Imm) &&
-        isRunOfOnes(Imm, MB, ME) && 
+        isRunOfOnes(Imm, MB, ME) &&
         N->getOperand(0).getOpcode() != ISD::ROTL) {
       SDValue Val = N->getOperand(0);
       SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
@@ -963,7 +947,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     }
     // ISD::OR doesn't get all the bitfield insertion fun.
     // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
-    if (isInt32Immediate(N->getOperand(1), Imm) && 
+    if (isInt32Immediate(N->getOperand(1), Imm) &&
         N->getOperand(0).getOpcode() == ISD::OR &&
         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
       unsigned MB, ME;
@@ -975,7 +959,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
       }
     }
-    
+
     // Other cases are autogenerated.
     break;
   }
@@ -983,7 +967,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     if (N->getValueType(0) == MVT::i32)
       if (SDNode *I = SelectBitfieldInsert(N))
         return I;
-      
+
     // Other cases are autogenerated.
     break;
   case ISD::SHL: {
@@ -994,25 +978,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                           getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
       return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
     }
-    
+
     // Other cases are autogenerated.
     break;
   }
   case ISD::SRL: {
     unsigned Imm, SH, MB, ME;
     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
-        isRotateAndMask(N, Imm, true, SH, MB, ME)) { 
+        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
       SDValue Ops[] = { N->getOperand(0).getOperand(0),
                           getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
       return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
     }
-    
+
     // Other cases are autogenerated.
     break;
   }
   case ISD::SELECT_CC: {
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-    
+
     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
     if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
       if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
@@ -1022,7 +1006,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
               // FIXME: Implement this optzn for PPC64.
               N->getValueType(0) == MVT::i32) {
             SDNode *Tmp =
-              CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+              CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                      N->getOperand(0), getI32Imm(~0U));
             return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
                                         SDValue(Tmp, 0), N->getOperand(0),
@@ -1064,7 +1048,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
   case ISD::BR_CC: {
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
-    SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, 
+    SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
                         N->getOperand(4), N->getOperand(0) };
     return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
   }
@@ -1078,13 +1062,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
   }
   }
-  
+
   return SelectCode(N);
 }
 
 
 
-/// createPPCISelDag - This pass converts a legalized DAG into a 
+/// createPPCISelDag - This pass converts a legalized DAG into a
 /// PowerPC-specific DAG, ready for instruction scheduling.
 ///
 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 14d1b154a5c9..8f623b859b55 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -38,17 +38,17 @@
 #include "llvm/DerivedTypes.h"
 using namespace llvm;
 
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
-                                            EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                            MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
-                                              EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State);
@@ -73,6 +73,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setUseUnderscoreSetJmp(true);
   setUseUnderscoreLongJmp(true);
 
+  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+  // arguments are at least 4/8 bytes aligned.
+  setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+
   // Set up the register classes.
   addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
   addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
@@ -174,10 +178,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 
   // We cannot sextinreg(i1).  Expand to shifts.
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -545,7 +549,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
 
 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                              bool isUnary) {
   if (!isUnary)
     return isVMerge(N, UnitSize, 8, 24);
@@ -554,7 +558,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 
 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                              bool isUnary) {
   if (!isUnary)
     return isVMerge(N, UnitSize, 0, 16);
@@ -569,7 +573,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
          "PPC only supports shuffles by bytes!");
 
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  
+
   // Find the first non-undef value in the shuffle mask.
   unsigned i;
   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
@@ -607,7 +611,7 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
   // This is a splat operation if each element of the permute is the same, and
   // if the value doesn't reference the second vector.
   unsigned ElementBase = N->getMaskElt(0);
-  
+
   // FIXME: Handle UNDEF elements too!
   if (ElementBase >= 16)
     return false;
@@ -635,7 +639,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
   APInt APVal, APUndef;
   unsigned BitSize;
   bool HasAnyUndefs;
-  
+
   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
       return CFP->getValueAPF().isNegZero();
@@ -1054,7 +1058,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
     VT = LD->getMemoryVT();
 
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
-    ST = ST;
     Ptr = ST->getBasePtr();
     VT  = ST->getMemoryVT();
   } else
@@ -1094,158 +1097,126 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
 //  LowerOperation implementation
 //===----------------------------------------------------------------------===//
 
-SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
-                                             SelectionDAG &DAG) const {
-  EVT PtrVT = Op.getValueType();
-  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  const Constant *C = CP->getConstVal();
-  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
-  SDValue Zero = DAG.getConstant(0, PtrVT);
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-
-  const TargetMachine &TM = DAG.getTarget();
-
-  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
-  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
-
-  // If this is a non-darwin platform, we don't support non-static relo models
-  // yet.
-  if (TM.getRelocationModel() == Reloc::Static ||
-      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
-    // Generate non-pic code that has direct accesses to the constant pool.
-    // The address of the global is just (hi(&g)+lo(&g)).
-    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+  HiOpFlags = PPCII::MO_HA16;
+  LoOpFlags = PPCII::MO_LO16;
+
+  // Don't use the pic base if not in PIC relocation model.  Or if we are on a
+  // non-darwin platform.  We don't support PIC on other platforms yet.
+  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+               TM.getSubtarget<PPCSubtarget>().isDarwin();
+  if (isPIC) {
+    HiOpFlags |= PPCII::MO_PIC_FLAG;
+    LoOpFlags |= PPCII::MO_PIC_FLAG;
   }
 
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    // With PIC, the first instruction is actually "GR+hi(&G)".
-    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
-                     DAG.getNode(PPCISD::GlobalBaseReg,
-                                 DebugLoc(), PtrVT), Hi);
+  // If this is a reference to a global value that requires a non-lazy-ptr, make
+  // sure that instruction lowering adds it.
+  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+    HiOpFlags |= PPCII::MO_NLP_FLAG;
+    LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+    if (GV->hasHiddenVisibility()) {
+      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+    }
   }
 
-  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
-  return Lo;
+  return isPIC;
 }
 
-SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
-  EVT PtrVT = Op.getValueType();
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+                             SelectionDAG &DAG) {
+  EVT PtrVT = HiPart.getValueType();
   SDValue Zero = DAG.getConstant(0, PtrVT);
-  // FIXME there isn't really any debug loc here
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = HiPart.getDebugLoc();
 
-  const TargetMachine &TM = DAG.getTarget();
+  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
 
-  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
-  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
+  // With PIC, the first instruction is actually "GR+hi(&G)".
+  if (isPIC)
+    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
 
-  // If this is a non-darwin platform, we don't support non-static relo models
-  // yet.
-  if (TM.getRelocationModel() == Reloc::Static ||
-      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
-    // Generate non-pic code that has direct accesses to the constant pool.
-    // The address of the global is just (hi(&g)+lo(&g)).
-    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
-  }
+  // Generate non-pic code that has direct accesses to the constant pool.
+  // The address of the global is just (hi(&g)+lo(&g)).
+  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
 
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    // With PIC, the first instruction is actually "GR+hi(&G)".
-    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
-                     DAG.getNode(PPCISD::GlobalBaseReg,
-                                 DebugLoc(), PtrVT), Hi);
-  }
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  EVT PtrVT = Op.getValueType();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  const Constant *C = CP->getConstVal();
 
-  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
-  return Lo;
+  unsigned MOHiFlag, MOLoFlag;
+  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  SDValue CPIHi =
+    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+  SDValue CPILo =
+    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
 }
 
-SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  llvm_unreachable("TLS not implemented for PPC.");
-  return SDValue(); // Not reached
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  EVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+  unsigned MOHiFlag, MOLoFlag;
+  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
 }
 
 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
-  DebugLoc DL = Op.getDebugLoc();
 
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true);
-  SDValue Zero = DAG.getConstant(0, PtrVT);
-  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
-  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero);
-
-  // If this is a non-darwin platform, we don't support non-static relo models
-  // yet.
-  const TargetMachine &TM = DAG.getTarget();
-  if (TM.getRelocationModel() == Reloc::Static ||
-      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
-    // Generate non-pic code that has direct accesses to globals.
-    // The address of the global is just (hi(&g)+lo(&g)).
-    return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
-  }
 
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    // With PIC, the first instruction is actually "GR+hi(&G)".
-    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
-                     DAG.getNode(PPCISD::GlobalBaseReg,
-                                 DebugLoc(), PtrVT), Hi);
-  }
-
-  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+  unsigned MOHiFlag, MOLoFlag;
+  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+  SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
+  SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
 }
 
 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = GSDN->getDebugLoc();
+  DebugLoc DL = GSDN->getDebugLoc();
   const GlobalValue *GV = GSDN->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset());
-  SDValue Zero = DAG.getConstant(0, PtrVT);
-
-  const TargetMachine &TM = DAG.getTarget();
 
   // 64-bit SVR4 ABI code is always position-independent.
   // The actual address of the GlobalValue is stored in the TOC.
   if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
-    return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA,
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                        DAG.getRegister(PPC::X2, MVT::i64));
   }
 
-  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
-  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
-
-  // If this is a non-darwin platform, we don't support non-static relo models
-  // yet.
-  if (TM.getRelocationModel() == Reloc::Static ||
-      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
-    // Generate non-pic code that has direct accesses to globals.
-    // The address of the global is just (hi(&g)+lo(&g)).
-    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
-  }
-
-  if (TM.getRelocationModel() == Reloc::PIC_) {
-    // With PIC, the first instruction is actually "GR+hi(&G)".
-    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
-                     DAG.getNode(PPCISD::GlobalBaseReg,
-                                 DebugLoc(), PtrVT), Hi);
-  }
+  unsigned MOHiFlag, MOLoFlag;
+  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
 
-  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+  SDValue GAHi =
+    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+  SDValue GALo =
+    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
 
-  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM))
-    return Lo;
+  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
 
-  // If the global is weak or external, we have to go through the lazy
-  // resolution stub.
-  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0,
-                     false, false, 0);
+  // If the global reference is actually to a non-lazy-pointer, we have to do an
+  // extra load to get the address of the global.
+  if (MOHiFlag & PPCII::MO_NLP_FLAG)
+    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+                      false, false, 0);
+  return Ptr;
 }
 
 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -1353,7 +1324,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+                        MachinePointerInfo(SV),
                         false, false, 0);
   }
 
@@ -1406,43 +1378,47 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
 
   // Store first byte : number of int regs
   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
-                                         Op.getOperand(1), SV, 0, MVT::i8,
-                                         false, false, 0);
+                                         Op.getOperand(1),
+                                         MachinePointerInfo(SV),
+                                         MVT::i8, false, false, 0);
   uint64_t nextOffset = FPROffset;
   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                   ConstFPROffset);
 
   // Store second byte : number of float regs
   SDValue secondStore =
-    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8,
+    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                       false, false, 0);
   nextOffset += StackOffset;
   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
 
   // Store second word : arguments given on stack
   SDValue thirdStore =
-    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset,
+    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+                 MachinePointerInfo(SV, nextOffset),
                  false, false, 0);
   nextOffset += FrameOffset;
   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
 
   // Store third word : arguments given in registers
-  return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset,
+  return DAG.getStore(thirdStore, dl, FR, nextPtr,
+                      MachinePointerInfo(SV, nextOffset),
                       false, false, 0);
 
 }
 
 #include "PPCGenCallingConv.inc"
 
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
   return true;
 }
 
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
-                                            EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                            MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
@@ -1451,7 +1427,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
   const unsigned NumArgRegs = array_lengthof(ArgRegs);
-  
+
   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
 
   // Skip one register if the first unallocated register has an even register
@@ -1461,15 +1437,15 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
     State.AllocateReg(ArgRegs[RegNum]);
   }
-  
+
   // Always return false here, as this function only makes sure that the first
   // unallocated register has an odd register number and does not actually
   // allocate a register for the current argument.
   return false;
 }
 
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
-                                              EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
@@ -1479,7 +1455,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
   };
 
   const unsigned NumArgRegs = array_lengthof(ArgRegs);
-  
+
   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
 
   // If there is only one Floating-point register left we need to put both f64
@@ -1487,7 +1463,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
     State.AllocateReg(ArgRegs[RegNum]);
   }
-  
+
   // Always return false here, as this function only makes sure that the two f64
   // values a ppc_fp128 value is split into are both passed in registers or both
   // passed on the stack and does not actually allocate a register for the
@@ -1572,7 +1548,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
   // Specifications:
   //   System V Application Binary Interface PowerPC Processor Supplement
   //   AltiVec Technology Programming Interface Manual
-  
+
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1588,18 +1564,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
                  *DAG.getContext());
 
   // Reserve space for the linkage area on the stack.
-  CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
-  
+
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    
+
     // Arguments stored in registers.
     if (VA.isRegLoc()) {
       TargetRegisterClass *RC;
       EVT ValVT = VA.getValVT();
-      
+
       switch (ValVT.getSimpleVT().SimpleTy) {
         default:
           llvm_unreachable("ValVT not supported by formal arguments Lowering");
@@ -1619,9 +1595,9 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
           RC = PPC::VRRCRegisterClass;
           break;
       }
-      
+
       // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
 
       InVals.push_back(ArgValue);
@@ -1635,7 +1611,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
 
       // Create load nodes to retrieve arguments from the stack.
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                   MachinePointerInfo(),
                                    false, false, 0));
     }
   }
@@ -1654,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
 
   // Area that is at least reserved in the caller of this function.
   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
-  
+
   // Set the size that is at least reserved in caller of this function.  Tail
   // call optimized function's reserved stack space needs to be aligned so that
   // taking the difference between two stack areas will result in an aligned
@@ -1663,17 +1640,17 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
 
   MinReservedArea =
     std::max(MinReservedArea,
-             PPCFrameInfo::getMinCallFrameSize(false, false));
-  
-  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+             PPCFrameLowering::getMinCallFrameSize(false, false));
+
+  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
     getStackAlignment();
   unsigned AlignMask = TargetAlign-1;
   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-  
+
   FI->setMinReservedArea(MinReservedArea);
 
   SmallVector<SDValue, 8> MemOps;
-  
+
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
@@ -1705,28 +1682,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
 
-    // The fixed integer arguments of a variadic function are
-    // stored to the VarArgsFrameIndex on the stack.
-    unsigned GPRIndex = 0;
-    for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) {
-      SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
-      SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
-      MemOps.push_back(Store);
-      // Increment the address by four for the next argument to store
-      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
-      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-    }
-
-    // If this function is vararg, store any remaining integer argument regs
-    // to their spots on the stack so that they may be loaded by deferencing the
-    // result of va_next.
-    for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
-      unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+    // The fixed integer arguments of a variadic function are stored to the
+    // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
+    // the result of va_next.
+    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+      // Get an existing live-in vreg, or add a new one.
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+      if (!VReg)
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by four for the next argument to store
       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1735,27 +1702,17 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
 
     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
     // is set.
-    
     // The double arguments are stored to the VarArgsFrameIndex
     // on the stack.
-    unsigned FPRIndex = 0;
-    for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) {
-      SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
-      SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
-      MemOps.push_back(Store);
-      // Increment the address by eight for the next argument to store
-      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
-                                         PtrVT);
-      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-    }
-
-    for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
-      unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+      // Get an existing live-in vreg, or add a new one.
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+      if (!VReg)
+        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by eight for the next argument to store
       SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
@@ -1791,7 +1748,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
   bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
   unsigned PtrByteSize = isPPC64 ? 8 : 4;
 
-  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
+  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
   // Area that is at least reserved in caller of this function.
   unsigned MinReservedArea = ArgOffset;
 
@@ -1915,18 +1872,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       InVals.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            NULL, 0,
+                                            MachinePointerInfo(),
                                             ObjSize==1 ? MVT::i8 : MVT::i16,
                                             false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
         }
-        
+
         ArgOffset += PtrByteSize;
-        
+
         continue;
       }
       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
@@ -1934,11 +1891,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                       MachinePointerInfo(),
                                        false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
@@ -1956,7 +1914,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     case MVT::i32:
       if (!isPPC64) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
           ++GPR_idx;
         } else {
@@ -1970,7 +1928,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // FALLTHROUGH
     case MVT::i64:  // PPC64
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::i32) {
@@ -2008,9 +1966,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         unsigned VReg;
 
         if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
         else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
 
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         ++FPR_idx;
@@ -2028,7 +1986,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       // Note that vector arguments in registers don't reserve stack space,
       // except in varargs functions.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
         if (isVarArg) {
           while ((ArgOffset % 16) != 0) {
@@ -2063,7 +2021,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                                       CurArgOffset + (ArgSize - ObjSize),
                                       isImmutable);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                            false, false, 0);
     }
 
@@ -2082,8 +2040,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
   }
   MinReservedArea =
     std::max(MinReservedArea,
-             PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
-  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
     getStackAlignment();
   unsigned AlignMask = TargetAlign-1;
   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
@@ -2104,15 +2062,15 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     // result of va_next.
     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
       unsigned VReg;
-      
+
       if (isPPC64)
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
       else
-        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
 
       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
       MemOps.push_back(Store);
       // Increment the address by four for the next argument to store
       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -2141,7 +2099,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   // Count how many bytes are to be pushed on the stack, including the linkage
   // area, and parameter passing area.  We start with 24/48 bytes, which is
   // prereserved space for [SP][CR][LR][3 x unused].
-  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
+  unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
   unsigned NumOps = Outs.size();
   unsigned PtrByteSize = isPPC64 ? 8 : 4;
 
@@ -2153,7 +2111,6 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   // 16-byte aligned.
   nAltivecParamsAtEnd = 0;
   for (unsigned i = 0; i != NumOps; ++i) {
-    SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
     EVT ArgVT = Outs[i].VT;
     // Varargs Altivec parameters are padded to a 16 byte boundary.
@@ -2183,11 +2140,11 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   // conservatively assume that it is needed.  As such, make sure we have at
   // least enough stack space for the caller to store the 8 GPRs.
   NumBytes = std::max(NumBytes,
-                      PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
+                      PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
 
   // Tail call needs the stack to be aligned.
   if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
-    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
       getStackAlignment();
     unsigned AlignMask = TargetAlign-1;
     NumBytes = (NumBytes + AlignMask) & ~AlignMask;
@@ -2292,8 +2249,8 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
     int FI = TailCallArgs[i].FrameIdx;
     // Store relative to framepointer.
     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
-                                       PseudoSourceValue::getFixedStack(FI),
-                                       0, false, false, 0));
+                                       MachinePointerInfo::getFixedStack(FI),
+                                       false, false, 0));
   }
 }
 
@@ -2311,26 +2268,26 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
   if (SPDiff) {
     // Calculate the new stack slot for the return address.
     int SlotSize = isPPC64 ? 8 : 4;
-    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                                    isDarwinABI);
     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                           NewRetAddrLoc, true);
     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
-                         PseudoSourceValue::getFixedStack(NewRetAddr), 0,
+                         MachinePointerInfo::getFixedStack(NewRetAddr),
                          false, false, 0);
 
     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
     // slot as the FP is never overwritten.
     if (isDarwinABI) {
       int NewFPLoc =
-        SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                           true);
       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
-                           PseudoSourceValue::getFixedStack(NewFPIdx), 0,
+                           MachinePointerInfo::getFixedStack(NewFPIdx),
                            false, false, 0);
     }
   }
@@ -2369,15 +2326,15 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
     // Load the LR and FP stack slot for later adjusting.
     EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
     LROpOut = getReturnAddrFrameIndex(DAG);
-    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0,
+    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                           false, false, 0);
     Chain = SDValue(LROpOut.getNode(), 1);
-    
+
     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
     // slot as the FP is never overwritten.
     if (isDarwinABI) {
       FPOpOut = getFramePointerFrameIndex(DAG);
-      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0,
+      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                             false, false, 0);
       Chain = SDValue(FPOpOut.getNode(), 1);
     }
@@ -2397,7 +2354,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           DebugLoc dl) {
   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       false, false, NULL, 0, NULL, 0);
+                       false, false, MachinePointerInfo(0),
+                       MachinePointerInfo(0));
 }
 
 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -2407,7 +2365,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                  SDValue Arg, SDValue PtrOff, int SPDiff,
                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
                  bool isVector, SmallVector<SDValue, 8> &MemOpChains,
-                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
                  DebugLoc dl) {
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   if (!isTailCall) {
@@ -2420,8 +2378,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                            DAG.getConstant(ArgOffset, PtrVT));
     }
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                       false, false, 0));
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(), false, false, 0));
   // Calculate and remember argument location.
   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                   TailCallArguments);
@@ -2460,10 +2418,14 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                      SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
                      SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                      SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
-                     bool isPPC64, bool isSVR4ABI) {
+                     const PPCSubtarget &PPCSubTarget) {
+
+  bool isPPC64 = PPCSubTarget.isPPC64();
+  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
 
   unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
 
@@ -2473,24 +2435,49 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     Callee = SDValue(Dest, 0);
     needIndirectCall = false;
   }
-  // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
-  // Use indirect calls for ALL functions calls in JIT mode, since the
-  // far-call stubs may be outside relocation limits for a BL instruction.
-  if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
-    // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-    // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
-    // node so that legalize doesn't hack it.
-    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+    // Use indirect calls for ALL functions calls in JIT mode, since the
+    // far-call stubs may be outside relocation limits for a BL instruction.
+    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+      unsigned OpFlags = 0;
+      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+          PPCSubTarget.getDarwinVers() < 9 &&
+          (G->getGlobal()->isDeclaration() ||
+           G->getGlobal()->isWeakForLinker())) {
+        // PC-relative references to external symbols should go through $stub,
+        // unless we're building with the leopard linker or later, which
+        // automatically synthesizes these stubs.
+        OpFlags = PPCII::MO_DARWIN_STUB;
+      }
+
+      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+      // every direct call is) turn it into a TargetGlobalAddress /
+      // TargetExternalSymbol node so that legalize doesn't hack it.
       Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-					  Callee.getValueType());
+                                          Callee.getValueType(),
+                                          0, OpFlags);
       needIndirectCall = false;
     }
   }
+
   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-      Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
-					   Callee.getValueType());
-      needIndirectCall = false;
+    unsigned char OpFlags = 0;
+
+    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+        PPCSubTarget.getDarwinVers() < 9) {
+      // PC-relative references to external symbols should go through $stub,
+      // unless we're building with the leopard linker or later, which
+      // automatically synthesizes these stubs.
+      OpFlags = PPCII::MO_DARWIN_STUB;
+    }
+
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+                                         OpFlags);
+    needIndirectCall = false;
   }
+
   if (needIndirectCall) {
     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
     // to do the call, we can't use PPCISD::CALL.
@@ -2525,7 +2512,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
 
       // Load the address of the function entry point from the function
       // descriptor.
-      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Flag);
+      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
       SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
                                         InFlag.getNode() ? 3 : 2);
       Chain = LoadFuncPtr.getValue(1);
@@ -2552,7 +2539,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
       // prevents the register allocator from allocating it), resulting in an
       // additional register being allocated and an unnecessary move instruction
       // being generated.
-      VTs = DAG.getVTList(MVT::Other, MVT::Flag);
+      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
       SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
                                        Callee, InFlag);
       Chain = LoadTOCPtr.getValue(0);
@@ -2569,7 +2556,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
 
     NodeTys.clear();
     NodeTys.push_back(MVT::Other);
-    NodeTys.push_back(MVT::Flag);
+    NodeTys.push_back(MVT::Glue);
     Ops.push_back(Chain);
     CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
     Callee.setNode(0);
@@ -2637,8 +2624,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
   SmallVector<SDValue, 8> Ops;
   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
                                  isTailCall, RegsToPass, Ops, NodeTys,
-                                 PPCSubTarget.isPPC64(),
-                                 PPCSubTarget.isSVR4ABI());
+                                 PPCSubTarget);
 
   // When performing tail call optimization the callee pops its arguments off
   // the stack. Account for this here so these bytes can be pushed back on in
@@ -2684,7 +2670,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
   // stack frame. If caller and callee belong to the same module (and have the
   // same TOC), the NOP will remain unchanged.
   if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
-    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
     if (CallOpc == PPCISD::BCTRL_SVR4) {
       // This is a call through a function pointer.
       // Restore the caller TOC from the save area into R2.
@@ -2699,7 +2685,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
       InFlag = Chain.getValue(1);
     } else {
       // Otherwise insert NOP.
-      InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
+      InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
     }
   }
 
@@ -2726,15 +2712,14 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                    Ins, DAG);
 
-  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
     return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
                           isTailCall, Outs, OutVals, Ins,
                           dl, DAG, InVals);
-  } else {
-    return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
-                            isTailCall, Outs, OutVals, Ins,
-                            dl, DAG, InVals);
-  }
+
+  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+                          isTailCall, Outs, OutVals, Ins,
+                          dl, DAG, InVals);
 }
 
 SDValue
@@ -2763,7 +2748,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
   // in this function's (MF) stack pointer stack slot 0(SP).
   if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
-  
+
   // Count how many bytes are to be pushed on the stack, including the linkage
   // area, parameter list area and the part of the local variable space which
   // contains copies of aggregates which are passed by value.
@@ -2774,19 +2759,19 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
                  ArgLocs, *DAG.getContext());
 
   // Reserve space for the linkage area on the stack.
-  CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
 
   if (isVarArg) {
     // Handle fixed and variable vector arguments differently.
     // Fixed vector arguments go into registers as long as registers are
     // available. Variable vector arguments always go into memory.
     unsigned NumArgs = Outs.size();
-    
+
     for (unsigned i = 0; i != NumArgs; ++i) {
-      EVT ArgVT = Outs[i].VT;
+      MVT ArgVT = Outs[i].VT;
       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
       bool Result;
-      
+
       if (Outs[i].IsFixed) {
         Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                              CCInfo);
@@ -2794,11 +2779,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
         Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                     ArgFlags, CCInfo);
       }
-      
+
       if (Result) {
 #ifndef NDEBUG
         errs() << "Call operand #" << i << " has unhandled type "
-             << ArgVT.getEVTString() << "\n";
+             << EVT(ArgVT).getEVTString() << "\n";
 #endif
         llvm_unreachable(0);
       }
@@ -2807,7 +2792,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
     // All arguments are treated the same.
     CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
   }
-  
+
   // Assign locations to all of the outgoing aggregate by value arguments.
   SmallVector<CCValAssign, 16> ByValArgLocs;
   CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
@@ -2822,7 +2807,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
   // space variable where copies of aggregates which are passed by value are
   // stored.
   unsigned NumBytes = CCByValInfo.getNextStackOffset();
-  
+
   // Calculate by how many bytes the stack has to be adjusted in case of tail
   // call optimization.
   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
@@ -2842,7 +2827,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
   // arguments that may not fit in the registers available for argument
   // passing.
   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
-  
+
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
   SmallVector<SDValue, 8> MemOpChains;
@@ -2854,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
     CCValAssign &VA = ArgLocs[i];
     SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
-    
+
     if (Flags.isByVal()) {
       // Argument is an aggregate which is passed by value, thus we need to
       // create a copy of it in the local variable space of the current stack
@@ -2863,33 +2848,33 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
       CCValAssign &ByValVA = ByValArgLocs[j++];
       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
-      
+
       // Memory reserved in the local variable space of the callers stack frame.
       unsigned LocMemOffset = ByValVA.getLocMemOffset();
-      
+
       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-      
+
       // Create a copy of the argument in the local area of the current
       // stack frame.
       SDValue MemcpyCall =
         CreateCopyOfByValArgument(Arg, PtrOff,
                                   CallSeqStart.getNode()->getOperand(0),
                                   Flags, DAG, dl);
-      
+
       // This must go outside the CALLSEQ_START..END.
       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                            CallSeqStart.getNode()->getOperand(1));
       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                              NewCallSeqStart.getNode());
       Chain = CallSeqStart = NewCallSeqStart;
-      
+
       // Pass the address of the aggregate copy on the stack either in a
       // physical register or in the parameter list area of the current stack
       // frame to the callee.
       Arg = PtrOff;
     }
-    
+
     if (VA.isRegLoc()) {
       // Put argument in a physical register.
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -2903,7 +2888,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
 
         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                           PseudoSourceValue::getStack(), LocMemOffset,
+                                           MachinePointerInfo(),
                                            false, false, 0));
       } else {
         // Calculate and remember argument location.
@@ -2912,11 +2897,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
       }
     }
   }
-  
+
   if (!MemOpChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
-  
+
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
   SDValue InFlag;
@@ -2925,7 +2910,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
                              RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
-  
+
   // Set CR6 to true if this is a vararg call.
   if (isVarArg) {
     SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
@@ -2933,10 +2918,9 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
     InFlag = Chain.getValue(1);
   }
 
-  if (isTailCall) {
+  if (isTailCall)
     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                     false, TailCallArguments);
-  }
 
   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
@@ -3012,7 +2996,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
   // memory.  Also, if this is a vararg function, floating point operations
   // must be stored to our stack, and loaded into integer regs as well, if
   // any integer regs are available for argument passing.
-  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
+  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
 
   static const unsigned GPR_32[] = {           // 32-bit registers.
@@ -3066,8 +3050,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
         // Everything else is passed left-justified.
         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
         if (GPR_idx != NumGPRs) {
-          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg,
-                                        NULL, 0, VT, false, false, 0);
+          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+                                        MachinePointerInfo(), VT,
+                                        false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
 
@@ -3104,7 +3089,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
         if (GPR_idx != NumGPRs) {
-          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0,
+          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+                                     MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3136,21 +3122,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
 
         if (isVarArg) {
-          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                       false, false, 0);
+          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(), false, false, 0);
           MemOpChains.push_back(Store);
 
           // Float varargs are always shadowed in available integer registers
           if (GPR_idx != NumGPRs) {
-            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
-                                       false, false, 0);
+            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+                                       MachinePointerInfo(), false, false, 0);
             MemOpChains.push_back(Load.getValue(1));
             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
           }
           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
-            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
+            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0);
             MemOpChains.push_back(Load.getValue(1));
             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3194,11 +3181,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
         // entirely in R registers.  Maybe later.
         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
-        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
-                                     false, false, 0);
+        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                     MachinePointerInfo(), false, false, 0);
         MemOpChains.push_back(Store);
         if (VR_idx != NumVRs) {
-          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0,
+          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+                                     MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
@@ -3209,7 +3197,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
             break;
           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
-          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0,
+          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                      false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3275,14 +3263,14 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
     // TOC save area offset.
     SDValue PtrOff = DAG.getIntPtrConstant(40);
     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0,
+    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
                          false, false, 0);
   }
 
   // On Darwin, R12 must contain the address of an indirect callee.  This does
   // not mean the MTCTR instruction must use R12; it's easier to model this as
   // an extra parameter, so do that.
-  if (!isTailCall && 
+  if (!isTailCall &&
       !dyn_cast<GlobalAddressSDNode>(Callee) &&
       !dyn_cast<ExternalSymbolSDNode>(Callee) &&
       !isBLACompatibleAddress(Callee, DAG))
@@ -3298,10 +3286,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
     InFlag = Chain.getValue(1);
   }
 
-  if (isTailCall) {
+  if (isTailCall)
     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                     FPOp, true, TailCallArguments);
-  }
 
   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
@@ -3362,14 +3349,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
   SDValue SaveSP = Op.getOperand(1);
 
   // Load the old link SP.
-  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0,
+  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+                                   MachinePointerInfo(),
                                    false, false, 0);
 
   // Restore the stack pointer.
   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
 
   // Store the old link SP.
-  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0,
+  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                       false, false, 0);
 }
 
@@ -3390,7 +3378,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
   // If the frame pointer save index hasn't been defined yet.
   if (!RASI) {
     // Find out what the fix offset of the frame pointer save area.
-    int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
+    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
     // Allocate the frame index for frame pointer save area.
     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
     // Save the result.
@@ -3414,7 +3402,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
   // If the frame pointer save index hasn't been defined yet.
   if (!FPSI) {
     // Find out what the fix offset of the frame pointer save area.
-    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
                                                            isDarwinABI);
 
     // Allocate the frame index for frame pointer save area.
@@ -3533,7 +3521,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
-                                                         PPCISD::FCTIDZ, 
+                                                         PPCISD::FCTIDZ,
                       dl, MVT::f64, Src);
     break;
   case MVT::i64:
@@ -3545,15 +3533,15 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
 
   // Emit a store to the stack slot.
-  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0,
-                               false, false, 0);
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+                               MachinePointerInfo(), false, false, 0);
 
   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
   // add in a bias.
   if (Op.getValueType() == MVT::i32)
     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                         DAG.getConstant(4, FIPtr.getValueType()));
-  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0,
+  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
                      false, false, 0);
 }
 
@@ -3565,8 +3553,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
     return SDValue();
 
   if (Op.getOperand(0).getValueType() == MVT::i64) {
-    SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
-                               MVT::f64, Op.getOperand(0));
+    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
     SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
     if (Op.getValueType() == MVT::f32)
       FP = DAG.getNode(ISD::FP_ROUND, dl,
@@ -3591,14 +3578,15 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
 
   // STD the extended value into the stack slot.
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 0, 8, 8);
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                            MachineMemOperand::MOStore, 8, 8);
   SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
   SDValue Store =
     DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
                             Ops, 4, MVT::i64, MMO);
   // Load the value as a double.
-  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0);
+  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+                           false, false, 0);
 
   // FCFID it and return it.
   SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3637,19 +3625,19 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
 
   // Save FP Control Word to register
   NodeTys.push_back(MVT::f64);    // return register
-  NodeTys.push_back(MVT::Flag);   // unused in this context
+  NodeTys.push_back(MVT::Glue);   // unused in this context
   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
 
   // Save FP register to stack slot
   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
-                               StackSlot, NULL, 0, false, false, 0);
+                               StackSlot, MachinePointerInfo(), false, false,0);
 
   // Load FP Control Word from low 32 bits of stack slot.
   SDValue Four = DAG.getConstant(4, PtrVT);
   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
-  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0,
+  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                             false, false, 0);
 
   // Transform as necessary
@@ -3786,7 +3774,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
                               &Ops[0], Ops.size());
-  return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
+  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
 }
 
 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
@@ -3815,14 +3803,14 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
                              EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Force LHS/RHS to be the right type.
-  LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
-  RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
+  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
 
   int Ops[16];
   for (unsigned i = 0; i != 16; ++i)
     Ops[i] = i + Amt;
   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+  return DAG.getNode(ISD::BITCAST, dl, VT, T);
 }
 
 // If this is a case we can't handle, return null and let the default
@@ -3856,7 +3844,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
       SDValue Z = DAG.getConstant(0, MVT::i32);
       Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
-      Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
     }
     return Op;
   }
@@ -3875,7 +3863,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
     SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
     Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   }
 
   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
@@ -3891,7 +3879,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
     // xor by OnesV to invert it.
     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   }
 
   // Check to see if this is a wide variety of vsplti*, binop self cases.
@@ -3917,7 +3905,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         Intrinsic::ppc_altivec_vslw
       };
       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
     }
 
     // vsplti + srl self.
@@ -3928,7 +3916,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         Intrinsic::ppc_altivec_vsrw
       };
       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
     }
 
     // vsplti + sra self.
@@ -3939,7 +3927,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         Intrinsic::ppc_altivec_vsraw
       };
       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
     }
 
     // vsplti + rol self.
@@ -3951,7 +3939,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         Intrinsic::ppc_altivec_vrlw
       };
       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
-      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
     }
 
     // t = vsplti c, result = vsldoi t, t, 1
@@ -3978,14 +3966,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
     LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
   }
   // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
   if (SextVal >= -31 && SextVal <= 0) {
     SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
     LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
   }
 
   return SDValue();
@@ -4062,10 +4050,10 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
   }
   EVT VT = OpLHS.getValueType();
-  OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
-  OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+  return DAG.getNode(ISD::BITCAST, dl, VT, T);
 }
 
 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
@@ -4118,7 +4106,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   // perfect shuffle table to emit an optimal matching sequence.
   SmallVector<int, 16> PermMask;
   SVOp->getMask(PermMask);
-  
+
   unsigned PFIndexes[4];
   bool isFourElementShuffle = true;
   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
@@ -4253,7 +4241,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                               Op.getOperand(1), Op.getOperand(2),
                               DAG.getConstant(CompareOpc, MVT::i32));
-    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
   }
 
   // Create the PPCISD altivec 'dot' comparison node.
@@ -4264,7 +4252,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   };
   std::vector<EVT> VTs;
   VTs.push_back(Op.getOperand(2).getValueType());
-  VTs.push_back(MVT::Flag);
+  VTs.push_back(MVT::Glue);
   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
 
   // Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -4317,10 +4305,10 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
 
   // Store the input value into Value#0 of the stack slot.
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
-                               Op.getOperand(0), FIdx, NULL, 0,
+                               Op.getOperand(0), FIdx, MachinePointerInfo(),
                                false, false, 0);
   // Load it out.
-  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0,
+  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
                      false, false, 0);
 }
 
@@ -4336,9 +4324,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
 
     // Shrinkify inputs to v8i16.
-    LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
-    RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
-    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
+    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
 
     // Low parts multiplied together, generating 32-bit results (we ignore the
     // top parts).
@@ -4364,12 +4352,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
     // Multiply the even 8-bit parts, producing 16-bit sums.
     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                            LHS, RHS, DAG, dl, MVT::v8i16);
-    EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
+    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
 
     // Multiply the odd 8-bit parts, producing 16-bit sums.
     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
-    OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
+    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
 
     // Merge the results together.
     int Ops[16];
@@ -4391,7 +4379,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
-  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:   llvm_unreachable("TLS not implemented for PPC");
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
   case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
@@ -4456,20 +4444,20 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
 
     NodeTys.push_back(MVT::f64);   // Return register
-    NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
+    NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
     Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
     MFFSreg = Result.getValue(0);
     InFlag = Result.getValue(1);
 
     NodeTys.clear();
-    NodeTys.push_back(MVT::Flag);   // Returns a flag
+    NodeTys.push_back(MVT::Glue);   // Returns a flag
     Ops[0] = DAG.getConstant(31, MVT::i32);
     Ops[1] = InFlag;
     Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
     InFlag = Result.getValue(0);
 
     NodeTys.clear();
-    NodeTys.push_back(MVT::Flag);   // Returns a flag
+    NodeTys.push_back(MVT::Glue);   // Returns a flag
     Ops[0] = DAG.getConstant(30, MVT::i32);
     Ops[1] = InFlag;
     Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
@@ -4477,7 +4465,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
 
     NodeTys.clear();
     NodeTys.push_back(MVT::f64);    // result of add
-    NodeTys.push_back(MVT::Flag);   // Returns a flag
+    NodeTys.push_back(MVT::Glue);   // Returns a flag
     Ops[0] = Lo;
     Ops[1] = Hi;
     Ops[2] = InFlag;
@@ -5283,7 +5271,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         DAG.getConstant(CompareOpc, MVT::i32)
       };
       VTs.push_back(LHS.getOperand(2).getValueType());
-      VTs.push_back(MVT::Flag);
+      VTs.push_back(MVT::Glue);
       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
 
       // Unpack the result based on how the target uses it.
@@ -5377,6 +5365,47 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'b':
+    if (type->isIntegerTy())
+      weight = CW_Register;
+    break;
+  case 'f':
+    if (type->isFloatTy())
+      weight = CW_Register;
+    break;
+  case 'd':
+    if (type->isDoubleTy())
+      weight = CW_Register;
+    break;
+  case 'v':
+    if (type->isVectorTy())
+      weight = CW_Register;
+    break;
+  case 'y':
+    weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 std::pair<unsigned, const TargetRegisterClass*>
 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const {
@@ -5536,19 +5565,19 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
   if (Depth > 0) {
     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
     SDValue Offset =
-    
-      DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI),
+
+      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
                       isPPC64? MVT::i64 : MVT::i32);
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address off the stack.
   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5571,7 +5600,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          PtrVT);
   while (Depth--)
     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
-                            FrameAddr, NULL, 0, false, false, 0);
+                            FrameAddr, MachinePointerInfo(), false, false, 0);
   return FrameAddr;
 }
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 700816f5a129..80cab75b960a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -308,6 +308,12 @@ namespace llvm {
                                             bool is8bit, unsigned Opcode) const;
     
     ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*> 
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -383,7 +389,6 @@ namespace llvm {
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a0781b987056..6636b6927191 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -23,9 +23,11 @@ def u16imm64 : Operand<i64> {
 }
 def symbolHi64 : Operand<i64> {
   let PrintMethod = "printSymbolHi";
+  let EncoderMethod = "getHA16Encoding";
 }
 def symbolLo64 : Operand<i64> {
   let PrintMethod = "printSymbolLo";
+  let EncoderMethod = "getLO16Encoding";
 }
 
 //===----------------------------------------------------------------------===//
@@ -58,7 +60,7 @@ def HI48_64 : SDNodeXForm<imm, [{
 //
 
 let Defs = [LR8] in
-  def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+  def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "", []>,
                     PPC970_Unit_BRU;
 
 // Darwin ABI Calls.
@@ -130,39 +132,31 @@ def : Pat<(PPCnop),
 let usesCustomInserter = 1 in {
   let Uses = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_SUB_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_OR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_XOR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_AND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
     def ATOMIC_LOAD_NAND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
-      "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
       [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
-      "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
       [(set G8RC:$dst, 
                     (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
 
     def ATOMIC_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
-      "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
       [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
   }
 }
@@ -240,8 +234,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
 }
 
 let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),
-                       "${:comment} DYNALLOC8 $result, $negsize, $fpsi",
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
                        [(set G8RC:$result,
                              (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
 
@@ -500,7 +493,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
 
 // Update forms.
 let mayLoad = 1 in
-def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
                             ptr_rc:$rA),
                     "lhau $rD, $disp($rA)", LdStGeneral,
                     []>, RegConstraint<"$rA = $ea_result">,
@@ -555,18 +548,20 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
                     "ld $rD, $src", LdStLD,
                     [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
-def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
-                    "ld $rD, $disp($reg)", LdStLD,
-                    [(set G8RC:$rD,
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+                  "",
+                  [(set G8RC:$rD,
                      (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-let RST = 2, DS = 8 in
+                     
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
 def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
                     "ld 2, 8($reg)", LdStLD,
                     [(PPCload_toc G8RC:$reg)]>, isPPC64;
-let RST = 2, DS = 40, RA = 1 in
+                    
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
 def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
                     "ld 2, 40(1)", LdStLD,
-                    []>, isPPC64;
+                    [(PPCtoc_restore)]>, isPPC64;
 def LDX  : XForm_1<31,  21, (outs G8RC:$rD), (ins memrr:$src),
                    "ldx $rD, $src", LdStLD,
                    [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
@@ -579,8 +574,6 @@ def LDU  : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr
 
 }
 
-def : Pat<(PPCtoc_restore),
-          (LDtoc_restore)>;
 def : Pat<(PPCload ixaddr:$src),
           (LD ixaddr:$src)>;
 def : Pat<(PPCload xaddr:$src),
@@ -621,14 +614,14 @@ def STDX  : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
 
 let PPC970_Unit = 2 in {
 
-def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res,
                           (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, 
                                          iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res,
@@ -636,8 +629,8 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
                                         iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
 
-def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
-                                s16immX4:$ptroff, ptr_rc:$ptrreg),
+def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+                                        s16immX4:$ptroff, ptr_rc:$ptrreg),
                     "stdu $rS, $ptroff($ptrreg)", LdStSTD,
                     [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, 
                                                      iaddroff:$ptroff))]>,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 4357bdccec7b..84a15b1ca942 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -102,6 +102,19 @@ class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
   bits<5>  A;
+  bits<21> Addr;
+
+  let Pattern = pattern;
+  
+  let Inst{6-10}  = A;
+  let Inst{11-15} = Addr{20-16}; // Base Reg
+  let Inst{16-31} = Addr{15-0};  // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5>  A;
   bits<16> C;
   bits<5>  B;
 
@@ -112,6 +125,7 @@ class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
   let Inst{16-31} = C;
 }
 
+
 class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
   : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
@@ -147,8 +161,7 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                    InstrItinClass itin, list<dag> pattern>
   : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
   let A = 0;
-  let B = 0;
-  let C = 0;
+  let Addr = 0;
 }
 
 class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -188,17 +201,31 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
          : I<opcode, OOL, IOL, asmstr, itin> {
   bits<5>  RST;
-  bits<14> DS;
-  bits<5>  RA;
+  bits<19> DS_RA;
 
   let Pattern = pattern;
   
   let Inst{6-10}  = RST;
-  let Inst{11-15} = RA;
-  let Inst{16-29} = DS;
+  let Inst{11-15} = DS_RA{18-14};  // Register #
+  let Inst{16-29} = DS_RA{13-0};   // Displacement.
   let Inst{30-31} = xo;
 }
 
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+   bits<5>  RST;
+   bits<14> DS;
+   bits<5>  RA;
+ 
+   let Pattern = pattern;
+   
+   let Inst{6-10}  = RST;
+   let Inst{11-15} = RA;
+   let Inst{16-29} = DS;
+   let Inst{30-31} = xo;
+}
+
 // 1.7.6 X-Form
 class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
                       InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index c17108fa9230..53b049135e24 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "PPCPredicates.h"
 #include "PPCGenInstrInfo.inc"
 #include "PPCTargetMachine.h"
+#include "PPCHazardRecognizers.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -39,7 +40,19 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
     RI(*TM.getSubtargetImpl(), *this) {}
 
-unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+  const TargetMachine *TM,
+  const ScheduleDAG *DAG) const {
+  // Should use subtarget info to pick the right hazard recognizer.  For
+  // now, always return a PPC970 recognizer.
+  const TargetInstrInfo *TII = TM->getInstrInfo();
+  assert(TII && "No InstrInfo?");
+  return new PPCHazardRecognizer970(*TII);
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
   switch (MI->getOpcode()) {
   default: break;
@@ -57,7 +70,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, 
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
   switch (MI->getOpcode()) {
   default: break;
@@ -84,11 +97,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   // Normal instructions can be commuted the obvious way.
   if (MI->getOpcode() != PPC::RLWIMI)
     return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
-  
+
   // Cannot commute if it has a non-zero rotate count.
   if (MI->getOperand(3).getImm() != 0)
     return 0;
-  
+
   // If we have a zero rotate count, we have:
   //   M = mask(MB,ME)
   //   Op0 = (Op1 & ~M) | (Op2 & M)
@@ -135,14 +148,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   MI->getOperand(1).setReg(Reg2);
   MI->getOperand(2).setIsKill(Reg1IsKill);
   MI->getOperand(1).setIsKill(Reg2IsKill);
-  
+
   // Swap the mask around.
   MI->getOperand(4).setImm((ME+1) & 31);
   MI->getOperand(5).setImm((MB-1) & 31);
   return MI;
 }
 
-void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, 
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI) const {
   DebugLoc DL;
   BuildMI(MBB, MI, DL, get(PPC::NOP));
@@ -169,7 +182,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
-  
+
   // If there is only one terminator instruction, process it.
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     if (LastInst->getOpcode() == PPC::B) {
@@ -189,7 +202,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     // Otherwise, don't know what this is.
     return true;
   }
-  
+
   // Get the instruction before it if it's a terminator.
   MachineInstr *SecondLastInst = I;
 
@@ -197,9 +210,9 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
   if (SecondLastInst && I != MBB.begin() &&
       isUnpredicatedTerminator(--I))
     return true;
-  
+
   // If the block ends with PPC::B and PPC:BCC, handle it.
-  if (SecondLastInst->getOpcode() == PPC::BCC && 
+  if (SecondLastInst->getOpcode() == PPC::BCC &&
       LastInst->getOpcode() == PPC::B) {
     if (!SecondLastInst->getOperand(2).isMBB() ||
         !LastInst->getOperand(0).isMBB())
@@ -210,10 +223,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     FBB = LastInst->getOperand(0).getMBB();
     return false;
   }
-  
+
   // If the block ends with two PPC:Bs, handle it.  The second one is not
   // executed, so remove it.
-  if (SecondLastInst->getOpcode() == PPC::B && 
+  if (SecondLastInst->getOpcode() == PPC::B &&
       LastInst->getOpcode() == PPC::B) {
     if (!SecondLastInst->getOperand(0).isMBB())
       return true;
@@ -239,17 +252,17 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   }
   if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
     return 0;
-  
+
   // Remove the branch.
   I->eraseFromParent();
-  
+
   I = MBB.end();
 
   if (I == MBB.begin()) return 1;
   --I;
   if (I->getOpcode() != PPC::BCC)
     return 1;
-  
+
   // Remove the branch.
   I->eraseFromParent();
   return 2;
@@ -262,9 +275,9 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 2 || Cond.size() == 0) && 
+  assert((Cond.size() == 2 || Cond.size() == 0) &&
          "PPC branch conditions have two components!");
-  
+
   // One-way branch.
   if (FBB == 0) {
     if (Cond.empty())   // Unconditional branch
@@ -274,7 +287,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
         .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
     return 1;
   }
-  
+
   // Two-way Conditional Branch.
   BuildMI(&MBB, DL, get(PPC::BCC))
     .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
@@ -377,11 +390,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
 
       // We need to store the CR in the low 4-bits of the saved value.  First,
       // issue a MFCR to save all of the CRBits.
-      unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? 
+      unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
                                                            PPC::R2 : PPC::R0;
       NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
                                .addReg(SrcReg, getKillRegState(isKill)));
-    
+
       // If the saved register wasn't CR0, shift the bits left so that they are
       // in CR0's slot.
       if (SrcReg != PPC::CR0) {
@@ -391,7 +404,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                        .addReg(ScratchReg).addImm(ShiftBits)
                        .addImm(0).addImm(31));
       }
-    
+
       NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
                                          .addReg(ScratchReg,
                                                  getKillRegState(isKill)),
@@ -428,14 +441,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
              SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
       Reg = PPC::CR7;
 
-    return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, 
+    return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
                                PPC::CRRCRegisterClass, NewMIs);
 
   } else if (RC == PPC::VRRCRegisterClass) {
     // We don't have indexed addressing for vector loads.  Emit:
     // R0 = ADDI FI#
     // STVX VAL, 0, R0
-    // 
+    //
     // FIXME: We use R0 here, because it isn't available for RA.
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
                                        FrameIdx, 0, 0));
@@ -469,8 +482,9 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 
   const MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, /*Offset=*/0,
+    MF.getMachineMemOperand(
+                MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
   NewMIs.back()->addMemOperand(MF, MMO);
@@ -513,9 +527,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
     // at the moment.
     unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
                                                           PPC::R2 : PPC::R0;
-    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), 
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
                                        ScratchReg), FrameIdx));
-    
+
     // If the reloaded register isn't CR0, shift the bits right so that they are
     // in the right CR's slot.
     if (DestReg != PPC::CR0) {
@@ -525,11 +539,11 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                     .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
                     .addImm(31));
     }
-    
+
     NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
                      .addReg(ScratchReg));
   } else if (RC == PPC::CRBITRCRegisterClass) {
-   
+
     unsigned Reg = 0;
     if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
         DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
@@ -556,14 +570,14 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
              DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
       Reg = PPC::CR7;
 
-    return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, 
+    return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
                                 PPC::CRRCRegisterClass, NewMIs);
 
   } else if (RC == PPC::VRRCRegisterClass) {
     // We don't have indexed addressing for vector loads.  Emit:
     // R0 = ADDI FI#
     // Dest = LVX 0, R0
-    // 
+    //
     // FIXME: We use R0 here, because it isn't available for RA.
     NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
                                        FrameIdx, 0, 0));
@@ -590,8 +604,9 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
   const MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOLoad, /*Offset=*/0,
+    MF.getMachineMemOperand(
+                MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
   NewMIs.back()->addMemOperand(MF, MMO);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index fc7b7b3cb897..b5249ae03769 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -32,7 +32,7 @@ enum {
   /// PPC970_First - This instruction starts a new dispatch group, so it will
   /// always be the first one in the group.
   PPC970_First = 0x1,
-  
+
   /// PPC970_Single - This instruction starts a new dispatch group and
   /// terminates it, so it will be the sole instruction in the group.
   PPC970_Single = 0x2,
@@ -40,7 +40,7 @@ enum {
   /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
   /// two dispatch pipes to be available to issue.
   PPC970_Cracked = 0x4,
-  
+
   /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
   /// an instruction is issued to.
   PPC970_Shift = 3,
@@ -58,9 +58,9 @@ enum PPC970_Unit {
   PPC970_VPERM  = 6 << PPC970_Shift,   // Vector Permute Unit
   PPC970_BRU    = 7 << PPC970_Shift    // Branch Unit
 };
-}
-  
-  
+} // end namespace PPCII
+
+
 class PPCInstrInfo : public TargetInstrInfoImpl {
   PPCTargetMachine &TM;
   const PPCRegisterInfo RI;
@@ -69,7 +69,7 @@ class PPCInstrInfo : public TargetInstrInfoImpl {
                            unsigned SrcReg, bool isKill, int FrameIdx,
                            const TargetRegisterClass *RC,
                            SmallVectorImpl<MachineInstr*> &NewMIs) const;
-  void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, 
+  void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                             unsigned DestReg, int FrameIdx,
                             const TargetRegisterClass *RC,
                             SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -82,6 +82,10 @@ public:
   ///
   virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
 
+  ScheduleHazardRecognizer *
+  CreateTargetHazardRecognizer(const TargetMachine *TM,
+                               const ScheduleDAG *DAG) const;
+
   unsigned isLoadFromStackSlot(const MachineInstr *MI,
                                int &FrameIndex) const;
   unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -90,8 +94,8 @@ public:
   // commuteInstruction - We can commute rlwimi instructions, but only if the
   // rotate amt is zero.  We also have to munge the immediates a bit.
   virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
-  
-  virtual void insertNoop(MachineBasicBlock &MBB, 
+
+  virtual void insertNoop(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI) const;
 
 
@@ -109,7 +113,7 @@ public:
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
-  
+
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
@@ -121,7 +125,7 @@ public:
                                     unsigned DestReg, int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
-  
+
   virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                                  int FrameIx,
                                                  uint64_t Offset,
@@ -130,7 +134,7 @@ public:
 
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-  
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index eb100ec75280..82aadeb47ad1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -68,17 +68,17 @@ def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
 // This sequence is used for long double->int conversions.  It changes the
 // bits in the FPSCR which is not modelled.  
 def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
-                        [SDNPOutFlag]>;
+                        [SDNPOutGlue]>;
 def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInFlag, SDNPOutFlag]>;
+                       [SDNPInGlue, SDNPOutGlue]>;
 def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInFlag, SDNPOutFlag]>;
+                       [SDNPInGlue, SDNPOutGlue]>;
 def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
-                       [SDNPInFlag, SDNPOutFlag]>;
+                       [SDNPInGlue, SDNPOutGlue]>;
 def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
                        [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
                         SDTCisVT<3, f64>]>,
-                       [SDNPInFlag]>;
+                       [SDNPInGlue]>;
 
 def PPCfsel   : SDNode<"PPCISD::FSEL",  
    // Type constraint for fsel.
@@ -105,45 +105,45 @@ def PPCstd_32     : SDNode<"PPCISD::STD_32"    , SDTStore,
 
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
 def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;
 def PPCcall_SVR4  : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
 def PPCload   : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
-                       [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                       [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
-                          [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
-                            [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+                            [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCbctrl_Darwin  : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
-                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                SDNPVariadic]>;
 
 def PPCbctrl_SVR4  : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;
 
 def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
 def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
-                        [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
+                        [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
 
 def PPCvcmp       : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
-def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
+def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
 
 def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue]>;
 
 def PPClbrx       : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
                            [SDNPHasChain, SDNPMayLoad]>;
@@ -286,31 +286,38 @@ def u16imm  : Operand<i32> {
 def s16immX4  : Operand<i32> {   // Multiply imm by 4 before printing.
   let PrintMethod = "printS16X4ImmOperand";
 }
-def target : Operand<OtherVT> {
+def directbrtarget : Operand<OtherVT> {
   let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getCondBrEncoding";
 }
 def calltarget : Operand<iPTR> {
-  let PrintMethod = "printCallOperand";
+  let EncoderMethod = "getDirectBrEncoding";
 }
 def aaddr : Operand<iPTR> {
   let PrintMethod = "printAbsAddrOperand";
 }
-def piclabel: Operand<iPTR> {
-  let PrintMethod = "printPICLabel";
-}
+def piclabel: Operand<iPTR> {}
 def symbolHi: Operand<i32> {
   let PrintMethod = "printSymbolHi";
+  let EncoderMethod = "getHA16Encoding";
 }
 def symbolLo: Operand<i32> {
   let PrintMethod = "printSymbolLo";
+  let EncoderMethod = "getLO16Encoding";
 }
 def crbitm: Operand<i8> {
   let PrintMethod = "printcrbitm";
+  let EncoderMethod = "get_crbitm_encoding";
 }
 // Address operands
 def memri : Operand<iPTR> {
   let PrintMethod = "printMemRegImm";
   let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+  let EncoderMethod = "getMemRIEncoding";
 }
 def memrr : Operand<iPTR> {
   let PrintMethod = "printMemRegReg";
@@ -319,9 +326,9 @@ def memrr : Operand<iPTR> {
 def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
   let PrintMethod = "printMemRegImmShifted";
   let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+  let EncoderMethod = "getMemRIXEncoding";
 }
 def tocentry : Operand<iPTR> {
-  let PrintMethod = "printTOCEntryLabel";
   let MIOperandInfo = (ops i32imm:$imm);
 }
 
@@ -355,11 +362,9 @@ def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
 
 let hasCtrlDep = 1 in {
 let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
-                              "${:comment} ADJCALLSTACKDOWN",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
                               [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
-                              "${:comment} ADJCALLSTACKUP",
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
                               [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
@@ -368,8 +373,7 @@ def UPDATE_VRSAVE    : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
 }
 
 let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
-                       "${:comment} DYNALLOC $result, $negsize, $fpsi",
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
                        [(set GPRC:$result,
                              (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
                          
@@ -378,26 +382,26 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
 let usesCustomInserter = 1,    // Expanded after instruction selection.
     PPC970_Single = 1 in {
   def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
-                              i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                              i32imm:$BROPC), "",
                               []>;
   def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
-                              i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                              i32imm:$BROPC), "",
                               []>;
   def SELECT_CC_F4  : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
-                              i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                              i32imm:$BROPC), "",
                               []>;
   def SELECT_CC_F8  : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
-                              i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                              i32imm:$BROPC), "",
                               []>;
   def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
-                              i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+                              i32imm:$BROPC), "",
                               []>;
 }
 
 // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
 // scavenge a register for it.
 def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
-                     "${:comment} SPILL_CR $cond $F", []>;
+                     "", []>;
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
   let isReturn = 1, Uses = [LR, RM] in
@@ -409,12 +413,12 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
 }
 
 let Defs = [LR] in
-  def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+  def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "", []>,
                    PPC970_Unit_BRU;
 
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   let isBarrier = 1 in {
-  def B   : IForm<18, 0, 0, (outs), (ins target:$dst),
+  def B   : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
                   "b $dst", BrB,
                   [(br bb:$dst)]>;
   }
@@ -422,7 +426,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   // BCC represents an arbitrary conditional branch on a predicate.
   // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
   // a two-value operand where a dag node expects two operands. :( 
-  def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst),
+  def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
                   "b${cond:cc} ${cond:reg}, $dst"
                   /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
 }
@@ -548,105 +552,81 @@ def DCBZL  : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
 let usesCustomInserter = 1 in {
   let Uses = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_SUB_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_AND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_OR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_XOR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
     def ATOMIC_LOAD_NAND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
-      "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
       [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
-      "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
     def ATOMIC_CMP_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
-      "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
     def ATOMIC_CMP_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
-      "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
       [(set GPRC:$dst, 
                     (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
 
     def ATOMIC_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
-      "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
       [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
     def ATOMIC_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
-      "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
       [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
     def ATOMIC_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
-      "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
       [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
   }
 }
@@ -785,33 +765,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
 
 // Unindexed (r+i) Stores with Update (preinc).
 let PPC970_Unit = 2 in {
-def STBU  : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STBU  : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res,
                           (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, 
                                          iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU  : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STHU  : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res,
                         (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, 
                                         iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU  : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STWU  : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, 
                                                      iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res, (pre_store F4RC:$rS,  ptr_rc:$ptrreg, 
                                           iaddroff:$ptroff))]>,
                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
                              symbolLo:$ptroff, ptr_rc:$ptrreg),
                     "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
                     [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, 
@@ -1120,9 +1100,16 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
 // As it turns out, in all cases where we currently use this,
 // we're only interested in one subregister of it.  Represent this in the
 // instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
 def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
-                       "mfcr $rT ${:comment} $FXM", SprMFCR>,
+                       "", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
+            
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+                     "mfcr $rT", SprMFCR>,
+                     PPC970_MicroCode, PPC970_Unit_CRU;
+
 def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                        "mfcr $rT, $FXM", SprMFCR>,
             PPC970_DGroup_First, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index daf4ec6d012a..78383e0603bd 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -16,7 +16,7 @@
 #include "PPCRelocations.h"
 #include "PPCTargetMachine.h"
 #include "llvm/Function.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
index 3644c79d0410..d1178dd7e1ff 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -17,10 +17,11 @@ using namespace llvm;
 PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
   PCSymbol = ".";
   CommentString = ";";
-  ExceptionsType = ExceptionHandling::Dwarf;
+  ExceptionsType = ExceptionHandling::DwarfTable;
 
   if (!is64Bit)
     Data64bitsDirective = 0;      // We can't emit a 64-bit unit in PPC32 mode.
+
   AssemblerDialect = 1;           // New-Style mnemonics.
   SupportsDebugInformation= true; // Debug information.
 }
@@ -47,7 +48,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
 
   // Exceptions handling
   if (!is64Bit)
-    ExceptionsType = ExceptionHandling::Dwarf;
+    ExceptionsType = ExceptionHandling::DwarfTable;
     
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
new file mode 100644
index 000000000000..65c2c82c51a7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -0,0 +1,195 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+  PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const PPCMCCodeEmitter &);   // DO NOT IMPLEMENT
+  const TargetMachine &TM;
+  MCContext &Ctx;
+  
+public:
+  PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+    : TM(tm), Ctx(ctx) {
+  }
+  
+  ~PPCMCCodeEmitter() {}
+
+  unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+                            SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+                               SmallVectorImpl<MCFixup> &Fixups) const;
+
+  /// getMachineOpValue - Return binary encoding of operand. If the machine
+  /// operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+  
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  unsigned getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups) const;
+  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+    unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+    
+    // Output the constant in big endian byte order.
+    for (unsigned i = 0; i != 4; ++i) {
+      OS << (char)(Bits >> 24);
+      Bits <<= 8;
+    }
+    
+    ++MCNumEmitted;  // Keep track of the # of mi's emitted.
+  }
+  
+};
+  
+} // end anonymous namespace
+  
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+                                            MCContext &Ctx) {
+  return new PPCMCCodeEmitter(TM, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+  
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_br24));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_brcond14));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+  
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_ha16));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+  
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+                                            SmallVectorImpl<MCFixup> &Fixups) const {
+  // Encode (imm, reg) as a memri, which has the low 16-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+  
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+  
+  // Add a fixup for the displacement field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+  
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+  
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo14));
+  return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+         (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+  return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+    // The GPR operand should come through here though.
+    assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+           MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+    return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+  }
+  
+  assert(MO.isImm() &&
+         "Relocation required in an instruction that we cannot encode!");
+  return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 000000000000..6082587b2d3d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,172 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+  return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+  MCContext &Ctx = AP.OutContext;
+
+  SmallString<128> Name;
+  if (!MO.isGlobal()) {
+    assert(MO.isSymbol() && "Isn't a symbol reference");
+    Name += AP.MAI->getGlobalPrefix();
+    Name += MO.getSymbolName();
+  } else {    
+    const GlobalValue *GV = MO.getGlobal();
+    bool isImplicitlyPrivate = false;
+    if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+        (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+      isImplicitlyPrivate = true;
+    
+    AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+  }
+  
+  // If the target flags on the operand changes the name of the symbol, do that
+  // before we return the symbol.
+  if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+    Name += "$stub";
+    MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      getMachOMMI(AP).getFnStubEntry(Sym);
+    if (StubSym.getPointer())
+      return Sym;
+    
+    if (MO.isGlobal()) {
+      StubSym =
+      MachineModuleInfoImpl::
+      StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+                  !MO.getGlobal()->hasInternalLinkage());
+    } else {
+      Name.erase(Name.end()-5, Name.end());
+      StubSym =
+      MachineModuleInfoImpl::
+      StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+    }
+    return Sym;
+  }
+
+  // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+  // then add the suffix.
+  if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+    Name += "$non_lazy_ptr";
+    MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+  
+    MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+    
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? 
+         MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+    
+    if (StubSym.getPointer() == 0) {
+      assert(MO.isGlobal() && "Extern symbol not handled yet");
+      StubSym = MachineModuleInfoImpl::
+                   StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+                               !MO.getGlobal()->hasInternalLinkage());
+    }
+    return Sym;
+  }
+  
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+                              AsmPrinter &Printer) {
+  MCContext &Ctx = Printer.OutContext;
+  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+  if (MO.getTargetFlags() & PPCII::MO_LO16)
+    RefKind = MCSymbolRefExpr::VK_PPC_LO16;
+  else if (MO.getTargetFlags() & PPCII::MO_HA16)
+    RefKind = MCSymbolRefExpr::VK_PPC_HA16;
+
+  // FIXME: This isn't right, but we don't have a good way to express this in
+  // the MC Level, see below.
+  if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+    RefKind = MCSymbolRefExpr::VK_None;
+  
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+  if (!MO.isJTI() && MO.getOffset())
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
+                                   Ctx);
+
+  // Subtract off the PIC base if required.
+  if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+    const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+    
+    const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+    Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+    // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+    // since it is not a symbol!
+  }
+  
+  return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                        AsmPrinter &AP) {
+  OutMI.setOpcode(MI->getOpcode());
+  
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    
+    MCOperand MCOp;
+    switch (MO.getType()) {
+    default:
+      MI->dump();
+      assert(0 && "unknown operand type");
+    case MachineOperand::MO_Register:
+      assert(!MO.getSubReg() && "Subregs should be eliminated!");
+      MCOp = MCOperand::CreateReg(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      MCOp = MCOperand::CreateImm(MO.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+                                      MO.getMBB()->getSymbol(), AP.OutContext));
+      break;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_ExternalSymbol:
+      MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP);
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+      break;
+    }
+    
+    OutMI.addOperand(MCOp);
+  }
+}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 653e143ba407..45d8b6bb238d 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -17,7 +17,7 @@
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCRegisterInfo.h"
-#include "PPCFrameInfo.h"
+#include "PPCFrameLowering.h"
 #include "PPCSubtarget.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
@@ -31,7 +31,7 @@
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -44,16 +44,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include <cstdlib>
 
-// FIXME This disables some code that aligns the stack to a boundary
-// bigger than the default (16 bytes on Darwin) when there is a stack local
-// of greater alignment.  This does not currently work, because the delta
-// between old and new stack pointers is added to offsets that reference
-// incoming parameters after the prolog is generated, and the code that 
-// does that doesn't handle a variable delta.  You don't want to do that
-// anyway; a better approach is to reserve another register that retains
-// to the incoming stack pointer, and reference parameters relative to that.
-#define ALIGN_STACK 0
-
 // FIXME (64-bit): Eventually enable by default.
 namespace llvm {
 cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
@@ -68,14 +58,11 @@ cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
 
 using namespace llvm;
 
-#define EnableRegisterScavenging \
-  ((EnablePPC32RS && !Subtarget.isPPC64()) || \
-   (EnablePPC64RS && Subtarget.isPPC64()))
-
 // FIXME (64-bit): Should be inlined.
 bool
 PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
-  return EnableRegisterScavenging;
+  return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
+          (EnablePPC64RS && Subtarget.isPPC64()));
 }
 
 /// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -269,26 +256,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
 }
 
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register.  This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  // Naked functions have no stack frame pushed, so we don't have a frame pointer.
-  if (MF.getFunction()->hasFnAttr(Attribute::Naked))
-    return false;
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
-    (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
-}
-
-static bool spillsCR(const MachineFunction &MF) {
-  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
-  return FuncInfo->isCRSpilled();
-}
-
 BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
+  const PPCFrameLowering *PPCFI =
+    static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
   Reserved.set(PPC::R0);
   Reserved.set(PPC::R1);
   Reserved.set(PPC::LR);
@@ -314,7 +286,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(PPC::R13);
     Reserved.set(PPC::R31);
 
-    if (!EnableRegisterScavenging)
+    if (!requiresRegisterScavenging(MF))
       Reserved.set(PPC::R0);    // FIXME (64-bit): Remove
 
     Reserved.set(PPC::X0);
@@ -334,7 +306,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     }
   }
 
-  if (needsFP(MF))
+  if (PPCFI->needsFP(MF))
     Reserved.set(PPC::R31);
 
   return Reserved;
@@ -344,30 +316,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register.  This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return MFI->getStackSize() && needsFP(MF);
-}
-
-/// MustSaveLR - Return true if this function requires that we save the LR
-/// register onto the stack in the prolog and restore it in the epilog of the
-/// function.
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
-  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
-  
-  // We need a save/restore of LR if there is any def of LR (which is
-  // defined by calls, including the PIC setup sequence), or if there is
-  // some use of the LR stack slot (e.g. for builtin_return_address).
-  // (LR comes in 32 and 64 bit versions.)
-  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
-  return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
-}
-
-
-
 void PPCRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -447,7 +395,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
   unsigned FrameSize = MFI->getStackSize();
   
   // Get stack alignments.
-  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+  unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
   unsigned MaxAlign = MFI->getMaxAlignment();
   if (MaxAlign > TargetAlign)
     report_fatal_error("Dynamic alloca with large aligns not supported");
@@ -464,7 +412,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
 
   // FIXME (64-bit): Use "findScratchRegister"
   unsigned Reg;
-  if (EnableRegisterScavenging)
+  if (requiresRegisterScavenging(MF))
     Reg = findScratchRegister(II, RS, RC, SPAdj);
   else
     Reg = PPC::R0;
@@ -474,7 +422,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
       .addReg(PPC::R31)
       .addImm(FrameSize);
   } else if (LP64) {
-    if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+    if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
       BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
         .addImm(0)
         .addReg(PPC::X1);
@@ -491,7 +439,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
   // Grow the stack and update the stack pointer link, then determine the
   // address of new allocated space.
   if (LP64) {
-    if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+    if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
       BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
         .addReg(Reg, RegState::Kill)
         .addReg(PPC::X1)
@@ -593,6 +541,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineFunction &MF = *MBB.getParent();
   // Get the frame info.
   MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc dl = MI.getDebugLoc();
 
   // Find out which operand is the frame index.
@@ -625,14 +574,15 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 
   // Special case for pseudo-op SPILL_CR.
-  if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
+  if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
     if (OpC == PPC::SPILL_CR) {
       lowerCRSpilling(II, FrameIndex, SPAdj, RS);
       return;
     }
 
   // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
-  MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1,
+  MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+                                              PPC::R31 : PPC::R1,
                                               false);
 
   // Figure out if the offset in the instruction is shifted right two bits. This
@@ -682,7 +632,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // FIXME (64-bit): Use "findScratchRegister".
   unsigned SReg;
-  if (EnableRegisterScavenging)
+  if (requiresRegisterScavenging(MF))
     SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
   else
     SReg = PPC::R0;
@@ -715,898 +665,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
 }
 
-/// VRRegNo - Map from a numbered VR register to its enum value.
-///
-static const unsigned short VRRegNo[] = {
- PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
- PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-/// RemoveVRSaveCode - We have found that this function does not need any code
-/// to manipulate the VRSAVE register, even though it uses vector registers.
-/// This can happen when the only registers used are known to be live in or out
-/// of the function.  Remove all of the VRSAVE related code from the function.
-static void RemoveVRSaveCode(MachineInstr *MI) {
-  MachineBasicBlock *Entry = MI->getParent();
-  MachineFunction *MF = Entry->getParent();
-
-  // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
-  MachineBasicBlock::iterator MBBI = MI;
-  ++MBBI;
-  assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
-  MBBI->eraseFromParent();
-  
-  bool RemovedAllMTVRSAVEs = true;
-  // See if we can find and remove the MTVRSAVE instruction from all of the
-  // epilog blocks.
-  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
-    // If last instruction is a return instruction, add an epilogue
-    if (!I->empty() && I->back().getDesc().isReturn()) {
-      bool FoundIt = false;
-      for (MBBI = I->end(); MBBI != I->begin(); ) {
-        --MBBI;
-        if (MBBI->getOpcode() == PPC::MTVRSAVE) {
-          MBBI->eraseFromParent();  // remove it.
-          FoundIt = true;
-          break;
-        }
-      }
-      RemovedAllMTVRSAVEs &= FoundIt;
-    }
-  }
-
-  // If we found and removed all MTVRSAVE instructions, remove the read of
-  // VRSAVE as well.
-  if (RemovedAllMTVRSAVEs) {
-    MBBI = MI;
-    assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
-    --MBBI;
-    assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
-    MBBI->eraseFromParent();
-  }
-  
-  // Finally, nuke the UPDATE_VRSAVE.
-  MI->eraseFromParent();
-}
-
-// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
-// instruction selector.  Based on the vector registers that have been used,
-// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
-  MachineFunction *MF = MI->getParent()->getParent();
-  DebugLoc dl = MI->getDebugLoc();
-
-  unsigned UsedRegMask = 0;
-  for (unsigned i = 0; i != 32; ++i)
-    if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
-      UsedRegMask |= 1 << (31-i);
-  
-  // Live in and live out values already must be in the mask, so don't bother
-  // marking them.
-  for (MachineRegisterInfo::livein_iterator
-       I = MF->getRegInfo().livein_begin(),
-       E = MF->getRegInfo().livein_end(); I != E; ++I) {
-    unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
-    if (VRRegNo[RegNo] == I->first)        // If this really is a vector reg.
-      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
-  }
-  for (MachineRegisterInfo::liveout_iterator
-       I = MF->getRegInfo().liveout_begin(),
-       E = MF->getRegInfo().liveout_end(); I != E; ++I) {
-    unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
-    if (VRRegNo[RegNo] == *I)              // If this really is a vector reg.
-      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
-  }
-  
-  // If no registers are used, turn this into a copy.
-  if (UsedRegMask == 0) {
-    // Remove all VRSAVE code.
-    RemoveVRSaveCode(MI);
-    return;
-  }
-
-  unsigned SrcReg = MI->getOperand(1).getReg();
-  unsigned DstReg = MI->getOperand(0).getReg();
-
-  if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
-    if (DstReg != SrcReg)
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
-        .addReg(SrcReg)
-        .addImm(UsedRegMask);
-    else
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
-        .addReg(SrcReg, RegState::Kill)
-        .addImm(UsedRegMask);
-  } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
-    if (DstReg != SrcReg)
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
-        .addReg(SrcReg)
-        .addImm(UsedRegMask >> 16);
-    else
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
-        .addReg(SrcReg, RegState::Kill)
-        .addImm(UsedRegMask >> 16);
-  } else {
-    if (DstReg != SrcReg)
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
-        .addReg(SrcReg)
-        .addImm(UsedRegMask >> 16);
-    else
-      BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
-        .addReg(SrcReg, RegState::Kill)
-        .addImm(UsedRegMask >> 16);
-
-    BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
-      .addReg(DstReg, RegState::Kill)
-      .addImm(UsedRegMask & 0xFFFF);
-  }
-  
-  // Remove the old UPDATE_VRSAVE instruction.
-  MI->eraseFromParent();
-}
-
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Get the number of bytes to allocate from the FrameInfo
-  unsigned FrameSize = MFI->getStackSize();
-  
-  // Get the alignments provided by the target, and the maximum alignment
-  // (if any) of the fixed frame objects.
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  unsigned AlignMask = TargetAlign - 1;  //
-
-  // If we are a leaf function, and use up to 224 bytes of stack space,
-  // don't have a frame pointer, calls, or dynamic alloca then we do not need
-  // to adjust the stack pointer (we fit in the Red Zone).
-  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
-  // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
-  if (!DisableRedZone &&
-      FrameSize <= 224 &&                          // Fits in red zone.
-      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->adjustsStack() &&                      // No calls.
-      (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
-    // No need for frame
-    MFI->setStackSize(0);
-    return;
-  }
-  
-  // Get the maximum call frame size of all the calls.
-  unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-  
-  // Maximum call frame needs to be at least big enough for linkage and 8 args.
-  unsigned minCallFrameSize =
-    PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), 
-                                      Subtarget.isDarwinABI());
-  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
-
-  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
-  // that allocations will be aligned.
-  if (MFI->hasVarSizedObjects())
-    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-  
-  // Update maximum call frame size.
-  MFI->setMaxCallFrameSize(maxCallFrameSize);
-  
-  // Include call frame size in total.
-  FrameSize += maxCallFrameSize;
-  
-  // Make sure the frame is aligned.
-  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
-  // Update frame info.
-  MFI->setStackSize(FrameSize);
-}
-
-void
-PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                      RegScavenger *RS) const {
-  //  Save and clear the LR state.
-  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-  unsigned LR = getRARegister();
-  FI->setMustSaveLR(MustSaveLR(MF, LR));
-  MF.getRegInfo().setPhysRegUnused(LR);
-
-  //  Save R31 if necessary
-  int FPSI = FI->getFramePointerSaveIndex();
-  bool isPPC64 = Subtarget.isPPC64();
-  bool isDarwinABI  = Subtarget.isDarwinABI();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
- 
-  // If the frame pointer save index hasn't been defined yet.
-  if (!FPSI && needsFP(MF)) {
-    // Find out what the fix offset of the frame pointer save area.
-    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
-                                                           isDarwinABI);
-    // Allocate the frame index for frame pointer save area.
-    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
-    // Save the result.
-    FI->setFramePointerSaveIndex(FPSI);                      
-  }
-
-  // Reserve stack space to move the linkage area to in case of a tail call.
-  int TCSPDelta = 0;
-  if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
-    MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
-  }
-  
-  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
-  // a large stack, which will require scavenging a register to materialize a
-  // large offset.
-  // FIXME: this doesn't actually check stack size, so is a bit pessimistic
-  // FIXME: doesn't detect whether or not we need to spill vXX, which requires
-  //        r0 for now.
-
-  if (EnableRegisterScavenging) // FIXME (64-bit): Enable.
-    if (needsFP(MF) || spillsCR(MF)) {
-      const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
-      const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
-      const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
-      RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
-                                                         RC->getAlignment(),
-                                                         false));
-    }
-}
-
-void
-PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
-                                                     const {
-  // Early exit if not using the SVR4 ABI.
-  if (!Subtarget.isSVR4ABI()) {
-    return;
-  }
-
-  // Get callee saved register information.
-  MachineFrameInfo *FFI = MF.getFrameInfo();
-  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
-
-  // Early exit if no callee saved registers are modified!
-  if (CSI.empty() && !needsFP(MF)) {
-    return;
-  }
-  
-  unsigned MinGPR = PPC::R31;
-  unsigned MinG8R = PPC::X31;
-  unsigned MinFPR = PPC::F31;
-  unsigned MinVR = PPC::V31;
-  
-  bool HasGPSaveArea = false;
-  bool HasG8SaveArea = false;
-  bool HasFPSaveArea = false;
-  bool HasCRSaveArea = false;
-  bool HasVRSAVESaveArea = false;
-  bool HasVRSaveArea = false;
-  
-  SmallVector<CalleeSavedInfo, 18> GPRegs;
-  SmallVector<CalleeSavedInfo, 18> G8Regs;
-  SmallVector<CalleeSavedInfo, 18> FPRegs;
-  SmallVector<CalleeSavedInfo, 18> VRegs;
-  
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (PPC::GPRCRegisterClass->contains(Reg)) {
-      HasGPSaveArea = true;
-      
-      GPRegs.push_back(CSI[i]);
-      
-      if (Reg < MinGPR) {
-        MinGPR = Reg;
-      }
-    } else if (PPC::G8RCRegisterClass->contains(Reg)) {
-      HasG8SaveArea = true;
-
-      G8Regs.push_back(CSI[i]);
-
-      if (Reg < MinG8R) {
-        MinG8R = Reg;
-      }
-    } else if (PPC::F8RCRegisterClass->contains(Reg)) {
-      HasFPSaveArea = true;
-      
-      FPRegs.push_back(CSI[i]);
-      
-      if (Reg < MinFPR) {
-        MinFPR = Reg;
-      }
-// FIXME SVR4: Disable CR save area for now.
-    } else if (PPC::CRBITRCRegisterClass->contains(Reg)
-               || PPC::CRRCRegisterClass->contains(Reg)) {
-//      HasCRSaveArea = true;
-    } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
-      HasVRSAVESaveArea = true;
-    } else if (PPC::VRRCRegisterClass->contains(Reg)) {
-      HasVRSaveArea = true;
-      
-      VRegs.push_back(CSI[i]);
-      
-      if (Reg < MinVR) {
-        MinVR = Reg;
-      }
-    } else {
-      llvm_unreachable("Unknown RegisterClass!");
-    }
-  }
-
-  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
-  
-  int64_t LowerBound = 0;
-
-  // Take into account stack space reserved for tail calls.
-  int TCSPDelta = 0;
-  if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
-    LowerBound = TCSPDelta;
-  }
-
-  // The Floating-point register save area is right below the back chain word
-  // of the previous stack frame.
-  if (HasFPSaveArea) {
-    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
-      int FI = FPRegs[i].getFrameIdx();
-      
-      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-    }
-    
-    LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; 
-  }
-
-  // Check whether the frame pointer register is allocated. If so, make sure it
-  // is spilled to the correct offset.
-  if (needsFP(MF)) {
-    HasGPSaveArea = true;
-    
-    int FI = PFI->getFramePointerSaveIndex();
-    assert(FI && "No Frame Pointer Save Slot!");
-    
-    FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-  }
-  
-  // General register save area starts right below the Floating-point
-  // register save area.
-  if (HasGPSaveArea || HasG8SaveArea) {
-    // Move general register save area spill slots down, taking into account
-    // the size of the Floating-point register save area.
-    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
-      int FI = GPRegs[i].getFrameIdx();
-      
-      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-    }
-    
-    // Move general register save area spill slots down, taking into account
-    // the size of the Floating-point register save area.
-    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
-      int FI = G8Regs[i].getFrameIdx();
-
-      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-    }
-
-    unsigned MinReg = std::min<unsigned>(getRegisterNumbering(MinGPR),
-                                         getRegisterNumbering(MinG8R));
-
-    if (Subtarget.isPPC64()) {
-      LowerBound -= (31 - MinReg + 1) * 8;
-    } else {
-      LowerBound -= (31 - MinReg + 1) * 4;
-    }
-  }
-  
-  // The CR save area is below the general register save area.
-  if (HasCRSaveArea) {
-    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
-    //             which have the CR/CRBIT register class?
-    // Adjust the frame index of the CR spill slot.
-    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-      unsigned Reg = CSI[i].getReg();
-    
-      if (PPC::CRBITRCRegisterClass->contains(Reg) ||
-          PPC::CRRCRegisterClass->contains(Reg)) {
-        int FI = CSI[i].getFrameIdx();
-
-        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-      }
-    }
-    
-    LowerBound -= 4; // The CR save area is always 4 bytes long.
-  }
-  
-  if (HasVRSAVESaveArea) {
-    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
-    //             which have the VRSAVE register class?
-    // Adjust the frame index of the VRSAVE spill slot.
-    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-      unsigned Reg = CSI[i].getReg();
-    
-      if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
-        int FI = CSI[i].getFrameIdx();
-
-        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-      }
-    }
-    
-    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
-  }
-  
-  if (HasVRSaveArea) {
-    // Insert alignment padding, we need 16-byte alignment.
-    LowerBound = (LowerBound - 15) & ~(15);
-    
-    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
-      int FI = VRegs[i].getFrameIdx();
-      
-      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
-    }
-  }
-}
-
-void
-PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineModuleInfo &MMI = MF.getMMI();
-  DebugLoc dl;
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
-       !MF.getFunction()->doesNotThrow() ||
-       UnwindTablesMandatory;
-  
-  // Prepare for frame info.
-  MCSymbol *FrameLabel = 0;
-
-  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
-  // process it.
-  for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
-    if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
-      HandleVRSaveUpdate(MBBI, TII);
-      break;
-    }
-  }
-  
-  // Move MBBI back to the beginning of the function.
-  MBBI = MBB.begin();
-
-  // Work out frame sizes.
-  determineFrameLayout(MF);
-  unsigned FrameSize = MFI->getStackSize();
-  
-  int NegFrameSize = -FrameSize;
-  
-  // Get processor type.
-  bool isPPC64 = Subtarget.isPPC64();
-  // Get operating system
-  bool isDarwinABI = Subtarget.isDarwinABI();
-  // Check if the link register (LR) must be saved.
-  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-  bool MustSaveLR = FI->mustSaveLR();
-  // Do we have a frame pointer for this function?
-  bool HasFP = hasFP(MF) && FrameSize;
-  
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
-
-  int FPOffset = 0;
-  if (HasFP) {
-    if (Subtarget.isSVR4ABI()) {
-      MachineFrameInfo *FFI = MF.getFrameInfo();
-      int FPIndex = FI->getFramePointerSaveIndex();
-      assert(FPIndex && "No Frame Pointer Save Slot!");
-      FPOffset = FFI->getObjectOffset(FPIndex);
-    } else {
-      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
-    }
-  }
-
-  if (isPPC64) {
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
-      
-    if (HasFP)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
-        .addReg(PPC::X31)
-        .addImm(FPOffset/4)
-        .addReg(PPC::X1);
-    
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
-        .addReg(PPC::X0)
-        .addImm(LROffset / 4)
-        .addReg(PPC::X1);
-  } else {
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
-      
-    if (HasFP)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
-        .addReg(PPC::R31)
-        .addImm(FPOffset)
-        .addReg(PPC::R1);
-
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
-        .addReg(PPC::R0)
-        .addImm(LROffset)
-        .addReg(PPC::R1);
-  }
-  
-  // Skip if a leaf routine.
-  if (!FrameSize) return;
-  
-  // Get stack alignments.
-  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-
-  // Adjust stack pointer: r1 += NegFrameSize.
-  // If there is a preferred stack alignment, align R1 now
-  if (!isPPC64) {
-    // PPC32.
-    if (ALIGN_STACK && MaxAlign > TargetAlign) {
-      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
-             "Invalid alignment!");
-      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
-        .addReg(PPC::R1)
-        .addImm(0)
-        .addImm(32 - Log2_32(MaxAlign))
-        .addImm(31);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
-        .addReg(PPC::R0, RegState::Kill)
-        .addImm(NegFrameSize);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
-        .addReg(PPC::R1)
-        .addReg(PPC::R1)
-        .addReg(PPC::R0);
-    } else if (isInt<16>(NegFrameSize)) {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
-        .addReg(PPC::R1)
-        .addImm(NegFrameSize)
-        .addReg(PPC::R1);
-    } else {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
-        .addImm(NegFrameSize >> 16);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
-        .addReg(PPC::R0, RegState::Kill)
-        .addImm(NegFrameSize & 0xFFFF);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
-        .addReg(PPC::R1)
-        .addReg(PPC::R1)
-        .addReg(PPC::R0);
-    }
-  } else {    // PPC64.
-    if (ALIGN_STACK && MaxAlign > TargetAlign) {
-      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
-             "Invalid alignment!");
-      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
-        .addReg(PPC::X1)
-        .addImm(0)
-        .addImm(64 - Log2_32(MaxAlign));
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
-        .addReg(PPC::X0)
-        .addImm(NegFrameSize);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
-        .addReg(PPC::X1)
-        .addReg(PPC::X1)
-        .addReg(PPC::X0);
-    } else if (isInt<16>(NegFrameSize)) {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
-        .addReg(PPC::X1)
-        .addImm(NegFrameSize / 4)
-        .addReg(PPC::X1);
-    } else {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
-        .addImm(NegFrameSize >> 16);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
-        .addReg(PPC::X0, RegState::Kill)
-        .addImm(NegFrameSize & 0xFFFF);
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
-        .addReg(PPC::X1)
-        .addReg(PPC::X1)
-        .addReg(PPC::X0);
-    }
-  }
-
-  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-  
-  // Add the "machine moves" for the instructions we generated above, but in
-  // reverse order.
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer becomes valid.
-    FrameLabel = MMI.getContext().CreateTempSymbol();
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
-  
-    // Show update of SP.
-    if (NegFrameSize) {
-      MachineLocation SPDst(MachineLocation::VirtualFP);
-      MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
-      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-    } else {
-      MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabel, SP, SP));
-    }
-    
-    if (HasFP) {
-      MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
-      MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
-    }
-
-    if (MustSaveLR) {
-      MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
-      MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
-      Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
-    }
-  }
-
-  MCSymbol *ReadyLabel = 0;
-
-  // If there is a frame pointer, copy R1 into R31
-  if (HasFP) {
-    if (!isPPC64) {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
-        .addReg(PPC::R1)
-        .addReg(PPC::R1);
-    } else {
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
-        .addReg(PPC::X1)
-        .addReg(PPC::X1);
-    }
-
-    if (needsFrameMoves) {
-      ReadyLabel = MMI.getContext().CreateTempSymbol();
-
-      // Mark effective beginning of when frame pointer is ready.
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
-
-      MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
-                                    (isPPC64 ? PPC::X1 : PPC::R1));
-      MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
-    }
-  }
-
-  if (needsFrameMoves) {
-    MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
-
-    // Add callee saved registers to move list.
-    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
-      int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
-      unsigned Reg = CSI[I].getReg();
-      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
-      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
-      MachineLocation CSSrc(Reg);
-      Moves.push_back(MachineMove(Label, CSDst, CSSrc));
-    }
-  }
-}
-
-void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc dl;
-
-  assert( (RetOpcode == PPC::BLR ||
-           RetOpcode == PPC::TCRETURNri ||
-           RetOpcode == PPC::TCRETURNdi ||
-           RetOpcode == PPC::TCRETURNai ||
-           RetOpcode == PPC::TCRETURNri8 ||
-           RetOpcode == PPC::TCRETURNdi8 ||
-           RetOpcode == PPC::TCRETURNai8) &&
-         "Can only insert epilog into returning blocks");
-
-  // Get alignment info so we know how to restore r1
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-
-  // Get the number of bytes allocated from the FrameInfo.
-  int FrameSize = MFI->getStackSize();
-
-  // Get processor type.
-  bool isPPC64 = Subtarget.isPPC64();
-  // Get operating system
-  bool isDarwinABI = Subtarget.isDarwinABI();
-  // Check if the link register (LR) has been saved.
-  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-  bool MustSaveLR = FI->mustSaveLR();
-  // Do we have a frame pointer for this function?
-  bool HasFP = hasFP(MF) && FrameSize;
-  
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
-
-  int FPOffset = 0;
-  if (HasFP) {
-    if (Subtarget.isSVR4ABI()) {
-      MachineFrameInfo *FFI = MF.getFrameInfo();
-      int FPIndex = FI->getFramePointerSaveIndex();
-      assert(FPIndex && "No Frame Pointer Save Slot!");
-      FPOffset = FFI->getObjectOffset(FPIndex);
-    } else {
-      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
-    }
-  }
-  
-  bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
-    RetOpcode == PPC::TCRETURNdi ||
-    RetOpcode == PPC::TCRETURNai ||
-    RetOpcode == PPC::TCRETURNri8 ||
-    RetOpcode == PPC::TCRETURNdi8 ||
-    RetOpcode == PPC::TCRETURNai8;
-
-  if (UsesTCRet) {
-    int MaxTCRetDelta = FI->getTailCallSPDelta();
-    MachineOperand &StackAdjust = MBBI->getOperand(1);
-    assert(StackAdjust.isImm() && "Expecting immediate value.");
-    // Adjust stack pointer.
-    int StackAdj = StackAdjust.getImm();
-    int Delta = StackAdj - MaxTCRetDelta;
-    assert((Delta >= 0) && "Delta must be positive");
-    if (MaxTCRetDelta>0)
-      FrameSize += (StackAdj +Delta);
-    else
-      FrameSize += StackAdj;
-  }
-
-  if (FrameSize) {
-    // The loaded (or persistent) stack pointer value is offset by the 'stwu'
-    // on entry to the function.  Add this offset back now.
-    if (!isPPC64) {
-      // If this function contained a fastcc call and GuaranteedTailCallOpt is
-      // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
-      // call which invalidates the stack pointer value in SP(0). So we use the
-      // value of R31 in this case.
-      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
-        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
-          .addReg(PPC::R31).addImm(FrameSize);
-      } else if(FI->hasFastCall()) {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
-          .addImm(FrameSize >> 16);
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
-          .addReg(PPC::R0, RegState::Kill)
-          .addImm(FrameSize & 0xFFFF);
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
-          .addReg(PPC::R1)
-          .addReg(PPC::R31)
-          .addReg(PPC::R0);
-      } else if (isInt<16>(FrameSize) &&
-                 (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
-                 !MFI->hasVarSizedObjects()) {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
-          .addReg(PPC::R1).addImm(FrameSize);
-      } else {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
-          .addImm(0).addReg(PPC::R1);
-      }
-    } else {
-      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
-        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
-          .addReg(PPC::X31).addImm(FrameSize);
-      } else if(FI->hasFastCall()) {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
-          .addImm(FrameSize >> 16);
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
-          .addReg(PPC::X0, RegState::Kill)
-          .addImm(FrameSize & 0xFFFF);
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
-          .addReg(PPC::X1)
-          .addReg(PPC::X31)
-          .addReg(PPC::X0);
-      } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
-            !MFI->hasVarSizedObjects()) {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
-           .addReg(PPC::X1).addImm(FrameSize);
-      } else {
-        BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
-           .addImm(0).addReg(PPC::X1);
-      }
-    }
-  }
-
-  if (isPPC64) {
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
-        .addImm(LROffset/4).addReg(PPC::X1);
-        
-    if (HasFP)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
-        .addImm(FPOffset/4).addReg(PPC::X1);
-        
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
-  } else {
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
-          .addImm(LROffset).addReg(PPC::R1);
-        
-    if (HasFP)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
-          .addImm(FPOffset).addReg(PPC::R1);
-          
-    if (MustSaveLR)
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
-  }
-
-  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
-  // call optimization
-  if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
-      MF.getFunction()->getCallingConv() == CallingConv::Fast) {
-     PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-     unsigned CallerAllocatedAmt = FI->getMinReservedArea();
-     unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
-     unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
-     unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
-     unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
-     unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
-     unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
-     unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
-
-     if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
-       BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
-         .addReg(StackReg).addImm(CallerAllocatedAmt);
-     } else {
-       BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
-          .addImm(CallerAllocatedAmt >> 16);
-       BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
-          .addReg(TmpReg, RegState::Kill)
-          .addImm(CallerAllocatedAmt & 0xFFFF);
-       BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
-          .addReg(StackReg)
-          .addReg(FPReg)
-          .addReg(TmpReg);
-     }
-  } else if (RetOpcode == PPC::TCRETURNdi) {
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
-  } else if (RetOpcode == PPC::TCRETURNri) {
-    MBBI = prior(MBB.end());
-    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
-  } else if (RetOpcode == PPC::TCRETURNai) {
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
-  } else if (RetOpcode == PPC::TCRETURNdi8) {
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
-  } else if (RetOpcode == PPC::TCRETURNri8) {
-    MBBI = prior(MBB.end());
-    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
-  } else if (RetOpcode == PPC::TCRETURNai8) {
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
-  }
-}
-
 unsigned PPCRegisterInfo::getRARegister() const {
   return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
 }
 
 unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   if (!Subtarget.isPPC64())
-    return hasFP(MF) ? PPC::R31 : PPC::R1;
+    return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
   else
-    return hasFP(MF) ? PPC::X31 : PPC::X1;
-}
-
-void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
-                                                                         const {
-  // Initial state of the frame pointer is R1.
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(PPC::R1, 0);
-  Moves.push_back(MachineMove(0, Dst, Src));
+    return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
 }
 
 unsigned PPCRegisterInfo::getEHExceptionRegister() const {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 890b24b9c0a8..aa29ffef0676 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -44,17 +44,10 @@ public:
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  /// targetHandlesStackFrameRounding - Returns true if the target is
-  /// responsible for rounding up the stack frame (probably at emitPrologue
-  /// time).
-  bool targetHandlesStackFrameRounding() const { return true; }
-
   /// requiresRegisterScavenging - We require a register scavenger.
   /// FIXME (64-bit): Should be inlined.
   bool requiresRegisterScavenging(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -66,21 +59,9 @@ public:
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  /// determineFrameLayout - Determine the size of the frame and maximum call
-  /// frame size.
-  void determineFrameLayout(MachineFunction &MF) const;
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS = NULL) const;
-  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
-  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
 
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 8604f54de932..26391657fd1c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -300,13 +300,14 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
       // R31 when the FP is not needed.
       // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area
       // pointer.
-      const PPCSubtarget &Subtarget
-        = MF.getTarget().getSubtarget<PPCSubtarget>();
-         
+      const PPCSubtarget &Subtarget = MF.getTarget().getSubtarget<PPCSubtarget>();
+      const PPCFrameLowering *PPCFI =
+        static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+   
       if (Subtarget.isPPC64() || Subtarget.isSVR4ABI())
         return end()-5;  // don't allocate R13, R31, R0, R1, LR
         
-      if (needsFP(MF))
+      if (PPCFI->needsFP(MF))
         return end()-4;  // don't allocate R31, R0, R1, LR
       else
         return end()-3;  // don't allocate R0, R1, LR
@@ -331,7 +332,9 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
     }
     G8RCClass::iterator
     G8RCClass::allocation_order_end(const MachineFunction &MF) const {
-      if (needsFP(MF))
+      const PPCFrameLowering *PPCFI =
+        static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+      if (PPCFI->needsFP(MF))
         return end()-5;
       else
         return end()-4;
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 73447631b2ad..ad4da1fe224f 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -13,7 +13,7 @@
 
 
 def G3Itineraries : ProcessorItineraries<
-  [IU1, IU2, FPU1, BPU, SRU, SLU], [
+  [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntDivW     , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 7efc693fa8c9..03c3b29cc101 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 def G4Itineraries : ProcessorItineraries<
-  [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [
+  [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2]>]>,
   InstrItinData<IntDivW     , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 15056c0cfe44..00cac3c7cab2 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -15,7 +15,7 @@ def IU3    : FuncUnit; // integer unit 3 (7450 simple)
 def IU4    : FuncUnit; // integer unit 4 (7450 simple)
 
 def G4PlusItineraries : ProcessorItineraries<
-  [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [
+  [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
   InstrItinData<IntCompare  , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
   InstrItinData<IntDivW     , [InstrStage<23, [IU2]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 2dffc48b238f..1671f22b30ad 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 def G5Itineraries : ProcessorItineraries<
-  [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [
+  [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
   InstrItinData<IntGeneral  , [InstrStage<2, [IU1, IU2]>]>,
   InstrItinData<IntCompare  , [InstrStage<3, [IU1, IU2]>]>,
   InstrItinData<IntDivD     , [InstrStage<68, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 5d46065d96f2..72a1deeced44 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -129,7 +129,7 @@ void PPCSubtarget::SetJITMode() {
 /// is required to get the address of the global.
 bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
                                        const TargetMachine &TM) const {
-  // We never hae stubs if HasLazyResolverStubs=false or if in static mode.
+  // We never have stubs if HasLazyResolverStubs=false or if in static mode.
   if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
     return false;
   // If symbol visibility is hidden, the extra load is not needed if
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 10cd10b9038f..212b450e7db9 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "PPCMCAsmInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/PassManager.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/FormattedStream.h"
@@ -29,6 +30,21 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   
 }
 
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &OS,
+                                    MCCodeEmitter *Emitter,
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+  default:
+    return NULL;
+  }
+}
+
 extern "C" void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);  
@@ -36,6 +52,19 @@ extern "C" void LLVMInitializePowerPCTarget() {
   
   RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
   RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+  
+  // Register the MC Code Emitter
+  TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+  TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+  
+  
+  // Register the asm backend.
+  TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
+  TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
+  
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
 }
 
 
@@ -44,7 +73,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64Bit),
     DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
-    FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit),
+    FrameLowering(Subtarget), JITInfo(*this, is64Bit),
     TLInfo(*this), TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 626ddbb6a6bb..2d2498943a2d 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,7 +14,7 @@
 #ifndef PPC_TARGETMACHINE_H
 #define PPC_TARGETMACHINE_H
 
-#include "PPCFrameInfo.h"
+#include "PPCFrameLowering.h"
 #include "PPCSubtarget.h"
 #include "PPCJITInfo.h"
 #include "PPCInstrInfo.h"
@@ -33,7 +33,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
   PPCSubtarget        Subtarget;
   const TargetData    DataLayout;       // Calculates type size & alignment
   PPCInstrInfo        InstrInfo;
-  PPCFrameInfo        FrameInfo;
+  PPCFrameLowering    FrameLowering;
   PPCJITInfo          JITInfo;
   PPCTargetLowering   TLInfo;
   PPCSelectionDAGInfo TSInfo;
@@ -43,23 +43,25 @@ public:
   PPCTargetMachine(const Target &T, const std::string &TT,
                    const std::string &FS, bool is64Bit);
 
-  virtual const PPCInstrInfo     *getInstrInfo() const { return &InstrInfo; }
-  virtual const PPCFrameInfo     *getFrameInfo() const { return &FrameInfo; }
-  virtual       PPCJITInfo       *getJITInfo()         { return &JITInfo; }
+  virtual const PPCInstrInfo      *getInstrInfo() const { return &InstrInfo; }
+  virtual const PPCFrameLowering  *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  virtual       PPCJITInfo        *getJITInfo()         { return &JITInfo; }
   virtual const PPCTargetLowering *getTargetLowering() const { 
    return &TLInfo;
   }
   virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
     return &TSInfo;
   }
-  virtual const PPCRegisterInfo  *getRegisterInfo() const {
+  virtual const PPCRegisterInfo   *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
   
   virtual const TargetData    *getTargetData() const    { return &DataLayout; }
   virtual const PPCSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {  
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {  
+    return &InstrItins;
   }
 
   // Pass Pipeline Configuration
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 3465779e344b..349cd890d5ee 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -37,6 +37,31 @@ _f3:
 	ori r3, r2, 65535
 	blr 
 
+===-------------------------------------------------------------------------===
+
+This code:
+
+unsigned add32carry(unsigned sum, unsigned x) {
+ unsigned z = sum + x;
+ if (sum + x < x)
+     z++;
+ return z;
+}
+
+Should compile to something like:
+
+	addc r3,r3,r4
+	addze r3,r3
+
+instead we get:
+
+	add r3, r4, r3
+	cmplw cr7, r3, r4
+	mfcr r4 ; 1
+	rlwinm r4, r4, 29, 31, 31
+	add r3, r3, r4
+
+Ick.
 
 ===-------------------------------------------------------------------------===
 
@@ -260,8 +285,8 @@ including having this work sanely.
 Fix Darwin FP-In-Integer Registers ABI
 
 Darwin passes doubles in structures in integer registers, which is very very 
-bad.  Add something like a BIT_CONVERT to LLVM, then do an i-p transformation 
-that percolates these things out of functions.
+bad.  Add something like a BITCAST to LLVM, then do an i-p transformation that
+percolates these things out of functions.
 
 Check out how horrible this is:
 http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4faf8bcfd419..4e14fbbb09ba 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,29 +2,6 @@ Target Independent Opportunities:
 
 //===---------------------------------------------------------------------===//
 
-Dead argument elimination should be enhanced to handle cases when an argument is
-dead to an externally visible function.  Though the argument can't be removed
-from the externally visible function, the caller doesn't need to pass it in.
-For example in this testcase:
-
-  void foo(int X) __attribute__((noinline));
-  void foo(int X) { sideeffect(); }
-  void bar(int A) { foo(A+1); }
-
-We compile bar to:
-
-define void @bar(i32 %A) nounwind ssp {
-  %0 = add nsw i32 %A, 1                          ; <i32> [#uses=1]
-  tail call void @foo(i32 %0) nounwind noinline ssp
-  ret void
-}
-
-The add is dead, we could pass in 'i32 undef' instead.  This occurs for C++
-templates etc, which usually have linkonce_odr/weak_odr linkage, not internal
-linkage.
-
-//===---------------------------------------------------------------------===//
-
 With the recent changes to make the implicit def/use set explicit in
 machineinstrs, we should change the target descriptions for 'call' instructions
 so that the .td files don't list all the call-clobbered registers as implicit
@@ -41,7 +18,17 @@ This has a number of uses:
 
 //===---------------------------------------------------------------------===//
 
-Make the PPC branch selector target independant
+We should recognized various "overflow detection" idioms and translate them into
+llvm.uadd.with.overflow and similar intrinsics.  Here is a multiply idiom:
+
+unsigned int mul(unsigned int a,unsigned int b) {
+ if ((unsigned long long)a*b>0xffffffff)
+   exit(0);
+  return a*b;
+}
+
+The legalization code for mul-with-overflow needs to be made more robust before
+this can be implemented though.
 
 //===---------------------------------------------------------------------===//
 
@@ -53,41 +40,6 @@ right).
 
 //===---------------------------------------------------------------------===//
 
-Solve this DAG isel folding deficiency:
-
-int X, Y;
-
-void fn1(void)
-{
-  X = X | (Y << 3);
-}
-
-compiles to
-
-fn1:
-	movl Y, %eax
-	shll $3, %eax
-	orl X, %eax
-	movl %eax, X
-	ret
-
-The problem is the store's chain operand is not the load X but rather
-a TokenFactor of the load X and load Y, which prevents the folding.
-
-There are two ways to fix this:
-
-1. The dag combiner can start using alias analysis to realize that y/x
-   don't alias, making the store to X not dependent on the load from Y.
-2. The generated isel could be made smarter in the case it can't
-   disambiguate the pointers.
-
-Number 1 is the preferred solution.
-
-This has been "fixed" by a TableGen hack. But that is a short term workaround
-which will be removed once the proper fix is made.
-
-//===---------------------------------------------------------------------===//
-
 On targets with expensive 64-bit multiply, we could LSR this:
 
 for (i = ...; ++i) {
@@ -300,14 +252,6 @@ unsigned long reverse(unsigned v) {
     return v ^ (t >> 8);
 }
 
-Neither is this (very standard idiom):
-
-int f(int n)
-{
-  return (((n) << 24) | (((n) & 0xff00) << 8) 
-       | (((n) >> 8) & 0xff00) | ((n) >> 24));
-}
-
 //===---------------------------------------------------------------------===//
 
 [LOOP RECOGNITION]
@@ -343,8 +287,7 @@ unsigned int popcount(unsigned int input) {
   return count;
 }
 
-This is a form of idiom recognition for loops, the same thing that could be
-useful for recognizing memset/memcpy.
+This sort of thing should be added to the loop idiom pass.
 
 //===---------------------------------------------------------------------===//
 
@@ -374,14 +317,6 @@ this construct.
 
 //===---------------------------------------------------------------------===//
 
-[LOOP RECOGNITION]
-
-viterbi speeds up *significantly* if the various "history" related copy loops
-are turned into memcpy calls at the source level.  We need a "loops to memcpy"
-pass.
-
-//===---------------------------------------------------------------------===//
-
 [LOOP OPTIMIZATION]
 
 SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization
@@ -607,46 +542,21 @@ struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
 extern THotKey m_HotKey;
 THotKey GetHotKey () { return m_HotKey; }
 
-into (-O3 -fno-exceptions -static -fomit-frame-pointer):
-
-__Z9GetHotKeyv:
-	pushl	%esi
-	movl	8(%esp), %eax
-	movb	_m_HotKey+3, %cl
-	movb	_m_HotKey+4, %dl
-	movb	_m_HotKey+2, %ch
-	movw	_m_HotKey, %si
-	movw	%si, (%eax)
-	movb	%ch, 2(%eax)
-	movb	%cl, 3(%eax)
-	movb	%dl, 4(%eax)
-	popl	%esi
-	ret	$4
-
-GCC produces:
-
-__Z9GetHotKeyv:
-	movl	_m_HotKey, %edx
-	movl	4(%esp), %eax
-	movl	%edx, (%eax)
-	movzwl	_m_HotKey+4, %edx
-	movw	%dx, 4(%eax)
-	ret	$4
-
-The LLVM IR contains the needed alignment info, so we should be able to 
-merge the loads and stores into 4-byte loads:
-
-	%struct.THotKey = type { i16, i8, i8, i8 }
-define void @_Z9GetHotKeyv(%struct.THotKey* sret  %agg.result) nounwind  {
-...
-	%tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
-	%tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
-	%tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
-	%tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
-
-Alternatively, we should use a small amount of base-offset alias analysis
-to make it so the scheduler doesn't need to hold all the loads in regs at
-once.
+into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer):
+
+__Z9GetHotKeyv:                         ## @_Z9GetHotKeyv
+	movq	_m_HotKey@GOTPCREL(%rip), %rax
+	movzwl	(%rax), %ecx
+	movzbl	2(%rax), %edx
+	shlq	$16, %rdx
+	orq	%rcx, %rdx
+	movzbl	3(%rax), %ecx
+	shlq	$24, %rcx
+	orq	%rdx, %rcx
+	movzbl	4(%rax), %eax
+	shlq	$32, %rax
+	orq	%rcx, %rax
+	ret
 
 //===---------------------------------------------------------------------===//
 
@@ -658,42 +568,35 @@ implementations of ceil/floor/rint.
 Consider:
 
 int test() {
-  long long input[8] = {1,1,1,1,1,1,1,1};
+  long long input[8] = {1,0,1,0,1,0,1,0};
   foo(input);
 }
 
-We currently compile this into a memcpy from a global array since the 
-initializer is fairly large and not memset'able.  This is good, but the memcpy
-gets lowered to load/stores in the code generator.  This is also ok, except
-that the codegen lowering for memcpy doesn't handle the case when the source
-is a constant global.  This gives us atrocious code like this:
+Clang compiles this into:
 
-	call	"L1$pb"
-"L1$pb":
-	popl	%eax
-	movl	_C.0.1444-"L1$pb"+32(%eax), %ecx
-	movl	%ecx, 40(%esp)
-	movl	_C.0.1444-"L1$pb"+20(%eax), %ecx
-	movl	%ecx, 28(%esp)
-	movl	_C.0.1444-"L1$pb"+36(%eax), %ecx
-	movl	%ecx, 44(%esp)
-	movl	_C.0.1444-"L1$pb"+44(%eax), %ecx
-	movl	%ecx, 52(%esp)
-	movl	_C.0.1444-"L1$pb"+40(%eax), %ecx
-	movl	%ecx, 48(%esp)
-	movl	_C.0.1444-"L1$pb"+12(%eax), %ecx
-	movl	%ecx, 20(%esp)
-	movl	_C.0.1444-"L1$pb"+4(%eax), %ecx
-...
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 16, i1 false)
+  %0 = getelementptr [8 x i64]* %input, i64 0, i64 0
+  store i64 1, i64* %0, align 16
+  %1 = getelementptr [8 x i64]* %input, i64 0, i64 2
+  store i64 1, i64* %1, align 16
+  %2 = getelementptr [8 x i64]* %input, i64 0, i64 4
+  store i64 1, i64* %2, align 16
+  %3 = getelementptr [8 x i64]* %input, i64 0, i64 6
+  store i64 1, i64* %3, align 16
 
-instead of:
-	movl	$1, 16(%esp)
-	movl	$0, 20(%esp)
-	movl	$1, 24(%esp)
-	movl	$0, 28(%esp)
-	movl	$1, 32(%esp)
-	movl	$0, 36(%esp)
-	...
+Which gets codegen'd into:
+
+	pxor	%xmm0, %xmm0
+	movaps	%xmm0, -16(%rbp)
+	movaps	%xmm0, -32(%rbp)
+	movaps	%xmm0, -48(%rbp)
+	movaps	%xmm0, -64(%rbp)
+	movq	$1, -64(%rbp)
+	movq	$1, -48(%rbp)
+	movq	$1, -32(%rbp)
+	movq	$1, -16(%rbp)
+
+It would be better to have 4 movq's of 0 instead of the movaps's.
 
 //===---------------------------------------------------------------------===//
 
@@ -739,20 +642,6 @@ etc.  On X86, we miss a bunch of 'rotate by variable' cases because the rotate
 matching code in dag combine doesn't look through truncates aggressively 
 enough.  Here are some testcases reduces from GCC PR17886:
 
-unsigned long long f(unsigned long long x, int y) {
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f2(unsigned x, int y){
-  return (x << y) | (x >> 32-y); 
-} 
-unsigned long long f3(unsigned long long x){
-  int y = 9;
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f4(unsigned x){
-  int y = 10;
-  return (x << y) | (x >> 32-y); 
-}
 unsigned long long f5(unsigned long long x, unsigned long long y) {
   return (x << 8) | ((y >> 48) & 0xffull);
 }
@@ -771,10 +660,50 @@ unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
   }
 }
 
-On X86-64, we only handle f2/f3/f4 right.  On x86-32, a few of these 
-generate truly horrible code, instead of using shld and friends.  On
-ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
-badness.  PPC64 misses f, f5 and f6.  CellSPU aborts in isel.
+//===---------------------------------------------------------------------===//
+
+This (and similar related idioms):
+
+unsigned int foo(unsigned char i) {
+  return i | (i<<8) | (i<<16) | (i<<24);
+} 
+
+compiles into:
+
+define i32 @foo(i8 zeroext %i) nounwind readnone ssp noredzone {
+entry:
+  %conv = zext i8 %i to i32
+  %shl = shl i32 %conv, 8
+  %shl5 = shl i32 %conv, 16
+  %shl9 = shl i32 %conv, 24
+  %or = or i32 %shl9, %conv
+  %or6 = or i32 %or, %shl5
+  %or10 = or i32 %or6, %shl
+  ret i32 %or10
+}
+
+it would be better as:
+
+unsigned int bar(unsigned char i) {
+  unsigned int j=i | (i << 8); 
+  return j | (j<<16);
+}
+
+aka:
+
+define i32 @bar(i8 zeroext %i) nounwind readnone ssp noredzone {
+entry:
+  %conv = zext i8 %i to i32
+  %shl = shl i32 %conv, 8
+  %or = or i32 %shl, %conv
+  %shl5 = shl i32 %or, 16
+  %or6 = or i32 %shl5, %or
+  ret i32 %or6
+}
+
+or even i*0x01010101, depending on the speed of the multiplier.  The best way to
+handle this is to canonicalize it to a multiply in IR and have codegen handle
+lowering multiplies to shifts on cpus where shifts are faster.
 
 //===---------------------------------------------------------------------===//
 
@@ -804,18 +733,6 @@ codegen badness or something else (haven't investigated).
 
 //===---------------------------------------------------------------------===//
 
-We miss some instcombines for stuff like this:
-void bar (void);
-void foo (unsigned int a) {
-  /* This one is equivalent to a >= (3 << 2).  */
-  if ((a >> 2) >= 3)
-    bar ();
-}
-
-A few other related ones are in GCC PR14753.
-
-//===---------------------------------------------------------------------===//
-
 Divisibility by constant can be simplified (according to GCC PR12849) from
 being a mulhi to being a mul lo (cheaper).  Testcase:
 
@@ -906,16 +823,6 @@ The expression should optimize to something like
 
 //===---------------------------------------------------------------------===//
 
-void a(int variable)
-{
- if (variable == 4 || variable == 6)
-   bar();
-}
-This should optimize to "if ((variable | 2) == 6)".  Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
 unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return
 i;}
 unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
@@ -966,6 +873,12 @@ rshift_gt (unsigned int a)
  if ((a >> 2) > 5)
    bar ();
 }
+
+void neg_eq_cst(unsigned int a) {
+if (-a == 123)
+bar();
+}
+
 All should simplify to a single comparison.  All of these are
 currently not optimized with "clang -emit-llvm-bc | opt
 -std-compile-opts".
@@ -1033,18 +946,6 @@ Should also combine to x | 8.  Currently not optimized with "clang
 
 //===---------------------------------------------------------------------===//
 
-int a(int x) {return (x & 8) == 0 ? -1 : -9;}
-Should combine to (x | -9) ^ 8.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
-int a(int x) {return (x & 8) == 0 ? -9 : -1;}
-Should combine to x | -9.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
 int a(int x) {return ((x | -9) ^ 8) & x;}
 Should combine to x & -9.  Currently not optimized with "clang
 -emit-llvm-bc | opt -std-compile-opts".
@@ -1145,6 +1046,77 @@ int test (int a, int b, int c, int g) {
 It would be better to do the mul once to reduce codesize above the if.
 This is GCC PR38204.
 
+
+//===---------------------------------------------------------------------===//
+This simple function from 179.art:
+
+int winner, numf2s;
+struct { double y; int   reset; } *Y;
+
+void find_match() {
+   int i;
+   winner = 0;
+   for (i=0;i<numf2s;i++)
+       if (Y[i].y > Y[winner].y)
+              winner =i;
+}
+
+Compiles into (with clang TBAA):
+
+for.body:                                         ; preds = %for.inc, %bb.nph
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.inc ]
+  %i.01718 = phi i32 [ 0, %bb.nph ], [ %i.01719, %for.inc ]
+  %tmp4 = getelementptr inbounds %struct.anon* %tmp3, i64 %indvar, i32 0
+  %tmp5 = load double* %tmp4, align 8, !tbaa !4
+  %idxprom7 = sext i32 %i.01718 to i64
+  %tmp10 = getelementptr inbounds %struct.anon* %tmp3, i64 %idxprom7, i32 0
+  %tmp11 = load double* %tmp10, align 8, !tbaa !4
+  %cmp12 = fcmp ogt double %tmp5, %tmp11
+  br i1 %cmp12, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %i.017 = trunc i64 %indvar to i32
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %i.01719 = phi i32 [ %i.01718, %for.body ], [ %i.017, %if.then ]
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %tmp22
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+
+It is good that we hoisted the reloads of numf2's, and Y out of the loop and
+sunk the store to winner out.
+
+However, this is awful on several levels: the conditional truncate in the loop
+(-indvars at fault? why can't we completely promote the IV to i64?).
+
+Beyond that, we have a partially redundant load in the loop: if "winner" (aka 
+%i.01718) isn't updated, we reload Y[winner].y the next time through the loop.
+Similarly, the addressing that feeds it (including the sext) is redundant. In
+the end we get this generated assembly:
+
+LBB0_2:                                 ## %for.body
+                                        ## =>This Inner Loop Header: Depth=1
+	movsd	(%rdi), %xmm0
+	movslq	%edx, %r8
+	shlq	$4, %r8
+	ucomisd	(%rcx,%r8), %xmm0
+	jbe	LBB0_4
+	movl	%esi, %edx
+LBB0_4:                                 ## %for.inc
+	addq	$16, %rdi
+	incq	%rsi
+	cmpq	%rsi, %rax
+	jne	LBB0_2
+
+All things considered this isn't too bad, but we shouldn't need the movslq or
+the shlq instruction, or the load folded into ucomisd every time through the
+loop.
+
+On an x86-specific topic, if the loop can't be restructure, the movl should be a
+cmov.
+
 //===---------------------------------------------------------------------===//
 
 [STORE SINKING]
@@ -1216,6 +1188,29 @@ loadpre14.c loadpre15.c
 
 actually a conditional increment: loadpre18.c loadpre19.c
 
+//===---------------------------------------------------------------------===//
+
+[LOAD PRE / STORE SINKING / SPEC HACK]
+
+This is a chunk of code from 456.hmmer:
+
+int f(int M, int *mc, int *mpp, int *tpmm, int *ip, int *tpim, int *dpp,
+     int *tpdm, int xmb, int *bp, int *ms) {
+ int k, sc;
+ for (k = 1; k <= M; k++) {
+     mc[k] = mpp[k-1]   + tpmm[k-1];
+     if ((sc = ip[k-1]  + tpim[k-1]) > mc[k])  mc[k] = sc;
+     if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k])  mc[k] = sc;
+     if ((sc = xmb  + bp[k])         > mc[k])  mc[k] = sc;
+     mc[k] += ms[k];
+   }
+}
+
+It is very profitable for this benchmark to turn the conditional stores to mc[k]
+into a conditional move (select instr in IR) and allow the final store to do the
+store.  See GCC PR27313 for more details.  Note that this is valid to xform even
+with the new C++ memory model, since mc[k] is previously loaded and later
+stored.
 
 //===---------------------------------------------------------------------===//
 
@@ -1261,26 +1256,6 @@ SingleSource/Benchmarks/Misc/dt.c
 
 //===---------------------------------------------------------------------===//
 
-A/B get pinned to the stack because we turn an if/then into a select instead
-of PRE'ing the load/store.  This may be fixable in instcombine:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892
-
-struct X { int i; };
-int foo (int x) {
-  struct X a;
-  struct X b;
-  struct X *p;
-  a.i = 1;
-  b.i = 2;
-  if (x)
-    p = &a;
-  else
-    p = &b;
-  return p->i;
-}
-
-//===---------------------------------------------------------------------===//
-
 Interesting missed case because of control flow flattening (should be 2 loads):
 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
 With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | 
@@ -1308,12 +1283,6 @@ void foo (int a, struct T b)
 
 simplifylibcalls should do several optimizations for strspn/strcspn:
 
-strcspn(x, "") -> strlen(x)
-strcspn("", x) -> 0
-strspn("", x) -> 0
-strspn(x, "") -> strlen(x)
-strspn(x, "a") -> strchr(x, 'a')-x
-
 strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn):
 
 size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2,
@@ -1353,14 +1322,7 @@ Those should be turned into a switch.
         
 This is interesting for a couple reasons.  First, in this:
 
-        %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind
-        %strlen = call i32 @strlen(i8* %3072)  
-
-The strlen could be replaced with: %strlen = sub %3072, %3073, because the
-strcpy call returns a pointer to the end of the string.  Based on that, the
-endptr GEP just becomes equal to 3073, which eliminates a strlen call and GEP.
-
-Second, the memcpy+strlen strlen can be replaced with:
+The memcpy+strlen strlen can be replaced with:
 
         %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly 
 
@@ -1436,18 +1398,6 @@ This pattern repeats several times, basically doing:
 
 //===---------------------------------------------------------------------===//
 
-186.crafty also contains this code:
-
-%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
-%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906
-%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1
-%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
-%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909         
-
-The last strlen is computable as 1908-@pgn_event, which means 1910=1908.
-
-//===---------------------------------------------------------------------===//
-
 186.crafty has this interesting pattern with the "out.4543" variable:
 
 call void @llvm.memcpy.i32(
@@ -1509,22 +1459,6 @@ the float directly.
 
 //===---------------------------------------------------------------------===//
 
-#include <math.h>
-double foo(double a) {    return sin(a); }
-
-This compiles into this on x86-64 Linux:
-foo:
-	subq	$8, %rsp
-	call	sin
-	addq	$8, %rsp
-	ret
-vs:
-
-foo:
-        jmp sin
-
-//===---------------------------------------------------------------------===//
-
 The arg promotion pass should make use of nocapture to make its alias analysis
 stuff much more precise.
 
@@ -1644,21 +1578,6 @@ int bar() { return foo("abcd"); }
 
 //===---------------------------------------------------------------------===//
 
-InstCombine should use SimplifyDemandedBits to remove the or instruction:
-
-define i1 @test(i8 %x, i8 %y) {
-  %A = or i8 %x, 1
-  %B = icmp ugt i8 %A, 3
-  ret i1 %B
-}
-
-Currently instcombine calls SimplifyDemandedBits with either all bits or just
-the sign bit, if the comparison is obviously a sign test. In this case, we only
-need all but the bottom two bits from %A, and if we gave that mask to SDB it
-would delete the or instruction for us.
-
-//===---------------------------------------------------------------------===//
-
 functionattrs doesn't know much about memcpy/memset.  This function should be
 marked readnone rather than readonly, since it only twiddles local memory, but
 functionattrs doesn't handle memset/memcpy/memmove aggressively:
@@ -1674,6 +1593,10 @@ int foo() {
  return **p;
 }
 
+This can be seen at:
+$ clang t.c -S -o - -mkernel -O0 -emit-llvm | opt -functionattrs -S
+
+
 //===---------------------------------------------------------------------===//
 
 Missed instcombine transformation:
@@ -1689,14 +1612,6 @@ This should be optimized to a single compare.  Testcase derived from gcc.
 
 //===---------------------------------------------------------------------===//
 
-Missed instcombine transformation:
-void b();
-void a(int x) { if (((1<<x)&8)==0) b(); }
-
-The shift should be optimized out.  Testcase derived from gcc.
-
-//===---------------------------------------------------------------------===//
-
 Missed instcombine or reassociate transformation:
 int a(int a, int b) { return (a==12)&(b>47)&(b<58); }
 
@@ -1706,28 +1621,35 @@ from gcc.
 //===---------------------------------------------------------------------===//
 
 Missed instcombine transformation:
-define i32 @a(i32 %x) nounwind readnone {
-entry:
-  %rem = srem i32 %x, 32
-  %shl = shl i32 1, %rem
-  ret i32 %shl
-}
 
-The srem can be transformed to an and because if x is negative, the shift is
-undefined. Testcase derived from gcc.
+  %382 = srem i32 %tmp14.i, 64                    ; [#uses=1]
+  %383 = zext i32 %382 to i64                     ; [#uses=1]
+  %384 = shl i64 %381, %383                       ; [#uses=1]
+  %385 = icmp slt i32 %tmp14.i, 64                ; [#uses=1]
+
+The srem can be transformed to an and because if %tmp14.i is negative, the
+shift is undefined.  Testcase derived from 403.gcc.
 
 //===---------------------------------------------------------------------===//
 
-Missed instcombine/dagcombine transformation:
-define i32 @a(i32 %x, i32 %y) nounwind readnone {
-entry:
-  %mul = mul i32 %y, -8
-  %sub = sub i32 %x, %mul
-  ret i32 %sub
-}
+This is a range comparison on a divided result (from 403.gcc):
 
-Should compile to something like x+y*8, but currently compiles to an
-inefficient result.  Testcase derived from gcc.
+  %1337 = sdiv i32 %1336, 8                       ; [#uses=1]
+  %.off.i208 = add i32 %1336, 7                   ; [#uses=1]
+  %1338 = icmp ult i32 %.off.i208, 15             ; [#uses=1]
+  
+We already catch this (removing the sdiv) if there isn't an add, we should
+handle the 'add' as well.  This is a common idiom with it's builtin_alloca code.
+C testcase:
+
+int a(int x) { return (unsigned)(x/16+7) < 15; }
+
+Another similar case involves truncations on 64-bit targets:
+
+  %361 = sdiv i64 %.046, 8                        ; [#uses=1]
+  %362 = trunc i64 %361 to i32                    ; [#uses=2]
+...
+  %367 = icmp eq i32 %362, 0                      ; [#uses=1]
 
 //===---------------------------------------------------------------------===//
 
@@ -1855,13 +1777,12 @@ entry:
 }
 
 Generated code:
-       addq    %rcx, %rdx
-       movl    $0, %eax
-       adcq    $0, %rax
-       addq    %r8, %rax
-       movq    %rax, (%rdi)
-       movq    %rdx, (%rsi)
-       ret
+        addq	%rcx, %rdx
+        sbbq	%rax, %rax
+        subq	%rax, %r8
+        movq	%r8, (%rdi)
+        movq	%rdx, (%rsi)
+        ret
 
 Expected code:
        addq    %rcx, %rdx
@@ -1870,12 +1791,6 @@ Expected code:
        movq    %rdx, (%rsi)
        ret
 
-The generated SelectionDAG has an ADD of an ADDE, where both operands of the
-ADDE are zero. Replacing one of the operands of the ADDE with the other operand
-of the ADD, and replacing the ADD with the ADDE, should give the desired result.
-
-(That said, we are doing a lot better than gcc on this testcase. :) )
-
 //===---------------------------------------------------------------------===//
 
 Switch lowering generates less than ideal code for the following switch:
@@ -1919,21 +1834,433 @@ something like the following, which eliminates a branch:
 	ret
 .LBB0_2:
 	jmp	foo  # TAILCALL
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+int foo(int a) { return (a & (~15)) / 16; }
+
+Into:
+
+define i32 @foo(i32 %a) nounwind readnone ssp {
+entry:
+  %and = and i32 %a, -16
+  %div = sdiv i32 %and, 16
+  ret i32 %div
+}
+
+but this code (X & -A)/A is X >> log2(A) when A is a power of 2, so this case
+should be instcombined into just "a >> 4".
+
+We do get this at the codegen level, so something knows about it, but 
+instcombine should catch it earlier:
+
+_foo:                                   ## @foo
+## BB#0:                                ## %entry
+	movl	%edi, %eax
+	sarl	$4, %eax
+	ret
+
+//===---------------------------------------------------------------------===//
+
+This code (from GCC PR28685):
+
+int test(int a, int b) {
+  int lt = a < b;
+  int eq = a == b;
+  if (lt)
+    return 1;
+  return eq;
+}
+
+Is compiled to:
+
+define i32 @test(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %cmp5 = icmp eq i32 %a, %b
+  %conv6 = zext i1 %cmp5 to i32
+  ret i32 %conv6
+
+return:                                           ; preds = %entry
+  ret i32 1
+}
+
+it could be:
+
+define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+  %0 = icmp sle i32 %a, %b
+  %retval = zext i1 %0 to i32
+  ret i32 %retval
+}
+
+//===---------------------------------------------------------------------===//
+
+This code can be seen in viterbi:
+
+  %64 = call noalias i8* @malloc(i64 %62) nounwind
+...
+  %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind
+  %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind
+
+llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to
+fold to %62.  This is a security win (overflows of malloc will get caught)
+and also a performance win by exposing more memsets to the optimizer.
+
+This occurs several times in viterbi.
+
+Note that this would change the semantics of @llvm.objectsize which by its
+current definition always folds to a constant. We also should make sure that
+we remove checking in code like
+
+  char *p = malloc(strlen(s)+1);
+  __strcpy_chk(p, s, __builtin_objectsize(p, 0));
+
+//===---------------------------------------------------------------------===//
+
+This code (from Benchmarks/Dhrystone/dry.c):
+
+define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
+entry:
+  %sext = shl i32 %0, 24
+  %conv = ashr i32 %sext, 24
+  %sext6 = shl i32 %1, 24
+  %conv4 = ashr i32 %sext6, 24
+  %cmp = icmp eq i32 %conv, %conv4
+  %. = select i1 %cmp, i32 10000, i32 0
+  ret i32 %.
+}
+
+Should be simplified into something like:
+
+define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
+entry:
+  %sext = shl i32 %0, 24
+  %conv = and i32 %sext, 0xFF000000
+  %sext6 = shl i32 %1, 24
+  %conv4 = and i32 %sext6, 0xFF000000
+  %cmp = icmp eq i32 %conv, %conv4
+  %. = select i1 %cmp, i32 10000, i32 0
+  ret i32 %.
+}
+
+and then to:
+
+define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
+entry:
+  %conv = and i32 %0, 0xFF
+  %conv4 = and i32 %1, 0xFF
+  %cmp = icmp eq i32 %conv, %conv4
+  %. = select i1 %cmp, i32 10000, i32 0
+  ret i32 %.
+}
+//===---------------------------------------------------------------------===//
+
+clang -O3 currently compiles this code
+
+int g(unsigned int a) {
+  unsigned int c[100];
+  c[10] = a;
+  c[11] = a;
+  unsigned int b = c[10] + c[11];
+  if(b > a*2) a = 4;
+  else a = 8;
+  return a + 7;
+}
+
+into
+
+define i32 @g(i32 a) nounwind readnone {
+  %add = shl i32 %a, 1
+  %mul = shl i32 %a, 1
+  %cmp = icmp ugt i32 %add, %mul
+  %a.addr.0 = select i1 %cmp, i32 11, i32 15
+  ret i32 %a.addr.0
+}
+
+The icmp should fold to false. This CSE opportunity is only available
+after GVN and InstCombine have run.
+
+//===---------------------------------------------------------------------===//
+
+memcpyopt should turn this:
+
+define i8* @test10(i32 %x) {
+  %alloc = call noalias i8* @malloc(i32 %x) nounwind
+  call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false)
+  ret i8* %alloc
+}
+
+into a call to calloc.  We should make sure that we analyze calloc as
+aggressively as malloc though.
+
+//===---------------------------------------------------------------------===//
+
+clang -O3 doesn't optimize this:
+
+void f1(int* begin, int* end) {
+  std::fill(begin, end, 0);
+}
+
+into a memset.  This is PR8942.
+
 //===---------------------------------------------------------------------===//
-Given a branch where the two target blocks are identical ("ret i32 %b" in
-both), simplifycfg will simplify them away. But not so for a switch statement:
 
-define i32 @f(i32 %a, i32 %b) nounwind readnone {
+clang -O3 -fno-exceptions currently compiles this code:
+
+void f(int N) {
+  std::vector<int> v(N);
+
+  extern void sink(void*); sink(&v);
+}
+
+into
+
+define void @_Z1fi(i32 %N) nounwind {
 entry:
-        switch i32 %a, label %bb3 [
-                i32 4, label %bb
-                i32 6, label %bb
-        ]
+  %v2 = alloca [3 x i32*], align 8
+  %v2.sub = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 0
+  %tmpcast = bitcast [3 x i32*]* %v2 to %"class.std::vector"*
+  %conv = sext i32 %N to i64
+  store i32* null, i32** %v2.sub, align 8, !tbaa !0
+  %tmp3.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 1
+  store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
+  %tmp4.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 2
+  store i32* null, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
+  %cmp.i.i.i.i = icmp eq i32 %N, 0
+  br i1 %cmp.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i, label %cond.true.i.i.i.i
+
+_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i: ; preds = %entry
+  store i32* null, i32** %v2.sub, align 8, !tbaa !0
+  store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
+  %add.ptr.i5.i.i = getelementptr inbounds i32* null, i64 %conv
+  store i32* %add.ptr.i5.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
+  br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit
+
+cond.true.i.i.i.i:                                ; preds = %entry
+  %cmp.i.i.i.i.i = icmp slt i32 %N, 0
+  br i1 %cmp.i.i.i.i.i, label %if.then.i.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i
+
+if.then.i.i.i.i.i:                                ; preds = %cond.true.i.i.i.i
+  call void @_ZSt17__throw_bad_allocv() noreturn nounwind
+  unreachable
+
+_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i:    ; preds = %cond.true.i.i.i.i
+  %mul.i.i.i.i.i = shl i64 %conv, 2
+  %call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind
+  %0 = bitcast i8* %call3.i.i.i.i.i to i32*
+  store i32* %0, i32** %v2.sub, align 8, !tbaa !0
+  store i32* %0, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
+  %add.ptr.i.i.i = getelementptr inbounds i32* %0, i64 %conv
+  store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
+  call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false)
+  br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit
+
+This is just the handling the construction of the vector. Most surprising here
+is the fact that all three null stores in %entry are dead (because we do no
+cross-block DSE).
+
+Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i.
+This is a because the client of LazyValueInfo doesn't simplify all instruction
+operands, just selected ones.
+
+//===---------------------------------------------------------------------===//
+
+clang -O3 -fno-exceptions currently compiles this code:
+
+void f(char* a, int n) {
+  __builtin_memset(a, 0, n);
+  for (int i = 0; i < n; ++i)
+    a[i] = 0;
+}
+
+into:
+
+define void @_Z1fPci(i8* nocapture %a, i32 %n) nounwind {
+entry:
+  %conv = sext i32 %n to i64
+  tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %conv, i32 1, i1 false)
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tmp10 = add i32 %n, -1
+  %tmp11 = zext i32 %tmp10 to i64
+  %tmp12 = add i64 %tmp11, 1
+  call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %tmp12, i32 1, i1 false)
+  ret void
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold
+the two memset's together. The issue with %n seems to stem from poor handling
+of the original loop.
+
+To simplify this, we need SCEV to know that "n != 0" because of the dominating
+conditional.  That would turn the second memset into a simple memset of 'n'.
+
+//===---------------------------------------------------------------------===//
+
+clang -O3 -fno-exceptions currently compiles this code:
 
-bb:             ; preds = %entry, %entry
-        ret i32 %b
+struct S {
+  unsigned short m1, m2;
+  unsigned char m3, m4;
+};
 
-bb3:            ; preds = %entry
-        ret i32 %b
+void f(int N) {
+  std::vector<S> v(N);
+  extern void sink(void*); sink(&v);
 }
+
+into poor code for zero-initializing 'v' when N is >0. The problem is that
+S is only 6 bytes, but each element is 8 byte-aligned. We generate a loop and
+4 stores on each iteration. If the struct were 8 bytes, this gets turned into
+a memset.
+
+In order to handle this we have to:
+  A) Teach clang to generate metadata for memsets of structs that have holes in
+     them.
+  B) Teach clang to use such a memset for zero init of this struct (since it has
+     a hole), instead of doing elementwise zeroing.
+
 //===---------------------------------------------------------------------===//
+
+clang -O3 currently compiles this code:
+
+extern const int magic;
+double f() { return 0.0 * magic; }
+
+into
+
+@magic = external constant i32
+
+define double @_Z1fv() nounwind readnone {
+entry:
+  %tmp = load i32* @magic, align 4, !tbaa !0
+  %conv = sitofp i32 %tmp to double
+  %mul = fmul double %conv, 0.000000e+00
+  ret double %mul
+}
+
+We should be able to fold away this fmul to 0.0.  More generally, fmul(x,0.0)
+can be folded to 0.0 if we can prove that the LHS is not -0.0, not a NaN, and
+not an INF.  The CannotBeNegativeZero predicate in value tracking should be
+extended to support general "fpclassify" operations that can return 
+yes/no/unknown for each of these predicates.
+
+In this predicate, we know that uitofp is trivially never NaN or -0.0, and
+we know that it isn't +/-Inf if the floating point type has enough exponent bits
+to represent the largest integer value as < inf.
+
+//===---------------------------------------------------------------------===//
+
+When optimizing a transformation that can change the sign of 0.0 (such as the
+0.0*val -> 0.0 transformation above), it might be provable that the sign of the
+expression doesn't matter.  For example, by the above rules, we can't transform
+fmul(sitofp(x), 0.0) into 0.0, because x might be -1 and the result of the
+expression is defined to be -0.0.
+
+If we look at the uses of the fmul for example, we might be able to prove that
+all uses don't care about the sign of zero.  For example, if we have:
+
+  fadd(fmul(sitofp(x), 0.0), 2.0)
+
+Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can
+transform the fmul to 0.0, and then the fadd to 2.0.
+
+//===---------------------------------------------------------------------===//
+
+We should enhance memcpy/memcpy/memset to allow a metadata node on them
+indicating that some bytes of the transfer are undefined.  This is useful for
+frontends like clang when lowering struct copies, when some elements of the
+struct are undefined.  Consider something like this:
+
+struct x {
+  char a;
+  int b[4];
+};
+void foo(struct x*P);
+struct x testfunc() {
+  struct x V1, V2;
+  foo(&V1);
+  V2 = V1;
+
+  return V2;
+}
+
+We currently compile this to:
+$ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S
+
+
+%struct.x = type { i8, [4 x i32] }
+
+define void @testfunc(%struct.x* sret %agg.result) nounwind ssp {
+entry:
+  %V1 = alloca %struct.x, align 4
+  call void @foo(%struct.x* %V1)
+  %tmp1 = bitcast %struct.x* %V1 to i8*
+  %0 = bitcast %struct.x* %V1 to i160*
+  %srcval1 = load i160* %0, align 4
+  %tmp2 = bitcast %struct.x* %agg.result to i8*
+  %1 = bitcast %struct.x* %agg.result to i160*
+  store i160 %srcval1, i160* %1, align 4
+  ret void
+}
+
+This happens because SRoA sees that the temp alloca has is being memcpy'd into
+and out of and it has holes and it has to be conservative.  If we knew about the
+holes, then this could be much much better.
+
+Having information about these holes would also improve memcpy (etc) lowering at
+llc time when it gets inlined, because we can use smaller transfers.  This also
+avoids partial register stalls in some important cases.
+
+//===---------------------------------------------------------------------===//
+
+We don't fold (icmp (add) (add)) unless the two adds only have a single use.
+There are a lot of cases that we're refusing to fold in (e.g.) 256.bzip2, for
+example:
+
+ %indvar.next90 = add i64 %indvar89, 1     ;; Has 2 uses
+ %tmp96 = add i64 %tmp95, 1                ;; Has 1 use
+ %exitcond97 = icmp eq i64 %indvar.next90, %tmp96
+
+We don't fold this because we don't want to introduce an overlapped live range
+of the ivar.  However if we can make this more aggressive without causing
+performance issues in two ways:
+
+1. If *either* the LHS or RHS has a single use, we can definitely do the
+   transformation.  In the overlapping liverange case we're trading one register
+   use for one fewer operation, which is a reasonable trade.  Before doing this
+   we should verify that the llc output actually shrinks for some benchmarks.
+2. If both ops have multiple uses, we can still fold it if the operations are
+   both sinkable to *after* the icmp (e.g. in a subsequent block) which doesn't
+   increase register pressure.
+
+There are a ton of icmp's we aren't simplifying because of the reg pressure
+concern.  Care is warranted here though because many of these are induction
+variables and other cases that matter a lot to performance, like the above.
+Here's a blob of code that you can drop into the bottom of visitICmp to see some
+missed cases:
+
+  { Value *A, *B, *C, *D;
+    if (match(Op0, m_Add(m_Value(A), m_Value(B))) && 
+        match(Op1, m_Add(m_Value(C), m_Value(D))) &&
+        (A == C || A == D || B == C || B == D)) {
+      errs() << "OP0 = " << *Op0 << "  U=" << Op0->getNumUses() << "\n";
+      errs() << "OP1 = " << *Op1 << "  U=" << Op1->getNumUses() << "\n";
+      errs() << "CMP = " << I << "\n\n";
+    }
+  }
+
+//===---------------------------------------------------------------------===//
+
+
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index da629f6e63fb..000000000000
--- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMSparcAsmPrinter
-  SparcAsmPrinter.cpp
-  )
-add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile
deleted file mode 100644
index fe475389c680..000000000000
--- a/lib/Target/Sparc/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/Sparc/AsmPrinter/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSparcAsmPrinter
-
-# Hack: we need to include 'main' Sparc target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
deleted file mode 100644
index ab948bb37ff2..000000000000
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format SPARC assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Sparc.h"
-#include "SparcInstrInfo.h"
-#include "SparcTargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class SparcAsmPrinter : public AsmPrinter {
-  public:
-    explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "Sparc Assembly Printer";
-    }
-
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
-                         const char *Modifier = 0);
-    void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
-
-    virtual void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
-    static const char *getRegisterName(unsigned RegNo);
-
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O);
-
-    bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
-    
-    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
-                       const;
-  };
-} // end of anonymous namespace
-
-#include "SparcGenAsmWriter.inc"
-
-void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand (opNum);
-  bool CloseParen = false;
-  if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
-    O << "%hi(";
-    CloseParen = true;
-  } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
-             !MO.isReg() && !MO.isImm()) {
-    O << "%lo(";
-    CloseParen = true;
-  }
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    O << "%" << LowercaseString(getRegisterName(MO.getReg()));
-    break;
-
-  case MachineOperand::MO_Immediate:
-    O << (int)MO.getImm();
-    break;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    break;
-  case MachineOperand::MO_ExternalSymbol:
-    O << MO.getSymbolName();
-    break;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
-      << MO.getIndex();
-    break;
-  default:
-    llvm_unreachable("<unknown operand type>");
-  }
-  if (CloseParen) O << ")";
-}
-
-void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
-                                      raw_ostream &O, const char *Modifier) {
-  printOperand(MI, opNum, O);
-
-  // If this is an ADD operand, emit it like normal operands.
-  if (Modifier && !strcmp(Modifier, "arith")) {
-    O << ", ";
-    printOperand(MI, opNum+1, O);
-    return;
-  }
-
-  if (MI->getOperand(opNum+1).isReg() &&
-      MI->getOperand(opNum+1).getReg() == SP::G0)
-    return;   // don't print "+%g0"
-  if (MI->getOperand(opNum+1).isImm() &&
-      MI->getOperand(opNum+1).getImm() == 0)
-    return;   // don't print "+0"
-
-  O << "+";
-  if (MI->getOperand(opNum+1).isGlobal() ||
-      MI->getOperand(opNum+1).isCPI()) {
-    O << "%lo(";
-    printOperand(MI, opNum+1, O);
-    O << ")";
-  } else {
-    printOperand(MI, opNum+1, O);
-  }
-}
-
-bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
-                                  raw_ostream &O) {
-  std::string operand = "";
-  const MachineOperand &MO = MI->getOperand(opNum);
-  switch (MO.getType()) {
-  default: assert(0 && "Operand is not a register ");
-  case MachineOperand::MO_Register:
-    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
-           "Operand is not a physical register ");
-    operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
-    break;
-  }
-
-  unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
-  unsigned bbNum = MI->getParent()->getNumber();
-
-  O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
-  O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
-
-  O << "\t  sethi\t"
-    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "  
-    << operand << '\n' ;
-
-  O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
-  O << "\tor\t" << operand  
-    << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
-    << ")), " << operand << '\n';
-  O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; 
-  
-  return true;
-}
-
-void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
-                                     raw_ostream &O) {
-  int CC = (int)MI->getOperand(opNum).getImm();
-  O << SPARCCondCodeToString((SPCC::CondCodes)CC);
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,
-                                      const char *ExtraCode,
-                                      raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
-    switch (ExtraCode[0]) {
-    default: return true;  // Unknown modifier.
-    case 'r':
-     break;
-    }
-  }
-
-  printOperand(MI, OpNo, O);
-
-  return false;
-}
-
-bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo, unsigned AsmVariant,
-                                            const char *ExtraCode,
-                                            raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true;  // Unknown modifier
-
-  O << '[';
-  printMemOperand(MI, OpNo, O);
-  O << ']';
-
-  return false;
-}
-
-/// isBlockOnlyReachableByFallthough - Return true if the basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-///
-/// This overrides AsmPrinter's implementation to handle delay slots.
-bool SparcAsmPrinter::
-isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
-  // If this is a landing pad, it isn't a fall through.  If it has no preds,
-  // then nothing falls through to it.
-  if (MBB->isLandingPad() || MBB->pred_empty())
-    return false;
-  
-  // If there isn't exactly one predecessor, it can't be a fall through.
-  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
-  ++PI2;
-  if (PI2 != MBB->pred_end())
-    return false;
-  
-  // The predecessor has to be immediately before this block.
-  const MachineBasicBlock *Pred = *PI;
-  
-  if (!Pred->isLayoutSuccessor(MBB))
-    return false;
-  
-  // Check if the last terminator is an unconditional branch.
-  MachineBasicBlock::const_iterator I = Pred->end();
-  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
-    ; // Noop
-  return I == Pred->end() || !I->getDesc().isBarrier();
-}
-
-
-
-// Force static initialization.
-extern "C" void LLVMInitializeSparcAsmPrinter() { 
-  RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
-  RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
-}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 684cadfb57f7..6839234a4700 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -13,9 +13,11 @@ tablegen(SparcGenCallingConv.inc -gen-callingconv)
 add_llvm_target(SparcCodeGen
   DelaySlotFiller.cpp
   FPMover.cpp
+  SparcAsmPrinter.cpp
   SparcInstrInfo.cpp
   SparcISelDAGToDAG.cpp
   SparcISelLowering.cpp
+  SparcFrameLowering.cpp
   SparcMCAsmInfo.cpp
   SparcRegisterInfo.cpp
   SparcSubtarget.cpp
@@ -23,4 +25,4 @@ add_llvm_target(SparcCodeGen
   SparcSelectionDAGInfo.cpp
   )
 
-target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index aae5da856005..ee292758d186 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -7,21 +7,32 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This is a simple local pass that fills delay slots with NOPs.
-//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "delayslotfiller"
+#define DEBUG_TYPE "delay-slot-filler"
 #include "Sparc.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+
 using namespace llvm;
 
 STATISTIC(FilledSlots, "Number of delay slots filled");
 
+static cl::opt<bool> DisableDelaySlotFiller(
+  "disable-sparc-delay-filler",
+  cl::init(false),
+  cl::desc("Disable the Sparc delay slot filler."),
+  cl::Hidden);
+
 namespace {
   struct Filler : public MachineFunctionPass {
     /// Target machine description which we query for reg. names, data
@@ -47,6 +58,28 @@ namespace {
       return Changed;
     }
 
+    bool isDelayFiller(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator candidate);
+
+    void insertCallUses(MachineBasicBlock::iterator MI,
+                        SmallSet<unsigned, 32>& RegUses);
+
+    void insertDefsUses(MachineBasicBlock::iterator MI,
+                        SmallSet<unsigned, 32>& RegDefs,
+                        SmallSet<unsigned, 32>& RegUses);
+
+    bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+                    unsigned Reg);
+
+    bool delayHasHazard(MachineBasicBlock::iterator candidate,
+                        bool &sawLoad, bool &sawStore,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
+
+    MachineBasicBlock::iterator
+    findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+
   };
   char Filler::ID = 0;
 } // end of anonymous namespace
@@ -59,18 +92,201 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
 }
 
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
-/// Currently, we fill delay slots with NOPs. We assume there is only one
-/// delay slot per delayed instruction.
+/// We assume there is only one delay slot per delayed instruction.
 ///
 bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
+
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
     if (I->getDesc().hasDelaySlot()) {
+      MachineBasicBlock::iterator D = MBB.end();
       MachineBasicBlock::iterator J = I;
-      ++J;
-      BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP));
+
+      if (!DisableDelaySlotFiller)
+        D = findDelayInstr(MBB, I);
+
       ++FilledSlots;
       Changed = true;
+
+      if (D == MBB.end())
+        BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+      else
+        MBB.splice(++J, &MBB, D);
     }
   return Changed;
 }
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator slot)
+{
+  SmallSet<unsigned, 32> RegDefs;
+  SmallSet<unsigned, 32> RegUses;
+  bool sawLoad = false;
+  bool sawStore = false;
+
+  MachineBasicBlock::iterator I = slot;
+
+  if (slot->getOpcode() == SP::RET)
+    return MBB.end();
+
+  if (slot->getOpcode() == SP::RETL) {
+    --I;
+    if (I->getOpcode() != SP::RESTORErr)
+      return MBB.end();
+    //change retl to ret
+    slot->setDesc(TII->get(SP::RET));
+    return I;
+  }
+
+  //Call's delay filler can def some of call's uses.
+  if (slot->getDesc().isCall())
+    insertCallUses(slot, RegUses);
+  else
+    insertDefsUses(slot, RegDefs, RegUses);
+
+  bool done = false;
+
+  while (!done) {
+    done = (I == MBB.begin());
+
+    if (!done)
+      --I;
+
+    // skip debug value
+    if (I->isDebugValue())
+      continue;
+
+
+    if (I->hasUnmodeledSideEffects()
+        || I->isInlineAsm()
+        || I->isLabel()
+        || I->getDesc().hasDelaySlot()
+        || isDelayFiller(MBB, I))
+      break;
+
+    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+      insertDefsUses(I, RegDefs, RegUses);
+      continue;
+    }
+
+    return I;
+  }
+  return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+                            bool &sawLoad,
+                            bool &sawStore,
+                            SmallSet<unsigned, 32> &RegDefs,
+                            SmallSet<unsigned, 32> &RegUses)
+{
+
+  if (candidate->isImplicitDef() || candidate->isKill())
+    return true;
+
+  if (candidate->getDesc().mayLoad()) {
+    sawLoad = true;
+    if (sawStore)
+      return true;
+  }
+
+  if (candidate->getDesc().mayStore()) {
+    if (sawStore)
+      return true;
+    sawStore = true;
+    if (sawLoad)
+      return true;
+  }
+
+  for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+    const MachineOperand &MO = candidate->getOperand(i);
+    if (!MO.isReg())
+      continue; // skip
+
+    unsigned Reg = MO.getReg();
+
+    if (MO.isDef()) {
+      //check whether Reg is defined or used before delay slot.
+      if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+        return true;
+    }
+    if (MO.isUse()) {
+      //check whether Reg is defined before delay slot.
+      if (IsRegInSet(RegDefs, Reg))
+        return true;
+    }
+  }
+  return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+                            SmallSet<unsigned, 32>& RegUses)
+{
+
+  switch(MI->getOpcode()) {
+  default: llvm_unreachable("Unknown opcode.");
+  case SP::CALL: break;
+  case SP::JMPLrr:
+  case SP::JMPLri:
+    assert(MI->getNumOperands() >= 2);
+    const MachineOperand &Reg = MI->getOperand(0);
+    assert(Reg.isReg() && "JMPL first operand is not a register.");
+    assert(Reg.isUse() && "JMPL first operand is not a use.");
+    RegUses.insert(Reg.getReg());
+
+    const MachineOperand &RegOrImm = MI->getOperand(1);
+    if (RegOrImm.isImm())
+        break;
+    assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+    assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+    RegUses.insert(RegOrImm.getReg());
+    break;
+  }
+}
+
+//Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+                            SmallSet<unsigned, 32>& RegDefs,
+                            SmallSet<unsigned, 32>& RegUses)
+{
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+    if (MO.isDef())
+      RegDefs.insert(Reg);
+    if (MO.isUse())
+      RegUses.insert(Reg);
+
+  }
+}
+
+//returns true if the Reg or its alias is in the RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+  if (RegSet.count(Reg))
+    return true;
+  // check Aliased Registers
+  for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+       *Alias; ++ Alias)
+    if (RegSet.count(*Alias))
+      return true;
+
+  return false;
+}
+
+// return true if the candidate is a delay filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator candidate)
+{
+  if (candidate == MBB.begin())
+    return false;
+  const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
+  return prevdesc.hasDelaySlot();
+}
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index e4078487e167..27942c56fb3a 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -17,7 +17,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
                 SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
                 SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
new file mode 100644
index 000000000000..edde8427aa89
--- /dev/null
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -0,0 +1,251 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class SparcAsmPrinter : public AsmPrinter {
+  public:
+    explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Sparc Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                         const char *Modifier = 0);
+    void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+
+    virtual void EmitInstruction(const MachineInstr *MI) {
+      SmallString<128> Str;
+      raw_svector_ostream OS(Str);
+      printInstruction(MI, OS);
+      OutStreamer.EmitRawText(OS.str());
+    }
+    void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd.
+    static const char *getRegisterName(unsigned RegNo);
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+
+    bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS);
+    
+    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+                       const;
+  };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand (opNum);
+  bool CloseParen = false;
+  if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+    O << "%hi(";
+    CloseParen = true;
+  } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+             !MO.isReg() && !MO.isImm()) {
+    O << "%lo(";
+    CloseParen = true;
+  }
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << (int)MO.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    O << MO.getSymbolName();
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+      << MO.getIndex();
+    break;
+  default:
+    llvm_unreachable("<unknown operand type>");
+  }
+  if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O, const char *Modifier) {
+  printOperand(MI, opNum, O);
+
+  // If this is an ADD operand, emit it like normal operands.
+  if (Modifier && !strcmp(Modifier, "arith")) {
+    O << ", ";
+    printOperand(MI, opNum+1, O);
+    return;
+  }
+
+  if (MI->getOperand(opNum+1).isReg() &&
+      MI->getOperand(opNum+1).getReg() == SP::G0)
+    return;   // don't print "+%g0"
+  if (MI->getOperand(opNum+1).isImm() &&
+      MI->getOperand(opNum+1).getImm() == 0)
+    return;   // don't print "+0"
+
+  O << "+";
+  if (MI->getOperand(opNum+1).isGlobal() ||
+      MI->getOperand(opNum+1).isCPI()) {
+    O << "%lo(";
+    printOperand(MI, opNum+1, O);
+    O << ")";
+  } else {
+    printOperand(MI, opNum+1, O);
+  }
+}
+
+bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
+                                  raw_ostream &O) {
+  std::string operand = "";
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  default: assert(0 && "Operand is not a register ");
+  case MachineOperand::MO_Register:
+    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           "Operand is not a physical register ");
+    assert(MO.getReg() != SP::O7 && 
+           "%o7 is assigned as destination for getpcx!");
+    operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+    break;
+  }
+
+  unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
+  unsigned bbNum = MI->getParent()->getNumber();
+
+  O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+  O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
+
+  O << "\t  sethi\t"
+    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum 
+    << ")), "  << operand << '\n' ;
+
+  O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
+  O << "\tor\t" << operand  
+    << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+    << ")), " << operand << '\n';
+  O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; 
+  
+  return true;
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
+                                     raw_ostream &O) {
+  int CC = (int)MI->getOperand(opNum).getImm();
+  O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      unsigned AsmVariant,
+                                      const char *ExtraCode,
+                                      raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default: return true;  // Unknown modifier.
+    case 'r':
+     break;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+
+  return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                            unsigned OpNo, unsigned AsmVariant,
+                                            const char *ExtraCode,
+                                            raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true;  // Unknown modifier
+
+  O << '[';
+  printMemOperand(MI, OpNo, O);
+  O << ']';
+
+  return false;
+}
+
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+///
+/// This overrides AsmPrinter's implementation to handle delay slots.
+bool SparcAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+  // If this is a landing pad, it isn't a fall through.  If it has no preds,
+  // then nothing falls through to it.
+  if (MBB->isLandingPad() || MBB->pred_empty())
+    return false;
+  
+  // If there isn't exactly one predecessor, it can't be a fall through.
+  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+  ++PI2;
+  if (PI2 != MBB->pred_end())
+    return false;
+  
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *PI;
+  
+  if (!Pred->isLayoutSuccessor(MBB))
+    return false;
+  
+  // Check if the last terminator is an unconditional branch.
+  MachineBasicBlock::const_iterator I = Pred->end();
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+    ; // Noop
+  return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+
+
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() { 
+  RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+  RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
+}
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index 33ecfdf5f750..856f87ad1d37 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -24,9 +24,13 @@ def RetCC_Sparc32 : CallingConv<[
 
 // Sparc 32-bit C Calling convention.
 def CC_Sparc32 : CallingConv<[
-  // All arguments get passed in integer registers if there is space.
-  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  
+  //Custom assign SRet to [sp+64].
+  CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+  // i32 f32 arguments get passed in integer registers if there is space.
+  CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  // f64 arguments are split and passed through registers or through stack.
+  CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
   // Alternatively, they are assigned to the stack in 4-byte aligned units.
   CCAssignToStack<4, 4>
 ]>;
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 000000000000..320c8ca26d7e
--- /dev/null
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SparcInstrInfo &TII =
+    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Get the number of bytes to allocate from the FrameInfo
+  int NumBytes = (int) MFI->getStackSize();
+
+  // Emit the correct save instruction based on the number of bytes in
+  // the frame. Minimum stack frame size according to V8 ABI is:
+  //   16 words for register window spill
+  //    1 word for address of returned aggregate-value
+  // +  6 words for passing parameters on the stack
+  // ----------
+  //   23 words * 4 bytes per word = 92 bytes
+  NumBytes += 92;
+
+  // Round up to next doubleword boundary -- a double-word boundary
+  // is required by the ABI.
+  NumBytes = (NumBytes + 7) & ~7;
+  NumBytes = -NumBytes;
+
+  if (NumBytes >= -4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+  } else {
+    // Emit this the hard way.  This clobbers G1 which we always know is
+    // available here.
+    unsigned OffHi = (unsigned)NumBytes >> 10U;
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    // Emit G1 = G1 + I6
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+  }
+}
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+                                  MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const SparcInstrInfo &TII =
+    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+  DebugLoc dl = MBBI->getDebugLoc();
+  assert(MBBI->getOpcode() == SP::RETL &&
+         "Can only put epilog before 'retl' instruction!");
+  BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+    .addReg(SP::G0);
+}
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 000000000000..9a2ddc83f5aa
--- /dev/null
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+  const SparcSubtarget &STI;
+public:
+  explicit SparcFrameLowering(const SparcSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 4ea94c4cb560..8c6103dd8a39 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -44,9 +44,8 @@ public:
   SDNode *Select(SDNode *N);
 
   // Complex Pattern Selectors.
-  bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2);
-  bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base,
-                    SDValue &Offset);
+  bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
@@ -71,7 +70,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -112,8 +111,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
   return true;
 }
 
-bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
-                                     SDValue &R1,  SDValue &R2) {
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   if (Addr.getOpcode() == ISD::FrameIndex) return false;
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
       Addr.getOpcode() == ISD::TargetGlobalAddress)
@@ -160,7 +158,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
     } else {
       TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
     }
-    TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+    TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
                                      CurDAG->getRegister(SP::G0, MVT::i32)), 0);
 
     // FIXME: Handle div by immediate.
@@ -174,7 +172,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
     SDValue MulLHS = N->getOperand(0);
     SDValue MulRHS = N->getOperand(1);
     unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
-    SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+    SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
                                          MulLHS, MulRHS);
     // The high part is in the Y register.
     return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
@@ -196,8 +194,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-   if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1))
-     SelectADDRri(Op.getNode(), Op, Op0, Op1);
+   if (!SelectADDRrr(Op, Op0, Op1))
+     SelectADDRri(Op, Op0, Op1);
    break;
   }
 
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4099a628773f..196b87dd58d0 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,3 +1,4 @@
+
 //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -32,6 +33,47 @@ using namespace llvm;
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+                                 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+  assert (ArgFlags.isSRet());
+
+  //Assign SRet argument
+  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                         0,
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+                                MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+  static const unsigned RegList[] = {
+    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+  };
+  //Try to get first reg
+  if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  } else {
+    //Assign whole thing in stack
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8,4),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  //Try to get second reg
+  if (unsigned Reg = State.AllocateReg(RegList, 6))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(4,4),
+                                           LocVT, LocInfo));
+  return true;
+}
+
 #include "SparcGenCallingConv.inc"
 
 SDValue
@@ -41,6 +83,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  DebugLoc dl, SelectionDAG &DAG) const {
 
+  MachineFunction &MF = DAG.getMachineFunction();
+
   // CCValAssign - represent the assignment of the return value to locations.
   SmallVector<CCValAssign, 16> RVLocs;
 
@@ -53,10 +97,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
 
   // If this is the first return lowered for this function, add the regs to the
   // liveout set for the function.
-  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+  if (MF.getRegInfo().liveout_empty()) {
     for (unsigned i = 0; i != RVLocs.size(); ++i)
       if (RVLocs[i].isRegLoc())
-        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+        MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
   }
 
   SDValue Flag;
@@ -66,12 +110,24 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together with flags.
     Flag = Chain.getValue(1);
   }
+  // If the function returns a struct, copy the SRetReturnReg to I0
+  if (MF.getFunction()->hasStructRetAttr()) {
+    SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+    unsigned Reg = SFI->getSRetReturnReg();
+    if (!Reg)
+      llvm_unreachable("sret virtual register not created in the entry block");
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+    Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+    Flag = Chain.getValue(1);
+    if (MF.getRegInfo().liveout_empty())
+      MF.getRegInfo().addLiveOut(SP::I0);
+  }
 
   if (Flag.getNode())
     return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
@@ -100,135 +156,159 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                  ArgLocs, *DAG.getContext());
   CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
 
-  static const unsigned ArgRegs[] = {
-    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
-  };
-  const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
-  unsigned ArgOffset = 68;
+  const unsigned StackOffset = 92;
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    SDValue ArgValue;
     CCValAssign &VA = ArgLocs[i];
-    // FIXME: We ignore the register assignments of AnalyzeFormalArguments
-    // because it doesn't know how to split a double into two i32 registers.
-    EVT ObjectVT = VA.getValVT();
-    switch (ObjectVT.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Unhandled argument type!");
-    case MVT::i1:
-    case MVT::i8:
-    case MVT::i16:
-    case MVT::i32:
-      if (!Ins[i].Used) {                  // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-        unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-        MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
-        SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-        if (ObjectVT != MVT::i32) {
-          unsigned AssertOp = ISD::AssertSext;
-          Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
-                            DAG.getValueType(ObjectVT));
-          Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
-        }
-        InVals.push_back(Arg);
-      } else {
-        int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                            true);
-        SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-        SDValue Load;
-        if (ObjectVT == MVT::i32) {
-          Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
-                             false, false, 0);
-        } else {
-          ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
-
-          // Sparc is big endian, so add an offset based on the ObjectVT.
-          unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
-          FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
-                              DAG.getConstant(Offset, MVT::i32));
-          Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
-                                NULL, 0, ObjectVT, false, false, 0);
-          Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
-        }
-        InVals.push_back(Load);
-      }
 
-      ArgOffset += 4;
-      break;
-    case MVT::f32:
-      if (!Ins[i].Used) {                  // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-        // FP value is passed in an integer register.
-        unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-        MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
-        SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-
-        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
-        InVals.push_back(Arg);
-      } else {
-        int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                            true);
-        SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-        SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
-                                   false, false, 0);
-        InVals.push_back(Load);
-      }
-      ArgOffset += 4;
-      break;
+    if (i == 0  && Ins[i].Flags.isSRet()) {
+      //Get SRet from [%fp+64]
+      int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+      SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+      SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                                MachinePointerInfo(),
+                                false, false, 0);
+      InVals.push_back(Arg);
+      continue;
+    }
 
-    case MVT::i64:
-    case MVT::f64:
-      if (!Ins[i].Used) {                // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else {
-        SDValue HiVal;
-        if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-          unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-          MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
-          HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
-        } else {
-          int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                              true);
-          SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
-                              false, false, 0);
-        }
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+
+      if (VA.needsCustom()) {
+        assert(VA.getLocVT() == MVT::f64);
+        unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+        MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+        SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+        assert(i+1 < e);
+        CCValAssign &NextVA = ArgLocs[++i];
 
         SDValue LoVal;
-        if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-          unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-          MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
-          LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
-        } else {
-          int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
-                                                              true);
+        if (NextVA.isMemLoc()) {
+          int FrameIdx = MF.getFrameInfo()->
+            CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
           SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                              MachinePointerInfo(),
                               false, false, 0);
+        } else {
+          unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+                                        &SP::IntRegsRegClass, dl);
+          LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
         }
-
-        // Compose the two halves together into an i64 unit.
         SDValue WholeValue =
           DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+        WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+        InVals.push_back(WholeValue);
+        continue;
+      }
+      unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+      MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+      SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+      if (VA.getLocVT() == MVT::f32)
+        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+      else if (VA.getLocVT() != MVT::i32) {
+        Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+                          DAG.getValueType(VA.getLocVT()));
+        Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+      }
+      InVals.push_back(Arg);
+      continue;
+    }
 
-        // If we want a double, do a bit convert.
-        if (ObjectVT == MVT::f64)
-          WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
+    assert(VA.isMemLoc());
 
-        InVals.push_back(WholeValue);
+    unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+    if (VA.needsCustom()) {
+      assert(VA.getValVT() == MVT::f64);
+      //If it is double-word aligned, just load.
+      if (Offset % 8 == 0) {
+        int FI = MF.getFrameInfo()->CreateFixedObject(8,
+                                                      Offset,
+                                                      true);
+        SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+        SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+                                   MachinePointerInfo(),
+                                   false,false, 0);
+        InVals.push_back(Load);
+        continue;
       }
-      ArgOffset += 8;
-      break;
+
+      int FI = MF.getFrameInfo()->CreateFixedObject(4,
+                                                    Offset,
+                                                    true);
+      SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+      SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+      int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+                                                     Offset+4,
+                                                     true);
+      SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+      SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+
+      SDValue WholeValue =
+        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+      WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+      InVals.push_back(WholeValue);
+      continue;
+    }
+
+    int FI = MF.getFrameInfo()->CreateFixedObject(4,
+                                                  Offset,
+                                                  true);
+    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+    SDValue Load ;
+    if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+      Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+                         MachinePointerInfo(),
+                         false, false, 0);
+    } else {
+      ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+      // Sparc is big endian, so add an offset based on the ObjectVT.
+      unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+      FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+                          DAG.getConstant(Offset, MVT::i32));
+      Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+                            MachinePointerInfo(),
+                            VA.getValVT(), false, false,0);
+      Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
     }
+    InVals.push_back(Load);
+  }
+
+  if (MF.getFunction()->hasStructRetAttr()) {
+    //Copy the SRet Argument to SRetReturnReg
+    SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+    unsigned Reg = SFI->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+      SFI->setSRetReturnReg(Reg);
+    }
+    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
   }
 
   // Store remaining ArgRegs to the stack if this is a varargs function.
   if (isVarArg) {
+    static const unsigned ArgRegs[] = {
+      SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+    };
+    unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+    const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+    unsigned ArgOffset = CCInfo.getNextStackOffset();
+    if (NumAllocated == 6)
+      ArgOffset += StackOffset;
+    else {
+      assert(!ArgOffset);
+      ArgOffset = 68+4*NumAllocated;
+    }
+
     // Remember the vararg offset for the va_start implementation.
     FuncInfo->setVarArgsFrameOffset(ArgOffset);
 
@@ -243,7 +323,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                                                           true);
       SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
 
-      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
+      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+                                       MachinePointerInfo(),
                                        false, false, 0));
       ArgOffset += 4;
     }
@@ -270,191 +351,180 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // Sparc target does not yet support tail call optimization.
   isTailCall = false;
 
-#if 0
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs);
+  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
+                 *DAG.getContext());
   CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
 
   // Get the size of the outgoing arguments stack space requirement.
   unsigned ArgsSize = CCInfo.getNextStackOffset();
-  // FIXME: We can't use this until f64 is known to take two GPRs.
-#else
-  (void)CC_Sparc32;
-
-  // Count the size of the outgoing arguments.
-  unsigned ArgsSize = 0;
-  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    switch (Outs[i].VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("Unknown value type!");
-      case MVT::i1:
-      case MVT::i8:
-      case MVT::i16:
-      case MVT::i32:
-      case MVT::f32:
-        ArgsSize += 4;
-        break;
-      case MVT::i64:
-      case MVT::f64:
-        ArgsSize += 8;
-        break;
-    }
-  }
-  if (ArgsSize > 4*6)
-    ArgsSize -= 4*6;    // Space for first 6 arguments is prereserved.
-  else
-    ArgsSize = 0;
-#endif
 
   // Keep stack frames 8-byte aligned.
   ArgsSize = (ArgsSize+7) & ~7;
 
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  //Create local copies for byval args.
+  SmallVector<SDValue, 8> ByValArgs;
+  for (unsigned i = 0,  e = Outs.size(); i != e; ++i) {
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    if (!Flags.isByVal())
+      continue;
+
+    SDValue Arg = OutVals[i];
+    unsigned Size = Flags.getByValSize();
+    unsigned Align = Flags.getByValAlign();
+
+    int FI = MFI->CreateStackObject(Size, Align, false);
+    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+    SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+    Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+                          false,        //isVolatile,
+                          (Size <= 32), //AlwaysInline if size <= 32
+                          MachinePointerInfo(), MachinePointerInfo());
+    ByValArgs.push_back(FIPtr);
+  }
+
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
 
-#if 0
+  const unsigned StackOffset = 92;
   // Walk the register/memloc assignments, inserting copies/loads.
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+  for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+       i != e;
+       ++i, ++realArgIdx) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = OutVals[i];
+    SDValue Arg = OutVals[realArgIdx];
+
+    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+    //Use local copy if it is a byval arg.
+    if (Flags.isByVal())
+      Arg = ByValArgs[byvalArgIdx++];
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
 
-    // Arguments that can be passed on register must be kept at
-    // RegsToPass vector
-    if (VA.isRegLoc()) {
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    if (Flags.isSRet()) {
+      assert(VA.needsCustom());
+      // store SRet argument in %sp+64
+      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+      SDValue PtrOff = DAG.getIntPtrConstant(64);
+      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         MachinePointerInfo(),
+                                         false, false, 0));
       continue;
     }
 
-    assert(VA.isMemLoc());
-
-    // Create a store off the stack pointer for this argument.
-    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
-    // FIXME: VERIFY THAT 68 IS RIGHT.
-    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
-    PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
-    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
-                                       false, false, 0));
-  }
-
-#else
-  static const unsigned ArgRegs[] = {
-    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
-  };
-  unsigned ArgOffset = 68;
-
-  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    SDValue Val = OutVals[i];
-    EVT ObjectVT = Outs[i].VT;
-    SDValue ValToStore(0, 0);
-    unsigned ObjSize;
-    switch (ObjectVT.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Unhandled argument type!");
-    case MVT::i32:
-      ObjSize = 4;
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;
-      } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
-      }
-      break;
-    case MVT::f32:
-      ObjSize = 4;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;
-      } else {
-        // Convert this to a FP value in an int reg.
-        Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
-      }
-      break;
-    case MVT::f64: {
-      ObjSize = 8;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;    // Whole thing is passed in memory.
-        break;
+    if (VA.needsCustom()) {
+      assert(VA.getLocVT() == MVT::f64);
+
+      if (VA.isMemLoc()) {
+        unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        //if it is double-word aligned, just store.
+        if (Offset % 8 == 0) {
+          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+          MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                             MachinePointerInfo(),
+                                             false, false, 0));
+          continue;
+        }
       }
 
-      // Break into top and bottom parts by storing to the stack and loading
-      // out the parts as integers.  Top part goes in a reg.
       SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
-      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 
-                                   Val, StackPtr, NULL, 0,
+      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+                                   Arg, StackPtr, MachinePointerInfo(),
                                    false, false, 0);
       // Sparc is big-endian, so the high part comes first.
-      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
+      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
       // Increment the pointer to the other half.
       StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                              DAG.getIntPtrConstant(4));
       // Load the low part.
-      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
-
-      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Lo;
-        ArgOffset += 4;
-        ObjSize = 4;
+      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+      if (VA.isRegLoc()) {
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+        assert(i+1 != e);
+        CCValAssign &NextVA = ArgLocs[++i];
+        if (NextVA.isRegLoc()) {
+          RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+        } else {
+          //Store the low part in stack.
+          unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+          MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+                                             MachinePointerInfo(),
+                                             false, false, 0));
+        }
       } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+        unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        // Store the high part.
+        SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+        SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+                                           MachinePointerInfo(),
+                                           false, false, 0));
+        // Store the low part.
+        PtrOff = DAG.getIntPtrConstant(Offset+4);
+        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+                                           MachinePointerInfo(),
+                                           false, false, 0));
       }
-      break;
+      continue;
     }
-    case MVT::i64: {
-      ObjSize = 8;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;    // Whole thing is passed in memory.
-        break;
-      }
 
-      // Split the value into top and bottom part.  Top part goes in a reg.
-      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
-                                 DAG.getConstant(1, MVT::i32));
-      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
-                                 DAG.getConstant(0, MVT::i32));
-      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Lo;
-        ArgOffset += 4;
-        ObjSize = 4;
-      } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+    // Arguments that can be passed on register must be kept at
+    // RegsToPass vector
+    if (VA.isRegLoc()) {
+      if (VA.getLocVT() != MVT::f32) {
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+        continue;
       }
-      break;
-    }
+      Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
     }
 
-    if (ValToStore.getNode()) {
-      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
-      SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
-      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
-      MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, 
-                                         PtrOff, NULL, 0,
-                                         false, false, 0));
-    }
-    ArgOffset += ObjSize;
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+    PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
   }
-#endif
+
 
   // Emit all stores, make sure the occur before any copies into physregs.
   if (!MemOpChains.empty())
@@ -484,11 +554,22 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
 
-  std::vector<EVT> NodeTys;
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
-  SDValue Ops[] = { Chain, Callee, InFlag };
-  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+  // Returns a chain & a flag for retval copy to use
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    unsigned Reg = RegsToPass[i].first;
+    if (Reg >= SP::I0 && Reg <= SP::I7)
+      Reg = Reg-SP::I0+SP::O0;
+
+    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+  }
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
@@ -610,8 +691,8 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 
   // Sparc has no select or setcc: expand to SELECT_CC.
   setOperationAction(ISD::SELECT, MVT::i32, Expand);
@@ -701,6 +782,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::ITOF:       return "SPISD::ITOF";
   case SPISD::CALL:       return "SPISD::CALL";
   case SPISD::RET_FLAG:   return "SPISD::RET_FLAG";
+  case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+  case SPISD::FLUSHW:     return "SPISD::FLUSHW";
   }
 }
 
@@ -756,7 +839,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   }
 }
 
-SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, 
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   // FIXME there isn't really any debug info here
@@ -765,16 +848,16 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
   SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
   SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
 
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
     return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  
+
   SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
                                    getPointerTy());
   SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, 
+  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -786,16 +869,16 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
   SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
   SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
   SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
     return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
 
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, 
+  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
                                    getPointerTy());
   SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
   SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -803,13 +886,13 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
   // Convert the fp value to integer in an FP register.
   assert(Op.getValueType() == MVT::i32);
   Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 }
 
 static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
   assert(Op.getOperand(0).getValueType() == MVT::i32);
-  SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+  SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   // Convert the int value to FP in an FP register.
   return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
 }
@@ -832,13 +915,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
   if (LHS.getValueType() == MVT::i32) {
     std::vector<EVT> VTs;
     VTs.push_back(MVT::i32);
-    VTs.push_back(MVT::Flag);
+    VTs.push_back(MVT::Glue);
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
     Opc = SPISD::BRICC;
   } else {
-    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
     Opc = SPISD::BRFCC;
   }
@@ -863,13 +946,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   if (LHS.getValueType() == MVT::i32) {
     std::vector<EVT> VTs;
     VTs.push_back(LHS.getValueType());   // subcc returns a value
-    VTs.push_back(MVT::Flag);
+    VTs.push_back(MVT::Glue);
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
     Opc = SPISD::SELECT_ICC;
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
   } else {
-    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     Opc = SPISD::SELECT_FCC;
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
   }
@@ -891,8 +974,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
                 DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
                                 MVT::i32));
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
 }
 
 static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
@@ -902,27 +985,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
   SDValue VAListPtr = Node->getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   DebugLoc dl = Node->getDebugLoc();
-  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0,
-                               false, false, 0);
+  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+                               MachinePointerInfo(SV), false, false, 0);
   // Increment the pointer, VAList, to the next vaarg
   SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
                                   DAG.getConstant(VT.getSizeInBits()/8,
                                                   MVT::i32));
   // Store the incremented VAList to the legalized pointer
   InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
-                         VAListPtr, SV, 0, false, false, 0);
+                         VAListPtr, MachinePointerInfo(SV), false, false, 0);
   // Load the actual argument out of the pointer VAList, unless this is an
   // f64 load.
   if (VT != MVT::f64)
-    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0);
+    return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+                       false, false, 0);
 
   // Otherwise, load it as i64, then do a bitconvert.
-  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0,
+  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
                           false, false, 0);
 
   // Bit-Convert the value to f64.
   SDValue Ops[2] = {
-    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+    DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
     V.getValue(1)
   };
   return DAG.getMergeValues(Ops, 2, dl);
@@ -947,13 +1031,82 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
 }
 
 
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+                              dl, MVT::Other, DAG.getEntryNode());
+  return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned FrameReg = SP::I6;
+
+  uint64_t depth = Op.getConstantOperandVal(0);
+
+  SDValue FrameAddr;
+  if (depth == 0)
+    FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+  else {
+    // flush first to make sure the windowed registers' values are in stack
+    SDValue Chain = getFLUSHW(Op, DAG);
+    FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+    for (uint64_t i = 0; i != depth; ++i) {
+      SDValue Ptr = DAG.getNode(ISD::ADD,
+                                dl, MVT::i32,
+                                FrameAddr, DAG.getIntPtrConstant(56));
+      FrameAddr = DAG.getLoad(MVT::i32, dl,
+                              Chain,
+                              Ptr,
+                              MachinePointerInfo(), false, false, 0);
+    }
+  }
+  return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned RetReg = SP::I7;
+
+  uint64_t depth = Op.getConstantOperandVal(0);
+
+  SDValue RetAddr;
+  if (depth == 0)
+    RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+  else {
+    // flush first to make sure the windowed registers' values are in stack
+    SDValue Chain = getFLUSHW(Op, DAG);
+    RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
+    for (uint64_t i = 0; i != depth; ++i) {
+      SDValue Ptr = DAG.getNode(ISD::ADD,
+                                dl, MVT::i32,
+                                RetAddr,
+                                DAG.getIntPtrConstant((i == depth-1)?60:56));
+      RetAddr = DAG.getLoad(MVT::i32, dl,
+                            Chain,
+                            Ptr,
+                            MachinePointerInfo(), false, false, 0);
+    }
+  }
+  return RetAddr;
+}
+
 SDValue SparcTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
-  // Frame & Return address.  Currently unimplemented
-  case ISD::RETURNADDR: return SDValue();
-  case ISD::FRAMEADDR:  return SDValue();
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
   case ISD::GlobalTLSAddress:
     llvm_unreachable("TLS not implemented for Sparc.");
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
@@ -1009,6 +1162,8 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
 
   // Transfer the remainder of BB and its successor edges to sinkMBB.
   sinkMBB->splice(sinkMBB->begin(), BB,
@@ -1021,8 +1176,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   BB->addSuccessor(sinkMBB);
 
   BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
 
   //  copy0MBB:
   //   %FalseValue = ...
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index db39e083a836..849e4010af6b 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -36,7 +36,8 @@ namespace llvm {
 
       CALL,        // A call instruction.
       RET_FLAG,    // Return with a flag operand.
-      GLOBAL_BASE_REG // Global base reg for PIC
+      GLOBAL_BASE_REG, // Global base reg for PIC
+      FLUSHW       // FLUSH register windows to stack
     };
   }
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7ede8e7ebbe4..afa3c1f88f96 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -66,15 +66,200 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   return 0;
 }
 
+static bool IsIntegerCC(unsigned CC)
+{
+  return  (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+  switch(CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case SPCC::ICC_NE:   return SPCC::ICC_E;
+  case SPCC::ICC_E:    return SPCC::ICC_NE;
+  case SPCC::ICC_G:    return SPCC::ICC_LE;
+  case SPCC::ICC_LE:   return SPCC::ICC_G;
+  case SPCC::ICC_GE:   return SPCC::ICC_L;
+  case SPCC::ICC_L:    return SPCC::ICC_GE;
+  case SPCC::ICC_GU:   return SPCC::ICC_LEU;
+  case SPCC::ICC_LEU:  return SPCC::ICC_GU;
+  case SPCC::ICC_CC:   return SPCC::ICC_CS;
+  case SPCC::ICC_CS:   return SPCC::ICC_CC;
+  case SPCC::ICC_POS:  return SPCC::ICC_NEG;
+  case SPCC::ICC_NEG:  return SPCC::ICC_POS;
+  case SPCC::ICC_VC:   return SPCC::ICC_VS;
+  case SPCC::ICC_VS:   return SPCC::ICC_VC;
+
+  case SPCC::FCC_U:    return SPCC::FCC_O;
+  case SPCC::FCC_O:    return SPCC::FCC_U;
+  case SPCC::FCC_G:    return SPCC::FCC_LE;
+  case SPCC::FCC_LE:   return SPCC::FCC_G;
+  case SPCC::FCC_UG:   return SPCC::FCC_ULE;
+  case SPCC::FCC_ULE:  return SPCC::FCC_UG;
+  case SPCC::FCC_L:    return SPCC::FCC_GE;
+  case SPCC::FCC_GE:   return SPCC::FCC_L;
+  case SPCC::FCC_UL:   return SPCC::FCC_UGE;
+  case SPCC::FCC_UGE:  return SPCC::FCC_UL;
+  case SPCC::FCC_LG:   return SPCC::FCC_UE;
+  case SPCC::FCC_UE:   return SPCC::FCC_LG;
+  case SPCC::FCC_NE:   return SPCC::FCC_E;
+  case SPCC::FCC_E:    return SPCC::FCC_NE;
+  }
+}
+
+
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const
+{
+
+  MachineBasicBlock::iterator I = MBB.end();
+  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->isDebugValue())
+      continue;
+
+    //When we see a non-terminator, we are done
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    //Terminator is not a branch
+    if (!I->getDesc().isBranch())
+      return true;
+
+    //Handle Unconditional branches
+    if (I->getOpcode() == SP::BA) {
+      UnCondBrIter = I;
+
+      if (!AllowModify) {
+        TBB = I->getOperand(0).getMBB();
+        continue;
+      }
+
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
+      Cond.clear();
+      FBB = 0;
+
+      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        UnCondBrIter = MBB.end();
+        continue;
+      }
+
+      TBB = I->getOperand(0).getMBB();
+      continue;
+    }
+
+    unsigned Opcode = I->getOpcode();
+    if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+      return true; //Unknown Opcode
+
+    SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+    if (Cond.empty()) {
+      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+      if (AllowModify && UnCondBrIter != MBB.end() &&
+          MBB.isLayoutSuccessor(TargetBB)) {
+
+        //Transform the code
+        //
+        //    brCC L1
+        //    ba L2
+        // L1:
+        //    ..
+        // L2:
+        //
+        // into
+        //
+        //   brnCC L2
+        // L1:
+        //   ...
+        // L2:
+        //
+        BranchCode = GetOppositeBranchCondition(BranchCode);
+        MachineBasicBlock::iterator OldInst = I;
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+          .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+          .addMBB(TargetBB);
+        MBB.addSuccessor(TargetBB);
+        OldInst->eraseFromParent();
+        UnCondBrIter->eraseFromParent();
+
+        UnCondBrIter = MBB.end();
+        I = MBB.end();
+        continue;
+      }
+      FBB = TBB;
+      TBB = I->getOperand(0).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(BranchCode));
+      continue;
+    }
+    //FIXME: Handle subsequent conditional branches
+    //For now, we can't handle multiple conditional branches
+    return true;
+  }
+  return false;
+}
+
 unsigned
 SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                              MachineBasicBlock *FBB,
                              const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL)const{
-  // Can only insert uncond branches so far.
-  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
-  return 1;
+                             DebugLoc DL) const {
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "Sparc branch conditions should have one component!");
+
+  if (Cond.empty()) {
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+    return 1;
+  }
+
+  //Conditional branch
+  unsigned CC = Cond[0].getImm();
+
+  if (IsIntegerCC(CC))
+    BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+  else
+    BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+  if (!FBB)
+    return 1;
+
+  BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+  return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->isDebugValue())
+      continue;
+
+    if (I->getOpcode() != SP::BA
+        && I->getOpcode() != SP::BCOND
+        && I->getOpcode() != SP::FBCOND)
+      break; // Not a branch
+
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+  return Count;
 }
 
 void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index c00bd2198765..b2d24f52503b 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -58,8 +58,15 @@ public:
   /// any side effects other than storing to the stack slot.
   virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const;
-  
-  
+
+
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify = false) const ;
+
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 467ed48487ad..107232357b3b 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -95,10 +95,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
 def SDTSPITOF :
 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
 
-def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
-def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
-def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
-def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 
 def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
 def SPlo    : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
@@ -106,8 +106,8 @@ def SPlo    : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
 def SPftoi  : SDNode<"SPISD::FTOI", SDTSPFTOI>;
 def SPitof  : SDNode<"SPISD::ITOF", SDTSPITOF>;
 
-def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
-def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
 
 //  These are target-independent nodes, but have target-specific formats.
 def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
@@ -115,16 +115,20 @@ def SDT_SPCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                         SDTCisVT<1, i32> ]>;
 
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
-def SDT_SPCall    : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_SPCall    : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
 def call          : SDNode<"SPISD::CALL", SDT_SPCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
 
 def retflag       : SDNode<"SPISD::RET_FLAG", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw        : SDNode<"SPISD::FLUSHW", SDTNone,
+                           [SDNPHasChain]>;
 
 def getPCX        : Operand<i32> {
   let PrintMethod = "printGetPCX";
@@ -204,7 +208,7 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
    : InstSP<outs, ins, asmstr, pattern>;
 
 // GETPCX for PIC
-let Defs = [O7], Uses = [O7] in {
+let Defs = [O7] in {
   def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
 }
 
@@ -217,6 +221,17 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                             [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
+let hasSideEffects = 1, mayStore = 1 in {
+  let rd = 0, rs1 = 0, rs2 = 0 in
+    def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+                      "flushw",
+                      [(flushw)]>, Requires<[HasV9]>;
+  let rd = 0, rs1 = 1, simm13 = 3 in
+    def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+                   "ta 3",
+                   [(flushw)]>;
+}
+
 // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the 
 // fpmover pass.
 let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
@@ -233,32 +248,39 @@ let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.  This has to handle all
 // permutations of selection between i32/f32/f64 on ICC and FCC.
-let usesCustomInserter = 1 in {   // Expanded after instruction selection.
+  // Expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in { 
   def SELECT_CC_Int_ICC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_ICC PSEUDO!",
             [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
                                              imm:$Cond))]>;
-  def SELECT_CC_Int_FCC
-   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
-            "; SELECT_CC_Int_FCC PSEUDO!",
-            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
   def SELECT_CC_FP_ICC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_ICC PSEUDO!",
             [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
                                             imm:$Cond))]>;
-  def SELECT_CC_FP_FCC
-   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
-            "; SELECT_CC_FP_FCC PSEUDO!",
-            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+
   def SELECT_CC_DFP_ICC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_ICC PSEUDO!",
             [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
                                              imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+  def SELECT_CC_Int_FCC
+   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_Int_FCC PSEUDO!",
+            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+                                             imm:$Cond))]>;
+
+  def SELECT_CC_FP_FCC
+   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_FP_FCC PSEUDO!",
+            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+                                            imm:$Cond))]>;
   def SELECT_CC_DFP_FCC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_FCC PSEUDO!",
@@ -272,6 +294,9 @@ let usesCustomInserter = 1 in {   // Expanded after instruction selection.
 let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
   let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
     def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+
+  let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in
+    def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>;
 }
 
 // Section B.1 - Load Integer Instructions, p. 90
@@ -436,28 +461,34 @@ def LEA_ADDri   : F3_2<2, 0b000000,
 let Defs = [ICC] in                   
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
 
-defm ADDX  : F3_12<"addx", 0b001000, adde>;
+let Uses = [ICC] in
+  defm ADDX  : F3_12<"addx", 0b001000, adde>;
 
 // Section B.15 - Subtract Instructions, p. 110
 defm SUB    : F3_12  <"sub"  , 0b000100, sub>;
-defm SUBX   : F3_12  <"subx" , 0b001100, sube>;
+let Uses = [ICC] in 
+  defm SUBX   : F3_12  <"subx" , 0b001100, sube>;
 
-let Defs = [ICC] in {
+let Defs = [ICC] in 
   defm SUBCC  : F3_12  <"subcc", 0b010100, SPcmpicc>;
 
+let Uses = [ICC], Defs = [ICC] in
   def SUBXCCrr: F3_1<2, 0b011100, 
                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                 "subxcc $b, $c, $dst", []>;
-}
 
-// Section B.18 - Multiply Instructions, p. 113
-defm UMUL : F3_12np<"umul", 0b001010>;
-defm SMUL : F3_12  <"smul", 0b001011, mul>;
 
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+  defm UMUL : F3_12np<"umul", 0b001010>;
+  defm SMUL : F3_12  <"smul", 0b001011, mul>;
+}
 
 // Section B.19 - Divide Instructions, p. 115
-defm UDIV : F3_12np<"udiv", 0b001110>;
-defm SDIV : F3_12np<"sdiv", 0b001111>;
+let Defs = [Y] in {
+  defm UDIV : F3_12np<"udiv", 0b001110>;
+  defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
 
 // Section B.20 - SAVE and RESTORE, p. 117
 defm SAVE    : F3_12np<"save"   , 0b111100>;
@@ -504,11 +535,12 @@ let Uses = [FCC] in
 
 // Section B.24 - Call and Link Instruction, p. 125
 // This is the only Format 1 instruction
-let Uses = [O0, O1, O2, O3, O4, O5],
+let Uses = [O6],
     hasDelaySlot = 1, isCall = 1,
     Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
-    D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in { 
-  def CALL : InstSP<(outs), (ins calltarget:$dst),
+    D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+        ICC, FCC, Y] in {
+  def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
                     "call $dst", []> {
     bits<30> disp;
     let op = 1;
@@ -517,28 +549,30 @@ let Uses = [O0, O1, O2, O3, O4, O5],
   
   // indirect calls
   def JMPLrr : F3_1<2, 0b111000,
-                    (outs), (ins MEMrr:$ptr),
+                    (outs), (ins MEMrr:$ptr, variable_ops),
                     "call $ptr",
                     [(call ADDRrr:$ptr)]>;
   def JMPLri : F3_2<2, 0b111000,
-                    (outs), (ins MEMri:$ptr),
+                    (outs), (ins MEMri:$ptr, variable_ops),
                     "call $ptr",
                     [(call ADDRri:$ptr)]>;
 }
 
 // Section B.28 - Read State Register Instructions
-def RDY : F3_1<2, 0b101000,
-               (outs IntRegs:$dst), (ins),
-               "rd %y, $dst", []>;
+let Uses = [Y] in 
+  def RDY : F3_1<2, 0b101000,
+                 (outs IntRegs:$dst), (ins),
+                 "rd %y, $dst", []>;
 
 // Section B.29 - Write State Register Instructions
-def WRYrr : F3_1<2, 0b110000,
-                 (outs), (ins IntRegs:$b, IntRegs:$c),
-                 "wr $b, $c, %y", []>;
-def WRYri : F3_2<2, 0b110000,
-                 (outs), (ins IntRegs:$b, i32imm:$c),
-                 "wr $b, $c, %y", []>;
-
+let Defs = [Y] in {
+  def WRYrr : F3_1<2, 0b110000,
+                   (outs), (ins IntRegs:$b, IntRegs:$c),
+                   "wr $b, $c, %y", []>;
+  def WRYri : F3_2<2, 0b110000,
+                   (outs), (ins IntRegs:$b, i32imm:$c),
+                   "wr $b, $c, %y", []>;
+}
 // Convert Integer to Floating-point Instructions, p. 141
 def FITOS : F3_3<2, 0b110100, 0b011000100,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
@@ -660,48 +694,57 @@ let Defs = [FCC] in {
 let Predicates = [HasV9], Constraints = "$T = $dst" in {
   // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
   // FIXME: Add instruction encodings for the JIT some day.
-  def MOVICCrr
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-             "mov$cc %icc, $F, $dst",
-             [(set IntRegs:$dst,
-                         (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
-  def MOVICCri
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-             "mov$cc %icc, $F, $dst",
-             [(set IntRegs:$dst,
-                          (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
-  def MOVFCCrr
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-             "mov$cc %fcc0, $F, $dst",
-             [(set IntRegs:$dst,
-                         (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
-  def MOVFCCri
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-             "mov$cc %fcc0, $F, $dst",
-             [(set IntRegs:$dst,
-                          (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
-  def FMOVS_ICC
-    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-             "fmovs$cc %icc, $F, $dst",
-             [(set FPRegs:$dst,
-                         (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
-  def FMOVD_ICC
-    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-             "fmovd$cc %icc, $F, $dst",
-             [(set DFPRegs:$dst,
-                         (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
-  def FMOVS_FCC
-    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-             "fmovs$cc %fcc0, $F, $dst",
-             [(set FPRegs:$dst,
-                         (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
-  def FMOVD_FCC
-    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-             "fmovd$cc %fcc0, $F, $dst",
-             [(set DFPRegs:$dst,
-                         (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  let Uses = [ICC] in {
+    def MOVICCrr
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+               "mov$cc %icc, $F, $dst",
+               [(set IntRegs:$dst,
+                           (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+    def MOVICCri
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+               "mov$cc %icc, $F, $dst",
+               [(set IntRegs:$dst,
+                            (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [FCC] in {
+    def MOVFCCrr
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+               "mov$cc %fcc0, $F, $dst",
+               [(set IntRegs:$dst,
+                           (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+    def MOVFCCri
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+               "mov$cc %fcc0, $F, $dst",
+               [(set IntRegs:$dst,
+                            (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [ICC] in {
+    def FMOVS_ICC
+      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+               "fmovs$cc %icc, $F, $dst",
+               [(set FPRegs:$dst,
+                           (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+    def FMOVD_ICC
+      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+               "fmovd$cc %icc, $F, $dst",
+               [(set DFPRegs:$dst,
+                           (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [FCC] in {
+    def FMOVS_FCC
+      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+               "fmovs$cc %fcc0, $F, $dst",
+               [(set FPRegs:$dst,
+                           (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+    def FMOVD_FCC
+      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+               "fmovd$cc %fcc0, $F, $dst",
+               [(set DFPRegs:$dst,
+                           (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  }
 
 }
 
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index e34c1312810c..0b74308eb0ee 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -24,16 +24,23 @@ namespace llvm {
     /// VarArgsFrameOffset - Frame offset to start of varargs area.
     int VarArgsFrameOffset;
 
+    /// SRetReturnReg - Holds the virtual register into which the sret
+    /// argument is passed.
+    unsigned SRetReturnReg;
   public:
-    SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+    SparcMachineFunctionInfo()
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
     explicit SparcMachineFunctionInfo(MachineFunction &MF)
-      : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
 
     unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
     void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
 
     int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
     void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+    unsigned getSRetReturnReg() const { return SRetReturnReg; }
+    void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
   };
 }
 
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index c85db20d2b74..b010d04a27d1 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -52,10 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
-  return false;
-}
-
 void SparcRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -112,55 +108,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 void SparcRegisterInfo::
 processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
 
-void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the number of bytes to allocate from the FrameInfo
-  int NumBytes = (int) MFI->getStackSize();
-
-  // Emit the correct save instruction based on the number of bytes in
-  // the frame. Minimum stack frame size according to V8 ABI is:
-  //   16 words for register window spill
-  //    1 word for address of returned aggregate-value
-  // +  6 words for passing parameters on the stack
-  // ----------
-  //   23 words * 4 bytes per word = 92 bytes
-  NumBytes += 92;
-
-  // Round up to next doubleword boundary -- a double-word boundary
-  // is required by the ABI.
-  NumBytes = (NumBytes + 7) & ~7;
-  NumBytes = -NumBytes;
-  
-  if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way.  This clobbers G1 which we always know is 
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
-}
-
-void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                     MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  DebugLoc dl = MBBI->getDebugLoc();
-  assert(MBBI->getOpcode() == SP::RETL &&
-         "Can only put epilog before 'retl' instruction!");
-  BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
-    .addReg(SP::G0);
-}
-
 unsigned SparcRegisterInfo::getRARegister() const {
   return SP::I7;
 }
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 020ce567c956..d930b5398f82 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -26,16 +26,14 @@ class Type;
 struct SparcRegisterInfo : public SparcGenRegisterInfo {
   SparcSubtarget &Subtarget;
   const TargetInstrInfo &TII;
-  
+
   SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
 
-  /// Code Generation virtual methods...  
+  /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -45,9 +43,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index fede9299cc79..5ef4daed2fcf 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -45,6 +45,9 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
 def ICC : SparcCtrlReg<"ICC">;
 def FCC : SparcCtrlReg<"FCC">;
 
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
 // Integer registers
 def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
 def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index b58d6baa7601..b84eab568d29 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,9 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Sparc.h"
 #include "SparcMCAsmInfo.h"
 #include "SparcTargetMachine.h"
-#include "Sparc.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
@@ -34,8 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64bit),
     DataLayout(Subtarget.getDataLayout()),
-     TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+    TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+    FrameLowering(Subtarget) {
 }
 
 bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 322c82afbd57..c4bb6bd776d4 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,13 +14,14 @@
 #ifndef SPARCTARGETMACHINE_H
 #define SPARCTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "SparcInstrInfo.h"
-#include "SparcSubtarget.h"
 #include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
 #include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
 
@@ -30,13 +31,15 @@ class SparcTargetMachine : public LLVMTargetMachine {
   SparcTargetLowering TLInfo;
   SparcSelectionDAGInfo TSInfo;
   SparcInstrInfo InstrInfo;
-  TargetFrameInfo FrameInfo;
+  SparcFrameLowering FrameLowering;
 public:
   SparcTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS, bool is64bit);
 
   virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  virtual const TargetFrameInfo  *getFrameInfo() const { return &FrameInfo; }
+  virtual const TargetFrameLowering  *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const SparcSubtarget   *getSubtargetImpl() const{ return &Subtarget; }
   virtual const SparcRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index b35190a369ea..3cf95b57c5dc 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cctype>
+#include <cstdlib>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -162,7 +163,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
   
   errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
        << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
-  exit(1);
+  std::exit(1);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index c6be83a61080..000000000000
--- a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMSystemZAsmPrinter
-  SystemZAsmPrinter.cpp
-  )
-add_dependencies(LLVMSystemZAsmPrinter SystemZCodeGenTable_gen)
diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile
deleted file mode 100644
index 0f90ed369f77..000000000000
--- a/lib/Target/SystemZ/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZAsmPrinter
-
-# Hack: we need to include 'main' SystemZ target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
deleted file mode 100644
index d7ac8f50b69e..000000000000
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the SystemZ assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class SystemZAsmPrinter : public AsmPrinter {
-  public:
-    SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {}
-
-    virtual const char *getPassName() const {
-      return "SystemZ Assembly Printer";
-    }
-
-    void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
-                      const char* Modifier = 0);
-    void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
-    void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
-                            const char* Modifier = 0);
-    void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
-                             const char* Modifier = 0);
-    void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
-      O << (int16_t)MI->getOperand(OpNum).getImm();
-    }
-    void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
-      O << (int32_t)MI->getOperand(OpNum).getImm();
-    }
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-    void EmitInstruction(const MachineInstr *MI);
-  };
-} // end of anonymous namespace
-
-#include "SystemZGenAsmWriter.inc"
-
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  SmallString<128> Str;
-  raw_svector_ostream OS(Str);
-  printInstruction(MI, OS);
-  OutStreamer.EmitRawText(OS.str());
-}
-
-void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
-                                             raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  switch (MO.getType()) {
-  case MachineOperand::MO_Immediate:
-    O << MO.getImm();
-    return;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_GlobalAddress: {
-    const GlobalValue *GV = MO.getGlobal();
-    O << *Mang->getSymbol(GV);
-
-    // Assemble calls via PLT for externally visible symbols if PIC.
-    if (TM.getRelocationModel() == Reloc::PIC_ &&
-        !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
-        !GV->hasLocalLinkage())
-      O << "@PLT";
-
-    printOffset(MO.getOffset(), O);
-    return;
-  }
-  case MachineOperand::MO_ExternalSymbol: {
-    std::string Name(MAI->getGlobalPrefix());
-    Name += MO.getSymbolName();
-    O << Name;
-
-    if (TM.getRelocationModel() == Reloc::PIC_)
-      O << "@PLT";
-
-    return;
-  }
-  default:
-    assert(0 && "Not implemented yet!");
-  }
-}
-
-
-void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
-                                     raw_ostream &O, const char *Modifier) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register: {
-    assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
-            "Virtual registers should be already mapped!");
-    unsigned Reg = MO.getReg();
-    if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
-      if (strncmp(Modifier + 7, "even", 4) == 0)
-        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
-      else if (strncmp(Modifier + 7, "odd", 3) == 0)
-        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
-      else
-        assert(0 && "Invalid subreg modifier");
-    }
-
-    O << '%' << getRegisterName(Reg);
-    return;
-  }
-  case MachineOperand::MO_Immediate:
-    O << MO.getImm();
-    return;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    return;
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
-      << MO.getIndex();
-
-    return;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
-      << MO.getIndex();
-
-    printOffset(MO.getOffset(), O);
-    break;
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    break;
-  case MachineOperand::MO_ExternalSymbol: {
-    O << *GetExternalSymbolSymbol(MO.getSymbolName());
-    break;
-  }
-  default:
-    assert(0 && "Not implemented yet!");
-  }
-
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case SystemZII::MO_NO_FLAG:
-    break;
-  case SystemZII::MO_GOTENT:    O << "@GOTENT";    break;
-  case SystemZII::MO_PLT:       O << "@PLT";       break;
-  }
-
-  printOffset(MO.getOffset(), O);
-}
-
-void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MachineOperand &Base = MI->getOperand(OpNum);
-
-  // Print displacement operand.
-  printOperand(MI, OpNum+1, O);
-
-  // Print base operand (if any)
-  if (Base.getReg()) {
-    O << '(';
-    printOperand(MI, OpNum, O);
-    O << ')';
-  }
-}
-
-void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
-                                            raw_ostream &O,
-                                            const char *Modifier) {
-  const MachineOperand &Base = MI->getOperand(OpNum);
-  const MachineOperand &Index = MI->getOperand(OpNum+2);
-
-  // Print displacement operand.
-  printOperand(MI, OpNum+1, O);
-
-  // Print base operand (if any)
-  if (Base.getReg()) {
-    O << '(';
-    printOperand(MI, OpNum, O);
-    if (Index.getReg()) {
-      O << ',';
-      printOperand(MI, OpNum+2, O);
-    }
-    O << ')';
-  } else
-    assert(!Index.getReg() && "Should allocate base register first!");
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmPrinter() {
-  RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
-}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 880e56f0525b..1f5d3552ae7e 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -11,9 +11,11 @@ tablegen(SystemZGenCallingConv.inc -gen-callingconv)
 tablegen(SystemZGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(SystemZCodeGen
+  SystemZAsmPrinter.cpp
   SystemZISelDAGToDAG.cpp
   SystemZISelLowering.cpp
   SystemZInstrInfo.cpp
+  SystemZFrameLowering.cpp
   SystemZMCAsmInfo.cpp
   SystemZRegisterInfo.cpp
   SystemZSubtarget.cpp
@@ -21,4 +23,4 @@ add_llvm_target(SystemZCodeGen
   SystemZSelectionDAGInfo.cpp
   )
 
-target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
index 5b44090f3f12..6930e14c061e 100644
--- a/lib/Target/SystemZ/Makefile
+++ b/lib/Target/SystemZ/Makefile
@@ -17,7 +17,7 @@ BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \
                 SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \
                 SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 000000000000..fd4d8b70c75e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,223 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SystemZ assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class SystemZAsmPrinter : public AsmPrinter {
+  public:
+    SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "SystemZ Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                      const char* Modifier = 0);
+    void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+    void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                            const char* Modifier = 0);
+    void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                             const char* Modifier = 0);
+    void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+      O << (int16_t)MI->getOperand(OpNum).getImm();
+    }
+    void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+      O << (uint16_t)MI->getOperand(OpNum).getImm();
+    }
+    void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+      O << (int32_t)MI->getOperand(OpNum).getImm();
+    }
+    void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+      O << (uint32_t)MI->getOperand(OpNum).getImm();
+    }
+
+    void printInstruction(const MachineInstr *MI, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+
+    void EmitInstruction(const MachineInstr *MI);
+  };
+} // end of anonymous namespace
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  printInstruction(MI, OS);
+  OutStreamer.EmitRawText(OS.str());
+}
+
+void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
+                                             raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    O << *Mang->getSymbol(GV);
+
+    // Assemble calls via PLT for externally visible symbols if PIC.
+    if (TM.getRelocationModel() == Reloc::PIC_ &&
+        !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+        !GV->hasLocalLinkage())
+      O << "@PLT";
+
+    printOffset(MO.getOffset(), O);
+    return;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    std::string Name(MAI->getGlobalPrefix());
+    Name += MO.getSymbolName();
+    O << Name;
+
+    if (TM.getRelocationModel() == Reloc::PIC_)
+      O << "@PLT";
+
+    return;
+  }
+  default:
+    assert(0 && "Not implemented yet!");
+  }
+}
+
+
+void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+                                     raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register: {
+    assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+            "Virtual registers should be already mapped!");
+    unsigned Reg = MO.getReg();
+    if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
+      if (strncmp(Modifier + 7, "even", 4) == 0)
+        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
+      else if (strncmp(Modifier + 7, "odd", 3) == 0)
+        Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
+      else
+        assert(0 && "Invalid subreg modifier");
+    }
+
+    O << '%' << getRegisterName(Reg);
+    return;
+  }
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+      << MO.getIndex();
+
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+      << MO.getIndex();
+
+    printOffset(MO.getOffset(), O);
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+  case MachineOperand::MO_ExternalSymbol: {
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+  }
+  default:
+    assert(0 && "Not implemented yet!");
+  }
+
+  switch (MO.getTargetFlags()) {
+  default: assert(0 && "Unknown target flag on GV operand");
+  case SystemZII::MO_NO_FLAG:
+    break;
+  case SystemZII::MO_GOTENT:    O << "@GOTENT";    break;
+  case SystemZII::MO_PLT:       O << "@PLT";       break;
+  }
+
+  printOffset(MO.getOffset(), O);
+}
+
+void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  const MachineOperand &Base = MI->getOperand(OpNum);
+
+  // Print displacement operand.
+  printOperand(MI, OpNum+1, O);
+
+  // Print base operand (if any)
+  if (Base.getReg()) {
+    O << '(';
+    printOperand(MI, OpNum, O);
+    O << ')';
+  }
+}
+
+void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
+                                            raw_ostream &O,
+                                            const char *Modifier) {
+  const MachineOperand &Base = MI->getOperand(OpNum);
+  const MachineOperand &Index = MI->getOperand(OpNum+2);
+
+  // Print displacement operand.
+  printOperand(MI, OpNum+1, O);
+
+  // Print base operand (if any)
+  if (Base.getReg()) {
+    O << '(';
+    printOperand(MI, OpNum, O);
+    if (Index.getReg()) {
+      O << ',';
+      printOperand(MI, OpNum+2, O);
+    }
+    O << ')';
+  } else
+    assert(!Index.getReg() && "Should allocate base register first!");
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+  RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 000000000000..2ad84a2d052e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,386 @@
+//=====- SystemZFrameLowering.cpp - SystemZ Frame Information ------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
+  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
+  // Fill the spill offsets map
+  static const unsigned SpillOffsTab[][2] = {
+    { SystemZ::R2D,  0x10 },
+    { SystemZ::R3D,  0x18 },
+    { SystemZ::R4D,  0x20 },
+    { SystemZ::R5D,  0x28 },
+    { SystemZ::R6D,  0x30 },
+    { SystemZ::R7D,  0x38 },
+    { SystemZ::R8D,  0x40 },
+    { SystemZ::R9D,  0x48 },
+    { SystemZ::R10D, 0x50 },
+    { SystemZ::R11D, 0x58 },
+    { SystemZ::R12D, 0x60 },
+    { SystemZ::R13D, 0x68 },
+    { SystemZ::R14D, 0x70 },
+    { SystemZ::R15D, 0x78 }
+  };
+
+  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+  for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+    RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// needsFP - Return true if the specified function should have a dedicated
+/// frame pointer register.  This is true if the function has variable sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                  int64_t NumBytes, const TargetInstrInfo &TII) {
+  unsigned Opc; uint64_t Chunk;
+  bool isSub = NumBytes < 0;
+  uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+  if (Offset >= (1LL << 15) - 1) {
+    Opc = SystemZ::ADD64ri32;
+    Chunk = (1LL << 31) - 1;
+  } else {
+    Opc = SystemZ::ADD64ri16;
+    Chunk = (1LL << 15) - 1;
+  }
+
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  while (Offset) {
+    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+      .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
+    // The PSW implicit def is dead.
+    MI->getOperand(3).setIsDead();
+    Offset -= ThisVal;
+  }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SystemZInstrInfo &TII =
+    *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+  SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  // Note that area for callee-saved stuff is already allocated, thus we need to
+  // 'undo' the stack movement.
+  uint64_t StackSize = MFI->getStackSize();
+  StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+  uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+  // Skip the callee-saved push instructions.
+  while (MBBI != MBB.end() &&
+         (MBBI->getOpcode() == SystemZ::MOV64mr ||
+          MBBI->getOpcode() == SystemZ::MOV64mrm))
+    ++MBBI;
+
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  // adjust stack pointer: R15 -= numbytes
+  if (StackSize || MFI->hasCalls()) {
+    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+           "Invalid stack frame calculation!");
+    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+  }
+
+  if (hasFP(MF)) {
+    // Update R11 with the new base value...
+    BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+      .addReg(SystemZ::R15D);
+
+    // Mark the FramePtr as live-in in every block except the entry.
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+         I != E; ++I)
+      I->addLiveIn(SystemZ::R11D);
+
+  }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+                                    MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const SystemZInstrInfo &TII =
+    *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+  SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  unsigned RetOpcode = MBBI->getOpcode();
+
+  switch (RetOpcode) {
+  case SystemZ::RET: break;  // These are ok
+  default:
+    assert(0 && "Can only insert epilog into returning blocks");
+  }
+
+  // Get the number of bytes to allocate from the FrameInfo
+  // Note that area for callee-saved stuff is already allocated, thus we need to
+  // 'undo' the stack movement.
+  uint64_t StackSize =
+    MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+  uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+  // Skip the final terminator instruction.
+  while (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PI = prior(MBBI);
+    --MBBI;
+    if (!PI->getDesc().isTerminator())
+      break;
+  }
+
+  // During callee-saved restores emission stack frame was not yet finialized
+  // (and thus - the stack size was unknown). Tune the offset having full stack
+  // size in hands.
+  if (StackSize || MFI->hasCalls()) {
+    assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+            MBBI->getOpcode() == SystemZ::MOV64rm) &&
+           "Expected to see callee-save register restore code");
+    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+           "Invalid stack frame calculation!");
+
+    unsigned i = 0;
+    MachineInstr &MI = *MBBI;
+    while (!MI.getOperand(i).isImm()) {
+      ++i;
+      assert(i < MI.getNumOperands() && "Unexpected restore code!");
+    }
+
+    uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+    // If Offset does not fit into 20-bit signed displacement field we need to
+    // emit some additional code...
+    if (Offset > 524287) {
+      // Fold the displacement into load instruction as much as possible.
+      NumBytes = Offset - 524287;
+      Offset = 524287;
+      emitSPUpdate(MBB, MBBI, NumBytes, TII);
+    }
+
+    MI.getOperand(i).ChangeToImmediate(Offset);
+  }
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                          int FI) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+  uint64_t StackSize = MFI->getStackSize();
+
+  // Fixed objects are really located in the "previous" frame.
+  if (FI < 0)
+    StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+  Offset += StackSize - getOffsetOfLocalArea();
+
+  // Skip the register save area if we generated the stack frame.
+  if (StackSize || MFI->hasCalls())
+    Offset -= getOffsetOfLocalArea();
+
+  return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  unsigned CalleeFrameSize = 0;
+
+  // Scan the callee-saved and find the bounds of register spill area.
+  unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (!SystemZ::FP64RegClass.contains(Reg)) {
+      unsigned Offset = RegSpillOffsets[Reg];
+      CalleeFrameSize += 8;
+      if (StartOffset > Offset) {
+        LowReg = Reg; StartOffset = Offset;
+      }
+      if (EndOffset < Offset) {
+        HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+      }
+    }
+  }
+
+  // Save information for epilogue inserter.
+  MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+  MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+  // Save GPRs
+  if (StartOffset) {
+    // Build a store instruction. Use STORE MULTIPLE instruction if there are many
+    // registers to store, otherwise - just STORE.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+                                    SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+    // Add store operands.
+    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+    MIB.addReg(LowReg, RegState::Kill);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Kill);
+
+    // Do a second scan adding regs as being killed by instruction
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      // Add the callee-saved register as live-in. It's killed at the spill.
+      MBB.addLiveIn(Reg);
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitKill);
+    }
+  }
+
+  // Save FPRs
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (SystemZ::FP64RegClass.contains(Reg)) {
+      MBB.addLiveIn(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+                              &SystemZ::FP64RegClass, TRI);
+    }
+  }
+
+  return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                              MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  // Restore FP registers
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (SystemZ::FP64RegClass.contains(Reg))
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                               &SystemZ::FP64RegClass, TRI);
+  }
+
+  // Restore GP registers
+  unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+  unsigned StartOffset = RegSpillOffsets[LowReg];
+
+  if (StartOffset) {
+    // Build a load instruction. Use LOAD MULTIPLE instruction if there are many
+    // registers to load, otherwise - just LOAD.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+                                    SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add store operands.
+    MIB.addReg(LowReg, RegState::Define);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Define);
+
+    MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+    MIB.addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+
+    // Do a second scan adding regs as being defined by instruction
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+  }
+
+  return true;
+}
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  // Determine whether R15/R14 will ever be clobbered inside the function. And
+  // if yes - mark it as 'callee' saved.
+  MachineFrameInfo *FFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether high FPRs are ever used, if yes - we need to save R15 as
+  // well.
+  static const unsigned HighFPRs[] = {
+    SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
+    SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+    SystemZ::F8S,  SystemZ::F9S,  SystemZ::F10S, SystemZ::F11S,
+    SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+  };
+
+  bool HighFPRsUsed = false;
+  for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+    HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+  if (FFI->hasCalls())
+    /* FIXME: function is varargs */
+    /* FIXME: function grabs RA */
+    /* FIXME: function calls eh_return */
+    MRI.setPhysRegUsed(SystemZ::R14D);
+
+  if (HighFPRsUsed ||
+      FFI->hasCalls() ||
+      FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+      FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+      /* FIXME: function is varargs */)
+    MRI.setPhysRegUsed(SystemZ::R15D);
+}
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 000000000000..1284b6802b3a
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//=- SystemZFrameLowering.h - Define frame lowering for z/System -*- C++ -*--=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+  class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+  IndexedMap<unsigned> RegSpillOffsets;
+protected:
+  const SystemZSubtarget &STI;
+
+public:
+  explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
+  bool hasFP(const MachineFunction &MF) const;
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index ed290ca7ed95..2186ff1fed54 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -120,18 +120,17 @@ namespace {
     #include "SystemZGenDAGISel.inc"
 
   private:
-    bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+    bool SelectAddrRI12Only(SDValue& Addr,
                             SDValue &Base, SDValue &Disp);
-    bool SelectAddrRI12(SDNode *Op, SDValue& Addr,
+    bool SelectAddrRI12(SDValue& Addr,
                         SDValue &Base, SDValue &Disp,
                         bool is12BitOnly = false);
-    bool SelectAddrRI(SDNode *Op, SDValue& Addr,
-                      SDValue &Base, SDValue &Disp);
-    bool SelectAddrRRI12(SDNode *Op, SDValue Addr,
+    bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+    bool SelectAddrRRI12(SDValue Addr,
                          SDValue &Base, SDValue &Disp, SDValue &Index);
-    bool SelectAddrRRI20(SDNode *Op, SDValue Addr,
+    bool SelectAddrRRI20(SDValue Addr,
                          SDValue &Base, SDValue &Disp, SDValue &Index);
-    bool SelectLAAddr(SDNode *Op, SDValue Addr,
+    bool SelectLAAddr(SDValue Addr,
                       SDValue &Base, SDValue &Disp, SDValue &Index);
 
     SDNode *Select(SDNode *Node);
@@ -142,8 +141,6 @@ namespace {
     bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
                       bool is12Bit, unsigned Depth = 0);
     bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
-    bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
-                        bool is12Bit);
   };
 }  // end anonymous namespace
 
@@ -355,12 +352,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
 
 /// Returns true if the address can be represented by a base register plus
 /// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
                                              SDValue &Base, SDValue &Disp) {
-  return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
+  return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
 }
 
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
                                          SDValue &Base, SDValue &Disp,
                                          bool is12BitOnly) {
   SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
@@ -410,7 +407,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
 
 /// Returns true if the address can be represented by a base register plus
 /// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
                                        SDValue &Base, SDValue &Disp) {
   SystemZRRIAddressMode AM(/*isRI*/true);
   bool Done = false;
@@ -453,7 +450,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
 
 /// Returns true if the address can be represented by a base register plus
 /// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM20, AM12;
   bool Done = false;
@@ -502,7 +499,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
 
 /// Returns true if the address can be represented by a base register plus
 /// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM;
   bool Done = false;
@@ -546,7 +543,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
 
 /// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LA/LAY instruction.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
                                   SDValue &Base, SDValue &Disp, SDValue &Index) {
   SystemZRRIAddressMode AM;
 
@@ -583,7 +580,7 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Disp, SDValue &Index) {
   if (ISD::isNON_EXTLoad(N.getNode()) &&
       IsLegalToFold(N, P, P, OptLevel))
-    return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
+    return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
   return false;
 }
 
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 67f739f690dd..d694f2e67edc 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -147,8 +147,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
   setOperationAction(ISD::FREM,             MVT::f64, Expand);
 
   // We have only 64-bit bitconverts
-  setOperationAction(ISD::BIT_CONVERT,      MVT::f32, Expand);
-  setOperationAction(ISD::BIT_CONVERT,      MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST,          MVT::f32, Expand);
+  setOperationAction(ISD::BITCAST,          MVT::i32, Expand);
 
   setOperationAction(ISD::UINT_TO_FP,       MVT::i32, Expand);
   setOperationAction(ISD::UINT_TO_FP,       MVT::i64, Expand);
@@ -341,7 +341,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
       // from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
       ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
-                             PseudoSourceValue::getFixedStack(FI), 0,
+                             MachinePointerInfo::getFixedStack(FI),
                              false, false, 0);
     }
 
@@ -377,8 +377,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
                                       const SmallVectorImpl<ISD::InputArg> &Ins,
                                       DebugLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
-
   MachineFunction &MF = DAG.getMachineFunction();
+  const TargetFrameLowering *TFI = TM.getFrameLowering();
 
   // Offset to first argument stack slot.
   const unsigned FirstArgOffset = 160;
@@ -431,7 +431,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
       if (StackPtr.getNode() == 0)
         StackPtr =
           DAG.getCopyFromReg(Chain, dl,
-                             (RegInfo->hasFP(MF) ?
+                             (TFI->hasFP(MF) ?
                               SystemZ::R11D : SystemZ::R15D),
                              getPointerTy());
 
@@ -441,7 +441,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
                                    DAG.getIntPtrConstant(Offset));
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(), Offset,
+                                         MachinePointerInfo(),
                                          false, false, 0));
     }
   }
@@ -471,7 +471,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -710,7 +710,7 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
   SDValue SystemZCC;
   SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
 
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SmallVector<SDValue, 4> Ops;
   Ops.push_back(TrueV);
   Ops.push_back(FalseV);
@@ -747,7 +747,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
 
   if (ExtraLoadRequired)
     Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // If there was a non-zero offset that we didn't fold, create an explicit
   // addition for it.
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index fa87061a7df3..2f2ef08dece1 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -115,9 +115,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
   if (TID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            Flags, Offset,
-                            MFI.getObjectSize(FI),
+    MF.getMachineMemOperand(MachinePointerInfo(
+                                PseudoSourceValue::getFixedStack(FI), Offset),
+                            Flags, MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   return addOffset(MIB.addFrameIndex(FI), Offset)
             .addMemOperand(MMO);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 367bed3a8539..be5280323c34 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -28,28 +28,6 @@ using namespace llvm;
 SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
   : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
     RI(tm, *this), TM(tm) {
-  // Fill the spill offsets map
-  static const unsigned SpillOffsTab[][2] = {
-    { SystemZ::R2D,  0x10 },
-    { SystemZ::R3D,  0x18 },
-    { SystemZ::R4D,  0x20 },
-    { SystemZ::R5D,  0x28 },
-    { SystemZ::R6D,  0x30 },
-    { SystemZ::R7D,  0x38 },
-    { SystemZ::R8D,  0x40 },
-    { SystemZ::R9D,  0x48 },
-    { SystemZ::R10D, 0x50 },
-    { SystemZ::R11D, 0x58 },
-    { SystemZ::R12D, 0x60 },
-    { SystemZ::R13D, 0x68 },
-    { SystemZ::R14D, 0x70 },
-    { SystemZ::R15D, 0x78 }
-  };
-
-  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-
-  for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
-    RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
 }
 
 /// isGVStub - Return true if the GV requires an extra load to get the
@@ -211,134 +189,6 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-bool
-SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineFunction &MF = *MBB.getParent();
-  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
-  unsigned CalleeFrameSize = 0;
-
-  // Scan the callee-saved and find the bounds of register spill area.
-  unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (!SystemZ::FP64RegClass.contains(Reg)) {
-      unsigned Offset = RegSpillOffsets[Reg];
-      CalleeFrameSize += 8;
-      if (StartOffset > Offset) {
-        LowReg = Reg; StartOffset = Offset;
-      }
-      if (EndOffset < Offset) {
-        HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
-      }
-    }
-  }
-
-  // Save information for epilogue inserter.
-  MFI->setCalleeSavedFrameSize(CalleeFrameSize);
-  MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
-
-  // Save GPRs
-  if (StartOffset) {
-    // Build a store instruction. Use STORE MULTIPLE instruction if there are many
-    // registers to store, otherwise - just STORE.
-    MachineInstrBuilder MIB =
-      BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
-                                SystemZ::MOV64mr : SystemZ::MOV64mrm)));
-
-    // Add store operands.
-    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
-    if (LowReg == HighReg)
-      MIB.addReg(0);
-    MIB.addReg(LowReg, RegState::Kill);
-    if (LowReg != HighReg)
-      MIB.addReg(HighReg, RegState::Kill);
-
-    // Do a second scan adding regs as being killed by instruction
-    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-      unsigned Reg = CSI[i].getReg();
-      // Add the callee-saved register as live-in. It's killed at the spill.
-      MBB.addLiveIn(Reg);
-      if (Reg != LowReg && Reg != HighReg)
-        MIB.addReg(Reg, RegState::ImplicitKill);
-    }
-  }
-
-  // Save FPRs
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (SystemZ::FP64RegClass.contains(Reg)) {
-      MBB.addLiveIn(Reg);
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
-                          &SystemZ::FP64RegClass, &RI);
-    }
-  }
-
-  return true;
-}
-
-bool
-SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineFunction &MF = *MBB.getParent();
-  const TargetRegisterInfo *RegInfo= MF.getTarget().getRegisterInfo();
-  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
-
-  // Restore FP registers
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (SystemZ::FP64RegClass.contains(Reg))
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                           &SystemZ::FP64RegClass, &RI);
-  }
-
-  // Restore GP registers
-  unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
-  unsigned StartOffset = RegSpillOffsets[LowReg];
-
-  if (StartOffset) {
-    // Build a load instruction. Use LOAD MULTIPLE instruction if there are many
-    // registers to load, otherwise - just LOAD.
-    MachineInstrBuilder MIB =
-      BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
-                                SystemZ::MOV64rm : SystemZ::MOV64rmm)));
-    // Add store operands.
-    MIB.addReg(LowReg, RegState::Define);
-    if (LowReg != HighReg)
-      MIB.addReg(HighReg, RegState::Define);
-
-    MIB.addReg((RegInfo->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D));
-    MIB.addImm(StartOffset);
-    if (LowReg == HighReg)
-      MIB.addReg(0);
-
-    // Do a second scan adding regs as being defined by instruction
-    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-      unsigned Reg = CSI[i].getReg();
-      if (Reg != LowReg && Reg != HighReg)
-        MIB.addReg(Reg, RegState::ImplicitDefine);
-    }
-  }
-
-  return true;
-}
-
 bool SystemZInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 1 && "Invalid Xbranch condition!");
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index c248f2489c49..6cb720010207 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -50,7 +50,6 @@ namespace SystemZII {
 class SystemZInstrInfo : public TargetInstrInfoImpl {
   const SystemZRegisterInfo RI;
   SystemZTargetMachine &TM;
-  IndexedMap<unsigned> RegSpillOffsets;
 public:
   explicit SystemZInstrInfo(SystemZTargetMachine &TM);
 
@@ -80,15 +79,6 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
-
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
   virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 8df07c034385..11a39fcd023a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -46,15 +46,15 @@ def SDT_Address             : SDTypeProfile<1, 1,
 // SystemZ Specific Node Definitions.
 //===----------------------------------------------------------------------===//
 def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOptInGlue]>;
 def SystemZcall    : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
-                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
+                     [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
 def SystemZcallseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
-                        [SDNPHasChain, SDNPOutFlag]>;
+                        [SDNPHasChain, SDNPOutGlue]>;
 def SystemZcallseq_end :
                  SDNode<"ISD::CALLSEQ_END",   SDT_SystemZCallSeqEnd,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def SystemZcmp     : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
 def SystemZucmp    : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
 def SystemZbrcond  : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
@@ -229,19 +229,19 @@ def MOV64ri16 : RII<0x9A7,
                     [(set GR64:$dst, immSExt16:$src)]>;
 
 def MOV64rill16 : RII<0xFA5,
-                      (outs GR64:$dst), (ins i64imm:$src),
+                      (outs GR64:$dst), (ins u16imm:$src),
                       "llill\t{$dst, $src}",
                       [(set GR64:$dst, i64ll16:$src)]>;
 def MOV64rilh16 : RII<0xEA5,
-                      (outs GR64:$dst), (ins i64imm:$src),
+                      (outs GR64:$dst), (ins u16imm:$src),
                       "llilh\t{$dst, $src}",
                       [(set GR64:$dst, i64lh16:$src)]>;
 def MOV64rihl16 : RII<0xDA5,
-                      (outs GR64:$dst), (ins i64imm:$src),
+                      (outs GR64:$dst), (ins u16imm:$src),
                       "llihl\t{$dst, $src}",
                       [(set GR64:$dst, i64hl16:$src)]>;
 def MOV64rihh16 : RII<0xCA5,
-                      (outs GR64:$dst), (ins i64imm:$src),
+                      (outs GR64:$dst), (ins u16imm:$src),
                       "llihh\t{$dst, $src}",
                       [(set GR64:$dst, i64hh16:$src)]>;
 
@@ -250,10 +250,10 @@ def MOV64ri32 : RILI<0x1C0,
                      "lgfi\t{$dst, $src}",
                      [(set GR64:$dst, immSExt32:$src)]>;
 def MOV64rilo32 : RILI<0xFC0,
-                       (outs GR64:$dst), (ins i64imm:$src),
+                       (outs GR64:$dst), (ins u32imm:$src),
                        "llilf\t{$dst, $src}",
                        [(set GR64:$dst, i64lo32:$src)]>;
-def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src),
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
                        "llihf\t{$dst, $src}",
                        [(set GR64:$dst, i64hi32:$src)]>;
 }
@@ -642,42 +642,42 @@ def AND64rm   : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
                       (implicit PSW)]>;
 
 def AND32rill16 : RII<0xA57,
-                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                      (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
                       "nill\t{$dst, $src2}",
                       [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
 def AND64rill16 : RII<0xA57,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                       "nill\t{$dst, $src2}",
                       [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
 
 def AND32rilh16 : RII<0xA56,
-                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                      (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
                       "nilh\t{$dst, $src2}",
                       [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
 def AND64rilh16 : RII<0xA56,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                       "nilh\t{$dst, $src2}",
                       [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
 
 def AND64rihl16 : RII<0xA55,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                       "nihl\t{$dst, $src2}",
                       [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
 def AND64rihh16 : RII<0xA54,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                       "nihh\t{$dst, $src2}",
                       [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
 
 def AND32ri     : RILI<0xC0B,
-                       (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                       (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
                        "nilf\t{$dst, $src2}",
                        [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
 def AND64rilo32 : RILI<0xC0B,
-                       (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                       (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
                        "nilf\t{$dst, $src2}",
                        [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
 def AND64rihi32 : RILI<0xC0A,
-                       (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                       (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
                        "nihf\t{$dst, $src2}",
                        [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
 
@@ -707,41 +707,41 @@ def OR64rm   : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
 
  // FIXME: Provide proper encoding!
 def OR32ri16  : RII<0xA5B,
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                    (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
                     "oill\t{$dst, $src2}",
                     [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
 def OR32ri16h : RII<0xA5A,
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                    (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
                     "oilh\t{$dst, $src2}",
                     [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
 def OR32ri : RILI<0xC0D,
-                  (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                  (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
                   "oilf\t{$dst, $src2}",
                   [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
 
 def OR64rill16 : RII<0xA5B,
-                     (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                     (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                      "oill\t{$dst, $src2}",
                      [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
 def OR64rilh16 : RII<0xA5A,
-                     (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                     (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                      "oilh\t{$dst, $src2}",
                      [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
 def OR64rihl16 : RII<0xA59,
-                     (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                     (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                      "oihl\t{$dst, $src2}",
                      [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
 def OR64rihh16 : RII<0xA58,
-                     (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                     (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
                      "oihh\t{$dst, $src2}",
                      [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
 
 def OR64rilo32 : RILI<0xC0D,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
                       "oilf\t{$dst, $src2}",
                       [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
 def OR64rihi32 : RILI<0xC0C,
-                      (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+                      (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
                       "oihf\t{$dst, $src2}",
                       [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
 
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
index 4f7f70bd85f0..2dc7e7bd29bb 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -14,6 +14,7 @@
 #include "SystemZMCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
@@ -24,6 +25,6 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
 
 const MCSection *SystemZMCAsmInfo::
 getNonexecutableStackSection(MCContext &Ctx) const{
-  return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
-                           0, SectionKind::getMetadata(), false);
+  return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+                           0, SectionKind::getMetadata());
 }
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 0de50fde582b..8b835cc26e29 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -246,6 +246,14 @@ def s16imm : Operand<i32> {
 def s16imm64 : Operand<i64> {
   let PrintMethod = "printS16ImmOperand";
 }
+// Unsigned i16
+def u16imm : Operand<i32> {
+  let PrintMethod = "printU16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+  let PrintMethod = "printU16ImmOperand";
+}
+
 // Signed i20
 def s20imm : Operand<i32> {
   let PrintMethod = "printS20ImmOperand";
@@ -260,6 +268,13 @@ def s32imm : Operand<i32> {
 def s32imm64 : Operand<i64> {
   let PrintMethod = "printS32ImmOperand";
 }
+// Unsigned i32
+def u32imm : Operand<i32> {
+  let PrintMethod = "printU32ImmOperand";
+}
+def u32imm64 : Operand<i64> {
+  let PrintMethod = "printU32ImmOperand";
+}
 
 def imm_pcrel : Operand<i64> {
   let PrintMethod = "printPCRelImmOperand";
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index f8d3e6ac8a6f..28f94f4b6c61 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -20,7 +20,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -49,49 +49,21 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  if (hasFP(MF))
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (TFI->hasFP(MF))
     Reserved.set(SystemZ::R11D);
   Reserved.set(SystemZ::R14D);
   Reserved.set(SystemZ::R15D);
   return Reserved;
 }
 
-/// needsFP - Return true if the specified function should have a dedicated
-/// frame pointer register.  This is true if the function has variable sized
-/// allocas or if frame pointer elimination is disabled.
-bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
 void SystemZRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
   MBB.erase(I);
 }
 
-int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
-                                             int FI) const {
-  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const SystemZMachineFunctionInfo *SystemZMFI =
-    MF.getInfo<SystemZMachineFunctionInfo>();
-  int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
-  uint64_t StackSize = MFI->getStackSize();
-
-  // Fixed objects are really located in the "previous" frame.
-  if (FI < 0)
-    StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
-  Offset += StackSize - TFI.getOffsetOfLocalArea();
-
-  // Skip the register save area if we generated the stack frame.
-  if (StackSize || MFI->hasCalls())
-    Offset -= TFI.getOffsetOfLocalArea();
-
-  return Offset;
-}
-
 void
 SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, RegScavenger *RS) const {
@@ -100,6 +72,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   unsigned i = 0;
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   while (!MI.getOperand(i).isFI()) {
     ++i;
     assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -107,7 +81,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   int FrameIndex = MI.getOperand(i).getIndex();
 
-  unsigned BasePtr = (hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+  unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
 
   // This must be part of a rri or ri operand memory reference.  Replace the
   // FrameIndex with base register with BasePtr.  Add an offset to the
@@ -117,7 +91,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Offset is a either 12-bit unsigned or 20-bit signed integer.
   // FIXME: handle "too long" displacements.
   int Offset =
-    getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+    TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
 
   // Check whether displacement is too long to fit into 12 bit zext field.
   MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
@@ -125,178 +99,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(i+1).ChangeToImmediate(Offset);
 }
 
-void
-SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                       RegScavenger *RS) const {
-  // Determine whether R15/R14 will ever be clobbered inside the function. And
-  // if yes - mark it as 'callee' saved.
-  MachineFrameInfo *FFI = MF.getFrameInfo();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  // Check whether high FPRs are ever used, if yes - we need to save R15 as
-  // well.
-  static const unsigned HighFPRs[] = {
-    SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
-    SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
-    SystemZ::F8S,  SystemZ::F9S,  SystemZ::F10S, SystemZ::F11S,
-    SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
-  };
-
-  bool HighFPRsUsed = false;
-  for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
-    HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
-
-  if (FFI->hasCalls())
-    /* FIXME: function is varargs */
-    /* FIXME: function grabs RA */
-    /* FIXME: function calls eh_return */
-    MRI.setPhysRegUsed(SystemZ::R14D);
-
-  if (HighFPRsUsed ||
-      FFI->hasCalls() ||
-      FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
-      FFI->hasVarSizedObjects() // Function calls dynamic alloca's
-      /* FIXME: function is varargs */)
-    MRI.setPhysRegUsed(SystemZ::R15D);
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                  int64_t NumBytes, const TargetInstrInfo &TII) {
-  unsigned Opc; uint64_t Chunk;
-  bool isSub = NumBytes < 0;
-  uint64_t Offset = isSub ? -NumBytes : NumBytes;
-
-  if (Offset >= (1LL << 15) - 1) {
-    Opc = SystemZ::ADD64ri32;
-    Chunk = (1LL << 31) - 1;
-  } else {
-    Opc = SystemZ::ADD64ri16;
-    Chunk = (1LL << 15) - 1;
-  }
-
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  while (Offset) {
-    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
-    MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
-      .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
-    // The PSW implicit def is dead.
-    MI->getOperand(3).setIsDead();
-    Offset -= ThisVal;
-  }
-}
-
-void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  SystemZMachineFunctionInfo *SystemZMFI =
-    MF.getInfo<SystemZMachineFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  // Note that area for callee-saved stuff is already allocated, thus we need to
-  // 'undo' the stack movement.
-  uint64_t StackSize = MFI->getStackSize();
-  StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
-  uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
-
-  // Skip the callee-saved push instructions.
-  while (MBBI != MBB.end() &&
-         (MBBI->getOpcode() == SystemZ::MOV64mr ||
-          MBBI->getOpcode() == SystemZ::MOV64mrm))
-    ++MBBI;
-
-  if (MBBI != MBB.end())
-    DL = MBBI->getDebugLoc();
-
-  // adjust stack pointer: R15 -= numbytes
-  if (StackSize || MFI->hasCalls()) {
-    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
-           "Invalid stack frame calculation!");
-    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
-  }
-
-  if (hasFP(MF)) {
-    // Update R11 with the new base value...
-    BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
-      .addReg(SystemZ::R15D);
-
-    // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
-         I != E; ++I)
-      I->addLiveIn(SystemZ::R11D);
-
-  }
-}
-
-void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                     MachineBasicBlock &MBB) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  SystemZMachineFunctionInfo *SystemZMFI =
-    MF.getInfo<SystemZMachineFunctionInfo>();
-  unsigned RetOpcode = MBBI->getOpcode();
-
-  switch (RetOpcode) {
-  case SystemZ::RET: break;  // These are ok
-  default:
-    assert(0 && "Can only insert epilog into returning blocks");
-  }
-
-  // Get the number of bytes to allocate from the FrameInfo
-  // Note that area for callee-saved stuff is already allocated, thus we need to
-  // 'undo' the stack movement.
-  uint64_t StackSize =
-    MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
-  uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
-
-  // Skip the final terminator instruction.
-  while (MBBI != MBB.begin()) {
-    MachineBasicBlock::iterator PI = prior(MBBI);
-    --MBBI;
-    if (!PI->getDesc().isTerminator())
-      break;
-  }
-
-  // During callee-saved restores emission stack frame was not yet finialized
-  // (and thus - the stack size was unknown). Tune the offset having full stack
-  // size in hands.
-  if (StackSize || MFI->hasCalls()) {
-    assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
-            MBBI->getOpcode() == SystemZ::MOV64rm) &&
-           "Expected to see callee-save register restore code");
-    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
-           "Invalid stack frame calculation!");
-
-    unsigned i = 0;
-    MachineInstr &MI = *MBBI;
-    while (!MI.getOperand(i).isImm()) {
-      ++i;
-      assert(i < MI.getNumOperands() && "Unexpected restore code!");
-    }
-
-    uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
-    // If Offset does not fit into 20-bit signed displacement field we need to
-    // emit some additional code...
-    if (Offset > 524287) {
-      // Fold the displacement into load instruction as much as possible.
-      NumBytes = Offset - 524287;
-      Offset = 524287;
-      emitSPUpdate(MBB, MBBI, NumBytes, TII);
-    }
-
-    MI.getOperand(i).ChangeToImmediate(Offset);
-  }
-}
-
 unsigned SystemZRegisterInfo::getRARegister() const {
   assert(0 && "What is the return address register");
   return 0;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 5dae865cb79a..b45079889a23 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -34,11 +34,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
-  bool hasFP(const MachineFunction &MF) const;
-
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -46,13 +41,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const;
-
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 33be8ddffbed..0028c85b4a9d 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -190,8 +190,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32,
     GR32Class::iterator
     GR32Class::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG32_nofp;
       else
         return SystemZ_REG32;
@@ -199,8 +199,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32,
     GR32Class::iterator
     GR32Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
       else
         return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
@@ -237,8 +237,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
     ADDR32Class::iterator
     ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_ADDR32_nofp;
       else
         return SystemZ_ADDR32;
@@ -246,8 +246,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
     ADDR32Class::iterator
     ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
       else
         return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
@@ -284,8 +284,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
     GR64Class::iterator
     GR64Class::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG64_nofp;
       else
         return SystemZ_REG64;
@@ -293,8 +293,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
     GR64Class::iterator
     GR64Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
       else
         return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
@@ -331,8 +331,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
     ADDR64Class::iterator
     ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_ADDR64_nofp;
       else
         return SystemZ_ADDR64;
@@ -340,8 +340,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
     ADDR64Class::iterator
     ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
       else
         return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
@@ -368,8 +368,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
     GR64PClass::iterator
     GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG64P_nofp;
       else
         return SystemZ_REG64P;
@@ -377,8 +377,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
     GR64PClass::iterator
     GR64PClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
       else
         return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
@@ -405,8 +405,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
     GR128Class::iterator
     GR128Class::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG128_nofp;
       else
         return SystemZ_REG128;
@@ -414,8 +414,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
     GR128Class::iterator
     GR128Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
       else
         return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f45827b2b300..160389942998 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -30,7 +30,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
     DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
                "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
     InstrInfo(*this), TLInfo(*this), TSInfo(*this),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
+    FrameLowering(Subtarget) {
 
   if (getRelocationModel() == Reloc::Default)
     setRelocationModel(Reloc::Static);
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 6af829bb5966..524f83d13229 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -17,11 +17,12 @@
 
 #include "SystemZInstrInfo.h"
 #include "SystemZISelLowering.h"
+#include "SystemZFrameLowering.h"
 #include "SystemZSelectionDAGInfo.h"
 #include "SystemZRegisterInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -34,15 +35,14 @@ class SystemZTargetMachine : public LLVMTargetMachine {
   SystemZInstrInfo        InstrInfo;
   SystemZTargetLowering   TLInfo;
   SystemZSelectionDAGInfo TSInfo;
-
-  // SystemZ does not have any call stack frame, therefore not having
-  // any SystemZ specific FrameInfo class.
-  TargetFrameInfo       FrameInfo;
+  SystemZFrameLowering    FrameLowering;
 public:
   SystemZTargetMachine(const Target &T, const std::string &TT,
                        const std::string &FS);
 
-  virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual const TargetFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const SystemZInstrInfo *getInstrInfo() const  { return &InstrInfo; }
   virtual const TargetData *getTargetData() const     { return &DataLayout;}
   virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index f5c969ae133d..0919fe42dc0e 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -7,12 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMTarget.a, which implements
-// target information.
+// This file implements the common infrastructure (including C bindings) for 
+// libLLVMTarget.a, which implements target information.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/LLVMContext.h"
@@ -20,6 +22,15 @@
 
 using namespace llvm;
 
+void llvm::initializeTarget(PassRegistry &Registry) {
+  initializeTargetDataPass(Registry);
+  initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+  initializeTarget(*unwrap(R));
+}
+
 LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
   return wrap(new TargetData(StringRep));
 }
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 000000000000..6fa5420120f5
--- /dev/null
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,27 @@
+//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
+  TLOF = &TM.getTargetLowering()->getObjFileLowering();
+  const TargetData &TD = *TM.getTargetData();
+  IsLittleEndian = TD.isLittleEndian();
+  PointerSize = TD.getPointerSize();
+  const TargetFrameLowering &TFI = *TM.getFrameLowering();
+  StackDir = TFI.getStackGrowthDirection();
+  TRI = TM.getRegisterInfo();
+  TFI.getInitialFrameState(InitialFrameState);
+}
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index f35c96dadcee..c628df04e710 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 #include <cstdlib>
@@ -34,7 +34,7 @@ using namespace llvm;
 // Handle the Pass registration stuff necessary to use TargetData's.
 
 // Register the default SparcV9 implementation...
-INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
 char TargetData::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -83,7 +83,7 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
   assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
          (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
          "Upper bound didn't work!");
-  
+
   // Multiple fields can have the same offset if any of them are zero sized.
   // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
   // at the i32 element, because it is the last element at that offset.  This is
@@ -131,6 +131,8 @@ static unsigned getInt(StringRef R) {
 }
 
 void TargetData::init(StringRef Desc) {
+  initializeTargetDataPass(*PassRegistry::getPassRegistry());
+  
   LayoutMap = 0;
   LittleEndian = false;
   PointerMemSize = 8;
@@ -153,16 +155,16 @@ void TargetData::init(StringRef Desc) {
     std::pair<StringRef, StringRef> Split = Desc.split('-');
     StringRef Token = Split.first;
     Desc = Split.second;
-    
+
     if (Token.empty())
       continue;
-    
+
     Split = Token.split(':');
     StringRef Specifier = Split.first;
     Token = Split.second;
-    
+
     assert(!Specifier.empty() && "Can't be empty here");
-    
+
     switch (Specifier[0]) {
     case 'E':
       LittleEndian = false;
@@ -197,7 +199,7 @@ void TargetData::init(StringRef Desc) {
       unsigned Size = getInt(Specifier.substr(1));
       Split = Token.split(':');
       unsigned ABIAlign = getInt(Split.first) / 8;
-      
+
       Split = Split.second.split(':');
       unsigned PrefAlign = getInt(Split.first) / 8;
       if (PrefAlign == 0)
@@ -215,7 +217,7 @@ void TargetData::init(StringRef Desc) {
         Token = Split.second;
       } while (!Specifier.empty() || !Token.empty());
       break;
-        
+
     default:
       break;
     }
@@ -231,7 +233,7 @@ TargetData::TargetData() : ImmutablePass(ID) {
                     "Tool did not specify a TargetData to use?");
 }
 
-TargetData::TargetData(const Module *M) 
+TargetData::TargetData(const Module *M)
   : ImmutablePass(ID) {
   init(M->getDataLayout());
 }
@@ -249,14 +251,14 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
       return;
     }
   }
-  
+
   Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
                                             pref_align, bit_width));
 }
 
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or 
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
 /// preferred if ABIInfo = false) the target wants for the specified datatype.
-unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, 
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
                                       uint32_t BitWidth, bool ABIInfo,
                                       const Type *Ty) const {
   // Check to see if we have an exact match and remember the best match we see.
@@ -266,18 +268,18 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
     if (Alignments[i].AlignType == AlignType &&
         Alignments[i].TypeBitWidth == BitWidth)
       return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-    
+
     // The best match so far depends on what we're looking for.
-     if (AlignType == INTEGER_ALIGN && 
+     if (AlignType == INTEGER_ALIGN &&
          Alignments[i].AlignType == INTEGER_ALIGN) {
       // The "best match" for integers is the smallest size that is larger than
       // the BitWidth requested.
-      if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || 
+      if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
            Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
         BestMatchIdx = i;
       // However, if there isn't one that's larger, then we must use the
       // largest one we have (see below)
-      if (LargestInt == -1 || 
+      if (LargestInt == -1 ||
           Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
         LargestInt = i;
     }
@@ -322,8 +324,8 @@ class StructLayoutMap : public AbstractTypeUser {
       I->first->removeAbstractTypeUser(this);
     LayoutInfo.erase(I);
   }
-  
-  
+
+
   /// refineAbstractType - The callback method invoked when an abstract type is
   /// resolved to another type.  An object must override this method to update
   /// its internal state to reference NewType instead of OldType.
@@ -385,21 +387,21 @@ TargetData::~TargetData() {
 const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
   if (!LayoutMap)
     LayoutMap = new StructLayoutMap();
-  
+
   StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
   StructLayout *&SL = (*STM)[Ty];
   if (SL) return SL;
 
-  // Otherwise, create the struct layout.  Because it is variable length, we 
+  // Otherwise, create the struct layout.  Because it is variable length, we
   // malloc it, then use placement new.
   int NumElts = Ty->getNumElements();
   StructLayout *L =
     (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
-  
+
   // Set SL before calling StructLayout's ctor.  The ctor could cause other
   // entries to be added to TheMap, invalidating our reference.
   SL = L;
-  
+
   new (L) StructLayout(Ty, *this);
 
   if (Ty->isAbstract())
@@ -414,14 +416,14 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
 /// avoid a dangling pointer in this cache.
 void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
   if (!LayoutMap) return;  // No cache.
-  
+
   static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty);
 }
 
 std::string TargetData::getStringRepresentation() const {
   std::string Result;
   raw_string_ostream OS(Result);
-  
+
   OS << (LittleEndian ? "e" : "E")
      << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
      << ':' << PointerPrefAlign*8;
@@ -430,10 +432,10 @@ std::string TargetData::getStringRepresentation() const {
     OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
        << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
   }
-  
+
   if (!LegalIntWidths.empty()) {
     OS << "-n" << (unsigned)LegalIntWidths[0];
-    
+
     for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
       OS << ':' << (unsigned)LegalIntWidths[i];
   }
@@ -461,6 +463,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
   case Type::FloatTyID:
     return 32;
   case Type::DoubleTyID:
+  case Type::X86_MMXTyID:
     return 64;
   case Type::PPC_FP128TyID:
   case Type::FP128TyID:
@@ -523,6 +526,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
   case Type::X86_FP80TyID:
     AlignType = FLOAT_ALIGN;
     break;
+  case Type::X86_MMXTyID:
   case Type::VectorTyID:
     AlignType = VECTOR_ALIGN;
     break;
diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp
index 3631b3501341..a661ee9c0c65 100644
--- a/lib/Target/TargetELFWriterInfo.cpp
+++ b/lib/Target/TargetELFWriterInfo.cpp
@@ -17,9 +17,8 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
-  is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
-  isLittleEndian = TM.getTargetData()->isLittleEndian();
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+  is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
 }
 
 TargetELFWriterInfo::~TargetELFWriterInfo() {}
diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp
deleted file mode 100644
index 873d60a1b5ff..000000000000
--- a/lib/Target/TargetFrameInfo.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the layout of a stack frame on the target machine.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetFrameInfo.h"
-#include <cstdlib>
-using namespace llvm;
-
-TargetFrameInfo::~TargetFrameInfo() {
-}
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp
new file mode 100644
index 000000000000..19fd581c7dd5
--- /dev/null
+++ b/lib/Target/TargetFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+                                                                         const {
+  // Default is to do nothing.
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                         int FI) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+    getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+                                             int FI, unsigned &FrameReg) const {
+  const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+  // By default, assume all frame indices are referenced via whatever
+  // getFrameRegister() says. The target can override this if it's doing
+  // something different.
+  FrameReg = RI->getFrameRegister(MF);
+  return getFrameIndexOffset(MF, FI);
+}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index c099a7eaefe7..97f3bf6e57ad 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -12,9 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cctype>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -47,9 +50,85 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
 TargetInstrInfo::~TargetInstrInfo() {
 }
 
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                const MachineInstr *MI) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Class = MI->getDesc().getSchedClass();
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
+    return UOps;
+
+  // The # of u-ops is dynamically determined. The specific target should
+  // override this function to return the right number.
+  return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return -1;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  unsigned UseClass = UseMI->getDesc().getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                   SDNode *DefNode, unsigned DefIdx,
+                                   SDNode *UseNode, unsigned UseIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return -1;
+
+  if (!DefNode->isMachineOpcode())
+    return -1;
+
+  unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+  if (!UseNode->isMachineOpcode())
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+  unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     const MachineInstr *MI,
+                                     unsigned *PredCost) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     SDNode *N) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  if (!N->isMachineOpcode())
+    return 1;
+
+  return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+                                       const MachineInstr *DefMI,
+                                       unsigned DefIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return false;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+  return (DefCycle != -1 && DefCycle <= 1);
+}
+
 /// insertNoop - Insert a noop into the instruction stream at the specified
 /// point.
-void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, 
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI) const {
   llvm_unreachable("Target didn't implement insertNoop!");
 }
@@ -58,7 +137,7 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
 bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.isTerminator()) return false;
-  
+
   // Conditional branch is a special case.
   if (TID.isBranch() && !TID.isBarrier())
     return true;
@@ -78,15 +157,15 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
 /// may be overloaded in the target code to do that.
 unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
                                              const MCAsmInfo &MAI) const {
-  
-  
+
+
   // Count the number of instructions in the asm.
   bool atInsnStart = true;
   unsigned Length = 0;
   for (; *Str; ++Str) {
     if (*Str == '\n' || *Str == MAI.getSeparatorChar())
       atInsnStart = true;
-    if (atInsnStart && !isspace(*Str)) {
+    if (atInsnStart && !std::isspace(*Str)) {
       Length += MAI.getMaxInstLength();
       atInsnStart = false;
     }
@@ -94,6 +173,6 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
                                strlen(MAI.getCommentString())) == 0)
       atInsnStart = false;
   }
-  
+
   return Length;
 }
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 000000000000..c8bed18ffabe
--- /dev/null
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,55 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+                "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple.  This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+  initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+  
+  // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
+  if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9)
+    TLI.setUnavailable(LibFunc::memset_pattern16);
+  
+}
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+  // Default to everything being available.
+  memset(AvailableArray, -1, sizeof(AvailableArray));
+
+  initialize(*this, Triple());
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+  // Default to everything being available.
+  memset(AvailableArray, -1, sizeof(AvailableArray));
+  
+  initialize(*this, T);
+}
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+  memset(AvailableArray, 0, sizeof(AvailableArray));
+}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index dd7b532bbfba..5d34c7d7fa3d 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,8 +43,8 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
   StaticCtorSection = 0;
   StaticDtorSection = 0;
   LSDASection = 0;
-  EHFrameSection = 0;
 
+  CommDirectiveSupportsAlignment = true;
   DwarfAbbrevSection = 0;
   DwarfInfoSection = 0;
   DwarfLineSection = 0;
@@ -168,6 +168,12 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
     switch (C->getRelocationInfo()) {
     default: assert(0 && "unknown relocation info kind");
     case Constant::NoRelocation:
+      // If the global is required to have a unique address, it can't be put
+      // into a mergable section: just drop it into the general read-only
+      // section instead.
+      if (!GVar->hasUnnamedAddr())
+        return SectionKind::getReadOnly();
+        
       // If initializer is a null-terminated string, put it in a "cstring"
       // section of the right width.
       if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 705b1c097e55..d579d95a99c4 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -219,7 +219,9 @@ FunctionSections("ffunction-sections",
 
 TargetMachine::TargetMachine(const Target &T) 
   : TheTarget(T), AsmInfo(0),
-    MCRelaxAll(false) {
+    MCRelaxAll(false),
+    MCNoExecStack(false),
+    MCUseLoc(true) {
   // Typically it will be subtargets that will adjust FloatABIType from Default
   // to Soft or Hard.
   if (UseSoftFloat)
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 55f222c7c1c9..4811ba5cc488 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,10 +13,10 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -30,7 +30,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
     AliasesHash(aliases), AliasesHashSize(aliasessize),
     Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
     RegClassBegin(RCB), RegClassEnd(RCE) {
-  assert(NumRegs < FirstVirtualRegister &&
+  assert(isPhysicalRegister(NumRegs) &&
          "Target has too many physical registers!");
 
   CallFrameSetupOpcode   = CFSO;
@@ -39,6 +39,25 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
 
 TargetRegisterInfo::~TargetRegisterInfo() {}
 
+void PrintReg::print(raw_ostream &OS) const {
+  if (!Reg)
+    OS << "%noreg";
+  else if (TargetRegisterInfo::isStackSlot(Reg))
+    OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+  else if (TargetRegisterInfo::isVirtualRegister(Reg))
+    OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+  else if (TRI && Reg < TRI->getNumRegs())
+    OS << '%' << TRI->getName(Reg);
+  else
+    OS << "%physreg" << Reg;
+  if (SubIdx) {
+    if (TRI)
+      OS << ':' << TRI->getSubRegIndexName(SubIdx);
+    else
+      OS << ":sub(" << SubIdx << ')';
+  }
+}
+
 /// getMinimalPhysRegClass - Returns the Register Class of a physical
 /// register of the given type, picking the most sub register class of
 /// the right type that contains this physreg.
@@ -82,29 +101,11 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
 
   // Mask out the reserved registers
   BitVector Reserved = getReservedRegs(MF);
-  Allocatable ^= Reserved & Allocatable;
+  Allocatable &= Reserved.flip();
 
   return Allocatable;
 }
 
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
-                                            int FI) const {
-  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return MFI->getObjectOffset(FI) + MFI->getStackSize() -
-    TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
-/// getInitialFrameState - Returns a list of machine moves that are assumed
-/// on entry to a function.
-void
-TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const{
-  // Default is to do nothing.
-}
-
 const TargetRegisterClass *
 llvm::getCommonSubClass(const TargetRegisterClass *A,
                         const TargetRegisterClass *B) {
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 26797ab353b6..ec73087a3305 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -65,9 +65,10 @@ public:
   }
 };
 
-}
+} // end anonymous namespace
 
-static unsigned MatchRegisterName(StringRef Name);
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
 
 AsmToken X86AsmLexer::LexTokenATT() {
   AsmToken lexedToken = lexDefinite();
@@ -162,7 +163,3 @@ extern "C" void LLVMInitializeX86AsmLexer() {
   RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
   RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
 }
-
-#define REGISTERS_ONLY
-#include "X86GenAsmMatcher.inc"
-#undef REGISTERS_ONLY
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f8588d818b75..1cac07a0e10a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -10,20 +10,21 @@
 #include "llvm/Target/TargetAsmParser.h"
 #include "X86.h"
 #include "X86Subtarget.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
 using namespace llvm;
 
 namespace {
@@ -43,35 +44,32 @@ private:
 
   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
 
-  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
-
   X86Operand *ParseOperand();
   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
 
-  bool MatchInstruction(SMLoc IDLoc,
-                        const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCInst &Inst);
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out);
 
   /// @name Auto-generated Matcher Functions
   /// {
 
-  unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
-
-  bool MatchInstructionImpl(
-    const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
 
   /// }
 
 public:
-  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
-    : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+  X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
+    : TargetAsmParser(T), Parser(parser), TM(TM) {
 
     // Initialize the set of available features.
     setAvailableFeatures(ComputeAvailableFeatures(
                            &TM.getSubtarget<X86Subtarget>()));
   }
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
 
   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -81,16 +79,16 @@ public:
 
 class X86_32ATTAsmParser : public X86ATTAsmParser {
 public:
-  X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
-    : X86ATTAsmParser(T, _Parser, TM) {
+  X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+    : X86ATTAsmParser(T, Parser, TM) {
     Is64Bit = false;
   }
 };
 
 class X86_64ATTAsmParser : public X86ATTAsmParser {
 public:
-  X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
-    : X86ATTAsmParser(T, _Parser, TM) {
+  X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+    : X86ATTAsmParser(T, Parser, TM) {
     Is64Bit = true;
   }
 };
@@ -375,14 +373,18 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
   // validation later, so maybe there is no need for this here.
   RegNo = MatchRegisterName(Tok.getString());
 
+  // If the match failed, try the register name as lowercase.
+  if (RegNo == 0)
+    RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
   // FIXME: This should be done using Requires<In32BitMode> and
   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
   // can be also checked.
   if (RegNo == X86::RIZ && !Is64Bit)
     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 
-  // Parse %st(1) and "%st" as "%st(0)"
-  if (RegNo == 0 && Tok.getString() == "st") {
+  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
     RegNo = X86::ST0;
     EndLoc = Tok.getLoc();
     Parser.Lex(); // Eat 'st'
@@ -617,88 +619,13 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 bool X86ATTAsmParser::
 ParseInstruction(StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  // The various flavors of pushf and popf use Requires<In32BitMode> and
-  // Requires<In64BitMode>, but the assembler doesn't yet implement that.
-  // For now, just do a manual check to prevent silent misencoding.
-  if (Is64Bit) {
-    if (Name == "popfl")
-      return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
-    else if (Name == "pushfl")
-      return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
-    else if (Name == "pusha")
-      return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
-  } else {
-    if (Name == "popfq")
-      return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
-    else if (Name == "pushfq")
-      return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
-  }
-
-  // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
-  // the form jrcxz is not allowed in 32-bit mode.
-  if (Is64Bit) {
-    if (Name == "jcxz")
-      return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
-  } else {
-    if (Name == "jrcxz")
-      return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
-  }
-
-  // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
-  // represent alternative syntaxes in the .td file, without requiring
-  // instruction duplication.
-  StringRef PatchedName = StringSwitch<StringRef>(Name)
-    .Case("sal", "shl")
-    .Case("salb", "shlb")
-    .Case("sall", "shll")
-    .Case("salq", "shlq")
-    .Case("salw", "shlw")
-    .Case("repe", "rep")
-    .Case("repz", "rep")
-    .Case("repnz", "repne")
-    .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
-    .Case("popf",  Is64Bit ? "popfq"  : "popfl")
-    .Case("retl", Is64Bit ? "retl" : "ret")
-    .Case("retq", Is64Bit ? "ret" : "retq")
-    .Case("setz", "sete")
-    .Case("setnz", "setne")
-    .Case("jz", "je")
-    .Case("jnz", "jne")
-    .Case("jc", "jb")
-    // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
-    // jecxz requires an AdSize prefix but jecxz does not have a prefix in
-    // 32-bit mode.
-    .Case("jecxz", "jcxz")
-    .Case("jrcxz", "jcxz")
-    .Case("jna", "jbe")
-    .Case("jnae", "jb")
-    .Case("jnb", "jae")
-    .Case("jnbe", "ja")
-    .Case("jnc", "jae")
-    .Case("jng", "jle")
-    .Case("jnge", "jl")
-    .Case("jnl", "jge")
-    .Case("jnle", "jg")
-    .Case("jpe", "jp")
-    .Case("jpo", "jnp")
-    .Case("cmovcl", "cmovbl")
-    .Case("cmovcl", "cmovbl")
-    .Case("cmovnal", "cmovbel")
-    .Case("cmovnbl", "cmovael")
-    .Case("cmovnbel", "cmoval")
-    .Case("cmovncl", "cmovael")
-    .Case("cmovngl", "cmovlel")
-    .Case("cmovnl", "cmovgel")
-    .Case("cmovngl", "cmovlel")
-    .Case("cmovngel", "cmovll")
-    .Case("cmovnll", "cmovgel")
-    .Case("cmovnlel", "cmovgl")
-    .Case("cmovnzl", "cmovnel")
-    .Case("cmovzl", "cmovel")
-    .Case("fwait", "wait")
-    .Case("movzx", "movzb")
-    .Default(Name);
+  StringRef PatchedName = Name;
 
+  // FIXME: Hack to recognize setneb as setne.
+  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+      PatchedName != "setb" && PatchedName != "setnb")
+    PatchedName = PatchedName.substr(0, Name.size()-1);
+  
   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
   const MCExpr *ExtraImmOp = 0;
   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -773,12 +700,26 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       PatchedName = "vpclmulqdq";
     }
   }
+
   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
   if (ExtraImmOp)
     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+  // Determine whether this is an instruction prefix.
+  bool isPrefix =
+    Name == "lock" || Name == "rep" ||
+    Name == "repe" || Name == "repz" ||
+    Name == "repne" || Name == "repnz" ||
+    Name == "rex64" || Name == "data16";
+
+
+  // This does the actual operand parsing.  Don't parse any more if we have a
+  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+  // just want to parse the "lock" as the first instruction and the "incl" as
+  // the next one.
+  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
 
     // Parse '*' modifier.
     if (getLexer().is(AsmToken::Star)) {
@@ -790,8 +731,10 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
     // Read the first operand.
     if (X86Operand *Op = ParseOperand())
       Operands.push_back(Op);
-    else
+    else {
+      Parser.EatToEndOfStatement();
       return true;
+    }
 
     while (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();  // Eat the comma.
@@ -799,23 +742,27 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
       // Parse and remember the operand.
       if (X86Operand *Op = ParseOperand())
         Operands.push_back(Op);
-      else
+      else {
+        Parser.EatToEndOfStatement();
         return true;
+      }
     }
-  }
 
-  // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
-  if ((Name.startswith("shr") || Name.startswith("sar") ||
-       Name.startswith("shl")) &&
-      Operands.size() == 3 &&
-      static_cast<X86Operand*>(Operands[1])->isImm() &&
-      isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
-      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
-    delete Operands[1];
-    Operands.erase(Operands.begin() + 1);
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      SMLoc Loc = getLexer().getLoc();
+      Parser.EatToEndOfStatement();
+      return Error(Loc, "unexpected token in argument list");
+    }
   }
 
-  // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+  if (getLexer().is(AsmToken::EndOfStatement))
+    Parser.Lex(); // Consume the EndOfStatement
+  else if (isPrefix && getLexer().is(AsmToken::Slash))
+    Parser.Lex(); // Consume the prefix separator Slash
+
+  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
+  // documented form in various unofficial manuals, so a lot of code uses it.
   if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
       Operands.size() == 3) {
     X86Operand &Op = *(X86Operand*)Operands.back();
@@ -829,76 +776,80 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
     }
   }
   
-  // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
-  // "f{mul*,add*,sub*,div*} $op"
-  if ((Name.startswith("fmul") || Name.startswith("fadd") ||
-       Name.startswith("fsub") || Name.startswith("fdiv")) &&
-      Operands.size() == 3 &&
-      static_cast<X86Operand*>(Operands[2])->isReg() &&
-      static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
-    delete Operands[2];
-    Operands.erase(Operands.begin() + 2);
-  }
-
-  // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
-  // B".
-  if (Name.startswith("imul") && Operands.size() == 3 &&
-      static_cast<X86Operand*>(Operands[1])->isImm() &&
-      static_cast<X86Operand*>(Operands.back())->isReg()) {
-    X86Operand *Op = static_cast<X86Operand*>(Operands.back());
-    Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
-                                             Op->getEndLoc()));
-  }
-
-  return false;
-}
-
-bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
-  StringRef IDVal = DirectiveID.getIdentifier();
-  if (IDVal == ".word")
-    return ParseDirectiveWord(2, DirectiveID.getLoc());
-  return true;
-}
-
-/// ParseDirectiveWord
-///  ::= .word [ expression (, expression)* ]
-bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    for (;;) {
-      const MCExpr *Value;
-      if (getParser().ParseExpression(Value))
-        return true;
-
-      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
-      if (getLexer().is(AsmToken::EndOfStatement))
-        break;
-
-      // FIXME: Improve diagnostic.
-      if (getLexer().isNot(AsmToken::Comma))
-        return Error(L, "unexpected token in directive");
-      Parser.Lex();
+  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
+  // "shift <op>".
+  if ((Name.startswith("shr") || Name.startswith("sar") ||
+       Name.startswith("shl") || Name.startswith("sal") ||
+       Name.startswith("rcl") || Name.startswith("rcr") ||
+       Name.startswith("rol") || Name.startswith("ror")) &&
+      Operands.size() == 3) {
+    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+        cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+      delete Operands[1];
+      Operands.erase(Operands.begin() + 1);
     }
   }
 
-  Parser.Lex();
   return false;
 }
 
-
-bool
-X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
-                                  const SmallVectorImpl<MCParsedAsmOperand*>
-                                    &Operands,
-                                  MCInst &Inst) {
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out) {
   assert(!Operands.empty() && "Unexpect empty operand list!");
-
   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
 
+  // First, handle aliases that expand to multiple instructions.
+  // FIXME: This should be replaced with a real .td file alias mechanism.
+  // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+  // call.
+  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+    MCInst Inst;
+    Inst.setOpcode(X86::WAIT);
+    Out.EmitInstruction(Inst);
+
+    const char *Repl =
+      StringSwitch<const char*>(Op->getToken())
+        .Case("finit",  "fninit")
+        .Case("fsave",  "fnsave")
+        .Case("fstcw",  "fnstcw")
+        .Case("fstcww",  "fnstcw")
+        .Case("fstenv", "fnstenv")
+        .Case("fstsw",  "fnstsw")
+        .Case("fstsww", "fnstsw")
+        .Case("fclex",  "fnclex")
+        .Default(0);
+    assert(Repl && "Unknown wait-prefixed instruction");
+    delete Operands[0];
+    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+  }
+
+  bool WasOriginallyInvalidOperand = false;
+  unsigned OrigErrorInfo;
+  MCInst Inst;
+
   // First, try a direct match.
-  if (!MatchInstructionImpl(Operands, Inst))
+  switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+  case Match_Success:
+    Out.EmitInstruction(Inst);
     return false;
+  case Match_MissingFeature:
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+    return true;
+  case Match_ConversionFail:
+    return Error(IDLoc, "unable to convert operands to instruction");
+  case Match_InvalidOperand:
+    WasOriginallyInvalidOperand = true;
+    break;
+  case Match_MnemonicFail:
+    break;
+  }
 
   // FIXME: Ideally, we would only attempt suffix matches for things which are
   // valid prefixes, and we could just infer the right unambiguous
@@ -912,15 +863,26 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
   Tmp += ' ';
   Op->setTokenValue(Tmp.str());
 
+  // If this instruction starts with an 'f', then it is a floating point stack
+  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
+  // 80-bit floating point, which use the suffixes s,l,t respectively.
+  //
+  // Otherwise, we assume that this may be an integer instruction, which comes
+  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
+  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+  
   // Check for the various suffix matches.
-  Tmp[Base.size()] = 'b';
-  bool MatchB = MatchInstructionImpl(Operands, Inst);
-  Tmp[Base.size()] = 'w';
-  bool MatchW = MatchInstructionImpl(Operands, Inst);
-  Tmp[Base.size()] = 'l';
-  bool MatchL = MatchInstructionImpl(Operands, Inst);
-  Tmp[Base.size()] = 'q';
-  bool MatchQ = MatchInstructionImpl(Operands, Inst);
+  Tmp[Base.size()] = Suffixes[0];
+  unsigned ErrorInfoIgnore;
+  MatchResultTy Match1, Match2, Match3, Match4;
+  
+  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+  Tmp[Base.size()] = Suffixes[1];
+  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+  Tmp[Base.size()] = Suffixes[2];
+  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+  Tmp[Base.size()] = Suffixes[3];
+  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
 
   // Restore the old token.
   Op->setTokenValue(Base);
@@ -928,24 +890,25 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
   // If exactly one matched, then we treat that as a successful match (and the
   // instruction will already have been filled in correctly, since the failing
   // matches won't have modified it).
-  if (MatchB + MatchW + MatchL + MatchQ == 3)
+  unsigned NumSuccessfulMatches =
+    (Match1 == Match_Success) + (Match2 == Match_Success) +
+    (Match3 == Match_Success) + (Match4 == Match_Success);
+  if (NumSuccessfulMatches == 1) {
+    Out.EmitInstruction(Inst);
     return false;
+  }
 
-  // Otherwise, the match failed.
+  // Otherwise, the match failed, try to produce a decent error message.
 
   // If we had multiple suffix matches, then identify this as an ambiguous
   // match.
-  if (MatchB + MatchW + MatchL + MatchQ != 4) {
+  if (NumSuccessfulMatches > 1) {
     char MatchChars[4];
     unsigned NumMatches = 0;
-    if (!MatchB)
-      MatchChars[NumMatches++] = 'b';
-    if (!MatchW)
-      MatchChars[NumMatches++] = 'w';
-    if (!MatchL)
-      MatchChars[NumMatches++] = 'l';
-    if (!MatchQ)
-      MatchChars[NumMatches++] = 'q';
+    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
 
     SmallString<126> Msg;
     raw_svector_ostream OS(Msg);
@@ -959,14 +922,90 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
     }
     OS << ")";
     Error(IDLoc, OS.str());
-  } else {
-    // FIXME: We should give nicer diagnostics about the exact failure.
-    Error(IDLoc, "unrecognized instruction");
+    return true;
   }
 
+  // Okay, we know that none of the variants matched successfully.
+
+  // If all of the instructions reported an invalid mnemonic, then the original
+  // mnemonic was invalid.
+  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+    if (!WasOriginallyInvalidOperand) {
+      Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+      return true;
+    }
+
+    // Recover location info for the operand if we know which was the problem.
+    SMLoc ErrorLoc = IDLoc;
+    if (OrigErrorInfo != ~0U) {
+      if (OrigErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+
+  // If one instruction matched with a missing feature, report this as a
+  // missing feature.
+  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+    return true;
+  }
+
+  // If one instruction matched with an invalid operand, report this as an
+  // operand failure.
+  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+    Error(IDLoc, "invalid operand for instruction");
+    return true;
+  }
+
+  // If all of these were an outright failure, report it in a useless way.
+  // FIXME: We should give nicer diagnostics about the exact failure.
+  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+  return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if (IDVal == ".word")
+    return ParseDirectiveWord(2, DirectiveID.getLoc());
   return true;
 }
 
+/// ParseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      const MCExpr *Value;
+      if (getParser().ParseExpression(Value))
+        return true;
+      
+      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+      
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+      
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+  
+  Parser.Lex();
+  return false;
+}
+
+
+
 
 extern "C" void LLVMInitializeX86AsmLexer();
 
@@ -977,4 +1016,6 @@ extern "C" void LLVMInitializeX86AsmParser() {
   LLVMInitializeX86AsmLexer();
 }
 
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
 #include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 033973eeeff9..000000000000
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMX86AsmPrinter
-  X86ATTInstPrinter.cpp
-  X86IntelInstPrinter.cpp
-  X86InstComments.cpp
-  )
-add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
deleted file mode 100644
index c82aa330a20c..000000000000
--- a/lib/Target/X86/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMX86AsmPrinter
-
-# Hack: we need to include 'main' x86 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
deleted file mode 100644
index 554b96c96e0e..000000000000
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as AT&T-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "X86ATTInstPrinter.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
-using namespace llvm;
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#define GET_INSTRUCTION_NAME
-#include "X86GenAsmWriter.inc"
-#undef MachineInstr
-
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
-  printInstruction(MI, OS);
-  
-  // If verbose assembly is enabled, we can print some informative comments.
-  if (CommentStream)
-    EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
-}
-StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
-  return getInstructionName(Opcode);
-}
-
-
-void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
-                                   raw_ostream &O) {
-  switch (MI->getOperand(Op).getImm()) {
-  default: assert(0 && "Invalid ssecc argument!");
-  case 0: O << "eq"; break;
-  case 1: O << "lt"; break;
-  case 2: O << "le"; break;
-  case 3: O << "unord"; break;
-  case 4: O << "neq"; break;
-  case 5: O << "nlt"; break;
-  case 6: O << "nle"; break;
-  case 7: O << "ord"; break;
-  }
-}
-
-/// print_pcrel_imm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value (e.g. for jumps and calls).  These
-/// print slightly differently than normal immediates.  For example, a $ is not
-/// emitted.
-void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    // Print this as a signed 32-bit value.
-    O << (int)Op.getImm();
-  else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    O << *Op.getExpr();
-  }
-}
-
-void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    O << '%' << getRegisterName(Op.getReg());
-  } else if (Op.isImm()) {
-    O << '$' << Op.getImm();
-    
-    if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
-      *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
-    
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << '$' << *Op.getExpr();
-  }
-}
-
-void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
-                                          raw_ostream &O) {
-  const MCOperand &BaseReg  = MI->getOperand(Op);
-  const MCOperand &IndexReg = MI->getOperand(Op+2);
-  const MCOperand &DispSpec = MI->getOperand(Op+3);
-  const MCOperand &SegReg = MI->getOperand(Op+4);
-  
-  // If this has a segment register, print it.
-  if (SegReg.getReg()) {
-    printOperand(MI, Op+4, O);
-    O << ':';
-  }
-  
-  if (DispSpec.isImm()) {
-    int64_t DispVal = DispSpec.getImm();
-    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
-      O << DispVal;
-  } else {
-    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
-    O << *DispSpec.getExpr();
-  }
-  
-  if (IndexReg.getReg() || BaseReg.getReg()) {
-    O << '(';
-    if (BaseReg.getReg())
-      printOperand(MI, Op, O);
-    
-    if (IndexReg.getReg()) {
-      O << ',';
-      printOperand(MI, Op+2, O);
-      unsigned ScaleVal = MI->getOperand(Op+1).getImm();
-      if (ScaleVal != 1)
-        O << ',' << ScaleVal;
-    }
-    O << ')';
-  }
-}
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
deleted file mode 100644
index eb986643014c..000000000000
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an X86 MCInst to AT&T style .s file syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_ATT_INST_PRINTER_H
-#define X86_ATT_INST_PRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-  class MCOperand;
-  
-class X86ATTInstPrinter : public MCInstPrinter {
-public:
-  X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
-  
-  virtual void printInst(const MCInst *MI, raw_ostream &OS);
-  virtual StringRef getOpcodeName(unsigned Opcode) const;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &OS);
-  static const char *getRegisterName(unsigned RegNo);
-  static const char *getInstructionName(unsigned Opcode);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
-  void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
-  void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-  
-  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  
-  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-};
-  
-}
-
-#endif
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
deleted file mode 100644
index da9d5a3579e5..000000000000
--- a/lib/Target/X86/AsmPrinter/X86InstComments.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines functionality used to emit comments about X86 instructions to
-// an output stream for -fverbose-asm.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86InstComments.h"
-#include "X86GenInstrNames.inc"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/raw_ostream.h"
-#include "../X86ShuffleDecode.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Top Level Entrypoint
-//===----------------------------------------------------------------------===//
-
-/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
-/// newline terminated strings to the specified string if desired.  This
-/// information is shown in disassembly dumps when verbose assembly is enabled.
-void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
-                                  const char *(*getRegName)(unsigned)) {
-  // If this is a shuffle operation, the switch should fill in this state.
-  SmallVector<unsigned, 8> ShuffleMask;
-  const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
-
-  switch (MI->getOpcode()) {
-  case X86::INSERTPSrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
-    break;
-
-  case X86::MOVLHPSrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVLHPSMask(2, ShuffleMask);
-    break;
-
-  case X86::MOVHLPSrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVHLPSMask(2, ShuffleMask);
-    break;
-
-  case X86::PSHUFDri:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    // FALL THROUGH.
-  case X86::PSHUFDmi:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
-                    ShuffleMask);
-    break;
-
-  case X86::PSHUFHWri:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    // FALL THROUGH.
-  case X86::PSHUFHWmi:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
-                      ShuffleMask);
-    break;
-  case X86::PSHUFLWri:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    // FALL THROUGH.
-  case X86::PSHUFLWmi:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
-                      ShuffleMask);
-    break;
-
-  case X86::PUNPCKHBWrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKHBWrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKHMask(16, ShuffleMask);
-    break;
-  case X86::PUNPCKHWDrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKHWDrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKHMask(8, ShuffleMask);
-    break;
-  case X86::PUNPCKHDQrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKHDQrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKHMask(4, ShuffleMask);
-    break;
-  case X86::PUNPCKHQDQrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKHQDQrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKHMask(2, ShuffleMask);
-    break;
-
-  case X86::PUNPCKLBWrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKLBWrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(16, ShuffleMask);
-    break;
-  case X86::PUNPCKLWDrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKLWDrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(8, ShuffleMask);
-    break;
-  case X86::PUNPCKLDQrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKLDQrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(4, ShuffleMask);
-    break;
-  case X86::PUNPCKLQDQrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::PUNPCKLQDQrm:
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    DecodePUNPCKLMask(2, ShuffleMask);
-    break;
-
-  case X86::SHUFPDrri:
-    DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    break;
-
-  case X86::SHUFPSrri:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::SHUFPSrmi:
-    DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::UNPCKLPDrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::UNPCKLPDrm:
-    DecodeUNPCKLPMask(2, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    break;
-  case X86::UNPCKLPSrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::UNPCKLPSrm:
-    DecodeUNPCKLPMask(4, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    break;
-  case X86::UNPCKHPDrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::UNPCKHPDrm:
-    DecodeUNPCKHPMask(2, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    break;
-  case X86::UNPCKHPSrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    // FALL THROUGH.
-  case X86::UNPCKHPSrm:
-    DecodeUNPCKHPMask(4, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(0).getReg());
-    break;
-  }
-
-
-  // If this was a shuffle operation, print the shuffle mask.
-  if (!ShuffleMask.empty()) {
-    if (DestName == 0) DestName = Src1Name;
-    OS << (DestName ? DestName : "mem") << " = ";
-
-    // If the two sources are the same, canonicalize the input elements to be
-    // from the first src so that we get larger element spans.
-    if (Src1Name == Src2Name) {
-      for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
-        if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
-            ShuffleMask[i] >= e)        // From second mask.
-          ShuffleMask[i] -= e;
-      }
-    }
-
-    // The shuffle mask specifies which elements of the src1/src2 fill in the
-    // destination, with a few sentinel values.  Loop through and print them
-    // out.
-    for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
-      if (i != 0)
-        OS << ',';
-      if (ShuffleMask[i] == SM_SentinelZero) {
-        OS << "zero";
-        continue;
-      }
-
-      // Otherwise, it must come from src1 or src2.  Print the span of elements
-      // that comes from this src.
-      bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
-      const char *SrcName = isSrc1 ? Src1Name : Src2Name;
-      OS << (SrcName ? SrcName : "mem") << '[';
-      bool IsFirst = true;
-      while (i != e &&
-             (int)ShuffleMask[i] >= 0 &&
-             (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
-        if (!IsFirst)
-          OS << ',';
-        else
-          IsFirst = false;
-        OS << ShuffleMask[i] % ShuffleMask.size();
-        ++i;
-      }
-      OS << ']';
-      --i;  // For loop increments element #.
-    }
-    //MI->print(OS, 0);
-    OS << "\n";
-  }
-
-}
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h
deleted file mode 100644
index 6b86db4f9e5c..000000000000
--- a/lib/Target/X86/AsmPrinter/X86InstComments.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines functionality used to emit comments about X86 instructions to
-// an output stream for -fverbose-asm.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_INST_COMMENTS_H
-#define X86_INST_COMMENTS_H
-
-namespace llvm {
-  class MCInst;
-  class raw_ostream;
-  void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
-                              const char *(*getRegName)(unsigned));
-}
-
-#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
deleted file mode 100644
index 5625b0ea618f..000000000000
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as AT&T-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "X86IntelInstPrinter.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
-using namespace llvm;
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#define GET_INSTRUCTION_NAME
-#include "X86GenAsmWriter1.inc"
-#undef MachineInstr
-
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
-  printInstruction(MI, OS);
-  
-  // If verbose assembly is enabled, we can print some informative comments.
-  if (CommentStream)
-    EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
-}
-StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
-  return getInstructionName(Opcode);
-}
-
-void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
-                                     raw_ostream &O) {
-  switch (MI->getOperand(Op).getImm()) {
-  default: assert(0 && "Invalid ssecc argument!");
-  case 0: O << "eq"; break;
-  case 1: O << "lt"; break;
-  case 2: O << "le"; break;
-  case 3: O << "unord"; break;
-  case 4: O << "neq"; break;
-  case 5: O << "nlt"; break;
-  case 6: O << "nle"; break;
-  case 7: O << "ord"; break;
-  }
-}
-
-/// print_pcrel_imm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value.
-void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    O << Op.getImm();
-  else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    O << *Op.getExpr();
-  }
-}
-
-static void PrintRegName(raw_ostream &O, StringRef RegName) {
-  for (unsigned i = 0, e = RegName.size(); i != e; ++i)
-    O << (char)toupper(RegName[i]);
-}
-
-void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    PrintRegName(O, getRegisterName(Op.getReg()));
-  } else if (Op.isImm()) {
-    O << Op.getImm();
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << *Op.getExpr();
-  }
-}
-
-void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
-                                            raw_ostream &O) {
-  const MCOperand &BaseReg  = MI->getOperand(Op);
-  unsigned ScaleVal         = MI->getOperand(Op+1).getImm();
-  const MCOperand &IndexReg = MI->getOperand(Op+2);
-  const MCOperand &DispSpec = MI->getOperand(Op+3);
-  const MCOperand &SegReg   = MI->getOperand(Op+4);
-  
-  // If this has a segment register, print it.
-  if (SegReg.getReg()) {
-    printOperand(MI, Op+4, O);
-    O << ':';
-  }
-  
-  O << '[';
-  
-  bool NeedPlus = false;
-  if (BaseReg.getReg()) {
-    printOperand(MI, Op, O);
-    NeedPlus = true;
-  }
-  
-  if (IndexReg.getReg()) {
-    if (NeedPlus) O << " + ";
-    if (ScaleVal != 1)
-      O << ScaleVal << '*';
-    printOperand(MI, Op+2, O);
-    NeedPlus = true;
-  }
-  
-  
-  if (!DispSpec.isImm()) {
-    if (NeedPlus) O << " + ";
-    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
-    O << *DispSpec.getExpr();
-  } else {
-    int64_t DispVal = DispSpec.getImm();
-    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
-      if (NeedPlus) {
-        if (DispVal > 0)
-          O << " + ";
-        else {
-          O << " - ";
-          DispVal = -DispVal;
-        }
-      }
-      O << DispVal;
-    }
-  }
-  
-  O << ']';
-}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
deleted file mode 100644
index 6f120322742b..000000000000
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ /dev/null
@@ -1,95 +0,0 @@
-//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an X86 MCInst to intel style .s file syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_INTEL_INST_PRINTER_H
-#define X86_INTEL_INST_PRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-  class MCOperand;
-  
-class X86IntelInstPrinter : public MCInstPrinter {
-public:
-  X86IntelInstPrinter(const MCAsmInfo &MAI)
-    : MCInstPrinter(MAI) {}
-  
-  virtual void printInst(const MCInst *MI, raw_ostream &OS);
-  virtual StringRef getOpcodeName(unsigned Opcode) const;
-  
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-  static const char *getInstructionName(unsigned Opcode);
-
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  
-  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "OPAQUE PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  
-  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "BYTE PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "WORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "DWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "QWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "XMMWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "YMMWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "DWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "QWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "XWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "XMMWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "YMMWORD PTR ";
-    printMemReference(MI, OpNo, O);
-  }
-};
-  
-}
-
-#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index e9399f5c8322..b5fa94f12bc7 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -24,10 +24,12 @@ set(sources
   X86ELFWriterInfo.cpp
   X86FastISel.cpp
   X86FloatingPoint.cpp
+  X86FrameLowering.cpp
   X86ISelDAGToDAG.cpp
   X86ISelLowering.cpp
   X86InstrInfo.cpp
   X86JITInfo.cpp
+  X86MachObjectWriter.cpp
   X86MCAsmInfo.cpp
   X86MCCodeEmitter.cpp 
   X86MCInstLower.cpp
@@ -39,14 +41,24 @@ set(sources
   )
 
 if( CMAKE_CL_64 )
+  # A workaround for a bug in cmake 2.8.3. See PR 8885.
+  if( CMAKE_VERSION STREQUAL "2.8.3" )
+    include(CMakeDetermineCompilerId)
+  endif()
+  # end of workaround.
   enable_language(ASM_MASM)
   ADD_CUSTOM_COMMAND(
     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+    MAIN_DEPENDENCY X86CompilationCallback_Win64.asm
     COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
    )
    set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
 endif()
 
 add_llvm_target(X86CodeGen ${sources})
 
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 97589c00515b..972a0d9e7e03 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMX86Disassembler
   X86DisassemblerDecoder.c
   )
 # workaround for hanging compilation on MSVC9 and 10
-if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
 set_property(
   SOURCE X86Disassembler.cpp
   PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 09f1584ce4d9..691e2d7204ab 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -157,9 +157,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
 /// @param immediate    - The immediate value to append.
 /// @param operand      - The operand, as stored in the descriptor table.
 /// @param insn         - The internal instruction.
-static void translateImmediate(MCInst &mcInst, 
-                               uint64_t immediate, 
-                               OperandSpecifier &operand,
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+                               const OperandSpecifier &operand,
                                InternalInstruction &insn) {
   // Sign-extend the immediate if necessary.
 
@@ -392,9 +391,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
 ///                       from.
 /// @return             - 0 on success; nonzero otherwise
-static bool translateRM(MCInst &mcInst,
-                       OperandSpecifier &operand,
-                       InternalInstruction &insn) {
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+                        InternalInstruction &insn) {
   switch (operand.type) {
   default:
     debug("Unexpected type for a R/M operand");
@@ -461,9 +459,8 @@ static bool translateFPRegister(MCInst &mcInst,
 /// @param operand      - The operand, as stored in the descriptor table.
 /// @param insn         - The internal instruction.
 /// @return             - false on success; true otherwise.
-static bool translateOperand(MCInst &mcInst,
-                            OperandSpecifier &operand,
-                            InternalInstruction &insn) {
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+                             InternalInstruction &insn) {
   switch (operand.encoding) {
   default:
     debug("Unhandled operand encoding during translation");
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 9c542628d709..550cf9d40de2 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -78,7 +78,7 @@
   const char*             name;
 
 #define INSTRUCTION_IDS               \
-  InstrUID*  instructionIDs;
+  const InstrUID *instructionIDs;
 
 #include "X86DisassemblerDecoderCommon.h"
 
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 6c3ff6bd4a55..b6546fc9e86c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -27,12 +27,6 @@
 
 typedef int8_t bool;
 
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
-#else
-#define NORETURN
-#endif
-
 #ifndef NDEBUG
 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
 #else
@@ -103,7 +97,7 @@ static InstrUID decode(OpcodeType type,
                        InstructionContext insnContext,
                        uint8_t opcode,
                        uint8_t modRM) {
-  struct ModRMDecision* dec;
+  const struct ModRMDecision* dec;
   
   switch (type) {
   default:
@@ -147,7 +141,7 @@ static InstrUID decode(OpcodeType type,
  *              decode(); specifierForUID will not check bounds.
  * @return    - A pointer to the specification for that instruction.
  */
-static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
   return &INSTRUCTIONS_SYM[uid];
 }
 
@@ -296,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
   BOOL isPrefix = TRUE;
   BOOL prefixGroups[4] = { FALSE };
   uint64_t prefixLocation;
-  uint8_t byte;
+  uint8_t byte = 0;
   
   BOOL hasAdSize = FALSE;
   BOOL hasOpSize = FALSE;
@@ -394,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
     }
   } else {
     unconsumeByte(insn);
+    insn->necessaryPrefixLocation = insn->readerCursor - 1;
   }
   
   if (insn->mode == MODE_16BIT) {
@@ -405,7 +400,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
     insn->registerSize       = (hasOpSize ? 2 : 4);
     insn->addressSize        = (hasAdSize ? 2 : 4);
     insn->displacementSize   = (hasAdSize ? 2 : 4);
-    insn->immediateSize      = (hasAdSize ? 2 : 4);
+    insn->immediateSize      = (hasOpSize ? 2 : 4);
   } else if (insn->mode == MODE_64BIT) {
     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
       insn->registerSize       = 8;
@@ -517,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID,
                                     insn->opcode);
   
   if (hasModRMExtension) {
-    readModRM(insn);
+    if (readModRM(insn))
+      return -1;
     
     *instructionID = decode(insn->opcodeType,
                             instructionClass,
@@ -632,9 +628,9 @@ static int getID(struct InternalInstruction* insn) {
      * instead of F2 changes a 32 to a 64, we adopt the new encoding.
      */
     
-    struct InstructionSpecifier* spec;
+    const struct InstructionSpecifier *spec;
     uint16_t instructionIDWithREXw;
-    struct InstructionSpecifier* specWithREXw;
+    const struct InstructionSpecifier *specWithREXw;
     
     spec = specifierForUID(instructionID);
     
@@ -672,9 +668,9 @@ static int getID(struct InternalInstruction* insn) {
      * in the right place we check if there's a 16-bit operation.
      */
     
-    struct InstructionSpecifier* spec;
+    const struct InstructionSpecifier *spec;
     uint16_t instructionIDWithOpsize;
-    struct InstructionSpecifier* specWithOpsize;
+    const struct InstructionSpecifier *specWithOpsize;
     
     spec = specifierForUID(instructionID);
     
@@ -866,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) {
   if (insn->consumedModRM)
     return 0;
   
-  consumeByte(insn, &insn->modRM);
+  if (consumeByte(insn, &insn->modRM))
+    return -1;
   insn->consumedModRM = TRUE;
   
   mod     = modFromModRM(insn->modRM);
@@ -1067,7 +1064,7 @@ GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
  *                invalid for its class.
  */
 static int fixupReg(struct InternalInstruction *insn, 
-                    struct OperandSpecifier *op) {
+                    const struct OperandSpecifier *op) {
   uint8_t valid;
   
   dbgprintf(insn, "fixupReg()");
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 28ba86b03fe8..4f4fbcdd394c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -24,7 +24,7 @@ extern "C" {
   const char*             name;
 
 #define INSTRUCTION_IDS     \
-  InstrUID*  instructionIDs;
+  const InstrUID *instructionIDs;
 
 #include "X86DisassemblerDecoderCommon.h"
   
@@ -423,7 +423,7 @@ struct InternalInstruction {
   /* The instruction ID, extracted from the decode table */
   uint16_t instructionID;
   /* The specifier for the instruction, from the instruction info table */
-  struct InstructionSpecifier* spec;
+  const struct InstructionSpecifier *spec;
   
   /* state for additional bytes, consumed during operand decode.  Pattern:
      consumed___ indicates that the byte was already consumed and does not
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 0f33f525dc2c..1425b86ba53f 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -22,7 +22,7 @@
 #ifndef X86DISASSEMBLERDECODERCOMMON_H
 #define X86DISASSEMBLERDECODERCOMMON_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 #define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
 #define CONTEXTS_SYM      x86DisassemblerContexts
@@ -248,6 +248,7 @@ struct ContextDecision {
   ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
   ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
   ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
+  ENUM_ENTRY(TYPE_M256,       "256-byte (AVX)")                                \
   ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
   ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
   ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..033973eeeff9
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmPrinter
+  X86ATTInstPrinter.cpp
+  X86IntelInstPrinter.cpp
+  X86InstComments.cpp
+  )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/InstPrinter/Makefile b/lib/Target/X86/InstPrinter/Makefile
new file mode 100644
index 000000000000..c82aa330a20c
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..d6950f49f824
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,127 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#include "X86GenAsmWriter.inc"
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+  printInstruction(MI, OS);
+  
+  // If verbose assembly is enabled, we can print some informative comments.
+  if (CommentStream)
+    EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+
+void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+                                   raw_ostream &O) {
+  switch (MI->getOperand(Op).getImm()) {
+  default: assert(0 && "Invalid ssecc argument!");
+  case 0: O << "eq"; break;
+  case 1: O << "lt"; break;
+  case 2: O << "le"; break;
+  case 3: O << "unord"; break;
+  case 4: O << "neq"; break;
+  case 5: O << "nlt"; break;
+  case 6: O << "nle"; break;
+  case 7: O << "ord"; break;
+  }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls).  These
+/// print slightly differently than normal immediates.  For example, a $ is not
+/// emitted.
+void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    // Print this as a signed 32-bit value.
+    O << (int)Op.getImm();
+  else {
+    assert(Op.isExpr() && "unknown pcrel immediate operand");
+    O << *Op.getExpr();
+  }
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << '%' << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << '$' << Op.getImm();
+    
+    if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+      *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+    
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << '$' << *Op.getExpr();
+  }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+                                          raw_ostream &O) {
+  const MCOperand &BaseReg  = MI->getOperand(Op);
+  const MCOperand &IndexReg = MI->getOperand(Op+2);
+  const MCOperand &DispSpec = MI->getOperand(Op+3);
+  const MCOperand &SegReg = MI->getOperand(Op+4);
+  
+  // If this has a segment register, print it.
+  if (SegReg.getReg()) {
+    printOperand(MI, Op+4, O);
+    O << ':';
+  }
+  
+  if (DispSpec.isImm()) {
+    int64_t DispVal = DispSpec.getImm();
+    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+      O << DispVal;
+  } else {
+    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+    O << *DispSpec.getExpr();
+  }
+  
+  if (IndexReg.getReg() || BaseReg.getReg()) {
+    O << '(';
+    if (BaseReg.getReg())
+      printOperand(MI, Op, O);
+    
+    if (IndexReg.getReg()) {
+      O << ',';
+      printOperand(MI, Op+2, O);
+      unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+      if (ScaleVal != 1)
+        O << ',' << ScaleVal;
+    }
+    O << ')';
+  }
+}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..eb986643014c
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -0,0 +1,81 @@
+//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ATT_INST_PRINTER_H
+#define X86_ATT_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+  
+class X86ATTInstPrinter : public MCInstPrinter {
+public:
+  X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+  
+  virtual void printInst(const MCInst *MI, raw_ostream &OS);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &OS);
+  static const char *getRegisterName(unsigned RegNo);
+  static const char *getInstructionName(unsigned Opcode);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+  
+  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  
+  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+};
+  
+}
+
+#endif
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..12144e3f5056
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -0,0 +1,232 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "X86GenInstrNames.inc"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../Utils/X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired.  This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+                                  const char *(*getRegName)(unsigned)) {
+  // If this is a shuffle operation, the switch should fill in this state.
+  SmallVector<unsigned, 8> ShuffleMask;
+  const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+  switch (MI->getOpcode()) {
+  case X86::INSERTPSrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+    break;
+
+  case X86::MOVLHPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVLHPSMask(2, ShuffleMask);
+    break;
+
+  case X86::MOVHLPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVHLPSMask(2, ShuffleMask);
+    break;
+
+  case X86::PSHUFDri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    // FALL THROUGH.
+  case X86::PSHUFDmi:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+                    ShuffleMask);
+    break;
+
+  case X86::PSHUFHWri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    // FALL THROUGH.
+  case X86::PSHUFHWmi:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+                      ShuffleMask);
+    break;
+  case X86::PSHUFLWri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    // FALL THROUGH.
+  case X86::PSHUFLWmi:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+                      ShuffleMask);
+    break;
+
+  case X86::PUNPCKHBWrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKHBWrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKHMask(16, ShuffleMask);
+    break;
+  case X86::PUNPCKHWDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKHWDrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKHMask(8, ShuffleMask);
+    break;
+  case X86::PUNPCKHDQrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKHDQrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKHMask(4, ShuffleMask);
+    break;
+  case X86::PUNPCKHQDQrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKHQDQrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKHMask(2, ShuffleMask);
+    break;
+
+  case X86::PUNPCKLBWrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKLBWrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKLMask(16, ShuffleMask);
+    break;
+  case X86::PUNPCKLWDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKLWDrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKLMask(8, ShuffleMask);
+    break;
+  case X86::PUNPCKLDQrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKLDQrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKLMask(4, ShuffleMask);
+    break;
+  case X86::PUNPCKLQDQrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::PUNPCKLQDQrm:
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    DecodePUNPCKLMask(2, ShuffleMask);
+    break;
+
+  case X86::SHUFPDrri:
+    DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    break;
+
+  case X86::SHUFPSrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::SHUFPSrmi:
+    DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::UNPCKLPDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::UNPCKLPDrm:
+    DecodeUNPCKLPMask(2, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    break;
+  case X86::UNPCKLPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::UNPCKLPSrm:
+    DecodeUNPCKLPMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    break;
+  case X86::UNPCKHPDrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::UNPCKHPDrm:
+    DecodeUNPCKHPMask(2, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    break;
+  case X86::UNPCKHPSrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    // FALL THROUGH.
+  case X86::UNPCKHPSrm:
+    DecodeUNPCKHPMask(4, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(0).getReg());
+    break;
+  }
+
+
+  // If this was a shuffle operation, print the shuffle mask.
+  if (!ShuffleMask.empty()) {
+    if (DestName == 0) DestName = Src1Name;
+    OS << (DestName ? DestName : "mem") << " = ";
+
+    // If the two sources are the same, canonicalize the input elements to be
+    // from the first src so that we get larger element spans.
+    if (Src1Name == Src2Name) {
+      for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+        if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+            ShuffleMask[i] >= e)        // From second mask.
+          ShuffleMask[i] -= e;
+      }
+    }
+
+    // The shuffle mask specifies which elements of the src1/src2 fill in the
+    // destination, with a few sentinel values.  Loop through and print them
+    // out.
+    for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+      if (i != 0)
+        OS << ',';
+      if (ShuffleMask[i] == SM_SentinelZero) {
+        OS << "zero";
+        continue;
+      }
+
+      // Otherwise, it must come from src1 or src2.  Print the span of elements
+      // that comes from this src.
+      bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+      const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+      OS << (SrcName ? SrcName : "mem") << '[';
+      bool IsFirst = true;
+      while (i != e &&
+             (int)ShuffleMask[i] >= 0 &&
+             (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+        if (!IsFirst)
+          OS << ',';
+        else
+          IsFirst = false;
+        OS << ShuffleMask[i] % ShuffleMask.size();
+        ++i;
+      }
+      OS << ']';
+      --i;  // For loop increments element #.
+    }
+    //MI->print(OS, 0);
+    OS << "\n";
+  }
+
+}
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
new file mode 100644
index 000000000000..6b86db4f9e5c
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+  class MCInst;
+  class raw_ostream;
+  void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+                              const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..048452985089
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,139 @@
+//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "X86GenInstrNames.inc"
+#include <cctype>
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define GET_INSTRUCTION_NAME
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
+  printInstruction(MI, OS);
+  
+  // If verbose assembly is enabled, we can print some informative comments.
+  if (CommentStream)
+    EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+}
+StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
+                                     raw_ostream &O) {
+  switch (MI->getOperand(Op).getImm()) {
+  default: assert(0 && "Invalid ssecc argument!");
+  case 0: O << "eq"; break;
+  case 1: O << "lt"; break;
+  case 2: O << "le"; break;
+  case 3: O << "unord"; break;
+  case 4: O << "neq"; break;
+  case 5: O << "nlt"; break;
+  case 6: O << "nle"; break;
+  case 7: O << "ord"; break;
+  }
+}
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << Op.getImm();
+  else {
+    assert(Op.isExpr() && "unknown pcrel immediate operand");
+    O << *Op.getExpr();
+  }
+}
+
+static void PrintRegName(raw_ostream &O, StringRef RegName) {
+  for (unsigned i = 0, e = RegName.size(); i != e; ++i)
+    O << (char)toupper(RegName[i]);
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    PrintRegName(O, getRegisterName(Op.getReg()));
+  } else if (Op.isImm()) {
+    O << Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << *Op.getExpr();
+  }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+                                            raw_ostream &O) {
+  const MCOperand &BaseReg  = MI->getOperand(Op);
+  unsigned ScaleVal         = MI->getOperand(Op+1).getImm();
+  const MCOperand &IndexReg = MI->getOperand(Op+2);
+  const MCOperand &DispSpec = MI->getOperand(Op+3);
+  const MCOperand &SegReg   = MI->getOperand(Op+4);
+  
+  // If this has a segment register, print it.
+  if (SegReg.getReg()) {
+    printOperand(MI, Op+4, O);
+    O << ':';
+  }
+  
+  O << '[';
+  
+  bool NeedPlus = false;
+  if (BaseReg.getReg()) {
+    printOperand(MI, Op, O);
+    NeedPlus = true;
+  }
+  
+  if (IndexReg.getReg()) {
+    if (NeedPlus) O << " + ";
+    if (ScaleVal != 1)
+      O << ScaleVal << '*';
+    printOperand(MI, Op+2, O);
+    NeedPlus = true;
+  }
+  
+  
+  if (!DispSpec.isImm()) {
+    if (NeedPlus) O << " + ";
+    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+    O << *DispSpec.getExpr();
+  } else {
+    int64_t DispVal = DispSpec.getImm();
+    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+      if (NeedPlus) {
+        if (DispVal > 0)
+          O << " + ";
+        else {
+          O << " - ";
+          DispVal = -DispVal;
+        }
+      }
+      O << DispVal;
+    }
+  }
+  
+  O << ']';
+}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..6f120322742b
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -0,0 +1,95 @@
+//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INTEL_INST_PRINTER_H
+#define X86_INTEL_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+  class MCOperand;
+  
+class X86IntelInstPrinter : public MCInstPrinter {
+public:
+  X86IntelInstPrinter(const MCAsmInfo &MAI)
+    : MCInstPrinter(MAI) {}
+  
+  virtual void printInst(const MCInst *MI, raw_ostream &OS);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+  
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+  static const char *getInstructionName(unsigned Opcode);
+
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  
+  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "OPAQUE PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  
+  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "BYTE PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "WORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "DWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "QWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "XMMWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "YMMWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "DWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "QWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "XWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "XMMWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "YMMWORD PTR ";
+    printMemReference(MI, OpNo, O);
+  }
+};
+  
+}
+
+#endif
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index f4ff894a2af7..12fb090d4dce 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -20,6 +20,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
                 X86GenCallingConv.inc X86GenSubtarget.inc \
 		X86GenEDInfo.inc
 
-DIRS = AsmPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index f96b22f1e204..f16ec029e96a 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
 //===---------------------------------------------------------------------===//
 
 SSE has instructions for doing operations on complex numbers, we should pattern
-match them.  Compiling this:
+match them.   For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+  return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32:                                   ## @f32
+	pshufd	$1, %xmm0, %xmm1        ## xmm1 = xmm0[1,0,0,0]
+	addss	%xmm0, %xmm1
+	pshufd	$3, %xmm0, %xmm2        ## xmm2 = xmm0[3,0,0,0]
+	movhlps	%xmm0, %xmm0            ## xmm0 = xmm0[1,1]
+	movaps	%xmm0, %xmm3
+	addss	%xmm1, %xmm3
+	movdqa	%xmm2, %xmm0
+	addss	%xmm3, %xmm0
+	ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+compiling this:
 
 _Complex float f32(_Complex float A, _Complex float B) {
   return A+B;
@@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
 
 into:
 
-_f32:
+_f32:                                   ## @f32
 	movdqa	%xmm0, %xmm2
 	addss	%xmm1, %xmm2
-	pshufd	$16, %xmm2, %xmm2
-	pshufd	$1, %xmm1, %xmm1
-	pshufd	$1, %xmm0, %xmm0
-	addss	%xmm1, %xmm0
-	pshufd	$16, %xmm0, %xmm1
-	movdqa	%xmm2, %xmm0
-	unpcklps	%xmm1, %xmm0
+	pshufd	$1, %xmm1, %xmm1        ## xmm1 = xmm1[1,0,0,0]
+	pshufd	$1, %xmm0, %xmm3        ## xmm3 = xmm0[1,0,0,0]
+	addss	%xmm1, %xmm3
+	movaps	%xmm2, %xmm0
+	unpcklps	%xmm3, %xmm0    ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
 	ret
 
-seems silly. 
+seems silly when it could just be one addps.
 
 
 //===---------------------------------------------------------------------===//
@@ -904,4 +923,15 @@ The insertps's of $0 are pointless complex copies.
 
 //===---------------------------------------------------------------------===//
 
+If SSE4.1 is available we should inline rounding functions instead of emitting
+a libcall.
+
+floor: roundsd $0x01, %xmm, %xmm
+ceil:  roundsd $0x02, %xmm, %xmm
 
+and likewise for the single precision versions.
+
+Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
+corresponding nodes and some targets (including X86) aren't ready for them.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 78c4dc00ee72..e21d69a7bcbf 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
 
 //===---------------------------------------------------------------------===//
 
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
-  memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
-	movq _b@GOTPCREL(%rip), %rax
-	movzbq (%rax), %rax
-	movq %rax, %rcx
-	shlq $8, %rcx
-	orq %rax, %rcx
-	movq %rcx, %rax
-	shlq $16, %rax
-	orq %rcx, %rax
-	movq %rax, %rcx
-	shlq $32, %rcx
-	movq _X@GOTPCREL(%rip), %rdx
-	orq %rax, %rcx
-	movq %rcx, (%rdx)
-	ret
-
-gcc:
-	movq	_b@GOTPCREL(%rip), %rax
-	movabsq	$72340172838076673, %rdx
-	movzbq	(%rax), %rax
-	imulq	%rdx, %rax
-	movq	_X@GOTPCREL(%rip), %rdx
-	movq	%rax, (%rdx)
-	ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
-  void fill1(char *s, int a)
-  {
-    __builtin_memset(s, a, 15);
-  }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
 It's not possible to reference AH, BH, CH, and DH registers in an instruction
 requiring REX prefix. However, divb and mulb both produce results in AH. If isel
 emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index a305ae6ec550..c10e1709f667 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -67,19 +67,6 @@ cmovs, we should expand to a conditional branch like GCC produces.
 
 //===---------------------------------------------------------------------===//
 
-Compile this:
-_Bool f(_Bool a) { return a!=1; }
-
-into:
-        movzbl  %dil, %eax
-        xorl    $1, %eax
-        ret
-
-(Although note that this isn't a legal way to express the code that llvm-gcc
-currently generates for that function.)
-
-//===---------------------------------------------------------------------===//
-
 Some isel ideas:
 
 1. Dynamic programming based approach when compile time if not an
@@ -109,6 +96,37 @@ It appears icc use push for parameter passing. Need to investigate.
 
 //===---------------------------------------------------------------------===//
 
+This:
+
+void foo(void);
+void bar(int x, int *P) { 
+  x >>= 2;
+  if (x) 
+    foo();
+  *P = x;
+}
+
+compiles into:
+
+	movq	%rsi, %rbx
+	movl	%edi, %r14d
+	sarl	$2, %r14d
+	testl	%r14d, %r14d
+	je	LBB0_2
+
+Instead of doing an explicit test, we can use the flags off the sar.  This
+occurs in a bigger testcase like this, which is pretty common:
+
+#include <vector>
+int test1(std::vector<int> &X) {
+  int Sum = 0;
+  for (long i = 0, e = X.size(); i != e; ++i)
+    X[i] = 0;
+  return Sum;
+}
+
+//===---------------------------------------------------------------------===//
+
 Only use inc/neg/not instructions on processors where they are faster than
 add/sub/xor.  They are slower on the P4 due to only updating some processor
 flags.
@@ -394,72 +412,8 @@ boundary to improve performance.
 
 //===---------------------------------------------------------------------===//
 
-Codegen:
-
-int f(int a, int b) {
-  if (a == 4 || a == 6)
-    b++;
-  return b;
-}
-
-
-as:
-
-or eax, 2
-cmp eax, 6
-jz label
-
-//===---------------------------------------------------------------------===//
-
 GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
-simplifications for integer "x cmp y ? a : b".  For example, instead of:
-
-int G;
-void f(int X, int Y) {
-  G = X < 0 ? 14 : 13;
-}
-
-compiling to:
-
-_f:
-        movl $14, %eax
-        movl $13, %ecx
-        movl 4(%esp), %edx
-        testl %edx, %edx
-        cmovl %eax, %ecx
-        movl %ecx, _G
-        ret
-
-it could be:
-_f:
-        movl    4(%esp), %eax
-        sarl    $31, %eax
-        notl    %eax
-        addl    $14, %eax
-        movl    %eax, _G
-        ret
-
-etc.
-
-Another is:
-int usesbb(unsigned int a, unsigned int b) {
-       return (a < b ? -1 : 0);
-}
-to:
-_usesbb:
-	movl	8(%esp), %eax
-	cmpl	%eax, 4(%esp)
-	sbbl	%eax, %eax
-	ret
-
-instead of:
-_usesbb:
-	xorl	%eax, %eax
-	movl	8(%esp), %ecx
-	cmpl	%ecx, 4(%esp)
-	movl	$4294967295, %ecx
-	cmovb	%ecx, %eax
-	ret
+simplifications for integer "x cmp y ? a : b".
 
 //===---------------------------------------------------------------------===//
 
@@ -756,23 +710,17 @@ This:
         { return !full_add(a, b).second; }
 
 Should compile to:
+	addl	%esi, %edi
+	setae	%al
+	movzbl	%al, %eax
+	ret
 
-
-        _Z11no_overflowjj:
-                addl    %edi, %esi
-                setae   %al
-                ret
-
-FIXME: That code looks wrong; bool return is normally defined as zext.
-
-on x86-64, not:
-
-__Z11no_overflowjj:
-        addl    %edi, %esi
-        cmpl    %edi, %esi
-        setae   %al
-        movzbl  %al, %eax
-        ret
+on x86-64, instead of the rather stupid-looking:
+	addl	%esi, %edi
+	setb	%al
+	xorb	$1, %al
+	movzbl	%al, %eax
+	ret
 
 
 //===---------------------------------------------------------------------===//
@@ -1040,10 +988,10 @@ _foo:
 
 instead of:
 _foo:
-        movl    $255, %eax
-        orl     4(%esp), %eax
-        andl    $65535, %eax
-        ret
+	movl	$65280, %eax
+	andl	4(%esp), %eax
+	orl	$255, %eax
+	ret
 
 //===---------------------------------------------------------------------===//
 
@@ -1165,58 +1113,6 @@ abs:
 
 //===---------------------------------------------------------------------===//
 
-Consider:
-int test(unsigned long a, unsigned long b) { return -(a < b); }
-
-We currently compile this to:
-
-define i32 @test(i32 %a, i32 %b) nounwind  {
-	%tmp3 = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	%tmp34 = zext i1 %tmp3 to i32		; <i32> [#uses=1]
-	%tmp5 = sub i32 0, %tmp34		; <i32> [#uses=1]
-	ret i32 %tmp5
-}
-
-and
-
-_test:
-	movl	8(%esp), %eax
-	cmpl	%eax, 4(%esp)
-	setb	%al
-	movzbl	%al, %eax
-	negl	%eax
-	ret
-
-Several deficiencies here.  First, we should instcombine zext+neg into sext:
-
-define i32 @test2(i32 %a, i32 %b) nounwind  {
-	%tmp3 = icmp ult i32 %a, %b		; <i1> [#uses=1]
-	%tmp34 = sext i1 %tmp3 to i32		; <i32> [#uses=1]
-	ret i32 %tmp34
-}
-
-However, before we can do that, we have to fix the bad codegen that we get for
-sext from bool:
-
-_test2:
-	movl	8(%esp), %eax
-	cmpl	%eax, 4(%esp)
-	setb	%al
-	movzbl	%al, %eax
-	shll	$31, %eax
-	sarl	$31, %eax
-	ret
-
-This code should be at least as good as the code above.  Once this is fixed, we
-can optimize this specific case even more to:
-
-	movl	8(%esp), %eax
-	xorl	%ecx, %ecx
-        cmpl    %eax, 4(%esp)
-        sbbl    %ecx, %ecx
-
-//===---------------------------------------------------------------------===//
-
 Take the following code (from 
 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541):
 
@@ -1605,6 +1501,8 @@ loop, the value comes into the loop as two values, and
 RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
 constructed BUILD_PAIR which represents the cast value.
 
+This can be handled by making CodeGenPrepare sink the cast.
+
 //===---------------------------------------------------------------------===//
 
 Test instructions can be eliminated by using EFLAGS values from arithmetic
@@ -1736,46 +1634,6 @@ Ideal output:
 
 //===---------------------------------------------------------------------===//
 
-Testcase:
-int x(int a) { return (a & 0x80) ? 0x100 : 0; }
-int y(int a) { return (a & 0x80) *2; }
-
-Current:
-	testl	$128, 4(%esp)
-	setne	%al
-	movzbl	%al, %eax
-	shll	$8, %eax
-	ret
-
-Better:
-	movl	4(%esp), %eax
-	addl	%eax, %eax
-	andl	$256, %eax
-	ret
-
-This is another general instcombine transformation that is profitable on all
-targets.  In LLVM IR, these functions look like this:
-
-define i32 @x(i32 %a) nounwind readnone {
-entry:
-	%0 = and i32 %a, 128
-	%1 = icmp eq i32 %0, 0
-	%iftmp.0.0 = select i1 %1, i32 0, i32 256
-	ret i32 %iftmp.0.0
-}
-
-define i32 @y(i32 %a) nounwind readnone {
-entry:
-	%0 = shl i32 %a, 1
-	%1 = and i32 %0, 256
-	ret i32 %1
-}
-
-Replacing an icmp+select with a shift should always be considered profitable in
-instcombine.
-
-//===---------------------------------------------------------------------===//
-
 Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch
 properly.
 
@@ -1960,3 +1818,100 @@ load, making it non-trivial to determine if there's anything between
 the load and the store which would prohibit narrowing.
 
 //===---------------------------------------------------------------------===//
+
+This code:
+void foo(unsigned x) {
+  if (x == 0) bar();
+  else if (x == 1) qux();
+}
+
+currently compiles into:
+_foo:
+	movl	4(%esp), %eax
+	cmpl	$1, %eax
+	je	LBB0_3
+	testl	%eax, %eax
+	jne	LBB0_4
+
+the testl could be removed:
+_foo:
+	movl	4(%esp), %eax
+	cmpl	$1, %eax
+	je	LBB0_3
+	jb	LBB0_4
+
+0 is the only unsigned number < 1.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+%0 = type { i32, i1 }
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+  %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x)
+  %cmp = extractvalue %0 %uadd, 1
+  %inc = zext i1 %cmp to i32
+  %add = add i32 %x, %sum
+  %z.0 = add i32 %add, %inc
+  ret i32 %z.0
+}
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+compiles to:
+
+_add32carry:                            ## @add32carry
+	addl	%esi, %edi
+	sbbl	%ecx, %ecx
+	movl	%edi, %eax
+	subl	%ecx, %eax
+	ret
+
+But it could be:
+
+_add32carry:
+	leal	(%rsi,%rdi), %eax
+	cmpl	%esi, %eax
+	adcl	$0, %eax
+	ret
+
+//===---------------------------------------------------------------------===//
+
+This:
+char t(char c) {
+  return c/3;
+}
+
+Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
+
+_t:                                     ## @t
+	movslq	%edi, %rax
+	imulq	$-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB
+	shrq	$32, %rcx
+	addl	%ecx, %eax
+	movl	%eax, %ecx
+	shrl	$31, %ecx
+	shrl	%eax
+	addl	%ecx, %eax
+	movsbl	%al, %eax
+	ret
+
+GCC gets:
+
+_t:
+	movl	$86, %eax
+	imulb	%dil
+	shrw	$8, %ax
+	sarb	$7, %dil
+	subb	%dil, %al
+	movsbl	%al,%eax
+	ret
+
+which is nicer.  This also happens for int, not just char.
+
+//===---------------------------------------------------------------------===//
+
+
+
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..3ad5f991c865
--- /dev/null
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+  X86ShuffleDecode.cpp
+  )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile
new file mode 100644
index 000000000000..1df6f0f561d4
--- /dev/null
+++ b/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 000000000000..12879774d780
--- /dev/null
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,148 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+//  Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+  // Defaults the copying the dest value.
+  ShuffleMask.push_back(0);
+  ShuffleMask.push_back(1);
+  ShuffleMask.push_back(2);
+  ShuffleMask.push_back(3);
+
+  // Decode the immediate.
+  unsigned ZMask = Imm & 15;
+  unsigned CountD = (Imm >> 4) & 3;
+  unsigned CountS = (Imm >> 6) & 3;
+
+  // CountS selects which input element to use.
+  unsigned InVal = 4+CountS;
+  // CountD specifies which element of destination to update.
+  ShuffleMask[CountD] = InVal;
+  // ZMask zaps values, potentially overriding the CountD elt.
+  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = NElts/2; i != NElts; ++i)
+    ShuffleMask.push_back(NElts+i);
+
+  for (unsigned i = NElts/2; i != NElts; ++i)
+    ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts/2; ++i)
+    ShuffleMask.push_back(i);
+
+  for (unsigned i = 0; i != NElts/2; ++i)
+    ShuffleMask.push_back(NElts+i);
+}
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+                     SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts; ++i) {
+    ShuffleMask.push_back(Imm % NElts);
+    Imm /= NElts;
+  }
+}
+
+void DecodePSHUFHWMask(unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  ShuffleMask.push_back(0);
+  ShuffleMask.push_back(1);
+  ShuffleMask.push_back(2);
+  ShuffleMask.push_back(3);
+  for (unsigned i = 0; i != 4; ++i) {
+    ShuffleMask.push_back(4+(Imm & 3));
+    Imm >>= 2;
+  }
+}
+
+void DecodePSHUFLWMask(unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != 4; ++i) {
+    ShuffleMask.push_back((Imm & 3));
+    Imm >>= 2;
+  }
+  ShuffleMask.push_back(4);
+  ShuffleMask.push_back(5);
+  ShuffleMask.push_back(6);
+  ShuffleMask.push_back(7);
+}
+
+void DecodePUNPCKLMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(i);
+    ShuffleMask.push_back(i+NElts);
+  }
+}
+
+void DecodePUNPCKHMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(i+NElts/2);
+    ShuffleMask.push_back(i+NElts+NElts/2);
+  }
+}
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+                      SmallVectorImpl<unsigned> &ShuffleMask) {
+  // Part that reads from dest.
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(Imm % NElts);
+    Imm /= NElts;
+  }
+  // Part that reads from src.
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(Imm % NElts + NElts);
+    Imm /= NElts;
+  }
+}
+
+void DecodeUNPCKHPMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(i+NElts/2);        // Reads from dest
+    ShuffleMask.push_back(i+NElts+NElts/2);  // Reads from src
+  }
+}
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc.  NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+void DecodeUNPCKLPMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask) {
+  for (unsigned i = 0; i != NElts/2; ++i) {
+    ShuffleMask.push_back(i);        // Reads from dest
+    ShuffleMask.push_back(i+NElts);  // Reads from src
+  }
+}
+
+} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 000000000000..50d9ccbfa68c
--- /dev/null
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,69 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+//  Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+  SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+                     SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+                      SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc.  NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+void DecodeUNPCKLPMask(unsigned NElts,
+                       SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 27e88505150b..0ca436690040 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_X86_H
 #define TARGET_X86_H
 
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -23,11 +24,13 @@ class FunctionPass;
 class JITCodeEmitter;
 class MCCodeEmitter;
 class MCContext;
+class MCObjectWriter;
 class MachineCodeEmitter;
 class Target;
 class TargetAsmBackend;
 class X86TargetMachine;
 class formatted_raw_ostream;
+class raw_ostream;
 
 /// createX86ISelDag - This pass converts a legalized DAG into a 
 /// X86-specific DAG, ready for instruction scheduling.
@@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory();
 ///
 FunctionPass *createX86MaxStackAlignmentHeuristicPass();
 
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+                                          bool Is64Bit,
+                                          uint32_t CPUType,
+                                          uint32_t CPUSubtype);
+
 extern Target TheX86_32Target, TheX86_64Target;
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a19f1acffaca..efb6c8c0adc6 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
                                       "Enable conditional move instructions">;
 
+def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+                                       "Support POPCNT instruction">;
+
 
 def FeatureMMX     : SubtargetFeature<"mmx","X86SSELevel", "MMX",
                                       "Enable MMX instructions">;
@@ -45,7 +48,7 @@ def FeatureSSE41   : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
                                       [FeatureSSSE3]>;
 def FeatureSSE42   : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
                                       "Enable SSE 4.2 instructions",
-                                      [FeatureSSE41]>;
+                                      [FeatureSSE41, FeaturePOPCNT]>;
 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                                       "Enable 3DNow! instructions">;
 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
                                         "IsUAMemFast", "true",
                                         "Fast unaligned memory access">;
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
-                                      "Support SSE 4a instructions">;
+                                      "Support SSE 4a instructions",
+                                      [FeaturePOPCNT]>;
 
 def FeatureAVX     : SubtargetFeature<"avx", "HasAVX", "true",
                                       "Enable AVX instructions">;
@@ -112,11 +116,13 @@ def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
                                FeatureFastUAMem]>;
 // Westmere is a similar machine to nehalem with some additional features.
 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
-                                FeatureFastUAMem, FeatureAES]>;
-// Sandy Bridge does not have FMA
-// FIXME: Wikipedia says it does... it should have AES as well.
-def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;
+def : Proc<"westmere",        [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
+                               FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"sandybridge",     [FeatureSSE42, Feature64Bit,
+                               FeatureAES, FeatureCLMUL]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;
@@ -176,7 +182,7 @@ include "X86CallingConv.td"
 
 
 //===----------------------------------------------------------------------===//
-// Assembly Printers
+// Assembly Parser
 //===----------------------------------------------------------------------===//
 
 // Currently the X86 assembly parser only supports ATT syntax.
@@ -191,15 +197,21 @@ def ATTAsmParser : AsmParser {
   string RegisterPrefix = "%";
 }
 
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
 // The X86 target supports two different syntaxes for emitting machine code.
 // This is controlled by the -x86-asm-syntax={att|intel}
 def ATTAsmWriter : AsmWriter {
   string AsmWriterClassName  = "ATTInstPrinter";
   int Variant = 0;
+  bit isMCAsmWriter = 1;
 }
 def IntelAsmWriter : AsmWriter {
   string AsmWriterClassName  = "IntelInstPrinter";
   int Variant = 1;
+  bit isMCAsmWriter = 1;
 }
 
 def X86 : Target {
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 69dc967f9d88..da5f5b182ce9 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -11,50 +11,83 @@
 #include "X86.h"
 #include "X86FixupKinds.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/ELFObjectWriter.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
 
-
 static unsigned getFixupKindLog2Size(unsigned Kind) {
   switch (Kind) {
   default: assert(0 && "invalid fixup kind!");
-  case X86::reloc_pcrel_1byte:
+  case FK_PCRel_1:
   case FK_Data_1: return 0;
-  case X86::reloc_pcrel_2byte:
+  case FK_PCRel_2:
   case FK_Data_2: return 1;
-  case X86::reloc_pcrel_4byte:
+  case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_movq_load:
+  case X86::reloc_signed_4byte:
+  case X86::reloc_global_offset_table:
   case FK_Data_4: return 2;
+  case FK_PCRel_8:
   case FK_Data_8: return 3;
   }
 }
 
 namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+                     bool HasRelocationAddend)
+    : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
 class X86AsmBackend : public TargetAsmBackend {
 public:
   X86AsmBackend(const Target &T)
-    : TargetAsmBackend(T) {}
+    : TargetAsmBackend() {}
+
+  unsigned getNumFixupKinds() const {
+    return X86::NumTargetFixupKinds;
+  }
+
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+    const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+      { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+      { "reloc_signed_4byte", 0, 4 * 8, 0},
+      { "reloc_global_offset_table", 0, 4 * 8, 0}
+    };
+
+    if (Kind < FirstTargetFixupKind)
+      return TargetAsmBackend::getFixupKindInfo(Kind);
 
-  void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
+    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+           "Invalid kind!");
+    return Infos[Kind - FirstTargetFixupKind];
+  }
+
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                   uint64_t Value) const {
     unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
 
-    assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
+    assert(Fixup.getOffset() + Size <= DataSize &&
            "Invalid fixup offset!");
     for (unsigned i = 0; i != Size; ++i)
-      DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+      Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
   }
 
   bool MayNeedRelaxation(const MCInst &Inst) const;
@@ -63,9 +96,9 @@ public:
 
   bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
 };
-} // end anonymous namespace 
+} // end anonymous namespace
 
-static unsigned getRelaxedOpcode(unsigned Op) {
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
   switch (Op) {
   default:
     return Op;
@@ -90,16 +123,104 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   }
 }
 
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+  switch (Op) {
+  default:
+    return Op;
+
+    // IMUL
+  case X86::IMUL16rri8: return X86::IMUL16rri;
+  case X86::IMUL16rmi8: return X86::IMUL16rmi;
+  case X86::IMUL32rri8: return X86::IMUL32rri;
+  case X86::IMUL32rmi8: return X86::IMUL32rmi;
+  case X86::IMUL64rri8: return X86::IMUL64rri32;
+  case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+    // AND
+  case X86::AND16ri8: return X86::AND16ri;
+  case X86::AND16mi8: return X86::AND16mi;
+  case X86::AND32ri8: return X86::AND32ri;
+  case X86::AND32mi8: return X86::AND32mi;
+  case X86::AND64ri8: return X86::AND64ri32;
+  case X86::AND64mi8: return X86::AND64mi32;
+
+    // OR
+  case X86::OR16ri8: return X86::OR16ri;
+  case X86::OR16mi8: return X86::OR16mi;
+  case X86::OR32ri8: return X86::OR32ri;
+  case X86::OR32mi8: return X86::OR32mi;
+  case X86::OR64ri8: return X86::OR64ri32;
+  case X86::OR64mi8: return X86::OR64mi32;
+
+    // XOR
+  case X86::XOR16ri8: return X86::XOR16ri;
+  case X86::XOR16mi8: return X86::XOR16mi;
+  case X86::XOR32ri8: return X86::XOR32ri;
+  case X86::XOR32mi8: return X86::XOR32mi;
+  case X86::XOR64ri8: return X86::XOR64ri32;
+  case X86::XOR64mi8: return X86::XOR64mi32;
+
+    // ADD
+  case X86::ADD16ri8: return X86::ADD16ri;
+  case X86::ADD16mi8: return X86::ADD16mi;
+  case X86::ADD32ri8: return X86::ADD32ri;
+  case X86::ADD32mi8: return X86::ADD32mi;
+  case X86::ADD64ri8: return X86::ADD64ri32;
+  case X86::ADD64mi8: return X86::ADD64mi32;
+
+    // SUB
+  case X86::SUB16ri8: return X86::SUB16ri;
+  case X86::SUB16mi8: return X86::SUB16mi;
+  case X86::SUB32ri8: return X86::SUB32ri;
+  case X86::SUB32mi8: return X86::SUB32mi;
+  case X86::SUB64ri8: return X86::SUB64ri32;
+  case X86::SUB64mi8: return X86::SUB64mi32;
+
+    // CMP
+  case X86::CMP16ri8: return X86::CMP16ri;
+  case X86::CMP16mi8: return X86::CMP16mi;
+  case X86::CMP32ri8: return X86::CMP32ri;
+  case X86::CMP32mi8: return X86::CMP32mi;
+  case X86::CMP64ri8: return X86::CMP64ri32;
+  case X86::CMP64mi8: return X86::CMP64mi32;
+
+    // PUSH
+  case X86::PUSHi8: return X86::PUSHi32;
+  }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+  unsigned R = getRelaxedOpcodeArith(Op);
+  if (R != Op)
+    return R;
+  return getRelaxedOpcodeBranch(Op);
+}
+
 bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // Branches can always be relaxed.
+  if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+    return true;
+
   // Check if this instruction is ever relaxable.
-  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+  if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
     return false;
 
-  // If so, just assume it can be relaxed. Once we support relaxing more complex
-  // instructions we should check that the instruction actually has symbolic
-  // operands before doing this, but we need to be careful about things like
-  // PCrel.
-  return true;
+
+  // Check if it has an expression and is not RIP relative.
+  bool hasExp = false;
+  bool hasRIP = false;
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+    const MCOperand &Op = Inst.getOperand(i);
+    if (Op.isExpr())
+      hasExp = true;
+
+    if (Op.isReg() && Op.getReg() == X86::RIP)
+      hasRIP = true;
+  }
+
+  // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+  // how we do relaxations?
+  return hasExp && !hasRIP;
 }
 
 // FIXME: Can tblgen help at all here to verify there aren't other instructions
@@ -123,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
 /// WriteNopData - Write optimal nops to the output file for the \arg Count
 /// bytes.  This returns the number of bytes written.  It may return 0 if
 /// the \arg Count is more than the maximum optimal nops.
-///
-/// FIXME this is X86 32-bit specific and should move to a better place.
 bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
-  static const uint8_t Nops[16][16] = {
+  static const uint8_t Nops[10][10] = {
     // nop
     {0x90},
     // xchg %ax,%ax
@@ -147,32 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
     {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
     // nopw %cs:0L(%[re]ax,%[re]ax,1)
     {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
-    // nopl 0(%[re]ax,%[re]ax,1)
-    // nopw 0(%[re]ax,%[re]ax,1)
-    {0x0f, 0x1f, 0x44, 0x00, 0x00,
-     0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
-    // nopw 0(%[re]ax,%[re]ax,1)
-    // nopw 0(%[re]ax,%[re]ax,1)
-    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
-     0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
-    // nopw 0(%[re]ax,%[re]ax,1)
-    // nopl 0L(%[re]ax) */
-    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
-     0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
-    // nopl 0L(%[re]ax)
-    // nopl 0L(%[re]ax)
-    {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
-     0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
-    // nopl 0L(%[re]ax)
-    // nopl 0L(%[re]ax,%[re]ax,1)
-    {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
-     0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
   };
 
   // Write an optimal sequence for the first 15 bytes.
-  uint64_t OptimalCount = (Count < 16) ? Count : 15;
-  for (uint64_t i = 0, e = OptimalCount; i != e; i++)
-    OW->Write8(Nops[OptimalCount - 1][i]);
+  const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+  const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+  for (uint64_t i = 0, e = Prefixes; i != e; i++)
+    OW->Write8(0x66);
+  const uint64_t Rest = OptimalCount - Prefixes;
+  for (uint64_t i = 0, e = Rest; i != e; i++)
+    OW->Write8(Nops[Rest - 1][i]);
 
   // Finish with single byte nops.
   for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
@@ -186,75 +289,60 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
 namespace {
 class ELFX86AsmBackend : public X86AsmBackend {
 public:
-  ELFX86AsmBackend(const Target &T)
-    : X86AsmBackend(T) {
-    HasAbsolutizedSet = true;
-    HasScatteredSymbols = true;
+  Triple::OSType OSType;
+  ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+    : X86AsmBackend(T), OSType(_OSType) {
+    HasReliableSymbolDifference = true;
   }
 
-  bool isVirtualSection(const MCSection &Section) const {
-    const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
-    return SE.getType() == MCSectionELF::SHT_NOBITS;;
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+    const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+    return ES.getFlags() & ELF::SHF_MERGE;
   }
 };
 
 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
 public:
-  ELFX86_32AsmBackend(const Target &T)
-    : ELFX86AsmBackend(T) {}
+  ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+    : ELFX86AsmBackend(T, OSType) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return new ELFObjectWriter(OS, /*Is64Bit=*/false,
-                               /*IsLittleEndian=*/true,
-                               /*HasRelocationAddend=*/false);
+    return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
+                                                        ELF::EM_386, false),
+                                 OS, /*IsLittleEndian*/ true);
   }
 };
 
 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
 public:
-  ELFX86_64AsmBackend(const Target &T)
-    : ELFX86AsmBackend(T) {}
+  ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+    : ELFX86AsmBackend(T, OSType) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return new ELFObjectWriter(OS, /*Is64Bit=*/true,
-                               /*IsLittleEndian=*/true,
-                               /*HasRelocationAddend=*/true);
+    return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
+                                                        ELF::EM_X86_64, true),
+                                 OS, /*IsLittleEndian*/ true);
   }
 };
 
 class WindowsX86AsmBackend : public X86AsmBackend {
   bool Is64Bit;
+
 public:
   WindowsX86AsmBackend(const Target &T, bool is64Bit)
     : X86AsmBackend(T)
     , Is64Bit(is64Bit) {
-    HasScatteredSymbols = true;
   }
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
     return createWinCOFFObjectWriter(OS, Is64Bit);
   }
-
-  bool isVirtualSection(const MCSection &Section) const {
-    const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
-    return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
-  }
 };
 
 class DarwinX86AsmBackend : public X86AsmBackend {
 public:
   DarwinX86AsmBackend(const Target &T)
-    : X86AsmBackend(T) {
-    HasAbsolutizedSet = true;
-    HasScatteredSymbols = true;
-  }
-
-  bool isVirtualSection(const MCSection &Section) const {
-    const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
-    return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
-            SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
-            SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
-  }
+    : X86AsmBackend(T) { }
 };
 
 class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
@@ -263,7 +351,9 @@ public:
     : DarwinX86AsmBackend(T) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return new MachObjectWriter(OS, /*Is64Bit=*/false);
+    return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+                                     object::mach::CTM_i386,
+                                     object::mach::CSX86_ALL);
   }
 };
 
@@ -275,7 +365,9 @@ public:
   }
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return new MachObjectWriter(OS, /*Is64Bit=*/true);
+    return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+                                     object::mach::CTM_x86_64,
+                                     object::mach::CSX86_ALL);
   }
 
   virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -312,7 +404,7 @@ public:
   }
 };
 
-} // end anonymous namespace 
+} // end anonymous namespace
 
 TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                                const std::string &TT) {
@@ -322,9 +414,12 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
   case Triple::MinGW32:
   case Triple::Cygwin:
   case Triple::Win32:
-    return new WindowsX86AsmBackend(T, false);
+    if (Triple(TT).getEnvironment() == Triple::MachO)
+      return new DarwinX86_32AsmBackend(T);
+    else
+      return new WindowsX86AsmBackend(T, false);
   default:
-    return new ELFX86_32AsmBackend(T);
+    return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
   }
 }
 
@@ -333,11 +428,14 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
   switch (Triple(TT).getOS()) {
   case Triple::Darwin:
     return new DarwinX86_64AsmBackend(T);
-  case Triple::MinGW64:
+  case Triple::MinGW32:
   case Triple::Cygwin:
   case Triple::Win32:
-    return new WindowsX86AsmBackend(T, true);
+    if (Triple(TT).getEnvironment() == Triple::MachO)
+      return new DarwinX86_64AsmBackend(T);
+    else
+      return new WindowsX86AsmBackend(T, true);
   default:
-    return new ELFX86_64AsmBackend(T);
+    return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
   }
 }
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 20110ad788cd..99b4479a9fc9 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
-#include "AsmPrinter/X86ATTInstPrinter.h"
-#include "AsmPrinter/X86IntelInstPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86.h"
 #include "X86COFFMachineModuleInfo.h"
@@ -48,21 +48,15 @@ using namespace llvm;
 // Primitive Helper Functions.
 //===----------------------------------------------------------------------===//
 
-void X86AsmPrinter::PrintPICBaseSymbol(raw_ostream &O) const {
-  const TargetLowering *TLI = TM.getTargetLowering();
-  O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF,
-                                                                    OutContext);
-}
-
 /// runOnMachineFunction - Emit the function body.
 ///
 bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SetupMachineFunction(MF);
 
-  if (Subtarget->isTargetCOFF()) {
+  if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
     bool Intrn = MF.getFunction()->hasInternalLinkage();
     OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC 
+    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
                                               : COFF::IMAGE_SYM_CLASS_EXTERNAL);
     OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
                                                << COFF::SCT_COMPLEX_TYPE_SHIFT);
@@ -95,7 +89,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
     break;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
-    
+
     MCSymbol *GVSym;
     if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
       GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
@@ -109,11 +103,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
     // Handle dllimport linkage.
     if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
       GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
-    
+
     if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
       MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      MachineModuleInfoImpl::StubValueTy &StubSym = 
+      MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
       if (StubSym.getPointer() == 0)
         StubSym = MachineModuleInfoImpl::
@@ -133,7 +127,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
         StubSym = MachineModuleInfoImpl::
           StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
     }
-    
+
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
     // to avoid having it look like an integer immediate to the assembler.
     if (GVSym->getName()[0] != '$')
@@ -149,7 +143,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
       SmallString<128> TempNameStr;
       TempNameStr += StringRef(MO.getSymbolName());
       TempNameStr += StringRef("$stub");
-      
+
       MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
       MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
@@ -163,17 +157,17 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
     } else {
       SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
     }
-    
+
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
     // to avoid having it look like an integer immediate to the assembler.
-    if (SymToPrint->getName()[0] != '$') 
+    if (SymToPrint->getName()[0] != '$')
       O << *SymToPrint;
     else
       O << '(' << *SymToPrint << '(';
     break;
   }
   }
-  
+
   switch (MO.getTargetFlags()) {
   default:
     llvm_unreachable("Unknown target flag on GV operand");
@@ -185,15 +179,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
     // These affect the name of the symbol, not any suffix.
     break;
   case X86II::MO_GOT_ABSOLUTE_ADDRESS:
-    O << " + [.-";
-    PrintPICBaseSymbol(O);
-    O << ']';
-    break;      
+    O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+    break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
   case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
-    O << '-';
-    PrintPICBaseSymbol(O);
+    O << '-' << *MF->getPICBaseSymbol();
     break;
   case X86II::MO_TLSGD:     O << "@TLSGD";     break;
   case X86II::MO_GOTTPOFF:  O << "@GOTTPOFF";  break;
@@ -206,8 +197,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
   case X86II::MO_PLT:       O << "@PLT";       break;
   case X86II::MO_TLVP:      O << "@TLVP";      break;
   case X86II::MO_TLVP_PIC_BASE:
-    O << "@TLVP" << '-';
-    PrintPICBaseSymbol(O);
+    O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
     break;
   }
 }
@@ -262,7 +252,7 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 
   case MachineOperand::MO_JumpTableIndex:
   case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_GlobalAddress: 
+  case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ExternalSymbol: {
     O << '$';
     printSymbolOperand(MO, O);
@@ -298,10 +288,10 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
   if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
       BaseReg.getReg() == X86::RIP)
     HasBaseReg = false;
-  
+
   // HasParenPart - True if we will print out the () part of the mem ref.
   bool HasParenPart = IndexReg.getReg() || HasBaseReg;
-  
+
   if (DispSpec.isImm()) {
     int DispVal = DispSpec.getImm();
     if (DispVal || !HasParenPart)
@@ -312,6 +302,9 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
     printSymbolOperand(MI->getOperand(Op+3), O);
   }
 
+  if (Modifier && strcmp(Modifier, "H") == 0)
+    O << "+8";
+
   if (HasParenPart) {
     assert(IndexReg.getReg() != X86::ESP &&
            "X86 doesn't allow scaling by ESP");
@@ -344,10 +337,8 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
 
 void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
                                   raw_ostream &O) {
-  PrintPICBaseSymbol(O);
-  O << '\n';
-  PrintPICBaseSymbol(O);
-  O << ':';
+  O << *MF->getPICBaseSymbol() << '\n';
+  O << *MF->getPICBaseSymbol() << ':';
 }
 
 bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
@@ -386,14 +377,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
     const MachineOperand &MO = MI->getOperand(OpNo);
-    
+
     switch (ExtraCode[0]) {
     default: return true;  // Unknown modifier.
     case 'a': // This is an address.  Currently only 'i' and 'r' are expected.
       if (MO.isImm()) {
         O << MO.getImm();
         return false;
-      } 
+      }
       if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
         printSymbolOperand(MO, O);
         if (Subtarget->isPICStyleRIPRel())
@@ -470,6 +461,9 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
     case 'q': // Print SImode register
       // These only apply to registers, ignore on mem.
       break;
+    case 'H':
+      printMemReference(MI, OpNo, O, "H");
+      return false;
     case 'P': // Don't print @PLT, but do print as memory.
       printMemReference(MI, OpNo, O, "no-rip");
       return false;
@@ -480,23 +474,23 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 }
 
 void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
-  if (Subtarget->isTargetDarwin())
+  if (Subtarget->isTargetEnvMacho())
     OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
 }
 
 
 void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
-  if (Subtarget->isTargetDarwin()) {
+  if (Subtarget->isTargetEnvMacho()) {
     // All darwin targets use mach-o.
     MachineModuleInfoMachO &MMIMacho =
       MMI->getObjFileInfo<MachineModuleInfoMachO>();
-    
+
     // Output stubs for dynamically-linked functions.
     MachineModuleInfoMachO::SymbolListTy Stubs;
 
     Stubs = MMIMacho.GetFnStubList();
     if (!Stubs.empty()) {
-      const MCSection *TheSection = 
+      const MCSection *TheSection =
         OutContext.getMachOSection("__IMPORT", "__jump_table",
                                    MCSectionMachO::S_SYMBOL_STUBS |
                                    MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
@@ -514,7 +508,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         const char HltInsts[] = { -12, -12, -12, -12, -12 };
         OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
       }
-      
+
       Stubs.clear();
       OutStreamer.AddBlankLine();
     }
@@ -522,7 +516,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
     // Output stubs for external and common global variables.
     Stubs = MMIMacho.GetGVStubList();
     if (!Stubs.empty()) {
-      const MCSection *TheSection = 
+      const MCSection *TheSection =
         OutContext.getMachOSection("__IMPORT", "__pointers",
                                    MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
                                    SectionKind::getMetadata());
@@ -580,7 +574,14 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
     OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
   }
 
-  if (Subtarget->isTargetCOFF()) {
+  if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+      MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+    StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+    MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+    OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+  }
+
+  if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
     X86COFFMachineModuleInfo &COFFMMI =
       MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
 
@@ -661,12 +662,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
   }
 }
 
-MachineLocation 
+MachineLocation
 X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
   MachineLocation Location;
   assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
   // Frame address.  Currently handles register +- offset only.
-  
+
   if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
     Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
   else {
@@ -690,9 +691,9 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   O << V.getName();
   O << " <- ";
   // Frame address.  Currently handles register +- offset only.
-  O << '['; 
+  O << '[';
   if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
-    printOperand(MI, 0, O); 
+    printOperand(MI, 0, O);
   else
     O << "undef";
   O << '+'; printOperand(MI, 3, O);
@@ -718,10 +719,10 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() { 
+extern "C" void LLVMInitializeX86AsmPrinter() {
   RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
   RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-  
+
   TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
   TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
 }
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index e61be66c75a2..3a50435d38ba 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -75,8 +75,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
 
   void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
 
-  void PrintPICBaseSymbol(raw_ostream &O) const;
-  
   bool runOnMachineFunction(MachineFunction &F);
   
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e3409effc318..a44fb694e725 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[
 
   // MMX vector types are always returned in MM0. If the target doesn't have
   // MM0, it doesn't support these vector types.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
+  CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
 
   // Long double types are always returned in ST0 (even with SSE).
   CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
   // weirdly; this is really the sse-regparm calling convention) in which
   // case they use XMM0, otherwise it is the same as the common X86 calling
   // conv.
-  CCIfInReg<CCIfSubtarget<"hasSSE2()",
+  CCIfInReg<CCIfSubtarget<"hasXMMInt()",
     CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
   CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
   CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
   // SSE2.
   // This can happen when a float, 2 x float, or 3 x float vector is split by
   // target lowering, and is returned in 1-3 sse regs.
-  CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
-  CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+  CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+  CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
 
   // For integers, ECX can be used as an extra return register
   CCIfType<[i8],  CCAssignToReg<[AL, DL, CL]>>,
@@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[
   // returned in RAX. This disagrees with ABI documentation but is bug
   // compatible with gcc.
   CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
-  CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
+  CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
   CCDelegateTo<RetCC_X86Common>
 ]>;
 
 // X86-Win64 C return-value convention.
 def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
+  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
 
   // And FP in XMM0 only.
   CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -161,14 +161,14 @@ def CC_X86_64_C : CallingConv<[
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
-  CCIfType<[v8i8, v4i16, v2i32],
+  CCIfType<[x86mmx],
             CCIfSubtarget<"isTargetDarwin()",
-            CCIfSubtarget<"hasSSE2()",
+            CCIfSubtarget<"hasXMMInt()",
             CCPromoteToType<v2i64>>>>,
 
   // The first 8 FP/Vector arguments are passed in XMM registers.
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCIfSubtarget<"hasSSE1()",
+            CCIfSubtarget<"hasXMM()",
             CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 256-bit vector arguments are passed in YMM registers.
@@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[
            CCAssignToStack<32, 32>>,
 
   // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
 ]>;
 
 // Calling convention used on Win64
@@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
 
   // The first 4 MMX vector arguments are passed in GPRs.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64],
-           CCBitConvertToType<i64>>,
+  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
 
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[f80], CCAssignToStack<0, 0>>,
 
   // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
 ]>;
 
 def CC_X86_64_GHC : CallingConv<[
@@ -246,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
 
   // Pass in STG registers: F1, F2, F3, F4, D1, D2
   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCIfSubtarget<"hasSSE1()",
+            CCIfSubtarget<"hasXMM()",
             CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
 ]>;
 
@@ -264,12 +263,12 @@ def CC_X86_32_Common : CallingConv<[
   // The first 3 float or double arguments, if marked 'inreg' and if the call
   // is not a vararg call and if SSE2 is available, are passed in SSE registers.
   CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
-                CCIfSubtarget<"hasSSE2()",
+                CCIfSubtarget<"hasXMMInt()",
                 CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
 
   // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
   // registers if the call is not a vararg call.
-  CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
+  CCIfNotVarArg<CCIfType<[x86mmx],
                 CCAssignToReg<[MM0, MM1, MM2]>>>,
 
   // Integer/Float values get stored in stack slots that are 4 bytes in
@@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[
 
   // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
   // passed in the parameter area.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
 
 def CC_X86_32_C : CallingConv<[
   // Promote i8/i16 arguments to i32.
@@ -363,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
   // The first 3 float or double arguments, if the call is not a vararg
   // call and if SSE2 is available, are passed in SSE registers.
   CCIfNotVarArg<CCIfType<[f32,f64],
-                CCIfSubtarget<"hasSSE2()",
+                CCIfSubtarget<"hasXMMInt()",
                 CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
 
   // Doubles get 8-byte slots that are 8-byte aligned.
@@ -380,3 +379,35 @@ def CC_X86_32_GHC : CallingConv<[
   // Pass in STG registers: Base, Sp, Hp, R1
   CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
 ]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+  CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+  CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+  CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+  CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+  // Otherwise, drop to normal X86-32 CC
+  CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+  CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+  // Mingw64 and native Win64 use Win64 CC
+  CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+  // Otherwise, drop to normal X86-64 CC
+  CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+  CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+  CCDelegateTo<CC_X86_32>
+]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 824021c0c882..60d9d4ad064e 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -68,8 +68,7 @@ namespace {
       return "X86 Machine Code Emitter";
     }
 
-    void emitInstruction(const MachineInstr &MI,
-                         const TargetInstrDesc *Desc);
+    void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -131,7 +130,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
     for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); 
          MBB != E; ++MBB) {
       MCE.StartMachineBasicBlock(MBB);
-      for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
            I != E; ++I) {
         const TargetInstrDesc &Desc = I->getDesc();
         emitInstruction(*I, &Desc);
@@ -598,9 +597,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
 }
 
 template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
                                            const TargetInstrDesc *Desc) {
   DEBUG(dbgs() << MI);
+  
+  // If this is a pseudo instruction, lower it.
+  switch (Desc->getOpcode()) {
+  case X86::ADD16rr_DB:   Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+  case X86::ADD32rr_DB:   Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+  case X86::ADD64rr_DB:   Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+  case X86::ADD16ri_DB:   Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+  case X86::ADD32ri_DB:   Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+  case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+  case X86::ADD16ri8_DB:  Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+  case X86::ADD32ri8_DB:  Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+  case X86::ADD64ri8_DB:  Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+  }
+  
 
   MCE.processDebugLoc(MI.getDebugLoc(), true);
 
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index f84995dcf342..f1d7edea7210 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
 #include "X86ELFWriterInfo.h"
 #include "X86Relocations.h"
 #include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
@@ -24,8 +25,8 @@ using namespace llvm;
 //  Implementation of the X86ELFWriterInfo class
 //===----------------------------------------------------------------------===//
 
-X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
-  : TargetELFWriterInfo(TM) {
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+  : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
     EMachine = is64Bit ? EM_X86_64 : EM_386;
   }
 
@@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
   if (is64Bit) {
     switch(MachineRelTy) {
     case X86::reloc_pcrel_word:
-      return R_X86_64_PC32;
+      return ELF::R_X86_64_PC32;
     case X86::reloc_absolute_word:
-      return R_X86_64_32;
+      return ELF::R_X86_64_32;
     case X86::reloc_absolute_word_sext:
-      return R_X86_64_32S;
+      return ELF::R_X86_64_32S;
     case X86::reloc_absolute_dword:
-      return R_X86_64_64;
+      return ELF::R_X86_64_64;
     case X86::reloc_picrel_word:
     default:
       llvm_unreachable("unknown x86_64 machine relocation type");
@@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
   } else {
     switch(MachineRelTy) {
     case X86::reloc_pcrel_word:
-      return R_386_PC32;
+      return ELF::R_386_PC32;
     case X86::reloc_absolute_word:
-      return R_386_32;
+      return ELF::R_386_32;
     case X86::reloc_absolute_word_sext:
     case X86::reloc_absolute_dword:
     case X86::reloc_picrel_word:
@@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
                                                     long int Modifier) const {
   if (is64Bit) {
     switch(RelTy) {
-    case R_X86_64_PC32: return Modifier - 4;
-    case R_X86_64_32:
-    case R_X86_64_32S:
-    case R_X86_64_64:
+    case ELF::R_X86_64_PC32: return Modifier - 4;
+    case ELF::R_X86_64_32:
+    case ELF::R_X86_64_32S:
+    case ELF::R_X86_64_64:
       return Modifier;
     default:
       llvm_unreachable("unknown x86_64 relocation type");
     }
   } else {
     switch(RelTy) {
-      case R_386_PC32: return Modifier - 4;
-      case R_386_32: return Modifier;
+    case ELF::R_386_PC32: return Modifier - 4;
+    case ELF::R_386_32: return Modifier;
     default:
       llvm_unreachable("unknown x86 relocation type");
     }
@@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
 unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
   if (is64Bit) {
     switch(RelTy) {
-      case R_X86_64_PC32:
-      case R_X86_64_32:
-      case R_X86_64_32S:
+    case ELF::R_X86_64_PC32:
+    case ELF::R_X86_64_32:
+    case ELF::R_X86_64_32S:
         return 32;
-      case R_X86_64_64:
+    case ELF::R_X86_64_64:
         return 64;
     default:
       llvm_unreachable("unknown x86_64 relocation type");
     }
   } else {
     switch(RelTy) {
-      case R_386_PC32:
-      case R_386_32:
+    case ELF::R_386_PC32:
+    case ELF::R_386_32:
         return 32;
     default:
       llvm_unreachable("unknown x86 relocation type");
@@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
 bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
   if (is64Bit) {
     switch(RelTy) {
-      case R_X86_64_PC32:
+    case ELF::R_X86_64_PC32:
         return true;
-      case R_X86_64_32:
-      case R_X86_64_32S:
-      case R_X86_64_64:
+    case ELF::R_X86_64_32:
+    case ELF::R_X86_64_32S:
+    case ELF::R_X86_64_64:
         return false;
     default:
       llvm_unreachable("unknown x86_64 relocation type");
     }
   } else {
     switch(RelTy) {
-      case R_386_PC32:
+    case ELF::R_386_PC32:
         return true;
-      case R_386_32:
+    case ELF::R_386_32:
         return false;
     default:
       llvm_unreachable("unknown x86 relocation type");
@@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
                                              unsigned RelOffset,
                                              unsigned RelTy) const {
 
-  if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+  if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
     return SymOffset - (RelOffset + 4);
   else
     assert("computeRelocation unknown for this relocation type");
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 342e6e627d26..a45b5bb66a07 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,25 +20,8 @@ namespace llvm {
 
   class X86ELFWriterInfo : public TargetELFWriterInfo {
 
-    // ELF Relocation types for X86
-    enum X86RelocationType {
-      R_386_NONE = 0,
-      R_386_32   = 1,
-      R_386_PC32 = 2
-    };
-
-    // ELF Relocation types for X86_64
-    enum X86_64RelocationType {
-      R_X86_64_NONE = 0,
-      R_X86_64_64   = 1,
-      R_X86_64_PC32 = 2,
-      R_X86_64_32   = 10,
-      R_X86_64_32S  = 11,
-      R_X86_64_PC64 = 24
-    };
-
   public:
-    X86ELFWriterInfo(TargetMachine &TM);
+    X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
     virtual ~X86ELFWriterInfo();
 
     /// getRelocationType - Returns the target specific ELF Relocation type.
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 0c70eec4827f..9d42ac2e470c 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -36,7 +36,7 @@
 using namespace llvm;
 
 namespace {
-  
+
 class X86FastISel : public FastISel {
   /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
   /// make the right decision when generating code for different targets.
@@ -46,7 +46,7 @@ class X86FastISel : public FastISel {
   ///
   unsigned StackPtr;
 
-  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 
+  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
   /// floating point ops.
   /// When SSE is available, use it for f32 operations.
   /// When SSE2 is available, use it for f64 operations.
@@ -63,11 +63,18 @@ public:
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+
 #include "X86GenFastISel.inc"
 
 private:
   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
-  
+
   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
 
   bool X86FastEmitStore(EVT VT, const Value *Val,
@@ -77,12 +84,12 @@ private:
 
   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                          unsigned &ResultReg);
-  
+
   bool X86SelectAddress(const Value *V, X86AddressMode &AM);
   bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
 
   bool X86SelectLoad(const Instruction *I);
-  
+
   bool X86SelectStore(const Instruction *I);
 
   bool X86SelectRet(const Instruction *I);
@@ -98,7 +105,7 @@ private:
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
- 
+
   bool X86SelectFPExt(const Instruction *I);
   bool X86SelectFPTrunc(const Instruction *I);
 
@@ -107,9 +114,6 @@ private:
   bool X86VisitIntrinsicCall(const IntrinsicInst &I);
   bool X86SelectCall(const Instruction *I);
 
-  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
-  CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
-
   const X86InstrInfo *getInstrInfo() const {
     return getTargetMachine()->getInstrInfo();
   }
@@ -128,17 +132,18 @@ private:
       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
   }
 
-  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
+  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
 };
-  
+
 } // end anonymous namespace.
 
-bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
-  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
-  if (VT == MVT::Other || !VT.isSimple())
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+  if (evt == MVT::Other || !evt.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
     return false;
-  
+
+  VT = evt.getSimpleVT();
   // For now, require SSE/SSE2 for performing floating-point operations,
   // since x87 requires additional work.
   if (VT == MVT::f64 && !X86ScalarSSEf64)
@@ -157,45 +162,6 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
 
 #include "X86GenCallingConv.inc"
 
-/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
-                                           bool isTaillCall) {
-  if (Subtarget->is64Bit()) {
-    if (CC == CallingConv::GHC)
-      return CC_X86_64_GHC;
-    else if (Subtarget->isTargetWin64())
-      return CC_X86_Win64_C;
-    else
-      return CC_X86_64_C;
-  }
-
-  if (CC == CallingConv::X86_FastCall)
-    return CC_X86_32_FastCall;
-  else if (CC == CallingConv::X86_ThisCall)
-    return CC_X86_32_ThisCall;
-  else if (CC == CallingConv::Fast)
-    return CC_X86_32_FastCC;
-  else if (CC == CallingConv::GHC)
-    return CC_X86_32_GHC;
-  else
-    return CC_X86_32_C;
-}
-
-/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
-                                          bool isTaillCall) {
-  if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
-      return RetCC_X86_Win64_C;
-    else
-      return RetCC_X86_64_C;
-  }
-
-  return RetCC_X86_32_C;
-}
-
 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
 /// Return true and the result register by reference if it is possible.
@@ -284,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
     Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
     break;
   }
-  
+
   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(Opc)), AM).addReg(Val);
   return true;
@@ -295,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
   // Handle 'null' like i32/i64 0.
   if (isa<ConstantPointerNull>(Val))
     Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
-  
+
   // If this is a store of a simple constant, fold the constant into the store.
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
     unsigned Opc = 0;
@@ -312,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
         Opc = X86::MOV64mi32;
       break;
     }
-    
+
     if (Opc) {
       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                              DL, TII.get(Opc)), AM)
@@ -321,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
       return true;
     }
   }
-  
+
   unsigned ValReg = getRegForValue(Val);
   if (ValReg == 0)
-    return false;    
- 
+    return false;
+
   return X86FastEmitStore(VT, ValReg, AM);
 }
 
@@ -337,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                     unsigned &ResultReg) {
   unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                            Src, /*TODO: Kill=*/false);
-  
+
   if (RR != 0) {
     ResultReg = RR;
     return true;
@@ -354,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
     // Don't walk into other basic blocks; it's possible we haven't
     // visited them yet, so the instructions may not yet be assigned
     // virtual registers.
-    if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
-      return false;
-
-    Opcode = I->getOpcode();
-    U = I;
+    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+      Opcode = I->getOpcode();
+      U = I;
+    }
   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
     Opcode = C->getOpcode();
     U = C;
@@ -472,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
     AM.Disp = (uint32_t)Disp;
     if (X86SelectAddress(U->getOperand(0), AM))
       return true;
-    
+
     // If we couldn't merge the sub value into this addr mode, revert back to
     // our address and just match the value instead of completely failing.
     AM = SavedAM;
@@ -501,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
 
     // Okay, we've committed to selecting this global. Set up the basic address.
     AM.GV = GV;
-    
+
     // Allow the subtarget to classify the global.
     unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 
@@ -510,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       // FIXME: How do we know Base.Reg is free??
       AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
     }
-    
+
     // Unless the ABI requires an extra load, return a direct reference to
     // the global.
     if (!isGlobalStubReference(GVFlags)) {
@@ -523,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       AM.GVOpFlags = GVFlags;
       return true;
     }
-    
+
     // Ok, we need to do a load from a stub.  If we've already loaded from this
     // stub, reuse the loaded pointer, otherwise emit the load now.
     DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
@@ -545,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       if (TLI.getPointerTy() == MVT::i64) {
         Opc = X86::MOV64rm;
         RC  = X86::GR64RegisterClass;
-        
+
         if (Subtarget->isPICStyleRIPRel())
           StubAM.Base.Reg = X86::RIP;
       } else {
         Opc = X86::MOV32rm;
         RC  = X86::GR32RegisterClass;
       }
-      
+
       LoadReg = createResultReg(RC);
       MachineInstrBuilder LoadMI =
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
@@ -564,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       // Prevent loading GV stub multiple times in same MBB.
       LocalValueMap[V] = LoadReg;
     }
-    
+
     // Now construct the final address. Note that the Disp, Scale,
     // and Index values may already be set here.
     AM.Base.Reg = LoadReg;
@@ -638,7 +604,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
 
     // Okay, we've committed to selecting this global. Set up the basic address.
     AM.GV = GV;
-    
+
     // No ABI requires an extra load for anything other than DLLImport, which
     // we rejected above. Return a direct reference to the global.
     if (Subtarget->isPICStyleRIPRel()) {
@@ -651,7 +617,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
     } else if (Subtarget->isPICStyleGOT()) {
       AM.GVOpFlags = X86II::MO_GOTOFF;
     }
-    
+
     return true;
   }
 
@@ -674,7 +640,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
 
 /// X86SelectStore - Select and emit code to implement store instructions.
 bool X86FastISel::X86SelectStore(const Instruction *I) {
-  EVT VT;
+  MVT VT;
   if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
     return false;
 
@@ -724,7 +690,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
     // Analyze operands of the call, assigning locations to each operand.
     SmallVector<CCValAssign, 16> ValLocs;
     CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
-    CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
+    CCInfo.AnalyzeReturn(Outs, RetCC_X86);
 
     const Value *RV = Ret->getOperand(0);
     unsigned Reg = getRegForValue(RV);
@@ -736,7 +702,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
       return false;
 
     CCValAssign &VA = ValLocs[0];
-  
+
     // Don't bother handling odd stuff for now.
     if (VA.getLocInfo() != CCValAssign::Full)
       return false;
@@ -745,7 +711,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
       return false;
     // TODO: For now, don't try to handle cases where getLocInfo()
     // says Full but the types don't match.
-    if (VA.getValVT() != TLI.getValueType(RV->getType()))
+    if (TLI.getValueType(RV->getType()) != VA.getValVT())
       return false;
 
     // The calling-convention tables for x87 returns don't tell
@@ -775,7 +741,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
 /// X86SelectLoad - Select and emit code to implement load instructions.
 ///
 bool X86FastISel::X86SelectLoad(const Instruction *I)  {
-  EVT VT;
+  MVT VT;
   if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
     return false;
 
@@ -826,11 +792,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                      EVT VT) {
   unsigned Op0Reg = getRegForValue(Op0);
   if (Op0Reg == 0) return false;
-  
+
   // Handle 'null' like i32/i64 0.
   if (isa<ConstantPointerNull>(Op1))
     Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
-  
+
   // We have two options: compare with register or immediate.  If the RHS of
   // the compare is an immediate that we can fold into this compare, use
   // CMPri, otherwise use CMPrr.
@@ -842,23 +808,23 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
       return true;
     }
   }
-  
+
   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
   if (CompareOpc == 0) return false;
-    
+
   unsigned Op1Reg = getRegForValue(Op1);
   if (Op1Reg == 0) return false;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
     .addReg(Op0Reg)
     .addReg(Op1Reg);
-  
+
   return true;
 }
 
 bool X86FastISel::X86SelectCmp(const Instruction *I) {
   const CmpInst *CI = cast<CmpInst>(I);
 
-  EVT VT;
+  MVT VT;
   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
     return false;
 
@@ -869,13 +835,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   case CmpInst::FCMP_OEQ: {
     if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
       return false;
-    
+
     unsigned EReg = createResultReg(&X86::GR8RegClass);
     unsigned NPReg = createResultReg(&X86::GR8RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
             TII.get(X86::SETNPr), NPReg);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
             TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
     UpdateValueMap(I, ResultReg);
     return true;
@@ -908,7 +874,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
   case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
   case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-  
+
   case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
   case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
   case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
@@ -930,7 +896,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   // Emit a compare of Op0/Op1.
   if (!X86FastEmitCompare(Op0, Op1, VT))
     return false;
-  
+
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
   UpdateValueMap(I, ResultReg);
   return true;
@@ -995,7 +961,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
       case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
       case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-          
+
       case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
       case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
       case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
@@ -1009,7 +975,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       default:
         return false;
       }
-      
+
       const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
       if (SwapArgs)
         std::swap(Op0, Op1);
@@ -1017,7 +983,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       // Emit a compare of the LHS and RHS, setting the flags.
       if (!X86FastEmitCompare(Op0, Op1, VT))
         return false;
-      
+
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
         .addMBB(TrueMBB);
 
@@ -1070,8 +1036,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
           }
 
           const TargetInstrDesc &TID = MI.getDesc();
-          if (TID.hasUnmodeledSideEffects() ||
-              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+          if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
+              MI.hasUnmodeledSideEffects())
             break;
         }
 
@@ -1147,22 +1113,22 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
     return false;
   }
 
-  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
-  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
     return false;
 
   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   if (Op0Reg == 0) return false;
-  
+
   // Fold immediate in shl(x,3).
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
     unsigned ResultReg = createResultReg(RC);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), 
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
             ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
     UpdateValueMap(I, ResultReg);
     return true;
   }
-  
+
   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   if (Op1Reg == 0) return false;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1183,23 +1149,26 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
 }
 
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
-  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
-  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
     return false;
-  
+
+  // We only use cmov here, if we don't have a cmov instruction bail.
+  if (!Subtarget->hasCMov()) return false;
+
   unsigned Opc = 0;
   const TargetRegisterClass *RC = NULL;
-  if (VT.getSimpleVT() == MVT::i16) {
+  if (VT == MVT::i16) {
     Opc = X86::CMOVE16rr;
     RC = &X86::GR16RegClass;
-  } else if (VT.getSimpleVT() == MVT::i32) {
+  } else if (VT == MVT::i32) {
     Opc = X86::CMOVE32rr;
     RC = &X86::GR32RegClass;
-  } else if (VT.getSimpleVT() == MVT::i64) {
+  } else if (VT == MVT::i64) {
     Opc = X86::CMOVE64rr;
     RC = &X86::GR64RegClass;
   } else {
-    return false; 
+    return false;
   }
 
   unsigned Op0Reg = getRegForValue(I->getOperand(0));
@@ -1264,7 +1233,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
     return false;
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
-  
+
   // This code only handles truncation to byte right now.
   if (DstVT != MVT::i8 && DstVT != MVT::i1)
     // All other cases should be handled by the tblgen generated code.
@@ -1335,21 +1304,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     // Grab the frame index.
     X86AddressMode AM;
     if (!X86SelectAddress(Slot, AM)) return false;
-    
+
     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-    
+
     return true;
   }
   case Intrinsic::objectsize: {
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
     const Type *Ty = I.getCalledFunction()->getReturnType();
-    
+
     assert(CI && "Non-constant type in Intrinsic::objectsize?");
-    
-    EVT VT;
+
+    MVT VT;
     if (!isTypeLegal(Ty, VT))
       return false;
-    
+
     unsigned OpC = 0;
     if (VT == MVT::i32)
       OpC = X86::MOV32ri;
@@ -1357,7 +1326,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
       OpC = X86::MOV64ri;
     else
       return false;
-    
+
     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
                                   addImm(CI->isZero() ? -1ULL : 0);
@@ -1392,7 +1361,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     const Type *RetTy =
       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
 
-    EVT VT;
+    MVT VT;
     if (!isTypeLegal(RetTy, VT))
       return false;
 
@@ -1429,7 +1398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
       ResultReg = DestReg1+1;
     else
       ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
-    
+
     unsigned Opc = X86::SETBr;
     if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
       Opc = X86::SETOr;
@@ -1476,7 +1445,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 
   // Handle *simple* calls for now.
   const Type *RetTy = CS.getType();
-  EVT RetVT;
+  MVT RetVT;
   if (RetTy->isVoidTy())
     RetVT = MVT::isVoid;
   else if (!isTypeLegal(RetTy, RetVT, true))
@@ -1506,7 +1475,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   // Deal with call operands first.
   SmallVector<const Value *, 8> ArgVals;
   SmallVector<unsigned, 8> Args;
-  SmallVector<EVT, 8> ArgVTs;
+  SmallVector<MVT, 8> ArgVTs;
   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   Args.reserve(CS.arg_size());
   ArgVals.reserve(CS.arg_size());
@@ -1532,7 +1501,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       return false;
 
     const Type *ArgTy = (*i)->getType();
-    EVT ArgVT;
+    MVT ArgVT;
     if (!isTypeLegal(ArgTy, ArgVT))
       return false;
     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
@@ -1547,13 +1516,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
-  
+
   // Allocate shadow area for Win64
-  if (Subtarget->isTargetWin64()) {  
-    CCInfo.AllocateStack(32, 8); 
+  if (Subtarget->isTargetWin64()) {
+    CCInfo.AllocateStack(32, 8);
   }
 
-  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1570,7 +1539,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     CCValAssign &VA = ArgLocs[i];
     unsigned Arg = Args[VA.getValNo()];
     EVT ArgVT = ArgVTs[VA.getValNo()];
-  
+
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -1578,20 +1547,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     case CCValAssign::SExt: {
       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                        Arg, ArgVT, Arg);
-      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
-      Emitted = true;
+      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
       ArgVT = VA.getLocVT();
       break;
     }
     case CCValAssign::ZExt: {
       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                        Arg, ArgVT, Arg);
-      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
-      Emitted = true;
+      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
       ArgVT = VA.getLocVT();
       break;
     }
     case CCValAssign::AExt: {
+      // We don't handle MMX parameters yet.
+      if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
+        return false;
       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                        Arg, ArgVT, Arg);
       if (!Emitted)
@@ -1600,21 +1570,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       if (!Emitted)
         Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                     Arg, ArgVT, Arg);
-      
-      assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+
+      assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
       ArgVT = VA.getLocVT();
       break;
     }
     case CCValAssign::BCvt: {
-      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
-                               ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+                               ISD::BITCAST, Arg, /*TODO: Kill=*/false);
       assert(BC != 0 && "Failed to emit a bitcast!");
       Arg = BC;
       ArgVT = VA.getLocVT();
       break;
     }
     }
-    
+
     if (VA.isRegLoc()) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
               VA.getLocReg()).addReg(Arg);
@@ -1625,7 +1595,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       AM.Base.Reg = StackPtr;
       AM.Disp = LocMemOffset;
       const Value *ArgVal = ArgVals[VA.getValNo()];
-      
+
       // If this is a really simple value, emit this with the Value* version of
       // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
       // can cause us to reevaluate the argument.
@@ -1637,13 +1607,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   }
 
   // ELF / PIC requires GOT in the EBX register before function calls via PLT
-  // GOT pointer.  
+  // GOT pointer.
   if (Subtarget->isPICStyleGOT()) {
     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
             X86::EBX).addReg(Base);
   }
-  
+
   // Issue the call.
   MachineInstrBuilder MIB;
   if (CalleeOp) {
@@ -1657,7 +1627,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       CallOpc = X86::CALL32r;
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
       .addReg(CalleeOp);
-    
+
   } else {
     // Direct call.
     assert(GV && "Not a direct call");
@@ -1668,10 +1638,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       CallOpc = X86::CALL64pcrel32;
     else
       CallOpc = X86::CALLpcrel32;
-    
+
     // See if we need any target-specific flags on the GV operand.
     unsigned char OpFlags = 0;
-    
+
     // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
     // external symbols most go through the PLT in PIC mode.  If the symbol
     // has hidden or protected visibility, or if it is static or local, then
@@ -1688,8 +1658,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
       // automatically synthesizes these stubs.
       OpFlags = X86II::MO_DARWIN_STUB;
     }
-    
-    
+
+
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
       .addGlobalAddress(GV, 0, OpFlags);
   }
@@ -1709,7 +1679,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 
   // Now handle call return value (if any).
   SmallVector<unsigned, 4> UsedRegs;
-  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+  if (RetVT != MVT::isVoid) {
     SmallVector<CCValAssign, 16> RVLocs;
     CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
     CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
@@ -1718,7 +1688,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
     EVT CopyVT = RVLocs[0].getValVT();
     TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
-    
+
     // If this is a call to a function that returns an fp value on the x87 fp
     // stack, but where we prefer to use the value in xmm registers, copy it
     // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1756,7 +1726,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
     if (AndToI1) {
       // Mask out all but lowest bit for some call which produces an i1.
       unsigned AndResult = createResultReg(X86::GR8RegisterClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
               TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
       ResultReg = AndResult;
     }
@@ -1823,14 +1793,14 @@ X86FastISel::TargetSelectInstruction(const Instruction *I)  {
 }
 
 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
-  EVT VT;
+  MVT VT;
   if (!isTypeLegal(C->getType(), VT))
     return false;
-  
+
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
   const TargetRegisterClass *RC = NULL;
-  switch (VT.getSimpleVT().SimpleTy) {
+  switch (VT.SimpleTy) {
   default: return false;
   case MVT::i8:
     Opc = X86::MOV8rm;
@@ -1871,7 +1841,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
     // No f80 support yet.
     return false;
   }
-  
+
   // Materialize addresses with LEA instructions.
   if (isa<GlobalValue>(C)) {
     X86AddressMode AM;
@@ -1887,14 +1857,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
     }
     return 0;
   }
-  
+
   // MachineConstantPool wants an explicit alignment.
   unsigned Align = TD.getPrefTypeAlignment(C->getType());
   if (Align == 0) {
     // Alignment of vector types.  FIXME!
     Align = TD.getTypeAllocSize(C->getType());
   }
-  
+
   // x86-32 PIC requires a PIC base register for constant pools.
   unsigned PICBase = 0;
   unsigned char OpFlag = 0;
@@ -1941,6 +1911,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction.  If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+
+  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index 96e0aaec580b..17d242ab761e 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -15,11 +15,17 @@
 namespace llvm {
 namespace X86 {
 enum Fixups {
-  reloc_pcrel_4byte = FirstTargetFixupKind,  // 32-bit pcrel, e.g. a branch.
-  reloc_pcrel_1byte,                         // 8-bit pcrel, e.g. branch_1
-  reloc_pcrel_2byte,                         // 16-bit pcrel, e.g. callw
-  reloc_riprel_4byte,                        // 32-bit rip-relative
-  reloc_riprel_4byte_movq_load               // 32-bit rip-relative in movq
+  reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+  reloc_riprel_4byte_movq_load,              // 32-bit rip-relative in movq
+  reloc_signed_4byte,                        // 32-bit signed. Unlike FK_Data_4
+                                             // this will be sign extended at
+                                             // runtime.
+  reloc_global_offset_table,                 // 32-bit, relative to the start
+                                             // of the instruction. Used only
+                                             // for _GLOBAL_OFFSET_TABLE_.
+  // Marker
+  LastTargetFixupKind,
+  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
 }
 }
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index e6ebf669587d..3aaa69327976 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,6 +32,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,6 +52,7 @@ namespace {
   struct FPS : public MachineFunctionPass {
     static char ID;
     FPS() : MachineFunctionPass(ID) {
+      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
       // This is really only to keep valgrind quiet.
       // The logic in isLive() is too much for it.
       memset(Stack, 0, sizeof(Stack));
@@ -59,6 +61,7 @@ namespace {
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
+      AU.addRequired<EdgeBundles>();
       AU.addPreservedID(MachineLoopInfoID);
       AU.addPreservedID(MachineDominatorsID);
       MachineFunctionPass::getAnalysisUsage(AU);
@@ -94,7 +97,7 @@ namespace {
       // FixStack[i] == getStackEntry(i) for all i < FixCount.
       unsigned char FixStack[8];
 
-      LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+      LiveBundle() : Mask(0), FixCount(0) {}
 
       // Have the live registers been assigned a stack order yet?
       bool isFixed() const { return !Mask || FixCount; }
@@ -104,10 +107,8 @@ namespace {
     // with no live FP registers.
     SmallVector<LiveBundle, 8> LiveBundles;
 
-    // Map each MBB in the current function to an (ingoing, outgoing) index into
-    // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
-    // and are not actually stored in the map.
-    DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+    // The edge bundle analysis provides indices into the LiveBundles vector.
+    EdgeBundles *Bundles;
 
     // Return a bitmask of FP registers in block's live-in list.
     unsigned calcLiveInMask(MachineBasicBlock *MBB) {
@@ -167,7 +168,8 @@ namespace {
 
     /// getStackEntry - Return the X86::FP<n> register in register ST(i).
     unsigned getStackEntry(unsigned STi) const {
-      assert(STi < StackTop && "Access past stack top!");
+      if (STi >= StackTop)
+        report_fatal_error("Access past stack top!");
       return Stack[StackTop-1-STi];
     }
 
@@ -180,7 +182,8 @@ namespace {
     // pushReg - Push the specified FP<n> register onto the stack.
     void pushReg(unsigned Reg) {
       assert(Reg < 8 && "Register number out of range!");
-      assert(StackTop < 8 && "Stack overflow!");
+      if (StackTop >= 8)
+        report_fatal_error("Stack overflow!");
       Stack[StackTop] = Reg;
       RegMap[Reg] = StackTop++;
     }
@@ -197,7 +200,8 @@ namespace {
       std::swap(RegMap[RegNo], RegMap[RegOnTop]);
 
       // Swap stack slot contents.
-      assert(RegMap[RegOnTop] < StackTop);
+      if (RegMap[RegOnTop] >= StackTop)
+        report_fatal_error("Access past stack top!");
       std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
 
       // Emit an fxch to update the runtime processors version of the state.
@@ -281,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
   // Early exit.
   if (!FPIsUsed) return false;
 
+  Bundles = &getAnalysis<EdgeBundles>();
   TII = MF.getTarget().getInstrInfo();
 
   // Prepare cross-MBB liveness.
@@ -305,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
       if (Processed.insert(BB))
         Changed |= processBasicBlock(MF, *BB);
 
-  BlockBundle.clear();
   LiveBundles.clear();
 
   return Changed;
@@ -318,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
 /// registers may be implicitly defined, or not used by all successors.
 void FPS::bundleCFG(MachineFunction &MF) {
   assert(LiveBundles.empty() && "Stale data in LiveBundles");
-  assert(BlockBundle.empty() && "Stale data in BlockBundle");
-  SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+  LiveBundles.resize(Bundles->getNumBundles());
 
-  // LiveBundle[0] is the empty live-in set.
-  LiveBundles.resize(1);
-
-  // First gather the actual live-in masks for all MBBs.
+  // Gather the actual live-in masks for all MBBs.
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = I;
     const unsigned Mask = calcLiveInMask(MBB);
     if (!Mask)
       continue;
-    // Ingoing bundle index.
-    unsigned &Idx = BlockBundle[MBB].first;
-    // Already assigned an ingoing bundle?
-    if (Idx)
-      continue;
-    // Allocate a new LiveBundle struct for this block's live-ins.
-    const unsigned BundleIdx = Idx = LiveBundles.size();
-    DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
-                 << MBB->getNumber());
-    LiveBundles.push_back(Mask);
-    LiveBundle &Bundle = LiveBundles.back();
-
-    // Make sure all predecessors have the same live-out set.
-    PropUp.insert(MBB);
-
-    // Keep pushing liveness up and down the CFG until convergence.
-    // Only critical edges cause iteration here, but when they do, multiple
-    // blocks can be assigned to the same LiveBundle index.
-    do {
-      // Assign BundleIdx as liveout from predecessors in PropUp.
-      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
-           E = PropUp.end(); I != E; ++I) {
-        MachineBasicBlock *MBB = *I;
-        for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
-             LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
-          MachineBasicBlock *PredMBB = *LinkI;
-          // PredMBB's liveout bundle should be set to LIIdx.
-          unsigned &Idx = BlockBundle[PredMBB].second;
-          if (Idx) {
-            assert(Idx == BundleIdx && "Inconsistent CFG");
-            continue;
-          }
-          Idx = BundleIdx;
-          DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
-          // Propagate to siblings.
-          if (PredMBB->succ_size() > 1)
-            PropDown.insert(PredMBB);
-        }
-      }
-      PropUp.clear();
-
-      // Assign BundleIdx as livein to successors in PropDown.
-      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
-           E = PropDown.end(); I != E; ++I) {
-        MachineBasicBlock *MBB = *I;
-        for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
-             LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
-          MachineBasicBlock *SuccMBB = *LinkI;
-          // LinkMBB's livein bundle should be set to BundleIdx.
-          unsigned &Idx = BlockBundle[SuccMBB].first;
-          if (Idx) {
-            assert(Idx == BundleIdx && "Inconsistent CFG");
-            continue;
-          }
-          Idx = BundleIdx;
-          DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
-          // Propagate to siblings.
-          if (SuccMBB->pred_size() > 1)
-            PropUp.insert(SuccMBB);
-          // Also accumulate the bundle liveness mask from the liveins here.
-          Bundle.Mask |= calcLiveInMask(SuccMBB);
-        }
-      }
-      PropDown.clear();
-    } while (!PropUp.empty());
-    DEBUG({
-      dbgs() << " live:";
-      for (unsigned i = 0; i < 8; ++i)
-        if (Bundle.Mask & (1<<i))
-          dbgs() << " %FP" << i;
-      dbgs() << '\n';
-    });
+    // Update MBB ingoing bundle mask.
+    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
   }
 }
 
@@ -489,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
   return Changed;
 }
 
-/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// setupBlockStack - Use the live bundles to set up our model of the stack
 /// to match predecessors' live out stack.
 void FPS::setupBlockStack() {
   DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
                << " derived from " << MBB->getName() << ".\n");
   StackTop = 0;
-  const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+  // Get the live-in bundle for MBB.
+  const LiveBundle &Bundle =
+    LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
 
   if (!Bundle.Mask) {
     DEBUG(dbgs() << "Block has no FP live-ins.\n");
@@ -532,7 +464,8 @@ void FPS::finishBlockStack() {
   DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
                << " derived from " << MBB->getName() << ".\n");
 
-  unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+  // Get MBB's live-out bundle.
+  unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
   LiveBundle &Bundle = LiveBundles[BundleIdx];
 
   // We may need to kill and define some registers to match successors.
@@ -572,7 +505,8 @@ namespace {
     friend bool operator<(const TableEntry &TE, unsigned V) {
       return TE.from < V;
     }
-    friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
+    friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+                                              const TableEntry &TE) {
       return V < TE.from;
     }
   };
@@ -824,7 +758,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
   MachineInstr* MI = I;
   DebugLoc dl = MI->getDebugLoc();
   ASSERT_SORTED(PopTable);
-  assert(StackTop > 0 && "Cannot pop empty stack!");
+  if (StackTop == 0)
+    report_fatal_error("Cannot pop empty stack!");
   RegMap[Stack[--StackTop]] = ~0;     // Update state
 
   // Check to see if there is a popping version of this instruction...
@@ -1016,7 +951,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
       MI->getOpcode() == X86::ISTT_FP32m ||
       MI->getOpcode() == X86::ISTT_FP64m ||
       MI->getOpcode() == X86::ST_FP80m) {
-    assert(StackTop > 0 && "Stack empty??");
+    if (StackTop == 0)
+      report_fatal_error("Stack empty??");
     --StackTop;
   } else if (KillsSrc) { // Last use of operand?
     popStackAfter(I);
@@ -1047,7 +983,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
     // If this is the last use of the source register, just make sure it's on
     // the top of the stack.
     moveToTop(Reg, I);
-    assert(StackTop > 0 && "Stack cannot be empty!");
+    if (StackTop == 0)
+      report_fatal_error("Stack cannot be empty!");
     --StackTop;
     pushReg(getFPReg(MI->getOperand(0)));
   } else {
@@ -1300,7 +1237,6 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
 ///
 void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   MachineInstr *MI = I;
-  DebugLoc dl = MI->getDebugLoc();
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SpecialFP instruction!");
   case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
@@ -1341,7 +1277,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
     std::swap(RegMap[RegNo], RegMap[RegOnTop]);
     
     // Swap stack slot contents.
-    assert(RegMap[RegOnTop] < StackTop);
+    if (RegMap[RegOnTop] >= StackTop)
+      report_fatal_error("Access past stack top!");
     std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
     break;
   }
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 000000000000..0a3f931acf93
--- /dev/null
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,994 @@
+//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register.  This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineModuleInfo &MMI = MF.getMMI();
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+  return (DisableFramePointerElim(MF) ||
+          RI->needsStackRealignment(MF) ||
+          MFI->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken() ||
+          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+          MMI.callsUnwindInit());
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+  if (is64Bit) {
+    if (isInt<8>(Imm))
+      return X86::SUB64ri8;
+    return X86::SUB64ri32;
+  } else {
+    if (isInt<8>(Imm))
+      return X86::SUB32ri8;
+    return X86::SUB32ri;
+  }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+  if (is64Bit) {
+    if (isInt<8>(Imm))
+      return X86::ADD64ri8;
+    return X86::ADD64ri32;
+  } else {
+    if (isInt<8>(Imm))
+      return X86::ADD32ri8;
+    return X86::ADD32ri;
+  }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worry about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator &MBBI,
+                                       const TargetRegisterInfo &TRI,
+                                       bool Is64Bit) {
+  const MachineFunction *MF = MBB.getParent();
+  const Function *F = MF->getFunction();
+  if (!F || MF->getMMI().callsEHReturn())
+    return 0;
+
+  static const unsigned CallerSavedRegs32Bit[] = {
+    X86::EAX, X86::EDX, X86::ECX
+  };
+
+  static const unsigned CallerSavedRegs64Bit[] = {
+    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+    X86::R8,  X86::R9,  X86::R10, X86::R11
+  };
+
+  unsigned Opc = MBBI->getOpcode();
+  switch (Opc) {
+  default: return 0;
+  case X86::RET:
+  case X86::RETI:
+  case X86::TCRETURNdi:
+  case X86::TCRETURNri:
+  case X86::TCRETURNmi:
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64: {
+    SmallSet<unsigned, 8> Uses;
+    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MBBI->getOperand(i);
+      if (!MO.isReg() || MO.isDef())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+        Uses.insert(*AsI);
+    }
+
+    const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+    for (; *CS; ++CS)
+      if (!Uses.count(*CS))
+        return *CS;
+  }
+  }
+
+  return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                  unsigned StackPtr, int64_t NumBytes,
+                  bool Is64Bit, const TargetInstrInfo &TII,
+                  const TargetRegisterInfo &TRI) {
+  bool isSub = NumBytes < 0;
+  uint64_t Offset = isSub ? -NumBytes : NumBytes;
+  unsigned Opc = isSub ?
+    getSUBriOpcode(Is64Bit, Offset) :
+    getADDriOpcode(Is64Bit, Offset);
+  uint64_t Chunk = (1LL << 31) - 1;
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  while (Offset) {
+    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+    if (ThisVal == (Is64Bit ? 8 : 4)) {
+      // Use push / pop instead.
+      unsigned Reg = isSub
+        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+      if (Reg) {
+        Opc = isSub
+          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+          : (Is64Bit ? X86::POP64r  : X86::POP32r);
+        BuildMI(MBB, MBBI, DL, TII.get(Opc))
+          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+        Offset -= ThisVal;
+        continue;
+      }
+    }
+
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+      .addReg(StackPtr)
+      .addImm(ThisVal);
+    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    Offset -= ThisVal;
+  }
+}
+
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
+  if (MBBI == MBB.begin()) return;
+
+  MachineBasicBlock::iterator PI = prior(MBBI);
+  unsigned Opc = PI->getOpcode();
+  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+      PI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes += PI->getOperand(2).getImm();
+    MBB.erase(PI);
+  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+             PI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes -= PI->getOperand(2).getImm();
+    MBB.erase(PI);
+  }
+}
+
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator &MBBI,
+                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
+  // FIXME: THIS ISN'T RUN!!!
+  return;
+
+  if (MBBI == MBB.end()) return;
+
+  MachineBasicBlock::iterator NI = llvm::next(MBBI);
+  if (NI == MBB.end()) return;
+
+  unsigned Opc = NI->getOpcode();
+  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+      NI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes -= NI->getOperand(2).getImm();
+    MBB.erase(NI);
+    MBBI = NI;
+  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+             NI->getOperand(0).getReg() == StackPtr) {
+    if (NumBytes)
+      *NumBytes += NI->getOperand(2).getImm();
+    MBB.erase(NI);
+    MBBI = NI;
+  }
+}
+
+/// mergeSPUpdates - Checks the instruction before/after the passed
+/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
+/// stack adjustment is returned as a positive value for ADD and a negative for
+/// SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator &MBBI,
+                           unsigned StackPtr,
+                           bool doMergeWithPrevious) {
+  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+      (!doMergeWithPrevious && MBBI == MBB.end()))
+    return 0;
+
+  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+  unsigned Opc = PI->getOpcode();
+  int Offset = 0;
+
+  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+      PI->getOperand(0).getReg() == StackPtr){
+    Offset += PI->getOperand(2).getImm();
+    MBB.erase(PI);
+    if (!doMergeWithPrevious) MBBI = NI;
+  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+             PI->getOperand(0).getReg() == StackPtr) {
+    Offset -= PI->getOperand(2).getImm();
+    MBB.erase(PI);
+    if (!doMergeWithPrevious) MBBI = NI;
+  }
+
+  return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+    unsigned Reg = II->first;
+
+    if (Reg == X86::EAX || Reg == X86::AX ||
+        Reg == X86::AH || Reg == X86::AL)
+      return true;
+  }
+
+  return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+                                             MCSymbol *Label,
+                                             unsigned FramePtr) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineModuleInfo &MMI = MF.getMMI();
+
+  // Add callee saved registers to move list.
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  if (CSI.empty()) return;
+
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+  const TargetData *TD = TM.getTargetData();
+  bool HasFP = hasFP(MF);
+
+  // Calculate amount of bytes used for return address storing.
+  int stackGrowth = -TD->getPointerSize();
+
+  // FIXME: This is dirty hack. The code itself is pretty mess right now.
+  // It should be rewritten from scratch and generalized sometimes.
+
+  // Determine maximum offset (minumum due to stack growth).
+  int64_t MaxOffset = 0;
+  for (std::vector<CalleeSavedInfo>::const_iterator
+         I = CSI.begin(), E = CSI.end(); I != E; ++I)
+    MaxOffset = std::min(MaxOffset,
+                         MFI->getObjectOffset(I->getFrameIdx()));
+
+  // Calculate offsets.
+  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+  for (std::vector<CalleeSavedInfo>::const_iterator
+         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+    unsigned Reg = I->getReg();
+    Offset = MaxOffset - Offset + saveAreaOffset;
+
+    // Don't output a new machine move if we're re-saving the frame
+    // pointer. This happens when the PrologEpilogInserter has inserted an extra
+    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+    // generates one when frame pointers are used. If we generate a "machine
+    // move" for this extra "PUSH", the linker will lose track of the fact that
+    // the frame pointer should have the value of the first "PUSH" when it's
+    // trying to unwind.
+    //
+    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+    //        another bug. I.e., one where we generate a prolog like this:
+    //
+    //          pushl  %ebp
+    //          movl   %esp, %ebp
+    //          pushl  %ebp
+    //          pushl  %esi
+    //           ...
+    //
+    //        The immediate re-push of EBP is unnecessary. At the least, it's an
+    //        optimization bug. EBP can be used as a scratch register in certain
+    //        cases, but probably not when we have a frame pointer.
+    if (HasFP && FramePtr == Reg)
+      continue;
+
+    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+    MachineLocation CSSrc(Reg);
+    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+  }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function *Fn = MF.getFunction();
+  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+  const X86InstrInfo &TII = *TM.getInstrInfo();
+  MachineModuleInfo &MMI = MF.getMMI();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool needsFrameMoves = MMI.hasDebugInfo() ||
+                          !Fn->doesNotThrow() || UnwindTablesMandatory;
+  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
+  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
+  bool HasFP = hasFP(MF);
+  bool Is64Bit = STI.is64Bit();
+  bool IsWin64 = STI.isTargetWin64();
+  unsigned StackAlign = getStackAlignment();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned StackPtr = RegInfo->getStackRegister();
+
+  DebugLoc DL;
+
+  // If we're forcing a stack realignment we can't rely on just the frame
+  // info, we need to know the ABI stack alignment as well in case we
+  // have a call out.  Otherwise just make sure we have some alignment - we'll
+  // go with the minimum SlotSize.
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else if (MaxAlign < SlotSize)
+      MaxAlign = SlotSize;
+  }
+
+  // Add RETADDR move area to callee saved frame size.
+  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+  if (TailCallReturnAddrDelta < 0)
+    X86FI->setCalleeSavedFrameSize(
+      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
+  // function, and use up to 128 bytes of stack space, don't have a frame
+  // pointer, calls, or dynamic alloca then we do not need to adjust the
+  // stack pointer (we fit in the Red Zone).
+  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+      !RegInfo->needsStackRealignment(MF) &&
+      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
+      !MFI->adjustsStack() &&                      // No calls.
+      !IsWin64) {                                  // Win64 has no Red Zone
+    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+    if (HasFP) MinSize += SlotSize;
+    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+    MFI->setStackSize(StackSize);
+  }
+
+  // Insert stack pointer adjustment for later moving of return addr.  Only
+  // applies to tail call optimized functions where the callee argument stack
+  // size is bigger than the callers.
+  if (TailCallReturnAddrDelta < 0) {
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL,
+              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+              StackPtr)
+        .addReg(StackPtr)
+        .addImm(-TailCallReturnAddrDelta);
+    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+  }
+
+  // Mapping for machine moves:
+  //
+  //   DST: VirtualFP AND
+  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
+  //        ELSE                        => DW_CFA_def_cfa
+  //
+  //   SRC: VirtualFP AND
+  //        DST: Register               => DW_CFA_def_cfa_register
+  //
+  //   ELSE
+  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
+  //        REG < 64                    => DW_CFA_offset + Reg
+  //        ELSE                        => DW_CFA_offset_extended
+
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+  const TargetData *TD = MF.getTarget().getTargetData();
+  uint64_t NumBytes = 0;
+  int stackGrowth = -TD->getPointerSize();
+
+  if (HasFP) {
+    // Calculate required stack adjustment.
+    uint64_t FrameSize = StackSize - SlotSize;
+    if (RegInfo->needsStackRealignment(MF))
+      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+    // Get the offset of the stack slot for the EBP register, which is
+    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+    // Update the frame offset adjustment.
+    MFI->setOffsetAdjustment(-NumBytes);
+
+    // Save EBP/RBP into the appropriate stack slot.
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+      .addReg(FramePtr, RegState::Kill);
+
+    if (needsFrameMoves) {
+      // Mark the place where EBP/RBP was saved.
+      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+      // Define the current CFA rule to use the provided offset.
+      if (StackSize) {
+        MachineLocation SPDst(MachineLocation::VirtualFP);
+        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+      } else {
+        MachineLocation SPDst(StackPtr);
+        MachineLocation SPSrc(StackPtr, stackGrowth);
+        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+      }
+
+      // Change the rule for the FramePtr to be an "offset" rule.
+      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+      MachineLocation FPSrc(FramePtr);
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+    }
+
+    // Update EBP with the new base value...
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+        .addReg(StackPtr);
+
+    if (needsFrameMoves) {
+      // Mark effective beginning of when frame pointer becomes valid.
+      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+      // Define the current CFA to use the EBP/RBP register.
+      MachineLocation FPDst(FramePtr);
+      MachineLocation FPSrc(MachineLocation::VirtualFP);
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+    }
+
+    // Mark the FramePtr as live-in in every block except the entry.
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+         I != E; ++I)
+      I->addLiveIn(FramePtr);
+
+    // Realign stack
+    if (RegInfo->needsStackRealignment(MF)) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL,
+                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+                StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
+      // The EFLAGS implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  } else {
+    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+  }
+
+  // Skip the callee-saved push instructions.
+  bool PushedRegs = false;
+  int StackOffset = 2 * stackGrowth;
+
+  while (MBBI != MBB.end() &&
+         (MBBI->getOpcode() == X86::PUSH32r ||
+          MBBI->getOpcode() == X86::PUSH64r)) {
+    PushedRegs = true;
+    ++MBBI;
+
+    if (!HasFP && needsFrameMoves) {
+      // Mark callee-saved push instruction.
+      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+      // Define the current CFA rule to use the provided offset.
+      unsigned Ptr = StackSize ?
+        MachineLocation::VirtualFP : StackPtr;
+      MachineLocation SPDst(Ptr);
+      MachineLocation SPSrc(Ptr, StackOffset);
+      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+      StackOffset += stackGrowth;
+    }
+  }
+
+  DL = MBB.findDebugLoc(MBBI);
+
+  // If there is an SUB32ri of ESP immediately before this instruction, merge
+  // the two. This can be the case when tail call elimination is enabled and
+  // the callee has more arguments then the caller.
+  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+  // If there is an ADD32ri or SUB32ri of ESP immediately after this
+  // instruction, merge the two instructions.
+  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+  // Adjust stack pointer: ESP -= numbytes.
+
+  // Windows and cygwin/mingw require a prologue helper routine when allocating
+  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
+  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
+  // stack and adjust the stack pointer in one go.  The 64-bit version of
+  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
+  // responsible for adjusting the stack pointer.  Touching the stack at 4K
+  // increments is necessary to ensure that the guard pages used by the OS
+  // virtual memory manager are allocated in correct sequence.
+  if (NumBytes >= 4096 &&
+      (STI.isTargetCygMing() || STI.isTargetWin32()) &&
+      !STI.isTargetEnvMacho()) {
+    // Check whether EAX is livein for this function.
+    bool isEAXAlive = isEAXLiveIn(MF);
+
+    const char *StackProbeSymbol =
+      STI.isTargetWindows() ? "_chkstk" : "_alloca";
+    if (Is64Bit && STI.isTargetCygMing())
+      StackProbeSymbol = "__chkstk";
+    unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+    if (!isEAXAlive) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+        .addImm(NumBytes);
+      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+        .addExternalSymbol(StackProbeSymbol)
+        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+    } else {
+      // Save EAX
+      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+        .addReg(X86::EAX, RegState::Kill);
+
+      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+      // allocated bytes for EAX.
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+        .addImm(NumBytes - 4);
+      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+        .addExternalSymbol(StackProbeSymbol)
+        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
+        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+      // Restore EAX
+      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+                                              X86::EAX),
+                                      StackPtr, false, NumBytes - 4);
+      MBB.insert(MBBI, MI);
+    }
+  } else if (NumBytes >= 4096 &&
+             STI.isTargetWin64() &&
+             !STI.isTargetEnvMacho()) {
+    // Sanity check that EAX is not livein for this function.  It should
+    // not be, so throw an assert.
+    assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+    // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+    // Function prologue is responsible for adjusting the stack pointer.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+      .addImm(NumBytes);
+    BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
+      .addExternalSymbol("__chkstk")
+      .addReg(StackPtr, RegState::Define | RegState::Implicit);
+    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+                 TII, *RegInfo);
+  } else if (NumBytes)
+    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+                 TII, *RegInfo);
+
+  if ((NumBytes || PushedRegs) && needsFrameMoves) {
+    // Mark end of stack pointer adjustment.
+    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+    if (!HasFP && NumBytes) {
+      // Define the current CFA rule to use the provided offset.
+      if (StackSize) {
+        MachineLocation SPDst(MachineLocation::VirtualFP);
+        MachineLocation SPSrc(MachineLocation::VirtualFP,
+                              -StackSize + stackGrowth);
+        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+      } else {
+        MachineLocation SPDst(StackPtr);
+        MachineLocation SPSrc(StackPtr, stackGrowth);
+        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+      }
+    }
+
+    // Emit DWARF info specifying the offsets of the callee-saved registers.
+    if (PushedRegs)
+      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+  }
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+                                MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+  const X86InstrInfo &TII = *TM.getInstrInfo();
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI != MBB.end() && "Returning block has no instructions");
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc DL = MBBI->getDebugLoc();
+  bool Is64Bit = STI.is64Bit();
+  unsigned StackAlign = getStackAlignment();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned StackPtr = RegInfo->getStackRegister();
+
+  switch (RetOpcode) {
+  default:
+    llvm_unreachable("Can only insert epilog into returning blocks");
+  case X86::RET:
+  case X86::RETI:
+  case X86::TCRETURNdi:
+  case X86::TCRETURNri:
+  case X86::TCRETURNmi:
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64:
+    break;  // These are ok
+  }
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  uint64_t StackSize = MFI->getStackSize();
+  uint64_t MaxAlign  = MFI->getMaxAlignment();
+  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+  uint64_t NumBytes = 0;
+
+  // If we're forcing a stack realignment we can't rely on just the frame
+  // info, we need to know the ABI stack alignment as well in case we
+  // have a call out.  Otherwise just make sure we have some alignment - we'll
+  // go with the minimum.
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else
+      MaxAlign = MaxAlign ? MaxAlign : 4;
+  }
+
+  if (hasFP(MF)) {
+    // Calculate required stack adjustment.
+    uint64_t FrameSize = StackSize - SlotSize;
+    if (RegInfo->needsStackRealignment(MF))
+      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+    NumBytes = FrameSize - CSSize;
+
+    // Pop EBP.
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+  } else {
+    NumBytes = StackSize - CSSize;
+  }
+
+  // Skip the callee-saved pop instructions.
+  MachineBasicBlock::iterator LastCSPop = MBBI;
+  while (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PI = prior(MBBI);
+    unsigned Opc = PI->getOpcode();
+
+    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+        !PI->getDesc().isTerminator())
+      break;
+
+    --MBBI;
+  }
+
+  DL = MBBI->getDebugLoc();
+
+  // If there is an ADD32ri or SUB32ri of ESP immediately before this
+  // instruction, merge the two instructions.
+  if (NumBytes || MFI->hasVarSizedObjects())
+    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+  // If dynamic alloca is used, then reset esp to point to the last callee-saved
+  // slot before popping them off! Same applies for the case, when stack was
+  // realigned.
+  if (RegInfo->needsStackRealignment(MF)) {
+    // We cannot use LEA here, because stack pointer was realigned. We need to
+    // deallocate local frame back.
+    if (CSSize) {
+      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+      MBBI = prior(LastCSPop);
+    }
+
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+            StackPtr).addReg(FramePtr);
+  } else if (MFI->hasVarSizedObjects()) {
+    if (CSSize) {
+      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+      MachineInstr *MI =
+        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+                     FramePtr, false, -CSSize);
+      MBB.insert(MBBI, MI);
+    } else {
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+        .addReg(FramePtr);
+    }
+  } else if (NumBytes) {
+    // Adjust stack pointer back: ESP += numbytes.
+    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+  }
+
+  // We're returning from function via eh_return.
+  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &DestAddr  = MBBI->getOperand(0);
+    assert(DestAddr.isReg() && "Offset should be in register!");
+    BuildMI(MBB, MBBI, DL,
+            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+            StackPtr).addReg(DestAddr.getReg());
+  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+             RetOpcode == X86::TCRETURNmi ||
+             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+             RetOpcode == X86::TCRETURNmi64) {
+    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+    // Tail call return: adjust the stack pointer and jump to callee.
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+    assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+    // Adjust stack pointer.
+    int StackAdj = StackAdjust.getImm();
+    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+    int Offset = 0;
+    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+    // Incoporate the retaddr area.
+    Offset = StackAdj-MaxTCDelta;
+    assert(Offset >= 0 && "Offset should never be negative");
+
+    if (Offset) {
+      // Check for possible merge with preceeding ADD instruction.
+      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+    }
+
+    // Jump to label or value in register.
+    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+                                       ? X86::TAILJMPd : X86::TAILJMPd64));
+      if (JumpTarget.isGlobal())
+        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                             JumpTarget.getTargetFlags());
+      else {
+        assert(JumpTarget.isSymbol());
+        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                              JumpTarget.getTargetFlags());
+      }
+    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+                                       ? X86::TAILJMPm : X86::TAILJMPm64));
+      for (unsigned i = 0; i != 5; ++i)
+        MIB.addOperand(MBBI->getOperand(i));
+    } else if (RetOpcode == X86::TCRETURNri64) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+        addReg(JumpTarget.getReg(), RegState::Kill);
+    } else {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+        addReg(JumpTarget.getReg(), RegState::Kill);
+    }
+
+    MachineInstr *NewMI = prior(MBBI);
+    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+      NewMI->addOperand(MBBI->getOperand(i));
+
+    // Delete the pseudo instruction TCRETURN.
+    MBB.erase(MBBI);
+  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+             (X86FI->getTCReturnAddrDelta() < 0)) {
+    // Add the return addr area delta back since we are not tail calling.
+    int delta = -1*X86FI->getTCReturnAddrDelta();
+    MBBI = MBB.getLastNonDebugInstr();
+
+    // Check for possible merge with preceeding ADD instruction.
+    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+  }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+  // Calculate amount of bytes used for return address storing
+  int stackGrowth = (STI.is64Bit() ? -8 : -4);
+  const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+  // Initial state of the frame pointer is esp+stackGrowth.
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(RI->getStackRegister(), stackGrowth);
+  Moves.push_back(MachineMove(0, Dst, Src));
+
+  // Add return address to move list
+  MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+  MachineLocation CSSrc(RI->getRARegister());
+  Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+  const X86RegisterInfo *RI =
+    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+  uint64_t StackSize = MFI->getStackSize();
+
+  if (RI->needsStackRealignment(MF)) {
+    if (FI < 0) {
+      // Skip the saved EBP.
+      Offset += RI->getSlotSize();
+    } else {
+      unsigned Align = MFI->getObjectAlignment(FI);
+      assert((-(Offset + StackSize)) % Align == 0);
+      Align = 0;
+      return Offset + StackSize;
+    }
+    // FIXME: Support tail calls
+  } else {
+    if (!hasFP(MF))
+      return Offset + StackSize;
+
+    // Skip the saved EBP.
+    Offset += RI->getSlotSize();
+
+    // Skip the RETADDR move area
+    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+    if (TailCallReturnAddrDelta < 0)
+      Offset -= TailCallReturnAddrDelta;
+  }
+
+  return Offset;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = MBB.findDebugLoc(MI);
+
+  MachineFunction &MF = *MBB.getParent();
+
+  bool isWin64 = STI.isTargetWin64();
+  unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+  unsigned FPReg = TRI->getFrameRegister(MF);
+  unsigned CalleeFrameSize = 0;
+
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    if (Reg == FPReg)
+      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+      continue;
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+      CalleeFrameSize += SlotSize;
+      BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+    } else {
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+                              RC, TRI);
+    }
+  }
+
+  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+  return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = MBB.findDebugLoc(MI);
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  unsigned FPReg = TRI->getFrameRegister(MF);
+  bool isWin64 = STI.isTargetWin64();
+  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (Reg == FPReg)
+      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+      continue;
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+      BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+    } else {
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                               RC, TRI);
+    }
+  }
+  return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                   RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+  unsigned SlotSize = RegInfo->getSlotSize();
+
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+  if (TailCallReturnAddrDelta < 0) {
+    // create RETURNADDR area
+    //   arg
+    //   arg
+    //   RETADDR
+    //   { ...
+    //     RETADDR area
+    //     ...
+    //   }
+    //   [EBP]
+    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+                           (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+  }
+
+  if (hasFP(MF)) {
+    assert((TailCallReturnAddrDelta <= 0) &&
+           "The Delta should always be zero or negative");
+    const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+    // Create a frame entry for the EBP register that must be saved.
+    int FrameIdx = MFI->CreateFixedObject(SlotSize,
+                                          -(int)SlotSize +
+                                          TFI.getOffsetOfLocalArea() +
+                                          TailCallReturnAddrDelta,
+                                          true);
+    assert(FrameIdx == MFI->getObjectIndexBegin() &&
+           "Slot for EBP register must be last in order to be found!");
+    FrameIdx = 0;
+  }
+}
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 000000000000..d71108cd0586
--- /dev/null
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,65 @@
+//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements X86-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class MCSymbol;
+  class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+  const X86TargetMachine &TM;
+  const X86Subtarget &STI;
+public:
+  explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+    : TargetFrameLowering(StackGrowsDown,
+                          sti.getStackAlignment(),
+                          (sti.is64Bit() ? -8 : -4)),
+      TM(tm), STI(sti) {
+  }
+
+  void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+                                 unsigned FramePtr) const;
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c5234413aba6..9b0ec6e123fe 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -190,20 +190,19 @@ namespace {
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
 
-    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
-    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
-    bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
-    bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
-    bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
     bool SelectScalarSSELoad(SDNode *Root, SDValue N,
@@ -264,12 +263,6 @@ namespace {
       return CurDAG->getTargetConstant(Imm, MVT::i8);
     }
 
-    /// getI16Imm - Return a target constant with the specified value, of type
-    /// i16.
-    inline SDValue getI16Imm(unsigned Imm) {
-      return CurDAG->getTargetConstant(Imm, MVT::i16);
-    }
-
     /// getI32Imm - Return a target constant with the specified value, of type
     /// i32.
     inline SDValue getI32Imm(unsigned Imm) {
@@ -511,10 +504,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     // FIXME: optimize the case where the src/dest is a load or store?
     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                           N->getOperand(0),
-                                          MemTmp, NULL, 0, MemVT,
+                                          MemTmp, MachinePointerInfo(), MemVT,
                                           false, false, 0);
-    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
-                                        NULL, 0, MemVT, false, false, 0);
+    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                        MachinePointerInfo(),
+                                        MemVT, false, false, 0);
 
     // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
     // extload we created.  This will cause general havok on the dag because
@@ -536,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                              MachineFrameInfo *MFI) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
-  if (Subtarget->isTargetCygMing())
+  if (Subtarget->isTargetCygMing()) {
+    unsigned CallOp =
+      Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
     BuildMI(BB, DebugLoc(),
-            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+            TII->get(CallOp)).addExternalSymbol("__main");
+  }
 }
 
 void X86DAGToDAGISel::EmitFunctionEntryCode() {
@@ -549,29 +546,27 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
 }
 
 
-bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
-                                              X86ISelAddressMode &AM) {
-  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
-  SDValue Segment = N.getOperand(0);
-
-  if (AM.Segment.getNode() == 0) {
-    AM.Segment = Segment;
-    return false;
-  }
-
-  return true;
-}
-
-bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+  SDValue Address = N->getOperand(1);
+  
+  // load gs:0 -> GS segment register.
+  // load fs:0 -> FS segment register.
+  //
   // This optimization is valid because the GNU TLS model defines that
   // gs:0 (or fs:0 on X86-64) contains its own address.
   // For more information see http://people.redhat.com/drepper/tls.pdf
-
-  SDValue Address = N.getOperand(1);
-  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
-      !MatchSegmentBaseAddress (Address, AM))
-    return false;
-
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+    if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+        Subtarget->isTargetELF())
+      switch (N->getPointerInfo().getAddrSpace()) {
+      case 256:
+        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+        return false;
+      case 257:
+        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+        return false;
+      }
+  
   return true;
 }
 
@@ -690,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
   return false;
 }
 
-/// isLogicallyAddWithConstant - Return true if this node is semantically an
-/// add of a value with a constantint.
-static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
-  // Check for (add x, Cst)
-  if (V->getOpcode() == ISD::ADD)
-    return isa<ConstantSDNode>(V->getOperand(1));
-
-  // Check for (or x, Cst), where Cst & x == 0.
-  if (V->getOpcode() != ISD::OR ||
-      !isa<ConstantSDNode>(V->getOperand(1)))
-    return false;
-  
-  // Handle "X | C" as "X + C" iff X is known to have C bits clear.
-  ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
-    
-  // Check to see if the LHS & C is zero.
-  return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
-}
-
 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                               unsigned Depth) {
   bool is64Bit = Subtarget->is64Bit();
@@ -756,11 +732,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     break;
   }
 
-  case X86ISD::SegmentBaseAddress:
-    if (!MatchSegmentBaseAddress(N, AM))
-      return false;
-    break;
-
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
     if (!MatchWrapper(N, AM))
@@ -768,7 +739,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case ISD::LOAD:
-    if (!MatchLoad(N, AM))
+    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
       return false;
     break;
 
@@ -799,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
         // Okay, we know that we have a scale by now.  However, if the scaled
         // value is an add of something and a constant, we can fold the
         // constant into the disp field here.
-        if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
+        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
           AM.IndexReg = ShVal.getNode()->getOperand(0);
           ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -943,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // Add an artificial use to this node so that we can keep track of
     // it if it gets CSE'd with a different node.
     HandleSDNode Handle(N);
-    SDValue LHS = Handle.getValue().getNode()->getOperand(0);
-    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
 
     X86ISelAddressMode Backup = AM;
-    if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
-        !MatchAddressRecursively(RHS, AM, Depth+1))
+    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
       return false;
     AM = Backup;
-    LHS = Handle.getValue().getNode()->getOperand(0);
-    RHS = Handle.getValue().getNode()->getOperand(1);
-
+    
     // Try again after commuting the operands.
-    if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
-        !MatchAddressRecursively(LHS, AM, Depth+1))
+    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
       return false;
     AM = Backup;
-    LHS = Handle.getValue().getNode()->getOperand(0);
-    RHS = Handle.getValue().getNode()->getOperand(1);
 
     // If we couldn't fold both operands into the address at the same time,
     // see if we can just put each operand into a register and fold at least
@@ -968,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         !AM.Base_Reg.getNode() &&
         !AM.IndexReg.getNode()) {
-      AM.Base_Reg = LHS;
-      AM.IndexReg = RHS;
+      N = Handle.getValue();
+      AM.Base_Reg = N.getOperand(0);
+      AM.IndexReg = N.getOperand(1);
       AM.Scale = 1;
       return false;
     }
+    N = Handle.getValue();
     break;
   }
 
   case ISD::OR:
     // Handle "X | C" as "X + C" iff X is known to have C bits clear.
-    if (isLogicallyAddWithConstant(N, CurDAG)) {
+    if (CurDAG->isBaseWithConstantOffset(N)) {
       X86ISelAddressMode Backup = AM;
       ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
       uint64_t Offset = CN->getSExtValue();
@@ -1148,10 +1115,30 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
 /// SelectAddr - returns true if it is able pattern match an addressing mode.
 /// It returns the operands which make up the maximal addressing mode it can
 /// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched.  It
+/// is always a load, store, atomic node, or null.  It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                  SDValue &Scale, SDValue &Index,
                                  SDValue &Disp, SDValue &Segment) {
   X86ISelAddressMode AM;
+  
+  if (Parent &&
+      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+      // that are not a MemSDNode, and thus don't have proper addrspace info.
+      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+      Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+    unsigned AddrSpace =
+      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+    // AddrSpace 256 -> GS, 257 -> FS.
+    if (AddrSpace == 256)
+      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+    if (AddrSpace == 257)
+      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+  }
+  
   if (MatchAddress(N, AM))
     return false;
 
@@ -1187,7 +1174,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
         IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
         IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
-      if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
+      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
         return false;
       return true;
     }
@@ -1205,7 +1192,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
       IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
     // Okay, this is a zero extending load.  Fold it.
     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
-    if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
       return false;
     PatternNodeWithChain = SDValue(LD, 0);
     return true;
@@ -1216,7 +1203,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
 
 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                     SDValue &Base, SDValue &Scale,
                                     SDValue &Index, SDValue &Disp,
                                     SDValue &Segment) {
@@ -1278,7 +1265,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
 }
 
 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
   assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1311,7 +1298,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
       !IsLegalToFold(N, P, P, OptLevel))
     return false;
   
-  return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+  return SelectAddr(N.getNode(),
+                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
 }
 
 /// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1329,7 +1317,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   SDValue In2L = Node->getOperand(2);
   SDValue In2H = Node->getOperand(3);
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+  if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return NULL;
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1355,7 +1343,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
   SDValue Ptr = Node->getOperand(1);
   SDValue Val = Node->getOperand(2);
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return 0;
 
   bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1592,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       return RetVal;
     break;
   }
-
+  case X86ISD::UMUL: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    
+    unsigned LoReg;
+    switch (NVT.getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unsupported VT!");
+    case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
+    case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
+    case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+    case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+    }
+    
+    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+                                          N0, SDValue()).getValue(1);
+    
+    SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+    SDValue Ops[] = {N1, InFlag};
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+    
+    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+    ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+    return NULL;
+  }
+      
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI: {
     SDValue N0 = Node->getOperand(0);
@@ -1642,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       SDNode *CNode =
-        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                                array_lengthof(Ops));
       InFlag = SDValue(CNode, 1);
+
       // Update the chain.
       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
     } else {
-      InFlag =
-        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+      SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+      InFlag = SDValue(CNode, 0);
     }
 
     // Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1688,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
-
+    
     return NULL;
   }
 
@@ -1773,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       if (isSigned && !signBitIsZero) {
         // Sign extend the low part into the high part.
         InFlag =
-          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
       } else {
         // Zero out the high part, effectively zero extending the input.
         SDValue ClrNode =
@@ -1787,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       SDNode *CNode =
-        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                                array_lengthof(Ops));
       InFlag = SDValue(CNode, 1);
       // Update the chain.
       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
     } else {
       InFlag =
-        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
     }
 
     // Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1971,7 +1985,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
   case 'v':   // not offsetable    ??
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
       return true;
     break;
   }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6db979925ad..27024b4e9e5a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,9 +16,9 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
-#include "X86ShuffleDecode.h"
 #include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -28,6 +28,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -56,39 +57,172 @@ using namespace dwarf;
 STATISTIC(NumTailCalls, "Number of tail calls");
 
 static cl::opt<bool>
-DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+Disable256Bit("disable-256bit", cl::Hidden,
+              cl::desc("Disable use of 256-bit vectors"));
 
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
 
+static SDValue Insert128BitVector(SDValue Result,
+                                  SDValue Vec,
+                                  SDValue Idx,
+                                  SelectionDAG &DAG,
+                                  DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+                                   SDValue Idx,
+                                   SelectionDAG &DAG,
+                                   DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference.  Idx is an index in the 128 bits we
+/// want.  It need not be aligned to a 128-bit bounday.  That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+                                   SDValue Idx,
+                                   SelectionDAG &DAG,
+                                   DebugLoc dl) {
+  EVT VT = Vec.getValueType();
+  assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+  EVT ElVT = VT.getVectorElementType();
+
+  int Factor = VT.getSizeInBits() / 128;
+
+  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+                                  ElVT,
+                                  VT.getVectorNumElements() / Factor);
+
+  // Extract from UNDEF is UNDEF.
+  if (Vec.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+  if (isa<ConstantSDNode>(Idx)) {
+    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    // Extract the relevant 128 bits.  Generate an EXTRACT_SUBVECTOR
+    // we can match to VEXTRACTF128.
+    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+    // This is the index of the first element of the 128-bit chunk
+    // we want.
+    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+                                 * ElemsPerChunk);
+
+    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+    SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+                                 VecIdx);
+
+    return Result;
+  }
+
+  return SDValue();
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits.  This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference.  Idx is an index in the 128 bits
+/// we want.  It need not be aligned to a 128-bit bounday.  That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+                                  SDValue Vec,
+                                  SDValue Idx,
+                                  SelectionDAG &DAG,
+                                  DebugLoc dl) {
+  if (isa<ConstantSDNode>(Idx)) {
+    EVT VT = Vec.getValueType();
+    assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+    EVT ElVT = VT.getVectorElementType();
+
+    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    EVT ResultVT = Result.getValueType();
+
+    // Insert the relevant 128 bits.
+    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+    // This is the index of the first element of the 128-bit chunk
+    // we want.
+    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+                                 * ElemsPerChunk);
+
+    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+                         VecIdx);
+    return Result;
+  }
+
+  return SDValue();
+}
+
+/// Given two vectors, concat them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+  DebugLoc dl = Lower.getDebugLoc();
+
+  assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+  EVT VT = EVT::getVectorVT(*DAG.getContext(),
+                            Lower.getValueType().getVectorElementType(),
+                            Lower.getValueType().getVectorNumElements() * 2);
+
+  // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+  assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+  // Insert the upper subvector.
+  SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+                                   DAG.getConstant(
+                                     // This is half the length of the result
+                                     // vector.  Start inserting the upper 128
+                                     // bits here.
+                                     Lower.getValueType().getVectorNumElements(),
+                                     MVT::i32),
+                                   DAG, dl);
+
+  // Insert the lower subvector.
+  Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+  return Vec;
+}
+
 static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-  
-  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  
-  if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
-    if (is64Bit) return new X8664_MachoTargetObjectFile();
+  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+  bool is64Bit = Subtarget->is64Bit();
+
+  if (Subtarget->isTargetEnvMacho()) {
+    if (is64Bit)
+      return new X8664_MachoTargetObjectFile();
     return new TargetLoweringObjectFileMachO();
-  } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){
-    if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
+  }
+
+  if (Subtarget->isTargetELF()) {
+    if (is64Bit)
+      return new X8664_ELFTargetObjectFile(TM);
     return new X8632_ELFTargetObjectFile(TM);
-  } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
+  }
+  if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
     return new TargetLoweringObjectFileCOFF();
-  }  
   llvm_unreachable("unknown subtarget type");
 }
 
 X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   : TargetLowering(TM, createTLOF(TM)) {
   Subtarget = &TM.getSubtarget<X86Subtarget>();
-  X86ScalarSSEf64 = Subtarget->hasSSE2();
-  X86ScalarSSEf32 = Subtarget->hasSSE1();
+  X86ScalarSSEf64 = Subtarget->hasXMMInt();
+  X86ScalarSSEf32 = Subtarget->hasXMM();
   X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
 
   RegInfo = TM.getRegisterInfo();
   TD = getTargetData();
 
   // Set up the TargetLowering object.
+  static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
   // X86 is weird, it always uses i8 for shift amounts and setcc results.
   setShiftAmountType(MVT::i8);
@@ -96,6 +230,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
 
+  if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+    // Setup Windows compiler runtime calls.
+    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+    setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+    setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+  }
+
   if (Subtarget->isTargetDarwin()) {
     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
     setUseUnderscoreSetJmp(false);
@@ -213,16 +359,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
-  if (!X86ScalarSSEf64) { 
-    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
-    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
+  if (!X86ScalarSSEf64) {
+    setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
+    setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
     if (Subtarget->is64Bit()) {
-      setOperationAction(ISD::BIT_CONVERT    , MVT::f64  , Expand);
-      // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
-      if (Subtarget->hasMMX() && !DisableMMX)
-        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Custom);
-      else 
-        setOperationAction(ISD::BIT_CONVERT    , MVT::i64  , Expand);
+      setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
+      // Without SSE, i64->f64 goes through memory.
+      setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
     }
   }
 
@@ -236,30 +379,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // (low) operations are left as Legal, as there are single-result
   // instructions for this in x86. Using the two-result multiply instructions
   // when both high and low results are needed must be arranged by dagcombine.
-  setOperationAction(ISD::MULHS           , MVT::i8    , Expand);
-  setOperationAction(ISD::MULHU           , MVT::i8    , Expand);
-  setOperationAction(ISD::SDIV            , MVT::i8    , Expand);
-  setOperationAction(ISD::UDIV            , MVT::i8    , Expand);
-  setOperationAction(ISD::SREM            , MVT::i8    , Expand);
-  setOperationAction(ISD::UREM            , MVT::i8    , Expand);
-  setOperationAction(ISD::MULHS           , MVT::i16   , Expand);
-  setOperationAction(ISD::MULHU           , MVT::i16   , Expand);
-  setOperationAction(ISD::SDIV            , MVT::i16   , Expand);
-  setOperationAction(ISD::UDIV            , MVT::i16   , Expand);
-  setOperationAction(ISD::SREM            , MVT::i16   , Expand);
-  setOperationAction(ISD::UREM            , MVT::i16   , Expand);
-  setOperationAction(ISD::MULHS           , MVT::i32   , Expand);
-  setOperationAction(ISD::MULHU           , MVT::i32   , Expand);
-  setOperationAction(ISD::SDIV            , MVT::i32   , Expand);
-  setOperationAction(ISD::UDIV            , MVT::i32   , Expand);
-  setOperationAction(ISD::SREM            , MVT::i32   , Expand);
-  setOperationAction(ISD::UREM            , MVT::i32   , Expand);
-  setOperationAction(ISD::MULHS           , MVT::i64   , Expand);
-  setOperationAction(ISD::MULHU           , MVT::i64   , Expand);
-  setOperationAction(ISD::SDIV            , MVT::i64   , Expand);
-  setOperationAction(ISD::UDIV            , MVT::i64   , Expand);
-  setOperationAction(ISD::SREM            , MVT::i64   , Expand);
-  setOperationAction(ISD::UREM            , MVT::i64   , Expand);
+  for (unsigned i = 0, e = 4; i != e; ++i) {
+    MVT VT = IntVTs[i];
+    setOperationAction(ISD::MULHS, VT, Expand);
+    setOperationAction(ISD::MULHU, VT, Expand);
+    setOperationAction(ISD::SDIV, VT, Expand);
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+
+    // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
+    setOperationAction(ISD::ADDC, VT, Custom);
+    setOperationAction(ISD::ADDE, VT, Custom);
+    setOperationAction(ISD::SUBC, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Custom);
+  }
 
   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
@@ -276,21 +410,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
 
-  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
   setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
   setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
-  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
   setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
   setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
-  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
   setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
   if (Subtarget->is64Bit()) {
-    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
     setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
     setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
   }
 
+  if (Subtarget->hasPOPCNT()) {
+    setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
+  } else {
+    setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
+    setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
+    setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
+    if (Subtarget->is64Bit())
+      setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
+  }
+
   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
   setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
 
@@ -298,7 +438,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
   // X86 wants to expand cmov itself.
   setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
-  setOperationAction(ISD::SELECT        , MVT::i16  , Custom);
+  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
@@ -341,12 +481,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
   }
 
-  if (Subtarget->hasSSE1())
+  if (Subtarget->hasXMM())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
 
   // We may not have a libcall for MEMBARRIER so we should lower this.
   setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);
-  
+
   // On X86 and X86-64, atomic operations are lowered to locked instructions.
   // Locked instructions, in turn, have implicit fence semantics (all memory
   // operations are flushed before issuing the locked instruction, and they
@@ -355,15 +495,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setShouldFoldAtomicFences(true);
 
   // Expand certain atomics
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
-
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+  for (unsigned i = 0, e = 4; i != e; ++i) {
+    MVT VT = IntVTs[i];
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+  }
 
   if (!Subtarget->is64Bit()) {
     setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -415,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
   if (Subtarget->is64Bit())
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-  if (Subtarget->isTargetCygMing())
+  if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
@@ -512,13 +648,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
     {
-      bool ignored;
-      APFloat TmpFlt(+0.0);
-      TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
-                     &ignored);
+      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
       addLegalFPImmediate(TmpFlt);  // FLD0
       TmpFlt.changeSign();
       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
+
+      bool ignored;
       APFloat TmpFlt2(+1.0);
       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                       &ignored);
@@ -564,8 +699,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
-    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
     setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+    setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
     setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
@@ -613,91 +749,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
   // with -msoft-float, disable use of MMX as well.
-  if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
-    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass, false);
-    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
-    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
-    
-    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
-
-    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
-    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
-    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);
-    setOperationAction(ISD::ADD,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
-    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
-    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);
-    setOperationAction(ISD::SUB,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
-    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);
-
-    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
-    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v2i32, Promote);
-    AddPromotedToType (ISD::AND,                MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::AND,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v2i32, Promote);
-    AddPromotedToType (ISD::OR,                 MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::OR,                 MVT::v1i64, Legal);
-
-    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v2i32, Promote);
-    AddPromotedToType (ISD::XOR,                MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::XOR,                MVT::v1i64, Legal);
-
-    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v2i32, Promote);
-    AddPromotedToType (ISD::LOAD,               MVT::v2i32, MVT::v1i64);
-    setOperationAction(ISD::LOAD,               MVT::v1i64, Legal);
-
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v1i64, Custom);
-
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i8,  Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i16, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i32, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v1i64, Custom);
-
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Custom);
-
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i16, Custom);
-
-    setOperationAction(ISD::SELECT,             MVT::v8i8, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
-    setOperationAction(ISD::SELECT,             MVT::v1i64, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v8i8, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v4i16, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v2i32, Custom);
-
-    if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v8i8,  Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v4i16, Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v2i32, Custom);
-      setOperationAction(ISD::BIT_CONVERT,        MVT::v1i64, Custom);
-    }
-  }
-
-  if (!UseSoftFloat && Subtarget->hasSSE1()) {
+  if (!UseSoftFloat && Subtarget->hasMMX()) {
+    addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+    // No operations on x86mmx supported, everything uses intrinsics.
+  }
+
+  // MMX-sized vectors (other than x86mmx) are expected to be expanded
+  // into smaller operations.
+  setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
+  setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
+  setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
+  setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
+  setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
+  setOperationAction(ISD::AND,                MVT::v4i16, Expand);
+  setOperationAction(ISD::AND,                MVT::v2i32, Expand);
+  setOperationAction(ISD::AND,                MVT::v1i64, Expand);
+  setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
+  setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
+  setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
+  setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
+  setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
+  setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
+  setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
+  setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
+  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
+  setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
+  setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
+  setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
+  setOperationAction(ISD::BITCAST,            MVT::v8i8,  Expand);
+  setOperationAction(ISD::BITCAST,            MVT::v4i16, Expand);
+  setOperationAction(ISD::BITCAST,            MVT::v2i32, Expand);
+  setOperationAction(ISD::BITCAST,            MVT::v1i64, Expand);
+
+  if (!UseSoftFloat && Subtarget->hasXMM()) {
     addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
 
     setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
@@ -714,7 +803,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::VSETCC,             MVT::v4f32, Custom);
   }
 
-  if (!UseSoftFloat && Subtarget->hasSSE2()) {
+  if (!UseSoftFloat && Subtarget->hasXMMInt()) {
     addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
 
     // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -795,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       // Do not attempt to promote non-128-bit vectors
       if (!VT.is128BitVector())
         continue;
-      
+
       setOperationAction(ISD::AND,    SVT, Promote);
       AddPromotedToType (ISD::AND,    SVT, MVT::v2i64);
       setOperationAction(ISD::OR,     SVT, Promote);
@@ -818,10 +907,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
-    if (!DisableMMX && Subtarget->hasMMX()) {
-      setOperationAction(ISD::FP_TO_SINT,         MVT::v2i32, Custom);
-      setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
-    }
   }
 
   if (Subtarget->hasSSE41()) {
@@ -863,9 +948,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     }
   }
 
-  if (Subtarget->hasSSE42()) {
+  if (Subtarget->hasSSE42())
     setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
-  }
 
   if (!UseSoftFloat && Subtarget->hasAVX()) {
     addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
@@ -878,27 +962,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
+
     setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
     setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
     setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
     setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
     setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
     setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
-    //setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8f32, Custom);
-    //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
-    //setOperationAction(ISD::SELECT,             MVT::v8f32, Custom);
-    //setOperationAction(ISD::VSETCC,             MVT::v8f32, Custom);
-
-    // Operations to consider commented out -v16i16 v32i8
-    //setOperationAction(ISD::ADD,                MVT::v16i16, Legal);
-    setOperationAction(ISD::ADD,                MVT::v8i32, Custom);
-    setOperationAction(ISD::ADD,                MVT::v4i64, Custom);
-    //setOperationAction(ISD::SUB,                MVT::v32i8, Legal);
-    //setOperationAction(ISD::SUB,                MVT::v16i16, Legal);
-    setOperationAction(ISD::SUB,                MVT::v8i32, Custom);
-    setOperationAction(ISD::SUB,                MVT::v4i64, Custom);
-    //setOperationAction(ISD::MUL,                MVT::v16i16, Legal);
+
     setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
     setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
     setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
@@ -906,85 +977,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
 
-    setOperationAction(ISD::VSETCC,             MVT::v4f64, Custom);
-    // setOperationAction(ISD::VSETCC,             MVT::v32i8, Custom);
-    // setOperationAction(ISD::VSETCC,             MVT::v16i16, Custom);
-    setOperationAction(ISD::VSETCC,             MVT::v8i32, Custom);
-
-    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i8, Custom);
-    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i16, Custom);
-    // setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i16, Custom);
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i32, Custom);
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8f32, Custom);
-
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f64, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i64, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f64, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i64, Custom);
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f64, Custom);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
-
-#if 0
-    // Not sure we want to do this since there are no 256-bit integer
-    // operations in AVX
-
-    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
-    // This includes 256-bit vectors
-    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
-      EVT VT = (MVT::SimpleValueType)i;
-
-      // Do not attempt to custom lower non-power-of-2 vectors
-      if (!isPowerOf2_32(VT.getVectorNumElements()))
+    // Custom lower build_vector, vector_shuffle, scalar_to_vector,
+    // insert_vector_elt extract_subvector and extract_vector_elt for
+    // 256-bit types.
+    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+         ++i) {
+      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+      // Do not attempt to custom lower non-256-bit vectors
+      if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
+          || (MVT(VT).getSizeInBits() < 256))
         continue;
-
       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-    }
+      setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
+    }
+    // Custom-lower insert_subvector and extract_subvector based on
+    // the result type.
+    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+         ++i) {
+      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+      // Do not attempt to custom lower non-256-bit vectors
+      if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+        continue;
 
-    if (Subtarget->is64Bit()) {
-      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i64, Custom);
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+      if (MVT(VT).getSizeInBits() == 128) {
+        setOperationAction(ISD::EXTRACT_SUBVECTOR,  VT, Custom);
+      }
+      else if (MVT(VT).getSizeInBits() == 256) {
+        setOperationAction(ISD::INSERT_SUBVECTOR,  VT, Custom);
+      }
     }
-#endif
-
-#if 0
-    // Not sure we want to do this since there are no 256-bit integer
-    // operations in AVX
 
-    // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
-    // Including 256-bit vectors
-    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
-      EVT VT = (MVT::SimpleValueType)i;
+    // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+    // Don't promote loads because we need them for VPERM vector index versions.
 
-      if (!VT.is256BitVector()) {
+    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+         VT++) {
+      if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
+          || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
         continue;
-      }
-      setOperationAction(ISD::AND,    VT, Promote);
-      AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
-      setOperationAction(ISD::OR,     VT, Promote);
-      AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
-      setOperationAction(ISD::XOR,    VT, Promote);
-      AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
-      setOperationAction(ISD::LOAD,   VT, Promote);
-      AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
-      setOperationAction(ISD::SELECT, VT, Promote);
-      AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v4i64);
+      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v4i64);
+      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v4i64);
+      //setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
+      //AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
     }
-
-    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-#endif
   }
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
-  // Add/Sub/Mul with overflow operations are custom lowered.
-  setOperationAction(ISD::SADDO, MVT::i32, Custom);
-  setOperationAction(ISD::UADDO, MVT::i32, Custom);
-  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
-  setOperationAction(ISD::USUBO, MVT::i32, Custom);
-  setOperationAction(ISD::SMULO, MVT::i32, Custom);
 
   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
   // handle type legalization for these operations here.
@@ -992,14 +1044,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // FIXME: We really should do custom legalization for addition and
   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
   // than generic legalization for 64-bit multiplication-with-overflow, though.
-  if (Subtarget->is64Bit()) {
-    setOperationAction(ISD::SADDO, MVT::i64, Custom);
-    setOperationAction(ISD::UADDO, MVT::i64, Custom);
-    setOperationAction(ISD::SSUBO, MVT::i64, Custom);
-    setOperationAction(ISD::USUBO, MVT::i64, Custom);
-    setOperationAction(ISD::SMULO, MVT::i64, Custom);
+  for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+    // Add/Sub/Mul with overflow operations are custom lowered.
+    MVT VT = IntVTs[i];
+    setOperationAction(ISD::SADDO, VT, Custom);
+    setOperationAction(ISD::UADDO, VT, Custom);
+    setOperationAction(ISD::SSUBO, VT, Custom);
+    setOperationAction(ISD::USUBO, VT, Custom);
+    setOperationAction(ISD::SMULO, VT, Custom);
+    setOperationAction(ISD::UMULO, VT, Custom);
   }
 
+  // There are no 8-bit 3-address imul/mul instructions
+  setOperationAction(ISD::SMULO, MVT::i8, Expand);
+  setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
   if (!Subtarget->is64Bit()) {
     // These libcalls are not available in 32-bit.
     setLibcallName(RTLIB::SHL_I128, 0);
@@ -1016,6 +1075,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::OR);
+  setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   if (Subtarget->is64Bit())
@@ -1023,11 +1085,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // FIXME: These should be based on subtarget info. Plus, the values should
-  // be smaller when we are in optimizing for size mode.
+  // On Darwin, -Os means optimize for size without hurting performance,
+  // do not reduce the limit.
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
   maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
-  maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+  maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+  maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
 }
@@ -1078,7 +1143,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
   }
 
   unsigned Align = 4;
-  if (Subtarget->hasSSE1())
+  if (Subtarget->hasXMM())
     getMaxByValAlign(Ty, Align);
   return Align;
 }
@@ -1119,7 +1184,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
     } else if (!MemcpyStrSrc && Size >= 8 &&
                !Subtarget->is64Bit() &&
                Subtarget->getStackAlignment() >= 8 &&
-               Subtarget->hasSSE2()) {
+               Subtarget->hasXMMInt()) {
       // Do not use f64 to lower memcpy if source is string constant. It's
       // better to use i32 to avoid the loads.
       return MVT::f64;
@@ -1139,21 +1204,11 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
       Subtarget->isPICStyleGOT())
     return MachineJumpTableInfo::EK_Custom32;
-  
+
   // Otherwise, use the normal jump table encoding heuristics.
   return TargetLowering::getJumpTableEncoding();
 }
 
-/// getPICBaseSymbol - Return the X86-32 PIC base.
-MCSymbol *
-X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
-                                    MCContext &Ctx) const {
-  const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
-  return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
-                               Twine(MF->getFunctionNumber())+"$pb");
-}
-
-
 const MCExpr *
 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                              const MachineBasicBlock *MBB,
@@ -1188,7 +1243,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
 
   // Otherwise, the reference is relative to the PIC base.
-  return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx);
+  return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
 }
 
 /// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -1196,6 +1251,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
   return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
 }
 
+// FIXME: Why this routine is here? Move to RegInfo!
 std::pair<const TargetRegisterClass*, uint8_t>
 X86TargetLowering::findRepresentativeClass(EVT VT) const{
   const TargetRegisterClass *RRC = 0;
@@ -1207,8 +1263,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
     RRC = (Subtarget->is64Bit()
            ? X86::GR64RegisterClass : X86::GR32RegisterClass);
     break;
-  case MVT::v8i8: case MVT::v4i16:
-  case MVT::v2i32: case MVT::v1i64: 
+  case MVT::x86mmx:
     RRC = X86::VR64RegisterClass;
     break;
   case MVT::f32: case MVT::f64:
@@ -1222,10 +1277,13 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
   return std::make_pair(RRC, Cost);
 }
 
+// FIXME: Why this routine is here? Move to RegInfo!
 unsigned
 X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                        MachineFunction &MF) const {
-  unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
   switch (RC->getID()) {
   default:
     return 0;
@@ -1267,7 +1325,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
 
 #include "X86GenCallingConv.inc"
 
-bool 
+bool
 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         LLVMContext &Context) const {
@@ -1312,16 +1370,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     SDValue ValToCopy = OutVals[i];
     EVT ValVT = ValToCopy.getValueType();
 
-    // If this is x86-64, and we disabled SSE, we can't return FP values
-    if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+    // If this is x86-64, and we disabled SSE, we can't return FP values,
+    // or SSE or MMX vectors.
+    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+          (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
     // llvm-gcc has never done it right and no one has noticed, so this
     // should be OK for now.
     if (ValVT == MVT::f64 &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+        (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
       report_fatal_error("SSE2 register return with SSE2 disabled");
 
     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1340,20 +1400,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
     // which is returned in RAX / RDX.
     if (Subtarget->is64Bit()) {
-      if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
-        ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+      if (ValVT == MVT::x86mmx) {
         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+          ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                   ValToCopy);
-          
           // If we don't have SSE2 available, convert to v4f32 so the generated
           // register is legal.
           if (!Subtarget->hasSSE2())
-            ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+            ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
         }
       }
     }
-    
+
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
     Flag = Chain.getValue(1);
   }
@@ -1367,7 +1426,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     MachineFunction &MF = DAG.getMachineFunction();
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
-    assert(Reg && 
+    assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments().");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
@@ -1388,6 +1447,28 @@ X86TargetLowering::LowerReturn(SDValue Chain,
                      MVT::Other, &RetOps[0], RetOps.size());
 }
 
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  SDNode *Copy = *N->use_begin();
+  if (Copy->getOpcode() != ISD::CopyToReg &&
+      Copy->getOpcode() != ISD::FP_EXTEND)
+    return false;
+
+  bool HasRet = false;
+  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+       UI != UE; ++UI) {
+    if (UI->getOpcode() != X86ISD::RET_FLAG)
+      return false;
+    HasRet = true;
+  }
+
+  return HasRet;
+}
+
 /// LowerCallResult - Lower the result values of a call into the
 /// appropriate copies out of appropriate physical registers.
 ///
@@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
-        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
 
@@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
       if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
       if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
       SDValue Ops[] = { Chain, InFlag };
-      Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+      Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
                                          Ops, 2), 1);
       Val = Chain.getValue(0);
 
@@ -1456,7 +1537,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    MVT::i64, InFlag).getValue(1);
         Val = Chain.getValue(0);
       }
-      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+      Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
     } else {
       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                  CopyVT, InFlag).getValue(1);
@@ -1499,30 +1580,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
   return Ins[0].Flags.isSRet();
 }
 
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
-  if (Subtarget->is64Bit()) {
-    if (CC == CallingConv::GHC)
-      return CC_X86_64_GHC;
-    else if (Subtarget->isTargetWin64())
-      return CC_X86_Win64_C;
-    else
-      return CC_X86_64_C;
-  }
-
-  if (CC == CallingConv::X86_FastCall)
-    return CC_X86_32_FastCall;
-  else if (CC == CallingConv::X86_ThisCall)
-    return CC_X86_32_ThisCall;
-  else if (CC == CallingConv::Fast)
-    return CC_X86_32_FastCC;
-  else if (CC == CallingConv::GHC)
-    return CC_X86_32_GHC;
-  else
-    return CC_X86_32_C;
-}
-
 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 /// by "Src" to address "Dst" with size and alignment information specified by
 /// the specific parameter attribute. The copy will be passed as a byval
@@ -1531,10 +1588,11 @@ static SDValue
 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                           DebugLoc dl) {
-  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        /*isVolatile*/false, /*AlwaysInline=*/true,
-                       NULL, 0, NULL, 0);
+                       MachinePointerInfo(), MachinePointerInfo());
 }
 
 /// IsTailCallConvention - Return true if the calling convention is one that
@@ -1583,7 +1641,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
                                     VA.getLocMemOffset(), isImmutable);
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
     return DAG.getLoad(ValVT, dl, Chain, FIN,
-                       PseudoSourceValue::getFixedStack(FI), 0,
+                       MachinePointerInfo::getFixedStack(FI),
                        false, false, 0);
   }
 }
@@ -1617,7 +1675,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+  // Allocate shadow area for Win64
+  if (IsWin64) {
+    CCInfo.AllocateStack(32, 8);
+  }
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
 
   unsigned LastVal = ~0U;
   SDValue ArgValue;
@@ -1644,12 +1708,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         RC = X86::VR256RegisterClass;
       else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
         RC = X86::VR128RegisterClass;
-      else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+      else if (RegVT == MVT::x86mmx)
         RC = X86::VR64RegisterClass;
       else
         llvm_unreachable("Unknown argument type!");
 
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1662,14 +1726,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                                DAG.getValueType(VA.getValVT()));
       else if (VA.getLocInfo() == CCValAssign::BCvt)
-        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
 
       if (VA.isExtInLoc()) {
         // Handle MMX values passed in XMM regs.
         if (RegVT.isVector()) {
-          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                                 ArgValue, DAG.getConstant(0, MVT::i64));
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+          ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+                                 ArgValue);
         } else
           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
@@ -1680,8 +1743,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
 
     // If value is passed via pointer - do a load.
     if (VA.getLocInfo() == CCValAssign::Indirect)
-      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
-                             false, false, 0);
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+                             MachinePointerInfo(), false, false, 0);
 
     InVals.push_back(ArgValue);
   }
@@ -1708,8 +1771,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
-                    CallConv != CallingConv::X86_ThisCall)) {
+    if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+                    CallConv != CallingConv::X86_ThisCall))) {
       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
     }
     if (Is64Bit) {
@@ -1719,9 +1782,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       static const unsigned GPR64ArgRegsWin64[] = {
         X86::RCX, X86::RDX, X86::R8,  X86::R9
       };
-      static const unsigned XMMArgRegsWin64[] = {
-        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
-      };
       static const unsigned GPR64ArgRegs64Bit[] = {
         X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
       };
@@ -1729,40 +1789,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
         X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
       };
-      const unsigned *GPR64ArgRegs, *XMMArgRegs;
+      const unsigned *GPR64ArgRegs;
+      unsigned NumXMMRegs = 0;
 
       if (IsWin64) {
-        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+        // The XMM registers which might contain var arg parameters are shadowed
+        // in their paired GPR.  So we only need to save the GPR to their home
+        // slots.
+        TotalNumIntRegs = 4;
         GPR64ArgRegs = GPR64ArgRegsWin64;
-        XMMArgRegs = XMMArgRegsWin64;
       } else {
         TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
         GPR64ArgRegs = GPR64ArgRegs64Bit;
-        XMMArgRegs = XMMArgRegs64Bit;
+
+        NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
       }
       unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                        TotalNumIntRegs);
-      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
-                                                       TotalNumXMMRegs);
 
       bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
-      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+      assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
              "SSE register cannot be used when SSE is disabled!");
       assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
              "SSE register cannot be used when SSE is disabled!");
-      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
         // Kernel mode asks for SSE to be disabled, so don't push them
         // on the stack.
         TotalNumXMMRegs = 0;
 
-      // For X86-64, if there are vararg parameters that are passed via
-      // registers, then we must store them to their spots on the stack so they
-      // may be loaded by deferencing the result of va_next.
-      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
-      FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
-      FuncInfo->setRegSaveFrameIndex(
-        MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+      if (IsWin64) {
+        const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+        // Get to the caller-allocated home save location.  Add 8 to account
+        // for the return address.
+        int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+      } else {
+        // For X86-64, if there are vararg parameters that are passed via
+        // registers, then we must store them to their spots on the stack so they
+        // may be loaded by deferencing the result of va_next.
+        FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+        FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
                                false));
+      }
 
       // Store the integer parameter registers.
       SmallVector<SDValue, 8> MemOps;
@@ -1773,13 +1845,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                   DAG.getIntPtrConstant(Offset));
         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
-                                     X86::GR64RegisterClass);
+                                     X86::GR64RegisterClass, dl);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(
-                         FuncInfo->getRegSaveFrameIndex()),
-                       Offset, false, false, 0);
+                       MachinePointerInfo::getFixedStack(
+                         FuncInfo->getRegSaveFrameIndex(), Offset),
+                       false, false, 0);
         MemOps.push_back(Store);
         Offset += 8;
       }
@@ -1789,7 +1861,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SmallVector<SDValue, 11> SaveXMMOps;
         SaveXMMOps.push_back(Chain);
 
-        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
         SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
         SaveXMMOps.push_back(ALVal);
 
@@ -1799,8 +1871,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                                FuncInfo->getVarArgsFPOffset()));
 
         for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
-          unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
-                                       X86::VR128RegisterClass);
+          unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+                                       X86::VR128RegisterClass, dl);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
           SaveXMMOps.push_back(Val);
         }
@@ -1843,15 +1915,14 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     ISD::ArgFlagsTy Flags) const {
-  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
-  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+  unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-  if (Flags.isByVal()) {
+  if (Flags.isByVal())
     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-  }
+
   return DAG.getStore(Chain, dl, Arg, PtrOff,
-                      PseudoSourceValue::getStack(), LocMemOffset,
+                      MachinePointerInfo::getStack(LocMemOffset),
                       false, false, 0);
 }
 
@@ -1867,7 +1938,8 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
   OutRetAddr = getReturnAddressFrameIndex(DAG);
 
   // Load the "old" Return address.
-  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+                           false, false, 0);
   return SDValue(OutRetAddr.getNode(), 1);
 }
 
@@ -1886,7 +1958,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
-                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+                       MachinePointerInfo::getFixedStack(NewReturnAddrFI),
                        false, false, 0);
   return Chain;
 }
@@ -1902,6 +1974,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                              SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool Is64Bit        = Subtarget->is64Bit();
+  bool IsWin64        = Subtarget->isTargetWin64();
   bool IsStructRet    = CallIsStructReturn(Outs);
   bool IsSibcall      = false;
 
@@ -1927,7 +2000,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+  // Allocate shadow area for Win64
+  if (IsWin64) {
+    CCInfo.AllocateStack(32, 8);
+  }
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1986,21 +2065,21 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     case CCValAssign::AExt:
       if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
         // Special case: passing MMX values in XMM registers.
-        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
       } else
         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
       break;
     case CCValAssign::Indirect: {
       // Store the argument.
       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
-                           PseudoSourceValue::getFixedStack(FI), 0,
+                           MachinePointerInfo::getFixedStack(FI),
                            false, false, 0);
       Arg = SpillSlot;
       break;
@@ -2009,7 +2088,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-      if (isVarArg && Subtarget->isTargetWin64()) {
+      if (isVarArg && IsWin64) {
         // Win64 ABI requires argument XMM reg to be copied to the corresponding
         // shadow reg if callee is a varargs function.
         unsigned ShadowReg = 0;
@@ -2075,7 +2154,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     }
   }
 
-  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
+  if (Is64Bit && isVarArg && !IsWin64) {
     // From AMD64 ABI document:
     // For calls that may call functions that use varargs or stdargs
     // (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2090,7 +2169,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+    assert((Subtarget->hasXMM() || !NumXMMRegs)
            && "SSE registers cannot be used when SSE is disabled");
 
     Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2143,7 +2222,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
           // Store relative to framepointer.
           MemOpChains2.push_back(
             DAG.getStore(ArgChain, dl, Arg, FIN,
-                         PseudoSourceValue::getFixedStack(FI), 0,
+                         MachinePointerInfo::getFixedStack(FI),
                          false, false, 0));
         }
       }
@@ -2192,8 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
           GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
         OpFlags = X86II::MO_PLT;
       } else if (Subtarget->isPICStyleStubAny() &&
-               (GV->isDeclaration() || GV->isWeakForLinker()) &&
-               Subtarget->getDarwinVers() < 9) {
+                 (GV->isDeclaration() || GV->isWeakForLinker()) &&
+                 Subtarget->getDarwinVers() < 9) {
         // PC-relative references to external symbols should go through $stub,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
@@ -2206,13 +2285,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     unsigned char OpFlags = 0;
 
-    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
-    // symbols should go through the PLT.
+    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+    // external symbols should go through the PLT.
     if (Subtarget->isTargetELF() &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
-             Subtarget->getDarwinVers() < 9) {
+               Subtarget->getDarwinVers() < 9) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -2224,7 +2303,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   }
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
 
   if (!IsSibcall && isTailCall) {
@@ -2250,7 +2329,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
 
   // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
-  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
+  if (Is64Bit && isVarArg && !IsWin64)
     Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
 
   if (InFlag.getNode())
@@ -2337,7 +2416,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                SelectionDAG& DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetMachine &TM = MF.getTarget();
-  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  const TargetFrameLowering &TFI = *TM.getFrameLowering();
   unsigned StackAlignment = TFI.getStackAlignment();
   uint64_t AlignMask = StackAlignment - 1;
   int64_t Offset = StackSize;
@@ -2364,7 +2443,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   int FI = INT_MAX;
   if (Arg.getOpcode() == ISD::CopyFromReg) {
     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
-    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+    if (!TargetRegisterInfo::isVirtualRegister(VR))
       return false;
     MachineInstr *Def = MRI->getVRegDef(VR);
     if (!Def)
@@ -2510,14 +2589,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     SmallVector<CCValAssign, 16> ArgLocs;
     CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
                    ArgLocs, *DAG.getContext());
-    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+    // Allocate shadow area for Win64
+    if (Subtarget->isTargetWin64()) {
+      CCInfo.AllocateStack(32, 8);
+    }
+
+    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
     if (CCInfo.getNextStackOffset()) {
       MachineFunction &MF = DAG.getMachineFunction();
       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
         return false;
-      if (Subtarget->isTargetWin64())
-        // Win64 ABI has additional complications.
-        return false;
 
       // Check if the arguments are already laid out in the right way as
       // the caller's fixed stack objects.
@@ -2564,6 +2646,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     }
   }
 
+  // An stdcall caller is expected to clean up its arguments; the callee
+  // isn't going to do that.
+  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+    return false;
+
   return true;
 }
 
@@ -2592,6 +2679,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::PSHUFHW:
   case X86ISD::PSHUFLW:
   case X86ISD::SHUFPD:
+  case X86ISD::PALIGN:
   case X86ISD::SHUFPS:
   case X86ISD::MOVLHPS:
   case X86ISD::MOVLHPD:
@@ -2600,6 +2688,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::MOVLPD:
   case X86ISD::MOVSHDUP:
   case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
@@ -2625,6 +2714,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::MOVSHDUP:
   case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
     return DAG.getNode(Opc, dl, VT, V1);
   }
 
@@ -2648,6 +2738,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
                SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::PALIGN:
   case X86ISD::SHUFPD:
   case X86ISD::SHUFPS:
     return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -2770,8 +2861,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
   // First determine if it is required or is profitable to flip the operands.
 
   // If LHS is a foldable load, but RHS is not, flip the condition.
-  if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
-      !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+      !ISD::isNON_EXTLoad(RHS.getNode())) {
     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
     std::swap(LHS, RHS);
   }
@@ -2865,7 +2956,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
 static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
-  if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+  if (VT == MVT::v4f32 || VT == MVT::v4i32 )
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
     return (Mask[0] < 2 && Mask[1] < 2);
@@ -2933,15 +3024,15 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
 static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
                           bool hasSSSE3) {
   int i, e = VT.getVectorNumElements();
-  
+
   // Do not handle v2i64 / v2f64 shuffles with palignr.
   if (e < 4 || !hasSSSE3)
     return false;
-  
+
   for (i = 0; i != e; ++i)
     if (Mask[i] >= 0)
       break;
-  
+
   // All undef, not a palignr.
   if (i == e)
     return false;
@@ -2952,13 +3043,13 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
   bool NeedsUnary = false;
 
   int s = Mask[i] - i;
-  
+
   // Check the rest of the elements to see if they are consecutive.
   for (++i; i != e; ++i) {
     int m = Mask[i];
-    if (m < 0) 
+    if (m < 0)
       continue;
-    
+
     Unary = Unary && (m < (int)e);
     NeedsUnary = NeedsUnary || (m < s);
 
@@ -3046,10 +3137,10 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
 /// <2, 3, 2, 3>
 bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
   unsigned NumElems = N->getValueType(0).getVectorNumElements();
-  
+
   if (NumElems != 4)
     return false;
-  
+
   return isUndefOrEqual(N->getMaskElt(0), 2) &&
   isUndefOrEqual(N->getMaskElt(1), 3) &&
   isUndefOrEqual(N->getMaskElt(2), 2) &&
@@ -3320,6 +3411,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
   return true;
 }
 
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+    return false;
+
+  // The index should be aligned on a 128-bit boundary.
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+  unsigned VL = N->getValueType(0).getVectorNumElements();
+  unsigned VBits = N->getValueType(0).getSizeInBits();
+  unsigned ElSize = VBits / VL;
+  bool Result = (Index * ElSize) % 128 == 0;
+
+  return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+    return false;
+
+  // The index should be aligned on a 128-bit boundary.
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+  unsigned VL = N->getValueType(0).getVectorNumElements();
+  unsigned VBits = N->getValueType(0).getSizeInBits();
+  unsigned ElSize = VBits / VL;
+  bool Result = (Index * ElSize) % 128 == 0;
+
+  return Result;
+}
+
 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
@@ -3388,6 +3517,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
   return (Val - i) * EltSize;
 }
 
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+    llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+  EVT VecVT = N->getOperand(0).getValueType();
+  EVT ElVT = VecVT.getVectorElementType();
+
+  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+  return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+    llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+  EVT VecVT = N->getValueType(0);
+  EVT ElVT = VecVT.getVectorElementType();
+
+  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+  return Index / NumElemsPerChunk;
+}
+
 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
 /// constant +0.0.
 bool X86::isZeroNode(SDValue Elt) {
@@ -3537,13 +3702,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
                              DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
 
-  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+  // Always build SSE zero vectors as <4 x i32> bitcasted
   // to their dest type. This ensures they get CSE'd.
   SDValue Vec;
-  if (VT.getSizeInBits() == 64) { // MMX
-    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  } else if (VT.getSizeInBits() == 128) {
+  if (VT.getSizeInBits() == 128) {  // SSE
     if (HasSSE2) {  // SSE2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -3559,7 +3721,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
     SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
@@ -3571,11 +3733,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // type.  This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
   SDValue Vec;
-  if (VT.getSizeInBits() == 64) // MMX
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  else // SSE
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
 
@@ -3640,9 +3799,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
 
 /// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
-  if (SV->getValueType(0).getVectorNumElements() <= 4)
-    return SDValue(SV, 0);
-
   EVT PVT = MVT::v4f32;
   EVT VT = SV->getValueType(0);
   DebugLoc dl = SV->getDebugLoc();
@@ -3663,9 +3819,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
 
   // Perform the splat.
   int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+  V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
   V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+  return DAG.getNode(ISD::BITCAST, dl, VT, V1);
 }
 
 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3789,7 +3945,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
   }
 
   // Actual nodes that may contain scalar elements
-  if (Opcode == ISD::BIT_CONVERT) {
+  if (Opcode == ISD::BITCAST) {
     V = V.getOperand(0);
     EVT SrcVT = V.getValueType();
     unsigned NumElems = VT.getVectorNumElements();
@@ -3978,7 +4134,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
     }
   }
 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
 }
 
 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4017,11 +4173,10 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                          unsigned NumBits, SelectionDAG &DAG,
                          const TargetLowering &TLI, DebugLoc dl) {
-  bool isMMX = VT.getSizeInBits() == 64;
-  EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+  EVT ShVT = MVT::v2i64;
   unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
-  SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+  SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+  return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(Opc, dl, ShVT, SrcOp,
                              DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
 }
@@ -4029,7 +4184,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
 SDValue
 X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                           SelectionDAG &DAG) const {
-  
+
   // Check if the scalar load can be widened into a vector load. And if
   // the address is "base + cst" see if the cst can be "absorbed" into
   // the shuffle mask.
@@ -4046,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
       FI = FINode->getIndex();
       Offset = 0;
-    } else if (Ptr.getOpcode() == ISD::ADD &&
-               isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+    } else if (DAG.isBaseWithConstantOffset(Ptr) &&
                isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
       Offset = Ptr.getConstantOperandVal(1);
@@ -4084,41 +4238,42 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     int EltNo = (Offset - StartOffset) >> 2;
     int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
     EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
-    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+                             LD->getPointerInfo().getWithOffset(StartOffset),
                              false, false, 0);
     // Canonicalize it to a v4i32 shuffle.
-    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+    V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+    return DAG.getNode(ISD::BITCAST, dl, VT,
                        DAG.getVectorShuffle(MVT::v4i32, dl, V1,
-                                            DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+                                            DAG.getUNDEF(MVT::v4i32),&Mask[0]));
   }
 
   return SDValue();
 }
 
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a 
-/// vector of type 'VT', see if the elements can be replaced by a single large 
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
 /// load which has the same value as a build_vector whose operands are 'elts'.
 ///
 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-/// 
+///
 /// FIXME: we'd also like to handle the case where the last elements are zero
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        DebugLoc &dl, SelectionDAG &DAG) {
+                                        DebugLoc &DL, SelectionDAG &DAG) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
-  
+
   LoadSDNode *LDBase = NULL;
   unsigned LastLoadedElt = -1U;
-  
+
   // For each element in the initializer, see if we've found a load or an undef.
-  // If we don't find an initial load element, or later load elements are 
+  // If we don't find an initial load element, or later load elements are
   // non-consecutive, bail out.
   for (unsigned i = 0; i < NumElems; ++i) {
     SDValue Elt = Elts[i];
-    
+
     if (!Elt.getNode() ||
         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
       return SDValue();
@@ -4143,18 +4298,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
-      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                         LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+      return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                         LDBase->getPointerInfo(),
                          LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
-    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                       LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+    return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                       LDBase->getPointerInfo(),
                        LDBase->isVolatile(), LDBase->isNonTemporal(),
                        LDBase->getAlignment());
   } else if (NumElems == 4 && LastLoadedElt == 1) {
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
-    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+    SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+                                              Ops, 2, MVT::i32,
+                                              LDBase->getMemOperand());
+    return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
   }
   return SDValue();
 }
@@ -4162,6 +4319,35 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
+
+  EVT VT = Op.getValueType();
+  EVT ExtVT = VT.getVectorElementType();
+
+  unsigned NumElems = Op.getNumOperands();
+
+  // For AVX-length vectors, build the individual 128-bit pieces and
+  // use shuffles to put them in place.
+  if (VT.getSizeInBits() > 256 && 
+      Subtarget->hasAVX() && 
+      !Disable256Bit &&
+      !ISD::isBuildVectorAllZeros(Op.getNode())) {
+    SmallVector<SDValue, 8> V;
+    V.resize(NumElems);
+    for (unsigned i = 0; i < NumElems; ++i) {
+      V[i] = Op.getOperand(i);
+    }
+ 
+    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+    // Build the lower subvector.
+    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+    // Build the upper subvector.
+    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+                                NumElems/2);
+
+    return ConcatVectors(Lower, Upper, DAG);
+  }
+
   // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
   // All one's are handled with pcmpeqd. In AVX, zero's are handled with
   // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
@@ -4169,10 +4355,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
       (Op.getValueType().getSizeInBits() != 256 &&
        ISD::isBuildVectorAllOnes(Op.getNode()))) {
-    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+    // Canonicalize this to <4 x i32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
-    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+    if (Op.getValueType() == MVT::v4i32)
       return Op;
 
     if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -4180,11 +4366,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
   }
 
-  EVT VT = Op.getValueType();
-  EVT ExtVT = VT.getVectorElementType();
   unsigned EVTBits = ExtVT.getSizeInBits();
 
-  unsigned NumElems = Op.getNumOperands();
   unsigned NumZero  = 0;
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
@@ -4223,9 +4406,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
         (!IsAllConstants || Idx == 0)) {
       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
-        // Handle MMX and SSE both.
-        EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
-        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+        // Handle SSE only.
+        assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+        EVT VecVT = MVT::v4i32;
+        unsigned VecElts = 4;
 
         // Truncate the value (which may itself be a constant) to i32, and
         // convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -4245,7 +4429,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                       DAG.getUNDEF(Item.getValueType()),
                                       &Mask[0]);
         }
-        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+        return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
       }
     }
 
@@ -4264,11 +4448,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                            DAG);
       } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
-        EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+        EVT MiddleVT = MVT::v4i32;
         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
         Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                            Subtarget->hasSSE2(), DAG);
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Item);
       }
     }
 
@@ -4394,20 +4579,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     // Check for a build vector of consecutive loads.
     for (unsigned i = 0; i < NumElems; ++i)
       V[i] = Op.getOperand(i);
-    
+
     // Check for elements which are consecutive loads.
     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
     if (LD.getNode())
       return LD;
-    
-    // For SSE 4.1, use insertps to put the high elements into the low element. 
+
+    // For SSE 4.1, use insertps to put the high elements into the low element.
     if (getSubtarget()->hasSSE41()) {
       SDValue Result;
       if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
         Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
       else
         Result = DAG.getUNDEF(VT);
-      
+
       for (unsigned i = 1; i < NumElems; ++i) {
         if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
@@ -4415,7 +4600,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       }
       return Result;
     }
-    
+
     // Otherwise, expand into a number of unpckl*, start by extending each of
     // our (non-undef) elements to the full vector width with the element in the
     // bottom slot of the vector (which generates no code for SSE).
@@ -4441,7 +4626,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
         if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
             EltStride == NumElems/2)
           continue;
-        
+
         V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
       }
       EltStride >>= 1;
@@ -4461,21 +4646,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
          ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
   int Mask[2];
-  SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+  SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
   SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
   InVec = Op.getOperand(1);
   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
     unsigned NumElts = ResVT.getVectorNumElements();
-    VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+    VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
     VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
                        InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
   } else {
-    InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+    InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
     SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
     Mask[0] = 0; Mask[1] = 2;
     VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+  return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
 }
 
 // v8i16 shuffles - Prefer shuffles in the following order:
@@ -4557,9 +4742,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
     MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
     MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
     NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
-                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
-                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
-    NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+                  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+                  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+    NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
 
     // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
     // source words for the shuffle, to aid later transformations.
@@ -4628,12 +4813,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
       pshufbMask.push_back(DAG.getConstant(EltIdx,   MVT::i8));
       pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
     }
-    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+    V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
     V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                      DAG.getNode(ISD::BUILD_VECTOR, dl,
                                  MVT::v16i8, &pshufbMask[0], 16));
     if (!TwoInputs)
-      return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
 
     // Calculate the shuffle mask for the second input, shuffle it, and
     // OR it with the first shuffled input.
@@ -4648,12 +4833,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
       pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
       pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
     }
-    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+    V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
     V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                      DAG.getNode(ISD::BUILD_VECTOR, dl,
                                  MVT::v16i8, &pshufbMask[0], 16));
     V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
   }
 
   // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
@@ -4820,8 +5005,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   // No SSSE3 - Calculate in place words and then fix all out of place words
   // With 0-16 extracts & inserts.  Worst case is 16 bytes out of order from
   // the 16 different words that comprise the two doublequadword input vectors.
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+  V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
   SDValue NewV = V2Only ? V2 : V1;
   for (int i = 0; i != 8; ++i) {
     int Elt0 = MaskVals[i*2];
@@ -4883,25 +5068,23 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
     NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                        DAG.getIntPtrConstant(i));
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
 }
 
 /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
 /// done when every pair / quad of shuffle mask elements point to elements in
 /// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
 static
 SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
-                                 SelectionDAG &DAG,
-                                 const TargetLowering &TLI, DebugLoc dl) {
+                                 SelectionDAG &DAG, DebugLoc dl) {
   EVT VT = SVOp->getValueType(0);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   unsigned NumElems = VT.getVectorNumElements();
   unsigned NewWidth = (NumElems == 4) ? 2 : 4;
-  EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
-  EVT NewVT = MaskVT;
+  EVT NewVT;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(false && "Unexpected!");
   case MVT::v4f32: NewVT = MVT::v2f64; break;
@@ -4910,12 +5093,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
   case MVT::v16i8: NewVT = MVT::v4i32; break;
   }
 
-  if (NewWidth == 2) {
-    if (VT.isInteger())
-      NewVT = MVT::v2i64;
-    else
-      NewVT = MVT::v2f64;
-  }
   int Scale = NumElems / NewWidth;
   SmallVector<int, 8> MaskVec;
   for (unsigned i = 0; i < NumElems; i += Scale) {
@@ -4935,8 +5112,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
       MaskVec.push_back(StartIdx / Scale);
   }
 
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+  V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+  V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
   return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
 }
 
@@ -4953,13 +5130,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
       // movssrr and movsdrr do not clear top bits. Try to use movd, movq
       // instead.
       MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
-      if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+      if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
           SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+          SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
           SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
         // PR2108
         OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+        return DAG.getNode(ISD::BITCAST, dl, VT,
                            DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
                                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                    OpVT,
@@ -4969,9 +5146,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
     }
   }
 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+  return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
-                                 DAG.getNode(ISD::BIT_CONVERT, dl,
+                                 DAG.getNode(ISD::BITCAST, dl,
                                              OpVT, SrcOp)));
 }
 
@@ -5125,7 +5302,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
 }
 
 static bool MayFoldVectorLoad(SDValue V) {
-  if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
     V = V.getOperand(0);
   if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
     V = V.getOperand(0);
@@ -5134,39 +5311,143 @@ static bool MayFoldVectorLoad(SDValue V) {
   return false;
 }
 
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    V = V.getOperand(0);
+  if (ISD::isNormalLoad(V.getNode()))
+    return true;
+  return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but fix the problem
+/// until the right path is decided.
 static
-SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
-                        bool HasSSE2) {
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  EVT VT = Op.getValueType();
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+                                         const TargetLowering &TLI) {
+  EVT VT = V.getValueType();
+  ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
 
-  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+  // Be sure that the vector shuffle is present in a pattern like this:
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+  if (!V.hasOneUse())
+    return false;
 
-  if (HasSSE2 && VT == MVT::v2f64)
-    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+  SDNode *N = *V.getNode()->use_begin();
+  if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return false;
 
-  // v4f32 or v4i32
-  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
-}
+  SDValue EltNo = N->getOperand(1);
+  if (!isa<ConstantSDNode>(EltNo))
+    return false;
 
-static
-SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  EVT VT = Op.getValueType();
+  // If the bit convert changed the number of elements, it is unsafe
+  // to examine the mask.
+  bool HasShuffleIntoBitcast = false;
+  if (V.getOpcode() == ISD::BITCAST) {
+    EVT SrcVT = V.getOperand(0).getValueType();
+    if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+      return false;
+    V = V.getOperand(0);
+    HasShuffleIntoBitcast = true;
+  }
 
-  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
-         "unsupported shuffle type");
+  // Select the input vector, guarding against out of range extract vector.
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+  V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
 
-  if (V2.getOpcode() == ISD::UNDEF)
-    V2 = V1;
+  // Skip one more bit_convert if necessary
+  if (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
 
-  // v4i32 or v4f32
-  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
-}
+  if (ISD::isNormalLoad(V.getNode())) {
+    // Is the original load suitable?
+    LoadSDNode *LN0 = cast<LoadSDNode>(V);
 
-static
+    // FIXME: avoid the multi-use bug that is preventing lots of
+    // of foldings to be detected, this is still wrong of course, but
+    // give the temporary desired behavior, and if it happens that
+    // the load has real more uses, during isel it will not fold, and
+    // will generate poor code.
+    if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+      return false;
+
+    if (!HasShuffleIntoBitcast)
+      return true;
+
+    // If there's a bitcast before the shuffle, check if the load type and
+    // alignment is valid.
+    unsigned Align = LN0->getAlignment();
+    unsigned NewAlign =
+      TLI.getTargetData()->getABITypeAlignment(
+                                    VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+      return false;
+  }
+
+  return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  // Canonizalize to v2f64.
+  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+                     getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+                                          V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+                        bool HasSSE2) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+  if (HasSSE2 && VT == MVT::v2f64)
+    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+  // v4f32 or v4i32
+  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+         "unsupported shuffle type");
+
+  if (V2.getOpcode() == ISD::UNDEF)
+    V2 = V1;
+
+  // v4i32 or v4f32
+  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
 SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
@@ -5191,6 +5472,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
     CanFoldLoad = true;
 
+  // Both of them can't be memory operations though.
+  if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+    CanFoldLoad = false;
+  
   if (CanFoldLoad) {
     if (HasSSE2 && NumElems == 2)
       return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -5228,7 +5513,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
   case MVT::v16i8: return X86ISD::PUNPCKLBW;
   case MVT::v8i16: return X86ISD::PUNPCKLWD;
   default:
-    llvm_unreachable("Unknow type for unpckl");
+    llvm_unreachable("Unknown type for unpckl");
   }
   return 0;
 }
@@ -5242,63 +5527,111 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
   case MVT::v16i8: return X86ISD::PUNPCKHBW;
   case MVT::v8i16: return X86ISD::PUNPCKHWD;
   default:
-    llvm_unreachable("Unknow type for unpckh");
+    llvm_unreachable("Unknown type for unpckh");
   }
   return 0;
 }
 
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+                               const TargetLowering &TLI,
+                               const X86Subtarget *Subtarget) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
-  unsigned NumElems = VT.getVectorNumElements();
-  bool isMMX = VT.getSizeInBits() == 64;
-  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
-  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-  bool V1IsSplat = false;
-  bool V2IsSplat = false;
-  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
-  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
-  MachineFunction &MF = DAG.getMachineFunction();
-  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
 
   if (isZeroShuffle(SVOp))
     return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
 
-  // Promote splats to v4f32.
+  // Handle splat operations
   if (SVOp->isSplat()) {
-    if (isMMX || NumElems < 4)
+    // Special case, this is the only place now where it's
+    // allowed to return a vector_shuffle operation without
+    // using a target specific node, because *hopefully* it
+    // will be optimized away by the dag combiner.
+    if (VT.getVectorNumElements() <= 4 &&
+        CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
       return Op;
+
+    // Handle splats by matching through known masks
+    if (VT.getVectorNumElements() <= 4)
+      return SDValue();
+
+    // Canonicalize all of the remaining to v4f32.
     return PromoteSplat(SVOp, DAG);
   }
 
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
-    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
     if (NewOp.getNode())
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                         LowerVECTOR_SHUFFLE(NewOp, DAG));
+      return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
   } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
     // FIXME: Figure out a cleaner way to do this.
     // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode()) {
         if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
           return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
                               DAG, Subtarget, dl);
       }
     } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
         return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                             DAG, Subtarget, dl);
     }
   }
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned NumElems = VT.getVectorNumElements();
+  bool isMMX = VT.getSizeInBits() == 64;
+  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsSplat = false;
+  bool V2IsSplat = false;
+  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+  bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+  // Shuffle operations on MMX not supported.
+  if (isMMX)
+    return Op;
+
+  // Vector shuffle lowering takes 3 steps:
+  //
+  // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+  //    narrowing and commutation of operands should be handled.
+  // 2) Matching of shuffles with known shuffle masks to x86 target specific
+  //    shuffle nodes.
+  // 3) Rewriting of unmatched masks into new generic shuffle operations,
+  //    so the shuffle can be broken into other shuffles and the legalizer can
+  //    try the lowering again.
+  //
+  // The general ideia is that no vector_shuffle operation should be left to
+  // be matched during isel, all of them must be converted to a target specific
+  // node here.
+
+  // Normalize the input vectors. Here splats, zeroed vectors, profitable
+  // narrowing and commutation of operands should be handled. The actual code
+  // doesn't include all of those, work in progress...
+  SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+  if (NewOp.getNode())
+    return NewOp;
 
   // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
   // unpckh_undef). Only use pshufd if speed is more important than size.
@@ -5309,6 +5642,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     if (VT != MVT::v2i64 && VT != MVT::v2f64)
       return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
 
+  if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+      RelaxedMayFoldVectorLoad(V1))
+    return getMOVDDup(Op, dl, V1, DAG);
+
+  if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+    return getMOVHighToLow(Op, dl, DAG);
+
+  // Use to match splats
+  if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64))
+    return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
   if (X86::isPSHUFDMask(SVOp)) {
     // The actual implementation will match the mask in the if above and then
     // during isel it can match several different instructions, not only pshufd
@@ -5349,7 +5694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
       return V2;
     if (ISD::isBuildVectorAllZeros(V1.getNode()))
       return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
-    if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+    if (!X86::isMOVLPMask(SVOp)) {
       if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
         return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
 
@@ -5359,22 +5704,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // FIXME: fold these into legal mask.
-  if (!isMMX) {
-    if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
-      return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+  if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+    return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
 
-    if (X86::isMOVHLPSMask(SVOp))
-      return getMOVHighToLow(Op, dl, DAG);
+  if (X86::isMOVHLPSMask(SVOp))
+    return getMOVHighToLow(Op, dl, DAG);
 
-    if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
-      return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+  if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+    return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
 
-    if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
-      return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+  if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+    return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
 
-    if (X86::isMOVLPMask(SVOp))
-      return getMOVLP(Op, dl, DAG, HasSSE2);
-  }
+  if (X86::isMOVLPMask(SVOp))
+    return getMOVLP(Op, dl, DAG, HasSSE2);
 
   if (ShouldXformToMOVHLPS(SVOp) ||
       ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -5414,13 +5757,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     return getMOVL(DAG, dl, VT, V2, V1);
   }
 
-  if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp))
-    return (isMMX) ?
-      Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+  if (X86::isUNPCKLMask(SVOp))
+    return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
 
-  if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp))
-    return (isMMX) ?
-      Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+  if (X86::isUNPCKHMask(SVOp))
+    return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
 
   if (V2IsSplat) {
     // Normalize mask so all entries that point to V2 points to its first
@@ -5443,19 +5784,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
     ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
 
-    if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp))
-      return (isMMX) ?
-        NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+    if (X86::isUNPCKLMask(NewSVOp))
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
 
-    if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp))
-      return (isMMX) ?
-        NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+    if (X86::isUNPCKHMask(NewSVOp))
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
   }
 
-  // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
-
   // Normalize the node to match x86 shuffle ops if needed
-  if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
     return CommuteVectorShuffle(SVOp, DAG);
 
   // The checks below are all present in isShuffleMaskLegal, but they are
@@ -5464,15 +5801,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   SmallVector<int, 16> M;
   SVOp->getMask(M);
 
-  // Very little shuffling can be done for 64-bit vectors right now.
-  if (VT.getSizeInBits() == 64)
-    return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+  if (isPALIGNRMask(M, VT, HasSSSE3))
+    return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+                                X86::getShufflePALIGNRImmediate(SVOp),
+                                DAG);
 
-  // FIXME: pshufb, blends, shifts.
-  if (VT.getVectorNumElements() == 2 ||
-      ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
-      isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
-    return Op;
+  if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+      SVOp->getSplatIndex() == 0 && V2IsUndef) {
+    if (VT == MVT::v2f64)
+      return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+    if (VT == MVT::v2i64)
+      return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+  }
 
   if (isPSHUFHWMask(M, VT))
     return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
@@ -5494,6 +5834,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                                   TargetMask, DAG);
   }
 
+  if (X86::isUNPCKL_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+  if (X86::isUNPCKH_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
   // Handle v8i16 specifically since SSE can do byte extraction and insertion.
   if (VT == MVT::v8i16) {
     SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
@@ -5507,8 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
       return NewOp;
   }
 
-  // Handle all 4 wide cases with a number of shuffles except for MMX.
-  if (NumElems == 4 && !isMMX)
+  // Handle all 4 wide cases with a number of shuffles.
+  if (NumElems == 4)
     return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
 
   return SDValue();
@@ -5531,7 +5878,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
     if (Idx == 0)
       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
-                                     DAG.getNode(ISD::BIT_CONVERT, dl,
+                                     DAG.getNode(ISD::BITCAST, dl,
                                                  MVT::v4i32,
                                                  Op.getOperand(0)),
                                      Op.getOperand(1)));
@@ -5552,14 +5899,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
     if ((User->getOpcode() != ISD::STORE ||
          (isa<ConstantSDNode>(Op.getOperand(1)) &&
           cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
-        (User->getOpcode() != ISD::BIT_CONVERT ||
+        (User->getOpcode() != ISD::BITCAST ||
          User->getValueType(0) != MVT::i32))
       return SDValue();
     SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
-                                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+                                  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
                                               Op.getOperand(0)),
                                               Op.getOperand(1));
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
   } else if (VT == MVT::i32) {
     // ExtractPS works with constant index.
     if (isa<ConstantSDNode>(Op.getOperand(1)))
@@ -5575,6 +5922,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (!isa<ConstantSDNode>(Op.getOperand(1)))
     return SDValue();
 
+  SDValue Vec = Op.getOperand(0);
+  EVT VecVT = Vec.getValueType();
+
+  // If this is a 256-bit vector result, first extract the 128-bit
+  // vector and then extract from the 128-bit vector.
+  if (VecVT.getSizeInBits() > 128) {
+    DebugLoc dl = Op.getNode()->getDebugLoc();
+    unsigned NumElems = VecVT.getVectorNumElements();
+    SDValue Idx = Op.getOperand(1);
+
+    if (!isa<ConstantSDNode>(Idx))
+      return SDValue();
+
+    unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    // Get the 128-bit vector.
+    bool Upper = IdxVal >= ExtractNumElems;
+    Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+    // Extract from it.
+    SDValue ScaledIdx = Idx;
+    if (Upper)
+      ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+                              DAG.getConstant(ExtractNumElems,
+                                              Idx.getValueType()));
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+                       ScaledIdx);
+  }
+
+  assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
   if (Subtarget->hasSSE41()) {
     SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
     if (Res.getNode())
@@ -5590,7 +5969,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     if (Idx == 0)
       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
-                                     DAG.getNode(ISD::BIT_CONVERT, dl,
+                                     DAG.getNode(ISD::BITCAST, dl,
                                                  MVT::v4i32, Vec),
                                      Op.getOperand(1)));
     // Transform it so it match pextrw which produces a 32-bit result.
@@ -5650,8 +6029,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
     unsigned Opc;
     if (VT == MVT::v8i16)
       Opc = X86ISD::PINSRW;
-    else if (VT == MVT::v4i16)
-      Opc = X86ISD::MMX_PINSRW;
     else if (VT == MVT::v16i8)
       Opc = X86ISD::PINSRB;
     else
@@ -5689,17 +6066,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   EVT EltVT = VT.getVectorElementType();
 
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue N0 = Op.getOperand(0);
+  SDValue N1 = Op.getOperand(1);
+  SDValue N2 = Op.getOperand(2);
+
+  // If this is a 256-bit vector result, first insert into a 128-bit
+  // vector and then insert into the 256-bit vector.
+  if (VT.getSizeInBits() > 128) {
+    if (!isa<ConstantSDNode>(N2))
+      return SDValue();
+
+    // Get the 128-bit vector.
+    unsigned NumElems = VT.getVectorNumElements();
+    unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+    bool Upper = IdxVal >= NumElems / 2;
+
+    SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+    // Insert into it.
+    SDValue ScaledN2 = N2;
+    if (Upper)
+      ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+                             DAG.getConstant(NumElems / 
+                                             (VT.getSizeInBits() / 128),
+                                             N2.getValueType()));
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+                     N1, ScaledN2);
+
+    // Insert the 128-bit vector
+    // FIXME: Why UNDEF?
+    return Insert128BitVector(N0, Op, N2, DAG, dl);
+  }
+
   if (Subtarget->hasSSE41())
     return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
 
   if (EltVT == MVT::i8)
     return SDValue();
 
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue N0 = Op.getOperand(0);
-  SDValue N1 = Op.getOperand(1);
-  SDValue N2 = Op.getOperand(2);
-
   if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
     // Transform it so it match pinsrw which expects a 16-bit value in a GR32
     // as its second argument.
@@ -5707,31 +6112,79 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
       N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
     if (N2.getValueType() != MVT::i32)
       N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
-    return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
-                       dl, VT, N0, N1, N2);
+    return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
   }
   return SDValue();
 }
 
 SDValue
 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+  LLVMContext *Context = DAG.getContext();
   DebugLoc dl = Op.getDebugLoc();
-  
+  EVT OpVT = Op.getValueType();
+
+  // If this is a 256-bit vector result, first insert into a 128-bit
+  // vector and then insert into the 256-bit vector.
+  if (OpVT.getSizeInBits() > 128) {
+    // Insert into a 128-bit vector.
+    EVT VT128 = EVT::getVectorVT(*Context,
+                                 OpVT.getVectorElementType(),
+                                 OpVT.getVectorNumElements() / 2);
+
+    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+    // Insert the 128-bit vector.
+    return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+                              DAG.getConstant(0, MVT::i32),
+                              DAG, dl);
+  }
+
   if (Op.getValueType() == MVT::v1i64 &&
       Op.getOperand(0).getValueType() == MVT::i64)
     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
 
   SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
-  EVT VT = MVT::v2i32;
-  switch (Op.getValueType().getSimpleVT().SimpleTy) {
-  default: break;
-  case MVT::v16i8:
-  case MVT::v8i16:
-    VT = MVT::v4i32;
-    break;
+  assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+         "Expected an SSE type!");
+  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode.  This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+  if (Subtarget->hasAVX()) {
+    DebugLoc dl = Op.getNode()->getDebugLoc();
+    SDValue Vec = Op.getNode()->getOperand(0);
+    SDValue Idx = Op.getNode()->getOperand(1);
+
+    if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+        && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+        return Extract128BitVector(Vec, Idx, DAG, dl);
+    }
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
-                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+  return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode.  This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+  if (Subtarget->hasAVX()) {
+    DebugLoc dl = Op.getNode()->getDebugLoc();
+    SDValue Vec = Op.getNode()->getOperand(0);
+    SDValue SubVec = Op.getNode()->getOperand(1);
+    SDValue Idx = Op.getNode()->getOperand(2);
+
+    if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+        && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+      return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+    }
+  }
+  return SDValue();
 }
 
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -5797,12 +6250,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
 
   // With PIC, the address is actually $g + Offset.
-  if (OpFlag) {
+  if (OpFlag)
     Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                          DAG.getNode(X86ISD::GlobalBaseReg,
                                      DebugLoc(), getPointerTy()),
                          Result);
-  }
 
   return Result;
 }
@@ -5906,7 +6358,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
   // load.
   if (isGlobalStubReference(OpFlags))
     Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // If there was a non-zero offset that we didn't fold, create an explicit
   // addition for it.
@@ -5929,7 +6381,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
            SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
            unsigned char OperandFlags) {
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   DebugLoc dl = GA->getDebugLoc();
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                            GA->getValueType(0),
@@ -5978,14 +6430,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                    const EVT PtrVT, TLSModel::Model model,
                                    bool is64Bit) {
   DebugLoc dl = GA->getDebugLoc();
-  // Get the Thread Pointer
-  SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
-                             DebugLoc(), PtrVT,
-                             DAG.getRegister(is64Bit? X86::FS : X86::GS,
-                                             MVT::i32));
 
-  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
-                                      NULL, 0, false, false, 0);
+  // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+  Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+                                                         is64Bit ? 257 : 256));
+
+  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+                                      DAG.getIntPtrConstant(0),
+                                      MachinePointerInfo(Ptr), false, false, 0);
 
   unsigned char OperandFlags = 0;
   // Most TLS accesses are not RIP relative, even on x86-64.  One exception is
@@ -6004,14 +6456,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
 
   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
   // exec)
-  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, 
+  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                            GA->getValueType(0),
                                            GA->getOffset(), OperandFlags);
   SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
 
   if (model == TLSModel::InitialExec)
     Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
-                         PseudoSourceValue::getGOT(), 0, false, false, 0);
+                         MachinePointerInfo::getGOT(), false, false, 0);
 
   // The address of the thread local variable is the add of the thread
   // pointer with the offset of the variable.
@@ -6020,29 +6472,29 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
 
 SDValue
 X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-  
+
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GA->getGlobal();
 
   if (Subtarget->isTargetELF()) {
     // TODO: implement the "local dynamic" model
     // TODO: implement the "initial exec"model for pic executables
-    
+
     // If GV is an alias then use the aliasee for determining
     // thread-localness.
     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
       GV = GA->resolveAliasedGlobal(false);
-    
-    TLSModel::Model model 
+
+    TLSModel::Model model
       = getTLSModel(GV, getTargetMachine().getRelocationModel());
-    
+
     switch (model) {
       case TLSModel::GeneralDynamic:
       case TLSModel::LocalDynamic: // not implemented
         if (Subtarget->is64Bit())
           return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
         return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-        
+
       case TLSModel::InitialExec:
       case TLSModel::LocalExec:
         return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
@@ -6053,7 +6505,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     unsigned char OpFlag = 0;
     unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
                            X86ISD::WrapperRIP : X86ISD::Wrapper;
-    
+
     // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
     // global base reg.
     bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
@@ -6062,24 +6514,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
       OpFlag = X86II::MO_TLVP_PIC_BASE;
     else
       OpFlag = X86II::MO_TLVP;
-    DebugLoc DL = Op.getDebugLoc();    
+    DebugLoc DL = Op.getDebugLoc();
     SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
-                                                getPointerTy(),
+                                                GA->getValueType(0),
                                                 GA->getOffset(), OpFlag);
     SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-  
+
     // With PIC32, the address is actually $g + Offset.
     if (PIC32)
       Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                            DAG.getNode(X86ISD::GlobalBaseReg,
                                        DebugLoc(), getPointerTy()),
                            Offset);
-    
+
     // Lowering the machine isd will make sure everything is in the right
     // location.
-    SDValue Args[] = { Offset };
-    SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-    
+    SDValue Chain = DAG.getEntryNode();
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Args[] = { Chain, Offset };
+    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
     // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
     MFI->setAdjustsStack(true);
@@ -6089,7 +6543,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
     return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
   }
-  
+
   assert(false &&
          "TLS not implemented for this target.");
 
@@ -6148,12 +6602,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
   EVT SrcVT = Op.getOperand(0).getValueType();
 
-  if (SrcVT.isVector()) {
-    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
-      return Op;
-    }
+  if (SrcVT.isVector())
     return SDValue();
-  }
 
   assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
          "Unknown SINT_TO_FP to lower!");
@@ -6174,25 +6624,36 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
   SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                                StackSlot,
-                               PseudoSourceValue::getFixedStack(SSFI), 0,
+                               MachinePointerInfo::getFixedStack(SSFI),
                                false, false, 0);
   return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
 }
 
 SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
-                                     SDValue StackSlot, 
+                                     SDValue StackSlot,
                                      SelectionDAG &DAG) const {
   // Build the FILD
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   SDVTList Tys;
   bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
   if (useSSE)
-    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
   else
     Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+  unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+  int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+  MachineMemOperand *MMO =
+    DAG.getMachineFunction()
+    .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                          MachineMemOperand::MOLoad, ByteSize, ByteSize);
+
   SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
-  SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
-                               Tys, Ops, array_lengthof(Ops));
+  SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+                                           X86ISD::FILD, DL,
+                                           Tys, Ops, array_lengthof(Ops),
+                                           SrcVT, MMO);
 
   if (useSSE) {
     Chain = Result.getValue(1);
@@ -6202,15 +6663,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
     // shouldn't be necessary except that RFP cannot be live across
     // multiple blocks. When stackifier is fixed, they can be uncoupled.
     MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+    unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+    int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
     SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
     Tys = DAG.getVTList(MVT::Other);
     SDValue Ops[] = {
       Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
     };
-    Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
-    Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
-                         PseudoSourceValue::getFixedStack(SSFI), 0,
+    MachineMemOperand *MMO =
+      DAG.getMachineFunction()
+      .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                            MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+    Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+                                    Ops, array_lengthof(Ops),
+                                    Op.getValueType(), MMO);
+    Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+                         MachinePointerInfo::getFixedStack(SSFI),
                          false, false, 0);
   }
 
@@ -6284,12 +6753,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
                                         DAG.getIntPtrConstant(0)));
   SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
   SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
-  SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+  SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
   SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
 
@@ -6317,19 +6786,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
                                          DAG.getIntPtrConstant(0)));
 
   Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
-                     DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+                     DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
                      DAG.getIntPtrConstant(0));
 
   // Or the load with the bias.
   SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
-                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                           DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                    MVT::v2f64, Load)),
-                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                           DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                    MVT::v2f64, Bias)));
   Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
-                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+                   DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
                    DAG.getIntPtrConstant(0));
 
   // Subtract the bias.
@@ -6374,24 +6843,34 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
     SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
                                      getPointerTy(), StackSlot, WordOff);
     SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
-                                  StackSlot, NULL, 0, false, false, 0);
+                                  StackSlot, MachinePointerInfo(),
+                                  false, false, 0);
     SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
-                                  OffsetSlot, NULL, 0, false, false, 0);
+                                  OffsetSlot, MachinePointerInfo(),
+                                  false, false, 0);
     SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
     return Fild;
   }
 
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
-                                StackSlot, NULL, 0, false, false, 0);
+                                StackSlot, MachinePointerInfo(),
+                               false, false, 0);
   // For i64 source, we need to add the appropriate power of 2 if the input
   // was negative.  This is the same as the optimization in
   // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
   // we must be careful to do the computation in x87 extended precision, not
   // in SSE. (The generic code can't know it's OK to do this, or how to.)
+  int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+  MachineMemOperand *MMO =
+    DAG.getMachineFunction()
+    .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                          MachineMemOperand::MOLoad, 8, 8);
+
   SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
   SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
-  SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+                                         MVT::i64, MMO);
 
   APInt FF(32, 0x5F800000ULL);
 
@@ -6414,9 +6893,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
   // Load the value out, extending it from f32 to f80.
   // FIXME: Avoid the extend by constructing the right constant pool?
-  SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
-                                 FudgePtr, PseudoSourceValue::getConstantPool(),
-                                 0, MVT::f32, false, false, 4);
+  SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+                                 FudgePtr, MachinePointerInfo::getConstantPool(),
+                                 MVT::f32, false, false, 4);
   // Extend everything to 80 bits to force it to be done on x87.
   SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
   return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
@@ -6424,7 +6903,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
 std::pair<SDValue,SDValue> X86TargetLowering::
 FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
   EVT DstTy = Op.getValueType();
 
@@ -6453,6 +6932,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
   int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 
+
+
   unsigned Opc;
   switch (DstTy.getSimpleVT().SimpleTy) {
   default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
@@ -6463,37 +6944,43 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
 
   SDValue Chain = DAG.getEntryNode();
   SDValue Value = Op.getOperand(0);
-  if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+  EVT TheVT = Op.getOperand(0).getValueType();
+  if (isScalarFPTypeInSSEReg(TheVT)) {
     assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
-    Chain = DAG.getStore(Chain, dl, Value, StackSlot,
-                         PseudoSourceValue::getFixedStack(SSFI), 0,
+    Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+                         MachinePointerInfo::getFixedStack(SSFI),
                          false, false, 0);
     SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
     SDValue Ops[] = {
-      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+      Chain, StackSlot, DAG.getValueType(TheVT)
     };
-    Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                              MachineMemOperand::MOLoad, MemSize, MemSize);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+                                    DstTy, MMO);
     Chain = Value.getValue(1);
     SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
     StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
   }
 
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                            MachineMemOperand::MOStore, MemSize, MemSize);
+
   // Build the FP_TO_INT*_IN_MEM
   SDValue Ops[] = { Chain, Value, StackSlot };
-  SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+  SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+                                         Ops, 3, DstTy, MMO);
 
   return std::make_pair(FIST, StackSlot);
 }
 
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
                                            SelectionDAG &DAG) const {
-  if (Op.getValueType().isVector()) {
-    if (Op.getValueType() == MVT::v2i32 &&
-        Op.getOperand(0).getValueType() == MVT::v2f64) {
-      return Op;
-    }
+  if (Op.getValueType().isVector())
     return SDValue();
-  }
 
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
   SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -6502,7 +6989,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
 
   // Load the result.
   return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
-                     FIST, StackSlot, NULL, 0, false, false, 0);
+                     FIST, StackSlot, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
@@ -6513,7 +7000,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
 
   // Load the result.
   return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
-                     FIST, StackSlot, NULL, 0, false, false, 0);
+                     FIST, StackSlot, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -6539,7 +7026,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                             PseudoSourceValue::getConstantPool(), 0,
+                             MachinePointerInfo::getConstantPool(),
                              false, false, 16);
   return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
 }
@@ -6566,14 +7053,14 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                             PseudoSourceValue::getConstantPool(), 0,
+                             MachinePointerInfo::getConstantPool(),
                              false, false, 16);
   if (VT.isVector()) {
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+    return DAG.getNode(ISD::BITCAST, dl, VT,
                        DAG.getNode(ISD::XOR, dl, MVT::v2i64,
-                    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+                    DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                                 Op.getOperand(0)),
-                    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+                    DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
   } else {
     return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
   }
@@ -6615,7 +7102,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   Constant *C = ConstantVector::get(CV);
   SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
 
@@ -6625,7 +7112,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
     SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
     SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
                           DAG.getConstant(32, MVT::i32));
-    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+    SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
     SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
                           DAG.getIntPtrConstant(0));
   }
@@ -6644,7 +7131,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   C = ConstantVector::get(CV);
   CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
   SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                              PseudoSourceValue::getConstantPool(), 0,
+                              MachinePointerInfo::getConstantPool(),
                               false, false, 16);
   SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
 
@@ -6884,8 +7371,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   // Lower (X & (1 << N)) == 0 to BT(X, N).
   // Lower ((X >>u N) & 1) != 0 to BT(X, N).
   // Lower ((X >>s N) & 1) != 0 to BT(X, N).
-  if (Op0.getOpcode() == ISD::AND &&
-      Op0.hasOneUse() &&
+  if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
       Op1.getOpcode() == ISD::Constant &&
       cast<ConstantSDNode>(Op1)->isNullValue() &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
@@ -6894,19 +7380,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
       return NewSetCC;
   }
 
-  // Look for "(setcc) == / != 1" to avoid unncessary setcc.
-  if (Op0.getOpcode() == X86ISD::SETCC &&
-      Op1.getOpcode() == ISD::Constant &&
+  // Look for X == 0, X == 1, X != 0, or X != 1.  We can simplify some forms of
+  // these.
+  if (Op1.getOpcode() == ISD::Constant &&
       (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
        cast<ConstantSDNode>(Op1)->isNullValue()) &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-    X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
-    bool Invert = (CC == ISD::SETNE) ^
-      cast<ConstantSDNode>(Op1)->isNullValue();
-    if (Invert)
+
+    // If the input is a setcc, then reuse the input setcc or use a new one with
+    // the inverted condition.
+    if (Op0.getOpcode() == X86ISD::SETCC) {
+      X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+      bool Invert = (CC == ISD::SETNE) ^
+        cast<ConstantSDNode>(Op1)->isNullValue();
+      if (!Invert) return Op0;
+
       CCode = X86::GetOppositeBranchCondition(CCode);
-    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                       DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+      return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+                         DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+    }
   }
 
   bool isFP = Op1.getValueType().isFloatingPoint();
@@ -6914,17 +7406,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   if (X86CC == X86::COND_INVALID)
     return SDValue();
 
-  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
-
-  // Use sbb x, x to materialize carry bit into a GPR.
-  if (X86CC == X86::COND_B)
-    return DAG.getNode(ISD::AND, dl, MVT::i8,
-                       DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
-                                   DAG.getConstant(X86CC, MVT::i8), Cond),
-                       DAG.getConstant(1, MVT::i8));
-
+  SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
   return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                     DAG.getConstant(X86CC, MVT::i8), Cond);
+                     DAG.getConstant(X86CC, MVT::i8), EFLAGS);
 }
 
 SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -6996,11 +7480,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
 
   switch (VT.getSimpleVT().SimpleTy) {
   default: break;
-  case MVT::v8i8:
   case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
-  case MVT::v4i16:
   case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
-  case MVT::v2i32:
   case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
   case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
   }
@@ -7051,6 +7532,8 @@ static bool isX86LogicalCmp(SDValue Op) {
   if (Op.getResNo() == 1 &&
       (Opc == X86ISD::ADD ||
        Opc == X86ISD::SUB ||
+       Opc == X86ISD::ADC ||
+       Opc == X86ISD::SBB ||
        Opc == X86ISD::SMUL ||
        Opc == X86ISD::UMUL ||
        Opc == X86ISD::INC ||
@@ -7060,13 +7543,28 @@ static bool isX86LogicalCmp(SDValue Op) {
        Opc == X86ISD::AND))
     return true;
 
+  if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+    return true;
+
   return false;
 }
 
+static bool isZero(SDValue V) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+  return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+  return C && C->isAllOnesValue();
+}
+
 SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   bool addTest = true;
   SDValue Cond  = Op.getOperand(0);
-  DebugLoc dl = Op.getDebugLoc();
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
   SDValue CC;
 
   if (Cond.getOpcode() == ISD::SETCC) {
@@ -7075,34 +7573,44 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       Cond = NewCond;
   }
 
-  // (select (x == 0), -1, 0) -> (sign_bit (x - 1))
-  SDValue Op1 = Op.getOperand(1);
-  SDValue Op2 = Op.getOperand(2);
+  // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+  // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+  // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+  // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
   if (Cond.getOpcode() == X86ISD::SETCC &&
-      cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) {
+      Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+      isZero(Cond.getOperand(1).getOperand(1))) {
     SDValue Cmp = Cond.getOperand(1);
-    if (Cmp.getOpcode() == X86ISD::CMP) {
-      ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1);
+
+    unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+    if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+        (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+      SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+      SDValue CmpOp0 = Cmp.getOperand(0);
+      Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+                        CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+      SDValue Res =   // Res = 0 or -1.
+        DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+                    DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+      if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+        Res = DAG.getNOT(DL, Res, Res.getValueType());
+
       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
-      ConstantSDNode *RHSC =
-        dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode());
-      if (N1C && N1C->isAllOnesValue() &&
-          N2C && N2C->isNullValue() &&
-          RHSC && RHSC->isNullValue()) {
-        SDValue CmpOp0 = Cmp.getOperand(0);
-        Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
-                          CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
-        return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
-                           DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
-      }
+      if (N2C == 0 || !N2C->isNullValue())
+        Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+      return Res;
     }
   }
 
-  // Look pass (and (setcc_carry (cmp ...)), 1).
+  // Look past (and (setcc_carry (cmp ...)), 1).
   if (Cond.getOpcode() == ISD::AND &&
       Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
-    if (C && C->getAPIntValue() == 1) 
+    if (C && C->getAPIntValue() == 1)
       Cond = Cond.getOperand(0);
   }
 
@@ -7135,8 +7643,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
 
     // We know the result of AND is compared against zero. Try to match
     // it to BT.
-    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
-      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
       if (NewSetCC.getNode()) {
         CC = NewSetCC.getOperand(0);
         Cond = NewSetCC.getOperand(1);
@@ -7150,11 +7658,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     Cond = EmitTest(Cond, X86::COND_NE, DAG);
   }
 
+  // a <  b ? -1 :  0 -> RES = ~setcc_carry
+  // a <  b ?  0 : -1 -> RES = setcc_carry
+  // a >= b ? -1 :  0 -> RES = setcc_carry
+  // a >= b ?  0 : -1 -> RES = ~setcc_carry
+  if (Cond.getOpcode() == X86ISD::CMP) {
+    unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+    if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+        (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+      SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+                                DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+      if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+        return DAG.getNOT(DL, Res, Res.getValueType());
+      return Res;
+    }
+  }
+
   // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
   // condition is true.
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SDValue Ops[] = { Op2, Op1, CC, Cond };
-  return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops));
+  return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
 }
 
 // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
@@ -7209,7 +7734,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
   if (Cond.getOpcode() == ISD::AND &&
       Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
-    if (C && C->getAPIntValue() == 1) 
+    if (C && C->getAPIntValue() == 1)
       Cond = Cond.getOperand(0);
   }
 
@@ -7310,7 +7835,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 
     // We know the result of AND is compared against zero. Try to match
     // it to BT.
-    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { 
+    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
       SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
       if (NewSetCC.getNode()) {
         CC = NewSetCC.getOperand(0);
@@ -7337,8 +7862,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                            SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetCygMing() &&
-         "This should be used only on Cygwin/Mingw targets");
+  assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+         "This should be used only on Windows targets");
   DebugLoc dl = Op.getDebugLoc();
 
   // Get the inputs.
@@ -7353,9 +7878,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
   Flag = Chain.getValue(1);
 
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
-  Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+  Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
   Flag = Chain.getValue(1);
 
   Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
@@ -7369,15 +7894,15 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
 
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
-  if (!Subtarget->is64Bit()) {
+  if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                    getPointerTy());
-    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
-                        false, false, 0);
+    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+                        MachinePointerInfo(SV), false, false, 0);
   }
 
   // __va_list_tag:
@@ -7388,48 +7913,107 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   SmallVector<SDValue, 8> MemOps;
   SDValue FIN = Op.getOperand(1);
   // Store gp_offset
-  SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+  SDValue Store = DAG.getStore(Op.getOperand(0), DL,
                                DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
                                                MVT::i32),
-                               FIN, SV, 0, false, false, 0);
+                               FIN, MachinePointerInfo(SV), false, false, 0);
   MemOps.push_back(Store);
 
   // Store fp_offset
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(4));
-  Store = DAG.getStore(Op.getOperand(0), dl,
+  Store = DAG.getStore(Op.getOperand(0), DL,
                        DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
                                        MVT::i32),
-                       FIN, SV, 4, false, false, 0);
+                       FIN, MachinePointerInfo(SV, 4), false, false, 0);
   MemOps.push_back(Store);
 
   // Store ptr to overflow_arg_area
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(4));
   SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                     getPointerTy());
-  Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
+  Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+                       MachinePointerInfo(SV, 8),
                        false, false, 0);
   MemOps.push_back(Store);
 
   // Store ptr to reg_save_area.
-  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                     FIN, DAG.getIntPtrConstant(8));
   SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                                     getPointerTy());
-  Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
-                       false, false, 0);
+  Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+                       MachinePointerInfo(SV, 16), false, false, 0);
   MemOps.push_back(Store);
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                      &MemOps[0], MemOps.size());
 }
 
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  assert(Subtarget->is64Bit() &&
+         "LowerVAARG only handles 64-bit va_arg!");
+  assert((Subtarget->isTargetLinux() ||
+          Subtarget->isTargetDarwin()) &&
+          "Unhandled target in LowerVAARG");
+  assert(Op.getNode()->getNumOperands() == 4);
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  unsigned Align = Op.getConstantOperandVal(3);
+  DebugLoc dl = Op.getDebugLoc();
 
-  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
-  return SDValue();
+  EVT ArgVT = Op.getNode()->getValueType(0);
+  const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+  uint8_t ArgMode;
+
+  // Decide which area this value should be read from.
+  // TODO: Implement the AMD64 ABI in its entirety. This simple
+  // selection mechanism works only for the basic types.
+  if (ArgVT == MVT::f80) {
+    llvm_unreachable("va_arg for f80 not yet implemented");
+  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
+  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
+  } else {
+    llvm_unreachable("Unhandled argument type in LowerVAARG");
+  }
+
+  if (ArgMode == 2) {
+    // Sanity Check: Make sure using fp_offset makes sense.
+    assert(!UseSoftFloat &&
+           !(DAG.getMachineFunction()
+                .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+           Subtarget->hasXMM());
+  }
+
+  // Insert VAARG_64 node into the DAG
+  // VAARG_64 returns two values: Variable Argument Address, Chain
+  SmallVector<SDValue, 11> InstOps;
+  InstOps.push_back(Chain);
+  InstOps.push_back(SrcPtr);
+  InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+  InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+                                          VTs, &InstOps[0], InstOps.size(),
+                                          MVT::i64,
+                                          MachinePointerInfo(SV),
+                                          /*Align=*/0,
+                                          /*Volatile=*/false,
+                                          /*ReadMem=*/true,
+                                          /*WriteMem=*/true);
+  Chain = VAARG.getValue(1);
+
+  // Load the next argument and return it
+  return DAG.getLoad(ArgVT, dl,
+                     Chain,
+                     VAARG,
+                     MachinePointerInfo(),
+                     false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -7440,11 +8024,12 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
   SDValue SrcPtr = Op.getOperand(2);
   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
-  return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
                        DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
-                       false, DstSV, 0, SrcSV, 0);
+                       false,
+                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
 }
 
 SDValue
@@ -7713,10 +8298,11 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
       ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
     } else {
       ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+// FIXME this must be lowered to get rid of the invalid type.
     }
 
     EVT VT = Op.getValueType();
-    ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
+    ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                        DAG.getConstant(NewIntNo, MVT::i32),
                        Op.getOperand(1), ShAmt);
@@ -7740,13 +8326,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -7759,7 +8345,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }
@@ -7784,7 +8371,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
                                   DAG.getIntPtrConstant(TD->getPointerSize()));
   StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
-  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
+  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+                       false, false, 0);
   Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
   MF.getRegInfo().addLiveOut(StoreAddrReg);
 
@@ -7819,11 +8407,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
     unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
     SDValue Addr = Trmp;
     OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 0, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(2, MVT::i64));
-    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+                                MachinePointerInfo(TrmpAddr, 2),
                                 false, false, 2);
 
     // Load the 'nest' parameter value into R10.
@@ -7832,11 +8422,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(10, MVT::i64));
     OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 10, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr, 10),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(12, MVT::i64));
-    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+                                MachinePointerInfo(TrmpAddr, 12),
                                 false, false, 2);
 
     // Jump to the nested function.
@@ -7844,13 +8436,15 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(20, MVT::i64));
     OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
-                                Addr, TrmpAddr, 20, false, false, 0);
+                                Addr, MachinePointerInfo(TrmpAddr, 20),
+                                false, false, 0);
 
     unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                        DAG.getConstant(22, MVT::i64));
     OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
-                                TrmpAddr, 22, false, false, 0);
+                                MachinePointerInfo(TrmpAddr, 22),
+                                false, false, 0);
 
     SDValue Ops[] =
       { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
@@ -7912,22 +8506,26 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
     const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
     OutChains[0] = DAG.getStore(Root, dl,
                                 DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
-                                Trmp, TrmpAddr, 0, false, false, 0);
+                                Trmp, MachinePointerInfo(TrmpAddr),
+                                false, false, 0);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(1, MVT::i32));
-    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+                                MachinePointerInfo(TrmpAddr, 1),
                                 false, false, 1);
 
     const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(5, MVT::i32));
     OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
-                                TrmpAddr, 5, false, false, 1);
+                                MachinePointerInfo(TrmpAddr, 5),
+                                false, false, 1);
 
     Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                        DAG.getConstant(6, MVT::i32));
-    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+                                MachinePointerInfo(TrmpAddr, 6),
                                 false, false, 1);
 
     SDValue Ops[] =
@@ -7959,44 +8557,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
 
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetMachine &TM = MF.getTarget();
-  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  const TargetFrameLowering &TFI = *TM.getFrameLowering();
   unsigned StackAlignment = TFI.getStackAlignment();
   EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
 
   // Save FP Control Word to stack slot
   int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 
-  SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
-                              DAG.getEntryNode(), StackSlot);
+
+  MachineMemOperand *MMO =
+   MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+                           MachineMemOperand::MOStore, 2, 2);
+
+  SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+  SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+                                          DAG.getVTList(MVT::Other),
+                                          Ops, 2, MVT::i16, MMO);
 
   // Load FP Control Word from stack slot
-  SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
-                            false, false, 0);
+  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+                            MachinePointerInfo(), false, false, 0);
 
   // Transform as necessary
   SDValue CWD1 =
-    DAG.getNode(ISD::SRL, dl, MVT::i16,
-                DAG.getNode(ISD::AND, dl, MVT::i16,
+    DAG.getNode(ISD::SRL, DL, MVT::i16,
+                DAG.getNode(ISD::AND, DL, MVT::i16,
                             CWD, DAG.getConstant(0x800, MVT::i16)),
                 DAG.getConstant(11, MVT::i8));
   SDValue CWD2 =
-    DAG.getNode(ISD::SRL, dl, MVT::i16,
-                DAG.getNode(ISD::AND, dl, MVT::i16,
+    DAG.getNode(ISD::SRL, DL, MVT::i16,
+                DAG.getNode(ISD::AND, DL, MVT::i16,
                             CWD, DAG.getConstant(0x400, MVT::i16)),
                 DAG.getConstant(9, MVT::i8));
 
   SDValue RetVal =
-    DAG.getNode(ISD::AND, dl, MVT::i16,
-                DAG.getNode(ISD::ADD, dl, MVT::i16,
-                            DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+    DAG.getNode(ISD::AND, DL, MVT::i16,
+                DAG.getNode(ISD::ADD, DL, MVT::i16,
+                            DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
                             DAG.getConstant(1, MVT::i16)),
                 DAG.getConstant(3, MVT::i16));
 
 
   return DAG.getNode((VT.getSizeInBits() < 16 ?
-                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
 }
 
 SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
@@ -8122,16 +8727,16 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
                      Op.getOperand(1), DAG.getConstant(23, MVT::i32));
 
     ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-    
+
     std::vector<Constant*> CV(4, CI);
     Constant *C = ConstantVector::get(CV);
     SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                                 PseudoSourceValue::getConstantPool(), 0,
+                                 MachinePointerInfo::getConstantPool(),
                                  false, false, 16);
 
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
-    Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+    Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
     Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
     return DAG.getNode(ISD::MUL, dl, VT, Op, R);
   }
@@ -8149,7 +8754,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
     Constant *C = ConstantVector::get(CVM1);
     SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                            PseudoSourceValue::getConstantPool(), 0,
+                            MachinePointerInfo::getConstantPool(),
                             false, false, 16);
 
     // r = pblendv(r, psllw(r & (char16)15, 4), a);
@@ -8157,31 +8762,27 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
     M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
                     DAG.getConstant(4, MVT::i32));
-    R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                    DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
-                    R, M, Op);
+    R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-    
+
     C = ConstantVector::get(CVM2);
     CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
     M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                    PseudoSourceValue::getConstantPool(), 0, false, false, 16);
-    
+                    MachinePointerInfo::getConstantPool(),
+                    false, false, 16);
+
     // r = pblendv(r, psllw(r & (char16)63, 2), a);
     M = DAG.getNode(ISD::AND, dl, VT, R, M);
     M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
                     DAG.getConstant(2, MVT::i32));
-    R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                    DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
-                    R, M, Op);
+    R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
     // a += a
     Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-    
+
     // return pblendv(r, r+r, a);
-    R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                    DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+    R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
                     R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
     return R;
   }
@@ -8198,8 +8799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   SDValue RHS = N->getOperand(1);
   unsigned BaseOp = 0;
   unsigned Cond = 0;
-  DebugLoc dl = Op.getDebugLoc();
-
+  DebugLoc DL = Op.getDebugLoc();
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Unknown ovf instruction!");
   case ISD::SADDO:
@@ -8238,19 +8838,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
     BaseOp = X86ISD::SMUL;
     Cond = X86::COND_O;
     break;
-  case ISD::UMULO:
-    BaseOp = X86ISD::UMUL;
-    Cond = X86::COND_B;
-    break;
+  case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+    SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+                                 MVT::i32);
+    SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+    SDValue SetCC =
+      DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+                  DAG.getConstant(X86::COND_O, MVT::i32),
+                  SDValue(Sum.getNode(), 2));
+
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+    return Sum;
+  }
   }
 
   // Also sets EFLAGS.
   SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
-  SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+  SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
 
   SDValue SetCC =
-    DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
-                DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+    DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+                DAG.getConstant(Cond, MVT::i32),
+                SDValue(Sum.getNode(), 1));
 
   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
   return Sum;
@@ -8258,10 +8868,10 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
   DebugLoc dl = Op.getDebugLoc();
-  
+
   if (!Subtarget->hasSSE2()) {
     SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getConstant(0, 
+    SDValue Zero = DAG.getConstant(0,
                                    Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
     SDValue Ops[] = {
       DAG.getRegister(X86::ESP, MVT::i32), // Base
@@ -8272,37 +8882,37 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
       Zero,
       Chain
     };
-    SDNode *Res = 
+    SDNode *Res =
       DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
                           array_lengthof(Ops));
     return SDValue(Res, 0);
   }
-  
+
   unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
   if (!isDev)
     return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-  
+
   unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
   unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
   unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-  
+
   // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
   if (!Op1 && !Op2 && !Op3 && Op4)
     return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-  
+
   // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
   if (Op1 && !Op2 && !Op3 && !Op4)
     return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-  
-  // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), 
+
+  // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
   //           (MFENCE)>;
   return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 }
 
 SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
   EVT T = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   unsigned Reg = 0;
   unsigned size = 0;
   switch(T.getSimpleVT().SimpleTy) {
@@ -8316,24 +8926,26 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
     Reg = X86::RAX; size = 8;
     break;
   }
-  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                     Op.getOperand(2), SDValue());
   SDValue Ops[] = { cpIn.getValue(0),
                     Op.getOperand(1),
                     Op.getOperand(3),
                     DAG.getTargetConstant(size, MVT::i8),
                     cpIn.getValue(1) };
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+                                           Ops, 5, T, MMO);
   SDValue cpOut =
-    DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
   return cpOut;
 }
 
 SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                  SelectionDAG &DAG) const {
   assert(Subtarget->is64Bit() && "Result not type legalized?");
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue TheChain = Op.getOperand(0);
   DebugLoc dl = Op.getDebugLoc();
   SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
@@ -8349,16 +8961,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
-SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
                                             SelectionDAG &DAG) const {
   EVT SrcVT = Op.getOperand(0).getValueType();
   EVT DstVT = Op.getValueType();
-  assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && 
-          Subtarget->hasMMX() && !DisableMMX) &&
-         "Unexpected custom BIT_CONVERT");
-  assert((DstVT == MVT::i64 || 
+  assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+         Subtarget->hasMMX() && "Unexpected custom BITCAST");
+  assert((DstVT == MVT::i64 ||
           (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
-         "Unexpected custom BIT_CONVERT");
+         "Unexpected custom BITCAST");
   // i64 <=> MMX conversions are Legal.
   if (SrcVT==MVT::i64 && DstVT.isVector())
     return Op;
@@ -8370,6 +8981,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
   // All other conversions need to be expanded.
   return SDValue();
 }
+
 SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
@@ -8384,6 +8996,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
                        cast<AtomicSDNode>(Node)->getAlignment());
 }
 
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getNode()->getValueType(0);
+
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+  unsigned Opc;
+  bool ExtraOp = false;
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Invalid code");
+  case ISD::ADDC: Opc = X86ISD::ADD; break;
+  case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+  case ISD::SUBC: Opc = X86ISD::SUB; break;
+  case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+  }
+
+  if (!ExtraOp)
+    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+                       Op.getOperand(1));
+  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+                     Op.getOperand(1), Op.getOperand(2));
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -8397,6 +9035,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::EXTRACT_SUBVECTOR:  return LowerEXTRACT_SUBVECTOR(Op, DAG);
+  case ISD::INSERT_SUBVECTOR:   return LowerINSERT_SUBVECTOR(Op, DAG);
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
@@ -8441,7 +9081,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SMULO:
   case ISD::UMULO:              return LowerXALUO(Op, DAG);
   case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
-  case ISD::BIT_CONVERT:        return LowerBIT_CONVERT(Op, DAG);
+  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
+  case ISD::ADDC:
+  case ISD::ADDE:
+  case ISD::SUBC:
+  case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   }
 }
 
@@ -8478,6 +9122,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   default:
     assert(false && "Do not know how to custom type legalize this operation!");
     return;
+  case ISD::ADDC:
+  case ISD::ADDE:
+  case ISD::SUBC:
+  case ISD::SUBE:
+    // We don't want to expand or promote these.
+    return;
   case ISD::FP_TO_SINT: {
     std::pair<SDValue,SDValue> Vals =
         FP_TO_INTHelper(SDValue(N, 0), DAG, true);
@@ -8485,13 +9135,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     if (FIST.getNode() != 0) {
       EVT VT = N->getValueType(0);
       // Return a load from the stack slot.
-      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
-                                    false, false, 0));
+      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+                                    MachinePointerInfo(), false, false, 0));
     }
     return;
   }
   case ISD::READCYCLECOUNTER: {
-    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue TheChain = N->getOperand(0);
     SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
     SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
@@ -8527,8 +9177,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Ops[] = { swapInH.getValue(0),
                       N->getOperand(1),
                       swapInH.getValue(1) };
-    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+    SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+                                             Ops, 3, T, MMO);
     SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
                                         MVT::i32, Result.getValue(1));
     SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
@@ -8601,15 +9253,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
   case X86ISD::PINSRB:             return "X86ISD::PINSRB";
   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
-  case X86ISD::MMX_PINSRW:         return "X86ISD::MMX_PINSRW";
   case X86ISD::PSHUFB:             return "X86ISD::PSHUFB";
+  case X86ISD::PANDN:              return "X86ISD::PANDN";
+  case X86ISD::PSIGNB:             return "X86ISD::PSIGNB";
+  case X86ISD::PSIGNW:             return "X86ISD::PSIGNW";
+  case X86ISD::PSIGND:             return "X86ISD::PSIGND";
+  case X86ISD::PBLENDVB:           return "X86ISD::PBLENDVB";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
   case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
   case X86ISD::FRCP:               return "X86ISD::FRCP";
   case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
   case X86ISD::TLSCALL:            return "X86ISD::TLSCALL";
-  case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
   case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
   case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
   case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
@@ -8637,6 +9292,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PCMPGTQ:            return "X86ISD::PCMPGTQ";
   case X86ISD::ADD:                return "X86ISD::ADD";
   case X86ISD::SUB:                return "X86ISD::SUB";
+  case X86ISD::ADC:                return "X86ISD::ADC";
+  case X86ISD::SBB:                return "X86ISD::SBB";
   case X86ISD::SMUL:               return "X86ISD::SMUL";
   case X86ISD::UMUL:               return "X86ISD::UMUL";
   case X86ISD::INC:                return "X86ISD::INC";
@@ -8681,7 +9338,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
   case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
-  case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
+  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
+  case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
   }
 }
 
@@ -9203,15 +9861,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
 MachineBasicBlock *
 X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
                             unsigned numArgs, bool memArg) const {
-
   assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
          "Target must have SSE4.2 or AVX features enabled");
 
   DebugLoc dl = MI->getDebugLoc();
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
   unsigned Opc;
-
   if (!Subtarget->hasAVX()) {
     if (memArg)
       Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
@@ -9224,23 +9879,317 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
       Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
   }
 
-  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
-
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
   for (unsigned i = 0; i < numArgs; ++i) {
     MachineOperand &Op = MI->getOperand(i+1);
-
     if (!(Op.isReg() && Op.isImplicit()))
       MIB.addOperand(Op);
   }
-
-  BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+  BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
     .addReg(X86::XMM0);
 
   MI->eraseFromParent();
+  return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+  DebugLoc dl = MI->getDebugLoc();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  // Address into RAX/EAX, other two args into ECX, EDX.
+  unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+  unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+  for (int i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(i));
+
+  unsigned ValOps = X86::AddrNumOperands;
+  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+    .addReg(MI->getOperand(ValOps).getReg());
+  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+    .addReg(MI->getOperand(ValOps+1).getReg());
+
+  // The instruction doesn't actually take any operands though.
+  BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+  MI->eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+  DebugLoc dl = MI->getDebugLoc();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  // First arg in ECX, the second in EAX.
+  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+    .addReg(MI->getOperand(0).getReg());
+  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+    .addReg(MI->getOperand(1).getReg());
 
+  // The instruction doesn't actually take any operands though.
+  BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+  MI->eraseFromParent(); // The pseudo is gone now.
   return BB;
 }
 
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+                   MachineInstr *MI,
+                   MachineBasicBlock *MBB) const {
+  // Emit va_arg instruction on X86-64.
+
+  // Operands to this pseudo-instruction:
+  // 0  ) Output        : destination address (reg)
+  // 1-5) Input         : va_list address (addr, i64mem)
+  // 6  ) ArgSize       : Size (in bytes) of vararg type
+  // 7  ) ArgMode       : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+  // 8  ) Align         : Alignment of type
+  // 9  ) EFLAGS (implicit-def)
+
+  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Scale = MI->getOperand(2);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Disp = MI->getOperand(4);
+  MachineOperand &Segment = MI->getOperand(5);
+  unsigned ArgSize = MI->getOperand(6).getImm();
+  unsigned ArgMode = MI->getOperand(7).getImm();
+  unsigned Align = MI->getOperand(8).getImm();
+
+  // Memory Reference
+  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  // Machine Information
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+  DebugLoc DL = MI->getDebugLoc();
+
+  // struct va_list {
+  //   i32   gp_offset
+  //   i32   fp_offset
+  //   i64   overflow_area (address)
+  //   i64   reg_save_area (address)
+  // }
+  // sizeof(va_list) = 24
+  // alignment(va_list) = 8
+
+  unsigned TotalNumIntRegs = 6;
+  unsigned TotalNumXMMRegs = 8;
+  bool UseGPOffset = (ArgMode == 1);
+  bool UseFPOffset = (ArgMode == 2);
+  unsigned MaxOffset = TotalNumIntRegs * 8 +
+                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+  /* Align ArgSize to a multiple of 8 */
+  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+  bool NeedsAlign = (Align > 8);
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *overflowMBB;
+  MachineBasicBlock *offsetMBB;
+  MachineBasicBlock *endMBB;
+
+  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
+  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
+  unsigned OffsetReg = 0;
+
+  if (!UseGPOffset && !UseFPOffset) {
+    // If we only pull from the overflow region, we don't create a branch.
+    // We don't need to alter control flow.
+    OffsetDestReg = 0; // unused
+    OverflowDestReg = DestReg;
+
+    offsetMBB = NULL;
+    overflowMBB = thisMBB;
+    endMBB = thisMBB;
+  } else {
+    // First emit code to check if gp_offset (or fp_offset) is below the bound.
+    // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+    // If not, pull from overflow_area. (branch to overflowMBB)
+    //
+    //       thisMBB
+    //         |     .
+    //         |        .
+    //     offsetMBB   overflowMBB
+    //         |        .
+    //         |     .
+    //        endMBB
+
+    // Registers for the PHI in endMBB
+    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+    MachineFunction *MF = MBB->getParent();
+    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+    MachineFunction::iterator MBBIter = MBB;
+    ++MBBIter;
+
+    // Insert the new basic blocks
+    MF->insert(MBBIter, offsetMBB);
+    MF->insert(MBBIter, overflowMBB);
+    MF->insert(MBBIter, endMBB);
+
+    // Transfer the remainder of MBB and its successor edges to endMBB.
+    endMBB->splice(endMBB->begin(), thisMBB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    thisMBB->end());
+    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+    // Make offsetMBB and overflowMBB successors of thisMBB
+    thisMBB->addSuccessor(offsetMBB);
+    thisMBB->addSuccessor(overflowMBB);
+
+    // endMBB is a successor of both offsetMBB and overflowMBB
+    offsetMBB->addSuccessor(endMBB);
+    overflowMBB->addSuccessor(endMBB);
+
+    // Load the offset value into a register
+    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Check if there is enough room left to pull this argument.
+    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+      .addReg(OffsetReg)
+      .addImm(MaxOffset + 8 - ArgSizeA8);
+
+    // Branch to "overflowMBB" if offset >= max
+    // Fall through to "offsetMBB" otherwise
+    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+      .addMBB(overflowMBB);
+  }
+
+  // In offsetMBB, emit code to use the reg_save_area.
+  if (offsetMBB) {
+    assert(OffsetReg != 0);
+
+    // Read the reg_save_area address.
+    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, 16)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Zero-extend the offset
+    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+      BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+        .addImm(0)
+        .addReg(OffsetReg)
+        .addImm(X86::sub_32bit);
+
+    // Add the offset to the reg_save_area to get the final address.
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+      .addReg(OffsetReg64)
+      .addReg(RegSaveReg);
+
+    // Compute the offset for the next argument
+    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+      .addReg(OffsetReg)
+      .addImm(UseFPOffset ? 16 : 8);
+
+    // Store it back into the va_list.
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .addReg(NextOffsetReg)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Jump to endMBB
+    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+      .addMBB(endMBB);
+  }
+
+  //
+  // Emit code to use overflow area
+  //
+
+  // Load the overflow_area address into a register.
+  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we need to align it, do so. Otherwise, just copy the address
+  // to OverflowDestReg.
+  if (NeedsAlign) {
+    // Align the overflow address
+    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // aligned_addr = (addr + (align-1)) & ~(align-1)
+    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+      .addReg(OverflowAddrReg)
+      .addImm(Align-1);
+
+    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+      .addReg(TmpReg)
+      .addImm(~(uint64_t)(Align-1));
+  } else {
+    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+      .addReg(OverflowAddrReg);
+  }
+
+  // Compute the next overflow address after this argument.
+  // (the overflow address should be kept 8-byte aligned)
+  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+    .addReg(OverflowDestReg)
+    .addImm(ArgSizeA8);
+
+  // Store the new overflow address.
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .addReg(NextAddrReg)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we branched, emit the PHI to the front of endMBB.
+  if (offsetMBB) {
+    BuildMI(*endMBB, endMBB->begin(), DL,
+            TII->get(X86::PHI), DestReg)
+      .addReg(OffsetDestReg).addMBB(offsetMBB)
+      .addReg(OverflowDestReg).addMBB(overflowMBB);
+  }
+
+  // Erase the pseudo instruction
+  MI->eraseFromParent();
+
+  return endMBB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                  MachineInstr *MI,
@@ -9296,8 +10245,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
     MachineMemOperand *MMO =
       F->getMachineMemOperand(
-        PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
-        MachineMemOperand::MOStore, Offset,
+          MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+        MachineMemOperand::MOStore,
         /*Size=*/16, /*Align=*/16);
     BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
       .addFrameIndex(RegSaveFrameIndex)
@@ -9389,7 +10338,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                           MachineBasicBlock *BB) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -9399,8 +10348,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
   // FIXME: The code should be tweaked as soon as we'll try to do codegen for
   // mingw-w64.
 
+  const char *StackProbeSymbol =
+      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
   BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
-    .addExternalSymbol("_alloca")
+    .addExternalSymbol(StackProbeSymbol)
     .addReg(X86::EAX, RegState::Implicit)
     .addReg(X86::ESP, RegState::Implicit)
     .addReg(X86::EAX, RegState::Define | RegState::Implicit)
@@ -9418,30 +10370,30 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
   // our load from the relocation, sticking it in either RDI (x86-64)
   // or EAX and doing an indirect call.  The return value will then
   // be in the normal return register.
-  const X86InstrInfo *TII 
+  const X86InstrInfo *TII
     = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
   DebugLoc DL = MI->getDebugLoc();
   MachineFunction *F = BB->getParent();
-  bool IsWin64 = Subtarget->isTargetWin64();
-  
+
+  assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
   assert(MI->getOperand(3).isGlobal() && "This should be a global");
-  
+
   if (Subtarget->is64Bit()) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                       TII->get(X86::MOV64rm), X86::RDI)
     .addReg(X86::RIP)
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
-    MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
+    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
     addDirectMem(MIB, X86::RDI);
   } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                       TII->get(X86::MOV32rm), X86::EAX)
     .addReg(0)
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
     MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
@@ -9451,13 +10403,13 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
                                       TII->get(X86::MOV32rm), X86::EAX)
     .addReg(TII->getGlobalBaseReg(F))
     .addImm(0).addReg(0)
-    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, 
+    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                       MI->getOperand(3).getTargetFlags())
     .addReg(0);
     MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
     addDirectMem(MIB, X86::EAX);
   }
-  
+
   MI->eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
 }
@@ -9467,13 +10419,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
   switch (MI->getOpcode()) {
   default: assert(false && "Unexpected instr type to insert");
-  case X86::MINGW_ALLOCA:
-    return EmitLoweredMingwAlloca(MI, BB);
+  case X86::TAILJMPd64:
+  case X86::TAILJMPr64:
+  case X86::TAILJMPm64:
+    assert(!"TAILJMP64 would not be touched here.");
+  case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
+    // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
+    // On AMD64, additional defs should be added before register allocation.
+    if (!Subtarget->isTargetWin64()) {
+      MI->addRegisterDefined(X86::RSI);
+      MI->addRegisterDefined(X86::RDI);
+      MI->addRegisterDefined(X86::XMM6);
+      MI->addRegisterDefined(X86::XMM7);
+      MI->addRegisterDefined(X86::XMM8);
+      MI->addRegisterDefined(X86::XMM9);
+      MI->addRegisterDefined(X86::XMM10);
+      MI->addRegisterDefined(X86::XMM11);
+      MI->addRegisterDefined(X86::XMM12);
+      MI->addRegisterDefined(X86::XMM13);
+      MI->addRegisterDefined(X86::XMM14);
+      MI->addRegisterDefined(X86::XMM15);
+    }
+    return BB;
+  case X86::WIN_ALLOCA:
+    return EmitLoweredWinAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
   case X86::CMOV_GR8:
-  case X86::CMOV_V1I64:
   case X86::CMOV_FR32:
   case X86::CMOV_FR64:
   case X86::CMOV_V4F32:
@@ -9583,6 +10558,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::VPCMPESTRM128MEM:
     return EmitPCMP(MI, BB, 5, true /* in mem */);
 
+    // Thread synchronization.
+  case X86::MONITOR:
+    return EmitMonitor(MI, BB);
+  case X86::MWAIT:
+    return EmitMwait(MI, BB);
+
     // Atomic Lowering.
   case X86::ATOMAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
@@ -9747,6 +10728,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                false);
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+  case X86::VAARG_64:
+    return EmitVAARG64WithCustomInserter(MI, BB);
   }
 }
 
@@ -9773,6 +10757,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   default: break;
   case X86ISD::ADD:
   case X86ISD::SUB:
+  case X86ISD::ADC:
+  case X86ISD::SBB:
   case X86ISD::SMUL:
   case X86ISD::UMUL:
   case X86ISD::INC:
@@ -9791,6 +10777,16 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   }
 }
 
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         unsigned Depth) const {
+  // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+  if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+    return Op.getValueType().getScalarType().getSizeInBits();
+
+  // Fallback case.
+  return 1;
+}
+
 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
 /// node is a GlobalAddress + offset.
 bool X86TargetLowering::isGAPlusOffset(SDNode *N,
@@ -9811,13 +10807,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
 /// if the load addresses are consecutive, non-overlapping, and in the right
 /// order.
 static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
-                                     const TargetLowering &TLI) {
+                                     TargetLowering::DAGCombinerInfo &DCI) {
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
 
   if (VT.getSizeInBits() != 128)
     return SDValue();
 
+  // Don't create instructions with illegal types after legalize types has run.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+    return SDValue();
+
   SmallVector<SDValue, 16> Elts;
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
     Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -9877,8 +10878,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
 
   // Store the value to a temporary stack slot.
   SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
-                            0, false, false, 0);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+                            MachinePointerInfo(), false, false, 0);
 
   // Replace each use (extract) with a load of the appropriate element.
   for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9893,11 +10894,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
     SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
 
     SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
-                                     OffsetVal, StackPtr);
+                                     StackPtr, OffsetVal);
 
     // Load the scalar.
     SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
-                                     ScalarAddr, NULL, 0, false, false, 0);
+                                     ScalarAddr, MachinePointerInfo(),
+                                     false, false, 0);
 
     // Replace the exact with the load.
     DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -10473,6 +11475,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
   return SDValue();
 }
 
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget *Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  // Want to form PANDN nodes, in the hopes of then easily combining them with
+  // OR and AND nodes to form PBLEND/PSIGN.
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::v2i64)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Check LHS for vnot
+  if (N0.getOpcode() == ISD::XOR &&
+      ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+    return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+  // Check RHS for vnot
+  if (N1.getOpcode() == ISD::XOR &&
+      ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+    return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+  return SDValue();
+}
+
 static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
@@ -10480,12 +11512,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT VT = N->getValueType(0);
-  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
     return SDValue();
 
-  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
+
+  // look for psign/blend
+  if (Subtarget->hasSSSE3()) {
+    if (VT == MVT::v2i64) {
+      // Canonicalize pandn to RHS
+      if (N0.getOpcode() == X86ISD::PANDN)
+        std::swap(N0, N1);
+      // or (and (m, x), (pandn m, y))
+      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+        SDValue Mask = N1.getOperand(0);
+        SDValue X    = N1.getOperand(1);
+        SDValue Y;
+        if (N0.getOperand(0) == Mask)
+          Y = N0.getOperand(1);
+        if (N0.getOperand(1) == Mask)
+          Y = N0.getOperand(0);
+
+        // Check to see if the mask appeared in both the AND and PANDN and
+        if (!Y.getNode())
+          return SDValue();
+
+        // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
+        if (Mask.getOpcode() != ISD::BITCAST ||
+            X.getOpcode() != ISD::BITCAST ||
+            Y.getOpcode() != ISD::BITCAST)
+          return SDValue();
+
+        // Look through mask bitcast.
+        Mask = Mask.getOperand(0);
+        EVT MaskVT = Mask.getValueType();
+
+        // Validate that the Mask operand is a vector sra node.  The sra node
+        // will be an intrinsic.
+        if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+          return SDValue();
+
+        // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+        // there is no psrai.b
+        switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+        case Intrinsic::x86_sse2_psrai_w:
+        case Intrinsic::x86_sse2_psrai_d:
+          break;
+        default: return SDValue();
+        }
+
+        // Check that the SRA is all signbits.
+        SDValue SraC = Mask.getOperand(2);
+        unsigned SraAmt  = cast<ConstantSDNode>(SraC)->getZExtValue();
+        unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+        if ((SraAmt + 1) != EltBits)
+          return SDValue();
+
+        DebugLoc DL = N->getDebugLoc();
+
+        // Now we know we at least have a plendvb with the mask val.  See if
+        // we can form a psignb/w/d.
+        // psign = x.type == y.type == mask.type && y = sub(0, x);
+        X = X.getOperand(0);
+        Y = Y.getOperand(0);
+        if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+            ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+            X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+          unsigned Opc = 0;
+          switch (EltBits) {
+          case 8: Opc = X86ISD::PSIGNB; break;
+          case 16: Opc = X86ISD::PSIGNW; break;
+          case 32: Opc = X86ISD::PSIGND; break;
+          default: break;
+          }
+          if (Opc) {
+            SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+            return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+          }
+        }
+        // PBLENDVB only available on SSE 4.1
+        if (!Subtarget->hasSSE41())
+          return SDValue();
+
+        X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+        Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+        Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+        Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+        return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+      }
+    }
+  }
+
+  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
     std::swap(N0, N1);
   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -10600,9 +11719,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
     // pair instead.
     if (Subtarget->is64Bit() || F64IsLegal) {
       EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
-      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
-                                  Ld->getBasePtr(), Ld->getSrcValue(),
-                                  Ld->getSrcValueOffset(), Ld->isVolatile(),
+      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+                                  Ld->getPointerInfo(), Ld->isVolatile(),
                                   Ld->isNonTemporal(), Ld->getAlignment());
       SDValue NewChain = NewLd.getValue(1);
       if (TokenFactorIndex != -1) {
@@ -10611,7 +11729,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                Ops.size());
       }
       return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
-                          St->getSrcValue(), St->getSrcValueOffset(),
+                          St->getPointerInfo(),
                           St->isVolatile(), St->isNonTemporal(),
                           St->getAlignment());
     }
@@ -10622,11 +11740,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                  DAG.getConstant(4, MVT::i32));
 
     SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
-                               Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                               Ld->getPointerInfo(),
                                Ld->isVolatile(), Ld->isNonTemporal(),
                                Ld->getAlignment());
     SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
-                               Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+                               Ld->getPointerInfo().getWithOffset(4),
                                Ld->isVolatile(), Ld->isNonTemporal(),
                                MinAlign(Ld->getAlignment(), 4));
 
@@ -10643,12 +11761,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                          DAG.getConstant(4, MVT::i32));
 
     SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
-                                St->getSrcValue(), St->getSrcValueOffset(),
+                                St->getPointerInfo(),
                                 St->isVolatile(), St->isNonTemporal(),
                                 St->getAlignment());
     SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
-                                St->getSrcValue(),
-                                St->getSrcValueOffset() + 4,
+                                St->getPointerInfo().getWithOffset(4),
                                 St->isVolatile(),
                                 St->isNonTemporal(),
                                 MinAlign(St->getAlignment(), 4));
@@ -10706,13 +11823,13 @@ static SDValue PerformBTCombine(SDNode *N,
 
 static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
   SDValue Op = N->getOperand(0);
-  if (Op.getOpcode() == ISD::BIT_CONVERT)
+  if (Op.getOpcode() == ISD::BITCAST)
     Op = Op.getOperand(0);
   EVT VT = N->getValueType(0), OpVT = Op.getValueType();
   if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
       VT.getVectorElementType().getSizeInBits() ==
       OpVT.getVectorElementType().getSizeInBits()) {
-    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
   }
   return SDValue();
 }
@@ -10743,19 +11860,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+// Optimize  RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+  unsigned X86CC = N->getConstantOperandVal(0);
+  SDValue EFLAG = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+  // a zext and produces an all-ones bit which is more useful than 0/1 in some
+  // cases.
+  if (X86CC == X86::COND_B)
+    return DAG.getNode(ISD::AND, DL, MVT::i8,
+                       DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+                                   DAG.getConstant(X86CC, MVT::i8), EFLAG),
+                       DAG.getConstant(1, MVT::i8));
+
+  return SDValue();
+}
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+                                 X86TargetLowering::DAGCombinerInfo &DCI) {
+  // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+  // the result is either zero or one (depending on the input carry bit).
+  // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+  if (X86::isZeroNode(N->getOperand(0)) &&
+      X86::isZeroNode(N->getOperand(1)) &&
+      // We don't have a good way to replace an EFLAGS use, so only do this when
+      // dead right now.
+      SDValue(N, 1).use_empty()) {
+    DebugLoc DL = N->getDebugLoc();
+    EVT VT = N->getValueType(0);
+    SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+    SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+                               DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+                                           DAG.getConstant(X86::COND_B,MVT::i8),
+                                           N->getOperand(2)),
+                               DAG.getConstant(1, VT));
+    return DCI.CombineTo(N, Res1, CarryOut);
+  }
+
+  return SDValue();
+}
+
+// fold (add Y, (sete  X, 0)) -> adc  0, Y
+//      (add Y, (setne X, 0)) -> sbb -1, Y
+//      (sub (sete  X, 0), Y) -> sbb  0, Y
+//      (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+  DebugLoc DL = N->getDebugLoc();
+
+  // Look through ZExts.
+  SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+  if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+    return SDValue();
+
+  SDValue SetCC = Ext.getOperand(0);
+  if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+    return SDValue();
+
+  X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+  if (CC != X86::COND_E && CC != X86::COND_NE)
+    return SDValue();
+
+  SDValue Cmp = SetCC.getOperand(1);
+  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+      !X86::isZeroNode(Cmp.getOperand(1)) ||
+      !Cmp.getOperand(0).getValueType().isInteger())
+    return SDValue();
+
+  SDValue CmpOp0 = Cmp.getOperand(0);
+  SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+                               DAG.getConstant(1, CmpOp0.getValueType()));
+
+  SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+  if (CC == X86::COND_NE)
+    return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+                       DL, OtherVal.getValueType(), OtherVal,
+                       DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+  return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+                     DL, OtherVal.getValueType(), OtherVal,
+                     DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
   default: break;
   case ISD::EXTRACT_VECTOR_ELT:
-                        return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+    return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::ADD:
+  case ISD::SUB:            return OptimizeConditonalInDecrement(N, DAG);
+  case X86ISD::ADC:         return PerformADCCombine(N, DAG, DCI);
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
+  case ISD::AND:            return PerformAndCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:             return PerformOrCombine(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
@@ -10764,8 +11968,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
   case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
+  case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG);
   case X86ISD::SHUFPS:      // Handle all target specific shuffles
   case X86ISD::SHUFPD:
+  case X86ISD::PALIGN:
   case X86ISD::PUNPCKHBW:
   case X86ISD::PUNPCKHWD:
   case X86ISD::PUNPCKHDQ:
@@ -10785,7 +11991,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PSHUFLW:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
-  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
   }
 
   return SDValue();
@@ -10892,44 +12098,14 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
 //                           X86 Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
-static bool LowerToBSwap(CallInst *CI) {
-  // FIXME: this should verify that we are targetting a 486 or better.  If not,
-  // we will turn this bswap into something that will be lowered to logical ops
-  // instead of emitting the bswap asm.  For now, we don't support 486 or lower
-  // so don't worry about this.
-
-  // Verify this is a simple bswap.
-  if (CI->getNumArgOperands() != 1 ||
-      CI->getType() != CI->getArgOperand(0)->getType() ||
-      !CI->getType()->isIntegerTy())
-    return false;
-
-  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
-  if (!Ty || Ty->getBitWidth() % 16 != 0)
-    return false;
-
-  // Okay, we can do this xform, do so now.
-  const Type *Tys[] = { Ty };
-  Module *M = CI->getParent()->getParent()->getParent();
-  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-
-  Value *Op = CI->getArgOperand(0);
-  Op = CallInst::Create(Int, Op, CI->getName(), CI);
-
-  CI->replaceAllUsesWith(Op);
-  CI->eraseFromParent();
-  return true;
-}
-
 bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
   InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
-  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
 
   std::string AsmStr = IA->getAsmString();
 
   // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
   SmallVector<StringRef, 4> AsmPieces;
-  SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?
+  SplitString(AsmStr, AsmPieces, ";\n");
 
   switch (AsmPieces.size()) {
   default: return false;
@@ -10938,6 +12114,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
     AsmPieces.clear();
     SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
 
+    // FIXME: this should verify that we are targetting a 486 or better.  If not,
+    // we will turn this bswap into something that will be lowered to logical ops
+    // instead of emitting the bswap asm.  For now, we don't support 486 or lower
+    // so don't worry about this.
     // bswap $0
     if (AsmPieces.size() == 2 &&
         (AsmPieces[0] == "bswap" ||
@@ -10947,7 +12127,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
          AsmPieces[1] == "${0:q}")) {
       // No need to check constraints, nothing other than the equivalent of
       // "=r,0" would be valid here.
-      return LowerToBSwap(CI);
+      const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+      if (!Ty || Ty->getBitWidth() % 16 != 0)
+        return false;
+      return IntrinsicLowering::LowerToByteSwap(CI);
     }
     // rorw $$8, ${0:w}  -->  llvm.bswap.i16
     if (CI->getType()->isIntegerTy(16) &&
@@ -10957,35 +12140,76 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
         AsmPieces[2] == "${0:w}" &&
         IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
       AsmPieces.clear();
-      const std::string &Constraints = IA->getConstraintString();
-      SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
+      const std::string &ConstraintsStr = IA->getConstraintString();
+      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
       std::sort(AsmPieces.begin(), AsmPieces.end());
       if (AsmPieces.size() == 4 &&
           AsmPieces[0] == "~{cc}" &&
           AsmPieces[1] == "~{dirflag}" &&
           AsmPieces[2] == "~{flags}" &&
           AsmPieces[3] == "~{fpsr}") {
-        return LowerToBSwap(CI);
+        const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+        if (!Ty || Ty->getBitWidth() % 16 != 0)
+          return false;
+        return IntrinsicLowering::LowerToByteSwap(CI);
       }
     }
     break;
   case 3:
-    if (CI->getType()->isIntegerTy(64) &&
-        Constraints.size() >= 2 &&
-        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
-        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
-      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
+    if (CI->getType()->isIntegerTy(32) &&
+        IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
       SmallVector<StringRef, 4> Words;
-      SplitString(AsmPieces[0], Words, " \t");
-      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+      SplitString(AsmPieces[0], Words, " \t,");
+      if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+          Words[2] == "${0:w}") {
         Words.clear();
-        SplitString(AsmPieces[1], Words, " \t");
-        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+        SplitString(AsmPieces[1], Words, " \t,");
+        if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+            Words[2] == "$0") {
           Words.clear();
           SplitString(AsmPieces[2], Words, " \t,");
-          if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
-              Words[2] == "%edx") {
-            return LowerToBSwap(CI);
+          if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+              Words[2] == "${0:w}") {
+            AsmPieces.clear();
+            const std::string &ConstraintsStr = IA->getConstraintString();
+            SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+            std::sort(AsmPieces.begin(), AsmPieces.end());
+            if (AsmPieces.size() == 4 &&
+                AsmPieces[0] == "~{cc}" &&
+                AsmPieces[1] == "~{dirflag}" &&
+                AsmPieces[2] == "~{flags}" &&
+                AsmPieces[3] == "~{fpsr}") {
+              const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+              if (!Ty || Ty->getBitWidth() % 16 != 0)
+                return false;
+              return IntrinsicLowering::LowerToByteSwap(CI);
+            }
+          }
+        }
+      }
+    }
+
+    if (CI->getType()->isIntegerTy(64)) {
+      InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+      if (Constraints.size() >= 2 &&
+          Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+          Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+        // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
+        SmallVector<StringRef, 4> Words;
+        SplitString(AsmPieces[0], Words, " \t");
+        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+          Words.clear();
+          SplitString(AsmPieces[1], Words, " \t");
+          if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+            Words.clear();
+            SplitString(AsmPieces[2], Words, " \t,");
+            if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+                Words[2] == "%edx") {
+              const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+              if (!Ty || Ty->getBitWidth() % 16 != 0)
+                return false;
+              return IntrinsicLowering::LowerToByteSwap(CI);
+            }
           }
         }
       }
@@ -11003,18 +12227,32 @@ X86TargetLowering::ConstraintType
 X86TargetLowering::getConstraintType(const std::string &Constraint) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
-    case 'A':
-      return C_Register;
-    case 'f':
-    case 'r':
     case 'R':
-    case 'l':
     case 'q':
     case 'Q':
-    case 'x':
+    case 'f':
+    case 't':
+    case 'u':
     case 'y':
+    case 'x':
     case 'Y':
       return C_RegisterClass;
+    case 'a':
+    case 'b':
+    case 'c':
+    case 'd':
+    case 'S':
+    case 'D':
+    case 'A':
+      return C_Register;
+    case 'I':
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'G':
+    case 'C':
     case 'e':
     case 'Z':
       return C_Other;
@@ -11025,6 +12263,110 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  X86TargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+    // If we don't have a value, we can't do a match,
+    // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+  case 'R':
+  case 'q':
+  case 'Q':
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'S':
+  case 'D':
+  case 'A':
+    if (CallOperandVal->getType()->isIntegerTy())
+      weight = CW_SpecificReg;
+    break;
+  case 'f':
+  case 't':
+  case 'u':
+      if (type->isFloatingPointTy())
+        weight = CW_SpecificReg;
+      break;
+  case 'y':
+      if (type->isX86_MMXTy() && Subtarget->hasMMX())
+        weight = CW_SpecificReg;
+      break;
+  case 'x':
+  case 'Y':
+    if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+      weight = CW_Register;
+    break;
+  case 'I':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+      if (C->getZExtValue() <= 31)
+        weight = CW_Constant;
+    }
+    break;
+  case 'J':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if (C->getZExtValue() <= 63)
+        weight = CW_Constant;
+    }
+    break;
+  case 'K':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+        weight = CW_Constant;
+    }
+    break;
+  case 'L':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+        weight = CW_Constant;
+    }
+    break;
+  case 'M':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if (C->getZExtValue() <= 3)
+        weight = CW_Constant;
+    }
+    break;
+  case 'N':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if (C->getZExtValue() <= 0xff)
+        weight = CW_Constant;
+    }
+    break;
+  case 'G':
+  case 'C':
+    if (dyn_cast<ConstantFP>(CallOperandVal)) {
+      weight = CW_Constant;
+    }
+    break;
+  case 'e':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if ((C->getSExtValue() >= -0x80000000LL) &&
+          (C->getSExtValue() <= 0x7fffffffLL))
+        weight = CW_Constant;
+    }
+    break;
+  case 'Z':
+    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+      if (C->getZExtValue() <= 0xffffffff)
+        weight = CW_Constant;
+    }
+    break;
+  }
+  return weight;
+}
+
 /// LowerXConstraint - try to replace an X constraint, which matches anything,
 /// with another that has more specific requirements based on the type of the
 /// corresponding operand.
@@ -11033,9 +12375,9 @@ LowerXConstraint(EVT ConstraintVT) const {
   // FP X constraints get lowered to SSE1/2 registers if available, otherwise
   // 'f' like normal targets.
   if (ConstraintVT.isFloatingPoint()) {
-    if (Subtarget->hasSSE2())
+    if (Subtarget->hasXMMInt())
       return "Y";
-    if (Subtarget->hasSSE1())
+    if (Subtarget->hasXMM())
       return "x";
   }
 
@@ -11265,10 +12607,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
       if (!Subtarget->hasMMX()) break;
       return std::make_pair(0U, X86::VR64RegisterClass);
     case 'Y':   // SSE_REGS if SSE2 allowed
-      if (!Subtarget->hasSSE2()) break;
+      if (!Subtarget->hasXMMInt()) break;
       // FALL THROUGH.
     case 'x':   // SSE_REGS if SSE1 allowed
-      if (!Subtarget->hasSSE1()) break;
+      if (!Subtarget->hasXMM()) break;
 
       switch (VT.getSimpleVT().SimpleTy) {
       default: break;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d2d9b28a0396..419da3742cf8 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -57,35 +57,6 @@ namespace llvm {
       /// corresponds to X86::PSRLDQ.
       FSRL,
 
-      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
-      /// integer source in memory and FP reg result.  This corresponds to the
-      /// X86::FILD*m instructions. It has three inputs (token chain, address,
-      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
-      /// also produces a flag).
-      FILD,
-      FILD_FLAG,
-
-      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
-      /// integer destination in memory and a FP reg source.  This corresponds
-      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
-      /// has two inputs (token chain and address) and two outputs (int value
-      /// and token chain).
-      FP_TO_INT16_IN_MEM,
-      FP_TO_INT32_IN_MEM,
-      FP_TO_INT64_IN_MEM,
-
-      /// FLD - This instruction implements an extending load to FP stack slots.
-      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
-      /// operand, ptr to load from, and a ValueType node indicating the type
-      /// to load to.
-      FLD,
-
-      /// FST - This instruction implements a truncating store to FP stack
-      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, address, and a ValueType to store it
-      /// as.
-      FST,
-
       /// CALL - These operations represent an abstract X86 call
       /// instruction, which includes a bunch of information.  In particular the
       /// operands of these node are:
@@ -105,7 +76,7 @@ namespace llvm {
       ///
       CALL,
 
-      /// RDTSC_DAG - This operation implements the lowering for 
+      /// RDTSC_DAG - This operation implements the lowering for
       /// readcyclecounter
       RDTSC_DAG,
 
@@ -115,13 +86,13 @@ namespace llvm {
       /// X86 bit-test instructions.
       BT,
 
-      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
-      /// operand produced by a CMP instruction.
+      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+      /// operand, usually produced by a CMP instruction.
       SETCC,
 
       // Same as SETCC except it's materialized with a sbb and the value is all
       // one's or all zero's.
-      SETCC_CARRY,
+      SETCC_CARRY,  // R = carry_bit ? ~0 : 0
 
       /// X86 conditional moves. Operand 0 and operand 1 are the two values
       /// to select from. Operand 2 is the condition code, and operand 3 is the
@@ -157,11 +128,15 @@ namespace llvm {
       /// relative displacements.
       WrapperRIP,
 
-      /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
-      /// Can be used to move a vector value from a MMX register to a XMM
-      /// register.
+      /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+      /// of an XMM vector, with the high word zero filled.
       MOVQ2DQ,
 
+      /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+      /// to an MMX vector.  If you think this is too close to the previous
+      /// mnemonic, so do I; blame Intel.
+      MOVDQ2Q,
+
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,
@@ -184,7 +159,16 @@ namespace llvm {
 
       /// PSHUFB - Shuffle 16 8-bit values within a vector.
       PSHUFB,
-
+      
+      /// PANDN - and with not'd value.
+      PANDN,
+      
+      /// PSIGNB/W/D - Copy integer sign.
+      PSIGNB, PSIGNW, PSIGND, 
+      
+      /// PBLENDVB - Variable blend
+      PBLENDVB,
+      
       /// FMAX, FMIN - Floating point max and min.
       ///
       FMAX, FMIN,
@@ -196,17 +180,14 @@ namespace llvm {
 
       // TLSADDR - Thread Local Storage.
       TLSADDR,
-      
+
       // TLSCALL - Thread Local Storage.  When calling to an OS provided
       // thunk at the address from an earlier relocation.
       TLSCALL,
 
-      // SegmentBaseAddress - The address segment:0
-      SegmentBaseAddress,
-
       // EH_RETURN - Exception Handling helpers.
       EH_RETURN,
-      
+
       /// TC_RETURN - Tail call return.
       ///   operand #0 chain
       ///   operand #1 callee (register or absolute)
@@ -214,37 +195,29 @@ namespace llvm {
       ///   operand #3 optional in flag
       TC_RETURN,
 
-      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
-      LCMPXCHG_DAG,
-      LCMPXCHG8_DAG,
-
-      // FNSTCW16m - Store FP control world into i16 memory.
-      FNSTCW16m,
-
       // VZEXT_MOVL - Vector move low and zero extend.
       VZEXT_MOVL,
 
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
-      VZEXT_LOAD,
-
       // VSHL, VSRL - Vector logical left / right shift.
       VSHL, VSRL,
 
       // CMPPD, CMPPS - Vector double/float comparison.
       // CMPPD, CMPPS - Vector double/float comparison.
       CMPPD, CMPPS,
-      
+
       // PCMP* - Vector integer comparisons.
       PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
       PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
 
-      // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
-      ADD, SUB, SMUL, UMUL,
+      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+      ADD, SUB, ADC, SBB, SMUL,
       INC, DEC, OR, XOR, AND,
+      
+      UMUL, // LOW, HI, FLAGS = umul LHS, RHS
 
       // MUL_IMM - X86 specific multiply by immediate.
       MUL_IMM,
-      
+
       // PTEST - Vector bitwise comparisons
       PTEST,
 
@@ -291,11 +264,17 @@ namespace llvm {
       // with control flow.
       VASTART_SAVE_XMM_REGS,
 
-      // MINGW_ALLOCA - MingW's __alloca call to do stack probing.
-      MINGW_ALLOCA,
+      // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+      WIN_ALLOCA,
+
+      // Memory barrier
+      MEMBARRIER,
+      MFENCE,
+      SFENCE,
+      LFENCE,
 
-      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, 
-      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - 
+      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
       // Atomic 64-bit binary operations.
       ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       ATOMSUB64_DAG,
@@ -304,12 +283,49 @@ namespace llvm {
       ATOMAND64_DAG,
       ATOMNAND64_DAG,
       ATOMSWAP64_DAG,
-      
-      // Memory barrier
-      MEMBARRIER,
-      MFENCE,
-      SFENCE,
-      LFENCE
+
+      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+      LCMPXCHG_DAG,
+      LCMPXCHG8_DAG,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD,
+
+      // FNSTCW16m - Store FP control world into i16 memory.
+      FNSTCW16m,
+
+      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+      /// integer destination in memory and a FP reg source.  This corresponds
+      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+      /// has two inputs (token chain and address) and two outputs (int value
+      /// and token chain).
+      FP_TO_INT16_IN_MEM,
+      FP_TO_INT32_IN_MEM,
+      FP_TO_INT64_IN_MEM,
+
+      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+      /// integer source in memory and FP reg result.  This corresponds to the
+      /// X86::FILD*m instructions. It has three inputs (token chain, address,
+      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+      /// also produces a flag).
+      FILD,
+      FILD_FLAG,
+
+      /// FLD - This instruction implements an extending load to FP stack slots.
+      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+      /// operand, ptr to load from, and a ValueType node indicating the type
+      /// to load to.
+      FLD,
+
+      /// FST - This instruction implements a truncating store to FP stack
+      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+      /// chain operand, value to store, address, and a ValueType to store it
+      /// as.
+      FST,
+
+      /// VAARG_64 - This instruction grabs the address of the next argument
+      /// from a va_list. (reads and modifies the va_list in memory)
+      VAARG_64
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -392,6 +408,16 @@ namespace llvm {
     /// specifies a shuffle of elements that is suitable for input to PALIGNR.
     bool isPALIGNRMask(ShuffleVectorSDNode *N);
 
+    /// isVEXTRACTF128Index - Return true if the specified
+    /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+    /// suitable for input to VEXTRACTF128.
+    bool isVEXTRACTF128Index(SDNode *N);
+
+    /// isVINSERTF128Index - Return true if the specified
+    /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+    /// suitable for input to VINSERTF128.
+    bool isVINSERTF128Index(SDNode *N);
+
     /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
     /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
     /// instructions.
@@ -409,6 +435,16 @@ namespace llvm {
     /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
     unsigned getShufflePALIGNRImmediate(SDNode *N);
 
+    /// getExtractVEXTRACTF128Immediate - Return the appropriate
+    /// immediate to extract the specified EXTRACT_SUBVECTOR index
+    /// with VEXTRACTF128 instructions.
+    unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+    /// getInsertVINSERTF128Immediate - Return the appropriate
+    /// immediate to insert at the specified INSERT_SUBVECTOR index
+    /// with VINSERTF128 instructions.
+    unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
     /// isZeroNode - Returns true if Elt is a constant zero or a floating point
     /// constant +0.0.
     bool isZeroNode(SDValue Elt);
@@ -425,16 +461,13 @@ namespace llvm {
   public:
     explicit X86TargetLowering(X86TargetMachine &TM);
 
-    /// getPICBaseSymbol - Return the X86-32 PIC base.
-    MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const;
-    
     virtual unsigned getJumpTableEncoding() const;
 
     virtual const MCExpr *
     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                               const MachineBasicBlock *MBB, unsigned uid,
                               MCContext &Ctx) const;
-    
+
     /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
     /// jumptable.
     virtual SDValue getPICJumpTableRelocBase(SDValue Table,
@@ -442,7 +475,7 @@ namespace llvm {
     virtual const MCExpr *
     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                  unsigned JTI, MCContext &Ctx) const;
-    
+
     /// getStackPtrReg - Return the stack pointer register we are using: either
     /// ESP or RSP.
     unsigned getStackPtrReg() const { return X86StackPtr; }
@@ -486,7 +519,7 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                     SelectionDAG &DAG) const;
 
-    
+
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// isTypeDesirableForOp - Return true if the target has native support for
@@ -505,7 +538,7 @@ namespace llvm {
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
 
- 
+
     /// getTargetNodeName - This method returns the name of a target specific
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -513,26 +546,36 @@ namespace llvm {
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
-    /// computeMaskedBitsForTargetNode - Determine which of the bits specified 
-    /// in Mask are known to be either zero or one and return them in the 
+    /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+    /// in Mask are known to be either zero or one and return them in the
     /// KnownZero/KnownOne bitsets.
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                 const APInt &Mask,
-                                                APInt &KnownZero, 
+                                                APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;
 
+    // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+    // operation that are sign bits.
+    virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                     unsigned Depth) const;
+
     virtual bool
     isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
-    
+
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 
     virtual bool ExpandInlineAsm(CallInst *CI) const;
-    
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
-     
-    std::vector<unsigned> 
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    virtual ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
+    std::vector<unsigned>
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                         EVT VT) const;
 
@@ -546,15 +589,15 @@ namespace llvm {
                                               char ConstraintLetter,
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
-    
+
     /// getRegForInlineAsmConstraint - Given a physical register constraint
     /// (e.g. {edx}), return the register number and the register class for the
     /// register.  This should only be used for C_Register constraints.  On
     /// error, this returns a register number of 0.
-    std::pair<unsigned, const TargetRegisterClass*> 
+    std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
-    
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -609,7 +652,7 @@ namespace llvm {
       // shrink long double fp constant since fldt is very slow.
       return !X86ScalarSSEf64 || VT == MVT::f80;
     }
-    
+
     const X86Subtarget* getSubtarget() const {
       return Subtarget;
     }
@@ -650,8 +693,8 @@ namespace llvm {
 
     /// X86StackPtr - X86 physical register used as stack ptr.
     unsigned X86StackPtr;
-   
-    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 
+
+    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     /// floating point ops.
     /// When SSE is available, use it for f32 operations.
     /// When SSE2 is available, use it for f64 operations.
@@ -702,7 +745,6 @@ namespace llvm {
                                 SDValue Chain, bool IsTailCall, bool Is64Bit,
                                 int FPDiff, DebugLoc dl) const;
 
-    CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                          SelectionDAG &DAG) const;
 
@@ -719,6 +761,8 @@ namespace llvm {
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -729,7 +773,7 @@ namespace llvm {
     SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                       SelectionDAG &DAG) const;
-    SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
+    SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -794,6 +838,8 @@ namespace llvm {
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
 
+    virtual bool isUsedByReturnOnly(SDNode *N) const;
+
     virtual bool
       CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -810,6 +856,13 @@ namespace llvm {
     MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
                                 unsigned argNum, bool inMem) const;
 
+    /// Utility functions to emit monitor and mwait instructions. These
+    /// need to make sure that the arguments to the intrinsic are in the
+    /// correct registers.
+    MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+                                   MachineBasicBlock *BB) const;
+    MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
     /// Utility function to emit atomic bitwise operations (and, or, xor).
     /// It takes the bitwise instruction to expand, the associated machine basic
     /// block, and the associated X86 opcodes for reg/reg and reg/imm.
@@ -833,7 +886,7 @@ namespace llvm {
                                                     unsigned immOpcL,
                                                     unsigned immOpcH,
                                                     bool invSrc = false) const;
-    
+
     /// Utility function to emit atomic min and max.  It takes the min/max
     /// instruction to expand, the associated basic block, and the associated
     /// cmov opcode for moving the min or max value.
@@ -841,6 +894,11 @@ namespace llvm {
                                                           MachineBasicBlock *BB,
                                                         unsigned cmovOpc) const;
 
+    // Utility function to emit the low-level va_arg code for X86-64.
+    MachineBasicBlock *EmitVAARG64WithCustomInserter(
+                       MachineInstr *MI,
+                       MachineBasicBlock *MBB) const;
+
     /// Utility function to emit the xmm reg save portion of va_start.
     MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                    MachineInstr *BInstr,
@@ -849,12 +907,15 @@ namespace llvm {
     MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                          MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                               MachineBasicBlock *BB) const;
-    
+
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+                                          MachineBasicBlock *BB) const;
+
     /// Emit nodes that will be selected as "test Op0,Op0", or something
     /// equivalent, for use with the given x86 condition code.
     SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 000000000000..45d1c6bc9d29
--- /dev/null
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,77 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: We don't support any intrinsics for these instructions yet.
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, 
+             list<dag> pattern>
+      : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
+      : I<o, F, (outs VR64:$dst), ins,
+          !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
+          TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+  // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+  let isAsmParserOnly = 1;
+}
+
+
+let Constraints = "$src1 = $dst" in {
+  // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+  multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+    def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
+    def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
+  }
+}
+
+defm PAVGUSB  : I3DNow_binop_rm<0xBF, "pavgusb">;
+defm PF2ID    : I3DNow_binop_rm<0x1D, "pf2id">;
+defm PFACC    : I3DNow_binop_rm<0xAE, "pfacc">;
+defm PFADD    : I3DNow_binop_rm<0x9E, "pfadd">;
+defm PFCMPEQ  : I3DNow_binop_rm<0xB0, "pfcmpeq">;
+defm PFCMPGE  : I3DNow_binop_rm<0x90, "pfcmpge">;
+defm PFCMPGT  : I3DNow_binop_rm<0xA0, "pfcmpgt">;
+defm PFMAX    : I3DNow_binop_rm<0xA4, "pfmax">;
+defm PFMIN    : I3DNow_binop_rm<0x94, "pfmin">;
+defm PFMUL    : I3DNow_binop_rm<0xB4, "pfmul">;
+defm PFRCP    : I3DNow_binop_rm<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
+defm PFRSQRT  : I3DNow_binop_rm<0x97, "pfrsqrt">;
+defm PFSUB    : I3DNow_binop_rm<0x9A, "pfsub">;
+defm PFSUBR   : I3DNow_binop_rm<0xAA, "pfsubr">;
+defm PI2FD    : I3DNow_binop_rm<0x0D, "pi2fd">;
+defm PMULHRW  : I3DNow_binop_rm<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH  : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+                       "prefetch $addr", []>;
+                       
+// FIXME: Diassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in {
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+                       "prefetchw $addr", []>;
+}
+
+// "3DNowA" instructions
+defm PF2IW    : I3DNow_binop_rm<0x1C, "pf2iw">;
+defm PI2FW    : I3DNow_binop_rm<0x0C, "pi2fw">;
+defm PFNACC   : I3DNow_binop_rm<0x8A, "pfnacc">;
+defm PFPNACC  : I3DNow_binop_rm<0x8E, "pfpnacc">;
+defm PSWAPD   : I3DNow_binop_rm<0xBB, "pswapd">;
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
deleted file mode 100644
index 0884b61425e9..000000000000
--- a/lib/Target/X86/X86Instr64bit.td
+++ /dev/null
@@ -1,2250 +0,0 @@
-//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86-64 instruction set, defining the instructions,
-// and properties of the instructions which are needed for code generation,
-// machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm  : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i32AsmOperand;
-}
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
-  let PrintMethod = "print_pcrel_imm";
-  let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm   : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i8AsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
-  let PrintMethod = "printi32mem";
-  let AsmOperandLowerMethod = "lower_lea64_32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-
-// Special i64mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after callee-saved register are popped.
-def i64mem_TC : Operand<i64> {
-  let PrintMethod = "printi64mem";
-  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Complex Pattern Definitions.
-//
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
-                        [add, sub, mul, X86mul_imm, shl, or, frameindex,
-                         X86WrapperRIP], []>;
-
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
-                               [tglobaltlsaddr], []>;
-                               
-//===----------------------------------------------------------------------===//
-// Pattern fragments.
-//
-
-def i64immSExt8  : PatLeaf<(i64 immSext8)>;
-
-def GetLo32XForm : SDNodeXForm<imm, [{
-  // Transformation function: get the low 32 bits.
-  return getI32Imm((unsigned)N->getZExtValue());
-}]>;
-
-def i64immSExt32  : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-
-
-def i64immZExt32  : PatLeaf<(i64 imm), [{
-  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-  // unsignedsign extended field.
-  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
-
-def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi64i1  : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
-def zextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-def extloadi64i1   : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
-def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-//===----------------------------------------------------------------------===//
-// Instruction list...
-//
-
-// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [RSP, EFLAGS], Uses = [RSP] in {
-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-                           "#ADJCALLSTACKDOWN",
-                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[In64BitMode]>;
-def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
-                           "#ADJCALLSTACKUP",
-                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In64BitMode]>;
-}
-
-// Interrupt Instructions
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>;
-
-//===----------------------------------------------------------------------===//
-//  Call Instructions...
-//
-let isCall = 1 in
-  // All calls clobber the non-callee saved registers. RSP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
-      
-    // NOTE: this pattern doesn't match "X86call imm", because we do not know
-    // that the offset between an arbitrary immediate and the call will fit in
-    // the 32-bit pcrel field that we have.
-    def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
-                          (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
-                          "call{q}\t$dst", []>,
-                        Requires<[In64BitMode, NotWin64]>;
-    def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                          "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                        Requires<[NotWin64]>;
-    def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
-                          "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
-                        Requires<[NotWin64]>;
-                        
-    def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
-                         "lcall{q}\t{*}$dst", []>;
-  }
-
-  // FIXME: We need to teach codegen about single list of call-clobbered 
-  // registers.
-let isCall = 1, isCodeGenOnly = 1 in
-  // All calls clobber the non-callee saved registers. RSP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
-      Uses = [RSP] in {
-    def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
-                             (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
-                             "call\t$dst", []>,
-                           Requires<[IsWin64]>;
-    def WINCALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                             "call\t{*}$dst",
-                             [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
-    def WINCALL64m       : I<0xFF, MRM2m, (outs), 
-                             (ins i64mem:$dst, variable_ops), "call\t{*}$dst",
-                             [(X86call (loadi64 addr:$dst))]>, 
-                           Requires<[IsWin64]>;
-  }
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
-  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [RSP] in {
-  def TCRETURNdi64 : I<0, Pseudo, (outs),
-                         (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
-                                           variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-  let mayLoad = 1 in
-  def TCRETURNmi64 : I<0, Pseudo, (outs), 
-                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
-                       "#TC_RETURN $dst $offset", []>;
-
-  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
-                                      (ins i64i32imm_pcrel:$dst, variable_ops),
-                   "jmp\t$dst  # TAILCALL", []>;
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
-                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
-
-  let mayLoad = 1 in
-  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
-                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
-}
-
-// Branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-  def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), 
-                       "jmp{q}\t$dst", []>;
-  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
-                     [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
-  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
-                     [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
-  def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
-                      "ljmp{q}\t{*}$dst", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
-                     "ret\t#eh_return, addr: $addr",
-                     [(X86ehret GR64:$addr)]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-//  Miscellaneous Instructions...
-//
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                    "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
-def LEAVE64  : I<0xC9, RawFrm,
-                 (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
-def POP64r   : I<0x58, AddRegFrm,
-                 (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
-}
-let mayStore = 1 in {
-def PUSH64r  : I<0x50, AddRegFrm,
-                 (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
-}
-}
-
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), 
-                     "push{q}\t$imm", []>;
-def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
-                      "push{q}\t$imm", []>;
-def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
-                      "push{q}\t$imm", []>;
-}
-
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
-def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
-               Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
-def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
-                 Requires<[In64BitMode]>;
-
-def LEA64_32r : I<0x8D, MRMSrcMem,
-                  (outs GR32:$dst), (ins lea64_32mem:$src),
-                  "lea{l}\t{$src|$dst}, {$dst|$src}",
-                  [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
-
-let isReMaterializable = 1 in
-def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "lea{q}\t{$src|$dst}, {$dst|$src}",
-                  [(set GR64:$dst, lea64addr:$src)]>;
-
-let Constraints = "$src = $dst" in
-def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
-                  "bswap{q}\t$dst", 
-                  [(set GR64:$dst, (bswap GR64:$src))]>, TB;
-
-// Bit scan instructions.
-let Defs = [EFLAGS] in {
-def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                  "bsf{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
-def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "bsf{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
-
-def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                  "bsr{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
-def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                  "bsr{q}\t{$src, $dst|$dst, $src}",
-                  [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
-} // Defs = [EFLAGS]
-
-// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)]>, REP;
-
-let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
-def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
-def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
-
-// Fast system-call instructions
-def SYSEXIT64 : RI<0x35, RawFrm,
-                   (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-//  Move Instructions...
-//
-
-let neverHasSideEffects = 1 in
-def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1  in {
-def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
-                    "movabs{q}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, imm:$src)]>;
-def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
-                      "mov{q}\t{$src, $dst|$dst, $src}",
-                      [(set GR64:$dst, i64immSExt32:$src)]>;
-}
-
-// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
-// movabsq.
-let isAsmParserOnly = 1 in {
-def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
-                    "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let isCodeGenOnly = 1 in {
-def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                     "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}",
-                 [(set GR64:$dst, (load addr:$src))]>;
-
-def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}",
-                 [(store GR64:$src, addr:$dst)]>;
-def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "mov{q}\t{$src, $dst|$dst, $src}",
-                      [(store i64immSExt32:$src, addr:$dst)]>;
-
-/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}",
-                []>;
-
-let mayStore = 1 in
-def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}",
-                []>;
-}
-
-// FIXME: These definitions are utterly broken
-// Just leave them commented out for now because they're useless outside
-// of the large code model, and most compilers won't generate the instructions
-// in question.
-/*
-def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
-                      "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
-                       "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
-                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
-                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-*/
-
-// Moves to and from segment registers
-def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
-                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-// Moves to and from debug registers
-def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
-                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Sign/Zero extenders
-
-// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
-// operand, which makes it a rare instruction with an 8-bit register
-// operand that can never access an h register. If support for h registers
-// were generalized, this would require a special register class.
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR8:$src))]>, TB;
-def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
-def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR16:$src))]>, TB;
-def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
-def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
-                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
-
-// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
-                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
-                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Use movzbl instead of movzbq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                   "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
-def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                   "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
-// Use movzwl instead of movzwq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                   "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
-def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                   "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
-
-// There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
-// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
-// zero-extension, however this isn't possible when the 32-bit value is
-// defined by a truncate or is copied from something where the high bits aren't
-// necessarily all zero. In such cases, we fall back to these explicit zext
-// instructions.
-def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                    "", [(set GR64:$dst, (zext GR32:$src))]>;
-def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                    "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. And x86's cmov doesn't do anything if the
-// condition is false. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-def def32 : PatLeaf<(i32 GR32:$src), [{
-  return N->getOpcode() != ISD::TRUNCATE &&
-         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg &&
-         N->getOpcode() != X86ISD::CMOV;
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-let neverHasSideEffects = 1 in {
-  let Defs = [RAX], Uses = [EAX] in
-  def CDQE : RI<0x98, RawFrm, (outs), (ins),
-               "{cltq|cdqe}", []>;     // RAX = signext(EAX)
-
-  let Defs = [RAX,RDX], Uses = [RAX] in
-  def CQO  : RI<0x99, RawFrm, (outs), (ins),
-                "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
-}
-
-//===----------------------------------------------------------------------===//
-//  Arithmetic Instructions...
-//
-
-let Defs = [EFLAGS] in {
-
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
-                     "add{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isConvertibleToThreeAddress = 1 in {
-let isCommutable = 1 in
-// Register-Register Addition
-def ADD64rr    : RI<0x01, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "add{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86add_flag GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def ADD64rr_alt  : RI<0x03, MRMSrcReg, (outs GR64:$dst), 
-                       (ins GR64:$src1, GR64:$src2),
-                       "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Integer Addition
-def ADD64ri8  : RIi8<0x83, MRM0r, (outs GR64:$dst), 
-                     (ins GR64:$src1, i64i8imm:$src2),
-                     "add{q}\t{$src2, $dst|$dst, $src2}",
-                     [(set GR64:$dst, EFLAGS,
-                           (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
-def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "add{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isConvertibleToThreeAddress
-
-// Register-Memory Addition
-def ADD64rm     : RI<0x03, MRMSrcMem, (outs GR64:$dst), 
-                     (ins GR64:$src1, i64mem:$src2),
-                     "add{q}\t{$src2, $dst|$dst, $src2}",
-                     [(set GR64:$dst, EFLAGS,
-                           (X86add_flag GR64:$src1, (load addr:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-// Memory-Register Addition
-def ADD64mr  : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "add{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
-                   (implicit EFLAGS)]>;
-def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "add{q}\t{$src2, $dst|$dst, $src2}",
-                [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
-                 (implicit EFLAGS)]>;
-def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
-                      "add{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
-                (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
-                     "adc{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def ADC64rr  : RI<0x11, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC64rm  : RI<0x13, MRMSrcMem , (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
-
-def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
-def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "adc{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def ADC64mr  : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                  "adc{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
-                    "adc{q}\t{$src2, $dst|$dst, $src2}",
-                 [(store (adde (load addr:$dst), i64immSExt8:$src2), 
-                  addr:$dst)]>;
-def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
-                      "adc{q}\t{$src2, $dst|$dst, $src2}",
-                 [(store (adde (load addr:$dst), i64immSExt32:$src2), 
-                  addr:$dst)]>;
-} // Uses = [EFLAGS]
-
-let Constraints = "$src1 = $dst" in {
-// Register-Register Subtraction
-def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86sub_flag GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "sub{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS, 
-                        (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
-                                 (ins GR64:$src1, i64i8imm:$src2),
-                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
-def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
-                                   (ins GR64:$src1, i64i32imm:$src2),
-                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "sub{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Memory-Register Subtraction
-def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
-                   (implicit EFLAGS)]>;
-
-// Memory-Integer Subtraction
-def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(store (sub (load addr:$dst), i64immSExt8:$src2),
-                            addr:$dst),
-                     (implicit EFLAGS)]>;
-def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
-                      "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (load addr:$dst), i64immSExt32:$src2),
-                              addr:$dst),
-                       (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-def SBB64rr    : RI<0x19, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-                     
-def SBB64rm  : RI<0x1B, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2),
-                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
-
-def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
-def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), 
-                      (ins GR64:$src1, i64i32imm:$src2),
-                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "sbb{q}\t{$src, %rax|%rax, $src}", []>;
-
-def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                  "sbb{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
-                    "sbb{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
-def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
-                      "sbb{q}\t{$src2, $dst|$dst, $src2}",
-              [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-// Unsigned multiplication
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
-def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
-                "mul{q}\t$src", []>;         // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
-                "mul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
-
-// Signed multiplication
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
-                 "imul{q}\t$src", []>;         // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
-                 "imul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
-}
-
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-// Register-Register Signed Integer Multiplication
-def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
-                                   (ins GR64:$src1, GR64:$src2),
-                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
-
-// Register-Memory Signed Integer Multiplication
-def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
-                                   (ins GR64:$src1, i64mem:$src2),
-                  "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Suprisingly enough, these are not two address instructions!
-
-// Register-Integer Signed Integer Multiplication
-def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
-                      (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
-def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
-                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set GR64:$dst, EFLAGS,
-                             (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
-
-// Memory-Integer Signed Integer Multiplication
-def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
-                      (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
-                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1),
-                                          i64immSExt8:$src2))]>;
-def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
-                        (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
-                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                        [(set GR64:$dst, EFLAGS,
-                              (X86smul_flag (load addr:$src1),
-                                            i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Unsigned division / remainder
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
-// RDX:RAX/r64 = RAX,RDX
-def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
-                "div{q}\t$src", []>;
-// Signed division / remainder
-// RDX:RAX/r64 = RAX,RDX
-def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
-                "idiv{q}\t$src", []>;
-let mayLoad = 1 in {
-// RDX:RAX/[mem64] = RAX,RDX
-def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
-                "div{q}\t$src", []>;
-// RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
-                "idiv{q}\t$src", []>;
-}
-}
-
-// Unary instructions
-let Defs = [EFLAGS], CodeSize = 2 in {
-let Constraints = "$src = $dst" in
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
-                [(set GR64:$dst, (ineg GR64:$src)),
-                 (implicit EFLAGS)]>;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
-                [(store (ineg (loadi64 addr:$dst)), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
-                [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
-def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
-                [(store (add (loadi64 addr:$dst), 1), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
-                [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
-def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
-                [(store (add (loadi64 addr:$dst), -1), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
-// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), 
-                  "inc{w}\t$dst",
-                  [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
-                OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), 
-                  "inc{l}\t$dst",
-                  [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
-                Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), 
-                  "dec{w}\t$dst",
-                  [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
-                OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), 
-                  "dec{l}\t$dst",
-                  [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
-                Requires<[In64BitMode]>;
-} // Constraints = "$src = $dst", isConvertibleToThreeAddress
-
-// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
-// how to unfold them.
-def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
-                  [(store (add (loadi16 addr:$dst), 1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                OpSize, Requires<[In64BitMode]>;
-def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
-                  [(store (add (loadi32 addr:$dst), 1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                Requires<[In64BitMode]>;
-def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
-                  [(store (add (loadi16 addr:$dst), -1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                OpSize, Requires<[In64BitMode]>;
-def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
-                  [(store (add (loadi32 addr:$dst), -1), addr:$dst),
-                    (implicit EFLAGS)]>,
-                Requires<[In64BitMode]>;
-} // Defs = [EFLAGS], CodeSize
-
-
-let Defs = [EFLAGS] in {
-// Shift instructions
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
-                  "shl{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (shl GR64:$src1, CL))]>;
-let isConvertibleToThreeAddress = 1 in   // Can transform into LEA.
-def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "shl{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shl{q}\t$dst", []>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shl{q}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
-                  "shl{q}\t$dst",
-                 [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
-                  "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (srl GR64:$src1, CL))]>;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
-                  "shr{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
-def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
-                 "shr{q}\t$dst",
-                 [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
-                  "shr{q}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
-                  "shr{q}\t$dst",
-                 [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
-                 "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(set GR64:$dst, (sra GR64:$src1, CL))]>;
-def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
-                    (ins GR64:$src1, i8imm:$src2),
-                    "sar{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
-def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
-                 "sar{q}\t$dst",
-                 [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src = $dst"
-
-let Uses = [CL] in
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), 
-                 "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SAR64mi  : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "sar{q}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
-                  "sar{q}\t$dst",
-                 [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Rotate instructions
-
-let Constraints = "$src = $dst" in {
-def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
-                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
-                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
-                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
-                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
-                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
-                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = "$src = $dst"
-
-def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
-                 "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
-                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
-                 "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
-                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
-                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
-                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
-                  "rol{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
-def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "rol{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
-def ROL64r1  : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
-                  "rol{q}\t$dst",
-                  [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROL64mCL :  RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
-                   "rol{q}\t{%cl, $dst|$dst, %CL}",
-                   [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROL64mi  : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "rol{q}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROL64m1  : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
-                 "rol{q}\t$dst",
-               [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
-                  "ror{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
-def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst), 
-                    (ins GR64:$src1, i8imm:$src2),
-                    "ror{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
-def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
-                  "ror{q}\t$dst",
-                  [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), 
-                  "ror{q}\t{%cl, $dst|$dst, %CL}",
-                  [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
-                    "ror{q}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
-                 "ror{q}\t$dst",
-               [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Double shift instructions (generalizations of rotate)
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, 
-                    TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, 
-                    TB;
-}
-
-let isCommutable = 1 in {  // FIXME: Update X86InstrInfo::commuteInstruction
-def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
-                      (outs GR64:$dst), 
-                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
-                                       (i8 imm:$src3)))]>,
-                 TB;
-def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
-                      (outs GR64:$dst), 
-                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
-                                       (i8 imm:$src3)))]>,
-                 TB;
-} // isCommutable
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in {
-def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
-                      addr:$dst)]>, TB;
-def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
-                    [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
-                      addr:$dst)]>, TB;
-}
-def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
-                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
-                                       (i8 imm:$src3)), addr:$dst)]>,
-                 TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem, 
-                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
-                                       (i8 imm:$src3)), addr:$dst)]>,
-                 TB;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-//  Logical Instructions...
-//
-
-let Constraints = "$src = $dst" , AddedComplexity = 15 in
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
-                [(set GR64:$dst, (not GR64:$src))]>;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
-                [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
-
-let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
-                     "and{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def AND64rr  : RI<0x21, MRMDestReg, 
-                  (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                  "and{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86and_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                     "and{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def AND64rm  : RI<0x23, MRMSrcMem,
-                  (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                  "and{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86and_flag GR64:$src1, (load addr:$src2)))]>;
-def AND64ri8 : RIi8<0x83, MRM4r, 
-                    (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
-                    "and{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
-def AND64ri32  : RIi32<0x81, MRM4r, 
-                       (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                       "and{q}\t{$src2, $dst|$dst, $src2}",
-                       [(set GR64:$dst, EFLAGS,
-                             (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def AND64mr  : RI<0x21, MRMDestMem,
-                  (outs), (ins i64mem:$dst, GR64:$src),
-                  "and{q}\t{$src, $dst|$dst, $src}",
-                  [(store (and (load addr:$dst), GR64:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def AND64mi8 : RIi8<0x83, MRM4m,
-                    (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "and{q}\t{$src, $dst|$dst, $src}",
-                 [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-def AND64mi32  : RIi32<0x81, MRM4m,
-                       (outs), (ins i64mem:$dst, i64i32imm:$src),
-                       "and{q}\t{$src, $dst|$dst, $src}",
-             [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-              (implicit EFLAGS)]>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2),
-                  "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), 
-                    (ins GR64:$src1, GR64:$src2),
-                    "or{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def OR64rm   : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
-                  (ins GR64:$src1, i64mem:$src2),
-                  "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, (load addr:$src2)))]>;
-def OR64ri8  : RIi8<0x83, MRM1r, (outs GR64:$dst),
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "or{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, EFLAGS,
-                         (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
-def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
-                     (ins GR64:$src1, i64i32imm:$src2),
-                     "or{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                "or{q}\t{$src, $dst|$dst, $src}",
-                [(store (or (load addr:$dst), GR64:$src), addr:$dst),
-                 (implicit EFLAGS)]>;
-def OR64mi8  : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
-                    "or{q}\t{$src, $dst|$dst, $src}",
-                  [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                     "or{q}\t{$src, $dst|$dst, $src}",
-              [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-               (implicit EFLAGS)]>;
-
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
-                    "or{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def XOR64rr  : RI<0x31, MRMDestReg,  (outs GR64:$dst), 
-                  (ins GR64:$src1, GR64:$src2), 
-                  "xor{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86xor_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), 
-                     (ins GR64:$src1, GR64:$src2),
-                    "xor{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def XOR64rm  : RI<0x33, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$src1, i64mem:$src2), 
-                  "xor{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, EFLAGS,
-                        (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
-def XOR64ri8 : RIi8<0x83, MRM6r,  (outs GR64:$dst), 
-                    (ins GR64:$src1, i64i8imm:$src2),
-                    "xor{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, EFLAGS,
-                          (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
-def XOR64ri32 : RIi32<0x81, MRM6r, 
-                      (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), 
-                      "xor{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, EFLAGS,
-                            (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def XOR64mr  : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                  "xor{q}\t{$src, $dst|$dst, $src}",
-                  [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
-                   (implicit EFLAGS)]>;
-def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
-                    "xor{q}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
-                      "xor{q}\t{$src, $dst|$dst, $src}",
-             [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
-              (implicit EFLAGS)]>;
-              
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
-                     "xor{q}\t{$src, %rax|%rax, $src}", []>;
-
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-//  Comparison Instructions...
-//
-
-// Integer comparison
-let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
-                      "test{q}\t{$src, %rax|%rax, $src}", []>;
-let isCommutable = 1 in
-def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                  "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>;
-def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                  "test{q}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)),
-                    0))]>;
-def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
-                                        (ins GR64:$src1, i64i32imm:$src2),
-                       "test{q}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2),
-                      0))]>;
-def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
-                                        (ins i64mem:$src1, i64i32imm:$src2),
-                       "test{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1),
-                                           i64immSExt32:$src2), 0))]>;
-
-
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
-                     "cmp{q}\t{$src, %rax|%rax, $src}", []>;
-def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
-}
-
-def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>;
-def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
-                 "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                 [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>;
-def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>;
-def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>;
-def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
-                                          i64immSExt8:$src2))]>;
-def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
-                                       (ins i64mem:$src1, i64i32imm:$src2),
-                      "cmp{q}\t{$src2, $src1|$src1, $src2}",
-                      [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
-                                            i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Bit tests.
-// TODO: BTC, BTR, and BTS
-let Defs = [EFLAGS] in {
-def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-               "bt{q}\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
-
-// Unlike with the register+register form, the memory+register form of the
-// bt instruction does not ignore the high bits of the index. From ISel's
-// perspective, this is pretty bizarre. Disable these instructions for now.
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-               "bt{q}\t{$src2, $src1|$src1, $src2}",
-//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
-//                (implicit EFLAGS)]
-                []
-                >, TB;
-
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
-                                     i64immSExt8:$src2))]>, TB;
-
-def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
-                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
-                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
-                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
-                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // Defs = [EFLAGS]
-
-// Conditional moves
-let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
-def CMOVB64rr : RI<0x42, MRMSrcReg,       // if <u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovb{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rr: RI<0x43, MRMSrcReg,       // if >=u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovae{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rr : RI<0x44, MRMSrcReg,       // if ==, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmove{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rr: RI<0x45, MRMSrcReg,       // if !=, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovne{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rr: RI<0x46, MRMSrcReg,       // if <=u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rr : RI<0x47, MRMSrcReg,       // if >u, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmova{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rr : RI<0x4C, MRMSrcReg,       // if <s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovl{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rr: RI<0x4D, MRMSrcReg,       // if >=s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovge{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rr: RI<0x4E, MRMSrcReg,       // if <=s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovle{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rr : RI<0x4F, MRMSrcReg,       // if >s, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovg{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rr : RI<0x48, MRMSrcReg,       // if signed, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovs{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rr: RI<0x49, MRMSrcReg,       // if !signed, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovns{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rr : RI<0x4A, MRMSrcReg,       // if parity, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rr : RI<0x4B, MRMSrcReg,       // if !parity, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rr : RI<0x40, MRMSrcReg,       // if overflow, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovo{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                    X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rr : RI<0x41, MRMSrcReg,       // if !overflow, GR64 = GR64
-                   (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "cmovno{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
-                                     X86_COND_NO, EFLAGS))]>, TB;
-} // isCommutable = 1
-
-def CMOVB64rm : RI<0x42, MRMSrcMem,       // if <u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovb{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rm: RI<0x43, MRMSrcMem,       // if >=u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovae{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rm : RI<0x44, MRMSrcMem,       // if ==, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmove{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rm: RI<0x45, MRMSrcMem,       // if !=, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovne{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rm: RI<0x46, MRMSrcMem,       // if <=u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rm : RI<0x47, MRMSrcMem,       // if >u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmova{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rm : RI<0x4C, MRMSrcMem,       // if <s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovl{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rm: RI<0x4D, MRMSrcMem,       // if >=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovge{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rm: RI<0x4E, MRMSrcMem,       // if <=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovle{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rm : RI<0x4F, MRMSrcMem,       // if >s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovg{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rm : RI<0x48, MRMSrcMem,       // if signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovs{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rm: RI<0x49, MRMSrcMem,       // if !signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovns{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rm : RI<0x4A, MRMSrcMem,       // if parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rm : RI<0x4B, MRMSrcMem,       // if !parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rm : RI<0x40, MRMSrcMem,       // if overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovo{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rm : RI<0x41, MRMSrcMem,       // if !overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovno{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NO, EFLAGS))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Use sbb to materialize carry flag into a GPR.
-// FIXME: This are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
-def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C64r)>;
-
-//===----------------------------------------------------------------------===//
-// Descriptor-table support instructions
-
-// LLDT is not interpreted specially in 64-bit mode because there is no sign
-//   extension.
-def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
-                 "sldt{q}\t$dst", []>, TB;
-def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
-                 "sldt{q}\t$dst", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
-// smaller encoding, but doing so at isel time interferes with rematerialization
-// in the current register allocator. For now, this is rewritten when the
-// instruction is lowered to an MCInst.
-// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
-// when we have a better way to specify isel priority.
-let Defs = [EFLAGS],
-    AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, 0)]>;
-
-// Materialize i64 constant where top 32-bits are zero. This could theoretically
-// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
-// that would make it more difficult to rematerialize.
-let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
-                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//===----------------------------------------------------------------------===//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. RSP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-            FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
-                   ".byte\t0x66; "
-                   "leaq\t$sym(%rip), %rdi; "
-                   ".word\t0x6666; "
-                   "rex64; "
-                   "call\t__tls_get_addr@PLT",
-                  [(X86tlsaddr tls64addr:$sym)]>,
-                  Requires<[In64BitMode]>;
-
-// Darwin TLS Support
-// For x86_64, the address of the thunk is passed in %rdi, on return 
-// the address of the variable is in %rax.  All other registers are preserved.
-let Defs = [RAX],
-    Uses = [RDI],
-    usesCustomInserter = 1 in
-def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
-                  "# TLSCall_64",
-                  [(X86TLSCall addr:$sym)]>,
-                  Requires<[In64BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "movq\t%gs:$src, $dst",
-                 [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "movq\t%fs:$src, $dst",
-                 [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// Atomic Instructions
-//===----------------------------------------------------------------------===//
-
-// TODO: Get this to fold the constant into the instruction.           
-let hasSideEffects = 1, Defs = [ESP] in
-def Int_MemBarrierNoSSE64  : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
-                           "lock\n\t"
-                           "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
-                           [(X86MemBarrierNoSSE GR64:$zero)]>,
-													 Requires<[In64BitMode]>, LOCK;
-
-let Defs = [RAX, EFLAGS], Uses = [RAX] in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
-               "lock\n\t"
-               "cmpxchgq\t$swap,$ptr",
-               [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
-}
-
-let Constraints = "$val = $dst" in {
-let Defs = [EFLAGS] in
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
-               "lock\n\t"
-               "xadd\t$val, $ptr",
-               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
-                TB, LOCK;
-
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), 
-                  (ins GR64:$val,i64mem:$ptr),
-                  "xchg{q}\t{$val, $ptr|$ptr, $val}", 
-                  [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-
-def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
-                  "xchg{q}\t{$val, $src|$src, $val}", []>;
-}
-
-def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                   
-def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                      
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
-def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
-                    "cmpxchg16b\t$dst", []>, TB;
-
-def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
-                  "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                      "lock\n\t"
-                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
-                                      (ins i64mem:$dst, i64i8imm :$src2),
-                    "lock\n\t"
-                    "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
-                                        (ins i64mem:$dst, i64i32imm :$src2),
-                      "lock\n\t"
-                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
-                                      (ins i64mem:$dst, i64i8imm :$src2), 
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
-                                        (ins i64mem:$dst, i64i32imm:$src2),
-                      "lock\n\t"
-                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
-                     "lock\n\t"
-                     "inc{q}\t$dst", []>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
-                      "lock\n\t"
-                      "dec{q}\t$dst", []>, LOCK;
-}
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
-                  usesCustomInserter = 1 in {
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMAND64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMOR64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMXOR64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMNAND64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
-               "#ATOMMIN64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMMAX64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMIN64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMAX64 PSEUDO!", 
-               [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
-}
-
-// Segmentation support instructions
-
-// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), 
-                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
-                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-                 
-def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; 
-def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
-
-def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{q}\t%fs", []>, TB;
-def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{q}\t%gs", []>, TB;
-
-def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{q}\t%fs", []>, TB;
-def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{q}\t%gs", []>, TB;
-                 
-def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
-                 "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Specialized register support
-
-// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), 
-                 "smsw{q}\t$dst", []>, TB;
-
-// String manipulation instructions
-
-def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
-// code model mode, should use 'movabs'.  FIXME: This is really a hack, the
-//  'movabs' predicate should handle this sort of thing.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
-
-// In static codegen with small code model, we can get the address of a label
-// into a register with 'movl'.  FIXME: This is a hack, the 'imm' predicate of
-// the MOV64ri64i32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
-
-// In kernel code model, we can get the address of a label
-// into a register with 'movq'.  FIXME: This is a hack, the 'imm' predicate of
-// the MOV64ri32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
-          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
-          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
-          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
-          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
-
-// If we have small model and -static mode, it is safe to store global addresses
-// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
-// for MOV64mi32 should handle this sort of thing.
-def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tconstpool:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tjumptable:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, texternalsym:$src)>,
-          Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
-          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
-          Requires<[NearData, IsStatic]>;
-
-// Calls
-// Direct PC relative function call for small code model. 32-bit displacement
-// sign extended to 64-bit.
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
-          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
-
-// tailcall stuff
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
-          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
-	  Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
-          (TCRETURNmi64 addr:$dst, imm:$off)>,
-	  Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
-	  Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
-	  Requires<[In64BitMode]>;
-
-// tls has some funny stuff here...
-// This corresponds to movabs $foo@tpoff, %rax
-def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
-          (MOV64ri tglobaltlsaddr :$dst)>;
-// This corresponds to add $foo@tpoff, %rax
-def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
-          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
-          (MOV64rm tglobaltlsaddr :$dst)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR64:$src1, 0),
-          (TEST64rr GR64:$src1, GR64:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO64rm GR64:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-
-// extload
-// When extloading from 16-bit and smaller memory locations into 64-bit 
-// registers, use zero-extending loads so that the entire 64-bit register is 
-// defined, avoiding partial-register updates.
-def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
-def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
-def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
-// For other extloads, use subregs, since the high contents of the register are
-// defined after an extload.
-def : Pat<(extloadi64i32 addr:$src),
-          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
-                         sub_32bit)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
-def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
-def : Pat<(i64 (anyext GR32:$src)),
-          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR64:$src1, 128),
-          (SUB64ri8 GR64:$src1, -128)>;
-def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
-          (SUB64mi8 addr:$dst, -128)>;
-
-// The same trick applies for 32-bit immediate fields in 64-bit
-// instructions.
-def : Pat<(add GR64:$src1, 0x0000000080000000),
-          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
-def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
-          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
-
-// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
-// has an immediate with at least 32 bits of leading zeros, to avoid needing to
-// materialize that immediate in a register first.
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
-          (SUBREG_TO_REG
-            (i64 0),
-            (AND32ri
-              (EXTRACT_SUBREG GR64:$src, sub_32bit),
-              (i32 (GetLo32XForm imm:$imm))),
-            sub_32bit)>;
-
-// r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
-          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-// r & (2^16-1) ==> movz
-def : Pat<(and GR64:$src, 0xffff),
-          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR64:$src, 0xff),
-          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
-           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
-      Requires<[In64BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
-           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
-      Requires<[In64BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
-          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-def : Pat<(sext_inreg GR64:$src, i16),
-          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR64:$src, i8),
-          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
-      Requires<[In64BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
-      Requires<[In64BitMode]>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
-def : Pat<(i16 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-
-// h-register tricks.
-// For now, be conservative on x86-64 and use an h-register extract only if the
-// value is immediately zero-extended or stored, which are somewhat common
-// cases. This uses a bunch of code to prevent a register requiring a REX prefix
-// from being allocated in the same instruction as the h register, as there's
-// currently no way to describe this requirement to the register allocator.
-
-// h-register extract and zero-extend.
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
-          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                                   GR32_ABCD)),
-                                             sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
-          (EXTRACT_SUBREG
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_16bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32_NOREXrr8
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
-          (SUBREG_TO_REG
-            (i64 0),
-            (MOVZX32_NOREXrr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_32bit)>;
-
-// h-register extract and store.
-def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
-                            sub_8bit_hi))>;
-def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
-          (MOV8mr_NOREX
-            addr:$dst,
-            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                            sub_8bit_hi))>,
-      Requires<[In64BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-
-// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
-          (SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SHL64mCL addr:$dst)>;
-
-def : Pat<(srl GR64:$src1, (and CL, 63)),
-          (SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SHR64mCL addr:$dst)>;
-
-def : Pat<(sra GR64:$src1, (and CL, 63)),
-          (SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
-          (SAR64mCL addr:$dst)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in {  // Try this before the selecting to OR
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or_is_add GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-} // AddedComplexity
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, imm:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, imm:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
-
-// addition
-def : Pat<(add GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-
-// subtraction
-def : Pat<(sub GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Multiply
-def : Pat<(mul GR64:$src1, GR64:$src2),
-          (IMUL64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
-          (IMUL64rm GR64:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
-          (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
-          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
-          (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
-          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-
-// inc/dec
-def : Pat<(add GR16:$src, 1),  (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1),  (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1),  (INC64r GR64:$src)>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
-// or
-def : Pat<(or GR64:$src1, GR64:$src2),
-          (OR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
-          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
-          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
-          (OR64rm GR64:$src1, addr:$src2)>;
-
-// xor
-def : Pat<(xor GR64:$src1, GR64:$src2),
-          (XOR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
-          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
-          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
-          (XOR64rm GR64:$src1, addr:$src2)>;
-
-// and
-def : Pat<(and GR64:$src1, GR64:$src2),
-          (AND64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
-          (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
-          (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
-          (AND64rm GR64:$src1, addr:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// X86-64 SSE Instructions
-//===----------------------------------------------------------------------===//
-
-// Move instructions...
-
-def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                          (v2i64 (scalar_to_vector GR64:$src)))]>;
-def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                         "mov{d|q}\t{$src, $dst|$dst, $src}",
-                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
-                                           (iPTR 0)))]>;
-
-def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
-                       [(set FR64:$dst, (bitconvert GR64:$src))]>;
-def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                       "movq\t{$src, $dst|$dst, $src}",
-                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
-
-def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                        "movq\t{$src, $dst|$dst, $src}",
-                        [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 000000000000..f0ea06870869
--- /dev/null
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1125 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r   : I<0x8D, MRMSrcMem,
+                 (outs GR16:$dst), (ins i32mem:$src),
+                 "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r   : I<0x8D, MRMSrcMem,
+                 (outs GR32:$dst), (ins i32mem:$src),
+                 "lea{l}\t{$src|$dst}, {$dst|$src}",
+                 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+                  (outs GR32:$dst), (ins lea64_32mem:$src),
+                  "lea{l}\t{$src|$dst}, {$dst|$src}",
+                  [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "lea{q}\t{$src|$dst}, {$dst|$src}",
+                  [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+//  Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
+               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+               // This probably ought to be moved to a def : Pat<> if the
+               // syntax can be accepted.
+               [(set AL, (mul AL, GR8:$src)),
+                (implicit EFLAGS)]>;     // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src),
+               "mul{w}\t$src", 
+               []>, OpSize;    // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src),
+               "mul{l}\t$src",   // EAX,EDX = EAX*GR32
+               [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+                "mul{q}\t$src",          // RAX,RDX = RAX*GR64
+                [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+               "mul{b}\t$src",
+               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+               // This probably ought to be moved to a def : Pat<> if the
+               // syntax can be accepted.
+               [(set AL, (mul AL, (loadi8 addr:$src))),
+                (implicit EFLAGS)]>;   // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+               "mul{w}\t$src",
+               []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+              "mul{l}\t$src",
+              []>;          // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+                "mul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", []>;
+              // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w}\t$src", []>,
+              OpSize;    // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", []>;
+              // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+              // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+                "imul{b}\t$src", []>;    // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+                "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+                "imul{l}\t$src", []>;  // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+                 "imul{q}\t$src", []>;         // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+                 "imul{w}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR16:$dst, EFLAGS,
+                       (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+                 "imul{l}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR32:$dst, EFLAGS,
+                       (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+                                   (ins GR64:$src1, GR64:$src2),
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
+                  [(set GR64:$dst, EFLAGS,
+                        (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+                                  (ins GR16:$src1, i16mem:$src2),
+                 "imul{w}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR16:$dst, EFLAGS,
+                       (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+               TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), 
+                 (ins GR32:$src1, i32mem:$src2),
+                 "imul{l}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR32:$dst, EFLAGS,
+                       (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+                                   (ins GR64:$src1, i64mem:$src2),
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
+                  [(set GR64:$dst, EFLAGS,
+                        (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Suprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
+                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS, 
+                            (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
+                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR16:$dst, EFLAGS,
+                           (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+                 OpSize;
+def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
+                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
+                     (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR32:$dst, EFLAGS,
+                           (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
+                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       [(set GR64:$dst, EFLAGS,
+                             (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
+                      (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR64:$dst, EFLAGS,
+                            (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
+                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1), imm:$src2))]>,
+                 OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
+                     (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR16:$dst, EFLAGS,
+                           (X86smul_flag (load addr:$src1),
+                                         i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                     // GR32 = [mem32]*I32
+                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
+                     (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR32:$dst, EFLAGS,
+                           (X86smul_flag (load addr:$src1),
+                                         i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
+                        (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set GR64:$dst, EFLAGS,
+                              (X86smul_flag (load addr:$src1),
+                                            i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
+                      (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR64:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1),
+                                          i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
+               "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
+               "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
+               "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+                "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
+               "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
+               "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in    // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+               "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+                "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
+               "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
+               "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
+               "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+                "idiv{q}\t$src", []>;
+               
+let mayLoad = 1, mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
+               "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
+               "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in    // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), 
+               "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+                "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+//  Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "neg{b}\t$dst",
+               [(set GR8:$dst, (ineg GR8:$src1)),
+                (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+               "neg{w}\t$dst",
+               [(set GR16:$dst, (ineg GR16:$src1)),
+                (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+               "neg{l}\t$dst",
+               [(set GR32:$dst, (ineg GR32:$src1)),
+                (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+                [(set GR64:$dst, (ineg GR64:$src1)),
+                 (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+               "neg{b}\t$dst",
+               [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+                (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+               "neg{w}\t$dst",
+               [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+                (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+               "neg{l}\t$dst",
+               [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+                (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+                [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+                 (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "not{b}\t$dst",
+               [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+               "not{w}\t$dst",
+               [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+               "not{l}\t$dst",
+               [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+                [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+               "not{b}\t$dst",
+               [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+               "not{w}\t$dst",
+               [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+               "not{l}\t$dst",
+               [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+                [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "inc{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
+               "inc{w}\t$dst",
+               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+             OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
+               "inc{l}\t$dst",
+               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+             Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+                [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), 
+                  "inc{w}\t$dst",
+                  [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+                OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), 
+                  "inc{l}\t$dst",
+                  [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+                Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), 
+                  "dec{w}\t$dst",
+                  [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+                OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), 
+                  "dec{l}\t$dst",
+                  [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+                Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+  def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+               [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+                (implicit EFLAGS)]>;
+  def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+               [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+                (implicit EFLAGS)]>,
+               OpSize, Requires<[In32BitMode]>;
+  def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+               [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+                (implicit EFLAGS)]>,
+               Requires<[In32BitMode]>;
+  def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+                  [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+                   (implicit EFLAGS)]>;
+                   
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+                  [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+                    (implicit EFLAGS)]>,
+                OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+                  [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+                    (implicit EFLAGS)]>,
+                Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+                  [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+                    (implicit EFLAGS)]>,
+                OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+                  [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+                    (implicit EFLAGS)]>,
+                Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "dec{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in {   // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
+               "dec{w}\t$dst",
+               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+             OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
+               "dec{l}\t$dst",
+               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+             Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+                [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // CodeSize = 2
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+               [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+                (implicit EFLAGS)]>;
+  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+               [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+                (implicit EFLAGS)]>,
+               OpSize, Requires<[In32BitMode]>;
+  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+               [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+                (implicit EFLAGS)]>,
+               Requires<[In32BitMode]>;
+  def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+                  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+                   (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bunch of information that describes relevant X86
+/// information about value types.  For example, it can tell you what the
+/// register class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+                  PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+                  Operand immoperand, SDPatternOperator immoperator,
+                  Operand imm8operand, SDPatternOperator imm8operator,
+                  bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+  /// VT - This is the value type itself.
+  ValueType VT = vt;
+  
+  /// InstrSuffix - This is the suffix used on instructions with this type.  For
+  /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+  string InstrSuffix = instrsuffix;
+  
+  /// RegClass - This is the register class associated with this type.  For
+  /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+  RegisterClass RegClass = regclass;
+  
+  /// LoadNode - This is the load node associated with this type.  For
+  /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+  PatFrag LoadNode = loadnode;
+  
+  /// MemOperand - This is the memory operand associated with this type.  For
+  /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+  X86MemOperand MemOperand = memoperand;
+  
+  /// ImmEncoding - This is the encoding of an immediate of this type.  For
+  /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32.  Note that i64 -> Imm32
+  /// since the immediate fields of i64 instructions is a 32-bit sign extended
+  /// value.
+  ImmType ImmEncoding = immkind;
+  
+  /// ImmOperand - This is the operand kind of an immediate of this type.  For
+  /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm.  Note that i64 ->
+  /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
+  /// extended value.
+  Operand ImmOperand = immoperand;
+  
+  /// ImmOperator - This is the operator that should be used to match an
+  /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+  SDPatternOperator ImmOperator = immoperator;
+  
+  /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+  /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm.  This is
+  /// only used for instructions that have a sign-extended imm8 field form.
+  Operand Imm8Operand = imm8operand;
+  
+  /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
+  SDPatternOperator Imm8Operator = imm8operator;
+  
+  /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode.  Operations on i8 are usually even, operations on
+  /// other datatypes are odd.
+  bit HasOddOpcode = hasOddOpcode;
+  
+  /// HasOpSizePrefix - This bit is set to true if the instruction should have
+  /// the 0x66 operand size prefix.  This is set for i16 types.
+  bit HasOpSizePrefix = hasOpSizePrefix;
+  
+  /// HasREX_WPrefix - This bit is set to true if the instruction should have
+  /// the 0x40 REX prefix.  This is set for i64 types.
+  bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8  : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+                       Imm8 , i8imm ,    imm,          i8imm   , invalid_node,
+                       0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+                       Imm16, i16imm,    imm,          i16i8imm, i16immSExt8,
+                       1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+                       Imm32, i32imm,    imm,          i32i8imm, i32immSExt8,
+                       1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+                       Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+                       1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+///    suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+///    or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, 
+          string mnemonic, string args, list<dag> pattern>
+  : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+       opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+      f, outs, ins, 
+      !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+  // Infer instruction prefixes from type info.
+  let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+  let hasREX_WPrefix  = typeinfo.HasREX_WPrefix;
+}
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              dag outlist, list<dag> pattern, Format f = MRMDestReg>
+  : ITy<opcode, f, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                SDNode opnode>
+  : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst,
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
+// just a EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                SDPatternOperator opnode, Format f = MRMDestReg>
+  : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+            [(set EFLAGS,
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+            f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode>
+  : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS,
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode>
+  : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS,
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+                          EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+  : ITy<opcode, MRMSrcReg, typeinfo,
+        (outs typeinfo.RegClass:$dst),
+        (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+        mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+  // The disassembler should know about this, but not the asmparser.
+  let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              dag outlist, list<dag> pattern>
+  : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              SDNode opnode>
+  : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst,
+            (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              SDPatternOperator opnode>
+  : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+            [(set EFLAGS,
+            (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode>
+  : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS,
+            (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode>
+  : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS,
+            (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+                    EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              Format f, dag outlist, list<dag> pattern>
+  : ITy<opcode, f, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+  let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                SDNode opnode, Format f>
+  : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst,
+                (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                SDPatternOperator opnode, Format f>
+  : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+            [(set EFLAGS,
+                (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode, Format f>
+  : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS, 
+                (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode, Format f>
+  : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst, EFLAGS, 
+                (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+                        EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+               Format f, dag outlist, list<dag> pattern>
+  : ITy<opcode, f, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+  let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+             [(set typeinfo.RegClass:$dst,
+               (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+               
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+             [(set EFLAGS,
+               (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+             [(set typeinfo.RegClass:$dst, EFLAGS,
+               (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                   SDNode opnode, Format f>
+  : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+             [(set typeinfo.RegClass:$dst, EFLAGS,
+               (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+                       EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              list<dag> pattern>
+  : ITy<opcode, MRMDestMem, typeinfo,
+        (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+        mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode>
+  : BinOpMR<opcode, mnemonic, typeinfo,
+          [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+           (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                    SDNode opnode>
+  : BinOpMR<opcode, mnemonic, typeinfo,
+          [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+                  addr:$dst),
+           (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode>
+  : BinOpMR<opcode, mnemonic, typeinfo,
+            [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+              Format f, list<dag> pattern, bits<8> opcode = 0x80>
+  : ITy<opcode, f, typeinfo,
+        (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+        mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+  let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode, Format f>
+  : BinOpMI<mnemonic, typeinfo, f, 
+            [(store (opnode (typeinfo.VT (load addr:$dst)),
+                            typeinfo.ImmOperator:$src), addr:$dst),
+             (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+                  SDNode opnode, Format f>
+  : BinOpMI<mnemonic, typeinfo, f, 
+            [(store (opnode (typeinfo.VT (load addr:$dst)),
+                            typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+             (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+                SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+  : BinOpMI<mnemonic, typeinfo, f, 
+            [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+                                               typeinfo.ImmOperator:$src))],
+            opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+               Format f, list<dag> pattern>
+  : ITy<0x82, f, typeinfo,
+        (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+        mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+  let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+                   SDNode opnode, Format f>
+  : BinOpMI8<mnemonic, typeinfo, f,
+             [(store (opnode (load addr:$dst),
+                             typeinfo.Imm8Operator:$src), addr:$dst),
+              (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+                   SDNode opnode, Format f>
+  : BinOpMI8<mnemonic, typeinfo, f,
+             [(store (opnode (load addr:$dst),
+                             typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+              (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+                 SDNode opnode, Format f>
+  : BinOpMI8<mnemonic, typeinfo, f,
+             [(set EFLAGS, (opnode (load addr:$dst),
+                                   typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              Register areg>
+  : ITy<opcode, RawFrm, typeinfo,
+        (outs), (ins typeinfo.ImmOperand:$src),
+        mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
+                               areg.AsmName, ", $src}"), []> {
+  let ImmT = typeinfo.ImmEncoding;
+  let Uses = [areg];
+  let Defs = [areg];
+}
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+                         string mnemonic, Format RegMRM, Format MemMRM,
+                         SDNode opnodeflag, SDNode opnode,
+                         bit CommutableRR, bit ConvertibleToThreeAddress> {
+  let Defs = [EFLAGS] in {
+    let Constraints = "$src1 = $dst" in {
+      let isCommutable = CommutableRR,
+          isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        def #NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+        def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+        def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+        def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+      } // isCommutable
+
+      def #NAME#8rr_REV  : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+      def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+      def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+      def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+      def #NAME#8rm   : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+      def #NAME#16rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+      def #NAME#32rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+      def #NAME#64rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+      let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        // NOTE: These are order specific, we want the ri8 forms to be listed
+        // first so that they are slightly preferred to the ri forms.
+        def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+        def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+        def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+        def #NAME#8ri   : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+        def #NAME#16ri  : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+        def #NAME#32ri  : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+        def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+      }
+    } // Constraints = "$src1 = $dst"
+
+    def #NAME#8mr    : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+    def #NAME#16mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+    def #NAME#32mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+    def #NAME#64mr   : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+    // NOTE: These are order specific, we want the mi8 forms to be listed
+    // first so that they are slightly preferred to the mi forms.
+    def #NAME#16mi8  : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi8  : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi8  : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+                       
+    def #NAME#8mi    : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+    def #NAME#16mi   : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi   : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+    def #NAME#8i8   : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+    def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+    def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+    def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+  }                          
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+                          string mnemonic, Format RegMRM, Format MemMRM,
+                          SDNode opnode, bit CommutableRR,
+                           bit ConvertibleToThreeAddress> {
+  let Defs = [EFLAGS] in {
+    let Constraints = "$src1 = $dst" in {
+      let isCommutable = CommutableRR,
+          isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        def #NAME#8rr  : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+        def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+        def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+        def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+      } // isCommutable
+
+      def #NAME#8rr_REV  : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+      def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+      def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+      def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+      def #NAME#8rm   : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+      def #NAME#16rm  : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+      def #NAME#32rm  : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+      def #NAME#64rm  : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+      let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        // NOTE: These are order specific, we want the ri8 forms to be listed
+        // first so that they are slightly preferred to the ri forms.
+        def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+        def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+        def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+        def #NAME#8ri   : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+        def #NAME#16ri  : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+        def #NAME#32ri  : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+        def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+      }
+    } // Constraints = "$src1 = $dst"
+
+    def #NAME#8mr    : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+    def #NAME#16mr   : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+    def #NAME#32mr   : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+    def #NAME#64mr   : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+    // NOTE: These are order specific, we want the mi8 forms to be listed
+    // first so that they are slightly preferred to the mi forms.
+    def #NAME#16mi8  : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi8  : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi8  : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+                       
+    def #NAME#8mi    : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+    def #NAME#16mi   : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi   : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+    def #NAME#8i8   : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+    def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+    def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+    def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+  }                          
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...".  It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+                        string mnemonic, Format RegMRM, Format MemMRM,
+                        SDNode opnode,
+                        bit CommutableRR, bit ConvertibleToThreeAddress> {
+  let Defs = [EFLAGS] in {
+    let isCommutable = CommutableRR,
+        isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+      def #NAME#8rr  : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+      def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+      def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+      def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+    } // isCommutable
+
+    def #NAME#8rr_REV  : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+    def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+    def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+    def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+    def #NAME#8rm   : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+    def #NAME#16rm  : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+    def #NAME#32rm  : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+    def #NAME#64rm  : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+    let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+      // NOTE: These are order specific, we want the ri8 forms to be listed
+      // first so that they are slightly preferred to the ri forms.
+      def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+      def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+      def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+      
+      def #NAME#8ri   : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+      def #NAME#16ri  : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+      def #NAME#32ri  : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+      def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+    }
+
+    def #NAME#8mr    : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+    def #NAME#16mr   : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+    def #NAME#32mr   : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+    def #NAME#64mr   : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+    // NOTE: These are order specific, we want the mi8 forms to be listed
+    // first so that they are slightly preferred to the mi forms.
+    def #NAME#16mi8  : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi8  : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi8  : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+                       
+    def #NAME#8mi    : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+    def #NAME#16mi   : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+    def #NAME#32mi   : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+    def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+    def #NAME#8i8   : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+    def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+    def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+    def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+  }                          
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+                         X86and_flag, and, 1, 0>;
+defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+                         X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+                         X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+                         X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+                         X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+  defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+                            1, 0>;
+  defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+                            0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar like AND, except they don't
+// generate a result.  From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+                         (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
+
+let Defs = [EFLAGS] in {
+  let isCommutable = 1 in {
+    def TEST8rr  : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+    def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+    def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+    def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+  } // isCommutable
+
+  def TEST8rm    : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+  def TEST16rm   : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+  def TEST32rm   : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+  def TEST64rm   : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+  def TEST8ri    : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+  def TEST16ri   : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+  def TEST32ri   : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+  def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+  def TEST8mi    : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+  def TEST16mi   : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+  def TEST32mi   : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+  def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+                     
+  def TEST8i8    : BinOpAI<0xA8, "test", Xi8 , AL>;
+  def TEST16i16  : BinOpAI<0xA8, "test", Xi16, AX>;
+  def TEST32i32  : BinOpAI<0xA8, "test", Xi32, EAX>;
+  def TEST64i32  : BinOpAI<0xA8, "test", Xi64, RAX>;
+}                          
+
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 2a6a71dd91a3..1ea8071053e9 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -56,6 +56,31 @@ struct X86AddressMode {
     : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
+  
+  
+  void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+    assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+    
+    if (BaseType == X86AddressMode::RegBase)
+      MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+                                             false, false, false, 0, false));
+    else {
+      assert(BaseType == X86AddressMode::FrameIndexBase);
+      MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+    }
+    
+    MO.push_back(MachineOperand::CreateImm(Scale));
+    MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+                                           false, false, false, 0, false));
+    
+    if (GV)
+      MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+    else
+      MO.push_back(MachineOperand::CreateImm(Disp));
+    
+    MO.push_back(MachineOperand::CreateReg(0, false, false,
+                                           false, false, false, 0, false));
+  }
 };
 
 /// addDirectMem - This function is used to add a direct memory reference to the
@@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB,
   
   if (AM.BaseType == X86AddressMode::RegBase)
     MIB.addReg(AM.Base.Reg);
-  else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+  else {
+    assert(AM.BaseType == X86AddressMode::FrameIndexBase);
     MIB.addFrameIndex(AM.Base.FrameIndex);
-  else
-    assert (0);
+  }
+
   MIB.addImm(AM.Scale).addReg(AM.IndexReg);
   if (AM.GV)
     MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
@@ -131,9 +157,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
   if (TID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            Flags, Offset,
-                            MFI.getObjectSize(FI),
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+                            Flags, MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   return addOffset(MIB.addFrameIndex(FI), Offset)
             .addMemOperand(MMO);
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 000000000000..3a43b22ddf3d
--- /dev/null
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// SetCC instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+      isCommutable = 1 in {
+    def #NAME#16rr
+      : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR16:$dst,
+                (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+    def #NAME#32rr
+      : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR32:$dst,
+                (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+    def #NAME#64rr
+      :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+          !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR64:$dst,
+                (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+  }
+
+  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+    def #NAME#16rm
+      : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+                                    CondNode, EFLAGS))]>, TB, OpSize;
+    def #NAME#32rm
+      : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+                                    CondNode, EFLAGS))]>, TB;
+    def #NAME#64rm
+      :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+          !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+                                    CondNode, EFLAGS))]>, TB;
+  } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO  : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB  : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE  : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA  : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS  : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP  : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL  : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG  : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+  let Uses = [EFLAGS] in {
+    def r    : I<opc, MRM0r,  (outs GR8:$dst), (ins),
+                     !strconcat(Mnemonic, "\t$dst"),
+                     [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+    def m    : I<opc, MRM0m,  (outs), (ins i8mem:$dst),
+                     !strconcat(Mnemonic, "\t$dst"),
+                     [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+  } // Uses = [EFLAGS]
+}
+
+defm SETO  : SETCC<0x90, "seto",  X86_COND_O>;   // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>;  // is overflow bit not set
+defm SETB  : SETCC<0x92, "setb",  X86_COND_B>;   // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>;  // unsigned greater or equal
+defm SETE  : SETCC<0x94, "sete",  X86_COND_E>;   // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>;  // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>;  // unsigned less than or equal
+defm SETA  : SETCC<0x97, "seta",  X86_COND_A>;   // unsigned greater than
+defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;   // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>;  // is not signed
+defm SETP  : SETCC<0x9A, "setp",  X86_COND_P>;   // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>;  // is parity bit not set
+defm SETL  : SETCC<0x9C, "setl",  X86_COND_L>;   // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>;  // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>;  // signed less than or equal
+defm SETG  : SETCC<0x9F, "setg",  X86_COND_G>;   // signed greater than
+
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 000000000000..4c915d97b62b
--- /dev/null
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1626 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+  // Transformation function: get the low 32 bits.
+  return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+  // Transformation function: get the low 8 bits.
+  return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction.  This expands to code that looks like this:
+//     call  $next_inst
+//     popl %destreg"
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+                      "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+                           "#ADJCALLSTACKDOWN",
+                           [(X86callseq_start timm:$amt)]>,
+                          Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+                           "#ADJCALLSTACKUP",
+                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+                          Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+                           "#ADJCALLSTACKDOWN",
+                           [(X86callseq_start timm:$amt)]>,
+                          Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+                           "#ADJCALLSTACKUP",
+                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+                          Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+                              (outs),
+                              (ins GR8:$al,
+                                   i64imm:$regsavefi, i64imm:$offset,
+                                   variable_ops),
+                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+                              [(X86vastart_save_xmm_regs GR8:$al,
+                                                         imm:$regsavefi,
+                                                         imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+                 (outs GR64:$dst),
+                 (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+                 "#VAARG_64 $dst, $ap, $size, $mode, $align",
+                 [(set GR64:$dst,
+                    (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+                  (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets.  These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+  def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+                     "# dynamic stack allocation",
+                     [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+                    "ret\t#eh_return, addr: $addr",
+                    [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+                     "ret\t#eh_return, addr: $addr",
+                     [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+    isCodeGenOnly = 1 in {
+def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+                 [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+                 "",
+                 [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+                 [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+    AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+                 [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+    isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+                 [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+                OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+                 [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+                 [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type.  When
+// this happens, it is great.  However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+          (SETBr)>;
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                  [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                  [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                  [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+                   [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                  [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                  [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                  [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+                   [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+    Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+                  "# TLS_addr32",
+                  [(X86tlsaddr tls32addr:$sym)]>,
+                  Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+            FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+    Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                   "# TLS_addr64",
+                  [(X86tlsaddr tls64addr:$sym)]>,
+                  Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax.  %ecx is trashed during the function
+// call.  All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+    Uses = [ESP],
+    usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+                "# TLSCall_32",
+                [(X86TLSCall addr:$sym)]>,
+                Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax.  All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+    Uses = [RSP, RDI],
+    usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                  "# TLSCall_64",
+                  [(X86TLSCall addr:$sym)]>,
+                  Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+                 "#CMOV_GR8 PSEUDO!",
+                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+                                          imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+                    (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+                    "#CMOV_GR32* PSEUDO!",
+                    [(set GR32:$dst,
+                      (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+                    "#CMOV_GR16* PSEUDO!",
+                    [(set GR16:$dst,
+                      (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+                    (outs RFP32:$dst),
+                    (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+                    "#CMOV_RFP32 PSEUDO!",
+                    [(set RFP32:$dst,
+                      (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+                    (outs RFP64:$dst),
+                    (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+                    "#CMOV_RFP64 PSEUDO!",
+                    [(set RFP64:$dst,
+                      (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+                    (outs RFP80:$dst),
+                    (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+                    "#CMOV_RFP80 PSEUDO!",
+                    [(set RFP80:$dst,
+                      (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst" in
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+                  usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMAND8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMOR8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMXOR8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMNAND8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMAND16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMOR16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMXOR16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMNAND16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+               "#ATOMMIN16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMMAX16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMUMIN16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMUMAX16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMAND32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMOR32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMXOR32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMNAND32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+               "#ATOMMIN32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMMAX32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMUMIN32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMUMAX32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMAND64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMOR64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMXOR64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMNAND64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+               "#ATOMMIN64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMMAX64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMUMIN64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+               "#ATOMUMAX64 PSEUDO!",
+               [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+                  Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+                  Uses = [EAX, EBX, ECX, EDX],
+                  mayLoad = 1, mayStore = 1,
+                  usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+               "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked  : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+                      "lock\n\t"
+                      "or{l}\t{$zero, $dst|$dst, $zero}",
+                      []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+                     "#MEMBARRIER",
+                     [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64  : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+                           "lock\n\t"
+                           "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+                           [(X86MemBarrierNoSSE GR64:$zero)]>,
+                           Requires<[In64BitMode]>, LOCK;
+
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+def LOCK_ADD8mr  : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+                    "lock\n\t"
+                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                    "lock\n\t"
+                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+                    "lock\n\t"
+                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                    "lock\n\t"
+                     "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+                                        (ins i64mem:$dst, i64i32imm :$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                    "lock\n\t"
+                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+                                      (ins i64mem:$dst, i64i8imm :$src2),
+                    "lock\n\t"
+                    "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+                    "lock\n\t"
+                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                    "lock\n\t"
+                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                    "lock\n\t"
+                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+                    "lock\n\t"
+                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                    "lock\n\t"
+                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                    "lock\n\t"
+                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+                                        (ins i64mem:$dst, i64i32imm:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                    "lock\n\t"
+                     "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                    "lock\n\t"
+                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+                                      (ins i64mem:$dst, i64i8imm :$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+                    "lock\n\t"
+                    "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+                    "lock\n\t"
+                    "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+                      "lock\n\t"
+                      "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+    isCodeGenOnly = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+               "lock\n\t"
+               "cmpxchg8b\t$ptr",
+               [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+               "lock\n\t"
+               "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+               [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+               "lock\n\t"
+               "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+               [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+               "lock\n\t"
+               "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+               [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+               "lock\n\t"
+               "cmpxchgq\t$swap,$ptr",
+               [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+               "lock\n\t"
+               "xadd{b}\t{$val, $ptr|$ptr, $val}",
+               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+                TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+               "lock\n\t"
+               "xadd{w}\t{$val, $ptr|$ptr, $val}",
+               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+                TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+               "lock\n\t"
+               "xadd{l}\t{$val, $ptr|$ptr, $val}",
+               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+               "lock\n\t"
+               "xadd\t$val, $ptr",
+               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+                TB, LOCK;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+  def CMOV_FR32 : I<0, Pseudo,
+                    (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+                    "#CMOV_FR32 PSEUDO!",
+                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+                                                  EFLAGS))]>;
+  def CMOV_FR64 : I<0, Pseudo,
+                    (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+                    "#CMOV_FR64 PSEUDO!",
+                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+                                                  EFLAGS))]>;
+  def CMOV_V4F32 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V4F32 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                          EFLAGS)))]>;
+  def CMOV_V2F64 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V2F64 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                          EFLAGS)))]>;
+  def CMOV_V2I64 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V2I64 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                          EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+          (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+          (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+          (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+          (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+          (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+          (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+          (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
+// code model mode, should use 'movabs'.  FIXME: This is really a hack, the
+//  'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'.  FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'.  FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
+// for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tconstpool:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tjumptable:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, texternalsym:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+          Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+          (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+          (TCRETURNri GR32_TC:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi addr:$dst, imm:$off)>,
+          Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi64 addr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+          (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+          (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+          (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+          (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+          (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+          (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+                  Instruction Inst64> {
+  def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+            (Inst16 GR16:$src2, addr:$src1)>;
+  def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+            (Inst32 GR32:$src2, addr:$src1)>;
+  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+            (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+                         sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. And x86's cmov doesn't do anything if the
+// condition is false. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg &&
+         N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies.  However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read.  To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero0, KnownOne0;
+  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  APInt KnownZero1, KnownOne1;
+  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+    Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    "", // orw/addw REG, REG
+                    [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+                    "", // orl/addl REG, REG
+                    [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB  : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+                    "", // orq/addq REG, REG
+                    [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+                    (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+                    "", // orw/addw REG, imm8
+                    [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    "", // orw/addw REG, imm
+                    [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+                    (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+                    "", // orl/addl REG, imm8
+                    [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                    "", // orl/addl REG, imm
+                    [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+                    (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+                    "", // orq/addq REG, imm8
+                    [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+                     (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                      "", // orq/addq REG, imm
+                      [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                  i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+          (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+          (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+          (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+          (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+          (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+          (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
+          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of a 8 bit constant, use that.
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+          (SUBREG_TO_REG
+            (i64 0),
+            (AND32ri8
+              (EXTRACT_SUBREG GR64:$src, sub_32bit),
+              (i32 (GetLo8XForm imm:$imm))),
+            sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+          (SUBREG_TO_REG
+            (i64 0),
+            (AND32ri
+              (EXTRACT_SUBREG GR64:$src, sub_32bit),
+              (i32 (GetLo32XForm imm:$imm))),
+            sub_32bit)>;
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+                                                             GR32_ABCD)),
+                                      sub_8bit))>,
+      Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+          (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
+                                                             GR16_ABCD)),
+                                      sub_8bit))>,
+      Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+      Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+      Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+          (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                             GR32_ABCD)),
+                                      sub_8bit))>,
+      Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+          (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+                                                             GR16_ABCD)),
+                                      sub_8bit))>,
+      Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+      Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+      Requires<[In64BitMode]>;
+
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+                          sub_8bit)>,
+      Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                          sub_8bit)>,
+      Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+      Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+      Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                          sub_8bit_hi)>,
+      Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+                          sub_8bit_hi)>,
+      Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+          (EXTRACT_SUBREG
+            (MOVZX32rr8
+              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                              sub_8bit_hi)),
+            sub_16bit)>,
+      Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+                                                             GR16_ABCD)),
+                                      sub_8bit_hi))>,
+      Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+                                                             GR16_ABCD)),
+                                      sub_8bit_hi))>,
+      Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                             GR32_ABCD)),
+                                      sub_8bit_hi))>,
+      Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                             GR32_ABCD)),
+                                      sub_8bit_hi))>,
+      Requires<[In32BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+          (SUBREG_TO_REG
+            (i64 0),
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+                              sub_8bit_hi)),
+            sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+          (MOVZX32_NOREXrr8
+            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+                            sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+                                                                   GR32_ABCD)),
+                                             sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+          (EXTRACT_SUBREG
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                              sub_8bit_hi)),
+            sub_16bit)>,
+      Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32_NOREXrr8
+            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                            sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32_NOREXrr8
+            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                            sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+          (SUBREG_TO_REG
+            (i64 0),
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                              sub_8bit_hi)),
+            sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+          (SUBREG_TO_REG
+            (i64 0),
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                              sub_8bit_hi)),
+            sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+                            sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+                            sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+                            sub_8bit_hi))>,
+      Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+          (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+          (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+          (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+          (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+          (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+          (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+          (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+          (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+          (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+          (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+          (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+          (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+          (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+          (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+          (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+          (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+          (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+          (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+          (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+          (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+          (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+          (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr  GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+          (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+          (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+          (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+          (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+          (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+          (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+          (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+          (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+          (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+          (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+          (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+          (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+          (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+          (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+          (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+          (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+          (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+          (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+          (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply by 2 with EFLAGS result.
+let AddedComplexity = 2 in {
+def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+          (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+          (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+          (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+          (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+          (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+          (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+          (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+          (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
+def : Pat<(add GR8 :$src, 1), (INC8r     GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r    GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r    GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r    GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r     GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r    GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r    GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r    GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+          (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+          (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+          (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+          (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri  GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+          (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+          (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr  GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+          (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+          (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+          (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+          (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+          (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+          (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+          (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr  GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+          (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+          (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+          (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+          (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+          (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+          (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+          (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+          (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+          (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+          (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+          (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 000000000000..77f47250e9fd
--- /dev/null
+++ b/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,294 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasCtrlDep = 1, FPForm = SpecialFP in {
+  def RET    : I   <0xC3, RawFrm, (outs), (ins variable_ops),
+                    "ret",
+                    [(X86retflag 0)]>;
+  def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+                    "ret\t$amt",
+                    [(X86retflag timm:$amt)]>;
+  def RETIW  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+                    "retw\t$amt",
+                    []>, OpSize;
+  def LRETL  : I   <0xCB, RawFrm, (outs), (ins),
+                    "lretl", []>;
+  def LRETQ  : RI  <0xCB, RawFrm, (outs), (ins),
+                    "lretq", []>;
+  def LRETI  : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+                    "lret\t$amt", []>;
+  def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+                    "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+  def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+                        "jmp\t$dst", [(br bb:$dst)]>;
+  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+                       "jmp\t$dst", []>;
+  def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+                       "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+    def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+                       [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+  }
+}
+
+defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcx/jecx/jrcx instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+  // These are the 32-bit versions of this instruction for the asmparser.  In
+  // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
+  // jecxz.
+  let Uses = [CX] in
+    def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                        "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+  let Uses = [ECX] in
+    def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                           "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+  // J*CXZ instruction: 64-bit versions of this instruction for the asmparser.
+  // In 64-bit mode, the address size prefix is jecxz and the unprefixed version
+  // is jrcxz.
+  let Uses = [ECX] in
+    def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                            "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+  let Uses = [RCX] in
+    def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                           "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+                     [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+                     [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+                     [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+                     [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+  def FARJMP16i  : Iseg16<0xEA, RawFrmImm16, (outs),
+                          (ins i16imm:$off, i16imm:$seg),
+                          "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+  def FARJMP32i  : Iseg32<0xEA, RawFrmImm16, (outs),
+                          (ins i32imm:$off, i16imm:$seg),
+                          "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+  def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+                      "ljmp{q}\t{*}$dst", []>;
+
+  def FARJMP16m  : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+                     "ljmp{w}\t{*}$dst", []>, OpSize;
+  def FARJMP32m  : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+                     "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP   : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE  : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+//  Call Instructions...
+//
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. ESP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+                           (outs), (ins i32imm_pcrel:$dst,variable_ops),
+                           "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+                        "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+                         Requires<[In32BitMode]>;
+    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+                        "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+                        Requires<[In32BitMode]>;
+
+    def FARCALL16i  : Iseg16<0x9A, RawFrmImm16, (outs),
+                             (ins i16imm:$off, i16imm:$seg),
+                             "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+    def FARCALL32i  : Iseg32<0x9A, RawFrmImm16, (outs),
+                             (ins i32imm:$off, i16imm:$seg),
+                             "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+    def FARCALL16m  : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+                        "lcall{w}\t{*}$dst", []>, OpSize;
+    def FARCALL32m  : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+                        "lcall{l}\t{*}$dst", []>;
+
+    // callw for 16 bit code for the assembler.
+    let isAsmParserOnly = 1 in
+      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+                       (outs), (ins i16imm_pcrel:$dst, variable_ops),
+                       "callw\t$dst", []>, OpSize;
+  }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    isCodeGenOnly = 1 in
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+  def TCRETURNdi : PseudoI<(outs),
+                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+  def TCRETURNri : PseudoI<(outs),
+                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+  let mayLoad = 1 in
+  def TCRETURNmi : PseudoI<(outs),
+                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+  // FIXME: The should be pseudo instructions that are lowered when going to
+  // mcinst.
+  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+                           (ins i32imm_pcrel:$dst, variable_ops),
+                 "jmp\t$dst  # TAILCALL",
+                 []>;
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+                   "", []>;  // FIXME: Remove encoding when JIT is dead.
+  let mayLoad = 1 in
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+                   "jmp{l}\t{*}$dst  # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Call Instructions...
+//
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
+
+    // NOTE: this pattern doesn't match "X86call imm", because we do not know
+    // that the offset between an arbitrary immediate and the call will fit in
+    // the 32-bit pcrel field that we have.
+    def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+                          (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                          "call{q}\t$dst", []>,
+                        Requires<[In64BitMode, NotWin64]>;
+    def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                          "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+                        Requires<[In64BitMode, NotWin64]>;
+    def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+                          "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+                        Requires<[In64BitMode, NotWin64]>;
+
+    def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+                         "lcall{q}\t{*}$dst", []>;
+  }
+
+  // FIXME: We need to teach codegen about single list of call-clobbered
+  // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP] in {
+    def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+                             (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                             "call{q}\t$dst", []>,
+                           Requires<[IsWin64]>;
+    def WINCALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                             "call{q}\t{*}$dst",
+                             [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+    def WINCALL64m       : I<0xFF, MRM2m, (outs),
+                              (ins i64mem:$dst,variable_ops),
+                             "call{q}\t{*}$dst",
+                             [(X86call (loadi64 addr:$dst))]>,
+                           Requires<[IsWin64]>;
+  }
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    isCodeGenOnly = 1 in
+  // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP],
+      usesCustomInserter = 1 in {
+  def TCRETURNdi64 : PseudoI<(outs),
+                      (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                      []>;
+  def TCRETURNri64 : PseudoI<(outs),
+                      (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+  let mayLoad = 1 in
+  def TCRETURNmi64 : PseudoI<(outs),
+                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+                                      (ins i64i32imm_pcrel:$dst, variable_ops),
+                   "jmp\t$dst  # TAILCALL", []>;
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+
+  let mayLoad = 1 in
+  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 000000000000..867c0f8b6848
--- /dev/null
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,172 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+  let Defs = [AX], Uses = [AL] in
+  def CBW : I<0x98, RawFrm, (outs), (ins),
+              "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
+  let Defs = [EAX], Uses = [AX] in
+  def CWDE : I<0x98, RawFrm, (outs), (ins),
+              "{cwtl|cwde}", []>;   // EAX = signext(AX)
+
+  let Defs = [AX,DX], Uses = [AX] in
+  def CWD : I<0x99, RawFrm, (outs), (ins),
+              "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+  let Defs = [EAX,EDX], Uses = [EAX] in
+  def CDQ : I<0x99, RawFrm, (outs), (ins),
+              "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+  let Defs = [RAX], Uses = [EAX] in
+  def CDQE : RI<0x98, RawFrm, (outs), (ins),
+               "{cltq|cdqe}", []>;     // RAX = signext(EAX)
+
+  let Defs = [RAX,RDX], Uses = [RAX] in
+  def CQO  : RI<0x99, RawFrm, (outs), (ins),
+                "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+// Use movsbl intead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.  Actual movsbw included for the disassembler.
+def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// FIXME: Use a pat pattern or define a syntax here.                    
+let isCodeGenOnly=1 in {
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+                   "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+                   "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+}
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl intead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.  Actual movzbw included for the disassembler.
+def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;  
+// FIXME: Use a pat pattern or define a syntax here.                    
+let isCodeGenOnly=1 in {
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+                   "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+                   "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
+}
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+                         (outs GR32_NOREX:$dst), (ins GR8:$src),
+                         "movz{bl|x}\t{$src, $dst|$dst, $src}",
+                         []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+                         (outs GR32_NOREX:$dst), (ins i8mem:$src),
+                         "movz{bl|x}\t{$src, $dst|$dst, $src}",
+                         []>, TB;
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+                       "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+                   "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+                   "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+                   "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                   "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension, however this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+                    "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+                    "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
+
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 9c9bcc7d0b6a..b506f5e0b81a 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -32,21 +32,24 @@ def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fst          : SDNode<"X86ISD::FST", SDTX86Fst,
-                             [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
-                             [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+                              SDNPMemOperand]>;
 def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m",          SDTX86CwdStore,
-                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
@@ -70,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
 
 // Some 'special' instructions
 let usesCustomInserter = 1 in {  // Expanded after instruction selection.
-  def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i16mem:$dst, RFP32:$src),
-                              "##FP32_TO_INT16_IN_MEM PSEUDO!",
+  def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
                               [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
-  def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i32mem:$dst, RFP32:$src),
-                              "##FP32_TO_INT32_IN_MEM PSEUDO!",
+  def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
                               [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
-  def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i64mem:$dst, RFP32:$src),
-                              "##FP32_TO_INT64_IN_MEM PSEUDO!",
+  def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
                               [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
-  def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i16mem:$dst, RFP64:$src),
-                              "##FP64_TO_INT16_IN_MEM PSEUDO!",
+  def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
                               [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
-  def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i32mem:$dst, RFP64:$src),
-                              "##FP64_TO_INT32_IN_MEM PSEUDO!",
+  def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
                               [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
-  def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i64mem:$dst, RFP64:$src),
-                              "##FP64_TO_INT64_IN_MEM PSEUDO!",
+  def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
                               [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
-  def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i16mem:$dst, RFP80:$src),
-                              "##FP80_TO_INT16_IN_MEM PSEUDO!",
+  def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
                               [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
-  def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i32mem:$dst, RFP80:$src),
-                              "##FP80_TO_INT32_IN_MEM PSEUDO!",
+  def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
                               [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
-  def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
-                              (outs), (ins i64mem:$dst, RFP80:$src),
-                              "##FP80_TO_INT64_IN_MEM PSEUDO!",
+  def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
                               [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
 }
 
@@ -212,11 +197,11 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
                   [(set RFP80:$dst, 
                     (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
 def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { 
+                 !strconcat("f", asmstring, "{s}\t$src")> { 
   let mayLoad = 1; 
 }
 def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src), 
-                 !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { 
+                 !strconcat("f", asmstring, "{l}\t$src")> { 
   let mayLoad = 1; 
 }
 // ST(0) = ST(0) + [memint]
@@ -245,11 +230,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
                     [(set RFP80:$dst, (OpNode RFP80:$src1,
                                        (X86fild addr:$src2, i32)))]>;
 def _FI16m  : FPI<0xDE, fp, (outs), (ins i16mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { 
+                  !strconcat("fi", asmstring, "{s}\t$src")> { 
   let mayLoad = 1; 
 }
 def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src), 
-                  !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { 
+                  !strconcat("fi", asmstring, "{l}\t$src")> { 
   let mayLoad = 1; 
 }
 }
@@ -580,16 +565,16 @@ def UCOM_FPPr  : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
 
 def UCOM_FIr   : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i)
                     (outs), (ins RST:$reg),
-                    "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+                    "fucomi\t$reg">, DB;
 def UCOM_FIPr  : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i), pop
                     (outs), (ins RST:$reg),
-                    "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+                    "fucompi\t$reg">, DF;
 }
 
 def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
-                  "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+                  "fcomi\t$reg">, DB;
 def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
-                   "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+                   "fcompi\t$reg">, DF;
 
 // Floating point flag ops.
 let Defs = [AX] in
@@ -604,8 +589,8 @@ let mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
 
-// Register free
-
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
 def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
                 "ffree\t$reg">, DD;
 
@@ -613,7 +598,8 @@ def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
 
 def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
 
-// Operandless floating-point instructions for the disassembler
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
 
 def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
 def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
@@ -639,8 +625,12 @@ def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
 
 def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
                "fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+                 "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
 def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
                 "fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+                  "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 79187e9a76d7..344c14c112a0 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,7 +39,8 @@ def MRM_E8 : Format<39>;
 def MRM_F0 : Format<40>;
 def MRM_F8 : Format<41>;
 def MRM_F9 : Format<42>;
-def RawFrmImm16 : Format<43>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -108,6 +109,7 @@ class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L  { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode  { bit has3DNow0F0FOpcode = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -123,6 +125,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   dag InOperandList = ins;
   string AsmString = AsmStr;
 
+  // If this is a pseudo instruction, mark it isCodeGenOnly.
+  let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
   //
   // Attributes specific to X86 instructions...
   //
@@ -130,17 +135,18 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasAdSizePrefix = 0;  // Does this inst have a 0x67 prefix?
 
   bits<4> Prefix = 0;       // Which prefix byte does this inst have?
-  bit hasREX_WPrefix  = 0;  // Does this inst requires the REX.W prefix?
+  bit hasREX_WPrefix  = 0;  // Does this inst require the REX.W prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
   Domain ExeDomain = d;
-  bit hasVEXPrefix = 0;     // Does this inst requires a VEX prefix?
+  bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
   bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
-  bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
-  bit hasVEX_i8ImmReg = 0;  // Does this inst requires the last source register
+  bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
+  bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
                             // to be encoded in a immediate field?
-  bit hasVEX_L = 0;         // Does this inst uses large (256-bit) registers?
+  bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
+  bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -159,6 +165,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{34}    = hasVEX_4VPrefix;
   let TSFlags{35}    = hasVEX_i8ImmReg;
   let TSFlags{36}    = hasVEX_L;
+  let TSFlags{37}    = has3DNow0F0FOpcode;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+  : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+  let Pattern = pattern;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 01149b699213..5016c0f171ae 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -15,51 +15,8 @@
 // MMX Pattern Fragments
 //===----------------------------------------------------------------------===//
 
-def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
-
-def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
-def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
-def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
-def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
-
-//===----------------------------------------------------------------------===//
-// MMX Masks
-//===----------------------------------------------------------------------===//
-
-// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
-// PSHUFW imm.
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
-def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
-def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                               (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], MMX_SHUFFLE_get_shuf_imm>;
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx  : PatFrag<(ops node:$in), (x86mmx  (bitconvert node:$in))>;
 
 //===----------------------------------------------------------------------===//
 // SSE specific DAG Nodes.
@@ -86,6 +43,21 @@ def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest>;
 def X86pshufb  : SDNode<"X86ISD::PSHUFB",
                  SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>]>>;
+def X86pandn   : SDNode<"X86ISD::PANDN", 
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignb  : SDNode<"X86ISD::PSIGNB", 
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignw  : SDNode<"X86ISD::PSIGNW", 
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignd  : SDNode<"X86ISD::PSIGND", 
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB", 
+                 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
 def X86pextrb  : SDNode<"X86ISD::PEXTRB",
                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
 def X86pextrw  : SDNode<"X86ISD::PEXTRW",
@@ -102,7 +74,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad]>;
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
 def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
 def X86cmpps   : SDNode<"X86ISD::CMPPS",     SDTX86VFCMP>;
@@ -134,18 +106,12 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
 def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                  SDTCisSameAs<0,2>, SDTCisInt<3>]>;
 
-def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
-                                  SDTCisInt<2>]>;
-
 def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
 
 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
 def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
 
-def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
-def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
-
 def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
 def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
 
@@ -187,9 +153,11 @@ def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
 // the top elements.  These are used for the SSE 'ss' and 'sd' instruction
 // forms.
 def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
+                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+                                   SDNPWantRoot]>;
 def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
+                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+                                   SDNPWantRoot]>;
 
 def ssmem : Operand<v4f32> {
   let PrintMethod = "printf32mem";
@@ -273,6 +241,7 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
 def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
 def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
 
 // 256-bit memop pattern fragments
@@ -289,10 +258,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 8;
 }]>;
 
-def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+def memopmmx  : PatFrag<(ops node:$ptr), (x86mmx  (memop64 node:$ptr))>;
 
 // MOVNT Support
 // Like 'store', but requires the non-temporal bit to be set
@@ -376,6 +342,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
   return getI8Imm(X86::getShufflePALIGNRImmediate(N));
 }]>;
 
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+  return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to 
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+  return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
 def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
                        (vector_shuffle node:$lhs, node:$rhs), [{
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -466,3 +444,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
                      (vector_shuffle node:$lhs, node:$rhs), [{
   return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
 }], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+                                   (extract_subvector node:$bigvec,
+                                                      node:$index), [{
+  return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+                                      node:$index),
+                                 (insert_subvector node:$bigvec, node:$smallvec,
+                                                   node:$index), [{
+  return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5280940cf437..ceb1b6539826 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -34,7 +34,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
-
 #include <limits>
 
 using namespace llvm;
@@ -55,7 +54,11 @@ ReMatPICStubLoad("remat-pic-stub-load",
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
-  SmallVector<unsigned,16> AmbEntries;
+  enum {
+    TB_NOT_REVERSABLE = 1U << 31,
+    TB_FLAGS = TB_NOT_REVERSABLE
+  };
+
   static const unsigned OpTbl2Addr[][2] = {
     { X86::ADC32ri,     X86::ADC32mi },
     { X86::ADC32ri8,    X86::ADC32mi8 },
@@ -65,13 +68,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADC64rr,     X86::ADC64mr },
     { X86::ADD16ri,     X86::ADD16mi },
     { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16ri_DB,  X86::ADD16mi  | TB_NOT_REVERSABLE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
     { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,     X86::ADD32mi },
     { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32ri_DB,  X86::ADD32mi | TB_NOT_REVERSABLE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
     { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32,   X86::ADD64mi32 },
     { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
     { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,      X86::ADD8mi },
     { X86::ADD8rr,      X86::ADD8mr },
     { X86::AND16ri,     X86::AND16mi },
@@ -216,16 +228,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
     unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1];
-    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                               std::make_pair(MemOp,0))).second)
-      assert(false && "Duplicated entries?");
+    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 0, folded load and store, no alignment requirement.
     unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                                std::make_pair(RegOp,
-                                                              AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+            "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   // If the third value is 1, then it's folding either a load or a store.
@@ -252,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::DIV64r,      X86::DIV64m, 1, 0 },
     { X86::DIV8r,       X86::DIV8m, 1, 0 },
     { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
     { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
     { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
     { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
@@ -268,7 +285,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
-    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
     { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -312,19 +328,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp = OpTbl0[i][0];
-    unsigned MemOp = OpTbl0[i][1];
-    unsigned Align = OpTbl0[i][3];
-    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    unsigned RegOp      = OpTbl0[i][0];
+    unsigned MemOp      = OpTbl0[i][1] & ~TB_FLAGS;
     unsigned FoldedLoad = OpTbl0[i][2];
+    unsigned Align      = OpTbl0[i][3];
+    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 0, folded load or store.
     unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl1[][3] = {
@@ -342,8 +361,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
     { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
     { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
     { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
     { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
     { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
@@ -360,8 +379,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
     { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
     { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
-    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
-    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
+    { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm, 0 },
+    { X86::CVTSD2SIrr,      X86::CVTSD2SIrm, 0 },
     { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
     { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
     { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
@@ -370,8 +389,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
     { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
     { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
-    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
-    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+    { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm, 16 },
+    { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm, 16 },
     { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
     { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
     { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
@@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
-    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
+    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl1[i][2];
-    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 1, folded load
     unsigned AuxInfo = 1 | (1 << 4);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl2[][3] = {
     { X86::ADC32rr,         X86::ADC32rm, 0 },
     { X86::ADC64rr,         X86::ADC64rm, 0 },
     { X86::ADD16rr,         X86::ADD16rm, 0 },
+    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD32rr,         X86::ADD32rm, 0 },
+    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD64rr,         X86::ADD64rm, 0 },
+    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD8rr,          X86::ADD8rm, 0 },
     { X86::ADDPDrr,         X86::ADDPDrm, 16 },
     { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -652,20 +676,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1];
+    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl2[i][2];
-    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+
+    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 2, folded load
     unsigned AuxInfo = 2 | (1 << 4);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                   std::make_pair(RegOp, AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+           "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
-
-  // Remove ambiguous entries.
-  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
 }
 
 bool
@@ -745,9 +772,7 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::MOV8rm:
   case X86::MOV16rm:
   case X86::MOV32rm:
-  case X86::MOV32rm_TC:
   case X86::MOV64rm:
-  case X86::MOV64rm_TC:
   case X86::LD_Fp64m:
   case X86::MOVSSrm:
   case X86::MOVSDrm:
@@ -768,9 +793,7 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::MOV8mr:
   case X86::MOV16mr:
   case X86::MOV32mr:
-  case X86::MOV32mr_TC:
   case X86::MOV64mr:
-  case X86::MOV64mr_TC:
   case X86::ST_FpP64m:
   case X86::MOVSSmr:
   case X86::MOVSDmr:
@@ -785,7 +808,7 @@ static bool isFrameStoreOpcode(int Opcode) {
   return false;
 }
 
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
   if (isFrameLoadOpcode(MI->getOpcode()))
     if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -793,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                  int &FrameIndex) const {
   if (isFrameLoadOpcode(MI->getOpcode())) {
     unsigned Reg;
@@ -923,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
           isPICBase = true;
         }
         return isPICBase;
-      } 
+      }
       return false;
     }
- 
+
      case X86::LEA32r:
      case X86::LEA64r: {
        if (MI->getOperand(2).isImm() &&
@@ -1099,11 +1122,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
     ? X86::LEA64_32r : X86::LEA32r;
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
   unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-            
+
   // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits. 
+  // well be shifting and then extracting the lower 16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw    (%rbp,%rcx,2), %dx
   //   leal    -65(%rdx), %esi
@@ -1137,9 +1160,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
-    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
+    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
     break;
-  case X86::ADD16rr: {
+  case X86::ADD16rr:
+  case X86::ADD16rr_DB: {
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
     unsigned leaInReg2 = 0;
@@ -1149,9 +1175,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       // just a single insert_subreg.
       addRegReg(MIB, leaInReg, true, leaInReg, false);
     } else {
-      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits. 
+      // well be shifting and then extracting the lower 16-bits.
       BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
       InsMI2 =
         BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
@@ -1218,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::SHUFPSrri: {
     assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
     if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
-    
+
     unsigned B = MI->getOperand(1).getReg();
     unsigned C = MI->getOperand(2).getReg();
     if (B != C) return 0;
@@ -1236,6 +1262,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
+    // LEA can't handle RSP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+      return 0;
+
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
@@ -1250,6 +1281,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
+    // LEA can't handle ESP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+      return 0;
+
     unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
@@ -1288,6 +1324,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
       unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)),
@@ -1310,6 +1354,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
       unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
         : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)),
@@ -1327,12 +1378,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                            Src, isKill, -1);
       break;
     case X86::ADD64rr:
-    case X86::ADD32rr: {
+    case X86::ADD64rr_DB:
+    case X86::ADD32rr:
+    case X86::ADD32rr_DB: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      unsigned Opc;
+      TargetRegisterClass *RC;
+      if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+        Opc = X86::LEA64r;
+        RC = X86::GR64_NOSPRegisterClass;
+      } else {
+        Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+        RC = X86::GR32_NOSPRegisterClass;
+      }
+
+
       unsigned Src2 = MI->getOperand(2).getReg();
       bool isKill2 = MI->getOperand(2).isKill();
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+          !MF.getRegInfo().constrainRegClass(Src2, RC))
+        return 0;
+
       NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                         .addReg(Dest, RegState::Define |
                                 getDeadRegState(isDead)),
@@ -1341,7 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
         LV->replaceKillInstruction(Src2, MI, NewMI);
       break;
     }
-    case X86::ADD16rr: {
+    case X86::ADD16rr:
+    case X86::ADD16rr_DB: {
       if (DisableLEA16)
         return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1357,6 +1426,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::ADD64ri32:
     case X86::ADD64ri8:
+    case X86::ADD64ri32_DB:
+    case X86::ADD64ri8_DB:
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                               .addReg(Dest, RegState::Define |
@@ -1364,7 +1435,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                               Src, isKill, MI->getOperand(2).getImm());
       break;
     case X86::ADD32ri:
-    case X86::ADD32ri8: {
+    case X86::ADD32ri8:
+    case X86::ADD32ri_DB:
+    case X86::ADD32ri8_DB: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1375,6 +1448,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::ADD16ri:
     case X86::ADD16ri8:
+    case X86::ADD16ri_DB:
+    case X86::ADD16ri8_DB:
       if (DisableLEA16)
         return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1396,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       LV->replaceKillInstruction(Dest, MI, NewMI);
   }
 
-  MFI->insert(MBBI, NewMI);          // Insert the new inst    
+  MFI->insert(MBBI, NewMI);          // Insert the new inst
   return NewMI;
 }
 
@@ -1617,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.isTerminator()) return false;
-  
+
   // Conditional branch is a special case.
   if (TID.isBranch() && !TID.isBarrier())
     return true;
@@ -1626,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   return !isPredicated(MI);
 }
 
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                  MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
@@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
     I = MBB.end();
     ++Count;
   }
-  
+
   return Count;
 }
 
@@ -1945,13 +2020,23 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
   default:
     llvm_unreachable("Unknown regclass");
   case X86::GR64RegClassID:
+  case X86::GR64_ABCDRegClassID:
+  case X86::GR64_NOREXRegClassID:
+  case X86::GR64_NOREX_NOSPRegClassID:
   case X86::GR64_NOSPRegClassID:
+  case X86::GR64_TCRegClassID:
+  case X86::GR64_TCW64RegClassID:
     return load ? X86::MOV64rm : X86::MOV64mr;
   case X86::GR32RegClassID:
-  case X86::GR32_NOSPRegClassID:
+  case X86::GR32_ABCDRegClassID:
   case X86::GR32_ADRegClassID:
+  case X86::GR32_NOREXRegClassID:
+  case X86::GR32_NOSPRegClassID:
+  case X86::GR32_TCRegClassID:
     return load ? X86::MOV32rm : X86::MOV32mr;
   case X86::GR16RegClassID:
+  case X86::GR16_ABCDRegClassID:
+  case X86::GR16_NOREXRegClassID:
     return load ? X86::MOV16rm : X86::MOV16mr;
   case X86::GR8RegClassID:
     // Copying to or from a physical H register on x86-64 requires a NOREX
@@ -1961,32 +2046,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
       return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
       return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_ABCDRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_ABCDRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_ABCDRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
   case X86::GR8_ABCD_LRegClassID:
+  case X86::GR8_NOREXRegClassID:
     return load ? X86::MOV8rm :X86::MOV8mr;
   case X86::GR8_ABCD_HRegClassID:
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
       return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_NOREXRegClassID:
-  case X86::GR64_NOREX_NOSPRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_NOREXRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_NOREXRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
-  case X86::GR8_NOREXRegClassID:
-    return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_TCRegClassID:
-    return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
-  case X86::GR32_TCRegClassID:
-    return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
   case X86::RFP80RegClassID:
     return load ? X86::LD_Fp80m : X86::ST_FpP80m;
   case X86::RFP64RegClassID:
@@ -2085,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   NewMIs.push_back(MIB);
 }
 
-bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL = MBB.findDebugLoc(MI);
-
-  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
-  unsigned SlotSize = is64Bit ? 8 : 4;
-
-  MachineFunction &MF = *MBB.getParent();
-  unsigned FPReg = RI.getFrameRegister(MF);
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  unsigned CalleeFrameSize = 0;
-  
-  unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    if (Reg == FPReg)
-      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
-      continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      CalleeFrameSize += SlotSize;
-      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
-                          RC, &RI);
-    }
-  }
-
-  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
-  return true;
-}
-
-bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                               MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL = MBB.findDebugLoc(MI);
-
-  MachineFunction &MF = *MBB.getParent();
-  unsigned FPReg = RI.getFrameRegister(MF);
-  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
-  unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (Reg == FPReg)
-      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
-      continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      BuildMI(MBB, MI, DL, get(Opc), Reg);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                           RC, &RI);
-    }
-  }
-  return true;
-}
-
 MachineInstr*
 X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                        int FrameIx, uint64_t Offset,
@@ -2181,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
     MIB.addOperand(MOs[i]);
   if (NumAddrOps < 4)  // FrameIndex only
     addOffset(MIB, 0);
-  
+
   // Loop over the rest of the ri operands, converting them over.
   unsigned NumOps = MI->getDesc().getNumOperands()-2;
   for (unsigned i = 0; i != NumOps; ++i) {
@@ -2202,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
   MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                               MI->getDebugLoc(), true);
   MachineInstrBuilder MIB(NewMI);
-  
+
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (i == OpNo) {
@@ -2238,7 +2235,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                     MachineInstr *MI, unsigned i,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     unsigned Size, unsigned Align) const {
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
@@ -2251,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   if (isTwoAddr && NumOps >= 2 && i < 2 &&
       MI->getOperand(0).isReg() &&
       MI->getOperand(1).isReg() &&
-      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 
+      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
     isTwoAddrFold = true;
   } else if (i == 0) { // If operand 0
@@ -2265,19 +2262,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
     if (NewMI)
       return NewMI;
-    
+
     OpcodeTablePtr = &RegOp2MemOpTable0;
   } else if (i == 1) {
     OpcodeTablePtr = &RegOp2MemOpTable1;
   } else if (i == 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2;
   }
-  
+
   // If table selected...
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+      OpcodeTablePtr->find(MI->getOpcode());
     if (I != OpcodeTablePtr->end()) {
       unsigned Opcode = I->second.first;
       unsigned MinAlign = I->second.second;
@@ -2320,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       return NewMI;
     }
   }
-  
-  // No fusion 
+
+  // No fusion
   if (PrintFailedFusing && !MI->isCopy())
     dbgs() << "We failed to fuse operand " << i << " in " << *MI;
   return NULL;
@@ -2332,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
                                            const SmallVectorImpl<unsigned> &Ops,
                                                   int FrameIndex) const {
-  // Check switch flag 
+  // Check switch flag
   if (NoFusing) return NULL;
 
   if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2343,8 +2340,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     case X86::Int_CVTSS2SDrr:
     case X86::RCPSSr:
     case X86::RCPSSr_Int:
-    case X86::ROUNDSDr_Int:
-    case X86::ROUNDSSr_Int:
+    case X86::ROUNDSDr:
+    case X86::ROUNDSSr:
     case X86::RSQRTSSr:
     case X86::RSQRTSSr_Int:
     case X86::SQRTSSr:
@@ -2384,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
                                            const SmallVectorImpl<unsigned> &Ops,
                                                   MachineInstr *LoadMI) const {
-  // Check switch flag 
+  // Check switch flag
   if (NoFusing) return NULL;
 
   if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2395,8 +2392,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     case X86::Int_CVTSS2SDrr:
     case X86::RCPSSr:
     case X86::RCPSSr_Int:
-    case X86::ROUNDSDr_Int:
-    case X86::ROUNDSSr_Int:
+    case X86::ROUNDSDr:
+    case X86::ROUNDSSr:
     case X86::RSQRTSSr:
     case X86::RSQRTSSr_Int:
     case X86::SQRTSSr:
@@ -2424,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       Alignment = 16;
       break;
     case X86::FsFLD0SD:
+    case X86::VFsFLD0SD:
       Alignment = 8;
       break;
     case X86::FsFLD0SS:
+    case X86::VFsFLD0SS:
       Alignment = 4;
       break;
     default:
@@ -2490,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     MachineConstantPool &MCP = *MF.getConstantPool();
     const Type *Ty;
     unsigned Opc = LoadMI->getOpcode();
-    if (Opc == X86::FsFLD0SS)
+    if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
       Ty = Type::getFloatTy(MF.getFunction()->getContext());
-    else if (Opc == X86::FsFLD0SD)
+    else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
       Ty = Type::getDoubleTy(MF.getFunction()->getContext());
     else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
       Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -2525,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
 
 bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                   const SmallVectorImpl<unsigned> &Ops) const {
-  // Check switch flag 
+  // Check switch flag
   if (NoFusing) return 0;
 
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     switch (MI->getOpcode()) {
     default: return false;
-    case X86::TEST8rr: 
+    case X86::TEST8rr:
     case X86::TEST16rr:
     case X86::TEST32rr:
     case X86::TEST64rr:
@@ -2551,16 +2550,15 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
   // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
-  if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
   } else if (OpNum == 0) { // If operand 0
     switch (Opc) {
     case X86::MOV8r0:
     case X86::MOV16r0:
     case X86::MOV32r0:
-    case X86::MOV64r0:
-      return true;
+    case X86::MOV64r0: return true;
     default: break;
     }
     OpcodeTablePtr = &RegOp2MemOpTable0;
@@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   } else if (OpNum == 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2;
   }
-  
-  if (OpcodeTablePtr) {
-    // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)Opc);
-    if (I != OpcodeTablePtr->end())
-      return true;
-  }
+
+  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+    return true;
   return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
 
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(MI->getOpcode());
   if (I == MemOp2RegOpTable.end())
     return false;
   unsigned Opc = I->second.first;
@@ -2644,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   // Emit the data processing instruction.
   MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
   MachineInstrBuilder MIB(DataMI);
-  
+
   if (FoldedStore)
     MIB.addReg(Reg, RegState::Define);
   for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
@@ -2712,8 +2705,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   if (!N->isMachineOpcode())
     return false;
 
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(N->getMachineOpcode());
   if (I == MemOp2RegOpTable.end())
     return false;
   unsigned Opc = I->second.first;
@@ -2813,8 +2806,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                       bool UnfoldLoad, bool UnfoldStore,
                                       unsigned *LoadRegIndex) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)Opc);
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(Opc);
   if (I == MemOp2RegOpTable.end())
     return 0;
   bool FoldedLoad = I->second.second & (1 << 4);
@@ -2993,6 +2986,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
   case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
   case X86::YMM8:  case X86::YMM9:  case X86::YMM10: case X86::YMM11:
   case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+  case X86::CR8:   case X86::CR9:   case X86::CR10:  case X86::CR11:
+  case X86::CR12:  case X86::CR13:  case X86::CR14:  case X86::CR15:
     return true;
   }
   return false;
@@ -3090,6 +3085,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   NopInst.setOpcode(X86::NOOP);
 }
 
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineRegisterInfo *MRI,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
+  switch (DefMI->getOpcode()) {
+  default: return false;
+  case X86::DIVSDrm:
+  case X86::DIVSDrm_Int:
+  case X86::DIVSDrr:
+  case X86::DIVSDrr_Int:
+  case X86::DIVSSrm:
+  case X86::DIVSSrm_Int:
+  case X86::DIVSSrr:
+  case X86::DIVSSrr_Int:
+  case X86::SQRTPDm:
+  case X86::SQRTPDm_Int:
+  case X86::SQRTPDr:
+  case X86::SQRTPDr_Int:
+  case X86::SQRTPSm:
+  case X86::SQRTPSm_Int:
+  case X86::SQRTPSr:
+  case X86::SQRTPSr_Int:
+  case X86::SQRTSDm:
+  case X86::SQRTSDm_Int:
+  case X86::SQRTSDr:
+  case X86::SQRTSDr_Int:
+  case X86::SQRTSSm:
+  case X86::SQRTSSm_Int:
+  case X86::SQRTSSr:
+  case X86::SQRTSSr_Int:
+    return true;
+  }
+}
+
 namespace {
   /// CGBR - Create Global Base Reg pass. This initializes the PIC
   /// global base register for x86-32.
@@ -3108,6 +3138,13 @@ namespace {
       if (TM->getRelocationModel() != Reloc::PIC_)
         return false;
 
+      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+      // If we didn't need a GlobalBaseReg, don't insert code.
+      if (GlobalBaseReg == 0)
+        return false;
+
       // Insert the set of GlobalBaseReg into the first MBB of the function
       MachineBasicBlock &FirstMBB = MF.front();
       MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -3119,16 +3156,15 @@ namespace {
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
         PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
       else
-        PC = TII->getGlobalBaseReg(&MF);
-  
+        PC = GlobalBaseReg;
+
       // Operand of MovePCtoStack is completely ignored by asm printer. It's
       // only used in JIT code emission as displacement to pc.
       BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-  
+
       // If we're using vanilla 'GOT' PIC style, we should use relative addressing
       // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-        unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
         // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
         BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
           .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f33620641e88..1d4420787273 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -174,7 +174,7 @@ namespace X86II {
     
     /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the "FOO$stub" symbol.  This is used for calls
-    /// and jumps to external functions on Tiger and before.
+    /// and jumps to external functions on Tiger and earlier.
     MO_DARWIN_STUB,
     
     /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
@@ -311,12 +311,17 @@ namespace X86II {
     MRM_F0 = 40,
     MRM_F8 = 41,
     MRM_F9 = 42,
+
+    /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+    /// immediates, the first of which is a 16-bit immediate (specified by
+    /// the imm encoding) and the second is a 8-bit fixed value.
+    RawFrmImm8 = 43,
     
     /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
     /// immediates, the first of which is a 16 or 32-bit immediate (specified by
     /// the imm encoding) and the second is a 16-bit fixed value.  In the AMD
     /// manual, this operand is described as pntr16:32 and pntr16:16
-    RawFrmImm16 = 43,
+    RawFrmImm16 = 44,
 
     FormMask       = 63,
 
@@ -444,28 +449,36 @@ namespace X86II {
     OpcodeMask    = 0xFF << OpcodeShift,
 
     //===------------------------------------------------------------------===//
-    // VEX - The opcode prefix used by AVX instructions
+    /// VEX - The opcode prefix used by AVX instructions
     VEX         = 1U << 0,
 
-    // VEX_W - Has a opcode specific functionality, but is used in the same
-    // way as REX_W is for regular SSE instructions.
+    /// VEX_W - Has a opcode specific functionality, but is used in the same
+    /// way as REX_W is for regular SSE instructions.
     VEX_W       = 1U << 1,
 
-    // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
-    // address instructions in SSE are represented as 3 address ones in AVX
-    // and the additional register is encoded in VEX_VVVV prefix.
+    /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+    /// address instructions in SSE are represented as 3 address ones in AVX
+    /// and the additional register is encoded in VEX_VVVV prefix.
     VEX_4V      = 1U << 2,
 
-    // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
-    // must be encoded in the i8 immediate field. This usually happens in
-    // instructions with 4 operands.
+    /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+    /// must be encoded in the i8 immediate field. This usually happens in
+    /// instructions with 4 operands.
     VEX_I8IMM   = 1U << 3,
 
-    // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
-    // instruction uses 256-bit wide registers. This is usually auto detected if
-    // a VR256 register is used, but some AVX instructions also have this field
-    // marked when using a f256 memory references.
-    VEX_L       = 1U << 4
+    /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    /// instruction uses 256-bit wide registers. This is usually auto detected
+    /// if a VR256 register is used, but some AVX instructions also have this
+    /// field marked when using a f256 memory references.
+    VEX_L       = 1U << 4,
+    
+    /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+    /// wacky 0x0F 0x0F prefix for 3DNow! instructions.  The manual documents
+    /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+    /// storing a classifier in the imm8 field.  To simplify our implementation,
+    /// we handle this by storeing the classifier in the opcode field and using
+    /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+    Has3DNow0F0FOpcode = 1U << 5
   };
   
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -528,6 +541,7 @@ namespace X86II {
     case X86II::AddRegFrm:
     case X86II::MRMDestReg:
     case X86II::MRMSrcReg:
+    case X86II::RawFrmImm8:
     case X86II::RawFrmImm16:
        return -1;
     case X86II::MRMDestMem:
@@ -599,14 +613,14 @@ class X86InstrInfo : public TargetInstrInfoImpl {
   /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
   ///
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+  DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
   
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
-  DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+  DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
 
 public:
   explicit X86InstrInfo(X86TargetMachine &tm);
@@ -728,17 +742,6 @@ public:
                                MachineInstr::mmo_iterator MMOBegin,
                                MachineInstr::mmo_iterator MMOEnd,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
-  
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
-  
   virtual
   MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                          int FrameIx, uint64_t Offset,
@@ -845,18 +848,23 @@ public:
   /// SetSSEDomain - Set the SSEDomain of MI.
   void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
 
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      unsigned OpNum,
+                                      const SmallVectorImpl<MachineOperand> &MOs,
+                                      unsigned Size, unsigned Alignment) const;
+
+  bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineRegisterInfo *MRI,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const;
+  
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
-  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                     MachineInstr* MI,
-                                     unsigned OpNum,
-                                     const SmallVectorImpl<MachineOperand> &MOs,
-                                     unsigned Size, unsigned Alignment) const;
-
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 09b7721a621d..87dc4bece742 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,10 +1,10 @@
-//===----------------------------------------------------------------------===//
-// 
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file describes the X86 instruction set, defining the instructions, and
@@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
                                             [SDTCisSameAs<0, 2>,
                                              SDTCisSameAs<0, 3>,
                                              SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+                                            [SDTCisSameAs<0, 2>,
+                                             SDTCisSameAs<0, 3>,
+                                             SDTCisInt<0>,
+                                             SDTCisVT<1, i32>,
+                                             SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+                                            [SDTCisSameAs<0, 1>,
+                                             SDTCisSameAs<0, 2>,
+                                             SDTCisSameAs<0, 3>,
+                                             SDTCisInt<0>, SDTCisVT<1, i32>]>;
 def SDTX86BrCond  : SDTypeProfile<0, 3,
                                   [SDTCisVT<0, OtherVT>,
                                    SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@@ -46,7 +60,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
                                   [SDTCisInt<0>,
                                    SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
 
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, 
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i8>]>;
 def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -64,6 +78,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
                                                          SDTCisVT<1, iPTR>,
                                                          SDTCisVT<2, iPTR>]>;
 
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+                                            SDTCisPtrTy<1>,
+                                            SDTCisVT<2, i32>,
+                                            SDTCisVT<3, i8>,
+                                            SDTCisVT<4, i32>]>;
+
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
 def SDTX86Void    : SDTypeProfile<0, 0, []>;
@@ -72,9 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
 
 def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
-def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
-def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
@@ -110,82 +128,85 @@ def X86setcc   : SDNode<"X86ISD::SETCC",    SDTX86SetCC>;
 def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
 
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
-                        [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
-                        [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
-                        [SDNPHasChain, SDNPMayStore, 
+                        [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+                        [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
 def X86vastart_save_xmm_regs :
                  SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                         SDT_X86VASTART_SAVE_XMM_REGS,
                         [SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+                 SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+                         SDNPMemOperand]>;
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
-                        [SDNPHasChain, SDNPOutFlag]>;
+                        [SDNPHasChain, SDNPOutGlue]>;
 def X86callseq_end :
                  SDNode<"ISD::CALLSEQ_END",   SDT_X86CallSeqEnd,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;       
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def X86call    : SDNode<"X86ISD::CALL",     SDT_X86Call,
-                        [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+                        [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
                          SDNPVariadic]>;
 
 def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
-                        [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
 def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
-                        [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad]>;
 
 def X86rdtsc   : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
-                        [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
 
 def X86Wrapper    : SDNode<"X86ISD::Wrapper",     SDTX86Wrapper>;
 def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP",  SDTX86Wrapper>;
 
 def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
-                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
-                                 SDT_X86SegmentBaseAddress, []>;
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
                         [SDNPHasChain]>;
 
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, 
-                        [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+                        [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
 
 def X86add_flag  : SDNode<"X86ISD::ADD",  SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
 def X86sub_flag  : SDNode<"X86ISD::SUB",  SDTBinaryArithWithFlags>;
 def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
                           [SDNPCommutative]>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
                           [SDNPCommutative]>;
-                          
+def X86adc_flag  : SDNode<"X86ISD::ADC",  SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag  : SDNode<"X86ISD::SBB",  SDTBinaryArithWithFlagsInOut>;
+
 def X86inc_flag  : SDNode<"X86ISD::INC",  SDTUnaryArithWithFlags>;
 def X86dec_flag  : SDNode<"X86ISD::DEC",  SDTUnaryArithWithFlags>;
 def X86or_flag   : SDNode<"X86ISD::OR",   SDTBinaryArithWithFlags,
@@ -197,11 +218,11 @@ def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags,
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
-def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
-                            [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-                            
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
 def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
-                        []>;
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 //===----------------------------------------------------------------------===//
 // X86 Operand Definitions.
@@ -252,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
 // Special i32mem for addresses of load folding tail calls. These are not
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved register are popped.
@@ -261,6 +286,15 @@ def i32mem_TC : Operand<i32> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+                       ptr_rc_tailcall, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
 
 let ParserMatchClass = X86AbsMemAsmOperand,
     PrintMethod = "print_pcrel_imm" in {
@@ -332,43 +366,77 @@ def i32i8imm  : Operand<i32> {
   let ParserMatchClass = ImmSExti32i8AsmOperand;
 }
 
+// 64-bits but only 32 bits are significant.
+def i64i32imm  : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+  let PrintMethod = "print_pcrel_imm";
+  let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm   : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let AsmOperandLowerMethod = "lower_lea64_32mem";
+  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
+
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
 //
 
 // Define X86 specific addressing mode.
-def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
 def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
                                [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
 def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
                                [tglobaltlsaddr], []>;
 
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+                        [add, sub, mul, X86mul_imm, shl, or, frameindex,
+                         X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+                               [tglobaltlsaddr], []>;
+
 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
 def HasCMov      : Predicate<"Subtarget->hasCMov()">;
 def NoCMov       : Predicate<"!Subtarget->hasCMov()">;
 
-// FIXME: temporary hack to let codegen assert or generate poor code in case
-// no AVX version of the desired intructions is present, this is better for
-// incremental dev (without fallbacks it's easier to spot what's missing)
-def HasMMX       : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
-def HasSSE1      : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
-def HasSSE2      : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
-def HasSSE3      : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
-def HasSSSE3     : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
-def HasSSE41     : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
-def HasSSE42     : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
-def HasSSE4A     : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+def HasMMX       : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow     : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA    : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1      : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2      : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3      : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A     : Predicate<"Subtarget->hasSSE4A()">;
 
 def HasAVX       : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt    : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES       : Predicate<"Subtarget->hasAES()">;
 def HasCLMUL     : Predicate<"Subtarget->hasCLMUL()">;
 def HasFMA3      : Predicate<"Subtarget->hasFMA3()">;
 def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
-def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
-def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
-def In32BitMode  : Predicate<"!Subtarget->is64Bit()">;
-def In64BitMode  : Predicate<"Subtarget->is64Bit()">;
+def FPStackf32   : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64   : Predicate<"!Subtarget->hasXMMInt()">;
+def In32BitMode  : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
+def In64BitMode  : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
 def IsWin64      : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64     : Predicate<"!Subtarget->isTargetWin64()">;
 def SmallCode    : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -383,7 +451,6 @@ def OptForSize   : Predicate<"OptForSize">;
 def OptForSpeed  : Predicate<"!OptForSize">;
 def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
-def HasAES       : Predicate<"Subtarget->hasAES()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
@@ -418,40 +485,24 @@ def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
 
 def i16immSExt8  : PatLeaf<(i16 immSext8)>;
 def i32immSExt8  : PatLeaf<(i32 immSext8)>;
-
-/// Load patterns: these constraint the match to the right address space.
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 256;
-  return false;
+def i64immSExt8  : PatLeaf<(i64 immSext8)>;
+def i64immSExt32  : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32  : PatLeaf<(i64 imm), [{
+  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // unsignedsign extended field.
+  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
 }]>;
 
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 257;
-  return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+    uint64_t v = N->getZExtValue();
+    return v == (uint32_t)v && (int32_t)v == (int8_t)v;
 }]>;
 
-
 // Helper fragments for loads.
 // It's always safe to treat a anyext i16 load as a i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -462,10 +513,6 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
 
 def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::EXTLOAD)
     return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -474,10 +521,6 @@ def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
 
 def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -486,15 +529,18 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi8  : PatFrag<(ops node:$ptr), (i8  (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
 
 def sextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
 def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
 
 def zextloadi8i1   : PatFrag<(ops node:$ptr), (i8  (zextloadi1 node:$ptr))>;
 def zextloadi16i1  : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
@@ -502,6 +548,10 @@ def zextloadi32i1  : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
 def zextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
 def zextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
 def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1  : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
 
 def extloadi8i1    : PatFrag<(ops node:$ptr), (i8  (extloadi1 node:$ptr))>;
 def extloadi16i1   : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
@@ -509,6 +559,10 @@ def extloadi32i1   : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
 def extloadi16i8   : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
 def extloadi32i8   : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
 def extloadi32i16  : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1   : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
 
 
 // An 'and' node with a single use.
@@ -524,66 +578,10 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
   return N->hasOneUse();
 }]>;
 
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
-  APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
 //===----------------------------------------------------------------------===//
-// Instruction list...
+// Instruction list.
 //
 
-// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-                           "#ADJCALLSTACKDOWN",
-                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[In32BitMode]>;
-def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
-                           "#ADJCALLSTACKUP",
-                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In32BitMode]>;
-}
-
-// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
-def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
-                              (outs),
-                              (ins GR8:$al,
-                                   i64imm:$regsavefi, i64imm:$offset,
-                                   variable_ops),
-                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
-                              [(X86vastart_save_xmm_regs GR8:$al,
-                                                         imm:$regsavefi,
-                                                         imm:$offset)]>;
-
-// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets.  Calls
-// to _alloca is needed to probe the stack when allocating more than 4k bytes in
-// one go. Touching the stack at 4K increments is necessary to ensure that the
-// guard pages used by the OS virtual memory manager are allocated in correct
-// sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-  def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
-                       "# dynamic stack allocation",
-                       [(X86MingwAlloca)]>;
-}
-
 // Nop
 let neverHasSideEffects = 1 in {
   def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -593,206 +591,22 @@ let neverHasSideEffects = 1 in {
                 "nop{l}\t$zero", []>, TB;
 }
 
-// Trap
-let Uses = [EFLAGS] in {
-  def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-}
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
-              [(int_x86_int (i8 3))]>;
-// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
-              [(int_x86_int imm:$trap)]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
-
-// PIC base construction.  This expands to code that looks like this:
-//     call  $next_inst
-//     popl %destreg"
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
-  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
-                      "", []>;
-
-//===----------------------------------------------------------------------===//
-//  Control Flow Instructions.
-//
-
-// Return instructions.
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, FPForm = SpecialFP in {
-  def RET    : I   <0xC3, RawFrm, (outs), (ins variable_ops),
-                    "ret",
-                    [(X86retflag 0)]>;
-  def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
-                    "ret\t$amt",
-                    [(X86retflag timm:$amt)]>;
-  def LRET   : I   <0xCB, RawFrm, (outs), (ins),
-                    "lret", []>;
-  def LRETI  : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lret\t$amt", []>;
-}
-
-// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
-  def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
-                        "jmp\t$dst", [(br bb:$dst)]>;
-  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
-                       "jmp\t$dst", []>;
-}
-
-// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
-  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
-    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
-    def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
-                       [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
-  }
-}
-
-defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
-defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
-
-// FIXME: What about the CX/RCX versions of this instruction?
-let Uses = [ECX], isBranch = 1, isTerminator = 1 in
-  def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
-                       "jcxz\t$dst", []>;
-
-
-// Indirect branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
-                     [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
-  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
-                     [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
-                     
-  def FARJMP16i  : Iseg16<0xEA, RawFrmImm16, (outs), 
-                          (ins i16imm:$off, i16imm:$seg),
-                          "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
-  def FARJMP32i  : Iseg32<0xEA, RawFrmImm16, (outs),
-                          (ins i32imm:$off, i16imm:$seg),
-                          "ljmp{l}\t{$seg, $off|$off, $seg}", []>;                     
-
-  def FARJMP16m  : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), 
-                     "ljmp{w}\t{*}$dst", []>, OpSize;
-  def FARJMP32m  : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
-                     "ljmp{l}\t{*}$dst", []>;
-}
-
-
-// Loop instructions
-
-def LOOP   : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE  : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
-
-//===----------------------------------------------------------------------===//
-//  Call Instructions...
-//
-let isCall = 1 in
-  // All calls clobber the non-callee saved registers. ESP is marked as
-  // a use to prevent stack-pointer assignments that appear immediately
-  // before calls from potentially appearing dead. Uses for argument
-  // registers are added manually.
-  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [ESP] in {
-    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
-                           (outs), (ins i32imm_pcrel:$dst,variable_ops),
-                           "call\t$dst", []>;
-    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
-                        "call\t{*}$dst", [(X86call GR32:$dst)]>;
-    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
-                        "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
-  
-    def FARCALL16i  : Iseg16<0x9A, RawFrmImm16, (outs), 
-                             (ins i16imm:$off, i16imm:$seg),
-                             "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
-    def FARCALL32i  : Iseg32<0x9A, RawFrmImm16, (outs),
-                             (ins i32imm:$off, i16imm:$seg),
-                             "lcall{l}\t{$seg, $off|$off, $seg}", []>;
-                             
-    def FARCALL16m  : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
-                        "lcall{w}\t{*}$dst", []>, OpSize;
-    def FARCALL32m  : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
-                        "lcall{l}\t{*}$dst", []>;
-
-    // callw for 16 bit code for the assembler.
-    let isAsmParserOnly = 1 in
-      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
-                       (outs), (ins i16imm_pcrel:$dst, variable_ops),
-                       "callw\t$dst", []>, OpSize;
-  }
 
 // Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+                 "enter\t$len, $lvl", []>;
 
-def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
-              "enter\t$len, $lvl", []>;
-
-// Tail call stuff.
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in
-  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-      Uses = [ESP] in {
-  def TCRETURNdi : I<0, Pseudo, (outs), 
-                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
-                   "#TC_RETURN $dst $offset", []>;
-  def TCRETURNri : I<0, Pseudo, (outs), 
-                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset", []>;
-  let mayLoad = 1 in
-  def TCRETURNmi : I<0, Pseudo, (outs), 
-                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset", []>;
-
-  // FIXME: The should be pseudo instructions that are lowered when going to
-  // mcinst.
-  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
-                           (ins i32imm_pcrel:$dst, variable_ops),
-                 "jmp\t$dst  # TAILCALL",
-                 []>;
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), 
-                   "", []>;  // FIXME: Remove encoding when JIT is dead.
-  let mayLoad = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
-                   "jmp{l}\t{*}$dst  # TAILCALL", []>;
-}
-
-//===----------------------------------------------------------------------===//
-//  Miscellaneous Instructions...
-//
 let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
 def LEAVE    : I<0xC9, RawFrm,
                  (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
 
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                   "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64  : I<0xC9, RawFrm,
+                 (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+//  Miscellaneous Instructions.
+//
 
 let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
 let mayLoad = 1 in {
@@ -805,6 +619,10 @@ def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
   OpSize;
 def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
 def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+               Requires<[In32BitMode]>;
 }
 
 let mayStore = 1 in {
@@ -817,29 +635,54 @@ def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
   OpSize;
 def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
 def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
-}
-}
 
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSHi8   : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), 
+def PUSHi8   : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
                       "push{l}\t$imm", []>;
-def PUSHi16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
+def PUSHi16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
                       "push{w}\t$imm", []>, OpSize;
-def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), 
+def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
                       "push{l}\t$imm", []>;
-}
 
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF16   : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32   : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
-               Requires<[In32BitMode]>;
-}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
 def PUSHF16  : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
 def PUSHF32  : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
                Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r   : I<0x58, AddRegFrm,
+                 (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r  : I<0x50, AddRegFrm,
+                 (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
 }
 
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+                     "push{q}\t$imm", []>;
+def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+                      "push{q}\t$imm", []>;
+def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+                      "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64   : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+               Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64    : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+                 Requires<[In64BitMode]>;
+
+
+
 let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
     mayLoad=1, neverHasSideEffects=1 in {
 def POPA32   : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
@@ -851,12 +694,16 @@ def PUSHA32  : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
                Requires<[In32BitMode]>;
 }
 
-let Uses = [EFLAGS], Constraints = "$src = $dst" in     // GR32 = bswap GR32
-  def BSWAP32r : I<0xC8, AddRegFrm,
-                   (outs GR32:$dst), (ins GR32:$src),
-                   "bswap{l}\t$dst", 
-                   [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+let Constraints = "$src = $dst" in {    // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+                 (outs GR32:$dst), (ins GR32:$src),
+                 "bswap{l}\t$dst",
+                 [(set GR32:$dst, (bswap GR32:$src))]>, TB;
 
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+                  "bswap{q}\t$dst",
+                  [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
 
 // Bit scan instructions.
 let Defs = [EFLAGS] in {
@@ -873,6 +720,12 @@ def BSF32rr  : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
 def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr  : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm  : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsf{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
 
 def BSR16rr  : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
@@ -887,44 +740,23 @@ def BSR32rr  : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
 def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr  : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm  : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                  "bsr{q}\t{$src, $dst|$dst, $src}",
+                  [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
 } // Defs = [EFLAGS]
 
-let neverHasSideEffects = 1 in
-def LEA16r   : I<0x8D, MRMSrcMem,
-                 (outs GR16:$dst), (ins i32mem:$src),
-                 "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
-let isReMaterializable = 1 in
-def LEA32r   : I<0x8D, MRMSrcMem,
-                 (outs GR32:$dst), (ins i32mem:$src),
-                 "lea{l}\t{$src|$dst}, {$dst|$src}",
-                 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
-
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)]>, REP;
-}
 
 // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
 let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
 def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
 def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
 def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
 }
 
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)]>, REP;
-
 // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
 let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
 def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
@@ -932,91 +764,24 @@ let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
 def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
 let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
 def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
 
 def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
 def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
 def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
 
 def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
 def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
 def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
-
-let Defs = [RAX, RDX] in
-def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
-            TB;
-
-let Defs = [RAX, RCX, RDX] in
-def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def TRAP    : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
-}
-
-def SYSCALL  : I<0x05, RawFrm,
-                 (outs), (ins), "syscall", []>, TB;
-def SYSRET   : I<0x07, RawFrm,
-                 (outs), (ins), "sysret", []>, TB;
-def SYSENTER : I<0x34, RawFrm,
-                 (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT  : I<0x35, RawFrm,
-                 (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
-
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
 
 
 //===----------------------------------------------------------------------===//
-//  Input/Output Instructions...
+//  Move Instructions.
 //
-let Defs = [AL], Uses = [DX] in
-def IN8rr  : I<0xEC, RawFrm, (outs), (ins),
-               "in{b}\t{%dx, %al|%AL, %DX}", []>;
-let Defs = [AX], Uses = [DX] in
-def IN16rr : I<0xED, RawFrm, (outs), (ins),
-               "in{w}\t{%dx, %ax|%AX, %DX}", []>,  OpSize;
-let Defs = [EAX], Uses = [DX] in
-def IN32rr : I<0xED, RawFrm, (outs), (ins),
-               "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
-
-let Defs = [AL] in
-def IN8ri  : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{b}\t{$port, %al|%AL, $port}", []>;
-let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
-let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
-                  "in{l}\t{$port, %eax|%EAX, $port}", []>;
-
-let Uses = [DX, AL] in
-def OUT8rr  : I<0xEE, RawFrm, (outs), (ins),
-                "out{b}\t{%al, %dx|%DX, %AL}", []>;
-let Uses = [DX, AX] in
-def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
-let Uses = [DX, EAX] in
-def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
-                "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
-
-let Uses = [AL] in
-def OUT8ir  : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{b}\t{%al, $port|$port, %AL}", []>;
-let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
-let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
-                   "out{l}\t{%eax, $port|$port, %EAX}", []>;
-
-def IN8  : I<0x6C, RawFrm, (outs), (ins),
-             "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins),
-             "ins{w}", []>,  OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins),
-             "ins{l}", []>;
 
-//===----------------------------------------------------------------------===//
-//  Move Instructions...
-//
 let neverHasSideEffects = 1 in {
 def MOV8rr  : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}", []>;
@@ -1024,6 +789,8 @@ def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                 "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
 }
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def MOV8ri  : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
@@ -1035,6 +802,12 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
 def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+                    "movabs{q}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+                      "mov{q}\t{$src, $dst|$dst, $src}",
+                      [(set GR64:$dst, i64immSExt32:$src)]>;
 }
 
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
@@ -1046,6 +819,9 @@ def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
                    [(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+                      "mov{q}\t{$src, $dst|$dst, $src}",
+                      [(store i64immSExt32:$src, addr:$dst)]>;
 
 /// moffs8, moffs16 and moffs32 versions of moves.  The immediate is a
 /// 32-bit offset from the PC.  These are only valid in x86-32 mode.
@@ -1067,24 +843,22 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
 def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
                       "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
                      Requires<[In32BitMode]>;
-                      
-// Moves to and from segment registers
-def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+                      "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+                       "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+                       "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
 
 let isCodeGenOnly = 1 in {
 def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1093,6 +867,8 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
 def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                     "mov{q}\t{$src, $dst|$dst, $src}", []>;
 }
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1105,6 +881,9 @@ def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
 def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}",
+                 [(set GR64:$dst, (load addr:$src))]>;
 }
 
 def MOV8mr  : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
@@ -1116,24 +895,9 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
 def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
-
-/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}",
-                []>;
-
-let mayStore = 1 in
-def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}",
-                []>;
-}
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}",
+                 [(store GR64:$src, addr:$dst)]>;
 
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
@@ -1154,2219 +918,6 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", []>;
 }
 
-// Moves to and from debug registers
-def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-                
-// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-//===----------------------------------------------------------------------===//
-//  Fixed-Register Multiplication and Division Instructions...
-//
-
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
-               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
-               // This probably ought to be moved to a def : Pat<> if the
-               // syntax can be accepted.
-               [(set AL, (mul AL, GR8:$src)),
-                (implicit EFLAGS)]>;     // AL,AH = AL*GR8
-
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src),
-               "mul{w}\t$src", 
-               []>, OpSize;    // AX,DX = AX*GR16
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src),
-               "mul{l}\t$src",
-               []>; // EAX,EDX = EAX*GR32
-
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m  : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
-               "mul{b}\t$src",
-               // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
-               // This probably ought to be moved to a def : Pat<> if the
-               // syntax can be accepted.
-               [(set AL, (mul AL, (loadi8 addr:$src))),
-                (implicit EFLAGS)]>;   // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
-               "mul{w}\t$src",
-               []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
-              "mul{l}\t$src",
-              []>;          // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r  : I<0xF6, MRM5r, (outs),  (ins GR8:$src), "imul{b}\t$src", []>;
-              // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs),  (ins GR16:$src), "imul{w}\t$src", []>,
-              OpSize;    // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs),  (ins GR32:$src), "imul{l}\t$src", []>;
-              // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
-                "imul{b}\t$src", []>;    // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
-                "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
-                "imul{l}\t$src", []>;  // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r  : I<0xF6, MRM6r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-                                                    // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
-               "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs),  (ins GR8:$src),    // AX/r8 = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs),  (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs),  (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "idiv{l}\t$src", []>;
-let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),   // AX/[mem8] = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src),  // DX:AX/[mem16] = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), 
-                                                    // EDX:EAX/[mem32] = EAX,EDX
-               "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-//  Two address Instructions.
-//
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
-let Predicates = [HasCMov] in {
-let isCommutable = 1 in {
-def CMOVB16rr : I<0x42, MRMSrcReg,       // if <u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_B, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVB32rr : I<0x42, MRMSrcReg,       // if <u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_B, EFLAGS))]>,
-                   TB;
-def CMOVAE16rr: I<0x43, MRMSrcReg,       // if >=u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovae{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVAE32rr: I<0x43, MRMSrcReg,       // if >=u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovae{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB;
-def CMOVE16rr : I<0x44, MRMSrcReg,       // if ==, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmove{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_E, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVE32rr : I<0x44, MRMSrcReg,       // if ==, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmove{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_E, EFLAGS))]>,
-                   TB;
-def CMOVNE16rr: I<0x45, MRMSrcReg,       // if !=, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovne{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVNE32rr: I<0x45, MRMSrcReg,       // if !=, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovne{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB;
-def CMOVBE16rr: I<0x46, MRMSrcReg,       // if <=u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVBE32rr: I<0x46, MRMSrcReg,       // if <=u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB;
-def CMOVA16rr : I<0x47, MRMSrcReg,       // if >u, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmova{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_A, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVA32rr : I<0x47, MRMSrcReg,       // if >u, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmova{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_A, EFLAGS))]>,
-                   TB;
-def CMOVL16rr : I<0x4C, MRMSrcReg,       // if <s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovl{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_L, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVL32rr : I<0x4C, MRMSrcReg,       // if <s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovl{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_L, EFLAGS))]>,
-                   TB;
-def CMOVGE16rr: I<0x4D, MRMSrcReg,       // if >=s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovge{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVGE32rr: I<0x4D, MRMSrcReg,       // if >=s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovge{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB;
-def CMOVLE16rr: I<0x4E, MRMSrcReg,       // if <=s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovle{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVLE32rr: I<0x4E, MRMSrcReg,       // if <=s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovle{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB;
-def CMOVG16rr : I<0x4F, MRMSrcReg,       // if >s, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovg{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_G, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVG32rr : I<0x4F, MRMSrcReg,       // if >s, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovg{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_G, EFLAGS))]>,
-                   TB;
-def CMOVS16rr : I<0x48, MRMSrcReg,       // if signed, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovs{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_S, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVS32rr : I<0x48, MRMSrcReg,       // if signed, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovs{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_S, EFLAGS))]>,
-                  TB;
-def CMOVNS16rr: I<0x49, MRMSrcReg,       // if !signed, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovns{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNS32rr: I<0x49, MRMSrcReg,       // if !signed, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovns{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB;
-def CMOVP16rr : I<0x4A, MRMSrcReg,       // if parity, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovp{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_P, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVP32rr : I<0x4A, MRMSrcReg,       // if parity, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovp{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_P, EFLAGS))]>,
-                  TB;
-def CMOVNP16rr : I<0x4B, MRMSrcReg,       // if !parity, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNP32rr : I<0x4B, MRMSrcReg,       // if !parity, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB;
-def CMOVO16rr : I<0x40, MRMSrcReg,       // if overflow, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovo{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                   X86_COND_O, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVO32rr : I<0x40, MRMSrcReg,       // if overflow, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovo{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                   X86_COND_O, EFLAGS))]>,
-                  TB;
-def CMOVNO16rr : I<0x41, MRMSrcReg,       // if !overflow, GR16 = GR16
-                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                  "cmovno{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNO32rr : I<0x41, MRMSrcReg,       // if !overflow, GR32 = GR32
-                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                  "cmovno{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB;
-} // isCommutable = 1
-
-def CMOVB16rm : I<0x42, MRMSrcMem,       // if <u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_B, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVB32rm : I<0x42, MRMSrcMem,       // if <u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_B, EFLAGS))]>,
-                   TB;
-def CMOVAE16rm: I<0x43, MRMSrcMem,       // if >=u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovae{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVAE32rm: I<0x43, MRMSrcMem,       // if >=u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovae{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_AE, EFLAGS))]>,
-                   TB;
-def CMOVE16rm : I<0x44, MRMSrcMem,       // if ==, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmove{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_E, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVE32rm : I<0x44, MRMSrcMem,       // if ==, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmove{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_E, EFLAGS))]>,
-                   TB;
-def CMOVNE16rm: I<0x45, MRMSrcMem,       // if !=, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovne{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVNE32rm: I<0x45, MRMSrcMem,       // if !=, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovne{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_NE, EFLAGS))]>,
-                   TB;
-def CMOVBE16rm: I<0x46, MRMSrcMem,       // if <=u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVBE32rm: I<0x46, MRMSrcMem,       // if <=u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_BE, EFLAGS))]>,
-                   TB;
-def CMOVA16rm : I<0x47, MRMSrcMem,       // if >u, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmova{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_A, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVA32rm : I<0x47, MRMSrcMem,       // if >u, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmova{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_A, EFLAGS))]>,
-                   TB;
-def CMOVL16rm : I<0x4C, MRMSrcMem,       // if <s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovl{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_L, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVL32rm : I<0x4C, MRMSrcMem,       // if <s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovl{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_L, EFLAGS))]>,
-                   TB;
-def CMOVGE16rm: I<0x4D, MRMSrcMem,       // if >=s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovge{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVGE32rm: I<0x4D, MRMSrcMem,       // if >=s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovge{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_GE, EFLAGS))]>,
-                   TB;
-def CMOVLE16rm: I<0x4E, MRMSrcMem,       // if <=s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovle{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVLE32rm: I<0x4E, MRMSrcMem,       // if <=s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovle{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_LE, EFLAGS))]>,
-                   TB;
-def CMOVG16rm : I<0x4F, MRMSrcMem,       // if >s, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovg{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_G, EFLAGS))]>,
-                   TB, OpSize;
-def CMOVG32rm : I<0x4F, MRMSrcMem,       // if >s, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovg{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_G, EFLAGS))]>,
-                   TB;
-def CMOVS16rm : I<0x48, MRMSrcMem,       // if signed, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovs{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_S, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVS32rm : I<0x48, MRMSrcMem,       // if signed, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovs{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_S, EFLAGS))]>,
-                  TB;
-def CMOVNS16rm: I<0x49, MRMSrcMem,       // if !signed, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovns{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNS32rm: I<0x49, MRMSrcMem,       // if !signed, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovns{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_NS, EFLAGS))]>,
-                  TB;
-def CMOVP16rm : I<0x4A, MRMSrcMem,       // if parity, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovp{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_P, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVP32rm : I<0x4A, MRMSrcMem,       // if parity, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovp{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_P, EFLAGS))]>,
-                  TB;
-def CMOVNP16rm : I<0x4B, MRMSrcMem,       // if !parity, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNP32rm : I<0x4B, MRMSrcMem,       // if !parity, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                    X86_COND_NP, EFLAGS))]>,
-                  TB;
-def CMOVO16rm : I<0x40, MRMSrcMem,       // if overflow, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovo{w}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                   X86_COND_O, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVO32rm : I<0x40, MRMSrcMem,       // if overflow, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovo{l}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                   X86_COND_O, EFLAGS))]>,
-                  TB;
-def CMOVNO16rm : I<0x41, MRMSrcMem,       // if !overflow, GR16 = [mem16]
-                  (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                  "cmovno{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB, OpSize;
-def CMOVNO32rm : I<0x41, MRMSrcMem,       // if !overflow, GR32 = [mem32]
-                  (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                  "cmovno{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                    X86_COND_NO, EFLAGS))]>,
-                  TB;
-} // Predicates = [HasCMov]
-
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
-                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
-                 "#CMOV_GR8 PSEUDO!",
-                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
-                                          imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
-                    (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
-                    "#CMOV_GR32* PSEUDO!",
-                    [(set GR32:$dst,
-                      (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
-                    "#CMOV_GR16* PSEUDO!",
-                    [(set GR16:$dst,
-                      (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_RFP32 : I<0, Pseudo,
-                    (outs RFP32:$dst),
-                    (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
-                    "#CMOV_RFP32 PSEUDO!",
-                    [(set RFP32:$dst,
-                      (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-def CMOV_RFP64 : I<0, Pseudo,
-                    (outs RFP64:$dst),
-                    (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
-                    "#CMOV_RFP64 PSEUDO!",
-                    [(set RFP64:$dst,
-                      (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
-                    (outs RFP80:$dst),
-                    (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
-                    "#CMOV_RFP80 PSEUDO!",
-                    [(set RFP80:$dst,
-                      (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
-                                                  EFLAGS))]>;
-} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] 
-} // Uses = [EFLAGS]
-
-
-// unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "neg{b}\t$dst",
-               [(set GR8:$dst, (ineg GR8:$src1)),
-                (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-               "neg{w}\t$dst",
-               [(set GR16:$dst, (ineg GR16:$src1)),
-                (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-               "neg{l}\t$dst",
-               [(set GR32:$dst, (ineg GR32:$src1)),
-                (implicit EFLAGS)]>;
-                
-let Constraints = "" in {
-  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
-                 "neg{b}\t$dst",
-                 [(store (ineg (loadi8 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
-                 "neg{w}\t$dst",
-                 [(store (ineg (loadi16 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>, OpSize;
-  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
-                 "neg{l}\t$dst",
-                 [(store (ineg (loadi32 addr:$dst)), addr:$dst),
-                  (implicit EFLAGS)]>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Match xor -1 to not. Favors these over a move imm + xor to save code size.
-let AddedComplexity = 15 in {
-def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "not{b}\t$dst",
-               [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-               "not{w}\t$dst",
-               [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-               "not{l}\t$dst",
-               [(set GR32:$dst, (not GR32:$src1))]>;
-}
-let Constraints = "" in {
-  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
-                 "not{b}\t$dst",
-                 [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
-                 "not{w}\t$dst",
-                 [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
-                 "not{l}\t$dst",
-                 [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-} // Constraints = ""
-} // CodeSize
-
-// TODO: inc/dec is slow for P4, but fast for Pentium-M.
-let Defs = [EFLAGS] in {
-let CodeSize = 2 in
-def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "inc{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
-               "inc{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
-             OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
-               "inc{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
-             Requires<[In32BitMode]>;
-}
-let Constraints = "", CodeSize = 2 in {
-  def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
-               [(store (add (loadi8 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>;
-  def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
-               [(store (add (loadi16 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>,
-               OpSize, Requires<[In32BitMode]>;
-  def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
-               [(store (add (loadi32 addr:$dst), 1), addr:$dst),
-                (implicit EFLAGS)]>,
-               Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-
-let CodeSize = 2 in
-def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-               "dec{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in {   // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), 
-               "dec{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
-             OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), 
-               "dec{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
-             Requires<[In32BitMode]>;
-} // CodeSize = 2
-
-let Constraints = "", CodeSize = 2 in {
-  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
-               [(store (add (loadi8 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>;
-  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
-               [(store (add (loadi16 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>,
-               OpSize, Requires<[In32BitMode]>;
-  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
-               [(store (add (loadi32 addr:$dst), -1), addr:$dst),
-                (implicit EFLAGS)]>,
-               Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-} // Defs = [EFLAGS]
-
-// Logical operators...
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // X = AND Y, Z   --> X = AND Z, Y
-def AND8rr  : I<0x20, MRMDestReg,
-               (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-               "and{b}\t{$src2, $dst|$dst, $src2}",
-               [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
-def AND16rr : I<0x21, MRMDestReg,
-                (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                "and{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                      GR16:$src2))]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg, 
-                (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                "and{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                      GR32:$src2))]>;
-}
-
-// AND instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "and{b}\t{$src2, $dst|$dst, $src2}", []>;
-def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                   "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                   "and{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def AND8rm   : I<0x22, MRMSrcMem, 
-                 (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
-                 "and{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
-                                                     (loadi8 addr:$src2)))]>;
-def AND16rm  : I<0x23, MRMSrcMem, 
-                 (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-                 "and{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                      (loadi16 addr:$src2)))]>,
-               OpSize;
-def AND32rm  : I<0x23, MRMSrcMem,
-                 (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-                 "and{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                      (loadi32 addr:$src2)))]>;
-
-def AND8ri   : Ii8<0x80, MRM4r, 
-                   (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
-                   "and{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
-                                                        imm:$src2))]>;
-def AND16ri  : Ii16<0x81, MRM4r, 
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                    "and{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                          imm:$src2))]>, OpSize;
-def AND32ri  : Ii32<0x81, MRM4r, 
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                    "and{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                          imm:$src2))]>;
-def AND16ri8 : Ii8<0x83, MRM4r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "and{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
-                                                         i16immSExt8:$src2))]>,
-                   OpSize;
-def AND32ri8 : Ii8<0x83, MRM4r, 
-                   (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "and{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
-                                                         i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def AND8mr   : I<0x20, MRMDestMem,
-                   (outs), (ins i8mem :$dst, GR8 :$src),
-                   "and{b}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR8:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def AND16mr  : I<0x21, MRMDestMem,
-                   (outs), (ins i16mem:$dst, GR16:$src),
-                   "and{w}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR16:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                   OpSize;
-  def AND32mr  : I<0x21, MRMDestMem,
-                   (outs), (ins i32mem:$dst, GR32:$src),
-                   "and{l}\t{$src, $dst|$dst, $src}",
-                   [(store (and (load addr:$dst), GR32:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def AND8mi   : Ii8<0x80, MRM4m,
-                     (outs), (ins i8mem :$dst, i8imm :$src),
-                     "and{b}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def AND16mi  : Ii16<0x81, MRM4m,
-                      (outs), (ins i16mem:$dst, i16imm:$src),
-                      "and{w}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>,
-                      OpSize;
-  def AND32mi  : Ii32<0x81, MRM4m,
-                      (outs), (ins i32mem:$dst, i32imm:$src),
-                      "and{l}\t{$src, $dst|$dst, $src}",
-                      [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def AND16mi8 : Ii8<0x83, MRM4m,
-                     (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "and{w}\t{$src, $dst|$dst, $src}",
-                [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                 (implicit EFLAGS)]>,
-                     OpSize;
-  def AND32mi8 : Ii8<0x83, MRM4m,
-                     (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "and{l}\t{$src, $dst|$dst, $src}",
-                [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                 (implicit EFLAGS)]>;
-
-  def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
-                   "and{b}\t{$src, %al|%al, $src}", []>;
-  def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
-                      "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
-                      "and{l}\t{$src, %eax|%eax, $src}", []>;
-
-} // Constraints = ""
-
-
-let isCommutable = 1 in {   // X = OR Y, Z   --> X = OR Z, Y
-def OR8rr    : I<0x08, MRMDestReg, (outs GR8 :$dst), 
-                 (ins GR8 :$src1, GR8 :$src2),
-                 "or{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
-def OR16rr   : I<0x09, MRMDestReg, (outs GR16:$dst), 
-                 (ins GR16:$src1, GR16:$src2),
-                 "or{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
-               OpSize;
-def OR32rr   : I<0x09, MRMDestReg, (outs GR32:$dst), 
-                 (ins GR32:$src1, GR32:$src2),
-                 "or{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
-}
-
-// OR instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "or{b}\t{$src2, $dst|$dst, $src2}", []>;
-def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
-                   (ins GR16:$src1, GR16:$src2),
-                   "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), 
-                   (ins GR32:$src1, GR32:$src2),
-                   "or{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-                  
-def OR8rm    : I<0x0A, MRMSrcMem, (outs GR8 :$dst), 
-                 (ins GR8 :$src1, i8mem :$src2),
-                 "or{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
-                                                    (load addr:$src2)))]>;
-def OR16rm   : I<0x0B, MRMSrcMem, (outs GR16:$dst), 
-                 (ins GR16:$src1, i16mem:$src2),
-                 "or{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                     (load addr:$src2)))]>,
-               OpSize;
-def OR32rm   : I<0x0B, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$src1, i32mem:$src2),
-                 "or{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                     (load addr:$src2)))]>;
-
-def OR8ri    : Ii8 <0x80, MRM1r, (outs GR8 :$dst), 
-                    (ins GR8 :$src1, i8imm:$src2),
-                    "or{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
-def OR16ri   : Ii16<0x81, MRM1r, (outs GR16:$dst), 
-                    (ins GR16:$src1, i16imm:$src2),
-                    "or{w}\t{$src2, $dst|$dst, $src2}", 
-                    [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                        imm:$src2))]>, OpSize;
-def OR32ri   : Ii32<0x81, MRM1r, (outs GR32:$dst), 
-                    (ins GR32:$src1, i32imm:$src2),
-                    "or{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                         imm:$src2))]>;
-
-def OR16ri8  : Ii8<0x83, MRM1r, (outs GR16:$dst), 
-                   (ins GR16:$src1, i16i8imm:$src2),
-                   "or{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
-                                                i16immSExt8:$src2))]>, OpSize;
-def OR32ri8  : Ii8<0x83, MRM1r, (outs GR32:$dst), 
-                   (ins GR32:$src1, i32i8imm:$src2),
-                   "or{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
-                                                        i32immSExt8:$src2))]>;
-let Constraints = "" in {
-  def OR8mr  : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
-                 "or{b}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR16:$src), addr:$dst),
-                  (implicit EFLAGS)]>, OpSize;
-  def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), GR32:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR8mi    : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                 "or{b}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mi   : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                 OpSize;
-  def OR32mi   : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-  def OR16mi8  : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
-                 "or{w}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                     OpSize;
-  def OR32mi8  : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
-                 "or{l}\t{$src, $dst|$dst, $src}",
-                 [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-                  
-  def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
-                   "or{b}\t{$src, %al|%al, $src}", []>;
-  def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
-                      "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
-                      "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
-  def XOR8rr   : I<0x30, MRMDestReg,
-                   (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
-                   "xor{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
-                                                        GR8:$src2))]>;
-  def XOR16rr  : I<0x31, MRMDestReg, 
-                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), 
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         GR16:$src2))]>, OpSize;
-  def XOR32rr  : I<0x31, MRMDestReg, 
-                   (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), 
-                   "xor{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                         GR32:$src2))]>;
-} // isCommutable = 1
-
-// XOR instructions with the destination register in REG and the source register
-//   in R/M.  Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                  "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                   "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                   "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def XOR8rm   : I<0x32, MRMSrcMem, 
-                 (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), 
-                 "xor{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
-                                                      (load addr:$src2)))]>;
-def XOR16rm  : I<0x33, MRMSrcMem, 
-                 (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), 
-                 "xor{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                       (load addr:$src2)))]>,
-                 OpSize;
-def XOR32rm  : I<0x33, MRMSrcMem, 
-                 (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), 
-                 "xor{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                       (load addr:$src2)))]>;
-
-def XOR8ri  : Ii8<0x80, MRM6r, 
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), 
-                  "xor{b}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
-def XOR16ri : Ii16<0x81, MRM6r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), 
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         imm:$src2))]>, OpSize;
-def XOR32ri  : Ii32<0x81, MRM6r, 
-                    (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), 
-                    "xor{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                          imm:$src2))]>;
-def XOR16ri8 : Ii8<0x83, MRM6r, 
-                   (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                   "xor{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
-                                                         i16immSExt8:$src2))]>,
-                   OpSize;
-def XOR32ri8 : Ii8<0x83, MRM6r, 
-                   (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                   "xor{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
-                                                         i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def XOR8mr   : I<0x30, MRMDestMem,
-                   (outs), (ins i8mem :$dst, GR8 :$src),
-                   "xor{b}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR16mr  : I<0x31, MRMDestMem,
-                   (outs), (ins i16mem:$dst, GR16:$src),
-                   "xor{w}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                   OpSize;
-  def XOR32mr  : I<0x31, MRMDestMem,
-                   (outs), (ins i32mem:$dst, GR32:$src),
-                   "xor{l}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR8mi   : Ii8<0x80, MRM6m,
-                     (outs), (ins i8mem :$dst, i8imm :$src),
-                     "xor{b}\t{$src, $dst|$dst, $src}",
-                    [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
-                     (implicit EFLAGS)]>;
-  def XOR16mi  : Ii16<0x81, MRM6m,
-                      (outs), (ins i16mem:$dst, i16imm:$src),
-                      "xor{w}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
-                    (implicit EFLAGS)]>,
-                      OpSize;
-  def XOR32mi  : Ii32<0x81, MRM6m,
-                      (outs), (ins i32mem:$dst, i32imm:$src),
-                      "xor{l}\t{$src, $dst|$dst, $src}",
-                   [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def XOR16mi8 : Ii8<0x83, MRM6m,
-                     (outs), (ins i16mem:$dst, i16i8imm :$src),
-                     "xor{w}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>,
-                     OpSize;
-  def XOR32mi8 : Ii8<0x83, MRM6m,
-                     (outs), (ins i32mem:$dst, i32i8imm :$src),
-                     "xor{l}\t{$src, $dst|$dst, $src}",
-                 [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
-                  (implicit EFLAGS)]>;
-                  
-  def XOR8i8   : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
-                      "xor{b}\t{$src, %al|%al, $src}", []>;
-  def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
-                      "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
-                      "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Shift instructions
-let Defs = [EFLAGS] in {
-let Uses = [CL] in {
-def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "shl{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (shl GR8:$src1, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shl{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shl{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (shl GR32:$src1, CL))]>;
-} // Uses = [CL]
-
-def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "shl{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-                   
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shl{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shl{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-
-def SHL8r1   : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
-                 "shl{b}\t$dst", []>;
-def SHL16r1  : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shl{w}\t$dst", []>, OpSize;
-def SHL32r1  : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shl{l}\t$dst", []>;
-
-} // isConvertibleToThreeAddress = 1
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SHL8mi   : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shl{b}\t{$src, $dst|$dst, $src}",
-                  [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SHL16mi  : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shl{w}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SHL32mi  : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shl{l}\t{$src, $dst|$dst, $src}",
-                 [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SHL8m1   : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b}\t$dst",
-                  [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SHL16m1  : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w}\t$dst",
-                 [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l}\t$dst",
-                 [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "shr{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (srl GR8:$src1, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shr{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shr{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (srl GR32:$src1, CL))]>;
-}
-
-def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                   "shr{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
-def SHR16ri  : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "shr{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHR32ri  : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "shr{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SHR8r1   : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
-                 "shr{b}\t$dst",
-                 [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
-def SHR16r1  : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
-                 "shr{w}\t$dst",
-                 [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
-def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
-                 "shr{l}\t$dst",
-                 [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
-                   OpSize;
-  def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SHR8mi   : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "shr{b}\t{$src, $dst|$dst, $src}",
-                  [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SHR16mi  : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "shr{w}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SHR32mi  : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "shr{l}\t{$src, $dst|$dst, $src}",
-                 [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SHR8m1   : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b}\t$dst",
-                  [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SHR16m1  : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w}\t$dst",
-                 [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
-  def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l}\t$dst",
-                 [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "sar{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (sra GR8:$src1, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-                 "sar{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-                 "sar{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (sra GR32:$src1, CL))]>;
-}
-
-def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "sar{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
-def SAR16ri  : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "sar{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
-                   OpSize;
-def SAR32ri  : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "sar{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1   : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "sar{b}\t$dst",
-                 [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1  : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-                 "sar{w}\t$dst",
-                 [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-                 "sar{l}\t$dst",
-                 [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), 
-                   "sar{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SAR8mi   : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "sar{b}\t{$src, $dst|$dst, $src}",
-                  [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SAR16mi  : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "sar{w}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def SAR32mi  : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "sar{l}\t{$src, $dst|$dst, $src}",
-                 [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SAR8m1   : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b}\t$dst",
-                  [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SAR16m1  : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w}\t$dst",
-                 [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
-                   "sar{l}\t$dst",
-                 [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-               "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-  
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-                "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-                "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-                  
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-               "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-  
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-                "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-                "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
-               "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
-                "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
-                "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
-               "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
-                "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
-                "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
-                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
-                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
-                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
-                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
-                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
-                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
-def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "rol{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rol{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rol{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "rol{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "rol{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, 
-                   OpSize;
-def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "rol{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "rol{b}\t$dst",
-                 [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1  : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-                 "rol{w}\t$dst",
-                 [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-                 "rol{l}\t$dst",
-                 [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROL8mi   : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "rol{b}\t{$src, $dst|$dst, $src}",
-                 [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROL16mi  : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "rol{w}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def ROL32mi  : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "rol{l}\t{$src, $dst|$dst, $src}",
-                [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROL8m1   : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b}\t$dst",
-                 [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROL16m1  : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w}\t$dst",
-                [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l}\t$dst",
-                [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "ror{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-                 "ror{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-                 "ror{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-                   "ror{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri  : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-                   "ror{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, 
-                   OpSize;
-def ROR32ri  : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-                   "ror{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-                 "ror{b}\t$dst",
-                 [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-                 "ror{w}\t$dst",
-                 [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-                 "ror{l}\t$dst",
-                 [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), 
-                   "ror{l}\t{%cl, $dst|$dst, CL}",
-                   [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROR8mi   : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-                     "ror{b}\t{$src, $dst|$dst, $src}",
-                 [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROR16mi  : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
-                     "ror{w}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-                     OpSize;
-  def ROR32mi  : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
-                     "ror{l}\t{$src, $dst|$dst, $src}",
-                [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b}\t$dst",
-                 [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w}\t$dst",
-                [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-                     OpSize;
-  def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
-                   "ror{l}\t$dst",
-                [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), 
-                   (ins GR32:$src1, GR32:$src2),
-                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
-                   (ins GR32:$src1, GR32:$src2),
-                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), 
-                   (ins GR16:$src1, GR16:$src2),
-                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
-                   TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), 
-                   (ins GR16:$src1, GR16:$src2),
-                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                   [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
-                   TB, OpSize;
-}
-
-let isCommutable = 1 in {  // These instructions commute to each other.
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR32:$dst), 
-                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
-                                      (i8 imm:$src3)))]>,
-                 TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR32:$dst), 
-                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
-                                      (i8 imm:$src3)))]>,
-                 TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR16:$dst), 
-                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
-                                      (i8 imm:$src3)))]>,
-                     TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR16:$dst), 
-                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
-                                      (i8 imm:$src3)))]>,
-                     TB, OpSize;
-}
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                     "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                     [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
-                       addr:$dst)]>, TB;
-  def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                    [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
-                      addr:$dst)]>, TB;
-  }
-  def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
-                      (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                      "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                      TB;
-  def SHRD32mri8 : Ii8<0xAC, MRMDestMem, 
-                       (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-                       "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
-                                         (i8 imm:$src3)), addr:$dst)]>,
-                       TB;
-
-  let Uses = [CL] in {
-  def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                     "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                     [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
-                       addr:$dst)]>, TB, OpSize;
-  def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-                    [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
-                      addr:$dst)]>, TB, OpSize;
-  }
-  def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
-                      (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                      "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                      TB, OpSize;
-  def SHRD16mri8 : Ii8<0xAC, MRMDestMem, 
-                       (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-                       "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
-                                        (i8 imm:$src3)), addr:$dst)]>,
-                       TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // X = ADD Y, Z   --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr    : I<0x00, MRMDestReg, (outs GR8 :$dst),
-                                    (ins GR8 :$src1, GR8 :$src2),
-                  "add{b}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr  : I<0x01, MRMDestReg, (outs GR16:$dst),
-                                   (ins GR16:$src1, GR16:$src2),
-                 "add{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                                                       GR16:$src2))]>, OpSize;
-def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst),
-                                   (ins GR32:$src1, GR32:$src2),
-                 "add{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                                                       GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "add{b}\t{$src2, $dst|$dst, $src2}", []>;
-  def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-  def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm   : I<0x02, MRMSrcMem, (outs GR8 :$dst),
-                                  (ins GR8 :$src1, i8mem :$src2),
-                 "add{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
-                                                      (load addr:$src2)))]>;
-def ADD16rm  : I<0x03, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                 "add{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                                                  (load addr:$src2)))]>, OpSize;
-def ADD32rm  : I<0x03, MRMSrcMem, (outs GR32:$dst),
-                                  (ins GR32:$src1, i32mem:$src2),
-                 "add{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                                                       (load addr:$src2)))]>;
-                  
-// Register-Integer Addition
-def ADD8ri    : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "add{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, EFLAGS,
-                          (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri  : Ii16<0x81, MRM0r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "add{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS,
-                          (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri  : Ii32<0x81, MRM0r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "add{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS, 
-                          (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "add{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS,
-                         (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "add{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS,
-                         (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
-  // Memory-Register Addition
-  def ADD8mr   : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                   "add{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "add{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
-                    (implicit EFLAGS)]>, OpSize;
-  def ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "add{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-                     "add{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                      "add{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
-                   (implicit EFLAGS)]>, OpSize;
-  def ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "add{l}\t{$src2, $dst|$dst, $src2}",
-                      [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
-                       (implicit EFLAGS)]>;
-  def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                     "add{w}\t{$src2, $dst|$dst, $src2}",
-                     [(store (add (load addr:$dst), i16immSExt8:$src2),
-                                  addr:$dst),
-                      (implicit EFLAGS)]>, OpSize;
-  def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "add{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (add (load addr:$dst), i32immSExt8:$src2),
-                               addr:$dst),
-                   (implicit EFLAGS)]>;
-
-  // addition to rAX
-  def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
-                   "add{b}\t{$src, %al|%al, $src}", []>;
-  def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
-                      "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
-                      "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-let isCommutable = 1 in {  // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr   : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr  : I<0x11, MRMDestReg, (outs GR16:$dst),
-                                   (ins GR16:$src1, GR16:$src2),
-                 "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr  : I<0x11, MRMDestReg, (outs GR32:$dst),
-                                   (ins GR32:$src1, GR32:$src2),
-                 "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm   : I<0x12, MRMSrcMem , (outs GR8:$dst), 
-                                   (ins GR8:$src1, i8mem:$src2),
-                 "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm  : I<0x13, MRMSrcMem , (outs GR16:$dst),
-                                   (ins GR16:$src1, i16mem:$src2),
-                 "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
-                 OpSize;
-def ADC32rm  : I<0x13, MRMSrcMem , (outs GR32:$dst),
-                                   (ins GR32:$src1, i32mem:$src2),
-                 "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri   : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "adc{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri  : Ii16<0x81, MRM2r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "adc{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
-                 OpSize;
-def ADC32ri  : Ii32<0x81, MRM2r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "adc{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def ADC8mr   : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                   "adc{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def ADC16mr  : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "adc{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                   OpSize;
-  def ADC32mr  : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "adc{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def ADC8mi   : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
-                      "adc{b}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC16mi  : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                      "adc{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                  OpSize;
-  def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                     "adc{w}\t{$src2, $dst|$dst, $src2}",
-               [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-               OpSize;
-  def ADC32mi  : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                      "adc{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "adc{l}\t{$src2, $dst|$dst, $src2}",
-               [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
-  def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
-                   "adc{b}\t{$src, %al|%al, $src}", []>;
-  def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
-                      "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
-                      "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr  : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                "sub{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS,
-                      (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-                "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS,
-                      (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-                "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS,
-                      (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm  : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
-                                 (ins GR8 :$src1, i8mem :$src2),
-                "sub{b}\t{$src2, $dst|$dst, $src2}",
-                [(set GR8:$dst, EFLAGS,
-                      (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16mem:$src2),
-                "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(set GR16:$dst, EFLAGS,
-                      (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32mem:$src2),
-                "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(set GR32:$dst, EFLAGS,
-                      (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri   : Ii8 <0x80, MRM5r, (outs GR8:$dst),
-                                 (ins GR8:$src1, i8imm:$src2),
-                    "sub{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, EFLAGS,
-                          (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri  : Ii16<0x81, MRM5r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "sub{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, EFLAGS,
-                          (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri  : Ii32<0x81, MRM5r, (outs GR32:$dst),
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "sub{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, EFLAGS,
-                          (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, EFLAGS,
-                         (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, EFLAGS,
-                         (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  // Memory-Register Subtraction
-  def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                   "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-  def SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
-                    (implicit EFLAGS)]>, OpSize;
-  def SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
-                    (implicit EFLAGS)]>;
-
-  // Memory-Integer Subtraction
-  def SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
-                     "sub{b}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
-                      (implicit EFLAGS)]>;
-  def SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                      "sub{w}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
-                       (implicit EFLAGS)]>, OpSize;
-  def SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sub{l}\t{$src2, $dst|$dst, $src2}",
-                      [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
-                       (implicit EFLAGS)]>;
-  def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
-                     "sub{w}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (load addr:$dst), i16immSExt8:$src2),
-                             addr:$dst),
-                      (implicit EFLAGS)]>, OpSize;
-  def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                     "sub{l}\t{$src2, $dst|$dst, $src2}",
-                     [(store (sub (load addr:$dst), i32immSExt8:$src2),
-                             addr:$dst),
-                      (implicit EFLAGS)]>;
-                      
-  def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
-                   "sub{b}\t{$src, %al|%al, $src}", []>;
-  def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
-                      "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
-                      "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-def SBB8rr     : I<0x18, MRMDestReg, (outs GR8:$dst),
-                                     (ins GR8:$src1, GR8:$src2),
-                  "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr    : I<0x19, MRMDestReg, (outs GR16:$dst),
-                                     (ins GR16:$src1, GR16:$src2),
-                  "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr    : I<0x19, MRMDestReg, (outs GR32:$dst),
-                                      (ins GR32:$src1, GR32:$src2),
-                  "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
-  def SBB8mr   : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), 
-                   "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def SBB16mr  : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), 
-                   "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                   OpSize;
-  def SBB32mr  : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def SBB8mi  : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), 
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB16mi  : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                      "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                  OpSize;
-  def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
-                     "sbb{w}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-               OpSize;
-  def SBB32mi  : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                      "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
-                     "sbb{l}\t{$src2, $dst|$dst, $src2}",
-               [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-               
-  def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
-                   "sbb{b}\t{$src, %al|%al, $src}", []>;
-  def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
-                      "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
-                      "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-                   "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), 
-                    (ins GR16:$src1, GR16:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), 
-                    (ins GR32:$src1, GR32:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm   : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm  : I<0x1B, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
-                    OpSize;
-def SBB32rm  : I<0x1B, MRMSrcMem, (outs GR32:$dst),
-                                  (ins GR32:$src1, i32mem:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri   : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri  : Ii16<0x81, MRM3r, (outs GR16:$dst),
-                                 (ins GR16:$src1, i16imm:$src2),
-                    "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
-                                (ins GR16:$src1, i16i8imm:$src2),
-                   "sbb{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
-                   OpSize;
-def SBB32ri  : Ii32<0x81, MRM3r, (outs GR32:$dst), 
-                                 (ins GR32:$src1, i32imm:$src2),
-                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
-                                (ins GR32:$src1, i32i8imm:$src2),
-                   "sbb{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS,
-                       (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS,
-                       (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
-                                  (ins GR16:$src1, i16mem:$src2),
-                 "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, EFLAGS,
-                       (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
-               TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$src1, i32mem:$src2),
-                 "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, EFLAGS,
-                       (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Suprisingly enough, these are not two address instructions!
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
-                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS, 
-                            (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
-                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, EFLAGS,
-                            (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
-                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, EFLAGS,
-                           (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
-                 OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
-                     (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, EFLAGS,
-                           (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
-                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1), imm:$src2))]>,
-                 OpSize;
-def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                     // GR32 = [mem32]*I32
-                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
-                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, EFLAGS,
-                            (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
-                     (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
-                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, EFLAGS,
-                           (X86smul_flag (load addr:$src1),
-                                         i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
-                     (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
-                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, EFLAGS,
-                           (X86smul_flag (load addr:$src1),
-                                         i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in {   // TEST X, Y   --> TEST Y, X
-def TEST8rr  : I<0x84, MRMSrcReg, (outs),  (ins GR8:$src1, GR8:$src2),
-                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs),  (ins GR16:$src1, GR16:$src2),
-                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
-                      0))]>,
-                 OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs),  (ins GR32:$src1, GR32:$src2),
-                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
-                      0))]>;
-}
-
-def TEST8i8  : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
-                   "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
-                     "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
-                     "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm  : I<0x84, MRMSrcMem, (outs),  (ins GR8 :$src1, i8mem :$src2),
-                     "test{b}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
-                       0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs),  (ins GR16:$src1, i16mem:$src2),
-                     "test{w}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR16:$src1,
-                                         (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs),  (ins GR32:$src1, i32mem:$src2),
-                     "test{l}\t{$src2, $src1|$src1, $src2}",
-                     [(set EFLAGS, (X86cmp (and GR32:$src1,
-                                                (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri  : Ii8 <0xF6, MRM0r,                     // flags = GR8  & imm8
-                    (outs),  (ins GR8:$src1, i8imm:$src2),
-                    "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r,                     // flags = GR16 & imm16
-                    (outs),  (ins GR16:$src1, i16imm:$src2),
-                    "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
-                    OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r,                     // flags = GR32 & imm32
-                    (outs),  (ins GR32:$src1, i32imm:$src2),
-                    "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi  : Ii8 <0xF6, MRM0m,                   // flags = [mem8]  & imm8
-                    (outs), (ins i8mem:$src1, i8imm:$src2),
-                    "test{b}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
-                     0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m,                   // flags = [mem16] & imm16
-                    (outs), (ins i16mem:$src1, i16imm:$src2),
-                    "test{w}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
-                     0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m,                   // flags = [mem32] & imm32
-                    (outs), (ins i32mem:$src1, i32imm:$src2),
-                    "test{l}\t{$src2, $src1|$src1, $src2}",
-                    [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
-                     0))]>;
-} // Defs = [EFLAGS]
-
 
 // Condition code ops, incl. set if equal/not equal/...
 let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3374,305 +925,10 @@ def SAHF     : I<0x9E, RawFrm, (outs),  (ins), "sahf", []>;  // flags = AH
 let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>;  // AH = flags
 
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
-                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
-                 [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
-                OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
-
-def SETEr    : I<0x94, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "sete\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
-               TB;                        // GR8 = ==
-def SETEm    : I<0x94, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "sete\t$dst",
-                 [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = ==
-
-def SETNEr   : I<0x95, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setne\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
-               TB;                        // GR8 = !=
-def SETNEm   : I<0x95, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setne\t$dst",
-                 [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = !=
-
-def SETLr    : I<0x9C, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setl\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
-               TB;                        // GR8 = <  signed
-def SETLm    : I<0x9C, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setl\t$dst",
-                 [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <  signed
-
-def SETGEr   : I<0x9D, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setge\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
-               TB;                        // GR8 = >= signed
-def SETGEm   : I<0x9D, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setge\t$dst",
-                 [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >= signed
-
-def SETLEr   : I<0x9E, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setle\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
-               TB;                        // GR8 = <= signed
-def SETLEm   : I<0x9E, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setle\t$dst",
-                 [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <= signed
-
-def SETGr    : I<0x9F, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setg\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
-               TB;                        // GR8 = >  signed
-def SETGm    : I<0x9F, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setg\t$dst",
-                 [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >  signed
-
-def SETBr    : I<0x92, MRM0r,
-                 (outs GR8   :$dst), (ins),
-                 "setb\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
-               TB;                        // GR8 = <  unsign
-def SETBm    : I<0x92, MRM0m,
-                 (outs), (ins i8mem:$dst),
-                 "setb\t$dst",
-                 [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <  unsign
-
-def SETAEr   : I<0x93, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setae\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
-               TB;                        // GR8 = >= unsign
-def SETAEm   : I<0x93, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setae\t$dst",
-                 [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >= unsign
-
-def SETBEr   : I<0x96, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setbe\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
-               TB;                        // GR8 = <= unsign
-def SETBEm   : I<0x96, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setbe\t$dst",
-                 [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <= unsign
-
-def SETAr    : I<0x97, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "seta\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
-               TB;                        // GR8 = >  signed
-def SETAm    : I<0x97, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "seta\t$dst",
-                 [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = >  signed
-
-def SETSr    : I<0x98, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "sets\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
-               TB;                        // GR8 = <sign bit>
-def SETSm    : I<0x98, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "sets\t$dst",
-                 [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = <sign bit>
-def SETNSr   : I<0x99, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setns\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
-               TB;                        // GR8 = !<sign bit>
-def SETNSm   : I<0x99, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setns\t$dst",
-                 [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = !<sign bit>
-
-def SETPr    : I<0x9A, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setp\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
-               TB;                        // GR8 = parity
-def SETPm    : I<0x9A, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setp\t$dst",
-                 [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = parity
-def SETNPr   : I<0x9B, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setnp\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
-               TB;                        // GR8 = not parity
-def SETNPm   : I<0x9B, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setnp\t$dst",
-                 [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = not parity
-
-def SETOr    : I<0x90, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "seto\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
-               TB;                        // GR8 = overflow
-def SETOm    : I<0x90, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "seto\t$dst",
-                 [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = overflow
-def SETNOr   : I<0x91, MRM0r, 
-                 (outs GR8   :$dst), (ins),
-                 "setno\t$dst",
-                 [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
-               TB;                        // GR8 = not overflow
-def SETNOm   : I<0x91, MRM0m, 
-                 (outs), (ins i8mem:$dst),
-                 "setno\t$dst",
-                 [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
-               TB;                        // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
-                 "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
-                    "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
-                    "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr  : I<0x38, MRMDestReg,
-                (outs), (ins GR8 :$src1, GR8 :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
-                (outs), (ins GR16:$src1, GR16:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
-                (outs), (ins GR32:$src1, GR32:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr  : I<0x38, MRMDestMem,
-                (outs), (ins i8mem :$src1, GR8 :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
-                (outs), (ins i16mem:$src1, GR16:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
-                 OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
-                (outs), (ins i32mem:$src1, GR32:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm  : I<0x3A, MRMSrcMem,
-                (outs), (ins GR8 :$src1, i8mem :$src2),
-                "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
-                (outs), (ins GR16:$src1, i16mem:$src2),
-                "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
-                 OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
-                (outs), (ins GR32:$src1, i32mem:$src2),
-                "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
-                    "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
-  def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
-                     "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
-  def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
-                     "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
 
-def CMP8ri  : Ii8<0x80, MRM7r,
-                  (outs), (ins GR8:$src1, i8imm:$src2),
-                  "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                  [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
-                   (outs), (ins GR16:$src1, i16imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
-                   (outs), (ins GR32:$src1, i32imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi  : Ii8 <0x80, MRM7m,
-                   (outs), (ins i8mem :$src1, i8imm :$src2),
-                   "cmp{b}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
-                   (outs), (ins i16mem:$src1, i16imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
-                   OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
-                   (outs), (ins i32mem:$src1, i32imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
-                   (outs), (ins GR16:$src1, i16i8imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
-                    OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
-                   (outs), (ins i16mem:$src1, i16i8imm:$src2),
-                   "cmp{w}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
-                                         i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
-                   (outs), (ins i32mem:$src1, i32i8imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
-                                         i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
-                   (outs), (ins GR32:$src1, i32i8imm:$src2),
-                   "cmp{l}\t{$src2, $src1|$src1, $src2}",
-                   [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
 
-// Bit tests.
-// TODO: BTC, BTR, and BTS
 let Defs = [EFLAGS] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3680,6 +936,9 @@ def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
 def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
@@ -3687,17 +946,23 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
 // only for now.
 
 def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
-               "bt{w}\t{$src2, $src1|$src1, $src2}", 
+               "bt{w}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi16 addr:$src1), GR16:$src2),
 //                (implicit EFLAGS)]
                []
                >, OpSize, TB, Requires<[FastBTMem]>;
 def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-               "bt{l}\t{$src2, $src1|$src1, $src2}", 
+               "bt{l}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi32 addr:$src1), GR32:$src2),
 //                (implicit EFLAGS)]
                []
                >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
+//                (implicit EFLAGS)]
+                []
+                >, TB;
 
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3706,6 +971,10 @@ def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
 def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
@@ -3717,307 +986,129 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
                  ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+                                     i64immSExt8:$src2))]>, TB;
+
 
 def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                 "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                    "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // Defs = [EFLAGS]
 
-// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;  
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
-                         (outs GR32_NOREX:$dst), (ins GR8:$src),
-                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-                         []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
-                         (outs GR32_NOREX:$dst), (ins i8mem:$src),
-                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-                         []>, TB;
-
-let neverHasSideEffects = 1 in {
-  let Defs = [AX], Uses = [AL] in
-  def CBW : I<0x98, RawFrm, (outs), (ins),
-              "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
-  let Defs = [EAX], Uses = [AX] in
-  def CWDE : I<0x98, RawFrm, (outs), (ins),
-              "{cwtl|cwde}", []>;   // EAX = signext(AX)
-
-  let Defs = [AX,DX], Uses = [AX] in
-  def CWD : I<0x99, RawFrm, (outs), (ins),
-              "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
-  let Defs = [EAX,EDX], Uses = [EAX] in
-  def CDQ : I<0x99, RawFrm, (outs), (ins),
-              "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isCodeGenOnly = 1 in {
-def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-                 [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
-def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
-                 "",
-                 [(set GR16:$dst, 0)]>, OpSize;
-                 
-// FIXME: Set encoding to pseudo.
-def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. ESP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-                  "leal\t$sym, %eax; "
-                  "call\t___tls_get_addr@PLT",
-                  [(X86tlsaddr tls32addr:$sym)]>,
-                  Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack, on return the 
-// address of the variable is in %eax.  %ecx is trashed during the function 
-// call.  All other registers are preserved.
-let Defs = [EAX, ECX],
-    Uses = [ESP],
-    usesCustomInserter = 1 in
-def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-                "# TLSCall_32",
-                [(X86TLSCall addr:$sym)]>,
-                Requires<[In32BitMode]>;
-                
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "movl\t%gs:$src, $dst",
-                   [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                   "movl\t%fs:$src, $dst",
-                   [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
-    hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
-                    "ret\t#eh_return, addr: $addr",
-                    [(X86ehret GR32:$addr)]>;
-
-}
 
 //===----------------------------------------------------------------------===//
 // Atomic support
 //
 
-// Memory barriers
-
-// TODO: Get this to fold the constant into the instruction.           
-def OR32mrLocked  : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
-                      "lock\n\t"
-                      "or{l}\t{$zero, $dst|$dst, $zero}",
-                      []>, Requires<[In32BitMode]>, LOCK;
-
-let hasSideEffects = 1 in {
-def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
-                     "#MEMBARRIER",
-                     [(X86MemBarrier)]>, Requires<[HasSSE2]>;
-}
 
 // Atomic swap. These are just normal xchg instructions. But since a memory
 // operand is referenced, the atomicity is ensured.
 let Constraints = "$val = $dst" in {
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), 
-                 (ins GR32:$val, i32mem:$ptr),
-               "xchg{l}\t{$val, $ptr|$ptr, $val}", 
-               [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), 
-                 (ins GR16:$val, i16mem:$ptr),
-               "xchg{w}\t{$val, $ptr|$ptr, $val}", 
-               [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, 
-                OpSize;
 def XCHG8rm  : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "xchg{b}\t{$val, $ptr|$ptr, $val}", 
+               "xchg{b}\t{$val, $ptr|$ptr, $val}",
                [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+               "xchg{w}\t{$val, $ptr|$ptr, $val}",
+               [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+                OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+               "xchg{l}\t{$val, $ptr|$ptr, $val}",
+               [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+                  "xchg{q}\t{$val, $ptr|$ptr, $val}",
+                  [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
 
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
-                 "xchg{l}\t{$val, $src|$src, $val}", []>;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
-                 "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
 def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
                 "xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+                 "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+                 "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+                  "xchg{q}\t{$val, $src|$src, $val}", []>;
 }
 
 def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
                   "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
 def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
                   "xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+                  "xchg{q}\t{$src, %rax|%rax, $src}", []>;
 
-// Atomic compare and swap.
-let Defs = [EAX, EFLAGS], Uses = [EAX] in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
-               "lock\n\t"
-               "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
-}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
-               "lock\n\t"
-               "cmpxchg8b\t$ptr",
-               [(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
-
-let Defs = [AX, EFLAGS], Uses = [AX] in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
-               "lock\n\t"
-               "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
-}
-let Defs = [AL, EFLAGS], Uses = [AL] in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
-               "lock\n\t"
-               "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
-}
 
-// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
-               "lock\n\t"
-               "xadd{l}\t{$val, $ptr|$ptr, $val}",
-               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
-                TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
-               "lock\n\t"
-               "xadd{w}\t{$val, $ptr|$ptr, $val}",
-               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
-                TB, OpSize, LOCK;
-def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "lock\n\t"
-               "xadd{b}\t{$val, $ptr|$ptr, $val}",
-               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
-                TB, LOCK;
-}
 
 def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                 "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -4025,6 +1116,8 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def XADD32rr  : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr  : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 let mayLoad = 1, mayStore = 1 in {
 def XADD8rm   : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4033,6 +1126,9 @@ def XADD16rm  : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def XADD32rm  : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm  : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                   "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
 }
 
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
@@ -4041,6 +1137,8 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def CMPXCHG32rr  : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr  : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
 
 let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm   : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4049,284 +1147,29 @@ def CMPXCHG16rm  : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                      "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def CMPXCHG32rm  : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                      "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
                   "cmpxchg8b\t$dst", []>, TB;
 
-// Optimized codegen when the non-memory output is not used.
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-def LOCK_ADD8mr  : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-                    "lock\n\t"
-                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "lock\n\t"
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-                    "lock\n\t"
-                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-                    "lock\n\t"
-                     "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                    "lock\n\t"
-                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                    "lock\n\t"
-                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "inc{b}\t$dst", []>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "inc{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "inc{l}\t$dst", []>, LOCK;
-
-def LOCK_SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-                    "lock\n\t"
-                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "lock\n\t"
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
-                    "lock\n\t"
-                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
-                    "lock\n\t"
-                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
-                    "lock\n\t"
-                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
-                    "lock\n\t"
-                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-                    "lock\n\t"
-                     "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-                    "lock\n\t"
-                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "dec{b}\t$dst", []>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "dec{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "dec{l}\t$dst", []>, LOCK;
-}
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+                    "cmpxchg16b\t$dst", []>, TB;
 
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
-                  usesCustomInserter = 1 in {
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMAND32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMOR32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMXOR32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMNAND32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
-               "#ATOMMIN32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMMAX32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMIN32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMAX32 PSEUDO!", 
-               [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMAND16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMOR16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMXOR16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMNAND16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
-               "#ATOMMIN16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMMAX16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMIN16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMAX16 PSEUDO!", 
-               [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMAND8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMOR8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMXOR8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMNAND8 PSEUDO!", 
-               [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-}
 
-let Constraints = "$val1 = $dst1, $val2 = $dst2", 
-                  Defs = [EFLAGS, EAX, EBX, ECX, EDX],
-                  Uses = [EAX, EBX, ECX, EDX],
-                  mayLoad = 1, mayStore = 1,
-                  usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSWAP6432 PSEUDO!", []>;
-}
 
-// Segmentation support instructions.
-
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 
-                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), 
-                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; 
-def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
-                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; 
-def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-                
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
-             "str{w}\t{$dst}", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
-             "ltr{w}\t{$src}", []>, TB;
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
-             "ltr{w}\t{$src}", []>, TB;
-             
-def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{w}\t%fs", []>, OpSize, TB;
-def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
-                 "push{l}\t%fs", []>, TB;
-def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{w}\t%gs", []>, OpSize, TB;
-def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
-                 "push{l}\t%gs", []>, TB;
-
-def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{w}\t%fs", []>, OpSize, TB;
-def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
-                "pop{l}\t%fs", []>, TB;
-def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{w}\t%gs", []>, OpSize, TB;
-def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
-                "pop{l}\t%gs", []>, TB;
-
-def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lds{l}\t{$src, $dst|$dst, $src}", []>;
-def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "les{l}\t{$src, $dst|$dst, $src}", []>;
-def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
-                "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
-                "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
-              "verr\t$seg", []>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
-              "verr\t$seg", []>, TB;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
-              "verw\t$seg", []>, TB;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
-              "verw\t$seg", []>, TB;
-
-// Descriptor-table support instructions
-
-def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
-              "sgdt\t$dst", []>, TB;
-def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
-              "sidt\t$dst", []>, TB;
-def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
-                "sldt{w}\t$dst", []>, TB;
-def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
-                "sldt{w}\t$dst", []>, TB;
-def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
-              "lgdt\t$src", []>, TB;
-def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
-              "lidt\t$src", []>, TB;
-def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
-                "lldt{w}\t$src", []>, TB;
-def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
-                "lldt{w}\t$src", []>, TB;
-                
 // Lock instruction prefix
 def LOCK_PREFIX : I<0xF0, RawFrm, (outs),  (ins), "lock", []>;
 
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs),  (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs),  (ins), "data16", []>;
+
 // Repeat string operation instruction prefixes
 // These uses the DF flag in the EFLAGS register to inc or dec ECX
 let Defs = [ECX], Uses = [ECX,EFLAGS] in {
@@ -4336,35 +1179,19 @@ def REP_PREFIX : I<0xF3, RawFrm, (outs),  (ins), "rep", []>;
 def REPNE_PREFIX : I<0xF2, RawFrm, (outs),  (ins), "repne", []>;
 }
 
-// Segment override instruction prefixes
-def CS_PREFIX : I<0x2E, RawFrm, (outs),  (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs),  (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs),  (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs),  (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs),  (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs),  (ins), "gs", []>;
 
 // String manipulation instructions
-
 def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
 def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
 def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
 
 def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
 def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
 def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
 
-// CPU flow control instructions
-
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
-
-// FPU control instructions
-
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
 
 // Flag instructions
-
 def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
 def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
 def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
@@ -4376,620 +1203,423 @@ def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
 def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
 
 // Table lookup instructions
-
 def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
 
-// Specialized register support
-
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
-
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), 
-                "smsw{w}\t$dst", []>, OpSize, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), 
-                "smsw{l}\t$dst", []>, TB;
-// For memory operands, there is only a 16-bit form
-def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
-                "smsw{w}\t$dst", []>, TB;
-
-def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
-                "lmsw{w}\t$src", []>, TB;
-def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
-                "lmsw{w}\t$src", []>, TB;
-                
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
-
-// Cache instructions
-
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
-
-// VMX instructions
-
-// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
-// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
-// 0F 01 C1
-def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
-def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
-  "vmclear\t$vmcs", []>, OpSize, TB;
-// 0F 01 C2
-def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
-// 0F 01 C3
-def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
-def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
-  "vmptrld\t$vmcs", []>, TB;
-def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
-  "vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
-  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
-  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
-  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
-  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
-  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-// 0F 01 C4
-def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
-def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
-  "vmxon\t{$vmxon}", []>, XS;
+// ASCII Adjust After Addition
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
 
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+                 "aad\t$src", []>, Requires<[In32BitMode]>;
 
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
-          (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
-          (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
-          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
-          (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
-          (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
-          (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
-          (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
-          (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// Calls
-// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
-          (TCRETURNri GR32_TC:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-// FIXME: This is disabled for 32-bit PIC mode because the global base
-// register which is part of the address mode may be assigned a 
-// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
-          (TCRETURNmi addr:$dst, imm:$off)>,
-	  Requires<[In32BitMode, IsNotPIC]>;
-
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>,
-	  Requires<[In32BitMode]>;
-
-// Normal calls, with various flavors of addresses.
-def : Pat<(X86call (i32 tglobaladdr:$dst)),
-          (CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call (i32 texternalsym:$dst)),
-          (CALLpcrel32 texternalsym:$dst)>;
-def : Pat<(X86call (i32 imm:$dst)),
-          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
-          (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
-          (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
-          (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
-          (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
-          (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
-          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR8:$src1, 0),
-          (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(X86cmp GR16:$src1, 0),
-          (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86cmp GR32:$src1, 0),
-          (TEST32rr GR32:$src1, GR32:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
-          (CMOVAE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
-          (CMOVB32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
-          (CMOVNE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
-          (CMOVE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
-          (CMOVA32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
-          (CMOVBE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
-          (CMOVGE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
-          (CMOVL32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
-          (CMOVG32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
-          (CMOVLE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
-          (CMOVNP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO32rm GR32:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
-
-// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+                 "aam\t$src", []>, Requires<[In32BitMode]>;
 
+// ASCII Adjust AL After Subtraction - sets
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
 
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR16:$src1, 128),
-          (SUB16ri8 GR16:$src1, -128)>;
-def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
-          (SUB16mi8 addr:$dst, -128)>;
-def : Pat<(add GR32:$src1, 128),
-          (SUB32ri8 GR32:$src1, -128)>;
-def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
-          (SUB32mi8 addr:$dst, -128)>;
-
-// r & (2^16-1) ==> movz
-def : Pat<(and GR32:$src1, 0xffff),
-          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, 
-                                                             GR32_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
-          (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, 
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR32:$src, i16),
-          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
-      Requires<[In32BitMode]>;
-
-// trunc patterns
-def : Pat<(i16 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          sub_8bit)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          sub_8bit)>,
-      Requires<[In32BitMode]>;
-
-// h-register tricks
-def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
-          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                          sub_8bit_hi)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
-          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
-                          sub_8bit_hi)>,
-      Requires<[In32BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
-          (EXTRACT_SUBREG
-            (MOVZX32rr8
-              (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
-                              sub_8bit_hi)),
-            sub_16bit)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, 
-                                                             GR16_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
-          (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, 
-                                                             GR32_ABCD)),
-                                      sub_8bit_hi))>,
-      Requires<[In32BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-
-// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
-          (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
-          (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
-          (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHL32mCL addr:$dst)>;
-
-def : Pat<(srl GR8:$src1, (and CL, 31)),
-          (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
-          (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
-          (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SHR32mCL addr:$dst)>;
-
-def : Pat<(sra GR8:$src1, (and CL, 31)),
-          (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
-          (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
-          (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
-          (SAR32mCL addr:$dst)>;
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
-          (SETB_C32r)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR16:$src1, GR16:$src2),
-          (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or_is_add GR32:$src1, GR32:$src2),
-          (ADD32rr GR32:$src1, GR32:$src2)>;
-} // AddedComplexity
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
 
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
 
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
-          (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
-          (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
-          (ADD32rm GR32:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
-          (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
-          (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
-          (SUB32rm GR32:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
-          (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
-          (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
-          (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
-          (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
-          (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
-          (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
-          (IMUL32rr GR32:$src1, GR32:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
-          (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
-          (IMUL32rm GR32:$src1, addr:$src2)>;
-
-// mul reg, imm
-def : Pat<(mul GR16:$src1, imm:$src2),
-          (IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(mul GR32:$src1, imm:$src2),
-          (IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
-          (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
-          (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// reg = mul mem, imm
-def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
-          (IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
-          (IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
-          (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
-          (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                   "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+                   Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                   "bound\t{$src, $dst|$dst, $src}", []>,
+                   Requires<[In32BitMode]>;
 
-// Patterns for nodes that do not produce flags, for instructions that do.
-
-// Increment reg.
-def : Pat<(add GR8:$src1 ,  1), (INC8r  GR8:$src1)>;
-def : Pat<(add GR16:$src1,  1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1,  1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// Decrement reg.
-def : Pat<(add GR8:$src1 , -1), (DEC8r  GR8:$src1)>;
-def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
-          (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
-          (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
-          (OR32rm GR32:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri  GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
-          (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
-          (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
-          (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
-          (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
-          (XOR32rm GR32:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
-          (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
-          (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
-          (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
-          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
-          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr  GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
-          (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
-          (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
-          (AND32rm GR32:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
-          (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
-          (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
-          (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
-          (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
-          (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+                 "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
 
 //===----------------------------------------------------------------------===//
-// Floating Point Stack Support
+// Subsystems.
 //===----------------------------------------------------------------------===//
 
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// X86-64 Support
-//===----------------------------------------------------------------------===//
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
 
-include "X86Instr64bit.td"
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
 
-//===----------------------------------------------------------------------===//
 // SIMD support (SSE, MMX and AVX)
-//===----------------------------------------------------------------------===//
-
 include "X86InstrFragmentsSIMD.td"
 
-//===----------------------------------------------------------------------===//
 // FMA - Fused Multiply-Add support (requires FMA)
-//===----------------------------------------------------------------------===//
-
 include "X86InstrFMA.td"
 
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
 //===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
+// Assembler Mnemonic Aliases
 //===----------------------------------------------------------------------===//
 
-include "X86InstrSSE.td"
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw",  "cbtw">;
+def : MnemonicAlias<"cwd",  "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl, it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd",  "popfl">;
+
+// FIXME: This is wrong for "push reg".  "push %bx" should turn into pushw in
+// all modes.  However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode.  Similar for "pop %bx"
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz",   "fcmove">;
+def : MnemonicAlias<"fcmova",   "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna",  "fcmovbe">;
+def : MnemonicAlias<"fcmovae",  "fcmovnb">;
+def : MnemonicAlias<"fcomip",   "fcompi">;
+def : MnemonicAlias<"fildq",    "fildll">;
+def : MnemonicAlias<"fldcww",   "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip",  "fucompi">;
+def : MnemonicAlias<"fwait",    "wait">;
+
+
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+  : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+                  !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+  def C   : CondCodeAlias<Prefix, Suffix, "c",   "b">;   // setc   -> setb
+  def Z   : CondCodeAlias<Prefix, Suffix, "z" ,  "e">;   // setz   -> sete
+  def NA  : CondCodeAlias<Prefix, Suffix, "na",  "be">;  // setna  -> setbe
+  def NB  : CondCodeAlias<Prefix, Suffix, "nb",  "ae">;  // setnb  -> setae
+  def NC  : CondCodeAlias<Prefix, Suffix, "nc",  "ae">;  // setnc  -> setae
+  def NG  : CondCodeAlias<Prefix, Suffix, "ng",  "le">;  // setng  -> setle
+  def NL  : CondCodeAlias<Prefix, Suffix, "nl",  "ge">;  // setnl  -> setge
+  def NZ  : CondCodeAlias<Prefix, Suffix, "nz",  "ne">;  // setnz  -> setne
+  def PE  : CondCodeAlias<Prefix, Suffix, "pe",  "p">;   // setpe  -> setp
+  def PO  : CondCodeAlias<Prefix, Suffix, "po",  "np">;  // setpo  -> setnp
+
+  def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">;   // setnae -> setb
+  def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">;   // setnbe -> seta
+  def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">;   // setnge -> setl
+  def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">;   // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
 
 //===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+// Assembler Instruction Aliases
 //===----------------------------------------------------------------------===//
 
-include "X86InstrMMX.td"
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al",  (DIV8r  GR8 :$src)>;
+def : InstAlias<"divw $src, %ax",  (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al",  (DIV8m  i8mem :$src)>;
+def : InstAlias<"divw $src, %ax",  (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al",  (IDIV8r  GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax",  (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al",  (IDIV8m  i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax",  (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp",        (ADD_FPrST0  ST1)>;
+def : InstAlias<"fsubp",        (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp",       (SUB_FPrST0  ST1)>;
+def : InstAlias<"fmulp",        (MUL_FPrST0  ST1)>;
+def : InstAlias<"fdivp",        (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp",       (DIV_FPrST0  ST1)>;
+def : InstAlias<"fxch",         (XCH_F       ST1)>;
+def : InstAlias<"fcomi",        (COM_FIr     ST1)>;
+def : InstAlias<"fcompi",       (COM_FIPr    ST1)>;
+def : InstAlias<"fucom",        (UCOM_Fr     ST1)>;
+def : InstAlias<"fucomp",       (UCOM_FPr    ST1)>;
+def : InstAlias<"fucomi",       (UCOM_FIr    ST1)>;
+def : InstAlias<"fucompi",      (UCOM_FIPr   ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)".  We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),    (Inst RST:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+}
+
+defm : FpUnaryAlias<"fadd",   ADD_FST0r>;
+defm : FpUnaryAlias<"faddp",  ADD_FPrST0>;
+defm : FpUnaryAlias<"fsub",   SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp",  SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr",  SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul",   MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp",  MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv",   DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp",  DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr",  DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi",   COM_FIr>;
+defm : FpUnaryAlias<"fucomi",  UCOM_FIr>;
+defm : FpUnaryAlias<"fcompi",   COM_FIPr>;
+defm : FpUnaryAlias<"fucompi",  UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute.  We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw"     , (FNSTSW8r)>;
+
+// lcall and ljmp aliases.  This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off",  (FARJMP32i  i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst",      (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst",       (FARJMP32m  opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri  GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri  GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp.  jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off",  (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off",   (FARJMP32i  i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off",  (FARJMP16i  i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off",  (FARJMP32i  i32imm:$off, i16imm:$seg)>;
+
+// Force mov without a suffix with a segment and mem to prefer the 'l' form of
+// the move.  All segment/mem forms are equivalent, this has the shortest
+// encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl. (as in rep; movsd)
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem).  Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/*  FIXME: This is disabled because the asm matcher is currently incapable of
+ *  matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+                 (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm  GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm  GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 11d4179534dc..bb2165a8a045 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,6 +11,9 @@
 // and properties of the instructions which are needed for code generation,
 // machine code emission, and analysis.
 //
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -18,58 +21,23 @@
 //===----------------------------------------------------------------------===//
 
 let Constraints = "$src1 = $dst" in {
-  // MMXI_binop_rm - Simple MMX binary operator.
-  multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           ValueType OpVT, bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
-                  (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
-      let isCommutable = Commutable;
-    }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
-                  (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
-                                         (bitconvert
-                                          (load_mmx addr:$src2)))))]>;
-  }
-
+  // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
   multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                                bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+    def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
                  (ins VR64:$src1, VR64:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
       let isCommutable = Commutable;
     }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+    def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
                  (ins VR64:$src1, i64mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId VR64:$src1,
                                    (bitconvert (load_mmx addr:$src2))))]>;
   }
 
-  // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
-  //
-  // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
-  // to collapse (bitconvert VT to VT) into its operand.
-  //
-  multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                 bit Commutable = 0> {
-    def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
-                                  (ins VR64:$src1, VR64:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
-      let isCommutable = Commutable;
-    }
-    def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
-                                  (ins VR64:$src1, i64mem:$src2),
-                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                  [(set VR64:$dst,
-                    (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
-  }
-
   multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                                 string OpcodeStr, Intrinsic IntId,
                                 Intrinsic IntId2> {
@@ -89,14 +57,75 @@ let Constraints = "$src1 = $dst" in {
   }
 }
 
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+                               Intrinsic IntId64> {
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                   [(set VR64:$dst,
+                     (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+                             Intrinsic IntId64> {
+  let isCommutable = 0 in
+  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+       (ins VR64:$src1, VR64:$src2),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+       (ins VR64:$src1, i64mem:$src2),
+        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+       [(set VR64:$dst,
+         (IntId64 VR64:$src1,
+          (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+  def R64irr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+      (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 
+      [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+  def R64irm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+      (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+      [(set VR64:$dst, (IntId VR64:$src1,
+                       (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+                         string asm, Domain d> {
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+                        [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+                    PatFrag ld_frag, string asm, Domain d> {
+  def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+  def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+                   (ins DstRC:$src1, x86memop:$src2), asm,
+              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
 //===----------------------------------------------------------------------===//
-// MMX EMMS & FEMMS Instructions
+// MMX EMMS Instruction
 //===----------------------------------------------------------------------===//
 
 def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms",
                      [(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
-                     [(int_x86_mmx_femms)]>;
 
 //===----------------------------------------------------------------------===//
 // MMX Scalar Instructions
@@ -106,12 +135,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
 def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, 
-                         (v2i32 (scalar_to_vector GR32:$src)))]>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+                         (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
               [(set VR64:$dst,
-               (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+               (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
 let mayStore = 1 in
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", []>;
@@ -123,42 +152,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                              "movd\t{$src, $dst|$dst, $src}",
                              []>;
 
-let neverHasSideEffects = 1 in
 // These are 64 bit moves, but since the OS X assembler doesn't
 // recognize a register-register movq, we write them as
 // movd.
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
-                               "movd\t{$src, $dst|$dst, $src}", []>;
+                               "movd\t{$src, $dst|$dst, $src}", 
+                             [(set GR64:$dst,
+                              (bitconvert VR64:$src))]>;
 def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                              "movd\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst,
-                              (v1i64 (scalar_to_vector GR64:$src)))]>;
-
+                              (bitconvert GR64:$src))]>;
 let neverHasSideEffects = 1 in
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1 in
 def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, (load_mmx addr:$src))]>;
 def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(store (v1i64 VR64:$src), addr:$dst)]>;
+                        [(store (x86mmx VR64:$src), addr:$dst)]>;
 
 def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                           "movdq2q\t{$src, $dst|$dst, $src}",
                           [(set VR64:$dst,
-                            (v1i64 (bitconvert
+                            (x86mmx (bitconvert
                             (i64 (vector_extract (v2i64 VR128:$src),
                                   (iPTR 0))))))]>;
 
 def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
                            "movq2dq\t{$src, $dst|$dst, $src}",
           [(set VR128:$dst,
-            (movl immAllZerosV,
-                  (v2i64 (scalar_to_vector
-                              (i64 (bitconvert (v1i64 VR64:$src)))))))]>;
+            (v2i64 (scalar_to_vector
+                              (i64 (bitconvert (x86mmx VR64:$src))))))]>;
 
 let neverHasSideEffects = 1 in
 def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
@@ -176,34 +204,40 @@ let AddedComplexity = 15 in
 def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                              "movd\t{$src, $dst|$dst, $src}",
               [(set VR64:$dst,
-                    (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+                    (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
                            (ins i32mem:$src),
                              "movd\t{$src, $dst|$dst, $src}",
           [(set VR64:$dst,
-                (v2i32 (X86vzmovl (v2i32
+                (x86mmx (X86vzmovl (x86mmx
                                    (scalar_to_vector (loadi32 addr:$src))))))]>;
 
 // Arithmetic Instructions
-
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
 // -- Addition
-defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8,  1>;
-defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
-defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
-defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
-
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
 defm MMX_PADDSB  : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
 defm MMX_PADDSW  : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
 
 defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
 defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
 
+defm MMX_PHADDW  : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD   : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
 // -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
-defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
-defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
-defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
 
 defm MMX_PSUBSB  : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
 defm MMX_PSUBSW  : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
@@ -211,16 +245,25 @@ defm MMX_PSUBSW  : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
 defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
 defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
 
+defm MMX_PHSUBW  : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD  : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
 // -- Multiplication
-defm MMX_PMULLW  : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+defm MMX_PMULLW  : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
 
 defm MMX_PMULHW  : MMXI_binop_rm_int<0xE5, "pmulhw",  int_x86_mmx_pmulh_w,  1>;
 defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
 defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+                                     int_x86_ssse3_pmul_hr_sw>;
 
 // -- Miscellanea
 defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
 
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+                                     int_x86_ssse3_pmadd_ub_sw>;
 defm MMX_PAVGB   : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
 defm MMX_PAVGW   : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
 
@@ -232,23 +275,17 @@ defm MMX_PMAXSW  : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
 
 defm MMX_PSADBW  : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
 
-// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
-defm MMX_POR  : MMXI_binop_rm_v1i64<0xEB, "por" , or,  1>;
-defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+defm MMX_PSIGNB :  SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW :  SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND :  SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+  defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
 
-let Constraints = "$src1 = $dst" in {
-  def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
-                         (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                         "pandn\t{$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
-                                                  VR64:$src2)))]>;
-  def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
-                         (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                         "pandn\t{$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
-                                                  (load addr:$src2))))]>;
-}
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR  : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,  1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
 
 // Shift Instructions
 defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -270,12 +307,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
 defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                     int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
 
-// Shift up / down and insert zero's.
-def : Pat<(v1i64 (X86vshl     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-def : Pat<(v1i64 (X86vshr     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-
 // Comparison Instructions
 defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
 defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
@@ -285,84 +316,19 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
 defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
 defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
 
-// Conversion Instructions
-
 // -- Unpack Instructions
-let Constraints = "$src1 = $dst" in {
-  // Unpack High Packed Data Instructions
-  def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckh VR64:$src1,
-                                      (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckh VR64:$src1,
-                                       (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckh VR64:$src1,
-                                       (bc_v2i32 (load_mmx addr:$src2)))))]>;
-
-  // Unpack Low Packed Data Instructions
-  def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v8i8 (mmx_unpckl VR64:$src1,
-                                      (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpcklwd\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v4i16 (mmx_unpckl VR64:$src1,
-                                       (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
-  def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
-                             (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
-                             "punpckldq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
-  def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
-                             (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
-                             "punpckldq\t{$src2, $dst|$dst, $src2}",
-                             [(set VR64:$dst,
-                               (v2i32 (mmx_unpckl VR64:$src1,
-                                       (bc_v2i32 (load_mmx addr:$src2)))))]>;
-}
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", 
+                                       int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", 
+                                       int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", 
+                                       int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", 
+                                       int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", 
+                                       int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+                                       int_x86_mmx_punpckldq>;
 
 // -- Pack Instructions
 defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
@@ -370,93 +336,80 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
 defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
 
 // -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
 def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
                           (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
-                            (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+                             (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
 def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
                           (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
                           "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR64:$dst,
-                            (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
-                                              (undef)))]>;
+                             (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+                                                   imm:$src2))]>;
 
-// -- Conversion Instructions
-let neverHasSideEffects = 1 in {
-def MMX_CVTPD2PIrr  : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPD2PIrm  : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
-                            (ins f128mem:$src),
-                            "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PDrr  : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PDrm  : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
-                            (ins i64mem:$src),
-                            "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PSrr  : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
-                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PSrm  : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
-                           (ins i64mem:$src),
-                           "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPS2PIrr  : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPS2PIrm  : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
-                            (ins f128mem:$src),
-                            "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
-                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
-                           "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-} // end neverHasSideEffects
 
 
-// Extract / Insert
-def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
-                    SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
-                                         SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
 
 
-def MMX_PEXTRWri  : MMXIi8<0xC5, MRMSrcReg,
-                           (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+                      f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+                      f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+                      SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+                       f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+                       f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+                       SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+                         i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+                         SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+  defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+                         int_x86_sse_cvtpi2ps,
+                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+                           (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
                            "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
+                           [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
                                              (iPTR imm:$src2)))]>;
 let Constraints = "$src1 = $dst" in {
-  def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+  def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
                       (outs VR64:$dst), 
-                      (ins VR64:$src1, GR32:$src2,i16i8imm:$src3),
+                      (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
-                                               GR32:$src2,(iPTR imm:$src3))))]>;
-  def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+                      [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+                                        GR32:$src2, (iPTR imm:$src3)))]>;
+
+  def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
                      (outs VR64:$dst),
-                     (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+                     (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
                      "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set VR64:$dst,
-                       (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
-                               (i32 (anyext (loadi16 addr:$src2))),
-                               (iPTR imm:$src3))))]>;
+                     [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+                                         (i32 (anyext (loadi16 addr:$src2))),
+                                       (iPTR imm:$src3)))]>;
 }
 
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+                          "pmovmskb\t{$src, $dst|$dst, $src}",
+                          [(set GR32:$dst, 
+                                (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
 // MMX to XMM for vector types
 def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
-                            [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+                            [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
 
 def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
           (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
@@ -464,14 +417,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
 def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
           (v2i64 (MOVQI2PQIrm addr:$src))>;
 
-def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
-                            (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+def : Pat<(v2i64 (MMX_X86movq2dq 
+                    (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
           (v2i64 (MOVDI2PDIrm addr:$src))>;
 
-// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
-                          "pmovmskb\t{$src, $dst|$dst, $src}",
-                          [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+          (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+          (x86mmx (MMX_MOVQ64rm addr:$src))>;
 
 // Misc.
 let Uses = [EDI] in
@@ -483,181 +441,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                            "maskmovq\t{$mask, $src|$src, $mask}",
                            [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
 
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
-  // FIXME: Change encoding to pseudo.
-  def MMX_V_SET0       : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "",
-                              [(set VR64:$dst, (v2i32 immAllZerosV))]>;
-  def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "",
-                              [(set VR64:$dst, (v2i32 immAllOnesV))]>;
-}
-
-let Predicates = [HasMMX] in {
-  def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
-  def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
-  def : Pat<(v8i8  immAllZerosV), (MMX_V_SET0)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Store 64-bit integer vector values.
-def : Pat<(store (v8i8  VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v4i16 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2i32 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v1i64 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-
-// Bit convert.
-def : Pat<(v8i8  (bitconvert (v1i64 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v8i8  (bitconvert (v2i32 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v8i8  (bitconvert (v4i16 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8  VR64:$src))), (v1i64 VR64:$src)>;
-
 // 64-bit bit convert.
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v8i8  (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64  (bitconvert (v8i8 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
-          (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
           (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
-          (MMX_MOVFR642Qrr FR64:$src)>;
-
-let AddedComplexity = 20 in {
-  def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
-}
-
-// Clear top half.
-let AddedComplexity = 15 in {
-  def : Pat<(v2i32 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>;
-}
-
-// Patterns to perform canonical versions of vector shuffling.
-let AddedComplexity = 10 in {
-  def : Pat<(v8i8  (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
-  def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
-  def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
-            (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
-}
 
-let AddedComplexity = 10 in {
-  def : Pat<(v8i8  (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
-  def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
-  def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
-            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-}
 
-// Some special case PANDN patterns.
-// FIXME: Get rid of these.
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
-                  VR64:$src2)),
-          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
-                  (load addr:$src2))),
-          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-
-// Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
-          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Move lower 64-bit of XMM to MMX.
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
-                                                  (iPTR 0))))),
-          (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
-
-// Patterns for vector comparisons
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
-          (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
-
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
-          (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
-          (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-
-// CMOV* - Used to implement the SELECT DAG operation.  Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
-  def CMOV_V1I64 : I<0, Pseudo,
-                    (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
-                    "#CMOV_V1I64 PSEUDO!",
-                    [(set VR64:$dst,
-                      (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
-                                          EFLAGS)))]>;
-}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f5466f83f519..b912949d482f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -14,43 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 
-//===----------------------------------------------------------------------===//
-// SSE scalar FP Instructions
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
-  def CMOV_FR32 : I<0, Pseudo,
-                    (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
-                    "#CMOV_FR32 PSEUDO!",
-                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
-                                                  EFLAGS))]>;
-  def CMOV_FR64 : I<0, Pseudo,
-                    (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
-                    "#CMOV_FR64 PSEUDO!",
-                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
-                                                  EFLAGS))]>;
-  def CMOV_V4F32 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V4F32 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-  def CMOV_V2F64 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V2F64 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-  def CMOV_V2I64 : I<0, Pseudo,
-                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
-                    "#CMOV_V2I64 PSEUDO!",
-                    [(set VR128:$dst,
-                      (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
-                                          EFLAGS)))]>;
-}
-
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 Instructions Classes
 //===----------------------------------------------------------------------===//
@@ -82,17 +45,15 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(
+                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, RC:$src2))]>;
   def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+                                          SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, mem_cpat:$src2))]>;
 }
 
@@ -142,17 +103,15 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-           [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
-                           !strconcat(SSEVer, !strconcat("_",
-                           !strconcat(OpcodeStr, FPSizeStr))))
+           [(set RC:$dst, (!cast<Intrinsic>(
+                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
                  RC:$src1, RC:$src2))], d>;
   def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
-                       !strconcat(SSEVer, !strconcat("_",
-                       !strconcat(OpcodeStr, FPSizeStr))))
+       [(set RC:$dst, (!cast<Intrinsic>(
+                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
              RC:$src1, (mem_frag addr:$src2)))], d>;
 }
 
@@ -221,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
 // Implicitly promote a 64-bit scalar to a vector.
 def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
           (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
 
 let AddedComplexity = 20 in {
 // MOVSSrm zeros the high parts of the register; represent this
@@ -403,7 +368,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
                                  string asm_opr> {
   def PSrm : PI<opc, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-         !strconcat(!strconcat(base_opc,"s"), asm_opr),
+         !strconcat(base_opc, "s", asm_opr),
      [(set RC:$dst,
        (mov_frag RC:$src1,
               (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
@@ -411,7 +376,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
 
   def PDrm : PI<opc, MRMSrcMem,
          (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
-         !strconcat(!strconcat(base_opc,"d"), asm_opr),
+         !strconcat(base_opc, "d", asm_opr),
      [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
                               (scalar_to_vector (loadf64 addr:$src2)))))],
               SSEPackedDouble>, TB, OpSize;
@@ -598,14 +563,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
 
 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).
-multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
-                         string asm, Domain d> {
-  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-                        [(set DstRC:$dst, (Int SrcRC:$src))], d>;
-  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-                        [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
-}
 
 multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
@@ -618,16 +575,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
               [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
 }
 
-multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
-                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
-                    PatFrag ld_frag, string asm, Domain d> {
-  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
-              asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
-  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
-                   (ins DstRC:$src1, x86memop:$src2), asm,
-              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
-}
-
 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                     RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                     PatFrag ld_frag, string asm, bit Is2Addr = 1> {
@@ -669,13 +616,11 @@ defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                       f32mem, load, "cvtss2si">, XS;
 defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                       f32mem, load, "cvtss2si{q}">, XS, REX_W;
-defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                      f128mem, load, "cvtsd2si">, XD;
-defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
-                        f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+                  f128mem, load, "cvtsd2si{q}">, XD, REX_W;
 
-defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
-                        REX_W;
 
 let isAsmParserOnly = 1 in {
   defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -705,29 +650,6 @@ let Constraints = "$src1 = $dst" in {
                         "cvtsi2sd">, XD, REX_W;
 }
 
-// Instructions below don't have an AVX form.
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
-                      f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB;
-defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
-                      f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
-                      SSEPackedDouble>, TB, OpSize;
-defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
-                       f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
-                       SSEPackedSingle>, TB;
-defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
-                       f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
-                       SSEPackedDouble>, TB, OpSize;
-defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
-                         i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
-                         SSEPackedDouble>, TB, OpSize;
-let Constraints = "$src1 = $dst" in {
-  defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
-                         int_x86_sse_cvtpi2ps,
-                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
-                         SSEPackedSingle>, TB;
-}
-
 /// SSE 1 Only
 
 // Aliases for intrinsics
@@ -738,10 +660,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse_cvttss2si64, f32mem, load,
                                     "cvttss2si">, XS, VEX, VEX_W;
 defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                                    f128mem, load, "cvttss2si">, XD, VEX;
+                                    f128mem, load, "cvttsd2si">, XD, VEX;
 defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse2_cvttsd2si64, f128mem, load,
-                                    "cvttss2si">, XD, VEX, VEX_W;
+                                    "cvttsd2si">, XD, VEX, VEX_W;
 }
 defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     f32mem, load, "cvttss2si">, XS;
@@ -749,10 +671,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse_cvttss2si64, f32mem, load,
                                     "cvttss2si{q}">, XS, REX_W;
 defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                                    f128mem, load, "cvttss2si">, XD;
+                                    f128mem, load, "cvttsd2si">, XD;
 defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse2_cvttsd2si64, f128mem, load,
-                                    "cvttss2si{q}">, XD, REX_W;
+                                    "cvttsd2si{q}">, XD, REX_W;
 
 let isAsmParserOnly = 1, Pattern = []<dag> in {
 defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
@@ -790,6 +712,9 @@ def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
 }
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+        Requires<[HasAVX]>;
+
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround FR64:$src))]>;
@@ -817,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
 }
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+        Requires<[HasAVX]>;
+
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -973,9 +901,13 @@ def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq VR128:$src))]>;
 def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+                      "cvttps2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                            (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
 
 
 let isAsmParserOnly = 1 in {
@@ -990,16 +922,6 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                                            (memop addr:$src)))]>,
                       XS, VEX, Requires<[HasAVX]>;
 }
-def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                              (int_x86_sse2_cvttps2dq VR128:$src))]>,
-                      XS, Requires<[HasSSE2]>;
-def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                        "cvttps2dq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq
-                                           (memop addr:$src)))]>,
-                      XS, Requires<[HasSSE2]>;
 
 let isAsmParserOnly = 1 in {
 def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
@@ -1013,13 +935,13 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (memop addr:$src)))]>, VEX;
 }
-def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
-def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
-                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
-                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
-                                             (memop addr:$src)))]>;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
+                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+                                        (memop addr:$src)))]>;
 
 let isAsmParserOnly = 1 in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -1469,9 +1391,11 @@ let AddedComplexity = 10 in {
 /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
 multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                 Domain d> {
-  def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
-              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+  def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                      [(set GR32:$dst, (Int RC:$src))], d>;
+  def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+                !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
 }
 
 // Mask creation
@@ -1522,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
 def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
                  [(set FR64:$dst, fpimm0)]>,
                Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                  [(set FR32:$dst, fp32imm0)]>,
+                  Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                  [(set FR64:$dst, fpimm0)]>,
+                  Requires<[HasAVX]>, TB, OpSize, VEX_4V;
 }
 
 // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
@@ -1654,19 +1584,13 @@ defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
 let isCommutable = 0 in
   defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
     // single r+r
-    [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
-                                       (bc_v2i64 (v4i32 immAllOnesV))),
-                                   VR128:$src2)))],
+    [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
     // double r+r
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                                 (bc_v2i64 (v2f64 VR128:$src2))))],
+    [],
     // single r+m
-    [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (bc_v2i64 (v4i32 immAllOnesV))),
-                                  (memopv2i64 addr:$src2))))],
+    [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
     // double r+m
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                           (memopv2i64 addr:$src2)))]]>;
+    []]>;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Arithmetic Instructions
@@ -2170,7 +2094,7 @@ def : Pat<(X86SFence), (SFENCE)>;
 // We set canFoldAsLoad because this can be converted to a constant-pool
 // load of an all-zeros value if folding it would be beneficial.
 // FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
+// JIT implementation, it does not expand the instructions below like
 // X86MCInstLower does.
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
     isCodeGenOnly = 1 in {
@@ -2277,6 +2201,10 @@ let neverHasSideEffects = 1 in
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", []>;
 
+def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                   "movdqu\t{$src, $dst|$dst, $src}",
+                   []>, XS, Requires<[HasSSE2]>;
+
 let canFoldAsLoad = 1, mayLoad = 1 in {
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
@@ -2606,15 +2534,11 @@ let ExeDomain = SSEPackedInt in {
   }
   def PANDNrr : PDI<0xDF, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                              VR128:$src2)))]>;
+                    "pandn\t{$src2, $dst|$dst, $src2}", []>;
 
   def PANDNrm : PDI<0xDF, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                              (memopv2i64 addr:$src2))))]>;
+                    "pandn\t{$src2, $dst|$dst, $src2}", []>;
 }
 } // Constraints = "$src1 = $dst"
 
@@ -3009,6 +2933,13 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                          (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                       "mov{d|q}\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert GR64:$src))]>;
 
 
 // Move Int Doubleword to Single Scalar
@@ -3051,6 +2982,21 @@ def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                        [(store (i32 (vector_extract (v4i32 VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
+def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                         "mov{d|q}\t{$src, $dst|$dst, $src}",
+                         [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+                                           (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
 // Move Scalar Single to Double Int
 let isAsmParserOnly = 1 in {
 def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
@@ -3532,18 +3478,6 @@ let Constraints = "$src1 = $dst" in {
 // SSSE3 - Packed Absolute Instructions
 //===---------------------------------------------------------------------===//
 
-/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
-                               PatFrag mem_frag64, Intrinsic IntId64> {
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR64:$dst,
-                     (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
-}
 
 /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
 multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3572,19 +3506,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
 }
 
 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
-                              int_x86_ssse3_pabs_b_128>,
-             SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
-                                 int_x86_ssse3_pabs_b>;
-
+                              int_x86_ssse3_pabs_b_128>;
 defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
-                              int_x86_ssse3_pabs_w_128>,
-             SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
-                                 int_x86_ssse3_pabs_w>;
-
+                              int_x86_ssse3_pabs_w_128>;
 defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
-                              int_x86_ssse3_pabs_d_128>,
-             SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
-                              int_x86_ssse3_pabs_d>;
+                              int_x86_ssse3_pabs_d_128>;
 
 //===---------------------------------------------------------------------===//
 // SSSE3 - Packed Binary Operator Instructions
@@ -3611,20 +3537,6 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
          (IntId128 VR128:$src1,
           (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
-multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
-                             PatFrag mem_frag64, Intrinsic IntId64> {
-  let isCommutable = 1 in
-  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
-       (ins VR64:$src1, VR64:$src2),
-        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-       [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
-  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
-       (ins VR64:$src1, i64mem:$src2),
-        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-       [(set VR64:$dst,
-         (IntId64 VR64:$src1,
-          (bitconvert (memopv8i8 addr:$src2))))]>;
-}
 
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
 let isCommutable = 0 in {
@@ -3659,54 +3571,30 @@ defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
 let ImmT = NoImm, Constraints = "$src1 = $dst" in {
 let isCommutable = 0 in {
   defm PHADDW    : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
-                                     int_x86_ssse3_phadd_w_128>,
-                   SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
-                                     int_x86_ssse3_phadd_w>;
+                                     int_x86_ssse3_phadd_w_128>;
   defm PHADDD    : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
-                                     int_x86_ssse3_phadd_d_128>,
-                   SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
-                                     int_x86_ssse3_phadd_d>;
+                                     int_x86_ssse3_phadd_d_128>;
   defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
-                                     int_x86_ssse3_phadd_sw_128>,
-                   SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
-                                     int_x86_ssse3_phadd_sw>;
+                                     int_x86_ssse3_phadd_sw_128>;
   defm PHSUBW    : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
-                                     int_x86_ssse3_phsub_w_128>,
-                    SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
-                                     int_x86_ssse3_phsub_w>;
+                                     int_x86_ssse3_phsub_w_128>;
   defm PHSUBD    : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
-                                     int_x86_ssse3_phsub_d_128>,
-                   SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
-                                     int_x86_ssse3_phsub_d>;
+                                     int_x86_ssse3_phsub_d_128>;
   defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
-                                     int_x86_ssse3_phsub_sw_128>,
-                   SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
-                                     int_x86_ssse3_phsub_sw>;
+                                     int_x86_ssse3_phsub_sw_128>;
   defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
-                                     int_x86_ssse3_pmadd_ub_sw_128>,
-                   SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
-                                     int_x86_ssse3_pmadd_ub_sw>;
-  defm PSHUFB    : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, 
-                                     int_x86_ssse3_pshuf_b_128>,
-                   SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
-                                     int_x86_ssse3_pshuf_b>;
+                                     int_x86_ssse3_pmadd_ub_sw_128>;
+  defm PSHUFB    : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+                                     int_x86_ssse3_pshuf_b_128>;
   defm PSIGNB    : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
-                                     int_x86_ssse3_psign_b_128>,
-                   SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
-                                     int_x86_ssse3_psign_b>;
+                                     int_x86_ssse3_psign_b_128>;
   defm PSIGNW    : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
-                                     int_x86_ssse3_psign_w_128>,
-                   SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
-                                     int_x86_ssse3_psign_w>;
+                                     int_x86_ssse3_psign_w_128>;
   defm PSIGND    : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
-                                       int_x86_ssse3_psign_d_128>,
-                   SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
-                                       int_x86_ssse3_psign_d>;
+                                       int_x86_ssse3_psign_d_128>;
 }
 defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
-                                     int_x86_ssse3_pmul_hr_sw_128>,
-                   SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
-                                     int_x86_ssse3_pmul_hr_sw>;
+                                     int_x86_ssse3_pmul_hr_sw_128>;
 }
 
 def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3714,19 +3602,17 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
 def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
           (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
 
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+          (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+          (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+          (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
 //===---------------------------------------------------------------------===//
 // SSSE3 - Packed Align Instruction Patterns
 //===---------------------------------------------------------------------===//
 
-multiclass ssse3_palign_mm<string asm> {
-  def R64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
-      (ins VR64:$src1, VR64:$src2, i8imm:$src3),
-      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-  def R64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
-      (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
-      !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-}
-
 multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
   def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -3747,28 +3633,9 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
 let isAsmParserOnly = 1, Predicates = [HasAVX] in
   defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
 let Constraints = "$src1 = $dst" in
-  defm PALIGN : ssse3_palign<"palignr">,
-                ssse3_palign_mm<"palignr">;
+  defm PALIGN : ssse3_palign<"palignr">;
 
 let AddedComplexity = 5 in {
-
-def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
-          (PALIGNR64rr VR64:$src2, VR64:$src1,
-                       (SHUFFLE_get_palign_imm VR64:$src3))>,
-          Requires<[HasSSSE3]>;
-
 def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
           (PALIGNR128rr VR128:$src2, VR128:$src1,
                         (SHUFFLE_get_palign_imm VR128:$src3))>,
@@ -3792,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
 //===---------------------------------------------------------------------===//
 
 // Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
-                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT   : I<0x01, MRM_C9, (outs), (ins), "mwait",
-                [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+                [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+                 Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+                Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+      Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+      Requires<[In64BitMode]>;
 
 //===---------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -3811,7 +3695,7 @@ let Predicates = [HasSSE2] in
            (CVTSS2SDrm addr:$src)>;
 
 // bit_convert
-let Predicates = [HasSSE2] in {
+let Predicates = [HasXMMInt] in {
   def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
   def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
   def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3844,6 +3728,10 @@ let Predicates = [HasSSE2] in {
   def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
 }
 
+let Predicates = [HasAVX] in {
+  def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+}
+
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
@@ -4017,36 +3905,11 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
 // vector -> vector casts
 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
           (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
-          (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
-          (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
-          (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+          (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 let Predicates = [HasSSE1] in {
@@ -4504,7 +4367,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             Intrinsic V4F32Int, Intrinsic V2F64Int> {
   // Intrinsic operation, reg.
   // Vector intrinsic operation, reg
-  def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+  def PSr : SS4AIi8<opcps, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4512,7 +4375,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                     OpSize;
 
   // Vector intrinsic operation, mem
-  def PSm_Int : Ii8<opcps, MRMSrcMem,
+  def PSm : Ii8<opcps, MRMSrcMem,
                     (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4522,7 +4385,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                 Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
-  def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4530,7 +4393,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                     OpSize;
 
   // Vector intrinsic operation, mem
-  def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                     (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4543,28 +4406,28 @@ multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
                    RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
   // Intrinsic operation, reg.
   // Vector intrinsic operation, reg
-  def PSr : SS4AIi8<opcps, MRMSrcReg,
+  def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
-  def PSm : Ii8<opcps, MRMSrcMem,
+  def PSm_AVX : Ii8<opcps, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, TA, OpSize, Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
-  def PDr : SS4AIi8<opcpd, MRMSrcReg,
+  def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
-  def PDm : SS4AIi8<opcpd, MRMSrcMem,
+  def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                     "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4576,7 +4439,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
   // Intrinsic operation, reg.
-  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+  def SSr : SS4AIi8<opcss, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4587,7 +4450,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
         OpSize;
 
   // Intrinsic operation, mem.
-  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+  def SSm : SS4AIi8<opcss, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4599,7 +4462,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
         OpSize;
 
   // Intrinsic operation, reg.
-  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+  def SDr : SS4AIi8<opcsd, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4610,7 +4473,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
         OpSize;
 
   // Intrinsic operation, mem.
-  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+  def SDm : SS4AIi8<opcsd, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
         !if(Is2Addr,
             !strconcat(OpcodeStr,
@@ -4625,28 +4488,28 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
 multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
                                    string OpcodeStr> {
   // Intrinsic operation, reg.
-  def SSr : SS4AIi8<opcss, MRMSrcReg,
+  def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
         !strconcat(OpcodeStr,
                 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, mem.
-  def SSm : SS4AIi8<opcss, MRMSrcMem,
+  def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
         !strconcat(OpcodeStr,
                 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, reg.
-  def SDr : SS4AIi8<opcsd, MRMSrcReg,
+  def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
             !strconcat(OpcodeStr,
                 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
         []>, OpSize;
 
   // Intrinsic operation, mem.
-  def SDm : SS4AIi8<opcsd, MRMSrcMem,
+  def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
             !strconcat(OpcodeStr,
                 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4743,6 +4606,29 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
 // SSE4.1 - Misc Instructions
 //===----------------------------------------------------------------------===//
 
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                   "popcnt{w}\t{$src, $dst|$dst, $src}",
+                   [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                   "popcnt{w}\t{$src, $dst|$dst, $src}",
+                   [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                   "popcnt{l}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                   "popcnt{l}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                    "popcnt{q}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                    "popcnt{q}\t{$src, $dst|$dst, $src}",
+                    [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+
+
+
 // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
 multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId128> {
@@ -4981,6 +4867,9 @@ defm BLENDVPD     : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
 defm BLENDVPS     : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
 defm PBLENDVB     : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
 
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+          (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+
 let isAsmParserOnly = 1, Predicates = [HasAVX] in
 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -5032,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
 
 // Packed Compare Implicit Length Strings, Return Mask
 multiclass pseudo_pcmpistrm<string asm> {
-  def REG : Ii8<0, Pseudo, (outs VR128:$dst),
-    (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
+  def REG : PseudoI<(outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
     [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                   imm:$src3))]>;
-  def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
-    (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
+  def MEM : PseudoI<(outs VR128:$dst),
+                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
     [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
                        VR128:$src1, (load addr:$src2), imm:$src3))]>;
 }
@@ -5068,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in {
 
 // Packed Compare Explicit Length Strings, Return Mask
 multiclass pseudo_pcmpestrm<string asm> {
-  def REG : Ii8<0, Pseudo, (outs VR128:$dst),
-    (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+  def REG : PseudoI<(outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
     [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                        VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
-  def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
-    (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
+  def MEM : PseudoI<(outs VR128:$dst),
+                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
     [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                        VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
 }
@@ -5555,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
 def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
           (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
 
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
 def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
           (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
 def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -5562,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
 def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
           (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
 
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v4f32 (VEXTRACTF128rr
+                    (v8f32 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v2f64 (VEXTRACTF128rr
+                    (v4f64 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v4i32 (VEXTRACTF128rr
+                    (v8i32 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v2i64 (VEXTRACTF128rr
+                    (v4i64 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
           (VBROADCASTF128 addr:$src)>;
 
@@ -5673,19 +5596,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
 def : Pat<(X86Movddup (memopv2f64 addr:$src)),
           (MOVDDUPrm addr:$src)>;
 
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
-          (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
-          (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
           (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
           (MOVDDUPrm addr:$src)>;
 
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
           (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
           (MOVDDUPrm addr:$src)>;
 
 def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5700,6 +5618,7 @@ def : Pat<(X86Movddup (bc_v2f64
                            (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
           (MOVDDUPrm addr:$src)>;
 
+
 // Shuffle with UNPCKLPS
 def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
           (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5724,9 +5643,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
 
 // Shuffle with UNPCKLPD
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
-          (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+          (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
-          (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+          (UNPCKLPDrm VR128:$src1, addr:$src2)>;
 
 def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
           (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5735,9 +5654,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
 
 // Shuffle with UNPCKHPD
 def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
-          (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+          (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
 def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
-          (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+          (UNPCKHPDrm VR128:$src1, addr:$src2)>;
 
 def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
           (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5812,10 +5731,18 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
           (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
 
+// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+          (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
 // Shuffle with MOVLHPD
 def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
           (MOVHPDrm VR128:$src1, addr:$src2)>;
+
 // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
 // is during lowering, where it's not possible to recognize the load fold cause
 // it has two uses through a bitcast. One use disappears at isel time and the
@@ -5878,31 +5805,18 @@ def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
           (MOVSLDUPrm addr:$src)>;
 
 // Shuffle with PSHUFHW
-def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
-          (PSHUFHWmi addr:$src, imm:$imm)>;
 def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
           (PSHUFHWri VR128:$src, imm:$imm)>;
 def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
           (PSHUFHWmi addr:$src, imm:$imm)>;
 
 // Shuffle with PSHUFLW
-def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
-          (PSHUFLWmi addr:$src, imm:$imm)>;
 def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
           (PSHUFLWri VR128:$src, imm:$imm)>;
 def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
           (PSHUFLWmi addr:$src, imm:$imm)>;
 
 // Shuffle with PALIGN
-def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
-          (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
-          (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
-          (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
-          (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-
 def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
           (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
@@ -5920,6 +5834,15 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
 def : Pat<(X86Movlps VR128:$src1,
                     (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
           (MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; 
 
 // Shuffle with MOVLPD
 def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 000000000000..8278568184ff
--- /dev/null
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "shl{b}\t{%cl, $dst|$dst, CL}",
+                 [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+                 "shl{w}\t{%cl, $dst|$dst, CL}",
+                 [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+                 "shl{l}\t{%cl, $dst|$dst, CL}",
+                 [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+                  "shl{q}\t{%cl, $dst|$dst, %CL}",
+                  [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+                   "shl{b}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+                   
+let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
+def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+                   "shl{w}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+                   "shl{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), 
+                    (ins GR64:$src1, i8imm:$src2),
+                    "shl{q}\t{$src2, $dst|$dst, $src2}",
+                    [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+def SHL8r1   : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+                 "shl{b}\t$dst", []>;
+def SHL16r1  : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+                 "shl{w}\t$dst", []>, OpSize;
+def SHL32r1  : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+                 "shl{l}\t$dst", []>;
+def SHL64r1  : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+                 "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src = $dst" 
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+                 "shl{b}\t{%cl, $dst|$dst, CL}",
+                 [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+                 "shl{w}\t{%cl, $dst|$dst, CL}",
+                 [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+                 "shl{l}\t{%cl, $dst|$dst, CL}",
+                 [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+                  "shl{q}\t{%cl, $dst|$dst, %CL}",
+                  [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi   : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+                   "shl{b}\t{$src, $dst|$dst, $src}",
+                [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi  : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+                   "shl{w}\t{$src, $dst|$dst, $src}",
+               [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+                   OpSize;
+def SHL32mi  : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+                   "shl{l}\t{$src, $dst|$dst, $src}",
+               [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+                  "shl{q}\t{$src, $dst|$dst, $src}",
+                 [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1   : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+                 "shl{b}\t$dst",
+                [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1  : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+                 "shl{w}\t$dst",
+               [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+                   OpSize;
+def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+                 "shl{l}\t$dst",
+               [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+                  "shl{q}\t$dst",
+                 [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "shr{b}\t{%cl, $dst|$dst, CL}",
+                 [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+                 "shr{w}\t{%cl, $dst|$dst, CL}",
+                 [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+                 "shr{l}\t{%cl, $dst|$dst, CL}",
+                 [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+                  "shr{q}\t{%cl, $dst|$dst, %CL}",
+                  [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                   "shr{b}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri  : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+                   "shr{w}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri  : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+                   "shr{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+                  "shr{q}\t{$src2, $dst|$dst, $src2}",
+                  [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1   : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+                 "shr{b}\t$dst",
+                 [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1  : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+                 "shr{w}\t$dst",
+                 [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+                 "shr{l}\t$dst",
+                 [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+                 "shr{q}\t$dst",
+                 [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+                 "shr{b}\t{%cl, $dst|$dst, CL}",
+                 [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+                 "shr{w}\t{%cl, $dst|$dst, CL}",
+                 [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+                 OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+                 "shr{l}\t{%cl, $dst|$dst, CL}",
+                 [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+                  "shr{q}\t{%cl, $dst|$dst, %CL}",
+                  [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi   : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+                   "shr{b}\t{$src, $dst|$dst, $src}",
+                [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi  : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+                   "shr{w}\t{$src, $dst|$dst, $src}",
+               [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+                   OpSize;
+def SHR32mi  : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+                   "shr{l}\t{$src, $dst|$dst, $src}",
+               [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+                  "shr{q}\t{$src, $dst|$dst, $src}",
+                 [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1   : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+                 "shr{b}\t$dst",
+                [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1  : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+                 "shr{w}\t$dst",
+               [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+                 "shr{l}\t$dst",
+               [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+                  "shr{q}\t$dst",
+                 [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "sar{b}\t{%cl, $dst|$dst, CL}",
+                 [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+                 "sar{w}\t{%cl, $dst|$dst, CL}",
+                 [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+                 "sar{l}\t{%cl, $dst|$dst, CL}",
+                 [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+                 "sar{q}\t{%cl, $dst|$dst, %CL}",
+                 [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+                   "sar{b}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri  : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+                   "sar{w}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+                   OpSize;
+def SAR32ri  : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+                   "sar{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+                    (ins GR64:$src1, i8imm:$src2),
+                    "sar{q}\t{$src2, $dst|$dst, $src2}",
+                    [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1   : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "sar{b}\t$dst",
+                 [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1  : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+                 "sar{w}\t$dst",
+                 [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+                 "sar{l}\t$dst",
+                 [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+                 "sar{q}\t$dst",
+                 [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+                 "sar{b}\t{%cl, $dst|$dst, CL}",
+                 [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+                 "sar{w}\t{%cl, $dst|$dst, CL}",
+                 [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), 
+                 "sar{l}\t{%cl, $dst|$dst, CL}",
+                 [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), 
+                 "sar{q}\t{%cl, $dst|$dst, %CL}",
+                 [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi   : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+                   "sar{b}\t{$src, $dst|$dst, $src}",
+                [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi  : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+                   "sar{w}\t{$src, $dst|$dst, $src}",
+               [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+                   OpSize;
+def SAR32mi  : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+                   "sar{l}\t{$src, $dst|$dst, $src}",
+               [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi  : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+                    "sar{q}\t{$src, $dst|$dst, $src}",
+                 [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1   : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+                 "sar{b}\t$dst",
+                [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1  : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+                 "sar{w}\t$dst",
+               [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+                   OpSize;
+def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+                 "sar{l}\t$dst",
+               [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+                  "sar{q}\t$dst",
+                 [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+               "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+  
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+                "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+                "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+                 "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+               "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+  
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+                "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+                "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+                 
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+                 "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+               "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+                 "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+                "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+                  "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+                "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+                  "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+                 "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+               "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+                 "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+                "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+                  "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+                "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+                  "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+                 "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+                "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+                 "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+                 "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+                  "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+                "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+                 "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+                 "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "rol{b}\t{%cl, $dst|$dst, CL}",
+                 [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+                 "rol{w}\t{%cl, $dst|$dst, CL}",
+                 [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+                 "rol{l}\t{%cl, $dst|$dst, CL}",
+                 [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+                  "rol{q}\t{%cl, $dst|$dst, %CL}",
+                  [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+                   "rol{b}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+                   "rol{w}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, 
+                   OpSize;
+def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+                   "rol{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst), 
+                    (ins GR64:$src1, i8imm:$src2),
+                    "rol{q}\t{$src2, $dst|$dst, $src2}",
+                    [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "rol{b}\t$dst",
+                 [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1  : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+                 "rol{w}\t$dst",
+                 [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+                 "rol{l}\t$dst",
+                 [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1  : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+                  "rol{q}\t$dst",
+                  [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+                 "rol{b}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+                 "rol{w}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+                 "rol{l}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL :  RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+                   "rol{q}\t{%cl, $dst|$dst, %CL}",
+                   [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi   : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+                   "rol{b}\t{$src1, $dst|$dst, $src1}",
+               [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi  : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+                   "rol{w}\t{$src1, $dst|$dst, $src1}",
+              [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+                   OpSize;
+def ROL32mi  : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+                   "rol{l}\t{$src1, $dst|$dst, $src1}",
+              [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi  : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+                    "rol{q}\t{$src1, $dst|$dst, $src1}",
+                [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1   : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+                 "rol{b}\t$dst",
+               [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1  : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+                 "rol{w}\t$dst",
+              [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+                   OpSize;
+def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+                 "rol{l}\t$dst",
+              [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1  : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+                 "rol{q}\t$dst",
+               [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "ror{b}\t{%cl, $dst|$dst, CL}",
+                 [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+                 "ror{w}\t{%cl, $dst|$dst, CL}",
+                 [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+                 "ror{l}\t{%cl, $dst|$dst, CL}",
+                 [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+                  "ror{q}\t{%cl, $dst|$dst, %CL}",
+                  [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+                   "ror{b}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri  : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+                   "ror{w}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, 
+                   OpSize;
+def ROR32ri  : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+                   "ror{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst), 
+                    (ins GR64:$src1, i8imm:$src2),
+                    "ror{q}\t{$src2, $dst|$dst, $src2}",
+                    [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+                 "ror{b}\t$dst",
+                 [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+                 "ror{w}\t$dst",
+                 [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+                 "ror{l}\t$dst",
+                 [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+                  "ror{q}\t$dst",
+                  [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+                 "ror{b}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+                 "ror{w}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), 
+                 "ror{l}\t{%cl, $dst|$dst, CL}",
+                 [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), 
+                  "ror{q}\t{%cl, $dst|$dst, %CL}",
+                  [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi   : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+                   "ror{b}\t{$src, $dst|$dst, $src}",
+               [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi  : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+                   "ror{w}\t{$src, $dst|$dst, $src}",
+              [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+                   OpSize;
+def ROR32mi  : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+                   "ror{l}\t{$src, $dst|$dst, $src}",
+              [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+                    "ror{q}\t{$src, $dst|$dst, $src}",
+                [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+                 "ror{b}\t$dst",
+               [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+                 "ror{w}\t$dst",
+              [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+                   OpSize;
+def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+                 "ror{l}\t$dst",
+              [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+                 "ror{q}\t$dst",
+               [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), 
+                   (ins GR16:$src1, GR16:$src2),
+                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+                   TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), 
+                   (ins GR16:$src1, GR16:$src2),
+                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+                   TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), 
+                   (ins GR32:$src1, GR32:$src2),
+                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+                   (ins GR32:$src1, GR32:$src2),
+                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), 
+                    (ins GR64:$src1, GR64:$src2),
+                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, 
+                    TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), 
+                    (ins GR64:$src1, GR64:$src2),
+                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, 
+                    TB;
+}
+
+let isCommutable = 1 in {  // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+                     (outs GR16:$dst), 
+                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                     [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+                                      (i8 imm:$src3)))]>,
+                     TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+                     (outs GR16:$dst), 
+                     (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                     [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+                                      (i8 imm:$src3)))]>,
+                     TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+                     (outs GR32:$dst), 
+                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                     [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+                                      (i8 imm:$src3)))]>,
+                 TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+                     (outs GR32:$dst), 
+                     (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                     [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+                                      (i8 imm:$src3)))]>,
+                 TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+                      (outs GR64:$dst), 
+                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+                                       (i8 imm:$src3)))]>,
+                 TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+                      (outs GR64:$dst), 
+                      (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+                                       (i8 imm:$src3)))]>,
+                 TB;
+}
+} // Constraints = "$src = $dst"
+
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+                     addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                  "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                  [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+                    addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                   [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+                     addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                  "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+                  [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+                    addr:$dst)]>, TB;
+                    
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+                      addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+                      addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+                    (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+                    "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+                                      (i8 imm:$src3)), addr:$dst)]>,
+                    TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem, 
+                     (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+                                      (i8 imm:$src3)), addr:$dst)]>,
+                     TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+                    (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+                    "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+                                      (i8 imm:$src3)), addr:$dst)]>,
+                    TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem, 
+                     (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                     [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+                                       (i8 imm:$src3)), addr:$dst)]>,
+                     TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+                                       (i8 imm:$src3)), addr:$dst)]>,
+                 TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem, 
+                      (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+                                       (i8 imm:$src3)), addr:$dst)]>,
+                 TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 000000000000..1a58ba0f96ef
--- /dev/null
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,390 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes.  These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+  def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+  def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+  def TRAP    : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+  def UD2B    : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+  def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+              [(int_x86_int (i8 3))]>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+              [(int_x86_int imm:$trap)]>;
+
+def SYSCALL  : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL  : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ  :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+               Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+                 
+def SYSEXIT   : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+                Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+                Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+             Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+//  Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr  : I<0xEC, RawFrm, (outs), (ins),
+               "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+               "in{w}\t{%dx, %ax|%AX, %DX}", []>,  OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+               "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri  : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+                  "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+                  "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+                  "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr  : I<0xEE, RawFrm, (outs), (ins),
+                "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+                "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+                "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir  : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+                   "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+                   "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+                   "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+def IN8  : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>,  OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+                
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+                
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 
+                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), 
+                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), 
+                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; 
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; 
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; 
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+             "str{w}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+             "str{w}\t{$dst}", []>, TB;
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+             "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+             "ltr{w}\t{$src}", []>, TB;
+             
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+                 "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+                 "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+                 "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+                 "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+                 "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+                 "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+                 "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+                 "push{l}\t%es", []>, Requires<[In32BitMode]>;
+                 
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+                 "push{w}\t%fs", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+                 "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+                 "push{w}\t%gs", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+                 "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+                 "push{q}\t%fs", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+                 "push{q}\t%gs", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+                "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+                "pop{l}\t%ss", []>        , Requires<[In32BitMode]>;
+                
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+                "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+                "pop{l}\t%ds", []>        , Requires<[In32BitMode]>;
+                
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+                "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+                "pop{l}\t%es", []>        , Requires<[In32BitMode]>;
+                
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+                "pop{w}\t%fs", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+                "pop{l}\t%fs", []>, TB    , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+                "pop{q}\t%fs", []>, TB;
+                
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+                "pop{w}\t%gs", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+                "pop{l}\t%gs", []>, TB    , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+                "pop{q}\t%gs", []>, TB;
+                 
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+                "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+                "lds{l}\t{$src, $dst|$dst, $src}", []>;
+                
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+                "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+                "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+                 "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+                
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+                "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+                "les{l}\t{$src, $dst|$dst, $src}", []>;
+                
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+                "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+                "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+                 "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+                
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+                "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+                "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+                
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+                 "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+              "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+              "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+              "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+              "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+              "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+              "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+              "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+              "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+                "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+                "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+                "sldt{l}\t$dst", []>, TB;
+                
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+//   extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+                 "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+                 "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+              "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+              "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+              "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+              "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+                "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+                "lldt{w}\t$src", []>, TB;
+                
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), 
+                "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), 
+                "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), 
+                 "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+                "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+                "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+                "lmsw{w}\t$src", []>, TB;
+                
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 000000000000..daf61e4625d4
--- /dev/null
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,54 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+  "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+  "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+  "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+  "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+  "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+  "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+  "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+  "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 6f0a8d91cb58..3f88fa69d0ee 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -19,7 +19,7 @@
 #include "llvm/Function.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Valgrind.h"
 #include <cstdlib>
 #include <cstring>
 using namespace llvm;
@@ -127,9 +127,17 @@ extern "C" {
     "movaps  %xmm6, 96(%rsp)\n"
     "movaps  %xmm7, 112(%rsp)\n"
     // JIT callee
+#ifdef _WIN64
+    "subq    $32, %rsp\n"
+    "movq    %rbp, %rcx\n"    // Pass prev frame and return address
+    "movq    8(%rbp), %rdx\n"
+    "call    " ASMPREFIX "X86CompilationCallback2\n"
+    "addq    $32, %rsp\n"
+#else
     "movq    %rbp, %rdi\n"    // Pass prev frame and return address
     "movq    8(%rbp), %rsi\n"
     "call    " ASMPREFIX "X86CompilationCallback2\n"
+#endif
     // Restore all XMM arg registers
     "movaps  112(%rsp), %xmm7\n"
     "movaps  96(%rsp), %xmm6\n"
@@ -333,11 +341,11 @@ extern "C" {
 extern "C" {
 #if !(defined (X86_64_JIT) && defined(_MSC_VER))
  // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is 
+ // unless we are under 64bit Windows with MSC, where there is
  // no support for inline assembly
 static
 #endif
-void ATTRIBUTE_USED
+void LLVM_ATTRIBUTE_USED
 X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
   intptr_t *RetAddrLoc = &StackPtr[1];
   assert(*RetAddrLoc == RetAddr &&
@@ -462,7 +470,7 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
 
 void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
                                    JITCodeEmitter &JCE) {
-  // Note, we cast to intptr_t here to silence a -pedantic warning that 
+  // Note, we cast to intptr_t here to silence a -pedantic warning that
   // complains about casting a function pointer to a normal pointer.
 #if defined (X86_32_JIT) && !defined (_MSC_VER)
   bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 36badb403e81..6686214e06f5 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 enum AsmWriterFlavorTy {
@@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
   DwarfUsesInlineInfoSection = true;
 
   // Exceptions handling
-  ExceptionsType = ExceptionHandling::Dwarf;
+  ExceptionsType = ExceptionHandling::DwarfTable;
 }
 
 X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   SupportsDebugInformation = true;
 
   // Exceptions handling
-  ExceptionsType = ExceptionHandling::Dwarf;
-  
+  ExceptionsType = ExceptionHandling::DwarfTable;
+
   // OpenBSD has buggy support for .quad in 32-bit mode, just split into two
   // .words.
   if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
@@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
 
 const MCSection *X86ELFMCAsmInfo::
 getNonexecutableStackSection(MCContext &Ctx) const {
-  return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
-                           0, SectionKind::getMetadata(), false);
+  return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+                           0, SectionKind::getMetadata());
 }
 
 X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
-  if (Triple.getArch() == Triple::x86_64)
+  if (Triple.getArch() == Triple::x86_64) {
     GlobalPrefix = "";
+    PrivateGlobalPrefix = ".L";
+  }
 
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 9564fe0b92d4..e6dc74e65d79 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -11,13 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "x86-emitter"
+#define DEBUG_TYPE "mccodeemitter"
 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86FixupKinds.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -37,27 +38,6 @@ public:
 
   ~X86MCCodeEmitter() {}
 
-  unsigned getNumFixupKinds() const {
-    return 5;
-  }
-
-  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
-    const static MCFixupKindInfo Infos[] = {
-      { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
-      { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
-      { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
-      { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
-      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
-    };
-
-    if (Kind < FirstTargetFixupKind)
-      return MCCodeEmitter::getFixupKindInfo(Kind);
-
-    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
-           "Invalid kind!");
-    return Infos[Kind - FirstTargetFixupKind];
-  }
-
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86RegisterInfo::getX86RegNum(MO.getReg());
   }
@@ -170,41 +150,77 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
   unsigned Size = X86II::getSizeOfImm(TSFlags);
   bool isPCRel = X86II::isImmPCRel(TSFlags);
 
-  switch (Size) {
-  default: assert(0 && "Unknown immediate size");
-  case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
-  case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
-  case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
-  case 8: assert(!isPCRel); return FK_Data_8;
-  }
+  return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand.  Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+  
+  if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+    return true;
+  return false;
 }
 
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+  if (Expr->getKind() == MCExpr::Binary) {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+    Expr = BE->getLHS();
+  }
+
+  if (Expr->getKind() != MCExpr::SymbolRef)
+    return false;
+
+  const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+  const MCSymbol &S = Ref->getSymbol();
+  return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
 
 void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
               unsigned &CurByte, raw_ostream &OS,
               SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
-  // If this is a simple integer displacement that doesn't require a relocation,
-  // emit it now.
+  const MCExpr *Expr = NULL;
   if (DispOp.isImm()) {
-    // FIXME: is this right for pc-rel encoding??  Probably need to emit this as
-    // a fixup if so.
-    EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
-    return;
+    // If this is a simple integer displacement that doesn't require a relocation,
+    // emit it now.
+    if (FixupKind != FK_PCRel_1 &&
+	FixupKind != FK_PCRel_2 &&
+	FixupKind != FK_PCRel_4) {
+      EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+      return;
+    }
+    Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+  } else {
+    Expr = DispOp.getExpr();
   }
 
   // If we have an immoffset, add it to the expression.
-  const MCExpr *Expr = DispOp.getExpr();
+  if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+    assert(ImmOffset == 0);
+
+    FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+    ImmOffset = CurByte;
+  }
 
   // If the fixup is pc-relative, we need to bias the value to be relative to
   // the start of the field, not the end of the field.
-  if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
+  if (FixupKind == FK_PCRel_4 ||
       FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
       FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
     ImmOffset -= 4;
-  if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+  if (FixupKind == FK_PCRel_2)
     ImmOffset -= 2;
-  if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
+  if (FixupKind == FK_PCRel_1)
     ImmOffset -= 1;
 
   if (ImmOffset)
@@ -221,10 +237,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
                                         uint64_t TSFlags, unsigned &CurByte,
                                         raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups) const{
-  const MCOperand &Disp     = MI.getOperand(Op+3);
-  const MCOperand &Base     = MI.getOperand(Op);
-  const MCOperand &Scale    = MI.getOperand(Op+1);
-  const MCOperand &IndexReg = MI.getOperand(Op+2);
+  const MCOperand &Disp     = MI.getOperand(Op+X86::AddrDisp);
+  const MCOperand &Base     = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &Scale    = MI.getOperand(Op+X86::AddrScaleAmt);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
   unsigned BaseReg = Base.getReg();
 
   // Handle %rip relative addressing.
@@ -238,8 +254,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
     // movq loads are handled with a special relocation form which allows the
     // linker to eliminate some loads for GOT references which end up in the
     // same linkage unit.
-    if (MI.getOpcode() == X86::MOV64rm ||
-        MI.getOpcode() == X86::MOV64rm_TC)
+    if (MI.getOpcode() == X86::MOV64rm)
       FixupKind = X86::reloc_riprel_4byte_movq_load;
 
     // rip-relative addressing is actually relative to the *next* instruction.
@@ -295,7 +310,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
 
     // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
     EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
-    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+    EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+                  Fixups);
     return;
   }
 
@@ -355,7 +371,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
   if (ForceDisp8)
     EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
   else if (ForceDisp32 || Disp.getImm() != 0)
-    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+    EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+                  Fixups);
 }
 
 /// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
@@ -708,14 +725,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   if ((TSFlags & X86II::Op0Mask) == X86II::REP)
     EmitByte(0xF3, CurByte, OS);
 
+  // Emit the address size opcode prefix as needed.
+  if ((TSFlags & X86II::AdSize) ||
+      (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+    EmitByte(0x67, CurByte, OS);
+  
   // Emit the operand size opcode prefix as needed.
   if (TSFlags & X86II::OpSize)
     EmitByte(0x66, CurByte, OS);
 
-  // Emit the address size opcode prefix as needed.
-  if (TSFlags & X86II::AdSize)
-    EmitByte(0x67, CurByte, OS);
-
   bool Need0FPrefix = false;
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
@@ -806,6 +824,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((TSFlags >> 32) & X86II::VEX_4V)
     HasVEX_4V = true;
 
+  
   // Determine where the memory operand starts, if present.
   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
   if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -815,7 +834,12 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   else
     EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
 
+  
   unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+  
+  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+    BaseOpcode = 0x0F;   // Weird 3DNow! encoding.
+  
   unsigned SrcRegNum = 0;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg:
@@ -828,6 +852,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     EmitByte(BaseOpcode, CurByte, OS);
     break;
       
+  case X86II::RawFrmImm8:
+    EmitByte(BaseOpcode, CurByte, OS);
+    EmitImmediate(MI.getOperand(CurOp++),
+                  X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                  CurByte, OS, Fixups);
+    EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+    break;
   case X86II::RawFrmImm16:
     EmitByte(BaseOpcode, CurByte, OS);
     EmitImmediate(MI.getOperand(CurOp++),
@@ -963,12 +994,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       RegNum |= GetX86RegNum(MO) << 4;
       EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
                     Fixups);
-    } else
+    } else {
+      unsigned FixupKind;
+      // FIXME: Is there a better way to know that we need a signed relocation?
+      if (MI.getOpcode() == X86::MOV64ri32 ||
+          MI.getOpcode() == X86::MOV64mi32 ||
+          MI.getOpcode() == X86::PUSH64i32)
+        FixupKind = X86::reloc_signed_4byte;
+      else
+        FixupKind = getImmFixupKind(TSFlags);
       EmitImmediate(MI.getOperand(CurOp++),
-                    X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                    X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
                     CurByte, OS, Fixups);
+    }
   }
 
+  if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+    EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+  
 
 #ifndef NDEBUG
   // FIXME: Verify.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 8c4620f92177..cbe6db26e5bc 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "InstPrinter/X86ATTInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86AsmPrinter.h"
 #include "X86COFFMachineModuleInfo.h"
@@ -38,11 +39,6 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
 }
 
 
-MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
-  return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
-    getPICBaseSymbol(&MF, Ctx);
-}
-
 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
 /// operand to an MCSymbol.
 MCSymbol *X86MCInstLower::
@@ -154,7 +150,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
     Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
     // Subtract the pic base.
     Expr = MCBinaryExpr::CreateSub(Expr,
-                                   MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+                                  MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
                                                            Ctx),
                                    Ctx);
     break;
@@ -173,7 +169,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
     Expr = MCSymbolRefExpr::Create(Sym, Ctx);
     // Subtract the pic base.
     Expr = MCBinaryExpr::CreateSub(Expr, 
-                               MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
+                            MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
                                    Ctx);
     if (MO.isJTI() && MAI.hasSetDirective()) {
       // If .set directive is supported, use it to reduce the number of
@@ -326,8 +322,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
                        MO.getMBB()->getSymbol(), Ctx));
       break;
     case MachineOperand::MO_GlobalAddress:
-      MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
-      break;
     case MachineOperand::MO_ExternalSymbol:
       MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
       break;
@@ -347,6 +341,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   }
   
   // Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
   switch (OutMI.getOpcode()) {
   case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
     lower_lea64_32mem(&OutMI, 1);
@@ -377,11 +372,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   case X86::SETB_C64r:    LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
   case X86::MOV8r0:       LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
   case X86::MOV32r0:      LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
-  case X86::MMX_V_SET0:   LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
-  case X86::MMX_V_SETALLONES:
-    LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
   case X86::FsFLD0SS:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
   case X86::FsFLD0SD:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+  case X86::VFsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+  case X86::VFsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
   case X86::V_SET0PS:      LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
   case X86::V_SET0PD:      LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
   case X86::V_SET0PI:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
@@ -417,6 +411,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     break;
   }
 
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64: {
+    OutMI = MCInst();
+    OutMI.setOpcode(X86::RET);
+    break;
+  }
+
   // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
   case X86::TAILJMPr:
   case X86::TAILJMPd:
@@ -436,6 +437,19 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     break;
   }
 
+  // These are pseudo-ops for OR to help with the OR->ADD transformation.  We do
+  // this with an ugly goto in case the resultant OR uses EAX and needs the
+  // short form.
+  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+      
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form.  The JIT can only handle the large form because
   // it does not do relaxation.  For now, translate the large form to the
@@ -513,6 +527,66 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   }
 }
 
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+                         X86MCInstLower &MCInstLowering,
+                         const MachineInstr &MI) {
+  bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+  MCContext &context = OutStreamer.getContext();
+
+  if (is64Bits) {
+    MCInst prefix;
+    prefix.setOpcode(X86::DATA16_PREFIX);
+    OutStreamer.EmitInstruction(prefix);
+  }
+  MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+  const MCSymbolRefExpr *symRef =
+    MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+  MCInst LEA;
+  if (is64Bits) {
+    LEA.setOpcode(X86::LEA64r);
+    LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+    LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+    LEA.addOperand(MCOperand::CreateImm(1));        // scale
+    LEA.addOperand(MCOperand::CreateReg(0));        // index
+    LEA.addOperand(MCOperand::CreateExpr(symRef));  // disp
+    LEA.addOperand(MCOperand::CreateReg(0));        // seg
+  } else {
+    LEA.setOpcode(X86::LEA32r);
+    LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+    LEA.addOperand(MCOperand::CreateReg(0));        // base
+    LEA.addOperand(MCOperand::CreateImm(1));        // scale
+    LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+    LEA.addOperand(MCOperand::CreateExpr(symRef));  // disp
+    LEA.addOperand(MCOperand::CreateReg(0));        // seg
+  }
+  OutStreamer.EmitInstruction(LEA);
+
+  if (is64Bits) {
+    MCInst prefix;
+    prefix.setOpcode(X86::DATA16_PREFIX);
+    OutStreamer.EmitInstruction(prefix);
+    prefix.setOpcode(X86::DATA16_PREFIX);
+    OutStreamer.EmitInstruction(prefix);
+    prefix.setOpcode(X86::REX64_PREFIX);
+    OutStreamer.EmitInstruction(prefix);
+  }
+
+  MCInst call;
+  if (is64Bits)
+    call.setOpcode(X86::CALL64pcrel32);
+  else
+    call.setOpcode(X86::CALLpcrel32);
+  StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+  MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+  const MCSymbolRefExpr *tlsRef =
+    MCSymbolRefExpr::Create(tlsGetAddr,
+                            MCSymbolRefExpr::VK_PLT,
+                            context);
+
+  call.addOperand(MCOperand::CreateExpr(tlsRef));
+  OutStreamer.EmitInstruction(call);
+}
 
 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   X86MCInstLower MCInstLowering(Mang, *MF, *this);
@@ -532,13 +606,26 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
     return;
         
+
+  case X86::EH_RETURN:
+  case X86::EH_RETURN64: {
+    // Lower these as normal, but add some comments.
+    unsigned Reg = MI->getOperand(0).getReg();
+    OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+                           X86ATTInstPrinter::getRegisterName(Reg));
+    break;
+  }
   case X86::TAILJMPr:
   case X86::TAILJMPd:
   case X86::TAILJMPd64:
     // Lower these as normal, but add some comments.
     OutStreamer.AddComment("TAILCALL");
     break;
-      
+
+  case X86::TLS_addr32:
+  case X86::TLS_addr64:
+    return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
   case X86::MOVPC32r: {
     MCInst TmpInst;
     // This is a pseudo op for a two instruction sequence with a label, which
@@ -548,7 +635,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     //     popl %esi
     
     // Emit the call.
-    MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol();
+    MCSymbol *PICBase = MF->getPICBaseSymbol();
     TmpInst.setOpcode(X86::CALLpcrel32);
     // FIXME: We would like an efficient form for this, so we don't have to do a
     // lot of extra uniquing.
@@ -586,7 +673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     
     const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
     const MCExpr *PICBase =
-      MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext);
+      MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
     DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
     
     DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), 
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 539b09be6fd7..021007239128 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -40,8 +40,6 @@ public:
   
   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
 
-  MCSymbol *GetPICBaseSymbol() const;
-  
   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
   
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 000000000000..8f3dd3222489
--- /dev/null
+++ b/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+public:
+  X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+                               /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+};
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+                                                bool Is64Bit,
+                                                uint32_t CPUType,
+                                                uint32_t CPUSubtype) {
+  return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+                                                        CPUType,
+                                                        CPUSubtype),
+                                OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index fedd49ebb540..2f6bd88c6526 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -31,7 +31,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -41,7 +41,7 @@
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
-static cl::opt<bool>
+cl::opt<bool>
 ForceStackAlign("force-align-stack",
                  cl::desc("Force align the stack to the minimum alignment"
                            " needed for the function."),
@@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
   const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
   Is64Bit = Subtarget->is64Bit();
   IsWin64 = Subtarget->isTargetWin64();
-  StackAlign = TM.getFrameInfo()->getStackAlignment();
+  StackAlign = TM.getFrameLowering()->getStackAlignment();
 
   if (Is64Bit) {
     SlotSize = 8;
@@ -159,46 +159,21 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
   case X86::YMM7: case X86::YMM15: case X86::MM7:
     return 7;
 
-  case X86::ES:
-    return 0;
-  case X86::CS:
-    return 1;
-  case X86::SS:
-    return 2;
-  case X86::DS:
-    return 3;
-  case X86::FS:
-    return 4;
-  case X86::GS:
-    return 5;
-
-  case X86::CR0:
-    return 0;
-  case X86::CR1:
-    return 1;
-  case X86::CR2:
-    return 2;
-  case X86::CR3:
-    return 3;
-  case X86::CR4:
-    return 4;
-
-  case X86::DR0:
-    return 0;
-  case X86::DR1:
-    return 1;
-  case X86::DR2:
-    return 2;
-  case X86::DR3:
-    return 3;
-  case X86::DR4:
-    return 4;
-  case X86::DR5:
-    return 5;
-  case X86::DR6:
-    return 6;
-  case X86::DR7:
-    return 7;
+  case X86::ES: return 0;
+  case X86::CS: return 1;
+  case X86::SS: return 2;
+  case X86::DS: return 3;
+  case X86::FS: return 4;
+  case X86::GS: return 5;
+
+  case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+  case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+  case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+  case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+  case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+  case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+  case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+  case X86::CR7: case X86::CR15: case X86::DR7: return 7;
 
   // Pseudo index registers are equivalent to a "none"
   // scaled index (See Intel Manual 2A, table 2-3)
@@ -295,9 +270,14 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
     }
     break;
   case X86::sub_32bit:
-    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+    if (B == &X86::GR32RegClass) {
       if (A->getSize() == 8)
         return A;
+    } else if (B == &X86::GR32_NOSPRegClass) {
+      if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+        return &X86::GR64_NOSPRegClass;
+      if (A->getSize() == 8)
+        return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
     } else if (B == &X86::GR32_ABCDRegClass) {
       if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
           A == &X86::GR64_NOREXRegClass ||
@@ -336,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return &X86::GR64RegClass;
     return &X86::GR32RegClass;
-  case 1: // Normal GRPs except the stack pointer (for encoding reasons).
+  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       return &X86::GR64_NOSPRegClass;
     return &X86::GR32_NOSPRegClass;
+  case 2: // Available for tailcall (not callee-saved GPRs).
+    if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+      return &X86::GR64_TCW64RegClass;
+    if (TM.getSubtarget<X86Subtarget>().is64Bit())
+      return &X86::GR64_TCRegClass;
+    return &X86::GR32_TCRegClass;
   }
 }
 
@@ -408,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   // Set the stack-pointer register and its aliases as reserved.
   Reserved.set(X86::RSP);
   Reserved.set(X86::ESP);
@@ -420,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(X86::IP);
 
   // Set the frame-pointer register and its aliases as reserved if needed.
-  if (hasFP(MF)) {
+  if (TFI->hasFP(MF)) {
     Reserved.set(X86::RBP);
     Reserved.set(X86::EBP);
     Reserved.set(X86::BP);
@@ -445,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register.  This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const MachineModuleInfo &MMI = MF.getMMI();
-
-  return (DisableFramePointerElim(MF) ||
-          needsStackRealignment(MF) ||
-          MFI->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken() ||
-          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
-          MMI.callsUnwindInit());
-}
-
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   return (RealignStack &&
@@ -478,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   if (0 && requiresRealignment && MFI->hasVarSizedObjects())
     report_fatal_error(
       "Stack realignment in presense of dynamic allocas is not supported");
-    
+
   // If we've requested that we force align the stack do so now.
   if (ForceStackAlign)
     return canRealignStack(MF);
-    
-  return requiresRealignment && canRealignStack(MF);
-}
 
-bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo()->hasVarSizedObjects();
+  return requiresRealignment && canRealignStack(MF);
 }
 
 bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
                                            unsigned Reg, int &FrameIdx) const {
-  if (Reg == FramePtr && hasFP(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (Reg == FramePtr && TFI->hasFP(MF)) {
     FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
     return true;
   }
   return false;
 }
 
-int
-X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
-  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
-  uint64_t StackSize = MFI->getStackSize();
-
-  if (needsStackRealignment(MF)) {
-    if (FI < 0) {
-      // Skip the saved EBP.
-      Offset += SlotSize;
-    } else {
-      unsigned Align = MFI->getObjectAlignment(FI);
-      assert((-(Offset + StackSize)) % Align == 0);
-      Align = 0;
-      return Offset + StackSize;
-    }
-    // FIXME: Support tail calls
-  } else {
-    if (!hasFP(MF))
-      return Offset + StackSize;
-
-    // Skip the saved EBP.
-    Offset += SlotSize;
-
-    // Skip the RETADDR move area
-    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-    if (TailCallReturnAddrDelta < 0)
-      Offset -= TailCallReturnAddrDelta;
-  }
-
-  return Offset;
-}
-
 static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
   if (is64Bit) {
     if (isInt<8>(Imm))
@@ -561,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
 void X86RegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
+  int Opcode = I->getOpcode();
+  bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+  DebugLoc DL = I->getDebugLoc();
+  uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+  uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+  I = MBB.erase(I);
+
+  if (!reseveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
     // adjcallstackdown instruction into 'add ESP, <amt>'
     // TODO: consider using push / pop instead of sub + store / add
-    MachineInstr *Old = I;
-    uint64_t Amount = Old->getOperand(0).getImm();
-    if (Amount != 0) {
-      // We need to keep the stack aligned properly.  To do this, we round the
-      // amount of space needed for the outgoing arguments up to the next
-      // alignment boundary.
-      Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
-      MachineInstr *New = 0;
-      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
-        New = BuildMI(MF, Old->getDebugLoc(),
-                      TII.get(getSUBriOpcode(Is64Bit, Amount)),
-                      StackPtr)
-          .addReg(StackPtr)
-          .addImm(Amount);
-      } else {
-        assert(Old->getOpcode() == getCallFrameDestroyOpcode());
-
-        // Factor out the amount the callee already popped.
-        uint64_t CalleeAmt = Old->getOperand(1).getImm();
-        Amount -= CalleeAmt;
-  
-      if (Amount) {
-          unsigned Opc = getADDriOpcode(Is64Bit, Amount);
-          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
-            .addReg(StackPtr)
-            .addImm(Amount);
-        }
-      }
+    if (Amount == 0)
+      return;
+
+    // We need to keep the stack aligned properly.  To do this, we round the
+    // amount of space needed for the outgoing arguments up to the next
+    // alignment boundary.
+    Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+    MachineInstr *New = 0;
+    if (Opcode == getCallFrameSetupOpcode()) {
+      New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+                    StackPtr)
+        .addReg(StackPtr)
+        .addImm(Amount);
+    } else {
+      assert(Opcode == getCallFrameDestroyOpcode());
 
-      if (New) {
-        // The EFLAGS implicit def is dead.
-        New->getOperand(3).setIsDead();
+      // Factor out the amount the callee already popped.
+      Amount -= CalleeAmt;
 
-        // Replace the pseudo instruction with a new instruction.
-        MBB.insert(I, New);
+      if (Amount) {
+        unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+        New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+          .addReg(StackPtr).addImm(Amount);
       }
     }
-  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
-    // If we are performing frame pointer elimination and if the callee pops
-    // something off the stack pointer, add it back.  We do this until we have
-    // more advanced stack pointer tracking ability.
-    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
-      unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
-      MachineInstr *Old = I;
-      MachineInstr *New =
-        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), 
-                StackPtr)
-          .addReg(StackPtr)
-          .addImm(CalleeAmt);
 
+    if (New) {
       // The EFLAGS implicit def is dead.
       New->getOperand(3).setIsDead();
+
+      // Replace the pseudo instruction with a new instruction.
       MBB.insert(I, New);
     }
+
+    return;
   }
 
-  MBB.erase(I);
+  if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+    // If we are performing frame pointer elimination and if the callee pops
+    // something off the stack pointer, add it back.  We do this until we have
+    // more advanced stack pointer tracking ability.
+    unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+    MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+      .addReg(StackPtr).addImm(CalleeAmt);
+
+    // The EFLAGS implicit def is dead.
+    New->getOperand(3).setIsDead();
+    MBB.insert(I, New);
+  }
 }
 
 void
@@ -634,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   unsigned i = 0;
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
   while (!MI.getOperand(i).isFI()) {
     ++i;
@@ -650,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   else if (AfterFPPop)
     BasePtr = StackPtr;
   else
-    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+    BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
 
   // This must be part of a four operand memory reference.  Replace the
   // FrameIndex with base register with EBP.  Add an offset to the offset.
@@ -660,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int FIOffset;
   if (AfterFPPop) {
     // Tail call jmp happens after FP is popped.
-    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
     const MachineFrameInfo *MFI = MF.getFrameInfo();
-    FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
+    FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
   } else
-    FIOffset = getFrameIndexOffset(MF, FrameIndex);
+    FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
 
   if (MI.getOperand(i+3).isImm()) {
     // Offset is a 32-bit integer.
@@ -677,710 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 }
 
-void
-X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                      RegScavenger *RS) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
-  if (TailCallReturnAddrDelta < 0) {
-    // create RETURNADDR area
-    //   arg
-    //   arg
-    //   RETADDR
-    //   { ...
-    //     RETADDR area
-    //     ...
-    //   }
-    //   [EBP]
-    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
-                           (-1U*SlotSize)+TailCallReturnAddrDelta, true);
-  }
-
-  if (hasFP(MF)) {
-    assert((TailCallReturnAddrDelta <= 0) &&
-           "The Delta should always be zero or negative");
-    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-
-    // Create a frame entry for the EBP register that must be saved.
-    int FrameIdx = MFI->CreateFixedObject(SlotSize,
-                                          -(int)SlotSize +
-                                          TFI.getOffsetOfLocalArea() +
-                                          TailCallReturnAddrDelta,
-                                          true);
-    assert(FrameIdx == MFI->getObjectIndexBegin() &&
-           "Slot for EBP register must be last in order to be found!");
-    FrameIdx = 0;
-  }
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
-                  const TargetInstrInfo &TII) {
-  bool isSub = NumBytes < 0;
-  uint64_t Offset = isSub ? -NumBytes : NumBytes;
-  unsigned Opc = isSub ?
-    getSUBriOpcode(Is64Bit, Offset) :
-    getADDriOpcode(Is64Bit, Offset);
-  uint64_t Chunk = (1LL << 31) - 1;
-  DebugLoc DL = MBB.findDebugLoc(MBBI);
-
-  while (Offset) {
-    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
-    MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-        .addReg(StackPtr)
-        .addImm(ThisVal);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-    Offset -= ThisVal;
-  }
-}
-
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
-  if (MBBI == MBB.begin()) return;
-
-  MachineBasicBlock::iterator PI = prior(MBBI);
-  unsigned Opc = PI->getOpcode();
-  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
-       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
-      PI->getOperand(0).getReg() == StackPtr) {
-    if (NumBytes)
-      *NumBytes += PI->getOperand(2).getImm();
-    MBB.erase(PI);
-  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
-              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
-             PI->getOperand(0).getReg() == StackPtr) {
-    if (NumBytes)
-      *NumBytes -= PI->getOperand(2).getImm();
-    MBB.erase(PI);
-  }
-}
-
-/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
-static
-void mergeSPUpdatesDown(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator &MBBI,
-                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
-  // FIXME: THIS ISN'T RUN!!!
-  return;
-
-  if (MBBI == MBB.end()) return;
-
-  MachineBasicBlock::iterator NI = llvm::next(MBBI);
-  if (NI == MBB.end()) return;
-
-  unsigned Opc = NI->getOpcode();
-  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
-       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
-      NI->getOperand(0).getReg() == StackPtr) {
-    if (NumBytes)
-      *NumBytes -= NI->getOperand(2).getImm();
-    MBB.erase(NI);
-    MBBI = NI;
-  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
-              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
-             NI->getOperand(0).getReg() == StackPtr) {
-    if (NumBytes)
-      *NumBytes += NI->getOperand(2).getImm();
-    MBB.erase(NI);
-    MBBI = NI;
-  }
-}
-
-/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
-/// SUB.
-static int mergeSPUpdates(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MBBI,
-                           unsigned StackPtr,
-                           bool doMergeWithPrevious) {
-  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
-      (!doMergeWithPrevious && MBBI == MBB.end()))
-    return 0;
-
-  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
-  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
-  unsigned Opc = PI->getOpcode();
-  int Offset = 0;
-
-  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
-       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
-      PI->getOperand(0).getReg() == StackPtr){
-    Offset += PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
-  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
-              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
-             PI->getOperand(0).getReg() == StackPtr) {
-    Offset -= PI->getOperand(2).getImm();
-    MBB.erase(PI);
-    if (!doMergeWithPrevious) MBBI = NI;
-  }
-
-  return Offset;
-}
-
-void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
-                                                MCSymbol *Label,
-                                                unsigned FramePtr) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineModuleInfo &MMI = MF.getMMI();
-
-  // Add callee saved registers to move list.
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  if (CSI.empty()) return;
-
-  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-  const TargetData *TD = MF.getTarget().getTargetData();
-  bool HasFP = hasFP(MF);
-
-  // Calculate amount of bytes used for return address storing.
-  int stackGrowth =
-    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
-     TargetFrameInfo::StackGrowsUp ?
-     TD->getPointerSize() : -TD->getPointerSize());
-
-  // FIXME: This is dirty hack. The code itself is pretty mess right now.
-  // It should be rewritten from scratch and generalized sometimes.
-
-  // Determine maximum offset (minumum due to stack growth).
-  int64_t MaxOffset = 0;
-  for (std::vector<CalleeSavedInfo>::const_iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I)
-    MaxOffset = std::min(MaxOffset,
-                         MFI->getObjectOffset(I->getFrameIdx()));
-
-  // Calculate offsets.
-  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
-  for (std::vector<CalleeSavedInfo>::const_iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
-    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
-    unsigned Reg = I->getReg();
-    Offset = MaxOffset - Offset + saveAreaOffset;
-
-    // Don't output a new machine move if we're re-saving the frame
-    // pointer. This happens when the PrologEpilogInserter has inserted an extra
-    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
-    // generates one when frame pointers are used. If we generate a "machine
-    // move" for this extra "PUSH", the linker will lose track of the fact that
-    // the frame pointer should have the value of the first "PUSH" when it's
-    // trying to unwind.
-    // 
-    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
-    //        another bug. I.e., one where we generate a prolog like this:
-    //
-    //          pushl  %ebp
-    //          movl   %esp, %ebp
-    //          pushl  %ebp
-    //          pushl  %esi
-    //           ...
-    //
-    //        The immediate re-push of EBP is unnecessary. At the least, it's an
-    //        optimization bug. EBP can be used as a scratch register in certain
-    //        cases, but probably not when we have a frame pointer.
-    if (HasFP && FramePtr == Reg)
-      continue;
-
-    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
-    MachineLocation CSSrc(Reg);
-    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
-  }
-}
-
-/// emitPrologue - Push callee-saved registers onto the stack, which
-/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
-/// space for local variables. Also emit labels used by the exception handler to
-/// generate the exception handling frames.
-void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  const Function *Fn = MF.getFunction();
-  const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
-  MachineModuleInfo &MMI = MF.getMMI();
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
-                          !Fn->doesNotThrow() || UnwindTablesMandatory;
-  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
-  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
-  bool HasFP = hasFP(MF);
-  DebugLoc DL;
-
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum SlotSize.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else if (MaxAlign < SlotSize)
-      MaxAlign = SlotSize;
-  }
-
-  // Add RETADDR move area to callee saved frame size.
-  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-  if (TailCallReturnAddrDelta < 0)
-    X86FI->setCalleeSavedFrameSize(
-      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
-  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
-  // function, and use up to 128 bytes of stack space, don't have a frame
-  // pointer, calls, or dynamic alloca then we do not need to adjust the
-  // stack pointer (we fit in the Red Zone).
-  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
-      !needsStackRealignment(MF) &&
-      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->adjustsStack() &&                      // No calls.
-      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
-    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
-    if (HasFP) MinSize += SlotSize;
-    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
-    MFI->setStackSize(StackSize);
-  } else if (Subtarget->isTargetWin64()) {
-    // We need to always allocate 32 bytes as register spill area.
-    // FIXME: We might reuse these 32 bytes for leaf functions.
-    StackSize += 32;
-    MFI->setStackSize(StackSize);
-  }
-
-  // Insert stack pointer adjustment for later moving of return addr.  Only
-  // applies to tail call optimized functions where the callee argument stack
-  // size is bigger than the callers.
-  if (TailCallReturnAddrDelta < 0) {
-    MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL,
-              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
-              StackPtr)
-        .addReg(StackPtr)
-        .addImm(-TailCallReturnAddrDelta);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
-  }
-
-  // Mapping for machine moves:
-  //
-  //   DST: VirtualFP AND
-  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
-  //        ELSE                        => DW_CFA_def_cfa
-  //
-  //   SRC: VirtualFP AND
-  //        DST: Register               => DW_CFA_def_cfa_register
-  //
-  //   ELSE
-  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
-  //        REG < 64                    => DW_CFA_offset + Reg
-  //        ELSE                        => DW_CFA_offset_extended
-
-  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-  const TargetData *TD = MF.getTarget().getTargetData();
-  uint64_t NumBytes = 0;
-  int stackGrowth = -TD->getPointerSize();
-
-  if (HasFP) {
-    // Calculate required stack adjustment.
-    uint64_t FrameSize = StackSize - SlotSize;
-    if (needsStackRealignment(MF))
-      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
-    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
-
-    // Get the offset of the stack slot for the EBP register, which is
-    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
-    // Update the frame offset adjustment.
-    MFI->setOffsetAdjustment(-NumBytes);
-
-    // Save EBP/RBP into the appropriate stack slot.
-    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
-      .addReg(FramePtr, RegState::Kill);
-
-    if (needsFrameMoves) {
-      // Mark the place where EBP/RBP was saved.
-      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
-      // Define the current CFA rule to use the provided offset.
-      if (StackSize) {
-        MachineLocation SPDst(MachineLocation::VirtualFP);
-        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
-        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-      } else {
-        // FIXME: Verify & implement for FP
-        MachineLocation SPDst(StackPtr);
-        MachineLocation SPSrc(StackPtr, stackGrowth);
-        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-      }
-
-      // Change the rule for the FramePtr to be an "offset" rule.
-      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
-      MachineLocation FPSrc(FramePtr);
-      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
-    }
-
-    // Update EBP with the new base value...
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
-        .addReg(StackPtr);
-
-    if (needsFrameMoves) {
-      // Mark effective beginning of when frame pointer becomes valid.
-      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
-      // Define the current CFA to use the EBP/RBP register.
-      MachineLocation FPDst(FramePtr);
-      MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
-    }
-
-    // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
-         I != E; ++I)
-      I->addLiveIn(FramePtr);
-
-    // Realign stack
-    if (needsStackRealignment(MF)) {
-      MachineInstr *MI =
-        BuildMI(MBB, MBBI, DL,
-                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
-                StackPtr).addReg(StackPtr).addImm(-MaxAlign);
-
-      // The EFLAGS implicit def is dead.
-      MI->getOperand(3).setIsDead();
-    }
-  } else {
-    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
-  }
-
-  // Skip the callee-saved push instructions.
-  bool PushedRegs = false;
-  int StackOffset = 2 * stackGrowth;
-
-  while (MBBI != MBB.end() &&
-         (MBBI->getOpcode() == X86::PUSH32r ||
-          MBBI->getOpcode() == X86::PUSH64r)) {
-    PushedRegs = true;
-    ++MBBI;
-
-    if (!HasFP && needsFrameMoves) {
-      // Mark callee-saved push instruction.
-      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
-      // Define the current CFA rule to use the provided offset.
-      unsigned Ptr = StackSize ?
-        MachineLocation::VirtualFP : StackPtr;
-      MachineLocation SPDst(Ptr);
-      MachineLocation SPSrc(Ptr, StackOffset);
-      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
-      StackOffset += stackGrowth;
-    }
-  }
-
-  DL = MBB.findDebugLoc(MBBI);
-
-  // Adjust stack pointer: ESP -= numbytes.
-
-  // Windows and cygwin/mingw require a prologue helper routine when allocating
-  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
-  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe
-  // the stack and adjust the stack pointer in one go.  The 64-bit version
-  // of __chkstk is only responsible for probing the stack.  The 64-bit
-  // prologue is responsible for adjusting the stack pointer.  Touching the
-  // stack at 4K increments is necessary to ensure that the guard pages used
-  // by the OS virtual memory manager are allocated in correct sequence.
-  if (NumBytes >= 4096 &&
-     (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
-    // Check, whether EAX is livein for this function.
-    bool isEAXAlive = false;
-    for (MachineRegisterInfo::livein_iterator
-           II = MF.getRegInfo().livein_begin(),
-           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
-      unsigned Reg = II->first;
-      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                    Reg == X86::AH || Reg == X86::AL);
-    }
-
-
-    const char *StackProbeSymbol =
-      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
-    if (!isEAXAlive) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-        .addImm(NumBytes);
-      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-        .addExternalSymbol(StackProbeSymbol)
-        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-    } else {
-      // Save EAX
-      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
-        .addReg(X86::EAX, RegState::Kill);
-
-      // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
-      // allocated bytes for EAX.
-      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
-        .addImm(NumBytes - 4);
-      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
-        .addExternalSymbol(StackProbeSymbol)
-        .addReg(StackPtr,    RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
-      // Restore EAX
-      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
-                                              X86::EAX),
-                                      StackPtr, false, NumBytes - 4);
-      MBB.insert(MBBI, MI);
-    }
-  } else if (NumBytes) {
-    // If there is an SUB32ri of ESP immediately before this instruction, merge
-    // the two. This can be the case when tail call elimination is enabled and
-    // the callee has more arguments then the caller.
-    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
-    // If there is an ADD32ri or SUB32ri of ESP immediately after this
-    // instruction, merge the two instructions.
-    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
-    if (NumBytes)
-      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
-  }
-
-  if ((NumBytes || PushedRegs) && needsFrameMoves) {
-    // Mark end of stack pointer adjustment.
-    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
-    if (!HasFP && NumBytes) {
-      // Define the current CFA rule to use the provided offset.
-      if (StackSize) {
-        MachineLocation SPDst(MachineLocation::VirtualFP);
-        MachineLocation SPSrc(MachineLocation::VirtualFP,
-                              -StackSize + stackGrowth);
-        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
-      } else {
-        // FIXME: Verify & implement for FP
-        MachineLocation SPDst(StackPtr);
-        MachineLocation SPSrc(StackPtr, stackGrowth);
-        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
-      }
-    }
-
-    // Emit DWARF info specifying the offsets of the callee-saved registers.
-    if (PushedRegs)
-      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
-  }
-}
-
-void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc DL = MBBI->getDebugLoc();
-
-  switch (RetOpcode) {
-  default:
-    llvm_unreachable("Can only insert epilog into returning blocks");
-  case X86::RET:
-  case X86::RETI:
-  case X86::TCRETURNdi:
-  case X86::TCRETURNri:
-  case X86::TCRETURNmi:
-  case X86::TCRETURNdi64:
-  case X86::TCRETURNri64:
-  case X86::TCRETURNmi64:
-  case X86::EH_RETURN:
-  case X86::EH_RETURN64:
-    break;  // These are ok
-  }
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  uint64_t StackSize = MFI->getStackSize();
-  uint64_t MaxAlign  = MFI->getMaxAlignment();
-  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
-  uint64_t NumBytes = 0;
-
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else
-      MaxAlign = MaxAlign ? MaxAlign : 4;
-  }
-
-  if (hasFP(MF)) {
-    // Calculate required stack adjustment.
-    uint64_t FrameSize = StackSize - SlotSize;
-    if (needsStackRealignment(MF))
-      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
-    NumBytes = FrameSize - CSSize;
-
-    // Pop EBP.
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
-  } else {
-    NumBytes = StackSize - CSSize;
-  }
-
-  // Skip the callee-saved pop instructions.
-  MachineBasicBlock::iterator LastCSPop = MBBI;
-  while (MBBI != MBB.begin()) {
-    MachineBasicBlock::iterator PI = prior(MBBI);
-    unsigned Opc = PI->getOpcode();
-
-    if (Opc != X86::POP32r && Opc != X86::POP64r &&
-        !PI->getDesc().isTerminator())
-      break;
-
-    --MBBI;
-  }
-
-  DL = MBBI->getDebugLoc();
-
-  // If there is an ADD32ri or SUB32ri of ESP immediately before this
-  // instruction, merge the two instructions.
-  if (NumBytes || MFI->hasVarSizedObjects())
-    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
-  // If dynamic alloca is used, then reset esp to point to the last callee-saved
-  // slot before popping them off! Same applies for the case, when stack was
-  // realigned.
-  if (needsStackRealignment(MF)) {
-    // We cannot use LEA here, because stack pointer was realigned. We need to
-    // deallocate local frame back.
-    if (CSSize) {
-      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
-      MBBI = prior(LastCSPop);
-    }
-
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
-            StackPtr).addReg(FramePtr);
-  } else if (MFI->hasVarSizedObjects()) {
-    if (CSSize) {
-      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
-      MachineInstr *MI =
-        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
-                     FramePtr, false, -CSSize);
-      MBB.insert(MBBI, MI);
-    } else {
-      BuildMI(MBB, MBBI, DL,
-              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
-        .addReg(FramePtr);
-    }
-  } else if (NumBytes) {
-    // Adjust stack pointer back: ESP += numbytes.
-    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
-  }
-
-  // We're returning from function via eh_return.
-  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
-    MBBI = prior(MBB.end());
-    MachineOperand &DestAddr  = MBBI->getOperand(0);
-    assert(DestAddr.isReg() && "Offset should be in register!");
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
-            StackPtr).addReg(DestAddr.getReg());
-  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
-             RetOpcode == X86::TCRETURNmi ||
-             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
-             RetOpcode == X86::TCRETURNmi64) {
-    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
-    // Tail call return: adjust the stack pointer and jump to callee.
-    MBBI = prior(MBB.end());
-    MachineOperand &JumpTarget = MBBI->getOperand(0);
-    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
-    assert(StackAdjust.isImm() && "Expecting immediate value.");
-
-    // Adjust stack pointer.
-    int StackAdj = StackAdjust.getImm();
-    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
-    int Offset = 0;
-    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
-
-    // Incoporate the retaddr area.
-    Offset = StackAdj-MaxTCDelta;
-    assert(Offset >= 0 && "Offset should never be negative");
-
-    if (Offset) {
-      // Check for possible merge with preceeding ADD instruction.
-      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
-      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
-    }
-
-    // Jump to label or value in register.
-    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
-      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
-                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
-        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                         JumpTarget.getTargetFlags());
-    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
-      MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
-                                       ? X86::TAILJMPm : X86::TAILJMPm64));
-      for (unsigned i = 0; i != 5; ++i)
-        MIB.addOperand(MBBI->getOperand(i));
-    } else if (RetOpcode == X86::TCRETURNri64) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    } else {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    }
-
-    MachineInstr *NewMI = prior(MBBI);
-    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
-      NewMI->addOperand(MBBI->getOperand(i));
-
-    // Delete the pseudo instruction TCRETURN.
-    MBB.erase(MBBI);
-  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
-             (X86FI->getTCReturnAddrDelta() < 0)) {
-    // Add the return addr area delta back since we are not tail calling.
-    int delta = -1*X86FI->getTCReturnAddrDelta();
-    MBBI = prior(MBB.end());
-
-    // Check for possible merge with preceeding ADD instruction.
-    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
-    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
-  }
-}
-
 unsigned X86RegisterInfo::getRARegister() const {
   return Is64Bit ? X86::RIP     // Should have dwarf #16.
                  : X86::EIP;    // Should have dwarf #8.
 }
 
 unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return hasFP(MF) ? FramePtr : StackPtr;
-}
-
-void
-X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
-  // Calculate amount of bytes used for return address storing
-  int stackGrowth = (Is64Bit ? -8 : -4);
-
-  // Initial state of the frame pointer is esp+stackGrowth.
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(StackPtr, stackGrowth);
-  Moves.push_back(MachineMove(0, Dst, Src));
-
-  // Add return address to move list
-  MachineLocation CSDst(StackPtr, stackGrowth);
-  MachineLocation CSSrc(getRARegister());
-  Moves.push_back(MachineMove(0, CSDst, CSSrc));
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  return TFI->hasFP(MF) ? FramePtr : StackPtr;
 }
 
 unsigned X86RegisterInfo::getEHExceptionRegister() const {
@@ -1579,13 +820,13 @@ namespace {
       // Be over-conservative: scan over all vreg defs and find whether vector
       // registers are used. If yes, there is a possibility that vector register
       // will be spilled and thus require dynamic stack realignment.
-      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
-           RegNum < RI.getLastVirtReg(); ++RegNum)
-        if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+      for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+        unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+        if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
           FuncInfo->setReserveFP(true);
           return true;
         }
-
+      }
       // Nothing to do
       return false;
     }
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 527df05c58fc..064be64f4916 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -111,14 +111,10 @@ public:
   /// register scavenger to determine what registers are free.
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   bool canRealignStack(const MachineFunction &MF) const;
 
   bool needsStackRealignment(const MachineFunction &MF) const;
 
-  bool hasReservedCallFrame(const MachineFunction &MF) const;
-
   bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
                             int &FrameIdx) const;
 
@@ -129,19 +125,12 @@ public:
   void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS = NULL) const;
-
-  void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
-                                 unsigned FramePtr) const;
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+  unsigned getStackRegister() const { return StackPtr; }
+  // FIXME: Move to FrameInfok
+  unsigned getSlotSize() const { return SlotSize; }
 
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 95269b15760e..612fac2f3be5 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,10 +1,10 @@
 //===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file describes the X86 Register file, defining the registers themselves,
@@ -34,8 +34,8 @@ let Namespace = "X86" in {
   // because the register file generator is smart enough to figure out that
   // AL aliases AX if we tell it that AX aliased AL (for example).
 
-  // Dwarf numbering is different for 32-bit and 64-bit, and there are 
-  // variations by target as well. Currently the first entry is for X86-64, 
+  // Dwarf numbering is different for 32-bit and 64-bit, and there are
+  // variations by target as well. Currently the first entry is for X86-64,
   // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
   // and debug information on X86-32/Darwin)
 
@@ -81,7 +81,7 @@ let Namespace = "X86" in {
   def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
   }
   def IP : Register<"ip">, DwarfRegNum<[16]>;
-  
+
   // X86-64 only
   let SubRegIndices = [sub_8bit] in {
   def R8W  : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
@@ -103,8 +103,8 @@ let Namespace = "X86" in {
   def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
   def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
   def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
-  def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;  
-  
+  def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
   // X86-64 only
   def R8D  : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
   def R9D  : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
@@ -208,7 +208,7 @@ let Namespace = "X86" in {
   def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
   def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
   def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
-  def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; 
+  def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
 
   // Status flags register
   def EFLAGS : Register<"flags">;
@@ -220,7 +220,7 @@ let Namespace = "X86" in {
   def ES : Register<"es">;
   def FS : Register<"fs">;
   def GS : Register<"gs">;
-  
+
   // Debug registers
   def DR0 : Register<"dr0">;
   def DR1 : Register<"dr1">;
@@ -230,8 +230,8 @@ let Namespace = "X86" in {
   def DR5 : Register<"dr5">;
   def DR6 : Register<"dr6">;
   def DR7 : Register<"dr7">;
-  
-  // Condition registers
+
+  // Control registers
   def CR0 : Register<"cr0">;
   def CR1 : Register<"cr1">;
   def CR2 : Register<"cr2">;
@@ -241,6 +241,13 @@ let Namespace = "X86" in {
   def CR6 : Register<"cr6">;
   def CR7 : Register<"cr7">;
   def CR8 : Register<"cr8">;
+  def CR9 : Register<"cr9">;
+  def CR10 : Register<"cr10">;
+  def CR11 : Register<"cr11">;
+  def CR12 : Register<"cr12">;
+  def CR13 : Register<"cr13">;
+  def CR14 : Register<"cr14">;
+  def CR15 : Register<"cr15">;
 
   // Pseudo index registers
   def EIZ : Register<"eiz">;
@@ -254,10 +261,10 @@ let Namespace = "X86" in {
 // implicitly defined to be the register allocation order.
 //
 
-// List call-clobbered registers before callee-save registers. RBX, RBP, (and 
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
 // R12, R13, R14, and R15 for X86-64) are callee-save registers.
 // In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
-// R8B, ... R15B. 
+// R8B, ... R15B.
 // Allocate R12 and R13 last, as these require an extra byte when
 // encoded in x86_64 instructions.
 // FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
@@ -292,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8],  8,
     GR8Class::iterator
     GR8Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       // Does the function dedicate RBP / EBP to being a frame ptr?
       if (!Subtarget.is64Bit())
         // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
         return begin() + 8;
-      else if (RI->hasFP(MF) || MFI->getReserveFP())
+      else if (TFI->hasFP(MF) || MFI->getReserveFP())
         // If so, don't allocate SPL or BPL.
         return array_endof(X86_GR8_AO_64) - 1;
       else
@@ -337,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16,
     GR16Class::iterator
     GR16Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       if (Subtarget.is64Bit()) {
         // Does the function dedicate RBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate SP or BP.
           return array_endof(X86_GR16_AO_64) - 1;
         else
@@ -350,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
           return array_endof(X86_GR16_AO_64);
       } else {
         // Does the function dedicate EBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate SP or BP.
           return begin() + 6;
         else
@@ -389,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32,
     GR32Class::iterator
     GR32Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       if (Subtarget.is64Bit()) {
         // Does the function dedicate RBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate ESP or EBP.
           return array_endof(X86_GR32_AO_64) - 1;
         else
@@ -402,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
           return array_endof(X86_GR32_AO_64);
       } else {
         // Does the function dedicate EBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate ESP or EBP.
           return begin() + 6;
         else
@@ -429,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
     GR64Class::iterator
     GR64Class::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       if (!Subtarget.is64Bit())
         return begin();  // None of these are allocatable in 32-bit.
       // Does the function dedicate RBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         return end()-3;  // If so, don't allocate RIP, RSP or RBP
       else
         return end()-2;  // If not, just don't allocate RIP or RSP
@@ -446,18 +453,16 @@ def GR64 : RegisterClass<"X86", [i64], 64,
 // Segment registers for use by MOV instructions (and others) that have a
 //   segment register as one operand.  Always contain a 16-bit segment
 //   descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
-}
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
 
 // Debug registers.
 def DEBUG_REG : RegisterClass<"X86", [i32], 32,
-                              [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
-}
+                              [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
 
 // Control registers.
 def CONTROL_REG : RegisterClass<"X86", [i64], 64,
-                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
-}
+                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
+                                 CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
 
 // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
 // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -465,10 +470,8 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
 // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
 // and GR64_ABCD are classes for registers that support 8-bit h-register
 // operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
-}
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
-}
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
 def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
   let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
 }
@@ -493,6 +496,9 @@ def GR64_TC   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
                        (GR32_TC sub_32bit)];
 }
 
+def GR64_TCW64   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+                                                    R8, R9, R11]>;
+
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
                               [AL, CL, DL, AH, CH, DH, BL, BH]> {
@@ -538,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
     GR16_NOREXClass::iterator
     GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       // Does the function dedicate RBP / EBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         // If so, don't allocate SP or BP.
         return end() - 2;
       else
@@ -562,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
     GR32_NOREXClass::iterator
     GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       // Does the function dedicate RBP / EBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         // If so, don't allocate ESP or EBP.
         return end() - 2;
       else
@@ -587,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
     GR64_NOREXClass::iterator
     GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       // Does the function dedicate RBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         // If so, don't allocate RIP, RSP or RBP.
         return end() - 3;
       else
@@ -629,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
     GR32_NOSPClass::iterator
     GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       if (Subtarget.is64Bit()) {
         // Does the function dedicate RBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate EBP.
           return array_endof(X86_GR32_NOSP_AO_64) - 1;
         else
@@ -642,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
           return array_endof(X86_GR32_NOSP_AO_64);
       } else {
         // Does the function dedicate EBP to being a frame ptr?
-        if (RI->hasFP(MF) || MFI->getReserveFP())
+        if (TFI->hasFP(MF) || MFI->getReserveFP())
           // If so, don't allocate EBP.
           return begin() + 6;
         else
@@ -667,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
     GR64_NOSPClass::iterator
     GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       if (!Subtarget.is64Bit())
         return begin();  // None of these are allocatable in 32-bit.
       // Does the function dedicate RBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         return end()-1;  // If so, don't allocate RBP
       else
         return end();  // If not, any reg in this class is ok.
@@ -695,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
     GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
   {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
       const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
       // Does the function dedicate RBP to being a frame ptr?
-      if (RI->hasFP(MF) || MFI->getReserveFP())
+      if (TFI->hasFP(MF) || MFI->getReserveFP())
         // If so, don't allocate RBP.
         return end() - 1;
       else
@@ -784,7 +790,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32,
 }
 
 // Generic vector registers: VR64 and VR128.
-def VR64  : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64: RegisterClass<"X86", [x86mmx], 64,
                           [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
 def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6297a276a111..42e819343b5b 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -32,10 +32,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                              SDValue Dst, SDValue Src,
                                              SDValue Size, unsigned Align,
                                              bool isVolatile,
-                                             const Value *DstSV,
-                                             uint64_t DstSVOff) const {
+                                         MachinePointerInfo DstPtrInfo) const {
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
 
+  // If to a segment-relative address space, use the default lowering.
+  if (DstPtrInfo.getAddrSpace() >= 256)
+    return SDValue();
+  
   // If not DWORD aligned or size is more than the threshold, call the library.
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
@@ -133,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                             Dst, InFlag);
   InFlag = Chain.getValue(1);
 
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
   Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
 
@@ -147,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                                              X86::ECX,
                               Left, InFlag);
     InFlag = Chain.getValue(1);
-    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    Tys = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
     Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
   } else if (BytesLeft) {
@@ -161,7 +164,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                       DAG.getConstant(Offset, AddrVT)),
                           Src,
                           DAG.getConstant(BytesLeft, SizeVT),
-                          Align, isVolatile, DstSV, DstSVOff + Offset);
+                          Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
   }
 
   // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -173,10 +176,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                         SDValue Chain, SDValue Dst, SDValue Src,
                                         SDValue Size, unsigned Align,
                                         bool isVolatile, bool AlwaysInline,
-                                        const Value *DstSV,
-                                        uint64_t DstSVOff,
-                                        const Value *SrcSV,
-                                        uint64_t SrcSVOff) const {
+                                         MachinePointerInfo DstPtrInfo,
+                                         MachinePointerInfo SrcPtrInfo) const {
   // This requires the copy size to be a constant, preferrably
   // within a subtarget-specific limit.
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -186,14 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
   if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
     return SDValue();
 
-  /// If not DWORD aligned, call the library.
-  if ((Align & 3) != 0)
+  /// If not DWORD aligned, it is more efficient to call the library.  However
+  /// if calling the library is not allowed (AlwaysInline), then soldier on as
+  /// the code generated here is better than the long load-store sequence we
+  /// would otherwise get.
+  if (!AlwaysInline && (Align & 3) != 0)
+    return SDValue();
+
+  // If to a segment-relative address space, use the default lowering.
+  if (DstPtrInfo.getAddrSpace() >= 256 ||
+      SrcPtrInfo.getAddrSpace() >= 256)
     return SDValue();
 
-  // DWORD aligned
-  EVT AVT = MVT::i32;
-  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
-    AVT = MVT::i64;
+  MVT AVT;
+  if (Align & 1)
+    AVT = MVT::i8;
+  else if (Align & 2)
+    AVT = MVT::i16;
+  else if (Align & 4)
+    // DWORD aligned
+    AVT = MVT::i32;
+  else
+    // QWORD aligned
+    AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
 
   unsigned UBytes = AVT.getSizeInBits() / 8;
   unsigned CountVal = SizeVal / UBytes;
@@ -214,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                             Src, InFlag);
   InFlag = Chain.getValue(1);
 
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
   SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                 array_lengthof(Ops));
@@ -234,8 +250,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                                 DAG.getConstant(Offset, SrcVT)),
                                     DAG.getConstant(BytesLeft, SizeVT),
                                     Align, isVolatile, AlwaysInline,
-                                    DstSV, DstSVOff + Offset,
-                                    SrcSV, SrcSVOff + Offset));
+                                    DstPtrInfo.getWithOffset(Offset),
+                                    SrcPtrInfo.getWithOffset(Offset)));
   }
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 4f30f31eac8c..d1d66fe76e94 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -39,8 +39,7 @@ public:
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff) const;
+                                  MachinePointerInfo DstPtrInfo) const;
 
   virtual
   SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
@@ -48,10 +47,8 @@ public:
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile, bool AlwaysInline,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff,
-                                  const Value *SrcSV,
-                                  uint64_t SrcSVOff) const;
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
 };
 
 }
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h
deleted file mode 100644
index df040520bc8f..000000000000
--- a/lib/Target/X86/X86ShuffleDecode.h
+++ /dev/null
@@ -1,155 +0,0 @@
-//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Define several functions to decode x86 specific shuffle semantics into a
-// generic vector mask.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
-
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//  Vector Mask Decoding
-//===----------------------------------------------------------------------===//
-
-enum {
-  SM_SentinelZero = ~0U
-};
-
-static inline
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
-  // Defaults the copying the dest value.
-  ShuffleMask.push_back(0);
-  ShuffleMask.push_back(1);
-  ShuffleMask.push_back(2);
-  ShuffleMask.push_back(3);
-
-  // Decode the immediate.
-  unsigned ZMask = Imm & 15;
-  unsigned CountD = (Imm >> 4) & 3;
-  unsigned CountS = (Imm >> 6) & 3;
-
-  // CountS selects which input element to use.
-  unsigned InVal = 4+CountS;
-  // CountD specifies which element of destination to update.
-  ShuffleMask[CountD] = InVal;
-  // ZMask zaps values, potentially overriding the CountD elt.
-  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
-  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
-  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
-  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
-}
-
-// <3,1> or <6,7,2,3>
-static void DecodeMOVHLPSMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = NElts/2; i != NElts; ++i)
-    ShuffleMask.push_back(NElts+i);
-
-  for (unsigned i = NElts/2; i != NElts; ++i)
-    ShuffleMask.push_back(i);
-}
-
-// <0,2> or <0,1,4,5>
-static void DecodeMOVLHPSMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i)
-    ShuffleMask.push_back(i);
-
-  for (unsigned i = 0; i != NElts/2; ++i)
-    ShuffleMask.push_back(NElts+i);
-}
-
-static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
-                            SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts; ++i) {
-    ShuffleMask.push_back(Imm % NElts);
-    Imm /= NElts;
-  }
-}
-
-static void DecodePSHUFHWMask(unsigned Imm,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  ShuffleMask.push_back(0);
-  ShuffleMask.push_back(1);
-  ShuffleMask.push_back(2);
-  ShuffleMask.push_back(3);
-  for (unsigned i = 0; i != 4; ++i) {
-    ShuffleMask.push_back(4+(Imm & 3));
-    Imm >>= 2;
-  }
-}
-
-static void DecodePSHUFLWMask(unsigned Imm,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != 4; ++i) {
-    ShuffleMask.push_back((Imm & 3));
-    Imm >>= 2;
-  }
-  ShuffleMask.push_back(4);
-  ShuffleMask.push_back(5);
-  ShuffleMask.push_back(6);
-  ShuffleMask.push_back(7);
-}
-
-static void DecodePUNPCKLMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i);
-    ShuffleMask.push_back(i+NElts);
-  }
-}
-
-static void DecodePUNPCKHMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i+NElts/2);
-    ShuffleMask.push_back(i+NElts+NElts/2);
-  }
-}
-
-static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
-                             SmallVectorImpl<unsigned> &ShuffleMask) {
-  // Part that reads from dest.
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(Imm % NElts);
-    Imm /= NElts;
-  }
-  // Part that reads from src.
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(Imm % NElts + NElts);
-    Imm /= NElts;
-  }
-}
-
-static void DecodeUNPCKHPMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i+NElts/2);        // Reads from dest
-    ShuffleMask.push_back(i+NElts+NElts/2);  // Reads from src
-  }
-}
-
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc.  NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-static void DecodeUNPCKLPMask(unsigned NElts,
-                              SmallVectorImpl<unsigned> &ShuffleMask) {
-  for (unsigned i = 0; i != NElts/2; ++i) {
-    ShuffleMask.push_back(i);        // Reads from dest
-    ShuffleMask.push_back(i+NElts);  // Reads from src
-  }
-}
-
-#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0d02e5ee472b..de768561f111 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -1,4 +1,4 @@
-//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,7 +18,7 @@
 #include "llvm/GlobalValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallVector.h"
@@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
   if ((ECX >> 9)  & 1) X86SSELevel = SSSE3;
   if ((ECX >> 19) & 1) X86SSELevel = SSE41;
   if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+  // FIXME: AVX codegen support is not ready.
+  //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
 
   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
 
   HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
   HasFMA3  = IsIntel && ((ECX >> 12) & 0x1);
-  HasAVX   = ((ECX >> 28) & 0x1);
   HasAES   = IsIntel && ((ECX >> 25) & 0x1);
 
   if (IsIntel || IsAMD) {
@@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   , X863DNowLevel(NoThreeDNow)
   , HasCMov(false)
   , HasX86_64(false)
+  , HasPOPCNT(false)
   , HasSSE4A(false)
   , HasAVX(false)
   , HasAES(false)
@@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
     ParseSubtargetFeatures(FS, CPU);
     // All X86-64 CPUs also have SSE2, however user might request no SSE via 
     // -mattr, so don't force SSELevel here.
+    if (HasAVX)
+      X86SSELevel = NoMMXSSE;
   } else {
     // Otherwise, use CPUID to auto-detect feature set.
     AutoDetectSubtargetFeatures();
     // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
-    if (Is64Bit && X86SSELevel < SSE2)
+    if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
       X86SSELevel = SSE2;
   }
 
@@ -338,9 +342,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   assert((!Is64Bit || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
-  // bit targets.
-  if (isTargetDarwin() || Is64Bit)
+  // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and 
+  // for all 64-bit targets.
+  if (isTargetDarwin() || isTargetLinux() || Is64Bit)
     stackAlignment = 16;
 
   if (StackAlignment)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0ee91abe21f4..8a119b43cd91 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -65,6 +65,9 @@ protected:
   ///
   bool HasX86_64;
 
+  /// HasPOPCNT - True if the processor supports POPCNT.
+  bool HasPOPCNT;
+
   /// HasSSE4A - True if the processor supports SSE4A instructions.
   bool HasSSE4A;
 
@@ -100,7 +103,7 @@ protected:
   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
   ///
   unsigned MaxInlineSizeThreshold;
-  
+
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -150,7 +153,10 @@ public:
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+  bool hasPOPCNT() const { return HasPOPCNT; }
   bool hasAVX() const { return HasAVX; }
+  bool hasXMM() const { return hasSSE1() || hasAVX(); }
+  bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
   bool hasAES() const { return HasAES; }
   bool hasCLMUL() const { return HasCLMUL; }
   bool hasFMA3() const { return HasFMA3; }
@@ -160,23 +166,21 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
 
   bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
-  
+
   // ELF is a reasonably sane default and the only other X86 targets we
   // support are Darwin and Windows. Just use "not those".
-  bool isTargetELF() const { 
+  bool isTargetELF() const {
     return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
   }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
 
   bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
-  bool isTargetMingw() const { 
-    return TargetTriple.getOS() == Triple::MinGW32 ||
-           TargetTriple.getOS() == Triple::MinGW64; }
+  bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
   bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
   bool isTargetCygMing() const {
     return isTargetMingw() || isTargetCygwin();
   }
-  
+
   /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
   bool isTargetCOFF() const {
     return isTargetMingw() || isTargetCygwin() || isTargetWindows();
@@ -186,22 +190,12 @@ public:
     return Is64Bit && (isTargetMingw() || isTargetWindows());
   }
 
-  bool isTargetWin32() const {
-    return !Is64Bit && (isTargetMingw() || isTargetWindows());
+  bool isTargetEnvMacho() const {
+    return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
   }
 
-  std::string getDataLayout() const {
-    const char *p;
-    if (is64Bit())
-      p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
-    else if (isTargetDarwin())
-      p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
-    else if (isTargetMingw() || isTargetWindows())
-      p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
-    else
-      p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
-
-    return std::string(p);
+  bool isTargetWin32() const {
+    return !Is64Bit && (isTargetMingw() || isTargetWindows());
   }
 
   bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ce8636eb72b5..889c824b0e11 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   case Triple::Darwin:
     return new X86MCAsmInfoDarwin(TheTriple);
   case Triple::MinGW32:
-  case Triple::MinGW64:
   case Triple::Cygwin:
   case Triple::Win32:
-    return new X86MCAsmInfoCOFF(TheTriple);
+    if (TheTriple.getEnvironment() == Triple::MachO)
+      return new X86MCAsmInfoDarwin(TheTriple);
+    else
+      return new X86MCAsmInfoCOFF(TheTriple);
   default:
     return new X86ELFMCAsmInfo(TheTriple);
   }
@@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCContext &Ctx, TargetAsmBackend &TAB,
                                     raw_ostream &_OS,
                                     MCCodeEmitter *_Emitter,
-                                    bool RelaxAll) {
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
   case Triple::Darwin:
     return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
   case Triple::MinGW32:
-  case Triple::MinGW64:
   case Triple::Cygwin:
   case Triple::Win32:
-    return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+    if (TheTriple.getEnvironment() == Triple::MachO)
+      return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+    else
+      return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
   default:
-    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+    return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
   }
 }
 
-extern "C" void LLVMInitializeX86Target() { 
+extern "C" void LLVMInitializeX86Target() {
   // Register the target.
   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
   RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
@@ -89,28 +94,38 @@ extern "C" void LLVMInitializeX86Target() {
 
 X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, false) {
+  : X86TargetMachine(T, TT, FS, false),
+    DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+               "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+               (getSubtargetImpl()->isTargetCygMing() ||
+                getSubtargetImpl()->isTargetWindows()) ?
+               "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
+               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+    InstrInfo(*this),
+    TSInfo(*this),
+    TLInfo(*this),
+    JITInfo(*this) {
 }
 
 
 X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, true) {
+  : X86TargetMachine(T, TT, FS, true),
+    DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+    InstrInfo(*this),
+    TSInfo(*this),
+    TLInfo(*this),
+    JITInfo(*this) {
 }
 
 /// X86TargetMachine ctor - Create an X86 target.
 ///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, 
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
                                    const std::string &FS, bool is64Bit)
-  : LLVMTargetMachine(T, TT), 
+  : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64Bit),
-    DataLayout(Subtarget.getDataLayout()),
-    FrameInfo(TargetFrameInfo::StackGrowsDown,
-              Subtarget.getStackAlignment(),
-              (Subtarget.isTargetWin64() ? -40 :
-               (Subtarget.is64Bit() ? -8 : -4))),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
-    ELFWriterInfo(*this) {
+    FrameLowering(*this, Subtarget),
+    ELFWriterInfo(is64Bit, true) {
   DefRelocModel = getRelocationModel();
 
   // If no relocation model was picked, default as appropriate for the target.
@@ -217,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
                                       JITCodeEmitter &JCE) {
   // FIXME: Move this to TargetJITInfo!
   // On Darwin, do not override 64-bit setting made in X86TargetMachine().
-  if (DefRelocModel == Reloc::Default && 
+  if (DefRelocModel == Reloc::Default &&
       (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
     setRelocationModel(Reloc::Static);
     Subtarget.setPICStyle(PICStyles::None);
   }
-  
+
 
   PM.add(createX86JITCodeEmitterPass(*this, JCE));
 
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index f9fb424e2d22..597392251e6a 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -14,16 +14,17 @@
 #ifndef X86TARGETMACHINE_H
 #define X86TARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "X86.h"
 #include "X86ELFWriterInfo.h"
 #include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86Subtarget.h"
 #include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
 #include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
   
@@ -31,12 +32,7 @@ class formatted_raw_ostream;
 
 class X86TargetMachine : public LLVMTargetMachine {
   X86Subtarget      Subtarget;
-  const TargetData  DataLayout; // Calculates type size & alignment
-  TargetFrameInfo   FrameInfo;
-  X86InstrInfo      InstrInfo;
-  X86JITInfo        JITInfo;
-  X86TargetLowering TLInfo;
-  X86SelectionDAGInfo TSInfo;
+  X86FrameLowering  FrameLowering;
   X86ELFWriterInfo  ELFWriterInfo;
   Reloc::Model      DefRelocModel; // Reloc model before it's overridden.
 
@@ -49,20 +45,25 @@ public:
   X86TargetMachine(const Target &T, const std::string &TT, 
                    const std::string &FS, bool is64Bit);
 
-  virtual const X86InstrInfo     *getInstrInfo() const { return &InstrInfo; }
-  virtual const TargetFrameInfo  *getFrameInfo() const { return &FrameInfo; }
-  virtual       X86JITInfo       *getJITInfo()         { return &JITInfo; }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    llvm_unreachable("getInstrInfo not implemented");
+  }
+  virtual const TargetFrameLowering  *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  virtual       X86JITInfo       *getJITInfo()         {
+    llvm_unreachable("getJITInfo not implemented");
+  }
   virtual const X86Subtarget     *getSubtargetImpl() const{ return &Subtarget; }
-  virtual const X86TargetLowering *getTargetLowering() const { 
-    return &TLInfo;
+  virtual const X86TargetLowering *getTargetLowering() const {
+    llvm_unreachable("getTargetLowering not implemented");
   }
   virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
-    return &TSInfo;
+    llvm_unreachable("getSelectionDAGInfo not implemented");
   }
   virtual const X86RegisterInfo  *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
+    return &getInstrInfo()->getRegisterInfo();
   }
-  virtual const TargetData       *getTargetData() const { return &DataLayout; }
   virtual const X86ELFWriterInfo *getELFWriterInfo() const {
     return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
   }
@@ -79,17 +80,53 @@ public:
 /// X86_32TargetMachine - X86 32-bit target machine.
 ///
 class X86_32TargetMachine : public X86TargetMachine {
+  const TargetData  DataLayout; // Calculates type size & alignment
+  X86InstrInfo      InstrInfo;
+  X86SelectionDAGInfo TSInfo;
+  X86TargetLowering TLInfo;
+  X86JITInfo        JITInfo;
 public:
   X86_32TargetMachine(const Target &T, const std::string &M,
                       const std::string &FS);
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const X86TargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
+    return &TSInfo;
+  }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual       X86JITInfo       *getJITInfo()         {
+    return &JITInfo;
+  }
 };
 
 /// X86_64TargetMachine - X86 64-bit target machine.
 ///
 class X86_64TargetMachine : public X86TargetMachine {
+  const TargetData  DataLayout; // Calculates type size & alignment
+  X86InstrInfo      InstrInfo;
+  X86SelectionDAGInfo TSInfo;
+  X86TargetLowering TLInfo;
+  X86JITInfo        JITInfo;
 public:
   X86_64TargetMachine(const Target &T, const std::string &TT,
                       const std::string &FS);
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const X86TargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { 
+    return &TSInfo;
+  }
+  virtual const X86InstrInfo     *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual       X86JITInfo       *getJITInfo()         {
+    return &JITInfo;
+  }
 };
 
 } // End llvm namespace
diff --git a/lib/Target/XCore/AsmPrinter/CMakeLists.txt b/lib/Target/XCore/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 7c7c2f4ded04..000000000000
--- a/lib/Target/XCore/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMXCoreAsmPrinter
-  XCoreAsmPrinter.cpp
-  )
-add_dependencies(LLVMXCoreAsmPrinter XCoreCodeGenTable_gen)
diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile
deleted file mode 100644
index 581f736b7431..000000000000
--- a/lib/Target/XCore/AsmPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMXCoreAsmPrinter
-
-# Hack: we need to include 'main' XCore target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
deleted file mode 100644
index 8f06dd32662f..000000000000
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the XAS-format XCore assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "XCore.h"
-#include "XCoreInstrInfo.h"
-#include "XCoreSubtarget.h"
-#include "XCoreMCAsmInfo.h"
-#include "XCoreTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cctype>
-using namespace llvm;
-
-static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
-  cl::desc("Maximum number of threads (for emulation thread-local storage)"),
-  cl::Hidden,
-  cl::value_desc("number"),
-  cl::init(8));
-
-namespace {
-  class XCoreAsmPrinter : public AsmPrinter {
-    const XCoreSubtarget &Subtarget;
-  public:
-    explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
-
-    virtual const char *getPassName() const {
-      return "XCore Assembly Printer";
-    }
-
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
-                       const std::string &directive = ".jmptable");
-    void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
-      printInlineJT(MI, opNum, O, ".jmptable32");
-    }
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-
-    void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
-    virtual void EmitGlobalVariable(const GlobalVariable *GV);
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
-    static const char *getRegisterName(unsigned RegNo);
-
-    void EmitFunctionEntryLabel();
-    void EmitInstruction(const MachineInstr *MI);
-    void EmitFunctionBodyEnd();
-  };
-} // end of anonymous namespace
-
-#include "XCoreGenAsmWriter.inc"
-
-void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
-  assert(((GV->hasExternalLinkage() ||
-    GV->hasWeakLinkage()) ||
-    GV->hasLinkOnceLinkage()) && "Unexpected linkage");
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(
-    cast<PointerType>(GV->getType())->getElementType())) {
-    OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
-    // FIXME: MCStreamerize.
-    OutStreamer.EmitRawText(StringRef(".globound"));
-    OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
-    OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
-    if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
-      // TODO Use COMDAT groups for LinkOnceLinkage
-      OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
-                              ".globound");
-    }
-  }
-}
-
-void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
-  // Check to see if this is a special global used by LLVM, if so, emit it.
-  if (!GV->hasInitializer() ||
-      EmitSpecialLLVMGlobal(GV))
-    return;
-
-  const TargetData *TD = TM.getTargetData();
-  OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
-
-  
-  MCSymbol *GVSym = Mang->getSymbol(GV);
-  Constant *C = GV->getInitializer();
-  unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
-  
-  // Mark the start of the global
-  OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
-                          GVSym->getName());
-
-  switch (GV->getLinkage()) {
-  case GlobalValue::AppendingLinkage:
-    report_fatal_error("AppendingLinkage is not supported by this target!");
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage:
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage:
-  case GlobalValue::ExternalLinkage:
-    emitArrayBound(GVSym, GV);
-    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
-
-    // TODO Use COMDAT groups for LinkOnceLinkage
-    if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
-      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
-    // FALL THROUGH
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-    break;
-  case GlobalValue::DLLImportLinkage:
-    llvm_unreachable("DLLImport linkage is not supported by this target!");
-  case GlobalValue::DLLExportLinkage:
-    llvm_unreachable("DLLExport linkage is not supported by this target!");
-  default:
-    llvm_unreachable("Unknown linkage type!");
-  }
-
-  EmitAlignment(Align > 2 ? Align : 2, GV);
-  
-  unsigned Size = TD->getTypeAllocSize(C->getType());
-  if (GV->isThreadLocal()) {
-    Size *= MaxThreads;
-  }
-  if (MAI->hasDotTypeDotSizeDirective()) {
-    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
-    OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
-                            Twine(Size));
-  }
-  OutStreamer.EmitLabel(GVSym);
-  
-  EmitGlobalConstant(C);
-  if (GV->isThreadLocal()) {
-    for (unsigned i = 1; i < MaxThreads; ++i)
-      EmitGlobalConstant(C);
-  }
-  // The ABI requires that unsigned scalar types smaller than 32 bits
-  // are padded to 32 bits.
-  if (Size < 4)
-    OutStreamer.EmitZeros(4 - Size, 0);
-  
-  // Mark the end of the global
-  OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void XCoreAsmPrinter::EmitFunctionBodyEnd() {
-  // Emit function end directives
-  OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
-                          ".function");
-}
-
-void XCoreAsmPrinter::EmitFunctionEntryLabel() {
-  // Mark the start of the function
-  OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
-                          ".function," + CurrentFnSym->getName());
-  OutStreamer.EmitLabel(CurrentFnSym);
-}
-
-void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
-                                      raw_ostream &O) {
-  printOperand(MI, opNum, O);
-  
-  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
-    return;
-  
-  O << "+";
-  printOperand(MI, opNum+1, O);
-}
-
-void XCoreAsmPrinter::
-printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
-              const std::string &directive) {
-  unsigned JTI = MI->getOperand(opNum).getIndex();
-  const MachineFunction *MF = MI->getParent()->getParent();
-  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
-  O << "\t" << directive << " ";
-  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = JTBBs[i];
-    if (i > 0)
-      O << ",";
-    O << *MBB->getSymbol();
-  }
-}
-
-void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O) {
-  const MachineOperand &MO = MI->getOperand(opNum);
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    O << getRegisterName(MO.getReg());
-    break;
-  case MachineOperand::MO_Immediate:
-    O << MO.getImm();
-    break;
-  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
-    break;
-  case MachineOperand::MO_GlobalAddress:
-    O << *Mang->getSymbol(MO.getGlobal());
-    break;
-  case MachineOperand::MO_ExternalSymbol:
-    O << MO.getSymbolName();
-    break;
-  case MachineOperand::MO_ConstantPoolIndex:
-    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    break;
-  case MachineOperand::MO_JumpTableIndex:
-    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
-      << '_' << MO.getIndex();
-    break;
-  case MachineOperand::MO_BlockAddress:
-    O << *GetBlockAddressSymbol(MO.getBlockAddress());
-    break;
-  default:
-    llvm_unreachable("not implemented");
-  }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,const char *ExtraCode,
-                                      raw_ostream &O) {
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  SmallString<128> Str;
-  raw_svector_ostream O(Str);
-
-  // Check for mov mnemonic
-  if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
-    O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
-      << getRegisterName(MI->getOperand(1).getReg());
-  else
-    printInstruction(MI, O);
-  OutStreamer.EmitRawText(O.str());
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeXCoreAsmPrinter() { 
-  RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
-}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 38b35d7666c0..9093de691582 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -11,7 +11,8 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv)
 tablegen(XCoreGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(XCoreCodeGen
-  XCoreFrameInfo.cpp
+  XCoreAsmPrinter.cpp
+  XCoreFrameLowering.cpp
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp
   XCoreISelLowering.cpp
@@ -22,3 +23,5 @@ add_llvm_target(XCoreCodeGen
   XCoreTargetObjectFile.cpp
   XCoreSelectionDAGInfo.cpp
   )
+
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index 1b709745041a..6c1ef886031b 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -18,7 +18,7 @@ BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
                 XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
 		XCoreGenSubtarget.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = TargetInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 0a568de1624b..c147b8a66bc3 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMXCoreInfo
   XCoreTargetInfo.cpp
   )
 
-add_dependencies(LLVMXCoreInfo XCoreTable_gen)
+add_dependencies(LLVMXCoreInfo XCoreCodeGenTable_gen)
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..8f06dd32662f
--- /dev/null
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,280 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreMCAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+  cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+  cl::Hidden,
+  cl::value_desc("number"),
+  cl::init(8));
+
+namespace {
+  class XCoreAsmPrinter : public AsmPrinter {
+    const XCoreSubtarget &Subtarget;
+  public:
+    explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+
+    virtual const char *getPassName() const {
+      return "XCore Assembly Printer";
+    }
+
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+                       const std::string &directive = ".jmptable");
+    void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
+      printInlineJT(MI, opNum, O, ".jmptable32");
+    }
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+
+    void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
+    virtual void EmitGlobalVariable(const GlobalVariable *GV);
+
+    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
+    static const char *getRegisterName(unsigned RegNo);
+
+    void EmitFunctionEntryLabel();
+    void EmitInstruction(const MachineInstr *MI);
+    void EmitFunctionBodyEnd();
+  };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
+  assert(((GV->hasExternalLinkage() ||
+    GV->hasWeakLinkage()) ||
+    GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(
+    cast<PointerType>(GV->getType())->getElementType())) {
+    OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+    // FIXME: MCStreamerize.
+    OutStreamer.EmitRawText(StringRef(".globound"));
+    OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()));
+    OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements()));
+    if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+      // TODO Use COMDAT groups for LinkOnceLinkage
+      OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
+                              ".globound");
+    }
+  }
+}
+
+void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  // Check to see if this is a special global used by LLVM, if so, emit it.
+  if (!GV->hasInitializer() ||
+      EmitSpecialLLVMGlobal(GV))
+    return;
+
+  const TargetData *TD = TM.getTargetData();
+  OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
+
+  
+  MCSymbol *GVSym = Mang->getSymbol(GV);
+  Constant *C = GV->getInitializer();
+  unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+  
+  // Mark the start of the global
+  OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," +
+                          GVSym->getName());
+
+  switch (GV->getLinkage()) {
+  case GlobalValue::AppendingLinkage:
+    report_fatal_error("AppendingLinkage is not supported by this target!");
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::ExternalLinkage:
+    emitArrayBound(GVSym, GV);
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+    // TODO Use COMDAT groups for LinkOnceLinkage
+    if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
+      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+    // FALL THROUGH
+  case GlobalValue::InternalLinkage:
+  case GlobalValue::PrivateLinkage:
+    break;
+  case GlobalValue::DLLImportLinkage:
+    llvm_unreachable("DLLImport linkage is not supported by this target!");
+  case GlobalValue::DLLExportLinkage:
+    llvm_unreachable("DLLExport linkage is not supported by this target!");
+  default:
+    llvm_unreachable("Unknown linkage type!");
+  }
+
+  EmitAlignment(Align > 2 ? Align : 2, GV);
+  
+  unsigned Size = TD->getTypeAllocSize(C->getType());
+  if (GV->isThreadLocal()) {
+    Size *= MaxThreads;
+  }
+  if (MAI->hasDotTypeDotSizeDirective()) {
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+    OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
+                            Twine(Size));
+  }
+  OutStreamer.EmitLabel(GVSym);
+  
+  EmitGlobalConstant(C);
+  if (GV->isThreadLocal()) {
+    for (unsigned i = 1; i < MaxThreads; ++i)
+      EmitGlobalConstant(C);
+  }
+  // The ABI requires that unsigned scalar types smaller than 32 bits
+  // are padded to 32 bits.
+  if (Size < 4)
+    OutStreamer.EmitZeros(4 - Size, 0);
+  
+  // Mark the end of the global
+  OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+  // Emit function end directives
+  OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) +
+                          ".function");
+}
+
+void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+  // Mark the start of the function
+  OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) +
+                          ".function," + CurrentFnSym->getName());
+  OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O) {
+  printOperand(MI, opNum, O);
+  
+  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+    return;
+  
+  O << "+";
+  printOperand(MI, opNum+1, O);
+}
+
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
+              const std::string &directive) {
+  unsigned JTI = MI->getOperand(opNum).getIndex();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+  O << "\t" << directive << " ";
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    if (i > 0)
+      O << ",";
+    O << *MBB->getSymbol();
+  }
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << getRegisterName(MO.getReg());
+    break;
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    O << MO.getSymbolName();
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+  case MachineOperand::MO_BlockAddress:
+    O << *GetBlockAddressSymbol(MO.getBlockAddress());
+    break;
+  default:
+    llvm_unreachable("not implemented");
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      unsigned AsmVariant,const char *ExtraCode,
+                                      raw_ostream &O) {
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  SmallString<128> Str;
+  raw_svector_ostream O(Str);
+
+  // Check for mov mnemonic
+  if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+    O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+      << getRegisterName(MI->getOperand(1).getReg());
+  else
+    printInstruction(MI, O);
+  OutStreamer.EmitRawText(O.str());
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() { 
+  RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td
index 8107e329bd58..b20d71f49cfd 100644
--- a/lib/Target/XCore/XCoreCallingConv.td
+++ b/lib/Target/XCore/XCoreCallingConv.td
@@ -24,6 +24,9 @@ def CC_XCore : CallingConv<[
   // Promote i8/i16 arguments to i32.
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
+  // The 'nest' parameter, if any, is passed in R11.
+  CCIfNest<CCAssignToReg<[R11]>>,
+
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
 
diff --git a/lib/Target/XCore/XCoreFrameInfo.cpp b/lib/Target/XCore/XCoreFrameInfo.cpp
deleted file mode 100644
index f50dc96c6ba9..000000000000
--- a/lib/Target/XCore/XCoreFrameInfo.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains XCore frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#include "XCore.h"
-#include "XCoreFrameInfo.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// XCoreFrameInfo:
-//===----------------------------------------------------------------------===//
-
-XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm):
-  TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0)
-{
-  // Do nothing
-}
diff --git a/lib/Target/XCore/XCoreFrameInfo.h b/lib/Target/XCore/XCoreFrameInfo.h
deleted file mode 100644
index 2c67577181ec..000000000000
--- a/lib/Target/XCore/XCoreFrameInfo.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains XCore frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef XCOREFRAMEINFO_H
-#define XCOREFRAMEINFO_H
-
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-  class XCoreFrameInfo: public TargetFrameInfo {
-
-  public:
-    XCoreFrameInfo(const TargetMachine &tm);
-
-    //! Stack slot size (4 bytes)
-    static int stackSlotSize() {
-      return 4;
-    }
-  };
-}
-
-#endif // XCOREFRAMEINFO_H
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 000000000000..057822074e54
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,387 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
+static inline bool isImmUs(unsigned val) {
+  return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+  return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+  return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator I,
+                          unsigned DstReg, int Offset, DebugLoc dl,
+                          const TargetInstrInfo &TII) {
+  assert(Offset%4 == 0 && "Misaligned stack offset");
+  Offset/=4;
+  bool isU6 = isImmU6(Offset);
+  if (!isU6 && !isImmU16(Offset))
+    report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+  int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+  BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+    .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator I,
+                         unsigned SrcReg, int Offset, DebugLoc dl,
+                         const TargetInstrInfo &TII) {
+  assert(Offset%4 == 0 && "Misaligned stack offset");
+  Offset/=4;
+  bool isU6 = isImmU6(Offset);
+  if (!isU6 && !isImmU16(Offset))
+    report_fatal_error("storeToStack offset too big " + Twine(Offset));
+  int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+  BuildMI(MBB, I, dl, TII.get(Opcode))
+    .addReg(SrcReg)
+    .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
+    STI(sti) {
+  // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+  return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineModuleInfo *MMI = &MF.getMMI();
+  const XCoreRegisterInfo *RegInfo =
+    static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const XCoreInstrInfo &TII =
+    *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  bool FP = hasFP(MF);
+  bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+
+  if (Nested) {
+    loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+  }
+
+  // Work out frame sizes.
+  int FrameSize = MFI->getStackSize();
+  assert(FrameSize%4 == 0 && "Misaligned frame size");
+  FrameSize/=4;
+
+  bool isU6 = isImmU6(FrameSize);
+
+  if (!isU6 && !isImmU16(FrameSize)) {
+    // FIXME could emit multiple instructions.
+    report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+  }
+  bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+
+  // Do we need to allocate space on the stack?
+  if (FrameSize) {
+    bool saveLR = XFI->getUsesLR();
+    bool LRSavedOnEntry = false;
+    int Opcode;
+    if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+      Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+      MBB.addLiveIn(XCore::LR);
+      saveLR = false;
+      LRSavedOnEntry = true;
+    } else {
+      Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+    }
+    BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+    if (emitFrameMoves) {
+      std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+      // Show update of SP.
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+      MachineLocation SPDst(MachineLocation::VirtualFP);
+      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+      if (LRSavedOnEntry) {
+        MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+        MachineLocation CSSrc(XCore::LR);
+        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+      }
+    }
+    if (saveLR) {
+      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+      storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+      MBB.addLiveIn(XCore::LR);
+
+      if (emitFrameMoves) {
+        MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+        BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+        MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+        MachineLocation CSSrc(XCore::LR);
+        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+      }
+    }
+  }
+
+  if (FP) {
+    // Save R10 to the stack.
+    int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+    storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+    // R10 is live-in. It is killed at the spill.
+    MBB.addLiveIn(XCore::R10);
+    if (emitFrameMoves) {
+      MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+      MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+      MachineLocation CSSrc(XCore::R10);
+      MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+    }
+    // Set the FP from the SP.
+    unsigned FramePtr = XCore::R10;
+    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+      .addImm(0);
+    if (emitFrameMoves) {
+      // Show FP is now valid.
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+      MachineLocation SPDst(FramePtr);
+      MachineLocation SPSrc(MachineLocation::VirtualFP);
+      MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+    }
+  }
+
+  if (emitFrameMoves) {
+    // Frame moves for callee saved.
+    std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+    std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+        XFI->getSpillLabels();
+    for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+      MCSymbol *SpillLabel = SpillLabels[I].first;
+      CalleeSavedInfo &CSI = SpillLabels[I].second;
+      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+      unsigned Reg = CSI.getReg();
+      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+      MachineLocation CSSrc(Reg);
+      Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+    }
+  }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+                                     MachineBasicBlock &MBB) const {
+  MachineFrameInfo *MFI            = MF.getFrameInfo();
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const XCoreInstrInfo &TII =
+    *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+  DebugLoc dl = MBBI->getDebugLoc();
+
+  bool FP = hasFP(MF);
+  if (FP) {
+    // Restore the stack pointer.
+    unsigned FramePtr = XCore::R10;
+    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+      .addReg(FramePtr);
+  }
+
+  // Work out frame sizes.
+  int FrameSize = MFI->getStackSize();
+
+  assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+  FrameSize/=4;
+
+  bool isU6 = isImmU6(FrameSize);
+
+  if (!isU6 && !isImmU16(FrameSize)) {
+    // FIXME could emit multiple instructions.
+    report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+  }
+
+  if (FrameSize) {
+    XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+    if (FP) {
+      // Restore R10
+      int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+      FPSpillOffset += FrameSize*4;
+      loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+    }
+    bool restoreLR = XFI->getUsesLR();
+    if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+      LRSpillOffset += FrameSize*4;
+      loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+      restoreLR = false;
+    }
+    if (restoreLR) {
+      // Fold prologue into return instruction
+      assert(MBBI->getOpcode() == XCore::RETSP_u6
+        || MBBI->getOpcode() == XCore::RETSP_lu6);
+      int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+      BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+      MBB.erase(MBBI);
+    } else {
+      int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+      BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+    }
+  }
+}
+
+void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+                                                                        const {
+  // Initial state of the frame pointer is SP.
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(XCore::SP, 0);
+  Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+  XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+  bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+                                                    it != CSI.end(); ++it) {
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(it->getReg());
+
+    unsigned Reg = it->getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.storeRegToStackSlot(MBB, MI, Reg, true,
+                            it->getFrameIdx(), RC, TRI);
+    if (emitFrameMoves) {
+      MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+      BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+      XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+    }
+  }
+  return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                 MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                            const TargetRegisterInfo *TRI) const{
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+  bool AtStart = MI == MBB.begin();
+  MachineBasicBlock::iterator BeforeI = MI;
+  if (!AtStart)
+    --BeforeI;
+  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+                                                    it != CSI.end(); ++it) {
+    unsigned Reg = it->getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+                             RC, TRI);
+    assert(MI != MBB.begin() &&
+           "loadRegFromStackSlot didn't insert any code!");
+    // Insert in reverse order.  loadRegFromStackSlot can insert multiple
+    // instructions.
+    if (AtStart)
+      MI = MBB.begin();
+    else {
+      MI = BeforeI;
+      ++MI;
+    }
+  }
+  return true;
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                     RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+  const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+  if (LRUsed) {
+    MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+    bool isVarArg = MF.getFunction()->isVarArg();
+    int FrameIdx;
+    if (! isVarArg) {
+      // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+    } else {
+      FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+                                        false);
+    }
+    XFI->setUsesLR(FrameIdx);
+    XFI->setLRSpillSlot(FrameIdx);
+  }
+  if (RegInfo->requiresRegisterScavenging(MF)) {
+    // Reserve a slot close to SP or frame pointer.
+    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment(),
+                                                       false));
+  }
+  if (hasFP(MF)) {
+    // A callee save register is used to hold the FP.
+    // This needs saving / restoring in the epilogue / prologue.
+    XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+                                               RC->getAlignment(),
+                                               false));
+  }
+}
+
+void XCoreFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 000000000000..7da19f0deb1b
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,59 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+  class XCoreSubtarget;
+
+  class XCoreFrameLowering: public TargetFrameLowering {
+    const XCoreSubtarget &STI;
+  public:
+    XCoreFrameLowering(const XCoreSubtarget &STI);
+
+    /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+    /// the function.
+    void emitPrologue(MachineFunction &MF) const;
+    void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+    bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+    bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MI,
+                                     const std::vector<CalleeSavedInfo> &CSI,
+                                     const TargetRegisterInfo *TRI) const;
+
+    bool hasFP(const MachineFunction &MF) const;
+
+    void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+    void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                              RegScavenger *RS = NULL) const;
+
+    void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+    //! Stack slot size (4 bytes)
+    static int stackSlotSize() {
+      return 4;
+    }
+  };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 755ece7e9aba..fc8a07aad73b 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -68,12 +68,9 @@ namespace {
     }
 
     // Complex Pattern Selectors.
-    bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
-    bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
-    bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
-                        SDValue &Offset);
+    bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+    bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+    bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
     
     virtual const char *getPassName() const {
       return "XCore DAG->DAG Pattern Instruction Selection";
@@ -91,8 +88,8 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
   return new XCoreDAGToDAGISel(TM);
 }
 
-bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   FrameIndexSDNode *FIN = 0;
   if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -113,8 +110,8 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
   return false;
 }
 
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
     Base = Addr.getOperand(0);
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -134,8 +131,8 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
   return false;
 }
 
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
-                                  SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+                                       SDValue &Offset) {
   if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
     Base = Addr.getOperand(0);
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index abe7b2fd42be..828d6f92caf4 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -148,9 +148,13 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-  
-  maxStoresPerMemset = 4;
-  maxStoresPerMemmove = maxStoresPerMemcpy = 2;
+
+  // TRAMPOLINE is custom lowered.
+  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+  maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+  maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+    = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::STORE);
@@ -177,6 +181,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADD:
   case ISD::SUB:              return ExpandADDSUB(Op.getNode(), DAG);
   case ISD::FRAMEADDR:        return LowerFRAMEADDR(Op, DAG);
+  case ISD::TRAMPOLINE:       return LowerTRAMPOLINE(Op, DAG);
   default:
     llvm_unreachable("unimplemented operand");
     return SDValue();
@@ -392,24 +397,23 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
 }
 
 SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-{
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
          "Unexpected extension type");
   assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+  if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
     return SDValue();
-  }
+
   unsigned ABIAlignment = getTargetData()->
     getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
   // Leave aligned load alone.
-  if (LD->getAlignment() >= ABIAlignment) {
+  if (LD->getAlignment() >= ABIAlignment)
     return SDValue();
-  }
+
   SDValue Chain = LD->getChain();
   SDValue BasePtr = LD->getBasePtr();
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   
   SDValue Base;
   int64_t Offset;
@@ -419,10 +423,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
       // We've managed to infer better alignment information than the load
       // already has. Use an aligned load.
       //
-      // FIXME: No new alignment information is actually passed here.
-      // Should the offset really be 4?
-      //
-      return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4,
+      return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+                         MachinePointerInfo(),
                          false, false, 0);
     }
     // Lower to
@@ -436,40 +438,40 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
     SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
     SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
     
-    SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
-    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+    SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+    SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
     
-    SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
-                              LowAddr, NULL, 4, false, false, 0);
-    SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
-                               HighAddr, NULL, 4, false, false, 0);
-    SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
-    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
-    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+    SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+                              LowAddr, MachinePointerInfo(), false, false, 0);
+    SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+                               HighAddr, MachinePointerInfo(), false, false, 0);
+    SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+    SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+    SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
                              High.getValue(1));
     SDValue Ops[] = { Result, Chain };
-    return DAG.getMergeValues(Ops, 2, dl);
+    return DAG.getMergeValues(Ops, 2, DL);
   }
   
   if (LD->getAlignment() == 2) {
-    int SVOffset = LD->getSrcValueOffset();
-    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
-                                 BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+                                 BasePtr, LD->getPointerInfo(), MVT::i16,
                                  LD->isVolatile(), LD->isNonTemporal(), 2);
-    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+    SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                    DAG.getConstant(2, MVT::i32));
-    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
-                                  HighAddr, LD->getSrcValue(), SVOffset + 2,
+    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+                                  HighAddr,
+                                  LD->getPointerInfo().getWithOffset(2),
                                   MVT::i16, LD->isVolatile(),
                                   LD->isNonTemporal(), 2);
-    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+    SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
                                       DAG.getConstant(16, MVT::i32));
-    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+    SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
                              High.getValue(1));
     SDValue Ops[] = { Result, Chain };
-    return DAG.getMergeValues(Ops, 2, dl);
+    return DAG.getMergeValues(Ops, 2, DL);
   }
   
   // Lower to a call to __misaligned_load(BasePtr).
@@ -486,12 +488,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
                     false, false, 0, CallingConv::C, false,
                     /*isReturnValueUsed=*/true,
                     DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
-                    Args, DAG, dl);
+                    Args, DAG, DL);
 
   SDValue Ops[] =
     { CallResult.first, CallResult.second };
 
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, 2, DL);
 }
 
 SDValue XCoreTargetLowering::
@@ -515,18 +517,17 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   DebugLoc dl = Op.getDebugLoc();
   
   if (ST->getAlignment() == 2) {
-    int SVOffset = ST->getSrcValueOffset();
     SDValue Low = Value;
     SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
                                       DAG.getConstant(16, MVT::i32));
     SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
-                                         ST->getSrcValue(), SVOffset, MVT::i16,
+                                         ST->getPointerInfo(), MVT::i16,
                                          ST->isVolatile(), ST->isNonTemporal(),
                                          2);
     SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
                                    DAG.getConstant(2, MVT::i32));
     SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
-                                          ST->getSrcValue(), SVOffset + 2,
+                                          ST->getPointerInfo().getWithOffset(2),
                                           MVT::i16, ST->isVolatile(),
                                           ST->isNonTemporal(), 2);
     return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
@@ -757,16 +758,18 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   EVT VT = Node->getValueType(0);
   SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
-                               Node->getOperand(1), V, 0, false, false, 0);
+                               Node->getOperand(1), MachinePointerInfo(V),
+                               false, false, 0);
   // Increment the pointer, VAList, to the next vararg
   SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, 
                      DAG.getConstant(VT.getSizeInBits(), 
                                      getPointerTy()));
   // Store the incremented VAList to the legalized pointer
-  Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0,
-                      false, false, 0);
+  Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+                      MachinePointerInfo(V), false, false, 0);
   // Load the actual argument out of the pointer VAList
-  return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0);
+  return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+                     false, false, 0);
 }
 
 SDValue XCoreTargetLowering::
@@ -778,9 +781,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const
   MachineFunction &MF = DAG.getMachineFunction();
   XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
   SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
-  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), 
+                      MachinePointerInfo(), false, false, 0);
 }
 
 SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -796,6 +798,64 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
                             RegInfo->getFrameRegister(MF), MVT::i32);
 }
 
+SDValue XCoreTargetLowering::
+LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+  // .align 4
+  // LDAPF_u10 r11, nest
+  // LDW_2rus r11, r11[0]
+  // STWSP_ru6 r11, sp[0]
+  // LDAPF_u10 r11, fptr
+  // LDW_2rus r11, r11[0]
+  // BAU_1r r11
+  // nest:
+  // .word nest
+  // fptr:
+  // .word fptr
+  SDValue OutChains[5];
+
+  SDValue Addr = Trmp;
+
+  DebugLoc dl = Op.getDebugLoc();
+  OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+                              Addr, MachinePointerInfo(TrmpAddr), false, false,
+                              0);
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                     DAG.getConstant(4, MVT::i32));
+  OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+                              Addr, MachinePointerInfo(TrmpAddr, 4), false,
+                              false, 0);
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                     DAG.getConstant(8, MVT::i32));
+  OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+                              Addr, MachinePointerInfo(TrmpAddr, 8), false,
+                              false, 0);
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                     DAG.getConstant(12, MVT::i32));
+  OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+                              MachinePointerInfo(TrmpAddr, 12), false, false,
+                              0);
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+                     DAG.getConstant(16, MVT::i32));
+  OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+                              MachinePointerInfo(TrmpAddr, 16), false, false,
+                              0);
+
+  SDValue Ops[] =
+    { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
 //===----------------------------------------------------------------------===//
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
@@ -929,7 +989,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   //             = Chain, Callee, Reg#1, Reg#2, ...  
   //
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -1035,7 +1095,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
 
-  unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize();
+  unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
 
   unsigned LRSaveSize = StackSlotSize;
   
@@ -1068,7 +1128,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
       unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
       if (ObjSize > StackSlotSize) {
         errs() << "LowerFormalArguments Unhandled argument type: "
-               << (unsigned)VA.getLocVT().getSimpleVT().SimpleTy
+               << EVT(VA.getLocVT()).getEVTString()
                << "\n";
       }
       // Create the frame index object for this incoming parameter...
@@ -1079,7 +1139,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
-      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0,
+      InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, 
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -1111,8 +1172,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
         RegInfo.addLiveIn(ArgRegs[i], VReg);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         // Move argument from virt reg -> stack
-        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                     false, false, 0);
+        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                     MachinePointerInfo(), false, false, 0);
         MemOps.push_back(Store);
       }
       if (!MemOps.empty())
@@ -1443,9 +1504,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
         return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
                               LD->getBasePtr(),
                               DAG.getConstant(StoreBits/8, MVT::i32),
-                              Alignment, false, ST->getSrcValue(),
-                              ST->getSrcValueOffset(), LD->getSrcValue(),
-                              LD->getSrcValueOffset());
+                              Alignment, false, ST->getPointerInfo(),
+                              LD->getPointerInfo());
       }
     }
     break;
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index febc198f4faf..7e5dd2e8e512 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -147,6 +147,7 @@ namespace llvm {
     SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   
     // Inline asm support
     std::vector<unsigned>
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index ad00046af17d..9cb6a7d17b5e 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -384,74 +384,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     .addImm(0);
 }
 
-bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                               MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty()) {
-    return true;
-  }
-  MachineFunction *MF = MBB.getParent();
-  XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
-  
-  bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-  
-  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
-                                                    it != CSI.end(); ++it) {
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(it->getReg());
-
-    unsigned Reg = it->getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    storeRegToStackSlot(MBB, MI, Reg, true,
-                        it->getFrameIdx(), RC, &RI);
-    if (emitFrameMoves) {
-      MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
-      BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
-      XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
-    }
-  }
-  return true;
-}
-
-bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                            const TargetRegisterInfo *TRI) const
-{
-  bool AtStart = MI == MBB.begin();
-  MachineBasicBlock::iterator BeforeI = MI;
-  if (!AtStart)
-    --BeforeI;
-  for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
-                                                    it != CSI.end(); ++it) {
-    unsigned Reg = it->getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    loadRegFromStackSlot(MBB, MI, it->getReg(),
-                                  it->getFrameIdx(),
-                         RC, &RI);
-    assert(MI != MBB.begin() &&
-           "loadRegFromStackSlot didn't insert any code!");
-    // Insert in reverse order.  loadRegFromStackSlot can insert multiple
-    // instructions.
-    if (AtStart)
-      MI = MBB.begin();
-    else {
-      MI = BeforeI;
-      ++MI;
-    }
-  }
-  return true;
-}
-
 /// ReverseBranchCondition - Return the inverse opcode of the 
 /// specified Branch instruction.
 bool XCoreInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const 
-{
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert((Cond.size() == 2) && 
           "Invalid XCore branch condition!");
   Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index d2b116eef0d8..977fe8dd550a 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -75,15 +75,6 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-  
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
 
   virtual bool ReverseBranchCondition(
                             SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 6b3b39ba1d49..38cc734ce7c3 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -29,11 +29,11 @@ include "XCoreInstrFormats.td"
 // Call
 def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def XCoreBranchLink     : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;
 
 def XCoreRetsp       : SDNode<"XCoreISD::RETSP", SDTBrind,
-                         [SDNPHasChain, SDNPOptInFlag]>;
+                         [SDNPHasChain, SDNPOptInGlue]>;
 
 def SDT_XCoreBR_JT    : SDTypeProfile<0, 2,
                                       [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -66,9 +66,9 @@ def SDT_XCoreCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                         SDTCisVT<1, i32> ]>;
 
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_XCoreCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
 //===----------------------------------------------------------------------===//
 // Instruction Pattern Stuff
@@ -610,8 +610,15 @@ def LDC_lru6 : _FLRU6<
                  [(set GRRegs:$dst, immU16:$b)]>;
 }
 
+def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+                  "setc res[$r], $val",
+                  [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+
+def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+                  "setc res[$r], $val",
+                  [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+
 // Operand register - U6
-// TODO setc
 let isBranch = 1, isTerminator = 1 in {
 defm BRFT: FRU6_LRU6_branch<"bt">;
 defm BRBT: FRU6_LRU6_branch<"bt">;
@@ -720,9 +727,8 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
                  "neg $dst, $b",
                  [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
 
-// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
-// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
-// tsetmr, sext (reg), zext (reg)
+// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp,
+// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
 let Constraints = "$src1 = $dst" in {
 let neverHasSideEffects = 1 in
 def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
@@ -748,6 +754,50 @@ def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
                  "mkmsk $dst, $size",
                  [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
 
+def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
+                 "getr $dst, $type",
+                 [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "outct res[$r], $val",
+                 [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+                 "outct res[$r], $val",
+                 [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "outt res[$r], $val",
+                 [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "out res[$r], $val",
+                 [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+                 "inct $dst, res[$r]",
+                 [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+                 "int $dst, res[$r]",
+                 [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+                 "in $dst, res[$r]",
+                 [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "chkct res[$r], $val",
+                 [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+                 "chkct res[$r], $val",
+                 [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+                 "setd res[$r], $val",
+                 [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
 // Two operand long
 // TODO settw, setclk, setrdy, setpsc, endin, peek,
 // getd, testlcl, tinitlr, getps, setps
@@ -763,8 +813,12 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
                  "clz $dst, $src",
                  [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
 
+def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val),
+                  "setc res[$r], $val",
+                  [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
 // One operand short
-// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
+// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp
 // setdp, setcp, setv, setev, kcall
 // dgetreg
 let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
@@ -805,6 +859,10 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
                  [(XCoreBranchLink GRRegs:$addr)]>;
 }
 
+def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
+               "freer res[$r]",
+               [(int_xcore_freer GRRegs:$r)]>;
+
 // Zero operand short
 // TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
 // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index f82e59814e77..56c0879cc8fc 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -21,7 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -84,11 +84,13 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
 
 BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   Reserved.set(XCore::CP);
   Reserved.set(XCore::DP);
   Reserved.set(XCore::SP);
   Reserved.set(XCore::LR);
-  if (hasFP(MF)) {
+  if (TFI->hasFP(MF)) {
     Reserved.set(XCore::R10);
   }
   return Reserved;
@@ -96,12 +98,10 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
 bool
 XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
-  // TODO can we estimate stack size?
-  return hasFP(MF);
-}
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
-bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
-  return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+  // TODO can we estimate stack size?
+  return TFI->hasFP(MF);
 }
 
 // This function eliminates ADJCALLSTACKDOWN,
@@ -109,7 +109,9 @@ bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
 void XCoreRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
     // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
     // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
     MachineInstr *Old = I;
@@ -118,14 +120,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = TFI->getStackAlignment();
       Amount = (Amount+Align-1)/Align*Align;
 
       assert(Amount%4 == 0);
       Amount /= 4;
-      
+
       bool isU6 = isImmU6(Amount);
-      
       if (!isU6 && !isImmU16(Amount)) {
         // FIX could emit multiple instructions in this case.
 #ifndef NDEBUG
@@ -172,6 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int FrameIndex = FrameOp.getIndex();
 
   MachineFunction &MF = *MI.getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
   int StackSize = MF.getFrameInfo()->getStackSize();
 
@@ -197,7 +199,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   
   Offset/=4;
   
-  bool FP = hasFP(MF);
+  bool FP = TFI->hasFP(MF);
   
   unsigned Reg = MI.getOperand(0).getReg();
   bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
@@ -292,48 +294,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
-void
-XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                      RegScavenger *RS) const {
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
-  const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
-  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
-  if (LRUsed) {
-    MF.getRegInfo().setPhysRegUnused(XCore::LR);
-    
-    bool isVarArg = MF.getFunction()->isVarArg();
-    int FrameIdx;
-    if (! isVarArg) {
-      // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
-      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
-    } else {
-      FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
-                                        false);
-    }
-    XFI->setUsesLR(FrameIdx);
-    XFI->setLRSpillSlot(FrameIdx);
-  }
-  if (requiresRegisterScavenging(MF)) {
-    // Reserve a slot close to SP or frame pointer.
-    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
-                                                       RC->getAlignment(),
-                                                       false));
-  }
-  if (hasFP(MF)) {
-    // A callee save register is used to hold the FP.
-    // This needs saving / restoring in the epilogue / prologue.
-    XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
-                                               RC->getAlignment(),
-                                               false));
-  }
-}
-
-void XCoreRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-  
-}
-
 void XCoreRegisterInfo::
 loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             unsigned DstReg, int64_t Value, DebugLoc dl) const {
@@ -346,229 +306,19 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
 }
 
-void XCoreRegisterInfo::
-storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                  unsigned SrcReg, int Offset, DebugLoc dl) const {
-  assert(Offset%4 == 0 && "Misaligned stack offset");
-  Offset/=4;
-  bool isU6 = isImmU6(Offset);
-  if (!isU6 && !isImmU16(Offset))
-    report_fatal_error("storeToStack offset too big " + Twine(Offset));
-  int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
-  BuildMI(MBB, I, dl, TII.get(Opcode))
-    .addReg(SrcReg)
-    .addImm(Offset);
-}
-
-void XCoreRegisterInfo::
-loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                  unsigned DstReg, int Offset, DebugLoc dl) const {
-  assert(Offset%4 == 0 && "Misaligned stack offset");
-  Offset/=4;
-  bool isU6 = isImmU6(Offset);
-  if (!isU6 && !isImmU16(Offset))
-    report_fatal_error("loadFromStack offset too big " + Twine(Offset));
-  int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
-  BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
-    .addImm(Offset);
-}
-
-void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineModuleInfo *MMI = &MF.getMMI();
-  XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  bool FP = hasFP(MF);
-
-  // Work out frame sizes.
-  int FrameSize = MFI->getStackSize();
-
-  assert(FrameSize%4 == 0 && "Misaligned frame size");
-  
-  FrameSize/=4;
-  
-  bool isU6 = isImmU6(FrameSize);
-
-  if (!isU6 && !isImmU16(FrameSize)) {
-    // FIXME could emit multiple instructions.
-    report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
-  }
-  bool emitFrameMoves = needsFrameMoves(MF);
-
-  // Do we need to allocate space on the stack?
-  if (FrameSize) {
-    bool saveLR = XFI->getUsesLR();
-    bool LRSavedOnEntry = false;
-    int Opcode;
-    if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
-      Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
-      MBB.addLiveIn(XCore::LR);
-      saveLR = false;
-      LRSavedOnEntry = true;
-    } else {
-      Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
-    }
-    BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
-    
-    if (emitFrameMoves) {
-      std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-      
-      // Show update of SP.
-      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
-      
-      MachineLocation SPDst(MachineLocation::VirtualFP);
-      MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
-      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-      
-      if (LRSavedOnEntry) {
-        MachineLocation CSDst(MachineLocation::VirtualFP, 0);
-        MachineLocation CSSrc(XCore::LR);
-        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
-      }
-    }
-    if (saveLR) {
-      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
-      storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
-      MBB.addLiveIn(XCore::LR);
-      
-      if (emitFrameMoves) {
-        MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
-        BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
-        MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
-        MachineLocation CSSrc(XCore::LR);
-        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
-      }
-    }
-  }
-  
-  if (FP) {
-    // Save R10 to the stack.
-    int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
-    storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
-    // R10 is live-in. It is killed at the spill.
-    MBB.addLiveIn(XCore::R10);
-    if (emitFrameMoves) {
-      MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
-      MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
-      MachineLocation CSSrc(XCore::R10);
-      MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
-    }
-    // Set the FP from the SP.
-    unsigned FramePtr = XCore::R10;
-    BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
-      .addImm(0);
-    if (emitFrameMoves) {
-      // Show FP is now valid.
-      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
-      MachineLocation SPDst(FramePtr);
-      MachineLocation SPSrc(MachineLocation::VirtualFP);
-      MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-    }
-  }
-  
-  if (emitFrameMoves) {
-    // Frame moves for callee saved.
-    std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-    std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
-        XFI->getSpillLabels();
-    for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
-      MCSymbol *SpillLabel = SpillLabels[I].first;
-      CalleeSavedInfo &CSI = SpillLabels[I].second;
-      int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
-      unsigned Reg = CSI.getReg();
-      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
-      MachineLocation CSSrc(Reg);
-      Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
-    }
-  }
-}
-
-void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                     MachineBasicBlock &MBB) const {
-  MachineFrameInfo *MFI            = MF.getFrameInfo();
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  DebugLoc dl = MBBI->getDebugLoc();
-  
-  bool FP = hasFP(MF);
-  
-  if (FP) {
-    // Restore the stack pointer.
-    unsigned FramePtr = XCore::R10;
-    BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
-      .addReg(FramePtr);
-  }
-
-  // Work out frame sizes.
-  int FrameSize = MFI->getStackSize();
-
-  assert(FrameSize%4 == 0 && "Misaligned frame size");
-
-  FrameSize/=4;
-  
-  bool isU6 = isImmU6(FrameSize);
-
-  if (!isU6 && !isImmU16(FrameSize)) {
-    // FIXME could emit multiple instructions.
-    report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
-  }
-
-  if (FrameSize) {
-    XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
-    
-    if (FP) {
-      // Restore R10
-      int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
-      FPSpillOffset += FrameSize*4;
-      loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl);
-    }
-    bool restoreLR = XFI->getUsesLR();
-    if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
-      int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
-      LRSpillOffset += FrameSize*4;
-      loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl);
-      restoreLR = false;
-    }
-    if (restoreLR) {
-      // Fold prologue into return instruction
-      assert(MBBI->getOpcode() == XCore::RETSP_u6
-        || MBBI->getOpcode() == XCore::RETSP_lu6);
-      int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
-      BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
-      MBB.erase(MBBI);
-    } else {
-      int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
-      BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
-    }
-  }
-}
-
 int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
 }
 
 unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  bool FP = hasFP(MF);
-  
-  return FP ? XCore::R10 : XCore::SP;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
 }
 
 unsigned XCoreRegisterInfo::getRARegister() const {
   return XCore::LR;
 }
 
-void XCoreRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
-                                                                         const {
-  // Initial state of the frame pointer is SP.
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(XCore::SP, 0);
-  Moves.push_back(MachineMove(0, Dst, Src));
-}
-
 #include "XCoreGenRegisterInfo.inc"
 
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index e636c1c7298a..218575581d4a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -48,8 +48,6 @@ public:
   
   bool requiresRegisterScavenging(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -57,18 +55,9 @@ public:
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                RegScavenger *RS = NULL) const;
-
-  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
-  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
 
   //! Return the array of argument passing registers
   /*!
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 62daf5d4567b..765f717e206e 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -61,8 +61,8 @@ def GRRegs : RegisterClass<"XCore", [i32], 32,
     GRRegsClass::iterator
     GRRegsClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
-      const TargetRegisterInfo *RI = TM.getRegisterInfo();
-      if (RI->hasFP(MF))
+      const TargetFrameLowering *TFI = TM.getFrameLowering();
+      if (TFI->hasFP(MF))
         return end()-1;  // don't allocate R10
       else
         return end();
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index b0013eb01df9..30da2c896c0f 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -27,7 +27,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
     DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
                "i16:16:32-i32:32:32-i64:32:32-n32"),
     InstrInfo(),
-    FrameInfo(*this),
+    FrameLowering(Subtarget),
     TLInfo(*this),
     TSInfo(*this) {
 }
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 14073baf0f94..24daadcb6bf4 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -16,7 +16,7 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
-#include "XCoreFrameInfo.h"
+#include "XCoreFrameLowering.h"
 #include "XCoreSubtarget.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreISelLowering.h"
@@ -28,7 +28,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
   XCoreSubtarget Subtarget;
   const TargetData DataLayout;       // Calculates type size & alignment
   XCoreInstrInfo InstrInfo;
-  XCoreFrameInfo FrameInfo;
+  XCoreFrameLowering FrameLowering;
   XCoreTargetLowering TLInfo;
   XCoreSelectionDAGInfo TSInfo;
 public:
@@ -36,7 +36,9 @@ public:
                      const std::string &FS);
 
   virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
+  virtual const XCoreFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
   virtual const XCoreTargetLowering *getTargetLowering() const {
     return &TLInfo;
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index cdf5a5371e22..7f4e1c1b4fd7 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 
@@ -19,31 +20,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
 
   DataSection =
-    Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS, 
-                      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
-                      MCSectionELF::XCORE_SHF_DP_SECTION,
-                      SectionKind::getDataRel(), false);
+    Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS, 
+                      ELF::SHF_ALLOC | ELF::SHF_WRITE |
+                      ELF::XCORE_SHF_DP_SECTION,
+                      SectionKind::getDataRel());
   BSSSection =
-    Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS,
-                      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
-                      MCSectionELF::XCORE_SHF_DP_SECTION,
-                      SectionKind::getBSS(), false);
+    Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+                      ELF::SHF_ALLOC | ELF::SHF_WRITE |
+                      ELF::XCORE_SHF_DP_SECTION,
+                      SectionKind::getBSS());
   
   MergeableConst4Section = 
-    Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS,
-                      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
-                      MCSectionELF::XCORE_SHF_CP_SECTION,
-                      SectionKind::getMergeableConst4(), false);
+    Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+                      ELF::SHF_ALLOC | ELF::SHF_MERGE |
+                      ELF::XCORE_SHF_CP_SECTION,
+                      SectionKind::getMergeableConst4());
   MergeableConst8Section = 
-    Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS,
-                      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
-                      MCSectionELF::XCORE_SHF_CP_SECTION,
-                      SectionKind::getMergeableConst8(), false);
+    Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+                      ELF::SHF_ALLOC | ELF::SHF_MERGE |
+                      ELF::XCORE_SHF_CP_SECTION,
+                      SectionKind::getMergeableConst8());
   MergeableConst16Section = 
-    Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS,
-                      MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
-                      MCSectionELF::XCORE_SHF_CP_SECTION,
-                      SectionKind::getMergeableConst16(), false);
+    Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+                      ELF::SHF_ALLOC | ELF::SHF_MERGE |
+                      ELF::XCORE_SHF_CP_SECTION,
+                      SectionKind::getMergeableConst16());
   
   // TLS globals are lowered in the backend to arrays indexed by the current
   // thread id. After lowering they require no special handling by the linker
@@ -52,10 +53,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
   TLSBSSSection = BSSSection;
 
   ReadOnlySection = 
-    Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS,
-                      MCSectionELF::SHF_ALLOC |
-                      MCSectionELF::XCORE_SHF_CP_SECTION,
-                      SectionKind::getReadOnlyWithRel(), false);
+    Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+                      ELF::SHF_ALLOC |
+                      ELF::XCORE_SHF_CP_SECTION,
+                      SectionKind::getReadOnlyWithRel());
 
   // Dynamic linking is not supported. Data with relocations is placed in the
   // same section as data without relocations.
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
new file mode 100644
index 000000000000..10e0cc6b5691
--- /dev/null
+++ b/lib/Transforms/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_subdirectory(Utils)
+add_subdirectory(Instrumentation)
+add_subdirectory(InstCombine)
+add_subdirectory(Scalar)
+add_subdirectory(IPO)
+add_subdirectory(Hello)
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 838d5505490f..b0e22de8d7ed 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -37,7 +37,7 @@ namespace {
 }
 
 char Hello::ID = 0;
-INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
+static RegisterPass<Hello> X("hello", "Hello World Pass");
 
 namespace {
   // Hello2 - The second implementation with getAnalysisUsage implemented.
@@ -60,6 +60,5 @@ namespace {
 }
 
 char Hello2::ID = 0;
-INITIALIZE_PASS(Hello2, "hello2",
-                "Hello World Pass (with getAnalysisUsage implemented)",
-                false, false);
+static RegisterPass<Hello2>
+Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0c77e1fd8cff..0c650cfe6440 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,7 +39,6 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/Target/TargetData.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
@@ -67,7 +66,9 @@ namespace {
     virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
     explicit ArgPromotion(unsigned maxElements = 3)
-      : CallGraphSCCPass(ID), maxElements(maxElements) {}
+        : CallGraphSCCPass(ID), maxElements(maxElements) {
+      initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+    }
 
     /// A vector used to hold the indices of a single GEP instruction
     typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +85,12 @@ namespace {
 }
 
 char ArgPromotion::ID = 0;
-INITIALIZE_PASS(ArgPromotion, "argpromotion",
-                "Promote 'by reference' arguments to scalars", false, false);
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+                "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+                "Promote 'by reference' arguments to scalars", false, false)
 
 Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
   return new ArgPromotion(maxElements);
@@ -130,47 +135,74 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
   if (PointerArgs.empty()) return 0;
 
   // Second check: make sure that all callers are direct callers.  We can't
-  // transform functions that have indirect callers.
-  if (F->hasAddressTaken())
-    return 0;
-
+  // transform functions that have indirect callers.  Also see if the function
+  // is self-recursive.
+  bool isSelfRecursive = false;
+  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+       UI != E; ++UI) {
+    CallSite CS(*UI);
+    // Must be a direct call.
+    if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+    
+    if (CS.getInstruction()->getParent()->getParent() == F)
+      isSelfRecursive = true;
+  }
+  
   // Check to see which arguments are promotable.  If an argument is promotable,
   // add it to ArgsToPromote.
   SmallPtrSet<Argument*, 8> ArgsToPromote;
   SmallPtrSet<Argument*, 8> ByValArgsToTransform;
   for (unsigned i = 0; i != PointerArgs.size(); ++i) {
     bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+    Argument *PtrArg = PointerArgs[i].first;
+    const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
 
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe.
-    Argument *PtrArg = PointerArgs[i].first;
     if (isByVal) {
-      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
       if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (maxElements > 0 && STy->getNumElements() > maxElements) {
           DEBUG(dbgs() << "argpromotion disable promoting argument '"
                 << PtrArg->getName() << "' because it would require adding more"
                 << " than " << maxElements << " arguments to the function.\n");
-        } else {
-          // If all the elements are single-value types, we can promote it.
-          bool AllSimple = true;
-          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
-            if (!STy->getElementType(i)->isSingleValueType()) {
-              AllSimple = false;
-              break;
-            }
-
-          // Safe to transform, don't even bother trying to "promote" it.
-          // Passing the elements as a scalar will allow scalarrepl to hack on
-          // the new alloca we introduce.
-          if (AllSimple) {
-            ByValArgsToTransform.insert(PtrArg);
-            continue;
+          continue;
+        }
+        
+        // If all the elements are single-value types, we can promote it.
+        bool AllSimple = true;
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          if (!STy->getElementType(i)->isSingleValueType()) {
+            AllSimple = false;
+            break;
           }
         }
+
+        // Safe to transform, don't even bother trying to "promote" it.
+        // Passing the elements as a scalar will allow scalarrepl to hack on
+        // the new alloca we introduce.
+        if (AllSimple) {
+          ByValArgsToTransform.insert(PtrArg);
+          continue;
+        }
       }
     }
 
+    // If the argument is a recursive type and we're in a recursive
+    // function, we could end up infinitely peeling the function argument.
+    if (isSelfRecursive) {
+      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+        bool RecursiveType = false;
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          if (STy->getElementType(i) == PtrArg->getType()) {
+            RecursiveType = true;
+            break;
+          }
+        }
+        if (RecursiveType)
+          continue;
+      }
+    }
+    
     // Otherwise, see if we can promote the pointer to its value.
     if (isSafeToPromoteArgument(PtrArg, isByVal))
       ArgsToPromote.insert(PtrArg);
@@ -183,22 +215,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
   return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
 }
 
-/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
-/// to load.
-static bool IsAlwaysValidPointer(Value *V) {
-  if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
-    return IsAlwaysValidPointer(GEP->getOperand(0));
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::GetElementPtr)
-      return IsAlwaysValidPointer(CE->getOperand(0));
-
-  return false;
-}
-
-/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
 /// all callees pass in a valid pointer for the specified function argument.
-static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
   Function *Callee = Arg->getParent();
 
   unsigned ArgNo = std::distance(Callee->arg_begin(),
@@ -211,7 +230,7 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
     CallSite CS(*UI);
     assert(CS && "Should only have direct calls!");
 
-    if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+    if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
       return false;
   }
   return true;
@@ -318,7 +337,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
   GEPIndicesSet ToPromote;
 
   // If the pointer is always valid, any load with first index 0 is valid.
-  if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+  if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
     SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
 
   // First, iterate the entry block and mark loads of (geps of) arguments as
@@ -434,8 +453,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
   SmallPtrSet<BasicBlock*, 16> TranspBlocks;
 
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false; // Without TargetData, assume the worst.
 
   for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
     // Check to see if the load is invalidated from the start of the block to
@@ -443,11 +460,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
     LoadInst *Load = Loads[i];
     BasicBlock *BB = Load->getParent();
 
-    const PointerType *LoadTy =
-      cast<PointerType>(Load->getPointerOperand()->getType());
-    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
-
-    if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+    AliasAnalysis::Location Loc = AA.getLocation(Load);
+    if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
       return false;  // Pointer is invalidated!
 
     // Now check every path from the entry block to the load for transparency.
@@ -458,7 +472,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
       for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
              I = idf_ext_begin(P, TranspBlocks),
              E = idf_ext_end(P, TranspBlocks); I != E; ++I)
-        if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+        if (AA.canBasicBlockModify(**I, Loc))
           return false;
     }
   }
@@ -694,6 +708,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
           // of the previous load.
           LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
           newLoad->setAlignment(OrigLoad->getAlignment());
+          // Transfer the TBAA info too.
+          newLoad->setMetadata(LLVMContext::MD_tbaa,
+                               OrigLoad->getMetadata(LLVMContext::MD_tbaa));
           Args.push_back(newLoad);
           AA.copyValue(OrigLoad, Args.back());
         }
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 65483e8fed63..efdeec564051 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -17,11 +17,8 @@ add_llvm_library(LLVMipo
   LowerSetJmp.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
-  PartialSpecialization.cpp
   PruneEH.cpp
   StripDeadPrototypes.cpp
   StripSymbols.cpp
   StructRetPromotion.cpp
   )
-
-target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 64e8d792dc3a..a21efced73b9 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumMerged, "Number of global constants merged");
 namespace {
   struct ConstantMerge : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    ConstantMerge() : ModulePass(ID) {}
+    ConstantMerge() : ModulePass(ID) {
+      initializeConstantMergePass(*PassRegistry::getPassRegistry());
+    }
 
     // run - For this pass, process all of the globals in the module,
     // eliminating duplicate constants.
@@ -44,7 +46,7 @@ namespace {
 
 char ConstantMerge::ID = 0;
 INITIALIZE_PASS(ConstantMerge, "constmerge",
-                "Merge Duplicate Global Constants", false, false);
+                "Merge Duplicate Global Constants", false, false)
 
 ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
 
@@ -63,6 +65,18 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
       UsedValues.insert(GV);
 }
 
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+                               const GlobalVariable &B) {
+  if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+    return true;
+
+  if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+    return false;
+
+  return A.hasUnnamedAddr();
+}
+
 bool ConstantMerge::runOnModule(Module &M) {
   // Find all the globals that are marked "used".  These cannot be merged.
   SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -83,44 +97,76 @@ bool ConstantMerge::runOnModule(Module &M) {
   // second level constants have initializers which point to the globals that
   // were just merged.
   while (1) {
-    // First pass: identify all globals that can be merged together, filling in
-    // the Replacements vector.  We cannot do the replacement in this pass
-    // because doing so may cause initializers of other globals to be rewritten,
-    // invalidating the Constant* pointers in CMap.
-    //
+
+    // First: Find the canonical constants others will be merged with.
     for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
          GVI != E; ) {
       GlobalVariable *GV = GVI++;
-      
+
       // If this GV is dead, remove it.
       GV->removeDeadConstantUsers();
       if (GV->use_empty() && GV->hasLocalLinkage()) {
         GV->eraseFromParent();
         continue;
       }
-      
-      // Only process constants with initializers in the default addres space.
-      if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
-          GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
           // Don't touch values marked with attribute(used).
           UsedGlobals.count(GV))
         continue;
-      
-      
-      
+
       Constant *Init = GV->getInitializer();
 
       // Check to see if the initializer is already known.
       GlobalVariable *&Slot = CMap[Init];
 
-      if (Slot == 0) {    // Nope, add it to the map.
+      // If this is the first constant we find or if the old on is local,
+      // replace with the current one. It the current is externally visible
+      // it cannot be replace, but can be the canonical constant we merge with.
+      if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
         Slot = GV;
-      } else if (GV->hasLocalLinkage()) {    // Yup, this is a duplicate!
-        // Make all uses of the duplicate constant use the canonical version.
-        Replacements.push_back(std::make_pair(GV, Slot));
       }
     }
 
+    // Second: identify all globals that can be merged together, filling in
+    // the Replacements vector.  We cannot do the replacement in this pass
+    // because doing so may cause initializers of other globals to be rewritten,
+    // invalidating the Constant* pointers in CMap.
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+         GVI != E; ) {
+      GlobalVariable *GV = GVI++;
+
+      // Only process constants with initializers in the default address space.
+      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+          // Don't touch values marked with attribute(used).
+          UsedGlobals.count(GV))
+        continue;
+
+      // We can only replace constant with local linkage.
+      if (!GV->hasLocalLinkage())
+        continue;
+
+      Constant *Init = GV->getInitializer();
+
+      // Check to see if the initializer is already known.
+      GlobalVariable *Slot = CMap[Init];
+
+      if (!Slot || Slot == GV)
+        continue;
+
+      if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+        continue;
+
+      if (!GV->hasUnnamedAddr())
+        Slot->setUnnamedAddr(false);
+
+      // Make all uses of the duplicate constant use the canonical version.
+      Replacements.push_back(std::make_pair(GV, Slot));
+    }
+
     if (Replacements.empty())
       return MadeChange;
     CMap.clear();
@@ -133,6 +179,8 @@ bool ConstantMerge::runOnModule(Module &M) {
       Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
 
       // Delete the global value from the module.
+      assert(Replacements[i].first->hasLocalLinkage() &&
+             "Refusing to delete an externally visible global variable.");
       Replacements[i].first->eraseFromParent();
     }
 
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 47df235424e2..b42322116a98 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -39,7 +39,8 @@ using namespace llvm;
 
 STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
 STATISTIC(NumRetValsEliminated  , "Number of unused return values removed");
-
+STATISTIC(NumArgumentsReplacedWithUndef, 
+          "Number of unread args replaced with undef");
 namespace {
   /// DAE - The dead argument elimination pass.
   ///
@@ -126,7 +127,9 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    DAE() : ModulePass(ID) {}
+    DAE() : ModulePass(ID) {
+      initializeDAEPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
 
@@ -146,12 +149,13 @@ namespace {
     void PropagateLiveness(const RetOrArg &RA);
     bool RemoveDeadStuffFromFunction(Function *F);
     bool DeleteDeadVarargs(Function &Fn);
+    bool RemoveDeadArgumentsFromCallers(Function &Fn);
   };
 }
 
 
 char DAE::ID = 0;
-INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
 
 namespace {
   /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -168,7 +172,7 @@ namespace {
 char DAH::ID = 0;
 INITIALIZE_PASS(DAH, "deadarghaX0r", 
                 "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
-                false, false);
+                false, false)
 
 /// createDeadArgEliminationPass - This pass removes arguments from functions
 /// which are not used by the body of the function.
@@ -285,6 +289,55 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   return true;
 }
 
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any 
+/// arguments that are unused, and changes the caller parameters to be undefined
+/// instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+  if (Fn.isDeclaration())
+    return false;
+
+  // Functions with local linkage should already have been handled.
+  if (Fn.hasLocalLinkage())
+    return false;
+
+  if (Fn.use_empty())
+    return false;
+
+  llvm::SmallVector<unsigned, 8> UnusedArgs;
+  for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); 
+       I != E; ++I) {
+    Argument *Arg = I;
+
+    if (Arg->use_empty() && !Arg->hasByValAttr())
+      UnusedArgs.push_back(Arg->getArgNo());
+  }
+
+  if (UnusedArgs.empty())
+    return false;
+
+  bool Changed = false;
+
+  for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end(); 
+       I != E; ++I) {
+    CallSite CS(*I);
+    if (!CS || !CS.isCallee(I))
+      continue;
+
+    // Now go through all unused args and replace them with "undef".
+    for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+      unsigned ArgNo = UnusedArgs[I];
+
+      Value *Arg = CS.getArgument(ArgNo);
+      CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+      ++NumArgumentsReplacedWithUndef;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
 /// Convenience function that returns the number of return values. It returns 0
 /// for void functions and 1 for functions not returning a struct. It returns
 /// the number of struct elements for functions returning a struct.
@@ -791,7 +844,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       } else if (New->getType()->isVoidTy()) {
         // Our return value has uses, but they will get removed later on.
         // Replace by null for now.
-        Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+        if (!Call->getType()->isX86_MMXTy())
+          Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
       } else {
         assert(RetTy->isStructTy() &&
                "Return type changed, but not into a void. The old return type"
@@ -854,7 +908,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     } else {
       // If this argument is dead, replace any uses of it with null constants
       // (these are guaranteed to become unused later on).
-      I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+      if (!I->getType()->isX86_MMXTy())
+        I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
     }
 
   // If we change the return value of the function we must rewrite any return
@@ -935,5 +990,14 @@ bool DAE::runOnModule(Module &M) {
     Function *F = I++;
     Changed |= RemoveDeadStuffFromFunction(F);
   }
+
+  // Finally, look for any unused parameters in functions with non-local
+  // linkage and replace the passed in parameters with undef.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    Function& F = *I;
+
+    Changed |= RemoveDeadArgumentsFromCallers(F);
+  }
+
   return Changed;
 }
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
index 5dc50c5bef32..a5099313b48c 100644
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,9 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
 namespace {
   struct DTE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(ID) {}
+    DTE() : ModulePass(ID) {
+      initializeDTEPass(*PassRegistry::getPassRegistry());
+    }
 
     // doPassInitialization - For this pass, it removes global symbol table
     // entries for primitive types.  These are never used for linking in GCC and
@@ -45,7 +47,10 @@ namespace {
 }
 
 char DTE::ID = 0;
-INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
+INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
 
 ModulePass *llvm::createDeadTypeEliminationPass() {
   return new DTE();
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 45c5fe76ba7c..9d432de9fa7b 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -50,24 +50,22 @@ namespace {
 
       // Visit the GlobalVariables.
       for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-           I != E; ++I)
-        if (!I->isDeclaration()) {
-          if (I->hasLocalLinkage())
-            I->setVisibility(GlobalValue::HiddenVisibility);
-          I->setLinkage(GlobalValue::ExternalLinkage);
-          if (deleteStuff == Named.count(I))
-            I->setInitializer(0);
-        }
+           I != E; ++I) {
+        if (I->hasLocalLinkage())
+          I->setVisibility(GlobalValue::HiddenVisibility);
+        I->setLinkage(GlobalValue::ExternalLinkage);
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+          I->setInitializer(0);
+      }
 
       // Visit the Functions.
-      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-        if (!I->isDeclaration()) {
-          if (I->hasLocalLinkage())
-            I->setVisibility(GlobalValue::HiddenVisibility);
-          I->setLinkage(GlobalValue::ExternalLinkage);
-          if (deleteStuff == Named.count(I))
-            I->deleteBody();
-        }
+      for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+        if (I->hasLocalLinkage())
+          I->setVisibility(GlobalValue::HiddenVisibility);
+        I->setLinkage(GlobalValue::ExternalLinkage);
+        if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+          I->deleteBody();
+      }
 
       return true;
     }
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 6165ba023f73..95decec0f874 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,10 +23,10 @@
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/UniqueVector.h"
@@ -41,7 +41,9 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
 namespace {
   struct FunctionAttrs : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    FunctionAttrs() : CallGraphSCCPass(ID) {}
+    FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+      initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
     bool runOnSCC(CallGraphSCC &SCC);
@@ -61,67 +63,25 @@ namespace {
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
 
-    bool PointsToLocalMemory(Value *V);
+  private:
+    AliasAnalysis *AA;
   };
 }
 
 char FunctionAttrs::ID = 0;
-INITIALIZE_PASS(FunctionAttrs, "functionattrs",
-                "Deduce function attributes", false, false);
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+                "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+                "Deduce function attributes", false, false)
 
 Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
 
 
-/// PointsToLocalMemory - Returns whether the given pointer value points to
-/// memory that is local to the function.  Global constants are considered
-/// local to all functions.
-bool FunctionAttrs::PointsToLocalMemory(Value *V) {
-  SmallVector<Value*, 16> Worklist;
-  unsigned MaxLookup = 8;
-
-  Worklist.push_back(V);
-
-  do {
-    V = Worklist.pop_back_val()->getUnderlyingObject();
-
-    // An alloca instruction defines local memory.
-    if (isa<AllocaInst>(V))
-      continue;
-
-    // A global constant counts as local memory for our purposes.
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
-      if (!GV->isConstant())
-        return false;
-      continue;
-    }
-
-    // If both select values point to local memory, then so does the select.
-    if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
-      Worklist.push_back(SI->getTrueValue());
-      Worklist.push_back(SI->getFalseValue());
-      continue;
-    }
-
-    // If all values incoming to a phi node point to local memory, then so does
-    // the phi.
-    if (PHINode *PN = dyn_cast<PHINode>(V)) {
-      // Don't bother inspecting phi nodes with many operands.
-      if (PN->getNumIncomingValues() > MaxLookup)
-        return false;
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-        Worklist.push_back(PN->getIncomingValue(i));
-      continue;
-    }
-
-    return false;
-  } while (!Worklist.empty() && --MaxLookup);
-
-  return Worklist.empty();
-}
-
 /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
 bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
   SmallPtrSet<Function*, 8> SCCNodes;
@@ -141,14 +101,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
       // External node - may write memory.  Just give up.
       return false;
 
-    if (F->doesNotAccessMemory())
+    AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+    if (MRB == AliasAnalysis::DoesNotAccessMemory)
       // Already perfect!
       continue;
 
     // Definitions with weak linkage may be overridden at linktime with
     // something that writes memory, so treat them like declarations.
     if (F->isDeclaration() || F->mayBeOverridden()) {
-      if (!F->onlyReadsMemory())
+      if (!AliasAnalysis::onlyReadsMemory(MRB))
         // May write memory.  Just give up.
         return false;
 
@@ -163,32 +124,62 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
       // Some instructions can be ignored even if they read or write memory.
       // Detect these now, skipping to the next instruction if one is found.
       CallSite CS(cast<Value>(I));
-      if (CS && CS.getCalledFunction()) {
+      if (CS) {
         // Ignore calls to functions in the same SCC.
-        if (SCCNodes.count(CS.getCalledFunction()))
+        if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
           continue;
-        // Ignore intrinsics that only access local memory.
-        if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
-          if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
-              AliasAnalysis::AccessesArguments) {
-            // Check that all pointer arguments point to local memory.
+        AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+        // If the call doesn't access arbitrary memory, we may be able to
+        // figure out something.
+        if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+          // If the call does access argument pointees, check each argument.
+          if (AliasAnalysis::doesAccessArgPointees(MRB))
+            // Check whether all pointer arguments point to local memory, and
+            // ignore calls that only access local memory.
             for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
                  CI != CE; ++CI) {
               Value *Arg = *CI;
-              if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
-                // Writes memory.  Just give up.
-                return false;
+              if (Arg->getType()->isPointerTy()) {
+                AliasAnalysis::Location Loc(Arg,
+                                            AliasAnalysis::UnknownSize,
+                                            I->getMetadata(LLVMContext::MD_tbaa));
+                if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+                  if (MRB & AliasAnalysis::Mod)
+                    // Writes non-local memory.  Give up.
+                    return false;
+                  if (MRB & AliasAnalysis::Ref)
+                    // Ok, it reads non-local memory.
+                    ReadsMemory = true;
+                }
+              }
             }
-            // Only reads and writes local memory.
-            continue;
-          }
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        // Ignore loads from local memory.
-        if (PointsToLocalMemory(LI->getPointerOperand()))
           continue;
+        }
+        // The call could access any memory. If that includes writes, give up.
+        if (MRB & AliasAnalysis::Mod)
+          return false;
+        // If it reads, note it.
+        if (MRB & AliasAnalysis::Ref)
+          ReadsMemory = true;
+        continue;
+      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        // Ignore non-volatile loads from local memory.
+        if (!LI->isVolatile()) {
+          AliasAnalysis::Location Loc = AA->getLocation(LI);
+          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+            continue;
+        }
       } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        // Ignore stores to local memory.
-        if (PointsToLocalMemory(SI->getPointerOperand()))
+        // Ignore non-volatile stores to local memory.
+        if (!SI->isVolatile()) {
+          AliasAnalysis::Location Loc = AA->getLocation(SI);
+          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+            continue;
+        }
+      } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+        // Ignore vaargs on local memory.
+        AliasAnalysis::Location Loc = AA->getLocation(VI);
+        if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
           continue;
       }
 
@@ -198,10 +189,6 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
         // Writes memory.  Just give up.
         return false;
 
-      if (isMalloc(I))
-        // malloc claims not to write memory!  PR3754.
-        return false;
-
       // If this instruction may read memory, remember that.
       ReadsMemory |= I->mayReadFromMemory();
     }
@@ -384,6 +371,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
 }
 
 bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+  AA = &getAnalysis<AliasAnalysis>();
+
   bool Changed = AddReadAttrs(SCC);
   Changed |= AddNoCaptureAttrs(SCC);
   Changed |= AddNoAliasAttrs(SCC);
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index aa18601b9aec..2b427aa6a4e6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,9 @@ STATISTIC(NumVariables, "Number of global variables removed");
 namespace {
   struct GlobalDCE : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    GlobalDCE() : ModulePass(ID) {}
+    GlobalDCE() : ModulePass(ID) {
+      initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+    }
 
     // run - Do the GlobalDCE pass on the specified module, optionally updating
     // the specified callgraph to reflect the changes.
@@ -52,7 +54,7 @@ namespace {
 
 char GlobalDCE::ID = 0;
 INITIALIZE_PASS(GlobalDCE, "globaldce",
-                "Dead Global Elimination", false, false);
+                "Dead Global Elimination", false, false)
 
 ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
 
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a77af549caa1..d4cb71272f76 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -40,6 +40,7 @@
 using namespace llvm;
 
 STATISTIC(NumMarked    , "Number of globals marked constant");
+STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
 STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
 STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
@@ -55,11 +56,14 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
 
 namespace {
+  struct GlobalStatus;
   struct GlobalOpt : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     }
     static char ID; // Pass identification, replacement for typeid
-    GlobalOpt() : ModulePass(ID) {}
+    GlobalOpt() : ModulePass(ID) {
+      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
 
@@ -69,13 +73,16 @@ namespace {
     bool OptimizeGlobalVars(Module &M);
     bool OptimizeGlobalAliases(Module &M);
     bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
-    bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+    bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+    bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+                               const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+                               const GlobalStatus &GS);
   };
 }
 
 char GlobalOpt::ID = 0;
 INITIALIZE_PASS(GlobalOpt, "globalopt",
-                "Global Variable Optimizer", false, false);
+                "Global Variable Optimizer", false, false)
 
 ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
 
@@ -85,6 +92,9 @@ namespace {
 /// about it.  If we find out that the address of the global is taken, none of
 /// this info will be accurate.
 struct GlobalStatus {
+  /// isCompared - True if the global's address is used in a comparison.
+  bool isCompared;
+
   /// isLoaded - True if the global is ever loaded.  If the global isn't ever
   /// loaded it can be deleted.
   bool isLoaded;
@@ -129,10 +139,11 @@ struct GlobalStatus {
 
   /// HasPHIUser - Set to true if this global has a user that is a PHI node.
   bool HasPHIUser;
-  
-  GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
-                   AccessingFunction(0), HasMultipleAccessingFunctions(false),
-                   HasNonInstructionUser(false), HasPHIUser(false) {}
+
+  GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+                   StoredOnceValue(0), AccessingFunction(0),
+                   HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+                   HasPHIUser(false) {}
 };
 
 }
@@ -165,6 +176,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
     const User *U = *UI;
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
       GS.HasNonInstructionUser = true;
+      
+      // If the result of the constantexpr isn't pointer type, then we won't
+      // know to expect it in various places.  Just reject early.
+      if (!isa<PointerType>(CE->getType())) return true;
+      
       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
       if (!GS.HasMultipleAccessingFunctions) {
@@ -221,7 +237,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
         GS.HasPHIUser = true;
       } else if (isa<CmpInst>(I)) {
-        // Nothing to analyse.
+        GS.isCompared = true;
       } else if (isa<MemTransferInst>(I)) {
         const MemTransferInst *MTI = cast<MemTransferInst>(I);
         if (MTI->getArgOperand(0) == V)
@@ -308,7 +324,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
         if (Init)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
         Changed |= CleanupConstantGlobalUsers(CE, SubInit);
-      } else if (CE->getOpcode() == Instruction::BitCast && 
+      } else if (CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) {
         // Pointer cast, delete any stores and memsets to the global.
         Changed |= CleanupConstantGlobalUsers(CE, 0);
@@ -324,7 +340,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
       // and will invalidate our notion of what Init is.
       Constant *SubInit = 0;
       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
-        ConstantExpr *CE = 
+        ConstantExpr *CE =
           dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
         if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -361,7 +377,7 @@ static bool isSafeSROAElementUse(Value *V) {
   // We might have a dead and dangling constant hanging off of here.
   if (Constant *C = dyn_cast<Constant>(V))
     return SafeToDestroyConstant(C);
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
 
@@ -371,15 +387,15 @@ static bool isSafeSROAElementUse(Value *V) {
   // Stores *to* the pointer are ok.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getOperand(0) != V;
-    
+
   // Otherwise, it must be a GEP.
   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
   if (GEPI == 0) return false;
-  
+
   if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
       !cast<Constant>(GEPI->getOperand(1))->isNullValue())
     return false;
-  
+
   for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
        I != E; ++I)
     if (!isSafeSROAElementUse(*I))
@@ -393,11 +409,11 @@ static bool isSafeSROAElementUse(Value *V) {
 ///
 static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
   // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-  if (!isa<GetElementPtrInst>(U) && 
-      (!isa<ConstantExpr>(U) || 
+  if (!isa<GetElementPtrInst>(U) &&
+      (!isa<ConstantExpr>(U) ||
        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
     return false;
-  
+
   // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
   // don't like < 3 operand CE's, and we don't like non-constant integer
   // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
@@ -409,18 +425,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
 
   gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
   ++GEPI;  // Skip over the pointer index.
-  
+
   // If this is a use of an array allocation, do a bit more checking for sanity.
   if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
     uint64_t NumElements = AT->getNumElements();
     ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-    
+
     // Check to make sure that index falls within the array.  If not,
     // something funny is going on, so we won't do the optimization.
     //
     if (Idx->getZExtValue() >= NumElements)
       return false;
-      
+
     // We cannot scalar repl this level of the array unless any array
     // sub-indices are in-range constants.  In particular, consider:
     // A[0][i].  We cannot know that the user isn't doing invalid things like
@@ -441,7 +457,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
                "Indexed GEP type is not array, vector, or struct!");
         continue;
       }
-      
+
       ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
       if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
         return false;
@@ -465,7 +481,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
   }
   return true;
 }
- 
+
 
 /// SRAGlobal - Perform scalar replacement of aggregates on the specified global
 /// variable.  This opens the door for other optimizations by exposing the
@@ -476,7 +492,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
   // Make sure this global only has simple uses that we can SRA.
   if (!GlobalUsersSafeToSRA(GV))
     return 0;
-  
+
   assert(GV->hasLocalLinkage() && !GV->isConstant());
   Constant *Init = GV->getInitializer();
   const Type *Ty = Init->getType();
@@ -488,7 +504,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
   unsigned StartAlignment = GV->getAlignment();
   if (StartAlignment == 0)
     StartAlignment = TD.getABITypeAlignment(GV->getType());
-   
+
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
     NewGlobals.reserve(STy->getNumElements());
     const StructLayout &Layout = *TD.getStructLayout(STy);
@@ -503,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
                                               GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
-      
+
       // Calculate the known alignment of the field.  If the original aggregate
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
@@ -522,7 +538,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
     if (NumElements > 16 && GV->hasNUsesOrMore(16))
       return 0; // It's not worth it.
     NewGlobals.reserve(NumElements);
-    
+
     uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
     unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
     for (unsigned i = 0, e = NumElements; i != e; ++i) {
@@ -537,7 +553,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
                                               GV->getType()->getAddressSpace());
       Globals.insert(GV, NGV);
       NewGlobals.push_back(NGV);
-      
+
       // Calculate the known alignment of the field.  If the original aggregate
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
@@ -549,7 +565,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
 
   if (NewGlobals.empty())
     return 0;
-  
+
   DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
 
   Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -615,7 +631,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
 }
 
 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null.  PHIs keeps track of any 
+/// value will trap if the value is dynamically null.  PHIs keeps track of any
 /// phi nodes we've seen to avoid reprocessing them.
 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
@@ -757,7 +773,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
   // Keep track of whether we are able to remove all the uses of the global
   // other than the store that defines it.
   bool AllNonStoreUsesGone = true;
-  
+
   // Replace all uses of loads with uses of uses of the stored value.
   for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
     User *GlobalUser = *GUI++;
@@ -830,7 +846,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                      ConstantInt *NElements,
                                                      TargetData* TD) {
   DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
-  
+
   const Type *GlobalType;
   if (NElements->getZExtValue() == 1)
     GlobalType = AllocTy;
@@ -840,14 +856,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
 
   // Create the new global variable.  The contents of the malloc'd memory is
   // undefined, so initialize with an undef value.
-  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 
+  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                              GlobalType, false,
                                              GlobalValue::InternalLinkage,
                                              UndefValue::get(GlobalType),
                                              GV->getName()+".body",
                                              GV,
                                              GV->isThreadLocal());
-  
+
   // If there are bitcast users of the malloc (which is typical, usually we have
   // a malloc + bitcast) then replace them with uses of the new global.  Update
   // other users to use the global as well.
@@ -867,10 +883,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
       User->replaceUsesOfWith(CI, TheBC);
     }
   }
-  
+
   Constant *RepValue = NewGV;
   if (NewGV->getType() != GV->getType()->getElementType())
-    RepValue = ConstantExpr::getBitCast(RepValue, 
+    RepValue = ConstantExpr::getBitCast(RepValue,
                                         GV->getType()->getElementType());
 
   // If there is a comparison against null, we will insert a global bool to
@@ -890,7 +906,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
       SI->eraseFromParent();
       continue;
     }
-    
+
     LoadInst *LI = cast<LoadInst>(GV->use_back());
     while (!LI->use_empty()) {
       Use &LoadUse = LI->use_begin().getUse();
@@ -898,7 +914,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
         LoadUse = RepValue;
         continue;
       }
-      
+
       ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
       // Replace the cmp X, 0 with a use of the bool value.
       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
@@ -963,20 +979,20 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
     if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
       continue; // Fine, ignore.
     }
-    
+
     if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
         return false;  // Storing the pointer itself... bad.
       continue; // Otherwise, storing through it, or storing into GV... fine.
     }
-    
+
     // Must index into the array and into the struct.
     if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
         return false;
       continue;
     }
-    
+
     if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
       // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
       // cycles.
@@ -985,13 +1001,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
           return false;
       continue;
     }
-    
+
     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
         return false;
       continue;
     }
-    
+
     return false;
   }
   return true;
@@ -1000,9 +1016,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
 /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
 /// somewhere.  Transform all uses of the allocation into loads from the
 /// global and uses of the resultant pointer.  Further, delete the store into
-/// GV.  This assumes that these value pass the 
+/// GV.  This assumes that these value pass the
 /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
-static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                           GlobalVariable *GV) {
   while (!Alloc->use_empty()) {
     Instruction *U = cast<Instruction>(*Alloc->use_begin());
@@ -1035,7 +1051,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
             continue;
           }
     }
-      
+
     // Insert a load from the global, and use it instead of the malloc.
     Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
     U->replaceUsesOfWith(Alloc, NL);
@@ -1053,24 +1069,24 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
        ++UI) {
     const Instruction *User = cast<Instruction>(*UI);
-    
+
     // Comparison against null is ok.
     if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
         return false;
       continue;
     }
-    
+
     // getelementptr is also ok, but only a simple form.
     if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       // Must index into the array and into the struct.
       if (GEPI->getNumOperands() < 3)
         return false;
-      
+
       // Otherwise the GEP is ok.
       continue;
     }
-    
+
     if (const PHINode *PN = dyn_cast<PHINode>(User)) {
       if (!LoadUsingPHIsPerLoad.insert(PN))
         // This means some phi nodes are dependent on each other.
@@ -1079,19 +1095,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
       if (!LoadUsingPHIs.insert(PN))
         // If we have already analyzed this PHI, then it is safe.
         continue;
-      
+
       // Make sure all uses of the PHI are simple enough to transform.
       if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                           LoadUsingPHIs, LoadUsingPHIsPerLoad))
         return false;
-      
+
       continue;
     }
-    
+
     // Otherwise we don't know what this is, not ok.
     return false;
   }
-  
+
   return true;
 }
 
@@ -1110,10 +1126,10 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
         return false;
       LoadUsingPHIsPerLoad.clear();
     }
-  
+
   // If we reach here, we know that all uses of the loads and transitive uses
   // (through PHI nodes) are simple enough to transform.  However, we don't know
-  // that all inputs the to the PHI nodes are in the same equivalence sets. 
+  // that all inputs the to the PHI nodes are in the same equivalence sets.
   // Check to verify that all operands of the PHIs are either PHIS that can be
   // transformed, loads from GV, or MI itself.
   for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
@@ -1121,29 +1137,29 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
     const PHINode *PN = *I;
     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
       Value *InVal = PN->getIncomingValue(op);
-      
+
       // PHI of the stored value itself is ok.
       if (InVal == StoredVal) continue;
-      
+
       if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
         // One of the PHIs in our set is (optimistically) ok.
         if (LoadUsingPHIs.count(InPN))
           continue;
         return false;
       }
-      
+
       // Load from GV is ok.
       if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
         if (LI->getOperand(0) == GV)
           continue;
-      
+
       // UNDEF? NULL?
-      
+
       // Anything else is rejected.
       return false;
     }
   }
-  
+
   return true;
 }
 
@@ -1151,15 +1167,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
-  
+
   if (FieldNo >= FieldVals.size())
     FieldVals.resize(FieldNo+1);
-  
+
   // If we already have this value, just reuse the previously scalarized
   // version.
   if (Value *FieldVal = FieldVals[FieldNo])
     return FieldVal;
-  
+
   // Depending on what instruction this is, we have several cases.
   Value *Result;
   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
@@ -1172,9 +1188,9 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
     // field.
-    const StructType *ST = 
+    const StructType *ST =
       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
-    
+
     Result =
      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
                      PN->getName()+".f"+Twine(FieldNo), PN);
@@ -1183,13 +1199,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
     llvm_unreachable("Unknown usable value");
     Result = 0;
   }
-  
+
   return FieldVals[FieldNo] = Result;
 }
 
 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
 /// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(Instruction *LoadUser, 
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   // If this is a comparison against null, handle it.
@@ -1199,30 +1215,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
     // field.
     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                    InsertedScalarizedValues, PHIsToRewrite);
-    
+
     Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
-                              Constant::getNullValue(NPtr->getType()), 
+                              Constant::getNullValue(NPtr->getType()),
                               SCI->getName());
     SCI->replaceAllUsesWith(New);
     SCI->eraseFromParent();
     return;
   }
-  
+
   // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
            && "Unexpected GEPI!");
-  
+
     // Load the pointer for this field.
     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                      InsertedScalarizedValues, PHIsToRewrite);
-    
+
     // Create the new GEP idx vector.
     SmallVector<Value*, 8> GEPIdx;
     GEPIdx.push_back(GEPI->getOperand(1));
     GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
-    
+
     Value *NGEPI = GetElementPtrInst::Create(NewPtr,
                                              GEPIdx.begin(), GEPIdx.end(),
                                              GEPI->getName(), GEPI);
@@ -1243,7 +1259,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   tie(InsertPos, Inserted) =
     InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
   if (!Inserted) return;
-  
+
   // If this is the first time we've seen this PHI, recursively process all
   // users.
   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
@@ -1256,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
 /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
 /// use FieldGlobals instead.  All uses of loaded values satisfy
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
@@ -1264,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
     Instruction *User = cast<Instruction>(*UI++);
     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   }
-  
+
   if (Load->use_empty()) {
     Load->eraseFromParent();
     InsertedScalarizedValues.erase(Load);
@@ -1289,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   // new mallocs at the same place as CI, and N globals.
   std::vector<Value*> FieldGlobals;
   std::vector<Value*> FieldMallocs;
-  
+
   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
     const Type *FieldTy = STy->getElementType(FieldNo);
     const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
-    
+
     GlobalVariable *NGV =
       new GlobalVariable(*GV->getParent(),
                          PFieldTy, false, GlobalValue::InternalLinkage,
@@ -1301,7 +1317,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                          GV->getName() + ".f" + Twine(FieldNo), GV,
                          GV->isThreadLocal());
     FieldGlobals.push_back(NGV);
-    
+
     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
     if (const StructType *ST = dyn_cast<StructType>(FieldTy))
       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
@@ -1313,7 +1329,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
     FieldMallocs.push_back(NMI);
     new StoreInst(NMI, NGV, CI);
   }
-  
+
   // The tricky aspect of this transformation is handling the case when malloc
   // fails.  In the original code, malloc failing would set the result pointer
   // of malloc to null.  In this case, some mallocs could succeed and others
@@ -1340,23 +1356,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   // Split the basic block at the old malloc.
   BasicBlock *OrigBB = CI->getParent();
   BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
-  
+
   // Create the block to check the first condition.  Put all these blocks at the
   // end of the function as they are unlikely to be executed.
   BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                 "malloc_ret_null",
                                                 OrigBB->getParent());
-  
+
   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
   // branch on RunningOr.
   OrigBB->getTerminator()->eraseFromParent();
   BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-  
+
   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   // pointer, because some may be null while others are not.
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
-    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 
+    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                               Constant::getNullValue(GVVal->getType()),
                               "tmp");
     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1371,10 +1387,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
     new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                   FreeBlock);
     BranchInst::Create(NextBlock, FreeBlock);
-    
+
     NullPtrBlock = NextBlock;
   }
-  
+
   BranchInst::Create(ContBB, NullPtrBlock);
 
   // CI is no longer needed, remove it.
@@ -1385,25 +1401,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   /// inserted for a given load.
   DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
   InsertedScalarizedValues[GV] = FieldGlobals;
-  
+
   std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-  
+
   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   // of the per-field globals instead.
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
     Instruction *User = cast<Instruction>(*UI++);
-    
+
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
       continue;
     }
-    
+
     // Must be a store of null.
     StoreInst *SI = cast<StoreInst>(User);
     assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
            "Unexpected heap-sra user!");
-    
+
     // Insert a store of null into each global.
     for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
       const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
@@ -1430,7 +1446,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
     }
   }
-  
+
   // Drop all inter-phi links and any loads that made it this far.
   for (DenseMap<Value*, std::vector<Value*> >::iterator
        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1440,7 +1456,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
       LI->dropAllReferences();
   }
-  
+
   // Delete all the phis and loads now that inter-references are dead.
   for (DenseMap<Value*, std::vector<Value*> >::iterator
        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1450,7 +1466,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
       LI->eraseFromParent();
   }
-  
+
   // The old global is now dead, remove it.
   GV->eraseFromParent();
 
@@ -1468,7 +1484,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                TargetData *TD) {
   if (!TD)
     return false;
-  
+
   // If this is a malloc of an abstract type, don't touch it.
   if (!AllocTy->isSized())
     return false;
@@ -1508,7 +1524,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
       GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
       return true;
     }
-  
+
   // If the allocation is an array of structures, consider transforming this
   // into multiple malloc'd arrays, one for each field.  This is basically
   // SRoA for malloc'd memory.
@@ -1544,13 +1560,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
       CI = dyn_cast<BitCastInst>(Malloc) ?
         extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
     }
-      
+
     GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
     return true;
   }
-  
+
   return false;
-}  
+}
 
 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
 // that only one value (besides its initializer) is ever stored to the global.
@@ -1568,7 +1584,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
       GV->getInitializer()->isNullValue()) {
     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
       if (GV->getInitializer()->getType() != SOVC->getType())
-        SOVC = 
+        SOVC =
          ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
 
       // Optimize away any trapping uses of the loaded value.
@@ -1576,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
         return true;
     } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
       const Type* MallocType = getMallocAllocatedType(CI);
-      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 
+      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                            GVI, TD))
         return true;
     }
@@ -1591,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
 /// whenever it is used.  This exposes the values to other scalar optimizations.
 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   const Type *GVElType = GV->getType()->getElementType();
-  
+
   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   // an FP value, pointer or vector, don't do this optimization because a select
   // between them is very expensive and unlikely to lead to later
@@ -1611,11 +1627,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   }
 
   DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
-  
+
   // Create the new global, initializing it to false.
   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                              false,
-                                             GlobalValue::InternalLinkage, 
+                                             GlobalValue::InternalLinkage,
                                         ConstantInt::getFalse(GV->getContext()),
                                              GV->getName()+".b",
                                              GV->isThreadLocal());
@@ -1684,10 +1700,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
 
 /// ProcessInternalGlobal - Analyze the specified global variable and optimize
 /// it if possible.  If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
-                                      Module::global_iterator &GVI) {
-  SmallPtrSet<const PHINode*, 16> PHIUsers;
-  GlobalStatus GS;
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+                              Module::global_iterator &GVI) {
+  if (!GV->hasLocalLinkage())
+    return false;
+
+  // Do more involved optimizations if the global is internal.
   GV->removeDeadConstantUsers();
 
   if (GV->use_empty()) {
@@ -1697,140 +1715,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
     return true;
   }
 
-  if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
-#if 0
-    DEBUG(dbgs() << "Global: " << *GV);
-    DEBUG(dbgs() << "  isLoaded = " << GS.isLoaded << "\n");
-    DEBUG(dbgs() << "  StoredType = ");
-    switch (GS.StoredType) {
-    case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break;
-    case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n");
-                                            break;
-    case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break;
-    case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break;
-    }
-    if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
-      DEBUG(dbgs() << "  StoredOnceValue = " << *GS.StoredOnceValue << "\n");
-    if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
-      DEBUG(dbgs() << "  AccessingFunction = "
-                   << GS.AccessingFunction->getName() << "\n");
-    DEBUG(dbgs() << "  HasMultipleAccessingFunctions =  "
-                 << GS.HasMultipleAccessingFunctions << "\n");
-    DEBUG(dbgs() << "  HasNonInstructionUser = " 
-                 << GS.HasNonInstructionUser<<"\n");
-    DEBUG(dbgs() << "\n");
-#endif
-    
-    // If this is a first class global and has only one accessing function
-    // and this function is main (which we know is not recursive we can make
-    // this global a local variable) we replace the global with a local alloca
-    // in this function.
-    //
-    // NOTE: It doesn't make sense to promote non single-value types since we
-    // are just replacing static memory to stack memory.
-    //
-    // If the global is in different address space, don't bring it to stack.
-    if (!GS.HasMultipleAccessingFunctions &&
-        GS.AccessingFunction && !GS.HasNonInstructionUser &&
-        GV->getType()->getElementType()->isSingleValueType() &&
-        GS.AccessingFunction->getName() == "main" &&
-        GS.AccessingFunction->hasExternalLinkage() &&
-        GV->getType()->getAddressSpace() == 0) {
-      DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
-      Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
-                                                     ->getEntryBlock().begin());
-      const Type* ElemTy = GV->getType()->getElementType();
-      // FIXME: Pass Global's alignment when globals have alignment
-      AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
-      if (!isa<UndefValue>(GV->getInitializer()))
-        new StoreInst(GV->getInitializer(), Alloca, &FirstI);
-
-      GV->replaceAllUsesWith(Alloca);
+  SmallPtrSet<const PHINode*, 16> PHIUsers;
+  GlobalStatus GS;
+
+  if (AnalyzeGlobal(GV, GS, PHIUsers))
+    return false;
+
+  if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+    GV->setUnnamedAddr(true);
+    NumUnnamed++;
+  }
+
+  if (GV->isConstant() || !GV->hasInitializer())
+    return false;
+
+  return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible.  If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+                                      Module::global_iterator &GVI,
+                                      const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+                                      const GlobalStatus &GS) {
+  // If this is a first class global and has only one accessing function
+  // and this function is main (which we know is not recursive we can make
+  // this global a local variable) we replace the global with a local alloca
+  // in this function.
+  //
+  // NOTE: It doesn't make sense to promote non single-value types since we
+  // are just replacing static memory to stack memory.
+  //
+  // If the global is in different address space, don't bring it to stack.
+  if (!GS.HasMultipleAccessingFunctions &&
+      GS.AccessingFunction && !GS.HasNonInstructionUser &&
+      GV->getType()->getElementType()->isSingleValueType() &&
+      GS.AccessingFunction->getName() == "main" &&
+      GS.AccessingFunction->hasExternalLinkage() &&
+      GV->getType()->getAddressSpace() == 0) {
+    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+    Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+                                                   ->getEntryBlock().begin());
+    const Type* ElemTy = GV->getType()->getElementType();
+    // FIXME: Pass Global's alignment when globals have alignment
+    AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+    if (!isa<UndefValue>(GV->getInitializer()))
+      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+    GV->replaceAllUsesWith(Alloca);
+    GV->eraseFromParent();
+    ++NumLocalized;
+    return true;
+  }
+
+  // If the global is never loaded (but may be stored to), it is dead.
+  // Delete it now.
+  if (!GS.isLoaded) {
+    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+    // Delete any stores we can find to the global.  We may not be able to
+    // make it completely dead though.
+    bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+    // If the global is dead now, delete it.
+    if (GV->use_empty()) {
       GV->eraseFromParent();
-      ++NumLocalized;
-      return true;
+      ++NumDeleted;
+      Changed = true;
     }
-    
-    // If the global is never loaded (but may be stored to), it is dead.
-    // Delete it now.
-    if (!GS.isLoaded) {
-      DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
-
-      // Delete any stores we can find to the global.  We may not be able to
-      // make it completely dead though.
-      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
-      // If the global is dead now, delete it.
-      if (GV->use_empty()) {
-        GV->eraseFromParent();
-        ++NumDeleted;
-        Changed = true;
-      }
-      return Changed;
+    return Changed;
 
-    } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
-      DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
-      GV->setConstant(true);
+  } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+    GV->setConstant(true);
 
-      // Clean up any obviously simplifiable users now.
-      CleanupConstantGlobalUsers(GV, GV->getInitializer());
+    // Clean up any obviously simplifiable users now.
+    CleanupConstantGlobalUsers(GV, GV->getInitializer());
 
-      // If the global is dead now, just nuke it.
-      if (GV->use_empty()) {
-        DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
-                     << "all users and delete global!\n");
-        GV->eraseFromParent();
-        ++NumDeleted;
+    // If the global is dead now, just nuke it.
+    if (GV->use_empty()) {
+      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
+            << "all users and delete global!\n");
+      GV->eraseFromParent();
+      ++NumDeleted;
+    }
+
+    ++NumMarked;
+    return true;
+  } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+    if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+        GVI = FirstNewGV;  // Don't skip the newly produced globals!
+        return true;
+      }
+  } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+    // If the initial value for the global was an undef value, and if only
+    // one other value was stored into it, we can just change the
+    // initializer to be the stored value, then delete all stores to the
+    // global.  This allows us to mark it constant.
+    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+      if (isa<UndefValue>(GV->getInitializer())) {
+        // Change the initial value here.
+        GV->setInitializer(SOVConstant);
+
+        // Clean up any obviously simplifiable users now.
+        CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+        if (GV->use_empty()) {
+          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
+                << "simplify all users and delete global!\n");
+          GV->eraseFromParent();
+          ++NumDeleted;
+        } else {
+          GVI = GV;
+        }
+        ++NumSubstitute;
+        return true;
       }
 
-      ++NumMarked;
+    // Try to optimize globals based on the knowledge that only one value
+    // (besides its initializer) is ever stored to the global.
+    if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+                                 getAnalysisIfAvailable<TargetData>()))
       return true;
-    } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
-      if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
-        if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
-          GVI = FirstNewGV;  // Don't skip the newly produced globals!
-          return true;
-        }
-    } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
-      // If the initial value for the global was an undef value, and if only
-      // one other value was stored into it, we can just change the
-      // initializer to be the stored value, then delete all stores to the
-      // global.  This allows us to mark it constant.
-      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
-        if (isa<UndefValue>(GV->getInitializer())) {
-          // Change the initial value here.
-          GV->setInitializer(SOVConstant);
-
-          // Clean up any obviously simplifiable users now.
-          CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
-          if (GV->use_empty()) {
-            DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
-                         << "simplify all users and delete global!\n");
-            GV->eraseFromParent();
-            ++NumDeleted;
-          } else {
-            GVI = GV;
-          }
-          ++NumSubstitute;
-          return true;
-        }
 
-      // Try to optimize globals based on the knowledge that only one value
-      // (besides its initializer) is ever stored to the global.
-      if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
-                                   getAnalysisIfAvailable<TargetData>()))
+    // Otherwise, if the global was not a boolean, we can shrink it to be a
+    // boolean.
+    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+      if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+        ++NumShrunkToBool;
         return true;
-
-      // Otherwise, if the global was not a boolean, we can shrink it to be a
-      // boolean.
-      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
-        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
-          ++NumShrunkToBool;
-          return true;
-        }
-    }
+      }
   }
+
   return false;
 }
 
@@ -1917,10 +1934,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
         if (New && New != CE)
           GV->setInitializer(New);
       }
-    // Do more involved optimizations if the global is internal.
-    if (!GV->isConstant() && GV->hasLocalLinkage() &&
-        GV->hasInitializer())
-      Changed |= ProcessInternalGlobal(GV, GVI);
+
+    Changed |= ProcessGlobal(GV, GVI);
   }
   return Changed;
 }
@@ -1928,46 +1943,47 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
 /// FindGlobalCtors - Find the llvm.globalctors list, verifying that all
 /// initializers have an init priority of 65535.
 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I)
-    if (I->getName() == "llvm.global_ctors") {
-      // Found it, verify it's an array of { int, void()* }.
-      const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType());
-      if (!ATy) return 0;
-      const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
-      if (!STy || STy->getNumElements() != 2 ||
-          !STy->getElementType(0)->isIntegerTy(32)) return 0;
-      const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
-      if (!PFTy) return 0;
-      const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
-      if (!FTy || !FTy->getReturnType()->isVoidTy() ||
-          FTy->isVarArg() || FTy->getNumParams() != 0)
-        return 0;
-      
-      // Verify that the initializer is simple enough for us to handle.
-      if (!I->hasDefinitiveInitializer()) return 0;
-      ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
-      if (!CA) return 0;
-      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
-        if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
-          if (isa<ConstantPointerNull>(CS->getOperand(1)))
-            continue;
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (GV == 0) return 0;
+  
+  // Found it, verify it's an array of { int, void()* }.
+  const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType());
+  if (!ATy) return 0;
+  const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+  if (!STy || STy->getNumElements() != 2 ||
+      !STy->getElementType(0)->isIntegerTy(32)) return 0;
+  const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
+  if (!PFTy) return 0;
+  const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
+  if (!FTy || !FTy->getReturnType()->isVoidTy() ||
+      FTy->isVarArg() || FTy->getNumParams() != 0)
+    return 0;
 
-          // Must have a function or null ptr.
-          if (!isa<Function>(CS->getOperand(1)))
-            return 0;
-          
-          // Init priority must be standard.
-          ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
-          if (!CI || CI->getZExtValue() != 65535)
-            return 0;
-        } else {
-          return 0;
-        }
-      
-      return I;
-    }
-  return 0;
+  // Verify that the initializer is simple enough for us to handle. We are
+  // only allowed to optimize the initializer if it is unique.
+  if (!GV->hasUniqueInitializer()) return 0;
+  
+  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA) return 0;
+  
+  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+    ConstantStruct *CS = dyn_cast<ConstantStruct>(*i);
+    if (CS == 0) return 0;
+    
+    if (isa<ConstantPointerNull>(CS->getOperand(1)))
+      continue;
+
+    // Must have a function or null ptr.
+    if (!isa<Function>(CS->getOperand(1)))
+      return 0;
+
+    // Init priority must be standard.
+    ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+    if (!CI || CI->getZExtValue() != 65535)
+      return 0;
+  }
+
+  return GV;
 }
 
 /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
@@ -1985,13 +2001,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
 
 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
 /// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                           const std::vector<Function*> &Ctors) {
   // If we made a change, reassemble the initializer list.
   std::vector<Constant*> CSVals;
   CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
   CSVals.push_back(0);
-  
+
   // Create the new init list.
   std::vector<Constant*> CAList;
   for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
@@ -2007,26 +2023,26 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
     }
     CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
   }
-  
+
   // Create the array initializer.
   const Type *StructTy =
       cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
-  Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 
+  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                    CAList.size()), CAList);
-  
+
   // If we didn't change the number of elements, don't create a new GV.
   if (CA->getType() == GCL->getInitializer()->getType()) {
     GCL->setInitializer(CA);
     return GCL;
   }
-  
+
   // Create the new global and insert it next to the existing list.
   GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
                                            GCL->getLinkage(), CA, "",
                                            GCL->isThreadLocal());
   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   NGV->takeName(GCL);
-  
+
   // Nuke the old list, replacing any uses with the new one.
   if (!GCL->use_empty()) {
     Constant *V = NGV;
@@ -2035,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
     GCL->replaceAllUsesWith(V);
   }
   GCL->eraseFromParent();
-  
+
   if (Ctors.size())
     return NGV;
   else
@@ -2043,17 +2059,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
 }
 
 
-static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
-                        Value *V) {
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
   if (Constant *CV = dyn_cast<Constant>(V)) return CV;
   Constant *R = ComputedValues[V];
   assert(R && "Reference to an uncomputed value!");
   return R;
 }
 
+static inline bool 
+isSimpleEnoughValueToCommit(Constant *C,
+                            SmallPtrSet<Constant*, 8> &SimpleConstants);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator.  We don't want to generate something like:
+///   void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+                                   SmallPtrSet<Constant*, 8> &SimpleConstants) {
+  // Simple integer, undef, constant aggregate zero, global addresses, etc are
+  // all supported.
+  if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+      isa<GlobalValue>(C))
+    return true;
+  
+  // Aggregate values are safe if all their elements are.
+  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+      isa<ConstantVector>(C)) {
+    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+      Constant *Op = cast<Constant>(C->getOperand(i));
+      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+        return false;
+    }
+    return true;
+  }
+  
+  // We don't know exactly what relocations are allowed in constant expressions,
+  // so we allow &global+constantoffset, which is safe and uniformly supported
+  // across targets.
+  ConstantExpr *CE = cast<ConstantExpr>(C);
+  switch (CE->getOpcode()) {
+  case Instruction::BitCast:
+  case Instruction::IntToPtr:
+  case Instruction::PtrToInt:
+    // These casts are always fine if the casted value is.
+    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+      
+  // GEP is fine if it is simple + constant offset.
+  case Instruction::GetElementPtr:
+    for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+      if (!isa<ConstantInt>(CE->getOperand(i)))
+        return false;
+    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+      
+  case Instruction::Add:
+    // We allow simple+cst.
+    if (!isa<ConstantInt>(CE->getOperand(1)))
+      return false;
+    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+  }
+  return false;
+}
+
+static inline bool 
+isSimpleEnoughValueToCommit(Constant *C,
+                            SmallPtrSet<Constant*, 8> &SimpleConstants) {
+  // If we already checked this constant, we win.
+  if (!SimpleConstants.insert(C)) return true;
+  // Check the constant.
+  return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+}
+
+
 /// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand.  In particular, if it is a cast of something,
-/// we punt.  We basically just support direct accesses to globals and GEP's of
+/// enough for us to understand.  In particular, if it is a cast to anything
+/// other than from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
 /// globals.  This should be kept up to date with CommitValueTo.
 static bool isSimpleEnoughPointerToCommit(Constant *C) {
   // Conservatively, avoid aggregate types. This is because we don't
@@ -2062,19 +2147,19 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
     return false;
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
-    // Do not allow weak/linkonce/dllimport/dllexport linkage or
+    // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
     // external globals.
-    return GV->hasDefinitiveInitializer();
+    return GV->hasUniqueInitializer();
 
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
     // Handle a constantexpr gep.
     if (CE->getOpcode() == Instruction::GetElementPtr &&
         isa<GlobalVariable>(CE->getOperand(0)) &&
         cast<GEPOperator>(CE)->isInBounds()) {
       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-      // Do not allow weak/linkonce/dllimport/dllexport linkage or
+      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
       // external globals.
-      if (!GV->hasDefinitiveInitializer())
+      if (!GV->hasUniqueInitializer())
         return false;
 
       // The first index must be zero.
@@ -2087,7 +2172,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
         return false;
 
       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+    
+    // A constantexpr bitcast from a pointer to another pointer is a no-op,
+    // and we know how to evaluate it by moving the bitcast from the pointer
+    // operand to the value operand.
+    } else if (CE->getOpcode() == Instruction::BitCast &&
+               isa<GlobalVariable>(CE->getOperand(0))) {
+      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+      // external globals.
+      return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
     }
+  }
+  
   return false;
 }
 
@@ -2101,7 +2197,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     assert(Val->getType() == Init->getType() && "Type mismatch!");
     return Val;
   }
-  
+
   std::vector<Constant*> Elts;
   if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
 
@@ -2119,13 +2215,13 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
       llvm_unreachable("This code is out of sync with "
              " ConstantFoldLoadThroughGEPConstantExpr");
     }
-    
+
     // Replace the element that we are supposed to.
     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
     unsigned Idx = CU->getZExtValue();
     assert(Idx < STy->getNumElements() && "Struct index out of range!");
     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
-    
+
     // Return the modified struct.
     return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
                                STy->isPacked());
@@ -2138,8 +2234,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
       NumElts = ATy->getNumElements();
     else
       NumElts = cast<VectorType>(InitTy)->getNumElements();
-    
-    
+
+
     // Break up the array into elements.
     if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
       for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -2154,16 +2250,15 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
              " ConstantFoldLoadThroughGEPConstantExpr");
       Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
     }
-    
+
     assert(CI->getZExtValue() < NumElts);
     Elts[CI->getZExtValue()] =
       EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
-    
+
     if (Init->getType()->isArrayTy())
       return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
-    else
-      return ConstantVector::get(&Elts[0], Elts.size());
-  }    
+    return ConstantVector::get(Elts);
+  }
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
@@ -2189,14 +2284,14 @@ static Constant *ComputeLoadResult(Constant *P,
   // is the most up-to-date.
   DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
   if (I != Memory.end()) return I->second;
- 
+
   // Access it.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
     if (GV->hasDefinitiveInitializer())
       return GV->getInitializer();
     return 0;
   }
-  
+
   // Handle a constantexpr getelementptr.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
     if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -2216,17 +2311,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
                              const SmallVectorImpl<Constant*> &ActualArgs,
                              std::vector<Function*> &CallStack,
                              DenseMap<Constant*, Constant*> &MutatedMemory,
-                             std::vector<GlobalVariable*> &AllocaTmps) {
+                             std::vector<GlobalVariable*> &AllocaTmps,
+                             SmallPtrSet<Constant*, 8> &SimpleConstants,
+                             const TargetData *TD) {
   // Check to see if this function is already executing (recursion).  If so,
   // bail out.  TODO: we might want to accept limited recursion.
   if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
     return false;
-  
+
   CallStack.push_back(F);
-  
+
   /// Values - As we compute SSA register values, we store their contents here.
   DenseMap<Value*, Constant*> Values;
-  
+
   // Initialize arguments to the incoming values specified.
   unsigned ArgNo = 0;
   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
@@ -2237,21 +2334,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
   /// we can only evaluate any one basic block at most once.  This set keeps
   /// track of what we have executed so we can detect recursive cases etc.
   SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-  
+
   // CurInst - The current instruction we're evaluating.
   BasicBlock::iterator CurInst = F->begin()->begin();
-  
+
   // This is the main evaluation loop.
   while (1) {
     Constant *InstResult = 0;
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
       if (SI->isVolatile()) return false;  // no volatile accesses.
       Constant *Ptr = getVal(Values, SI->getOperand(1));
       if (!isSimpleEnoughPointerToCommit(Ptr))
         // If this is too complex for us to commit, reject it.
         return false;
+      
       Constant *Val = getVal(Values, SI->getOperand(0));
+
+      // If this might be too difficult for the backend to handle (e.g. the addr
+      // of one global variable divided by another) then we can't commit it.
+      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+        return false;
+        
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+        if (CE->getOpcode() == Instruction::BitCast) {
+          // If we're evaluating a store through a bitcast, then we need
+          // to pull the bitcast off the pointer type and push it onto the
+          // stored value.
+          Ptr = CE->getOperand(0);
+          
+          const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+          
+          // In order to push the bitcast onto the stored value, a bitcast
+          // from NewTy to Val's type must be legal.  If it's not, we can try
+          // introspecting NewTy to find a legal conversion.
+          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+            // If NewTy is a struct, we can convert the pointer to the struct
+            // into a pointer to its first member.
+            // FIXME: This could be extended to support arrays as well.
+            if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+              NewTy = STy->getTypeAtIndex(0U);
+
+              const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+              Constant * const IdxList[] = {IdxZero, IdxZero};
+
+              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+            
+            // If we can't improve the situation by introspecting NewTy,
+            // we have to give up.
+            } else {
+              return 0;
+            }
+          }
+          
+          // If we found compatible types, go ahead and push the bitcast
+          // onto the stored value.
+          Val = ConstantExpr::getBitCast(Val, NewTy);
+        }
+          
       MutatedMemory[Ptr] = Val;
     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
       InstResult = ConstantExpr::get(BO->getOpcode(),
@@ -2290,7 +2431,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
                                               GlobalValue::InternalLinkage,
                                               UndefValue::get(Ty),
                                               AI->getName()));
-      InstResult = AllocaTmps.back();     
+      InstResult = AllocaTmps.back();
     } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
 
       // Debug info can safely be ignored here.
@@ -2324,11 +2465,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       } else {
         if (Callee->getFunctionType()->isVarArg())
           return false;
-        
+
         Constant *RetVal;
         // Execute the call, if successful, use the return value.
         if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
-                              MutatedMemory, AllocaTmps))
+                              MutatedMemory, AllocaTmps, SimpleConstants, TD))
           return false;
         InstResult = RetVal;
       }
@@ -2342,7 +2483,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
             dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
           if (!Cond) return false;  // Cannot determine.
 
-          NewBB = BI->getSuccessor(!Cond->getZExtValue());          
+          NewBB = BI->getSuccessor(!Cond->getZExtValue());
         }
       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
         ConstantInt *Val =
@@ -2358,20 +2499,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
         if (RI->getNumOperands())
           RetVal = getVal(Values, RI->getOperand(0));
-        
+
         CallStack.pop_back();  // return from fn.
         return true;  // We succeeded at evaluating this ctor!
       } else {
         // invoke, unwind, unreachable.
         return false;  // Cannot handle this terminator.
       }
-      
+
       // Okay, we succeeded in evaluating this control flow.  See if we have
       // executed the new block before.  If so, we have a looping function,
       // which we cannot evaluate in reasonable time.
       if (!ExecutedBlocks.insert(NewBB))
         return false;  // looped!
-      
+
       // Okay, we have never been in this block before.  Check to see if there
       // are any PHI nodes.  If so, evaluate them with information about where
       // we came from.
@@ -2387,10 +2528,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       // Did not know how to evaluate this!
       return false;
     }
-    
-    if (!CurInst->use_empty())
+
+    if (!CurInst->use_empty()) {
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+        InstResult = ConstantFoldConstantExpression(CE, TD);
+      
       Values[CurInst] = InstResult;
-    
+    }
+
     // Advance program counter.
     ++CurInst;
   }
@@ -2398,7 +2543,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
 
 /// EvaluateStaticConstructor - Evaluate static constructors in the function, if
 /// we can.  Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
   /// MutatedMemory - For each store we execute, we update this map.  Loads
   /// check this to get the most up-to-date value.  If evaluation is successful,
   /// this state is committed to the process.
@@ -2408,17 +2553,23 @@ static bool EvaluateStaticConstructor(Function *F) {
   /// to represent its body.  This vector is needed so we can delete the
   /// temporary globals when we are done.
   std::vector<GlobalVariable*> AllocaTmps;
-  
+
   /// CallStack - This is used to detect recursion.  In pathological situations
   /// we could hit exponential behavior, but at least there is nothing
   /// unbounded.
   std::vector<Function*> CallStack;
 
+  /// SimpleConstants - These are constants we have checked and know to be
+  /// simple enough to live in a static initializer of a global.
+  SmallPtrSet<Constant*, 8> SimpleConstants;
+  
   // Call the function.
   Constant *RetValDummy;
   bool EvalSuccess = EvaluateFunction(F, RetValDummy,
                                       SmallVector<Constant*, 0>(), CallStack,
-                                      MutatedMemory, AllocaTmps);
+                                      MutatedMemory, AllocaTmps,
+                                      SimpleConstants, TD);
+  
   if (EvalSuccess) {
     // We succeeded at evaluation: commit the result.
     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -2428,13 +2579,13 @@ static bool EvaluateStaticConstructor(Function *F) {
          E = MutatedMemory.end(); I != E; ++I)
       CommitValueTo(I->second, I->first);
   }
-  
+
   // At this point, we are done interpreting.  If we created any 'alloca'
   // temporaries, release them now.
   while (!AllocaTmps.empty()) {
     GlobalVariable *Tmp = AllocaTmps.back();
     AllocaTmps.pop_back();
-    
+
     // If there are still users of the alloca, the program is doing something
     // silly, e.g. storing the address of the alloca somewhere and using it
     // later.  Since this is undefined, we'll just make it be null.
@@ -2442,7 +2593,7 @@ static bool EvaluateStaticConstructor(Function *F) {
       Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
     delete Tmp;
   }
-  
+
   return EvalSuccess;
 }
 
@@ -2454,7 +2605,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
   bool MadeChange = false;
   if (Ctors.empty()) return false;
-  
+
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
   // Loop over global ctors, optimizing them when we can.
   for (unsigned i = 0; i != Ctors.size(); ++i) {
     Function *F = Ctors[i];
@@ -2467,12 +2619,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
       }
       break;
     }
-    
+
     // We cannot simplify external ctor functions.
     if (F->empty()) continue;
-    
+
     // If we can evaluate the ctor at compile time, do.
-    if (EvaluateStaticConstructor(F)) {
+    if (EvaluateStaticConstructor(F, TD)) {
       Ctors.erase(Ctors.begin()+i);
       MadeChange = true;
       --i;
@@ -2480,9 +2632,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
       continue;
     }
   }
-  
+
   if (!MadeChange) return false;
-  
+
   GCL = InstallGlobalCtors(GCL, Ctors);
   return true;
 }
@@ -2546,21 +2698,21 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
 
 bool GlobalOpt::runOnModule(Module &M) {
   bool Changed = false;
-  
+
   // Try to find the llvm.globalctors list.
   GlobalVariable *GlobalCtors = FindGlobalCtors(M);
 
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
-    
+
     // Delete functions that are trivially dead, ccc -> fastcc
     LocalChange |= OptimizeFunctions(M);
-    
+
     // Optimize global_ctors list.
     if (GlobalCtors)
       LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
-    
+
     // Optimize non-address-taken globals.
     LocalChange |= OptimizeGlobalVars(M);
 
@@ -2568,9 +2720,9 @@ bool GlobalOpt::runOnModule(Module &M) {
     LocalChange |= OptimizeGlobalAliases(M);
     Changed |= LocalChange;
   }
-  
+
   // TODO: Move all global ctors functions to the end of the module for code
   // layout.
-  
+
   return Changed;
 }
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 1b3cf7810cc6..c7c293987a58 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,9 @@ namespace {
   ///
   struct IPCP : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
-    IPCP() : ModulePass(ID) {}
+    IPCP() : ModulePass(ID) {
+      initializeIPCPPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnModule(Module &M);
   private:
@@ -46,7 +48,7 @@ namespace {
 
 char IPCP::ID = 0;
 INITIALIZE_PASS(IPCP, "ipconstprop",
-                "Interprocedural constant propagation", false, false);
+                "Interprocedural constant propagation", false, false)
 
 ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
 
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 340b70eb0268..fbe90ce67591 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -7,17 +7,51 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMIPO.a, which implements
-// several transformations over the LLVM intermediate representation.
+// This file implements the common infrastructure (including C bindings) for 
+// libLLVMIPO.a, which implements several transformations over the LLVM 
+// intermediate representation.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Transforms/IPO.h"
 
 using namespace llvm;
 
+void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeArgPromotionPass(Registry);
+  initializeConstantMergePass(Registry);
+  initializeDAEPass(Registry);
+  initializeDAHPass(Registry);
+  initializeDTEPass(Registry);
+  initializeFunctionAttrsPass(Registry);
+  initializeGlobalDCEPass(Registry);
+  initializeGlobalOptPass(Registry);
+  initializeIPCPPass(Registry);
+  initializeAlwaysInlinerPass(Registry);
+  initializeSimpleInlinerPass(Registry);
+  initializeInternalizePassPass(Registry);
+  initializeLoopExtractorPass(Registry);
+  initializeBlockExtractorPassPass(Registry);
+  initializeSingleLoopExtractorPass(Registry);
+  initializeLowerSetJmpPass(Registry);
+  initializeMergeFunctionsPass(Registry);
+  initializePartialInlinerPass(Registry);
+  initializePruneEHPass(Registry);
+  initializeStripDeadPrototypesPassPass(Registry);
+  initializeStripSymbolsPass(Registry);
+  initializeStripDebugDeclarePass(Registry);
+  initializeStripDeadDebugInfoPass(Registry);
+  initializeStripNonDebugSymbolsPass(Registry);
+  initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+  initializeIPO(*unwrap(R));
+}
+
 void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createArgumentPromotionPass());
 }
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index ecc60ad06932..ce795b72438d 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,9 @@ namespace {
     InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold. 
-    AlwaysInliner() : Inliner(ID, -2000000000) {}
+    AlwaysInliner() : Inliner(ID, -2000000000) {
+      initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+    }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +63,11 @@ namespace {
 }
 
 char AlwaysInliner::ID = 0;
-INITIALIZE_PASS(AlwaysInliner, "always-inline",
-                "Inliner for always_inline functions", false, false);
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+                "Inliner for always_inline functions", false, false)
 
 Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
 
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 9c6637dfe5ad..0c5b3be8f983 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,12 @@ namespace {
     SmallPtrSet<const Function*, 16> NeverInline; 
     InlineCostAnalyzer CA;
   public:
-    SimpleInliner() : Inliner(ID) {}
-    SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
+    SimpleInliner() : Inliner(ID) {
+      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+    }
+    SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+    }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +60,11 @@ namespace {
 }
 
 char SimpleInliner::ID = 0;
-INITIALIZE_PASS(SimpleInliner, "inline",
-                "Function Integration/Inlining", false, false);
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+                "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+                "Function Integration/Inlining", false, false)
 
 Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
 
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 4983e8e13a3e..37eafd723bf8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -52,7 +52,8 @@ Inliner::Inliner(char &ID)
   : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
 
 Inliner::Inliner(char &ID, int Threshold) 
-  : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+  : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+                                          InlineLimit : Threshold) {}
 
 /// getAnalysisUsage - For this class, we declare that we require and preserve
 /// the call graph.  If the derived class implements this method, it should
@@ -74,7 +75,8 @@ InlinedArrayAllocasTy;
 /// inline this call site we attempt to reuse already available allocas or add
 /// any new allocas to the set if not possible.
 static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
-                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {
+                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
+                                 int InlineHistory) {
   Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();
 
@@ -91,7 +93,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
            !Caller->hasFnAttr(Attribute::StackProtectReq))
     Caller->addFnAttr(Attribute::StackProtect);
 
-  
   // Look at all of the allocas that we inlined through this call site.  If we
   // have already inlined other allocas through other calls into this function,
   // then we know that they have disjoint lifetimes and that we can merge them.
@@ -115,6 +116,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
   //
   SmallPtrSet<AllocaInst*, 16> UsedAllocas;
   
+  // When processing our SCC, check to see if CS was inlined from some other
+  // call site.  For example, if we're processing "A" in this code:
+  //   A() { B() }
+  //   B() { x = alloca ... C() }
+  //   C() { y = alloca ... }
+  // Assume that C was not inlined into B initially, and so we're processing A
+  // and decide to inline B into A.  Doing this makes an alloca available for
+  // reuse and makes a callsite (C) available for inlining.  When we process
+  // the C call site we don't want to do any alloca merging between X and Y
+  // because their scopes are not disjoint.  We could make this smarter by
+  // keeping track of the inline history for each alloca in the
+  // InlinedArrayAllocas but this isn't likely to be a significant win.
+  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
+    return true;
+  
   // Loop over all the allocas we have so far and see if they can be merged with
   // a previously inlined alloca.  If not, remember that we had it.
   for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
@@ -152,19 +168,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
       
       // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
       // success!
-      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI);
+      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+                   << *AvailableAlloca << '\n');
       
       AI->replaceAllUsesWith(AvailableAlloca);
       AI->eraseFromParent();
       MergedAwayAlloca = true;
       ++NumMergedAllocas;
+      IFI.StaticAllocas[AllocaNo] = 0;
       break;
     }
 
     // If we already nuked the alloca, we're done with it.
     if (MergedAwayAlloca)
       continue;
-
+    
     // If we were unable to merge away the alloca either because there are no
     // allocas of the right type available or because we reused them all
     // already, remember that this alloca came from an inlined function and mark
@@ -234,20 +252,25 @@ bool Inliner::shouldInline(CallSite CS) {
   if (Caller->hasLocalLinkage()) {
     int TotalSecondaryCost = 0;
     bool outerCallsFound = false;
-    bool allOuterCallsWillBeInlined = true;
-    bool someOuterCallWouldNotBeInlined = false;
+    // This bool tracks what happens if we do NOT inline C into B.
+    bool callerWillBeRemoved = true;
+    // This bool tracks what happens if we DO inline C into B.
+    bool inliningPreventsSomeOuterInline = false;
     for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
          I != E; ++I) {
       CallSite CS2(*I);
 
       // If this isn't a call to Caller (it could be some other sort
-      // of reference) skip it.
-      if (!CS2 || CS2.getCalledFunction() != Caller)
+      // of reference) skip it.  Such references will prevent the caller
+      // from being removed.
+      if (!CS2 || CS2.getCalledFunction() != Caller) {
+        callerWillBeRemoved = false;
         continue;
+      }
 
       InlineCost IC2 = getInlineCost(CS2);
       if (IC2.isNever())
-        allOuterCallsWillBeInlined = false;
+        callerWillBeRemoved = false;
       if (IC2.isAlways() || IC2.isNever())
         continue;
 
@@ -257,14 +280,14 @@ bool Inliner::shouldInline(CallSite CS) {
       float FudgeFactor2 = getInlineFudgeFactor(CS2);
 
       if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
-        allOuterCallsWillBeInlined = false;
+        callerWillBeRemoved = false;
 
       // See if we have this case.  We subtract off the penalty
       // for the call instruction, which we would be deleting.
       if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
           Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= 
                 (int)(CurrentThreshold2 * FudgeFactor2)) {
-        someOuterCallWouldNotBeInlined = true;
+        inliningPreventsSomeOuterInline = true;
         TotalSecondaryCost += Cost2;
       }
     }
@@ -272,10 +295,10 @@ bool Inliner::shouldInline(CallSite CS) {
     // one is set very low by getInlineCost, in anticipation that Caller will
     // be removed entirely.  We did not account for this above unless there
     // is only one caller of Caller.
-    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+    if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
       TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
 
-    if (outerCallsFound && someOuterCallWouldNotBeInlined && 
+    if (outerCallsFound && inliningPreventsSomeOuterInline &&
         TotalSecondaryCost < Cost) {
       DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() << 
            " Cost = " << Cost << 
@@ -401,7 +424,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
       
         // If this call site was obtained by inlining another function, verify
         // that the include path for the function did not include the callee
-        // itself.  If so, we'd be recursively inlinling the same function,
+        // itself.  If so, we'd be recursively inlining the same function,
         // which would provide the same callsites, which would cause us to
         // infinitely inline.
         int InlineHistoryID = CallSites[CSi].second;
@@ -416,7 +439,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
           continue;
 
         // Attempt to inline the function.
-        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
+        if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+                                  InlineHistoryID))
           continue;
         ++NumInlined;
         
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index a1d919fd8a04..9b9ebad47225 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -64,10 +64,11 @@ namespace {
 
 char InternalizePass::ID = 0;
 INITIALIZE_PASS(InternalizePass, "internalize",
-                "Internalize Global Symbols", false, false);
+                "Internalize Global Symbols", false, false)
 
 InternalizePass::InternalizePass(bool AllButMain)
   : ModulePass(ID), AllButMain(AllButMain){
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   if (!APIFile.empty())           // If a filename is specified, use it.
     LoadFile(APIFile.c_str());
   if (!APIList.empty())           // If a list is specified, use it as well.
@@ -76,6 +77,7 @@ InternalizePass::InternalizePass(bool AllButMain)
 
 InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
   : ModulePass(ID), AllButMain(false){
+  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   for(std::vector<const char *>::const_iterator itr = exportList.begin();
         itr != exportList.end(); itr++) {
     ExternalNames.insert(*itr);
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index f88dff67d7c9..848944dc9381 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,9 @@ namespace {
     unsigned NumLoops;
 
     explicit LoopExtractor(unsigned numLoops = ~0) 
-      : LoopPass(ID), NumLoops(numLoops) {}
+      : LoopPass(ID), NumLoops(numLoops) {
+        initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -50,8 +52,13 @@ namespace {
 }
 
 char LoopExtractor::ID = 0;
-INITIALIZE_PASS(LoopExtractor, "loop-extract",
-                "Extract loops into new functions", false, false);
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+                "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+                "Extract loops into new functions", false, false)
 
 namespace {
   /// SingleLoopExtractor - For bugpoint.
@@ -63,7 +70,7 @@ namespace {
 
 char SingleLoopExtractor::ID = 0;
 INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
-                "Extract at most one loop into a new function", false, false);
+                "Extract at most one loop into a new function", false, false)
 
 // createLoopExtractorPass - This pass extracts all natural loops from the
 // program into a function if it can.
@@ -159,7 +166,7 @@ namespace {
 char BlockExtractorPass::ID = 0;
 INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
                 "Extract Basic Blocks From Module (for bugpoint use)",
-                false, false);
+                false, false)
 
 // createBlockExtractorPass - This pass extracts all blocks (except those
 // specified in the argument list) from the functions in the module.
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 6c715de04b76..b545f0bb267d 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,9 @@ namespace {
     bool IsTransformableFunction(StringRef Name);
   public:
     static char ID; // Pass identification, replacement for typeid
-    LowerSetJmp() : ModulePass(ID) {}
+    LowerSetJmp() : ModulePass(ID) {
+      initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+    }
 
     void visitCallInst(CallInst& CI);
     void visitInvokeInst(InvokeInst& II);
@@ -122,7 +124,7 @@ namespace {
 } // end anonymous namespace
 
 char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
 
 // run - Run the transformation on the program. We grab the function
 // prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 5d838f98aa08..cccffca6e384 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -67,42 +67,87 @@
 using namespace llvm;
 
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+  const FunctionType *FTy = F->getFunctionType();
 
-namespace {
-  /// MergeFunctions finds functions which will generate identical machine code,
-  /// by considering all pointer types to be equivalent. Once identified,
-  /// MergeFunctions will fold them by replacing a call to one to a call to a
-  /// bitcast of the other.
-  ///
-  class MergeFunctions : public ModulePass {
-  public:
-    static char ID;
-    MergeFunctions() : ModulePass(ID) {}
-
-    bool runOnModule(Module &M);
-
-  private:
-    /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
-    /// may be deleted, or may be converted into a thunk. In either case, it
-    /// should never be visited again.
-    void MergeTwoFunctions(Function *F, Function *G) const;
-
-    /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
-    /// replace direct uses of G with bitcast(F).
-    void WriteThunk(Function *F, Function *G) const;
-
-    TargetData *TD;
-  };
+  FoldingSetNodeID ID;
+  ID.AddInteger(F->size());
+  ID.AddInteger(F->getCallingConv());
+  ID.AddBoolean(F->hasGC());
+  ID.AddBoolean(FTy->isVarArg());
+  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+  return ID.ComputeHash();
 }
 
-char MergeFunctions::ID = 0;
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
+namespace {
+
+/// ComparableFunction - A struct that pairs together functions with a
+/// TargetData so that we can keep them together as elements in the DenseSet.
+class ComparableFunction {
+public:
+  static const ComparableFunction EmptyKey;
+  static const ComparableFunction TombstoneKey;
+  static TargetData * const LookupOnly;
+
+  ComparableFunction(Function *Func, TargetData *TD)
+    : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+  Function *getFunc() const { return Func; }
+  unsigned getHash() const { return Hash; }
+  TargetData *getTD() const { return TD; }
+
+  // Drops AssertingVH reference to the function. Outside of debug mode, this
+  // does nothing.
+  void release() {
+    assert(Func &&
+           "Attempted to release function twice, or release empty/tombstone!");
+    Func = NULL;
+  }
+
+private:
+  explicit ComparableFunction(unsigned Hash)
+    : Func(NULL), Hash(Hash), TD(NULL) {}
+
+  AssertingVH<Function> Func;
+  unsigned Hash;
+  TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+    ComparableFunction(1);
+TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1);
 
-ModulePass *llvm::createMergeFunctionsPass() {
-  return new MergeFunctions();
+}
+
+namespace llvm {
+  template <>
+  struct DenseMapInfo<ComparableFunction> {
+    static ComparableFunction getEmptyKey() {
+      return ComparableFunction::EmptyKey;
+    }
+    static ComparableFunction getTombstoneKey() {
+      return ComparableFunction::TombstoneKey;
+    }
+    static unsigned getHashValue(const ComparableFunction &CF) {
+      return CF.getHash();
+    }
+    static bool isEqual(const ComparableFunction &LHS,
+                        const ComparableFunction &RHS);
+  };
 }
 
 namespace {
+
 /// FunctionComparator - Compares two functions to determine whether or not
 /// they will generate machine code with the same behaviour. TargetData is
 /// used if available. The comparator always fails conservatively (erring on the
@@ -111,34 +156,34 @@ class FunctionComparator {
 public:
   FunctionComparator(const TargetData *TD, const Function *F1,
                      const Function *F2)
-    : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+    : F1(F1), F2(F2), TD(TD) {}
 
-  /// Compare - test whether the two functions have equivalent behaviour.
-  bool Compare();
+  /// Test whether the two functions have equivalent behaviour.
+  bool compare();
 
 private:
-  /// Compare - test whether two basic blocks have equivalent behaviour.
-  bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+  /// Test whether two basic blocks have equivalent behaviour.
+  bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
 
-  /// Enumerate - Assign or look up previously assigned numbers for the two
-  /// values, and return whether the numbers are equal. Numbers are assigned in
-  /// the order visited.
-  bool Enumerate(const Value *V1, const Value *V2);
+  /// Assign or look up previously assigned numbers for the two values, and
+  /// return whether the numbers are equal. Numbers are assigned in the order
+  /// visited.
+  bool enumerate(const Value *V1, const Value *V2);
 
-  /// isEquivalentOperation - Compare two Instructions for equivalence, similar
-  /// to Instruction::isSameOperationAs but with modifications to the type
+  /// Compare two Instructions for equivalence, similar to
+  /// Instruction::isSameOperationAs but with modifications to the type
   /// comparison.
   bool isEquivalentOperation(const Instruction *I1,
                              const Instruction *I2) const;
 
-  /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+  /// Compare two GEPs for equivalent pointer arithmetic.
   bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
   bool isEquivalentGEP(const GetElementPtrInst *GEP1,
                        const GetElementPtrInst *GEP2) {
     return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
   }
 
-  /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+  /// Compare two Types, treating all pointer types as equal.
   bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
 
   // The two functions undergoing comparison.
@@ -146,20 +191,26 @@ private:
 
   const TargetData *TD;
 
-  typedef DenseMap<const Value *, unsigned long> IDMap;
-  IDMap Map1, Map2;
-  unsigned long IDMap1Count, IDMap2Count;
+  DenseMap<const Value *, const Value *> id_map;
+  DenseSet<const Value *> seen_values;
 };
+
 }
 
-/// isEquivalentType - any two pointers in the same address space are
-/// equivalent. Otherwise, standard type equivalence rules apply.
+// Any two pointers in the same address space are equivalent, intptr_t and
+// pointers are equivalent. Otherwise, standard type equivalence rules apply.
 bool FunctionComparator::isEquivalentType(const Type *Ty1,
                                           const Type *Ty2) const {
   if (Ty1 == Ty2)
     return true;
-  if (Ty1->getTypeID() != Ty2->getTypeID())
+  if (Ty1->getTypeID() != Ty2->getTypeID()) {
+    if (TD) {
+      LLVMContext &Ctx = Ty1->getContext();
+      if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+      if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+    }
     return false;
+  }
 
   switch(Ty1->getTypeID()) {
   default:
@@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     // Fall through in Release mode.
   case Type::IntegerTyID:
   case Type::OpaqueTyID:
+  case Type::VectorTyID:
     // Ty1 == Ty2 would have returned true earlier.
     return false;
 
@@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     return ATy1->getNumElements() == ATy2->getNumElements() &&
            isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
   }
-
-  case Type::VectorTyID: {
-    const VectorType *VTy1 = cast<VectorType>(Ty1);
-    const VectorType *VTy2 = cast<VectorType>(Ty2);
-    return VTy1->getNumElements() == VTy2->getNumElements() &&
-           isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
-  }
   }
 }
 
-/// isEquivalentOperation - determine whether the two operations are the same
-/// except that pointer-to-A and pointer-to-B are equivalent. This should be
-/// kept in sync with Instruction::isSameOperationAs.
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
 bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
                                                const Instruction *I2) const {
+  // Differences from Instruction::isSameOperationAs:
+  //  * replace type comparison with calls to isEquivalentType.
+  //  * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+  //  * because of the above, we don't test for the tail bit on calls later on
   if (I1->getOpcode() != I2->getOpcode() ||
       I1->getNumOperands() != I2->getNumOperands() ||
       !isEquivalentType(I1->getType(), I2->getType()) ||
@@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
   if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
     return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
   if (const CallInst *CI = dyn_cast<CallInst>(I1))
-    return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
-           CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<CallInst>(I2)->getAttributes().getRawPointer();
+    return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+           CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
     return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+           CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
     if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
       return false;
@@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
   return true;
 }
 
-/// isEquivalentGEP - determine whether two GEP operations perform the same
-/// underlying arithmetic.
+// Determine whether two GEP operations perform the same underlying arithmetic.
 bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
                                          const GEPOperator *GEP2) {
   // When we have target data, we can reduce the GEP down to the value in bytes
@@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
     return false;
 
   for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
-    if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+    if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
       return false;
   }
 
   return true;
 }
 
-/// Enumerate - Compare two values used by the two functions under pair-wise
-/// comparison. If this is the first time the values are seen, they're added to
-/// the mapping so that we will detect mismatches on next use.
-bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
   // Check for function @f1 referring to itself and function @f2 referring to
   // itself, or referring to each other, or both referring to either of them.
   // They're all equivalent if the two functions are otherwise equivalent.
@@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
   if (V1 == F2 && V2 == F1)
     return true;
 
-  // TODO: constant expressions with GEP or references to F1 or F2.
-  if (isa<Constant>(V1))
-    return V1 == V2;
-
-  if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
-    const InlineAsm *IA1 = cast<InlineAsm>(V1);
-    const InlineAsm *IA2 = cast<InlineAsm>(V2);
-    return IA1->getAsmString() == IA2->getAsmString() &&
-           IA1->getConstraintString() == IA2->getConstraintString();
+  if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+    if (V1 == V2) return true;
+    const Constant *C2 = dyn_cast<Constant>(V2);
+    if (!C2) return false;
+    // TODO: constant expressions with GEP or references to F1 or F2.
+    if (C1->isNullValue() && C2->isNullValue() &&
+	isEquivalentType(C1->getType(), C2->getType()))
+      return true;
+    // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+    // then they must have equal bit patterns.
+    return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+      C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
   }
 
-  unsigned long &ID1 = Map1[V1];
-  if (!ID1)
-    ID1 = ++IDMap1Count;
+  if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+    return V1 == V2;
 
-  unsigned long &ID2 = Map2[V2];
-  if (!ID2)
-    ID2 = ++IDMap2Count;
+  // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
+  // check whether it's equal to V2. When there is no mapping then we need to
+  // ensure that V2 isn't already equivalent to something else. For this
+  // purpose, we track the V2 values in a set.
 
-  return ID1 == ID2;
+  const Value *&map_elem = id_map[V1];
+  if (map_elem)
+    return map_elem == V2;
+  if (!seen_values.insert(V2).second)
+    return false;
+  map_elem = V2;
+  return true;
 }
 
-/// Compare - test whether two basic blocks have equivalent behaviour.
-bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
   BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
   BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
 
   do {
-    if (!Enumerate(F1I, F2I))
+    if (!enumerate(F1I, F2I))
       return false;
 
     if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
@@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
       if (!GEP2)
         return false;
 
-      if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+      if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
         return false;
 
       if (!isEquivalentGEP(GEP1, GEP2))
@@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
         Value *OpF1 = F1I->getOperand(i);
         Value *OpF2 = F2I->getOperand(i);
 
-        if (!Enumerate(OpF1, OpF2))
+        if (!enumerate(OpF1, OpF2))
           return false;
 
         if (OpF1->getValueID() != OpF2->getValueID() ||
@@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
   return F1I == F1E && F2I == F2E;
 }
 
-/// Compare - test whether the two functions have equivalent behaviour.
-bool FunctionComparator::Compare() {
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.
 
@@ -431,14 +485,14 @@ bool FunctionComparator::Compare() {
     return false;
 
   assert(F1->arg_size() == F2->arg_size() &&
-         "Identical functions have a different number of args.");
+         "Identically typed functions have different numbers of args!");
 
   // Visit the arguments so that they get enumerated in the order they're
   // passed in.
   for (Function::const_arg_iterator f1i = F1->arg_begin(),
          f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
-    if (!Enumerate(f1i, f2i))
-      llvm_unreachable("Arguments repeat");
+    if (!enumerate(f1i, f2i))
+      llvm_unreachable("Arguments repeat!");
   }
 
   // We do a CFG-ordered walk since the actual ordering of the blocks in the
@@ -456,7 +510,7 @@ bool FunctionComparator::Compare() {
     const BasicBlock *F1BB = F1BBs.pop_back_val();
     const BasicBlock *F2BB = F2BBs.pop_back_val();
 
-    if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+    if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
       return false;
 
     const TerminatorInst *F1TI = F1BB->getTerminator();
@@ -474,23 +528,190 @@ bool FunctionComparator::Compare() {
   return true;
 }
 
-/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
-/// direct uses of G with bitcast(F).
-void MergeFunctions::WriteThunk(Function *F, Function *G) const {
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one to a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+  static char ID;
+  MergeFunctions()
+    : ModulePass(ID), HasGlobalAliases(false) {
+    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M);
+
+private:
+  typedef DenseSet<ComparableFunction> FnSetType;
+
+  /// A work queue of functions that may have been modified and should be
+  /// analyzed again.
+  std::vector<WeakVH> Deferred;
+
+  /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+  /// equal to one that's already present.
+  bool insert(ComparableFunction &NewF);
+
+  /// Remove a Function from the FnSet and queue it up for a second sweep of
+  /// analysis.
+  void remove(Function *F);
+
+  /// Find the functions that use this Value and remove them from FnSet and
+  /// queue the functions.
+  void removeUsers(Value *V);
+
+  /// Replace all direct calls of Old with calls of New. Will bitcast New if
+  /// necessary to make types match.
+  void replaceDirectCallers(Function *Old, Function *New);
+
+  /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+  /// be converted into a thunk. In either case, it should never be visited
+  /// again.
+  void mergeTwoFunctions(Function *F, Function *G);
+
+  /// Replace G with a thunk or an alias to F. Deletes G.
+  void writeThunkOrAlias(Function *F, Function *G);
+
+  /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+  /// of G with bitcast(F). Deletes G.
+  void writeThunk(Function *F, Function *G);
+
+  /// Replace G with an alias to F. Deletes G.
+  void writeAlias(Function *F, Function *G);
+
+  /// The set of all distinct functions. Use the insert() and remove() methods
+  /// to modify it.
+  FnSetType FnSet;
+
+  /// TargetData for more accurate GEP comparisons. May be NULL.
+  TargetData *TD;
+
+  /// Whether or not the target supports global aliases.
+  bool HasGlobalAliases;
+};
+
+}  // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+  return new MergeFunctions();
+}
+
+bool MergeFunctions::runOnModule(Module &M) {
+  bool Changed = false;
+  TD = getAnalysisIfAvailable<TargetData>();
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+      Deferred.push_back(WeakVH(I));
+  }
+  FnSet.resize(Deferred.size());
+
+  do {
+    std::vector<WeakVH> Worklist;
+    Deferred.swap(Worklist);
+
+    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+    // Insert only strong functions and merge them. Strong function merging
+    // always deletes one of them.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          !F->mayBeOverridden()) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        Changed |= insert(CF);
+      }
+    }
+
+    // Insert only weak functions and merge them. By doing these second we
+    // create thunks to the strong function when possible. When two weak
+    // functions are identical, we create a new strong function with two weak
+    // weak thunks to it which are identical but not mergable.
+    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+           E = Worklist.end(); I != E; ++I) {
+      if (!*I) continue;
+      Function *F = cast<Function>(*I);
+      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+          F->mayBeOverridden()) {
+        ComparableFunction CF = ComparableFunction(F, TD);
+        Changed |= insert(CF);
+      }
+    }
+    DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+  } while (!Deferred.empty());
+
+  FnSet.clear();
+
+  return Changed;
+}
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+                                               const ComparableFunction &RHS) {
+  if (LHS.getFunc() == RHS.getFunc() &&
+      LHS.getHash() == RHS.getHash())
+    return true;
+  if (!LHS.getFunc() || !RHS.getFunc())
+    return false;
+
+  // One of these is a special "underlying pointer comparison only" object.
+  if (LHS.getTD() == ComparableFunction::LookupOnly ||
+      RHS.getTD() == ComparableFunction::LookupOnly)
+    return false;
+
+  assert(LHS.getTD() == RHS.getTD() &&
+         "Comparing functions for different targets");
+
+  return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+                            RHS.getFunc()).compare();
+}
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+       UI != UE;) {
+    Value::use_iterator TheIter = UI;
+    ++UI;
+    CallSite CS(*TheIter);
+    if (CS && CS.isCallee(TheIter)) {
+      remove(CS.getInstruction()->getParent()->getParent());
+      TheIter.getUse().set(BitcastNew);
+    }
+  }
+}
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+  if (HasGlobalAliases && G->hasUnnamedAddr()) {
+    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+        G->hasWeakLinkage()) {
+      writeAlias(F, G);
+      return;
+    }
+  }
+
+  writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
   if (!G->mayBeOverridden()) {
     // Redirect direct callers of G to F.
-    Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
-    for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
-         UI != UE;) {
-      Value::use_iterator TheIter = UI;
-      ++UI;
-      CallSite CS(*TheIter);
-      if (CS && CS.isCallee(TheIter))
-        TheIter.getUse().set(BitcastF);
-    }
+    replaceDirectCallers(G, F);
   }
 
-  // If G was internal then we may have replaced all uses if G with F. If so,
+  // If G was internal then we may have replaced all uses of G with F. If so,
   // stop here and delete G. There's no need for a thunk.
   if (G->hasLocalLinkage() && G->use_empty()) {
     G->eraseFromParent();
@@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const {
 
   NewG->copyAttributesFrom(G);
   NewG->takeName(G);
+  removeUsers(G);
   G->replaceAllUsesWith(NewG);
   G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+  ++NumThunksWritten;
 }
 
-/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
-/// Function G is deleted.
-void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
-  if (F->isWeakForLinker()) {
-    assert(G->isWeakForLinker());
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+  Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+  GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+                                    BitcastF, G->getParent());
+  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+  GA->takeName(G);
+  GA->setVisibility(G->getVisibility());
+  removeUsers(G);
+  G->replaceAllUsesWith(GA);
+  G->eraseFromParent();
+
+  DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+  ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+  if (F->mayBeOverridden()) {
+    assert(G->mayBeOverridden());
+
+    if (HasGlobalAliases) {
+      // Make them both thunks to the same internal function.
+      Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+                                     F->getParent());
+      H->copyAttributesFrom(F);
+      H->takeName(F);
+      removeUsers(F);
+      F->replaceAllUsesWith(H);
 
-    // Make them both thunks to the same internal function.
-    Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
-                                   F->getParent());
-    H->copyAttributesFrom(F);
-    H->takeName(F);
-    F->replaceAllUsesWith(H);
+      unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
 
-    unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+      writeAlias(F, G);
+      writeAlias(F, H);
 
-    WriteThunk(F, G);
-    WriteThunk(F, H);
+      F->setAlignment(MaxAlignment);
+      F->setLinkage(GlobalValue::PrivateLinkage);
+    } else {
+      // We can't merge them. Instead, pick one and update all direct callers
+      // to call it and hope that we improve the instruction cache hit rate.
+      replaceDirectCallers(G, F);
+    }
 
-    F->setAlignment(MaxAlignment);
-    F->setLinkage(GlobalValue::InternalLinkage);
+    ++NumDoubleWeak;
   } else {
-    WriteThunk(F, G);
+    writeThunkOrAlias(F, G);
   }
 
   ++NumFunctionsMerged;
 }
 
-static unsigned ProfileFunction(const Function *F) {
-  const FunctionType *FTy = F->getFunctionType();
-
-  FoldingSetNodeID ID;
-  ID.AddInteger(F->size());
-  ID.AddInteger(F->getCallingConv());
-  ID.AddBoolean(F->hasGC());
-  ID.AddBoolean(FTy->isVarArg());
-  ID.AddInteger(FTy->getReturnType()->getTypeID());
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    ID.AddInteger(FTy->getParamType(i)->getTypeID());
-  return ID.ComputeHash();
-}
-
-class ComparableFunction {
-public:
-  ComparableFunction(Function *Func, TargetData *TD)
-    : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+  std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+  if (Result.second) {
+    DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+    return false;
+  }
 
-  AssertingVH<Function> const Func;
-  const unsigned Hash;
-  TargetData * const TD;
-};
+  const ComparableFunction &OldF = *Result.first;
 
-struct MergeFunctionsEqualityInfo {
-  static ComparableFunction *getEmptyKey() {
-    return reinterpret_cast<ComparableFunction*>(0);
-  }
-  static ComparableFunction *getTombstoneKey() {
-    return reinterpret_cast<ComparableFunction*>(-1);
-  }
-  static unsigned getHashValue(const ComparableFunction *CF) {
-    return CF->Hash;
-  }
-  static bool isEqual(const ComparableFunction *LHS,
-                      const ComparableFunction *RHS) {
-    if (LHS == RHS)
-      return true;
-    if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
-        RHS == getEmptyKey() || RHS == getTombstoneKey())
-      return false;
-    assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
-    return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
-  }
-};
+  // Never thunk a strong function to a weak function.
+  assert(!OldF.getFunc()->mayBeOverridden() ||
+         NewF.getFunc()->mayBeOverridden());
 
-bool MergeFunctions::runOnModule(Module &M) {
-  typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+  DEBUG(dbgs() << "  " << OldF.getFunc()->getName() << " == "
+               << NewF.getFunc()->getName() << '\n');
 
-  bool Changed = false;
-  TD = getAnalysisIfAvailable<TargetData>();
+  Function *DeleteF = NewF.getFunc();
+  NewF.release();
+  mergeTwoFunctions(OldF.getFunc(), DeleteF);
+  return true;
+}
 
-  std::vector<Function *> Funcs;
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
-      Funcs.push_back(F);
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+  // We need to make sure we remove F, not a function "equal" to F per the
+  // function equality comparator.
+  //
+  // The special "lookup only" ComparableFunction bypasses the expensive
+  // function comparison in favour of a pointer comparison on the underlying
+  // Function*'s.
+  ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+  if (FnSet.erase(CF)) {
+    DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+    Deferred.push_back(F);
   }
+}
 
-  bool LocalChanged;
-  do {
-    LocalChanged = false;
-
-    FnSetType FnSet;
-    for (unsigned i = 0, e = Funcs.size(); i != e;) {
-      Function *F = Funcs[i];
-      ComparableFunction *NewF = new ComparableFunction(F, TD);
-      std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
-      if (!Result.second) {
-        ComparableFunction *&OldF = *Result.first;
-        assert(OldF && "Expected a hash collision");
-
-        // NewF will be deleted in favour of OldF unless NewF is strong and
-        // OldF is weak in which case swap them to keep the strong definition.
-
-        if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
-          std::swap(OldF, NewF);
-
-        DEBUG(dbgs() << "  " << OldF->Func->getName() << " == "
-                     << NewF->Func->getName() << '\n');
-
-	Funcs.erase(Funcs.begin() + i);
-	--e;
-
-        Function *DeleteF = NewF->Func;
-        delete NewF;
-        MergeTwoFunctions(OldF->Func, DeleteF);
-	LocalChanged = true;
-        Changed = true;
-      } else {
-	++i;
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+  std::vector<Value *> Worklist;
+  Worklist.push_back(V);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.back();
+    Worklist.pop_back();
+
+    for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+         UI != UE; ++UI) {
+      Use &U = UI.getUse();
+      if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+        remove(I->getParent()->getParent());
+      } else if (isa<GlobalValue>(U.getUser())) {
+        // do nothing
+      } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+        for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+             CUI != CUE; ++CUI)
+          Worklist.push_back(*CUI);
       }
     }
-    DeleteContainerPointers(FnSet);
-  } while (LocalChanged);
-
-  return Changed;
+  }
 }
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 432f7c53a67d..2afd02985764 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,9 @@ namespace {
   struct PartialInliner : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
     static char ID; // Pass identification, replacement for typeid
-    PartialInliner() : ModulePass(ID) {}
+    PartialInliner() : ModulePass(ID) {
+      initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+    }
     
     bool runOnModule(Module& M);
     
@@ -41,7 +43,7 @@ namespace {
 
 char PartialInliner::ID = 0;
 INITIALIZE_PASS(PartialInliner, "partial-inliner",
-                "Partial Inliner", false, false);
+                "Partial Inliner", false, false)
 
 ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
 
@@ -67,7 +69,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
     return 0;
   
   // Clone the function, so that we can hack away on it.
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Function* duplicateFunction = CloneFunction(F, VMap,
                                               /*ModuleLevelChanges=*/false);
   duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
deleted file mode 100644
index 4a99a411ab33..000000000000
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-//===-- PartialSpecialization.cpp - Specialize for common constants--------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass finds function arguments that are often a common constant and 
-// specializes a version of the called function for that constant.
-//
-// This pass simply does the cloning for functions it specializes.  It depends
-// on IPSCCP and DAE to clean up the results.
-//
-// The initial heuristic favors constant arguments that are used in control 
-// flow.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "partialspecialization"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/ADT/DenseSet.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(numSpecialized, "Number of specialized functions created");
-STATISTIC(numReplaced, "Number of callers replaced by specialization");
-
-// Maximum number of arguments markable interested
-static const int MaxInterests = 6;
-
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
-namespace {
-  typedef SmallVector<int, MaxInterests> InterestingArgVector;
-  class PartSpec : public ModulePass {
-    void scanForInterest(Function&, InterestingArgVector&);
-    int scanDistribution(Function&, int, std::map<Constant*, int>&);
-  public :
-    static char ID; // Pass identification, replacement for typeid
-    PartSpec() : ModulePass(ID) {}
-    bool runOnModule(Module &M);
-  };
-}
-
-char PartSpec::ID = 0;
-INITIALIZE_PASS(PartSpec, "partialspecialization",
-                "Partial Specialization", false, false);
-
-// Specialize F by replacing the arguments (keys) in replacements with the 
-// constants (values).  Replace all calls to F with those constants with
-// a call to the specialized function.  Returns the specialized function
-static Function* 
-SpecializeFunction(Function* F, 
-                   ValueMap<const Value*, Value*>& replacements) {
-  // arg numbers of deleted arguments
-  DenseMap<unsigned, const Argument*> deleted;
-  for (ValueMap<const Value*, Value*>::iterator 
-         repb = replacements.begin(), repe = replacements.end();
-       repb != repe; ++repb) {
-    Argument const *arg = cast<const Argument>(repb->first);
-    deleted[arg->getArgNo()] = arg;
-  }
-
-  Function* NF = CloneFunction(F, replacements,
-                               /*ModuleLevelChanges=*/false);
-  NF->setLinkage(GlobalValue::InternalLinkage);
-  F->getParent()->getFunctionList().push_back(NF);
-
-  for (Value::use_iterator ii = F->use_begin(), ee = F->use_end(); 
-       ii != ee; ) {
-    Value::use_iterator i = ii;
-    ++ii;
-    User *U = *i;
-    CallSite CS(U);
-    if (CS) {
-      if (CS.getCalledFunction() == F) {
-        SmallVector<Value*, 6> args;
-        // Assemble the non-specialized arguments for the updated callsite.
-        // In the process, make sure that the specialized arguments are
-        // constant and match the specialization.  If that's not the case,
-        // this callsite needs to call the original or some other
-        // specialization; don't change it here.
-        CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
-        for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
-          DenseMap<unsigned, const Argument*>::iterator delit = deleted.find(
-            std::distance(as, ai));
-          if (delit == deleted.end())
-            args.push_back(cast<Value>(ai));
-          else {
-            Constant *ci = dyn_cast<Constant>(ai);
-            if (!(ci && ci == replacements[delit->second]))
-              goto next_use;
-          }
-        }
-        Value* NCall;
-        if (CallInst *CI = dyn_cast<CallInst>(U)) {
-          NCall = CallInst::Create(NF, args.begin(), args.end(), 
-                                   CI->getName(), CI);
-          cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
-          cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
-        } else {
-          InvokeInst *II = cast<InvokeInst>(U);
-          NCall = InvokeInst::Create(NF, II->getNormalDest(),
-                                     II->getUnwindDest(),
-                                     args.begin(), args.end(), 
-                                     II->getName(), II);
-          cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv());
-        }
-        CS.getInstruction()->replaceAllUsesWith(NCall);
-        CS.getInstruction()->eraseFromParent();
-        ++numReplaced;
-      }
-    }
-    next_use:;
-  }
-  return NF;
-}
-
-
-bool PartSpec::runOnModule(Module &M) {
-  bool Changed = false;
-  for (Module::iterator I = M.begin(); I != M.end(); ++I) {
-    Function &F = *I;
-    if (F.isDeclaration() || F.mayBeOverridden()) continue;
-    InterestingArgVector interestingArgs;
-    scanForInterest(F, interestingArgs);
-
-    // Find the first interesting Argument that we can specialize on
-    // If there are multiple interesting Arguments, then those will be found
-    // when processing the cloned function.
-    bool breakOuter = false;
-    for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
-      std::map<Constant*, int> distribution;
-      int total = scanDistribution(F, interestingArgs[x], distribution);
-      if (total > CallsMin) 
-        for (std::map<Constant*, int>::iterator ii = distribution.begin(),
-               ee = distribution.end(); ii != ee; ++ii)
-          if (total > ii->second && ii->first &&
-               ii->second > total * ConstValPercent) {
-            ValueMap<const Value*, Value*> m;
-            Function::arg_iterator arg = F.arg_begin();
-            for (int y = 0; y < interestingArgs[x]; ++y)
-              ++arg;
-            m[&*arg] = ii->first;
-            SpecializeFunction(&F, m);
-            ++numSpecialized;
-            breakOuter = true;
-            Changed = true;
-          }
-    }
-  }
-  return Changed;
-}
-
-/// scanForInterest - This function decides which arguments would be worth
-/// specializing on.
-void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
-  for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
-      ii != ee; ++ii) {
-    for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
-        ui != ue; ++ui) {
-
-      bool interesting = false;
-      User *U = *ui;
-      if (isa<CmpInst>(U)) interesting = true;
-      else if (isa<CallInst>(U))
-        interesting = ui->getOperand(0) == ii;
-      else if (isa<InvokeInst>(U))
-        interesting = ui->getOperand(0) == ii;
-      else if (isa<SwitchInst>(U)) interesting = true;
-      else if (isa<BranchInst>(U)) interesting = true;
-
-      if (interesting) {
-        args.push_back(std::distance(F.arg_begin(), ii));
-        break;
-      }
-    }
-  }
-}
-
-/// scanDistribution - Construct a histogram of constants for arg of F at arg.
-int PartSpec::scanDistribution(Function& F, int arg, 
-                               std::map<Constant*, int>& dist) {
-  bool hasIndirect = false;
-  int total = 0;
-  for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
-      ii != ee; ++ii) {
-    User *U = *ii;
-    CallSite CS(U);
-    if (CS && CS.getCalledFunction() == &F) {
-      ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
-      ++total;
-    } else
-      hasIndirect = true;
-  }
-
-  // Preserve the original address taken function even if all other uses
-  // will be specialized.
-  if (hasIndirect) ++total;
-  return total;
-}
-
-ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); }
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 09ac76f97964..d91c2c403aae 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -37,7 +37,9 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized");
 namespace {
   struct PruneEH : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    PruneEH() : CallGraphSCCPass(ID) {}
+    PruneEH() : CallGraphSCCPass(ID) {
+      initializePruneEHPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnSCC - Analyze the SCC, performing the transformation if possible.
     bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +50,11 @@ namespace {
 }
 
 char PruneEH::ID = 0;
-INITIALIZE_PASS(PruneEH, "prune-eh",
-                "Remove unused exception handling info", false, false);
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+                "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+                "Remove unused exception handling info", false, false)
 
 Pass *llvm::createPruneEHPass() { return new PruneEH(); }
 
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index ee10ad0b8ba2..b5f09ecccaf2 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,7 +29,9 @@ namespace {
 class StripDeadPrototypesPass : public ModulePass {
 public:
   static char ID; // Pass identification, replacement for typeid
-  StripDeadPrototypesPass() : ModulePass(ID) { }
+  StripDeadPrototypesPass() : ModulePass(ID) {
+    initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+  }
   virtual bool runOnModule(Module &M);
 };
 
@@ -37,7 +39,7 @@ public:
 
 char StripDeadPrototypesPass::ID = 0;
 INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
-                "Strip Unused Function Prototypes", false, false);
+                "Strip Unused Function Prototypes", false, false)
 
 bool StripDeadPrototypesPass::runOnModule(Module &M) {
   bool MadeChange = false;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 20b7b8f2b850..a69076510806 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -39,7 +39,9 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripSymbols(bool ODI = false) 
-      : ModulePass(ID), OnlyDebugInfo(ODI) {}
+      : ModulePass(ID), OnlyDebugInfo(ODI) {
+        initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -52,7 +54,9 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripNonDebugSymbols()
-      : ModulePass(ID) {}
+      : ModulePass(ID) {
+        initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -65,7 +69,9 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripDebugDeclare()
-      : ModulePass(ID) {}
+      : ModulePass(ID) {
+        initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -78,7 +84,9 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit StripDeadDebugInfo()
-      : ModulePass(ID) {}
+      : ModulePass(ID) {
+        initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+      }
 
     virtual bool runOnModule(Module &M);
 
@@ -90,7 +98,7 @@ namespace {
 
 char StripSymbols::ID = 0;
 INITIALIZE_PASS(StripSymbols, "strip",
-                "Strip all symbols from a module", false, false);
+                "Strip all symbols from a module", false, false)
 
 ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
   return new StripSymbols(OnlyDebugInfo);
@@ -99,7 +107,7 @@ ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
 char StripNonDebugSymbols::ID = 0;
 INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
                 "Strip all symbols, except dbg symbols, from a module",
-                false, false);
+                false, false)
 
 ModulePass *llvm::createStripNonDebugSymbolsPass() {
   return new StripNonDebugSymbols();
@@ -107,7 +115,7 @@ ModulePass *llvm::createStripNonDebugSymbolsPass() {
 
 char StripDebugDeclare::ID = 0;
 INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
-                "Strip all llvm.dbg.declare intrinsics", false, false);
+                "Strip all llvm.dbg.declare intrinsics", false, false)
 
 ModulePass *llvm::createStripDebugDeclarePass() {
   return new StripDebugDeclare();
@@ -115,7 +123,7 @@ ModulePass *llvm::createStripDebugDeclarePass() {
 
 char StripDeadDebugInfo::ID = 0;
 INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
-                "Strip debug info for unused symbols", false, false);
+                "Strip debug info for unused symbols", false, false)
 
 ModulePass *llvm::createStripDeadDebugInfoPass() {
   return new StripDeadDebugInfo();
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index b82b03f7d9e7..584deacaff1b 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -50,7 +50,9 @@ namespace {
 
     virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
-    SRETPromotion() : CallGraphSCCPass(ID) {}
+    SRETPromotion() : CallGraphSCCPass(ID) {
+      initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     CallGraphNode *PromoteReturn(CallGraphNode *CGN);
@@ -61,8 +63,11 @@ namespace {
 }
 
 char SRETPromotion::ID = 0;
-INITIALIZE_PASS(SRETPromotion, "sretpromotion",
-                "Promote sret arguments to multiple ret values", false, false);
+INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
+                "Promote sret arguments to multiple ret values", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
+                "Promote sret arguments to multiple ret values", false, false)
 
 Pass *llvm::createStructRetPromotionPass() {
   return new SRETPromotion();
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index 5b1ff3e23bb0..d070ccc0d63f 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,5 +13,3 @@ add_llvm_library(LLVMInstCombine
   InstCombineSimplifyDemanded.cpp
   InstCombineVectorOps.cpp
   )
-
-target_link_libraries (LLVMInstCombine LLVMTransformUtils)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 6f9609cf997b..9c2969c7ab22 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -81,7 +81,9 @@ public:
   BuilderTy *Builder;
       
   static char ID; // Pass identification, replacement for typeid
-  InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
+  InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+    initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+  }
 
 public:
   virtual bool runOnFunction(Function &F);
@@ -143,6 +145,8 @@ public:
                                               ConstantInt *RHS);
   Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
                               ConstantInt *DivRHS);
+  Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+                              ConstantInt *DivRHS);
   Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
                                 ICmpInst::Predicate Pred, Value *TheAdd);
   Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
@@ -284,9 +288,16 @@ public:
 
 private:
 
-  /// SimplifyCommutative - This performs a few simplifications for 
-  /// commutative operators.
-  bool SimplifyCommutative(BinaryOperator &I);
+  /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+  /// operators which are associative or commutative.
+  bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+  /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+  /// which some other binary operation distributes over either by factorizing
+  /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+  /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+  /// a win).  Returns the simplified value, or null if it didn't simplify.
+  Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
 
   /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
   /// based on the demanded bits.
@@ -310,10 +321,7 @@ private:
   // into the PHI (which is only possible if all operands to the PHI are
   // constants).
   //
-  // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-  // that would normally be unprofitable because they strongly encourage jump
-  // threading.
-  Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
+  Instruction *FoldOpIntoPhi(Instruction &I);
 
   // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
   // operator and they all are only used by the PHI, PHI together their
@@ -339,10 +347,6 @@ private:
 
 
   Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
-
-  unsigned GetOrEnforceKnownAlignment(Value *V,
-                                      unsigned PrefAlign = 0);
-
 };
 
       
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 4d2c89e60f0a..c36a9552e7a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -84,43 +84,37 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
 }
 
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 
   if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                  I.hasNoUnsignedWrap(), TD))
     return ReplaceInstUsesWith(I, V);
 
-  
-  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
-      // X + (signbit) --> X ^ signbit
-      const APInt& Val = CI->getValue();
-      uint32_t BitWidth = Val.getBitWidth();
-      if (Val == APInt::getSignBit(BitWidth))
-        return BinaryOperator::CreateXor(LHS, RHS);
-      
-      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
-      // (X & 254)+1 -> (X&254)|1
-      if (SimplifyDemandedInstructionBits(I))
-        return &I;
-
-      // zext(bool) + C -> bool ? C + 1 : C
-      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
-        if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
-          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
-    }
+  // (A*B)+(A*C) -> A*(B+C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
 
-    if (isa<PHINode>(LHS))
-      if (Instruction *NV = FoldOpIntoPhi(I))
-        return NV;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+    // X + (signbit) --> X ^ signbit
+    const APInt &Val = CI->getValue();
+    if (Val.isSignBit())
+      return BinaryOperator::CreateXor(LHS, RHS);
+    
+    // See if SimplifyDemandedBits can simplify this.  This handles stuff like
+    // (X & 254)+1 -> (X&254)|1
+    if (SimplifyDemandedInstructionBits(I))
+      return &I;
+
+    // zext(bool) + C -> bool ? C + 1 : C
+    if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+      if (ZI->getSrcTy()->isIntegerTy(1))
+        return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
     
-    ConstantInt *XorRHS = 0;
-    Value *XorLHS = 0;
-    if (isa<ConstantInt>(RHSC) &&
-        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+    Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+    if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
       uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
-      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+      const APInt &RHSVal = CI->getValue();
       unsigned ExtendAmt = 0;
       // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
       // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
@@ -130,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         else if (XorRHS->getValue().isPowerOf2())
           ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
       }
-
+      
       if (ExtendAmt) {
         APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
         if (!MaskedValueIsZero(XorLHS, Mask))
           ExtendAmt = 0;
       }
-
+      
       if (ExtendAmt) {
         Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
         Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -145,34 +139,28 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     }
   }
 
+  if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+    if (Instruction *NV = FoldOpIntoPhi(I))
+      return NV;
+
   if (I.getType()->isIntegerTy(1))
     return BinaryOperator::CreateXor(LHS, RHS);
 
-  if (I.getType()->isIntegerTy()) {
-    // X + X --> X << 1
-    if (LHS == RHS)
-      return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
-
-    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
-      if (RHSI->getOpcode() == Instruction::Sub)
-        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
-          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
-    }
-    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
-      if (LHSI->getOpcode() == Instruction::Sub)
-        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
-          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
-    }
+  // X + X --> X << 1
+  if (LHS == RHS) {
+    BinaryOperator *New =
+      BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+    New->setHasNoSignedWrap(I.hasNoSignedWrap());
+    New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+    return New;
   }
 
   // -A + B  -->  B - A
   // -A + -B  -->  -(A + B)
   if (Value *LHSV = dyn_castNegVal(LHS)) {
-    if (LHS->getType()->isIntOrIntVectorTy()) {
-      if (Value *RHSV = dyn_castNegVal(RHS)) {
-        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
-        return BinaryOperator::CreateNeg(NewAdd);
-      }
+    if (Value *RHSV = dyn_castNegVal(RHS)) {
+      Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+      return BinaryOperator::CreateNeg(NewAdd);
     }
     
     return BinaryOperator::CreateSub(RHS, LHSV);
@@ -199,11 +187,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (dyn_castFoldableMul(RHS, C2) == LHS)
     return BinaryOperator::CreateMul(LHS, AddOne(C2));
 
-  // X + ~X --> -1   since   ~X = -X-1
-  if (match(LHS, m_Not(m_Specific(RHS))) ||
-      match(RHS, m_Not(m_Specific(LHS))))
-    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
   // A+B --> A|B iff A and B have no bits set in common.
   if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
     APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
@@ -222,7 +205,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   }
 
   // W*X + Y*Z --> W * (X+Z)  iff W == Y
-  if (I.getType()->isIntOrIntVectorTy()) {
+  {
     Value *W, *X, *Y, *Z;
     if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
         match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
@@ -251,24 +234,22 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
     // (X & FF00) + xx00  -> (X+xx00) & FF00
     if (LHS->hasOneUse() &&
-        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
-      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
-      if (Anded == CRHS) {
-        // See if all bits from the first bit set in the Add RHS up are included
-        // in the mask.  First, get the rightmost bit.
-        const APInt &AddRHSV = CRHS->getValue();
-
-        // Form a mask of all bits from the lowest bit added through the top.
-        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
-
-        // See if the and mask includes all of these bits.
-        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
-
-        if (AddRHSHighBits == AddRHSHighBitsAnd) {
-          // Okay, the xform is safe.  Insert the new add pronto.
-          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
-          return BinaryOperator::CreateAnd(NewAdd, C2);
-        }
+        match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+        CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+      // See if all bits from the first bit set in the Add RHS up are included
+      // in the mask.  First, get the rightmost bit.
+      const APInt &AddRHSV = CRHS->getValue();
+      
+      // Form a mask of all bits from the lowest bit added through the top.
+      APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+      // See if the and mask includes all of these bits.
+      APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+      if (AddRHSHighBits == AddRHSHighBitsAnd) {
+        // Okay, the xform is safe.  Insert the new add pronto.
+        Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+        return BinaryOperator::CreateAnd(NewAdd, C2);
       }
     }
 
@@ -293,12 +274,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
       // Can we fold the add into the argument of the select?
       // We check both true and false select arguments for a matching subtract.
-      if (match(FV, m_Zero()) &&
-          match(TV, m_Sub(m_Value(N), m_Specific(A))))
+      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the true select value.
         return SelectInst::Create(SI->getCondition(), N, A);
-      if (match(TV, m_Zero()) &&
-          match(FV, m_Sub(m_Value(N), m_Specific(A))))
+      
+      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
         // Fold the add into the false select value.
         return SelectInst::Create(SI->getCondition(), A, N);
     }
@@ -342,7 +322,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 }
 
 Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 
   if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
@@ -424,6 +404,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
   const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
   Value *Result = Constant::getNullValue(IntPtrTy);
 
+  // If the GEP is inbounds, we know that none of the addressing operations will
+  // overflow in an unsigned sense.
+  bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();
+  
   // Build a mask for high order bits.
   unsigned IntPtrWidth = TD.getPointerSizeInBits();
   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
@@ -439,16 +423,16 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
       if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
         Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
         
-        Result = Builder->CreateAdd(Result,
-                                    ConstantInt::get(IntPtrTy, Size),
-                                    GEP->getName()+".offs");
+        if (Size)
+          Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+                                      GEP->getName()+".offs");
         continue;
       }
       
       Constant *Scale = ConstantInt::get(IntPtrTy, Size);
       Constant *OC =
               ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-      Scale = ConstantExpr::getMul(OC, Scale);
+      Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
       // Emit an add instruction.
       Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
       continue;
@@ -457,9 +441,9 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
     if (Op->getType() != IntPtrTy)
       Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
     if (Size != 1) {
-      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
       // We'll let instcombine(mul) convert this to a shl if possible.
-      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+      Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+                              GEP->getName()+".idx", isInBounds /*NUW*/);
     }
 
     // Emit an add instruction.
@@ -545,8 +529,13 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
 Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Op0 == Op1)                        // sub X, X  -> 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+                                 I.hasNoUnsignedWrap(), TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // (A*B)-(A*C) -> A*(B-C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
 
   // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
   if (Value *V = dyn_castNegVal(Op1)) {
@@ -556,18 +545,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     return Res;
   }
 
-  if (isa<UndefValue>(Op0))
-    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
-  if (isa<UndefValue>(Op1))
-    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
   if (I.getType()->isIntegerTy(1))
     return BinaryOperator::CreateXor(Op0, Op1);
+
+  // Replace (-1 - A) with (~A).
+  if (match(Op0, m_AllOnes()))
+    return BinaryOperator::CreateNot(Op1);
   
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
-    // Replace (-1 - A) with (~A).
-    if (C->isAllOnesValue())
-      return BinaryOperator::CreateNot(Op1);
-
     // C - ~X == X + (1+C)
     Value *X = 0;
     if (match(Op1, m_Not(m_Value(X))))
@@ -576,29 +561,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     // -(X >>u 31) -> (X >>s 31)
     // -(X >>s 31) -> (X >>u 31)
     if (C->isZero()) {
-      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
-        if (SI->getOpcode() == Instruction::LShr) {
-          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
-            // Check to see if we are shifting out everything but the sign bit.
-            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
-                SI->getType()->getPrimitiveSizeInBits()-1) {
-              // Ok, the transformation is safe.  Insert AShr.
-              return BinaryOperator::Create(Instruction::AShr, 
-                                          SI->getOperand(0), CU, SI->getName());
-            }
-          }
-        } else if (SI->getOpcode() == Instruction::AShr) {
-          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
-            // Check to see if we are shifting out everything but the sign bit.
-            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
-                SI->getType()->getPrimitiveSizeInBits()-1) {
-              // Ok, the transformation is safe.  Insert LShr. 
-              return BinaryOperator::CreateLShr(
-                                          SI->getOperand(0), CU, SI->getName());
-            }
-          }
-        }
-      }
+      Value *X; ConstantInt *CI;
+      if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+          // Verify we are shifting out everything but the sign bit.
+          CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+        return BinaryOperator::CreateAShr(X, CI);
+
+      if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+          // Verify we are shifting out everything but the sign bit.
+          CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+        return BinaryOperator::CreateLShr(X, CI);
     }
 
     // Try to fold constant sub into select arguments.
@@ -608,86 +580,80 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
     // C - zext(bool) -> bool ? C - 1 : C
     if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
-      if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+      if (ZI->getSrcTy()->isIntegerTy(1))
         return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+
+    // C-(X+C2) --> (C-C2)-X
+    ConstantInt *C2;
+    if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+      return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
   }
 
-  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
-    if (Op1I->getOpcode() == Instruction::Add) {
-      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
-        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
-                                         I.getName());
-      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
-        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
-                                         I.getName());
-      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
-        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
-          // C1-(X+C2) --> (C1-C2)-X
-          return BinaryOperator::CreateSub(
-            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
-      }
+  
+  { Value *Y;
+    // X-(X+Y) == -Y    X-(Y+X) == -Y
+    if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+        match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+      return BinaryOperator::CreateNeg(Y);
+    
+    // (X-Y)-X == -Y
+    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+      return BinaryOperator::CreateNeg(Y);
+  }
+  
+  if (Op1->hasOneUse()) {
+    Value *X = 0, *Y = 0, *Z = 0;
+    Constant *C = 0;
+    ConstantInt *CI = 0;
+
+    // (X - (Y - Z))  -->  (X + (Z - Y)).
+    if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+      return BinaryOperator::CreateAdd(Op0,
+                                      Builder->CreateSub(Z, Y, Op1->getName()));
+
+    // (X - (X & Y))   -->   (X & ~Y)
+    //
+    if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+        match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+      return BinaryOperator::CreateAnd(Op0,
+                                  Builder->CreateNot(Y, Y->getName() + ".not"));
+    
+    // 0 - (X sdiv C)  -> (X sdiv -C)
+    if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+        match(Op0, m_Zero()))
+      return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+    // 0 - (X << Y)  -> (-X << Y)   when X is freely negatable.
+    if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+      if (Value *XNeg = dyn_castNegVal(X))
+        return BinaryOperator::CreateShl(XNeg, Y);
+
+    // X - X*C --> X * (1-C)
+    if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+      Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+      return BinaryOperator::CreateMul(Op0, CP1);
     }
 
-    if (Op1I->hasOneUse()) {
-      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
-      // is not used by anyone else...
-      //
-      if (Op1I->getOpcode() == Instruction::Sub) {
-        // Swap the two operands of the subexpr...
-        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
-        Op1I->setOperand(0, IIOp1);
-        Op1I->setOperand(1, IIOp0);
-
-        // Create the new top level add instruction...
-        return BinaryOperator::CreateAdd(Op0, Op1);
-      }
-
-      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
-      //
-      if (Op1I->getOpcode() == Instruction::And &&
-          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
-        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
-
-        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
-        return BinaryOperator::CreateAnd(Op0, NewNot);
-      }
-
-      // 0 - (X sdiv C)  -> (X sdiv -C)
-      if (Op1I->getOpcode() == Instruction::SDiv)
-        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isZero())
-            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
-              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
-                                          ConstantExpr::getNeg(DivRHS));
-
-      // 0 - (C << X)  -> (-C << X)
-      if (Op1I->getOpcode() == Instruction::Shl)
-        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isZero())
-            if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0)))
-              return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1));
-
-      // X - X*C --> X * (1-C)
-      ConstantInt *C2 = 0;
-      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
-        Constant *CP1 = 
-          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
-                                             C2);
-        return BinaryOperator::CreateMul(Op0, CP1);
-      }
+    // X - X<<C --> X * (1-(1<<C))
+    if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+      Constant *One = ConstantInt::get(I.getType(), 1);
+      C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+      return BinaryOperator::CreateMul(Op0, C);
     }
-  }
-
-  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
-    if (Op0I->getOpcode() == Instruction::Add) {
-      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
-        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
-      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
-        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
-    } else if (Op0I->getOpcode() == Instruction::Sub) {
-      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
-        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
-                                         I.getName());
+    
+    // X - A*-B -> X + A*B
+    // X - -A*B -> X + A*B
+    Value *A, *B;
+    if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+        match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+      return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+      
+    // X - A*CI -> X + A*-CI
+    // X - CI*A -> X + A*-CI
+    if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+        match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+      Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+      return BinaryOperator::CreateAdd(Op0, NewMul);
     }
   }
 
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 19a05bfe9bba..b6b6b84d9647 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -172,7 +172,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
   case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
   case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
   case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
-  case 7: return ConstantInt::getTrue(LHS->getContext());
+  case 7: 
+    if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+    Pred = FCmpInst::FCMP_ORD; break;
   }
   return Builder->CreateFCmp(Pred, LHS, RHS);
 }
@@ -207,15 +209,26 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     }
     break;
   case Instruction::Or:
-    if (Together == AndRHS) // (X | C) & C --> C
-      return ReplaceInstUsesWith(TheAnd, AndRHS);
-
-    if (Op->hasOneUse() && Together != OpRHS) {
-      // (X | C1) & C2 --> (X | (C1&C2)) & C2
-      Value *Or = Builder->CreateOr(X, Together);
-      Or->takeName(Op);
-      return BinaryOperator::CreateAnd(Or, AndRHS);
+    if (Op->hasOneUse()){
+      if (Together != OpRHS) {
+        // (X | C1) & C2 --> (X | (C1&C2)) & C2
+        Value *Or = Builder->CreateOr(X, Together);
+        Or->takeName(Op);
+        return BinaryOperator::CreateAnd(Or, AndRHS);
+      }
+      
+      ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+      if (TogetherCI && !TogetherCI->isZero()){
+        // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+        // NOTE: This reduces the number of bits set in the & mask, which
+        // can expose opportunities for store narrowing.
+        Together = ConstantExpr::getXor(AndRHS, Together);
+        Value *And = Builder->CreateAnd(X, Together);
+        And->takeName(Op);
+        return BinaryOperator::CreateOr(And, OpRHS);
+      }
     }
+    
     break;
   case Instruction::Add:
     if (Op->hasOneUse()) {
@@ -261,10 +274,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
                                        AndRHS->getValue() & ShlMask);
 
-    if (CI->getValue() == ShlMask) { 
-    // Masking out bits that the shift already masks
+    if (CI->getValue() == ShlMask)
+      // Masking out bits that the shift already masks.
       return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
-    } else if (CI != AndRHS) {                  // Reducing bits set in and.
+    
+    if (CI != AndRHS) {                  // Reducing bits set in and.
       TheAnd.setOperand(1, CI);
       return &TheAnd;
     }
@@ -281,10 +295,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
     ConstantInt *CI = ConstantInt::get(Op->getContext(),
                                        AndRHS->getValue() & ShrMask);
 
-    if (CI->getValue() == ShrMask) {   
-    // Masking out bits that the shift already masks.
+    if (CI->getValue() == ShrMask)
+      // Masking out bits that the shift already masks.
       return ReplaceInstUsesWith(TheAnd, Op);
-    } else if (CI != AndRHS) {
+    
+    if (CI != AndRHS) {
       TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
       return &TheAnd;
     }
@@ -434,6 +449,270 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
 }
 
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is 
+/// described as the "AMask" or "BMask" part of the enum. If the enum 
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it was proven, that (A & C) == C. This
+/// is trivial if C == A, or C == 0. If both A and C are constants, this
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The part "AllOnes" declares, that the comparison is true only 
+/// if (A & B) == A, or all bits of A are set in B.
+///   Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The part "AllZeroes" declares, that the comparison is true only 
+/// if (A & B) == 0, or all bits of A are cleared in B.
+///   Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The part "Mixed" declares, that (A & B) == C and C might or might not 
+/// contain any number of one bits and zero bits.
+///   Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The Part "Not" means, that in above descriptions "==" should be replaced
+/// by "!=".
+///   Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+///    (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+///    (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+  FoldMskICmp_AMask_AllOnes           =     1,
+  FoldMskICmp_AMask_NotAllOnes        =     2,
+  FoldMskICmp_BMask_AllOnes           =     4,
+  FoldMskICmp_BMask_NotAllOnes        =     8,
+  FoldMskICmp_Mask_AllZeroes          =    16,
+  FoldMskICmp_Mask_NotAllZeroes       =    32,
+  FoldMskICmp_AMask_Mixed             =    64,
+  FoldMskICmp_AMask_NotMixed          =   128,
+  FoldMskICmp_BMask_Mixed             =   256,
+  FoldMskICmp_BMask_NotMixed          =   512
+};
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, 
+                                    ICmpInst::Predicate SCC)
+{
+  ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+  ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+  ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+  bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+  bool icmp_abit = (ACst != 0 && !ACst->isZero() && 
+                    ACst->getValue().isPowerOf2());
+  bool icmp_bbit = (BCst != 0 && !BCst->isZero() && 
+                    BCst->getValue().isPowerOf2());
+  unsigned result = 0;
+  if (CCst != 0 && CCst->isZero()) {
+    // if C is zero, then both A and B qualify as mask
+    result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_Mask_AllZeroes |
+                          FoldMskICmp_AMask_Mixed |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_Mask_NotAllZeroes |
+                          FoldMskICmp_AMask_NotMixed |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+                            FoldMskICmp_AMask_NotMixed) 
+                         : (FoldMskICmp_AMask_AllOnes |
+                            FoldMskICmp_AMask_Mixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+                            FoldMskICmp_BMask_NotMixed) 
+                         : (FoldMskICmp_BMask_AllOnes |
+                            FoldMskICmp_BMask_Mixed));
+    return result;
+  }
+  if (A == C) {
+    result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+                          FoldMskICmp_AMask_Mixed)
+                       : (FoldMskICmp_AMask_NotAllOnes |
+                          FoldMskICmp_AMask_NotMixed));
+    if (icmp_abit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_AMask_NotMixed)
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_AMask_Mixed));
+  }
+  else if (ACst != 0 && CCst != 0 &&
+        ConstantExpr::getAnd(ACst, CCst) == CCst) {
+    result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+                       : FoldMskICmp_AMask_NotMixed);
+  }
+  if (B == C) 
+  {
+    result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+                          FoldMskICmp_BMask_Mixed)
+                       : (FoldMskICmp_BMask_NotAllOnes |
+                          FoldMskICmp_BMask_NotMixed));
+    if (icmp_bbit)
+      result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+                            FoldMskICmp_BMask_NotMixed) 
+                         : (FoldMskICmp_Mask_AllZeroes |
+                            FoldMskICmp_BMask_Mixed));
+  }
+  else if (BCst != 0 && CCst != 0 &&
+        ConstantExpr::getAnd(BCst, CCst) == CCst) {
+    result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+                       : FoldMskICmp_BMask_NotMixed);
+  }
+  return result;
+}
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, 
+                                             Value*& B, Value*& C,
+                                             Value*& D, Value*& E,
+                                             ICmpInst *LHS, ICmpInst *RHS) {
+  ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+  if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+  if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+  if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+  // vectors are not (yet?) supported
+  if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+  // Here comes the tricky part:
+  // LHS might be of the form L11 & L12 == X, X == L21 & L22, 
+  // and L11 & L12 == L21 & L22. The same goes for RHS.
+  // Now we must find those components L** and R**, that are equal, so
+  // that we can extract the parameters A, B, C, D, and E for the canonical 
+  // above.
+  Value *L1 = LHS->getOperand(0);
+  Value *L2 = LHS->getOperand(1);
+  Value *L11,*L12,*L21,*L22;
+  if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+    if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+      L21 = L22 = 0;
+  }
+  else {
+    if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+      return 0;
+    std::swap(L1, L2);
+    L21 = L22 = 0;
+  }
+
+  Value *R1 = RHS->getOperand(0);
+  Value *R2 = RHS->getOperand(1);
+  Value *R11,*R12;
+  bool ok = false;
+  if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+    if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+      A = R11; D = R12; E = R2; ok = true;
+    }
+    else 
+    if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+      A = R12; D = R11; E = R2; ok = true;
+    }
+  }
+  if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+    if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+       A = R11; D = R12; E = R1; ok = true;
+    }
+    else 
+    if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+      A = R12; D = R11; E = R1; ok = true;
+    }
+    else
+      return 0;
+  }
+  if (!ok)
+    return 0;
+
+  if (L11 == A) {
+    B = L12; C = L2;
+  }
+  else if (L12 == A) {
+    B = L11; C = L2;
+  }
+  else if (L21 == A) {
+    B = L22; C = L1;
+  }
+  else if (L22 == A) {
+    B = L21; C = L1;
+  }
+
+  unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+  unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+  return left_type & right_type;
+}
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+                                     ICmpInst::Predicate NEWCC,
+                                     llvm::InstCombiner::BuilderTy* Builder) {
+  Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+  unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+  if (mask == 0) return 0;
+
+  if (NEWCC == ICmpInst::ICMP_NE)
+    mask >>= 1; // treat "Not"-states as normal states
+
+  if (mask & FoldMskICmp_Mask_AllZeroes) {
+    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) 
+    // -> (icmp eq (A & (B|D)), 0)
+    Value* newOr = Builder->CreateOr(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newOr);
+    // we can't use C as zero, because we might actually handle
+    //   (icmp ne (A & B), B) & (icmp ne (A & D), D) 
+    // with B and D, having a single bit set
+    Value* zero = Constant::getNullValue(A->getType());
+    return Builder->CreateICmp(NEWCC, newAnd, zero);
+  }
+  else if (mask & FoldMskICmp_BMask_AllOnes) {
+    // (icmp eq (A & B), B) & (icmp eq (A & D), D) 
+    // -> (icmp eq (A & (B|D)), (B|D))
+    Value* newOr = Builder->CreateOr(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newOr);
+    return Builder->CreateICmp(NEWCC, newAnd, newOr);
+  }     
+  else if (mask & FoldMskICmp_AMask_AllOnes) {
+    // (icmp eq (A & B), A) & (icmp eq (A & D), A) 
+    // -> (icmp eq (A & (B&D)), A)
+    Value* newAnd1 = Builder->CreateAnd(B, D);
+    Value* newAnd = Builder->CreateAnd(A, newAnd1);
+    return Builder->CreateICmp(NEWCC, newAnd, A);
+  }
+  else if (mask & FoldMskICmp_BMask_Mixed) {
+    // (icmp eq (A & B), C) & (icmp eq (A & D), E) 
+    // We already know that B & C == C && D & E == E.
+    // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+    // C and E, which are shared by both the mask B and the mask D, don't
+    // contradict, then we can transform to
+    // -> (icmp eq (A & (B|D)), (C|E))
+    // Currently, we only handle the case of B, C, D, and E being constant.
+    ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+    if (BCst == 0) return 0;
+    ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+    if (DCst == 0) return 0;
+    // we can't simply use C and E, because we might actually handle
+    //   (icmp ne (A & B), B) & (icmp eq (A & D), D) 
+    // with B and D, having a single bit set
+
+    ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+    if (CCst == 0) return 0;
+    if (LHS->getPredicate() != NEWCC)
+      CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+    ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+    if (ECst == 0) return 0;
+    if (RHS->getPredicate() != NEWCC)
+      ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+    ConstantInt* MCst = dyn_cast<ConstantInt>(
+      ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+                           ConstantExpr::getXor(CCst, ECst)) );
+    // if there is a conflict we should actually return a false for the
+    // whole construct
+    if (!MCst->isZero())
+      return 0;
+    Value *newOr1 = Builder->CreateOr(B, D);
+    Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+    Value *newAnd = Builder->CreateAnd(A, newOr1);
+    return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+  }
+  return 0;
+}
+
 /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -451,6 +730,10 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       return getICmpValue(isSigned, Code, Op0, Op1, Builder);
     }
   }
+
+  // handle (roughly):  (icmp eq (A & B), C) & (icmp eq (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+    return V;
   
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
   Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -472,22 +755,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
-    
-    // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
-    // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
-    if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
-      Value *Op1 = 0, *Op2 = 0;
-      ConstantInt *CI1 = 0, *CI2 = 0;
-      if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
-          match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
-        if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
-            CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
-          Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
-          Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
-          return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
-        }
-      }
-    }
   }
   
   // From here on, we only handle:
@@ -712,12 +979,16 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
 
 
 Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (Value *V = SimplifyAndInst(Op0, Op1, TD))
     return ReplaceInstUsesWith(I, V);
 
+  // (A|B)&(A|C) -> A|(B&C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
+
   // See if we can simplify any instructions used by the instruction whose sole 
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -725,7 +996,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
     const APInt &AndRHSMask = AndRHS->getValue();
-    APInt NotAndRHS(~AndRHSMask);
 
     // Optimize a variety of ((val OP C1) & C2) combinations...
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
@@ -734,10 +1004,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       switch (Op0I->getOpcode()) {
       default: break;
       case Instruction::Xor:
-      case Instruction::Or:
+      case Instruction::Or: {
         // If the mask is only needed on one incoming arm, push it up.
         if (!Op0I->hasOneUse()) break;
           
+        APInt NotAndRHS(~AndRHSMask);
         if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
           // Not masking anything out for the LHS, move to RHS.
           Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
@@ -753,6 +1024,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         }
 
         break;
+      }
       case Instruction::Add:
         // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
         // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
@@ -772,14 +1044,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
         // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
         // has 1's for all bits that the subtraction with A might affect.
-        if (Op0I->hasOneUse()) {
+        if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
           uint32_t BitWidth = AndRHSMask.getBitWidth();
           uint32_t Zeros = AndRHSMask.countLeadingZeros();
           APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
 
-          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
-          if (!(A && A->isZero()) &&               // avoid infinite recursion.
-              MaskedValueIsZero(Op0LHS, Mask)) {
+          if (MaskedValueIsZero(Op0LHS, Mask)) {
             Value *NewNeg = Builder->CreateNeg(Op0RHS);
             return BinaryOperator::CreateAnd(NewNeg, AndRHS);
           }
@@ -797,39 +1067,25 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         }
         break;
       }
-
+          
       if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
         if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
           return Res;
-    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
-      // If this is an integer truncation or change from signed-to-unsigned, and
-      // if the source is an and/or with immediate, transform it.  This
-      // frequently occurs for bitfield accesses.
-      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
-        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
-            CastOp->getNumOperands() == 2)
-          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
-            if (CastOp->getOpcode() == Instruction::And) {
-              // Change: and (cast (and X, C1) to T), C2
-              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
-              // This will fold the two constants together, which may allow 
-              // other simplifications.
-              Value *NewCast = Builder->CreateTruncOrBitCast(
-                CastOp->getOperand(0), I.getType(), 
-                CastOp->getName()+".shrunk");
-              // trunc_or_bitcast(C1)&C2
-              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-              C3 = ConstantExpr::getAnd(C3, AndRHS);
-              return BinaryOperator::CreateAnd(NewCast, C3);
-            } else if (CastOp->getOpcode() == Instruction::Or) {
-              // Change: and (cast (or X, C1) to T), C2
-              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
-              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
-              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
-                // trunc(C1)&C2
-                return ReplaceInstUsesWith(I, AndRHS);
-            }
-          }
+    }
+    
+    // If this is an integer truncation, and if the source is an 'and' with
+    // immediate, transform it.  This frequently occurs for bitfield accesses.
+    {
+      Value *X = 0; ConstantInt *YC = 0;
+      if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+        // Change: and (trunc (and X, YC) to T), C2
+        // into  : and (trunc X to T), trunc(YC) & C2
+        // This will fold the two constants together, which may allow 
+        // other simplifications.
+        Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+        Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+        C3 = ConstantExpr::getAnd(C3, AndRHS);
+        return BinaryOperator::CreateAnd(NewCast, C3);
       }
     }
 
@@ -851,7 +1107,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                                       I.getName()+".demorgan");
         return BinaryOperator::CreateNot(Or);
       }
-
+  
   {
     Value *A = 0, *B = 0, *C = 0, *D = 0;
     // (A|B) & ~(A&B) -> A^B
@@ -884,7 +1140,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         cast<BinaryOperator>(Op1)->swapOperands();
         std::swap(A, B);
       }
-      if (A == Op0)                                // A&(A^B) -> A & ~B
+      // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if
+      // A is originally -1 (or a vector of -1 and undefs), then we enter
+      // an endless loop. By checking that A is non-constant we ensure that
+      // we will never get to the loop.
+      if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
         return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
     }
 
@@ -1160,7 +1420,12 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       return getICmpValue(isSigned, Code, Op0, Op1, Builder);
     }
   }
-  
+
+  // handle (roughly):
+  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+    return V;
+
   // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
   Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
   ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1173,24 +1438,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
-  
-    // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
-    // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
-    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
-      Value *Op1 = 0, *Op2 = 0;
-      ConstantInt *CI1 = 0, *CI2 = 0;
-      if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
-          match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
-        if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
-            CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
-          Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
-          Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
-          return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
-        }
-      }
-    }
   }
-  
+
+  // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+  //   iff C2 + CA == C1.
+  if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+    ConstantInt *AddCst;
+    if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+      if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+        return Builder->CreateICmpULE(Val, LHSCst);
+  }
+
   // From here on, we only handle:
   //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
   if (Val != Val2) return 0;
@@ -1429,12 +1687,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
 }
 
 Instruction *InstCombiner::visitOr(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (Value *V = SimplifyOrInst(Op0, Op1, TD))
     return ReplaceInstUsesWith(I, V);
 
+  // (A&B)|(A&C) -> A&(B|C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
+
   // See if we can simplify any instructions used by the instruction whose sole 
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -1481,8 +1743,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   // (A >> B) | (C << D)  and  (A << B) | (B >> C)  -> bswap if possible.
   if (match(Op0, m_Or(m_Value(), m_Value())) ||
       match(Op1, m_Or(m_Value(), m_Value())) ||
-      (match(Op0, m_Shift(m_Value(), m_Value())) &&
-       match(Op1, m_Shift(m_Value(), m_Value())))) {
+      (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+       match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
     if (Instruction *BSwap = MatchBSwap(I))
       return BSwap;
   }
@@ -1509,7 +1771,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   Value *C = 0, *D = 0;
   if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
       match(Op1, m_And(m_Value(B), m_Value(D)))) {
-    Value *V1 = 0, *V2 = 0, *V3 = 0;
+    Value *V1 = 0, *V2 = 0;
     C1 = dyn_cast<ConstantInt>(C);
     C2 = dyn_cast<ConstantInt>(D);
     if (C1 && C2) {  // (A & C1)|(B & C2)
@@ -1567,25 +1829,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         }
       }
     }
-    
-    // Check to see if we have any common things being and'ed.  If so, find the
-    // terms for V1 & (V2|V3).
-    if (Op0->hasOneUse() || Op1->hasOneUse()) {
-      V1 = 0;
-      if (A == B)      // (A & C)|(A & D) == A & (C|D)
-        V1 = A, V2 = C, V3 = D;
-      else if (A == D) // (A & C)|(B & A) == A & (B|C)
-        V1 = A, V2 = B, V3 = C;
-      else if (C == B) // (A & C)|(C & D) == C & (A|D)
-        V1 = C, V2 = A, V3 = D;
-      else if (C == D) // (A & C)|(B & C) == C & (A|B)
-        V1 = C, V2 = A, V3 = B;
-      
-      if (V1) {
-        Value *Or = Builder->CreateOr(V2, V3, "tmp");
-        return BinaryOperator::CreateAnd(V1, Or);
-      }
-    }
 
     // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? A : B, and commuted variants.
     // Don't do this for vector select idioms, the code generator doesn't handle
@@ -1667,65 +1910,69 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   
   // fold (or (cast A), (cast B)) -> (cast (or A, B))
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() &&
-            SrcTy->isIntOrIntVectorTy()) {
-          Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-
-          if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
-              // Only do this if the casts both really cause code to be
-              // generated.
-              ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
-              ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
-            Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
-            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
-          }
-          
-          // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
-          // cast is otherwise not optimizable.  This happens for vector sexts.
-          if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
-            if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
-              if (Value *Res = FoldOrOfICmps(LHS, RHS))
-                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-          
-          // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
-          // cast is otherwise not optimizable.  This happens for vector sexts.
-          if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
-            if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
-              if (Value *Res = FoldOrOfFCmps(LHS, RHS))
-                return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+    CastInst *Op1C = dyn_cast<CastInst>(Op1);
+    if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+      const Type *SrcTy = Op0C->getOperand(0)->getType();
+      if (SrcTy == Op1C->getOperand(0)->getType() &&
+          SrcTy->isIntOrIntVectorTy()) {
+        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+        if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+            // Only do this if the casts both really cause code to be
+            // generated.
+            ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+            ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+          Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
+        
+        // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+          if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+            if (Value *Res = FoldOrOfICmps(LHS, RHS))
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+        
+        // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+        // cast is otherwise not optimizable.  This happens for vector sexts.
+        if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+          if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+            if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+              return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
       }
+    }
+  }
+  
+  // Note: If we've gotten to the point of visiting the outer OR, then the
+  // inner one couldn't be simplified.  If it was a constant, then it won't
+  // be simplified by a later pass either, so we try swapping the inner/outer
+  // ORs in the hopes that we'll be able to simplify it this way.
+  // (X|C) | V --> (X|V) | C
+  if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+      match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+    Value *Inner = Builder->CreateOr(A, Op1);
+    Inner->takeName(Op0);
+    return BinaryOperator::CreateOr(Inner, C1);
   }
   
   return Changed ? &I : 0;
 }
 
 Instruction *InstCombiner::visitXor(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (isa<UndefValue>(Op1)) {
-    if (isa<UndefValue>(Op0))
-      // Handle undef ^ undef -> 0 special case. This is a common
-      // idiom (misuse).
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-    return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
-  }
+  if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // (A&B)^(A&C) -> A&(B^C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
 
-  // xor X, X = 0
-  if (Op0 == Op1)
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  
   // See if we can simplify any instructions used by the instruction whose sole 
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
-  if (I.getType()->isVectorTy())
-    if (isa<ConstantAggregateZero>(Op1))
-      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
 
   // Is this a ~ operation?
   if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -1844,15 +2091,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         return NV;
   }
 
-  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
-    if (X == Op1)
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
-    if (X == Op0)
-      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-  
   BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
   if (Op1I) {
     Value *A, *B;
@@ -1865,10 +2103,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         I.swapOperands();     // Simplified below.
         std::swap(Op0, Op1);
       }
-    } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
-      return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B
-    } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
-      return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B
     } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && 
                Op1I->hasOneUse()){
       if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
@@ -1891,10 +2125,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         std::swap(A, B);
       if (B == Op1)                                  // (A|B)^B == A & ~B
         return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
-    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
-      return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B
-    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
-      return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B
     } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && 
                Op0I->hasOneUse()){
       if (A == Op1)                                        // (A&B)^A -> (B&A)^A
@@ -1932,29 +2162,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       if ((A == C && B == D) || (A == D && B == C)) 
         return BinaryOperator::CreateXor(A, B);
     }
-    
-    // (A & B)^(C & D)
-    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
-        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
-        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
-      // (X & Y)^(X & Y) -> (Y^Z) & X
-      Value *X = 0, *Y = 0, *Z = 0;
-      if (A == C)
-        X = A, Y = B, Z = D;
-      else if (A == D)
-        X = A, Y = B, Z = C;
-      else if (B == C)
-        X = B, Y = A, Z = D;
-      else if (B == D)
-        X = B, Y = A, Z = C;
-      
-      if (X) {
-        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
-        return BinaryOperator::CreateAnd(NewOp, X);
-      }
-    }
   }
-    
+
   // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0ebe3b45589e..8449f7b7982c 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 /// getPromotedType - Return the specified type promoted as it would be to pass
@@ -29,100 +30,10 @@ static const Type *getPromotedType(const Type *Ty) {
   return Ty;
 }
 
-/// EnforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-///
-static unsigned EnforceKnownAlignment(Value *V,
-                                      unsigned Align, unsigned PrefAlign) {
-
-  User *U = dyn_cast<User>(V);
-  if (!U) return Align;
-
-  switch (Operator::getOpcode(U)) {
-  default: break;
-  case Instruction::BitCast:
-    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-  case Instruction::GetElementPtr: {
-    // If all indexes are zero, it is just the alignment of the base pointer.
-    bool AllZeroOperands = true;
-    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
-      if (!isa<Constant>(*i) ||
-          !cast<Constant>(*i)->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-
-    if (AllZeroOperands) {
-      // Treat this like a bitcast.
-      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-    }
-    return Align;
-  }
-  case Instruction::Alloca: {
-    AllocaInst *AI = cast<AllocaInst>(V);
-    // If there is a requested alignment and if this is an alloca, round up.
-    if (AI->getAlignment() >= PrefAlign)
-      return AI->getAlignment();
-    AI->setAlignment(PrefAlign);
-    return PrefAlign;
-  }
-  }
-
-  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
-    // If there is a large requested alignment and we can, bump up the alignment
-    // of the global.
-    if (GV->isDeclaration()) return Align;
-    
-    if (GV->getAlignment() >= PrefAlign)
-      return GV->getAlignment();
-    // We can only increase the alignment of the global if it has no alignment
-    // specified or if it is not assigned a section.  If it is assigned a
-    // section, the global could be densely packed with other objects in the
-    // section, increasing the alignment could cause padding issues.
-    if (!GV->hasSection() || GV->getAlignment() == 0)
-      GV->setAlignment(PrefAlign);
-    return GV->getAlignment();
-  }
-
-  return Align;
-}
-
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
-                                                  unsigned PrefAlign) {
-  assert(V->getType()->isPointerTy() &&
-         "GetOrEnforceKnownAlignment expects a pointer!");
-  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
-  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
-  unsigned TrailZ = KnownZero.countTrailingOnes();
-
-  // Avoid trouble with rediculously large TrailZ values, such as
-  // those computed from a null pointer.
-  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
-  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
-  // LLVM doesn't support alignments larger than this currently.
-  Align = std::min(Align, +Value::MaximumAlignment);
-
-  if (PrefAlign > Align)
-    Align = EnforceKnownAlignment(V, Align, PrefAlign);
-  
-    // We don't need to make any adjustment.
-  return Align;
-}
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
-  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
-  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
+  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment();
 
@@ -211,7 +122,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
 }
 
 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
-  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
   if (MI->getAlignment() < Alignment) {
     MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                              Alignment, false));
@@ -234,7 +145,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
     const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
     
     Value *Dest = MI->getDest();
-    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
+    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
 
     // Alignment 0 is identity for alignment 1 for memset, but not store.
     if (Alignment == 0) Alignment = 1;
@@ -280,7 +193,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
     // memmove/cpy/set of zero bytes is a noop.
     if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
-      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+      if (NumBytes->isNullValue())
+        return EraseInstFromFunction(CI);
 
       if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
         if (CI->getZExtValue() == 1) {
@@ -289,6 +203,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           // alignment is sufficient.
         }
     }
+    
+    // No other transformations apply to volatile transfers.
+    if (MI->isVolatile())
+      return 0;
 
     // If we have a memmove and the source operation is a constant global,
     // then the source and dest pointers can't alias, so we can change this
@@ -332,82 +250,73 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (!TD) break;
     
     const Type *ReturnTy = CI.getType();
-    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+    uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
 
     // Get to the real allocated thing and offset as fast as possible.
     Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
-    
+
+    uint64_t Offset = 0;
+    uint64_t Size = -1ULL;
+
+    // Try to look through constant GEPs.
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
+      if (!GEP->hasAllConstantIndices()) break;
+
+      // Get the current byte offset into the thing. Use the original
+      // operand in case we're looking through a bitcast.
+      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+      Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+                                    Ops.data(), Ops.size());
+
+      Op1 = GEP->getPointerOperand()->stripPointerCasts();
+
+      // Make sure we're not a constant offset from an external
+      // global.
+      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
+        if (!GV->hasDefinitiveInitializer()) break;
+    }
+
     // If we've stripped down to a single global variable that we
     // can know the size of then just return that.
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
       if (GV->hasDefinitiveInitializer()) {
         Constant *C = GV->getInitializer();
-        uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
-        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
+        Size = TD->getTypeAllocSize(C->getType());
       } else {
         // Can't determine size of the GV.
-        Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+        Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
         return ReplaceInstUsesWith(CI, RetVal);
       }
     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
       // Get alloca size.
       if (AI->getAllocatedType()->isSized()) {
-        uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+        Size = TD->getTypeAllocSize(AI->getAllocatedType());
         if (AI->isArrayAllocation()) {
           const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
           if (!C) break;
-          AllocaSize *= C->getZExtValue();
+          Size *= C->getZExtValue();
         }
-        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
       }
     } else if (CallInst *MI = extractMallocCall(Op1)) {
+      // Get allocation size.
       const Type* MallocType = getMallocAllocatedType(MI);
-      // Get alloca size.
-      if (MallocType && MallocType->isSized()) {
-        if (Value *NElems = getMallocArraySize(MI, TD, true)) {
+      if (MallocType && MallocType->isSized())
+        if (Value *NElems = getMallocArraySize(MI, TD, true))
           if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
-        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy,
-               (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType))));
-        }
-      }
-    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {      
-      // Only handle constant GEPs here.
-      if (CE->getOpcode() != Instruction::GetElementPtr) break;
-      GEPOperator *GEP = cast<GEPOperator>(CE);
-      
-      // Make sure we're not a constant offset from an external
-      // global.
-      Value *Operand = GEP->getPointerOperand();
-      Operand = Operand->stripPointerCasts();
-      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
-        if (!GV->hasDefinitiveInitializer()) break;
-        
-      // Get what we're pointing to and its size. 
-      const PointerType *BaseType = 
-        cast<PointerType>(Operand->getType());
-      uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
-      
-      // Get the current byte offset into the thing. Use the original
-      // operand in case we're looking through a bitcast.
-      SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
-      const PointerType *OffsetType =
-        cast<PointerType>(GEP->getPointerOperand()->getType());
-      uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
-
-      if (Size < Offset) {
-        // Out of bound reference? Negative index normalized to large
-        // index? Just return "I don't know".
-        Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-        return ReplaceInstUsesWith(CI, RetVal);
-      }
-      
-      Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
-      return ReplaceInstUsesWith(CI, RetVal);
-    } 
+            Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
+    }
 
     // Do not return "I don't know" here. Later optimization passes could
     // make it possible to evaluate objectsize to a constant.
-    break;
+    if (Size == -1ULL)
+      break;
+
+    if (Size < Offset) {
+      // Out of bound reference? Negative index normalized to large
+      // index? Just return "I don't know".
+      return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
+    }
+    return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
   }
   case Intrinsic::bswap:
     // bswap(bswap(x)) -> x
@@ -604,7 +513,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_loadu_dq:
     // Turn PPC lvx     -> load if the pointer is known aligned.
     // Turn X86 loadups -> load if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
       return new LoadInst(Ptr);
@@ -613,7 +522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
       const Type *OpPtrTy = 
         PointerType::getUnqual(II->getArgOperand(0)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -624,16 +533,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_storeu_pd:
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
       const Type *OpPtrTy = 
         PointerType::getUnqual(II->getArgOperand(1)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
       return new StoreInst(II->getArgOperand(1), Ptr);
     }
     break;
-    
-  case Intrinsic::x86_sse_cvttss2si: {
-    // These intrinsics only demands the 0th element of its input vector.  If
+
+  case Intrinsic::x86_sse_cvtss2si:
+  case Intrinsic::x86_sse_cvtss2si64:
+  case Intrinsic::x86_sse_cvttss2si:
+  case Intrinsic::x86_sse_cvttss2si64:
+  case Intrinsic::x86_sse2_cvtsd2si:
+  case Intrinsic::x86_sse2_cvtsd2si64:
+  case Intrinsic::x86_sse2_cvttsd2si:
+  case Intrinsic::x86_sse2_cvttsd2si64: {
+    // These intrinsics only demand the 0th element of their input vectors. If
     // we can simplify the input based on that, do so now.
     unsigned VWidth =
       cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
@@ -646,7 +562,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;
   }
-    
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
@@ -697,6 +613,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;
 
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+    unsigned AlignArg = II->getNumArgOperands() - 1;
+    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+      II->setArgOperand(AlignArg,
+                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                         MemAlign, false));
+      return II;
+    }
+    break;
+  }
+
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
@@ -783,6 +725,8 @@ protected:
     NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
   }
   bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+      return true;
     if (ConstantInt *SizeCI =
                            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
       if (SizeCI->isAllOnesValue())
@@ -819,11 +763,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
 Instruction *InstCombiner::visitCallSite(CallSite CS) {
   bool Changed = false;
 
-  // If the callee is a constexpr cast of a function, attempt to move the cast
-  // to the arguments of the call/invoke.
-  if (transformConstExprCastCall(CS)) return 0;
-
+  // If the callee is a pointer to a function, attempt to move any casts to the
+  // arguments of the call/invoke.
   Value *Callee = CS.getCalledValue();
+  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+    return 0;
 
   if (Function *CalleeF = dyn_cast<Function>(Callee))
     // If the call and callee calling conventions don't match, this call must
@@ -917,12 +861,10 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
 // attempt to move the cast to the arguments of the call/invoke.
 //
 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
-  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
-  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
-  if (CE->getOpcode() != Instruction::BitCast || 
-      !isa<Function>(CE->getOperand(0)))
+  Function *Callee =
+    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+  if (Callee == 0)
     return false;
-  Function *Callee = cast<Function>(CE->getOperand(0));
   Instruction *Caller = CS.getInstruction();
   const AttrListPtr &CallerPAL = CS.getAttributes();
 
@@ -984,9 +926,22 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if (!CastInst::isCastable(ActTy, ParamTy))
       return false;   // Cannot transform this parameter value.
 
-    if (CallerPAL.getParamAttributes(i + 1) 
-        & Attribute::typeIncompatible(ParamTy))
+    unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+    if (Attrs & Attribute::typeIncompatible(ParamTy))
       return false;   // Attribute not compatible with transformed value.
+    
+    // If the parameter is passed as a byval argument, then we have to have a
+    // sized type and the sized type has to have the same size as the old type.
+    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+      const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+        return false;
+      
+      const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+      if (TD->getTypeAllocSize(CurElTy) !=
+          TD->getTypeAllocSize(ParamPTy->getElementType()))
+        return false;
+    }
 
     // Converting from one pointer type to another or between a pointer and an
     // integer of the same size is safe even if we do not have a body.
@@ -1109,8 +1064,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Value *NV = NC;
   if (OldRetTy != NV->getType() && !Caller->use_empty()) {
     if (!NV->getType()->isVoidTy()) {
-      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 
-                                                            OldRetTy, false);
+      Instruction::CastOps opcode =
+        CastInst::getCastOpcode(NC, false, OldRetTy, false);
       NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
 
       // If this is an invoke instruction, we should insert it after the first
@@ -1119,7 +1074,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
         InsertNewInstBefore(NC, *I);
       } else {
-        // Otherwise, it's a call, just insert cast right after the call instr
+        // Otherwise, it's a call, just insert cast right after the call.
         InsertNewInstBefore(NC, *Caller);
       }
       Worklist.AddUsersToWorkList(*Caller);
@@ -1128,7 +1083,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     }
   }
 
-
   if (!Caller->use_empty())
     Caller->replaceAllUsesWith(NV);
   
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 79a9b09c64d0..b432641a1403 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -462,8 +462,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   
   // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
   Value *A = 0; ConstantInt *Cst = 0;
-  if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
-      Src->hasOneUse()) {
+  if (Src->hasOneUse() &&
+      match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
     // We have three types to worry about here, the type of A, the source of
     // the truncate (MidSize), and the destination of the truncate. We know that
     // ASize < MidSize   and MidSize > ResultSize, but don't know the relation
@@ -482,6 +482,16 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     Shift->takeName(Src);
     return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
   }
+  
+  // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+  // type isn't non-native.
+  if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+      ShouldChangeType(Src->getType(), CI.getType()) &&
+      match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+    Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+    return BinaryOperator::CreateAnd(NewTrunc,
+                                     ConstantExpr::getTrunc(Cst, CI.getType()));
+  }
 
   return 0;
 }
@@ -1019,8 +1029,22 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
     }
   }
   }
-  
-  
+
+  // vector (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed.
+  if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
+    ICmpInst::Predicate Pred; Value *CmpLHS;
+    if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
+      if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
+        const Type *EltTy = VTy->getElementType();
+
+        // splat the shift constant to a constant vector.
+        Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+        Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
+        return ReplaceInstUsesWith(CI, In);
+      }
+    }
+  }
+
   // If the input is a shl/ashr pair of a same constant, then this is a sign
   // extension from a smaller value.  If we could trust arbitrary bitwidth
   // integers, we could turn this into a truncate to the smaller bit and then
@@ -1363,8 +1387,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
                        ConstantInt::get(Int32Ty, SrcElts));
   }
   
-  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
-  return new ShuffleVectorInst(InVal, V2, Mask);
+  return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
 }
 
 static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d7e2b72b7fac..999de3409750 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -22,13 +22,17 @@
 using namespace llvm;
 using namespace PatternMatch;
 
+static ConstantInt *getOne(Constant *C) {
+  return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
 /// AddOne - Add one to a ConstantInt
 static Constant *AddOne(Constant *C) {
   return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
 }
 /// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C) {
-  return ConstantExpr::getSub(C,  ConstantInt::get(C->getType(), 1));
+static Constant *SubOne(Constant *C) {
+  return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
 }
 
 static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
@@ -160,8 +164,8 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
   Max = KnownOne|UnknownBits;
   
   if (UnknownBits.isNegative()) { // Sign bit is unknown
-    Min.set(Min.getBitWidth()-1);
-    Max.clear(Max.getBitWidth()-1);
+    Min.setBit(Min.getBitWidth()-1);
+    Max.clearBit(Max.getBitWidth()-1);
   }
 }
 
@@ -694,13 +698,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
   if (Pred == ICmpInst::ICMP_NE)
     return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
 
-  // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
-  bool isNUW = false, isNSW = false;
-  if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
-    isNUW = Add->hasNoUnsignedWrap();
-    isNSW = Add->hasNoSignedWrap();
-  }      
-  
   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
   // so the values can never be equal.  Similiarly for all other "or equals"
   // operators.
@@ -709,10 +706,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
-    // If this is an NUW add, then this is always false.
-    if (isNUW)
-      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); 
-    
     Value *R = 
       ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
@@ -721,12 +714,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
   // (X+1) >u X        --> X <u (0-1)        --> X != 255
   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
-  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
-    // If this is an NUW add, then this is always true.
-    if (isNUW)
-      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); 
+  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
     return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
-  }
   
   unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
   ConstantInt *SMax = ConstantInt::get(X->getContext(),
@@ -738,16 +727,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
   // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
-  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
-    // If this is an NSW add, then we have two cases: if the constant is
-    // positive, then this is always false, if negative, this is always true.
-    if (isNSW) {
-      bool isTrue = CI->getValue().isNegative();
-      return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
-    }
-    
+  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
     return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
-  }
   
   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
@@ -756,13 +737,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
   // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
   
-  // If this is an NSW add, then we have two cases: if the constant is
-  // positive, then this is always true, if negative, this is always false.
-  if (isNSW) {
-    bool isTrue = !CI->getValue().isNegative();
-    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
-  }
-  
   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
   Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
   return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
@@ -782,7 +756,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
   // (x /u C1) <u C2.  Simply casting the operands and result won't 
   // work. :(  The if statement below tests that condition and bails 
-  // if it finds it. 
+  // if it finds it.
   bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
   if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
     return 0;
@@ -790,9 +764,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
     return 0; // The ProdOV computation fails on divide by zero.
   if (DivIsSigned && DivRHS->isAllOnesValue())
     return 0; // The overflow computation also screws up here
-  if (DivRHS->isOne())
-    return 0; // Not worth bothering, and eliminates some funny cases
-              // with INT_MIN.
+  if (DivRHS->isOne()) {
+    // This eliminates some funny cases with INT_MIN.
+    ICI.setOperand(0, DivI->getOperand(0));   // X/1 == X.
+    return &ICI;
+  }
 
   // Compute Prod = CI * DivRHS. We are essentially solving an equation
   // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
@@ -809,6 +785,10 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // Get the ICmp opcode
   ICmpInst::Predicate Pred = ICI.getPredicate();
 
+  /// If the division is known to be exact, then there is no remainder from the
+  /// divide, so the covered range size is unit, otherwise it is the divisor.
+  ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+  
   // Figure out the interval that is being checked.  For example, a comparison
   // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). 
   // Compute this interval based on the constants involved and the signedness of
@@ -818,38 +798,43 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
   int LoOverflow = 0, HiOverflow = 0;
   Constant *LoBound = 0, *HiBound = 0;
-  
+
   if (!DivIsSigned) {  // udiv
     // e.g. X/5 op 3  --> [15, 20)
     LoBound = Prod;
     HiOverflow = LoOverflow = ProdOV;
-    if (!HiOverflow)
-      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+    if (!HiOverflow) {
+      // If this is not an exact divide, then many values in the range collapse
+      // to the same result value.
+      HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+    }
+    
   } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
     if (CmpRHSV == 0) {       // (X / pos) op 0
       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
-      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
-      HiBound = DivRHS;
+      LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+      HiBound = RangeSize;
     } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
       HiOverflow = LoOverflow = ProdOV;
       if (!HiOverflow)
-        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+        HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
     } else {                       // (X / pos) op neg
       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
       HiBound = AddOne(Prod);
       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow) {
-        ConstantInt* DivNeg =
-                         cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+        ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
         LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
-       }
+      }
     }
   } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+    if (DivI->isExact())
+      RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
     if (CmpRHSV == 0) {       // (X / neg) op 0
       // e.g. X/-5 op 0  --> [-4, 5)
-      LoBound = AddOne(DivRHS);
-      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      LoBound = AddOne(RangeSize);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
       if (HiBound == DivRHS) {     // -INTMIN = INTMIN
         HiOverflow = 1;            // [INTMIN+1, overflow)
         HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
@@ -859,12 +844,12 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
       HiBound = AddOne(Prod);
       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow)
-        LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+        LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
     } else {                       // (X / neg) op neg
       LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
       LoOverflow = HiOverflow = ProdOV;
       if (!HiOverflow)
-        HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+        HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
     }
     
     // Dividing by a negative swaps the condition.  LT <-> GT
@@ -883,9 +868,8 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
     if (LoOverflow)
       return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                           ICmpInst::ICMP_ULT, X, HiBound);
-    return ReplaceInstUsesWith(ICI,
-                               InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
-                                               true));
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, true));
   case ICmpInst::ICMP_NE:
     if (LoOverflow && HiOverflow)
       return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
@@ -908,13 +892,100 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   case ICmpInst::ICMP_SGT:
     if (HiOverflow == +1)       // High bound greater than input range.
       return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
-    else if (HiOverflow == -1)  // High bound less than input range.
+    if (HiOverflow == -1)       // High bound less than input range.
       return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
     if (Pred == ICmpInst::ICMP_UGT)
       return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
-    else
-      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+    return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+                                          ConstantInt *ShAmt) {
+  const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+  
+  // Check that the shift amount is in range.  If not, don't perform
+  // undefined shifts.  When the shift is visited it will be
+  // simplified.
+  uint32_t TypeBits = CmpRHSV.getBitWidth();
+  uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+    return 0;
+  
+  if (!ICI.isEquality()) {
+    // If we have an unsigned comparison and an ashr, we can't simplify this.
+    // Similarly for signed comparisons with lshr.
+    if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+      return 0;
+    
+    // Otherwise, all lshr and all exact ashr's are equivalent to a udiv/sdiv by
+    // a power of 2.  Since we already have logic to simplify these, transform
+    // to div and then simplify the resultant comparison.
+    if (Shr->getOpcode() == Instruction::AShr &&
+        !Shr->isExact())
+      return 0;
+    
+    // Revisit the shift (to delete it).
+    Worklist.Add(Shr);
+    
+    Constant *DivCst =
+      ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+    
+    Value *Tmp =
+      Shr->getOpcode() == Instruction::AShr ?
+      Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+      Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+    
+    ICI.setOperand(0, Tmp);
+    
+    // If the builder folded the binop, just return it.
+    BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+    if (TheDiv == 0)
+      return &ICI;
+    
+    // Otherwise, fold this div/compare.
+    assert(TheDiv->getOpcode() == Instruction::SDiv ||
+           TheDiv->getOpcode() == Instruction::UDiv);
+    
+    Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+    assert(Res && "This div/cst should have folded!");
+    return Res;
+  }
+  
+  
+  // If we are comparing against bits always shifted out, the
+  // comparison cannot succeed.
+  APInt Comp = CmpRHSV << ShAmtVal;
+  ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+  if (Shr->getOpcode() == Instruction::LShr)
+    Comp = Comp.lshr(ShAmtVal);
+  else
+    Comp = Comp.ashr(ShAmtVal);
+  
+  if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+    bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+    Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+                                     IsICMP_NE);
+    return ReplaceInstUsesWith(ICI, Cst);
+  }
+  
+  // Otherwise, check to see if the bits shifted out are known to be zero.
+  // If so, we can compare against the unshifted value:
+  //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+  if (Shr->hasOneUse() && Shr->isExact())
+    return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+  
+  if (Shr->hasOneUse()) {
+    // Otherwise strength reduce the shift into an and.
+    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+    Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+    
+    Value *And = Builder->CreateAnd(Shr->getOperand(0),
+                                    Mask, Shr->getName()+".mask");
+    return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
   }
+  return 0;
 }
 
 
@@ -939,8 +1010,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // If all the high bits are known, we can do this xform.
       if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
         // Pull in the high bits from known-ones set.
-        APInt NewRHS(RHS->getValue());
-        NewRHS.zext(SrcBits);
+        APInt NewRHS = RHS->getValue().zext(SrcBits);
         NewRHS |= KnownOne;
         return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
                             ConstantInt::get(ICI.getContext(), NewRHS));
@@ -1022,10 +1092,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
              (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
           uint32_t BitWidth = 
             cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
-          APInt NewCST = AndCST->getValue();
-          NewCST.zext(BitWidth);
-          APInt NewCI = RHSV;
-          NewCI.zext(BitWidth);
+          APInt NewCST = AndCST->getValue().zext(BitWidth);
+          APInt NewCI = RHSV.zext(BitWidth);
           Value *NewAnd = 
             Builder->CreateAnd(Cast->getOperand(0),
                            ConstantInt::get(ICI.getContext(), NewCST),
@@ -1145,7 +1213,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
       // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
       // -> and (icmp eq P, null), (icmp eq Q, null).
-
       Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
                                         Constant::getNullValue(P->getType()));
       Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
@@ -1185,6 +1252,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         return ReplaceInstUsesWith(ICI, Cst);
       }
       
+      // If the shift is NUW, then it is just shifting out zeros, no need for an
+      // AND.
+      if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantExpr::getLShr(RHS, ShAmt));
+      
       if (LHSI->hasOneUse()) {
         // Otherwise strength reduce the shift into an and.
         uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1195,8 +1268,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         Value *And =
           Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
         return new ICmpInst(ICI.getPredicate(), And,
-                            ConstantInt::get(ICI.getContext(),
-                                             RHSV.lshr(ShAmtVal)));
+                            ConstantExpr::getLShr(RHS, ShAmt));
       }
     }
     
@@ -1205,8 +1277,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (LHSI->hasOneUse() &&
         isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
       // (X << 31) <s 0  --> (X&1) != 0
-      Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
-                                           (TypeBits-ShAmt->getZExtValue()-1));
+      Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+                                        APInt::getOneBitSet(TypeBits, 
+                                            TypeBits-ShAmt->getZExtValue()-1));
       Value *And =
         Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
       return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
@@ -1216,57 +1289,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
   }
     
   case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
-  case Instruction::AShr: {
+  case Instruction::AShr:
     // Only handle equality comparisons of shift-by-constant.
-    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
-    if (!ShAmt || !ICI.isEquality()) break;
-
-    // Check that the shift amount is in range.  If not, don't perform
-    // undefined shifts.  When the shift is visited it will be
-    // simplified.
-    uint32_t TypeBits = RHSV.getBitWidth();
-    if (ShAmt->uge(TypeBits))
-      break;
-    
-    uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-      
-    // If we are comparing against bits always shifted out, the
-    // comparison cannot succeed.
-    APInt Comp = RHSV << ShAmtVal;
-    if (LHSI->getOpcode() == Instruction::LShr)
-      Comp = Comp.lshr(ShAmtVal);
-    else
-      Comp = Comp.ashr(ShAmtVal);
-    
-    if (Comp != RHSV) { // Comparing against a bit that we know is zero.
-      bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-      Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
-                                       IsICMP_NE);
-      return ReplaceInstUsesWith(ICI, Cst);
-    }
-    
-    // Otherwise, check to see if the bits shifted out are known to be zero.
-    // If so, we can compare against the unshifted value:
-    //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
-    if (LHSI->hasOneUse() &&
-        MaskedValueIsZero(LHSI->getOperand(0), 
-                          APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
-      return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
-                          ConstantExpr::getShl(RHS, ShAmt));
-    }
-      
-    if (LHSI->hasOneUse()) {
-      // Otherwise strength reduce the shift into an and.
-      APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-      Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
-      
-      Value *And = Builder->CreateAnd(LHSI->getOperand(0),
-                                      Mask, LHSI->getName()+".mask");
-      return new ICmpInst(ICI.getPredicate(), And,
-                          ConstantExpr::getShl(RHS, ShAmt));
-    }
+    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+      if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI),
+                                            ShAmt))
+        return Res;
     break;
-  }
     
   case Instruction::SDiv:
   case Instruction::UDiv:
@@ -1543,50 +1572,174 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
   // The re-extended constant changed so the constant cannot be represented 
   // in the shorter type. Consequently, we cannot emit a simple comparison.
+  // All the cases that fold to true or false will have already been handled
+  // by SimplifyICmpInst, so only deal with the tricky case.
 
-  // First, handle some easy cases. We know the result cannot be equal at this
-  // point so handle the ICI.isEquality() cases
-  if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
-  if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+  if (isSignedCmp || !isSignedExt)
+    return 0;
 
   // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
   // should have been folded away previously and not enter in here.
-  Value *Result;
-  if (isSignedCmp) {
-    // We're performing a signed comparison.
-    if (cast<ConstantInt>(CI)->getValue().isNegative())
-      Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
-    else
-      Result = ConstantInt::getTrue(ICI.getContext());  // X < (large) --> true
-  } else {
-    // We're performing an unsigned comparison.
-    if (isSignedExt) {
-      // We're performing an unsigned comp with a sign extended value.
-      // This is true if the input is >= 0. [aka >s -1]
-      Constant *NegOne = Constant::getAllOnesValue(SrcTy);
-      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
-    } else {
-      // Unsigned extend & unsigned compare -> always true.
-      Result = ConstantInt::getTrue(ICI.getContext());
-    }
-  }
+
+  // We're performing an unsigned comp with a sign extended value.
+  // This is true if the input is >= 0. [aka >s -1]
+  Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+  Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
 
   // Finally, return the value computed.
-  if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
-      ICI.getPredicate() == ICmpInst::ICMP_SLT)
+  if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
     return ReplaceInstUsesWith(ICI, Result);
 
-  assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || 
-          ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
-         "ICmp should be folded!");
-  if (Constant *CI = dyn_cast<Constant>(Result))
-    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+  assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
   return BinaryOperator::CreateNot(Result);
 }
 
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+///   I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+///   sum = a + b
+///   if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+                                          ConstantInt *CI2, ConstantInt *CI1,
+                                          InstCombiner &IC) {
+  // The transformation we're trying to do here is to transform this into an
+  // llvm.sadd.with.overflow.  To do this, we have to replace the original add
+  // with a narrower add, and discard the add-with-constant that is part of the
+  // range check (if we can't eliminate it, this isn't profitable).
+  
+  // In order to eliminate the add-with-constant, the compare can be its only
+  // use.
+  Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+  if (!AddWithCst->hasOneUse()) return 0;
+  
+  // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+  if (!CI2->getValue().isPowerOf2()) return 0;
+  unsigned NewWidth = CI2->getValue().countTrailingZeros();
+  if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+    
+  // The width of the new add formed is 1 more than the bias.
+  ++NewWidth;
+  
+  // Check to see that CI1 is an all-ones value with NewWidth bits.
+  if (CI1->getBitWidth() == NewWidth ||
+      CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+    return 0;
+  
+  // In order to replace the original add with a narrower 
+  // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+  // and truncates that discard the high bits of the add.  Verify that this is
+  // the case.
+  Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+  for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+       UI != E; ++UI) {
+    if (*UI == AddWithCst) continue;
+    
+    // Only accept truncates for now.  We would really like a nice recursive
+    // predicate like SimplifyDemandedBits, but which goes downwards the use-def
+    // chain to see which bits of a value are actually demanded.  If the
+    // original add had another add which was then immediately truncated, we
+    // could still do the transformation.
+    TruncInst *TI = dyn_cast<TruncInst>(*UI);
+    if (TI == 0 ||
+        TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+  }
+  
+  // If the pattern matches, truncate the inputs to the narrower type and
+  // use the sadd_with_overflow intrinsic to efficiently compute both the
+  // result and the overflow bit.
+  Module *M = I.getParent()->getParent()->getParent();
+  
+  const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+                                       &NewType, 1);
+
+  InstCombiner::BuilderTy *Builder = IC.Builder;
+  
+  // Put the new code above the original add, in case there are any uses of the
+  // add between the add and the compare.
+  Builder->SetInsertPoint(OrigAdd);
+  
+  Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+  Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+  CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+  Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+  Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+  
+  // The inner add was the result of the narrow add, zero extended to the
+  // wider type.  Replace it with the result computed by the intrinsic.
+  IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+  
+  // The original icmp gets replaced with the overflow value.
+  return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+                                     InstCombiner &IC) {
+  // Don't bother doing this transformation for pointers, don't do it for
+  // vectors.
+  if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+  
+  // If the add is a constant expr, then we don't bother transforming it.
+  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+  if (OrigAdd == 0) return 0;
+  
+  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+  
+  // Put the new code above the original add, in case there are any uses of the
+  // add between the add and the compare.
+  InstCombiner::BuilderTy *Builder = IC.Builder;
+  Builder->SetInsertPoint(OrigAdd);
+
+  Module *M = I.getParent()->getParent()->getParent();
+  const Type *Ty = LHS->getType();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+  Value *Add = Builder->CreateExtractValue(Call, 0);
 
+  IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+  // The original icmp gets replaced with the overflow value.
+  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded.  This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+                                 unsigned BitWidth, bool isSignCheck) {
+  if (isSignCheck)
+    return APInt::getSignBit(BitWidth);
+  
+  ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+  if (!CI) return APInt::getAllOnesValue(BitWidth);
+  const APInt &RHS = CI->getValue();
+  
+  switch (I.getPredicate()) {
+  // For a UGT comparison, we don't care about any bits that 
+  // correspond to the trailing ones of the comparand.  The value of these
+  // bits doesn't impact the outcome of the comparison, because any value
+  // greater than the RHS must differ in a bit higher than these due to carry.
+  case ICmpInst::ICMP_UGT: {
+    unsigned trailingOnes = RHS.countTrailingOnes();
+    APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+    return ~lowBitsSet;
+  }
+  
+  // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+  // Any value less than the RHS must differ in a higher bit because of carries.
+  case ICmpInst::ICMP_ULT: {
+    unsigned trailingZeros = RHS.countTrailingZeros();
+    APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+    return ~lowBitsSet;
+  }
+  
+  default:
+    return APInt::getAllOnesValue(BitWidth);
+  }
+  
+}
 
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
@@ -1649,17 +1802,37 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   }
 
   unsigned BitWidth = 0;
-  if (TD)
-    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
-  else if (Ty->isIntOrIntVectorTy())
+  if (Ty->isIntOrIntVectorTy())
     BitWidth = Ty->getScalarSizeInBits();
-
+  else if (TD)  // Pointers require TD info to get their size.
+    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+  
   bool isSignBit = false;
 
   // See if we are doing a comparison with a constant.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
     Value *A = 0, *B = 0;
     
+    // Match the following pattern, which is a common idiom when writing
+    // overflow-safe integer arithmetic function.  The source performs an
+    // addition in wider type, and explicitly checks for overflow using
+    // comparisons against INT_MIN and INT_MAX.  Simplify this by using the
+    // sadd_with_overflow intrinsic.
+    //
+    // TODO: This could probably be generalized to handle other overflow-safe
+    // operations if we worked out the formulas to compute the appropriate 
+    // magic constants.
+    // 
+    // sum = a + b
+    // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
+    {
+    ConstantInt *CI2;    // I = icmp ugt (add (add A, B), CI2), CI
+    if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+        match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+      if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+        return Res;
+    }
+    
     // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
     if (I.isEquality() && CI->isZero() &&
         match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
@@ -1704,8 +1877,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
 
     if (SimplifyDemandedBits(I.getOperandUse(0),
-                             isSignBit ? APInt::getSignBit(BitWidth)
-                                       : APInt::getAllOnesValue(BitWidth),
+                             DemandedBitsLHSMask(I, BitWidth, isSignBit),
                              Op0KnownZero, Op0KnownOne, 0))
       return &I;
     if (SimplifyDemandedBits(I.getOperandUse(1),
@@ -1744,14 +1916,80 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     // simplify this comparison.  For example, (x&4) < 8  is always true.
     switch (I.getPredicate()) {
     default: llvm_unreachable("Unknown icmp opcode!");
-    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_EQ: {
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
         return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+        
+      // If all bits are known zero except for one, then we know at most one
+      // bit is set.   If the comparison is against zero, then this is a check
+      // to see if *that* bit is set.
+      APInt Op0KnownZeroInverted = ~Op0KnownZero;
+      if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+        // If the LHS is an AND with the same constant, look through it.
+        Value *LHS = 0;
+        ConstantInt *LHSC = 0;
+        if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+            LHSC->getValue() != Op0KnownZeroInverted)
+          LHS = Op0;
+        
+        // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+        // then turn "((1 << x)&8) == 0" into "x != 3".
+        Value *X = 0;
+        if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+          unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+          return new ICmpInst(ICmpInst::ICMP_NE, X,
+                              ConstantInt::get(X->getType(), CmpVal));
+        }
+        
+        // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+        // then turn "((8 >>u x)&1) == 0" into "x != 3".
+        const APInt *CI;
+        if (Op0KnownZeroInverted == 1 &&
+            match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+          return new ICmpInst(ICmpInst::ICMP_NE, X,
+                              ConstantInt::get(X->getType(),
+                                               CI->countTrailingZeros()));
+      }
+        
       break;
-    case ICmpInst::ICMP_NE:
+    }
+    case ICmpInst::ICMP_NE: {
       if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
         return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+      
+      // If all bits are known zero except for one, then we know at most one
+      // bit is set.   If the comparison is against zero, then this is a check
+      // to see if *that* bit is set.
+      APInt Op0KnownZeroInverted = ~Op0KnownZero;
+      if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+        // If the LHS is an AND with the same constant, look through it.
+        Value *LHS = 0;
+        ConstantInt *LHSC = 0;
+        if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+            LHSC->getValue() != Op0KnownZeroInverted)
+          LHS = Op0;
+        
+        // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+        // then turn "((1 << x)&8) != 0" into "x == 3".
+        Value *X = 0;
+        if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+          unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+          return new ICmpInst(ICmpInst::ICMP_EQ, X,
+                              ConstantInt::get(X->getType(), CmpVal));
+        }
+        
+        // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+        // then turn "((8 >>u x)&1) != 0" into "x == 3".
+        const APInt *CI;
+        if (Op0KnownZeroInverted == 1 &&
+            match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+          return new ICmpInst(ICmpInst::ICMP_EQ, X,
+                              ConstantInt::get(X->getType(),
+                                               CI->countTrailingZeros()));
+      }
+      
       break;
+    }
     case ICmpInst::ICMP_ULT:
       if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
         return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
@@ -1894,7 +2132,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // block.  If in the same block, we're encouraging jump threading.  If
         // not, we are just pessimizing the code by making an i1 phi.
         if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I, true))
+          if (Instruction *NV = FoldOpIntoPhi(I))
             return NV;
         break;
       case Instruction::Select: {
@@ -1995,79 +2233,163 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       if (Instruction *R = visitICmpInstWithCastAndCast(I))
         return R;
   }
-  
-  // See if it's the same type of instruction on the left and right.
-  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
-    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
-      if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
-          Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
-        switch (Op0I->getOpcode()) {
-        default: break;
-        case Instruction::Add:
-        case Instruction::Sub:
-        case Instruction::Xor:
-          if (I.isEquality())    // a+x icmp eq/ne b+x --> a icmp b
-            return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
-                                Op1I->getOperand(0));
-          // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
-            if (CI->getValue().isSignBit()) {
-              ICmpInst::Predicate Pred = I.isSigned()
-                                             ? I.getUnsignedPredicate()
-                                             : I.getSignedPredicate();
-              return new ICmpInst(Pred, Op0I->getOperand(0),
-                                  Op1I->getOperand(0));
-            }
-            
-            if (CI->getValue().isMaxSignedValue()) {
-              ICmpInst::Predicate Pred = I.isSigned()
-                                             ? I.getUnsignedPredicate()
-                                             : I.getSignedPredicate();
-              Pred = I.getSwappedPredicate(Pred);
-              return new ICmpInst(Pred, Op0I->getOperand(0),
-                                  Op1I->getOperand(0));
-            }
+
+  // Special logic for binary operators.
+  BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+  BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+  if (BO0 || BO1) {
+    CmpInst::Predicate Pred = I.getPredicate();
+    bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+    if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+      NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+    if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+      NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+    // Analyze the case when either Op0 or Op1 is an add instruction.
+    // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    if (BO0 && BO0->getOpcode() == Instruction::Add)
+      A = BO0->getOperand(0), B = BO0->getOperand(1);
+    if (BO1 && BO1->getOpcode() == Instruction::Add)
+      C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+    // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+    if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+      return new ICmpInst(Pred, A == Op1 ? B : A,
+                          Constant::getNullValue(Op1->getType()));
+
+    // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+    if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+      return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+                          C == Op0 ? D : C);
+
+    // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+    if (A && C && (A == C || A == D || B == C || B == D) &&
+        NoOp0WrapProblem && NoOp1WrapProblem &&
+        // Try not to increase register pressure.
+        BO0->hasOneUse() && BO1->hasOneUse()) {
+      // Determine Y and Z in the form icmp (X+Y), (X+Z).
+      Value *Y = (A == C || A == D) ? B : A;
+      Value *Z = (C == A || C == B) ? D : C;
+      return new ICmpInst(Pred, Y, Z);
+    }
+
+    // Analyze the case when either Op0 or Op1 is a sub instruction.
+    // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+    A = 0; B = 0; C = 0; D = 0;
+    if (BO0 && BO0->getOpcode() == Instruction::Sub)
+      A = BO0->getOperand(0), B = BO0->getOperand(1);
+    if (BO1 && BO1->getOpcode() == Instruction::Sub)
+      C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+    // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+    if (A == Op1 && NoOp0WrapProblem)
+      return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+    // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+    if (C == Op0 && NoOp1WrapProblem)
+      return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+    // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+    if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+        // Try not to increase register pressure.
+        BO0->hasOneUse() && BO1->hasOneUse())
+      return new ICmpInst(Pred, A, C);
+
+    // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+    if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+        // Try not to increase register pressure.
+        BO0->hasOneUse() && BO1->hasOneUse())
+      return new ICmpInst(Pred, D, B);
+
+    if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+        BO0->hasOneUse() && BO1->hasOneUse() &&
+        BO0->getOperand(1) == BO1->getOperand(1)) {
+      switch (BO0->getOpcode()) {
+      default: break;
+      case Instruction::Add:
+      case Instruction::Sub:
+      case Instruction::Xor:
+        if (I.isEquality())    // a+x icmp eq/ne b+x --> a icmp b
+          return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+                              BO1->getOperand(0));
+        // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+          if (CI->getValue().isSignBit()) {
+            ICmpInst::Predicate Pred = I.isSigned()
+                                           ? I.getUnsignedPredicate()
+                                           : I.getSignedPredicate();
+            return new ICmpInst(Pred, BO0->getOperand(0),
+                                BO1->getOperand(0));
+          }
+          
+          if (CI->getValue().isMaxSignedValue()) {
+            ICmpInst::Predicate Pred = I.isSigned()
+                                           ? I.getUnsignedPredicate()
+                                           : I.getSignedPredicate();
+            Pred = I.getSwappedPredicate(Pred);
+            return new ICmpInst(Pred, BO0->getOperand(0),
+                                BO1->getOperand(0));
           }
+        }
+        break;
+      case Instruction::Mul:
+        if (!I.isEquality())
           break;
-        case Instruction::Mul:
-          if (!I.isEquality())
-            break;
 
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
-            // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
-            // Mask = -1 >> count-trailing-zeros(Cst).
-            if (!CI->isZero() && !CI->isOne()) {
-              const APInt &AP = CI->getValue();
-              ConstantInt *Mask = ConstantInt::get(I.getContext(), 
-                                      APInt::getLowBitsSet(AP.getBitWidth(),
-                                                           AP.getBitWidth() -
-                                                      AP.countTrailingZeros()));
-              Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
-              Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
-              return new ICmpInst(I.getPredicate(), And1, And2);
-            }
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+          // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+          // Mask = -1 >> count-trailing-zeros(Cst).
+          if (!CI->isZero() && !CI->isOne()) {
+            const APInt &AP = CI->getValue();
+            ConstantInt *Mask = ConstantInt::get(I.getContext(), 
+                                    APInt::getLowBitsSet(AP.getBitWidth(),
+                                                         AP.getBitWidth() -
+                                                    AP.countTrailingZeros()));
+            Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+            Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+            return new ICmpInst(I.getPredicate(), And1, And2);
           }
-          break;
         }
+        break;
       }
     }
   }
   
-  // ~x < ~y --> y < x
   { Value *A, *B;
-    if (match(Op0, m_Not(m_Value(A))) &&
-        match(Op1, m_Not(m_Value(B))))
-      return new ICmpInst(I.getPredicate(), B, A);
+    // ~x < ~y --> y < x
+    // ~x < cst --> ~cst < x
+    if (match(Op0, m_Not(m_Value(A)))) {
+      if (match(Op1, m_Not(m_Value(B))))
+        return new ICmpInst(I.getPredicate(), B, A);
+      if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+        return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+    }
+
+    // (a+b) <u a  --> llvm.uadd.with.overflow.
+    // (a+b) <u b  --> llvm.uadd.with.overflow.
+    if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+        match(Op0, m_Add(m_Value(A), m_Value(B))) && 
+        (Op1 == A || Op1 == B))
+      if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+        return R;
+                                 
+    // a >u (a+b)  --> llvm.uadd.with.overflow.
+    // b >u (a+b)  --> llvm.uadd.with.overflow.
+    if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+        match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+        (Op0 == A || Op0 == B))
+      if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+        return R;
   }
   
   if (I.isEquality()) {
     Value *A, *B, *C, *D;
-    
-    // -x == -y --> x == y
-    if (match(Op0, m_Neg(m_Value(A))) &&
-        match(Op1, m_Neg(m_Value(B))))
-      return new ICmpInst(I.getPredicate(), A, B);
-    
+
     if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
       if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
         Value *OtherVal = A == Op1 ? B : A;
@@ -2102,16 +2424,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
                           Constant::getNullValue(A->getType()));
     }
 
-    // (A-B) == A  ->  B == 0
-    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
-      return new ICmpInst(I.getPredicate(), B, 
-                          Constant::getNullValue(B->getType()));
-
-    // A == (A-B)  ->  B == 0
-    if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
-      return new ICmpInst(I.getPredicate(), B,
-                          Constant::getNullValue(B->getType()));
-    
     // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
     if (Op0->hasOneUse() && Op1->hasOneUse() &&
         match(Op0, m_And(m_Value(A), m_Value(B))) && 
@@ -2397,7 +2709,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
         // block.  If in the same block, we're encouraging jump threading.  If
         // not, we are just pessimizing the code by making an i1 phi.
         if (LHSI->getParent() == I.getParent())
-          if (Instruction *NV = FoldOpIntoPhi(I, true))
+          if (Instruction *NV = FoldOpIntoPhi(I))
             return NV;
         break;
       case Instruction::SIToFP:
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b68fbc2db5c9..78ff7346abe4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -145,7 +145,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // Attempt to improve the alignment.
   if (TD) {
     unsigned KnownAlign =
-      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
     unsigned LoadAlign = LI.getAlignment();
     unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
       TD->getABITypeAlignment(LI.getType());
@@ -165,7 +165,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   if (LI.isVolatile()) return 0;
   
   // Do really simple store-to-load forwarding and load CSE, to catch cases
-  // where there are several consequtive memory accesses to the same location,
+  // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
   BasicBlock::iterator BBI = &LI;
   if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
@@ -330,7 +330,9 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   
   NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                    SIOp0->getName()+".c");
-  return new StoreInst(NewCast, CastOp);
+  SI.setOperand(0, NewCast);
+  SI.setOperand(1, CastOp);
+  return &SI;
 }
 
 /// equivalentAddressValues - Test if A and B will obviously have the same
@@ -414,7 +416,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // Attempt to improve the alignment.
   if (TD) {
     unsigned KnownAlign =
-      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+                                 TD);
     unsigned StoreAlign = SI.getAlignment();
     unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
       TD->getABITypeAlignment(Val->getType());
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b3974e8eeffb..d1a1fd6ddfac 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -14,26 +14,22 @@
 
 #include "InstCombine.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
 
-/// SubOne - Subtract one from a ConstantInt.
-static Constant *SubOne(ConstantInt *C) {
-  return ConstantInt::get(C->getContext(), C->getValue()-1);
-}
-
 /// MultiplyOverflows - True if the multiply can not be expressed in an int
 /// this size.
 static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
   uint32_t W = C1->getBitWidth();
   APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
   if (sign) {
-    LHSExt.sext(W * 2);
-    RHSExt.sext(W * 2);
+    LHSExt = LHSExt.sext(W * 2);
+    RHSExt = RHSExt.sext(W * 2);
   } else {
-    LHSExt.zext(W * 2);
-    RHSExt.zext(W * 2);
+    LHSExt = LHSExt.zext(W * 2);
+    RHSExt = RHSExt.zext(W * 2);
   }
   
   APInt MulExt = LHSExt * RHSExt;
@@ -47,62 +43,48 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
 }
 
 Instruction *InstCombiner::visitMul(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (isa<UndefValue>(Op1))              // undef * X -> 0
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
 
-  // Simplify mul instructions with a constant RHS.
-  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
-
-      // ((X << C1)*C2) == (X * (C2 << C1))
-      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
-        if (SI->getOpcode() == Instruction::Shl)
-          if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
-            return BinaryOperator::CreateMul(SI->getOperand(0),
-                                        ConstantExpr::getShl(CI, ShOp));
-
-      if (CI->isZero())
-        return ReplaceInstUsesWith(I, Op1C);  // X * 0  == 0
-      if (CI->equalsInt(1))                  // X * 1  == X
-        return ReplaceInstUsesWith(I, Op0);
-      if (CI->isAllOnesValue())              // X * -1 == 0 - X
-        return BinaryOperator::CreateNeg(Op0, I.getName());
-
-      const APInt& Val = cast<ConstantInt>(CI)->getValue();
-      if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
-        return BinaryOperator::CreateShl(Op0,
-                 ConstantInt::get(Op0->getType(), Val.logBase2()));
-      }
-    } else if (Op1C->getType()->isVectorTy()) {
-      if (Op1C->isNullValue())
-        return ReplaceInstUsesWith(I, Op1C);
-
-      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
-        if (Op1V->isAllOnesValue())              // X * -1 == 0 - X
-          return BinaryOperator::CreateNeg(Op0, I.getName());
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return ReplaceInstUsesWith(I, V);
 
-        // As above, vector X*splat(1.0) -> X in all defined cases.
-        if (Constant *Splat = Op1V->getSplatValue()) {
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
-            if (CI->equalsInt(1))
-              return ReplaceInstUsesWith(I, Op0);
-        }
-      }
+  if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
+    return BinaryOperator::CreateNeg(Op0, I.getName());
+  
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    
+    // ((X << C1)*C2) == (X * (C2 << C1))
+    if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+      if (SI->getOpcode() == Instruction::Shl)
+        if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+          return BinaryOperator::CreateMul(SI->getOperand(0),
+                                           ConstantExpr::getShl(CI, ShOp));
+    
+    const APInt &Val = CI->getValue();
+    if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
+      Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+      BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+      if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+      if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+      return Shl;
     }
     
-    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
-      if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
-          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
-        // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
-        Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
-        Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
-        return BinaryOperator::CreateAdd(Add, C1C2);
-        
+    // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+    { Value *X; ConstantInt *C1;
+      if (Op0->hasOneUse() &&
+          match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+        Value *Add = Builder->CreateMul(X, CI, "tmp");
+        return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
       }
-
+    }
+  }
+  
+  // Simplify mul instructions with a constant RHS.
+  if (isa<Constant>(Op1)) {    
     // Try to fold constant mul into select arguments.
     if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
       if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -135,8 +117,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
          BO->getOpcode() == Instruction::SDiv)) {
       Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
 
-      // If the division is exact, X % Y is zero.
-      if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+      // If the division is exact, X % Y is zero, so we end up with X or -X.
+      if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
         if (SDiv->isExact()) {
           if (Op1BO == Op1C)
             return ReplaceInstUsesWith(I, Op0BO);
@@ -194,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 }
 
 Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
-  bool Changed = SimplifyCommutative(I);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // Simplify mul instructions with a constant RHS...
@@ -304,28 +286,6 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
 }
 
 
-/// This function implements the transforms on div instructions that work
-/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
-/// used by the visitors to those instructions.
-/// @brief Transforms common to all three div instructions
-Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  // undef / X -> 0        for integer.
-  // undef / X -> undef    for FP (the undef could be a snan).
-  if (isa<UndefValue>(Op0)) {
-    if (Op0->getType()->isFPOrFPVectorTy())
-      return ReplaceInstUsesWith(I, Op0);
-    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  }
-
-  // X / undef -> undef
-  if (isa<UndefValue>(Op1))
-    return ReplaceInstUsesWith(I, Op1);
-
-  return 0;
-}
-
 /// This function implements the transforms common to both integer division
 /// instructions (udiv and sdiv). It is called by the visitors to those integer
 /// division instructions.
@@ -333,31 +293,12 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
 Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // (sdiv X, X) --> 1     (udiv X, X) --> 1
-  if (Op0 == Op1) {
-    if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
-      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
-      std::vector<Constant*> Elts(Ty->getNumElements(), CI);
-      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
-    }
-
-    Constant *CI = ConstantInt::get(I.getType(), 1);
-    return ReplaceInstUsesWith(I, CI);
-  }
-  
-  if (Instruction *Common = commonDivTransforms(I))
-    return Common;
-  
   // Handle cases involving: [su]div X, (select Cond, Y, Z)
   // This does not apply for fdiv.
   if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
     return &I;
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // div X, 1 == X
-    if (RHS->equalsInt(1))
-      return ReplaceInstUsesWith(I, Op0);
-
     // (X / C1) / C2  -> X / (C1*C2)
     if (Instruction *LHS = dyn_cast<Instruction>(Op0))
       if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
@@ -365,9 +306,8 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
           if (MultiplyOverflows(RHS, LHSRHS,
                                 I.getOpcode()==Instruction::SDiv))
             return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-          else 
-            return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
-                                      ConstantExpr::getMul(RHS, LHSRHS));
+          return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+                                        ConstantExpr::getMul(RHS, LHSRHS));
         }
 
     if (!RHS->isZero()) { // avoid X udiv 0
@@ -380,20 +320,13 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     }
   }
 
-  // 0 / X == 0, we don't need to preserve faults!
-  if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
-    if (LHS->equalsInt(0))
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
-  // It can't be division by zero, hence it must be division by one.
-  if (I.getType()->isIntegerTy(1))
-    return ReplaceInstUsesWith(I, Op0);
-
-  if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
-    if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
-      // div X, 1 == X
-      if (X->isOne())
-        return ReplaceInstUsesWith(I, Op0);
+  // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+  Value *X = 0, *Z = 0;
+  if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+    bool isSigned = I.getOpcode() == Instruction::SDiv;
+    if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+        (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+      return BinaryOperator::Create(I.getOpcode(), X, Op1);
   }
 
   return 0;
@@ -402,6 +335,9 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
 Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
   // Handle the integer div common cases
   if (Instruction *Common = commonIDivTransforms(I))
     return Common;
@@ -410,60 +346,59 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
     // X udiv 2^C -> X >> C
     // Check to see if this is an unsigned division with an exact power of 2,
     // if so, convert to a right shift.
-    if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
-      return BinaryOperator::CreateLShr(Op0, 
+    if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+      BinaryOperator *LShr =
+        BinaryOperator::CreateLShr(Op0, 
             ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+      if (I.isExact()) LShr->setIsExact();
+      return LShr;
+    }
 
     // X udiv C, where C >= signbit
     if (C->getValue().isNegative()) {
-      Value *IC = Builder->CreateICmpULT( Op0, C);
+      Value *IC = Builder->CreateICmpULT(Op0, C);
       return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
                                 ConstantInt::get(I.getType(), 1));
     }
   }
 
   // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
-  if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
-    if (RHSI->getOpcode() == Instruction::Shl &&
-        isa<ConstantInt>(RHSI->getOperand(0))) {
-      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
-      if (C1.isPowerOf2()) {
-        Value *N = RHSI->getOperand(1);
-        const Type *NTy = N->getType();
-        if (uint32_t C2 = C1.logBase2())
-          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
-        return BinaryOperator::CreateLShr(Op0, N);
-      }
+  { const APInt *CI; Value *N;
+    if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+      if (*CI != 1)
+        N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
+                               "tmp");
+      if (I.isExact())
+        return BinaryOperator::CreateExactLShr(Op0, N);
+      return BinaryOperator::CreateLShr(Op0, N);
     }
   }
   
   // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
   // where C1&C2 are powers of two.
-  if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 
-    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
-      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2)))  {
-        const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
-        if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
-          // Compute the shift amounts
-          uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
-          // Construct the "on true" case of the select
-          Constant *TC = ConstantInt::get(Op0->getType(), TSA);
-          Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
+  { Value *Cond; const APInt *C1, *C2;
+    if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+      // Construct the "on true" case of the select
+      Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+                                       I.isExact());
   
-          // Construct the "on false" case of the select
-          Constant *FC = ConstantInt::get(Op0->getType(), FSA); 
-          Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
-
-          // construct the select instruction and return it.
-          return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
-        }
-      }
+      // Construct the "on false" case of the select
+      Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+                                       I.isExact());
+      
+      // construct the select instruction and return it.
+      return SelectInst::Create(Cond, TSI, FSI);
+    }
+  }
   return 0;
 }
 
 Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
   // Handle the integer div common cases
   if (Instruction *Common = commonIDivTransforms(I))
     return Common;
@@ -473,20 +408,17 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
     if (RHS->isAllOnesValue())
       return BinaryOperator::CreateNeg(Op0);
 
-    // sdiv X, C  -->  ashr X, log2(C)
-    if (cast<SDivOperator>(&I)->isExact() &&
-        RHS->getValue().isNonNegative() &&
+    // sdiv X, C  -->  ashr exact X, log2(C)
+    if (I.isExact() && RHS->getValue().isNonNegative() &&
         RHS->getValue().isPowerOf2()) {
       Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
                                             RHS->getValue().exactLogBase2());
-      return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+      return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
     }
 
     // -X/C  -->  X/-C  provided the negation doesn't overflow.
     if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
-      if (isa<Constant>(Sub->getOperand(0)) &&
-          cast<Constant>(Sub->getOperand(0))->isNullValue() &&
-          Sub->hasNoSignedWrap())
+      if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
         return BinaryOperator::CreateSDiv(Sub->getOperand(1),
                                           ConstantExpr::getNeg(RHS));
   }
@@ -500,9 +432,8 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
         // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
         return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
       }
-      ConstantInt *ShiftedInt;
-      if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
-          ShiftedInt->getValue().isPowerOf2()) {
+      
+      if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
         // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
         // Safe because the only negative value (1 << Y) can take on is
         // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -516,7 +447,12 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
 }
 
 Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
-  return commonDivTransforms(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  return 0;
 }
 
 /// This function implements the transforms on rem instructions that work
@@ -551,6 +487,10 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   if (Instruction *common = commonRemTransforms(I))
     return common;
 
+  // X % X == 0
+  if (Op0 == Op1)
+    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
   // 0 % X == 0 for integer, we don't need to preserve faults!
   if (Constant *LHS = dyn_cast<Constant>(Op0))
     if (LHS->isNullValue())
@@ -588,42 +528,29 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   if (Instruction *common = commonIRemTransforms(I))
     return common;
   
-  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // X urem C^2 -> X and C
-    // Check to see if this is an unsigned remainder with an exact power of 2,
-    // if so, convert to a bitwise and.
-    if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
-      if (C->getValue().isPowerOf2())
-        return BinaryOperator::CreateAnd(Op0, SubOne(C));
+  // X urem C^2 -> X and C-1
+  { const APInt *C;
+    if (match(Op1, m_Power2(C)))
+      return BinaryOperator::CreateAnd(Op0,
+                                       ConstantInt::get(I.getType(), *C-1));
   }
 
-  if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
-    // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)  
-    if (RHSI->getOpcode() == Instruction::Shl &&
-        isa<ConstantInt>(RHSI->getOperand(0))) {
-      if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
-        Constant *N1 = Constant::getAllOnesValue(I.getType());
-        Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
-        return BinaryOperator::CreateAnd(Op0, Add);
-      }
-    }
+  // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)  
+  if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+    Constant *N1 = Constant::getAllOnesValue(I.getType());
+    Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+    return BinaryOperator::CreateAnd(Op0, Add);
   }
 
-  // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2)
-  // where C1&C2 are powers of two.
-  if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
-    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
-      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
-        // STO == 0 and SFO == 0 handled above.
-        if ((STO->getValue().isPowerOf2()) && 
-            (SFO->getValue().isPowerOf2())) {
-          Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
-                                              SI->getName()+".t");
-          Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
-                                               SI->getName()+".f");
-          return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
-        }
-      }
+  // urem X, (select Cond, 2^C1, 2^C2) -->
+  //    select Cond, (and X, C1-1), (and X, C2-1)
+  // when C1&C2 are powers of two.
+  { Value *Cond; const APInt *C1, *C2;
+    if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+      Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+      Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+      return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+    }
   }
   
   return 0;
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index f7fc62f9dc4f..297a18c40a97 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -30,22 +31,37 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   const Type *LHSType = LHSVal->getType();
   const Type *RHSType = RHSVal->getType();
   
+  bool isNUW = false, isNSW = false, isExact = false;
+  if (OverflowingBinaryOperator *BO =
+        dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+    isNUW = BO->hasNoUnsignedWrap();
+    isNSW = BO->hasNoSignedWrap();
+  } else if (PossiblyExactOperator *PEO =
+               dyn_cast<PossiblyExactOperator>(FirstInst))
+    isExact = PEO->isExact();
+  
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
     if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
         // Verify type of the LHS matches so we don't fold cmp's of different
-        // types or GEP's with different index types.
+        // types.
         I->getOperand(0)->getType() != LHSType ||
         I->getOperand(1)->getType() != RHSType)
       return 0;
 
     // If they are CmpInst instructions, check their predicates
-    if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
-      if (cast<CmpInst>(I)->getPredicate() !=
-          cast<CmpInst>(FirstInst)->getPredicate())
+    if (CmpInst *CI = dyn_cast<CmpInst>(I))
+      if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
         return 0;
     
+    if (isNUW)
+      isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+    if (isNSW)
+      isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    if (isExact)
+      isExact = cast<PossiblyExactOperator>(I)->isExact();
+    
     // Keep track of which operand needs a phi node.
     if (I->getOperand(0) != LHSVal) LHSVal = 0;
     if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -96,11 +112,17 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     }
   }
     
-  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
-    return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
-  CmpInst *CIOp = cast<CmpInst>(FirstInst);
-  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
-                         LHSVal, RHSVal);
+  if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+                           LHSVal, RHSVal);
+  
+  BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+  BinaryOperator *NewBinOp =
+    BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+  if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+  if (isNSW) NewBinOp->setHasNoSignedWrap();
+  if (isExact) NewBinOp->setIsExact();
+  return NewBinOp;
 }
 
 Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
@@ -117,6 +139,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // especially bad when the PHIs are in the header of a loop.
   bool NeededPhi = false;
   
+  bool AllInBounds = true;
+  
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -124,6 +148,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       GEP->getNumOperands() != FirstInst->getNumOperands())
       return 0;
 
+    AllInBounds &= GEP->isInBounds();
+    
     // Keep track of whether or not all GEPs are of alloca pointers.
     if (AllBasePointersAreAllocas &&
         (!isa<AllocaInst>(GEP->getOperand(0)) ||
@@ -201,11 +227,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   }
   
   Value *Base = FixedOperands[0];
-  return cast<GEPOperator>(FirstInst)->isInBounds() ?
-    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
-                                      FixedOperands.end()) :
+  GetElementPtrInst *NewGEP = 
     GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
                               FixedOperands.end());
+  if (AllInBounds) NewGEP->setIsInBounds();
+  return NewGEP;
 }
 
 
@@ -368,6 +394,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   // code size and simplifying code.
   Constant *ConstantOp = 0;
   const Type *CastSrcTy = 0;
+  bool isNUW = false, isNSW = false, isExact = false;
   
   if (isa<CastInst>(FirstInst)) {
     CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -384,6 +411,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
     if (ConstantOp == 0)
       return FoldPHIArgBinOpIntoPHI(PN);
+    
+    if (OverflowingBinaryOperator *BO =
+        dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+      isNUW = BO->hasNoUnsignedWrap();
+      isNSW = BO->hasNoSignedWrap();
+    } else if (PossiblyExactOperator *PEO =
+               dyn_cast<PossiblyExactOperator>(FirstInst))
+      isExact = PEO->isExact();
   } else {
     return 0;  // Cannot fold this operation.
   }
@@ -399,6 +434,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     } else if (I->getOperand(1) != ConstantOp) {
       return 0;
     }
+    
+    if (isNUW)
+      isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+    if (isNSW)
+      isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    if (isExact)
+      isExact = cast<PossiblyExactOperator>(I)->isExact();
   }
 
   // Okay, they are all the same operation.  Create a new PHI node of the
@@ -433,8 +475,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
     return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
   
-  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
-    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+    BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+    if (isNUW) BinOp->setHasNoUnsignedWrap();
+    if (isNSW) BinOp->setHasNoSignedWrap();
+    if (isExact) BinOp->setIsExact();
+    return BinOp;
+  }
   
   CmpInst *CIOp = cast<CmpInst>(FirstInst);
   return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
@@ -731,8 +778,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
   // If LCSSA is around, don't mess with Phi nodes
   if (MustPreserveLCSSA) return 0;
-  
-  if (Value *V = PN.hasConstantValue())
+
+  if (Value *V = SimplifyInstruction(&PN, TD))
     return ReplaceInstUsesWith(PN, V);
 
   // If all PHI operands are the same operation, pull them through the PHI,
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c44fe9db6e3a..97abc769ae5f 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -24,14 +24,14 @@ static SelectPatternFlavor
 MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
   SelectInst *SI = dyn_cast<SelectInst>(V);
   if (SI == 0) return SPF_UNKNOWN;
-  
+
   ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
   if (ICI == 0) return SPF_UNKNOWN;
-  
+
   LHS = ICI->getOperand(0);
   RHS = ICI->getOperand(1);
-  
-  // (icmp X, Y) ? X : Y 
+
+  // (icmp X, Y) ? X : Y
   if (SI->getTrueValue() == ICI->getOperand(0) &&
       SI->getFalseValue() == ICI->getOperand(1)) {
     switch (ICI->getPredicate()) {
@@ -46,8 +46,8 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
     case ICmpInst::ICMP_SLE: return SPF_SMIN;
     }
   }
-  
-  // (icmp X, Y) ? Y : X 
+
+  // (icmp X, Y) ? Y : X
   if (SI->getTrueValue() == ICI->getOperand(1) &&
       SI->getFalseValue() == ICI->getOperand(0)) {
     switch (ICI->getPredicate()) {
@@ -62,9 +62,9 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
       case ICmpInst::ICMP_SLE: return SPF_SMAX;
     }
   }
-  
+
   // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5)
-  
+
   return SPF_UNKNOWN;
 }
 
@@ -136,7 +136,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
                                           FI->getOperand(0), SI.getName()+".v");
     InsertNewInstBefore(NewSI, SI);
-    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, 
+    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                             TI->getType());
   }
 
@@ -195,7 +195,10 @@ static bool isSelect01(Constant *C1, Constant *C2) {
   ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
   if (!C2I)
     return false;
-  return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+  if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+    return false;
+  return C1I->isOne() || C1I->isAllOnesValue() ||
+         C2I->isOne() || C2I->isAllOnesValue();
 }
 
 /// FoldSelectIntoOp - Try fold the select into one of the operands to
@@ -219,7 +222,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
           Constant *C = GetSelectFoldableConstant(TVI);
           Value *OOp = TVI->getOperand(2-OpToFold);
           // Avoid creating select between 2 constants unless it's selecting
-          // between 0 and 1.
+          // between 0, 1 and -1.
           if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
             Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
             InsertNewInstBefore(NewSel, SI);
@@ -248,7 +251,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
           Constant *C = GetSelectFoldableConstant(FVI);
           Value *OOp = FVI->getOperand(2-OpToFold);
           // Avoid creating select between 2 constants unless it's selecting
-          // between 0 and 1.
+          // between 0, 1 and -1.
           if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
             Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
             InsertNewInstBefore(NewSel, SI);
@@ -278,52 +281,95 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
   Value *FalseVal = SI.getFalseValue();
 
   // Check cases where the comparison is with a constant that
-  // can be adjusted to fit the min/max idiom. We may edit ICI in
-  // place here, so make sure the select is the only user.
+  // can be adjusted to fit the min/max idiom. We may move or edit ICI
+  // here, so make sure the select is the only user.
   if (ICI->hasOneUse())
     if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+      // X < MIN ? T : F  -->  F
+      if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+          && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+        return ReplaceInstUsesWith(SI, FalseVal);
+      // X > MAX ? T : F  -->  F
+      else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+               && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+        return ReplaceInstUsesWith(SI, FalseVal);
       switch (Pred) {
       default: break;
       case ICmpInst::ICMP_ULT:
-      case ICmpInst::ICMP_SLT: {
-        // X < MIN ? T : F  -->  F
-        if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
-          return ReplaceInstUsesWith(SI, FalseVal);
-        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
-        Constant *AdjustedRHS =
-          ConstantInt::get(CI->getContext(), CI->getValue()-1);
-        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
-            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
-          Pred = ICmpInst::getSwappedPredicate(Pred);
-          CmpRHS = AdjustedRHS;
-          std::swap(FalseVal, TrueVal);
-          ICI->setPredicate(Pred);
-          ICI->setOperand(1, CmpRHS);
-          SI.setOperand(1, TrueVal);
-          SI.setOperand(2, FalseVal);
-          Changed = true;
-        }
-        break;
-      }
+      case ICmpInst::ICMP_SLT:
       case ICmpInst::ICMP_UGT:
       case ICmpInst::ICMP_SGT: {
-        // X > MAX ? T : F  -->  F
-        if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
-          return ReplaceInstUsesWith(SI, FalseVal);
+        // These transformations only work for selects over integers.
+        const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+        if (!SelectTy)
+          break;
+
+        Constant *AdjustedRHS;
+        if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+          AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+        else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+          AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
         // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
-        Constant *AdjustedRHS =
-          ConstantInt::get(CI->getContext(), CI->getValue()+1);
+        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
         if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
-            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
-          Pred = ICmpInst::getSwappedPredicate(Pred);
-          CmpRHS = AdjustedRHS;
-          std::swap(FalseVal, TrueVal);
-          ICI->setPredicate(Pred);
-          ICI->setOperand(1, CmpRHS);
-          SI.setOperand(1, TrueVal);
-          SI.setOperand(2, FalseVal);
-          Changed = true;
-        }
+            (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+          ; // Nothing to do here. Values match without any sign/zero extension.
+
+        // Types do not match. Instead of calculating this with mixed types
+        // promote all to the larger type. This enables scalar evolution to
+        // analyze this expression.
+        else if (CmpRHS->getType()->getScalarSizeInBits()
+                 < SelectTy->getBitWidth()) {
+          Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+          // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+          // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+          // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+          // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+          if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+                sextRHS == FalseVal) {
+            CmpLHS = TrueVal;
+            AdjustedRHS = sextRHS;
+          } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+                     sextRHS == TrueVal) {
+            CmpLHS = FalseVal;
+            AdjustedRHS = sextRHS;
+          } else if (ICI->isUnsigned()) {
+            Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+            // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+            // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+            // zext + signed compare cannot be changed:
+            //    0xff <s 0x00, but 0x00ff >s 0x0000
+            if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+                zextRHS == FalseVal) {
+              CmpLHS = TrueVal;
+              AdjustedRHS = zextRHS;
+            } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+                       zextRHS == TrueVal) {
+              CmpLHS = FalseVal;
+              AdjustedRHS = zextRHS;
+            } else
+              break;
+          } else
+            break;
+        } else
+          break;
+
+        Pred = ICmpInst::getSwappedPredicate(Pred);
+        CmpRHS = AdjustedRHS;
+        std::swap(FalseVal, TrueVal);
+        ICI->setPredicate(Pred);
+        ICI->setOperand(0, CmpLHS);
+        ICI->setOperand(1, CmpRHS);
+        SI.setOperand(1, TrueVal);
+        SI.setOperand(2, FalseVal);
+
+        // Move ICI instruction right before the select instruction. Otherwise
+        // the sext/zext value may be defined after the ICI instruction uses it.
+        ICI->moveBefore(&SI);
+
+        Changed = true;
         break;
       }
       }
@@ -399,28 +445,28 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
   // can always be mapped.
   const Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0) return true;
-  
+
   // If V is a PHI node defined in the same block as the condition PHI, we can
   // map the arguments.
   const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
-  
+
   if (const PHINode *VP = dyn_cast<PHINode>(I))
     if (VP->getParent() == CondPHI->getParent())
       return true;
-  
+
   // Otherwise, if the PHI and select are defined in the same block and if V is
   // defined in a different block, then we can transform it.
   if (SI.getParent() == CondPHI->getParent() &&
       I->getParent() != CondPHI->getParent())
     return true;
-  
+
   // Otherwise we have a 'hard' case and we can't tell without doing more
   // detailed dominator based analysis, punt.
   return false;
 }
 
 /// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
-///   SPF2(SPF1(A, B), C) 
+///   SPF2(SPF1(A, B), C)
 Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
                                         SelectPatternFlavor SPF1,
                                         Value *A, Value *B,
@@ -431,7 +477,7 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
     // MIN(MIN(a, b), a) -> MIN(a, b)
     if (SPF1 == SPF2)
       return ReplaceInstUsesWith(Outer, Inner);
-    
+
     // MAX(MIN(a, b), a) -> a
     // MIN(MAX(a, b), a) -> a
     if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
@@ -440,13 +486,82 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
         (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
       return ReplaceInstUsesWith(Outer, C);
   }
-  
+
   // TODO: MIN(MIN(A, 23), 97)
   return 0;
 }
 
 
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+                                ConstantInt *FalseVal,
+                                InstCombiner::BuilderTy *Builder) {
+  const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+  if (!IC || !IC->isEquality())
+    return 0;
+
+  if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1)))
+    if (!C->isZero())
+      return 0;
 
+  ConstantInt *AndRHS;
+  Value *LHS = IC->getOperand(0);
+  if (LHS->getType() != SI.getType() ||
+      !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+    return 0;
+
+  // If both select arms are non-zero see if we have a select of the form
+  // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+  // for 'x ? 2^n : 0' and fix the thing up at the end.
+  ConstantInt *Offset = 0;
+  if (!TrueVal->isZero() && !FalseVal->isZero()) {
+    if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+      Offset = FalseVal;
+    else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+      Offset = TrueVal;
+    else
+      return 0;
+
+    // Adjust TrueVal and FalseVal to the offset.
+    TrueVal = ConstantInt::get(Builder->getContext(),
+                               TrueVal->getValue() - Offset->getValue());
+    FalseVal = ConstantInt::get(Builder->getContext(),
+                                FalseVal->getValue() - Offset->getValue());
+  }
+
+  // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+  if (!AndRHS->getValue().isPowerOf2() ||
+      (!TrueVal->getValue().isPowerOf2() &&
+       !FalseVal->getValue().isPowerOf2()))
+    return 0;
+
+  // Determine which shift is needed to transform result of the 'and' into the
+  // desired result.
+  ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+  unsigned ValZeros = ValC->getValue().logBase2();
+  unsigned AndZeros = AndRHS->getValue().logBase2();
+
+  Value *V = LHS;
+  if (ValZeros > AndZeros)
+    V = Builder->CreateShl(V, ValZeros - AndZeros);
+  else if (ValZeros < AndZeros)
+    V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+  // Okay, now we know that everything is set up, we just don't know whether we
+  // have a icmp_ne or icmp_eq and whether the true or false val is the zero.
+  bool ShouldNotVal = !TrueVal->isZero();
+  ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+  if (ShouldNotVal)
+    V = Builder->CreateXor(V, ValC);
+
+  // Apply an offset if needed.
+  if (Offset)
+    V = Builder->CreateAdd(V, Offset);
+  return V;
+}
 
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
@@ -478,7 +593,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
                                            "not."+CondVal->getName()), SI);
       return BinaryOperator::CreateOr(NotCond, TrueVal);
     }
-    
+
     // select a, b, a  -> a&b
     // select a, a, b  -> a|b
     if (CondVal == TrueVal)
@@ -497,7 +612,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // select C, -1, 0 -> sext C to int
       if (FalseValC->isZero() && TrueValC->isAllOnesValue())
         return new SExtInst(CondVal, SI.getType());
-      
+
       // select C, 0, 1 -> zext !C to int
       if (TrueValC->isZero() && FalseValC->getValue() == 1) {
         Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
@@ -509,32 +624,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
         Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
         return new SExtInst(NotCond, SI.getType());
       }
-      
-      if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
-        // If one of the constants is zero (we know they can't both be) and we
-        // have an icmp instruction with zero, and we have an 'and' with the
-        // non-constant value, eliminate this whole mess.  This corresponds to
-        // cases like this: ((X & 27) ? 27 : 0)
-        if (TrueValC->isZero() || FalseValC->isZero())
-          if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
-              cast<Constant>(IC->getOperand(1))->isNullValue())
-            if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
-              if (ICA->getOpcode() == Instruction::And &&
-                  isa<ConstantInt>(ICA->getOperand(1)) &&
-                  (ICA->getOperand(1) == TrueValC ||
-                   ICA->getOperand(1) == FalseValC) &&
-               cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
-                // Okay, now we know that everything is set up, we just don't
-                // know whether we have a icmp_ne or icmp_eq and whether the 
-                // true or false val is the zero.
-                bool ShouldNotVal = !TrueValC->isZero();
-                ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
-                Value *V = ICA;
-                if (ShouldNotVal)
-                  V = Builder->CreateXor(V, ICA->getOperand(1));
-                return ReplaceInstUsesWith(SI, V);
-              }
-      }
+
+      if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+        return ReplaceInstUsesWith(SI, V);
     }
 
   // See if we are selecting two values based on a comparison of the two values.
@@ -542,7 +634,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
       // Transform (X == Y) ? X : Y  -> Y
       if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
-        // This is not safe in general for floating point:  
+        // This is not safe in general for floating point:
         // consider X== -0, Y== +0.
         // It becomes safe if either operand is a nonzero constant.
         ConstantFP *CFPt, *CFPf;
@@ -554,7 +646,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       }
       // Transform (X une Y) ? X : Y  -> X
       if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
-        // This is not safe in general for floating point:  
+        // This is not safe in general for floating point:
         // consider X== -0, Y== +0.
         // It becomes safe if either operand is a nonzero constant.
         ConstantFP *CFPt, *CFPf;
@@ -569,7 +661,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
       // Transform (X == Y) ? Y : X  -> X
       if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
-        // This is not safe in general for floating point:  
+        // This is not safe in general for floating point:
         // consider X== -0, Y== +0.
         // It becomes safe if either operand is a nonzero constant.
         ConstantFP *CFPt, *CFPf;
@@ -581,7 +673,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       }
       // Transform (X une Y) ? Y : X  -> Y
       if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
-        // This is not safe in general for floating point:  
+        // This is not safe in general for floating point:
         // consider X== -0, Y== +0.
         // It becomes safe if either operand is a nonzero constant.
         ConstantFP *CFPt, *CFPf;
@@ -639,6 +731,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             Value *NegVal;  // Compute -Z
             if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
               NegVal = ConstantExpr::getNeg(C);
+            } else if (SI.getType()->isFloatingPointTy()) {
+              NegVal = InsertNewInstBefore(
+                    BinaryOperator::CreateFNeg(SubOp->getOperand(1),
+                                              "tmp"), SI);
             } else {
               NegVal = InsertNewInstBefore(
                     BinaryOperator::CreateNeg(SubOp->getOperand(1),
@@ -654,7 +750,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
                                  NewFalseOp, SI.getName() + ".p");
 
             NewSel = InsertNewInstBefore(NewSel, SI);
-            return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+            if (SI.getType()->isFloatingPointTy())
+              return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+            else
+              return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
           }
         }
       }
@@ -663,7 +762,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (SI.getType()->isIntegerTy()) {
     if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
       return FoldI;
-    
+
     // MAX(MAX(a, b), a) -> MAX(a, b)
     // MIN(MIN(a, b), a) -> MIN(a, b)
     // MAX(MIN(a, b), a) -> a
@@ -686,13 +785,26 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   }
 
   // See if we can fold the select into a phi node if the condition is a select.
-  if (isa<PHINode>(SI.getCondition())) 
+  if (isa<PHINode>(SI.getCondition()))
     // The true/false values have to be live in the PHI predecessor's blocks.
     if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
         CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
       if (Instruction *NV = FoldOpIntoPhi(SI))
         return NV;
 
+  if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+    if (TrueSI->getCondition() == CondVal) {
+      SI.setOperand(1, TrueSI->getTrueValue());
+      return &SI;
+    }
+  }
+  if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+    if (FalseSI->getCondition() == CondVal) {
+      SI.setOperand(2, FalseSI->getFalseValue());
+      return &SI;
+    }
+  }
+
   if (BinaryOperator::isNot(CondVal)) {
     SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
     SI.setOperand(1, FalseVal);
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 27716b886a22..a7f800587bb6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,7 @@
 
 #include "InstCombine.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -21,25 +22,6 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // shl X, 0 == X and shr X, 0 == X
-  // shl 0, X == 0 and shr 0, X == 0
-  if (Op1 == Constant::getNullValue(Op1->getType()) ||
-      Op0 == Constant::getNullValue(Op0->getType()))
-    return ReplaceInstUsesWith(I, Op0);
-  
-  if (isa<UndefValue>(Op0)) {            
-    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
-      return ReplaceInstUsesWith(I, Op0);
-    else                                    // undef << X -> 0, undef >>u X -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  }
-  if (isa<UndefValue>(Op1)) {
-    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
-      return ReplaceInstUsesWith(I, Op0);          
-    else                                     // X << undef, X >>u undef -> 0
-      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-  }
-
   // See if we can fold away this shift.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
@@ -53,6 +35,20 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
     if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
       return Res;
+
+  // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
+  // Because shifts by negative values (which could occur if A were negative)
+  // are undefined.
+  Value *A; const APInt *B;
+  if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+    // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+    // demand the sign bit (and many others) here??
+    Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+                                    Op1->getName());
+    I.setOperand(1, Rem);
+    return &I;
+  }
+  
   return 0;
 }
 
@@ -81,7 +77,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
   // if the needed bits are already zero in the input.  This allows us to reuse
   // the value which means that we don't care if the shift has multiple uses.
   //  TODO:  Handle opposite shift by exact value.
-  ConstantInt *CI;
+  ConstantInt *CI = 0;
   if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
       (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
     if (CI->getZExtValue() == NumBits) {
@@ -131,9 +127,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
     // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
     // profitable unless we know the and'd out bits are already zero.
     if (CI->getZExtValue() > NumBits) {
-      unsigned HighBits = CI->getZExtValue() - NumBits;
+      unsigned LowBits = TypeWidth - CI->getZExtValue();
       if (MaskedValueIsZero(I->getOperand(0),
-                            APInt::getHighBitsSet(TypeWidth, HighBits)))
+                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
       
@@ -157,7 +153,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
     if (CI->getZExtValue() > NumBits) {
       unsigned LowBits = CI->getZExtValue() - NumBits;
       if (MaskedValueIsZero(I->getOperand(0),
-                            APInt::getLowBitsSet(TypeWidth, LowBits)))
+                          APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
       
@@ -622,16 +618,49 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
 }
 
 Instruction *InstCombiner::visitShl(BinaryOperator &I) {
-  return commonShiftTransforms(I);
+  if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+                                 TD))
+    return ReplaceInstUsesWith(I, V);
+  
+  if (Instruction *V = commonShiftTransforms(I))
+    return V;
+  
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+    unsigned ShAmt = Op1C->getZExtValue();
+    
+    // If the shifted-out value is known-zero, then this is a NUW shift.
+    if (!I.hasNoUnsignedWrap() && 
+        MaskedValueIsZero(I.getOperand(0),
+                          APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+          I.setHasNoUnsignedWrap();
+          return &I;
+        }
+    
+    // If the shifted out value is all signbits, this is a NSW shift.
+    if (!I.hasNoSignedWrap() &&
+        ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+      I.setHasNoSignedWrap();
+      return &I;
+    }
+  }
+  
+  return 0;    
 }
 
 Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+  if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+                                  I.isExact(), TD))
+    return ReplaceInstUsesWith(I, V);
+
   if (Instruction *R = commonShiftTransforms(I))
     return R;
   
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   
-  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1))
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+    unsigned ShAmt = Op1C->getZExtValue();
+
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
       unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
       // ctlz.i32(x)>>5  --> zext(x == 0)
@@ -640,7 +669,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
       if ((II->getIntrinsicID() == Intrinsic::ctlz ||
            II->getIntrinsicID() == Intrinsic::cttz ||
            II->getIntrinsicID() == Intrinsic::ctpop) &&
-          isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
+          isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
         bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
         Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
         Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
@@ -648,29 +677,37 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
       }
     }
   
+    // If the shifted-out value is known-zero, then this is an exact shift.
+    if (!I.isExact() && 
+        MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+      I.setIsExact();
+      return &I;
+    }    
+  }
+  
   return 0;
 }
 
 Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+  if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+                                  I.isExact(), TD))
+    return ReplaceInstUsesWith(I, V);
+
   if (Instruction *R = commonShiftTransforms(I))
     return R;
   
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  
-  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) {
-    // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
-    if (CSI->isAllOnesValue())
-      return ReplaceInstUsesWith(I, CSI);
-  }
-  
+
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+    unsigned ShAmt = Op1C->getZExtValue();
+    
     // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
     // have a sign-extend idiom.
     Value *X;
     if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
-      // If the input value is known to already be sign extended enough, delete
-      // the extension.
-      if (ComputeNumSignBits(X) > Op1C->getZExtValue())
+      // If the left shift is just shifting out partial signbits, delete the
+      // extension.
+      if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
         return ReplaceInstUsesWith(I, X);
 
       // If the input is an extension from the shifted amount value, e.g.
@@ -685,6 +722,13 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
           return new SExtInst(ZI->getOperand(0), ZI->getType());
       }
     }
+
+    // If the shifted-out value is known-zero, then this is an exact shift.
+    if (!I.isExact() && 
+        MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+      I.setIsExact();
+      return &I;
+    }
   }            
   
   // See if we can turn a signed shr into an unsigned shr.
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index adf7a769f4ae..bda8cea4e41f 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -34,7 +34,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
   if (!OpC) return false;
 
   // If there are no bits set that aren't demanded, nothing to do.
-  Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+  Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
   if ((~Demanded & OpC->getValue()) == 0)
     return false;
 
@@ -121,13 +121,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   }
   if (isa<ConstantPointerNull>(V)) {
     // We know all of the bits for a constant!
-    KnownOne.clear();
+    KnownOne.clearAllBits();
     KnownZero = DemandedMask;
     return 0;
   }
 
-  KnownZero.clear();
-  KnownOne.clear();
+  KnownZero.clearAllBits();
+  KnownOne.clearAllBits();
   if (DemandedMask == 0) {   // Not demanding any bits from V.
     if (isa<UndefValue>(V))
       return 0;
@@ -388,15 +388,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Trunc: {
     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
-    DemandedMask.zext(truncBf);
-    KnownZero.zext(truncBf);
-    KnownOne.zext(truncBf);
+    DemandedMask = DemandedMask.zext(truncBf);
+    KnownZero = KnownZero.zext(truncBf);
+    KnownOne = KnownOne.zext(truncBf);
     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, 
                              KnownZero, KnownOne, Depth+1))
       return I;
-    DemandedMask.trunc(BitWidth);
-    KnownZero.trunc(BitWidth);
-    KnownOne.trunc(BitWidth);
+    DemandedMask = DemandedMask.trunc(BitWidth);
+    KnownZero = KnownZero.trunc(BitWidth);
+    KnownOne = KnownOne.trunc(BitWidth);
     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
     break;
   }
@@ -426,15 +426,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
     
-    DemandedMask.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
+    DemandedMask = DemandedMask.trunc(SrcBitWidth);
+    KnownZero = KnownZero.trunc(SrcBitWidth);
+    KnownOne = KnownOne.trunc(SrcBitWidth);
     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
                              KnownZero, KnownOne, Depth+1))
       return I;
-    DemandedMask.zext(BitWidth);
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    DemandedMask = DemandedMask.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
     // The top bits are known to be zero.
     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -451,17 +451,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // If any of the sign extended bits are demanded, we know that the sign
     // bit is demanded.
     if ((NewBits & DemandedMask) != 0)
-      InputDemandedBits.set(SrcBitWidth-1);
+      InputDemandedBits.setBit(SrcBitWidth-1);
       
-    InputDemandedBits.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
+    InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+    KnownZero = KnownZero.trunc(SrcBitWidth);
+    KnownOne = KnownOne.trunc(SrcBitWidth);
     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
                              KnownZero, KnownOne, Depth+1))
       return I;
-    InputDemandedBits.zext(BitWidth);
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+    InputDemandedBits = InputDemandedBits.zext(BitWidth);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); 
       
     // If the sign bit of the input is known set or clear, then we know the
@@ -576,8 +576,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Shl:
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+      
+      // If the shift is NUW/NSW, then it does demand the high bits.
+      ShlOperator *IOp = cast<ShlOperator>(I);
+      if (IOp->hasNoSignedWrap())
+        DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+      else if (IOp->hasNoUnsignedWrap())
+        DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+      
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, 
                                KnownZero, KnownOne, Depth+1))
         return I;
@@ -592,10 +600,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   case Instruction::LShr:
     // For a logical shift right
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       
       // Unsigned shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+      
+      // If the shift is exact, then it does demand the low bits (and knows that
+      // they are zero).
+      if (cast<LShrOperator>(I)->isExact())
+        DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
                                KnownZero, KnownOne, Depth+1))
         return I;
@@ -627,14 +641,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       return I->getOperand(0);
     
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       
       // Signed shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
       // If any of the "high bits" are demanded, we should set the sign bit as
       // demanded.
       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
-        DemandedMaskIn.set(BitWidth-1);
+        DemandedMaskIn.setBit(BitWidth-1);
+      
+      // If the shift is exact, then it does demand the low bits (and knows that
+      // they are zero).
+      if (cast<AShrOperator>(I)->isExact())
+        DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
                                KnownZero, KnownOne, Depth+1))
         return I;
@@ -793,10 +813,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     for (unsigned i = 0; i != VWidth; ++i)
       if (!DemandedElts[i]) {   // If not demanded, set to undef.
         Elts.push_back(Undef);
-        UndefElts.set(i);
+        UndefElts.setBit(i);
       } else if (isa<UndefValue>(CV->getOperand(i))) {   // Already undef.
         Elts.push_back(Undef);
-        UndefElts.set(i);
+        UndefElts.setBit(i);
       } else {                               // Otherwise, defined.
         Elts.push_back(CV->getOperand(i));
       }
@@ -879,13 +899,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // Otherwise, the element inserted overwrites whatever was there, so the
     // input demanded set is simpler than the output set.
     APInt DemandedElts2 = DemandedElts;
-    DemandedElts2.clear(IdxNo);
+    DemandedElts2.clearBit(IdxNo);
     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
                                       UndefElts, Depth+1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
 
     // The inserted element is defined.
-    UndefElts.clear(IdxNo);
+    UndefElts.clearBit(IdxNo);
     break;
   }
   case Instruction::ShuffleVector: {
@@ -900,9 +920,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
           assert(MaskVal < LHSVWidth * 2 &&
                  "shufflevector mask index out of range!");
           if (MaskVal < LHSVWidth)
-            LeftDemanded.set(MaskVal);
+            LeftDemanded.setBit(MaskVal);
           else
-            RightDemanded.set(MaskVal - LHSVWidth);
+            RightDemanded.setBit(MaskVal - LHSVWidth);
         }
       }
     }
@@ -921,16 +941,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     for (unsigned i = 0; i < VWidth; i++) {
       unsigned MaskVal = Shuffle->getMaskValue(i);
       if (MaskVal == -1u) {
-        UndefElts.set(i);
+        UndefElts.setBit(i);
       } else if (MaskVal < LHSVWidth) {
         if (UndefElts4[MaskVal]) {
           NewUndefElts = true;
-          UndefElts.set(i);
+          UndefElts.setBit(i);
         }
       } else {
         if (UndefElts3[MaskVal - LHSVWidth]) {
           NewUndefElts = true;
-          UndefElts.set(i);
+          UndefElts.setBit(i);
         }
       }
     }
@@ -973,7 +993,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       Ratio = VWidth/InVWidth;
       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
         if (DemandedElts[OutIdx])
-          InputDemandedElts.set(OutIdx/Ratio);
+          InputDemandedElts.setBit(OutIdx/Ratio);
       }
     } else {
       // Untested so far.
@@ -985,7 +1005,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       Ratio = InVWidth/VWidth;
       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
         if (DemandedElts[InIdx/Ratio])
-          InputDemandedElts.set(InIdx);
+          InputDemandedElts.setBit(InIdx);
     }
     
     // div/rem demand all inputs, because they don't want divide by zero.
@@ -1004,7 +1024,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       // undef.
       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
         if (UndefElts2[OutIdx/Ratio])
-          UndefElts.set(OutIdx);
+          UndefElts.setBit(OutIdx);
     } else if (VWidth < InVWidth) {
       llvm_unreachable("Unimp");
       // If there are more elements in the source than there are in the result,
@@ -1013,7 +1033,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
         if (!UndefElts2[InIdx])            // Not undef?
-          UndefElts.clear(InIdx/Ratio);    // Clear undef bit.
+          UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
     }
     break;
   }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a58124d7032e..5caa12dfdfa5 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
 /// is to leave as a vector operation.
 static bool CheapToScalarize(Value *V, bool isConstant) {
-  if (isa<ConstantAggregateZero>(V)) 
+  if (isa<ConstantAggregateZero>(V))
     return true;
   if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
     if (isConstant) return true;
@@ -31,7 +31,7 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
   }
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // Insert element gets simplified to the inserted element or is deleted if
   // this is constant idx extract element and its a constant idx insertelt.
   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
@@ -49,26 +49,24 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
         (CheapToScalarize(CI->getOperand(0), isConstant) ||
          CheapToScalarize(CI->getOperand(1), isConstant)))
       return true;
-  
+
   return false;
 }
 
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+/// getShuffleMask - Read and decode a shufflevector mask.
+/// Turn undef elements into negative values.
+static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
   unsigned NElts = SVI->getType()->getNumElements();
   if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 0);
+    return std::vector<int>(NElts, 0);
   if (isa<UndefValue>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 2*NElts);
-  
-  std::vector<unsigned> Result;
+    return std::vector<int>(NElts, -1);
+
+  std::vector<int> Result;
   const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
   for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
     if (isa<UndefValue>(*i))
-      Result.push_back(NElts*2);  // undef -> 8
+      Result.push_back(-1);  // undef
     else
       Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
   return Result;
@@ -83,42 +81,41 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   unsigned Width = PTy->getNumElements();
   if (EltNo >= Width)  // Out of range access.
     return UndefValue::get(PTy->getElementType());
-  
+
   if (isa<UndefValue>(V))
     return UndefValue::get(PTy->getElementType());
   if (isa<ConstantAggregateZero>(V))
     return Constant::getNullValue(PTy->getElementType());
   if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
     return CP->getOperand(EltNo);
-  
+
   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert to a variable element, we don't know what it is.
-    if (!isa<ConstantInt>(III->getOperand(2))) 
+    if (!isa<ConstantInt>(III->getOperand(2)))
       return 0;
     unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-    
+
     // If this is an insert to the element we are looking for, return the
     // inserted value.
-    if (EltNo == IIElt) 
+    if (EltNo == IIElt)
       return III->getOperand(1);
-    
+
     // Otherwise, the insertelement doesn't modify the value, recurse on its
     // vector input.
     return FindScalarElement(III->getOperand(0), EltNo);
   }
-  
+
   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
     unsigned LHSWidth =
-    cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-    unsigned InEl = getShuffleMask(SVI)[EltNo];
-    if (InEl < LHSWidth)
-      return FindScalarElement(SVI->getOperand(0), InEl);
-    else if (InEl < LHSWidth*2)
-      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
-    else
+      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+    int InEl = getShuffleMask(SVI)[EltNo];
+    if (InEl < 0)
       return UndefValue::get(PTy->getElementType());
+    if (InEl < (int)LHSWidth)
+      return FindScalarElement(SVI->getOperand(0), InEl);
+    return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
   }
-  
+
   // Otherwise, we don't know.
   return 0;
 }
@@ -127,11 +124,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // If vector val is undef, replace extract with scalar undef.
   if (isa<UndefValue>(EI.getOperand(0)))
     return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-  
+
   // If vector val is constant 0, replace extract with scalar 0.
   if (isa<ConstantAggregateZero>(EI.getOperand(0)))
     return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-  
+
   if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
     // If vector val is constant with all elements the same, replace EI with
     // that element. When the elements are not identical, we cannot replace yet
@@ -139,53 +136,53 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     Constant *op0 = C->getOperand(0);
     for (unsigned i = 1; i != C->getNumOperands(); ++i)
       if (C->getOperand(i) != op0) {
-        op0 = 0; 
+        op0 = 0;
         break;
       }
     if (op0)
       return ReplaceInstUsesWith(EI, op0);
   }
-  
+
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
     unsigned IndexVal = IdxC->getZExtValue();
     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-    
+
     // If this is extracting an invalid index, turn this into undef, to avoid
     // crashing the code below.
     if (IndexVal >= VectorWidth)
       return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-    
+
     // This instruction only demands the single element from the input vector.
     // If the input vector has a single use, simplify it based on this use
     // property.
     if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
       APInt UndefElts(VectorWidth, 0);
       APInt DemandedMask(VectorWidth, 0);
-      DemandedMask.set(IndexVal);
+      DemandedMask.setBit(IndexVal);
       if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
                                                 DemandedMask, UndefElts)) {
         EI.setOperand(0, V);
         return &EI;
       }
     }
-    
+
     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
       return ReplaceInstUsesWith(EI, Elt);
-    
+
     // If the this extractelement is directly using a bitcast from a vector of
     // the same number of elements, see if we can find the source element from
     // it.  In this case, we will end up needing to bitcast the scalars.
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
-      if (const VectorType *VT = 
+      if (const VectorType *VT =
           dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
         if (VT->getNumElements() == VectorWidth)
           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
             return new BitCastInst(Elt, EI.getType());
     }
   }
-  
+
   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
     // Push extractelement into predecessor operation if legal and
     // profitable to do so
@@ -193,11 +190,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       if (I->hasOneUse() &&
           CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
         Value *newEI0 =
-        Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
-                                      EI.getName()+".lhs");
+          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                        EI.getName()+".lhs");
         Value *newEI1 =
-        Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
-                                      EI.getName()+".rhs");
+          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                        EI.getName()+".rhs");
         return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
       }
     } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
@@ -215,21 +212,22 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       // If this is extracting an element from a shufflevector, figure out where
       // it came from and extract from the appropriate input element instead.
       if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
-        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+        int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
         Value *Src;
         unsigned LHSWidth =
-        cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-        
-        if (SrcIdx < LHSWidth)
+          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+        if (SrcIdx < 0)
+          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+        if (SrcIdx < (int)LHSWidth)
           Src = SVI->getOperand(0);
-        else if (SrcIdx < LHSWidth*2) {
+        else {
           SrcIdx -= LHSWidth;
           Src = SVI->getOperand(1);
-        } else {
-          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
         }
+        const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
         return ExtractElementInst::Create(Src,
-                                          ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+                                          ConstantInt::get(Int32Ty,
                                                            SrcIdx, false));
       }
     }
@@ -239,42 +237,42 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
 }
 
 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true. 
+/// elements from either LHS or RHS, return the shuffle mask and true.
 /// Otherwise, return false.
 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                          std::vector<Constant*> &Mask) {
   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
          "Invalid CollectSingleShuffleElements");
   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-  
+
   if (isa<UndefValue>(V)) {
     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
     return true;
   }
-  
+
   if (V == LHS) {
     for (unsigned i = 0; i != NumElts; ++i)
       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
     return true;
   }
-  
+
   if (V == RHS) {
     for (unsigned i = 0; i != NumElts; ++i)
       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
                                       i+NumElts));
     return true;
   }
-  
+
   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert of an extract from some other vector, include it.
     Value *VecOp    = IEI->getOperand(0);
     Value *ScalarOp = IEI->getOperand(1);
     Value *IdxOp    = IEI->getOperand(2);
-    
+
     if (!isa<ConstantInt>(IdxOp))
       return false;
     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-    
+
     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
       // Okay, we can handle this if the vector we are insertinting into is
       // transitively ok.
@@ -282,13 +280,13 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
         // If so, update the mask to reflect the inserted undef.
         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
         return true;
-      }      
+      }
     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
       if (isa<ConstantInt>(EI->getOperand(1)) &&
           EI->getOperand(0)->getType() == V->getType()) {
         unsigned ExtractedIdx =
         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-        
+
         // This must be extracting from either LHS or RHS.
         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
           // Okay, we can handle this if the vector we are insertinting into is
@@ -296,15 +294,14 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
           if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
             // If so, update the mask to reflect the inserted value.
             if (EI->getOperand(0) == LHS) {
-              Mask[InsertedIdx % NumElts] = 
+              Mask[InsertedIdx % NumElts] =
               ConstantInt::get(Type::getInt32Ty(V->getContext()),
                                ExtractedIdx);
             } else {
               assert(EI->getOperand(0) == RHS);
-              Mask[InsertedIdx % NumElts] = 
+              Mask[InsertedIdx % NumElts] =
               ConstantInt::get(Type::getInt32Ty(V->getContext()),
                                ExtractedIdx+NumElts);
-              
             }
             return true;
           }
@@ -313,7 +310,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
     }
   }
   // TODO: Handle shufflevector here!
-  
+
   return false;
 }
 
@@ -322,11 +319,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
 /// that computes V and the LHS value of the shuffle.
 static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
                                      Value *&RHS) {
-  assert(V->getType()->isVectorTy() && 
+  assert(V->getType()->isVectorTy() &&
          (RHS == 0 || V->getType() == RHS->getType()) &&
          "Invalid shuffle!");
   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-  
+
   if (isa<UndefValue>(V)) {
     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
     return V;
@@ -338,25 +335,25 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
     Value *VecOp    = IEI->getOperand(0);
     Value *ScalarOp = IEI->getOperand(1);
     Value *IdxOp    = IEI->getOperand(2);
-    
+
     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
           EI->getOperand(0)->getType() == V->getType()) {
         unsigned ExtractedIdx =
-        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-        
+
         // Either the extracted from or inserted into vector must be RHSVec,
         // otherwise we'd end up with a shuffle of three inputs.
         if (EI->getOperand(0) == RHS || RHS == 0) {
           RHS = EI->getOperand(0);
           Value *V = CollectShuffleElements(VecOp, Mask, RHS);
-          Mask[InsertedIdx % NumElts] = 
-          ConstantInt::get(Type::getInt32Ty(V->getContext()),
-                           NumElts+ExtractedIdx);
+          Mask[InsertedIdx % NumElts] =
+            ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                             NumElts+ExtractedIdx);
           return V;
         }
-        
+
         if (VecOp == RHS) {
           Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
           // Everything but the extracted element is replaced with the RHS.
@@ -367,7 +364,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
           }
           return V;
         }
-        
+
         // If this insertelement is a chain that comes from exactly these two
         // vectors, return the vector and the effective shuffle.
         if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
@@ -376,7 +373,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
     }
   }
   // TODO: Handle shufflevector here!
-  
+
   // Otherwise, can't do anything fancy.  Return an identity vector.
   for (unsigned i = 0; i != NumElts; ++i)
     Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
@@ -387,32 +384,32 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   Value *VecOp    = IE.getOperand(0);
   Value *ScalarOp = IE.getOperand(1);
   Value *IdxOp    = IE.getOperand(2);
-  
+
   // Inserting an undef or into an undefined place, remove this.
   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
     ReplaceInstUsesWith(IE, VecOp);
-  
-  // If the inserted element was extracted from some other vector, and if the 
+
+  // If the inserted element was extracted from some other vector, and if the
   // indexes are constant, try to turn this into a shufflevector operation.
   if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
         EI->getOperand(0)->getType() == IE.getType()) {
       unsigned NumVectorElts = IE.getType()->getNumElements();
       unsigned ExtractedIdx =
-      cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-      
+
       if (ExtractedIdx >= NumVectorElts) // Out of range extract.
         return ReplaceInstUsesWith(IE, VecOp);
-      
+
       if (InsertedIdx >= NumVectorElts)  // Out of range insert.
         return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-      
+
       // If we are extracting a value from a vector, then inserting it right
       // back into the same place, just use the input vector.
       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
-        return ReplaceInstUsesWith(IE, VecOp);      
-      
+        return ReplaceInstUsesWith(IE, VecOp);
+
       // If this insertelement isn't used by some other insertelement, turn it
       // (and any insertelements it points to), into one big shuffle.
       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
@@ -421,18 +418,20 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
         Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
         if (RHS == 0) RHS = UndefValue::get(LHS->getType());
         // We now have a shuffle of LHS, RHS, Mask.
-        return new ShuffleVectorInst(LHS, RHS,
-                                     ConstantVector::get(Mask));
+        return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
       }
     }
   }
-  
+
   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
   APInt UndefElts(VWidth, 0);
   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+    if (V != &IE)
+      return ReplaceInstUsesWith(IE, V);
     return &IE;
-  
+  }
+
   return 0;
 }
 
@@ -440,27 +439,29 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
-  std::vector<unsigned> Mask = getShuffleMask(&SVI);
-  
+  std::vector<int> Mask = getShuffleMask(&SVI);
+
   bool MadeChange = false;
-  
+
   // Undefined shuffle mask -> undefined value.
   if (isa<UndefValue>(SVI.getOperand(2)))
     return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-  
+
   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-  
+
   if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
     return 0;
-  
+
   APInt UndefElts(VWidth, 0);
   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+    if (V != &SVI)
+      return ReplaceInstUsesWith(SVI, V);
     LHS = SVI.getOperand(0);
     RHS = SVI.getOperand(1);
     MadeChange = true;
   }
-  
+
   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
   // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
   if (LHS == RHS || isa<UndefValue>(LHS)) {
@@ -468,16 +469,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       // shuffle(undef,undef,mask) -> undef.
       return ReplaceInstUsesWith(SVI, LHS);
     }
-    
+
     // Remap any references to RHS to use LHS.
     std::vector<Constant*> Elts;
     for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-      if (Mask[i] >= 2*e)
+      if (Mask[i] < 0)
         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
       else {
-        if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
-            (Mask[i] <  e && isa<UndefValue>(LHS))) {
-          Mask[i] = 2*e;     // Turn into undef.
+        if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+            (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
+          Mask[i] = -1;     // Turn into undef.
           Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
         } else {
           Mask[i] = Mask[i] % e;  // Force to LHS.
@@ -493,59 +494,65 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     RHS = SVI.getOperand(1);
     MadeChange = true;
   }
-  
+
   // Analyze the shuffle, are the LHS or RHS and identity shuffles?
   bool isLHSID = true, isRHSID = true;
-  
+
   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-    if (Mask[i] >= e*2) continue;  // Ignore undef values.
+    if (Mask[i] < 0) continue;  // Ignore undef values.
     // Is this an identity shuffle of the LHS value?
-    isLHSID &= (Mask[i] == i);
-    
+    isLHSID &= (Mask[i] == (int)i);
+
     // Is this an identity shuffle of the RHS value?
     isRHSID &= (Mask[i]-e == i);
   }
-  
+
   // Eliminate identity shuffles.
   if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
   if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-  
+
   // If the LHS is a shufflevector itself, see if we can combine it with this
   // one without producing an unusual shuffle.  Here we are really conservative:
   // we are absolutely afraid of producing a shuffle mask not in the input
   // program, because the code gen may not be smart enough to turn a merged
   // shuffle into two specific shuffles: it may produce worse code.  As such,
-  // we only merge two shuffles if the result is one of the two input shuffle
-  // masks.  In this case, merging the shuffles just removes one instruction,
-  // which we know is safe.  This is good for things like turning:
-  // (splat(splat)) -> splat.
+  // we only merge two shuffles if the result is either a splat or one of the
+  // two input shuffle masks.  In this case, merging the shuffles just removes
+  // one instruction, which we know is safe.  This is good for things like
+  // turning: (splat(splat)) -> splat.
   if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
     if (isa<UndefValue>(RHS)) {
-      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-      
+      std::vector<int> LHSMask = getShuffleMask(LHSSVI);
+
       if (LHSMask.size() == Mask.size()) {
-        std::vector<unsigned> NewMask;
-        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
-          if (Mask[i] >= e)
-            NewMask.push_back(2*e);
+        std::vector<int> NewMask;
+        bool isSplat = true;
+        int SplatElt = -1; // undef
+        for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+          int MaskElt;
+          if (Mask[i] < 0 || Mask[i] >= (int)e)
+            MaskElt = -1; // undef
           else
-            NewMask.push_back(LHSMask[Mask[i]]);
-        
+            MaskElt = LHSMask[Mask[i]];
+          // Check if this could still be a splat.
+          if (MaskElt >= 0) {
+            if (SplatElt >=0 && SplatElt != MaskElt)
+              isSplat = false;
+            SplatElt = MaskElt;
+          }
+          NewMask.push_back(MaskElt);
+        }
+
         // If the result mask is equal to the src shuffle or this
         // shuffle mask, do the replacement.
-        if (NewMask == LHSMask || NewMask == Mask) {
-          unsigned LHSInNElts =
-          cast<VectorType>(LHSSVI->getOperand(0)->getType())->
-          getNumElements();
+        if (isSplat || NewMask == LHSMask || NewMask == Mask) {
           std::vector<Constant*> Elts;
+          const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
           for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
-            if (NewMask[i] >= LHSInNElts*2) {
-              Elts.push_back(UndefValue::get(
-                                             Type::getInt32Ty(SVI.getContext())));
+            if (NewMask[i] < 0) {
+              Elts.push_back(UndefValue::get(Int32Ty));
             } else {
-              Elts.push_back(ConstantInt::get(
-                                              Type::getInt32Ty(SVI.getContext()),
-                                              NewMask[i]));
+              Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
             }
           }
           return new ShuffleVectorInst(LHSSVI->getOperand(0),
@@ -555,7 +562,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       }
     }
   }
-  
+
   return MadeChange ? &SVI : 0;
 }
-
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index e46c67994e2b..37123d0621eb 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
 #include <algorithm>
 #include <climits>
 using namespace llvm;
@@ -57,11 +58,22 @@ STATISTIC(NumCombined , "Number of insts combined");
 STATISTIC(NumConstProp, "Number of constant folds");
 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
 STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand,    "Number of expansions");
+STATISTIC(NumFactor   , "Number of factorizations");
+STATISTIC(NumReassoc  , "Number of reassociations");
 
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+  initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+  initializeInstCombine(*unwrap(R));
+}
 
 char InstCombiner::ID = 0;
 INITIALIZE_PASS(InstCombiner, "instcombine",
-                "Combine redundant instructions", false, false);
+                "Combine redundant instructions", false, false)
 
 void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreservedID(LCSSAID);
@@ -97,53 +109,326 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
 }
 
 
-// SimplifyCommutative - This performs a few simplifications for commutative
-// operators:
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+//  Commutative operators:
 //
 //  1. Order operands such that they are listed from right (least complex) to
 //     left (most complex).  This puts constants before unary operators before
 //     binary operators.
 //
-//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
-//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+//  Associative operators:
+//
+//  2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+//  3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+//  Associative and commutative operators:
+//
+//  4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+//  5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+//  6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+//     if C1 and C2 are constants.
 //
-bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+  Instruction::BinaryOps Opcode = I.getOpcode();
   bool Changed = false;
-  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
-    Changed = !I.swapOperands();
 
-  if (!I.isAssociative()) return Changed;
-  
-  Instruction::BinaryOps Opcode = I.getOpcode();
-  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
-    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
-      if (isa<Constant>(I.getOperand(1))) {
-        Constant *Folded = ConstantExpr::get(I.getOpcode(),
-                                             cast<Constant>(I.getOperand(1)),
-                                             cast<Constant>(Op->getOperand(1)));
-        I.setOperand(0, Op->getOperand(0));
-        I.setOperand(1, Folded);
-        return true;
+  do {
+    // Order operands such that they are listed from right (least complex) to
+    // left (most complex).  This puts constants before unary operators before
+    // binary operators.
+    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+        getComplexity(I.getOperand(1)))
+      Changed = !I.swapOperands();
+
+    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+    if (I.isAssociative()) {
+      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+      if (Op0 && Op0->getOpcode() == Opcode) {
+        Value *A = Op0->getOperand(0);
+        Value *B = Op0->getOperand(1);
+        Value *C = I.getOperand(1);
+
+        // Does "B op C" simplify?
+        if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+          // It simplifies to V.  Form "A op V".
+          I.setOperand(0, A);
+          I.setOperand(1, V);
+          // Conservatively clear the optional flags, since they may not be
+          // preserved by the reassociation.
+          I.clearSubclassOptionalData();
+          Changed = true;
+          ++NumReassoc;
+          continue;
+        }
       }
-      
-      if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
-        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
-            Op->hasOneUse() && Op1->hasOneUse()) {
-          Constant *C1 = cast<Constant>(Op->getOperand(1));
-          Constant *C2 = cast<Constant>(Op1->getOperand(1));
-
-          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
-          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
-          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
-                                                    Op1->getOperand(0),
-                                                    Op1->getName(), &I);
-          Worklist.Add(New);
-          I.setOperand(0, New);
-          I.setOperand(1, Folded);
-          return true;
+
+      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+      if (Op1 && Op1->getOpcode() == Opcode) {
+        Value *A = I.getOperand(0);
+        Value *B = Op1->getOperand(0);
+        Value *C = Op1->getOperand(1);
+
+        // Does "A op B" simplify?
+        if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+          // It simplifies to V.  Form "V op C".
+          I.setOperand(0, V);
+          I.setOperand(1, C);
+          // Conservatively clear the optional flags, since they may not be
+          // preserved by the reassociation.
+          I.clearSubclassOptionalData();
+          Changed = true;
+          ++NumReassoc;
+          continue;
         }
+      }
     }
-  return Changed;
+
+    if (I.isAssociative() && I.isCommutative()) {
+      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+      if (Op0 && Op0->getOpcode() == Opcode) {
+        Value *A = Op0->getOperand(0);
+        Value *B = Op0->getOperand(1);
+        Value *C = I.getOperand(1);
+
+        // Does "C op A" simplify?
+        if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+          // It simplifies to V.  Form "V op B".
+          I.setOperand(0, V);
+          I.setOperand(1, B);
+          // Conservatively clear the optional flags, since they may not be
+          // preserved by the reassociation.
+          I.clearSubclassOptionalData();
+          Changed = true;
+          ++NumReassoc;
+          continue;
+        }
+      }
+
+      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+      if (Op1 && Op1->getOpcode() == Opcode) {
+        Value *A = I.getOperand(0);
+        Value *B = Op1->getOperand(0);
+        Value *C = Op1->getOperand(1);
+
+        // Does "C op A" simplify?
+        if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+          // It simplifies to V.  Form "B op V".
+          I.setOperand(0, B);
+          I.setOperand(1, V);
+          // Conservatively clear the optional flags, since they may not be
+          // preserved by the reassociation.
+          I.clearSubclassOptionalData();
+          Changed = true;
+          ++NumReassoc;
+          continue;
+        }
+      }
+
+      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+      // if C1 and C2 are constants.
+      if (Op0 && Op1 &&
+          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+          isa<Constant>(Op0->getOperand(1)) &&
+          isa<Constant>(Op1->getOperand(1)) &&
+          Op0->hasOneUse() && Op1->hasOneUse()) {
+        Value *A = Op0->getOperand(0);
+        Constant *C1 = cast<Constant>(Op0->getOperand(1));
+        Value *B = Op1->getOperand(0);
+        Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+        Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+        Instruction *New = BinaryOperator::Create(Opcode, A, B, Op1->getName(),
+                                                  &I);
+        Worklist.Add(New);
+        I.setOperand(0, New);
+        I.setOperand(1, Folded);
+        // Conservatively clear the optional flags, since they may not be
+        // preserved by the reassociation.
+        I.clearSubclassOptionalData();
+        Changed = true;
+        continue;
+      }
+    }
+
+    // No further simplifications.
+    return Changed;
+  } while (1);
+}
+
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+                                     Instruction::BinaryOps ROp) {
+  switch (LOp) {
+  default:
+    return false;
+
+  case Instruction::And:
+    // And distributes over Or and Xor.
+    switch (ROp) {
+    default:
+      return false;
+    case Instruction::Or:
+    case Instruction::Xor:
+      return true;
+    }
+
+  case Instruction::Mul:
+    // Multiplication distributes over addition and subtraction.
+    switch (ROp) {
+    default:
+      return false;
+    case Instruction::Add:
+    case Instruction::Sub:
+      return true;
+    }
+
+  case Instruction::Or:
+    // Or distributes over And.
+    switch (ROp) {
+    default:
+      return false;
+    case Instruction::And:
+      return true;
+    }
+  }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+                                     Instruction::BinaryOps ROp) {
+  if (Instruction::isCommutative(ROp))
+    return LeftDistributesOverRight(ROp, LOp);
+  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+  // but this requires knowing that the addition does not overflow and other
+  // such subtleties.
+  return false;
+}
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win).  Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+  Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+  // Factorization.
+  if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+    // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
+    // a common term.
+    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+    Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+    // Does "X op' Y" always equal "Y op' X"?
+    bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+    // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+    if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+      // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+      // commutative case, "(A op' B) op (C op' A)"?
+      if (A == C || (InnerCommutative && A == D)) {
+        if (A != C)
+          std::swap(C, D);
+        // Consider forming "A op' (B op D)".
+        // If "B op D" simplifies then it can be formed with no cost.
+        Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+        // If "B op D" doesn't simplify then only go on if both of the existing
+        // operations "A op' B" and "C op' D" will be zapped as no longer used.
+        if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+          V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+        if (V) {
+          ++NumFactor;
+          V = Builder->CreateBinOp(InnerOpcode, A, V);
+          V->takeName(&I);
+          return V;
+        }
+      }
+
+    // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+    if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+      // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+      // commutative case, "(A op' B) op (B op' D)"?
+      if (B == D || (InnerCommutative && B == C)) {
+        if (B != D)
+          std::swap(C, D);
+        // Consider forming "(A op C) op' B".
+        // If "A op C" simplifies then it can be formed with no cost.
+        Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+        // If "A op C" doesn't simplify then only go on if both of the existing
+        // operations "A op' B" and "C op' D" will be zapped as no longer used.
+        if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+          V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+        if (V) {
+          ++NumFactor;
+          V = Builder->CreateBinOp(InnerOpcode, V, B);
+          V->takeName(&I);
+          return V;
+        }
+      }
+  }
+
+  // Expansion.
+  if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+    // The instruction has the form "(A op' B) op C".  See if expanding it out
+    // to "(A op C) op' (B op C)" results in simplifications.
+    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+    // Do "A op C" and "B op C" both simplify?
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+        // They do! Return "L op' R".
+        ++NumExpand;
+        // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+        if ((L == A && R == B) ||
+            (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+          return Op0;
+        // Otherwise return "L op' R" if it simplifies.
+        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+          return V;
+        // Otherwise, create a new instruction.
+        C = Builder->CreateBinOp(InnerOpcode, L, R);
+        C->takeName(&I);
+        return C;
+      }
+  }
+
+  if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+    // The instruction has the form "A op (B op' C)".  See if expanding it out
+    // to "(A op B) op' (A op C)" results in simplifications.
+    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+    // Do "A op B" and "A op C" both simplify?
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+        // They do! Return "L op' R".
+        ++NumExpand;
+        // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+        if ((L == B && R == C) ||
+            (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+          return Op1;
+        // Otherwise return "L op' R" if it simplifies.
+        if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+          return V;
+        // Otherwise, create a new instruction.
+        A = Builder->CreateBinOp(InnerOpcode, L, R);
+        A->takeName(&I);
+        return A;
+      }
+  }
+
+  return 0;
 }
 
 // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -185,8 +470,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
 
 static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                              InstCombiner *IC) {
-  if (CastInst *CI = dyn_cast<CastInst>(&I))
+  if (CastInst *CI = dyn_cast<CastInst>(&I)) {
     return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+  }
 
   // Figure out if the constant is the left or the right argument.
   bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -228,11 +514,24 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
     // Bool selects with constant operands can be folded to logical ops.
     if (SI->getType()->isIntegerTy(1)) return 0;
 
+    // If it's a bitcast involving vectors, make sure it has the same number of
+    // elements on both sides.
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+      const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+      const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+      // Verify that either both or neither are vectors.
+      if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+      // If vectors, verify that they have the same number of elements.
+      if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+        return 0;
+    }
+    
     Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
     Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
 
-    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
-                              SelectFalseVal);
+    return SelectInst::Create(SI->getCondition(),
+                              SelectTrueVal, SelectFalseVal);
   }
   return 0;
 }
@@ -242,20 +541,25 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
 /// has a PHI node as operand #0, see if we can fold the instruction into the
 /// PHI (which is only possible if all operands to the PHI are constants).
 ///
-/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-/// that would normally be unprofitable because they strongly encourage jump
-/// threading.
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
-                                         bool AllowAggressive) {
-  AllowAggressive = false;
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   PHINode *PN = cast<PHINode>(I.getOperand(0));
   unsigned NumPHIValues = PN->getNumIncomingValues();
-  if (NumPHIValues == 0 ||
-      // We normally only transform phis with a single use, unless we're trying
-      // hard to make jump threading happen.
-      (!PN->hasOneUse() && !AllowAggressive))
+  if (NumPHIValues == 0)
     return 0;
   
+  // We normally only transform phis with a single use.  However, if a PHI has
+  // multiple uses and they are all the same operation, we can fold *all* of the
+  // uses into the PHI.
+  if (!PN->hasOneUse()) {
+    // Walk the use list for the instruction, comparing them to I.
+    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+         UI != E; ++UI) {
+      Instruction *User = cast<Instruction>(*UI);
+      if (User != &I && !I.isIdenticalTo(User))
+        return 0;
+    }
+    // Otherwise, we can replace *all* users with the new PHI we form.
+  }
   
   // Check to see if all of the operands of the PHI are simple constants
   // (constantint/constantfp/undef).  If there is one non-constant value,
@@ -263,24 +567,34 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
   // bail out.  We don't do arbitrary constant expressions here because moving
   // their computation can be expensive without a cost model.
   BasicBlock *NonConstBB = 0;
-  for (unsigned i = 0; i != NumPHIValues; ++i)
-    if (!isa<Constant>(PN->getIncomingValue(i)) ||
-        isa<ConstantExpr>(PN->getIncomingValue(i))) {
-      if (NonConstBB) return 0;  // More than one non-const value.
-      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
-      NonConstBB = PN->getIncomingBlock(i);
-      
-      // If the incoming non-constant value is in I's block, we have an infinite
-      // loop.
-      if (NonConstBB == I.getParent())
+  for (unsigned i = 0; i != NumPHIValues; ++i) {
+    Value *InVal = PN->getIncomingValue(i);
+    if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+      continue;
+
+    if (isa<PHINode>(InVal)) return 0;  // Itself a phi.
+    if (NonConstBB) return 0;  // More than one non-const value.
+    
+    NonConstBB = PN->getIncomingBlock(i);
+
+    // If the InVal is an invoke at the end of the pred block, then we can't
+    // insert a computation after it without breaking the edge.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+      if (II->getParent() == NonConstBB)
         return 0;
-    }
+    
+    // If the incoming non-constant value is in I's block, we will remove one
+    // instruction, but insert another equivalent one, leading to infinite
+    // instcombine.
+    if (NonConstBB == I.getParent())
+      return 0;
+  }
   
   // If there is exactly one non-constant value, we can insert a copy of the
   // operation in that block.  However, if this is a critical edge, we would be
   // inserting the computation one some other paths (e.g. inside a loop).  Only
   // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB != 0 && !AllowAggressive) {
+  if (NonConstBB != 0) {
     BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
     if (!BI || !BI->isUnconditional()) return 0;
   }
@@ -290,7 +604,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
   NewPN->reserveOperandSpace(PN->getNumOperands()/2);
   InsertNewInstBefore(NewPN, *PN);
   NewPN->takeName(PN);
-
+  
+  // If we are going to have to insert a new computation, do so right before the
+  // predecessors terminator.
+  if (NonConstBB)
+    Builder->SetInsertPoint(NonConstBB->getTerminator());
+  
   // Next, add all of the operands to the PHI.
   if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
     // We only currently try to fold the condition of a select when it is a phi,
@@ -303,42 +622,36 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
       Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
       Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
       Value *InV = 0;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
         InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
-      } else {
-        assert(PN->getIncomingBlock(i) == NonConstBB);
-        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
-                                 FalseVInPred,
-                                 "phitmp", NonConstBB->getTerminator());
-        Worklist.Add(cast<Instruction>(InV));
-      }
+      else
+        InV = Builder->CreateSelect(PN->getIncomingValue(i),
+                                    TrueVInPred, FalseVInPred, "phitmp");
       NewPN->addIncoming(InV, ThisBB);
     }
+  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+    Constant *C = cast<Constant>(I.getOperand(1));
+    for (unsigned i = 0; i != NumPHIValues; ++i) {
+      Value *InV = 0;
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+        InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+      else if (isa<ICmpInst>(CI))
+        InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+                                  C, "phitmp");
+      else
+        InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+                                  C, "phitmp");
+      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+    }
   } else if (I.getNumOperands() == 2) {
     Constant *C = cast<Constant>(I.getOperand(1));
     for (unsigned i = 0; i != NumPHIValues; ++i) {
       Value *InV = 0;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
-        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
-        else
-          InV = ConstantExpr::get(I.getOpcode(), InC, C);
-      } else {
-        assert(PN->getIncomingBlock(i) == NonConstBB);
-        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 
-          InV = BinaryOperator::Create(BO->getOpcode(),
-                                       PN->getIncomingValue(i), C, "phitmp",
-                                       NonConstBB->getTerminator());
-        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = CmpInst::Create(CI->getOpcode(),
-                                CI->getPredicate(),
-                                PN->getIncomingValue(i), C, "phitmp",
-                                NonConstBB->getTerminator());
-        else
-          llvm_unreachable("Unknown binop!");
-        
-        Worklist.Add(cast<Instruction>(InV));
-      }
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+        InV = ConstantExpr::get(I.getOpcode(), InC, C);
+      else
+        InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+                                   PN->getIncomingValue(i), C, "phitmp");
       NewPN->addIncoming(InV, PN->getIncomingBlock(i));
     }
   } else { 
@@ -346,18 +659,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
     const Type *RetTy = CI->getType();
     for (unsigned i = 0; i != NumPHIValues; ++i) {
       Value *InV;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
         InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
-      } else {
-        assert(PN->getIncomingBlock(i) == NonConstBB);
-        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), 
-                               I.getType(), "phitmp", 
-                               NonConstBB->getTerminator());
-        Worklist.Add(cast<Instruction>(InV));
-      }
+      else 
+        InV = Builder->CreateCast(CI->getOpcode(),
+                                PN->getIncomingValue(i), I.getType(), "phitmp");
       NewPN->addIncoming(InV, PN->getIncomingBlock(i));
     }
   }
+  
+  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+       UI != E; ) {
+    Instruction *User = cast<Instruction>(*UI++);
+    if (User == &I) continue;
+    ReplaceInstUsesWith(*User, NewPN);
+    EraseInstFromFunction(*User);
+  }
   return ReplaceInstUsesWith(I, NewPN);
 }
 
@@ -432,28 +749,35 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
   Value *PtrOp = GEP.getOperand(0);
 
-  if (isa<UndefValue>(GEP.getOperand(0)))
-    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
-
-  // Eliminate unneeded casts for indices.
+  // Eliminate unneeded casts for indices, and replace indices which displace
+  // by multiples of a zero size type with zero.
   if (TD) {
     bool MadeChange = false;
-    unsigned PtrSize = TD->getPointerSizeInBits();
-    
+    const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+
     gep_type_iterator GTI = gep_type_begin(GEP);
     for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
          I != E; ++I, ++GTI) {
-      if (!isa<SequentialType>(*GTI)) continue;
-      
-      // If we are using a wider index than needed for this platform, shrink it
-      // to what we need.  If narrower, sign-extend it to what we need.  This
-      // explicit cast can make subsequent optimizations more obvious.
-      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
-      if (OpBits == PtrSize)
-        continue;
-      
-      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
-      MadeChange = true;
+      // Skip indices into struct types.
+      const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+      if (!SeqTy) continue;
+
+      // If the element type has zero size then any index over it is equivalent
+      // to an index of zero, so replace it with zero if it is not zero already.
+      if (SeqTy->getElementType()->isSized() &&
+          TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+        if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+          *I = Constant::getNullValue(IntPtrTy);
+          MadeChange = true;
+        }
+
+      if ((*I)->getType() != IntPtrTy) {
+        // If we are using a wider index than needed for this platform, shrink
+        // it to what we need.  If narrower, sign-extend it to what we need.
+        // This explicit cast can make subsequent optimizations more obvious.
+        *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+        MadeChange = true;
+      }
     }
     if (MadeChange) return &GEP;
   }
@@ -940,6 +1264,14 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateAdd(LHS, RHS);
         }
+          
+        // If the normal result of the add is dead, and the RHS is a constant,
+        // we can transform this into a range comparison.
+        // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
+        if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+            return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+                                ConstantExpr::getNot(CI));
         break;
       case Intrinsic::usub_with_overflow:
       case Intrinsic::ssub_with_overflow:
@@ -964,10 +1296,37 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       }
     }
   }
-  // Can't simplify extracts from other values. Note that nested extracts are
-  // already simplified implicitely by the above (extract ( extract (insert) )
+  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+    // If the (non-volatile) load only has one use, we can rewrite this to a
+    // load from a GEP. This reduces the size of the load.
+    // FIXME: If a load is used only by extractvalue instructions then this
+    //        could be done regardless of having multiple uses.
+    if (!L->isVolatile() && L->hasOneUse()) {
+      // extractvalue has integer indices, getelementptr has Value*s. Convert.
+      SmallVector<Value*, 4> Indices;
+      // Prefix an i32 0 since we need the first element.
+      Indices.push_back(Builder->getInt32(0));
+      for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+            I != E; ++I)
+        Indices.push_back(Builder->getInt32(*I));
+
+      // We need to insert these at the location of the old load, not at that of
+      // the extractvalue.
+      Builder->SetInsertPoint(L->getParent(), L);
+      Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
+                                              Indices.begin(), Indices.end());
+      // Returning the load directly will cause the main loop to insert it in
+      // the wrong spot, so use ReplaceInstUsesWith().
+      return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+    }
+  // We could simplify extracts from other values. Note that nested extracts may
+  // already be simplified implicitly by the above: extract (extract (insert) )
   // will be translated into extract ( insert ( extract ) ) first and then just
-  // the value inserted, if appropriate).
+  // the value inserted, if appropriate. Similarly for extracts from single-use
+  // loads: extract (extract (load)) will be translated to extract (load (gep))
+  // and if again single-use then via load (gep (gep)) to load (gep).
+  // However, double extracts from e.g. function arguments or return values
+  // aren't handled yet.
   return 0;
 }
 
@@ -1023,10 +1382,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
   bool MadeIRChange = false;
   SmallVector<BasicBlock*, 256> Worklist;
   Worklist.push_back(BB);
-  
-  std::vector<Instruction*> InstrsForInstCombineWorklist;
-  InstrsForInstCombineWorklist.reserve(128);
 
+  SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
   SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
   
   do {
@@ -1231,6 +1588,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
         DEBUG(errs() << "IC: Old = " << *I << '\n'
                      << "    New = " << *Result << '\n');
 
+        Result->setDebugLoc(I->getDebugLoc());
         // Everything uses the new instruction now.
         I->replaceAllUsesWith(Result);
 
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 128bf489787c..0ac1cb09bce7 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_llvm_library(LLVMInstrumentation
   EdgeProfiling.cpp
+  Instrumentation.cpp
   OptimalEdgeProfiling.cpp
+  PathProfiling.cpp
   ProfilingUtils.cpp
   )
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index a77d70cd1c1b..1d31fcc4df3f 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -17,6 +17,7 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "insert-edge-profiling"
+
 #include "ProfilingUtils.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
@@ -34,7 +35,9 @@ namespace {
     bool runOnModule(Module &M);
   public:
     static char ID; // Pass identification, replacement for typeid
-    EdgeProfiler() : ModulePass(ID) {}
+    EdgeProfiler() : ModulePass(ID) {
+      initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual const char *getPassName() const {
       return "Edge Profiler";
@@ -44,7 +47,7 @@ namespace {
 
 char EdgeProfiler::ID = 0;
 INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
-                "Insert instrumentation for edge profiling", false, false);
+                "Insert instrumentation for edge profiling", false, false)
 
 ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
 
@@ -98,7 +101,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
           // otherwise insert it in the successor block.
           if (TI->getNumSuccessors() == 1) {
             // Insert counter at the start of the block
-            IncrementCounterInBlock(BB, i++, Counters);
+            IncrementCounterInBlock(BB, i++, Counters, false);
           } else {
             // Insert counter at the start of the block
             IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 000000000000..96ed4fa5c0fe
--- /dev/null
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,32 @@
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+  initializeEdgeProfilerPass(Registry);
+  initializeOptimalEdgeProfilerPass(Registry);
+  initializePathProfilerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+  initializeInstrumentation(*unwrap(R));
+}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 8eec9872812d..c85a1a9391d4 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -36,7 +36,9 @@ namespace {
     bool runOnModule(Module &M);
   public:
     static char ID; // Pass identification, replacement for typeid
-    OptimalEdgeProfiler() : ModulePass(ID) {}
+    OptimalEdgeProfiler() : ModulePass(ID) {
+      initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+    }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +52,14 @@ namespace {
 }
 
 char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", 
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+                "Insert optimal instrumentation for edge profiling",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
                 "Insert optimal instrumentation for edge profiling",
-                false, false);
+                false, false)
 
 ModulePass *llvm::createOptimalEdgeProfilerPass() {
   return new OptimalEdgeProfiler();
@@ -125,11 +132,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
     // Calculate a Maximum Spanning Tree with the edge weights determined by
     // ProfileEstimator. ProfileEstimator also assign weights to the virtual
     // edges (0,entry) and (BB,0) (for blocks with no successors) and this
-    // edges also participate in the maximum spanning tree calculation. 
+    // edges also participate in the maximum spanning tree calculation.
     // The third parameter of MaximumSpanningTree() has the effect that not the
     // actual MST is returned but the edges _not_ in the MST.
 
-    ProfileInfo::EdgeWeights ECs = 
+    ProfileInfo::EdgeWeights ECs =
       getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
     std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
     MaximumSpanningTree<BasicBlock> MST (EdgeVector);
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 000000000000..6449b39cfc9d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1423 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling.  Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit.  The paths
+// along the new DAG are enumrated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96].  The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG            - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+//                  removed in the following manner.  For every backedge
+//                  v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number    - The number corresponding to a specific path through a
+//                  Ball-Larus DAG.
+// Spanning Tree  - A subgraph, S, is a spanning tree if S covers all
+//                  vertices and is a tree.
+// Chord          - An edge not in the spanning tree.
+//
+// [Ball96]
+//  T. Ball and J. R. Larus. "Efficient Path Profiling."
+//  International Symposium on Microarchitecture, pages 46-57, 1996.
+//  http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+//  Thomas Ball.  "Efficiently Counting Program Events with Support for
+//  On-line queries."
+//  ACM Transactions on Programmmg Languages and Systems, Vol 16, No 5,
+//  September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/DerivedTypes.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <map>
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with member used by the
+// instrumentation algortihms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+  // Creates a new BLInstrumentationNode from a BasicBlock.
+  BLInstrumentationNode(BasicBlock* BB);
+
+  // Get/sets the Value corresponding to the pathNumber register,
+  // constant or phinode.  Used by the instrumentation code to remember
+  // path number Values.
+  Value* getStartingPathNumber();
+  void setStartingPathNumber(Value* pathNumber);
+
+  Value* getEndingPathNumber();
+  void setEndingPathNumber(Value* pathNumber);
+
+  // Get/set the PHINode Instruction for this node.
+  PHINode* getPathPHI();
+  void setPathPHI(PHINode* pathPHI);
+
+private:
+
+  Value* _startingPathNumber; // The Value for the current pathNumber.
+  Value* _endingPathNumber; // The Value for the current pathNumber.
+  PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+  BLInstrumentationEdge(BLInstrumentationNode* source,
+                        BLInstrumentationNode* target);
+
+  // Sets the target node of this edge.  Required to split edges.
+  void setTarget(BallLarusNode* node);
+
+  // Get/set whether edge is in the spanning tree.
+  bool isInSpanningTree() const;
+  void setIsInSpanningTree(bool isInSpanningTree);
+
+  // Get/ set whether this edge will be instrumented with a path number
+  // initialization.
+  bool isInitialization() const;
+  void setIsInitialization(bool isInitialization);
+
+  // Get/set whether this edge will be instrumented with a path counter
+  // increment.  Notice this is incrementing the path counter
+  // corresponding to the path number register.  The path number
+  // increment is determined by getIncrement().
+  bool isCounterIncrement() const;
+  void setIsCounterIncrement(bool isCounterIncrement);
+
+  // Get/set the path number increment that this edge will be instrumented
+  // with.  This is distinct from the path counter increment and the
+  // weight.  The counter increment counts the number of executions of
+  // some path, whereas the path number keeps track of which path number
+  // the program is on.
+  long getIncrement() const;
+  void setIncrement(long increment);
+
+  // Get/set whether the edge has been instrumented.
+  bool hasInstrumentation();
+  void setHasInstrumentation(bool hasInstrumentation);
+
+  // Returns the successor number of this edge in the source.
+  unsigned getSuccessorNumber();
+
+private:
+  // The increment that the code will be instrumented with.
+  long long _increment;
+
+  // Whether this edge is in the spanning tree.
+  bool _isInSpanningTree;
+
+  // Whether this edge is an initialiation of the path number.
+  bool _isInitialization;
+
+  // Whether this edge is a path counter increment.
+  bool _isCounterIncrement;
+
+  // Whether this edge has been instrumented.
+  bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+  BLInstrumentationDag(Function &F);
+
+  // Returns the Exit->Root edge. This edge is required for creating
+  // directed cycles in the algorithm for moving instrumentation off of
+  // the spanning tree
+  BallLarusEdge* getExitRootEdge();
+
+  // Returns an array of phony edges which mark those nodes
+  // with function calls
+  BLEdgeVector getCallPhonyEdges();
+
+  // Gets/sets the path counter array
+  GlobalVariable* getCounterArray();
+  void setCounterArray(GlobalVariable* c);
+
+  // Calculates the increments for the chords, thereby removing
+  // instrumentation from the spanning tree edges. Implementation is based
+  // on the algorithm in Figure 4 of [Ball94]
+  void calculateChordIncrements();
+
+  // Updates the state when an edge has been split
+  void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+  // Calculates a spanning tree of the DAG ignoring cycles.  Whichever
+  // edges are in the spanning tree will not be instrumented, but this
+  // implementation does not try to minimize the instrumentation overhead
+  // by trying to find hot edges.
+  void calculateSpanningTree();
+
+  // Pushes initialization further down in order to group the first
+  // increment and initialization.
+  void pushInitialization();
+
+  // Pushes the path counter increments up in order to group the last path
+  // number increment.
+  void pushCounters();
+
+  // Removes phony edges from the successor list of the source, and the
+  // predecessor list of the target.
+  void unlinkPhony();
+
+  // Generate dot graph for the function
+  void generateDotGraph();
+
+protected:
+  // BLInstrumentationDag creates BLInstrumentationNode objects in this
+  // method overriding the creation of BallLarusNode objects.
+  //
+  // Allows subclasses to determine which type of Node is created.
+  // Override this method to produce subclasses of BallLarusNode if
+  // necessary.
+  virtual BallLarusNode* createNode(BasicBlock* BB);
+
+  // BLInstrumentationDag create BLInstrumentationEdges.
+  //
+  // Allows subclasses to determine which type of Edge is created.
+  // Override this method to produce subclasses of BallLarusEdge if
+  // necessary.  Parameters source and target will have been created by
+  // createNode and can be cast to the subclass of BallLarusNode*
+  // returned by createNode.
+  virtual BallLarusEdge* createEdge(
+    BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+  BLEdgeVector _treeEdges; // All edges in the spanning tree.
+  BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+  GlobalVariable* _counterArray; // Array to store path counters
+
+  // Removes the edge from the appropriate predecessor and successor lists.
+  void unlinkEdge(BallLarusEdge* edge);
+
+  // Makes an edge part of the spanning tree.
+  void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+  // Pushes initialization and calls itself recursively.
+  void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+  // Pushes path counter increments up recursively.
+  void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+  // Depth first algorithm for determining the chord increments.f
+  void calculateChordIncrementsDfs(
+    long weight, BallLarusNode* v, BallLarusEdge* e);
+
+  // Determines the relative direction of two edges.
+  int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which intruments path profiling instructions
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+  // Current context for multi threading support.
+  LLVMContext* Context;
+
+  // Which function are we currently instrumenting
+  unsigned currentFunctionNumber;
+
+  // The function prototype in the profiling runtime for incrementing a
+  // single path counter in a hash table.
+  Constant* llvmIncrementHashFunction;
+  Constant* llvmDecrementHashFunction;
+
+  // Instruments each function with path profiling.  'main' is instrumented
+  // with code to save the profile to disk.
+  bool runOnModule(Module &M);
+
+  // Analyzes the function for Ball-Larus path profiling, and inserts code.
+  void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+  // Creates an increment constant representing incr.
+  ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+  // Creates an increment constant representing the value in
+  // edge->getIncrement().
+  ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+  // Finds the insertion point after pathNumber in block.  PathNumber may
+  // be NULL.
+  BasicBlock::iterator getInsertionPoint(
+    BasicBlock* block, Value* pathNumber);
+
+  // Inserts source's pathNumber Value* into target.  Target may or may not
+  // have multiple predecessors, and may or may not have its phiNode
+  // initalized.
+  void pushValueIntoNode(
+    BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+  // Inserts source's pathNumber Value* into the appropriate slot of
+  // target's phiNode.
+  void pushValueIntoPHI(
+    BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+  // The Value* in node, oldVal,  is updated with a Value* correspodning to
+  // oldVal + addition.
+  void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+                             bool atBeginning);
+
+  // Creates a counter increment in the given node.  The Value* in node is
+  // taken as the index into a hash table.
+  void insertCounterIncrement(
+    Value* incValue,
+    BasicBlock::iterator insertPoint,
+    BLInstrumentationDag* dag,
+    bool increment = true);
+
+  // A PHINode is created in the node, and its values initialized to -1U.
+  void preparePHI(BLInstrumentationNode* node);
+
+  // Inserts instrumentation for the given edge
+  //
+  // Pre: The edge's source node has pathNumber set if edge is non zero
+  // path number increment.
+  //
+  // Post: Edge's target node has a pathNumber set to the path number Value
+  // corresponding to the value of the path register after edge's
+  // execution.
+  void insertInstrumentationStartingAt(
+    BLInstrumentationEdge* edge,
+    BLInstrumentationDag* dag);
+
+  // If this edge is a critical edge, then inserts a node at this edge.
+  // This edge becomes the first edge, and a new BallLarusEdge is created.
+  bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+  // Inserts instrumentation according to the marked edges in dag.  Phony
+  // edges must be unlinked from the DAG, but accessible from the
+  // backedges.  Dag must have initializations, path number increments, and
+  // counter increments present.
+  //
+  // Counter storage is created here.
+  void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  PathProfiler() : ModulePass(ID) {
+    initializePathProfilerPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual const char *getPassName() const {
+    return "Path Profiler";
+  }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+        cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+                "Insert instrumentation for Ball-Larus path profiling",
+                false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+  class PathProfilingFunctionTable {};
+
+  // Type for global array storing references to hashes or arrays
+  template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+                                            xcompile> {
+  public:
+    static const StructType *get(LLVMContext& C) {
+      return( StructType::get(
+                C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+                TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+                TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+                NULL));
+    }
+  };
+
+  typedef TypeBuilder<PathProfilingFunctionTable, true>
+  ftEntryTypeBuilder;
+
+  // BallLarusEdge << operator overloading
+  raw_ostream& operator<<(raw_ostream& os,
+                          const BLInstrumentationEdge& edge) {
+    os << "[" << edge.getSource()->getName() << " -> "
+       << edge.getTarget()->getName() << "] init: "
+       << (edge.isInitialization() ? "yes" : "no")
+       << " incr:" << edge.getIncrement() << " cinc: "
+       << (edge.isCounterIncrement() ? "yes" : "no");
+    return(os);
+  }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+  BallLarusNode(BB),
+  _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+                                             BLInstrumentationNode* target)
+  : BallLarusEdge(source, target, 0),
+    _increment(0), _isInSpanningTree(false), _isInitialization(false),
+    _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge.  Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+  _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+  return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+  _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+  return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+  _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment.  Notice this is incrementing the path counter
+// corresponding to the path number register.  The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+  return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+  _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with.  This is distinct from the path counter increment and the
+// weight.  The counter increment is counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+  return(_increment);
+}
+
+// Set whether this edge will be instrumented with a path number
+// increment.
+void BLInstrumentationEdge::setIncrement(long increment) {
+  _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+  return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+  _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+  BallLarusNode* sourceNode = getSource();
+  BallLarusNode* targetNode = getTarget();
+  BasicBlock* source = sourceNode->getBlock();
+  BasicBlock* target = targetNode->getBlock();
+
+  if(source == NULL || target == NULL)
+    return(0);
+
+  TerminatorInst* terminator = source->getTerminator();
+
+        unsigned i;
+  for(i=0; i < terminator->getNumSuccessors(); i++) {
+    if(terminator->getSuccessor(i) == target)
+      break;
+  }
+
+  return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+                                                          _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+  BLEdgeIterator erEdge = getExit()->succBegin();
+  return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+  BLEdgeVector callEdges;
+
+  for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+       edge != end; edge++ ) {
+    if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+      callEdges.push_back(*edge);
+  }
+
+  return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+  return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+  _counterArray = c;
+}
+
+// Calculates the increment for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94]
+void BLInstrumentationDag::calculateChordIncrements() {
+  calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+  BLInstrumentationEdge* chord;
+  for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+      end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+    chord = (BLInstrumentationEdge*) *chordEdge;
+    chord->setIncrement(chord->getIncrement() + chord->getWeight());
+  }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+                                       BasicBlock* newBlock) {
+  BallLarusNode* oldTarget = formerEdge->getTarget();
+  BallLarusNode* newNode = addNode(newBlock);
+  formerEdge->setTarget(newNode);
+  newNode->addPredEdge(formerEdge);
+
+  DEBUG(dbgs() << "  Edge split: " << *formerEdge << "\n");
+
+  oldTarget->removePredEdge(formerEdge);
+  BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+  if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+                        formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+                newEdge->setType(formerEdge->getType());
+    newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+    newEdge->setPhonyExit(formerEdge->getPhonyExit());
+    formerEdge->setType(BallLarusEdge::NORMAL);
+                formerEdge->setPhonyRoot(NULL);
+    formerEdge->setPhonyExit(NULL);
+  }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles.  Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+  std::stack<BallLarusNode*> dfsStack;
+
+  for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+      nodeIt != end; nodeIt++) {
+    (*nodeIt)->setColor(BallLarusNode::WHITE);
+  }
+
+  dfsStack.push(getRoot());
+  while(dfsStack.size() > 0) {
+    BallLarusNode* node = dfsStack.top();
+    dfsStack.pop();
+
+    if(node->getColor() == BallLarusNode::WHITE)
+      continue;
+
+    BallLarusNode* nextNode;
+    bool forward = true;
+    BLEdgeIterator succEnd = node->succEnd();
+
+    node->setColor(BallLarusNode::WHITE);
+    // first iterate over successors then predecessors
+    for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+        edge != predEnd; edge++) {
+      if(edge == succEnd) {
+        edge = node->predBegin();
+        forward = false;
+      }
+
+      // Ignore split edges
+      if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+      if(nextNode->getColor() != BallLarusNode::WHITE) {
+        nextNode->setColor(BallLarusNode::WHITE);
+        makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+      }
+    }
+  }
+
+  for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+      edge != end; edge++) {
+    BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+      // safe since createEdge is overriden
+    if(!instEdge->isInSpanningTree() && (*edge)->getType()
+        != BallLarusEdge::SPLITEDGE)
+      _chordEdges.push_back(instEdge);
+  }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+  BLInstrumentationEdge* exitRootEdge =
+                (BLInstrumentationEdge*) getExitRootEdge();
+  exitRootEdge->setIsInitialization(true);
+  pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+  BLInstrumentationEdge* exitRootEdge =
+    (BLInstrumentationEdge*) getExitRootEdge();
+  exitRootEdge->setIsCounterIncrement(true);
+  pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+  BallLarusEdge* edge;
+
+  for(BLEdgeIterator next = _edges.begin(),
+      end = _edges.end(); next != end; next++) {
+    edge = (*next);
+
+    if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+        edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+        edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+      unlinkEdge(edge);
+    }
+  }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+  std::string errorInfo;
+  std::string functionName = getFunction().getNameStr();
+  std::string filename = "pathdag." + functionName + ".dot";
+
+  DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+  raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+  if (!errorInfo.empty()) {
+    errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+    errs() << "\n";
+    return;
+  }
+
+  dotFile << "digraph " << functionName << " {\n";
+
+  for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+       edge != end; edge++) {
+    std::string sourceName = (*edge)->getSource()->getName();
+    std::string targetName = (*edge)->getTarget()->getName();
+
+    dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+            << targetName.c_str() << "\" ";
+
+    long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+    switch( (*edge)->getType() ) {
+    case BallLarusEdge::NORMAL:
+      dotFile << "[label=" << inc << "] [color=black];\n";
+      break;
+
+    case BallLarusEdge::BACKEDGE:
+      dotFile << "[color=cyan];\n";
+      break;
+
+    case BallLarusEdge::BACKEDGE_PHONY:
+      dotFile << "[label=" << inc
+              << "] [color=blue];\n";
+      break;
+
+    case BallLarusEdge::SPLITEDGE:
+      dotFile << "[color=violet];\n";
+      break;
+
+    case BallLarusEdge::SPLITEDGE_PHONY:
+      dotFile << "[label=" << inc << "] [color=red];\n";
+      break;
+
+    case BallLarusEdge::CALLEDGE_PHONY:
+      dotFile << "[label=" << inc     << "] [color=green];\n";
+      break;
+    }
+  }
+
+  dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+  return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+                                                BallLarusNode* target, unsigned edgeNumber) {
+  // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+  // is overriden to produce BLInstrumentationNode.
+  return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+                                    (BLInstrumentationNode*)target) );
+}
+
+// Sets the Value corresponding to the pathNumber register, constant,
+// or phinode.  Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+  return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber.  Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+  DEBUG(dbgs() << "  SPN-" << getName() << " <-- " << (pathNumber ?
+                                                       pathNumber->getNameStr() : "unused") << "\n");
+  _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+  return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+  DEBUG(dbgs() << "  EPN-" << getName() << " <-- "
+        << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+  _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node.  Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+  return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node.  Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+  _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+  if(edge == getExitRootEdge())
+    DEBUG(dbgs() << " Removing exit->root edge\n");
+
+  edge->getSource()->removeSuccEdge(edge);
+  edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+  edge->setIsInSpanningTree(true);
+  _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+  BLInstrumentationEdge* edge) {
+  BallLarusNode* target;
+
+  target = edge->getTarget();
+  if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+    return;
+  } else {
+    for(BLEdgeIterator next = target->succBegin(),
+          end = target->succEnd(); next != end; next++) {
+      BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+      // Skip split edges
+      if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      intoEdge->setIncrement(intoEdge->getIncrement() +
+                             edge->getIncrement());
+      intoEdge->setIsInitialization(true);
+      pushInitializationFromEdge(intoEdge);
+    }
+
+    edge->setIncrement(0);
+    edge->setIsInitialization(false);
+  }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+  BallLarusNode* source;
+
+  source = edge->getSource();
+  if(source->getNumberSuccEdges() > 1 || source == getRoot()
+     || edge->isInitialization()) {
+    return;
+  } else {
+    for(BLEdgeIterator previous = source->predBegin(),
+          end = source->predEnd(); previous != end; previous++) {
+      BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+      // Skip split edges
+      if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+        continue;
+
+      fromEdge->setIncrement(fromEdge->getIncrement() +
+                             edge->getIncrement());
+      fromEdge->setIsCounterIncrement(true);
+      pushCountersFromEdge(fromEdge);
+    }
+
+    edge->setIncrement(0);
+    edge->setIsCounterIncrement(false);
+  }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+                                                       BallLarusNode* v, BallLarusEdge* e) {
+  BLInstrumentationEdge* f;
+
+  for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+        end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+    f = (BLInstrumentationEdge*) *treeEdge;
+    if(e != f && v == f->getTarget()) {
+      calculateChordIncrementsDfs(
+        calculateChordIncrementsDir(e,f)*(weight) +
+        f->getWeight(), f->getSource(), f);
+    }
+    if(e != f && v == f->getSource()) {
+      calculateChordIncrementsDfs(
+        calculateChordIncrementsDir(e,f)*(weight) +
+        f->getWeight(), f->getTarget(), f);
+    }
+  }
+
+  for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+        end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+    f = (BLInstrumentationEdge*) *chordEdge;
+    if(v == f->getSource() || v == f->getTarget()) {
+      f->setIncrement(f->getIncrement() +
+                      calculateChordIncrementsDir(e,f)*weight);
+    }
+  }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+                                                      BallLarusEdge* f) {
+  if( e == NULL)
+    return(1);
+  else if(e->getSource() == f->getTarget()
+          || e->getTarget() == f->getSource())
+    return(1);
+
+  return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+                                                   int bitsize) {
+  return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+  BLInstrumentationEdge* edge) {
+  return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block.  PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+                                                     pathNumber) {
+  if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+     || (((Instruction*)(pathNumber))->getParent()) != block) {
+    return(block->getFirstNonPHI());
+  } else {
+    Instruction* pathNumberInst = (Instruction*) (pathNumber);
+    BasicBlock::iterator insertPoint;
+    BasicBlock::iterator end = block->end();
+
+    for(insertPoint = block->begin();
+        insertPoint != end; insertPoint++) {
+      Instruction* insertInst = &(*insertPoint);
+
+      if(insertInst == pathNumberInst)
+        return(++insertPoint);
+    }
+
+    return(insertPoint);
+  }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+  BasicBlock* block = node->getBlock();
+  BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+  PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+                                 insertPoint );
+  node->setPathPHI(phi);
+  node->setStartingPathNumber(phi);
+  node->setEndingPathNumber(phi);
+
+  for(pred_iterator predIt = pred_begin(node->getBlock()),
+        end = pred_end(node->getBlock()); predIt != end; predIt++) {
+    BasicBlock* pred = (*predIt);
+
+    if(pred != NULL)
+      phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+  }
+}
+
+// Inserts source's pathNumber Value* into target.  Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initalized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+                                     BLInstrumentationNode* target) {
+  if(target->getBlock() == NULL)
+    return;
+
+
+  if(target->getNumberPredEdges() <= 1) {
+    assert(target->getStartingPathNumber() == NULL &&
+           "Target already has path number");
+    target->setStartingPathNumber(source->getEndingPathNumber());
+    target->setEndingPathNumber(source->getEndingPathNumber());
+    DEBUG(dbgs() << "  Passing path number"
+          << (source->getEndingPathNumber() ? "" : " (null)")
+          << " value through.\n");
+  } else {
+    if(target->getPathPHI() == NULL) {
+      DEBUG(dbgs() << "  Initializing PHI node for block '"
+            << target->getName() << "'\n");
+      preparePHI(target);
+    }
+    pushValueIntoPHI(target, source);
+    DEBUG(dbgs() << "  Passing number value into PHI for block '"
+          << target->getName() << "'\n");
+  }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+                                    BLInstrumentationNode* source) {
+  PHINode* phi = target->getPathPHI();
+  assert(phi != NULL && "  Tried to push value into node with PHI, but node"
+         " actually had no PHI.");
+  phi->removeIncomingValue(source->getBlock(), false);
+  phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal,  is updated with a Value* correspodning to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+                                         Value* addition, bool atBeginning) {
+  BasicBlock* block = node->getBlock();
+  assert(node->getStartingPathNumber() != NULL);
+  assert(node->getEndingPathNumber() != NULL);
+
+  BasicBlock::iterator insertPoint;
+
+  if( atBeginning )
+    insertPoint = block->getFirstNonPHI();
+  else
+    insertPoint = block->getTerminator();
+
+  DEBUG(errs() << "  Creating addition instruction.\n");
+  Value* newpn = BinaryOperator::Create(Instruction::Add,
+                                        node->getStartingPathNumber(),
+                                        addition, "pathNumber", insertPoint);
+
+  node->setEndingPathNumber(newpn);
+
+  if( atBeginning )
+    node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node.  The Value* in node is
+// taken as the index into an array or hash table.  The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+                                          BasicBlock::iterator insertPoint,
+                                          BLInstrumentationDag* dag,
+                                          bool increment) {
+  // Counter increment for array
+  if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+    // Get pointer to the array location
+    std::vector<Value*> gepIndices(2);
+    gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+    gepIndices[1] = incValue;
+
+    GetElementPtrInst* pcPointer =
+      GetElementPtrInst::Create(dag->getCounterArray(),
+                                gepIndices.begin(), gepIndices.end(),
+                                "counterInc", insertPoint);
+
+    // Load from the array - call it oldPC
+    LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+    // Test to see whether adding 1 will overflow the counter
+    ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+                                   createIncrementConstant(0xffffffff, 32),
+                                   "isMax");
+
+    // Select increment for the path counter based on overflow
+    SelectInst* inc =
+      SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+                          createIncrementConstant(0,32),
+                          "pathInc", insertPoint);
+
+    // newPc = oldPc + inc
+    BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+                                                   oldPc, inc, "newPC",
+                                                   insertPoint);
+
+    // Store back in to the array
+    new StoreInst(newPc, pcPointer, insertPoint);
+  } else { // Counter increment for hash
+    std::vector<Value*> args(2);
+    args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+                               currentFunctionNumber);
+    args[1] = incValue;
+
+    CallInst::Create(
+      increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+      args.begin(), args.end(), "", insertPoint);
+  }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if edge is non zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+                                                   BLInstrumentationDag* dag) {
+  // Mark the edge as instrumented
+  edge->setHasInstrumentation(true);
+  DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+  // create a new node for this edge's instrumentation
+  splitCritical(edge, dag);
+
+  BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+  BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+  BLInstrumentationNode* instrumentNode;
+  BLInstrumentationNode* nextSourceNode;
+
+  bool atBeginning = false;
+
+  // Source node has only 1 successor so any information can be simply
+  // inserted in to it without splitting
+  if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+    DEBUG(dbgs() << "  Potential instructions to be placed in: "
+          << sourceNode->getName() << " (at end)\n");
+    instrumentNode = sourceNode;
+    nextSourceNode = targetNode; // ... since we never made any new nodes
+  }
+
+  // The target node only has one predecessor, so we can safely insert edge
+  // instrumentation into it. If there was splitting, it must have been
+  // successful.
+  else if( targetNode->getNumberPredEdges() == 1 ) {
+    DEBUG(dbgs() << "  Potential instructions to be placed in: "
+          << targetNode->getName() << " (at beginning)\n");
+    pushValueIntoNode(sourceNode, targetNode);
+    instrumentNode = targetNode;
+    nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+    atBeginning = true;
+  }
+
+  // Somehow, splitting must have failed.
+  else {
+    errs() << "Instrumenting could not split a critical edge.\n";
+    DEBUG(dbgs() << "  Couldn't split edge " << (*edge) << ".\n");
+    return;
+  }
+
+  // Insert instrumentation if this is a back or split edge
+  if( edge->getType() == BallLarusEdge::BACKEDGE ||
+      edge->getType() == BallLarusEdge::SPLITEDGE ) {
+    BLInstrumentationEdge* top =
+      (BLInstrumentationEdge*) edge->getPhonyRoot();
+    BLInstrumentationEdge* bottom =
+      (BLInstrumentationEdge*) edge->getPhonyExit();
+
+    assert( top->isInitialization() && " Top phony edge did not"
+            " contain a path number initialization.");
+    assert( bottom->isCounterIncrement() && " Bottom phony edge"
+            " did not contain a path counter increment.");
+
+    // split edge has yet to be initialized
+    if( !instrumentNode->getEndingPathNumber() ) {
+      instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+      instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+    }
+
+    BasicBlock::iterator insertPoint = atBeginning ?
+      instrumentNode->getBlock()->getFirstNonPHI() :
+      instrumentNode->getBlock()->getTerminator();
+
+    // add information from the bottom edge, if it exists
+    if( bottom->getIncrement() ) {
+      Value* newpn =
+        BinaryOperator::Create(Instruction::Add,
+                               instrumentNode->getStartingPathNumber(),
+                               createIncrementConstant(bottom),
+                               "pathNumber", insertPoint);
+      instrumentNode->setEndingPathNumber(newpn);
+    }
+
+    insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                           insertPoint, dag);
+
+    if( atBeginning )
+      instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+    instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+    // Check for path counter increments
+    if( top->isCounterIncrement() ) {
+      insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                             instrumentNode->getBlock()->getTerminator(),dag);
+      instrumentNode->setEndingPathNumber(0);
+    }
+  }
+
+  // Insert instrumentation if this is a normal edge
+  else {
+    BasicBlock::iterator insertPoint = atBeginning ?
+      instrumentNode->getBlock()->getFirstNonPHI() :
+      instrumentNode->getBlock()->getTerminator();
+
+    if( edge->isInitialization() ) { // initialize path number
+      instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+    } else if( edge->getIncrement() )       {// increment path number
+      Value* newpn =
+        BinaryOperator::Create(Instruction::Add,
+                               instrumentNode->getStartingPathNumber(),
+                               createIncrementConstant(edge),
+                               "pathNumber", insertPoint);
+      instrumentNode->setEndingPathNumber(newpn);
+
+      if( atBeginning )
+        instrumentNode->setStartingPathNumber(newpn);
+    }
+
+    // Check for path counter increments
+    if( edge->isCounterIncrement() ) {
+      insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+                             insertPoint, dag);
+      instrumentNode->setEndingPathNumber(0);
+    }
+  }
+
+  // Push it along
+  if (nextSourceNode && instrumentNode->getEndingPathNumber())
+    pushValueIntoNode(instrumentNode, nextSourceNode);
+
+  // Add all the successors
+  for( BLEdgeIterator next = targetNode->succBegin(),
+         end = targetNode->succEnd(); next != end; next++ ) {
+    // So long as it is un-instrumented, add it to the list
+    if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+      insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+    else
+      DEBUG(dbgs() << "  Edge " << *(BLInstrumentationEdge*)(*next)
+            << " already instrumented.\n");
+  }
+}
+
+// Inserts instrumentation according to the marked edges in dag.  Phony edges
+// must be unlinked from the DAG, but accessible from the backedges.  Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+  BLInstrumentationDag& dag, Module &M) {
+
+  BLInstrumentationEdge* exitRootEdge =
+    (BLInstrumentationEdge*) dag.getExitRootEdge();
+  insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+  // Iterate through each call edge and apply the appropriate hash increment
+  // and decrement functions
+  BLEdgeVector callEdges = dag.getCallPhonyEdges();
+  for( BLEdgeIterator edge = callEdges.begin(),
+         end = callEdges.end(); edge != end; edge++ ) {
+    BLInstrumentationNode* node =
+      (BLInstrumentationNode*)(*edge)->getSource();
+    BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+    // Find the first function call
+    while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+      insertPoint++;
+
+    DEBUG(dbgs() << "\nInstrumenting method call block '"
+          << node->getBlock()->getNameStr() << "'\n");
+    DEBUG(dbgs() << "   Path number initialized: "
+          << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+    Value* newpn;
+    if( node->getStartingPathNumber() ) {
+      long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+      if ( inc )
+        newpn = BinaryOperator::Create(Instruction::Add,
+                                       node->getStartingPathNumber(),
+                                       createIncrementConstant(inc,32),
+                                       "pathNumber", insertPoint);
+      else
+        newpn = node->getStartingPathNumber();
+    } else {
+      newpn = (Value*)createIncrementConstant(
+        ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+    }
+
+    insertCounterIncrement(newpn, insertPoint, &dag);
+    insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+                           &dag, false);
+  }
+}
+
+// Entry point of the module
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+                                 Function &F, Module &M) {
+  // Build DAG from CFG
+  BLInstrumentationDag dag = BLInstrumentationDag(F);
+  dag.init();
+
+  // give each path a unique integer value
+  dag.calculatePathNumbers();
+
+  // modify path increments to increase the efficiency
+  // of instrumentation
+  dag.calculateSpanningTree();
+  dag.calculateChordIncrements();
+  dag.pushInitialization();
+  dag.pushCounters();
+  dag.unlinkPhony();
+
+  // potentially generate .dot graph for the dag
+  if (DotPathDag)
+    dag.generateDotGraph ();
+
+  // Should we store the information in an array or hash
+  if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+    const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+                                   dag.getNumberOfPaths());
+
+    dag.setCounterArray(new GlobalVariable(M, t, false,
+                                           GlobalValue::InternalLinkage,
+                                           Constant::getNullValue(t), ""));
+  }
+
+  insertInstrumentation(dag, M);
+
+  // Add to global function reference table
+  unsigned type;
+  const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+  if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+    type = ProfilingArray;
+  else
+    type = ProfilingHash;
+
+  std::vector<Constant*> entryArray(3);
+  entryArray[0] = createIncrementConstant(type,32);
+  entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+  entryArray[2] = dag.getCounterArray() ?
+    ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+    Constant::getNullValue(voidPtr);
+
+  const StructType* at = ftEntryTypeBuilder::get(*Context);
+  ConstantStruct* functionEntry =
+    (ConstantStruct*)ConstantStruct::get(at, entryArray);
+  ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changess
+#define PRINT_MODULE dbgs() <<                               \
+  "\n\n============= MODULE BEGIN ===============\n" << M << \
+  "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+  Context = &M.getContext();
+
+  DEBUG(dbgs()
+        << "****************************************\n"
+        << "****************************************\n"
+        << "**                                    **\n"
+        << "**   PATH PROFILING INSTRUMENTATION   **\n"
+        << "**                                    **\n"
+        << "****************************************\n"
+        << "****************************************\n");
+
+  // No main, no instrumentation!
+  Function *Main = M.getFunction("main");
+
+  // Using fortran? ... this kind of works
+  if (!Main)
+    Main = M.getFunction("MAIN__");
+
+  if (!Main) {
+    errs() << "WARNING: cannot insert path profiling into a module"
+           << " with no main function!\n";
+    return false;
+  }
+
+  BasicBlock::iterator insertPoint = Main->getEntryBlock().getFirstNonPHI();
+
+  llvmIncrementHashFunction = M.getOrInsertFunction(
+    "llvm_increment_path_count",
+    Type::getVoidTy(*Context), // return type
+    Type::getInt32Ty(*Context), // function number
+    Type::getInt32Ty(*Context), // path number
+    NULL );
+
+  llvmDecrementHashFunction = M.getOrInsertFunction(
+    "llvm_decrement_path_count",
+    Type::getVoidTy(*Context), // return type
+    Type::getInt32Ty(*Context), // function number
+    Type::getInt32Ty(*Context), // path number
+    NULL );
+
+  std::vector<Constant*> ftInit;
+  unsigned functionNumber = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+    if (F->isDeclaration())
+      continue;
+
+    DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+    functionNumber++;
+
+    // set function number
+    currentFunctionNumber = functionNumber;
+    runOnFunction(ftInit, *F, M);
+  }
+
+  const Type *t = ftEntryTypeBuilder::get(*Context);
+  const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+  Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+  DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+  GlobalVariable* functionTable =
+    new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+                       ftInitConstant, "functionPathTable");
+  const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+  InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+                          PointerType::getUnqual(eltType));
+
+  DEBUG(PRINT_MODULE);
+
+  return true;
+}
+
+// If this edge is a critical edge, then inserts a node at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+                                 BLInstrumentationDag* dag) {
+  unsigned succNum = edge->getSuccessorNumber();
+  BallLarusNode* sourceNode = edge->getSource();
+  BallLarusNode* targetNode = edge->getTarget();
+  BasicBlock* sourceBlock = sourceNode->getBlock();
+  BasicBlock* targetBlock = targetNode->getBlock();
+
+  if(sourceBlock == NULL || targetBlock == NULL
+     || sourceNode->getNumberSuccEdges() <= 1
+     || targetNode->getNumberPredEdges() == 1 ) {
+    return(false);
+  }
+
+  TerminatorInst* terminator = sourceBlock->getTerminator();
+
+  if( SplitCriticalEdge(terminator, succNum, this, false)) {
+    BasicBlock* newBlock = terminator->getSuccessor(succNum);
+    dag->splitUpdate(edge, newBlock);
+    return(true);
+  } else
+    return(false);
+}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 1a30e9ba288b..b57bbf60a07a 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -22,12 +22,13 @@
 #include "llvm/Module.h"
 
 void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
-                                   GlobalValue *Array) {
+                                   GlobalValue *Array,
+                                   PointerType *arrayType) {
   LLVMContext &Context = MainFn->getContext();
-  const Type *ArgVTy = 
+  const Type *ArgVTy =
     PointerType::getUnqual(Type::getInt8PtrTy(Context));
-  const PointerType *UIntPtr =
-        Type::getInt32PtrTy(Context);
+  const PointerType *UIntPtr = arrayType ? arrayType :
+    Type::getInt32PtrTy(Context);
   Module &M = *MainFn->getParent();
   Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
                                            Type::getInt32Ty(Context),
@@ -71,9 +72,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   case 2:
     AI = MainFn->arg_begin(); ++AI;
     if (AI->getType() != ArgVTy) {
-      Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, 
+      Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
                                                             false);
-      InitCall->setArgOperand(1, 
+      InitCall->setArgOperand(1,
           CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
     } else {
       InitCall->setArgOperand(1, AI);
@@ -93,7 +94,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
       }
       opcode = CastInst::getCastOpcode(AI, true,
                                        Type::getInt32Ty(Context), true);
-      InitCall->setArgOperand(0, 
+      InitCall->setArgOperand(0,
           CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
                            "argc.cast", InitCall));
     } else {
@@ -106,9 +107,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
 }
 
 void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
-                                   GlobalValue *CounterArray) {
+                                   GlobalValue *CounterArray, bool beginning) {
   // Insert the increment after any alloca or PHI instructions...
-  BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+  BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+                BB->getTerminator();
   while (isa<AllocaInst>(InsertPos))
     ++InsertPos;
 
@@ -118,7 +120,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
   std::vector<Constant*> Indices(2);
   Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
   Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
-  Constant *ElementPtr = 
+  Constant *ElementPtr =
     ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
                                           Indices.size());
 
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h
index 94efffec8a3d..a76e3576e1ca 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -21,11 +21,14 @@ namespace llvm {
   class Function;
   class GlobalValue;
   class BasicBlock;
+  class PointerType;
 
   void InsertProfilingInitCall(Function *MainFn, const char *FnName,
-                               GlobalValue *Arr = 0);
+                               GlobalValue *Arr = 0,
+                               PointerType *arrayType = 0);
   void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
-                               GlobalValue *CounterArray);
+                               GlobalValue *CounterArray,
+                               bool beginning = true);
 }
 
 #endif
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index ada086e9db76..a5adb5e7cefe 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumRemoved, "Number of instructions removed");
 namespace {
   struct ADCE : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    ADCE() : FunctionPass(ID) {}
+    ADCE() : FunctionPass(ID) {
+      initializeADCEPass(*PassRegistry::getPassRegistry());
+    }
     
     virtual bool runOnFunction(Function& F);
     
@@ -45,7 +47,7 @@ namespace {
 }
 
 char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
 
 bool ADCE::runOnFunction(Function& F) {
   SmallPtrSet<Instruction*, 128> alive;
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index b144678c6a0e..cee550265622 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,9 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
 namespace {
   struct BlockPlacement : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    BlockPlacement() : FunctionPass(ID) {}
+    BlockPlacement() : FunctionPass(ID) {
+      initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -74,8 +76,11 @@ namespace {
 }
 
 char BlockPlacement::ID = 0;
-INITIALIZE_PASS(BlockPlacement, "block-placement",
-                "Profile Guided Basic Block Placement", false, false);
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+                "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+                "Profile Guided Basic Block Placement", false, false)
 
 FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
 
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b7598eace536..106fb8f3c833 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -6,13 +6,15 @@ add_llvm_library(LLVMScalarOpts
   CorrelatedValuePropagation.cpp
   DCE.cpp
   DeadStoreElimination.cpp
+  EarlyCSE.cpp
   GEPSplitter.cpp
   GVN.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
   LICM.cpp
   LoopDeletion.cpp
-  LoopIndexSplit.cpp
+  LoopIdiomRecognize.cpp
+  LoopInstSimplify.cpp
   LoopRotation.cpp
   LoopStrengthReduce.cpp
   LoopUnrollPass.cpp
@@ -31,5 +33,3 @@ add_llvm_library(LLVMScalarOpts
   TailDuplication.cpp
   TailRecursionElimination.cpp
   )
-
-target_link_libraries (LLVMScalarOpts LLVMTransformUtils)
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index e07b761e589c..9536939ba2d4 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,6 +22,8 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
@@ -31,6 +33,7 @@
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
@@ -39,31 +42,59 @@
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+                      "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+                       "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+                          "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+
 static cl::opt<bool>
 CriticalEdgeSplit("cgp-critical-edge-splitting",
                   cl::desc("Split critical edges during codegen prepare"),
-                  cl::init(true), cl::Hidden);
+                  cl::init(false), cl::Hidden);
 
 namespace {
   class CodeGenPrepare : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// transformation profitability.
     const TargetLowering *TLI;
+    DominatorTree *DT;
     ProfileInfo *PFI;
+    
+    /// CurInstIterator - As we scan instructions optimizing them, this is the
+    /// next instruction to optimize.  Xforms that can invalidate this should
+    /// update it.
+    BasicBlock::iterator CurInstIterator;
 
     /// BackEdges - Keep a set of all the loop back edges.
     ///
     SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
+
+    // Keeps track of non-local addresses that have been sunk into a block. This
+    // allows us to avoid inserting duplicate code for blocks with multiple
+    // load/stores of the same address.
+    DenseMap<Value*, Value*> SunkAddrs;
+
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit CodeGenPrepare(const TargetLowering *tli = 0)
-      : FunctionPass(ID), TLI(tli) {}
+      : FunctionPass(ID), TLI(tli) {
+        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+      }
     bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
       AU.addPreserved<ProfileInfo>();
     }
 
@@ -76,10 +107,9 @@ namespace {
     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
     void EliminateMostlyEmptyBlock(BasicBlock *BB);
     bool OptimizeBlock(BasicBlock &BB);
-    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
-                            DenseMap<Value*,Value*> &SunkAddrs);
-    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
-                               DenseMap<Value*,Value*> &SunkAddrs);
+    bool OptimizeInst(Instruction *I);
+    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+    bool OptimizeInlineAsmInst(CallInst *CS);
     bool OptimizeCallInst(CallInst *CI);
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
@@ -89,7 +119,7 @@ namespace {
 
 char CodeGenPrepare::ID = 0;
 INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
-                "Optimize for code generation", false, false);
+                "Optimize for code generation", false, false)
 
 FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
   return new CodeGenPrepare(TLI);
@@ -108,13 +138,16 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
 bool CodeGenPrepare::runOnFunction(Function &F) {
   bool EverMadeChange = false;
 
+  DT = getAnalysisIfAvailable<DominatorTree>();
   PFI = getAnalysisIfAvailable<ProfileInfo>();
   // First pass, eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
 
-  // Now find loop back edges.
-  findLoopBackEdges(F);
+  // Now find loop back edges, but only if they are being used to decide which
+  // critical edges to split.
+  if (CriticalEdgeSplit)
+    findLoopBackEdges(F);
 
   bool MadeChange = true;
   while (MadeChange) {
@@ -123,6 +156,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       MadeChange |= OptimizeBlock(*BB);
     EverMadeChange |= MadeChange;
   }
+
+  SunkAddrs.clear();
+
   return EverMadeChange;
 }
 
@@ -297,11 +333,19 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
   // The PHIs are now updated, change everything that refers to BB to use
   // DestBB and remove BB.
   BB->replaceAllUsesWith(DestBB);
+  if (DT) {
+    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
+    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+    DT->changeImmediateDominator(DestBB, NewIDom);
+    DT->eraseNode(BB);
+  }
   if (PFI) {
     PFI->replaceAllUses(BB, DestBB);
     PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
   }
   BB->eraseFromParent();
+  ++NumBlocksElim;
 
   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 }
@@ -480,6 +524,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
 
     // Replace a use of the cast with a use of the new cast.
     TheUse = InsertedCast;
+    ++NumCastUses;
   }
 
   // If we removed all uses, nuke the cast.
@@ -537,6 +582,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
 
     // Replace a use of the cmp with a use of the new cmp.
     TheUse = InsertedCmp;
+    ++NumCmpUses;
   }
 
   // If we removed all uses, nuke the cmp.
@@ -563,14 +609,45 @@ protected:
 } // end anonymous namespace
 
 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+  BasicBlock *BB = CI->getParent();
+  
+  // Lower inline assembly if we can.
+  // If we found an inline asm expession, and if the target knows how to
+  // lower it to normal LLVM code, do so now.
+  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+    if (TLI->ExpandInlineAsm(CI)) {
+      // Avoid invalidating the iterator.
+      CurInstIterator = BB->begin();
+      // Avoid processing instructions out of order, which could cause
+      // reuse before a value is defined.
+      SunkAddrs.clear();
+      return true;
+    }
+    // Sink address computing for memory operands into the block.
+    if (OptimizeInlineAsmInst(CI))
+      return true;
+  }
+  
   // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
     bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
     const Type *ReturnTy = CI->getType();
     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);    
-    CI->replaceAllUsesWith(RetVal);
-    CI->eraseFromParent();
+    
+    // Substituting this can cause recursive simplifications, which can
+    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
+    // happens.
+    WeakVH IterHandle(CurInstIterator);
+    
+    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+
+    // If the iterator instruction was recursively deleted, start over at the
+    // start of the block.
+    if (IterHandle != CurInstIterator) {
+      CurInstIterator = BB->begin();
+      SunkAddrs.clear();
+    }
     return true;
   }
 
@@ -588,6 +665,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   CodeGenPrepareFortifiedLibCalls Simplifier;
   return Simplifier.fold(CI, TD);
 }
+
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
@@ -610,13 +688,69 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
 /// This method is used to optimize both load/store and inline asms with memory
 /// operands.
 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
-                                        const Type *AccessTy,
-                                        DenseMap<Value*,Value*> &SunkAddrs) {
-  // Figure out what addressing mode will be built up for this operation.
+                                        const Type *AccessTy) {
+  Value *Repl = Addr;
+  
+  // Try to collapse single-value PHI nodes.  This is necessary to undo 
+  // unprofitable PRE transformations.
+  SmallVector<Value*, 8> worklist;
+  SmallPtrSet<Value*, 16> Visited;
+  worklist.push_back(Addr);
+  
+  // Use a worklist to iteratively look through PHI nodes, and ensure that
+  // the addressing mode obtained from the non-PHI roots of the graph
+  // are equivalent.
+  Value *Consensus = 0;
+  unsigned NumUses = 0;
   SmallVector<Instruction*, 16> AddrModeInsts;
-  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
-                                                      AddrModeInsts, *TLI);
-
+  ExtAddrMode AddrMode;
+  while (!worklist.empty()) {
+    Value *V = worklist.back();
+    worklist.pop_back();
+    
+    // Break use-def graph loops.
+    if (Visited.count(V)) {
+      Consensus = 0;
+      break;
+    }
+    
+    Visited.insert(V);
+    
+    // For a PHI node, push all of its incoming values.
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+        worklist.push_back(P->getIncomingValue(i));
+      continue;
+    }
+    
+    // For non-PHIs, determine the addressing mode being computed.
+    SmallVector<Instruction*, 16> NewAddrModeInsts;
+    ExtAddrMode NewAddrMode =
+      AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+                                   NewAddrModeInsts, *TLI);
+    
+    // Ensure that the obtained addressing mode is equivalent to that obtained
+    // for all other roots of the PHI traversal.  Also, when choosing one
+    // such root as representative, select the one with the most uses in order
+    // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
+    if (!Consensus || NewAddrMode == AddrMode) {
+      if (V->getNumUses() > NumUses) {
+        Consensus = V;
+        NumUses = V->getNumUses();
+        AddrMode = NewAddrMode;
+        AddrModeInsts = NewAddrModeInsts;
+      }
+      continue;
+    }
+    
+    Consensus = 0;
+    break;
+  }
+  
+  // If the addressing mode couldn't be determined, or if multiple different
+  // ones were determined, bail out now.
+  if (!Consensus) return false;
+  
   // Check to see if any of the instructions supersumed by this addr mode are
   // non-local to I's BB.
   bool AnyNonLocal = false;
@@ -719,60 +853,39 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
   }
 
-  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
 
-  if (Addr->use_empty()) {
-    RecursivelyDeleteTriviallyDeadInstructions(Addr);
+  if (Repl->use_empty()) {
+    RecursivelyDeleteTriviallyDeadInstructions(Repl);
     // This address is now available for reassignment, so erase the table entry;
     // we don't want to match some completely different instruction.
     SunkAddrs[Addr] = 0;
   }
+  ++NumMemoryInsts;
   return true;
 }
 
 /// OptimizeInlineAsmInst - If there are any memory operands, use
 /// OptimizeMemoryInst to sink their address computing into the block when
 /// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
-                                           DenseMap<Value*,Value*> &SunkAddrs) {
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
   bool MadeChange = false;
-  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
-  // Do a prepass over the constraints, canonicalizing them, and building up the
-  // ConstraintOperands list.
-  std::vector<InlineAsm::ConstraintInfo>
-    ConstraintInfos = IA->ParseConstraints();
-
-  /// ConstraintOperands - Information about all of the constraints.
-  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
-  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    ConstraintOperands.
-      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
-    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-    case InlineAsm::isOutput:
-      if (OpInfo.isIndirect)
-        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-      break;
-    case InlineAsm::isInput:
-      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-      break;
-    case InlineAsm::isClobber:
-      // Nothing to do.
-      break;
-    }
 
+  TargetLowering::AsmOperandInfoVector 
+    TargetConstraints = TLI->ParseConstraints(CS);
+  unsigned ArgNo = 0;
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+    
     // Compute the constraint code and ConstraintType to use.
     TLI->ComputeConstraintToUse(OpInfo, SDValue());
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
-      Value *OpVal = OpInfo.CallOperandVal;
-      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
-    }
+      Value *OpVal = CS->getArgOperand(ArgNo++);
+      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+    } else if (OpInfo.Type == InlineAsm::isInput)
+      ArgNo++;
   }
 
   return MadeChange;
@@ -794,7 +907,9 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
   // If the load has other users and the truncate is not free, this probably
   // isn't worthwhile.
   if (!LI->hasOneUse() &&
-      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+      !TLI->isTruncateFree(I->getType(), LI->getType()))
     return false;
 
   // Check whether the target supports casts folded into loads.
@@ -812,13 +927,14 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
   // can fold it.
   I->removeFromParent();
   I->insertAfter(LI);
+  ++NumExtsMoved;
   return true;
 }
 
 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   BasicBlock *DefBB = I->getParent();
 
-  // If both result of the {s|z}xt and its source are live out, rewrite all
+  // If the result of a {s|z}ext and its source are both live out, rewrite all
   // other uses of the source with result of extension.
   Value *Src = I->getOperand(0);
   if (Src->hasOneUse())
@@ -883,13 +999,83 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
 
     // Replace a use of the {s|z}ext source with a use of the result.
     TheUse = InsertedTrunc;
-
+    ++NumExtUses;
     MadeChange = true;
   }
 
   return MadeChange;
 }
 
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+  if (PHINode *P = dyn_cast<PHINode>(I)) {
+    // It is possible for very late stage optimizations (such as SimplifyCFG)
+    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
+    // trivial PHI, go ahead and zap it here.
+    if (Value *V = SimplifyInstruction(P)) {
+      P->replaceAllUsesWith(V);
+      P->eraseFromParent();
+      ++NumPHIsElim;
+      return true;
+    }
+    return false;
+  }
+  
+  if (CastInst *CI = dyn_cast<CastInst>(I)) {
+    // If the source of the cast is a constant, then this should have
+    // already been constant folded.  The only reason NOT to constant fold
+    // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than then one that uses it (e.g. to hoist
+    // the address of globals out of a loop).  If this is the case, we don't
+    // want to forward-subst the cast.
+    if (isa<Constant>(CI->getOperand(0)))
+      return false;
+
+    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+      return true;
+
+    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+      bool MadeChange = MoveExtToFormExtLoad(I);
+      return MadeChange | OptimizeExtUses(I);
+    }
+    return false;
+  }
+  
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+    return OptimizeCmpExpression(CI);
+  
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    return false;
+  }
+  
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, SI->getOperand(1),
+                                SI->getOperand(0)->getType());
+    return false;
+  }
+  
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    if (GEPI->hasAllZeroIndices()) {
+      /// The GEP operand must be a pointer, so must its result -> BitCast
+      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+                                        GEPI->getName(), GEPI);
+      GEPI->replaceAllUsesWith(NC);
+      GEPI->eraseFromParent();
+      ++NumGEPsElim;
+      OptimizeInst(NC);
+      return true;
+    }
+    return false;
+  }
+  
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    return OptimizeCallInst(CI);
+
+  return false;
+}
+
 // In this pass we look for GEP and cast instructions that are used
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
@@ -908,74 +1094,11 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
     }
   }
 
-  // Keep track of non-local addresses that have been sunk into this block.
-  // This allows us to avoid inserting duplicate code for blocks with multiple
-  // load/stores of the same address.
-  DenseMap<Value*, Value*> SunkAddrs;
-
-  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
-    Instruction *I = BBI++;
+  SunkAddrs.clear();
 
-    if (CastInst *CI = dyn_cast<CastInst>(I)) {
-      // If the source of the cast is a constant, then this should have
-      // already been constant folded.  The only reason NOT to constant fold
-      // it is if something (e.g. LSR) was careful to place the constant
-      // evaluation in a block other than then one that uses it (e.g. to hoist
-      // the address of globals out of a loop).  If this is the case, we don't
-      // want to forward-subst the cast.
-      if (isa<Constant>(CI->getOperand(0)))
-        continue;
-
-      bool Change = false;
-      if (TLI) {
-        Change = OptimizeNoopCopyExpression(CI, *TLI);
-        MadeChange |= Change;
-      }
-
-      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
-        MadeChange |= MoveExtToFormExtLoad(I);
-        MadeChange |= OptimizeExtUses(I);
-      }
-    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
-      MadeChange |= OptimizeCmpExpression(CI);
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-      if (TLI)
-        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
-                                         SunkAddrs);
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-      if (TLI)
-        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
-                                         SI->getOperand(0)->getType(),
-                                         SunkAddrs);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
-      if (GEPI->hasAllZeroIndices()) {
-        /// The GEP operand must be a pointer, so must its result -> BitCast
-        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
-                                          GEPI->getName(), GEPI);
-        GEPI->replaceAllUsesWith(NC);
-        GEPI->eraseFromParent();
-        MadeChange = true;
-        BBI = NC;
-      }
-    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
-      // If we found an inline asm expession, and if the target knows how to
-      // lower it to normal LLVM code, do so now.
-      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
-        if (TLI->ExpandInlineAsm(CI)) {
-          BBI = BB.begin();
-          // Avoid processing instructions out of order, which could cause
-          // reuse before a value is defined.
-          SunkAddrs.clear();
-        } else
-          // Sink address computing for memory operands into the block.
-          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
-      } else {
-        // Other CallInst optimizations that don't need to muck with the
-        // enclosing iterator here.
-        MadeChange |= OptimizeCallInst(CI);
-      }
-    }
-  }
+  CurInstIterator = BB.begin();
+  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+    MadeChange |= OptimizeInst(CurInstIterator++);
 
   return MadeChange;
 }
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index a0ea369d0cad..664c3f6a222f 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,9 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
 namespace {
   struct ConstantPropagation : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    ConstantPropagation() : FunctionPass(ID) {}
+    ConstantPropagation() : FunctionPass(ID) {
+      initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -46,7 +48,7 @@ namespace {
 
 char ConstantPropagation::ID = 0;
 INITIALIZE_PASS(ConstantPropagation, "constprop",
-                "Simple constant propagation", false, false);
+                "Simple constant propagation", false, false)
 
 FunctionPass *llvm::createConstantPropagationPass() {
   return new ConstantPropagation();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0d4e45de3466..be12973b645f 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -30,18 +31,20 @@ STATISTIC(NumCmps,      "Number of comparisons propagated");
 namespace {
   class CorrelatedValuePropagation : public FunctionPass {
     LazyValueInfo *LVI;
-    
+
     bool processSelect(SelectInst *SI);
     bool processPHI(PHINode *P);
     bool processMemAccess(Instruction *I);
     bool processCmp(CmpInst *C);
-    
+
   public:
     static char ID;
-    CorrelatedValuePropagation(): FunctionPass(ID) { }
-    
+    CorrelatedValuePropagation(): FunctionPass(ID) {
+     initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+    }
+
     bool runOnFunction(Function &F);
-    
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<LazyValueInfo>();
     }
@@ -49,8 +52,11 @@ namespace {
 }
 
 char CorrelatedValuePropagation::ID = 0;
-INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
-                "Value Propagation", false, false);
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+                "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+                "Value Propagation", false, false)
 
 // Public interface to the Value Propagation pass
 Pass *llvm::createCorrelatedValuePropagationPass() {
@@ -60,46 +66,51 @@ Pass *llvm::createCorrelatedValuePropagationPass() {
 bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
   if (S->getType()->isVectorTy()) return false;
   if (isa<Constant>(S->getOperand(0))) return false;
-  
+
   Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
   if (!C) return false;
-  
+
   ConstantInt *CI = dyn_cast<ConstantInt>(C);
   if (!CI) return false;
-  
-  S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+
+  Value *ReplaceWith = S->getOperand(1);
+  Value *Other = S->getOperand(2);
+  if (!CI->isOne()) std::swap(ReplaceWith, Other);
+  if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+  S->replaceAllUsesWith(ReplaceWith);
   S->eraseFromParent();
 
   ++NumSelects;
-  
+
   return true;
 }
 
 bool CorrelatedValuePropagation::processPHI(PHINode *P) {
   bool Changed = false;
-  
+
   BasicBlock *BB = P->getParent();
   for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
     Value *Incoming = P->getIncomingValue(i);
     if (isa<Constant>(Incoming)) continue;
-    
+
     Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
                                          P->getIncomingBlock(i),
                                          BB);
     if (!C) continue;
-    
+
     P->setIncomingValue(i, C);
     Changed = true;
   }
-  
-  if (Value *ConstVal = P->hasConstantValue()) {
-    P->replaceAllUsesWith(ConstVal);
+
+  if (Value *V = SimplifyInstruction(P)) {
+    P->replaceAllUsesWith(V);
     P->eraseFromParent();
     Changed = true;
   }
-  
+
   ++NumPhis;
-  
+
   return Changed;
 }
 
@@ -109,12 +120,12 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
     Pointer = L->getPointerOperand();
   else
     Pointer = cast<StoreInst>(I)->getPointerOperand();
-  
+
   if (isa<Constant>(Pointer)) return false;
-  
+
   Constant *C = LVI->getConstant(Pointer, I->getParent());
   if (!C) return false;
-  
+
   ++NumMemAccess;
   I->replaceUsesOfWith(Pointer, C);
   return true;
@@ -130,32 +141,32 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
   if (isa<Instruction>(Op0) &&
       cast<Instruction>(Op0)->getParent() == C->getParent())
     return false;
-  
+
   Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
   if (!Op1) return false;
-  
+
   pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
   if (PI == PE) return false;
-  
-  LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), 
+
+  LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
                                     C->getOperand(0), Op1, *PI, C->getParent());
   if (Result == LazyValueInfo::Unknown) return false;
 
   ++PI;
   while (PI != PE) {
-    LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), 
+    LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
                                     C->getOperand(0), Op1, *PI, C->getParent());
     if (Res != Result) return false;
     ++PI;
   }
-  
+
   ++NumCmps;
-  
+
   if (Result == LazyValueInfo::True)
     C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
   else
     C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-  
+
   C->eraseFromParent();
 
   return true;
@@ -163,9 +174,9 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
 
 bool CorrelatedValuePropagation::runOnFunction(Function &F) {
   LVI = &getAnalysis<LazyValueInfo>();
-  
+
   bool FnChanged = false;
-  
+
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
     bool BBChanged = false;
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
@@ -187,14 +198,9 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
         break;
       }
     }
-    
-    // Propagating correlated values might leave cruft around.
-    // Try to clean it up before we continue.
-    if (BBChanged)
-      SimplifyInstructionsInBlock(FI);
-    
+
     FnChanged |= BBChanged;
   }
-  
+
   return FnChanged;
 }
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 87ea8038356a..dbb68f3e0bd1 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,9 @@ namespace {
   //
   struct DeadInstElimination : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
-    DeadInstElimination() : BasicBlockPass(ID) {}
+    DeadInstElimination() : BasicBlockPass(ID) {
+      initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+    }
     virtual bool runOnBasicBlock(BasicBlock &BB) {
       bool Changed = false;
       for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -57,7 +59,7 @@ namespace {
 
 char DeadInstElimination::ID = 0;
 INITIALIZE_PASS(DeadInstElimination, "die",
-                "Dead Instruction Elimination", false, false);
+                "Dead Instruction Elimination", false, false)
 
 Pass *llvm::createDeadInstEliminationPass() {
   return new DeadInstElimination();
@@ -70,7 +72,9 @@ namespace {
   //
   struct DCE : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    DCE() : FunctionPass(ID) {}
+    DCE() : FunctionPass(ID) {
+      initializeDCEPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
@@ -81,7 +85,7 @@ namespace {
 }
 
 char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
 
 bool DCE::runOnFunction(Function &F) {
   // Start out with all of the instructions in the worklist...
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c8fd9d9fa556..867a06ad202d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -19,17 +19,20 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumFastStores, "Number of stores deleted");
@@ -37,58 +40,107 @@ STATISTIC(NumFastOther , "Number of other instrs removed");
 
 namespace {
   struct DSE : public FunctionPass {
-    TargetData *TD;
+    AliasAnalysis *AA;
+    MemoryDependenceAnalysis *MD;
 
     static char ID; // Pass identification, replacement for typeid
-    DSE() : FunctionPass(ID) {}
+    DSE() : FunctionPass(ID), AA(0), MD(0) {
+      initializeDSEPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F) {
-      bool Changed = false;
-      
+      AA = &getAnalysis<AliasAnalysis>();
+      MD = &getAnalysis<MemoryDependenceAnalysis>();
       DominatorTree &DT = getAnalysis<DominatorTree>();
       
+      bool Changed = false;
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
         // Only check non-dead blocks.  Dead blocks may have strange pointer
         // cycles that will confuse alias analysis.
         if (DT.isReachableFromEntry(I))
           Changed |= runOnBasicBlock(*I);
+      
+      AA = 0; MD = 0;
       return Changed;
     }
     
     bool runOnBasicBlock(BasicBlock &BB);
-    bool handleFreeWithNonTrivialDependency(const CallInst *F,
-                                            MemDepResult Dep);
+    bool HandleFree(CallInst *F);
     bool handleEndBlock(BasicBlock &BB);
-    bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
-                              BasicBlock::iterator &BBI,
-                              SmallPtrSet<Value*, 64> &deadPointers);
-    void DeleteDeadInstruction(Instruction *I,
-                               SmallPtrSet<Value*, 64> *deadPointers = 0);
-    
+    void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+                               SmallPtrSet<Value*, 16> &DeadStackObjects);
 
-    // getAnalysisUsage - We require post dominance frontiers (aka Control
-    // Dependence Graph)
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<DominatorTree>();
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<MemoryDependenceAnalysis>();
+      AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
-
-    unsigned getPointerSize(Value *V) const;
   };
 }
 
 char DSE::ID = 0;
-INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
 
 FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 
-/// doesClobberMemory - Does this instruction clobber (write without reading)
-/// some memory?
-static bool doesClobberMemory(Instruction *I) {
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction.  Before we do, go through
+/// and zero out all the operands of this instruction.  If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+                                  MemoryDependenceAnalysis &MD,
+                                  SmallPtrSet<Value*, 16> *ValueSet = 0) {
+  SmallVector<Instruction*, 32> NowDeadInsts;
+  
+  NowDeadInsts.push_back(I);
+  --NumFastOther;
+  
+  // Before we touch this instruction, remove it from memdep!
+  do {
+    Instruction *DeadInst = NowDeadInsts.pop_back_val();
+    ++NumFastOther;
+    
+    // This instruction is dead, zap it, in stages.  Start by removing it from
+    // MemDep, which needs to know the operands and needs it to be in the
+    // function.
+    MD.removeInstruction(DeadInst);
+    
+    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+      Value *Op = DeadInst->getOperand(op);
+      DeadInst->setOperand(op, 0);
+      
+      // If this operand just became dead, add it to the NowDeadInsts list.
+      if (!Op->use_empty()) continue;
+      
+      if (Instruction *OpI = dyn_cast<Instruction>(Op))
+        if (isInstructionTriviallyDead(OpI))
+          NowDeadInsts.push_back(OpI);
+    }
+    
+    DeadInst->eraseFromParent();
+    
+    if (ValueSet) ValueSet->erase(DeadInst);
+  } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory?  This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
   if (isa<StoreInst>(I))
     return true;
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -106,146 +158,296 @@ static bool doesClobberMemory(Instruction *I) {
   return false;
 }
 
-/// isElidable - If the value of this instruction and the memory it writes to is
-/// unused, may we delete this instrtction?
-static bool isElidable(Instruction *I) {
-  assert(doesClobberMemory(I));
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    return II->getIntrinsicID() != Intrinsic::lifetime_end;
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return AA.getLocation(SI);
+  
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+    // memcpy/memmove/memset.
+    AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+    // If we don't have target data around, an unknown size in Location means
+    // that we should use the size of the pointee type.  This isn't valid for
+    // memset/memcpy, which writes more than an i8.
+    if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+      return AliasAnalysis::Location();
+    return Loc;
+  }
+  
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+  if (II == 0) return AliasAnalysis::Location();
+  
+  switch (II->getIntrinsicID()) {
+  default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+  case Intrinsic::init_trampoline:
+    // If we don't have target data around, an unknown size in Location means
+    // that we should use the size of the pointee type.  This isn't valid for
+    // init.trampoline, which writes more than an i8.
+    if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+      
+    // FIXME: We don't know the size of the trampoline, so we can't really
+    // handle it here.
+    return AliasAnalysis::Location(II->getArgOperand(0));
+  case Intrinsic::lifetime_end: {
+    uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+    return AliasAnalysis::Location(II->getArgOperand(1), Len);
+  }
+  }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location 
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+  assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+  
+  // The only instructions that both read and write are the mem transfer
+  // instructions (memcpy/memmove).
+  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+    return AA.getLocationForSource(MTI);
+  return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// is unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+  // Don't remove volatile stores.
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return !SI->isVolatile();
-  return true;
+  
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
+  default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+  case Intrinsic::lifetime_end:
+    // Never remove dead lifetime_end's, e.g. because it is followed by a
+    // free.
+    return false;
+  case Intrinsic::init_trampoline:
+    // Always safe to remove init_trampoline.
+    return true;
+    
+  case Intrinsic::memset:
+  case Intrinsic::memmove:
+  case Intrinsic::memcpy:
+    // Don't remove volatile memory intrinsics.
+    return !cast<MemIntrinsic>(II)->isVolatile();
+  }
 }
 
-/// getPointerOperand - Return the pointer that is being clobbered.
-static Value *getPointerOperand(Instruction *I) {
-  assert(doesClobberMemory(I));
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
-    return MI->getArgOperand(0);
+    return MI->getDest();
 
   IntrinsicInst *II = cast<IntrinsicInst>(I);
   switch (II->getIntrinsicID()) {
   default: assert(false && "Unexpected intrinsic!");
   case Intrinsic::init_trampoline:
     return II->getArgOperand(0);
-  case Intrinsic::lifetime_end:
-    return II->getArgOperand(1);
   }
 }
 
-/// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns -1.
-static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
-  assert(doesClobberMemory(I));
-  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-    if (!TD) return -1u;
-    return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+  const TargetData *TD = AA.getTargetData();
+  if (TD == 0)
+    return AliasAnalysis::UnknownSize;
+  
+  if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+    // Get size information for the alloca
+    if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+      return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+    return AliasAnalysis::UnknownSize;
   }
+  
+  assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+  const PointerType *PT = cast<PointerType>(V->getType());
+  return TD->getTypeAllocSize(PT->getElementType());
+}
 
-  Value *Len;
-  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
-    Len = MI->getLength();
-  } else {
-    IntrinsicInst *II = cast<IntrinsicInst>(I);
-    switch (II->getIntrinsicID()) {
-    default: assert(false && "Unexpected intrinsic!");
-    case Intrinsic::init_trampoline:
-      return -1u;
-    case Intrinsic::lifetime_end:
-      Len = II->getArgOperand(0);
-      break;
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+    return !AI->isArrayAllocation();
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return !GV->mayBeOverridden();
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasByValAttr();
+  return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+                                const AliasAnalysis::Location &Earlier,
+                                AliasAnalysis &AA) {
+  const Value *P1 = Earlier.Ptr->stripPointerCasts();
+  const Value *P2 = Later.Ptr->stripPointerCasts();
+  
+  // If the start pointers are the same, we just have to compare sizes to see if
+  // the later store was larger than the earlier store.
+  if (P1 == P2) {
+    // If we don't know the sizes of either access, then we can't do a
+    // comparison.
+    if (Later.Size == AliasAnalysis::UnknownSize ||
+        Earlier.Size == AliasAnalysis::UnknownSize) {
+      // If we have no TargetData information around, then the size of the store
+      // is inferrable from the pointee type.  If they are the same type, then
+      // we know that the store is safe.
+      if (AA.getTargetData() == 0)
+        return Later.Ptr->getType() == Earlier.Ptr->getType();
+      return false;
     }
+    
+    // Make sure that the Later size is >= the Earlier size.
+    if (Later.Size < Earlier.Size)
+      return false;
+    return true;
   }
-  if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
-    if (!LenCI->isAllOnesValue())
-      return LenCI->getZExtValue();
-  return -1u;
+  
+  // Otherwise, we have to have size information, and the later store has to be
+  // larger than the earlier one.
+  if (Later.Size == AliasAnalysis::UnknownSize ||
+      Earlier.Size == AliasAnalysis::UnknownSize ||
+      Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+    return false;
+  
+  // Check to see if the later store is to the entire object (either a global,
+  // an alloca, or a byval argument).  If so, then it clearly overwrites any
+  // other store to the same object.
+  const TargetData &TD = *AA.getTargetData();
+  
+  const Value *UO1 = GetUnderlyingObject(P1, &TD),
+              *UO2 = GetUnderlyingObject(P2, &TD);
+  
+  // If we can't resolve the same pointers to the same object, then we can't
+  // analyze them at all.
+  if (UO1 != UO2)
+    return false;
+  
+  // If the "Later" store is to a recognizable object, get its size.
+  if (isObjectPointerWithTrustworthySize(UO2)) {
+    uint64_t ObjectSize =
+      TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+    if (ObjectSize == Later.Size)
+      return true;
+  }
+  
+  // Okay, we have stores to two completely different pointers.  Try to
+  // decompose the pointer into a "base + constant_offset" form.  If the base
+  // pointers are equal, then we can reason about the two stores.
+  int64_t Off1 = 0, Off2 = 0;
+  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
+  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+  
+  // If the base pointers still differ, we have two completely different stores.
+  if (BP1 != BP2)
+    return false;
+  
+  // Otherwise, we might have a situation like:
+  //  store i16 -> P + 1 Byte
+  //  store i32 -> P
+  // In this case, we see if the later store completely overlaps all bytes
+  // stored by the previous store.
+  if (Off1 < Off2 ||                       // Earlier starts before Later.
+      Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
+    return false;
+  // Otherwise, we have complete overlap.
+  return true;
 }
 
-/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
-/// greater than or equal to the store in I2.  This returns false if we don't
-/// know.
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense.  Consider this case: 
+///
+///   memcpy(A <- B)
+///   memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
 ///
-static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
-                                   const TargetData *TD) {
-  const Type *I1Ty = getPointerOperand(I1)->getType();
-  const Type *I2Ty = getPointerOperand(I2)->getType();
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+                               const AliasAnalysis::Location &InstStoreLoc,
+                               Instruction *DepWrite, AliasAnalysis &AA) {
+  // Self reads can only happen for instructions that read memory.  Get the
+  // location read.
+  AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+  if (InstReadLoc.Ptr == 0) return false;  // Not a reading instruction.
   
-  // Exactly the same type, must have exactly the same size.
-  if (I1Ty == I2Ty) return true;
+  // If the read and written loc obviously don't alias, it isn't a read.
+  if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
   
-  int I1Size = getStoreSize(I1, TD);
-  int I2Size = getStoreSize(I2, TD);
+  // Okay, 'Inst' may copy over itself.  However, we can still remove a the
+  // DepWrite instruction if we can prove that it reads from the same location
+  // as Inst.  This handles useful cases like:
+  //   memcpy(A <- B)
+  //   memcpy(A <- B)
+  // Here we don't know if A/B may alias, but we do know that B/B are must
+  // aliases, so removing the first memcpy is safe (assuming it writes <= #
+  // bytes as the second one.
+  AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
   
-  return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+  if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+    return false;
+  
+  // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+  // then it can't be considered dead.
+  return true;
 }
 
-bool DSE::runOnBasicBlock(BasicBlock &BB) {
-  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-  TD = getAnalysisIfAvailable<TargetData>();
 
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
   bool MadeChange = false;
   
   // Do a top-down walk on the BB.
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
     Instruction *Inst = BBI++;
     
-    // If we find a store or a free, get its memory dependence.
-    if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
-      continue;
-    
-    MemDepResult InstDep = MD.getDependency(Inst);
-    
-    // Ignore non-local stores.
-    // FIXME: cross-block DSE would be fun. :)
-    if (InstDep.isNonLocal()) continue;
-  
-    // Handle frees whose dependencies are non-trivial.
-    if (const CallInst *F = isFreeCall(Inst)) {
-      MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
+    // Handle 'free' calls specially.
+    if (CallInst *F = isFreeCall(Inst)) {
+      MadeChange |= HandleFree(F);
       continue;
     }
     
-    // If not a definite must-alias dependency, ignore it.
-    if (!InstDep.isDef())
+    // If we find something that writes memory, get its memory dependence.
+    if (!hasMemoryWrite(Inst))
       continue;
-    
-    // If this is a store-store dependence, then the previous store is dead so
-    // long as this store is at least as big as it.
-    if (doesClobberMemory(InstDep.getInst())) {
-      Instruction *DepStore = InstDep.getInst();
-      if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
-          isElidable(DepStore)) {
-        // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepStore);
-        ++NumFastStores;
-        MadeChange = true;
 
-        // DeleteDeadInstruction can delete the current instruction in loop
-        // cases, reset BBI.
-        BBI = Inst;
-        if (BBI != BB.begin())
-          --BBI;
-        continue;
-      }
-    }
+    MemDepResult InstDep = MD->getDependency(Inst);
     
-    if (!isElidable(Inst))
+    // Ignore non-local store liveness.
+    // FIXME: cross-block DSE would be fun. :)
+    if (InstDep.isNonLocal() || 
+        // Ignore self dependence, which happens in the entry block of the
+        // function.
+        InstDep.getInst() == Inst)
       continue;
-    
+     
     // If we're storing the same value back to a pointer that we just
     // loaded from, then the store can be removed.
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
         if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-            SI->getOperand(0) == DepLoad) {
+            SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
+                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
+          
           // DeleteDeadInstruction can delete the current instruction.  Save BBI
           // in case we need it.
           WeakVH NextInst(BBI);
           
-          DeleteDeadInstruction(SI);
+          DeleteDeadInstruction(SI, *MD);
           
           if (NextInst == 0)  // Next instruction deleted.
             BBI = BB.begin();
@@ -258,24 +460,63 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       }
     }
     
-    // If this is a lifetime end marker, we can throw away the store.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        // Delete the store and now-dead instructions that feed it.
-        // DeleteDeadInstruction can delete the current instruction.  Save BBI
-        // in case we need it.
-        WeakVH NextInst(BBI);
-        
-        DeleteDeadInstruction(Inst);
+    // Figure out what location is being stored to.
+    AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+    // If we didn't get a useful location, fail.
+    if (Loc.Ptr == 0)
+      continue;
+    
+    while (!InstDep.isNonLocal()) {
+      // Get the memory clobbered by the instruction we depend on.  MemDep will
+      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
+      // end up depending on a may- or must-aliased load, then we can't optimize
+      // away the store and we bail out.  However, if we depend on on something
+      // that overwrites the memory location we *can* potentially optimize it.
+      //
+      // Find out what memory location the dependant instruction stores.
+      Instruction *DepWrite = InstDep.getInst();
+      AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+      // If we didn't get a useful location, or if it isn't a size, bail out.
+      if (DepLoc.Ptr == 0)
+        break;
+
+      // If we find a write that is a) removable (i.e., non-volatile), b) is
+      // completely obliterated by the store to 'Loc', and c) which we know that
+      // 'Inst' doesn't load from, then we can remove it.
+      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+          !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
+              << *DepWrite << "\n  KILLER: " << *Inst << '\n');
         
-        if (NextInst == 0)  // Next instruction deleted.
-          BBI = BB.begin();
-        else if (BBI != BB.begin())  // Revisit this instruction if possible.
-          --BBI;
+        // Delete the store and now-dead instructions that feed it.
+        DeleteDeadInstruction(DepWrite, *MD);
         ++NumFastStores;
         MadeChange = true;
-        continue;
+        
+        // DeleteDeadInstruction can delete the current instruction in loop
+        // cases, reset BBI.
+        BBI = Inst;
+        if (BBI != BB.begin())
+          --BBI;
+        break;
       }
+      
+      // If this is a may-aliased store that is clobbering the store value, we
+      // can keep searching past it for another must-aliased pointer that stores
+      // to the same location.  For example, in:
+      //   store -> P
+      //   store -> Q
+      //   store -> P
+      // we can remove the first store to P even though we don't know if P and Q
+      // alias.
+      if (DepWrite == &BB.front()) break;
+      
+      // Can't look past this instruction if it might read 'Loc'.
+      if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+        break;
+        
+      InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
     }
   }
   
@@ -287,26 +528,36 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
   return MadeChange;
 }
 
-/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
-/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
-                                             MemDepResult Dep) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  
-  Instruction *Dependency = Dep.getInst();
-  if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
-    return false;
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+  MemDepResult Dep = MD->getDependency(F);
+  do {
+    if (Dep.isNonLocal()) return false;
+    
+    Instruction *Dependency = Dep.getInst();
+    if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+      return false;
   
-  Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
+    Value *DepPointer =
+      GetUnderlyingObject(getStoredPointerOperand(Dependency));
 
-  // Check for aliasing.
-  if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
-         AliasAnalysis::MustAlias)
-    return false;
+    // Check for aliasing.
+    if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+      return false;
+  
+    // DCE instructions only used to calculate that store
+    DeleteDeadInstruction(Dependency, *MD);
+    ++NumFastStores;
+
+    // Inst's old Dependency is now deleted. Compute the next dependency,
+    // which may also be dead, as in
+    //    s[0] = 0;
+    //    s[1] = 0; // This has just been deleted.
+    //    free(s);
+    Dep = MD->getDependency(F);
+  } while (!Dep.isNonLocal());
   
-  // DCE instructions only used to calculate that store
-  DeleteDeadInstruction(Dependency);
-  ++NumFastStores;
   return true;
 }
 
@@ -317,259 +568,163 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
 /// store i32 1, i32* %A
 /// ret void
 bool DSE::handleEndBlock(BasicBlock &BB) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  
   bool MadeChange = false;
   
-  // Pointers alloca'd in this function are dead in the end block
-  SmallPtrSet<Value*, 64> deadPointers;
+  // Keep track of all of the stack objects that are dead at the end of the
+  // function.
+  SmallPtrSet<Value*, 16> DeadStackObjects;
   
   // Find all of the alloca'd pointers in the entry block.
   BasicBlock *Entry = BB.getParent()->begin();
   for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      deadPointers.insert(AI);
+      DeadStackObjects.insert(AI);
   
   // Treat byval arguments the same, stores to them are dead at the end of the
   // function.
   for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
        AE = BB.getParent()->arg_end(); AI != AE; ++AI)
     if (AI->hasByValAttr())
-      deadPointers.insert(AI);
+      DeadStackObjects.insert(AI);
   
   // Scan the basic block backwards
   for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
     --BBI;
     
-    // If we find a store whose pointer is dead.
-    if (doesClobberMemory(BBI)) {
-      if (isElidable(BBI)) {
-        // See through pointer-to-pointer bitcasts
-        Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
-
-        // Alloca'd pointers or byval arguments (which are functionally like
-        // alloca's) are valid candidates for removal.
-        if (deadPointers.count(pointerOperand)) {
-          // DCE instructions only used to calculate that store.
-          Instruction *Dead = BBI;
-          ++BBI;
-          DeleteDeadInstruction(Dead, &deadPointers);
-          ++NumFastStores;
-          MadeChange = true;
-          continue;
-        }
-      }
-      
-      // Because a memcpy or memmove is also a load, we can't skip it if we
-      // didn't remove it.
-      if (!isa<MemTransferInst>(BBI))
+    // If we find a store, check to see if it points into a dead stack value.
+    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+      // See through pointer-to-pointer bitcasts
+      Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+      // Stores to stack values are valid candidates for removal.
+      if (DeadStackObjects.count(Pointer)) {
+        Instruction *Dead = BBI++;
+        
+        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
+                     << *Dead << "\n  Object: " << *Pointer << '\n');
+        
+        // DCE instructions only used to calculate that store.
+        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+        ++NumFastStores;
+        MadeChange = true;
         continue;
+      }
     }
     
-    Value *killPointer = 0;
-    uint64_t killPointerSize = ~0UL;
+    // Remove any dead non-memory-mutating instructions.
+    if (isInstructionTriviallyDead(BBI)) {
+      Instruction *Inst = BBI++;
+      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+      ++NumFastOther;
+      MadeChange = true;
+      continue;
+    }
     
-    // If we encounter a use of the pointer, it is no longer considered dead
-    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
-      // However, if this load is unused and not volatile, we can go ahead and
-      // remove it, and not have to worry about it making our pointer undead!
-      if (L->use_empty() && !L->isVolatile()) {
-        ++BBI;
-        DeleteDeadInstruction(L, &deadPointers);
-        ++NumFastOther;
-        MadeChange = true;
-        continue;
-      }
-      
-      killPointer = L->getPointerOperand();
-    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
-      killPointer = V->getOperand(0);
-    } else if (isa<MemTransferInst>(BBI) &&
-               isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
-      killPointer = cast<MemTransferInst>(BBI)->getSource();
-      killPointerSize = cast<ConstantInt>(
-                       cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
-    } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
-      deadPointers.erase(A);
-      
-      // Dead alloca's can be DCE'd when we reach them
-      if (A->use_empty()) {
-        ++BBI;
-        DeleteDeadInstruction(A, &deadPointers);
-        ++NumFastOther;
-        MadeChange = true;
-      }
-      
+    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+      DeadStackObjects.erase(A);
       continue;
-    } else if (CallSite CS = cast<Value>(BBI)) {
-      // If this call does not access memory, it can't
-      // be undeadifying any of our pointers.
-      if (AA.doesNotAccessMemory(CS))
+    }
+    
+    if (CallSite CS = cast<Value>(BBI)) {
+      // If this call does not access memory, it can't be loading any of our
+      // pointers.
+      if (AA->doesNotAccessMemory(CS))
         continue;
       
-      unsigned modRef = 0;
-      unsigned other = 0;
+      unsigned NumModRef = 0, NumOther = 0;
       
-      // Remove any pointers made undead by the call from the dead set
-      std::vector<Value*> dead;
-      for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
-           E = deadPointers.end(); I != E; ++I) {
-        // HACK: if we detect that our AA is imprecise, it's not
-        // worth it to scan the rest of the deadPointers set.  Just
-        // assume that the AA will return ModRef for everything, and
-        // go ahead and bail.
-        if (modRef >= 16 && other == 0) {
-          deadPointers.clear();
+      // If the call might load from any of our allocas, then any store above
+      // the call is live.
+      SmallVector<Value*, 8> LiveAllocas;
+      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+           E = DeadStackObjects.end(); I != E; ++I) {
+        // If we detect that our AA is imprecise, it's not worth it to scan the
+        // rest of the DeadPointers set.  Just assume that the AA will return
+        // ModRef for everything, and go ahead and bail out.
+        if (NumModRef >= 16 && NumOther == 0)
           return MadeChange;
-        }
-        
-        // See if the call site touches it
-        AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
-                                                         getPointerSize(*I));
+
+        // See if the call site touches it.
+        AliasAnalysis::ModRefResult A = 
+          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
         
         if (A == AliasAnalysis::ModRef)
-          ++modRef;
+          ++NumModRef;
         else
-          ++other;
+          ++NumOther;
         
         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
-          dead.push_back(*I);
+          LiveAllocas.push_back(*I);
       }
-
-      for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
-           I != E; ++I)
-        deadPointers.erase(*I);
       
-      continue;
-    } else if (isInstructionTriviallyDead(BBI)) {
-      // For any non-memory-affecting non-terminators, DCE them as we reach them
-      Instruction *Inst = BBI;
-      ++BBI;
-      DeleteDeadInstruction(Inst, &deadPointers);
-      ++NumFastOther;
-      MadeChange = true;
+      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+           E = LiveAllocas.end(); I != E; ++I)
+        DeadStackObjects.erase(*I);
+      
+      // If all of the allocas were clobbered by the call then we're not going
+      // to find anything else to process.
+      if (DeadStackObjects.empty())
+        return MadeChange;
+      
       continue;
     }
     
-    if (!killPointer)
+    AliasAnalysis::Location LoadedLoc;
+    
+    // If we encounter a use of the pointer, it is no longer considered dead
+    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+      LoadedLoc = AA->getLocation(L);
+    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+      LoadedLoc = AA->getLocation(V);
+    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+      LoadedLoc = AA->getLocationForSource(MTI);
+    } else {
+      // Not a loading instruction.
       continue;
+    }
 
-    killPointer = killPointer->getUnderlyingObject();
+    // Remove any allocas from the DeadPointer set that are loaded, as this
+    // makes any stores above the access live.
+    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
 
-    // Deal with undead pointers
-    MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
-                                       deadPointers);
+    // If all of the allocas were clobbered by the access then we're not going
+    // to find anything else to process.
+    if (DeadStackObjects.empty())
+      break;
   }
   
   return MadeChange;
 }
 
-/// RemoveUndeadPointers - check for uses of a pointer that make it
-/// undead when scanning for dead stores to alloca's.
-bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
-                               BasicBlock::iterator &BBI,
-                               SmallPtrSet<Value*, 64> &deadPointers) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
-  // If the kill pointer can be easily reduced to an alloca,
-  // don't bother doing extraneous AA queries.
-  if (deadPointers.count(killPointer)) {
-    deadPointers.erase(killPointer);
-    return false;
-  }
-  
-  // A global can't be in the dead pointer set.
-  if (isa<GlobalValue>(killPointer))
-    return false;
-  
-  bool MadeChange = false;
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set.  If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+                                SmallPtrSet<Value*, 16> &DeadStackObjects) {
+  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+  // A constant can't be in the dead pointer set.
+  if (isa<Constant>(UnderlyingPointer))
+    return;
   
-  SmallVector<Value*, 16> undead;
+  // If the kill pointer can be easily reduced to an alloca, don't bother doing
+  // extraneous AA queries.
+  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+    DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+    return;
+  }
   
-  for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
-       E = deadPointers.end(); I != E; ++I) {
-    // See if this pointer could alias it
-    AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
-                                            killPointer, killPointerSize);
-
-    // If it must-alias and a store, we can delete it
-    if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
-      StoreInst *S = cast<StoreInst>(BBI);
-
-      // Remove it!
-      ++BBI;
-      DeleteDeadInstruction(S, &deadPointers);
-      ++NumFastStores;
-      MadeChange = true;
-
-      continue;
-
-      // Otherwise, it is undead
-    } else if (A != AliasAnalysis::NoAlias)
-      undead.push_back(*I);
+  SmallVector<Value*, 16> NowLive;
+  for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+       E = DeadStackObjects.end(); I != E; ++I) {
+    // See if the loaded location could alias the stack location.
+    AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+    if (!AA->isNoAlias(StackLoc, LoadedLoc))
+      NowLive.push_back(*I);
   }
 
-  for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+  for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
        I != E; ++I)
-      deadPointers.erase(*I);
-  
-  return MadeChange;
+    DeadStackObjects.erase(*I);
 }
 
-/// DeleteDeadInstruction - Delete this instruction.  Before we do, go through
-/// and zero out all the operands of this instruction.  If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
-/// If ValueSet is non-null, remove any deleted instructions from it as well.
-///
-void DSE::DeleteDeadInstruction(Instruction *I,
-                                SmallPtrSet<Value*, 64> *ValueSet) {
-  SmallVector<Instruction*, 32> NowDeadInsts;
-  
-  NowDeadInsts.push_back(I);
-  --NumFastOther;
-
-  // Before we touch this instruction, remove it from memdep!
-  MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
-  do {
-    Instruction *DeadInst = NowDeadInsts.pop_back_val();
-    
-    ++NumFastOther;
-    
-    // This instruction is dead, zap it, in stages.  Start by removing it from
-    // MemDep, which needs to know the operands and needs it to be in the
-    // function.
-    MDA.removeInstruction(DeadInst);
-    
-    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
-      Value *Op = DeadInst->getOperand(op);
-      DeadInst->setOperand(op, 0);
-      
-      // If this operand just became dead, add it to the NowDeadInsts list.
-      if (!Op->use_empty()) continue;
-      
-      if (Instruction *OpI = dyn_cast<Instruction>(Op))
-        if (isInstructionTriviallyDead(OpI))
-          NowDeadInsts.push_back(OpI);
-    }
-    
-    DeadInst->eraseFromParent();
-    
-    if (ValueSet) ValueSet->erase(DeadInst);
-  } while (!NowDeadInsts.empty());
-}
-
-unsigned DSE::getPointerSize(Value *V) const {
-  if (TD) {
-    if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
-      // Get size information for the alloca
-      if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
-        return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
-    } else {
-      assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
-      const PointerType *PT = cast<PointerType>(V->getType());
-      return TD->getTypeAllocSize(PT->getElementType());
-    }
-  }
-  return ~0U;
-}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 000000000000..3d3f17b26fc6
--- /dev/null
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE,      "Number of instructions CSE'd");
+STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
+STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
+STATISTIC(NumDSE,      "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+  return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue 
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// SimpleValue - Instances of this struct represent available values in the
+  /// scoped hash table.
+  struct SimpleValue {
+    Instruction *Inst;
+    
+    SimpleValue(Instruction *I) : Inst(I) {
+      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+    }
+    
+    bool isSentinel() const {
+      return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+             Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+    }
+    
+    static bool canHandle(Instruction *Inst) {
+      // This can only handle non-void readnone functions.
+      if (CallInst *CI = dyn_cast<CallInst>(Inst))
+        return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+      return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+             isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+             isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+             isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+             isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+    }
+  };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+  static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+  static inline SimpleValue getEmptyKey() {
+    return DenseMapInfo<Instruction*>::getEmptyKey();
+  }
+  static inline SimpleValue getTombstoneKey() {
+    return DenseMapInfo<Instruction*>::getTombstoneKey();
+  }
+  static unsigned getHashValue(SimpleValue Val);
+  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+  Instruction *Inst = Val.Inst;
+  
+  // Hash in all of the operands as pointers.
+  unsigned Res = 0;
+  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+    Res ^= getHash(Inst->getOperand(i)) << i;
+
+  if (CastInst *CI = dyn_cast<CastInst>(Inst))
+    Res ^= getHash(CI->getType());
+  else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+    Res ^= CI->getPredicate();
+  else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+    for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+         E = EVI->idx_end(); I != E; ++I)
+      Res ^= *I;
+  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+    for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+         E = IVI->idx_end(); I != E; ++I)
+      Res ^= *I;
+  } else {
+    // nothing extra to hash in.
+    assert((isa<CallInst>(Inst) ||
+            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+           "Invalid/unknown instruction");
+  }
+
+  // Mix in the opcode.
+  return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+  if (LHS.isSentinel() || RHS.isSentinel())
+    return LHSI == RHSI;
+  
+  if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+  return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue 
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// CallValue - Instances of this struct represent available call values in
+  /// the scoped hash table.
+  struct CallValue {
+    Instruction *Inst;
+    
+    CallValue(Instruction *I) : Inst(I) {
+      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+    }
+    
+    bool isSentinel() const {
+      return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+             Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+    }
+    
+    static bool canHandle(Instruction *Inst) {
+      // Don't value number anything that returns void.
+      if (Inst->getType()->isVoidTy())
+        return false;
+      
+      CallInst *CI = dyn_cast<CallInst>(Inst);
+      if (CI == 0 || !CI->onlyReadsMemory())
+        return false;
+      return true;
+    }
+  };
+}
+
+namespace llvm {
+  // CallValue is POD.
+  template<> struct isPodLike<CallValue> {
+    static const bool value = true;
+  };
+  
+  template<> struct DenseMapInfo<CallValue> {
+    static inline CallValue getEmptyKey() {
+      return DenseMapInfo<Instruction*>::getEmptyKey();
+    }
+    static inline CallValue getTombstoneKey() {
+      return DenseMapInfo<Instruction*>::getTombstoneKey();
+    }
+    static unsigned getHashValue(CallValue Val);
+    static bool isEqual(CallValue LHS, CallValue RHS);
+  };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+  Instruction *Inst = Val.Inst;
+  // Hash in all of the operands as pointers.
+  unsigned Res = 0;
+  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+    assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+           "Cannot value number calls with metadata operands");
+    Res ^= getHash(Inst->getOperand(i)) << i;
+  }
+  
+  // Mix in the opcode.
+  return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+  if (LHS.isSentinel() || RHS.isSentinel())
+    return LHSI == RHSI;
+  return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass. 
+//===----------------------------------------------------------------------===//
+
+namespace {
+  
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes.  It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective.  It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+  const TargetData *TD;
+  DominatorTree *DT;
+  typedef RecyclingAllocator<BumpPtrAllocator,
+                      ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+  typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+                          AllocatorTy> ScopedHTType;
+  
+  /// AvailableValues - This scoped hash table contains the current values of
+  /// all of our simple scalar expressions.  As we walk down the domtree, we
+  /// look to see if instructions are in this: if so, we replace them with what
+  /// we find, otherwise we insert them so that dominated values can succeed in
+  /// their lookup.
+  ScopedHTType *AvailableValues;
+  
+  /// AvailableLoads - This scoped hash table contains the current values
+  /// of loads.  This allows us to get efficient access to dominating loads when
+  /// we have a fully redundant load.  In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count.  The current generation count is
+  /// incremented after every possibly writing memory operation, which ensures
+  /// that we only CSE loads with other loads that have no intervening store.
+  typedef RecyclingAllocator<BumpPtrAllocator,
+    ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+  typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+                          DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+  LoadHTType *AvailableLoads;
+  
+  /// AvailableCalls - This scoped hash table contains the current values
+  /// of read-only call values.  It uses the same generation count as loads.
+  typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+  CallHTType *AvailableCalls;
+  
+  /// CurrentGeneration - This is the current generation of the memory value.
+  unsigned CurrentGeneration;
+  
+  static char ID;
+  explicit EarlyCSE() : FunctionPass(ID) {
+    initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F);
+
+private:
+  
+  bool processNode(DomTreeNode *Node);
+  
+  // This transformation requires dominator postdominator info
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<DominatorTree>();
+    AU.setPreservesCFG();
+  }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+  return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+  // Define a scope in the scoped hash table.  When we are done processing this
+  // domtree node and recurse back up to our parent domtree node, this will pop
+  // off all the values we install.
+  ScopedHTType::ScopeTy Scope(*AvailableValues);
+  
+  // Define a scope for the load values so that anything we add will get
+  // popped when we recurse back up to our parent domtree node.
+  LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+  
+  // Define a scope for the call values so that anything we add will get
+  // popped when we recurse back up to our parent domtree node.
+  CallHTType::ScopeTy CallScope(*AvailableCalls);
+  
+  BasicBlock *BB = Node->getBlock();
+  
+  // If this block has a single predecessor, then the predecessor is the parent
+  // of the domtree node and all of the live out memory values are still current
+  // in this block.  If this block has multiple predecessors, then they could
+  // have invalidated the live-out memory values of our parent value.  For now,
+  // just be conservative and invalidate memory if this block has multiple
+  // predecessors.
+  if (BB->getSinglePredecessor() == 0)
+    ++CurrentGeneration;
+  
+  /// LastStore - Keep track of the last non-volatile store that we saw... for
+  /// as long as there in no instruction that reads memory.  If we see a store
+  /// to the same location, we delete the dead store.  This zaps trivial dead
+  /// stores which can occur in bitfield code among other things.
+  StoreInst *LastStore = 0;
+  
+  bool Changed = false;
+
+  // See if any instructions in the block can be eliminated.  If so, do it.  If
+  // not, add them to AvailableValues.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+    Instruction *Inst = I++;
+    
+    // Dead instructions should just be removed.
+    if (isInstructionTriviallyDead(Inst)) {
+      DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+      Inst->eraseFromParent();
+      Changed = true;
+      ++NumSimplify;
+      continue;
+    }
+    
+    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+    // its simpler value.
+    if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+      DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
+      Inst->replaceAllUsesWith(V);
+      Inst->eraseFromParent();
+      Changed = true;
+      ++NumSimplify;
+      continue;
+    }
+    
+    // If this is a simple instruction that we can value number, process it.
+    if (SimpleValue::canHandle(Inst)) {
+      // See if the instruction has an available value.  If so, use it.
+      if (Value *V = AvailableValues->lookup(Inst)) {
+        DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
+        Inst->replaceAllUsesWith(V);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSE;
+        continue;
+      }
+      
+      // Otherwise, just remember that this value is available.
+      AvailableValues->insert(Inst, Inst);
+      continue;
+    }
+    
+    // If this is a non-volatile load, process it.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      // Ignore volatile loads.
+      if (LI->isVolatile()) {
+        LastStore = 0;
+        continue;
+      }
+      
+      // If we have an available version of this load, and if it is the right
+      // generation, replace this instruction.
+      std::pair<Value*, unsigned> InVal =
+        AvailableLoads->lookup(Inst->getOperand(0));
+      if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+        DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << "  to: "
+              << *InVal.first << '\n');
+        if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSELoad;
+        continue;
+      }
+      
+      // Otherwise, remember that we have this instruction.
+      AvailableLoads->insert(Inst->getOperand(0),
+                          std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+      LastStore = 0;
+      continue;
+    }
+    
+    // If this instruction may read from memory, forget LastStore.
+    if (Inst->mayReadFromMemory())
+      LastStore = 0;
+    
+    // If this is a read-only call, process it.
+    if (CallValue::canHandle(Inst)) {
+      // If we have an available version of this call, and if it is the right
+      // generation, replace this instruction.
+      std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+      if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+        DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << "  to: "
+                     << *InVal.first << '\n');
+        if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSECall;
+        continue;
+      }
+      
+      // Otherwise, remember that we have this instruction.
+      AvailableCalls->insert(Inst,
+                         std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+      continue;
+    }
+    
+    // Okay, this isn't something we can CSE at all.  Check to see if it is
+    // something that could modify memory.  If so, our available memory values
+    // cannot be used so bump the generation count.
+    if (Inst->mayWriteToMemory()) {
+      ++CurrentGeneration;
+     
+      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        // We do a trivial form of DSE if there are two stores to the same
+        // location with no intervening loads.  Delete the earlier store.
+        if (LastStore &&
+            LastStore->getPointerOperand() == SI->getPointerOperand()) {
+          DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << "  due to: "
+                       << *Inst << '\n');
+          LastStore->eraseFromParent();
+          Changed = true;
+          ++NumDSE;
+          LastStore = 0;
+          continue;
+        }
+        
+        // Okay, we just invalidated anything we knew about loaded values.  Try
+        // to salvage *something* by remembering that the stored value is a live
+        // version of the pointer.  It is safe to forward from volatile stores
+        // to non-volatile loads, so we don't have to check for volatility of
+        // the store.
+        AvailableLoads->insert(SI->getPointerOperand(),
+         std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+        
+        // Remember that this was the last store we saw for DSE.
+        if (!SI->isVolatile())
+          LastStore = SI;
+      }
+    }
+  }
+  
+  unsigned LiveOutGeneration = CurrentGeneration;
+  for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+    Changed |= processNode(*I);
+    // Pop any generation changes off the stack from the recursive walk.
+    CurrentGeneration = LiveOutGeneration;
+  }
+  return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+  TD = getAnalysisIfAvailable<TargetData>();
+  DT = &getAnalysis<DominatorTree>();
+  
+  // Tables that the pass uses when walking the domtree.
+  ScopedHTType AVTable;
+  AvailableValues = &AVTable;
+  LoadHTType LoadTable;
+  AvailableLoads = &LoadTable;
+  CallHTType CallTable;
+  AvailableCalls = &CallTable;
+  
+  CurrentGeneration = 0;
+  return processNode(DT->getRootNode());
+}
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
index 53dd06d24bb5..4c3d188a8afd 100644
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,15 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit GEPSplitter() : FunctionPass(ID) {}
+    explicit GEPSplitter() : FunctionPass(ID) {
+      initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
+    }
   };
 }
 
 char GEPSplitter::ID = 0;
 INITIALIZE_PASS(GEPSplitter, "split-geps",
-                "split complex GEPs into simple GEPs", false, false);
+                "split complex GEPs into simple GEPs", false, false)
 
 FunctionPass *llvm::createGEPSplitterPass() {
   return new GEPSplitter();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c62ce1f27f64..a0123f589816 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,39 +17,30 @@
 
 #define DEBUG_TYPE "gvn"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
 using namespace llvm;
 
 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
@@ -61,7 +52,6 @@ STATISTIC(NumPRELoad,   "Number of loads PRE'd");
 static cl::opt<bool> EnablePRE("enable-pre",
                                cl::init(true), cl::Hidden);
 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
@@ -72,76 +62,23 @@ static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 /// two values.
 namespace {
   struct Expression {
-    enum ExpressionOpcode { 
-      ADD = Instruction::Add,
-      FADD = Instruction::FAdd,
-      SUB = Instruction::Sub,
-      FSUB = Instruction::FSub,
-      MUL = Instruction::Mul,
-      FMUL = Instruction::FMul,
-      UDIV = Instruction::UDiv,
-      SDIV = Instruction::SDiv,
-      FDIV = Instruction::FDiv,
-      UREM = Instruction::URem,
-      SREM = Instruction::SRem,
-      FREM = Instruction::FRem,
-      SHL = Instruction::Shl,
-      LSHR = Instruction::LShr,
-      ASHR = Instruction::AShr,
-      AND = Instruction::And,
-      OR = Instruction::Or,
-      XOR = Instruction::Xor,
-      TRUNC = Instruction::Trunc,
-      ZEXT = Instruction::ZExt,
-      SEXT = Instruction::SExt,
-      FPTOUI = Instruction::FPToUI,
-      FPTOSI = Instruction::FPToSI,
-      UITOFP = Instruction::UIToFP,
-      SITOFP = Instruction::SIToFP,
-      FPTRUNC = Instruction::FPTrunc,
-      FPEXT = Instruction::FPExt,
-      PTRTOINT = Instruction::PtrToInt,
-      INTTOPTR = Instruction::IntToPtr,
-      BITCAST = Instruction::BitCast,
-      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
-      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
-      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
-      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
-      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
-      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
-      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
-
-    ExpressionOpcode opcode;
+    uint32_t opcode;
     const Type* type;
     SmallVector<uint32_t, 4> varargs;
-    Value *function;
 
     Expression() { }
-    Expression(ExpressionOpcode o) : opcode(o) { }
+    Expression(uint32_t o) : opcode(o) { }
 
     bool operator==(const Expression &other) const {
       if (opcode != other.opcode)
         return false;
-      else if (opcode == EMPTY || opcode == TOMBSTONE)
+      else if (opcode == ~0U || opcode == ~1U)
         return true;
       else if (type != other.type)
         return false;
-      else if (function != other.function)
+      else if (varargs != other.varargs)
         return false;
-      else {
-        if (varargs.size() != other.varargs.size())
-          return false;
-
-        for (size_t i = 0; i < varargs.size(); ++i)
-          if (varargs[i] != other.varargs[i])
-            return false;
-
-        return true;
-      }
-    }
-
-    bool operator!=(const Expression &other) const {
-      return !(*this == other);
+      return true;
     }
   };
 
@@ -155,19 +92,7 @@ namespace {
 
       uint32_t nextValueNumber;
 
-      Expression::ExpressionOpcode getOpcode(CmpInst* C);
-      Expression create_expression(BinaryOperator* BO);
-      Expression create_expression(CmpInst* C);
-      Expression create_expression(ShuffleVectorInst* V);
-      Expression create_expression(ExtractElementInst* C);
-      Expression create_expression(InsertElementInst* V);
-      Expression create_expression(SelectInst* V);
-      Expression create_expression(CastInst* C);
-      Expression create_expression(GetElementPtrInst* G);
-      Expression create_expression(CallInst* C);
-      Expression create_expression(ExtractValueInst* C);
-      Expression create_expression(InsertValueInst* C);
-      
+      Expression create_expression(Instruction* I);
       uint32_t lookup_or_add_call(CallInst* C);
     public:
       ValueTable() : nextValueNumber(1) { }
@@ -176,7 +101,6 @@ namespace {
       void add(Value *V, uint32_t num);
       void clear();
       void erase(Value *v);
-      unsigned size();
       void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
       AliasAnalysis *getAliasAnalysis() const { return AA; }
       void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -189,11 +113,11 @@ namespace {
 namespace llvm {
 template <> struct DenseMapInfo<Expression> {
   static inline Expression getEmptyKey() {
-    return Expression(Expression::EMPTY);
+    return ~0U;
   }
 
   static inline Expression getTombstoneKey() {
-    return Expression(Expression::TOMBSTONE);
+    return ~1U;
   }
 
   static unsigned getHashValue(const Expression e) {
@@ -205,20 +129,13 @@ template <> struct DenseMapInfo<Expression> {
     for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
          E = e.varargs.end(); I != E; ++I)
       hash = *I + hash * 37;
-
-    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
-            (unsigned)((uintptr_t)e.function >> 9)) +
-           hash * 37;
-
+    
     return hash;
   }
   static bool isEqual(const Expression &LHS, const Expression &RHS) {
     return LHS == RHS;
   }
 };
-  
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
 
 }
 
@@ -226,185 +143,27 @@ struct isPodLike<Expression> { static const bool value = true; };
 //                     ValueTable Internal Functions
 //===----------------------------------------------------------------------===//
 
-Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
-  if (isa<ICmpInst>(C)) {
-    switch (C->getPredicate()) {
-    default:  // THIS SHOULD NEVER HAPPEN
-      llvm_unreachable("Comparison with unknown predicate?");
-    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
-    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
-    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
-    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
-    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
-    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
-    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
-    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
-    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
-    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
-    }
-  } else {
-    switch (C->getPredicate()) {
-    default: // THIS SHOULD NEVER HAPPEN
-      llvm_unreachable("Comparison with unknown predicate?");
-    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
-    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
-    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
-    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
-    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
-    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
-    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
-    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
-    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
-    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
-    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
-    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
-    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
-    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
-    }
-  }
-}
-
-Expression ValueTable::create_expression(CallInst* C) {
-  Expression e;
-
-  e.type = C->getType();
-  e.function = C->getCalledFunction();
-  e.opcode = Expression::CALL;
-
-  CallSite CS(C);
-  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I)
-    e.varargs.push_back(lookup_or_add(*I));
-
-  return e;
-}
-
-Expression ValueTable::create_expression(BinaryOperator* BO) {
-  Expression e;
-  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
-  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
-  e.function = 0;
-  e.type = BO->getType();
-  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
-
-  return e;
-}
-
-Expression ValueTable::create_expression(CmpInst* C) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
-  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
-  e.function = 0;
-  e.type = C->getType();
-  e.opcode = getOpcode(C);
-
-  return e;
-}
-
-Expression ValueTable::create_expression(CastInst* C) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
-  e.function = 0;
-  e.type = C->getType();
-  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
-
-  return e;
-}
-
-Expression ValueTable::create_expression(ShuffleVectorInst* S) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
-  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
-  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
-  e.function = 0;
-  e.type = S->getType();
-  e.opcode = Expression::SHUFFLE;
-
-  return e;
-}
 
-Expression ValueTable::create_expression(ExtractElementInst* E) {
+Expression ValueTable::create_expression(Instruction *I) {
   Expression e;
-
-  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
-  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
-  e.function = 0;
-  e.type = E->getType();
-  e.opcode = Expression::EXTRACT;
-
-  return e;
-}
-
-Expression ValueTable::create_expression(InsertElementInst* I) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
-  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
-  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
-  e.function = 0;
   e.type = I->getType();
-  e.opcode = Expression::INSERT;
-
-  return e;
-}
-
-Expression ValueTable::create_expression(SelectInst* I) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(I->getCondition()));
-  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
-  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
-  e.function = 0;
-  e.type = I->getType();
-  e.opcode = Expression::SELECT;
-
-  return e;
-}
-
-Expression ValueTable::create_expression(GetElementPtrInst* G) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
-  e.function = 0;
-  e.type = G->getType();
-  e.opcode = Expression::GEP;
-
-  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
-       I != E; ++I)
-    e.varargs.push_back(lookup_or_add(*I));
-
-  return e;
-}
-
-Expression ValueTable::create_expression(ExtractValueInst* E) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
-  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
-       II != IE; ++II)
-    e.varargs.push_back(*II);
-  e.function = 0;
-  e.type = E->getType();
-  e.opcode = Expression::EXTRACTVALUE;
-
-  return e;
-}
-
-Expression ValueTable::create_expression(InsertValueInst* E) {
-  Expression e;
-
-  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
-  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
-  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
-       II != IE; ++II)
-    e.varargs.push_back(*II);
-  e.function = 0;
-  e.type = E->getType();
-  e.opcode = Expression::INSERTVALUE;
-
+  e.opcode = I->getOpcode();
+  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+       OI != OE; ++OI)
+    e.varargs.push_back(lookup_or_add(*OI));
+  
+  if (CmpInst *C = dyn_cast<CmpInst>(I))
+    e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
+    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+         II != IE; ++II)
+      e.varargs.push_back(*II);
+  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+         II != IE; ++II)
+      e.varargs.push_back(*II);
+  }
+  
   return e;
 }
 
@@ -563,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::And:
     case Instruction::Or :
     case Instruction::Xor:
-      exp = create_expression(cast<BinaryOperator>(I));
-      break;
     case Instruction::ICmp:
     case Instruction::FCmp:
-      exp = create_expression(cast<CmpInst>(I));
-      break;
     case Instruction::Trunc:
     case Instruction::ZExt:
     case Instruction::SExt:
@@ -581,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::PtrToInt:
     case Instruction::IntToPtr:
     case Instruction::BitCast:
-      exp = create_expression(cast<CastInst>(I));
-      break;
     case Instruction::Select:
-      exp = create_expression(cast<SelectInst>(I));
-      break;
     case Instruction::ExtractElement:
-      exp = create_expression(cast<ExtractElementInst>(I));
-      break;
     case Instruction::InsertElement:
-      exp = create_expression(cast<InsertElementInst>(I));
-      break;
     case Instruction::ShuffleVector:
-      exp = create_expression(cast<ShuffleVectorInst>(I));
-      break;
     case Instruction::ExtractValue:
-      exp = create_expression(cast<ExtractValueInst>(I));
-      break;
     case Instruction::InsertValue:
-      exp = create_expression(cast<InsertValueInst>(I));
-      break;      
     case Instruction::GetElementPtr:
-      exp = create_expression(cast<GetElementPtrInst>(I));
+      exp = create_expression(I);
       break;
     default:
       valueNumbering[V] = nextValueNumber;
@@ -648,15 +389,6 @@ void ValueTable::verifyRemoved(const Value *V) const {
 //                                GVN Pass
 //===----------------------------------------------------------------------===//
 
-namespace {
-  struct ValueNumberScope {
-    ValueNumberScope* parent;
-    DenseMap<uint32_t, Value*> table;
-
-    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
-  };
-}
-
 namespace {
 
   class GVN : public FunctionPass {
@@ -664,15 +396,70 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit GVN(bool noloads = false)
-      : FunctionPass(ID), NoLoads(noloads), MD(0) { }
+        : FunctionPass(ID), NoLoads(noloads), MD(0) {
+      initializeGVNPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     bool NoLoads;
     MemoryDependenceAnalysis *MD;
     DominatorTree *DT;
+    const TargetData* TD;
 
     ValueTable VN;
-    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+    
+    /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+    /// have that value number.  Use findLeader to query it.
+    struct LeaderTableEntry {
+      Value *Val;
+      BasicBlock *BB;
+      LeaderTableEntry *Next;
+    };
+    DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+    BumpPtrAllocator TableAllocator;
+    
+    /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
+    /// its value number.
+    void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+      LeaderTableEntry& Curr = LeaderTable[N];
+      if (!Curr.Val) {
+        Curr.Val = V;
+        Curr.BB = BB;
+        return;
+      }
+      
+      LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+      Node->Val = V;
+      Node->BB = BB;
+      Node->Next = Curr.Next;
+      Curr.Next = Node;
+    }
+    
+    /// removeFromLeaderTable - Scan the list of values corresponding to a given
+    /// value number, and remove the given value if encountered.
+    void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+      LeaderTableEntry* Prev = 0;
+      LeaderTableEntry* Curr = &LeaderTable[N];
+
+      while (Curr->Val != V || Curr->BB != BB) {
+        Prev = Curr;
+        Curr = Curr->Next;
+      }
+      
+      if (Prev) {
+        Prev->Next = Curr->Next;
+      } else {
+        if (!Curr->Next) {
+          Curr->Val = 0;
+          Curr->BB = 0;
+        } else {
+          LeaderTableEntry* Next = Curr->Next;
+          Curr->Val = Next->Val;
+          Curr->BB = Next->BB;
+          Curr->Next = Next->Next;
+        }
+      }
+    }
 
     // List of critical edges to be split between iterations.
     SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
@@ -699,9 +486,8 @@ namespace {
     bool processBlock(BasicBlock *BB);
     void dump(DenseMap<uint32_t, Value*>& d);
     bool iterateOnFunction(Function &F);
-    Value *CollapsePhi(PHINode* p);
     bool performPRE(Function& F);
-    Value *lookupNumber(BasicBlock *BB, uint32_t num);
+    Value *findLeader(BasicBlock *BB, uint32_t num);
     void cleanupGlobalSets();
     void verifyRemoved(const Instruction *I) const;
     bool splitCriticalEdges();
@@ -715,7 +501,11 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
   return new GVN(NoLoads);
 }
 
-INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
 
 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "{\n";
@@ -727,33 +517,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "}\n";
 }
 
-static bool isSafeReplacement(PHINode* p, Instruction *inst) {
-  if (!isa<PHINode>(inst))
-    return true;
-
-  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
-       UI != E; ++UI)
-    if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
-      if (use_phi->getParent() == inst->getParent())
-        return false;
-
-  return true;
-}
-
-Value *GVN::CollapsePhi(PHINode *PN) {
-  Value *ConstVal = PN->hasConstantValue(DT);
-  if (!ConstVal) return 0;
-
-  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
-  if (!Inst)
-    return ConstVal;
-
-  if (DT->dominates(Inst, PN))
-    if (isSafeReplacement(PN, Inst))
-      return Inst;
-  return 0;
-}
-
 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
 /// we're analyzing is fully available in the specified block.  As we go, keep
 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
@@ -937,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
   return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
 }
 
-/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
-/// be expressed as a base pointer plus a constant offset.  Return the base and
-/// offset to the caller.
-static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                        const TargetData &TD) {
-  Operator *PtrOp = dyn_cast<Operator>(Ptr);
-  if (PtrOp == 0) return Ptr;
-  
-  // Just look through bitcasts.
-  if (PtrOp->getOpcode() == Instruction::BitCast)
-    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-  
-  // If this is a GEP with constant indices, we can look through it.
-  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
-  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-  
-  gep_type_iterator GTI = gep_type_begin(GEP);
-  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
-       ++I, ++GTI) {
-    ConstantInt *OpC = cast<ConstantInt>(*I);
-    if (OpC->isZero()) continue;
-    
-    // Handle a struct and array indices which add their offset to the pointer.
-    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
-      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
-    } else {
-      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
-      Offset += OpC->getSExtValue()*Size;
-    }
-  }
-  
-  // Re-sign extend from the pointer size if needed to get overflow edge cases
-  // right.
-  unsigned PtrSize = TD.getPointerSizeInBits();
-  if (PtrSize < 64)
-    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
-  
-  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
-}
-
-
 /// AnalyzeLoadFromClobberingWrite - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering memory write (store,
 /// memset, memcpy, memmove).  This means that the write *may* provide bits used
@@ -996,9 +718,8 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
     return -1;
   
   int64_t StoreOffset = 0, LoadOffset = 0;
-  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
-  Value *LoadBase = 
-    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
   if (StoreBase != LoadBase)
     return -1;
   
@@ -1020,8 +741,6 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
   // If the load and store don't overlap at all, the store doesn't provide
   // anything to the load.  In this case, they really don't alias at all, AA
   // must have gotten confused.
-  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
-  // remove this check, as it is duplicated with what we have below.
   uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
   
   if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -1067,12 +786,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                           StoreInst *DepSI,
                                           const TargetData &TD) {
   // Cannot handle reading from store of first-class aggregate yet.
-  if (DepSI->getOperand(0)->getType()->isStructTy() ||
-      DepSI->getOperand(0)->getType()->isArrayTy())
+  if (DepSI->getValueOperand()->getType()->isStructTy() ||
+      DepSI->getValueOperand()->getType()->isArrayTy())
     return -1;
 
   Value *StorePtr = DepSI->getPointerOperand();
-  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                         StorePtr, StoreSize, TD);
 }
@@ -1099,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
   Constant *Src = dyn_cast<Constant>(MTI->getSource());
   if (Src == 0) return -1;
   
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
   if (GV == 0 || !GV->isConstant()) return -1;
   
   // See if the access is within the bounds of the transfer.
@@ -1331,6 +1050,15 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   if (V->getType()->isPointerTy())
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
       AA->copyValue(LI, NewPHIs[i]);
+    
+    // Now that we've copied information to the new PHIs, scan through
+    // them again and inform alias analysis that we've added potentially
+    // escaping uses to any values that are operands to these PHIs.
+    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+      PHINode *P = NewPHIs[i];
+      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
+        AA->addEscapingUse(P->getOperandUse(2*ii));
+    }
 
   return V;
 }
@@ -1347,8 +1075,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
   // Find the non-local dependencies of the load.
   SmallVector<NonLocalDepResult, 64> Deps;
-  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
-                                   Deps);
+  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
   //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
   //             << Deps.size() << *LI << '\n');
 
@@ -1376,8 +1104,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
   SmallVector<BasicBlock*, 16> UnavailableBlocks;
 
-  const TargetData *TD = 0;
-  
   for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
@@ -1392,14 +1118,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       // read by the load, we can extract the bits we need for the load from the
       // stored value.
       if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
         if (TD && Address) {
           int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                       DepSI, *TD);
           if (Offset != -1) {
             ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                                           DepSI->getOperand(0),
+                                                       DepSI->getValueOperand(),
                                                                 Offset));
             continue;
           }
@@ -1409,8 +1133,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       // If the clobbering value is a memset/memcpy/memmove, see if we can
       // forward a value on from it.
       if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
         if (TD && Address) {
           int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                         DepMI, *TD);
@@ -1440,13 +1162,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
       // Reject loads and stores that are to the same address but are of
       // different types if we have to.
-      if (S->getOperand(0)->getType() != LI->getType()) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
-        
+      if (S->getValueOperand()->getType() != LI->getType()) {
         // If the stored value is larger or equal to the loaded value, we can
         // reuse it.
-        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                         LI->getType(), *TD)) {
           UnavailableBlocks.push_back(DepBB);
           continue;
@@ -1454,16 +1173,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       }
 
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                                          S->getOperand(0)));
+                                                         S->getValueOperand()));
       continue;
     }
     
     if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
       // If the types mismatch and we can't handle it, reject reuse of the load.
       if (LD->getType() != LI->getType()) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
-        
         // If the stored value is larger or equal to the loaded value, we can
         // reuse it.
         if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
@@ -1533,26 +1249,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       return false;
     if (Blockers.count(TmpBB))
       return false;
+    
+    // If any of these blocks has more than one successor (i.e. if the edge we
+    // just traversed was critical), then there are other paths through this 
+    // block along which the load may not be anticipated.  Hoisting the load 
+    // above this block would be adding the load to execution paths along
+    // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
-      allSingleSucc = false;
+      return false;
   }
 
   assert(TmpBB);
   LoadBB = TmpBB;
 
-  // If we have a repl set with LI itself in it, this means we have a loop where
-  // at least one of the values is LI.  Since this means that we won't be able
-  // to eliminate LI even if we insert uses in the other predecessors, we will
-  // end up increasing code size.  Reject this by scanning for LI.
-  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    if (ValuesPerBlock[i].isSimpleValue() &&
-        ValuesPerBlock[i].getSimpleValue() == LI) {
-      // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
-      if (!EnableFullLoadPRE || e == 1)
-        return false;
-    }
-  }
-
   // FIXME: It is extremely unclear what this loop is doing, other than
   // artificially restricting loadpre.
   if (isSinglePred) {
@@ -1612,14 +1321,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   unsigned NumUnavailablePreds = PredLoads.size();
   assert(NumUnavailablePreds != 0 &&
          "Fully available value should be eliminated above!");
-  if (!EnableFullLoadPRE) {
-    // If this load is unavailable in multiple predecessors, reject it.
-    // FIXME: If we could restructure the CFG, we could make a common pred with
-    // all the preds that don't have an available LI and insert a new load into
-    // that one block.
-    if (NumUnavailablePreds != 1)
+  
+  // If this load is unavailable in multiple predecessors, reject it.
+  // FIXME: If we could restructure the CFG, we could make a common pred with
+  // all the preds that don't have an available LI and insert a new load into
+  // that one block.
+  if (NumUnavailablePreds != 1)
       return false;
-  }
 
   // Check if the load can safely be moved to all the unavailable predecessors.
   bool CanDoPRE = true;
@@ -1634,7 +1342,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     // If all preds have a single successor, then we know it is safe to insert
     // the load on the pred (?!?), so we can insert code to materialize the
     // pointer if it is not available.
-    PHITransAddr Address(LI->getOperand(0), TD);
+    PHITransAddr Address(LI->getPointerOperand(), TD);
     Value *LoadPtr = 0;
     if (allSingleSucc) {
       LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
@@ -1648,7 +1356,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     // we fail PRE.
     if (LoadPtr == 0) {
       DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
-            << *LI->getOperand(0) << "\n");
+            << *LI->getPointerOperand() << "\n");
       CanDoPRE = false;
       break;
     }
@@ -1657,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     //  @1 = getelementptr (i8* p, ...
     //  test p and branch if == 0
     //  load @1
-    // It is valid to have the getelementptr before the test, even if p can be 0,
-    // as getelementptr only does address arithmetic.
+    // It is valid to have the getelementptr before the test, even if p can
+    // be 0, as getelementptr only does address arithmetic.
     // If we are not pushing the value through any multiple-successor blocks
     // we do not have this case.  Otherwise, check that the load is safe to
     // put anywhere; this can be improved, but should be conservatively safe.
@@ -1675,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   }
 
   if (!CanDoPRE) {
-    while (!NewInsts.empty())
-      NewInsts.pop_back_val()->eraseFromParent();
+    while (!NewInsts.empty()) {
+      Instruction *I = NewInsts.pop_back_val();
+      if (MD) MD->removeInstruction(I);
+      I->eraseFromParent();
+    }
     return false;
   }
 
@@ -1702,9 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     BasicBlock *UnavailablePred = I->first;
     Value *LoadPtr = I->second;
 
-    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
-                                  LI->getAlignment(),
-                                  UnavailablePred->getTerminator());
+    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+                                        LI->getAlignment(),
+                                        UnavailablePred->getTerminator());
+
+    // Transfer the old load's TBAA tag to the new load.
+    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
 
     // Add the newly created load.
     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
@@ -1753,19 +1468,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // access code.
     Value *AvailVal = 0;
     if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
-      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+      if (TD) {
         int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                     L->getPointerOperand(),
                                                     DepSI, *TD);
         if (Offset != -1)
-          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                           L->getType(), L, *TD);
       }
     
     // If the clobbering value is a memset/memcpy/memmove, see if we can forward
     // a value on from it.
     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
-      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+      if (TD) {
         int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                       L->getPointerOperand(),
                                                       DepMI, *TD);
@@ -1804,14 +1519,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
 
   Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
-    Value *StoredVal = DepSI->getOperand(0);
+    Value *StoredVal = DepSI->getValueOperand();
     
     // The store and load are to a must-aliased pointer, but they may not
     // actually have the same type.  See if we know how to reuse the stored
     // value (depending on its type).
-    const TargetData *TD = 0;
     if (StoredVal->getType() != L->getType()) {
-      if ((TD = getAnalysisIfAvailable<TargetData>())) {
+      if (TD) {
         StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                    L, *TD);
         if (StoredVal == 0)
@@ -1840,9 +1554,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // The loads are of a must-aliased pointer, but they may not actually have
     // the same type.  See if we know how to reuse the previously loaded value
     // (depending on its type).
-    const TargetData *TD = 0;
     if (DepLI->getType() != L->getType()) {
-      if ((TD = getAnalysisIfAvailable<TargetData>())) {
+      if (TD) {
         AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
         if (AvailableVal == 0)
           return false;
@@ -1890,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   return false;
 }
 
-Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
-  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
-  if (I == localAvail.end())
-    return 0;
-
-  ValueNumberScope *Locals = I->second;
-  while (Locals) {
-    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
-    if (I != Locals->table.end())
-      return I->second;
-    Locals = Locals->parent;
+// findLeader - In order to find a leader for a given value number at a 
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in 
+// question.  This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+  LeaderTableEntry Vals = LeaderTable[num];
+  if (!Vals.Val) return 0;
+  
+  Value *Val = 0;
+  if (DT->dominates(Vals.BB, BB)) {
+    Val = Vals.Val;
+    if (isa<Constant>(Val)) return Val;
+  }
+  
+  LeaderTableEntry* Next = Vals.Next;
+  while (Next) {
+    if (DT->dominates(Next->BB, BB)) {
+      if (isa<Constant>(Next->Val)) return Next->Val;
+      if (!Val) Val = Next->Val;
+    }
+    
+    Next = Next->Next;
   }
 
-  return 0;
+  return Val;
 }
 
 
@@ -1915,85 +1640,92 @@ bool GVN::processInstruction(Instruction *I,
   if (isa<DbgInfoIntrinsic>(I))
     return false;
 
+  // If the instruction can be easily simplified then do so now in preference
+  // to value numbering it.  Value numbering often exposes redundancies, for
+  // example if it determines that %y is equal to %x then the instruction
+  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+  if (Value *V = SimplifyInstruction(I, TD, DT)) {
+    I->replaceAllUsesWith(V);
+    if (MD && V->getType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(V);
+    VN.erase(I);
+    toErase.push_back(I);
+    return true;
+  }
+
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     bool Changed = processLoad(LI, toErase);
 
     if (!Changed) {
       unsigned Num = VN.lookup_or_add(LI);
-      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+      addToLeaderTable(Num, LI, LI->getParent());
     }
 
     return Changed;
   }
 
-  uint32_t NextNum = VN.getNextUnusedValueNumber();
-  unsigned Num = VN.lookup_or_add(I);
-
+  // For conditions branches, we can perform simple conditional propagation on
+  // the condition value itself.
   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
       return false;
-
+    
     Value *BranchCond = BI->getCondition();
     uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+  
     BasicBlock *TrueSucc = BI->getSuccessor(0);
     BasicBlock *FalseSucc = BI->getSuccessor(1);
-
+  
     if (TrueSucc->getSinglePredecessor())
-      localAvail[TrueSucc]->table[CondVN] =
-        ConstantInt::getTrue(TrueSucc->getContext());
+      addToLeaderTable(CondVN,
+                   ConstantInt::getTrue(TrueSucc->getContext()),
+                   TrueSucc);
     if (FalseSucc->getSinglePredecessor())
-      localAvail[FalseSucc]->table[CondVN] =
-        ConstantInt::getFalse(TrueSucc->getContext());
-
+      addToLeaderTable(CondVN,
+                   ConstantInt::getFalse(TrueSucc->getContext()),
+                   FalseSucc);
+    
     return false;
+  }
+  
+  // Instructions with void type don't return a value, so there's
+  // no point in trying to find redudancies in them.
+  if (I->getType()->isVoidTy()) return false;
+  
+  uint32_t NextNum = VN.getNextUnusedValueNumber();
+  unsigned Num = VN.lookup_or_add(I);
 
   // Allocations are always uniquely numbered, so we can save time and memory
   // by fast failing them.
-  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+    addToLeaderTable(Num, I, I->getParent());
     return false;
   }
 
-  // Collapse PHI nodes
-  if (PHINode* p = dyn_cast<PHINode>(I)) {
-    Value *constVal = CollapsePhi(p);
-
-    if (constVal) {
-      p->replaceAllUsesWith(constVal);
-      if (MD && constVal->getType()->isPointerTy())
-        MD->invalidateCachedPointerInfo(constVal);
-      VN.erase(p);
-
-      toErase.push_back(p);
-    } else {
-      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-    }
-
   // If the number we were assigned was a brand new VN, then we don't
   // need to do a lookup to see if the number already exists
   // somewhere in the domtree: it can't!
-  } else if (Num == NextNum) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
+  if (Num == NextNum) {
+    addToLeaderTable(Num, I, I->getParent());
+    return false;
+  }
+  
   // Perform fast-path value-number based elimination of values inherited from
   // dominators.
-  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
-    // Remove it!
-    VN.erase(I);
-    I->replaceAllUsesWith(repl);
-    if (MD && repl->getType()->isPointerTy())
-      MD->invalidateCachedPointerInfo(repl);
-    toErase.push_back(I);
-    return true;
-
-  } else {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+  Value *repl = findLeader(I->getParent(), Num);
+  if (repl == 0) {
+    // Failure, just remember this instance for future use.
+    addToLeaderTable(Num, I, I->getParent());
+    return false;
   }
-
-  return false;
+  
+  // Remove it!
+  VN.erase(I);
+  I->replaceAllUsesWith(repl);
+  if (MD && repl->getType()->isPointerTy())
+    MD->invalidateCachedPointerInfo(repl);
+  toErase.push_back(I);
+  return true;
 }
 
 /// runOnFunction - This is the main transformation entry point for a function.
@@ -2001,6 +1733,7 @@ bool GVN::runOnFunction(Function& F) {
   if (!NoLoads)
     MD = &getAnalysis<MemoryDependenceAnalysis>();
   DT = &getAnalysis<DominatorTree>();
+  TD = getAnalysisIfAvailable<TargetData>();
   VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
   VN.setMemDep(MD);
   VN.setDomTree(DT);
@@ -2011,8 +1744,8 @@ bool GVN::runOnFunction(Function& F) {
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
-    BasicBlock *BB = FI;
-    ++FI;
+    BasicBlock *BB = FI++;
+    
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
     if (removedBlock) ++NumGVNBlocks;
 
@@ -2020,7 +1753,6 @@ bool GVN::runOnFunction(Function& F) {
   }
 
   unsigned Iteration = 0;
-
   while (ShouldContinue) {
     DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
     ShouldContinue = iterateOnFunction(F);
@@ -2138,20 +1870,19 @@ bool GVN::performPRE(Function &F) {
         if (P == CurrentBlock) {
           NumWithout = 2;
           break;
-        } else if (!localAvail.count(P))  {
+        } else if (!DT->dominates(&F.getEntryBlock(), P))  {
           NumWithout = 2;
           break;
         }
 
-        DenseMap<uint32_t, Value*>::iterator predV =
-                                            localAvail[P]->table.find(ValNo);
-        if (predV == localAvail[P]->table.end()) {
+        Value* predV = findLeader(P, ValNo);
+        if (predV == 0) {
           PREPred = P;
           ++NumWithout;
-        } else if (predV->second == CurInst) {
+        } else if (predV == CurInst) {
           NumWithout = 2;
         } else {
-          predMap[P] = predV->second;
+          predMap[P] = predV;
           ++NumWith;
         }
       }
@@ -2186,7 +1917,7 @@ bool GVN::performPRE(Function &F) {
         if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
           continue;
 
-        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+        if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
           PREInstr->setOperand(i, V);
         } else {
           success = false;
@@ -2210,7 +1941,7 @@ bool GVN::performPRE(Function &F) {
       ++NumGVNPRE;
 
       // Update the availability map to include the new instruction.
-      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+      addToLeaderTable(ValNo, PREInstr, PREPred);
 
       // Create a PHI to make the value available in this block.
       PHINode* Phi = PHINode::Create(CurInst->getType(),
@@ -2223,12 +1954,21 @@ bool GVN::performPRE(Function &F) {
       }
 
       VN.add(Phi, ValNo);
-      localAvail[CurrentBlock]->table[ValNo] = Phi;
+      addToLeaderTable(ValNo, Phi, CurrentBlock);
 
       CurInst->replaceAllUsesWith(Phi);
-      if (MD && Phi->getType()->isPointerTy())
-        MD->invalidateCachedPointerInfo(Phi);
+      if (Phi->getType()->isPointerTy()) {
+        // Because we have added a PHI-use of the pointer value, it has now
+        // "escaped" from alias analysis' perspective.  We need to inform
+        // AA of this.
+        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
+          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+        
+        if (MD)
+          MD->invalidateCachedPointerInfo(Phi);
+      }
       VN.erase(CurInst);
+      removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
 
       DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
       if (MD) MD->removeInstruction(CurInst);
@@ -2260,16 +2000,7 @@ bool GVN::splitCriticalEdges() {
 /// iterateOnFunction - Executes one iteration of GVN
 bool GVN::iterateOnFunction(Function &F) {
   cleanupGlobalSets();
-
-  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
-       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
-    if (DI->getIDom())
-      localAvail[DI->getBlock()] =
-                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
-    else
-      localAvail[DI->getBlock()] = new ValueNumberScope(0);
-  }
-
+  
   // Top-down walk of the dominator tree
   bool Changed = false;
 #if 0
@@ -2289,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) {
 
 void GVN::cleanupGlobalSets() {
   VN.clear();
-
-  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
-       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
-    delete I->second;
-  localAvail.clear();
+  LeaderTable.clear();
+  TableAllocator.Reset();
 }
 
 /// verifyRemoved - Verify that the specified instruction does not occur in our
@@ -2303,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
 
   // Walk through the value number scope to make sure the instruction isn't
   // ferreted away in it.
-  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
-         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
-    const ValueNumberScope *VNS = I->second;
-
-    while (VNS) {
-      for (DenseMap<uint32_t, Value*>::const_iterator
-             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
-        assert(II->second != Inst && "Inst still in value numbering scope!");
-      }
-
-      VNS = VNS->parent;
+  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+       I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+    const LeaderTableEntry *Node = &I->second;
+    assert(Node->Val != Inst && "Inst still in value numbering scope!");
+    
+    while (Node->Next) {
+      Node = Node->Next;
+      assert(Node->Val != Inst && "Inst still in value numbering scope!");
     }
   }
 }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index af2eafc47cbf..0fb67982a3db 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,9 @@ namespace {
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID) {}
+    IndVarSimplify() : LoopPass(ID) {
+      initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -117,8 +119,16 @@ namespace {
 }
 
 char IndVarSimplify::ID = 0;
-INITIALIZE_PASS(IndVarSimplify, "indvars",
-                "Canonicalize Induction Variables", false, false);
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+                "Canonicalize Induction Variables", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+                "Canonicalize Induction Variables", false, false)
 
 Pass *llvm::createIndVarSimplifyPass() {
   return new IndVarSimplify();
@@ -190,7 +200,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   }
 
   // Expand the code for the iteration count.
-  assert(RHS->isLoopInvariant(L) &&
+  assert(SE->isLoopInvariant(RHS, L) &&
          "Computed iteration count is not loop invariant!");
   Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
 
@@ -233,8 +243,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 /// happen later, except that it's more powerful in some cases, because it's
 /// able to brute-force evaluate arbitrary instructions as long as they have
 /// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L,
-                                           SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
   // Verify the input to the pass in already in LCSSA form.
   assert(L->isLCSSAForm(*DT));
 
@@ -292,7 +301,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
         // and varies predictably *inside* the loop.  Evaluate the value it
         // contains when the loop exits, if possible.
         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
-        if (!ExitValue->isLoopInvariant(L))
+        if (!SE->isLoopInvariant(ExitValue, L))
           continue;
 
         Changed = true;
@@ -338,7 +347,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
   // If there are, change them into integer recurrences, permitting analysis by
   // the SCEV routines.
   //
-  BasicBlock *Header    = L->getHeader();
+  BasicBlock *Header = L->getHeader();
 
   SmallVector<WeakVH, 8> PHIs;
   for (BasicBlock::iterator I = Header->begin();
@@ -346,7 +355,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
     PHIs.push_back(PN);
 
   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
-    if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
       HandleFloatingPointIV(L, PN);
 
   // If the loop previously had floating-point IV, ScalarEvolution
@@ -395,7 +404,7 @@ void IndVarSimplify::EliminateIVComparisons() {
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+        dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
@@ -462,7 +471,7 @@ void IndVarSimplify::EliminateIVRemainders() {
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
@@ -607,9 +616,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 // currently can only reduce affine polynomials.  For now just disable
 // indvar subst on anything more complex than an affine addrec, unless
 // it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L) {
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
   // Loop-invariant values are safe.
-  if (S->isLoopInvariant(L)) return true;
+  if (SE->isLoopInvariant(S, L)) return true;
 
   // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
   // to transform them into efficient code.
@@ -620,18 +629,18 @@ static bool isSafe(const SCEV *S, const Loop *L) {
   if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
     for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
          E = Commutative->op_end(); I != E; ++I)
-      if (!isSafe(*I, L)) return false;
+      if (!isSafe(*I, L, SE)) return false;
     return true;
   }
   
   // A cast is safe if its operand is.
   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
-    return isSafe(C->getOperand(), L);
+    return isSafe(C->getOperand(), L, SE);
 
   // A udiv is safe if its operands are.
   if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
-    return isSafe(UD->getLHS(), L) &&
-           isSafe(UD->getRHS(), L);
+    return isSafe(UD->getLHS(), L, SE) &&
+           isSafe(UD->getRHS(), L, SE);
 
   // SCEVUnknown is always safe.
   if (isa<SCEVUnknown>(S))
@@ -662,7 +671,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
     // Evaluate the expression out of the loop, if possible.
     if (!L->contains(UI->getUser())) {
       const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
-      if (ExitVal->isLoopInvariant(L))
+      if (SE->isLoopInvariant(ExitVal, L))
         AR = ExitVal;
     }
 
@@ -672,7 +681,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
     // currently can only reduce affine polynomials.  For now just disable
     // indvar subst on anything more complex than an affine addrec, unless
     // it can be expanded to a trivial value.
-    if (!isSafe(AR, L))
+    if (!isSafe(AR, L, SE))
       continue;
 
     // Determine the insertion point for this user. By default, insert
@@ -725,7 +734,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
   // which are now dead.
   while (!DeadInsts.empty())
     if (Instruction *Inst =
-          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst);
 }
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 104d5aecbdd3..90094a8da257 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -40,20 +40,22 @@ STATISTIC(NumFolds,   "Number of terminators folded");
 STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");
 
 static cl::opt<unsigned>
-Threshold("jump-threading-threshold", 
+Threshold("jump-threading-threshold",
           cl::desc("Max block size to duplicate for jump threading"),
           cl::init(6), cl::Hidden);
 
-// Turn on use of LazyValueInfo.
-static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi",
-          cl::desc("Use LVI for jump threading"),
-          cl::init(true),
-          cl::ReallyHidden);
-
-
-
 namespace {
+  // These are at global scope so static functions can use them too.
+  typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+  typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+  // This is used to keep track of what kind of constant we're currently hoping
+  // to find.
+  enum ConstantPreference {
+    WantInteger,
+    WantBlockAddress
+  };
+
   /// This pass performs 'jump threading', which looks at blocks that have
   /// multiple predecessors and multiple successors.  If one or more of the
   /// predecessors of the block can be proven to always jump to one of the
@@ -79,61 +81,59 @@ namespace {
     SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
 #endif
     DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
-    
+
     // RAII helper for updating the recursion stack.
     struct RecursionSetRemover {
       DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
       std::pair<Value*, BasicBlock*> ThePair;
-      
+
       RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
                           std::pair<Value*, BasicBlock*> P)
         : TheSet(S), ThePair(P) { }
-      
+
       ~RecursionSetRemover() {
         TheSet.erase(ThePair);
       }
     };
   public:
     static char ID; // Pass identification
-    JumpThreading() : FunctionPass(ID) {}
+    JumpThreading() : FunctionPass(ID) {
+      initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
-    
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      if (EnableLVI) {
-        AU.addRequired<LazyValueInfo>();
-        AU.addPreserved<LazyValueInfo>();
-      }
+      AU.addRequired<LazyValueInfo>();
+      AU.addPreserved<LazyValueInfo>();
     }
-    
+
     void FindLoopHeaders(Function &F);
     bool ProcessBlock(BasicBlock *BB);
     bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
                     BasicBlock *SuccBB);
     bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                   const SmallVectorImpl<BasicBlock *> &PredBBs);
-    
-    typedef SmallVectorImpl<std::pair<ConstantInt*,
-                                      BasicBlock*> > PredValueInfo;
-    
+
     bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
-                                         PredValueInfo &Result);
-    bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
-    
-    
-    bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
-    bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+                                         PredValueInfo &Result,
+                                         ConstantPreference Preference);
+    bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+                                ConstantPreference Preference);
 
     bool ProcessBranchOnPHI(PHINode *PN);
     bool ProcessBranchOnXOR(BinaryOperator *BO);
-    
+
     bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
   };
 }
 
 char JumpThreading::ID = 0;
-INITIALIZE_PASS(JumpThreading, "jump-threading",
-                "Jump Threading", false, false);
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+                "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+                "Jump Threading", false, false)
 
 // Public interface to the Jump Threading pass
 FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -143,21 +143,21 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
 bool JumpThreading::runOnFunction(Function &F) {
   DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
   TD = getAnalysisIfAvailable<TargetData>();
-  LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
-  
+  LVI = &getAnalysis<LazyValueInfo>();
+
   FindLoopHeaders(F);
-  
+
   bool Changed, EverChanged = false;
   do {
     Changed = false;
     for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
       BasicBlock *BB = I;
-      // Thread all of the branches we can over this block. 
+      // Thread all of the branches we can over this block.
       while (ProcessBlock(BB))
         Changed = true;
-      
+
       ++I;
-      
+
       // If the block is trivially dead, zap it.  This eliminates the successor
       // edges which simplifies the CFG.
       if (pred_begin(BB) == pred_end(BB) &&
@@ -165,48 +165,46 @@ bool JumpThreading::runOnFunction(Function &F) {
         DEBUG(dbgs() << "  JT: Deleting dead block '" << BB->getName()
               << "' with terminator: " << *BB->getTerminator() << '\n');
         LoopHeaders.erase(BB);
-        if (LVI) LVI->eraseBlock(BB);
+        LVI->eraseBlock(BB);
         DeleteDeadBlock(BB);
         Changed = true;
-      } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-        // Can't thread an unconditional jump, but if the block is "almost
-        // empty", we can replace uses of it with uses of the successor and make
-        // this dead.
-        if (BI->isUnconditional() && 
-            BB != &BB->getParent()->getEntryBlock()) {
-          BasicBlock::iterator BBI = BB->getFirstNonPHI();
-          // Ignore dbg intrinsics.
-          while (isa<DbgInfoIntrinsic>(BBI))
-            ++BBI;
+        continue;
+      }
+      
+      BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+      
+      // Can't thread an unconditional jump, but if the block is "almost
+      // empty", we can replace uses of it with uses of the successor and make
+      // this dead.
+      if (BI && BI->isUnconditional() &&
+          BB != &BB->getParent()->getEntryBlock() &&
           // If the terminator is the only non-phi instruction, try to nuke it.
-          if (BBI->isTerminator()) {
-            // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
-            // block, we have to make sure it isn't in the LoopHeaders set.  We
-            // reinsert afterward if needed.
-            bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
-            BasicBlock *Succ = BI->getSuccessor(0);
-            
-            // FIXME: It is always conservatively correct to drop the info
-            // for a block even if it doesn't get erased.  This isn't totally
-            // awesome, but it allows us to use AssertingVH to prevent nasty
-            // dangling pointer issues within LazyValueInfo.
-            if (LVI) LVI->eraseBlock(BB);
-            if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
-              Changed = true;
-              // If we deleted BB and BB was the header of a loop, then the
-              // successor is now the header of the loop.
-              BB = Succ;
-            }
-            
-            if (ErasedFromLoopHeaders)
-              LoopHeaders.insert(BB);
-          }
+          BB->getFirstNonPHIOrDbg()->isTerminator()) {
+        // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+        // block, we have to make sure it isn't in the LoopHeaders set.  We
+        // reinsert afterward if needed.
+        bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+        BasicBlock *Succ = BI->getSuccessor(0);
+
+        // FIXME: It is always conservatively correct to drop the info
+        // for a block even if it doesn't get erased.  This isn't totally
+        // awesome, but it allows us to use AssertingVH to prevent nasty
+        // dangling pointer issues within LazyValueInfo.
+        LVI->eraseBlock(BB);
+        if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+          Changed = true;
+          // If we deleted BB and BB was the header of a loop, then the
+          // successor is now the header of the loop.
+          BB = Succ;
         }
+
+        if (ErasedFromLoopHeaders)
+          LoopHeaders.insert(BB);
       }
     }
     EverChanged |= Changed;
   } while (Changed);
-  
+
   LoopHeaders.clear();
   return EverChanged;
 }
@@ -216,25 +214,25 @@ bool JumpThreading::runOnFunction(Function &F) {
 static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
   /// Ignore PHI nodes, these will be flattened when duplication happens.
   BasicBlock::const_iterator I = BB->getFirstNonPHI();
-  
+
   // FIXME: THREADING will delete values that are just used to compute the
   // branch, so they shouldn't count against the duplication cost.
-  
-  
+
+
   // Sum up the cost of each instruction until we get to the terminator.  Don't
   // include the terminator because the copy won't include it.
   unsigned Size = 0;
   for (; !isa<TerminatorInst>(I); ++I) {
     // Debugger intrinsics don't incur code size.
     if (isa<DbgInfoIntrinsic>(I)) continue;
-    
+
     // If this is a pointer->pointer bitcast, it is free.
     if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
       continue;
-    
+
     // All other instructions count for at least one unit.
     ++Size;
-    
+
     // Calls are more expensive.  If they are non-intrinsic calls, we model them
     // as having cost of 4.  If they are a non-vector intrinsic, we model them
     // as having cost of 2 total, and if they are a vector intrinsic, we model
@@ -246,12 +244,16 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
         Size += 1;
     }
   }
-  
+
   // Threading through a switch statement is particularly profitable.  If this
   // block ends in a switch, decrease its cost to make it more likely to happen.
   if (isa<SwitchInst>(I))
     Size = Size > 6 ? Size-6 : 0;
-  
+
+  // The same holds for indirect branches, but slightly more so.
+  if (isa<IndirectBrInst>(I))
+    Size = Size > 8 ? Size-8 : 0;
+
   return Size;
 }
 
@@ -273,57 +275,64 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
 void JumpThreading::FindLoopHeaders(Function &F) {
   SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
   FindFunctionBackedges(F, Edges);
-  
+
   for (unsigned i = 0, e = Edges.size(); i != e; ++i)
     LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
 }
 
-// Helper method for ComputeValueKnownInPredecessors.  If Value is a
-// ConstantInt, push it.  If it's an undef, push 0.  Otherwise, do nothing.
-static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
-                                                        BasicBlock*> > &Result,
-                              Constant *Value, BasicBlock* BB){
-  if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
-    Result.push_back(std::make_pair(FoldedCInt, BB));
-  else if (isa<UndefValue>(Value))
-    Result.push_back(std::make_pair((ConstantInt*)0, BB));
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+  if (!Val)
+    return 0;
+
+  // Undef is "known" enough.
+  if (UndefValue *U = dyn_cast<UndefValue>(Val))
+    return U;
+
+  if (Preference == WantBlockAddress)
+    return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+  return dyn_cast<ConstantInt>(Val);
 }
 
 /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
-/// if we can infer that the value is a known ConstantInt in any of our
-/// predecessors.  If so, return the known list of value and pred BB in the
-/// result vector.  If a value is known to be undef, it is returned as null.
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors.  If so, return the known list of value and pred
+/// BB in the result vector.
 ///
 /// This returns true if there were any known values.
 ///
 bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+                                ConstantPreference Preference) {
   // This method walks up use-def chains recursively.  Because of this, we could
   // get into an infinite loop going around loops in the use-def chain.  To
   // prevent this, keep track of what (value, block) pairs we've already visited
   // and terminate the search if we loop back to them
   if (!RecursionSet.insert(std::make_pair(V, BB)).second)
     return false;
-  
+
   // An RAII help to remove this pair from the recursion set once the recursion
   // stack pops back out again.
   RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-  
-  // If V is a constantint, then it is known in all predecessors.
-  if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
-    ConstantInt *CI = dyn_cast<ConstantInt>(V);
-    
+
+  // If V is a constant, then it is known in all predecessors.
+  if (Constant *KC = getKnownConstant(V, Preference)) {
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-      Result.push_back(std::make_pair(CI, *PI));
-    
+      Result.push_back(std::make_pair(KC, *PI));
+
     return true;
   }
-  
+
   // If V is a non-instruction value, or an instruction in a different block,
   // then it can't be derived from a PHI.
   Instruction *I = dyn_cast<Instruction>(V);
   if (I == 0 || I->getParent() != BB) {
-    
+
     // Okay, if this is a live-in value, see if it has a known value at the end
     // of any of our predecessors.
     //
@@ -331,82 +340,78 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     /// TODO: Per PR2563, we could infer value range information about a
     /// predecessor based on its terminator.
     //
-    if (LVI) {
-      // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
-      // "I" is a non-local compare-with-a-constant instruction.  This would be
-      // able to handle value inequalities better, for example if the compare is
-      // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
-      // Perhaps getConstantOnEdge should be smart enough to do this?
-      
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        // If the value is known by LazyValueInfo to be a constant in a
-        // predecessor, use that information to try to thread this block.
-        Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
-        if (PredCst == 0 ||
-            (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
-          continue;
-        
-        Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
-      }
-      
-      return !Result.empty();
+    // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+    // "I" is a non-local compare-with-a-constant instruction.  This would be
+    // able to handle value inequalities better, for example if the compare is
+    // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+    // Perhaps getConstantOnEdge should be smart enough to do this?
+
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *P = *PI;
+      // If the value is known by LazyValueInfo to be a constant in a
+      // predecessor, use that information to try to thread this block.
+      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+      if (Constant *KC = getKnownConstant(PredCst, Preference))
+        Result.push_back(std::make_pair(KC, P));
     }
-    
-    return false;
+
+    return !Result.empty();
   }
-  
+
   /// If I is a PHI node, then we know the incoming values for any constants.
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       Value *InVal = PN->getIncomingValue(i);
-      if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
-        ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
-        Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
-      } else if (LVI) {
+      if (Constant *KC = getKnownConstant(InVal, Preference)) {
+        Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+      } else {
         Constant *CI = LVI->getConstantOnEdge(InVal,
                                               PN->getIncomingBlock(i), BB);
-        // LVI returns null is no value could be determined.
-        if (!CI) continue;
-        PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
+        if (Constant *KC = getKnownConstant(CI, Preference))
+          Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
       }
     }
-    
+
     return !Result.empty();
   }
-  
-  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+  PredValueInfoTy LHSVals, RHSVals;
 
   // Handle some boolean conditions.
-  if (I->getType()->getPrimitiveSizeInBits() == 1) { 
+  if (I->getType()->getPrimitiveSizeInBits() == 1) {
+    assert(Preference == WantInteger && "One-bit non-integer type?");
     // X | true -> true
     // X & false -> false
     if (I->getOpcode() == Instruction::Or ||
         I->getOpcode() == Instruction::And) {
-      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
-      ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
-      
+      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+                                      WantInteger);
+      ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+                                      WantInteger);
+
       if (LHSVals.empty() && RHSVals.empty())
         return false;
-      
+
       ConstantInt *InterestingVal;
       if (I->getOpcode() == Instruction::Or)
         InterestingVal = ConstantInt::getTrue(I->getContext());
       else
         InterestingVal = ConstantInt::getFalse(I->getContext());
-      
+
       SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
-      
+
       // Scan for the sentinel.  If we find an undef, force it to the
       // interesting value: x|undef -> true and x&undef -> false.
       for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
-        if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
+        if (LHSVals[i].first == InterestingVal ||
+            isa<UndefValue>(LHSVals[i].first)) {
           Result.push_back(LHSVals[i]);
           Result.back().first = InterestingVal;
           LHSKnownBBs.insert(LHSVals[i].second);
         }
       for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
-        if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+        if (RHSVals[i].first == InterestingVal ||
+            isa<UndefValue>(RHSVals[i].first)) {
           // If we already inferred a value for this block on the LHS, don't
           // re-add it.
           if (!LHSKnownBBs.count(RHSVals[i].second)) {
@@ -414,48 +419,51 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
             Result.back().first = InterestingVal;
           }
         }
-      
+
       return !Result.empty();
     }
-    
+
     // Handle the NOT form of XOR.
     if (I->getOpcode() == Instruction::Xor &&
         isa<ConstantInt>(I->getOperand(1)) &&
         cast<ConstantInt>(I->getOperand(1))->isOne()) {
-      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+                                      WantInteger);
       if (Result.empty())
         return false;
 
       // Invert the known values.
       for (unsigned i = 0, e = Result.size(); i != e; ++i)
-        if (Result[i].first)
-          Result[i].first =
-            cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
-      
+        Result[i].first = ConstantExpr::getNot(Result[i].first);
+
       return true;
     }
-  
+
   // Try to simplify some other binary operator values.
   } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+    assert(Preference != WantBlockAddress
+            && "A binary operator creating a block address?");
     if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
-      SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
-      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
-    
+      PredValueInfoTy LHSVals;
+      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+                                      WantInteger);
+
       // Try to use constant folding to simplify the binary operator.
       for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
-        Constant *V = LHSVals[i].first ? LHSVals[i].first :
-                                 cast<Constant>(UndefValue::get(BO->getType()));
+        Constant *V = LHSVals[i].first;
         Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
-        
-        PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+
+        if (Constant *KC = getKnownConstant(Folded, WantInteger))
+          Result.push_back(std::make_pair(KC, LHSVals[i].second));
       }
     }
-      
+
     return !Result.empty();
   }
-  
+
   // Handle compare with phi operand, where the PHI is defined in this block.
   if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+    assert(Preference == WantInteger && "Compares only produce integers");
     PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
     if (PN && PN->getParent() == BB) {
       // We can do this simplification if any comparisons fold to true or false.
@@ -464,32 +472,31 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
         BasicBlock *PredBB = PN->getIncomingBlock(i);
         Value *LHS = PN->getIncomingValue(i);
         Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
-        
+
         Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
         if (Res == 0) {
-          if (!LVI || !isa<Constant>(RHS))
+          if (!isa<Constant>(RHS))
             continue;
-          
-          LazyValueInfo::Tristate 
+
+          LazyValueInfo::Tristate
             ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
                                            cast<Constant>(RHS), PredBB, BB);
           if (ResT == LazyValueInfo::Unknown)
             continue;
           Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
         }
-        
-        if (Constant *ConstRes = dyn_cast<Constant>(Res))
-          PushConstantIntOrUndef(Result, ConstRes, PredBB);
+
+        if (Constant *KC = getKnownConstant(Res, WantInteger))
+          Result.push_back(std::make_pair(KC, PredBB));
       }
-      
+
       return !Result.empty();
     }
-    
-    
+
+
     // If comparing a live-in value against a constant, see if we know the
     // live-in value on any predecessors.
-    if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
-        Cmp->getType()->isIntegerTy()) {
+    if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
       if (!isa<Instruction>(Cmp->getOperand(0)) ||
           cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
         Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
@@ -505,44 +512,74 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
             continue;
 
           Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
-          Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+          Result.push_back(std::make_pair(ResC, P));
         }
 
         return !Result.empty();
       }
-      
+
       // Try to find a constant value for the LHS of a comparison,
       // and evaluate it statically if we can.
       if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
-        SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
-        ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
-        
+        PredValueInfoTy LHSVals;
+        ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+                                        WantInteger);
+
         for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
-          Constant *V = LHSVals[i].first ? LHSVals[i].first :
-                           cast<Constant>(UndefValue::get(CmpConst->getType()));
+          Constant *V = LHSVals[i].first;
           Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
                                                       V, CmpConst);
-          PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+          if (Constant *KC = getKnownConstant(Folded, WantInteger))
+            Result.push_back(std::make_pair(KC, LHSVals[i].second));
         }
-        
+
         return !Result.empty();
       }
     }
   }
-  
-  if (LVI) {
-    // If all else fails, see if LVI can figure out a constant value for us.
-    Constant *CI = LVI->getConstant(V, BB);
-    ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
-    if (CInt) {
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-        Result.push_back(std::make_pair(CInt, *PI));
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+    // Handle select instructions where at least one operand is a known constant
+    // and we can figure out the condition value for any predecessor block.
+    Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+    Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+    PredValueInfoTy Conds;
+    if ((TrueVal || FalseVal) &&
+        ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+                                        WantInteger)) {
+      for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+        Constant *Cond = Conds[i].first;
+
+        // Figure out what value to use for the condition.
+        bool KnownCond;
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+          // A known boolean.
+          KnownCond = CI->isOne();
+        } else {
+          assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+          // Either operand will do, so be sure to pick the one that's a known
+          // constant.
+          // FIXME: Do this more cleverly if both values are known constants?
+          KnownCond = (TrueVal != 0);
+        }
+
+        // See if the select has a known constant value for this predecessor.
+        if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+          Result.push_back(std::make_pair(Val, Conds[i].second));
+      }
+
+      return !Result.empty();
     }
-    
-    return !Result.empty();
   }
-  
-  return false;
+
+  // If all else fails, see if LVI can figure out a constant value for us.
+  Constant *CI = LVI->getConstant(V, BB);
+  if (Constant *KC = getKnownConstant(CI, Preference)) {
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+      Result.push_back(std::make_pair(KC, *PI));
+  }
+
+  return !Result.empty();
 }
 
 
@@ -565,10 +602,20 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
     if (NumPreds < MinNumPreds)
       MinSucc = i;
   }
-  
+
   return MinSucc;
 }
 
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+  if (!BB->hasAddressTaken()) return false;
+  
+  // If the block has its address taken, it may be a tree of dead constants
+  // hanging off of it.  These shouldn't keep the block alive.
+  BlockAddress *BA = BlockAddress::get(BB);
+  BA->removeDeadConstantUsers();
+  return !BA->use_empty();
+}
+
 /// ProcessBlock - If there are any predecessors whose control can be threaded
 /// through to a successor, transform them now.
 bool JumpThreading::ProcessBlock(BasicBlock *BB) {
@@ -577,167 +624,122 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   if (pred_begin(BB) == pred_end(BB) &&
       BB != &BB->getParent()->getEntryBlock())
     return false;
-  
+
   // If this block has a single predecessor, and if that pred has a single
   // successor, merge the blocks.  This encourages recursive jump threading
   // because now the condition in this block can be threaded through
   // predecessors of our predecessor block.
   if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
     if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
-        SinglePred != BB) {
+        SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
       // If SinglePred was a loop header, BB becomes one.
       if (LoopHeaders.erase(SinglePred))
         LoopHeaders.insert(BB);
-      
+
       // Remember if SinglePred was the entry block of the function.  If so, we
       // will need to move BB back to the entry position.
       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
-      if (LVI) LVI->eraseBlock(SinglePred);
+      LVI->eraseBlock(SinglePred);
       MergeBasicBlockIntoOnlyPred(BB);
-      
+
       if (isEntry && BB != &BB->getParent()->getEntryBlock())
         BB->moveBefore(&BB->getParent()->getEntryBlock());
       return true;
     }
   }
 
-  // Look to see if the terminator is a branch of switch, if not we can't thread
-  // it.
+  // What kind of constant we're looking for.
+  ConstantPreference Preference = WantInteger;
+
+  // Look to see if the terminator is a conditional branch, switch or indirect
+  // branch, if not we can't thread it.
   Value *Condition;
-  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+  Instruction *Terminator = BB->getTerminator();
+  if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
     // Can't thread an unconditional jump.
     if (BI->isUnconditional()) return false;
     Condition = BI->getCondition();
-  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
     Condition = SI->getCondition();
-  else
+  } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+    Condition = IB->getAddress()->stripPointerCasts();
+    Preference = WantBlockAddress;
+  } else {
     return false; // Must be an invoke.
-  
-  // If the terminator of this block is branching on a constant, simplify the
-  // terminator to an unconditional branch.  This can occur due to threading in
-  // other blocks.
-  if (isa<ConstantInt>(Condition)) {
-    DEBUG(dbgs() << "  In block '" << BB->getName()
-          << "' folding terminator: " << *BB->getTerminator() << '\n');
-    ++NumFolds;
-    ConstantFoldTerminator(BB);
-    return true;
   }
-  
+
   // If the terminator is branching on an undef, we can pick any of the
   // successors to branch to.  Let GetBestDestForJumpOnUndef decide.
   if (isa<UndefValue>(Condition)) {
     unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
-    
+
     // Fold the branch/switch.
     TerminatorInst *BBTerm = BB->getTerminator();
     for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
       if (i == BestSucc) continue;
-      RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+      BBTerm->getSuccessor(i)->removePredecessor(BB, true);
     }
-    
+
     DEBUG(dbgs() << "  In block '" << BB->getName()
           << "' folding undef terminator: " << *BBTerm << '\n');
     BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
     BBTerm->eraseFromParent();
     return true;
   }
-  
-  Instruction *CondInst = dyn_cast<Instruction>(Condition);
 
-  // If the condition is an instruction defined in another block, see if a
-  // predecessor has the same condition:
-  //     br COND, BBX, BBY
-  //  BBX:
-  //     br COND, BBZ, BBW
-  if (!LVI &&
-      !Condition->hasOneUse() && // Multiple uses.
-      (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
-    pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-    if (isa<BranchInst>(BB->getTerminator())) {
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
-          if (PBI->isConditional() && PBI->getCondition() == Condition &&
-              ProcessBranchOnDuplicateCond(P, BB))
-            return true;
-      }
-    } else {
-      assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
-          if (PSI->getCondition() == Condition &&
-              ProcessSwitchOnDuplicateCond(P, BB))
-            return true;
-      }
-    }
+  // If the terminator of this block is branching on a constant, simplify the
+  // terminator to an unconditional branch.  This can occur due to threading in
+  // other blocks.
+  if (getKnownConstant(Condition, Preference)) {
+    DEBUG(dbgs() << "  In block '" << BB->getName()
+          << "' folding terminator: " << *BB->getTerminator() << '\n');
+    ++NumFolds;
+    ConstantFoldTerminator(BB);
+    return true;
   }
 
+  Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
   // All the rest of our checks depend on the condition being an instruction.
   if (CondInst == 0) {
     // FIXME: Unify this with code below.
-    if (LVI && ProcessThreadableEdges(Condition, BB))
+    if (ProcessThreadableEdges(Condition, BB, Preference))
       return true;
     return false;
-  }  
-    
-  
+  }
+
+
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
-    if (!LVI &&
-        (!isa<PHINode>(CondCmp->getOperand(0)) ||
-         cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
-      // If we have a comparison, loop over the predecessors to see if there is
-      // a condition with a lexically identical value.
-      pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-      for (; PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
-          if (PBI->isConditional() && P != BB) {
-            if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
-              if (CI->getOperand(0) == CondCmp->getOperand(0) &&
-                  CI->getOperand(1) == CondCmp->getOperand(1) &&
-                  CI->getPredicate() == CondCmp->getPredicate()) {
-                // TODO: Could handle things like (x != 4) --> (x == 17)
-                if (ProcessBranchOnDuplicateCond(P, BB))
-                  return true;
-              }
-            }
-          }
-      }
-    }
-    
     // For a comparison where the LHS is outside this block, it's possible
     // that we've branched on it before.  Used LVI to see if we can simplify
     // the branch based on that.
     BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
     Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
     pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-    if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+    if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
         (!isa<Instruction>(CondCmp->getOperand(0)) ||
          cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
       // For predecessor edge, determine if the comparison is true or false
       // on that edge.  If they're all true or all false, we can simplify the
       // branch.
       // FIXME: We could handle mixed true/false by duplicating code.
-      LazyValueInfo::Tristate Baseline =      
+      LazyValueInfo::Tristate Baseline =
         LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
                                 CondConst, *PI, BB);
       if (Baseline != LazyValueInfo::Unknown) {
         // Check that all remaining incoming values match the first one.
         while (++PI != PE) {
-          LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
-                                          CondCmp->getPredicate(),
-                                          CondCmp->getOperand(0),
-                                          CondConst, *PI, BB);
+          LazyValueInfo::Tristate Ret =
+            LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+                                    CondCmp->getOperand(0), CondConst, *PI, BB);
           if (Ret != Baseline) break;
         }
-        
+
         // If we terminated early, then one of the values didn't match.
         if (PI == PE) {
           unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
           unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
-          RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+          CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
           BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
           CondBr->eraseFromParent();
           return true;
@@ -755,174 +757,37 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
     if (isa<Constant>(CondCmp->getOperand(1)))
       SimplifyValue = CondCmp->getOperand(0);
-  
+
   // TODO: There are other places where load PRE would be profitable, such as
   // more complex comparisons.
   if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
     if (SimplifyPartiallyRedundantLoad(LI))
       return true;
-  
-  
+
+
   // Handle a variety of cases where we are branching on something derived from
   // a PHI node in the current block.  If we can prove that any predecessors
   // compute a predictable value based on a PHI node, thread those predecessors.
   //
-  if (ProcessThreadableEdges(CondInst, BB))
+  if (ProcessThreadableEdges(CondInst, BB, Preference))
     return true;
-  
+
   // If this is an otherwise-unfoldable branch on a phi node in the current
   // block, see if we can simplify.
   if (PHINode *PN = dyn_cast<PHINode>(CondInst))
     if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
       return ProcessBranchOnPHI(PN);
-  
-  
+
+
   // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
   if (CondInst->getOpcode() == Instruction::Xor &&
       CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
     return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
-  
-  
-  // TODO: If we have: "br (X > 0)"  and we have a predecessor where we know
-  // "(X == 4)", thread through this block.
-  
-  return false;
-}
 
-/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
-/// block that jump on exactly the same condition.  This means that we almost
-/// always know the direction of the edge in the DESTBB:
-///  PREDBB:
-///     br COND, DESTBB, BBY
-///  DESTBB:
-///     br COND, BBZ, BBW
-///
-/// If DESTBB has multiple predecessors, we can't just constant fold the branch
-/// in DESTBB, we have to thread over it.
-bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
-                                                 BasicBlock *BB) {
-  BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
-  
-  // If both successors of PredBB go to DESTBB, we don't know anything.  We can
-  // fold the branch to an unconditional one, which allows other recursive
-  // simplifications.
-  bool BranchDir;
-  if (PredBI->getSuccessor(1) != BB)
-    BranchDir = true;
-  else if (PredBI->getSuccessor(0) != BB)
-    BranchDir = false;
-  else {
-    DEBUG(dbgs() << "  In block '" << PredBB->getName()
-          << "' folding terminator: " << *PredBB->getTerminator() << '\n');
-    ++NumFolds;
-    ConstantFoldTerminator(PredBB);
-    return true;
-  }
-   
-  BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
 
-  // If the dest block has one predecessor, just fix the branch condition to a
-  // constant and fold it.
-  if (BB->getSinglePredecessor()) {
-    DEBUG(dbgs() << "  In block '" << BB->getName()
-          << "' folding condition to '" << BranchDir << "': "
-          << *BB->getTerminator() << '\n');
-    ++NumFolds;
-    Value *OldCond = DestBI->getCondition();
-    DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
-                                          BranchDir));
-    // Delete dead instructions before we fold the branch.  Folding the branch
-    // can eliminate edges from the CFG which can end up deleting OldCond.
-    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
-    ConstantFoldTerminator(BB);
-    return true;
-  }
- 
-  
-  // Next, figure out which successor we are threading to.
-  BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
-  
-  SmallVector<BasicBlock*, 2> Preds;
-  Preds.push_back(PredBB);
-  
-  // Ok, try to thread it!
-  return ThreadEdge(BB, Preds, SuccBB);
-}
-
-/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
-/// block that switch on exactly the same condition.  This means that we almost
-/// always know the direction of the edge in the DESTBB:
-///  PREDBB:
-///     switch COND [... DESTBB, BBY ... ]
-///  DESTBB:
-///     switch COND [... BBZ, BBW ]
-///
-/// Optimizing switches like this is very important, because simplifycfg builds
-/// switches out of repeated 'if' conditions.
-bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
-                                                 BasicBlock *DestBB) {
-  // Can't thread edge to self.
-  if (PredBB == DestBB)
-    return false;
-  
-  SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
-  SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
-
-  // There are a variety of optimizations that we can potentially do on these
-  // blocks: we order them from most to least preferable.
-  
-  // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
-  // directly to their destination.  This does not introduce *any* code size
-  // growth.  Skip debug info first.
-  BasicBlock::iterator BBI = DestBB->begin();
-  while (isa<DbgInfoIntrinsic>(BBI))
-    BBI++;
-  
-  // FIXME: Thread if it just contains a PHI.
-  if (isa<SwitchInst>(BBI)) {
-    bool MadeChange = false;
-    // Ignore the default edge for now.
-    for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
-      ConstantInt *DestVal = DestSI->getCaseValue(i);
-      BasicBlock *DestSucc = DestSI->getSuccessor(i);
-      
-      // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'.  See if
-      // PredSI has an explicit case for it.  If so, forward.  If it is covered
-      // by the default case, we can't update PredSI.
-      unsigned PredCase = PredSI->findCaseValue(DestVal);
-      if (PredCase == 0) continue;
-      
-      // If PredSI doesn't go to DestBB on this value, then it won't reach the
-      // case on this condition.
-      if (PredSI->getSuccessor(PredCase) != DestBB &&
-          DestSI->getSuccessor(i) != DestBB)
-        continue;
-      
-      // Do not forward this if it already goes to this destination, this would
-      // be an infinite loop.
-      if (PredSI->getSuccessor(PredCase) == DestSucc)
-        continue;
-
-      // Otherwise, we're safe to make the change.  Make sure that the edge from
-      // DestSI to DestSucc is not critical and has no PHI nodes.
-      DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << "   FROM: " << *PredSI);
-      DEBUG(dbgs() << "THROUGH: " << *DestSI);
+  // TODO: If we have: "br (X > 0)"  and we have a predecessor where we know
+  // "(X == 4)", thread through this block.
 
-      // If the destination has PHI nodes, just split the edge for updating
-      // simplicity.
-      if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
-        SplitCriticalEdge(DestSI, i, this);
-        DestSucc = DestSI->getSuccessor(i);
-      }
-      FoldSingleEntryPHINodes(DestSucc);
-      PredSI->setSuccessor(PredCase, DestSucc);
-      MadeChange = true;
-    }
-    
-    if (MadeChange)
-      return true;
-  }
-  
   return false;
 }
 
@@ -934,13 +799,13 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
 bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // Don't hack volatile loads.
   if (LI->isVolatile()) return false;
-  
+
   // If the load is defined in a block with exactly one predecessor, it can't be
   // partially redundant.
   BasicBlock *LoadBB = LI->getParent();
   if (LoadBB->getSinglePredecessor())
     return false;
-  
+
   Value *LoadedPtr = LI->getOperand(0);
 
   // If the loaded operand is defined in the LoadBB, it can't be available.
@@ -948,17 +813,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
     if (PtrOp->getParent() == LoadBB)
       return false;
-  
+
   // Scan a few instructions up from the load, to see if it is obviously live at
   // the entry to its block.
   BasicBlock::iterator BBIt = LI;
 
-  if (Value *AvailableVal = 
+  if (Value *AvailableVal =
         FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
     // If the value if the load is locally available within the block, just use
     // it.  This frequently occurs for reg2mem'd allocas.
     //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
-    
+
     // If the returned value is the load itself, replace with an undef. This can
     // only happen in dead loops.
     if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -972,13 +837,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // might clobber its value.
   if (BBIt != LoadBB->begin())
     return false;
-  
-  
+
+
   SmallPtrSet<BasicBlock*, 8> PredsScanned;
   typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
   AvailablePredsTy AvailablePreds;
   BasicBlock *OneUnavailablePred = 0;
-  
+
   // If we got here, the loaded value is transparent through to the start of the
   // block.  Check to see if it is available in any of the predecessor blocks.
   for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
@@ -996,23 +861,23 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
       OneUnavailablePred = PredBB;
       continue;
     }
-    
+
     // If so, this load is partially redundant.  Remember this info so that we
     // can create a PHI node.
     AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
   }
-  
+
   // If the loaded value isn't available in any predecessor, it isn't partially
   // redundant.
   if (AvailablePreds.empty()) return false;
-  
+
   // Okay, the loaded value is available in at least one (and maybe all!)
   // predecessors.  If the value is unavailable in more than one unique
   // predecessor, we want to insert a merge block for those common predecessors.
   // This ensures that we only have to insert one reload, thus not increasing
   // code size.
   BasicBlock *UnavailablePred = 0;
-  
+
   // If there is exactly one predecessor where the value is unavailable, the
   // already computed 'OneUnavailablePred' block is it.  If it ends in an
   // unconditional branch, we know that it isn't a critical edge.
@@ -1035,17 +900,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
       // If the predecessor is an indirect goto, we can't split the edge.
       if (isa<IndirectBrInst>(P->getTerminator()))
         return false;
-      
+
       if (!AvailablePredSet.count(P))
         PredsToSplit.push_back(P);
     }
-    
+
     // Split them out to their own block.
     UnavailablePred =
       SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
                              "thread-pre-split", this);
   }
-  
+
   // If the value isn't available in all predecessors, then there will be
   // exactly one where it isn't available.  Insert a load on that edge and add
   // it to the AvailablePreds list.
@@ -1057,35 +922,35 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
                                  UnavailablePred->getTerminator());
     AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
   }
-  
+
   // Now we know that each predecessor of this block has a value in
   // AvailablePreds, sort them for efficient access as we're walking the preds.
   array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
-  
+
   // Create a PHI node at the start of the block for the PRE'd load value.
   PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
   PN->takeName(LI);
-  
+
   // Insert new entries into the PHI for each predecessor.  A single block may
   // have multiple entries here.
   for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
        ++PI) {
     BasicBlock *P = *PI;
-    AvailablePredsTy::iterator I = 
+    AvailablePredsTy::iterator I =
       std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
                        std::make_pair(P, (Value*)0));
-    
+
     assert(I != AvailablePreds.end() && I->first == P &&
            "Didn't find entry for predecessor!");
-    
+
     PN->addIncoming(I->second, I->first);
   }
-  
+
   //cerr << "PRE: " << *LI << *PN << "\n";
-  
+
   LI->replaceAllUsesWith(PN);
   LI->eraseFromParent();
-  
+
   return true;
 }
 
@@ -1097,7 +962,7 @@ FindMostPopularDest(BasicBlock *BB,
                     const SmallVectorImpl<std::pair<BasicBlock*,
                                   BasicBlock*> > &PredToDestList) {
   assert(!PredToDestList.empty());
-  
+
   // Determine popularity.  If there are multiple possible destinations, we
   // explicitly choose to ignore 'undef' destinations.  We prefer to thread
   // blocks with known and real destinations to threading undef.  We'll handle
@@ -1106,13 +971,13 @@ FindMostPopularDest(BasicBlock *BB,
   for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
     if (PredToDestList[i].second)
       DestPopularity[PredToDestList[i].second]++;
-  
+
   // Find the most popular dest.
   DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
   BasicBlock *MostPopularDest = DPI->first;
   unsigned Popularity = DPI->second;
   SmallVector<BasicBlock*, 4> SamePopularity;
-  
+
   for (++DPI; DPI != DestPopularity.end(); ++DPI) {
     // If the popularity of this entry isn't higher than the popularity we've
     // seen so far, ignore it.
@@ -1126,10 +991,10 @@ FindMostPopularDest(BasicBlock *BB,
       SamePopularity.clear();
       MostPopularDest = DPI->first;
       Popularity = DPI->second;
-    }      
+    }
   }
-  
-  // Okay, now we know the most popular destination.  If there is more than
+
+  // Okay, now we know the most popular destination.  If there is more than one
   // destination, we need to determine one.  This is arbitrary, but we need
   // to make a deterministic decision.  Pick the first one that appears in the
   // successor list.
@@ -1138,105 +1003,105 @@ FindMostPopularDest(BasicBlock *BB,
     TerminatorInst *TI = BB->getTerminator();
     for (unsigned i = 0; ; ++i) {
       assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
-      
+
       if (std::find(SamePopularity.begin(), SamePopularity.end(),
                     TI->getSuccessor(i)) == SamePopularity.end())
         continue;
-      
+
       MostPopularDest = TI->getSuccessor(i);
       break;
     }
   }
-  
+
   // Okay, we have finally picked the most popular destination.
   return MostPopularDest;
 }
 
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+                                           ConstantPreference Preference) {
   // If threading this would thread across a loop header, don't even try to
   // thread the edge.
   if (LoopHeaders.count(BB))
     return false;
-  
-  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
-  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+
+  PredValueInfoTy PredValues;
+  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
     return false;
-  
+
   assert(!PredValues.empty() &&
          "ComputeValueKnownInPredecessors returned true with no values");
 
   DEBUG(dbgs() << "IN BB: " << *BB;
         for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
-          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = ";
-          if (PredValues[i].first)
-            dbgs() << *PredValues[i].first;
-          else
-            dbgs() << "UNDEF";
-          dbgs() << " for pred '" << PredValues[i].second->getName()
-          << "'.\n";
+          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = "
+            << *PredValues[i].first
+            << " for pred '" << PredValues[i].second->getName() << "'.\n";
         });
-  
+
   // Decide what we want to thread through.  Convert our list of known values to
   // a list of known destinations for each pred.  This also discards duplicate
   // predecessors and keeps track of the undefined inputs (which are represented
   // as a null dest in the PredToDestList).
   SmallPtrSet<BasicBlock*, 16> SeenPreds;
   SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
-  
+
   BasicBlock *OnlyDest = 0;
   BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
-  
+
   for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
     BasicBlock *Pred = PredValues[i].second;
     if (!SeenPreds.insert(Pred))
       continue;  // Duplicate predecessor entry.
-    
+
     // If the predecessor ends with an indirect goto, we can't change its
     // destination.
     if (isa<IndirectBrInst>(Pred->getTerminator()))
       continue;
-    
-    ConstantInt *Val = PredValues[i].first;
-    
+
+    Constant *Val = PredValues[i].first;
+
     BasicBlock *DestBB;
-    if (Val == 0)      // Undef.
+    if (isa<UndefValue>(Val))
       DestBB = 0;
     else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
-      DestBB = BI->getSuccessor(Val->isZero());
+      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+      DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
     else {
-      SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
-      DestBB = SI->getSuccessor(SI->findCaseValue(Val));
+      assert(isa<IndirectBrInst>(BB->getTerminator())
+              && "Unexpected terminator");
+      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
     }
 
     // If we have exactly one destination, remember it for efficiency below.
-    if (i == 0)
+    if (PredToDestList.empty())
       OnlyDest = DestBB;
     else if (OnlyDest != DestBB)
       OnlyDest = MultipleDestSentinel;
-    
+
     PredToDestList.push_back(std::make_pair(Pred, DestBB));
   }
-  
+
   // If all edges were unthreadable, we fail.
   if (PredToDestList.empty())
     return false;
-  
+
   // Determine which is the most common successor.  If we have many inputs and
   // this block is a switch, we want to start by threading the batch that goes
   // to the most popular destination first.  If we only know about one
   // threadable destination (the common case) we can avoid this.
   BasicBlock *MostPopularDest = OnlyDest;
-  
+
   if (MostPopularDest == MultipleDestSentinel)
     MostPopularDest = FindMostPopularDest(BB, PredToDestList);
-  
+
   // Now that we know what the most popular destination is, factor all
   // predecessors that will jump to it into a single predecessor.
   SmallVector<BasicBlock*, 16> PredsToFactor;
   for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
     if (PredToDestList[i].second == MostPopularDest) {
       BasicBlock *Pred = PredToDestList[i].first;
-      
+
       // This predecessor may be a switch or something else that has multiple
       // edges to the block.  Factor each of these edges by listing them
       // according to # occurrences in PredsToFactor.
@@ -1251,7 +1116,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
   if (MostPopularDest == 0)
     MostPopularDest = BB->getTerminator()->
                             getSuccessor(GetBestDestForJumpOnUndef(BB));
-        
+
   // Ok, try to thread it!
   return ThreadEdge(BB, PredsToFactor, MostPopularDest);
 }
@@ -1259,15 +1124,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
 /// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
 /// a PHI node in the current block.  See if there are any simplifications we
 /// can do based on inputs to the phi node.
-/// 
+///
 bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
   BasicBlock *BB = PN->getParent();
-  
+
   // TODO: We could make use of this to do it once for blocks with common PHI
   // values.
   SmallVector<BasicBlock*, 1> PredBBs;
   PredBBs.resize(1);
-  
+
   // If any of the predecessor blocks end in an unconditional branch, we can
   // *duplicate* the conditional branch into that block in order to further
   // encourage jump threading and to eliminate cases where we have branch on a
@@ -1289,21 +1154,21 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
 /// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
 /// a xor instruction in the current block.  See if there are any
 /// simplifications we can do based on inputs to the xor.
-/// 
+///
 bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
   BasicBlock *BB = BO->getParent();
-  
+
   // If either the LHS or RHS of the xor is a constant, don't do this
   // optimization.
   if (isa<ConstantInt>(BO->getOperand(0)) ||
       isa<ConstantInt>(BO->getOperand(1)))
     return false;
-  
+
   // If the first instruction in BB isn't a phi, we won't be able to infer
   // anything special about any particular predecessor.
   if (!isa<PHINode>(BB->front()))
     return false;
-  
+
   // If we have a xor as the branch input to this block, and we know that the
   // LHS or RHS of the xor in any predecessor is true/false, then we can clone
   // the condition into the predecessor and fix that value to true, saving some
@@ -1322,15 +1187,17 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
   //    %Y = icmp ne i32 %A, %B
   //    br i1 %Z, ...
 
-  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+  PredValueInfoTy XorOpValues;
   bool isLHS = true;
-  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+                                       WantInteger)) {
     assert(XorOpValues.empty());
-    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+                                         WantInteger))
       return false;
     isLHS = false;
   }
-  
+
   assert(!XorOpValues.empty() &&
          "ComputeValueKnownInPredecessors returned true with no values");
 
@@ -1338,29 +1205,33 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
   // predecessors can be of the set true, false, or undef.
   unsigned NumTrue = 0, NumFalse = 0;
   for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
-    if (!XorOpValues[i].first) continue;  // Ignore undefs for the count.
-    if (XorOpValues[i].first->isZero())
+    if (isa<UndefValue>(XorOpValues[i].first))
+      // Ignore undefs for the count.
+      continue;
+    if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
       ++NumFalse;
     else
       ++NumTrue;
   }
-  
+
   // Determine which value to split on, true, false, or undef if neither.
   ConstantInt *SplitVal = 0;
   if (NumTrue > NumFalse)
     SplitVal = ConstantInt::getTrue(BB->getContext());
   else if (NumTrue != 0 || NumFalse != 0)
     SplitVal = ConstantInt::getFalse(BB->getContext());
-  
+
   // Collect all of the blocks that this can be folded into so that we can
   // factor this once and clone it once.
   SmallVector<BasicBlock*, 8> BlocksToFoldInto;
   for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
-    if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+    if (XorOpValues[i].first != SplitVal &&
+        !isa<UndefValue>(XorOpValues[i].first))
+      continue;
 
     BlocksToFoldInto.push_back(XorOpValues[i].second);
   }
-  
+
   // If we inferred a value for all of the predecessors, then duplication won't
   // help us.  However, we can just replace the LHS or RHS with the constant.
   if (BlocksToFoldInto.size() ==
@@ -1377,10 +1248,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
       // If all preds provide 1, set the computed value to 1.
       BO->setOperand(!isLHS, SplitVal);
     }
-    
+
     return true;
   }
-  
+
   // Try to duplicate BB into PredBB.
   return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
 }
@@ -1398,14 +1269,14 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
     // Ok, we have a PHI node.  Figure out what the incoming value was for the
     // DestBlock.
     Value *IV = PN->getIncomingValueForBlock(OldPred);
-    
+
     // Remap the value if necessary.
     if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
       DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
       if (I != ValueMap.end())
         IV = I->second;
     }
-    
+
     PN->addIncoming(IV, NewPred);
   }
 }
@@ -1413,8 +1284,8 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
 /// ThreadEdge - We have decided that it is safe and profitable to factor the
 /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
 /// across BB.  Transform the IR to reflect this change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB, 
-                               const SmallVectorImpl<BasicBlock*> &PredBBs, 
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+                               const SmallVectorImpl<BasicBlock*> &PredBBs,
                                BasicBlock *SuccBB) {
   // If threading to the same block as we come from, we would infinite loop.
   if (SuccBB == BB) {
@@ -1422,7 +1293,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
           << "' - would thread to self!\n");
     return false;
   }
-  
+
   // If threading this would thread across a loop header, don't thread the edge.
   // See the comments above FindLoopHeaders for justifications and caveats.
   if (LoopHeaders.count(BB)) {
@@ -1438,7 +1309,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
           << "' - Cost is too high: " << JumpThreadCost << "\n");
     return false;
   }
-  
+
   // And finally, do it!  Start by factoring the predecessors is needed.
   BasicBlock *PredBB;
   if (PredBBs.size() == 1)
@@ -1449,30 +1320,29 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
                                     ".thr_comm", this);
   }
-  
+
   // And finally, do it!
   DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName() << "' to '"
         << SuccBB->getName() << "' with cost: " << JumpThreadCost
         << ", across block:\n    "
         << *BB << "\n");
-  
-  if (LVI)
-    LVI->threadEdge(PredBB, BB, SuccBB);
-  
+
+  LVI->threadEdge(PredBB, BB, SuccBB);
+
   // We are going to have to map operands from the original BB block to the new
   // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
   // account for entry from PredBB.
   DenseMap<Instruction*, Value*> ValueMapping;
-  
-  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), 
-                                         BB->getName()+".thread", 
+
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+                                         BB->getName()+".thread",
                                          BB->getParent(), BB);
   NewBB->moveAfter(PredBB);
-  
+
   BasicBlock::iterator BI = BB->begin();
   for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
     ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-  
+
   // Clone the non-phi instructions of BB into NewBB, keeping track of the
   // mapping and using it to remap operands in the cloned instructions.
   for (; !isa<TerminatorInst>(BI); ++BI) {
@@ -1480,7 +1350,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     New->setName(BI->getName());
     NewBB->getInstList().push_back(New);
     ValueMapping[BI] = New;
-   
+
     // Remap operands to patch up intra-block references.
     for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
       if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1489,15 +1359,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
           New->setOperand(i, I->second);
       }
   }
-  
+
   // We didn't copy the terminator from BB over to NewBB, because there is now
   // an unconditional jump to SuccBB.  Insert the unconditional jump.
   BranchInst::Create(SuccBB, NewBB);
-  
+
   // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
   // PHI nodes for NewBB now.
   AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
-  
+
   // If there were values defined in BB that are used outside the block, then we
   // now have to update all uses of the value to use either the original value,
   // the cloned value, or some PHI derived value.  This can require arbitrary
@@ -1515,14 +1385,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
           continue;
       } else if (User->getParent() == BB)
         continue;
-      
+
       UsesToRename.push_back(&UI.getUse());
     }
-    
+
     // If there are no uses outside the block, we're done with this instruction.
     if (UsesToRename.empty())
       continue;
-    
+
     DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
 
     // We found a use of I outside of BB.  Rename all uses of I that are outside
@@ -1531,28 +1401,28 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     SSAUpdate.Initialize(I->getType(), I->getName());
     SSAUpdate.AddAvailableValue(BB, I);
     SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
-    
+
     while (!UsesToRename.empty())
       SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
     DEBUG(dbgs() << "\n");
   }
-  
-  
+
+
   // Ok, NewBB is good to go.  Update the terminator of PredBB to jump to
   // NewBB instead of BB.  This eliminates predecessors from BB, which requires
   // us to simplify any PHI nodes in BB.
   TerminatorInst *PredTerm = PredBB->getTerminator();
   for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
     if (PredTerm->getSuccessor(i) == BB) {
-      RemovePredecessorAndSimplify(BB, PredBB, TD);
+      BB->removePredecessor(PredBB, true);
       PredTerm->setSuccessor(i, NewBB);
     }
-  
+
   // At this point, the IR is fully up to date and consistent.  Do a quick scan
   // over the new instructions and zap any that are constants or dead.  This
   // frequently happens because of phi translation.
   SimplifyInstructionsInBlock(NewBB, TD);
-  
+
   // Threaded an edge!
   ++NumThreads;
   return true;
@@ -1576,14 +1446,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
           << "' - it might create an irreducible loop!\n");
     return false;
   }
-  
+
   unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
   if (DuplicationCost > Threshold) {
     DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
           << "' - Cost is too high: " << DuplicationCost << "\n");
     return false;
   }
-  
+
   // And finally, do it!  Start by factoring the predecessors is needed.
   BasicBlock *PredBB;
   if (PredBBs.size() == 1)
@@ -1594,35 +1464,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
     PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
                                     ".thr_comm", this);
   }
-  
+
   // Okay, we decided to do this!  Clone all the instructions in BB onto the end
   // of PredBB.
   DEBUG(dbgs() << "  Duplicating block '" << BB->getName() << "' into end of '"
         << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
         << DuplicationCost << " block is:" << *BB << "\n");
-  
+
   // Unless PredBB ends with an unconditional branch, split the edge so that we
   // can just clone the bits from BB into the end of the new PredBB.
   BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
-  
+
   if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
     PredBB = SplitEdge(PredBB, BB, this);
     OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
   }
-  
+
   // We are going to have to map operands from the original BB block into the
   // PredBB block.  Evaluate PHI nodes in BB.
   DenseMap<Instruction*, Value*> ValueMapping;
-  
+
   BasicBlock::iterator BI = BB->begin();
   for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
     ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-  
+
   // Clone the non-phi instructions of BB into PredBB, keeping track of the
   // mapping and using it to remap operands in the cloned instructions.
   for (; BI != BB->end(); ++BI) {
     Instruction *New = BI->clone();
-    
+
     // Remap operands to patch up intra-block references.
     for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
       if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1644,7 +1514,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
       ValueMapping[BI] = New;
     }
   }
-  
+
   // Check to see if the targets of the branch had PHI nodes. If so, we need to
   // add entries to the PHI nodes for branch from PredBB now.
   BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
@@ -1652,7 +1522,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                   ValueMapping);
   AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
                                   ValueMapping);
-  
+
   // If there were values defined in BB that are used outside the block, then we
   // now have to update all uses of the value to use either the original value,
   // the cloned value, or some PHI derived value.  This can require arbitrary
@@ -1670,35 +1540,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
           continue;
       } else if (User->getParent() == BB)
         continue;
-      
+
       UsesToRename.push_back(&UI.getUse());
     }
-    
+
     // If there are no uses outside the block, we're done with this instruction.
     if (UsesToRename.empty())
       continue;
-    
+
     DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
-    
+
     // We found a use of I outside of BB.  Rename all uses of I that are outside
     // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
     // with the two values we know.
     SSAUpdate.Initialize(I->getType(), I->getName());
     SSAUpdate.AddAvailableValue(BB, I);
     SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
-    
+
     while (!UsesToRename.empty())
       SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
     DEBUG(dbgs() << "\n");
   }
-  
+
   // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
   // that we nuked.
-  RemovePredecessorAndSimplify(BB, PredBB, TD);
-  
+  BB->removePredecessor(PredBB, true);
+
   // Remove the unconditional branch at the end of the PredBB block.
   OldPredBranch->eraseFromParent();
-  
+
   ++NumDupes;
   return true;
 }
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 2ef85446bd9b..07867933d08c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -36,13 +36,13 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Support/CFG.h"
@@ -66,7 +66,9 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
 namespace {
   struct LICM : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LICM() : LoopPass(ID) {}
+    LICM() : LoopPass(ID) {
+      initializeLICMPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -80,7 +82,7 @@ namespace {
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequired<AliasAnalysis>();
       AU.addPreserved<AliasAnalysis>();
-      AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved("scalar-evolution");
       AU.addPreservedID(LoopSimplifyID);
     }
 
@@ -129,42 +131,7 @@ namespace {
     ///
     bool inSubLoop(BasicBlock *BB) {
       assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
-      for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
-        if ((*I)->contains(BB))
-          return true;  // A subloop actually contains this block!
-      return false;
-    }
-
-    /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
-    /// specified exit block of the loop is dominated by the specified block
-    /// that is in the body of the loop.  We use these constraints to
-    /// dramatically limit the amount of the dominator tree that needs to be
-    /// searched.
-    bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
-                                           BasicBlock *BlockInLoop) const {
-      // If the block in the loop is the loop header, it must be dominated!
-      BasicBlock *LoopHeader = CurLoop->getHeader();
-      if (BlockInLoop == LoopHeader)
-        return true;
-
-      DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
-      DomTreeNode *IDom            = DT->getNode(ExitBlock);
-
-      // Because the exit block is not in the loop, we know we have to get _at
-      // least_ its immediate dominator.
-      IDom = IDom->getIDom();
-      
-      while (IDom && IDom != BlockInLoopNode) {
-        // If we have got to the header of the loop, then the instructions block
-        // did not dominate the exit node, so we can't hoist it.
-        if (IDom->getBlock() == LoopHeader)
-          return false;
-
-        // Get next Immediate Dominator.
-        IDom = IDom->getIDom();
-      };
-
-      return true;
+      return LI->getLoopFor(BB) != CurLoop;
     }
 
     /// sink - When an instruction is found to only be used outside of the loop,
@@ -187,13 +154,13 @@ namespace {
     /// pointerInvalidatedByLoop - Return true if the body of this loop may
     /// store into the memory location pointed to by V.
     ///
-    bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+    bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+                                  const MDNode *TBAAInfo) {
       // Check to see if any of the basic blocks in CurLoop invalidate *V.
-      return CurAST->getAliasSetForPointer(V, Size).isMod();
+      return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
     }
 
     bool canSinkOrHoistInst(Instruction &I);
-    bool isLoopInvariantInst(Instruction &I);
     bool isNotUsedInLoop(Instruction &I);
 
     void PromoteAliasSet(AliasSet &AS);
@@ -201,7 +168,12 @@ namespace {
 }
 
 char LICM::ID = 0;
-INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
 
 Pass *llvm::createLICMPass() { return new LICM(); }
 
@@ -369,7 +341,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
       // if all of the operands of the instruction are loop invariant and if it
       // is safe to hoist the instruction.
       //
-      if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+      if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
           isSafeToExecuteUnconditionally(I))
         hoist(I);
     }
@@ -394,16 +366,17 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
       return true;
     
     // Don't hoist loads which have may-aliased stores in loop.
-    unsigned Size = 0;
+    uint64_t Size = 0;
     if (LI->getType()->isSized())
       Size = AA->getTypeStoreSize(LI->getType());
-    return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+    return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+                                     LI->getMetadata(LLVMContext::MD_tbaa));
   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
     // Handle obvious cases efficiently.
     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
       return true;
-    else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+    if (AliasAnalysis::onlyReadsMemory(Behavior)) {
       // If this call only reads from memory and there are no writes to memory
       // in the loop, we can hoist or sink the call as appropriate.
       bool FoundMod = false;
@@ -452,20 +425,6 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
 }
 
 
-/// isLoopInvariantInst - Return true if all operands of this instruction are
-/// loop invariant.  We also filter out non-hoistable instructions here just for
-/// efficiency.
-///
-bool LICM::isLoopInvariantInst(Instruction &I) {
-  // The instruction is loop invariant if all of its operands are loop-invariant
-  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-    if (!CurLoop->isLoopInvariant(I.getOperand(i)))
-      return false;
-
-  // If we got this far, the instruction is loop invariant!
-  return true;
-}
-
 /// sink - When an instruction is found to only be used outside of the loop,
 /// this function moves it to the exit blocks and patches up SSA form as needed.
 /// This method is guaranteed to remove the original instruction from its
@@ -486,7 +445,7 @@ void LICM::sink(Instruction &I) {
   // enough that we handle it as a special (more efficient) case.  It is more
   // efficient to handle because there are no PHI nodes that need to be placed.
   if (ExitBlocks.size() == 1) {
-    if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+    if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
       // Instruction is not used, just delete it.
       CurAST->deleteValue(&I);
       // If I has users in unreachable blocks, eliminate.
@@ -537,7 +496,7 @@ void LICM::sink(Instruction &I) {
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
     
-    if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+    if (!DT->dominates(InstOrigBB, ExitBlock))
       continue;
     
     // Insert the code after the last PHI node.
@@ -628,15 +587,61 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getExitBlocks(ExitBlocks);
 
-  // For each exit block, get the DT node and walk up the DT until the
-  // instruction's basic block is found or we exit the loop.
+  // Verify that the block dominates each of the exit blocks of the loop.
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
-    if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+    if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
       return false;
 
   return true;
 }
 
+namespace {
+  class LoopPromoter : public LoadAndStorePromoter {
+    Value *SomePtr;  // Designated pointer to store to.
+    SmallPtrSet<Value*, 4> &PointerMustAliases;
+    SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+    AliasSetTracker &AST;
+  public:
+    LoopPromoter(Value *SP,
+                 const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+                 SmallPtrSet<Value*, 4> &PMA,
+                 SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast)
+      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+        LoopExitBlocks(LEB), AST(ast) {}
+    
+    virtual bool isInstInList(Instruction *I,
+                              const SmallVectorImpl<Instruction*> &) const {
+      Value *Ptr;
+      if (LoadInst *LI = dyn_cast<LoadInst>(I))
+        Ptr = LI->getOperand(0);
+      else
+        Ptr = cast<StoreInst>(I)->getPointerOperand();
+      return PointerMustAliases.count(Ptr);
+    }
+    
+    virtual void doExtraRewritesBeforeFinalDeletion() const {
+      // Insert stores after in the loop exit blocks.  Each exit block gets a
+      // store of the live-out values that feed them.  Since we've already told
+      // the SSA updater about the defs in the loop and the preheader
+      // definition, it is all set and we can start using it.
+      for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+        BasicBlock *ExitBlock = LoopExitBlocks[i];
+        Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+        Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+        new StoreInst(LiveInValue, SomePtr, InsertPos);
+      }
+    }
+
+    virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+      // Update alias analysis.
+      AST.copyValue(LI, V);
+    }
+    virtual void instructionDeleted(Instruction *I) const {
+      AST.deleteValue(I);
+    }
+  };
+} // end anon namespace
+
 /// PromoteAliasSet - Try to promote memory values to scalars by sinking
 /// stores out of the loop and moving loads to before the loop.  We do this by
 /// looping over the stores in the loop, looking for stores to Must pointers
@@ -697,8 +702,11 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
       if (isa<LoadInst>(Use))
         assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
       else if (isa<StoreInst>(Use)) {
+        // Stores *of* the pointer are not interesting, only stores *to* the
+        // pointer.
+        if (Use->getOperand(1) != ASIV)
+          continue;
         assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
-        if (Use->getOperand(0) == ASIV) return;
       } else
         return; // Not a load or store.
       
@@ -718,179 +726,43 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   Changed = true;
   ++NumPromoted;
 
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+  
   // We use the SSAUpdater interface to insert phi nodes as required.
   SmallVector<PHINode*, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
+  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+                        *CurAST);
   
-  // It wants to know some value of the same type as what we'll be inserting.
-  Value *SomeValue;
-  if (isa<LoadInst>(LoopUses[0]))
-    SomeValue = LoopUses[0];
-  else
-    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
-  SSA.Initialize(SomeValue->getType(), SomeValue->getName());
-
-  // First step: bucket up uses of the pointers by the block they occur in.
-  // This is important because we have to handle multiple defs/uses in a block
-  // ourselves: SSAUpdater is purely for cross-block references.
-  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
-  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
-  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
-    Instruction *User = LoopUses[i];
-    UsesByBlock[User->getParent()].push_back(User);
-  }
-  
-  // Okay, now we can iterate over all the blocks in the loop with uses,
-  // processing them.  Keep track of which loads are loading a live-in value.
-  SmallVector<LoadInst*, 32> LiveInLoads;
-  DenseMap<Value*, Value*> ReplacedLoads;
-  
-  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
-    Instruction *User = LoopUses[LoopUse];
-    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
-    
-    // If this block has already been processed, ignore this repeat use.
-    if (BlockUses.empty()) continue;
-    
-    // Okay, this is the first use in the block.  If this block just has a
-    // single user in it, we can rewrite it trivially.
-    if (BlockUses.size() == 1) {
-      // If it is a store, it is a trivial def of the value in the block.
-      if (isa<StoreInst>(User)) {
-        SSA.AddAvailableValue(User->getParent(),
-                              cast<StoreInst>(User)->getOperand(0));
-      } else {
-        // Otherwise it is a load, queue it to rewrite as a live-in load.
-        LiveInLoads.push_back(cast<LoadInst>(User));
-      }
-      BlockUses.clear();
-      continue;
-    }
-    
-    // Otherwise, check to see if this block is all loads.  If so, we can queue
-    // them all as live in loads.
-    bool HasStore = false;
-    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
-      if (isa<StoreInst>(BlockUses[i])) {
-        HasStore = true;
-        break;
-      }
-    }
-    
-    if (!HasStore) {
-      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
-        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
-      BlockUses.clear();
-      continue;
-    }
-
-    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
-    // Since SSAUpdater is purely for cross-block values, we need to determine
-    // the order of these instructions in the block.  If the first use in the
-    // block is a load, then it uses the live in value.  The last store defines
-    // the live out value.  We handle this by doing a linear scan of the block.
-    BasicBlock *BB = User->getParent();
-    Value *StoredValue = 0;
-    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
-      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
-        // If this is a load from an unrelated pointer, ignore it.
-        if (!PointerMustAliases.count(L->getOperand(0))) continue;
-
-        // If we haven't seen a store yet, this is a live in use, otherwise
-        // use the stored value.
-        if (StoredValue) {
-          L->replaceAllUsesWith(StoredValue);
-          ReplacedLoads[L] = StoredValue;
-        } else {
-          LiveInLoads.push_back(L);
-        }
-        continue;
-      }
-      
-      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
-        // If this is a store to an unrelated pointer, ignore it.
-        if (!PointerMustAliases.count(S->getOperand(1))) continue;
-
-        // Remember that this is the active value in the block.
-        StoredValue = S->getOperand(0);
-      }
-    }
-    
-    // The last stored value that happened is the live-out for the block.
-    assert(StoredValue && "Already checked that there is a store in block");
-    SSA.AddAvailableValue(BB, StoredValue);
-    BlockUses.clear();
-  }
-  
-  // Now that all the intra-loop values are classified, set up the preheader.
-  // It gets a load of the pointer we're promoting, and it is the live-out value
-  // from the preheader.
-  LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
-                                         Preheader->getTerminator());
+  // Set up the preheader to have a definition of the value.  It is the live-out
+  // value from the preheader that uses in the loop will use.
+  LoadInst *PreheaderLoad =
+    new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+                 Preheader->getTerminator());
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
-  // Now that the preheader is good to go, set up the exit blocks.  Each exit
-  // block gets a store of the live-out values that feed them.  Since we've
-  // already told the SSA updater about the defs in the loop and the preheader
-  // definition, it is all set and we can start using it.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  CurLoop->getUniqueExitBlocks(ExitBlocks);
-  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
-    BasicBlock *ExitBlock = ExitBlocks[i];
-    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
-    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
-    new StoreInst(LiveInValue, SomePtr, InsertPos);
+  // Copy any value stored to or loaded from a must-alias of the pointer.
+  if (PreheaderLoad->getType()->isPointerTy()) {
+    Value *SomeValue;
+    if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
+      SomeValue = LI;
+    else
+      SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
+    
+    CurAST->copyValue(SomeValue, PreheaderLoad);
   }
 
-  // Okay, now we rewrite all loads that use live-in values in the loop,
-  // inserting PHI nodes as necessary.
-  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
-    LoadInst *ALoad = LiveInLoads[i];
-    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
-    ALoad->replaceAllUsesWith(NewVal);
-    CurAST->copyValue(ALoad, NewVal);
-    ReplacedLoads[ALoad] = NewVal;
-  }
+  // Rewrite all the loads in the loop and remember all the definitions from
+  // stores in the loop.
+  Promoter.run(LoopUses);
   
   // If the preheader load is itself a pointer, we need to tell alias analysis
   // about the new pointer we created in the preheader block and about any PHI
   // nodes that just got inserted.
   if (PreheaderLoad->getType()->isPointerTy()) {
-    // Copy any value stored to or loaded from a must-alias of the pointer.
-    CurAST->copyValue(SomeValue, PreheaderLoad);
-    
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
-      CurAST->copyValue(SomeValue, NewPHIs[i]);
-  }
-  
-  // Now that everything is rewritten, delete the old instructions from the body
-  // of the loop.  They should all be dead now.
-  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
-    Instruction *User = LoopUses[i];
-    
-    // If this is a load that still has uses, then the load must have been added
-    // as a live value in the SSAUpdate data structure for a block (e.g. because
-    // the loaded value was stored later).  In this case, we need to recursively
-    // propagate the updates until we get to the real value.
-    if (!User->use_empty()) {
-      Value *NewVal = ReplacedLoads[User];
-      assert(NewVal && "not a replaced load?");
-      
-      // Propagate down to the ultimate replacee.  The intermediately loads
-      // could theoretically already have been deleted, so we don't want to
-      // dereference the Value*'s.
-      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
-      while (RLI != ReplacedLoads.end()) {
-        NewVal = RLI->second;
-        RLI = ReplacedLoads.find(NewVal);
-      }
-      
-      User->replaceAllUsesWith(NewVal);
-      CurAST->copyValue(User, NewVal);
-    }
-    
-    CurAST->deleteValue(User);
-    User->eraseFromParent();
+      CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
   }
   
   // fwew, we're done!
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 543dfc1cba09..6d1d344a9296 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
 #define DEBUG_TYPE "loop-delete"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SmallVector.h"
@@ -28,7 +29,9 @@ namespace {
   class LoopDeletion : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopDeletion() : LoopPass(ID) {}
+    LoopDeletion() : LoopPass(ID) {
+      initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+    }
     
     // Possibly eliminate loop L if it is dead.
     bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -49,14 +52,20 @@ namespace {
       AU.addPreserved<LoopInfo>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
-      AU.addPreserved<DominanceFrontier>();
     }
   };
 }
   
 char LoopDeletion::ID = 0;
-INITIALIZE_PASS(LoopDeletion, "loop-deletion",
-                "Delete dead loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+                "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+                "Delete dead loops", false, false)
 
 Pass* llvm::createLoopDeletionPass() {
   return new LoopDeletion();
@@ -183,22 +192,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   // Update the dominator tree and remove the instructions and blocks that will
   // be deleted from the reference counting scheme.
   DominatorTree& DT = getAnalysis<DominatorTree>();
-  DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
-  SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+  SmallVector<DomTreeNode*, 8> ChildNodes;
   for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
        LI != LE; ++LI) {
     // Move all of the block's children to be children of the preheader, which
     // allows us to remove the domtree entry for the block.
-    ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
-    for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+    ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+    for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
          DE = ChildNodes.end(); DI != DE; ++DI) {
       DT.changeImmediateDominator(*DI, DT[preheader]);
-      if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
     }
     
     ChildNodes.clear();
     DT.eraseNode(*LI);
-    if (DF) DF->removeBlock(*LI);
 
     // Remove the block from the reference counting scheme, so that we can
     // delete it freely later.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 000000000000..d7fa149492bd
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,594 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form.  In cases that this kicks in, it can be a significant
+// performance win.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+//   memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+//   fpowi
+// Future integer operation idioms to recognize:
+//   ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set.  It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop.  This would handle things like:
+//   void foo(_Complex float *P)
+//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+  class LoopIdiomRecognize : public LoopPass {
+    Loop *CurLoop;
+    const TargetData *TD;
+    DominatorTree *DT;
+    ScalarEvolution *SE;
+    TargetLibraryInfo *TLI;
+  public:
+    static char ID;
+    explicit LoopIdiomRecognize() : LoopPass(ID) {
+      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+                        SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+    
+    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+                                 unsigned StoreAlignment,
+                                 Value *SplatValue, Instruction *TheStore,
+                                 const SCEVAddRecExpr *Ev,
+                                 const SCEV *BECount);
+    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+                                    const SCEVAddRecExpr *StoreEv,
+                                    const SCEVAddRecExpr *LoadEv,
+                                    const SCEV *BECount);
+      
+    /// This transformation requires natural loop information & requires that
+    /// loop preheaders be inserted into the CFG.
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LoopInfo>();
+      AU.addPreserved<LoopInfo>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addPreservedID(LCSSAID);
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<AliasAnalysis>();
+      AU.addRequired<ScalarEvolution>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreserved<DominatorTree>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<TargetLibraryInfo>();
+    }
+  };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+                    false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// DeleteDeadInstruction - Delete this instruction.  Before we do, go through
+/// and zero out all the operands of this instruction.  If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+  SmallVector<Instruction*, 32> NowDeadInsts;
+  
+  NowDeadInsts.push_back(I);
+  
+  // Before we touch this instruction, remove it from SE!
+  do {
+    Instruction *DeadInst = NowDeadInsts.pop_back_val();
+    
+    // This instruction is dead, zap it, in stages.  Start by removing it from
+    // SCEV.
+    SE.forgetValue(DeadInst);
+    
+    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+      Value *Op = DeadInst->getOperand(op);
+      DeadInst->setOperand(op, 0);
+      
+      // If this operand just became dead, add it to the NowDeadInsts list.
+      if (!Op->use_empty()) continue;
+      
+      if (Instruction *OpI = dyn_cast<Instruction>(Op))
+        if (isInstructionTriviallyDead(OpI))
+          NowDeadInsts.push_back(OpI);
+    }
+    
+    DeadInst->eraseFromParent();
+    
+  } while (!NowDeadInsts.empty());
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+  CurLoop = L;
+  
+  // The trip count of the loop must be analyzable.
+  SE = &getAnalysis<ScalarEvolution>();
+  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+    return false;
+  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECount)) return false;
+  
+  // If this loop executes exactly one time, then it should be peeled, not
+  // optimized by this pass.
+  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+    if (BECst->getValue()->getValue() == 0)
+      return false;
+  
+  // We require target data for now.
+  TD = getAnalysisIfAvailable<TargetData>();
+  if (TD == 0) return false;
+
+  DT = &getAnalysis<DominatorTree>();
+  LoopInfo &LI = getAnalysis<LoopInfo>();
+  TLI = &getAnalysis<TargetLibraryInfo>();
+  
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+  DEBUG(dbgs() << "loop-idiom Scanning: F["
+               << L->getHeader()->getParent()->getName()
+               << "] Loop %" << L->getHeader()->getName() << "\n");
+  
+  bool MadeChange = false;
+  // Scan all the blocks in the loop that are not in subloops.
+  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+       ++BI) {
+    // Ignore blocks in subloops.
+    if (LI.getLoopFor(*BI) != CurLoop)
+      continue;
+    
+    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+  }
+  return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count.  This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  // We can only promote stores in this block if they are unconditionally
+  // executed in the loop.  For a block to be unconditionally executed, it has
+  // to dominate all the exit blocks of the loop.  Verify this now.
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (!DT->dominates(BB, ExitBlocks[i]))
+      return false;
+  
+  bool MadeChange = false;
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+    Instruction *Inst = I++;
+    // Look for store instructions, which may be optimized to memset/memcpy.
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))  {
+      WeakVH InstPtr(I);
+      if (!processLoopStore(SI, BECount)) continue;
+      MadeChange = true;
+      
+      // If processing the store invalidated our iterator, start over from the
+      // top of the block.
+      if (InstPtr == 0)
+        I = BB->begin();
+      continue;
+    }
+    
+    // Look for memset instructions, which may be optimized to a larger memset.
+    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst))  {
+      WeakVH InstPtr(I);
+      if (!processLoopMemSet(MSI, BECount)) continue;
+      MadeChange = true;
+      
+      // If processing the memset invalidated our iterator, start over from the
+      // top of the block.
+      if (InstPtr == 0)
+        I = BB->begin();
+      continue;
+    }
+  }
+  
+  return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+  if (SI->isVolatile()) return false;
+
+  Value *StoredVal = SI->getValueOperand();
+  Value *StorePtr = SI->getPointerOperand();
+  
+  // Reject stores that are so large that they overflow an unsigned.
+  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+    return false;
+  
+  // See if the pointer expression is an AddRec like {base,+,1} on the current
+  // loop, which indicates a strided store.  If we have something else, it's a
+  // random store we can't handle.
+  const SCEVAddRecExpr *StoreEv =
+    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+    return false;
+
+  // Check to see if the stride matches the size of the store.  If so, then we
+  // know that every byte is touched in the loop.
+  unsigned StoreSize = (unsigned)SizeInBits >> 3; 
+  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+  
+  // TODO: Could also handle negative stride here someday, that will require the
+  // validity check in mayLoopAccessLocation to be updated though.
+  if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
+    return false;
+
+  // See if we can optimize just this store in isolation.
+  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+                              StoredVal, SI, StoreEv, BECount))
+    return true;
+
+  // If the stored value is a strided load in the same loop with the same stride
+  // this this may be transformable into a memcpy.  This kicks in for stuff like
+  //   for (i) A[i] = B[i];
+  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+    const SCEVAddRecExpr *LoadEv =
+      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+        return true;
+  }
+  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+  return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+  // We can only handle non-volatile memsets with a constant size.
+  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+  // If we're not allowed to hack on memset, we fail.
+  if (!TLI->has(LibFunc::memset))
+    return false;
+  
+  Value *Pointer = MSI->getDest();
+  
+  // See if the pointer expression is an AddRec like {base,+,1} on the current
+  // loop, which indicates a strided store.  If we have something else, it's a
+  // random store we can't handle.
+  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+    return false;
+
+  // Reject memsets that are so large that they overflow an unsigned.
+  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+  if ((SizeInBytes >> 32) != 0)
+    return false;
+  
+  // Check to see if the stride matches the size of the memset.  If so, then we
+  // know that every byte is touched in the loop.
+  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+  
+  // TODO: Could also handle negative stride here someday, that will require the
+  // validity check in mayLoopAccessLocation to be updated though.
+  if (Stride == 0 || MSI->getLength() != Stride->getValue())
+    return false;
+  
+  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+                                 MSI->getAlignment(), MSI->getValue(),
+                                 MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access.  The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+                                  Loop *L, const SCEV *BECount,
+                                  unsigned StoreSize, AliasAnalysis &AA,
+                                  Instruction *IgnoredStore) {
+  // Get the location that may be stored across the loop.  Since the access is
+  // strided positively through memory, we say that the modified location starts
+  // at the pointer and has infinite size.
+  uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+  // If the loop iterates a fixed number of times, we can refine the access size
+  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+  
+  // TODO: For this to be really effective, we have to dive into the pointer
+  // operand in the store.  Store to &A[i] of 100 will always return may alias
+  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
+  // which will then no-alias a store to &A[100].
+  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+       ++BI)
+    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+      if (&*I != IgnoredStore &&
+          (AA.getModRefInfo(I, StoreLoc) & Access))
+        return true;
+
+  return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in.  Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+  // If the value isn't a constant, we can't promote it to being in a constant
+  // array.  We could theoretically do a store to an alloca or something, but
+  // that doesn't seem worthwhile.
+  Constant *C = dyn_cast<Constant>(V);
+  if (C == 0) return 0;
+  
+  // Only handle simple values that are a power of two bytes in size.
+  uint64_t Size = TD.getTypeSizeInBits(V->getType());
+  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+    return 0;
+  
+  // Don't care enough about darwin/ppc to implement this.
+  if (TD.isBigEndian())
+    return 0;
+
+  // Convert to size in bytes.
+  Size /= 8;
+
+  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+  // if the top and bottom are the same (e.g. for vectors and large integers).
+  if (Size > 16) return 0;
+  
+  // If the constant is exactly 16 bytes, just use it.
+  if (Size == 16) return C;
+
+  // Otherwise, we'll use an array of the constants.
+  unsigned ArraySize = 16/Size;
+  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
+
+
+/// processLoopStridedStore - We see a strided store of some value.  If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+                        unsigned StoreAlignment, Value *StoredVal,
+                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
+                        const SCEV *BECount) {
+  
+  // If the stored value is a byte-wise value (like i32 -1), then it may be
+  // turned into a memset of i8 -1, assuming that all the consecutive bytes
+  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
+  // but it can be turned into memset_pattern if the target supports it.
+  Value *SplatValue = isBytewiseValue(StoredVal);
+  Constant *PatternValue = 0;
+  
+  // If we're allowed to form a memset, and the stored value would be acceptable
+  // for memset, use it.
+  if (SplatValue && TLI->has(LibFunc::memset) &&
+      // Verify that the stored value is loop invariant.  If not, we can't
+      // promote the memset.
+      CurLoop->isLoopInvariant(SplatValue)) {
+    // Keep and use SplatValue.
+    PatternValue = 0;
+  } else if (TLI->has(LibFunc::memset_pattern16) &&
+             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+    // It looks like we can use PatternValue!
+    SplatValue = 0;
+  } else {
+    // Otherwise, this isn't an idiom we can transform.  For example, we can't
+    // do anything with a 3-byte store, for example.
+    return false;
+  }
+  
+  
+  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
+  // this into a memset in the loop preheader now if we want.  However, this
+  // would be unsafe to do if there is anything else in the loop that may read
+  // or write to the aliased location.  Check for an alias.
+  if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
+                            CurLoop, BECount,
+                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
+    return false;
+  
+  // Okay, everything looks good, insert the memset.
+  BasicBlock *Preheader = CurLoop->getLoopPreheader();
+  
+  IRBuilder<> Builder(Preheader->getTerminator());
+  
+  // The trip count of the loop and the base pointer of the addrec SCEV is
+  // guaranteed to be loop invariant, which means that it should dominate the
+  // header.  Just insert code for it in the preheader.
+  SCEVExpander Expander(*SE);
+  
+  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+  Value *BasePtr = 
+    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+                           Preheader->getTerminator());
+  
+  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
+  // pointer size if it isn't already.
+  const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+  
+  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+                                         true /*no unsigned overflow*/);
+  if (StoreSize != 1)
+    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+                               true /*no unsigned overflow*/);
+  
+  Value *NumBytes = 
+    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+  
+  Value *NewCall;
+  if (SplatValue)
+    NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+  else {
+    Module *M = TheStore->getParent()->getParent()->getParent();
+    Value *MSP = M->getOrInsertFunction("memset_pattern16",
+                                        Builder.getVoidTy(),
+                                        Builder.getInt8PtrTy(), 
+                                        Builder.getInt8PtrTy(), IntPtr,
+                                        (void*)0);
+    
+    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
+    // an constant array of 16-bytes.  Plop the value into a mergable global.
+    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+                                            GlobalValue::InternalLinkage,
+                                            PatternValue, ".memset_pattern");
+    GV->setUnnamedAddr(true); // Ok to merge these.
+    GV->setAlignment(16);
+    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+  }
+  
+  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
+               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
+  (void)NewCall;
+  
+  // Okay, the memset has been formed.  Zap the original store and anything that
+  // feeds into it.
+  DeleteDeadInstruction(TheStore, *SE);
+  ++NumMemSet;
+  return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+                           const SCEVAddRecExpr *StoreEv,
+                           const SCEVAddRecExpr *LoadEv,
+                           const SCEV *BECount) {
+  // If we're not allowed to form memcpy, we fail.
+  if (!TLI->has(LibFunc::memcpy))
+    return false;
+  
+  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+  
+  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
+  // this into a memcpy in the loop preheader now if we want.  However, this
+  // would be unsafe to do if there is anything else in the loop that may read
+  // or write to the stored location (including the load feeding the stores).
+  // Check for an alias.
+  if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+                            CurLoop, BECount, StoreSize,
+                            getAnalysis<AliasAnalysis>(), SI))
+    return false;
+
+  // For a memcpy, we have to make sure that the input array is not being
+  // mutated by the loop.
+  if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
+                            CurLoop, BECount, StoreSize,
+                            getAnalysis<AliasAnalysis>(), SI))
+    return false;
+  
+  // Okay, everything looks good, insert the memcpy.
+  BasicBlock *Preheader = CurLoop->getLoopPreheader();
+  
+  IRBuilder<> Builder(Preheader->getTerminator());
+  
+  // The trip count of the loop and the base pointer of the addrec SCEV is
+  // guaranteed to be loop invariant, which means that it should dominate the
+  // header.  Just insert code for it in the preheader.
+  SCEVExpander Expander(*SE);
+
+  Value *LoadBasePtr = 
+    Expander.expandCodeFor(LoadEv->getStart(),
+                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+                           Preheader->getTerminator());
+  Value *StoreBasePtr = 
+    Expander.expandCodeFor(StoreEv->getStart(),
+                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+                           Preheader->getTerminator());
+  
+  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
+  // pointer size if it isn't already.
+  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+  
+  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+                                         true /*no unsigned overflow*/);
+  if (StoreSize != 1)
+    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+                               true /*no unsigned overflow*/);
+  
+  Value *NumBytes =
+    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+  
+  Value *NewCall =
+    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+                         std::min(SI->getAlignment(), LI->getAlignment()));
+  
+  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
+               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+  (void)NewCall;
+  
+  // Okay, the memset has been formed.  Zap the original store and anything that
+  // feeds into it.
+  DeleteDeadInstruction(SI, *SE);
+  ++NumMemCpy;
+  return true;
+}
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
deleted file mode 100644
index a4336743a8f0..000000000000
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ /dev/null
@@ -1,1270 +0,0 @@
-//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Loop Index Splitting Pass. This pass handles three
-// kinds of loops.
-//
-// [1] A loop may be eliminated if the body is executed exactly once.
-//     For example,
-//
-// for (i = 0; i < N; ++i) {
-//   if (i == X) {
-//     body;
-//   }
-// }
-//
-// is transformed to
-//
-// i = X;
-// body;
-//
-// [2] A loop's iteration space may be shrunk if the loop body is executed
-//     for a proper sub-range of the loop's iteration space. For example,
-//
-// for (i = 0; i < N; ++i) {
-//   if (i > A && i < B) {
-//     ...
-//   }
-// }
-//
-// is transformed to iterators from A to B, if A > 0 and B < N.
-//
-// [3] A loop may be split if the loop body is dominated by a branch.
-//     For example,
-//
-// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
-//
-// is transformed into
-//
-// AEV = BSV = SV
-// for (i = LB; i < min(UB, AEV); ++i)
-//    A;
-// for (i = max(LB, BSV); i < UB; ++i);
-//    B;
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-index-split"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumIndexSplit, "Number of loop index split");
-STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
-STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
-
-namespace {
-
-  class LoopIndexSplit : public LoopPass {
-  public:
-    static char ID; // Pass ID, replacement for typeid
-    LoopIndexSplit() : LoopPass(ID) {}
-
-    // Index split Loop L. Return true if loop is split.
-    bool runOnLoop(Loop *L, LPPassManager &LPM);
-
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<ScalarEvolution>();
-      AU.addRequiredID(LCSSAID);
-      AU.addPreservedID(LCSSAID);
-      AU.addRequired<LoopInfo>();
-      AU.addPreserved<LoopInfo>();
-      AU.addRequiredID(LoopSimplifyID);
-      AU.addPreservedID(LoopSimplifyID);
-      AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();
-      AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
-    }
-
-  private:
-    /// processOneIterationLoop -- Eliminate loop if loop body is executed 
-    /// only once. For example,
-    /// for (i = 0; i < N; ++i) {
-    ///   if ( i == X) {
-    ///     ...
-    ///   }
-    /// }
-    ///
-    bool processOneIterationLoop();
-
-    // -- Routines used by updateLoopIterationSpace();
-
-    /// updateLoopIterationSpace -- Update loop's iteration space if loop 
-    /// body is executed for certain IV range only. For example,
-    /// 
-    /// for (i = 0; i < N; ++i) {
-    ///   if ( i > A && i < B) {
-    ///     ...
-    ///   }
-    /// }
-    /// is transformed to iterators from A to B, if A > 0 and B < N.
-    ///
-    bool updateLoopIterationSpace();
-
-    /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
-    /// with a loop invariant value. Update loop's lower and upper bound based on
-    /// the loop invariant value.
-    bool restrictLoopBound(ICmpInst &Op);
-
-    // --- Routines used by splitLoop(). --- /
-
-    bool splitLoop();
-
-    /// removeBlocks - Remove basic block DeadBB and all blocks dominated by 
-    /// DeadBB. This routine is used to remove split condition's dead branch, 
-    /// dominated by DeadBB. LiveBB dominates split conidition's other branch.
-    void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
-    
-    /// moveExitCondition - Move exit condition EC into split condition block.
-    void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
-                           BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
-                           PHINode *IV, Instruction *IVAdd, Loop *LP,
-                           unsigned);
-    
-    /// updatePHINodes - CFG has been changed. 
-    /// Before 
-    ///   - ExitBB's single predecessor was Latch
-    ///   - Latch's second successor was Header
-    /// Now
-    ///   - ExitBB's single predecessor was Header
-    ///   - Latch's one and only successor was Header
-    ///
-    /// Update ExitBB PHINodes' to reflect this change.
-    void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, 
-                        BasicBlock *Header,
-                        PHINode *IV, Instruction *IVIncrement, Loop *LP);
-
-    // --- Utility routines --- /
-
-    /// cleanBlock - A block is considered clean if all non terminal 
-    /// instructions are either PHINodes or IV based values.
-    bool cleanBlock(BasicBlock *BB);
-
-    /// IVisLT - If Op is comparing IV based value with an loop invariant and 
-    /// IV based value is less than  the loop invariant then return the loop 
-    /// invariant. Otherwise return NULL.
-    Value * IVisLT(ICmpInst &Op);
-
-    /// IVisLE - If Op is comparing IV based value with an loop invariant and 
-    /// IV based value is less than or equal to the loop invariant then 
-    /// return the loop invariant. Otherwise return NULL.
-    Value * IVisLE(ICmpInst &Op);
-
-    /// IVisGT - If Op is comparing IV based value with an loop invariant and 
-    /// IV based value is greater than  the loop invariant then return the loop 
-    /// invariant. Otherwise return NULL.
-    Value * IVisGT(ICmpInst &Op);
-
-    /// IVisGE - If Op is comparing IV based value with an loop invariant and 
-    /// IV based value is greater than or equal to the loop invariant then 
-    /// return the loop invariant. Otherwise return NULL.
-    Value * IVisGE(ICmpInst &Op);
-
-  private:
-
-    // Current Loop information.
-    Loop *L;
-    LPPassManager *LPM;
-    LoopInfo *LI;
-    DominatorTree *DT;
-    DominanceFrontier *DF;
-
-    PHINode *IndVar;
-    ICmpInst *ExitCondition;
-    ICmpInst *SplitCondition;
-    Value *IVStartValue;
-    Value *IVExitValue;
-    Instruction *IVIncrement;
-    SmallPtrSet<Value *, 4> IVBasedValues;
-  };
-}
-
-char LoopIndexSplit::ID = 0;
-INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
-                "Index Split Loops", false, false);
-
-Pass *llvm::createLoopIndexSplitPass() {
-  return new LoopIndexSplit();
-}
-
-// Index split Loop L. Return true if loop is split.
-bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
-  L = IncomingLoop;
-  LPM = &LPM_Ref;
-
-  // If LoopSimplify form is not available, stay out of trouble.
-  if (!L->isLoopSimplifyForm())
-    return false;
-
-  // FIXME - Nested loops make dominator info updates tricky. 
-  if (!L->getSubLoops().empty())
-    return false;
-
-  DT = &getAnalysis<DominatorTree>();
-  LI = &getAnalysis<LoopInfo>();
-  DF = &getAnalysis<DominanceFrontier>();
-
-  // Initialize loop data.
-  IndVar = L->getCanonicalInductionVariable();
-  if (!IndVar) return false;
-
-  bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
-  IVStartValue = IndVar->getIncomingValue(!P1InLoop);
-  IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
-  if (!IVIncrement) return false;
-  
-  IVBasedValues.clear();
-  IVBasedValues.insert(IndVar);
-  IVBasedValues.insert(IVIncrement);
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) 
-    for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); 
-        BI != BE; ++BI) {
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI)) 
-        if (BO != IVIncrement 
-            && (BO->getOpcode() == Instruction::Add
-                || BO->getOpcode() == Instruction::Sub))
-          if (IVBasedValues.count(BO->getOperand(0))
-              && L->isLoopInvariant(BO->getOperand(1)))
-            IVBasedValues.insert(BO);
-    }
-
-  // Reject loop if loop exit condition is not suitable.
-  BasicBlock *ExitingBlock = L->getExitingBlock();
-  if (!ExitingBlock)
-    return false;
-  BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (!EBR) return false;
-  ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
-  if (!ExitCondition) return false;
-  if (ExitingBlock != L->getLoopLatch()) return false;
-  IVExitValue = ExitCondition->getOperand(1);
-  if (!L->isLoopInvariant(IVExitValue))
-    IVExitValue = ExitCondition->getOperand(0);
-  if (!L->isLoopInvariant(IVExitValue))
-    return false;
-  if (!IVBasedValues.count(
-        ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
-    return false;
-
-  // If start value is more then exit value where induction variable
-  // increments by 1 then we are potentially dealing with an infinite loop.
-  // Do not index split this loop.
-  if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
-    if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
-      if (SV->getSExtValue() > EV->getSExtValue())
-        return false;
-
-  if (processOneIterationLoop())
-    return true;
-
-  if (updateLoopIterationSpace())
-    return true;
-
-  if (splitLoop())
-    return true;
-
-  return false;
-}
-
-// --- Helper routines --- 
-// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
-static bool isUsedOutsideLoop(Value *V, Loop *L) {
-  for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (!L->contains(cast<Instruction>(*UI)))
-      return true;
-  return false;
-}
-
-// Return V+1
-static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, 
-                         LLVMContext &Context) {
-  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
-  return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
-}
-
-// Return V-1
-static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
-                          LLVMContext &Context) {
-  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
-  return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
-}
-
-// Return min(V1, V1)
-static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- 
-  Value *C = new ICmpInst(InsertPt,
-                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp");
-  return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
-}
-
-// Return max(V1, V2)
-static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
- 
-  Value *C = new ICmpInst(InsertPt, 
-                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
-                          V1, V2, "lsp");
-  return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
-}
-
-/// processOneIterationLoop -- Eliminate loop if loop body is executed 
-/// only once. For example,
-/// for (i = 0; i < N; ++i) {
-///   if ( i == X) {
-///     ...
-///   }
-/// }
-///
-bool LoopIndexSplit::processOneIterationLoop() {
-  SplitCondition = NULL;
-  BasicBlock *Latch = L->getLoopLatch();
-  BasicBlock *Header = L->getHeader();
-  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
-  if (!BR) return false;
-  if (!isa<BranchInst>(Latch->getTerminator())) return false;
-  if (BR->isUnconditional()) return false;
-  SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
-  if (!SplitCondition) return false;
-  if (SplitCondition == ExitCondition) return false;
-  if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
-  if (BR->getOperand(1) != Latch) return false;
-  if (!IVBasedValues.count(SplitCondition->getOperand(0))
-      && !IVBasedValues.count(SplitCondition->getOperand(1)))
-    return false;
-
-  // If IV is used outside the loop then this loop traversal is required.
-  // FIXME: Calculate and use last IV value. 
-  if (isUsedOutsideLoop(IVIncrement, L))
-    return false;
-
-  // If BR operands are not IV or not loop invariants then skip this loop.
-  Value *OPV = SplitCondition->getOperand(0);
-  Value *SplitValue = SplitCondition->getOperand(1);
-  if (!L->isLoopInvariant(SplitValue))
-    std::swap(OPV, SplitValue);
-  if (!L->isLoopInvariant(SplitValue))
-    return false;
-  Instruction *OPI = dyn_cast<Instruction>(OPV);
-  if (!OPI) 
-    return false;
-  if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
-    return false;
-  Value *StartValue = IVStartValue;
-  Value *ExitValue = IVExitValue;;
-
-  if (OPV != IndVar) {
-    // If BR operand is IV based then use this operand to calculate
-    // effective conditions for loop body.
-    BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
-    if (!BOPV) 
-      return false;
-    if (BOPV->getOpcode() != Instruction::Add) 
-      return false;
-    StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
-    ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
-  }
-
-  if (!cleanBlock(Header))
-    return false;
-
-  if (!cleanBlock(Latch))
-    return false;
-    
-  // If the merge point for BR is not loop latch then skip this loop.
-  if (BR->getSuccessor(0) != Latch) {
-    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-    assert (DF0 != DF->end() && "Unable to find dominance frontier");
-    if (!DF0->second.count(Latch))
-      return false;
-  }
-  
-  if (BR->getSuccessor(1) != Latch) {
-    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-    assert (DF1 != DF->end() && "Unable to find dominance frontier");
-    if (!DF1->second.count(Latch))
-      return false;
-  }
-    
-  // Now, Current loop L contains compare instruction
-  // that compares induction variable, IndVar, against loop invariant. And
-  // entire (i.e. meaningful) loop body is dominated by this compare
-  // instruction. In such case eliminate 
-  // loop structure surrounding this loop body. For example,
-  //     for (int i = start; i < end; ++i) {
-  //         if ( i == somevalue) {
-  //           loop_body
-  //         }
-  //     }
-  // can be transformed into
-  //     if (somevalue >= start && somevalue < end) {
-  //        i = somevalue;
-  //        loop_body
-  //     }
-
-  // Replace index variable with split value in loop body. Loop body is executed
-  // only when index variable is equal to split value.
-  IndVar->replaceAllUsesWith(SplitValue);
-
-  // Replace split condition in header.
-  // Transform 
-  //      SplitCondition : icmp eq i32 IndVar, SplitValue
-  // into
-  //      c1 = icmp uge i32 SplitValue, StartValue
-  //      c2 = icmp ult i32 SplitValue, ExitValue
-  //      and i32 c1, c2 
-  Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ? 
-                                 ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
-                                 SplitValue, StartValue, "lisplit");
-
-  CmpInst::Predicate C2P  = ExitCondition->getPredicate();
-  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-  if (LatchBR->getOperand(1) != Header)
-    C2P = CmpInst::getInversePredicate(C2P);
-  Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
-  Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
-
-  SplitCondition->replaceAllUsesWith(NSplitCond);
-  SplitCondition->eraseFromParent();
-
-  // Remove Latch to Header edge.
-  BasicBlock *LatchSucc = NULL;
-  Header->removePredecessor(Latch);
-  for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
-       SI != E; ++SI) {
-    if (Header != *SI)
-      LatchSucc = *SI;
-  }
-
-  // Clean up latch block.
-  Value *LatchBRCond = LatchBR->getCondition();
-  LatchBR->setUnconditionalDest(LatchSucc);
-  RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
-  
-  LPM->deleteLoopFromQueue(L);
-
-  // Update Dominator Info.
-  // Only CFG change done is to remove Latch to Header edge. This
-  // does not change dominator tree because Latch did not dominate
-  // Header.
-  if (DF) {
-    DominanceFrontier::iterator HeaderDF = DF->find(Header);
-    if (HeaderDF != DF->end()) 
-      DF->removeFromFrontier(HeaderDF, Header);
-
-    DominanceFrontier::iterator LatchDF = DF->find(Latch);
-    if (LatchDF != DF->end()) 
-      DF->removeFromFrontier(LatchDF, Header);
-  }
-
-  ++NumIndexSplitRemoved;
-  return true;
-}
-
-/// restrictLoopBound - Op dominates loop body. Op compares an IV based value 
-/// with a loop invariant value. Update loop's lower and upper bound based on 
-/// the loop invariant value.
-bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
-  bool Sign = Op.isSigned();
-  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
-  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
-    BranchInst *EBR = 
-      cast<BranchInst>(ExitCondition->getParent()->getTerminator());
-    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
-    BasicBlock *T = EBR->getSuccessor(0);
-    EBR->setSuccessor(0, EBR->getSuccessor(1));
-    EBR->setSuccessor(1, T);
-  }
-
-  LLVMContext &Context = Op.getContext();
-
-  // New upper and lower bounds.
-  Value *NLB = NULL;
-  Value *NUB = NULL;
-  if (Value *V = IVisLT(Op)) {
-    // Restrict upper bound.
-    if (IVisLE(*ExitCondition)) 
-      V = getMinusOne(V, Sign, PHTerm, Context);
-    NUB = getMin(V, IVExitValue, Sign, PHTerm);
-  } else if (Value *V = IVisLE(Op)) {
-    // Restrict upper bound.
-    if (IVisLT(*ExitCondition)) 
-      V = getPlusOne(V, Sign, PHTerm, Context);
-    NUB = getMin(V, IVExitValue, Sign, PHTerm);
-  } else if (Value *V = IVisGT(Op)) {
-    // Restrict lower bound.
-    V = getPlusOne(V, Sign, PHTerm, Context);
-    NLB = getMax(V, IVStartValue, Sign, PHTerm);
-  } else if (Value *V = IVisGE(Op))
-    // Restrict lower bound.
-    NLB = getMax(V, IVStartValue, Sign, PHTerm);
-
-  if (!NLB && !NUB) 
-    return false;
-
-  if (NLB) {
-    unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
-    IndVar->setIncomingValue(i, NLB);
-  }
-
-  if (NUB) {
-    unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
-    ExitCondition->setOperand(i, NUB);
-  }
-  return true;
-}
-
-/// updateLoopIterationSpace -- Update loop's iteration space if loop 
-/// body is executed for certain IV range only. For example,
-/// 
-/// for (i = 0; i < N; ++i) {
-///   if ( i > A && i < B) {
-///     ...
-///   }
-/// }
-/// is transformed to iterators from A to B, if A > 0 and B < N.
-///
-bool LoopIndexSplit::updateLoopIterationSpace() {
-  SplitCondition = NULL;
-  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
-      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
-    return false;
-  BasicBlock *Latch = L->getLoopLatch();
-  BasicBlock *Header = L->getHeader();
-  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
-  if (!BR) return false;
-  if (!isa<BranchInst>(Latch->getTerminator())) return false;
-  if (BR->isUnconditional()) return false;
-  BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
-  if (!AND) return false;
-  if (AND->getOpcode() != Instruction::And) return false;
-  ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
-  ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
-  if (!Op0 || !Op1)
-    return false;
-  IVBasedValues.insert(AND);
-  IVBasedValues.insert(Op0);
-  IVBasedValues.insert(Op1);
-  if (!cleanBlock(Header)) return false;
-  BasicBlock *ExitingBlock = ExitCondition->getParent();
-  if (!cleanBlock(ExitingBlock)) return false;
-
-  // If the merge point for BR is not loop latch then skip this loop.
-  if (BR->getSuccessor(0) != Latch) {
-    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-    assert (DF0 != DF->end() && "Unable to find dominance frontier");
-    if (!DF0->second.count(Latch))
-      return false;
-  }
-  
-  if (BR->getSuccessor(1) != Latch) {
-    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-    assert (DF1 != DF->end() && "Unable to find dominance frontier");
-    if (!DF1->second.count(Latch))
-      return false;
-  }
-    
-  // Verify that loop exiting block has only two predecessor, where one pred
-  // is split condition block. The other predecessor will become exiting block's
-  // dominator after CFG is updated. TODO : Handle CFG's where exiting block has
-  // more then two predecessors. This requires extra work in updating dominator
-  // information.
-  BasicBlock *ExitingBBPred = NULL;
-  for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
-       PI != PE; ++PI) {
-    BasicBlock *BB = *PI;
-    if (Header == BB)
-      continue;
-    if (ExitingBBPred)
-      return false;
-    else
-      ExitingBBPred = BB;
-  }
-
-  if (!restrictLoopBound(*Op0))
-    return false;
-
-  if (!restrictLoopBound(*Op1))
-    return false;
-
-  // Update CFG.
-  if (BR->getSuccessor(0) == ExitingBlock)
-    BR->setUnconditionalDest(BR->getSuccessor(1));
-  else
-    BR->setUnconditionalDest(BR->getSuccessor(0));
-
-  AND->eraseFromParent();
-  if (Op0->use_empty())
-    Op0->eraseFromParent();
-  if (Op1->use_empty())
-    Op1->eraseFromParent();
-
-  // Update domiantor info. Now, ExitingBlock has only one predecessor, 
-  // ExitingBBPred, and it is ExitingBlock's immediate domiantor.
-  DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
-
-  BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
-  if (L->contains(ExitBlock))
-    ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
-
-  // If ExitingBlock is a member of the loop basic blocks' DF list then
-  // replace ExitingBlock with header and exit block in the DF list
-  DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-    if (BB == Header || BB == ExitingBlock)
-      continue;
-    DominanceFrontier::iterator BBDF = DF->find(BB);
-    DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
-    DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
-    while (DomSetI != DomSetE) {
-      DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
-      ++DomSetI;
-      BasicBlock *DFBB = *CurrentItr;
-      if (DFBB == ExitingBlock) {
-        BBDF->second.erase(DFBB);
-        for (DominanceFrontier::DomSetType::iterator 
-               EBI = ExitingBlockDF->second.begin(),
-               EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI) 
-          BBDF->second.insert(*EBI);
-      }
-    }
-  }
-  ++NumRestrictBounds;
-  return true;
-}
-
-/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
-/// This routine is used to remove split condition's dead branch, dominated by
-/// DeadBB. LiveBB dominates split conidition's other branch.
-void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, 
-                                  BasicBlock *LiveBB) {
-
-  // First update DeadBB's dominance frontier. 
-  SmallVector<BasicBlock *, 8> FrontierBBs;
-  DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
-  if (DeadBBDF != DF->end()) {
-    SmallVector<BasicBlock *, 8> PredBlocks;
-    
-    DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
-    for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
-           DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI) 
-      {
-      BasicBlock *FrontierBB = *DeadBBSetI;
-      FrontierBBs.push_back(FrontierBB);
-
-      // Rremove any PHI incoming edge from blocks dominated by DeadBB.
-      PredBlocks.clear();
-      for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
-          PI != PE; ++PI) {
-        BasicBlock *P = *PI;
-        if (DT->dominates(DeadBB, P))
-          PredBlocks.push_back(P);
-      }
-
-      for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
-          FBI != FBE; ++FBI) {
-        if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
-          for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
-                PE = PredBlocks.end(); PI != PE; ++PI) {
-            BasicBlock *P = *PI;
-            PN->removeIncomingValue(P);
-          }
-        }
-        else
-          break;
-      }      
-    }
-  }
-  
-  // Now remove DeadBB and all nodes dominated by DeadBB in df order.
-  SmallVector<BasicBlock *, 32> WorkList;
-  DomTreeNode *DN = DT->getNode(DeadBB);
-  for (df_iterator<DomTreeNode*> DI = df_begin(DN),
-         E = df_end(DN); DI != E; ++DI) {
-    BasicBlock *BB = DI->getBlock();
-    WorkList.push_back(BB);
-    BB->replaceAllUsesWith(UndefValue::get(
-                                       Type::getLabelTy(DeadBB->getContext())));
-  }
-
-  while (!WorkList.empty()) {
-    BasicBlock *BB = WorkList.pop_back_val();
-    LPM->deleteSimpleAnalysisValue(BB, LP);
-    for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); 
-        BBI != BBE; ) {
-      Instruction *I = BBI;
-      ++BBI;
-      I->replaceAllUsesWith(UndefValue::get(I->getType()));
-      LPM->deleteSimpleAnalysisValue(I, LP);
-      I->eraseFromParent();
-    }
-    DT->eraseNode(BB);
-    DF->removeBlock(BB);
-    LI->removeBlock(BB);
-    BB->eraseFromParent();
-  }
-
-  // Update Frontier BBs' dominator info.
-  while (!FrontierBBs.empty()) {
-    BasicBlock *FBB = FrontierBBs.pop_back_val();
-    BasicBlock *NewDominator = FBB->getSinglePredecessor();
-    if (!NewDominator) {
-      pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
-      NewDominator = *PI;
-      ++PI;
-      if (NewDominator != LiveBB) {
-        for(; PI != PE; ++PI) {
-          BasicBlock *P = *PI;
-          if (P == LiveBB) {
-            NewDominator = LiveBB;
-            break;
-          }
-          NewDominator = DT->findNearestCommonDominator(NewDominator, P);
-        }
-      }
-    }
-    assert (NewDominator && "Unable to fix dominator info.");
-    DT->changeImmediateDominator(FBB, NewDominator);
-    DF->changeImmediateDominator(FBB, NewDominator, DT);
-  }
-
-}
-
-// moveExitCondition - Move exit condition EC into split condition block CondBB.
-void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
-                                       BasicBlock *ExitBB, ICmpInst *EC, 
-                                       ICmpInst *SC, PHINode *IV, 
-                                       Instruction *IVAdd, Loop *LP,
-                                       unsigned ExitValueNum) {
-
-  BasicBlock *ExitingBB = EC->getParent();
-  Instruction *CurrentBR = CondBB->getTerminator();
-
-  // Move exit condition into split condition block.
-  EC->moveBefore(CurrentBR);
-  EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
-
-  // Move exiting block's branch into split condition block. Update its branch
-  // destination.
-  BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
-  ExitingBR->moveBefore(CurrentBR);
-  BasicBlock *OrigDestBB = NULL;
-  if (ExitingBR->getSuccessor(0) == ExitBB) {
-    OrigDestBB = ExitingBR->getSuccessor(1);
-    ExitingBR->setSuccessor(1, ActiveBB);
-  }
-  else {
-    OrigDestBB = ExitingBR->getSuccessor(0);
-    ExitingBR->setSuccessor(0, ActiveBB);
-  }
-    
-  // Remove split condition and current split condition branch.
-  SC->eraseFromParent();
-  CurrentBR->eraseFromParent();
-
-  // Connect exiting block to original destination.
-  BranchInst::Create(OrigDestBB, ExitingBB);
-
-  // Update PHINodes
-  updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
-
-  // Fix dominator info.
-  // ExitBB is now dominated by CondBB
-  DT->changeImmediateDominator(ExitBB, CondBB);
-  DF->changeImmediateDominator(ExitBB, CondBB, DT);
-
-  // Blocks outside the loop may have been in the dominance frontier of blocks
-  // inside the condition; this is now impossible because the blocks inside the
-  // condition no loger dominate the exit.  Remove the relevant blocks from
-  // the dominance frontiers.
-  for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
-       I != E; ++I) {
-    if (!DT->properlyDominates(CondBB, *I)) continue;
-    DominanceFrontier::iterator BBDF = DF->find(*I);
-    DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
-    DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
-    while (DomSetI != DomSetE) {
-      DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
-      ++DomSetI;
-      BasicBlock *DFBB = *CurrentItr;
-      if (!LP->contains(DFBB))
-        BBDF->second.erase(DFBB);
-    }
-  }
-}
-
-/// updatePHINodes - CFG has been changed. 
-/// Before 
-///   - ExitBB's single predecessor was Latch
-///   - Latch's second successor was Header
-/// Now
-///   - ExitBB's single predecessor is Header
-///   - Latch's one and only successor is Header
-///
-/// Update ExitBB PHINodes' to reflect this change.
-void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, 
-                                    BasicBlock *Header,
-                                    PHINode *IV, Instruction *IVIncrement,
-                                    Loop *LP) {
-
-  for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end(); 
-       BI != BE; ) {
-    PHINode *PN = dyn_cast<PHINode>(BI);
-    ++BI;
-    if (!PN)
-      break;
-
-    Value *V = PN->getIncomingValueForBlock(Latch);
-    if (PHINode *PHV = dyn_cast<PHINode>(V)) {
-      // PHV is in Latch. PHV has one use is in ExitBB PHINode. And one use
-      // in Header which is new incoming value for PN.
-      Value *NewV = NULL;
-      for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); 
-           UI != E; ++UI) 
-        if (PHINode *U = dyn_cast<PHINode>(*UI)) 
-          if (LP->contains(U)) {
-            NewV = U;
-            break;
-          }
-
-      // Add incoming value from header only if PN has any use inside the loop.
-      if (NewV)
-        PN->addIncoming(NewV, Header);
-
-    } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
-      // If this instruction is IVIncrement then IV is new incoming value 
-      // from header otherwise this instruction must be incoming value from 
-      // header because loop is in LCSSA form.
-      if (PHI == IVIncrement)
-        PN->addIncoming(IV, Header);
-      else
-        PN->addIncoming(V, Header);
-    } else
-      // Otherwise this is an incoming value from header because loop is in 
-      // LCSSA form.
-      PN->addIncoming(V, Header);
-    
-    // Remove incoming value from Latch.
-    PN->removeIncomingValue(Latch);
-  }
-}
-
-bool LoopIndexSplit::splitLoop() {
-  SplitCondition = NULL;
-  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
-      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
-    return false;
-  BasicBlock *Header = L->getHeader();
-  BasicBlock *Latch = L->getLoopLatch();
-  BranchInst *SBR = NULL; // Split Condition Branch
-  BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
-  // If Exiting block includes loop variant instructions then this
-  // loop may not be split safely.
-  BasicBlock *ExitingBlock = ExitCondition->getParent();
-  if (!cleanBlock(ExitingBlock)) return false;
-
-  LLVMContext &Context = Header->getContext();
-
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
-    if (!BR || BR->isUnconditional()) continue;
-    ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
-    if (!CI || CI == ExitCondition 
-        || CI->getPredicate() == ICmpInst::ICMP_NE
-        || CI->getPredicate() == ICmpInst::ICMP_EQ)
-      continue;
-
-    // Unable to handle triangle loops at the moment.
-    // In triangle loop, split condition is in header and one of the
-    // the split destination is loop latch. If split condition is EQ
-    // then such loops are already handle in processOneIterationLoop().
-    if (Header == (*I)
-        && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
-      continue;
-
-    // If the block does not dominate the latch then this is not a diamond.
-    // Such loop may not benefit from index split.
-    if (!DT->dominates((*I), Latch))
-      continue;
-
-    // If split condition branches heads do not have single predecessor, 
-    // SplitCondBlock, then is not possible to remove inactive branch.
-    if (!BR->getSuccessor(0)->getSinglePredecessor() 
-        || !BR->getSuccessor(1)->getSinglePredecessor())
-      return false;
-
-    // If the merge point for BR is not loop latch then skip this condition.
-    if (BR->getSuccessor(0) != Latch) {
-      DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
-      assert (DF0 != DF->end() && "Unable to find dominance frontier");
-      if (!DF0->second.count(Latch))
-        continue;
-    }
-    
-    if (BR->getSuccessor(1) != Latch) {
-      DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
-      assert (DF1 != DF->end() && "Unable to find dominance frontier");
-      if (!DF1->second.count(Latch))
-        continue;
-    }
-    SplitCondition = CI;
-    SBR = BR;
-    break;
-  }
-   
-  if (!SplitCondition)
-    return false;
-
-  // If the predicate sign does not match then skip.
-  if (ExitCondition->isSigned() != SplitCondition->isSigned())
-    return false;
-
-  unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
-  unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
-  Value *SplitValue = SplitCondition->getOperand(SVOpNum);
-  if (!L->isLoopInvariant(SplitValue))
-    return false;
-  if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
-    return false;
-
-  // Check for side effects.
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-
-    assert(DT->dominates(Header, BB));
-    if (DT->properlyDominates(SplitCondition->getParent(), BB))
-      continue;
-
-    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
-         BI != BE; ++BI) {
-      Instruction *Inst = BI;
-
-      if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
-          && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
-        return false;
-    }
-  }
-
-  // Normalize loop conditions so that it is easier to calculate new loop
-  // bounds.
-  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
-    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
-    BasicBlock *T = EBR->getSuccessor(0);
-    EBR->setSuccessor(0, EBR->getSuccessor(1));
-    EBR->setSuccessor(1, T);
-  }
-
-  if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
-    SplitCondition->setPredicate(SplitCondition->getInversePredicate());
-    BasicBlock *T = SBR->getSuccessor(0);
-    SBR->setSuccessor(0, SBR->getSuccessor(1));
-    SBR->setSuccessor(1, T);
-  }
-
-  //[*] Calculate new loop bounds.
-  Value *AEV = SplitValue;
-  Value *BSV = SplitValue;
-  bool Sign = SplitCondition->isSigned();
-  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
-  if (IVisLT(*ExitCondition)) {
-    if (IVisLT(*SplitCondition)) {
-      /* Do nothing */
-    }
-    else if (IVisLE(*SplitCondition)) {
-      AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-    } else {
-      assert (0 && "Unexpected split condition!");
-    }
-  }
-  else if (IVisLE(*ExitCondition)) {
-    if (IVisLT(*SplitCondition)) {
-      AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
-    }
-    else if (IVisLE(*SplitCondition)) {
-      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
-    } else {
-      assert (0 && "Unexpected split condition!");
-    }
-  } else {
-    assert (0 && "Unexpected exit condition!");
-  }
-  AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
-  BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
-
-  // [*] Clone Loop
-  ValueMap<const Value *, Value *> VMap;
-  Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
-  Loop *ALoop = L;
-
-  // [*] ALoop's exiting edge enters BLoop's header.
-  //    ALoop's original exit block becomes BLoop's exit block.
-  PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
-  BasicBlock *A_ExitingBlock = ExitCondition->getParent();
-  BranchInst *A_ExitInsn =
-    dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
-  assert (A_ExitInsn && "Unable to find suitable loop exit branch");
-  BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
-  BasicBlock *B_Header = BLoop->getHeader();
-  if (ALoop->contains(B_ExitBlock)) {
-    B_ExitBlock = A_ExitInsn->getSuccessor(0);
-    A_ExitInsn->setSuccessor(0, B_Header);
-  } else
-    A_ExitInsn->setSuccessor(1, B_Header);
-
-  // [*] Update ALoop's exit value using new exit value.
-  ExitCondition->setOperand(EVOpNum, AEV);
-
-  // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
-  //     original loop's preheader. Add incoming PHINode values from
-  //     ALoop's exiting block. Update BLoop header's domiantor info.
-
-  // Collect inverse map of Header PHINodes.
-  DenseMap<Value *, Value *> InverseMap;
-  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), 
-         BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PHINode *PNClone = cast<PHINode>(VMap[PN]);
-      InverseMap[PNClone] = PN;
-    } else
-      break;
-  }
-
-  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
-  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
-       BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      // Remove incoming value from original preheader.
-      PN->removeIncomingValue(A_Preheader);
-
-      // Add incoming value from A_ExitingBlock.
-      if (PN == B_IndVar)
-        PN->addIncoming(BSV, A_ExitingBlock);
-      else { 
-        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
-        Value *V2 = NULL;
-        // If loop header is also loop exiting block then
-        // OrigPN is incoming value for B loop header.
-        if (A_ExitingBlock == ALoop->getHeader())
-          V2 = OrigPN;
-        else
-          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
-        PN->addIncoming(V2, A_ExitingBlock);
-      }
-    } else
-      break;
-  }
-
-  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
-  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
-  
-  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit
-  //     block. Remove incoming PHINode values from ALoop's exiting block.
-  //     Add new incoming values from BLoop's incoming exiting value.
-  //     Update BLoop exit block's dominator info..
-  BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
-  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
-       BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
-                                                            B_ExitingBlock);
-      PN->removeIncomingValue(A_ExitingBlock);
-    } else
-      break;
-  }
-
-  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
-  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
-
-  //[*] Split ALoop's exit edge. This creates a new block which
-  //    serves two purposes. First one is to hold PHINode defnitions
-  //    to ensure that ALoop's LCSSA form. Second use it to act
-  //    as a preheader for BLoop.
-  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
-
-  //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
-  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
-  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
-      BI != BE; ++BI) {
-    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
-      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
-      newPHI->addIncoming(V1, A_ExitingBlock);
-      A_ExitBlock->getInstList().push_front(newPHI);
-      PN->removeIncomingValue(A_ExitBlock);
-      PN->addIncoming(newPHI, A_ExitBlock);
-    } else
-      break;
-  }
-
-  //[*] Eliminate split condition's inactive branch from ALoop.
-  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
-  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
-  BasicBlock *A_InactiveBranch = NULL;
-  BasicBlock *A_ActiveBranch = NULL;
-  A_ActiveBranch = A_BR->getSuccessor(0);
-  A_InactiveBranch = A_BR->getSuccessor(1);
-  A_BR->setUnconditionalDest(A_ActiveBranch);
-  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
-
-  //[*] Eliminate split condition's inactive branch in from BLoop.
-  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
-  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
-  BasicBlock *B_InactiveBranch = NULL;
-  BasicBlock *B_ActiveBranch = NULL;
-  B_ActiveBranch = B_BR->getSuccessor(1);
-  B_InactiveBranch = B_BR->getSuccessor(0);
-  B_BR->setUnconditionalDest(B_ActiveBranch);
-  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
-
-  BasicBlock *A_Header = ALoop->getHeader();
-  if (A_ExitingBlock == A_Header)
-    return true;
-
-  //[*] Move exit condition into split condition block to avoid
-  //    executing dead loop iteration.
-  ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
-  Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
-  ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
-
-  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
-                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, 
-                    ALoop, EVOpNum);
-
-  moveExitCondition(B_SplitCondBlock, B_ActiveBranch, 
-                    B_ExitBlock, B_ExitCondition,
-                    B_SplitCondition, B_IndVar, B_IndVarIncrement, 
-                    BLoop, EVOpNum);
-
-  ++NumIndexSplit;
-  return true;
-}
-
-/// cleanBlock - A block is considered clean if all non terminal instructions 
-/// are either, PHINodes, IV based.
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
-  Instruction *Terminator = BB->getTerminator();
-  for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 
-      BI != BE; ++BI) {
-    Instruction *I = BI;
-
-    if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
-        || I == SplitCondition || IVBasedValues.count(I) 
-        || isa<DbgInfoIntrinsic>(I))
-      continue;
-
-    if (I->mayHaveSideEffects())
-      return false;
-
-    // I is used only inside this block then it is OK.
-    bool usedOutsideBB = false;
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 
-         UI != UE; ++UI) {
-      Instruction *U = cast<Instruction>(*UI);
-      if (U->getParent() != BB)
-        usedOutsideBB = true;
-    }
-    if (!usedOutsideBB)
-      continue;
-
-    // Otherwise we have a instruction that may not allow loop spliting.
-    return false;
-  }
-  return true;
-}
-
-/// IVisLT - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is less than  the loop invariant then return the loop 
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) 
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisLE - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is less than or equal to the loop invariant then 
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisGT - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is greater than  the loop invariant then return the loop 
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT) 
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
-/// IVisGE - If Op is comparing IV based value with an loop invariant and 
-/// IV based value is greater than or equal to the loop invariant then 
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
-  ICmpInst::Predicate P = Op.getPredicate();
-  if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
-      && IVBasedValues.count(Op.getOperand(0)) 
-      && L->isLoopInvariant(Op.getOperand(1)))
-    return Op.getOperand(1);
-
-  if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE) 
-      && IVBasedValues.count(Op.getOperand(1)) 
-      && L->isLoopInvariant(Op.getOperand(0)))
-    return Op.getOperand(0);
-
-  return NULL;
-}
-
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 000000000000..af25c5c1a661
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+  class LoopInstSimplify : public LoopPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LoopInstSimplify() : LoopPass(ID) {
+      initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnLoop(Loop*, LPPassManager&);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<LoopInfo>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved("scalar-evolution");
+    }
+  };
+}
+  
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+                "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+                "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+  return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+  LoopInfo *LI = &getAnalysis<LoopInfo>();
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getUniqueExitBlocks(ExitBlocks);
+  array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+  SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+  // The bit we are stealing from the pointer represents whether this basic
+  // block is the header of a subloop, in which case we only process its phis.
+  typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+  SmallVector<WorklistItem, 16> VisitStack;
+  SmallPtrSet<BasicBlock*, 32> Visited;
+
+  bool Changed = false;
+  bool LocalChanged;
+  do {
+    LocalChanged = false;
+
+    VisitStack.clear();
+    Visited.clear();
+
+    VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+    while (!VisitStack.empty()) {
+      WorklistItem Item = VisitStack.pop_back_val();
+      BasicBlock *BB = Item.getPointer();
+      bool IsSubloopHeader = Item.getInt();
+
+      // Simplify instructions in the current basic block.
+      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+        Instruction *I = BI++;
+
+        // The first time through the loop ToSimplify is empty and we try to
+        // simplify all instructions. On later iterations ToSimplify is not
+        // empty and we only bother simplifying instructions that are in it.
+        if (!ToSimplify->empty() && !ToSimplify->count(I))
+          continue;
+
+        // Don't bother simplifying unused instructions.
+        if (!I->use_empty()) {
+          Value *V = SimplifyInstruction(I, TD, DT);
+          if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+            // Mark all uses for resimplification next time round the loop.
+            for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+                 UI != UE; ++UI)
+              Next->insert(cast<Instruction>(*UI));
+
+            I->replaceAllUsesWith(V);
+            LocalChanged = true;
+            ++NumSimplified;
+          }
+        }
+        LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+        if (IsSubloopHeader && !isa<PHINode>(I))
+          break;
+      }
+
+      // Add all successors to the worklist, except for loop exit blocks and the
+      // bodies of subloops. We visit the headers of loops so that we can process
+      // their phis, but we contract the rest of the subloop body and only follow
+      // edges leading back to the original loop.
+      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+           ++SI) {
+        BasicBlock *SuccBB = *SI;
+        if (!Visited.insert(SuccBB))
+          continue;
+
+        const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+        if (SuccLoop && SuccLoop->getHeader() == SuccBB
+                     && L->contains(SuccLoop)) {
+          VisitStack.push_back(WorklistItem(SuccBB, true));
+
+          SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+          SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+          for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+            BasicBlock *ExitBB = SubLoopExitBlocks[i];
+            if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+              VisitStack.push_back(WorklistItem(ExitBB, false));
+          }
+
+          continue;
+        }
+
+        bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+                                              ExitBlocks.end(), SuccBB);
+        if (IsExitBlock)
+          continue;
+
+        VisitStack.push_back(WorklistItem(SuccBB, false));
+      }
+    }
+
+    // Place the list of instructions to simplify on the next loop iteration
+    // into ToSimplify.
+    std::swap(ToSimplify, Next);
+    Next->clear();
+
+    Changed |= LocalChanged;
+  } while (LocalChanged);
+
+  return Changed;
+}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 65acc1d9257a..95e15784df2c 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,16 +15,16 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 #define MAX_HEADER_SIZE 16
@@ -35,16 +35,13 @@ namespace {
   class LoopRotate : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopRotate() : LoopPass(ID) {}
-
-    // Rotate Loop L as many times as possible. Return true if
-    // loop is rotated at least once.
-    bool runOnLoop(Loop *L, LPPassManager &LPM);
+    LoopRotate() : LoopPass(ID) {
+      initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+    }
 
     // LCSSA form makes instruction renaming easier.
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addRequired<LoopInfo>();
       AU.addPreserved<LoopInfo>();
       AU.addRequiredID(LoopSimplifyID);
@@ -54,79 +51,119 @@ namespace {
       AU.addPreserved<ScalarEvolution>();
     }
 
-    // Helper functions
-
-    /// Do actual work
-    bool rotateLoop(Loop *L, LPPassManager &LPM);
+    bool runOnLoop(Loop *L, LPPassManager &LPM);
+    bool rotateLoop(Loop *L);
     
-    /// Initialize local data
-    void initialize();
-
-    /// After loop rotation, loop pre-header has multiple sucessors.
-    /// Insert one forwarding basic block to ensure that loop pre-header
-    /// has only one successor.
-    void preserveCanonicalLoopForm(LPPassManager &LPM);
-
   private:
-    Loop *L;
-    BasicBlock *OrigHeader;
-    BasicBlock *OrigPreHeader;
-    BasicBlock *OrigLatch;
-    BasicBlock *NewHeader;
-    BasicBlock *Exit;
-    LPPassManager *LPM_Ptr;
+    LoopInfo *LI;
   };
 }
   
 char LoopRotate::ID = 0;
-INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
 
 Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
 
 /// Rotate Loop L as many times as possible. Return true if
 /// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
-
-  bool RotatedOneLoop = false;
-  initialize();
-  LPM_Ptr = &LPM;
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+  LI = &getAnalysis<LoopInfo>();
 
   // One loop can be rotated multiple times.
-  while (rotateLoop(Lp,LPM)) {
-    RotatedOneLoop = true;
-    initialize();
-  }
+  bool MadeChange = false;
+  while (rotateLoop(L))
+    MadeChange = true;
 
-  return RotatedOneLoop;
+  return MadeChange;
 }
 
-/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
-  L = Lp;
-
-  OrigPreHeader = L->getLoopPreheader();
-  if (!OrigPreHeader) return false;
-
-  OrigLatch = L->getLoopLatch();
-  if (!OrigLatch) return false;
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader.  If there were uses of the values produced by
+/// these instruction that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values.  Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+                                            BasicBlock *OrigPreheader,
+                                            ValueToValueMapTy &ValueMap) {
+  // Remove PHI node entries that are no longer live.
+  BasicBlock::iterator I, E = OrigHeader->end();
+  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+    
+  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+  // as necessary.
+  SSAUpdater SSA;
+  for (I = OrigHeader->begin(); I != E; ++I) {
+    Value *OrigHeaderVal = I;
+    
+    // If there are no uses of the value (e.g. because it returns void), there
+    // is nothing to rewrite.
+    if (OrigHeaderVal->use_empty())
+      continue;
+    
+    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
 
-  OrigHeader =  L->getHeader();
+    // The value now exits in two versions: the initial value in the preheader
+    // and the loop "next" value in the original header.
+    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+    
+    // Visit each use of the OrigHeader instruction.
+    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+         UE = OrigHeaderVal->use_end(); UI != UE; ) {
+      // Grab the use before incrementing the iterator.
+      Use &U = UI.getUse();
+      
+      // Increment the iterator before removing the use from the list.
+      ++UI;
+      
+      // SSAUpdater can't handle a non-PHI use in the same block as an
+      // earlier def. We can easily handle those cases manually.
+      Instruction *UserInst = cast<Instruction>(U.getUser());
+      if (!isa<PHINode>(UserInst)) {
+        BasicBlock *UserBB = UserInst->getParent();
+        
+        // The original users in the OrigHeader are already using the
+        // original definitions.
+        if (UserBB == OrigHeader)
+          continue;
+        
+        // Users in the OrigPreHeader need to use the value to which the
+        // original definitions are mapped.
+        if (UserBB == OrigPreheader) {
+          U = OrigPreHeaderVal;
+          continue;
+        }
+      }
+      
+      // Anything else can be handled by SSAUpdater.
+      SSA.RewriteUse(U);
+    }
+  }
+}  
 
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
   // If the loop has only one block then there is not much to rotate.
   if (L->getBlocks().size() == 1)
     return false;
-
+  
+  BasicBlock *OrigHeader = L->getHeader();
+  
+  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+  if (BI == 0 || BI->isUnconditional())
+    return false;
+  
   // If the loop header is not one of the loop exiting blocks then
   // either this loop is already rotated or it is not
   // suitable for loop rotation transformations.
   if (!L->isLoopExiting(OrigHeader))
     return false;
 
-  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
-  if (!BI)
-    return false;
-  assert(BI->isConditional() && "Branch Instruction is not conditional");
-
   // Updating PHInodes in loops with multiple exits adds complexity. 
   // Keep it simple, and restrict loop rotation to loops with one exit only.
   // In future, lift this restriction and support for multiple exits if
@@ -136,24 +173,18 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   if (ExitBlocks.size() > 1)
     return false;
 
-  // Check size of original header and reject
-  // loop if it is very big.
-  unsigned Size = 0;
-  
-  // FIXME: Use common api to estimate size.
-  for (BasicBlock::const_iterator OI = OrigHeader->begin(), 
-         OE = OrigHeader->end(); OI != OE; ++OI) {
-      if (isa<PHINode>(OI)) 
-        continue;           // PHI nodes don't count.
-      if (isa<DbgInfoIntrinsic>(OI))
-        continue;  // Debug intrinsics don't count as size.
-      ++Size;
+  // Check size of original header and reject loop if it is very big.
+  {
+    CodeMetrics Metrics;
+    Metrics.analyzeBasicBlock(OrigHeader);
+    if (Metrics.NumInsts > MAX_HEADER_SIZE)
+      return false;
   }
 
-  if (Size > MAX_HEADER_SIZE)
-    return false;
-
   // Now, this loop is suitable for rotation.
+  BasicBlock *OrigPreheader = L->getLoopPreheader();
+  BasicBlock *OrigLatch = L->getLoopLatch();
+  assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
 
   // Anything ScalarEvolution may know about this loop or the PHI nodes
   // in its header will soon be invalidated.
@@ -163,8 +194,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   // Find new Loop header. NewHeader is a Header's one and only successor
   // that is inside loop.  Header's other successor is outside the
   // loop.  Otherwise loop is not suitable for rotation.
-  Exit = BI->getSuccessor(0);
-  NewHeader = BI->getSuccessor(1);
+  BasicBlock *Exit = BI->getSuccessor(0);
+  BasicBlock *NewHeader = BI->getSuccessor(1);
   if (L->contains(Exit))
     std::swap(Exit, NewHeader);
   assert(NewHeader && "Unable to determine new loop header");
@@ -180,20 +211,54 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   // Begin by walking OrigHeader and populating ValueMap with an entry for
   // each Instruction.
   BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
-  DenseMap<const Value *, Value *> ValueMap;
+  ValueToValueMapTy ValueMap;
 
   // For PHI nodes, the value available in OldPreHeader is just the
   // incoming value from OldPreHeader.
   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+    ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
 
-  // For the rest of the instructions, create a clone in the OldPreHeader.
-  TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
-  for (; I != E; ++I) {
-    Instruction *C = I->clone();
-    C->setName(I->getName());
-    C->insertBefore(LoopEntryBranch);
-    ValueMap[I] = C;
+  // For the rest of the instructions, either hoist to the OrigPreheader if
+  // possible or create a clone in the OldPreHeader if not.
+  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+  while (I != E) {
+    Instruction *Inst = I++;
+    
+    // If the instruction's operands are invariant and it doesn't read or write
+    // memory, then it is safe to hoist.  Doing this doesn't change the order of
+    // execution in the preheader, but does prevent the instruction from
+    // executing in each iteration of the loop.  This means it is safe to hoist
+    // something that might trap, but isn't safe to hoist something that reads
+    // memory (without proving that the loop doesn't write).
+    if (L->hasLoopInvariantOperands(Inst) &&
+        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+      Inst->moveBefore(LoopEntryBranch);
+      continue;
+    }
+    
+    // Otherwise, create a duplicate of the instruction.
+    Instruction *C = Inst->clone();
+    
+    // Eagerly remap the operands of the instruction.
+    RemapInstruction(C, ValueMap,
+                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+    
+    // With the operands remapped, see if the instruction constant folds or is
+    // otherwise simplifyable.  This commonly occurs because the entry from PHI
+    // nodes allows icmps and other instructions to fold.
+    Value *V = SimplifyInstruction(C);
+    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+      // If so, then delete the temporary instruction and stick the folded value
+      // in the map.
+      delete C;
+      ValueMap[Inst] = V;
+    } else {
+      // Otherwise, stick the new instruction into the new block!
+      C->setName(Inst->getName());
+      C->insertBefore(LoopEntryBranch);
+      ValueMap[Inst] = C;
+    }
   }
 
   // Along with all the other instructions, we just cloned OrigHeader's
@@ -203,221 +268,81 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
     for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
          PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
-      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
 
   // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
   // OrigPreHeader's old terminator (the original branch into the loop), and
   // remove the corresponding incoming values from the PHI nodes in OrigHeader.
   LoopEntryBranch->eraseFromParent();
-  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
 
-  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
-  // as necessary.
-  SSAUpdater SSA;
-  for (I = OrigHeader->begin(); I != E; ++I) {
-    Value *OrigHeaderVal = I;
-    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
-
-    // The value now exits in two versions: the initial value in the preheader
-    // and the loop "next" value in the original header.
-    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
-    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
-    SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
-
-    // Visit each use of the OrigHeader instruction.
-    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
-         UE = OrigHeaderVal->use_end(); UI != UE; ) {
-      // Grab the use before incrementing the iterator.
-      Use &U = UI.getUse();
-
-      // Increment the iterator before removing the use from the list.
-      ++UI;
-
-      // SSAUpdater can't handle a non-PHI use in the same block as an
-      // earlier def. We can easily handle those cases manually.
-      Instruction *UserInst = cast<Instruction>(U.getUser());
-      if (!isa<PHINode>(UserInst)) {
-        BasicBlock *UserBB = UserInst->getParent();
-
-        // The original users in the OrigHeader are already using the
-        // original definitions.
-        if (UserBB == OrigHeader)
-          continue;
-
-        // Users in the OrigPreHeader need to use the value to which the
-        // original definitions are mapped.
-        if (UserBB == OrigPreHeader) {
-          U = OrigPreHeaderVal;
-          continue;
-        }
-      }
-
-      // Anything else can be handled by SSAUpdater.
-      SSA.RewriteUse(U);
-    }
-  }
+  // If there were any uses of instructions in the duplicated block outside the
+  // loop, update them, inserting PHI nodes as required
+  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
 
   // NewHeader is now the header of the loop.
   L->moveToHeader(NewHeader);
+  assert(L->getHeader() == NewHeader && "Latch block is our new header");
 
-  // Move the original header to the bottom of the loop, where it now more
-  // naturally belongs. This isn't necessary for correctness, and CodeGen can
-  // usually reorder blocks on its own to fix things like this up, but it's
-  // still nice to keep the IR readable.
-  //
-  // The original header should have only one predecessor at this point, since
-  // we checked that the loop had a proper preheader and unique backedge before
-  // we started.
-  assert(OrigHeader->getSinglePredecessor() &&
-         "Original loop header has too many predecessors after loop rotation!");
-  OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
-
-  // Also, since this original header only has one predecessor, zap its
-  // PHI nodes, which are now trivial.
-  FoldSingleEntryPHINodes(OrigHeader);
-
-  // TODO: We could just go ahead and merge OrigHeader into its predecessor
-  // at this point, if we don't mind updating dominator info.
-
-  // Establish a new preheader, update dominators, etc.
-  preserveCanonicalLoopForm(LPM);
-
-  ++NumRotated;
-  return true;
-}
-
-/// Initialize local data
-void LoopRotate::initialize() {
-  L = NULL;
-  OrigHeader = NULL;
-  OrigPreHeader = NULL;
-  NewHeader = NULL;
-  Exit = NULL;
-}
-
-/// After loop rotation, loop pre-header has multiple sucessors.
-/// Insert one forwarding basic block to ensure that loop pre-header
-/// has only one successor.
-void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
-
-  // Right now original pre-header has two successors, new header and
-  // exit block. Insert new block between original pre-header and
-  // new header such that loop's new pre-header has only one successor.
-  BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
-                                                "bb.nph",
-                                                OrigHeader->getParent(), 
-                                                NewHeader);
-  LoopInfo &LI = getAnalysis<LoopInfo>();
-  if (Loop *PL = LI.getLoopFor(OrigPreHeader))
-    PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
-  BranchInst::Create(NewHeader, NewPreHeader);
   
-  BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
-  if (OrigPH_BI->getSuccessor(0) == NewHeader)
-    OrigPH_BI->setSuccessor(0, NewPreHeader);
-  else {
-    assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
-           "Unexpected original pre-header terminator");
-    OrigPH_BI->setSuccessor(1, NewPreHeader);
-  }
-
-  PHINode *PN;
-  for (BasicBlock::iterator I = NewHeader->begin();
-       (PN = dyn_cast<PHINode>(I)); ++I) {
-    int index = PN->getBasicBlockIndex(OrigPreHeader);
-    assert(index != -1 && "Expected incoming value from Original PreHeader");
-    PN->setIncomingBlock(index, NewPreHeader);
-    assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 && 
-           "Expected only one incoming value from Original PreHeader");
-  }
-
-  if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
-    DT->addNewBlock(NewPreHeader, OrigPreHeader);
-    DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
-    DT->changeImmediateDominator(Exit, OrigPreHeader);
-    for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
-         BI != BE; ++BI) {
-      BasicBlock *B = *BI;
-      if (L->getHeader() != B) {
-        DomTreeNode *Node = DT->getNode(B);
-        if (Node && Node->getBlock() == OrigHeader)
-          DT->changeImmediateDominator(*BI, L->getHeader());
-      }
-    }
-    DT->changeImmediateDominator(OrigHeader, OrigLatch);
-  }
-
-  if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
-    // New Preheader's dominance frontier is Exit block.
-    DominanceFrontier::DomSetType NewPHSet;
-    NewPHSet.insert(Exit);
-    DF->addBasicBlock(NewPreHeader, NewPHSet);
-
-    // New Header's dominance frontier now includes itself and Exit block
-    DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
-    if (HeadI != DF->end()) {
-      DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
-      HeaderSet.clear();
-      HeaderSet.insert(L->getHeader());
-      HeaderSet.insert(Exit);
-    } else {
-      DominanceFrontier::DomSetType HeaderSet;
-      HeaderSet.insert(L->getHeader());
-      HeaderSet.insert(Exit);
-      DF->addBasicBlock(L->getHeader(), HeaderSet);
-    }
-
-    // Original header (new Loop Latch)'s dominance frontier is Exit.
-    DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
-    if (LatchI != DF->end()) {
-      DominanceFrontier::DomSetType &LatchSet = LatchI->second;
-      LatchSet = LatchI->second;
-      LatchSet.clear();
-      LatchSet.insert(Exit);
-    } else {
-      DominanceFrontier::DomSetType LatchSet;
-      LatchSet.insert(Exit);
-      DF->addBasicBlock(L->getHeader(), LatchSet);
+  // At this point, we've finished our major CFG changes.  As part of cloning
+  // the loop into the preheader we've simplified instructions and the
+  // duplicated conditional branch may now be branching on a constant.  If it is
+  // branching on a constant and if that constant means that we enter the loop,
+  // then we fold away the cond branch to an uncond branch.  This simplifies the
+  // loop in cases important for nested loops, and it also means we don't have
+  // to split as many edges.
+  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+  if (!isa<ConstantInt>(PHBI->getCondition()) ||
+      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+          != NewHeader) {
+    // The conditional branch can't be folded, handle the general case.
+    // Update DominatorTree to reflect the CFG change we just made.  Then split
+    // edges as necessary to preserve LoopSimplify form.
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+      // Since OrigPreheader now has the conditional branch to Exit block, it is
+      // the dominator of Exit.
+      DT->changeImmediateDominator(Exit, OrigPreheader);
+      DT->changeImmediateDominator(NewHeader, OrigPreheader);
+      
+      // Update OrigHeader to be dominated by the new header block.
+      DT->changeImmediateDominator(OrigHeader, OrigLatch);
     }
-
-    // If a loop block dominates new loop latch then add to its frontiers
-    // new header and Exit and remove new latch (which is equal to original
-    // header).
-    BasicBlock *NewLatch = L->getLoopLatch();
-
-    assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
-
+    
+    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+    // thus is not a preheader anymore.  Split the edge to form a real preheader.
+    BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+    NewPH->setName(NewHeader->getName() + ".lr.ph");
+    
+    // Preserve canonical loop form, which means that 'Exit' should have only one
+    // predecessor.
+    BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+    ExitSplit->moveBefore(Exit);
+  } else {
+    // We can fold the conditional branch in the preheader, this makes things
+    // simpler. The first step is to remove the extra edge to the Exit block.
+    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+    BranchInst::Create(NewHeader, PHBI);
+    PHBI->eraseFromParent();
+    
+    // With our CFG finalized, update DomTree if it is available.
     if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
-      for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
-           BI != BE; ++BI) {
-        BasicBlock *B = *BI;
-        if (DT->dominates(B, NewLatch)) {
-          DominanceFrontier::iterator BDFI = DF->find(B);
-          if (BDFI != DF->end()) {
-            DominanceFrontier::DomSetType &BSet = BDFI->second;
-            BSet.erase(NewLatch);
-            BSet.insert(L->getHeader());
-            BSet.insert(Exit);
-          } else {
-            DominanceFrontier::DomSetType BSet;
-            BSet.insert(L->getHeader());
-            BSet.insert(Exit);
-            DF->addBasicBlock(B, BSet);
-          }
-        }
-      }
+      // Update OrigHeader to be dominated by the new header block.
+      DT->changeImmediateDominator(NewHeader, OrigPreheader);
+      DT->changeImmediateDominator(OrigHeader, OrigLatch);
     }
   }
-
-  // Preserve canonical loop form, which means Exit block should
-  // have only one predecessor.
-  SplitEdge(L->getLoopLatch(), Exit, this);
-
-  assert(NewHeader && L->getHeader() == NewHeader &&
-         "Invalid loop header after loop rotation");
-  assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
-         "Invalid loop preheader after loop rotation");
-  assert(L->getLoopLatch() &&
-         "Invalid loop latch after loop rotation");
+  
+  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+  // Now that the CFG and DomTree are in a consistent state again, try to merge
+  // the OrigHeader block into OrigLatch.  This will succeed if they are
+  // connected by an unconditional branch.  This is just a cleanup so the
+  // emitted code isn't too gross in this common case.
+  MergeBlockIntoPredecessor(OrigHeader, this);
+  
+  ++NumRotated;
+  return true;
 }
+
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e8dc5d3a640e..ac4aea2e404e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -63,6 +63,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallBitVector.h"
@@ -113,7 +114,7 @@ class RegUseTracker {
 public:
   void CountRegister(const SCEV *Reg, size_t LUIdx);
   void DropRegister(const SCEV *Reg, size_t LUIdx);
-  void DropUse(size_t LUIdx);
+  void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
 
   bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
 
@@ -152,11 +153,19 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
 }
 
 void
-RegUseTracker::DropUse(size_t LUIdx) {
-  // Remove the use index from every register's use list.
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+  assert(LUIdx <= LastLUIdx);
+
+  // Update RegUses. The data structure is not optimized for this purpose;
+  // we must iterate through it and update each of the bit vectors.
   for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
-       I != E; ++I)
-    I->second.UsedByIndices.reset(LUIdx);
+       I != E; ++I) {
+    SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+    if (LUIdx < UsedByIndices.size())
+      UsedByIndices[LUIdx] =
+        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+  }
 }
 
 bool
@@ -202,8 +211,7 @@ struct Formula {
 
   Formula() : ScaledReg(0) {}
 
-  void InitialMatch(const SCEV *S, Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+  void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
 
   unsigned getNumRegs() const;
   const Type *getType() const;
@@ -224,9 +232,9 @@ struct Formula {
 static void DoInitialMatch(const SCEV *S, Loop *L,
                            SmallVectorImpl<const SCEV *> &Good,
                            SmallVectorImpl<const SCEV *> &Bad,
-                           ScalarEvolution &SE, DominatorTree &DT) {
+                           ScalarEvolution &SE) {
   // Collect expressions which properly dominate the loop header.
-  if (S->properlyDominates(L->getHeader(), &DT)) {
+  if (SE.properlyDominates(S, L->getHeader())) {
     Good.push_back(S);
     return;
   }
@@ -235,18 +243,18 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
          I != E; ++I)
-      DoInitialMatch(*I, L, Good, Bad, SE, DT);
+      DoInitialMatch(*I, L, Good, Bad, SE);
     return;
   }
 
   // Look at addrec operands.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
     if (!AR->getStart()->isZero()) {
-      DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
       DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                       AR->getStepRecurrence(SE),
                                       AR->getLoop()),
-                     L, Good, Bad, SE, DT);
+                     L, Good, Bad, SE);
       return;
     }
 
@@ -258,7 +266,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
 
       SmallVector<const SCEV *, 4> MyGood;
       SmallVector<const SCEV *, 4> MyBad;
-      DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
       const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
         SE.getEffectiveSCEVType(NewMul->getType())));
       for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
@@ -278,11 +286,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
 /// InitialMatch - Incorporate loop-variant parts of S into this Formula,
 /// attempting to keep all loop-invariant and loop-computable values in a
 /// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L,
-                           ScalarEvolution &SE, DominatorTree &DT) {
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
   SmallVector<const SCEV *, 4> Good;
   SmallVector<const SCEV *, 4> Bad;
-  DoInitialMatch(S, L, Good, Bad, SE, DT);
+  DoInitialMatch(S, L, Good, Bad, SE);
   if (!Good.empty()) {
     const SCEV *Sum = SE.getAddExpr(Good);
     if (!Sum->isZero())
@@ -608,7 +615,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
   bool Changed = false;
 
   while (!DeadInsts.empty()) {
-    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+    Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
 
     if (I == 0 || !isInstructionTriviallyDead(I))
       continue;
@@ -645,8 +652,6 @@ public:
     : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
       SetupCost(0) {}
 
-  unsigned getNumRegs() const { return NumRegs; }
-
   bool operator<(const Cost &Other) const;
 
   void Loose();
@@ -722,6 +727,9 @@ void Cost::RateRegister(const SCEV *Reg,
         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
     ++SetupCost;
+
+    NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+                 SE.hasComputableLoopEvolution(Reg, L);
 }
 
 /// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -756,9 +764,6 @@ void Cost::RateFormula(const Formula &F,
       return;
     }
     RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
-
-    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
-                 BaseReg->hasComputableLoopEvolution(L);
   }
 
   if (F.BaseRegs.size() > 1)
@@ -1257,32 +1262,6 @@ struct UseMapDenseMapInfo {
   }
 };
 
-/// FormulaSorter - This class implements an ordering for formulae which sorts
-/// the by their standalone cost.
-class FormulaSorter {
-  /// These two sets are kept empty, so that we compute standalone costs.
-  DenseSet<const SCEV *> VisitedRegs;
-  SmallPtrSet<const SCEV *, 16> Regs;
-  Loop *L;
-  LSRUse *LU;
-  ScalarEvolution &SE;
-  DominatorTree &DT;
-
-public:
-  FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
-    : L(l), LU(&lu), SE(se), DT(dt) {}
-
-  bool operator()(const Formula &A, const Formula &B) {
-    Cost CostA;
-    CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
-    Regs.clear();
-    Cost CostB;
-    CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
-    Regs.clear();
-    return CostA < CostB;
-  }
-};
-
 /// LSRInstance - This class holds state for the main loop strength reduction
 /// logic.
 class LSRInstance {
@@ -1341,7 +1320,7 @@ class LSRInstance {
                                     LSRUse::KindType Kind,
                                     const Type *AccessTy);
 
-  void DeleteUse(LSRUse &LU);
+  void DeleteUse(LSRUse &LU, size_t LUIdx);
 
   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
 
@@ -1925,10 +1904,13 @@ LSRInstance::getUse(const SCEV *&Expr,
 }
 
 /// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
   if (&LU != &Uses.back())
     std::swap(LU, Uses.back());
   Uses.pop_back();
+
+  // Update RegUses.
+  RegUses.SwapAndDropUse(LUIdx, Uses.size());
 }
 
 /// FindUseWithFormula - Look for a use distinct from OrigLU which is has
@@ -2073,7 +2055,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
 
         // x == y  -->  x - y == 0
         const SCEV *N = SE.getSCEV(NV);
-        if (N->isLoopInvariant(L)) {
+        if (SE.isLoopInvariant(N, L)) {
           Kind = LSRUse::ICmpZero;
           S = SE.getMinusSCEV(N, S);
         }
@@ -2113,7 +2095,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
 void
 LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
   Formula F;
-  F.InitialMatch(S, L, SE, DT);
+  F.InitialMatch(S, L, SE);
   bool Inserted = InsertFormula(LU, LUIdx, F);
   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
 }
@@ -2213,7 +2195,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
           unsigned OtherIdx = !UI.getOperandNo();
           Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
-          if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
             continue;
         }
 
@@ -2296,7 +2278,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
 
       // Loop-variant "unknown" values are uninteresting; we won't be able to
       // do anything meaningful with them.
-      if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+      if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
         continue;
 
       // Don't pull a constant into a register if the constant could be folded
@@ -2347,8 +2329,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
   for (SmallVectorImpl<const SCEV *>::const_iterator
        I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
     const SCEV *BaseReg = *I;
-    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
-        !BaseReg->hasComputableLoopEvolution(L))
+    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+        !SE.hasComputableLoopEvolution(BaseReg, L))
       Ops.push_back(BaseReg);
     else
       F.BaseRegs.push_back(BaseReg);
@@ -2813,9 +2795,11 @@ LSRInstance::GenerateAllReuseFormulae() {
         print_uses(dbgs()));
 }
 
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
 /// by other uses, pick the best one and delete the others.
 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+  DenseSet<const SCEV *> VisitedRegs;
+  SmallPtrSet<const SCEV *, 16> Regs;
 #ifndef NDEBUG
   bool ChangedFormulae = false;
 #endif
@@ -2828,7 +2812,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
-    FormulaSorter Sorter(L, LU, SE, DT);
     DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
 
     bool Any = false;
@@ -2854,7 +2837,14 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         BestFormulae.insert(std::make_pair(Key, FIdx));
       if (!P.second) {
         Formula &Best = LU.Formulae[P.first->second];
-        if (Sorter.operator()(F, Best))
+
+        Cost CostF;
+        CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+        Regs.clear();
+        Cost CostBest;
+        CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+        Regs.clear();
+        if (CostF < CostBest)
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
@@ -2894,7 +2884,7 @@ static const size_t ComplexityLimit = UINT16_MAX;
 /// this many solutions because it prune the search space, but the pruning
 /// isn't always sufficient.
 size_t LSRInstance::EstimateSearchSpaceComplexity() const {
-  uint32_t Power = 1;
+  size_t Power = 1;
   for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
        E = Uses.end(); I != E; ++I) {
     size_t FSize = I->Formulae.size();
@@ -3001,6 +2991,28 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
 
               LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
 
+              // Update the relocs to reference the new use.
+              for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+                   E = Fixups.end(); I != E; ++I) {
+                LSRFixup &Fixup = *I;
+                if (Fixup.LUIdx == LUIdx) {
+                  Fixup.LUIdx = LUThatHas - &Uses.front();
+                  Fixup.Offset += F.AM.BaseOffs;
+                  // Add the new offset to LUThatHas' offset list.
+                  if (LUThatHas->Offsets.back() != Fixup.Offset) {
+                    LUThatHas->Offsets.push_back(Fixup.Offset);
+                    if (Fixup.Offset > LUThatHas->MaxOffset)
+                      LUThatHas->MaxOffset = Fixup.Offset;
+                    if (Fixup.Offset < LUThatHas->MinOffset)
+                      LUThatHas->MinOffset = Fixup.Offset;
+                  }
+                  DEBUG(dbgs() << "New fixup has offset "
+                               << Fixup.Offset << '\n');
+                }
+                if (Fixup.LUIdx == NumUses-1)
+                  Fixup.LUIdx = LUIdx;
+              }
+
               // Delete formulae from the new use which are no longer legal.
               bool Any = false;
               for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -3019,22 +3031,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
               if (Any)
                 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
 
-              // Update the relocs to reference the new use.
-              for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
-                   E = Fixups.end(); I != E; ++I) {
-                LSRFixup &Fixup = *I;
-                if (Fixup.LUIdx == LUIdx) {
-                  Fixup.LUIdx = LUThatHas - &Uses.front();
-                  Fixup.Offset += F.AM.BaseOffs;
-                  DEBUG(dbgs() << "New fixup has offset "
-                               << Fixup.Offset << '\n');
-                }
-                if (Fixup.LUIdx == NumUses-1)
-                  Fixup.LUIdx = LUIdx;
-              }
-
               // Delete the old use.
-              DeleteUse(LU);
+              DeleteUse(LU, LUIdx);
               --LUIdx;
               --NumUses;
               break;
@@ -3546,21 +3544,23 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
       // is the canonical backedge for this loop, which complicates post-inc
       // users.
       if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
-          !isa<IndirectBrInst>(BB->getTerminator()) &&
-          (PN->getParent() != L->getHeader() || !L->contains(BB))) {
-        // Split the critical edge.
-        BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
-        // If PN is outside of the loop and BB is in the loop, we want to
-        // move the block to be immediately before the PHI block, not
-        // immediately after BB.
-        if (L->contains(BB) && !L->contains(PN))
-          NewBB->moveBefore(PN->getParent());
-
-        // Splitting the edge can reduce the number of PHI entries we have.
-        e = PN->getNumIncomingValues();
-        BB = NewBB;
-        i = PN->getBasicBlockIndex(BB);
+          !isa<IndirectBrInst>(BB->getTerminator())) {
+        Loop *PNLoop = LI.getLoopFor(PN->getParent());
+        if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+          // Split the critical edge.
+          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+          // If PN is outside of the loop and BB is in the loop, we want to
+          // move the block to be immediately before the PHI block, not
+          // immediately after BB.
+          if (L->contains(BB) && !L->contains(PN))
+            NewBB->moveBefore(PN->getParent());
+
+          // Splitting the edge can reduce the number of PHI entries we have.
+          e = PN->getNumIncomingValues();
+          BB = NewBB;
+          i = PN->getBasicBlockIndex(BB);
+        }
       }
 
       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
@@ -3792,21 +3792,30 @@ private:
 }
 
 char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
-                "Loop Strength Reduction", false, false);
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+                "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+                "Loop Strength Reduction", false, false)
+
 
 Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
   return new LoopStrengthReduce(TLI);
 }
 
 LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
-  : LoopPass(ID), TLI(tli) {}
+  : LoopPass(ID), TLI(tli) {
+    initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+  }
 
 void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   // We split critical edges, so we change the CFG.  However, we do update
   // many analyses if they are around.
   AU.addPreservedID(LoopSimplifyID);
-  AU.addPreserved("domfrontier");
 
   AU.addRequired<LoopInfo>();
   AU.addPreserved<LoopInfo>();
@@ -3815,6 +3824,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<DominatorTree>();
   AU.addRequired<ScalarEvolution>();
   AU.addPreserved<ScalarEvolution>();
+  // Requiring LoopSimplify a second time here prevents IVUsers from running
+  // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+  AU.addRequiredID(LoopSimplifyID);
   AU.addRequired<IVUsers>();
   AU.addPreserved<IVUsers>();
 }
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d0edfa220051..80b263a30cb8 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,7 +16,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -27,7 +27,7 @@
 using namespace llvm;
 
 static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
   cl::desc("The cut-off point for automatic loop unrolling"));
 
 static cl::opt<unsigned>
@@ -43,12 +43,20 @@ namespace {
   class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopUnroll() : LoopPass(ID) {}
+    LoopUnroll() : LoopPass(ID) {
+      initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+    }
 
     /// A magic value for use with the Threshold parameter to indicate
     /// that the loop unroll should be performed regardless of how much
     /// code expansion would result.
     static const unsigned NoThreshold = UINT_MAX;
+    
+    // Threshold to use when optsize is specified (and there is no
+    // explicit -unroll-threshold).
+    static const unsigned OptSizeUnrollThreshold = 50;
+    
+    unsigned CurrentThreshold;
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -73,7 +81,11 @@ namespace {
 }
 
 char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
 
@@ -83,8 +95,16 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
        I != E; ++I)
     Metrics.analyzeBasicBlock(*I);
-  NumCalls = Metrics.NumCalls;
-  return Metrics.NumInsts;
+  NumCalls = Metrics.NumInlineCandidates;
+  
+  unsigned LoopSize = Metrics.NumInsts;
+  
+  // Don't allow an estimate of size zero.  This would allows unrolling of loops
+  // with huge iteration counts, which is a compile time problem even if it's
+  // not a problem for code quality.
+  if (LoopSize == 0) LoopSize = 1;
+  
+  return LoopSize;
 }
 
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -94,6 +114,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
         << "] Loop %" << Header->getName() << "\n");
   (void)Header;
+  
+  // Determine the current unrolling threshold.  While this is normally set
+  // from UnrollThreshold, it is overridden to a smaller value if the current
+  // function is marked as optimize-for-size, and the unroll threshold was
+  // not user specified.
+  CurrentThreshold = UnrollThreshold;
+  if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
+      UnrollThreshold.getNumOccurrences() == 0)
+    CurrentThreshold = OptSizeUnrollThreshold;
 
   // Find trip count
   unsigned TripCount = L->getSmallConstantTripCount();
@@ -111,25 +140,25 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   }
 
   // Enforce the threshold.
-  if (UnrollThreshold != NoThreshold) {
-    unsigned NumCalls;
-    unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+  if (CurrentThreshold != NoThreshold) {
+    unsigned NumInlineCandidates;
+    unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
     DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
-    if (NumCalls != 0) {
-      DEBUG(dbgs() << "  Not unrolling loop with function calls.\n");
+    if (NumInlineCandidates != 0) {
+      DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
       return false;
     }
     uint64_t Size = (uint64_t)LoopSize*Count;
-    if (TripCount != 1 && Size > UnrollThreshold) {
+    if (TripCount != 1 && Size > CurrentThreshold) {
       DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count
-            << " because size: " << Size << ">" << UnrollThreshold << "\n");
+            << " because size: " << Size << ">" << CurrentThreshold << "\n");
       if (!UnrollAllowPartial) {
         DEBUG(dbgs() << "  will not try to unroll partially because "
               << "-unroll-allow-partial not given\n");
         return false;
       }
       // Reduce unroll count to be modulo of TripCount for partial unrolling
-      Count = UnrollThreshold / LoopSize;
+      Count = CurrentThreshold / LoopSize;
       while (Count != 0 && TripCount%Count != 0) {
         Count--;
       }
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9afe428ba569..b4e3d318a575 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,12 +32,12 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -93,7 +93,9 @@ namespace {
     explicit LoopUnswitch(bool Os = false) : 
       LoopPass(ID), OptimizeForSize(Os), redoLoop(false), 
       currentLoop(NULL), DT(NULL), loopHeader(NULL),
-      loopPreheader(NULL) {}
+      loopPreheader(NULL) {
+        initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+      }
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
     bool processCurrentLoop();
@@ -109,6 +111,7 @@ namespace {
       AU.addRequiredID(LCSSAID);
       AU.addPreservedID(LCSSAID);
       AU.addPreserved<DominatorTree>();
+      AU.addPreserved<ScalarEvolution>();
     }
 
   private:
@@ -158,7 +161,13 @@ namespace {
   };
 }
 char LoopUnswitch::ID = 0;
-INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+                      false, false)
 
 Pass *llvm::createLoopUnswitchPass(bool Os) { 
   return new LoopUnswitch(Os); 
@@ -450,22 +459,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   return true;
 }
 
-// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by VMap.
-//
-static inline void RemapInstruction(Instruction *I,
-                                    ValueMap<const Value *, Value*> &VMap) {
-  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
-    Value *Op = I->getOperand(op);
-    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
-    if (It != VMap.end()) Op = It->second;
-    I->setOperand(op, Op);
-  }
-}
-
 /// CloneLoop - Recursively clone the specified loop and all of its children,
 /// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                        LoopInfo *LI, LPPassManager *LPM) {
   Loop *New = new Loop();
   LPM->insertLoop(New, PL);
@@ -580,6 +576,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
         << " blocks] in Function " << F->getName()
         << " when '" << *Val << "' == " << *LIC << "\n");
 
+  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+    SE->forgetLoop(L);
+
   LoopBlocks.clear();
   NewBlocks.clear();
 
@@ -609,7 +608,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   // the loop preheader and exit blocks), keeping track of the mapping between
   // the instructions and blocks.
   NewBlocks.reserve(LoopBlocks.size());
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
     BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
     NewBlocks.push_back(NewBB);
@@ -647,7 +646,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
     for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
       PN = cast<PHINode>(I);
       Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
-      ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+      ValueToValueMapTy::iterator It = VMap.find(V);
       if (It != VMap.end()) V = It->second;
       PN->addIncoming(V, NewExit);
     }
@@ -657,7 +656,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
     for (BasicBlock::iterator I = NewBlocks[i]->begin(),
            E = NewBlocks[i]->end(); I != E; ++I)
-      RemapInstruction(I, VMap);
+      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
   
   // Rewrite the original preheader to select between versions of the loop.
   BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -961,13 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
   while (!Worklist.empty()) {
     Instruction *I = Worklist.back();
     Worklist.pop_back();
-    
-    // Simple constant folding.
-    if (Constant *C = ConstantFoldInstruction(I)) {
-      ReplaceUsesOfWith(I, C, Worklist, L, LPM);
-      continue;
-    }
-    
+
     // Simple DCE.
     if (isInstructionTriviallyDead(I)) {
       DEBUG(dbgs() << "Remove dead instruction '" << *I);
@@ -982,15 +975,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
       ++NumSimplify;
       continue;
     }
-    
+
     // See if instruction simplification can hack this up.  This is common for
     // things like "select false, X, Y" after unswitching made the condition be
     // 'false'.
-    if (Value *V = SimplifyInstruction(I)) {
-      ReplaceUsesOfWith(I, V, Worklist, L, LPM);
-      continue;
-    }
-    
+    if (Value *V = SimplifyInstruction(I, 0, DT))
+      if (LI->replacementPreservesLCSSAForm(I, V)) {
+        ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+        continue;
+      }
+
     // Special case hacks that appear commonly in unswitched code.
     if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
       if (BI->isUnconditional()) {
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 973ffe7e6a40..9087b46c138b 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,26 +14,15 @@
 
 #define DEBUG_TYPE "loweratomic"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/Function.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/IRBuilder.h"
-
 using namespace llvm;
 
-namespace {
-
-bool LowerAtomicIntrinsic(CallInst *CI) {
-  IRBuilder<> Builder(CI->getParent(), CI);
-
-  Function *Callee = CI->getCalledFunction();
-  if (!Callee)
-    return false;
-
-  unsigned IID = Callee->getIntrinsicID();
+static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
+  IRBuilder<> Builder(II->getParent(), II);
+  unsigned IID = II->getIntrinsicID();
   switch (IID) {
   case Intrinsic::memory_barrier:
     break;
@@ -48,80 +37,70 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
   case Intrinsic::atomic_load_min:
   case Intrinsic::atomic_load_umax:
   case Intrinsic::atomic_load_umin: {
-    Value *Ptr = CI->getArgOperand(0);
-    Value *Delta = CI->getArgOperand(1);
+    Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
 
     LoadInst *Orig = Builder.CreateLoad(Ptr);
     Value *Res = NULL;
     switch (IID) {
-      default: assert(0 && "Unrecognized atomic modify operation");
-      case Intrinsic::atomic_load_add:
-        Res = Builder.CreateAdd(Orig, Delta);
-        break;
-      case Intrinsic::atomic_load_sub:
-        Res = Builder.CreateSub(Orig, Delta);
-        break;
-      case Intrinsic::atomic_load_and:
-        Res = Builder.CreateAnd(Orig, Delta);
-        break;
-      case Intrinsic::atomic_load_nand:
-        Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
-        break;
-      case Intrinsic::atomic_load_or:
-        Res = Builder.CreateOr(Orig, Delta);
-        break;
-      case Intrinsic::atomic_load_xor:
-        Res = Builder.CreateXor(Orig, Delta);
-        break;
-      case Intrinsic::atomic_load_max:
-        Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
-                                   Delta,
-                                   Orig);
-        break;
-      case Intrinsic::atomic_load_min:
-        Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
-                                   Orig,
-                                   Delta);
-        break;
-      case Intrinsic::atomic_load_umax:
-        Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
-                                   Delta,
-                                   Orig);
-        break;
-      case Intrinsic::atomic_load_umin:
-        Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
-                                   Orig,
-                                   Delta);
-        break;
+    default: assert(0 && "Unrecognized atomic modify operation");
+    case Intrinsic::atomic_load_add:
+      Res = Builder.CreateAdd(Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_sub:
+      Res = Builder.CreateSub(Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_and:
+      Res = Builder.CreateAnd(Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_nand:
+      Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+      break;
+    case Intrinsic::atomic_load_or:
+      Res = Builder.CreateOr(Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_xor:
+      Res = Builder.CreateXor(Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_max:
+      Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+                                 Delta, Orig);
+      break;
+    case Intrinsic::atomic_load_min:
+      Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+                                 Orig, Delta);
+      break;
+    case Intrinsic::atomic_load_umax:
+      Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+                                 Delta, Orig);
+      break;
+    case Intrinsic::atomic_load_umin:
+      Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+                                 Orig, Delta);
+      break;
     }
     Builder.CreateStore(Res, Ptr);
 
-    CI->replaceAllUsesWith(Orig);
+    II->replaceAllUsesWith(Orig);
     break;
   }
 
   case Intrinsic::atomic_swap: {
-    Value *Ptr = CI->getArgOperand(0);
-    Value *Val = CI->getArgOperand(1);
-
+    Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
     LoadInst *Orig = Builder.CreateLoad(Ptr);
     Builder.CreateStore(Val, Ptr);
-
-    CI->replaceAllUsesWith(Orig);
+    II->replaceAllUsesWith(Orig);
     break;
   }
 
   case Intrinsic::atomic_cmp_swap: {
-    Value *Ptr = CI->getArgOperand(0);
-    Value *Cmp = CI->getArgOperand(1);
-    Value *Val = CI->getArgOperand(2);
+    Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
+    Value *Val = II->getArgOperand(2);
 
     LoadInst *Orig = Builder.CreateLoad(Ptr);
     Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
     Value *Res = Builder.CreateSelect(Equal, Val, Orig);
     Builder.CreateStore(Res, Ptr);
-
-    CI->replaceAllUsesWith(Orig);
+    II->replaceAllUsesWith(Orig);
     break;
   }
 
@@ -129,33 +108,32 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
     return false;
   }
 
-  assert(CI->use_empty() &&
+  assert(II->use_empty() &&
          "Lowering should have eliminated any uses of the intrinsic call!");
-  CI->eraseFromParent();
+  II->eraseFromParent();
 
   return true;
 }
 
-struct LowerAtomic : public BasicBlockPass {
-  static char ID;
-  LowerAtomic() : BasicBlockPass(ID) {}
-  bool runOnBasicBlock(BasicBlock &BB) {
-    bool Changed = false;
-    for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
-      Instruction *Inst = DI++;
-      if (CallInst *CI = dyn_cast<CallInst>(Inst))
-        Changed |= LowerAtomicIntrinsic(CI);
+namespace {
+  struct LowerAtomic : public BasicBlockPass {
+    static char ID;
+    LowerAtomic() : BasicBlockPass(ID) {
+      initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
     }
-    return Changed;
-  }
-
-};
-
+    bool runOnBasicBlock(BasicBlock &BB) {
+      bool Changed = false;
+      for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
+          Changed |= LowerAtomicIntrinsic(II);
+      return Changed;
+    }
+  };
 }
 
 char LowerAtomic::ID = 0;
 INITIALIZE_PASS(LowerAtomic, "loweratomic",
                 "Lower atomic intrinsics to non-atomic form",
-                false, false);
+                false, false)
 
 Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 24fae423d2f7..bde0e5316c3a 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,16 +14,18 @@
 
 #define DEBUG_TYPE "memcpyopt"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include <list>
@@ -32,62 +34,10 @@ using namespace llvm;
 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy,   "Number of memmoves converted to memcpy");
-
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with.  This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
-  LLVMContext &Context = V->getContext();
-  
-  // All byte-wide stores are splatable, even of arbitrary variables.
-  if (V->getType()->isIntegerTy(8)) return V;
-  
-  // Constant float and double values can be handled as integer values if the
-  // corresponding integer value is "byteable".  An important case is 0.0. 
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType()->isFloatTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
-    if (CFP->getType()->isDoubleTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
-    // Don't handle long double formats, which have strange constraints.
-  }
-  
-  // We can handle constant integers that are power of two in size and a 
-  // multiple of 8 bits.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    unsigned Width = CI->getBitWidth();
-    if (isPowerOf2_32(Width) && Width > 8) {
-      // We can handle this value if the recursive binary decomposition is the
-      // same at all levels.
-      APInt Val = CI->getValue();
-      APInt Val2;
-      while (Val.getBitWidth() != 8) {
-        unsigned NextWidth = Val.getBitWidth()/2;
-        Val2  = Val.lshr(NextWidth);
-        Val2.trunc(Val.getBitWidth()/2);
-        Val.trunc(Val.getBitWidth()/2);
-
-        // If the top/bottom halves aren't the same, reject it.
-        if (Val != Val2)
-          return 0;
-      }
-      return ConstantInt::get(Context, Val);
-    }
-  }
-  
-  // Conceptually, we could handle things like:
-  //   %a = zext i8 %X to i16
-  //   %b = shl i16 %a, 8
-  //   %c = or i16 %a, %b
-  // but until there is an example that actually needs this, it doesn't seem
-  // worth worrying about.
-  return 0;
-}
+STATISTIC(NumCpyToSet,    "Number of memcpys converted to memset");
 
 static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
-                                  bool &VariableIdxFound, TargetData &TD) {
+                                  bool &VariableIdxFound, const TargetData &TD){
   // Skip over the first indices.
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -120,14 +70,31 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
 /// constant offset, and return that constant offset.  For example, Ptr1 might
 /// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
 static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
-                            TargetData &TD) {
+                            const TargetData &TD) {
+  Ptr1 = Ptr1->stripPointerCasts();
+  Ptr2 = Ptr2->stripPointerCasts();
+  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+  
+  bool VariableIdxFound = false;
+
+  // If one pointer is a GEP and the other isn't, then see if the GEP is a
+  // constant offset from the base, as in "P" and "gep P, 1".
+  if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+    Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+    return !VariableIdxFound;
+  }
+
+  if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+    Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+    return !VariableIdxFound;
+  }
+  
   // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
   // base.  After that base, they may have some number of common (and
   // potentially variable) indices.  After that they handle some constant
   // offset, which determines their offset from each other.  At this point, we
   // handle no other case.
-  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
-  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
   if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
     return false;
   
@@ -137,7 +104,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
     if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
       break;
 
-  bool VariableIdxFound = false;
   int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
   int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
   if (VariableIdxFound) return false;
@@ -171,7 +137,7 @@ struct MemsetRange {
   unsigned Alignment;
   
   /// TheStores - The actual stores that make up this range.
-  SmallVector<StoreInst*, 16> TheStores;
+  SmallVector<Instruction*, 16> TheStores;
   
   bool isProfitableToUseMemset(const TargetData &TD) const;
 
@@ -181,10 +147,19 @@ struct MemsetRange {
 bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
   // If we found more than 8 stores to merge or 64 bytes, use memset.
   if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+  // If there is nothing to merge, don't do anything.
+  if (TheStores.size() < 2) return false;
+  
+  // If any of the stores are a memset, then it is always good to extend the
+  // memset.
+  for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+    if (!isa<StoreInst>(TheStores[i]))
+      return true;
   
   // Assume that the code generator is capable of merging pairs of stores
   // together if it wants to.
-  if (TheStores.size() <= 2) return false;
+  if (TheStores.size() == 2) return false;
   
   // If we have fewer than 8 stores, it can still be worthwhile to do this.
   // For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -215,31 +190,53 @@ class MemsetRanges {
   /// because each element is relatively large and expensive to copy.
   std::list<MemsetRange> Ranges;
   typedef std::list<MemsetRange>::iterator range_iterator;
-  TargetData &TD;
+  const TargetData &TD;
 public:
-  MemsetRanges(TargetData &td) : TD(td) {}
+  MemsetRanges(const TargetData &td) : TD(td) {}
   
   typedef std::list<MemsetRange>::const_iterator const_iterator;
   const_iterator begin() const { return Ranges.begin(); }
   const_iterator end() const { return Ranges.end(); }
   bool empty() const { return Ranges.empty(); }
   
-  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+      addStore(OffsetFromFirst, SI);
+    else
+      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+  }
+
+  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+    int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+    
+    addRange(OffsetFromFirst, StoreSize,
+             SI->getPointerOperand(), SI->getAlignment(), SI);
+  }
+  
+  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+  }
+  
+  void addRange(int64_t Start, int64_t Size, Value *Ptr,
+                unsigned Alignment, Instruction *Inst);
+
 };
   
 } // end anon namespace
 
 
-/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
+/// addRange - Add a new store to the MemsetRanges data structure.  This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
-  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-  
-  // Do a linear search of the ranges to see if this can be joined and/or to
-  // find the insertion point in the list.  We keep the ranges sorted for
-  // simplicity here.  This is a linear search of a linked list, which is ugly,
-  // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list.  We keep the ranges sorted for
+/// simplicity here.  This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+                            unsigned Alignment, Instruction *Inst) {
+  int64_t End = Start+Size;
   range_iterator I = Ranges.begin(), E = Ranges.end();
   
   while (I != E && Start > I->End)
@@ -252,14 +249,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
     MemsetRange &R = *Ranges.insert(I, MemsetRange());
     R.Start        = Start;
     R.End          = End;
-    R.StartPtr     = SI->getPointerOperand();
-    R.Alignment    = SI->getAlignment();
-    R.TheStores.push_back(SI);
+    R.StartPtr     = Ptr;
+    R.Alignment    = Alignment;
+    R.TheStores.push_back(Inst);
     return;
   }
-
+  
   // This store overlaps with I, add it.
-  I->TheStores.push_back(SI);
+  I->TheStores.push_back(Inst);
   
   // At this point, we may have an interval that completely contains our store.
   // If so, just add it to the interval and return.
@@ -274,8 +271,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
   // stopped on *it*.
   if (Start < I->Start) {
     I->Start = Start;
-    I->StartPtr = SI->getPointerOperand();
-    I->Alignment = SI->getAlignment();
+    I->StartPtr = Ptr;
+    I->Alignment = Alignment;
   }
     
   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -301,10 +298,16 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
 
 namespace {
   class MemCpyOpt : public FunctionPass {
-    bool runOnFunction(Function &F);
+    MemoryDependenceAnalysis *MD;
+    const TargetData *TD;
   public:
     static char ID; // Pass identification, replacement for typeid
-    MemCpyOpt() : FunctionPass(ID) {}
+    MemCpyOpt() : FunctionPass(ID) {
+      initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+      MD = 0;
+    }
+
+    bool runOnFunction(Function &F);
 
   private:
     // This transformation requires dominator postdominator info
@@ -319,9 +322,17 @@ namespace {
   
     // Helper fuctions
     bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+    bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
     bool processMemCpy(MemCpyInst *M);
     bool processMemMove(MemMoveInst *M);
-    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+                              uint64_t cpyLen, CallInst *C);
+    bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+                                       uint64_t MSize);
+    bool processByValArgument(CallSite CS, unsigned ArgNo);
+    Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+                                      Value *ByteVal);
+
     bool iterateOnFunction(Function &F);
   };
   
@@ -331,165 +342,199 @@ namespace {
 // createMemCpyOptPass - The public interface to this file...
 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
 
-INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
-
-
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+                    false, false)
 
-/// processStore - When GVN is scanning forward over instructions, we look for
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
 /// some other patterns to fold away.  In particular, this looks for stores to
-/// neighboring locations of memory.  If it sees enough consequtive ones
-/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
-  if (SI->isVolatile()) return false;
-  
-  LLVMContext &Context = SI->getContext();
-
-  // There are two cases that are interesting for this code to handle: memcpy
-  // and memset.  Right now we only handle memset.
+/// neighboring locations of memory.  If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, 
+                                             Value *StartPtr, Value *ByteVal) {
+  if (TD == 0) return 0;
   
-  // Ensure that the value being stored is something that can be memset'able a
-  // byte at a time like "0" or "-1" or any width, as well as things like
-  // 0xA0A0A0A0 and 0.0.
-  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
-  if (!ByteVal)
-    return false;
-
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false;
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  Module *M = SI->getParent()->getParent()->getParent();
-
   // Okay, so we now have a single store that can be splatable.  Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
   // Join these together into ranges, so we can decide whether contiguous blocks
   // are stored.
   MemsetRanges Ranges(*TD);
   
-  Value *StartPtr = SI->getPointerOperand();
-  
-  BasicBlock::iterator BI = SI;
+  BasicBlock::iterator BI = StartInst;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
-    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 
-      // If the call is readnone, ignore it, otherwise bail out.  We don't even
-      // allow readonly here because we don't want something like:
+    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+      // If the instruction is readnone, ignore it, otherwise bail out.  We
+      // don't even allow readonly here because we don't want something like:
       // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
-      if (AA.getModRefBehavior(CallSite(BI)) ==
-            AliasAnalysis::DoesNotAccessMemory)
-        continue;
-      
-      // TODO: If this is a memset, try to join it in.
-      
-      break;
-    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
-      break;
-
-    // If this is a non-store instruction it is fine, ignore it.
-    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
-    if (NextStore == 0) continue;
+      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+        break;
+      continue;
+    }
     
-    // If this is a store, see if we can merge it in.
-    if (NextStore->isVolatile()) break;
+    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+      // If this is a store, see if we can merge it in.
+      if (NextStore->isVolatile()) break;
     
-    // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
-      break;
-
-    // Check to see if this store is to a constant offset from the start ptr.
-    int64_t Offset;
-    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
-      break;
-
-    Ranges.addStore(Offset, NextStore);
+      // Check to see if this stored value is of the same byte-splattable value.
+      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+        break;
+      
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+                           Offset, *TD))
+        break;
+      
+      Ranges.addStore(Offset, NextStore);
+    } else {
+      MemSetInst *MSI = cast<MemSetInst>(BI);
+      
+      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+          !isa<ConstantInt>(MSI->getLength()))
+        break;
+      
+      // Check to see if this store is to a constant offset from the start ptr.
+      int64_t Offset;
+      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+        break;
+      
+      Ranges.addMemSet(Offset, MSI);
+    }
   }
-
+  
   // If we have no ranges, then we just had a single store with nothing that
   // could be merged in.  This is a very common case of course.
   if (Ranges.empty())
-    return false;
+    return 0;
   
   // If we had at least one store that could be merged in, add the starting
   // store as well.  We try to avoid this unless there is at least something
   // interesting as a small compile-time optimization.
-  Ranges.addStore(0, SI);
-  
-  
+  Ranges.addInst(0, StartInst);
+
+  // If we create any memsets, we put it right before the first instruction that
+  // isn't part of the memset block.  This ensure that the memset is dominated
+  // by any addressing instruction needed by the start of the block.
+  IRBuilder<> Builder(BI);
+
   // Now that we have full information about ranges, loop over the ranges and
   // emit memset's for anything big enough to be worthwhile.
-  bool MadeChange = false;
+  Instruction *AMemSet = 0;
   for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
        I != E; ++I) {
     const MemsetRange &Range = *I;
-
+    
     if (Range.TheStores.size() == 1) continue;
     
     // If it is profitable to lower this range to memset, do so now.
     if (!Range.isProfitableToUseMemset(*TD))
       continue;
     
-    // Otherwise, we do want to transform this!  Create a new memset.  We put
-    // the memset right before the first instruction that isn't part of this
-    // memset block.  This ensure that the memset is dominated by any addressing
-    // instruction needed by the start of the block.
-    BasicBlock::iterator InsertPt = BI;
-
+    // Otherwise, we do want to transform this!  Create a new memset.
     // Get the starting pointer of the block.
     StartPtr = Range.StartPtr;
-
+    
     // Determine alignment
     unsigned Alignment = Range.Alignment;
     if (Alignment == 0) {
       const Type *EltType = 
-         cast<PointerType>(StartPtr->getType())->getElementType();
+        cast<PointerType>(StartPtr->getType())->getElementType();
       Alignment = TD->getABITypeAlignment(EltType);
     }
-
-    // Cast the start ptr to be i8* as memset requires.
-    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
-    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
-                                                  StartPTy->getAddressSpace());
-    if (StartPTy!= i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
-                                 InsertPt);
-
-    Value *Ops[] = {
-      StartPtr, ByteVal,   // Start, value
-      // size
-      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
-      // align
-      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
-      // volatile
-      ConstantInt::get(Type::getInt1Ty(Context), 0),
-    };
-    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
-    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
-    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+    
+    AMemSet = 
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+    
     DEBUG(dbgs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
-            dbgs() << *Range.TheStores[i];
-          dbgs() << "With: " << *C); C=C;
-  
-    // Don't invalidate the iterator
-    BBI = BI;
-  
+            dbgs() << *Range.TheStores[i] << '\n';
+          dbgs() << "With: " << *AMemSet << '\n');
+    
     // Zap all the stores.
-    for (SmallVector<StoreInst*, 16>::const_iterator
+    for (SmallVector<Instruction*, 16>::const_iterator
          SI = Range.TheStores.begin(),
-         SE = Range.TheStores.end(); SI != SE; ++SI)
+         SE = Range.TheStores.end(); SI != SE; ++SI) {
+      MD->removeInstruction(*SI);
       (*SI)->eraseFromParent();
+    }
     ++NumMemSetInfer;
-    MadeChange = true;
   }
   
-  return MadeChange;
+  return AMemSet;
+}
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+  if (SI->isVolatile()) return false;
+  
+  if (TD == 0) return false;
+
+  // Detect cases where we're performing call slot forwarding, but
+  // happen to be using a load-store pair to implement it, rather than
+  // a memcpy.
+  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+    if (!LI->isVolatile() && LI->hasOneUse()) {
+      MemDepResult dep = MD->getDependency(LI);
+      CallInst *C = 0;
+      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+        C = dyn_cast<CallInst>(dep.getInst());
+      
+      if (C) {
+        bool changed = performCallSlotOptzn(LI,
+                        SI->getPointerOperand()->stripPointerCasts(), 
+                        LI->getPointerOperand()->stripPointerCasts(),
+                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+        if (changed) {
+          MD->removeInstruction(SI);
+          SI->eraseFromParent();
+          MD->removeInstruction(LI);
+          LI->eraseFromParent();
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
+    }
+  }
+  
+  // There are two cases that are interesting for this code to handle: memcpy
+  // and memset.  Right now we only handle memset.
+  
+  // Ensure that the value being stored is something that can be memset'able a
+  // byte at a time like "0" or "-1" or any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+                                              ByteVal)) {
+      BBI = I;  // Don't invalidate iterator.
+      return true;
+    }
+  
+  return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+  // See if there is another memset or store neighboring this memset which
+  // allows us to widen out the memset to do a single larger store.
+  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+    if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+                                              MSI->getValue())) {
+      BBI = I;  // Don't invalidate iterator.
+      return true;
+    }
+  return false;
 }
 
 
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+                                     Value *cpyDest, Value *cpySrc,
+                                     uint64_t cpyLen, CallInst *C) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
@@ -506,24 +551,15 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value *cpyDest = cpy->getDest();
-  Value *cpySrc = cpy->getSource();
   CallSite CS(C);
 
-  // We need to be able to reason about the size of the memcpy, so we require
-  // that it be a constant.
-  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
-  if (!cpyLength)
-    return false;
-
   // Require that src be an alloca.  This simplifies the reasoning considerably.
   AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
     return false;
 
   // Check that all of src is copied to dest.
-  TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  if (!TD) return false;
+  if (TD == 0) return false;
 
   ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
   if (!srcArraySize)
@@ -532,7 +568,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
-  if (cpyLength->getZExtValue() < srcSize)
+  if (cpyLen < srcSize)
     return false;
 
   // Check that accessing the first srcSize bytes of dest will not cause a
@@ -601,8 +637,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
-      AliasAnalysis::NoModRef)
+  if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
     return false;
 
   // All the checks have passed, so do the transformation.
@@ -625,99 +660,142 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Drop any cached information about the call, because we may have changed
   // its dependence information by changing its parameter.
-  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-  MD.removeInstruction(C);
+  MD->removeInstruction(C);
 
-  // Remove the memcpy
-  MD.removeInstruction(cpy);
-  cpy->eraseFromParent();
+  // Remove the memcpy.
+  MD->removeInstruction(cpy);
   ++NumMemCpyInstr;
 
   return true;
 }
 
-/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
-/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
-/// B to be a memcpy from X to Z (or potentially a memmove, depending on
-/// circumstances). This allows later passes to remove the first memcpy
-/// altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
-  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
-  // The are two possible optimizations we can do for memcpy:
-  //   a) memcpy-memcpy xform which exposes redundance for DSE.
-  //   b) call-memcpy xform for return slot optimization.
-  MemDepResult dep = MD.getDependency(M);
-  if (!dep.isClobber())
-    return false;
-  if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
-      return performCallSlotOptzn(M, C);
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
+/// copy from MDep's input if we can.  MSize is the size of M's copy.
+/// 
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+                                              uint64_t MSize) {
+  // We can only transforms memcpy's where the dest of one is the source of the
+  // other.
+  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
     return false;
-  }
-  
-  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
   
-  // We can only transforms memcpy's where the dest of one is the source of the
-  // other
-  if (M->getSource() != MDep->getDest())
+  // If dep instruction is reading from our current input, then it is a noop
+  // transfer and substituting the input won't change this instruction.  Just
+  // ignore the input and let someone else zap MDep.  This handles cases like:
+  //    memcpy(a <- a)
+  //    memcpy(b <- a)
+  if (M->getSource() == MDep->getSource())
     return false;
   
   // Second, the length of the memcpy's must be the same, or the preceeding one
   // must be larger than the following one.
-  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
-  if (!C1 || !C2)
-    return false;
-  
-  uint64_t DepSize = C1->getValue().getZExtValue();
-  uint64_t CpySize = C2->getValue().getZExtValue();
-  
-  if (DepSize < CpySize)
+  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
     return false;
   
-  // Finally, we have to make sure that the dest of the second does not
-  // alias the source of the first
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
-      AliasAnalysis::NoAlias)
+
+  // Verify that the copied-from memory doesn't change in between the two
+  // transfers.  For example, in:
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    memcpy(c <- a)
+  // It would be invalid to transform the second memcpy into memcpy(c <- b).
+  //
+  // TODO: If the code between M and MDep is transparent to the destination "c",
+  // then we could still perform the xform by moving M up to the first memcpy.
+  //
+  // NOTE: This is conservative, it will stop on any read from the source loc,
+  // not just the defining memcpy.
+  MemDepResult SourceDep =
+    MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+                                 false, M, M->getParent());
+  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
-  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
-           AliasAnalysis::NoAlias)
+  
+  // If the dest of the second might alias the source of the first, then the
+  // source and dest might overlap.  We still want to eliminate the intermediate
+  // value, but we have to generate a memmove instead of memcpy.
+  bool UseMemMove = false;
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+    UseMemMove = true;
+  
+  // If all checks passed, then we can transform M.
+  
+  // Make sure to use the lesser of the alignment of the source and the dest
+  // since we're changing where we're reading from, but don't want to increase
+  // the alignment past what can be read from or written to.
+  // TODO: Is this worth it if we're creating a less aligned memcpy? For
+  // example we could be moving from movaps -> movq on x86.
+  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+  
+  IRBuilder<> Builder(M);
+  if (UseMemMove)
+    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                          Align, M->isVolatile());
+  else
+    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                         Align, M->isVolatile());
+
+  // Remove the instruction we're replacing.
+  MD->removeInstruction(M);
+  M->eraseFromParent();
+  ++NumMemCpyInstr;
+  return true;
+}
+
+
+/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+  // We can only optimize statically-sized memcpy's that are non-volatile.
+  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+  if (CopySize == 0 || M->isVolatile()) return false;
+
+  // If the source and destination of the memcpy are the same, then zap it.
+  if (M->getSource() == M->getDest()) {
+    MD->removeInstruction(M);
+    M->eraseFromParent();
     return false;
-  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
-           != AliasAnalysis::NoAlias)
+  }
+
+  // If copying from a constant, try to turn the memcpy into a memset.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+    if (GV->isConstant() && GV->hasDefinitiveInitializer())
+      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+        IRBuilder<> Builder(M);
+        Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+                             M->getAlignment(), false);
+        MD->removeInstruction(M);
+        M->eraseFromParent();
+        ++NumCpyToSet;
+        return true;
+      }
+
+  // The are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundance for DSE.
+  //   b) call-memcpy xform for return slot optimization.
+  MemDepResult DepInfo = MD->getDependency(M);
+  if (!DepInfo.isClobber())
     return false;
   
-  // If all checks passed, then we can transform these memcpy's
-  const Type *ArgTys[3] = { M->getRawDest()->getType(),
-                            MDep->getRawSource()->getType(),
-                            M->getLength()->getType() };
-  Function *MemCpyFun = Intrinsic::getDeclaration(
-                                 M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), ArgTys, 3);
+  if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+    return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
     
-  Value *Args[5] = {
-    M->getRawDest(), MDep->getRawSource(), M->getLength(),
-    M->getAlignmentCst(), M->getVolatileCst()
-  };
-  
-  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-  
-  
-  // If C and M don't interfere, then this is a valid transformation.  If they
-  // did, this would mean that the two sources overlap, which would be bad.
-  if (MD.getDependency(C) == dep) {
-    MD.removeInstruction(M);
-    M->eraseFromParent();
-    ++NumMemCpyInstr;
-    return true;
+  if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+    if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+                             CopySize->getZExtValue(), C)) {
+      MD->removeInstruction(M);
+      M->eraseFromParent();
+      return true;
+    }
   }
   
-  // Otherwise, there was no point in doing this, so we remove the call we
-  // inserted and act like nothing happened.
-  MD.removeInstruction(C);
-  C->eraseFromParent();
   return false;
 }
 
@@ -726,15 +804,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 
-  // If the memmove is a constant size, use it for the alias query, this allows
-  // us to optimize things like: memmove(P, P+64, 64);
-  uint64_t MemMoveSize = ~0ULL;
-  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
-    MemMoveSize = Len->getZExtValue();
-  
   // See if the pointers alias.
-  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
-      AliasAnalysis::NoAlias)
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
     return false;
   
   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -749,33 +820,107 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
-  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+  MD->removeInstruction(M);
 
   ++NumMoveToCpy;
   return true;
 }
   
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+  if (TD == 0) return false;
+
+  // Find out what feeds this byval argument.
+  Value *ByValArg = CS.getArgument(ArgNo);
+  const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+  uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+  MemDepResult DepInfo =
+    MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+                                 true, CS.getInstruction(),
+                                 CS.getInstruction()->getParent());
+  if (!DepInfo.isClobber())
+    return false;
+
+  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
+  // a memcpy, see if we can byval from the source of the memcpy instead of the
+  // result.
+  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+  if (MDep == 0 || MDep->isVolatile() ||
+      ByValArg->stripPointerCasts() != MDep->getDest())
+    return false;
+  
+  // The length of the memcpy must be larger or equal to the size of the byval.
+  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+    return false;
+
+  // Get the alignment of the byval.  If it is greater than the memcpy, then we
+  // can't do the substitution.  If the call doesn't specify the alignment, then
+  // it is some target specific value that we can't know.
+  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+  if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+    return false;  
+  
+  // Verify that the copied-from memory doesn't change in between the memcpy and
+  // the byval call.
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    foo(*a)
+  // It would be invalid to transform the second memcpy into foo(*b).
+  //
+  // NOTE: This is conservative, it will stop on any read from the source loc,
+  // not just the defining memcpy.
+  MemDepResult SourceDep =
+    MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+                                 false, CS.getInstruction(), MDep->getParent());
+  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+    return false;
+  
+  Value *TmpCast = MDep->getSource();
+  if (MDep->getSource()->getType() != ByValArg->getType())
+    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+                              "tmpcast", CS.getInstruction());
+  
+  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+               << "  " << *MDep << "\n"
+               << "  " << *CS.getInstruction() << "\n");
+  
+  // Otherwise we're good!  Update the byval argument.
+  CS.setArgument(ArgNo, TmpCast);
+  ++NumMemCpyInstr;
+  return true;
+}
 
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
 bool MemCpyOpt::iterateOnFunction(Function &F) {
   bool MadeChange = false;
 
   // Walk all instruction in the function.
   for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
-    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
-         BI != BE;) {
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
       // Avoid invalidating the iterator.
       Instruction *I = BI++;
       
+      bool RepeatInstruction = false;
+      
       if (StoreInst *SI = dyn_cast<StoreInst>(I))
         MadeChange |= processStore(SI, BI);
+      else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+        RepeatInstruction = processMemSet(M, BI);
       else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
-        MadeChange |= processMemCpy(M);
-      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
-        if (processMemMove(M)) {
-          --BI;         // Reprocess the new memcpy.
-          MadeChange = true;
-        }
+        RepeatInstruction = processMemCpy(M);
+      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+        RepeatInstruction = processMemMove(M);
+      else if (CallSite CS = (Value*)I) {
+        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+          if (CS.paramHasAttr(i+1, Attribute::ByVal))
+            MadeChange |= processByValArgument(CS, i);
+      }
+
+      // Reprocess the instruction if desired.
+      if (RepeatInstruction) {
+        if (BI != BB->begin()) --BI;
+        MadeChange = true;
       }
     }
   }
@@ -788,14 +933,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
 //
 bool MemCpyOpt::runOnFunction(Function &F) {
   bool MadeChange = false;
+  MD = &getAnalysis<MemoryDependenceAnalysis>();
+  TD = getAnalysisIfAvailable<TargetData>();
   while (1) {
     if (!iterateOnFunction(F))
       break;
     MadeChange = true;
   }
   
+  MD = 0;
   return MadeChange;
 }
-
-
-
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b8afcc12d927..e093b52571af 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,9 @@ namespace {
     bool MadeChange;
   public:
     static char ID; // Pass identification, replacement for typeid
-    Reassociate() : FunctionPass(ID) {}
+    Reassociate() : FunctionPass(ID) {
+      initializeReassociatePass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -104,7 +106,7 @@ namespace {
 
 char Reassociate::ID = 0;
 INITIALIZE_PASS(Reassociate, "reassociate",
-                "Reassociate expressions", false, false);
+                "Reassociate expressions", false, false)
 
 // Public interface to the Reassociate pass
 FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
@@ -238,6 +240,12 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
   RHS->setOperand(0, LHS);
   I->setOperand(0, RHS);
 
+  // Conservatively clear all the optional flags, which may not hold
+  // after the reassociation.
+  I->clearSubclassOptionalData();
+  LHS->clearSubclassOptionalData();
+  RHS->clearSubclassOptionalData();
+
   ++NumLinear;
   MadeChange = true;
   DEBUG(dbgs() << "Linearized: " << *I << '\n');
@@ -339,6 +347,12 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
       DEBUG(dbgs() << "RA: " << *I << '\n');
       I->setOperand(0, Ops[i].Op);
       I->setOperand(1, Ops[i+1].Op);
+
+      // Clear all the optional flags, which may not hold after the
+      // reassociation if the expression involved more than just this operation.
+      if (Ops.size() != 2)
+        I->clearSubclassOptionalData();
+
       DEBUG(dbgs() << "TO: " << *I << '\n');
       MadeChange = true;
       ++NumChanged;
@@ -354,6 +368,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
   if (I->getOperand(1) != Ops[i].Op) {
     DEBUG(dbgs() << "RA: " << *I << '\n');
     I->setOperand(1, Ops[i].Op);
+
+    // Conservatively clear all the optional flags, which may not hold
+    // after the reassociation.
+    I->clearSubclassOptionalData();
+
     DEBUG(dbgs() << "TO: " << *I << '\n');
     MadeChange = true;
     ++NumChanged;
@@ -809,16 +828,23 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
     // RemoveFactorFromExpression on successive values to behave differently.
     Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
     SmallVector<Value*, 4> NewMulOps;
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    for (unsigned i = 0; i != Ops.size(); ++i) {
       // Only try to remove factors from expressions we're allowed to.
       BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
       if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
         continue;
       
       if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
-        NewMulOps.push_back(V);
-        Ops.erase(Ops.begin()+i);
-        --i; --e;
+        // The factorized operand may occur several times.  Convert them all in
+        // one fell swoop.
+        for (unsigned j = Ops.size(); j != i;) {
+          --j;
+          if (Ops[j].Op == Ops[i].Op) {
+            NewMulOps.push_back(V);
+            Ops.erase(Ops.begin()+j);
+          }
+        }
+        --i;
       }
     }
     
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 506b72ac34e0..459bb0621f88 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,9 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
 namespace {
   struct RegToMem : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    RegToMem() : FunctionPass(ID) {}
+    RegToMem() : FunctionPass(ID) {
+      initializeRegToMemPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,9 +61,11 @@ namespace {
 }
   
 char RegToMem::ID = 0;
-INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
-                false, false);
-
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+                false, false)
 
 bool RegToMem::runOnFunction(Function &F) {
   if (F.isDeclaration()) 
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 6115c05c20ac..c82e929b364e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -481,6 +481,19 @@ private:
     }
   }
 
+  /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
+  /// map for I and PN, but if one is there already, do not create another.
+  /// (Duplicate entries do not break anything directly, but can lead to
+  /// exponential growth of the table in rare cases.)
+  void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+    std::multimap<PHINode*, Instruction*>::iterator J, E;
+    tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+    for (; J != E; ++J)
+      if (J->second == I)
+        return;
+    UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+  }
+
 private:
   friend class InstVisitor<SCCPSolver>;
 
@@ -973,9 +986,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
         if (Result.isConstant()) {
           markConstant(IV, &I, Result.getConstant());
           // Remember that this instruction is virtually using the PHI node
-          // operands.
-          UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
-          UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+          // operands. 
+          InsertInOverdefinedPHIs(&I, PN1);
+          InsertInOverdefinedPHIs(&I, PN2);
           return;
         }
         
@@ -1056,8 +1069,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
           markConstant(&I, Result.getConstant());
           // Remember that this instruction is virtually using the PHI node
           // operands.
-          UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
-          UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+          InsertInOverdefinedPHIs(&I, PN1);
+          InsertInOverdefinedPHIs(&I, PN2);
           return;
         }
         
@@ -1585,22 +1598,20 @@ namespace {
   ///
   struct SCCP : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    SCCP() : FunctionPass(ID) {}
+    SCCP() : FunctionPass(ID) {
+      initializeSCCPPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnFunction - Run the Sparse Conditional Constant Propagation
     // algorithm, and return true if the function was modified.
     //
     bool runOnFunction(Function &F);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-    }
   };
 } // end anonymous namespace
 
 char SCCP::ID = 0;
 INITIALIZE_PASS(SCCP, "sccp",
-                "Sparse Conditional Constant Propagation", false, false);
+                "Sparse Conditional Constant Propagation", false, false)
 
 // createSCCPPass - This is the public interface to this file.
 FunctionPass *llvm::createSCCPPass() {
@@ -1701,7 +1712,9 @@ namespace {
   ///
   struct IPSCCP : public ModulePass {
     static char ID;
-    IPSCCP() : ModulePass(ID) {}
+    IPSCCP() : ModulePass(ID) {
+      initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+    }
     bool runOnModule(Module &M);
   };
 } // end anonymous namespace
@@ -1709,7 +1722,7 @@ namespace {
 char IPSCCP::ID = 0;
 INITIALIZE_PASS(IPSCCP, "ipsccp",
                 "Interprocedural Sparse Conditional Constant Propagation",
-                false, false);
+                false, false)
 
 // createIPSCCPPass - This is the public interface to this file.
 ModulePass *llvm::createIPSCCPPass() {
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index cb034232df53..bf9ca6d803b6 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -7,12 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMScalarOpts.a, which implements
-// several scalar transformations over the LLVM intermediate representation.
+// This file implements common infrastructure for libLLVMScalarOpts.a, which 
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Target/TargetData.h"
@@ -20,6 +23,50 @@
 
 using namespace llvm;
 
+/// initializeScalarOptsPasses - Initialize all passes linked into the 
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+  initializeADCEPass(Registry);
+  initializeBlockPlacementPass(Registry);
+  initializeCodeGenPreparePass(Registry);
+  initializeConstantPropagationPass(Registry);
+  initializeCorrelatedValuePropagationPass(Registry);
+  initializeDCEPass(Registry);
+  initializeDeadInstEliminationPass(Registry);
+  initializeDSEPass(Registry);
+  initializeGEPSplitterPass(Registry);
+  initializeGVNPass(Registry);
+  initializeEarlyCSEPass(Registry);
+  initializeIndVarSimplifyPass(Registry);
+  initializeJumpThreadingPass(Registry);
+  initializeLICMPass(Registry);
+  initializeLoopDeletionPass(Registry);
+  initializeLoopInstSimplifyPass(Registry);
+  initializeLoopRotatePass(Registry);
+  initializeLoopStrengthReducePass(Registry);
+  initializeLoopUnrollPass(Registry);
+  initializeLoopUnswitchPass(Registry);
+  initializeLoopIdiomRecognizePass(Registry);
+  initializeLowerAtomicPass(Registry);
+  initializeMemCpyOptPass(Registry);
+  initializeReassociatePass(Registry);
+  initializeRegToMemPass(Registry);
+  initializeSCCPPass(Registry);
+  initializeIPSCCPPass(Registry);
+  initializeSROA_DTPass(Registry);
+  initializeSROA_SSAUpPass(Registry);
+  initializeCFGSimplifyPassPass(Registry);
+  initializeSimplifyHalfPowrLibCallsPass(Registry);
+  initializeSimplifyLibCallsPass(Registry);
+  initializeSinkingPass(Registry);
+  initializeTailDupPass(Registry);
+  initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+  initializeScalarOpts(*unwrap(R));
+}
+
 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createAggressiveDCEPass());
 }
@@ -56,10 +103,6 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopDeletionPass());
 }
 
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopIndexSplitPass());
-}
-
 void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopRotatePass());
 }
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index fee317dbd9ab..c3ca85280ee7 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -31,28 +31,34 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumReplaced,  "Number of allocas broken up");
 STATISTIC(NumPromoted,  "Number of allocas promoted");
+STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
 STATISTIC(NumConverted, "Number of aggregates converted to scalar");
 STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
 
 namespace {
   struct SROA : public FunctionPass {
-    static char ID; // Pass identification, replacement for typeid
-    explicit SROA(signed T = -1) : FunctionPass(ID) {
+    SROA(int T, bool hasDT, char &ID)
+      : FunctionPass(ID), HasDomTree(hasDT) {
       if (T == -1)
         SRThreshold = 128;
       else
@@ -64,17 +70,10 @@ namespace {
     bool performScalarRepl(Function &F);
     bool performPromotion(Function &F);
 
-    // getAnalysisUsage - This pass does not require any passes, but we know it
-    // will not alter the CFG, so say so.
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();
-      AU.setPreservesCFG();
-    }
-
   private:
+    bool HasDomTree;
     TargetData *TD;
-    
+
     /// DeadInsts - Keep track of instructions we have made dead, so that
     /// we can remove them after we are done working.
     SmallVector<Value*, 32> DeadInsts;
@@ -83,39 +82,61 @@ namespace {
     /// information about the uses.  All these fields are initialized to false
     /// and set to true when something is learned.
     struct AllocaInfo {
+      /// The alloca to promote.
+      AllocaInst *AI;
+      
+      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+      /// looping and avoid redundant work.
+      SmallPtrSet<PHINode*, 8> CheckedPHIs;
+      
       /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
       bool isUnsafe : 1;
-      
+
       /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
       bool isMemCpySrc : 1;
 
       /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
       bool isMemCpyDst : 1;
 
-      AllocaInfo()
-        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+      /// hasSubelementAccess - This is true if a subelement of the alloca is
+      /// ever accessed, or false if the alloca is only accessed with mem
+      /// intrinsics or load/store that only access the entire alloca at once.
+      bool hasSubelementAccess : 1;
+      
+      /// hasALoadOrStore - This is true if there are any loads or stores to it.
+      /// The alloca may just be accessed with memcpy, for example, which would
+      /// not set this.
+      bool hasALoadOrStore : 1;
+      
+      explicit AllocaInfo(AllocaInst *ai)
+        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+          hasSubelementAccess(false), hasALoadOrStore(false) {}
     };
-    
+
     unsigned SRThreshold;
 
-    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+      I.isUnsafe = true;
+      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
+    }
 
     bool isSafeAllocaToScalarRepl(AllocaInst *AI);
 
-    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                             AllocaInfo &Info);
-    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
-                   AllocaInfo &Info);
-    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
-                         const Type *MemOpType, bool isStore, AllocaInfo &Info);
+    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+                                         AllocaInfo &Info);
+    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+                         const Type *MemOpType, bool isStore, AllocaInfo &Info,
+                         Instruction *TheAccess, bool AllowWholeAccess);
     bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
     uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                   const Type *&IdxTy);
-    
-    void DoScalarReplacement(AllocaInst *AI, 
+
+    void DoScalarReplacement(AllocaInst *AI,
                              std::vector<AllocaInst*> &WorkList);
     void DeleteDeadInstructions();
-   
+
     void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                               SmallVector<AllocaInst*, 32> &NewElts);
     void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -129,18 +150,63 @@ namespace {
                                        SmallVector<AllocaInst*, 32> &NewElts);
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
-    
+
     static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
   };
+  
+  // SROA_DT - SROA that uses DominatorTree.
+  struct SROA_DT : public SROA {
+    static char ID;
+  public:
+    SROA_DT(int T = -1) : SROA(T, true, ID) {
+      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+    }
+    
+    // getAnalysisUsage - This pass does not require any passes, but we know it
+    // will not alter the CFG, so say so.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.setPreservesCFG();
+    }
+  };
+  
+  // SROA_SSAUp - SROA that uses SSAUpdater.
+  struct SROA_SSAUp : public SROA {
+    static char ID;
+  public:
+    SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+    }
+    
+    // getAnalysisUsage - This pass does not require any passes, but we know it
+    // will not alter the CFG, so say so.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+  };
+  
 }
 
-char SROA::ID = 0;
-INITIALIZE_PASS(SROA, "scalarrepl",
-                "Scalar Replacement of Aggregates", false, false);
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+                "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+                "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+                    "Scalar Replacement of Aggregates (SSAUp)", false, false)
 
 // Public interface to the ScalarReplAggregates pass
-FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { 
-  return new SROA(Threshold);
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+                                                   bool UseDomTree) {
+  if (UseDomTree)
+    return new SROA_DT(Threshold);
+  return new SROA_SSAUp(Threshold);
 }
 
 
@@ -156,16 +222,16 @@ class ConvertToScalarInfo {
   /// AllocaSize - The size of the alloca being considered.
   unsigned AllocaSize;
   const TargetData &TD;
- 
+
   /// IsNotTrivial - This is set to true if there is some access to the object
   /// which means that mem2reg can't promote it.
   bool IsNotTrivial;
-  
+
   /// VectorTy - This tracks the type that we should promote the vector to if
   /// it is possible to turn it into a vector.  This starts out null, and if it
   /// isn't possible to turn into a vector type, it gets set to VoidTy.
   const Type *VectorTy;
-  
+
   /// HadAVector - True if there is at least one vector access to the alloca.
   /// We don't want to turn random arrays into vectors and use vector element
   /// insert/extract, but if there are element accesses to something that is
@@ -179,14 +245,14 @@ public:
     VectorTy = 0;
     HadAVector = false;
   }
-  
+
   AllocaInst *TryConvert(AllocaInst *AI);
-  
+
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
   void MergeInType(const Type *In, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
-  
+
   Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                     uint64_t Offset, IRBuilder<> &Builder);
   Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
@@ -195,26 +261,6 @@ private:
 } // end anonymous namespace.
 
 
-/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
-/// allowed to form.  We do this to avoid MMX types, which is a complete hack,
-/// but is required until the backend is fixed.
-static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
-  StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
-  if (!Triple.startswith("i386") &&
-      !Triple.startswith("x86_64"))
-    return false;
-  
-  // Reject all the MMX vector types.
-  switch (VTy->getNumElements()) {
-  default: return false;
-  case 1: return VTy->getElementType()->isIntegerTy(64);
-  case 2: return VTy->getElementType()->isIntegerTy(32);
-  case 4: return VTy->getElementType()->isIntegerTy(16);
-  case 8: return VTy->getElementType()->isIntegerTy(8);
-  }
-}
-
-
 /// TryConvert - Analyze the specified alloca, and if it is safe to do so,
 /// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
 /// alloca if possible or null if not.
@@ -223,7 +269,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   // out.
   if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
     return 0;
-  
+
   // If we were able to find a vector type that can handle this with
   // insert/extract elements, and if there was at least one use that had
   // a vector type, promote this to a vector.  We don't want to promote
@@ -231,8 +277,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   // we just get a lot of insert/extracts.  If at least one vector is
   // involved, then we probably really do have a union of vector/array.
   const Type *NewTy;
-  if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
-      !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
+  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
     DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
           << *VectorTy << '\n');
     NewTy = VectorTy;  // Use the vector type.
@@ -263,7 +308,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   // nothing to be done.
   if (VectorTy && VectorTy->isVoidTy())
     return;
-  
+
   // If this could be contributing to a vector, analyze it.
 
   // If the In type is a vector that is the same size as the alloca, see if it
@@ -271,7 +316,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
     // Remember if we saw a vector type.
     HadAVector = true;
-    
+
     if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
       // If we're storing/loading a vector of the right size, allow it as a
       // vector.  If this the first vector we see, remember the type so that
@@ -290,7 +335,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
     // compatible with it.
     unsigned EltSize = In->getPrimitiveSizeInBits()/8;
     if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
-        (VectorTy == 0 || 
+        (VectorTy == 0 ||
          cast<VectorType>(VectorTy)->getElementType()
                ->getPrimitiveSizeInBits()/8 == EltSize)) {
       if (VectorTy == 0)
@@ -298,7 +343,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
       return;
     }
   }
-  
+
   // Otherwise, we have a case that we can't handle with an optimized vector
   // form.  We can still turn this into a large integer.
   VectorTy = Type::getVoidTy(In->getContext());
@@ -316,22 +361,28 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
 bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
-    
+
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       // Don't break volatile loads.
       if (LI->isVolatile())
         return false;
+      // Don't touch MMX operations.
+      if (LI->getType()->isX86_MMXTy())
+        return false;
       MergeInType(LI->getType(), Offset);
       continue;
     }
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Storing the pointer, not into the value?
       if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+      // Don't touch MMX operations.
+      if (SI->getOperand(0)->getType()->isX86_MMXTy())
+        return false;
       MergeInType(SI->getOperand(0)->getType(), Offset);
       continue;
     }
-    
+
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
       IsNotTrivial = true;  // Can't be mem2reg'd.
       if (!CanConvertToScalar(BCI, Offset))
@@ -343,7 +394,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       // If this is a GEP with a variable indices, we can't handle it.
       if (!GEP->hasAllConstantIndices())
         return false;
-      
+
       // Compute the offset that this GEP adds to the pointer.
       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
       uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
@@ -372,15 +423,15 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
       if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
         return false;
-      
+
       IsNotTrivial = true;  // Can't be mem2reg'd.
       continue;
     }
-    
+
     // Otherwise, we cannot handle this!
     return false;
   }
-  
+
   return true;
 }
 
@@ -411,9 +462,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       GEP->eraseFromParent();
       continue;
     }
-    
-    IRBuilder<> Builder(User->getParent(), User);
-    
+
+    IRBuilder<> Builder(User);
+
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       // The load is a bit extract from NewAI shifted right by Offset bits.
       Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
@@ -423,7 +474,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       LI->eraseFromParent();
       continue;
     }
-    
+
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       assert(SI->getOperand(0) != Ptr && "Consistency error!");
       Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
@@ -431,14 +482,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              Builder);
       Builder.CreateStore(New, NewAI);
       SI->eraseFromParent();
-      
+
       // If the load we just inserted is now dead, then the inserted store
       // overwrote the entire thing.
       if (Old->use_empty())
         Old->eraseFromParent();
       continue;
     }
-    
+
     // If this is a constant sized memset of a constant value (e.g. 0) we can
     // transform it into a store of the expanded constant value.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
@@ -446,7 +497,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
       if (NumBytes != 0) {
         unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-        
+
         // Compute the value replicated the right number of times.
         APInt APVal(NumBytes*8, Val);
 
@@ -454,17 +505,17 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
         if (Val)
           for (unsigned i = 1; i != NumBytes; ++i)
             APVal |= APVal << 8;
-        
+
         Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
         Value *New = ConvertScalar_InsertValue(
                                     ConstantInt::get(User->getContext(), APVal),
                                                Old, Offset, Builder);
         Builder.CreateStore(New, NewAI);
-        
+
         // If the load we just inserted is now dead, then the memset overwrote
         // the entire thing.
         if (Old->use_empty())
-          Old->eraseFromParent();        
+          Old->eraseFromParent();
       }
       MSI->eraseFromParent();
       continue;
@@ -474,29 +525,42 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     // can handle it like a load or store of the scalar type.
     if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
       assert(Offset == 0 && "must be store to start of alloca");
-      
+
       // If the source and destination are both to the same alloca, then this is
       // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
       // as appropriate.
-      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
-      
-      if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
         // Dest must be OrigAI, change this to be a load from the original
         // pointer (bitcasted), then a store to our new alloca.
         assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
         Value *SrcPtr = MTI->getSource();
-        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
-        
+        const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+        const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+          AIPTy = PointerType::get(AIPTy->getElementType(),
+                                   SPTy->getAddressSpace());
+        }
+        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
         LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
         SrcVal->setAlignment(MTI->getAlignment());
         Builder.CreateStore(SrcVal, NewAI);
-      } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
         // Src must be OrigAI, change this to be a load from NewAI then a store
         // through the original dest pointer (bitcasted).
         assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
         LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
 
-        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+        const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+        const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+          AIPTy = PointerType::get(AIPTy->getElementType(),
+                                   DPTy->getAddressSpace());
+        }
+        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
         StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
         NewStore->setAlignment(MTI->getAlignment());
       } else {
@@ -506,7 +570,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       MTI->eraseFromParent();
       continue;
     }
-    
+
     llvm_unreachable("Unsupported operation!");
   }
 }
@@ -548,7 +612,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
       V = Builder.CreateBitCast(V, ToType, "tmp");
     return V;
   }
-  
+
   // If ToType is a first class aggregate, extract out each of the pieces and
   // use insertvalue's to form the FCA.
   if (const StructType *ST = dyn_cast<StructType>(ToType)) {
@@ -562,7 +626,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
     }
     return Res;
   }
-  
+
   if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
     uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
     Value *Res = UndefValue::get(AT);
@@ -598,7 +662,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                                  ConstantInt::get(FromVal->getType(),
                                                            ShAmt), "tmp");
   else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
-    FromVal = Builder.CreateShl(FromVal, 
+    FromVal = Builder.CreateShl(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                           -ShAmt), "tmp");
 
@@ -606,11 +670,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
   if (LIBitWidth < NTy->getBitWidth())
     FromVal =
-      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), 
+      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                     LIBitWidth), "tmp");
   else if (LIBitWidth > NTy->getBitWidth())
     FromVal =
-       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), 
+       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                     LIBitWidth), "tmp");
 
   // If the result is an integer, this is a trunc or bitcast.
@@ -647,7 +711,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
   if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
     uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
     uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
-    
+
     // Changing the whole vector with memset or with an access of a different
     // vector type?
     if (ValSize == VecSize)
@@ -657,28 +721,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
 
     // Must be an element insertion.
     unsigned Elt = Offset/EltSize;
-    
+
     if (SV->getType() != VTy->getElementType())
       SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-    
-    SV = Builder.CreateInsertElement(Old, SV, 
+
+    SV = Builder.CreateInsertElement(Old, SV,
                      ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                      "tmp");
     return SV;
   }
-  
+
   // If SV is a first-class aggregate value, insert each value recursively.
   if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
     const StructLayout &Layout = *TD.getStructLayout(ST);
     for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
       Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
-      Old = ConvertScalar_InsertValue(Elt, Old, 
+      Old = ConvertScalar_InsertValue(Elt, Old,
                                       Offset+Layout.getElementOffsetInBits(i),
                                       Builder);
     }
     return Old;
   }
-  
+
   if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
     uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
     for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
@@ -778,16 +842,298 @@ bool SROA::runOnFunction(Function &F) {
   return Changed;
 }
 
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+  AllocaInst *AI;
+public:
+  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
+    : LoadAndStorePromoter(Insts, S), AI(0) {}
+  
+  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+    // Remember which alloca we're promoting (for isInstInList).
+    this->AI = AI;
+    LoadAndStorePromoter::run(Insts);
+    AI->eraseFromParent();
+  }
+  
+  virtual bool isInstInList(Instruction *I,
+                            const SmallVectorImpl<Instruction*> &Insts) const {
+    if (LoadInst *LI = dyn_cast<LoadInst>(I))
+      return LI->getOperand(0) == AI;
+    return cast<StoreInst>(I)->getPointerOperand() == AI;
+  }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the result, allowing the load of the alloca to be promoted.
+/// From this:
+///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   %V2 = load i32* %Other
+///   %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+  bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+  bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+  
+  for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+       UI != UE; ++UI) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI);
+    if (LI == 0 || LI->isVolatile()) return false;
+    
+    // Both operands to the select need to be dereferencable, either absolutely
+    // (e.g. allocas) or at this point because we can see other accesses to it.
+    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+                                                    LI->getAlignment(), TD))
+      return false;
+    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+                                                    LI->getAlignment(), TD))
+      return false;
+  }
+  
+  return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+///   %P2 = phi [i32* %Alloca, i32* %Other]
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   ...
+///   %V2 = load i32* %Other
+///   ...
+///   %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+  // For now, we can only do this promotion if the load is in the same block as
+  // the PHI, and if there are no stores between the phi and load.
+  // TODO: Allow recursive phi users.
+  // TODO: Allow stores.
+  BasicBlock *BB = PN->getParent();
+  unsigned MaxAlign = 0;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE; ++UI) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI);
+    if (LI == 0 || LI->isVolatile()) return false;
+    
+    // For now we only allow loads in the same block as the PHI.  This is a
+    // common case that happens when instcombine merges two loads through a PHI.
+    if (LI->getParent() != BB) return false;
+    
+    // Ensure that there are no instructions between the PHI and the load that
+    // could store.
+    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+      if (BBI->mayWriteToMemory())
+        return false;
+    
+    MaxAlign = std::max(MaxAlign, LI->getAlignment());
+  }
+  
+  // Okay, we know that we have one or more loads in the same block as the PHI.
+  // We can transform this if it is safe to push the loads into the predecessor
+  // blocks.  The only thing to watch out for is that we can't put a possibly
+  // trapping load in the predecessor if it is a critical edge.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *Pred = PN->getIncomingBlock(i);
+
+    // If the predecessor has a single successor, then the edge isn't critical.
+    if (Pred->getTerminator()->getNumSuccessors() == 1)
+      continue;
+    
+    Value *InVal = PN->getIncomingValue(i);
+    
+    // If the InVal is an invoke in the pred, we can't put a load on the edge.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+      if (II->getParent() == Pred)
+        return false;
+
+    // If this pointer is always safe to load, or if we can prove that there is
+    // already a load in the block, then we can move the load to the pred block.
+    if (InVal->isDereferenceablePointer() ||
+        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+      continue;
+    
+    return false;
+  }
+    
+  return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it.  If the alloca is close but
+/// not quite there, this will transform the code to allow promotion.  As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+  SetVector<Instruction*, SmallVector<Instruction*, 4>,
+            SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+  
+  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE; ++UI) {
+    User *U = *UI;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->isVolatile())
+        return false;
+      continue;
+    }
+    
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (SI->getOperand(0) == AI || SI->isVolatile())
+        return false;   // Don't allow a store OF the AI, only INTO the AI.
+      continue;
+    }
+
+    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+      // If the condition being selected on is a constant, fold the select, yes
+      // this does (rarely) happen early on.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+        Value *Result = SI->getOperand(1+CI->isZero());
+        SI->replaceAllUsesWith(Result);
+        SI->eraseFromParent();
+        
+        // This is very rare and we just scrambled the use list of AI, start
+        // over completely.
+        return tryToMakeAllocaBePromotable(AI, TD);
+      }
+
+      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+      // loads, then we can transform this by rewriting the select.
+      if (!isSafeSelectToSpeculate(SI, TD))
+        return false;
+      
+      InstsToRewrite.insert(SI);
+      continue;
+    }
+    
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      if (PN->use_empty()) {  // Dead PHIs can be stripped.
+        InstsToRewrite.insert(PN);
+        continue;
+      }
+      
+      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+      // in the pred blocks, then we can transform this by rewriting the PHI.
+      if (!isSafePHIToSpeculate(PN, TD))
+        return false;
+      
+      InstsToRewrite.insert(PN);
+      continue;
+    }
+    
+    return false;
+  }
+
+  // If there are no instructions to rewrite, then all uses are load/stores and
+  // we're done!
+  if (InstsToRewrite.empty())
+    return true;
+  
+  // If we have instructions that need to be rewritten for this to be promotable
+  // take care of it now.
+  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
+      // loads with a new select.
+      while (!SI->use_empty()) {
+        LoadInst *LI = cast<LoadInst>(SI->use_back());
+      
+        IRBuilder<> Builder(LI);
+        LoadInst *TrueLoad = 
+          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+        LoadInst *FalseLoad = 
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+        
+        // Transfer alignment and TBAA info if present.
+        TrueLoad->setAlignment(LI->getAlignment());
+        FalseLoad->setAlignment(LI->getAlignment());
+        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+        }
+        
+        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+        V->takeName(LI);
+        LI->replaceAllUsesWith(V);
+        LI->eraseFromParent();
+      }
+    
+      // Now that all the loads are gone, the select is gone too.
+      SI->eraseFromParent();
+      continue;
+    }
+    
+    // Otherwise, we have a PHI node which allows us to push the loads into the
+    // predecessors.
+    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+    if (PN->use_empty()) {
+      PN->eraseFromParent();
+      continue;
+    }
+    
+    const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+    PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+
+    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
+    // matter which one we get and if any differ, it doesn't matter.
+    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+    unsigned Align = SomeLoad->getAlignment();
+    
+    // Rewrite all loads of the PN to use the new PHI.
+    while (!PN->use_empty()) {
+      LoadInst *LI = cast<LoadInst>(PN->use_back());
+      LI->replaceAllUsesWith(NewPN);
+      LI->eraseFromParent();
+    }
+    
+    // Inject loads into all of the pred blocks.  Keep track of which blocks we
+    // insert them into in case we have multiple edges from the same block.
+    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+    
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *Pred = PN->getIncomingBlock(i);
+      LoadInst *&Load = InsertedLoads[Pred];
+      if (Load == 0) {
+        Load = new LoadInst(PN->getIncomingValue(i),
+                            PN->getName() + "." + Pred->getName(),
+                            Pred->getTerminator());
+        Load->setAlignment(Align);
+        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+      }
+      
+      NewPN->addIncoming(Load, Pred);
+    }
+    
+    PN->eraseFromParent();
+  }
+    
+  ++NumAdjusted;
+  return true;
+}
+
 
 bool SROA::performPromotion(Function &F) {
   std::vector<AllocaInst*> Allocas;
-  DominatorTree         &DT = getAnalysis<DominatorTree>();
-  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+  DominatorTree *DT = 0;
+  if (HasDomTree)
+    DT = &getAnalysis<DominatorTree>();
 
   BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
 
   bool Changed = false;
-
+  SmallVector<Instruction*, 64> Insts;
   while (1) {
     Allocas.clear();
 
@@ -795,12 +1141,27 @@ bool SROA::performPromotion(Function &F) {
     // the entry node
     for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
       if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
-        if (isAllocaPromotable(AI))
+        if (tryToMakeAllocaBePromotable(AI, TD))
           Allocas.push_back(AI);
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT, DF);
+    if (HasDomTree)
+      PromoteMemToReg(Allocas, *DT);
+    else {
+      SSAUpdater SSA;
+      for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+        AllocaInst *AI = Allocas[i];
+        
+        // Build list of instructions to promote.
+        for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+             UI != E; ++UI)
+          Insts.push_back(cast<Instruction>(*UI));
+        
+        AllocaPromoter(Insts, SSA).run(AI, Insts);
+        Insts.clear();
+      }
+    }
     NumPromoted += Allocas.size();
     Changed = true;
   }
@@ -842,7 +1203,7 @@ bool SROA::performScalarRepl(Function &F) {
   while (!WorkList.empty()) {
     AllocaInst *AI = WorkList.back();
     WorkList.pop_back();
-    
+
     // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
     // with unused elements.
     if (AI->use_empty()) {
@@ -854,7 +1215,7 @@ bool SROA::performScalarRepl(Function &F) {
     // If this alloca is impossible for us to promote, reject it early.
     if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
       continue;
-    
+
     // Check to see if this allocation is only modified by a memcpy/memmove from
     // a constant global.  If this is the case, we can change all users to use
     // the constant global instead.  This is commonly produced by the CFE by
@@ -871,7 +1232,7 @@ bool SROA::performScalarRepl(Function &F) {
       Changed = true;
       continue;
     }
-    
+
     // Check to see if we can perform the core SROA transformation.  We cannot
     // transform the allocation instruction if it is an array allocation
     // (allocations OF arrays are ok though), and an allocation of a scalar
@@ -880,10 +1241,10 @@ bool SROA::performScalarRepl(Function &F) {
 
     // Do not promote [0 x %struct].
     if (AllocaSize == 0) continue;
-    
+
     // Do not promote any struct whose size is too big.
     if (AllocaSize > SRThreshold) continue;
-    
+
     // If the alloca looks like a good candidate for scalar replacement, and if
     // all its users can be transformed, then split up the aggregate into its
     // separate elements.
@@ -906,8 +1267,8 @@ bool SROA::performScalarRepl(Function &F) {
       ++NumConverted;
       Changed = true;
       continue;
-    }      
-    
+    }
+
     // Otherwise, couldn't process this alloca.
   }
 
@@ -916,14 +1277,14 @@ bool SROA::performScalarRepl(Function &F) {
 
 /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
 /// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocaInst *AI, 
+void SROA::DoScalarReplacement(AllocaInst *AI,
                                std::vector<AllocaInst*> &WorkList) {
   DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
   SmallVector<AllocaInst*, 32> ElementAllocas;
   if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
     ElementAllocas.reserve(ST->getNumContainedTypes());
     for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
-      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 
+      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                       AI->getAlignment(),
                                       AI->getName() + "." + Twine(i), AI);
       ElementAllocas.push_back(NA);
@@ -971,48 +1332,106 @@ void SROA::DeleteDeadInstructions() {
     I->eraseFromParent();
   }
 }
-    
+
 /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
 /// performing scalar replacement of alloca AI.  The results are flagged in
 /// the Info parameter.  Offset indicates the position within AI that is
 /// referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
                                AllocaInfo &Info) {
   for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
 
     if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
-      isSafeForScalarRepl(BC, AI, Offset, Info);
+      isSafeForScalarRepl(BC, Offset, Info);
     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       uint64_t GEPOffset = Offset;
-      isSafeGEP(GEPI, AI, GEPOffset, Info);
+      isSafeGEP(GEPI, GEPOffset, Info);
       if (!Info.isUnsafe)
-        isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+        isSafeForScalarRepl(GEPI, GEPOffset, Info);
     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
-      if (Length)
-        isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
-                        UI.getOperandNo() == 0, Info);
-      else
-        MarkUnsafe(Info);
+      if (Length == 0)
+        return MarkUnsafe(Info, User);
+      isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+                      UI.getOperandNo() == 0, Info, MI,
+                      true /*AllowWholeAccess*/);
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      if (LI->isVolatile())
+        return MarkUnsafe(Info, User);
+      const Type *LIType = LI->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+        
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // Store is ok if storing INTO the pointer, not storing the pointer
+      if (SI->isVolatile() || SI->getOperand(0) == I)
+        return MarkUnsafe(Info, User);
+        
+      const Type *SIType = SI->getOperand(0)->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      isSafePHISelectUseForScalarRepl(User, Offset, Info);
+    } else {
+      return MarkUnsafe(Info, User);
+    }
+    if (Info.isUnsafe) return;
+  }
+}
+ 
+
+/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
+/// derived from the alloca, we can often still split the alloca into elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca.  The most important use case for this is single load and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+                                           AllocaInfo &Info) {
+  // If we've already checked this PHI, don't do it again.
+  if (PHINode *PN = dyn_cast<PHINode>(I))
+    if (!Info.CheckedPHIs.insert(PN))
+      return;
+  
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+    
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
+      // but would have to prove that we're staying inside of an element being
+      // promoted.
+      if (!GEPI->hasAllZeroIndices())
+        return MarkUnsafe(Info, User);
+      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
     } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      if (!LI->isVolatile()) {
-        const Type *LIType = LI->getType();
-        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
-                        LIType, false, Info);
-      } else
-        MarkUnsafe(Info);
+      if (LI->isVolatile())
+        return MarkUnsafe(Info, User);
+      const Type *LIType = LI->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+      
     } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       // Store is ok if storing INTO the pointer, not storing the pointer
-      if (!SI->isVolatile() && SI->getOperand(0) != I) {
-        const Type *SIType = SI->getOperand(0)->getType();
-        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
-                        SIType, true, Info);
-      } else
-        MarkUnsafe(Info);
+      if (SI->isVolatile() || SI->getOperand(0) == I)
+        return MarkUnsafe(Info, User);
+      
+      const Type *SIType = SI->getOperand(0)->getType();
+      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
+      Info.hasALoadOrStore = true;
+    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+      isSafePHISelectUseForScalarRepl(User, Offset, Info);
     } else {
-      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
-      MarkUnsafe(Info);
+      return MarkUnsafe(Info, User);
     }
     if (Info.isUnsafe) return;
   }
@@ -1023,7 +1442,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
 /// references, and when the resulting offset corresponds to an element within
 /// the alloca type.  The results are flagged in the Info parameter.  Upon
 /// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
                      uint64_t &Offset, AllocaInfo &Info) {
   gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
   if (GEPIt == E)
@@ -1038,7 +1457,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
 
     ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
     if (!IdxVal)
-      return MarkUnsafe(Info);
+      return MarkUnsafe(Info, GEPI);
   }
 
   // Compute the offset due to this GEP and check if the alloca has a
@@ -1046,40 +1465,92 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
   SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
   Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                  &Indices[0], Indices.size());
-  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
-    MarkUnsafe(Info);
+  if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+    MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays).  If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+                                   const Type *&EltTy) {
+  if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+    NumElts = AT->getNumElements();
+    EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+    return true;
+  }
+  if (const StructType *ST = dyn_cast<StructType>(T)) {
+    NumElts = ST->getNumContainedTypes();
+    EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+    for (unsigned n = 1; n < NumElts; ++n) {
+      if (ST->getContainedType(n) != EltTy)
+        return false;
+    }
+    return true;
+  }
+  return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+  if (T1 == T2)
+    return true;
+
+  unsigned NumElts1, NumElts2;
+  const Type *EltTy1, *EltTy2;
+  if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+      isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+      NumElts1 == NumElts2 &&
+      EltTy1 == EltTy2)
+    return true;
+
+  return false;
 }
 
 /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
 /// alloca or has an offset and size that corresponds to a component element
 /// within it.  The offset checked here may have been formed from a GEP with a
 /// pointer bitcasted to a different type.
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit.  If false, it only allows accesses known to be in a single element.
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                            const Type *MemOpType, bool isStore,
-                           AllocaInfo &Info) {
+                           AllocaInfo &Info, Instruction *TheAccess,
+                           bool AllowWholeAccess) {
   // Check if this is a load/store of the entire alloca.
-  if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
-    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
-    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
-    // (which are essentially the same as the MemIntrinsics, especially with
-    // regard to copying padding between elements), or references using the
-    // aggregate type of the alloca.
-    if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
-      if (!UsesAggregateType) {
-        if (isStore)
-          Info.isMemCpyDst = true;
-        else
-          Info.isMemCpySrc = true;
-      }
+  if (Offset == 0 && AllowWholeAccess &&
+      MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+    // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+    // loads/stores (which are essentially the same as the MemIntrinsics with
+    // regard to copying padding between elements).  But, if an alloca is
+    // flagged as both a source and destination of such operations, we'll need
+    // to check later for padding between elements.
+    if (!MemOpType || MemOpType->isIntegerTy()) {
+      if (isStore)
+        Info.isMemCpyDst = true;
+      else
+        Info.isMemCpySrc = true;
+      return;
+    }
+    // This is also safe for references using a type that is compatible with
+    // the type of the alloca, so that loads/stores can be rewritten using
+    // insertvalue/extractvalue.
+    if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+      Info.hasSubelementAccess = true;
       return;
     }
   }
   // Check if the offset/size correspond to a component within the alloca type.
-  const Type *T = AI->getAllocatedType();
-  if (TypeHasComponent(T, Offset, MemSize))
+  const Type *T = Info.AI->getAllocatedType();
+  if (TypeHasComponent(T, Offset, MemSize)) {
+    Info.hasSubelementAccess = true;
     return;
+  }
 
-  return MarkUnsafe(Info);
+  return MarkUnsafe(Info, TheAccess);
 }
 
 /// TypeHasComponent - Return true if T has a component type with the
@@ -1116,14 +1587,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
 /// instruction.
 void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                 SmallVector<AllocaInst*, 32> &NewElts) {
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
-    Instruction *User = cast<Instruction>(*UI);
+  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI++);
 
     if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
       RewriteBitCast(BC, AI, Offset, NewElts);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      continue;
+    }
+    
+    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
       RewriteGEP(GEPI, AI, Offset, NewElts);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+      continue;
+    }
+    
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       uint64_t MemSize = Length->getZExtValue();
       if (Offset == 0 &&
@@ -1131,9 +1609,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
       // Otherwise the intrinsic can only touch a single element and the
       // address operand will be updated, so nothing else needs to be done.
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      continue;
+    }
+    
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       const Type *LIType = LI->getType();
-      if (LIType == AI->getAllocatedType()) {
+      
+      if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
         // Replace:
         //   %res = load { i32, i32 }* %alloc
         // with:
@@ -1155,10 +1637,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         // If this is a load of the entire alloca to an integer, rewrite it.
         RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
       }
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      continue;
+    }
+    
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       Value *Val = SI->getOperand(0);
       const Type *SIType = Val->getType();
-      if (SIType == AI->getAllocatedType()) {
+      if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
         // Replace:
         //   store { i32, i32 } %val, { i32, i32 }* %alloc
         // with:
@@ -1178,6 +1663,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
         // If this is a store of the entire alloca from an integer, rewrite it.
         RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
       }
+      continue;
+    }
+    
+    if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+      // If we have a PHI user of the alloca itself (as opposed to a GEP or 
+      // bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
+      // the new pointer.
+      if (!isa<AllocaInst>(I)) continue;
+      
+      assert(Offset == 0 && NewElts[0] &&
+             "Direct alloca use should have a zero offset");
+      
+      // If we have a use of the alloca, we know the derived uses will be
+      // utilizing just the first element of the scalarized result.  Insert a
+      // bitcast of the first alloca before the user as required.
+      AllocaInst *NewAI = NewElts[0];
+      BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+      NewAI->moveBefore(BCI);
+      TheUse = BCI;
+      continue;
     }
   }
 }
@@ -1305,7 +1810,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
     // function is only called for mem intrinsics that access the whole
     // aggregate, so non-zero GEPs are not an issue here.)
     OtherPtr = OtherPtr->stripPointerCasts();
-    
+
     // Copying the alloca to itself is a no-op: just delete it.
     if (OtherPtr == AI || OtherPtr == NewElts[0]) {
       // This code will run twice for a no-op memcpy -- once for each operand.
@@ -1316,28 +1821,26 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       DeadInsts.push_back(MI);
       return;
     }
-    
+
     // If the pointer is not the right type, insert a bitcast to the right
     // type.
     const Type *NewTy =
       PointerType::get(AI->getType()->getElementType(), AddrSpace);
-    
+
     if (OtherPtr->getType() != NewTy)
       OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
   }
-  
+
   // Process each element of the aggregate.
-  Value *TheFn = MI->getCalledValue();
-  const Type *BytePtrTy = MI->getRawDest()->getType();
   bool SROADest = MI->getRawDest() == Inst;
-  
+
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
 
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // If this is a memcpy/memmove, emit a GEP of the other element address.
     Value *OtherElt = 0;
     unsigned OtherEltAlign = MemAlignment;
-    
+
     if (OtherPtr) {
       Value *Idx[2] = { Zero,
                       ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
@@ -1353,7 +1856,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
         EltOffset = TD->getTypeAllocSize(EltTy)*i;
       }
-      
+
       // The alignment of the other pointer is the guaranteed alignment of the
       // element, which is affected by both the known alignment of the whole
       // mem intrinsic and the alignment of the element.  If the alignment of
@@ -1361,10 +1864,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       // known alignment is just 4 bytes.
       OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
     }
-    
+
     Value *EltPtr = NewElts[i];
     const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
-    
+
     // If we got down to a scalar, insert a load or store as appropriate.
     if (EltTy->isSingleValueType()) {
       if (isa<MemTransferInst>(MI)) {
@@ -1380,7 +1883,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         continue;
       }
       assert(isa<MemSetInst>(MI));
-      
+
       // If the stored element is zero (common case), just store a null
       // constant.
       Constant *StoreVal;
@@ -1400,7 +1903,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
             TotalVal = TotalVal.shl(8);
             TotalVal |= OneVal;
           }
-          
+
           // Convert the integer value to the appropriate type.
           StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
           if (ValTy->isPointerTy())
@@ -1408,12 +1911,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
           else if (ValTy->isFloatingPointTy())
             StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
           assert(StoreVal->getType() == ValTy && "Type mismatch!");
-          
+
           // If the requested value was a vector constant, create it.
           if (EltTy != ValTy) {
             unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
             SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
-            StoreVal = ConstantVector::get(&Elts[0], NumElts);
+            StoreVal = ConstantVector::get(Elts);
           }
         }
         new StoreInst(StoreVal, EltPtr, MI);
@@ -1422,55 +1925,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       // Otherwise, if we're storing a byte variable, use a memset call for
       // this element.
     }
-    
-    // Cast the element pointer to BytePtrTy.
-    if (EltPtr->getType() != BytePtrTy)
-      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-    
-    // Cast the other pointer (if we have one) to BytePtrTy. 
-    if (OtherElt && OtherElt->getType() != BytePtrTy) {
-      // Preserve address space of OtherElt
-      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
-      const PointerType* PTy = cast<PointerType>(BytePtrTy);
-      if (OtherPTy->getElementType() != PTy->getElementType()) {
-        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
-                                             OtherPTy->getAddressSpace());
-        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
-                                   OtherElt->getNameStr(), MI);
-      }
-    }
-    
+
     unsigned EltSize = TD->getTypeAllocSize(EltTy);
-    
+
+    IRBuilder<> Builder(MI);
+
     // Finally, insert the meminst for this element.
-    if (isa<MemTransferInst>(MI)) {
-      Value *Ops[] = {
-        SROADest ? EltPtr : OtherElt,  // Dest ptr
-        SROADest ? OtherElt : EltPtr,  // Src ptr
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        // Align
-        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
-        MI->getVolatileCst()
-      };
-      // In case we fold the address space overloaded memcpy of A to B
-      // with memcpy of B to C, change the function to be a memcpy of A to C.
-      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
-                            Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+    if (isa<MemSetInst>(MI)) {
+      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+                           MI->isVolatile());
     } else {
-      assert(isa<MemSetInst>(MI));
-      Value *Ops[] = {
-        EltPtr, MI->getArgOperand(1),  // Dest, Value,
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        Zero,  // Align
-        ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
-      };
-      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+      assert(isa<MemTransferInst>(MI));
+      Value *Dst = SROADest ? EltPtr : OtherElt;  // Dest ptr
+      Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr
+
+      if (isa<MemCpyInst>(MI))
+        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+      else
+        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
     }
   }
   DeadInsts.push_back(MI);
@@ -1486,12 +1958,13 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   Value *SrcVal = SI->getOperand(0);
   const Type *AllocaEltTy = AI->getAllocatedType();
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+  IRBuilder<> Builder(SI);
   
   // Handle tail padding by extending the operand
   if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
-    SrcVal = new ZExtInst(SrcVal,
-                          IntegerType::get(SI->getContext(), AllocaSizeBits), 
-                          "", SI);
+    SrcVal = Builder.CreateZExt(SrcVal,
+                            IntegerType::get(SI->getContext(), AllocaSizeBits));
 
   DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
                << '\n');
@@ -1500,47 +1973,44 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   // have different ways to compute the element offset.
   if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
     const StructLayout *Layout = TD->getStructLayout(EltSTy);
-    
+
     for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
       // Get the number of bits to shift SrcVal to get the value.
       const Type *FieldTy = EltSTy->getElementType(i);
       uint64_t Shift = Layout->getElementOffsetInBits(i);
-      
+
       if (TD->isBigEndian())
         Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
-      
+
       Value *EltVal = SrcVal;
       if (Shift) {
         Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
-        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
-                                            "sroa.store.elt", SI);
+        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
       }
-      
+
       // Truncate down to an integer of the right size.
       uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-      
+
       // Ignore zero sized fields like {}, they obviously contain no data.
       if (FieldSizeBits == 0) continue;
-      
+
       if (FieldSizeBits != AllocaSizeBits)
-        EltVal = new TruncInst(EltVal,
-                             IntegerType::get(SI->getContext(), FieldSizeBits),
-                              "", SI);
+        EltVal = Builder.CreateTrunc(EltVal,
+                             IntegerType::get(SI->getContext(), FieldSizeBits));
       Value *DestField = NewElts[i];
       if (EltVal->getType() == FieldTy) {
         // Storing to an integer field of this size, just do it.
       } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
         // Bitcast to the right element type (for fp/vector values).
-        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+        EltVal = Builder.CreateBitCast(EltVal, FieldTy);
       } else {
         // Otherwise, bitcast the dest pointer (for aggregates).
-        DestField = new BitCastInst(DestField,
-                              PointerType::getUnqual(EltVal->getType()),
-                                    "", SI);
+        DestField = Builder.CreateBitCast(DestField,
+                                     PointerType::getUnqual(EltVal->getType()));
       }
       new StoreInst(EltVal, DestField, SI);
     }
-    
+
   } else {
     const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
     const Type *ArrayEltTy = ATy->getElementType();
@@ -1548,50 +2018,48 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
     uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
 
     uint64_t Shift;
-    
+
     if (TD->isBigEndian())
       Shift = AllocaSizeBits-ElementOffset;
-    else 
+    else
       Shift = 0;
-    
+
     for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
       // Ignore zero sized fields like {}, they obviously contain no data.
       if (ElementSizeBits == 0) continue;
-      
+
       Value *EltVal = SrcVal;
       if (Shift) {
         Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
-        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
-                                            "sroa.store.elt", SI);
+        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
       }
-      
+
       // Truncate down to an integer of the right size.
       if (ElementSizeBits != AllocaSizeBits)
-        EltVal = new TruncInst(EltVal, 
-                               IntegerType::get(SI->getContext(), 
-                                                ElementSizeBits),"",SI);
+        EltVal = Builder.CreateTrunc(EltVal,
+                                     IntegerType::get(SI->getContext(),
+                                                      ElementSizeBits));
       Value *DestField = NewElts[i];
       if (EltVal->getType() == ArrayEltTy) {
         // Storing to an integer field of this size, just do it.
       } else if (ArrayEltTy->isFloatingPointTy() ||
                  ArrayEltTy->isVectorTy()) {
         // Bitcast to the right element type (for fp/vector values).
-        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+        EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
       } else {
         // Otherwise, bitcast the dest pointer (for aggregates).
-        DestField = new BitCastInst(DestField,
-                              PointerType::getUnqual(EltVal->getType()),
-                                    "", SI);
+        DestField = Builder.CreateBitCast(DestField,
+                                     PointerType::getUnqual(EltVal->getType()));
       }
       new StoreInst(EltVal, DestField, SI);
-      
+
       if (TD->isBigEndian())
         Shift -= ElementOffset;
-      else 
+      else
         Shift += ElementOffset;
     }
   }
-  
+
   DeadInsts.push_back(SI);
 }
 
@@ -1603,10 +2071,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   // and form the result value.
   const Type *AllocaEltTy = AI->getAllocatedType();
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
-  
+
   DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
                << '\n');
-  
+
   // There are two forms here: AI could be an array or struct.  Both cases
   // have different ways to compute the element offset.
   const StructLayout *Layout = 0;
@@ -1616,11 +2084,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
   } else {
     const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
     ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
-  }    
-  
-  Value *ResultVal = 
+  }
+
+  Value *ResultVal =
     Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
-  
+
   for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
     // Load the value from the alloca.  If the NewElt is an aggregate, cast
     // the pointer to an integer of the same size before doing the load.
@@ -1628,11 +2096,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
     const Type *FieldTy =
       cast<PointerType>(SrcField->getType())->getElementType();
     uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-    
+
     // Ignore zero sized fields like {}, they obviously contain no data.
     if (FieldSizeBits == 0) continue;
-    
-    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), 
+
+    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                                      FieldSizeBits);
     if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
         !FieldTy->isVectorTy())
@@ -1650,17 +2118,17 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
     // we can shift and insert it.
     if (SrcField->getType() != ResultVal->getType())
       SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
-    
+
     // Determine the number of bits to shift SrcField.
     uint64_t Shift;
     if (Layout) // Struct case.
       Shift = Layout->getElementOffsetInBits(i);
     else  // Array case.
       Shift = i*ArrayEltBitOffset;
-    
+
     if (TD->isBigEndian())
       Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
-    
+
     if (Shift) {
       Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
       SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
@@ -1683,46 +2151,39 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
 }
 
 /// HasPadding - Return true if the specified type has any structure or
-/// alignment padding, false otherwise.
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
 static bool HasPadding(const Type *Ty, const TargetData &TD) {
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
-    return HasPadding(ATy->getElementType(), TD);
-  
-  if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
-    return HasPadding(VTy->getElementType(), TD);
-  
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    const StructLayout *SL = TD.getStructLayout(STy);
-    unsigned PrevFieldBitOffset = 0;
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
-
-      // Padding in sub-elements?
-      if (HasPadding(STy->getElementType(i), TD))
-        return true;
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    Ty = ATy->getElementType();
+    return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+  }
 
-      // Check to see if there is any padding between this element and the
-      // previous one.
-      if (i) {
-        unsigned PrevFieldEnd =
+  // SROA currently handles only Arrays and Structs.
+  const StructType *STy = cast<StructType>(Ty);
+  const StructLayout *SL = TD.getStructLayout(STy);
+  unsigned PrevFieldBitOffset = 0;
+  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+    // Check to see if there is any padding between this element and the
+    // previous one.
+    if (i) {
+      unsigned PrevFieldEnd =
         PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
-        if (PrevFieldEnd < FieldBitOffset)
-          return true;
-      }
-
-      PrevFieldBitOffset = FieldBitOffset;
-    }
-
-    //  Check for tail padding.
-    if (unsigned EltCount = STy->getNumElements()) {
-      unsigned PrevFieldEnd = PrevFieldBitOffset +
-                   TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
-      if (PrevFieldEnd < SL->getSizeInBits())
+      if (PrevFieldEnd < FieldBitOffset)
         return true;
     }
+    PrevFieldBitOffset = FieldBitOffset;
   }
-  
-  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+  // Check for tail padding.
+  if (unsigned EltCount = STy->getNumElements()) {
+    unsigned PrevFieldEnd = PrevFieldBitOffset +
+      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+    if (PrevFieldEnd < SL->getSizeInBits())
+      return true;
+  }
+  return false;
 }
 
 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
@@ -1731,14 +2192,14 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
 bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   // Loop over the use list of the alloca.  We can only transform it if all of
   // the users are safe to transform.
-  AllocaInfo Info;
-  
-  isSafeForScalarRepl(AI, AI, 0, Info);
+  AllocaInfo Info(AI);
+
+  isSafeForScalarRepl(AI, 0, Info);
   if (Info.isUnsafe) {
     DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
     return false;
   }
-  
+
   // Okay, we know all the users are promotable.  If the aggregate is a memcpy
   // source and destination, we have to be careful.  In particular, the memcpy
   // could be moving around elements that live in structure padding of the LLVM
@@ -1748,6 +2209,20 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
       HasPadding(AI->getAllocatedType(), *TD))
     return false;
 
+  // If the alloca never has an access to just *part* of it, but is accessed
+  // via loads and stores, then we should use ConvertToScalarInfo to promote
+  // the alloca instead of promoting each piece at a time and inserting fission
+  // and fusion code.
+  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+    // If the struct/array just has one element, use basic SRoA.
+    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+      if (ST->getNumElements() > 1) return false;
+    } else {
+      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+        return false;
+    }
+  }
+  
   return true;
 }
 
@@ -1760,7 +2235,7 @@ static bool PointsToConstantGlobal(Value *V) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     return GV->isConstant();
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-    if (CE->getOpcode() == Instruction::BitCast || 
+    if (CE->getOpcode() == Instruction::BitCast ||
         CE->getOpcode() == Instruction::GetElementPtr)
       return PointsToConstantGlobal(CE->getOperand(0));
   return false;
@@ -1771,18 +2246,19 @@ static bool PointsToConstantGlobal(Value *V) {
 /// see any stores or other unknown uses.  If we see pointer arithmetic, keep
 /// track of whether it moves the pointer (with isOffset) but otherwise traverse
 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant  global, we
+/// the alloca, and if the source pointer is a pointer to a constant global, we
 /// can optimize this.
 static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                                            bool isOffset) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     User *U = cast<Instruction>(*UI);
 
-    if (LoadInst *LI = dyn_cast<LoadInst>(U))
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       // Ignore non-volatile loads, they are always ok.
-      if (!LI->isVolatile())
-        continue;
-    
+      if (LI->isVolatile()) return false;
+      continue;
+    }
+
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       // If uses of the bitcast are ok, we are ok.
       if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
@@ -1797,27 +2273,52 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
         return false;
       continue;
     }
-    
+
+    if (CallSite CS = U) {
+      // If this is a readonly/readnone call site, then we know it is just a
+      // load and we can ignore it.
+      if (CS.onlyReadsMemory())
+        continue;
+
+      // If this is the function being called then we treat it like a load and
+      // ignore it.
+      if (CS.isCallee(UI))
+        continue;
+
+      // If this is being passed as a byval argument, the caller is making a
+      // copy, so it is only a read of the alloca.
+      unsigned ArgNo = CS.getArgumentNo(UI);
+      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+        continue;
+    }
+
     // If this is isn't our memcpy/memmove, reject it as something we can't
     // handle.
     MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
     if (MI == 0)
       return false;
 
+    // If the transfer is using the alloca as a source of the transfer, then
+    // ignore it since it is a load (unless the transfer is volatile).
+    if (UI.getOperandNo() == 1) {
+      if (MI->isVolatile()) return false;
+      continue;
+    }
+
     // If we already have seen a copy, reject the second one.
     if (TheCopy) return false;
-    
+
     // If the pointer has been offset from the start of the alloca, we can't
     // safely handle this.
     if (isOffset) return false;
 
     // If the memintrinsic isn't using the alloca as the dest, reject it.
     if (UI.getOperandNo() != 0) return false;
-    
+
     // If the source of the memcpy/move is not a constant global, reject it.
     if (!PointsToConstantGlobal(MI->getSource()))
       return false;
-    
+
     // Otherwise, the transform is safe.  Remember the copy instruction.
     TheCopy = MI;
   }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 360749caf111..ce5dd73ace32 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,7 +42,9 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
 namespace {
   struct CFGSimplifyPass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGSimplifyPass() : FunctionPass(ID) {}
+    CFGSimplifyPass() : FunctionPass(ID) {
+      initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
   };
@@ -50,7 +52,7 @@ namespace {
 
 char CFGSimplifyPass::ID = 0;
 INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
-                "Simplify the CFG", false, false);
+                "Simplify the CFG", false, false)
 
 // Public interface to the CFGSimplification pass
 FunctionPass *llvm::createCFGSimplificationPass() {
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 3ec70ec2e024..70ff32e02310 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,9 @@ namespace {
     const TargetData *TD;
   public:
     static char ID; // Pass identification
-    SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
+    SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+      initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnFunction(Function &F);
 
@@ -47,7 +49,7 @@ namespace {
 } // end anonymous namespace.
 
 INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
-                "Simplify half_powr library calls", false, false);
+                "Simplify half_powr library calls", false, false)
 
 // Public interface to the Simplify HalfPowr LibCalls pass.
 FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
@@ -95,7 +97,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
 
     InlineFunctionInfo IFI(0, TD);
     bool B = InlineFunction(Call, IFI);
-    assert(B && "half_powr didn't inline?"); B=B;
+    assert(B && "half_powr didn't inline?");
+    (void)B;
 
     BasicBlock *NewBody = NewBlock->getSinglePredecessor();
     assert(NewBody);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d7ce53f36715..ec45b71dd368 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -123,7 +123,7 @@ struct StrCatOpt : public LibCallOptimization {
     // Verify the "strcat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType())
       return 0;
@@ -160,9 +160,8 @@ struct StrCatOpt : public LibCallOptimization {
 
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
-    EmitMemCpy(CpyDst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
-                                1, false, B, TD);
+    B.CreateMemCpy(CpyDst, Src,
+                   ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
   }
 };
 
@@ -174,7 +173,7 @@ struct StrNCatOpt : public StrCatOpt {
     // Verify the "strncat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 ||
-        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType() ||
         !FT->getParamType(2)->isIntegerTy())
@@ -222,8 +221,9 @@ struct StrChrOpt : public LibCallOptimization {
     // Verify the "strchr" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
-        FT->getParamType(0) != FT->getReturnType())
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
       return 0;
 
     Value *SrcStr = CI->getArgOperand(0);
@@ -252,22 +252,55 @@ struct StrChrOpt : public LibCallOptimization {
 
     // strchr can find the nul character.
     Str += '\0';
-    char CharValue = CharC->getSExtValue();
 
     // Compute the offset.
-    uint64_t i = 0;
-    while (1) {
-      if (i == Str.size())    // Didn't find the char.  strchr returns null.
-        return Constant::getNullValue(CI->getType());
-      // Did we find our match?
-      if (Str[i] == CharValue)
-        break;
-      ++i;
-    }
+    size_t I = Str.find(CharC->getSExtValue());
+    if (I == std::string::npos) // Didn't find the char.  strchr returns null.
+      return Constant::getNullValue(CI->getType());
 
     // strchr(s+n,c)  -> gep(s+n+i,c)
-    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
-    return B.CreateGEP(SrcStr, Idx, "strchr");
+    return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+  }
+};
+
+//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strrchr" function prototype.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
+      return 0;
+
+    Value *SrcStr = CI->getArgOperand(0);
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+    // Cannot fold anything if we're not looking for a constant.
+    if (!CharC)
+      return 0;
+
+    std::string Str;
+    if (!GetConstantStringInfo(SrcStr, Str)) {
+      // strrchr(s, 0) -> strchr(s, 0)
+      if (TD && CharC->isZero())
+        return EmitStrChr(SrcStr, '\0', B, TD);
+      return 0;
+    }
+
+    // strrchr can find the nul character.
+    Str += '\0';
+
+    // Compute the offset.
+    size_t I = Str.rfind(CharC->getSExtValue());
+    if (I == std::string::npos) // Didn't find the char. Return null.
+      return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n+i,c)
+    return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
   }
 };
 
@@ -281,7 +314,7 @@ struct StrCmpOpt : public LibCallOptimization {
     if (FT->getNumParams() != 2 ||
         !FT->getReturnType()->isIntegerTy(32) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+        FT->getParamType(0) != B.getInt8PtrTy())
       return 0;
 
     Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
@@ -329,7 +362,7 @@ struct StrNCmpOpt : public LibCallOptimization {
     if (FT->getNumParams() != 3 ||
         !FT->getReturnType()->isIntegerTy(32) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
         !FT->getParamType(2)->isIntegerTy())
       return 0;
 
@@ -384,7 +417,7 @@ struct StrCpyOpt : public LibCallOptimization {
     if (FT->getNumParams() != NumParams ||
         FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+        FT->getParamType(0) != B.getInt8PtrTy())
       return 0;
 
     Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -405,9 +438,8 @@ struct StrCpyOpt : public LibCallOptimization {
                     ConstantInt::get(TD->getIntPtrType(*Context), Len),
                     CI->getArgOperand(2), B, TD);
     else
-      EmitMemCpy(Dst, Src,
-                 ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                                  1, false, B, TD);
+      B.CreateMemCpy(Dst, Src,
+                     ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
     return Dst;
   }
 };
@@ -420,7 +452,7 @@ struct StrNCpyOpt : public LibCallOptimization {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
         !FT->getParamType(2)->isIntegerTy())
       return 0;
 
@@ -435,8 +467,7 @@ struct StrNCpyOpt : public LibCallOptimization {
 
     if (SrcLen == 0) {
       // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
-                 LenOp, false, B, TD);
+      B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
       return Dst;
     }
 
@@ -455,9 +486,8 @@ struct StrNCpyOpt : public LibCallOptimization {
     if (Len > SrcLen+1) return 0;
 
     // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
-    EmitMemCpy(Dst, Src,
-               ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                                1, false, B, TD);
+    B.CreateMemCpy(Dst, Src,
+                   ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
 
     return Dst;
   }
@@ -470,7 +500,7 @@ struct StrLenOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 1 ||
-        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
         !FT->getReturnType()->isIntegerTy())
       return 0;
 
@@ -488,6 +518,45 @@ struct StrLenOpt : public LibCallOptimization {
   }
 };
 
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        FT->getReturnType() != FT->getParamType(0))
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strpbrk(s, "") -> NULL
+    // strpbrk("", s) -> NULL
+    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2) {
+      size_t I = S1.find_first_of(S2);
+      if (I == std::string::npos) // No match.
+        return Constant::getNullValue(CI->getType());
+
+      return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+    }
+
+    // strpbrk(s, "a") -> strchr(s, 'a')
+    if (TD && HasS2 && S2.size() == 1)
+      return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+    return 0;
+  }
+};
+
 //===---------------------------------------===//
 // 'strto*' Optimizations.  This handles strtol, strtod, strtof, strtoul, etc.
 
@@ -501,7 +570,8 @@ struct StrToOpt : public LibCallOptimization {
 
     Value *EndPtr = CI->getArgOperand(1);
     if (isa<ConstantPointerNull>(EndPtr)) {
-      CI->setOnlyReadsMemory();
+      // With a null EndPtr, this function won't capture the main argument.
+      // It would be readonly too, except that it still may write to errno.
       CI->addAttribute(1, Attribute::NoCapture);
     }
 
@@ -509,6 +579,67 @@ struct StrToOpt : public LibCallOptimization {
   }
 };
 
+//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strspn(s, "") -> 0
+    // strspn("", s) -> 0
+    if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2)
+      return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+    return 0;
+  }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        FT->getParamType(1) != FT->getParamType(0) ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    std::string S1, S2;
+    bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+    bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+    // strcspn("", s) -> 0
+    if (HasS1 && S1.empty())
+      return Constant::getNullValue(CI->getType());
+
+    // Constant folding.
+    if (HasS1 && HasS2)
+      return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+    // strcspn(s, "") -> strlen(s)
+    if (TD && HasS2 && S2.empty())
+      return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+    return 0;
+  }
+};
+
 //===---------------------------------------===//
 // 'strstr' Optimizations
 
@@ -637,8 +768,8 @@ struct MemCpyOpt : public LibCallOptimization {
       return 0;
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
-    EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-               CI->getArgOperand(2), 1, false, B, TD);
+    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                   CI->getArgOperand(2), 1);
     return CI->getArgOperand(0);
   }
 };
@@ -659,8 +790,8 @@ struct MemMoveOpt : public LibCallOptimization {
       return 0;
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
-    EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                CI->getArgOperand(2), 1, false, B, TD);
+    B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                    CI->getArgOperand(2), 1);
     return CI->getArgOperand(0);
   }
 };
@@ -681,9 +812,8 @@ struct MemSetOpt : public LibCallOptimization {
       return 0;
 
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
-    Value *Val = B.CreateIntCast(CI->getArgOperand(1),
-                                 Type::getInt8Ty(*Context), false);
-    EmitMemSet(CI->getArgOperand(0), Val,  CI->getArgOperand(2), false, B, TD);
+    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
     return CI->getArgOperand(0);
   }
 };
@@ -765,12 +895,10 @@ struct Exp2Opt : public LibCallOptimization {
     Value *LdExpArg = 0;
     if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
-        LdExpArg = B.CreateSExt(OpC->getOperand(0),
-                                Type::getInt32Ty(*Context), "tmp");
+        LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
     } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
-        LdExpArg = B.CreateZExt(OpC->getOperand(0),
-                                Type::getInt32Ty(*Context), "tmp");
+        LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
     }
 
     if (LdExpArg) {
@@ -789,7 +917,7 @@ struct Exp2Opt : public LibCallOptimization {
       Module *M = Caller->getParent();
       Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
                                              Op->getType(),
-                                             Type::getInt32Ty(*Context),NULL);
+                                             B.getInt32Ty(), NULL);
       CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
       if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
         CI->setCallingConv(F->getCallingConv());
@@ -819,7 +947,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
     Value *V = Cast->getOperand(0);
     V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
                              Callee->getAttributes());
-    return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+    return B.CreateFPExt(V, B.getDoubleTy());
   }
 };
 
@@ -846,8 +974,8 @@ struct FFSOpt : public LibCallOptimization {
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
       if (CI->getValue() == 0)  // ffs(0) -> 0.
         return Constant::getNullValue(CI->getType());
-      return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
-                              CI->getValue().countTrailingZeros()+1);
+      // ffs(c) -> cttz(c)+1
+      return B.getInt32(CI->getValue().countTrailingZeros() + 1);
     }
 
     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
@@ -856,11 +984,10 @@ struct FFSOpt : public LibCallOptimization {
                                          Intrinsic::cttz, &ArgType, 1);
     Value *V = B.CreateCall(F, Op, "cttz");
     V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
-    V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+    V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
 
     Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
-    return B.CreateSelect(Cond, V,
-                          ConstantInt::get(Type::getInt32Ty(*Context), 0));
+    return B.CreateSelect(Cond, V, B.getInt32(0));
   }
 };
 
@@ -877,10 +1004,8 @@ struct IsDigitOpt : public LibCallOptimization {
 
     // isdigit(c) -> (c-'0') <u 10
     Value *Op = CI->getArgOperand(0);
-    Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
-                     "isdigittmp");
-    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
-                         "isdigit");
+    Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+    Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
     return B.CreateZExt(Op, CI->getType());
   }
 };
@@ -898,8 +1023,7 @@ struct IsAsciiOpt : public LibCallOptimization {
 
     // isascii(c) -> c <u 128
     Value *Op = CI->getArgOperand(0);
-    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
-                         "isascii");
+    Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
     return B.CreateZExt(Op, CI->getType());
   }
 };
@@ -917,8 +1041,7 @@ struct AbsOpt : public LibCallOptimization {
 
     // abs(x) -> x >s -1 ? x : -x
     Value *Op = CI->getArgOperand(0);
-    Value *Pos = B.CreateICmpSGT(Op,
-                             Constant::getAllOnesValue(Op->getType()),
+    Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
                                  "ispos");
     Value *Neg = B.CreateNeg(Op, "neg");
     return B.CreateSelect(Pos, Op, Neg);
@@ -969,11 +1092,15 @@ struct PrintFOpt : public LibCallOptimization {
       return CI->use_empty() ? (Value*)CI :
                                ConstantInt::get(CI->getType(), 0);
 
-    // printf("x") -> putchar('x'), even for '%'.  Return the result of putchar
-    // in case there is an error writing to stdout.
+    // Do not do any of the following transformations if the printf return value
+    // is used, in general the printf return value is not compatible with either
+    // putchar() or puts().
+    if (!CI->use_empty())
+      return 0;
+
+    // printf("x") -> putchar('x'), even for '%'.
     if (FormatStr.size() == 1) {
-      Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
-                                                FormatStr[0]), B, TD);
+      Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
       if (CI->use_empty()) return CI;
       return B.CreateIntCast(Res, CI->getType(), true);
     }
@@ -1004,8 +1131,7 @@ struct PrintFOpt : public LibCallOptimization {
 
     // printf("%s\n", str) --> puts(str)
     if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
-        CI->getArgOperand(1)->getType()->isPointerTy() &&
-        CI->use_empty()) {
+        CI->getArgOperand(1)->getType()->isPointerTy()) {
       EmitPutS(CI->getArgOperand(1), B, TD);
       return CI;
     }
@@ -1042,9 +1168,9 @@ struct SPrintFOpt : public LibCallOptimization {
       if (!TD) return 0;
 
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
-      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),   // Copy the
-                 ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
-                 FormatStr.size() + 1), 1, false, B, TD);
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+                                      FormatStr.size() + 1), 1);   // nul byte.
       return ConstantInt::get(CI->getType(), FormatStr.size());
     }
 
@@ -1058,13 +1184,11 @@ struct SPrintFOpt : public LibCallOptimization {
     if (FormatStr[1] == 'c') {
       // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
       if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *V = B.CreateTrunc(CI->getArgOperand(2),
-                               Type::getInt8Ty(*Context), "char");
+      Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
       Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
       B.CreateStore(V, Ptr);
-      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
-                        "nul");
-      B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+      Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+      B.CreateStore(B.getInt8(0), Ptr);
 
       return ConstantInt::get(CI->getType(), 1);
     }
@@ -1080,8 +1204,7 @@ struct SPrintFOpt : public LibCallOptimization {
       Value *IncLen = B.CreateAdd(Len,
                                   ConstantInt::get(Len->getType(), 1),
                                   "leninc");
-      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
-                 IncLen, 1, false, B, TD);
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
 
       // The sprintf result is the unincremented number of bytes in the string.
       return B.CreateIntCast(Len, CI->getType(), false);
@@ -1208,6 +1331,34 @@ struct FPrintFOpt : public LibCallOptimization {
   }
 };
 
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    // Check for a constant string.
+    std::string Str;
+    if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+      return 0;
+
+    if (Str.empty() && CI->use_empty()) {
+      // puts("") -> putchar('\n')
+      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+      if (CI->use_empty()) return CI;
+      return B.CreateIntCast(Res, CI->getType(), true);
+    }
+
+    return 0;
+  }
+};
+
 } // end anonymous namespace.
 
 //===----------------------------------------------------------------------===//
@@ -1220,10 +1371,10 @@ namespace {
   class SimplifyLibCalls : public FunctionPass {
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
-    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
-    StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
-    StrNCpyOpt StrNCpy; StrLenOpt StrLen;
-    StrToOpt StrTo; StrStrOpt StrStr;
+    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+    StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+    StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+    StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
     MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
     // Math Library Optimizations
     PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
@@ -1233,11 +1384,14 @@ namespace {
     // Formatting and IO Optimizations
     SPrintFOpt SPrintF; PrintFOpt PrintF;
     FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+    PutsOpt Puts;
 
     bool Modified;  // This is only used by doInitialization.
   public:
     static char ID; // Pass identification
-    SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
+    SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+      initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+    }
     void InitOptimizations();
     bool runOnFunction(Function &F);
 
@@ -1255,7 +1409,7 @@ namespace {
 } // end anonymous namespace.
 
 INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
-                "Simplify well-known library calls", false, false);
+                "Simplify well-known library calls", false, false)
 
 // Public interface to the Simplify LibCalls pass.
 FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1269,11 +1423,13 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["strcat"] = &StrCat;
   Optimizations["strncat"] = &StrNCat;
   Optimizations["strchr"] = &StrChr;
+  Optimizations["strrchr"] = &StrRChr;
   Optimizations["strcmp"] = &StrCmp;
   Optimizations["strncmp"] = &StrNCmp;
   Optimizations["strcpy"] = &StrCpy;
   Optimizations["strncpy"] = &StrNCpy;
   Optimizations["strlen"] = &StrLen;
+  Optimizations["strpbrk"] = &StrPBrk;
   Optimizations["strtol"] = &StrTo;
   Optimizations["strtod"] = &StrTo;
   Optimizations["strtof"] = &StrTo;
@@ -1281,6 +1437,8 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["strtoll"] = &StrTo;
   Optimizations["strtold"] = &StrTo;
   Optimizations["strtoull"] = &StrTo;
+  Optimizations["strspn"] = &StrSpn;
+  Optimizations["strcspn"] = &StrCSpn;
   Optimizations["strstr"] = &StrStr;
   Optimizations["memcmp"] = &MemCmp;
   Optimizations["memcpy"] = &MemCpy;
@@ -1341,6 +1499,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["fwrite"] = &FWrite;
   Optimizations["fputs"] = &FPuts;
   Optimizations["fprintf"] = &FPrintF;
+  Optimizations["puts"] = &Puts;
 }
 
 
@@ -2155,9 +2314,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
 //   * pow(sqrt(x),y) -> pow(x,y*0.5)
 //   * pow(pow(x,y),z)-> pow(x,y*z)
 //
-// puts:
-//   * puts("") -> putchar('\n')
-//
 // round, roundf, roundl:
 //   * round(cnst) -> cnst'
 //
@@ -2173,24 +2329,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
 // stpcpy:
 //   * stpcpy(str, "literal") ->
 //           llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-//   * strrchr(s,c) -> reverse_offset_of_in(c,s)
-//      (if c is a constant integer and s is a constant string)
-//   * strrchr(s1,0) -> strchr(s1,0)
-//
-// strpbrk:
-//   * strpbrk(s,a) -> offset_in_for(s,a)
-//      (if s and a are both constant strings)
-//   * strpbrk(s,"") -> 0
-//   * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-//   * strspn(s,a)   -> const_int (if both args are constant)
-//   * strspn("",a)  -> 0
-//   * strspn(s,"")  -> 0
-//   * strcspn(s,a)  -> const_int (if both args are constant)
-//   * strcspn("",a) -> 0
-//   * strcspn(s,"") -> strlen(a)
 //
 // tan, tanf, tanl:
 //   * tan(atan(x)) -> x
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 95d3dedfb62d..705f44204900 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,9 @@ namespace {
 
   public:
     static char ID; // Pass identification
-    Sinking() : FunctionPass(ID) {}
+    Sinking() : FunctionPass(ID) {
+      initializeSinkingPass(*PassRegistry::getPassRegistry());
+    }
     
     virtual bool runOnFunction(Function &F);
     
@@ -56,7 +58,11 @@ namespace {
 } // end anonymous namespace
   
 char Sinking::ID = 0;
-INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
 
 FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
 
@@ -150,11 +156,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
   if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
     if (L->isVolatile()) return false;
 
-    Value *Ptr = L->getPointerOperand();
-    unsigned Size = AA->getTypeStoreSize(L->getType());
+    AliasAnalysis::Location Loc = AA->getLocation(L);
     for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
          E = Stores.end(); I != E; ++I)
-      if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+      if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
         return false;
   }
 
@@ -163,7 +168,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
     return false;
   }
 
-  return Inst->isSafeToSpeculativelyExecute();
+  if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+    return false;
+
+  return true;
 }
 
 /// SinkInstruction - Determine whether it is safe to sink the specified machine
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 2e437ac778c8..9dd83c04fa61 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -26,14 +26,14 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <map>
 using namespace llvm;
 
@@ -49,7 +49,9 @@ namespace {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    TailDup() : FunctionPass(ID) {}
+    TailDup() : FunctionPass(ID) {
+      initializeTailDupPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
     inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +61,7 @@ namespace {
 }
 
 char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
 
 // Public interface to the Tail Duplication pass
 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
@@ -360,8 +362,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
       Instruction *Inst = BI++;
       if (isInstructionTriviallyDead(Inst))
         Inst->eraseFromParent();
-      else if (Constant *C = ConstantFoldInstruction(Inst)) {
-        Inst->replaceAllUsesWith(C);
+      else if (Value *V = SimplifyInstruction(Inst)) {
+        Inst->replaceAllUsesWith(V);
         Inst->eraseFromParent();
       }
     }
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 371725467a24..5b6bc04cc1c2 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,31 +52,52 @@
 
 #define DEBUG_TYPE "tailcallelim"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped,   "Number of return duplicated");
 STATISTIC(NumAccumAdded, "Number of accumulators introduced");
 
 namespace {
   struct TailCallElim : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    TailCallElim() : FunctionPass(ID) {}
+    TailCallElim() : FunctionPass(ID) {
+      initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
   private:
+    CallInst *FindTRECandidate(Instruction *I,
+                               bool CannotTailCallElimCallsMarkedTail);
+    bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+                                    BasicBlock *&OldEntry,
+                                    bool &TailCallsAreMarkedTail,
+                                    SmallVector<PHINode*, 8> &ArgumentPHIs,
+                                    bool CannotTailCallElimCallsMarkedTail);
+    bool FoldReturnAndProcessPred(BasicBlock *BB,
+                                  ReturnInst *Ret, BasicBlock *&OldEntry,
+                                  bool &TailCallsAreMarkedTail,
+                                  SmallVector<PHINode*, 8> &ArgumentPHIs,
+                                  bool CannotTailCallElimCallsMarkedTail);
     bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
                                bool &TailCallsAreMarkedTail,
                                SmallVector<PHINode*, 8> &ArgumentPHIs,
@@ -88,7 +109,7 @@ namespace {
 
 char TailCallElim::ID = 0;
 INITIALIZE_PASS(TailCallElim, "tailcallelim",
-                "Tail Call Elimination", false, false);
+                "Tail Call Elimination", false, false)
 
 // Public interface to the TailCallElimination pass
 FunctionPass *llvm::createTailCallEliminationPass() {
@@ -133,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
   bool TailCallsAreMarkedTail = false;
   SmallVector<PHINode*, 8> ArgumentPHIs;
   bool MadeChange = false;
-
   bool FunctionContainsEscapingAllocas = false;
 
   // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
@@ -160,10 +180,17 @@ bool TailCallElim::runOnFunction(Function &F) {
     return false;
 
   // Second pass, change any tail calls to loops.
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
-      MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
                                           ArgumentPHIs,CannotTCETailMarkedCall);
+      if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+        Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+                                          TailCallsAreMarkedTail, ArgumentPHIs,
+                                          CannotTCETailMarkedCall);
+      MadeChange |= Change;
+    }
+  }
 
   // If we eliminated any tail recursions, it's possible that we inserted some
   // silly PHI nodes which just merge an initial value (the incoming operand)
@@ -175,7 +202,7 @@ bool TailCallElim::runOnFunction(Function &F) {
       PHINode *PN = ArgumentPHIs[i];
 
       // If the PHI Node is a dynamic constant, replace it with the value it is.
-      if (Value *PNV = PN->hasConstantValue()) {
+      if (Value *PNV = SimplifyInstruction(PN)) {
         PN->replaceAllUsesWith(PNV);
         PN->eraseFromParent();
       }
@@ -322,41 +349,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
   return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
 }
 
-bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
-                                         bool &TailCallsAreMarkedTail,
-                                         SmallVector<PHINode*, 8> &ArgumentPHIs,
-                                       bool CannotTailCallElimCallsMarkedTail) {
-  BasicBlock *BB = Ret->getParent();
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+  while (isa<DbgInfoIntrinsic>(I))
+    ++I;
+  return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+                               bool CannotTailCallElimCallsMarkedTail) {
+  BasicBlock *BB = TI->getParent();
   Function *F = BB->getParent();
 
-  if (&BB->front() == Ret) // Make sure there is something before the ret...
-    return false;
+  if (&BB->front() == TI) // Make sure there is something before the terminator.
+    return 0;
   
   // Scan backwards from the return, checking to see if there is a tail call in
   // this block.  If so, set CI to it.
-  CallInst *CI;
-  BasicBlock::iterator BBI = Ret;
-  while (1) {
+  CallInst *CI = 0;
+  BasicBlock::iterator BBI = TI;
+  while (true) {
     CI = dyn_cast<CallInst>(BBI);
     if (CI && CI->getCalledFunction() == F)
       break;
 
     if (BBI == BB->begin())
-      return false;          // Didn't find a potential tail call.
+      return 0;          // Didn't find a potential tail call.
     --BBI;
   }
 
   // If this call is marked as a tail call, and if there are dynamic allocas in
   // the function, we cannot perform this optimization.
   if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
-    return false;
+    return 0;
 
   // As a special case, detect code like this:
   //   double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
   // and disable this xform in this case, because the code generator will
   // lower the call to fabs into inline code.
   if (BB == &F->getEntryBlock() && 
-      &BB->front() == CI && &*++BB->begin() == Ret &&
+      FirstNonDbg(BB->front()) == CI &&
+      FirstNonDbg(llvm::next(BB->begin())) == TI &&
       callIsSmall(F)) {
     // A single-block function with just a call and a return. Check that
     // the arguments match.
@@ -367,9 +400,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
     for (; I != E && FI != FE; ++I, ++FI)
       if (*I != &*FI) break;
     if (I == E && FI == FE)
-      return false;
+      return 0;
   }
 
+  return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+                                       BasicBlock *&OldEntry,
+                                       bool &TailCallsAreMarkedTail,
+                                       SmallVector<PHINode*, 8> &ArgumentPHIs,
+                                       bool CannotTailCallElimCallsMarkedTail) {
   // If we are introducing accumulator recursion to eliminate operations after
   // the call instruction that are both associative and commutative, the initial
   // value for the accumulator is placed in this variable.  If this value is set
@@ -387,7 +428,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
   // tail call if all of the instructions between the call and the return are
   // movable to above the call itself, leaving the call next to the return.
   // Check that this is the case now.
-  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+  BasicBlock::iterator BBI = CI;
+  for (++BBI; &*BBI != Ret; ++BBI) {
     if (CanMoveAboveCall(BBI, CI)) continue;
     
     // If we can't move the instruction above the call, it might be because it
@@ -424,6 +466,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
       return false;
   }
 
+  BasicBlock *BB = Ret->getParent();
+  Function *F = BB->getParent();
+
   // OK! We can transform this tail call.  If this is the first one found,
   // create the new entry block, allowing us to branch back to the old entry.
   if (OldEntry == 0) {
@@ -533,3 +578,53 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
   ++NumEliminated;
   return true;
 }
+
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+                                       ReturnInst *Ret, BasicBlock *&OldEntry,
+                                       bool &TailCallsAreMarkedTail,
+                                       SmallVector<PHINode*, 8> &ArgumentPHIs,
+                                       bool CannotTailCallElimCallsMarkedTail) {
+  bool Change = false;
+
+  // If the return block contains nothing but the return and PHI's,
+  // there might be an opportunity to duplicate the return in its
+  // predecessors and perform TRC there. Look for predecessors that end
+  // in unconditional branch and recursive call(s).
+  SmallVector<BranchInst*, 8> UncondBranchPreds;
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BasicBlock *Pred = *PI;
+    TerminatorInst *PTI = Pred->getTerminator();
+    if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+      if (BI->isUnconditional())
+        UncondBranchPreds.push_back(BI);
+  }
+
+  while (!UncondBranchPreds.empty()) {
+    BranchInst *BI = UncondBranchPreds.pop_back_val();
+    BasicBlock *Pred = BI->getParent();
+    if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+      DEBUG(dbgs() << "FOLDING: " << *BB
+            << "INTO UNCOND BRANCH PRED: " << *Pred);
+      EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+                                 OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+                                 CannotTailCallElimCallsMarkedTail);
+      ++NumRetDuped;
+      Change = true;
+    }
+  }
+
+  return Change;
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+                                         bool &TailCallsAreMarkedTail,
+                                         SmallVector<PHINode*, 8> &ArgumentPHIs,
+                                       bool CannotTailCallElimCallsMarkedTail) {
+  CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+  if (!CI)
+    return false;
+
+  return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+                                    ArgumentPHIs,
+                                    CannotTailCallElimCallsMarkedTail);
+}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 4d64c8578efe..be7bed1cecdf 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -379,27 +380,10 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
 /// return false.
 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI) {
-  std::vector<InlineAsm::ConstraintInfo>
-  Constraints = IA->ParseConstraints();
-
-  unsigned ArgNo = 0;   // The argument of the CallInst.
-  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
-    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-      case InlineAsm::isOutput:
-        if (OpInfo.isIndirect)
-          OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-        break;
-      case InlineAsm::isInput:
-        OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
-        break;
-      case InlineAsm::isClobber:
-        // Nothing to do.
-        break;
-    }
-
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+    
     // Compute the constraint code and ConstraintType to use.
     TLI.ComputeConstraintToUse(OpInfo, SDValue());
 
@@ -584,7 +568,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                   MemoryInst, Result);
     Matcher.IgnoreProfitability = true;
     bool Success = Matcher.MatchAddr(Address, 0);
-    Success = Success; assert(Success && "Couldn't select *anything*?");
+    (void)Success; assert(Success && "Couldn't select *anything*?");
 
     // If the match didn't cover I, then it won't be shared by it.
     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 093083a630cf..acaea195e710 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -19,8 +19,9 @@
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Scalar.h"
@@ -63,12 +64,27 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
 /// any single-entry PHI nodes in it, fold them away.  This handles the case
 /// when all entries to the PHI nodes in a block are guaranteed equal, such as
 /// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+  if (!isa<PHINode>(BB->begin())) return;
+  
+  AliasAnalysis *AA = 0;
+  MemoryDependenceAnalysis *MemDep = 0;
+  if (P) {
+    AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+    MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+  }
+  
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     if (PN->getIncomingValue(0) != PN)
       PN->replaceAllUsesWith(PN->getIncomingValue(0));
     else
       PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    
+    if (MemDep)
+      MemDep->removeInstruction(PN);  // Memdep updates AA itself.
+    else if (AA && isa<PointerType>(PN->getType()))
+      AA->deleteValue(PN);
+    
     PN->eraseFromParent();
   }
 }
@@ -110,7 +126,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   if (isa<InvokeInst>(PredBB->getTerminator())) return false;
   
   succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
-  BasicBlock* OnlySucc = BB;
+  BasicBlock *OnlySucc = BB;
   for (; SI != SE; ++SI)
     if (*SI != OnlySucc) {
       OnlySucc = 0;     // There are multiple distinct successors!
@@ -131,10 +147,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   }
 
   // Begin by getting rid of unneeded PHIs.
-  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-    PN->replaceAllUsesWith(PN->getIncomingValue(0));
-    BB->getInstList().pop_front();  // Delete the phi node...
-  }
+  if (isa<PHINode>(BB->front()))
+    FoldSingleEntryPHINodes(BB, P);
   
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();
@@ -152,24 +166,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   
   // Finally, erase the old block and update dominator info.
   if (P) {
-    if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
-      DomTreeNode* DTN = DT->getNode(BB);
-      DomTreeNode* PredDTN = DT->getNode(PredBB);
-  
-      if (DTN) {
-        SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
-        for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+    if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+      if (DomTreeNode *DTN = DT->getNode(BB)) {
+        DomTreeNode *PredDTN = DT->getNode(PredBB);
+        SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+        for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
              DE = Children.end(); DI != DE; ++DI)
           DT->changeImmediateDominator(*DI, PredDTN);
 
         DT->eraseNode(BB);
       }
+      
+      if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+        LI->removeBlock(BB);
+      
+      if (MemoryDependenceAnalysis *MD =
+            P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+        MD->invalidateCachedPredecessors();
     }
   }
   
   BB->eraseFromParent();
-  
-  
   return true;
 }
 
@@ -218,52 +235,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
   ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
 }
 
-/// RemoveSuccessor - Change the specified terminator instruction such that its
-/// successor SuccNum no longer exists.  Because this reduces the outgoing
-/// degree of the current basic block, the actual terminator instruction itself
-/// may have to be changed.  In the case where the last successor of the block 
-/// is deleted, a return instruction is inserted in its place which can cause a
-/// surprising change in program behavior if it is not expected.
-///
-void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
-  assert(SuccNum < TI->getNumSuccessors() &&
-         "Trying to remove a nonexistant successor!");
-
-  // If our old successor block contains any PHI nodes, remove the entry in the
-  // PHI nodes that comes from this branch...
-  //
-  BasicBlock *BB = TI->getParent();
-  TI->getSuccessor(SuccNum)->removePredecessor(BB);
-
-  TerminatorInst *NewTI = 0;
-  switch (TI->getOpcode()) {
-  case Instruction::Br:
-    // If this is a conditional branch... convert to unconditional branch.
-    if (TI->getNumSuccessors() == 2) {
-      cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
-    } else {                    // Otherwise convert to a return instruction...
-      Value *RetVal = 0;
-
-      // Create a value to return... if the function doesn't return null...
-      if (!BB->getParent()->getReturnType()->isVoidTy())
-        RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
-
-      // Create the return...
-      NewTI = ReturnInst::Create(TI->getContext(), RetVal);
-    }
-    break;
-
-  case Instruction::Invoke:    // Should convert to call
-  case Instruction::Switch:    // Should remove entry
-  default:
-  case Instruction::Ret:       // Cannot happen, has no successors!
-    llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
-  }
-
-  if (NewTI)   // If it's a different instruction, replace.
-    ReplaceInstWithInst(TI, NewTI);
-}
-
 /// GetSuccessorNumber - Search for the specified successor of basic block BB
 /// and return its position in the terminator instruction's list of
 /// successors.  It is an error to call this with a block that is not a
@@ -300,13 +271,13 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
     assert(SP == BB && "CFG broken");
     SP = NULL;
     return SplitBlock(Succ, Succ->begin(), P);
-  } else {
-    // Otherwise, if BB has a single successor, split it at the bottom of the
-    // block.
-    assert(BB->getTerminator()->getNumSuccessors() == 1 &&
-           "Should have a single succ!"); 
-    return SplitBlock(BB, BB->getTerminator(), P);
   }
+  
+  // Otherwise, if BB has a single successor, split it at the bottom of the
+  // block.
+  assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+         "Should have a single succ!"); 
+  return SplitBlock(BB, BB->getTerminator(), P);
 }
 
 /// SplitBlock - Split the specified block at the specified instruction - every
@@ -322,12 +293,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 
   // The new block lives in whichever loop the old one did. This preserves
   // LCSSA as well, because we force the split point to be after any PHI nodes.
-  if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+  if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
     if (Loop *L = LI->getLoopFor(Old))
       L->addBasicBlockToLoop(New, LI->getBase());
 
   if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
-    // Old dominates New. New node domiantes all other nodes dominated by Old.
+    // Old dominates New. New node dominates all other nodes dominated by Old.
     DomTreeNode *OldNode = DT->getNode(Old);
     std::vector<DomTreeNode *> Children;
     for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -340,9 +311,6 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
         DT->changeImmediateDominator(*I, NewNode);
   }
 
-  if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
-    DF->splitBlock(Old);
-    
   return New;
 }
 
@@ -354,10 +322,9 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 /// suffix of 'Suffix'.
 ///
 /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's
-/// complicated to handle the case where one of the edges being split
-/// is an exit of a loop with other exits).
+/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
 ///
 BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
                                          BasicBlock *const *Preds,
@@ -407,13 +374,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     }
   }
 
-  // Update dominator tree and dominator frontier if available.
+  // Update dominator tree if available.
   DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
   if (DT)
     DT->splitBlock(NewBB);
-  if (DominanceFrontier *DF =
-        P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
-    DF->splitBlock(NewBB);
 
   // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
   // node becomes an incoming value for BB's phi node.  However, if the Preds
@@ -545,7 +509,32 @@ void llvm::FindFunctionBackedges(const Function &F,
       // Go up one level.
       InStack.erase(VisitStack.pop_back_val().first);
     }
-  } while (!VisitStack.empty());
-  
-  
+  } while (!VisitStack.empty()); 
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+                                             BasicBlock *Pred) {
+  Instruction *UncondBranch = Pred->getTerminator();
+  // Clone the return and add it to the end of the predecessor.
+  Instruction *NewRet = RI->clone();
+  Pred->getInstList().push_back(NewRet);
+      
+  // If the return instruction returns a value, and if the value was a
+  // PHI node in "BB", propagate the right value into the return.
+  for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+       i != e; ++i)
+    if (PHINode *PN = dyn_cast<PHINode>(*i))
+      if (PN->getParent() == BB)
+        *i = PN->getIncomingValueForBlock(Pred);
+      
+  // Update any PHI nodes in the returning block to realize that we no
+  // longer branch to them.
+  BB->removePredecessor(Pred);
+  UncondBranch->eraseFromParent();
+  return cast<ReturnInst>(NewRet);
 }
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f75ffe6105fa..616b066b5ab1 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -11,8 +11,7 @@
 // inserting a dummy basic block.  This pass may be "required" by passes that
 // cannot deal with critical edges.  For this usage, the structure type is
 // forward declared.  This pass obviously invalidates the CFG, but can update
-// forward dominator (set, immediate dominators, tree, and frontier)
-// information.
+// dominator trees.
 //
 //===----------------------------------------------------------------------===//
 
@@ -36,13 +35,14 @@ STATISTIC(NumBroken, "Number of blocks inserted");
 namespace {
   struct BreakCriticalEdges : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    BreakCriticalEdges() : FunctionPass(ID) {}
+    BreakCriticalEdges() : FunctionPass(ID) {
+      initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual bool runOnFunction(Function &F);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addPreserved<LoopInfo>();
       AU.addPreserved<ProfileInfo>();
 
@@ -54,7 +54,7 @@ namespace {
 
 char BreakCriticalEdges::ID = 0;
 INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
-                "Break critical edges in CFG", false, false);
+                "Break critical edges in CFG", false, false)
 
 // Publically exposed interface to pass...
 char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
@@ -150,10 +150,9 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
 }
 
 /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge.  This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns the new block if the edge
-/// was split, null otherwise.
+/// split the critical edge.  This will update DominatorTree information if it
+/// is available, thus calling this pass will not invalidate either of them.
+/// This returns the new block if the edge was split, null otherwise.
 ///
 /// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
 /// specified successor will be merged into the same critical edge block.  
@@ -255,12 +254,11 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   if (P == 0) return NewBB;
   
   DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
-  DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
   LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
   ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
   
   // If we have nothing to update, just return.
-  if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+  if (DT == 0 && LI == 0 && PI == 0)
     return NewBB;
 
   // Now update analysis information.  Since the only predecessor of NewBB is
@@ -281,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
          I != E; ++I) {
       BasicBlock *P = *I;
       if (P != NewBB)
-          OtherPreds.push_back(P);
+        OtherPreds.push_back(P);
     }
   }
 
@@ -318,40 +316,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
     }
   }
 
-  // Should we update DominanceFrontier information?
-  if (DF) {
-    // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
-    if (!OtherPreds.empty()) {
-      // FIXME: IMPLEMENT THIS!
-      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
-                       " not implemented yet!");
-    }
-    
-    // Since the new block is dominated by its only predecessor TIBB,
-    // it cannot be in any block's dominance frontier.  If NewBB dominates
-    // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
-    // just {DestBB}.
-    DominanceFrontier::DomSetType NewDFSet;
-    if (NewBBDominatesDestBB) {
-      DominanceFrontier::iterator I = DF->find(DestBB);
-      if (I != DF->end()) {
-        DF->addBasicBlock(NewBB, I->second);
-        
-        if (I->second.count(DestBB)) {
-          // However NewBB's frontier does not include DestBB.
-          DominanceFrontier::iterator NF = DF->find(NewBB);
-          DF->removeFromFrontier(NF, DestBB);
-        }
-      }
-      else
-        DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
-    } else {
-      DominanceFrontier::DomSetType NewDFSet;
-      NewDFSet.insert(DestBB);
-      DF->addBasicBlock(NewBB, NewDFSet);
-    }
-  }
-  
   // Update LoopInfo if it is around.
   if (LI) {
     if (Loop *TIL = LI->getLoopFor(TIBB)) {
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index c3139498c250..4a90751936b5 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -131,21 +131,6 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
   return CI;
 }
 
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder.  This always
-/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                        bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
-  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
-  return B.CreateCall5(MemCpy, Dst, Src, Len,
-                       ConstantInt::get(B.getInt32Ty(), Align),
-                       ConstantInt::get(B.getInt1Ty(), isVolatile));
-}
-
 /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
 /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
 /// are pointers.
@@ -170,22 +155,6 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
   return CI;
 }
 
-/// EmitMemMove - Emit a call to the memmove function to the builder.  This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
-                         bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
-  Module *M = B.GetInsertBlock()->getParent()->getParent();
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
-                            TD->getIntPtrType(Context) };
-  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
-  Dst = CastToCStr(Dst, B);
-  Src = CastToCStr(Src, B);
-  Value *A = ConstantInt::get(B.getInt32Ty(), Align);
-  Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
-  return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
-}
-
 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
 /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
 Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
@@ -233,18 +202,6 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
   return CI;
 }
 
-/// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
-                        IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[2] = { Dst->getType(), Len->getType() };
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
- Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
- Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
- return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
-}
-
 /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
 /// 'floor').  This function is known to take a single of type matching 'Op' and
 /// returns one value with the same type.  If 'Op' is a long double, 'l' is
@@ -422,8 +379,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
       return false;
 
     if (isFoldable(3, 2, false)) {
-      EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                 CI->getArgOperand(2), 1, false, B, TD);
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
       return true;
     }
@@ -445,8 +402,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
       return false;
 
     if (isFoldable(3, 2, false)) {
-      EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
-                  CI->getArgOperand(2), 1, false, B, TD);
+      B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
       return true;
     }
@@ -465,8 +422,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
     if (isFoldable(3, 2, false)) {
       Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
                                    false);
-      EmitMemSet(CI->getArgOperand(0), Val,  CI->getArgOperand(2),
-                 false, B, TD);
+      B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
       replaceCall(CI->getArgOperand(0));
       return true;
     }
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 61cbeb2bd35b..5b76bb26e404 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -21,8 +21,9 @@ add_llvm_library(LLVMTransformUtils
   PromoteMemoryToRegister.cpp
   SSAUpdater.cpp
   SimplifyCFG.cpp
+  SimplifyInstructions.cpp
   UnifyFunctionExitNodes.cpp
+  Utils.cpp
   ValueMapper.cpp
   )
 
-target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index f43186edae43..d967ceb96856 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -112,8 +112,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     const BasicBlock &BB = *BI;
 
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
-                                      CodeInfo);
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
     VMap[&BB] = CBB;                       // Add basic block mapping.
 
     if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
@@ -122,12 +121,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
   // Loop over all of the instructions in the function, fixing up operand
   // references as we go.  This uses VMap to do all the hard work.
-  //
   for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
          BE = NewFunc->end(); BB != BE; ++BB)
     // Loop over all instructions, fixing each one as we find it...
     for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
-      RemapInstruction(II, VMap, ModuleLevelChanges);
+      RemapInstruction(II, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
 }
 
 /// CloneFunction - Return a copy of the specified function, but without
@@ -138,8 +137,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 /// updated to include mappings from all of the instructions and basicblocks in
 /// the function from their old to new values.
 ///
-Function *llvm::CloneFunction(const Function *F,
-                              ValueToValueMapTy &VMap,
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                               bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
   std::vector<const Type*> ArgTypes;
@@ -216,7 +214,7 @@ namespace {
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                        std::vector<const BasicBlock*> &ToClone){
-  Value *&BBEntry = VMap[BB];
+  TrackingVH<Value> &BBEntry = VMap[BB];
 
   // Have we already cloned this block?
   if (BBEntry) return;
@@ -262,8 +260,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
       // If the condition was a known constant in the callee...
       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
       // Or is a known constant in the caller...
-      if (Cond == 0)  
-        Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+      if (Cond == 0) {
+        Value *V = VMap[BI->getCondition()];
+        Cond = dyn_cast_or_null<ConstantInt>(V);
+      }
 
       // Constant fold to uncond branch!
       if (Cond) {
@@ -276,8 +276,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
     // If switching on a value known constant in the caller.
     ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
-    if (Cond == 0)  // Or known constant after constant prop in the callee...
-      Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+    if (Cond == 0) { // Or known constant after constant prop in the callee...
+      Value *V = VMap[SI->getCondition()];
+      Cond = dyn_cast_or_null<ConstantInt>(V);
+    }
     if (Cond) {     // Constant fold to uncond branch!
       BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
       VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -318,7 +320,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
   SmallVector<Constant*, 8> Ops;
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                   VMap, ModuleLevelChanges)))
+                                                           VMap,
+                  ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
       Ops.push_back(Op);
     else
       return 0;  // All operands not constant!
@@ -394,7 +397,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
   SmallVector<const PHINode*, 16> PHIToResolve;
   for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
        BI != BE; ++BI) {
-    BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+    Value *V = VMap[BI];
+    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
     if (NewBB == 0) continue;  // Dead block.
 
     // Add the new block to the new function.
@@ -455,7 +459,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
           I->setDebugLoc(DebugLoc());
         }
       }
-      RemapInstruction(I, VMap, ModuleLevelChanges);
+      RemapInstruction(I, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
     }
   }
   
@@ -474,10 +479,11 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
       OPN = PHIToResolve[phino];
       PHINode *PN = cast<PHINode>(VMap[OPN]);
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
-        if (BasicBlock *MappedBlock = 
-            cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+        Value *V = VMap[PN->getIncomingBlock(pred)];
+        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
           Value *InVal = MapValue(PN->getIncomingValue(pred),
-                                  VMap, ModuleLevelChanges);
+                                  VMap, 
+                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
index 551b63039a0e..87dd14153a19 100644
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -19,15 +19,14 @@
 
 using namespace llvm;
 
-/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
-/// dominance info. It is expected that basic block is already cloned.
+/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
+/// that the basic block is already cloned.
 static void CloneDominatorInfo(BasicBlock *BB, 
-                               ValueMap<const Value *, Value *> &VMap,
-                               DominatorTree *DT,
-                               DominanceFrontier *DF) {
+                               ValueToValueMapTy &VMap,
+                               DominatorTree *DT) {
 
   assert (DT && "DominatorTree is not available");
-  ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+  ValueToValueMapTy::iterator BI = VMap.find(BB);
   assert (BI != VMap.end() && "BasicBlock clone is missing");
   BasicBlock *NewBB = cast<BasicBlock>(BI->second);
 
@@ -42,45 +41,23 @@ static void CloneDominatorInfo(BasicBlock *BB,
 
   // NewBB's dominator is either BB's dominator or BB's dominator's clone.
   BasicBlock *NewBBDom = BBDom;
-  ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+  ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
   if (BBDomI != VMap.end()) {
     NewBBDom = cast<BasicBlock>(BBDomI->second);
     if (!DT->getNode(NewBBDom))
-      CloneDominatorInfo(BBDom, VMap, DT, DF);
+      CloneDominatorInfo(BBDom, VMap, DT);
   }
   DT->addNewBlock(NewBB, NewBBDom);
-
-  // Copy cloned dominance frontiner set
-  if (DF) {
-    DominanceFrontier::DomSetType NewDFSet;
-    DominanceFrontier::iterator DFI = DF->find(BB);
-    if ( DFI != DF->end()) {
-      DominanceFrontier::DomSetType S = DFI->second;
-        for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
-             I != E; ++I) {
-          BasicBlock *DB = *I;
-          ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
-          if (IDM != VMap.end())
-            NewDFSet.insert(cast<BasicBlock>(IDM->second));
-          else
-            NewDFSet.insert(DB);
-        }
-    }
-    DF->addBasicBlock(NewBB, NewDFSet);
-  }
 }
 
 /// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
 /// using old blocks to new blocks mapping.
 Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
-                      ValueMap<const Value *, Value *> &VMap, Pass *P) {
+                      ValueToValueMapTy &VMap, Pass *P) {
   
   DominatorTree *DT = NULL;
-  DominanceFrontier *DF = NULL;
-  if (P) {
+  if (P)
     DT = P->getAnalysisIfAvailable<DominatorTree>();
-    DF = P->getAnalysisIfAvailable<DominanceFrontier>();
-  }
 
   SmallVector<BasicBlock *, 16> NewBlocks;
 
@@ -116,7 +93,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
       for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
            I != E; ++I) {
         BasicBlock *BB = *I;
-        CloneDominatorInfo(BB, VMap, DT, DF);
+        CloneDominatorInfo(BB, VMap, DT);
       }
 
     // Process sub loops
@@ -134,7 +111,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
       for (unsigned index = 0, num_ops = Insn->getNumOperands(); 
            index != num_ops; ++index) {
         Value *Op = Insn->getOperand(index);
-        ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+        ValueToValueMapTy::iterator OpItr = VMap.find(Op);
         if (OpItr != VMap.end())
           Insn->setOperand(index, OpItr->second);
       }
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index b347bf597f8e..1046c38ec01d 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -89,8 +89,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
       GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap,
-                                                 true)));
+                                                 VMap, RF_None)));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -121,7 +120,7 @@ Module *llvm::CloneModule(const Module *M,
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
     if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
+      GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
   }
 
   // And named metadata....
@@ -130,7 +129,8 @@ Module *llvm::CloneModule(const Module *M,
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
+      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
+                                               RF_None)));
   }
 
   return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index b51f751e1317..e6337722c8bd 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -186,8 +186,8 @@ void CodeExtractor::splitReturnBlocks() {
     if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
       BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
       if (DT) {
-        // Old dominates New. New node domiantes all other nodes dominated
-        //by Old.
+        // Old dominates New. New node dominates all other nodes dominated
+        // by Old.
         DomTreeNode *OldNode = DT->getNode(*I);
         SmallVector<DomTreeNode*, 8> Children;
         for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8e82a02caa69..8cc26492c292 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -129,7 +129,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
       assert(II->getParent() != P->getIncomingBlock(i) &&
-             "Invoke edge not supported yet"); II=II;
+             "Invoke edge not supported yet"); (void)II;
     }
     new StoreInst(P->getIncomingValue(i), Slot,
                   P->getIncomingBlock(i)->getTerminator());
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 88979e862df2..c1faf2411331 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,7 +22,9 @@
 #include "llvm/Attributes.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CallSite.h"
@@ -170,7 +172,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 /// some edges of the callgraph may remain.
 static void UpdateCallGraphAfterInlining(CallSite CS,
                                          Function::iterator FirstNewBlock,
-                                         ValueMap<const Value*, Value*> &VMap,
+                                         ValueToValueMapTy &VMap,
                                          InlineFunctionInfo &IFI) {
   CallGraph &CG = *IFI.CG;
   const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,7 +195,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   for (; I != E; ++I) {
     const Value *OrigCall = I->first;
 
-    ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
     if (VMI == VMap.end() || VMI->second == 0)
       continue;
@@ -228,6 +230,90 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+                                  const Function *CalledFunc,
+                                  InlineFunctionInfo &IFI,
+                                  unsigned ByValAlignment) {
+  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+  // If the called function is readonly, then it could not mutate the caller's
+  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
+  // temporary.
+  if (CalledFunc->onlyReadsMemory()) {
+    // If the byval argument has a specified alignment that is greater than the
+    // passed in pointer, then we either have to round up the input pointer or
+    // give up on this transformation.
+    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
+      return Arg;
+
+    // If the pointer is already known to be sufficiently aligned, or if we can
+    // round it up to a larger alignment, then we don't need a temporary.
+    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+                                   IFI.TD) >= ByValAlignment)
+      return Arg;
+    
+    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
+    // for code quality, but rarely happens and is required for correctness.
+  }
+  
+  LLVMContext &Context = Arg->getContext();
+
+  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  
+  // Create the alloca.  If we have TargetData, use nice alignment.
+  unsigned Align = 1;
+  if (IFI.TD)
+    Align = IFI.TD->getPrefTypeAlignment(AggTy);
+  
+  // If the byval had an alignment specified, we *must* use at least that
+  // alignment, as it is required by the byval argument (and uses of the
+  // pointer inside the callee).
+  Align = std::max(Align, ByValAlignment);
+  
+  Function *Caller = TheCall->getParent()->getParent(); 
+  
+  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
+                                    &*Caller->begin()->begin());
+  // Emit a memcpy.
+  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+                                                 Intrinsic::memcpy, 
+                                                 Tys, 3);
+  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+  
+  Value *Size;
+  if (IFI.TD == 0)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.TD->getTypeStoreSize(AggTy));
+  
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer.  Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DestCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  CallInst *TheMemCpy =
+    CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+  
+  // If we have a call graph, update it.
+  if (CallGraph *CG = IFI.CG) {
+    CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+    CallGraphNode *CallerNode = (*CG)[Caller];
+    CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+  }
+  
+  // Uses of the argument in the function should use our new alloca
+  // instead.
+  return NewAlloca;
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -251,7 +337,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       CalledFunc->isDeclaration() || // call, or call to a vararg function!
       CalledFunc->getFunctionType()->isVarArg()) return false;
 
-
   // If the call to the callee is not a tail call, we must clear the 'tail'
   // flags on any calls that we inline.
   bool MustClearTailCallFlags =
@@ -287,7 +372,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   Function::iterator FirstNewBlock;
 
   { // Scope to destroy VMap after cloning.
-    ValueMap<const Value*, Value*> VMap;
+    ValueToValueMapTy VMap;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -304,58 +389,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       // by them explicit.  However, we don't do this if the callee is readonly
       // or readnone, because the copy would be unneeded: the callee doesn't
       // modify the struct.
-      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
-          !CalledFunc->onlyReadsMemory()) {
-        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = 
-            Type::getInt8PtrTy(Context);
-
-        // Create the alloca.  If we have TargetData, use nice alignment.
-        unsigned Align = 1;
-        if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, 
-                                          I->getName(), 
-                                          &*Caller->begin()->begin());
-        // Emit a memcpy.
-        const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                       Intrinsic::memcpy, 
-                                                       Tys, 3);
-        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
-        Value *Size;
-        if (IFI.TD == 0)
-          Size = ConstantExpr::getSizeOf(AggTy);
-        else
-          Size = ConstantInt::get(Type::getInt64Ty(Context),
-                                  IFI.TD->getTypeStoreSize(AggTy));
-
-        // Always generate a memcpy of alignment 1 here because we don't know
-        // the alignment of the src pointer.  Other optimizations can infer
-        // better alignment.
-        Value *CallArgs[] = {
-          DestCast, SrcCast, Size,
-          ConstantInt::get(Type::getInt32Ty(Context), 1),
-          ConstantInt::get(Type::getInt1Ty(Context), 0)
-        };
-        CallInst *TheMemCpy =
-          CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
-        // If we have a call graph, update it.
-        if (CallGraph *CG = IFI.CG) {
-          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
-          CallGraphNode *CallerNode = (*CG)[Caller];
-          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
-        }
-
-        // Uses of the argument in the function should use our new alloca
-        // instead.
-        ActualArg = NewAlloca;
-
+      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+                                        CalledFunc->getParamAlignment(ArgNo+1));
+ 
         // Calls that we inline may use the new alloca, so we need to clear
-        // their 'tail' flags.
-        MustClearTailCallFlags = true;
+        // their 'tail' flags if HandleByValArgument introduced a new alloca and
+        // the callee has calls.
+        MustClearTailCallFlags |= ActualArg != *AI;
       }
 
       VMap[I] = ActualArg;
@@ -399,8 +440,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       if (!isa<Constant>(AI->getArraySize()))
         continue;
       
-      // Keep track of the static allocas that we inline into the caller if the
-      // StaticAllocas pointer is non-null.
+      // Keep track of the static allocas that we inline into the caller.
       IFI.StaticAllocas.push_back(AI);
       
       // Scan for the block of allocas that we can move over, and move them
@@ -579,10 +619,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // any users of the original call/invoke instruction.
   const Type *RTy = CalledFunc->getReturnType();
 
+  PHINode *PHI = 0;
   if (Returns.size() > 1) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
-    PHINode *PHI = 0;
     if (!TheCall->use_empty()) {
       PHI = PHINode::Create(RTy, TheCall->getName(),
                             AfterCallBB->begin());
@@ -600,14 +640,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
                "Ret value not consistent in function!");
         PHI->addIncoming(RI->getReturnValue(), RI->getParent());
       }
-    
-      // Now that we inserted the PHI, check to see if it has a single value
-      // (e.g. all the entries are the same or undef).  If so, remove the PHI so
-      // it doesn't block other optimizations.
-      if (Value *V = PHI->hasConstantValue()) {
-        PHI->replaceAllUsesWith(V);
-        PHI->eraseFromParent();
-      }
     }
 
 
@@ -664,5 +696,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
   // Now we can remove the CalleeEntry block, which is now empty.
   Caller->getBasicBlockList().erase(CalleeEntry);
 
+  // If we inserted a phi node, check to see if it has a single value (e.g. all
+  // the entries are the same or undef).  If so, remove the PHI so it doesn't
+  // block other optimizations.
+  if (PHI)
+    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+      PHI->replaceAllUsesWith(V);
+      PHI->eraseFromParent();
+    }
+
   return true;
 }
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 5ca82996b42f..45c15de9437f 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,9 @@ using namespace llvm;
 namespace {
   struct InstNamer : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    InstNamer() : FunctionPass(ID) {}
+    InstNamer() : FunctionPass(ID) {
+      initializeInstNamerPass(*PassRegistry::getPassRegistry());
+    }
     
     void getAnalysisUsage(AnalysisUsage &Info) const {
       Info.setPreservesAll();
@@ -48,11 +50,10 @@ namespace {
   };
   
   char InstNamer::ID = 0;
-  INITIALIZE_PASS(InstNamer, "instnamer", 
-                  "Assign names to anonymous instructions", false, false);
 }
 
-
+INITIALIZE_PASS(InstNamer, "instnamer", 
+                "Assign names to anonymous instructions", false, false)
 char &llvm::InstructionNamerID = InstNamer::ID;
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 275b26508f99..b2e5fa6d7e3a 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,9 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 namespace {
   struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LCSSA() : LoopPass(ID) {}
+    LCSSA() : LoopPass(ID) {
+      initializeLCSSAPass(*PassRegistry::getPassRegistry());
+    }
 
     // Cached analysis information for the current function.
     DominatorTree *DT;
@@ -65,10 +67,7 @@ namespace {
       AU.setPreservesCFG();
 
       AU.addRequired<DominatorTree>();
-      AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
       AU.addRequired<LoopInfo>();
-      AU.addPreserved<LoopInfo>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreserved<ScalarEvolution>();
     }
@@ -90,7 +89,10 @@ namespace {
 }
   
 char LCSSA::ID = 0;
-INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
 
 Pass *llvm::createLCSSAPass() { return new LCSSA(); }
 char &llvm::LCSSAID = LCSSA::ID;
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 52f0499f39b0..063c76e9522c 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -22,9 +22,11 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
@@ -66,9 +68,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
       assert(BI->getParent() && "Terminator not inserted in block!");
       OldDest->removePredecessor(BI->getParent());
 
-      // Set the unconditional destination, and change the insn to be an
-      // unconditional branch.
-      BI->setUnconditionalDest(Destination);
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Destination, BI);
+      BI->eraseFromParent();
       return true;
     }
     
@@ -81,8 +83,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
       assert(BI->getParent() && "Terminator not inserted in block!");
       Dest1->removePredecessor(BI->getParent());
 
-      // Change a conditional branch to unconditional.
-      BI->setUnconditionalDest(Dest1);
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Dest1, BI);
+      BI->eraseFromParent();
       return true;
     }
     return false;
@@ -209,9 +212,6 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
   // We don't want debug info removed by anything this general.
   if (isa<DbgInfoIntrinsic>(I)) return false;
 
-  // Likewise for memory use markers.
-  if (isa<MemoryUseIntrinsic>(I)) return false;
-
   if (!I->mayHaveSideEffects()) return true;
 
   // Special case intrinsics that "may have side effects" but can be deleted
@@ -260,29 +260,45 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
   return true;
 }
 
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are multiple uses that all refer to the same value.
+static bool areAllUsesEqual(Instruction *I) {
+  Value::use_iterator UI = I->use_begin();
+  Value::use_iterator UE = I->use_end();
+  if (UI == UE)
+    return false;
+
+  User *TheUse = *UI;
+  for (++UI; UI != UE; ++UI) {
+    if (*UI != TheUse)
+      return false;
+  }
+  return true;
+}
+
 /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
 /// dead PHI node, due to being a def-use chain of single-use nodes that
 /// either forms a cycle or is terminated by a trivially dead instruction,
 /// delete it.  If that makes any of its operands trivially dead, delete them
 /// too, recursively.  Return true if the PHI node is actually deleted.
-bool
-llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
-  if (!PN->hasOneUse())
+  if (!areAllUsesEqual(PN))
     return false;
 
   bool Changed = false;
   SmallPtrSet<PHINode *, 4> PHIs;
   PHIs.insert(PN);
   for (Instruction *J = cast<Instruction>(*PN->use_begin());
-       J->hasOneUse() && !J->mayHaveSideEffects();
+       areAllUsesEqual(J) && !J->mayHaveSideEffects();
        J = cast<Instruction>(*J->use_begin()))
     // If we find a PHI more than once, we're on a cycle that
     // won't prove fruitful.
     if (PHINode *JP = dyn_cast<PHINode>(J))
-      if (!PHIs.insert(cast<PHINode>(JP))) {
+      if (!PHIs.insert(JP)) {
         // Break the cycle and delete the PHI and its operands.
         JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
         (void)RecursivelyDeleteTriviallyDeadInstructions(JP);
@@ -346,13 +362,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
   WeakVH PhiIt = &BB->front();
   while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
     PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
-    
-    Value *PNV = PN->hasConstantValue();
+
+    Value *PNV = SimplifyInstruction(PN, TD);
     if (PNV == 0) continue;
-    
+
     // If we're able to simplify the phi to a single value, substitute the new
     // value into all of its uses.
-    assert(PNV != PN && "hasConstantValue broken");
+    assert(PNV != PN && "SimplifyInstruction broken!");
     
     Value *OldPhiIt = PhiIt;
     ReplaceAndSimplifyAllUses(PN, PNV, TD);
@@ -402,6 +418,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   PredBB->replaceAllUsesWith(DestBB);
   
   if (P) {
+    DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+    if (DT) {
+      BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+      DT->changeImmediateDominator(DestBB, PredBBIDom);
+      DT->eraseNode(PredBB);
+    }
     ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
     if (PI) {
       PI->replaceAllUses(PredBB, DestBB);
@@ -645,3 +667,95 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 
   return Changed;
 }
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+                                      unsigned PrefAlign) {
+
+  User *U = dyn_cast<User>(V);
+  if (!U) return Align;
+
+  switch (Operator::getOpcode(U)) {
+  default: break;
+  case Instruction::BitCast:
+    return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+  case Instruction::GetElementPtr: {
+    // If all indexes are zero, it is just the alignment of the base pointer.
+    bool AllZeroOperands = true;
+    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+      if (!isa<Constant>(*i) ||
+          !cast<Constant>(*i)->isNullValue()) {
+        AllZeroOperands = false;
+        break;
+      }
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+    }
+    return Align;
+  }
+  case Instruction::Alloca: {
+    AllocaInst *AI = cast<AllocaInst>(V);
+    // If there is a requested alignment and if this is an alloca, round up.
+    if (AI->getAlignment() >= PrefAlign)
+      return AI->getAlignment();
+    AI->setAlignment(PrefAlign);
+    return PrefAlign;
+  }
+  }
+
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (GV->isDeclaration()) return Align;
+    
+    if (GV->getAlignment() >= PrefAlign)
+      return GV->getAlignment();
+    // We can only increase the alignment of the global if it has no alignment
+    // specified or if it is not assigned a section.  If it is assigned a
+    // section, the global could be densely packed with other objects in the
+    // section, increasing the alignment could cause padding issues.
+    if (!GV->hasSection() || GV->getAlignment() == 0)
+      GV->setAlignment(PrefAlign);
+    return GV->getAlignment();
+  }
+
+  return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+                                          const TargetData *TD) {
+  assert(V->getType()->isPointerTy() &&
+         "getOrEnforceKnownAlignment expects a pointer!");
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  unsigned TrailZ = KnownZero.countTrailingOnes();
+  
+  // Avoid trouble with rediculously large TrailZ values, such as
+  // those computed from a null pointer.
+  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+  
+  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+  
+  // LLVM doesn't support alignments larger than this currently.
+  Align = std::min(Align, +Value::MaximumAlignment);
+  
+  if (PrefAlign > Align)
+    Align = enforceKnownAlignment(V, Align, PrefAlign);
+    
+  // We don't need to make any adjustment.
+  return Align;
+}
+
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index b3c4801a4f15..246263026bb4 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "loopsimplify"
+#define DEBUG_TYPE "loop-simplify"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
@@ -46,9 +46,10 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
@@ -65,7 +66,9 @@ STATISTIC(NumNested  , "Number of nested loops split out");
 namespace {
   struct LoopSimplify : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : LoopPass(ID) {}
+    LoopSimplify() : LoopPass(ID) {
+      initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+    }
 
     // AA - If we have an alias analysis object to update, this is it, otherwise
     // this is null.
@@ -87,8 +90,6 @@ namespace {
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
-      AU.addPreserved<DominanceFrontier>();
-      AU.addPreservedID(LCSSAID);
     }
 
     /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -107,8 +108,12 @@ namespace {
 }
 
 char LoopSimplify::ID = 0;
-INITIALIZE_PASS(LoopSimplify, "loopsimplify",
-                "Canonicalize natural loops", true, false);
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+                "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+                "Canonicalize natural loops", true, false)
 
 // Publically exposed interface to pass...
 char &llvm::LoopSimplifyID = LoopSimplify::ID;
@@ -157,9 +162,8 @@ ReprocessLoop:
     for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
          E = BadPreds.end(); I != E; ++I) {
 
-      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
-            WriteAsOperand(dbgs(), *I, false);
-            dbgs() << "\n");
+      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+                   << (*I)->getName() << "\n");
 
       // Inform each successor of each dead pred.
       for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
@@ -184,9 +188,8 @@ ReprocessLoop:
       if (BI->isConditional()) {
         if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
 
-          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
-                WriteAsOperand(dbgs(), *I, false);
-                dbgs() << "\n");
+          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+                       << (*I)->getName() << "\n");
 
           BI->setCondition(ConstantInt::get(Cond->getType(),
                                             !L->contains(BI->getSuccessor(0))));
@@ -262,8 +265,9 @@ ReprocessLoop:
   PHINode *PN;
   for (BasicBlock::iterator I = L->getHeader()->begin();
        (PN = dyn_cast<PHINode>(I++)); )
-    if (Value *V = PN->hasConstantValue(DT)) {
+    if (Value *V = SimplifyInstruction(PN, 0, DT)) {
       if (AA) AA->deleteValue(PN);
+      if (SE) SE->forgetValue(PN);
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
     }
@@ -317,29 +321,22 @@ ReprocessLoop:
       if (!FoldBranchToCommonDest(BI)) continue;
 
       // Success. The block is now dead, so remove it from the loop,
-      // update the dominator tree and dominance frontier, and delete it.
-
-      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
-            WriteAsOperand(dbgs(), ExitingBlock, false);
-            dbgs() << "\n");
+      // update the dominator tree and delete it.
+      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+                   << ExitingBlock->getName() << "\n");
 
       assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
       Changed = true;
       LI->removeBlock(ExitingBlock);
 
-      DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
       DomTreeNode *Node = DT->getNode(ExitingBlock);
       const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
         Node->getChildren();
       while (!Children.empty()) {
         DomTreeNode *Child = Children.front();
         DT->changeImmediateDominator(Child, Node->getIDom());
-        if (DF) DF->changeImmediateDominator(Child->getBlock(),
-                                             Node->getIDom()->getBlock(),
-                                             DT);
       }
       DT->eraseNode(ExitingBlock);
-      if (DF) DF->removeBlock(ExitingBlock);
 
       BI->getSuccessor(0)->removePredecessor(ExitingBlock);
       BI->getSuccessor(1)->removePredecessor(ExitingBlock);
@@ -378,9 +375,8 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);
 
-  DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
-        WriteAsOperand(dbgs(), NewBB, false);
-        dbgs() << "\n");
+  DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+               << "\n");
 
   // Make sure that NewBB is put someplace intelligent, which doesn't mess up
   // code layout too horribly.
@@ -409,10 +405,8 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
                                              LoopBlocks.size(), ".loopexit",
                                              this);
 
-  DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
-        WriteAsOperand(dbgs(), NewBB, false);
-        dbgs() << "\n");
-
+  DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+               << NewBB->getName() << "\n");
   return NewBB;
 }
 
@@ -438,11 +432,11 @@ static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
 /// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
 /// PHI node that tells us how to partition the loops.
 static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
-                                        AliasAnalysis *AA) {
+                                        AliasAnalysis *AA, LoopInfo *LI) {
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I);
     ++I;
-    if (Value *V = PN->hasConstantValue(DT)) {
+    if (Value *V = SimplifyInstruction(PN, 0, DT)) {
       // This is a degenerate PHI already, don't modify it!
       PN->replaceAllUsesWith(V);
       if (AA) AA->deleteValue(PN);
@@ -516,7 +510,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
 /// created.
 ///
 Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
-  PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+  PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
   if (PN == 0) return 0;  // No known way to partition.
 
   // Pull out all predecessors that have varying values in the loop.  This
@@ -643,9 +637,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
                                            Header->getName()+".backedge", F);
   BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
 
-  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
-        WriteAsOperand(dbgs(), BEBlock, false);
-        dbgs() << "\n");
+  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+               << BEBlock->getName() << "\n");
 
   // Move the new backedge block to right after the last backedge block.
   Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
@@ -721,8 +714,6 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
 
   // Update dominator information
   DT->splitBlock(BEBlock);
-  if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
-    DF->splitBlock(BEBlock);
 
   return BEBlock;
 }
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 236bbe9057bf..7da7271e642c 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,13 +16,14 @@
 //
 // The process of unrolling can produce extraneous basic blocks linked with
 // unconditional branches.  This will be corrected in the future.
+//
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Support/Debug.h"
@@ -30,20 +31,19 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
-
 using namespace llvm;
 
 // TODO: Should these be here or in LoopUnroll?
 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled,    "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
 
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 static inline void RemapInstruction(Instruction *I,
-                                    ValueMap<const Value *, Value*> &VMap) {
+                                    ValueToValueMapTy &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    ValueToValueMapTy::iterator It = VMap.find(Op);
     if (It != VMap.end())
       I->setOperand(op, It->second);
   }
@@ -96,7 +96,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
 }
 
 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
-/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
 /// can only fail when the loop's latch block is not terminated by a conditional
 /// branch instruction. However, if the trip count (and multiple) are not known,
 /// loop unrolling will mostly produce more code that is no faster.
@@ -105,7 +105,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
 ///
 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be
 /// removed from the LoopPassManager as well. LPM can also be NULL.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
+bool llvm::UnrollLoop(Loop *L, unsigned Count,
+                      LoopInfo *LI, LPPassManager *LPM) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -127,6 +128,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
              "  Can't unroll; loop not terminated by a conditional branch.\n");
     return false;
   }
+  
+  if (Header->hasAddressTaken()) {
+    // The loop-rotate pass can be helpful to avoid this in many cases.
+    DEBUG(dbgs() <<
+          "  Won't unroll loop: address of header block is taken.\n");
+    return false;
+  }
 
   // Notify ScalarEvolution that the loop will be substantially changed,
   // if not outright eliminated.
@@ -189,7 +197,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
 
   // For the first iteration of the loop, we should use the precloned values for
   // PHI nodes.  Insert associations now.
-  typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
   ValueToValueMapTy LastValueMap;
   std::vector<PHINode*> OrigPHINode;
   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -274,7 +281,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
     for (unsigned i = 0; i < NewBlocks.size(); ++i)
       for (BasicBlock::iterator I = NewBlocks[i]->begin(),
            E = NewBlocks[i]->end(); I != E; ++I)
-        RemapInstruction(I, LastValueMap);
+        ::RemapInstruction(I, LastValueMap);
   }
   
   // The latch block exits the loop.  If there are any PHI nodes in the
@@ -342,7 +349,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
       // iteration.
       Term->setSuccessor(!ContinueOnTrue, Dest);
     } else {
-      Term->setUnconditionalDest(Dest);
+      // Replace the conditional branch with an unconditional one.
+      BranchInst::Create(Dest, Term);
+      Term->eraseFromParent();
       // Merge adjacent basic blocks, if possible.
       if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
@@ -362,10 +371,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
 
       if (isInstructionTriviallyDead(Inst))
         (*BB)->getInstList().erase(Inst);
-      else if (Constant *C = ConstantFoldInstruction(Inst)) {
-        Inst->replaceAllUsesWith(C);
-        (*BB)->getInstList().erase(Inst);
-      }
+      else if (Value *V = SimplifyInstruction(Inst))
+        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+          Inst->replaceAllUsesWith(V);
+          (*BB)->getInstList().erase(Inst);
+        }
     }
 
   NumCompletelyUnrolled += CompletelyUnroll;
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index a46dd8402aca..025ae0d61696 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -79,7 +79,9 @@ namespace {
     explicit LowerInvoke(const TargetLowering *tli = NULL,
                          bool useExpensiveEHSupport = ExpensiveEHSupport)
       : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
-        TLI(tli) { }
+        TLI(tli) {
+      initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+    }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
 
@@ -102,7 +104,7 @@ namespace {
 char LowerInvoke::ID = 0;
 INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
                 "Lower invoke and unwind, for unwindless code generators",
-                false, false);
+                false, false)
 
 char &llvm::LowerInvokePassID = LowerInvoke::ID;
 
@@ -148,19 +150,20 @@ bool LowerInvoke::doInitialization(Module &M) {
                                       "llvm.sjljeh.jblist");
     }
 
-// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
-// so it looks like Intrinsic::_setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
 #endif
 
     SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
 
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
-// let's return it to _setjmp state in case anyone ever needs it after this
-// point under VisualStudio
-#define setjmp _setjmp
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+   // let's return it to _setjmp state
+#  pragma pop_macro("setjmp")
+#  undef setjmp_undefined_for_msvc
 #endif
 
     LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
@@ -186,6 +189,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
+      NewCall->setDebugLoc(II->getDebugLoc());
       II->replaceAllUsesWith(NewCall);
 
       // Insert an unconditional branch to the normal destination.
@@ -266,6 +270,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
+  NewCall->setDebugLoc(II->getDebugLoc());
   II->replaceAllUsesWith(NewCall);
 
   // Replace the invoke with an uncond branch.
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 5530b4700aac..914a439718d4 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -33,7 +33,9 @@ namespace {
   class LowerSwitch : public FunctionPass {
   public:
     static char ID; // Pass identification, replacement for typeid
-    LowerSwitch() : FunctionPass(ID) {} 
+    LowerSwitch() : FunctionPass(ID) {
+      initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+    } 
 
     virtual bool runOnFunction(Function &F);
     
@@ -80,7 +82,7 @@ namespace {
 
 char LowerSwitch::ID = 0;
 INITIALIZE_PASS(LowerSwitch, "lowerswitch",
-                "Lower SwitchInst's to branches", false, false);
+                "Lower SwitchInst's to branches", false, false)
 
 // Publically exposed interface to pass...
 char &llvm::LowerSwitchID = LowerSwitch::ID;
@@ -107,7 +109,8 @@ bool LowerSwitch::runOnFunction(Function &F) {
 // operator<< - Used for debugging purposes.
 //
 static raw_ostream& operator<<(raw_ostream &O,
-                               const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+                               const LowerSwitch::CaseVector &C)
+    LLVM_ATTRIBUTE_USED;
 static raw_ostream& operator<<(raw_ostream &O,
                                const LowerSwitch::CaseVector &C) {
   O << "[";
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 101645bd92b7..f4ca81af6d87 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -27,18 +27,17 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
 namespace {
   struct PromotePass : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    PromotePass() : FunctionPass(ID) {}
+    PromotePass() : FunctionPass(ID) {
+      initializePromotePassPass(*PassRegistry::getPassRegistry());
+    }
 
     // runOnFunction - To run this pass, first we calculate the alloca
     // instructions that are safe for promotion, then we promote each one.
     //
     virtual bool runOnFunction(Function &F);
 
-    // getAnalysisUsage - We need dominance frontiers
-    //
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<DominatorTree>();
-      AU.addRequired<DominanceFrontier>();
       AU.setPreservesCFG();
       // This is a cluster of orthogonal Transforms
       AU.addPreserved<UnifyFunctionExitNodes>();
@@ -49,8 +48,11 @@ namespace {
 }  // end of anonymous namespace
 
 char PromotePass::ID = 0;
-INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
-                false, false);
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+                false, false)
 
 bool PromotePass::runOnFunction(Function &F) {
   std::vector<AllocaInst*> Allocas;
@@ -60,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) {
   bool Changed  = false;
 
   DominatorTree &DT = getAnalysis<DominatorTree>();
-  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
 
   while (1) {
     Allocas.clear();
@@ -74,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
 
     if (Allocas.empty()) break;
 
-    PromoteMemToReg(Allocas, DT, DF);
+    PromoteMemToReg(Allocas, DT);
     NumPromoted += Allocas.size();
     Changed = true;
   }
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a4e3029e3a5a..e6a4373c495b 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -9,10 +9,19 @@
 //
 // This file promotes memory references to be register references.  It promotes
 // alloca instructions which only have loads and stores as uses.  An alloca is
-// transformed by using dominator frontiers to place PHI nodes, then traversing
-// the function in depth-first order to rewrite loads and stores as appropriate.
-// This is just the standard SSA construction algorithm to construct "pruned"
-// SSA form.
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+//   Programming Languages
+//   POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,9 +33,10 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -34,6 +44,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
 #include <algorithm>
+#include <map>
+#include <queue>
 using namespace llvm;
 
 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
@@ -178,7 +190,6 @@ namespace {
     ///
     std::vector<AllocaInst*> Allocas;
     DominatorTree &DT;
-    DominanceFrontier &DF;
     DIFactory *DIF;
 
     /// AST - An AliasSetTracker object to update.  If null, don't update it.
@@ -187,7 +198,7 @@ namespace {
     
     /// AllocaLookup - Reverse mapping of Allocas.
     ///
-    std::map<AllocaInst*, unsigned>  AllocaLookup;
+    DenseMap<AllocaInst*, unsigned>  AllocaLookup;
 
     /// NewPhiNodes - The PhiNodes we're adding.
     ///
@@ -216,12 +227,15 @@ namespace {
     /// non-determinstic behavior.
     DenseMap<BasicBlock*, unsigned> BBNumbers;
 
+    /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+    DenseMap<DomTreeNode*, unsigned> DomLevels;
+
     /// BBNumPreds - Lazily compute the number of predecessors a block has.
     DenseMap<const BasicBlock*, unsigned> BBNumPreds;
   public:
     PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
-                   DominanceFrontier &df, AliasSetTracker *ast)
-      : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
+                   AliasSetTracker *ast)
+      : Allocas(A), DT(dt), DIF(0), AST(ast) {}
     ~PromoteMem2Reg() {
       delete DIF;
     }
@@ -264,13 +278,12 @@ namespace {
     void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                     RenamePassData::ValVector &IncVals,
                     std::vector<RenamePassData> &Worklist);
-    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
-                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
   };
   
   struct AllocaInfo {
-    std::vector<BasicBlock*> DefiningBlocks;
-    std::vector<BasicBlock*> UsingBlocks;
+    SmallVector<BasicBlock*, 32> DefiningBlocks;
+    SmallVector<BasicBlock*, 32> UsingBlocks;
     
     StoreInst  *OnlyStore;
     BasicBlock *OnlyBlock;
@@ -325,11 +338,19 @@ namespace {
       DbgDeclare = FindAllocaDbgDeclare(AI);
     }
   };
+
+  typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+  struct DomTreeNodeCompare {
+    bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+      return LHS.second < RHS.second;
+    }
+  };
 }  // end of anonymous namespace
 
 
 void PromoteMem2Reg::run() {
-  Function &F = *DF.getRoot()->getParent();
+  Function &F = *DT.getRoot()->getParent();
 
   if (AST) PointerAllocaValues.resize(Allocas.size());
   AllocaDbgDeclares.resize(Allocas.size());
@@ -422,7 +443,26 @@ void PromoteMem2Reg::run() {
         continue;
       }
     }
-    
+
+    // If we haven't computed dominator tree levels, do so now.
+    if (DomLevels.empty()) {
+      SmallVector<DomTreeNode*, 32> Worklist;
+
+      DomTreeNode *Root = DT.getRootNode();
+      DomLevels[Root] = 0;
+      Worklist.push_back(Root);
+
+      while (!Worklist.empty()) {
+        DomTreeNode *Node = Worklist.pop_back_val();
+        unsigned ChildLevel = DomLevels[Node] + 1;
+        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+             CI != CE; ++CI) {
+          DomLevels[*CI] = ChildLevel;
+          Worklist.push_back(*CI);
+        }
+      }
+    }
+
     // If we haven't computed a numbering for the BB's in the function, do so
     // now.
     if (BBNumbers.empty()) {
@@ -484,9 +524,8 @@ void PromoteMem2Reg::run() {
     Instruction *A = Allocas[i];
 
     // If there are any uses of the alloca instructions left, they must be in
-    // sections of dead code that were not processed on the dominance frontier.
-    // Just delete the users now.
-    //
+    // unreachable basic blocks that were not processed by walking the dominator
+    // tree. Just delete the users now.
     if (!A->use_empty())
       A->replaceAllUsesWith(UndefValue::get(A->getType()));
     if (AST) AST->deleteValue(A);
@@ -509,9 +548,9 @@ void PromoteMem2Reg::run() {
     for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
            NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
       PHINode *PN = I->second;
-      
+
       // If this PHI node merges one value and/or undefs, get the value.
-      if (Value *V = PN->hasConstantValue(&DT)) {
+      if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
         if (AST && PN->getType()->isPointerTy())
           AST->deleteValue(PN);
         PN->replaceAllUsesWith(V);
@@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
 /// avoiding insertion of dead phi nodes.
 void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                              AllocaInfo &Info) {
-
   // Unique the set of defining blocks for efficient lookup.
   SmallPtrSet<BasicBlock*, 32> DefBlocks;
   DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
@@ -673,47 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
   SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
   ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
 
-  // Compute the locations where PhiNodes need to be inserted.  Look at the
-  // dominance frontier of EACH basic-block we have a write in.
-  unsigned CurrentVersion = 0;
-  SmallPtrSet<PHINode*, 16> InsertedPHINodes;
-  std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
-  while (!Info.DefiningBlocks.empty()) {
-    BasicBlock *BB = Info.DefiningBlocks.back();
-    Info.DefiningBlocks.pop_back();
-    
-    // Look up the DF for this write, add it to defining blocks.
-    DominanceFrontier::const_iterator it = DF.find(BB);
-    if (it == DF.end()) continue;
-    
-    const DominanceFrontier::DomSetType &S = it->second;
-    
-    // In theory we don't need the indirection through the DFBlocks vector.
-    // In practice, the order of calling QueuePhiNode would depend on the
-    // (unspecified) ordering of basic blocks in the dominance frontier,
-    // which would give PHI nodes non-determinstic subscripts.  Fix this by
-    // processing blocks in order of the occurance in the function.
-    for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
-         PE = S.end(); P != PE; ++P) {
-      // If the frontier block is not in the live-in set for the alloca, don't
-      // bother processing it.
-      if (!LiveInBlocks.count(*P))
-        continue;
-      
-      DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
-    }
-    
-    // Sort by which the block ordering in the function.
-    if (DFBlocks.size() > 1)
-      std::sort(DFBlocks.begin(), DFBlocks.end());
-    
-    for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
-      BasicBlock *BB = DFBlocks[i].second;
-      if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
-        Info.DefiningBlocks.push_back(BB);
+  // Use a priority queue keyed on dominator tree level so that inserted nodes
+  // are handled from the bottom of the dominator tree upwards.
+  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+                              DomTreeNodeCompare> IDFPriorityQueue;
+  IDFPriorityQueue PQ;
+
+  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+       E = DefBlocks.end(); I != E; ++I) {
+    if (DomTreeNode *Node = DT.getNode(*I))
+      PQ.push(std::make_pair(Node, DomLevels[Node]));
+  }
+
+  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+  SmallPtrSet<DomTreeNode*, 32> Visited;
+  SmallVector<DomTreeNode*, 32> Worklist;
+  while (!PQ.empty()) {
+    DomTreeNodePair RootPair = PQ.top();
+    PQ.pop();
+    DomTreeNode *Root = RootPair.first;
+    unsigned RootLevel = RootPair.second;
+
+    // Walk all dominator tree children of Root, inspecting their CFG edges with
+    // targets elsewhere on the dominator tree. Only targets whose level is at
+    // most Root's level are added to the iterated dominance frontier of the
+    // definition set.
+
+    Worklist.clear();
+    Worklist.push_back(Root);
+
+    while (!Worklist.empty()) {
+      DomTreeNode *Node = Worklist.pop_back_val();
+      BasicBlock *BB = Node->getBlock();
+
+      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+           ++SI) {
+        DomTreeNode *SuccNode = DT.getNode(*SI);
+
+        // Quickly skip all CFG edges that are also dominator tree edges instead
+        // of catching them below.
+        if (SuccNode->getIDom() == Node)
+          continue;
+
+        unsigned SuccLevel = DomLevels[SuccNode];
+        if (SuccLevel > RootLevel)
+          continue;
+
+        if (!Visited.insert(SuccNode))
+          continue;
+
+        BasicBlock *SuccBB = SuccNode->getBlock();
+        if (!LiveInBlocks.count(SuccBB))
+          continue;
+
+        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+        if (!DefBlocks.count(SuccBB))
+          PQ.push(std::make_pair(SuccNode, SuccLevel));
+      }
+
+      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+           ++CI) {
+        if (!Visited.count(*CI))
+          Worklist.push_back(*CI);
+      }
     }
-    DFBlocks.clear();
   }
+
+  if (DFBlocks.size() > 1)
+    std::sort(DFBlocks.begin(), DFBlocks.end());
+
+  unsigned CurrentVersion = 0;
+  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
 }
 
 /// RewriteSingleStoreAlloca - If there is only a single store to this value,
@@ -900,8 +969,7 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
 // Alloca returns true if there wasn't already a phi-node for that variable
 //
 bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
-                                  unsigned &Version,
-                                  SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+                                  unsigned &Version) {
   // Look up the basic-block in question.
   PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
 
@@ -916,8 +984,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
   PN->reserveOperandSpace(getNumPreds(BB));
-  
-  InsertedPHINodes.insert(PN);
 
   if (AST && PN->getType()->isPointerTy())
     AST->copyValue(PointerAllocaValues[AllocaNo], PN);
@@ -986,7 +1052,7 @@ NextIteration:
       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
       if (!Src) continue;
   
-      std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+      DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
       if (AI == AllocaLookup.end()) continue;
 
       Value *V = IncomingVals[AI->second];
@@ -1002,7 +1068,7 @@ NextIteration:
       AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
       if (!Dest) continue;
       
-      std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
       if (ai == AllocaLookup.end())
         continue;
       
@@ -1036,18 +1102,17 @@ NextIteration:
 }
 
 /// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate.  This function makes
-/// use of DominanceFrontier information.  This function does not modify the CFG
-/// of the function at all.  All allocas must be from the same function.
+/// scalar registers, inserting PHI nodes as appropriate.  This function does
+/// not modify the CFG of the function at all.  All allocas must be from the
+/// same function.
 ///
 /// If AST is specified, the specified tracker is updated to reflect changes
 /// made to the IR.
 ///
 void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
-                           DominatorTree &DT, DominanceFrontier &DF,
-                           AliasSetTracker *AST) {
+                           DominatorTree &DT, AliasSetTracker *AST) {
   // If there is nothing to do, bail out...
   if (Allocas.empty()) return;
 
-  PromoteMem2Reg(Allocas, DT, DF, AST).run();
+  PromoteMem2Reg(Allocas, DT, AST).run();
 }
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index c855988307ea..3896d9851b26 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "ssaupdater"
 #include "llvm/Instructions.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CFG.h"
@@ -178,9 +179,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
 
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
-  if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+  if (Value *V = SimplifyInstruction(InsertedPHI)) {
     InsertedPHI->eraseFromParent();
-    return ConstVal;
+    return V;
   }
 
   // If the client wants to know about all new instructions, tell it.
@@ -342,3 +343,169 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
   SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
   return Impl.GetValue(BB);
 }
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+                     SSAUpdater &S, StringRef BaseName) : SSA(S) {
+  if (Insts.empty()) return;
+  
+  Value *SomeVal;
+  if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+    SomeVal = LI;
+  else
+    SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+  if (BaseName.empty())
+    BaseName = SomeVal->getName();
+  SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+  
+  // First step: bucket up uses of the alloca by the block they occur in.
+  // This is important because we have to handle multiple defs/uses in a block
+  // ourselves: SSAUpdater is purely for cross-block references.
+  // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
+  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+  
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+    UsesByBlock[User->getParent()].push_back(User);
+  }
+  
+  // Okay, now we can iterate over all the blocks in the function with uses,
+  // processing them.  Keep track of which loads are loading a live-in value.
+  // Walk the uses in the use-list order to be determinstic.
+  SmallVector<LoadInst*, 32> LiveInLoads;
+  DenseMap<Value*, Value*> ReplacedLoads;
+  
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+    BasicBlock *BB = User->getParent();
+    std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+    
+    // If this block has already been processed, ignore this repeat use.
+    if (BlockUses.empty()) continue;
+    
+    // Okay, this is the first use in the block.  If this block just has a
+    // single user in it, we can rewrite it trivially.
+    if (BlockUses.size() == 1) {
+      // If it is a store, it is a trivial def of the value in the block.
+      if (StoreInst *SI = dyn_cast<StoreInst>(User))
+        SSA.AddAvailableValue(BB, SI->getOperand(0));
+      else 
+        // Otherwise it is a load, queue it to rewrite as a live-in load.
+        LiveInLoads.push_back(cast<LoadInst>(User));
+      BlockUses.clear();
+      continue;
+    }
+    
+    // Otherwise, check to see if this block is all loads.
+    bool HasStore = false;
+    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+      if (isa<StoreInst>(BlockUses[i])) {
+        HasStore = true;
+        break;
+      }
+    }
+    
+    // If so, we can queue them all as live in loads.  We don't have an
+    // efficient way to tell which on is first in the block and don't want to
+    // scan large blocks, so just add all loads as live ins.
+    if (!HasStore) {
+      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+      BlockUses.clear();
+      continue;
+    }
+    
+    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+    // Since SSAUpdater is purely for cross-block values, we need to determine
+    // the order of these instructions in the block.  If the first use in the
+    // block is a load, then it uses the live in value.  The last store defines
+    // the live out value.  We handle this by doing a linear scan of the block.
+    Value *StoredValue = 0;
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+        // If this is a load from an unrelated pointer, ignore it.
+        if (!isInstInList(L, Insts)) continue;
+        
+        // If we haven't seen a store yet, this is a live in use, otherwise
+        // use the stored value.
+        if (StoredValue) {
+          replaceLoadWithValue(L, StoredValue);
+          L->replaceAllUsesWith(StoredValue);
+          ReplacedLoads[L] = StoredValue;
+        } else {
+          LiveInLoads.push_back(L);
+        }
+        continue;
+      }
+      
+      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+        // If this is a store to an unrelated pointer, ignore it.
+        if (!isInstInList(S, Insts)) continue;
+        
+        // Remember that this is the active value in the block.
+        StoredValue = S->getOperand(0);
+      }
+    }
+    
+    // The last stored value that happened is the live-out for the block.
+    assert(StoredValue && "Already checked that there is a store in block");
+    SSA.AddAvailableValue(BB, StoredValue);
+    BlockUses.clear();
+  }
+  
+  // Okay, now we rewrite all loads that use live-in values in the loop,
+  // inserting PHI nodes as necessary.
+  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+    LoadInst *ALoad = LiveInLoads[i];
+    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+    replaceLoadWithValue(ALoad, NewVal);
+
+    // Avoid assertions in unreachable code.
+    if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+    ALoad->replaceAllUsesWith(NewVal);
+    ReplacedLoads[ALoad] = NewVal;
+  }
+  
+  // Allow the client to do stuff before we start nuking things.
+  doExtraRewritesBeforeFinalDeletion();
+  
+  // Now that everything is rewritten, delete the old instructions from the
+  // function.  They should all be dead now.
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+    Instruction *User = Insts[i];
+    
+    // If this is a load that still has uses, then the load must have been added
+    // as a live value in the SSAUpdate data structure for a block (e.g. because
+    // the loaded value was stored later).  In this case, we need to recursively
+    // propagate the updates until we get to the real value.
+    if (!User->use_empty()) {
+      Value *NewVal = ReplacedLoads[User];
+      assert(NewVal && "not a replaced load?");
+      
+      // Propagate down to the ultimate replacee.  The intermediately loads
+      // could theoretically already have been deleted, so we don't want to
+      // dereference the Value*'s.
+      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+      while (RLI != ReplacedLoads.end()) {
+        NewVal = RLI->second;
+        RLI = ReplacedLoads.find(NewVal);
+      }
+      
+      replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+      User->replaceAllUsesWith(NewVal);
+    }
+    
+    instructionDeleted(User);
+    User->eraseFromParent();
+  }
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 28d7afbf1c33..fb660dbfac10 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,33 +19,34 @@
 #include "llvm/Type.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
-#include <functional>
 #include <set>
 #include <map>
 using namespace llvm;
 
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+       cl::desc("Duplicate return instructions into unconditional branches"));
+
 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
 
 namespace {
 class SimplifyCFGOpt {
   const TargetData *const TD;
 
-  ConstantInt *GetConstantInt(Value *V);
-  Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values);
-  Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values);
-  bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
-                              std::vector<ConstantInt*> &Values);
   Value *isValueEqualityComparison(TerminatorInst *TI);
   BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
     std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
@@ -53,6 +54,14 @@ class SimplifyCFGOpt {
                                                      BasicBlock *Pred);
   bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI);
 
+  bool SimplifyReturn(ReturnInst *RI);
+  bool SimplifyUnwind(UnwindInst *UI);
+  bool SimplifyUnreachable(UnreachableInst *UI);
+  bool SimplifySwitch(SwitchInst *SI);
+  bool SimplifyIndirectBr(IndirectBrInst *IBI);
+  bool SimplifyUncondBranch(BranchInst *BI);
+  bool SimplifyCondBranch(BranchInst *BI);
+
 public:
   explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
   bool run(BasicBlock *BB);
@@ -91,8 +100,6 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
 /// ExistPred, an existing predecessor of Succ.
 static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
                                   BasicBlock *ExistPred) {
-  assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
-         succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
   if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
   
   PHINode *PN;
@@ -102,28 +109,29 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
 }
 
 
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and
-/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
 /// to an "if condition".  If so, return the boolean condition that determines
 /// which entry into BB will be taken.  Also, return by references the block
 /// that will be entered from if the condition is true, and the block that will
 /// be entered if the condition is false.
 ///
-///
-static Value *GetIfCondition(BasicBlock *BB,
-                             BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
-  assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+                             BasicBlock *&IfFalse) {
+  PHINode *SomePHI = cast<PHINode>(BB->begin());
+  assert(SomePHI->getNumIncomingValues() == 2 &&
          "Function can only handle blocks with 2 predecessors!");
-  BasicBlock *Pred1 = *pred_begin(BB);
-  BasicBlock *Pred2 = *++pred_begin(BB);
+  BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+  BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
 
   // We can only handle branches.  Other control flow will be lowered to
   // branches if possible anyway.
-  if (!isa<BranchInst>(Pred1->getTerminator()) ||
-      !isa<BranchInst>(Pred2->getTerminator()))
+  BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+  BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+  if (Pred1Br == 0 || Pred2Br == 0)
     return 0;
-  BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
-  BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
 
   // Eliminate code duplication by ensuring that Pred1Br is conditional if
   // either are.
@@ -140,6 +148,12 @@ static Value *GetIfCondition(BasicBlock *BB,
   }
 
   if (Pred1Br->isConditional()) {
+    // The only thing we have to watch out for here is to make sure that Pred2
+    // doesn't have incoming edges from other blocks.  If it does, the condition
+    // doesn't dominate BB.
+    if (Pred2->getSinglePredecessor() == 0)
+      return 0;
+    
     // If we found a conditional branch predecessor, make sure that it branches
     // to BB and Pred2Br.  If it doesn't, this isn't an "if statement".
     if (Pred1Br->getSuccessor(0) == BB &&
@@ -156,39 +170,29 @@ static Value *GetIfCondition(BasicBlock *BB,
       return 0;
     }
 
-    // The only thing we have to watch out for here is to make sure that Pred2
-    // doesn't have incoming edges from other blocks.  If it does, the condition
-    // doesn't dominate BB.
-    if (++pred_begin(Pred2) != pred_end(Pred2))
-      return 0;
-
     return Pred1Br->getCondition();
   }
 
   // Ok, if we got here, both predecessors end with an unconditional branch to
   // BB.  Don't panic!  If both blocks only have a single (identical)
   // predecessor, and THAT is a conditional branch, then we're all ok!
-  if (pred_begin(Pred1) == pred_end(Pred1) ||
-      ++pred_begin(Pred1) != pred_end(Pred1) ||
-      pred_begin(Pred2) == pred_end(Pred2) ||
-      ++pred_begin(Pred2) != pred_end(Pred2) ||
-      *pred_begin(Pred1) != *pred_begin(Pred2))
+  BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+  if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
     return 0;
 
   // Otherwise, if this is a conditional branch, then we can use it!
-  BasicBlock *CommonPred = *pred_begin(Pred1);
-  if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
-    assert(BI->isConditional() && "Two successors but not conditional?");
-    if (BI->getSuccessor(0) == Pred1) {
-      IfTrue = Pred1;
-      IfFalse = Pred2;
-    } else {
-      IfTrue = Pred2;
-      IfFalse = Pred1;
-    }
-    return BI->getCondition();
+  BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+  if (BI == 0) return 0;
+  
+  assert(BI->isConditional() && "Two successors but not conditional?");
+  if (BI->getSuccessor(0) == Pred1) {
+    IfTrue = Pred1;
+    IfFalse = Pred2;
+  } else {
+    IfTrue = Pred2;
+    IfFalse = Pred1;
   }
-  return 0;
+  return BI->getCondition();
 }
 
 /// DominatesMergePoint - If we have a merge point of an "if condition" as
@@ -201,7 +205,7 @@ static Value *GetIfCondition(BasicBlock *BB,
 /// non-trapping.  If both are true, the instruction is inserted into the set
 /// and true is returned.
 static bool DominatesMergePoint(Value *V, BasicBlock *BB,
-                                std::set<Instruction*> *AggressiveInsts) {
+                                SmallPtrSet<Instruction*, 4> *AggressiveInsts) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) {
     // Non-instructions all dominate instructions, but not all constantexprs
@@ -219,56 +223,55 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
 
   // If this instruction is defined in a block that contains an unconditional
   // branch to BB, then it must be in the 'conditional' part of the "if
-  // statement".
-  if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
-    if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
-      if (!AggressiveInsts) return false;
-      // Okay, it looks like the instruction IS in the "condition".  Check to
-      // see if it's a cheap instruction to unconditionally compute, and if it
-      // only uses stuff defined outside of the condition.  If so, hoist it out.
-      if (!I->isSafeToSpeculativelyExecute())
-        return false;
+  // statement".  If not, it definitely dominates the region.
+  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+  if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+    return true;
 
-      switch (I->getOpcode()) {
-      default: return false;  // Cannot hoist this out safely.
-      case Instruction::Load: {
-        // We have to check to make sure there are no instructions before the
-        // load in its basic block, as we are going to hoist the loop out to
-        // its predecessor.
-        BasicBlock::iterator IP = PBB->begin();
-        while (isa<DbgInfoIntrinsic>(IP))
-          IP++;
-        if (IP != BasicBlock::iterator(I))
-          return false;
-        break;
-      }
-      case Instruction::Add:
-      case Instruction::Sub:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor:
-      case Instruction::Shl:
-      case Instruction::LShr:
-      case Instruction::AShr:
-      case Instruction::ICmp:
-        break;   // These are all cheap and non-trapping instructions.
-      }
+  // If we aren't allowing aggressive promotion anymore, then don't consider
+  // instructions in the 'if region'.
+  if (AggressiveInsts == 0) return false;
+  
+  // Okay, it looks like the instruction IS in the "condition".  Check to
+  // see if it's a cheap instruction to unconditionally compute, and if it
+  // only uses stuff defined outside of the condition.  If so, hoist it out.
+  if (!I->isSafeToSpeculativelyExecute())
+    return false;
 
-      // Okay, we can only really hoist these out if their operands are not
-      // defined in the conditional region.
-      for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-        if (!DominatesMergePoint(*i, BB, 0))
-          return false;
-      // Okay, it's safe to do this!  Remember this instruction.
-      AggressiveInsts->insert(I);
-    }
+  switch (I->getOpcode()) {
+  default: return false;  // Cannot hoist this out safely.
+  case Instruction::Load:
+    // We have to check to make sure there are no instructions before the
+    // load in its basic block, as we are going to hoist the load out to its
+    // predecessor.
+    if (PBB->getFirstNonPHIOrDbg() != I)
+      return false;
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::ICmp:
+    break;   // These are all cheap and non-trapping instructions.
+  }
 
+  // Okay, we can only really hoist these out if their operands are not
+  // defined in the conditional region.
+  for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+    if (!DominatesMergePoint(*i, BB, 0))
+      return false;
+  // Okay, it's safe to do this!  Remember this instruction.
+  AggressiveInsts->insert(I);
   return true;
 }
 
 /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
 /// and PointerNullValue. Return NULL if value is not a constant int.
-ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
+static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
   // Normal constant int.
   ConstantInt *CI = dyn_cast<ConstantInt>(V);
   if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
@@ -296,77 +299,94 @@ ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
   return 0;
 }
 
-/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
-/// icmp_eq instructions that compare a value against a constant, return the
-/// value being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) {
-  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Inst->getOpcode() == Instruction::ICmp &&
-        cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
-      if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
-        Values.push_back(C);
-        return Inst->getOperand(0);
-      } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
-        Values.push_back(C);
-        return Inst->getOperand(1);
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+                       const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return 0;
+  
+  // If this is an icmp against a constant, handle this as one of the cases.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+    if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+      if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+        UsedICmps++;
+        Vals.push_back(C);
+        return I->getOperand(0);
       }
-    } else if (Inst->getOpcode() == Instruction::Or) {
-      if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
-        if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
-          if (LHS == RHS)
-            return LHS;
+      
+      // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
+      // the set.
+      ConstantRange Span =
+        ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+      
+      // If this is an and/!= check then we want to optimize "x ugt 2" into
+      // x != 0 && x != 1.
+      if (!isEQ)
+        Span = Span.inverse();
+      
+      // If there are a ton of values, we don't want to make a ginormous switch.
+      if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+          // We don't handle wrapped sets yet.
+          Span.isWrappedSet())
+        return 0;
+      
+      for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+        Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+      UsedICmps++;
+      return I->getOperand(0);
     }
+    return 0;
   }
-  return 0;
-}
+  
+  // Otherwise, we can only handle an | or &, depending on isEQ.
+  if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+    return 0;
+  
+  unsigned NumValsBeforeLHS = Vals.size();
+  unsigned UsedICmpsBeforeLHS = UsedICmps;
+  if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+                                          isEQ, UsedICmps)) {
+    unsigned NumVals = Vals.size();
+    unsigned UsedICmpsBeforeRHS = UsedICmps;
+    if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+                                            isEQ, UsedICmps)) {
+      if (LHS == RHS)
+        return LHS;
+      Vals.resize(NumVals);
+      UsedICmps = UsedICmpsBeforeRHS;
+    }
 
-/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
-/// setne instructions that compare a value against a constant, return the value
-/// being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) {
-  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Inst->getOpcode() == Instruction::ICmp &&
-               cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
-      if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
-        Values.push_back(C);
-        return Inst->getOperand(0);
-      } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
-        Values.push_back(C);
-        return Inst->getOperand(1);
-      }
-    } else if (Inst->getOpcode() == Instruction::And) {
-      if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
-        if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
-          if (LHS == RHS)
-            return LHS;
+    // The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
+    // set it and return success.
+    if (Extra == 0 || Extra == I->getOperand(1)) {
+      Extra = I->getOperand(1);
+      return LHS;
     }
+    
+    Vals.resize(NumValsBeforeLHS);
+    UsedICmps = UsedICmpsBeforeLHS;
+    return 0;
   }
-  return 0;
-}
-
-/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
-/// bunch of comparisons of one value against constants, return the value and
-/// the constants being compared.
-bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal,
-                                            std::vector<ConstantInt*> &Values) {
-  if (Cond->getOpcode() == Instruction::Or) {
-    CompVal = GatherConstantSetEQs(Cond, Values);
-
-    // Return true to indicate that the condition is true if the CompVal is
-    // equal to one of the constants.
-    return true;
-  } else if (Cond->getOpcode() == Instruction::And) {
-    CompVal = GatherConstantSetNEs(Cond, Values);
-
-    // Return false to indicate that the condition is false if the CompVal is
-    // equal to one of the constants.
-    return false;
+  
+  // If the LHS can't be folded in, but Extra is available and RHS can, try to
+  // use LHS as Extra.
+  if (Extra == 0 || Extra == I->getOperand(0)) {
+    Value *OldExtra = Extra;
+    Extra = I->getOperand(0);
+    if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+                                            isEQ, UsedICmps))
+      return RHS;
+    assert(Vals.size() == NumValsBeforeLHS);
+    Extra = OldExtra;
   }
-  return false;
+  
+  return 0;
 }
-
+      
 static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
   Instruction* Cond = 0;
   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -374,6 +394,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
   } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
     if (BI->isConditional())
       Cond = dyn_cast<Instruction>(BI->getCondition());
+  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+    Cond = dyn_cast<Instruction>(IBI->getAddress());
   }
 
   TI->eraseFromParent();
@@ -395,7 +417,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
       if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
         if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
              ICI->getPredicate() == ICmpInst::ICMP_NE) &&
-            GetConstantInt(ICI->getOperand(1)))
+            GetConstantInt(ICI->getOperand(1), TD))
           CV = ICI->getOperand(0);
 
   // Unwrap any lossless ptrtoint cast.
@@ -420,7 +442,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
 
   BranchInst *BI = cast<BranchInst>(TI);
   ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
-  Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)),
+  Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD),
                                  BI->getSuccessor(ICI->getPredicate() ==
                                                   ICmpInst::ICMP_NE)));
   return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
@@ -459,8 +481,8 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
   }
 
   // Otherwise, just sort both lists and compare element by element.
-  std::sort(V1->begin(), V1->end());
-  std::sort(V2->begin(), V2->end());
+  array_pod_sort(V1->begin(), V1->end());
+  array_pod_sort(V2->begin(), V2->end());
   unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
   while (i1 != e1 && i2 != e2) {
     if ((*V1)[i1].first == (*V2)[i2].first)
@@ -506,90 +528,87 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
     // If we are here, we know that the value is none of those cases listed in
     // PredCases.  If there are any cases in ThisCases that are in PredCases, we
     // can simplify TI.
-    if (ValuesOverlap(PredCases, ThisCases)) {
-      if (isa<BranchInst>(TI)) {
-        // Okay, one of the successors of this condbr is dead.  Convert it to a
-        // uncond br.
-        assert(ThisCases.size() == 1 && "Branch can only have one case!");
-        // Insert the new branch.
-        Instruction *NI = BranchInst::Create(ThisDef, TI);
-        (void) NI;
-
-        // Remove PHI node entries for the dead edge.
-        ThisCases[0].second->removePredecessor(TI->getParent());
-
-        DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
-             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
-
-        EraseTerminatorInstAndDCECond(TI);
-        return true;
-
-      } else {
-        SwitchInst *SI = cast<SwitchInst>(TI);
-        // Okay, TI has cases that are statically dead, prune them away.
-        SmallPtrSet<Constant*, 16> DeadCases;
-        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
-          DeadCases.insert(PredCases[i].first);
+    if (!ValuesOverlap(PredCases, ThisCases))
+      return false;
+    
+    if (isa<BranchInst>(TI)) {
+      // Okay, one of the successors of this condbr is dead.  Convert it to a
+      // uncond br.
+      assert(ThisCases.size() == 1 && "Branch can only have one case!");
+      // Insert the new branch.
+      Instruction *NI = BranchInst::Create(ThisDef, TI);
+      (void) NI;
 
-        DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
-                     << "Through successor TI: " << *TI);
+      // Remove PHI node entries for the dead edge.
+      ThisCases[0].second->removePredecessor(TI->getParent());
 
-        for (unsigned i = SI->getNumCases()-1; i != 0; --i)
-          if (DeadCases.count(SI->getCaseValue(i))) {
-            SI->getSuccessor(i)->removePredecessor(TI->getParent());
-            SI->removeCase(i);
-          }
+      DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+           << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
 
-        DEBUG(dbgs() << "Leaving: " << *TI << "\n");
-        return true;
-      }
+      EraseTerminatorInstAndDCECond(TI);
+      return true;
     }
-
-  } else {
-    // Otherwise, TI's block must correspond to some matched value.  Find out
-    // which value (or set of values) this is.
-    ConstantInt *TIV = 0;
-    BasicBlock *TIBB = TI->getParent();
+      
+    SwitchInst *SI = cast<SwitchInst>(TI);
+    // Okay, TI has cases that are statically dead, prune them away.
+    SmallPtrSet<Constant*, 16> DeadCases;
     for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
-      if (PredCases[i].second == TIBB) {
-        if (TIV == 0)
-          TIV = PredCases[i].first;
-        else
-          return false;  // Cannot handle multiple values coming to this block.
-      }
-    assert(TIV && "No edge from pred to succ?");
-
-    // Okay, we found the one constant that our value can be if we get into TI's
-    // BB.  Find out which successor will unconditionally be branched to.
-    BasicBlock *TheRealDest = 0;
-    for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
-      if (ThisCases[i].first == TIV) {
-        TheRealDest = ThisCases[i].second;
-        break;
+      DeadCases.insert(PredCases[i].first);
+
+    DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+                 << "Through successor TI: " << *TI);
+
+    for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+      if (DeadCases.count(SI->getCaseValue(i))) {
+        SI->getSuccessor(i)->removePredecessor(TI->getParent());
+        SI->removeCase(i);
       }
 
-    // If not handled by any explicit cases, it is handled by the default case.
-    if (TheRealDest == 0) TheRealDest = ThisDef;
+    DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+    return true;
+  }
+  
+  // Otherwise, TI's block must correspond to some matched value.  Find out
+  // which value (or set of values) this is.
+  ConstantInt *TIV = 0;
+  BasicBlock *TIBB = TI->getParent();
+  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+    if (PredCases[i].second == TIBB) {
+      if (TIV != 0)
+        return false;  // Cannot handle multiple values coming to this block.
+      TIV = PredCases[i].first;
+    }
+  assert(TIV && "No edge from pred to succ?");
+
+  // Okay, we found the one constant that our value can be if we get into TI's
+  // BB.  Find out which successor will unconditionally be branched to.
+  BasicBlock *TheRealDest = 0;
+  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+    if (ThisCases[i].first == TIV) {
+      TheRealDest = ThisCases[i].second;
+      break;
+    }
 
-    // Remove PHI node entries for dead edges.
-    BasicBlock *CheckEdge = TheRealDest;
-    for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
-      if (*SI != CheckEdge)
-        (*SI)->removePredecessor(TIBB);
-      else
-        CheckEdge = 0;
+  // If not handled by any explicit cases, it is handled by the default case.
+  if (TheRealDest == 0) TheRealDest = ThisDef;
 
-    // Insert the new branch.
-    Instruction *NI = BranchInst::Create(TheRealDest, TI);
-    (void) NI;
+  // Remove PHI node entries for dead edges.
+  BasicBlock *CheckEdge = TheRealDest;
+  for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+    if (*SI != CheckEdge)
+      (*SI)->removePredecessor(TIBB);
+    else
+      CheckEdge = 0;
 
-    DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
-              << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+  // Insert the new branch.
+  Instruction *NI = BranchInst::Create(TheRealDest, TI);
+  (void) NI;
 
-    EraseTerminatorInstAndDCECond(TI);
-    return true;
-  }
-  return false;
+  DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+            << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+  EraseTerminatorInstAndDCECond(TI);
+  return true;
 }
 
 namespace {
@@ -603,6 +622,16 @@ namespace {
   };
 }
 
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+  const ConstantInt *LHS = *(const ConstantInt**)P1;
+  const ConstantInt *RHS = *(const ConstantInt**)P2;
+  if (LHS->getValue().ult(RHS->getValue()))
+    return 1;
+  if (LHS->getValue() == RHS->getValue())
+    return 0;
+  return -1;
+}
+
 /// FoldValueComparisonIntoPredecessors - The specified terminator is a value
 /// equality comparison instruction (either a switch or a branch on "X == c").
 /// See if any of the predecessors of the terminator block are value comparisons
@@ -798,7 +827,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
     if (!I2->use_empty())
       I2->replaceAllUsesWith(I1);
     I1->intersectOptionalDataWith(I2);
-    BB2->getInstList().erase(I2);
+    I2->eraseFromParent();
 
     I1 = BB1_Itr++;
     while (isa<DbgInfoIntrinsic>(I1))
@@ -836,18 +865,18 @@ HoistTerminator:
          (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
       Value *BB1V = PN->getIncomingValueForBlock(BB1);
       Value *BB2V = PN->getIncomingValueForBlock(BB2);
-      if (BB1V != BB2V) {
-        // These values do not agree.  Insert a select instruction before NT
-        // that determines the right value.
-        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
-        if (SI == 0)
-          SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
-                                  BB1V->getName()+"."+BB2V->getName(), NT);
-        // Make the PHI node use the select for all incoming values for BB1/BB2
-        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-          if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
-            PN->setIncomingValue(i, SI);
-      }
+      if (BB1V == BB2V) continue;
+      
+      // These values do not agree.  Insert a select instruction before NT
+      // that determines the right value.
+      SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+      if (SI == 0)
+        SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
+                                BB1V->getName()+"."+BB2V->getName(), NT);
+      // Make the PHI node use the select for all incoming values for BB1/BB2
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+          PN->setIncomingValue(i, SI);
     }
   }
 
@@ -872,21 +901,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
        BBI != BBE; ++BBI) {
     Instruction *I = BBI;
     // Skip debug info.
-    if (isa<DbgInfoIntrinsic>(I))   continue;
-    if (I == Term)  break;
+    if (isa<DbgInfoIntrinsic>(I)) continue;
+    if (I == Term) break;
 
-    if (!HInst)
-      HInst = I;
-    else
+    if (HInst)
       return false;
+    HInst = I;
   }
   if (!HInst)
     return false;
 
   // Be conservative for now. FP select instruction can often be expensive.
   Value *BrCond = BI->getCondition();
-  if (isa<Instruction>(BrCond) &&
-      cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp)
+  if (isa<FCmpInst>(BrCond))
     return false;
 
   // If BB1 is actually on the false edge of the conditional branch, remember
@@ -990,12 +1017,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
     for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
         UI != UE; ++UI) {
       Instruction *Use = cast<Instruction>(*UI);
-      if (BB1Insns.count(Use)) {
-        // If BrCond uses the instruction that place it just before
-        // branch instruction.
-        InsertPos = BI;
-        break;
-      }
+      if (!BB1Insns.count(Use)) continue;
+      
+      // If BrCond uses the instruction that place it just before
+      // branch instruction.
+      InsertPos = BI;
+      break;
     }
   } else
     InsertPos = BI;
@@ -1016,8 +1043,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
   for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
     PHINode *PN = PHIUses[i];
     for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
-      if (PN->getIncomingBlock(j) == BB1 ||
-          PN->getIncomingBlock(j) == BIParent)
+      if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
         PN->setIncomingValue(j, SI);
   }
 
@@ -1055,7 +1081,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
 /// that is defined in the same block as the branch and if any PHI entries are
 /// constants, thread edges corresponding to that entry to be branches to their
 /// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
   BasicBlock *BB = BI->getParent();
   PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
   // NOTE: we currently cannot transform this case if the PHI node is used
@@ -1075,78 +1101,73 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
   // Okay, this is a simple enough basic block.  See if any phi values are
   // constants.
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-    ConstantInt *CB;
-    if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
-        CB->getType()->isIntegerTy(1)) {
-      // Okay, we now know that all edges from PredBB should be revectored to
-      // branch to RealDest.
-      BasicBlock *PredBB = PN->getIncomingBlock(i);
-      BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+    ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+    if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+    
+    // Okay, we now know that all edges from PredBB should be revectored to
+    // branch to RealDest.
+    BasicBlock *PredBB = PN->getIncomingBlock(i);
+    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+    
+    if (RealDest == BB) continue;  // Skip self loops.
+    
+    // The dest block might have PHI nodes, other predecessors and other
+    // difficult cases.  Instead of being smart about this, just insert a new
+    // block that jumps to the destination block, effectively splitting
+    // the edge we are about to create.
+    BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+                                            RealDest->getName()+".critedge",
+                                            RealDest->getParent(), RealDest);
+    BranchInst::Create(RealDest, EdgeBB);
+    
+    // Update PHI nodes.
+    AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+    // BB may have instructions that are being threaded over.  Clone these
+    // instructions into EdgeBB.  We know that there will be no uses of the
+    // cloned instructions outside of EdgeBB.
+    BasicBlock::iterator InsertPt = EdgeBB->begin();
+    DenseMap<Value*, Value*> TranslateMap;  // Track translated values.
+    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+        TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+        continue;
+      }
+      // Clone the instruction.
+      Instruction *N = BBI->clone();
+      if (BBI->hasName()) N->setName(BBI->getName()+".c");
       
-      if (RealDest == BB) continue;  // Skip self loops.
+      // Update operands due to translation.
+      for (User::op_iterator i = N->op_begin(), e = N->op_end();
+           i != e; ++i) {
+        DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+        if (PI != TranslateMap.end())
+          *i = PI->second;
+      }
       
-      // The dest block might have PHI nodes, other predecessors and other
-      // difficult cases.  Instead of being smart about this, just insert a new
-      // block that jumps to the destination block, effectively splitting
-      // the edge we are about to create.
-      BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
-                                              RealDest->getName()+".critedge",
-                                              RealDest->getParent(), RealDest);
-      BranchInst::Create(RealDest, EdgeBB);
-      PHINode *PN;
-      for (BasicBlock::iterator BBI = RealDest->begin();
-           (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
-        Value *V = PN->getIncomingValueForBlock(BB);
-        PN->addIncoming(V, EdgeBB);
+      // Check for trivial simplification.
+      if (Value *V = SimplifyInstruction(N, TD)) {
+        TranslateMap[BBI] = V;
+        delete N;   // Instruction folded away, don't need actual inst
+      } else {
+        // Insert the new instruction into its new home.
+        EdgeBB->getInstList().insert(InsertPt, N);
+        if (!BBI->use_empty())
+          TranslateMap[BBI] = N;
       }
+    }
 
-      // BB may have instructions that are being threaded over.  Clone these
-      // instructions into EdgeBB.  We know that there will be no uses of the
-      // cloned instructions outside of EdgeBB.
-      BasicBlock::iterator InsertPt = EdgeBB->begin();
-      std::map<Value*, Value*> TranslateMap;  // Track translated values.
-      for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
-        if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
-          TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
-        } else {
-          // Clone the instruction.
-          Instruction *N = BBI->clone();
-          if (BBI->hasName()) N->setName(BBI->getName()+".c");
-          
-          // Update operands due to translation.
-          for (User::op_iterator i = N->op_begin(), e = N->op_end();
-               i != e; ++i) {
-            std::map<Value*, Value*>::iterator PI =
-              TranslateMap.find(*i);
-            if (PI != TranslateMap.end())
-              *i = PI->second;
-          }
-          
-          // Check for trivial simplification.
-          if (Constant *C = ConstantFoldInstruction(N)) {
-            TranslateMap[BBI] = C;
-            delete N;   // Constant folded away, don't need actual inst
-          } else {
-            // Insert the new instruction into its new home.
-            EdgeBB->getInstList().insert(InsertPt, N);
-            if (!BBI->use_empty())
-              TranslateMap[BBI] = N;
-          }
-        }
+    // Loop over all of the edges from PredBB to BB, changing them to branch
+    // to EdgeBB instead.
+    TerminatorInst *PredBBTI = PredBB->getTerminator();
+    for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+      if (PredBBTI->getSuccessor(i) == BB) {
+        BB->removePredecessor(PredBB);
+        PredBBTI->setSuccessor(i, EdgeBB);
       }
-
-      // Loop over all of the edges from PredBB to BB, changing them to branch
-      // to EdgeBB instead.
-      TerminatorInst *PredBBTI = PredBB->getTerminator();
-      for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
-        if (PredBBTI->getSuccessor(i) == BB) {
-          BB->removePredecessor(PredBB);
-          PredBBTI->setSuccessor(i, EdgeBB);
-        }
-      
-      // Recurse, simplifying any other constants.
-      return FoldCondBranchOnPHI(BI) | true;
-    }
+    
+    // Recurse, simplifying any other constants.
+    return FoldCondBranchOnPHI(BI, TD) | true;
   }
 
   return false;
@@ -1154,18 +1175,20 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
 
 /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
 /// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
   // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
   // statement", which has a very simple dominance structure.  Basically, we
   // are trying to find the condition that is being branched on, which
   // subsequently causes this merge to happen.  We really want control
   // dependence information for this check, but simplifycfg can't keep it up
   // to date, and this catches most of the cases we care about anyway.
-  //
   BasicBlock *BB = PN->getParent();
   BasicBlock *IfTrue, *IfFalse;
   Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
-  if (!IfCond) return false;
+  if (!IfCond ||
+      // Don't bother if the branch will be constant folded trivially.
+      isa<ConstantInt>(IfCond))
+    return false;
   
   // Okay, we found that we can merge this two-entry phi node into a select.
   // Doing so would require us to fold *all* two entry phi nodes in this block.
@@ -1177,42 +1200,49 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
     if (NumPhis > 2)
       return false;
   
-  DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
-        << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
-  
   // Loop over the PHI's seeing if we can promote them all to select
   // instructions.  While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
-  std::set<Instruction*> AggressiveInsts;
-  
-  BasicBlock::iterator AfterPHIIt = BB->begin();
-  while (isa<PHINode>(AfterPHIIt)) {
-    PHINode *PN = cast<PHINode>(AfterPHIIt++);
-    if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
-      if (PN->getIncomingValue(0) != PN)
-        PN->replaceAllUsesWith(PN->getIncomingValue(0));
-      else
-        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-    } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
-                                    &AggressiveInsts) ||
-               !DominatesMergePoint(PN->getIncomingValue(1), BB,
-                                    &AggressiveInsts)) {
-      return false;
+  SmallPtrSet<Instruction*, 4> AggressiveInsts;
+  
+  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+    PHINode *PN = cast<PHINode>(II++);
+    if (Value *V = SimplifyInstruction(PN, TD)) {
+      PN->replaceAllUsesWith(V);
+      PN->eraseFromParent();
+      continue;
     }
+    
+    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
+        !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+      return false;
   }
   
+  // If we folded the the first phi, PN dangles at this point.  Refresh it.  If
+  // we ran out of PHIs then we simplified them all.
+  PN = dyn_cast<PHINode>(BB->begin());
+  if (PN == 0) return true;
+  
+  // Don't fold i1 branches on PHIs which contain binary operators.  These can
+  // often be turned into switches and other things.
+  if (PN->getType()->isIntegerTy(1) &&
+      (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+       isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+       isa<BinaryOperator>(IfCond)))
+    return false;
+  
   // If we all PHI nodes are promotable, check to make sure that all
   // instructions in the predecessor blocks can be promoted as well.  If
   // not, we won't be able to get rid of the control flow, so it's not
   // worth promoting to select instructions.
-  BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
-  PN = cast<PHINode>(BB->begin());
-  BasicBlock *Pred = PN->getIncomingBlock(0);
-  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
-    IfBlock1 = Pred;
-    DomBlock = *pred_begin(Pred);
-    for (BasicBlock::iterator I = Pred->begin();
-         !isa<TerminatorInst>(I); ++I)
+  BasicBlock *DomBlock = 0;
+  BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+  BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+  if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+    IfBlock1 = 0;
+  } else {
+    DomBlock = *pred_begin(IfBlock1);
+    for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
       if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
         // This is not an aggressive instruction that we can promote.
         // Because of this, we won't be able to get rid of the control
@@ -1221,12 +1251,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
       }
   }
     
-  Pred = PN->getIncomingBlock(1);
-  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
-    IfBlock2 = Pred;
-    DomBlock = *pred_begin(Pred);
-    for (BasicBlock::iterator I = Pred->begin();
-         !isa<TerminatorInst>(I); ++I)
+  if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+    IfBlock2 = 0;
+  } else {
+    DomBlock = *pred_begin(IfBlock2);
+    for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
       if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
         // This is not an aggressive instruction that we can promote.
         // Because of this, we won't be able to get rid of the control
@@ -1234,56 +1263,45 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
         return false;
       }
   }
+  
+  DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+               << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
       
   // If we can still promote the PHI nodes after this gauntlet of tests,
   // do all of the PHI's now.
-
+  Instruction *InsertPt = DomBlock->getTerminator();
+  
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1) {
-    DomBlock->getInstList().splice(DomBlock->getTerminator(),
-                                   IfBlock1->getInstList(),
-                                   IfBlock1->begin(),
+  if (IfBlock1)
+    DomBlock->getInstList().splice(InsertPt,
+                                   IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator());
-  }
-  if (IfBlock2) {
-    DomBlock->getInstList().splice(DomBlock->getTerminator(),
-                                   IfBlock2->getInstList(),
-                                   IfBlock2->begin(),
+  if (IfBlock2)
+    DomBlock->getInstList().splice(InsertPt,
+                                   IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator());
-  }
   
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction.
-    Value *TrueVal =
-      PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
-    Value *FalseVal =
-      PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+    Value *TrueVal  = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+    Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
     
-    Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+    Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", InsertPt);
     PN->replaceAllUsesWith(NV);
     NV->takeName(PN);
-    
-    BB->getInstList().erase(PN);
+    PN->eraseFromParent();
   }
+  
+  // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+  // has been flattened.  Change DomBlock to jump directly to our new block to
+  // avoid other simplifycfg's kicking in on the diamond.
+  TerminatorInst *OldTI = DomBlock->getTerminator();
+  BranchInst::Create(BB, OldTI);
+  OldTI->eraseFromParent();
   return true;
 }
 
-/// isTerminatorFirstRelevantInsn - Return true if Term is very first 
-/// instruction ignoring Phi nodes and dbg intrinsics.
-static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) {
-  BasicBlock::iterator BBI = Term;
-  while (BBI != BB->begin()) {
-    --BBI;
-    if (!isa<DbgInfoIntrinsic>(BBI))
-      break;
-  }
-
-  if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI))
-    return true;
-  return false;
-}
-
 /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
 /// to two returning blocks, try to merge them together into one return,
 /// introducing a select if the return values disagree.
@@ -1297,9 +1315,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
   // Check to ensure both blocks are empty (just a return) or optionally empty
   // with PHI nodes.  If there are other instructions, merging would cause extra
   // computation on one path or the other.
-  if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet))
+  if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
     return false;
-  if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet))
+  if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
     return false;
 
   // Okay, we found a branch that is going to two return nodes.  If
@@ -1386,7 +1404,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   // must be at the front of the block.
   BasicBlock::iterator FrontIt = BB->front();
   // Ignore dbg intrinsics.
-  while(isa<DbgInfoIntrinsic>(FrontIt))
+  while (isa<DbgInfoIntrinsic>(FrontIt))
     ++FrontIt;
     
   // Allow a single instruction to be hoisted in addition to the compare
@@ -1470,7 +1488,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
         UsedValues.erase(Pair.first);
         if (UsedValues.empty()) break;
         
-        if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+        if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
           for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
                OI != OE; ++OI)
             Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
@@ -1498,9 +1516,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
     
     // If we need to invert the condition in the pred block to match, do so now.
     if (InvertPredCond) {
-      Value *NewCond =
-        BinaryOperator::CreateNot(PBI->getCondition(),
+      Value *NewCond = PBI->getCondition();
+      
+      if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+        CmpInst *CI = cast<CmpInst>(NewCond);
+        CI->setPredicate(CI->getInversePredicate());
+      } else {
+        NewCond = BinaryOperator::CreateNot(NewCond,
                                   PBI->getCondition()->getName()+".not", PBI);
+      }
+      
       PBI->setCondition(NewCond);
       BasicBlock *OldTrue = PBI->getSuccessor(0);
       BasicBlock *OldFalse = PBI->getSuccessor(1);
@@ -1686,17 +1711,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   
   // OtherDest may have phi nodes.  If so, add an entry from PBI's
   // block that are identical to the entries for BI's block.
-  PHINode *PN;
-  for (BasicBlock::iterator II = OtherDest->begin();
-       (PN = dyn_cast<PHINode>(II)); ++II) {
-    Value *V = PN->getIncomingValueForBlock(BB);
-    PN->addIncoming(V, PBI->getParent());
-  }
+  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
   
   // We know that the CommonDest already had an edge from PBI to
   // it.  If it has PHIs though, the PHIs may have different
   // entries for BB and PBI's BB.  If so, insert a select to make
   // them agree.
+  PHINode *PN;
   for (BasicBlock::iterator II = CommonDest->begin();
        (PN = dyn_cast<PHINode>(II)); ++II) {
     Value *BIV = PN->getIncomingValueForBlock(BB);
@@ -1718,481 +1739,789 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   return true;
 }
 
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
-  bool Changed = false;
-  Function *M = BB->getParent();
-
-  assert(BB && BB->getParent() && "Block not embedded in function!");
-  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+                                       BasicBlock *TrueBB, BasicBlock *FalseBB){
+  // Remove any superfluous successor edges from the CFG.
+  // First, figure out which successors to preserve.
+  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+  // successor.
+  BasicBlock *KeepEdge1 = TrueBB;
+  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+  // Then remove the rest.
+  for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+    BasicBlock *Succ = OldTerm->getSuccessor(I);
+    // Make sure only to keep exactly one copy of each edge.
+    if (Succ == KeepEdge1)
+      KeepEdge1 = 0;
+    else if (Succ == KeepEdge2)
+      KeepEdge2 = 0;
+    else
+      Succ->removePredecessor(OldTerm->getParent());
+  }
 
-  // Remove basic blocks that have no predecessors (except the entry block)...
-  // or that just have themself as a predecessor.  These are unreachable.
-  if ((pred_begin(BB) == pred_end(BB) &&
-       &BB->getParent()->getEntryBlock() != BB) ||
-      BB->getSinglePredecessor() == BB) {
-    DEBUG(dbgs() << "Removing BB: \n" << *BB);
-    DeleteDeadBlock(BB);
-    return true;
+  // Insert an appropriate new terminator.
+  if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+    if (TrueBB == FalseBB)
+      // We were only looking for one successor, and it was present.
+      // Create an unconditional branch to it.
+      BranchInst::Create(TrueBB, OldTerm);
+    else
+      // We found both of the successors we were looking for.
+      // Create a conditional branch sharing the condition of the select.
+      BranchInst::Create(TrueBB, FalseBB, Cond, OldTerm);
+  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+    // Neither of the selected blocks were successors, so this
+    // terminator must be unreachable.
+    new UnreachableInst(OldTerm->getContext(), OldTerm);
+  } else {
+    // One of the selected values was a successor, but the other wasn't.
+    // Insert an unconditional branch to the one that was found;
+    // the edge to the one that wasn't must be unreachable.
+    if (KeepEdge1 == 0)
+      // Only TrueBB was found.
+      BranchInst::Create(TrueBB, OldTerm);
+    else
+      // Only FalseBB was found.
+      BranchInst::Create(FalseBB, OldTerm);
   }
 
-  // Check to see if we can constant propagate this terminator instruction
-  // away...
-  Changed |= ConstantFoldTerminator(BB);
+  EraseTerminatorInstAndDCECond(OldTerm);
+  return true;
+}
 
-  // Check for and eliminate duplicate PHI nodes in this block.
-  Changed |= EliminateDuplicatePHINodes(BB);
+// SimplifyIndirectBrOnSelect - Replaces
+//   (indirectbr (select cond, blockaddress(@fn, BlockA),
+//                             blockaddress(@fn, BlockB)))
+// with
+//   (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+  // Check that both operands of the select are block addresses.
+  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+  if (!TBA || !FBA)
+    return false;
 
-  // If there is a trivial two-entry PHI node in this basic block, and we can
-  // eliminate it, do so now.
-  if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
-    if (PN->getNumIncomingValues() == 2)
-      Changed |= FoldTwoEntryPHINode(PN); 
+  // Extract the actual blocks.
+  BasicBlock *TrueBB = TBA->getBasicBlock();
+  BasicBlock *FalseBB = FBA->getBasicBlock();
 
-  // If this is a returning block with only PHI nodes in it, fold the return
-  // instruction into any unconditional branch predecessors.
-  //
-  // If any predecessor is a conditional branch that just selects among
-  // different return values, fold the replace the branch/return with a select
-  // and return.
-  if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
-    if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) {
-      // Find predecessors that end with branches.
-      SmallVector<BasicBlock*, 8> UncondBranchPreds;
-      SmallVector<BranchInst*, 8> CondBranchPreds;
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-        BasicBlock *P = *PI;
-        TerminatorInst *PTI = P->getTerminator();
-        if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
-          if (BI->isUnconditional())
-            UncondBranchPreds.push_back(P);
-          else
-            CondBranchPreds.push_back(BI);
-        }
-      }
+  // Perform the actual simplification.
+  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+}
 
-      // If we found some, do the transformation!
-      if (!UncondBranchPreds.empty()) {
-        while (!UncondBranchPreds.empty()) {
-          BasicBlock *Pred = UncondBranchPreds.pop_back_val();
-          DEBUG(dbgs() << "FOLDING: " << *BB
-                       << "INTO UNCOND BRANCH PRED: " << *Pred);
-          Instruction *UncondBranch = Pred->getTerminator();
-          // Clone the return and add it to the end of the predecessor.
-          Instruction *NewRet = RI->clone();
-          Pred->getInstList().push_back(NewRet);
-
-          // If the return instruction returns a value, and if the value was a
-          // PHI node in "BB", propagate the right value into the return.
-          for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
-               i != e; ++i)
-            if (PHINode *PN = dyn_cast<PHINode>(*i))
-              if (PN->getParent() == BB)
-                *i = PN->getIncomingValueForBlock(Pred);
-          
-          // Update any PHI nodes in the returning block to realize that we no
-          // longer branch to them.
-          BB->removePredecessor(Pred);
-          Pred->getInstList().erase(UncondBranch);
-        }
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch.  We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
+/// this case, we merge the first two "or's of icmp" into a switch, but then the
+/// default value goes to an uncond block with a seteq in it, we get something
+/// like:
+///
+///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
+/// DEFAULT:
+///   %tmp = icmp eq i8 %A, 92
+///   br label %end
+/// end:
+///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+/// 
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+                                                  const TargetData *TD) {
+  BasicBlock *BB = ICI->getParent();
+  // If the block has any PHIs in it or the icmp has multiple uses, it is too
+  // complex.
+  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+  Value *V = ICI->getOperand(0);
+  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+  
+  // The pattern we're looking for is where our only predecessor is a switch on
+  // 'V' and this block is the default case for the switch.  In this case we can
+  // fold the compared value into the switch to simplify things.
+  BasicBlock *Pred = BB->getSinglePredecessor();
+  if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+  
+  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+  if (SI->getCondition() != V)
+    return false;
+  
+  // If BB is reachable on a non-default case, then we simply know the value of
+  // V in this block.  Substitute it and constant fold the icmp instruction
+  // away.
+  if (SI->getDefaultDest() != BB) {
+    ConstantInt *VVal = SI->findCaseDest(BB);
+    assert(VVal && "Should have a unique destination value");
+    ICI->setOperand(0, VVal);
+    
+    if (Value *V = SimplifyInstruction(ICI, TD)) {
+      ICI->replaceAllUsesWith(V);
+      ICI->eraseFromParent();
+    }
+    // BB is now empty, so it is likely to simplify away.
+    return SimplifyCFG(BB) | true;
+  }
+  
+  // Ok, the block is reachable from the default dest.  If the constant we're
+  // comparing exists in one of the other edges, then we can constant fold ICI
+  // and zap it.
+  if (SI->findCaseValue(Cst) != 0) {
+    Value *V;
+    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+      V = ConstantInt::getFalse(BB->getContext());
+    else
+      V = ConstantInt::getTrue(BB->getContext());
+    
+    ICI->replaceAllUsesWith(V);
+    ICI->eraseFromParent();
+    // BB is now empty, so it is likely to simplify away.
+    return SimplifyCFG(BB) | true;
+  }
+  
+  // The use of the icmp has to be in the 'end' block, by the only PHI node in
+  // the block.
+  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+  PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+  if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+    return false;
 
-        // If we eliminated all predecessors of the block, delete the block now.
-        if (pred_begin(BB) == pred_end(BB))
-          // We know there are no successors, so just nuke the block.
-          M->getBasicBlockList().erase(BB);
+  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+  // true in the PHI.
+  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+  Constant *NewCst     = ConstantInt::getFalse(BB->getContext());
 
-        return true;
-      }
+  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+    std::swap(DefaultCst, NewCst);
 
-      // Check out all of the conditional branches going to this return
-      // instruction.  If any of them just select between returns, change the
-      // branch itself into a select/return pair.
-      while (!CondBranchPreds.empty()) {
-        BranchInst *BI = CondBranchPreds.pop_back_val();
-
-        // Check to see if the non-BB successor is also a return block.
-        if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
-            isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
-            SimplifyCondBranchToTwoReturns(BI))
-          return true;
-      }
-    }
-  } else if (isa<UnwindInst>(BB->begin())) {
-    // Check to see if the first instruction in this block is just an unwind.
-    // If so, replace any invoke instructions which use this as an exception
-    // destination with call instructions.
-    //
-    SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
-    while (!Preds.empty()) {
-      BasicBlock *Pred = Preds.back();
-      if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
-        if (II->getUnwindDest() == BB) {
-          // Insert a new branch instruction before the invoke, because this
-          // is now a fall through.
-          BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
-          Pred->getInstList().remove(II);   // Take out of symbol table
-
-          // Insert the call now.
-          SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
-          CallInst *CI = CallInst::Create(II->getCalledValue(),
-                                          Args.begin(), Args.end(),
-                                          II->getName(), BI);
-          CI->setCallingConv(II->getCallingConv());
-          CI->setAttributes(II->getAttributes());
-          // If the invoke produced a value, the Call now does instead.
-          II->replaceAllUsesWith(CI);
-          delete II;
-          Changed = true;
-        }
+  // Replace ICI (which is used by the PHI for the default value) with true or
+  // false depending on if it is EQ or NE.
+  ICI->replaceAllUsesWith(DefaultCst);
+  ICI->eraseFromParent();
 
-      Preds.pop_back();
-    }
+  // Okay, the switch goes to this block on a default value.  Add an edge from
+  // the switch to the merge point on the compared value.
+  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+                                         BB->getParent(), BB);
+  SI->addCase(Cst, NewBB);
+  
+  // NewBB branches to the phi block, add the uncond branch and the phi entry.
+  BranchInst::Create(SuccBlock, NewBB);
+  PHIUse->addIncoming(NewCst, NewBB);
+  return true;
+}
 
-    // If this block is now dead, remove it.
-    if (pred_begin(BB) == pred_end(BB)) {
-      // We know there are no successors, so just nuke the block.
-      M->getBasicBlockList().erase(BB);
-      return true;
-    }
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
+  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+  if (Cond == 0) return false;
+  
+  
+  // Change br (X == 0 | X == 1), T, F into a switch instruction.
+  // If this is a bunch of seteq's or'd together, or if it's a bunch of
+  // 'setne's and'ed together, collect them.
+  Value *CompVal = 0;
+  std::vector<ConstantInt*> Values;
+  bool TrueWhenEqual = true;
+  Value *ExtraCase = 0;
+  unsigned UsedICmps = 0;
+  
+  if (Cond->getOpcode() == Instruction::Or) {
+    CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+                                     UsedICmps);
+  } else if (Cond->getOpcode() == Instruction::And) {
+    CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+                                     UsedICmps);
+    TrueWhenEqual = false;
+  }
+  
+  // If we didn't have a multiply compared value, fail.
+  if (CompVal == 0) return false;
 
-  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
-    if (isValueEqualityComparison(SI)) {
-      // If we only have one predecessor, and if it is a branch on this value,
-      // see if that predecessor totally determines the outcome of this switch.
-      if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-        if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
-          return SimplifyCFG(BB) || 1;
-
-      // If the block only contains the switch, see if we can fold the block
-      // away into any preds.
-      BasicBlock::iterator BBI = BB->begin();
-      // Ignore dbg intrinsics.
-      while (isa<DbgInfoIntrinsic>(BBI))
-        ++BBI;
-      if (SI == &*BBI)
-        if (FoldValueComparisonIntoPredecessors(SI))
-          return SimplifyCFG(BB) || 1;
-    }
-  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-    if (BI->isUnconditional()) {
-      BasicBlock::iterator BBI = BB->getFirstNonPHI();
+  // Avoid turning single icmps into a switch.
+  if (UsedICmps <= 1)
+    return false;
 
-      // Ignore dbg intrinsics.
-      while (isa<DbgInfoIntrinsic>(BBI))
-        ++BBI;
-      if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
-        if (BB != &BB->getParent()->getEntryBlock())
-          if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
-            return true;
+  // There might be duplicate constants in the list, which the switch
+  // instruction can't handle, remove them now.
+  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+  
+  // If Extra was used, we require at least two switch values to do the
+  // transformation.  A switch with one value is just an cond branch.
+  if (ExtraCase && Values.size() < 2) return false;
+  
+  // Figure out which block is which destination.
+  BasicBlock *DefaultBB = BI->getSuccessor(1);
+  BasicBlock *EdgeBB    = BI->getSuccessor(0);
+  if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+  
+  BasicBlock *BB = BI->getParent();
+  
+  DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+               << " cases into SWITCH.  BB is:\n" << *BB);
+  
+  // If there are any extra values that couldn't be folded into the switch
+  // then we evaluate them with an explicit branch first.  Split the block
+  // right before the condbr to handle it.
+  if (ExtraCase) {
+    BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+    // Remove the uncond branch added to the old block.
+    TerminatorInst *OldTI = BB->getTerminator();
+    
+    if (TrueWhenEqual)
+      BranchInst::Create(EdgeBB, NewBB, ExtraCase, OldTI);
+    else
+      BranchInst::Create(NewBB, EdgeBB, ExtraCase, OldTI);
       
-    } else {  // Conditional branch
-      if (isValueEqualityComparison(BI)) {
-        // If we only have one predecessor, and if it is a branch on this value,
-        // see if that predecessor totally determines the outcome of this
-        // switch.
-        if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
-          if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
-            return SimplifyCFG(BB) | true;
-
-        // This block must be empty, except for the setcond inst, if it exists.
-        // Ignore dbg intrinsics.
-        BasicBlock::iterator I = BB->begin();
-        // Ignore dbg intrinsics.
-        while (isa<DbgInfoIntrinsic>(I))
-          ++I;
-        if (&*I == BI) {
-          if (FoldValueComparisonIntoPredecessors(BI))
-            return SimplifyCFG(BB) | true;
-        } else if (&*I == cast<Instruction>(BI->getCondition())){
-          ++I;
-          // Ignore dbg intrinsics.
-          while (isa<DbgInfoIntrinsic>(I))
-            ++I;
-          if(&*I == BI) {
-            if (FoldValueComparisonIntoPredecessors(BI))
-              return SimplifyCFG(BB) | true;
-          }
-        }
-      }
+    OldTI->eraseFromParent();
+    
+    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+    // for the edge we just added.
+    AddPredecessorToBlock(EdgeBB, BB, NewBB);
+    
+    DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
+          << "\nEXTRABB = " << *BB);
+    BB = NewBB;
+  }
+  
+  // Convert pointer to int before we switch.
+  if (CompVal->getType()->isPointerTy()) {
+    assert(TD && "Cannot switch on pointer without TargetData");
+    CompVal = new PtrToIntInst(CompVal,
+                               TD->getIntPtrType(CompVal->getContext()),
+                               "magicptr", BI);
+  }
+  
+  // Create the new switch instruction now.
+  SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI);
+  
+  // Add all of the 'cases' to the switch instruction.
+  for (unsigned i = 0, e = Values.size(); i != e; ++i)
+    New->addCase(Values[i], EdgeBB);
+  
+  // We added edges from PI to the EdgeBB.  As such, if there were any
+  // PHI nodes in EdgeBB, they need entries to be added corresponding to
+  // the number of edges added.
+  for (BasicBlock::iterator BBI = EdgeBB->begin();
+       isa<PHINode>(BBI); ++BBI) {
+    PHINode *PN = cast<PHINode>(BBI);
+    Value *InVal = PN->getIncomingValueForBlock(BB);
+    for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+      PN->addIncoming(InVal, BB);
+  }
+  
+  // Erase the old branch instruction.
+  EraseTerminatorInstAndDCECond(BI);
+  
+  DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
+  return true;
+}
 
-      // If this is a branch on a phi node in the current block, thread control
-      // through this block if any PHI node entries are constants.
-      if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
-        if (PN->getParent() == BI->getParent())
-          if (FoldCondBranchOnPHI(BI))
-            return SimplifyCFG(BB) | true;
-
-      // If this basic block is ONLY a setcc and a branch, and if a predecessor
-      // branches to us and one of our successors, fold the setcc into the
-      // predecessor and use logical operations to pick the right destination.
-      if (FoldBranchToCommonDest(BI))
-        return SimplifyCFG(BB) | true;
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
+  BasicBlock *BB = RI->getParent();
+  if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+  
+  // Find predecessors that end with branches.
+  SmallVector<BasicBlock*, 8> UncondBranchPreds;
+  SmallVector<BranchInst*, 8> CondBranchPreds;
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BasicBlock *P = *PI;
+    TerminatorInst *PTI = P->getTerminator();
+    if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+      if (BI->isUnconditional())
+        UncondBranchPreds.push_back(P);
+      else
+        CondBranchPreds.push_back(BI);
+    }
+  }
+  
+  // If we found some, do the transformation!
+  if (!UncondBranchPreds.empty() && DupRet) {
+    while (!UncondBranchPreds.empty()) {
+      BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+      DEBUG(dbgs() << "FOLDING: " << *BB
+            << "INTO UNCOND BRANCH PRED: " << *Pred);
+      (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+    }
+    
+    // If we eliminated all predecessors of the block, delete the block now.
+    if (pred_begin(BB) == pred_end(BB))
+      // We know there are no successors, so just nuke the block.
+      BB->eraseFromParent();
+    
+    return true;
+  }
+  
+  // Check out all of the conditional branches going to this return
+  // instruction.  If any of them just select between returns, change the
+  // branch itself into a select/return pair.
+  while (!CondBranchPreds.empty()) {
+    BranchInst *BI = CondBranchPreds.pop_back_val();
+    
+    // Check to see if the non-BB successor is also a return block.
+    if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+        isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+        SimplifyCondBranchToTwoReturns(BI))
+      return true;
+  }
+  return false;
+}
 
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI) {
+  // Check to see if the first instruction in this block is just an unwind.
+  // If so, replace any invoke instructions which use this as an exception
+  // destination with call instructions.
+  BasicBlock *BB = UI->getParent();
+  if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
 
-      // Scan predecessor blocks for conditional branches.
-      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
-          if (PBI != BI && PBI->isConditional())
-            if (SimplifyCondBranchToCondBranch(PBI, BI))
-              return SimplifyCFG(BB) | true;
-    }
-  } else if (isa<UnreachableInst>(BB->getTerminator())) {
-    // If there are any instructions immediately before the unreachable that can
-    // be removed, do so.
-    Instruction *Unreachable = BB->getTerminator();
-    while (Unreachable != BB->begin()) {
-      BasicBlock::iterator BBI = Unreachable;
-      --BBI;
-      // Do not delete instructions that can have side effects, like calls
-      // (which may never return) and volatile loads and stores.
-      if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
-
-      if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
-        if (SI->isVolatile())
-          break;
-
-      if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
-        if (LI->isVolatile())
-          break;
-
-      // Delete this instruction
-      BB->getInstList().erase(BBI);
+  bool Changed = false;
+  SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+  while (!Preds.empty()) {
+    BasicBlock *Pred = Preds.back();
+    InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
+    if (II && II->getUnwindDest() == BB) {
+      // Insert a new branch instruction before the invoke, because this
+      // is now a fall through.
+      BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+      Pred->getInstList().remove(II);   // Take out of symbol table
+      
+      // Insert the call now.
+      SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+      CallInst *CI = CallInst::Create(II->getCalledValue(),
+                                      Args.begin(), Args.end(),
+                                      II->getName(), BI);
+      CI->setCallingConv(II->getCallingConv());
+      CI->setAttributes(II->getAttributes());
+      // If the invoke produced a value, the Call now does instead.
+      II->replaceAllUsesWith(CI);
+      delete II;
       Changed = true;
     }
+    
+    Preds.pop_back();
+  }
+  
+  // If this block is now dead (and isn't the entry block), remove it.
+  if (pred_begin(BB) == pred_end(BB) &&
+      BB != &BB->getParent()->getEntryBlock()) {
+    // We know there are no successors, so just nuke the block.
+    BB->eraseFromParent();
+    return true;
+  }
+  
+  return Changed;  
+}
 
-    // If the unreachable instruction is the first in the block, take a gander
-    // at all of the predecessors of this instruction, and simplify them.
-    if (&BB->front() == Unreachable) {
-      SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
-      for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
-        TerminatorInst *TI = Preds[i]->getTerminator();
-
-        if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-          if (BI->isUnconditional()) {
-            if (BI->getSuccessor(0) == BB) {
-              new UnreachableInst(TI->getContext(), TI);
-              TI->eraseFromParent();
-              Changed = true;
-            }
-          } else {
-            if (BI->getSuccessor(0) == BB) {
-              BranchInst::Create(BI->getSuccessor(1), BI);
-              EraseTerminatorInstAndDCECond(BI);
-            } else if (BI->getSuccessor(1) == BB) {
-              BranchInst::Create(BI->getSuccessor(0), BI);
-              EraseTerminatorInstAndDCECond(BI);
-              Changed = true;
-            }
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+  BasicBlock *BB = UI->getParent();
+  
+  bool Changed = false;
+  
+  // If there are any instructions immediately before the unreachable that can
+  // be removed, do so.
+  while (UI != BB->begin()) {
+    BasicBlock::iterator BBI = UI;
+    --BBI;
+    // Do not delete instructions that can have side effects, like calls
+    // (which may never return) and volatile loads and stores.
+    if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+    
+    if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+      if (SI->isVolatile())
+        break;
+    
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+      if (LI->isVolatile())
+        break;
+    
+    // Delete this instruction
+    BBI->eraseFromParent();
+    Changed = true;
+  }
+  
+  // If the unreachable instruction is the first in the block, take a gander
+  // at all of the predecessors of this instruction, and simplify them.
+  if (&BB->front() != UI) return Changed;
+  
+  SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+    TerminatorInst *TI = Preds[i]->getTerminator();
+    
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isUnconditional()) {
+        if (BI->getSuccessor(0) == BB) {
+          new UnreachableInst(TI->getContext(), TI);
+          TI->eraseFromParent();
+          Changed = true;
+        }
+      } else {
+        if (BI->getSuccessor(0) == BB) {
+          BranchInst::Create(BI->getSuccessor(1), BI);
+          EraseTerminatorInstAndDCECond(BI);
+        } else if (BI->getSuccessor(1) == BB) {
+          BranchInst::Create(BI->getSuccessor(0), BI);
+          EraseTerminatorInstAndDCECond(BI);
+          Changed = true;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+        if (SI->getSuccessor(i) == BB) {
+          BB->removePredecessor(SI->getParent());
+          SI->removeCase(i);
+          --i; --e;
+          Changed = true;
+        }
+      // If the default value is unreachable, figure out the most popular
+      // destination and make it the default.
+      if (SI->getSuccessor(0) == BB) {
+        std::map<BasicBlock*, unsigned> Popularity;
+        for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+          Popularity[SI->getSuccessor(i)]++;
+        
+        // Find the most popular block.
+        unsigned MaxPop = 0;
+        BasicBlock *MaxBlock = 0;
+        for (std::map<BasicBlock*, unsigned>::iterator
+             I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+          if (I->second > MaxPop) {
+            MaxPop = I->second;
+            MaxBlock = I->first;
           }
-        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+        }
+        if (MaxBlock) {
+          // Make this the new default, allowing us to delete any explicit
+          // edges to it.
+          SI->setSuccessor(0, MaxBlock);
+          Changed = true;
+          
+          // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+          // it.
+          if (isa<PHINode>(MaxBlock->begin()))
+            for (unsigned i = 0; i != MaxPop-1; ++i)
+              MaxBlock->removePredecessor(SI->getParent());
+          
           for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
-            if (SI->getSuccessor(i) == BB) {
-              BB->removePredecessor(SI->getParent());
+            if (SI->getSuccessor(i) == MaxBlock) {
               SI->removeCase(i);
               --i; --e;
-              Changed = true;
-            }
-          // If the default value is unreachable, figure out the most popular
-          // destination and make it the default.
-          if (SI->getSuccessor(0) == BB) {
-            std::map<BasicBlock*, unsigned> Popularity;
-            for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
-              Popularity[SI->getSuccessor(i)]++;
-
-            // Find the most popular block.
-            unsigned MaxPop = 0;
-            BasicBlock *MaxBlock = 0;
-            for (std::map<BasicBlock*, unsigned>::iterator
-                   I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
-              if (I->second > MaxPop) {
-                MaxPop = I->second;
-                MaxBlock = I->first;
-              }
-            }
-            if (MaxBlock) {
-              // Make this the new default, allowing us to delete any explicit
-              // edges to it.
-              SI->setSuccessor(0, MaxBlock);
-              Changed = true;
-
-              // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
-              // it.
-              if (isa<PHINode>(MaxBlock->begin()))
-                for (unsigned i = 0; i != MaxPop-1; ++i)
-                  MaxBlock->removePredecessor(SI->getParent());
-
-              for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
-                if (SI->getSuccessor(i) == MaxBlock) {
-                  SI->removeCase(i);
-                  --i; --e;
-                }
             }
-          }
-        } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
-          if (II->getUnwindDest() == BB) {
-            // Convert the invoke to a call instruction.  This would be a good
-            // place to note that the call does not throw though.
-            BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
-            II->removeFromParent();   // Take out of symbol table
-
-            // Insert the call now...
-            SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
-            CallInst *CI = CallInst::Create(II->getCalledValue(),
-                                            Args.begin(), Args.end(),
-                                            II->getName(), BI);
-            CI->setCallingConv(II->getCallingConv());
-            CI->setAttributes(II->getAttributes());
-            // If the invoke produced a value, the call does now instead.
-            II->replaceAllUsesWith(CI);
-            delete II;
-            Changed = true;
-          }
         }
       }
-
-      // If this block is now dead, remove it.
-      if (pred_begin(BB) == pred_end(BB) &&
-          BB != &BB->getParent()->getEntryBlock()) {
-        // We know there are no successors, so just nuke the block.
-        M->getBasicBlockList().erase(BB);
-        return true;
-      }
-    }
-  } else if (IndirectBrInst *IBI =
-               dyn_cast<IndirectBrInst>(BB->getTerminator())) {
-    // Eliminate redundant destinations.
-    SmallPtrSet<Value *, 8> Succs;
-    for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
-      BasicBlock *Dest = IBI->getDestination(i);
-      if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
-        Dest->removePredecessor(BB);
-        IBI->removeDestination(i);
-        --i; --e;
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+      if (II->getUnwindDest() == BB) {
+        // Convert the invoke to a call instruction.  This would be a good
+        // place to note that the call does not throw though.
+        BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+        II->removeFromParent();   // Take out of symbol table
+        
+        // Insert the call now...
+        SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+        CallInst *CI = CallInst::Create(II->getCalledValue(),
+                                        Args.begin(), Args.end(),
+                                        II->getName(), BI);
+        CI->setCallingConv(II->getCallingConv());
+        CI->setAttributes(II->getAttributes());
+        // If the invoke produced a value, the call does now instead.
+        II->replaceAllUsesWith(CI);
+        delete II;
         Changed = true;
       }
-    } 
+    }
+  }
+  
+  // If this block is now dead, remove it.
+  if (pred_begin(BB) == pred_end(BB) &&
+      BB != &BB->getParent()->getEntryBlock()) {
+    // We know there are no successors, so just nuke the block.
+    BB->eraseFromParent();
+    return true;
+  }
 
-    if (IBI->getNumDestinations() == 0) {
-      // If the indirectbr has no successors, change it to unreachable.
-      new UnreachableInst(IBI->getContext(), IBI);
-      IBI->eraseFromParent();
-      Changed = true;
-    } else if (IBI->getNumDestinations() == 1) {
-      // If the indirectbr has one successor, change it to a direct branch.
-      BranchInst::Create(IBI->getDestination(0), IBI);
-      IBI->eraseFromParent();
+  return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
+/// integer range comparison into a sub, an icmp and a branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
+  assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+  // Make sure all cases point to the same destination and gather the values.
+  SmallVector<ConstantInt *, 16> Cases;
+  Cases.push_back(SI->getCaseValue(1));
+  for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+    if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+      return false;
+    Cases.push_back(SI->getCaseValue(I));
+  }
+  assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+  // Sort the case values, then check if they form a range we can transform.
+  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+  for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+    if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+      return false;
+  }
+
+  Constant *Offset = ConstantExpr::getNeg(Cases.back());
+  Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+  Value *Sub = SI->getCondition();
+  if (!Offset->isNullValue())
+    Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
+  Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
+  BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
+
+  // Prune obsolete incoming values off the successor's PHI nodes.
+  for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+       isa<PHINode>(BBI); ++BBI) {
+    for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+  }
+  SI->eraseFromParent();
+
+  return true;
+}
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
+  // If this switch is too complex to want to look at, ignore it.
+  if (!isValueEqualityComparison(SI))
+    return false;
+
+  BasicBlock *BB = SI->getParent();
+
+  // If we only have one predecessor, and if it is a branch on this value,
+  // see if that predecessor totally determines the outcome of this switch.
+  if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+    if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+      return SimplifyCFG(BB) | true;
+  
+  // If the block only contains the switch, see if we can fold the block
+  // away into any preds.
+  BasicBlock::iterator BBI = BB->begin();
+  // Ignore dbg intrinsics.
+  while (isa<DbgInfoIntrinsic>(BBI))
+    ++BBI;
+  if (SI == &*BBI)
+    if (FoldValueComparisonIntoPredecessors(SI))
+      return SimplifyCFG(BB) | true;
+
+  // Try to transform the switch into an icmp and a branch.
+  if (TurnSwitchRangeIntoICmp(SI))
+    return SimplifyCFG(BB) | true;
+  
+  return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+  BasicBlock *BB = IBI->getParent();
+  bool Changed = false;
+  
+  // Eliminate redundant destinations.
+  SmallPtrSet<Value *, 8> Succs;
+  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+    BasicBlock *Dest = IBI->getDestination(i);
+    if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+      Dest->removePredecessor(BB);
+      IBI->removeDestination(i);
+      --i; --e;
       Changed = true;
     }
+  } 
+
+  if (IBI->getNumDestinations() == 0) {
+    // If the indirectbr has no successors, change it to unreachable.
+    new UnreachableInst(IBI->getContext(), IBI);
+    EraseTerminatorInstAndDCECond(IBI);
+    return true;
+  }
+  
+  if (IBI->getNumDestinations() == 1) {
+    // If the indirectbr has one successor, change it to a direct branch.
+    BranchInst::Create(IBI->getDestination(0), IBI);
+    EraseTerminatorInstAndDCECond(IBI);
+    return true;
   }
+  
+  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+    if (SimplifyIndirectBrOnSelect(IBI, SI))
+      return SimplifyCFG(BB) | true;
+  }
+  return Changed;
+}
 
-  // Merge basic blocks into their predecessor if there is only one distinct
-  // pred, and if there is only one distinct successor of the predecessor, and
-  // if there are no PHI nodes.
-  //
-  if (MergeBlockIntoPredecessor(BB))
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI) {
+  BasicBlock *BB = BI->getParent();
+  
+  // If the Terminator is the only non-phi instruction, simplify the block.
+  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+      TryToSimplifyUncondBranchFromEmptyBlock(BB))
     return true;
+  
+  // If the only instruction in the block is a seteq/setne comparison
+  // against a constant, try to simplify the block.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+      for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+        ;
+      if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD))
+        return true;
+    }
+  
+  return false;
+}
 
-  // Otherwise, if this block only has a single predecessor, and if that block
-  // is a conditional branch, see if we can hoist any code from this block up
-  // into our predecessor.
-  pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
-  BasicBlock *OnlyPred = 0;
-  for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
-    if (!OnlyPred)
-      OnlyPred = *PI;
-    else if (*PI != OnlyPred) {
-      OnlyPred = 0;       // There are multiple different predecessors...
-      break;
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) {
+  BasicBlock *BB = BI->getParent();
+  
+  // Conditional branch
+  if (isValueEqualityComparison(BI)) {
+    // If we only have one predecessor, and if it is a branch on this value,
+    // see if that predecessor totally determines the outcome of this
+    // switch.
+    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+        return SimplifyCFG(BB) | true;
+    
+    // This block must be empty, except for the setcond inst, if it exists.
+    // Ignore dbg intrinsics.
+    BasicBlock::iterator I = BB->begin();
+    // Ignore dbg intrinsics.
+    while (isa<DbgInfoIntrinsic>(I))
+      ++I;
+    if (&*I == BI) {
+      if (FoldValueComparisonIntoPredecessors(BI))
+        return SimplifyCFG(BB) | true;
+    } else if (&*I == cast<Instruction>(BI->getCondition())){
+      ++I;
+      // Ignore dbg intrinsics.
+      while (isa<DbgInfoIntrinsic>(I))
+        ++I;
+      if (&*I == BI && FoldValueComparisonIntoPredecessors(BI))
+        return SimplifyCFG(BB) | true;
     }
   }
   
-  if (OnlyPred)
-    if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
-      if (BI->isConditional()) {
-        // Get the other block.
-        BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
-        PI = pred_begin(OtherBB);
-        ++PI;
-        
-        if (PI == pred_end(OtherBB)) {
-          // We have a conditional branch to two blocks that are only reachable
-          // from the condbr.  We know that the condbr dominates the two blocks,
-          // so see if there is any identical code in the "then" and "else"
-          // blocks.  If so, we can hoist it up to the branching block.
-          Changed |= HoistThenElseCodeToIf(BI);
-        } else {
-          BasicBlock* OnlySucc = NULL;
-          for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
-               SI != SE; ++SI) {
-            if (!OnlySucc)
-              OnlySucc = *SI;
-            else if (*SI != OnlySucc) {
-              OnlySucc = 0;     // There are multiple distinct successors!
-              break;
-            }
-          }
+  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+  if (SimplifyBranchOnICmpChain(BI, TD))
+    return true;
+  
+  // We have a conditional branch to two blocks that are only reachable
+  // from BI.  We know that the condbr dominates the two blocks, so see if
+  // there is any identical code in the "then" and "else" blocks.  If so, we
+  // can hoist it up to the branching block.
+  if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+    if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+      if (HoistThenElseCodeToIf(BI))
+        return SimplifyCFG(BB) | true;
+    } else {
+      // If Successor #1 has multiple preds, we may be able to conditionally
+      // execute Successor #0 if it branches to successor #1.
+      TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+      if (Succ0TI->getNumSuccessors() == 1 &&
+          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+        if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+          return SimplifyCFG(BB) | true;
+    }
+  } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+    // If Successor #0 has multiple preds, we may be able to conditionally
+    // execute Successor #1 if it branches to successor #0.
+    TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+    if (Succ1TI->getNumSuccessors() == 1 &&
+        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+      if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+        return SimplifyCFG(BB) | true;
+  }
+  
+  // If this is a branch on a phi node in the current block, thread control
+  // through this block if any PHI node entries are constants.
+  if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+    if (PN->getParent() == BI->getParent())
+      if (FoldCondBranchOnPHI(BI, TD))
+        return SimplifyCFG(BB) | true;
+  
+  // If this basic block is ONLY a setcc and a branch, and if a predecessor
+  // branches to us and one of our successors, fold the setcc into the
+  // predecessor and use logical operations to pick the right destination.
+  if (FoldBranchToCommonDest(BI))
+    return SimplifyCFG(BB) | true;
+  
+  // Scan predecessor blocks for conditional branches.
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      if (PBI != BI && PBI->isConditional())
+        if (SimplifyCondBranchToCondBranch(PBI, BI))
+          return SimplifyCFG(BB) | true;
 
-          if (OnlySucc == OtherBB) {
-            // If BB's only successor is the other successor of the predecessor,
-            // i.e. a triangle, see if we can hoist any code from this block up
-            // to the "if" block.
-            Changed |= SpeculativelyExecuteBB(BI, BB);
-          }
-        }
-      }
+  return false;
+}
 
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-    if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
-      // Change br (X == 0 | X == 1), T, F into a switch instruction.
-      if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
-        Instruction *Cond = cast<Instruction>(BI->getCondition());
-        // If this is a bunch of seteq's or'd together, or if it's a bunch of
-        // 'setne's and'ed together, collect them.
-        Value *CompVal = 0;
-        std::vector<ConstantInt*> Values;
-        bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
-        if (CompVal) {
-          // There might be duplicate constants in the list, which the switch
-          // instruction can't handle, remove them now.
-          std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
-          Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
-          // Figure out which block is which destination.
-          BasicBlock *DefaultBB = BI->getSuccessor(1);
-          BasicBlock *EdgeBB    = BI->getSuccessor(0);
-          if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
-          // Convert pointer to int before we switch.
-          if (CompVal->getType()->isPointerTy()) {
-            assert(TD && "Cannot switch on pointer without TargetData");
-            CompVal = new PtrToIntInst(CompVal,
-                                       TD->getIntPtrType(CompVal->getContext()),
-                                       "magicptr", BI);
-          }
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+  bool Changed = false;
 
-          // Create the new switch instruction now.
-          SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB,
-                                               Values.size(), BI);
-
-          // Add all of the 'cases' to the switch instruction.
-          for (unsigned i = 0, e = Values.size(); i != e; ++i)
-            New->addCase(Values[i], EdgeBB);
-
-          // We added edges from PI to the EdgeBB.  As such, if there were any
-          // PHI nodes in EdgeBB, they need entries to be added corresponding to
-          // the number of edges added.
-          for (BasicBlock::iterator BBI = EdgeBB->begin();
-               isa<PHINode>(BBI); ++BBI) {
-            PHINode *PN = cast<PHINode>(BBI);
-            Value *InVal = PN->getIncomingValueForBlock(*PI);
-            for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
-              PN->addIncoming(InVal, *PI);
-          }
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
 
-          // Erase the old branch instruction.
-          EraseTerminatorInstAndDCECond(BI);
-          return true;
-        }
-      }
+  // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themself as a predecessor.  These are unreachable.
+  if ((pred_begin(BB) == pred_end(BB) &&
+       BB != &BB->getParent()->getEntryBlock()) ||
+      BB->getSinglePredecessor() == BB) {
+    DEBUG(dbgs() << "Removing BB: \n" << *BB);
+    DeleteDeadBlock(BB);
+    return true;
+  }
+
+  // Check to see if we can constant propagate this terminator instruction
+  // away...
+  Changed |= ConstantFoldTerminator(BB);
+
+  // Check for and eliminate duplicate PHI nodes in this block.
+  Changed |= EliminateDuplicatePHINodes(BB);
+
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  //
+  if (MergeBlockIntoPredecessor(BB))
+    return true;
+  
+  // If there is a trivial two-entry PHI node in this basic block, and we can
+  // eliminate it, do so now.
+  if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+    if (PN->getNumIncomingValues() == 2)
+      Changed |= FoldTwoEntryPHINode(PN, TD);
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+    if (BI->isUnconditional()) {
+      if (SimplifyUncondBranch(BI)) return true;
+    } else {
+      if (SimplifyCondBranch(BI)) return true;
+    }
+  } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+    if (SimplifyReturn(RI)) return true;
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+    if (SimplifySwitch(SI)) return true;
+  } else if (UnreachableInst *UI =
+               dyn_cast<UnreachableInst>(BB->getTerminator())) {
+    if (SimplifyUnreachable(UI)) return true;
+  } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+    if (SimplifyUnwind(UI)) return true;
+  } else if (IndirectBrInst *IBI =
+               dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+    if (SimplifyIndirectBr(IBI)) return true;
+  }
 
   return Changed;
 }
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 000000000000..ac005f95b33a
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification.  If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+  struct InstSimplifier : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    InstSimplifier() : FunctionPass(ID) {
+      initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+    }
+
+    /// runOnFunction - Remove instructions that simplify.
+    bool runOnFunction(Function &F) {
+      const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+      const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+      SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+      bool Changed = false;
+
+      do {
+        for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+             DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+          for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+            Instruction *I = BI++;
+            // The first time through the loop ToSimplify is empty and we try to
+            // simplify all instructions.  On later iterations ToSimplify is not
+            // empty and we only bother simplifying instructions that are in it.
+            if (!ToSimplify->empty() && !ToSimplify->count(I))
+              continue;
+            // Don't waste time simplifying unused instructions.
+            if (!I->use_empty())
+              if (Value *V = SimplifyInstruction(I, TD, DT)) {
+                // Mark all uses for resimplification next time round the loop.
+                for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+                     UI != UE; ++UI)
+                  Next->insert(cast<Instruction>(*UI));
+                I->replaceAllUsesWith(V);
+                ++NumSimplified;
+                Changed = true;
+              }
+            Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+          }
+
+        // Place the list of instructions to simplify on the next loop iteration
+        // into ToSimplify.
+        std::swap(ToSimplify, Next);
+        Next->clear();
+      } while (!ToSimplify->empty());
+
+      return Changed;
+    }
+  };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+                false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+  return new InstSimplifier();
+}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index a51f1e1a47f6..ccb8287d7969 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
 
 char UnifyFunctionExitNodes::ID = 0;
 INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
-                "Unify function exit nodes", false, false);
+                "Unify function exit nodes", false, false)
 
 Pass *llvm::createUnifyFunctionExitNodesPass() {
   return new UnifyFunctionExitNodes();
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 000000000000..24e8c8ff5c5f
--- /dev/null
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+  initializeBreakCriticalEdgesPass(Registry);
+  initializeInstNamerPass(Registry);
+  initializeLCSSAPass(Registry);
+  initializeLoopSimplifyPass(Registry);
+  initializeLowerInvokePass(Registry);
+  initializeLowerSwitchPass(Registry);
+  initializePromotePassPass(Registry);
+  initializeUnifyFunctionExitNodesPass(Registry);
+  initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+  initializeTransformUtils(*unwrap(R));
+}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index fc4bde77d4f9..f5481d31eb8a 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,147 +21,111 @@
 using namespace llvm;
 
 Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
-                      bool ModuleLevelChanges) {
-  Value *&VMSlot = VM[V];
-  if (VMSlot) return VMSlot;      // Does it exist in the map yet?
+                      RemapFlags Flags) {
+  ValueToValueMapTy::iterator I = VM.find(V);
+  
+  // If the value already exists in the map, use it.
+  if (I != VM.end() && I->second) return I->second;
   
-  // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
-  // DenseMap.  This includes any recursive calls to MapValue.
-
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
-      (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
-       !ModuleLevelChanges))
-    return VMSlot = const_cast<Value*>(V);
+  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+    return VM[V] = const_cast<Value*>(V);
 
   if (const MDNode *MD = dyn_cast<MDNode>(V)) {
-    // Start by assuming that we'll use the identity mapping.
-    VMSlot = const_cast<Value*>(V);
-
+    // If this is a module-level metadata and we know that nothing at the module
+    // level is changing, then use an identity mapping.
+    if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+      return VM[V] = const_cast<Value*>(V);
+    
+    // Create a dummy node in case we have a metadata cycle.
+    MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+    VM[V] = Dummy;
+    
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
-      if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+      if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
 
-      // Ok, at least one operand needs remapping.
-      MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
-      VM[V] = Dummy;
+      // Ok, at least one operand needs remapping.  
       SmallVector<Value*, 4> Elts;
       Elts.reserve(MD->getNumOperands());
-      for (i = 0; i != e; ++i)
-        Elts.push_back(MD->getOperand(i) ? 
-                       MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+      for (i = 0; i != e; ++i) {
+        Value *Op = MD->getOperand(i);
+        Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+      }
       MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
       Dummy->replaceAllUsesWith(NewMD);
+      VM[V] = NewMD;
       MDNode::deleteTemporary(Dummy);
-      return VM[V] = NewMD;
+      return NewMD;
     }
 
-    // No operands needed remapping; keep the identity map.
+    VM[V] = const_cast<Value*>(V);
+    MDNode::deleteTemporary(Dummy);
+
+    // No operands needed remapping.  Use an identity mapping.
     return const_cast<Value*>(V);
   }
 
+  // Okay, this either must be a constant (which may or may not be mappable) or
+  // is something that is not in the mapping table.
   Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
   if (C == 0)
     return 0;
   
-  if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
-      isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
-      isa<UndefValue>(C))
-    return VMSlot = C;           // Primitive constants map directly
-  
-  if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
-    for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
-         i != e; ++i) {
-      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
-      if (MV != *i) {
-        // This array must contain a reference to a global, make a new array
-        // and return it.
-        //
-        std::vector<Constant*> Values;
-        Values.reserve(CA->getNumOperands());
-        for (User::op_iterator j = b; j != i; ++j)
-          Values.push_back(cast<Constant>(*j));
-        Values.push_back(cast<Constant>(MV));
-        for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM,
-                                                   ModuleLevelChanges)));
-        return VM[V] = ConstantArray::get(CA->getType(), Values);
-      }
-    }
-    return VM[V] = C;
-  }
-  
-  if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
-    for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
-         i != e; ++i) {
-      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
-      if (MV != *i) {
-        // This struct must contain a reference to a global, make a new struct
-        // and return it.
-        //
-        std::vector<Constant*> Values;
-        Values.reserve(CS->getNumOperands());
-        for (User::op_iterator j = b; j != i; ++j)
-          Values.push_back(cast<Constant>(*j));
-        Values.push_back(cast<Constant>(MV));
-        for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM,
-                                                   ModuleLevelChanges)));
-        return VM[V] = ConstantStruct::get(CS->getType(), Values);
-      }
-    }
-    return VM[V] = C;
+  if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+    Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+    BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+                                                       Flags));
+    return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
   }
   
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+    Value *Op = C->getOperand(i);
+    Value *Mapped = MapValue(Op, VM, Flags);
+    if (Mapped == C) continue;
+    
+    // Okay, the operands don't all match.  We've already processed some or all
+    // of the operands, set them up now.
     std::vector<Constant*> Ops;
-    for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
-      Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
-    return VM[V] = CE->getWithOperands(Ops);
+    Ops.reserve(C->getNumOperands());
+    for (unsigned j = 0; j != i; ++j)
+      Ops.push_back(cast<Constant>(C->getOperand(i)));
+    Ops.push_back(cast<Constant>(Mapped));
+    
+    // Map the rest of the operands that aren't processed yet.
+    for (++i; i != e; ++i)
+      Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
+    
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+      return VM[V] = CE->getWithOperands(Ops);
+    if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+      return VM[V] = ConstantArray::get(CA->getType(), Ops);
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+      return VM[V] = ConstantStruct::get(CS->getType(), Ops);
+    assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
+    return VM[V] = ConstantVector::get(Ops);
   }
-  
-  if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
-    for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
-         i != e; ++i) {
-      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
-      if (MV != *i) {
-        // This vector value must contain a reference to a global, make a new
-        // vector constant and return it.
-        //
-        std::vector<Constant*> Values;
-        Values.reserve(CV->getNumOperands());
-        for (User::op_iterator j = b; j != i; ++j)
-          Values.push_back(cast<Constant>(*j));
-        Values.push_back(cast<Constant>(MV));
-        for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM,
-                                                   ModuleLevelChanges)));
-        return VM[V] = ConstantVector::get(Values);
-      }
-    }
-    return VM[V] = C;
-  }
-  
-  BlockAddress *BA = cast<BlockAddress>(C);
-  Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
-                                        ModuleLevelChanges));
-  BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
-                                             ModuleLevelChanges));
-  return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+
+  // If we reach here, all of the operands of the constant match.
+  return VM[V] = C;
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 ///
 void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
-                            bool ModuleLevelChanges) {
+                            RemapFlags Flags) {
   // Remap operands.
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, VMap, ModuleLevelChanges);
-    assert(V && "Referenced value not in value map!");
-    *op = V;
+    Value *V = MapValue(*op, VMap, Flags);
+    // If we aren't ignoring missing entries, assert that something happened.
+    if (V != 0)
+      *op = V;
+    else
+      assert((Flags & RF_IgnoreMissingEntries) &&
+             "Referenced value not in value map!");
   }
 
   // Remap attached metadata.
@@ -170,7 +134,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
   for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
        MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
     Value *Old = MI->second;
-    Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+    Value *New = MapValue(Old, VMap, Flags);
     if (New != Old)
       I->setMetadata(MI->first, cast<MDNode>(New));
   }
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 831a9960463d..cbc874a53f63 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -198,6 +198,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
   case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
   case Type::LabelTyID:     OS << "label"; break;
   case Type::MetadataTyID:  OS << "metadata"; break;
+  case Type::X86_MMXTyID:   OS << "x86_mmx"; break;
   case Type::IntegerTyID:
     OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
     break;
@@ -830,7 +831,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
       Out << " nuw";
     if (OBO->hasNoSignedWrap())
       Out << " nsw";
-  } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(U)) {
+  } else if (const PossiblyExactOperator *Div =
+               dyn_cast<PossiblyExactOperator>(U)) {
     if (Div->isExact())
       Out << " exact";
   } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
@@ -1057,11 +1059,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     return;
   }
 
-  if (const MDNode *Node = dyn_cast<MDNode>(CV)) {
-    Out << "!" << Machine->getMetadataSlot(Node);
-    return;
-  }
-
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     Out << CE->getOpcodeName();
     WriteOptimizationInfo(Out, CE);
@@ -1165,7 +1162,11 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
       else
         Machine = new SlotTracker(Context);
     }
-    Out << '!' << Machine->getMetadataSlot(N);
+    int Slot = Machine->getMetadataSlot(N);
+    if (Slot == -1)
+      Out << "<badref>";
+    else
+      Out << '!' << Slot;
     return;
   }
 
@@ -1395,7 +1396,11 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
   Out << "!" << NMD->getName() << " = !{";
   for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
     if (i) Out << ", ";
-    Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
+    int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+    if (Slot == -1)
+      Out << "<badref>";
+    else
+      Out << '!' << Slot;
   }
   Out << "}\n";
 }
@@ -1455,6 +1460,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
   if (GV->isThreadLocal()) Out << "thread_local ";
   if (unsigned AddressSpace = GV->getType()->getAddressSpace())
     Out << "addrspace(" << AddressSpace << ") ";
+  if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
   Out << (GV->isConstant() ? "constant " : "global ");
   TypePrinter.print(GV->getType()->getElementType(), Out);
 
@@ -1575,6 +1581,8 @@ void AssemblyWriter::printFunction(const Function *F) {
   case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc "; break;
   case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
   case CallingConv::MSP430_INTR:  Out << "msp430_intrcc "; break;
+  case CallingConv::PTX_Kernel:   Out << "ptx_kernel"; break;
+  case CallingConv::PTX_Device:   Out << "ptx_device"; break;
   default: Out << "cc" << F->getCallingConv() << " "; break;
   }
 
@@ -1622,6 +1630,8 @@ void AssemblyWriter::printFunction(const Function *F) {
     Out << "...";  // Output varargs portion of signature!
   }
   Out << ')';
+  if (F->hasUnnamedAddr())
+    Out << " unnamed_addr";
   Attributes FnAttrs = Attrs.getFnAttributes();
   if (FnAttrs != Attribute::None)
     Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
@@ -1843,6 +1853,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
     case CallingConv::MSP430_INTR:  Out << " msp430_intrcc "; break;
+    case CallingConv::PTX_Kernel:   Out << " ptx_kernel"; break;
+    case CallingConv::PTX_Device:   Out << " ptx_device"; break;
     default: Out << " cc" << CI->getCallingConv(); break;
     }
 
@@ -1897,6 +1909,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
     case CallingConv::MSP430_INTR:  Out << " msp430_intrcc "; break;
+    case CallingConv::PTX_Kernel:   Out << " ptx_kernel"; break;
+    case CallingConv::PTX_Device:   Out << " ptx_device"; break;
     default: Out << " cc" << II->getCallingConv(); break;
     }
 
@@ -2033,15 +2047,7 @@ static void WriteMDNodeComment(const MDNode *Node,
     return;
   
   Out.PadToColumn(50);
-  if (Tag == dwarf::DW_TAG_auto_variable)
-    Out << "; [ DW_TAG_auto_variable ]";
-  else if (Tag == dwarf::DW_TAG_arg_variable)
-    Out << "; [ DW_TAG_arg_variable ]";
-  else if (Tag == dwarf::DW_TAG_return_variable)
-    Out << "; [ DW_TAG_return_variable ]";
-  else if (Tag == dwarf::DW_TAG_vector_type)
-    Out << "; [ DW_TAG_vector_type ]";
-  else if (Tag == dwarf::DW_TAG_user_base)
+  if (Tag == dwarf::DW_TAG_user_base)
     Out << "; [ DW_TAG_user_base ]";
   else if (Tag.isIntN(32)) {
     if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index a000aee2ab45..92152a3b90ae 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -15,8 +15,8 @@
 #include "llvm/Type.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
@@ -70,6 +70,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
     Result += "noimplicitfloat ";
   if (Attrs & Attribute::Naked)
     Result += "naked ";
+  if (Attrs & Attribute::Hotpatch)
+    Result += "hotpatch ";
   if (Attrs & Attribute::StackAlignment) {
     Result += "alignstack(";
     Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
@@ -105,6 +107,14 @@ Attributes Attribute::typeIncompatible(const Type *Ty) {
 //===----------------------------------------------------------------------===//
 
 namespace llvm {
+  class AttributeListImpl;
+}
+
+static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
+
+namespace llvm {
+static ManagedStatic<sys::SmartMutex<true> > ALMutex;
+
 class AttributeListImpl : public FoldingSetNode {
   sys::cas_flag RefCount;
   
@@ -120,10 +130,17 @@ public:
     RefCount = 0;
   }
   
-  void AddRef() { sys::AtomicIncrement(&RefCount); }
+  void AddRef() {
+    sys::SmartScopedLock<true> Lock(*ALMutex);
+    ++RefCount;
+  }
   void DropRef() {
-    sys::cas_flag old = sys::AtomicDecrement(&RefCount);
-    if (old == 0) delete this;
+    sys::SmartScopedLock<true> Lock(*ALMutex);
+    if (!AttributesLists.isConstructed())
+      return;
+    sys::cas_flag new_val = --RefCount;
+    if (new_val == 0)
+      delete this;
   }
   
   void Profile(FoldingSetNodeID &ID) const {
@@ -137,11 +154,8 @@ public:
 };
 }
 
-static ManagedStatic<sys::SmartMutex<true> > ALMutex;
-static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
-
 AttributeListImpl::~AttributeListImpl() {
-  sys::SmartScopedLock<true> Lock(*ALMutex);
+  // NOTE: Lock must be acquired by caller.
   AttributesLists->RemoveNode(this);
 }
 
@@ -195,6 +209,7 @@ AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
 }
 
 const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+  sys::SmartScopedLock<true> Lock(*ALMutex);
   if (AttrList == RHS.AttrList) return *this;
   if (AttrList) AttrList->DropRef();
   AttrList = RHS.AttrList;
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 9330e141c341..b32354035644 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -288,37 +288,224 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     break;
   case 'x': 
     // This fixes all MMX shift intrinsic instructions to take a
-    // v1i64 instead of a v2i32 as the second parameter.
-    if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
-        (Name.compare(13,4,"psll", 4) == 0 ||
-         Name.compare(13,4,"psra", 4) == 0 ||
-         Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
-      
-      const llvm::Type *VT =
-                    VectorType::get(IntegerType::get(FTy->getContext(), 64), 1);
-      
-      // We don't have to do anything if the parameter already has
-      // the correct type.
-      if (FTy->getParamType(1) == VT)
+    // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
+    if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
+      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
+
+      if (Name.compare(13, 4, "padd", 4) == 0   ||
+          Name.compare(13, 4, "psub", 4) == 0   ||
+          Name.compare(13, 4, "pmul", 4) == 0   ||
+          Name.compare(13, 5, "pmadd", 5) == 0  ||
+          Name.compare(13, 4, "pand", 4) == 0   ||
+          Name.compare(13, 3, "por", 3) == 0    ||
+          Name.compare(13, 4, "pxor", 4) == 0   ||
+          Name.compare(13, 4, "pavg", 4) == 0   ||
+          Name.compare(13, 4, "pmax", 4) == 0   ||
+          Name.compare(13, 4, "pmin", 4) == 0   ||
+          Name.compare(13, 4, "psad", 4) == 0   ||
+          Name.compare(13, 4, "psll", 4) == 0   ||
+          Name.compare(13, 4, "psrl", 4) == 0   ||
+          Name.compare(13, 4, "psra", 4) == 0   ||
+          Name.compare(13, 4, "pack", 4) == 0   ||
+          Name.compare(13, 6, "punpck", 6) == 0 ||
+          Name.compare(13, 4, "pcmp", 4) == 0) {
+        assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
+        const Type *SecondParamTy = X86_MMXTy;
+
+        if (Name.compare(13, 5, "pslli", 5) == 0 ||
+            Name.compare(13, 5, "psrli", 5) == 0 ||
+            Name.compare(13, 5, "psrai", 5) == 0)
+          SecondParamTy = FTy->getParamType(1);
+
+        // Don't do anything if it has the correct types.
+        if (FTy->getReturnType() == X86_MMXTy &&
+            FTy->getParamType(0) == X86_MMXTy &&
+            FTy->getParamType(1) == SecondParamTy)
+          break;
+
+        // We first need to change the name of the old (bad) intrinsic, because
+        // its type is incorrect, but we cannot overload that name. We
+        // arbitrarily unique it here allowing us to construct a correctly named
+        // and typed function below.
+        F->setName("");
+
+        // Now construct the new intrinsic with the correct name and type. We
+        // leave the old function around in order to query its type, whatever it
+        // may be, and correctly convert up to the new type.
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      X86_MMXTy, X86_MMXTy,
+                                                      SecondParamTy, (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 8, "maskmovq", 8) == 0) {
+        // Don't do anything if it has the correct types.
+        if (FTy->getParamType(0) == X86_MMXTy &&
+            FTy->getParamType(1) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      FTy->getReturnType(),
+                                                      X86_MMXTy,
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(2),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
+        if (FTy->getParamType(0) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      FTy->getReturnType(),
+                                                      X86_MMXTy,
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 5, "movnt", 5) == 0) {
+        if (FTy->getParamType(1) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      FTy->getReturnType(),
+                                                      FTy->getParamType(0),
+                                                      X86_MMXTy,
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 7, "palignr", 7) == 0) {
+        if (FTy->getReturnType() == X86_MMXTy &&
+            FTy->getParamType(0) == X86_MMXTy &&
+            FTy->getParamType(1) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      X86_MMXTy,
+                                                      X86_MMXTy,
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(2),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 5, "pextr", 5) == 0) {
+        if (FTy->getParamType(0) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      FTy->getReturnType(),
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(1),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 5, "pinsr", 5) == 0) {
+        if (FTy->getReturnType() == X86_MMXTy &&
+            FTy->getParamType(0) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      X86_MMXTy,
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(1),
+                                                      FTy->getParamType(2),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
+        if (FTy->getReturnType() == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(0),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
+        if (FTy->getParamType(0) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      FTy->getReturnType(),
+                                                      X86_MMXTy,
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 8, "vec.init", 8) == 0) {
+        if (FTy->getReturnType() == X86_MMXTy)
+          break;
+
+        F->setName("");
+
+        if (Name.compare(21, 2, ".b", 2) == 0)
+          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                        X86_MMXTy,
+                                                        FTy->getParamType(0),
+                                                        FTy->getParamType(1),
+                                                        FTy->getParamType(2),
+                                                        FTy->getParamType(3),
+                                                        FTy->getParamType(4),
+                                                        FTy->getParamType(5),
+                                                        FTy->getParamType(6),
+                                                        FTy->getParamType(7),
+                                                        (Type*)0));
+        else if (Name.compare(21, 2, ".w", 2) == 0)
+          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                        X86_MMXTy,
+                                                        FTy->getParamType(0),
+                                                        FTy->getParamType(1),
+                                                        FTy->getParamType(2),
+                                                        FTy->getParamType(3),
+                                                        (Type*)0));
+        else if (Name.compare(21, 2, ".d", 2) == 0)
+          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                        X86_MMXTy,
+                                                        FTy->getParamType(0),
+                                                        FTy->getParamType(1),
+                                                        (Type*)0));
+        return true;
+      }
+
+
+      if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
+        if (FTy->getReturnType() == X86_MMXTy &&
+            FTy->getParamType(0) == X86_MMXTy)
+          break;
+
+        F->setName("");
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+                                                      X86_MMXTy,
+                                                      X86_MMXTy,
+                                                      FTy->getParamType(1),
+                                                      (Type*)0));
+        return true;
+      }
+
+      if (Name.compare(13, 9, "emms", 4) == 0 ||
+          Name.compare(13, 9, "femms", 5) == 0) {
+        NewFn = 0;
         break;
-      
-      //  We first need to change the name of the old (bad) intrinsic, because 
-      //  its type is incorrect, but we cannot overload that name. We 
-      //  arbitrarily unique it here allowing us to construct a correctly named 
-      //  and typed function below.
-      F->setName("");
+      }
 
-      assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!");
-      
-      //  Now construct the new intrinsic with the correct name and type. We 
-      //  leave the old function around in order to query its type, whatever it 
-      //  may be, and correctly convert up to the new type.
-      NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                    FTy->getReturnType(),
-                                                    FTy->getParamType(0),
-                                                    VT,
-                                                    (Type *)0));
-      return true;
+      // We really shouldn't get here ever.
+      assert(0 && "Invalid MMX intrinsic!");
+      break;
     } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
                Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
                Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
@@ -341,6 +528,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       // or 0.
       NewFn = 0;
       return true;           
+    } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
+      // This is an SSE/MMX instruction.
+      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
+      NewFn =
+        cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
+                                              X86_MMXTy,
+                                              X86_MMXTy,
+                                              Type::getInt8Ty(F->getContext()),
+                                              (Type*)0));
+      return true;
     }
 
     break;
@@ -432,6 +629,39 @@ static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
                           "upgraded."+CI->getName(), CI);
 }
 
+/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
+static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
+                                 Value **Operands, unsigned NumOps,
+                                 bool AssignName = true) {
+  // Construct a new CallInst.
+  CallInst *NewCI =
+    CallInst::Create(NewFn, Operands, Operands + NumOps,
+                     AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
+
+  NewCI->setTailCall(OldCI->isTailCall());
+  NewCI->setCallingConv(OldCI->getCallingConv());
+
+  // Handle any uses of the old CallInst. If the type has changed, add a cast.
+  if (!OldCI->use_empty()) {
+    if (OldCI->getType() != NewCI->getType()) {
+      Function *OldFn = OldCI->getCalledFunction();
+      CastInst *RetCast =
+        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
+                                                 OldFn->getReturnType(), true),
+                         NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
+
+      // Replace all uses of the old call with the new cast which has the
+      // correct type.
+      OldCI->replaceAllUsesWith(RetCast);
+    } else {
+      OldCI->replaceAllUsesWith(NewCI);
+    }
+  }
+
+  // Clean up the old call now that it has been completely upgraded.
+  OldCI->eraseFromParent();
+}
+
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
 // upgraded intrinsic. All argument and return casting must be provided in 
 // order to seamlessly integrate with existing context.
@@ -629,7 +859,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         for (unsigned i = 0; i != 8; ++i)
           Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
 
-        Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+        Value *SV = ConstantVector::get(Indices);
         Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
       }
@@ -685,7 +915,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         for (unsigned i = 0; i != 16; ++i)
           Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
 
-        Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+        Value *SV = ConstantVector::get(Indices);
         Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
       }
@@ -759,40 +989,265 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     break;
   }        
 
+  case Intrinsic::x86_mmx_padd_b:
+  case Intrinsic::x86_mmx_padd_w:
+  case Intrinsic::x86_mmx_padd_d:
+  case Intrinsic::x86_mmx_padd_q:
+  case Intrinsic::x86_mmx_padds_b:
+  case Intrinsic::x86_mmx_padds_w:
+  case Intrinsic::x86_mmx_paddus_b:
+  case Intrinsic::x86_mmx_paddus_w:
+  case Intrinsic::x86_mmx_psub_b:
+  case Intrinsic::x86_mmx_psub_w:
+  case Intrinsic::x86_mmx_psub_d:
+  case Intrinsic::x86_mmx_psub_q:
+  case Intrinsic::x86_mmx_psubs_b:
+  case Intrinsic::x86_mmx_psubs_w:
+  case Intrinsic::x86_mmx_psubus_b:
+  case Intrinsic::x86_mmx_psubus_w:
+  case Intrinsic::x86_mmx_pmulh_w:
+  case Intrinsic::x86_mmx_pmull_w:
+  case Intrinsic::x86_mmx_pmulhu_w:
+  case Intrinsic::x86_mmx_pmulu_dq:
+  case Intrinsic::x86_mmx_pmadd_wd:
+  case Intrinsic::x86_mmx_pand:
+  case Intrinsic::x86_mmx_pandn:
+  case Intrinsic::x86_mmx_por:
+  case Intrinsic::x86_mmx_pxor:
+  case Intrinsic::x86_mmx_pavg_b:
+  case Intrinsic::x86_mmx_pavg_w:
+  case Intrinsic::x86_mmx_pmaxu_b:
+  case Intrinsic::x86_mmx_pmaxs_w:
+  case Intrinsic::x86_mmx_pminu_b:
+  case Intrinsic::x86_mmx_pmins_w:
+  case Intrinsic::x86_mmx_psad_bw:
+  case Intrinsic::x86_mmx_psll_w:
   case Intrinsic::x86_mmx_psll_d:
   case Intrinsic::x86_mmx_psll_q:
-  case Intrinsic::x86_mmx_psll_w:
-  case Intrinsic::x86_mmx_psra_d:
-  case Intrinsic::x86_mmx_psra_w:
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrl_w:
   case Intrinsic::x86_mmx_psrl_d:
   case Intrinsic::x86_mmx_psrl_q:
-  case Intrinsic::x86_mmx_psrl_w: {
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psra_w:
+  case Intrinsic::x86_mmx_psra_d:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d:
+  case Intrinsic::x86_mmx_packsswb:
+  case Intrinsic::x86_mmx_packssdw:
+  case Intrinsic::x86_mmx_packuswb:
+  case Intrinsic::x86_mmx_punpckhbw:
+  case Intrinsic::x86_mmx_punpckhwd:
+  case Intrinsic::x86_mmx_punpckhdq:
+  case Intrinsic::x86_mmx_punpcklbw:
+  case Intrinsic::x86_mmx_punpcklwd:
+  case Intrinsic::x86_mmx_punpckldq:
+  case Intrinsic::x86_mmx_pcmpeq_b:
+  case Intrinsic::x86_mmx_pcmpeq_w:
+  case Intrinsic::x86_mmx_pcmpeq_d:
+  case Intrinsic::x86_mmx_pcmpgt_b:
+  case Intrinsic::x86_mmx_pcmpgt_w:
+  case Intrinsic::x86_mmx_pcmpgt_d: {
     Value *Operands[2];
     
+    // Cast the operand to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+
+    switch (NewFn->getIntrinsicID()) {
+    default:
+      // Cast to the X86 MMX type.
+      Operands[1] = new BitCastInst(CI->getArgOperand(1), 
+                                    NewFn->getFunctionType()->getParamType(1),
+                                    "upgraded.", CI);
+      break;
+    case Intrinsic::x86_mmx_pslli_w:
+    case Intrinsic::x86_mmx_pslli_d:
+    case Intrinsic::x86_mmx_pslli_q:
+    case Intrinsic::x86_mmx_psrli_w:
+    case Intrinsic::x86_mmx_psrli_d:
+    case Intrinsic::x86_mmx_psrli_q:
+    case Intrinsic::x86_mmx_psrai_w:
+    case Intrinsic::x86_mmx_psrai_d:
+      // These take an i32 as their second parameter.
+      Operands[1] = CI->getArgOperand(1);
+      break;
+    }
+
+    ConstructNewCallInst(NewFn, CI, Operands, 2);
+    break;
+  }
+  case Intrinsic::x86_mmx_maskmovq: {
+    Value *Operands[3];
+
+    // Cast the operands to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+    Operands[1] = new BitCastInst(CI->getArgOperand(1), 
+                                  NewFn->getFunctionType()->getParamType(1),
+                                  "upgraded.", CI);
+    Operands[2] = CI->getArgOperand(2);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 3, false);
+    break;
+  }
+  case Intrinsic::x86_mmx_pmovmskb: {
+    Value *Operands[1];
+
+    // Cast the operand to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 1);
+    break;
+  }
+  case Intrinsic::x86_mmx_movnt_dq: {
+    Value *Operands[2];
+
     Operands[0] = CI->getArgOperand(0);
-    
-    // Cast the second parameter to the correct type.
-    BitCastInst *BC = new BitCastInst(CI->getArgOperand(1), 
-                                      NewFn->getFunctionType()->getParamType(1),
-                                      "upgraded.", CI);
-    Operands[1] = BC;
-    
-    //  Construct a new CallInst
-    CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+2, 
-                                       "upgraded."+CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-    
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
+
+    // Cast the operand to the X86 MMX type.
+    Operands[1] = new BitCastInst(CI->getArgOperand(1),
+                                  NewFn->getFunctionType()->getParamType(1),
+                                  "upgraded.", CI);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 2, false);
     break;
-  }        
+  }
+  case Intrinsic::x86_mmx_palignr_b: {
+    Value *Operands[3];
+
+    // Cast the operands to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0),
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+    Operands[1] = new BitCastInst(CI->getArgOperand(1),
+                                  NewFn->getFunctionType()->getParamType(1),
+                                  "upgraded.", CI);
+    Operands[2] = CI->getArgOperand(2);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 3);
+    break;
+  }
+  case Intrinsic::x86_mmx_pextr_w: {
+    Value *Operands[2];
+
+    // Cast the operands to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0),
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+    Operands[1] = CI->getArgOperand(1);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 2);
+    break;
+  }
+  case Intrinsic::x86_mmx_pinsr_w: {
+    Value *Operands[3];
+
+    // Cast the operands to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0),
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+    Operands[1] = CI->getArgOperand(1);
+    Operands[2] = CI->getArgOperand(2);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 3);
+    break;
+  }
+  case Intrinsic::x86_sse_pshuf_w: {
+    IRBuilder<> Builder(C);
+    Builder.SetInsertPoint(CI->getParent(), CI);
+
+    // Cast the operand to the X86 MMX type.
+    Value *Operands[2];
+    Operands[0] =
+      Builder.CreateBitCast(CI->getArgOperand(0), 
+                            NewFn->getFunctionType()->getParamType(0),
+                            "upgraded.");
+    Operands[1] =
+      Builder.CreateTrunc(CI->getArgOperand(1),
+                          Type::getInt8Ty(C),
+                          "upgraded.");
+
+    ConstructNewCallInst(NewFn, CI, Operands, 2);
+    break;
+  }
+
+#if 0
+  case Intrinsic::x86_mmx_cvtsi32_si64: {
+    // The return type needs to be changed.
+    Value *Operands[1];
+    Operands[0] = CI->getArgOperand(0);
+    ConstructNewCallInst(NewFn, CI, Operands, 1);
+    break;
+  }
+  case Intrinsic::x86_mmx_cvtsi64_si32: {
+    Value *Operands[1];
+
+    // Cast the operand to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0),
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 1);
+    break;
+  }
+  case Intrinsic::x86_mmx_vec_init_b:
+  case Intrinsic::x86_mmx_vec_init_w:
+  case Intrinsic::x86_mmx_vec_init_d: {
+    // The return type needs to be changed.
+    Value *Operands[8];
+    unsigned NumOps = 0;
+
+    switch (NewFn->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::x86_mmx_vec_init_b: NumOps = 8; break;
+    case Intrinsic::x86_mmx_vec_init_w: NumOps = 4; break;
+    case Intrinsic::x86_mmx_vec_init_d: NumOps = 2; break;
+    }
+
+    switch (NewFn->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::x86_mmx_vec_init_b:
+      Operands[7] = CI->getArgOperand(7);
+      Operands[6] = CI->getArgOperand(6);
+      Operands[5] = CI->getArgOperand(5);
+      Operands[4] = CI->getArgOperand(4);
+      // FALLTHRU
+    case Intrinsic::x86_mmx_vec_init_w:
+      Operands[3] = CI->getArgOperand(3);
+      Operands[2] = CI->getArgOperand(2);
+      // FALLTHRU
+    case Intrinsic::x86_mmx_vec_init_d:
+      Operands[1] = CI->getArgOperand(1);
+      Operands[0] = CI->getArgOperand(0);
+      break;
+    }
+
+    ConstructNewCallInst(NewFn, CI, Operands, NumOps);
+    break;
+  }
+  case Intrinsic::x86_mmx_vec_ext_d: {
+    Value *Operands[2];
+
+    // Cast the operand to the X86 MMX type.
+    Operands[0] = new BitCastInst(CI->getArgOperand(0),
+                                  NewFn->getFunctionType()->getParamType(0),
+                                  "upgraded.", CI);
+    Operands[1] = CI->getArgOperand(1);
+
+    ConstructNewCallInst(NewFn, CI, Operands, 2);
+    break;
+  }
+#endif
+
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz: {
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 8ad53736c993..955a0285b260 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -248,10 +248,11 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
       // If all incoming values to the Phi are the same, we can replace the Phi
       // with that value.
       Value* PNV = 0;
-      if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) {
-        PN->replaceAllUsesWith(PNV);
-        PN->eraseFromParent();
-      }
+      if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+        if (PNV != PN) {
+          PN->replaceAllUsesWith(PNV);
+          PN->eraseFromParent();
+        }
     }
   }
 }
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 1388c93cce39..1abd031dae4e 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -1,3 +1,5 @@
+set(LLVM_REQUIRES_RTTI 1)
+
 add_llvm_library(LLVMCore
   AsmWriter.cpp
   Attributes.cpp
@@ -28,6 +30,7 @@ add_llvm_library(LLVMCore
   Type.cpp
   TypeSymbolTable.cpp
   Use.cpp
+  User.cpp
   Value.cpp
   ValueSymbolTable.cpp
   ValueTypes.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 9a91dafab2ff..573efb7e5731 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -42,6 +42,10 @@ using namespace llvm;
 /// input vector constant are all simple integer or FP values.
 static Constant *BitCastConstantVector(ConstantVector *CV,
                                        const VectorType *DstTy) {
+
+  if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+  if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
   // If this cast changes element count then we can't handle it here:
   // doing so requires endianness information.  This should be handled by
   // Analysis/ConstantFolding.cpp
@@ -145,7 +149,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
     // This allows for other simplifications (although some of them
     // can only be handled by Analysis/ConstantFolding.cpp).
     if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
-      return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy);
+      return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
   }
 
   // Finally, implement bitcast folding now.   The code below doesn't handle
@@ -202,7 +206,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
     APInt V = CI->getValue();
     if (ByteStart)
       V = V.lshr(ByteStart*8);
-    V.trunc(ByteSize*8);
+    V = V.trunc(ByteSize*8);
     return ConstantInt::get(CI->getContext(), V);
   }
   
@@ -511,10 +515,14 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
       return Constant::getNullValue(DestTy);
     return UndefValue::get(DestTy);
   }
+
   // No compile-time operations on this type yet.
   if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
     return 0;
 
+  if (V->isNullValue() && !DestTy->isX86_MMXTy())
+    return Constant::getNullValue(DestTy);
+
   // If the cast operand is a constant expression, there's a few things we can
   // do to try to simplify it.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -637,9 +645,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
   case Instruction::SIToFP:
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       APInt api = CI->getValue();
-      const uint64_t zero[] = {0, 0};
-      APFloat apf = APFloat(APInt(DestTy->getPrimitiveSizeInBits(),
-                                  2, zero));
+      APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true);
       (void)apf.convertFromAPInt(api, 
                                  opc==Instruction::SIToFP,
                                  APFloat::rmNearestTiesToEven);
@@ -649,25 +655,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
   case Instruction::ZExt:
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
-      APInt Result(CI->getValue());
-      Result.zext(BitWidth);
-      return ConstantInt::get(V->getContext(), Result);
+      return ConstantInt::get(V->getContext(),
+                              CI->getValue().zext(BitWidth));
     }
     return 0;
   case Instruction::SExt:
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
-      APInt Result(CI->getValue());
-      Result.sext(BitWidth);
-      return ConstantInt::get(V->getContext(), Result);
+      return ConstantInt::get(V->getContext(),
+                              CI->getValue().sext(BitWidth));
     }
     return 0;
   case Instruction::Trunc: {
     uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      APInt Result(CI->getValue());
-      Result.trunc(DestBitWidth);
-      return ConstantInt::get(V->getContext(), Result);
+      return ConstantInt::get(V->getContext(),
+                              CI->getValue().trunc(DestBitWidth));
     }
     
     // The input must be a constantexpr.  See if we can simplify this based on
@@ -690,10 +693,58 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
   if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
     return CB->getZExtValue() ? V1 : V2;
 
+  // Check for zero aggregate and ConstantVector of zeros
+  if (Cond->isNullValue()) return V2;
+
+  if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
+
+    if (CondV->isAllOnesValue()) return V1;
+
+    const VectorType *VTy = cast<VectorType>(V1->getType());
+    ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
+    ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
+
+    if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
+        (CP2 || isa<ConstantAggregateZero>(V2))) {
+
+      // Find the element type of the returned vector
+      const Type *EltTy = VTy->getElementType();
+      unsigned NumElem = VTy->getNumElements();
+      std::vector<Constant*> Res(NumElem);
+
+      bool Valid = true;
+      for (unsigned i = 0; i < NumElem; ++i) {
+        ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
+        if (!c) {
+          Valid = false;
+          break;
+        }
+        Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+        Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+        Res[i] = c->getZExtValue() ? C1 : C2;
+      }
+      // If we were able to build the vector, return it
+      if (Valid) return ConstantVector::get(Res);
+    }
+  }
+
+
   if (isa<UndefValue>(V1)) return V2;
   if (isa<UndefValue>(V2)) return V1;
   if (isa<UndefValue>(Cond)) return V1;
   if (V1 == V2) return V1;
+
+  if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+    if (TrueVal->getOpcode() == Instruction::Select)
+      if (TrueVal->getOperand(0) == Cond)
+	return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+  }
+  if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+    if (FalseVal->getOpcode() == Instruction::Select)
+      if (FalseVal->getOperand(0) == Cond)
+	return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+  }
+
   return 0;
 }
 
@@ -821,7 +872,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
     Result.push_back(InElt);
   }
 
-  return ConstantVector::get(&Result[0], Result.size());
+  return ConstantVector::get(Result);
 }
 
 Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
@@ -982,8 +1033,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
       return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
                                                     // undef lshr X -> 0
     case Instruction::AShr:
-      if (!isa<UndefValue>(C2))
-        return C1;                                  // undef ashr X --> undef
+      if (!isa<UndefValue>(C2))                     // undef ashr X --> all ones
+        return Constant::getAllOnesValue(C1->getType());
       else if (isa<UndefValue>(C1)) 
         return C1;                                  // undef ashr undef -> undef
       else
@@ -1343,8 +1394,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
 
     // Given ((a + b) + c), if (b + c) folds to something interesting, return
     // (a + (b + c)).
-    if (Instruction::isAssociative(Opcode, C1->getType()) &&
-        CE1->getOpcode() == Opcode) {
+    if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
       Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
       if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
         return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
@@ -1413,7 +1463,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) {
 /// first is less than the second, return -1, if the second is less than the
 /// first, return 1.  If the constants are not integral, return -2.
 ///
-static int IdxCompare(Constant *C1, Constant *C2,  const Type *ElTy) {
+static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
   if (C1 == C2) return 0;
 
   // Ok, we found a different index.  If they are not ConstantInt, we can't do
@@ -1896,11 +1946,11 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
     // If we can constant fold the comparison of each element, constant fold
     // the whole vector comparison.
     SmallVector<Constant*, 4> ResElts;
-    for (unsigned i = 0, e = C1Elts.size(); i != e; ++i) {
-      // Compare the elements, producing an i1 result or constant expr.
+    // Compare the elements, producing an i1 result or constant expr.
+    for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
       ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
-    }
-    return ConstantVector::get(&ResElts[0], ResElts.size());
+
+    return ConstantVector::get(ResElts);
   }
 
   if (C1->getType()->isFloatingPointTy()) {
@@ -1948,7 +1998,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
       else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT) 
         Result = 1;
       break;
-    case ICmpInst::ICMP_NE: // We know that C1 != C2
+    case FCmpInst::FCMP_ONE: // We know that C1 != C2
       // We can only partially decide this relation.
       if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ) 
         Result = 0;
@@ -2073,56 +2123,55 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
 
 /// isInBoundsIndices - Test whether the given sequence of *normalized* indices
 /// is "inbounds".
-static bool isInBoundsIndices(Constant *const *Idxs, size_t NumIdx) {
+template<typename IndexTy>
+static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
   // No indices means nothing that could be out of bounds.
   if (NumIdx == 0) return true;
 
   // If the first index is zero, it's in bounds.
-  if (Idxs[0]->isNullValue()) return true;
+  if (cast<Constant>(Idxs[0])->isNullValue()) return true;
 
   // If the first index is one and all the rest are zero, it's in bounds,
   // by the one-past-the-end rule.
   if (!cast<ConstantInt>(Idxs[0])->isOne())
     return false;
   for (unsigned i = 1, e = NumIdx; i != e; ++i)
-    if (!Idxs[i]->isNullValue())
+    if (!cast<Constant>(Idxs[i])->isNullValue())
       return false;
   return true;
 }
 
-Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
-                                          bool inBounds,
-                                          Constant* const *Idxs,
-                                          unsigned NumIdx) {
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+                                               bool inBounds,
+                                               IndexTy const *Idxs,
+                                               unsigned NumIdx) {
+  Constant *Idx0 = cast<Constant>(Idxs[0]);
   if (NumIdx == 0 ||
-      (NumIdx == 1 && Idxs[0]->isNullValue()))
+      (NumIdx == 1 && Idx0->isNullValue()))
     return C;
 
   if (isa<UndefValue>(C)) {
     const PointerType *Ptr = cast<PointerType>(C->getType());
-    const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
-                                                       (Value **)Idxs,
-                                                       (Value **)Idxs+NumIdx);
+    const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx);
     assert(Ty != 0 && "Invalid indices for GEP!");
     return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
   }
 
-  Constant *Idx0 = Idxs[0];
   if (C->isNullValue()) {
     bool isNull = true;
     for (unsigned i = 0, e = NumIdx; i != e; ++i)
-      if (!Idxs[i]->isNullValue()) {
+      if (!cast<Constant>(Idxs[i])->isNullValue()) {
         isNull = false;
         break;
       }
     if (isNull) {
       const PointerType *Ptr = cast<PointerType>(C->getType());
-      const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
-                                                         (Value**)Idxs,
-                                                         (Value**)Idxs+NumIdx);
+      const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs,
+                                                         Idxs+NumIdx);
       assert(Ty != 0 && "Invalid indices for GEP!");
-      return  ConstantPointerNull::get(
-                            PointerType::get(Ty,Ptr->getAddressSpace()));
+      return ConstantPointerNull::get(PointerType::get(Ty,
+                                                       Ptr->getAddressSpace()));
     }
   }
 
@@ -2173,9 +2222,9 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
     }
 
     // Implement folding of:
-    //    int* getelementptr ([2 x int]* bitcast ([3 x int]* %X to [2 x int]*),
-    //                        long 0, long 0)
-    // To: int* getelementptr ([3 x int]* %X, long 0, long 0)
+    //    i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+    //                        i64 0, i64 0)
+    // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
     //
     if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
       if (const PointerType *SPT =
@@ -2214,7 +2263,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
                                                    ATy->getNumElements());
             NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
 
-            Constant *PrevIdx = Idxs[i-1];
+            Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
             Constant *Div = ConstantExpr::getSDiv(CI, Factor);
 
             // Before adding, extend both operands to i64 to avoid
@@ -2242,7 +2291,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
   // If we did any factoring, start over with the adjusted indices.
   if (!NewIdxs.empty()) {
     for (unsigned i = 0; i != NumIdx; ++i)
-      if (!NewIdxs[i]) NewIdxs[i] = Idxs[i];
+      if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
     return inBounds ?
       ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
                                              NewIdxs.size()) :
@@ -2257,3 +2306,17 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
 
   return 0;
 }
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+                                          bool inBounds,
+                                          Constant* const *Idxs,
+                                          unsigned NumIdx) {
+  return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+                                          bool inBounds,
+                                          Value* const *Idxs,
+                                          unsigned NumIdx) {
+  return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
index d2dbbdd74c24..0ecd7b49a48e 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/VMCore/ConstantFold.h
@@ -49,6 +49,8 @@ namespace llvm {
                                            Constant *C1, Constant *C2);
   Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
                                       Constant* const *Idxs, unsigned NumIdx);
+  Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+                                      Value* const *Idxs, unsigned NumIdx);
 } // End llvm namespace
 
 #endif
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 16eaca81048b..246fde1569ae 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -40,22 +40,25 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 // Constructor to create a '0' constant of arbitrary type...
-static const uint64_t zero[2] = {0, 0};
 Constant *Constant::getNullValue(const Type *Ty) {
   switch (Ty->getTypeID()) {
   case Type::IntegerTyID:
     return ConstantInt::get(Ty, 0);
   case Type::FloatTyID:
-    return ConstantFP::get(Ty->getContext(), APFloat(APInt(32, 0)));
+    return ConstantFP::get(Ty->getContext(),
+                           APFloat::getZero(APFloat::IEEEsingle));
   case Type::DoubleTyID:
-    return ConstantFP::get(Ty->getContext(), APFloat(APInt(64, 0)));
+    return ConstantFP::get(Ty->getContext(),
+                           APFloat::getZero(APFloat::IEEEdouble));
   case Type::X86_FP80TyID:
-    return ConstantFP::get(Ty->getContext(), APFloat(APInt(80, 2, zero)));
+    return ConstantFP::get(Ty->getContext(),
+                           APFloat::getZero(APFloat::x87DoubleExtended));
   case Type::FP128TyID:
     return ConstantFP::get(Ty->getContext(),
-                           APFloat(APInt(128, 2, zero), true));
+                           APFloat::getZero(APFloat::IEEEquad));
   case Type::PPC_FP128TyID:
-    return ConstantFP::get(Ty->getContext(), APFloat(APInt(128, 2, zero)));
+    return ConstantFP::get(Ty->getContext(),
+                           APFloat(APInt::getNullValue(128)));
   case Type::PointerTyID:
     return ConstantPointerNull::get(cast<PointerType>(Ty));
   case Type::StructTyID:
@@ -69,7 +72,7 @@ Constant *Constant::getNullValue(const Type *Ty) {
   }
 }
 
-Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
+Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
   const Type *ScalarTy = Ty->getScalarType();
 
   // Create the base integer constant.
@@ -86,12 +89,18 @@ Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
   return C;
 }
 
-Constant* Constant::getAllOnesValue(const Type *Ty) {
+Constant *Constant::getAllOnesValue(const Type *Ty) {
   if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
     return ConstantInt::get(Ty->getContext(),
                             APInt::getAllOnesValue(ITy->getBitWidth()));
-  
-  std::vector<Constant*> Elts;
+
+  if (Ty->isFloatingPointTy()) {
+    APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+                                          !Ty->isPPC_FP128Ty());
+    return ConstantFP::get(Ty->getContext(), FL);
+  }
+
+  SmallVector<Constant*, 16> Elts;
   const VectorType *VTy = cast<VectorType>(Ty);
   Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
   assert(Elts[0] && "Not a vector integer type!");
@@ -253,6 +262,59 @@ void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
 }
 
 
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it.  This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+  if (isa<GlobalValue>(C)) return false; // Cannot remove this
+  
+  while (!C->use_empty()) {
+    const Constant *User = dyn_cast<Constant>(C->use_back());
+    if (!User) return false; // Non-constant usage;
+    if (!removeDeadUsersOfConstant(User))
+      return false; // Constant wasn't dead
+  }
+  
+  const_cast<Constant*>(C)->destroyConstant();
+  return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them.  This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+  Value::const_use_iterator I = use_begin(), E = use_end();
+  Value::const_use_iterator LastNonDeadUser = E;
+  while (I != E) {
+    const Constant *User = dyn_cast<Constant>(*I);
+    if (User == 0) {
+      LastNonDeadUser = I;
+      ++I;
+      continue;
+    }
+    
+    if (!removeDeadUsersOfConstant(User)) {
+      // If the constant wasn't dead, remember that this was the last live use
+      // and move on to the next constant.
+      LastNonDeadUser = I;
+      ++I;
+      continue;
+    }
+    
+    // If the constant was dead, then the iterator is invalidated.
+    if (LastNonDeadUser == E) {
+      I = use_begin();
+      if (I == E) break;
+    } else {
+      I = LastNonDeadUser;
+      ++I;
+    }
+  }
+}
+
+
 
 //===----------------------------------------------------------------------===//
 //                                ConstantInt
@@ -265,20 +327,16 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
 
 ConstantInt* ConstantInt::getTrue(LLVMContext &Context) {
   LLVMContextImpl *pImpl = Context.pImpl;
-  if (pImpl->TheTrueVal)
-    return pImpl->TheTrueVal;
-  else
-    return (pImpl->TheTrueVal =
-              ConstantInt::get(IntegerType::get(Context, 1), 1));
+  if (!pImpl->TheTrueVal)
+    pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+  return pImpl->TheTrueVal;
 }
 
 ConstantInt* ConstantInt::getFalse(LLVMContext &Context) {
   LLVMContextImpl *pImpl = Context.pImpl;
-  if (pImpl->TheFalseVal)
-    return pImpl->TheFalseVal;
-  else
-    return (pImpl->TheFalseVal =
-              ConstantInt::get(IntegerType::get(Context, 1), 0));
+  if (!pImpl->TheFalseVal)
+    pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+  return pImpl->TheFalseVal;
 }
 
 
@@ -297,14 +355,14 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) {
   return Slot;
 }
 
-Constant* ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
+Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
   Constant *C = get(cast<IntegerType>(Ty->getScalarType()),
                                V, isSigned);
 
   // For vectors, broadcast the value.
   if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
-    return ConstantVector::get(
-      std::vector<Constant *>(VTy->getNumElements(), C));
+    return ConstantVector::get(SmallVector<Constant*,
+                                           16>(VTy->getNumElements(), C));
 
   return C;
 }
@@ -322,7 +380,7 @@ Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
   return get(Ty, V, true);
 }
 
-Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
   ConstantInt *C = get(Ty->getContext(), V);
   assert(C->getType() == Ty->getScalarType() &&
          "ConstantInt type doesn't match the type implied by its value!");
@@ -330,7 +388,7 @@ Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
   // For vectors, broadcast the value.
   if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
     return ConstantVector::get(
-      std::vector<Constant *>(VTy->getNumElements(), C));
+      SmallVector<Constant *, 16>(VTy->getNumElements(), C));
 
   return C;
 }
@@ -361,7 +419,7 @@ static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
 /// get() - This returns a constant fp for the specified value in the
 /// specified type.  This should only be used for simple constant values like
 /// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant* ConstantFP::get(const Type* Ty, double V) {
+Constant *ConstantFP::get(const Type* Ty, double V) {
   LLVMContext &Context = Ty->getContext();
   
   APFloat FV(V);
@@ -373,13 +431,13 @@ Constant* ConstantFP::get(const Type* Ty, double V) {
   // For vectors, broadcast the value.
   if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
     return ConstantVector::get(
-      std::vector<Constant *>(VTy->getNumElements(), C));
+      SmallVector<Constant *, 16>(VTy->getNumElements(), C));
 
   return C;
 }
 
 
-Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
   LLVMContext &Context = Ty->getContext();
 
   APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -388,7 +446,7 @@ Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
   // For vectors, broadcast the value.
   if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
     return ConstantVector::get(
-      std::vector<Constant *>(VTy->getNumElements(), C));
+      SmallVector<Constant *, 16>(VTy->getNumElements(), C));
 
   return C; 
 }
@@ -402,12 +460,12 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
 }
 
 
-Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) {
+Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
   if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
     if (PTy->getElementType()->isFloatingPointTy()) {
-      std::vector<Constant*> zeros(PTy->getNumElements(),
+      SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
                            getNegativeZero(PTy->getElementType()));
-      return ConstantVector::get(PTy, zeros);
+      return ConstantVector::get(zeros);
     }
 
   if (Ty->isFloatingPointTy()) 
@@ -510,7 +568,7 @@ Constant *ConstantArray::get(const ArrayType *Ty,
 }
 
 
-Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
+Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
                              unsigned NumVals) {
   // FIXME: make this the primary ctor method.
   return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
@@ -522,7 +580,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
 /// Otherwise, the length parameter specifies how much of the string to use 
 /// and it won't be null terminated.
 ///
-Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
+Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
                              bool AddNull) {
   std::vector<Constant*> ElementVals;
   ElementVals.reserve(Str.size() + size_t(AddNull));
@@ -558,7 +616,7 @@ ConstantStruct::ConstantStruct(const StructType *T,
 }
 
 // ConstantStruct accessors.
-Constant* ConstantStruct::get(const StructType* T,
+Constant *ConstantStruct::get(const StructType* T,
                               const std::vector<Constant*>& V) {
   LLVMContextImpl* pImpl = T->getContext().pImpl;
   
@@ -570,7 +628,7 @@ Constant* ConstantStruct::get(const StructType* T,
   return ConstantAggregateZero::get(T);
 }
 
-Constant* ConstantStruct::get(LLVMContext &Context,
+Constant *ConstantStruct::get(LLVMContext &Context,
                               const std::vector<Constant*>& V, bool packed) {
   std::vector<const Type*> StructEls;
   StructEls.reserve(V.size());
@@ -579,8 +637,8 @@ Constant* ConstantStruct::get(LLVMContext &Context,
   return get(StructType::get(Context, StructEls, packed), V);
 }
 
-Constant* ConstantStruct::get(LLVMContext &Context,
-                              Constant* const *Vals, unsigned NumVals,
+Constant *ConstantStruct::get(LLVMContext &Context,
+                              Constant *const *Vals, unsigned NumVals,
                               bool Packed) {
   // FIXME: make this the primary ctor method.
   return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
@@ -592,23 +650,22 @@ ConstantVector::ConstantVector(const VectorType *T,
              OperandTraits<ConstantVector>::op_end(this) - V.size(),
              V.size()) {
   Use *OL = OperandList;
-    for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
-         I != E; ++I, ++OL) {
-      Constant *C = *I;
-      assert(C->getType() == T->getElementType() &&
+  for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+       I != E; ++I, ++OL) {
+    Constant *C = *I;
+    assert(C->getType() == T->getElementType() &&
            "Initializer for vector element doesn't match vector element type!");
     *OL = C;
   }
 }
 
 // ConstantVector accessors.
-Constant* ConstantVector::get(const VectorType* T,
-                              const std::vector<Constant*>& V) {
-   assert(!V.empty() && "Vectors can't be empty");
-   LLVMContext &Context = T->getContext();
-   LLVMContextImpl *pImpl = Context.pImpl;
-   
-  // If this is an all-undef or alll-zero vector, return a
+Constant *ConstantVector::get(const VectorType *T,
+                              const std::vector<Constant*> &V) {
+  assert(!V.empty() && "Vectors can't be empty");
+  LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+  // If this is an all-undef or all-zero vector, return a
   // ConstantAggregateZero or UndefValue.
   Constant *C = V[0];
   bool isZero = C->isNullValue();
@@ -630,61 +687,10 @@ Constant* ConstantVector::get(const VectorType* T,
   return pImpl->VectorConstants.getOrCreate(T, V);
 }
 
-Constant* ConstantVector::get(const std::vector<Constant*>& V) {
-  assert(!V.empty() && "Cannot infer type if V is empty");
-  return get(VectorType::get(V.front()->getType(),V.size()), V);
-}
-
-Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) {
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
   // FIXME: make this the primary ctor method.
-  return get(std::vector<Constant*>(Vals, Vals+NumVals));
-}
-
-Constant* ConstantExpr::getNSWNeg(Constant* C) {
-  assert(C->getType()->isIntOrIntVectorTy() &&
-         "Cannot NEG a nonintegral value!");
-  return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
-}
-
-Constant* ConstantExpr::getNUWNeg(Constant* C) {
-  assert(C->getType()->isIntOrIntVectorTy() &&
-         "Cannot NEG a nonintegral value!");
-  return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
-}
-
-Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Add, C1, C2,
-               OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWAdd(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Add, C1, C2,
-               OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Sub, C1, C2,
-               OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWSub(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Sub, C1, C2,
-               OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Mul, C1, C2,
-               OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWMul(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::Mul, C1, C2,
-               OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) {
-  return getTy(C1->getType(), Instruction::SDiv, C1, C2,
-               SDivOperator::IsExact);
+  assert(!V.empty() && "Vectors cannot be empty");
+  return get(VectorType::get(V.front()->getType(), V.size()), V.vec());
 }
 
 // Utility function for determining if a ConstantExpr is a CastOp or not. This
@@ -812,7 +818,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
 /// operands replaced with the specified values.  The specified operands must
 /// match count and type with the existing ones.
 Constant *ConstantExpr::
-getWithOperands(Constant* const *Ops, unsigned NumOps) const {
+getWithOperands(Constant *const *Ops, unsigned NumOps) const {
   assert(NumOps == getNumOperands() && "Operand count mismatch!");
   bool AnyChange = false;
   for (unsigned i = 0; i != NumOps; ++i) {
@@ -1034,7 +1040,7 @@ bool ConstantVector::isAllOnesValue() const {
 
 /// getSplatValue - If this is a splat constant, where all of the
 /// elements have the same value, return that value. Otherwise return null.
-Constant *ConstantVector::getSplatValue() {
+Constant *ConstantVector::getSplatValue() const {
   // Check out first element.
   Constant *Elt = getOperand(0);
   // Then make sure all remaining elements point to the same value.
@@ -1241,7 +1247,7 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
   if (SrcBits == DstBits)
     return C; // Avoid a useless cast
   Instruction::CastOps opcode =
-     (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+    (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
   return getCast(opcode, C, Ty);
 }
 
@@ -1482,7 +1488,7 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
   return getTy(C1->getType(), Opcode, C1, C2, Flags);
 }
 
-Constant* ConstantExpr::getSizeOf(const Type* Ty) {
+Constant *ConstantExpr::getSizeOf(const Type* Ty) {
   // sizeof is implemented as: (i64) gep (Ty*)null, 1
   // Note that a non-inbounds gep is used, as null isn't within any object.
   Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1492,7 +1498,7 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) {
                      Type::getInt64Ty(Ty->getContext()));
 }
 
-Constant* ConstantExpr::getAlignOf(const Type* Ty) {
+Constant *ConstantExpr::getAlignOf(const Type* Ty) {
   // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
   // Note that a non-inbounds gep is used, as null isn't within any object.
   const Type *AligningTy = StructType::get(Ty->getContext(),
@@ -1506,12 +1512,12 @@ Constant* ConstantExpr::getAlignOf(const Type* Ty) {
                      Type::getInt64Ty(Ty->getContext()));
 }
 
-Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
   return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
                                            FieldNo));
 }
 
-Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
   // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
   // Note that a non-inbounds gep is used, as null isn't within any object.
   Constant *GEPIdx[] = {
@@ -1547,44 +1553,17 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
   return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
 }
 
+template<typename IndexTy>
 Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
-                                           Value* const *Idxs,
-                                           unsigned NumIdx) {
-  assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
-                                           Idxs+NumIdx) ==
-         cast<PointerType>(ReqTy)->getElementType() &&
-         "GEP indices invalid!");
-
-  if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/false,
-                                               (Constant**)Idxs, NumIdx))
-    return FC;          // Fold a few common cases...
-
-  assert(C->getType()->isPointerTy() &&
-         "Non-pointer type for constant GetElementPtr expression");
-  // Look up the constant in the table first to ensure uniqueness
-  std::vector<Constant*> ArgVec;
-  ArgVec.reserve(NumIdx+1);
-  ArgVec.push_back(C);
-  for (unsigned i = 0; i != NumIdx; ++i)
-    ArgVec.push_back(cast<Constant>(Idxs[i]));
-  const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
-
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
-                                                   Constant *C,
-                                                   Value *const *Idxs,
-                                                   unsigned NumIdx) {
+                                           IndexTy const *Idxs,
+                                           unsigned NumIdx, bool InBounds) {
   assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
                                            Idxs+NumIdx) ==
          cast<PointerType>(ReqTy)->getElementType() &&
          "GEP indices invalid!");
 
-  if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/true,
-                                               (Constant**)Idxs, NumIdx))
-    return FC;          // Fold a few common cases...
+  if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
+    return FC;          // Fold a few common cases.
 
   assert(C->getType()->isPointerTy() &&
          "Non-pointer type for constant GetElementPtr expression");
@@ -1595,42 +1574,31 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
   for (unsigned i = 0; i != NumIdx; ++i)
     ArgVec.push_back(cast<Constant>(Idxs[i]));
   const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
-                           GEPOperator::IsInBounds);
+                           InBounds ? GEPOperator::IsInBounds : 0);
 
   LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
   return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
 }
 
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
-                                         unsigned NumIdx) {
+template<typename IndexTy>
+Constant *ConstantExpr::getGetElementPtrImpl(Constant *C, IndexTy const *Idxs,
+                                             unsigned NumIdx, bool InBounds) {
   // Get the result type of the getelementptr!
   const Type *Ty = 
     GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
   assert(Ty && "GEP indices invalid!");
   unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
-  return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
+  return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx,InBounds);
 }
 
-Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
-                                                 Value* const *Idxs,
-                                                 unsigned NumIdx) {
-  // Get the result type of the getelementptr!
-  const Type *Ty = 
-    GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
-  assert(Ty && "GEP indices invalid!");
-  unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
-  return getInBoundsGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant* const *Idxs,
-                                         unsigned NumIdx) {
-  return getGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+                                         unsigned NumIdx, bool InBounds) {
+  return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
 }
 
-Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
-                                                 Constant* const *Idxs,
-                                                 unsigned NumIdx) {
-  return getInBoundsGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant *const *Idxs,
+                                         unsigned NumIdx, bool InBounds) {
+  return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
 }
 
 Constant *
@@ -1804,98 +1772,111 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
   return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
 }
 
-Constant* ConstantExpr::getNeg(Constant* C) {
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
   assert(C->getType()->isIntOrIntVectorTy() &&
          "Cannot NEG a nonintegral value!");
-  return get(Instruction::Sub,
-             ConstantFP::getZeroValueForNegation(C->getType()),
-             C);
+  return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+                C, HasNUW, HasNSW);
 }
 
-Constant* ConstantExpr::getFNeg(Constant* C) {
+Constant *ConstantExpr::getFNeg(Constant *C) {
   assert(C->getType()->isFPOrFPVectorTy() &&
          "Cannot FNEG a non-floating-point value!");
-  return get(Instruction::FSub,
-             ConstantFP::getZeroValueForNegation(C->getType()),
-             C);
+  return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
 }
 
-Constant* ConstantExpr::getNot(Constant* C) {
+Constant *ConstantExpr::getNot(Constant *C) {
   assert(C->getType()->isIntOrIntVectorTy() &&
          "Cannot NOT a nonintegral value!");
   return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
 }
 
-Constant* ConstantExpr::getAdd(Constant* C1, Constant* C2) {
-  return get(Instruction::Add, C1, C2);
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+                               bool HasNUW, bool HasNSW) {
+  unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+                   (HasNSW ? OverflowingBinaryOperator::NoSignedWrap   : 0);
+  return get(Instruction::Add, C1, C2, Flags);
 }
 
-Constant* ConstantExpr::getFAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
   return get(Instruction::FAdd, C1, C2);
 }
 
-Constant* ConstantExpr::getSub(Constant* C1, Constant* C2) {
-  return get(Instruction::Sub, C1, C2);
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+                               bool HasNUW, bool HasNSW) {
+  unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+                   (HasNSW ? OverflowingBinaryOperator::NoSignedWrap   : 0);
+  return get(Instruction::Sub, C1, C2, Flags);
 }
 
-Constant* ConstantExpr::getFSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
   return get(Instruction::FSub, C1, C2);
 }
 
-Constant* ConstantExpr::getMul(Constant* C1, Constant* C2) {
-  return get(Instruction::Mul, C1, C2);
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+                               bool HasNUW, bool HasNSW) {
+  unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+                   (HasNSW ? OverflowingBinaryOperator::NoSignedWrap   : 0);
+  return get(Instruction::Mul, C1, C2, Flags);
 }
 
-Constant* ConstantExpr::getFMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
   return get(Instruction::FMul, C1, C2);
 }
 
-Constant* ConstantExpr::getUDiv(Constant* C1, Constant* C2) {
-  return get(Instruction::UDiv, C1, C2);
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+  return get(Instruction::UDiv, C1, C2,
+             isExact ? PossiblyExactOperator::IsExact : 0);
 }
 
-Constant* ConstantExpr::getSDiv(Constant* C1, Constant* C2) {
-  return get(Instruction::SDiv, C1, C2);
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+  return get(Instruction::SDiv, C1, C2,
+             isExact ? PossiblyExactOperator::IsExact : 0);
 }
 
-Constant* ConstantExpr::getFDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
   return get(Instruction::FDiv, C1, C2);
 }
 
-Constant* ConstantExpr::getURem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
   return get(Instruction::URem, C1, C2);
 }
 
-Constant* ConstantExpr::getSRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
   return get(Instruction::SRem, C1, C2);
 }
 
-Constant* ConstantExpr::getFRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
   return get(Instruction::FRem, C1, C2);
 }
 
-Constant* ConstantExpr::getAnd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
   return get(Instruction::And, C1, C2);
 }
 
-Constant* ConstantExpr::getOr(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
   return get(Instruction::Or, C1, C2);
 }
 
-Constant* ConstantExpr::getXor(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
   return get(Instruction::Xor, C1, C2);
 }
 
-Constant* ConstantExpr::getShl(Constant* C1, Constant* C2) {
-  return get(Instruction::Shl, C1, C2);
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+                               bool HasNUW, bool HasNSW) {
+  unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+                   (HasNSW ? OverflowingBinaryOperator::NoSignedWrap   : 0);
+  return get(Instruction::Shl, C1, C2, Flags);
 }
 
-Constant* ConstantExpr::getLShr(Constant* C1, Constant* C2) {
-  return get(Instruction::LShr, C1, C2);
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+  return get(Instruction::LShr, C1, C2,
+             isExact ? PossiblyExactOperator::IsExact : 0);
 }
 
-Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
-  return get(Instruction::AShr, C1, C2);
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+  return get(Instruction::AShr, C1, C2,
+             isExact ? PossiblyExactOperator::IsExact : 0);
 }
 
 // destroyConstant - Remove the constant from the constant table...
@@ -2127,7 +2108,8 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
       Indices.push_back(Val);
     }
     Replacement = ConstantExpr::getGetElementPtr(Pointer,
-                                                 &Indices[0], Indices.size());
+                                                 &Indices[0], Indices.size(),
+                                         cast<GEPOperator>(this)->isInBounds());
   } else if (getOpcode() == Instruction::ExtractValue) {
     Constant *Agg = getOperand(0);
     if (Agg == From) Agg = To;
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 1c04c3e1987e..ffc673fac0da 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -239,54 +239,64 @@ struct CompareConstantExpr : public ConstantExpr {
 };
 
 template <>
-struct OperandTraits<UnaryConstantExpr> : public FixedNumOperandTraits<1> {
+struct OperandTraits<UnaryConstantExpr> :
+  public FixedNumOperandTraits<UnaryConstantExpr, 1> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
 
 template <>
-struct OperandTraits<BinaryConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<BinaryConstantExpr> :
+  public FixedNumOperandTraits<BinaryConstantExpr, 2> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
 
 template <>
-struct OperandTraits<SelectConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<SelectConstantExpr> :
+  public FixedNumOperandTraits<SelectConstantExpr, 3> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
 
 template <>
-struct OperandTraits<ExtractElementConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<ExtractElementConstantExpr> :
+  public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
 
 template <>
-struct OperandTraits<InsertElementConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<InsertElementConstantExpr> :
+  public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
 
 template <>
-struct OperandTraits<ShuffleVectorConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<ShuffleVectorConstantExpr> :
+    public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
 
 template <>
-struct OperandTraits<ExtractValueConstantExpr> : public FixedNumOperandTraits<1> {
+struct OperandTraits<ExtractValueConstantExpr> :
+  public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
 
 template <>
-struct OperandTraits<InsertValueConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<InsertValueConstantExpr> :
+  public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
 
 template <>
-struct OperandTraits<GetElementPtrConstantExpr> : public VariadicOperandTraits<1> {
+struct OperandTraits<GetElementPtrConstantExpr> :
+  public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
 
 
 template <>
-struct OperandTraits<CompareConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<CompareConstantExpr> :
+  public FixedNumOperandTraits<CompareConstantExpr, 2> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
 
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 5aad19dd2a4a..35c3a2e92587 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the C bindings for libLLVMCore.a, which implements
-// the LLVM intermediate representation.
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,12 +28,24 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
 
 using namespace llvm;
 
+void llvm::initializeCore(PassRegistry &Registry) {
+  initializeDominatorTreePass(Registry);
+  initializePrintModulePassPass(Registry);
+  initializePrintFunctionPassPass(Registry);
+  initializeVerifierPass(Registry);
+  initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+  initializeCore(*unwrap(R));
+}
 
 /*===-- Error handling ----------------------------------------------------===*/
 
@@ -116,6 +128,10 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
   return wrap(unwrap(M)->getTypeByName(Name));
 }
 
+const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty) {
+  return unwrap(M)->getTypeName(unwrap(Ty)).c_str();
+}
+
 void LLVMDumpModule(LLVMModuleRef M) {
   unwrap(M)->dump();
 }
@@ -126,6 +142,12 @@ void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
 }
 
 
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+  return wrap(&unwrap(M)->getContext());
+}
+
+
 /*===-- Operations on types -----------------------------------------------===*/
 
 /*--.. Operations on all types (mostly) ....................................--*/
@@ -164,6 +186,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
     return LLVMOpaqueTypeKind;
   case Type::VectorTyID:
     return LLVMVectorTypeKind;
+  case Type::X86_MMXTyID:
+    return LLVMX86_MMXTypeKind;
   }
 }
 
@@ -232,6 +256,9 @@ LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
 LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
   return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
 }
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+  return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
 
 LLVMTypeRef LLVMFloatType(void) {
   return LLVMFloatTypeInContext(LLVMGetGlobalContext());
@@ -248,6 +275,9 @@ LLVMTypeRef LLVMFP128Type(void) {
 LLVMTypeRef LLVMPPCFP128Type(void) {
   return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
 }
+LLVMTypeRef LLVMX86MMXType(void) {
+  return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
 
 /*--.. Operations on function types ........................................--*/
 
@@ -527,6 +557,14 @@ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
   return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
 }
 
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+                                              unsigned NumWords,
+                                              const uint64_t Words[]) {
+    IntegerType *Ty = unwrap<IntegerType>(IntTy);
+    return wrap(ConstantInt::get(Ty->getContext(),
+                                 APInt(Ty->getBitWidth(), NumWords, Words)));
+}
+
 LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
                                   uint8_t Radix) {
   return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
@@ -567,7 +605,7 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
                                       LLVMBool DontNullTerminate) {
   /* Inverted the sense of AddNull because ', 0)' is a
      better mnemonic for null termination than ', 1)'. */
-  return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length),
+  return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
                                  DontNullTerminate == 0));
 }
 LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, 
@@ -595,8 +633,8 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
                                   Packed);
 }
 LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
-  return wrap(ConstantVector::get(
-                            unwrap<Constant>(ScalarConstantVals, Size), Size));
+  return wrap(ConstantVector::get(ArrayRef<Constant*>(
+                            unwrap<Constant>(ScalarConstantVals, Size), Size)));
 }
 /*--.. Constant expressions ................................................--*/
 
@@ -613,74 +651,62 @@ LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
 }
 
 LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
-  return wrap(ConstantExpr::getNeg(
-                                   unwrap<Constant>(ConstantVal)));
+  return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
 }
 
 LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
-  return wrap(ConstantExpr::getNSWNeg(
-                                      unwrap<Constant>(ConstantVal)));
+  return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
 }
 
 LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
-  return wrap(ConstantExpr::getNUWNeg(
-                                      unwrap<Constant>(ConstantVal)));
+  return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
 }
 
 
 LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
-  return wrap(ConstantExpr::getFNeg(
-                                    unwrap<Constant>(ConstantVal)));
+  return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
 }
 
 LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
-  return wrap(ConstantExpr::getNot(
-                                   unwrap<Constant>(ConstantVal)));
+  return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
 }
 
 LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getAdd(
-                                   unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
                                    unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNSWAdd(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNUWAdd(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getFAdd(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getSub(
-                                   unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
                                    unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNSWSub(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNUWSub(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
@@ -690,89 +716,75 @@ LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
 }
 
 LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getMul(
-                                   unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
                                    unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNSWMul(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
                              LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getNUWMul(
-                                      unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
                                       unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getFMul(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getUDiv(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getSDiv(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
                                 LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getExactSDiv(
-                                         unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
                                          unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getFDiv(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getURem(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getSRem(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getFRem(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getAnd(
-                                   unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
                                    unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getOr(
-                                  unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
                                   unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getXor(
-                                   unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
                                    unwrap<Constant>(RHSConstant)));
 }
 
@@ -791,27 +803,23 @@ LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
 }
 
 LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getShl(
-                                  unwrap<Constant>(LHSConstant),
-                                  unwrap<Constant>(RHSConstant)));
+  return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+                                   unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getLShr(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
-  return wrap(ConstantExpr::getAShr(
-                                    unwrap<Constant>(LHSConstant),
+  return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
                                     unwrap<Constant>(RHSConstant)));
 }
 
 LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
                           LLVMValueRef *ConstantIndices, unsigned NumIndices) {
-  return wrap(ConstantExpr::getGetElementPtr(
-                                             unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
                                              unwrap<Constant>(ConstantIndices, 
                                                               NumIndices),
                                              NumIndices));
@@ -826,38 +834,32 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
 }
 
 LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getTrunc(
-                                     unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
                                      unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getSExt(
-                                    unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
                                     unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getZExt(
-                                    unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
                                     unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getFPTrunc(
-                                       unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
                                        unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getFPExtend(
-                                        unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
                                         unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getUIToFP(
-                                      unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
                                       unwrap(ToType)));
 }
 
@@ -872,92 +874,78 @@ LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
 }
 
 LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getFPToSI(
-                                      unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
                                       unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getPtrToInt(
-                                        unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
                                         unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getIntToPtr(
-                                        unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
                                         unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getBitCast(
-                                       unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
                                        unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
                                     LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getZExtOrBitCast(
-                                             unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
                                              unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
                                     LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getSExtOrBitCast(
-                                             unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
                                              unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
                                      LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getTruncOrBitCast(
-                                              unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
                                               unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
                                   LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getPointerCast(
-                                           unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
                                            unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
                               LLVMBool isSigned) {
-  return wrap(ConstantExpr::getIntegerCast(
-                                           unwrap<Constant>(ConstantVal),
-                                           unwrap(ToType),
-                                           isSigned));
+  return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+                                           unwrap(ToType), isSigned));
 }
 
 LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
-  return wrap(ConstantExpr::getFPCast(
-                                      unwrap<Constant>(ConstantVal),
+  return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
                                       unwrap(ToType)));
 }
 
 LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
                              LLVMValueRef ConstantIfTrue,
                              LLVMValueRef ConstantIfFalse) {
-  return wrap(ConstantExpr::getSelect(
-                                      unwrap<Constant>(ConstantCondition),
+  return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
                                       unwrap<Constant>(ConstantIfTrue),
                                       unwrap<Constant>(ConstantIfFalse)));
 }
 
 LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
                                      LLVMValueRef IndexConstant) {
-  return wrap(ConstantExpr::getExtractElement(
-                                              unwrap<Constant>(VectorConstant),
+  return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
                                               unwrap<Constant>(IndexConstant)));
 }
 
 LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
                                     LLVMValueRef ElementValueConstant,
                                     LLVMValueRef IndexConstant) {
-  return wrap(ConstantExpr::getInsertElement(
-                                         unwrap<Constant>(VectorConstant),
+  return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
                                          unwrap<Constant>(ElementValueConstant),
                                              unwrap<Constant>(IndexConstant)));
 }
@@ -965,24 +953,21 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
 LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
                                     LLVMValueRef VectorBConstant,
                                     LLVMValueRef MaskConstant) {
-  return wrap(ConstantExpr::getShuffleVector(
-                                             unwrap<Constant>(VectorAConstant),
+  return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
                                              unwrap<Constant>(VectorBConstant),
                                              unwrap<Constant>(MaskConstant)));
 }
 
 LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
                                    unsigned NumIdx) {
-  return wrap(ConstantExpr::getExtractValue(
-                                            unwrap<Constant>(AggConstant),
+  return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
                                             IdxList, NumIdx));
 }
 
 LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
                                   LLVMValueRef ElementValueConstant,
                                   unsigned *IdxList, unsigned NumIdx) {
-  return wrap(ConstantExpr::getInsertValue(
-                                         unwrap<Constant>(AggConstant),
+  return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
                                          unwrap<Constant>(ElementValueConstant),
                                            IdxList, NumIdx));
 }
@@ -2186,25 +2171,27 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
     LLVMMemoryBufferRef *OutMemBuf,
     char **OutMessage) {
 
-  std::string Error;
-  if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) {
-    *OutMemBuf = wrap(MB);
+  OwningPtr<MemoryBuffer> MB;
+  error_code ec;
+  if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+    *OutMemBuf = wrap(MB.take());
     return 0;
   }
-  
-  *OutMessage = strdup(Error.c_str());
+
+  *OutMessage = strdup(ec.message().c_str());
   return 1;
 }
 
 LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
                                          char **OutMessage) {
-  std::string Error;
-  if (MemoryBuffer *MB = MemoryBuffer::getSTDIN(&Error)) {
-    *OutMemBuf = wrap(MB);
+  OwningPtr<MemoryBuffer> MB;
+  error_code ec;
+  if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+    *OutMemBuf = wrap(MB.take());
     return 0;
   }
 
-  *OutMessage = strdup(Error.c_str());
+  *OutMessage = strdup(ec.message().c_str());
   return 1;
 }
 
@@ -2212,6 +2199,11 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
   delete unwrap(MemBuf);
 }
 
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+  return wrap(PassRegistry::getPassRegistry());
+}
 
 /*===-- Pass Manager ------------------------------------------------------===*/
 
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index f3dad824461d..c374b067d72c 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -19,10 +19,10 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Instructions.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/CommandLine.h"
@@ -44,7 +44,7 @@ VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
 //===----------------------------------------------------------------------===//
 //
 // Provide public access to DominatorTree information.  Implementation details
-// can be found in DominatorCalculation.h.
+// can be found in DominatorInternals.h.
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,7 +53,7 @@ TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
 
 char DominatorTree::ID = 0;
 INITIALIZE_PASS(DominatorTree, "domtree",
-                "Dominator Tree Construction", true, true);
+                "Dominator Tree Construction", true, true)
 
 bool DominatorTree::runOnFunction(Function &F) {
   DT->recalculate(F);
@@ -67,7 +67,14 @@ void DominatorTree::verifyAnalysis() const {
 
   DominatorTree OtherDT;
   OtherDT.getBase().recalculate(F);
-  assert(!compare(OtherDT) && "Invalid DominatorTree info!");
+  if (compare(OtherDT)) {
+    errs() << "DominatorTree is not up to date!  Computed:\n";
+    print(errs());
+    
+    errs() << "\nActual:\n";
+    OtherDT.print(errs());
+    abort();
+  }
 }
 
 void DominatorTree::print(raw_ostream &OS, const Module *) const {
@@ -98,263 +105,3 @@ bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
   
   return &*I == A;
 }
-
-
-
-//===----------------------------------------------------------------------===//
-//  DominanceFrontier Implementation
-//===----------------------------------------------------------------------===//
-
-char DominanceFrontier::ID = 0;
-INITIALIZE_PASS(DominanceFrontier, "domfrontier",
-                "Dominance Frontier Construction", true, true);
-
-void DominanceFrontier::verifyAnalysis() const {
-  if (!VerifyDomInfo) return;
-
-  DominatorTree &DT = getAnalysis<DominatorTree>();
-
-  DominanceFrontier OtherDF;
-  const std::vector<BasicBlock*> &DTRoots = DT.getRoots();
-  OtherDF.calculate(DT, DT.getNode(DTRoots[0]));
-  assert(!compare(OtherDF) && "Invalid DominanceFrontier info!");
-}
-
-// NewBB is split and now it has one successor. Update dominance frontier to
-// reflect this change.
-void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
-  assert(NewBB->getTerminator()->getNumSuccessors() == 1 &&
-         "NewBB should have a single successor!");
-  BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
-
-  // NewBBSucc inherits original NewBB frontier.
-  DominanceFrontier::iterator NewBBI = find(NewBB);
-  if (NewBBI != end())
-    addBasicBlock(NewBBSucc, NewBBI->second);
-
-  // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the
-  // DF(NewBBSucc) without the stuff that the new block does not dominate
-  // a predecessor of.
-  DominatorTree &DT = getAnalysis<DominatorTree>();
-  DomTreeNode *NewBBNode = DT.getNode(NewBB);
-  DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc);
-  if (DT.dominates(NewBBNode, NewBBSuccNode)) {
-    DominanceFrontier::iterator DFI = find(NewBBSucc);
-    if (DFI != end()) {
-      DominanceFrontier::DomSetType Set = DFI->second;
-      // Filter out stuff in Set that we do not dominate a predecessor of.
-      for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
-             E = Set.end(); SetI != E;) {
-        bool DominatesPred = false;
-        for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
-             PI != E; ++PI)
-          if (DT.dominates(NewBBNode, DT.getNode(*PI))) {
-            DominatesPred = true;
-            break;
-          }
-        if (!DominatesPred)
-          Set.erase(SetI++);
-        else
-          ++SetI;
-      }
-
-      if (NewBBI != end()) {
-        for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
-               E = Set.end(); SetI != E; ++SetI) {
-          BasicBlock *SB = *SetI;
-          addToFrontier(NewBBI, SB);
-        }
-      } else 
-        addBasicBlock(NewBB, Set);
-    }
-    
-  } else {
-    // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate
-    // NewBBSucc, but it does dominate itself (and there is an edge (NewBB ->
-    // NewBBSucc)).  NewBBSucc is the single successor of NewBB.
-    DominanceFrontier::DomSetType NewDFSet;
-    NewDFSet.insert(NewBBSucc);
-    addBasicBlock(NewBB, NewDFSet);
-  }
-
-  // Now update dominance frontiers which either used to contain NewBBSucc
-  // or which now need to include NewBB.
-
-  // Collect the set of blocks which dominate a predecessor of NewBB or
-  // NewSuccBB and which don't dominate both. This is an initial
-  // approximation of the blocks whose dominance frontiers will need updates.
-  SmallVector<DomTreeNode *, 16> AllPredDoms;
-
-  // Compute the block which dominates both NewBBSucc and NewBB. This is
-  // the immediate dominator of NewBBSucc unless NewBB dominates NewBBSucc.
-  // The code below which climbs dominator trees will stop at this point,
-  // because from this point up, dominance frontiers are unaffected.
-  DomTreeNode *DominatesBoth = 0;
-  if (NewBBSuccNode) {
-    DominatesBoth = NewBBSuccNode->getIDom();
-    if (DominatesBoth == NewBBNode)
-      DominatesBoth = NewBBNode->getIDom();
-  }
-
-  // Collect the set of all blocks which dominate a predecessor of NewBB.
-  SmallPtrSet<DomTreeNode *, 8> NewBBPredDoms;
-  for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI)
-    for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
-      if (DTN == DominatesBoth)
-        break;
-      if (!NewBBPredDoms.insert(DTN))
-        break;
-      AllPredDoms.push_back(DTN);
-    }
-
-  // Collect the set of all blocks which dominate a predecessor of NewSuccBB.
-  SmallPtrSet<DomTreeNode *, 8> NewBBSuccPredDoms;
-  for (pred_iterator PI = pred_begin(NewBBSucc),
-       E = pred_end(NewBBSucc); PI != E; ++PI)
-    for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
-      if (DTN == DominatesBoth)
-        break;
-      if (!NewBBSuccPredDoms.insert(DTN))
-        break;
-      if (!NewBBPredDoms.count(DTN))
-        AllPredDoms.push_back(DTN);
-    }
-
-  // Visit all relevant dominance frontiers and make any needed updates.
-  for (SmallVectorImpl<DomTreeNode *>::const_iterator I = AllPredDoms.begin(),
-       E = AllPredDoms.end(); I != E; ++I) {
-    DomTreeNode *DTN = *I;
-    iterator DFI = find((*I)->getBlock());
-
-    // Only consider nodes that have NewBBSucc in their dominator frontier.
-    if (DFI == end() || !DFI->second.count(NewBBSucc)) continue;
-
-    // If the block dominates a predecessor of NewBB but does not properly
-    // dominate NewBB itself, add NewBB to its dominance frontier.
-    if (NewBBPredDoms.count(DTN) &&
-        !DT.properlyDominates(DTN, NewBBNode))
-      addToFrontier(DFI, NewBB);
-
-    // If the block does not dominate a predecessor of NewBBSucc or
-    // properly dominates NewBBSucc itself, remove NewBBSucc from its
-    // dominance frontier.
-    if (!NewBBSuccPredDoms.count(DTN) ||
-        DT.properlyDominates(DTN, NewBBSuccNode))
-      removeFromFrontier(DFI, NewBBSucc);
-  }
-}
-
-namespace {
-  class DFCalculateWorkObject {
-  public:
-    DFCalculateWorkObject(BasicBlock *B, BasicBlock *P, 
-                          const DomTreeNode *N,
-                          const DomTreeNode *PN)
-    : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
-    BasicBlock *currentBB;
-    BasicBlock *parentBB;
-    const DomTreeNode *Node;
-    const DomTreeNode *parentNode;
-  };
-}
-
-const DominanceFrontier::DomSetType &
-DominanceFrontier::calculate(const DominatorTree &DT,
-                             const DomTreeNode *Node) {
-  BasicBlock *BB = Node->getBlock();
-  DomSetType *Result = NULL;
-
-  std::vector<DFCalculateWorkObject> workList;
-  SmallPtrSet<BasicBlock *, 32> visited;
-
-  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
-  do {
-    DFCalculateWorkObject *currentW = &workList.back();
-    assert (currentW && "Missing work object.");
-
-    BasicBlock *currentBB = currentW->currentBB;
-    BasicBlock *parentBB = currentW->parentBB;
-    const DomTreeNode *currentNode = currentW->Node;
-    const DomTreeNode *parentNode = currentW->parentNode;
-    assert (currentBB && "Invalid work object. Missing current Basic Block");
-    assert (currentNode && "Invalid work object. Missing current Node");
-    DomSetType &S = Frontiers[currentBB];
-
-    // Visit each block only once.
-    if (visited.count(currentBB) == 0) {
-      visited.insert(currentBB);
-
-      // Loop over CFG successors to calculate DFlocal[currentNode]
-      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
-           SI != SE; ++SI) {
-        // Does Node immediately dominate this successor?
-        if (DT[*SI]->getIDom() != currentNode)
-          S.insert(*SI);
-      }
-    }
-
-    // At this point, S is DFlocal.  Now we union in DFup's of our children...
-    // Loop through and visit the nodes that Node immediately dominates (Node's
-    // children in the IDomTree)
-    bool visitChild = false;
-    for (DomTreeNode::const_iterator NI = currentNode->begin(), 
-           NE = currentNode->end(); NI != NE; ++NI) {
-      DomTreeNode *IDominee = *NI;
-      BasicBlock *childBB = IDominee->getBlock();
-      if (visited.count(childBB) == 0) {
-        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
-                                                 IDominee, currentNode));
-        visitChild = true;
-      }
-    }
-
-    // If all children are visited or there is any child then pop this block
-    // from the workList.
-    if (!visitChild) {
-
-      if (!parentBB) {
-        Result = &S;
-        break;
-      }
-
-      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
-      DomSetType &parentSet = Frontiers[parentBB];
-      for (; CDFI != CDFE; ++CDFI) {
-        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
-          parentSet.insert(*CDFI);
-      }
-      workList.pop_back();
-    }
-
-  } while (!workList.empty());
-
-  return *Result;
-}
-
-void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
-  for (const_iterator I = begin(), E = end(); I != E; ++I) {
-    OS << "  DomFrontier for BB ";
-    if (I->first)
-      WriteAsOperand(OS, I->first, false);
-    else
-      OS << " <<exit node>>";
-    OS << " is:\t";
-    
-    const std::set<BasicBlock*> &BBs = I->second;
-    
-    for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
-         I != E; ++I) {
-      OS << ' ';
-      if (*I)
-        WriteAsOperand(OS, *I, false);
-      else
-        OS << "<<exit node>>";
-    }
-    OS << "\n";
-  }
-}
-
-void DominanceFrontierBase::dump() const {
-  print(dbgs());
-}
-
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 8f94efc6673a..00d1d7873247 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -20,8 +20,8 @@
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/StringPool.h"
-#include "llvm/System/RWMutex.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Threading.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringExtras.h"
@@ -227,19 +227,10 @@ void Function::dropAllReferences() {
   for (iterator I = begin(), E = end(); I != E; ++I)
     I->dropAllReferences();
   
-  // Delete all basic blocks.
-  while (!BasicBlocks.empty()) {
-    // If there is still a reference to the block, it must be a 'blockaddress'
-    // constant pointing to it.  Just replace the BlockAddress with undef.
-    BasicBlock *BB = BasicBlocks.begin();
-    if (!BB->use_empty()) {
-      BlockAddress *BA = cast<BlockAddress>(BB->use_back());
-      BA->replaceAllUsesWith(UndefValue::get(BA->getType()));
-      BA->destroyConstant();
-    }
-    
-    BB->eraseFromParent();
-  }
+  // Delete all basic blocks. They are now unused, except possibly by
+  // blockaddresses, but BasicBlock's destructor takes care of those.
+  while (!BasicBlocks.empty())
+    BasicBlocks.begin()->eraseFromParent();
 }
 
 void Function::addAttribute(unsigned i, Attributes attr) {
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 96716eeb349b..60000ad1b50e 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -26,23 +26,6 @@ using namespace llvm;
 //                            GlobalValue Class
 //===----------------------------------------------------------------------===//
 
-/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
-/// it.  This involves recursively eliminating any dead users of the
-/// constantexpr.
-static bool removeDeadUsersOfConstant(const Constant *C) {
-  if (isa<GlobalValue>(C)) return false; // Cannot remove this
-
-  while (!C->use_empty()) {
-    const Constant *User = dyn_cast<Constant>(C->use_back());
-    if (!User) return false; // Non-constant usage;
-    if (!removeDeadUsersOfConstant(User))
-      return false; // Constant wasn't dead
-  }
-
-  const_cast<Constant*>(C)->destroyConstant();
-  return true;
-}
-
 bool GlobalValue::isMaterializable() const {
   return getParent() && getParent()->isMaterializable(this);
 }
@@ -56,38 +39,6 @@ void GlobalValue::Dematerialize() {
   getParent()->Dematerialize(this);
 }
 
-/// removeDeadConstantUsers - If there are any dead constant users dangling
-/// off of this global value, remove them.  This method is useful for clients
-/// that want to check to see if a global is unused, but don't want to deal
-/// with potentially dead constants hanging off of the globals.
-void GlobalValue::removeDeadConstantUsers() const {
-  Value::const_use_iterator I = use_begin(), E = use_end();
-  Value::const_use_iterator LastNonDeadUser = E;
-  while (I != E) {
-    if (const Constant *User = dyn_cast<Constant>(*I)) {
-      if (!removeDeadUsersOfConstant(User)) {
-        // If the constant wasn't dead, remember that this was the last live use
-        // and move on to the next constant.
-        LastNonDeadUser = I;
-        ++I;
-      } else {
-        // If the constant was dead, then the iterator is invalidated.
-        if (LastNonDeadUser == E) {
-          I = use_begin();
-          if (I == E) break;
-        } else {
-          I = LastNonDeadUser;
-          ++I;
-        }
-      }
-    } else {
-      LastNonDeadUser = I;
-      ++I;
-    }
-  }
-}
-
-
 /// Override destroyConstant to make sure it doesn't get called on
 /// GlobalValue's because they shouldn't be treated like other constants.
 void GlobalValue::destroyConstant() {
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index c1b783c75210..595dea470bc3 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/LLVMContext.h"
 using namespace llvm;
 
@@ -36,3 +37,83 @@ const Type *IRBuilderBase::getCurrentFunctionReturnType() const {
   assert(BB && BB->getParent() && "No current function!");
   return BB->getParent()->getReturnType();
 }
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+  const PointerType *PT = cast<PointerType>(Ptr->getType());
+  if (PT->getElementType()->isIntegerTy(8))
+    return Ptr;
+  
+  // Otherwise, we need to insert a bitcast.
+  PT = getInt8PtrTy(PT->getAddressSpace());
+  BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+  BB->getInstList().insert(InsertPt, BCI);
+  SetInstDebugLocation(BCI);
+  return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, Value *const* Ops,
+                                  unsigned NumOps, IRBuilderBase *Builder) {
+  CallInst *CI = CallInst::Create(Callee, Ops, Ops + NumOps, "");
+  Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
+  Builder->SetInstDebugLocation(CI);
+  return CI;  
+}
+
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+             bool isVolatile, MDNode *TBAATag) {
+  Ptr = getCastedInt8PtrValue(Ptr);
+  Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+  const Type *Tys[] = { Ptr->getType(), Size->getType() };
+  Module *M = BB->getParent()->getParent();
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+  
+  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  
+  // Set the TBAA info if present.
+  if (TBAATag)
+    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+  
+  return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+             bool isVolatile, MDNode *TBAATag) {
+  Dst = getCastedInt8PtrValue(Dst);
+  Src = getCastedInt8PtrValue(Src);
+
+  Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+  const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+  Module *M = BB->getParent()->getParent();
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);
+  
+  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  
+  // Set the TBAA info if present.
+  if (TBAATag)
+    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+  
+  return CI;  
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+              bool isVolatile, MDNode *TBAATag) {
+  Dst = getCastedInt8PtrValue(Dst);
+  Src = getCastedInt8PtrValue(Src);
+  
+  Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+  const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+  Module *M = BB->getParent()->getParent();
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys, 3);
+  
+  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  
+  // Set the TBAA info if present.
+  if (TBAATag)
+    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+  
+  return CI;  
+}
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 69f713b2c42c..e4f99f09a5c2 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -47,26 +47,54 @@ InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
 }
 
 void InlineAsm::destroyConstant() {
+  getRawType()->getContext().pImpl->InlineAsms.remove(this);
   delete this;
 }
 
 const FunctionType *InlineAsm::getFunctionType() const {
   return cast<FunctionType>(getType()->getElementType());
 }
+    
+///Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+  Type(isInput), isEarlyClobber(false),
+  MatchingInput(-1), isCommutative(false),
+  isIndirect(false), isMultipleAlternative(false),
+  currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+  Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+  MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+  isIndirect(other.isIndirect), Codes(other.Codes),
+  isMultipleAlternative(other.isMultipleAlternative),
+  multipleAlternatives(other.multipleAlternatives),
+  currentAlternativeIndex(other.currentAlternativeIndex) {
+}
 
 /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
 /// fields in this structure.  If the constraint string is not understood,
 /// return true, otherwise return false.
 bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
-                     std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
+                     InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
   StringRef::iterator I = Str.begin(), E = Str.end();
+  unsigned multipleAlternativeCount = Str.count('|') + 1;
+  unsigned multipleAlternativeIndex = 0;
+  ConstraintCodeVector *pCodes = &Codes;
   
   // Initialize
+  isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
+  if (isMultipleAlternative) {
+    multipleAlternatives.resize(multipleAlternativeCount);
+    pCodes = &multipleAlternatives[0].Codes;
+  }
   Type = isInput;
   isEarlyClobber = false;
   MatchingInput = -1;
   isCommutative = false;
   isIndirect = false;
+  currentAlternativeIndex = 0;
   
   // Parse prefixes.
   if (*I == '~') {
@@ -120,15 +148,15 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
       // Find the end of the register name.
       StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
       if (ConstraintEnd == E) return true;  // "{foo"
-      Codes.push_back(std::string(I, ConstraintEnd+1));
+      pCodes->push_back(std::string(I, ConstraintEnd+1));
       I = ConstraintEnd+1;
     } else if (isdigit(*I)) {     // Matching Constraint
       // Maximal munch numbers.
       StringRef::iterator NumStart = I;
       while (I != E && isdigit(*I))
         ++I;
-      Codes.push_back(std::string(NumStart, I));
-      unsigned N = atoi(Codes.back().c_str());
+      pCodes->push_back(std::string(NumStart, I));
+      unsigned N = atoi(pCodes->back().c_str());
       // Check that this is a valid matching constraint!
       if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
           Type != isInput)
@@ -136,14 +164,26 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
       
       // If Operand N already has a matching input, reject this.  An output
       // can't be constrained to the same value as multiple inputs.
-      if (ConstraintsSoFar[N].hasMatchingInput())
-        return true;
-      
-      // Note that operand #n has a matching input.
-      ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+      if (isMultipleAlternative) {
+        InlineAsm::SubConstraintInfo &scInfo =
+          ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+        if (scInfo.MatchingInput != -1)
+          return true;
+        // Note that operand #n has a matching input.
+        scInfo.MatchingInput = ConstraintsSoFar.size();
+      } else {
+        if (ConstraintsSoFar[N].hasMatchingInput())
+          return true;
+        // Note that operand #n has a matching input.
+        ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+        }
+    } else if (*I == '|') {
+      multipleAlternativeIndex++;
+      pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+      ++I;
     } else {
       // Single letter constraint.
-      Codes.push_back(std::string(I, I+1));
+      pCodes->push_back(std::string(I, I+1));
       ++I;
     }
   }
@@ -151,9 +191,21 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
   return false;
 }
 
-std::vector<InlineAsm::ConstraintInfo>
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+  if (index < multipleAlternatives.size()) {
+    currentAlternativeIndex = index;
+    InlineAsm::SubConstraintInfo &scInfo =
+      multipleAlternatives[currentAlternativeIndex];
+    MatchingInput = scInfo.MatchingInput;
+    Codes = scInfo.Codes;
+  }
+}
+
+InlineAsm::ConstraintInfoVector
 InlineAsm::ParseConstraints(StringRef Constraints) {
-  std::vector<ConstraintInfo> Result;
+  ConstraintInfoVector Result;
   
   // Scan the constraints string.
   for (StringRef::iterator I = Constraints.begin(),
@@ -183,13 +235,12 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
   return Result;
 }
 
-
 /// Verify - Verify that the specified constraint string is reasonable for the
 /// specified function type, and otherwise validate the constraint string.
 bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
   if (Ty->isVarArg()) return false;
   
-  std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
+  ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
   
   // Error parsing constraints.
   if (Constraints.empty() && !ConstStr.empty()) return false;
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 05bed4c64316..2c8b8b23b18e 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -200,12 +200,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
   if (const CallInst *CI = dyn_cast<CallInst>(this))
     return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
            CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<CallInst>(I)->getAttributes().getRawPointer();
+           CI->getAttributes() == cast<CallInst>(I)->getAttributes();
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
     return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<InvokeInst>(I)->getAttributes().getRawPointer();
+           CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
     if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
       return false;
@@ -253,12 +251,11 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
   if (const CallInst *CI = dyn_cast<CallInst>(this))
     return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
            CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<CallInst>(I)->getAttributes().getRawPointer();
+           CI->getAttributes() == cast<CallInst>(I)->getAttributes();
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
     return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
-           CI->getAttributes().getRawPointer() ==
-             cast<InvokeInst>(I)->getAttributes().getRawPointer();
+           CI->getAttributes() ==
+             cast<InvokeInst>(I)->getAttributes();
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
     if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
       return false;
@@ -348,7 +345,7 @@ bool Instruction::mayThrow() const {
 ///
 /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
 ///
-bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
+bool Instruction::isAssociative(unsigned Opcode) {
   return Opcode == And || Opcode == Or || Opcode == Xor ||
          Opcode == Add || Opcode == Mul;
 }
@@ -398,25 +395,10 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
     return Op && !Op->isNullValue() && !Op->isAllOnesValue();
   }
   case Load: {
-    if (cast<LoadInst>(this)->isVolatile())
+    const LoadInst *LI = cast<LoadInst>(this);
+    if (LI->isVolatile())
       return false;
-    // Note that it is not safe to speculate into a malloc'd region because
-    // malloc may return null.
-    // It's also not safe to follow a bitcast, for example:
-    //   bitcast i8* (alloca i8) to i32*
-    // would result in a 4-byte load from a 1-byte alloca.
-    Value *Op0 = getOperand(0);
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) {
-      // TODO: it's safe to do this for any GEP with constant indices that
-      // compute inside the allocated type, but not for any inbounds gep.
-      if (GEP->hasAllZeroIndices())
-        Op0 = GEP->getPointerOperand();
-    }
-    if (isa<AllocaInst>(Op0))
-      return true;
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
-      return !GV->hasExternalWeakLinkage();
-    return false;
+    return LI->getPointerOperand()->isDereferenceablePointer();
   }
   case Call:
     return false; // The called function could have undefined behavior or
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 401802ed13d5..d1290281cb1a 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ConstantRange.h"
@@ -97,8 +96,7 @@ PHINode::PHINode(const PHINode &PN)
 }
 
 PHINode::~PHINode() {
-  if (OperandList)
-    dropHungoffUses(OperandList);
+  dropHungoffUses();
 }
 
 // removeIncomingValue - Remove an incoming value.  This is useful if a
@@ -159,66 +157,18 @@ void PHINode::resizeOperands(unsigned NumOps) {
   Use *NewOps = allocHungoffUses(NumOps);
   std::copy(OldOps, OldOps + e, NewOps);
   OperandList = NewOps;
-  if (OldOps) Use::zap(OldOps, OldOps + e, true);
+  Use::zap(OldOps, OldOps + e, true);
 }
 
 /// hasConstantValue - If the specified PHI node always merges together the same
 /// value, return the value, otherwise return null.
-///
-/// If the PHI has undef operands, but all the rest of the operands are
-/// some unique value, return that value if it can be proved that the
-/// value dominates the PHI. If DT is null, use a conservative check,
-/// otherwise use DT to test for dominance.
-///
-Value *PHINode::hasConstantValue(DominatorTree *DT) const {
-  // If the PHI node only has one incoming value, eliminate the PHI node.
-  if (getNumIncomingValues() == 1) {
-    if (getIncomingValue(0) != this)   // not  X = phi X
-      return getIncomingValue(0);
-    return UndefValue::get(getType());  // Self cycle is dead.
-  }
-      
-  // Otherwise if all of the incoming values are the same for the PHI, replace
-  // the PHI node with the incoming value.
-  //
-  Value *InVal = 0;
-  bool HasUndefInput = false;
-  for (unsigned i = 0, e = getNumIncomingValues(); i != e; ++i)
-    if (isa<UndefValue>(getIncomingValue(i))) {
-      HasUndefInput = true;
-    } else if (getIncomingValue(i) != this) { // Not the PHI node itself...
-      if (InVal && getIncomingValue(i) != InVal)
-        return 0;  // Not the same, bail out.
-      InVal = getIncomingValue(i);
-    }
-  
-  // The only case that could cause InVal to be null is if we have a PHI node
-  // that only has entries for itself.  In this case, there is no entry into the
-  // loop, so kill the PHI.
-  //
-  if (InVal == 0) InVal = UndefValue::get(getType());
-  
-  // If we have a PHI node like phi(X, undef, X), where X is defined by some
-  // instruction, we cannot always return X as the result of the PHI node.  Only
-  // do this if X is not an instruction (thus it must dominate the PHI block),
-  // or if the client is prepared to deal with this possibility.
-  if (!HasUndefInput || !isa<Instruction>(InVal))
-    return InVal;
-  
-  Instruction *IV = cast<Instruction>(InVal);
-  if (DT) {
-    // We have a DominatorTree. Do a precise test.
-    if (!DT->dominates(IV, this))
-      return 0;
-  } else {
-    // If it is in the entry block, it obviously dominates everything.
-    if (IV->getParent() != &IV->getParent()->getParent()->getEntryBlock() ||
-        isa<InvokeInst>(IV))
-      return 0;   // Cannot guarantee that InVal dominates this PHINode.
-  }
-
-  // All of the incoming values are the same, return the value now.
-  return InVal;
+Value *PHINode::hasConstantValue() const {
+  // Exploit the fact that phi nodes always have at least one entry.
+  Value *ConstantValue = getIncomingValue(0);
+  for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+    if (getIncomingValue(i) != ConstantValue)
+      return 0; // Incoming values not all the same.
+  return ConstantValue;
 }
 
 
@@ -235,7 +185,7 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  FTy = FTy;  // silence warning.
+  (void)FTy;  // silence warning.
 
   assert((NumParams == FTy->getNumParams() ||
           (FTy->isVarArg() && NumParams > FTy->getNumParams())) &&
@@ -256,7 +206,7 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  FTy = FTy;  // silence warning.
+  (void)FTy;  // silence warning.
 
   assert((FTy->getNumParams() == 2 ||
           (FTy->isVarArg() && FTy->getNumParams() < 2)) &&
@@ -276,7 +226,7 @@ void CallInst::init(Value *Func, Value *Actual) {
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  FTy = FTy;  // silence warning.
+  (void)FTy;  // silence warning.
 
   assert((FTy->getNumParams() == 1 ||
           (FTy->isVarArg() && FTy->getNumParams() == 0)) &&
@@ -292,7 +242,7 @@ void CallInst::init(Value *Func) {
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  FTy = FTy;  // silence warning.
+  (void)FTy;  // silence warning.
 
   assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
 }
@@ -549,7 +499,7 @@ void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
   Op<-1>() = IfException;
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
-  FTy = FTy;  // silence warning.
+  (void)FTy;  // silence warning.
 
   assert(((NumArgs == FTy->getNumParams()) ||
           (FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
@@ -779,31 +729,6 @@ BranchInst::BranchInst(const BranchInst &BI) :
   SubclassOptionalData = BI.SubclassOptionalData;
 }
 
-
-Use* Use::getPrefix() {
-  PointerIntPair<Use**, 2, PrevPtrTag> &PotentialPrefix(this[-1].Prev);
-  if (PotentialPrefix.getOpaqueValue())
-    return 0;
-
-  return reinterpret_cast<Use*>((char*)&PotentialPrefix + 1);
-}
-
-BranchInst::~BranchInst() {
-  if (NumOperands == 1) {
-    if (Use *Prefix = OperandList->getPrefix()) {
-      Op<-1>() = 0;
-      //
-      // mark OperandList to have a special value for scrutiny
-      // by baseclass destructors and operator delete
-      OperandList = Prefix;
-    } else {
-      NumOperands = 3;
-      OperandList = op_begin();
-    }
-  }
-}
-
-
 BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
   return getSuccessor(idx);
 }
@@ -899,7 +824,7 @@ void AllocaInst::setAlignment(unsigned Align) {
 
 bool AllocaInst::isArrayAllocation() const {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
-    return CI->getZExtValue() != 1;
+    return !CI->isOne();
   return true;
 }
 
@@ -1247,6 +1172,12 @@ const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
   return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
 }
 
+const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
+                                              Constant* const *Idxs,
+                                              unsigned NumIdx) {
+  return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
 const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
                                               uint64_t const *Idxs,
                                               unsigned NumIdx) {
@@ -1473,6 +1404,8 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const {
 void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx, 
                            unsigned NumIdx, const Twine &Name) {
   assert(NumOperands == 2 && "NumOperands not initialized?");
+  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx, Idx + NumIdx) ==
+         Val->getType() && "Inserted value must match indexed type!");
   Op<0>() = Agg;
   Op<1>() = Val;
 
@@ -1483,6 +1416,8 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
 void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx, 
                            const Twine &Name) {
   assert(NumOperands == 2 && "NumOperands not initialized?");
+  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx) == Val->getType()
+         && "Inserted value must match indexed type!");
   Op<0>() = Agg;
   Op<1>() = Val;
 
@@ -1555,13 +1490,26 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
 const Type* ExtractValueInst::getIndexedType(const Type *Agg,
                                              const unsigned *Idxs,
                                              unsigned NumIdx) {
-  unsigned CurIdx = 0;
-  for (; CurIdx != NumIdx; ++CurIdx) {
-    const CompositeType *CT = dyn_cast<CompositeType>(Agg);
-    if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0;
+  for (unsigned CurIdx = 0; CurIdx != NumIdx; ++CurIdx) {
     unsigned Index = Idxs[CurIdx];
-    if (!CT->indexValid(Index)) return 0;
-    Agg = CT->getTypeAtIndex(Index);
+    // We can't use CompositeType::indexValid(Index) here.
+    // indexValid() always returns true for arrays because getelementptr allows
+    // out-of-bounds indices. Since we don't allow those for extractvalue and
+    // insertvalue we need to check array indexing manually.
+    // Since the only other types we can index into are struct types it's just
+    // as easy to check those manually as well.
+    if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+      if (Index >= AT->getNumElements())
+        return 0;
+    } else if (const StructType *ST = dyn_cast<StructType>(Agg)) {
+      if (Index >= ST->getNumElements())
+        return 0;
+    } else {
+      // Not a valid type to index into.
+      return 0;
+    }
+
+    Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
 
     // If the new type forwards to another type, then it is in the middle
     // of being refined to another type (and hence, may have dropped all
@@ -1570,7 +1518,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
     if (const Type *Ty = Agg->getForwardedType())
       Agg = Ty;
   }
-  return CurIdx == NumIdx ? Agg : 0;
+  return Agg;
 }
 
 const Type* ExtractValueInst::getIndexedType(const Type *Agg,
@@ -1611,7 +1559,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
 
 void BinaryOperator::init(BinaryOps iType) {
   Value *LHS = getOperand(0), *RHS = getOperand(1);
-  LHS = LHS; RHS = RHS; // Silence warnings.
+  (void)LHS; (void)RHS; // Silence warnings.
   assert(LHS->getType() == RHS->getType() &&
          "Binary operator operand types must match!");
 #ifndef NDEBUG
@@ -1874,7 +1822,7 @@ void BinaryOperator::setHasNoSignedWrap(bool b) {
 }
 
 void BinaryOperator::setIsExact(bool b) {
-  cast<SDivOperator>(this)->setIsExact(b);
+  cast<PossiblyExactOperator>(this)->setIsExact(b);
 }
 
 bool BinaryOperator::hasNoUnsignedWrap() const {
@@ -1886,7 +1834,7 @@ bool BinaryOperator::hasNoSignedWrap() const {
 }
 
 bool BinaryOperator::isExact() const {
-  return cast<SDivOperator>(this)->isExact();
+  return cast<PossiblyExactOperator>(this)->isExact();
 }
 
 //===----------------------------------------------------------------------===//
@@ -2360,6 +2308,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
     } else {                                    // Casting from something else
       return false;
     }
+  } else if (DestTy->isX86_MMXTy()) {     
+    return SrcBits == 64;
   } else {                                      // Casting to something else
     return false;
   }
@@ -2441,6 +2391,10 @@ CastInst::getCastOpcode(
       return BitCast;                             // vector -> vector
     } else if (DestPTy->getBitWidth() == SrcBits) {
       return BitCast;                               // float/int -> vector
+    } else if (SrcTy->isX86_MMXTy()) {
+      assert(DestPTy->getBitWidth()==64 &&
+             "Casting X86_MMX to vector of wrong width");
+      return BitCast;                             // MMX to 64-bit vector
     } else {
       assert(!"Illegal cast to vector (wrong type or size)");
     }
@@ -2452,6 +2406,14 @@ CastInst::getCastOpcode(
     } else {
       assert(!"Casting pointer to other than pointer or int");
     }
+  } else if (DestTy->isX86_MMXTy()) {
+    if (isa<VectorType>(SrcTy)) {
+      assert(cast<VectorType>(SrcTy)->getBitWidth() == 64 &&
+             "Casting vector of wrong width to X86_MMX");
+      return BitCast;                               // 64-bit vector to MMX
+    } else {
+      assert(!"Illegal cast to X86_MMX");
+    }
   } else {
     assert(!"Casting to type that is not first-class");
   }
@@ -2754,14 +2716,14 @@ void CmpInst::swapOperands() {
     cast<FCmpInst>(this)->swapOperands();
 }
 
-bool CmpInst::isCommutative() {
-  if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+bool CmpInst::isCommutative() const {
+  if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
     return IC->isCommutative();
   return cast<FCmpInst>(this)->isCommutative();
 }
 
-bool CmpInst::isEquality() {
-  if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+bool CmpInst::isEquality() const {
+  if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
     return IC->isEquality();
   return cast<FCmpInst>(this)->isEquality();
 }
@@ -2974,9 +2936,9 @@ bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
 //                        SwitchInst Implementation
 //===----------------------------------------------------------------------===//
 
-void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumCases) {
-  assert(Value && Default);
-  ReservedSpace = 2+NumCases*2;
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+  assert(Value && Default && NumReserved);
+  ReservedSpace = NumReserved;
   NumOperands = 2;
   OperandList = allocHungoffUses(ReservedSpace);
 
@@ -2992,7 +2954,7 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
                        Instruction *InsertBefore)
   : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
                    0, 0, InsertBefore) {
-  init(Value, Default, NumCases);
+  init(Value, Default, 2+NumCases*2);
 }
 
 /// SwitchInst ctor - Create a new switch instruction, specifying a value to
@@ -3003,14 +2965,15 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
                        BasicBlock *InsertAtEnd)
   : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
                    0, 0, InsertAtEnd) {
-  init(Value, Default, NumCases);
+  init(Value, Default, 2+NumCases*2);
 }
 
 SwitchInst::SwitchInst(const SwitchInst &SI)
-  : TerminatorInst(Type::getVoidTy(SI.getContext()), Instruction::Switch,
-                   allocHungoffUses(SI.getNumOperands()), SI.getNumOperands()) {
+  : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+  init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+  NumOperands = SI.getNumOperands();
   Use *OL = OperandList, *InOL = SI.OperandList;
-  for (unsigned i = 0, E = SI.getNumOperands(); i != E; i+=2) {
+  for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
     OL[i] = InOL[i];
     OL[i+1] = InOL[i+1];
   }
@@ -3018,7 +2981,7 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
 }
 
 SwitchInst::~SwitchInst() {
-  dropHungoffUses(OperandList);
+  dropHungoffUses();
 }
 
 
@@ -3046,14 +3009,10 @@ void SwitchInst::removeCase(unsigned idx) {
   unsigned NumOps = getNumOperands();
   Use *OL = OperandList;
 
-  // Move everything after this operand down.
-  //
-  // FIXME: we could just swap with the end of the list, then erase.  However,
-  // client might not expect this to happen.  The code as it is thrashes the
-  // use/def lists, which is kinda lame.
-  for (unsigned i = (idx+1)*2; i != NumOps; i += 2) {
-    OL[i-2] = OL[i];
-    OL[i-2+1] = OL[i+1];
+  // Overwrite this case with the end of the list.
+  if ((idx + 1) * 2 != NumOps) {
+    OL[idx * 2] = OL[NumOps - 2];
+    OL[idx * 2 + 1] = OL[NumOps - 1];
   }
 
   // Nuke the last value.
@@ -3089,7 +3048,7 @@ void SwitchInst::resizeOperands(unsigned NumOps) {
       NewOps[i] = OldOps[i];
   }
   OperandList = NewOps;
-  if (OldOps) Use::zap(OldOps, OldOps + e, true);
+  Use::zap(OldOps, OldOps + e, true);
 }
 
 
@@ -3104,7 +3063,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
 }
 
 //===----------------------------------------------------------------------===//
-//                        SwitchInst Implementation
+//                        IndirectBrInst Implementation
 //===----------------------------------------------------------------------===//
 
 void IndirectBrInst::init(Value *Address, unsigned NumDests) {
@@ -3144,7 +3103,7 @@ void IndirectBrInst::resizeOperands(unsigned NumOps) {
   for (unsigned i = 0; i != e; ++i)
     NewOps[i] = OldOps[i];
   OperandList = NewOps;
-  if (OldOps) Use::zap(OldOps, OldOps + e, true);
+  Use::zap(OldOps, OldOps + e, true);
 }
 
 IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
@@ -3172,7 +3131,7 @@ IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
 }
 
 IndirectBrInst::~IndirectBrInst() {
-  dropHungoffUses(OperandList);
+  dropHungoffUses();
 }
 
 /// addDestination - Add a destination.
@@ -3346,8 +3305,7 @@ ReturnInst *ReturnInst::clone_impl() const {
 }
 
 BranchInst *BranchInst::clone_impl() const {
-  unsigned Ops(getNumOperands());
-  return new(Ops, Ops == 1) BranchInst(*this);
+  return new(getNumOperands()) BranchInst(*this);
 }
 
 SwitchInst *SwitchInst::clone_impl() const {
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 563c651315a3..1bd497d05d4e 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/SourceMgr.h"
 #include "LLVMContextImpl.h"
+#include <cctype>
 using namespace llvm;
 
 static ManagedStatic<LLVMContext> GlobalContext;
@@ -28,25 +29,42 @@ LLVMContext& llvm::getGlobalContext() {
 }
 
 LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
-  // Create the first metadata kind, which is always 'dbg'.
+  // Create the fixed metadata kinds. This is done in the same order as the
+  // MD_* enum values so that they correspond.
+
+  // Create the 'dbg' metadata kind.
   unsigned DbgID = getMDKindID("dbg");
   assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+  // Create the 'tbaa' metadata kind.
+  unsigned TBAAID = getMDKindID("tbaa");
+  assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
 }
 LLVMContext::~LLVMContext() { delete pImpl; }
 
+void LLVMContext::addModule(Module *M) {
+  pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+  pImpl->OwnedModules.erase(M);
+}
+
 //===----------------------------------------------------------------------===//
 // Recoverable Backend Errors
 //===----------------------------------------------------------------------===//
 
-void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler, 
-                                                void *DiagContext) {
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+                              void *DiagContext) {
   pImpl->InlineAsmDiagHandler = DiagHandler;
   pImpl->InlineAsmDiagContext = DiagContext;
 }
 
 /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
 /// setInlineAsmDiagnosticHandler.
-void *LLVMContext::getInlineAsmDiagnosticHandler() const {
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
   return pImpl->InlineAsmDiagHandler;
 }
 
@@ -76,13 +94,11 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
     errs() << "error: " << ErrorStr << "\n";
     exit(1);
   }
-  
+
   // If we do have an error handler, we can report the error and keep going.
   SMDiagnostic Diag("", "error: " + ErrorStr.str());
-  
-  ((SourceMgr::DiagHandlerTy)(intptr_t)pImpl->InlineAsmDiagHandler)
-      (Diag, pImpl->InlineAsmDiagContext, LocCookie);
-  
+
+  pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
 }
 
 //===----------------------------------------------------------------------===//
@@ -94,13 +110,13 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
 static bool isValidName(StringRef MDName) {
   if (MDName.empty())
     return false;
-  
-  if (!isalpha(MDName[0]))
+
+  if (!std::isalpha(MDName[0]))
     return false;
-  
+
   for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
        ++I) {
-    if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
+    if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
       return false;
   }
   return true;
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 93a075f0fccb..ccb8dc500fcd 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLVMContextImpl.h"
+#include "llvm/Module.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -25,6 +26,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
     X86_FP80Ty(C, Type::X86_FP80TyID),
     FP128Ty(C, Type::FP128TyID),
     PPC_FP128Ty(C, Type::PPC_FP128TyID),
+    X86_MMXTy(C, Type::X86_MMXTyID),
     Int1Ty(C, 1),
     Int8Ty(C, 8),
     Int16Ty(C, 16),
@@ -51,6 +53,15 @@ struct DropReferences {
 }
 
 LLVMContextImpl::~LLVMContextImpl() {
+  // NOTE: We need to delete the contents of OwnedModules, but we have to
+  // duplicate it into a temporary vector, because the destructor of Module
+  // will try to remove itself from OwnedModules set.  This would cause
+  // iterator invalidation if we iterated on the set directly.
+  std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+  for (std::vector<Module*>::iterator I = Modules.begin(), E = Modules.end();
+       I != E; ++I)
+    delete *I;
+  
   std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
                 DropReferences());
   std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
@@ -90,7 +101,7 @@ LLVMContextImpl::~LLVMContextImpl() {
     MDNodes.push_back(&*I);
   }
   MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
-  for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(),
+  for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
          E = MDNodes.end(); I != E; ++I) {
     (*I)->destroy();
   }
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 51b2992898c0..23971aafa74d 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -115,7 +115,12 @@ public:
   
 class LLVMContextImpl {
 public:
-  void *InlineAsmDiagHandler, *InlineAsmDiagContext;
+  /// OwnedModules - The set of modules instantiated in this context, and which
+  /// will be automatically deleted if this context is deleted.
+  SmallPtrSet<Module*, 4> OwnedModules;
+  
+  LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+  void *InlineAsmDiagContext;
   
   typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*, 
                          DenseMapAPIntKeyInfo> IntMapTy;
@@ -170,6 +175,7 @@ public:
   const Type X86_FP80Ty;
   const Type FP128Ty;
   const Type PPC_FP128Ty;
+  const Type X86_MMXTy;
   const IntegerType Int1Ty;
   const IntegerType Int8Ty;
   const IntegerType Int16Ty;
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index a44f61d822ee..f6651e93e273 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -16,8 +16,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
 #include "llvm/Value.h"
 using namespace llvm;
 
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index da69c43ff735..0b8e8dfa8b36 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -339,17 +339,14 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
 
   // Now that the node is out of the folding set, get ready to reinsert it.
   // First, check to see if another node with the same operands already exists
-  // in the set.  If it doesn't exist, this returns the position to insert it.
+  // in the set.  If so, then this node is redundant.
   FoldingSetNodeID ID;
   Profile(ID);
   void *InsertPoint;
-  MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
-
-  if (N) {
-    N->replaceAllUsesWith(this);
-    N->destroy();
-    N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
-    assert(N == 0 && "shouldn't be in the map now!"); (void)N;
+  if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+    replaceAllUsesWith(N);
+    destroy();
+    return;
   }
 
   // InsertPoint will have been set by the FindNodeOrInsertPos call.
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index d7ddf96cb070..341e527acb5b 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -62,9 +62,11 @@ Module::Module(StringRef MID, LLVMContext& C)
   ValSymTab = new ValueSymbolTable();
   TypeSymTab = new TypeSymbolTable();
   NamedMDSymTab = new StringMap<NamedMDNode *>();
+  Context.addModule(this);
 }
 
 Module::~Module() {
+  Context.removeModule(this);
   dropAllReferences();
   GlobalList.clear();
   FunctionList.clear();
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a7d7f61dd762..9afc54063321 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -213,7 +213,6 @@ RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
                                                          *this, isDefault);
 }
 
-
 //===----------------------------------------------------------------------===//
 // PassRegistrationListener implementation
 //
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index ab4d4e55c750..8bfef9855ca2 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -24,7 +24,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
 #include <algorithm>
 #include <cstdio>
 #include <map>
@@ -497,9 +497,14 @@ PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
 }
 
 /// Set pass P as the last user of the given analysis passes.
-void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
-                                    Pass *P) {
-  for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
+void
+PMTopLevelManager::setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses,
+                               Pass *P) {
+  unsigned PDepth = 0;
+  if (P->getResolver())
+    PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+  for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
          E = AnalysisPasses.end(); I != E; ++I) {
     Pass *AP = *I;
     LastUser[AP] = P;
@@ -507,20 +512,47 @@ void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
     if (P == AP)
       continue;
 
+    // Update the last users of passes that are required transitive by AP.
+    AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+    const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+    SmallVector<Pass *, 12> LastUses;
+    SmallVector<Pass *, 12> LastPMUses;
+    for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+         E = IDs.end(); I != E; ++I) {
+      Pass *AnalysisPass = findAnalysisPass(*I);
+      assert(AnalysisPass && "Expected analysis pass to exist.");
+      AnalysisResolver *AR = AnalysisPass->getResolver();
+      assert(AR && "Expected analysis resolver to exist.");
+      unsigned APDepth = AR->getPMDataManager().getDepth();
+
+      if (PDepth == APDepth)
+        LastUses.push_back(AnalysisPass);
+      else if (PDepth > APDepth)
+        LastPMUses.push_back(AnalysisPass);
+    }
+
+    setLastUser(LastUses, P);
+
+    // If this pass has a corresponding pass manager, push higher level
+    // analysis to this pass manager.
+    if (P->getResolver())
+      setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
     // If AP is the last user of other passes then make P last user of
     // such passes.
     for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
            LUE = LastUser.end(); LUI != LUE; ++LUI) {
       if (LUI->second == AP)
         // DenseMap iterator is not invalidated here because
-        // this is just updating exisitng entry.
+        // this is just updating existing entries.
         LastUser[LUI->first] = P;
     }
   }
 }
 
 /// Collect passes whose last user is P
-void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
                                         Pass *P) {
   DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
     InversedLastUser.find(P);
@@ -612,41 +644,40 @@ void PMTopLevelManager::schedulePass(Pass *P) {
 /// then return NULL.
 Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
 
-  Pass *P = NULL;
   // Check pass managers
-  for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
-         E = PassManagers.end(); P == NULL && I != E; ++I) {
-    PMDataManager *PMD = *I;
-    P = PMD->findAnalysisPass(AID, false);
-  }
+  for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+         E = PassManagers.end(); I != E; ++I)
+    if (Pass *P = (*I)->findAnalysisPass(AID, false))
+      return P;
 
   // Check other pass managers
-  for (SmallVector<PMDataManager *, 8>::iterator
+  for (SmallVectorImpl<PMDataManager *>::iterator
          I = IndirectPassManagers.begin(),
-         E = IndirectPassManagers.end(); P == NULL && I != E; ++I)
-    P = (*I)->findAnalysisPass(AID, false);
-
-  for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
-         E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
+         E = IndirectPassManagers.end(); I != E; ++I)
+    if (Pass *P = (*I)->findAnalysisPass(AID, false))
+      return P;
+
+  // Check the immutable passes. Iterate in reverse order so that we find
+  // the most recently registered passes first.
+  for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+       ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
     AnalysisID PI = (*I)->getPassID();
     if (PI == AID)
-      P = *I;
+      return *I;
 
     // If Pass not found then check the interfaces implemented by Immutable Pass
-    if (!P) {
-      const PassInfo *PassInf =
-        PassRegistry::getPassRegistry()->getPassInfo(PI);
-      const std::vector<const PassInfo*> &ImmPI =
-        PassInf->getInterfacesImplemented();
-      for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
-           EE = ImmPI.end(); II != EE; ++II) {
-        if ((*II)->getTypeInfo() == AID)
-          P = *I;
-      }
+    const PassInfo *PassInf =
+      PassRegistry::getPassRegistry()->getPassInfo(PI);
+    const std::vector<const PassInfo*> &ImmPI =
+      PassInf->getInterfacesImplemented();
+    for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+         EE = ImmPI.end(); II != EE; ++II) {
+      if ((*II)->getTypeInfo() == AID)
+        return *I;
     }
   }
 
-  return P;
+  return 0;
 }
 
 // Print passes managed by this top level manager.
@@ -675,6 +706,12 @@ void PMTopLevelManager::dumpArguments() const {
     return;
 
   dbgs() << "Pass Arguments: ";
+  for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+       ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+    if (const PassInfo *PI =
+          PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+      if (!PI->isAnalysisGroup())
+        dbgs() << " -" << PI->getPassArgument();
   for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
          E = PassManagers.end(); I != E; ++I)
     (*I)->dumpPassArguments();
@@ -682,12 +719,12 @@ void PMTopLevelManager::dumpArguments() const {
 }
 
 void PMTopLevelManager::initializeAllAnalysisInfo() {
-  for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+  for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
          E = PassManagers.end(); I != E; ++I)
     (*I)->initializeAnalysisInfo();
 
   // Initailize other pass managers
-  for (SmallVector<PMDataManager *, 8>::iterator
+  for (SmallVectorImpl<PMDataManager *>::iterator
        I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
        I != E; ++I)
     (*I)->initializeAnalysisInfo();
@@ -708,11 +745,11 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
 
 /// Destructor
 PMTopLevelManager::~PMTopLevelManager() {
-  for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+  for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
          E = PassManagers.end(); I != E; ++I)
     delete *I;
 
-  for (SmallVector<ImmutablePass *, 8>::iterator
+  for (SmallVectorImpl<ImmutablePass *>::iterator
          I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
     delete *I;
 
@@ -749,7 +786,7 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
     return true;
 
   const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
-  for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(),
+  for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
          E = HigherLevelAnalysis.end(); I  != E; ++I) {
     Pass *P1 = *I;
     if (P1->getAsImmutablePass() == 0 &&
@@ -849,7 +886,7 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
     dbgs() << " Free these instances\n";
   }
 
-  for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(),
+  for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
          E = DeadPasses.end(); I != E; ++I)
     freePass(*I, Msg, DBG_STR);
 }
@@ -910,7 +947,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
 
   collectRequiredAnalysis(RequiredPasses,
                           ReqAnalysisNotAvailable, P);
-  for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
+  for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
          E = RequiredPasses.end(); I != E; ++I) {
     Pass *PRequired = *I;
     unsigned RDepth = 0;
@@ -944,7 +981,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
   }
 
   // Now, take care of required analyses that are not available.
-  for (SmallVector<AnalysisID, 8>::iterator
+  for (SmallVectorImpl<AnalysisID>::iterator
          I = ReqAnalysisNotAvailable.begin(),
          E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
     const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
@@ -965,8 +1002,8 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
 /// Populate RP with analysis pass that are required by
 /// pass P and are available. Populate RP_NotAvail with analysis
 /// pass that are required by pass P but are not available.
-void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
-                                       SmallVector<AnalysisID, 8> &RP_NotAvail,
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+                                       SmallVectorImpl<AnalysisID> &RP_NotAvail,
                                             Pass *P) {
   AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
   const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
@@ -1038,7 +1075,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
 
   TPM->collectLastUses(LUses, P);
 
-  for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
+  for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
          E = LUses.end(); I != E; ++I) {
     llvm::dbgs() << "--" << std::string(Offset*2, ' ');
     (*I)->dumpPassStructure(0);
@@ -1046,7 +1083,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
 }
 
 void PMDataManager::dumpPassArguments() const {
-  for (SmallVector<Pass *, 8>::const_iterator I = PassVector.begin(),
+  for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
         E = PassVector.end(); I != E; ++I) {
     if (PMDataManager *PMD = (*I)->getAsPMDataManager())
       PMD->dumpPassArguments();
@@ -1087,6 +1124,9 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
   case ON_MODULE_MSG:
     dbgs() << "' on Module '"  << Msg << "'...\n";
     break;
+  case ON_REGION_MSG:
+    dbgs() << "' on Region '"  << Msg << "'...\n";
+    break;
   case ON_LOOP_MSG:
     dbgs() << "' on Loop '" << Msg << "'...\n";
     break;
@@ -1163,7 +1203,7 @@ Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
 
 // Destructor
 PMDataManager::~PMDataManager() {
-  for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
+  for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
          E = PassVector.end(); I != E; ++I)
     delete *I;
 }
@@ -1563,7 +1603,7 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
   FPP->add(RequiredPass);
 
   // Register P as the last user of RequiredPass.
-  SmallVector<Pass *, 12> LU;
+  SmallVector<Pass *, 1> LU;
   LU.push_back(RequiredPass);
   FPP->setLastUser(LU,  P);
 }
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
index 21dba56aad72..c97a170f501f 100644
--- a/lib/VMCore/PassRegistry.cpp
+++ b/lib/VMCore/PassRegistry.cpp
@@ -16,93 +16,125 @@
 #include "llvm/PassSupport.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
 
 using namespace llvm;
 
-static PassRegistry *PassRegistryObj = 0;
-PassRegistry *PassRegistry::getPassRegistry() {
-  // Use double-checked locking to safely initialize the registrar when
-  // we're running in multithreaded mode.
-  PassRegistry* tmp = PassRegistryObj;
-  if (llvm_is_multithreaded()) {
-    sys::MemoryFence();
-    if (!tmp) {
-      llvm_acquire_global_lock();
-      tmp = PassRegistryObj;
-      if (!tmp) {
-        tmp = new PassRegistry();
-        sys::MemoryFence();
-        PassRegistryObj = tmp;
-      }
-      llvm_release_global_lock();
-    }
-  } else if (!tmp) {
-    PassRegistryObj = new PassRegistry();
-  }
-  
-  return PassRegistryObj;
-}
-
-namespace {
-
-// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
 // Unfortunately, passes are registered with static ctors, and having
 // llvm_shutdown clear this map prevents successful ressurection after 
 // llvm_shutdown is run.  Ideally we should find a solution so that we don't
 // leak the map, AND can still resurrect after shutdown.
-void cleanupPassRegistry(void*) {
-  if (PassRegistryObj) {
-    delete PassRegistryObj;
-    PassRegistryObj = 0;
-  }
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+  return &*PassRegistryObj;
 }
-ManagedCleanup<&cleanupPassRegistry> registryCleanup ATTRIBUTE_USED;
 
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+  /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+  typedef DenseMap<const void*, const PassInfo*> MapType;
+  MapType PassInfoMap;
+  
+  typedef StringMap<const PassInfo*> StringMapType;
+  StringMapType PassInfoStringMap;
+  
+  /// AnalysisGroupInfo - Keep track of information for each analysis group.
+  struct AnalysisGroupInfo {
+    SmallPtrSet<const PassInfo *, 8> Implementations;
+  };
+  DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+  
+  std::vector<const PassInfo*> ToFree;
+  std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+  if (!pImpl)
+    pImpl = new PassRegistryImpl();
+  return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+  
+  for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+       E = Impl->ToFree.end(); I != E; ++I)
+    delete *I;
+  
+  delete Impl;
+  pImpl = 0;
 }
 
 const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
-  sys::SmartScopedLock<true> Guard(Lock);
-  MapType::const_iterator I = PassInfoMap.find(TI);
-  return I != PassInfoMap.end() ? I->second : 0;
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+  return I != Impl->PassInfoMap.end() ? I->second : 0;
 }
 
 const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
-  sys::SmartScopedLock<true> Guard(Lock);
-  StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
-  return I != PassInfoStringMap.end() ? I->second : 0;
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  PassRegistryImpl::StringMapType::const_iterator
+    I = Impl->PassInfoStringMap.find(Arg);
+  return I != Impl->PassInfoStringMap.end() ? I->second : 0;
 }
 
 //===----------------------------------------------------------------------===//
 // Pass Registration mechanism
 //
 
-void PassRegistry::registerPass(const PassInfo &PI) {
-  sys::SmartScopedLock<true> Guard(Lock);
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   bool Inserted =
-    PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
-  assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
-  PassInfoStringMap[PI.getPassArgument()] = &PI;
+    Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+  assert(Inserted && "Pass registered multiple times!");
+  (void)Inserted;
+  Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
   
   // Notify any listeners.
   for (std::vector<PassRegistrationListener*>::iterator
-       I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
+       I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
     (*I)->passRegistered(&PI);
+  
+  if (ShouldFree) Impl->ToFree.push_back(&PI);
 }
 
 void PassRegistry::unregisterPass(const PassInfo &PI) {
-  sys::SmartScopedLock<true> Guard(Lock);
-  MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
-  assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  PassRegistryImpl::MapType::iterator I = 
+    Impl->PassInfoMap.find(PI.getTypeInfo());
+  assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
   
   // Remove pass from the map.
-  PassInfoMap.erase(I);
-  PassInfoStringMap.erase(PI.getPassArgument());
+  Impl->PassInfoMap.erase(I);
+  Impl->PassInfoStringMap.erase(PI.getPassArgument());
 }
 
 void PassRegistry::enumerateWith(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(Lock);
-  for (MapType::const_iterator I = PassInfoMap.begin(),
-       E = PassInfoMap.end(); I != E; ++I)
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+       E = Impl->PassInfoMap.end(); I != E; ++I)
     L->passEnumerate(I->second);
 }
 
@@ -111,7 +143,8 @@ void PassRegistry::enumerateWith(PassRegistrationListener *L) {
 void PassRegistry::registerAnalysisGroup(const void *InterfaceID, 
                                          const void *PassID,
                                          PassInfo& Registeree,
-                                         bool isDefault) {
+                                         bool isDefault,
+                                         bool ShouldFree) {
   PassInfo *InterfaceInfo =  const_cast<PassInfo*>(getPassInfo(InterfaceID));
   if (InterfaceInfo == 0) {
     // First reference to Interface, register it now.
@@ -126,12 +159,15 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
     assert(ImplementationInfo &&
            "Must register pass before adding to AnalysisGroup!");
 
+    sys::SmartScopedLock<true> Guard(*Lock);
+    
     // Make sure we keep track of the fact that the implementation implements
     // the interface.
     ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
 
-    sys::SmartScopedLock<true> Guard(Lock);
-    AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
+    PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+    PassRegistryImpl::AnalysisGroupInfo &AGI =
+      Impl->AnalysisGroupInfoMap[InterfaceInfo];
     assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
            "Cannot add a pass to the same analysis group more than once!");
     AGI.Implementations.insert(ImplementationInfo);
@@ -143,17 +179,30 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
       InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
     }
   }
+  
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  if (ShouldFree) Impl->ToFree.push_back(&Registeree);
 }
 
 void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(Lock);
-  Listeners.push_back(L);
+  sys::SmartScopedLock<true> Guard(*Lock);
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+  Impl->Listeners.push_back(L);
 }
 
 void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(Lock);
+  sys::SmartScopedLock<true> Guard(*Lock);
+  
+  // NOTE: This is necessary, because removeRegistrationListener() can be called
+  // as part of the llvm_shutdown sequence.  Since we have no control over the
+  // order of that sequence, we need to gracefully handle the case where the
+  // PassRegistry is destructed before the object that triggers this call.
+  if (!pImpl) return;
+  
+  PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   std::vector<PassRegistrationListener*>::iterator I =
-    std::find(Listeners.begin(), Listeners.end(), L);
-  assert(I != Listeners.end() && "PassRegistrationListener not registered!");
-  Listeners.erase(I);
+    std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+  assert(I != Impl->Listeners.end() &&
+         "PassRegistrationListener not registered!");
+  Impl->Listeners.erase(I);
 }
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 2ee49d235963..1f1fbc91bc31 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -78,10 +78,10 @@ namespace {
 
 char PrintModulePass::ID = 0;
 INITIALIZE_PASS(PrintModulePass, "print-module",
-                "Print module to stderr", false, false);
+                "Print module to stderr", false, false)
 char PrintFunctionPass::ID = 0;
 INITIALIZE_PASS(PrintFunctionPass, "print-function",
-                "Print function to stderr", false, false);
+                "Print function to stderr", false, false)
 
 /// createPrintModulePass - Create and return a pass that writes the
 /// module to the specified raw_ostream.
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index c55e6267836a..be28ad1f7122 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -27,7 +27,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Threading.h"
 #include <algorithm>
 #include <cstdarg>
 using namespace llvm;
@@ -109,6 +109,7 @@ const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
   case PPC_FP128TyID : return getPPC_FP128Ty(C);
   case LabelTyID     : return getLabelTy(C);
   case MetadataTyID  : return getMetadataTy(C);
+  case X86_MMXTyID   : return getX86_MMXTy(C);
   default:
     return 0;
   }
@@ -172,10 +173,20 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
     return false;
 
   // Vector -> Vector conversions are always lossless if the two vector types
-  // have the same size, otherwise not.
-  if (const VectorType *thisPTy = dyn_cast<VectorType>(this))
+  // have the same size, otherwise not.  Also, 64-bit vector types can be
+  // converted to x86mmx.
+  if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
     if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
       return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+    if (Ty->getTypeID() == Type::X86_MMXTyID &&
+        thisPTy->getBitWidth() == 64)
+      return true;
+  }
+
+  if (this->getTypeID() == Type::X86_MMXTyID)
+    if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+      if (thatPTy->getBitWidth() == 64)
+        return true;
 
   // At this point we have only various mismatches of the first class types
   // remaining and ptr->ptr. Just select the lossless conversions. Everything
@@ -192,6 +203,7 @@ unsigned Type::getPrimitiveSizeInBits() const {
   case Type::X86_FP80TyID: return 80;
   case Type::FP128TyID: return 128;
   case Type::PPC_FP128TyID: return 128;
+  case Type::X86_MMXTyID: return 64;
   case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
   case Type::VectorTyID:  return cast<VectorType>(this)->getBitWidth();
   default: return 0;
@@ -354,6 +366,10 @@ const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
   return &C.pImpl->PPC_FP128Ty;
 }
 
+const Type *Type::getX86_MMXTy(LLVMContext &C) {
+  return &C.pImpl->X86_MMXTy;
+}
+
 const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
   return IntegerType::get(C, N);
 }
@@ -398,6 +414,10 @@ const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
   return getPPC_FP128Ty(C)->getPointerTo(AS);
 }
 
+const PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+  return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
 const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
   return getIntNTy(C, N)->getPointerTo(AS);
 }
@@ -1083,7 +1103,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   while (!AbstractTypeUsers.empty() && NewTy != this) {
     AbstractTypeUser *User = AbstractTypeUsers.back();
 
-    unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+    unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
 #ifdef DEBUG_MERGE_TYPES
     DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User
                  << "] of abstract type [" << (void*)this << " "
@@ -1110,7 +1130,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
   DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
 #endif
 
-  unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+  unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
   while (!AbstractTypeUsers.empty()) {
     AbstractTypeUser *ATU = AbstractTypeUsers.back();
     ATU->typeBecameConcrete(this);
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 5a90917977b0..4694486c41b6 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -317,7 +317,7 @@ public:
     // The old record is now out-of-date, because one of the children has been
     // updated.  Remove the obsolete entry from the map.
     unsigned NumErased = Map.erase(ValType::get(Ty));
-    assert(NumErased && "Element not found!"); NumErased = NumErased;
+    assert(NumErased && "Element not found!"); (void)NumErased;
 
     // Remember the structural hash for the type before we start hacking on it,
     // in case we need it later.
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index fec710b39459..2258b8d985ae 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/User.h"
+#include "llvm/Value.h"
 
 namespace llvm {
 
@@ -85,7 +85,8 @@ const Use *Use::getImpliedUser() const {
 //                         Use initTags Implementation
 //===----------------------------------------------------------------------===//
 
-Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+Use *Use::initTags(Use * const Start, Use *Stop) {
+  ptrdiff_t Done = 0;
   while (Done < 20) {
     if (Start == Stop--)
       return Start;
@@ -97,20 +98,18 @@ Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
                                          oneDigitTag, oneDigitTag, oneDigitTag,
                                          oneDigitTag, stopTag
                                        };
-    Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(tags[Done++]));
-    Stop->Val = 0;
+    new(Stop) Use(tags[Done++]);
   }
 
   ptrdiff_t Count = Done;
   while (Start != Stop) {
     --Stop;
-    Stop->Val = 0;
     if (!Count) {
-      Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(stopTag));
+      new(Stop) Use(stopTag);
       ++Done;
       Count = Done;
     } else {
-      Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Count & 1));
+      new(Stop) Use(PrevPtrTag(Count & 1));
       Count >>= 1;
       ++Done;
     }
@@ -124,123 +123,24 @@ Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
 //===----------------------------------------------------------------------===//
 
 void Use::zap(Use *Start, const Use *Stop, bool del) {
-  if (del) {
-    while (Start != Stop) {
-      (--Stop)->~Use();
-    }
+  while (Start != Stop)
+    (--Stop)->~Use();
+  if (del)
     ::operator delete(Start);
-    return;
-  }
-
-  while (Start != Stop) {
-    (Start++)->set(0);
-  }
 }
 
-//===----------------------------------------------------------------------===//
-//                         AugmentedUse layout struct
-//===----------------------------------------------------------------------===//
-
-struct AugmentedUse : public Use {
-  PointerIntPair<User*, 1, Tag> ref;
-  AugmentedUse(); // not implemented
-};
-
-
 //===----------------------------------------------------------------------===//
 //                         Use getUser Implementation
 //===----------------------------------------------------------------------===//
 
 User *Use::getUser() const {
   const Use *End = getImpliedUser();
-  const PointerIntPair<User*, 1, Tag>& ref(
-                                static_cast<const AugmentedUse*>(End - 1)->ref);
+  const PointerIntPair<User*, 1, unsigned>&
+    ref(static_cast<const AugmentedUse*>(End - 1)->ref);
   User *She = ref.getPointer();
   return ref.getInt()
     ? She
     : (User*)End;
 }
 
-//===----------------------------------------------------------------------===//
-//                         User allocHungoffUses Implementation
-//===----------------------------------------------------------------------===//
-
-Use *User::allocHungoffUses(unsigned N) const {
-  Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
-                                                + sizeof(AugmentedUse)
-                                                - sizeof(Use)));
-  Use *End = Begin + N;
-  PointerIntPair<User*, 1, Tag>& ref(static_cast<AugmentedUse&>(End[-1]).ref);
-  ref.setPointer(const_cast<User*>(this));
-  ref.setInt(tagOne);
-  return Use::initTags(Begin, End);
-}
-
-//===----------------------------------------------------------------------===//
-//                         User operator new Implementations
-//===----------------------------------------------------------------------===//
-
-void *User::operator new(size_t s, unsigned Us) {
-  void *Storage = ::operator new(s + sizeof(Use) * Us);
-  Use *Start = static_cast<Use*>(Storage);
-  Use *End = Start + Us;
-  User *Obj = reinterpret_cast<User*>(End);
-  Obj->OperandList = Start;
-  Obj->NumOperands = Us;
-  Use::initTags(Start, End);
-  return Obj;
-}
-
-/// Prefixed allocation - just before the first Use, allocate a NULL pointer.
-/// The destructor can detect its presence and readjust the OperandList
-/// for deletition.
-///
-void *User::operator new(size_t s, unsigned Us, bool Prefix) {
-  // currently prefixed allocation only admissible for
-  // unconditional branch instructions
-  if (!Prefix)
-    return operator new(s, Us);
-
-  assert(Us == 1 && "Other than one Use allocated?");
-  typedef PointerIntPair<void*, 2, Use::PrevPtrTag> TaggedPrefix;
-  void *Raw = ::operator new(s + sizeof(TaggedPrefix) + sizeof(Use) * Us);
-  TaggedPrefix *Pre = static_cast<TaggedPrefix*>(Raw);
-  Pre->setFromOpaqueValue(0);
-  void *Storage = Pre + 1; // skip over prefix
-  Use *Start = static_cast<Use*>(Storage);
-  Use *End = Start + Us;
-  User *Obj = reinterpret_cast<User*>(End);
-  Obj->OperandList = Start;
-  Obj->NumOperands = Us;
-  Use::initTags(Start, End);
-  return Obj;
-}
-
-//===----------------------------------------------------------------------===//
-//                         User operator delete Implementation
-//===----------------------------------------------------------------------===//
-
-void User::operator delete(void *Usr) {
-  User *Start = static_cast<User*>(Usr);
-  Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
-  //
-  // look for a variadic User
-  if (Storage == Start->OperandList) {
-    ::operator delete(Storage);
-    return;
-  }
-  //
-  // check for the flag whether the destructor has detected a prefixed
-  // allocation, in which case we remove the flag and delete starting
-  // at OperandList
-  if (reinterpret_cast<intptr_t>(Start->OperandList) & 1) {
-    ::operator delete(reinterpret_cast<char*>(Start->OperandList) - 1);
-    return;
-  }
-  //
-  // in all other cases just delete the nullary User (covers hung-off
-  // uses also
-  ::operator delete(Usr);
-}
-
 } // End llvm namespace
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
new file mode 100644
index 000000000000..2f4587debb66
--- /dev/null
+++ b/lib/VMCore/User.cpp
@@ -0,0 +1,81 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constant.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//                                 User Class
+//===----------------------------------------------------------------------===//
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+  if (From == To) return;   // Duh what?
+
+  assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+         "Cannot call User::replaceUsesOfWith on a constant!");
+
+  for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+    if (getOperand(i) == From) {  // Is This operand is pointing to oldval?
+      // The side effects of this setOperand call include linking to
+      // "To", adding "this" to the uses list of To, and
+      // most importantly, removing "this" from the use list of "From".
+      setOperand(i, To); // Fix it now...
+    }
+}
+
+//===----------------------------------------------------------------------===//
+//                         User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+  Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
+                                                + sizeof(AugmentedUse)
+                                                - sizeof(Use)));
+  Use *End = Begin + N;
+  PointerIntPair<User*, 1, unsigned>&
+    ref(static_cast<AugmentedUse&>(End[-1]).ref);
+  ref.setPointer(const_cast<User*>(this));
+  ref.setInt(1);
+  return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+//                         User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+  void *Storage = ::operator new(s + sizeof(Use) * Us);
+  Use *Start = static_cast<Use*>(Storage);
+  Use *End = Start + Us;
+  User *Obj = reinterpret_cast<User*>(End);
+  Obj->OperandList = Start;
+  Obj->NumOperands = Us;
+  Use::initTags(Start, End);
+  return Obj;
+}
+
+//===----------------------------------------------------------------------===//
+//                         User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+  User *Start = static_cast<User*>(Usr);
+  Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+  // If there were hung-off uses, they will have been freed already and
+  // NumOperands reset to 0, so here we just free the User itself.
+  ::operator delete(Storage);
+}
+
+} // End llvm namespace
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index b8c677565467..29f6a8094f0b 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -254,7 +255,7 @@ void Value::takeName(Value *V) {
   // Get V's ST, this should always succed, because V has a name.
   ValueSymbolTable *VST;
   bool Failure = getSymTab(V, VST);
-  assert(!Failure && "V has a name, so it should have a ST!"); Failure=Failure;
+  assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
 
   // If these values are both in the same symtab, we can do this very fast.
   // This works even if both values have no symtab yet.
@@ -345,25 +346,62 @@ Value *Value::stripPointerCasts() {
   return V;
 }
 
-Value *Value::getUnderlyingObject(unsigned MaxLookup) {
-  if (!getType()->isPointerTy())
-    return this;
-  Value *V = this;
-  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
-      V = cast<Operator>(V)->getOperand(0);
-    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-      if (GA->mayBeOverridden())
-        return V;
-      V = GA->getAliasee();
-    } else {
-      return V;
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+  // Note that it is not safe to speculate into a malloc'd region because
+  // malloc may return null.
+  // It's also not always safe to follow a bitcast, for example:
+  //   bitcast i8* (alloca i8) to i32*
+  // would result in a 4-byte load from a 1-byte alloca. Some cases could
+  // be handled using TargetData to check sizes and alignments though.
+
+  // These are obviously ok.
+  if (isa<AllocaInst>(this)) return true;
+
+  // Global variables which can't collapse to null are ok.
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+    return !GV->hasExternalWeakLinkage();
+
+  // byval arguments are ok.
+  if (const Argument *A = dyn_cast<Argument>(this))
+    return A->hasByValAttr();
+  
+  // For GEPs, determine if the indexing lands within the allocated object.
+  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+    // Conservatively require that the base pointer be fully dereferenceable.
+    if (!GEP->getOperand(0)->isDereferenceablePointer())
+      return false;
+    // Check the indices.
+    gep_type_iterator GTI = gep_type_begin(GEP);
+    for (User::const_op_iterator I = GEP->op_begin()+1,
+         E = GEP->op_end(); I != E; ++I) {
+      Value *Index = *I;
+      const Type *Ty = *GTI++;
+      // Struct indices can't be out of bounds.
+      if (isa<StructType>(Ty))
+        continue;
+      ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+      if (!CI)
+        return false;
+      // Zero is always ok.
+      if (CI->isZero())
+        continue;
+      // Check to see that it's within the bounds of an array.
+      const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+      if (!ATy)
+        return false;
+      if (CI->getValue().getActiveBits() > 64)
+        return false;
+      if (CI->getZExtValue() >= ATy->getNumElements())
+        return false;
     }
-    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+    // Indices check out; this is dereferenceable.
+    return true;
   }
-  return V;
+
+  // If we don't know, assume the worst.
+  return false;
 }
 
 /// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
@@ -600,26 +638,3 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
 /// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
 /// more than once.
 CallbackVH::~CallbackVH() {}
-
-
-//===----------------------------------------------------------------------===//
-//                                 User Class
-//===----------------------------------------------------------------------===//
-
-// replaceUsesOfWith - Replaces all references to the "From" definition with
-// references to the "To" definition.
-//
-void User::replaceUsesOfWith(Value *From, Value *To) {
-  if (From == To) return;   // Duh what?
-
-  assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
-         "Cannot call User::replaceUsesOfWith on a constant!");
-
-  for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
-    if (getOperand(i) == From) {  // Is This operand is pointing to oldval?
-      // The side effects of this setOperand call include linking to
-      // "To", adding "this" to the uses list of To, and
-      // most importantly, removing "this" from the use list of "From".
-      setOperand(i, To); // Fix it now...
-    }
-}
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index d2a8ce34ae47..c054ae46f23b 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -109,7 +109,8 @@ std::string EVT::getEVTString() const {
   case MVT::ppcf128: return "ppcf128";
   case MVT::isVoid:  return "isVoid";
   case MVT::Other:   return "ch";
-  case MVT::Flag:    return "flag";
+  case MVT::Glue:    return "glue";
+  case MVT::x86mmx:  return "x86mmx";
   case MVT::v2i8:    return "v2i8";
   case MVT::v4i8:    return "v4i8";
   case MVT::v8i8:    return "v8i8";
@@ -155,6 +156,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::f80:     return Type::getX86_FP80Ty(Context);
   case MVT::f128:    return Type::getFP128Ty(Context);
   case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+  case MVT::x86mmx:  return Type::getX86_MMXTy(Context);
   case MVT::v2i8:    return VectorType::get(Type::getInt8Ty(Context), 2);
   case MVT::v4i8:    return VectorType::get(Type::getInt8Ty(Context), 4);
   case MVT::v8i8:    return VectorType::get(Type::getInt8Ty(Context), 8);
@@ -196,6 +198,7 @@ EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){
   case Type::FloatTyID:     return MVT(MVT::f32);
   case Type::DoubleTyID:    return MVT(MVT::f64);
   case Type::X86_FP80TyID:  return MVT(MVT::f80);
+  case Type::X86_MMXTyID:   return MVT(MVT::x86mmx);
   case Type::FP128TyID:     return MVT(MVT::f128);
   case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
   case Type::PointerTyID:   return MVT(MVT::iPTR);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index e3ecc979bf12..58ec6fe88d35 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -72,7 +72,9 @@ namespace {  // Anonymous namespace for class
   struct PreVerifier : public FunctionPass {
     static char ID; // Pass ID, replacement for typeid
 
-    PreVerifier() : FunctionPass(ID) { }
+    PreVerifier() : FunctionPass(ID) {
+      initializePreVerifierPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -103,8 +105,8 @@ namespace {  // Anonymous namespace for class
 
 char PreVerifier::ID = 0;
 INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification", 
-                false, false);
-char &PreVerifyID = PreVerifier::ID;
+                false, false)
+static char &PreVerifyID = PreVerifier::ID;
 
 namespace {
   class TypeSet : public AbstractTypeUser {
@@ -184,11 +186,15 @@ namespace {
     Verifier()
       : FunctionPass(ID), 
       Broken(false), RealPass(true), action(AbortProcessAction),
-      Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
+      Mod(0), Context(0), DT(0), MessagesStr(Messages) {
+        initializeVerifierPass(*PassRegistry::getPassRegistry());
+      }
     explicit Verifier(VerifierFailureAction ctn)
       : FunctionPass(ID), 
       Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
-      MessagesStr(Messages) {}
+      MessagesStr(Messages) {
+        initializeVerifierPass(*PassRegistry::getPassRegistry());
+      }
 
     bool doInitialization(Module &M) {
       Mod = &M;
@@ -393,7 +399,10 @@ namespace {
 } // End anonymous namespace
 
 char Verifier::ID = 0;
-INITIALIZE_PASS(Verifier, "verify", "Module Verifier", false, false);
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
 
 // Assert - We know that cond should be true, if not print an error message.
 #define Assert(C, M) \
@@ -475,6 +484,7 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
           "Aliasee cannot be NULL!", &GA);
   Assert1(GA.getType() == GA.getAliasee()->getType(),
           "Alias and aliasee types should match!", &GA);
+  Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
 
   if (!isa<GlobalValue>(GA.getAliasee())) {
     const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
@@ -685,6 +695,8 @@ void Verifier::visitFunction(Function &F) {
   case CallingConv::Cold:
   case CallingConv::X86_FastCall:
   case CallingConv::X86_ThisCall:
+  case CallingConv::PTX_Kernel:
+  case CallingConv::PTX_Device:
     Assert1(!F.isVarArg(),
             "Varargs functions must have C calling conventions!", &F);
     break;
@@ -1643,10 +1655,14 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
     if (ID == Intrinsic::gcroot) {
       AllocaInst *AI =
         dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
-      Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
-              "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
+      Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
       Assert1(isa<Constant>(CI.getArgOperand(1)),
               "llvm.gcroot parameter #2 must be a constant.", &CI);
+      if (!AI->getType()->getElementType()->isPointerTy()) {
+        Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+                "llvm.gcroot parameter #1 must either be a pointer alloca, "
+                "or argument #2 must be a non-null constant.", &CI);
+      }
     }
 
     Assert1(CI.getParent()->getParent()->hasGC(),
diff --git a/projects/Makefile b/projects/Makefile
index 491d596eca9a..1318a35d941e 100644
--- a/projects/Makefile
+++ b/projects/Makefile
@@ -14,9 +14,12 @@ include $(LEVEL)/Makefile.config
 # Before 2008.06.24 it lived in llvm-test, so exclude that as well for now.
 DIRS:= $(filter-out llvm-test test-suite,$(patsubst $(PROJ_SRC_DIR)/%/Makefile,%,$(wildcard $(PROJ_SRC_DIR)/*/Makefile)))
 
-# Don't build compiler-rt either, it isn't designed to be built directly.
+# Don't build compiler-rt, it isn't designed to be built directly.
 DIRS := $(filter-out compiler-rt,$(DIRS))
 
+# Don't build libcxx, it isn't designed to be built directly.
+DIRS := $(filter-out libcxx,$(DIRS))
+
 # Sparc cannot link shared libraries (libtool problem?)
 ifeq ($(ARCH), Sparc)
 DIRS := $(filter-out sample, $(DIRS))
diff --git a/projects/sample/autoconf/AutoRegen.sh b/projects/sample/autoconf/AutoRegen.sh
index 126703ba5aaf..6e6931c7a9bc 100755
--- a/projects/sample/autoconf/AutoRegen.sh
+++ b/projects/sample/autoconf/AutoRegen.sh
@@ -13,14 +13,14 @@ cwd=`pwd`
 if test -d ../../../autoconf/m4 ; then
   cd ../../../autoconf/m4
   llvm_m4=`pwd`
-  llvm_src_root=../../..
-  llvm_obj_root=../../..
+  llvm_src_root=../..
+  llvm_obj_root=../..
   cd $cwd
 elif test -d ../../llvm/autoconf/m4 ; then
   cd ../../llvm/autoconf/m4
   llvm_m4=`pwd`
-  llvm_src_root=../..
-  llvm_obj_root=../..
+  llvm_src_root=..
+  llvm_obj_root=..
   cd $cwd
 else
   while true ; do
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 221dcc45edc0..4e61bee5e790 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -4,12 +4,16 @@ dnl **************************************************************************
 AC_INIT([[[SAMPLE]]],[[[x.xx]]],[bugs@yourdomain])
 
 dnl Identify where LLVM source tree is
-LLVM_SRC_ROOT="../../"
-LLVM_OBJ_ROOT="../../"
+LLVM_SRC_ROOT="../.."
+LLVM_OBJ_ROOT="../.."
+
+dnl Find absolute paths to LLVM source and object trees
+LLVM_ABS_SRC_ROOT="`cd $srcdir ; cd $LLVM_SRC_ROOT ; pwd`"
+LLVM_ABS_OBJ_ROOT="`cd $LLVM_OBJ_ROOT ; pwd`"
 
 dnl Tell autoconf that this is an LLVM project being configured
 dnl This provides the --with-llvmsrc and --with-llvmobj options
-LLVM_CONFIG_PROJECT($LLVM_SRC_ROOT,$LLVM_OBJ_ROOT)
+LLVM_CONFIG_PROJECT($LLVM_ABS_SRC_ROOT,$LLVM_ABS_OBJ_ROOT)
 
 dnl Tell autoconf that the auxilliary files are actually located in
 dnl the LLVM autoconf directory, not here.
diff --git a/projects/sample/configure b/projects/sample/configure
index 5a13388871da..27b64bf8084a 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -1,10 +1,11 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for [SAMPLE] [x.xx].
+# Generated by GNU Autoconf 2.60 for [SAMPLE] [x.xx].
 #
 # Report bugs to <bugs@yourdomain>.
 #
-# Copyright (C) 2003 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
 # This configure script is free software; the Free Software Foundation
 # gives unlimited permission to copy, distribute and modify it.
 ## --------------------- ##
@@ -18,11 +19,35 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
   # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
   # is contrary to our usage.  Disable this feature.
   alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
-  set -o posix
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
 fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
 DUALCASE=1; export DUALCASE # for MKS sh
 
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  echo "#! /bin/sh" >conf$$.sh
+  echo  "exit 0"   >>conf$$.sh
+  chmod +x conf$$.sh
+  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+    PATH_SEPARATOR=';'
+  else
+    PATH_SEPARATOR=:
+  fi
+  rm -f conf$$.sh
+fi
+
 # Support unset when possible.
 if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
   as_unset=unset
@@ -31,8 +56,43 @@ else
 fi
 
 
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+as_nl='
+'
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  { (exit 1); exit 1; }
+fi
+
 # Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
 PS1='$ '
 PS2='> '
 PS4='+ '
@@ -46,18 +106,19 @@ do
   if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
     eval $as_var=C; export $as_var
   else
-    $as_unset $as_var
+    ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
   fi
 done
 
 # Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
   as_expr=expr
 else
   as_expr=false
 fi
 
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
   as_basename=basename
 else
   as_basename=false
@@ -65,157 +126,386 @@ fi
 
 
 # Name of the executable.
-as_me=`$as_basename "$0" ||
+as_me=`$as_basename -- "$0" ||
 $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
 	 X"$0" : 'X\(//\)$' \| \
-	 X"$0" : 'X\(/\)$' \| \
-	 .     : '\(.\)' 2>/dev/null ||
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
 echo X/"$0" |
-    sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
-  	  /^X\/\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\/\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
 
+# CDPATH.
+$as_unset CDPATH
 
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
 
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
-  echo "#! /bin/sh" >conf$$.sh
-  echo  "exit 0"   >>conf$$.sh
-  chmod +x conf$$.sh
-  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
-    PATH_SEPARATOR=';'
-  else
-    PATH_SEPARATOR=:
-  fi
-  rm -f conf$$.sh
+if test "x$CONFIG_SHELL" = x; then
+  if (eval ":") 2>/dev/null; then
+  as_have_required=yes
+else
+  as_have_required=no
 fi
 
+  if test $as_have_required = yes && 	 (eval ":
+(as_func_return () {
+  (exit \$1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
 
-  as_lineno_1=$LINENO
-  as_lineno_2=$LINENO
-  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
-  test "x$as_lineno_1" != "x$as_lineno_2" &&
-  test "x$as_lineno_3"  = "x$as_lineno_2"  || {
-  # Find who we are.  Look in the path if we contain no path at all
-  # relative or not.
-  case $0 in
-    *[\\/]* ) as_myself=$0 ;;
-    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
 
-       ;;
-  esac
-  # We did not find ourselves, most probably we were run as `sh COMMAND'
-  # in which case we are not to be found in the path.
-  if test "x$as_myself" = x; then
-    as_myself=$0
-  fi
-  if test ! -f "$as_myself"; then
-    { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
-   { (exit 1); exit 1; }; }
-  fi
-  case $CONFIG_SHELL in
-  '')
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0) || { (exit 1); exit 1; }
+
+(
+  as_lineno_1=\$LINENO
+  as_lineno_2=\$LINENO
+  test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" &&
+  test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; }
+") 2> /dev/null; then
+  :
+else
+  as_candidate_shells=
     as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+for as_dir in /usr/bin/posix$PATH_SEPARATOR/bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
 do
   IFS=$as_save_IFS
   test -z "$as_dir" && as_dir=.
-  for as_base in sh bash ksh sh5; do
-	 case $as_dir in
+  case $as_dir in
 	 /*)
-	   if ("$as_dir/$as_base" -c '
+	   for as_base in sh bash ksh sh5; do
+	     as_candidate_shells="$as_candidate_shells $as_dir/$as_base"
+	   done;;
+       esac
+done
+IFS=$as_save_IFS
+
+
+      for as_shell in $as_candidate_shells $SHELL; do
+	 # Try only shells that exist, to save several forks.
+	 if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		{ ("$as_shell") 2> /dev/null <<\_ASEOF
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+:
+_ASEOF
+}; then
+  CONFIG_SHELL=$as_shell
+	       as_have_required=yes
+	       if { "$as_shell" 2> /dev/null <<\_ASEOF
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
+fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
+
+:
+(as_func_return () {
+  (exit $1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
+
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
+
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = "$1" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test $exitcode = 0) || { (exit 1); exit 1; }
+
+(
   as_lineno_1=$LINENO
   as_lineno_2=$LINENO
-  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
   test "x$as_lineno_1" != "x$as_lineno_2" &&
-  test "x$as_lineno_3"  = "x$as_lineno_2" ') 2>/dev/null; then
-	     $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
-	     $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
-	     CONFIG_SHELL=$as_dir/$as_base
-	     export CONFIG_SHELL
-	     exec "$CONFIG_SHELL" "$0" ${1+"$@"}
-	   fi;;
-	 esac
-       done
-done
-;;
-  esac
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; }
+
+_ASEOF
+}; then
+  break
+fi
+
+fi
+
+      done
+
+      if test "x$CONFIG_SHELL" != x; then
+  for as_var in BASH_ENV ENV
+        do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+        done
+        export CONFIG_SHELL
+        exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
+
+
+    if test $as_have_required = no; then
+  echo This script requires a shell more modern than all the
+      echo shells that I found on your system.  Please install a
+      echo modern shell, or manually run the script under such a
+      echo shell if you do have one.
+      { (exit 1); exit 1; }
+fi
+
+
+fi
+
+fi
+
+
+
+(eval "as_func_return () {
+  (exit \$1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
+
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
+
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0") || {
+  echo No shell found that supports shell functions.
+  echo Please tell autoconf@gnu.org about your system,
+  echo including any error possibly output before this
+  echo message
+}
+
+
+
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
 
   # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
   # uniformly replaced by the line number.  The first 'sed' inserts a
-  # line-number line before each line; the second 'sed' does the real
-  # work.  The second script uses 'N' to pair each line-number line
-  # with the numbered line, and appends trailing '-' during
-  # substitution so that $LINENO is not a special case at line end.
+  # line-number line after each line using $LINENO; the second 'sed'
+  # does the real work.  The second script uses 'N' to pair each
+  # line-number line with the line containing $LINENO, and appends
+  # trailing '-' during substitution so that $LINENO is not a special
+  # case at line end.
   # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
-  # second 'sed' script.  Blame Lee E. McMahon for sed's syntax.  :-)
-  sed '=' <$as_myself |
+  # scripts with optimization help from Paolo Bonzini.  Blame Lee
+  # E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
     sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
       N
-      s,$,-,
-      : loop
-      s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
       t loop
-      s,-$,,
-      s,^['$as_cr_digits']*\n,,
+      s/-\n.*//
     ' >$as_me.lineno &&
-  chmod +x $as_me.lineno ||
+  chmod +x "$as_me.lineno" ||
     { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
    { (exit 1); exit 1; }; }
 
   # Don't try to exec as it changes $[0], causing all sort of problems
   # (the dirname of $[0] is not the place where we might find the
-  # original and so on.  Autoconf is especially sensible to this).
-  . ./$as_me.lineno
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
   # Exit status is that of the last command.
   exit
 }
 
 
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
-  *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T='	' ;;
-  *c*,*  ) ECHO_N=-n ECHO_C= ECHO_T= ;;
-  *)       ECHO_N= ECHO_C='\c' ECHO_T= ;;
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+  case `echo 'x\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  *)   ECHO_C='\c';;
+  esac;;
+*)
+  ECHO_N='-n';;
 esac
 
-if expr a : '\(a\)' >/dev/null 2>&1; then
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
   as_expr=expr
 else
   as_expr=false
 fi
 
 rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir
+fi
 echo >conf$$.file
 if ln -s conf$$.file conf$$ 2>/dev/null; then
-  # We could just check for DJGPP; but this test a) works b) is more generic
-  # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
-  if test -f conf$$.exe; then
-    # Don't use ln at all; we don't have any links
+  as_ln_s='ln -s'
+  # ... but there are two gotchas:
+  # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+  # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+  # In both cases, we have to default to `cp -p'.
+  ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
     as_ln_s='cp -p'
-  else
-    as_ln_s='ln -s'
-  fi
 elif ln conf$$.file conf$$ 2>/dev/null; then
   as_ln_s=ln
 else
   as_ln_s='cp -p'
 fi
-rm -f conf$$ conf$$.exe conf$$.file
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
 
 if mkdir -p . 2>/dev/null; then
   as_mkdir_p=:
@@ -224,7 +514,19 @@ else
   as_mkdir_p=false
 fi
 
-as_executable_p="test -f"
+# Find out whether ``test -x'' works.  Don't use a zero-byte file, as
+# systems may use methods other than mode bits to determine executability.
+cat >conf$$.file <<_ASEOF
+#! /bin/sh
+exit 0
+_ASEOF
+chmod +x conf$$.file
+if test -x conf$$.file >/dev/null 2>&1; then
+  as_executable_p="test -x"
+else
+  as_executable_p=:
+fi
+rm -f conf$$.file
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -233,39 +535,27 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
 as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
 
 
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" 	$as_nl"
-
-# CDPATH.
-$as_unset CDPATH
 
+exec 7<&0 </dev/null 6>&1
 
 # Name of the host.
 # hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
 # so uname gets run too.
 ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
 
-exec 6>&1
-
 #
 # Initializations.
 #
 ac_default_prefix=/usr/local
+ac_clean_files=
 ac_config_libobj_dir=.
+LIBOBJS=
 cross_compiling=no
 subdirs=
 MFLAGS=
 MAKEFLAGS=
 SHELL=${CONFIG_SHELL-/bin/sh}
 
-# Maximum number of lines to put in a shell here document.
-# This variable seems obsolete.  It should probably be removed, and
-# only ac_max_sed_lines should be used.
-: ${ac_max_here_lines=38}
-
 # Identity of this package.
 PACKAGE_NAME='[SAMPLE]'
 PACKAGE_TARNAME='--sample--'
@@ -274,8 +564,52 @@ PACKAGE_STRING='[SAMPLE] [x.xx]'
 PACKAGE_BUGREPORT='bugs@yourdomain'
 
 ac_unique_file=""Makefile.common.in""
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS LLVM_SRC LLVM_OBJ LIBOBJS LTLIBOBJS'
+ac_subst_vars='SHELL
+PATH_SEPARATOR
+PACKAGE_NAME
+PACKAGE_TARNAME
+PACKAGE_VERSION
+PACKAGE_STRING
+PACKAGE_BUGREPORT
+exec_prefix
+prefix
+program_transform_name
+bindir
+sbindir
+libexecdir
+datarootdir
+datadir
+sysconfdir
+sharedstatedir
+localstatedir
+includedir
+oldincludedir
+docdir
+infodir
+htmldir
+dvidir
+pdfdir
+psdir
+libdir
+localedir
+mandir
+DEFS
+ECHO_C
+ECHO_N
+ECHO_T
+LIBS
+build_alias
+host_alias
+target_alias
+LLVM_SRC
+LLVM_OBJ
+LIBOBJS
+LTLIBOBJS'
 ac_subst_files=''
+      ac_precious_vars='build_alias
+host_alias
+target_alias'
+
 
 # Initialize some variables set by options.
 ac_init_help=
@@ -302,34 +636,48 @@ x_libraries=NONE
 # and all the variables that are supposed to be based on exec_prefix
 # by default will actually change.
 # Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
 bindir='${exec_prefix}/bin'
 sbindir='${exec_prefix}/sbin'
 libexecdir='${exec_prefix}/libexec'
-datadir='${prefix}/share'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
-libdir='${exec_prefix}/lib'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
-infodir='${prefix}/info'
-mandir='${prefix}/man'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
 
 ac_prev=
+ac_dashdash=
 for ac_option
 do
   # If the previous option needs an argument, assign it.
   if test -n "$ac_prev"; then
-    eval "$ac_prev=\$ac_option"
+    eval $ac_prev=\$ac_option
     ac_prev=
     continue
   fi
 
-  ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+  case $ac_option in
+  *=*)	ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *)	ac_optarg=yes ;;
+  esac
 
   # Accept the important Cygnus configure options, so we can diagnose typos.
 
-  case $ac_option in
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
 
   -bindir | --bindir | --bindi | --bind | --bin | --bi)
     ac_prev=bindir ;;
@@ -351,12 +699,18 @@ do
   --config-cache | -C)
     cache_file=config.cache ;;
 
-  -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+  -datadir | --datadir | --datadi | --datad)
     ac_prev=datadir ;;
-  -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
-  | --da=*)
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
     datadir=$ac_optarg ;;
 
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
   -disable-* | --disable-*)
     ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
     # Reject names that are not valid shell variable names.
@@ -364,7 +718,17 @@ do
       { echo "$as_me: error: invalid feature name: $ac_feature" >&2
    { (exit 1); exit 1; }; }
     ac_feature=`echo $ac_feature | sed 's/-/_/g'`
-    eval "enable_$ac_feature=no" ;;
+    eval enable_$ac_feature=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
 
   -enable-* | --enable-*)
     ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
@@ -373,11 +737,7 @@ do
       { echo "$as_me: error: invalid feature name: $ac_feature" >&2
    { (exit 1); exit 1; }; }
     ac_feature=`echo $ac_feature | sed 's/-/_/g'`
-    case $ac_option in
-      *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
-      *) ac_optarg=yes ;;
-    esac
-    eval "enable_$ac_feature='$ac_optarg'" ;;
+    eval enable_$ac_feature=\$ac_optarg ;;
 
   -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
   | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
@@ -404,6 +764,12 @@ do
   -host=* | --host=* | --hos=* | --ho=*)
     host_alias=$ac_optarg ;;
 
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
   -includedir | --includedir | --includedi | --included | --include \
   | --includ | --inclu | --incl | --inc)
     ac_prev=includedir ;;
@@ -428,13 +794,16 @@ do
   | --libexe=* | --libex=* | --libe=*)
     libexecdir=$ac_optarg ;;
 
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
   -localstatedir | --localstatedir | --localstatedi | --localstated \
-  | --localstate | --localstat | --localsta | --localst \
-  | --locals | --local | --loca | --loc | --lo)
+  | --localstate | --localstat | --localsta | --localst | --locals)
     ac_prev=localstatedir ;;
   -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
-  | --localstate=* | --localstat=* | --localsta=* | --localst=* \
-  | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
     localstatedir=$ac_optarg ;;
 
   -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
@@ -499,6 +868,16 @@ do
   | --progr-tra=* | --program-tr=* | --program-t=*)
     program_transform_name=$ac_optarg ;;
 
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
   -q | -quiet | --quiet | --quie | --qui | --qu | --q \
   | -silent | --silent | --silen | --sile | --sil)
     silent=yes ;;
@@ -555,11 +934,7 @@ do
       { echo "$as_me: error: invalid package name: $ac_package" >&2
    { (exit 1); exit 1; }; }
     ac_package=`echo $ac_package| sed 's/-/_/g'`
-    case $ac_option in
-      *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
-      *) ac_optarg=yes ;;
-    esac
-    eval "with_$ac_package='$ac_optarg'" ;;
+    eval with_$ac_package=\$ac_optarg ;;
 
   -without-* | --without-*)
     ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
@@ -568,7 +943,7 @@ do
       { echo "$as_me: error: invalid package name: $ac_package" >&2
    { (exit 1); exit 1; }; }
     ac_package=`echo $ac_package | sed 's/-/_/g'`
-    eval "with_$ac_package=no" ;;
+    eval with_$ac_package=no ;;
 
   --x)
     # Obsolete; use --with-x.
@@ -599,8 +974,7 @@ Try \`$0 --help' for more information." >&2
     expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
       { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
    { (exit 1); exit 1; }; }
-    ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
-    eval "$ac_envvar='$ac_optarg'"
+    eval $ac_envvar=\$ac_optarg
     export $ac_envvar ;;
 
   *)
@@ -620,27 +994,19 @@ if test -n "$ac_prev"; then
    { (exit 1); exit 1; }; }
 fi
 
-# Be sure to have absolute paths.
-for ac_var in exec_prefix prefix
-do
-  eval ac_val=$`echo $ac_var`
-  case $ac_val in
-    [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
-    *)  { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
-   { (exit 1); exit 1; }; };;
-  esac
-done
-
-# Be sure to have absolute paths.
-for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
-	      localstatedir libdir includedir oldincludedir infodir mandir
+# Be sure to have absolute directory names.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir
 do
-  eval ac_val=$`echo $ac_var`
+  eval ac_val=\$$ac_var
   case $ac_val in
-    [\\/$]* | ?:[\\/]* ) ;;
-    *)  { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
-   { (exit 1); exit 1; }; };;
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
   esac
+  { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; }
 done
 
 # There might be people who depend on the old broken behavior: `$host'
@@ -667,54 +1033,76 @@ test -n "$host_alias" && ac_tool_prefix=$host_alias-
 test "$silent" = yes && exec 6>/dev/null
 
 
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  { echo "$as_me: error: Working directory cannot be determined" >&2
+   { (exit 1); exit 1; }; }
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  { echo "$as_me: error: pwd does not report name of working directory" >&2
+   { (exit 1); exit 1; }; }
+
+
 # Find the source files, if location was not specified.
 if test -z "$srcdir"; then
   ac_srcdir_defaulted=yes
-  # Try the directory containing this script, then its parent.
-  ac_confdir=`(dirname "$0") 2>/dev/null ||
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$0" ||
 $as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
 	 X"$0" : 'X\(//\)[^/]' \| \
 	 X"$0" : 'X\(//\)$' \| \
-	 X"$0" : 'X\(/\)' \| \
-	 .     : '\(.\)' 2>/dev/null ||
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
 echo X"$0" |
-    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
-  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
-  	  /^X\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
   srcdir=$ac_confdir
-  if test ! -r $srcdir/$ac_unique_file; then
+  if test ! -r "$srcdir/$ac_unique_file"; then
     srcdir=..
   fi
 else
   ac_srcdir_defaulted=no
 fi
-if test ! -r $srcdir/$ac_unique_file; then
-  if test "$ac_srcdir_defaulted" = yes; then
-    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
-   { (exit 1); exit 1; }; }
-  else
-    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
    { (exit 1); exit 1; }; }
-  fi
 fi
-(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
-  { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2
    { (exit 1); exit 1; }; }
-srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
-ac_env_build_alias_set=${build_alias+set}
-ac_env_build_alias_value=$build_alias
-ac_cv_env_build_alias_set=${build_alias+set}
-ac_cv_env_build_alias_value=$build_alias
-ac_env_host_alias_set=${host_alias+set}
-ac_env_host_alias_value=$host_alias
-ac_cv_env_host_alias_set=${host_alias+set}
-ac_cv_env_host_alias_value=$host_alias
-ac_env_target_alias_set=${target_alias+set}
-ac_env_target_alias_value=$target_alias
-ac_cv_env_target_alias_set=${target_alias+set}
-ac_cv_env_target_alias_value=$target_alias
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
 
 #
 # Report the --help message.
@@ -743,9 +1131,6 @@ Configuration:
   -n, --no-create         do not create output files
       --srcdir=DIR        find the sources in DIR [configure dir or \`..']
 
-_ACEOF
-
-  cat <<_ACEOF
 Installation directories:
   --prefix=PREFIX         install architecture-independent files in PREFIX
 			  [$ac_default_prefix]
@@ -763,15 +1148,22 @@ Fine tuning of the installation directories:
   --bindir=DIR           user executables [EPREFIX/bin]
   --sbindir=DIR          system admin executables [EPREFIX/sbin]
   --libexecdir=DIR       program executables [EPREFIX/libexec]
-  --datadir=DIR          read-only architecture-independent data [PREFIX/share]
   --sysconfdir=DIR       read-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIR   modifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR    modifiable single-machine data [PREFIX/var]
   --libdir=DIR           object code libraries [EPREFIX/lib]
   --includedir=DIR       C header files [PREFIX/include]
   --oldincludedir=DIR    C header files for non-gcc [/usr/include]
-  --infodir=DIR          info documentation [PREFIX/info]
-  --mandir=DIR           man documentation [PREFIX/man]
+  --datarootdir=DIR      read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR          read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR          info documentation [DATAROOTDIR/info]
+  --localedir=DIR        locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR           man documentation [DATAROOTDIR/man]
+  --docdir=DIR           documentation root [DATAROOTDIR/doc/--sample--]
+  --htmldir=DIR          html documentation [DOCDIR]
+  --dvidir=DIR           dvi documentation [DOCDIR]
+  --pdfdir=DIR           pdf documentation [DOCDIR]
+  --psdir=DIR            ps documentation [DOCDIR]
 _ACEOF
 
   cat <<\_ACEOF
@@ -792,120 +1184,86 @@ Optional Packages:
 
 Report bugs to <bugs@yourdomain>.
 _ACEOF
+ac_status=$?
 fi
 
 if test "$ac_init_help" = "recursive"; then
   # If there are subdirs, report their specific --help.
-  ac_popdir=`pwd`
   for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
-    test -d $ac_dir || continue
+    test -d "$ac_dir" || continue
     ac_builddir=.
 
-if test "$ac_dir" != .; then
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
   ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
-  # A "../" for each directory in $ac_dir_suffix.
-  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
-  ac_dir_suffix= ac_top_builddir=
-fi
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
 
 case $srcdir in
-  .)  # No --srcdir option.  We are building in place.
+  .)  # We are building in place.
     ac_srcdir=.
-    if test -z "$ac_top_builddir"; then
-       ac_top_srcdir=.
-    else
-       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
-    fi ;;
-  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
     ac_srcdir=$srcdir$ac_dir_suffix;
-    ac_top_srcdir=$srcdir ;;
-  *) # Relative path.
-    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
-    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
 esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Check for guested configure.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
 
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
-  case "$ac_dir" in
-  .) ac_abs_builddir=`pwd`;;
-  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
-  *) ac_abs_builddir=`pwd`/"$ac_dir";;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
-  case ${ac_top_builddir}. in
-  .) ac_abs_top_builddir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
-  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
-  case $ac_srcdir in
-  .) ac_abs_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
-  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
-  case $ac_top_srcdir in
-  .) ac_abs_top_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
-  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
-  esac;;
-esac
-
-    cd $ac_dir
-    # Check for guested configure; otherwise get Cygnus style configure.
-    if test -f $ac_srcdir/configure.gnu; then
-      echo
-      $SHELL $ac_srcdir/configure.gnu  --help=recursive
-    elif test -f $ac_srcdir/configure; then
-      echo
-      $SHELL $ac_srcdir/configure  --help=recursive
-    elif test -f $ac_srcdir/configure.ac ||
-	   test -f $ac_srcdir/configure.in; then
-      echo
-      $ac_configure --help
-    else
-      echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
-    fi
-    cd $ac_popdir
-  done
-fi
-
-test -n "$ac_init_help" && exit 0
+test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
 [SAMPLE] configure [x.xx]
-generated by GNU Autoconf 2.59
+generated by GNU Autoconf 2.60
 
-Copyright (C) 2003 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 _ACEOF
-  exit 0
+  exit
 fi
-exec 5>config.log
-cat >&5 <<_ACEOF
+cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
 It was created by [SAMPLE] $as_me [x.xx], which was
-generated by GNU Autoconf 2.59.  Invocation command line was
+generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
 
 _ACEOF
+exec 5>>config.log
 {
 cat <<_ASUNAME
 ## --------- ##
@@ -924,7 +1282,7 @@ uname -v = `(uname -v) 2>/dev/null || echo unknown`
 /bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
 /usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
 /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
-hostinfo               = `(hostinfo) 2>/dev/null               || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
 /bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
 /usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
 /bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
@@ -938,6 +1296,7 @@ do
   test -z "$as_dir" && as_dir=.
   echo "PATH: $as_dir"
 done
+IFS=$as_save_IFS
 
 } >&5
 
@@ -959,7 +1318,6 @@ _ACEOF
 ac_configure_args=
 ac_configure_args0=
 ac_configure_args1=
-ac_sep=
 ac_must_keep_next=false
 for ac_pass in 1 2
 do
@@ -970,7 +1328,7 @@ do
     -q | -quiet | --quiet | --quie | --qui | --qu | --q \
     | -silent | --silent | --silen | --sile | --sil)
       continue ;;
-    *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+    *\'*)
       ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
     esac
     case $ac_pass in
@@ -992,9 +1350,7 @@ do
 	  -* ) ac_must_keep_next=true ;;
 	esac
       fi
-      ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
-      # Get rid of the leading space.
-      ac_sep=" "
+      ac_configure_args="$ac_configure_args '$ac_arg'"
       ;;
     esac
   done
@@ -1005,8 +1361,8 @@ $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_
 # When interrupted or exit'd, cleanup temporary files, and complete
 # config.log.  We remove comments because anyway the quotes in there
 # would cause problems or look ugly.
-# WARNING: Be sure not to use single quotes in there, as some shells,
-# such as our DU 5.0 friend, will then `close' the trap.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
 trap 'exit_status=$?
   # Save into config.log some information that might help in debugging.
   {
@@ -1019,20 +1375,34 @@ trap 'exit_status=$?
 _ASBOX
     echo
     # The following way of writing the cache mishandles newlines in values,
-{
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5
+echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      *) $as_unset $ac_var ;;
+      esac ;;
+    esac
+  done
   (set) 2>&1 |
-    case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
-    *ac_space=\ *)
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
       sed -n \
-	"s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
-	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
-      ;;
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
     *)
-      sed -n \
-	"s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
       ;;
-    esac;
-}
+    esac |
+    sort
+)
     echo
 
     cat <<\_ASBOX
@@ -1043,22 +1413,28 @@ _ASBOX
     echo
     for ac_var in $ac_subst_vars
     do
-      eval ac_val=$`echo $ac_var`
-      echo "$ac_var='"'"'$ac_val'"'"'"
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      echo "$ac_var='\''$ac_val'\''"
     done | sort
     echo
 
     if test -n "$ac_subst_files"; then
       cat <<\_ASBOX
-## ------------- ##
-## Output files. ##
-## ------------- ##
+## ------------------- ##
+## File substitutions. ##
+## ------------------- ##
 _ASBOX
       echo
       for ac_var in $ac_subst_files
       do
-	eval ac_val=$`echo $ac_var`
-	echo "$ac_var='"'"'$ac_val'"'"'"
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	echo "$ac_var='\''$ac_val'\''"
       done | sort
       echo
     fi
@@ -1070,26 +1446,24 @@ _ASBOX
 ## ----------- ##
 _ASBOX
       echo
-      sed "/^$/d" confdefs.h | sort
+      cat confdefs.h
       echo
     fi
     test "$ac_signal" != 0 &&
       echo "$as_me: caught signal $ac_signal"
     echo "$as_me: exit $exit_status"
   } >&5
-  rm -f core *.core &&
-  rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
     exit $exit_status
-     ' 0
+' 0
 for ac_signal in 1 2 13 15; do
   trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
 done
 ac_signal=0
 
 # confdefs.h avoids OS command line length limits that DEFS can exceed.
-rm -rf conftest* confdefs.h
-# AIX cpp loses on an empty file, so make sure it contains at least a newline.
-echo >confdefs.h
+rm -f -r conftest* confdefs.h
 
 # Predefined preprocessor variables.
 
@@ -1120,14 +1494,17 @@ _ACEOF
 
 # Let the site file select an alternate cache file if it wants to.
 # Prefer explicitly selected file to automatically selected ones.
-if test -z "$CONFIG_SITE"; then
-  if test "x$prefix" != xNONE; then
-    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
-  else
-    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
-  fi
+if test -n "$CONFIG_SITE"; then
+  set x "$CONFIG_SITE"
+elif test "x$prefix" != xNONE; then
+  set x "$prefix/share/config.site" "$prefix/etc/config.site"
+else
+  set x "$ac_default_prefix/share/config.site" \
+	"$ac_default_prefix/etc/config.site"
 fi
-for ac_site_file in $CONFIG_SITE; do
+shift
+for ac_site_file
+do
   if test -r "$ac_site_file"; then
     { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
 echo "$as_me: loading site script $ac_site_file" >&6;}
@@ -1143,8 +1520,8 @@ if test -r "$cache_file"; then
     { echo "$as_me:$LINENO: loading cache $cache_file" >&5
 echo "$as_me: loading cache $cache_file" >&6;}
     case $cache_file in
-      [\\/]* | ?:[\\/]* ) . $cache_file;;
-      *)                      . ./$cache_file;;
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
     esac
   fi
 else
@@ -1156,12 +1533,11 @@ fi
 # Check that the precious variables saved in the cache have kept the same
 # value.
 ac_cache_corrupted=false
-for ac_var in `(set) 2>&1 |
-	       sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+for ac_var in $ac_precious_vars; do
   eval ac_old_set=\$ac_cv_env_${ac_var}_set
   eval ac_new_set=\$ac_env_${ac_var}_set
-  eval ac_old_val="\$ac_cv_env_${ac_var}_value"
-  eval ac_new_val="\$ac_env_${ac_var}_value"
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
   case $ac_old_set,$ac_new_set in
     set,)
       { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
@@ -1186,8 +1562,7 @@ echo "$as_me:   current value: $ac_new_val" >&2;}
   # Pass precious variables to config.status.
   if test "$ac_new_set" = set; then
     case $ac_new_val in
-    *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
-      ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
     *) ac_arg=$ac_var=$ac_new_val ;;
     esac
     case " $ac_configure_args " in
@@ -1204,11 +1579,6 @@ echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start ov
    { (exit 1); exit 1; }; }
 fi
 
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
@@ -1233,78 +1603,94 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+LLVM_SRC_ROOT="../.."
+LLVM_OBJ_ROOT="../.."
+
+LLVM_ABS_SRC_ROOT="`cd $srcdir ; cd $LLVM_SRC_ROOT ; pwd`"
+LLVM_ABS_OBJ_ROOT="`cd $LLVM_OBJ_ROOT ; pwd`"
+
+
+# Check whether --with-llvmsrc was given.
+if test "${with_llvmsrc+set}" = set; then
+  withval=$with_llvmsrc; llvm_src="$withval"
+else
+  llvm_src="$LLVM_ABS_SRC_ROOT"
+fi
+
+  LLVM_SRC=$llvm_src
+
+
+# Check whether --with-llvmobj was given.
+if test "${with_llvmobj+set}" = set; then
+  withval=$with_llvmobj; llvm_obj="$withval"
+else
+  llvm_obj="$LLVM_ABS_OBJ_ROOT"
+fi
+
+  LLVM_OBJ=$llvm_obj
 
+  ac_config_commands="$ac_config_commands setup"
 
 
 
 ac_aux_dir=
-for ac_dir in ../../autoconf $srcdir/../../autoconf; do
-  if test -f $ac_dir/install-sh; then
+for ac_dir in $LLVM_SRC/autoconf "$srcdir"/$LLVM_SRC/autoconf; do
+  if test -f "$ac_dir/install-sh"; then
     ac_aux_dir=$ac_dir
     ac_install_sh="$ac_aux_dir/install-sh -c"
     break
-  elif test -f $ac_dir/install.sh; then
+  elif test -f "$ac_dir/install.sh"; then
     ac_aux_dir=$ac_dir
     ac_install_sh="$ac_aux_dir/install.sh -c"
     break
-  elif test -f $ac_dir/shtool; then
+  elif test -f "$ac_dir/shtool"; then
     ac_aux_dir=$ac_dir
     ac_install_sh="$ac_aux_dir/shtool install -c"
     break
   fi
 done
 if test -z "$ac_aux_dir"; then
-  { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in ../../autoconf $srcdir/../../autoconf" >&5
-echo "$as_me: error: cannot find install-sh or install.sh in ../../autoconf $srcdir/../../autoconf" >&2;}
+  { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $LLVM_SRC/autoconf \"$srcdir\"/$LLVM_SRC/autoconf" >&5
+echo "$as_me: error: cannot find install-sh or install.sh in $LLVM_SRC/autoconf \"$srcdir\"/$LLVM_SRC/autoconf" >&2;}
    { (exit 1); exit 1; }; }
 fi
-ac_config_guess="$SHELL $ac_aux_dir/config.guess"
-ac_config_sub="$SHELL $ac_aux_dir/config.sub"
-ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
 
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
 
 
-# Check whether --with-llvmsrc or --without-llvmsrc was given.
-if test "${with_llvmsrc+set}" = set; then
-  withval="$with_llvmsrc"
-  llvm_src="$withval"
-else
-  llvm_src=`cd ${srcdir}/../..; pwd`
-fi;
-  LLVM_SRC=$llvm_src
-
-
-# Check whether --with-llvmobj or --without-llvmobj was given.
-if test "${with_llvmobj+set}" = set; then
-  withval="$with_llvmobj"
-  llvm_obj="$withval"
-else
-  llvm_obj=`cd ../..; pwd`
-fi;
-  LLVM_OBJ=$llvm_obj
-
-            ac_config_commands="$ac_config_commands setup"
 
 
 
+ac_config_files="$ac_config_files Makefile.common"
 
 
-          ac_config_files="$ac_config_files Makefile.common"
+ac_config_commands="$ac_config_commands Makefile"
 
 
-          ac_config_commands="$ac_config_commands Makefile"
+ac_config_commands="$ac_config_commands lib/Makefile"
 
 
-          ac_config_commands="$ac_config_commands lib/Makefile"
+ac_config_commands="$ac_config_commands lib/sample/Makefile"
 
 
-          ac_config_commands="$ac_config_commands lib/sample/Makefile"
+ac_config_commands="$ac_config_commands tools/Makefile"
 
 
-          ac_config_commands="$ac_config_commands tools/Makefile"
-
-
-          ac_config_commands="$ac_config_commands tools/sample/Makefile"
+ac_config_commands="$ac_config_commands tools/sample/Makefile"
 
 
 
@@ -1335,39 +1721,58 @@ _ACEOF
 
 # The following way of writing the cache mishandles newlines in values,
 # but we know of no workaround that is simple, portable, and efficient.
-# So, don't put newlines in cache variables' values.
+# So, we kill variables containing newlines.
 # Ultrix sh set writes to stderr and can't be redirected directly,
 # and sets the high bit in the cache file unless we assign to the vars.
-{
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5
+echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      *) $as_unset $ac_var ;;
+      esac ;;
+    esac
+  done
+
   (set) 2>&1 |
-    case `(ac_space=' '; set | grep ac_space) 2>&1` in
-    *ac_space=\ *)
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
       # `set' does not quote correctly, so add quotes (double-quote
       # substitution turns \\\\ into \\, and sed turns \\ into \).
       sed -n \
 	"s/'/'\\\\''/g;
 	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
-      ;;
+      ;; #(
     *)
       # `set' quotes correctly as required by POSIX, so do not add quotes.
-      sed -n \
-	"s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
       ;;
-    esac;
-} |
+    esac |
+    sort
+) |
   sed '
+     /^ac_cv_env_/b end
      t clear
-     : clear
+     :clear
      s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
      t end
-     /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
-     : end' >>confcache
-if diff $cache_file confcache >/dev/null 2>&1; then :; else
-  if test -w $cache_file; then
-    test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    test "x$cache_file" != "x/dev/null" &&
+      { echo "$as_me:$LINENO: updating cache $cache_file" >&5
+echo "$as_me: updating cache $cache_file" >&6;}
     cat confcache >$cache_file
   else
-    echo "not updating unwritable cache $cache_file"
+    { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5
+echo "$as_me: not updating unwritable cache $cache_file" >&6;}
   fi
 fi
 rm -f confcache
@@ -1376,63 +1781,48 @@ test "x$prefix" = xNONE && prefix=$ac_default_prefix
 # Let make expand exec_prefix.
 test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
 
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
-# trailing colons and then remove the whole line if VPATH becomes empty
-# (actually we leave an empty line to preserve line numbers).
-if test "x$srcdir" = x.; then
-  ac_vpsub='/^[	 ]*VPATH[	 ]*=/{
-s/:*\$(srcdir):*/:/;
-s/:*\${srcdir}:*/:/;
-s/:*@srcdir@:*/:/;
-s/^\([^=]*=[	 ]*\):*/\1/;
-s/:*$//;
-s/^[^=]*=[	 ]*$//;
-}'
-fi
-
 # Transform confdefs.h into DEFS.
 # Protect against shell expansion while executing Makefile rules.
 # Protect against Makefile macro expansion.
 #
 # If the first sed substitution is executed (which looks for macros that
-# take arguments), then we branch to the quote section.  Otherwise,
+# take arguments), then branch to the quote section.  Otherwise,
 # look for a macro that doesn't take arguments.
-cat >confdef2opt.sed <<\_ACEOF
+ac_script='
 t clear
-: clear
-s,^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 (][^	 (]*([^)]*)\)[	 ]*\(.*\),-D\1=\2,g
+:clear
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 (][^	 (]*([^)]*)\)[	 ]*\(.*\)/-D\1=\2/g
 t quote
-s,^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 ][^	 ]*\)[	 ]*\(.*\),-D\1=\2,g
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 ][^	 ]*\)[	 ]*\(.*\)/-D\1=\2/g
 t quote
-d
-: quote
-s,[	 `~#$^&*(){}\\|;'"<>?],\\&,g
-s,\[,\\&,g
-s,\],\\&,g
-s,\$,$$,g
-p
-_ACEOF
-# We use echo to avoid assuming a particular line-breaking character.
-# The extra dot is to prevent the shell from consuming trailing
-# line-breaks from the sub-command output.  A line-break within
-# single-quotes doesn't work because, if this script is created in a
-# platform that uses two characters for line-breaks (e.g., DOS), tr
-# would break.
-ac_LF_and_DOT=`echo; echo .`
-DEFS=`sed -n -f confdef2opt.sed confdefs.h | tr "$ac_LF_and_DOT" ' .'`
-rm -f confdef2opt.sed
+b any
+:quote
+s/[	 `~#$^&*(){}\\|;'\''"<>?]/\\&/g
+s/\[/\\&/g
+s/\]/\\&/g
+s/\$/$$/g
+H
+:any
+${
+	g
+	s/^\n//
+	s/\n/ /g
+	p
+}
+'
+DEFS=`sed -n "$ac_script" confdefs.h`
 
 
 ac_libobjs=
 ac_ltlibobjs=
 for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
   # 1. Remove the extension, and $U if already installed.
-  ac_i=`echo "$ac_i" |
-	 sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
-  # 2. Add them.
-  ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
-  ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo'
 done
 LIBOBJS=$ac_libobjs
 
@@ -1470,11 +1860,35 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
   # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
   # is contrary to our usage.  Disable this feature.
   alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
-  set -o posix
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
 fi
+BIN_SH=xpg4; export BIN_SH # for Tru64
 DUALCASE=1; export DUALCASE # for MKS sh
 
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  echo "#! /bin/sh" >conf$$.sh
+  echo  "exit 0"   >>conf$$.sh
+  chmod +x conf$$.sh
+  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+    PATH_SEPARATOR=';'
+  else
+    PATH_SEPARATOR=:
+  fi
+  rm -f conf$$.sh
+fi
+
 # Support unset when possible.
 if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
   as_unset=unset
@@ -1483,8 +1897,43 @@ else
 fi
 
 
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+as_nl='
+'
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  { (exit 1); exit 1; }
+fi
+
 # Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
 PS1='$ '
 PS2='> '
 PS4='+ '
@@ -1498,18 +1947,19 @@ do
   if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
     eval $as_var=C; export $as_var
   else
-    $as_unset $as_var
+    ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
   fi
 done
 
 # Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
   as_expr=expr
 else
   as_expr=false
 fi
 
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
   as_basename=basename
 else
   as_basename=false
@@ -1517,159 +1967,120 @@ fi
 
 
 # Name of the executable.
-as_me=`$as_basename "$0" ||
+as_me=`$as_basename -- "$0" ||
 $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
 	 X"$0" : 'X\(//\)$' \| \
-	 X"$0" : 'X\(/\)$' \| \
-	 .     : '\(.\)' 2>/dev/null ||
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
 echo X/"$0" |
-    sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
-  	  /^X\/\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\/\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
 
-
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
-  echo "#! /bin/sh" >conf$$.sh
-  echo  "exit 0"   >>conf$$.sh
-  chmod +x conf$$.sh
-  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
-    PATH_SEPARATOR=';'
-  else
-    PATH_SEPARATOR=:
-  fi
-  rm -f conf$$.sh
-fi
+# CDPATH.
+$as_unset CDPATH
 
 
-  as_lineno_1=$LINENO
-  as_lineno_2=$LINENO
-  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
-  test "x$as_lineno_1" != "x$as_lineno_2" &&
-  test "x$as_lineno_3"  = "x$as_lineno_2"  || {
-  # Find who we are.  Look in the path if we contain no path at all
-  # relative or not.
-  case $0 in
-    *[\\/]* ) as_myself=$0 ;;
-    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
 
-       ;;
-  esac
-  # We did not find ourselves, most probably we were run as `sh COMMAND'
-  # in which case we are not to be found in the path.
-  if test "x$as_myself" = x; then
-    as_myself=$0
-  fi
-  if test ! -f "$as_myself"; then
-    { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
-echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
-   { (exit 1); exit 1; }; }
-  fi
-  case $CONFIG_SHELL in
-  '')
-    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for as_base in sh bash ksh sh5; do
-	 case $as_dir in
-	 /*)
-	   if ("$as_dir/$as_base" -c '
   as_lineno_1=$LINENO
   as_lineno_2=$LINENO
-  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
   test "x$as_lineno_1" != "x$as_lineno_2" &&
-  test "x$as_lineno_3"  = "x$as_lineno_2" ') 2>/dev/null; then
-	     $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
-	     $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
-	     CONFIG_SHELL=$as_dir/$as_base
-	     export CONFIG_SHELL
-	     exec "$CONFIG_SHELL" "$0" ${1+"$@"}
-	   fi;;
-	 esac
-       done
-done
-;;
-  esac
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
 
   # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
   # uniformly replaced by the line number.  The first 'sed' inserts a
-  # line-number line before each line; the second 'sed' does the real
-  # work.  The second script uses 'N' to pair each line-number line
-  # with the numbered line, and appends trailing '-' during
-  # substitution so that $LINENO is not a special case at line end.
+  # line-number line after each line using $LINENO; the second 'sed'
+  # does the real work.  The second script uses 'N' to pair each
+  # line-number line with the line containing $LINENO, and appends
+  # trailing '-' during substitution so that $LINENO is not a special
+  # case at line end.
   # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
-  # second 'sed' script.  Blame Lee E. McMahon for sed's syntax.  :-)
-  sed '=' <$as_myself |
+  # scripts with optimization help from Paolo Bonzini.  Blame Lee
+  # E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
     sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
       N
-      s,$,-,
-      : loop
-      s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
       t loop
-      s,-$,,
-      s,^['$as_cr_digits']*\n,,
+      s/-\n.*//
     ' >$as_me.lineno &&
-  chmod +x $as_me.lineno ||
-    { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
-echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+  chmod +x "$as_me.lineno" ||
+    { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
    { (exit 1); exit 1; }; }
 
   # Don't try to exec as it changes $[0], causing all sort of problems
   # (the dirname of $[0] is not the place where we might find the
-  # original and so on.  Autoconf is especially sensible to this).
-  . ./$as_me.lineno
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
   # Exit status is that of the last command.
   exit
 }
 
 
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
-  *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T='	' ;;
-  *c*,*  ) ECHO_N=-n ECHO_C= ECHO_T= ;;
-  *)       ECHO_N= ECHO_C='\c' ECHO_T= ;;
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+  case `echo 'x\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  *)   ECHO_C='\c';;
+  esac;;
+*)
+  ECHO_N='-n';;
 esac
 
-if expr a : '\(a\)' >/dev/null 2>&1; then
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
   as_expr=expr
 else
   as_expr=false
 fi
 
 rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir
+fi
 echo >conf$$.file
 if ln -s conf$$.file conf$$ 2>/dev/null; then
-  # We could just check for DJGPP; but this test a) works b) is more generic
-  # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
-  if test -f conf$$.exe; then
-    # Don't use ln at all; we don't have any links
+  as_ln_s='ln -s'
+  # ... but there are two gotchas:
+  # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+  # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+  # In both cases, we have to default to `cp -p'.
+  ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
     as_ln_s='cp -p'
-  else
-    as_ln_s='ln -s'
-  fi
 elif ln conf$$.file conf$$ 2>/dev/null; then
   as_ln_s=ln
 else
   as_ln_s='cp -p'
 fi
-rm -f conf$$ conf$$.exe conf$$.file
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
 
 if mkdir -p . 2>/dev/null; then
   as_mkdir_p=:
@@ -1678,7 +2089,19 @@ else
   as_mkdir_p=false
 fi
 
-as_executable_p="test -f"
+# Find out whether ``test -x'' works.  Don't use a zero-byte file, as
+# systems may use methods other than mode bits to determine executability.
+cat >conf$$.file <<_ASEOF
+#! /bin/sh
+exit 0
+_ASEOF
+chmod +x conf$$.file
+if test -x conf$$.file >/dev/null 2>&1; then
+  as_executable_p="test -x"
+else
+  as_executable_p=:
+fi
+rm -f conf$$.file
 
 # Sed expression to map a string onto a valid CPP name.
 as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -1687,31 +2110,14 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
 as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
 
 
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" 	$as_nl"
-
-# CDPATH.
-$as_unset CDPATH
-
 exec 6>&1
 
-# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# Save the log message, to keep $[0] and so on meaningful, and to
 # report actual input values of CONFIG_FILES etc. instead of their
-# values after options handling.  Logging --version etc. is OK.
-exec 5>>config.log
-{
-  echo
-  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
-## Running $as_me. ##
-_ASBOX
-} >&5
-cat >&5 <<_CSEOF
-
+# values after options handling.
+ac_log="
 This file was extended by [SAMPLE] $as_me [x.xx], which was
-generated by GNU Autoconf 2.59.  Invocation command line was
+generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
   CONFIG_HEADERS  = $CONFIG_HEADERS
@@ -1719,30 +2125,19 @@ generated by GNU Autoconf 2.59.  Invocation command line was
   CONFIG_COMMANDS = $CONFIG_COMMANDS
   $ $0 $@
 
-_CSEOF
-echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
-echo >&5
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
 _ACEOF
 
+cat >>$CONFIG_STATUS <<_ACEOF
 # Files that config.status was made for.
-if test -n "$ac_config_files"; then
-  echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_headers"; then
-  echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
-fi
+config_files="$ac_config_files"
+config_commands="$ac_config_commands"
 
-if test -n "$ac_config_links"; then
-  echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_commands"; then
-  echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
-fi
+_ACEOF
 
 cat >>$CONFIG_STATUS <<\_ACEOF
-
 ac_cs_usage="\
 \`$as_me' instantiates files from templates according to the
 current configuration.
@@ -1764,18 +2159,20 @@ Configuration commands:
 $config_commands
 
 Report bugs to <bug-autoconf@gnu.org>."
-_ACEOF
 
+_ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
 [SAMPLE] config.status [x.xx]
-configured by $0, generated by GNU Autoconf 2.59,
-  with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+configured by $0, generated by GNU Autoconf 2.60,
+  with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
-Copyright (C) 2003 Free Software Foundation, Inc.
+Copyright (C) 2006 Free Software Foundation, Inc.
 This config.status script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it."
-srcdir=$srcdir
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
 _ACEOF
 
 cat >>$CONFIG_STATUS <<\_ACEOF
@@ -1786,60 +2183,42 @@ while test $# != 0
 do
   case $1 in
   --*=*)
-    ac_option=`expr "x$1" : 'x\([^=]*\)='`
-    ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
     ac_shift=:
     ;;
-  -*)
+  *)
     ac_option=$1
     ac_optarg=$2
     ac_shift=shift
     ;;
-  *) # This is not an option, so the user has probably given explicit
-     # arguments.
-     ac_option=$1
-     ac_need_defaults=false;;
   esac
 
   case $ac_option in
   # Handling of the options.
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
   -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
     ac_cs_recheck=: ;;
-  --version | --vers* | -V )
-    echo "$ac_cs_version"; exit 0 ;;
-  --he | --h)
-    # Conflict between --help and --header
-    { { echo "$as_me:$LINENO: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&2;}
-   { (exit 1); exit 1; }; };;
-  --help | --hel | -h )
-    echo "$ac_cs_usage"; exit 0 ;;
-  --debug | --d* | -d )
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    echo "$ac_cs_version"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
     debug=: ;;
   --file | --fil | --fi | --f )
     $ac_shift
     CONFIG_FILES="$CONFIG_FILES $ac_optarg"
     ac_need_defaults=false;;
-  --header | --heade | --head | --hea )
-    $ac_shift
-    CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
-    ac_need_defaults=false;;
+  --he | --h |  --help | --hel | -h )
+    echo "$ac_cs_usage"; exit ;;
   -q | -quiet | --quiet | --quie | --qui | --qu | --q \
   | -silent | --silent | --silen | --sile | --sil | --si | --s)
     ac_cs_silent=: ;;
 
   # This is an error.
-  -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&2;}
+  -*) { echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2
    { (exit 1); exit 1; }; } ;;
 
-  *) ac_config_targets="$ac_config_targets $1" ;;
+  *) ac_config_targets="$ac_config_targets $1"
+     ac_need_defaults=false ;;
 
   esac
   shift
@@ -1855,41 +2234,53 @@ fi
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 if \$ac_cs_recheck; then
-  echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
-  exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+  CONFIG_SHELL=$SHELL
+  export CONFIG_SHELL
+  exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
 fi
 
 _ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  echo "$ac_log"
+} >&5
 
+_ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 #
-# INIT-COMMANDS section.
+# INIT-COMMANDS
 #
-
 llvm_src="${LLVM_SRC}"
 
 _ACEOF
 
-
-
 cat >>$CONFIG_STATUS <<\_ACEOF
+
+# Handling of arguments.
 for ac_config_target in $ac_config_targets
 do
-  case "$ac_config_target" in
-  # Handling of arguments.
-  "Makefile.common" ) CONFIG_FILES="$CONFIG_FILES Makefile.common" ;;
-  "setup" ) CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
-  "Makefile" ) CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
-  "lib/Makefile" ) CONFIG_COMMANDS="$CONFIG_COMMANDS lib/Makefile" ;;
-  "lib/sample/Makefile" ) CONFIG_COMMANDS="$CONFIG_COMMANDS lib/sample/Makefile" ;;
-  "tools/Makefile" ) CONFIG_COMMANDS="$CONFIG_COMMANDS tools/Makefile" ;;
-  "tools/sample/Makefile" ) CONFIG_COMMANDS="$CONFIG_COMMANDS tools/sample/Makefile" ;;
+  case $ac_config_target in
+    "setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
+    "Makefile.common") CONFIG_FILES="$CONFIG_FILES Makefile.common" ;;
+    "Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
+    "lib/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS lib/Makefile" ;;
+    "lib/sample/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS lib/sample/Makefile" ;;
+    "tools/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS tools/Makefile" ;;
+    "tools/sample/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS tools/sample/Makefile" ;;
+
   *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
 echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
    { (exit 1); exit 1; }; };;
   esac
 done
 
+
 # If the user did not use the arguments to specify the items to instantiate,
 # then the envvar interface is used.  Set only those that are not.
 # We use the long form for the default assignment because of an extremely
@@ -1900,284 +2291,351 @@ if $ac_need_defaults; then
 fi
 
 # Have a temporary directory for convenience.  Make it in the build tree
-# simply because there is no reason to put it here, and in addition,
+# simply because there is no reason against having it here, and in addition,
 # creating and moving files from /tmp can sometimes cause problems.
-# Create a temporary directory, and hook for its removal unless debugging.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
 $debug ||
 {
-  trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+  tmp=
+  trap 'exit_status=$?
+  { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+' 0
   trap '{ (exit 1); exit 1; }' 1 2 13 15
 }
-
 # Create a (secure) tmp directory for tmp files.
 
 {
-  tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
   test -n "$tmp" && test -d "$tmp"
 }  ||
 {
-  tmp=./confstat$$-$RANDOM
-  (umask 077 && mkdir $tmp)
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
 } ||
 {
    echo "$me: cannot create a temporary directory in ." >&2
    { (exit 1); exit 1; }
 }
 
-_ACEOF
-
-cat >>$CONFIG_STATUS <<_ACEOF
-
 #
-# CONFIG_FILES section.
+# Set up the sed scripts for CONFIG_FILES section.
 #
 
 # No need to generate the scripts if there are no CONFIG_FILES.
 # This happens for instance when ./config.status config.h
-if test -n "\$CONFIG_FILES"; then
-  # Protect against being on the right side of a sed subst in config.status.
-  sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
-   s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
-s,@SHELL@,$SHELL,;t t
-s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
-s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
-s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
-s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
-s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
-s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
-s,@exec_prefix@,$exec_prefix,;t t
-s,@prefix@,$prefix,;t t
-s,@program_transform_name@,$program_transform_name,;t t
-s,@bindir@,$bindir,;t t
-s,@sbindir@,$sbindir,;t t
-s,@libexecdir@,$libexecdir,;t t
-s,@datadir@,$datadir,;t t
-s,@sysconfdir@,$sysconfdir,;t t
-s,@sharedstatedir@,$sharedstatedir,;t t
-s,@localstatedir@,$localstatedir,;t t
-s,@libdir@,$libdir,;t t
-s,@includedir@,$includedir,;t t
-s,@oldincludedir@,$oldincludedir,;t t
-s,@infodir@,$infodir,;t t
-s,@mandir@,$mandir,;t t
-s,@build_alias@,$build_alias,;t t
-s,@host_alias@,$host_alias,;t t
-s,@target_alias@,$target_alias,;t t
-s,@DEFS@,$DEFS,;t t
-s,@ECHO_C@,$ECHO_C,;t t
-s,@ECHO_N@,$ECHO_N,;t t
-s,@ECHO_T@,$ECHO_T,;t t
-s,@LIBS@,$LIBS,;t t
-s,@LLVM_SRC@,$LLVM_SRC,;t t
-s,@LLVM_OBJ@,$LLVM_OBJ,;t t
-s,@LIBOBJS@,$LIBOBJS,;t t
-s,@LTLIBOBJS@,$LTLIBOBJS,;t t
-CEOF
+if test -n "$CONFIG_FILES"; then
 
 _ACEOF
 
-  cat >>$CONFIG_STATUS <<\_ACEOF
-  # Split the substitutions into bite-sized pieces for seds with
-  # small command number limits, like on Digital OSF/1 and HP-UX.
-  ac_max_sed_lines=48
-  ac_sed_frag=1 # Number of current file.
-  ac_beg=1 # First line for current file.
-  ac_end=$ac_max_sed_lines # Line after last line for current file.
-  ac_more_lines=:
-  ac_sed_cmds=
-  while $ac_more_lines; do
-    if test $ac_beg -gt 1; then
-      sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
-    else
-      sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
-    fi
-    if test ! -s $tmp/subs.frag; then
-      ac_more_lines=false
-    else
-      # The purpose of the label and of the branching condition is to
-      # speed up the sed processing (if there are no `@' at all, there
-      # is no need to browse any of the substitutions).
-      # These are the two extra sed commands mentioned above.
-      (echo ':t
-  /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
-      if test -z "$ac_sed_cmds"; then
-	ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
-      else
-	ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
-      fi
-      ac_sed_frag=`expr $ac_sed_frag + 1`
-      ac_beg=$ac_end
-      ac_end=`expr $ac_end + $ac_max_sed_lines`
-    fi
-  done
-  if test -z "$ac_sed_cmds"; then
-    ac_sed_cmds=cat
+
+
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  cat >conf$$subs.sed <<_ACEOF
+SHELL!$SHELL$ac_delim
+PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim
+PACKAGE_NAME!$PACKAGE_NAME$ac_delim
+PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim
+PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim
+PACKAGE_STRING!$PACKAGE_STRING$ac_delim
+PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim
+exec_prefix!$exec_prefix$ac_delim
+prefix!$prefix$ac_delim
+program_transform_name!$program_transform_name$ac_delim
+bindir!$bindir$ac_delim
+sbindir!$sbindir$ac_delim
+libexecdir!$libexecdir$ac_delim
+datarootdir!$datarootdir$ac_delim
+datadir!$datadir$ac_delim
+sysconfdir!$sysconfdir$ac_delim
+sharedstatedir!$sharedstatedir$ac_delim
+localstatedir!$localstatedir$ac_delim
+includedir!$includedir$ac_delim
+oldincludedir!$oldincludedir$ac_delim
+docdir!$docdir$ac_delim
+infodir!$infodir$ac_delim
+htmldir!$htmldir$ac_delim
+dvidir!$dvidir$ac_delim
+pdfdir!$pdfdir$ac_delim
+psdir!$psdir$ac_delim
+libdir!$libdir$ac_delim
+localedir!$localedir$ac_delim
+mandir!$mandir$ac_delim
+DEFS!$DEFS$ac_delim
+ECHO_C!$ECHO_C$ac_delim
+ECHO_N!$ECHO_N$ac_delim
+ECHO_T!$ECHO_T$ac_delim
+LIBS!$LIBS$ac_delim
+build_alias!$build_alias$ac_delim
+host_alias!$host_alias$ac_delim
+target_alias!$target_alias$ac_delim
+LLVM_SRC!$LLVM_SRC$ac_delim
+LLVM_OBJ!$LLVM_OBJ$ac_delim
+LIBOBJS!$LIBOBJS$ac_delim
+LTLIBOBJS!$LTLIBOBJS$ac_delim
+_ACEOF
+
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 41; then
+    break
+  elif $ac_last_try; then
+    { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
   fi
-fi # test -n "$CONFIG_FILES"
+done
+
+ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed`
+if test -n "$ac_eof"; then
+  ac_eof=`echo "$ac_eof" | sort -nru | sed 1q`
+  ac_eof=`expr $ac_eof + 1`
+fi
 
+cat >>$CONFIG_STATUS <<_ACEOF
+cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end
+_ACEOF
+sed '
+s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g
+s/^/s,@/; s/!/@,|#_!!_#|/
+:n
+t n
+s/'"$ac_delim"'$/,g/; t
+s/$/\\/; p
+N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n
+' >>$CONFIG_STATUS <conf$$subs.sed
+rm -f conf$$subs.sed
+cat >>$CONFIG_STATUS <<_ACEOF
+:end
+s/|#_!!_#|//g
+CEOF$ac_eof
 _ACEOF
+
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[	 ]*\):*/\1/
+s/:*$//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
 cat >>$CONFIG_STATUS <<\_ACEOF
-for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
-  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
-  case $ac_file in
-  - | *:- | *:-:* ) # input from stdin
-	cat >$tmp/stdin
-	ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
-	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
-  *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
-	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
-  * )   ac_file_in=$ac_file.in ;;
+fi # test -n "$CONFIG_FILES"
+
+
+for ac_tag in  :F $CONFIG_FILES      :C $CONFIG_COMMANDS
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5
+echo "$as_me: error: Invalid tag $ac_tag." >&2;}
+   { (exit 1); exit 1; }; };;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
   esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
 
-  # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
-  ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5
+echo "$as_me: error: cannot find input file: $ac_f" >&2;}
+   { (exit 1); exit 1; }; };;
+      esac
+      ac_file_inputs="$ac_file_inputs $ac_f"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input="Generated from "`IFS=:
+	  echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure."
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+    fi
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$tmp/stdin";;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
 $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
 	 X"$ac_file" : 'X\(//\)[^/]' \| \
 	 X"$ac_file" : 'X\(//\)$' \| \
-	 X"$ac_file" : 'X\(/\)' \| \
-	 .     : '\(.\)' 2>/dev/null ||
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
 echo X"$ac_file" |
-    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
-  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
-  	  /^X\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
-  { if $as_mkdir_p; then
-    mkdir -p "$ac_dir"
-  else
-    as_dir="$ac_dir"
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  { as_dir="$ac_dir"
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
     as_dirs=
-    while test ! -d "$as_dir"; do
-      as_dirs="$as_dir $as_dirs"
-      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
 $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
 	 X"$as_dir" : 'X\(//\)[^/]' \| \
 	 X"$as_dir" : 'X\(//\)$' \| \
-	 X"$as_dir" : 'X\(/\)' \| \
-	 .     : '\(.\)' 2>/dev/null ||
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
 echo X"$as_dir" |
-    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
-  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
-  	  /^X\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
     done
-    test ! -n "$as_dirs" || mkdir $as_dirs
-  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+echo "$as_me: error: cannot create directory $as_dir" >&2;}
    { (exit 1); exit 1; }; }; }
-
   ac_builddir=.
 
-if test "$ac_dir" != .; then
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
   ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
-  # A "../" for each directory in $ac_dir_suffix.
-  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
-  ac_dir_suffix= ac_top_builddir=
-fi
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
 
 case $srcdir in
-  .)  # No --srcdir option.  We are building in place.
+  .)  # We are building in place.
     ac_srcdir=.
-    if test -z "$ac_top_builddir"; then
-       ac_top_srcdir=.
-    else
-       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
-    fi ;;
-  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
     ac_srcdir=$srcdir$ac_dir_suffix;
-    ac_top_srcdir=$srcdir ;;
-  *) # Relative path.
-    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
-    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
 esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
 
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
-  case "$ac_dir" in
-  .) ac_abs_builddir=`pwd`;;
-  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
-  *) ac_abs_builddir=`pwd`/"$ac_dir";;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
-  case ${ac_top_builddir}. in
-  .) ac_abs_top_builddir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
-  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
-  case $ac_srcdir in
-  .) ac_abs_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
-  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
-  case $ac_top_srcdir in
-  .) ac_abs_top_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
-  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
-  esac;;
-esac
 
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
 
+_ACEOF
 
-  if test x"$ac_file" != x-; then
-    { echo "$as_me:$LINENO: creating $ac_file" >&5
-echo "$as_me: creating $ac_file" >&6;}
-    rm -f "$ac_file"
-  fi
-  # Let's still pretend it is `configure' which instantiates (i.e., don't
-  # use $as_me), people would be surprised to read:
-  #    /* config.h.  Generated by config.status.  */
-  if test x"$ac_file" = x-; then
-    configure_input=
-  else
-    configure_input="$ac_file.  "
-  fi
-  configure_input=$configure_input"Generated from `echo $ac_file_in |
-				     sed 's,.*/,,'` by configure."
-
-  # First look for the input files in the build tree, otherwise in the
-  # src tree.
-  ac_file_inputs=`IFS=:
-    for f in $ac_file_in; do
-      case $f in
-      -) echo $tmp/stdin ;;
-      [\\/$]*)
-	 # Absolute (can't be DOS-style, as IFS=:)
-	 test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
-   { (exit 1); exit 1; }; }
-	 echo "$f";;
-      *) # Relative
-	 if test -f "$f"; then
-	   # Build tree
-	   echo "$f"
-	 elif test -f "$srcdir/$f"; then
-	   # Source tree
-	   echo "$srcdir/$f"
-	 else
-	   # /dev/null tree
-	   { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
-   { (exit 1); exit 1; }; }
-	 fi;;
-      esac
-    done` || { (exit 1); exit 1; }
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+
+case `sed -n '/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p
+' $ac_file_inputs` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
 _ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+    s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
 cat >>$CONFIG_STATUS <<_ACEOF
   sed "$ac_vpsub
 $extrasub
@@ -2185,155 +2643,55 @@ _ACEOF
 cat >>$CONFIG_STATUS <<\_ACEOF
 :t
 /@[a-zA-Z_][a-zA-Z_0-9]*@/!b
-s,@configure_input@,$configure_input,;t t
-s,@srcdir@,$ac_srcdir,;t t
-s,@abs_srcdir@,$ac_abs_srcdir,;t t
-s,@top_srcdir@,$ac_top_srcdir,;t t
-s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
-s,@builddir@,$ac_builddir,;t t
-s,@abs_builddir@,$ac_abs_builddir,;t t
-s,@top_builddir@,$ac_top_builddir,;t t
-s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
-" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
-  rm -f $tmp/stdin
-  if test x"$ac_file" != x-; then
-    mv $tmp/out $ac_file
-  else
-    cat $tmp/out
-    rm -f $tmp/out
-  fi
-
-done
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-#
-# CONFIG_COMMANDS section.
-#
-for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue
-  ac_dest=`echo "$ac_file" | sed 's,:.*,,'`
-  ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'`
-  ac_dir=`(dirname "$ac_dest") 2>/dev/null ||
-$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
-	 X"$ac_dest" : 'X\(//\)[^/]' \| \
-	 X"$ac_dest" : 'X\(//\)$' \| \
-	 X"$ac_dest" : 'X\(/\)' \| \
-	 .     : '\(.\)' 2>/dev/null ||
-echo X"$ac_dest" |
-    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
-  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
-  	  /^X\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
-  { if $as_mkdir_p; then
-    mkdir -p "$ac_dir"
-  else
-    as_dir="$ac_dir"
-    as_dirs=
-    while test ! -d "$as_dir"; do
-      as_dirs="$as_dir $as_dirs"
-      as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
-	 X"$as_dir" : 'X\(//\)[^/]' \| \
-	 X"$as_dir" : 'X\(//\)$' \| \
-	 X"$as_dir" : 'X\(/\)' \| \
-	 .     : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
-    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
-  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
-  	  /^X\(\/\/\)$/{ s//\1/; q; }
-  	  /^X\(\/\).*/{ s//\1/; q; }
-  	  s/.*/./; q'`
-    done
-    test ! -n "$as_dirs" || mkdir $as_dirs
-  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
-   { (exit 1); exit 1; }; }; }
-
-  ac_builddir=.
-
-if test "$ac_dir" != .; then
-  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
-  # A "../" for each directory in $ac_dir_suffix.
-  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
-  ac_dir_suffix= ac_top_builddir=
-fi
+s&@configure_input@&$configure_input&;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+$ac_datarootdir_hack
+" $ac_file_inputs | sed -f "$tmp/subs-1.sed" >$tmp/out
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+  { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&5
+echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&2;}
+
+  rm -f "$tmp/stdin"
+  case $ac_file in
+  -) cat "$tmp/out"; rm -f "$tmp/out";;
+  *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;;
+  esac
+ ;;
 
-case $srcdir in
-  .)  # No --srcdir option.  We are building in place.
-    ac_srcdir=.
-    if test -z "$ac_top_builddir"; then
-       ac_top_srcdir=.
-    else
-       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
-    fi ;;
-  [\\/]* | ?:[\\/]* )  # Absolute path.
-    ac_srcdir=$srcdir$ac_dir_suffix;
-    ac_top_srcdir=$srcdir ;;
-  *) # Relative path.
-    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
-    ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
 
-# Do not use `cd foo && pwd` to compute absolute paths, because
-# the directories may not exist.
-case `pwd` in
-.) ac_abs_builddir="$ac_dir";;
-*)
-  case "$ac_dir" in
-  .) ac_abs_builddir=`pwd`;;
-  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
-  *) ac_abs_builddir=`pwd`/"$ac_dir";;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_builddir=${ac_top_builddir}.;;
-*)
-  case ${ac_top_builddir}. in
-  .) ac_abs_top_builddir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
-  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_srcdir=$ac_srcdir;;
-*)
-  case $ac_srcdir in
-  .) ac_abs_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
-  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
-  esac;;
-esac
-case $ac_abs_builddir in
-.) ac_abs_top_srcdir=$ac_top_srcdir;;
-*)
-  case $ac_top_srcdir in
-  .) ac_abs_top_srcdir=$ac_abs_builddir;;
-  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
-  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
-  esac;;
-esac
+  :C)  { echo "$as_me:$LINENO: executing $ac_file commands" >&5
+echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
 
 
-  { echo "$as_me:$LINENO: executing $ac_dest commands" >&5
-echo "$as_me: executing $ac_dest commands" >&6;}
-  case $ac_dest in
-    Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile`
+  case $ac_file$ac_mode in
+    "Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile`
    ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;;
-    lib/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile`
+    "lib/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile`
    ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;;
-    lib/sample/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/sample/Makefile`
+    "lib/sample/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/sample/Makefile`
    ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/sample/Makefile lib/sample/Makefile ;;
-    tools/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile`
+    "tools/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile`
    ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;;
-    tools/sample/Makefile ) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/sample/Makefile`
+    "tools/sample/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/sample/Makefile`
    ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/sample/Makefile tools/sample/Makefile ;;
+
   esac
-done
-_ACEOF
+done # for ac_tag
 
-cat >>$CONFIG_STATUS <<\_ACEOF
 
 { (exit 0); exit 0; }
 _ACEOF
@@ -2362,4 +2720,3 @@ if test "$no_create" != yes; then
   $ac_cs_success || { (exit 1); exit 1; }
 fi
 
-
diff --git a/projects/sample/lib/sample/sample.c b/projects/sample/lib/sample/sample.c
index 8ebb5ecfae68..a5ae28091bc4 100644
--- a/projects/sample/lib/sample/sample.c
+++ b/projects/sample/lib/sample/sample.c
@@ -11,7 +11,7 @@
 #include <stdlib.h>
 
 /* LLVM Header File
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 */
 
 /* Header file global to this project */
diff --git a/runtime/libprofile/CommonProfiling.c b/runtime/libprofile/CommonProfiling.c
index 8b27a2576974..1c1771c3063e 100644
--- a/runtime/libprofile/CommonProfiling.c
+++ b/runtime/libprofile/CommonProfiling.c
@@ -2,17 +2,18 @@
 |*
 |*                     The LLVM Compiler Infrastructure
 |*
-|* This file is distributed under the University of Illinois Open Source      
-|* License. See LICENSE.TXT for details.                                      
-|* 
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
 |*===----------------------------------------------------------------------===*|
-|* 
+|*
 |* This file implements functions used by the various different types of
 |* profiling implementations.
 |*
 \*===----------------------------------------------------------------------===*/
 
 #include "Profiling.h"
+#include <assert.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -74,26 +75,23 @@ int save_arguments(int argc, const char **argv) {
 }
 
 
-/* write_profiling_data - Write a raw block of profiling counters out to the
- * llvmprof.out file.  Note that we allow programs to be instrumented with
- * multiple different kinds of instrumentation.  For this reason, this function
- * may be called more than once.
+/*
+ * Retrieves the file descriptor for the profile file.
  */
-void write_profiling_data(enum ProfilingType PT, unsigned *Start,
-                          unsigned NumElements) {
+int getOutFile() {
   static int OutFile = -1;
-  int PTy;
-  
-  /* If this is the first time this function is called, open the output file for
-   * appending, creating it if it does not already exist.
+
+  /* If this is the first time this function is called, open the output file
+   * for appending, creating it if it does not already exist.
    */
   if (OutFile == -1) {
-    OutFile = open(OutputFilename, O_CREAT | O_WRONLY | O_APPEND, 0666);
+    OutFile = open(OutputFilename, O_CREAT | O_WRONLY, 0666);
+    lseek(OutFile, 0, SEEK_END); /* O_APPEND prevents seeking */
     if (OutFile == -1) {
       fprintf(stderr, "LLVM profiling runtime: while opening '%s': ",
               OutputFilename);
       perror("");
-      return;
+      return(OutFile);
     }
 
     /* Output the command line arguments to the file. */
@@ -108,10 +106,25 @@ void write_profiling_data(enum ProfilingType PT, unsigned *Start,
         write(OutFile, &Zeros, 4-(SavedArgsLength&3));
     }
   }
- 
+  return(OutFile);
+}
+
+/* write_profiling_data - Write a raw block of profiling counters out to the
+ * llvmprof.out file.  Note that we allow programs to be instrumented with
+ * multiple different kinds of instrumentation.  For this reason, this function
+ * may be called more than once.
+ */
+void write_profiling_data(enum ProfilingType PT, unsigned *Start,
+                          unsigned NumElements) {
+  int PTy;
+  int outFile = getOutFile();
+
   /* Write out this record! */
   PTy = PT;
-  write(OutFile, &PTy, sizeof(int));
-  write(OutFile, &NumElements, sizeof(unsigned));
-  write(OutFile, Start, NumElements*sizeof(unsigned));
+  if( write(outFile, &PTy, sizeof(int)) < 0 ||
+      write(outFile, &NumElements, sizeof(unsigned)) < 0 ||
+      write(outFile, Start, NumElements*sizeof(unsigned)) < 0 ) {
+    fprintf(stderr,"error: unable to write to output file.");
+    exit(0);
+  }
 }
diff --git a/runtime/libprofile/PathProfiling.c b/runtime/libprofile/PathProfiling.c
new file mode 100644
index 000000000000..651e63cbdd21
--- /dev/null
+++ b/runtime/libprofile/PathProfiling.c
@@ -0,0 +1,266 @@
+/*===-- PathProfiling.c - Support library for path profiling --------------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+|*===----------------------------------------------------------------------===*|
+|*
+|* This file implements the call back routines for the path profiling
+|* instrumentation pass.  This should be used with the -insert-path-profiling
+|* LLVM pass.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include "Profiling.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* note that this is used for functions with large path counts,
+         but it is unlikely those paths will ALL be executed */
+#define ARBITRARY_HASH_BIN_COUNT 100
+
+typedef struct pathHashEntry_s {
+  uint32_t pathNumber;
+  uint32_t pathCount;
+  struct pathHashEntry_s* next;
+} pathHashEntry_t;
+
+typedef struct pathHashTable_s {
+  pathHashEntry_t* hashBins[ARBITRARY_HASH_BIN_COUNT];
+  uint32_t pathCounts;
+} pathHashTable_t;
+
+typedef struct {
+  enum ProfilingStorageType type;
+  uint32_t size;
+  void* array;
+} ftEntry_t;
+
+/* pointer to the function table allocated in the instrumented program */
+ftEntry_t* ft;
+uint32_t ftSize;
+
+/* write an array table to file */
+void writeArrayTable(uint32_t fNumber, ftEntry_t* ft, uint32_t* funcCount) {
+  int outFile = getOutFile();
+  uint32_t arrayHeaderLocation = 0;
+  uint32_t arrayCurrentLocation = 0;
+  uint32_t arrayIterator = 0;
+  uint32_t functionUsed = 0;
+  uint32_t pathCounts = 0;
+
+  /* look through each entry in the array to determine whether the function
+     was executed at all */
+  for( arrayIterator = 0; arrayIterator < ft->size; arrayIterator++ ) {
+    uint32_t pc = ((uint32_t*)ft->array)[arrayIterator];
+
+    /* was this path executed? */
+    if( pc ) {
+      PathProfileTableEntry pte;
+      pte.pathNumber = arrayIterator;
+      pte.pathCounter = pc;
+      pathCounts++;
+
+      /* one-time initialization stuff */
+      if(!functionUsed) {
+        arrayHeaderLocation = lseek(outFile, 0, SEEK_CUR);
+        lseek(outFile, sizeof(PathProfileHeader), SEEK_CUR);
+        functionUsed = 1;
+        (*funcCount)++;
+      }
+
+      /* write path data */
+      if (write(outFile, &pte, sizeof(PathProfileTableEntry)) < 0) {
+        fprintf(stderr, "error: unable to write path entry to output file.\n");
+        return;
+      }
+    }
+  }
+
+  /* If this function was executed, write the header */
+  if( functionUsed ) {
+    PathProfileHeader fHeader;
+    fHeader.fnNumber = fNumber;
+    fHeader.numEntries = pathCounts;
+
+    arrayCurrentLocation = lseek(outFile, 0, SEEK_CUR);
+    lseek(outFile, arrayHeaderLocation, SEEK_SET);
+
+    if (write(outFile, &fHeader, sizeof(PathProfileHeader)) < 0) {
+      fprintf(stderr,
+              "error: unable to write function header to output file.\n");
+      return;
+    }
+
+    lseek(outFile, arrayCurrentLocation, SEEK_SET);
+  }
+}
+
+inline uint32_t hash (uint32_t key) {
+  /* this may benifit from a proper hash function */
+  return key%ARBITRARY_HASH_BIN_COUNT;
+}
+
+/* output a specific function's hash table to the profile file */
+void writeHashTable(uint32_t functionNumber, pathHashTable_t* hashTable) {
+  int outFile = getOutFile();
+  PathProfileHeader header;
+  uint32_t i;
+
+  header.fnNumber = functionNumber;
+  header.numEntries = hashTable->pathCounts;
+
+  if (write(outFile, &header, sizeof(PathProfileHeader)) < 0) {
+    fprintf(stderr, "error: unable to write function header to output file.\n");
+    return;
+  }
+
+  for (i = 0; i < ARBITRARY_HASH_BIN_COUNT; i++) {
+    pathHashEntry_t* hashEntry = hashTable->hashBins[i];
+
+    while (hashEntry) {
+      pathHashEntry_t* temp;
+
+      PathProfileTableEntry pte;
+      pte.pathNumber = hashEntry->pathNumber;
+      pte.pathCounter = hashEntry->pathCount;
+
+      if (write(outFile, &pte, sizeof(PathProfileTableEntry)) < 0) {
+        fprintf(stderr, "error: unable to write path entry to output file.\n");
+        return;
+      }
+
+      temp = hashEntry;
+      hashEntry = hashEntry->next;
+      free (temp);
+
+    }
+  }
+}
+
+/* Return a pointer to this path's specific path counter */
+inline uint32_t* getPathCounter(uint32_t functionNumber, uint32_t pathNumber) {
+  pathHashTable_t* hashTable;
+  pathHashEntry_t* hashEntry;
+  uint32_t index = hash(pathNumber);
+
+  if( ft[functionNumber-1].array == 0)
+    ft[functionNumber-1].array = calloc(sizeof(pathHashTable_t), 1);
+
+  hashTable = (pathHashTable_t*)((ftEntry_t*)ft)[functionNumber-1].array;
+  hashEntry = hashTable->hashBins[index];
+
+  while (hashEntry) {
+    if (hashEntry->pathNumber == pathNumber) {
+      return &hashEntry->pathCount;
+    }
+
+    hashEntry = hashEntry->next;
+  }
+
+  hashEntry = malloc(sizeof(pathHashEntry_t));
+  hashEntry->pathNumber = pathNumber;
+  hashEntry->pathCount = 0;
+  hashEntry->next = hashTable->hashBins[index];
+  hashTable->hashBins[index] = hashEntry;
+  hashTable->pathCounts++;
+  return &hashEntry->pathCount;
+}
+
+/* Increment a specific path's count */
+void llvm_increment_path_count (uint32_t functionNumber, uint32_t pathNumber) {
+  uint32_t* pathCounter = getPathCounter(functionNumber, pathNumber);
+  if( *pathCounter < 0xffffffff )
+    (*pathCounter)++;
+}
+
+/* Increment a specific path's count */
+void llvm_decrement_path_count (uint32_t functionNumber, uint32_t pathNumber) {
+  uint32_t* pathCounter = getPathCounter(functionNumber, pathNumber);
+  (*pathCounter)--;
+}
+
+/*
+ * Writes out a path profile given a function table, in the following format.
+ *
+ *
+ *      | <-- 32 bits --> |
+ *      +-----------------+-----------------+
+ * 0x00 | profileType     | functionCount   |
+ *      +-----------------+-----------------+
+ * 0x08 | functionNum     | profileEntries  |  // function 1
+ *      +-----------------+-----------------+
+ * 0x10 | pathNumber      | pathCounter     |  // entry 1.1
+ *      +-----------------+-----------------+
+ * 0x18 | pathNumber      | pathCounter     |  // entry 1.2
+ *      +-----------------+-----------------+
+ *  ... |       ...       |       ...       |  // entry 1.n
+ *      +-----------------+-----------------+
+ *  ... | functionNum     | profileEntries  |  // function 2
+ *      +-----------------+-----------------+
+ *  ... | pathNumber      | pathCounter     |  // entry 2.1
+ *      +-----------------+-----------------+
+ *  ... | pathNumber      | pathCounter     |  // entry 2.2
+ *      +-----------------+-----------------+
+ *  ... |       ...       |       ...       |  // entry 2.n
+ *      +-----------------+-----------------+
+ *
+ */
+static void pathProfAtExitHandler() {
+  int outFile = getOutFile();
+  uint32_t i;
+  uint32_t header[2] = { PathInfo, 0 };
+  uint32_t headerLocation;
+  uint32_t currentLocation;
+
+  /* skip over the header for now */
+  headerLocation = lseek(outFile, 0, SEEK_CUR);
+  lseek(outFile, 2*sizeof(uint32_t), SEEK_CUR);
+
+  /* Iterate through each function */
+  for( i = 0; i < ftSize; i++ ) {
+    if( ft[i].type == ProfilingArray ) {
+      writeArrayTable(i+1,&ft[i],header + 1);
+
+    } else if( ft[i].type == ProfilingHash ) {
+      /* If the hash exists, write it to file */
+      if( ft[i].array ) {
+        writeHashTable(i+1,ft[i].array);
+        header[1]++;
+        free(ft[i].array);
+      }
+    }
+  }
+
+  /* Setup and write the path profile header */
+  currentLocation = lseek(outFile, 0, SEEK_CUR);
+  lseek(outFile, headerLocation, SEEK_SET);
+
+  if (write(outFile, header, sizeof(header)) < 0) {
+    fprintf(stderr,
+            "error: unable to write path profile header to output file.\n");
+    return;
+  }
+
+  lseek(outFile, currentLocation, SEEK_SET);
+}
+/* llvm_start_path_profiling - This is the main entry point of the path
+ * profiling library.  It is responsible for setting up the atexit handler.
+ */
+int llvm_start_path_profiling(int argc, const char** argv,
+                              void* functionTable, uint32_t numElements) {
+  int Ret = save_arguments(argc, argv);
+  ft = functionTable;
+  ftSize = numElements;
+  atexit(pathProfAtExitHandler);
+
+  return Ret;
+}
diff --git a/runtime/libprofile/Profiling.h b/runtime/libprofile/Profiling.h
index a7e3ccc72b6c..c6b9a4d71c02 100644
--- a/runtime/libprofile/Profiling.h
+++ b/runtime/libprofile/Profiling.h
@@ -1,9 +1,9 @@
-/*===-- Profiling.h - Profiling support library support routines --*- C -*-===*\
+/*===-- Profiling.h - Profiling support library support routines ----------===*\
 |*
 |*                     The LLVM Compiler Infrastructure
 |*
-|* This file is distributed under the University of Illinois Open Source      
-|* License. See LICENSE.TXT for details.                                      
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
 |*
 |*===----------------------------------------------------------------------===*|
 |*
@@ -22,6 +22,11 @@
  */
 int save_arguments(int argc, const char **argv);
 
+/*
+ * Retrieves the file descriptor for the profile file.
+ */
+int getOutFile();
+
 /* write_profiling_data - Write out a typed packet of profiling data to the
  * current output file.
  */
diff --git a/runtime/libprofile/libprofile.exports b/runtime/libprofile/libprofile.exports
index f45ff4760189..b8057c7aac96 100644
--- a/runtime/libprofile/libprofile.exports
+++ b/runtime/libprofile/libprofile.exports
@@ -1,4 +1,7 @@
 llvm_start_edge_profiling
 llvm_start_opt_edge_profiling
+llvm_start_path_profiling
 llvm_start_basic_block_tracing
 llvm_trace_basic_block
+llvm_increment_path_count
+llvm_decrement_path_count
diff --git a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
index 6b50a168cd0a..1c2d910c1091 100644
--- a/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
+++ b/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
@@ -2,7 +2,7 @@
 ; is performed.  It is not legal to delete the second load instruction because
 ; the value computed by the first load instruction is changed by the store.
 
-; RUN: opt < %s -gvn -instcombine -S | grep DONOTREMOVE
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep DONOTREMOVE
 
 define i32 @test() {
 	%A = alloca i32
diff --git a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
index f7e82951da76..5d200774da5f 100644
--- a/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
+++ b/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -instcombine -S | grep sub
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep sub
 
 ; BasicAA was incorrectly concluding that P1 and P2 didn't conflict!
 
diff --git a/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll b/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
index d439dfc530ba..8ca346985593 100644
--- a/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
+++ b/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -disable-output
+; RUN: opt < %s -basicaa -licm -disable-output
 	%struct..apr_array_header_t = type { i32*, i32, i32, i32, i8* }
 	%struct..apr_table_t = type { %struct..apr_array_header_t, i32, [32 x i32], [32 x i32] }
 
diff --git a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
index 637d8f0db486..56e333937013 100644
--- a/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
+++ b/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
@@ -1,6 +1,6 @@
 ; In this test, a local alloca cannot alias an incoming argument.
 
-; RUN: opt < %s -gvn -instcombine -S | not grep sub
+; RUN: opt < %s -basicaa -gvn -instcombine -S | not grep sub
 
 define i32 @test(i32* %P) {
 	%X = alloca i32
diff --git a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
index 911f78cc827e..010a45881039 100644
--- a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
+++ b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll
@@ -1,7 +1,7 @@
 ; This testcase consists of alias relations which should be completely
 ; resolvable by basicaa.
 
-; RUN: opt < %s -aa-eval -print-may-aliases -disable-output \
+; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output \
 ; RUN: |& not grep May:
 
 %T = type { i32, [10 x i8] }
diff --git a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
index 8166b979ddab..ce01db647ffa 100644
--- a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
+++ b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll
@@ -1,7 +1,7 @@
 ; This testcase consists of alias relations which should be completely
 ; resolvable by basicaa, but require analysis of getelementptr constant exprs.
 
-; RUN: opt < %s -aa-eval -print-may-aliases -disable-output \
+; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output \
 ; RUN: |& not grep May:
 
 %T = type { i32, [10 x i8] }
diff --git a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
index e1cfd0348f35..56e4ed05cefe 100644
--- a/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
+++ b/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -dse -S | grep {store i32 0}
+; RUN: opt < %s -basicaa -dse -S | grep {store i32 0}
 
 define void @test({i32,i32 }* %P) {
 	%Q = getelementptr {i32,i32}* %P, i32 1
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
index 81248db32881..50fb222a5d6f 100644
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
+++ b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm
+; RUN: opt < %s -basicaa -licm
 
 %"java/lang/Object" = type { %struct.llvm_java_object_base }
 %"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
index 0e03db330c04..cc8431496ed7 100644
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
+++ b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -dse
+; RUN: opt < %s -basicaa -dse
 
 %"java/lang/Object" = type { %struct.llvm_java_object_base }
 %"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index 49327acdae0e..832059447272 100644
--- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval -disable-output |& grep {2 no alias respon}
+; RUN: opt < %s -basicaa -aa-eval -disable-output |& grep {2 no alias respon}
 ; TEST that A[1][0] may alias A[0][i].
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
diff --git a/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll b/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
index 85f53a6cda48..0db58156547a 100644
--- a/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
+++ b/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -disable-output
+; RUN: opt < %s -basicaa -licm -disable-output
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.7.0"
 
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index f699ba2911c5..563d3326367b 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -disable-output
+; RUN: opt < %s -basicaa -gvn -disable-output
 ; PR1774
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 8028afb0d003..52d0af1b81ce 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -disable-output
+; RUN: opt < %s -basicaa -gvn -disable-output
 ; PR1782
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll b/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
index ba29f3abcffe..170914447644 100644
--- a/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
+++ b/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -disable-output
+; RUN: opt < %s -basicaa -gvn -disable-output
 ; PR2395
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
index 06018ccd5b57..c9e553d69476 100644
--- a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
+++ b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval |& grep {1 no alias response}
+; RUN: opt < %s -basicaa -aa-eval |& grep {1 no alias response}
 
 declare noalias i32* @_Znwj(i32 %x) nounwind
 
diff --git a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
index 64754712d43a..5078dd53a79f 100644
--- a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
+++ b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn -instcombine -S < %s | FileCheck %s
+; RUN: opt -basicaa -gvn -instcombine -S < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8)
diff --git a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
index 771636f42cfa..17db2fd739ad 100644
--- a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
+++ b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {NoAlias:.*%P,.*@Z}
 ; If GEP base doesn't alias Z, then GEP doesn't alias Z.
 ; rdar://7282591
 
 @Y = common global i32 0
 @Z = common global i32 0
 
-define void @foo(i32 %cond) nounwind ssp {
+define void @foo(i32 %cond) nounwind {
 entry:
   %a = alloca i32
   %tmp = icmp ne i32 %cond, 0
diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
new file mode 100644
index 000000000000..2b0cd78fece3
--- /dev/null
+++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& grep {1 may alias}
+; PR7959
+
+target datalayout = "e-p:32:32:32"
+
+define i32 @test(i32* %tab, i32 %indvar) nounwind {
+  %tmp31 = mul i32 %indvar, -2
+  %tmp32 = add i32 %tmp31, 30
+  %t.5 = getelementptr i32* %tab, i32 %tmp32
+  %loada = load i32* %tab
+  store i32 0, i32* %t.5
+  %loadb = load i32* %tab
+  %rval = add i32 %loada, %loadb
+  ret i32 %rval
+}
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
index 7555a4c2a9b0..c3c4afcc2396 100644
--- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll
+++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-eval -print-all-alias-modref-info -disable-output < %s |& FileCheck  %s
+; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s |& FileCheck  %s
 
 declare void @callee(double* %callee_arg)
 declare void @nocap_callee(double* nocapture %nocap_callee_arg)
diff --git a/test/Analysis/BasicAA/byval.ll b/test/Analysis/BasicAA/byval.ll
index cdcafdf474f3..2aba7538ed5e 100644
--- a/test/Analysis/BasicAA/byval.ll
+++ b/test/Analysis/BasicAA/byval.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep {ret i32 1}
+; RUN: opt < %s -basicaa -gvn -S | grep {ret i32 1}
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
 	%struct.x = type { i32, i32, i32, i32 }
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 0e0c45c8ad5c..8a8ac4f72103 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s
 ; PR4267
 
 ; CHECK: MayAlias: double* %p.0.i.0, double* %p3
diff --git a/test/Analysis/BasicAA/empty.ll b/test/Analysis/BasicAA/empty.ll
index 689efec26adb..7b06780e6b18 100644
--- a/test/Analysis/BasicAA/empty.ll
+++ b/test/Analysis/BasicAA/empty.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output \
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output \
 ; RUN:   |& grep {NoAlias:	\{\}\\* \[%\]p, \{\}\\* \[%\]q}
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
new file mode 100644
index 000000000000..4fa6375c1439
--- /dev/null
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -tbaa -basicaa -gvn < %s | grep {ret i32 %}
+; RUN: opt -S -tbaa -gvn < %s | grep {ret i32 0}
+; rdar://8875631, rdar://8875069
+
+; BasicAA should notice that the store stores to the entire %u object,
+; so the %tmp5 load is PartialAlias with the store and suppress TBAA.
+; Without BasicAA, TBAA should say that %tmp5 is NoAlias with the store.
+
+target datalayout = "e-p:64:64:64"
+
+%union.anon = type { double }
+
+@u = global %union.anon { double -2.500000e-01 }, align 8
+@endianness_test = global i64 1, align 8
+
+define i32 @signbit(double %x) nounwind {
+entry:
+  %u = alloca %union.anon, align 8
+  %tmp9 = getelementptr inbounds %union.anon* %u, i64 0, i32 0
+  store double %x, double* %tmp9, align 8, !tbaa !0
+  %tmp2 = load i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
+  %idxprom = sext i32 %tmp2 to i64
+  %tmp4 = bitcast %union.anon* %u to [2 x i32]*
+  %arrayidx = getelementptr inbounds [2 x i32]* %tmp4, i64 0, i64 %idxprom
+  %tmp5 = load i32* %arrayidx, align 4, !tbaa !3
+  %tmp5.lobit = lshr i32 %tmp5, 31
+  ret i32 %tmp5.lobit
+}
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll
index eba9599ba07b..69f7fafaca07 100644
--- a/test/Analysis/BasicAA/gep-alias.ll
+++ b/test/Analysis/BasicAA/gep-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -instcombine -S |& FileCheck %s
+; RUN: opt < %s -basicaa -gvn -instcombine -S |& FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
index 12b088b1f651..062ea59f128b 100644
--- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s
 
 
 ; CHECK: Just Ref: call void @ro() <-> call void @f0()
diff --git a/test/Analysis/BasicAA/global-size.ll b/test/Analysis/BasicAA/global-size.ll
index b9cbbcc59ef6..a7e5aab6c1f2 100644
--- a/test/Analysis/BasicAA/global-size.ll
+++ b/test/Analysis/BasicAA/global-size.ll
@@ -1,16 +1,40 @@
 ; A store or load cannot alias a global if the accessed amount is larger then
 ; the global.
 
-; RUN: opt < %s -basicaa -gvn -instcombine -S | not grep load
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-@B = global i16 8               ; <i16*> [#uses=2]
+@B = global i16 8
 
-define i16 @test(i32* %P) {
-        %X = load i16* @B               ; <i16> [#uses=1]
+; CHECK: @test1
+define i16 @test1(i32* %P) {
+        %X = load i16* @B
         store i32 7, i32* %P
-        %Y = load i16* @B               ; <i16> [#uses=1]
-        %Z = sub i16 %Y, %X             ; <i16> [#uses=1]
+        %Y = load i16* @B
+        %Z = sub i16 %Y, %X
         ret i16 %Z
+; CHECK: ret i16 0
+}
+
+; Cannot know anything about the size of this global.
+; rdar://8813415
+@window = external global [0 x i8]
+
+; CHECK: @test2
+define i8 @test2(i32 %tmp79, i32 %w.2, i32 %indvar89) nounwind {
+  %tmp92 = add i32 %tmp79, %indvar89
+  %arrayidx412 = getelementptr [0 x i8]* @window, i32 0, i32 %tmp92
+  %tmp93 = add i32 %w.2, %indvar89
+  %arrayidx416 = getelementptr [0 x i8]* @window, i32 0, i32 %tmp93
+
+  %A = load i8* %arrayidx412, align 1
+  store i8 4, i8* %arrayidx416, align 1
+
+  %B = load i8* %arrayidx412, align 1
+  %C = sub i8 %A, %B
+  ret i8 %C
+
+; CHECK: %B = load i8
+; CHECK: ret i8 %C
 }
 
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index b9a3c5e58f68..ec0c8a734447 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -85,11 +85,11 @@ define void @test3a(i8* %P, i8 %X) {
   %Y = add i8 %X, 1     ;; Dead, because the only use (the store) is dead.
   
   %P2 = getelementptr i8* %P, i32 2
-  store i8 %Y, i8* %P2  ;; FIXME: Killed by llvm.lifetime.end, should be zapped.
-; CHECK: store i8 %Y, i8* %P2
+  store i8 %Y, i8* %P2
+; CHECK-NEXT: call void @llvm.lifetime.end
   call void @llvm.lifetime.end(i64 10, i8* %P)
   ret void
-; CHECK: ret void
+; CHECK-NEXT: ret void
 }
 
 @G1 = external global i32
@@ -105,7 +105,7 @@ define i32 @test4(i8* %P) {
 ; CHECK: load i32* @G
 ; CHECK: memset.p0i8.i32
 ; CHECK-NOT: load
-; CHECK: sub i32 %tmp, %tmp
+; CHECK: ret i32 0
 }
 
 ; Verify that basicaa is handling variable length memcpy, knowing it doesn't
@@ -120,7 +120,7 @@ define i32 @test5(i8* %P, i32 %Len) {
 ; CHECK: load i32* @G
 ; CHECK: memcpy.p0i8.p0i8.i32
 ; CHECK-NOT: load
-; CHECK: sub i32 %tmp, %tmp
+; CHECK: ret i32 0
 }
 
 define i8 @test6(i8* %p, i8* noalias %a) {
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 02889600fb8f..50fd5cd22ba6 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -5,7 +5,7 @@
 @Y = common global i32 0
 @Z = common global i32 0
 
-define void @foo(i32 %cond) nounwind ssp {
+define void @foo(i32 %cond) nounwind {
 entry:
   %"alloca point" = bitcast i32 0 to i32
   %tmp = icmp ne i32 %cond, 0
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
index c69e824035a8..9bc47ae44a97 100644
--- a/test/Analysis/BasicAA/phi-and-select.ll
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output \
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output \
 ; RUN:   |& grep {NoAlias:	double\\* \[%\]a, double\\* \[%\]b\$} | count 4
 
 ; BasicAA should detect NoAliases in PHIs and Selects.
diff --git a/test/Analysis/BasicAA/unreachable-block.ll b/test/Analysis/BasicAA/unreachable-block.ll
index 3382188f4bbe..1ca1e66f8947 100644
--- a/test/Analysis/BasicAA/unreachable-block.ll
+++ b/test/Analysis/BasicAA/unreachable-block.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-eval -disable-output < %s >& /dev/null
+; RUN: opt -basicaa -aa-eval -disable-output < %s >& /dev/null
 
 ; BasicAA shouldn't infinitely recurse on the use-def cycles in
 ; unreachable code.
diff --git a/test/Analysis/GlobalsModRef/aliastest.ll b/test/Analysis/GlobalsModRef/aliastest.ll
index 3e5d11907aa6..75af4dc5b934 100644
--- a/test/Analysis/GlobalsModRef/aliastest.ll
+++ b/test/Analysis/GlobalsModRef/aliastest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
 @X = internal global i32 4		; <i32*> [#uses=1]
 
 define i32 @test(i32* %P) {
diff --git a/test/Analysis/GlobalsModRef/chaining-analysis.ll b/test/Analysis/GlobalsModRef/chaining-analysis.ll
index b1d4593ac992..431b2a68cf4c 100644
--- a/test/Analysis/GlobalsModRef/chaining-analysis.ll
+++ b/test/Analysis/GlobalsModRef/chaining-analysis.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
 
 ; This test requires the use of previous analyses to determine that
 ; doesnotmodX does not modify X (because 'sin' doesn't).
diff --git a/test/Analysis/GlobalsModRef/indirect-global.ll b/test/Analysis/GlobalsModRef/indirect-global.ll
index 4074909ce785..1eab0bc20811 100644
--- a/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -instcombine -S | \
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -instcombine -S | \
 ; RUN:   grep {ret i32 0}
 
 @G = internal global i32* null		; <i32**> [#uses=3]
diff --git a/test/Analysis/GlobalsModRef/modreftest.ll b/test/Analysis/GlobalsModRef/modreftest.ll
index 257c0ee7deb2..3a02a94a99b5 100644
--- a/test/Analysis/GlobalsModRef/modreftest.ll
+++ b/test/Analysis/GlobalsModRef/modreftest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -S | not grep load
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | not grep load
 @X = internal global i32 4		; <i32*> [#uses=2]
 
 define i32 @test(i32* %P) {
diff --git a/test/Analysis/LoopDependenceAnalysis/alias.ll b/test/Analysis/LoopDependenceAnalysis/alias.ll
index 97be3fd03590..78d0bf4fee1a 100644
--- a/test/Analysis/LoopDependenceAnalysis/alias.ll
+++ b/test/Analysis/LoopDependenceAnalysis/alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -lda | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
 
 ;; x[5] = x[6] // with x being a pointer passed as argument
 
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-strong.ll b/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
index 36ac15336d6a..401e466d6669 100644
--- a/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
+++ b/test/Analysis/LoopDependenceAnalysis/siv-strong.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -lda | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
 
 @x = common global [256 x i32] zeroinitializer, align 4
 @y = common global [256 x i32] zeroinitializer, align 4
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
index a7f9bdaa59e2..9d0128c5fec4 100644
--- a/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
+++ b/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -lda | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
 
 @x = common global [256 x i32] zeroinitializer, align 4
 @y = common global [256 x i32] zeroinitializer, align 4
diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
index e75aefd64d3d..1c5ae4c490e3 100644
--- a/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
+++ b/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -lda | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
 
 @x = common global [256 x i32] zeroinitializer, align 4
 @y = common global [256 x i32] zeroinitializer, align 4
diff --git a/test/Analysis/LoopDependenceAnalysis/ziv.ll b/test/Analysis/LoopDependenceAnalysis/ziv.ll
index ba4594877434..645ae7f152e2 100644
--- a/test/Analysis/LoopDependenceAnalysis/ziv.ll
+++ b/test/Analysis/LoopDependenceAnalysis/ziv.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -lda | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s
 
 @x = common global [256 x i32] zeroinitializer, align 4
 
diff --git a/test/Analysis/PointerTracking/dg.exp b/test/Analysis/PointerTracking/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/PointerTracking/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll
deleted file mode 100644
index c8ca648e877b..000000000000
--- a/test/Analysis/PointerTracking/sizes.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt < %s -pointertracking -analyze | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-@.str = internal constant [5 x i8] c"1234\00"		; <[5 x i8]*> [#uses=1]
-@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8		; <i8**> [#uses=1]
-@test1a = global [5 x i8] c"1234\00", align 1		; <[5 x i8]*> [#uses=1]
-@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4		; <[5 x i32]*> [#uses=2]
-@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8		; <i32**> [#uses=1]
-@test0p = common global i32* null, align 8		; <i32**> [#uses=1]
-@test0i = common global i32 0, align 4		; <i32*> [#uses=1]
-
-define i32 @foo0() nounwind {
-entry:
-	%tmp = load i32** @test0p		; <i32*> [#uses=1]
-	%conv = bitcast i32* %tmp to i8*		; <i8*> [#uses=1]
-	%call = tail call i32 @bar(i8* %conv) nounwind		; <i32> [#uses=1]
-	%tmp1 = load i8** @test1p		; <i8*> [#uses=1]
-	%call2 = tail call i32 @bar(i8* %tmp1) nounwind		; <i32> [#uses=1]
-	%call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind		; <i32> [#uses=1]
-	%call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind		; <i32> [#uses=1]
-	%tmp7 = load i32** @test2p		; <i32*> [#uses=1]
-	%conv8 = bitcast i32* %tmp7 to i8*		; <i8*> [#uses=1]
-	%call9 = tail call i32 @bar(i8* %conv8) nounwind		; <i32> [#uses=1]
-	%call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind		; <i32> [#uses=1]
-	%add = add i32 %call2, %call		; <i32> [#uses=1]
-	%add4 = add i32 %add, %call3		; <i32> [#uses=1]
-	%add6 = add i32 %add4, %call5		; <i32> [#uses=1]
-	%add10 = add i32 %add6, %call9		; <i32> [#uses=1]
-	%add12 = add i32 %add10, %call11		; <i32> [#uses=1]
-	ret i32 %add12
-}
-
-declare i32 @bar(i8*)
-
-define i32 @foo1(i32 %n) nounwind {
-entry:
-; CHECK: 'foo1':
-	%test4a = alloca [10 x i8], align 1		; <[10 x i8]*> [#uses=1]
-; CHECK: %test4a =
-; CHECK: ==> 1 elements, 10 bytes allocated
-	%test6a = alloca [10 x i32], align 4		; <[10 x i32]*> [#uses=1]
-; CHECK: %test6a =
-; CHECK: ==> 1 elements, 40 bytes allocated
-	%vla = alloca i8, i32 %n, align 1		; <i8*> [#uses=1]
-; CHECK: %vla =
-; CHECK: ==> %n elements, %n bytes allocated
-	%0 = shl i32 %n, 2		; <i32> [#uses=1]
-	%vla7 = alloca i8, i32 %0, align 1		; <i8*> [#uses=1]
-; CHECK: %vla7 =
-; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated
-	%call = call i32 @bar(i8* %vla) nounwind		; <i32> [#uses=1]
-	%arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0		; <i8*> [#uses=1]
-	%call10 = call i32 @bar(i8* %arraydecay) nounwind		; <i32> [#uses=1]
-	%call11 = call i32 @bar(i8* %vla7) nounwind		; <i32> [#uses=1]
-	%ptrconv14 = bitcast [10 x i32]* %test6a to i8*		; <i8*> [#uses=1]
-	%call15 = call i32 @bar(i8* %ptrconv14) nounwind		; <i32> [#uses=1]
-	%add = add i32 %call10, %call		; <i32> [#uses=1]
-	%add12 = add i32 %add, %call11		; <i32> [#uses=1]
-	%add16 = add i32 %add12, %call15		; <i32> [#uses=1]
-	ret i32 %add16
-}
-
-define i32 @foo2(i64 %n) nounwind {
-entry:
-	%call = tail call i8* @malloc(i64 %n)  ; <i8*> [#uses=1]
-; CHECK: %call =
-; CHECK: ==> %n elements, %n bytes allocated
-	%call2 = tail call i8* @calloc(i64 2, i64 4) nounwind		; <i8*> [#uses=1]
-; CHECK: %call2 =
-; CHECK: ==> 8 elements, 8 bytes allocated
-	%call4 = tail call i8* @realloc(i8* null, i64 16) nounwind		; <i8*> [#uses=1]
-; CHECK: %call4 =
-; CHECK: ==> 16 elements, 16 bytes allocated
-	%call6 = tail call i32 @bar(i8* %call) nounwind		; <i32> [#uses=1]
-	%call8 = tail call i32 @bar(i8* %call2) nounwind		; <i32> [#uses=1]
-	%call10 = tail call i32 @bar(i8* %call4) nounwind		; <i32> [#uses=1]
-	%add = add i32 %call8, %call6                   ; <i32> [#uses=1]
-	%add11 = add i32 %add, %call10                ; <i32> [#uses=1]
-	ret i32 %add11
-}
-
-declare noalias i8* @malloc(i64) nounwind
-
-declare noalias i8* @calloc(i64, i64) nounwind
-
-declare noalias i8* @realloc(i8* nocapture, i64) nounwind
diff --git a/test/Analysis/Profiling/profiling-tool-chain.ll b/test/Analysis/Profiling/profiling-tool-chain.ll
index 5ac31b59bdcb..9135a85dc3ad 100644
--- a/test/Analysis/Profiling/profiling-tool-chain.ll
+++ b/test/Analysis/Profiling/profiling-tool-chain.ll
@@ -9,8 +9,8 @@
 
 ; Test the creation, reading and displaying of profile
 ; RUX: rm -f llvmprof.out
-; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2
-; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2 1 2
+; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2
+; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 1 2
 ; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s
 
 ; Test the loaded profile also with verifier.
diff --git a/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll b/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
new file mode 100644
index 000000000000..aba0ce74678f
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
@@ -0,0 +1,24 @@
+; RUN: opt -indvars -scalar-evolution -analyze %s
+; This test checks if the SCEV analysis is printed out at all.
+; It failed once as the RequiredTransitive option was not implemented
+; correctly.
+
+define i32 @main() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] ; <i64> [#uses=3]
+  %exitcond = icmp ne i64 %indvar1, 1024          ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvar.next2 = add i64 %indvar1, 1             ; <i64> [#uses=1]
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/fold.ll b/test/Analysis/ScalarEvolution/fold.ll
new file mode 100644
index 000000000000..4e2adf187e8b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/fold.ll
@@ -0,0 +1,62 @@
+; RUN: opt -analyze -scalar-evolution %s -S | FileCheck %s
+
+define i16 @test1(i8 %x) {
+  %A = zext i8 %x to i12
+  %B = sext i12 %A to i16
+; CHECK: zext i8 %x to i16
+  ret i16 %B
+}
+
+define i8 @test2(i8 %x) {
+  %A = zext i8 %x to i16
+  %B = add i16 %A, 1025
+  %C = trunc i16 %B to i8
+; CHECK: (1 + %x)
+  ret i8 %C
+}
+
+define i8 @test3(i8 %x) {
+  %A = zext i8 %x to i16
+  %B = mul i16 %A, 1027
+  %C = trunc i16 %B to i8
+; CHECK: (3 * %x)
+  ret i8 %C
+}
+
+define void @test4(i32 %x, i32 %y) {
+entry:
+  %Y = and i32 %y, 3
+  br label %loop
+loop:
+  %A = phi i32 [0, %entry], [%I, %loop]
+  %rand1 = icmp sgt i32 %A, %Y
+  %Z1 = select i1 %rand1, i32 %A, i32 %Y
+  %rand2 = icmp ugt i32 %A, %Z1
+  %Z2 = select i1 %rand2, i32 %A, i32 %Z1
+; CHECK: %Z2 =
+; CHECK-NEXT: -->  ([[EXPR:.*]]){{ +}}Exits: 20
+  %B = trunc i32 %Z2 to i16
+  %C = sext i16 %B to i30
+; CHECK: %C =
+; CHECK-NEXT: (trunc i32 ([[EXPR]]) to i30)
+  %D = sext i16 %B to i32
+; CHECK: %D =
+; CHECK-NEXT: ([[EXPR]])
+  %E = sext i16 %B to i34
+; CHECK: %E =
+; CHECK-NEXT: (zext i32 ([[EXPR]]) to i34)
+  %F = zext i16 %B to i30
+; CHECK: %F =
+; CHECK-NEXT: (trunc i32 ([[EXPR]]) to i30
+  %G = zext i16 %B to i32
+; CHECK: %G =
+; CHECK-NEXT: ([[EXPR]])
+  %H = zext i16 %B to i34
+; CHECK: %H =
+; CHECK-NEXT: (zext i32 ([[EXPR]]) to i34)
+  %I = add i32 %A, 1
+  %0 = icmp ne i32 %A, 20
+  br i1 %0, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index 456f3f059fe3..9d8e2b62a9d2 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -1,10 +1,11 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep { -->  {.*,+,.*}<%bb>} | count 8
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 ; The addrecs in this loop are analyzable only by using nsw information.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
 
-define void @foo(double* %p) nounwind {
+; CHECK: Classifying expressions for: @test1
+define void @test1(double* %p) nounwind {
 entry:
 	%tmp = load double* %p, align 8		; <double> [#uses=1]
 	%tmp1 = fcmp ogt double %tmp, 2.000000e+00		; <i1> [#uses=1]
@@ -15,19 +16,29 @@ bb.nph:		; preds = %entry
 
 bb:		; preds = %bb1, %bb.nph
 	%i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=3]
+; CHECK: %i.01
+; CHECK-NEXT: -->  {0,+,1}<nuw><nsw><%bb>
 	%tmp2 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%tmp3 = getelementptr double* %p, i64 %tmp2		; <double*> [#uses=1]
 	%tmp4 = load double* %tmp3, align 8		; <double> [#uses=1]
 	%tmp5 = fmul double %tmp4, 9.200000e+00		; <double> [#uses=1]
 	%tmp6 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%tmp7 = getelementptr double* %p, i64 %tmp6		; <double*> [#uses=1]
+; CHECK: %tmp7
+; CHECK-NEXT:   -->  {%p,+,8}<%bb>
 	store double %tmp5, double* %tmp7, align 8
 	%tmp8 = add nsw i32 %i.01, 1		; <i32> [#uses=2]
+; CHECK: %tmp8
+; CHECK-NEXT: -->  {1,+,1}<nuw><nsw><%bb>
 	br label %bb1
 
 bb1:		; preds = %bb
 	%phitmp = sext i32 %tmp8 to i64		; <i64> [#uses=1]
+; CHECK: %phitmp
+; CHECK-NEXT: -->  {1,+,1}<%bb>
 	%tmp9 = getelementptr double* %p, i64 %phitmp		; <double*> [#uses=1]
+; CHECK: %tmp9
+; CHECK-NEXT:  -->  {(8 + %p),+,8}<%bb>
 	%tmp10 = load double* %tmp9, align 8		; <double> [#uses=1]
 	%tmp11 = fcmp ogt double %tmp10, 2.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp11, label %bb, label %bb1.return_crit_edge
@@ -38,3 +49,58 @@ bb1.return_crit_edge:		; preds = %bb1
 return:		; preds = %bb1.return_crit_edge, %entry
 	ret void
 }
+
+; CHECK: Classifying expressions for: @test2
+define void @test2(i32* %begin, i32* %end) ssp {
+entry:
+  %cmp1.i.i = icmp eq i32* %begin, %end
+  br i1 %cmp1.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.lr.ph.i.i
+
+for.body.lr.ph.i.i:                               ; preds = %entry
+  br label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %for.body.i.i, %for.body.lr.ph.i.i
+  %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ]
+; CHECK: %__first.addr.02.i.i
+; CHECK-NEXT: -->  {%begin,+,4}<nsw><%for.body.i.i>	
+  store i32 0, i32* %__first.addr.02.i.i, align 4
+  %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1
+; CHECK: %ptrincdec.i.i
+; CHECK-NEXT: -->  {(4 + %begin),+,4}<nsw><%for.body.i.i>
+  %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
+  br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i
+
+for.cond.for.end_crit_edge.i.i:                   ; preds = %for.body.i.i
+  br label %_ZSt4fillIPiiEvT_S1_RKT0_.exit
+
+_ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %entry, %for.cond.for.end_crit_edge.i.i
+  ret void
+}
+
+; Various checks for inbounds geps.
+define void @test3(i32* %begin, i32* %end) nounwind ssp {
+entry:
+  %cmp7.i.i = icmp eq i32* %begin, %end
+  br i1 %cmp7.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %entry, %for.body.i.i
+  %indvar.i.i = phi i64 [ %tmp, %for.body.i.i ], [ 0, %entry ]
+; CHECK: %indvar.i.i
+; CHECK: {0,+,1}<nuw><nsw><%for.body.i.i>
+  %tmp = add nsw i64 %indvar.i.i, 1
+; CHECK: %tmp = 
+; CHECK: {1,+,1}<nuw><nsw><%for.body.i.i>
+  %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp
+; CHECK: %ptrincdec.i.i =
+; CHECK: {(4 + %begin),+,4}<nsw><%for.body.i.i>
+  %__first.addr.08.i.i = getelementptr inbounds i32* %begin, i64 %indvar.i.i
+; CHECK: %__first.addr.08.i.i
+; CHECK: {%begin,+,4}<nsw><%for.body.i.i>
+  store i32 0, i32* %__first.addr.08.i.i, align 4
+  %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
+  br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
+; CHECK: Loop %for.body.i.i: Unpredictable backedge-taken count. 
+; CHECK: Loop %for.body.i.i: Unpredictable max backedge-taken count.
+_ZSt4fillIPiiEvT_S1_RKT0_.exit:                   ; preds = %for.body.i.i, %entry
+  ret void
+}
\ No newline at end of file
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll
index 866664a3d66b..dd5a66ccb44c 100644
--- a/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -190,9 +190,8 @@ define void @bar() {
   ret void
 }
 
-; TODO: This is theoretically provable to be NoAlias.
 ; CHECK: Function: nonnegative: 2 pointers, 0 call sites
-; CHECK: MayAlias:  i64* %arrayidx, i64* %p
+; CHECK: NoAlias:  i64* %arrayidx, i64* %p
 
 define void @nonnegative(i64* %p) nounwind {
 entry:
@@ -211,6 +210,6 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-; CHECK: 13 no alias responses
-; CHECK: 27 may alias responses
+; CHECK: 14 no alias responses
+; CHECK: 26 may alias responses
 ; CHECK: 18 must alias responses
diff --git a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
new file mode 100644
index 000000000000..d59e3924acd3
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -tbaa -basicaa -gvn -S | FileCheck %s
+
+; Test that basic alias queries work.
+
+; CHECK: @test0_yes
+; CHECK: add i8 %x, %x
+define i8 @test0_yes(i8* %a, i8* %b) nounwind {
+  %x = load i8* %a, !tbaa !1
+  store i8 0, i8* %b, !tbaa !2
+  %y = load i8* %a, !tbaa !1
+  %z = add i8 %x, %y
+  ret i8 %z
+}
+
+; CHECK: @test0_no
+; CHECK: add i8 %x, %y
+define i8 @test0_no(i8* %a, i8* %b) nounwind {
+  %x = load i8* %a, !tbaa !3
+  store i8 0, i8* %b, !tbaa !4
+  %y = load i8* %a, !tbaa !3
+  %z = add i8 %x, %y
+  ret i8 %z
+}
+
+; Test that basic invariant-memory queries work.
+
+; CHECK: @test1_yes
+; CHECK: add i8 %x, %x
+define i8 @test1_yes(i8* %a, i8* %b) nounwind {
+  %x = load i8* %a, !tbaa !5
+  store i8 0, i8* %b
+  %y = load i8* %a, !tbaa !5
+  %z = add i8 %x, %y
+  ret i8 %z
+}
+
+; CHECK: @test1_no
+; CHECK: add i8 %x, %y
+define i8 @test1_no(i8* %a, i8* %b) nounwind {
+  %x = load i8* %a, !tbaa !6
+  store i8 0, i8* %b
+  %y = load i8* %a, !tbaa !6
+  %z = add i8 %x, %y
+  ret i8 %z
+}
+
+; Root note.
+!0 = metadata !{ }
+; Some type.
+!1 = metadata !{ metadata !"foo", metadata !0 }
+; Some other non-aliasing type.
+!2 = metadata !{ metadata !"bar", metadata !0 }
+
+; Some type.
+!3 = metadata !{ metadata !"foo", metadata !0 }
+; Some type in a different type system.
+!4 = metadata !{ metadata !"bar", metadata !"different" }
+
+; Invariant memory.
+!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+; Not invariant memory.
+!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
new file mode 100644
index 000000000000..3b5211e5999d
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -tbaa -basicaa -argpromotion -mem2reg -S | not grep alloca
+
+target datalayout = "E-p:64:64:64"
+
+define internal i32 @test(i32* %X, i32* %Y, i32* %Q) {
+  store i32 77, i32* %Q, !tbaa !2
+  %A = load i32* %X, !tbaa !1
+  %B = load i32* %Y, !tbaa !1
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+define internal i32 @caller(i32* %B, i32* %Q) {
+  %A = alloca i32
+  store i32 78, i32* %Q, !tbaa !2
+  store i32 1, i32* %A, !tbaa !1
+  %C = call i32 @test(i32* %A, i32* %B, i32* %Q)
+  ret i32 %C
+}
+
+define i32 @callercaller(i32* %Q) {
+  %B = alloca i32
+  store i32 2, i32* %B, !tbaa !1
+  store i32 79, i32* %Q, !tbaa !2
+  %X = call i32 @caller(i32* %B, i32* %Q)
+  ret i32 %X
+}
+
+!0 = metadata !{metadata !"test"}
+!1 = metadata !{metadata !"green", metadata !0}
+!2 = metadata !{metadata !"blue", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dg.exp b/test/Analysis/TypeBasedAliasAnalysis/dg.exp
new file mode 100644
index 000000000000..f2005891a59a
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
new file mode 100644
index 000000000000..6b44eb638423
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -tbaa -basicaa -dse -S | FileCheck %s
+
+; DSE should make use of TBAA.
+
+; CHECK: @test0_yes
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test0_yes(i8* %a, i8* %b) nounwind {
+  store i8 0, i8* %a, !tbaa !1
+  %y = load i8* %b, !tbaa !2
+  store i8 1, i8* %a, !tbaa !1
+  ret i8 %y
+}
+
+; CHECK: @test0_no
+; CHECK-NEXT: store i8 0, i8* %a
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test0_no(i8* %a, i8* %b) nounwind {
+  store i8 0, i8* %a, !tbaa !3
+  %y = load i8* %b, !tbaa !4
+  store i8 1, i8* %a, !tbaa !3
+  ret i8 %y
+}
+
+; CHECK: @test1_yes
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test1_yes(i8* %a, i8* %b) nounwind {
+  store i8 0, i8* %a
+  %y = load i8* %b, !tbaa !5
+  store i8 1, i8* %a
+  ret i8 %y
+}
+
+; CHECK: @test1_no
+; CHECK-NEXT: store i8 0, i8* %a
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test1_no(i8* %a, i8* %b) nounwind {
+  store i8 0, i8* %a
+  %y = load i8* %b, !tbaa !6
+  store i8 1, i8* %a
+  ret i8 %y
+}
+
+; Root note.
+!0 = metadata !{ }
+; Some type.
+!1 = metadata !{ metadata !"foo", metadata !0 }
+; Some other non-aliasing type.
+!2 = metadata !{ metadata !"bar", metadata !0 }
+
+; Some type.
+!3 = metadata !{ metadata !"foo", metadata !0 }
+; Some type in a different type system.
+!4 = metadata !{ metadata !"bar", metadata !"different" }
+
+; Invariant memory.
+!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+; Not invariant memory.
+!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
new file mode 100644
index 000000000000..8fb5ffffbaea
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -tbaa -basicaa -functionattrs -S | FileCheck %s
+
+; FunctionAttrs should make use of TBAA.
+
+; Add the readnone attribute, since the only access is a store which TBAA
+; says is to constant memory.
+;
+; It's unusual to see a store to constant memory, but it isn't necessarily
+; invalid, as it's possible that this only happens after optimization on a
+; code path which isn't ever executed.
+
+; CHECK: define void @test0_yes(i32* nocapture %p) nounwind readnone {
+define void @test0_yes(i32* %p) nounwind {
+  store i32 0, i32* %p, !tbaa !1
+  ret void
+}
+
+; CHECK: define void @test0_no(i32* nocapture %p) nounwind {
+define void @test0_no(i32* %p) nounwind {
+  store i32 0, i32* %p, !tbaa !2
+  ret void
+}
+
+; Add the readonly attribute, since there's just a call to a function which 
+; TBAA says doesn't modify any memory.
+
+; CHECK: define void @test1_yes(i32* %p) nounwind readonly {
+define void @test1_yes(i32* %p) nounwind {
+  call void @callee(i32* %p), !tbaa !1
+  ret void
+}
+
+; CHECK: define void @test1_no(i32* %p) nounwind {
+define void @test1_no(i32* %p) nounwind {
+  call void @callee(i32* %p), !tbaa !2
+  ret void
+}
+
+; Add the readonly attribute, as above, but this time BasicAA will say
+; that the function accesses memory through its arguments, which TBAA
+; still says that the function doesn't write to memory.
+;
+; This is unusual, since the function is memcpy, but as above, this
+; isn't necessarily invalid.
+
+; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind readnone {
+define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1
+  ret void
+}
+
+; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind {
+define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
+  ret void
+}
+
+; Similar to the others, va_arg only accesses memory through its operand.
+
+; CHECK: define i32 @test3_yes(i8* nocapture %p) nounwind readnone {
+define i32 @test3_yes(i8* %p) nounwind {
+  %t = va_arg i8* %p, i32, !tbaa !1
+  ret i32 %t
+}
+
+; CHECK: define i32 @test3_no(i8* nocapture %p) nounwind {
+define i32 @test3_no(i8* %p) nounwind {
+  %t = va_arg i8* %p, i32, !tbaa !2
+  ret i32 %t
+}
+
+declare void @callee(i32* %p) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
+
+; Root note.
+!0 = metadata !{ }
+
+; Invariant memory.
+!1 = metadata !{ metadata !"foo", metadata !0, i1 1 }
+; Not invariant memory.
+!2 = metadata !{ metadata !"foo", metadata !0, i1 0 }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
new file mode 100644
index 000000000000..eceaa2cf02d3
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; GVN should ignore the store to p1 to see that the load from p is
+; fully redundant.
+
+; CHECK: @yes
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+
+define void @yes(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p, !tbaa !1
+  store i32 1, i32* %p1, !tbaa !2
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p, !tbaa !1
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  ret void
+}
+
+; GVN should ignore the store to p1 to see that the first load from p is
+; fully redundant. However, the second load uses a different type. Theoretically
+; the other type could be unified with the first type, however for now, GVN
+; should just be conservative.
+
+; CHECK: @watch_out_for_type_change
+; CHECK: if.then:
+; CHECK:   %t = load i32* %p
+; CHECK:   store i32 %t, i32* %q
+; CHECK:   ret void
+; CHECK: if.else:
+; CHECK:   %u = load i32* %p
+; CHECK:   store i32 %u, i32* %q
+
+define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p, !tbaa !1
+  store i32 1, i32* %p1, !tbaa !2
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p, !tbaa !4
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  %u = load i32* %p, !tbaa !3
+  store i32 %u, i32* %q
+  ret void
+}
+
+; As before, but the types are swapped. This time GVN does managed to
+; eliminate one of the loads before noticing the type mismatch.
+
+; CHECK: @watch_out_for_another_type_change
+; CHECK: if.then:
+; CHECK:   %t = load i32* %p
+; CHECK:   store i32 %t, i32* %q
+; CHECK:   ret void
+; CHECK: if.else:
+; CHECK:   store i32 0, i32* %q
+
+define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p, !tbaa !1
+  store i32 1, i32* %p1, !tbaa !2
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p, !tbaa !3
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  %u = load i32* %p, !tbaa !4
+  store i32 %u, i32* %q
+  ret void
+}
+
+!0 = metadata !{}
+!1 = metadata !{metadata !"red", metadata !0}
+!2 = metadata !{metadata !"blu", metadata !0}
+!3 = metadata !{metadata !"outer space"}
+!4 = metadata !{metadata !"brick red", metadata !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
new file mode 100644
index 000000000000..12a9c1dc5649
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -0,0 +1,61 @@
+; RUN: opt -tbaa -licm -S < %s | FileCheck %s
+
+; LICM should be able to hoist the address load out of the loop
+; by using TBAA information.
+
+; CHECK: @foo
+; CHECK:      entry:
+; CHECK-NEXT:   %tmp3 = load double** @P, !tbaa !0
+; CHECK-NEXT:   br label %for.body
+
+@P = common global double* null
+
+define void @foo(i64 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %tmp3 = load double** @P, !tbaa !1
+  %scevgep = getelementptr double* %tmp3, i64 %i.07
+  %tmp4 = load double* %scevgep, !tbaa !2
+  %mul = fmul double %tmp4, 2.300000e+00
+  store double %mul, double* %scevgep, !tbaa !2
+  %inc = add i64 %i.07, 1
+  %exitcond = icmp eq i64 %inc, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+!0 = metadata !{metadata !"root", null}
+!1 = metadata !{metadata !"pointer", metadata !0}
+!2 = metadata !{metadata !"double", metadata !0}
+
+; LICM shouldn't hoist anything here.
+
+; CHECK: @bar
+; CHECK: loop:
+; CHECK: load
+; CHECK: store
+; CHECK: load
+; CHECK: store
+; CHECK: br label %loop
+
+define void @bar(i8** %p) nounwind {
+entry:
+  %q = bitcast i8** %p to i8*
+  br label %loop
+
+loop:
+  %tmp51 = load i8** %p, !tbaa !4
+  store i8* %tmp51, i8** %p
+  %tmp40 = load i8* %q, !tbaa !5
+  store i8 %tmp40, i8* %q
+  br label %loop
+}
+
+!3 = metadata !{metadata !"pointer", metadata !4}
+!4 = metadata !{metadata !"char", metadata !5}
+!5 = metadata !{metadata !"root", null}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
new file mode 100644
index 000000000000..c2407dfd4c89
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -tbaa -basicaa -memcpyopt -instcombine < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; The second memcpy is redundant and can be deleted. There's an intervening store, but
+; it has a TBAA tag which declares that it is unrelated.
+
+; CHECK: @foo
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0
+; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !2
+; CHECK-NEXT: ret void
+define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind {
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2
+  store i8 2, i8* %s, align 1, !tbaa !1
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"tbaa root", null}
+!1 = metadata !{metadata !"A", metadata !0}
+!2 = metadata !{metadata !"B", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
new file mode 100644
index 000000000000..47cb5f2256d3
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
@@ -0,0 +1,46 @@
+; RUN: opt -basicaa -tbaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=TBAA
+; RUN: opt -tbaa -basicaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=BASICAA
+
+; According to the TBAA metadata the load and store don't alias. However,
+; according to the actual code, they do. The order of the alias analysis
+; passes should determine which of these takes precedence.
+
+target datalayout = "e-p:64:64:64"
+
+; Test for simple MustAlias aliasing.
+
+; TBAA:    @trouble
+; TBAA:      ret i32 0
+; BASICAA: @trouble
+; BASICAA:   ret i32 1075000115
+define i32 @trouble(i32* %x) nounwind {
+entry:
+  store i32 0, i32* %x, !tbaa !0
+  %0 = bitcast i32* %x to float*
+  store float 0x4002666660000000, float* %0, !tbaa !3
+  %tmp3 = load i32* %x, !tbaa !0
+  ret i32 %tmp3
+}
+
+; Test for PartialAlias aliasing. GVN doesn't yet eliminate the load
+; in the BasicAA case.
+
+; TBAA:    @offset
+; TBAA:      ret i64 0
+; BASICAA: @offset
+; BASICAA:   ret i64 %tmp3
+define i64 @offset(i64* %x) nounwind {
+entry:
+  store i64 0, i64* %x, !tbaa !4
+  %0 = bitcast i64* %x to i8*
+  %1 = getelementptr i8* %0, i64 1
+  store i8 1, i8* %1, !tbaa !5
+  %tmp3 = load i64* %x, !tbaa !4
+  ret i64 %tmp3
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"simple"}
+!3 = metadata !{metadata !"float", metadata !1}
+!4 = metadata !{metadata !"long", metadata !1}
+!5 = metadata !{metadata !"small", metadata !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
new file mode 100644
index 000000000000..fd32d6a7a58e
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
@@ -0,0 +1,20 @@
+; RUN: opt -tbaa -sink -S < %s | FileCheck %s
+
+; CHECK: a:
+; CHECK:   %f = load float* %p, !tbaa !2
+; CHECK:   store float %f, float* %q
+
+define void @foo(float* %p, i1 %c, float* %q, float* %r) {
+  %f = load float* %p, !tbaa !0
+  store float 0.0, float* %r, !tbaa !1
+  br i1 %c, label %a, label %b
+a:
+  store float %f, float* %q
+  br label %b
+b:
+  ret void
+}
+
+!0 = metadata !{metadata !"A", metadata !2}
+!1 = metadata !{metadata !"B", metadata !2}
+!2 = metadata !{metadata !"test"}
diff --git a/test/Archive/GNU.toc b/test/Archive/GNU.toc
deleted file mode 100644
index d9934139136c..000000000000
--- a/test/Archive/GNU.toc
+++ /dev/null
@@ -1,4 +0,0 @@
-evenlen
-oddlen
-very_long_bytecode_file_name.bc
-IsNAN.o
diff --git a/test/Archive/MacOSX.toc b/test/Archive/MacOSX.toc
deleted file mode 100644
index f971df73325c..000000000000
--- a/test/Archive/MacOSX.toc
+++ /dev/null
@@ -1,5 +0,0 @@
-__.SYMDEF SORTED
-evenlen
-oddlen
-very_long_bytecode_file_name.bc
-IsNAN.o
diff --git a/test/Archive/SVR4.toc b/test/Archive/SVR4.toc
deleted file mode 100644
index d9934139136c..000000000000
--- a/test/Archive/SVR4.toc
+++ /dev/null
@@ -1,4 +0,0 @@
-evenlen
-oddlen
-very_long_bytecode_file_name.bc
-IsNAN.o
diff --git a/test/Archive/extract.ll b/test/Archive/extract.ll
index 3649714259b8..714c5f1ed983 100644
--- a/test/Archive/extract.ll
+++ b/test/Archive/extract.ll
@@ -4,13 +4,13 @@
 ; from various style archives.
 
 ; RUN: llvm-ar x %p/GNU.a very_long_bytecode_file_name.bc
-; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
 
 ; RUN: llvm-ar x %p/MacOSX.a very_long_bytecode_file_name.bc
-; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc > /dev/null 2>/dev/null
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
 
 ; RUN: llvm-ar x %p/SVR4.a very_long_bytecode_file_name.bc
-; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
 
 ; RUN: llvm-ar x %p/xpg4.a very_long_bytecode_file_name.bc
-; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
diff --git a/test/Archive/toc_GNU.ll b/test/Archive/toc_GNU.ll
index 136f603a831e..9ed7d8eb8cbd 100644
--- a/test/Archive/toc_GNU.ll
+++ b/test/Archive/toc_GNU.ll
@@ -1,5 +1,8 @@
 ;This isn't really an assembly file, its just here to run the test.
 ;This test just makes sure that llvm-ar can generate a table of contents for
 ;GNU style archives
-;RUN: llvm-ar t %p/GNU.a > %t1
-;RUN: diff %t1 %p/GNU.toc
+;RUN: llvm-ar t %p/GNU.a | FileCheck %s
+;CHECK:      evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_MacOSX.ll b/test/Archive/toc_MacOSX.ll
index fb0322324081..6dbc9d2ea4a6 100644
--- a/test/Archive/toc_MacOSX.ll
+++ b/test/Archive/toc_MacOSX.ll
@@ -1,5 +1,9 @@
 ;This isn't really an assembly file, its just here to run the test.
 ;This test just makes sure that llvm-ar can generate a table of contents for
 ;MacOSX style archives
-;RUN: llvm-ar t %p/MacOSX.a > %t1
-;RUN: diff %t1 %p/MacOSX.toc
+;RUN: llvm-ar t %p/MacOSX.a | FileCheck %s
+;CHECK:      __.SYMDEF SORTED
+;CHECK-NEXT: evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_SVR4.ll b/test/Archive/toc_SVR4.ll
index 930a26fd353e..d447b9219990 100644
--- a/test/Archive/toc_SVR4.ll
+++ b/test/Archive/toc_SVR4.ll
@@ -1,5 +1,8 @@
 ;This isn't really an assembly file, its just here to run the test.
 ;This test just makes sure that llvm-ar can generate a table of contents for
 ;SVR4 style archives
-;RUN: llvm-ar t %p/SVR4.a > %t1
-;RUN: diff %t1 %p/SVR4.toc
+;RUN: llvm-ar t %p/SVR4.a | FileCheck %s
+;CHECK:      evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_xpg4.ll b/test/Archive/toc_xpg4.ll
index 441af036f3cf..fd875eebdaab 100644
--- a/test/Archive/toc_xpg4.ll
+++ b/test/Archive/toc_xpg4.ll
@@ -1,5 +1,8 @@
 ;This isn't really an assembly file, its just here to run the test.
 ;This test just makes sure that llvm-ar can generate a table of contents for
 ;xpg4 style archives
-;RUN: llvm-ar t %p/xpg4.a > %t1
-;RUN: diff %t1 %p/xpg4.toc
+;RUN: llvm-ar t %p/xpg4.a | FileCheck %s
+CHECK:      evenlen
+CHECK-NEXT: oddlen
+CHECK-NEXT: very_long_bytecode_file_name.bc
+CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/xpg4.toc b/test/Archive/xpg4.toc
deleted file mode 100644
index d9934139136c..000000000000
--- a/test/Archive/xpg4.toc
+++ /dev/null
@@ -1,4 +0,0 @@
-evenlen
-oddlen
-very_long_bytecode_file_name.bc
-IsNAN.o
diff --git a/test/Assembler/2003-05-21-MalformedShiftCrash.ll b/test/Assembler/2003-05-21-MalformedShiftCrash.ll
index c661f7c0771e..a845d89bb6c9 100644
--- a/test/Assembler/2003-05-21-MalformedShiftCrash.ll
+++ b/test/Assembler/2003-05-21-MalformedShiftCrash.ll
@@ -1,4 +1,4 @@
 ; Found by inspection of the code
-; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer or integer vector operands}
+; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer operands}
 
 global i32 ashr (float 1.0, float 2.0)
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index af4ec92e0991..6752bd8281bd 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -7,7 +7,7 @@
 ; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
 ; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {\\\<2 x i32\\\>} | count 6
+; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
 
 declare i32 @llvm.ctpop.i28(i28 %val)
 declare i32 @llvm.cttz.i29(i29 %val)
diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
new file mode 100644
index 000000000000..54120ff9dce6
--- /dev/null
+++ b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
@@ -0,0 +1,223 @@
+; Tests to make sure MMX intrinsics are automatically upgraded.
+; RUN: llvm-as < %s | llvm-dis -o %t
+; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<1 x i64\\\>}
+; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>}
+; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>}
+; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>}
+; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32
+
+; Addition
+declare <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>) nounwind readnone
+define void @add(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
+                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8> %A,  <8 x i8> %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16> %C, <4 x i16> %D)
+  %r3 = call <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32> %E, <2 x i32> %F)
+  %r4 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %G, <1 x i64> %H)
+  %r5 = call <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r6 = call <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16> %C, <4 x i16> %D)
+  %r7 = call <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r8 = call <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16> %C, <4 x i16> %D)
+  ret void
+}
+
+; Subtraction
+declare <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
+declare <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>) nounwind readnone
+define void @sub(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
+                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8> %A,  <8 x i8> %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16> %C, <4 x i16> %D)
+  %r3 = call <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32> %E, <2 x i32> %F)
+  %r4 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %G, <1 x i64> %H)
+  %r5 = call <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r6 = call <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16> %C, <4 x i16> %D)
+  %r7 = call <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r8 = call <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16> %C, <4 x i16> %D)
+  ret void
+}
+
+; Multiplication
+declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>) nounwind readnone
+define void @mul(<4 x i16> %A, <4 x i16> %B) {
+  %r1 = call <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16> %A, <4 x i16> %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16> %A, <4 x i16> %B)
+  %r3 = call <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16> %A, <4 x i16> %B)
+  %r4 = call <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16> %A, <4 x i16> %B)
+  %r5 = call <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16> %A, <4 x i16> %B)
+  ret void
+}
+
+; Bitwise operations
+declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>)  nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>)   nounwind readnone
+declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>)  nounwind readnone
+define void @bit(<1 x i64> %A, <1 x i64> %B) {
+  %r1 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %A, <1 x i64> %B)
+  %r2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %A, <1 x i64> %B)
+  %r3 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %A, <1 x i64> %B)
+  %r4 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %A, <1 x i64> %B)
+  ret void
+}
+
+; Averages
+declare <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16>, <4 x i16>) nounwind readnone
+define void @avg(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16> %C, <4 x i16> %D)
+  ret void
+}
+
+; Maximum
+declare <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16>, <4 x i16>) nounwind readnone
+define void @max(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16> %C, <4 x i16> %D)
+  ret void
+}
+
+; Minimum
+declare <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
+declare <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16>, <4 x i16>) nounwind readnone
+define void @min(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16> %C, <4 x i16> %D)
+  ret void
+}
+
+; Packed sum of absolute differences
+declare <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8>, <8 x i8>) nounwind readnone
+define void @psad(<8 x i8> %A, <8 x i8> %B) {
+  %r1 = call <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8> %A, <8 x i8> %B)
+  ret void
+}
+
+; Shift left
+declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <1 x i64>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <1 x i64>) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16>, i32) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32>, i32) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone 
+define void @shl(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
+  %r1 = call <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16> %A, <1 x i64> %C)
+  %r2 = call <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32> %B, <1 x i64> %C)
+  %r3 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %C, <1 x i64> %C)
+  %r4 = call <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16> %A, i32 %D)
+  %r5 = call <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32> %B, i32 %D)
+  %r6 = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %C, i32 %D)
+  ret void
+}
+
+; Shift right logical
+declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <1 x i64>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <1 x i64>) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32>, i32) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone 
+define void @shr(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
+  %r1 = call <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16> %A, <1 x i64> %C)
+  %r2 = call <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32> %B, <1 x i64> %C)
+  %r3 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %C, <1 x i64> %C)
+  %r4 = call <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16> %A, i32 %D)
+  %r5 = call <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32> %B, i32 %D)
+  %r6 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %C, i32 %D)
+  ret void
+}
+
+; Shift right arithmetic
+declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <1 x i64>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <1 x i64>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16>, i32) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32>, i32) nounwind readnone 
+define void @sra(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
+  %r1 = call <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16> %A, <1 x i64> %C)
+  %r2 = call <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32> %B, <1 x i64> %C)
+  %r3 = call <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16> %A, i32 %D)
+  %r4 = call <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32> %B, i32 %D)
+  ret void
+}
+
+; Pack/Unpack ops
+declare <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32>, <2 x i32>) nounwind readnone 
+declare <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>, <8 x i8>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32>, <2 x i32>) nounwind readnone 
+declare <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>, <8 x i8>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32>, <2 x i32>) nounwind readnone 
+define void @pack_unpack(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
+                         <2 x i32> %E, <2 x i32> %F) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16> %C, <4 x i16> %D)
+  %r2 = call <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32> %E, <2 x i32> %F)
+  %r3 = call <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16> %C, <4 x i16> %D)
+  %r4 = call <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>  %A, <8 x i8>  %B)
+  %r5 = call <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16> %C, <4 x i16> %D)
+  %r6 = call <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32> %E, <2 x i32> %F)
+  %r7 = call <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>  %A, <8 x i8>  %B)
+  %r8 = call <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16> %C, <4 x i16> %D)
+  %r9 = call <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32> %E, <2 x i32> %F)
+  ret void
+}
+
+; Integer comparison ops
+declare <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>, <8 x i8>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32>, <2 x i32>) nounwind readnone 
+declare <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>, <8 x i8>) nounwind readnone 
+declare <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16>, <4 x i16>) nounwind readnone 
+declare <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32>, <2 x i32>) nounwind readnone 
+define void @cmp(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
+                 <2 x i32> %E, <2 x i32> %F) {
+  %r1 = call <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r2 = call <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16> %C, <4 x i16> %D)
+  %r3 = call <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32> %E, <2 x i32> %F)
+  %r4 = call <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>  %A, <8 x i8>  %B)
+  %r5 = call <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16> %C, <4 x i16> %D)
+  %r6 = call <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32> %E, <2 x i32> %F)
+  ret void
+}
+
+; Miscellaneous
+declare void      @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i32*) nounwind readnone 
+declare i32       @llvm.x86.mmx.pmovmskb(<8 x i8>) nounwind readnone 
+declare void      @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>,  i8) nounwind readnone 
+declare i32       @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone 
+declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone 
+declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone 
+define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
+                  <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H,
+                  i32* %I, i8 %J, i16 %K, i32 %L) {
+        call void      @llvm.x86.mmx.maskmovq(<8 x i8> %A, <8 x i8> %B, i32* %I)
+  %r1 = call i32       @llvm.x86.mmx.pmovmskb(<8 x i8> %A)
+        call void      @llvm.x86.mmx.movnt.dq(i32* %I, <1 x i64> %G)
+  %r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J)
+  %r3 = call i32       @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37)
+  %r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927)
+  %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37)
+  ret void
+}
diff --git a/test/Assembler/extractvalue-invalid-idx.ll b/test/Assembler/extractvalue-invalid-idx.ll
new file mode 100644
index 000000000000..f9644eadbd59
--- /dev/null
+++ b/test/Assembler/extractvalue-invalid-idx.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as < %s |& grep {invalid indices for extractvalue}
+; PR4170
+
+define void @test() {
+entry:
+        extractvalue [0 x i32] undef, 0
+        ret void
+}
diff --git a/test/Assembler/flags.ll b/test/Assembler/flags.ll
index 324190905975..310b807c5d56 100644
--- a/test/Assembler/flags.ll
+++ b/test/Assembler/flags.ll
@@ -92,6 +92,12 @@ define i64 @mul_both_reversed(i64 %x, i64 %y) {
 	ret i64 %z
 }
 
+define i64 @shl_both(i64 %x, i64 %y) {
+; CHECK: %z = shl nuw nsw i64 %x, %y
+	%z = shl nuw nsw i64 %x, %y
+	ret i64 %z
+}
+
 define i64 @sdiv_exact(i64 %x, i64 %y) {
 ; CHECK: %z = sdiv exact i64 %x, %y
 	%z = sdiv exact i64 %x, %y
@@ -104,6 +110,42 @@ define i64 @sdiv_plain(i64 %x, i64 %y) {
 	ret i64 %z
 }
 
+define i64 @udiv_exact(i64 %x, i64 %y) {
+; CHECK: %z = udiv exact i64 %x, %y
+	%z = udiv exact i64 %x, %y
+	ret i64 %z
+}
+
+define i64 @udiv_plain(i64 %x, i64 %y) {
+; CHECK: %z = udiv i64 %x, %y
+	%z = udiv i64 %x, %y
+	ret i64 %z
+}
+
+define i64 @ashr_plain(i64 %x, i64 %y) {
+; CHECK: %z = ashr i64 %x, %y
+	%z = ashr i64 %x, %y
+	ret i64 %z
+}
+
+define i64 @ashr_exact(i64 %x, i64 %y) {
+; CHECK: %z = ashr exact i64 %x, %y
+	%z = ashr exact i64 %x, %y
+	ret i64 %z
+}
+
+define i64 @lshr_plain(i64 %x, i64 %y) {
+; CHECK: %z = lshr i64 %x, %y
+	%z = lshr i64 %x, %y
+	ret i64 %z
+}
+
+define i64 @lshr_exact(i64 %x, i64 %y) {
+; CHECK: %z = lshr exact i64 %x, %y
+	%z = lshr exact i64 %x, %y
+	ret i64 %z
+}
+
 define i64* @gep_nw(i64* %p, i64 %x) {
 ; CHECK: %z = getelementptr inbounds i64* %p, i64 %x
 	%z = getelementptr inbounds i64* %p, i64 %x
@@ -136,6 +178,21 @@ define i64 @sdiv_exact_ce() {
 	ret i64 sdiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
 }
 
+define i64 @udiv_exact_ce() {
+; CHECK: ret i64 udiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
+	ret i64 udiv exact (i64 ptrtoint (i64* @addr to i64), i64 91)
+}
+
+define i64 @ashr_exact_ce() {
+; CHECK: ret i64 ashr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+	ret i64 ashr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+}
+
+define i64 @lshr_exact_ce() {
+; CHECK: ret i64 lshr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+	ret i64 lshr exact (i64 ptrtoint (i64* @addr to i64), i64 9)
+}
+
 define i64* @gep_nw_ce() {
 ; CHECK: ret i64* getelementptr inbounds (i64* @addr, i64 171)
         ret i64* getelementptr inbounds (i64* @addr, i64 171)
@@ -196,6 +253,12 @@ define i64 @mul_signed_ce() {
 	ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91)
 }
 
+define i64 @shl_signed_ce() {
+; CHECK: ret i64 shl nsw (i64 ptrtoint (i64* @addr to i64), i64 17)
+	ret i64 shl nsw (i64 ptrtoint (i64* @addr to i64), i64 17)
+}
+
+
 define i64 @add_unsigned_ce() {
 ; CHECK: ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
 	ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
@@ -210,3 +273,4 @@ define i64 @mul_unsigned_ce() {
 ; CHECK: ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
 	ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91)
 }
+
diff --git a/test/Assembler/insertvalue-invalid-idx.ll b/test/Assembler/insertvalue-invalid-idx.ll
new file mode 100644
index 000000000000..86e7258cc593
--- /dev/null
+++ b/test/Assembler/insertvalue-invalid-idx.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s |& grep {invalid indices for insertvalue}
+
+define void @test() {
+entry:
+        insertvalue [0 x i32] undef, i32 0, 0
+        ret void
+}
diff --git a/test/Assembler/unnamed-addr.ll b/test/Assembler/unnamed-addr.ll
new file mode 100644
index 000000000000..3c94ca213081
--- /dev/null
+++ b/test/Assembler/unnamed-addr.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+%struct.foobar = type { i32 }
+
+@bar.d = internal unnamed_addr constant %struct.foobar zeroinitializer, align 4
+@foo.d = internal constant %struct.foobar zeroinitializer, align 4
+
+define i32 @main() unnamed_addr nounwind ssp {
+entry:
+  %call2 = tail call i32 @zed(%struct.foobar* @foo.d, %struct.foobar* @bar.d) nounwind
+  ret i32 0
+}
+
+declare i32 @zed(%struct.foobar*, %struct.foobar*)
+
+; CHECK: @bar.d = internal unnamed_addr constant %struct.foobar zeroinitializer, align 4
+; CHECK: @foo.d = internal constant %struct.foobar zeroinitializer, align 4
+; CHECK: define i32 @main() unnamed_addr nounwind ssp {
diff --git a/test/Assembler/x86mmx.ll b/test/Assembler/x86mmx.ll
new file mode 100644
index 000000000000..732d3be8619d
--- /dev/null
+++ b/test/Assembler/x86mmx.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; Basic smoke test for x86_mmx type.
+
+; CHECK: define x86_mmx @sh16
+define x86_mmx  @sh16(x86_mmx %A) {
+; CHECK: ret x86_mmx %A
+        ret x86_mmx %A
+}
diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml
index bf2178254409..7df8e21203a9 100644
--- a/test/Bindings/Ocaml/analysis.ml
+++ b/test/Bindings/Ocaml/analysis.ml
@@ -1,5 +1,6 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t
  * RUN: %t
+ * XFAIL: vg_leak
  *)
 
 open Llvm
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
index 30b07d2199f3..a3bd91a0af2a 100644
--- a/test/Bindings/Ocaml/bitreader.ml
+++ b/test/Bindings/Ocaml/bitreader.ml
@@ -1,6 +1,7 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc | grep caml_int_ty
+ * XFAIL: vg_leak
  *)
 
 (* Note that this takes a moment to link, so it's best to keep the number of
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index 8eb923ea32c7..3f55fb9fa1d4 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,6 +1,7 @@
 (* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc | grep caml_int_ty
+ * XFAIL: vg_leak
  *)
 
 (* Note that this takes a moment to link, so it's best to keep the number of
diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml
index 63040e4a33fc..f7a49bb284c0 100644
--- a/test/Bindings/Ocaml/executionengine.ml
+++ b/test/Bindings/Ocaml/executionengine.ml
@@ -1,5 +1,6 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t
  * RUN: %t
+ * XFAIL: vg_leak
  *)
 
 open Llvm
diff --git a/test/Bindings/Ocaml/ext_exc.ml b/test/Bindings/Ocaml/ext_exc.ml
new file mode 100644
index 000000000000..b4d2e6dc6414
--- /dev/null
+++ b/test/Bindings/Ocaml/ext_exc.ml
@@ -0,0 +1,17 @@
+(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %s -o %t
+ * RUN: %t </dev/null
+ * XFAIL: vg_leak
+ *)
+let context = Llvm.global_context ()
+(* this used to crash, we must not use 'external' in .mli files, but 'val' if we
+ * want the let _ bindings executed, see http://caml.inria.fr/mantis/view.php?id=4166 *)
+let _ =
+    try
+        ignore (Llvm_bitreader.get_module context (Llvm.MemoryBuffer.of_stdin ()))
+    with
+    Llvm_bitreader.Error _ -> ();;
+let _ =
+    try
+        ignore (Llvm.MemoryBuffer.of_file "/path/to/nonexistent/file")
+    with
+    Llvm.IoError _ -> ();;
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
index 8a6af012ac24..1ea97858edf6 100644
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ b/test/Bindings/Ocaml/scalar_opts.ml
@@ -1,5 +1,6 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t
  * RUN: %t %t.bc
+ * XFAIL: vg_leak
  *)
 
 (* Note: It takes several seconds for ocamlopt to link an executable with
@@ -42,7 +43,7 @@ let test_transforms () =
   ignore (PassManager.create_function m
            ++ TargetData.add td
            ++ add_constant_propagation
-					 ++ add_sccp
+           ++ add_sccp
            ++ add_dead_store_elimination
            ++ add_aggressive_dce
            ++ add_scalar_repl_aggregation
@@ -52,7 +53,6 @@ let test_transforms () =
            ++ add_loop_unswitch
            ++ add_loop_unroll
            ++ add_loop_rotation
-           ++ add_loop_index_split
            ++ add_memory_to_register_promotion
            ++ add_memory_to_register_demotion
            ++ add_reassociation
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
index bfaf37ca879d..5e3ab4bf9354 100644
--- a/test/Bindings/Ocaml/target.ml
+++ b/test/Bindings/Ocaml/target.ml
@@ -1,5 +1,6 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t
  * RUN: %t %t.bc
+ * XFAIL: vg_leak
  *)
 
 (* Note: It takes several seconds for ocamlopt to link an executable with
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index e55ab9643e43..ceb650e1ca4a 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -1,6 +1,7 @@
 (* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc > %t.ll
+ * XFAIL: vg_leak
  *)
 
 (* Note: It takes several seconds for ocamlopt to link an executable with
diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll
new file mode 100644
index 000000000000..5d3dfab5753a
--- /dev/null
+++ b/test/Bitcode/null-type.ll
@@ -0,0 +1,2 @@
+; RUN: not llvm-dis < %s.bc > /dev/null |& grep "Invalid MODULE_CODE_FUNCTION record"
+; PR8494
diff --git a/test/Bitcode/null-type.ll.bc b/test/Bitcode/null-type.ll.bc
new file mode 100644
index 000000000000..f0a97d94ce90
Binary files /dev/null and b/test/Bitcode/null-type.ll.bc differ
diff --git a/test/Bitcode/ssse3_palignr.ll.bc b/test/Bitcode/ssse3_palignr.ll.bc
index 642f4dedc417..3fc9cdf15a35 100644
Binary files a/test/Bitcode/ssse3_palignr.ll.bc and b/test/Bitcode/ssse3_palignr.ll.bc differ
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll
index 18a31eb45d36..d080d9dd4b0c 100644
--- a/test/BugPoint/crash-narrowfunctiontest.ll
+++ b/test/BugPoint/crash-narrowfunctiontest.ll
@@ -1,8 +1,7 @@
 ; Test that bugpoint can narrow down the testcase to the important function
-; FIXME: This likely fails on windows
 ;
-; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
-; XFAIL: mingw
+; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
+; REQUIRES: loadable_module
 
 define i32 @foo() { ret i32 1 }
 
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
index f2541ee3f9ac..0eda5667ba4a 100644
--- a/test/BugPoint/metadata.ll
+++ b/test/BugPoint/metadata.ll
@@ -1,6 +1,6 @@
-; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
+; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
-; XFAIL: mingw
+; REQUIRES: loadable_module
 
 ; Bugpoint should keep the call's metadata attached to the call.
 
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index 791ec69a23d2..29a03b831077 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,7 +1,6 @@
-; FIXME: This likely fails on windows
-; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
+; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
-; XFAIL: mingw
+; REQUIRES: loadable_module
 
 ; Test to make sure that arguments are removed from the function if they are 
 ; unnecessary. And clean up any types that that frees up too.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ad9a2432dbfa..302e141a3ed1 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,8 @@ set(TARGETS_TO_BUILD ${TARGETS_BUILT})
 set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}")
 set(SHLIBEXT "${LTDL_SHLIB_EXT}")
 
+set(SHLIBDIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}")
+
 if(BUILD_SHARED_LIBS)
   set(LLVM_SHARED_LIBS_ENABLED "1")
 else()
@@ -18,12 +20,15 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 else() # Default for all other unix like systems.
   # CMake hardcodes the library locaction using rpath.
   # Therefore LD_LIBRARY_PATH is not required to run binaries in the
-  # build dir. We pass it anyways. 
+  # build dir. We pass it anyways.
   set(SHLIBPATH_VAR "LD_LIBRARY_PATH")
 endif()
 
 include(FindPythonInterp)
 if(PYTHONINTERP_FOUND)
+  set(LIT_ARGS "${LLVM_LIT_ARGS}")
+  separate_arguments(LIT_ARGS)
+
   get_directory_property(DEFINITIONS COMPILE_DEFINITIONS)
   foreach(DEF ${DEFINITIONS})
     set(DEFS "${DEFS} -D${DEF}")
@@ -47,30 +52,44 @@ if(PYTHONINTERP_FOUND)
 
   MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
 
+  # Configuration-time: See Unit/lit.site.cfg.in
+  set(LLVM_BUILD_MODE "%(build_mode)s")
+
+  set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
+  set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
+  set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
+  set(LLVMGCCDIR "")
+  set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
+  set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
+  set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
+
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+    @ONLY)
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+    @ONLY)
+
   add_custom_target(check
-    COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
-                -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
-                -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_BINARY_DIR}/${CMAKE_CFG_INTDIR}#"
-                -e "s#\@LLVMGCCDIR\@##"
-                -e "s#\@PYTHON_EXECUTABLE\@#${PYTHON_EXECUTABLE}#"
-                ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in >
-                ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
-    COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
-                -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
-                -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_BINARY_DIR}/${CMAKE_CFG_INTDIR}#"
-                -e "s#\@LLVMGCCDIR\@##"
-                -e "s#\@LLVM_BUILD_MODE\@#${CMAKE_CFG_INTDIR}#"
-                -e "s#\@ENABLE_SHARED\@#${LLVM_SHARED_LIBS_ENABLED}#"
-                -e "s#\@SHLIBPATH_VAR\@#${SHLIBPATH_VAR}#"
-                ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in >
-                ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
     COMMAND ${PYTHON_EXECUTABLE}
                 ${LLVM_SOURCE_DIR}/utils/lit/lit.py
                 --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
                 --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
-                -sv
+                --param build_config=${CMAKE_CFG_INTDIR}
+                --param build_mode=${RUNTIME_BUILD_MODE}
+                ${LIT_ARGS}
                 ${CMAKE_CURRENT_BINARY_DIR}
-                DEPENDS
                 COMMENT "Running LLVM regression tests")
 
+  add_custom_target(check.deps)
+  add_dependencies(check check.deps)
+  add_dependencies(check.deps
+                UnitTests
+                BugpointPasses LLVMHello
+                llc lli llvm-ar llvm-as llvm-dis llvm-extract
+                llvm-ld llvm-link llvm-mc llvm-nm macho-dump opt
+                FileCheck count not)
+
 endif()
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index ee63656b26d3..3694aaad5549 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@@ -8,8 +8,9 @@
 define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
 entry:
 ; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK: push
 ; CHECK-NOT: sub sp
-; CHECK: vstmdb sp!
+; CHECK: push 
 	%predicted_block = alloca [4 x [4 x i32]], align 4		; <[4 x [4 x i32]]*> [#uses=1]
 	br label %cond_next489
 
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
deleted file mode 100644
index 5cfc68d09408..000000000000
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
-target triple = "armv7-apple-darwin9"
-
-@.str = external constant [36 x i8], align 1      ; <[36 x i8]*> [#uses=0]
-@.str1 = external constant [31 x i8], align 1     ; <[31 x i8]*> [#uses=1]
-@.str2 = external constant [4 x i8], align 1      ; <[4 x i8]*> [#uses=1]
-
-declare i32 @getUnknown(i32, ...) nounwind
-
-declare void @llvm.va_start(i8*) nounwind
-
-declare void @llvm.va_end(i8*) nounwind
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-define i32 @main() nounwind {
-entry:
-  %0 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
-  %1 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
-  %2 = tail call  i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
-  %3 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
-  ret i32 0
-}
diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
deleted file mode 100644
index 06a152d56e4d..000000000000
--- a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-apple-darwin9"
-
-@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
-@nodes = internal global i64 0                    ; <i64*> [#uses=4]
-@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
-@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
-@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
-@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
-@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
-@.str6 = private constant [28 x i8] c"score = %d (%c)  work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
-@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
-@plycnt = internal global i32 0                   ; <i32*> [#uses=21]
-@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
-@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
-@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
-@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
-@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
-@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
-@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
-@he = internal global i8* null                    ; <i8**> [#uses=9]
-@hits = internal global i64 0                     ; <i64*> [#uses=8]
-@posed = internal global i64 0                    ; <i64*> [#uses=7]
-@ht = internal global i32* null                   ; <i32**> [#uses=5]
-@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
-@.str117 = private constant [45 x i8] c"- %5.3f  < %5.3f  = %5.3f  > %5.3f  + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
-@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
-
-declare i32 @puts(i8* nocapture) nounwind
-
-declare i32 @getchar() nounwind
-
-define internal i32 @transpose() nounwind readonly {
-; CHECK: push
-entry:
-  %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
-  %1 = shl i32 %0, 7                              ; <i32> [#uses=1]
-  %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
-  %3 = or i32 %1, %2                              ; <i32> [#uses=1]
-  %4 = shl i32 %3, 7                              ; <i32> [#uses=1]
-  %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
-  %6 = or i32 %4, %5                              ; <i32> [#uses=3]
-  %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
-  %8 = shl i32 %7, 7                              ; <i32> [#uses=1]
-  %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
-  %10 = or i32 %8, %9                             ; <i32> [#uses=1]
-  %11 = shl i32 %10, 7                            ; <i32> [#uses=1]
-  %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
-  %13 = or i32 %11, %12                           ; <i32> [#uses=3]
-  %14 = icmp ugt i32 %6, %13                      ; <i1> [#uses=2]
-  %.pn2.in.i = select i1 %14, i32 %6, i32 %13     ; <i32> [#uses=1]
-  %.pn1.in.i = select i1 %14, i32 %13, i32 %6     ; <i32> [#uses=1]
-  %.pn2.i = shl i32 %.pn2.in.i, 7                 ; <i32> [#uses=1]
-  %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
-  %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i          ; <i32> [#uses=1]
-  %.pn.in.i = zext i32 %.pn.in.in.i to i64        ; <i64> [#uses=1]
-  %.pn.i = shl i64 %.pn.in.i, 21                  ; <i64> [#uses=1]
-  %.pn1.i = zext i32 %.pn1.in.i to i64            ; <i64> [#uses=1]
-  %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i          ; <i64> [#uses=2]
-  %15 = lshr i64 %iftmp.22.0.i, 17                ; <i64> [#uses=1]
-  %16 = trunc i64 %15 to i32                      ; <i32> [#uses=2]
-  %17 = urem i64 %iftmp.22.0.i, 1050011           ; <i64> [#uses=1]
-  %18 = trunc i64 %17 to i32                      ; <i32> [#uses=1]
-  %19 = urem i32 %16, 179                         ; <i32> [#uses=1]
-  %20 = or i32 %19, 131072                        ; <i32> [#uses=1]
-  %21 = load i32** @ht, align 4                   ; <i32*> [#uses=1]
-  br label %bb5
-
-bb:                                               ; preds = %bb5
-  %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
-  %23 = load i32* %22, align 4                    ; <i32> [#uses=1]
-  %24 = icmp eq i32 %23, %16                      ; <i1> [#uses=1]
-  br i1 %24, label %bb1, label %bb2
-
-bb1:                                              ; preds = %bb
-  %25 = load i8** @he, align 4                    ; <i8*> [#uses=1]
-  %26 = getelementptr inbounds i8* %25, i32 %x.0  ; <i8*> [#uses=1]
-  %27 = load i8* %26, align 1                     ; <i8> [#uses=1]
-  %28 = sext i8 %27 to i32                        ; <i32> [#uses=1]
-  ret i32 %28
-
-bb2:                                              ; preds = %bb
-  %29 = add nsw i32 %20, %x.0                     ; <i32> [#uses=3]
-  %30 = add i32 %29, -1050011                     ; <i32> [#uses=1]
-  %31 = icmp sgt i32 %29, 1050010                 ; <i1> [#uses=1]
-  %. = select i1 %31, i32 %30, i32 %29            ; <i32> [#uses=1]
-  %32 = add i32 %33, 1                            ; <i32> [#uses=1]
-  br label %bb5
-
-bb5:                                              ; preds = %bb2, %entry
-  %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ]      ; <i32> [#uses=2]
-  %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ]    ; <i32> [#uses=3]
-  %34 = icmp sgt i32 %33, 7                       ; <i1> [#uses=1]
-  br i1 %34, label %bb7, label %bb
-
-bb7:                                              ; preds = %bb5
-  ret i32 -128
-}
-
-declare noalias i8* @calloc(i32, i32) nounwind
-
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
index 4aa879dc4092..0fe3b39a622d 100644
--- a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -5,7 +5,7 @@
 
 define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
 ; CHECK: foo:
-; CHECK: bl __adddf3
+; CHECK: bl __aeabi_dadd
 ; CHECK-NOT: strd
 ; CHECK: mov
   %x76 = fmul double %y.0, 0.000000e+00           ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index 89c9037bd9f6..ca5ae8b62e8b 100644
--- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vdup.16
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
 
@@ -7,6 +7,7 @@ entry:
   br i1 undef, label %return, label %bb
 
 bb:                                               ; preds = %bb, %entry
+; CHECK: vld1.16 {d16[], d17[]}
   %0 = load i16* undef, align 2
   %1 = insertelement <8 x i16> undef, i16 %0, i32 2
   %2 = insertelement <8 x i16> %1, i16 undef, i32 3
diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
index 31525eff4461..d9e1a1486a3c 100644
--- a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
+++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=V4
 ; RUN: llc < %s -mtriple=armv5-unknown-eabi | FileCheck %s
 ; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s
 
@@ -7,6 +7,8 @@ entry:
   %0 = tail call i32 @foo(i32 %a) nounwind ; <i32> [#uses=1]
   %1 = add nsw i32 %0, 3                          ; <i32> [#uses=1]
 ; CHECK: ldmia	sp!, {r11, pc}
+; V4: pop
+; V4-NEXT: mov pc, lr
   ret i32 %1
 }
 
diff --git a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
index 8a24cfa39785..642268992062 100644
--- a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll b/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
deleted file mode 100644
index 2a4bbd1d8cc6..000000000000
--- a/test/CodeGen/ARM/2010-05-17-DAGCombineAssert.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8
-; PR7158
-
-define arm_aapcs_vfpcc i32 @main() nounwind {
-bb.nph55.bb.nph55.split_crit_edge:
-  br label %bb3
-
-bb3:                                              ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
-  br i1 undef, label %bb.i19, label %bb3
-
-bb.i19:                                           ; preds = %bb.i19, %bb3
-  %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3]
-  %1 = fmul <4 x float> %0, %0                    ; <<4 x float>> [#uses=1]
-  %2 = bitcast <4 x float> %1 to <2 x double>     ; <<2 x double>> [#uses=0]
-  %3 = fmul <4 x float> %0, undef                 ; <<4 x float>> [#uses=0]
-  br label %bb.i19
-}
diff --git a/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll b/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
deleted file mode 100644
index ad2810b5bb9a..000000000000
--- a/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon
-
-define void @main() nounwind {
-entry:
-  store <2 x i64> undef, <2 x i64>* undef, align 16
-  %0 = load <16 x i8>* undef, align 16            ; <<16 x i8>> [#uses=1]
-  %1 = or <16 x i8> zeroinitializer, %0           ; <<16 x i8>> [#uses=1]
-  store <16 x i8> %1, <16 x i8>* undef, align 16
-  ret void
-}
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index ffc47ebdf196..b9d5600d2ad8 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -10,9 +10,9 @@ target triple = "thumbv7-apple-darwin10"
 ; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial
 ; redef, it cannot also get %Q0.
 
-; CHECK: vld1.64 {d0, d1}, [r{{.}}]
-; CHECK-NOT: vld1.64 {d0, d1}
-; CHECK: vmov.f64 d3, d0
+; CHECK: vld1.64 {d16, d17}, [r{{.}}]
+; CHECK-NOT: vld1.64 {d16, d17}
+; CHECK: vmov.f64 d19, d16
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll b/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
new file mode 100644
index 000000000000..d2820918626a
--- /dev/null
+++ b/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+declare noalias i8* @malloc(i32) nounwind
+
+define internal void @gl_DrawPixels(i32 %width, i32 %height, i32 %format, i32 %type, i8* %pixels) nounwind {
+entry:
+  br i1 undef, label %bb3.i, label %bb3
+
+bb3.i:                                            ; preds = %entry
+  unreachable
+
+gl_error.exit:                                    ; preds = %bb22
+  ret void
+
+bb3:                                              ; preds = %entry
+  br i1 false, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3
+  br i1 undef, label %bb19, label %bb22
+
+bb19:                                             ; preds = %bb5
+  switch i32 %type, label %bb3.i6.i [
+    i32 5120, label %bb1.i13
+    i32 5121, label %bb1.i13
+    i32 6656, label %bb9.i.i6
+  ]
+
+bb9.i.i6:                                         ; preds = %bb19
+  br label %bb1.i13
+
+bb3.i6.i:                                         ; preds = %bb19
+  unreachable
+
+bb1.i13:                                          ; preds = %bb9.i.i6, %bb19, %bb19
+  br i1 undef, label %bb3.i17, label %bb2.i16
+
+bb2.i16:                                          ; preds = %bb1.i13
+  unreachable
+
+bb3.i17:                                          ; preds = %bb1.i13
+  br i1 undef, label %bb4.i18, label %bb23.i
+
+bb4.i18:                                          ; preds = %bb3.i17
+  %0 = mul nsw i32 %height, %width
+  %1 = and i32 %0, 7
+  %not..i = icmp ne i32 %1, 0
+  %2 = zext i1 %not..i to i32
+  %storemerge2.i = add i32 0, %2
+  %3 = call noalias i8* @malloc(i32 %storemerge2.i) nounwind
+  br i1 undef, label %bb3.i9, label %bb9.i
+
+bb9.i:                                            ; preds = %bb4.i18
+  br i1 undef, label %bb13.i19, label %bb.i24.i
+
+bb13.i19:                                         ; preds = %bb9.i
+  br i1 undef, label %bb14.i20, label %bb15.i
+
+bb14.i20:                                         ; preds = %bb13.i19
+  unreachable
+
+bb15.i:                                           ; preds = %bb13.i19
+  unreachable
+
+bb.i24.i:                                         ; preds = %bb.i24.i, %bb9.i
+  %storemerge1.i21.i = phi i32 [ %4, %bb.i24.i ], [ 0, %bb9.i ]
+  %4 = add i32 %storemerge1.i21.i, 1
+  %exitcond47.i = icmp eq i32 %4, %storemerge2.i
+  br i1 %exitcond47.i, label %bb22, label %bb.i24.i
+
+bb23.i:                                           ; preds = %bb3.i17
+  unreachable
+
+bb3.i9:                                           ; preds = %bb4.i18
+  unreachable
+
+bb22:                                             ; preds = %bb.i24.i, %bb5
+  br i1 undef, label %gl_error.exit, label %bb23
+
+bb23:                                             ; preds = %bb22
+  ret void
+}
diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
new file mode 100644
index 000000000000..bda14bcb1520
--- /dev/null
+++ b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; This tests that MC/asm header conversion is smooth
+;
+; CHECK:      .syntax unified
+; CHECK: .eabi_attribute 20, 1
+; CHECK: .eabi_attribute 21, 1
+; CHECK: .eabi_attribute 23, 3
+; CHECK: .eabi_attribute 24, 1
+; CHECK: .eabi_attribute 25, 1
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
new file mode 100644
index 000000000000..ee443febcc1e
--- /dev/null
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -0,0 +1,37 @@
+; RUN: llc  %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=BASIC %s 
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
+; RUN:    -mattr=-neon -mattr=+vfp2 \
+; RUN:    -arm-reserve-r9 -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=CORTEXA8 %s
+
+
+; This tests that the extpected ARM attributes are emitted.
+;
+; BASIC:        .ARM.attributes
+; BASIC-NEXT:         0x70000003
+; BASIC-NEXT:         0x00000000
+; BASIC-NEXT:         0x00000000
+; BASIC-NEXT:         0x0000003c
+; BASIC-NEXT:         0x00000020
+; BASIC-NEXT:         0x00000000
+; BASIC-NEXT:         0x00000000
+; BASIC-NEXT:         0x00000001
+; BASIC-NEXT:         0x00000000
+; BASIC-NEXT:         '411f0000 00616561 62690001 15000000 06020801 09011401 15011703 18011901'
+
+; CORTEXA8:        .ARM.attributes
+; CORTEXA8-NEXT:         0x70000003
+; CORTEXA8-NEXT:         0x00000000
+; CORTEXA8-NEXT:         0x00000000
+; CORTEXA8-NEXT:         0x0000003c
+; CORTEXA8-NEXT:         0x0000002f
+; CORTEXA8-NEXT:         0x00000000
+; CORTEXA8-NEXT:         0x00000000
+; CORTEXA8-NEXT:         0x00000001
+; CORTEXA8-NEXT:         0x00000000
+; CORTEXA8-NEXT:         '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901'
+
+define i32 @f(i64 %z) {
+       ret i32 0
+}
diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
new file mode 100644
index 000000000000..163c9b030ec8
--- /dev/null
+++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s
+; Radar 8589805: Counting the number of microcoded operations, such as for an
+; LDM instruction, was causing an assertion failure because the microop count
+; was being treated as an instruction count.
+
+; CHECK: push
+; CHECK: ldmia
+; CHECK: ldmia
+; CHECK: ldmia
+
+define i32 @test(i32 %x) {
+entry:
+  %0 = tail call signext i16 undef(i32* undef)
+  switch i32 undef, label %bb3 [
+    i32 0, label %bb4
+    i32 1, label %bb1
+    i32 2, label %bb2
+  ]
+
+bb1:
+  ret i32 1
+
+bb2:
+  ret i32 2
+
+bb3:
+  ret i32 1
+
+bb4:
+  ret i32 3
+}
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
new file mode 100644
index 000000000000..04220949027f
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=inline
+; PR8612
+;
+; This test has an inline asm with early-clobber arguments.
+; It is big enough that one of the early clobber registers is spilled.
+;
+; All the spillers would get the live ranges wrong when spilling an early
+; clobber, allowing the undef register to be allocated to the same register as
+; the early clobber.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32"
+target triple = "armv7-eabi"
+
+%0 = type { i32, i32 }
+
+define void @foo(i32* %in) nounwind {
+entry:
+  br label %bb.i
+
+bb.i:                                             ; preds = %bb.i, %entry
+  br i1 undef, label %bb10.preheader.i, label %bb.i
+
+bb10.preheader.i:                                 ; preds = %bb.i
+  br label %bb10.i
+
+bb10.i:                                           ; preds = %bb10.i, %bb10.preheader.i
+  br i1 undef, label %bb27.i, label %bb10.i
+
+bb27.i:                                           ; preds = %bb10.i
+  br label %bb28.i
+
+bb28.i:                                           ; preds = %bb28.i, %bb27.i
+  br i1 undef, label %presymmetry.exit, label %bb28.i
+
+presymmetry.exit:                                 ; preds = %bb28.i
+  %tmp175387 = or i32 undef, 12
+  %scevgep101.i = getelementptr i32* %in, i32 undef
+  %tmp189401 = or i32 undef, 7
+  %scevgep97.i = getelementptr i32* %in, i32 undef
+  %tmp198410 = or i32 undef, 1
+  %scevgep.i48 = getelementptr i32* %in, i32 undef
+  %0 = load i32* %scevgep.i48, align 4
+  %1 = add nsw i32 %0, 0
+  store i32 %1, i32* undef, align 4
+  %asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind
+  %asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1
+  %2 = shl i32 %asmresult1.i.i34.i.i.i, 1
+  %3 = load i32* null, align 4
+  %4 = load i32* undef, align 4
+  %5 = sub nsw i32 %3, %4
+  %6 = load i32* undef, align 4
+  %7 = load i32* null, align 4
+  %8 = sub nsw i32 %6, %7
+  %9 = load i32* %scevgep97.i, align 4
+  %10 = load i32* undef, align 4
+  %11 = sub nsw i32 %9, %10
+  %12 = load i32* null, align 4
+  %13 = load i32* %scevgep101.i, align 4
+  %14 = sub nsw i32 %12, %13
+  %15 = load i32* %scevgep.i48, align 4
+  %16 = load i32* null, align 4
+  %17 = add nsw i32 %16, %15
+  %18 = sub nsw i32 %15, %16
+  %19 = load i32* undef, align 4
+  %20 = add nsw i32 %19, %2
+  %21 = sub nsw i32 %19, %2
+  %22 = add nsw i32 %14, %5
+  %23 = sub nsw i32 %5, %14
+  %24 = add nsw i32 %11, %8
+  %25 = sub nsw i32 %8, %11
+  %26 = add nsw i32 %21, %23
+  store i32 %26, i32* %scevgep.i48, align 4
+  %27 = sub nsw i32 %25, %18
+  store i32 %27, i32* null, align 4
+  %28 = sub nsw i32 %23, %21
+  store i32 %28, i32* undef, align 4
+  %29 = add nsw i32 %18, %25
+  store i32 %29, i32* undef, align 4
+  %30 = add nsw i32 %17, %22
+  store i32 %30, i32* %scevgep101.i, align 4
+  %31 = add nsw i32 %20, %24
+  store i32 %31, i32* null, align 4
+  unreachable
+}
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
new file mode 100644
index 000000000000..8d7541feae94
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; rdar://8690640
+
+define i32* @t(i32* %x) nounwind {
+entry:
+; ARM: t:
+; ARM: push
+; ARM: mov r7, sp
+; ARM: bl _foo
+; ARM: bl _foo
+; ARM: bl _foo
+; ARM: ldmia sp!, {r7, pc}
+
+; THUMB2: t:
+; THUMB2: push
+; THUMB2: mov r7, sp
+; THUMB2: blx _foo
+; THUMB2: blx _foo
+; THUMB2: blx _foo
+; THUMB2: pop
+  %0 = tail call i32* @foo(i32* %x) nounwind
+  %1 = tail call i32* @foo(i32* %0) nounwind
+  %2 = tail call i32* @foo(i32* %1) nounwind
+  ret i32* %2
+}
+
+declare i32* @foo(i32*)
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
new file mode 100644
index 000000000000..930cd8d41563
--- /dev/null
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -0,0 +1,42 @@
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+
+target triple = "armv7-none-linux-gnueabi"
+
+@a = external global i8
+
+define arm_aapcs_vfpcc i32 @barf() nounwind {
+entry:
+  %0 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
+  ret i32 %0
+; OBJ:         '.text'
+; OBJ-NEXT:    'sh_type'
+; OBJ-NEXT:    'sh_flags'
+; OBJ-NEXT:    'sh_addr'
+; OBJ-NEXT:    'sh_offset'
+; OBJ-NEXT:    'sh_size'
+; OBJ-NEXT:    'sh_link'
+; OBJ-NEXT:    'sh_info'
+; OBJ-NEXT:    'sh_addralign'
+; OBJ-NEXT:    'sh_entsize'
+; OBJ-NEXT:    '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8'
+
+; OBJ:            Relocation 0x00000000
+; OBJ-NEXT:       'r_offset', 0x00000004
+; OBJ-NEXT:       'r_sym', 0x00000007
+; OBJ-NEXT:        'r_type', 0x0000002b
+
+; OBJ:          Relocation 0x00000001
+; OBJ-NEXT:       'r_offset', 0x00000008
+; OBJ-NEXT:       'r_sym'
+; OBJ-NEXT:        'r_type', 0x0000002c
+
+; OBJ:          # Relocation 0x00000002
+; OBJ-NEXT:       'r_offset', 0x0000000c
+; OBJ-NEXT:       'r_sym', 0x00000008
+; OBJ-NEXT:       'r_type', 0x0000001c
+
+}
+
+declare arm_aapcs_vfpcc i32 @foo(i8*)
+
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
new file mode 100644
index 000000000000..c65952be3c64
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; rdar://8728956
+
+define hidden void @foo() nounwind ssp {
+entry:
+; CHECK: foo:
+; CHECK: push {r7, lr}
+; CHECK-NEXT: mov r7, sp
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vpush {d10, d11}
+  %tmp40 = load <4 x i8>* undef
+  %tmp41 = extractelement <4 x i8> %tmp40, i32 2
+  %conv42 = zext i8 %tmp41 to i32
+  %conv43 = sitofp i32 %conv42 to float
+  %div44 = fdiv float %conv43, 2.560000e+02
+  %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
+  %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
+  store <4 x float> %vecinit46, <4 x float>* undef
+  br i1 undef, label %if.then105, label %if.else109
+
+if.then105:                                       ; preds = %entry
+  br label %if.end114
+
+if.else109:                                       ; preds = %entry
+  br label %if.end114
+
+if.end114:                                        ; preds = %if.else109, %if.then105
+  %call185 = call float @bar()
+  %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
+  %call189 = call float @bar()
+  %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
+  %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
+  store <4 x float> %vecinit191, <4 x float>* undef
+; CHECK: vpop {d10, d11}
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r7, pc}
+  ret void
+}
+
+declare hidden float @bar() nounwind readnone ssp
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
new file mode 100644
index 000000000000..b8ed8199d398
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -0,0 +1,52 @@
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
+; RUN:    FileCheck  -check-prefix=ELFASM %s 
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=ELFOBJ %s
+
+;; Make sure that bl __aeabi_read_tp is materiazlied and fixed up correctly
+;; in the obj case. 
+
+@i = external thread_local global i32
+@a = external global i8
+@b = external global [10 x i8]
+
+define arm_aapcs_vfpcc i32 @main() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  switch i32 %0, label %bb2 [
+    i32 12, label %bb
+    i32 13, label %bb1
+  ]
+
+bb:                                               ; preds = %entry
+  %1 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
+  ret i32 %1
+; ELFASM:       	bl	__aeabi_read_tp
+
+
+; ELFOBJ:   '.text'
+; ELFOBJ-NEXT:  'sh_type'
+; ELFOBJ-NEXT:  'sh_flags'
+; ELFOBJ-NEXT:  'sh_addr'
+; ELFOBJ-NEXT:  'sh_offset'
+; ELFOBJ-NEXT:  'sh_size'
+; ELFOBJ-NEXT:  'sh_link'
+; ELFOBJ-NEXT:  'sh_info'
+; ELFOBJ-NEXT:  'sh_addralign'
+; ELFOBJ-NEXT:  'sh_entsize'
+;;;               BL __aeabi_read_tp is ---+
+;;;                                        V
+; ELFOBJ-NEXT:  00482de9 3c009fe5 00109fe7 feffffeb
+
+
+bb1:                                              ; preds = %entry
+  %2 = tail call arm_aapcs_vfpcc  i32 @bar(i32* bitcast ([10 x i8]* @b to i32*)) nounwind
+  ret i32 %2
+
+bb2:                                              ; preds = %entry
+  ret i32 -1
+}
+
+declare arm_aapcs_vfpcc i32 @foo(i8*)
+
+declare arm_aapcs_vfpcc i32 @bar(i32*)
diff --git a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
new file mode 100644
index 000000000000..d5aefbee197c
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
@@ -0,0 +1,100 @@
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -relocation-model=pic -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=PIC01 %s
+
+;; FIXME: Reduce this test further, or even better,
+;; redo as .s -> .o test once ARM AsmParser is working better
+
+; ModuleID = 'large2.pnacl.bc'
+target triple = "armv7-none-linux-gnueabi"
+
+%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
+%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, %struct._reent*, i8*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i32, i32)*, i32 (%struct._reent*, i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._flock_t, %struct._mbstate_t, i32 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args* }
+%struct._flock_t = type { i32, i32, i32, i32, i32 }
+%struct._glue = type { %struct._glue*, i32, %struct.__FILE* }
+%struct._mbstate_t = type { i32, %union.anon }
+%struct._misc_reent = type { i8*, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, [8 x i8], i32, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t }
+%struct._mprec = type { %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint** }
+%struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 }
+%struct._rand48 = type { [3 x i16], [3 x i16], i16, i64 }
+%struct._reent = type { %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, i32, i8*, i32, i32, i8*, %struct._mprec*, void (%struct._reent*)*, i32, i32, i8*, %struct._rand48*, %struct.__tm*, i8*, void (i32)**, %struct._atexit*, %struct._atexit, %struct._glue, %struct.__FILE*, %struct._misc_reent*, i8* }
+%union.anon = type { i32 }
+
+@buf = constant [2 x i8] c"x\00", align 4
+@_impure_ptr = external thread_local global %struct._reent*
+@.str = private constant [22 x i8] c"This should fault...\0A\00", align 4
+@.str1 = private constant [40 x i8] c"We're still running. This is not good.\0A\00", align 4
+
+define i32 @main() nounwind {
+entry:
+  %0 = load %struct._reent** @_impure_ptr, align 4
+  %1 = getelementptr inbounds %struct._reent* %0, i32 0, i32 1
+  %2 = load %struct.__FILE** %1, align 4
+  %3 = bitcast %struct.__FILE* %2 to i8*
+  %4 = tail call i32 @fwrite(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 1, i32 21, i8* %3) nounwind
+  %5 = load %struct._reent** @_impure_ptr, align 4
+  %6 = getelementptr inbounds %struct._reent* %5, i32 0, i32 1
+  %7 = load %struct.__FILE** %6, align 4
+  %8 = tail call i32 @fflush(%struct.__FILE* %7) nounwind
+  store i8 121, i8* getelementptr inbounds ([2 x i8]* @buf, i32 0, i32 0), align 4
+  %9 = load %struct._reent** @_impure_ptr, align 4
+  %10 = getelementptr inbounds %struct._reent* %9, i32 0, i32 1
+  %11 = load %struct.__FILE** %10, align 4
+  %12 = bitcast %struct.__FILE* %11 to i8*
+  %13 = tail call i32 @fwrite(i8* getelementptr inbounds ([40 x i8]* @.str1, i32 0, i32 0), i32 1, i32 39, i8* %12) nounwind
+  ret i32 1
+}
+
+
+; PIC01:             Relocation 0x00000000
+; PIC01-NEXT:        'r_offset', 0x0000001c
+; PIC01-NEXT:          'r_sym'
+; PIC01-NEXT:          'r_type', 0x0000001b
+
+
+; PIC01:             Relocation 0x00000001
+; PIC01-NEXT:      'r_offset', 0x00000038
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x0000001b
+
+; PIC01:              Relocation 0x00000002
+; PIC01-NEXT:      'r_offset', 0x00000044
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x0000001b
+
+; PIC01:              Relocation 0x00000003
+; PIC01-NEXT:      'r_offset', 0x00000070
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x0000001b
+
+; PIC01:              Relocation 0x00000004
+; PIC01-NEXT:      'r_offset', 0x0000007c
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x00000019
+
+
+; PIC01:              Relocation 0x00000005
+; PIC01-NEXT:      'r_offset', 0x00000080
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x00000018
+
+; PIC01:              Relocation 0x00000006
+; PIC01-NEXT:      'r_offset', 0x00000084
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x00000068
+
+; PIC01:              Relocation 0x00000007
+; PIC01-NEXT:      'r_offset', 0x00000088
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x0000001a
+
+; PIC01:              Relocation 0x00000008
+; PIC01-NEXT:      'r_offset', 0x0000008c
+; PIC01-NEXT:        'r_sym'
+; PIC01-NEXT:        'r_type', 0x00000018
+
+declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind
+
+declare i32 @fflush(%struct.__FILE* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
new file mode 100644
index 000000000000..eaa34e7960fb
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -0,0 +1,35 @@
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
+; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
+; RUN:    FileCheck  -check-prefix=ASM %s
+
+
+@dummy = internal global i32 666
+@array00 = internal global [20 x i32] zeroinitializer
+@sum = internal global i32 55
+@STRIDE = internal global i32 8
+
+; ASM:          .type   array00,%object         @ @array00
+; ASM-NEXT:     .lcomm  array00,80              @ @array00
+; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
+
+
+
+; OBJ:          Section 0x00000003
+; OBJ-NEXT:     '.bss'
+
+; OBJ:          'array00'
+; OBJ-NEXT:     'st_value', 0x00000000
+; OBJ-NEXT:     'st_size', 0x00000050
+; OBJ-NEXT:     'st_bind', 0x00000000
+; OBJ-NEXT:     'st_type', 0x00000001
+; OBJ-NEXT:     'st_other', 0x00000000
+; OBJ-NEXT:     'st_shndx', 0x00000003
+
+define i32 @main(i32 %argc) nounwind {
+  %1 = load i32* @sum, align 4
+  %2 = getelementptr  [20 x i32]* @array00, i32 0, i32 %argc
+  %3 = load i32* %2, align 4
+  %4 = add i32 %1, %3
+  ret i32 %4;
+}
diff --git a/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
new file mode 100644
index 000000000000..a2f50b587b22
--- /dev/null
+++ b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+; <rdar://problem/8782198>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin10"
+
+define void @func() nounwind optsize {
+entry:
+  %buf = alloca [8096 x i8], align 1
+  br label %bb
+
+bb:
+  %p.2 = getelementptr [8096 x i8]* %buf, i32 0, i32 0
+  store i8 undef, i8* %p.2, align 1
+  ret void
+}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
new file mode 100644
index 000000000000..99baad2d38d1
--- /dev/null
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i8 1
+@x2 = internal global i8 1
+@x3 = internal global i8 1
+@x4 = internal global i8 1
+@x5 = global i8 1
+
+; Check debug info output for merged global.
+; DW_AT_location
+; DW_OP_addr
+; DW_OP_plus
+; .long __MergedGlobals
+; DW_OP_constu
+; offset
+
+;CHECK:        .byte   7                       @ Abbrev [7] 0x1a5:0x13 DW_TAG_variable
+;CHECK-NEXT:        .ascii   "x2"                   @ DW_AT_name
+;CHECK-NEXT:        .byte   0
+;CHECK-NEXT:        .long   93                      @ DW_AT_type
+;CHECK-NEXT:        .byte   1                       @ DW_AT_decl_file
+;CHECK-NEXT:        .byte   6                       @ DW_AT_decl_line
+;CHECK-NEXT:        .byte   8                       @ DW_AT_location
+;CHECK-NEXT:        .byte   3
+;CHECK-NEXT:        .long   __MergedGlobals
+;CHECK-NEXT:        .byte   16
+;CHECK-NEXT:        .byte   1
+;CHECK-NEXT:        .byte   34
+
+define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !10), !dbg !30
+  %0 = load i8* @x1, align 4, !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !11), !dbg !30
+  store i8 %a, i8* @x1, align 4, !dbg !30
+  ret i8 %0, !dbg !31
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !18), !dbg !32
+  %0 = load i8* @x2, align 4, !dbg !32
+  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !19), !dbg !32
+  store i8 %a, i8* @x2, align 4, !dbg !32
+  ret i8 %0, !dbg !33
+}
+
+define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !21), !dbg !34
+  %0 = load i8* @x3, align 4, !dbg !34
+  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !22), !dbg !34
+  store i8 %a, i8* @x3, align 4, !dbg !34
+  ret i8 %0, !dbg !35
+}
+
+define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !24), !dbg !36
+  %0 = load i8* @x4, align 4, !dbg !36
+  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !25), !dbg !36
+  store i8 %a, i8* @x4, align 4, !dbg !36
+  ret i8 %0, !dbg !37
+}
+
+define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !27), !dbg !38
+  %0 = load i8* @x5, align 4, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !28), !dbg !38
+  store i8 %a, i8* @x5, align 4, !dbg !38
+  ret i8 %0, !dbg !39
+}
+
+!llvm.dbg.sp = !{!0, !6, !7, !8, !9}
+!llvm.dbg.lv.get1 = !{!10, !11}
+!llvm.dbg.gv = !{!13, !14, !15, !16, !17}
+!llvm.dbg.lv.get2 = !{!18, !19}
+!llvm.dbg.lv.get3 = !{!21, !22}
+!llvm.dbg.lv.get4 = !{!24, !25}
+!llvm.dbg.lv.get5 = !{!27, !28}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !5}
+!5 = metadata !{i32 589860, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 589835, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4} ; [ DW_TAG_variable ]
+!17 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 590080, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 589835, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 590080, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 589835, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 590080, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 589835, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 589835, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 4, i32 0, metadata !0, null}
+!31 = metadata !{i32 4, i32 0, metadata !12, null}
+!32 = metadata !{i32 7, i32 0, metadata !6, null}
+!33 = metadata !{i32 7, i32 0, metadata !20, null}
+!34 = metadata !{i32 10, i32 0, metadata !7, null}
+!35 = metadata !{i32 10, i32 0, metadata !23, null}
+!36 = metadata !{i32 13, i32 0, metadata !8, null}
+!37 = metadata !{i32 13, i32 0, metadata !26, null}
+!38 = metadata !{i32 16, i32 0, metadata !9, null}
+!39 = metadata !{i32 16, i32 0, metadata !29, null}
diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
new file mode 100644
index 000000000000..85a113755bf4
--- /dev/null
+++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic  -mcpu=arm1136jf-s | FileCheck %s
+; rdar://8959122 illegal register operands for UMULL instruction
+;   in cfrac nightly test.
+; Armv6 generates a umull that must write to two distinct destination regs.
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin10"
+
+define void @ptoa() nounwind {
+entry:
+  br i1 false, label %bb3, label %bb
+
+bb:                                               ; preds = %entry
+  br label %bb3
+
+bb3:                                              ; preds = %bb, %entry
+  %0 = call noalias i8* @malloc() nounwind
+  br i1 undef, label %bb46, label %bb8
+
+bb8:                                              ; preds = %bb3
+  %1 = getelementptr inbounds i8* %0, i32 0
+  store i8 0, i8* %1, align 1
+  %2 = call i32 @ptou() nounwind
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %3 = udiv i32 %2, 10
+  %4 = urem i32 %3, 10
+  %5 = icmp ult i32 %4, 10
+  %6 = trunc i32 %4 to i8
+  %7 = or i8 %6, 48
+  %8 = add i8 %6, 87
+  %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
+  store i8 %iftmp.5.0.1, i8* undef, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %9 = udiv i32 %2, 100
+  %10 = urem i32 %9, 10
+  %11 = icmp ult i32 %10, 10
+  %12 = trunc i32 %10 to i8
+  %13 = or i8 %12, 48
+  %14 = add i8 %12, 87
+  %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
+  store i8 %iftmp.5.0.2, i8* undef, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %15 = udiv i32 %2, 10000
+  %16 = urem i32 %15, 10
+  %17 = icmp ult i32 %16, 10
+  %18 = trunc i32 %16 to i8
+  %19 = or i8 %18, 48
+  %20 = add i8 %18, 87
+  %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
+  store i8 %iftmp.5.0.4, i8* null, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %21 = udiv i32 %2, 100000
+  %22 = urem i32 %21, 10
+  %23 = icmp ult i32 %22, 10
+  %iftmp.5.0.5 = select i1 %23, i8 0, i8 undef
+  store i8 %iftmp.5.0.5, i8* undef, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %24 = udiv i32 %2, 1000000
+  %25 = urem i32 %24, 10
+  %26 = icmp ult i32 %25, 10
+  %27 = trunc i32 %25 to i8
+  %28 = or i8 %27, 48
+  %29 = add i8 %27, 87
+  %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
+  store i8 %iftmp.5.0.6, i8* undef, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %30 = udiv i32 %2, 10000000
+  %31 = urem i32 %30, 10
+  %32 = icmp ult i32 %31, 10
+  %33 = trunc i32 %31 to i8
+  %34 = or i8 %33, 48
+  %35 = add i8 %33, 87
+  %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
+  store i8 %iftmp.5.0.7, i8* undef, align 1
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
+  %36 = udiv i32 %2, 100000000
+  %37 = urem i32 %36, 10
+  %38 = icmp ult i32 %37, 10
+  %39 = trunc i32 %37 to i8
+  %40 = or i8 %39, 48
+  %41 = add i8 %39, 87
+  %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
+  store i8 %iftmp.5.0.8, i8* null, align 1
+  unreachable
+
+bb46:                                             ; preds = %bb3
+  ret void
+}
+
+declare noalias i8* @malloc() nounwind
+
+declare i32 @ptou()
diff --git a/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
new file mode 100644
index 000000000000..f3d788818afc
--- /dev/null
+++ b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -asm-verbose=false -O3  -mtriple=armv5e-none-linux-gnueabi | FileCheck %s
+; PR8986: PostRA antidependence breaker must respect "earlyclobber".
+; armv5e generates mulv5 that cannot used the same reg for src/dest.
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32"
+target triple = "armv5e-none-linux-gnueabi"
+
+define hidden fastcc void @storeAtts() nounwind {
+entry:
+  %.SV116 = alloca i8**
+  br i1 undef, label %meshBB520, label %meshBB464
+
+bb15:                                             ; preds = %meshBB424
+  br i1 undef, label %bb216, label %meshBB396
+
+bb22:                                             ; preds = %meshBB396
+  br label %cBB564
+
+cBB564:                                           ; preds = %cBB564, %bb22
+  br label %cBB564
+
+poolStoreString.exit.thread:                      ; preds = %meshBB424
+  ret void
+
+bb78:                                             ; preds = %meshBB412
+  unreachable
+
+bb129:                                            ; preds = %meshBB540
+  br i1 undef, label %bb131.loopexit, label %meshBB540
+
+bb131.loopexit:                                   ; preds = %bb129
+  br label %bb131
+
+bb131:                                            ; preds = %bb135, %bb131.loopexit
+  br i1 undef, label %bb134, label %meshBB396
+
+bb134:                                            ; preds = %bb131
+  unreachable
+
+bb135:                                            ; preds = %meshBB396
+  %uriHash.1.phi.load = load i32* undef
+  %.load120 = load i8*** %.SV116
+  %.phi24 = load i8* null
+  %.phi26 = load i8** null
+  store i8 %.phi24, i8* %.phi26, align 1
+  %0 = getelementptr inbounds i8* %.phi26, i32 1
+  store i8* %0, i8** %.load120, align 4
+  ; CHECK: mul [[REGISTER:lr|r[0-9]+]],
+  ; CHECK-NOT: [[REGISTER]],
+  ; CHECK: {{(lr|r[0-9]+)$}}
+  %1 = mul i32 %uriHash.1.phi.load, 1000003
+  %2 = xor i32 0, %1
+  store i32 %2, i32* null
+  %3 = load i8* null, align 1
+  %4 = icmp eq i8 %3, 0
+  store i8* %0, i8** undef
+  br i1 %4, label %meshBB472, label %bb131
+
+bb212:                                            ; preds = %meshBB540
+  unreachable
+
+bb216:                                            ; preds = %bb15
+  ret void
+
+meshBB396:                                        ; preds = %bb131, %bb15
+  br i1 undef, label %bb135, label %bb22
+
+meshBB412:                                        ; preds = %meshBB464
+  br i1 undef, label %meshBB504, label %bb78
+
+meshBB424:                                        ; preds = %meshBB464
+  br i1 undef, label %poolStoreString.exit.thread, label %bb15
+
+meshBB464:                                        ; preds = %entry
+  br i1 undef, label %meshBB424, label %meshBB412
+
+meshBB472:                                        ; preds = %meshBB504, %bb135
+  unreachable
+
+meshBB504:                                        ; preds = %meshBB412
+  br label %meshBB472
+
+meshBB520:                                        ; preds = %entry
+  br label %meshBB540
+
+meshBB540:                                        ; preds = %meshBB520, %bb129
+  br i1 undef, label %bb212, label %bb129
+}
diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll
index d4d01288f29b..d57c159b85cb 100644
--- a/test/CodeGen/ARM/align.ll
+++ b/test/CodeGen/ARM/align.ll
@@ -22,7 +22,7 @@
 @e = global i64 4
 ;ELF: .align 3
 ;ELF: e
-;DARWIN: .align 2
+;DARWIN: .align 3
 ;DARWIN: _e:
 
 @f = global float 5.0
@@ -34,7 +34,7 @@
 @g = global double 6.0
 ;ELF: .align 3
 ;ELF: g:
-;DARWIN: .align 2
+;DARWIN: .align 3
 ;DARWIN: _g:
 
 @bar = common global [75 x i8] zeroinitializer, align 128
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index bb7853e66ef4..c7fcb9755d9e 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -13,8 +13,8 @@ define i32 @f1(i32 %a, i64 %b) {
 ; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
 define i32 @f2() nounwind optsize {
 ; ELF: f2:
-; ELF: mov  r0, #128
-; ELF: str  r0, [sp]
+; ELF: mov  [[REGISTER:(r[0-9]+)]], #128
+; ELF: str  [[REGISTER]], [sp]
 ; DARWIN: f2:
 ; DARWIN: mov	r3, #128
 entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
new file mode 100644
index 000000000000..50c638b73931
--- /dev/null
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s
+; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s
+
+; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
+
+%struct.Foo = type { i8* }
+
+; ARM:   foo
+; THUMB: foo
+; T2:    foo
+define %struct.Foo* @foo(%struct.Foo* %this, i32 %acc) nounwind readonly align 2 {
+entry:
+  %scevgep = getelementptr %struct.Foo* %this, i32 1
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %sw.bb, %entry
+  %lsr.iv2 = phi %struct.Foo* [ %scevgep3, %sw.bb ], [ %scevgep, %entry ]
+  %lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ]
+  %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
+  %lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8**
+  %scevgep5 = getelementptr i8** %lsr.iv24, i32 -1
+  %tmp2 = load i8** %scevgep5
+  %0 = ptrtoint i8* %tmp2 to i32
+
+; ARM:      ands r12, r12, #3
+; ARM-NEXT: beq
+
+; THUMB:      movs r5, #3
+; THUMB-NEXT: ands r5, r4
+; THUMB-NEXT: cmp r5, #0
+; THUMB-NEXT: beq
+
+; T2:      ands r12, r12, #3
+; T2-NEXT: beq
+
+  %and = and i32 %0, 3
+  %tst = icmp eq i32 %and, 0
+  br i1 %tst, label %sw.bb, label %tailrecurse.switch
+
+tailrecurse.switch:                               ; preds = %tailrecurse
+  switch i32 %and, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 3, label %sw.bb6
+    i32 2, label %sw.bb8
+  ]
+
+sw.bb:                                            ; preds = %tailrecurse.switch, %tailrecurse
+  %shl = shl i32 %acc.tr, 1
+  %or = or i32 %and, %shl
+  %lsr.iv.next = add i32 %lsr.iv, 1
+  %scevgep3 = getelementptr %struct.Foo* %lsr.iv2, i32 1
+  br label %tailrecurse
+
+sw.bb6:                                           ; preds = %tailrecurse.switch
+  ret %struct.Foo* %lsr.iv2
+
+sw.bb8:                                           ; preds = %tailrecurse.switch
+  %tmp1 = add i32 %acc.tr, %lsr.iv
+  %add.ptr11 = getelementptr inbounds %struct.Foo* %this, i32 %tmp1
+  ret %struct.Foo* %add.ptr11
+
+sw.epilog:                                        ; preds = %tailrecurse.switch
+  ret %struct.Foo* undef
+}
+
+; Another test that exercises the AND/TST peephole optimization and also
+; generates a predicated ANDS instruction. Check that the predicate is printed
+; after the "S" modifier on the instruction.
+
+%struct.S = type { i8* (i8*)*, [1 x i8] }
+
+; ARM: bar
+; THUMB: bar
+; T2: bar
+define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
+entry:
+  %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
+  %1 = load i8* %0, align 1
+  %2 = zext i8 %1 to i32
+; ARM: ands
+; THUMB: ands
+; T2: ands
+  %3 = and i32 %2, 112
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
+  %6 = load i8* %5, align 1
+  %7 = zext i8 %6 to i32
+; ARM: andsne
+; THUMB: ands
+; T2: andsne
+  %8 = and i32 %7, 112
+  %9 = icmp eq i32 %8, 0
+  br i1 %9, label %return, label %bb2
+
+bb2:                                              ; preds = %bb
+  %10 = icmp eq i32 %3, 16
+  %11 = icmp eq i32 %8, 16
+  %or.cond = or i1 %10, %11
+  br i1 %or.cond, label %bb4, label %return
+
+bb4:                                              ; preds = %bb2
+  %12 = ptrtoint %struct.S* %x to i32
+  %phitmp = trunc i32 %12 to i8
+  ret i8 %phitmp
+
+return:                                           ; preds = %bb2, %bb, %entry
+  ret i8 1
+}
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
new file mode 100644
index 000000000000..f31aa7bc58e3
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=T2
+; rdar://8964854
+
+define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
+; ARM: t:
+; ARM: ldrexb
+; ARM: strexb
+
+; T2: t:
+; T2: ldrexb
+; T2: strexb
+  %tmp0 = tail call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %a, i8 %b, i8 %c)
+  ret i8 %tmp0
+}
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 59e2b43a9172..946db1909fe5 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -16,10 +16,10 @@ entry:
   ret void
 }
 
-define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
+define i32 @f2(i32 %A, i32 %B) nounwind {
 entry:
 ; CHECK: f2
-; CHECK: mov r1, r1, lsr #7
+; CHECK: lsr{{.*}}#7
 ; CHECK: bfi r0, r1, #7, #16
   %and = and i32 %A, -8388481                     ; <i32> [#uses=1]
   %and2 = and i32 %B, 8388480                     ; <i32> [#uses=1]
@@ -27,10 +27,10 @@ entry:
   ret i32 %or
 }
 
-define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
+define i32 @f3(i32 %A, i32 %B) nounwind {
 entry:
 ; CHECK: f3
-; CHECK: mov r2, r0, lsr #7
+; CHECK: lsr{{.*}} #7
 ; CHECK: mov r0, r1
 ; CHECK: bfi r0, r2, #7, #16
   %and = and i32 %A, 8388480                      ; <i32> [#uses=1]
@@ -38,3 +38,27 @@ entry:
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
   ret i32 %or
 }
+
+; rdar://8752056
+define i32 @f4(i32 %a) nounwind {
+; CHECK: f4
+; CHECK: movw r1, #3137
+; CHECK: bfi r1, r0, #15, #5
+  %1 = shl i32 %a, 15
+  %ins7 = and i32 %1, 1015808
+  %ins12 = or i32 %ins7, 3137
+  ret i32 %ins12
+}
+
+; rdar://8458663
+define i32 @f5(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: f5:
+; CHECK-NOT: bfc
+; CHECK: bfi r0, r1, #20, #4
+  %0 = and i32 %a, -15728641
+  %1 = shl i32 %b, 20
+  %2 = and i32 %1, 15728640
+  %3 = or i32 %2, %0
+  ret i32 %3
+}
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index 9e94efe3f9db..ce1b2ad5fad3 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,36 +1,41 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep and      %t | count 1
-; RUN: grep orr      %t | count 1
-; RUN: grep eor      %t | count 1
-; RUN: grep mov.*lsl %t | count 1
-; RUN: grep mov.*asr %t | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
+; CHECK: f1
+; CHECK: and r0, r1, r0
 	%tmp2 = and i32 %b, %a		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @f2(i32 %a, i32 %b) {
 entry:
+; CHECK: f2
+; CHECK: orr r0, r1, r0
 	%tmp2 = or i32 %b, %a		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @f3(i32 %a, i32 %b) {
 entry:
+; CHECK: f3
+; CHECK: eor r0, r1, r0
 	%tmp2 = xor i32 %b, %a		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
 
 define i32 @f4(i32 %a, i32 %b) {
 entry:
+; CHECK: f4
+; CHECK: lsl
 	%tmp3 = shl i32 %a, %b		; <i32> [#uses=1]
 	ret i32 %tmp3
 }
 
 define i32 @f5(i32 %a, i32 %b) {
 entry:
+; CHECK: f5
+; CHECK: asr
 	%tmp3 = ashr i32 %a, %b		; <i32> [#uses=1]
 	ret i32 %tmp3
 }
diff --git a/test/CodeGen/ARM/bswap-inline-asm.ll b/test/CodeGen/ARM/bswap-inline-asm.ll
new file mode 100644
index 000000000000..472213d5f85f
--- /dev/null
+++ b/test/CodeGen/ARM/bswap-inline-asm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 | FileCheck %s
+
+define i32 @t1(i32 %x) nounwind {
+; CHECK: t1:
+; CHECK-NOT: InlineAsm
+; CHECK: rev
+  %asmtmp = tail call i32 asm "rev $0, $1\0A", "=l,l"(i32 %x) nounwind
+  ret i32 %asmtmp
+}
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 0e3e070a818f..09f1aae0a9f0 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | not grep bx
+; RUN: llc < %s -mtriple=armv5t-apple-darwin | FileCheck %s
 
 define void @test(i32 %Ptr, i8* %L) {
 entry:
@@ -24,6 +23,8 @@ bb1:		; preds = %bb, %entry
 	br i1 %bothcond, label %bb, label %bb18
 
 bb18:		; preds = %bb1
+; CHECK-NOT: bx
+; CHECK: ldmia sp!
 	ret void
 }
 
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index db5afe3f56cb..a77aba037be5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,8 +1,6 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
-; RUN:   -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
-; XFAIL: *
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
 
@@ -10,40 +8,80 @@ declare void @g(i32, i32, i32, i32)
 
 define void @t1() {
 ; CHECKELF: t1:
-; CHECKELF: PLT
+; CHECKELF: bl g(PLT)
         call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 
 define void @t2() {
-; CHECKV4: t2:
-; CHECKV4: bx r0 @ TAILCALL
-; CHECKV5: t2:
-; CHECKV5: bx r0 @ TAILCALL
+; CHECKV6: t2:
+; CHECKV6: bx r0 @ TAILCALL
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
 
-define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
-; CHECKV4: t3:
-; CHECKV4: bx r{{.*}}
-BB0:
-  %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]
-  %t35 = volatile load i32* %5                    ; <i32> [#uses=1]
-  %6 = inttoptr i32 %t35 to i32**                 ; <i32**> [#uses=1]
-  %7 = getelementptr i32** %6, i32 86             ; <i32**> [#uses=1]
-  %8 = load i32** %7                              ; <i32*> [#uses=1]
-  %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
-  %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
-  ret i32* %10
-}
-
-define void @t4() {
-; CHECKV4: t4:
-; CHECKV4: b _t2  @ TAILCALL
-; CHECKV5: t4:
-; CHECKV5: b _t2  @ TAILCALL
+define void @t3() {
+; CHECKV6: t3:
+; CHECKV6: b _t2  @ TAILCALL
+; CHECKELF: t3:
+; CHECKELF: b t2(PLT) @ TAILCALL
         tail call void @t2( )            ; <i32> [#uses=0]
         ret void
 }
+
+; Sibcall optimization of expanded libcalls. rdar://8707777
+define double @t4(double %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t4:
+; CHECKV6: b _sin @ TAILCALL
+; CHECKELF: t4:
+; CHECKELF: b sin(PLT) @ TAILCALL
+  %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
+  ret double %0
+}
+
+define float @t5(float %a) nounwind readonly ssp {
+entry:
+; CHECKV6: t5:
+; CHECKV6: b _sinf @ TAILCALL
+; CHECKELF: t5:
+; CHECKELF: b sinf(PLT) @ TAILCALL
+  %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
+  ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
+
+define i32 @t6(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECKV6: t6:
+; CHECKV6: b ___divsi3 @ TAILCALL
+; CHECKELF: t6:
+; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
+  %0 = sdiv i32 %a, %b
+  ret i32 %0
+}
+
+; Make sure the tail call instruction isn't deleted
+; rdar://8309338
+declare void @foo() nounwind
+
+define void @t7() nounwind {
+entry:
+; CHECKT2: t7:
+; CHECKT2: blxeq _foo
+; CHECKT2-NEXT: pop.w
+; CHECKT2-NEXT: b.w _foo
+  br i1 undef, label %bb, label %bb1.lr.ph
+
+bb1.lr.ph:
+  tail call void @foo() nounwind
+  unreachable
+
+bb:
+  tail call void @foo() nounwind
+  ret void
+}
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index d2235c9221ce..e381e0029819 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+v5t | grep clz
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
 
 declare i32 @llvm.ctlz.i32(i32)
 
 define i32 @test(i32 %x) {
-        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )              ; <i32> [#uses=1]
+; CHECK: test
+; CHECK: clz r0, r0
+        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
         ret i32 %tmp.1
 }
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 25c556889fc4..845be8c20ea5 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin -cgp-critical-edge-splitting=0 | FileCheck %s
 ; PHI elimination shouldn't break backedge.
 ; rdar://8263994
 
 %struct.list_data_s = type { i16, i16 }
 %struct.list_head = type { %struct.list_head*, %struct.list_data_s* }
 
-define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind {
+define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind {
 entry:
+; CHECK: t1:
   %0 = icmp eq %struct.list_head* %list, null
   br i1 %0, label %bb2, label %bb
 
@@ -27,3 +28,52 @@ bb2:
   %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ]
   ret %struct.list_head* %next.0.lcssa
 }
+
+; Optimize loop entry, eliminate intra loop branches
+; rdar://8117827
+define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
+entry:
+; CHECK: t2:
+; CHECK: beq LBB1_[[RET:.]]
+  %0 = icmp eq i32 %passes, 0                     ; <i1> [#uses=1]
+  br i1 %0, label %bb5, label %bb.nph15
+
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+bb1:                                              ; preds = %bb2.preheader, %bb1
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
+  %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
+  %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
+  %scevgep = getelementptr i32* %src, i32 %tmp17  ; <i32*> [#uses=1]
+  %1 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, %sum.08                    ; <i32> [#uses=2]
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %size     ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb3, label %bb1
+
+bb3:                                              ; preds = %bb1, %bb2.preheader
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: bne LBB1_[[PREHDR]]
+; CHECK-NOT: b LBB1_
+  %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
+  %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
+  %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]
+  br i1 %exitcond18, label %bb5, label %bb2.preheader
+
+bb.nph15:                                         ; preds = %entry
+  %i.07 = add i32 %size, -1                       ; <i32> [#uses=2]
+  %4 = icmp sgt i32 %i.07, -1                     ; <i1> [#uses=1]
+  br label %bb2.preheader
+
+bb2.preheader:                                    ; preds = %bb3, %bb.nph15
+  %pass.011 = phi i32 [ 0, %bb.nph15 ], [ %3, %bb3 ] ; <i32> [#uses=1]
+  %sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
+  br i1 %4, label %bb1, label %bb3
+
+; CHECK: LBB1_[[RET]]: @ %bb5
+; CHECK: ldmia sp!
+bb5:                                              ; preds = %bb3, %entry
+  %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
+  ret i32 %sum.1.lcssa
+}
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index ce919361619a..542cf02f2a90 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -14,34 +14,33 @@ define i32 @f2() {
 
 define i32 @f3() {
 ; CHECK: f3
-; CHECK: mov r0{{.*}}256
+; CHECK: mov r0, #1, 24
         ret i32 256
 }
 
 define i32 @f4() {
 ; CHECK: f4
-; CHECK: orr{{.*}}256
+; CHECK: orr{{.*}}#1, 24
         ret i32 257
 }
 
 define i32 @f5() {
 ; CHECK: f5
-; CHECK: mov r0, {{.*}}-1073741761
+; CHECK: mov r0, #255, 2
         ret i32 -1073741761
 }
 
 define i32 @f6() {
 ; CHECK: f6
-; CHECK: mov r0, {{.*}}1008
+; CHECK: mov r0, #63, 28
         ret i32 1008
 }
 
 define void @f7(i32 %a) {
 ; CHECK: f7
 ; CHECK: cmp r0, #1, 16
-        %b = icmp ugt i32 %a, 65536             ; <i1> [#uses=1]
+        %b = icmp ugt i32 %a, 65536
         br i1 %b, label %r, label %r
-
-r:              ; preds = %0, %0
+r:
         ret void
 }
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
new file mode 100644
index 000000000000..4b6876df4a03
--- /dev/null
+++ b/test/CodeGen/ARM/crash.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+
+; <rdar://problem/8529919>
+%struct.foo = type { i32, i32 }
+
+define void @func() nounwind {
+entry:
+  %tmp = load i32* undef, align 4
+  br label %bb1
+
+bb1:
+  %tmp1 = and i32 %tmp, 16
+  %tmp2 = icmp eq i32 %tmp1, 0
+  %invok.1.i = select i1 %tmp2, i32 undef, i32 0
+  %tmp119 = add i32 %invok.1.i, 0
+  br i1 undef, label %bb2, label %exit
+
+bb2:
+  %tmp120 = add i32 %tmp119, 0
+  %scevgep810.i = getelementptr %struct.foo* null, i32 %tmp120, i32 1
+  store i32 undef, i32* %scevgep810.i, align 4
+  br i1 undef, label %bb2, label %bb3
+
+bb3:
+  br i1 %tmp2, label %bb2, label %bb2
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 448b437ddf46..3d29e05a0ccf 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index dfc1e0a957c3..f03282bdab7f 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -24,4 +24,4 @@ declare float @fabsf(float)
 ; CORTEXA8: test:
 ; CORTEXA8: 	vabs.f32	d1, d1
 ; CORTEXA9: test:
-; CORTEXA9: 	vabs.f32	s0, s0
+; CORTEXA9: 	vabs.f32	s1, s1
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 113f0e29bd15..749690e98d0f 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vadd.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vadd.f32	s0, s0, s1
+; CORTEXA9: 	vadd.f32	s0, s1, s0
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
new file mode 100644
index 000000000000..370c70f174fd
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+
+%union.anon = type { <16 x i32> }
+
+@__md0 = external global [137 x i8]
+
+define internal void @stretch(<4 x i8> addrspace(1)* %src, <4 x i8> addrspace(1)* %dst, i32 %width, i32 %height, i32 %iLS, i32 %oLS, <2 x float> %c, <4 x float> %param) nounwind {
+entry:
+  ret void
+}
+
+define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp {
+entry:
+  ret i32 undef
+}
+
+define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp {
+entry:
+  call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
new file mode 100644
index 000000000000..8f58480be164
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s
+
+define void @myadd(float* %sum, float* %addend) nounwind {
+entry:
+  %sum.addr = alloca float*, align 4
+  %addend.addr = alloca float*, align 4
+  store float* %sum, float** %sum.addr, align 4
+  store float* %addend, float** %addend.addr, align 4
+  %tmp = load float** %sum.addr, align 4
+  %tmp1 = load float* %tmp
+  %tmp2 = load float** %addend.addr, align 4
+  %tmp3 = load float* %tmp2
+  %add = fadd float %tmp1, %tmp3
+  %tmp4 = load float** %sum.addr, align 4
+  store float %add, float* %tmp4
+  ret void
+}
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+  %ztot = alloca float, align 4
+  %z = alloca float, align 4
+  store float 0.000000e+00, float* %ztot, align 4
+  store float 1.000000e+00, float* %z, align 4
+; CHECK-LONG: blx     r2
+; CHECK-NORM: blx     _myadd
+  call void @myadd(float* %ztot, float* %z)
+  ret i32 0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 3bee84d84de4..dd806ec6f1ae 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin
-; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin
 
 ; Very basic fast-isel functionality.
 
-define i32 @add(i32 %a, i32 %b) nounwind ssp {
+define i32 @add(i32 %a, i32 %b) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %b.addr = alloca i32, align 4
@@ -13,27 +13,4 @@ entry:
   %tmp1 = load i32* %b.addr
   %add = add nsw i32 %tmp, %tmp1
   ret i32 %add
-}
-
-define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
-entry:
-  %r = load i32* %p
-  %s = load i32* %q
-  %y = load i32** %z
-  br label %fast
-
-fast:
-  %t0 = add i32 %r, %s
-  %t1 = mul i32 %t0, %s
-  %t2 = sub i32 %t1, %s
-  %t3 = and i32 %t2, %s
-  %t4 = xor i32 %t3, 3
-  %t5 = xor i32 %t4, %s
-  %t6 = add i32 %t5, 2
-  %t7 = getelementptr i32* %y, i32 1
-  %t8 = getelementptr i32* %t7, i32 %t6
-  br label %exit
-
-exit:
-  ret i32* %t8
-}
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index a6d741087a89..1050cd265998 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,18 +1,45 @@
-; RUN: llc < %s -march=arm | grep bic | count 2
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep vneg | count 2
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
-define float @test1(float %x, double %y) {
-	%tmp = fpext float %x to double
-	%tmp2 = tail call double @copysign( double %tmp, double %y )
-	%tmp3 = fptrunc double %tmp2 to float
-	ret float %tmp3
+; rdar://8984306
+define float @test1(float %x, float %y) nounwind {
+entry:
+; SOFT: test1:
+; SOFT: lsr r1, r1, #31
+; SOFT: bfi r0, r1, #31, #1
+
+; HARD: test1:
+; HARD: vabs.f32 d0, d0
+; HARD: cmp r0, #0
+; HARD: vneglt.f32 s0, s0
+  %0 = tail call float @copysignf(float %x, float %y) nounwind
+  ret float %0
+}
+
+define double @test2(double %x, double %y) nounwind {
+entry:
+; SOFT: test2:
+; SOFT: lsr r2, r3, #31
+; SOFT: bfi r1, r2, #31, #1
+
+; HARD: test2:
+; HARD: vabs.f64 d0, d0
+; HARD: cmp r1, #0
+; HARD: vneglt.f64 d0, d0
+  %0 = tail call double @copysign(double %x, double %y) nounwind
+  ret double %0
 }
 
-define double @test2(double %x, float %y) {
-	%tmp = fpext float %y to double
-	%tmp2 = tail call double @copysign( double %x, double %tmp )
-	ret double %tmp2
+define double @test3(double %x, double %y, double %z) nounwind {
+entry:
+; SOFT: test3:
+; SOFT: vabs.f64
+; SOFT: cmp {{.*}}, #0
+; SOFT: vneglt.f64
+  %0 = fmul double %x, %y
+  %1 = tail call double @copysign(double %0, double %z) nounwind
+  ret double %1
 }
 
-declare double @copysign(double, double)
+declare double @copysign(double, double) nounwind
+declare float @copysignf(float, float) nounwind
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 9af1217de1d0..0c3149579297 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vdiv.f32	s0, s1, s0
 ; CORTEXA9: test:
-; CORTEXA9: 	vdiv.f32	s0, s0, s1
+; CORTEXA9: 	vdiv.f32	s0, s1, s0
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index c4ceca9828b0..fb83ef626af6 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,24 +1,51 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
+; VFP2: vmla.f32
+
+; NEON: t1:
+; NEON: vmla.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vadd.f32
 	%0 = fmul float %a, %b
         %1 = fadd float %acc, %0
 	ret float %1
 }
 
-; VFP2: test:
-; VFP2: 	vmla.f32	s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmla.f64
+
+; NEON: t2:
+; NEON: vmla.f64
 
-; NFP1: test:
-; NFP1: 	vmul.f32	d0, d1, d0
-; NFP0: test:
-; NFP0: 	vmla.f32	s2, s1, s0
+; A8: t2:
+; A8: vmul.f64
+; A8: vadd.f64
+	%0 = fmul double %a, %b
+        %1 = fadd double %acc, %0
+	ret double %1
+}
 
-; CORTEXA8: test:
-; CORTEXA8: 	vmul.f32	d0, d1, d0
-; CORTEXA9: test:
-; CORTEXA9: 	vmla.f32	s0, s1, s2
+define float @t3(float %acc, float %a, float %b) {
+entry:
+; VFP2: t3:
+; VFP2: vmla.f32
+
+; NEON: t3:
+; NEON: vmla.f32
+
+; A8: t3:
+; A8: vmul.f32
+; A8: vadd.f32
+	%0 = fmul float %a, %b
+        %1 = fadd float %0, %acc
+	ret float %1
+}
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index 103ce334519b..a182833a7a2c 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,24 +1,35 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
+; VFP2: vnmls.f32
+
+; NEON: t1:
+; NEON: vnmls.f32
+
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
 	%0 = fmul float %a, %b
         %1 = fsub float %0, %acc
 	ret float %1
 }
 
-; VFP2: test:
-; VFP2: 	vnmls.f32	s2, s1, s0
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vnmls.f64
 
-; NFP1: test:
-; NFP1: 	vnmls.f32	s2, s1, s0
-; NFP0: test:
-; NFP0: 	vnmls.f32	s2, s1, s0
+; NEON: t2:
+; NEON: vnmls.f64
 
-; CORTEXA8: test:
-; CORTEXA8: 	vnmls.f32	s2, s1, s0
-; CORTEXA9: test:
-; CORTEXA9: 	vnmls.f32	s0, s1, s2
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+	%0 = fmul double %a, %b
+        %1 = fsub double %0, %acc
+	ret double %1
+}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index bfafd20c8602..ef4e3e52818e 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -20,4 +20,4 @@ entry:
 ; CORTEXA8: test:
 ; CORTEXA8: 	vmul.f32	d0, d1, d0
 ; CORTEXA9: test:
-; CORTEXA9: 	vmul.f32	s0, s0, s1
+; CORTEXA9: 	vmul.f32	s0, s1, s0
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index c15005e6e8ab..418b59803d30 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -13,19 +13,19 @@ entry:
 	ret float %retval
 }
 ; VFP2: test1:
-; VFP2: 	vneg.f32	s1, s0
+; VFP2: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; NFP1: test1:
-; NFP1: 	vneg.f32	d1, d0
+; NFP1: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; NFP0: test1:
-; NFP0: 	vneg.f32	s1, s0
+; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test1:
-; CORTEXA8: 	vneg.f32	d1, d0
+; CORTEXA8: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test1:
-; CORTEXA9: 	vneg.f32	s1, s0
+; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
 
 define float @test2(float* %a) {
 entry:
@@ -37,17 +37,17 @@ entry:
 	ret float %retval
 }
 ; VFP2: test2:
-; VFP2: 	vneg.f32	s1, s0
+; VFP2: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; NFP1: test2:
-; NFP1: 	vneg.f32	d1, d0
+; NFP1: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; NFP0: test2:
-; NFP0: 	vneg.f32	s1, s0
+; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
 ; CORTEXA8: test2:
-; CORTEXA8: 	vneg.f32	d1, d0
+; CORTEXA8: 	vneg.f32	d{{.*}}, d{{.*}}
 
 ; CORTEXA9: test2:
-; CORTEXA9: 	vneg.f32	s1, s0
+; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
 
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 1d1d06a70ea6..1763d46e06c4 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -1,20 +1,35 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test(float %acc, float %a, float %b) {
+define float @t1(float %acc, float %a, float %b) {
 entry:
+; VFP2: t1:
 ; VFP2: vmls.f32
-; NEON: vmls.f32
 
-; NEONFP-NOT: vmls
-; NEONFP-NOT: vmov.f32
-; NEONFP:     vmul.f32
-; NEONFP:     vsub.f32
-; NEONFP:     vmov
+; NEON: t1:
+; NEON: vmls.f32
 
+; A8: t1:
+; A8: vmul.f32
+; A8: vsub.f32
 	%0 = fmul float %a, %b
         %1 = fsub float %acc, %0
 	ret float %1
 }
 
+define double @t2(double %acc, double %a, double %b) {
+entry:
+; VFP2: t2:
+; VFP2: vmls.f64
+
+; NEON: t2:
+; NEON: vmls.f64
+
+; A8: t2:
+; A8: vmul.f64
+; A8: vsub.f64
+	%0 = fmul double %a, %b
+        %1 = fsub double %acc, %0
+	ret double %1
+}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 0b47edd5f1f1..76c806761f75 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,23 +1,71 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
 
-define float @test1(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
+; VFP2: t1:
+; VFP2: vnmla.f32
+
+; NEON: t1:
+; NEON: vnmla.f32
+
+; A8: t1:
+; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
+; A8: vsub.f32 d0, d0, d1
 	%0 = fmul float %a, %b
 	%1 = fsub float -0.0, %0
         %2 = fsub float %1, %acc
 	ret float %2
 }
 
-define float @test2(float %acc, float %a, float %b) nounwind {
-; CHECK: vnmla.f32 s{{.*}}, s{{.*}}, s{{.*}}
+define float @t2(float %acc, float %a, float %b) nounwind {
 entry:
+; VFP2: t2:
+; VFP2: vnmla.f32
+
+; NEON: t2:
+; NEON: vnmla.f32
+
+; A8: t2:
+; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
+; A8: vsub.f32 d0, d0, d1
 	%0 = fmul float %a, %b
 	%1 = fmul float -1.0, %0
         %2 = fsub float %1, %acc
 	ret float %2
 }
 
+define double @t3(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t3:
+; VFP2: vnmla.f64
+
+; NEON: t3:
+; NEON: vnmla.f64
+
+; A8: t3:
+; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d16, d16, d17
+	%0 = fmul double %a, %b
+	%1 = fsub double -0.0, %0
+        %2 = fsub double %1, %acc
+	ret double %2
+}
+
+define double @t4(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2: t4:
+; VFP2: vnmla.f64
+
+; NEON: t4:
+; NEON: vnmla.f64
+
+; A8: t4:
+; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d16, d16, d17
+	%0 = fmul double %a, %b
+	%1 = fmul double -1.0, %0
+        %2 = fsub double %1, %acc
+	ret double %2
+}
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 8fbd45b97579..b6e9c3c22e75 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -51,7 +51,7 @@ entry:
 
 define float @h2() {
 ;CHECK: h2:
-;CHECK: 1065353216
+;CHECK: mov r0, #254, 10
 entry:
         ret float 1.000000e+00
 }
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 64350591b87f..65b921bdf655 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -38,6 +38,7 @@ entry:
 ; FINITE: t2:
 ; FINITE-NOT: vldr
 ; FINITE: ldrd r0, [r0]
+; FINITE-NOT: b LBB
 ; FINITE: cmp r0, #0
 ; FINITE: cmpeq r1, #0
 ; FINITE-NOT: vcmpe.f32
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 67f70e9eb5ed..2e6b3e3167ae 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=arm | grep moveq 
-; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs
+; RUN: llc < %s -mtriple=arm-apple-darwin | grep moveq 
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
 
 define i32 @f7(float %a, float %b) {
 entry:
+; CHECK: f7:
+; CHECK: vcmpe.f32
+; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: movweq
+; CHECK-NOT: vmrs
+; CHECK: movwvs
     %tmp = fcmp ueq float %a,%b
     %retval = select i1 %tmp, i32 666, i32 42
     ret i32 %retval
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
index f1d6a16f3edb..638dde9d8a0f 100644
--- a/test/CodeGen/ARM/fpconsts.ll
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -3,7 +3,7 @@
 define float @t1(float %x) nounwind readnone optsize {
 entry:
 ; CHECK: t1:
-; CHECK: vmov.f32 s1, #4.000000e+00
+; CHECK: vmov.f32 s{{.*}}, #4.000000e+00
   %0 = fadd float %x, 4.000000e+00
   ret float %0
 }
@@ -11,7 +11,7 @@ entry:
 define double @t2(double %x) nounwind readnone optsize {
 entry:
 ; CHECK: t2:
-; CHECK: vmov.f64 d1, #3.000000e+00
+; CHECK: vmov.f64 d{{.*}}, #3.000000e+00
   %0 = fadd double %x, 3.000000e+00
   ret double %0
 }
@@ -19,7 +19,7 @@ entry:
 define double @t3(double %x) nounwind readnone optsize {
 entry:
 ; CHECK: t3:
-; CHECK: vmov.f64 d1, #-1.300000e+01
+; CHECK: vmov.f64 d{{.*}}, #-1.300000e+01
   %0 = fmul double %x, -1.300000e+01
   ret double %0
 }
@@ -27,7 +27,7 @@ entry:
 define float @t4(float %x) nounwind readnone optsize {
 entry:
 ; CHECK: t4:
-; CHECK: vmov.f32 s1, #-2.400000e+01
+; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01
   %0 = fmul float %x, -2.400000e+01
   ret float %0
 }
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index bf197a46cb77..1b4c008bb775 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 
 define float @f1(double %x) {
 ;CHECK-VFP: f1:
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
new file mode 100644
index 000000000000..28bf2214740a
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; Test the ARMGlobalMerge pass.  Use -march=thumb because it has a small
+; value for the maximum offset (127).
+
+; A local array that exceeds the maximum offset should not be merged.
+; CHECK: g0:
+@g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ]
+
+; CHECK: _MergedGlobals:
+@g1 = internal global i32 1
+@g2 = internal global i32 2
+
+; Make sure that the complete variable fits within the range of the maximum
+; offset.  Having the starting offset in range is not sufficient.
+; When this works properly, @g3 is placed in a separate chunk of merged globals.
+; CHECK: _MergedGlobals1:
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+
+; Global variables that can be placed in BSS should be kept together in a
+; separate pool of merged globals.
+; CHECK: _MergedGlobals2
+@g4 = internal global i32 0
+@g5 = internal global i32 0
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index ccdc7bf4c140..bfed7a6630b4 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
 ; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
-; RUN:   grep mov | count 3
+; RUN:   grep mov | count 2
 ; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
new file mode 100644
index 000000000000..75428ac21655
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; rdar://8402126
+; Make sure if-converter is not predicating vldmia and ldmia. These are
+; micro-coded and would have long issue latency even if predicated on
+; false predicate.
+
+define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
+entry:
+; CHECK: t:
+; CHECK: vpop {d8}
+; CHECK-NOT: vpopne
+; CHECK: ldmia sp!, {r7, pc}
+; CHECK: vpop {d8}
+; CHECK: ldmia sp!, {r7, pc}
+  br i1 undef, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %mul73 = fmul double undef, 0.000000e+00
+  %sub76 = fsub double %mul73, undef
+  store double %sub76, double* undef, align 4
+  %call88 = tail call double @cos(double 0.000000e+00) nounwind
+  %mul89 = fmul double undef, %call88
+  %sub92 = fsub double %mul89, undef
+  store double %sub92, double* undef, align 4
+  ret void
+
+if.else:                                          ; preds = %entry
+  %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555)
+  %add112 = fadd double %tmp101, undef
+  %mul118 = fmul double %add112, undef
+  store double 0.000000e+00, double* %x, align 4
+  ret void
+}
+
+declare double @acos(double)
+
+declare double @sqrt(double) readnone
+
+declare double @cos(double) readnone
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
new file mode 100644
index 000000000000..63f8557d555b
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://8598427
+; Adjust if-converter heuristics to avoid predicating vmrs which can cause
+; significant regression.
+
+%struct.xyz_t = type { double, double, double }
+
+define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
+; CHECK: effie:
+entry:
+  %0 = icmp sgt i32 %tsets, 0
+  br i1 %0, label %bb.nph, label %bb6
+
+bb.nph:                                           ; preds = %entry
+  %1 = add nsw i32 %b, %a
+  %2 = add nsw i32 %1, %c
+  br label %bb
+
+bb:                                               ; preds = %bb4, %bb.nph
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
+  %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
+  %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
+  %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
+  %3 = load double* %scevgep10, align 4
+  %4 = load double* %scevgep11, align 4
+  %5 = fcmp uge double %3, %4
+  br i1 %5, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+; CHECK-NOT: it
+; CHECK-NOT: vcmpemi
+; CHECK-NOT: vmrsmi
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
+  %6 = load double* %scevgep12, align 4
+  %7 = fcmp uge double %3, %6
+  br i1 %7, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %8 = add nsw i32 %2, %r.19
+  br label %bb4
+
+bb3:                                              ; preds = %bb1, %bb
+  %9 = add nsw i32 %r.19, 1
+  br label %bb4
+
+bb4:                                              ; preds = %bb3, %bb2
+  %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
+  %10 = add nsw i32 %n.08, 1
+  %exitcond = icmp eq i32 %10, %tsets
+  br i1 %exitcond, label %bb6, label %bb
+
+bb6:                                              ; preds = %bb4, %entry
+  %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ]
+  ret i32 %r.1.lcssa
+}
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index e2c0ba398c68..5edf32fd1af6 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,9 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep cmpne | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmiahi | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
+; CHECK: cmpne
+; CHECK: ldmiahi sp!
 	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
 	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
 	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index eb97085ac004..62e13557cfdc 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,14 +1,12 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep cmpeq | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep moveq | count 1
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmiaeq | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
 
 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
 
 define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+; CHECK: cmpeq
+; CHECK: moveq
+; CHECK: ldmiaeq sp!
 entry:
 	br label %tailrecurse
 
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 1e39060e69f2..5fdfc4ea6805 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
-; RUN:   grep ldmiane | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 
 	%struct.SString = type { i8*, i32, i32 }
 
 declare void @abort()
 
 define fastcc void @t(%struct.SString* %word, i8 signext  %c) {
+; CHECK: ldmiane sp!
 entry:
 	%tmp1 = icmp eq %struct.SString* %word, null		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 687e138c1b4e..9f77ad1f794c 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -7,7 +7,7 @@ define void @t() nounwind {
 entry:
 ; CHECK: vmov.I64 q15, #0
 ; CHECK: vmov.32 d30[0], r0
-; CHECK: vmov q0, q15
+; CHECK: vmov q8, q15
   %tmp = alloca %struct.int32x4_t, align 16
   call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind
   ret void
@@ -18,7 +18,7 @@ entry:
 
 define void @t2() nounwind {
 entry:
-; CHECK: vmov d30, d0
+; CHECK: vmov d30, d16
 ; CHECK: vmov.32 r0, d30[0]
   %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind
   ret void
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 245ed516f70b..2f1a2cfd7786 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i32 @test1(i32 %X) {
-; CHECK: mov r0, r0, lsr #31
+; CHECK: lsr{{.*}}#31
 entry:
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
         zext i1 %0 to i32               ; <i32>:1 [#uses=1]
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 78201a6b341a..2f1b85ebbb04 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv4t-apple-darwin | FileCheck %s -check-prefix=V4T
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
 define i32 @t1() {
 ; CHECK: t1:
 ; CHECK: ldmia
+; V4T: t1:
+; V4T: ldmia
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
         %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
@@ -14,6 +17,8 @@ define i32 @t1() {
 define i32 @t2() {
 ; CHECK: t2:
 ; CHECK: ldmia
+; V4T: t2:
+; V4T: ldmia
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
         %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
@@ -25,6 +30,10 @@ define i32 @t3() {
 ; CHECK: t3:
 ; CHECK: ldmib
 ; CHECK: ldmia sp!
+; V4T: t3:
+; V4T: ldmib
+; V4T: pop
+; V4T-NEXT: bx lr
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
         %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
new file mode 100644
index 000000000000..2d016f6cd423
--- /dev/null
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check if the f32 load / store pair are optimized to i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+  %src6 = bitcast float* %src to i8*
+  %0 = icmp eq i32 %width, 0
+  br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+  %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+  %tmp = mul i32 %j.05, %index
+  %uglygep = getelementptr i8* %src6, i32 %tmp
+  %src_addr.04 = bitcast i8* %uglygep to float*
+  %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+  %1 = load float* %src_addr.04, align 4
+  store float %1, float* %dst_addr.03, align 4
+  %2 = add i32 %j.05, 1
+  %exitcond = icmp eq i32 %2, %width
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
new file mode 100644
index 000000000000..15a415df731d
--- /dev/null
+++ b/test/CodeGen/ARM/load-global.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_T
+; RUN: llc < %s -mtriple=armv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_V7
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
+
+@G = external global i32
+
+define i32 @test1() {
+; STATIC: _test1:
+; STATIC: ldr r0, LCPI0_0
+; STATIC: ldr r0, [r0]
+; STATIC: .long _G
+
+; DYNAMIC: _test1:
+; DYNAMIC: ldr r0, LCPI0_0
+; DYNAMIC: ldr r0, [r0]
+; DYNAMIC: ldr r0, [r0]
+; DYNAMIC: .long L_G$non_lazy_ptr
+
+; PIC: _test1
+; PIC: ldr r0, LCPI0_0
+; PIC: ldr r0, [pc, r0]
+; PIC: ldr r0, [r0]
+; PIC: .long L_G$non_lazy_ptr-(LPC0_0+8)
+
+; PIC_T: _test1
+; PIC_T: ldr.n r0, LCPI0_0
+; PIC_T: add r0, pc
+; PIC_T: ldr r0, [r0]
+; PIC_T: ldr r0, [r0]
+; PIC_T: .long L_G$non_lazy_ptr-(LPC0_0+4)
+
+; PIC_V7: _test1
+; PIC_V7: movw r0, :lower16:(L_G$non_lazy_ptr-(LPC0_0+8))
+; PIC_V7: movt r0, :upper16:(L_G$non_lazy_ptr-(LPC0_0+8))
+; PIC_V7: ldr r0, [pc, r0]
+; PIC_V7: ldr r0, [r0]
+
+; LINUX: test1
+; LINUX: ldr r0, .LCPI0_0
+; LINUX: ldr r1, .LCPI0_1
+; LINUX: add r0, pc, r0
+; LINUX: ldr r0, [r1, r0]
+; LINUX: ldr r0, [r0]
+; LINUX: .long G(GOT)
+	%tmp = load i32* @G
+	ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 16ef7cc2cb6c..74f8d783377d 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -14,22 +14,22 @@ entry:
 
 define i64 @f3() {
 ; CHECK: f3:
-; CHECK: mvn{{.*}}-2147483648
+; CHECK: mvn r0, #2, 2
 entry:
         ret i64 2147483647
 }
 
 define i64 @f4() {
 ; CHECK: f4:
-; CHECK: -2147483648
+; CHECK: mov r0, #2, 2
 entry:
         ret i64 2147483648
 }
 
 define i64 @f5() {
 ; CHECK: f5:
-; CHECK: mvn
-; CHECK: mvn{{.*}}-2147483648
+; CHECK: mvn r0, #0
+; CHECK: mvn r1, #2, 2
 entry:
         ret i64 9223372036854775807
 }
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 1ec4d15f6672..5e4f5730f8d2 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -2,8 +2,8 @@
 
 define i64 @f0(i64 %A, i64 %B) {
 ; CHECK: f0
-; CHECK:      movs    r3, r3, lsr #1
-; CHECK-NEXT: mov     r2, r2, rrx
+; CHECK:      lsrs    r3, r3, #1
+; CHECK-NEXT: rrx     r2, r2
 ; CHECK-NEXT: subs    r0, r0, r2
 ; CHECK-NEXT: sbc     r1, r1, r3
 	%tmp = bitcast i64 %A to i64
@@ -14,7 +14,7 @@ define i64 @f0(i64 %A, i64 %B) {
 
 define i32 @f1(i64 %x, i64 %y) {
 ; CHECK: f1
-; CHECK: mov r0, r0, lsl r2
+; CHECK: lsl{{.*}}r2
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -22,7 +22,7 @@ define i32 @f1(i64 %x, i64 %y) {
 
 define i32 @f2(i64 %x, i64 %y) {
 ; CHECK: f2
-; CHECK:      mov     r0, r0, lsr r2
+; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: subs    r2, r2, #32
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
@@ -34,7 +34,7 @@ define i32 @f2(i64 %x, i64 %y) {
 
 define i32 @f3(i64 %x, i64 %y) {
 ; CHECK: f3
-; CHECK:      mov     r0, r0, lsr r2
+; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: subs    r2, r2, #32
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index b8c543b1bd18..1bbb96deeefe 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats |& grep {38.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
 ; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only 
 ; has a single store in it, and that cond_true55 only has code to materialize 
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 866be423c2cb..9882690da268 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
+; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -624,12 +624,11 @@ bb23:                                             ; preds = %bb22, %bb20, %bb9,
 bb24:                                             ; preds = %bb23
 
 ; LSR should use count-down iteration to avoid requiring the trip count
-; in a register, and it shouldn't require any reloads here.
+; in a register.
 
 ;      CHECK: @ %bb24
-; CHECK-NEXT: @   in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
-; CHECK-NEXT: bne.w
+; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
+; CHECK: bne.w
 
   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
   %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll
new file mode 100644
index 000000000000..8656c5bbd72c
--- /dev/null
+++ b/test/CodeGen/ARM/machine-licm.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim   | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6t2 | FileCheck %s -check-prefix=MOVT
+; rdar://7353541
+; rdar://7354376
+; rdar://8887598
+
+; The generated code is no where near ideal. It's not recognizing the two
+; constantpool entries being loaded can be merged into one.
+
+@GV = external global i32                         ; <i32*> [#uses=2]
+
+define void @t(i32* nocapture %vals, i32 %c) nounwind {
+entry:
+; ARM: t:
+; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
+; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool.
+; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy
+; to add the pseudo instructions to make sure they are CSE'ed at the same
+; time as the "ldr cp".
+; ARM: ldr r{{[0-9]+}}, LCPI0_1
+; ARM: LPC0_0:
+; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
+; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
+
+; MOVT: t:
+; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+8))
+; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+8))
+; MOVT: LPC0_0:
+; MOVT: ldr r{{[0-9]+}}, [pc, [[REGISTER_2]]]
+; MOVT: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
+
+; THUMB: t:
+  %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+; ARM: LCPI0_0:
+; ARM: LCPI0_1:
+; ARM: .section
+
+; THUMB: BB#1
+; THUMB: ldr.n r2, LCPI0_0
+; THUMB: add r2, pc
+; THUMB: ldr r{{[0-9]+}}, [r2]
+; THUMB: LBB0_2
+; THUMB: LCPI0_0:
+; THUMB-NOT: LCPI0_1:
+; THUMB: .section
+  %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ]    ; <i32> [#uses=1]
+  %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ]     ; <i32> [#uses=2]
+  %scevgep = getelementptr i32* %vals, i32 %i.03  ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = add nsw i32 %1, %2                         ; <i32> [#uses=2]
+  store i32 %3, i32* @GV, align 4
+  %4 = add i32 %i.03, 1                           ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %4, %c                  ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 8c102464612c..3cb8a8e816f6 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -36,7 +36,7 @@ define i32 @t12288(i32 %v) nounwind readnone {
 entry:
 ; CHECK: t12288:
 ; CHECK: add r0, r0, r0, lsl #1
-; CHECK: mov     r0, r0, lsl #12
+; CHECK: lsl{{.*}}#12
         %0 = mul i32 %v, 12288
         ret i32 %0
 }
diff --git a/test/CodeGen/ARM/mult-alt-generic-arm.ll b/test/CodeGen/ARM/mult-alt-generic-arm.ll
new file mode 100644
index 000000000000..a8104db337f5
--- /dev/null
+++ b/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=arm
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "arm"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define arm_aapcscc void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define arm_aapcscc void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define arm_aapcscc void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define arm_aapcscc void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define arm_aapcscc void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+; No lowering support.
+;  %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+;  store i32 %4, i32* %out0, align 4
+;  %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+;  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define arm_aapcscc void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define arm_aapcscc void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define arm_aapcscc void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define arm_aapcscc void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+; No lowering support.
+;  %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+;  store i32 %4, i32* %out0, align 4
+;  %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+;  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define arm_aapcscc void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
new file mode 100644
index 000000000000..e33797079093
--- /dev/null
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecpe.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i16
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = sdiv <8 x i8> %tmp1, %tmp2
+	ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vqmovun.s16
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load <8 x i8>* %B
+	%tmp3 = udiv <8 x i8> %tmp1, %tmp2
+	ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = sdiv <4 x i16> %tmp1, %tmp2
+	ret <4 x i16> %tmp3
+}
+
+define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = load <4 x i16>* %B
+	%tmp3 = udiv <4 x i16> %tmp1, %tmp2
+	ret <4 x i16> %tmp3
+}
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 4905dc28cf48..90151767b919 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -3,87 +3,78 @@
 ; CHECK: test1
 ; CHECK: pkhbt   r0, r0, r1, lsl #16
 define i32 @test1(i32 %X, i32 %Y) {
-	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
-	%tmp4 = shl i32 %Y, 16		; <i32> [#uses=1]
-	%tmp5 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
-	ret i32 %tmp5
-}
-
-; CHECK: test1a
-; CHECK: pkhbt   r0, r0, r1, lsl #16
-define i32 @test1a(i32 %X, i32 %Y) {
-	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
-	%tmp37 = shl i32 %Y, 16		; <i32> [#uses=1]
-	%tmp5 = or i32 %tmp37, %tmp19		; <i32> [#uses=1]
+	%tmp1 = and i32 %X, 65535
+	%tmp4 = shl i32 %Y, 16
+	%tmp5 = or i32 %tmp4, %tmp1
 	ret i32 %tmp5
 }
 
 ; CHECK: test2
 ; CHECK: pkhbt   r0, r0, r1, lsl #12
 define i32 @test2(i32 %X, i32 %Y) {
-	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
-	%tmp3 = shl i32 %Y, 12		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp3, -65536		; <i32> [#uses=1]
-	%tmp57 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	%tmp1 = and i32 %X, 65535
+	%tmp3 = shl i32 %Y, 12
+	%tmp4 = and i32 %tmp3, -65536
+	%tmp57 = or i32 %tmp4, %tmp1
 	ret i32 %tmp57
 }
 
 ; CHECK: test3
 ; CHECK: pkhbt   r0, r0, r1, lsl #18
 define i32 @test3(i32 %X, i32 %Y) {
-	%tmp19 = and i32 %X, 65535		; <i32> [#uses=1]
-	%tmp37 = shl i32 %Y, 18		; <i32> [#uses=1]
-	%tmp5 = or i32 %tmp37, %tmp19		; <i32> [#uses=1]
+	%tmp19 = and i32 %X, 65535
+	%tmp37 = shl i32 %Y, 18
+	%tmp5 = or i32 %tmp37, %tmp19
 	ret i32 %tmp5
 }
 
 ; CHECK: test4
 ; CHECK: pkhbt   r0, r0, r1
 define i32 @test4(i32 %X, i32 %Y) {
-	%tmp1 = and i32 %X, 65535		; <i32> [#uses=1]
-	%tmp3 = and i32 %Y, -65536		; <i32> [#uses=1]
-	%tmp46 = or i32 %tmp3, %tmp1		; <i32> [#uses=1]
+	%tmp1 = and i32 %X, 65535
+	%tmp3 = and i32 %Y, -65536
+	%tmp46 = or i32 %tmp3, %tmp1
 	ret i32 %tmp46
 }
 
 ; CHECK: test5
 ; CHECK: pkhtb   r0, r0, r1, asr #16
 define i32 @test5(i32 %X, i32 %Y) {
-	%tmp17 = and i32 %X, -65536		; <i32> [#uses=1]
-	%tmp2 = bitcast i32 %Y to i32		; <i32> [#uses=1]
-	%tmp4 = lshr i32 %tmp2, 16		; <i32> [#uses=2]
-	%tmp5 = or i32 %tmp4, %tmp17		; <i32> [#uses=1]
+	%tmp17 = and i32 %X, -65536
+	%tmp2 = bitcast i32 %Y to i32
+	%tmp4 = lshr i32 %tmp2, 16
+	%tmp5 = or i32 %tmp4, %tmp17
 	ret i32 %tmp5
 }
 
 ; CHECK: test5a
 ; CHECK: pkhtb   r0, r0, r1, asr #16
 define i32 @test5a(i32 %X, i32 %Y) {
-	%tmp110 = and i32 %X, -65536		; <i32> [#uses=1]
-	%tmp37 = lshr i32 %Y, 16		; <i32> [#uses=1]
-	%tmp39 = bitcast i32 %tmp37 to i32		; <i32> [#uses=1]
-	%tmp5 = or i32 %tmp39, %tmp110		; <i32> [#uses=1]
+	%tmp110 = and i32 %X, -65536
+	%tmp37 = lshr i32 %Y, 16
+	%tmp39 = bitcast i32 %tmp37 to i32
+	%tmp5 = or i32 %tmp39, %tmp110
 	ret i32 %tmp5
 }
 
 ; CHECK: test6
 ; CHECK: pkhtb   r0, r0, r1, asr #12
 define i32 @test6(i32 %X, i32 %Y) {
-	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
-	%tmp37 = lshr i32 %Y, 12		; <i32> [#uses=1]
-	%tmp38 = bitcast i32 %tmp37 to i32		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp38, 65535		; <i32> [#uses=1]
-	%tmp59 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	%tmp1 = and i32 %X, -65536
+	%tmp37 = lshr i32 %Y, 12
+	%tmp38 = bitcast i32 %tmp37 to i32
+	%tmp4 = and i32 %tmp38, 65535
+	%tmp59 = or i32 %tmp4, %tmp1
 	ret i32 %tmp59
 }
 
 ; CHECK: test7
 ; CHECK: pkhtb   r0, r0, r1, asr #18
 define i32 @test7(i32 %X, i32 %Y) {
-	%tmp1 = and i32 %X, -65536		; <i32> [#uses=1]
-	%tmp3 = ashr i32 %Y, 18		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp3, 65535		; <i32> [#uses=1]
-	%tmp57 = or i32 %tmp4, %tmp1		; <i32> [#uses=1]
+	%tmp1 = and i32 %X, -65536
+	%tmp3 = ashr i32 %Y, 18
+	%tmp4 = and i32 %tmp3, 65535
+	%tmp57 = or i32 %tmp4, %tmp1
 	ret i32 %tmp57
 }
 
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
new file mode 100644
index 000000000000..29e17c095a74
--- /dev/null
+++ b/test/CodeGen/ARM/phi.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+; <rdar://problem/8686347>
+
+define i32 @test1(i1 %a, i32* %b) {
+; CHECK: test1
+entry:
+  br i1 %a, label %lblock, label %rblock
+
+lblock:
+  %lbranch = getelementptr i32* %b, i32 1
+  br label %end
+
+rblock:
+  %rbranch = getelementptr i32* %b, i32 1
+  br label %end
+  
+end:
+; CHECK: ldr	r0, [r1, #4]
+  %gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock]
+  %r = load i32* %gep
+; CHECK-NEXT: bx	lr
+  ret i32 %r
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
new file mode 100644
index 000000000000..895b27b749db
--- /dev/null
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
+; RUN: llc < %s -march=thumb -mattr=+v7a     | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -march=arm   -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM-MP
+; rdar://8601536
+
+define void @t1(i8* %ptr) nounwind  {
+entry:
+; ARM-MP: t1:
+; ARM-MP: pldw [r0]
+; ARM-MP: pld [r0]
+
+; THUMB2: t1:
+; THUMB2-NOT: pldw [r0]
+; THUMB2: pld [r0]
+  tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
+  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+  ret void
+}
+
+define void @t2(i8* %ptr) nounwind  {
+entry:
+; ARM-MP: t2:
+; ARM-MP: pld [r0, #1023]
+
+; THUMB2: t2:
+; THUMB2: pld [r0, #1023]
+  %tmp = getelementptr i8* %ptr, i32 1023
+  tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
+  ret void
+}
+
+define void @t3(i32 %base, i32 %offset) nounwind  {
+entry:
+; ARM-MP: t3:
+; ARM-MP: pld [r0, r1, lsr #2]
+
+; THUMB2: t3:
+; THUMB2: lsrs r1, r1, #2
+; THUMB2: pld [r0, r1]
+  %tmp1 = lshr i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i8*
+  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+  ret void
+}
+
+define void @t4(i32 %base, i32 %offset) nounwind  {
+entry:
+; ARM-MP: t4:
+; ARM-MP: pld [r0, r1, lsl #2]
+
+; THUMB2: t4:
+; THUMB2: pld [r0, r1, lsl #2]
+  %tmp1 = shl i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i8*
+  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind 
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 2e4f10d8a63d..53214fd4c302 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -46,8 +46,8 @@ entry:
 ; CHECK:        t2:
 ; CHECK:        vld1.16
 ; CHECK-NOT:    vmov
-; CHECK:        vld1.16
 ; CHECK:        vmul.i16
+; CHECK:        vld1.16
 ; CHECK:        vmul.i16
 ; CHECK-NOT:    vmov
 ; CHECK:        vst1.16
@@ -75,7 +75,8 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
 ; CHECK:        t3:
 ; CHECK:        vld3.8
 ; CHECK:        vmul.i8
-; CHECK-NOT:    vmov
+; CHECK:        vmov r
+; CHECK-NOT:    vmov d
 ; CHECK:        vst3.8
   %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
   %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1]
@@ -122,9 +123,9 @@ return1:
 return2:
 ; CHECK:        %return2
 ; CHECK:        vadd.i32
-; CHECK:        vmov q1, q3
+; CHECK:        vmov q9, q11
 ; CHECK-NOT:    vmov
-; CHECK:        vst2.32 {d0, d1, d2, d3}
+; CHECK:        vst2.32 {d16, d17, d18, d19}
   %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
   %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
   %tmp102 = add <4 x i32> %tmp100, %tmp101              ; <<4 x i32>> [#uses=1]
@@ -136,9 +137,9 @@ return2:
 define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK:        t5:
 ; CHECK:        vldmia
-; CHECK:        vmov q1, q0
+; CHECK:        vmov q9, q8
 ; CHECK-NOT:    vmov
-; CHECK:        vld2.16 {d0[1], d2[1]}, [r0]
+; CHECK:        vld2.16 {d16[1], d18[1]}, [r0]
 ; CHECK-NOT:    vmov
 ; CHECK:        vadd.i16
   %tmp0 = bitcast i16* %A to i8*                  ; <i8*> [#uses=1]
@@ -153,8 +154,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 ; CHECK:        t6:
 ; CHECK:        vldr.64
-; CHECK:        vmov d1, d0
-; CHECK-NEXT:   vld2.8 {d0[1], d1[1]}
+; CHECK:        vmov d17, d16
+; CHECK-NEXT:   vld2.8 {d16[1], d17[1]}
   %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
   %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
   %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
@@ -168,10 +169,10 @@ entry:
 ; CHECK:        t7:
 ; CHECK:        vld2.32
 ; CHECK:        vst2.32
-; CHECK:        vld1.32 {d0, d1},
-; CHECK:        vmov q1, q0
+; CHECK:        vld1.32 {d16, d17},
+; CHECK:        vmov q9, q8
 ; CHECK-NOT:    vmov
-; CHECK:        vuzp.32 q0, q1
+; CHECK:        vuzp.32 q8, q9
 ; CHECK:        vst1.32
   %0 = bitcast i32* %iptr to i8*                  ; <i8*> [#uses=2]
   %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
@@ -188,7 +189,7 @@ entry:
 ; PR7156
 define arm_aapcs_vfpcc i32 @t8() nounwind {
 ; CHECK: t8:
-; CHECK: vrsqrte.f32 q0, q0
+; CHECK: vrsqrte.f32 q8, q8
 bb.nph55.bb.nph55.split_crit_edge:
   br label %bb3
 
@@ -238,10 +239,10 @@ bb14:                                             ; preds = %bb6
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
 ; CHECK:        vldr.64
-; CHECK-NOT:    vmov d{{.*}}, d0
-; CHECK:        vmov.i32 d1
-; CHECK-NEXT:   vstmia r0, {d0, d1}
-; CHECK-NEXT:   vstmia r0, {d0, d1}
+; CHECK-NOT:    vmov d{{.*}}, d16
+; CHECK:        vmov.i32 d17
+; CHECK-NEXT:   vstmia r0, {d16, d17}
+; CHECK-NEXT:   vstmia r0, {d16, d17}
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
   store <4 x float> %4, <4 x float>* undef, align 16
@@ -269,9 +270,9 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
-; CHECK: vmov.i32 q1, #0x3F000000
-; CHECK: vmov d0, d1
-; CHECK: vmla.f32 q0, q0, d0[0]
+; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmov.i32 q9, #0x3F000000
+; CHECK: vadd.f32 q8, q8, q8
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
   %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
deleted file mode 100644
index 6b86f1a9f368..000000000000
--- a/test/CodeGen/ARM/remat.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
-
-define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
-entry:
-  br i1 undef, label %smvp.exit, label %bb.i3
-
-bb.i3:                                            ; preds = %bb.i3, %bb134
-  br i1 undef, label %smvp.exit, label %bb.i3
-
-smvp.exit:                                        ; preds = %bb.i3
-  %0 = fmul double %d1, 2.400000e-03            ; <double> [#uses=2]
-  br i1 undef, label %bb138.preheader, label %bb159
-
-bb138.preheader:                                  ; preds = %smvp.exit
-  br label %bb138
-
-bb138:                                            ; preds = %bb138, %bb138.preheader
-  br i1 undef, label %bb138, label %bb145.loopexit
-
-bb142:                                            ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
-  %1 = fmul double %d1, -1.200000e-03           ; <double> [#uses=1]
-  %2 = fadd double %d2, %1                      ; <double> [#uses=1]
-  %3 = fmul double %2, %d2                      ; <double> [#uses=1]
-  %4 = fsub double 0.000000e+00, %3               ; <double> [#uses=1]
-  br i1 %14, label %phi1.exit, label %bb.i35
-
-bb.i35:                                           ; preds = %bb142
-  %5 = call  double @sin(double %15) nounwind readonly ; <double> [#uses=1]
-  %6 = fmul double %5, 0x4031740AFA84AD8A         ; <double> [#uses=1]
-  %7 = fsub double 1.000000e+00, undef            ; <double> [#uses=1]
-  %8 = fdiv double %7, 6.000000e-01               ; <double> [#uses=1]
-  br label %phi1.exit
-
-phi1.exit:                                        ; preds = %bb.i35, %bb142
-  %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
-  %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
-  %10 = fmul double %.pn, %9                      ; <double> [#uses=1]
-  br i1 %14, label %phi0.exit, label %bb.i
-
-bb.i:                                             ; preds = %phi1.exit
-  unreachable
-
-phi0.exit:                                        ; preds = %phi1.exit
-  %11 = fsub double %4, %10                       ; <double> [#uses=1]
-  %12 = fadd double 0.000000e+00, %11             ; <double> [#uses=1]
-  store double %12, double* undef, align 4
-  br label %bb142
-
-bb145.loopexit:                                   ; preds = %bb138
-  br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
-
-bb.nph218.bb.nph218.split_crit_edge:              ; preds = %bb145.loopexit
-  %13 = fmul double %0, 0x401921FB54442D18        ; <double> [#uses=1]
-  %14 = fcmp ugt double %0, 6.000000e-01          ; <i1> [#uses=2]
-  %15 = fdiv double %13, 6.000000e-01             ; <double> [#uses=1]
-  br label %bb142
-
-bb159:                                            ; preds = %bb145.loopexit, %smvp.exit, %bb134
-  unreachable
-
-bb166:                                            ; preds = %bb127
-  unreachable
-}
-
-declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 1c12268ef86c..687bf8834c9f 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,27 +1,30 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
-; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
 
 define i32 @test1(i32 %X) {
-        %tmp1 = lshr i32 %X, 8          ; <i32> [#uses=3]
-        %X15 = bitcast i32 %X to i32            ; <i32> [#uses=1]
-        %tmp4 = shl i32 %X15, 8         ; <i32> [#uses=2]
-        %tmp2 = and i32 %tmp1, 16711680         ; <i32> [#uses=1]
-        %tmp5 = and i32 %tmp4, -16777216                ; <i32> [#uses=1]
-        %tmp9 = and i32 %tmp1, 255              ; <i32> [#uses=1]
-        %tmp13 = and i32 %tmp4, 65280           ; <i32> [#uses=1]
-        %tmp6 = or i32 %tmp5, %tmp2             ; <i32> [#uses=1]
-        %tmp10 = or i32 %tmp6, %tmp13           ; <i32> [#uses=1]
-        %tmp14 = or i32 %tmp10, %tmp9           ; <i32> [#uses=1]
+; CHECK: test1
+; CHECK: rev16 r0, r0
+        %tmp1 = lshr i32 %X, 8
+        %X15 = bitcast i32 %X to i32
+        %tmp4 = shl i32 %X15, 8
+        %tmp2 = and i32 %tmp1, 16711680
+        %tmp5 = and i32 %tmp4, -16777216
+        %tmp9 = and i32 %tmp1, 255
+        %tmp13 = and i32 %tmp4, 65280
+        %tmp6 = or i32 %tmp5, %tmp2
+        %tmp10 = or i32 %tmp6, %tmp13
+        %tmp14 = or i32 %tmp10, %tmp9
         ret i32 %tmp14
 }
 
 define i32 @test2(i32 %X) {
-        %tmp1 = lshr i32 %X, 8          ; <i32> [#uses=1]
-        %tmp1.upgrd.1 = trunc i32 %tmp1 to i16          ; <i16> [#uses=1]
-        %tmp3 = trunc i32 %X to i16             ; <i16> [#uses=1]
-        %tmp2 = and i16 %tmp1.upgrd.1, 255              ; <i16> [#uses=1]
-        %tmp4 = shl i16 %tmp3, 8                ; <i16> [#uses=1]
-        %tmp5 = or i16 %tmp2, %tmp4             ; <i16> [#uses=1]
-        %tmp5.upgrd.2 = sext i16 %tmp5 to i32           ; <i32> [#uses=1]
+; CHECK: test2
+; CHECK: revsh r0, r0
+        %tmp1 = lshr i32 %X, 8
+        %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+        %tmp3 = trunc i32 %X to i16
+        %tmp2 = and i16 %tmp1.upgrd.1, 255
+        %tmp4 = shl i16 %tmp3, 8
+        %tmp5 = or i16 %tmp2, %tmp4
+        %tmp5.upgrd.2 = sext i16 %tmp5 to i32
         ret i32 %tmp5.upgrd.2
 }
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 6e15fde045fb..578834ec93bc 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm                | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=T2
+; RUN: llc < %s -march=arm                  | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -march=arm -mattr=+thumb2   | FileCheck %s --check-prefix=ARMT2
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s --check-prefix=THUMB2
 
 define i32 @t1(i32 %c) nounwind readnone {
 entry:
@@ -8,9 +9,13 @@ entry:
 ; ARM: orr r1, r1, #1, 24
 ; ARM: movgt r0, #123
 
-; T2: t1:
-; T2: movw r0, #357
-; T2: movgt r0, #123
+; ARMT2: t1:
+; ARMT2: movw r0, #357
+; ARMT2: movgt r0, #123
+
+; THUMB2: t1:
+; THUMB2: movw r0, #357
+; THUMB2: movgt r0, #123
 
   %0 = icmp sgt i32 %c, 1
   %1 = select i1 %0, i32 123, i32 357
@@ -20,13 +25,17 @@ entry:
 define i32 @t2(i32 %c) nounwind readnone {
 entry:
 ; ARM: t2:
-; ARM: mov r1, #101
-; ARM: orr r1, r1, #1, 24
-; ARM: movle r0, #123
+; ARM: mov r0, #123
+; ARM: movgt r0, #101
+; ARM: orrgt r0, r0, #1, 24
 
-; T2: t2:
-; T2: movw r0, #357
-; T2: movle r0, #123
+; ARMT2: t2:
+; ARMT2: mov r0, #123
+; ARMT2: movwgt r0, #357
+
+; THUMB2: t2:
+; THUMB2: mov.w r0, #123
+; THUMB2: movwgt r0, #357
 
   %0 = icmp sgt i32 %c, 1
   %1 = select i1 %0, i32 357, i32 123
@@ -39,10 +48,31 @@ entry:
 ; ARM: mov r0, #0
 ; ARM: moveq r0, #1
 
-; T2: t3:
-; T2: mov r0, #0
-; T2: moveq r0, #1
+; ARMT2: t3:
+; ARMT2: mov r0, #0
+; ARMT2: moveq r0, #1
+
+; THUMB2: t3:
+; THUMB2: mov.w r0, #0
+; THUMB2: moveq r0, #1
   %0 = icmp eq i32 %a, 160
   %1 = zext i1 %0 to i32
   ret i32 %1
 }
+
+define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
+entry:
+; ARM: t4:
+; ARM: ldr
+; ARM: movlt
+
+; ARMT2: t4:
+; ARMT2: movwlt r0, #65365
+; ARMT2: movtlt r0, #65365
+
+; THUMB2: t4:
+; THUMB2: mvnlt.w r0, #11141290
+  %0 = icmp slt i32 %a, %b
+  %1 = select i1 %0, i32 4283826005, i32 %x
+  ret i32 %1
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 7413bed5c5b1..1aa0d3904125 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
 ; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
 
@@ -79,9 +79,9 @@ define double @f7(double %a, double %b) {
 ; CHECK-NEON:      movw   [[REGISTER_1:r[0-9]+]], #1123
 ; CHECK-NEON-NEXT: movs   [[REGISTER_2:r[0-9]+]], #0
 ; CHECK-NEON-NEXT: cmp    r0, [[REGISTER_1]]
-; CHECK-NEON-NEXT: adr    [[REGISTER_3:r[0-9]+]], #LCPI
 ; CHECK-NEON-NEXT: it     eq
 ; CHECK-NEON-NEXT: moveq  [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: adr    [[REGISTER_3:r[0-9]+]], #LCPI
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON:      bx
 
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 7fd91ceea5ad..5dabfc3a82a3 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,15 +1,60 @@
-; RUN: llc < %s -march=arm | grep mov | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumb-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2
+; rdar://8662825
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
-        %tmp1 = icmp sgt i32 %c, 10
-        %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
-        %tmp3 = add i32 %tmp2, %b
-        ret i32 %tmp3
+; ARM: t1:
+; ARM: sub r0, r1, #6, 2
+; ARM: movgt r0, r1
+
+; T2: t1:
+; T2: mvn r0, #-2147483648
+; T2: add r0, r1
+; T2: movgt r0, r1
+  %tmp1 = icmp sgt i32 %c, 10
+  %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
+  %tmp3 = add i32 %tmp2, %b
+  ret i32 %tmp3
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-        %tmp1 = icmp sgt i32 %c, 10
-        %tmp2 = select i1 %tmp1, i32 0, i32 10
-        %tmp3 = sub i32 %b, %tmp2
-        ret i32 %tmp3
+; ARM: t2:
+; ARM: sub r0, r1, #10
+; ARM: movgt r0, r1
+
+; T2: t2:
+; T2: sub.w r0, r1, #10
+; T2: movgt r0, r1
+  %tmp1 = icmp sgt i32 %c, 10
+  %tmp2 = select i1 %tmp1, i32 0, i32 10
+  %tmp3 = sub i32 %b, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
+; ARM: t3:
+; ARM: mvnlt r2, #0
+; ARM: and r0, r2, r3
+
+; T2: t3:
+; T2: movlt.w r2, #-1
+; T2: and.w r0, r2, r3
+  %cond = icmp slt i32 %a, %b
+  %z = select i1 %cond, i32 -1, i32 %x
+  %s = and i32 %z, %y
+ ret i32 %s
+}
+
+define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
+; ARM: t4:
+; ARM: movlt r2, #0
+; ARM: orr r0, r2, r3
+
+; T2: t4:
+; T2: movlt r2, #0
+; T2: orr.w r0, r2, r3
+  %cond = icmp slt i32 %a, %b
+  %z = select i1 %cond, i32 0, i32 %x
+  %s = or i32 %z, %y
+ ret i32 %s
 }
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 2bbe9fd2602c..01e3a922f656 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,18 +1,72 @@
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755
 
 
 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-        %shift.upgrd.1 = zext i8 %sh to i32             ; <i32> [#uses=1]
-        %A = shl i32 %Y, %shift.upgrd.1         ; <i32> [#uses=1]
-        %B = add i32 %X, %A             ; <i32> [#uses=1]
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = shl i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
         ret i32 %B
 }
 
 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-        %shift.upgrd.2 = zext i8 %sh to i32             ; <i32> [#uses=1]
-        %A = ashr i32 %Y, %shift.upgrd.2                ; <i32> [#uses=1]
-        %B = xor i32 %A, -1             ; <i32> [#uses=1]
-        %C = and i32 %X, %B             ; <i32> [#uses=1]
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+        %shift.upgrd.2 = zext i8 %sh to i32
+        %A = ashr i32 %Y, %shift.upgrd.2
+        %B = xor i32 %A, -1
+        %C = and i32 %X, %B
         ret i32 %C
 }
+
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r0, [r0, r2, lsl #2]
+; A9: ldr r1, [r1, r2, lsl #2]
+        %tmp1 = shl i32 %offset, 2
+        %tmp2 = add i32 %base, %tmp1
+        %tmp3 = inttoptr i32 %tmp2 to i32*
+        %tmp4 = add i32 %base2, %tmp1
+        %tmp5 = inttoptr i32 %tmp4 to i32*
+        %tmp6 = load i32* %tmp3
+        %tmp7 = load i32* %tmp5
+        %tmp8 = add i32 %tmp7, %tmp6
+        ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+  %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+  %1 = bitcast i8* %0 to i32*
+  %2 = sext i16 undef to i32
+  %3 = getelementptr inbounds i32* %1, i32 %2
+  %4 = load i32* %3, align 4
+  %5 = add nsw i32 %4, 1
+  store i32 %5, i32* %3, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index ae1ba2f73825..bf4e55cb06c4 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -15,11 +15,34 @@ define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
-  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+  %aligned_vec = alloca <4 x float>, align 16
+  %"alloca point" = bitcast i32 0 to i32
+  %vecptr = bitcast <4 x float>* %aligned_vec to i8*
+  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
   %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 0.000000e+00, float* undef, align 4
   %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+  %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
   %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
@@ -44,7 +67,16 @@ bb4:                                              ; preds = %bb193, %entry
   %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
   %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
-  %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
+  %tmp1 = fadd <4 x float> %20, %ld3
+  %tmp2 = fadd <4 x float> %tmp1, %ld4
+  %tmp3 = fadd <4 x float> %tmp2, %ld5
+  %tmp4 = fadd <4 x float> %tmp3, %ld6
+  %tmp5 = fadd <4 x float> %tmp4, %ld7
+  %tmp6 = fadd <4 x float> %tmp5, %ld8
+  %tmp7 = fadd <4 x float> %tmp6, %ld9
+  %tmp8 = fadd <4 x float> %tmp7, %ld10
+  %tmp9 = fadd <4 x float> %tmp8, %ld11
+  %21 = fadd <4 x float> %tmp9, %ld12
   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
   %tmp = extractelement <4 x i1> %22, i32 0
   br i1 %tmp, label %bb193, label %bb186
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 22a7ecb4aa28..2f5fadbee28a 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
 
 @"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[32 x i8]*> [#uses=1]
 @"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals"		; <[26 x i8]*> [#uses=1]
@@ -7,6 +7,9 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 define i32 @main() nounwind {
 entry:
+; CHECK: main
+; CHECK: push
+; CHECK: stmib
 	%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind		; <i32> [#uses=0]
 	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 553cd64fce94..465c7e676c56 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #4}
+; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
 
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
+; CHECK: str lr, [sp, #-4]!
+; CHECK: ldr lr, [sp], #4
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 17c8baedbfa8..5b3dce386bb7 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -17,13 +17,16 @@ declare i8* @choose(i8*, i8*)
 ; CHECK: tail_duplicate_me:
 ; CHECK:      qux
 ; CHECK:      qux
-; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
 ; CHECK:      str r
 ; CHECK-NEXT: bx r
-; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
 ; CHECK:      str r
 ; CHECK-NEXT: bx r
-; CHECK:      ldr r{{.}}, LCPI
+; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
+; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
 ; CHECK:      str r
 ; CHECK-NEXT: bx r
 
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
new file mode 100644
index 000000000000..25093fee225a
--- /dev/null
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; rdar://8819685
+
+@__bar = external hidden global i8*
+@__baz = external hidden global i8*
+
+define i8* @_foo() {
+entry:
+; CHECK: foo:
+
+	%size = alloca i32, align 4
+	%0 = load i8** @__bar, align 4
+	%1 = icmp eq i8* %0, null
+	br i1 %1, label %bb1, label %bb3
+		
+bb1:
+	store i32 1026, i32* %size, align 4
+	%2 = alloca [1026 x i8], align 1
+; CHECK: mov     r0, sp
+; CHECK: adds    r4, r0, r4
+	%3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
+	%4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
+	%5 = icmp eq i32 %4, 0
+	br i1 %5, label %bb2, label %bb3
+	
+bb2:
+	%6 = call i8* @strdup(i8* %3) nounwind
+	store i8* %6, i8** @__baz, align 4
+	br label %bb3
+	
+bb3:
+	%.0 = phi i8* [ %0, %entry ], [ %6, %bb2 ], [ %3, %bb1 ]
+; CHECK: subs    r4, #5
+; CHECK-NEXT: mov     sp, r4
+; CHECK-NEXT: pop     {r4, r5, r6, r7, pc}
+	ret i8* %.0
+}
+
+declare noalias i8* @strdup(i8* nocapture) nounwind
+declare i32 @_called_func(i8*, i32*) nounwind
\ No newline at end of file
diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll
new file mode 100644
index 000000000000..aa7d28a62349
--- /dev/null
+++ b/test/CodeGen/ARM/umulo-32.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+
+%umul.ty = type { i32, i1 }
+
+define i32 @func(i32 %a) nounwind {
+; CHECK: func
+; CHECK: muldi3
+  %tmp0 = tail call %umul.ty @llvm.umul.with.overflow.i32(i32 %a, i32 37)
+  %tmp1 = extractvalue %umul.ty %tmp0, 0
+  %tmp2 = select i1 undef, i32 -1, i32 %tmp1
+  ret i32 %tmp2
+}
+
+declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index e2794919d9da..b42e11f2c4ab 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
 ; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
 
 ; rdar://7113725
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 293d22938a76..51f9bdf9718b 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
 
 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: v_andi8:
@@ -505,3 +505,43 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
         %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
 	ret <4 x i32> %tmp5
 }
+
+define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
+; CHECK: v_orrimm:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vorr
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
+; CHECK: v_orrimmQ
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vorr
+	%tmp1 = load <16 x i8>* %A
+	%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
+; CHECK: v_bicimm:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vbic
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
+; CHECK: v_bicimmQ:
+; CHECK-NOT: vmov
+; CHECK-NOT: vmvn
+; CHECK: vbic
+	%tmp1 = load <16 x i8>* %A
+	%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+	ret <16 x i8> %tmp3
+}
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index e4787518e731..051c349a06a4 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -79,3 +79,14 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
 }
+
+define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
+;CHECK: vceqi8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vceq.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 2c161113c113..bf5f0b9efb2f 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -160,3 +160,44 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 
 declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcge.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcle.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+; Radar 8782191
+; Floating-point comparisons against zero produce results with integer
+; elements, not floating-point elements.
+define void @test_vclez_fp() nounwind optsize {
+;CHECK: test_vclez_fp
+;CHECK: vcle.f32
+entry:
+  %0 = fcmp ole <4 x float> undef, zeroinitializer
+  %1 = sext <4 x i1> %0 to <4 x i16>
+  %2 = add <4 x i16> %1, zeroinitializer
+  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %5 = trunc <8 x i16> %4 to <8 x i8>
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 194093c8418c..c3c4cb356307 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -161,9 +161,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; rdar://7923010
 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vcgt_zext:
-;CHECK: vcgt.f32 q0
-;CHECK: vmov.i32 q1, #0x1
-;CHECK: vand q0, q0, q1
+;CHECK: vmov.i32 q10, #0x1
+;CHECK: vcgt.f32 q8
+;CHECK: vand q8, q8, q10
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
 	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
@@ -173,3 +173,25 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 
 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcgt.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vclt.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index e6733051f269..527f93b6637c 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK: vcombine8
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -8,6 +11,9 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK: vcombine16
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -15,6 +21,9 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK: vcombine32
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -22,6 +31,9 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+; CHECK: vcombinefloat
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -29,8 +41,32 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+; CHECK: vcombine64
+; CHECK: vmov r0, r1, d16
+; CHECK: vmov r2, r3, d17
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
 	%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
 	ret <2 x i64> %tmp3
 }
+
+; Check for vget_low and vget_high implemented with shufflevector.  PR8411.
+; They should not require storing to the stack.
+
+define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
+; CHECK: vget_low16
+; CHECK-NOT: vst
+; CHECK: vmov r0, r1, d16
+	%tmp1 = load <8 x i16>* %A
+        %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+        ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
+; CHECK: vget_high8
+; CHECK-NOT: vst
+; CHECK: vmov r0, r1, d17
+	%tmp1 = load <16 x i8>* %A
+        %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+        ret <8 x i8> %tmp2
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index f4cc5368d9aa..c078f493094b 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s
 
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
 ;CHECK: vcvt_f32tos32:
@@ -138,3 +138,21 @@ declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwi
 declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
 declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
 
+define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
+;CHECK: vcvt_f16tof32:
+;CHECK: vcvt.f32.f16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
+	ret <4 x float> %tmp2
+}
+
+define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
+;CHECK: vcvt_f32tof16:
+;CHECK: vcvt.f16.f32
+	%tmp1 = load <4 x float>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index a545f6c03d5b..e99fac1f1e67 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -162,24 +162,6 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 	ret <4 x float> %tmp2
 }
 
-define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
-;CHECK: v_shuffledupfloat2:
-;CHECK: vdup.32
-	%tmp0 = load float* %A
-        %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
-        %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
-        ret <2 x float> %tmp2
-}
-
-define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
-;CHECK: v_shuffledupQfloat2:
-;CHECK: vdup.32
-        %tmp0 = load float* %A
-        %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
-        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
-        ret <4 x float> %tmp2
-}
-
 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
 ;CHECK: vduplane8:
 ;CHECK: vdup.8
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
new file mode 100644
index 000000000000..3ab0cfcbbc77
--- /dev/null
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+; PR7158
+define i32 @test_pr7158() nounwind {
+bb.nph55.bb.nph55.split_crit_edge:
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb.nph55.bb.nph55.split_crit_edge
+  br i1 undef, label %bb.i19, label %bb3
+
+bb.i19:                                           ; preds = %bb.i19, %bb3
+  %0 = insertelement <4 x float> undef, float undef, i32 3 ; <<4 x float>> [#uses=3]
+  %1 = fmul <4 x float> %0, %0                    ; <<4 x float>> [#uses=1]
+  %2 = bitcast <4 x float> %1 to <2 x double>     ; <<2 x double>> [#uses=0]
+  %3 = fmul <4 x float> %0, undef                 ; <<4 x float>> [#uses=0]
+  br label %bb.i19
+}
+
+; Check that the DAG combiner does not arbitrarily modify BUILD_VECTORs
+; after legalization.
+define void @test_illegal_build_vector() nounwind {
+entry:
+  store <2 x i64> undef, <2 x i64>* undef, align 16
+  %0 = load <16 x i8>* undef, align 16            ; <<16 x i8>> [#uses=1]
+  %1 = or <16 x i8> zeroinitializer, %0           ; <<16 x i8>> [#uses=1]
+  store <16 x i8> %1, <16 x i8>* undef, align 16
+  ret void
+}
+
+; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
+; converted back to be used as a vector type.
+; CHECK: test_vmovrrd_combine
+define <4 x i32> @test_vmovrrd_combine() nounwind {
+entry:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %0 = bitcast <2 x i64> zeroinitializer to <2 x double>
+  %1 = extractelement <2 x double> %0, i32 0
+  %2 = bitcast double %1 to i64
+  %3 = insertelement <1 x i64> undef, i64 %2, i32 0
+; CHECK-NOT: vmov s
+; CHECK: vext.8
+  %4 = shufflevector <1 x i64> %3, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp2006.3 = bitcast <2 x i64> %4 to <16 x i8>
+  %5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+  %tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32>
+  br i1 undef, label %bb2, label %bb1
+
+bb2:
+  %result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ]
+  ret <4 x i32> %result
+}
+
+; Test trying to do a ShiftCombine on illegal types.
+; The vector should be split first.
+define void @lshrIllegalType(<8 x i32>* %A) nounwind {
+       %tmp1 = load <8 x i32>* %A
+       %tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+       store <8 x i32> %tmp2, <8 x i32>* %A
+       ret void
+}
+
+; Test folding a binary vector operation with constant BUILD_VECTOR
+; operands with i16 elements.
+define void @test_i16_constant_fold() nounwind optsize {
+entry:
+  %0 = sext <4 x i1> zeroinitializer to <4 x i16>
+  %1 = add <4 x i16> %0, zeroinitializer
+  %2 = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = add <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %4 = trunc <8 x i16> %3 to <8 x i8>
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %4, i32 1)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+; Test that loads and stores of i64 vector elements are handled as f64 values
+; so they are not split up into i32 values.  Radar 8755338.
+define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_buildvector
+; CHECK: vldr.64
+  %t0 = load i64* %ptr, align 4
+  %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
+  store <2 x i64> %t1, <2 x i64>* %vp
+  ret void
+}
+
+define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_insertelement
+; CHECK: vldr.64
+  %t0 = load i64* %ptr, align 4
+  %vec = load <2 x i64>* %vp
+  %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
+  store <2 x i64> %t1, <2 x i64>* %vp
+  ret void
+}
+
+define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
+; CHECK: i64_extractelement
+; CHECK: vstr.64
+  %vec = load <2 x i64>* %vp
+  %t1 = extractelement <2 x i64> %vec, i32 0
+  store i64 %t1, i64* %ptr
+  ret void
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index e460a84f6265..55abefef0fa7 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -74,3 +74,62 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 	ret <16 x i8> %tmp3
 }
 
+; Tests for ReconstructShuffle function. Indices have to be carefully
+; chosen to reach lowering phase as a BUILD_VECTOR.
+
+; One vector needs vext, the other can be handled by extract_subvector
+; Also checks interleaving of sources is handled correctly.
+; Essence: a vext is used on %A and something saner than stack load/store for final result.
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_interleaved:
+;CHECK: vext.16
+;CHECK-NOT: vext.16
+;CHECK: vzip.16
+        %tmp1 = load <8 x i16>* %A
+        %tmp2 = load <8 x i16>* %B
+        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
+        ret <4 x i16> %tmp3
+}
+
+; An undef in the shuffle list should still be optimizable
+define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: test_undef:
+;CHECK: vzip.16
+        %tmp1 = load <8 x i16>* %A
+        %tmp2 = load <8 x i16>* %B
+        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
+        ret <4 x i16> %tmp3
+}
+
+; We should ignore a build_vector with more than two sources.
+; Use illegal <32 x i16> type to produce such a shuffle after legalizing types.
+; Try to look for fallback to stack expansion.
+define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
+;CHECK: test_multisource:
+;CHECK: vst1.16
+        %tmp1 = load <32 x i16>* %B
+        %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+        ret <4 x i16> %tmp2
+}
+
+; We don't handle shuffles using more than half of a 128-bit vector.
+; Again, test for fallback to stack expansion
+define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
+;CHECK: test_largespan:
+;CHECK: vst1.16
+        %tmp1 = load <8 x i16>* %B
+        %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+        ret <4 x i16> %tmp2
+}
+
+; The actual shuffle code only handles some cases, make sure we check
+; this rather than blindly emitting a VECTOR_SHUFFLE (infinite
+; lowering loop can result otherwise).
+define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_illegal:
+;CHECK: vst1.8
+       %tmp1 = load <16 x i8>* %A
+       %tmp2 = load <16 x i8>* %B
+       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 25, i32 3, i32 2, i32 2, i32 26>
+       ret <8 x i8> %tmp3
+}
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index 05e7f5090952..1fc885d61372 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -96,13 +96,14 @@ define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
 
 define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
 entry:
-; CHECK: vmov.u16 r0, d0[1]
+; CHECK: vmov.u16 r0, d{{.*}}[1]
   %arg0_uint16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %0 = load <4 x i16>* %arg0_uint16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
-  store i16 %1, i16* %out_uint16_t, align 2
+  %2 = add i16 %1, %1
+  store i16 %2, i16* %out_uint16_t, align 2
   br label %return
 
 return:                                           ; preds = %entry
@@ -111,13 +112,14 @@ return:                                           ; preds = %entry
 
 define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
 entry:
-; CHECK: vmov.u8 r0, d0[1]
+; CHECK: vmov.u8 r0, d{{.*}}[1]
   %arg0_uint8x8_t = alloca <8 x i8>               ; <<8 x i8>*> [#uses=1]
   %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %0 = load <8 x i8>* %arg0_uint8x8_t, align 8    ; <<8 x i8>> [#uses=1]
   %1 = extractelement <8 x i8> %0, i32 1          ; <i8> [#uses=1]
-  store i8 %1, i8* %out_uint8_t, align 1
+  %2 = add i8 %1, %1
+  store i8 %2, i8* %out_uint8_t, align 1
   br label %return
 
 return:                                           ; preds = %entry
@@ -126,13 +128,14 @@ return:                                           ; preds = %entry
 
 define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
 entry:
-; CHECK: vmov.u16 r0, d0[1]
+; CHECK: vmov.u16 r0, d{{.*}}[1]
   %arg0_uint16x8_t = alloca <8 x i16>             ; <<8 x i16>*> [#uses=1]
   %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
   %1 = extractelement <8 x i16> %0, i32 1         ; <i16> [#uses=1]
-  store i16 %1, i16* %out_uint16_t, align 2
+  %2 = add i16 %1, %1
+  store i16 %2, i16* %out_uint16_t, align 2
   br label %return
 
 return:                                           ; preds = %entry
@@ -141,13 +144,14 @@ return:                                           ; preds = %entry
 
 define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
 entry:
-; CHECK: vmov.u8 r0, d0[1]
+; CHECK: vmov.u8 r0, d{{.*}}[1]
   %arg0_uint8x16_t = alloca <16 x i8>             ; <<16 x i8>*> [#uses=1]
   %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
   %1 = extractelement <16 x i8> %0, i32 1         ; <i8> [#uses=1]
-  store i8 %1, i8* %out_uint8_t, align 1
+  %2 = add i8 %1, %1
+  store i8 %2, i8* %out_uint8_t, align 1
   br label %return
 
 return:                                           ; preds = %entry
@@ -210,3 +214,20 @@ entry:
   %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
   ret <2 x float> %0
 }
+
+; The llvm extractelement instruction does not require that the lane number
+; be an immediate constant.  Make sure a variable lane number is handled.
+
+define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind {
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = extractelement <8 x i8> %tmp1, i32 %B
+	%tmp3 = sext i8 %tmp2 to i32
+	ret i32 %tmp3
+}
+
+define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind {
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = add <4 x i32> %tmp1, %tmp1
+	%tmp3 = extractelement <4 x i32> %tmp2, i32 %B
+	ret i32 %tmp3
+}
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 2488e8a0d0cc..c886125a2fb0 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -2,8 +2,9 @@
 
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK: vld1i8:
-;CHECK: vld1.8
-	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld1.8 {d16}, [r0, :64]
+	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
 	ret <8 x i8> %tmp1
 }
 
@@ -15,6 +16,18 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
 	ret <4 x i16> %tmp1
 }
 
+;Check for a post-increment updating load. 
+define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
+;CHECK: vld1i16_update:
+;CHECK: vld1.16 {d16}, [r1]!
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+	%tmp2 = getelementptr i16* %A, i32 4
+	       store i16* %tmp2, i16** %ptr
+	ret <4 x i16> %tmp1
+}
+
 define <2 x i32> @vld1i32(i32* %A) nounwind {
 ;CHECK: vld1i32:
 ;CHECK: vld1.32
@@ -23,6 +36,18 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
 	ret <2 x i32> %tmp1
 }
 
+;Check for a post-increment updating load with register increment.
+define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
+;CHECK: vld1i32_update:
+;CHECK: vld1.32 {d16}, [r2], r1
+	%A = load i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+	%tmp2 = getelementptr i32* %A, i32 %inc
+	store i32* %tmp2, i32** %ptr
+	ret <2 x i32> %tmp1
+}
+
 define <2 x float> @vld1f(float* %A) nounwind {
 ;CHECK: vld1f:
 ;CHECK: vld1.32
@@ -41,16 +66,29 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
 
 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;CHECK: vld1Qi8:
-;CHECK: vld1.8
-	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vld1.8 {d16, d17}, [r0, :64]
+	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+	ret <16 x i8> %tmp1
+}
+
+;Check for a post-increment updating load.
+define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
+;CHECK: vld1Qi8_update:
+;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+	%A = load i8** %ptr
+	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+	%tmp2 = getelementptr i8* %A, i32 16
+	store i8* %tmp2, i8** %ptr
 	ret <16 x i8> %tmp1
 }
 
 define <8 x i16> @vld1Qi16(i16* %A) nounwind {
 ;CHECK: vld1Qi16:
-;CHECK: vld1.16
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vld1.16 {d16, d17}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1)
+	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
 	ret <8 x i16> %tmp1
 }
 
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 811f6e6db96f..29b379465db5 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -13,8 +13,9 @@
 
 define <8 x i8> @vld2i8(i8* %A) nounwind {
 ;CHECK: vld2i8:
-;CHECK: vld2.8
-	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vld2.8 {d16, d17}, [r0, :64]
+	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
         %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -23,9 +24,10 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
 
 define <4 x i16> @vld2i16(i16* %A) nounwind {
 ;CHECK: vld2i16:
-;CHECK: vld2.16
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vld2.16 {d16, d17}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -54,11 +56,27 @@ define <2 x float> @vld2f(float* %A) nounwind {
 	ret <2 x float> %tmp4
 }
 
+;Check for a post-increment updating load. 
+define <2 x float> @vld2f_update(float** %ptr) nounwind {
+;CHECK: vld2f_update:
+;CHECK: vld2.32 {d16, d17}, [r1]!
+	%A = load float** %ptr
+	%tmp0 = bitcast float* %A to i8*
+	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
+	%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
+	%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
+	%tmp4 = fadd <2 x float> %tmp2, %tmp3
+	%tmp5 = getelementptr float* %A, i32 4
+	store float* %tmp5, float** %ptr
+	ret <2 x float> %tmp4
+}
+
 define <1 x i64> @vld2i64(i64* %A) nounwind {
 ;CHECK: vld2i64:
-;CHECK: vld1.64
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vld1.64 {d16, d17}, [r0, :128]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
         %tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,19 +85,35 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
 
 define <16 x i8> @vld2Qi8(i8* %A) nounwind {
 ;CHECK: vld2Qi8:
-;CHECK: vld2.8
-	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
+        %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+        %tmp4 = add <16 x i8> %tmp2, %tmp3
+	ret <16 x i8> %tmp4
+}
+
+;Check for a post-increment updating load with register increment.
+define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld2Qi8_update:
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
+	%A = load i8** %ptr
+	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
         %tmp4 = add <16 x i8> %tmp2, %tmp3
+	%tmp5 = getelementptr i8* %A, i32 %inc
+	store i8* %tmp5, i8** %ptr
 	ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vld2Qi16(i16* %A) nounwind {
 ;CHECK: vld2Qi16:
-;CHECK: vld2.16
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
         %tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -88,9 +122,10 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
 
 define <4 x i32> @vld2Qi32(i32* %A) nounwind {
 ;CHECK: vld2Qi32:
-;CHECK: vld2.32
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
         %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
         %tmp4 = add <4 x i32> %tmp2, %tmp3
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 92538c34f5b8..dde530f6df1f 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -13,8 +13,9 @@
 
 define <8 x i8> @vld3i8(i8* %A) nounwind {
 ;CHECK: vld3i8:
-;CHECK: vld3.8
-	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
         %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -32,6 +33,21 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
 	ret <4 x i16> %tmp4
 }
 
+;Check for a post-increment updating load with register increment.
+define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
+;CHECK: vld3i16_update:
+;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+	%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+	%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+	%tmp4 = add <4 x i16> %tmp2, %tmp3
+	%tmp5 = getelementptr i16* %A, i32 %inc
+	store i16* %tmp5, i16** %ptr
+	ret <4 x i16> %tmp4
+}
+
 define <2 x i32> @vld3i32(i32* %A) nounwind {
 ;CHECK: vld3i32:
 ;CHECK: vld3.32
@@ -56,9 +72,10 @@ define <2 x float> @vld3f(float* %A) nounwind {
 
 define <1 x i64> @vld3i64(i64* %A) nounwind {
 ;CHECK: vld3i64:
-;CHECK: vld1.64
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld1.64 {d16, d17, d18}, [r0, :64]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
         %tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,9 +84,10 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
 
 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
 ;CHECK: vld3Qi8:
-;CHECK: vld3.8
-;CHECK: vld3.8
-	%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
+;CHECK: vld3.8 {d17, d19, d21}, [r0, :64]
+	%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
         %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
         %tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -100,6 +118,22 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
 	ret <4 x i32> %tmp4
 }
 
+;Check for a post-increment updating load. 
+define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
+;CHECK: vld3Qi32_update:
+;CHECK: vld3.32 {d16, d18, d20}, [r1]!
+;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+	%A = load i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
+	%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
+	%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
+	%tmp4 = add <4 x i32> %tmp2, %tmp3
+	%tmp5 = getelementptr i32* %A, i32 12
+	store i32* %tmp5, i32** %ptr
+	ret <4 x i32> %tmp4
+}
+
 define <4 x float> @vld3Qf(float* %A) nounwind {
 ;CHECK: vld3Qf:
 ;CHECK: vld3.32
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index d1bf957ebadc..59a73db3187e 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -13,19 +13,35 @@
 
 define <8 x i8> @vld4i8(i8* %A) nounwind {
 ;CHECK: vld4i8:
-;CHECK: vld4.8
-	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
         %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
         %tmp4 = add <8 x i8> %tmp2, %tmp3
 	ret <8 x i8> %tmp4
 }
 
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld4i8_update:
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1
+	%A = load i8** %ptr
+	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
+	%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+	%tmp4 = add <8 x i8> %tmp2, %tmp3
+	%tmp5 = getelementptr i8* %A, i32 %inc
+	store i8* %tmp5, i8** %ptr
+	ret <8 x i8> %tmp4
+}
+
 define <4 x i16> @vld4i16(i16* %A) nounwind {
 ;CHECK: vld4i16:
-;CHECK: vld4.16
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -34,9 +50,10 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
 
 define <2 x i32> @vld4i32(i32* %A) nounwind {
 ;CHECK: vld4i32:
-;CHECK: vld4.32
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
         %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -56,9 +73,10 @@ define <2 x float> @vld4f(float* %A) nounwind {
 
 define <1 x i64> @vld4i64(i64* %A) nounwind {
 ;CHECK: vld4i64:
-;CHECK: vld1.64
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1)
+	%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
         %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
         %tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -67,9 +85,10 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
 
 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 ;CHECK: vld4Qi8:
-;CHECK: vld4.8
-;CHECK: vld4.8
-	%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1)
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
+;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]
+	%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
         %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
         %tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -78,8 +97,9 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 
 define <8 x i16> @vld4Qi16(i16* %A) nounwind {
 ;CHECK: vld4Qi16:
-;CHECK: vld4.16
-;CHECK: vld4.16
+;Check for no alignment specifier.
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1)
         %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
@@ -88,6 +108,22 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
 	ret <8 x i16> %tmp4
 }
 
+;Check for a post-increment updating load. 
+define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
+;CHECK: vld4Qi16_update:
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]!
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
+	%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
+	%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
+	%tmp4 = add <8 x i16> %tmp2, %tmp3
+	%tmp5 = getelementptr i16* %A, i32 32
+	store i16* %tmp5, i16** %ptr
+	ret <8 x i16> %tmp4
+}
+
 define <4 x i32> @vld4Qi32(i32* %A) nounwind {
 ;CHECK: vld4Qi32:
 ;CHECK: vld4.32
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
new file mode 100644
index 000000000000..d0e9ac3ad3c4
--- /dev/null
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -0,0 +1,212 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vld1dupi8(i8* %A) nounwind {
+;CHECK: vld1dupi8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[]}, [r0]
+	%tmp1 = load i8* %A, align 8
+	%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
+	%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
+        ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1dupi16(i16* %A) nounwind {
+;CHECK: vld1dupi16:
+;Check the alignment value.  Max for this instruction is 16 bits:
+;CHECK: vld1.16 {d16[]}, [r0, :16]
+	%tmp1 = load i16* %A, align 8
+	%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
+	%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
+        ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1dupi32(i32* %A) nounwind {
+;CHECK: vld1dupi32:
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vld1.32 {d16[]}, [r0, :32]
+	%tmp1 = load i32* %A, align 8
+	%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
+	%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
+        ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vld1dupf(float* %A) nounwind {
+;CHECK: vld1dupf:
+;CHECK: vld1.32 {d16[]}, [r0]
+	%tmp0 = load float* %A
+        %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+        %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+        ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
+;CHECK: vld1dupQi8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[], d17[]}, [r0]
+	%tmp1 = load i8* %A, align 8
+	%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
+	%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
+        ret <16 x i8> %tmp3
+}
+
+define <4 x float> @vld1dupQf(float* %A) nounwind {
+;CHECK: vld1dupQf:
+;CHECK: vld1.32 {d16[], d17[]}, [r0]
+        %tmp0 = load float* %A
+        %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+        ret <4 x float> %tmp2
+}
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
+
+define <8 x i8> @vld2dupi8(i8* %A) nounwind {
+;CHECK: vld2dupi8:
+;Check the (default) alignment value.
+;CHECK: vld2.8 {d16[], d17[]}, [r0]
+	%tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+	%tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
+	%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+        %tmp5 = add <8 x i8> %tmp2, %tmp4
+        ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2dupi16(i16* %A) nounwind {
+;CHECK: vld2dupi16:
+;Check that a power-of-two alignment smaller than the total size of the memory
+;being loaded is ignored.
+;CHECK: vld2.16 {d16[], d17[]}, [r0]
+	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+	%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+        %tmp5 = add <4 x i16> %tmp2, %tmp4
+        ret <4 x i16> %tmp5
+}
+
+;Check for a post-increment updating load. 
+define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld2dupi16_update:
+;CHECK: vld2.16 {d16[], d17[]}, [r1]!
+	%A = load i16** %ptr
+	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+	%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp5 = add <4 x i16> %tmp2, %tmp4
+	%tmp6 = getelementptr i16* %A, i32 2
+	store i16* %tmp6, i16** %ptr
+	ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2dupi32(i32* %A) nounwind {
+;CHECK: vld2dupi32:
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
+	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+	%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
+	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
+	%tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+        %tmp5 = add <2 x i32> %tmp2, %tmp4
+        ret <2 x i32> %tmp5
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+
+;Check for a post-increment updating load with register increment.
+define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
+;CHECK: vld3dupi8_update:
+;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
+	%A = load i8** %ptr
+	%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
+	%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
+	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1
+	%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+	%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 2
+	%tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer
+	%tmp7 = add <8 x i8> %tmp2, %tmp4
+	%tmp8 = add <8 x i8> %tmp7, %tmp6
+	%tmp9 = getelementptr i8* %A, i32 %inc
+	store i8* %tmp9, i8** %ptr
+	ret <8 x i8> %tmp8
+}
+
+define <4 x i16> @vld3dupi16(i16* %A) nounwind {
+;CHECK: vld3dupi16:
+;Check the (default) alignment value. VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
+	%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+	%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
+	%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
+	%tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+        %tmp7 = add <4 x i16> %tmp2, %tmp4
+        %tmp8 = add <4 x i16> %tmp7, %tmp6
+        ret <4 x i16> %tmp8
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+;Check for a post-increment updating load.
+define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
+;CHECK: vld4dupi16_update:
+;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
+	%A = load i16** %ptr
+	%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+	%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
+	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
+	%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
+	%tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
+	%tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
+	%tmp9 = add <4 x i16> %tmp2, %tmp4
+	%tmp10 = add <4 x i16> %tmp6, %tmp8
+	%tmp11 = add <4 x i16> %tmp9, %tmp10
+	%tmp12 = getelementptr i16* %A, i32 4
+	store i16* %tmp12, i16** %ptr
+	ret <4 x i16> %tmp11
+}
+
+define <2 x i32> @vld4dupi32(i32* %A) nounwind {
+;CHECK: vld4dupi32:
+;Check the alignment value.  An 8-byte alignment is allowed here even though
+;it is smaller than the total size of the memory being loaded.
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
+	%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+	%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
+	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+	%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
+	%tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+	%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2
+	%tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer
+	%tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3
+	%tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer
+        %tmp9 = add <2 x i32> %tmp2, %tmp4
+        %tmp10 = add <2 x i32> %tmp6, %tmp8
+        %tmp11 = add <2 x i32> %tmp9, %tmp10
+        ret <2 x i32> %tmp11
+}
+
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 31ee64fa598f..770ed071ac12 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,5 +1,80 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vld1lanei8:
+;Check the (default) alignment value.
+;CHECK: vld1.8 {d16[3]}, [r0]
+	%tmp1 = load <8 x i8>* %B
+	%tmp2 = load i8* %A, align 8
+	%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
+        ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vld1lanei16:
+;Check the alignment value.  Max for this instruction is 16 bits:
+;CHECK: vld1.16 {d16[2]}, [r0, :16]
+	%tmp1 = load <4 x i16>* %B
+	%tmp2 = load i16* %A, align 8
+	%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
+        ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32:
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = load i32* %A, align 8
+	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+        ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vld1lanef:
+;CHECK: vld1.32 {d16[1]}, [r0]
+	%tmp1 = load <2 x float>* %B
+	%tmp2 = load float* %A, align 4
+	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
+	ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vld1laneQi8:
+;CHECK: vld1.8 {d17[1]}, [r0]
+	%tmp1 = load <16 x i8>* %B
+	%tmp2 = load i8* %A, align 8
+	%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
+	ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vld1laneQi16:
+;CHECK: vld1.16 {d17[1]}, [r0, :16]
+	%tmp1 = load <8 x i16>* %B
+	%tmp2 = load i16* %A, align 8
+	%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
+	ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vld1laneQi32:
+;CHECK: vld1.32 {d17[1]}, [r0, :32]
+	%tmp1 = load <4 x i32>* %B
+	%tmp2 = load i32* %A, align 8
+	%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
+	ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vld1laneQf:
+;CHECK: vld1.32 {d16[0]}, [r0]
+	%tmp1 = load <4 x float>* %B
+	%tmp2 = load float* %A
+	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
+	ret <4 x float> %tmp3
+}
+
 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
 %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
@@ -11,9 +86,10 @@
 
 define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld2lanei8:
-;CHECK: vld2.8
+;Check the alignment value.  Max for this instruction is 16 bits:
+;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
 	%tmp1 = load <8 x i8>* %B
-	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -22,10 +98,11 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld2lanei16:
-;CHECK: vld2.16
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -44,6 +121,22 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 	ret <2 x i32> %tmp5
 }
 
+;Check for a post-increment updating load.
+define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vld2lanei32_update:
+;CHECK: vld2.32 {d16[1], d17[1]}, [r1]!
+	%A = load i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = load <2 x i32>* %B
+	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+	%tmp5 = add <2 x i32> %tmp3, %tmp4
+	%tmp6 = getelementptr i32* %A, i32 2
+	store i32* %tmp6, i32** %ptr
+	ret <2 x i32> %tmp5
+}
+
 define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vld2lanef:
 ;CHECK: vld2.32
@@ -58,10 +151,11 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 
 define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld2laneQi16:
-;CHECK: vld2.16
+;Check the (default) alignment.
+;CHECK: vld2.16 {d17[1], d19[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
         %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -70,10 +164,11 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld2laneQi32:
-;CHECK: vld2.32
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
-	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
         %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -125,10 +220,11 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld3lanei16:
-;CHECK: vld3.16
+;Check the (default) alignment value.  VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
@@ -167,10 +263,11 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
 
 define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld3laneQi16:
-;CHECK: vld3.16
+;Check the (default) alignment value.  VLD3 does not support alignment.
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -179,6 +276,24 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 	ret <8 x i16> %tmp7
 }
 
+;Check for a post-increment updating load with register increment.
+define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vld3laneQi16_update:
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = load <8 x i16>* %B
+	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+	%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+	%tmp6 = add <8 x i16> %tmp3, %tmp4
+	%tmp7 = add <8 x i16> %tmp5, %tmp6
+	%tmp8 = getelementptr i16* %A, i32 %inc
+	store i16* %tmp8, i16** %ptr
+	ret <8 x i16> %tmp7
+}
+
 define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld3laneQi32:
 ;CHECK: vld3.32
@@ -227,9 +342,10 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
 
 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8:
-;CHECK: vld4.8
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
 	%tmp1 = load <8 x i8>* %B
-	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -240,12 +356,33 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 	ret <8 x i8> %tmp9
 }
 
+;Check for a post-increment updating load.
+define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vld4lanei8_update:
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	%A = load i8** %ptr
+	%tmp1 = load <8 x i8>* %B
+	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+	%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+	%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+	%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+	%tmp7 = add <8 x i8> %tmp3, %tmp4
+	%tmp8 = add <8 x i8> %tmp5, %tmp6
+	%tmp9 = add <8 x i8> %tmp7, %tmp8
+	%tmp10 = getelementptr i8* %A, i32 4
+	store i8* %tmp10, i8** %ptr
+	ret <8 x i8> %tmp9
+}
+
 define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld4lanei16:
-;CHECK: vld4.16
+;Check that a power-of-two alignment smaller than the total size of the memory
+;being loaded is ignored.
+;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
         %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
@@ -258,10 +395,12 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 
 define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld4lanei32:
-;CHECK: vld4.32
+;Check the alignment value.  An 8-byte alignment is allowed here even though
+;it is smaller than the total size of the memory being loaded.
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
-	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
@@ -290,10 +429,11 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
 
 define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld4laneQi16:
-;CHECK: vld4.16
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
         %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
@@ -306,10 +446,11 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld4laneQi32:
-;CHECK: vld4.32
+;Check the (default) alignment.
+;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
-	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1)
+	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
         %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
@@ -344,3 +485,22 @@ declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x flo
 declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
 declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
 declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
+; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
+; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
+; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
+define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
+;CHECK: test_qqqq_regsequence_subreg
+;CHECK: vld3.16
+  %tmp63 = extractvalue [6 x i64] %b, 5
+  %tmp64 = zext i64 %tmp63 to i128
+  %tmp65 = shl i128 %tmp64, 64
+  %ins67 = or i128 %tmp65, 0
+  %tmp78 = bitcast i128 %ins67 to <8 x i16>
+  %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
+  call void @llvm.trap()
+  unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 8cd94576b0c2..a86be32bd203 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -2,169 +2,169 @@
 
 define <8 x i8> @v_movi8() nounwind {
 ;CHECK: v_movi8:
-;CHECK: vmov.i8 d0, #0x8
+;CHECK: vmov.i8 d{{.*}}, #0x8
 	ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <4 x i16> @v_movi16a() nounwind {
 ;CHECK: v_movi16a:
-;CHECK: vmov.i16 d0, #0x10
+;CHECK: vmov.i16 d{{.*}}, #0x10
 	ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
 }
 
 define <4 x i16> @v_movi16b() nounwind {
 ;CHECK: v_movi16b:
-;CHECK: vmov.i16 d0, #0x1000
+;CHECK: vmov.i16 d{{.*}}, #0x1000
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i16> @v_mvni16a() nounwind {
 ;CHECK: v_mvni16a:
-;CHECK: vmvn.i16 d0, #0x10
+;CHECK: vmvn.i16 d{{.*}}, #0x10
 	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
 }
 
 define <4 x i16> @v_mvni16b() nounwind {
 ;CHECK: v_mvni16b:
-;CHECK: vmvn.i16 d0, #0x1000
+;CHECK: vmvn.i16 d{{.*}}, #0x1000
 	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
 }
 
 define <2 x i32> @v_movi32a() nounwind {
 ;CHECK: v_movi32a:
-;CHECK: vmov.i32 d0, #0x20
+;CHECK: vmov.i32 d{{.*}}, #0x20
 	ret <2 x i32> < i32 32, i32 32 >
 }
 
 define <2 x i32> @v_movi32b() nounwind {
 ;CHECK: v_movi32b:
-;CHECK: vmov.i32 d0, #0x2000
+;CHECK: vmov.i32 d{{.*}}, #0x2000
 	ret <2 x i32> < i32 8192, i32 8192 >
 }
 
 define <2 x i32> @v_movi32c() nounwind {
 ;CHECK: v_movi32c:
-;CHECK: vmov.i32 d0, #0x200000
+;CHECK: vmov.i32 d{{.*}}, #0x200000
 	ret <2 x i32> < i32 2097152, i32 2097152 >
 }
 
 define <2 x i32> @v_movi32d() nounwind {
 ;CHECK: v_movi32d:
-;CHECK: vmov.i32 d0, #0x20000000
+;CHECK: vmov.i32 d{{.*}}, #0x20000000
 	ret <2 x i32> < i32 536870912, i32 536870912 >
 }
 
 define <2 x i32> @v_movi32e() nounwind {
 ;CHECK: v_movi32e:
-;CHECK: vmov.i32 d0, #0x20FF
+;CHECK: vmov.i32 d{{.*}}, #0x20FF
 	ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 define <2 x i32> @v_movi32f() nounwind {
 ;CHECK: v_movi32f:
-;CHECK: vmov.i32 d0, #0x20FFFF
+;CHECK: vmov.i32 d{{.*}}, #0x20FFFF
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
 define <2 x i32> @v_mvni32a() nounwind {
 ;CHECK: v_mvni32a:
-;CHECK: vmvn.i32 d0, #0x20
+;CHECK: vmvn.i32 d{{.*}}, #0x20
 	ret <2 x i32> < i32 4294967263, i32 4294967263 >
 }
 
 define <2 x i32> @v_mvni32b() nounwind {
 ;CHECK: v_mvni32b:
-;CHECK: vmvn.i32 d0, #0x2000
+;CHECK: vmvn.i32 d{{.*}}, #0x2000
 	ret <2 x i32> < i32 4294959103, i32 4294959103 >
 }
 
 define <2 x i32> @v_mvni32c() nounwind {
 ;CHECK: v_mvni32c:
-;CHECK: vmvn.i32 d0, #0x200000
+;CHECK: vmvn.i32 d{{.*}}, #0x200000
 	ret <2 x i32> < i32 4292870143, i32 4292870143 >
 }
 
 define <2 x i32> @v_mvni32d() nounwind {
 ;CHECK: v_mvni32d:
-;CHECK: vmvn.i32 d0, #0x20000000
+;CHECK: vmvn.i32 d{{.*}}, #0x20000000
 	ret <2 x i32> < i32 3758096383, i32 3758096383 >
 }
 
 define <2 x i32> @v_mvni32e() nounwind {
 ;CHECK: v_mvni32e:
-;CHECK: vmvn.i32 d0, #0x20FF
+;CHECK: vmvn.i32 d{{.*}}, #0x20FF
 	ret <2 x i32> < i32 4294958848, i32 4294958848 >
 }
 
 define <2 x i32> @v_mvni32f() nounwind {
 ;CHECK: v_mvni32f:
-;CHECK: vmvn.i32 d0, #0x20FFFF
+;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF
 	ret <2 x i32> < i32 4292804608, i32 4292804608 >
 }
 
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
-;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF
 	ret <1 x i64> < i64 18374687574888349695 >
 }
 
 define <16 x i8> @v_movQi8() nounwind {
 ;CHECK: v_movQi8:
-;CHECK: vmov.i8 q0, #0x8
+;CHECK: vmov.i8 q{{.*}}, #0x8
 	ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <8 x i16> @v_movQi16a() nounwind {
 ;CHECK: v_movQi16a:
-;CHECK: vmov.i16 q0, #0x10
+;CHECK: vmov.i16 q{{.*}}, #0x10
 	ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 }
 
 define <8 x i16> @v_movQi16b() nounwind {
 ;CHECK: v_movQi16b:
-;CHECK: vmov.i16 q0, #0x1000
+;CHECK: vmov.i16 q{{.*}}, #0x1000
 	ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i32> @v_movQi32a() nounwind {
 ;CHECK: v_movQi32a:
-;CHECK: vmov.i32 q0, #0x20
+;CHECK: vmov.i32 q{{.*}}, #0x20
 	ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
 }
 
 define <4 x i32> @v_movQi32b() nounwind {
 ;CHECK: v_movQi32b:
-;CHECK: vmov.i32 q0, #0x2000
+;CHECK: vmov.i32 q{{.*}}, #0x2000
 	ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
 }
 
 define <4 x i32> @v_movQi32c() nounwind {
 ;CHECK: v_movQi32c:
-;CHECK: vmov.i32 q0, #0x200000
+;CHECK: vmov.i32 q{{.*}}, #0x200000
 	ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
 }
 
 define <4 x i32> @v_movQi32d() nounwind {
 ;CHECK: v_movQi32d:
-;CHECK: vmov.i32 q0, #0x20000000
+;CHECK: vmov.i32 q{{.*}}, #0x20000000
 	ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
 }
 
 define <4 x i32> @v_movQi32e() nounwind {
 ;CHECK: v_movQi32e:
-;CHECK: vmov.i32 q0, #0x20FF
+;CHECK: vmov.i32 q{{.*}}, #0x20FF
 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 define <4 x i32> @v_movQi32f() nounwind {
 ;CHECK: v_movQi32f:
-;CHECK: vmov.i32 q0, #0x20FFFF
+;CHECK: vmov.i32 q{{.*}}, #0x20FFFF
 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 define <2 x i64> @v_movQi64() nounwind {
 ;CHECK: v_movQi64:
-;CHECK: vmov.i64 q0, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
@@ -173,7 +173,7 @@ define <2 x i64> @v_movQi64() nounwind {
 define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
 ;CHECK: vdupn128:
-;CHECK: vmov.i8 d0, #0x80
+;CHECK: vmov.i8 d{{.*}}, #0x80
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
   ret void
@@ -182,7 +182,7 @@ entry:
 define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
 ;CHECK: vdupnneg75:
-;CHECK: vmov.i8 d0, #0xB5
+;CHECK: vmov.i8 d{{.*}}, #0xB5
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
   ret void
@@ -343,3 +343,13 @@ declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
 declare <8 x i8>  @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
+
+; Truncating vector stores are not supported.  The following should not crash.
+; Radar 8598391.
+define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
+;CHECK: vmovn
+  %tmp1 = load <4 x i32>* %a, align 16
+  %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
+  store <4 x i16> %tmp2, <4 x i16>* %b, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 5383425018f8..ee033caa00d0 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -267,3 +267,75 @@ entry:
 }
 
 declare <8 x i16>  @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+
+
+; Radar 8687140
+; VMULL needs to recognize BUILD_VECTORs with sign/zero-extended elements.
+
+define <8 x i16> @vmull_extvec_s8(<8 x i8> %arg) nounwind {
+; CHECK: vmull_extvec_s8
+; CHECK: vmull.s8
+  %tmp3 = sext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_extvec_u8(<8 x i8> %arg) nounwind {
+; CHECK: vmull_extvec_u8
+; CHECK: vmull.u8
+  %tmp3 = zext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_noextvec_s8(<8 x i8> %arg) nounwind {
+; Do not use VMULL if the BUILD_VECTOR element values are too big.
+; CHECK: vmull_noextvec_s8
+; CHECK: vmovl.s8
+; CHECK: vmul.i16
+  %tmp3 = sext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @vmull_noextvec_u8(<8 x i8> %arg) nounwind {
+; Do not use VMULL if the BUILD_VECTOR element values are too big.
+; CHECK: vmull_noextvec_u8
+; CHECK: vmovl.u8
+; CHECK: vmul.i16
+  %tmp3 = zext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmull_extvec_s16(<4 x i16> %arg) nounwind {
+; CHECK: vmull_extvec_s16
+; CHECK: vmull.s16
+  %tmp3 = sext <4 x i16> %arg to <4 x i32>
+  %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
+  ret <4 x i32> %tmp4
+}
+
+define <4 x i32> @vmull_extvec_u16(<4 x i16> %arg) nounwind {
+; CHECK: vmull_extvec_u16
+; CHECK: vmull.u16
+  %tmp3 = zext <4 x i16> %arg to <4 x i32>
+  %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmull_extvec_s32(<2 x i32> %arg) nounwind {
+; CHECK: vmull_extvec_s32
+; CHECK: vmull.s32
+  %tmp3 = sext <2 x i32> %arg to <2 x i64>
+  %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
+; CHECK: vmull_extvec_u32
+; CHECK: vmull.u32
+  %tmp3 = zext <2 x i32> %arg to <2 x i64>
+  %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
+  ret <2 x i64> %tmp4
+}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index e1fe64b02d9d..f0f9e4e339b4 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -129,3 +129,21 @@ define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
 	ret <8 x i16> %tmp2
 }
+
+; A vcombine feeding a VREV should not obscure things.  Radar 8597007.
+
+define void @test_with_vcombine(<4 x float>* %v) nounwind {
+;CHECK: test_with_vcombine:
+;CHECK-NOT: vext
+;CHECK: vrev64.32
+  %tmp1 = load <4 x float>* %v, align 16
+  %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
+  %tmp3 = extractelement <2 x double> %tmp2, i32 0
+  %tmp4 = bitcast double %tmp3 to <2 x float>
+  %tmp5 = extractelement <2 x double> %tmp2, i32 1
+  %tmp6 = bitcast double %tmp5 to <2 x float>
+  %tmp7 = fadd <2 x float> %tmp6, %tmp6
+  %tmp8 = shufflevector <2 x float> %tmp4, <2 x float> %tmp7, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  store <4 x float> %tmp8, <4 x float>* %v, align 16
+  ret void
+}
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 2b535ada3072..364d44b7116f 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -2,9 +2,10 @@
 
 define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst1i8:
-;CHECK: vst1.8
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vst1.8 {d16}, [r0, :64]
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
 	ret void
 }
 
@@ -35,6 +36,19 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
 	ret void
 }
 
+;Check for a post-increment updating store.
+define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
+;CHECK: vst1f_update:
+;CHECK: vst1.32 {d16}, [r1]!
+	%A = load float** %ptr
+	%tmp0 = bitcast float* %A to i8*
+	%tmp1 = load <2 x float>* %B
+	call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
+	%tmp2 = getelementptr float* %A, i32 2
+	store float* %tmp2, float** %ptr
+	ret void
+}
+
 define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst1i64:
 ;CHECK: vst1.64
@@ -46,18 +60,33 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 
 define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst1Qi8:
-;CHECK: vst1.8
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst1.8 {d16, d17}, [r0, :64]
 	%tmp1 = load <16 x i8>* %B
-	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
 	ret void
 }
 
 define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst1Qi16:
-;CHECK: vst1.16
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst1.16 {d16, d17}, [r0, :128]
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
+	ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst1Qi16_update:
+;CHECK: vst1.16 {d16, d17}, [r1, :64], r2
+	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
+	%tmp2 = getelementptr i16* %A, i32 %inc
+	store i16* %tmp2, i16** %ptr
 	ret void
 }
 
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index aed15fd51c56..915a84b67767 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -2,18 +2,32 @@
 
 define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst2i8:
-;CHECK: vst2.8
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst2.8 {d16, d17}, [r0, :64]
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+	ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst2i8_update:
+;CHECK: vst2.8 {d16, d17}, [r1], r2
+	%A = load i8** %ptr
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
+	%tmp2 = getelementptr i8* %A, i32 %inc
+	store i8* %tmp2, i8** %ptr
 	ret void
 }
 
 define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst2i16:
-;CHECK: vst2.16
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst2.16 {d16, d17}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
 	ret void
 }
 
@@ -37,36 +51,53 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst2i64:
-;CHECK: vst1.64
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst1.64 {d16, d17}, [r0, :128]
+	%tmp0 = bitcast i64* %A to i8*
+	%tmp1 = load <1 x i64>* %B
+	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
+	ret void
+}
+
+;Check for a post-increment updating store.
+define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64_update:
+;CHECK: vst1.64 {d16, d17}, [r1, :64]!
+	%A = load i64** %ptr
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
-	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
+	%tmp2 = getelementptr i64* %A, i32 2
+	store i64* %tmp2, i64** %ptr
 	ret void
 }
 
 define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst2Qi8:
-;CHECK: vst2.8
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
 	%tmp1 = load <16 x i8>* %B
-	call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
 	ret void
 }
 
 define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst2Qi16:
-;CHECK: vst2.16
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
 	ret void
 }
 
 define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst2Qi32:
-;CHECK: vst2.32
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
-	call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
 	ret void
 }
 
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 1feaed5a1044..d262303bc60e 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -2,9 +2,11 @@
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst3i8:
-;CHECK: vst3.8
+;Check the alignment value.  Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
 	ret void
 }
 
@@ -26,6 +28,19 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
 	ret void
 }
 
+;Check for a post-increment updating store.
+define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32_update:
+;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+	%A = load i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = load <2 x i32>* %B
+	call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+	%tmp2 = getelementptr i32* %A, i32 6
+	store i32* %tmp2, i32** %ptr
+	ret void
+}
+
 define void @vst3f(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst3f:
 ;CHECK: vst3.32
@@ -37,19 +52,23 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst3i64:
-;CHECK: vst1.64
+;Check the alignment value.  Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
-	call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
 	ret void
 }
 
 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst3Qi8:
-;CHECK: vst3.8
-;CHECK: vst3.8
+;Check the alignment value.  Max for this instruction is 64 bits:
+;This test runs at -O0 so do not check for specific register numbers.
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]!
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
 	%tmp1 = load <16 x i8>* %B
-	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
 	ret void
 }
 
@@ -63,6 +82,20 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
 	ret void
 }
 
+;Check for a post-increment updating store.
+define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16_update:
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = load <8 x i16>* %B
+	call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+	%tmp2 = getelementptr i16* %A, i32 24
+	store i16* %tmp2, i16** %ptr
+	ret void
+}
+
 define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst3Qi32:
 ;CHECK: vst3.32
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index d302f097fc1f..e94acb66bf2e 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -2,27 +2,42 @@
 
 define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst4i8:
-;CHECK: vst4.8
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+	ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
+;CHECK: vst4i8_update:
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
+	%A = load i8** %ptr
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
+	%tmp2 = getelementptr i8* %A, i32 %inc
+	store i8* %tmp2, i8** %ptr
 	ret void
 }
 
 define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst4i16:
-;CHECK: vst4.16
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
 	ret void
 }
 
 define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst4i32:
-;CHECK: vst4.32
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
 	ret void
 }
 
@@ -37,26 +52,29 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst4i64:
-;CHECK: vst1.64
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
-	call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
 	ret void
 }
 
 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst4Qi8:
-;CHECK: vst4.8
-;CHECK: vst4.8
+;Check the alignment value.  Max for this instruction is 256 bits:
+;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
+;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]
 	%tmp1 = load <16 x i8>* %B
-	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
+	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
 	ret void
 }
 
 define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst4Qi16:
-;CHECK: vst4.16
-;CHECK: vst4.16
+;Check for no alignment specifier.
+;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
+;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
@@ -83,6 +101,20 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+;Check for a post-increment updating store.
+define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf_update:
+;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
+;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
+	%A = load float** %ptr
+	%tmp0 = bitcast float* %A to i8*
+	%tmp1 = load <4 x float>* %B
+	call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+	%tmp2 = getelementptr float* %A, i32 16
+	store float* %tmp2, float** %ptr
+	ret void
+}
+
 declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 30ec52ac6420..6cc052bbeb1c 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -1,19 +1,109 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
+define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst1lanei8:
+;Check the (default) alignment.
+;CHECK: vst1.8 {d16[3]}, [r0]
+	%tmp1 = load <8 x i8>* %B
+        %tmp2 = extractelement <8 x i8> %tmp1, i32 3
+        store i8 %tmp2, i8* %A, align 8
+	ret void
+}
+
+define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst1lanei16:
+;Check the alignment value.  Max for this instruction is 16 bits:
+;CHECK: vst1.16 {d16[2]}, [r0, :16]
+	%tmp1 = load <4 x i16>* %B
+        %tmp2 = extractelement <4 x i16> %tmp1, i32 2
+        store i16 %tmp2, i16* %A, align 8
+	ret void
+}
+
+define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst1lanei32:
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
+	%tmp1 = load <2 x i32>* %B
+        %tmp2 = extractelement <2 x i32> %tmp1, i32 1
+        store i32 %tmp2, i32* %A, align 8
+	ret void
+}
+
+define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1lanef:
+;CHECK: vst1.32 {d16[1]}, [r0]
+	%tmp1 = load <2 x float>* %B
+        %tmp2 = extractelement <2 x float> %tmp1, i32 1
+        store float %tmp2, float* %A
+	ret void
+}
+
+define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1laneQi8:
+;CHECK: vst1.8 {d17[1]}, [r0]
+	%tmp1 = load <16 x i8>* %B
+        %tmp2 = extractelement <16 x i8> %tmp1, i32 9
+        store i8 %tmp2, i8* %A, align 8
+	ret void
+}
+
+define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1laneQi16:
+;CHECK: vst1.16 {d17[1]}, [r0, :16]
+	%tmp1 = load <8 x i16>* %B
+        %tmp2 = extractelement <8 x i16> %tmp1, i32 5
+        store i16 %tmp2, i16* %A, align 8
+	ret void
+}
+
+define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1laneQi32:
+;CHECK: vst1.32 {d17[1]}, [r0, :32]
+	%tmp1 = load <4 x i32>* %B
+        %tmp2 = extractelement <4 x i32> %tmp1, i32 3
+        store i32 %tmp2, i32* %A, align 8
+	ret void
+}
+
+define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1laneQf:
+;CHECK: vst1.32 {d17[1]}, [r0]
+	%tmp1 = load <4 x float>* %B
+        %tmp2 = extractelement <4 x float> %tmp1, i32 3
+        store float %tmp2, float* %A
+	ret void
+}
+
 define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst2lanei8:
-;CHECK: vst2.8
+;Check the alignment value.  Max for this instruction is 16 bits:
+;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
 	ret void
 }
 
 define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst2lanei16:
-;CHECK: vst2.16
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+	ret void
+}
+
+;Check for a post-increment updating store with register increment.
+define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
+;CHECK: vst2lanei16_update:
+;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
+	%A = load i16** %ptr
+	%tmp0 = bitcast i16* %A to i8*
+	%tmp1 = load <4 x i16>* %B
+	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
+	%tmp2 = getelementptr i16* %A, i32 %inc
+	store i16* %tmp2, i16** %ptr
 	ret void
 }
 
@@ -37,19 +127,21 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst2laneQi16:
-;CHECK: vst2.16
+;Check the (default) alignment.
+;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
 	ret void
 }
 
 define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst2laneQi32:
-;CHECK: vst2.32
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
-	call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+	call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
 	ret void
 }
 
@@ -81,10 +173,11 @@ define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst3lanei16:
-;CHECK: vst3.16
+;Check the (default) alignment value.  VST3 does not support alignment.
+;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
 	ret void
 }
 
@@ -108,10 +201,11 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst3laneQi16:
-;CHECK: vst3.16
+;Check the (default) alignment value.  VST3 does not support alignment.
+;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1)
+	call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
 	ret void
 }
 
@@ -124,6 +218,19 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 	ret void
 }
 
+;Check for a post-increment updating store.
+define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32_update:
+;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
+	%A = load i32** %ptr
+	%tmp0 = bitcast i32* %A to i8*
+	%tmp1 = load <4 x i32>* %B
+	call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
+	%tmp2 = getelementptr i32* %A, i32 3
+	store i32* %tmp2, i32** %ptr
+	ret void
+}
+
 define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vst3laneQf:
 ;CHECK: vst3.32
@@ -145,9 +252,22 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f
 
 define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst4lanei8:
-;CHECK: vst4.8
+;Check the alignment value.  Max for this instruction is 32 bits:
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+	%tmp1 = load <8 x i8>* %B
+	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+	ret void
+}
+
+;Check for a post-increment updating store.
+define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8_update:
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+	%tmp2 = getelementptr i8* %A, i32 4
+	store i8* %tmp2, i8** %ptr
 	ret void
 }
 
@@ -162,10 +282,11 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 
 define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst4lanei32:
-;CHECK: vst4.32
+;Check the alignment value.  Max for this instruction is 128 bits:
+;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+	call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
 	ret void
 }
 
@@ -180,16 +301,18 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst4laneQi16:
-;CHECK: vst4.16
+;Check the alignment value.  Max for this instruction is 64 bits:
+;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1)
+	call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
 	ret void
 }
 
 define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst4laneQi32:
-;CHECK: vst4.32
+;Check the (default) alignment.
+;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
index cf3f0b90037d..4590f1245ae9 100644
--- a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
index 45d53c8c975f..401399face9a 100644
--- a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll
index 743292a58d59..7410b724d6fc 100644
--- a/test/CodeGen/CellSPU/arg_ret.ll
+++ b/test/CodeGen/CellSPU/arg_ret.ll
@@ -26,7 +26,8 @@ define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
 
 define ccc %paramstruct @test_return( i32 %param,  %paramstruct %prm )
 {
-;CHECK:  lqd	$75, 80($sp)
+;CHECK:  lqd	{{\$[0-9]+}}, 80($sp)
+;CHECK-NOT:	ori	{{\$[0-9]+, \$[0-9]+, 0}}
 ;CHECK:  lr    $3, $4
   ret %paramstruct %prm
 }
diff --git a/test/CodeGen/CellSPU/div_ops.ll b/test/CodeGen/CellSPU/div_ops.ll
new file mode 100644
index 000000000000..0c93d83ca76d
--- /dev/null
+++ b/test/CodeGen/CellSPU/div_ops.ll
@@ -0,0 +1,22 @@
+; RUN: llc --march=cellspu %s -o - | FileCheck %s
+
+; signed division rounds towards zero, rotma don't.
+define i32 @sdivide (i32 %val )
+{
+; CHECK: rotmai
+; CHECK: rotmi
+; CHECK: a
+; CHECK: rotmai
+; CHECK: bi $lr
+   %rv = sdiv i32 %val, 4
+   ret i32 %rv
+}
+
+define i32 @udivide (i32 %val )
+{
+; CHECK: rotmi
+; CHECK: bi $lr
+   %rv = udiv i32 %val, 4
+   ret i32 %rv
+}
+
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
index f07fe6fdab28..c14fd7ba4a46 100644
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ b/test/CodeGen/CellSPU/fcmp32.ll
@@ -1,9 +1,4 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep fceq  %t1.s | count 1
-; RUN: grep fcmeq %t1.s | count 1
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
+; RUN: llc --march=cellspu %s -o - | FileCheck %s
 
 ; Exercise the floating point comparison operators for f32:
 
@@ -11,13 +6,31 @@ declare double @fabs(double)
 declare float @fabsf(float)
 
 define i1 @fcmp_eq(float %arg1, float %arg2) {
+; CHECK: fceq
+; CHECK: bi $lr
         %A = fcmp oeq float %arg1,  %arg2
         ret i1 %A
 }
 
 define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
+; CHECK: fcmeq
+; CHECK: bi $lr
         %1 = call float @fabsf(float %arg1)
         %2 = call float @fabsf(float %arg2)
         %3 = fcmp oeq float %1, %2
         ret i1 %3
 }
+
+define i1 @test_ogt(float %a, float %b) {
+; CHECK: fcgt
+; CHECK: bi $lr
+	%cmp = fcmp ogt float %a, %b
+	ret i1 %cmp
+}
+
+define i1 @test_ugt(float %a, float %b) {
+; CHECK: fcgt
+; CHECK: bi $lr
+	%cmp = fcmp ugt float %a, %b
+	ret i1 %cmp
+}
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index 119f526847ce..8e48f0b52c17 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilhu  %t1.s | count 8
-; RUN: grep iohl  %t1.s | count 6
+; RUN: grep ilhu  %t1.s | count 9
+; RUN: grep iohl  %t1.s | count 7
 ; RUN: grep -w il    %t1.s | count 3
 ; RUN: grep 16429 %t1.s | count 1
 ; RUN: grep 63572 %t1.s | count 1
@@ -12,6 +12,7 @@
 ; RUN: grep 49077 %t1.s | count 1
 ; RUN: grep  1267 %t1.s | count 2
 ; RUN: grep 16309 %t1.s | count 1
+; RUN: cat %t1.s | FileCheck %s
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
 
@@ -31,6 +32,16 @@ define i32 @test_4() {
   ret i32 -512                  ;; IL via pattern
 }
 
+define i32 @test_5()
+{
+;CHECK: test_5:
+;CHECK-NOT: ila $3, 40000
+;CHECK: ilhu
+;CHECK: iohl
+;CHECK: bi $lr
+  ret i32 400000
+}
+
 ;; double             float       floatval
 ;; 0x4005bf0a80000000 0x402d|f854 2.718282
 define float @float_const_1() {
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
index d40217dacfea..03d7ad1153a1 100644
--- a/test/CodeGen/CellSPU/loads.ll
+++ b/test/CodeGen/CellSPU/loads.ll
@@ -38,3 +38,15 @@ define <4 x float> @load_undef(){
 	%val = load <4 x float>* undef
 	ret <4 x float> %val
 }
+
+;check that 'misaligned' loads that may span two memory chunks
+;have two loads. Don't check for the bitmanipulation, as that 
+;might change with improved algorithms or scheduling 
+define i32 @load_misaligned( i32* %ptr ){
+;CHECK: load_misaligned
+;CHECK: lqd
+;CHECK: lqd
+;CHECK: bi $lr
+  %rv = load i32* %ptr, align 2
+  ret i32 %rv
+}
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index a504c002ae12..e1172089c703 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot          %t1.s | count 85
+; RUN: grep rot          %t1.s | count 86
 ; RUN: grep roth         %t1.s | count 8
 ; RUN: grep roti.*5      %t1.s | count 1
 ; RUN: grep roti.*27     %t1.s | count 1
@@ -8,6 +8,7 @@
 ; RUN grep rothi.*,.3    %t1.s | count 1
 ; RUN: grep andhi        %t1.s | count 4
 ; RUN: grep shlhi        %t1.s | count 4
+; RUN: cat %t1.s | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -158,3 +159,14 @@ define i8 @rotri8(i8 %A) {
         %D = or i8 %B, %C               ; <i8> [#uses=1]
         ret i8 %D
 }
+
+define <2 x float> @test1(<4 x float> %param )
+{
+; CHECK: test1
+; CHECK: rotqbyi
+  %el = extractelement <4 x float> %param, i32 1
+  %vec1 = insertelement <1 x float> undef, float %el, i32 0
+  %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
+; CHECK: bi $lr
+  ret <2 x float> %rv
+} 
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
index 0c0b3599b110..6ae9aa51202f 100644
--- a/test/CodeGen/CellSPU/sext128.ll
+++ b/test/CodeGen/CellSPU/sext128.ll
@@ -12,8 +12,9 @@ entry:
 ; CHECK: 	long	269488144
 ; CHECK:	long	66051
 ; CHECK: 	long	67438087
-; CHECK: 	rotmai
+; CHECK-NOT: rotqmbyi
 ; CHECK:	lqa
+; CHECK: 	rotmai
 ; CHECK:	shufb
 }
 
@@ -25,8 +26,9 @@ entry:
 ; CHECK: 	long	269488144
 ; CHECK: 	long	269488144
 ; CHECK:	long	66051
-; CHECK: 	rotmai
+; CHECK-NOT: rotqmbyi
 ; CHECK:	lqa
+; CHECK: 	rotmai
 ; CHECK:	shufb
 }
 
@@ -39,9 +41,31 @@ entry:
 ; CHECK: 	long	269488144
 ; CHECK: 	long	269488144
 ; CHECK:	long	66051
-; CHECK: 	rotmai
+; CHECK-NOT: rotqmbyi
 ; CHECK:	lqa
+; CHECK: 	rotmai
 ; CHECK:	shufb
 }
 
 declare i32 @myfunc(float)
+
+define i128 @func1(i8 %u) {
+entry:
+; CHECK: xsbh
+; CHECK: xshw
+; CHECK: rotmai
+; CHECK: shufb
+; CHECK: bi $lr
+      %0 = sext i8 %u to i128
+      ret i128 %0
+}
+
+define i128 @func2(i16 %u) {
+entry:
+; CHECK: xshw
+; CHECK: rotmai
+; CHECK: shufb
+; CHECK: bi $lr
+      %0 = sext i16 %u to i128
+      ret i128 %0
+}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 0264fc830ea8..92390abf9465 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -4,17 +4,18 @@
 ; RUN: grep {shl	}  %t1.s | count 9
 ; RUN: grep {shli	}  %t1.s | count 3
 ; RUN: grep {xshw	}  %t1.s | count 5
-; RUN: grep {and	}  %t1.s | count 5
+; RUN: grep {and	}  %t1.s | count 14
 ; RUN: grep {andi	}  %t1.s | count 2
 ; RUN: grep {rotmi	}  %t1.s | count 2
 ; RUN: grep {rotqmbyi	}  %t1.s | count 1
 ; RUN: grep {rotqmbii	}  %t1.s | count 2
 ; RUN: grep {rotqmby	}  %t1.s | count 1
-; RUN: grep {rotqmbi	}  %t1.s | count 1
+; RUN: grep {rotqmbi	}  %t1.s | count 2
 ; RUN: grep {rotqbyi	}  %t1.s | count 1
 ; RUN: grep {rotqbii	}  %t1.s | count 2
 ; RUN: grep {rotqbybi	}  %t1.s | count 1
-; RUN: grep {sfi	}  %t1.s | count 3
+; RUN: grep {sfi	}  %t1.s | count 4
+; RUN: cat %t1.s | FileCheck %s
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
@@ -281,3 +282,14 @@ define i32 @hi32_i64(i64 %arg) {
 	%2 = trunc i64 %1 to i32
 	ret i32 %2
 }
+
+; some random tests
+define i128 @test_lshr_i128( i128 %val ) {
+ 	;CHECK: test_lshr_i128
+	;CHECK: sfi
+	;CHECK: rotqmbi
+	;CHECK: rotqmbybi
+	;CHECK: bi $lr
+	%rv = lshr i128 %val, 64
+	ret i128 %rv
+}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index f37d2ae89b00..c88a258c26c7 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,4 +1,4 @@
-; RUN: llc --march=cellspu < %s | FileCheck %s
+; RUN: llc -O1  --march=cellspu < %s | FileCheck %s
 
 define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
   ; CHECK: cwd {{\$.}}, 0($sp)
@@ -39,3 +39,29 @@ define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
   ret <4 x float> %rv
 }
 
+define <2 x i32> @test_v2i32(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 4
+;CHECK: bi $lr
+  %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2>
+  ret <2 x i32> %rv
+}
+
+define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 8
+;CHECK: bi $lr
+  %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
+        <4 x i32> <i32 2,i32 3,i32 0, i32 1>
+  ret <4 x i32> %rv
+}
+
+define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
+{
+;CHECK: rotqbyi $3, $3, 4
+;CHECK: bi $lr
+  %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
+        <4 x i32> <i32 1,i32 2,i32 3, i32 0>
+  ret <4 x i32> %rv
+}
+
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 05f44f4be046..7e0bf06b4e45 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -14,6 +14,7 @@
 ; RUN: grep iohl                %t1.s | count 8
 ; RUN: grep shufb               %t1.s | count 15
 ; RUN: grep frds                %t1.s | count 1
+; RUN: llc < %s -march=cellspu | FileCheck %s
 
 ; ModuleID = 'stores.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -149,3 +150,24 @@ entry:
 	store float %conv, float* %dest
 	ret float %conv
 }
+
+;Check stores that might span two 16 byte memory blocks
+define void @store_misaligned( i32 %val, i32* %ptr) {	
+;CHECK: store_misaligned
+;CHECK: lqd
+;CHECK: lqd
+;CHECK: stqd
+;CHECK: stqd
+;CHECK: bi $lr
+	store i32 %val, i32*%ptr, align 2
+	ret void
+}
+
+define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
+{
+;CHECK: stq
+;CHECK: stq
+;CHECK: bi $lr
+	store <8 x float> %val, <8 x float>* %ptr
+	ret void
+}
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
index b81c0cdbb299..efd032031002 100644
--- a/test/CodeGen/CellSPU/v2f32.ll
+++ b/test/CodeGen/CellSPU/v2f32.ll
@@ -62,8 +62,7 @@ define %vec @test_insert(){
 }
 
 define void @test_unaligned_store()  {
-;CHECK:	cdd	$3, 8($3)
-;CHECK: 	lqd	
+;CHECK:	cdd
 ;CHECK:	shufb
 ;CHECK:	stqd
   %data = alloca [4 x float], align 16         ; <[4 x float]*> [#uses=1]
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
index dd51be5a71d2..71d4aba63332 100644
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -37,9 +37,8 @@ define %vec @test_mul(%vec %param)
 }
 
 define <2 x i32> @test_splat(i32 %param ) {
-;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the 
-;     somewhat redundant: 
-;CHECK-NOT or $3, $3, $3
+;see svn log for why this is here...
+;CHECK-NOT: or $3, $3, $3
 ;CHECK: lqa
 ;CHECK: shufb
   %sv = insertelement <1 x i32> undef, i32 %param, i32 0 
@@ -62,3 +61,17 @@ define void @test_store( %vec %val, %vec* %ptr)
   store %vec %val, %vec* %ptr
   ret void
 }
+
+;Alignment of <2 x i32> is not *directly* defined in the ABI
+;It probably is safe to interpret it as an array, thus having 8 byte
+;alignment (according to ABI). This tests that the size of
+;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
+;two arrays
+define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
+{
+; CHECK-NOT:	ai	$3, $3, 16
+; CHECK:	ai	$3, $3, 8
+; CHECK:	bi	$lr
+   %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
+   ret <2 x i32>* %rv
+}
diff --git a/test/CodeGen/Generic/2010-11-04-BigByval.ll b/test/CodeGen/Generic/2010-11-04-BigByval.ll
new file mode 100644
index 000000000000..df2ca4c18a05
--- /dev/null
+++ b/test/CodeGen/Generic/2010-11-04-BigByval.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s
+; PR7170
+
+%big = type [131072 x i8]
+
+declare void @foo(%big* byval align 1)
+
+define void @bar(%big* byval align 1 %x) {
+  call void @foo(%big* byval align 1 %x)
+  ret void
+}
diff --git a/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll b/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
new file mode 100644
index 000000000000..05fdf4c7449b
--- /dev/null
+++ b/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s
+; PR8582
+
+define void @uint82() nounwind {
+entry:
+  %tmp3 = select i1 undef, i960 4872657003430991806293355221650511486142000513558154090491761976385142772940676648094983476628187266917101386048750715027104076737938178423519545241493072038894065019132638919037781494702597609951702322267198307200588774905587225212622510286498675097141625012190497682454879271766334636032, i960 0
+  br i1 undef, label %for.body25.for.body25_crit_edge, label %if.end
+
+for.body25.for.body25_crit_edge:                  ; preds = %entry
+  %ins = or i960 %tmp3, undef
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/Generic/2011-02-12-shuffle.ll b/test/CodeGen/Generic/2011-02-12-shuffle.ll
new file mode 100644
index 000000000000..b4d56d193ca3
--- /dev/null
+++ b/test/CodeGen/Generic/2011-02-12-shuffle.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s
+; PR9165
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+define void @m_387() nounwind {
+entry:
+  br i1 undef, label %if.end, label %UnifiedReturnBlock
+
+if.end:                                           ; preds = %entry
+  %tmp1067 = load <16 x i32> addrspace(1)* null, align 64
+  %tmp1082 = shufflevector         <16 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>, 
+                                                                                                                <16 x i32> %tmp1067, 
+                                                                                                                <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 26, i32 5, i32 6, i32 undef, i32 8, i32 9, i32 31, i32 30, i32 12, i32 undef, i32 undef, i32 undef>
+  
+  %tmp1100 = shufflevector         <16 x i32> %tmp1082, 
+                                                                                                                <16 x i32> %tmp1067, 
+                                                                                                                <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 18, i32 8, i32 9, i32 10, i32 11, i32 12, i32 25, i32 undef, i32 17>
+  
+  %tmp1112 = shufflevector         <16 x i32> %tmp1100, 
+                                                                                                                <16 x i32> %tmp1067, 
+                                                                                                                <16 x i32> <i32 0, i32 1, i32 2, i32 24, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 18, i32 15>
+  
+  store <16 x i32> %tmp1112, <16 x i32> addrspace(1)* undef, align 64
+  
+  ret void
+
+UnifiedReturnBlock:                               ; preds = %entry
+  ret void
+}
+
diff --git a/test/CodeGen/Generic/add-with-overflow-128.ll b/test/CodeGen/Generic/add-with-overflow-128.ll
index c46c820a7907..33f44d6e4436 100644
--- a/test/CodeGen/Generic/add-with-overflow-128.ll
+++ b/test/CodeGen/Generic/add-with-overflow-128.ll
@@ -3,22 +3,7 @@
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
 
-define i1 @func1(i128 signext %v1, i128 signext %v2) nounwind {
-entry:
-  %t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, i128 %v2)
-  %sum = extractvalue {i128, i1} %t, 0
-  %sum32 = trunc i128 %sum to i32
-  %obit = extractvalue {i128, i1} %t, 1
-  br i1 %obit, label %overflow, label %normal
-
-normal:
-  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
-  ret i1 true
 
-overflow:
-  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
-  ret i1 false
-}
 
 define i1 @func2(i128 zeroext %v1, i128 zeroext %v2) nounwind {
 entry:
@@ -38,5 +23,12 @@ carry:
 }
 
 declare i32 @printf(i8*, ...) nounwind
-declare {i128, i1} @llvm.sadd.with.overflow.i128(i128, i128)
+declare {i96, i1} @llvm.sadd.with.overflow.i96(i96, i96)
 declare {i128, i1} @llvm.uadd.with.overflow.i128(i128, i128)
+
+define i1 @func1(i96 signext %v1, i96 signext %v2) nounwind {
+entry:
+  %t = call {i96, i1} @llvm.sadd.with.overflow.i96(i96 %v1, i96 %v2)
+  %obit = extractvalue {i96, i1} %t, 1
+  ret i1 %obit
+}
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index 7218565617fc..042739884df7 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -6,3 +6,35 @@
 @tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr
 inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
 
+
+; rdar://8878965
+
+%struct.CAMERA = type { [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], [3 x double], double, double, i32, double, double, i32, double, i32* }
+
+define void @Parse_Camera(%struct.CAMERA** nocapture %Camera_Ptr) nounwind {
+entry:
+%.pre = load %struct.CAMERA** %Camera_Ptr, align 4
+%0 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 0
+%1 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 2
+br label %bb32
+
+bb32:                                             ; preds = %bb6
+%2 = load double* %0, align 4
+%3 = load double* %1, align 4
+%4 = load double* %0, align 4
+call void @Parse_Vector(double* %0) nounwind
+%5 = call i32 @llvm.objectsize.i32(i8* undef, i1 false)
+%6 = icmp eq i32 %5, -1
+br i1 %6, label %bb34, label %bb33
+
+bb33:                                             ; preds = %bb32
+unreachable
+
+bb34:                                             ; preds = %bb32
+unreachable
+
+}
+
+declare void @Parse_Vector(double*)
+declare i32 @llvm.objectsize.i32(i8*, i1)
+
diff --git a/test/CodeGen/Generic/overflow.ll b/test/CodeGen/Generic/overflow.ll
new file mode 100644
index 000000000000..4196855c4ee7
--- /dev/null
+++ b/test/CodeGen/Generic/overflow.ll
@@ -0,0 +1,220 @@
+; RUN: llc < %s
+; Verify codegen's don't crash on overflow intrinsics.
+
+;; SADD
+
+define zeroext i8 @sadd_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %sadd = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %sadd, 1
+  %sadd.result = extractvalue { i8, i1 } %sadd, 0
+  %X = select i1 %cmp, i8 %sadd.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @sadd_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %sadd = tail call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %sadd, 1
+  %sadd.result = extractvalue { i16, i1 } %sadd, 0
+  %X = select i1 %cmp, i16 %sadd.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @sadd_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %sadd, 1
+  %sadd.result = extractvalue { i32, i1 } %sadd, 0
+  %X = select i1 %cmp, i32 %sadd.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; UADD
+
+define zeroext i8 @uadd_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %uadd = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %uadd, 1
+  %uadd.result = extractvalue { i8, i1 } %uadd, 0
+  %X = select i1 %cmp, i8 %uadd.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @uadd_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %uadd = tail call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %uadd, 1
+  %uadd.result = extractvalue { i16, i1 } %uadd, 0
+  %X = select i1 %cmp, i16 %uadd.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @uadd_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %uadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %uadd.result = extractvalue { i32, i1 } %uadd, 0
+  %X = select i1 %cmp, i32 %uadd.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+
+
+;; ssub
+
+define zeroext i8 @ssub_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %ssub = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %ssub, 1
+  %ssub.result = extractvalue { i8, i1 } %ssub, 0
+  %X = select i1 %cmp, i8 %ssub.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @ssub_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %ssub = tail call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %ssub, 1
+  %ssub.result = extractvalue { i16, i1 } %ssub, 0
+  %X = select i1 %cmp, i16 %ssub.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.ssub.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @ssub_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %ssub = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %ssub, 1
+  %ssub.result = extractvalue { i32, i1 } %ssub, 0
+  %X = select i1 %cmp, i32 %ssub.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; usub
+
+define zeroext i8 @usub_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %usub = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %usub, 1
+  %usub.result = extractvalue { i8, i1 } %usub, 0
+  %X = select i1 %cmp, i8 %usub.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @usub_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %usub = tail call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %usub, 1
+  %usub.result = extractvalue { i16, i1 } %usub, 0
+  %X = select i1 %cmp, i16 %usub.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @usub_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %usub = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %usub, 1
+  %usub.result = extractvalue { i32, i1 } %usub, 0
+  %X = select i1 %cmp, i32 %usub.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+
+
+
+;; smul
+
+define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %smul, 1
+  %smul.result = extractvalue { i8, i1 } %smul, 0
+  %X = select i1 %cmp, i8 %smul.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %smul, 1
+  %smul.result = extractvalue { i16, i1 } %smul, 0
+  %X = select i1 %cmp, i16 %smul.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @smul_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %smul = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %smul, 1
+  %smul.result = extractvalue { i32, i1 } %smul, 0
+  %X = select i1 %cmp, i32 %smul.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+
+
+;; umul
+
+define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %umul, 1
+  %umul.result = extractvalue { i8, i1 } %umul, 0
+  %X = select i1 %cmp, i8 %umul.result, i8 42
+  ret i8 %X
+}
+
+declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+
+define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
+entry:
+  %umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+  %cmp = extractvalue { i16, i1 } %umul, 1
+  %umul.result = extractvalue { i16, i1 } %umul, 0
+  %X = select i1 %cmp, i16 %umul.result, i16 42
+  ret i16 %X
+}
+
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
+
+define zeroext i32 @umul_i32(i32 signext %a, i32 signext %b) nounwind ssp {
+entry:
+  %umul = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+  %cmp = extractvalue { i32, i1 } %umul, 1
+  %umul.result = extractvalue { i32, i1 } %umul, 0
+  %X = select i1 %cmp, i32 %umul.result, i32 42
+  ret i32 %X
+}
+
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
diff --git a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
index 854352a70111..d8970eac9007 100644
--- a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/MBlaze/brind.ll b/test/CodeGen/MBlaze/brind.ll
index 7798e0f56aac..2229a873827a 100644
--- a/test/CodeGen/MBlaze/brind.ll
+++ b/test/CodeGen/MBlaze/brind.ll
@@ -28,32 +28,31 @@ loop:
                              label %L3,
                              label %L4,
                              label %L5 ]
-    ; CHECK:        br {{r[0-9]*}}
+    ; CHECK:        brad {{r[0-9]*}}
 
 L1:
     %tmp.1 = add i32 %a, %b
     br label %finish
-    ; CHECK:        br
+    ; CHECK:        brid
 
 L2:
     %tmp.2 = sub i32 %a, %b
     br label %finish
-    ; CHECK:        br
+    ; CHECK:        brid
 
 L3:
     %tmp.3 = mul i32 %a, %b
     br label %finish
-    ; CHECK:        br
+    ; CHECK:        brid
 
 L4:
     %tmp.4 = sdiv i32 %a, %b
     br label %finish
-    ; CHECK:        br
+    ; CHECK:        brid
 
 L5:
     %tmp.5 = srem i32 %a, %b
     br label %finish
-    ; CHECK:        br
 
 finish:
     %tmp.6 = phi i32 [ %tmp.1, %L1 ],
@@ -69,5 +68,5 @@ finish:
     %tmp.8 = urem i32 %tmp.7, 5
 
     br label %loop
-    ; CHECK:        br
+    ; CHECK:        brad {{r[0-9]*}}
 }
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
index aaa918ffc343..b1eb22aee9fd 100644
--- a/test/CodeGen/MBlaze/cc.ll
+++ b/test/CodeGen/MBlaze/cc.ll
@@ -12,7 +12,7 @@ declare i32 @printf(i8*, ...)
 define void @params0_noret() {
     ; CHECK:        params0_noret:
     ret void
-    ; CHECK-NOT:    {{.* r3, r0, 1}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
 }
@@ -20,81 +20,88 @@ define void @params0_noret() {
 define i8 @params0_8bitret() {
     ; CHECK:        params0_8bitret:
     ret i8 1
-    ; CHECK:        {{.* r3, r0, 1}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r0, 1}}
 }
 
 define i16 @params0_16bitret() {
     ; CHECK:        params0_16bitret:
     ret i16 1
+    ; CHECK:        rtsd
     ; CHECK:        {{.* r3, r0, 1}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
 }
 
 define i32 @params0_32bitret() {
     ; CHECK:        params0_32bitret:
     ret i32 1
-    ; CHECK:        {{.* r3, r0, 1}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r0, 1}}
 }
 
 define i64 @params0_64bitret() {
     ; CHECK:        params0_64bitret:
     ret i64 1
     ; CHECK:        {{.* r3, r0, .*}}
-    ; CHECK:        {{.* r4, r0, 1}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r4, r0, 1}}
 }
 
 define i32 @params1_32bitret(i32 %a) {
     ; CHECK:        params1_32bitret:
     ret i32 %a
-    ; CHECK:        {{.* r3, r5, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r5, r0}}
 }
 
 define i32 @params2_32bitret(i32 %a, i32 %b) {
     ; CHECK:        params2_32bitret:
     ret i32 %b
-    ; CHECK:        {{.* r3, r6, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r6, r0}}
 }
 
 define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) {
     ; CHECK:        params3_32bitret:
     ret i32 %c
-    ; CHECK:        {{.* r3, r7, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r7, r0}}
 }
 
 define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) {
     ; CHECK:        params4_32bitret:
     ret i32 %d
-    ; CHECK:        {{.* r3, r8, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r8, r0}}
 }
 
 define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
     ; CHECK:        params5_32bitret:
     ret i32 %e
-    ; CHECK:        {{.* r3, r9, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r9, r0}}
 }
 
 define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
     ; CHECK:        params6_32bitret:
     ret i32 %f
-    ; CHECK:        {{.* r3, r10, r0}}
+    ; CHECK-NOT:    {{.* r3, .*, .*}}
     ; CHECK-NOT:    {{.* r4, .*, .*}}
     ; CHECK:        rtsd
+    ; CHECK:        {{.* r3, r10, r0}}
 }
 
 define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
@@ -142,53 +149,29 @@ define void @testing() {
     %tmp.1 = call i8 @params0_8bitret()
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.2 = call i16 @params0_16bitret()
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.3 = call i32 @params0_32bitret()
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.4 = call i64 @params0_64bitret()
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK:        {{.* r7, r4, r0}}
-    ; CHECK:        brlid
 
     %tmp.5 = call i32 @params1_32bitret(i32 1)
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.6 = call i32 @params2_32bitret(i32 1, i32 2)
     ; CHECK:        {{.* r5, .*, .*}}
     ; CHECK:        {{.* r6, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3)
     ; CHECK:        {{.* r5, .*, .*}}
@@ -196,10 +179,6 @@ define void @testing() {
     ; CHECK:        {{.* r7, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, i32 3, i32 4)
     ; CHECK:        {{.* r5, .*, .*}}
@@ -208,10 +187,6 @@ define void @testing() {
     ; CHECK:        {{.* r8, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5)
     ; CHECK:        {{.* r5, .*, .*}}
@@ -221,10 +196,6 @@ define void @testing() {
     ; CHECK:        {{.* r9, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6)
@@ -236,10 +207,6 @@ define void @testing() {
     ; CHECK:        {{.* r10, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7)
@@ -252,10 +219,6 @@ define void @testing() {
     ; CHECK:        {{.* r10, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8)
@@ -269,10 +232,6 @@ define void @testing() {
     ; CHECK:        {{.* r10, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                          i32 6, i32 7, i32 8, i32 9)
@@ -287,10 +246,6 @@ define void @testing() {
     ; CHECK:        {{.* r10, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i32 8, i32 9, i32 10)
@@ -306,10 +261,6 @@ define void @testing() {
     ; CHECK:        {{.* r10, .*, .*}}
     ; CHECK:        brlid
     call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, r3, r0}}
-    ; CHECK-NOT:    {{.* r7, .*, .*}}
-    ; CHECK:        brlid
 
     ret void
 }
diff --git a/test/CodeGen/MBlaze/fpu.ll b/test/CodeGen/MBlaze/fpu.ll
index 83f4d831241d..2aef4fd64105 100644
--- a/test/CodeGen/MBlaze/fpu.ll
+++ b/test/CodeGen/MBlaze/fpu.ll
@@ -10,14 +10,14 @@ define float @test_add(float %a, float %b) {
     ; FPU:        test_add:
 
     %tmp.1 = fadd float %a, %b
-    ; FUN-NOT:    fadd
     ; FUN:        brlid
     ; FPU-NOT:    brlid
-    ; FPU:        fadd
 
     ret float %tmp.1
     ; FUN:        rtsd
     ; FPU:        rtsd
+    ; FUN-NOT:    fadd
+    ; FPU-NEXT:   fadd
 }
 
 define float @test_sub(float %a, float %b) {
@@ -25,14 +25,14 @@ define float @test_sub(float %a, float %b) {
     ; FPU:        test_sub:
 
     %tmp.1 = fsub float %a, %b
-    ; FUN-NOT:    frsub
     ; FUN:        brlid
     ; FPU-NOT:    brlid
-    ; FPU:        frsub
 
     ret float %tmp.1
     ; FUN:        rtsd
     ; FPU:        rtsd
+    ; FUN-NOT:    frsub
+    ; FPU-NEXT:   frsub
 }
 
 define float @test_mul(float %a, float %b) {
@@ -40,14 +40,14 @@ define float @test_mul(float %a, float %b) {
     ; FPU:        test_mul:
 
     %tmp.1 = fmul float %a, %b
-    ; FUN-NOT:    fmul
     ; FUN:        brlid
     ; FPU-NOT:    brlid
-    ; FPU:        fmul
 
     ret float %tmp.1
     ; FUN:        rtsd
     ; FPU:        rtsd
+    ; FUN-NOT:    fmul
+    ; FPU-NEXT:   fmul
 }
 
 define float @test_div(float %a, float %b) {
@@ -55,12 +55,12 @@ define float @test_div(float %a, float %b) {
     ; FPU:        test_div:
 
     %tmp.1 = fdiv float %a, %b
-    ; FUN-NOT:    fdiv
     ; FUN:        brlid
     ; FPU-NOT:    brlid
-    ; FPU:        fdiv
 
     ret float %tmp.1
     ; FUN:        rtsd
     ; FPU:        rtsd
+    ; FUN-NOT:    fdiv
+    ; FPU-NEXT:   fdiv
 }
diff --git a/test/CodeGen/MBlaze/imm.ll b/test/CodeGen/MBlaze/imm.ll
index 85fad175b77f..6effd3e09a24 100644
--- a/test/CodeGen/MBlaze/imm.ll
+++ b/test/CodeGen/MBlaze/imm.ll
@@ -7,22 +7,22 @@
 
 define i8 @retimm_i8() {
     ; CHECK:        retimm_i8:
-    ; CHECK:        add
-    ; CHECK-NEXT:   rtsd
+    ; CHECK:        rtsd
+    ; CHECK-NEXT:   add
     ; FPU:          retimm_i8:
-    ; FPU:          add
-    ; FPU-NEXT:     rtsd
+    ; FPU:          rtsd
+    ; FPU-NEXT:     add
     ret i8 123
 }
 
 define i16 @retimm_i16() {
     ; CHECK:        retimm_i16:
-    ; CHECK:        add
-    ; CHECK-NEXT:   rtsd
+    ; CHECK:        rtsd
+    ; CHECK-NEXT:   add
     ; FPU:          retimm_i16:
-    ; FPU:          add
-    ; FPU-NEXT:     rtsd
-    ret i16 38212
+    ; FPU:          rtsd
+    ; FPU-NEXT:     add
+    ret i16 31212
 }
 
 define i32 @retimm_i32() {
@@ -38,12 +38,12 @@ define i32 @retimm_i32() {
 define i64 @retimm_i64() {
     ; CHECK:        retimm_i64:
     ; CHECK:        add
-    ; CHECK-NEXT:   add
     ; CHECK-NEXT:   rtsd
+    ; CHECK-NEXT:   add
     ; FPU:          retimm_i64:
     ; FPU:          add
-    ; FPU-NEXT:     add
     ; FPU-NEXT:     rtsd
+    ; FPU-NEXT:     add
     ret i64 94581823
 }
 
@@ -53,7 +53,7 @@ define float @retimm_float() {
     ; CHECK-NEXT:   rtsd
     ; FPU:          retimm_float:
     ; FPU:          or
-    ; FPU:          rtsd
+    ; FPU-NEXT:     rtsd
     ret float 12.0
 }
 
diff --git a/test/CodeGen/MBlaze/intr.ll b/test/CodeGen/MBlaze/intr.ll
new file mode 100644
index 000000000000..79c6bffd00cb
--- /dev/null
+++ b/test/CodeGen/MBlaze/intr.ll
@@ -0,0 +1,48 @@
+; Ensure that the MBlaze interrupt_handler calling convention (cc73) is handled
+; correctly correctly by the MBlaze backend.
+;
+; RUN: llc < %s -march=mblaze | FileCheck %s
+
+@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
+@_interrupt_handler = alias void ()* @myintr
+
+define cc73 void @myintr() nounwind noinline {
+  ; CHECK:        myintr:
+  ; CHECK:        swi   r3, r1
+  ; CHECK:        swi   r4, r1
+  ; CHECK:        swi   r5, r1
+  ; CHECK:        swi   r6, r1
+  ; CHECK:        swi   r7, r1
+  ; CHECK:        swi   r8, r1
+  ; CHECK:        swi   r9, r1
+  ; CHECK:        swi   r10, r1
+  ; CHECK:        swi   r11, r1
+  ; CHECK:        swi   r12, r1
+  ; CHECK:        swi   r17, r1
+  ; CHECK:        swi   r18, r1
+  ; CHECK:        mfs   r11, rmsr
+  ; CHECK:        swi   r11, r1
+  entry:
+    %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
+      ret void
+
+  ; CHECK:        lwi   r11, r1
+  ; CHECK:        mts   rmsr, r11
+  ; CHECK:        lwi   r18, r1
+  ; CHECK:        lwi   r17, r1
+  ; CHECK:        lwi   r12, r1
+  ; CHECK:        lwi   r11, r1
+  ; CHECK:        lwi   r10, r1
+  ; CHECK:        lwi   r9, r1
+  ; CHECK:        lwi   r8, r1
+  ; CHECK:        lwi   r7, r1
+  ; CHECK:        lwi   r6, r1
+  ; CHECK:        lwi   r5, r1
+  ; CHECK:        lwi   r4, r1
+  ; CHECK:        lwi   r3, r1
+  ; CHECK:        rtid  r14, 0
+}
+
+  ; CHECK:    .globl  _interrupt_handler
+  ; CHECK:    _interrupt_handler = myintr
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MBlaze/jumptable.ll b/test/CodeGen/MBlaze/jumptable.ll
index 3f27c12f19a3..299084d0ed23 100644
--- a/test/CodeGen/MBlaze/jumptable.ll
+++ b/test/CodeGen/MBlaze/jumptable.ll
@@ -18,8 +18,8 @@ define i32 @jmptable(i32 %arg)
                                       i32 8, label %L8
                                       i32 9, label %L9 ]
 
-    ; CHECK:        lw [[REG:r[0-9]*]]
-    ; CHECK:        br [[REG]]
+    ; CHECK:        lw   [[REG:r[0-9]*]]
+    ; CHECK:        brad [[REG]]
 L0:
     %var0 = add i32 %arg, 0
     br label %DONE
diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll
index b473020e6641..8973f75aa1dc 100644
--- a/test/CodeGen/MBlaze/loop.ll
+++ b/test/CodeGen/MBlaze/loop.ll
@@ -27,11 +27,10 @@ loop_inner:
 
 loop_inner_finish:
     %inner.5 = add i32 %inner.2, 1
-    ; CHECK:        addi {{.*, 1}}
-
     call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0),
                                  i32 %inner.0, i32 %inner.1, i32 %inner.2 )
     ; CHECK:        brlid
+    ; CHECK:        addik {{.*, 1}}
 
     %inner.6 = icmp eq i32 %inner.5, 100
     ; CHECK:        cmp
diff --git a/test/CodeGen/MBlaze/mul.ll b/test/CodeGen/MBlaze/mul.ll
index 65d3e22a3e74..cefdb8d56f21 100644
--- a/test/CodeGen/MBlaze/mul.ll
+++ b/test/CodeGen/MBlaze/mul.ll
@@ -13,11 +13,11 @@ define i8 @test_i8(i8 %a, i8 %b) {
     ; FUN-NOT:    mul
     ; FUN:        brlid
     ; MUL-NOT:    brlid
-    ; MUL:        mul
 
     ret i8 %tmp.1
     ; FUN:        rtsd
     ; MUL:        rtsd
+    ; MUL:        mul
 }
 
 define i16 @test_i16(i16 %a, i16 %b) {
@@ -28,11 +28,11 @@ define i16 @test_i16(i16 %a, i16 %b) {
     ; FUN-NOT:    mul
     ; FUN:        brlid
     ; MUL-NOT:    brlid
-    ; MUL:        mul
 
     ret i16 %tmp.1
     ; FUN:        rtsd
     ; MUL:        rtsd
+    ; MUL:        mul
 }
 
 define i32 @test_i32(i32 %a, i32 %b) {
@@ -43,9 +43,9 @@ define i32 @test_i32(i32 %a, i32 %b) {
     ; FUN-NOT:    mul
     ; FUN:        brlid
     ; MUL-NOT:    brlid
-    ; MUL:        mul
 
     ret i32 %tmp.1
     ; FUN:        rtsd
     ; MUL:        rtsd
+    ; MUL:        mul
 }
diff --git a/test/CodeGen/MBlaze/shift.ll b/test/CodeGen/MBlaze/shift.ll
index 186115ec192b..99f0519c020c 100644
--- a/test/CodeGen/MBlaze/shift.ll
+++ b/test/CodeGen/MBlaze/shift.ll
@@ -10,17 +10,16 @@ define i8 @test_i8(i8 %a, i8 %b) {
     ; SHT:        test_i8:
 
     %tmp.1 = shl i8 %a, %b
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
-    ; SHT-NOT:    andi
     ; SHT-NOT:    bnei
-    ; SHT:        bsll
 
     ret i8 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bsll
 }
 
 define i8 @testc_i8(i8 %a, i8 %b) {
@@ -28,18 +27,18 @@ define i8 @testc_i8(i8 %a, i8 %b) {
     ; SHT:        testc_i8:
 
     %tmp.1 = shl i8 %a, 5
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
     ; SHT-NOT:    andi
     ; SHT-NOT:    add
     ; SHT-NOT:    bnei
-    ; SHT:        bslli
 
     ret i8 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bslli
 }
 
 define i16 @test_i16(i16 %a, i16 %b) {
@@ -47,17 +46,16 @@ define i16 @test_i16(i16 %a, i16 %b) {
     ; SHT:        test_i16:
 
     %tmp.1 = shl i16 %a, %b
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
-    ; SHT-NOT:    andi
     ; SHT-NOT:    bnei
-    ; SHT:        bsll
 
     ret i16 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bsll
 }
 
 define i16 @testc_i16(i16 %a, i16 %b) {
@@ -65,18 +63,18 @@ define i16 @testc_i16(i16 %a, i16 %b) {
     ; SHT:        testc_i16:
 
     %tmp.1 = shl i16 %a, 5
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
     ; SHT-NOT:    andi
     ; SHT-NOT:    add
     ; SHT-NOT:    bnei
-    ; SHT:        bslli
 
     ret i16 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bslli
 }
 
 define i32 @test_i32(i32 %a, i32 %b) {
@@ -84,17 +82,17 @@ define i32 @test_i32(i32 %a, i32 %b) {
     ; SHT:        test_i32:
 
     %tmp.1 = shl i32 %a, %b
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
     ; SHT-NOT:    andi
     ; SHT-NOT:    bnei
-    ; SHT:        bsll
 
     ret i32 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bsll
 }
 
 define i32 @testc_i32(i32 %a, i32 %b) {
@@ -102,16 +100,16 @@ define i32 @testc_i32(i32 %a, i32 %b) {
     ; SHT:        testc_i32:
 
     %tmp.1 = shl i32 %a, 5
-    ; FUN-NOT:    bsll
     ; FUN:        andi
     ; FUN:        add
     ; FUN:        bnei
     ; SHT-NOT:    andi
     ; SHT-NOT:    add
     ; SHT-NOT:    bnei
-    ; SHT:        bslli
 
     ret i32 %tmp.1
     ; FUN:        rtsd
     ; SHT:        rtsd
+    ; FUN-NOT:    bsll
+    ; SHT-NEXT:   bslli
 }
diff --git a/test/CodeGen/MBlaze/svol.ll b/test/CodeGen/MBlaze/svol.ll
new file mode 100644
index 000000000000..c1e96202845a
--- /dev/null
+++ b/test/CodeGen/MBlaze/svol.ll
@@ -0,0 +1,80 @@
+; Ensure that the MBlaze save_volatiles calling convention (cc74) is handled
+; correctly correctly by the MBlaze backend.
+;
+; RUN: llc < %s -march=mblaze | FileCheck %s
+
+@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
+
+define cc74 void @mysvol() nounwind noinline {
+  ; CHECK:        mysvol:
+  ; CHECK:        swi   r3, r1
+  ; CHECK:        swi   r4, r1
+  ; CHECK:        swi   r5, r1
+  ; CHECK:        swi   r6, r1
+  ; CHECK:        swi   r7, r1
+  ; CHECK:        swi   r8, r1
+  ; CHECK:        swi   r9, r1
+  ; CHECK:        swi   r10, r1
+  ; CHECK:        swi   r11, r1
+  ; CHECK:        swi   r12, r1
+  ; CHECK:        swi   r17, r1
+  ; CHECK:        swi   r18, r1
+  ; CHECK-NOT:    mfs   r11, rmsr
+  entry:
+    %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
+      ret void
+
+  ; CHECK-NOT:    mts   rmsr, r11
+  ; CHECK:        lwi   r18, r1
+  ; CHECK:        lwi   r17, r1
+  ; CHECK:        lwi   r12, r1
+  ; CHECK:        lwi   r11, r1
+  ; CHECK:        lwi   r10, r1
+  ; CHECK:        lwi   r9, r1
+  ; CHECK:        lwi   r8, r1
+  ; CHECK:        lwi   r7, r1
+  ; CHECK:        lwi   r6, r1
+  ; CHECK:        lwi   r5, r1
+  ; CHECK:        lwi   r4, r1
+  ; CHECK:        lwi   r3, r1
+  ; CHECK:        rtsd  r15, 8
+}
+
+define cc74 void @mysvol2() nounwind noinline {
+  ; CHECK:        mysvol2:
+  ; CHECK-NOT:    swi   r3, r1
+  ; CHECK-NOT:    swi   r4, r1
+  ; CHECK-NOT:    swi   r5, r1
+  ; CHECK-NOT:    swi   r6, r1
+  ; CHECK-NOT:    swi   r7, r1
+  ; CHECK-NOT:    swi   r8, r1
+  ; CHECK-NOT:    swi   r9, r1
+  ; CHECK-NOT:    swi   r10, r1
+  ; CHECK-NOT:    swi   r11, r1
+  ; CHECK-NOT:    swi   r12, r1
+  ; CHECK:        swi   r17, r1
+  ; CHECK:        swi   r18, r1
+  ; CHECK-NOT:    mfs   r11, rmsr
+entry:
+
+  ; CHECK-NOT:    mts   rmsr, r11
+  ; CHECK:        lwi   r18, r1
+  ; CHECK:        lwi   r17, r1
+  ; CHECK-NOT:    lwi   r12, r1
+  ; CHECK-NOT:    lwi   r11, r1
+  ; CHECK-NOT:    lwi   r10, r1
+  ; CHECK-NOT:    lwi   r9, r1
+  ; CHECK-NOT:    lwi   r8, r1
+  ; CHECK-NOT:    lwi   r7, r1
+  ; CHECK-NOT:    lwi   r6, r1
+  ; CHECK-NOT:    lwi   r5, r1
+  ; CHECK-NOT:    lwi   r4, r1
+  ; CHECK-NOT:    lwi   r3, r1
+  ; CHECK:        rtsd  r15, 8
+  ret void
+}
+
+  ; CHECK-NOT:    .globl  _interrupt_handler
+  ; CHECK-NOT:    _interrupt_handler = mysvol
+  ; CHECK-NOT:    _interrupt_handler = mysvol2
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
index 8de044cf48ba..9d549da8a93a 100644
--- a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
new file mode 100644
index 000000000000..342afed66053
--- /dev/null
+++ b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=msp430
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430"
+
+@mout0 = common global i16 0, align 2
+@min1 = common global i16 0, align 2
+@marray = common global [2 x i16] zeroinitializer, align 2
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m"(i16* @mout0, i16* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %index = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %index, align 2
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,<r"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* %in1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r,r<"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,>r"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* %in1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r,r>"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,r"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,i"(i16 1) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,n"(i16 1) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* @min1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  %2 = call i16 asm "foo $1,$0", "=r,imr"(i16 1) nounwind
+  store i16 %2, i16* %out0, align 2
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* @min1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  %2 = call i16 asm "foo $1,$0", "=r,X"(i16 1) nounwind
+  store i16 %2, i16* %out0, align 2
+  %3 = call i16 asm "foo $1,$0", "=r,X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+  store i16 %3, i16* %out0, align 2
+; No lowering support.
+;  %4 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+;  store i16 %4, i16* %out0, align 2
+;  %5 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+;  store i16 %5, i16* %out0, align 2
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r,r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i16* @min1, align 2
+  call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %index = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %index, align 2
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|<r"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* %in1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r|r,r|r<"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|>r"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* %in1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r|r,r|r>"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|m"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|i"(i16 1) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|n"(i16 1) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* @min1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  %2 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 1) nounwind
+  store i16 %2, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  %in1 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  store i16 1, i16* %in1, align 2
+  %tmp = load i16* %in1, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp) nounwind
+  store i16 %0, i16* %out0, align 2
+  %tmp1 = load i16* @min1, align 2
+  %1 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp1) nounwind
+  store i16 %1, i16* %out0, align 2
+  %2 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 1) nounwind
+  store i16 %2, i16* %out0, align 2
+  %3 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+  store i16 %3, i16* %out0, align 2
+; No lowering support.
+;  %4 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+;  store i16 %4, i16* %out0, align 2
+;  %5 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+;  store i16 %5, i16* %out0, align 2
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i16, align 2
+  store i16 0, i16* %out0, align 2
+  %0 = call i16 asm "foo $1,$0", "=r|r,r|r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+  store i16 %0, i16* %out0, align 2
+  ret void
+}
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index bda4a3172f30..c3db6387aff3 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -7,8 +7,8 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "mipsallegrexel-unknown-psp-elf"
-@.str = internal constant [10 x i8] c"AAAAAAAAA\00"
-@i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] 
+@.str = internal unnamed_addr constant [10 x i8] c"AAAAAAAAA\00"
+@i0 = internal unnamed_addr constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ] 
 
 define i8* @foo() nounwind {
 entry:
diff --git a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
index 4161c1d686e6..994e19af4f87 100644
--- a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll
index 8b7f9a919378..891b5d9e1884 100644
--- a/test/CodeGen/Mips/2010-07-20-Select.ll
+++ b/test/CodeGen/Mips/2010-07-20-Select.ll
@@ -9,12 +9,12 @@ entry:
   volatile store i32 0, i32* %c, align 4
   %0 = volatile load i32* %a, align 4             ; <i32> [#uses=1]
   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
-; CHECK: addiu $4, $zero, 3
+; CHECK: addiu $3, $zero, 0
   %iftmp.0.0 = select i1 %1, i32 3, i32 0         ; <i32> [#uses=1]
   %2 = volatile load i32* %c, align 4             ; <i32> [#uses=1]
   %3 = icmp eq i32 %2, 0                          ; <i1> [#uses=1]
-; CHECK: addu $4, $zero, $3
-; CHECK: addu $2, $5, $4
+; CHECK: addiu $3, $zero, 3
+; CHECK: addu $2, $5, $3
   %iftmp.2.0 = select i1 %3, i32 0, i32 5         ; <i32> [#uses=1]
   %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0         ; <i32> [#uses=1]
   ret i32 %4
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
new file mode 100644
index 000000000000..d592fef331af
--- /dev/null
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: clz $2, $4
+define i32 @t1(i32 %X) nounwind readnone {
+entry:
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  ret i32 %tmp1
+}
+
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+
+; CHECK: clz $2, $4
+define i32 @t2(i32 %X) nounwind readnone {
+entry:
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+  ret i32 %tmp1
+}
+
+; CHECK: clo $2, $4
+define i32 @t3(i32 %X) nounwind readnone {
+entry:
+  %neg = xor i32 %X, -1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  ret i32 %tmp1
+}
+
+; CHECK: clo $2, $4
+define i32 @t4(i32 %X) nounwind readnone {
+entry:
+  %neg = xor i32 %X, -1
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+  ret i32 %tmp1
+}
diff --git a/test/CodeGen/Mips/2010-11-09-Mul.ll b/test/CodeGen/Mips/2010-11-09-Mul.ll
new file mode 100644
index 000000000000..65a10b5836cc
--- /dev/null
+++ b/test/CodeGen/Mips/2010-11-09-Mul.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: mul $2, $5, $4
+define i32 @mul1(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %mul = mul i32 %b, %a
+  ret i32 %mul
+}
+
+; CHECK: mul $2, $5, $4
+define i32 @mul2(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %mul = mul nsw i32 %b, %a
+  ret i32 %mul
+}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
new file mode 100755
index 000000000000..7d3e0252e3c9
--- /dev/null
+++ b/test/CodeGen/Mips/cmov.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
+@i3 = common global i32* null, align 4
+
+; CHECK:  lw  $3, %got(i3)($gp)
+; CHECK:  addiu $5, $gp, %got(i1)
+define i32* @cmov1(i32 %s) nounwind readonly {
+entry:
+  %tobool = icmp ne i32 %s, 0
+  %tmp1 = load i32** @i3, align 4
+  %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32]* @i1, i32 0, i32 0), i32* %tmp1
+  ret i32* %cond
+}
+
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
new file mode 100644
index 000000000000..4a205b1f3ffb
--- /dev/null
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK: madd $5, $4
+define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv2, %conv
+  %conv4 = sext i32 %c to i64
+  %add = add nsw i64 %mul, %conv4
+  ret i64 %add
+}
+
+; CHECK: maddu $5, $4
+define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %conv = zext i32 %a to i64
+  %conv2 = zext i32 %b to i64
+  %mul = mul nsw i64 %conv2, %conv
+  %conv4 = zext i32 %c to i64
+  %add = add nsw i64 %mul, %conv4
+  ret i64 %add
+}
+
+; CHECK: madd $5, $4
+define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+entry:
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv2, %conv
+  %add = add nsw i64 %mul, %c
+  ret i64 %add
+}
+
+; CHECK: msub $5, $4
+define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %conv = sext i32 %c to i64
+  %conv2 = sext i32 %a to i64
+  %conv4 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv4, %conv2
+  %sub = sub nsw i64 %conv, %mul
+  ret i64 %sub
+}
+
+; CHECK: msubu $5, $4
+define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %conv = zext i32 %c to i64
+  %conv2 = zext i32 %a to i64
+  %conv4 = zext i32 %b to i64
+  %mul = mul nsw i64 %conv4, %conv2
+  %sub = sub nsw i64 %conv, %mul
+  ret i64 %sub
+}
+
+; CHECK: msub $5, $4
+define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+entry:
+  %conv = sext i32 %a to i64
+  %conv3 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv3, %conv
+  %sub = sub nsw i64 %c, %mul
+  ret i64 %sub
+}
diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll
new file mode 100644
index 000000000000..b6df62be6603
--- /dev/null
+++ b/test/CodeGen/Mips/o32_cc.ll
@@ -0,0 +1,325 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; FIXME: Disabled because it unpredictably fails on certain platforms.
+; REQUIRES: disabled
+
+; $f12, $f14
+; CHECK: ldc1 $f12, %lo
+; CHECK: ldc1 $f14, %lo
+define void @testlowercall0() nounwind {
+entry:
+  tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind
+  ret void
+}
+
+declare void @f0(double, double)
+
+; $f12, $f14
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+define void @testlowercall1() nounwind {
+entry:
+  tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind
+  ret void
+}
+
+declare void @f1(float, float)
+
+; $f12, $f14
+; CHECK: lwc1 $f12, %lo
+; CHECK: ldc1 $f14, %lo
+define void @testlowercall2() nounwind {
+entry:
+  tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind
+  ret void
+}
+
+declare void @f2(float, double)
+
+; $f12, $f14
+; CHECK: ldc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+define void @testlowercall3() nounwind {
+entry:
+  tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind
+  ret void
+}
+
+declare void @f3(double, float)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 12
+; CHECK: addiu $5, $zero, 13
+; CHECK: addiu $6, $zero, 14
+; CHECK: addiu $7, $zero, 15
+define void @testlowercall4() nounwind {
+entry:
+  tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind
+  ret void
+}
+
+declare void @f4(i32, i32, i32, i32)
+
+; $f12, $6, stack
+; CHECK: sw  $2, 16($sp)
+; CHECK: sw  $zero, 20($sp)
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 23
+define void @testlowercall5() nounwind {
+entry:
+  tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind
+  ret void
+}
+
+declare void @f5(double, i32, double)
+
+; $f12, $6, $7
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 33
+; CHECK: addiu $7, $zero, 24
+define void @testlowercall6() nounwind {
+entry:
+  tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind
+  ret void
+}
+
+declare void @f6(double, i32, i32)
+
+; $f12, $5, $6
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 43
+; CHECK: addiu $6, $zero, 34
+define void @testlowercall7() nounwind {
+entry:
+  tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind
+  ret void
+}
+
+declare void @f7(float, i32, i32)
+
+; $4, $5, $6, stack
+; CHECK: sw  $2, 16($sp)
+; CHECK: sw  $zero, 20($sp)
+; CHECK: addiu $4, $zero, 22
+; CHECK: addiu $5, $zero, 53
+; CHECK: addiu $6, $zero, 44
+define void @testlowercall8() nounwind {
+entry:
+  tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind
+  ret void
+}
+
+declare void @f8(i32, i32, i32, double)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 32
+; CHECK: addiu $5, $zero, 63
+; CHECK: addiu $6, $zero, 54
+; CHECK: ori $7, $2, 0
+define void @testlowercall9() nounwind {
+entry:
+  tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind
+  ret void
+}
+
+declare void @f9(i32, i32, i32, float)
+
+; $4, $5, ($6, $7)
+; CHECK: addiu $4, $zero, 42
+; CHECK: addiu $5, $zero, 73
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $2, 0
+define void @testlowercall10() nounwind {
+entry:
+  tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind
+  ret void
+}
+
+declare void @f10(i32, i32, double)
+
+; $4, ($6, $7)
+; CHECK: addiu $4, $zero, 52
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $2, 0
+define void @testlowercall11() nounwind {
+entry:
+  tail call void @f11(i32 52, double 1.600000e+01) nounwind
+  ret void
+}
+
+declare void @f11(i32, double)
+
+; $f12, $f14, $6, $7
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $6, $4, 0
+; CHECK: ori $7, $5, 0
+define void @testlowercall12() nounwind {
+entry:
+  tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
+  ret void
+}
+
+declare void @f12(float, float, float, float)
+
+; $f12, $5, $6, $7
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 83
+; CHECK: ori $6, $3, 0
+; CHECK: addiu $7, $zero, 25
+define void @testlowercall13() nounwind {
+entry:
+  tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
+  ret void
+}
+
+
+declare void @f13(float, i32, float, i32)
+
+; $f12, $f14, $7
+; CHECK: ldc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $7, $4, 0
+define void @testlowercall14() nounwind {
+entry:
+  tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind
+  ret void
+}
+
+declare void @f14(double, float, float)
+
+; $f12, $f14, ($6, $7)
+; CHECK: lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $4, 32768
+define void @testlowercall15() nounwind {
+entry:
+  tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind
+  ret void
+}
+
+declare void @f15(float, float, double)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 62
+; CHECK: ori $5, $2, 0
+; CHECK: addiu $6, $zero, 64
+; CHECK: ori $7, $3, 0
+define void @testlowercall16() nounwind {
+entry:
+  tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind
+  ret void
+}
+
+declare void @f16(i32, float, i32, float)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 72
+; CHECK: ori $5, $2, 0
+; CHECK: addiu $6, $zero, 74
+; CHECK: addiu $7, $zero, 35
+define void @testlowercall17() nounwind {
+entry:
+  tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind
+  ret void
+}
+
+declare void @f17(i32, float, i32, i32)
+
+; $4, $5, $6, $7
+; CHECK: addiu $4, $zero, 82
+; CHECK: addiu $5, $zero, 93
+; CHECK: ori $6, $2, 0
+; CHECK: addiu $7, $zero, 45
+define void @testlowercall18() nounwind {
+entry:
+  tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
+  ret void
+}
+
+declare void @f18(i32, i32, float, i32)
+
+
+; $4, ($6, $7), stack
+; CHECK: sw  $2, 16($sp)
+; CHECK: sw  $zero, 20($sp)
+; CHECK: addiu $4, $zero, 92
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $3, 0
+define void @testlowercall20() nounwind {
+entry:
+  tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind
+  ret void
+}
+
+declare void @f20(i32, double, double)
+
+; $f12, $5
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 103
+define void @testlowercall21() nounwind {
+entry:
+  tail call void @f21(float 5.800000e+01, i32 103) nounwind
+  ret void
+}
+
+declare void @f21(float, i32)
+
+; $f12, $5, ($6, $7)
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 113
+; CHECK: addiu $6, $zero, 0
+; CHECK: ori $7, $3, 32768
+define void @testlowercall22() nounwind {
+entry:
+  tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind
+  ret void
+}
+
+declare void @f22(float, i32, double)
+
+; $f12, f6
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 123
+define void @testlowercall23() nounwind {
+entry:
+  tail call void @f23(double 4.500000e+01, i32 123) nounwind
+  ret void
+}
+
+declare void @f23(double, i32)
+
+; $f12,$6, stack
+; CHECK: sw  $2, 16($sp)
+; CHECK: sw  $zero, 20($sp)
+; CHECK: ldc1 $f12, %lo
+; CHECK: addiu $6, $zero, 133
+define void @testlowercall24() nounwind {
+entry:
+  tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind
+  ret void
+}
+
+declare void @f24(double, i32, double)
+
+; CHECK: lwc1 $f12, %lo
+; lwc1 $f12, %lo
+; CHECK: lwc1 $f14, %lo
+; CHECK: ori $6, $4, 0
+; CHECK: ori $7, $5, 0
+; CHECK: lwc1 $f12, %lo
+; CHECK: addiu $5, $zero, 83
+; CHECK: ori $6, $3, 0
+; CHECK: addiu $7, $zero, 25
+; CHECK: addiu $4, $zero, 82
+; CHECK: addiu $5, $zero, 93
+; CHECK: ori $6, $2, 0
+; CHECK: addiu $7, $zero, 45
+define void @testlowercall25() nounwind {
+entry:
+  tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
+  tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
+  tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
+  ret void
+}
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
new file mode 100644
index 000000000000..e7dc30932144
--- /dev/null
+++ b/test/CodeGen/Mips/rotate.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+
+; CHECK:  rotrv $2, $4, $2
+define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %shl = shl i32 %a, %b
+  %sub = sub i32 32, %b
+  %shr = lshr i32 %a, %sub
+  %or = or i32 %shr, %shl
+  ret i32 %or
+}
+
+; CHECK:  rotr  $2, $4, 22
+define i32 @rot1(i32 %a) nounwind readnone {
+entry:
+  %shl = shl i32 %a, 10
+  %shr = lshr i32 %a, 22
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+; CHECK:  rotrv $2, $4, $5
+define i32 @rot2(i32 %a, i32 %b) nounwind readnone {
+entry:
+  %shr = lshr i32 %a, %b
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+; CHECK:  rotr  $2, $4, 10
+define i32 @rot3(i32 %a) nounwind readnone {
+entry:
+  %shr = lshr i32 %a, 10
+  %shl = shl i32 %a, 22
+  %or = or i32 %shr, %shl
+  ret i32 %or
+}
+
diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
deleted file mode 100644
index 5b5e11f2df0c..000000000000
--- a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32"
-target triple = "pic16-"
-@i = global i32 -10, align 1		; <i32*> [#uses=1]
-@j = global i32 -20, align 1		; <i32*> [#uses=1]
-@pc = global i8* inttoptr (i64 160 to i8*), align 1		; <i8**> [#uses=3]
-@main.auto.k = internal global i32 0		; <i32*> [#uses=2]
-
-define void @main() nounwind {
-entry:
-	%tmp = load i32* @i		; <i32> [#uses=1]
-	%tmp1 = load i32* @j		; <i32> [#uses=1]
-	%add = add i32 %tmp, %tmp1		; <i32> [#uses=1]
-	store i32 %add, i32* @main.auto.k
-	%tmp2 = load i32* @main.auto.k		; <i32> [#uses=1]
-	%add3 = add i32 %tmp2, 32		; <i32> [#uses=1]
-	%conv = trunc i32 %add3 to i8		; <i8> [#uses=1]
-	%tmp4 = load i8** @pc		; <i8*> [#uses=1]
-	store i8 %conv, i8* %tmp4
-	%tmp5 = load i8** @pc		; <i8*> [#uses=1]
-	%tmp6 = load i8* %tmp5		; <i8> [#uses=1]
-	%conv7 = sext i8 %tmp6 to i16		; <i16> [#uses=1]
-	%sub = sub i16 %conv7, 1		; <i16> [#uses=1]
-	%conv8 = trunc i16 %sub to i8		; <i8> [#uses=1]
-	%tmp9 = load i8** @pc		; <i8*> [#uses=1]
-	store i8 %conv8, i8* %tmp9
-	ret void
-}
-
-; CHECK: movf @i + 0, W
diff --git a/test/CodeGen/PIC16/2009-11-20-NewNode.ll b/test/CodeGen/PIC16/2009-11-20-NewNode.ll
deleted file mode 100644
index d68f0f41c4a5..000000000000
--- a/test/CodeGen/PIC16/2009-11-20-NewNode.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc -march=pic16 < %s
-; PR5558
-
-define i64 @_strtoll_r(i16 %base) nounwind {
-entry:
-  br i1 undef, label %if.then, label %if.end27
-
-if.then:                                          ; preds = %do.end
-  br label %if.end27
-
-if.end27:                                         ; preds = %if.then, %do.end
-  %cond66 = select i1 undef, i64 -9223372036854775808, i64 9223372036854775807 ; <i64> [#uses=3]
-  %conv69 = sext i16 %base to i64                 ; <i64> [#uses=1]
-  %div = udiv i64 %cond66, %conv69                ; <i64> [#uses=1]
-  br label %for.cond
-
-for.cond:                                         ; preds = %if.end116, %if.end27
-  br i1 undef, label %if.then152, label %if.then93
-
-if.then93:                                        ; preds = %for.cond
-  br i1 undef, label %if.end116, label %if.then152
-
-if.end116:                                        ; preds = %if.then93
-  %cmp123 = icmp ugt i64 undef, %div              ; <i1> [#uses=1]
-  %or.cond = or i1 undef, %cmp123                 ; <i1> [#uses=0]
-  br label %for.cond
-
-if.then152:                                       ; preds = %if.then93, %for.cond
-  br i1 undef, label %if.end182, label %if.then172
-
-if.then172:                                       ; preds = %if.then152
-  ret i64 %cond66
-
-if.end182:                                        ; preds = %if.then152
-  ret i64 %cond66
-}
diff --git a/test/CodeGen/PIC16/C16-11.ll b/test/CodeGen/PIC16/C16-11.ll
deleted file mode 100644
index 8a5a0ac11f75..000000000000
--- a/test/CodeGen/PIC16/C16-11.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=pic16
-; XFAIL: *
-; This fails because PIC16 doesn't define a (xor reg, reg) pattern.
-; 
-
-@c612.auto.a.b = internal global i1 false         ; <i1*> [#uses=2]
-@c612.auto.A.b = internal global i1 false         ; <i1*> [#uses=2]
-
-define void @c612() nounwind {
-entry:
-  %tmp3.b = load i1* @c612.auto.a.b               ; <i1> [#uses=1]
-  %tmp3 = zext i1 %tmp3.b to i16                  ; <i16> [#uses=1]
-  %tmp4.b = load i1* @c612.auto.A.b               ; <i1> [#uses=1]
-  %tmp4 = select i1 %tmp4.b, i16 2, i16 0         ; <i16> [#uses=1]
-  %cmp5 = icmp ne i16 %tmp3, %tmp4                ; <i1> [#uses=1]
-  %conv7 = zext i1 %cmp5 to i8                    ; <i8> [#uses=1]
-  tail call void @expectWrap(i8 %conv7, i8 2)
-  ret void
-}
-
-define void @expectWrap(i8 %boolresult, i8 %errCode) nounwind {
-entry:
-  %tobool = icmp eq i8 %boolresult, 0             ; <i1> [#uses=1]
-  br i1 %tobool, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  tail call void @exit(i16 1)
-  unreachable
-
-if.end:                                           ; preds = %entry
-  ret void
-}
-
-define i16 @main() nounwind {
-entry:
-  tail call void @c612()
-  ret i16 0
-}
-
-declare void @exit(i16) noreturn nounwind
diff --git a/test/CodeGen/PIC16/C16-15.ll b/test/CodeGen/PIC16/C16-15.ll
deleted file mode 100644
index 020b0dd6743e..000000000000
--- a/test/CodeGen/PIC16/C16-15.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: llc < %s -march=pic16 | grep "extern" | grep "@.lib.unordered.f32" | count 3
-; XFAIL: vg_leak
-
-@pc = global i8* inttoptr (i64 160 to i8*), align 1 ; <i8**> [#uses=2]
-@aa = common global i16 0, align 1                ; <i16*> [#uses=0]
-@c6214.auto.d = internal global float 0.000000e+00, align 4 ; <float*> [#uses=1]
-@c6214.auto.l = internal global float 0.000000e+00, align 4 ; <float*> [#uses=1]
-
-define float @dvalue(float %f) nounwind {
-entry:
-  ret float %f
-}
-
-define void @_assert(i16 %line, i16 %result) nounwind {
-entry:
-  %add = add i16 %line, %result                   ; <i16> [#uses=1]
-  %conv = trunc i16 %add to i8                    ; <i8> [#uses=1]
-  %tmp2 = load i8** @pc                           ; <i8*> [#uses=1]
-  store i8 %conv, i8* %tmp2
-  ret void
-}
-
-define i16 @main() nounwind {
-entry:
-  %retval = alloca i16, align 1                   ; <i16*> [#uses=2]
-  store i16 0, i16* %retval
-  call void @c6214()
-  %0 = load i16* %retval                          ; <i16> [#uses=1]
-  ret i16 %0
-}
-
-define internal void @c6214() nounwind {
-entry:
-  %call = call float @dvalue(float 0x3FF3C0CA40000000) ; <float> [#uses=3]
-  store float %call, float* @c6214.auto.d
-  store float %call, float* @c6214.auto.l
-  %cmp = fcmp ord float %call, 0.000000e+00       ; <i1> [#uses=1]
-  %conv = zext i1 %cmp to i16                     ; <i16> [#uses=1]
-  call void @_assert(i16 10, i16 %conv)
-  %tmp3 = load i8** @pc                           ; <i8*> [#uses=2]
-  %tmp4 = load i8* %tmp3                          ; <i8> [#uses=1]
-  %sub = add i8 %tmp4, -10                        ; <i8> [#uses=1]
-  store i8 %sub, i8* %tmp3
-  ret void
-}
diff --git a/test/CodeGen/PIC16/C16-49.ll b/test/CodeGen/PIC16/C16-49.ll
deleted file mode 100644
index e59800b9a926..000000000000
--- a/test/CodeGen/PIC16/C16-49.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llvm-as < %s | llc -march=pic16
-
-@aa = global i16 55, align 1                      ; <i16*> [#uses=1]
-@bb = global i16 44, align 1                      ; <i16*> [#uses=1]
-@PORTD = external global i8                       ; <i8*> [#uses=1]
-
-define void @foo() nounwind {
-entry:
-  %tmp = volatile load i16* @aa                   ; <i16> [#uses=1]
-  %tmp1 = volatile load i16* @bb                  ; <i16> [#uses=1]
-  %sub = sub i16 %tmp, %tmp1                      ; <i16> [#uses=1]
-  %conv = trunc i16 %sub to i8                    ; <i8> [#uses=1]
-  store i8 %conv, i8* @PORTD
-  ret void
-}
diff --git a/test/CodeGen/PIC16/check_inc_files.ll b/test/CodeGen/PIC16/check_inc_files.ll
deleted file mode 100644
index 436d41607374..000000000000
--- a/test/CodeGen/PIC16/check_inc_files.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s 
-
-;CHECK: #include p16f1xxx.inc
-;CHECK: #include stdmacros.inc
-
-define void @foo() nounwind {
-entry:
-  ret void
-}
diff --git a/test/CodeGen/PIC16/dg.exp b/test/CodeGen/PIC16/dg.exp
deleted file mode 100644
index b08b9858e048..000000000000
--- a/test/CodeGen/PIC16/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PIC16] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PIC16/global-in-user-section.ll b/test/CodeGen/PIC16/global-in-user-section.ll
deleted file mode 100644
index 6cdb64864ad5..000000000000
--- a/test/CodeGen/PIC16/global-in-user-section.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-@G1 = common global i16 0, section "usersection", align 1 
-; CHECK: usersection UDATA
-; CHECK: @G1 RES 2 
diff --git a/test/CodeGen/PIC16/globals.ll b/test/CodeGen/PIC16/globals.ll
deleted file mode 100644
index 3ee2e25265d3..000000000000
--- a/test/CodeGen/PIC16/globals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=pic16 | FileCheck %s
-; XFAIL: vg_leak
-
-@G1 = global i32 4712, section "Address=412"
-; CHECK: @G1.412..user_section.#	IDATA	412
-; CHECK: @G1
-; CHECK:     dl 4712
-
-@G2 = global i32 0, section "Address=412"
-; CHECK: @G2.412..user_section.#	UDATA	412
-; CHECK: @G2 RES 4
-
-@G3 = addrspace(1) constant i32 4712, section "Address=412"
-; CHECK: @G3.412..user_section.#	ROMDATA	412
-; CHECK: @G3
-; CHECK:     rom_dl 4712
-
-
diff --git a/test/CodeGen/PIC16/result_direction.ll b/test/CodeGen/PIC16/result_direction.ll
deleted file mode 100644
index 8549e21b3333..000000000000
--- a/test/CodeGen/PIC16/result_direction.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s
-
-@a = common global i16 0, align 1                 ; <i16*> [#uses=2]
-
-define void @foo() nounwind {
-entry:
-  %tmp = load i16* @a                             ; <i16> [#uses=1]
-  %add = add nsw i16 %tmp, 1                      ; <i16> [#uses=1]
-  store i16 %add, i16* @a
-;CHECK: movlw 1
-;CHECK: addwf @a + 0, F
-  ret void
-}
diff --git a/test/CodeGen/PIC16/sext.ll b/test/CodeGen/PIC16/sext.ll
deleted file mode 100644
index e51a54287ce6..000000000000
--- a/test/CodeGen/PIC16/sext.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=pic16
-; XFAIL: vg_leak
-
-@main.auto.c = internal global i8 0		; <i8*> [#uses=1]
-
-define i16 @main() nounwind {
-entry:
-	%tmp = load i8* @main.auto.c		; <i8> [#uses=1]
-	%conv = sext i8 %tmp to i16		; <i16> [#uses=1]
-	ret i16 %conv
-}
diff --git a/test/CodeGen/PIC16/test_indf_name.ll b/test/CodeGen/PIC16/test_indf_name.ll
deleted file mode 100644
index d52fc1125d7c..000000000000
--- a/test/CodeGen/PIC16/test_indf_name.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llvm-as < %s | llc -march=pic16 | FileCheck %s
-
-@pi = common global i16* null, align 1            ; <i16**> [#uses=1]
-
-define void @foo() nounwind {
-entry:
-  %tmp = load i16** @pi                           ; <i16*> [#uses=1]
-  store i16 1, i16* %tmp
-; CHECK: movwi {{[0-1]}}[INDF{{[0-1]}}]
-; CHECK: movwi {{[0-1]}}[INDF{{[0-1]}}]
-  ret void
-}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
new file mode 100644
index 000000000000..1259d03e96c9
--- /dev/null
+++ b/test/CodeGen/PTX/add.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: add.s32 r0, r1, r2;
+	%z = add i32 %x, %y
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: add.s32 r0, r1, 1;
+	%z = add i32 %x, 1
+; CHECK: ret;
+	ret i32 %z
+}
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
new file mode 100644
index 000000000000..2c304b57741e
--- /dev/null
+++ b/test/CodeGen/PTX/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target PTX] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll
new file mode 100644
index 000000000000..4071babb80ce
--- /dev/null
+++ b/test/CodeGen/PTX/exit.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_kernel void @t1() {
+; CHECK: exit;
+; CHECK-NOT: ret;
+  ret void
+}
+
+define ptx_kernel void @t2(i32* %p, i32 %x) {
+  store i32 %x, i32* %p
+; CHECK: exit;
+; CHECK-NOT: ret;
+  ret void
+}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
new file mode 100644
index 000000000000..836c4d41045a
--- /dev/null
+++ b/test/CodeGen/PTX/ld.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .s32 array[];
+@array = external global [10 x i32]
+
+;CHECK: .extern .const .s32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .s32 array_local[];
+@array_local = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .s32 array_shared[];
+@array_shared = external addrspace(4) global [10 x i32]
+
+define ptx_device i32 @t1(i32* %p) {
+entry:
+;CHECK: ld.global.s32 r0, [r1];
+  %x = load i32* %p
+  ret i32 %x
+}
+
+define ptx_device i32 @t2(i32* %p) {
+entry:
+;CHECK: ld.global.s32 r0, [r1+4];
+  %i = getelementptr i32* %p, i32 1
+  %x = load i32* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t3(i32* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK: add.s32 r0, r1, r0;
+;CHECK: ld.global.s32 r0, [r0];
+  %i = getelementptr i32* %p, i32 %q
+  %x = load i32* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_global() {
+entry:
+;CHECK: ld.global.s32 r0, [array];
+  %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+  %x = load i32* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_const() {
+entry:
+;CHECK: ld.const.s32 r0, [array_constant];
+  %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
+  %x = load i32 addrspace(1)* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_local() {
+entry:
+;CHECK: ld.local.s32 r0, [array_local];
+  %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+  %x = load i32 addrspace(2)* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t4_shared() {
+entry:
+;CHECK: ld.shared.s32 r0, [array_shared];
+  %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+  %x = load i32 addrspace(4)* %i
+  ret i32 %x
+}
+
+define ptx_device i32 @t5() {
+entry:
+;CHECK: ld.global.s32 r0, [array+4];
+  %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+  %x = load i32* %i
+  ret i32 %x
+}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
new file mode 100644
index 000000000000..c365e9beb897
--- /dev/null
+++ b/test/CodeGen/PTX/mov.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1() {
+; CHECK: mov.s32 r0, 0;
+; CHECK: ret;
+	ret i32 0
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: mov.s32 r0, r1;
+; CHECK: ret;
+	ret i32 %x
+}
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
new file mode 100644
index 000000000000..a14d5c9c27ba
--- /dev/null
+++ b/test/CodeGen/PTX/options.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
+; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
+
+define ptx_device void @t1() {
+	ret void
+}
diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll
new file mode 100644
index 000000000000..d5037f25fd36
--- /dev/null
+++ b/test/CodeGen/PTX/ret.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device void @t1() {
+; CHECK: ret;
+; CHECK-NOT: exit;
+	ret void
+}
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
new file mode 100644
index 000000000000..b564b43ab932
--- /dev/null
+++ b/test/CodeGen/PTX/shl.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: shl.b32 r0, r1, r2
+	%z = shl i32 %x, %y
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: shl.b32 r0, r1, 3
+	%z = shl i32 %x, 3
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t3(i32 %x) {
+; CHECK: shl.b32 r0, 3, r1
+	%z = shl i32 3, %x
+; CHECK: ret;
+	ret i32 %z
+}
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
new file mode 100644
index 000000000000..3f8ade862b75
--- /dev/null
+++ b/test/CodeGen/PTX/shr.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+; CHECK: shr.u32 r0, r1, r2
+	%z = lshr i32 %x, %y
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+; CHECK: shr.u32 r0, r1, 3
+	%z = lshr i32 %x, 3
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t3(i32 %x) {
+; CHECK: shr.u32 r0, 3, r1
+	%z = lshr i32 3, %x
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t4(i32 %x, i32 %y) {
+; CHECK: shr.s32 r0, r1, r2
+	%z = ashr i32 %x, %y
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t5(i32 %x) {
+; CHECK: shr.s32 r0, r1, 3
+	%z = ashr i32 %x, 3
+; CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t6(i32 %x) {
+; CHECK: shr.s32 r0, -3, r1
+	%z = ashr i32 -3, %x
+; CHECK: ret;
+	ret i32 %z
+}
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
new file mode 100644
index 000000000000..2cbacb9ee59c
--- /dev/null
+++ b/test/CodeGen/PTX/st.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .s32 array[];
+@array = external global [10 x i32]
+
+;CHECK: .extern .const .s32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .s32 array_local[];
+@array_local = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .s32 array_shared[];
+@array_shared = external addrspace(4) global [10 x i32]
+
+define ptx_device void @t1(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.s32 [r1], r2;
+  store i32 %x, i32* %p
+  ret void
+}
+
+define ptx_device void @t2(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.s32 [r1+4], r2;
+  %i = getelementptr i32* %p, i32 1
+  store i32 %x, i32* %i
+  ret void
+}
+
+define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
+;CHECK: .reg .s32 r0;
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK: add.s32 r0, r1, r0;
+;CHECK: st.global.s32 [r0], r3;
+  %i = getelementptr i32* %p, i32 %q
+  store i32 %x, i32* %i
+  ret void
+}
+
+define ptx_device void @t4_global(i32 %x) {
+entry:
+;CHECK: st.global.s32 [array], r1;
+  %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+  store i32 %x, i32* %i
+  ret void
+}
+
+define ptx_device void @t4_local(i32 %x) {
+entry:
+;CHECK: st.local.s32 [array_local], r1;
+  %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+  store i32 %x, i32 addrspace(2)* %i
+  ret void
+}
+
+define ptx_device void @t4_shared(i32 %x) {
+entry:
+;CHECK: st.shared.s32 [array_shared], r1;
+  %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+  store i32 %x, i32 addrspace(4)* %i
+  ret void
+}
+
+define ptx_device void @t5(i32 %x) {
+entry:
+;CHECK: st.global.s32 [array+4], r1;
+  %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+  store i32 %x, i32* %i
+  ret void
+}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
new file mode 100644
index 000000000000..aab3fdadad13
--- /dev/null
+++ b/test/CodeGen/PTX/sub.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device i32 @t1(i32 %x, i32 %y) {
+;CHECK: sub.s32 r0, r1, r2;
+	%z = sub i32 %x, %y
+;CHECK: ret;
+	ret i32 %z
+}
+
+define ptx_device i32 @t2(i32 %x) {
+;CHECK: add.s32 r0, r1, -1;
+	%z = sub i32 %x, 1
+;CHECK: ret;
+	ret i32 %z
+}
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index e93395a67ec6..cca9e658ad5f 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
 
-define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
+define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
         %tmp19 = load i64* %t
         %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 )             ; <i64> [#uses=1]
         %tmp23 = trunc i64 %tmp22 to i32
diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
index f48f32f8fb17..4a850984a909 100644
--- a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll b/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
new file mode 100644
index 000000000000..da77b2878543
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -O0
+; PR8357
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-freebsd9.0"
+
+; RegAllocFast requires that each physreg only be used once. The varargs
+; lowering code needs to use virtual registers when storing live-in registers on
+; the stack.
+
+define i32 @testing(i32 %x, float %a, ...) nounwind {
+  %1 = alloca i32, align 4
+  %2 = alloca float, align 4
+  store i32 %x, i32* %1, align 4
+  store float %a, float* %2, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
new file mode 100644
index 000000000000..bf3d577a3677
--- /dev/null
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -0,0 +1,22 @@
+; RUN: llc -disable-fp-elim < %s | FileCheck %s
+; PR8749
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+target triple = "powerpc-apple-darwin9.8"
+
+define i32 @main() nounwind {
+entry:
+; Make sure we're generating references using the red zone
+; CHECK: main:
+; CHECK: stw r3, -12(r1)
+  %retval = alloca i32
+  %0 = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store i32 0, i32* %0, align 4
+  %1 = load i32* %0, align 4
+  store i32 %1, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval1 = load i32* %retval
+  ret i32 %retval1
+}
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
index 109a83726e98..0797ca8d0be8 100644
--- a/test/CodeGen/PowerPC/align.ll
+++ b/test/CodeGen/PowerPC/align.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=powerpc-linux-gnu | FileCheck %s -check-prefix=ELF
 ; RUN: llc < %s -mtriple=powerpc-apple-darwin9 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=DARWIN8
 
 @a = global i1 true
 ; no alignment
@@ -40,3 +41,6 @@
 @bar = common global [75 x i8] zeroinitializer, align 128
 ;ELF: .comm bar,75,128
 ;DARWIN: .comm _bar,75,7
+
+;; Darwin8 doesn't support aligned comm.  Just miscompile this.
+; DARWIN8: .comm _bar,75 ;
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index 5ba050060fcb..92d1dbe902a1 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
 ; RUN:   grep {cmpwi cr0, r3, -1}
 
-define i32 @test(i32 %x) {
+define i32 @test(i32 %x) nounwind {
         %c = icmp eq i32 %x, -1
 	br i1 %c, label %T, label %F
 T:
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index ab8d9dca5dc4..5122ab39d232 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -43,8 +43,8 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
-; PIC: addis r4, r4, ha16(Ltmp0-"L0$pb")
-; PIC: li r6, lo16(Ltmp0-"L0$pb")
+; PIC: addis r4, r4, ha16(Ltmp0-L0$pb)
+; PIC: li r6, lo16(Ltmp0-L0$pb)
 ; PIC: add r4, r4, r6
 ; PIC: stw r4
 ; STATIC: li r5, lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
new file mode 100644
index 000000000000..659cdf74d026
--- /dev/null
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=ppc32
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
new file mode 100644
index 000000000000..3da06f65db83
--- /dev/null
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=ppc64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 59a36555bf86..1bee4e03f1b0 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -4,7 +4,7 @@
 ; RUN: grep srwi   %t | count 1
 ; RUN: not grep slwi %t
 
-define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) {
+define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) nounwind {
 entry:
 	%tmp.1 = shl i32 %srcA, 15		; <i32> [#uses=1]
 	%tmp.4 = and i32 %tmp.1, 32505856		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index d1c3f5234a26..1ad558c6abc9 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -6,13 +6,13 @@
 ; RUN: not grep stfiwx %t2
 ; RUN: grep r1 %t2
 
-define void @test(float %a, i32* %b) {
+define void @test(float %a, i32* %b) nounwind {
         %tmp.2 = fptosi float %a to i32         ; <i32> [#uses=1]
         store i32 %tmp.2, i32* %b
         ret void
 }
 
-define void @test2(float %a, i32* %b, i32 %i) {
+define void @test2(float %a, i32* %b, i32 %i) nounwind {
         %tmp.2 = getelementptr i32* %b, i32 1           ; <i32*> [#uses=1]
         %tmp.5 = getelementptr i32* %b, i32 %i          ; <i32*> [#uses=1]
         %tmp.7 = fptosi float %a to i32         ; <i32> [#uses=3]
diff --git a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll b/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
deleted file mode 100644
index 6f103462664f..000000000000
--- a/test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll
+++ /dev/null
@@ -1,585 +0,0 @@
-; RN: llc < %s
-; RUN: false
-; XFAIL: *
-; PR4534
-
-; ModuleID = 'tango.net.ftp.FtpClient.bc'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.6.0"
-	%"byte[]" = type { i32, i8* }
-@.str167 = external constant [11 x i8]		; <[11 x i8]*> [#uses=1]
-@.str170 = external constant [11 x i8]		; <[11 x i8]*> [#uses=2]
-@.str171 = external constant [5 x i8]		; <[5 x i8]*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%"byte[]")* @foo to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @foo(%"byte[]" %line_arg) {
-entry:
-	%line_arg830 = extractvalue %"byte[]" %line_arg, 0		; <i32> [#uses=12]
-	%line_arg831 = extractvalue %"byte[]" %line_arg, 1		; <i8*> [#uses=17]
-	%t5 = load i8* %line_arg831		; <i8> [#uses=1]
-	br label %forcondi
-
-forcondi:		; preds = %forbodyi, %entry
-	%l.0i = phi i32 [ 10, %entry ], [ %t4i, %forbodyi ]		; <i32> [#uses=2]
-	%p.0i = phi i8* [ getelementptr ([11 x i8]* @.str167, i32 0, i32 -1), %entry ], [ %t7i, %forbodyi ]		; <i8*> [#uses=1]
-	%t4i = add i32 %l.0i, -1		; <i32> [#uses=1]
-	%t5i = icmp eq i32 %l.0i, 0		; <i1> [#uses=1]
-	br i1 %t5i, label %forcond.i, label %forbodyi
-
-forbodyi:		; preds = %forcondi
-	%t7i = getelementptr i8* %p.0i, i32 1		; <i8*> [#uses=2]
-	%t8i = load i8* %t7i		; <i8> [#uses=1]
-	%t12i = icmp eq i8 %t8i, %t5		; <i1> [#uses=1]
-	br i1 %t12i, label %forcond.i, label %forcondi
-
-forcond.i:		; preds = %forbody.i, %forbodyi, %forcondi
-	%storemerge.i = phi i32 [ %t106.i, %forbody.i ], [ 1, %forcondi ], [ 1, %forbodyi ]		; <i32> [#uses=1]
-	%t77.i286 = phi i1 [ %phit3, %forbody.i ], [ false, %forcondi ], [ false, %forbodyi ]		; <i1> [#uses=1]
-	br i1 %t77.i286, label %forcond.i295, label %forbody.i
-
-forbody.i:		; preds = %forcond.i
-	%t106.i = add i32 %storemerge.i, 1		; <i32> [#uses=2]
-	%phit3 = icmp ugt i32 %t106.i, 3		; <i1> [#uses=1]
-	br label %forcond.i
-
-forcond.i295:		; preds = %forbody.i301, %forcond.i
-	%storemerge.i292 = phi i32 [ %t106.i325, %forbody.i301 ], [ 4, %forcond.i ]		; <i32> [#uses=1]
-	%t77.i293 = phi i1 [ %phit2, %forbody.i301 ], [ false, %forcond.i ]		; <i1> [#uses=1]
-	br i1 %t77.i293, label %forcond.i332, label %forbody.i301
-
-forbody.i301:		; preds = %forcond.i295
-	%t106.i325 = add i32 %storemerge.i292, 1		; <i32> [#uses=2]
-	%phit2 = icmp ugt i32 %t106.i325, 6		; <i1> [#uses=1]
-	br label %forcond.i295
-
-forcond.i332:		; preds = %forbody.i338, %forcond.i295
-	%storemerge.i329 = phi i32 [ %t106.i362, %forbody.i338 ], [ 7, %forcond.i295 ]		; <i32> [#uses=3]
-	%t77.i330 = phi i1 [ %phit1, %forbody.i338 ], [ false, %forcond.i295 ]		; <i1> [#uses=1]
-	br i1 %t77.i330, label %wcond.i370, label %forbody.i338
-
-forbody.i338:		; preds = %forcond.i332
-	%t106.i362 = add i32 %storemerge.i329, 1		; <i32> [#uses=2]
-	%phit1 = icmp ugt i32 %t106.i362, 9		; <i1> [#uses=1]
-	br label %forcond.i332
-
-wcond.i370:		; preds = %wbody.i372, %forcond.i332
-	%.frame.0.11 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ]		; <i32> [#uses=2]
-	%t3.i368 = phi i32 [ %t18.i371.c, %wbody.i372 ], [ %storemerge.i329, %forcond.i332 ]		; <i32> [#uses=5]
-	%t4.i369 = icmp ult i32 %t3.i368, %line_arg830		; <i1> [#uses=1]
-	br i1 %t4.i369, label %andand.i378, label %wcond22.i383
-
-wbody.i372:		; preds = %andand.i378
-	%t18.i371.c = add i32 %t3.i368, 1		; <i32> [#uses=2]
-	br label %wcond.i370
-
-andand.i378:		; preds = %wcond.i370
-	%t11.i375 = getelementptr i8* %line_arg831, i32 %t3.i368		; <i8*> [#uses=1]
-	%t12.i376 = load i8* %t11.i375		; <i8> [#uses=1]
-	%t14.i377 = icmp eq i8 %t12.i376, 32		; <i1> [#uses=1]
-	br i1 %t14.i377, label %wbody.i372, label %wcond22.i383
-
-wcond22.i383:		; preds = %wbody23.i385, %andand.i378, %wcond.i370
-	%.frame.0.10 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %.frame.0.11, %wcond.i370 ], [ %.frame.0.11, %andand.i378 ]		; <i32> [#uses=2]
-	%t49.i381 = phi i32 [ %t50.i384, %wbody23.i385 ], [ %t3.i368, %wcond.i370 ], [ %t3.i368, %andand.i378 ]		; <i32> [#uses=5]
-	%t32.i382 = icmp ult i32 %t49.i381, %line_arg830		; <i1> [#uses=1]
-	br i1 %t32.i382, label %andand33.i391, label %wcond54.i396
-
-wbody23.i385:		; preds = %andand33.i391
-	%t50.i384 = add i32 %t49.i381, 1		; <i32> [#uses=2]
-	br label %wcond22.i383
-
-andand33.i391:		; preds = %wcond22.i383
-	%t42.i388 = getelementptr i8* %line_arg831, i32 %t49.i381		; <i8*> [#uses=1]
-	%t43.i389 = load i8* %t42.i388		; <i8> [#uses=1]
-	%t45.i390 = icmp eq i8 %t43.i389, 32		; <i1> [#uses=1]
-	br i1 %t45.i390, label %wcond54.i396, label %wbody23.i385
-
-wcond54.i396:		; preds = %wbody55.i401, %andand33.i391, %wcond22.i383
-	%.frame.0.9 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %.frame.0.10, %wcond22.i383 ], [ %.frame.0.10, %andand33.i391 ]		; <i32> [#uses=2]
-	%t81.i394 = phi i32 [ %t82.i400, %wbody55.i401 ], [ %t49.i381, %wcond22.i383 ], [ %t49.i381, %andand33.i391 ]		; <i32> [#uses=3]
-	%t64.i395 = icmp ult i32 %t81.i394, %line_arg830		; <i1> [#uses=1]
-	br i1 %t64.i395, label %andand65.i407, label %wcond.i716
-
-wbody55.i401:		; preds = %andand65.i407
-	%t82.i400 = add i32 %t81.i394, 1		; <i32> [#uses=2]
-	br label %wcond54.i396
-
-andand65.i407:		; preds = %wcond54.i396
-	%t74.i404 = getelementptr i8* %line_arg831, i32 %t81.i394		; <i8*> [#uses=1]
-	%t75.i405 = load i8* %t74.i404		; <i8> [#uses=1]
-	%t77.i406 = icmp eq i8 %t75.i405, 32		; <i1> [#uses=1]
-	br i1 %t77.i406, label %wbody55.i401, label %wcond.i716
-
-wcond.i716:		; preds = %wbody.i717, %andand65.i407, %wcond54.i396
-	%.frame.0.0 = phi i32 [ %t18.i.c829, %wbody.i717 ], [ %.frame.0.9, %wcond54.i396 ], [ %.frame.0.9, %andand65.i407 ]		; <i32> [#uses=7]
-	%t4.i715 = icmp ult i32 %.frame.0.0, %line_arg830		; <i1> [#uses=1]
-	br i1 %t4.i715, label %andand.i721, label %wcond22.i724
-
-wbody.i717:		; preds = %andand.i721
-	%t18.i.c829 = add i32 %.frame.0.0, 1		; <i32> [#uses=1]
-	br label %wcond.i716
-
-andand.i721:		; preds = %wcond.i716
-	%t11.i718 = getelementptr i8* %line_arg831, i32 %.frame.0.0		; <i8*> [#uses=1]
-	%t12.i719 = load i8* %t11.i718		; <i8> [#uses=1]
-	%t14.i720 = icmp eq i8 %t12.i719, 32		; <i1> [#uses=1]
-	br i1 %t14.i720, label %wbody.i717, label %wcond22.i724
-
-wcond22.i724:		; preds = %wbody23.i726, %andand.i721, %wcond.i716
-	%.frame.0.1 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ]		; <i32> [#uses=2]
-	%t49.i722 = phi i32 [ %t50.i725, %wbody23.i726 ], [ %.frame.0.0, %wcond.i716 ], [ %.frame.0.0, %andand.i721 ]		; <i32> [#uses=5]
-	%t32.i723 = icmp ult i32 %t49.i722, %line_arg830		; <i1> [#uses=1]
-	br i1 %t32.i723, label %andand33.i731, label %wcond54.i734
-
-wbody23.i726:		; preds = %andand33.i731
-	%t50.i725 = add i32 %t49.i722, 1		; <i32> [#uses=2]
-	br label %wcond22.i724
-
-andand33.i731:		; preds = %wcond22.i724
-	%t42.i728 = getelementptr i8* %line_arg831, i32 %t49.i722		; <i8*> [#uses=1]
-	%t43.i729 = load i8* %t42.i728		; <i8> [#uses=1]
-	%t45.i730 = icmp eq i8 %t43.i729, 32		; <i1> [#uses=1]
-	br i1 %t45.i730, label %wcond54.i734, label %wbody23.i726
-
-wcond54.i734:		; preds = %wbody55.i736, %andand33.i731, %wcond22.i724
-	%.frame.0.2 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %.frame.0.1, %wcond22.i724 ], [ %.frame.0.1, %andand33.i731 ]		; <i32> [#uses=2]
-	%t81.i732 = phi i32 [ %t82.i735, %wbody55.i736 ], [ %t49.i722, %wcond22.i724 ], [ %t49.i722, %andand33.i731 ]		; <i32> [#uses=3]
-	%t64.i733 = icmp ult i32 %t81.i732, %line_arg830		; <i1> [#uses=1]
-	br i1 %t64.i733, label %andand65.i740, label %wcond.i750
-
-wbody55.i736:		; preds = %andand65.i740
-	%t82.i735 = add i32 %t81.i732, 1		; <i32> [#uses=2]
-	br label %wcond54.i734
-
-andand65.i740:		; preds = %wcond54.i734
-	%t74.i737 = getelementptr i8* %line_arg831, i32 %t81.i732		; <i8*> [#uses=1]
-	%t75.i738 = load i8* %t74.i737		; <i8> [#uses=1]
-	%t77.i739 = icmp eq i8 %t75.i738, 32		; <i1> [#uses=1]
-	br i1 %t77.i739, label %wbody55.i736, label %wcond.i750
-
-wcond.i750:		; preds = %wbody.i752, %andand65.i740, %wcond54.i734
-	%.frame.0.3 = phi i32 [ %t18.i751.c, %wbody.i752 ], [ %.frame.0.2, %wcond54.i734 ], [ %.frame.0.2, %andand65.i740 ]		; <i32> [#uses=11]
-	%t4.i749 = icmp ult i32 %.frame.0.3, %line_arg830		; <i1> [#uses=1]
-	br i1 %t4.i749, label %andand.i758, label %wcond22.i761
-
-wbody.i752:		; preds = %andand.i758
-	%t18.i751.c = add i32 %.frame.0.3, 1		; <i32> [#uses=1]
-	br label %wcond.i750
-
-andand.i758:		; preds = %wcond.i750
-	%t11.i755 = getelementptr i8* %line_arg831, i32 %.frame.0.3		; <i8*> [#uses=1]
-	%t12.i756 = load i8* %t11.i755		; <i8> [#uses=1]
-	%t14.i757 = icmp eq i8 %t12.i756, 32		; <i1> [#uses=1]
-	br i1 %t14.i757, label %wbody.i752, label %wcond22.i761
-
-wcond22.i761:		; preds = %wbody23.i763, %andand.i758, %wcond.i750
-	%.frame.0.4 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ]		; <i32> [#uses=2]
-	%t49.i759 = phi i32 [ %t50.i762, %wbody23.i763 ], [ %.frame.0.3, %wcond.i750 ], [ %.frame.0.3, %andand.i758 ]		; <i32> [#uses=7]
-	%t32.i760 = icmp ult i32 %t49.i759, %line_arg830		; <i1> [#uses=1]
-	br i1 %t32.i760, label %andand33.i769, label %wcond54.i773
-
-wbody23.i763:		; preds = %andand33.i769
-	%t50.i762 = add i32 %t49.i759, 1		; <i32> [#uses=2]
-	br label %wcond22.i761
-
-andand33.i769:		; preds = %wcond22.i761
-	%t42.i766 = getelementptr i8* %line_arg831, i32 %t49.i759		; <i8*> [#uses=1]
-	%t43.i767 = load i8* %t42.i766		; <i8> [#uses=1]
-	%t45.i768 = icmp eq i8 %t43.i767, 32		; <i1> [#uses=1]
-	br i1 %t45.i768, label %wcond54.i773, label %wbody23.i763
-
-wcond54.i773:		; preds = %wbody55.i775, %andand33.i769, %wcond22.i761
-	%.frame.0.5 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %.frame.0.4, %wcond22.i761 ], [ %.frame.0.4, %andand33.i769 ]		; <i32> [#uses=1]
-	%t81.i770 = phi i32 [ %t82.i774, %wbody55.i775 ], [ %t49.i759, %wcond22.i761 ], [ %t49.i759, %andand33.i769 ]		; <i32> [#uses=3]
-	%t64.i771 = icmp ult i32 %t81.i770, %line_arg830		; <i1> [#uses=1]
-	br i1 %t64.i771, label %andand65.i780, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
-
-wbody55.i775:		; preds = %andand65.i780
-	%t82.i774 = add i32 %t81.i770, 1		; <i32> [#uses=2]
-	br label %wcond54.i773
-
-andand65.i780:		; preds = %wcond54.i773
-	%t74.i777 = getelementptr i8* %line_arg831, i32 %t81.i770		; <i8*> [#uses=1]
-	%t75.i778 = load i8* %t74.i777		; <i8> [#uses=1]
-	%t77.i779 = icmp eq i8 %t75.i778, 32		; <i1> [#uses=1]
-	br i1 %t77.i779, label %wbody55.i775, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
-
-Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786:		; preds = %andand65.i780, %wcond54.i773
-	%t89.i782 = getelementptr i8* %line_arg831, i32 %.frame.0.3		; <i8*> [#uses=4]
-	%t90.i783 = sub i32 %t49.i759, %.frame.0.3		; <i32> [#uses=2]
-	br label %wcond.i792
-
-wcond.i792:		; preds = %wbody.i794, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786
-	%.frame.0.6 = phi i32 [ %.frame.0.5, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit786 ], [ %t18.i793.c, %wbody.i794 ]		; <i32> [#uses=9]
-	%t4.i791 = icmp ult i32 %.frame.0.6, %line_arg830		; <i1> [#uses=1]
-	br i1 %t4.i791, label %andand.i800, label %wcond22.i803
-
-wbody.i794:		; preds = %andand.i800
-	%t18.i793.c = add i32 %.frame.0.6, 1		; <i32> [#uses=1]
-	br label %wcond.i792
-
-andand.i800:		; preds = %wcond.i792
-	%t11.i797 = getelementptr i8* %line_arg831, i32 %.frame.0.6		; <i8*> [#uses=1]
-	%t12.i798 = load i8* %t11.i797		; <i8> [#uses=1]
-	%t14.i799 = icmp eq i8 %t12.i798, 32		; <i1> [#uses=1]
-	br i1 %t14.i799, label %wbody.i794, label %wcond22.i803
-
-wcond22.i803:		; preds = %wbody23.i805, %andand.i800, %wcond.i792
-	%t49.i801 = phi i32 [ %t50.i804, %wbody23.i805 ], [ %.frame.0.6, %wcond.i792 ], [ %.frame.0.6, %andand.i800 ]		; <i32> [#uses=7]
-	%t32.i802 = icmp ult i32 %t49.i801, %line_arg830		; <i1> [#uses=1]
-	br i1 %t32.i802, label %andand33.i811, label %wcond54.i815
-
-wbody23.i805:		; preds = %andand33.i811
-	%t50.i804 = add i32 %t49.i801, 1		; <i32> [#uses=1]
-	br label %wcond22.i803
-
-andand33.i811:		; preds = %wcond22.i803
-	%t42.i808 = getelementptr i8* %line_arg831, i32 %t49.i801		; <i8*> [#uses=1]
-	%t43.i809 = load i8* %t42.i808		; <i8> [#uses=1]
-	%t45.i810 = icmp eq i8 %t43.i809, 32		; <i1> [#uses=1]
-	br i1 %t45.i810, label %wcond54.i815, label %wbody23.i805
-
-wcond54.i815:		; preds = %wbody55.i817, %andand33.i811, %wcond22.i803
-	%t81.i812 = phi i32 [ %t82.i816, %wbody55.i817 ], [ %t49.i801, %wcond22.i803 ], [ %t49.i801, %andand33.i811 ]		; <i32> [#uses=3]
-	%t64.i813 = icmp ult i32 %t81.i812, %line_arg830		; <i1> [#uses=1]
-	br i1 %t64.i813, label %andand65.i822, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
-
-wbody55.i817:		; preds = %andand65.i822
-	%t82.i816 = add i32 %t81.i812, 1		; <i32> [#uses=1]
-	br label %wcond54.i815
-
-andand65.i822:		; preds = %wcond54.i815
-	%t74.i819 = getelementptr i8* %line_arg831, i32 %t81.i812		; <i8*> [#uses=1]
-	%t75.i820 = load i8* %t74.i819		; <i8> [#uses=1]
-	%t77.i821 = icmp eq i8 %t75.i820, 32		; <i1> [#uses=1]
-	br i1 %t77.i821, label %wbody55.i817, label %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
-
-Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828:		; preds = %andand65.i822, %wcond54.i815
-	%t89.i824 = getelementptr i8* %line_arg831, i32 %.frame.0.6		; <i8*> [#uses=4]
-	%t90.i825 = sub i32 %t49.i801, %.frame.0.6		; <i32> [#uses=2]
-	%t63 = load i8* %t89.i824		; <i8> [#uses=2]
-	br label %forcondi622
-
-forcondi622:		; preds = %forbodyi626, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828
-	%l.0i618 = phi i32 [ 10, %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t4i620, %forbodyi626 ]		; <i32> [#uses=2]
-	%p.0i619 = phi i8* [ getelementptr ([11 x i8]* @.str170, i32 0, i32 -1), %Dt3net3ftp9FClient13FConnection13pListLineMFAaZS5t3net3ftp9FClient11FFileInfo10p_wordMFZAa.exit828 ], [ %t7i623, %forbodyi626 ]		; <i8*> [#uses=1]
-	%t4i620 = add i32 %l.0i618, -1		; <i32> [#uses=1]
-	%t5i621 = icmp eq i32 %l.0i618, 0		; <i1> [#uses=1]
-	br i1 %t5i621, label %if65, label %forbodyi626
-
-forbodyi626:		; preds = %forcondi622
-	%t7i623 = getelementptr i8* %p.0i619, i32 1		; <i8*> [#uses=3]
-	%t8i624 = load i8* %t7i623		; <i8> [#uses=1]
-	%t12i625 = icmp eq i8 %t8i624, %t63		; <i1> [#uses=1]
-	br i1 %t12i625, label %ifi630, label %forcondi622
-
-ifi630:		; preds = %forbodyi626
-	%t15i627 = ptrtoint i8* %t7i623 to i32		; <i32> [#uses=1]
-	%t17i629 = sub i32 %t15i627, ptrtoint ([11 x i8]* @.str170 to i32)		; <i32> [#uses=1]
-	%phit636 = icmp eq i32 %t17i629, 10		; <i1> [#uses=1]
-	br i1 %phit636, label %if65, label %e67
-
-if65:		; preds = %ifi630, %forcondi622
-	%t4i532 = icmp eq i32 %t49.i759, %.frame.0.3		; <i1> [#uses=1]
-	br i1 %t4i532, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %forcondi539
-
-forcondi539:		; preds = %zi546, %if65
-	%sign.1.i533 = phi i1 [ %sign.0.i543, %zi546 ], [ false, %if65 ]		; <i1> [#uses=2]
-	%l.0i534 = phi i32 [ %t33i545, %zi546 ], [ %t90.i783, %if65 ]		; <i32> [#uses=3]
-	%p.0i535 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ]		; <i8*> [#uses=6]
-	%c.0.ini536 = phi i8* [ %t30i544, %zi546 ], [ %t89.i782, %if65 ]		; <i8*> [#uses=1]
-	%c.0i537 = load i8* %c.0.ini536		; <i8> [#uses=2]
-	%t8i538 = icmp eq i32 %l.0i534, 0		; <i1> [#uses=1]
-	br i1 %t8i538, label %endfori550, label %forbodyi540
-
-forbodyi540:		; preds = %forcondi539
-	switch i8 %c.0i537, label %endfori550 [
-		i8 32, label %zi546
-		i8 9, label %zi546
-		i8 45, label %if20i541
-		i8 43, label %if26i542
-	]
-
-if20i541:		; preds = %forbodyi540
-	br label %zi546
-
-if26i542:		; preds = %forbodyi540
-	br label %zi546
-
-zi546:		; preds = %if26i542, %if20i541, %forbodyi540, %forbodyi540
-	%sign.0.i543 = phi i1 [ false, %if26i542 ], [ true, %if20i541 ], [ %sign.1.i533, %forbodyi540 ], [ %sign.1.i533, %forbodyi540 ]		; <i1> [#uses=1]
-	%t30i544 = getelementptr i8* %p.0i535, i32 1		; <i8*> [#uses=2]
-	%t33i545 = add i32 %l.0i534, -1		; <i32> [#uses=1]
-	br label %forcondi539
-
-endfori550:		; preds = %forbodyi540, %forcondi539
-	%t37i547 = icmp eq i8 %c.0i537, 48		; <i1> [#uses=1]
-	%t39i548 = icmp sgt i32 %l.0i534, 1		; <i1> [#uses=1]
-	%or.condi549 = and i1 %t37i547, %t39i548		; <i1> [#uses=1]
-	br i1 %or.condi549, label %if40i554, label %endif41i564
-
-if40i554:		; preds = %endfori550
-	%t43i551 = getelementptr i8* %p.0i535, i32 1		; <i8*> [#uses=2]
-	%t44i552 = load i8* %t43i551		; <i8> [#uses=1]
-	%t45i553 = zext i8 %t44i552 to i32		; <i32> [#uses=1]
-	switch i32 %t45i553, label %endif41i564 [
-		i32 120, label %case46i556
-		i32 88, label %case46i556
-		i32 98, label %case51i558
-		i32 66, label %case51i558
-		i32 111, label %case56i560
-		i32 79, label %case56i560
-	]
-
-case46i556:		; preds = %if40i554, %if40i554
-	%t48i555 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
-	br label %endif41i564
-
-case51i558:		; preds = %if40i554, %if40i554
-	%t53i557 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
-	br label %endif41i564
-
-case56i560:		; preds = %if40i554, %if40i554
-	%t58i559 = getelementptr i8* %p.0i535, i32 2		; <i8*> [#uses=1]
-	br label %endif41i564
-
-endif41i564:		; preds = %case56i560, %case51i558, %case46i556, %if40i554, %endfori550
-	%r.0i561 = phi i32 [ 0, %if40i554 ], [ 8, %case56i560 ], [ 2, %case51i558 ], [ 16, %case46i556 ], [ 0, %endfori550 ]		; <i32> [#uses=2]
-	%p.2i562 = phi i8* [ %t43i551, %if40i554 ], [ %t58i559, %case56i560 ], [ %t53i557, %case51i558 ], [ %t48i555, %case46i556 ], [ %p.0i535, %endfori550 ]		; <i8*> [#uses=2]
-	%t63i563 = icmp eq i32 %r.0i561, 0		; <i1> [#uses=1]
-	br i1 %t63i563, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576, label %if70i568
-
-if70i568:		; preds = %endif41i564
-	br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
-
-Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576:		; preds = %if70i568, %endif41i564, %if65
-	%radix.0.i570 = phi i32 [ 0, %if65 ], [ %r.0i561, %if70i568 ], [ 10, %endif41i564 ]		; <i32> [#uses=2]
-	%p.1i571 = phi i8* [ %p.2i562, %if70i568 ], [ %t89.i782, %if65 ], [ %p.2i562, %endif41i564 ]		; <i8*> [#uses=1]
-	%t84i572 = ptrtoint i8* %p.1i571 to i32		; <i32> [#uses=1]
-	%t85i573 = ptrtoint i8* %t89.i782 to i32		; <i32> [#uses=1]
-	%t86i574 = sub i32 %t84i572, %t85i573		; <i32> [#uses=2]
-	%t6.i575 = sub i32 %t90.i783, %t86i574		; <i32> [#uses=1]
-	%t59i604 = zext i32 %radix.0.i570 to i64		; <i64> [#uses=1]
-	br label %fcondi581
-
-fcondi581:		; preds = %if55i610, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576
-	%value.0i577 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t65i607, %if55i610 ]		; <i64> [#uses=1]
-	%fkey.0i579 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i576 ], [ %t70i609, %if55i610 ]		; <i32> [#uses=3]
-	%t3i580 = icmp ult i32 %fkey.0i579, %t6.i575		; <i1> [#uses=1]
-	br i1 %t3i580, label %fbodyi587, label %wcond.i422
-
-fbodyi587:		; preds = %fcondi581
-	%t5.s.i582 = add i32 %t86i574, %fkey.0i579		; <i32> [#uses=1]
-	%t89.i782.s = add i32 %.frame.0.3, %t5.s.i582		; <i32> [#uses=1]
-	%t5i583 = getelementptr i8* %line_arg831, i32 %t89.i782.s		; <i8*> [#uses=1]
-	%t6i584 = load i8* %t5i583		; <i8> [#uses=6]
-	%t6.off84i585 = add i8 %t6i584, -48		; <i8> [#uses=1]
-	%or.cond.i28.i586 = icmp ugt i8 %t6.off84i585, 9		; <i1> [#uses=1]
-	br i1 %or.cond.i28.i586, label %ei590, label %endifi603
-
-ei590:		; preds = %fbodyi587
-	%t6.off83i588 = add i8 %t6i584, -97		; <i8> [#uses=1]
-	%or.cond81i589 = icmp ugt i8 %t6.off83i588, 25		; <i1> [#uses=1]
-	br i1 %or.cond81i589, label %e24i595, label %if22i592
-
-if22i592:		; preds = %ei590
-	%t27i591 = add i8 %t6i584, -39		; <i8> [#uses=1]
-	br label %endifi603
-
-e24i595:		; preds = %ei590
-	%t6.offi593 = add i8 %t6i584, -65		; <i8> [#uses=1]
-	%or.cond82i594 = icmp ugt i8 %t6.offi593, 25		; <i1> [#uses=1]
-	br i1 %or.cond82i594, label %wcond.i422, label %if39i597
-
-if39i597:		; preds = %e24i595
-	%t44.i29.i596 = add i8 %t6i584, -7		; <i8> [#uses=1]
-	br label %endifi603
-
-endifi603:		; preds = %if39i597, %if22i592, %fbodyi587
-	%c.0.i30.i598 = phi i8 [ %t27i591, %if22i592 ], [ %t44.i29.i596, %if39i597 ], [ %t6i584, %fbodyi587 ]		; <i8> [#uses=1]
-	%t48.i31.i599 = zext i8 %c.0.i30.i598 to i32		; <i32> [#uses=1]
-	%t49i600 = add i32 %t48.i31.i599, 208		; <i32> [#uses=1]
-	%t52i601 = and i32 %t49i600, 255		; <i32> [#uses=2]
-	%t54i602 = icmp ult i32 %t52i601, %radix.0.i570		; <i1> [#uses=1]
-	br i1 %t54i602, label %if55i610, label %wcond.i422
-
-if55i610:		; preds = %endifi603
-	%t61i605 = mul i64 %value.0i577, %t59i604		; <i64> [#uses=1]
-	%t64i606 = zext i32 %t52i601 to i64		; <i64> [#uses=1]
-	%t65i607 = add i64 %t61i605, %t64i606		; <i64> [#uses=1]
-	%t70i609 = add i32 %fkey.0i579, 1		; <i32> [#uses=1]
-	br label %fcondi581
-
-e67:		; preds = %ifi630
-	%t4i447 = icmp eq i32 %t49.i801, %.frame.0.6		; <i1> [#uses=1]
-	br i1 %t4i447, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %forcondi454
-
-forcondi454:		; preds = %zi461, %e67
-	%c.0i452 = phi i8 [ %c.0i452.pre, %zi461 ], [ %t63, %e67 ]		; <i8> [#uses=2]
-	%sign.1.i448 = phi i1 [ %sign.0.i458, %zi461 ], [ false, %e67 ]		; <i1> [#uses=2]
-	%l.0i449 = phi i32 [ %t33i460, %zi461 ], [ %t90.i825, %e67 ]		; <i32> [#uses=3]
-	%p.0i450 = phi i8* [ %t30i459, %zi461 ], [ %t89.i824, %e67 ]		; <i8*> [#uses=5]
-	%t8i453 = icmp eq i32 %l.0i449, 0		; <i1> [#uses=1]
-	br i1 %t8i453, label %endfori465, label %forbodyi455
-
-forbodyi455:		; preds = %forcondi454
-	switch i8 %c.0i452, label %endfori465 [
-		i8 32, label %zi461
-		i8 9, label %zi461
-		i8 45, label %if20i456
-		i8 43, label %if26i457
-	]
-
-if20i456:		; preds = %forbodyi455
-	br label %zi461
-
-if26i457:		; preds = %forbodyi455
-	br label %zi461
-
-zi461:		; preds = %if26i457, %if20i456, %forbodyi455, %forbodyi455
-	%sign.0.i458 = phi i1 [ false, %if26i457 ], [ true, %if20i456 ], [ %sign.1.i448, %forbodyi455 ], [ %sign.1.i448, %forbodyi455 ]		; <i1> [#uses=1]
-	%t30i459 = getelementptr i8* %p.0i450, i32 1		; <i8*> [#uses=2]
-	%t33i460 = add i32 %l.0i449, -1		; <i32> [#uses=1]
-	%c.0i452.pre = load i8* %t30i459		; <i8> [#uses=1]
-	br label %forcondi454
-
-endfori465:		; preds = %forbodyi455, %forcondi454
-	%t37i462 = icmp eq i8 %c.0i452, 48		; <i1> [#uses=1]
-	%t39i463 = icmp sgt i32 %l.0i449, 1		; <i1> [#uses=1]
-	%or.condi464 = and i1 %t37i462, %t39i463		; <i1> [#uses=1]
-	br i1 %or.condi464, label %if40i469, label %endif41i479
-
-if40i469:		; preds = %endfori465
-	%t43i466 = getelementptr i8* %p.0i450, i32 1		; <i8*> [#uses=2]
-	%t44i467 = load i8* %t43i466		; <i8> [#uses=1]
-	%t45i468 = zext i8 %t44i467 to i32		; <i32> [#uses=1]
-	switch i32 %t45i468, label %endif41i479 [
-		i32 120, label %case46i471
-		i32 111, label %case56i475
-	]
-
-case46i471:		; preds = %if40i469
-	%t48i470 = getelementptr i8* %p.0i450, i32 2		; <i8*> [#uses=1]
-	br label %endif41i479
-
-case56i475:		; preds = %if40i469
-	%t58i474 = getelementptr i8* %p.0i450, i32 2		; <i8*> [#uses=1]
-	br label %endif41i479
-
-endif41i479:		; preds = %case56i475, %case46i471, %if40i469, %endfori465
-	%r.0i476 = phi i32 [ 0, %if40i469 ], [ 8, %case56i475 ], [ 16, %case46i471 ], [ 0, %endfori465 ]		; <i32> [#uses=2]
-	%p.2i477 = phi i8* [ %t43i466, %if40i469 ], [ %t58i474, %case56i475 ], [ %t48i470, %case46i471 ], [ %p.0i450, %endfori465 ]		; <i8*> [#uses=2]
-	%t63i478 = icmp eq i32 %r.0i476, 0		; <i1> [#uses=1]
-	br i1 %t63i478, label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491, label %if70i483
-
-if70i483:		; preds = %endif41i479
-	br label %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
-
-Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491:		; preds = %if70i483, %endif41i479, %e67
-	%radix.0.i485 = phi i32 [ 0, %e67 ], [ %r.0i476, %if70i483 ], [ 10, %endif41i479 ]		; <i32> [#uses=2]
-	%p.1i486 = phi i8* [ %p.2i477, %if70i483 ], [ %t89.i824, %e67 ], [ %p.2i477, %endif41i479 ]		; <i8*> [#uses=1]
-	%t84i487 = ptrtoint i8* %p.1i486 to i32		; <i32> [#uses=1]
-	%t85i488 = ptrtoint i8* %t89.i824 to i32		; <i32> [#uses=1]
-	%t86i489 = sub i32 %t84i487, %t85i488		; <i32> [#uses=2]
-	%ttt = sub i32 %t90.i825, %t86i489		; <i32> [#uses=1]
-	%t59i519 = zext i32 %radix.0.i485 to i64		; <i64> [#uses=1]
-	br label %fcondi496
-
-fcondi496:		; preds = %if55i525, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491
-	%value.0i492 = phi i64 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t65i522, %if55i525 ]		; <i64> [#uses=1]
-	%fkey.0i494 = phi i32 [ 0, %Dt4x7c7I11V4tTaZ4tFAaKbKkZk.exit.i491 ], [ %t70i524, %if55i525 ]		; <i32> [#uses=3]
-	%t3i495 = icmp ult i32 %fkey.0i494, %ttt		; <i1> [#uses=1]
-	br i1 %t3i495, label %fbodyi502, label %wcond.i422
-
-fbodyi502:		; preds = %fcondi496
-	%t5.s.i497 = add i32 %t86i489, %fkey.0i494		; <i32> [#uses=1]
-	%t89.i824.s = add i32 %.frame.0.6, %t5.s.i497		; <i32> [#uses=1]
-	%t5i498 = getelementptr i8* %line_arg831, i32 %t89.i824.s		; <i8*> [#uses=1]
-	%t6i499 = load i8* %t5i498		; <i8> [#uses=6]
-	%t6.off84i500 = add i8 %t6i499, -48		; <i8> [#uses=1]
-	%or.cond.i28.i501 = icmp ugt i8 %t6.off84i500, 9		; <i1> [#uses=1]
-	br i1 %or.cond.i28.i501, label %ei505, label %endifi518
-
-ei505:		; preds = %fbodyi502
-	%t6.off83i503 = add i8 %t6i499, -97		; <i8> [#uses=1]
-	%or.cond81i504 = icmp ugt i8 %t6.off83i503, 25		; <i1> [#uses=1]
-	br i1 %or.cond81i504, label %e24i510, label %if22i507
-
-if22i507:		; preds = %ei505
-	%t27i506 = add i8 %t6i499, -39		; <i8> [#uses=1]
-	br label %endifi518
-
-e24i510:		; preds = %ei505
-	%t6.offi508 = add i8 %t6i499, -65		; <i8> [#uses=1]
-	%or.cond82i509 = icmp ugt i8 %t6.offi508, 25		; <i1> [#uses=1]
-	br i1 %or.cond82i509, label %wcond.i422, label %if39i512
-
-if39i512:		; preds = %e24i510
-	%t44.i29.i511 = add i8 %t6i499, -7		; <i8> [#uses=1]
-	br label %endifi518
-
-endifi518:		; preds = %if39i512, %if22i507, %fbodyi502
-	%c.0.i30.i513 = phi i8 [ %t27i506, %if22i507 ], [ %t44.i29.i511, %if39i512 ], [ %t6i499, %fbodyi502 ]		; <i8> [#uses=1]
-	%t48.i31.i514 = zext i8 %c.0.i30.i513 to i32		; <i32> [#uses=1]
-	%t49i515 = add i32 %t48.i31.i514, 208		; <i32> [#uses=1]
-	%t52i516 = and i32 %t49i515, 255		; <i32> [#uses=2]
-	%t54i517 = icmp ult i32 %t52i516, %radix.0.i485		; <i1> [#uses=1]
-	br i1 %t54i517, label %if55i525, label %wcond.i422
-
-if55i525:		; preds = %endifi518
-	%t61i520 = mul i64 %value.0i492, %t59i519		; <i64> [#uses=1]
-	%t64i521 = zext i32 %t52i516 to i64		; <i64> [#uses=1]
-	%t65i522 = add i64 %t61i520, %t64i521		; <i64> [#uses=1]
-	%t70i524 = add i32 %fkey.0i494, 1		; <i32> [#uses=1]
-	br label %fcondi496
-
-wcond.i422:		; preds = %e40.i, %endifi518, %e24i510, %fcondi496, %endifi603, %e24i595, %fcondi581
-	%sarg60.pn.i = phi i8* [ %p.0.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ]		; <i8*> [#uses=3]
-	%start_arg.pn.i = phi i32 [ %t49.i443, %e40.i ], [ 0, %fcondi496 ], [ 0, %e24i510 ], [ 0, %endifi518 ], [ 0, %endifi603 ], [ 0, %e24i595 ], [ 0, %fcondi581 ]		; <i32> [#uses=3]
-	%extent.0.i = phi i32 [ %t51.i, %e40.i ], [ undef, %fcondi496 ], [ undef, %e24i510 ], [ undef, %endifi518 ], [ undef, %endifi603 ], [ undef, %e24i595 ], [ undef, %fcondi581 ]		; <i32> [#uses=3]
-	%p.0.i = getelementptr i8* %sarg60.pn.i, i32 %start_arg.pn.i		; <i8*> [#uses=2]
-	%p.0.s63.i = add i32 %start_arg.pn.i, -1		; <i32> [#uses=1]
-	%t2i424 = getelementptr i8* %sarg60.pn.i, i32 %p.0.s63.i		; <i8*> [#uses=1]
-	br label %forcondi430
-
-forcondi430:		; preds = %forbodyi434, %wcond.i422
-	%l.0i426 = phi i32 [ %extent.0.i, %wcond.i422 ], [ %t4i428, %forbodyi434 ]		; <i32> [#uses=2]
-	%p.0i427 = phi i8* [ %t2i424, %wcond.i422 ], [ %t7i431, %forbodyi434 ]		; <i8*> [#uses=1]
-	%t4i428 = add i32 %l.0i426, -1		; <i32> [#uses=1]
-	%t5i429 = icmp eq i32 %l.0i426, 0		; <i1> [#uses=1]
-	br i1 %t5i429, label %e.i441, label %forbodyi434
-
-forbodyi434:		; preds = %forcondi430
-	%t7i431 = getelementptr i8* %p.0i427, i32 1		; <i8*> [#uses=3]
-	%t8i432 = load i8* %t7i431		; <i8> [#uses=1]
-	%t12i433 = icmp eq i8 %t8i432, 32		; <i1> [#uses=1]
-	br i1 %t12i433, label %ifi438, label %forcondi430
-
-ifi438:		; preds = %forbodyi434
-	%t15i435 = ptrtoint i8* %t7i431 to i32		; <i32> [#uses=1]
-	%t16i436 = ptrtoint i8* %p.0.i to i32		; <i32> [#uses=1]
-	%t17i437 = sub i32 %t15i435, %t16i436		; <i32> [#uses=1]
-	br label %e.i441
-
-e.i441:		; preds = %ifi438, %forcondi430
-	%t2561.i = phi i32 [ %t17i437, %ifi438 ], [ %extent.0.i, %forcondi430 ]		; <i32> [#uses=2]
-	%p.0.s.i = add i32 %start_arg.pn.i, %t2561.i		; <i32> [#uses=1]
-	%t32.s.i = add i32 %p.0.s.i, -1		; <i32> [#uses=1]
-	%t2i.i = getelementptr i8* %sarg60.pn.i, i32 %t32.s.i		; <i8*> [#uses=1]
-	br label %forbodyi.i
-
-forbodyi.i:		; preds = %forbodyi.i, %e.i441
-	%p.0i.i = phi i8* [ %t2i.i, %e.i441 ], [ %t7i.i, %forbodyi.i ]		; <i8*> [#uses=1]
-	%s2.0i.i = phi i8* [ getelementptr ([5 x i8]* @.str171, i32 0, i32 0), %e.i441 ], [ %t11i.i, %forbodyi.i ]		; <i8*> [#uses=2]
-	%t7i.i = getelementptr i8* %p.0i.i, i32 1		; <i8*> [#uses=2]
-	%t8i.i = load i8* %t7i.i		; <i8> [#uses=1]
-	%t11i.i = getelementptr i8* %s2.0i.i, i32 1		; <i8*> [#uses=1]
-	%t12i.i = load i8* %s2.0i.i		; <i8> [#uses=1]
-	%t14i.i = icmp eq i8 %t8i.i, %t12i.i		; <i1> [#uses=1]
-	br i1 %t14i.i, label %forbodyi.i, label %e40.i
-
-e40.i:		; preds = %forbodyi.i
-	%t49.i443 = add i32 %t2561.i, 1		; <i32> [#uses=2]
-	%t51.i = sub i32 %extent.0.i, %t49.i443		; <i32> [#uses=1]
-	br label %wcond.i422
-}
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index ef9791277dcd..b0bdcc28d28e 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
 ; RUN:   grep fmul | count 1
 
-define double @foo(double %X) {
+define double @foo(double %X) nounwind {
         %tmp1 = fmul double %X, 1.23
         %tmp2 = fmul double %tmp1, 4.124
         ret double %tmp2
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
new file mode 100644
index 000000000000..1769be957ac4
--- /dev/null
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=P32 %s
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck -check-prefix=P64 %s
+
+; PR8327
+define i8* @test1(i8** %foo) nounwind {
+  %A = va_arg i8** %foo, i8*
+  ret i8* %A
+}
+
+; P32: test1:
+; P32: 	lwz r4, 0(r3)
+; P32:	addi r5, r4, 4
+; P32:	stw r5, 0(r3)
+; P32:	lwz r3, 0(r4)
+; P32:	blr 
+
+; P64: test1:
+; P64: ld r4, 0(r3)
+; P64: addi r5, r4, 8
+; P64: std r5, 0(r3)
+; P64: ld r3, 0(r4)
+; P64: blr
diff --git a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
index f66ee216089d..3b644986f2e4 100644
--- a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll
new file mode 100755
index 000000000000..3ceda958de6e
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -0,0 +1,105 @@
+; RUN: llc -march=sparc <%s | FileCheck %s -check-prefix=V8
+; RUN: llc -march=sparc -mattr=v9 <%s | FileCheck %s -check-prefix=V9
+
+
+define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind readnone noinline {
+entry:
+; V8: addcc
+; V8-NOT: subcc
+; V8: addx
+; V9: addcc
+; V9-NOT: subcc
+; V9: addx
+; V9: mov{{e|ne}} %icc
+  %0 = add i64 %a, %b
+  %1 = icmp ugt i64 %0, %c
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+
+define i32 @test_select_int_icc(i32 %a, i32 %b, i32 %c) nounwind readnone noinline {
+entry:
+; V8: test_select_int_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_int_icc
+; V9: subcc
+; V9-NOT: {{be|bne}}
+; V9: mov{{e|ne}} %icc
+  %0 = icmp eq i32 %a, 0
+  %1 = select i1 %0, i32 %b, i32 %c
+  ret i32 %1
+}
+
+
+define float @test_select_fp_icc(i32 %a, float %f1, float %f2) nounwind readnone noinline {
+entry:
+; V8: test_select_fp_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_fp_icc
+; V9: subcc
+; V9-NOT: {{be|bne}}
+; V9: fmovs{{e|ne}} %icc
+  %0 = icmp eq i32 %a, 0
+  %1 = select i1 %0, float %f1, float %f2
+  ret float %1
+}
+
+define double @test_select_dfp_icc(i32 %a, double %f1, double %f2) nounwind readnone noinline {
+entry:
+; V8: test_select_dfp_icc
+; V8: subcc
+; V8: {{be|bne}}
+; V9: test_select_dfp_icc
+; V9: subcc
+; V9=NOT: {{be|bne}}
+; V9: fmovd{{e|ne}} %icc
+  %0 = icmp eq i32 %a, 0
+  %1 = select i1 %0, double %f1, double %f2
+  ret double %1
+}
+
+define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noinline {
+entry:
+;V8: test_select_int_fcc
+;V8: fcmps
+;V8: {{fbe|fbne}}
+;V9: test_select_int_fcc
+;V9: fcmps
+;V9-NOT: {{fbe|fbne}}
+;V9: mov{{e|ne}} %fcc0
+  %0 = fcmp une float %f, 0.000000e+00
+  %a.b = select i1 %0, i32 %a, i32 %b
+  ret i32 %a.b
+}
+
+
+define float @test_select_fp_fcc(float %f, float %f1, float %f2) nounwind readnone noinline {
+entry:
+;V8: test_select_fp_fcc
+;V8: fcmps
+;V8: {{fbe|fbne}}
+;V9: test_select_fp_fcc
+;V9: fcmps
+;V9-NOT: {{fbe|fbne}}
+;V9: fmovs{{e|ne}} %fcc0
+  %0 = fcmp une float %f, 0.000000e+00
+  %1 = select i1 %0, float %f1, float %f2
+  ret float %1
+}
+
+define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind readnone noinline {
+entry:
+;V8: test_select_dfp_fcc
+;V8: fcmpd
+;V8: {{fbne|fbe}}
+;V9: test_select_dfp_fcc
+;V9: fcmpd
+;V9-NOT: {{fbne|fbe}}
+;V9: fmovd{{e|ne}} %fcc0
+  %0 = fcmp une double %f, 0.000000e+00
+  %1 = select i1 %0, double %f1, double %f2
+  ret double %1
+}
diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll
new file mode 100644
index 000000000000..7350e9232428
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=sparc -O0 <%s
+
+define void @test() nounwind {
+entry:
+ %0 = tail call i32 (...)* @foo() nounwind
+ tail call void (...)* @bar() nounwind
+ ret void
+}
+
+declare i32 @foo(...)
+
+declare void @bar(...)
+
diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
new file mode 100644
index 000000000000..fbf724270566
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -0,0 +1,64 @@
+;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8
+;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
+
+define i8* @frameaddr() nounwind readnone {
+entry:
+;V8: frameaddr
+;V8: or %g0, %fp, {{.+}}
+
+;V9: frameaddr
+;V9: or %g0, %fp, {{.+}}
+  %0 = tail call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
+}
+
+define i8* @frameaddr2() nounwind readnone {
+entry:
+;V8: frameaddr2
+;V8: ta 3
+;V8: ld [%fp+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+
+;V9: frameaddr2
+;V9: flushw
+;V9: ld [%fp+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+  %0 = tail call i8* @llvm.frameaddress(i32 3)
+  ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+
+
+
+define i8* @retaddr() nounwind readnone {
+entry:
+;V8: retaddr
+;V8: or %g0, %i7, {{.+}}
+
+;V9: retaddr
+;V9: or %g0, %i7, {{.+}}
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+define i8* @retaddr2() nounwind readnone {
+entry:
+;V8: retaddr2
+;V8: ta 3
+;V8: ld [%fp+56], {{.+}}
+;V8: ld [{{.+}}+56], {{.+}}
+;V8: ld [{{.+}}+60], {{.+}}
+
+;V9: retaddr2
+;V9: flushw
+;V9: ld [%fp+56], {{.+}}
+;V9: ld [{{.+}}+56], {{.+}}
+;V9: ld [{{.+}}+60], {{.+}}
+  %0 = tail call i8* @llvm.returnaddress(i32 3)
+  ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
new file mode 100644
index 000000000000..bc27e987a179
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -0,0 +1,90 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+;RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=UNOPT
+
+
+define i32 @test(i32 %a) nounwind {
+entry:
+; CHECK: test
+; CHECK: call bar
+; CHECK-NOT: nop
+; CHECK: ret
+; CHECK-NEXT: restore
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  ret i32 %0
+}
+
+define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind {
+entry:
+; CHECK:      test_jmpl
+; CHECK:      call
+; CHECK-NOT:  nop
+; CHECK:      ret
+; CHECK-NEXT: restore
+  %0 = tail call i32 %f(i32 %a, i32 %b) nounwind
+  ret i32 %0
+}
+
+define i32 @test_loop(i32 %a, i32 %b) nounwind readnone {
+; CHECK: test_loop
+entry:
+  %0 = icmp sgt i32 %b, 0
+  br i1 %0, label %bb, label %bb5
+
+bb:                                               ; preds = %entry, %bb
+  %a_addr.18 = phi i32 [ %a_addr.0, %bb ], [ %a, %entry ]
+  %1 = phi i32 [ %3, %bb ], [ 0, %entry ]
+  %tmp9 = mul i32 %1, %b
+  %2 = and i32 %1, 1
+  %tmp = xor i32 %2, 1
+  %.pn = shl i32 %tmp9, %tmp
+  %a_addr.0 = add i32 %.pn, %a_addr.18
+  %3 = add nsw i32 %1, 1
+  %exitcond = icmp eq i32 %3, %b
+;CHECK:      subcc
+;CHECK:      bne
+;CHECK-NOT:  nop
+  br i1 %exitcond, label %bb5, label %bb
+
+bb5:                                              ; preds = %bb, %entry
+  %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ]
+;CHECK:      ret
+;CHECK-NEXT: restore
+  ret i32 %a_addr.1.lcssa
+}
+
+define i32 @test_inlineasm(i32 %a) nounwind {
+entry:
+;CHECK:      test_inlineasm
+;CHECK:      sethi
+;CHECK:      !NO_APP
+;CHECK-NEXT: subcc
+;CHECK-NEXT: bg
+;CHECK-NEXT: nop
+  tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
+  %0 = icmp slt i32 %a, 0
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %1 = tail call i32 (...)* @foo(i32 %a) nounwind
+  ret i32 %1
+
+bb1:                                              ; preds = %entry
+  %2 = tail call i32 @bar(i32 %a) nounwind
+  ret i32 %2
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(i32)
+
+
+define i32 @test_implicit_def() nounwind {
+entry:
+;UNOPT:       test_implicit_def
+;UNOPT:       call func
+;UNOPT-NEXT:  nop
+  %0 = tail call i32 @func(i32* undef) nounwind
+  ret i32 0
+}
+
+declare i32 @func(i32*)
diff --git a/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
new file mode 100644
index 000000000000..85c16e4684ed
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
@@ -0,0 +1,18 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+
+%struct.foo_t = type { i32, i32, i32 }
+
+@s = internal unnamed_addr global %struct.foo_t { i32 10, i32 20, i32 30 }
+
+define i32 @test() nounwind {
+entry:
+;CHECK:     test
+;CHECK:     st
+;CHECK:     st
+;CHECK:     st
+;CHECK:     bar
+  %0 = tail call i32 @bar(%struct.foo_t* byval @s) nounwind
+  ret i32 %0
+}
+
+declare i32 @bar(%struct.foo_t* byval)
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
new file mode 100644
index 000000000000..2f684b009c96
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -0,0 +1,36 @@
+;RUN: llc -march=sparc < %s | FileCheck %s
+
+%struct.foo_t = type { i32, i32, i32 }
+
+define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+;CHECK: make_foo
+;CHECK: ld [%fp+64], {{.+}}
+;CHECK: or {{.+}}, {{.+}}, %i0
+;CHECK: ret
+  %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
+  store i32 %a, i32* %0, align 4
+  %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1
+  store i32 %b, i32* %1, align 4
+  %2 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 2
+  store i32 %c, i32* %2, align 4
+  ret void
+}
+
+define i32 @test() nounwind {
+entry:
+;CHECK: test
+;CHECK: st {{.+}}, [%sp+64]
+;CHECK: make_foo
+  %f = alloca %struct.foo_t, align 8
+  call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
+  %0 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 0
+  %1 = load i32* %0, align 8
+  %2 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 1
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 2
+  %5 = load i32* %4, align 8
+  %6 = add nsw i32 %3, %1
+  %7 = add nsw i32 %6, %5
+  ret i32 %7
+}
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 9c2c16a6947c..4352e6246301 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -1,6 +1,26 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc | FileCheck %s
 
-define i32 @test(i32 %X) {
+define i32 @test0(i32 %X) {
 	%tmp.1 = add i32 %X, 1
 	ret i32 %tmp.1
+; CHECK: test0:
+; CHECK: add %i0, 1, %i0
+}
+
+
+;; xnor tests.
+define i32 @test1(i32 %X, i32 %Y) {
+        %A = xor i32 %X, %Y
+        %B = xor i32 %A, -1
+        ret i32 %B
+; CHECK: test1:
+; CHECK: xnor %i0, %i1, %i0
+}
+
+define i32 @test2(i32 %X, i32 %Y) {
+        %A = xor i32 %X, -1
+        %B = xor i32 %A, %Y
+        ret i32 %B
+; CHECK: test2:
+; CHECK: xnor %i0, %i1, %i0
 }
diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
new file mode 100644
index 000000000000..6013b17d9372
--- /dev/null
+++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=sparc
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "sparc"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+; No lowering support.
+;  %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
+;  store i32 %4, i32* %out0, align 4
+;  %5 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+000) nounwind
+;  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+; No lowering support.
+;  %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
+;  store i32 %4, i32* %out0, align 4
+;  %5 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+000) nounwind
+;  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/SPARC/xnor.ll b/test/CodeGen/SPARC/xnor.ll
deleted file mode 100644
index 6ff66bd6fcc6..000000000000
--- a/test/CodeGen/SPARC/xnor.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=sparc | \
-; RUN:   grep xnor | count 2
-
-define i32 @test1(i32 %X, i32 %Y) {
-        %A = xor i32 %X, %Y             ; <i32> [#uses=1]
-        %B = xor i32 %A, -1             ; <i32> [#uses=1]
-        ret i32 %B
-}
-
-define i32 @test2(i32 %X, i32 %Y) {
-        %A = xor i32 %X, -1             ; <i32> [#uses=1]
-        %B = xor i32 %A, %Y             ; <i32> [#uses=1]
-        ret i32 %B
-}
-
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
index 610aa40197e3..c2877ac55ed1 100644
--- a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
index 6b6c14f40871..b9039774d42e 100644
--- a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 9a6321bb43c4..06c0dfec5bab 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -10,7 +10,7 @@
 define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
 ; CHECK: blx ___muldf3
 ; CHECK: blx ___muldf3
-; CHECK: beq LBB0_8
+; CHECK: beq LBB0_7
 ; CHECK: blx ___muldf3
 ; <label>:3
   switch i32 %1, label %4 [
diff --git a/test/CodeGen/Thumb/2011-EpilogueBug.ll b/test/CodeGen/Thumb/2011-EpilogueBug.ll
new file mode 100644
index 000000000000..16789e66cc18
--- /dev/null
+++ b/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s | FileCheck %s
+; r8869722
+
+%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+
+define void @t1(%struct.state* %v) {
+; CHECK: push {r4
+  %tmp6 = load i32* null
+  %tmp8 = alloca float, i32 %tmp6
+  store i32 1, i32* null
+  br label %return
+
+return:                                           ; preds = %0
+; CHECK: mov sp, r4
+  ret void
+}
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
index c611b865f67d..419c3baa3da3 100644
--- a/test/CodeGen/Thumb/barrier.ll
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -1,15 +1,16 @@
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin  | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=-db | FileCheck %s -check-prefix=V6
 ; RUN: llc < %s -march=thumb -mattr=+v6m       | FileCheck %s -check-prefix=V6M
 
-declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
 
 define void @t1() {
 ; V6: t1:
 ; V6: blx {{_*}}sync_synchronize
 
 ; V6M: t1:
-; V6M: dsb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+; V6M: dmb st
+  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true)
   ret void
 }
 
@@ -18,7 +19,7 @@ define void @t2() {
 ; V6: blx {{_*}}sync_synchronize
 
 ; V6M: t2:
-; V6M: dmb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+; V6M: dmb ish
+  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
   ret void
 }
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 5c8ad974bc0e..1f31dca0524d 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=thumb | not grep {ldr sp}
-; RUN: llc < %s -mtriple=thumb-apple-darwin | \
-; RUN:   not grep {sub.*r7}
-; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
 
 define void @t1(%struct.state* %v) {
+; CHECK: t1:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: mov r2, sp
+; CHECK: subs r4, r2, r1
+; CHECK: mov sp, r4
 	%tmp6 = load i32* null
 	%tmp8 = alloca float, i32 %tmp6
 	store i32 1, i32* null
@@ -34,6 +37,18 @@ declare fastcc void @f2(float*, float*, float*, i32)
 @str215 = external global [2 x i8]
 
 define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
+; CHECK: t2:
+; CHECK: push
+; CHECK: add r7, sp, #12
+; CHECK: sub sp, #8
+; CHECK: mov r6, sp
+; CHECK: str r2, [r6, #4]
+; CHECK: str r0, [r6]
+; CHECK-NOT: ldr r0, [sp
+; CHECK: ldr r0, [r6, #4]
+; CHECK: mov r0, sp
+; CHECK: subs r5, r0, r1
+; CHECK: mov sp, r5
 	%tmp1 = call i32 @strlen( i8* %tag )
 	%tmp3 = call i32 @strlen( i8* %contents )
 	%tmp4 = add i32 %tmp1, 2
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index b289484f5efb..fbacabaedc35 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -10,22 +10,22 @@ define void @test1() {
 
 define void @test2() {
 ; CHECK: test2:
-; CHECK: ldr r0, LCPI
+; CHECK: ldr.n r0, LCPI
 ; CHECK: add sp, r0
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
     %tmp = alloca [ 4168 x i8 ] , align 4
     ret void
 }
 
 define i32 @test3() {
 ; CHECK: test3:
-; CHECK: ldr r2, LCPI
+; CHECK: ldr.n r2, LCPI
 ; CHECK: add sp, r2
-; CHECK: ldr r1, LCPI
+; CHECK: ldr.n r1, LCPI
 ; CHECK: add r1, sp
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index e3ef44a87586..197e19e31b49 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -4,7 +4,7 @@
 ; RUN:   grep adc | count 1
 ; RUN: llc < %s -march=thumb | \
 ; RUN:   grep sbc | count 1
-; RUN: llc < %s -march=thumb | grep __muldi3
+; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __muldi3
 
 define i64 @f1() {
 entry:
diff --git a/test/CodeGen/Thumb/machine-licm.ll b/test/CodeGen/Thumb/machine-licm.ll
deleted file mode 100644
index a87e82c21dd7..000000000000
--- a/test/CodeGen/Thumb/machine-licm.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s
-; rdar://7353541
-; rdar://7354376
-
-; The generated code is no where near ideal. It's not recognizing the two
-; constantpool entries being loaded can be merged into one.
-
-@GV = external global i32                         ; <i32*> [#uses=2]
-
-define void @t(i32* nocapture %vals, i32 %c) nounwind {
-entry:
-; CHECK: t:
-  %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
-  br i1 %0, label %return, label %bb.nph
-
-bb.nph:                                           ; preds = %entry
-; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI0_0
-; CHECK: add r2, pc
-; CHECK: ldr r{{[0-9]+}}, [r2]
-; CHECK: LBB0_2
-; CHECK: LCPI0_0:
-; CHECK-NOT: LCPI0_1:
-; CHECK: .section
-  %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
-  br label %bb
-
-bb:                                               ; preds = %bb, %bb.nph
-  %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ]    ; <i32> [#uses=1]
-  %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ]     ; <i32> [#uses=2]
-  %scevgep = getelementptr i32* %vals, i32 %i.03  ; <i32*> [#uses=1]
-  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
-  %3 = add nsw i32 %1, %2                         ; <i32> [#uses=2]
-  store i32 %3, i32* @GV, align 4
-  %4 = add i32 %i.03, 1                           ; <i32> [#uses=2]
-  %exitcond = icmp eq i32 %4, %c                  ; <i1> [#uses=1]
-  br i1 %exitcond, label %return, label %bb
-
-return:                                           ; preds = %bb, %entry
-  ret void
-}
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index 7a183b0f9e26..780e5fac02b9 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=thumb | grep ble | count 1
 ; RUN: llc < %s -march=thumb | grep bls | count 1
 ; RUN: llc < %s -march=thumb | grep bhi | count 1
-; RUN: llc < %s -march=thumb | grep __ltdf2
+; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __ltdf2
 
 define i32 @f1(i32 %a.s) {
 entry:
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index f26c6d114b8e..550b3efae998 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -5,8 +5,13 @@
 define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
 entry:
 ; CHECK: __gcov_execlp:
-; CHECK: mov sp, r7
-; CHECK: sub sp, #4
+; CHECK: sub sp, #8
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: subs r4, r7, #4
+; CHECK: mov sp, r4
+; CHECK-NOT: mov sp, r7
+; CHECK: add sp, #8
 	call void @__gcov_flush() nounwind
 	br i1 undef, label %bb5, label %bb
 
diff --git a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
new file mode 100644
index 000000000000..5cfc68d09408
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1      ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1     ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1      ; <[4 x i8]*> [#uses=1]
+
+declare i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+  %0 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+  %1 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+  %2 = tail call  i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+  %3 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
new file mode 100644
index 000000000000..06a152d56e4d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0                    ; <i64*> [#uses=4]
+@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
+@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
+@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
+@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [28 x i8] c"score = %d (%c)  work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0                   ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
+@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null                    ; <i8**> [#uses=9]
+@hits = internal global i64 0                     ; <i64*> [#uses=8]
+@posed = internal global i64 0                    ; <i64*> [#uses=7]
+@ht = internal global i32* null                   ; <i32**> [#uses=5]
+@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
+@.str117 = private constant [45 x i8] c"- %5.3f  < %5.3f  = %5.3f  > %5.3f  + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
+@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare i32 @getchar() nounwind
+
+define internal i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+  %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+  %1 = shl i32 %0, 7                              ; <i32> [#uses=1]
+  %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+  %3 = or i32 %1, %2                              ; <i32> [#uses=1]
+  %4 = shl i32 %3, 7                              ; <i32> [#uses=1]
+  %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+  %6 = or i32 %4, %5                              ; <i32> [#uses=3]
+  %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+  %8 = shl i32 %7, 7                              ; <i32> [#uses=1]
+  %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+  %10 = or i32 %8, %9                             ; <i32> [#uses=1]
+  %11 = shl i32 %10, 7                            ; <i32> [#uses=1]
+  %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+  %13 = or i32 %11, %12                           ; <i32> [#uses=3]
+  %14 = icmp ugt i32 %6, %13                      ; <i1> [#uses=2]
+  %.pn2.in.i = select i1 %14, i32 %6, i32 %13     ; <i32> [#uses=1]
+  %.pn1.in.i = select i1 %14, i32 %13, i32 %6     ; <i32> [#uses=1]
+  %.pn2.i = shl i32 %.pn2.in.i, 7                 ; <i32> [#uses=1]
+  %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+  %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i          ; <i32> [#uses=1]
+  %.pn.in.i = zext i32 %.pn.in.in.i to i64        ; <i64> [#uses=1]
+  %.pn.i = shl i64 %.pn.in.i, 21                  ; <i64> [#uses=1]
+  %.pn1.i = zext i32 %.pn1.in.i to i64            ; <i64> [#uses=1]
+  %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i          ; <i64> [#uses=2]
+  %15 = lshr i64 %iftmp.22.0.i, 17                ; <i64> [#uses=1]
+  %16 = trunc i64 %15 to i32                      ; <i32> [#uses=2]
+  %17 = urem i64 %iftmp.22.0.i, 1050011           ; <i64> [#uses=1]
+  %18 = trunc i64 %17 to i32                      ; <i32> [#uses=1]
+  %19 = urem i32 %16, 179                         ; <i32> [#uses=1]
+  %20 = or i32 %19, 131072                        ; <i32> [#uses=1]
+  %21 = load i32** @ht, align 4                   ; <i32*> [#uses=1]
+  br label %bb5
+
+bb:                                               ; preds = %bb5
+  %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+  %23 = load i32* %22, align 4                    ; <i32> [#uses=1]
+  %24 = icmp eq i32 %23, %16                      ; <i1> [#uses=1]
+  br i1 %24, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  %25 = load i8** @he, align 4                    ; <i8*> [#uses=1]
+  %26 = getelementptr inbounds i8* %25, i32 %x.0  ; <i8*> [#uses=1]
+  %27 = load i8* %26, align 1                     ; <i8> [#uses=1]
+  %28 = sext i8 %27 to i32                        ; <i32> [#uses=1]
+  ret i32 %28
+
+bb2:                                              ; preds = %bb
+  %29 = add nsw i32 %20, %x.0                     ; <i32> [#uses=3]
+  %30 = add i32 %29, -1050011                     ; <i32> [#uses=1]
+  %31 = icmp sgt i32 %29, 1050010                 ; <i1> [#uses=1]
+  %. = select i1 %31, i32 %30, i32 %29            ; <i32> [#uses=1]
+  %32 = add i32 %33, 1                            ; <i32> [#uses=1]
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %entry
+  %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ]      ; <i32> [#uses=2]
+  %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ]    ; <i32> [#uses=3]
+  %34 = icmp sgt i32 %33, 7                       ; <i1> [#uses=1]
+  br i1 %34, label %bb7, label %bb
+
+bb7:                                              ; preds = %bb5
+  ret i32 -128
+}
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index 7ee19863de19..458569ec93b5 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
+; RUN:   -pre-RA-sched=source | FileCheck -check-prefix=SOURCE %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 \
+; RUN:   -pre-RA-sched=list-hybrid | FileCheck -check-prefix=HYBRID %s
 ; Radar 7459078
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -10,9 +13,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 %s5 = type { i32 }
 
 ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
-; CHECK: InlineAsm End
-; CHECK: cmp
-; CHECK: beq
+; SOURCE: InlineAsm End
+; SOURCE: cmp
+; SOURCE: beq
+; HYBRID: InlineAsm End
+; HYBRID: cbz
 define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
 entry:
   %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 26750065af3f..9ed6a01255f8 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,8 +23,9 @@ entry:
   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
 ; Constant pool load followed by add.
 ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.]]
-; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]]
+; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: LPC0_0:
+; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
 ; CHECK: vmov.f64 [[LDR]]
   %5 = fadd <2 x double> %3, %3                   ; <<2 x double>> [#uses=2]
   %6 = fadd <2 x double> %4, %4                   ; <<2 x double>> [#uses=2]
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index c5fc5098cd46..f91e1c9febe2 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
 ; rdar://8115404
 ; Tail merging must not split an IT block.
 
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index abcf13a3e38f..41f7f299555d 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -5,6 +5,10 @@
 define internal fastcc i32 @Callee(i32 %i) nounwind {
 entry:
 ; CHECK: Callee:
+; CHECK: push
+; CHECK: mov r4, sp
+; CHECK: sub.w r12, r4, #1000
+; CHECK: mov sp, r12
   %0 = icmp eq i32 %i, 0                          ; <i1> [#uses=1]
   br i1 %0, label %bb2, label %bb
 
@@ -17,9 +21,11 @@ bb:                                               ; preds = %entry
   ret i32 %4
 
 bb2:                                              ; preds = %entry
-; Must restore sp from fp here
-; CHECK: mov sp, r7
-; CHECK: sub sp, #8
+; Must restore sp from fp here. Make sure not to leave sp in a temporarily invalid
+; state though. rdar://8465407
+; CHECK-NOT: mov sp, r7
+; CHECK: sub.w r4, r7, #8
+; CHECK: mov sp, r4
 ; CHECK: pop
   ret i32 0
 }
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
new file mode 100644
index 000000000000..313728c1b56a
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -0,0 +1,34 @@
+; rdar://8465407
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+%struct.buf = type opaque
+
+declare void @bar() nounwind optsize
+
+define void @foo() nounwind optsize {
+; CHECK: foo:
+; CHECK: push
+; CHECK: add r7, sp, #4
+; CHECK: sub sp, #4
+entry:
+  %m.i = alloca %struct.buf*, align 4
+  br label %bb
+
+bb:
+  br i1 undef, label %bb3, label %bb2
+
+bb2:
+  call void @bar() nounwind optsize
+  br i1 undef, label %bb, label %bb3
+
+bb3:
+  br i1 undef, label %return, label %bb
+
+return:
+; CHECK: %return
+; 'mov sp, r7' would have left sp in an invalid state
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub, sp, #4
+; CHECK: add sp, #4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll b/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
new file mode 100644
index 000000000000..5b91a5f65aee
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; Radar 8724703: Make sure that a t2ADDrSPi instruction with SP as the
+; destination register is narrowed to tADDspi instead of tADDrSPi.
+
+define void @test() nounwind {
+entry:
+; CHECK: sub.w
+; CHECK: add.w
+  %Buffer.i = alloca [512 x i8], align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 22473bb35a0a..6fb2fc888d9f 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -38,3 +38,14 @@ entry:
   %or = or i32 %and2, %and                        ; <i32> [#uses=1]
   ret i32 %or
 }
+
+; rdar://8752056
+define i32 @f4(i32 %a) nounwind {
+; CHECK: f4
+; CHECK: movw r1, #3137
+; CHECK: bfi r1, r0, #15, #5
+  %1 = shl i32 %a, 15
+  %ins7 = and i32 %1, 1015808
+  %ins12 = or i32 %ins7, 3137
+  ret i32 %ins12
+}
diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll
new file mode 100644
index 000000000000..01ef472d3104
--- /dev/null
+++ b/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; Formerly crashed, 3573915.
+
+define void @RotateStarsFP_Vec() nounwind {
+bb.nph372:
+  br label %bb8
+
+bb8:                                              ; preds = %bb8, %bb.nph372
+  %0 = fadd <4 x float> undef, <float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000>
+  %1 = fmul <4 x float> %0, undef
+  %2 = fmul <4 x float> %1, undef
+  %3 = fadd <4 x float> undef, %2
+  store <4 x float> %3, <4 x float>* undef, align 4
+  br label %bb8
+; CHECK: RotateStarsFP_Vec:
+; CHECK: vldmia
+}
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index f7ec5a3b577c..d06f8a7beeb0 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -19,6 +19,6 @@ entry:
   %0 = fmul double %a, %b
 ; CORTEXM3: blx ___muldf3
 ; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64  d0, d1, d0
+; CORTEXA8: vmul.f64  d16, d17, d16
   ret double %0
 }
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 583f4057bcd9..b8c8cb122a19 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,15 +1,20 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
 
 define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
+; CHECK: fht:
 entry:
   br label %bb5
 
 bb5:                                              ; preds = %bb5, %entry
+; CHECK: %bb5
+; CHECK: bne
   br i1 undef, label %bb5, label %bb.nph
 
 bb.nph:                                           ; preds = %bb5
   br label %bb7
 
+; Loop preheader
+; CHECK: vmov.f32
 bb7:                                              ; preds = %bb9, %bb.nph
   %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
   %tmp79 = add i32 undef, undef                   ; <i32> [#uses=1]
@@ -19,6 +24,9 @@ bb7:                                              ; preds = %bb9, %bb.nph
   br label %bb8
 
 bb8:                                              ; preds = %bb8, %bb7
+; CHECK: %bb8
+; CHECK-NOT: vmov.f32
+; CHECK: blt
   %tmp54 = add i32 0, %tmp53                      ; <i32> [#uses=0]
   %fi.1 = getelementptr float* %fz, i32 undef     ; <float*> [#uses=2]
   %tmp80 = add i32 0, %tmp79                      ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index e63a115273ff..2c00c70c0db6 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 \
+; RUN: llc < %s -mtriple=thumb-apple-darwin -mattr=+thumb2 \
 ; RUN:    | FileCheck %s -check-prefix=CHECK-THUMB
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
 ; RUN:    | FileCheck %s -check-prefix=CHECK-THUMBV7M
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 97295341858c..68b5d1cc94fb 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -27,7 +27,7 @@ define i32 @test3() {
 ; DARWIN: sub.w sp, sp, #805306368
 ; DARWIN: sub sp, #20
 ; LINUX: test3:
-; LINUX: stmdb   sp!, {r4, r7, r11, lr}
+; LINUX: push.w {r4, r7, r11, lr}
 ; LINUX: sub.w sp, sp, #805306368
 ; LINUX: sub sp, #16
     %retval = alloca i32, align 4
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
deleted file mode 100644
index 46e053ca4ea5..000000000000
--- a/test/CodeGen/Thumb2/load-global.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
-
-@G = external global i32
-
-define i32 @test1() {
-; STATIC: _test1:
-; STATIC: .long _G
-
-; DYNAMIC: _test1:
-; DYNAMIC: .long L_G$non_lazy_ptr
-
-; PIC: _test1
-; PIC: add r0, pc
-; PIC: .long L_G$non_lazy_ptr-(LPC0_0+4)
-
-; LINUX: test1
-; LINUX: .long G(GOT)
-	%tmp = load i32* @G
-	ret i32 %tmp
-}
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
deleted file mode 100644
index fde2ee0ab0c9..000000000000
--- a/test/CodeGen/Thumb2/machine-licm-vdup.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim                -arm-vdup-splat | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s 
-; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
-; Eventually this should become the default and be moved into machine-licm.ll.
-; FIXME: the vdup should be hoisted out of the loop, 8248029.
-
-define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
-entry:
-; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
-  br i1 undef, label %bb1, label %bb2
-
-bb1:
-; CHECK-NEXT: %bb1
-; CHECK: vdup.32 q1, r3
-  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
-  %tmp1 = shl i32 %indvar, 2
-  %gep1 = getelementptr i8* %ptr1, i32 %tmp1
-  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
-  %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
-  %gep2 = getelementptr i8* %ptr2, i32 %tmp1
-  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
-  %indvar.next = add i32 %indvar, 1
-  %cond = icmp eq i32 %indvar.next, 10
-  br i1 %cond, label %bb2, label %bb1
-
-bb2:
-  ret void
-}
-
-; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index b949b2f30506..5e776dd8937c 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -3,9 +3,6 @@
 ; rdar://7353541
 ; rdar://7354376
 
-; The generated code is no where near ideal. It's not recognizing the two
-; constantpool entries being loaded can be merged into one.
-
 @GV = external global i32                         ; <i32*> [#uses=2]
 
 define void @t1(i32* nocapture %vals, i32 %c) nounwind {
@@ -17,21 +14,21 @@ entry:
 
 bb.nph:                                           ; preds = %entry
 ; CHECK: BB#1
-; CHECK: ldr.n r2, LCPI0_0
+; CHECK: movw r2, :lower16:L_GV$non_lazy_ptr
+; CHECK: movt r2, :upper16:L_GV$non_lazy_ptr
 ; CHECK: ldr r2, [r2]
 ; CHECK: ldr r3, [r2]
 ; CHECK: LBB0_2
-; CHECK: LCPI0_0:
-; CHECK-NOT: LCPI0_1:
+; CHECK-NOT: LCPI0_0:
 
 ; PIC: BB#1
-; PIC: ldr.n r2, LCPI0_0
+; PIC: movw r2, :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4))
+; PIC: movt r2, :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4))
 ; PIC: add r2, pc
 ; PIC: ldr r2, [r2]
 ; PIC: ldr r3, [r2]
 ; PIC: LBB0_2
-; PIC: LCPI0_0:
-; PIC-NOT: LCPI0_1:
+; PIC-NOT: LCPI0_0:
 ; PIC: .section
   %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
   br label %bb
@@ -55,8 +52,8 @@ return:                                           ; preds = %bb, %entry
 define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: adr r{{.}}, #LCPI1_0
-; CHECK: vldmia r3, {d0, d1}
+; CHECK: mov.w r3, #1065353216
+; CHECK: vdup.32 q{{.*}}, r3
   br i1 undef, label %bb1, label %bb2
 
 bb1:
@@ -76,11 +73,50 @@ bb2:
   ret void
 }
 
-; CHECK: LCPI1_0:
-; CHECK: .section
+; CHECK-NOT: LCPI1_0:
 
 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
 
 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; rdar://8241368
+; isel should not fold immediate into eor's which would have prevented LICM.
+define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone {
+; CHECK: t3:
+bb.nph:
+; CHECK: bb.nph
+; CHECK: movw {{(r[0-9])|(lr)}}, #32768
+; CHECK: movs {{(r[0-9])|(lr)}}, #8
+; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386
+; CHECK: movw {{(r[0-9])|(lr)}}, #65534
+; CHECK: movt {{(r[0-9])|(lr)}}, #65535
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+; CHECK: bb
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
+; CHECK: eor.w
+; CHECK-NOT: eor
+; CHECK: and
+  %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
+  %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; <i16> [#uses=3]
+  %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ]     ; <i8> [#uses=1]
+  %0 = trunc i16 %crc_addr.112 to i8              ; <i8> [#uses=1]
+  %1 = xor i8 %data_addr.013, %0                  ; <i8> [#uses=1]
+  %2 = and i8 %1, 1                               ; <i8> [#uses=1]
+  %3 = icmp eq i8 %2, 0                           ; <i1> [#uses=2]
+  %4 = xor i16 %crc_addr.112, 16386               ; <i16> [#uses=1]
+  %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; <i16> [#uses=1]
+  %5 = lshr i16 %crc_addr.0, 1                    ; <i16> [#uses=2]
+  %6 = or i16 %5, -32768                          ; <i16> [#uses=1]
+  %crc_addr.2 = select i1 %3, i16 %5, i16 %6      ; <i16> [#uses=2]
+  %7 = add i8 %i.011, 1                           ; <i8> [#uses=2]
+  %8 = lshr i8 %data_addr.013, 1                  ; <i8> [#uses=1]
+  %exitcond = icmp eq i8 %7, 8                    ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb8, label %bb
+
+bb8:                                              ; preds = %bb
+  ret i16 %crc_addr.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
deleted file mode 100644
index 4df06b836fc5..000000000000
--- a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
-
-define void @b(i32 %x) nounwind optsize {
-entry:
-; CHECK: b
-; CHECK: mov r2, sp
-; CHECK: mls r0, r0, r1, r2
-; CHECK: mov sp, r0
-  %0 = mul i32 %x, 24                             ; <i32> [#uses=1]
-  %vla = alloca i8, i32 %0, align 1               ; <i8*> [#uses=1]
-  call arm_aapcscc  void @a(i8* %vla) nounwind optsize
-  ret void
-}
-
-declare void @a(i8*) optsize
diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll
index a54d09e62919..93ae7c428bdf 100644
--- a/test/CodeGen/Thumb2/thumb2-barrier.ll
+++ b/test/CodeGen/Thumb2/thumb2-barrier.ll
@@ -1,17 +1,31 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
 
-declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
 
-define void @t1() {
-; CHECK: t1:
-; CHECK: dsb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+define void @t_st() {
+; CHECK: t_st:
+; CHECK: dmb st
+  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true)
   ret void
 }
 
-define void @t2() {
-; CHECK: t2:
-; CHECK: dmb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+define void @t_sy() {
+; CHECK: t_sy:
+; CHECK: dmb sy
+  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 true)
+  ret void
+}
+
+define void @t_ishst() {
+; CHECK: t_ishst:
+; CHECK: dmb ishst
+  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 false)
+  ret void
+}
+
+define void @t_ish() {
+; CHECK: t_ish:
+; CHECK: dmb ish
+  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
   ret void
 }
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index cc2ef140d113..bcf10eff729b 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -23,7 +23,6 @@ bb52:                                             ; preds = %newFuncRoot
 ; CHECK: movne
 ; CHECK: moveq
 ; CHECK: pop
-; CHECK-NEXT: @ BB#1:
   %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
   %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
   %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index 22d4e88ed17d..a747d5f75697 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -4,7 +4,7 @@
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-;CHECK: ldrd r2, [r2]
+;CHECK: ldrd r2, r3, [r2]
 	%0 = load i64** @b, align 4
 	%1 = load i64* %0, align 4
 	%2 = mul i64 %1, %a
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 1dc3614993bd..adb6dde2c788 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -53,7 +53,7 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
 
 define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
 ;CHECK: t2_const_var2_2_ok_2:
-;CHECK: add.w   r0, r0, #-1426063360
+;CHECK: add.w   r0, r0, #2868903936
 ;CHECK: add.w   r0, r0, #47616
     %ret = add i32 %lhs, 2868951552 ; 0xab00ba00
     ret i32 %ret
@@ -61,7 +61,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
 
 define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
 ;CHECK: t2_const_var2_2_ok_3:
-;CHECK: add.w   r0, r0, #-1426019584
+;CHECK: add.w   r0, r0, #2868947712
 ;CHECK: adds    r0, #16
     %ret = add i32 %lhs, 2868947728 ; 0xab00ab10
     ret i32 %ret
@@ -69,7 +69,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
 
 define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
 ;CHECK: t2_const_var2_2_ok_4:
-;CHECK: add.w   r0, r0, #-1426019584
+;CHECK: add.w   r0, r0, #2868947712
 ;CHECK: add.w   r0, r0, #1048592
     %ret = add i32 %lhs, 2869996304 ; 0xab10ab10
     ret i32 %ret
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index b1515b514820..8d1de55b4dc6 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -6,3 +6,21 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
+
+%struct.CMPoint = type { %struct.Point, float, float, [5 x float] }
+%struct.Point = type { float, float }
+
+define %struct.CMPoint* @t1(i32 %i, i32 %j, i32 %n, %struct.CMPoint* %thePoints) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: mla     r0, r2, r0, r1
+; CHECK: add.w   r0, r0, r0, lsl #3
+; CHECL: add.w   r0, r3, r0, lsl #2
+  %mul = mul i32 %n, %i
+  %add = add i32 %mul, %j
+  %0 = ptrtoint %struct.CMPoint* %thePoints to i32
+  %mul5 = mul i32 %add, 36
+  %add6 = add i32 %mul5, %0
+  %1 = inttoptr i32 %add6 to %struct.CMPoint*
+  ret %struct.CMPoint* %1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index 56cb1f6fb409..ceefabbbfa21 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -2,8 +2,8 @@
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
-; CHECK: sub.w r0, r1, #-2147483648
-; CHECK: subs r0, #1
+; CHECK: mvn r0, #-2147483648
+; CHECK: add r0, r1
 ; CHECK: cmp r2, #10
 ; CHECK: it  gt
 ; CHECK: movgt r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 4f92c9333806..d9a0617f5a46 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -15,11 +15,34 @@ define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
 ; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
 entry:
-  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+  %aligned_vec = alloca <4 x float>, align 16
+  %"alloca point" = bitcast i32 0 to i32
+  %vecptr = bitcast <4 x float>* %aligned_vec to i8*
+  %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind 
   store float 6.300000e+01, float* undef, align 4
   %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 0.000000e+00, float* undef, align 4
   %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+  %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
   %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
@@ -44,7 +67,16 @@ bb4:                                              ; preds = %bb193, %entry
   %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
   %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
-  %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
+  %tmp1 = fadd <4 x float> %20, %ld3
+  %tmp2 = fadd <4 x float> %tmp1, %ld4
+  %tmp3 = fadd <4 x float> %tmp2, %ld5
+  %tmp4 = fadd <4 x float> %tmp3, %ld6
+  %tmp5 = fadd <4 x float> %tmp4, %ld7
+  %tmp6 = fadd <4 x float> %tmp5, %ld8
+  %tmp7 = fadd <4 x float> %tmp6, %ld9
+  %tmp8 = fadd <4 x float> %tmp7, %ld10
+  %tmp9 = fadd <4 x float> %tmp8, %ld11
+  %21 = fadd <4 x float> %tmp9, %ld12
   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
   %tmp = extractelement <4 x i1> %22, i32 0
   br i1 %tmp, label %bb193, label %bb186
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index c39b82a1fe36..a662dd58df57 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s
+; There are no MMX instructions here.  We use add+adcl for the adds.
 
 define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
 entry:
@@ -7,9 +8,8 @@ entry:
 
 bb26:		; preds = %bb26, %entry
 
-; CHECK:  movq	({{.*}},8), %mm
-; CHECK:  paddq	({{.*}},8), %mm
-; CHECK:  paddq	%mm{{[0-7]}}, %mm
+; CHECK:  addl  %e
+; CHECK:  adcl  %e
 
 	%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]		; <i32> [#uses=3]
 	%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]		; <<1 x i64>> [#uses=1]
@@ -27,3 +27,38 @@ bb31:		; preds = %bb26, %entry
 	%sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]		; <<1 x i64>> [#uses=1]
 	ret <1 x i64> %sum.035.1
 }
+
+
+; This is the original test converted to use MMX intrinsics.
+
+define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind {
+entry:
+        %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx
+	%tmp2942 = icmp eq i32 %count, 0		; <i1> [#uses=1]
+	br i1 %tmp2942, label %bb31, label %bb26
+
+bb26:		; preds = %bb26, %entry
+
+; CHECK:  movq	({{.*}},8), %mm
+; CHECK:  paddq	({{.*}},8), %mm
+; CHECK:  paddq	%mm{{[0-7]}}, %mm
+
+	%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]		; <i32> [#uses=3]
+	%sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ]		; <x86_mmx> [#uses=1]
+	%tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0		; <x86_mmx*> [#uses=1]
+	%tmp14 = load x86_mmx* %tmp13		; <x86_mmx> [#uses=1]
+	%tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0		; <x86_mmx*> [#uses=1]
+	%tmp19 = load x86_mmx* %tmp18		; <x86_mmx> [#uses=1]
+	%tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14)		; <x86_mmx> [#uses=1]
+	%tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0)		; <x86_mmx> [#uses=2]
+	%tmp25 = add i32 %i.037.0, 1		; <i32> [#uses=2]
+	%tmp29 = icmp ult i32 %tmp25, %count		; <i1> [#uses=1]
+	br i1 %tmp29, label %bb26, label %bb31
+
+bb31:		; preds = %bb26, %entry
+	%sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ]		; <x86_mmx> [#uses=1]
+        %t = bitcast x86_mmx %sum.035.1 to <1 x i64>
+	ret <1 x i64> %t
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 2093b8f68744..006cf2e43a2f 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -5,10 +5,10 @@ target triple = "i686-apple-darwin8"
 
 define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) {
 entry:
-	%tmp4 = bitcast <1 x i64> %mask1 to <8 x i8>		; <<8 x i8>> [#uses=1]
-	%tmp6 = bitcast <1 x i64> %c64 to <8 x i8>		; <<8 x i8>> [#uses=1]
-	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> %tmp6, <8 x i8> %tmp4, i8* %P )
+	%tmp4 = bitcast <1 x i64> %mask1 to x86_mmx		; <x86_mmx> [#uses=1]
+	%tmp6 = bitcast <1 x i64> %c64 to x86_mmx		; <x86_mmx> [#uses=1]
+	tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, i8* %P )
 	ret void
 }
 
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 6128d8b92d11..660d4fe7b19e 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,17 +1,16 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
-@R = external global <1 x i64>          ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx          ; <x86_mmx*> [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) {
 entry:
-        %tmp4 = bitcast <1 x i64> %B to <4 x i16>               ; <<4 x i16>> [#uses=1]
-        %tmp6 = bitcast <1 x i64> %A to <4 x i16>               ; <<4 x i16>> [#uses=1]
-        %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 )   ; <<4 x i16>> [#uses=1]
-        %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64>            ; <<1 x i64>> [#uses=1]
-        store <1 x i64> %tmp8, <1 x i64>* @R
+        %tmp2 = bitcast <1 x i64> %A to x86_mmx
+        %tmp3 = bitcast <1 x i64> %B to x86_mmx
+        %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 )   ; <x86_mmx> [#uses=1]
+        store x86_mmx %tmp7, x86_mmx* @R
         tail call void @llvm.x86.mmx.emms( )
         ret void
 }
 
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
 
 declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 2c513f17811a..1c5e6766fd6e 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -2,19 +2,17 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rdi, %mm1}
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw	%mm0, %mm1}
 
-@R = external global <1 x i64>		; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx		; <x86_mmx*> [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
 entry:
-	%tmp4 = bitcast <1 x i64> %B to <4 x i16>		; <<4 x i16>> [#uses=1]
-	%tmp6 = bitcast <1 x i64> %A to <4 x i16>		; <<4 x i16>> [#uses=1]
-	%tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 )		; <<4 x i16>> [#uses=1]
-	%tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64>		; <<1 x i64>> [#uses=1]
-	store <1 x i64> %tmp8, <1 x i64>* @R
+	%tmp4 = bitcast <1 x i64> %B to x86_mmx		; <<4 x i16>> [#uses=1]
+	%tmp6 = bitcast <1 x i64> %A to x86_mmx		; <<4 x i16>> [#uses=1]
+	%tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 )		; <x86_mmx> [#uses=1]
+	store x86_mmx %tmp7, x86_mmx* @R
 	tail call void @llvm.x86.mmx.emms( )
 	ret void
 }
 
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
-
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
 declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
deleted file mode 100644
index 3f9845c3c3ec..000000000000
--- a/test/CodeGen/X86/2007-10-16-fp80_select.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86
-; ModuleID = 'bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
-        %struct.wxPoint2DInt = type { i32, i32 }
-
-define x86_fp80 @_ZNK12wxPoint2DInt14GetVectorAngleEv(%struct.wxPoint2DInt* %this) {
-entry:
-        br i1 false, label %cond_true, label %UnifiedReturnBlock
-
-cond_true:              ; preds = %entry
-        %tmp8 = load i32* null, align 4         ; <i32> [#uses=1]
-        %tmp9 = icmp sgt i32 %tmp8, -1          ; <i1> [#uses=1]
-        %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000           ; <x86_fp80> [#uses=1]
-        ret x86_fp80 %retval
-
-UnifiedReturnBlock:             ; preds = %entry
-        ret x86_fp80 0xK4005B400000000000000
-}
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 7463a0eebf34..bdacf5071128 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
 ; PR1909
 
 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00"		; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index dc8c097efc50..5089e8c5b69d 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -5,15 +5,15 @@ entry:
 	tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind 
 	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
 	tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
-	%tmp1 = tail call <2 x i32> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind 		; <<2 x i32>> [#uses=1]
+	%tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind 		; <x86_mmx> [#uses=1]
 	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
 	tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
-	%tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef ) nounwind 		; <i32> [#uses=1]
+	%tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind 		; <i32> [#uses=1]
 	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
 	tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
-	tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef, i32 undef, i32 %tmp3 ) nounwind 
+	tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind 
 	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
 	tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
-	%tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> %tmp1 ) nounwind 		; <i32> [#uses=0]
+	%tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind 		; <i32> [#uses=0]
 	ret i32 undef
 }
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 500cd1f08cfa..86652826aeac 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2
 
 @atomic = global double 0.000000e+00		; <double*> [#uses=1]
 @atomic2 = global double 0.000000e+00		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 98919ee5221a..cf04dcf0f18c 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 75
 ; PR2539
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
 
 external global <4 x float>, align 1		; <<4 x float>*>:0 [#uses=2]
 external global <4 x float>, align 1		; <<4 x float>*>:1 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index c76dd7de1256..53402c04511c 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -17,11 +17,13 @@ entry:
 	br i1 false, label %bb.nph144.split, label %bb133
 
 bb.nph144.split:		; preds = %entry
-	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> zeroinitializer, i8* null ) nounwind
+        %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx
+        %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx
+	tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, i8* null ) nounwind
 	unreachable
 
 bb133:		; preds = %entry
 	ret void
 }
 
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*) nounwind
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 60be0d51e7e7..2dc1deaf1738 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,6 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
-; PR2687
+; originally from PR2687, but things don't work that way any more.
+; there are no MMX instructions here; we use XMM.
 
 define <2 x double> @a(<2 x i32> %x) nounwind {
 entry:
@@ -13,3 +16,20 @@ entry:
   %y = fptosi <2 x double> %x to <2 x i32>
   ret <2 x i32> %y
 }
+
+; This is how to get MMX instructions.
+
+define <2 x double> @a2(x86_mmx %x) nounwind {
+entry:
+  %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
+  ret <2 x double> %y
+}
+
+define x86_mmx @b2(<2 x double> %x) nounwind {
+entry:
+  %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
+  ret x86_mmx %y
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 3c64fe45c997..86e50c98bfdb 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -15,14 +15,16 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
-@x = common global i32 0		; <i32*> [#uses=1]
+@x = common global i32 0
 
 define i32 @aci(i32* %pw) nounwind {
 entry:
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
-	%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=0]
-	%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=1]
-	%asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0		; <i32> [#uses=1]
-	%1 = add i32 %asmresult3, %0		; <i32> [#uses=1]
-	ret i32 %1
+	%0 = load i32* @x, align 4
+	%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+	%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
+	%asmresult2 = extractvalue { i32, i32 } %asmtmp, 0
+	%asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0
+	%1 = add i32 %asmresult2, %asmresult3
+	%2 = add i32 %0, %1
+	ret i32 %2
 }
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index afeb358da572..9d144a4be0e9 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; Now this test spills one register. But a reload in the loop is cheaper than
+; the divsd so it's a win.
 
 define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+; CHECK: fourn
 entry:
 	br label %bb
 
@@ -11,6 +14,11 @@ bb:		; preds = %bb, %entry
 	%1 = icmp sgt i32 %0, 2		; <i1> [#uses=1]
 	br i1 %1, label %bb30.loopexit, label %bb
 
+; CHECK: %bb30.loopexit
+; CHECK: divsd %xmm0
+; CHECK: movsd %xmm0, 16(%esp)
+; CHECK: .align
+; CHECK-NEXT: %bb3
 bb3:		; preds = %bb30.loopexit, %bb25, %bb3
 	%2 = load i32* null, align 4		; <i32> [#uses=1]
 	%3 = mul i32 %2, 0		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 784bc72f42e9..3d0766cde845 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -1,8 +1,8 @@
 ; Linux doesn't support stack realignment for functions with allocas (PR2888).
 ; Until it does, we shouldn't use movaps to access the stack.  On targets with
 ; sufficiently aligned stack (e.g. darwin) we should.
-
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
 
 
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
deleted file mode 100644
index 2e114ab5ae88..000000000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define zeroext i16 @a(i16 zeroext %x) nounwind {
-entry:
-	%div = udiv i16 %x, 33		; <i32> [#uses=1]
-	ret i16 %div
-}
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
deleted file mode 100644
index 7c811afa51d3..000000000000
--- a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define signext i16 @a(i16 signext %x) nounwind {
-entry:
-	%div = sdiv i16 %x, 33		; <i32> [#uses=1]
-	ret i16 %div
-}
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
index 9c71469b5b20..4feb764bec6b 100644
--- a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2  -disable-mmx -enable-legalize-types-checking
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-legalize-types-checking
 
 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
 
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
index 8684f4a19ca4..8b3094be4b06 100644
--- a/test/CodeGen/X86/2009-01-27-NullStrings.ll
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
 ; CHECK: .section __TEXT,__cstring,cstring_literals
 
-@x = internal constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
+@x = internal unnamed_addr constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
 
 @y = global [1 x i8]* @x
 
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index bb01e5afceff..0b5b7bdd94d7 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 757042e5be42..dd8823574cde 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -relocation-model=pic > %t2
-; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
+; RUN: grep {leaq.*TLSGD} %t2
+; RUN; grep {__tls_get_addr} %t2
 ; PR4004
 
 @i = thread_local global i32 15
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c5982285afe0..98b1e0ed2f42 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
 
 define x86_fp80 @a(i64 %x) nounwind readnone {
 entry:
-	%conv = sitofp i64 %x to x86_fp80		; <x86_fp80> [#uses=1]
-	ret x86_fp80 %conv
+        %conv = sitofp i64 %x to x86_fp80               ; <x86_fp80> [#uses=1]
+        ret x86_fp80 %conv
 }
-
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 810a6f4d6c65..12bd28518762 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -o %t1
-; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
-; RUN: grep "movaps	\\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps	\\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
-target triple = "x86_64-pc-mingw64"
+; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; CHECK: subq    $40, %rsp
+; CHECK: movaps  %xmm8, (%rsp)
+; CHECK: movaps  %xmm7, 16(%rsp)
 
 define i32 @a() nounwind {
 entry:
-	tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
-	ret i32 undef
+        tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind
+        ret i32 undef
 }
-
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 336f17e2a325..01852a6eca1f 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | not grep movl
 
 define <8 x i8> @a(i8 zeroext %x) nounwind {
   %r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
index eb9378b9527b..366985678e54 100644
--- a/test/CodeGen/X86/2009-07-07-SplitICmp.ll
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
 
 define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {
        %D = icmp sgt <2 x i32> %A, %B
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
index b9b09a3f0004..288eef4f6991 100644
--- a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -1,10 +1,12 @@
 ; RUN: llc < %s -march=x86-64
 ; PR4669
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
 
 define <1 x i64> @test(i64 %t) {
 entry:
 	%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
-	%t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
-	ret <1 x i64> %t2
+        %t0 = bitcast <1 x i64> %t1 to x86_mmx
+	%t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
+        %t3 = bitcast x86_mmx %t2 to <1 x i64>
+	ret <1 x i64> %t3
 }
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
index de32c2159ce1..f9b5f9e0b1fd 100644
--- a/test/CodeGen/X86/2009-08-06-inlineasm.ll
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=i386-pc-linux-gnu < %s
+; RUN: false
+; XRUN: llc -mtriple=i386-pc-linux-gnu < %s
 ; PR4668
 ; XFAIL: *
 ; FIXME: If the coalescer happens to coalesce %level.1 with the copy to EAX
 ; (for ret) then this will fail to compile. The fundamental problem is
 ; once the coalescer fixes a virtual register to physical register we can't
-; evict it.
+; evict it. This started passing again due to the changes for PR8969
+; so I've disabled it with a bigger stick.
 
 define i32 @x(i32 %qscale) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index f9ca861c558a..adac20336048 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -2,9 +2,9 @@
 
 ; This test shouldn't require spills.
 
-; CHECK: subq  $8, %rsp
+; CHECK: pushq
 ; CHECK-NOT: $rsp
-; CHECK: addq  $8, %rsp
+; CHECK: popq
 
 	%struct..0anon = type { i32 }
 	%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index f7ba661c4f75..823e0ca465ef 100644
--- a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -21,7 +21,7 @@ define void @leaf() nounwind {
 ; CHECK: leaf:
 ; CHECK-NOT: -8(%rsp)
 ; CHECK: leaq link_ptr@TLSGD
-; CHECK: call __tls_get_addr@PLT
+; CHECK: callq __tls_get_addr@PLT
 "file foo2.c, line 14, bb1":
   %p = alloca %test*, align 8                     ; <%test**> [#uses=4]
   br label %"file foo2.c, line 14, bb2"
diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
index 76cc1a497d3d..42f19b3ad86a 100644
--- a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
@@ -2,33 +2,27 @@
 ; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 4cd3be35e820..fa3d5fbcdc48 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,12 +1,12 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
+; There are no MMX operations here, so we use XMM or i64.
 
 define void @ti8(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <8 x i8>
-; CHECK: movdq2q
         %tmp2 = bitcast double %b to <8 x i8>
-; CHECK: movdq2q
         %tmp3 = add <8 x i8> %tmp1, %tmp2
+; CHECK:  paddb %xmm1, %xmm0
         store <8 x i8> %tmp3, <8 x i8>* null
         ret void
 }
@@ -14,10 +14,9 @@ entry:
 define void @ti16(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <4 x i16>
-; CHECK: movdq2q
         %tmp2 = bitcast double %b to <4 x i16>
-; CHECK: movdq2q
         %tmp3 = add <4 x i16> %tmp1, %tmp2
+; CHECK:  paddw %xmm1, %xmm0
         store <4 x i16> %tmp3, <4 x i16>* null
         ret void
 }
@@ -25,10 +24,9 @@ entry:
 define void @ti32(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <2 x i32>
-; CHECK: movdq2q
         %tmp2 = bitcast double %b to <2 x i32>
-; CHECK: movdq2q
         %tmp3 = add <2 x i32> %tmp1, %tmp2
+; CHECK:  paddd %xmm1, %xmm0
         store <2 x i32> %tmp3, <2 x i32>* null
         ret void
 }
@@ -36,10 +34,60 @@ entry:
 define void @ti64(double %a, double %b) nounwind {
 entry:
         %tmp1 = bitcast double %a to <1 x i64>
-; CHECK: movdq2q
         %tmp2 = bitcast double %b to <1 x i64>
-; CHECK: movdq2q
         %tmp3 = add <1 x i64> %tmp1, %tmp2
+; CHECK:  addq  %rax, %rcx
         store <1 x i64> %tmp3, <1 x i64>* null
         ret void
 }
+
+; MMX intrinsics calls get us MMX instructions.
+
+define void @ti8a(double %a, double %b) nounwind {
+entry:
+        %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+        %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
+        store x86_mmx %tmp3, x86_mmx* null
+        ret void
+}
+
+define void @ti16a(double %a, double %b) nounwind {
+entry:
+        %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+        %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
+        store x86_mmx %tmp3, x86_mmx* null
+        ret void
+}
+
+define void @ti32a(double %a, double %b) nounwind {
+entry:
+        %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+        %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
+        store x86_mmx %tmp3, x86_mmx* null
+        ret void
+}
+
+define void @ti64a(double %a, double %b) nounwind {
+entry:
+        %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+        %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
+        store x86_mmx %tmp3, x86_mmx* null
+        ret void
+}
+ 
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index e20f1d8c79ce..3738f802e95a 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -11,7 +11,7 @@ target triple = "i386-apple-darwin10.0.0"
 ; Verify that %esi gets spilled before the call.
 ; CHECK: Z4test1SiS
 ; CHECK: movl %esi,{{.*}}(%ebp) 
-; CHECK: call __Z6throwsv
+; CHECK: calll __Z6throwsv
 
 define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp {
 entry:
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index d2115496f8f4..f9bda7f1007e 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
-; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
+; Test to check .debug_loc support. This test case emits many debug_loc entries.
+
+; CHECK: Loc expr size
+; CHECK-NEXT: DW_OP_reg
 
 %0 = type { double }
 
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 13f72a99d2b2..60171eb62973 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -61,6 +61,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK-NEXT: .short  1
 ; CHECK-NEXT: .byte   85
 ; CHECK-NEXT: .quad   Ltmp3
-; CHECK-NEXT: .quad   Lfunc_end
+; CHECK-NEXT: .quad   Ltmp6
 ; CHECK-NEXT: .short  1
 ; CHECK-NEXT: .byte   83
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 80643d0792ac..ad8546ef8ce8 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -39,6 +39,6 @@ entry:
 !13 = metadata !{i32 7, i32 0, metadata !14, null}
 !14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
 
-;CHECK:	        DEBUG_VALUE: bar:x <- EBX+0
-;CHECK-NEXT:Ltmp
-;CHECK-NEXT:	DEBUG_VALUE: foo:y <- 1+0
+;CHECK: DEBUG_VALUE: bar:x <- E
+;CHECK: Ltmp
+;CHECK:	DEBUG_VALUE: foo:y <- 1+0
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index c6421a247eaa..6db3ce1f42c0 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
 ; <rdar://problem/8124405>
 
 %struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
@@ -21,9 +21,9 @@ bb:
 ; statement. It can be an ADD or LEA instruction, it's not important which one
 ; it is.
 ;
-;      CHECK: ## %bb
-; CHECK-NEXT: addq $64036, %rdi
-;      CHECK: rep;stosl
+; CHECK: # %bb
+; CHECK: addq $64036, %rdi
+; CHECK: rep;stosl
 
   %tmp5 = bitcast i32* %tmp4 to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
diff --git a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
index cb47d208dd44..0bbb24f6ecdf 100644
--- a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
+++ b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
@@ -3,7 +3,7 @@
 define void @foo() nounwind ssp {
 entry:
 ; CHECK: foo
-; CHECK: subq $8, %rsp
+; CHECK: pushq
 ; CHECK: int $3
   call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
   call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
@@ -18,7 +18,7 @@ return:                                           ; preds = %entry
 define void @bar() nounwind ssp {
 entry:
 ; CHECK: bar
-; CHECK-NOT: subq $8, %rsp
+; CHECK-NOT: pushq
 ; CHECK: int $3
   call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
   call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
new file mode 100644
index 000000000000..bed8c8a77b9a
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin10 < %s - | FileCheck %s
+; Radar 8286101
+; CHECK: .file   2 "<stdin>"
+
+define i32 @foo() nounwind ssp {
+entry:
+  ret i32 42, !dbg !8
+}
+
+define i32 @bar() nounwind ssp {
+entry:
+  ret i32 21, !dbg !10
+}
+
+!llvm.dbg.sp = !{!0, !6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 53, i32 13, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 4, i32 13, metadata !11, null}
+!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
new file mode 100644
index 000000000000..9bbd6919421f
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd8.1 -o /dev/null
+; This formerly crashed, PR 8154.
+
+module asm ".weak sem_close"
+module asm ".equ sem_close, _sem_close"
+module asm ".weak sem_destroy"
+module asm ".equ sem_destroy, _sem_destroy"
+module asm ".weak sem_getvalue"
+module asm ".equ sem_getvalue, _sem_getvalue"
+module asm ".weak sem_init"
+module asm ".equ sem_init, _sem_init"
+module asm ".weak sem_open"
+module asm ".equ sem_open, _sem_open"
+module asm ".weak sem_post"
+module asm ".equ sem_post, _sem_post"
+module asm ".weak sem_timedwait"
+module asm ".equ sem_timedwait, _sem_timedwait"
+module asm ".weak sem_trywait"
+module asm ".equ sem_trywait, _sem_trywait"
+module asm ".weak sem_unlink"
+module asm ".equ sem_unlink, _sem_unlink"
+module asm ".weak sem_wait"
+module asm ".equ sem_wait, _sem_wait"
+
+%struct._sem = type { i32, %struct._usem }
+%struct._usem = type { i32, i32, i32 }
+
+define void @_sem_timedwait(%struct._sem* noalias %sem) nounwind ssp {
+entry:
+  br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit
+
+while.cond.preheader:                             ; preds = %entry
+  %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1
+  br label %while.cond
+
+sem_check_validity.exit:                          ; preds = %entry
+  ret void
+
+while.cond:                                       ; preds = %while.body, %while.cond.preheader
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgl $2,$1 ;\09       sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_int", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %tmp4, i32 undef, i32 undef, i32* %tmp4) nounwind, !srcloc !0
+  br i1 undef, label %while.cond, label %return
+
+while.end:                                        ; preds = %while.cond
+  br i1 undef, label %if.end18, label %return
+
+if.end18:                                         ; preds = %while.end
+  unreachable
+
+return:                                           ; preds = %while.end, %while.body
+  ret void
+}
+
+!0 = metadata !{i32 158484}
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
new file mode 100644
index 000000000000..8fe0309421e5
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
+entry:
+  %a = alloca [64 x i8]
+  %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
+  %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
+  %d = load i8* %b, align 8
+  %e = load i8* %c, align 8
+  %f = bitcast [64 x i8]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
+  store i8 %d, i8* %b, align 8
+  store i8 %e, i8* %c, align 8
+  ret i32 0
+}
+
+; CHECK: movq	___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK: movb	30(%rsp), %dl
+; CHECK: movb	(%rsp), %sil
+; CHECK: movb	%sil, (%rsp)
+; CHECK: movb	%dl, 30(%rsp)
+; CHECK: callq	___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
new file mode 100644
index 000000000000..cae81d086ea1
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -0,0 +1,71 @@
+; RUN: llc -verify-machineinstrs -cgp-critical-edge-splitting=0 -mcpu=i386 < %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+; The bb.i basic block gets split while emitting the schedule because
+; -mcpu=i386 doesn't have CMOV.'
+;
+; That causes the PHI to be updated wrong because the jumptable data structure is remembering the original MBB.
+;
+; -cgp-critical-edge-splitting=0 prevents the edge to PHI from being split.
+
+@.str146 = external constant [4 x i8], align 1
+@.str706 = external constant [4 x i8], align 1
+@.str1189 = external constant [5 x i8], align 1
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+declare i32 @strlen(i8* nocapture) nounwind readonly
+
+define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 {
+bb.i:
+  %0 = icmp eq i8 undef, 0
+  %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0)
+  %1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly
+  switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [
+    i32 3, label %bb1.i
+    i32 4, label %bb1.i1237
+    i32 5, label %bb1.i1266
+    i32 6, label %bb1.i1275
+    i32 2, label %bb1.i1434
+    i32 8, label %bb1.i1523
+    i32 7, label %bb1.i1537
+  ]
+
+bb1.i:                                            ; preds = %bb.i
+  unreachable
+
+bb1.i1237:                                        ; preds = %bb.i
+  br i1 undef, label %bb.i1820, label %bb1.i1241
+
+bb1.i1241:                                        ; preds = %bb1.i1237
+  unreachable
+
+bb1.i1266:                                        ; preds = %bb.i
+  unreachable
+
+bb1.i1275:                                        ; preds = %bb.i
+  unreachable
+
+bb1.i1434:                                        ; preds = %bb.i
+  unreachable
+
+bb1.i1523:                                        ; preds = %bb.i
+  unreachable
+
+bb1.i1537:                                        ; preds = %bb.i
+  unreachable
+
+bb.i1820:                                         ; preds = %bb1.i1237
+  br label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+
+_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i1820, %bb.i
+  %PatchedName.0.0 = phi i8* [ undef, %bb.i1820 ], [ %Name.0, %bb.i ]
+  br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+_ZNK4llvm9StringRef10startswithES0_.exit:         ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+  %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
+  unreachable
+
+bb141:                                            ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+  unreachable
+}
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
new file mode 100644
index 000000000000..40e7f017dc3d
--- /dev/null
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; PR8297
+;
+; On i386, i64 cmpxchg is lowered during legalize types to extract the
+; 64-bit result into a pair of fixed regs. So creation of the DAG node
+; happens in a different place. See
+; X86TargetLowering::ReplaceNodeResults, case ATOMIC_CMP_SWAP.
+;
+; Neither Atomic-xx.ll nor atomic_op.ll cover this. Those tests were
+; autogenerated from C source before 64-bit variants were supported.
+;
+; Note that this case requires a loop around the cmpxchg to force
+; machine licm to query alias anlysis, exposing a bad
+; MachineMemOperand.
+define void @foo(i64* %ptr) nounwind inlinehint {
+entry:
+  br label %loop
+loop:
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+  %r = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %ptr, i64 0, i64 1)
+  %stored1  = icmp eq i64 %r, 0
+  br i1 %stored1, label %loop, label %continue
+continue:
+  ret void
+}
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
new file mode 100644
index 000000000000..79c0cf35c660
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -0,0 +1,35 @@
+; RUN: llc -O2 -asm-verbose < %s | FileCheck %s
+; Radar 8616981
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+%struct.bar = type { i32, i32 }
+
+define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
+; CHECK: TAG_formal_parameter
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12
+  ret i32 1, !dbg !13
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10, metadata !11}
+!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!12 = metadata !{i32 3, i32 47, metadata !0, null}
+!13 = metadata !{i32 4, i32 2, metadata !14, null}
+!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
new file mode 100644
index 000000000000..2368f3f69195
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 -O0
+; PR8211
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22"
+
+%0 = type { %"int[]" }
+%float = type float
+%"float[]" = type [4 x float]
+%int = type i32
+%"int[]" = type [4 x i32]
+%"long unsigned int" = type i64
+
+define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind {
+entry:
+  %a_addr = alloca i8*
+  %b_addr = alloca %0*
+  %c_addr = alloca %0*
+  %"alloca point" = bitcast i32 0 to i32
+  store i8* %a, i8** %a_addr
+  store %0* %b, %0** %b_addr
+  store %0* %c, %0** %c_addr
+  %0 = load i8** %a_addr, align 64
+  %1 = load %0** %b_addr, align 64
+  %2 = load %0** %c_addr, align 64
+  %"ssa point" = bitcast i32 0 to i32
+  br label %"2"
+
+"2":                                              ; preds = %entry
+  %3 = bitcast i8* %0 to <2 x i32>*
+  %4 = getelementptr inbounds %0* %1, i32 0, i32 0
+  %5 = bitcast %"int[]"* %4 to <4 x float>*
+  %6 = load <4 x float>* %5, align 16
+  %7 = bitcast <2 x i32>* %3 to <2 x float>*
+  %8 = bitcast <2 x float>* %7 to double*
+  %9 = load double* %8
+  %10 = insertelement <2 x double> undef, double %9, i32 0
+  %11 = insertelement <2 x double> %10, double undef, i32 1
+  %12 = bitcast <2 x double> %11 to <4 x float>
+  %13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %14 = getelementptr inbounds %0* %1, i32 0, i32 0
+  %15 = bitcast %"int[]"* %14 to <4 x float>*
+  store <4 x float> %13, <4 x float>* %15, align 16
+  %16 = bitcast i8* %0 to <2 x i32>*
+  %17 = bitcast <2 x i32>* %16 to i8*
+  %18 = getelementptr i8* %17, i64 8
+  %19 = bitcast i8* %18 to <2 x i32>*
+  %20 = getelementptr inbounds %0* %2, i32 0, i32 0
+  %21 = bitcast %"int[]"* %20 to <4 x float>*
+  %22 = load <4 x float>* %21, align 16
+  %23 = bitcast <2 x i32>* %19 to <2 x float>*
+  %24 = bitcast <2 x float>* %23 to double*
+  %25 = load double* %24
+  %26 = insertelement <2 x double> undef, double %25, i32 0
+  %27 = insertelement <2 x double> %26, double undef, i32 1
+  %28 = bitcast <2 x double> %27 to <4 x float>
+  %29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %30 = getelementptr inbounds %0* %2, i32 0, i32 0
+  %31 = bitcast %"int[]"* %30 to <4 x float>*
+  store <4 x float> %29, <4 x float>* %31, align 16
+  br label %return
+
+return:                                           ; preds = %"2"
+  ret void
+}
diff --git a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
new file mode 100644
index 000000000000..a1074b6b8f3c
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; Both values were being zero extended.
+@u = external global i8
+@s = external global i8
+define i32 @foo(i1 %cond) {
+; CHECK: @foo
+  %u_base = load i8* @u
+  %u_val = zext i8 %u_base to i32
+; CHECK: movzbl
+; CHECK: movsbl
+  %s_base = load i8* @s
+  %s_val = sext i8 %s_base to i32
+  %val = select i1 %cond, i32 %u_val, i32 %s_val
+  ret i32 %val
+}
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
new file mode 100644
index 000000000000..31446786ec15
--- /dev/null
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -disable-dot-loc -mtriple=x86_64-apple-darwin -O0 | FileCheck %s
+
+
+define void @foo() nounwind ssp {
+entry:
+  ret void, !dbg !5
+}
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 5, i32 1, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+
+; CHECK: .subsections_via_symbols
+; CHECK-NEXT: __debug_line
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
new file mode 100644
index 000000000000..b9cf65b1e733
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -enable-legalize-types-checking
+; PR8582
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+ %i17 = icmp eq <4 x i8> undef, zeroinitializer
+ %cond = extractelement <4 x i1> %i17, i32 0
+ %_comp = select i1 %cond, i8 0, i8 undef
+ %merge = insertelement <4 x i8> undef, i8 %_comp, i32 0
+ %cond3 = extractelement <4 x i1> %i17, i32 1
+ %_comp4 = select i1 %cond3, i8 0, i8 undef
+ %merge5 = insertelement <4 x i8> %merge, i8 %_comp4, i32 1
+ %cond8 = extractelement <4 x i1> %i17, i32 2
+ %_comp9 = select i1 %cond8, i8 0, i8 undef
+ %m387 = insertelement <4 x i8> %merge5, i8 %_comp9, i32 2
+ store <4 x i8> %m387, <4 x i8>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-01-10-DagCombineHang.ll b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
new file mode 100644
index 000000000000..bf438b82edf8
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; This formerly got DagCombine into a loop, PR 8916.
+
+define i32 @foo(i64 %x, i64 %y, i64 %z, i32 %a, i32 %b) {
+entry:
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %t1 = shl i64 %x, 15
+  %t2 = and i64 %t1, 4294934528
+  %t3 = or i64 %t2, %y
+  %t4 = xor i64 %z, %t3
+  %t5 = trunc i64 %t4 to i32
+  %t6 = add i32 %a, %t5
+  %t7 = add i32 %t6, %b
+  ret i32 %t7
+}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
new file mode 100644
index 000000000000..973975b658a3
--- /dev/null
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Check debug info for variable z_s
+;CHECK:       .ascii   "z_s"                  ## DW_AT_name
+;CHECK-NEXT:  .byte   0
+;CHECK-NEXT:  ## DW_AT_decl_file
+;CHECK-NEXT:  ## DW_AT_decl_line
+;CHECK-NEXT:  ## DW_AT_type
+;CHECK-NEXT:  ## DW_AT_location
+
+
+@.str1 = private unnamed_addr constant [14 x i8] c"m=%u, z_s=%d\0A\00"
+@str = internal constant [21 x i8] c"Failing test vector:\00"
+
+define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !10), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{i64 %b}, i64 0, metadata !11), !dbg !19
+  br label %while.body, !dbg !20
+
+while.body:                                       ; preds = %while.body, %entry
+  %b.addr.0 = phi i64 [ %b, %entry ], [ %rem, %while.body ]
+  %a.addr.0 = phi i64 [ %a, %entry ], [ %b.addr.0, %while.body ]
+  %rem = srem i64 %a.addr.0, %b.addr.0, !dbg !21
+  %cmp = icmp eq i64 %rem, 0, !dbg !23
+  br i1 %cmp, label %if.then, label %while.body, !dbg !23
+
+if.then:                                          ; preds = %while.body
+  tail call void @llvm.dbg.value(metadata !{i64 %rem}, i64 0, metadata !12), !dbg !21
+  ret i64 %b.addr.0, !dbg !23
+}
+
+define i32 @main() nounwind optsize ssp {
+entry:
+  %call = tail call i32 @rand() nounwind optsize, !dbg !24
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !14), !dbg !24
+  %cmp = icmp ugt i32 %call, 21, !dbg !25
+  br i1 %cmp, label %cond.true, label %cond.end, !dbg !25
+
+cond.true:                                        ; preds = %entry
+  %call1 = tail call i32 @rand() nounwind optsize, !dbg !25
+  br label %cond.end, !dbg !25
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25
+  tail call void @llvm.dbg.value(metadata !{i32 %cond}, i64 0, metadata !17), !dbg !25
+  %conv = sext i32 %cond to i64, !dbg !26
+  %conv5 = zext i32 %call to i64, !dbg !26
+  %call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26
+  %cmp7 = icmp eq i64 %call6, 0
+  br i1 %cmp7, label %return, label %if.then, !dbg !26
+
+if.then:                                          ; preds = %cond.end
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8]* @str, i64 0, i64 0))
+  %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
+  ret i32 1, !dbg !27
+
+return:                                           ; preds = %cond.end
+  ret i32 0, !dbg !27
+}
+
+declare i32 @rand() optsize
+
+declare i32 @printf(i8* nocapture, ...) nounwind optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.gcd = !{!10, !11, !12}
+!llvm.dbg.lv.main = !{!14, !17}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 5, i32 41, metadata !0, null}
+!19 = metadata !{i32 5, i32 49, metadata !0, null}
+!20 = metadata !{i32 7, i32 5, metadata !13, null}
+!21 = metadata !{i32 8, i32 9, metadata !22, null}
+!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 9, metadata !22, null}
+!24 = metadata !{i32 26, i32 38, metadata !15, null}
+!25 = metadata !{i32 27, i32 38, metadata !15, null}
+!26 = metadata !{i32 28, i32 9, metadata !15, null}
+!27 = metadata !{i32 30, i32 1, metadata !15, null}
diff --git a/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
new file mode 100644
index 000000000000..cedd6a2a1b8e
--- /dev/null
+++ b/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @foo()
+
+define i32 @bar() nounwind {
+; CHECK: bar
+; CHECK-NOT: pop.*ax
+  %call = call i32 @foo()
+  ret i32 %call
+}
+
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 30a1f36850de..912bdc215474 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 ; rdar://7527734
 
-define i32 @test(i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %x) nounwind readnone ssp {
 entry:
-; CHECK: test:
+; CHECK: test1:
 ; CHECK: leal 3(%rdi), %eax
   %0 = shl i32 %x, 5                              ; <i32> [#uses=1]
   %1 = or i32 %0, 3                               ; <i32> [#uses=1]
@@ -25,3 +25,37 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
   %H = or i64 %G, %E                              ; <i64> [#uses=1]
   ret i64 %H
 }
+
+;; Test that OR is only emitted as LEA, not as ADD.
+
+define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
+entry:
+; No reason to emit an add here, should be an or.
+; CHECK: test3:
+; CHECK: orl $3, %edi
+  %0 = shl i32 %x, 5
+  %1 = or i32 %0, 3
+  store i32 %1, i32* %P
+  ret void
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+  %and = and i32 %a, 6
+  %and2 = and i32 %b, 16
+  %or = or i32 %and2, %and
+  ret i32 %or
+; CHECK: test4:
+; CHECK: leal	(%rsi,%rdi), %eax
+}
+
+define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp {
+entry:
+  %and = and i32 %a, 6
+  %and2 = and i32 %b, 16
+  %or = or i32 %and2, %and
+  store i32 %or, i32* %P, align 4
+  ret void
+; CHECK: test5:
+; CHECK: orl
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 23042b6eff3e..5b4d79fa22b9 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
@@ -72,7 +72,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo00:
-; DARWIN-32-PIC: 	call	L0$pb
+; DARWIN-32-PIC: 	calll	L0$pb
 ; DARWIN-32-PIC-NEXT: L0$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L0$pb(%eax), %ecx
@@ -144,7 +144,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo00:
-; DARWIN-32-PIC: 	call	L1$pb
+; DARWIN-32-PIC: 	calll	L1$pb
 ; DARWIN-32-PIC-NEXT: L1$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
@@ -208,7 +208,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo01:
-; DARWIN-32-PIC: 	call	L2$pb
+; DARWIN-32-PIC: 	calll	L2$pb
 ; DARWIN-32-PIC-NEXT: L2$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
@@ -268,7 +268,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo01:
-; DARWIN-32-PIC: 	call	L3$pb
+; DARWIN-32-PIC: 	calll	L3$pb
 ; DARWIN-32-PIC-NEXT: L3$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
@@ -342,7 +342,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo02:
-; DARWIN-32-PIC: 	call	L4$pb
+; DARWIN-32-PIC: 	calll	L4$pb
 ; DARWIN-32-PIC-NEXT: L4$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L4$pb(%eax), %ecx
@@ -424,7 +424,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _fxo02:
-; DARWIN-32-PIC: 	call	L5$pb
+; DARWIN-32-PIC: 	calll	L5$pb
 ; DARWIN-32-PIC-NEXT: L5$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
@@ -497,7 +497,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo03:
-; DARWIN-32-PIC: 	call	L6$pb
+; DARWIN-32-PIC: 	calll	L6$pb
 ; DARWIN-32-PIC-NEXT: L6$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L6$pb(%eax), %ecx
@@ -551,7 +551,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo04:
-; DARWIN-32-PIC: 	call	L7$pb
+; DARWIN-32-PIC: 	calll	L7$pb
 ; DARWIN-32-PIC-NEXT: L7$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ddst-L7$pb(%eax), %ecx
@@ -619,7 +619,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo05:
-; DARWIN-32-PIC: 	call	L8$pb
+; DARWIN-32-PIC: 	calll	L8$pb
 ; DARWIN-32-PIC-NEXT: L8$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L8$pb(%eax), %ecx
@@ -682,7 +682,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo06:
-; DARWIN-32-PIC: 	call	L9$pb
+; DARWIN-32-PIC: 	calll	L9$pb
 ; DARWIN-32-PIC-NEXT: L9$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L9$pb(%eax), %ecx
@@ -735,7 +735,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo07:
-; DARWIN-32-PIC: 	call	L10$pb
+; DARWIN-32-PIC: 	calll	L10$pb
 ; DARWIN-32-PIC-NEXT: L10$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ldst-L10$pb(%eax), %ecx
@@ -801,7 +801,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _foo08:
-; DARWIN-32-PIC: 	call	L11$pb
+; DARWIN-32-PIC: 	calll	L11$pb
 ; DARWIN-32-PIC-NEXT: L11$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L11$pb(%eax), %ecx
@@ -868,7 +868,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux00:
-; DARWIN-32-PIC: 	call	L12$pb
+; DARWIN-32-PIC: 	calll	L12$pb
 ; DARWIN-32-PIC-NEXT: L12$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L12$pb(%eax), %ecx
@@ -939,7 +939,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx00:
-; DARWIN-32-PIC: 	call	L13$pb
+; DARWIN-32-PIC: 	calll	L13$pb
 ; DARWIN-32-PIC-NEXT: L13$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
@@ -1005,7 +1005,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux01:
-; DARWIN-32-PIC: 	call	L14$pb
+; DARWIN-32-PIC: 	calll	L14$pb
 ; DARWIN-32-PIC-NEXT: L14$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
@@ -1071,7 +1071,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx01:
-; DARWIN-32-PIC: 	call	L15$pb
+; DARWIN-32-PIC: 	calll	L15$pb
 ; DARWIN-32-PIC-NEXT: L15$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
@@ -1150,7 +1150,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux02:
-; DARWIN-32-PIC: 	call	L16$pb
+; DARWIN-32-PIC: 	calll	L16$pb
 ; DARWIN-32-PIC-NEXT: L16$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L16$pb(%eax), %ecx
@@ -1233,7 +1233,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qxx02:
-; DARWIN-32-PIC: 	call	L17$pb
+; DARWIN-32-PIC: 	calll	L17$pb
 ; DARWIN-32-PIC-NEXT: L17$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
@@ -1306,7 +1306,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux03:
-; DARWIN-32-PIC: 	call	L18$pb
+; DARWIN-32-PIC: 	calll	L18$pb
 ; DARWIN-32-PIC-NEXT: L18$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L18$pb)+64(%eax), %ecx
@@ -1361,7 +1361,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux04:
-; DARWIN-32-PIC: 	call	L19$pb
+; DARWIN-32-PIC: 	calll	L19$pb
 ; DARWIN-32-PIC-NEXT: L19$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L19$pb)+64(%eax), %ecx
@@ -1430,7 +1430,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux05:
-; DARWIN-32-PIC: 	call	L20$pb
+; DARWIN-32-PIC: 	calll	L20$pb
 ; DARWIN-32-PIC-NEXT: L20$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L20$pb)+64(%eax), %ecx
@@ -1493,7 +1493,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux06:
-; DARWIN-32-PIC: 	call	L21$pb
+; DARWIN-32-PIC: 	calll	L21$pb
 ; DARWIN-32-PIC-NEXT: L21$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L21$pb)+64(%eax), %ecx
@@ -1546,7 +1546,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux07:
-; DARWIN-32-PIC: 	call	L22$pb
+; DARWIN-32-PIC: 	calll	L22$pb
 ; DARWIN-32-PIC-NEXT: L22$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L22$pb)+64(%eax), %ecx
@@ -1613,7 +1613,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _qux08:
-; DARWIN-32-PIC: 	call	L23$pb
+; DARWIN-32-PIC: 	calll	L23$pb
 ; DARWIN-32-PIC-NEXT: L23$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L23$pb)+64(%eax), %ecx
@@ -1686,7 +1686,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind00:
-; DARWIN-32-PIC: 	call	L24$pb
+; DARWIN-32-PIC: 	calll	L24$pb
 ; DARWIN-32-PIC-NEXT: L24$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -1764,7 +1764,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd00:
-; DARWIN-32-PIC: 	call	L25$pb
+; DARWIN-32-PIC: 	calll	L25$pb
 ; DARWIN-32-PIC-NEXT: L25$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -1840,7 +1840,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind01:
-; DARWIN-32-PIC: 	call	L26$pb
+; DARWIN-32-PIC: 	calll	L26$pb
 ; DARWIN-32-PIC-NEXT: L26$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -1916,7 +1916,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd01:
-; DARWIN-32-PIC: 	call	L27$pb
+; DARWIN-32-PIC: 	calll	L27$pb
 ; DARWIN-32-PIC-NEXT: L27$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2001,7 +2001,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind02:
-; DARWIN-32-PIC: 	call	L28$pb
+; DARWIN-32-PIC: 	calll	L28$pb
 ; DARWIN-32-PIC-NEXT: L28$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2090,7 +2090,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ixd02:
-; DARWIN-32-PIC: 	call	L29$pb
+; DARWIN-32-PIC: 	calll	L29$pb
 ; DARWIN-32-PIC-NEXT: L29$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2170,7 +2170,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind03:
-; DARWIN-32-PIC: 	call	L30$pb
+; DARWIN-32-PIC: 	calll	L30$pb
 ; DARWIN-32-PIC-NEXT: L30$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2242,7 +2242,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind04:
-; DARWIN-32-PIC: 	call	L31$pb
+; DARWIN-32-PIC: 	calll	L31$pb
 ; DARWIN-32-PIC-NEXT: L31$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2320,7 +2320,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind05:
-; DARWIN-32-PIC: 	call	L32$pb
+; DARWIN-32-PIC: 	calll	L32$pb
 ; DARWIN-32-PIC-NEXT: L32$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2395,7 +2395,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind06:
-; DARWIN-32-PIC: 	call	L33$pb
+; DARWIN-32-PIC: 	calll	L33$pb
 ; DARWIN-32-PIC-NEXT: L33$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2466,7 +2466,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind07:
-; DARWIN-32-PIC: 	call	L34$pb
+; DARWIN-32-PIC: 	calll	L34$pb
 ; DARWIN-32-PIC-NEXT: L34$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2543,7 +2543,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ind08:
-; DARWIN-32-PIC: 	call	L35$pb
+; DARWIN-32-PIC: 	calll	L35$pb
 ; DARWIN-32-PIC-NEXT: L35$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2621,7 +2621,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off00:
-; DARWIN-32-PIC: 	call	L36$pb
+; DARWIN-32-PIC: 	calll	L36$pb
 ; DARWIN-32-PIC-NEXT: L36$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2700,7 +2700,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf00:
-; DARWIN-32-PIC: 	call	L37$pb
+; DARWIN-32-PIC: 	calll	L37$pb
 ; DARWIN-32-PIC-NEXT: L37$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2777,7 +2777,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off01:
-; DARWIN-32-PIC: 	call	L38$pb
+; DARWIN-32-PIC: 	calll	L38$pb
 ; DARWIN-32-PIC-NEXT: L38$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2854,7 +2854,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf01:
-; DARWIN-32-PIC: 	call	L39$pb
+; DARWIN-32-PIC: 	calll	L39$pb
 ; DARWIN-32-PIC-NEXT: L39$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -2940,7 +2940,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off02:
-; DARWIN-32-PIC: 	call	L40$pb
+; DARWIN-32-PIC: 	calll	L40$pb
 ; DARWIN-32-PIC-NEXT: L40$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3030,7 +3030,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _oxf02:
-; DARWIN-32-PIC: 	call	L41$pb
+; DARWIN-32-PIC: 	calll	L41$pb
 ; DARWIN-32-PIC-NEXT: L41$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3111,7 +3111,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off03:
-; DARWIN-32-PIC: 	call	L42$pb
+; DARWIN-32-PIC: 	calll	L42$pb
 ; DARWIN-32-PIC-NEXT: L42$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3184,7 +3184,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off04:
-; DARWIN-32-PIC: 	call	L43$pb
+; DARWIN-32-PIC: 	calll	L43$pb
 ; DARWIN-32-PIC-NEXT: L43$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3263,7 +3263,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off05:
-; DARWIN-32-PIC: 	call	L44$pb
+; DARWIN-32-PIC: 	calll	L44$pb
 ; DARWIN-32-PIC-NEXT: L44$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3339,7 +3339,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off06:
-; DARWIN-32-PIC: 	call	L45$pb
+; DARWIN-32-PIC: 	calll	L45$pb
 ; DARWIN-32-PIC-NEXT: L45$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3411,7 +3411,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off07:
-; DARWIN-32-PIC: 	call	L46$pb
+; DARWIN-32-PIC: 	calll	L46$pb
 ; DARWIN-32-PIC-NEXT: L46$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3489,7 +3489,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _off08:
-; DARWIN-32-PIC: 	call	L47$pb
+; DARWIN-32-PIC: 	calll	L47$pb
 ; DARWIN-32-PIC-NEXT: L47$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -3560,7 +3560,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo00:
-; DARWIN-32-PIC: 	call	L48$pb
+; DARWIN-32-PIC: 	calll	L48$pb
 ; DARWIN-32-PIC-NEXT: L48$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L48$pb(%eax), %ecx
@@ -3626,7 +3626,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo01:
-; DARWIN-32-PIC: 	call	L49$pb
+; DARWIN-32-PIC: 	calll	L49$pb
 ; DARWIN-32-PIC-NEXT: L49$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %ecx
@@ -3705,7 +3705,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo02:
-; DARWIN-32-PIC: 	call	L50$pb
+; DARWIN-32-PIC: 	calll	L50$pb
 ; DARWIN-32-PIC-NEXT: L50$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L50$pb(%eax), %ecx
@@ -3778,7 +3778,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo03:
-; DARWIN-32-PIC: 	call	L51$pb
+; DARWIN-32-PIC: 	calll	L51$pb
 ; DARWIN-32-PIC-NEXT: L51$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L51$pb)+262144(%eax), %ecx
@@ -3833,7 +3833,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo04:
-; DARWIN-32-PIC: 	call	L52$pb
+; DARWIN-32-PIC: 	calll	L52$pb
 ; DARWIN-32-PIC-NEXT: L52$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L52$pb)+262144(%eax), %ecx
@@ -3902,7 +3902,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo05:
-; DARWIN-32-PIC: 	call	L53$pb
+; DARWIN-32-PIC: 	calll	L53$pb
 ; DARWIN-32-PIC-NEXT: L53$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L53$pb)+262144(%eax), %ecx
@@ -3965,7 +3965,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo06:
-; DARWIN-32-PIC: 	call	L54$pb
+; DARWIN-32-PIC: 	calll	L54$pb
 ; DARWIN-32-PIC-NEXT: L54$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L54$pb)+262144(%eax), %ecx
@@ -4018,7 +4018,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo07:
-; DARWIN-32-PIC: 	call	L55$pb
+; DARWIN-32-PIC: 	calll	L55$pb
 ; DARWIN-32-PIC-NEXT: L55$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L55$pb)+262144(%eax), %ecx
@@ -4085,7 +4085,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _moo08:
-; DARWIN-32-PIC: 	call	L56$pb
+; DARWIN-32-PIC: 	calll	L56$pb
 ; DARWIN-32-PIC-NEXT: L56$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L56$pb)+262144(%eax), %ecx
@@ -4159,7 +4159,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big00:
-; DARWIN-32-PIC: 	call	L57$pb
+; DARWIN-32-PIC: 	calll	L57$pb
 ; DARWIN-32-PIC-NEXT: L57$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4236,7 +4236,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big01:
-; DARWIN-32-PIC: 	call	L58$pb
+; DARWIN-32-PIC: 	calll	L58$pb
 ; DARWIN-32-PIC-NEXT: L58$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4322,7 +4322,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big02:
-; DARWIN-32-PIC: 	call	L59$pb
+; DARWIN-32-PIC: 	calll	L59$pb
 ; DARWIN-32-PIC-NEXT: L59$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4403,7 +4403,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big03:
-; DARWIN-32-PIC: 	call	L60$pb
+; DARWIN-32-PIC: 	calll	L60$pb
 ; DARWIN-32-PIC-NEXT: L60$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4476,7 +4476,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big04:
-; DARWIN-32-PIC: 	call	L61$pb
+; DARWIN-32-PIC: 	calll	L61$pb
 ; DARWIN-32-PIC-NEXT: L61$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4555,7 +4555,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big05:
-; DARWIN-32-PIC: 	call	L62$pb
+; DARWIN-32-PIC: 	calll	L62$pb
 ; DARWIN-32-PIC-NEXT: L62$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4631,7 +4631,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big06:
-; DARWIN-32-PIC: 	call	L63$pb
+; DARWIN-32-PIC: 	calll	L63$pb
 ; DARWIN-32-PIC-NEXT: L63$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4703,7 +4703,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big07:
-; DARWIN-32-PIC: 	call	L64$pb
+; DARWIN-32-PIC: 	calll	L64$pb
 ; DARWIN-32-PIC-NEXT: L64$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4781,7 +4781,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _big08:
-; DARWIN-32-PIC: 	call	L65$pb
+; DARWIN-32-PIC: 	calll	L65$pb
 ; DARWIN-32-PIC-NEXT: L65$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -4840,7 +4840,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar00:
-; DARWIN-32-PIC: 	call	L66$pb
+; DARWIN-32-PIC: 	calll	L66$pb
 ; DARWIN-32-PIC-NEXT: L66$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L66$pb(%eax), %eax
@@ -4887,7 +4887,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bxr00:
-; DARWIN-32-PIC: 	call	L67$pb
+; DARWIN-32-PIC: 	calll	L67$pb
 ; DARWIN-32-PIC-NEXT: L67$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
@@ -4934,7 +4934,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar01:
-; DARWIN-32-PIC: 	call	L68$pb
+; DARWIN-32-PIC: 	calll	L68$pb
 ; DARWIN-32-PIC-NEXT: L68$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L68$pb(%eax), %eax
@@ -4981,7 +4981,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bxr01:
-; DARWIN-32-PIC: 	call	L69$pb
+; DARWIN-32-PIC: 	calll	L69$pb
 ; DARWIN-32-PIC-NEXT: L69$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
@@ -5028,7 +5028,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar02:
-; DARWIN-32-PIC: 	call	L70$pb
+; DARWIN-32-PIC: 	calll	L70$pb
 ; DARWIN-32-PIC-NEXT: L70$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
@@ -5075,7 +5075,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar03:
-; DARWIN-32-PIC: 	call	L71$pb
+; DARWIN-32-PIC: 	calll	L71$pb
 ; DARWIN-32-PIC-NEXT: L71$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L71$pb(%eax), %eax
@@ -5122,7 +5122,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar04:
-; DARWIN-32-PIC: 	call	L72$pb
+; DARWIN-32-PIC: 	calll	L72$pb
 ; DARWIN-32-PIC-NEXT: L72$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ddst-L72$pb(%eax), %eax
@@ -5169,7 +5169,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar05:
-; DARWIN-32-PIC: 	call	L73$pb
+; DARWIN-32-PIC: 	calll	L73$pb
 ; DARWIN-32-PIC-NEXT: L73$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_dptr-L73$pb(%eax), %eax
@@ -5216,7 +5216,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar06:
-; DARWIN-32-PIC: 	call	L74$pb
+; DARWIN-32-PIC: 	calll	L74$pb
 ; DARWIN-32-PIC-NEXT: L74$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L74$pb(%eax), %eax
@@ -5263,7 +5263,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar07:
-; DARWIN-32-PIC: 	call	L75$pb
+; DARWIN-32-PIC: 	calll	L75$pb
 ; DARWIN-32-PIC-NEXT: L75$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ldst-L75$pb(%eax), %eax
@@ -5310,7 +5310,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bar08:
-; DARWIN-32-PIC: 	call	L76$pb
+; DARWIN-32-PIC: 	calll	L76$pb
 ; DARWIN-32-PIC-NEXT: L76$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_lptr-L76$pb(%eax), %eax
@@ -5357,7 +5357,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har00:
-; DARWIN-32-PIC: 	call	L77$pb
+; DARWIN-32-PIC: 	calll	L77$pb
 ; DARWIN-32-PIC-NEXT: L77$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L77$pb(%eax), %eax
@@ -5404,7 +5404,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _hxr00:
-; DARWIN-32-PIC: 	call	L78$pb
+; DARWIN-32-PIC: 	calll	L78$pb
 ; DARWIN-32-PIC-NEXT: L78$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
@@ -5451,7 +5451,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har01:
-; DARWIN-32-PIC: 	call	L79$pb
+; DARWIN-32-PIC: 	calll	L79$pb
 ; DARWIN-32-PIC-NEXT: L79$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L79$pb(%eax), %eax
@@ -5498,7 +5498,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _hxr01:
-; DARWIN-32-PIC: 	call	L80$pb
+; DARWIN-32-PIC: 	calll	L80$pb
 ; DARWIN-32-PIC-NEXT: L80$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
@@ -5549,7 +5549,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har02:
-; DARWIN-32-PIC: 	call	L81$pb
+; DARWIN-32-PIC: 	calll	L81$pb
 ; DARWIN-32-PIC-NEXT: L81$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
@@ -5600,7 +5600,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har03:
-; DARWIN-32-PIC: 	call	L82$pb
+; DARWIN-32-PIC: 	calll	L82$pb
 ; DARWIN-32-PIC-NEXT: L82$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L82$pb(%eax), %eax
@@ -5647,7 +5647,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har04:
-; DARWIN-32-PIC: 	call	L83$pb
+; DARWIN-32-PIC: 	calll	L83$pb
 ; DARWIN-32-PIC-NEXT: L83$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ddst-L83$pb(%eax), %eax
@@ -5697,7 +5697,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har05:
-; DARWIN-32-PIC: 	call	L84$pb
+; DARWIN-32-PIC: 	calll	L84$pb
 ; DARWIN-32-PIC-NEXT: L84$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_dptr-L84$pb(%eax), %eax
@@ -5744,7 +5744,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har06:
-; DARWIN-32-PIC: 	call	L85$pb
+; DARWIN-32-PIC: 	calll	L85$pb
 ; DARWIN-32-PIC-NEXT: L85$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L85$pb(%eax), %eax
@@ -5791,7 +5791,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har07:
-; DARWIN-32-PIC: 	call	L86$pb
+; DARWIN-32-PIC: 	calll	L86$pb
 ; DARWIN-32-PIC-NEXT: L86$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_ldst-L86$pb(%eax), %eax
@@ -5840,7 +5840,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _har08:
-; DARWIN-32-PIC: 	call	L87$pb
+; DARWIN-32-PIC: 	calll	L87$pb
 ; DARWIN-32-PIC-NEXT: L87$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_lptr-L87$pb(%eax), %eax
@@ -5889,7 +5889,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat00:
-; DARWIN-32-PIC: 	call	L88$pb
+; DARWIN-32-PIC: 	calll	L88$pb
 ; DARWIN-32-PIC-NEXT: L88$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L88$pb(%eax), %eax
@@ -5942,7 +5942,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bxt00:
-; DARWIN-32-PIC: 	call	L89$pb
+; DARWIN-32-PIC: 	calll	L89$pb
 ; DARWIN-32-PIC-NEXT: L89$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
@@ -5995,7 +5995,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat01:
-; DARWIN-32-PIC: 	call	L90$pb
+; DARWIN-32-PIC: 	calll	L90$pb
 ; DARWIN-32-PIC-NEXT: L90$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L90$pb(%eax), %eax
@@ -6048,7 +6048,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bxt01:
-; DARWIN-32-PIC: 	call	L91$pb
+; DARWIN-32-PIC: 	calll	L91$pb
 ; DARWIN-32-PIC-NEXT: L91$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
@@ -6110,7 +6110,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat02:
-; DARWIN-32-PIC: 	call	L92$pb
+; DARWIN-32-PIC: 	calll	L92$pb
 ; DARWIN-32-PIC-NEXT: L92$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
@@ -6166,7 +6166,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat03:
-; DARWIN-32-PIC: 	call	L93$pb
+; DARWIN-32-PIC: 	calll	L93$pb
 ; DARWIN-32-PIC-NEXT: L93$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L93$pb)+64(%eax), %eax
@@ -6214,7 +6214,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat04:
-; DARWIN-32-PIC: 	call	L94$pb
+; DARWIN-32-PIC: 	calll	L94$pb
 ; DARWIN-32-PIC-NEXT: L94$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L94$pb)+64(%eax), %eax
@@ -6271,7 +6271,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat05:
-; DARWIN-32-PIC: 	call	L95$pb
+; DARWIN-32-PIC: 	calll	L95$pb
 ; DARWIN-32-PIC-NEXT: L95$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_dptr-L95$pb(%eax), %eax
@@ -6322,7 +6322,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat06:
-; DARWIN-32-PIC: 	call	L96$pb
+; DARWIN-32-PIC: 	calll	L96$pb
 ; DARWIN-32-PIC-NEXT: L96$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L96$pb)+64(%eax), %eax
@@ -6369,7 +6369,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat07:
-; DARWIN-32-PIC: 	call	L97$pb
+; DARWIN-32-PIC: 	calll	L97$pb
 ; DARWIN-32-PIC-NEXT: L97$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L97$pb)+64(%eax), %eax
@@ -6425,7 +6425,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bat08:
-; DARWIN-32-PIC: 	call	L98$pb
+; DARWIN-32-PIC: 	calll	L98$pb
 ; DARWIN-32-PIC-NEXT: L98$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	_lptr-L98$pb(%eax), %eax
@@ -6478,7 +6478,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam00:
-; DARWIN-32-PIC: 	call	L99$pb
+; DARWIN-32-PIC: 	calll	L99$pb
 ; DARWIN-32-PIC-NEXT: L99$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%ecx
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
@@ -6531,7 +6531,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam01:
-; DARWIN-32-PIC: 	call	L100$pb
+; DARWIN-32-PIC: 	calll	L100$pb
 ; DARWIN-32-PIC-NEXT: L100$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%ecx
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
@@ -6584,7 +6584,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bxm01:
-; DARWIN-32-PIC: 	call	L101$pb
+; DARWIN-32-PIC: 	calll	L101$pb
 ; DARWIN-32-PIC-NEXT: L101$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%ecx
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
@@ -6646,7 +6646,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam02:
-; DARWIN-32-PIC: 	call	L102$pb
+; DARWIN-32-PIC: 	calll	L102$pb
 ; DARWIN-32-PIC-NEXT: L102$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
@@ -6702,7 +6702,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam03:
-; DARWIN-32-PIC: 	call	L103$pb
+; DARWIN-32-PIC: 	calll	L103$pb
 ; DARWIN-32-PIC-NEXT: L103$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L103$pb)+262144(%eax), %eax
@@ -6750,7 +6750,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam04:
-; DARWIN-32-PIC: 	call	L104$pb
+; DARWIN-32-PIC: 	calll	L104$pb
 ; DARWIN-32-PIC-NEXT: L104$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L104$pb)+262144(%eax), %eax
@@ -6807,7 +6807,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam05:
-; DARWIN-32-PIC: 	call	L105$pb
+; DARWIN-32-PIC: 	calll	L105$pb
 ; DARWIN-32-PIC-NEXT: L105$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%ecx
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
@@ -6858,7 +6858,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam06:
-; DARWIN-32-PIC: 	call	L106$pb
+; DARWIN-32-PIC: 	calll	L106$pb
 ; DARWIN-32-PIC-NEXT: L106$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L106$pb)+262144(%eax), %eax
@@ -6905,7 +6905,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam07:
-; DARWIN-32-PIC: 	call	L107$pb
+; DARWIN-32-PIC: 	calll	L107$pb
 ; DARWIN-32-PIC-NEXT: L107$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L107$pb)+262144(%eax), %eax
@@ -6961,7 +6961,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _bam08:
-; DARWIN-32-PIC: 	call	L108$pb
+; DARWIN-32-PIC: 	calll	L108$pb
 ; DARWIN-32-PIC-NEXT: L108$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%ecx
 ; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
@@ -7021,7 +7021,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat00:
-; DARWIN-32-PIC: 	call	L109$pb
+; DARWIN-32-PIC: 	calll	L109$pb
 ; DARWIN-32-PIC-NEXT: L109$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7082,7 +7082,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxt00:
-; DARWIN-32-PIC: 	call	L110$pb
+; DARWIN-32-PIC: 	calll	L110$pb
 ; DARWIN-32-PIC-NEXT: L110$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7143,7 +7143,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat01:
-; DARWIN-32-PIC: 	call	L111$pb
+; DARWIN-32-PIC: 	calll	L111$pb
 ; DARWIN-32-PIC-NEXT: L111$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7204,7 +7204,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxt01:
-; DARWIN-32-PIC: 	call	L112$pb
+; DARWIN-32-PIC: 	calll	L112$pb
 ; DARWIN-32-PIC-NEXT: L112$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7272,7 +7272,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat02:
-; DARWIN-32-PIC: 	call	L113$pb
+; DARWIN-32-PIC: 	calll	L113$pb
 ; DARWIN-32-PIC-NEXT: L113$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
@@ -7336,7 +7336,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat03:
-; DARWIN-32-PIC: 	call	L114$pb
+; DARWIN-32-PIC: 	calll	L114$pb
 ; DARWIN-32-PIC-NEXT: L114$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7395,7 +7395,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat04:
-; DARWIN-32-PIC: 	call	L115$pb
+; DARWIN-32-PIC: 	calll	L115$pb
 ; DARWIN-32-PIC-NEXT: L115$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7461,7 +7461,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat05:
-; DARWIN-32-PIC: 	call	L116$pb
+; DARWIN-32-PIC: 	calll	L116$pb
 ; DARWIN-32-PIC-NEXT: L116$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7521,7 +7521,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat06:
-; DARWIN-32-PIC: 	call	L117$pb
+; DARWIN-32-PIC: 	calll	L117$pb
 ; DARWIN-32-PIC-NEXT: L117$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7580,7 +7580,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat07:
-; DARWIN-32-PIC: 	call	L118$pb
+; DARWIN-32-PIC: 	calll	L118$pb
 ; DARWIN-32-PIC-NEXT: L118$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7645,7 +7645,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cat08:
-; DARWIN-32-PIC: 	call	L119$pb
+; DARWIN-32-PIC: 	calll	L119$pb
 ; DARWIN-32-PIC-NEXT: L119$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7706,7 +7706,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam00:
-; DARWIN-32-PIC: 	call	L120$pb
+; DARWIN-32-PIC: 	calll	L120$pb
 ; DARWIN-32-PIC-NEXT: L120$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7767,7 +7767,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxm00:
-; DARWIN-32-PIC: 	call	L121$pb
+; DARWIN-32-PIC: 	calll	L121$pb
 ; DARWIN-32-PIC-NEXT: L121$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7828,7 +7828,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam01:
-; DARWIN-32-PIC: 	call	L122$pb
+; DARWIN-32-PIC: 	calll	L122$pb
 ; DARWIN-32-PIC-NEXT: L122$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7889,7 +7889,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cxm01:
-; DARWIN-32-PIC: 	call	L123$pb
+; DARWIN-32-PIC: 	calll	L123$pb
 ; DARWIN-32-PIC-NEXT: L123$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -7957,7 +7957,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam02:
-; DARWIN-32-PIC: 	call	L124$pb
+; DARWIN-32-PIC: 	calll	L124$pb
 ; DARWIN-32-PIC-NEXT: L124$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
@@ -8021,7 +8021,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam03:
-; DARWIN-32-PIC: 	call	L125$pb
+; DARWIN-32-PIC: 	calll	L125$pb
 ; DARWIN-32-PIC-NEXT: L125$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8080,7 +8080,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam04:
-; DARWIN-32-PIC: 	call	L126$pb
+; DARWIN-32-PIC: 	calll	L126$pb
 ; DARWIN-32-PIC-NEXT: L126$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8146,7 +8146,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam05:
-; DARWIN-32-PIC: 	call	L127$pb
+; DARWIN-32-PIC: 	calll	L127$pb
 ; DARWIN-32-PIC-NEXT: L127$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8206,7 +8206,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam06:
-; DARWIN-32-PIC: 	call	L128$pb
+; DARWIN-32-PIC: 	calll	L128$pb
 ; DARWIN-32-PIC-NEXT: L128$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8265,7 +8265,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam07:
-; DARWIN-32-PIC: 	call	L129$pb
+; DARWIN-32-PIC: 	calll	L129$pb
 ; DARWIN-32-PIC-NEXT: L129$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8330,7 +8330,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _cam08:
-; DARWIN-32-PIC: 	call	L130$pb
+; DARWIN-32-PIC: 	calll	L130$pb
 ; DARWIN-32-PIC-NEXT: L130$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
@@ -8375,31 +8375,32 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: lcallee:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	call	x
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	calll	x
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: lcallee:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	call	x
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	calll	x
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: lcallee:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
@@ -8407,47 +8408,47 @@ entry:
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _lcallee:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
-; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
+; DARWIN-32-STATIC-NEXT: 	calll	_x
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _lcallee:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_x$stub
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _lcallee:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
-; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_x$stub
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _lcallee:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
@@ -8455,11 +8456,11 @@ entry:
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
 ; DARWIN-64-STATIC-NEXT: 	callq	_x
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _lcallee:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
@@ -8467,11 +8468,11 @@ entry:
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _lcallee:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_x
 ; DARWIN-64-PIC-NEXT: 	callq	_x
 ; DARWIN-64-PIC-NEXT: 	callq	_x
@@ -8479,7 +8480,7 @@ entry:
 ; DARWIN-64-PIC-NEXT: 	callq	_x
 ; DARWIN-64-PIC-NEXT: 	callq	_x
 ; DARWIN-64-PIC-NEXT: 	callq	_x
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8506,31 +8507,32 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: dcallee:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	call	y
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	calll	y
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: dcallee:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	call	y
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	calll	y
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: dcallee:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
@@ -8538,47 +8540,47 @@ entry:
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _dcallee:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
-; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
+; DARWIN-32-STATIC-NEXT: 	calll	_y
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _dcallee:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_y$stub
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _dcallee:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
-; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_y$stub
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _dcallee:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
@@ -8586,11 +8588,11 @@ entry:
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
 ; DARWIN-64-STATIC-NEXT: 	callq	_y
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _dcallee:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
@@ -8598,11 +8600,11 @@ entry:
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _dcallee:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_y
 ; DARWIN-64-PIC-NEXT: 	callq	_y
 ; DARWIN-64-PIC-NEXT: 	callq	_y
@@ -8610,7 +8612,7 @@ entry:
 ; DARWIN-64-PIC-NEXT: 	callq	_y
 ; DARWIN-64-PIC-NEXT: 	callq	_y
 ; DARWIN-64-PIC-NEXT: 	callq	_y
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8644,7 +8646,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _address:
-; DARWIN-32-PIC: 	call	L133$pb
+; DARWIN-32-PIC: 	calll	L133$pb
 ; DARWIN-32-PIC-NEXT: L133$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_callee$non_lazy_ptr-L133$pb(%eax), %eax
@@ -8693,7 +8695,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _laddress:
-; DARWIN-32-PIC: 	call	L134$pb
+; DARWIN-32-PIC: 	calll	L134$pb
 ; DARWIN-32-PIC-NEXT: L134$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_lcallee-L134$pb(%eax), %eax
@@ -8740,7 +8742,7 @@ entry:
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _daddress:
-; DARWIN-32-PIC: 	call	L135$pb
+; DARWIN-32-PIC: 	calll	L135$pb
 ; DARWIN-32-PIC-NEXT: L135$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	leal	_dcallee-L135$pb(%eax), %eax
@@ -8770,66 +8772,67 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: caller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	callee
-; LINUX-32-STATIC-NEXT: 	call	callee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	callee
+; LINUX-32-STATIC-NEXT: 	calll	callee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: caller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	callee
-; LINUX-32-PIC-NEXT: 	call	callee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	callee
+; LINUX-32-PIC-NEXT: 	calll	callee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: caller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _caller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_callee
-; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	calll	_callee
+; DARWIN-32-STATIC-NEXT: 	calll	_callee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _caller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_callee$stub
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _caller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
-; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_callee$stub
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _caller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_callee
 ; DARWIN-64-STATIC-NEXT: 	callq	_callee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _caller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _caller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_callee
 ; DARWIN-64-PIC-NEXT: 	callq	_callee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8844,66 +8847,67 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: dcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	dcallee
-; LINUX-32-STATIC-NEXT: 	call	dcallee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	dcallee
+; LINUX-32-STATIC-NEXT: 	calll	dcallee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: dcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	dcallee
-; LINUX-32-PIC-NEXT: 	call	dcallee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	dcallee
+; LINUX-32-PIC-NEXT: 	calll	dcallee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: dcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _dcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_dcallee
-; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_dcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _dcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
-; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _dcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	_dcallee
-; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	calll	_dcallee
+; DARWIN-32-PIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _dcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
 ; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _dcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _dcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_dcallee
 ; DARWIN-64-PIC-NEXT: 	callq	_dcallee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8918,66 +8922,67 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: lcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	lcallee
-; LINUX-32-STATIC-NEXT: 	call	lcallee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	lcallee
+; LINUX-32-STATIC-NEXT: 	calll	lcallee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: lcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	lcallee
-; LINUX-32-PIC-NEXT: 	call	lcallee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	lcallee
+; LINUX-32-PIC-NEXT: 	calll	lcallee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: lcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _lcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_lcallee
-; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_lcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _lcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
-; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _lcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	_lcallee
-; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	calll	_lcallee
+; DARWIN-32-PIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _lcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
 ; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _lcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _lcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_lcallee
 ; DARWIN-64-PIC-NEXT: 	callq	_lcallee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -8990,57 +8995,58 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: tailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	callee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	callee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: tailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	callee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	callee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: tailcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _tailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	calll	_callee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _tailcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	calll	L_callee$stub
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _tailcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	calll	L_callee$stub
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _tailcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_callee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _tailcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _tailcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_callee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9053,57 +9059,58 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: dtailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	dcallee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	dcallee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: dtailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	dcallee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	dcallee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: dtailcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _dtailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _dtailcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _dtailcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	calll	_dcallee
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _dtailcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _dtailcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _dtailcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_dcallee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9116,57 +9123,58 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ltailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	lcallee
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	lcallee
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ltailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	lcallee
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	lcallee
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ltailcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ltailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ltailcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ltailcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	calll	_lcallee
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ltailcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ltailcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ltailcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	_lcallee
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9183,17 +9191,18 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: icaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*ifunc
-; LINUX-32-STATIC-NEXT: 	call	*ifunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*ifunc
+; LINUX-32-STATIC-NEXT: 	calll	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: icaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*ifunc
-; LINUX-32-PIC-NEXT: 	call	*ifunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*ifunc
+; LINUX-32-PIC-NEXT: 	calll	*ifunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: icaller:
@@ -9206,8 +9215,8 @@ entry:
 
 ; DARWIN-32-STATIC: _icaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
-; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_ifunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
@@ -9215,8 +9224,8 @@ entry:
 ; DARWIN-32-DYNAMIC: 	pushl	%esi
 ; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
-; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
@@ -9224,12 +9233,12 @@ entry:
 ; DARWIN-32-PIC: _icaller:
 ; DARWIN-32-PIC: 	pushl	%esi
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
-; DARWIN-32-PIC-NEXT: 	call	L142$pb
+; DARWIN-32-PIC-NEXT: 	calll	L142$pb
 ; DARWIN-32-PIC-NEXT: L142$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: 	call	*(%esi)
-; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
 ; DARWIN-32-PIC-NEXT: 	ret
@@ -9272,17 +9281,18 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: dicaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*difunc
-; LINUX-32-STATIC-NEXT: 	call	*difunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*difunc
+; LINUX-32-STATIC-NEXT: 	calll	*difunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: dicaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*difunc
-; LINUX-32-PIC-NEXT: 	call	*difunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*difunc
+; LINUX-32-PIC-NEXT: 	calll	*difunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: dicaller:
@@ -9295,49 +9305,49 @@ entry:
 
 ; DARWIN-32-STATIC: _dicaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_difunc
-; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_difunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_difunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _dicaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_difunc
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _dicaller:
 ; DARWIN-32-PIC: 	pushl	%esi
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
-; DARWIN-32-PIC-NEXT: 	call	L143$pb
+; DARWIN-32-PIC-NEXT: 	calll	L143$pb
 ; DARWIN-32-PIC-NEXT: L143$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
-; DARWIN-32-PIC-NEXT: 	call	*_difunc-L143$pb(%esi)
-; DARWIN-32-PIC-NEXT: 	call	*_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*_difunc-L143$pb(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _dicaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
 ; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _dicaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _dicaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	*_difunc(%rip)
 ; DARWIN-64-PIC-NEXT: 	callq	*_difunc(%rip)
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9354,71 +9364,72 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: licaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*lifunc
-; LINUX-32-STATIC-NEXT: 	call	*lifunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*lifunc
+; LINUX-32-STATIC-NEXT: 	calll	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: licaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*lifunc
-; LINUX-32-PIC-NEXT: 	call	*lifunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*lifunc
+; LINUX-32-PIC-NEXT: 	calll	*lifunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: licaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _licaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
-; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_lifunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _licaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_lifunc
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _licaller:
 ; DARWIN-32-PIC: 	pushl	%esi
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
-; DARWIN-32-PIC-NEXT: 	call	L144$pb
+; DARWIN-32-PIC-NEXT: 	calll	L144$pb
 ; DARWIN-32-PIC-NEXT: L144$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
-; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L144$pb(%esi)
-; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*_lifunc-L144$pb(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _licaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
 ; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _licaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _licaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
 ; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9435,17 +9446,18 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: itailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*ifunc
-; LINUX-32-STATIC-NEXT: 	call	*ifunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*ifunc
+; LINUX-32-STATIC-NEXT: 	calll	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: itailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*ifunc
-; LINUX-32-PIC-NEXT: 	call	*ifunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*ifunc
+; LINUX-32-PIC-NEXT: 	calll	*ifunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: itailcaller:
@@ -9458,8 +9470,8 @@ entry:
 
 ; DARWIN-32-STATIC: _itailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
-; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_ifunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
@@ -9467,8 +9479,8 @@ entry:
 ; DARWIN-32-DYNAMIC: 	pushl	%esi
 ; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
-; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
@@ -9476,12 +9488,12 @@ entry:
 ; DARWIN-32-PIC: _itailcaller:
 ; DARWIN-32-PIC: 	pushl	%esi
 ; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
-; DARWIN-32-PIC-NEXT: 	call	L145$pb
+; DARWIN-32-PIC-NEXT: 	calll	L145$pb
 ; DARWIN-32-PIC-NEXT: L145$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
 ; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: 	call	*(%esi)
-; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
+; DARWIN-32-PIC-NEXT: 	calll	*(%esi)
 ; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
 ; DARWIN-32-PIC-NEXT: 	popl	%esi
 ; DARWIN-32-PIC-NEXT: 	ret
@@ -9521,60 +9533,61 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: ditailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*difunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*difunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: ditailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*difunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*difunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: ditailcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	callq	*(%rax)
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _ditailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_difunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _ditailcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_difunc
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _ditailcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L146$pb
+; DARWIN-32-PIC-NEXT: 	calll	L146$pb
 ; DARWIN-32-PIC-NEXT: L146$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	call	*_difunc-L146$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	calll	*_difunc-L146$pb(%eax)
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _ditailcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _ditailcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _ditailcaller:
 ; DARWIN-64-PIC: 	callq	*_difunc(%rip)
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
 
@@ -9588,59 +9601,60 @@ entry:
 ; LINUX-64-STATIC: ret
 
 ; LINUX-32-STATIC: litailcaller:
-; LINUX-32-STATIC: 	subl	$4, %esp
-; LINUX-32-STATIC-NEXT: 	call	*lifunc
-; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC: 	subl
+; LINUX-32-STATIC-NEXT: 	calll	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
 ; LINUX-32-PIC: litailcaller:
-; LINUX-32-PIC: 	subl	$4, %esp
-; LINUX-32-PIC-NEXT: 	call	*lifunc
-; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC: 	subl
+; LINUX-32-PIC-NEXT: 	calll	*lifunc
+; LINUX-32-PIC-NEXT: 	addl
+
 ; LINUX-32-PIC-NEXT: 	ret
 
 ; LINUX-64-PIC: litailcaller:
-; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
-; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	popq
 ; LINUX-64-PIC-NEXT: 	ret
 
 ; DARWIN-32-STATIC: _litailcaller:
 ; DARWIN-32-STATIC: 	subl	$12, %esp
-; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	calll	*_lifunc
 ; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-STATIC-NEXT: 	ret
 
 ; DARWIN-32-DYNAMIC: _litailcaller:
 ; DARWIN-32-DYNAMIC: 	subl	$12, %esp
-; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	calll	*_lifunc
 ; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-32-PIC: _litailcaller:
 ; DARWIN-32-PIC: 	subl	$12, %esp
-; DARWIN-32-PIC-NEXT: 	call	L147$pb
+; DARWIN-32-PIC-NEXT: 	calll	L147$pb
 ; DARWIN-32-PIC-NEXT: L147$pb:
 ; DARWIN-32-PIC-NEXT: 	popl	%eax
-; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	calll	*_lifunc-L147$pb(%eax)
 ; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _litailcaller:
-; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC: 	pushq
 ; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	popq
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _litailcaller:
-; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC: 	pushq
 ; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	popq
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _litailcaller:
-; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC: 	pushq
 ; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
-; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	popq
 ; DARWIN-64-PIC-NEXT: 	ret
 }
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
new file mode 100644
index 000000000000..f924ec8132ee
--- /dev/null
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8449754>
+
+define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test1:
+; CHECK:	sbbl	%ecx, %ecx
+; CHECK-NOT: addl
+; CHECK: subl	%ecx, %eax
+  %add4 = add i32 %x, %sum
+  %cmp = icmp ult i32 %add4, %x
+  %inc = zext i1 %cmp to i32
+  %z.0 = add i32 %add4, %inc
+  ret i32 %z.0
+}
+
+; Instcombine transforms test1 into test2:
+; CHECK: test2:
+; CHECK: movl
+; CHECK-NEXT: addl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: subl
+; CHECK-NEXT: ret
+define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
+  %0 = extractvalue { i32, i1 } %uadd, 0
+  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %inc = zext i1 %cmp to i32
+  %z.0 = add i32 %0, %inc
+  ret i32 %z.0
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 3991a6849f3e..3ec5358affb3 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -92,3 +92,43 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
 ; X64:	ret
 }
 
+define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
+   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+   ret {i32, i1} %t
+}
+
+; X64: test7:
+; X64: addl %esi, %eax
+; X64-NEXT: setb %dl
+; X64-NEXT: ret
+
+; PR5443
+define {i64, i1} @test8(i64 %left, i64 %right) nounwind {
+entry:
+    %extleft = zext i64 %left to i65
+    %extright = zext i64 %right to i65
+    %sum = add i65 %extleft, %extright
+    %res.0 = trunc i65 %sum to i64
+    %overflow = and i65 %sum, -18446744073709551616
+    %res.1 = icmp ne i65 %overflow, 0
+    %final0 = insertvalue {i64, i1} undef, i64 %res.0, 0
+    %final1 = insertvalue {i64, i1} %final0, i1 %res.1, 1
+    ret {i64, i1} %final1
+}
+
+; X64: test8:
+; X64: addq
+; X64-NEXT: sbbq
+; X64-NEXT: testb
+
+define i32 @test9(i32 %x, i32 %y) nounwind readnone {
+  %cmp = icmp eq i32 %x, 10
+  %sub = sext i1 %cmp to i32
+  %cond = add i32 %sub, %y
+  ret i32 %cond
+; X64: test9:
+; X64: cmpl $10
+; X64: sete
+; X64: subl
+; X64: ret
+}
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
index be0908aa1a9d..49abd8a92e64 100644
--- a/test/CodeGen/X86/addr-label-difference.ll
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin10.0"
 
 ; This array should go into the __TEXT,__const section, not into the
 ; __DATA,__const section, because the elements don't need relocations.
-@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
+@test.array = internal unnamed_addr constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
 
 define void @test(i32 %i) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/alldiv-divdi3.ll b/test/CodeGen/X86/alldiv-divdi3.ll
new file mode 100644
index 000000000000..86aa1fde1957
--- /dev/null
+++ b/test/CodeGen/X86/alldiv-divdi3.ll
@@ -0,0 +1,17 @@
+; Test that, for a 64 bit signed div, a libcall to alldiv is made on Windows
+; unless we have libgcc.
+
+; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEDIVDI
+; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEDIVDI
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+  %conv4 = sext i32 %argc to i64
+  %div = sdiv i64 84, %conv4
+  %conv7 = trunc i64 %div to i32
+  ret i32 %conv7
+}
+
+; CHECK: alldiv
+; USEDIVDI: divdi3
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
new file mode 100644
index 000000000000..640237d0b504
--- /dev/null
+++ b/test/CodeGen/X86/andimm8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+
+; PR8365
+; CHECK: andl	$-64, %edi              # encoding: [0x83,0xe7,0xc0]
+
+define i64 @bra(i32 %zed) nounwind {
+ %t1 = zext i32 %zed to i64
+ %t2 = and i64  %t1, 4294967232
+ ret i64 %t2
+}
+
+; CHECK:  orq     $2, %rdi                # encoding: [0x48,0x83,0xcf,0x02]
+
+define void @foo(i64 %zed, i64* %x) nounwind {
+  %t1 = and i64 %zed, -4
+  %t2 = or i64 %t1, 2
+  store i64 %t2, i64* %x, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
new file mode 100644
index 000000000000..d0c64f243386
--- /dev/null
+++ b/test/CodeGen/X86/apm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR8573
+
+; CHECK: foo:
+; CHECK: leaq    (%rdi), %rax
+; CHECK-NEXT: movl    %esi, %ecx
+; CHECK-NEXT: monitor
+define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+  ret void
+}
+
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: bar:
+; CHECK: movl    %edi, %ecx
+; CHECK-NEXT: movl    %esi, %eax
+; CHECK-NEXT: mwait
+define void @bar(i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+  ret void
+}
+
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 3ef1887083d0..f3ade93c8a30 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,16 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t1
-; RUN: grep "lock" %t1 | count 17
-; RUN: grep "xaddl" %t1 | count 4 
-; RUN: grep "cmpxchgl"  %t1 | count 13 
-; RUN: grep "xchgl" %t1 | count 14
-; RUN: grep "cmova" %t1 | count 2
-; RUN: grep "cmovb" %t1 | count 2
-; RUN: grep "cmovg" %t1 | count 2
-; RUN: grep "cmovl" %t1 | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
-define void @main(i32 %argc, i8** %argv) {
+define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
 	%argc.addr = alloca i32		; <i32*> [#uses=1]
 	%argv.addr = alloca i8**		; <i8***> [#uses=1]
@@ -29,48 +21,105 @@ entry:
 	store i32 3855, i32* %ort
 	store i32 3855, i32* %xort
 	store i32 4, i32* %temp
-	%tmp = load i32* %temp		; <i32> [#uses=1]
+	%tmp = load i32* %temp
+        ; CHECK: lock
+        ; CHECK: xaddl
 	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp )		; <i32>:0 [#uses=1]
 	store i32 %0, i32* %old
+        ; CHECK: lock
+        ; CHECK: xaddl
 	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 )		; <i32>:1 [#uses=1]
 	store i32 %1, i32* %old
+        ; CHECK: lock
+        ; CHECK: xaddl
 	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 )		; <i32>:2 [#uses=1]
 	store i32 %2, i32* %old
+        ; CHECK: lock
+        ; CHECK: xaddl
 	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 )		; <i32>:3 [#uses=1]
 	store i32 %3, i32* %old
+        ; CHECK: andl
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 )		; <i32>:4 [#uses=1]
 	store i32 %4, i32* %old
+        ; CHECK: orl
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 )		; <i32>:5 [#uses=1]
 	store i32 %5, i32* %old
+        ; CHECK: xorl
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 )		; <i32>:6 [#uses=1]
 	store i32 %6, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 )		; <i32>:7 [#uses=1]
 	store i32 %7, i32* %old
 	%neg = sub i32 0, 1		; <i32> [#uses=1]
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg )		; <i32>:8 [#uses=1]
 	store i32 %8, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 )		; <i32>:9 [#uses=1]
 	store i32 %9, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 )		; <i32>:10 [#uses=1]
 	store i32 %10, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 65535 )		; <i32>:11 [#uses=1]
 	store i32 %11, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 10 )		; <i32>:12 [#uses=1]
 	store i32 %12, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 1 )		; <i32>:13 [#uses=1]
 	store i32 %13, i32* %old
+        ; CHECK: cmov
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 10 )		; <i32>:14 [#uses=1]
 	store i32 %14, i32* %old
+        ; CHECK: xchgl   %{{.*}}, {{.*}}(%esp)
 	call i32 @llvm.atomic.swap.i32.p0i32( i32* %val2, i32 1976 )		; <i32>:15 [#uses=1]
 	store i32 %15, i32* %old
 	%neg1 = sub i32 0, 10		; <i32> [#uses=1]
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 %neg1, i32 1 )		; <i32>:16 [#uses=1]
 	store i32 %16, i32* %old
+        ; CHECK: lock
+        ; CHECK: cmpxchgl
 	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 1976, i32 1 )		; <i32>:17 [#uses=1]
 	store i32 %17, i32* %old
 	ret void
 }
 
+define void @test2(i32 addrspace(256)* nocapture %P) nounwind {
+entry:
+; CHECK: lock
+; CHECK:	cmpxchgl	%{{.*}}, %gs:(%{{.*}})
+
+  %0 = tail call i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* %P, i32 0, i32 1)
+  ret void
+}
+
+declare i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* nocapture, i32, i32) nounwind
+
 declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind 
 
 declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind 
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
index a72160be719a..2bd3b5dfedd6 100644
--- a/test/CodeGen/X86/avx-128.ll
+++ b/test/CodeGen/X86/avx-128.ll
@@ -4,7 +4,7 @@
 
 define void @zero() nounwind ssp {
 entry:
-  ; CHECK: vpxor
+  ; CHECK: vxorps
   ; CHECK: vmovaps
   store <4 x float> zeroinitializer, <4 x float>* @z, align 16
   ret void
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 9de90237d146..6c32396a4177 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
 
 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vcomisd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl    %eax, %eax
+  ; CHECK: andl    $1, %eax
   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -230,7 +230,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
 
 
 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
-  ; CHECK: vcvttss2si
+  ; CHECK: vcvttsd2si
   %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
 
 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vucomisd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
 
 define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vptest 
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
 
 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vcomiss
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbb
   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
 
 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vucomiss
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
 
 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
   ; CHECK: vptest
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
 
 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vtestpd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
 
 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
   ; CHECK: vtestpd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
 
 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vtestps
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
 
 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
   ; CHECK: vtestps
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
   %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
index b1867105ce85..5a466fc3250f 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -17,7 +17,7 @@ declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readn
 
 
 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
-  ; CHECK: vcvttss2si
+  ; CHECK: vcvttsd2si
   %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
   ret i64 %res
 }
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
new file mode 100644
index 000000000000..ac972a8e2e5b
--- /dev/null
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 |  FileCheck %s
+
+
+define float @extractFloat1() nounwind {
+entry:
+  ; CHECK: 1065353216
+  %tmp0 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+  %tmp1 = extractelement <2 x float> %tmp0, i32 0 
+  ret float %tmp1
+}
+
+define float @extractFloat2() nounwind {
+entry:
+  ; CHECK: pxor	%xmm0, %xmm0
+  %tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
+  %tmp5 = extractelement <2 x float> %tmp4, i32 1
+  ret float %tmp5
+}
+
+define i32 @extractInt2() nounwind {
+entry:
+  ; CHECK: xorl	%eax, %eax
+  %tmp4 = bitcast <1 x i64> <i64 256> to <2 x i32>
+  %tmp5 = extractelement <2 x i32> %tmp4, i32 1
+  ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/X86/bit-test-shift.ll b/test/CodeGen/X86/bit-test-shift.ll
new file mode 100644
index 000000000000..7497613f2565
--- /dev/null
+++ b/test/CodeGen/X86/bit-test-shift.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8285015>
+
+define i32 @x(i32 %t) nounwind readnone ssp {
+entry:
+; CHECK: shll	$23, %eax
+; CHECK: sarl	$31, %eax
+; CHECK: andl	$-26, %eax
+  %and = and i32 %t, 256
+  %tobool = icmp eq i32 %and, 0
+  %retval.0 = select i1 %tobool, i32 0, i32 -26
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 2b7019371a17..3bb9124633d6 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep APP %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t
+; RUN: not grep InlineAsm %t
 ; RUN: FileCheck %s < %t
 
 ; CHECK: foo:
@@ -65,6 +65,13 @@ define i32 @t32(i32 %x) nounwind {
   ret i32 %asmtmp
 }
 
+; CHECK: u32:
+; CHECK: bswapl
+define i32 @u32(i32 %x) nounwind {
+  %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
+  ret i32 %asmtmp
+}
+
 ; CHECK: s64:
 ; CHECK: bswapq
 define i64 @s64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index af36e1bb8cb4..ac0bc094e56e 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep {movq	8(%rsp), %rax}
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movl	8(%esp), %edx} %t
-; RUN: grep {movl	4(%esp), %eax} %t
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+
+; X86: movl	4(%esp), %eax
+; X86: movl	8(%esp), %edx
+
+; X86-64: movq	8(%rsp), %rax
 
 %struct.s = type { i64, i64, i64 }
 
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
deleted file mode 100644
index 898c09b82f5e..000000000000
--- a/test/CodeGen/X86/cmp-test.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep test | count 1
-
-define i32 @f1(i32 %X, i32* %y) {
-	%tmp = load i32* %y		; <i32> [#uses=1]
-	%tmp.upgrd.1 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
-	br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
-
-cond_true:		; preds = %0
-	ret i32 1
-
-ReturnBlock:		; preds = %0
-	ret i32 0
-}
-
-define i32 @f2(i32 %X, i32* %y) {
-	%tmp = load i32* %y		; <i32> [#uses=1]
-	%tmp1 = shl i32 %tmp, 3		; <i32> [#uses=1]
-	%tmp1.upgrd.2 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
-	br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
-
-cond_true:		; preds = %0
-	ret i32 1
-
-ReturnBlock:		; preds = %0
-	ret i32 0
-}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
new file mode 100644
index 000000000000..ef5e353e9f9f
--- /dev/null
+++ b/test/CodeGen/X86/cmp.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
+
+define i32 @test1(i32 %X, i32* %y) nounwind {
+	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp.upgrd.1 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
+	br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
+
+cond_true:		; preds = %0
+	ret i32 1
+
+ReturnBlock:		; preds = %0
+	ret i32 0
+; CHECK: test1:
+; CHECK: cmpl	$0, (%rsi)
+}
+
+define i32 @test2(i32 %X, i32* %y) nounwind {
+	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp1 = shl i32 %tmp, 3		; <i32> [#uses=1]
+	%tmp1.upgrd.2 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true:		; preds = %0
+	ret i32 1
+
+ReturnBlock:		; preds = %0
+	ret i32 0
+; CHECK: test2:
+; CHECK: movl	(%rsi), %eax
+; CHECK: shll	$3, %eax
+; CHECK: testl	%eax, %eax
+}
+
+define i64 @test3(i64 %x) nounwind {
+  %t = icmp eq i64 %x, 0
+  %r = zext i1 %t to i64
+  ret i64 %r
+; CHECK: test3:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	sete	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
+}
+
+define i64 @test4(i64 %x) nounwind {
+  %t = icmp slt i64 %x, 1
+  %r = zext i1 %t to i64
+  ret i64 %r
+; CHECK: test4:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	setle	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
+}
+
+
+define i32 @test5(double %A) nounwind  {
+ entry:
+ %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
+ %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
+ %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
+ br i1 %bothcond, label %bb8, label %bb12
+
+ bb8:; preds = %entry
+ %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ ret i32 %tmp9
+
+ bb12:; preds = %entry
+ ret i32 32
+; CHECK: test5:
+; CHECK: ucomisd	LCPI4_0(%rip), %xmm0
+; CHECK: ucomisd	LCPI4_1(%rip), %xmm0
+}
+
+declare i32 @foo(...)
+
+define i32 @test6() nounwind align 2 {
+  %A = alloca {i64, i64}, align 8
+  %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1
+  %C = load i64* %B
+  %D = icmp eq i64 %C, 0
+  br i1 %D, label %T, label %F
+T:
+  ret i32 1
+  
+F:
+  ret i32 0
+; CHECK: test6:
+; CHECK: cmpq	$0, -8(%rsp)
+; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
deleted file mode 100644
index 4878448800cc..000000000000
--- a/test/CodeGen/X86/cmp0.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-define i64 @test0(i64 %x) nounwind {
-  %t = icmp eq i64 %x, 0
-  %r = zext i1 %t to i64
-  ret i64 %r
-; CHECK: test0:
-; CHECK: 	testq	%rdi, %rdi
-; CHECK: 	sete	%al
-; CHECK: 	movzbl	%al, %eax
-; CHECK: 	ret
-}
-
-define i64 @test1(i64 %x) nounwind {
-  %t = icmp slt i64 %x, 1
-  %r = zext i1 %t to i64
-  ret i64 %r
-; CHECK: test1:
-; CHECK: 	testq	%rdi, %rdi
-; CHECK: 	setle	%al
-; CHECK: 	movzbl	%al, %eax
-; CHECK: 	ret
-}
-
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
deleted file mode 100644
index 9a8e00c8bca0..000000000000
--- a/test/CodeGen/X86/cmp2.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
-
-define i32 @test(double %A) nounwind  {
- entry:
- %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
- %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
- %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
- br i1 %bothcond, label %bb8, label %bb12
-
- bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
- bb12:; preds = %entry
- ret i32 32
-}
-
-declare i32 @foo(...)
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 56ea26b658d8..89b436e75c9e 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,24 +2,62 @@
 ; insertion of register-register copies.
 
 ; Make sure there are only 3 mov's for each testcase
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep {\\\<mov\\\>} | count 6
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu   | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
 
 
-target triple = "i686-pc-linux-gnu"
 @G = external global i32                ; <i32*> [#uses=2]
 
 declare void @ext(i32)
 
-define i32 @add_test(i32 %X, i32 %Y) {
+define i32 @t1(i32 %X, i32 %Y) nounwind {
+; LINUX: t1:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: addl %eax, %ecx
+; LINUX: movl %ecx, G
         %Z = add i32 %X, %Y             ; <i32> [#uses=1]
         store i32 %Z, i32* @G
         ret i32 %X
 }
 
-define i32 @xor_test(i32 %X, i32 %Y) {
+define i32 @t2(i32 %X, i32 %Y) nounwind {
+; LINUX: t2:
+; LINUX: movl 4(%esp), %eax
+; LINUX: movl 8(%esp), %ecx
+; LINUX: xorl %eax, %ecx
+; LINUX: movl %ecx, G
         %Z = xor i32 %X, %Y             ; <i32> [#uses=1]
         store i32 %Z, i32* @G
         ret i32 %X
 }
 
+; rdar://8762995
+%0 = type { i64, i32 }
+
+define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
+entry:
+; DARWIN: t3:
+; DARWIN: shlq $32, %rcx
+; DARWIN-NOT: leaq
+; DARWIN: orq %rcx, %rax
+; DARWIN-NOT: mov
+; DARWIN: shll $16
+  %tmp21 = zext i32 %lb to i64
+  %tmp23 = zext i32 %ub to i64
+  %tmp24 = shl i64 %tmp23, 32
+  %ins26 = or i64 %tmp24, %tmp21
+  %tmp28 = zext i8 %has_lb to i32
+  %tmp33 = zext i8 %has_ub to i32
+  %tmp34 = shl i32 %tmp33, 8
+  %tmp38 = zext i8 %lb_inclusive to i32
+  %tmp39 = shl i32 %tmp38, 16
+  %tmp43 = zext i8 %ub_inclusive to i32
+  %tmp44 = shl i32 %tmp43, 24
+  %ins31 = or i32 %tmp39, %tmp28
+  %ins36 = or i32 %ins31, %tmp34
+  %ins46 = or i32 %ins36, %tmp44
+  %tmp16 = insertvalue %0 undef, i64 %ins26, 0
+  %tmp19 = insertvalue %0 %tmp16, i32 %ins46, 1
+  ret %0 %tmp19
+}
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
index 2be90c9764c2..9aa44a30af57 100644
--- a/test/CodeGen/X86/compare-inf.ll
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -5,7 +5,7 @@
 
 ; CHECK: oeq_inff:
 ; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
 define float @oeq_inff(float %x, float %y) nounwind readonly {
   %t0 = fcmp oeq float %x, 0x7FF0000000000000
   %t1 = select i1 %t0, float 1.0, float %y
@@ -14,7 +14,7 @@ define float @oeq_inff(float %x, float %y) nounwind readonly {
 
 ; CHECK: oeq_inf:
 ; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
 define double @oeq_inf(double %x, double %y) nounwind readonly {
   %t0 = fcmp oeq double %x, 0x7FF0000000000000
   %t1 = select i1 %t0, double 1.0, double %y
@@ -23,7 +23,7 @@ define double @oeq_inf(double %x, double %y) nounwind readonly {
 
 ; CHECK: une_inff:
 ; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
 define float @une_inff(float %x, float %y) nounwind readonly {
   %t0 = fcmp une float %x, 0x7FF0000000000000
   %t1 = select i1 %t0, float 1.0, float %y
@@ -32,7 +32,7 @@ define float @une_inff(float %x, float %y) nounwind readonly {
 
 ; CHECK: une_inf:
 ; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
 define double @une_inf(double %x, double %y) nounwind readonly {
   %t0 = fcmp une double %x, 0x7FF0000000000000
   %t1 = select i1 %t0, double 1.0, double %y
@@ -41,7 +41,7 @@ define double @une_inf(double %x, double %y) nounwind readonly {
 
 ; CHECK: oeq_neg_inff:
 ; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
 define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
   %t0 = fcmp oeq float %x, 0xFFF0000000000000
   %t1 = select i1 %t0, float 1.0, float %y
@@ -50,7 +50,7 @@ define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
 
 ; CHECK: oeq_neg_inf:
 ; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
 define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
   %t0 = fcmp oeq double %x, 0xFFF0000000000000
   %t1 = select i1 %t0, double 1.0, double %y
@@ -59,7 +59,7 @@ define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
 
 ; CHECK: une_neg_inff:
 ; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
 define float @une_neg_inff(float %x, float %y) nounwind readonly {
   %t0 = fcmp une float %x, 0xFFF0000000000000
   %t1 = select i1 %t0, float 1.0, float %y
@@ -68,7 +68,7 @@ define float @une_neg_inff(float %x, float %y) nounwind readonly {
 
 ; CHECK: une_neg_inf:
 ; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
 define double @une_neg_inf(double %x, double %y) nounwind readonly {
   %t0 = fcmp une double %x, 0xFFF0000000000000
   %t1 = select i1 %t0, double 1.0, double %y
diff --git a/test/CodeGen/X86/complex-asm.ll b/test/CodeGen/X86/complex-asm.ll
new file mode 100644
index 000000000000..49878b982db3
--- /dev/null
+++ b/test/CodeGen/X86/complex-asm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; This formerly crashed.
+
+%0 = type { i64, i64 }
+
+define %0 @f() nounwind ssp {
+entry:
+  %v = alloca %0, align 8
+  call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
+  %0 = getelementptr inbounds %0* %v, i64 0, i32 0
+  %1 = load i64* %0, align 8
+  %2 = getelementptr inbounds %0* %v, i64 0, i32 1
+  %3 = load i64* %2, align 8
+  %mrv4 = insertvalue %0 undef, i64 %1, 0
+  %mrv5 = insertvalue %0 %mrv4, i64 %3, 1
+  ret %0 %mrv5
+}
diff --git a/test/CodeGen/X86/conditional-indecrement.ll b/test/CodeGen/X86/conditional-indecrement.ll
new file mode 100644
index 000000000000..a3a0c39905aa
--- /dev/null
+++ b/test/CodeGen/X86/conditional-indecrement.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i32 @test1(i32 %a, i32 %b) nounwind readnone {
+  %not.cmp = icmp ne i32 %a, 0
+  %inc = zext i1 %not.cmp to i32
+  %retval.0 = add i32 %inc, %b
+  ret i32 %retval.0
+; CHECK: test1:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
+
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+  %cmp = icmp eq i32 %a, 0
+  %inc = zext i1 %cmp to i32
+  %retval.0 = add i32 %inc, %b
+  ret i32 %retval.0
+; CHECK: test2:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+  %cmp = icmp eq i32 %a, 0
+  %inc = zext i1 %cmp to i32
+  %retval.0 = add i32 %inc, %b
+  ret i32 %retval.0
+; CHECK: test3:
+; CHECK: cmpl $1
+; CHECK: adcl $0
+; CHECK: ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+  %not.cmp = icmp ne i32 %a, 0
+  %inc = zext i1 %not.cmp to i32
+  %retval.0 = add i32 %inc, %b
+  ret i32 %retval.0
+; CHECK: test4:
+; CHECK: cmpl $1
+; CHECK: sbbl $-1
+; CHECK: ret
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind readnone {
+  %not.cmp = icmp ne i32 %a, 0
+  %inc = zext i1 %not.cmp to i32
+  %retval.0 = sub i32 %b, %inc
+  ret i32 %retval.0
+; CHECK: test5:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
+
+define i32 @test6(i32 %a, i32 %b) nounwind readnone {
+  %cmp = icmp eq i32 %a, 0
+  %inc = zext i1 %cmp to i32
+  %retval.0 = sub i32 %b, %inc
+  ret i32 %retval.0
+; CHECK: test6:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind readnone {
+  %cmp = icmp eq i32 %a, 0
+  %inc = zext i1 %cmp to i32
+  %retval.0 = sub i32 %b, %inc
+  ret i32 %retval.0
+; CHECK: test7:
+; CHECK: cmpl $1
+; CHECK: sbbl $0
+; CHECK: ret
+}
+
+define i32 @test8(i32 %a, i32 %b) nounwind readnone {
+  %not.cmp = icmp ne i32 %a, 0
+  %inc = zext i1 %not.cmp to i32
+  %retval.0 = sub i32 %b, %inc
+  ret i32 %retval.0
+; CHECK: test8:
+; CHECK: cmpl $1
+; CHECK: adcl $-1
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
deleted file mode 100644
index 665984ce28ed..000000000000
--- a/test/CodeGen/X86/const-select.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-
-; RUN: llc < %s | grep {LCPI0_0(,%eax,4)}
-define float @f(i32 %x) nounwind readnone {
-entry:
-	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
-	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01		; <float> [#uses=1]
-	ret float %iftmp.0.0
-}
-
-; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
-define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
-entry:
-	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
-	%iftmp.0.0 = select i1 %0, i32 4, i32 0		; <i32> [#uses=1]
-	%1 = getelementptr i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
-	%2 = load i8* %1, align 1		; <i8> [#uses=1]
-	ret i8 %2
-}
-
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index a14a48baa355..2d8e63e31342 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -141,3 +141,61 @@ entry:
   call void asm sideeffect "outb $0, ${1:w}", "{ax},N{dx},~{dirflag},~{fpsr},~{flags}"(i8 %conv4.i, i32 1017) nounwind
   unreachable
 }
+
+; Crash trying to form conditional increment with fp value.
+; PR8981
+define i32 @test9(double %X) ssp align 2 {
+entry:
+  %0 = fcmp one double %X, 0.000000e+00
+  %cond = select i1 %0, i32 1, i32 2
+  ret i32 %cond
+}
+
+
+; PR8514 - Crash in match address do to "heroics" turning and-of-shift into
+; shift of and.
+%struct.S0 = type { i8, [2 x i8], i8 }
+
+define void @func_59(i32 %p_63) noreturn nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc44, %entry
+  %p_63.addr.1 = phi i32 [ %p_63, %entry ], [ 0, %for.inc44 ]
+  %l_74.0 = phi i32 [ 0, %entry ], [ %add46, %for.inc44 ]
+  br i1 undef, label %for.inc44, label %bb.nph81
+
+bb.nph81:                                         ; preds = %for.body
+  %tmp98 = add i32 %p_63.addr.1, 0
+  br label %for.body22
+
+for.body22:                                       ; preds = %for.body22, %bb.nph81
+  %l_75.077 = phi i64 [ %ins, %for.body22 ], [ undef, %bb.nph81 ]
+  %tmp110 = trunc i64 %l_75.077 to i32
+  %tmp111 = and i32 %tmp110, 65535
+  %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0
+  store i8 1, i8* %arrayidx32.0, align 4
+  %tmp106 = shl i32 %tmp110, 2
+  %tmp107 = and i32 %tmp106, 262140
+  %scevgep99.sum114 = or i32 %tmp107, 1
+  %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114
+  store i8 0, i8* %arrayidx32.1.1, align 1
+  %ins = or i64 undef, undef
+  br label %for.body22
+
+for.inc44:                                        ; preds = %for.body
+  %add46 = add i32 %l_74.0, 1
+  br label %for.body
+}
+
+; PR9028
+define void @f(i64 %A) nounwind {
+entry:
+  %0 = zext i64 %A to i160
+  %1 = shl i160 %0, 64
+  %2 = zext i160 %1 to i576
+  %3 = zext i96 undef to i576
+  %4 = or i576 %3, %2
+  store i576 %4, i576* undef, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/critical-edge-split-2.ll b/test/CodeGen/X86/critical-edge-split-2.ll
new file mode 100644
index 000000000000..70301cd9bcc4
--- /dev/null
+++ b/test/CodeGen/X86/critical-edge-split-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type <{ %1, %1 }>
+%1 = type { i8, i8, i8, i8 }
+
+@g_2 = global %0 zeroinitializer
+@g_4 = global %1 zeroinitializer, align 4
+
+
+; PR8642
+define i16 @test1(i1 zeroext %C, i8** nocapture %argv) nounwind ssp {
+entry:
+  br i1 %C, label %cond.end.i, label %cond.false.i
+
+cond.false.i:                                     ; preds = %entry
+  br label %cond.end.i
+
+cond.end.i:                                       ; preds = %entry
+  %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
+  ret i16 %call1
+}
+
+; CHECK: test1:
+; CHECK: testb %dil, %dil
+; CHECK: jne LBB0_2
+; CHECK: divl
+; CHECK: LBB0_2:
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
deleted file mode 100644
index 96fef0fbfc61..000000000000
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
-
-	%CC = type { %Register }
-	%II = type { %"struct.XX::II::$_74" }
-	%JITFunction = type %YYValue* (%CC*, %YYValue**)
-	%YYValue = type { i32 (...)** }
-	%Register = type { %"struct.XX::ByteCodeFeatures" }
-	%"struct.XX::ByteCodeFeatures" = type { i32 }
-	%"struct.XX::II::$_74" = type { i8* }
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define %YYValue* @loop(%CC*, %YYValue**) nounwind {
-; <label>:2
-	%3 = getelementptr %CC* %0, i32 -9		; <%CC*> [#uses=1]
-	%4 = bitcast %CC* %3 to %YYValue**		; <%YYValue**> [#uses=2]
-	%5 = load %YYValue** %4		; <%YYValue*> [#uses=3]
-	%unique_1.i = ptrtoint %YYValue* %5 to i1		; <i1> [#uses=1]
-	br i1 %unique_1.i, label %loop, label %11
-
-loop:		; preds = %6, %2
-	%.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ]		; <%YYValue*> [#uses=3]
-	%immediateCmp = icmp slt %YYValue* %.1, %5		; <i1> [#uses=1]
-	br i1 %immediateCmp, label %6, label %8
-
-; <label>:6		; preds = %loop
-	%lhsInt = ptrtoint %YYValue* %.1 to i32		; <i32> [#uses=1]
-	%7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2)		; <{ i32, i1 }> [#uses=2]
-	%intAdd = extractvalue { i32, i1 } %7, 0		; <i32> [#uses=1]
-	%intAddValue = inttoptr i32 %intAdd to %YYValue*		; <%YYValue*> [#uses=1]
-	%intAddOverflow = extractvalue { i32, i1 } %7, 1		; <i1> [#uses=1]
-	br i1 %intAddOverflow, label %.loopexit, label %loop
-
-; <label>:8		; preds = %loop
-	ret %YYValue* inttoptr (i32 10 to %YYValue*)
-
-.loopexit:		; preds = %6
-	%9 = bitcast %CC* %0 to %YYValue**		; <%YYValue**> [#uses=1]
-	store %YYValue* %.1, %YYValue** %9
-	store %YYValue* %5, %YYValue** %4
-	%10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1)		; <%YYValue*> [#uses=1]
-	ret %YYValue* %10
-
-; <label>:11		; preds = %2
-	%12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1)		; <%YYValue*> [#uses=1]
-	ret %YYValue* %12
-}
-
-declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
new file mode 100644
index 000000000000..c957d385a24a
--- /dev/null
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+
+define i32 @test1(i64 %x) nounwind readnone {
+  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+  %cast = trunc i64 %count to i32
+  %cmp = icmp ugt i32 %cast, 1
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK: test1:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: setne
+; CHECK: ret
+}
+
+
+define i32 @test2(i64 %x) nounwind readnone {
+  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+  %cmp = icmp ult i64 %count, 2
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK: test2:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: sete
+; CHECK: ret
+}
+
+define i32 @test3(i64 %x) nounwind readnone {
+  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+  %cast = trunc i64 %count to i6 ; Too small for 0-64
+  %cmp = icmp ult i6 %cast, 2
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK: test3:
+; CHECK: cmpb $2
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index 5cc6eaa405ad..dae91d5ccdd6 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
 
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
diff --git a/test/CodeGen/X86/dbg-live-in-location.ll b/test/CodeGen/X86/dbg-live-in-location.ll
new file mode 100644
index 000000000000..9b1464d415f9
--- /dev/null
+++ b/test/CodeGen/X86/dbg-live-in-location.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+@str = internal constant [3 x i8] c"Hi\00"
+
+define void @foo() nounwind ssp {
+entry:
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0))
+  ret void, !dbg !17
+}
+
+; CHECK: arg.c:5:14
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !9), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !10), !dbg !20
+  %cmp = icmp sgt i32 %argc, 1, !dbg !21
+  br i1 %cmp, label %cond.end, label %for.body.lr.ph, !dbg !21
+
+cond.end:                                         ; preds = %entry
+  %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !21
+  %tmp2 = load i8** %arrayidx, align 8, !dbg !21, !tbaa !22
+  %call = tail call i32 (...)* @atoi(i8* %tmp2) nounwind, !dbg !21
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !16), !dbg !21
+  tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !14), !dbg !26
+  %cmp57 = icmp sgt i32 %call, 0, !dbg !26
+  br i1 %cmp57, label %for.body.lr.ph, label %for.end, !dbg !26
+
+for.body.lr.ph:                                   ; preds = %entry, %cond.end
+  %cond10 = phi i32 [ %call, %cond.end ], [ 300, %entry ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0)) nounwind
+  %inc = add nsw i32 %i.08, 1, !dbg !27
+  %exitcond = icmp eq i32 %inc, %cond10
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !26
+
+for.end:                                          ; preds = %for.body, %cond.end
+  ret i32 0, !dbg !29
+}
+
+declare i32 @atoi(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!llvm.dbg.sp = !{!0, !5}
+!llvm.dbg.lv.main = !{!9, !10, !14, !16}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"arg.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"arg.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124504)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 590081, metadata !5, metadata !"argc", metadata !1, i32 5, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 590081, metadata !5, metadata !"argv", metadata !1, i32 5, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590080, metadata !15, metadata !"i", metadata !1, i32 7, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 589835, metadata !5, i32 6, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 590080, metadata !15, metadata !"iterations", metadata !1, i32 8, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 4, i32 1, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 5, i32 14, metadata !5, null}
+!20 = metadata !{i32 5, i32 26, metadata !5, null}
+!21 = metadata !{i32 8, i32 51, metadata !15, null}
+!22 = metadata !{metadata !"any pointer", metadata !23}
+!23 = metadata !{metadata !"omnipotent char", metadata !24}
+!24 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!25 = metadata !{i32 0}
+!26 = metadata !{i32 9, i32 2, metadata !15, null}
+!27 = metadata !{i32 9, i32 30, metadata !28, null}
+!28 = metadata !{i32 589835, metadata !15, i32 9, i32 2, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 12, i32 9, metadata !15, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
new file mode 100644
index 000000000000..83df1478cf18
--- /dev/null
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin8"
+
+;CHECK: Ldebug_loc0:
+;CHECK-NEXT:	.quad	Lfunc_begin0
+;CHECK-NEXT:	.quad	Lfunc_end0
+;CHECK-NEXT:	.short	1                       ## Loc expr size
+;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
+;CHECK-NEXT:	.quad	0
+;CHECK-NEXT:	.quad	0
+
+%0 = type { i64, i1 }
+
+@__clz_tab = external constant [256 x i8]
+
+define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
+  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
+  br i1 undef, label %bb2, label %bb4, !dbg !22
+
+bb2:                                              ; preds = %entry
+  br label %bb4, !dbg !23
+
+bb4:                                              ; preds = %bb2, %entry
+  br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24
+
+bb82.i:                                           ; preds = %bb4
+  unreachable
+
+__udivmodti4.exit:                                ; preds = %bb4
+  ret i128 undef, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !9}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
+!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{metadata !12, metadata !12, metadata !12}
+!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 1093, i32 0, metadata !9, null}
+!16 = metadata !{i64 0}
+!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 1095, i32 0, metadata !18, null}
+!22 = metadata !{i32 1103, i32 0, metadata !18, null}
+!23 = metadata !{i32 1104, i32 0, metadata !18, null}
+!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
+!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 1107, i32 0, metadata !18, null}
+!27 = metadata !{i32 1111, i32 0, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
new file mode 100644
index 000000000000..89bbf34a1286
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+;CHECK: DW_TAG_inlined_subroutine
+;CHECK-NEXT: DW_AT_abstract_origin
+;CHECK-NEXT: DW_AT_low_pc
+;CHECK-NEXT: DW_AT_high_pc
+;CHECK-NEXT: DW_AT_call_file
+;CHECK-NEXT: DW_AT_call_line
+;CHECK-NEXT: DW_TAG_formal_parameter
+;CHECK-NEXT: .ascii   "sp"                   ## DW_AT_name
+
+%struct.S1 = type { float*, i32 }
+
+@p = common global %struct.S1 zeroinitializer, align 8
+
+define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
+  %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
+  store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
+  %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
+  %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
+  store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
+  %cmp = icmp ne float* %call, null, !dbg !29
+  %cond = zext i1 %cmp to i32, !dbg !29
+  ret i32 %cond, !dbg !29
+}
+
+declare float* @bar(i32) optsize
+
+define void @foobar() nounwind optsize ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
+  tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
+  store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
+  %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
+  store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
+  ret void, !dbg !38
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.lv.foo = !{!9, !18}
+!llvm.dbg.gv = !{!19}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{metadata !14, metadata !17}
+!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
+!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 7, i32 13, metadata !0, null}
+!21 = metadata !{i32 7, i32 21, metadata !0, null}
+!22 = metadata !{i32 9, i32 3, metadata !23, null}
+!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !"int", metadata !25}
+!25 = metadata !{metadata !"omnipotent char", metadata !26}
+!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!27 = metadata !{i32 10, i32 3, metadata !23, null}
+!28 = metadata !{metadata !"any pointer", metadata !25}
+!29 = metadata !{i32 11, i32 3, metadata !23, null}
+!30 = metadata !{%struct.S1* @p}
+!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
+!32 = metadata !{i32 16, i32 3, metadata !33, null}
+!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 1}
+!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
+!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
+!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
+!38 = metadata !{i32 17, i32 1, metadata !33, null}
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
new file mode 100644
index 000000000000..2449046c65fb
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+;Radar 8950491
+
+;CHECK:        .ascii   "var"                  ## DW_AT_name
+;CHECK-NEXT:        .byte   0
+;CHECK-NEXT:        .byte   2                       ## DW_AT_decl_file
+;CHECK-NEXT:        .short  19509                   ## DW_AT_decl_line
+;CHECK-NEXT:        .long   68                      ## DW_AT_type
+;CHECK-NEXT:        .byte   1                       ## DW_AT_location
+
+@dfm = external global i32, align 4
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
+entry:
+  call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13
+  %tmp.i = load i32* @dfm, align 4, !dbg !14
+  %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
+  br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
+
+if.end.i:                                         ; preds = %entry
+  switch i64 %cmd, label %if.then [
+    i64 2147772420, label %bb.i
+    i64 536897538, label %bb116.i
+  ], !dbg !22
+
+bb.i:                                             ; preds = %if.end.i
+  unreachable
+
+bb116.i:                                          ; preds = %if.end.i
+  unreachable
+
+if.then:                                          ; preds = %if.end.i
+  ret i32 undef, !dbg !23
+
+if.else:                                          ; preds = %entry
+  ret i32 0
+}
+
+declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
+declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
+declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !6, !7, !8}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 19509, i32 20, metadata !0, null}
+!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
+!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 19514, i32 2, metadata !18, null}
+!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
+!23 = metadata !{i32 19524, i32 1, metadata !18, null}
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
new file mode 100644
index 000000000000..2985224d9dbd
--- /dev/null
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+%struct.a = type { i32 }
+
+define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
+  %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
+  %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15
+  tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
+  %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
+  %add = add nsw i32 %tmp2, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare i32 @foo(...) 
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.bar = !{!6, !11}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 5, i32 19, metadata !0, null}
+!14 = metadata !{i32 6, i32 14, metadata !12, null}
+!15 = metadata !{metadata !"int", metadata !16}
+!16 = metadata !{metadata !"omnipotent char", metadata !17}
+!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!18 = metadata !{i32 7, i32 2, metadata !12, null}
+!19 = metadata !{i32 8, i32 2, metadata !12, null}
+
+; check that variable bar:b value range is appropriately trucated in debug info. Here Ltmp5 is end of
+; location range.
+
+;CHECK:Ltmp6
+;CHECK-NEXT: DEBUG_VALUE: bar:b <- undef
+
+;CHECK:Ldebug_loc0:
+;CHECK-NEXT:	.quad	Ltmp
+;CHECK-NEXT:	.quad	Ltmp6
+;CHECK-NEXT:	.short	1
+;CHECK-NEXT:	.byte	85
+;CHECK-NEXT:	.quad	0
+;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
deleted file mode 100644
index f0ada41338b2..000000000000
--- a/test/CodeGen/X86/div_const.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86 | grep 365384439
-
-define i32 @f9188_mul365384439_shift27(i32 %A) {
-        %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
-        ret i32 %tmp1
-}
-
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
new file mode 100644
index 000000000000..7ceb972f61bb
--- /dev/null
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @test1(i16 zeroext %x) nounwind {
+entry:
+	%div = udiv i16 %x, 33
+	ret i16 %div
+; CHECK: test1:
+; CHECK: imull	$63551, %eax, %eax
+; CHECK-NEXT: shrl	$21, %eax
+; CHECK-NEXT: ret
+}
+
+define zeroext i16 @test2(i8 signext %x, i16 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+  %div = udiv i16 %c, 3
+  ret i16 %div
+
+; CHECK: test2:
+; CHECK: imull	$43691, %eax, %eax
+; CHECK-NEXT: shrl	$17, %eax
+; CHECK-NEXT: ret
+}
+
+define zeroext i8 @test3(i8 zeroext %x, i8 zeroext %c) nounwind readnone ssp noredzone {
+entry:
+  %div = udiv i8 %c, 3
+  ret i8 %div
+
+; CHECK: test3:
+; CHECK: movzbl  8(%esp), %eax
+; CHECK-NEXT: imull	$171, %eax, %eax
+; CHECK-NEXT: shrl	$9, %eax
+; CHECK-NEXT: ret
+}
+
+define signext i16 @test4(i16 signext %x) nounwind {
+entry:
+	%div = sdiv i16 %x, 33		; <i32> [#uses=1]
+	ret i16 %div
+; CHECK: test4:
+; CHECK: imull	$-1985, %ecx, %ecx 
+}
+
+define i32 @test5(i32 %A) nounwind {
+        %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
+        ret i32 %tmp1
+; CHECK: test5:
+; CHECK: movl	$365384439, %eax
+; CHECK: mull	4(%esp)
+}
+
+define signext i16 @test6(i16 signext %x) nounwind {
+entry:
+  %div = sdiv i16 %x, 10
+  ret i16 %div
+; CHECK: test6:
+; CHECK: imull	$26215, %eax, %eax
+; CHECK: shrl	$31, %ecx
+; CHECK: sarl	$18, %eax
+}
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
index c634c7e1fd42..913617585206 100644
--- a/test/CodeGen/X86/dll-linkage.ll
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -3,7 +3,7 @@
 declare dllimport void @foo()
 
 define void @bar() nounwind {
-; CHECK: call	*__imp__foo
+; CHECK: calll	*__imp__foo
   call void @foo()
   ret void
 }
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 3b263194a5a8..2ecd72909cb1 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -7,7 +7,7 @@
 define i32 @"$foo"() nounwind {
 ; CHECK: movl	($bar),
 ; CHECK: addl	($qux),
-; CHECK: call	($hen)
+; CHECK: calll	($hen)
   %m = load i32* @"$bar"
   %n = load i32* @"$qux"
   %t = add i32 %m, %n
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
new file mode 100644
index 000000000000..9233d3f7c1a0
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; rdar://8396318
+
+; Don't emit a PIC base register if no addresses are needed.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind ssp {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %z.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  store i32 %z, i32* %z.addr, align 4
+  %tmp = load i32* %x.addr, align 4
+  %tmp1 = load i32* %y.addr, align 4
+  %add = add nsw i32 %tmp, %tmp1
+  %tmp2 = load i32* %z.addr, align 4
+  %add3 = add nsw i32 %add, %tmp2
+  ret i32 %add3
+}
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 8d7dc8f9a7f8..4abc3b5b3c85 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,19 +1,23 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
 ; PR4684
 
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.8"
 
-declare void @func2(<1 x i64>)
+declare void @func2(x86_mmx)
 
 define void @func1() nounwind {
 
 ; This isn't spectacular, but it's MMX code at -O0...
-; CHECK: movl $2, %eax
-; CHECK: movd %rax, %mm0
-; CHECK: movd %mm0, %rdi
+; CHECK:  movq2dq %mm0, %xmm0
+; For now, handling of x86_mmx parameters in fast Isel is unimplemented,
+; so we get pretty poor code.  The below is preferable.
+; CHEK: movl $2, %eax
+; CHEK: movd %rax, %mm0
+; CHEK: movd %mm0, %rdi
 
-        call void @func2(<1 x i64> <i64 2>)
+        %tmp0 = bitcast <2 x i32><i32 0, i32 2> to x86_mmx
+        call void @func2(x86_mmx %tmp0)
         ret void
 }
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 577dd7223a4d..622a1ff831d0 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -70,3 +70,20 @@ entry:
 ; X64: test4:
 ; X64: 128(%r{{.*}},%r{{.*}},8)
 }
+
+; PR8961 - Make sure the sext for the GEP addressing comes before the load that
+; is folded.
+define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
+  %v8 = getelementptr i8* %A, i32 %I
+  %v9 = bitcast i8* %v8 to i64*
+  %v10 = load i64* %v9
+  %v11 = add i64 %B, %v10
+  ret i64 %v11
+; X64: test5:
+; X64: movslq	%esi, %rax
+; X64-NEXT: movq	(%rdi,%rax), %rax
+; X64-NEXT: addq	%rdx, %rax
+; X64-NEXT: ret
+}
+
+
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 35ec1e7115b2..8db1936bc20e 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN:   grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN:   grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
 
 @src = external global i32
 
+; rdar://6653118
 define i32 @loadgv() nounwind {
 entry:
 	%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
         %2 = add i32 %0, %1
         store i32 %2, i32* @src
 	ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: 	movl	L_src$non_lazy_ptr, %ecx
+; CHECK: 	movl	(%ecx), %eax
+; CHECK: 	addl	(%ecx), %eax
+; CHECK: 	movl	%eax, (%ecx)
+; CHECK: 	ret
+
 }
 
 %stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
 entry:
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
 	ret void
+; CHECK: _t:
+; CHECK:	movl	$0, %eax
+; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
+
 }
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
new file mode 100644
index 000000000000..2ffcb966782a
--- /dev/null
+++ b/test/CodeGen/X86/fltused.ll
@@ -0,0 +1,19 @@
+; The purpose of this test to to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows. And that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @main() nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
index 08ea77d75f26..6966cf049789 100644
--- a/test/CodeGen/X86/fp-in-intregs.ll
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; CHECK-NOT:     {{((xor|and)ps|movd)}}
 
 ; These operations should be done in integer registers, eliminating constant
 ; pool loads, movd's etc.
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4bdf4590b07c..b216914d2391 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN:   grep {fucomi.*st.\[12\]}
+; RUN: llc < %s -march=x86 -mcpu=i386 | grep {fucompi.*st.\[12\]}
 ; PR1012
 
 define float @foo(float* %col.2.0) {
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 9393cf5a7383..0e65cfdbae30 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -11,9 +11,9 @@ define void @zap(i32 %a, i32 %b) nounwind {
 entry:
   ; CHECK: movl {{[0-9]*}}(%esp), %ebx
   ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
-  ; CHECK-NEXT: call addtwo
+  ; CHECK-NEXT: calll addtwo
   %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
-  ; CHECK: call foo
+  ; CHECK: calll foo
   call void @foo() nounwind
   ret void
 }
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 6d211913b015..39a69e17a100 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -15,7 +15,7 @@
 
 
 ; const int G2 __attribute__((weak)) = 42;
-@G2 = weak_odr constant i32 42	
+@G2 = weak_odr unnamed_addr constant i32 42	
 
 
 ; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
@@ -26,7 +26,7 @@
 
 
 ; int * const G3 = &G1;
-@G3 = constant i32* @G1
+@G3 = unnamed_addr constant i32* @G1
 
 ; DARWIN: .section        __DATA,__const
 ; DARWIN: .globl _G3
@@ -41,7 +41,7 @@
 
 
 ; _Complex long long const G4 = 34;
-@G4 = constant {i64,i64} { i64 34, i64 0 }
+@G4 = unnamed_addr constant {i64,i64} { i64 34, i64 0 }
 
 ; DARWIN: .section        __TEXT,__const
 ; DARWIN: _G4:
@@ -66,7 +66,7 @@
 @"foo bar" = linkonce global i32 42
 
 ; LINUX: .type	foo_20_bar,@object
-; LINUX:.section	.gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat
 ; LINUX: .weak	foo_20_bar
 ; LINUX: foo_20_bar:
 
@@ -76,10 +76,10 @@
 ; DARWIN: "_foo bar":
 
 ; PR4650
-@G6 = weak_odr constant [1 x i8] c"\01"
+@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01"
 
 ; LINUX:   .type	G6,@object
-; LINUX:   .section	.gnu.linkonce.r.G6,"a",@progbits
+; LINUX:   .section	.rodata.G6,"aG",@progbits,G6,comdat
 ; LINUX:   .weak	G6
 ; LINUX: G6:
 ; LINUX:   .byte	1
@@ -92,7 +92,7 @@
 ; DARWIN:  .byte 1
 
 
-@G7 = constant [10 x i8] c"abcdefghi\00"
+@G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00"
 
 ; DARWIN:	__TEXT,__cstring,cstring_literals
 ; DARWIN:	.globl _G7
@@ -108,7 +108,7 @@
 ; LINUX-SECTIONS:	.globl G7
 
 
-@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+@G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
 
 ; DARWIN:	.section	__TEXT,__const
 ; DARWIN:	.globl _G8
@@ -118,7 +118,7 @@
 ; LINUX:	.globl G8
 ; LINUX:G8:
 
-@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+@G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
 
 ; DARWIN:	.globl _G9
 ; DARWIN: _G9:
diff --git a/test/CodeGen/X86/inline-asm-h.ll b/test/CodeGen/X86/inline-asm-h.ll
new file mode 100644
index 000000000000..53cf419bd11a
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-h.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s | FileCheck %s
+
+@foobar = common global i32 0, align 4
+
+define void @zed() nounwind {
+entry:
+  call void asm "movq %mm2,${0:H}", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* @foobar) nounwind
+  ret void
+}
+
+; CHECK: zed
+; CHECK: movq %mm2,foobar+8(%rip)
diff --git a/test/CodeGen/X86/inline-asm-ptr-cast.ll b/test/CodeGen/X86/inline-asm-ptr-cast.ll
new file mode 100644
index 000000000000..50e302101814
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu <%s
+; ModuleID = 'bug.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@func.flagmask = internal constant i64 1, align 8
+
+define void @func() nounwind {
+entry:
+  %src = alloca i32, align 4
+  %dst = alloca i32, align 4
+  %flags = alloca i64, align 8
+  %newflags = alloca i64, align 8
+  store i32 0, i32* %src, align 4
+  store i32 0, i32* %dst, align 4
+  store i64 1, i64* %flags, align 8
+  store i64 -1, i64* %newflags, align 8
+  %0 = bitcast i32* %dst to i8*
+  %tmp = load i64* %flags, align 8
+  %and = and i64 %tmp, 1
+  %1 = bitcast i32* %src to i8*
+  %tmp1 = load i8* %1
+  %2 = bitcast i32* %dst to i8*
+  %tmp2 = load i8* %2
+  call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq  $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
index 18aade2bb302..3805cbbaaaf8 100644
--- a/test/CodeGen/X86/insertelement-legalize.ll
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx
+; RUN: llc < %s -march=x86
 
 ; Test to check that we properly legalize an insert vector element
 define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
diff --git a/test/CodeGen/X86/legalize-sub-zero-2.ll b/test/CodeGen/X86/legalize-sub-zero-2.ll
new file mode 100644
index 000000000000..f02ca715aeeb
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero-2.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+define fastcc void @foo(i32 %type) nounwind optsize {
+entry:
+  switch i32 %type, label %bb26 [
+    i32 33634, label %bb11
+    i32 5121, label %bb27
+  ]
+
+bb11:                                             ; preds = %entry
+  br label %bb27
+
+bb26:                                             ; preds = %entry
+  unreachable
+
+bb27:                                             ; preds = %bb11, %entry
+  %srcpb.0 = phi i32 [ 1, %bb11 ], [ 0, %entry ]
+  br i1 undef, label %bb348, label %bb30.lr.ph
+
+bb30.lr.ph:                                       ; preds = %bb27
+  %.sum743 = shl i32 %srcpb.0, 1
+  %0 = mul i32 %srcpb.0, -2
+  %.sum745 = add i32 %.sum743, %0
+  br i1 undef, label %bb70, label %bb71
+
+bb70:                                             ; preds = %bb30.lr.ph
+  unreachable
+
+bb71:                                             ; preds = %bb30.lr.ph
+  br i1 undef, label %bb92, label %bb80
+
+bb80:                                             ; preds = %bb71
+  unreachable
+
+bb92:                                             ; preds = %bb71
+  %1 = getelementptr inbounds i8* undef, i32 %.sum745
+  unreachable
+
+bb348:                                            ; preds = %bb27
+  ret void
+}
diff --git a/test/CodeGen/X86/legalize-sub-zero.ll b/test/CodeGen/X86/legalize-sub-zero.ll
new file mode 100644
index 000000000000..ee76d468e811
--- /dev/null
+++ b/test/CodeGen/X86/legalize-sub-zero.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i686-pc-win32
+
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+;target triple = "i686-pc-win32"
+
+define void @test() nounwind {
+  %1 = fdiv <3 x double> zeroinitializer, undef
+  %2 = fdiv <2 x double> zeroinitializer, undef
+  %3 = shufflevector <2 x double> %2, <2 x double> undef, <3 x i32> <i32 0, i32
+1, i32 undef>
+  %4 = insertelement <3 x double> %3, double undef, i32 2
+  %5 = bitcast <3 x double> %1 to <3 x i64>
+  %6 = bitcast <3 x double> %4 to <3 x i64>
+  %7 = sub <3 x i64> %5, %6
+  %8 = shufflevector <3 x i64> %7, <3 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %9 = xor <2 x i64> %8, zeroinitializer
+  %10 = add nsw <2 x i64> %9, zeroinitializer
+  %11 = shufflevector <2 x i64> %10, <2 x i64> undef, <3 x i32> <i32 0, i32 1,
+i32 undef>
+  %12 = insertelement <3 x i64> %11, i64 0, i32 2
+  %13 = shufflevector <3 x i64> %12, <3 x i64> undef, <4 x i32> <i32 0, i32 1,
+i32 2, i32 3>
+  %14 = shufflevector <4 x i64> %13, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %15 = bitcast <2 x i64> %14 to <4 x i32>
+  %16 = shufflevector <4 x i32> %15, <4 x i32> undef, <4 x i32> <i32 0, i32 2,
+i32 0, i32 2>
+  %17 = bitcast <4 x i32> %16 to <2 x i64>
+  %18 = shufflevector <2 x i64> %17, <2 x i64> undef, <2 x i32> <i32 0, i32 2>
+  %19 = bitcast <2 x i64> %18 to <4 x i32>
+  %20 = shufflevector <4 x i32> %19, <4 x i32> undef, <3 x i32> <i32 0, i32 1,
+i32 2>
+  %21 = or <3 x i32> %20, zeroinitializer
+  store <3 x i32> %21, <3 x i32> addrspace(1)* undef, align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 574b46acea60..dff693120fb6 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
-; RUN: grep {call.*divdi3}  %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 | FileCheck %s
 
 
 ; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +11,7 @@
 define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
   %div.r = sdiv <2 x i64> %num, %div
   ret <2 x i64>  %div.r
-}                                     
+}
+
+; CHECK: call{{.*(divdi3|alldiv)}}
+; CHECK: call{{.*(divdi3|alldiv)}}
diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll
index 08306c2950e2..c3d1938e9dbd 100644
--- a/test/CodeGen/X86/licm-symbol.ll
+++ b/test/CodeGen/X86/licm-symbol.ll
@@ -3,7 +3,7 @@
 ; MachineLICM should be able to hoist the sF reference out of the loop.
 
 ; CHECK: pushl %esi
-; CHECK: subl  $4, %esp
+; CHECK: pushl
 ; CHECK: movl  $176, %esi
 ; CHECK: addl  L___sF$non_lazy_ptr, %esi
 ; CHECK: .align  4, 0x90
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index 354d08206972..faba63007127 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -70,6 +70,7 @@ exit:
 
 ; Same as slightly_more_involved, but block_a is now a CFG diamond with
 ; fallthrough edges which should be preserved.
+; "callq block_a_merge_func" is tail duped.
 
 ; CHECK: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
@@ -78,12 +79,12 @@ exit:
 ; CHECK-NEXT:   callq bar99
 ; CHECK-NEXT:   callq get
 ; CHECK-NEXT:   cmpl $2999, %eax
-; CHECK-NEXT:   jg .LBB2_6
-; CHECK-NEXT:   callq block_a_true_func
-; CHECK-NEXT:   jmp .LBB2_7
-; CHECK-NEXT: .LBB2_6:
+; CHECK-NEXT:   jle .LBB2_5
 ; CHECK-NEXT:   callq block_a_false_func
-; CHECK-NEXT: .LBB2_7:
+; CHECK-NEXT:   callq block_a_merge_func
+; CHECK-NEXT:   jmp .LBB2_1
+; CHECK-NEXT: .LBB2_5:
+; CHECK-NEXT:   callq block_a_true_func
 ; CHECK-NEXT:   callq block_a_merge_func
 ; CHECK-NEXT: .LBB2_1:
 ; CHECK-NEXT:   callq body
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index d2ff58be1055..2a9762928329 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -353,11 +353,11 @@ return:
 
 ; CHECK: count_me_3:
 ; CHECK: call
-; CHECK: movsd   (%r15,%r13,8), %xmm0
-; CHECK: mulsd   (%r14,%r13,8), %xmm0
-; CHECK: movsd   %xmm0, (%r12,%r13,8)
-; CHECK: incq    %r13
-; CHECK: cmpq    %r13, %rbx
+; CHECK: movsd   (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: mulsd   (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: movsd   %xmm0, (%r{{[^,]*}},%r{{[^,]*}},8)
+; CHECK: incq    %r{{.*}}
+; CHECK: cmpq    %r{{.*}}, %r{{.*}}
 ; CHECK: jne
 
 declare void @use(i64)
@@ -389,7 +389,7 @@ return:
 ; rdar://7657764
 
 ; CHECK: asd:
-; CHECK: BB9_5:
+; CHECK: BB9_4:
 ; CHECK-NEXT: addl  (%r{{[^,]*}},%rdi,4), %e
 ; CHECK-NEXT: incq  %rdi
 ; CHECK-NEXT: cmpq  %rdi, %r{{[^,]*}}
@@ -464,7 +464,7 @@ bb5:                                              ; preds = %bb3, %entry
 
 ; And the one at %bb68, where we want to be sure to use superhero mode:
 
-; CHECK:      BB10_9:
+; CHECK:      BB10_7:
 ; CHECK-NEXT:   movaps  48(%r{{[^,]*}}), %xmm{{.*}}
 ; CHECK-NEXT:   mulps   %xmm{{.*}}, %xmm{{.*}}
 ; CHECK-NEXT:   movaps  32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,6 @@ bb5:                                              ; preds = %bb3, %entry
 ; CHECK-NEXT:   addq    $64, %r{{.*}}
 ; CHECK-NEXT:   addq    $64, %r{{.*}}
 ; CHECK-NEXT:   addq    $-16, %r{{.*}}
-; CHECK-NEXT: BB10_10:
 ; CHECK-NEXT:   cmpq    $15, %r{{.*}}
 ; CHECK-NEXT:   jg
 
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index a8afdc84c51f..e284776ed02d 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -37,3 +37,43 @@ bb3:
 declare void @bar(i32*)
 
 declare fastcc i8* @foo(%struct.s2*) nounwind
+
+; rdar://8773371
+
+declare void @printf(...) nounwind
+
+define void @commute(i32 %test_case, i32 %scale) nounwind ssp {
+; CHECK: commute:
+entry:
+  switch i32 %test_case, label %sw.bb307 [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 3, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry, %entry
+  %mul = mul nsw i32 %test_case, 3
+  %mul20 = mul nsw i32 %mul, %scale
+  br i1 undef, label %if.end34, label %sw.bb307
+
+if.end34:                                         ; preds = %sw.bb
+; CHECK: %if.end34
+; CHECK: imull
+; CHECK: leal
+; CHECK-NOT: imull
+  tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
+  %tmp = mul i32 %scale, %test_case
+  %tmp752 = mul i32 %tmp, 3
+  %tmp753 = zext i32 %tmp752 to i64
+  br label %bb.nph743.us
+
+for.body53.us:                                    ; preds = %bb.nph743.us, %for.body53.us
+  %exitcond = icmp eq i64 undef, %tmp753
+  br i1 %exitcond, label %bb.nph743.us, label %for.body53.us
+
+bb.nph743.us:                                     ; preds = %for.body53.us, %if.end34
+  br label %for.body53.us
+
+sw.bb307:                                         ; preds = %sw.bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index b90d2e211878..36be1f308ccd 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -20,8 +20,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp2:
-; CHECK: movw    (%rsi), %ax
-; CHECK: cmpw    %ax, (%rdi)
+; CHECK: movw    (%rdi), %ax
+; CHECK: cmpw    (%rsi), %ax
 }
 
 define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
@@ -54,8 +54,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp4:
-; CHECK: movl    (%rsi), %eax
-; CHECK: cmpl    %eax, (%rdi)
+; CHECK: movl    (%rdi), %eax
+; CHECK: cmpl    (%rsi), %eax
 }
 
 define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
@@ -87,8 +87,8 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 ; CHECK: memcmp8:
-; CHECK: movq    (%rsi), %rax
-; CHECK: cmpq    %rax, (%rdi)
+; CHECK: movq    (%rdi), %rax
+; CHECK: cmpq    (%rsi), %rax
 }
 
 define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 7bc31bec163d..72342cbacb4f 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
@@ -9,8 +10,8 @@ entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
 	ret i8* %a
         
-; CHECK: test1:
-; CHECK: memcpy
+; LINUX: test1:
+; LINUX: memcpy
 }
 
 ; Variable memcpy's should lower to calls.
@@ -21,18 +22,41 @@ entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
 	ret i8* %tmp14
         
-; CHECK: test2:
-; CHECK: memcpy
+; LINUX: test2:
+; LINUX: memcpy
 }
 
 ; Large constant memcpy's should lower to a call when optimizing for size.
 ; PR6623
+
+; On the other hand, Darwin's definition of -Os is optimizing for size without
+; hurting performance so it should just ignore optsize when expanding memcpy.
+; rdar://8821501
 define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
-; CHECK: test3:
-; CHECK: memcpy
+; LINUX: test3:
+; LINUX: memcpy
+
+; DARWIN: test3:
+; DARWIN-NOT: memcpy
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
+; DARWIN: movq
 }
 
 ; Large constant memcpy's should be inlined when not optimizing for size.
@@ -40,18 +64,18 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
-; CHECK: test4:
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
-; CHECK: movq
+; LINUX: test4:
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
+; LINUX movq
 }
 
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
deleted file mode 100644
index d4050689f594..000000000000
--- a/test/CodeGen/X86/memmove-0.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memcpy}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s, i64 %l)
-{
-  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
-  ret void
-}
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
deleted file mode 100644
index 2057be88174d..000000000000
--- a/test/CodeGen/X86/memmove-1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s, i64 %l)
-{
-  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
-  ret void
-}
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
deleted file mode 100644
index 68a9f4dfb9cb..000000000000
--- a/test/CodeGen/X86/memmove-2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s)
-{
-  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
-  ret void
-}
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
deleted file mode 100644
index d8a419c07457..000000000000
--- a/test/CodeGen/X86/memmove-3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s)
-{
-  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
-  ret void
-}
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 0e1559548e2b..993583b4a49b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=i386-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s
 
 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
 
 define fastcc void @t1() nounwind {
 entry:
 ; CHECK: t1:
-; CHECK: call _memset
+; CHECK: calll _memset
   call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
   unreachable
 }
@@ -13,7 +13,27 @@ entry:
 define fastcc void @t2(i8 signext %c) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: call _memset
+; CHECK: calll _memset
   call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
   unreachable
 }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t3(i8* nocapture %s, i8 %a) nounwind {
+entry:
+  tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
+  ret void
+; CHECK: t3:
+; CHECK: imull $16843009
+}
+
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+  tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+  ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index c0cd271d985e..3f069b4a1aa8 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin   -mcpu=nehalem | grep movaps | count 5
+; RUN: llc < %s -mtriple=i386-apple-darwin   -mcpu=nehalem | grep movups | count 5
 ; RUN: llc < %s -mtriple=i386-apple-darwin   -mcpu=core2   | grep movl   | count 20
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2   | grep movq   | count 10
 
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 7dcd84d8a157..ded4b73d0931 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -6,7 +6,7 @@ target triple = "i386-pc-mingw32"
 define void @foo1(i32 %N) nounwind {
 entry:
 ; CHECK: _foo1:
-; CHECK: call __alloca
+; CHECK: calll __alloca
 	%tmp14 = alloca i32, i32 %N		; <i32*> [#uses=1]
 	call void @bar1( i32* %tmp14 )
 	ret void
@@ -19,7 +19,7 @@ entry:
 ; CHECK: _foo2:
 ; CHECK: andl $-16, %esp
 ; CHECK: pushl %eax
-; CHECK: call __alloca
+; CHECK: calll __alloca
 ; CHECK: movl	8028(%esp), %eax
 	%A2 = alloca [2000 x i32], align 16		; <[2000 x i32]*> [#uses=1]
 	%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/misaligned-memset.ll b/test/CodeGen/X86/misaligned-memset.ll
new file mode 100644
index 000000000000..21f8bf2bf29e
--- /dev/null
+++ b/test/CodeGen/X86/misaligned-memset.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=nehalem < %s | FileCheck %s
+
+@a = common global [3 x i64] zeroinitializer, align 16
+
+define i32 @main() nounwind ssp {
+; CHECK: movups
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 426e98e019bc..b348512b5798 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
 ;
 ; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
-; On Darwin x86-32, v1i64 values are passed in memory.
+; On Darwin x86-32, v1i64 values are passed in memory.  In this example, they
+;                   are never moved into an MM register at all.
 ; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
 ; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
 
-@u1 = external global <8 x i8>
+@u1 = external global x86_mmx
 
-define void @t1(<8 x i8> %v1) nounwind  {
-	store <8 x i8> %v1, <8 x i8>* @u1, align 8
+define void @t1(x86_mmx %v1) nounwind  {
+	store x86_mmx %v1, x86_mmx* @u1, align 8
 	ret void
 }
 
-@u2 = external global <1 x i64>
+@u2 = external global x86_mmx
 
 define void @t2(<1 x i64> %v1) nounwind  {
-	store <1 x i64> %v1, <1 x i64>* @u2, align 8
+        %tmp = bitcast <1 x i64> %v1 to x86_mmx
+	store x86_mmx %tmp, x86_mmx* @u2, align 8
 	ret void
 }
+
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index c42af082364c..c132d311b94b 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; Since the add is not an MMX add, we don't have a movq2dq any more.
 
 @g_v8qi = external global <8 x i8>
 
 define void @t1() nounwind  {
 	%tmp3 = load <8 x i8>* @g_v8qi, align 8
-	%tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+        %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+	%tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
 	ret void
 }
 
-define void @t2(<8 x i8> %v1, <8 x i8> %v2) nounwind  {
-       %tmp3 = add <8 x i8> %v1, %v2
-       %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind  {
+       %v1a = bitcast x86_mmx %v1 to <8 x i8>
+       %v2b = bitcast x86_mmx %v2 to <8 x i8>
+       %tmp3 = add <8 x i8> %v1a, %v2b
+       %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+       %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
        ret void
 }
 
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index e4dfdbfe1bb1..681748732401 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,131 +1,309 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx
 
 ;; A basic sanity check to make sure that MMX arithmetic actually compiles.
+;; First is a straight translation of the original with bitcasts as needed.
 
-define void @foo(<8 x i8>* %A, <8 x i8>* %B) {
+define void @foo(x86_mmx* %A, x86_mmx* %B) {
 entry:
-	%tmp1 = load <8 x i8>* %A		; <<8 x i8>> [#uses=1]
-	%tmp3 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp4 = add <8 x i8> %tmp1, %tmp3		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp4, <8 x i8>* %A
-	%tmp7 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp12 = tail call <8 x i8> @llvm.x86.mmx.padds.b( <8 x i8> %tmp4, <8 x i8> %tmp7 )		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp12, <8 x i8>* %A
-	%tmp16 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp21 = tail call <8 x i8> @llvm.x86.mmx.paddus.b( <8 x i8> %tmp12, <8 x i8> %tmp16 )		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp21, <8 x i8>* %A
-	%tmp27 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp28 = sub <8 x i8> %tmp21, %tmp27		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp28, <8 x i8>* %A
-	%tmp31 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp36 = tail call <8 x i8> @llvm.x86.mmx.psubs.b( <8 x i8> %tmp28, <8 x i8> %tmp31 )		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp36, <8 x i8>* %A
-	%tmp40 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp45 = tail call <8 x i8> @llvm.x86.mmx.psubus.b( <8 x i8> %tmp36, <8 x i8> %tmp40 )		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp45, <8 x i8>* %A
-	%tmp51 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp52 = mul <8 x i8> %tmp45, %tmp51		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp52, <8 x i8>* %A
-	%tmp57 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp58 = and <8 x i8> %tmp52, %tmp57		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp58, <8 x i8>* %A
-	%tmp63 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp64 = or <8 x i8> %tmp58, %tmp63		; <<8 x i8>> [#uses=2]
-	store <8 x i8> %tmp64, <8 x i8>* %A
-	%tmp69 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
-	%tmp70 = xor <8 x i8> %tmp64, %tmp69		; <<8 x i8>> [#uses=1]
-	store <8 x i8> %tmp70, <8 x i8>* %A
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
+        %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+	%tmp4 = add <8 x i8> %tmp1a, %tmp3a		; <<8 x i8>> [#uses=2]
+        %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
+	store x86_mmx %tmp4a, x86_mmx* %A
+	%tmp7 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp12, x86_mmx* %A
+	%tmp16 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp21, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
+        %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+	%tmp28 = sub <8 x i8> %tmp21a, %tmp27a		; <<8 x i8>> [#uses=2]
+        %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
+	store x86_mmx %tmp28a, x86_mmx* %A
+	%tmp31 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp36, x86_mmx* %A
+	%tmp40 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp45, x86_mmx* %A
+	%tmp51 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
+        %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+	%tmp52 = mul <8 x i8> %tmp45a, %tmp51a		; <<8 x i8>> [#uses=2]
+        %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
+	store x86_mmx %tmp52a, x86_mmx* %A
+	%tmp57 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+	%tmp58 = and <8 x i8> %tmp52, %tmp57a		; <<8 x i8>> [#uses=2]
+        %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
+	store x86_mmx %tmp58a, x86_mmx* %A
+	%tmp63 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+	%tmp64 = or <8 x i8> %tmp58, %tmp63a		; <<8 x i8>> [#uses=2]
+        %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
+	store x86_mmx %tmp64a, x86_mmx* %A
+	%tmp69 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
+        %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+	%tmp70 = xor <8 x i8> %tmp64b, %tmp69a		; <<8 x i8>> [#uses=1]
+        %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
+	store x86_mmx %tmp70a, x86_mmx* %A
 	tail call void @llvm.x86.mmx.emms( )
 	ret void
 }
 
-define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
+define void @baz(x86_mmx* %A, x86_mmx* %B) {
 entry:
-	%tmp1 = load <2 x i32>* %A		; <<2 x i32>> [#uses=1]
-	%tmp3 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp4 = add <2 x i32> %tmp1, %tmp3		; <<2 x i32>> [#uses=2]
-	store <2 x i32> %tmp4, <2 x i32>* %A
-	%tmp9 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp10 = sub <2 x i32> %tmp4, %tmp9		; <<2 x i32>> [#uses=2]
-	store <2 x i32> %tmp10, <2 x i32>* %A
-	%tmp15 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp16 = mul <2 x i32> %tmp10, %tmp15		; <<2 x i32>> [#uses=2]
-	store <2 x i32> %tmp16, <2 x i32>* %A
-	%tmp21 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp22 = and <2 x i32> %tmp16, %tmp21		; <<2 x i32>> [#uses=2]
-	store <2 x i32> %tmp22, <2 x i32>* %A
-	%tmp27 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp28 = or <2 x i32> %tmp22, %tmp27		; <<2 x i32>> [#uses=2]
-	store <2 x i32> %tmp28, <2 x i32>* %A
-	%tmp33 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
-	%tmp34 = xor <2 x i32> %tmp28, %tmp33		; <<2 x i32>> [#uses=1]
-	store <2 x i32> %tmp34, <2 x i32>* %A
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
+        %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+	%tmp4 = add <2 x i32> %tmp1a, %tmp3a		; <<2 x i32>> [#uses=2]
+        %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
+	store x86_mmx %tmp4a, x86_mmx* %A
+	%tmp9 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+	%tmp10 = sub <2 x i32> %tmp4, %tmp9a		; <<2 x i32>> [#uses=2]
+        %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
+	store x86_mmx %tmp10a, x86_mmx* %A
+	%tmp15 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
+        %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+	%tmp16 = mul <2 x i32> %tmp10b, %tmp15a		; <<2 x i32>> [#uses=2]
+        %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+	store x86_mmx %tmp16a, x86_mmx* %A
+	%tmp21 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
+        %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+	%tmp22 = and <2 x i32> %tmp16b, %tmp21a		; <<2 x i32>> [#uses=2]
+        %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
+	store x86_mmx %tmp22a, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
+        %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+	%tmp28 = or <2 x i32> %tmp22b, %tmp27a		; <<2 x i32>> [#uses=2]
+        %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
+	store x86_mmx %tmp28a, x86_mmx* %A
+	%tmp33 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
+        %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+	%tmp34 = xor <2 x i32> %tmp28b, %tmp33a		; <<2 x i32>> [#uses=1]
+        %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
+	store x86_mmx %tmp34a, x86_mmx* %A
 	tail call void @llvm.x86.mmx.emms( )
 	ret void
 }
 
-define void @bar(<4 x i16>* %A, <4 x i16>* %B) {
+define void @bar(x86_mmx* %A, x86_mmx* %B) {
 entry:
-	%tmp1 = load <4 x i16>* %A		; <<4 x i16>> [#uses=1]
-	%tmp3 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp4 = add <4 x i16> %tmp1, %tmp3		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp4, <4 x i16>* %A
-	%tmp7 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp12 = tail call <4 x i16> @llvm.x86.mmx.padds.w( <4 x i16> %tmp4, <4 x i16> %tmp7 )		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp12, <4 x i16>* %A
-	%tmp16 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp21 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp12, <4 x i16> %tmp16 )		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp21, <4 x i16>* %A
-	%tmp27 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp28 = sub <4 x i16> %tmp21, %tmp27		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp28, <4 x i16>* %A
-	%tmp31 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp36 = tail call <4 x i16> @llvm.x86.mmx.psubs.w( <4 x i16> %tmp28, <4 x i16> %tmp31 )		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp36, <4 x i16>* %A
-	%tmp40 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp45 = tail call <4 x i16> @llvm.x86.mmx.psubus.w( <4 x i16> %tmp36, <4 x i16> %tmp40 )		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp45, <4 x i16>* %A
-	%tmp51 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp52 = mul <4 x i16> %tmp45, %tmp51		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp52, <4 x i16>* %A
-	%tmp55 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp60 = tail call <4 x i16> @llvm.x86.mmx.pmulh.w( <4 x i16> %tmp52, <4 x i16> %tmp55 )		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp60, <4 x i16>* %A
-	%tmp64 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp69 = tail call <2 x i32> @llvm.x86.mmx.pmadd.wd( <4 x i16> %tmp60, <4 x i16> %tmp64 )		; <<2 x i32>> [#uses=1]
-	%tmp70 = bitcast <2 x i32> %tmp69 to <4 x i16>		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp70, <4 x i16>* %A
-	%tmp75 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp76 = and <4 x i16> %tmp70, %tmp75		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp76, <4 x i16>* %A
-	%tmp81 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp82 = or <4 x i16> %tmp76, %tmp81		; <<4 x i16>> [#uses=2]
-	store <4 x i16> %tmp82, <4 x i16>* %A
-	%tmp87 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
-	%tmp88 = xor <4 x i16> %tmp82, %tmp87		; <<4 x i16>> [#uses=1]
-	store <4 x i16> %tmp88, <4 x i16>* %A
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
+        %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+	%tmp4 = add <4 x i16> %tmp1a, %tmp3a		; <<4 x i16>> [#uses=2]
+        %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
+	store x86_mmx %tmp4a, x86_mmx* %A
+	%tmp7 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp12, x86_mmx* %A
+	%tmp16 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp21, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
+        %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+	%tmp28 = sub <4 x i16> %tmp21a, %tmp27a		; <<4 x i16>> [#uses=2]
+        %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
+	store x86_mmx %tmp28a, x86_mmx* %A
+	%tmp31 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp36, x86_mmx* %A
+	%tmp40 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp45, x86_mmx* %A
+	%tmp51 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
+        %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+	%tmp52 = mul <4 x i16> %tmp45a, %tmp51a		; <<4 x i16>> [#uses=2]
+        %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
+	store x86_mmx %tmp52a, x86_mmx* %A
+	%tmp55 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp60, x86_mmx* %A
+	%tmp64 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 )		; <x86_mmx> [#uses=1]
+	%tmp70 = bitcast x86_mmx %tmp69 to x86_mmx		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp70, x86_mmx* %A
+	%tmp75 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
+        %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+	%tmp76 = and <4 x i16> %tmp70a, %tmp75a		; <<4 x i16>> [#uses=2]
+        %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
+	store x86_mmx %tmp76a, x86_mmx* %A
+	%tmp81 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
+        %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+	%tmp82 = or <4 x i16> %tmp76b, %tmp81a		; <<4 x i16>> [#uses=2]
+        %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
+	store x86_mmx %tmp82a, x86_mmx* %A
+	%tmp87 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
+        %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+	%tmp88 = xor <4 x i16> %tmp82b, %tmp87a		; <<4 x i16>> [#uses=1]
+        %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
+	store x86_mmx %tmp88a, x86_mmx* %A
 	tail call void @llvm.x86.mmx.emms( )
 	ret void
 }
 
-declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>)
+;; The following is modified to use MMX intrinsics everywhere they work.
 
-declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>)
+define void @fooa(x86_mmx* %A, x86_mmx* %B) {
+entry:
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp4, x86_mmx* %A
+	%tmp7 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp12, x86_mmx* %A
+	%tmp16 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp21, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp28, x86_mmx* %A
+	%tmp31 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp36, x86_mmx* %A
+	%tmp40 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp45, x86_mmx* %A
+	%tmp51 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp51a = bitcast x86_mmx %tmp51 to i64
+        %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
+        %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
+	%tmp52 = mul <8 x i8> %tmp51b, %tmp51aa		; <x86_mmx> [#uses=2]
+        %tmp52a = bitcast <8 x i8> %tmp52 to i64
+        %tmp52aa = bitcast i64 %tmp52a to x86_mmx
+	store x86_mmx %tmp52aa, x86_mmx* %A
+	%tmp57 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp58, x86_mmx* %A
+	%tmp63 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 )		; <x86_mmx> [#uses=2]	
+	store x86_mmx %tmp64, x86_mmx* %A
+	%tmp69 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp70, x86_mmx* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
 
-declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>)
+define void @baza(x86_mmx* %A, x86_mmx* %B) {
+entry:
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp4, x86_mmx* %A
+	%tmp9 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp10, x86_mmx* %A
+	%tmp15 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+        %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
+        %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+	%tmp16 = mul <2 x i32> %tmp10a, %tmp15a		; <x86_mmx> [#uses=2]
+        %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+	store x86_mmx %tmp16a, x86_mmx* %A
+	%tmp21 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp22, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp28, x86_mmx* %A
+	%tmp33 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp34, x86_mmx* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
 
-declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>)
+define void @bara(x86_mmx* %A, x86_mmx* %B) {
+entry:
+	%tmp1 = load x86_mmx* %A		; <x86_mmx> [#uses=1]
+	%tmp3 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp4, x86_mmx* %A
+	%tmp7 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp12, x86_mmx* %A
+	%tmp16 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp21, x86_mmx* %A
+	%tmp27 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp28, x86_mmx* %A
+	%tmp31 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp36, x86_mmx* %A
+	%tmp40 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp45, x86_mmx* %A
+	%tmp51 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp52, x86_mmx* %A
+	%tmp55 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp60, x86_mmx* %A
+	%tmp64 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 )		; <x86_mmx> [#uses=1]
+	%tmp70 = bitcast x86_mmx %tmp69 to x86_mmx		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp70, x86_mmx* %A
+	%tmp75 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp76, x86_mmx* %A
+	%tmp81 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp82, x86_mmx* %A
+	%tmp87 = load x86_mmx* %B		; <x86_mmx> [#uses=1]
+	%tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 )		; <x86_mmx> [#uses=2]
+	store x86_mmx %tmp88, x86_mmx* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
 
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
 
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
 
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
 
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
 
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
 
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
 
 declare void @llvm.x86.mmx.emms()
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index 1fd8f67a0ccc..8b1840abf615 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,26 +1,31 @@
 ; RUN: llc < %s -march=x86-64 | grep movd | count 4
 
-define i64 @foo(<1 x i64>* %p) {
-  %t = load <1 x i64>* %p
-  %u = add <1 x i64> %t, %t
-  %s = bitcast <1 x i64> %u to i64
+define i64 @foo(x86_mmx* %p) {
+  %t = load x86_mmx* %p
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
+  %s = bitcast x86_mmx %u to i64
   ret i64 %s
 }
-define i64 @goo(<2 x i32>* %p) {
-  %t = load <2 x i32>* %p
-  %u = add <2 x i32> %t, %t
-  %s = bitcast <2 x i32> %u to i64
+define i64 @goo(x86_mmx* %p) {
+  %t = load x86_mmx* %p
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
+  %s = bitcast x86_mmx %u to i64
   ret i64 %s
 }
-define i64 @hoo(<4 x i16>* %p) {
-  %t = load <4 x i16>* %p
-  %u = add <4 x i16> %t, %t
-  %s = bitcast <4 x i16> %u to i64
+define i64 @hoo(x86_mmx* %p) {
+  %t = load x86_mmx* %p
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
+  %s = bitcast x86_mmx %u to i64
   ret i64 %s
 }
-define i64 @ioo(<8 x i8>* %p) {
-  %t = load <8 x i8>* %p
-  %u = add <8 x i8> %t, %t
-  %s = bitcast <8 x i8> %u to i64
+define i64 @ioo(x86_mmx* %p) {
+  %t = load x86_mmx* %p
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
+  %s = bitcast x86_mmx %u to i64
   ret i64 %s
 }
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
new file mode 100644
index 000000000000..3ac0e4ee4b85
--- /dev/null
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -0,0 +1,1324 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+
+declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckldq
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklwd
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklbw
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhdq
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhwd
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhbw
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packuswb
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packssdw
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packsswb
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var.i = bitcast i64 %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to i64
+  ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var.i = bitcast i64 %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to i64
+  ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  %3 = bitcast <2 x i32> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+  %2 = bitcast x86_mmx %1 to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to <1 x i64>
+  %4 = extractelement <1 x i64> %3, i32 0
+  ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var.i = bitcast i64 %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var.i = bitcast i64 %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1.i = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pxor
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: por
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pandn
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pand
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddwd
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var = bitcast i64 %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1 = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddq
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var = bitcast i64 %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1 = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psadbw
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminub
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxub
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
+
+define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
+; CHECK: movntq
+entry:
+  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var.i = bitcast i64 %0 to x86_mmx
+  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
+  ret void
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
+
+define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pmovmskb
+entry:
+  %0 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
+  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
+  ret i32 %1
+}
+
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
+
+define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
+; CHECK: maskmovq
+entry:
+  %0 = bitcast <1 x i64> %n to <8 x i8>
+  %1 = bitcast <1 x i64> %d to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
+  ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhuw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pshufw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmuludq
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpi2pd
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
+  ret <2 x double> %2
+}
+
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvttpd2pi
+entry:
+  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
+  %1 = bitcast x86_mmx %0 to <2 x i32>
+  %2 = bitcast <2 x i32> %1 to <1 x i64>
+  %3 = extractelement <1 x i64> %2, i32 0
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpd2pi
+entry:
+  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
+  %1 = bitcast x86_mmx %0 to <2 x i32>
+  %2 = bitcast <2 x i32> %1 to <1 x i64>
+  %3 = extractelement <1 x i64> %2, i32 0
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
+
+define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: palignr
+entry:
+  %0 = extractelement <1 x i64> %a, i32 0
+  %mmx_var = bitcast i64 %0 to x86_mmx
+  %1 = extractelement <1 x i64> %b, i32 0
+  %mmx_var1 = bitcast i64 %1 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
+  %3 = bitcast x86_mmx %2 to i64
+  ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
+
+define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsd
+entry:
+  %0 = bitcast <1 x i64> %a to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
+
+define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsw
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
+
+define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsb
+entry:
+  %0 = bitcast <1 x i64> %a to <8 x i8>
+  %1 = bitcast <8 x i8> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %2 = bitcast <2 x i32> %1 to x86_mmx
+  %3 = bitcast <2 x i32> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <2 x i32>
+  %6 = bitcast <2 x i32> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %2 = bitcast <8 x i8> %1 to x86_mmx
+  %3 = bitcast <8 x i8> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <8 x i8>
+  %6 = bitcast <8 x i8> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pshufb
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %2 = bitcast <8 x i8> %1 to x86_mmx
+  %3 = bitcast <8 x i8> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <8 x i8>
+  %6 = bitcast <8 x i8> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhrsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddubsw
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %2 = bitcast <8 x i8> %1 to x86_mmx
+  %3 = bitcast <8 x i8> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <8 x i8>
+  %6 = bitcast <8 x i8> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %2 = bitcast <2 x i32> %1 to x86_mmx
+  %3 = bitcast <2 x i32> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <2 x i32>
+  %6 = bitcast <2 x i32> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddsw
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %2 = bitcast <4 x i16> %1 to x86_mmx
+  %3 = bitcast <4 x i16> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  %6 = bitcast <4 x i16> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddd
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %2 = bitcast <2 x i32> %1 to x86_mmx
+  %3 = bitcast <2 x i32> %0 to x86_mmx
+  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+  %5 = bitcast x86_mmx %4 to <2 x i32>
+  %6 = bitcast <2 x i32> %5 to <1 x i64>
+  %7 = extractelement <1 x i64> %6, i32 0
+  ret i64 %7
+}
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index a063ee1d6cf4..348dac8d4d59 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
+; This is not an MMX operation; promoted to XMM.
 
-define <2 x i32> @qux(i32 %A) nounwind {
+define x86_mmx @qux(i32 %A) nounwind {
 	%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1		; <<2 x i32>> [#uses=1]
-	ret <2 x i32> %tmp3
+        %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
+	ret x86_mmx %tmp4
 }
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 3af09f4998d3..6062b505a569 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
 ; PR2562
 
 external global i16		; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 0af7e017b626..689f7bf59564 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; There are no MMX operations in bork; promoted to XMM.
 
 define void @bork(<1 x i64>* %x) {
+; CHECK: bork
+; CHECK: pextrd
 entry:
 	%tmp2 = load <1 x i64>* %x		; <<1 x i64>> [#uses=1]
 	%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>		; <<2 x i32>> [#uses=1]
@@ -11,4 +14,18 @@ entry:
 	ret void
 }
 
+; pork uses MMX.
+
+define void @pork(x86_mmx* %x) {
+; CHECK: pork
+; CHECK: punpckhdq
+entry:
+	%tmp2 = load x86_mmx* %x		; <x86_mmx> [#uses=1]
+        %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2)
+	store x86_mmx %tmp9, x86_mmx* %x
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
 declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index dd0aa2ca31f4..bafc75444d91 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -5,28 +5,28 @@
 
 define i64 @t1(<1 x i64> %mm1) nounwind  {
 entry:
-	%tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 )		; <<1 x i64>> [#uses=1]
-	%retval1112 = bitcast <1 x i64> %tmp6 to i64		; <i64> [#uses=1]
+        %tmp = bitcast <1 x i64> %mm1 to x86_mmx
+	%tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 )		; <x86_mmx> [#uses=1]
+        %retval1112 = bitcast x86_mmx %tmp6 to i64
 	ret i64 %retval1112
 }
 
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone 
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone 
 
-define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind  {
+define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind  {
 entry:
-	%tmp7 = tail call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %mm1, <2 x i32> %mm2 ) nounwind readnone 		; <<2 x i32>> [#uses=1]
-	%retval1112 = bitcast <2 x i32> %tmp7 to i64		; <i64> [#uses=1]
+	%tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone 		; <x86_mmx> [#uses=1]
+        %retval1112 = bitcast x86_mmx %tmp7 to i64
 	ret i64 %retval1112
 }
 
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone 
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone 
 
-define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind  {
+define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind  {
 entry:
-	%tmp6 = bitcast <1 x i64> %mm1 to <4 x i16>		; <<4 x i16>> [#uses=1]
-	%tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone 		; <<4 x i16>> [#uses=1]
-	%retval1314 = bitcast <4 x i16> %tmp8 to i64		; <i64> [#uses=1]
+	%tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone 		; <x86_mmx> [#uses=1]
+        %retval1314 = bitcast x86_mmx %tmp8 to i64
 	ret i64 %retval1314
 }
 
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone 
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone 
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index e3125c7345b8..9f7501eb7c5d 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -22,8 +22,10 @@ entry:
 	%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16>		; <<4 x i16>> [#uses=1]
 	%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 >		; <<4 x i16>> [#uses=1]
 	%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8>		; <<8 x i8>> [#uses=1]
-	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> %tmp555, i8* null )
+        %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
+        %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
+	tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null )
 	ret void
 }
 
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 8253c200323c..a7ce7d93920e 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
 
 	%struct.vS1024 = type { [8 x <4 x i32>] }
 	%struct.vS512 = type { [4 x <4 x i32>] }
 
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
 
 define void @t() nounwind {
 entry:
@@ -12,14 +12,18 @@ entry:
 
 bb554:		; preds = %bb554, %entry
 	%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ]		; <<1 x i64>> [#uses=1]
-	%0 = load <1 x i64>* null, align 8		; <<1 x i64>> [#uses=2]
-	%1 = bitcast <1 x i64> %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
+	%0 = load x86_mmx* null, align 8		; <<1 x i64>> [#uses=2]
+	%1 = bitcast x86_mmx %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
 	%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
-	%2 = bitcast <2 x i32> %tmp555 to <1 x i64>		; <<1 x i64>> [#uses=1]
-	%3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
+	%2 = bitcast <2 x i32> %tmp555 to x86_mmx		; <<1 x i64>> [#uses=1]
+	%3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
         store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
-	%tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2		; <<1 x i64>> [#uses=1]
-	%4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
-	%tmp562 = add <1 x i64> %4, %3		; <<1 x i64>> [#uses=1]
+        %tmp3 = bitcast x86_mmx %2 to <1 x i64>
+	%tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3		; <<1 x i64>> [#uses=1]
+        %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
+	%4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
+        %tmp6 = bitcast x86_mmx %4 to <1 x i64>
+        %tmp7 = bitcast x86_mmx %3 to <1 x i64>
+	%tmp562 = add <1 x i64> %tmp6, %tmp7		; <<1 x i64>> [#uses=1]
 	br label %bb554
 }
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index d21e2404882d..191e261f616f 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
+; There are no MMX operations here; this is promoted to XMM.
 
 define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
 entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index b04048b92c13..00190e802fc9 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,8 +1,57 @@
-; RUN: llc < %s -march=x86 | grep gs
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
 
-define i32 @foo() nounwind readonly {
+define i32 @test1() nounwind readonly {
 entry:
 	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
+; X32: test1:
+; X32: 	movl	%gs:196, %eax
+; X32: 	movl	(%eax), %eax
+; X32: 	ret
+
+; X64: test1:
+; X64: 	movq	%gs:320, %rax
+; X64: 	movl	(%rax), %eax
+; X64: 	ret
+
+define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
+entry:
+  %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+  tail call void %tmp9(i8* undef) nounwind optsize
+  ret i64 0
+}
+
+; rdar://8453210
+; X32: test2:
+; X32: movl	{{.*}}(%esp), %eax
+; X32: calll	*%gs:(%eax)
+
+; X64: test2:
+; X64: callq	*%gs:(%rdi)
+
+
+
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+  %0 = load i64 addrspace(256)* %p
+  %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+  %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+  %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+  %3 = bitcast <4 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
+  
+; X32: pmovsxwd_1:
+; X32: 	movl	4(%esp), %eax
+; X32: 	pmovsxwd	%gs:(%eax), %xmm0
+; X32: 	ret
+
+; X64: pmovsxwd_1:
+; X64:	pmovsxwd	%gs:(%rdi), %xmm0
+; X64:	ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/mult-alt-generic-i686.ll b/test/CodeGen/X86/mult-alt-generic-i686.ll
new file mode 100644
index 000000000000..7c3499f178a6
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
new file mode 100644
index 000000000000..f35bb5e34079
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86-64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+  ret void
+}
+
+define void @single_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @single_V() nounwind {
+entry:
+  ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @single_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @single_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @single_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @single_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @single_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %index = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %index, align 4
+  ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* %in1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+  %out0 = alloca double, align 8
+  store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+;  %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+;  store double %0, double* %out0, align 8
+  ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  %in1 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  store i32 1, i32* %in1, align 4
+  %tmp = load i32* %in1, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* %out0, align 4
+  %tmp1 = load i32* @min1, align 4
+  %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+  store i32 %1, i32* %out0, align 4
+  %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  store i32 %2, i32* %out0, align 4
+  %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %3, i32* %out0, align 4
+  %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+  store i32 %4, i32* %out0, align 4
+  %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+  store i32 %5, i32* %out0, align 4
+  ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+  %out0 = alloca i32, align 4
+  store i32 0, i32* %out0, align 4
+  %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+  store i32 %0, i32* %out0, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
new file mode 100644
index 000000000000..06175da46454
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -0,0 +1,358 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; ModuleID = 'mult-alt-x86.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@dout0 = common global double 0.000000e+000, align 8
+@din1 = common global double 0.000000e+000, align 8
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_R() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_q() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_Q() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_a() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_b() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_c() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_d() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_S() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_D() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_A() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+  store i32 %0, i32* @mout0, align 4
+  ret void
+}
+
+define void @single_f() nounwind {
+entry:
+  ret void
+}
+
+define void @single_t() nounwind {
+entry:
+  ret void
+}
+
+define void @single_u() nounwind {
+entry:
+  ret void
+}
+
+define void @single_y() nounwind {
+entry:
+  %tmp = load double* @din1, align 8
+  %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+  store double %0, double* @dout0, align 8
+  ret void
+}
+
+define void @single_x() nounwind {
+entry:
+  %tmp = load double* @din1, align 8
+  %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+  store double %0, double* @dout0, align 8
+  ret void
+}
+
+define void @single_Y0() nounwind {
+entry:
+  ret void
+}
+
+define void @single_I() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,I,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_J() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,J,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_K() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,K,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+;  call void asm "foo $1,$0", "=*m,L,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+;  call void asm "foo $1,$0", "=*m,M,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_N() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,N,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+;  call void asm "foo $1,$0", "=*m,G,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+  ret void
+}
+
+define void @single_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+;  call void asm "foo $1,$0", "=*m,C,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+  ret void
+}
+
+define void @single_e() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,e,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @single_Z() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*m,Z,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_R() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_q() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_Q() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_a() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_b() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_c() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_d() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_S() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_D() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_A() nounwind {
+entry:
+  %tmp = load i32* @min1, align 4
+  call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+  ret void
+}
+
+define void @multi_f() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_t() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_u() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_y() nounwind {
+entry:
+  %tmp = load double* @din1, align 8
+  call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+  ret void
+}
+
+define void @multi_x() nounwind {
+entry:
+  %tmp = load double* @din1, align 8
+  call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+  ret void
+}
+
+define void @multi_Y0() nounwind {
+entry:
+  ret void
+}
+
+define void @multi_I() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|I|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_J() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|J|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_K() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|K|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+;  call void asm "foo $1,$0", "=*r|m|m,r|L|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+;  call void asm "foo $1,$0", "=*r|m|m,r|M|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_N() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|N|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+;  call void asm "foo $1,$0", "=*r|m|m,r|G|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+  ret void
+}
+
+define void @multi_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+;  call void asm "foo $1,$0", "=*r|m|m,r|C|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+  ret void
+}
+
+define void @multi_e() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|e|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
+
+define void @multi_Z() nounwind {
+entry:
+  call void asm "foo $1,$0", "=*r|m|m,r|Z|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
new file mode 100644
index 000000000000..ef27cbc3418c
--- /dev/null
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; DAGCombiner should fold this code in finite time.
+; rdar://8606584
+
+define void @test1() nounwind readnone {
+bb.nph:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %bb.nph
+  %tmp6 = load i32* undef, align 4
+  %and = or i64 undef, undef
+  %conv11 = zext i32 undef to i64
+  %conv14 = zext i32 %tmp6 to i64
+  %shl15 = shl i64 %conv14, 1
+  %shl15.masked = and i64 %shl15, 4294967294
+  %and17 = or i64 %shl15.masked, %conv11
+  %add = add i64 %and17, 1
+  %xor = xor i64 %add, %and
+  %tmp20 = load i64* undef, align 8
+  %add21 = add i64 %xor, %tmp20
+  %conv22 = trunc i64 %add21 to i32
+  store i32 %conv22, i32* undef, align 4
+  br i1 false, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+
+; DAGCombiner shouldn't fold the sdiv (ashr) away.
+; rdar://8636812
+; CHECK: test2:
+; CHECK:   sarl
+
+define i32 @test2() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i8, align 1
+  store i32 127, i32* %i, align 4
+  store i8 0, i8* %j, align 1
+  %tmp3 = load i32* %i, align 4
+  %mul = mul nsw i32 %tmp3, 2
+  %conv4 = trunc i32 %mul to i8
+  %conv5 = sext i8 %conv4 to i32
+  %div6 = sdiv i32 %conv5, 2
+  %conv7 = trunc i32 %div6 to i8
+  %conv9 = sext i8 %conv7 to i32
+  %cmp = icmp eq i32 %conv9, -1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret i32 0
+
+if.end:                                           ; preds = %entry
+  call void @abort() noreturn
+  unreachable
+}
+
+declare void @abort() noreturn
+
+declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the 1'th byte.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+  volatile store i32 128, i32* %P
+  %tmp4.pre = load i32* %P
+  %phitmp = trunc i32 %tmp4.pre to i16
+  %phitmp13 = shl i16 %phitmp, 8
+  %phitmp14 = ashr i16 %phitmp13, 8
+  %phitmp15 = lshr i16 %phitmp14, 8
+  %phitmp16 = zext i16 %phitmp15 to i32
+  ret i32 %phitmp16
+  
+; CHECK: movl	$128, (%rdi)
+; CHECK-NEXT: movsbl	(%rdi), %eax
+; CHECK-NEXT: movzbl	%ah, %eax
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
index 7842eb8456eb..76e557b84225 100644
--- a/test/CodeGen/X86/negative-sin.ll
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
-; RUN:   not egrep {addsd|subsd|xor}
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | FileCheck %s
+; CHECK-NOT:     {{addsd|subsd|xor}}
 
 declare double @sin(double %f)
 
diff --git a/test/CodeGen/X86/non-globl-eh-frame.ll b/test/CodeGen/X86/non-globl-eh-frame.ll
new file mode 100644
index 000000000000..71349ecafeb6
--- /dev/null
+++ b/test/CodeGen/X86/non-globl-eh-frame.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin10 -march x86 | not grep {{.globl\[\[:space:\]\]*__Z4funcv.eh}}
+; RUN: llc < %s -mtriple x86_64-apple-darwin9  -march x86 | FileCheck %s -check-prefix=DARWIN9
+
+%struct.__pointer_type_info_pseudo = type { %struct.__type_info_pseudo, i32, %"struct.std::type_info"* }
+%struct.__type_info_pseudo = type { i8*, i8* }
+%"struct.std::type_info" = type opaque
+
+@.str = private constant [12 x i8] c"hello world\00", align 1
+@_ZTIPc = external constant %struct.__pointer_type_info_pseudo
+
+define void @_Z4funcv() noreturn optsize ssp {
+entry:
+  %0 = tail call i8* @__cxa_allocate_exception(i64 8) nounwind
+  %1 = bitcast i8* %0 to i8**
+  store i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0), i8** %1, align 8
+  tail call void @__cxa_throw(i8* %0, i8* bitcast (%struct.__pointer_type_info_pseudo* @_ZTIPc to i8*), void (i8*)* null) noreturn
+  unreachable
+}
+
+; DARWIN9: .globl __Z4funcv.eh
+
+declare i8* @__cxa_allocate_exception(i64) nounwind
+
+declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 8bed62488070..ef02af2d7851 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
 ; PR1296
-; RUN: llc < %s -march=x86 | grep {movl	\$1} | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 23c509c9936b..13e804d94a57 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 4
 ; PR2659
 
 define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index a1a9759dd36c..dc5fcd78dc84 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -12,7 +12,7 @@ entry:
     ret void
     
 ; LINUX:    test0:
-; LINUX:	call	.L0$pb
+; LINUX:	calll	.L0$pb
 ; LINUX-NEXT: .L0$pb:
 ; LINUX-NEXT:	popl
 ; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb),
@@ -34,7 +34,7 @@ entry:
     ret void
     
 ; LINUX: test1:
-; LINUX:	call	.L1$pb
+; LINUX:	calll	.L1$pb
 ; LINUX-NEXT: .L1$pb:
 ; LINUX-NEXT:	popl
 ; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax
@@ -54,12 +54,12 @@ entry:
 ; LINUX: test2:
 ; LINUX: 	pushl	%ebx
 ; LINUX-NEXT: 	subl	$8, %esp
-; LINUX-NEXT: 	call	.L2$pb
+; LINUX-NEXT: 	calll	.L2$pb
 ; LINUX-NEXT: .L2$pb:
 ; LINUX-NEXT: 	popl	%ebx
 ; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx
 ; LINUX: 	movl	$40, (%esp)
-; LINUX: 	call	malloc@PLT
+; LINUX: 	calll	malloc@PLT
 ; LINUX: 	addl	$8, %esp
 ; LINUX: 	popl	%ebx
 ; LINUX: 	ret
@@ -75,13 +75,13 @@ entry:
     call void(...)* %tmp1()
     ret void
 ; LINUX: test3:
-; LINUX: 	call	.L3$pb
+; LINUX: 	calll	.L3$pb
 ; LINUX-NEXT: .L3$pb:
 ; LINUX: 	popl
 ; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
 ; LINUX: 	movl	pfoo@GOT(%[[REG3]]),
-; LINUX: 	call	afoo@PLT
-; LINUX: 	call	*
+; LINUX: 	calll	afoo@PLT
+; LINUX: 	calll	*
 }
 
 declare void(...)* @afoo(...)
@@ -91,10 +91,10 @@ entry:
     call void(...)* @foo()
     ret void
 ; LINUX: test4:
-; LINUX: call	.L4$pb
+; LINUX: calll	.L4$pb
 ; LINUX: popl	%ebx
 ; LINUX: addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
-; LINUX: call	foo@PLT
+; LINUX: calll	foo@PLT
 }
 
 declare void @foo(...)
@@ -112,7 +112,7 @@ entry:
     ret void
     
 ; LINUX: test5:
-; LINUX: 	call	.L5$pb
+; LINUX: 	calll	.L5$pb
 ; LINUX-NEXT: .L5$pb:
 ; LINUX-NEXT: 	popl	%eax
 ; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax
@@ -134,7 +134,7 @@ entry:
 ; LINUX: .LCPI6_0:
 
 ; LINUX: test6:
-; LINUX:    call .L6$pb
+; LINUX:    calll .L6$pb
 ; LINUX: .L6$pb:
 ; LINUX:    addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb), 
 ; LINUX:    fldl	.LCPI6_0@GOTOFF(
@@ -186,7 +186,7 @@ bb12:
     ret void
     
 ; LINUX: test7:
-; LINUX:   call	.L7$pb
+; LINUX:   calll	.L7$pb
 ; LINUX: .L7$pb:
 ; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
 ; LINUX:   .LJTI7_0@GOTOFF(
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 31071bc74a78..b6761e338aa9 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | grep -F .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
 ; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | FileCheck %s
 ; RUN: llc < %s                       -mtriple=x86_64-apple-darwin | not grep 'lJTI'
 ; rdar://6971437
diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll
new file mode 100644
index 000000000000..430214c73b13
--- /dev/null
+++ b/test/CodeGen/X86/popcnt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; CHECK: cnt8:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; CHECK: cnt16:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; CHECK: cnt32:
+; CHECK: popcntl
+; CHECK: ret
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; CHECK: cnt64:
+; CHECK: popcntq
+; CHECK: ret
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 97cc7b4977cf..902c69b471db 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -68,7 +68,7 @@ bb26.preheader:                                   ; preds = %imix_test.exit
 
 bb23:                                             ; preds = %imix_test.exit
   unreachable
-; X86-32: %bb26.preheader.bb28_crit_edge
+; X86-32: %bb26.preheader
 ; X86-32: movl -16(%ebp),
 ; X86-32-NEXT: .align 4
 ; X86-32-NEXT: %bb28
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index e5daf5da9f3e..54d043d54f83 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
 ; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
 ; PR2659
 
@@ -14,10 +14,11 @@ forcond.preheader:              ; preds = %entry
   %cmp44 = icmp eq i32 %k, 0            ; <i1> [#uses=1]
   br i1 %cmp44, label %afterfor, label %forbody
 
-; CHECK: %forcond.preheader.forbody_crit_edge
+; CHECK: %forcond.preheader
 ; CHECK: movl $1
 ; CHECK-NOT: xorl
-; CHECK-NEXT: movl
+; CHECK-NOT: movl
+; CHECK-NEXT: je
 
 ifthen:         ; preds = %entry
   ret i32 0
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 7cdeaa099271..da1623721d1c 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
 ; PR3522
 
 target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
new file mode 100644
index 000000000000..45b0c6c78706
--- /dev/null
+++ b/test/CodeGen/X86/pr9127.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+define i8 @foobar(double %d, double* %x) {
+entry:
+  %tmp2 = load double* %x, align 8
+  %cmp = fcmp oeq double %tmp2, %d
+  %conv3 = zext i1 %cmp to i8
+  ret i8 %conv3
+}
+
+; test that the load is folded.
+; CHECK: ucomisd	(%rdi), %xmm0
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index fac5915aae88..48d2673e4884 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=x86 -mattr=+sse > %t
-; RUN: grep prefetchnta %t
-; RUN: grep prefetcht0 %t
-; RUN: grep prefetcht1 %t
-; RUN: grep prefetcht2 %t
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
 
 define void @t(i8* %ptr) nounwind  {
 entry:
+; CHECK: prefetcht2
+; CHECK: prefetcht1
+; CHECK: prefetcht0
+; CHECK: prefetchnta
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index 276f8bb48d06..9291200f0110 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -8,14 +8,14 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
-@a = internal constant [2 x i32] [i32 1, i32 2]
-@a1 = constant [2 x i32] [i32 1, i32 2]
-@e = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@e1 = constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
-@p = constant i8* bitcast ([2 x i32]* @a to i8*)
-@t = constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
-@p1 = constant i8* bitcast ([2 x i32]* @a1 to i8*)
-@t1 = constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
+@a = internal unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@a1 = unnamed_addr constant [2 x i32] [i32 1, i32 2]
+@e = internal  unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@e1 = unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@p = unnamed_addr constant i8* bitcast ([2 x i32]* @a to i8*)
+@t = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
+@p1 = unnamed_addr constant i8* bitcast ([2 x i32]* @a1 to i8*)
+@t1 = unnamed_addr constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
 @p2 = internal global i8* bitcast([2 x i32]* @a1 to i8*)
 @t2 = internal global i8* bitcast([2 x [2 x i32]]* @e1 to i8*)
 @p3 = internal global i8* bitcast([2 x i32]* @a to i8*)
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index 77f320f1056e..adc58ac34b9e 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-mmx -march=x86-64 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 |  FileCheck %s
 
 ; Verify when widening a divide/remainder operation, we only generate a
 ; divide/rem per element since divide/remainder can trap.
diff --git a/test/CodeGen/X86/select-aggregate.ll b/test/CodeGen/X86/select-aggregate.ll
deleted file mode 100644
index 44cafe22af14..000000000000
--- a/test/CodeGen/X86/select-aggregate.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-; PR5757
-
-; CHECK: cmovneq %rdi, %rsi
-; CHECK: movl (%rsi), %eax
-
-%0 = type { i64, i32 }
-
-define i32 @foo(%0* %p, %0* %q, i1 %r) nounwind {
-  %t0 = load %0* %p
-  %t1 = load %0* %q
-  %t4 = select i1 %r, %0 %t0, %0 %t1
-  %t5 = extractvalue %0 %t4, 1
-  ret i32 %t5
-}
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
deleted file mode 100644
index c38a02080523..000000000000
--- a/test/CodeGen/X86/select-zero-one.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
-
-@r1 = weak global i32 0
-
-define void @t1(i32 %a, double %b) {
-  %tmp114 = fcmp ugt double %b, 1.000000e-09
-  %tmp120 = icmp eq i32 %a, 0		; <i1> [#uses=1]
-  %bothcond = or i1 %tmp114, %tmp120		; <i1> [#uses=1]
-  %storemerge = select i1 %bothcond, i32 0, i32 1		; <i32> [#uses=2]
-  store i32 %storemerge, i32* @r1, align 4
-  ret void
-}
-
-@r2 = weak global i8 0
-
-define void @t2(i32 %a, double %b) {
-  %tmp114 = fcmp ugt double %b, 1.000000e-09
-  %tmp120 = icmp eq i32 %a, 0		; <i1> [#uses=1]
-  %bothcond = or i1 %tmp114, %tmp120		; <i1> [#uses=1]
-  %storemerge = select i1 %bothcond, i8 0, i8 1		; <i32> [#uses=2]
-  store i8 %storemerge, i8* @r2, align 4
-  ret void
-}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 95ed9e97cdfd..ce04e07854a4 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -1,63 +1,220 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium 
-; RUN: llc < %s -march=x86 -mcpu=yonah 
-; RUN: llc < %s -march=x86 -mcpu=yonah  | not grep set
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; PR5757
 
-define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
-	%X = select i1 %A, i1 %B, i1 %C		; <i1> [#uses=1]
-	ret i1 %X
+%0 = type { i64, i32 }
+
+define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
+  %t0 = load %0* %p
+  %t1 = load %0* %q
+  %t4 = select i1 %r, %0 %t0, %0 %t1
+  %t5 = extractvalue %0 %t4, 1
+  ret i32 %t5
+; CHECK: test1:
+; CHECK: cmovneq %rdi, %rsi
+; CHECK: movl (%rsi), %eax
+}
+
+
+; PR2139
+define i32 @test2() nounwind {
+entry:
+	%tmp73 = tail call i1 @return_false()		; <i8> [#uses=1]
+	%g.0 = select i1 %tmp73, i16 0, i16 -480		; <i16> [#uses=2]
+	%tmp7778 = sext i16 %g.0 to i32		; <i32> [#uses=1]
+	%tmp80 = shl i32 %tmp7778, 3		; <i32> [#uses=2]
+	%tmp87 = icmp sgt i32 %tmp80, 32767		; <i1> [#uses=1]
+	br i1 %tmp87, label %bb90, label %bb91
+bb90:		; preds = %bb84, %bb72
+	unreachable
+bb91:		; preds = %bb84
+	ret i32 0
+; CHECK: test2:
+; CHECK: movnew
+; CHECK: movswl
+}
+
+declare i1 @return_false()
+
+
+;; Select between two floating point constants.
+define float @test3(i32 %x) nounwind readnone {
+entry:
+	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01		; <float> [#uses=1]
+	ret float %iftmp.0.0
+; CHECK: test3:
+; CHECK: movss	{{.*}},4), %xmm0
+}
+
+define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, i32 4, i32 0		; <i32> [#uses=1]
+	%1 = getelementptr i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
+	%2 = load i8* %1, align 1		; <i8> [#uses=1]
+	ret i8 %2
+; CHECK: test4:
+; CHECK: movsbl	({{.*}},4), %eax
+}
+
+define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
+  %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
+  store <2 x i16> %x, <2 x i16>* %p
+  ret void
+; CHECK: test5:
 }
 
-define i8 @byteSel(i1 %A, i8 %B, i8 %C) nounwind {
-	%X = select i1 %A, i8 %B, i8 %C		; <i8> [#uses=1]
-	ret i8 %X
+define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
+        %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=2]
+        %tmp9 = fmul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
+        %tmp.upgrd.1 = icmp eq i32 %C, 0                ; <i1> [#uses=1]
+        %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp               ; <<4 x float>> [#uses=1]
+        store <4 x float> %iftmp.38.0, <4 x float>* %A
+        ret void
+; Verify that the fmul gets sunk into the one part of the diamond where it is
+; needed.
+; CHECK: test6:
+; CHECK: jne
+; CHECK: mulps
+; CHECK: ret
+; CHECK: ret
 }
 
-define i16 @shortSel(i1 %A, i16 %B, i16 %C) nounwind {
-	%X = select i1 %A, i16 %B, i16 %C		; <i16> [#uses=1]
-	ret i16 %X
+; Select with fp80's
+define x86_fp80 @test7(i32 %tmp8) nounwind {
+        %tmp9 = icmp sgt i32 %tmp8, -1          ; <i1> [#uses=1]
+        %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
+        ret x86_fp80 %retval
+; CHECK: test7:
+; CHECK: leaq
+; CHECK: fldt (%r{{.}}x,%r{{.}}x)
 }
 
-define i32 @intSel(i1 %A, i32 %B, i32 %C) nounwind {
-	%X = select i1 %A, i32 %B, i32 %C		; <i32> [#uses=1]
-	ret i32 %X
+; widening select v6i32 and then a sub
+define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
+	%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
+	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+	store <6 x i32> %val, <6 x i32>* %dst.addr
+	ret void
+        
+; CHECK: test8:
 }
 
-define i64 @longSel(i1 %A, i64 %B, i64 %C) nounwind {
-	%X = select i1 %A, i64 %B, i64 %C		; <i64> [#uses=1]
-	ret i64 %X
+
+;; Test integer select between values and constants.
+
+define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp ne i64 %x, 0
+  %cond = select i1 %cmp, i64 %y, i64 -1
+  ret i64 %cond
+; CHECK: test9:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: orq	%rsi, %rax
+; CHECK: ret
+}
+
+;; Same as test9
+define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp eq i64 %x, 0
+  %cond = select i1 %cmp, i64 -1, i64 %y
+  ret i64 %cond
+; CHECK: test9a:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: orq	%rsi, %rax
+; CHECK: ret
+}
+
+define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp eq i64 %x, 0
+  %A = sext i1 %cmp to i64
+  %cond = or i64 %y, %A
+  ret i64 %cond
+; CHECK: test9b:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: orq	%rsi, %rax
+; CHECK: ret
 }
 
-define double @doubleSel(i1 %A, double %B, double %C) nounwind {
-	%X = select i1 %A, double %B, double %C		; <double> [#uses=1]
-	ret double %X
+;; Select between -1 and 1.
+define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp eq i64 %x, 0
+  %cond = select i1 %cmp, i64 -1, i64 1
+  ret i64 %cond
+; CHECK: test10:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: orq	$1, %rax
+; CHECK: ret
 }
 
-define i8 @foldSel(i1 %A, i8 %B, i8 %C) nounwind {
-	%Cond = icmp slt i8 %B, %C		; <i1> [#uses=1]
-	%X = select i1 %Cond, i8 %B, i8 %C		; <i8> [#uses=1]
-	ret i8 %X
+
+
+define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp eq i64 %x, 0
+  %cond = select i1 %cmp, i64 %y, i64 -1
+  ret i64 %cond
+; CHECK: test11:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: notq %rax
+; CHECK: orq	%rsi, %rax
+; CHECK: ret
 }
 
-define i32 @foldSel2(i1 %A, i32 %B, i32 %C) nounwind {
-	%Cond = icmp eq i32 %B, %C		; <i1> [#uses=1]
-	%X = select i1 %Cond, i32 %B, i32 %C		; <i32> [#uses=1]
-	ret i32 %X
+define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
+  %cmp = icmp ne i64 %x, 0
+  %cond = select i1 %cmp, i64 -1, i64 %y
+  ret i64 %cond
+; CHECK: test11a:
+; CHECK: cmpq	$1, %rdi
+; CHECK: sbbq	%rax, %rax
+; CHECK: notq %rax
+; CHECK: orq	%rsi, %rax
+; CHECK: ret
 }
 
-define i32 @foldSel2a(i1 %A, i32 %B, i32 %C, double %X, double %Y) nounwind {
-	%Cond = fcmp olt double %X, %Y		; <i1> [#uses=1]
-	%X.upgrd.1 = select i1 %Cond, i32 %B, i32 %C		; <i32> [#uses=1]
-	ret i32 %X.upgrd.1
+
+declare noalias i8* @_Znam(i64) noredzone
+
+define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
+entry:
+  %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
+  %B = extractvalue { i64, i1 } %A, 1
+  %C = extractvalue { i64, i1 } %A, 0
+  %D = select i1 %B, i64 -1, i64 %C
+  %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
+  ret i8* %call
+; CHECK: test12:
+; CHECK: mulq
+; CHECK: movq $-1, %rdi
+; CHECK: cmovnoq	%rax, %rdi
+; CHECK: jmp	__Znam
 }
 
-define float @foldSel3(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
-	%Cond = icmp ult i32 %X, %Y		; <i1> [#uses=1]
-	%X.upgrd.2 = select i1 %Cond, float %B, float %C		; <float> [#uses=1]
-	ret float %X.upgrd.2
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+
+define i32 @test13(i32 %a, i32 %b) nounwind {
+  %c = icmp ult i32 %a, %b
+  %d = sext i1 %c to i32
+  ret i32 %d
+; CHECK: test13:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: ret
 }
 
-define float @nofoldSel4(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
-	%Cond = icmp slt i32 %X, %Y		; <i1> [#uses=1]
-	%X.upgrd.3 = select i1 %Cond, float %B, float %C		; <float> [#uses=1]
-	ret float %X.upgrd.3
+define i32 @test14(i32 %a, i32 %b) nounwind {
+  %c = icmp uge i32 %a, %b
+  %d = sext i1 %c to i32
+  ret i32 %d
+; CHECK: test14:
+; CHECK: cmpl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: notl
+; CHECK-NEXT: ret
 }
+
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
deleted file mode 100644
index 4aca0407b36f..000000000000
--- a/test/CodeGen/X86/sext-select.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep movsw
-; PR2139
-
-declare void @abort()
-
-define i32 @main() {
-entry:
-	%tmp73 = tail call i1 @return_false()		; <i8> [#uses=1]
-	%g.0 = select i1 %tmp73, i16 0, i16 -480		; <i16> [#uses=2]
-	%tmp7778 = sext i16 %g.0 to i32		; <i32> [#uses=1]
-	%tmp80 = shl i32 %tmp7778, 3		; <i32> [#uses=2]
-	%tmp87 = icmp sgt i32 %tmp80, 32767		; <i1> [#uses=1]
-	br i1 %tmp87, label %bb90, label %bb91
-bb90:		; preds = %bb84, %bb72
-	tail call void @abort()
-	unreachable
-bb91:		; preds = %bb84
-	ret i32 0
-}
-
-define i1 @return_false() {
-	ret i1 0
-}
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 48ca36ca9813..d9c3061ff687 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,21 +1,21 @@
 ; RUN: llc < %s -march=x86 | \
 ; RUN:   grep {s\[ah\]\[rl\]l} | count 1
 
-define i32* @test1(i32* %P, i32 %X) {
+define i32* @test1(i32* %P, i32 %X) nounwind {
         %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
         %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
         %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
         ret i32* %P2
 }
 
-define i32* @test2(i32* %P, i32 %X) {
+define i32* @test2(i32* %P, i32 %X) nounwind {
         %Y = shl i32 %X, 2              ; <i32> [#uses=1]
         %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
         %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
         ret i32* %P2
 }
 
-define i32* @test3(i32* %P, i32 %X) {
+define i32* @test3(i32* %P, i32 %X) nounwind {
         %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
         %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
         ret i32* %P2
diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll
index f0d66cf7b696..f97abe002957 100644
--- a/test/CodeGen/X86/sibcall-3.ll
+++ b/test/CodeGen/X86/sibcall-3.ll
@@ -3,7 +3,7 @@
 
 define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
 ; CHECK: t1:
-; CHECK: call 0
+; CHECK: calll 0
   tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind
   ret void
 }
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
new file mode 100644
index 000000000000..9d74121b4301
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2  | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+; Sibcall optimization of expanded libcalls.
+; rdar://8707777
+
+define double @foo(double %a) nounwind readonly ssp {
+entry:
+; X32: foo:
+; X32: jmp _sin$stub
+
+; X64: foo:
+; X64: jmp _sin
+  %0 = tail call double @sin(double %a) nounwind readonly
+  ret double %0
+}
+
+define float @bar(float %a) nounwind readonly ssp {
+; X32: bar:
+; X32: jmp _sinf$stub
+
+; X64: bar:
+; X64: jmp _sinf
+entry:
+  %0 = tail call float @sinf(float %a) nounwind readonly
+  ret float %0
+}
+
+declare float @sinf(float) nounwind readonly
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index a3c9957be34e..de2a81e80bd4 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86    -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
-; Darwin 8 generates stubs, which don't match
-; XFAIL: apple-darwin8
+; RUN: llc < %s -mtriple=i686-linux   -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
 
 define void @t1(i32 %x) nounwind ssp {
 entry:
@@ -45,7 +43,7 @@ declare i32 @foo3()
 define void @t4(void (i32)* nocapture %x) nounwind ssp {
 entry:
 ; 32: t4:
-; 32: call *
+; 32: calll *
 ; FIXME: gcc can generate a tailcall for this. But it's tricky.
 
 ; 64: t4:
@@ -71,7 +69,7 @@ entry:
 define i32 @t6(i32 %x) nounwind ssp {
 entry:
 ; 32: t6:
-; 32: call {{_?}}t6
+; 32: calll {{_?}}t6
 ; 32: jmp {{_?}}bar
 
 ; 64: t6:
@@ -108,7 +106,7 @@ declare i32 @bar2(i32, i32, i32)
 define signext i16 @t8() nounwind ssp {
 entry:
 ; 32: t8:
-; 32: call {{_?}}bar3
+; 32: calll {{_?}}bar3
 
 ; 64: t8:
 ; 64: callq {{_?}}bar3
@@ -121,7 +119,7 @@ declare signext i16 @bar3()
 define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
 entry:
 ; 32: t9:
-; 32: call *
+; 32: calll *
 
 ; 64: t9:
 ; 64: callq *
@@ -133,7 +131,7 @@ entry:
 define void @t10() nounwind ssp {
 entry:
 ; 32: t10:
-; 32: call
+; 32: calll
 
 ; 64: t10:
 ; 64: callq
@@ -205,12 +203,12 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
 define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
 ; 32: t13:
 ; 32-NOT: jmp
-; 32: call
+; 32: calll
 ; 32: ret
 
 ; 64: t13:
 ; 64-NOT: jmp
-; 64: call
+; 64: callq
 ; 64: ret
 entry:
   %0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
@@ -248,7 +246,7 @@ entry:
 
 define void @t15(%struct.foo* noalias sret %agg.result) nounwind  {
 ; 32: t15:
-; 32: call {{_?}}f
+; 32: calll {{_?}}f
 ; 32: ret $4
 
 ; 64: t15:
@@ -263,7 +261,7 @@ declare void @f(%struct.foo* noalias sret) nounwind
 define void @t16() nounwind ssp {
 entry:
 ; 32: t16:
-; 32: call {{_?}}bar4
+; 32: calll {{_?}}bar4
 ; 32: fstp
 
 ; 64: t16:
@@ -293,7 +291,7 @@ declare void @bar5(...)
 define void @t18() nounwind ssp {
 entry:
 ; 32: t18:
-; 32: call {{_?}}bar6
+; 32: calll {{_?}}bar6
 ; 32: fstp %st(0)
 
 ; 64: t18:
@@ -309,7 +307,7 @@ define void @t19() alignstack(32) nounwind {
 entry:
 ; CHECK: t19:
 ; CHECK: andl $-32
-; CHECK: call {{_?}}foo
+; CHECK: calll {{_?}}foo
   tail call void @foo() nounwind
   ret void
 }
@@ -323,7 +321,7 @@ declare void @foo()
 define double @t20(double %x) nounwind {
 entry:
 ; 32: t20:
-; 32: call {{_?}}foo20
+; 32: calll {{_?}}foo20
 ; 32: fldl (%esp)
 
 ; 64: t20:
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index acba5288c0d1..31f41eebc5aa 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -6,10 +6,11 @@
 ; that it's conditionally evaluated.
 
 ; CHECK: foo:
-; CHECK:      divsd
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne
+; CHECK-NEXT: je
 ; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK:      divsd
 
 define double @foo(double %x, double %y, i1 %c) nounwind {
   %a = fdiv double %x, 3.2
@@ -18,6 +19,24 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
   ret double %z
 }
 
+; Make sure the critical edge is broken so the divsd is sunken below
+; the conditional branch.
+; rdar://8454886
+
+; CHECK: split:
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je
+; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK:      movaps
+; CHECK-NEXT: ret
+define double @split(double %x, double %y, i1 %c) nounwind {
+  %a = fdiv double %x, 3.2
+  %z = select i1 %c, double %a, double %y
+  ret double %z
+}
+
+
 ; Hoist floating-point constant-pool loads out of loops.
 
 ; CHECK: bar:
@@ -68,9 +87,9 @@ return:
 ; Codegen should hoist and CSE these constants.
 
 ; CHECK: vv:
-; CHECK: LCPI2_0(%rip), %xmm0
-; CHECK: LCPI2_1(%rip), %xmm1
-; CHECK: LCPI2_2(%rip), %xmm2
+; CHECK: LCPI3_0(%rip), %xmm0
+; CHECK: LCPI3_1(%rip), %xmm1
+; CHECK: LCPI3_2(%rip), %xmm2
 ; CHECK: align
 ; CHECK-NOT: LCPI
 ; CHECK: ret
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
deleted file mode 100644
index 07d4d52f97a3..000000000000
--- a/test/CodeGen/X86/split-select.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86-64 | grep test | count 1
-
-define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
-  %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
-  store <2 x i16> %x, <2 x i16>* %p
-  ret void
-}
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
index 3cc83ca0db2a..9f5d4b40d61a 100644
--- a/test/CodeGen/X86/sse-align-11.ll
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
-; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movups
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-linux-gnu | grep movaps
+; PR8969 - make 32-bit linux have a 16-byte aligned stack
 
 define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
 entry:
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 6fc019071f8b..5c3e32f016a7 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -192,3 +192,33 @@ entry:
 ; CHECK: test15:
 ; CHECK: 	movhlps	%xmm1, %xmm0
 }
+
+; PR8900
+; CHECK: test16:
+; CHECK: unpcklpd
+; CHECK: ret
+
+define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
+  %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
+  %i6 = load <4 x double>* %i5, align 32
+  %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %i7
+}
+
+; PR9009
+define fastcc void @test17() nounwind {
+entry:
+  %0 = insertelement <4 x i32> undef, i32 undef, i32 1
+  %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %2 = bitcast <4 x i32> %1 to <4 x float>
+  store <4 x float> %2, <4 x float> * undef
+  ret void
+}
+
+; PR9210
+define <4 x float> @f(<4 x double>) nounwind {
+entry:
+ %double2float.i = fptrunc <4 x double> %0 to <4 x float>
+ ret <4 x float> %double2float.i
+}
+
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 206cdff1ba7d..9a60091a0cf0 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -169,7 +169,7 @@ define internal void @t10() nounwind {
         ret void
 ; X64: 	t10:
 ; X64: 		pextrw	$4, %xmm0, %eax
-; X64: 		movlhps	%xmm1, %xmm1
+; X64: 		unpcklpd %xmm1, %xmm1
 ; X64: 		pshuflw	$8, %xmm1, %xmm1
 ; X64: 		pinsrw	$2, %eax, %xmm1
 ; X64: 		pextrw	$6, %xmm0, %eax
@@ -260,3 +260,18 @@ entry:
 ; X64: 		pinsrw	$1, %eax, %xmm0
 ; X64: 		ret
 }
+
+; rdar://8520311
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64:          movddup (%rax), %xmm0
+  %tmp1 = load <4 x float>* undef, align 16
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  %tmp3 = load <4 x float>* undef, align 16
+  %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+  %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+  %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+  ret <4 x i32> %tmp7
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 3a14fa26300c..2ac4cb435a75 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
         ret i32 %tmp1
 ; X32: _ptestz_2:
 ; X32:    ptest 	%xmm1, %xmm0
-; X32:    setb	%al
+; X32:    sbbl	%eax
 
 ; X64: _ptestz_2:
 ; X64:    ptest 	%xmm1, %xmm0
-; X64:    setb	%al
+; X64:    sbbl	%eax
 }
 
 define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 8ca0b12b547f..793c0267124c 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i686-apple-darwin8"
 @G = external global double
 
-define void @test({ double, double }* byval  %z, double* %P) {
+define void @test({ double, double }* byval  %z, double* %P) nounwind {
 entry:
 	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
 	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
@@ -21,14 +21,14 @@ entry:
 	ret void
 }
 
-define void @test2() alignstack(16) {
+define void @test2() alignstack(16) nounwind {
 entry:
     ; CHECK: andl{{.*}}$-16, %esp
     ret void
 }
 
 ; Use a call to force a spill.
-define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) {
+define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
 entry:
     ; CHECK: andl{{.*}}$-32, %esp
     call void @test2()
@@ -38,3 +38,14 @@ entry:
 
 declare double @fabs(double)
 
+; The pointer is already known aligned, so and x,-16 is eliminable.
+define i32 @test4() nounwind {
+entry:
+  %buffer = alloca [2048 x i8], align 16
+  %0 = ptrtoint [2048 x i8]* %buffer to i32
+  %and = and i32 %0, -16
+  ret i32 %and
+; CHECK: test4:
+; CHECK-NOT: and
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/stdcall-notailcall.ll b/test/CodeGen/X86/stdcall-notailcall.ll
new file mode 100644
index 000000000000..8e33c30bf293
--- /dev/null
+++ b/test/CodeGen/X86/stdcall-notailcall.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=i386-apple-darwin11 -O2 < %s | FileCheck %s
+
+%struct.I = type { i32 (...)** }
+define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 {
+; CHECK: bar:
+; CHECK-NOT: jmp
+; CHECK: ret $4
+entry:
+  tail call void @foo()
+  ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index abc5174c98de..0dd228eb145f 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -152,3 +152,17 @@ define void @test9() nounwind {
   store i32 %or, i32* @g_16
   ret void
 }
+
+; rdar://8494845 + PR8244
+; X64: test10:
+; X64-NEXT: movsbl	(%rdi), %eax
+; X64-NEXT: shrl	$8, %eax
+; X64-NEXT: ret
+define i8 @test10(i8* %P) nounwind ssp {
+entry:
+  %tmp = load i8* %P, align 1
+  %conv = sext i8 %tmp to i32
+  %shr3 = lshr i32 %conv, 8
+  %conv2 = trunc i32 %shr3 to i8
+  ret i8 %conv2
+}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 46e59e95e53f..11686227ab9c 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
 
 target datalayout = "e-p:32:32"
         %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index ed3266ec422b..9f491d452fa8 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -49,3 +49,33 @@ sw.epilog:                                        ; preds = %sw.default, %sw.bb4
 }
 
 declare void @foo(i32)
+
+; Don't zero extend the test operands to pointer type if it can be avoided.
+; rdar://8781238
+define void @test2(i32 %x) nounwind ssp {
+; CHECK: test2:
+; CHECK: cmpl $6
+; CHECK: ja
+
+; CHECK-NEXT: movl $91
+; CHECK-NOT: movl
+; CHECK-NEXT: btl
+; CHECK-NEXT: jb
+entry:
+  switch i32 %x, label %if.end [
+    i32 6, label %if.then
+    i32 4, label %if.then
+    i32 3, label %if.then
+    i32 1, label %if.then
+    i32 0, label %if.then
+  ]
+
+if.then:                                          ; preds = %entry, %entry, %entry, %entry, %entry
+  tail call void @bar() nounwind
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
new file mode 100644
index 000000000000..75832c7d304c
--- /dev/null
+++ b/test/CodeGen/X86/switch-or.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s
+
+; Check that merging switch cases that differ in one bit works.
+; CHECK: orl $2
+; CHECK-NEXT: cmpl $6
+
+define void @foo(i32 %variable) nounwind {
+entry:
+  switch i32 %variable, label %if.end [
+    i32 4, label %if.then
+    i32 6, label %if.then
+  ]
+
+if.then:
+  %call = tail call i32 (...)* @bar() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 9662ad6cd740..9291695f4d65 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -62,11 +62,11 @@ declare i8* @choose(i8*, i8*)
 
 ; CHECK: tail_duplicate_me:
 ; CHECK:      movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
 ; CHECK:      movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
 ; CHECK:      movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
 
 define void @tail_duplicate_me() nounwind {
 entry:
@@ -153,19 +153,16 @@ bb30:
 ; an unconditional jump to complete a two-way conditional branch.
 
 ; CHECK: c_expand_expr_stmt:
-; CHECK:        jmp .LBB3_7
-; CHECK-NEXT: .LBB3_12:
+; CHECK:        jmp .LBB3_11
+; CHECK-NEXT: .LBB3_9:
 ; CHECK-NEXT:   movq 8(%rax), %rax
+; CHECK-NEXT:   xorb %dl, %dl
 ; CHECK-NEXT:   movb 16(%rax), %al
 ; CHECK-NEXT:   cmpb $16, %al
-; CHECK-NEXT:   je .LBB3_6
+; CHECK-NEXT:   je .LBB3_11
 ; CHECK-NEXT:   cmpb $23, %al
-; CHECK-NEXT:   je .LBB3_6
-; CHECK-NEXT:   jmp .LBB3_15
-; CHECK-NEXT: .LBB3_14:
-; CHECK-NEXT:   cmpb $23, %bl
-; CHECK-NEXT:   jne .LBB3_15
-; CHECK-NEXT: .LBB3_15:
+; CHECK-NEXT:   jne .LBB3_14
+; CHECK-NEXT: .LBB3_11:
 
 %0 = type { %struct.rtx_def* }
 %struct.lang_decl = type opaque
@@ -276,7 +273,7 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
 ; CHECK: foo:
 ; CHECK:        callq func
 ; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT:   addq $8, %rsp
+; CHECK-NEXT:   popq
 ; CHECK-NEXT:   ret
 
 define void @foo(i1* %V) nounwind {
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c7070f2abd25..c3f4278aecbe 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -17,7 +17,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ; Adjust the stack to enter the function.  (The amount of the
 ; adjustment may change in the future, in which case the location of
 ; the stack argument and the return adjustment will change too.)
-;  CHECK: subq $8, %rsp
+;  CHECK: pushq
 ; Put the call target into R11, which won't be clobbered while restoring
 ; callee-saved registers and won't be used for passing arguments.
 ;  CHECK: movq %rdi, %rax
@@ -31,7 +31,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ;  CHECK: movl $5, %r8d
 ;  CHECK: movl $6, %r9d
 ; Adjust the stack to "return".
-;  CHECK: addq $8, %rsp
+;  CHECK: popq
 ; And tail-call to the target.
 ;  CHECK: jmpq *%rax  # TAILCALL
   %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
@@ -46,7 +46,7 @@ define fastcc i32 @direct_manyargs() {
 ; Adjust the stack to enter the function.  (The amount of the
 ; adjustment may change in the future, in which case the location of
 ; the stack argument and the return adjustment will change too.)
-;  CHECK: subq $8, %rsp
+;  CHECK: pushq
 ; Pass the stack argument.
 ;  CHECK: movl $7, 16(%rsp)
 ; Pass the register arguments, in the right registers.
@@ -62,7 +62,7 @@ define fastcc i32 @direct_manyargs() {
 ; arguments.
 ;  CHECK: movabsq $manyargs_callee, %rax
 ; Adjust the stack to "return".
-;  CHECK: addq $8, %rsp
+;  CHECK: popq
 ; And tail-call to the target.
 ;  CHECK: jmpq *%rax  # TAILCALL
   %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
new file mode 100644
index 000000000000..914d8f7b8bc7
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=AMD64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; PR8743
+; TAILJMPri64 should not receive "callee-saved" registers beyond epilogue.
+
+; AMD64: jmpq
+; AMD64-NOT: %{{e[a-z]|rbx|rbp|r10|r12|r13|r14|r15}}
+
+; WIN64: jmpq
+; WIN64-NOT: %{{e[a-z]|rbx|rsi|rdi|rbp|r12|r13|r14|r15}}
+
+%class = type { [8 x i8] }
+%vt = type { i32 (...)** }
+
+define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
+%this, %vt* %Ty) align 2 {
+entry:
+  %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
+  %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+  %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
+  %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+  %call = tail call %vt* %1(%vt* %Ty, %class* %this)
+  ret %vt* %call
+}
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index 0233139e8082..d3f811cff248 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -19,5 +19,5 @@ define i32 @main(i32 %argc, i8** %argv) {
  ret i32 0
 }
 
-; CHECK: call tailcaller
+; CHECK: calll tailcaller
 ; CHECK-NEXT: subl $12
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 4ec127f81ac7..04c4e95710c5 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
 
 declare i32 @putchar(i32)
 
 define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+; CHECK: checktail:
         %tmp1 = icmp sgt i32 %x, 0
         br i1 %tmp1, label %if-then, label %if-else
 
@@ -10,6 +11,7 @@ if-then:
         %fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)* 
         %arg1    = add i32 %x, -1
         call i32 @putchar(i32 90)       
+; CHECK: jmpl *%e{{.*}}
         %res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
         ret i32 %res
 
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 107bdf9de3e7..0c732d56b6ca 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,16 +1,20 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+
+; FIXME: Redundant unused stack allocation could be eliminated.
+; CHECK: subq  ${{24|88}}, %rsp
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl  32(%rsp), %eax
+; CHECK: movl  [[A1:32|144]](%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
-; CHECK: movl  40(%rsp), %r10d
+; CHECK: movl  [[A2:40|152]](%rsp), %r10d
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: addl %edi, %eax
+; CHECK: addl {{%edi|%ecx}}, %eax
 ; Move param %in2 to stack.
-; CHECK: movl  %r10d, 32(%rsp)
+; CHECK: movl  %r10d, [[A1]](%rsp)
 ; Move result of addition to stack.
-; CHECK: movl  %eax, 40(%rsp)
+; CHECK: movl  %eax, [[A2]](%rsp)
 ; Eventually, do a TAILCALL
 ; CHECK: TAILCALL
 
@@ -22,4 +26,3 @@ entry:
         %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
         ret i32 %retval
 }
-
diff --git a/test/CodeGen/X86/tls-1.ll b/test/CodeGen/X86/tls-1.ll
deleted file mode 100644
index de694d8d471f..000000000000
--- a/test/CodeGen/X86/tls-1.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
-
-@a = thread_local global i32 0                    ; <i32*> [#uses=0]
-@b = thread_local global i32 0                    ; <i32*> [#uses=0]
-
-; CHECK: .tbss _a$tlv$init, 4, 2
-; CHECK:        .section        __DATA,__thread_vars,thread_local_variables
-; CHECK:        .globl  _a
-; CHECK: _a:
-; CHECK:        .quad   __tlv_bootstrap
-; CHECK:        .quad   0
-; CHECK:        .quad   _a$tlv$init
-
-; CHECK: .tbss _b$tlv$init, 4, 2
-; CHECK:        .globl  _b
-; CHECK: _b:
-; CHECK:        .quad   __tlv_bootstrap
-; CHECK:        .quad   0
-; CHECK:        .quad   _b$tlv$init
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index 4cad8376d8d9..b83416d4b32b 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -11,11 +11,11 @@ entry:
 
 ; X32: f1:
 ; X32:   leal i@TLSGD(,%ebx), %eax
-; X32:   call ___tls_get_addr@PLT
+; X32:   calll ___tls_get_addr@PLT
 
 ; X64: f1:
 ; X64:   leaq i@TLSGD(%rip), %rdi
-; X64:   call __tls_get_addr@PLT
+; X64:   callq __tls_get_addr@PLT
 
 
 @i2 = external thread_local global i32
@@ -27,11 +27,11 @@ entry:
 
 ; X32: f2:
 ; X32:   leal i@TLSGD(,%ebx), %eax
-; X32:   call ___tls_get_addr@PLT
+; X32:   calll ___tls_get_addr@PLT
 
 ; X64: f2:
 ; X64:   leaq i@TLSGD(%rip), %rdi
-; X64:   call __tls_get_addr@PLT
+; X64:   callq __tls_get_addr@PLT
 
 
 
@@ -43,11 +43,11 @@ entry:
 
 ; X32: f3:
 ; X32:   leal	i@TLSGD(,%ebx), %eax
-; X32:   call ___tls_get_addr@PLT
+; X32:   calll ___tls_get_addr@PLT
 
 ; X64: f3:
 ; X64:   leaq i@TLSGD(%rip), %rdi
-; X64:   call __tls_get_addr@PLT
+; X64:   callq __tls_get_addr@PLT
 
 
 define i32* @f4() nounwind {
@@ -57,11 +57,11 @@ entry:
 
 ; X32: f4:
 ; X32:   leal	i@TLSGD(,%ebx), %eax
-; X32:   call ___tls_get_addr@PLT
+; X32:   calll ___tls_get_addr@PLT
 
 ; X64: f4:
 ; X64:   leaq i@TLSGD(%rip), %rdi
-; X64:   call __tls_get_addr@PLT
+; X64:   callq __tls_get_addr@PLT
 
 
 
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index 214146fe998c..7d08df84a9fa 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -5,7 +5,7 @@
 
 @i = external hidden thread_local global i32
 
-define i32 @f() {
+define i32 @f() nounwind {
 entry:
 	%tmp1 = load i32* @i
 	ret i32 %tmp1
diff --git a/test/CodeGen/X86/tlv-1.ll b/test/CodeGen/X86/tlv-1.ll
new file mode 100644
index 000000000000..42940f147ed8
--- /dev/null
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+%struct.A = type { [48 x i8], i32, i32, i32 }
+
+@c = external thread_local global %struct.A, align 4
+
+define void @main() nounwind ssp {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
+  unreachable  
+  ; CHECK: movq    _c@TLVP(%rip), %rdi
+  ; CHECK-NEXT: callq   *(%rdi)
+  ; CHECK-NEXT: movl    $0, 56(%rax)
+  ; CHECK-NEXT: movq    $0, 48(%rax)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+@a = thread_local global i32 0                    ; <i32*> [#uses=0]
+@b = thread_local global i32 0                    ; <i32*> [#uses=0]
+
+; CHECK: .tbss _a$tlv$init, 4, 2
+; CHECK:        .section        __DATA,__thread_vars,thread_local_variables
+; CHECK:        .globl  _a
+; CHECK: _a:
+; CHECK:        .quad   __tlv_bootstrap
+; CHECK:        .quad   0
+; CHECK:        .quad   _a$tlv$init
+
+; CHECK: .tbss _b$tlv$init, 4, 2
+; CHECK:        .globl  _b
+; CHECK: _b:
+; CHECK:        .quad   __tlv_bootstrap
+; CHECK:        .quad   0
+; CHECK:        .quad   _b$tlv$init
diff --git a/test/CodeGen/X86/tlv-2.ll b/test/CodeGen/X86/tlv-2.ll
new file mode 100644
index 000000000000..5f29a60bef5a
--- /dev/null
+++ b/test/CodeGen/X86/tlv-2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin -O0 | FileCheck %s
+
+@b = thread_local global i32 5, align 4
+@a = thread_local global i32 0, align 4
+@c = internal thread_local global i32 0, align 4
+@d = internal thread_local global i32 5, align 4
+
+define void @foo() nounwind ssp {
+entry:
+  store i32 1, i32* @a, align 4
+  ; CHECK: movq    _a@TLVP(%rip), %rdi
+  ; CHECK: callq   *(%rdi)
+  ; CHECK: movl    $1, (%rax)
+  
+  store i32 2, i32* @b, align 4
+  ; CHECK: movq    _b@TLVP(%rip), %rdi
+  ; CHECK: callq   *(%rdi)
+  ; CHECK: movl    $2, (%rax)
+
+  store i32 3, i32* @c, align 4
+  ; CHECK: movq    _c@TLVP(%rip), %rdi
+  ; CHECK: callq   *(%rdi)
+  ; CHECK: movl    $3, (%rax)
+  
+  store i32 4, i32* @d, align 4
+  ; CHECK: movq    _d@TLVP(%rip), %rdi
+  ; CHECK: callq   *(%rdi)
+  ; CHECK: movl    $4, (%rax)
+  ; CHECK: popq
+  
+  ret void
+}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a245ed7caa84..ec16dfe172e3 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,20 +5,32 @@
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   not grep {mov E.X, E.X}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
-@G = external global i32                ; <i32*> [#uses=3]
+@G = external global i32
 
-define i32 @test1(i32 %X, i32 %Y) {
-        %Z = add i32 %X, %Y             ; <i32> [#uses=1]
-        volatile store i32 %Y, i32* @G
+define i32 @test1(i32 %X) nounwind {
+; CHECK: test1:
+; CHECK-NOT: mov
+; CHECK: leal 1(%rdi)
+        %Z = add i32 %X, 1
         volatile store i32 %Z, i32* @G
         ret i32 %X
 }
 
-define i32 @test2(i32 %X) {
-        %Z = add i32 %X, 1              ; <i32> [#uses=1]
-        volatile store i32 %Z, i32* @G
-        ret i32 %X
+; rdar://8977508
+; The second add should not be transformed to leal nor should it be
+; commutted (which would require inserting a copy).
+define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: leal
+; CHECK-NOT: leal
+; CHECK-NOT: mov
+; CHECK-NEXT: addl
+; CHECK-NEXT: ret
+ %add = add i32 %b, %a
+ %add3 = add i32 %add, %c
+ %add5 = add i32 %add3, %d
+ ret i32 %add5
 }
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
new file mode 100644
index 000000000000..d9f753c7a88e
--- /dev/null
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
+; by the compiler_rt implementation of __floatundisf.
+; <rdar://problem/8493982>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: jns LBB0_2
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
+; CHECK: LBB0_2
+; CHECK-NEXT: cvtsi2ss
+define float @test(i64 %a) {
+entry:
+  %b = uitofp i64 %a to float
+  ret float %b
+}
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index d522bd80acfd..c9976617a247 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 define i1 @a(i32 %x) zeroext nounwind {
   %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
   %obil = extractvalue {i32, i1} %res, 1
   ret i1 %obil
+  
+; CHECK: a:
+; CHECK: mull
+; CHECK: seto %al
+; CHECK: movzbl	%al, %eax
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/umulo-64.ll b/test/CodeGen/X86/umulo-64.ll
new file mode 100644
index 000000000000..280bd9cb066d
--- /dev/null
+++ b/test/CodeGen/X86/umulo-64.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+
+%0 = type { i64, i1 }
+
+define i32 @f0(i64 %a, i64 %b) nounwind ssp {
+  %1 = alloca i64, align 4
+  %2 = alloca i64, align 4
+  store i64 %a, i64* %1, align 8
+  store i64 %b, i64* %2, align 8
+  %3 = load i64* %1, align 8
+  %4 = load i64* %2, align 8
+  %5 = call %0 @llvm.smul.with.overflow.i64(i64 %3, i64 %4)
+  %6 = extractvalue %0 %5, 0
+  %7 = extractvalue %0 %5, 1
+  br i1 %7, label %8, label %9
+
+; <label>:8                                       ; preds = %0
+  call void @llvm.trap()
+  unreachable
+
+; <label>:9                                       ; preds = %0
+  %10 = trunc i64 %6 to i32
+  ret i32 %10
+}
+
+declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index a99af0605b12..6a493c0594de 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -13,7 +13,7 @@ entry:
 bb:
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
   call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: call {{_?}}memcpy
+; I386: calll {{_?}}memcpy
 
 ; CORE2: movabsq
 ; CORE2: movabsq
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index fa98b781e4ea..09431b5564ae 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -asm-verbose=false -march=x86-64 -use-unknown-locations | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-apple-darwin10 -use-unknown-locations | FileCheck %s
 
 ; The divide instruction does not have a debug location. CodeGen should
-; represent this in the debug information. This is checked by a check
-; for a label between the code for the add and the code for the divide,
-; which indicates that the add's location doesn't spill over unto the
-; divide.
+; represent this in the debug information. This is done by setting line
+; and column to 0
 
 ;      CHECK:         leal    (%rdi,%rsi), %eax
+; CHECK-NEXT:         .loc 1 0 0
 ; CHECK-NEXT: Ltmp
 ; CHECK-NEXT:         cltd
 ; CHECK-NEXT:         idivl   %r8d
+; CHECK-NEXT:         .loc 1 4 3
 ; CHECK-NEXT: Ltmp
 ; CHECK-NEXT:         addl    %ecx, %eax
 ; CHECK-NEXT:         ret
diff --git a/test/CodeGen/X86/vec-sign.ll b/test/CodeGen/X86/vec-sign.ll
new file mode 100644
index 000000000000..31b9c2eb4c77
--- /dev/null
+++ b/test/CodeGen/X86/vec-sign.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -mcpu=nehalem | FileCheck %s
+
+define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <4 x i32> zeroinitializer, %a
+  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <4 x i32> %a, %0
+  %2 = and <4 x i32> %b.lobit, %sub
+  %cond = or <4 x i32> %1, %2
+  ret <4 x i32> %cond
+}
+
+define <4 x i32> @blendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <4 x i32> zeroinitializer, %a
+  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <4 x i32> %c, %0
+  %2 = and <4 x i32> %a, %b.lobit
+  %cond = or <4 x i32> %1, %2
+  ret <4 x i32> %cond
+}
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
index 2f57d7b571f0..4d665f1843ef 100644
--- a/test/CodeGen/X86/vec-trunc-store.ll
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -disable-mmx >/dev/null
+; RUN: llc < %s -march=x86-64
 
 define void @foo(<8 x i32>* %p) nounwind {
   %t = load <8 x i32>* %p
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index f8531646effa..95289c9685a1 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=core2
-; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx
 
 
 define <8 x i32> @a(<8 x i16> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 091641b3bc3b..04bb7254fb08 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
 
 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
index 8d2a3c31aedf..02b16a79f4a0 100644
--- a/test/CodeGen/X86/vec_ext_inreg.ll
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -march=x86-64 
-; RUN: llc < %s -march=x86-64 -disable-mmx
 
 define <8 x i32> @a(<8 x i32> %a) nounwind {
   %b = trunc <8 x i32> %a to <8 x i16>
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 291fc0454c9c..471cc1611fce 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,15 +1,16 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep psllq %t | grep 32
+; RUN: grep shll %t | grep 12
 ; RUN: grep pslldq %t | grep 12
 ; RUN: grep psrldq %t | grep 8
 ; RUN: grep psrldq %t | grep 12
+; There are no MMX operations in @t1
 
-define void  @t1(i32 %a, <1 x i64>* %P) nounwind {
+define void  @t1(i32 %a, x86_mmx* %P) nounwind {
        %tmp12 = shl i32 %a, 12
        %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
        %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
-       %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
-       store <1 x i64> %tmp23, <1 x i64>* %P
+       %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
+       store x86_mmx %tmp23, x86_mmx* %P
        ret void
 }
 
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 9ede10f63d3e..268b5c4bf972 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,8 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s
+; MMX insertelement is not available; these are promoted to XMM.
+; (Without SSE they are split to two ints, and the code is much better.)
 
-define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind  {
+define x86_mmx @mmx_movzl(x86_mmx %x) nounwind  {
 entry:
-	%tmp3 = insertelement <2 x i32> %x, i32 32, i32 0		; <<2 x i32>> [#uses=1]
+; CHECK: mmx_movzl
+; CHECK: pinsrd
+; CHECK: pinsrd
+        %tmp = bitcast x86_mmx %x to <2 x i32> 
+	%tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0		; <<2 x i32>> [#uses=1]
 	%tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1		; <<2 x i32>> [#uses=1]
-	ret <2 x i32> %tmp8
+        %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
+	ret x86_mmx %tmp9
 }
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
deleted file mode 100644
index 033e9f7027f9..000000000000
--- a/test/CodeGen/X86/vec_select.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse
-
-define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
-        %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
-        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=2]
-        %tmp9 = fmul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
-        %tmp.upgrd.1 = icmp eq i32 %C, 0                ; <i1> [#uses=1]
-        %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp               ; <<4 x float>> [#uses=1]
-        store <4 x float> %iftmp.38.0, <4 x float>* %A
-        ret void
-}
-
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index 4f0acb2d151d..6dd3cb0abeb9 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movq
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep mov | count 3
 
 define <2 x i64> @t1(<2 x i64>* %ptr) nounwind  {
 	%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index d700ccbf5303..dec98c7400a5 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
-; RUN: grep addps %t | count 2
-; RUN: grep mulps %t | count 2
-; RUN: grep subps %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s
 
 ; ModuleID = 'vec_shuffle-27.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -9,9 +6,33 @@ target triple = "i686-apple-cl.1.0"
 
 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
 entry:
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
+; CHECK: subps
+; CHECK: mulps
+; CHECK: addps
 	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
 	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
 	%mul = fmul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
 	%add = fadd <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
 	ret <8 x float> %add
 }
+
+; Test case for r122206
+define void @test2(<4 x i64>* %ap, <4 x i64>* %bp) nounwind {
+entry:
+; CHECK: movdqa
+  %a = load <4 x i64> * %ap
+  %b = load <4 x i64> * %bp
+  %mulaa = mul <4 x i64> %a, %a
+  %mulbb = mul <4 x i64> %b, %b
+  %mulab = mul <4 x i64> %a, %b
+  %vect1271 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %vect1272 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %vect1487 = shufflevector <4 x i64> %vect1271, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  %vect1488 = shufflevector <4 x i64> %vect1272, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  store <4 x i64> %vect1487, <4 x i64>* %ap
+  store <4 x i64> %vect1488, <4 x i64>* %bp
+  ret void;
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
index 3f69150ac533..1651c4cdace2 100644
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
 ; RUN: grep pshufhw %t | grep -- -95 | count 1
 ; RUN: grep shufps %t | count 1
 ; RUN: not grep pslldq %t
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 1ed858de64e8..b09093089c5a 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
@@ -12,3 +13,12 @@ entry:
   ret <4 x i32> %2
 }
 
+define void @t01(double* %a0) nounwind ssp {
+entry:
+; CHECK_O0: movsd (%eax), %xmm0
+; CHECK_O0: unpcklpd  %xmm0, %xmm0
+  %tmp93 = load double* %a0, align 8
+  %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
+  store <2 x double> %vecinit94, <2 x double>* undef
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 3b15d4cc407b..8aa50945e635 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; 64-bit stores here do not use MMX.
 
 @M1 = external global <1 x i64>
 @M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/visibility.ll b/test/CodeGen/X86/visibility.ll
new file mode 100644
index 000000000000..a8d287083a80
--- /dev/null
+++ b/test/CodeGen/X86/visibility.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s
+
+define hidden void @foo() nounwind {
+entry:
+  call void @bar()
+  ret void
+}
+
+declare hidden void @bar()
+
+;CHECK: .hidden	bar
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index ae845e0a33d1..49551562c5ae 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 36feb11603d8..9a9b419abea5 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index 20d3f48a1a67..8e8a9aa04b27 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same.
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 9773cbed0ae3..8e24fda1835d 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; test vector shifts converted to proper SSE2 vector shifts when the shift
 ; amounts are the same when using a shuffle splat.
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index a543f382b513..cb254aeb5735 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; When loading the shift amount from memory, avoid generating the splat.
 
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index a247c6eb00d7..97dacfdf09e0 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-mmx |  FileCheck %s
+; RUN: llc < %s -march=x86 |  FileCheck %s
 
 
 define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index f8d06902c553..4b8016dc7132 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx |  FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
 
 ; Widen a v3i8 to v16i8 to use a vector add
 
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index fdecaa3f77ff..03b3fea01f6c 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx  | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: paddb
 ; CHECK: pand
 
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 1f2c25068ca4..057492377a27 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
 ; CHECK: paddw
 ; CHECK: pextrw
 ; CHECK: movd
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index f7506ae3e3cd..5931d639f19b 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
 ; CHECK: psubw
 ; CHECK-NEXT: pmullw
 
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index bae5c54eea64..7f2eff09f473 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx  | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42  | FileCheck %s
 ; CHECK: movdqa
 ; CHECK: pmulld
 ; CHECK: psubd
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 538123f10c25..b983d141ddf6 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: mulps
 ; CHECK: addps
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d4ab174ae9fb..1eace9e024e0 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse42 < %s -disable-mmx | FileCheck %s
+; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
 ; CHECK: paddw
 ; CHECK: pextrd
 ; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 14e8f7562482..5c695ea00033 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: pextrd
 ; CHECK: pextrd
 ; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 02674dd1459c..87486d96611b 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: pextrd
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 5f31e560f500..8e1adf58f869 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: sarb
 ; CHECK: sarb
 ; CHECK: sarb
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index d1d7fecbd275..136578df1e8e 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: movl
 ; CHECK: movd
 
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 08759bf5510c..39032347c018 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
 ; CHECK: movd
 
 ; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index a2029dd2748d..f6810cda9e35 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: pshufd
 ; CHECK: paddd
 
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index b24a9b36673c..969cb512beb3 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: movswl
 ; CHECK: movswl
 
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index 1a40800de975..a25fae9e1bc8 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: cvtsi2ss
 
 ; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index e505b62a3dbf..80f3a492c494 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 ; CHECK: cvtsi2ss
 
 ; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 308e6b859be6..4bcac58f2b6c 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
 ; widen extract subvector
 
 define void @convert(<2 x double>* %dst.addr, <3 x double> %src)  {
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index d397645f193f..639617f17774 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; PR4891
 
 ; This load should be before the call, not after.
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 551704c498fa..642206316c6b 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
deleted file mode 100644
index d9de892933e0..000000000000
--- a/test/CodeGen/X86/widen_select-1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
-
-; widening select v6i32 and then a sub
-
-define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
-entry:
-	%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
-	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
-	store <6 x i32> %val, <6 x i32>* %dst.addr
-	ret void
-}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 463f522a11df..034c42c758be 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
 
 ; widening shuffle v3float and then a add
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
new file mode 100644
index 000000000000..f9d4bf9c3094
--- /dev/null
+++ b/test/CodeGen/X86/win64_params.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the 5th and 6th parameters are coming from the correct location
+; on the stack.
+define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: movl    48(%rsp), %eax
+; CHECK: addl    40(%rsp), %eax
+  %add = add nsw i32 %p6, %p5
+  ret i32 %add
+}
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
new file mode 100644
index 000000000000..a451318f6e8c
--- /dev/null
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the var arg parameters which are passed in registers are stored
+; in home stack slots allocated by the caller and that AP is correctly
+; calculated.
+define void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: pushq
+; CHECK: movq   %r9, 40(%rsp)
+; CHECK: movq   %r8, 32(%rsp)
+; CHECK: movq   %rdx, 24(%rsp)
+; CHECK: leaq   24(%rsp), %rax
+
+  %ap = alloca i8*, align 8                       ; <i8**> [#uses=1]
+  %ap1 = bitcast i8** %ap to i8*                  ; <i8*> [#uses=1]
+  call void @llvm.va_start(i8* %ap1)
+  ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index 27d3358d4ac1..82ce81d4ae70 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
 ; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
@@ -12,10 +13,10 @@
 ; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI.
 define i32 @main4k() nounwind {
 entry:
-; WIN_X32:    call __chkstk
-; WIN_X64:    call __chkstk
-; MINGW_X32:  call __alloca
-; MINGW_X64:  call _alloca
+; WIN_X32:    calll __chkstk
+; WIN_X64:    callq __chkstk
+; MINGW_X32:  calll __alloca
+; MINGW_X64:  callq __chkstk
 ; LINUX-NOT:  call __chkstk
   %array4096 = alloca [4096 x i8], align 16       ; <[4096 x i8]*> [#uses=0]
   ret i32 0
@@ -26,15 +27,15 @@ entry:
 define i32 @main128() nounwind {
 entry:
 ; WIN_X32:       # BB#0:
-; WIN_X32-NOT:   call __chkstk
+; WIN_X32-NOT:   calll __chkstk
 ; WIN_X32:       ret
 
 ; WIN_X64:       # BB#0:
-; WIN_X64-NOT:   call __chkstk
+; WIN_X64-NOT:   callq __chkstk
 ; WIN_X64:       ret
 
 ; MINGW_X64:     # BB#0:
-; MINGW_X64-NOT: call _alloca
+; MINGW_X64-NOT: callq _alloca
 ; MINGW_X64:     ret
 
 ; LINUX:         # BB#0:
diff --git a/test/CodeGen/X86/x86-64-extend-shift.ll b/test/CodeGen/X86/x86-64-extend-shift.ll
new file mode 100644
index 000000000000..6852785fd6af
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-extend-shift.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts.
+
+define i64 @baz(i32 %A) nounwind {
+; CHECK:  shlq  $49, %rax
+        %tmp1 = shl i32 %A, 17
+        %tmp2 = zext i32 %tmp1 to i64
+        %tmp3 = shl i64 %tmp2, 32
+        ret i64 %tmp3
+}
diff --git a/test/CodeGen/X86/x86_64-mul-by-const.ll b/test/CodeGen/X86/x86_64-mul-by-const.ll
new file mode 100644
index 000000000000..df48a29156ca
--- /dev/null
+++ b/test/CodeGen/X86/x86_64-mul-by-const.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; Formerly there were two shifts.  rdar://8771012.
+
+define i32 @f9188_mul365384439_shift27(i32 %A) nounwind {
+; CHECK:  imulq $365384439,
+; CHECK:  shrq  $59, %rax
+        %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
new file mode 100644
index 000000000000..e61e8805a2fd
--- /dev/null
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define void @t() nounwind ssp {
+; CHECK: t:
+entry:
+  br i1 undef, label %return, label %if.end.i
+
+if.end.i:                                         ; preds = %entry
+  %tmp7.i = load i32* undef, align 4, !tbaa !0
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %if.end.i
+; CHECK: %if.end
+; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
+; CHECK-NOT: movl [[REG]], [[REG]]
+; CHECK-NEXT: xorb
+  %tmp138 = select i1 undef, i32 0, i32 %tmp7.i
+  %tmp867 = zext i32 %tmp138 to i64
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %if.end
+  %tmp869 = sub i64 %tmp867, 0
+  %scale2.0 = trunc i64 %tmp869 to i32
+  %cmp149 = icmp eq i32 %scale2.0, 0
+  br i1 %cmp149, label %while.end, label %land.rhs
+
+land.rhs:                                         ; preds = %while.cond
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %land.rhs
+  br label %while.cond
+
+while.end:                                        ; preds = %land.rhs, %while.cond
+  br i1 undef, label %cond.false205, label %cond.true190
+
+cond.true190:                                     ; preds = %while.end
+  br i1 undef, label %cond.false242, label %cond.true225
+
+cond.false205:                                    ; preds = %while.end
+  unreachable
+
+cond.true225:                                     ; preds = %cond.true190
+  br i1 undef, label %cond.false280, label %cond.true271
+
+cond.false242:                                    ; preds = %cond.true190
+  unreachable
+
+cond.true271:                                     ; preds = %cond.true225
+  unreachable
+
+cond.false280:                                    ; preds = %cond.true225
+  unreachable
+
+return:                                           ; preds = %if.end.i, %entry
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
index f24e1d1851b4..80cf3a6d678f 100644
--- a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
+++ b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
@@ -1,33 +1,28 @@
 ; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
 ; Check that DEBUG_VALUE comments come through on a variety of targets.
 
-%tart.reflect.ComplexType = type { double, double }
-
-@.type.SwitchStmtTest = constant %tart.reflect.ComplexType { double 3.0, double 2.0 }
-
-define i32 @"main(tart.core.String[])->int32"(i32 %args) {
+define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
-  tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
-  ret i32 3
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  ret i32 0, !dbg !10
 }
 
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+
diff --git a/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
new file mode 100644
index 000000000000..f8fe0d2136ff
--- /dev/null
+++ b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=xcore
+%struct.st = type <{ i8, i32, i8, i32, i8, i32 }>
+
+@x = external global %struct.st, align 4
+
+define i32 @test_entry() nounwind {
+entry:
+  %0 = load i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2
+  ret i32 %0
+}
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
index d99808fc4a2f..4514fdb8bf3b 100644
--- a/test/CodeGen/XCore/ashr.ll
+++ b/test/CodeGen/XCore/ashr.ll
@@ -50,9 +50,9 @@ define i32 @f3(i32 %a) {
 	ret i32 %2
 }
 ; CHECK: f3:
-; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
 ; CHECK-NEXT: ldc r0, 10
-; CHECK-NEXT: bt r1
 ; CHECK: ldc r0, 17
 
 define i32 @f4(i32 %a) {
@@ -61,9 +61,9 @@ define i32 @f4(i32 %a) {
 	ret i32 %2
 }
 ; CHECK: f4:
-; CHECK-NEXT: ashr r1, r0, 32
+; CHECK-NEXT: ashr r0, r0, 32
+; CHECK-NEXT: bf r0
 ; CHECK-NEXT: ldc r0, 17
-; CHECK-NEXT: bt r1
 ; CHECK: ldc r0, 10
 
 define i32 @f5(i32 %a) {
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
index 342e5932dd10..7487561dec96 100644
--- a/test/CodeGen/XCore/globals.ll
+++ b/test/CodeGen/XCore/globals.ll
@@ -67,7 +67,7 @@ entry:
 ; CHECK: .section .dp.bss,"awd",@nobits
 ; CHECK: G2:
 
-@G3 = constant i32 9401
+@G3 = unnamed_addr constant i32 9401
 ; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
 ; CHECK: G3:
 
@@ -75,7 +75,7 @@ entry:
 ; CHECK: .section .dp.data,"awd",@progbits
 ; CHECK: G4:
 
-@G5 = constant i32* @G1
+@G5 = unnamed_addr constant i32* @G1
 ; CHECK: .section .cp.rodata,"ac",@progbits
 ; CHECK: G5:
 
@@ -83,7 +83,7 @@ entry:
 ; CHECK: .section .dp.data,"awd",@progbits
 ; CHECK: G6:
 
-@G7 = constant i32* @G8
+@G7 = unnamed_addr constant i32* @G8
 ; CHECK: .section .cp.rodata,"ac",@progbits
 ; CHECK: G7:
 
diff --git a/test/CodeGen/XCore/resources.ll b/test/CodeGen/XCore/resources.ll
new file mode 100644
index 000000000000..3114bdcd1777
--- /dev/null
+++ b/test/CodeGen/XCore/resources.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=xcore < %s | FileCheck %s
+
+declare i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 %type)
+declare void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r)
+declare i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r)
+declare void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value)
+declare void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value)
+
+define i8 addrspace(1)* @getr() {
+; CHECK: getr:
+; CHECK: getr r0, 5
+	%result = call i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 5)
+	ret i8 addrspace(1)* %result
+}
+
+define void @freer(i8 addrspace(1)* %r) {
+; CHECK: freer:
+; CHECK: freer res[r0]
+	call void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r)
+	ret void
+}
+
+define i32 @in(i8 addrspace(1)* %r) {
+; CHECK: in:
+; CHECK: in r0, res[r0]
+	%result = call i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r)
+	ret i32 %result
+}
+
+define i32 @int(i8 addrspace(1)* %r) {
+; CHECK: int:
+; CHECK: int r0, res[r0]
+	%result = call i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r)
+	ret i32 %result
+}
+
+define i32 @inct(i8 addrspace(1)* %r) {
+; CHECK: inct:
+; CHECK: inct r0, res[r0]
+	%result = call i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r)
+	ret i32 %result
+}
+
+define void @out(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: out:
+; CHECK: out res[r0], r1
+	call void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @outt(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: outt:
+; CHECK: outt res[r0], r1
+	call void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @outct(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: outct:
+; CHECK: outct res[r0], r1
+	call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @outcti(i8 addrspace(1)* %r) {
+; CHECK: outcti:
+; CHECK: outct res[r0], 11
+	call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 11)
+	ret void
+}
+
+define void @chkct(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: chkct:
+; CHECK: chkct res[r0], r1
+	call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @chkcti(i8 addrspace(1)* %r) {
+; CHECK: chkcti:
+; CHECK: chkct res[r0], 11
+	call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 11)
+	ret void
+}
+
+define void @setd(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: setd:
+; CHECK: setd res[r0], r1
+	call void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @setc(i8 addrspace(1)* %r, i32 %value) {
+; CHECK: setc:
+; CHECK: setc res[r0], r1
+	call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value)
+	ret void
+}
+
+define void @setci(i8 addrspace(1)* %r) {
+; CHECK: setci:
+; CHECK: setc res[r0], 2
+	call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 2)
+	ret void
+}
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
new file mode 100644
index 000000000000..18cc45edbf9f
--- /dev/null
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+%struct.FRAME.f = type { i32, i32 ()* }
+
+define void @f() nounwind {
+entry:
+; CHECK: f:
+; CHECK ldap r11, g.1101
+; CHECK stw r11, sp[7]
+  %TRAMP.23 = alloca [20 x i8], align 2
+  %FRAME.0 = alloca %struct.FRAME.f, align 4
+  %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0
+  %FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8*
+  %tramp = call i8* @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02)
+  %0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1
+  %1 = bitcast i8* %tramp to i32 ()*
+  store i32 ()* %1, i32 ()** %0, align 4
+  %2 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 0
+  store i32 1, i32* %2, align 4
+  call void @h(i32 ()* %1) nounwind
+  ret void
+}
+
+define internal i32 @g.1101(%struct.FRAME.f* nocapture nest %CHAIN.1) nounwind readonly {
+entry:
+; CHECK: g.1101:
+; CHECK: ldw r11, sp[0]
+; CHECK-NEXT: ldw r0, r11[0]
+; CHECK-NEXT: retsp 0
+  %0 = getelementptr inbounds %struct.FRAME.f* %CHAIN.1, i32 0, i32 0
+  %1 = load i32* %0, align 4
+  ret i32 %1
+}
+
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+
+declare void @h(i32 ()*)
diff --git a/test/DebugInfo/2009-10-16-Scope.ll b/test/DebugInfo/2009-10-16-Scope.ll
index 9f9fa65d5b79..037294fc63a4 100644
--- a/test/DebugInfo/2009-10-16-Scope.ll
+++ b/test/DebugInfo/2009-10-16-Scope.ll
@@ -1,4 +1,5 @@
-; RUN: llc %s -O0 -o /dev/null
+; RUN: llc %s -O0 -o /dev/null -mtriple=x86_64-apple-darwin
+; RUN: llc %s -O0 -o /dev/null -mtriple=arm-apple-darwin
 ; PR 5197
 ; There is not any llvm instruction assocated with !5. The code generator
 ; should be able to handle this.
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 3b24c0cead35..721b70839fef 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -1,7 +1,21 @@
-; RUN: llc -O0 -asm-verbose  %s -o %t
-; RUN: grep DW_TAG_compile_unit %t | count 3
+; RUN: llc -O0 -asm-verbose < %s | FileCheck %s
 ; One for a.c, second one for b.c and third one for abbrev.
 
+; CHECK: info_begin
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_compile_unit
+; CHECK: info_end
+
+; CHECK: info_begin
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_compile_unit
+; CHECK: info_end
+
+; CHECK: abbrev_begin
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_compile_unit
+; CHECK: abbrev_end
+
 define i32 @foo() nounwind readnone ssp {
 return:
   ret i32 42, !dbg !0
diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/DebugInfo/2010-08-04-StackVariable.ll
index 61cd20bb1ab3..c35c3d36d261 100644
--- a/test/DebugInfo/2010-08-04-StackVariable.ll
+++ b/test/DebugInfo/2010-08-04-StackVariable.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 < %s | grep DW_OP_fbreg
+; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s | grep DW_OP_fbreg
 ; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot.
 
 %struct.SVal = type { i8*, i32 }
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
new file mode 100644
index 000000000000..e61f63f40d8f
--- /dev/null
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O0 %s -o /dev/null
+; PR 8235
+
+define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp {
+entry:
+  call void @llvm.dbg.declare(metadata !{i32* %rect}, metadata !23), !dbg !24
+  ret void
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !1, i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop", metadata !2}
+!2 = metadata !{i32 589841, i32 0, i32 16, metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop", metadata !"clang version 2.9 (trunk 115292)", i1 true, i1 false, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589846, metadata !1, metadata !"CGRect", metadata !1, i32 49, i64 0, i64 0, i64 0, i32 0, null}
+!23 = metadata !{i32 590081, metadata !0, metadata !"rect", metadata !1, i32 53, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 53, i32 33, metadata !0, null}
+
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index 455196923e84..eba58ccca423 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 5d37e9664162..4cfd1ebb69e2 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
 	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 653cf79a52a3..668c8fd7ad0c 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/2003-01-04-PhiTest.ll b/test/ExecutionEngine/2003-01-04-PhiTest.ll
index b5c9d8132432..2bc70d749f50 100644
--- a/test/ExecutionEngine/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/2003-01-04-PhiTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/2003-01-09-SARTest.ll b/test/ExecutionEngine/2003-01-09-SARTest.ll
index 81478972d57f..560cd3eae9a2 100644
--- a/test/ExecutionEngine/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/2003-01-09-SARTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; We were accidentally inverting the signedness of right shifts.  Whoops.
 
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index d996fa53d9c5..8512f634323f 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index a55d74df0d44..9df21c1ccd6d 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @bar(i8* %X) {
         ; pointer should be 4 byte aligned!
diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
index 57fe95b2d915..e670d11d4a15 100644
--- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
-; This testcase shoudl return with an exit code of 1.
+; This testcase should return with an exit code of 1.
 ;
-; RUN: llvm-as < %s | not lli
+; RUN: not lli %s
 
 @test = global i64 0		; <i64*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
index fa15d7110c32..baf42e5e83d8 100644
--- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | lli - test
+; RUN: lli %s test
 
 declare i32 @puts(i8*)
 
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
index 5a13b21b5f49..bcdb11468dca 100644
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
index 6e2da70f736b..37dae861c983 100644
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
index 50b48da49445..f7bd8b77244f 100644
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/2003-06-05-PHIBug.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 6c90b33cb431..5970628eaedf 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; This testcase failed to work because two variable sized allocas confused the
 ; local register allocator.
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 3a4a4e439fb8..9c8ec1dd9ce5 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ;
 ; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index b165a1cf30e3..152482d436d1 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; This testcase exposes a bug in the local register allocator where it runs out
 ; of registers (due to too many overlapping live ranges), but then attempts to
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index aa9d7e7d3632..97e84cd4092c 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 @A = global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
index 59a40ae81403..874ce39e662f 100644
--- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
@@ -1,5 +1,6 @@
 ; PR672
-; RUN: llvm-as < %s | lli
+; RUN: lli %s
+; XFAIL: arm
 
 define i32 @main() {
 	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
index f347f5d35724..c0dc4cf61abb 100644
--- a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as %s -o - | lli -force-interpreter
+; RUN: lli -force-interpreter %s
 ; PR1836
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index e7e434f271c6..07cc659cd040 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli -force-interpreter=true %t.bc | grep 1
+; RUN: lli -force-interpreter=true %s | grep 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/2010-01-15-UndefValue.ll b/test/ExecutionEngine/2010-01-15-UndefValue.ll
index 33ca63a02050..6e7a392125e1 100644
--- a/test/ExecutionEngine/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/2010-01-15-UndefValue.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli -force-interpreter=true %t.bc
+; RUN: lli -force-interpreter=true %s
 
 define i32 @main() {
        %a = add i32 0, undef
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index 34ca129ead6f..47cbb02db180 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s > %t.bc
-; RUN: lli -force-interpreter=true %t.bc | grep 40091eb8
+; RUN: lli -force-interpreter=true %s | grep 40091eb8
 ;
 define i32 @test(double %x) {
 entry:
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index fad36ed58361..0b75c10f354b 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index 7ca0d8827d54..9f060be1c342 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 @X = global i32 7		; <i32*> [#uses=0]
 @msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/simplesttest.ll b/test/ExecutionEngine/simplesttest.ll
index 5d9cf767bcb3..ad38485d6e02 100644
--- a/test/ExecutionEngine/simplesttest.ll
+++ b/test/ExecutionEngine/simplesttest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	ret i32 0
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 53fb79c2c768..68eb7580b4c1 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @bar() {
 	ret i32 0
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
index 525d135ff45c..bd9f69adb833 100644
--- a/test/ExecutionEngine/stubs.ll
+++ b/test/ExecutionEngine/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | lli -disable-lazy-compilation=false
+; RUN: lli -disable-lazy-compilation=false %s
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/ExecutionEngine/test-arith.ll b/test/ExecutionEngine/test-arith.ll
index 8c51e6b2e224..354ecd24bc83 100644
--- a/test/ExecutionEngine/test-arith.ll
+++ b/test/ExecutionEngine/test-arith.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%A = add i8 0, 12		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-branch.ll b/test/ExecutionEngine/test-branch.ll
index dd8db5465f0b..7d4fd5605922 100644
--- a/test/ExecutionEngine/test-branch.ll
+++ b/test/ExecutionEngine/test-branch.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; test unconditional branch
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index 4464ebd39bff..c4131a20f795 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
+; XFAIL: arm
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/test-cast.ll b/test/ExecutionEngine/test-cast.ll
index 82d4949782a8..f41448cc60aa 100644
--- a/test/ExecutionEngine/test-cast.ll
+++ b/test/ExecutionEngine/test-cast.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @foo() {
 	ret i32 0
diff --git a/test/ExecutionEngine/test-constantexpr.ll b/test/ExecutionEngine/test-constantexpr.ll
index cd5c635331d3..d6d90e3e1982 100644
--- a/test/ExecutionEngine/test-constantexpr.ll
+++ b/test/ExecutionEngine/test-constantexpr.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; This tests to make sure that we can evaluate weird constant expressions
 
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index 4ebcf6f7aa7c..f653660fb832 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index ba0f0baf8d6d..53155e8ac90a 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
 	%V = load i8* %P		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-logical.ll b/test/ExecutionEngine/test-logical.ll
index e560e52d568d..710763a30bd8 100644
--- a/test/ExecutionEngine/test-logical.ll
+++ b/test/ExecutionEngine/test-logical.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%A = and i8 4, 8		; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/test-loop.ll b/test/ExecutionEngine/test-loop.ll
index 7cd69e2943bc..f0e6f7a6f9c1 100644
--- a/test/ExecutionEngine/test-loop.ll
+++ b/test/ExecutionEngine/test-loop.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/test-malloc.ll b/test/ExecutionEngine/test-malloc.ll
index 8f79d974edb5..b3400df44092 100644
--- a/test/ExecutionEngine/test-malloc.ll
+++ b/test/ExecutionEngine/test-malloc.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%X = malloc i32		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/test-phi.ll b/test/ExecutionEngine/test-phi.ll
index f1aaefa50585..c5848a8b5cce 100644
--- a/test/ExecutionEngine/test-phi.ll
+++ b/test/ExecutionEngine/test-phi.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; test phi node
 @Y = global i32 6		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/test-ret.ll b/test/ExecutionEngine/test-ret.ll
index eae91f553752..beec39960751 100644
--- a/test/ExecutionEngine/test-ret.ll
+++ b/test/ExecutionEngine/test-ret.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 ; test return instructions
 define void @test1() {
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index 4264e2c593fc..d1d6d05b3352 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-setcond-int.ll b/test/ExecutionEngine/test-setcond-int.ll
index 772f4fa70a4b..f59d325a5b59 100644
--- a/test/ExecutionEngine/test-setcond-int.ll
+++ b/test/ExecutionEngine/test-setcond-int.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%int1 = add i32 0, 0		; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/test-shift.ll b/test/ExecutionEngine/test-shift.ll
index 2791b8534a58..d0fb90a42750 100644
--- a/test/ExecutionEngine/test-shift.ll
+++ b/test/ExecutionEngine/test-shift.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as %s -o %t.bc
-; RUN: lli %t.bc > /dev/null
+; RUN: lli %s > /dev/null
 
 define i32 @main() {
 	%shamt = add i8 0, 1		; <i8> [#uses=8]
diff --git a/test/Feature/load_module.ll b/test/Feature/load_module.ll
index e2e222f4edcf..05f6c238134f 100644
--- a/test/Feature/load_module.ll
+++ b/test/Feature/load_module.ll
@@ -1,6 +1,8 @@
 ; PR1318
-; RUN: opt < %s -load=%llvmlibsdir/LLVMHello%shlibext -hello \
+; RUN: opt < %s -load=%llvmshlibdir/LLVMHello%shlibext -hello \
 ; RUN:   -disable-output |& grep Hello
+; REQUIRES: loadable_module
+; FIXME: On Cygming, it might fail without building LLVMHello manually.
 
 @junk = global i32 0
 
diff --git a/test/FrontendAda/Support/real_cst.ads b/test/FrontendAda/Support/real_cst.ads
new file mode 100644
index 000000000000..54a34bc4056f
--- /dev/null
+++ b/test/FrontendAda/Support/real_cst.ads
@@ -0,0 +1,4 @@
+with Ada.Streams;
+package Real_Cst is
+   procedure Write (Stream : access Ada.Streams.Root_Stream_Type'Class);
+end;
diff --git a/test/FrontendAda/array_constructor.adb b/test/FrontendAda/array_constructor.adb
index de64b45a1e3e..13517c7565a5 100644
--- a/test/FrontendAda/array_constructor.adb
+++ b/test/FrontendAda/array_constructor.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure Array_Constructor is
    A : array (Integer range <>) of Boolean := (True, False);
 begin
diff --git a/test/FrontendAda/array_range_ref.adb b/test/FrontendAda/array_range_ref.adb
index ae9bdc6cfcdf..037c5aa8b6f0 100644
--- a/test/FrontendAda/array_range_ref.adb
+++ b/test/FrontendAda/array_range_ref.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure Array_Range_Ref is
    A : String (1 .. 3);
    B : String := A (A'RANGE)(1 .. 3);
diff --git a/test/FrontendAda/array_ref.adb b/test/FrontendAda/array_ref.adb
index 9577e21ad70f..2bf4b7988d6d 100644
--- a/test/FrontendAda/array_ref.adb
+++ b/test/FrontendAda/array_ref.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure Array_Ref is
    type A is array (Natural range <>, Natural range <>) of Boolean;
    type A_Access is access A;
diff --git a/test/FrontendAda/array_size.adb b/test/FrontendAda/array_size.adb
index 2f07d0639082..c73616c98079 100644
--- a/test/FrontendAda/array_size.adb
+++ b/test/FrontendAda/array_size.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure Array_Size is
    subtype S is String (1 .. 2);
    type R is record
diff --git a/test/FrontendAda/asm.adb b/test/FrontendAda/asm.adb
index 575617c08788..844885e67152 100644
--- a/test/FrontendAda/asm.adb
+++ b/test/FrontendAda/asm.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 with System.Machine_Code;
 procedure Asm is
 begin
diff --git a/test/FrontendAda/debug_var_size.ads b/test/FrontendAda/debug_var_size.ads
index ea966fbae0fe..e8863cbddf94 100644
--- a/test/FrontendAda/debug_var_size.ads
+++ b/test/FrontendAda/debug_var_size.ads
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c -g %s
+-- RUN: %llvmgcc -S -g %s
 package Debug_Var_Size is
    subtype Length_Type is Positive range 1 .. 64;
    type T (Length : Length_Type := 1) is record
diff --git a/test/FrontendAda/element_copy.adb b/test/FrontendAda/element_copy.adb
index bffcb973265b..29274fa744ae 100644
--- a/test/FrontendAda/element_copy.adb
+++ b/test/FrontendAda/element_copy.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -S -O2 %s -I%p/Support -o - | grep 6899714
+-- RUN: %llvmgcc -S -O2 %s -I%p/Support -o - | grep 105 | count 2
 package body Element_Copy is
    function F return VariableSizedField is
       X : VariableSizedField;
diff --git a/test/FrontendAda/emit_var.ads b/test/FrontendAda/emit_var.ads
index 35d45448cb6d..47e2538691ac 100644
--- a/test/FrontendAda/emit_var.ads
+++ b/test/FrontendAda/emit_var.ads
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 with Ada.Finalization;
 package Emit_Var is
    type Search_Type is new Ada.Finalization.Controlled with null record;
diff --git a/test/FrontendAda/fat_fields.adb b/test/FrontendAda/fat_fields.adb
index 510105f6840f..443a9b679c55 100644
--- a/test/FrontendAda/fat_fields.adb
+++ b/test/FrontendAda/fat_fields.adb
@@ -1,5 +1,5 @@
--- RUN: %llvmgcc -c %s -I%p/Support
--- RUN: %llvmgcc -c %s -I%p/Support -O2
+-- RUN: %llvmgcc -S %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support -O2
 package body Fat_Fields is
    procedure Proc is
    begin
diff --git a/test/FrontendAda/field_order.ads b/test/FrontendAda/field_order.ads
index b49185d6dd80..9b2b37c20db0 100644
--- a/test/FrontendAda/field_order.ads
+++ b/test/FrontendAda/field_order.ads
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 package Field_Order is
    type Tagged_Type is abstract tagged null record;
    type With_Discriminant (L : Positive) is new Tagged_Type with record
diff --git a/test/FrontendAda/global_constant.adb b/test/FrontendAda/global_constant.adb
index ce9f406dba58..330f97b5d0e0 100644
--- a/test/FrontendAda/global_constant.adb
+++ b/test/FrontendAda/global_constant.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support
 package body Global_Constant is
 begin
    raise An_Error;
diff --git a/test/FrontendAda/init_size.ads b/test/FrontendAda/init_size.ads
index 1d76ba24f341..f423682bec72 100644
--- a/test/FrontendAda/init_size.ads
+++ b/test/FrontendAda/init_size.ads
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 package Init_Size is
    type T (B : Boolean := False) is record
       case B is
diff --git a/test/FrontendAda/negative_field_offset.adb b/test/FrontendAda/negative_field_offset.adb
index f8b85108504d..ec8184dde47b 100644
--- a/test/FrontendAda/negative_field_offset.adb
+++ b/test/FrontendAda/negative_field_offset.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 with System;
 procedure Negative_Field_Offset (N : Integer) is
    type String_Pointer is access String;
diff --git a/test/FrontendAda/non_bitfield.ads b/test/FrontendAda/non_bitfield.ads
index 8f5845adb431..8a49d46f6b42 100644
--- a/test/FrontendAda/non_bitfield.ads
+++ b/test/FrontendAda/non_bitfield.ads
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 package Non_Bitfield is
    type SP is access String;
    type E is (A, B, C);
diff --git a/test/FrontendAda/non_lvalue.adb b/test/FrontendAda/non_lvalue.adb
index 157f3ddd287f..71e7e102d05b 100644
--- a/test/FrontendAda/non_lvalue.adb
+++ b/test/FrontendAda/non_lvalue.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support
 package body Non_LValue is
    function A (Y : U) return String is
    begin
diff --git a/test/FrontendAda/placeholder.adb b/test/FrontendAda/placeholder.adb
index f33c9a5ab89d..88908263f871 100644
--- a/test/FrontendAda/placeholder.adb
+++ b/test/FrontendAda/placeholder.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure Placeholder is
    subtype Bounded is Integer range 1 .. 5;
    type Vector is array (Bounded range <>) of Integer;
diff --git a/test/FrontendAda/real_cst.adb b/test/FrontendAda/real_cst.adb
new file mode 100644
index 000000000000..c9708301d62f
--- /dev/null
+++ b/test/FrontendAda/real_cst.adb
@@ -0,0 +1,8 @@
+-- RUN: %llvmgcc -S -O2 -gnatn %s
+package body Real_Cst is
+   Cst : constant Float := 0.0;
+   procedure Write (Stream : access Ada.Streams.Root_Stream_Type'Class) is
+   begin
+      Float'Write (Stream, Cst);
+   end;
+end;
diff --git a/test/FrontendAda/switch.adb b/test/FrontendAda/switch.adb
index f214bca2ec42..0c83a2e6420e 100644
--- a/test/FrontendAda/switch.adb
+++ b/test/FrontendAda/switch.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 function Switch (N : Integer) return Integer is
 begin
    case N is
diff --git a/test/FrontendAda/unc_constructor.adb b/test/FrontendAda/unc_constructor.adb
index bc3002c6a95b..ee10de6c811d 100644
--- a/test/FrontendAda/unc_constructor.adb
+++ b/test/FrontendAda/unc_constructor.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support
 package body Unc_Constructor is
    procedure P (X : A) is
    begin
diff --git a/test/FrontendAda/var_offset.adb b/test/FrontendAda/var_offset.adb
index 09f1c155a061..1d3ca98fa49d 100644
--- a/test/FrontendAda/var_offset.adb
+++ b/test/FrontendAda/var_offset.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support
 package body Var_Offset is
    function F (X : T) return Character is
    begin
diff --git a/test/FrontendAda/var_size.adb b/test/FrontendAda/var_size.adb
index b3db9a36fe1b..291f91d4eebd 100644
--- a/test/FrontendAda/var_size.adb
+++ b/test/FrontendAda/var_size.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s -I%p/Support
+-- RUN: %llvmgcc -S %s -I%p/Support
 package body Var_Size is
    function A (X : T) return String is
    begin
diff --git a/test/FrontendAda/vce.adb b/test/FrontendAda/vce.adb
index f24045cbee3f..85cdca066a29 100644
--- a/test/FrontendAda/vce.adb
+++ b/test/FrontendAda/vce.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure VCE is
   S : String (1 .. 2);
   B : Character := 'B';
diff --git a/test/FrontendAda/vce_lv.adb b/test/FrontendAda/vce_lv.adb
index 4ca4d5c8b542..d1b9e0824f17 100644
--- a/test/FrontendAda/vce_lv.adb
+++ b/test/FrontendAda/vce_lv.adb
@@ -1,4 +1,4 @@
--- RUN: %llvmgcc -c %s
+-- RUN: %llvmgcc -S %s
 procedure VCE_LV is
    type P is access String ;
    type T is new P (5 .. 7);
diff --git a/test/FrontendC++/2003-08-20-ExceptionFail.cpp b/test/FrontendC++/2003-08-20-ExceptionFail.cpp
deleted file mode 100644
index f071c3c0e80c..000000000000
--- a/test/FrontendC++/2003-08-20-ExceptionFail.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-void foo();
-
-void bar() {
-  struct local {
-    ~local() { foo(); }
-  } local_obj;
-
-  foo();
-}
-
diff --git a/test/FrontendC++/2003-08-21-EmptyClass.cpp b/test/FrontendC++/2003-08-21-EmptyClass.cpp
deleted file mode 100644
index 5dbfa33e03c3..000000000000
--- a/test/FrontendC++/2003-08-21-EmptyClass.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// This tests compilation of EMPTY_CLASS_EXPR's
-
-struct empty {};
-
-void foo(empty) {}
-
-void bar() { foo(empty()); }
diff --git a/test/FrontendC++/2003-08-24-Cleanup.cpp b/test/FrontendC++/2003-08-24-Cleanup.cpp
deleted file mode 100644
index ab0d1a0d11cd..000000000000
--- a/test/FrontendC++/2003-08-24-Cleanup.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep unwind
-
-struct S { ~S(); };
-
-int mightthrow();
-
-int test() {
-  S s;
-  mightthrow();
-}
diff --git a/test/FrontendC++/2003-08-27-TypeNamespaces.cpp b/test/FrontendC++/2003-08-27-TypeNamespaces.cpp
deleted file mode 100644
index dec97180a420..000000000000
--- a/test/FrontendC++/2003-08-27-TypeNamespaces.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-
-namespace foo {
-  namespace bar {
-    struct X { X(); };
-
-    X::X() {}
-  }
-}
-
-
-namespace {
-  struct Y { Y(); };
-  Y::Y() {}
-}
diff --git a/test/FrontendC++/2003-08-28-ForwardType.cpp b/test/FrontendC++/2003-08-28-ForwardType.cpp
deleted file mode 100644
index 9330e94aec57..000000000000
--- a/test/FrontendC++/2003-08-28-ForwardType.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// Default placement versions of operator new.
-#include <new>
-
-void* operator new(size_t, void* __p) throw();
-
-
-template<typename _CharT>
-struct stdio_filebuf
-{  stdio_filebuf();
-
-};
-
-extern stdio_filebuf<char> buf_cout;
-
-void foo() {
-  // Create stream buffers for the standard streams and use
-  // those buffers without destroying and recreating the
-  // streams.
-  new (&buf_cout) stdio_filebuf<char>();
-
-}
diff --git a/test/FrontendC++/2003-08-28-SaveExprBug.cpp b/test/FrontendC++/2003-08-28-SaveExprBug.cpp
deleted file mode 100644
index 98c5f5d8d659..000000000000
--- a/test/FrontendC++/2003-08-28-SaveExprBug.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-
-char* eback();
-
-template<typename foo>
-struct basic_filebuf {
-  char *instancevar;
-
-  void callee() {
-    instancevar += eback() != eback();
-  }
-
-  void caller();
-};
-
-
-template<typename _CharT>
-void basic_filebuf<_CharT>::caller() {
-  callee();
-}
-
-
-template class basic_filebuf<char>;
diff --git a/test/FrontendC++/2003-08-29-ArgPassingBug.cpp b/test/FrontendC++/2003-08-29-ArgPassingBug.cpp
deleted file mode 100644
index d4cddff3147c..000000000000
--- a/test/FrontendC++/2003-08-29-ArgPassingBug.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-
-// RUN: %llvmgcc -xc++ -c -o /dev/null %s |& not grep WARNING
-
-struct iterator {
-  iterator();
-  iterator(const iterator &I);
-};
-
-iterator foo(const iterator &I) { return I; }
-
-void test() {
-  foo(iterator());
-}
diff --git a/test/FrontendC++/2003-08-31-StructLayout.cpp b/test/FrontendC++/2003-08-31-StructLayout.cpp
deleted file mode 100644
index a45ad030e3cc..000000000000
--- a/test/FrontendC++/2003-08-31-StructLayout.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// There is a HOLE in the derived2 object due to not wanting to place the two
-// baseclass instances at the same offset!
-
-struct baseclass {};
-
-class derived1 : public baseclass {
-  void * NodePtr;
-};
-
-class derived2 : public baseclass {
-  derived1 current;
-};
-
-derived2 RI;
diff --git a/test/FrontendC++/2003-09-22-CompositeExprValue.cpp b/test/FrontendC++/2003-09-22-CompositeExprValue.cpp
deleted file mode 100644
index 3bd707ed8657..000000000000
--- a/test/FrontendC++/2003-09-22-CompositeExprValue.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-struct duration {
- duration operator/=(int c) {
-  return *this;
-  }
-};
-
-void a000090() {
-  duration() /= 1;
-}
diff --git a/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp b/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
deleted file mode 100644
index 72997c524b85..000000000000
--- a/test/FrontendC++/2003-09-29-ArgumentNumberMismatch.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// Non-POD classes cannot be passed into a function by component, because their
-// dtors must be run.  Instead, pass them in by reference.  The C++ front-end
-// was mistakenly "thinking" that 'foo' took a structure by component.
-
-struct C {
-  int A, B;
-  ~C() {}
-};
-
-void foo(C b);
-
-void test(C *P) {
-  foo(*P);
-}
-
diff --git a/test/FrontendC++/2003-09-30-CommaExprBug.cpp b/test/FrontendC++/2003-09-30-CommaExprBug.cpp
deleted file mode 100644
index 365795dafde4..000000000000
--- a/test/FrontendC++/2003-09-30-CommaExprBug.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-class Empty {};
-
-void foo(Empty E);
-
-void bar() {
-  foo(Empty());
-}
-
diff --git a/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp b/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
deleted file mode 100644
index 63f62f28dbf1..000000000000
--- a/test/FrontendC++/2003-09-30-ForIncrementExprBug.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-struct C {};
-
-C &foo();
-
-void foox() {
-  for (; ; foo());
-}
-
diff --git a/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp b/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
deleted file mode 100644
index a1eee71f52c5..000000000000
--- a/test/FrontendC++/2003-09-30-ForIncrementExprBug2.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// Test with an opaque type
-
-struct C;
-
-C &foo();
-
-void foox() {
-  for (; ; foo());
-}
-
diff --git a/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp b/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
deleted file mode 100644
index 94c11998963b..000000000000
--- a/test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-// The C++ front-end thinks the two foo's are different, the LLVM emitter
-// thinks they are the same.  The disconnect causes problems.
-
-void foo() { }
-
-void bar() {
-  void foo();
-
-  foo();
-}
diff --git a/test/FrontendC++/2003-10-17-BoolBitfields.cpp b/test/FrontendC++/2003-10-17-BoolBitfields.cpp
deleted file mode 100644
index 103945df8aeb..000000000000
--- a/test/FrontendC++/2003-10-17-BoolBitfields.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-struct test {
-  bool A : 1;
-  bool B : 1;
-};
-
-void foo(test *T) {
-  T->B = true;
-}
-
diff --git a/test/FrontendC++/2003-10-21-InnerClass.cpp b/test/FrontendC++/2003-10-21-InnerClass.cpp
deleted file mode 100644
index fadd51d22670..000000000000
--- a/test/FrontendC++/2003-10-21-InnerClass.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %llvmgcc -xc++ -S -o - %s | grep {struct.X::Y}
-struct X {
-
-  struct Y {
-    Y();
-  };
-
-};
-
-X::Y::Y() {
-
-}
diff --git a/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp b/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
deleted file mode 100644
index abda017ab96b..000000000000
--- a/test/FrontendC++/2003-10-27-VirtualBaseClassCrash.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-
-template<class T>
-struct super {
-  int Y;
-  void foo();
-};
-
-template <class T>
-struct test : virtual super<int> {};
-
-extern test<int> X;
-
-void foo() {
-  X.foo();
-}
diff --git a/test/FrontendC++/2003-11-04-ArrayConstructors.cpp b/test/FrontendC++/2003-11-04-ArrayConstructors.cpp
deleted file mode 100644
index 4ab33988ebb8..000000000000
--- a/test/FrontendC++/2003-11-04-ArrayConstructors.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-
-struct Foo {
-  Foo(int);
-  ~Foo();
-};
-void foo() {
-  struct {
-    Foo name;
-  } Int[] =  { 1 };
-}
diff --git a/test/FrontendC++/2003-11-04-CatchLabelName.cpp b/test/FrontendC++/2003-11-04-CatchLabelName.cpp
deleted file mode 100644
index 7dbe788f4e17..000000000000
--- a/test/FrontendC++/2003-11-04-CatchLabelName.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-#include <string>
-
-void bar();
-
-void test() {
-  try {
-    bar();
-  } catch (std::string) {}
-}
diff --git a/test/FrontendC++/2003-11-08-ArrayAddress.cpp b/test/FrontendC++/2003-11-08-ArrayAddress.cpp
deleted file mode 100644
index 9ad1b8f82f72..000000000000
--- a/test/FrontendC++/2003-11-08-ArrayAddress.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep getelementptr
-
-struct foo {
-  int array[100];
-  void *getAddr(unsigned i);
-};
-
-void *foo::getAddr(unsigned i) {
-  return &array[i];
-}
diff --git a/test/FrontendC++/2003-11-18-EnumArray.cpp b/test/FrontendC++/2003-11-18-EnumArray.cpp
deleted file mode 100644
index bb1b3bf301e5..000000000000
--- a/test/FrontendC++/2003-11-18-EnumArray.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN: %llvmgxx -S %s -o - | llvm-as -o /dev/null
-
-enum TchkType {
-  tchkNum, tchkString, tchkSCN, tchkNone
-};
-
-struct Operator {
-  enum TchkType tchk[8];
-};
-
-struct Operator opTab[] = {
-  {{tchkNum, tchkNum, tchkString} }
-};
-
diff --git a/test/FrontendC++/2004-03-09-UnmangledBuiltinMethods.cpp b/test/FrontendC++/2004-03-09-UnmangledBuiltinMethods.cpp
index b019e0c0ef3d..a600e841e7a3 100644
--- a/test/FrontendC++/2004-03-09-UnmangledBuiltinMethods.cpp
+++ b/test/FrontendC++/2004-03-09-UnmangledBuiltinMethods.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc++ -c -o - %s | llvm-dis | grep _ZN11AccessFlags6strlenEv
+// RUN: %llvmgcc -xc++ -S -o - %s | grep _ZN11AccessFlags6strlenEv
 
 struct AccessFlags {
   void strlen();
diff --git a/test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp b/test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp
index 706d541bee67..66b970cb6fd8 100644
--- a/test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp
+++ b/test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep callDefaultCtor | \
+// RUN: %llvmgxx -xc++ %s -S -o - | grep callDefaultCtor | \
 // RUN:   not grep declare
 
 // This is a testcase for LLVM PR445, which was a problem where the 
diff --git a/test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp b/test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp
index 7711cff6d360..ca600d6433df 100644
--- a/test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp
+++ b/test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -xc++ %s -c -o - | opt -die | llvm-dis | not grep cast
+// RUN: %llvmgxx -xc++ %s -S -o - | opt -die -S | not grep cast
 
 void foo(int*);
 
diff --git a/test/FrontendC++/2006-09-27-Debug-Protection.cpp b/test/FrontendC++/2006-09-27-Debug-Protection.cpp
index cb09bd0a974b..2a70a0f5b457 100644
--- a/test/FrontendC++/2006-09-27-Debug-Protection.cpp
+++ b/test/FrontendC++/2006-09-27-Debug-Protection.cpp
@@ -1,5 +1,5 @@
-// RUN: %llvmgxx -O0 -emit-llvm -S -g -o - %s | grep {i32 1,}
-// RUN: %llvmgxx -O0 -emit-llvm -S -g -o - %s | grep {i32 2,}
+// RUN: %llvmgxx -O0 -S -g -o - %s | grep {i32 1,}
+// RUN: %llvmgxx -O0 -S -g -o - %s | grep {i32 2,}
 class A {
 public:
   int x;
diff --git a/test/FrontendC++/2006-10-30-ClassBitfield.cpp b/test/FrontendC++/2006-10-30-ClassBitfield.cpp
index bd3b173cf8fd..b3b43fb30ce6 100644
--- a/test/FrontendC++/2006-10-30-ClassBitfield.cpp
+++ b/test/FrontendC++/2006-10-30-ClassBitfield.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o -
+// RUN: %llvmgxx %s -S -o -
 // PR954
 
 struct _Refcount_Base   {
diff --git a/test/FrontendC++/2006-11-20-GlobalSymbols.cpp b/test/FrontendC++/2006-11-20-GlobalSymbols.cpp
index fc896b330a4d..c4afd32b66d2 100644
--- a/test/FrontendC++/2006-11-20-GlobalSymbols.cpp
+++ b/test/FrontendC++/2006-11-20-GlobalSymbols.cpp
@@ -1,7 +1,7 @@
 // PR1013
 // Check to make sure debug symbols use the correct name for globals and
 // functions.  Will not assemble if it fails to.
-// RUN: %llvmgcc -O0 -g -c %s
+// RUN: %llvmgcc_only -O0 -g -c %s
 
 int foo __asm__("f\001oo");
 
diff --git a/test/FrontendC++/2006-11-30-ConstantExprCrash.cpp b/test/FrontendC++/2006-11-30-ConstantExprCrash.cpp
index 365c8e80fbf5..d351b9413af3 100644
--- a/test/FrontendC++/2006-11-30-ConstantExprCrash.cpp
+++ b/test/FrontendC++/2006-11-30-ConstantExprCrash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o -
+// RUN: %llvmgxx %s -S -o -
 // PR1027
 
 struct sys_var {
diff --git a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
deleted file mode 100644
index 242a37e41336..000000000000
--- a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-// This is a regression test on debug info to make sure we don't hit a compile 
-// unit size issue with gdb.
-// RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN:   llc --disable-fp-elim -o NoCompileUnit.s
-// RUN: %compile_c NoCompileUnit.s -o NoCompileUnit.o
-// RUN: %link NoCompileUnit.o -o NoCompileUnit.exe
-// RUN: echo {break main\nrun\np NoCompileUnit::pubname} > %t2
-// RUN: gdb -q -batch -n -x %t2 NoCompileUnit.exe | \
-// RUN:   tee NoCompileUnit.out | not grep {"low == high"}
-// XFAIL: alpha,arm
-// XFAIL: *
-// See PR2454
-
-
-class MamaDebugTest {
-private:
-  int N;
-  
-protected:
-  MamaDebugTest(int n) : N(n) {}
-  
-  int getN() const { return N; }
-
-};
-
-class BabyDebugTest : public MamaDebugTest {
-private:
-
-public:
-  BabyDebugTest(int n) : MamaDebugTest(n) {}
-  
-  static int doh;
-  
-  int  doit() {
-    int N = getN();
-    int Table[N];
-    
-    int sum = 0;
-    
-    for (int i = 0; i < N; ++i) {
-      int j = i;
-      Table[i] = j;
-    }
-    for (int i = 0; i < N; ++i) {
-      int j = Table[i];
-      sum += j;
-    }
-    
-    return sum;
-  }
-
-};
-
-int BabyDebugTest::doh;
-
-
-int main(int argc, const char *argv[]) {
-  BabyDebugTest BDT(20);
-  return BDT.doit();
-}
diff --git a/test/FrontendC++/2007-01-02-UnboundedArray.cpp b/test/FrontendC++/2007-01-02-UnboundedArray.cpp
index 648d19be62b8..310308694b78 100644
--- a/test/FrontendC++/2007-01-02-UnboundedArray.cpp
+++ b/test/FrontendC++/2007-01-02-UnboundedArray.cpp
@@ -1,6 +1,6 @@
 // Make sure unbounded arrays compile with debug information.
 // 
-// RUN: %llvmgcc -O0 -c -g %s
+// RUN: %llvmgcc -O0 -S -g %s
 
 // PR1068
 
diff --git a/test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp b/test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp
index 654e11be1ffc..5206640be10a 100644
--- a/test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp
+++ b/test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o - | not grep gnu.linkonce.
+// RUN: %llvmgxx %s -S -o - | not grep gnu.linkonce.
 // PR1085
 
 class 
diff --git a/test/FrontendC++/2007-01-06-PtrMethodInit.cpp b/test/FrontendC++/2007-01-06-PtrMethodInit.cpp
index f87c8d888fce..beb79457b559 100644
--- a/test/FrontendC++/2007-01-06-PtrMethodInit.cpp
+++ b/test/FrontendC++/2007-01-06-PtrMethodInit.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o -
+// RUN: %llvmgxx %s -S -o -
 // PR1084
 
 extern "C"
diff --git a/test/FrontendC++/2007-03-27-FunctionVarRename.cpp b/test/FrontendC++/2007-03-27-FunctionVarRename.cpp
index 538d6df1813c..6ff1284ddb40 100644
--- a/test/FrontendC++/2007-03-27-FunctionVarRename.cpp
+++ b/test/FrontendC++/2007-03-27-FunctionVarRename.cpp
@@ -1,5 +1,5 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o - | not grep eprintf1
-// RUN: %llvmgxx %s -emit-llvm -S -o - | grep eprintf
+// RUN: %llvmgxx %s -S -o - | not grep eprintf1
+// RUN: %llvmgxx %s -S -o - | grep eprintf
 
 // Only one eprintf should exist in the output
 
diff --git a/test/FrontendC++/2007-04-11-InlineStorageClassC++.cpp b/test/FrontendC++/2007-04-11-InlineStorageClassC++.cpp
index eabcd5732708..4c2aad397c89 100644
--- a/test/FrontendC++/2007-04-11-InlineStorageClassC++.cpp
+++ b/test/FrontendC++/2007-04-11-InlineStorageClassC++.cpp
@@ -1,16 +1,16 @@
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xglobWeak | grep linkonce | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xextWeak | grep linkonce | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeaknoinline | grep weak | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeakextnoinline | grep weak | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xglobnoWeak | grep linkonce | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgxx %s -S -O0 -o - | grep define | \
 // RUN:   grep xextnoWeak | grep linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
 inline int xglobWeak (int i) {
diff --git a/test/FrontendC++/2007-05-03-VectorInit.cpp b/test/FrontendC++/2007-05-03-VectorInit.cpp
index b87f4d4665ca..af56d3a08a84 100644
--- a/test/FrontendC++/2007-05-03-VectorInit.cpp
+++ b/test/FrontendC++/2007-05-03-VectorInit.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -S -emit-llvm -O0 -o - 
+// RUN: %llvmgxx %s -S -O0 -o - 
 // PR1378
 
 typedef float v4sf __attribute__((vector_size(16)));
diff --git a/test/FrontendC++/2007-05-16-ReverseBitFieldCrash.cpp b/test/FrontendC++/2007-05-16-ReverseBitFieldCrash.cpp
index 8392c0b94a52..42342fc9486c 100644
--- a/test/FrontendC++/2007-05-16-ReverseBitFieldCrash.cpp
+++ b/test/FrontendC++/2007-05-16-ReverseBitFieldCrash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -S -o -
+// RUN: %llvmgxx %s -S -o -
 
 #pragma reverse_bitfields on
 typedef unsigned long UINT32;
diff --git a/test/FrontendC++/2007-05-23-TryFinally.cpp b/test/FrontendC++/2007-05-23-TryFinally.cpp
index 38f0b021aba0..c7971820ec75 100644
--- a/test/FrontendC++/2007-05-23-TryFinally.cpp
+++ b/test/FrontendC++/2007-05-23-TryFinally.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -S -emit-llvm -O2 -o - | ignore grep _Unwind_Resume | \
+// RUN: %llvmgxx %s -S -O2 -o - | ignore grep _Unwind_Resume | \
 // RUN:   wc -l | grep {\[23\]}
 
 struct One { };
diff --git a/test/FrontendC++/2007-07-29-RestrictPtrArg.cpp b/test/FrontendC++/2007-07-29-RestrictPtrArg.cpp
index d54dfbe57121..2e85abdf25ea 100644
--- a/test/FrontendC++/2007-07-29-RestrictPtrArg.cpp
+++ b/test/FrontendC++/2007-07-29-RestrictPtrArg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep noalias
+// RUN: %llvmgxx -S %s -o - | grep noalias
 
 void foo(int * __restrict myptr1, int * myptr2) {
   myptr1[0] = 0;
diff --git a/test/FrontendC++/2007-07-29-RestrictRefArg.cpp b/test/FrontendC++/2007-07-29-RestrictRefArg.cpp
index 0c28e4d27a2b..128ddb3ab592 100644
--- a/test/FrontendC++/2007-07-29-RestrictRefArg.cpp
+++ b/test/FrontendC++/2007-07-29-RestrictRefArg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep noalias
+// RUN: %llvmgxx -S %s -o - | grep noalias
 
 void foo(int & __restrict myptr1, int & myptr2) {
   myptr1 = 0;
diff --git a/test/FrontendC++/2007-08-01-RestrictMethod.cpp b/test/FrontendC++/2007-08-01-RestrictMethod.cpp
index b4922beab1b2..feefaa1759c5 100644
--- a/test/FrontendC++/2007-08-01-RestrictMethod.cpp
+++ b/test/FrontendC++/2007-08-01-RestrictMethod.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep noalias
+// RUN: %llvmgxx -S %s -o - | grep noalias
 
 
 class foo {
diff --git a/test/FrontendC++/2007-09-10-RecursiveTypeResolution.cpp b/test/FrontendC++/2007-09-10-RecursiveTypeResolution.cpp
index f81394409d4a..1fcf15f0d9d8 100644
--- a/test/FrontendC++/2007-09-10-RecursiveTypeResolution.cpp
+++ b/test/FrontendC++/2007-09-10-RecursiveTypeResolution.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o -
+// RUN: %llvmgxx -S %s -o -
 // PR1634
 
 namespace Manta
diff --git a/test/FrontendC++/2007-10-01-StructResize.cpp b/test/FrontendC++/2007-10-01-StructResize.cpp
index d37057a901a4..71109eb7b6a5 100644
--- a/test/FrontendC++/2007-10-01-StructResize.cpp
+++ b/test/FrontendC++/2007-10-01-StructResize.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c %s -o /dev/null
+// RUN: %llvmgxx -S %s -o /dev/null
 
 #pragma pack(4)
 
diff --git a/test/FrontendC++/2008-10-29-WrongOffset.cpp b/test/FrontendC++/2008-10-29-WrongOffset.cpp
index 1b3be2132b18..c261c3123767 100644
--- a/test/FrontendC++/2008-10-29-WrongOffset.cpp
+++ b/test/FrontendC++/2008-10-29-WrongOffset.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -c -o /dev/null
+// RUN: %llvmgxx %s -S -o /dev/null
 // PR2917
 
 #include <complex>
diff --git a/test/FrontendC++/2009-02-16-CtorNames-dbg.cpp b/test/FrontendC++/2009-02-16-CtorNames-dbg.cpp
index 8f1b598aa2bf..eb69963e5343 100644
--- a/test/FrontendC++/2009-02-16-CtorNames-dbg.cpp
+++ b/test/FrontendC++/2009-02-16-CtorNames-dbg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -g --emit-llvm %s -o - | grep "\~A"
+// RUN: %llvmgcc -S -g %s -o - | grep "\~A"
 class A {
   int i;
 public:
diff --git a/test/FrontendC++/2009-03-17-dbg.cpp b/test/FrontendC++/2009-03-17-dbg.cpp
index 93da61873bfc..6708e12f9b93 100644
--- a/test/FrontendC++/2009-03-17-dbg.cpp
+++ b/test/FrontendC++/2009-03-17-dbg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o /dev/null -g
+// RUN: %llvmgxx -S %s -o /dev/null -g
 // XTARGET: darwin,linux
 // XFAIL: *
 template <typename T1,typename T2>
diff --git a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
index 997c3f703508..e3616da073bf 100644
--- a/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
+++ b/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -g %s -o - | llc -O0 -o %t.s
+// RUN: %llvmgcc -S -g %s -o - | llc -O0 -o %t.s
 // RUN: %compile_c %t.s -o %t.o
 // PR4025
 
diff --git a/test/FrontendC++/2009-04-23-bool2.cpp b/test/FrontendC++/2009-04-23-bool2.cpp
index 8614a37d36c4..2c76d982ea6e 100644
--- a/test/FrontendC++/2009-04-23-bool2.cpp
+++ b/test/FrontendC++/2009-04-23-bool2.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o /dev/null
+// RUN: %llvmgxx -S %s -o /dev/null
 // g++.old-deja/g++.jason/bool2.C from gcc testsuite.
 // Crashed before 67975 went in.
 struct F {
diff --git a/test/FrontendC++/2009-05-04-PureConstNounwind.cpp b/test/FrontendC++/2009-05-04-PureConstNounwind.cpp
index a4b4653e122e..e275c340a950 100644
--- a/test/FrontendC++/2009-05-04-PureConstNounwind.cpp
+++ b/test/FrontendC++/2009-05-04-PureConstNounwind.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -emit-llvm %s -o - | grep nounwind | count 4
+// RUN: %llvmgxx -S %s -o - | grep nounwind | count 4
 int c(void) __attribute__((const));
 int p(void) __attribute__((pure));
 int t(void);
diff --git a/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp b/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
index b3758d2782ca..c2a841b1a674 100644
--- a/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
+++ b/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o /dev/null -g
+// RUN: %llvmgxx -S %s -o /dev/null -g
 // This crashes if we try to emit debug info for TEMPLATE_DECL members.
 template <class T> class K2PtrVectorBase {};
 template <class T> class K2Vector {};
diff --git a/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
index dcb2f16a5dc8..e0bc043adad9 100644
--- a/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
+++ b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
@@ -1,10 +1,10 @@
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep baz | grep global | grep {struct.bar}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep ccc | grep global | grep {struct.CC}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep quux | grep global | grep {struct.bar}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep foo | grep global | grep {struct.SRCFilter::FilterEntry}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.bar} | grep {1 x i32}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.CC} | grep {struct.payre<KBFP,float*} | grep {.base.32} | grep {1 x i32}
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.SRCFilter::FilterEntry} | not grep {1 x i32}
+// RUN: %llvmgxx -S -m32 %s -o - | grep baz | grep global | grep {struct.bar}
+// RUN: %llvmgxx -S -m32 %s -o - | grep ccc | grep global | grep {struct.CC}
+// RUN: %llvmgxx -S -m32 %s -o - | grep quux | grep global | grep {struct.bar}
+// RUN: %llvmgxx -S -m32 %s -o - | grep foo | grep global | grep {struct.SRCFilter::FilterEntry}
+// RUN: %llvmgxx -S -m32 %s -o - | grep {struct.bar} | grep {1 x i32}
+// RUN: %llvmgxx -S -m32 %s -o - | grep {struct.CC} | grep {struct.payre<KBFP,float*} | grep {.base.32} | grep {1 x i32}
+// RUN: %llvmgxx -S -m32 %s -o - | grep {struct.SRCFilter::FilterEntry} | not grep {1 x i32}
 // XFAIL: *
 // XTARGET: powerpc-apple-darwin
 
diff --git a/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp b/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
index bc862e70bde1..89a79f244639 100644
--- a/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
+++ b/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -c -emit-llvm %s -o -
+// RUN: %llvmgxx -S %s -o -
 // rdar://7114564
 struct A {
   unsigned long long : (sizeof(unsigned long long) * 8) - 16;
diff --git a/test/FrontendC++/2009-08-11-VectorRetTy.cpp b/test/FrontendC++/2009-08-11-VectorRetTy.cpp
index b2c3ba185b5c..403b59d8f99e 100644
--- a/test/FrontendC++/2009-08-11-VectorRetTy.cpp
+++ b/test/FrontendC++/2009-08-11-VectorRetTy.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -c -o /dev/null
+// RUN: %llvmgxx %s -S -o /dev/null
 // <rdar://problem/7096460>
 typedef void (*Func) ();
 typedef long long m64 __attribute__((__vector_size__(8), __may_alias__));
diff --git a/test/FrontendC++/2009-09-04-modify-crash.cpp b/test/FrontendC++/2009-09-04-modify-crash.cpp
index ac16f8c36d18..89274e09c7ed 100644
--- a/test/FrontendC++/2009-09-04-modify-crash.cpp
+++ b/test/FrontendC++/2009-09-04-modify-crash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -emit-llvm -fapple-kext -S -o -
+// RUN: %llvmgxx %s -fapple-kext -S -o -
 // The extra check in 71555 caused this to crash on Darwin X86
 // in an assert build.
 class foo {
diff --git a/test/FrontendC++/2009-09-09-packed-layout.cpp b/test/FrontendC++/2009-09-09-packed-layout.cpp
index a569f9f78767..921aad79f73c 100644
--- a/test/FrontendC++/2009-09-09-packed-layout.cpp
+++ b/test/FrontendC++/2009-09-09-packed-layout.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -m32 -emit-llvm %s -o /dev/null
+// RUN: %llvmgxx -S -m32 %s -o /dev/null
 class X { 
  public:
   virtual ~X();
diff --git a/test/FrontendC++/2009-10-27-crash.cpp b/test/FrontendC++/2009-10-27-crash.cpp
index 21d0064c687b..da73988b6976 100644
--- a/test/FrontendC++/2009-10-27-crash.cpp
+++ b/test/FrontendC++/2009-10-27-crash.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -emit-llvm -S %s -o /dev/null
+// RUN: %llvmgxx -S %s -o /dev/null
 // Radar 7328944
 
 typedef struct
diff --git a/test/FrontendC++/2010-03-22-empty-baseclass.cpp b/test/FrontendC++/2010-03-22-empty-baseclass.cpp
index b6bdea40c3f2..bb741c42c842 100644
--- a/test/FrontendC++/2010-03-22-empty-baseclass.cpp
+++ b/test/FrontendC++/2010-03-22-empty-baseclass.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -emit-llvm %s -o - -O2 | FileCheck %s
+// RUN: %llvmgxx -S %s -o - -O2 | FileCheck %s
 namespace boost {
   namespace detail {
     template <typename T> struct cv_traits_imp {};
diff --git a/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp b/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp
index 7052dc0dccd5..761c0dc097a4 100644
--- a/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp
+++ b/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp
@@ -7,12 +7,12 @@ public:
 };
 
 int foo::bar(int x) {
-  // CHECK: {{i1 false, i1 true(, i[0-9]+ [^\}]+[}]|[}]) ; \[ DW_TAG_subprogram \]}}
+  // CHECK: {{i32 [0-9]+, i1 true(, i[0-9]+ [^\}]+[}]|[}]) ; \[ DW_TAG_subprogram \]}}
     return x*4 + 1;
 }
 
 int foo::baz(int x) {
-  // CHECK: {{i1 false, i1 true(, i[0-9]+ [^\},]+[}]|[}]) ; \[ DW_TAG_subprogram \]}}
+  // CHECK: {{i32 [0-9]+, i1 true(, i[0-9]+ [^\},]+[}]|[}]) ; \[ DW_TAG_subprogram \]}}
     return x*4 + 1;
 }
 
diff --git a/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp b/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp
index 203b542b784b..9203dbd0bd9f 100644
--- a/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp
+++ b/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp
@@ -1,5 +1,5 @@
-// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | not grep ZN12basic_stringIcEC1Ev
-// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep ZN12basic_stringIcED1Ev | count 2
+// RUN: %llvmgxx -xc++ %s -S -o - | not grep ZN12basic_stringIcEC1Ev
+// RUN: %llvmgxx -xc++ %s -S -o - | grep ZN12basic_stringIcED1Ev | count 2
 
 template<class charT> 
 class basic_string
diff --git a/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp b/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp
index 66acfbe4b326..c2d6abe97fc5 100644
--- a/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp
+++ b/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp
@@ -1,4 +1,4 @@
-//RUN: %llvmgxx -O0 -emit-llvm -S -g -o - %s | grep DW_TAG_auto_variable
+//RUN: %llvmgxx -O0 -S -g -o - %s | grep DW_TAG_auto_variable
 class Foo
 {
  public:
diff --git a/test/FrontendC++/2010-06-22-BitfieldInit.cpp b/test/FrontendC++/2010-06-22-BitfieldInit.cpp
index 1cfe1f9f6fd5..8dceb78bfc67 100644
--- a/test/FrontendC++/2010-06-22-BitfieldInit.cpp
+++ b/test/FrontendC++/2010-06-22-BitfieldInit.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -g -c %s
+// RUN: %llvmgxx -g -S %s
 struct TEST2
 {
   int subid:32;
diff --git a/test/FrontendC++/2010-06-22-ZeroBitfield.cpp b/test/FrontendC++/2010-06-22-ZeroBitfield.cpp
index c979f8d9b918..9c4f2629f748 100644
--- a/test/FrontendC++/2010-06-22-ZeroBitfield.cpp
+++ b/test/FrontendC++/2010-06-22-ZeroBitfield.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -g -c %s
+// RUN: %llvmgxx -g -S %s
 struct s8_0 { unsigned : 0; };
 struct s8_1 { double x; };
 struct s8 { s8_0 a; s8_1 b; };
diff --git a/test/FrontendC++/2010-07-19-nowarn.cpp b/test/FrontendC++/2010-07-19-nowarn.cpp
index 8742bf152329..a61a84ff28b0 100644
--- a/test/FrontendC++/2010-07-19-nowarn.cpp
+++ b/test/FrontendC++/2010-07-19-nowarn.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null
+// RUN: %llvmgcc %s -S -m32 -fasm-blocks -o /dev/null
 // This should not warn about unreferenced label. 8195660.
 // XFAIL: *
 // XTARGET: x86,i386,i686
diff --git a/test/FrontendC++/2010-07-23-DeclLoc.cpp b/test/FrontendC++/2010-07-23-DeclLoc.cpp
index c72de3b33623..9bf432beb727 100644
--- a/test/FrontendC++/2010-07-23-DeclLoc.cpp
+++ b/test/FrontendC++/2010-07-23-DeclLoc.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -emit-llvm -S -g %s -o - | FileCheck %s
+// RUN: %llvmgxx -S -g %s -o - | FileCheck %s
 // Require the template function declaration refer to the correct filename.
 // First, locate the function decl in metadata, and pluck out the file handle:
 // CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]],
diff --git a/test/FrontendC++/member-alignment.cpp b/test/FrontendC++/member-alignment.cpp
index 6afc0aaede1c..c5b20b279325 100644
--- a/test/FrontendC++/member-alignment.cpp
+++ b/test/FrontendC++/member-alignment.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %llvmgxx -S %s -o - | FileCheck %s
 // XFAIL: arm,powerpc
 
 // rdar://7268289
diff --git a/test/FrontendC++/ptr-to-method-devirt.cpp b/test/FrontendC++/ptr-to-method-devirt.cpp
index 358b801af556..a5ca5c76559a 100644
--- a/test/FrontendC++/ptr-to-method-devirt.cpp
+++ b/test/FrontendC++/ptr-to-method-devirt.cpp
@@ -1,6 +1,6 @@
 // PR1602
-// RUN: %llvmgxx -c -emit-llvm %s -o - -O3 | llvm-dis | not grep ptrtoint
-// RUN: %llvmgxx -c -emit-llvm %s -o - -O3 | llvm-dis | grep getelementptr | count 1
+// RUN: %llvmgxx -S %s -o - -O3 | not grep ptrtoint
+// RUN: %llvmgxx -S %s -o - -O3 | grep getelementptr | count 1
 
 
 struct S { virtual void f(); };
diff --git a/test/FrontendC++/varargs.cpp b/test/FrontendC++/varargs.cpp
index 1c07aedd093d..c4de76acc30b 100644
--- a/test/FrontendC++/varargs.cpp
+++ b/test/FrontendC++/varargs.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %llvmgxx -S %s -o - | FileCheck %s
 // rdar://7309675
 // PR4678
 
diff --git a/test/FrontendC++/weak-external.cpp b/test/FrontendC++/weak-external.cpp
index 94360c2e97ee..f4f0ba19ef37 100644
--- a/test/FrontendC++/weak-external.cpp
+++ b/test/FrontendC++/weak-external.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx %s -S -emit-llvm -O2 -o - | not grep {_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag}
+// RUN: %llvmgxx %s -S -O2 -o - | not grep {_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag}
 // PR4262
 
 // The "basic_string" extern template instantiation declaration is supposed to
diff --git a/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp b/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp
index ec69afc21541..f81854e0cb92 100644
--- a/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp
+++ b/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp
@@ -1,4 +1,4 @@
-// RUN: %llvmgxx -S -emit-llvm %s -o - | grep byval | count 2
+// RUN: %llvmgxx -S %s -o - | grep byval | count 2
 // XTARGET: x86
 // PR4242
 // (PR 4242 bug is on 64-bit only, test passes on x86-32 as well)
diff --git a/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c b/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c
index 39412e5f84ee..12b4f7b93329 100644
--- a/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c
+++ b/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc -xc %s -c -o - | llvm-dis | not grep __builtin_
+/* RUN: %llvmgcc -xc %s -S -o - | not grep __builtin_
  *
  * __builtin_longjmp/setjmp should get transformed into llvm.setjmp/longjmp 
  * just like explicit setjmp/longjmp calls are.
diff --git a/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c b/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c
index c275fee5d09a..9ae633ee0812 100644
--- a/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c
+++ b/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o %t.o
+// RUN: %llvmgcc -xc %s -S -o %t.o
 
 int test(_Bool pos, _Bool color) {
   return 0;
diff --git a/test/FrontendC/2003-11-03-AddrArrayElement.c b/test/FrontendC/2003-11-03-AddrArrayElement.c
index ed3fc1a6cb42..4337da7d1e43 100644
--- a/test/FrontendC/2003-11-03-AddrArrayElement.c
+++ b/test/FrontendC/2003-11-03-AddrArrayElement.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep getelementptr
+// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr
 
 // This should be turned into a tasty getelementptr instruction, not a nasty
 // series of casts and address arithmetic.
diff --git a/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c b/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c
index 443dfbdb37fc..58f9f82e1543 100644
--- a/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c
+++ b/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep getelementptr
+// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr
 
 char *test(char* C) {
   return C-1;   // Should turn into a GEP
diff --git a/test/FrontendC/2003-11-13-TypeSafety.c b/test/FrontendC/2003-11-13-TypeSafety.c
index 128b767e22da..9b76bb11c984 100644
--- a/test/FrontendC/2003-11-13-TypeSafety.c
+++ b/test/FrontendC/2003-11-13-TypeSafety.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep getelementptr
+// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr
 
 int *test(int *X, int Y) {
   return X + Y;
diff --git a/test/FrontendC/2003-12-14-ExternInlineSupport.c b/test/FrontendC/2003-12-14-ExternInlineSupport.c
index fb92ec773c1a..a45eb98dca2a 100644
--- a/test/FrontendC/2003-12-14-ExternInlineSupport.c
+++ b/test/FrontendC/2003-12-14-ExternInlineSupport.c
@@ -1,3 +1,3 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | not grep dead_function
+// RUN: %llvmgcc -xc %s -S -o - | not grep dead_function
 
 extern __inline__ void dead_function() {}
diff --git a/test/FrontendC/2004-02-12-LargeAggregateCopy.c b/test/FrontendC/2004-02-12-LargeAggregateCopy.c
index b3c9bcf38108..93b7fe44bf60 100644
--- a/test/FrontendC/2004-02-12-LargeAggregateCopy.c
+++ b/test/FrontendC/2004-02-12-LargeAggregateCopy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep llvm.memcpy
+// RUN: %llvmgcc -xc %s -S -o - | grep llvm.memcpy
 
 struct X { int V[10000]; };
 struct X Global1, Global2;
diff --git a/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c b/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c
index 162d32a658c0..f115b5a5f013 100644
--- a/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c
+++ b/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep llvm.*address | count 4
+// RUN: %llvmgcc -xc %s -S -o - | grep llvm.*address | count 4
 
 void *test1() {
   return __builtin_return_address(1);
diff --git a/test/FrontendC/2004-02-13-IllegalVararg.c b/test/FrontendC/2004-02-13-IllegalVararg.c
index 21039c6b0fa9..0d003c8033ca 100644
--- a/test/FrontendC/2004-02-13-IllegalVararg.c
+++ b/test/FrontendC/2004-02-13-IllegalVararg.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -w -c -o - | llc
+// RUN: %llvmgcc -xc %s -w -S -o - | llc
 // XFAIL: *
 // See PR2452
 
diff --git a/test/FrontendC/2004-02-13-Memset.c b/test/FrontendC/2004-02-13-Memset.c
index fc26051f6d85..fb6ed2352ea3 100644
--- a/test/FrontendC/2004-02-13-Memset.c
+++ b/test/FrontendC/2004-02-13-Memset.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep llvm.memset | count 3
+// RUN: %llvmgcc -xc %s -S -o - | grep llvm.memset | count 3
 
 void *memset(void*, int, long);
 void bzero(void*, long);
diff --git a/test/FrontendC/2004-02-20-Builtins.c b/test/FrontendC/2004-02-20-Builtins.c
index 0c9ac7cae8af..c056a8405f73 100644
--- a/test/FrontendC/2004-02-20-Builtins.c
+++ b/test/FrontendC/2004-02-20-Builtins.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -O3 -xc %s -c -o - | llvm-dis | not grep builtin
+// RUN: %llvmgcc -O3 -xc %s -S -o - | not grep builtin
 
 #include <math.h>
 
diff --git a/test/FrontendC/2004-03-07-ExternalConstant.c b/test/FrontendC/2004-03-07-ExternalConstant.c
index b8e13a35548a..4a9094bdf344 100644
--- a/test/FrontendC/2004-03-07-ExternalConstant.c
+++ b/test/FrontendC/2004-03-07-ExternalConstant.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep constant
+// RUN: %llvmgcc -xc %s -S -o - | grep constant
 
 extern const int a[];   // 'a' should be marked constant even though it's external!
 int foo () {
diff --git a/test/FrontendC/2004-06-17-UnorderedCompares.c b/test/FrontendC/2004-06-17-UnorderedCompares.c
index f91ed6687ce4..286e7bc7cf70 100644
--- a/test/FrontendC/2004-06-17-UnorderedCompares.c
+++ b/test/FrontendC/2004-06-17-UnorderedCompares.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc -std=c99 %s -c -o - | llvm-dis | grep -v llvm.isunordered | not grep call
+// RUN: %llvmgcc -xc -std=c99 %s -S -o - | grep -v llvm.isunordered | not grep call
 
 #include <math.h>
 
diff --git a/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c b/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c
index b1e14212732e..994ac8f8436f 100644
--- a/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c
+++ b/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -c -emit-llvm %s -o - | \
-// RUN:   opt -std-compile-opts | llvm-dis | not grep {declare i32.*func}
+// RUN: %llvmgcc -S %s -o - | \
+// RUN:   opt -std-compile-opts -S | not grep {declare i32.*func}
 
 // There should not be an unresolved reference to func here.  Believe it or not,
 // the "expected result" is a function named 'func' which is internal and 
diff --git a/test/FrontendC/2005-01-02-PointerDifference.c b/test/FrontendC/2005-01-02-PointerDifference.c
index a351da2ed8c8..2c108e5f6cac 100644
--- a/test/FrontendC/2005-01-02-PointerDifference.c
+++ b/test/FrontendC/2005-01-02-PointerDifference.c
@@ -1,3 +1,3 @@
-// RUN: %llvmgcc -xc %s -c -o - | llvm-dis | grep -v div
+// RUN: %llvmgcc -xc %s -S -o - | grep -v div
 
 int Diff(int *P, int *Q) { return P-Q; }
diff --git a/test/FrontendC/2005-02-27-MarkGlobalConstant.c b/test/FrontendC/2005-02-27-MarkGlobalConstant.c
index b9fbbb6369a5..6806c94c10b3 100644
--- a/test/FrontendC/2005-02-27-MarkGlobalConstant.c
+++ b/test/FrontendC/2005-02-27-MarkGlobalConstant.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc %s -S -o - | grep {private constant }
+// RUN: %llvmgcc -xc %s -S -o - | grep {private unnamed_addr constant }
 
 // The synthetic global made by the CFE for big initializer should be marked
 // constant.
diff --git a/test/FrontendC/2005-12-04-AttributeUsed.c b/test/FrontendC/2005-12-04-AttributeUsed.c
index 33e27e89f4c8..f47e977f4861 100644
--- a/test/FrontendC/2005-12-04-AttributeUsed.c
+++ b/test/FrontendC/2005-12-04-AttributeUsed.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llvm-as | llvm-dis | \
+// RUN: %llvmgcc %s -S -o - | llvm-as | llvm-dis | \
 // RUN:   grep llvm.used | grep foo | grep X
 
 int X __attribute__((used));
diff --git a/test/FrontendC/2006-03-03-MissingInitializer.c b/test/FrontendC/2006-03-03-MissingInitializer.c
index 19d4bc7fe7a8..5e027b1894ac 100644
--- a/test/FrontendC/2006-03-03-MissingInitializer.c
+++ b/test/FrontendC/2006-03-03-MissingInitializer.c
@@ -1,5 +1,5 @@
 // RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \
-// RUN:    llvm-dis | grep {@nate.*internal global i32 0}
+// RUN:    llvm-dis | grep {@nate.*internal unnamed_addr global i32 0}
 
 struct X { int *XX; int Y;};
 
diff --git a/test/FrontendC/2007-01-06-KNR-Proto.c b/test/FrontendC/2007-01-06-KNR-Proto.c
index eb2f25482d9d..6aa74d4cb25f 100644
--- a/test/FrontendC/2007-01-06-KNR-Proto.c
+++ b/test/FrontendC/2007-01-06-KNR-Proto.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -o - -emit-llvm %s
+// RUN: %llvmgcc -S -o - %s
 // PR1083
 
 int svc_register (void (*dispatch) (int));
diff --git a/test/FrontendC/2007-02-04-AddrLValue-2.c b/test/FrontendC/2007-02-04-AddrLValue-2.c
index 90251e6debdf..fa20faff3e15 100644
--- a/test/FrontendC/2007-02-04-AddrLValue-2.c
+++ b/test/FrontendC/2007-02-04-AddrLValue-2.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -O3 -S -o - -emit-llvm
+// RUN: %llvmgcc %s -O3 -S -o -
 // PR1173
 
 struct S { char s; };
diff --git a/test/FrontendC/2007-02-04-AddrLValue.c b/test/FrontendC/2007-02-04-AddrLValue.c
index c8b65a946f5e..214fce7747ce 100644
--- a/test/FrontendC/2007-02-04-AddrLValue.c
+++ b/test/FrontendC/2007-02-04-AddrLValue.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -O3 -S -o - -emit-llvm
+// RUN: %llvmgcc %s -O3 -S -o -
 // PR1176
 
 typedef struct
diff --git a/test/FrontendC/2007-02-04-EmptyStruct.c b/test/FrontendC/2007-02-04-EmptyStruct.c
index 48ad31f7c3fd..5ad2c705cce8 100644
--- a/test/FrontendC/2007-02-04-EmptyStruct.c
+++ b/test/FrontendC/2007-02-04-EmptyStruct.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -O3 -S -o - -emit-llvm
+// RUN: %llvmgcc %s -O3 -S -o -
 // PR1175
 
 struct empty { };
diff --git a/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c b/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c
index f02a44b15733..d5a9fbb0ecc2 100644
--- a/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c
+++ b/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -O3 -S -o - -emit-llvm
+// RUN: %llvmgcc %s -O3 -S -o -
 // PR1174
 
 void zzz (char *s1, char *s2, int len, int *q)
diff --git a/test/FrontendC/2007-02-05-nested.c b/test/FrontendC/2007-02-05-nested.c
index be23f175c8c8..bd6d30695ba0 100644
--- a/test/FrontendC/2007-02-05-nested.c
+++ b/test/FrontendC/2007-02-05-nested.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -fnested-functions -O0 -o - -emit-llvm %s 
+// RUN: %llvmgcc -S -fnested-functions -O0 -o - %s 
 // PR915
 
 extern void abort(void);
diff --git a/test/FrontendC/2007-02-07-AddrLabel.c b/test/FrontendC/2007-02-07-AddrLabel.c
index 144f62d0992b..03ed4c987e44 100644
--- a/test/FrontendC/2007-02-07-AddrLabel.c
+++ b/test/FrontendC/2007-02-07-AddrLabel.c
@@ -1,5 +1,5 @@
 // PR947
-// RUN: %llvmgcc %s -c -o - 
+// RUN: %llvmgcc %s -S -o - 
 
 void foo() {
     void *ptr;
diff --git a/test/FrontendC/2007-02-16-VoidPtrDiff.c b/test/FrontendC/2007-02-16-VoidPtrDiff.c
index 713b9b28b075..15df28cae3fe 100644
--- a/test/FrontendC/2007-02-16-VoidPtrDiff.c
+++ b/test/FrontendC/2007-02-16-VoidPtrDiff.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -o - -emit-llvm
+// RUN: %llvmgcc %s -S -o -
 
 void foo(void *ptr, int test) {
   (ptr - ((void *) test + 0x2000));
diff --git a/test/FrontendC/2007-02-16-WritableStrings.c b/test/FrontendC/2007-02-16-WritableStrings.c
index 0f281ce7c5e3..8fa7f15dc6c3 100644
--- a/test/FrontendC/2007-02-16-WritableStrings.c
+++ b/test/FrontendC/2007-02-16-WritableStrings.c
@@ -1,7 +1,7 @@
 // Test the -fwritable-strings option.
 
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm -fwritable-strings %s | \
-// RUN:    grep {internal global}
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {private constant}
+// RUN: %llvmgcc -O3 -S -o - -fwritable-strings %s | \
+// RUN:    grep {internal unnamed_addr global}
+// RUN: %llvmgcc -O3 -S -o - %s | grep {private unnamed_addr constant}
 
 char *X = "foo";
diff --git a/test/FrontendC/2007-02-25-C-DotDotDot.c b/test/FrontendC/2007-02-25-C-DotDotDot.c
index 969602200c19..3f96fd1f9e1f 100644
--- a/test/FrontendC/2007-02-25-C-DotDotDot.c
+++ b/test/FrontendC/2007-02-25-C-DotDotDot.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -O0 -S -o - -emit-llvm -fno-inline -fno-unit-at-a-time %s | \
+// RUN: %llvmgcc -O0 -S -o - -fno-inline -fno-unit-at-a-time %s | \
 // RUN:   grep {call float @foo}
 
 // Make sure the call to foo is compiled as:
diff --git a/test/FrontendC/2007-03-01-VarSizeArrayIdx.c b/test/FrontendC/2007-03-01-VarSizeArrayIdx.c
index a3d480cafe66..6ebe79672f58 100644
--- a/test/FrontendC/2007-03-01-VarSizeArrayIdx.c
+++ b/test/FrontendC/2007-03-01-VarSizeArrayIdx.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -O3 -S -o - -emit-llvm | grep mul
+// RUN: %llvmgcc %s -O3 -S -o - | grep mul
 // PR1233
 
 float foo(int w, float A[][w], int g, int h) {
diff --git a/test/FrontendC/2007-04-11-InlineAsmStruct.c b/test/FrontendC/2007-04-11-InlineAsmStruct.c
index 49741c68ee6e..6c6c1509903d 100644
--- a/test/FrontendC/2007-04-11-InlineAsmStruct.c
+++ b/test/FrontendC/2007-04-11-InlineAsmStruct.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
+// RUN: %llvmgcc %s -S -o - | llc
 
 struct V { short X, Y; };
 int bar() {
diff --git a/test/FrontendC/2007-04-11-InlineAsmUnion.c b/test/FrontendC/2007-04-11-InlineAsmUnion.c
index 83fe7db771f5..014470102d32 100644
--- a/test/FrontendC/2007-04-11-InlineAsmUnion.c
+++ b/test/FrontendC/2007-04-11-InlineAsmUnion.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
+// RUN: %llvmgcc %s -S -o - | llc
 
 union U { int x; float p; };
 void foo() {
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC89.c b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
index ec2b1ec70bec..834fb07a2623 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC89.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
@@ -1,17 +1,17 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | grep xglobWeak | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | grep xglobWeak | \
 // RUN:   grep weak | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | grep xextWeak | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | grep xextWeak | \
 // RUN:   grep weak | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeaknoinline | grep weak | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeakextnoinline | grep weak | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | \
 // RUN:   grep xglobnoWeak | grep -v internal | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc %s -S -O0 -o - | grep define | \
 // RUN:   grep xextnoWeak | grep available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC99.c b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
index 89af2789b3e5..6031071e3464 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC99.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
@@ -1,17 +1,17 @@
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep declare | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep declare | \
 // RUN:   grep xglobWeak | grep extern_weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xextWeak | grep weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeaknoinline | grep weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xWeakextnoinline | grep weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xglobnoWeak | grep available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
+// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \
 // RUN:   grep xextnoWeak | grep -v available_externally | grep -v weak | \
 // RUN:   grep -v linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
diff --git a/test/FrontendC/2007-04-13-InlineAsmStruct2.c b/test/FrontendC/2007-04-13-InlineAsmStruct2.c
index e4870e75be2f..44ddeb3f95d8 100644
--- a/test/FrontendC/2007-04-13-InlineAsmStruct2.c
+++ b/test/FrontendC/2007-04-13-InlineAsmStruct2.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | grep {call void asm}
+// RUN: %llvmgcc %s -S -o - | grep {call void asm}
 
 struct V { short X, Y; };
 int bar() {
diff --git a/test/FrontendC/2007-04-13-InlineAsmUnion2.c b/test/FrontendC/2007-04-13-InlineAsmUnion2.c
index 284654d223c0..a0944a7b6407 100644
--- a/test/FrontendC/2007-04-13-InlineAsmUnion2.c
+++ b/test/FrontendC/2007-04-13-InlineAsmUnion2.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | grep {call void asm}
+// RUN: %llvmgcc %s -S -o - | grep {call void asm}
 
 union U { int x; char* p; };
 void foo() {
diff --git a/test/FrontendC/2007-04-24-VolatileStructCopy.c b/test/FrontendC/2007-04-24-VolatileStructCopy.c
index 4765921f1d21..d49e75e02541 100644
--- a/test/FrontendC/2007-04-24-VolatileStructCopy.c
+++ b/test/FrontendC/2007-04-24-VolatileStructCopy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {volatile store}
+// RUN: %llvmgcc -O3 -S -o - %s | grep {volatile store}
 // PR1352
 
 struct foo {
diff --git a/test/FrontendC/2007-04-24-bit-not-expr.c b/test/FrontendC/2007-04-24-bit-not-expr.c
index 1c27f181c0e4..fab0b90bb15f 100644
--- a/test/FrontendC/2007-04-24-bit-not-expr.c
+++ b/test/FrontendC/2007-04-24-bit-not-expr.c
@@ -1,5 +1,5 @@
 // PR 1346
-// RUN: %llvmgcc -c %s  -o /dev/null
+// RUN: %llvmgcc -S %s  -o /dev/null
 extern bar(void *);
 
 void f(void *cd) {
diff --git a/test/FrontendC/2007-04-24-str-const.c b/test/FrontendC/2007-04-24-str-const.c
index 4c109c41b019..3c3dab372ab5 100644
--- a/test/FrontendC/2007-04-24-str-const.c
+++ b/test/FrontendC/2007-04-24-str-const.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s  -o /dev/null
+// RUN: %llvmgcc -S %s  -o /dev/null
 static char *str;
 
 static const struct {
diff --git a/test/FrontendC/2007-05-07-PaddingElements.c b/test/FrontendC/2007-05-07-PaddingElements.c
index 9be8850895ae..1e4f4d0a7512 100644
--- a/test/FrontendC/2007-05-07-PaddingElements.c
+++ b/test/FrontendC/2007-05-07-PaddingElements.c
@@ -1,6 +1,6 @@
 // PR 1278
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep {struct.s} | not grep "4 x i8] zeroinitializer"
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | not grep "i32 0, i32 2"
+// RUN: %llvmgcc %s -S -O0 -o - | grep {struct.s} | not grep "4 x i8] zeroinitializer"
+// RUN: %llvmgcc %s -S -O0 -o - | not grep "i32 0, i32 2"
 struct s {
   double d1;
   int s1;
diff --git a/test/FrontendC/2007-05-11-str-const.c b/test/FrontendC/2007-05-11-str-const.c
index 48deddbb019e..46a74c19e017 100644
--- a/test/FrontendC/2007-05-11-str-const.c
+++ b/test/FrontendC/2007-05-11-str-const.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -g %s  -o /dev/null
+// RUN: %llvmgcc -S -g %s  -o /dev/null
 
 static unsigned char out[]={0,1};
 static const unsigned char str1[]="1";
diff --git a/test/FrontendC/2007-05-15-PaddingElement.c b/test/FrontendC/2007-05-15-PaddingElement.c
index a218b3594d72..bad6a11dae81 100644
--- a/test/FrontendC/2007-05-15-PaddingElement.c
+++ b/test/FrontendC/2007-05-15-PaddingElement.c
@@ -1,6 +1,6 @@
 // PR 1419
 
-// RUN: %llvmgcc -xc  -O2 %s -c -o - | llvm-dis | grep "ret i32 1"
+// RUN: %llvmgcc -xc  -O2 %s -S -o - | grep "ret i32 1"
 struct A {
   short x;
   long long :0;
diff --git a/test/FrontendC/2007-05-16-EmptyStruct.c b/test/FrontendC/2007-05-16-EmptyStruct.c
index 23c0b1d6a3f6..7b2ab61bccaf 100644
--- a/test/FrontendC/2007-05-16-EmptyStruct.c
+++ b/test/FrontendC/2007-05-16-EmptyStruct.c
@@ -1,5 +1,5 @@
 // PR 1417
 
-// RUN: %llvmgcc -xc  %s -c -o - | llvm-dis | grep "struct.anon = type \{\}"
+// RUN: %llvmgcc -xc  %s -S -o - | grep "struct.anon = type \{\}"
 
 struct { } *X;
diff --git a/test/FrontendC/2007-05-29-UnionCopy.c b/test/FrontendC/2007-05-29-UnionCopy.c
index ded67d4d6b28..95ab388c842f 100644
--- a/test/FrontendC/2007-05-29-UnionCopy.c
+++ b/test/FrontendC/2007-05-29-UnionCopy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -o - -emit-llvm %s | grep memcpy
+// RUN: %llvmgcc -S -o - %s | grep memcpy
 // PR1421
 
 struct A {
diff --git a/test/FrontendC/2007-06-05-NoInlineAttribute.c b/test/FrontendC/2007-06-05-NoInlineAttribute.c
index b11b3c779684..9543538fb1b9 100644
--- a/test/FrontendC/2007-06-05-NoInlineAttribute.c
+++ b/test/FrontendC/2007-06-05-NoInlineAttribute.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -O2 -c -emit-llvm %s -o - | llvm-dis | grep call
+// RUN: %llvmgcc -O2 -S %s -o - | grep call
 
 static int bar(int x, int y) __attribute__((noinline));
 
diff --git a/test/FrontendC/2007-06-15-AnnotateAttribute.c b/test/FrontendC/2007-06-15-AnnotateAttribute.c
index 009911737036..115c3f73b90b 100644
--- a/test/FrontendC/2007-06-15-AnnotateAttribute.c
+++ b/test/FrontendC/2007-06-15-AnnotateAttribute.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -c -emit-llvm %s -o - | llvm-dis | grep llvm.global.annotations
-// RUN: %llvmgcc -c -emit-llvm %s -o - | llvm-dis | grep llvm.var.annotation | count 3 
+// RUN: %llvmgcc -S %s -o - | grep llvm.global.annotations
+// RUN: %llvmgcc -S %s -o - | grep llvm.var.annotation | count 3 
 
 #include <stdio.h>
 
diff --git a/test/FrontendC/2007-06-18-SextAttrAggregate.c b/test/FrontendC/2007-06-18-SextAttrAggregate.c
index 2fcd72607a2a..c395db220dc6 100644
--- a/test/FrontendC/2007-06-18-SextAttrAggregate.c
+++ b/test/FrontendC/2007-06-18-SextAttrAggregate.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -o - -S -emit-llvm -O3 | grep {i8 signext}
+// RUN: %llvmgcc %s -o - -S -O3 | grep {i8 signext}
 // PR1513
 
 struct s{
diff --git a/test/FrontendC/2007-07-29-RestrictPtrArg.c b/test/FrontendC/2007-07-29-RestrictPtrArg.c
index 99eae39054b3..5925d972b269 100644
--- a/test/FrontendC/2007-07-29-RestrictPtrArg.c
+++ b/test/FrontendC/2007-07-29-RestrictPtrArg.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -emit-llvm %s -o - | llvm-dis | grep noalias
+// RUN: %llvmgcc -S %s -o - | grep noalias
 
 void foo(int * __restrict myptr1, int * myptr2) {
   myptr1[0] = 0;
diff --git a/test/FrontendC/2007-08-01-LoadStoreAlign.c b/test/FrontendC/2007-08-01-LoadStoreAlign.c
index 75a82c14ad0f..5365c06c2579 100644
--- a/test/FrontendC/2007-08-01-LoadStoreAlign.c
+++ b/test/FrontendC/2007-08-01-LoadStoreAlign.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {align 1} | count 2
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llc
+// RUN: %llvmgcc -O3 -S -o - %s | grep {align 1} | count 2
+// RUN: %llvmgcc -O3 -S -o - %s | llc
 
 struct p {
   char a;
diff --git a/test/FrontendC/2007-08-21-ComplexCst.c b/test/FrontendC/2007-08-21-ComplexCst.c
index 7ddd87c08e48..ebdee14bba35 100644
--- a/test/FrontendC/2007-08-21-ComplexCst.c
+++ b/test/FrontendC/2007-08-21-ComplexCst.c
@@ -1,3 +1,3 @@
-// RUN: %llvmgcc -O2 -c %s -o /dev/null
+// RUN: %llvmgcc -O2 -S %s -o /dev/null
 void f(_Complex float z);
 void g() { f(1.0i); }
diff --git a/test/FrontendC/2007-09-05-ConstCtor.c b/test/FrontendC/2007-09-05-ConstCtor.c
index 8e0e9945ffaa..adae4a69b103 100644
--- a/test/FrontendC/2007-09-05-ConstCtor.c
+++ b/test/FrontendC/2007-09-05-ConstCtor.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -xc -Os -c %s -o /dev/null
+// RUN: %llvmgcc -xc -Os -S %s -o /dev/null
 // PR1641
 
 struct A {
diff --git a/test/FrontendC/2007-09-20-GcrootAttribute.c b/test/FrontendC/2007-09-20-GcrootAttribute.c
index 23cd37ff6a55..b67b474c4c1b 100644
--- a/test/FrontendC/2007-09-20-GcrootAttribute.c
+++ b/test/FrontendC/2007-09-20-GcrootAttribute.c
@@ -1,6 +1,6 @@
-// RUN: %llvmgcc -S -emit-llvm %s -o - | grep llvm.gcroot
-// RUN: %llvmgcc -S -emit-llvm %s -o - | grep llvm.gcroot | count 6
-// RUN: %llvmgcc -S -emit-llvm %s -o - | llvm-as
+// RUN: %llvmgcc -S %s -o - | grep llvm.gcroot
+// RUN: %llvmgcc -S %s -o - | grep llvm.gcroot | count 6
+// RUN: %llvmgcc -S %s -o - | llvm-as
 
 typedef struct foo_s
 {
diff --git a/test/FrontendC/2007-10-01-BuildArrayRef.c b/test/FrontendC/2007-10-01-BuildArrayRef.c
index e9037552308a..e87a5b630540 100644
--- a/test/FrontendC/2007-10-01-BuildArrayRef.c
+++ b/test/FrontendC/2007-10-01-BuildArrayRef.c
@@ -1,8 +1,20 @@
-// RUN: not %llvmgcc -S %s -o /dev/null |& grep "error: assignment of read-only location"
+// RUN: not %llvmgcc_only -c %s -o /dev/null |& FileCheck %s
 // PR 1603
-int func()
+void func()
 {
    const int *arr;
-   arr[0] = 1;
+   arr[0] = 1;  // CHECK: error: assignment of read-only location
 }
 
+struct foo {
+  int bar;
+};
+struct foo sfoo = { 0 };
+
+int func2()
+{
+  const struct foo *fp;
+  fp = &sfoo;
+  fp[0].bar = 1;  // CHECK: error: assignment of read-only member 'bar'
+  return sfoo.bar;
+}
diff --git a/test/FrontendC/2007-11-07-AlignedMemcpy.c b/test/FrontendC/2007-11-07-AlignedMemcpy.c
index f1900bb646b4..eb9d22c62523 100644
--- a/test/FrontendC/2007-11-07-AlignedMemcpy.c
+++ b/test/FrontendC/2007-11-07-AlignedMemcpy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s -o /dev/null
+// RUN: %llvmgcc -S %s -o /dev/null
 void bork() {
   int Qux[33] = {0};
 }
diff --git a/test/FrontendC/2007-11-27-SExtZExt.c b/test/FrontendC/2007-11-27-SExtZExt.c
index 2b6cd6232d52..8ea4786af369 100644
--- a/test/FrontendC/2007-11-27-SExtZExt.c
+++ b/test/FrontendC/2007-11-27-SExtZExt.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -emit-llvm -o - | grep "signext" | count 4
+// RUN: %llvmgcc -S %s -o - | grep "signext" | count 4
 
 signed char foo1() { return 1; }
 
diff --git a/test/FrontendC/2008-01-25-ByValReadNone.c b/test/FrontendC/2008-01-25-ByValReadNone.c
index 42e9c3626297..4cb1a6394eaa 100644
--- a/test/FrontendC/2008-01-25-ByValReadNone.c
+++ b/test/FrontendC/2008-01-25-ByValReadNone.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | not grep readonly
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | not grep readnone
+// RUN: %llvmgcc -O3 -S -o - %s | not grep readonly
+// RUN: %llvmgcc -O3 -S -o - %s | not grep readnone
 
 
 // The struct being passed byval means that we cannot mark the
diff --git a/test/FrontendC/2008-01-28-PragmaMark.c b/test/FrontendC/2008-01-28-PragmaMark.c
index 0b3ac17df32a..6a4b5b52ff2f 100644
--- a/test/FrontendC/2008-01-28-PragmaMark.c
+++ b/test/FrontendC/2008-01-28-PragmaMark.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -Werror -c %s -o /dev/null
+// RUN: %llvmgcc -Werror -S %s -o /dev/null
 #pragma mark LLVM's world
 #ifdef DO_ERROR
 #error LLVM's world
diff --git a/test/FrontendC/2008-03-03-CtorAttrType.c b/test/FrontendC/2008-03-03-CtorAttrType.c
index dc0e47d811a4..96648f4ec5a6 100644
--- a/test/FrontendC/2008-03-03-CtorAttrType.c
+++ b/test/FrontendC/2008-03-03-CtorAttrType.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | grep llvm.global_ctors
+// RUN: %llvmgcc %s -S -o - | grep llvm.global_ctors
 int __attribute__((constructor)) foo(void) {
   return 0;
 }
diff --git a/test/FrontendC/2008-03-05-syncPtr.c b/test/FrontendC/2008-03-05-syncPtr.c
index 43e46717b279..7b271f7ee747 100644
--- a/test/FrontendC/2008-03-05-syncPtr.c
+++ b/test/FrontendC/2008-03-05-syncPtr.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | grep llvm.atomic
+// RUN: %llvmgcc %s -S -o - | grep llvm.atomic
 // XFAIL: sparc-sun-solaris2|arm
 // Feature currently implemented only for x86, alpha, powerpc.
 
diff --git a/test/FrontendC/2008-05-19-AlwaysInline.c b/test/FrontendC/2008-05-19-AlwaysInline.c
index 506f6cf9c509..8dcb57b1862e 100644
--- a/test/FrontendC/2008-05-19-AlwaysInline.c
+++ b/test/FrontendC/2008-05-19-AlwaysInline.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc %s -S -fno-unit-at-a-time -emit-llvm -O0 -o - | not grep sabrina
-// RUN: %llvmgcc %s -S -funit-at-a-time -emit-llvm -O0 -o - | not grep sabrina
+// RUN: %llvmgcc %s -S -fno-unit-at-a-time -O0 -o - | not grep sabrina
+// RUN: %llvmgcc %s -S -funit-at-a-time -O0 -o - | not grep sabrina
 
 static inline int sabrina (void) __attribute__((always_inline));
 static inline int sabrina (void)
diff --git a/test/FrontendC/2008-08-07-AlignPadding1.c b/test/FrontendC/2008-08-07-AlignPadding1.c
index 776b1052f135..6be9fe4ed3b5 100644
--- a/test/FrontendC/2008-08-07-AlignPadding1.c
+++ b/test/FrontendC/2008-08-07-AlignPadding1.c
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc %s -S -o - -emit-llvm -O0 | grep {zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer}
+/* RUN: %llvmgcc %s -S -o - -O0 | grep {zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer}
 
 The FE must generate padding here both at the end of each PyG_Head and
 between array elements.  Reduced from Python. */
diff --git a/test/FrontendC/2008-08-07-AlignPadding2.c b/test/FrontendC/2008-08-07-AlignPadding2.c
index ea13a0a1bc67..51135ba633a2 100644
--- a/test/FrontendC/2008-08-07-AlignPadding2.c
+++ b/test/FrontendC/2008-08-07-AlignPadding2.c
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc %s -S -o - -emit-llvm -O0 | grep zeroinitializer | count 1
+/* RUN: %llvmgcc %s -S -o - -O0 | grep zeroinitializer | count 1
 
 The FE must not generate padding here between array elements.  PR 2533. */
 
diff --git a/test/FrontendC/2008-10-30-ZeroPlacement.c b/test/FrontendC/2008-10-30-ZeroPlacement.c
index ec4ea94a115f..d73442dca8b9 100644
--- a/test/FrontendC/2008-10-30-ZeroPlacement.c
+++ b/test/FrontendC/2008-10-30-ZeroPlacement.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s
+// RUN: %llvmgcc -S %s
 // PR2987
 struct S2045
 {
diff --git a/test/FrontendC/2008-11-02-WeakAlias.c b/test/FrontendC/2008-11-02-WeakAlias.c
index befafe455149..d10e57f5efe0 100644
--- a/test/FrontendC/2008-11-02-WeakAlias.c
+++ b/test/FrontendC/2008-11-02-WeakAlias.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -emit-llvm -o - %s | grep weak
+// RUN: %llvmgcc -S -o - %s | grep weak
 // PR2691
 
 void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
diff --git a/test/FrontendC/2008-11-08-InstCombineSelect.c b/test/FrontendC/2008-11-08-InstCombineSelect.c
index 70c8d3a22486..b850d3ff6f25 100644
--- a/test/FrontendC/2008-11-08-InstCombineSelect.c
+++ b/test/FrontendC/2008-11-08-InstCombineSelect.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O2 -o -
+// RUN: %llvmgcc %s -S -O2 -o -
 // PR3028
 
 int g_187;
diff --git a/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c b/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c
index fa5713e2482b..8af59d54f751 100644
--- a/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c
+++ b/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -emit-llvm %s -o - | llvm-dis | grep llvm.ptr.annotation | count 3
+// RUN: %llvmgcc -S %s -o - | grep llvm.ptr.annotation | count 3
 
 #include <stdio.h>
 
diff --git a/test/FrontendC/2008-12-23-AsmIntPointerTie.c b/test/FrontendC/2008-12-23-AsmIntPointerTie.c
index da2eda6628ed..57061422b8f2 100644
--- a/test/FrontendC/2008-12-23-AsmIntPointerTie.c
+++ b/test/FrontendC/2008-12-23-AsmIntPointerTie.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O1 -o - 
+// RUN: %llvmgcc %s -S -O1 -o - 
 
 #include <stdint.h>
 
diff --git a/test/FrontendC/2009-01-05-BlockInlining.c b/test/FrontendC/2009-01-05-BlockInlining.c
index 9692d8f688ab..8fb6e54514a5 100644
--- a/test/FrontendC/2009-01-05-BlockInlining.c
+++ b/test/FrontendC/2009-01-05-BlockInlining.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O2 -o %t.s
+// RUN: %llvmgcc %s -S -O2 -o %t.s
 // RUN: grep {call i32 .*printf.*argc} %t.s | count 3
 // RUN: not grep __block_holder_tmp %t.s
 // rdar://5865221
diff --git a/test/FrontendC/2009-03-13-dbg.c b/test/FrontendC/2009-03-13-dbg.c
index aa13af41248c..46abd3a96382 100644
--- a/test/FrontendC/2009-03-13-dbg.c
+++ b/test/FrontendC/2009-03-13-dbg.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -g -o /dev/null
+// RUN: %llvmgcc %s -S -g -o /dev/null
 // XTARGET: darwin,linux
 // XFAIL: *
 void foo() {}
diff --git a/test/FrontendC/2009-05-04-EnumInreg.c b/test/FrontendC/2009-05-04-EnumInreg.c
index 6dbdb54db5f6..fb0c03e439e6 100644
--- a/test/FrontendC/2009-05-04-EnumInreg.c
+++ b/test/FrontendC/2009-05-04-EnumInreg.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -m32 -mregparm=3 %s -emit-llvm -o - | grep {inreg %action}
+// RUN: %llvmgcc -S -m32 -mregparm=3 %s -o - | grep {inreg %action}
 // XFAIL: *
 // XTARGET: x86,i386,i686
 // PR3967
diff --git a/test/FrontendC/2010-01-13-MemBarrier.c b/test/FrontendC/2010-01-13-MemBarrier.c
index 8fcd5228781e..a540e59c6caa 100644
--- a/test/FrontendC/2010-01-13-MemBarrier.c
+++ b/test/FrontendC/2010-01-13-MemBarrier.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
 // XFAIL: sparc
 // rdar://7536390
 
diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c
index 12e91405d10f..33b87703220c 100644
--- a/test/FrontendC/2010-05-18-asmsched.c
+++ b/test/FrontendC/2010-05-18-asmsched.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -O3 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s
+// RUN: %llvmgcc %s -S -O3 -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s
 // r9 used to be clobbered before its value was moved to r10.  7993104.
 
 void foo(int x, int y) {
diff --git a/test/FrontendC/2010-05-26-AsmSideEffect.c b/test/FrontendC/2010-05-26-AsmSideEffect.c
index c5f75799dd3b..acc38b783ba3 100644
--- a/test/FrontendC/2010-05-26-AsmSideEffect.c
+++ b/test/FrontendC/2010-05-26-AsmSideEffect.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
 // Radar 8026855
 
 int test (void *src) {
diff --git a/test/FrontendC/2010-06-28-nowarn.c b/test/FrontendC/2010-06-28-nowarn.c
index 9cfb6636fdef..3db8df10c189 100644
--- a/test/FrontendC/2010-06-28-nowarn.c
+++ b/test/FrontendC/2010-06-28-nowarn.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null
+// RUN: %llvmgcc %s -S -m32 -fasm-blocks -o /dev/null
 // This should not warn about unreferenced label. 7729514.
 // XFAIL: *
 // XTARGET: x86,i386,i686
diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c
index 1744ba84185d..c4a9caac6666 100644
--- a/test/FrontendC/2010-07-14-overconservative-align.c
+++ b/test/FrontendC/2010-07-14-overconservative-align.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -emit-llvm -S -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
 // PR 5995
 struct s {
     int word;
diff --git a/test/FrontendC/2010-11-16-asmblock.c b/test/FrontendC/2010-11-16-asmblock.c
new file mode 100644
index 000000000000..c2642235cfcc
--- /dev/null
+++ b/test/FrontendC/2010-11-16-asmblock.c
@@ -0,0 +1,16 @@
+// RUN: %llvmgcc -S %s -fasm-blocks -o - | FileCheck %s
+// XFAIL: *
+// XTARGET: x86,i386,i686
+// 84282548
+
+void foo()
+{
+// CHECK:  %0 = call i32 asm sideeffect "", "={ecx}"() nounwind 
+// CHECK:  %asmtmp = call i32 asm sideeffect alignstack "sall $$3, $0", "={ecx},{ecx},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %0) nounwind 
+// CHECK:  store i32 %asmtmp, i32* %"%ecx"
+ __asm {
+   sal ecx, 3;
+   add esi, ecx;
+   add edi, ecx;
+ }
+}
diff --git a/test/FrontendC/2010-12-01-CommonGlobal.c b/test/FrontendC/2010-12-01-CommonGlobal.c
new file mode 100644
index 000000000000..3f6d7e885807
--- /dev/null
+++ b/test/FrontendC/2010-12-01-CommonGlobal.c
@@ -0,0 +1,7 @@
+// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null
+// Don't crash on a common-linkage constant global.
+extern const int kABSourceTypeProperty;
+int foo(void) {
+  return kABSourceTypeProperty;
+}
+const int kABSourceTypeProperty;
diff --git a/test/FrontendC/arrayderef.c b/test/FrontendC/arrayderef.c
new file mode 100644
index 000000000000..66c2e0ba4165
--- /dev/null
+++ b/test/FrontendC/arrayderef.c
@@ -0,0 +1,17 @@
+// RUN: %llvmgcc %s -S -O -o - | FileCheck %s
+// The load here was getting lost because this code was close
+// enough to the traditional (wrong) implementation of offsetof
+// to confuse the gcc FE.  8629268.
+
+struct foo {
+  int x;
+  int *y;
+};
+
+struct foo Foo[1];
+
+int * bar(unsigned int ix) {
+// CHECK: load
+  return &Foo->y[ix];
+}
+
diff --git a/test/FrontendC/attribute_constructor.c b/test/FrontendC/attribute_constructor.c
index b2f7c9b19fc0..da17a37e2606 100644
--- a/test/FrontendC/attribute_constructor.c
+++ b/test/FrontendC/attribute_constructor.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -o - | llvm-dis | grep llvm.global_ctors
+// RUN: %llvmgcc %s -S -o - | grep llvm.global_ctors
 
 void foo() __attribute__((constructor));
 void foo() {
diff --git a/test/FrontendC/block-copy.c b/test/FrontendC/block-copy.c
index a53732e4f9ff..c088f2dc1955 100644
--- a/test/FrontendC/block-copy.c
+++ b/test/FrontendC/block-copy.c
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc %s -S -o - -emit-llvm -O3 | grep {call.*memcpy}
+/* RUN: %llvmgcc %s -S -o - -O3 | grep {call.*memcpy}
 
  This should compile into a memcpy from a global, not 128 stores. */
 
diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c
index 764126e02184..544c9f3d3fbb 100644
--- a/test/FrontendC/cstring-align.c
+++ b/test/FrontendC/cstring-align.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -Os -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s
+// RUN: %llvmgcc %s -S -Os -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s
 
 extern void func(const char *, const char *);
 
diff --git a/test/FrontendC/extern-weak.c b/test/FrontendC/extern-weak.c
index 4729b048fbe0..73b59cc48c40 100644
--- a/test/FrontendC/extern-weak.c
+++ b/test/FrontendC/extern-weak.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep extern_weak
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llc
+// RUN: %llvmgcc -O3 -S -o - %s | grep extern_weak
+// RUN: %llvmgcc -O3 -S -o - %s | llc
 
 #if !defined(__linux__) && !defined(__FreeBSD__) && \
     !defined(__OpenBSD__) && !defined(__CYGWIN__) && !defined(__DragonFly__)
diff --git a/test/FrontendC/func-aligned.c b/test/FrontendC/func-aligned.c
index 40149f49d8ec..477e82418aef 100644
--- a/test/FrontendC/func-aligned.c
+++ b/test/FrontendC/func-aligned.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
 
 // rdar://7270273
 void foo() __attribute__((aligned (64)));
diff --git a/test/FrontendC/hidden-visibility.c b/test/FrontendC/hidden-visibility.c
index fc2ae444b404..589bb53453f3 100644
--- a/test/FrontendC/hidden-visibility.c
+++ b/test/FrontendC/hidden-visibility.c
@@ -1,3 +1,3 @@
-// RUN: %llvmgcc %s -emit-llvm -S -o - | grep {hidden global}
+// RUN: %llvmgcc %s -S -o - | grep {hidden unnamed_addr global}
 
 int X __attribute__ ((__visibility__ ("hidden"))) = 123;
diff --git a/test/FrontendC/implicit-arg.c b/test/FrontendC/implicit-arg.c
index 971245f3badc..a6cb8bce7ed6 100644
--- a/test/FrontendC/implicit-arg.c
+++ b/test/FrontendC/implicit-arg.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o -
-// RUN: %llvmgcc %s -S -emit-llvm -O1 -o -
+// RUN: %llvmgcc %s -S -O0 -o -
+// RUN: %llvmgcc %s -S -O1 -o -
 // rdar://6518089
 
 static int bar();
diff --git a/test/FrontendC/libcalls-d.c b/test/FrontendC/libcalls-d.c
index 126866ad6297..d92208d89edd 100644
--- a/test/FrontendC/libcalls-d.c
+++ b/test/FrontendC/libcalls-d.c
@@ -1,10 +1,10 @@
 // llvm-gcc -O1+ should run simplify libcalls, O0 shouldn't
 // and -fno-builtins shouldn't.
 // -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not.
-// RUN: %llvmgcc %s -S -fno-math-errno -emit-llvm -O0 -o - | grep {call.*exp2\\.f64}
-// RUN: %llvmgcc %s -S -fmath-errno -emit-llvm -O0 -o - | grep {call.*exp2}
-// RUN: %llvmgcc %s -S -emit-llvm -O1 -o - | grep {call.*ldexp}
-// RUN: %llvmgcc %s -S -emit-llvm -O3 -fno-builtin -o - | grep {call.*exp2}
+// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\.f64}
+// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2}
+// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp}
+// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2}
 
 double exp2(double);
 
diff --git a/test/FrontendC/libcalls-ld.c b/test/FrontendC/libcalls-ld.c
index 6533eb88d9e5..cf71d19eaa35 100644
--- a/test/FrontendC/libcalls-ld.c
+++ b/test/FrontendC/libcalls-ld.c
@@ -1,10 +1,10 @@
 // llvm-gcc -O1+ should run simplify libcalls, O0 shouldn't
 // and -fno-builtins shouldn't.
 // -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not.
-// RUN: %llvmgcc %s -S -fno-math-errno -emit-llvm -O0 -o - | grep {call.*exp2\\..*f}
-// RUN: %llvmgcc %s -S -fmath-errno -emit-llvm -O0 -o - | grep {call.*exp2l}
-// RUN: %llvmgcc %s -S -emit-llvm -O1 -o - | grep {call.*ldexp}
-// RUN: %llvmgcc %s -S -emit-llvm -O3 -fno-builtin -o - | grep {call.*exp2l}
+// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\..*f}
+// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2l}
+// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp}
+// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2l}
 
 // If this fails for you because your target doesn't support long double,
 // please xfail the test.
diff --git a/test/FrontendC/libcalls.c b/test/FrontendC/libcalls.c
index a2761dd5b004..60e22e7e690a 100644
--- a/test/FrontendC/libcalls.c
+++ b/test/FrontendC/libcalls.c
@@ -1,10 +1,10 @@
 // llvm-gcc -O1+ should run simplify libcalls, O0 shouldn't
 // and -fno-builtins shouldn't.
 // -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not.
-// RUN: %llvmgcc %s -S -emit-llvm -fno-math-errno -O0 -o - | grep {call.*exp2\\.f32}
-// RUN: %llvmgcc %s -S -emit-llvm -fmath-errno -O0 -o - | grep {call.*exp2f}
-// RUN: %llvmgcc %s -S -emit-llvm -O1 -o - | grep {call.*ldexp}
-// RUN: %llvmgcc %s -S -emit-llvm -O3 -fno-builtin -o - | grep {call.*exp2f}
+// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\.f32}
+// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2f}
+// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp}
+// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2f}
 
 float exp2f(float);
 
diff --git a/test/FrontendC/pr3518.c b/test/FrontendC/pr3518.c
index 4c193c7739b2..112394a651b4 100644
--- a/test/FrontendC/pr3518.c
+++ b/test/FrontendC/pr3518.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep {= internal global} | count 4
+// RUN: %llvmgcc %s -S -O0 -o - | grep {= internal unnamed_addr global} | count 4
 // PR 3518
 // Some of the objects were coming out as unintialized (external) before 3518
 // was fixed.  Internal names are different between llvm-gcc and clang so they
diff --git a/test/FrontendC/pr4349.c b/test/FrontendC/pr4349.c
index fbd7e56eba15..24acd9c950f9 100644
--- a/test/FrontendC/pr4349.c
+++ b/test/FrontendC/pr4349.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s
 // PR 4349
 
 union reg
@@ -16,22 +16,22 @@ struct svar
 {
     void *ptr;
 };
-// CHECK: @svars1 = global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
+// CHECK: @svars1 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }]
 struct svar svars1[] =
 {
     { &((cpu.pc).w[0]) }
 };
-// CHECK: @svars2 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x i8]* bitcast (%struct.cpu* @cpu to [2 x i8]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) }]
+// CHECK: @svars2 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x i8]* bitcast (%struct.cpu* @cpu to [2 x i8]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) }]
 struct svar svars2[] =
 {
     { &((cpu.pc).b[0][1]) }
 };
-// CHECK: @svars3 = global [1 x %struct.svar] [%struct.svar { i8* bitcast (i16* getelementptr ([2 x i16]* bitcast (%struct.cpu* @cpu to [2 x i16]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) to i8*) }]
+// CHECK: @svars3 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (i16* getelementptr ([2 x i16]* bitcast (%struct.cpu* @cpu to [2 x i16]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) to i8*) }]
 struct svar svars3[] =
 {
     { &((cpu.pc).w[1]) }
 };
-// CHECK: @svars4 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x [2 x i8]]* bitcast (%struct.cpu* @cpu to [2 x [2 x i8]]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1, i{{[0-9]+}} 1) }]
+// CHECK: @svars4 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x [2 x i8]]* bitcast (%struct.cpu* @cpu to [2 x [2 x i8]]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1, i{{[0-9]+}} 1) }]
 struct svar svars4[] =
 {
     { &((cpu.pc).b[1][1]) }
diff --git a/test/FrontendC/pr5406.c b/test/FrontendC/pr5406.c
index 492bdafa7eda..0b1f277592fb 100644
--- a/test/FrontendC/pr5406.c
+++ b/test/FrontendC/pr5406.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | FileCheck %s
+// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s
 // PR 5406
 
 // XFAIL: *
diff --git a/test/FrontendC/ptr-rotate.c b/test/FrontendC/ptr-rotate.c
index 56c21f46e7ae..36d9755dd674 100644
--- a/test/FrontendC/ptr-rotate.c
+++ b/test/FrontendC/ptr-rotate.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc %s -c -m32 -o /dev/null
-// RUN: %llvmgcc %s -c -O1 -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+// RUN: %llvmgcc %s -S -m32 -o /dev/null
+// RUN: %llvmgcc %s -S -O1 -m32 -o - | llc -march=x86 -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
 
 unsigned int func(void *A) {
   // DARWIN: roll $27
diff --git a/test/FrontendC/sret.c b/test/FrontendC/sret.c
index 11ac5d6824e8..42666917a8df 100644
--- a/test/FrontendC/sret.c
+++ b/test/FrontendC/sret.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep sret | count 5
+// RUN: %llvmgcc %s -S -O0 -o - | grep sret | count 5
 
 struct abc {
  long a;
diff --git a/test/FrontendC/sret2.c b/test/FrontendC/sret2.c
index 7b621f942fa9..0f35b1c2586f 100644
--- a/test/FrontendC/sret2.c
+++ b/test/FrontendC/sret2.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep sret | count 2
+// RUN: %llvmgcc %s -S -O0 -o - | grep sret | count 2
 
 struct abc {
  long a;
diff --git a/test/FrontendC/unaligned-memcpy.c b/test/FrontendC/unaligned-memcpy.c
index 9e6ce07e367e..8fb84e4f5150 100644
--- a/test/FrontendC/unaligned-memcpy.c
+++ b/test/FrontendC/unaligned-memcpy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
+// RUN: %llvmgcc %s -S -o - | llc
 
 void bork() {
   char Qux[33] = {0};
diff --git a/test/FrontendFortran/2008-11-03-OptionOverride.f90 b/test/FrontendFortran/2008-11-03-OptionOverride.f90
index 316e722cd341..d65ba9b4736f 100644
--- a/test/FrontendFortran/2008-11-03-OptionOverride.f90
+++ b/test/FrontendFortran/2008-11-03-OptionOverride.f90
@@ -1,4 +1,4 @@
-! RUN: %llvmgcc -c %s -march=k8
+! RUN: %llvmgcc -S %s -march=k8
 ! XTARGET: x86
 ! Note: this file intentionally left blank, the problem itself is in
 ! frontend initialization routines and march flag!
diff --git a/test/FrontendFortran/2009-02-09-FloorDivExpr.f90 b/test/FrontendFortran/2009-02-09-FloorDivExpr.f90
index 870e99bd0866..ddd05c549496 100644
--- a/test/FrontendFortran/2009-02-09-FloorDivExpr.f90
+++ b/test/FrontendFortran/2009-02-09-FloorDivExpr.f90
@@ -1,4 +1,4 @@
-! RUN: %llvmgcc -c %s
+! RUN: %llvmgcc -S %s
 ! PR2437
 program main
   implicit none
diff --git a/test/FrontendFortran/cpow.f90 b/test/FrontendFortran/cpow.f90
index 19ae3784c218..25156fd58971 100644
--- a/test/FrontendFortran/cpow.f90
+++ b/test/FrontendFortran/cpow.f90
@@ -1,4 +1,4 @@
-! RUN: %llvmgcc -c %s
+! RUN: %llvmgcc -S %s
 ! PR2443
 
 ! Program to test the power (**) operator
diff --git a/test/FrontendObjC++/2007-10-03-MetadataPointers.mm b/test/FrontendObjC++/2007-10-03-MetadataPointers.mm
index 5975e38f873d..2ab76c1db595 100644
--- a/test/FrontendObjC++/2007-10-03-MetadataPointers.mm
+++ b/test/FrontendObjC++/2007-10-03-MetadataPointers.mm
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -x objective-c++ -c %s -o /dev/null
+// RUN: %llvmgcc -w -x objective-c++ -S %s -o /dev/null
 
 @class NSImage;
 void bork() {
diff --git a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm
index 298844e97b5d..da47ed0c1222 100644
--- a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm
+++ b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm
@@ -1,4 +1,4 @@
-// RUN: not %llvmgcc %s -S -emit-llvm -o - |& FileCheck %s
+// RUN: not %llvmgcc %s -S -o - |& FileCheck %s
 // This tests for a specific diagnostic in LLVM-GCC.
 // Clang compiles this correctly with no diagnostic,
 // ergo this test will fail with a Clang-based front-end.
diff --git a/test/FrontendObjC++/2010-08-04-Template.mm b/test/FrontendObjC++/2010-08-04-Template.mm
index d0383406d7e2..2ebfd3e17cef 100644
--- a/test/FrontendObjC++/2010-08-04-Template.mm
+++ b/test/FrontendObjC++/2010-08-04-Template.mm
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm
+// RUN: %llvmgcc %s -S
 struct TRunSoon {
   template <class P1> static void Post() {}
 };
diff --git a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm
index b33d7307af49..986094c07235 100644
--- a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm
+++ b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm
+// RUN: %llvmgcc %s -S
 struct TFENode {
   TFENode(const TFENode& inNode);
 };
diff --git a/test/FrontendObjC/2007-04-03-ObjcEH.m b/test/FrontendObjC/2007-04-03-ObjcEH.m
index 353323d325f1..ae744c785009 100644
--- a/test/FrontendObjC/2007-04-03-ObjcEH.m
+++ b/test/FrontendObjC/2007-04-03-ObjcEH.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s -o /dev/null
+// RUN: %llvmgcc -S %s -o /dev/null
 
 @interface B 
 -(int)bar;
diff --git a/test/FrontendObjC/2007-05-02-Strong.m b/test/FrontendObjC/2007-05-02-Strong.m
index 3778fd29f436..34b41ad964f5 100644
--- a/test/FrontendObjC/2007-05-02-Strong.m
+++ b/test/FrontendObjC/2007-05-02-Strong.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s -fobjc-gc -o /dev/null
+// RUN: %llvmgcc -S %s -fobjc-gc -o /dev/null
 typedef int NSInteger;
 typedef struct _NSRect {
   int origin;
diff --git a/test/FrontendObjC/2007-09-25-EH.m b/test/FrontendObjC/2007-09-25-EH.m
index 5fa9cbb85d82..d625584a6c54 100644
--- a/test/FrontendObjC/2007-09-25-EH.m
+++ b/test/FrontendObjC/2007-09-25-EH.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c -w -m64 -mmacosx-version-min=10.5 %s -o /dev/null
+// RUN: %llvmgcc -S -w -m64 -mmacosx-version-min=10.5 %s -o /dev/null
 // XFAIL: *
 // XTARGET: darwin
 @class NSDictionary, DSoBuffer, DSoDirectory, NSMutableArray;
diff --git a/test/FrontendObjC/2007-10-18-ProDescriptor.m b/test/FrontendObjC/2007-10-18-ProDescriptor.m
index e87a43ffc979..220fdd2c2329 100644
--- a/test/FrontendObjC/2007-10-18-ProDescriptor.m
+++ b/test/FrontendObjC/2007-10-18-ProDescriptor.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -x objective-c -c %s -o /dev/null
+// RUN: %llvmgcc -x objective-c -S %s -o /dev/null
 @protocol O
 @end
 @interface O < O > {
diff --git a/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m b/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m
index 866c3304887e..4bbe4407bed9 100644
--- a/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m
+++ b/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -x objective-c -c %s -o /dev/null -fobjc-gc
+// RUN: %llvmgcc -x objective-c -S %s -o /dev/null -fobjc-gc
 // rdar://5541393
 
 typedef unsigned int NSUInteger;
diff --git a/test/FrontendObjC/2008-10-3-EhValue.m b/test/FrontendObjC/2008-10-3-EhValue.m
index a4c0cae075ad..c7aabe271eeb 100644
--- a/test/FrontendObjC/2008-10-3-EhValue.m
+++ b/test/FrontendObjC/2008-10-3-EhValue.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -w -x objective-c -c %s -o /dev/null
+// RUN: %llvmgcc -w -x objective-c -S %s -o /dev/null
 
 @interface Object {
 @public
diff --git a/test/FrontendObjC/2008-11-12-Metadata.m b/test/FrontendObjC/2008-11-12-Metadata.m
index 7e9f02853991..be8ee41e77ad 100644
--- a/test/FrontendObjC/2008-11-12-Metadata.m
+++ b/test/FrontendObjC/2008-11-12-Metadata.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -x objective-c -m64 -c %s -o /dev/null
+// RUN: %llvmgcc -x objective-c -m64 -S %s -o /dev/null
 
 @interface A
 @end
diff --git a/test/FrontendObjC/2008-11-25-Blocks.m b/test/FrontendObjC/2008-11-25-Blocks.m
index 258d70a54f0f..c5cd3d2a0b2f 100644
--- a/test/FrontendObjC/2008-11-25-Blocks.m
+++ b/test/FrontendObjC/2008-11-25-Blocks.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -c %s -o /dev/null
+// RUN: %llvmgcc -S %s -o /dev/null
 // rdar://6394879
 
 @interface bork
diff --git a/test/FrontendObjC/2009-02-05-VolatileProp.m b/test/FrontendObjC/2009-02-05-VolatileProp.m
index 461f92b51d2c..1deef739bee2 100644
--- a/test/FrontendObjC/2009-02-05-VolatileProp.m
+++ b/test/FrontendObjC/2009-02-05-VolatileProp.m
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc -w -x objective-c -c %s -o /dev/null -pedantic-errors
+/* RUN: %llvmgcc -w -x objective-c -S %s -o /dev/null -pedantic-errors
    rdar://6551276 */
 
 void foo(const unsigned short *);
diff --git a/test/FrontendObjC/2009-04-14-AsmSection.m b/test/FrontendObjC/2009-04-14-AsmSection.m
index de2cef00ff56..aefe08876716 100644
--- a/test/FrontendObjC/2009-04-14-AsmSection.m
+++ b/test/FrontendObjC/2009-04-14-AsmSection.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S %s -fobjc-abi-version=2 -emit-llvm -o %t
+// RUN: %llvmgcc -S %s -fobjc-abi-version=2 -o %t
 // RUN: grep {OBJC_CLASS_\\\$_A.*section.*__DATA, __objc_data.*align} %t
 // XTARGET: darwin
 
diff --git a/test/FrontendObjC/2009-08-05-utf16.m b/test/FrontendObjC/2009-08-05-utf16.m
index 2964ecfd4991..df3745c48700 100644
--- a/test/FrontendObjC/2009-08-05-utf16.m
+++ b/test/FrontendObjC/2009-08-05-utf16.m
@@ -1,4 +1,4 @@
-/* RUN: %llvmgcc -w -x objective-c -S %s -o - | grep {__utf16_string_1} | grep {internal constant} | grep {12 x i8}
+/* RUN: %llvmgcc -w -x objective-c -S %s -o - | grep {__utf16_string_1} | grep {internal unnamed_addr constant} | grep {12 x i8}
    rdar://7095855 rdar://7115749 */
 
 void *P = @"iPod™";
diff --git a/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m b/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m
index 13e16312cf66..bb00f6a1e2c6 100644
--- a/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m
+++ b/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m
@@ -1,6 +1,6 @@
 // RUN: %llvmgcc -x objective-c -fwritable-strings -S %s -o - | FileCheck %s
-// CHECK: @.str = private constant
-// CHECK: @.str1 = internal global
+// CHECK: @.str = private unnamed_addr constant
+// CHECK: @.str1 = internal unnamed_addr global
 
 // rdar://7634471
 
diff --git a/test/LLVMC/C++/dg.exp b/test/LLVMC/C++/dg.exp
index fc852e30acf8..209345540c11 100644
--- a/test/LLVMC/C++/dg.exp
+++ b/test/LLVMC/C++/dg.exp
@@ -1,5 +1,5 @@
 load_lib llvm.exp
 
 if [ llvm_gcc_supports c++ ] then {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{unk,ll,c,cpp}]]
 }
diff --git a/test/LLVMC/C++/just-compile.cpp b/test/LLVMC/C++/just-compile.cpp
new file mode 100644
index 000000000000..771c9822da69
--- /dev/null
+++ b/test/LLVMC/C++/just-compile.cpp
@@ -0,0 +1,10 @@
+// Test that the -c flag works.
+// RUN: llvmc -c %s -o %t.o
+// RUN: llvmc --linker=c++ %t.o -o %t
+// RUN: %abs_tmp | grep hello
+// XFAIL: vg
+#include <iostream>
+
+int main() {
+    std::cout << "hello" << '\n';
+}
diff --git a/test/LLVMC/C++/unknown_suffix.unk b/test/LLVMC/C++/unknown_suffix.unk
new file mode 100644
index 000000000000..bf4aea286247
--- /dev/null
+++ b/test/LLVMC/C++/unknown_suffix.unk
@@ -0,0 +1,9 @@
+// Test that the -x option works for files with unknown suffixes.
+// RUN: llvmc -x c++ %s -o %t
+// RUN: %abs_tmp | grep hello
+// XFAIL: vg
+#include <iostream>
+
+int main() {
+    std::cout << "hello" << '\n';
+}
diff --git a/test/LLVMC/C/emit-llvm-opt.c b/test/LLVMC/C/emit-llvm-opt.c
new file mode 100644
index 000000000000..50710cf9dba2
--- /dev/null
+++ b/test/LLVMC/C/emit-llvm-opt.c
@@ -0,0 +1,9 @@
+// Check that -emit-llvm [-S] works with -opt.
+
+// RUN: llvmc -c -opt -emit-llvm -o - %s | llvm-dis | grep "@f0()" | count 1
+// RUN: llvmc -c -opt -emit-llvm -S -o - %s | grep "@f0()" | count 1
+// RUN: llvmc --dry-run -c -opt -emit-llvm %s |& grep "^opt"
+// XFAIL: vg_leak
+
+int f0(void) {
+}
diff --git a/test/LLVMC/C/emit-llvm.c b/test/LLVMC/C/emit-llvm.c
index 9844bc757cc6..56a1e30b2e42 100644
--- a/test/LLVMC/C/emit-llvm.c
+++ b/test/LLVMC/C/emit-llvm.c
@@ -1,4 +1,7 @@
+// Check that -emit-llvm [-S] works correctly.
+
 // RUN: llvmc -c -emit-llvm -o - %s | llvm-dis | grep "@f0()" | count 1
+// RUN: llvmc -c -emit-llvm -S -o - %s | grep "@f0()" | count 1
 // XFAIL: vg_leak
 
 int f0(void) {
diff --git a/test/LLVMC/MultipleOutputLanguages.td b/test/LLVMC/MultipleOutputLanguages.td
new file mode 100644
index 000000000000..ae0c92eefcf1
--- /dev/null
+++ b/test/LLVMC/MultipleOutputLanguages.td
@@ -0,0 +1,27 @@
+// Check that multiple output languages work.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx %t
+// XFAIL: vg_leak
+
+include "llvm/CompilerDriver/Common.td"
+
+def dummy_tool : Tool<[
+    (command "dummy_cmd"),
+    (in_language "dummy_lang"),
+    (out_language "another_dummy_lang", "yet_another_dummy_lang")
+]>;
+
+def another_dummy_tool : Tool<[
+    (command "another_dummy_cmd"),
+    (in_language "another_dummy_lang", "some_other_dummy_lang"),
+    (out_language "executable"),
+    (join)
+]>;
+
+// CHECK: new SimpleEdge("dummy_tool")
+// CHECK: new SimpleEdge("another_dummy_tool")
+def DummyGraph : CompilationGraph<[
+    (edge "root", "dummy_tool"),
+    (edge "dummy_tool", "another_dummy_tool")
+]>;
diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td
index 8019c42634f3..5fdc35a187eb 100644
--- a/test/LLVMC/OptionPreprocessor.td
+++ b/test/LLVMC/OptionPreprocessor.td
@@ -22,7 +22,7 @@ def Preprocess : OptionPreprocessor<
       // CHECK: foo = false;
       // CHECK: foo_p = "";
       // CHECK: foo_l.clear();
-      (and (switch_on "foo"), (any_switch_on ["bar", "baz"])),
+      (and (switch_on "foo"), (any_switch_on "bar", "baz")),
            [(warning "W1"), (unset_option "foo"),
                             (unset_option "foo_p"), (unset_option "foo_l")],
       // CHECK: W2
@@ -34,7 +34,7 @@ def Preprocess : OptionPreprocessor<
       // CHECK: foo_l.push_back("qwert");
       // CHECK: foo_l.push_back("yuiop");
       // CHECK: foo_l.push_back("asdf");
-      (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])),
+      (and (switch_on "foo", "bar"), (any_empty "foo_p", "bar_p")),
            [(warning "W2"), (set_option "foo"),
                             (set_option "bar", true),
                             (set_option "baz", false),
@@ -44,8 +44,8 @@ def Preprocess : OptionPreprocessor<
       // CHECK: foo = true;
       // CHECK: bar = true;
       // CHECK: baz = true;
-      (and (empty ["foo_p", "bar_p"]), (any_not_empty ["baz_p"])),
-           [(warning "W3"), (set_option ["foo", "bar", "baz"])])
+      (and (empty "foo_p", "bar_p"), (any_not_empty "baz_p")),
+           [(warning "W3"), (set_option "foo", "bar", "baz")])
 >;
 
 // Shut up warnings...
diff --git a/test/Linker/PR8300.ll b/test/Linker/PR8300.ll
new file mode 100644
index 000000000000..f0fc1e7a5cc4
--- /dev/null
+++ b/test/Linker/PR8300.ll
@@ -0,0 +1,13 @@
+; RUN: echo {%foo2 = type \{ \[8 x i8\] \} \
+; RUN:       declare void @zed(%foo2*) } > %t.ll
+; RUN: llvm-link %t.ll %s -o %t.bc
+
+%foo = type { [8 x i8] }
+%bar = type { [9 x i8] }
+
+@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
+
+define void @xyz(%bar* %this) {
+entry:
+  ret void
+}
diff --git a/test/Linker/available_externally_a.ll b/test/Linker/available_externally_a.ll
new file mode 100644
index 000000000000..3ae4ce29140a
--- /dev/null
+++ b/test/Linker/available_externally_a.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-link %s %p/available_externally_b.ll -S -o - | FileCheck %s
+
+@foo = available_externally unnamed_addr constant i32 0
+
+; CHECK: @foo = hidden unnamed_addr constant i32 0
diff --git a/test/Linker/available_externally_b.ll b/test/Linker/available_externally_b.ll
new file mode 100644
index 000000000000..526981715a6e
--- /dev/null
+++ b/test/Linker/available_externally_b.ll
@@ -0,0 +1,4 @@
+; This file is for use with available_externally_a.ll
+; RUN: true
+
+@foo = hidden unnamed_addr constant i32 0
diff --git a/test/Linker/link-archive.ll b/test/Linker/link-archive.ll
index 6696fcc68c37..9251b4e597ff 100644
--- a/test/Linker/link-archive.ll
+++ b/test/Linker/link-archive.ll
@@ -1,5 +1,6 @@
 ; Test linking of a bc file to an archive via llvm-ld. 
 ; PR1434
+; RUN: rm -f %t.bar.a %t.foo.a
 ; RUN: llvm-as %s -o %t.bar.bc
 ; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \
 ; RUN:   @baz = external global i32 } | llvm-as -o %t.foo.bc
diff --git a/test/Linker/linkmdnode.ll b/test/Linker/linkmdnode.ll
index be7455056cd9..5f1158039fce 100644
--- a/test/Linker/linkmdnode.ll
+++ b/test/Linker/linkmdnode.ll
@@ -10,3 +10,4 @@ define void @foo() {
   %x = call i8 @llvm.something(metadata !21)
   ret void
 }
+
diff --git a/test/Linker/linkmdnode2.ll b/test/Linker/linkmdnode2.ll
index 54a5a578b60b..a7d991a8a4f8 100644
--- a/test/Linker/linkmdnode2.ll
+++ b/test/Linker/linkmdnode2.ll
@@ -10,3 +10,13 @@ define void @foo1() {
   %x = call i8 @llvm.something(metadata !22)
   ret void
 }
+
+
+
+; PR9015
+define void @test() {
+  ret void, !abc !0
+}
+
+!0 = metadata !{metadata !0, i32 42 }
+
diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll
new file mode 100644
index 000000000000..1ddac9ccc028
--- /dev/null
+++ b/test/Linker/unnamed-addr1-a.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-link %s %p/unnamed-addr1-b.ll -S -o - | sort | FileCheck %s
+
+; Only in this file
+@a = common global i32 0
+; CHECK: @a = common global i32 0
+@b = common unnamed_addr global i32 0
+; CHECK: @b = common unnamed_addr global i32 0
+
+; Other file has unnamed_addr definition
+@c = common unnamed_addr global i32 0
+; CHECK: @c = common unnamed_addr global i32 0
+@d = external global i32
+; CHECK: @d = global i32 42
+@e = external unnamed_addr global i32
+; CHECK: @e = unnamed_addr global i32 42
+@f = weak global i32 42
+; CHECK: @f = global i32 42
+
+; Other file has non-unnamed_addr definition
+@g = common unnamed_addr global i32 0
+; CHECK: @g = common global i32 0
+@h = external global i32
+; CHECK: @h = global i32 42
+@i = external unnamed_addr global i32
+; CHECK: @i = global i32 42
+@j = weak global i32 42
+; CHECK: @j = global i32 42
diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll
new file mode 100644
index 000000000000..7d94dc1928c4
--- /dev/null
+++ b/test/Linker/unnamed-addr1-b.ll
@@ -0,0 +1,12 @@
+; This file is for use with unnamed-addr1-a.ll
+; RUN: true
+
+@c = common unnamed_addr global i32 42
+@d = unnamed_addr global i32 42
+@e = unnamed_addr global i32 42
+@f = unnamed_addr global i32 42
+
+@g = common global i32 42
+@h = global i32 42
+@i = global i32 42
+@j = global i32 42
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
new file mode 100644
index 000000000000..0dceb83c24ab
--- /dev/null
+++ b/test/MC/ARM/arm_fixups.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple arm-unknown-unknown %s --show-encoding > %t
+// RUN: FileCheck < %t %s
+
+// CHECK: bl _printf @ encoding: [A,A,A,0xeb]
+// CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbranch
+bl _printf
+        
\ No newline at end of file
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
new file mode 100644
index 000000000000..fbec7891c801
--- /dev/null
+++ b/test/MC/ARM/arm_instructions.s
@@ -0,0 +1,284 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s | FileCheck %s
+
+@ CHECK: nop
+@ CHECK: encoding: [0x00,0xf0,0x20,0xe3]
+        nop
+
+@ CHECK: nopeq
+@ CHECK: encoding: [0x00,0xf0,0x20,0x03]
+        nopeq
+
+@ CHECK: trap
+@ CHECK: encoding: [0xfe,0xde,0xff,0xe7]
+        trap
+
+@ CHECK: bx	lr
+@ CHECK: encoding: [0x1e,0xff,0x2f,0xe1]
+        bx lr
+
+@ CHECK: vqdmull.s32	q8, d17, d16
+@ CHECK: encoding: [0xa0,0x0d,0xe1,0xf2]
+        vqdmull.s32     q8, d17, d16
+
+@ CHECK: ldmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
+@ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
+@ CHECK: ldmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe8]
+@ CHECK: ldmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe9]
+        ldmia     r2, {r1,r3-r6,sp}
+        ldmib     r2, {r1,r3-r6,sp}
+        ldmda     r2, {r1,r3-r6,sp}
+        ldmdb     r2, {r1,r3-r6,sp}
+
+@ CHECK: stmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8]
+@ CHECK: stmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe9]
+@ CHECK: stmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe8]
+@ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9]
+        stmia     r2, {r1,r3-r6,sp}
+        stmib     r2, {r1,r3-r6,sp}
+        stmda     r2, {r1,r3-r6,sp}
+        stmdb     r2, {r1,r3-r6,sp}
+
+@ CHECK: ldmia r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe8]
+@ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
+@ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
+@ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
+        ldmia     r2!, {r1,r3-r6,sp}
+        ldmib     r2!, {r1,r3-r6,sp}
+        ldmda     r2!, {r1,r3-r6,sp}
+        ldmdb     r2!, {r1,r3-r6,sp}
+
+@ CHECK: stmia r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe8]
+@ CHECK: stmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe9]
+@ CHECK: stmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe8]
+@ CHECK: stmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe9]
+        stmia     r2!, {r1,r3-r6,sp}
+        stmib     r2!, {r1,r3-r6,sp}
+        stmda     r2!, {r1,r3-r6,sp}
+        stmdb     r2!, {r1,r3-r6,sp}
+
+@ CHECK: and	r1, r2, r3 @ encoding: [0x03,0x10,0x02,0xe0]
+        and r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: ands	r1, r2, r3 @ encoding: [0x03,0x10,0x12,0xe0]
+        ands r1,r2,r3
+
+@ CHECK: eor	r1, r2, r3 @ encoding: [0x03,0x10,0x22,0xe0]
+        eor r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: eors	r1, r2, r3 @ encoding: [0x03,0x10,0x32,0xe0]
+        eors r1,r2,r3
+
+@ CHECK: sub	r1, r2, r3 @ encoding: [0x03,0x10,0x42,0xe0]
+        sub r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: subs	r1, r2, r3 @ encoding: [0x03,0x10,0x52,0xe0]
+        subs r1,r2,r3
+
+@ CHECK: add	r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe0]
+        add r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: adds	r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe0]
+        adds r1,r2,r3
+
+@ CHECK: adc	r1, r2, r3 @ encoding: [0x03,0x10,0xa2,0xe0]
+        adc r1,r2,r3
+
+@ CHECK: sbc	r1, r2, r3 @ encoding: [0x03,0x10,0xc2,0xe0]
+        sbc r1,r2,r3
+
+@ CHECK: orr	r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe1]
+        orr r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: orrs	r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe1]
+        orrs r1,r2,r3
+
+@ CHECK: bic	r1, r2, r3 @ encoding: [0x03,0x10,0xc2,0xe1]
+        bic r1,r2,r3
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: bics	r1, r2, r3 @ encoding: [0x03,0x10,0xd2,0xe1]
+        bics r1,r2,r3
+
+@ CHECK: mov	r1, r2 @ encoding: [0x02,0x10,0xa0,0xe1]
+        mov r1,r2
+
+@ CHECK: mvn	r1, r2 @ encoding: [0x02,0x10,0xe0,0xe1]
+        mvn r1,r2
+
+@ FIXME: This is wrong, we are dropping the 's' for now.
+@ CHECK-FIXME: mvns	r1, r2 @ encoding: [0x02,0x10,0xf0,0xe1]
+        mvns r1,r2
+
+@ CHECK: rsb	r1, r2, r3 @ encoding: [0x03,0x10,0x62,0xe0]
+        rsb r1,r2,r3
+
+@ CHECK: rsc	r1, r2, r3 @ encoding: [0x03,0x10,0xe2,0xe0]
+        rsc r1,r2,r3
+
+@ FIXME: This is broken, CCOut operands don't work correctly when their presence
+@ may depend on flags.
+@ CHECK-FIXME:: mlas	r1, r2, r3, r4 @ encoding: [0x92,0x43,0x31,0xe0]
+@        mlas r1,r2,r3,r4
+
+@ CHECK: bfi  r0, r0, #5, #7 @ encoding: [0x90,0x02,0xcb,0xe7]
+        bfi  r0, r0, #5, #7
+
+@ CHECK: bkpt  #10 @ encoding: [0x7a,0x00,0x20,0xe1]
+        bkpt  #10
+
+@ CHECK: isb @ encoding: [0x6f,0xf0,0x7f,0xf5]
+        isb
+@ CHECK: mrs  r8, cpsr @ encoding: [0x00,0x80,0x0f,0xe1]
+        mrs  r8, cpsr
+
+@ CHECK: mcr  p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xee]
+        mcr  p7, #1, r5, c1, c1, #4
+@ CHECK: mrc  p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xee]
+        mrc  p14, #0, r1, c1, c2, #4
+@ CHECK: mcrr  p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x44,0xec]
+        mcrr  p7, #1, r5, r4, c1
+@ CHECK: mrrc  p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x54,0xec]
+        mrrc  p7, #1, r5, r4, c1
+
+@ CHECK: mcr2  p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xfe]
+        mcr2  p7, #1, r5, c1, c1, #4
+@ CHECK: mrc2  p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xfe]
+        mrc2  p14, #0, r1, c1, c2, #4
+@ CHECK: mcrr2  p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x44,0xfc]
+        mcrr2  p7, #1, r5, r4, c1
+@ CHECK: mrrc2  p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x54,0xfc]
+        mrrc2  p7, #1, r5, r4, c1
+
+@ CHECK: cdp  p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xee]
+        cdp  p7, #1, c1, c1, c1, #4
+@ CHECK: cdp2  p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xfe]
+        cdp2  p7, #1, c1, c1, c1, #4
+
+@ CHECK: clrex @ encoding: [0x1f,0xf0,0x7f,0xf5]
+        clrex
+
+@ CHECK: clz  r9, r0 @ encoding: [0x10,0x9f,0x6f,0xe1]
+        clz  r9, r0
+
+@ CHECK: qadd  r1, r2, r3 @ encoding: [0x52,0x10,0x03,0xe1]
+        qadd  r1, r2, r3
+
+@ CHECK: qsub  r1, r2, r3 @ encoding: [0x52,0x10,0x23,0xe1]
+        qsub  r1, r2, r3
+
+@ CHECK: qdadd  r1, r2, r3 @ encoding: [0x52,0x10,0x43,0xe1]
+        qdadd  r1, r2, r3
+
+@ CHECK: qdsub  r1, r2, r3 @ encoding: [0x52,0x10,0x63,0xe1]
+        qdsub  r1, r2, r3
+
+@ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3]
+        wfe
+
+@ CHECK: wfi @ encoding: [0x03,0xf0,0x20,0xe3]
+        wfi
+
+@ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3]
+        yield
+
+@ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3]
+        nop
+
+@ CHECK: dmb  sy @ encoding: [0x5f,0xf0,0x7f,0xf5]
+        dmb  sy
+
+@ CHECK: dmb  st @ encoding: [0x5e,0xf0,0x7f,0xf5]
+        dmb  st
+
+@ CHECK: dmb  ish @ encoding: [0x5b,0xf0,0x7f,0xf5]
+        dmb  ish
+
+@ CHECK: dmb  ishst @ encoding: [0x5a,0xf0,0x7f,0xf5]
+        dmb  ishst
+
+@ CHECK: dmb  nsh @ encoding: [0x57,0xf0,0x7f,0xf5]
+        dmb  nsh
+
+@ CHECK: dmb  nshst @ encoding: [0x56,0xf0,0x7f,0xf5]
+        dmb  nshst
+
+@ CHECK: dmb  osh @ encoding: [0x53,0xf0,0x7f,0xf5]
+        dmb  osh
+
+@ CHECK: dmb  oshst @ encoding: [0x52,0xf0,0x7f,0xf5]
+        dmb  oshst
+
+@ CHECK: dsb  sy @ encoding: [0x4f,0xf0,0x7f,0xf5]
+        dsb  sy
+
+@ CHECK: dsb  st @ encoding: [0x4e,0xf0,0x7f,0xf5]
+        dsb  st
+
+@ CHECK: dsb  ish @ encoding: [0x4b,0xf0,0x7f,0xf5]
+        dsb  ish
+
+@ CHECK: dsb  ishst @ encoding: [0x4a,0xf0,0x7f,0xf5]
+        dsb  ishst
+
+@ CHECK: dsb  nsh @ encoding: [0x47,0xf0,0x7f,0xf5]
+        dsb  nsh
+
+@ CHECK: dsb  nshst @ encoding: [0x46,0xf0,0x7f,0xf5]
+        dsb  nshst
+
+@ CHECK: dsb  osh @ encoding: [0x43,0xf0,0x7f,0xf5]
+        dsb  osh
+
+@ CHECK: dsb  oshst @ encoding: [0x42,0xf0,0x7f,0xf5]
+        dsb  oshst
+
+@ CHECK: cpsie  aif @ encoding: [0xc0,0x01,0x08,0xf1]
+        cpsie  aif
+
+@ CHECK: cps  #15 @ encoding: [0x0f,0x00,0x02,0xf1]
+        cps  #15
+
+@ CHECK: cpsie  if, #10 @ encoding: [0xca,0x00,0x0a,0xf1]
+        cpsie  if, #10
+
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x00,0xf0,0x29,0xe1]
+        msr  apsr, r0
+
+@ CHECK: msr  cpsr_s, r0 @ encoding: [0x00,0xf0,0x24,0xe1]
+        msr  apsr_g, r0
+
+@ CHECK: msr  cpsr_f, r0 @ encoding: [0x00,0xf0,0x28,0xe1]
+        msr  apsr_nzcvq, r0
+
+@ CHECK: msr  cpsr_fs, r0 @ encoding: [0x00,0xf0,0x2c,0xe1]
+        msr  apsr_nzcvqg, r0
+
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x00,0xf0,0x29,0xe1]
+        msr  cpsr_fc, r0
+
+@ CHECK: msr  cpsr_c, r0 @ encoding: [0x00,0xf0,0x21,0xe1]
+        msr  cpsr_c, r0
+
+@ CHECK: msr  cpsr_x, r0 @ encoding: [0x00,0xf0,0x22,0xe1]
+        msr  cpsr_x, r0
+
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x00,0xf0,0x29,0xe1]
+        msr  cpsr_fc, r0
+
+@ CHECK: msr  cpsr_fsx, r0 @ encoding: [0x00,0xf0,0x2e,0xe1]
+        msr  cpsr_fsx, r0
+
+@ CHECK: msr  spsr_fc, r0 @ encoding: [0x00,0xf0,0x69,0xe1]
+        msr  spsr_fc, r0
+
+@ CHECK: msr  spsr_fsxc, r0 @ encoding: [0x00,0xf0,0x6f,0xe1]
+        msr  spsr_fsxc, r0
+
+@ CHECK: msr  cpsr_fsxc, r0 @ encoding: [0x00,0xf0,0x2f,0xe1]
+        msr  cpsr_fsxc, r0
+
diff --git a/test/MC/ARM/arm_word_directive.s b/test/MC/ARM/arm_word_directive.s
new file mode 100644
index 000000000000..e782479b6081
--- /dev/null
+++ b/test/MC/ARM/arm_word_directive.s
@@ -0,0 +1,6 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown %s | FileCheck %s
+
+@ CHECK: TEST0:
+@ CHECK: .long 3
+TEST0:  
+        .word 3
diff --git a/test/MC/ARM/dg.exp b/test/MC/ARM/dg.exp
new file mode 100644
index 000000000000..055fa2507d3c
--- /dev/null
+++ b/test/MC/ARM/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/MC/ARM/elf-eflags-eabi.s b/test/MC/ARM/elf-eflags-eabi.s
new file mode 100644
index 000000000000..ea89eacf74fb
--- /dev/null
+++ b/test/MC/ARM/elf-eflags-eabi.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
+@ RUN:    elf-dump --dump-section-data  | FileCheck -check-prefix=OBJ %s
+	.syntax unified
+	.text
+	.globl	barf
+	.align	2
+	.type	barf,%function
+barf:                                   @ @barf
+@ BB#0:                                 @ %entry
+        b foo
+
+@@@ make sure the EF_ARM_EABIMASK comes out OK
+@OBJ:    'e_flags', 0x05000000
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
new file mode 100644
index 000000000000..0fe7c50a3133
--- /dev/null
+++ b/test/MC/ARM/elf-movt.s
@@ -0,0 +1,39 @@
+@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck -check-prefix=ASM %s
+@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
+@ RUN:    elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+	.syntax unified
+	.text
+	.globl	barf
+	.align	2
+	.type	barf,%function
+barf:                                   @ @barf
+@ BB#0:                                 @ %entry
+	movw	r0, :lower16:GOT-(.LPC0_2+8)
+	movt	r0, :upper16:GOT-(.LPC0_2+16)
+.LPC0_2:
+@ ASM:          movw    r0, :lower16:(GOT-(.LPC0_2+8))
+@ ASM-NEXT:     movt    r0, :upper16:(GOT-(.LPC0_2+16))
+
+@@ make sure that the text section fixups are sane too
+@ OBJ:                 '.text'
+@ OBJ-NEXT:            'sh_type', 0x00000001
+@ OBJ-NEXT:            'sh_flags', 0x00000006
+@ OBJ-NEXT:            'sh_addr', 0x00000000
+@ OBJ-NEXT:            'sh_offset', 0x00000034
+@ OBJ-NEXT:            'sh_size', 0x00000008
+@ OBJ-NEXT:            'sh_link', 0x00000000
+@ OBJ-NEXT:            'sh_info', 0x00000000
+@ OBJ-NEXT:            'sh_addralign', 0x00000004
+@ OBJ-NEXT:            'sh_entsize', 0x00000000
+@ OBJ-NEXT:            '_section_data', 'f00f0fe3 ff0f4fe3'
+
+@ OBJ:              Relocation 0x00000000
+@ OBJ-NEXT:         'r_offset', 0x00000000
+@ OBJ-NEXT:         'r_sym'
+@ OBJ-NEXT:         'r_type', 0x0000002d
+
+@ OBJ:              Relocation 0x00000001
+@ OBJ-NEXT:         'r_offset', 0x00000004
+@ OBJ-NEXT:         'r_sym'
+@ OBJ-NEXT:         'r_type', 0x0000002e
+
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
new file mode 100644
index 000000000000..6b83c95032cd
--- /dev/null
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -0,0 +1,71 @@
+;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
+;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
+;; RUN:    -filetype=obj %s -o - | \
+;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+
+;; FIXME: This file needs to be in .s form!
+;; The args to llc are there to constrain the codegen only.
+;; 
+;; Ensure no regression on ARM/gcc compatibility for 
+;; emitting explicit symbol relocs for nonexternal symbols 
+;; versus section symbol relocs (with offset) - 
+;;
+;; Default llvm behavior is to emit as section symbol relocs nearly
+;; everything that is not an undefined external. Unfortunately, this 
+;; diverges from what codesourcery ARM/gcc does!
+;;
+;; Tests that reloc to _MergedGlobals show up as explicit symbol reloc
+
+
+target triple = "armv7-none-linux-gnueabi"
+
+@var_tls = thread_local global i32 1
+@var_tls_double = thread_local global double 1.000000e+00
+@var_static = internal global i32 1
+@var_static_double = internal global double 1.000000e+00
+@var_global = global i32 1
+@var_global_double = global double 1.000000e+00
+
+declare i32 @mystrlen(i8* nocapture %s) nounwind  
+
+declare void @myhextochar(i32 %n, i8* nocapture %buffer)
+
+declare void @__aeabi_read_tp() nounwind 
+
+declare void @__nacl_read_tp() nounwind  
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+  switch i32 %argc, label %bb3 [
+    i32 555, label %bb
+    i32 6666, label %bb2
+  ]
+
+bb:                                               ; preds = %entry
+  volatile store i32 11, i32* @var_tls, align 4
+  volatile store double 2.200000e+01, double* @var_tls_double, align 8
+  volatile store i32 33, i32* @var_static, align 4
+  volatile store double 4.400000e+01, double* @var_static_double, align 8
+  volatile store i32 55, i32* @var_global, align 4
+  volatile store double 6.600000e+01, double* @var_global_double, align 8
+  br label %bb3
+
+bb2:                                              ; preds = %entry
+  ret i32 add (i32 add (i32 add (i32 ptrtoint (i32* @var_tls to i32), i32 add (i32 ptrtoint (i32* @var_static to i32), i32 ptrtoint (i32* @var_global to i32))), i32 ptrtoint (double* @var_tls_double to i32)), i32 add (i32 ptrtoint (double* @var_static_double to i32), i32 ptrtoint (double* @var_global_double to i32)))
+
+bb3:                                              ; preds = %bb, %entry
+  tail call void @exit(i32 55) noreturn nounwind
+  unreachable
+}
+
+declare void @exit(i32) noreturn nounwind
+
+
+;; OBJ:         Symbol 0x00000002
+;; OBJ-NEXT:    '_MergedGlobals'
+;; OBJ-NEXT:    'st_value', 0x00000010
+
+;; OBJ:          Relocation 0x00000001
+;; OBJ-NEXT:     'r_offset', 
+;; OBJ-NEXT:     'r_sym', 0x00000002
+;; OBJ-NEXT:     'r_type', 0x0000002b
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
new file mode 100644
index 000000000000..132a47758dad
--- /dev/null
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -0,0 +1,51 @@
+;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
+;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
+;; RUN:    -filetype=obj %s -o - | \
+;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+
+;; FIXME: This file needs to be in .s form!
+;; The args to llc are there to constrain the codegen only.
+;; 
+;; Ensure no regression on ARM/gcc compatibility for 
+;; emitting explicit symbol relocs for nonexternal symbols 
+;; versus section symbol relocs (with offset) - 
+;;
+;; Default llvm behavior is to emit as section symbol relocs nearly
+;; everything that is not an undefined external. Unfortunately, this 
+;; diverges from what codesourcery ARM/gcc does!
+;;
+;; Tests that reloc to .L.str* show up as explicit symbols
+
+target triple = "armv7-none-linux-gnueabi"
+
+@.str = private constant [7 x i8] c"@null\0A\00", align 4
+@.str1 = private constant [8 x i8] c"@write\0A\00", align 4
+@.str2 = private constant [13 x i8] c"hello worldn\00", align 4
+@.str3 = private constant [7 x i8] c"@exit\0A\00", align 4
+
+declare i32 @mystrlen(i8* nocapture %s) nounwind readonly 
+
+declare void @myhextochar(i32 %n, i8* nocapture %buffer) nounwind 
+
+define i32 @main() nounwind {
+entry:
+  %0 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 6) nounwind
+  %1 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), i32 7) nounwind
+  %2 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([13 x i8]* @.str2, i32 0, i32 0), i32 12) nounwind
+  %3 = tail call i32 (...)* @write(i32 1, i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), i32 6) nounwind
+  tail call void @exit(i32 55) noreturn nounwind
+  unreachable
+}
+
+declare i32 @write(...)
+
+declare void @exit(i32) noreturn nounwind
+
+
+;; OBJ:          Symbol 0x00000002
+;; OBJ-NEXT:    '.L.str'
+
+;; OBJ:        Relocation 0x00000000
+;; OBJ-NEXT:    'r_offset', 
+;; OBJ-NEXT:    'r_sym', 0x00000002
+;; OBJ-NEXT:    'r_type', 0x0000002b
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
new file mode 100644
index 000000000000..e052f39a615a
--- /dev/null
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -0,0 +1,98 @@
+;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
+;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
+;; RUN:    -filetype=obj %s -o - | \
+;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+
+;; FIXME: This file needs to be in .s form!
+;; The args to llc are there to constrain the codegen only.
+;; 
+;; Ensure no regression on ARM/gcc compatibility for 
+;; emitting explicit symbol relocs for nonexternal symbols 
+;; versus section symbol relocs (with offset) - 
+;;
+;; Default llvm behavior is to emit as section symbol relocs nearly
+;; everything that is not an undefined external. Unfortunately, this 
+;; diverges from what codesourcery ARM/gcc does!
+;;
+;; Verifies that internal constants appear as explict symbol relocs
+
+
+target triple = "armv7-none-linux-gnueabi"
+
+@startval = global i32 5
+@vtable = internal constant [10 x i32 (...)*] [i32 (...)* bitcast (i32 ()* @foo0 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo1 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo2 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo3 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo4 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo5 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo6 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo7 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo8 to i32 (...)*), i32 (...)* bitcast (i32 ()* @foo9 to i32 (...)*)]
+
+declare i32 @mystrlen(i8* nocapture %s) nounwind readonly 
+
+declare void @myhextochar(i32 %n, i8* nocapture %buffer) nounwind 
+
+define internal i32 @foo0() nounwind readnone {
+entry:
+  ret i32 0
+}
+
+define internal i32 @foo1() nounwind readnone {
+entry:
+  ret i32 1
+}
+
+define internal i32 @foo2() nounwind readnone {
+entry:
+  ret i32 2
+}
+
+define internal i32 @foo3() nounwind readnone {
+entry:
+  ret i32 3
+}
+
+define internal i32 @foo4() nounwind readnone {
+entry:
+  ret i32 4
+}
+
+define internal i32 @foo5() nounwind readnone {
+entry:
+  ret i32 55
+}
+
+define internal i32 @foo6() nounwind readnone {
+entry:
+  ret i32 6
+}
+
+define internal i32 @foo7() nounwind readnone {
+entry:
+  ret i32 7
+}
+
+define internal i32 @foo8() nounwind readnone {
+entry:
+  ret i32 8
+}
+
+define internal i32 @foo9() nounwind readnone {
+entry:
+  ret i32 9
+}
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @startval, align 4
+  %1 = getelementptr inbounds [10 x i32 (...)*]* @vtable, i32 0, i32 %0
+  %2 = load i32 (...)** %1, align 4
+  %3 = tail call i32 (...)* %2() nounwind
+  tail call void @exit(i32 %3) noreturn nounwind
+  unreachable
+}
+
+declare void @exit(i32) noreturn nounwind
+
+
+;; OBJ:      Symbol 0x0000000c
+;; OBJ-NEXT:    'vtable'
+
+;; OBJ:           Relocation 0x00000001
+;; OBJ-NEXT:     'r_offset', 
+;; OBJ-NEXT:     'r_sym', 0x0000000c
+;; OBJ-NEXT:     'r_type', 0x0000002b
diff --git a/test/MC/ARM/hilo-16bit-relocations.s b/test/MC/ARM/hilo-16bit-relocations.s
new file mode 100644
index 000000000000..7d6b4988dffd
--- /dev/null
+++ b/test/MC/ARM/hilo-16bit-relocations.s
@@ -0,0 +1,20 @@
+@ RUN: llvm-mc %s -triple armv7-apple-darwin | FileCheck %s
+@ RUN: llvm-mc %s -triple armv7-apple-darwin | FileCheck %s        
+        
+_t:
+        movw    r0, :lower16:(L_foo$non_lazy_ptr - (L1 + 8))
+        movt    r0, :upper16:(L_foo$non_lazy_ptr - (L1 + 8))
+L1:
+
+@ CHECK: movw	r0, :lower16:(L_foo$non_lazy_ptr-(L1+8))
+@ CHECK: movt	r0, :upper16:(L_foo$non_lazy_ptr-(L1+8))
+        
+        .comm	_foo,4,2
+
+	.section	__DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+	.align	2
+L_foo$non_lazy_ptr:
+	.indirect_symbol	_foo
+	.long	0
+        
+.subsections_via_symbols
diff --git a/test/MC/ARM/neon-abs-encoding.s b/test/MC/ARM/neon-abs-encoding.s
new file mode 100644
index 000000000000..398f2db039df
--- /dev/null
+++ b/test/MC/ARM/neon-abs-encoding.s
@@ -0,0 +1,31 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vabs.s8	d16, d16                @ encoding: [0x20,0x03,0xf1,0xf3]
+	vabs.s8	d16, d16
+@ CHECK: vabs.s16	d16, d16        @ encoding: [0x20,0x03,0xf5,0xf3]
+	vabs.s16	d16, d16
+@ CHECK: vabs.s32	d16, d16        @ encoding: [0x20,0x03,0xf9,0xf3]
+	vabs.s32	d16, d16
+@ CHECK: vabs.f32	d16, d16        @ encoding: [0x20,0x07,0xf9,0xf3]
+	vabs.f32	d16, d16
+@ CHECK: vabs.s8	q8, q8                  @ encoding: [0x60,0x03,0xf1,0xf3]
+	vabs.s8	q8, q8
+@ CHECK: vabs.s16	q8, q8          @ encoding: [0x60,0x03,0xf5,0xf3]
+	vabs.s16	q8, q8
+@ CHECK: vabs.s32	q8, q8          @ encoding: [0x60,0x03,0xf9,0xf3]
+	vabs.s32	q8, q8
+@ CHECK: vabs.f32	q8, q8          @ encoding: [0x60,0x07,0xf9,0xf3]
+	vabs.f32	q8, q8
+
+@ CHECK: vqabs.s8	d16, d16        @ encoding: [0x20,0x07,0xf0,0xf3]
+	vqabs.s8	d16, d16
+@ CHECK: vqabs.s16	d16, d16        @ encoding: [0x20,0x07,0xf4,0xf3]
+	vqabs.s16	d16, d16
+@ CHECK: vqabs.s32	d16, d16        @ encoding: [0x20,0x07,0xf8,0xf3]
+	vqabs.s32	d16, d16
+@ CHECK: vqabs.s8	q8, q8          @ encoding: [0x60,0x07,0xf0,0xf3]
+	vqabs.s8	q8, q8
+@ CHECK: vqabs.s16	q8, q8          @ encoding: [0x60,0x07,0xf4,0xf3]
+	vqabs.s16	q8, q8
+@ CHECK: vqabs.s32	q8, q8          @ encoding: [0x60,0x07,0xf8,0xf3]
+	vqabs.s32	q8, q8
diff --git a/test/MC/ARM/neon-absdiff-encoding.s b/test/MC/ARM/neon-absdiff-encoding.s
new file mode 100644
index 000000000000..f43ea6582a28
--- /dev/null
+++ b/test/MC/ARM/neon-absdiff-encoding.s
@@ -0,0 +1,82 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vabd.s8	d16, d16, d17           @ encoding: [0xa1,0x07,0x40,0xf2]
+	vabd.s8	d16, d16, d17
+@ CHECK: vabd.s16	d16, d16, d17   @ encoding: [0xa1,0x07,0x50,0xf2]
+	vabd.s16	d16, d16, d17
+@ CHECK: vabd.s32	d16, d16, d17   @ encoding: [0xa1,0x07,0x60,0xf2]
+	vabd.s32	d16, d16, d17
+@ CHECK: vabd.u8	d16, d16, d17           @ encoding: [0xa1,0x07,0x40,0xf3]
+	vabd.u8	d16, d16, d17
+@ CHECK: vabd.u16	d16, d16, d17   @ encoding: [0xa1,0x07,0x50,0xf3]
+	vabd.u16	d16, d16, d17
+  @ CHECK: vabd.u32	d16, d16, d17   @ encoding: [0xa1,0x07,0x60,0xf3]
+	vabd.u32	d16, d16, d17
+@ CHECK: vabd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x60,0xf3]
+	vabd.f32	d16, d16, d17
+@ CHECK: vabd.s8	q8, q8, q9              @ encoding: [0xe2,0x07,0x40,0xf2]
+	vabd.s8	q8, q8, q9
+@ CHECK: vabd.s16	q8, q8, q9      @ encoding: [0xe2,0x07,0x50,0xf2]
+	vabd.s16	q8, q8, q9
+@ CHECK: vabd.s32	q8, q8, q9      @ encoding: [0xe2,0x07,0x60,0xf2]
+	vabd.s32	q8, q8, q9
+@ CHECK: vabd.u8	q8, q8, q9              @ encoding: [0xe2,0x07,0x40,0xf3]
+	vabd.u8	q8, q8, q9
+@ CHECK: vabd.u16	q8, q8, q9      @ encoding: [0xe2,0x07,0x50,0xf3]
+	vabd.u16	q8, q8, q9
+@ CHECK: vabd.u32	q8, q8, q9      @ encoding: [0xe2,0x07,0x60,0xf3]
+	vabd.u32	q8, q8, q9
+@ CHECK: vabd.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x60,0xf3]
+	vabd.f32	q8, q8, q9
+
+@ CHECK: vabdl.s8	q8, d16, d17    @ encoding: [0xa1,0x07,0xc0,0xf2]
+	vabdl.s8	q8, d16, d17
+@ CHECK: vabdl.s16	q8, d16, d17    @ encoding: [0xa1,0x07,0xd0,0xf2]
+	vabdl.s16	q8, d16, d17
+@ CHECK: vabdl.s32	q8, d16, d17    @ encoding: [0xa1,0x07,0xe0,0xf2]
+	vabdl.s32	q8, d16, d17
+@ CHECK: vabdl.u8	q8, d16, d17    @ encoding: [0xa1,0x07,0xc0,0xf3]
+	vabdl.u8	q8, d16, d17
+@ CHECK: vabdl.u16	q8, d16, d17    @ encoding: [0xa1,0x07,0xd0,0xf3]
+	vabdl.u16	q8, d16, d17
+@ CHECK: vabdl.u32	q8, d16, d17    @ encoding: [0xa1,0x07,0xe0,0xf3]
+	vabdl.u32	q8, d16, d17
+
+@ CHECK: vaba.s8	d16, d18, d17           @ encoding: [0xb1,0x07,0x42,0xf2]
+	vaba.s8	d16, d18, d17
+@ CHECK: vaba.s16	d16, d18, d17   @ encoding: [0xb1,0x07,0x52,0xf2]
+	vaba.s16	d16, d18, d17
+@ CHECK: vaba.s32	d16, d18, d17   @ encoding: [0xb1,0x07,0x62,0xf2]
+	vaba.s32	d16, d18, d17
+@ CHECK: vaba.u8	d16, d18, d17           @ encoding: [0xb1,0x07,0x42,0xf3]
+	vaba.u8	d16, d18, d17
+@ CHECK: vaba.u16	d16, d18, d17   @ encoding: [0xb1,0x07,0x52,0xf3]
+	vaba.u16	d16, d18, d17
+@ CHECK: vaba.u32	d16, d18, d17   @ encoding: [0xb1,0x07,0x62,0xf3]
+	vaba.u32	d16, d18, d17
+@ CHECK: vaba.s8	q9, q8, q10             @ encoding: [0xf4,0x27,0x40,0xf2]
+	vaba.s8	q9, q8, q10
+@ CHECK: vaba.s16	q9, q8, q10     @ encoding: [0xf4,0x27,0x50,0xf2]
+	vaba.s16	q9, q8, q10
+@ CHECK: vaba.s32	q9, q8, q10     @ encoding: [0xf4,0x27,0x60,0xf2]
+	vaba.s32	q9, q8, q10
+@ CHECK: vaba.u8	q9, q8, q10             @ encoding: [0xf4,0x27,0x40,0xf3]
+	vaba.u8	q9, q8, q10
+@ CHECK: vaba.u16	q9, q8, q10     @ encoding: [0xf4,0x27,0x50,0xf3]
+	vaba.u16	q9, q8, q10
+@ CHECK: vaba.u32	q9, q8, q10     @ encoding: [0xf4,0x27,0x60,0xf3]
+	vaba.u32	q9, q8, q10
+
+@ CHECK: vabal.s8	q8, d19, d18    @ encoding: [0xa2,0x05,0xc3,0xf2]
+	vabal.s8	q8, d19, d18
+@ CHECK: vabal.s16	q8, d19, d18    @ encoding: [0xa2,0x05,0xd3,0xf2]
+	vabal.s16	q8, d19, d18
+@ CHECK: vabal.s32	q8, d19, d18    @ encoding: [0xa2,0x05,0xe3,0xf2]
+	vabal.s32	q8, d19, d18
+@ CHECK: vabal.u8	q8, d19, d18    @ encoding: [0xa2,0x05,0xc3,0xf3]
+	vabal.u8	q8, d19, d18
+@ CHECK: 	vabal.u16	q8, d19, d18    @ encoding: [0xa2,0x05,0xd3,0xf3]
+	vabal.u16	q8, d19, d18
+@ CHECK: vabal.u32	q8, d19, d18    @ encoding: [0xa2,0x05,0xe3,0xf3]
+	vabal.u32	q8, d19, d18
+
diff --git a/test/MC/ARM/neon-add-encoding.s b/test/MC/ARM/neon-add-encoding.s
new file mode 100644
index 000000000000..e425397b7901
--- /dev/null
+++ b/test/MC/ARM/neon-add-encoding.s
@@ -0,0 +1,137 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+
+
+@ CHECK: vadd.i8	d16, d17, d16           @ encoding: [0xa0,0x08,0x41,0xf2]
+	vadd.i8	d16, d17, d16
+@ CHECK: vadd.i16	d16, d17, d16   @ encoding: [0xa0,0x08,0x51,0xf2]
+	vadd.i16	d16, d17, d16
+@ CHECK: vadd.i64	d16, d17, d16   @ encoding: [0xa0,0x08,0x71,0xf2]
+	vadd.i64	d16, d17, d16
+@ CHECK: vadd.i32	d16, d17, d16   @ encoding: [0xa0,0x08,0x61,0xf2]
+	vadd.i32	d16, d17, d16
+@ CHECK: vadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf2]
+	vadd.f32	d16, d16, d17
+@ CHECK: vadd.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x40,0xf2]
+	vadd.f32	q8, q8, q9
+
+@ CHECK: vaddl.s8	q8, d17, d16    @ encoding: [0xa0,0x00,0xc1,0xf2]
+	vaddl.s8	q8, d17, d16
+@ CHECK: vaddl.s16	q8, d17, d16    @ encoding: [0xa0,0x00,0xd1,0xf2]
+	vaddl.s16	q8, d17, d16
+@ CHECK: vaddl.s32	q8, d17, d16    @ encoding: [0xa0,0x00,0xe1,0xf2]
+	vaddl.s32	q8, d17, d16
+@ CHECK: vaddl.u8	q8, d17, d16    @ encoding: [0xa0,0x00,0xc1,0xf3]
+	vaddl.u8	q8, d17, d16
+@ CHECK: vaddl.u16	q8, d17, d16    @ encoding: [0xa0,0x00,0xd1,0xf3]
+	vaddl.u16	q8, d17, d16
+@ CHECK: vaddl.u32	q8, d17, d16    @ encoding: [0xa0,0x00,0xe1,0xf3]
+	vaddl.u32	q8, d17, d16
+
+@ CHECK: vaddw.s8	q8, q8, d18     @ encoding: [0xa2,0x01,0xc0,0xf2]
+	vaddw.s8	q8, q8, d18
+@ CHECK: vaddw.s16	q8, q8, d18     @ encoding: [0xa2,0x01,0xd0,0xf2]
+	vaddw.s16	q8, q8, d18
+@ CHECK: vaddw.s32	q8, q8, d18     @ encoding: [0xa2,0x01,0xe0,0xf2]
+	vaddw.s32	q8, q8, d18
+@ CHECK: vaddw.u8	q8, q8, d18     @ encoding: [0xa2,0x01,0xc0,0xf3]
+	vaddw.u8	q8, q8, d18
+@ CHECK: vaddw.u16	q8, q8, d18     @ encoding: [0xa2,0x01,0xd0,0xf3]
+	vaddw.u16	q8, q8, d18
+@ CHECK: vaddw.u32	q8, q8, d18     @ encoding: [0xa2,0x01,0xe0,0xf3]
+	vaddw.u32	q8, q8, d18
+
+@ CHECK: vhadd.s8	d16, d16, d17   @ encoding: [0xa1,0x00,0x40,0xf2]
+	vhadd.s8	d16, d16, d17
+@ CHECK: vhadd.s16	d16, d16, d17   @ encoding: [0xa1,0x00,0x50,0xf2]
+	vhadd.s16	d16, d16, d17
+@ CHECK: vhadd.s32	d16, d16, d17   @ encoding: [0xa1,0x00,0x60,0xf2]
+	vhadd.s32	d16, d16, d17
+@ CHECK: vhadd.u8	d16, d16, d17   @ encoding: [0xa1,0x00,0x40,0xf3]
+	vhadd.u8	d16, d16, d17
+@ CHECK: vhadd.u16	d16, d16, d17   @ encoding: [0xa1,0x00,0x50,0xf3]
+	vhadd.u16	d16, d16, d17
+@ CHECK: vhadd.u32	d16, d16, d17   @ encoding: [0xa1,0x00,0x60,0xf3]
+	vhadd.u32	d16, d16, d17
+@ CHECK: vhadd.s8	q8, q8, q9      @ encoding: [0xe2,0x00,0x40,0xf2]
+	vhadd.s8	q8, q8, q9
+@ CHECK: vhadd.s16	q8, q8, q9      @ encoding: [0xe2,0x00,0x50,0xf2]
+	vhadd.s16	q8, q8, q9
+@ CHECK: vhadd.s32	q8, q8, q9      @ encoding: [0xe2,0x00,0x60,0xf2]
+	vhadd.s32	q8, q8, q9
+  @ CHECK: vhadd.u8	q8, q8, q9      @ encoding: [0xe2,0x00,0x40,0xf3]
+	vhadd.u8	q8, q8, q9
+@ CHECK: vhadd.u16	q8, q8, q9      @ encoding: [0xe2,0x00,0x50,0xf3]
+	vhadd.u16	q8, q8, q9
+@ CHECK: vhadd.u32	q8, q8, q9      @ encoding: [0xe2,0x00,0x60,0xf3]
+	vhadd.u32	q8, q8, q9
+	
+@ CHECK: vrhadd.s8	d16, d16, d17   @ encoding: [0xa1,0x01,0x40,0xf2]
+	vrhadd.s8	d16, d16, d17
+@ CHECK: vrhadd.s16	d16, d16, d17   @ encoding: [0xa1,0x01,0x50,0xf2]
+	vrhadd.s16	d16, d16, d17
+@ CHECK: vrhadd.s32	d16, d16, d17   @ encoding: [0xa1,0x01,0x60,0xf2]
+	vrhadd.s32	d16, d16, d17
+@ CHECK: vrhadd.u8	d16, d16, d17   @ encoding: [0xa1,0x01,0x40,0xf3]
+	vrhadd.u8	d16, d16, d17
+@ CHECK: vrhadd.u16	d16, d16, d17   @ encoding: [0xa1,0x01,0x50,0xf3]
+	vrhadd.u16	d16, d16, d17
+@ CHECK: vrhadd.u32	d16, d16, d17   @ encoding: [0xa1,0x01,0x60,0xf3]
+	vrhadd.u32	d16, d16, d17
+@ CHECK: vrhadd.s8	q8, q8, q9      @ encoding: [0xe2,0x01,0x40,0xf2]
+	vrhadd.s8	q8, q8, q9
+@ CHECK: vrhadd.s16	q8, q8, q9      @ encoding: [0xe2,0x01,0x50,0xf2]
+	vrhadd.s16	q8, q8, q9
+@ CHECK: vrhadd.s32	q8, q8, q9      @ encoding: [0xe2,0x01,0x60,0xf2]
+	vrhadd.s32	q8, q8, q9
+@ CHECK: vrhadd.u8	q8, q8, q9      @ encoding: [0xe2,0x01,0x40,0xf3]
+	vrhadd.u8	q8, q8, q9
+@ CHECK: vrhadd.u16	q8, q8, q9      @ encoding: [0xe2,0x01,0x50,0xf3]
+	vrhadd.u16	q8, q8, q9
+@ CHECK: vrhadd.u32	q8, q8, q9      @ encoding: [0xe2,0x01,0x60,0xf3]
+	vrhadd.u32	q8, q8, q9
+
+@ CHECK: vqadd.s8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf2]
+	vqadd.s8	d16, d16, d17
+@ CHECK: vqadd.s16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf2]
+	vqadd.s16	d16, d16, d17
+@ CHECK: vqadd.s32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf2]
+	vqadd.s32	d16, d16, d17
+@ CHECK: vqadd.s64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf2]
+	vqadd.s64	d16, d16, d17
+@ CHECK: vqadd.u8	d16, d16, d17   @ encoding: [0xb1,0x00,0x40,0xf3]
+	vqadd.u8	d16, d16, d17
+@ CHECK: vqadd.u16	d16, d16, d17   @ encoding: [0xb1,0x00,0x50,0xf3]
+	vqadd.u16	d16, d16, d17
+@ CHECK: vqadd.u32	d16, d16, d17   @ encoding: [0xb1,0x00,0x60,0xf3]
+	vqadd.u32	d16, d16, d17
+@ CHECK: vqadd.u64	d16, d16, d17   @ encoding: [0xb1,0x00,0x70,0xf3]
+	vqadd.u64	d16, d16, d17
+@ CHECK: vqadd.s8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf2]
+	vqadd.s8	q8, q8, q9
+@ CHECK: vqadd.s16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf2]
+	vqadd.s16	q8, q8, q9
+@ CHECK: vqadd.s32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf2]
+	vqadd.s32	q8, q8, q9
+@ CHECK: vqadd.s64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf2]
+	vqadd.s64	q8, q8, q9
+@ CHECK: vqadd.u8	q8, q8, q9      @ encoding: [0xf2,0x00,0x40,0xf3]
+	vqadd.u8	q8, q8, q9
+@ CHECK: vqadd.u16	q8, q8, q9      @ encoding: [0xf2,0x00,0x50,0xf3]
+	vqadd.u16	q8, q8, q9
+@ CHECK: vqadd.u32	q8, q8, q9      @ encoding: [0xf2,0x00,0x60,0xf3]
+	vqadd.u32	q8, q8, q9
+@ CHECK: vqadd.u64	q8, q8, q9      @ encoding: [0xf2,0x00,0x70,0xf3]
+	vqadd.u64	q8, q8, q9
+
+@ CHECK: vaddhn.i16	d16, q8, q9     @ encoding: [0xa2,0x04,0xc0,0xf2]
+	vaddhn.i16	d16, q8, q9
+@ CHECK: vaddhn.i32	d16, q8, q9     @ encoding: [0xa2,0x04,0xd0,0xf2]
+	vaddhn.i32	d16, q8, q9
+@ CHECK: vaddhn.i64	d16, q8, q9     @ encoding: [0xa2,0x04,0xe0,0xf2]
+	vaddhn.i64	d16, q8, q9
+@ CHECK: vraddhn.i16	d16, q8, q9     @ encoding: [0xa2,0x04,0xc0,0xf3]
+	vraddhn.i16	d16, q8, q9
+@ CHECK: vraddhn.i32	d16, q8, q9     @ encoding: [0xa2,0x04,0xd0,0xf3]
+	vraddhn.i32	d16, q8, q9
+@ CHECK: vraddhn.i64	d16, q8, q9     @ encoding: [0xa2,0x04,0xe0,0xf3]
+	vraddhn.i64	d16, q8, q9
diff --git a/test/MC/ARM/neon-bitcount-encoding.s b/test/MC/ARM/neon-bitcount-encoding.s
new file mode 100644
index 000000000000..2c9518b32c1b
--- /dev/null
+++ b/test/MC/ARM/neon-bitcount-encoding.s
@@ -0,0 +1,31 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vcnt.8	d16, d16                @ encoding: [0x20,0x05,0xf0,0xf3]
+	vcnt.8	d16, d16
+@ CHECK: vcnt.8	q8, q8                  @ encoding: [0x60,0x05,0xf0,0xf3]
+	vcnt.8	q8, q8
+@ CHECK: vclz.i8	d16, d16                @ encoding: [0xa0,0x04,0xf0,0xf3]
+	vclz.i8	d16, d16
+@ CHECK: vclz.i16	d16, d16        @ encoding: [0xa0,0x04,0xf4,0xf3]
+	vclz.i16	d16, d16
+@ CHECK: vclz.i32	d16, d16        @ encoding: [0xa0,0x04,0xf8,0xf3]
+	vclz.i32	d16, d16
+@ CHECK: vclz.i8	q8, q8                  @ encoding: [0xe0,0x04,0xf0,0xf3]
+	vclz.i8	q8, q8
+@ CHECK: vclz.i16	q8, q8          @ encoding: [0xe0,0x04,0xf4,0xf3]
+	vclz.i16	q8, q8
+@ CHECK: vclz.i32	q8, q8          @ encoding: [0xe0,0x04,0xf8,0xf3]
+	vclz.i32	q8, q8
+@ CHECK: vcls.s8	d16, d16                @ encoding: [0x20,0x04,0xf0,0xf3]
+	vcls.s8	d16, d16
+@ CHECK: vcls.s16	d16, d16        @ encoding: [0x20,0x04,0xf4,0xf3]
+	vcls.s16	d16, d16
+@ CHECK: vcls.s32	d16, d16        @ encoding: [0x20,0x04,0xf8,0xf3]
+	vcls.s32	d16, d16
+@ CHECK: vcls.s8	q8, q8                  @ encoding: [0x60,0x04,0xf0,0xf3]
+	vcls.s8	q8, q8
+@ CHECK: vcls.s16	q8, q8          @ encoding: [0x60,0x04,0xf4,0xf3]
+	vcls.s16	q8, q8
+@ CHECK: vcls.s32	q8, q8          @ encoding: [0x60,0x04,0xf8,0xf3]
+	vcls.s32	q8, q8
+
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
new file mode 100644
index 000000000000..8710923c670d
--- /dev/null
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -0,0 +1,47 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vand	d16, d17, d16           @ encoding: [0xb0,0x01,0x41,0xf2]
+	vand	d16, d17, d16
+@ CHECK: vand	q8, q8, q9              @ encoding: [0xf2,0x01,0x40,0xf2]
+	vand	q8, q8, q9
+
+@ CHECK: veor	d16, d17, d16           @ encoding: [0xb0,0x01,0x41,0xf3]
+	veor	d16, d17, d16
+@ CHECK: veor	q8, q8, q9              @ encoding: [0xf2,0x01,0x40,0xf3]
+	veor	q8, q8, q9
+
+@ CHECK: vorr	d16, d17, d16           @ encoding: [0xb0,0x01,0x61,0xf2]
+	vorr	d16, d17, d16
+@ CHECK: vorr	q8, q8, q9              @ encoding: [0xf2,0x01,0x60,0xf2]
+	vorr	q8, q8, q9
+@ CHECK: vorr.i32	d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
+  vorr.i32	d16, #0x1000000
+@ CHECK: vorr.i32	q8, #0x1000000  @ encoding: [0x51,0x07,0xc0,0xf2]
+  vorr.i32	q8, #0x1000000
+@ CHECK: vorr.i32	q8, #0x0        @ encoding: [0x50,0x01,0xc0,0xf2]
+  vorr.i32	q8, #0x0
+
+@ CHECK: vbic	d16, d17, d16           @ encoding: [0xb0,0x01,0x51,0xf2]
+	vbic	d16, d17, d16
+@ CHECK: vbic	q8, q8, q9              @ encoding: [0xf2,0x01,0x50,0xf2]
+	vbic	q8, q8, q9
+@ CHECK: vbic.i32	d16, #0xFF000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
+  vbic.i32	d16, #0xFF000000
+@ CHECK: vbic.i32	q8, #0xFF000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
+  vbic.i32	q8, #0xFF000000
+
+@ CHECK: vorn	d16, d17, d16           @ encoding: [0xb0,0x01,0x71,0xf2]
+	vorn	d16, d17, d16
+@ CHECK: vorn	q8, q8, q9              @ encoding: [0xf2,0x01,0x70,0xf2]
+	vorn	q8, q8, q9
+
+@ CHECK: vmvn	d16, d16                @ encoding: [0xa0,0x05,0xf0,0xf3]
+	vmvn	d16, d16
+@ CHECK: vmvn	q8, q8                  @ encoding: [0xe0,0x05,0xf0,0xf3]
+	vmvn	q8, q8
+
+@ CHECK: vbsl	d18, d17, d16           @ encoding: [0xb0,0x21,0x51,0xf3]
+	vbsl	d18, d17, d16
+@ CHECK: vbsl	q8, q10, q9             @ encoding: [0xf2,0x01,0x54,0xf3]
+	vbsl	q8, q10, q9
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
new file mode 100644
index 000000000000..6bfc549e9474
--- /dev/null
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -0,0 +1,115 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ FIXME: We cannot currently test the following instructions, which are 
+@ currently marked as for-disassembly only in the .td files:
+@  - VCEQz
+@  - VCGEz, VCLEz
+@  - VCGTz, VCLTz
+
+@ CHECK: vceq.i8	d16, d16, d17           @ encoding: [0xb1,0x08,0x40,0xf3]
+	vceq.i8	d16, d16, d17
+@ CHECK: vceq.i16	d16, d16, d17   @ encoding: [0xb1,0x08,0x50,0xf3]
+	vceq.i16	d16, d16, d17
+@ CHECK: vceq.i32	d16, d16, d17   @ encoding: [0xb1,0x08,0x60,0xf3]
+	vceq.i32	d16, d16, d17
+@ CHECK: vceq.f32	d16, d16, d17   @ encoding: [0xa1,0x0e,0x40,0xf2]
+	vceq.f32	d16, d16, d17
+@ CHECK: vceq.i8	q8, q8, q9              @ encoding: [0xf2,0x08,0x40,0xf3]
+	vceq.i8	q8, q8, q9
+@ CHECK: vceq.i16	q8, q8, q9      @ encoding: [0xf2,0x08,0x50,0xf3]
+	vceq.i16	q8, q8, q9
+@ CHECK: vceq.i32	q8, q8, q9      @ encoding: [0xf2,0x08,0x60,0xf3]
+	vceq.i32	q8, q8, q9
+@ CHECK: vceq.f32	q8, q8, q9      @ encoding: [0xe2,0x0e,0x40,0xf2]
+	vceq.f32	q8, q8, q9
+
+@ CHECK: vcge.s8	d16, d16, d17           @ encoding: [0xb1,0x03,0x40,0xf2]
+	vcge.s8	d16, d16, d17
+@ CHECK: vcge.s16	d16, d16, d17   @ encoding: [0xb1,0x03,0x50,0xf2]
+	vcge.s16	d16, d16, d17
+@ CHECK: vcge.s32	d16, d16, d17   @ encoding: [0xb1,0x03,0x60,0xf2]
+	vcge.s32	d16, d16, d17
+@ CHECK: vcge.u8	d16, d16, d17           @ encoding: [0xb1,0x03,0x40,0xf3]
+	vcge.u8	d16, d16, d17
+@ CHECK: vcge.u16	d16, d16, d17   @ encoding: [0xb1,0x03,0x50,0xf3]
+	vcge.u16	d16, d16, d17
+@ CHECK: vcge.u32	d16, d16, d17   @ encoding: [0xb1,0x03,0x60,0xf3]
+	vcge.u32	d16, d16, d17
+@ CHECK: vcge.f32	d16, d16, d17   @ encoding: [0xa1,0x0e,0x40,0xf3]
+	vcge.f32	d16, d16, d17
+@ CHECK: vcge.s8	q8, q8, q9              @ encoding: [0xf2,0x03,0x40,0xf2]
+	vcge.s8	q8, q8, q9
+@ CHECK: vcge.s16	q8, q8, q9      @ encoding: [0xf2,0x03,0x50,0xf2]
+	vcge.s16	q8, q8, q9
+@ CHECK: vcge.s32	q8, q8, q9      @ encoding: [0xf2,0x03,0x60,0xf2]
+	vcge.s32	q8, q8, q9
+@ CHECK: vcge.u8	q8, q8, q9              @ encoding: [0xf2,0x03,0x40,0xf3]
+	vcge.u8	q8, q8, q9
+@ CHECK: vcge.u16	q8, q8, q9      @ encoding: [0xf2,0x03,0x50,0xf3]
+	vcge.u16	q8, q8, q9
+@ CHECK: vcge.u32	q8, q8, q9      @ encoding: [0xf2,0x03,0x60,0xf3]
+	vcge.u32	q8, q8, q9
+@ CHECK: vcge.f32	q8, q8, q9      @ encoding: [0xe2,0x0e,0x40,0xf3]
+	vcge.f32	q8, q8, q9
+@ CHECK: vacge.f32	d16, d16, d17   @ encoding: [0xb1,0x0e,0x40,0xf3]
+	vacge.f32	d16, d16, d17
+@ CHECK: vacge.f32	q8, q8, q9      @ encoding: [0xf2,0x0e,0x40,0xf3]
+	vacge.f32	q8, q8, q9
+
+@ CHECK: vcgt.s8	d16, d16, d17           @ encoding: [0xa1,0x03,0x40,0xf2]
+	vcgt.s8	d16, d16, d17
+@ CHECK: vcgt.s16	d16, d16, d17   @ encoding: [0xa1,0x03,0x50,0xf2]
+	vcgt.s16	d16, d16, d17
+@ CHECK: vcgt.s32	d16, d16, d17   @ encoding: [0xa1,0x03,0x60,0xf2]
+	vcgt.s32	d16, d16, d17
+@ CHECK: vcgt.u8	d16, d16, d17           @ encoding: [0xa1,0x03,0x40,0xf3]
+	vcgt.u8	d16, d16, d17
+@ CHECK: vcgt.u16	d16, d16, d17   @ encoding: [0xa1,0x03,0x50,0xf3]
+	vcgt.u16	d16, d16, d17
+@ CHECK: vcgt.u32	d16, d16, d17   @ encoding: [0xa1,0x03,0x60,0xf3]
+	vcgt.u32	d16, d16, d17
+@ CHECK: vcgt.f32	d16, d16, d17   @ encoding: [0xa1,0x0e,0x60,0xf3]
+	vcgt.f32	d16, d16, d17
+@ CHECK: vcgt.s8	q8, q8, q9              @ encoding: [0xe2,0x03,0x40,0xf2]
+	vcgt.s8	q8, q8, q9
+@ CHECK: vcgt.s16	q8, q8, q9      @ encoding: [0xe2,0x03,0x50,0xf2]
+	vcgt.s16	q8, q8, q9
+@ CHECK: vcgt.s32	q8, q8, q9      @ encoding: [0xe2,0x03,0x60,0xf2]
+	vcgt.s32	q8, q8, q9
+@ CHECK: vcgt.u8	q8, q8, q9              @ encoding: [0xe2,0x03,0x40,0xf3]
+	vcgt.u8	q8, q8, q9
+@ CHECK: vcgt.u16	q8, q8, q9      @ encoding: [0xe2,0x03,0x50,0xf3]
+	vcgt.u16	q8, q8, q9
+@ CHECK: vcgt.u32	q8, q8, q9      @ encoding: [0xe2,0x03,0x60,0xf3]
+	vcgt.u32	q8, q8, q9
+@ CHECK: vcgt.f32	q8, q8, q9      @ encoding: [0xe2,0x0e,0x60,0xf3]
+	vcgt.f32	q8, q8, q9
+@ CHECK: vacgt.f32	d16, d16, d17   @ encoding: [0xb1,0x0e,0x60,0xf3]
+	vacgt.f32	d16, d16, d17
+@ CHECK: vacgt.f32	q8, q8, q9      @ encoding: [0xf2,0x0e,0x60,0xf3]
+	vacgt.f32	q8, q8, q9
+
+@ CHECK: vtst.8	d16, d16, d17           @ encoding: [0xb1,0x08,0x40,0xf2]
+	vtst.8	d16, d16, d17
+@ CHECK: vtst.16	d16, d16, d17           @ encoding: [0xb1,0x08,0x50,0xf2]
+	vtst.16	d16, d16, d17
+@ CHECK: vtst.32	d16, d16, d17           @ encoding: [0xb1,0x08,0x60,0xf2]
+	vtst.32	d16, d16, d17
+@ CHECK: vtst.8	q8, q8, q9              @ encoding: [0xf2,0x08,0x40,0xf2]
+	vtst.8	q8, q8, q9
+@ CHECK: vtst.16	q8, q8, q9              @ encoding: [0xf2,0x08,0x50,0xf2]
+	vtst.16	q8, q8, q9
+@ CHECK: vtst.32	q8, q8, q9              @ encoding: [0xf2,0x08,0x60,0xf2]
+	vtst.32	q8, q8, q9
+
+@ CHECK: vceq.i8	d16, d16, #0            @ encoding: [0x20,0x01,0xf1,0xf3]
+  vceq.i8	d16, d16, #0
+@ CHECK: vcge.s8	d16, d16, #0            @ encoding: [0xa0,0x00,0xf1,0xf3]
+  vcge.s8	d16, d16, #0
+@ CHECK: vcle.s8	d16, d16, #0            @ encoding: [0xa0,0x01,0xf1,0xf3]
+  vcle.s8	d16, d16, #0
+@ CHECK: vcgt.s8	d16, d16, #0            @ encoding: [0x20,0x00,0xf1,0xf3]
+  vcgt.s8	d16, d16, #0
+@ CHECK: vclt.s8	d16, d16, #0            @ encoding: [0x20,0x02,0xf1,0xf3]
+  vclt.s8	d16, d16, #0
diff --git a/test/MC/ARM/neon-convert-encoding.s b/test/MC/ARM/neon-convert-encoding.s
new file mode 100644
index 000000000000..1733c5222bee
--- /dev/null
+++ b/test/MC/ARM/neon-convert-encoding.s
@@ -0,0 +1,38 @@
+@ RUN: llvm-mc -mcpu=cortex-a9 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vcvt.s32.f32	d16, d16        @ encoding: [0x20,0x07,0xfb,0xf3]
+	vcvt.s32.f32	d16, d16
+@ CHECK: vcvt.u32.f32	d16, d16        @ encoding: [0xa0,0x07,0xfb,0xf3]
+	vcvt.u32.f32	d16, d16
+@ CHECK: vcvt.f32.s32	d16, d16        @ encoding: [0x20,0x06,0xfb,0xf3]
+	vcvt.f32.s32	d16, d16
+@ CHECK: vcvt.f32.u32	d16, d16        @ encoding: [0xa0,0x06,0xfb,0xf3]
+	vcvt.f32.u32	d16, d16
+@ CHECK: vcvt.s32.f32	q8, q8          @ encoding: [0x60,0x07,0xfb,0xf3]
+	vcvt.s32.f32	q8, q8
+@ CHECK: vcvt.u32.f32	q8, q8          @ encoding: [0xe0,0x07,0xfb,0xf3]
+	vcvt.u32.f32	q8, q8
+@ CHECK: vcvt.f32.s32	q8, q8          @ encoding: [0x60,0x06,0xfb,0xf3]
+	vcvt.f32.s32	q8, q8
+@ CHECK: vcvt.f32.u32	q8, q8          @ encoding: [0xe0,0x06,0xfb,0xf3]
+	vcvt.f32.u32	q8, q8
+@ CHECK: vcvt.s32.f32	d16, d16, #1    @ encoding: [0x30,0x0f,0xff,0xf2]
+	vcvt.s32.f32	d16, d16, #1
+@ CHECK: vcvt.u32.f32	d16, d16, #1    @ encoding: [0x30,0x0f,0xff,0xf3]
+	vcvt.u32.f32	d16, d16, #1
+@ CHECK: vcvt.f32.s32	d16, d16, #1    @ encoding: [0x30,0x0e,0xff,0xf2]
+	vcvt.f32.s32	d16, d16, #1
+@ CHECK: vcvt.f32.u32	d16, d16, #1    @ encoding: [0x30,0x0e,0xff,0xf3]
+	vcvt.f32.u32	d16, d16, #1
+@ CHECK: vcvt.s32.f32	q8, q8, #1      @ encoding: [0x70,0x0f,0xff,0xf2]
+	vcvt.s32.f32	q8, q8, #1
+@ CHECK: vcvt.u32.f32	q8, q8, #1      @ encoding: [0x70,0x0f,0xff,0xf3]
+	vcvt.u32.f32	q8, q8, #1
+@ CHECK: vcvt.f32.s32	q8, q8, #1      @ encoding: [0x70,0x0e,0xff,0xf2]
+	vcvt.f32.s32	q8, q8, #1
+@ CHECK: vcvt.f32.u32	q8, q8, #1      @ encoding: [0x70,0x0e,0xff,0xf3]
+	vcvt.f32.u32	q8, q8, #1
+@ CHECK: vcvt.f32.f16	q8, d16         @ encoding: [0x20,0x07,0xf6,0xf3]
+	vcvt.f32.f16	q8, d16
+@ CHECK: vcvt.f16.f32	d16, q8         @ encoding: [0x20,0x06,0xf6,0xf3]
+	vcvt.f16.f32	d16, q8
diff --git a/test/MC/ARM/neon-dup-encoding.s b/test/MC/ARM/neon-dup-encoding.s
new file mode 100644
index 000000000000..0aebdce304a4
--- /dev/null
+++ b/test/MC/ARM/neon-dup-encoding.s
@@ -0,0 +1,27 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vdup.8	d16, r0                 @ encoding: [0x90,0x0b,0xc0,0xee]
+	vdup.8	d16, r0
+@ CHECK: vdup.16	d16, r0                 @ encoding: [0xb0,0x0b,0x80,0xee]
+	vdup.16	d16, r0
+@ CHECK: vdup.32	d16, r0                 @ encoding: [0x90,0x0b,0x80,0xee]
+	vdup.32	d16, r0
+@ CHECK: vdup.8	q8, r0                  @ encoding: [0x90,0x0b,0xe0,0xee]
+	vdup.8	q8, r0
+@ CHECK: vdup.16	q8, r0                  @ encoding: [0xb0,0x0b,0xa0,0xee]
+	vdup.16	q8, r0
+@ CHECK: vdup.32	q8, r0                  @ encoding: [0x90,0x0b,0xa0,0xee]
+	vdup.32	q8, r0
+@ CHECK: vdup.8	d16, d16[1]             @ encoding: [0x20,0x0c,0xf3,0xf3]
+	vdup.8	d16, d16[1]
+@ CHECK: vdup.16	d16, d16[1]             @ encoding: [0x20,0x0c,0xf6,0xf3]
+	vdup.16	d16, d16[1]
+@ CHECK: vdup.32	d16, d16[1]             @ encoding: [0x20,0x0c,0xfc,0xf3]
+	vdup.32	d16, d16[1]
+@ CHECK: vdup.8	q8, d16[1]              @ encoding: [0x60,0x0c,0xf3,0xf3]
+	vdup.8	q8, d16[1]
+@ CHECK: vdup.16	q8, d16[1]              @ encoding: [0x60,0x0c,0xf6,0xf3]
+	vdup.16	q8, d16[1]
+@ CHECK: vdup.32	q8, d16[1]              @ encoding: [0x60,0x0c,0xfc,0xf3]
+	vdup.32	q8, d16[1]
diff --git a/test/MC/ARM/neon-minmax-encoding.s b/test/MC/ARM/neon-minmax-encoding.s
new file mode 100644
index 000000000000..2d0d8c9b8aeb
--- /dev/null
+++ b/test/MC/ARM/neon-minmax-encoding.s
@@ -0,0 +1,58 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf2]
+	vmin.s8	d16, d16, d17
+@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf2]
+	vmin.s16	d16, d16, d17
+@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf2]
+	vmin.s32	d16, d16, d17
+@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf3]
+	vmin.u8	d16, d16, d17
+@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf3]
+	vmin.u16	d16, d16, d17
+@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf3]
+	vmin.u32	d16, d16, d17
+@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x60,0xf2]
+	vmin.f32	d16, d16, d17
+@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf2]
+	vmin.s8	q8, q8, q9
+@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf2]
+	vmin.s16	q8, q8, q9
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf2]
+	vmin.s32	q8, q8, q9
+@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf3]
+	vmin.u8	q8, q8, q9
+@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf3]
+	vmin.u16	q8, q8, q9
+@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf3]
+	vmin.u32	q8, q8, q9
+@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x60,0xf2]
+	vmin.f32	q8, q8, q9
+@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf2]
+	vmax.s8	d16, d16, d17
+@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf2]
+	vmax.s16	d16, d16, d17
+@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf2]
+	vmax.s32	d16, d16, d17
+@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf3]
+	vmax.u8	d16, d16, d17
+@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf3]
+	vmax.u16	d16, d16, d17
+@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf3]
+	vmax.u32	d16, d16, d17
+@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x40,0xf2]
+	vmax.f32	d16, d16, d17
+@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf2]
+	vmax.s8	q8, q8, q9
+@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf2]
+	vmax.s16	q8, q8, q9
+@ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf2]
+	vmax.s32	q8, q8, q9
+@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf3]
+	vmax.u8	q8, q8, q9
+@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf3]
+	vmax.u16	q8, q8, q9
+@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf3]
+	vmax.u32	q8, q8, q9
+@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x40,0xf2]
+	vmax.f32	q8, q8, q9
diff --git a/test/MC/ARM/neon-mov-encoding.s b/test/MC/ARM/neon-mov-encoding.s
new file mode 100644
index 000000000000..ca678d022df8
--- /dev/null
+++ b/test/MC/ARM/neon-mov-encoding.s
@@ -0,0 +1,117 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vmov.i8	d16, #0x8               @ encoding: [0x18,0x0e,0xc0,0xf2]
+	vmov.i8	d16, #0x8
+@ CHECK: vmov.i16	d16, #0x10      @ encoding: [0x10,0x08,0xc1,0xf2]
+	vmov.i16	d16, #0x10
+@ CHECK: vmov.i16	d16, #0x1000    @ encoding: [0x10,0x0a,0xc1,0xf2]
+	vmov.i16	d16, #0x1000
+@ CHECK: vmov.i32	d16, #0x20      @ encoding: [0x10,0x00,0xc2,0xf2]
+	vmov.i32	d16, #0x20
+@ CHECK: vmov.i32	d16, #0x2000    @ encoding: [0x10,0x02,0xc2,0xf2]
+	vmov.i32	d16, #0x2000
+@ CHECK: vmov.i32	d16, #0x200000  @ encoding: [0x10,0x04,0xc2,0xf2]
+	vmov.i32	d16, #0x200000
+@ CHECK: vmov.i32	d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xf2]
+	vmov.i32	d16, #0x20000000
+@ CHECK: vmov.i32	d16, #0x20FF    @ encoding: [0x10,0x0c,0xc2,0xf2]
+	vmov.i32	d16, #0x20FF
+@ CHECK: vmov.i32	d16, #0x20FFFF  @ encoding: [0x10,0x0d,0xc2,0xf2]
+	vmov.i32	d16, #0x20FFFF
+@ CHECK: vmov.i64	d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xf3]
+	vmov.i64	d16, #0xFF0000FF0000FFFF
+@ CHECK: vmov.i8	q8, #0x8                @ encoding: [0x58,0x0e,0xc0,0xf2]
+	vmov.i8	q8, #0x8
+@ CHECK: vmov.i16	q8, #0x10       @ encoding: [0x50,0x08,0xc1,0xf2]
+	vmov.i16	q8, #0x10
+@ CHECK: vmov.i16	q8, #0x1000     @ encoding: [0x50,0x0a,0xc1,0xf2]
+	vmov.i16	q8, #0x1000
+@ CHECK: vmov.i32	q8, #0x20       @ encoding: [0x50,0x00,0xc2,0xf2]
+	vmov.i32	q8, #0x20
+@ CHECK: vmov.i32	q8, #0x2000     @ encoding: [0x50,0x02,0xc2,0xf2]
+	vmov.i32	q8, #0x2000
+@ CHECK: vmov.i32	q8, #0x200000   @ encoding: [0x50,0x04,0xc2,0xf2]
+	vmov.i32	q8, #0x200000
+@ CHECK: vmov.i32	q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xf2]
+	vmov.i32	q8, #0x20000000
+@ CHECK: vmov.i32	q8, #0x20FF     @ encoding: [0x50,0x0c,0xc2,0xf2]
+	vmov.i32	q8, #0x20FF
+@ CHECK: vmov.i32	q8, #0x20FFFF   @ encoding: [0x50,0x0d,0xc2,0xf2]
+	vmov.i32	q8, #0x20FFFF
+@ CHECK: vmov.i64	q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xf3]
+	vmov.i64	q8, #0xFF0000FF0000FFFF
+@ CHECK: vmvn.i16	d16, #0x10      @ encoding: [0x30,0x08,0xc1,0xf2]
+	vmvn.i16	d16, #0x10
+@ CHECK: vmvn.i16	d16, #0x1000    @ encoding: [0x30,0x0a,0xc1,0xf2]
+	vmvn.i16	d16, #0x1000
+@ CHECK: vmvn.i32	d16, #0x20      @ encoding: [0x30,0x00,0xc2,0xf2]
+	vmvn.i32	d16, #0x20
+@ CHECK: vmvn.i32	d16, #0x2000    @ encoding: [0x30,0x02,0xc2,0xf2]
+	vmvn.i32	d16, #0x2000
+@ CHECK: vmvn.i32	d16, #0x200000  @ encoding: [0x30,0x04,0xc2,0xf2]
+	vmvn.i32	d16, #0x200000
+@ CHECK: vmvn.i32	d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xf2]
+	vmvn.i32	d16, #0x20000000
+@ CHECK: vmvn.i32	d16, #0x20FF    @ encoding: [0x30,0x0c,0xc2,0xf2]
+	vmvn.i32	d16, #0x20FF
+@ CHECK: vmvn.i32	d16, #0x20FFFF  @ encoding: [0x30,0x0d,0xc2,0xf2]
+	vmvn.i32	d16, #0x20FFFF
+@ CHECK: vmovl.s8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xf2]
+	vmovl.s8	q8, d16
+@ CHECK: vmovl.s16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xf2]
+	vmovl.s16	q8, d16
+@ CHECK: vmovl.s32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xf2]
+	vmovl.s32	q8, d16
+@ CHECK: vmovl.u8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xf3]
+	vmovl.u8	q8, d16
+@ CHECK: vmovl.u16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xf3]
+	vmovl.u16	q8, d16
+@ CHECK: vmovl.u32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xf3]
+	vmovl.u32	q8, d16
+@ CHECK: vmovn.i16	d16, q8         @ encoding: [0x20,0x02,0xf2,0xf3]
+	vmovn.i16	d16, q8
+@ CHECK: vmovn.i32	d16, q8         @ encoding: [0x20,0x02,0xf6,0xf3]
+	vmovn.i32	d16, q8
+@ CHECK: vmovn.i64	d16, q8         @ encoding: [0x20,0x02,0xfa,0xf3]
+	vmovn.i64	d16, q8
+@ CHECK: vqmovn.s16	d16, q8         @ encoding: [0xa0,0x02,0xf2,0xf3]
+	vqmovn.s16	d16, q8
+@ CHECK: vqmovn.s32	d16, q8         @ encoding: [0xa0,0x02,0xf6,0xf3]
+	vqmovn.s32	d16, q8
+@ CHECK: vqmovn.s64	d16, q8         @ encoding: [0xa0,0x02,0xfa,0xf3]
+	vqmovn.s64	d16, q8
+@ CHECK: vqmovn.u16	d16, q8         @ encoding: [0xe0,0x02,0xf2,0xf3]
+	vqmovn.u16	d16, q8
+@ CHECK: vqmovn.u32	d16, q8         @ encoding: [0xe0,0x02,0xf6,0xf3]
+	vqmovn.u32	d16, q8
+@ CHECK: vqmovn.u64	d16, q8         @ encoding: [0xe0,0x02,0xfa,0xf3]
+	vqmovn.u64	d16, q8
+@ CHECK: vqmovun.s16	d16, q8         @ encoding: [0x60,0x02,0xf2,0xf3]
+	vqmovun.s16	d16, q8
+@ CHECK: vqmovun.s32	d16, q8         @ encoding: [0x60,0x02,0xf6,0xf3]
+	vqmovun.s32	d16, q8
+@ CHECK: vqmovun.s64	d16, q8         @ encoding: [0x60,0x02,0xfa,0xf3]
+	vqmovun.s64	d16, q8
+@ CHECK: vmov.s8	r0, d16[1]              @ encoding: [0xb0,0x0b,0x50,0xee]
+	vmov.s8	r0, d16[1]
+@ CHECK: vmov.s16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x10,0xee]
+	vmov.s16	r0, d16[1]
+@ CHECK: vmov.u8	r0, d16[1]              @ encoding: [0xb0,0x0b,0xd0,0xee]
+	vmov.u8	r0, d16[1]
+@ CHECK: vmov.u16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x90,0xee]
+	vmov.u16	r0, d16[1]
+@ CHECK: vmov.32	r0, d16[1]              @ encoding: [0x90,0x0b,0x30,0xee]
+	vmov.32	r0, d16[1]
+@ CHECK: vmov.8	d16[1], r1              @ encoding: [0xb0,0x1b,0x40,0xee]
+	vmov.8	d16[1], r1
+@ CHECK: vmov.16	d16[1], r1              @ encoding: [0xf0,0x1b,0x00,0xee]
+	vmov.16	d16[1], r1
+@ CHECK: vmov.32	d16[1], r1              @ encoding: [0x90,0x1b,0x20,0xee]
+	vmov.32	d16[1], r1
+@ CHECK: vmov.8	d18[1], r1              @ encoding: [0xb0,0x1b,0x42,0xee]
+	vmov.8	d18[1], r1
+@ CHECK: vmov.16	d18[1], r1              @ encoding: [0xf0,0x1b,0x02,0xee]
+	vmov.16	d18[1], r1
+@ CHECK: vmov.32	d18[1], r1              @ encoding: [0x90,0x1b,0x22,0xee]
+	vmov.32	d18[1], r1
diff --git a/test/MC/ARM/neon-mul-accum-encoding.s b/test/MC/ARM/neon-mul-accum-encoding.s
new file mode 100644
index 000000000000..e269dea360f4
--- /dev/null
+++ b/test/MC/ARM/neon-mul-accum-encoding.s
@@ -0,0 +1,67 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vmla.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xf2]
+	vmla.i8	d16, d18, d17
+@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf2]
+	vmla.i16	d16, d18, d17
+@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf2]
+	vmla.i32	d16, d18, d17
+@ CHECK: vmla.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x42,0xf2]
+	vmla.f32	d16, d18, d17
+@ CHECK: vmla.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xf2]
+	vmla.i8	q9, q8, q10
+@ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf2]
+	vmla.i16	q9, q8, q10
+@ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf2]
+	vmla.i32	q9, q8, q10
+@ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x40,0xf2]
+	vmla.f32	q9, q8, q10
+@ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf2]
+	vmlal.s8	q8, d19, d18
+@ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf2]
+	vmlal.s16	q8, d19, d18
+@ CHECK: vmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf2]
+	vmlal.s32	q8, d19, d18
+@ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xf3]
+	vmlal.u8	q8, d19, d18
+@ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xf3]
+	vmlal.u16	q8, d19, d18
+@ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xf3]
+	vmlal.u32	q8, d19, d18
+@ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x09,0xd3,0xf2]
+	vqdmlal.s16	q8, d19, d18
+@ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x09,0xe3,0xf2]
+	vqdmlal.s32	q8, d19, d18
+@ CHECK: vmls.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xf3]
+	vmls.i8	d16, d18, d17
+@ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xf3]
+	vmls.i16	d16, d18, d17
+@ CHECK: vmls.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xf3]
+	vmls.i32	d16, d18, d17
+@ CHECK: vmls.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x62,0xf2]
+	vmls.f32	d16, d18, d17
+@ CHECK: vmls.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xf3]
+	vmls.i8	q9, q8, q10
+@ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xf3]
+	vmls.i16	q9, q8, q10
+@ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xf3]
+	vmls.i32	q9, q8, q10
+@ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x60,0xf2]
+	vmls.f32	q9, q8, q10
+@ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf2]
+	vmlsl.s8	q8, d19, d18
+@ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf2]
+	vmlsl.s16	q8, d19, d18
+@ CHECK: vmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf2]
+	vmlsl.s32	q8, d19, d18
+@ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xf3]
+	vmlsl.u8	q8, d19, d18
+@ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xf3]
+	vmlsl.u16	q8, d19, d18
+@ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xf3]
+	vmlsl.u32	q8, d19, d18
+@ CHECK: vqdmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0b,0xd3,0xf2]
+	vqdmlsl.s16	q8, d19, d18
+@ CHECK: vqdmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0b,0xe3,0xf2]
+	vqdmlsl.s32	q8, d19, d18
diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s
new file mode 100644
index 000000000000..4ff192f6e557
--- /dev/null
+++ b/test/MC/ARM/neon-mul-encoding.s
@@ -0,0 +1,56 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vmul.i8	d16, d16, d17           @ encoding: [0xb1,0x09,0x40,0xf2]
+	vmul.i8	d16, d16, d17
+@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0xb1,0x09,0x50,0xf2]
+	vmul.i16	d16, d16, d17
+@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0xb1,0x09,0x60,0xf2]
+	vmul.i32	d16, d16, d17
+@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0xb1,0x0d,0x40,0xf3]
+	vmul.f32	d16, d16, d17
+@ CHECK: vmul.i8	q8, q8, q9              @ encoding: [0xf2,0x09,0x40,0xf2]
+	vmul.i8	q8, q8, q9
+@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0xf2,0x09,0x50,0xf2]
+	vmul.i16	q8, q8, q9
+@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0xf2,0x09,0x60,0xf2]
+	vmul.i32	q8, q8, q9
+@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0xf2,0x0d,0x40,0xf3]
+	vmul.f32	q8, q8, q9
+@ CHECK: vmul.p8	d16, d16, d17           @ encoding: [0xb1,0x09,0x40,0xf3]
+	vmul.p8	d16, d16, d17
+@ CHECK: vmul.p8	q8, q8, q9              @ encoding: [0xf2,0x09,0x40,0xf3]
+	vmul.p8	q8, q8, q9
+@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0xa1,0x0b,0x50,0xf2]
+	vqdmulh.s16	d16, d16, d17
+@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0xa1,0x0b,0x60,0xf2]
+	vqdmulh.s32	d16, d16, d17
+@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0xe2,0x0b,0x50,0xf2]
+	vqdmulh.s16	q8, q8, q9
+@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0xe2,0x0b,0x60,0xf2]
+	vqdmulh.s32	q8, q8, q9
+@ CHECK: vqrdmulh.s16	d16, d16, d17   @ encoding: [0xa1,0x0b,0x50,0xf3]
+	vqrdmulh.s16	d16, d16, d17
+@ CHECK: vqrdmulh.s32	d16, d16, d17   @ encoding: [0xa1,0x0b,0x60,0xf3]
+	vqrdmulh.s32	d16, d16, d17
+@ CHECK: vqrdmulh.s16	q8, q8, q9      @ encoding: [0xe2,0x0b,0x50,0xf3]
+	vqrdmulh.s16	q8, q8, q9
+@ CHECK: vqrdmulh.s32	q8, q8, q9      @ encoding: [0xe2,0x0b,0x60,0xf3]
+	vqrdmulh.s32	q8, q8, q9
+@ CHECK: vmull.s8	q8, d16, d17    @ encoding: [0xa1,0x0c,0xc0,0xf2]
+	vmull.s8	q8, d16, d17
+@ CHECK: vmull.s16	q8, d16, d17    @ encoding: [0xa1,0x0c,0xd0,0xf2]
+	vmull.s16	q8, d16, d17
+@ CHECK: vmull.s32	q8, d16, d17    @ encoding: [0xa1,0x0c,0xe0,0xf2]
+	vmull.s32	q8, d16, d17
+@ CHECK: vmull.u8	q8, d16, d17    @ encoding: [0xa1,0x0c,0xc0,0xf3]
+	vmull.u8	q8, d16, d17
+@ CHECK: vmull.u16	q8, d16, d17    @ encoding: [0xa1,0x0c,0xd0,0xf3]
+	vmull.u16	q8, d16, d17
+@ CHECK: vmull.u32	q8, d16, d17    @ encoding: [0xa1,0x0c,0xe0,0xf3]
+	vmull.u32	q8, d16, d17
+@ CHECK: vmull.p8	q8, d16, d17    @ encoding: [0xa1,0x0e,0xc0,0xf2]
+	vmull.p8	q8, d16, d17
+@ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xa1,0x0d,0xd0,0xf2]
+	vqdmull.s16	q8, d16, d17
+@ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xa1,0x0d,0xe0,0xf2]
+	vqdmull.s32	q8, d16, d17
diff --git a/test/MC/ARM/neon-neg-encoding.s b/test/MC/ARM/neon-neg-encoding.s
new file mode 100644
index 000000000000..014bdb0a86d1
--- /dev/null
+++ b/test/MC/ARM/neon-neg-encoding.s
@@ -0,0 +1,30 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vneg.s8	d16, d16                @ encoding: [0xa0,0x03,0xf1,0xf3]
+	vneg.s8	d16, d16
+@ CHECK: vneg.s16	d16, d16        @ encoding: [0xa0,0x03,0xf5,0xf3]
+	vneg.s16	d16, d16
+@ CHECK: vneg.s32	d16, d16        @ encoding: [0xa0,0x03,0xf9,0xf3]
+	vneg.s32	d16, d16
+@ CHECK: vneg.f32	d16, d16        @ encoding: [0xa0,0x07,0xf9,0xf3]
+	vneg.f32	d16, d16
+@ CHECK: vneg.s8	q8, q8                  @ encoding: [0xe0,0x03,0xf1,0xf3]
+	vneg.s8	q8, q8
+@ CHECK: vneg.s16	q8, q8          @ encoding: [0xe0,0x03,0xf5,0xf3]
+	vneg.s16	q8, q8
+@ CHECK: vneg.s32	q8, q8          @ encoding: [0xe0,0x03,0xf9,0xf3]
+	vneg.s32	q8, q8
+@ CHECK: vneg.f32	q8, q8          @ encoding: [0xe0,0x07,0xf9,0xf3]
+	vneg.f32	q8, q8
+@ CHECK: vqneg.s8	d16, d16        @ encoding: [0xa0,0x07,0xf0,0xf3]
+	vqneg.s8	d16, d16
+@ CHECK: vqneg.s16	d16, d16        @ encoding: [0xa0,0x07,0xf4,0xf3]
+	vqneg.s16	d16, d16
+@ CHECK: vqneg.s32	d16, d16        @ encoding: [0xa0,0x07,0xf8,0xf3]
+	vqneg.s32	d16, d16
+@ CHECK: vqneg.s8	q8, q8          @ encoding: [0xe0,0x07,0xf0,0xf3]
+	vqneg.s8	q8, q8
+@ CHECK: vqneg.s16	q8, q8          @ encoding: [0xe0,0x07,0xf4,0xf3]
+	vqneg.s16	q8, q8
+@ CHECK: vqneg.s32	q8, q8          @ encoding: [0xe0,0x07,0xf8,0xf3]
+	vqneg.s32	q8, q8
diff --git a/test/MC/ARM/neon-pairwise-encoding.s b/test/MC/ARM/neon-pairwise-encoding.s
new file mode 100644
index 000000000000..65c47bd64aeb
--- /dev/null
+++ b/test/MC/ARM/neon-pairwise-encoding.s
@@ -0,0 +1,86 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vpadd.i8	d16, d17, d16   @ encoding: [0xb0,0x0b,0x41,0xf2]
+	vpadd.i8	d16, d17, d16
+@ CHECK: vpadd.i16	d16, d17, d16   @ encoding: [0xb0,0x0b,0x51,0xf2]
+	vpadd.i16	d16, d17, d16
+@ CHECK: vpadd.i32	d16, d17, d16   @ encoding: [0xb0,0x0b,0x61,0xf2]
+	vpadd.i32	d16, d17, d16
+@ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf3]
+	vpadd.f32	d16, d16, d17
+@ CHECK: vpaddl.s8	d16, d16        @ encoding: [0x20,0x02,0xf0,0xf3]
+	vpaddl.s8	d16, d16
+@ CHECK: vpaddl.s16	d16, d16        @ encoding: [0x20,0x02,0xf4,0xf3]
+	vpaddl.s16	d16, d16
+@ CHECK: vpaddl.s32	d16, d16        @ encoding: [0x20,0x02,0xf8,0xf3]
+	vpaddl.s32	d16, d16
+@ CHECK: vpaddl.u8	d16, d16        @ encoding: [0xa0,0x02,0xf0,0xf3]
+	vpaddl.u8	d16, d16
+@ CHECK: vpaddl.u16	d16, d16        @ encoding: [0xa0,0x02,0xf4,0xf3]
+	vpaddl.u16	d16, d16
+@ CHECK: vpaddl.u32	d16, d16        @ encoding: [0xa0,0x02,0xf8,0xf3]
+	vpaddl.u32	d16, d16
+@ CHECK: vpaddl.s8	q8, q8          @ encoding: [0x60,0x02,0xf0,0xf3]
+	vpaddl.s8	q8, q8
+@ CHECK: vpaddl.s16	q8, q8          @ encoding: [0x60,0x02,0xf4,0xf3]
+	vpaddl.s16	q8, q8
+@ CHECK: vpaddl.s32	q8, q8          @ encoding: [0x60,0x02,0xf8,0xf3]
+	vpaddl.s32	q8, q8
+@ CHECK: vpaddl.u8	q8, q8          @ encoding: [0xe0,0x02,0xf0,0xf3]
+	vpaddl.u8	q8, q8
+@ CHECK: vpaddl.u16	q8, q8          @ encoding: [0xe0,0x02,0xf4,0xf3]
+	vpaddl.u16	q8, q8
+@ CHECK: vpaddl.u32	q8, q8          @ encoding: [0xe0,0x02,0xf8,0xf3]
+	vpaddl.u32	q8, q8
+@ CHECK: vpadal.s8	d16, d17        @ encoding: [0x21,0x06,0xf0,0xf3]
+	vpadal.s8	d16, d17
+@ CHECK: vpadal.s16	d16, d17        @ encoding: [0x21,0x06,0xf4,0xf3]
+	vpadal.s16	d16, d17
+@ CHECK: vpadal.s32	d16, d17        @ encoding: [0x21,0x06,0xf8,0xf3]
+	vpadal.s32	d16, d17
+@ CHECK: vpadal.u8	d16, d17        @ encoding: [0xa1,0x06,0xf0,0xf3]
+	vpadal.u8	d16, d17
+@ CHECK: vpadal.u16	d16, d17        @ encoding: [0xa1,0x06,0xf4,0xf3]
+	vpadal.u16	d16, d17
+@ CHECK: vpadal.u32	d16, d17        @ encoding: [0xa1,0x06,0xf8,0xf3]
+	vpadal.u32	d16, d17
+@ CHECK: vpadal.s8	q9, q8          @ encoding: [0x60,0x26,0xf0,0xf3]
+	vpadal.s8	q9, q8
+@ CHECK: vpadal.s16	q9, q8          @ encoding: [0x60,0x26,0xf4,0xf3]
+	vpadal.s16	q9, q8
+@ CHECK: vpadal.s32	q9, q8          @ encoding: [0x60,0x26,0xf8,0xf3]
+	vpadal.s32	q9, q8
+@ CHECK: vpadal.u8	q9, q8          @ encoding: [0xe0,0x26,0xf0,0xf3]
+	vpadal.u8	q9, q8
+@ CHECK: vpadal.u16	q9, q8          @ encoding: [0xe0,0x26,0xf4,0xf3]
+	vpadal.u16	q9, q8
+@ CHECK: vpadal.u32	q9, q8          @ encoding: [0xe0,0x26,0xf8,0xf3]
+	vpadal.u32	q9, q8
+@ CHECK: vpmin.s8	d16, d16, d17   @ encoding: [0xb1,0x0a,0x40,0xf2]
+	vpmin.s8	d16, d16, d17
+@ CHECK: vpmin.s16	d16, d16, d17   @ encoding: [0xb1,0x0a,0x50,0xf2]
+	vpmin.s16	d16, d16, d17
+@ CHECK: vpmin.s32	d16, d16, d17   @ encoding: [0xb1,0x0a,0x60,0xf2]
+	vpmin.s32	d16, d16, d17
+@ CHECK: vpmin.u8	d16, d16, d17   @ encoding: [0xb1,0x0a,0x40,0xf3]
+	vpmin.u8	d16, d16, d17
+@ CHECK: vpmin.u16	d16, d16, d17   @ encoding: [0xb1,0x0a,0x50,0xf3]
+	vpmin.u16	d16, d16, d17
+@ CHECK: vpmin.u32	d16, d16, d17   @ encoding: [0xb1,0x0a,0x60,0xf3]
+	vpmin.u32	d16, d16, d17
+@ CHECK: vpmin.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x60,0xf3]
+	vpmin.f32	d16, d16, d17
+@ CHECK: vpmax.s8	d16, d16, d17   @ encoding: [0xa1,0x0a,0x40,0xf2]
+	vpmax.s8	d16, d16, d17
+@ CHECK: vpmax.s16	d16, d16, d17   @ encoding: [0xa1,0x0a,0x50,0xf2]
+	vpmax.s16	d16, d16, d17
+@ CHECK: vpmax.s32	d16, d16, d17   @ encoding: [0xa1,0x0a,0x60,0xf2]
+	vpmax.s32	d16, d16, d17
+@ CHECK: vpmax.u8	d16, d16, d17   @ encoding: [0xa1,0x0a,0x40,0xf3]
+	vpmax.u8	d16, d16, d17
+@ CHECK: vpmax.u16	d16, d16, d17   @ encoding: [0xa1,0x0a,0x50,0xf3]
+	vpmax.u16	d16, d16, d17
+@ CHECK: vpmax.u32	d16, d16, d17   @ encoding: [0xa1,0x0a,0x60,0xf3]
+	vpmax.u32	d16, d16, d17
+@ CHECK: vpmax.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x40,0xf3]
+	vpmax.f32	d16, d16, d17
diff --git a/test/MC/ARM/neon-reciprocal-encoding.s b/test/MC/ARM/neon-reciprocal-encoding.s
new file mode 100644
index 000000000000..e12a47308765
--- /dev/null
+++ b/test/MC/ARM/neon-reciprocal-encoding.s
@@ -0,0 +1,26 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vrecpe.u32	d16, d16        @ encoding: [0x20,0x04,0xfb,0xf3]
+	vrecpe.u32	d16, d16
+@ CHECK: vrecpe.u32	q8, q8          @ encoding: [0x60,0x04,0xfb,0xf3]
+	vrecpe.u32	q8, q8
+@ CHECK: vrecpe.f32	d16, d16        @ encoding: [0x20,0x05,0xfb,0xf3]
+	vrecpe.f32	d16, d16
+@ CHECK: vrecpe.f32	q8, q8          @ encoding: [0x60,0x05,0xfb,0xf3]
+	vrecpe.f32	q8, q8
+@ CHECK: vrecps.f32	d16, d16, d17   @ encoding: [0xb1,0x0f,0x40,0xf2]
+	vrecps.f32	d16, d16, d17
+@ CHECK: vrecps.f32	q8, q8, q9      @ encoding: [0xf2,0x0f,0x40,0xf2]
+	vrecps.f32	q8, q8, q9
+@ CHECK: vrsqrte.u32	d16, d16        @ encoding: [0xa0,0x04,0xfb,0xf3]
+	vrsqrte.u32	d16, d16
+@ CHECK: vrsqrte.u32	q8, q8          @ encoding: [0xe0,0x04,0xfb,0xf3]
+	vrsqrte.u32	q8, q8
+@ CHECK: vrsqrte.f32	d16, d16        @ encoding: [0xa0,0x05,0xfb,0xf3]
+	vrsqrte.f32	d16, d16
+@ CHECK: vrsqrte.f32	q8, q8          @ encoding: [0xe0,0x05,0xfb,0xf3]
+	vrsqrte.f32	q8, q8
+@ CHECK: vrsqrts.f32	d16, d16, d17   @ encoding: [0xb1,0x0f,0x60,0xf2]
+	vrsqrts.f32	d16, d16, d17
+@ CHECK: vrsqrts.f32	q8, q8, q9      @ encoding: [0xf2,0x0f,0x60,0xf2]
+	vrsqrts.f32	q8, q8, q9
diff --git a/test/MC/ARM/neon-reverse-encoding.s b/test/MC/ARM/neon-reverse-encoding.s
new file mode 100644
index 000000000000..e33b9f3f2371
--- /dev/null
+++ b/test/MC/ARM/neon-reverse-encoding.s
@@ -0,0 +1,26 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vrev64.8	d16, d16        @ encoding: [0x20,0x00,0xf0,0xf3]
+	vrev64.8	d16, d16
+@ CHECK: vrev64.16	d16, d16        @ encoding: [0x20,0x00,0xf4,0xf3]
+	vrev64.16	d16, d16
+@ CHECK: vrev64.32	d16, d16        @ encoding: [0x20,0x00,0xf8,0xf3]
+	vrev64.32	d16, d16
+@ CHECK: vrev64.8	q8, q8          @ encoding: [0x60,0x00,0xf0,0xf3]
+	vrev64.8	q8, q8
+@ CHECK: vrev64.16	q8, q8          @ encoding: [0x60,0x00,0xf4,0xf3]
+	vrev64.16	q8, q8
+@ CHECK: vrev64.32	q8, q8          @ encoding: [0x60,0x00,0xf8,0xf3]
+	vrev64.32	q8, q8
+@ CHECK: vrev32.8	d16, d16        @ encoding: [0xa0,0x00,0xf0,0xf3]
+	vrev32.8	d16, d16
+@ CHECK: vrev32.16	d16, d16        @ encoding: [0xa0,0x00,0xf4,0xf3]
+	vrev32.16	d16, d16
+@ CHECK: vrev32.8	q8, q8          @ encoding: [0xe0,0x00,0xf0,0xf3]
+	vrev32.8	q8, q8
+@ CHECK: vrev32.16	q8, q8          @ encoding: [0xe0,0x00,0xf4,0xf3]
+	vrev32.16	q8, q8
+@ CHECK: vrev16.8	d16, d16        @ encoding: [0x20,0x01,0xf0,0xf3]
+	vrev16.8	d16, d16
+@ CHECK: vrev16.8	q8, q8          @ encoding: [0x60,0x01,0xf0,0xf3]
+	vrev16.8	q8, q8
diff --git a/test/MC/ARM/neon-satshift-encoding.s b/test/MC/ARM/neon-satshift-encoding.s
new file mode 100644
index 000000000000..506f48a6e3c7
--- /dev/null
+++ b/test/MC/ARM/neon-satshift-encoding.s
@@ -0,0 +1,150 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vqshl.s8	d16, d16, d17   @ encoding: [0xb0,0x04,0x41,0xf2]
+	vqshl.s8	d16, d16, d17
+@ CHECK: vqshl.s16	d16, d16, d17   @ encoding: [0xb0,0x04,0x51,0xf2]
+	vqshl.s16	d16, d16, d17
+@ CHECK: vqshl.s32	d16, d16, d17   @ encoding: [0xb0,0x04,0x61,0xf2]
+	vqshl.s32	d16, d16, d17
+@ CHECK: vqshl.s64	d16, d16, d17   @ encoding: [0xb0,0x04,0x71,0xf2]
+	vqshl.s64	d16, d16, d17
+@ CHECK: vqshl.u8	d16, d16, d17   @ encoding: [0xb0,0x04,0x41,0xf3]
+	vqshl.u8	d16, d16, d17
+@ CHECK: vqshl.u16	d16, d16, d17   @ encoding: [0xb0,0x04,0x51,0xf3]
+	vqshl.u16	d16, d16, d17
+@ CHECK: vqshl.u32	d16, d16, d17   @ encoding: [0xb0,0x04,0x61,0xf3]
+	vqshl.u32	d16, d16, d17
+@ CHECK: vqshl.u64	d16, d16, d17   @ encoding: [0xb0,0x04,0x71,0xf3]
+	vqshl.u64	d16, d16, d17
+@ CHECK: vqshl.s8	q8, q8, q9      @ encoding: [0xf0,0x04,0x42,0xf2]
+	vqshl.s8	q8, q8, q9
+@ CHECK: vqshl.s16	q8, q8, q9      @ encoding: [0xf0,0x04,0x52,0xf2]
+	vqshl.s16	q8, q8, q9
+@ CHECK: vqshl.s32	q8, q8, q9      @ encoding: [0xf0,0x04,0x62,0xf2]
+	vqshl.s32	q8, q8, q9
+@ CHECK: vqshl.s64	q8, q8, q9      @ encoding: [0xf0,0x04,0x72,0xf2]
+	vqshl.s64	q8, q8, q9
+@ CHECK: vqshl.u8	q8, q8, q9      @ encoding: [0xf0,0x04,0x42,0xf3]
+	vqshl.u8	q8, q8, q9
+@ CHECK: vqshl.u16	q8, q8, q9      @ encoding: [0xf0,0x04,0x52,0xf3]
+	vqshl.u16	q8, q8, q9
+@ CHECK: vqshl.u32	q8, q8, q9      @ encoding: [0xf0,0x04,0x62,0xf3]
+	vqshl.u32	q8, q8, q9
+@ CHECK: vqshl.u64	q8, q8, q9      @ encoding: [0xf0,0x04,0x72,0xf3]
+	vqshl.u64	q8, q8, q9
+@ CHECK: vqshl.s8	d16, d16, #7    @ encoding: [0x30,0x07,0xcf,0xf2]
+	vqshl.s8	d16, d16, #7
+@ CHECK: vqshl.s16	d16, d16, #15   @ encoding: [0x30,0x07,0xdf,0xf2]
+	vqshl.s16	d16, d16, #15
+@ CHECK: vqshl.s32	d16, d16, #31   @ encoding: [0x30,0x07,0xff,0xf2]
+	vqshl.s32	d16, d16, #31
+@ CHECK: vqshl.s64	d16, d16, #63   @ encoding: [0xb0,0x07,0xff,0xf2]
+	vqshl.s64	d16, d16, #63
+@ CHECK: vqshl.u8	d16, d16, #7    @ encoding: [0x30,0x07,0xcf,0xf3]
+	vqshl.u8	d16, d16, #7
+@ CHECK: vqshl.u16	d16, d16, #15   @ encoding: [0x30,0x07,0xdf,0xf3]
+	vqshl.u16	d16, d16, #15
+@ CHECK: vqshl.u32	d16, d16, #31   @ encoding: [0x30,0x07,0xff,0xf3]
+	vqshl.u32	d16, d16, #31
+@ CHECK: vqshl.u64	d16, d16, #63   @ encoding: [0xb0,0x07,0xff,0xf3]
+	vqshl.u64	d16, d16, #63
+@ CHECK: vqshlu.s8	d16, d16, #7    @ encoding: [0x30,0x06,0xcf,0xf3]
+	vqshlu.s8	d16, d16, #7
+@ CHECK: vqshlu.s16	d16, d16, #15   @ encoding: [0x30,0x06,0xdf,0xf3]
+	vqshlu.s16	d16, d16, #15
+@ CHECK: vqshlu.s32	d16, d16, #31   @ encoding: [0x30,0x06,0xff,0xf3]
+	vqshlu.s32	d16, d16, #31
+@ CHECK: vqshlu.s64	d16, d16, #63   @ encoding: [0xb0,0x06,0xff,0xf3]
+	vqshlu.s64	d16, d16, #63
+@ CHECK: vqshl.s8	q8, q8, #7      @ encoding: [0x70,0x07,0xcf,0xf2]
+	vqshl.s8	q8, q8, #7
+@ CHECK: vqshl.s16	q8, q8, #15     @ encoding: [0x70,0x07,0xdf,0xf2]
+	vqshl.s16	q8, q8, #15
+@ CHECK: vqshl.s32	q8, q8, #31     @ encoding: [0x70,0x07,0xff,0xf2]
+	vqshl.s32	q8, q8, #31
+@ CHECK: vqshl.s64	q8, q8, #63     @ encoding: [0xf0,0x07,0xff,0xf2]
+	vqshl.s64	q8, q8, #63
+@ CHECK: vqshl.u8	q8, q8, #7      @ encoding: [0x70,0x07,0xcf,0xf3]
+	vqshl.u8	q8, q8, #7
+@ CHECK: vqshl.u16	q8, q8, #15     @ encoding: [0x70,0x07,0xdf,0xf3]
+	vqshl.u16	q8, q8, #15
+@ CHECK: vqshl.u32	q8, q8, #31     @ encoding: [0x70,0x07,0xff,0xf3]
+	vqshl.u32	q8, q8, #31
+@ CHECK: vqshl.u64	q8, q8, #63     @ encoding: [0xf0,0x07,0xff,0xf3]
+	vqshl.u64	q8, q8, #63
+@ CHECK: vqshlu.s8	q8, q8, #7      @ encoding: [0x70,0x06,0xcf,0xf3]
+	vqshlu.s8	q8, q8, #7
+@ CHECK: vqshlu.s16	q8, q8, #15     @ encoding: [0x70,0x06,0xdf,0xf3]
+	vqshlu.s16	q8, q8, #15
+@ CHECK: vqshlu.s32	q8, q8, #31     @ encoding: [0x70,0x06,0xff,0xf3]
+	vqshlu.s32	q8, q8, #31
+@ CHECK: vqshlu.s64	q8, q8, #63     @ encoding: [0xf0,0x06,0xff,0xf3]
+	vqshlu.s64	q8, q8, #63
+@ CHECK:   vqrshl.s8	d16, d16, d17   @ encoding: [0xb0,0x05,0x41,0xf2]
+	vqrshl.s8	d16, d16, d17
+@ CHECK: vqrshl.s16	d16, d16, d17   @ encoding: [0xb0,0x05,0x51,0xf2]
+	vqrshl.s16	d16, d16, d17
+@ CHECK: vqrshl.s32	d16, d16, d17   @ encoding: [0xb0,0x05,0x61,0xf2]
+	vqrshl.s32	d16, d16, d17
+@ CHECK: vqrshl.s64	d16, d16, d17   @ encoding: [0xb0,0x05,0x71,0xf2]
+	vqrshl.s64	d16, d16, d17
+@ CHECK: vqrshl.u8	d16, d16, d17   @ encoding: [0xb0,0x05,0x41,0xf3]
+	vqrshl.u8	d16, d16, d17
+@ CHECK: vqrshl.u16	d16, d16, d17   @ encoding: [0xb0,0x05,0x51,0xf3]
+	vqrshl.u16	d16, d16, d17
+@ CHECK: vqrshl.u32	d16, d16, d17   @ encoding: [0xb0,0x05,0x61,0xf3]
+	vqrshl.u32	d16, d16, d17
+@ CHECK: vqrshl.u64	d16, d16, d17   @ encoding: [0xb0,0x05,0x71,0xf3]
+	vqrshl.u64	d16, d16, d17
+@ CHECK: vqrshl.s8	q8, q8, q9      @ encoding: [0xf0,0x05,0x42,0xf2]
+	vqrshl.s8	q8, q8, q9
+@ CHECK: vqrshl.s16	q8, q8, q9      @ encoding: [0xf0,0x05,0x52,0xf2]
+	vqrshl.s16	q8, q8, q9
+@ CHECK: vqrshl.s32	q8, q8, q9      @ encoding: [0xf0,0x05,0x62,0xf2]
+	vqrshl.s32	q8, q8, q9
+@ CHECK: vqrshl.s64	q8, q8, q9      @ encoding: [0xf0,0x05,0x72,0xf2]
+	vqrshl.s64	q8, q8, q9
+@ CHECK: vqrshl.u8	q8, q8, q9      @ encoding: [0xf0,0x05,0x42,0xf3]
+	vqrshl.u8	q8, q8, q9
+@ CHECK: vqrshl.u16	q8, q8, q9      @ encoding: [0xf0,0x05,0x52,0xf3]
+	vqrshl.u16	q8, q8, q9
+@ CHECK: vqrshl.u32	q8, q8, q9      @ encoding: [0xf0,0x05,0x62,0xf3]
+	vqrshl.u32	q8, q8, q9
+@ CHECK: vqrshl.u64	q8, q8, q9      @ encoding: [0xf0,0x05,0x72,0xf3]
+	vqrshl.u64	q8, q8, q9
+@ CHECK: vqshrn.s16	d16, q8, #8     @ encoding: [0x30,0x09,0xc8,0xf2]
+	vqshrn.s16	d16, q8, #8
+@ CHECK: vqshrn.s32	d16, q8, #16    @ encoding: [0x30,0x09,0xd0,0xf2]
+	vqshrn.s32	d16, q8, #16
+@ CHECK: vqshrn.s64	d16, q8, #32    @ encoding: [0x30,0x09,0xe0,0xf2]
+	vqshrn.s64	d16, q8, #32
+@ CHECK: vqshrn.u16	d16, q8, #8     @ encoding: [0x30,0x09,0xc8,0xf3]
+	vqshrn.u16	d16, q8, #8
+@ CHECK: vqshrn.u32	d16, q8, #16    @ encoding: [0x30,0x09,0xd0,0xf3]
+	vqshrn.u32	d16, q8, #16
+@ CHECK: vqshrn.u64	d16, q8, #32    @ encoding: [0x30,0x09,0xe0,0xf3]
+	vqshrn.u64	d16, q8, #32
+@ CHECK: vqshrun.s16	d16, q8, #8     @ encoding: [0x30,0x08,0xc8,0xf3]
+	vqshrun.s16	d16, q8, #8
+@ CHECK: vqshrun.s32	d16, q8, #16    @ encoding: [0x30,0x08,0xd0,0xf3]
+	vqshrun.s32	d16, q8, #16
+@ CHECK: vqshrun.s64	d16, q8, #32    @ encoding: [0x30,0x08,0xe0,0xf3]
+	vqshrun.s64	d16, q8, #32
+@ CHECK: vqrshrn.s16	d16, q8, #8     @ encoding: [0x70,0x09,0xc8,0xf2]
+	vqrshrn.s16	d16, q8, #8
+@ CHECK: vqrshrn.s32	d16, q8, #16    @ encoding: [0x70,0x09,0xd0,0xf2]
+	vqrshrn.s32	d16, q8, #16
+@ CHECK: vqrshrn.s64	d16, q8, #32    @ encoding: [0x70,0x09,0xe0,0xf2]
+	vqrshrn.s64	d16, q8, #32
+@ CHECK: vqrshrn.u16	d16, q8, #8     @ encoding: [0x70,0x09,0xc8,0xf3]
+	vqrshrn.u16	d16, q8, #8
+@ CHECK: vqrshrn.u32	d16, q8, #16    @ encoding: [0x70,0x09,0xd0,0xf3]
+	vqrshrn.u32	d16, q8, #16
+@ CHECK: vqrshrn.u64	d16, q8, #32    @ encoding: [0x70,0x09,0xe0,0xf3]
+	vqrshrn.u64	d16, q8, #32
+@ CHECK: vqrshrun.s16	d16, q8, #8     @ encoding: [0x70,0x08,0xc8,0xf3]
+	vqrshrun.s16	d16, q8, #8
+@ CHECK: vqrshrun.s32	d16, q8, #16    @ encoding: [0x70,0x08,0xd0,0xf3]
+	vqrshrun.s32	d16, q8, #16
+@ CHECK: vqrshrun.s64	d16, q8, #32    @ encoding: [0x70,0x08,0xe0,0xf3]
+	vqrshrun.s64	d16, q8, #32
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
new file mode 100644
index 000000000000..4b4fa0876ccd
--- /dev/null
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -0,0 +1,160 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vshl.u8	d16, d17, d16           @ encoding: [0xa1,0x04,0x40,0xf3]
+	vshl.u8	d16, d17, d16
+@ CHECK: vshl.u16	d16, d17, d16   @ encoding: [0xa1,0x04,0x50,0xf3]
+	vshl.u16	d16, d17, d16
+@ CHECK: vshl.u32	d16, d17, d16   @ encoding: [0xa1,0x04,0x60,0xf3]
+	vshl.u32	d16, d17, d16
+@ CHECK: vshl.u64	d16, d17, d16   @ encoding: [0xa1,0x04,0x70,0xf3]
+	vshl.u64	d16, d17, d16
+@ CHECK: vshl.i8	d16, d16, #7            @ encoding: [0x30,0x05,0xcf,0xf2]
+	vshl.i8	d16, d16, #7
+@ CHECK: vshl.i16	d16, d16, #15   @ encoding: [0x30,0x05,0xdf,0xf2]
+	vshl.i16	d16, d16, #15
+@ CHECK: vshl.i32	d16, d16, #31   @ encoding: [0x30,0x05,0xff,0xf2]
+	vshl.i32	d16, d16, #31
+@ CHECK: vshl.i64	d16, d16, #63   @ encoding: [0xb0,0x05,0xff,0xf2]
+	vshl.i64	d16, d16, #63
+@ CHECK: vshl.u8	q8, q9, q8              @ encoding: [0xe2,0x04,0x40,0xf3]
+	vshl.u8	q8, q9, q8
+@ CHECK: vshl.u16	q8, q9, q8      @ encoding: [0xe2,0x04,0x50,0xf3]
+	vshl.u16	q8, q9, q8
+@ CHECK: vshl.u32	q8, q9, q8      @ encoding: [0xe2,0x04,0x60,0xf3]
+	vshl.u32	q8, q9, q8
+@ CHECK: vshl.u64	q8, q9, q8      @ encoding: [0xe2,0x04,0x70,0xf3]
+	vshl.u64	q8, q9, q8
+@ CHECK: vshl.i8	q8, q8, #7              @ encoding: [0x70,0x05,0xcf,0xf2]
+	vshl.i8	q8, q8, #7
+@ CHECK: vshl.i16	q8, q8, #15     @ encoding: [0x70,0x05,0xdf,0xf2]
+	vshl.i16	q8, q8, #15
+@ CHECK: vshl.i32	q8, q8, #31     @ encoding: [0x70,0x05,0xff,0xf2]
+	vshl.i32	q8, q8, #31
+@ CHECK: vshl.i64	q8, q8, #63     @ encoding: [0xf0,0x05,0xff,0xf2]
+	vshl.i64	q8, q8, #63
+@ CHECK: vshr.u8	d16, d16, #8            @ encoding: [0x30,0x00,0xc8,0xf3]
+	vshr.u8	d16, d16, #8
+@ CHECK: vshr.u16	d16, d16, #16   @ encoding: [0x30,0x00,0xd0,0xf3]
+	vshr.u16	d16, d16, #16
+@ CHECK: vshr.u32	d16, d16, #32   @ encoding: [0x30,0x00,0xe0,0xf3]
+	vshr.u32	d16, d16, #32
+@ CHECK: vshr.u64	d16, d16, #64   @ encoding: [0xb0,0x00,0xc0,0xf3]
+	vshr.u64	d16, d16, #64
+@ CHECK: vshr.u8	q8, q8, #8              @ encoding: [0x70,0x00,0xc8,0xf3]
+	vshr.u8	q8, q8, #8
+@ CHECK: vshr.u16	q8, q8, #16     @ encoding: [0x70,0x00,0xd0,0xf3]
+	vshr.u16	q8, q8, #16
+@ CHECK: vshr.u32	q8, q8, #32     @ encoding: [0x70,0x00,0xe0,0xf3]
+	vshr.u32	q8, q8, #32
+@ CHECK: vshr.u64	q8, q8, #64     @ encoding: [0xf0,0x00,0xc0,0xf3]
+	vshr.u64	q8, q8, #64
+@ CHECK: vshr.s8	d16, d16, #8            @ encoding: [0x30,0x00,0xc8,0xf2]
+	vshr.s8	d16, d16, #8
+@ CHECK: vshr.s16	d16, d16, #16   @ encoding: [0x30,0x00,0xd0,0xf2]
+	vshr.s16	d16, d16, #16
+@ CHECK: vshr.s32	d16, d16, #32   @ encoding: [0x30,0x00,0xe0,0xf2]
+	vshr.s32	d16, d16, #32
+@ CHECK: vshr.s64	d16, d16, #64   @ encoding: [0xb0,0x00,0xc0,0xf2]
+	vshr.s64	d16, d16, #64
+@ CHECK: vshr.s8	q8, q8, #8              @ encoding: [0x70,0x00,0xc8,0xf2]
+	vshr.s8	q8, q8, #8
+@ CHECK: vshr.s16	q8, q8, #16     @ encoding: [0x70,0x00,0xd0,0xf2]
+	vshr.s16	q8, q8, #16
+@ CHECK: vshr.s32	q8, q8, #32     @ encoding: [0x70,0x00,0xe0,0xf2
+	vshr.s32	q8, q8, #32
+@ CHECK: vshr.s64	q8, q8, #64     @ encoding: [0xf0,0x00,0xc0,0xf2]
+	vshr.s64	q8, q8, #64
+@ CHECK: vshll.s8	q8, d16, #7     @ encoding: [0x30,0x0a,0xcf,0xf2]
+	vshll.s8	q8, d16, #7
+@ CHECK: vshll.s16	q8, d16, #15    @ encoding: [0x30,0x0a,0xdf,0xf2]
+	vshll.s16	q8, d16, #15
+@ CHECK: vshll.s32	q8, d16, #31    @ encoding: [0x30,0x0a,0xff,0xf2]
+	vshll.s32	q8, d16, #31
+@ CHECK: vshll.u8	q8, d16, #7     @ encoding: [0x30,0x0a,0xcf,0xf3]
+	vshll.u8	q8, d16, #7
+@ CHECK: vshll.u16	q8, d16, #15    @ encoding: [0x30,0x0a,0xdf,0xf3]
+	vshll.u16	q8, d16, #15
+@ CHECK: vshll.u32	q8, d16, #31    @ encoding: [0x30,0x0a,0xff,0xf3]
+	vshll.u32	q8, d16, #31
+@ CHECK: vshll.i8	q8, d16, #8     @ encoding: [0x20,0x03,0xf2,0xf3]
+	vshll.i8	q8, d16, #8
+@ CHECK: vshll.i16	q8, d16, #16    @ encoding: [0x20,0x03,0xf6,0xf3]
+	vshll.i16	q8, d16, #16
+@ CHECK: vshll.i32	q8, d16, #32    @ encoding: [0x20,0x03,0xfa,0xf3]
+	vshll.i32	q8, d16, #32
+@ CHECK: vshrn.i16	d16, q8, #8     @ encoding: [0x30,0x08,0xc8,0xf2]
+	vshrn.i16	d16, q8, #8
+@ CHECK: vshrn.i32	d16, q8, #16    @ encoding: [0x30,0x08,0xd0,0xf2]
+	vshrn.i32	d16, q8, #16
+@ CHECK: vshrn.i64	d16, q8, #32    @ encoding: [0x30,0x08,0xe0,0xf2]
+	vshrn.i64	d16, q8, #32
+@ CHECK: vrshl.s8	d16, d17, d16   @ encoding: [0xa1,0x05,0x40,0xf2]
+	vrshl.s8	d16, d17, d16
+@ CHECK: vrshl.s16	d16, d17, d16   @ encoding: [0xa1,0x05,0x50,0xf2]
+	vrshl.s16	d16, d17, d16
+@ CHECK: vrshl.s32	d16, d17, d16   @ encoding: [0xa1,0x05,0x60,0xf2]
+	vrshl.s32	d16, d17, d16
+@ CHECK: vrshl.s64	d16, d17, d16   @ encoding: [0xa1,0x05,0x70,0
+	vrshl.s64	d16, d17, d16
+@ CHECK: vrshl.u8	d16, d17, d16   @ encoding: [0xa1,0x05,0x40,0xf3]
+	vrshl.u8	d16, d17, d16
+@ CHECK: vrshl.u16	d16, d17, d16   @ encoding: [0xa1,0x05,0x50,0xf3]
+	vrshl.u16	d16, d17, d16
+@ CHECK: vrshl.u32	d16, d17, d16   @ encoding: [0xa1,0x05,0x60,0xf3]
+	vrshl.u32	d16, d17, d16
+@ CHECK: vrshl.u64	d16, d17, d16   @ encoding: [0xa1,0x05,0x70,0xf3]
+	vrshl.u64	d16, d17, d16
+@ CHECK: vrshl.s8	q8, q9, q8      @ encoding: [0xe2,0x05,0x40,0xf2]
+	vrshl.s8	q8, q9, q8
+@ CHECK: vrshl.s16	q8, q9, q8      @ encoding: [0xe2,0x05,0x50,0xf2]
+	vrshl.s16	q8, q9, q8
+@ CHECK: vrshl.s32	q8, q9, q8      @ encoding: [0xe2,0x05,0x60,0xf2]
+	vrshl.s32	q8, q9, q8
+@ CHECK: vrshl.s64	q8, q9, q8      @ encoding: [0xe2,0x05,0x70,0xf2]
+	vrshl.s64	q8, q9, q8
+@ CHECK: vrshl.u8	q8, q9, q8      @ encoding: [0xe2,0x05,0x40,0xf3]
+	vrshl.u8	q8, q9, q8
+@ CHECK: vrshl.u16	q8, q9, q8      @ encoding: [0xe2,0x05,0x50,0xf3]
+	vrshl.u16	q8, q9, q8
+@ CHECK: vrshl.u32	q8, q9, q8      @ encoding: [0xe2,0x05,0x60,0xf3]
+	vrshl.u32	q8, q9, q8
+@ CHECK: vrshl.u64	q8, q9, q8      @ encoding: [0xe2,0x05,0x70,0xf3]
+	vrshl.u64	q8, q9, q8
+@ CHECK: vrshr.s8	d16, d16, #8    @ encoding: [0x30,0x02,0xc8,0xf2]
+	vrshr.s8	d16, d16, #8
+@ CHECK: vrshr.s16	d16, d16, #16   @ encoding: [0x30,0x02,0xd0,0xf2]
+	vrshr.s16	d16, d16, #16
+@ CHECK: vrshr.s32	d16, d16, #32   @ encoding: [0x30,0x02,0xe0,0xf2]
+	vrshr.s32	d16, d16, #32
+@ CHECK: vrshr.s64	d16, d16, #64   @ encoding: [0xb0,0x02,0xc0,0xf2]
+	vrshr.s64	d16, d16, #64
+@ CHECK: vrshr.u8	d16, d16, #8    @ encoding: [0x30,0x02,0xc8,0xf3]
+	vrshr.u8	d16, d16, #8
+@ CHECK: vrshr.u16	d16, d16, #16   @ encoding: [0x30,0x02,0xd0,0xf3]
+	vrshr.u16	d16, d16, #16
+@ CHECK: vrshr.u32	d16, d16, #32   @ encoding: [0x30,0x02,0xe0,0xf3]
+	vrshr.u32	d16, d16, #32
+@ CHECK: vrshr.u64	d16, d16, #64   @ encoding: [0xb0,0x02,0xc0,0xf3]
+	vrshr.u64	d16, d16, #64
+@ CHECK: vrshr.s8	q8, q8, #8      @ encoding: [0x70,0x02,0xc8,0xf2]
+	vrshr.s8	q8, q8, #8
+@ CHECK: vrshr.s16	q8, q8, #16     @ encoding: [0x70,0x02,0xd0,0xf2]
+	vrshr.s16	q8, q8, #16
+@ CHECK: vrshr.s32	q8, q8, #32     @ encoding: [0x70,0x02,0xe0,0xf2]
+	vrshr.s32	q8, q8, #32
+@ CHECK: vrshr.s64	q8, q8, #64     @ encoding: [0xf0,0x02,0xc0,0xf2]
+	vrshr.s64	q8, q8, #64
+@ CHECK: vrshr.u8	q8, q8, #8      @ encoding: [0x70,0x02,0xc8,0xf3]
+	vrshr.u8	q8, q8, #8
+@ CHECK: vrshr.u16	q8, q8, #16     @ encoding: [0x70,0x02,0xd0,0xf3]
+	vrshr.u16	q8, q8, #16
+@ CHECK: vrshr.u32	q8, q8, #32     @ encoding: [0x70,0x02,0xe0,0xf3]
+	vrshr.u32	q8, q8, #32
+@ CHECK: vrshr.u64	q8, q8, #64     @ encoding: [0xf0,0x02,0xc0,0xf3]
+	vrshr.u64	q8, q8, #64
+@ CHECK: vrshrn.i16	d16, q8, #8     @ encoding: [0x70,0x08,0xc8,0xf2]
+	vrshrn.i16	d16, q8, #8
+@ CHECK: vrshrn.i32	d16, q8, #16    @ encoding: [0x70,0x08,0xd0,0xf2]
+	vrshrn.i32	d16, q8, #16
+@ CHECK: vrshrn.i64	d16, q8, #32    @ encoding: [0x70,0x08,0xe0,0xf2]
+	vrshrn.i64	d16, q8, #32
diff --git a/test/MC/ARM/neon-shiftaccum-encoding.s b/test/MC/ARM/neon-shiftaccum-encoding.s
new file mode 100644
index 000000000000..0dc630d395dc
--- /dev/null
+++ b/test/MC/ARM/neon-shiftaccum-encoding.s
@@ -0,0 +1,98 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vsra.s8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf2]
+	vsra.s8	d17, d16, #8
+@ CHECK: vsra.s16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf2]
+	vsra.s16	d17, d16, #16
+@ CHECK: vsra.s32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf2]
+	vsra.s32	d17, d16, #32
+@ CHECK: vsra.s64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf2]
+	vsra.s64	d17, d16, #64
+@ CHECK: vsra.s8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf2]
+	vsra.s8	q8, q9, #8
+@ CHECK: vsra.s16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf2]
+	vsra.s16	q8, q9, #16
+@ CHECK: vsra.s32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf2]
+	vsra.s32	q8, q9, #32
+@ CHECK: vsra.s64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf2]
+	vsra.s64	q8, q9, #64
+@ CHECK: vsra.u8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf3]
+	vsra.u8	d17, d16, #8
+@ CHECK: vsra.u16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf3]
+	vsra.u16	d17, d16, #16
+@ CHECK: vsra.u32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf3]
+	vsra.u32	d17, d16, #32
+@ CHECK: vsra.u64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf3]
+	vsra.u64	d17, d16, #64
+@ CHECK: vsra.u8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf3]
+	vsra.u8	q8, q9, #8
+@ CHECK: vsra.u16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf3]
+	vsra.u16	q8, q9, #16
+@ CHECK: vsra.u32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf3]
+	vsra.u32	q8, q9, #32
+@ CHECK: vsra.u64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf3]
+	vsra.u64	q8, q9, #64
+@ CHECK: vrsra.s8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf2]
+	vrsra.s8	d17, d16, #8
+@ CHECK: vrsra.s16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf2]
+	vrsra.s16	d17, d16, #16
+@ CHECK: vrsra.s32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf2]
+	vrsra.s32	d17, d16, #32
+@ CHECK: vrsra.s64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf2]
+	vrsra.s64	d17, d16, #64
+@ CHECK: vrsra.u8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf3]
+	vrsra.u8	d17, d16, #8
+@ CHECK: vrsra.u16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf3]
+	vrsra.u16	d17, d16, #16
+@ CHECK: vrsra.u32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf3]
+	vrsra.u32	d17, d16, #32
+@ CHECK: vrsra.u64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf3]
+	vrsra.u64	d17, d16, #64
+@ CHECK: vrsra.s8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf2]
+	vrsra.s8	q8, q9, #8
+@ CHECK: vrsra.s16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf2]
+	vrsra.s16	q8, q9, #16
+@ CHECK: vrsra.s32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf2]
+	vrsra.s32	q8, q9, #32
+@ CHECK: vrsra.s64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf2]
+	vrsra.s64	q8, q9, #64
+@ CHECK: vrsra.u8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf3]
+	vrsra.u8	q8, q9, #8
+@ CHECK: vrsra.u16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf3]
+	vrsra.u16	q8, q9, #16
+@ CHECK: vrsra.u32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf3]
+	vrsra.u32	q8, q9, #32
+@ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf3]
+	vrsra.u64	q8, q9, #64
+@ CHECK: vsli.8	d17, d16, #7            @ encoding: [0x30,0x15,0xcf,0xf3]
+	vsli.8	d17, d16, #7
+@ CHECK: vsli.16	d17, d16, #15           @ encoding: [0x30,0x15,0xdf,0xf3]
+	vsli.16	d17, d16, #15
+@ CHECK: vsli.32	d17, d16, #31           @ encoding: [0x30,0x15,0xff,0xf3]
+	vsli.32	d17, d16, #31
+@ CHECK: vsli.64	d17, d16, #63           @ encoding: [0xb0,0x15,0xff,0xf3]
+	vsli.64	d17, d16, #63
+@ CHECK: vsli.8	q9, q8, #7              @ encoding: [0x70,0x25,0xcf,0xf3]
+	vsli.8	q9, q8, #7
+@ CHECK: vsli.16	q9, q8, #15             @ encoding: [0x70,0x25,0xdf,0xf3]
+	vsli.16	q9, q8, #15
+@ CHECK: vsli.32	q9, q8, #31             @ encoding: [0x70,0x25,0xff,0xf3]
+	vsli.32	q9, q8, #31
+@ CHECK: vsli.64	q9, q8, #63             @ encoding: [0xf0,0x25,0xff,0xf3]
+	vsli.64	q9, q8, #63
+@ CHECK: vsri.8	d17, d16, #8            @ encoding: [0x30,0x14,0xc8,0xf3]
+	vsri.8	d17, d16, #8
+@ CHECK: vsri.16	d17, d16, #16           @ encoding: [0x30,0x14,0xd0,0xf3]
+	vsri.16	d17, d16, #16
+@ CHECK: vsri.32	d17, d16, #32           @ encoding: [0x30,0x14,0xe0,0xf3]
+	vsri.32	d17, d16, #32
+@ CHECK: vsri.64	d17, d16, #64           @ encoding: [0xb0,0x14,0xc0,0xf3]
+	vsri.64	d17, d16, #64
+@ CHECK: vsri.8	q9, q8, #8              @ encoding: [0x70,0x24,0xc8,0xf3]
+	vsri.8	q9, q8, #8
+@ CHECK: vsri.16	q9, q8, #16             @ encoding: [0x70,0x24,0xd0,0xf3]
+	vsri.16	q9, q8, #16
+@ CHECK: vsri.32	q9, q8, #32             @ encoding: [0x70,0x24,0xe0,0xf3]
+	vsri.32	q9, q8, #32
+@ CHECK: vsri.64	q9, q8, #64             @ encoding: [0xf0,0x24,0xc0,0xf3]
+	vsri.64	q9, q8, #64
diff --git a/test/MC/ARM/neon-shuffle-encoding.s b/test/MC/ARM/neon-shuffle-encoding.s
new file mode 100644
index 000000000000..ce7eb66a08a7
--- /dev/null
+++ b/test/MC/ARM/neon-shuffle-encoding.s
@@ -0,0 +1,46 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vext.8	d16, d17, d16, #3       @ encoding: [0xa0,0x03,0xf1,0xf2]
+	vext.8	d16, d17, d16, #3
+@ CHECK: vext.8	d16, d17, d16, #5       @ encoding: [0xa0,0x05,0xf1,0xf2]
+	vext.8	d16, d17, d16, #5
+@ CHECK: vext.8	q8, q9, q8, #3          @ encoding: [0xe0,0x03,0xf2,0xf2]
+	vext.8	q8, q9, q8, #3
+@ CHECK: vext.8	q8, q9, q8, #7          @ encoding: [0xe0,0x07,0xf2,0xf2]
+	vext.8	q8, q9, q8, #7
+@ CHECK: vext.16	d16, d17, d16, #3       @ encoding: [0xa0,0x06,0xf1,0xf2]
+	vext.16	d16, d17, d16, #3
+@ CHECK: vext.32	q8, q9, q8, #3          @ encoding: [0xe0,0x0c,0xf2,0xf2]
+	vext.32	q8, q9, q8, #3
+@ CHECK: vtrn.8	d17, d16                @ encoding: [0xa0,0x10,0xf2,0xf3]
+	vtrn.8	d17, d16
+@ CHECK: vtrn.16	d17, d16                @ encoding: [0xa0,0x10,0xf6,0xf3]
+	vtrn.16	d17, d16
+@ CHECK: vtrn.32	d17, d16                @ encoding: [0xa0,0x10,0xfa,0xf3]
+	vtrn.32	d17, d16
+@ CHECK: vtrn.8	q9, q8                  @ encoding: [0xe0,0x20,0xf2,0xf3]
+	vtrn.8	q9, q8
+@ CHECK: vtrn.16	q9, q8                  @ encoding: [0xe0,0x20,0xf6,0xf3]
+	vtrn.16	q9, q8
+@ CHECK: vtrn.32	q9, q8                  @ encoding: [0xe0,0x20,0xfa,0xf3]
+	vtrn.32	q9, q8
+@ CHECK: vuzp.8	d17, d16                @ encoding: [0x20,0x11,0xf2,0xf3]
+	vuzp.8	d17, d16
+@ CHECK: vuzp.16	d17, d16                @ encoding: [0x20,0x11,0xf6,0xf3]
+	vuzp.16	d17, d16
+@ CHECK: vuzp.8	q9, q8                  @ encoding: [0x60,0x21,0xf2,0xf3]
+	vuzp.8	q9, q8
+@ CHECK: vuzp.16	q9, q8                  @ encoding: [0x60,0x21,0xf6,0xf3]
+	vuzp.16	q9, q8
+@ CHECK: vuzp.32	q9, q8                  @ encoding: [0x60,0x21,0xfa,0xf3]
+	vuzp.32	q9, q8
+@ CHECK: vzip.8	d17, d16                @ encoding: [0xa0,0x11,0xf2,0xf3]
+	vzip.8	d17, d16
+@ CHECK: vzip.16	d17, d16                @ encoding: [0xa0,0x11,0xf6,0xf3]
+	vzip.16	d17, d16
+@ CHECK: vzip.8	q9, q8                  @ encoding: [0xe0,0x21,0xf2,0xf3]
+	vzip.8	q9, q8
+@ CHECK: vzip.16	q9, q8                  @ encoding: [0xe0,0x21,0xf6,0xf3]
+	vzip.16	q9, q8
+@ CHECK: vzip.32	q9, q8                  @ encoding: [0xe0,0x21,0xfa,0xf3]
+	vzip.32	q9, q8
diff --git a/test/MC/ARM/neon-sub-encoding.s b/test/MC/ARM/neon-sub-encoding.s
new file mode 100644
index 000000000000..241a01ffd4d4
--- /dev/null
+++ b/test/MC/ARM/neon-sub-encoding.s
@@ -0,0 +1,108 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vsub.i8	d16, d17, d16           @ encoding: [0xa0,0x08,0x41,0xf3]
+	vsub.i8	d16, d17, d16
+@ CHECK: vsub.i16	d16, d17, d16   @ encoding: [0xa0,0x08,0x51,0xf3]
+	vsub.i16	d16, d17, d16
+@ CHECK: vsub.i32	d16, d17, d16   @ encoding: [0xa0,0x08,0x61,0xf3]
+	vsub.i32	d16, d17, d16
+@ CHECK: vsub.i64	d16, d17, d16   @ encoding: [0xa0,0x08,0x71,0xf3]
+	vsub.i64	d16, d17, d16
+@ CHECK: vsub.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x60,0xf2]
+	vsub.f32	d16, d16, d17
+@ CHECK: vsub.i8	q8, q8, q9              @ encoding: [0xe2,0x08,0x40,0xf3]
+	vsub.i8	q8, q8, q9
+@ CHECK: vsub.i16	q8, q8, q9      @ encoding: [0xe2,0x08,0x50,0xf3]
+	vsub.i16	q8, q8, q9
+@ CHECK: vsub.i32	q8, q8, q9      @ encoding: [0xe2,0x08,0x60,0xf3]
+	vsub.i32	q8, q8, q9
+@ CHECK: vsub.i64	q8, q8, q9      @ encoding: [0xe2,0x08,0x70,0xf3]
+	vsub.i64	q8, q8, q9
+@ CHECK: vsub.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x60,0xf2]
+	vsub.f32	q8, q8, q9
+@ CHECK: vsubl.s8	q8, d17, d16    @ encoding: [0xa0,0x02,0xc1,0xf2]
+	vsubl.s8	q8, d17, d16
+@ CHECK: vsubl.s16	q8, d17, d16    @ encoding: [0xa0,0x02,0xd1,0xf2]
+	vsubl.s16	q8, d17, d16
+@ CHECK: vsubl.s32	q8, d17, d16    @ encoding: [0xa0,0x02,0xe1,0xf2]
+	vsubl.s32	q8, d17, d16
+@ CHECK: vsubl.u8	q8, d17, d16    @ encoding: [0xa0,0x02,0xc1,0xf3]
+	vsubl.u8	q8, d17, d16
+@ CHECK: vsubl.u16	q8, d17, d16    @ encoding: [0xa0,0x02,0xd1,0xf3]
+	vsubl.u16	q8, d17, d16
+@ CHECK: vsubl.u32	q8, d17, d16    @ encoding: [0xa0,0x02,0xe1,0xf3]
+	vsubl.u32	q8, d17, d16
+@ CHECK: vsubw.s8	q8, q8, d18     @ encoding: [0xa2,0x03,0xc0,0xf2]
+	vsubw.s8	q8, q8, d18
+@ CHECK: vsubw.s16	q8, q8, d18     @ encoding: [0xa2,0x03,0xd0,0xf2]
+	vsubw.s16	q8, q8, d18
+@ CHECK: vsubw.s32	q8, q8, d18     @ encoding: [0xa2,0x03,0xe0,0xf2]
+	vsubw.s32	q8, q8, d18
+@ CHECK: vsubw.u8	q8, q8, d18     @ encoding: [0xa2,0x03,0xc0,0xf3]
+	vsubw.u8	q8, q8, d18
+@ CHECK: vsubw.u16	q8, q8, d18     @ encoding: [0xa2,0x03,0xd0,0xf3]
+	vsubw.u16	q8, q8, d18
+@ CHECK: vsubw.u32	q8, q8, d18     @ encoding: [0xa2,0x03,0xe0,0xf3]
+	vsubw.u32	q8, q8, d18
+@ CHECK: vhsub.s8	d16, d16, d17   @ encoding: [0xa1,0x02,0x40,0xf2]
+	vhsub.s8	d16, d16, d17
+@ CHECK: vhsub.s16	d16, d16, d17   @ encoding: [0xa1,0x02,0x50,0xf2]
+	vhsub.s16	d16, d16, d17
+@ CHECK: vhsub.s32	d16, d16, d17   @ encoding: [0xa1,0x02,0x60,0xf2]
+	vhsub.s32	d16, d16, d17
+@ CHECK: vhsub.u8	d16, d16, d17   @ encoding: [0xa1,0x02,0x40,0xf3]
+	vhsub.u8	d16, d16, d17
+@ CHECK: vhsub.u16	d16, d16, d17   @ encoding: [0xa1,0x02,0x50,0xf3]
+	vhsub.u16	d16, d16, d17
+@ CHECK: vhsub.u32	d16, d16, d17   @ encoding: [0xa1,0x02,0x60,0xf3]
+	vhsub.u32	d16, d16, d17
+@ CHECK: vhsub.s8	q8, q8, q9      @ encoding: [0xe2,0x02,0x40,0xf2]
+	vhsub.s8	q8, q8, q9
+@ CHECK: vhsub.s16	q8, q8, q9      @ encoding: [0xe2,0x02,0x50,0xf2]
+	vhsub.s16	q8, q8, q9
+@ CHECK: vhsub.s32	q8, q8, q9      @ encoding: [0xe2,0x02,0x60,0xf2]
+	vhsub.s32	q8, q8, q9
+@ CHECK: vqsub.s8	d16, d16, d17   @ encoding: [0xb1,0x02,0x40,0xf2]
+	vqsub.s8	d16, d16, d17
+@ CHECK: vqsub.s16	d16, d16, d17   @ encoding: [0xb1,0x02,0x50,0xf2]
+	vqsub.s16	d16, d16, d17
+@ CHECK: vqsub.s32	d16, d16, d17   @ encoding: [0xb1,0x02,0x60,0xf2]
+	vqsub.s32	d16, d16, d17
+@ CHECK: vqsub.s64	d16, d16, d17   @ encoding: [0xb1,0x02,0x70,0xf2]
+	vqsub.s64	d16, d16, d17
+@ CHECK: vqsub.u8	d16, d16, d17   @ encoding: [0xb1,0x02,0x40,0xf3]
+	vqsub.u8	d16, d16, d17
+@ CHECK: vqsub.u16	d16, d16, d17   @ encoding: [0xb1,0x02,0x50,0xf3]
+	vqsub.u16	d16, d16, d17
+@ CHECK: vqsub.u32	d16, d16, d17   @ encoding: [0xb1,0x02,0x60,0xf3]
+	vqsub.u32	d16, d16, d17
+@ CHECK: vqsub.u64	d16, d16, d17   @ encoding: [0xb1,0x02,0x70,0xf3]
+	vqsub.u64	d16, d16, d17
+@ CHECK: vqsub.s8	q8, q8, q9      @ encoding: [0xf2,0x02,0x40,0xf2]
+	vqsub.s8	q8, q8, q9
+@ CHECK: vqsub.s16	q8, q8, q9      @ encoding: [0xf2,0x02,0x50,0xf2]
+	vqsub.s16	q8, q8, q9
+@ CHECK: vqsub.s32	q8, q8, q9      @ encoding: [0xf2,0x02,0x60,0xf2]
+	vqsub.s32	q8, q8, q9
+@ CHECK: vqsub.s64	q8, q8, q9      @ encoding: [0xf2,0x02,0x70,0xf2]
+	vqsub.s64	q8, q8, q9
+@ CHECK: vqsub.u8	q8, q8, q9      @ encoding: [0xf2,0x02,0x40,0xf3]
+	vqsub.u8	q8, q8, q9
+@ CHECK: vqsub.u16	q8, q8, q9      @ encoding: [0xf2,0x02,0x50,0xf3]
+	vqsub.u16	q8, q8, q9
+@ CHECK: vqsub.u32	q8, q8, q9      @ encoding: [0xf2,0x02,0x60,0xf3]
+	vqsub.u32	q8, q8, q9
+@ CHECK: vqsub.u64	q8, q8, q9      @ encoding: [0xf2,0x02,0x70,0xf3]
+	vqsub.u64	q8, q8, q9
+@ CHECK: vsubhn.i16	d16, q8, q9     @ encoding: [0xa2,0x06,0xc0,0xf2]
+	vsubhn.i16	d16, q8, q9
+@ CHECK: vsubhn.i32	d16, q8, q9     @ encoding: [0xa2,0x06,0xd0,0xf2]
+	vsubhn.i32	d16, q8, q9
+@ CHECK: vsubhn.i64	d16, q8, q9     @ encoding: [0xa2,0x06,0xe0,0xf2]
+	vsubhn.i64	d16, q8, q9
+@ CHECK: vrsubhn.i16	d16, q8, q9     @ encoding: [0xa2,0x06,0xc0,0xf3]
+	vrsubhn.i16	d16, q8, q9
+@ CHECK: vrsubhn.i32	d16, q8, q9     @ encoding: [0xa2,0x06,0xd0,0xf3]
+	vrsubhn.i32	d16, q8, q9
+@ CHECK: vrsubhn.i64	d16, q8, q9     @ encoding: [0xa2,0x06,0xe0,0xf3]
+	vrsubhn.i64	d16, q8, q9
diff --git a/test/MC/ARM/neon-table-encoding.s b/test/MC/ARM/neon-table-encoding.s
new file mode 100644
index 000000000000..7bf47c782f8a
--- /dev/null
+++ b/test/MC/ARM/neon-table-encoding.s
@@ -0,0 +1,19 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xa0,0x08,0xf1,0xf3]
+	vtbl.8	d16, {d17}, d16
+@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xa2,0x09,0xf0,0xf3]
+	vtbl.8	d16, {d16, d17}, d18
+@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
+	vtbl.8	d16, {d16, d17, d18}, d20
+@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
+	vtbl.8	d16, {d16, d17, d18, d19}, d20
+@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xe1,0x28,0xf0,0xf3]
+	vtbx.8	d18, {d16}, d17
+@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xe2,0x39,0xf0,0xf3]
+	vtbx.8	d19, {d16, d17}, d18
+@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
+	vtbx.8	d20, {d16, d17, d18}, d21
+@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
+	vtbx.8	d20, {d16, d17, d18, d19}, d21
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
new file mode 100644
index 000000000000..be55f47900c6
--- /dev/null
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -0,0 +1,110 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vld1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf4]
+	vld1.8	{d16}, [r0, :64]
+@ CHECK: vld1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x60,0xf4]
+  vld1.16	{d16}, [r0]
+@ CHECK: vld1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x60,0xf4]
+  vld1.32	{d16}, [r0]
+@ CHECK: vld1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x60,0xf4]
+  vld1.64	{d16}, [r0]
+@ CHECK: vld1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf4]
+  vld1.8	{d16, d17}, [r0, :64]
+@ CHECK: vld1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x60,0xf4]
+  vld1.16	{d16, d17}, [r0, :128]
+@ CHECK: vld1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x60,0xf4]
+  vld1.32	{d16, d17}, [r0]
+@ CHECK: vld1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x60,0xf4]
+  vld1.64	{d16, d17}, [r0]
+
+@ CHECK: vld2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x60,0xf4]
+  vld2.8	{d16, d17}, [r0, :64]
+@ CHECK: vld2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x60,0xf4]
+  vld2.16	{d16, d17}, [r0, :128]
+@ CHECK: vld2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x60,0xf4]
+  vld2.32	{d16, d17}, [r0]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
+  vld2.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
+  vld2.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+  vld2.32	{d16, d17, d18, d19}, [r0, :256]
+
+@ CHECK: vld3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4]
+  vld3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vld3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x60,0xf4]
+  vld3.16	{d16, d17, d18}, [r0]
+@ CHECK: vld3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x60,0xf4]
+  vld3.32	{d16, d17, d18}, [r0]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
+  vld3.8	{d16, d18, d20}, [r0, :64]!
+@ CHECK: vld3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4]
+  vld3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vld3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x60,0xf4] 
+  vld3.16	{d16, d18, d20}, [r0]!
+@ CHECK: vld3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x60,0xf4]
+  vld3.16	{d17, d19, d21}, [r0]!
+@ CHECK: vld3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x60,0xf4]
+  vld3.32	{d16, d18, d20}, [r0]!
+@ CHECK: vld3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x60,0xf4]
+  vld3.32	{d17, d19, d21}, [r0]!
+
+@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf4]
+  vld4.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf4]
+  vld4.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf4]
+  vld4.32	{d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf4]
+  vld4.8	{d16, d18, d20, d22}, [r0, :256]!
+@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf4]
+  vld4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vld4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4]
+  vld4.16	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4]
+  vld4.16	{d17, d19, d21, d23}, [r0]!
+@ CHECK: vld4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4]
+  vld4.32	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
+  vld4.32	{d17, d19, d21, d23}, [r0]!
+
+@ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf4]
+  vld1.8	{d16[3]}, [r0]
+@ CHECK: vld1.16	{d16[2]}, [r0, :16]     @ encoding: [0x9f,0x04,0xe0,0xf4]
+  vld1.16	{d16[2]}, [r0, :16]
+@ CHECK: vld1.32	{d16[1]}, [r0, :32]     @ encoding: [0xbf,0x08,0xe0,0xf4]
+  vld1.32	{d16[1]}, [r0, :32]
+
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
+  vld2.8	{d16[1], d17[1]}, [r0, :16]
+@ CHECK: vld2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
+  vld2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vld2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf4]
+  vld2.32	{d16[1], d17[1]}, [r0]
+@ CHECK: vld2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf4]
+  vld2.16	{d17[1], d19[1]}, [r0]
+@ CHECK: vld2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+  vld2.32	{d17[0], d19[0]}, [r0, :64]
+
+@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf4]
+  vld3.8	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xe0,0xf4]
+  vld3.16	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xe0,0xf4]
+  vld3.32	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.16	{d16[1], d18[1], d20[1]}, [r0] @ encoding: [0x6f,0x06,0xe0,0xf4]
+  vld3.16	{d16[1], d18[1], d20[1]}, [r0]
+@ CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xcf,0x1a,0xe0,0xf4]
+  vld3.32	{d17[1], d19[1], d21[1]}, [r0]
+
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4]
+  vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4]
+  vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
+@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4]
+  vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4]
+  vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+@ CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4]
+  vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
new file mode 100644
index 000000000000..c595aa2d5a4d
--- /dev/null
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -0,0 +1,101 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+@ CHECK: vst1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x40,0xf4]
+  vst1.8	{d16}, [r0, :64]
+@ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x40,0xf4]
+  vst1.16	{d16}, [r0]
+@ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x40,0xf4]
+  vst1.32	{d16}, [r0]
+@ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x40,0xf4]
+  vst1.64	{d16}, [r0]
+@ CHECK: vst1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x40,0xf4]
+  vst1.8	{d16, d17}, [r0, :64]
+@ CHECK: vst1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x40,0xf4]
+  vst1.16	{d16, d17}, [r0, :128]
+@ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x40,0xf4]
+  vst1.32	{d16, d17}, [r0]
+@ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x40,0xf4]
+  vst1.64	{d16, d17}, [r0]
+
+@ CHECK: vst2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x40,0xf4]
+  vst2.8	{d16, d17}, [r0, :64]
+@ CHECK: vst2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x40,0xf4]
+  vst2.16	{d16, d17}, [r0, :128]
+@ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x40,0xf4]
+  vst2.32	{d16, d17}, [r0]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
+  vst2.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
+  vst2.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
+  vst2.32	{d16, d17, d18, d19}, [r0, :256]
+
+@ CHECK: vst3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf4]
+  vst3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x40,0xf4]
+  vst3.16	{d16, d17, d18}, [r0]
+@ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x40,0xf4]
+  vst3.32	{d16, d17, d18}, [r0]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
+  vst3.8	{d16, d18, d20}, [r0, :64]!
+@ CHECK: vst3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf4]
+  vst3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x40,0xf4]
+  vst3.16	{d16, d18, d20}, [r0]!
+@ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x40,0xf4]
+  vst3.16	{d17, d19, d21}, [r0]!
+@ CHECK: vst3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x40,0xf4]
+  vst3.32	{d16, d18, d20}, [r0]!
+@ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x40,0xf4]
+  vst3.32	{d17, d19, d21}, [r0]!
+
+@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf4]
+  vst4.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf4]
+  vst4.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf4]
+  vst4.8	{d16, d18, d20, d22}, [r0, :256]!
+@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf4]
+  vst4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf4]
+  vst4.16	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf4]
+  vst4.16	{d17, d19, d21, d23}, [r0]!
+@ CHECK: vst4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf4]
+  vst4.32	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4]
+  vst4.32	{d17, d19, d21, d23}, [r0]!
+
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
+  vst2.8	{d16[1], d17[1]}, [r0, :16]
+@ CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+  vst2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf4]
+  vst2.32	{d16[1], d17[1]}, [r0]
+@ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
+  vst2.16	{d17[1], d19[1]}, [r0]
+@ CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+  vst2.32	{d17[0], d19[0]}, [r0, :64]
+
+@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4]
+  vst3.8	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf4]
+  vst3.16	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf4]
+  vst3.32	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.16	{d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf4]
+  vst3.16	{d17[2], d19[2], d21[2]}, [r0]
+@ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf4]
+  vst3.32	{d16[0], d18[0], d20[0]}, [r0]
+
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4]
+  vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4]
+  vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
+@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4]
+  vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4]
+  vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+@ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4]
+  vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neont2-abs-encoding.s b/test/MC/ARM/neont2-abs-encoding.s
new file mode 100644
index 000000000000..5c8bc33d0f81
--- /dev/null
+++ b/test/MC/ARM/neont2-abs-encoding.s
@@ -0,0 +1,33 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vabs.s8	d16, d16                @ encoding: [0xf1,0xff,0x20,0x03]
+	vabs.s8	d16, d16
+@ CHECK: vabs.s16	d16, d16        @ encoding: [0xf5,0xff,0x20,0x03]
+	vabs.s16	d16, d16
+@ CHECK: vabs.s32	d16, d16        @ encoding: [0xf9,0xff,0x20,0x03]
+	vabs.s32	d16, d16
+@ CHECK: vabs.f32	d16, d16        @ encoding: [0xf9,0xff,0x20,0x07]
+	vabs.f32	d16, d16
+@ CHECK: vabs.s8	q8, q8                  @ encoding: [0xf1,0xff,0x60,0x03]
+	vabs.s8	q8, q8
+@ CHECK: vabs.s16	q8, q8          @ encoding: [0xf5,0xff,0x60,0x03]
+	vabs.s16	q8, q8
+@ CHECK: vabs.s32	q8, q8          @ encoding: [0xf9,0xff,0x60,0x03]
+	vabs.s32	q8, q8
+@ CHECK: vabs.f32	q8, q8          @ encoding: [0xf9,0xff,0x60,0x07]
+	vabs.f32	q8, q8
+
+@ CHECK: vqabs.s8	d16, d16        @ encoding: [0xf0,0xff,0x20,0x07]
+	vqabs.s8	d16, d16
+@ CHECK: vqabs.s16	d16, d16        @ encoding: [0xf4,0xff,0x20,0x07]
+	vqabs.s16	d16, d16
+@ CHECK: vqabs.s32	d16, d16        @ encoding: [0xf8,0xff,0x20,0x07]
+	vqabs.s32	d16, d16
+@ CHECK: vqabs.s8	q8, q8          @ encoding: [0xf0,0xff,0x60,0x07]
+	vqabs.s8	q8, q8
+@ CHECK: vqabs.s16	q8, q8          @ encoding: [0xf4,0xff,0x60,0x07]
+	vqabs.s16	q8, q8
+@ CHECK: vqabs.s32	q8, q8          @ encoding: [0xf8,0xff,0x60,0x07]
+	vqabs.s32	q8, q8
diff --git a/test/MC/ARM/neont2-absdiff-encoding.s b/test/MC/ARM/neont2-absdiff-encoding.s
new file mode 100644
index 000000000000..2096357ce8ff
--- /dev/null
+++ b/test/MC/ARM/neont2-absdiff-encoding.s
@@ -0,0 +1,86 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+@ NOTE: This currently fails because the ASM parser doesn't parse vabal.
+
+.code 16
+
+@ CHECK: vabd.s8	d16, d16, d17           @ encoding: [0xa1,0x07,0x40,0xef]
+	vabd.s8	d16, d16, d17
+@ CHECK: vabd.s16	d16, d16, d17   @ encoding: [0xa1,0x07,0x50,0xef]
+	vabd.s16	d16, d16, d17
+@ CHECK: vabd.s32	d16, d16, d17   @ encoding: [0xa1,0x07,0x60,0xef]
+	vabd.s32	d16, d16, d17
+@ CHECK: vabd.u8	d16, d16, d17           @ encoding: [0xa1,0x07,0x40,0xff]
+	vabd.u8	d16, d16, d17
+@ CHECK: vabd.u16	d16, d16, d17   @ encoding: [0xa1,0x07,0x50,0xff]
+	vabd.u16	d16, d16, d17
+  @ CHECK: vabd.u32	d16, d16, d17   @ encoding: [0xa1,0x07,0x60,0xff]
+	vabd.u32	d16, d16, d17
+@ CHECK: vabd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x60,0xff]
+	vabd.f32	d16, d16, d17
+@ CHECK: vabd.s8	q8, q8, q9              @ encoding: [0xe2,0x07,0x40,0xef]
+	vabd.s8	q8, q8, q9
+@ CHECK: vabd.s16	q8, q8, q9      @ encoding: [0xe2,0x07,0x50,0xef]
+	vabd.s16	q8, q8, q9
+@ CHECK: vabd.s32	q8, q8, q9      @ encoding: [0xe2,0x07,0x60,0xef]
+	vabd.s32	q8, q8, q9
+@ CHECK: vabd.u8	q8, q8, q9              @ encoding: [0xe2,0x07,0x40,0xff]
+	vabd.u8	q8, q8, q9
+@ CHECK: vabd.u16	q8, q8, q9      @ encoding: [0xe2,0x07,0x50,0xff]
+	vabd.u16	q8, q8, q9
+@ CHECK: vabd.u32	q8, q8, q9      @ encoding: [0xe2,0x07,0x60,0xff]
+	vabd.u32	q8, q8, q9
+@ CHECK: vabd.f32	q8, q8, q9      @ encoding: [0xe2,0x0d,0x60,0xff]
+	vabd.f32	q8, q8, q9
+
+@ CHECK: vabdl.s8	q8, d16, d17    @ encoding: [0xa1,0x07,0xc0,0xef]
+	vabdl.s8	q8, d16, d17
+@ CHECK: vabdl.s16	q8, d16, d17    @ encoding: [0xa1,0x07,0xd0,0xef]
+	vabdl.s16	q8, d16, d17
+@ CHECK: vabdl.s32	q8, d16, d17    @ encoding: [0xa1,0x07,0xe0,0xef]
+	vabdl.s32	q8, d16, d17
+@ CHECK: vabdl.u8	q8, d16, d17    @ encoding: [0xa1,0x07,0xc0,0xff]
+	vabdl.u8	q8, d16, d17
+@ CHECK: vabdl.u16	q8, d16, d17    @ encoding: [0xa1,0x07,0xd0,0xff]
+	vabdl.u16	q8, d16, d17
+@ CHECK: vabdl.u32	q8, d16, d17    @ encoding: [0xa1,0x07,0xe0,0xff]
+	vabdl.u32	q8, d16, d17
+
+@ CHECK: vaba.s8	d16, d18, d17           @ encoding: [0xb1,0x07,0x42,0xef]
+	vaba.s8	d16, d18, d17
+@ CHECK: vaba.s16	d16, d18, d17   @ encoding: [0xb1,0x07,0x52,0xef]
+	vaba.s16	d16, d18, d17
+@ CHECK: vaba.s32	d16, d18, d17   @ encoding: [0xb1,0x07,0x62,0xef]
+	vaba.s32	d16, d18, d17
+@ CHECK: vaba.u8	d16, d18, d17           @ encoding: [0xb1,0x07,0x42,0xff]
+	vaba.u8	d16, d18, d17
+@ CHECK: vaba.u16	d16, d18, d17   @ encoding: [0xb1,0x07,0x52,0xff]
+	vaba.u16	d16, d18, d17
+@ CHECK: vaba.u32	d16, d18, d17   @ encoding: [0xb1,0x07,0x62,0xff]
+	vaba.u32	d16, d18, d17
+@ CHECK: vaba.s8	q9, q8, q10             @ encoding: [0xf4,0x27,0x40,0xef]
+	vaba.s8	q9, q8, q10
+@ CHECK: vaba.s16	q9, q8, q10     @ encoding: [0xf4,0x27,0x50,0xef]
+	vaba.s16	q9, q8, q10
+@ CHECK: vaba.s32	q9, q8, q10     @ encoding: [0xf4,0x27,0x60,0xef]
+	vaba.s32	q9, q8, q10
+@ CHECK: vaba.u8	q9, q8, q10             @ encoding: [0xf4,0x27,0x40,0xff]
+	vaba.u8	q9, q8, q10
+@ CHECK: vaba.u16	q9, q8, q10     @ encoding: [0xf4,0x27,0x50,0xff]
+	vaba.u16	q9, q8, q10
+@ CHECK: vaba.u32	q9, q8, q10     @ encoding: [0xf4,0x27,0x60,0xff]
+	vaba.u32	q9, q8, q10
+
+@ CHECK: vabal.s8	q8, d19, d18    @ encoding: [0xa2,0x05,0xc3,0xef]
+	vabal.s8	q8, d19, d18
+@ CHECK: vabal.s16	q8, d19, d18    @ encoding: [0xa2,0x05,0xd3,0xef]
+	vabal.s16	q8, d19, d18
+@ CHECK: vabal.s32	q8, d19, d18    @ encoding: [0xa2,0x05,0xe3,0xef]
+	vabal.s32	q8, d19, d18
+@ CHECK: vabal.u8	q8, d19, d18    @ encoding: [0xa2,0x05,0xc3,0xff]
+	vabal.u8	q8, d19, d18
+@ CHECK: 	vabal.u16	q8, d19, d18    @ encoding: [0xa2,0x05,0xd3,0xff]
+	vabal.u16	q8, d19, d18
+@ CHECK: vabal.u32	q8, d19, d18    @ encoding: [0xa2,0x05,0xe3,0xff]
+	vabal.u32	q8, d19, d18
+
diff --git a/test/MC/ARM/neont2-add-encoding.s b/test/MC/ARM/neont2-add-encoding.s
new file mode 100644
index 000000000000..c384d76dbea2
--- /dev/null
+++ b/test/MC/ARM/neont2-add-encoding.s
@@ -0,0 +1,138 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vadd.i8	d16, d17, d16           @ encoding: [0x41,0xef,0xa0,0x08]
+	vadd.i8	d16, d17, d16
+@ CHECK: vadd.i16	d16, d17, d16   @ encoding: [0x51,0xef,0xa0,0x08]
+	vadd.i16	d16, d17, d16
+@ CHECK: vadd.i64	d16, d17, d16   @ encoding: [0x71,0xef,0xa0,0x08]
+	vadd.i64	d16, d17, d16
+@ CHECK: vadd.i32	d16, d17, d16   @ encoding: [0x61,0xef,0xa0,0x08]
+	vadd.i32	d16, d17, d16
+@ CHECK: vadd.f32	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x0d]
+	vadd.f32	d16, d16, d17
+@ CHECK: vadd.f32	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x0d]
+	vadd.f32	q8, q8, q9
+
+@ CHECK: vaddl.s8	q8, d17, d16    @ encoding: [0xc1,0xef,0xa0,0x00]
+	vaddl.s8	q8, d17, d16
+@ CHECK: vaddl.s16	q8, d17, d16    @ encoding: [0xd1,0xef,0xa0,0x00]
+	vaddl.s16	q8, d17, d16
+@ CHECK: vaddl.s32	q8, d17, d16    @ encoding: [0xe1,0xef,0xa0,0x00]
+	vaddl.s32	q8, d17, d16
+@ CHECK: vaddl.u8	q8, d17, d16    @ encoding: [0xc1,0xff,0xa0,0x00]
+	vaddl.u8	q8, d17, d16
+@ CHECK: vaddl.u16	q8, d17, d16    @ encoding: [0xd1,0xff,0xa0,0x00]
+	vaddl.u16	q8, d17, d16
+@ CHECK: vaddl.u32	q8, d17, d16    @ encoding: [0xe1,0xff,0xa0,0x00]
+	vaddl.u32	q8, d17, d16
+
+@ CHECK: vaddw.s8	q8, q8, d18     @ encoding: [0xc0,0xef,0xa2,0x01]
+	vaddw.s8	q8, q8, d18
+@ CHECK: vaddw.s16	q8, q8, d18     @ encoding: [0xd0,0xef,0xa2,0x01]
+	vaddw.s16	q8, q8, d18
+@ CHECK: vaddw.s32	q8, q8, d18     @ encoding: [0xe0,0xef,0xa2,0x01]
+	vaddw.s32	q8, q8, d18
+@ CHECK: vaddw.u8	q8, q8, d18     @ encoding: [0xc0,0xff,0xa2,0x01]
+	vaddw.u8	q8, q8, d18
+@ CHECK: vaddw.u16	q8, q8, d18     @ encoding: [0xd0,0xff,0xa2,0x01]
+	vaddw.u16	q8, q8, d18
+@ CHECK: vaddw.u32	q8, q8, d18     @ encoding: [0xe0,0xff,0xa2,0x01]
+	vaddw.u32	q8, q8, d18
+
+@ CHECK: vhadd.s8	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x00]
+	vhadd.s8	d16, d16, d17
+@ CHECK: vhadd.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x00]
+	vhadd.s16	d16, d16, d17
+@ CHECK: vhadd.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x00]
+	vhadd.s32	d16, d16, d17
+@ CHECK: vhadd.u8	d16, d16, d17   @ encoding: [0x40,0xff,0xa1,0x00]
+	vhadd.u8	d16, d16, d17
+@ CHECK: vhadd.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x00]
+	vhadd.u16	d16, d16, d17
+@ CHECK: vhadd.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x00]
+	vhadd.u32	d16, d16, d17
+@ CHECK: vhadd.s8	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x00]
+	vhadd.s8	q8, q8, q9
+@ CHECK: vhadd.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x00]
+	vhadd.s16	q8, q8, q9
+@ CHECK: vhadd.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x00]
+	vhadd.s32	q8, q8, q9
+  @ CHECK: vhadd.u8	q8, q8, q9      @ encoding: [0x40,0xff,0xe2,0x00]
+	vhadd.u8	q8, q8, q9
+@ CHECK: vhadd.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x00]
+	vhadd.u16	q8, q8, q9
+@ CHECK: vhadd.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x00]
+	vhadd.u32	q8, q8, q9
+	
+@ CHECK: vrhadd.s8	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x01]
+	vrhadd.s8	d16, d16, d17
+@ CHECK: vrhadd.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x01]
+	vrhadd.s16	d16, d16, d17
+@ CHECK: vrhadd.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x01]
+	vrhadd.s32	d16, d16, d17
+@ CHECK: vrhadd.u8	d16, d16, d17   @ encoding: [0x40,0xff,0xa1,0x01]
+	vrhadd.u8	d16, d16, d17
+@ CHECK: vrhadd.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x01]
+	vrhadd.u16	d16, d16, d17
+@ CHECK: vrhadd.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x01]
+	vrhadd.u32	d16, d16, d17
+@ CHECK: vrhadd.s8	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x01]
+	vrhadd.s8	q8, q8, q9
+@ CHECK: vrhadd.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x01]
+	vrhadd.s16	q8, q8, q9
+@ CHECK: vrhadd.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x01]
+	vrhadd.s32	q8, q8, q9
+@ CHECK: vrhadd.u8	q8, q8, q9      @ encoding: [0x40,0xff,0xe2,0x01]
+	vrhadd.u8	q8, q8, q9
+@ CHECK: vrhadd.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x01]
+	vrhadd.u16	q8, q8, q9
+@ CHECK: vrhadd.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x01]
+	vrhadd.u32	q8, q8, q9
+
+@ CHECK: vqadd.s8	d16, d16, d17   @ encoding: [0x40,0xef,0xb1,0x00]
+	vqadd.s8	d16, d16, d17
+@ CHECK: vqadd.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x00]
+	vqadd.s16	d16, d16, d17
+@ CHECK: vqadd.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x00]
+	vqadd.s32	d16, d16, d17
+@ CHECK: vqadd.s64	d16, d16, d17   @ encoding: [0x70,0xef,0xb1,0x00]
+	vqadd.s64	d16, d16, d17
+@ CHECK: vqadd.u8	d16, d16, d17   @ encoding: [0x40,0xff,0xb1,0x00]
+	vqadd.u8	d16, d16, d17
+@ CHECK: vqadd.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xb1,0x00]
+	vqadd.u16	d16, d16, d17
+@ CHECK: vqadd.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xb1,0x00]
+	vqadd.u32	d16, d16, d17
+@ CHECK: vqadd.u64	d16, d16, d17   @ encoding: [0x70,0xff,0xb1,0x00]
+	vqadd.u64	d16, d16, d17
+@ CHECK: vqadd.s8	q8, q8, q9      @ encoding: [0x40,0xef,0xf2,0x00]
+	vqadd.s8	q8, q8, q9
+@ CHECK: vqadd.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x00]
+	vqadd.s16	q8, q8, q9
+@ CHECK: vqadd.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x00]
+	vqadd.s32	q8, q8, q9
+@ CHECK: vqadd.s64	q8, q8, q9      @ encoding: [0x70,0xef,0xf2,0x00]
+	vqadd.s64	q8, q8, q9
+@ CHECK: vqadd.u8	q8, q8, q9      @ encoding: [0x40,0xff,0xf2,0x00]
+	vqadd.u8	q8, q8, q9
+@ CHECK: vqadd.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xf2,0x00]
+	vqadd.u16	q8, q8, q9
+@ CHECK: vqadd.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xf2,0x00]
+	vqadd.u32	q8, q8, q9
+@ CHECK: vqadd.u64	q8, q8, q9      @ encoding: [0x70,0xff,0xf2,0x00]
+	vqadd.u64	q8, q8, q9
+
+@ CHECK: vaddhn.i16	d16, q8, q9     @ encoding: [0xc0,0xef,0xa2,0x04]
+	vaddhn.i16	d16, q8, q9
+@ CHECK: vaddhn.i32	d16, q8, q9     @ encoding: [0xd0,0xef,0xa2,0x04]
+	vaddhn.i32	d16, q8, q9
+@ CHECK: vaddhn.i64	d16, q8, q9     @ encoding: [0xe0,0xef,0xa2,0x04]
+	vaddhn.i64	d16, q8, q9
+@ CHECK: vraddhn.i16	d16, q8, q9     @ encoding: [0xc0,0xff,0xa2,0x04]
+	vraddhn.i16	d16, q8, q9
+@ CHECK: vraddhn.i32	d16, q8, q9     @ encoding: [0xd0,0xff,0xa2,0x04]
+	vraddhn.i32	d16, q8, q9
+@ CHECK: vraddhn.i64	d16, q8, q9     @ encoding: [0xe0,0xff,0xa2,0x04]
+	vraddhn.i64	d16, q8, q9
diff --git a/test/MC/ARM/neont2-bitcount-encoding.s b/test/MC/ARM/neont2-bitcount-encoding.s
new file mode 100644
index 000000000000..4280cbd4a44f
--- /dev/null
+++ b/test/MC/ARM/neont2-bitcount-encoding.s
@@ -0,0 +1,34 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vcnt.8	d16, d16                @ encoding: [0x20,0x05,0xf0,0xff]
+	vcnt.8	d16, d16
+@ CHECK: vcnt.8	q8, q8                  @ encoding: [0x60,0x05,0xf0,0xff]
+	vcnt.8	q8, q8
+@ CHECK: vclz.i8	d16, d16                @ encoding: [0xa0,0x04,0xf0,0xff]
+	vclz.i8	d16, d16
+@ CHECK: vclz.i16	d16, d16        @ encoding: [0xa0,0x04,0xf4,0xff]
+	vclz.i16	d16, d16
+@ CHECK: vclz.i32	d16, d16        @ encoding: [0xa0,0x04,0xf8,0xff]
+	vclz.i32	d16, d16
+@ CHECK: vclz.i8	q8, q8                  @ encoding: [0xe0,0x04,0xf0,0xff]
+	vclz.i8	q8, q8
+@ CHECK: vclz.i16	q8, q8          @ encoding: [0xe0,0x04,0xf4,0xff]
+	vclz.i16	q8, q8
+@ CHECK: vclz.i32	q8, q8          @ encoding: [0xe0,0x04,0xf8,0xff]
+	vclz.i32	q8, q8
+@ CHECK: vcls.s8	d16, d16                @ encoding: [0x20,0x04,0xf0,0xff]
+	vcls.s8	d16, d16
+@ CHECK: vcls.s16	d16, d16        @ encoding: [0x20,0x04,0xf4,0xff]
+	vcls.s16	d16, d16
+@ CHECK: vcls.s32	d16, d16        @ encoding: [0x20,0x04,0xf8,0xff]
+	vcls.s32	d16, d16
+@ CHECK: vcls.s8	q8, q8                  @ encoding: [0x60,0x04,0xf0,0xff]
+	vcls.s8	q8, q8
+@ CHECK: vcls.s16	q8, q8          @ encoding: [0x60,0x04,0xf4,0xff]
+	vcls.s16	q8, q8
+@ CHECK: vcls.s32	q8, q8          @ encoding: [0x60,0x04,0xf8,0xff]
+	vcls.s32	q8, q8
+
diff --git a/test/MC/ARM/neont2-bitwise-encoding.s b/test/MC/ARM/neont2-bitwise-encoding.s
new file mode 100644
index 000000000000..3acd7a8c9911
--- /dev/null
+++ b/test/MC/ARM/neont2-bitwise-encoding.s
@@ -0,0 +1,49 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vand	d16, d17, d16           @ encoding: [0xb0,0x01,0x41,0xef]
+	vand	d16, d17, d16
+@ CHECK: vand	q8, q8, q9              @ encoding: [0xf2,0x01,0x40,0xef]
+	vand	q8, q8, q9
+
+@ CHECK: veor	d16, d17, d16           @ encoding: [0xb0,0x01,0x41,0xff]
+	veor	d16, d17, d16
+@ CHECK: veor	q8, q8, q9              @ encoding: [0xf2,0x01,0x40,0xff]
+	veor	q8, q8, q9
+
+@ CHECK: vorr	d16, d17, d16           @ encoding: [0xb0,0x01,0x61,0xef]
+	vorr	d16, d17, d16
+@ CHECK: vorr	q8, q8, q9              @ encoding: [0xf2,0x01,0x60,0xef]
+	vorr	q8, q8, q9
+@ CHECK: vorr.i32	d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xef]
+  vorr.i32	d16, #0x1000000
+@ CHECK: vorr.i32	q8, #0x1000000  @ encoding: [0x51,0x07,0xc0,0xef]
+  vorr.i32	q8, #0x1000000
+@ CHECK: vorr.i32	q8, #0x0        @ encoding: [0x50,0x01,0xc0,0xef]
+  vorr.i32	q8, #0x0
+
+@ CHECK: vbic	d16, d17, d16           @ encoding: [0xb0,0x01,0x51,0xef]
+	vbic	d16, d17, d16
+@ CHECK: vbic	q8, q8, q9              @ encoding: [0xf2,0x01,0x50,0xef]
+	vbic	q8, q8, q9
+@ CHECK: vbic.i32	d16, #0xFF000000 @ encoding: [0x3f,0x07,0xc7,0xff]
+  vbic.i32	d16, #0xFF000000
+@ CHECK: vbic.i32	q8, #0xFF000000 @ encoding: [0x7f,0x07,0xc7,0xff]
+  vbic.i32	q8, #0xFF000000
+
+@ CHECK: vorn	d16, d17, d16           @ encoding: [0xb0,0x01,0x71,0xef]
+	vorn	d16, d17, d16
+@ CHECK: vorn	q8, q8, q9              @ encoding: [0xf2,0x01,0x70,0xef]
+	vorn	q8, q8, q9
+
+@ CHECK: vmvn	d16, d16                @ encoding: [0xa0,0x05,0xf0,0xff]
+	vmvn	d16, d16
+@ CHECK: vmvn	q8, q8                  @ encoding: [0xe0,0x05,0xf0,0xff]
+	vmvn	q8, q8
+
+@ CHECK: vbsl	d18, d17, d16           @ encoding: [0xb0,0x21,0x51,0xff]
+	vbsl	d18, d17, d16
+@ CHECK: vbsl	q8, q10, q9             @ encoding: [0xf2,0x01,0x54,0xff]
+	vbsl	q8, q10, q9
diff --git a/test/MC/ARM/neont2-cmp-encoding.s b/test/MC/ARM/neont2-cmp-encoding.s
new file mode 100644
index 000000000000..1dbd42a3946c
--- /dev/null
+++ b/test/MC/ARM/neont2-cmp-encoding.s
@@ -0,0 +1,36 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vcvt.s32.f32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x07]
+	vcvt.s32.f32	d16, d16
+@ CHECK: vcvt.u32.f32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x07]
+	vcvt.u32.f32	d16, d16
+@ CHECK: vcvt.f32.s32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x06]
+	vcvt.f32.s32	d16, d16
+@ CHECK: vcvt.f32.u32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x06]
+	vcvt.f32.u32	d16, d16
+@ CHECK: vcvt.s32.f32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x07]
+	vcvt.s32.f32	q8, q8
+@ CHECK: vcvt.u32.f32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x07]
+	vcvt.u32.f32	q8, q8
+@ CHECK: vcvt.f32.s32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x06]
+	vcvt.f32.s32	q8, q8
+@ CHECK: vcvt.f32.u32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x06]
+	vcvt.f32.u32	q8, q8
+@ CHECK: vcvt.s32.f32	d16, d16, #1    @ encoding: [0xff,0xef,0x30,0x0f]
+	vcvt.s32.f32	d16, d16, #1
+@ CHECK: vcvt.u32.f32	d16, d16, #1    @ encoding: [0xff,0xff,0x30,0x0f]
+	vcvt.u32.f32	d16, d16, #1
+@ CHECK: vcvt.f32.s32	d16, d16, #1    @ encoding: [0xff,0xef,0x30,0x0e]
+	vcvt.f32.s32	d16, d16, #1
+@ CHECK: vcvt.f32.u32	d16, d16, #1    @ encoding: [0xff,0xff,0x30,0x0e]
+	vcvt.f32.u32	d16, d16, #1
+@ CHECK: vcvt.s32.f32	q8, q8, #1      @ encoding: [0xff,0xef,0x70,0x0f]
+	vcvt.s32.f32	q8, q8, #1
+@ CHECK: vcvt.u32.f32	q8, q8, #1      @ encoding: [0xff,0xff,0x70,0x0f]
+	vcvt.u32.f32	q8, q8, #1
+@ CHECK: vcvt.f32.s32	q8, q8, #1      @ encoding: [0xff,0xef,0x70,0x0e]
+	vcvt.f32.s32	q8, q8, #1
+@ CHECK: vcvt.f32.u32	q8, q8, #1      @ encoding: [0xff,0xff,0x70,0x0e]
+	vcvt.f32.u32	q8, q8, #1
diff --git a/test/MC/ARM/neont2-convert-encoding.s b/test/MC/ARM/neont2-convert-encoding.s
new file mode 100644
index 000000000000..1df3b43f305f
--- /dev/null
+++ b/test/MC/ARM/neont2-convert-encoding.s
@@ -0,0 +1,40 @@
+@ RUN: llvm-mc -mcpu=cortex-a9 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vcvt.s32.f32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x07]
+	vcvt.s32.f32	d16, d16
+@ CHECK: vcvt.u32.f32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x07]
+	vcvt.u32.f32	d16, d16
+@ CHECK: vcvt.f32.s32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x06]
+	vcvt.f32.s32	d16, d16
+@ CHECK: vcvt.f32.u32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x06]
+	vcvt.f32.u32	d16, d16
+@ CHECK: vcvt.s32.f32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x07]
+	vcvt.s32.f32	q8, q8
+@ CHECK: vcvt.u32.f32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x07]
+	vcvt.u32.f32	q8, q8
+@ CHECK: vcvt.f32.s32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x06]
+	vcvt.f32.s32	q8, q8
+@ CHECK: vcvt.f32.u32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x06]
+	vcvt.f32.u32	q8, q8
+@ CHECK: vcvt.s32.f32	d16, d16, #1    @ encoding: [0xff,0xef,0x30,0x0f]
+	vcvt.s32.f32	d16, d16, #1
+@ CHECK: vcvt.u32.f32	d16, d16, #1    @ encoding: [0xff,0xff,0x30,0x0f]
+	vcvt.u32.f32	d16, d16, #1
+@ CHECK: vcvt.f32.s32	d16, d16, #1    @ encoding: [0xff,0xef,0x30,0x0e]
+	vcvt.f32.s32	d16, d16, #1
+@ CHECK: vcvt.f32.u32	d16, d16, #1    @ encoding: [0xff,0xff,0x30,0x0e]
+	vcvt.f32.u32	d16, d16, #1
+@ CHECK: vcvt.s32.f32	q8, q8, #1      @ encoding: [0xff,0xef,0x70,0x0f]
+	vcvt.s32.f32	q8, q8, #1
+@ CHECK: vcvt.u32.f32	q8, q8, #1      @ encoding: [0xff,0xff,0x70,0x0f]
+	vcvt.u32.f32	q8, q8, #1
+@ CHECK: vcvt.f32.s32	q8, q8, #1      @ encoding: [0xff,0xef,0x70,0x0e]
+	vcvt.f32.s32	q8, q8, #1
+@ CHECK: vcvt.f32.u32	q8, q8, #1      @ encoding: [0xff,0xff,0x70,0x0e]
+	vcvt.f32.u32	q8, q8, #1
+@ CHECK: vcvt.f32.f16	q8, d16         @ encoding: [0xf6,0xff,0x20,0x07]
+	vcvt.f32.f16	q8, d16
+@ CHECK: vcvt.f16.f32	d16, q8         @ encoding: [0xf6,0xff,0x20,0x06]
+	vcvt.f16.f32	d16, q8
diff --git a/test/MC/ARM/neont2-dup-encoding.s b/test/MC/ARM/neont2-dup-encoding.s
new file mode 100644
index 000000000000..da6e78f56012
--- /dev/null
+++ b/test/MC/ARM/neont2-dup-encoding.s
@@ -0,0 +1,29 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vdup.8	d16, r0                 @ encoding: [0x90,0x0b,0xc0,0xee]
+	vdup.8	d16, r0
+@ CHECK: vdup.16	d16, r0                 @ encoding: [0xb0,0x0b,0x80,0xee]
+	vdup.16	d16, r0
+@ CHECK: vdup.32	d16, r0                 @ encoding: [0x90,0x0b,0x80,0xee]
+	vdup.32	d16, r0
+@ CHECK: vdup.8	q8, r0                  @ encoding: [0x90,0x0b,0xe0,0xee]
+	vdup.8	q8, r0
+@ CHECK: vdup.16	q8, r0                  @ encoding: [0xb0,0x0b,0xa0,0xee]
+	vdup.16	q8, r0
+@ CHECK: vdup.32	q8, r0                  @ encoding: [0x90,0x0b,0xa0,0xee]
+	vdup.32	q8, r0
+@ CHECK: vdup.8	d16, d16[1]             @ encoding: [0x20,0x0c,0xf3,0xff]
+	vdup.8	d16, d16[1]
+@ CHECK: vdup.16	d16, d16[1]             @ encoding: [0x20,0x0c,0xf6,0xff]
+	vdup.16	d16, d16[1]
+@ CHECK: vdup.32	d16, d16[1]             @ encoding: [0x20,0x0c,0xfc,0xff]
+	vdup.32	d16, d16[1]
+@ CHECK: vdup.8	q8, d16[1]              @ encoding: [0x60,0x0c,0xf3,0xff]
+	vdup.8	q8, d16[1]
+@ CHECK: vdup.16	q8, d16[1]              @ encoding: [0x60,0x0c,0xf6,0xff]
+	vdup.16	q8, d16[1]
+@ CHECK: vdup.32	q8, d16[1]              @ encoding: [0x60,0x0c,0xfc,0xff]
+	vdup.32	q8, d16[1]
diff --git a/test/MC/ARM/neont2-minmax-encoding.s b/test/MC/ARM/neont2-minmax-encoding.s
new file mode 100644
index 000000000000..7e86d45bb14a
--- /dev/null
+++ b/test/MC/ARM/neont2-minmax-encoding.s
@@ -0,0 +1,60 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xb1,0x06]
+	vmin.s8	d16, d16, d17
+@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x06]
+	vmin.s16	d16, d16, d17
+@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x06]
+	vmin.s32	d16, d16, d17
+@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xb1,0x06]
+	vmin.u8	d16, d16, d17
+@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xb1,0x06]
+	vmin.u16	d16, d16, d17
+@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xb1,0x06]
+	vmin.u32	d16, d16, d17
+@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0f]
+	vmin.f32	d16, d16, d17
+@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xf2,0x06]
+	vmin.s8	q8, q8, q9
+@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x06]
+	vmin.s16	q8, q8, q9
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x06]
+	vmin.s32	q8, q8, q9
+@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xf2,0x06]
+	vmin.u8	q8, q8, q9
+@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xf2,0x06]
+	vmin.u16	q8, q8, q9
+@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xf2,0x06]
+	vmin.u32	q8, q8, q9
+@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0f]
+	vmin.f32	q8, q8, q9
+@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xa1,0x06]
+	vmax.s8	d16, d16, d17
+@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x06]
+	vmax.s16	d16, d16, d17
+@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x06]
+	vmax.s32	d16, d16, d17
+@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xa1,0x06]
+	vmax.u8	d16, d16, d17
+@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x06]
+	vmax.u16	d16, d16, d17
+@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x06]
+	vmax.u32	d16, d16, d17
+@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x0f]
+	vmax.f32	d16, d16, d17
+@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xe2,0x06]
+	vmax.s8	q8, q8, q9
+@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x06]
+	vmax.s16	q8, q8, q9
+@ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x06]
+	vmax.s32	q8, q8, q9
+@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xe2,0x06]
+	vmax.u8	q8, q8, q9
+@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x06]
+	vmax.u16	q8, q8, q9
+@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x06]
+	vmax.u32	q8, q8, q9
+@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x0f]
+	vmax.f32	q8, q8, q9
diff --git a/test/MC/ARM/neont2-mov-encoding.s b/test/MC/ARM/neont2-mov-encoding.s
new file mode 100644
index 000000000000..ababbb795729
--- /dev/null
+++ b/test/MC/ARM/neont2-mov-encoding.s
@@ -0,0 +1,119 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vmov.i8	d16, #0x8               @ encoding: [0x18,0x0e,0xc0,0xef]
+	vmov.i8	d16, #0x8
+@ CHECK: vmov.i16	d16, #0x10      @ encoding: [0x10,0x08,0xc1,0xef]
+	vmov.i16	d16, #0x10
+@ CHECK: vmov.i16	d16, #0x1000    @ encoding: [0x10,0x0a,0xc1,0xef]
+	vmov.i16	d16, #0x1000
+@ CHECK: vmov.i32	d16, #0x20      @ encoding: [0x10,0x00,0xc2,0xef]
+	vmov.i32	d16, #0x20
+@ CHECK: vmov.i32	d16, #0x2000    @ encoding: [0x10,0x02,0xc2,0xef]
+	vmov.i32	d16, #0x2000
+@ CHECK: vmov.i32	d16, #0x200000  @ encoding: [0x10,0x04,0xc2,0xef]
+	vmov.i32	d16, #0x200000
+@ CHECK: vmov.i32	d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xef]
+	vmov.i32	d16, #0x20000000
+@ CHECK: vmov.i32	d16, #0x20FF    @ encoding: [0x10,0x0c,0xc2,0xef]
+	vmov.i32	d16, #0x20FF
+@ CHECK: vmov.i32	d16, #0x20FFFF  @ encoding: [0x10,0x0d,0xc2,0xef]
+	vmov.i32	d16, #0x20FFFF
+@ CHECK: vmov.i64	d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xff]
+	vmov.i64	d16, #0xFF0000FF0000FFFF
+@ CHECK: vmov.i8	q8, #0x8                @ encoding: [0x58,0x0e,0xc0,0xef]
+	vmov.i8	q8, #0x8
+@ CHECK: vmov.i16	q8, #0x10       @ encoding: [0x50,0x08,0xc1,0xef]
+	vmov.i16	q8, #0x10
+@ CHECK: vmov.i16	q8, #0x1000     @ encoding: [0x50,0x0a,0xc1,0xef]
+	vmov.i16	q8, #0x1000
+@ CHECK: vmov.i32	q8, #0x20       @ encoding: [0x50,0x00,0xc2,0xef]
+	vmov.i32	q8, #0x20
+@ CHECK: vmov.i32	q8, #0x2000     @ encoding: [0x50,0x02,0xc2,0xef]
+	vmov.i32	q8, #0x2000
+@ CHECK: vmov.i32	q8, #0x200000   @ encoding: [0x50,0x04,0xc2,0xef]
+	vmov.i32	q8, #0x200000
+@ CHECK: vmov.i32	q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xef]
+	vmov.i32	q8, #0x20000000
+@ CHECK: vmov.i32	q8, #0x20FF     @ encoding: [0x50,0x0c,0xc2,0xef]
+	vmov.i32	q8, #0x20FF
+@ CHECK: vmov.i32	q8, #0x20FFFF   @ encoding: [0x50,0x0d,0xc2,0xef]
+	vmov.i32	q8, #0x20FFFF
+@ CHECK: vmov.i64	q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xff]
+	vmov.i64	q8, #0xFF0000FF0000FFFF
+@ CHECK: vmvn.i16	d16, #0x10      @ encoding: [0x30,0x08,0xc1,0xef]
+	vmvn.i16	d16, #0x10
+@ CHECK: vmvn.i16	d16, #0x1000    @ encoding: [0x30,0x0a,0xc1,0xef]
+	vmvn.i16	d16, #0x1000
+@ CHECK: vmvn.i32	d16, #0x20      @ encoding: [0x30,0x00,0xc2,0xef]
+	vmvn.i32	d16, #0x20
+@ CHECK: vmvn.i32	d16, #0x2000    @ encoding: [0x30,0x02,0xc2,0xef]
+	vmvn.i32	d16, #0x2000
+@ CHECK: vmvn.i32	d16, #0x200000  @ encoding: [0x30,0x04,0xc2,0xef]
+	vmvn.i32	d16, #0x200000
+@ CHECK: vmvn.i32	d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xef]
+	vmvn.i32	d16, #0x20000000
+@ CHECK: vmvn.i32	d16, #0x20FF    @ encoding: [0x30,0x0c,0xc2,0xef]
+	vmvn.i32	d16, #0x20FF
+@ CHECK: vmvn.i32	d16, #0x20FFFF  @ encoding: [0x30,0x0d,0xc2,0xef]
+	vmvn.i32	d16, #0x20FFFF
+@ CHECK: vmovl.s8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xef]
+	vmovl.s8	q8, d16
+@ CHECK: vmovl.s16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xef]
+	vmovl.s16	q8, d16
+@ CHECK: vmovl.s32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xef]
+	vmovl.s32	q8, d16
+@ CHECK: vmovl.u8	q8, d16         @ encoding: [0x30,0x0a,0xc8,0xff]
+	vmovl.u8	q8, d16
+@ CHECK: vmovl.u16	q8, d16         @ encoding: [0x30,0x0a,0xd0,0xff]
+	vmovl.u16	q8, d16
+@ CHECK: vmovl.u32	q8, d16         @ encoding: [0x30,0x0a,0xe0,0xff]
+	vmovl.u32	q8, d16
+@ CHECK: vmovn.i16	d16, q8         @ encoding: [0x20,0x02,0xf2,0xff]
+	vmovn.i16	d16, q8
+@ CHECK: vmovn.i32	d16, q8         @ encoding: [0x20,0x02,0xf6,0xff]
+	vmovn.i32	d16, q8
+@ CHECK: vmovn.i64	d16, q8         @ encoding: [0x20,0x02,0xfa,0xff]
+	vmovn.i64	d16, q8
+@ CHECK: vqmovn.s16	d16, q8         @ encoding: [0xa0,0x02,0xf2,0xff]
+	vqmovn.s16	d16, q8
+@ CHECK: vqmovn.s32	d16, q8         @ encoding: [0xa0,0x02,0xf6,0xff]
+	vqmovn.s32	d16, q8
+@ CHECK: vqmovn.s64	d16, q8         @ encoding: [0xa0,0x02,0xfa,0xff]
+	vqmovn.s64	d16, q8
+@ CHECK: vqmovn.u16	d16, q8         @ encoding: [0xe0,0x02,0xf2,0xff]
+	vqmovn.u16	d16, q8
+@ CHECK: vqmovn.u32	d16, q8         @ encoding: [0xe0,0x02,0xf6,0xff]
+	vqmovn.u32	d16, q8
+@ CHECK: vqmovn.u64	d16, q8         @ encoding: [0xe0,0x02,0xfa,0xff]
+	vqmovn.u64	d16, q8
+@ CHECK: vqmovun.s16	d16, q8         @ encoding: [0x60,0x02,0xf2,0xff]
+	vqmovun.s16	d16, q8
+@ CHECK: vqmovun.s32	d16, q8         @ encoding: [0x60,0x02,0xf6,0xff]
+	vqmovun.s32	d16, q8
+@ CHECK: vqmovun.s64	d16, q8         @ encoding: [0x60,0x02,0xfa,0xff]
+	vqmovun.s64	d16, q8
+@ CHECK: vmov.s8	r0, d16[1]              @ encoding: [0xb0,0x0b,0x50,0xee]
+	vmov.s8	r0, d16[1]
+@ CHECK: vmov.s16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x10,0xee]
+	vmov.s16	r0, d16[1]
+@ CHECK: vmov.u8	r0, d16[1]              @ encoding: [0xb0,0x0b,0xd0,0xee]
+	vmov.u8	r0, d16[1]
+@ CHECK: vmov.u16	r0, d16[1]      @ encoding: [0xf0,0x0b,0x90,0xee]
+	vmov.u16	r0, d16[1]
+@ CHECK: vmov.32	r0, d16[1]              @ encoding: [0x90,0x0b,0x30,0xee]
+	vmov.32	r0, d16[1]
+@ CHECK: vmov.8	d16[1], r1              @ encoding: [0xb0,0x1b,0x40,0xee]
+	vmov.8	d16[1], r1
+@ CHECK: vmov.16	d16[1], r1              @ encoding: [0xf0,0x1b,0x00,0xee]
+	vmov.16	d16[1], r1
+@ CHECK: vmov.32	d16[1], r1              @ encoding: [0x90,0x1b,0x20,0xee]
+	vmov.32	d16[1], r1
+@ CHECK: vmov.8	d18[1], r1              @ encoding: [0xb0,0x1b,0x42,0xee]
+	vmov.8	d18[1], r1
+@ CHECK: vmov.16	d18[1], r1              @ encoding: [0xf0,0x1b,0x02,0xee]
+	vmov.16	d18[1], r1
+@ CHECK: vmov.32	d18[1], r1              @ encoding: [0x90,0x1b,0x22,0xee]
+	vmov.32	d18[1], r1
diff --git a/test/MC/ARM/neont2-mul-accum-encoding.s b/test/MC/ARM/neont2-mul-accum-encoding.s
new file mode 100644
index 000000000000..e21c67d2e8d6
--- /dev/null
+++ b/test/MC/ARM/neont2-mul-accum-encoding.s
@@ -0,0 +1,69 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vmla.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xef]
+	vmla.i8	d16, d18, d17
+@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xef]
+	vmla.i16	d16, d18, d17
+@ CHECK: vmla.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xef]
+	vmla.i32	d16, d18, d17
+@ CHECK: vmla.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x42,0xef]
+	vmla.f32	d16, d18, d17
+@ CHECK: vmla.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xef]
+	vmla.i8	q9, q8, q10
+@ CHECK: vmla.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xef]
+	vmla.i16	q9, q8, q10
+@ CHECK: vmla.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xef]
+	vmla.i32	q9, q8, q10
+@ CHECK: vmla.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x40,0xef]
+	vmla.f32	q9, q8, q10
+@ CHECK: vmlal.s8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xef]
+	vmlal.s8	q8, d19, d18
+@ CHECK: vmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xef]
+	vmlal.s16	q8, d19, d18
+@ CHECK: vmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xef]
+	vmlal.s32	q8, d19, d18
+@ CHECK: vmlal.u8	q8, d19, d18    @ encoding: [0xa2,0x08,0xc3,0xff]
+	vmlal.u8	q8, d19, d18
+@ CHECK: vmlal.u16	q8, d19, d18    @ encoding: [0xa2,0x08,0xd3,0xff]
+	vmlal.u16	q8, d19, d18
+@ CHECK: vmlal.u32	q8, d19, d18    @ encoding: [0xa2,0x08,0xe3,0xff]
+	vmlal.u32	q8, d19, d18
+@ CHECK: vqdmlal.s16	q8, d19, d18    @ encoding: [0xa2,0x09,0xd3,0xef]
+	vqdmlal.s16	q8, d19, d18
+@ CHECK: vqdmlal.s32	q8, d19, d18    @ encoding: [0xa2,0x09,0xe3,0xef]
+	vqdmlal.s32	q8, d19, d18
+@ CHECK: vmls.i8	d16, d18, d17           @ encoding: [0xa1,0x09,0x42,0xff]
+	vmls.i8	d16, d18, d17
+@ CHECK: vmls.i16	d16, d18, d17   @ encoding: [0xa1,0x09,0x52,0xff]
+	vmls.i16	d16, d18, d17
+@ CHECK: vmls.i32	d16, d18, d17   @ encoding: [0xa1,0x09,0x62,0xff]
+	vmls.i32	d16, d18, d17
+@ CHECK: vmls.f32	d16, d18, d17   @ encoding: [0xb1,0x0d,0x62,0xef]
+	vmls.f32	d16, d18, d17
+@ CHECK: vmls.i8	q9, q8, q10             @ encoding: [0xe4,0x29,0x40,0xff]
+	vmls.i8	q9, q8, q10
+@ CHECK: vmls.i16	q9, q8, q10     @ encoding: [0xe4,0x29,0x50,0xff]
+	vmls.i16	q9, q8, q10
+@ CHECK: vmls.i32	q9, q8, q10     @ encoding: [0xe4,0x29,0x60,0xff]
+	vmls.i32	q9, q8, q10
+@ CHECK: vmls.f32	q9, q8, q10     @ encoding: [0xf4,0x2d,0x60,0xef]
+	vmls.f32	q9, q8, q10
+@ CHECK: vmlsl.s8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xef]
+	vmlsl.s8	q8, d19, d18
+@ CHECK: vmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xef]
+	vmlsl.s16	q8, d19, d18
+@ CHECK: vmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xef]
+	vmlsl.s32	q8, d19, d18
+@ CHECK: vmlsl.u8	q8, d19, d18    @ encoding: [0xa2,0x0a,0xc3,0xff]
+	vmlsl.u8	q8, d19, d18
+@ CHECK: vmlsl.u16	q8, d19, d18    @ encoding: [0xa2,0x0a,0xd3,0xff]
+	vmlsl.u16	q8, d19, d18
+@ CHECK: vmlsl.u32	q8, d19, d18    @ encoding: [0xa2,0x0a,0xe3,0xff]
+	vmlsl.u32	q8, d19, d18
+@ CHECK: vqdmlsl.s16	q8, d19, d18    @ encoding: [0xa2,0x0b,0xd3,0xef]
+	vqdmlsl.s16	q8, d19, d18
+@ CHECK: vqdmlsl.s32	q8, d19, d18    @ encoding: [0xa2,0x0b,0xe3,0xef]
+	vqdmlsl.s32	q8, d19, d18
diff --git a/test/MC/ARM/neont2-mul-encoding.s b/test/MC/ARM/neont2-mul-encoding.s
new file mode 100644
index 000000000000..93ecabb50bb3
--- /dev/null
+++ b/test/MC/ARM/neont2-mul-encoding.s
@@ -0,0 +1,58 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vmul.i8	d16, d16, d17           @ encoding: [0x40,0xef,0xb1,0x09]
+	vmul.i8	d16, d16, d17
+@ CHECK: vmul.i16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x09]
+	vmul.i16	d16, d16, d17
+@ CHECK: vmul.i32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x09]
+	vmul.i32	d16, d16, d17
+@ CHECK: vmul.f32	d16, d16, d17   @ encoding: [0x40,0xff,0xb1,0x0d]
+	vmul.f32	d16, d16, d17
+@ CHECK: vmul.i8	q8, q8, q9              @ encoding: [0x40,0xef,0xf2,0x09]
+	vmul.i8	q8, q8, q9
+@ CHECK: vmul.i16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x09]
+	vmul.i16	q8, q8, q9
+@ CHECK: vmul.i32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x09]
+	vmul.i32	q8, q8, q9
+@ CHECK: vmul.f32	q8, q8, q9      @ encoding: [0x40,0xff,0xf2,0x0d]
+	vmul.f32	q8, q8, q9
+@ CHECK: vmul.p8	d16, d16, d17           @ encoding: [0x40,0xff,0xb1,0x09]
+	vmul.p8	d16, d16, d17
+@ CHECK: vmul.p8	q8, q8, q9              @ encoding: [0x40,0xff,0xf2,0x09]
+	vmul.p8	q8, q8, q9
+@ CHECK: vqdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x0b]
+	vqdmulh.s16	d16, d16, d17
+@ CHECK: vqdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0b]
+	vqdmulh.s32	d16, d16, d17
+@ CHECK: vqdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x0b]
+	vqdmulh.s16	q8, q8, q9
+@ CHECK: vqdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0b]
+	vqdmulh.s32	q8, q8, q9
+@ CHECK: vqrdmulh.s16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x0b]
+	vqrdmulh.s16	d16, d16, d17
+@ CHECK: vqrdmulh.s32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x0b]
+	vqrdmulh.s32	d16, d16, d17
+@ CHECK: vqrdmulh.s16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x0b]
+	vqrdmulh.s16	q8, q8, q9
+@ CHECK: vqrdmulh.s32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x0b]
+	vqrdmulh.s32	q8, q8, q9
+@ CHECK: vmull.s8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0c]
+	vmull.s8	q8, d16, d17
+@ CHECK: vmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0c]
+	vmull.s16	q8, d16, d17
+@ CHECK: vmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0c]
+	vmull.s32	q8, d16, d17
+@ CHECK: vmull.u8	q8, d16, d17    @ encoding: [0xc0,0xff,0xa1,0x0c]
+	vmull.u8	q8, d16, d17
+@ CHECK: vmull.u16	q8, d16, d17    @ encoding: [0xd0,0xff,0xa1,0x0c]
+	vmull.u16	q8, d16, d17
+@ CHECK: vmull.u32	q8, d16, d17    @ encoding: [0xe0,0xff,0xa1,0x0c]
+	vmull.u32	q8, d16, d17
+@ CHECK: vmull.p8	q8, d16, d17    @ encoding: [0xc0,0xef,0xa1,0x0e]
+	vmull.p8	q8, d16, d17
+@ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0d]
+	vqdmull.s16	q8, d16, d17
+@ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0d]
+	vqdmull.s32	q8, d16, d17
diff --git a/test/MC/ARM/neont2-neg-encoding.s b/test/MC/ARM/neont2-neg-encoding.s
new file mode 100644
index 000000000000..21dab65cc968
--- /dev/null
+++ b/test/MC/ARM/neont2-neg-encoding.s
@@ -0,0 +1,32 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vneg.s8	d16, d16                @ encoding: [0xf1,0xff,0xa0,0x03]
+	vneg.s8	d16, d16
+@ CHECK: vneg.s16	d16, d16        @ encoding: [0xf5,0xff,0xa0,0x03]
+	vneg.s16	d16, d16
+@ CHECK: vneg.s32	d16, d16        @ encoding: [0xf9,0xff,0xa0,0x03]
+	vneg.s32	d16, d16
+@ CHECK: vneg.f32	d16, d16        @ encoding: [0xf9,0xff,0xa0,0x07]
+	vneg.f32	d16, d16
+@ CHECK: vneg.s8	q8, q8                  @ encoding: [0xf1,0xff,0xe0,0x03]
+	vneg.s8	q8, q8
+@ CHECK: vneg.s16	q8, q8          @ encoding: [0xf5,0xff,0xe0,0x03]
+	vneg.s16	q8, q8
+@ CHECK: vneg.s32	q8, q8          @ encoding: [0xf9,0xff,0xe0,0x03]
+	vneg.s32	q8, q8
+@ CHECK: vneg.f32	q8, q8          @ encoding: [0xf9,0xff,0xe0,0x07]
+	vneg.f32	q8, q8
+@ CHECK: vqneg.s8	d16, d16        @ encoding: [0xf0,0xff,0xa0,0x07]
+	vqneg.s8	d16, d16
+@ CHECK: vqneg.s16	d16, d16        @ encoding: [0xf4,0xff,0xa0,0x07]
+	vqneg.s16	d16, d16
+@ CHECK: vqneg.s32	d16, d16        @ encoding: [0xf8,0xff,0xa0,0x07]
+	vqneg.s32	d16, d16
+@ CHECK: vqneg.s8	q8, q8          @ encoding: [0xf0,0xff,0xe0,0x07]
+	vqneg.s8	q8, q8
+@ CHECK: vqneg.s16	q8, q8          @ encoding: [0xf4,0xff,0xe0,0x07]
+	vqneg.s16	q8, q8
+@ CHECK: vqneg.s32	q8, q8          @ encoding: [0xf8,0xff,0xe0,0x07]
+	vqneg.s32	q8, q8
diff --git a/test/MC/ARM/neont2-pairwise-encoding.s b/test/MC/ARM/neont2-pairwise-encoding.s
new file mode 100644
index 000000000000..ef9092214cf2
--- /dev/null
+++ b/test/MC/ARM/neont2-pairwise-encoding.s
@@ -0,0 +1,89 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vpadd.i8	d16, d17, d16   @ encoding: [0xb0,0x0b,0x41,0xef]
+	vpadd.i8	d16, d17, d16
+@ CHECK: vpadd.i16	d16, d17, d16   @ encoding: [0xb0,0x0b,0x51,0xef]
+	vpadd.i16	d16, d17, d16
+@ CHECK: vpadd.i32	d16, d17, d16   @ encoding: [0xb0,0x0b,0x61,0xef]
+	vpadd.i32	d16, d17, d16
+@ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xff]
+	vpadd.f32	d16, d16, d17
+@ CHECK: vpaddl.s8	d16, d16        @ encoding: [0x20,0x02,0xf0,0xff]
+	vpaddl.s8	d16, d16
+@ CHECK: vpaddl.s16	d16, d16        @ encoding: [0x20,0x02,0xf4,0xff]
+	vpaddl.s16	d16, d16
+@ CHECK: vpaddl.s32	d16, d16        @ encoding: [0x20,0x02,0xf8,0xff]
+	vpaddl.s32	d16, d16
+@ CHECK: vpaddl.u8	d16, d16        @ encoding: [0xa0,0x02,0xf0,0xff]
+	vpaddl.u8	d16, d16
+@ CHECK: vpaddl.u16	d16, d16        @ encoding: [0xa0,0x02,0xf4,0xff]
+	vpaddl.u16	d16, d16
+@ CHECK: vpaddl.u32	d16, d16        @ encoding: [0xa0,0x02,0xf8,0xff]
+	vpaddl.u32	d16, d16
+@ CHECK: vpaddl.s8	q8, q8          @ encoding: [0x60,0x02,0xf0,0xff]
+	vpaddl.s8	q8, q8
+@ CHECK: vpaddl.s16	q8, q8          @ encoding: [0x60,0x02,0xf4,0xff]
+	vpaddl.s16	q8, q8
+@ CHECK: vpaddl.s32	q8, q8          @ encoding: [0x60,0x02,0xf8,0xff]
+	vpaddl.s32	q8, q8
+@ CHECK: vpaddl.u8	q8, q8          @ encoding: [0xe0,0x02,0xf0,0xff]
+	vpaddl.u8	q8, q8
+@ CHECK: vpaddl.u16	q8, q8          @ encoding: [0xe0,0x02,0xf4,0xff]
+	vpaddl.u16	q8, q8
+@ CHECK: vpaddl.u32	q8, q8          @ encoding: [0xe0,0x02,0xf8,0xff]
+	vpaddl.u32	q8, q8
+@ CHECK: vpadal.s8	d16, d17        @ encoding: [0x21,0x06,0xf0,0xff]
+	vpadal.s8	d16, d17
+@ CHECK: vpadal.s16	d16, d17        @ encoding: [0x21,0x06,0xf4,0xff]
+	vpadal.s16	d16, d17
+@ CHECK: vpadal.s32	d16, d17        @ encoding: [0x21,0x06,0xf8,0xff]
+	vpadal.s32	d16, d17
+@ CHECK: vpadal.u8	d16, d17        @ encoding: [0xa1,0x06,0xf0,0xff]
+	vpadal.u8	d16, d17
+@ CHECK: vpadal.u16	d16, d17        @ encoding: [0xa1,0x06,0xf4,0xff]
+	vpadal.u16	d16, d17
+@ CHECK: vpadal.u32	d16, d17        @ encoding: [0xa1,0x06,0xf8,0xff]
+	vpadal.u32	d16, d17
+@ CHECK: vpadal.s8	q9, q8          @ encoding: [0x60,0x26,0xf0,0xff]
+	vpadal.s8	q9, q8
+@ CHECK: vpadal.s16	q9, q8          @ encoding: [0x60,0x26,0xf4,0xff]
+	vpadal.s16	q9, q8
+@ CHECK: vpadal.s32	q9, q8          @ encoding: [0x60,0x26,0xf8,0xff]
+	vpadal.s32	q9, q8
+@ CHECK: vpadal.u8	q9, q8          @ encoding: [0xe0,0x26,0xf0,0xff]
+	vpadal.u8	q9, q8
+@ CHECK: vpadal.u16	q9, q8          @ encoding: [0xe0,0x26,0xf4,0xff]
+	vpadal.u16	q9, q8
+@ CHECK: vpadal.u32	q9, q8          @ encoding: [0xe0,0x26,0xf8,0xff]
+	vpadal.u32	q9, q8
+@ CHECK: vpmin.s8	d16, d16, d17   @ encoding: [0xb1,0x0a,0x40,0xef]
+	vpmin.s8	d16, d16, d17
+@ CHECK: vpmin.s16	d16, d16, d17   @ encoding: [0xb1,0x0a,0x50,0xef]
+	vpmin.s16	d16, d16, d17
+@ CHECK: vpmin.s32	d16, d16, d17   @ encoding: [0xb1,0x0a,0x60,0xef]
+	vpmin.s32	d16, d16, d17
+@ CHECK: vpmin.u8	d16, d16, d17   @ encoding: [0xb1,0x0a,0x40,0xff]
+	vpmin.u8	d16, d16, d17
+@ CHECK: vpmin.u16	d16, d16, d17   @ encoding: [0xb1,0x0a,0x50,0xff]
+	vpmin.u16	d16, d16, d17
+@ CHECK: vpmin.u32	d16, d16, d17   @ encoding: [0xb1,0x0a,0x60,0xff]
+	vpmin.u32	d16, d16, d17
+@ CHECK: vpmin.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x60,0xff]
+	vpmin.f32	d16, d16, d17
+@ CHECK: vpmax.s8	d16, d16, d17   @ encoding: [0xa1,0x0a,0x40,0xef]
+	vpmax.s8	d16, d16, d17
+@ CHECK: vpmax.s16	d16, d16, d17   @ encoding: [0xa1,0x0a,0x50,0xef]
+	vpmax.s16	d16, d16, d17
+@ CHECK: vpmax.s32	d16, d16, d17   @ encoding: [0xa1,0x0a,0x60,0xef]
+	vpmax.s32	d16, d16, d17
+@ CHECK: vpmax.u8	d16, d16, d17   @ encoding: [0xa1,0x0a,0x40,0xff]
+	vpmax.u8	d16, d16, d17
+@ CHECK: vpmax.u16	d16, d16, d17   @ encoding: [0xa1,0x0a,0x50,0xff]
+	vpmax.u16	d16, d16, d17
+@ CHECK: vpmax.u32	d16, d16, d17   @ encoding: [0xa1,0x0a,0x60,0xff]
+	vpmax.u32	d16, d16, d17
+@ CHECK: vpmax.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x40,0xff]
+	vpmax.f32	d16, d16, d17
diff --git a/test/MC/ARM/neont2-reciprocal-encoding.s b/test/MC/ARM/neont2-reciprocal-encoding.s
new file mode 100644
index 000000000000..8ea77d78c339
--- /dev/null
+++ b/test/MC/ARM/neont2-reciprocal-encoding.s
@@ -0,0 +1,28 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vrecpe.u32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x04]
+	vrecpe.u32	d16, d16
+@ CHECK: vrecpe.u32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x04]
+	vrecpe.u32	q8, q8
+@ CHECK: vrecpe.f32	d16, d16        @ encoding: [0xfb,0xff,0x20,0x05]
+	vrecpe.f32	d16, d16
+@ CHECK: vrecpe.f32	q8, q8          @ encoding: [0xfb,0xff,0x60,0x05]
+	vrecpe.f32	q8, q8
+@ CHECK: vrecps.f32	d16, d16, d17   @ encoding: [0x40,0xef,0xb1,0x0f]
+	vrecps.f32	d16, d16, d17
+@ CHECK: vrecps.f32	q8, q8, q9      @ encoding: [0x40,0xef,0xf2,0x0f]
+	vrecps.f32	q8, q8, q9
+@ CHECK: vrsqrte.u32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x04]
+	vrsqrte.u32	d16, d16
+@ CHECK: vrsqrte.u32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x04]
+	vrsqrte.u32	q8, q8
+@ CHECK: vrsqrte.f32	d16, d16        @ encoding: [0xfb,0xff,0xa0,0x05]
+	vrsqrte.f32	d16, d16
+@ CHECK: vrsqrte.f32	q8, q8          @ encoding: [0xfb,0xff,0xe0,0x05]
+	vrsqrte.f32	q8, q8
+@ CHECK: vrsqrts.f32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x0f]
+	vrsqrts.f32	d16, d16, d17
+@ CHECK: vrsqrts.f32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x0f]
+	vrsqrts.f32	q8, q8, q9
diff --git a/test/MC/ARM/neont2-reverse-encoding.s b/test/MC/ARM/neont2-reverse-encoding.s
new file mode 100644
index 000000000000..f37d72da11af
--- /dev/null
+++ b/test/MC/ARM/neont2-reverse-encoding.s
@@ -0,0 +1,26 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vrev64.8	d16, d16        @ encoding: [0xf0,0xff,0x20,0x00]
+	vrev64.8	d16, d16
+@ CHECK: vrev64.16	d16, d16        @ encoding: [0xf4,0xff,0x20,0x00]
+	vrev64.16	d16, d16
+@ CHECK: vrev64.32	d16, d16        @ encoding: [0xf8,0xff,0x20,0x00]
+	vrev64.32	d16, d16
+@ CHECK: vrev64.8	q8, q8          @ encoding: [0xf0,0xff,0x60,0x00]
+	vrev64.8	q8, q8
+@ CHECK: vrev64.16	q8, q8          @ encoding: [0xf4,0xff,0x60,0x00]
+	vrev64.16	q8, q8
+@ CHECK: vrev64.32	q8, q8          @ encoding: [0xf8,0xff,0x60,0x00]
+	vrev64.32	q8, q8
+@ CHECK: vrev32.8	d16, d16        @ encoding: [0xf0,0xff,0xa0,0x00]
+	vrev32.8	d16, d16
+@ CHECK: vrev32.16	d16, d16        @ encoding: [0xf4,0xff,0xa0,0x00]
+	vrev32.16	d16, d16
+@ CHECK: vrev32.8	q8, q8          @ encoding: [0xf0,0xff,0xe0,0x00]
+	vrev32.8	q8, q8
+@ CHECK: vrev32.16	q8, q8          @ encoding: [0xf4,0xff,0xe0,0x00]
+	vrev32.16	q8, q8
+@ CHECK: vrev16.8	d16, d16        @ encoding: [0xf0,0xff,0x20,0x01]
+	vrev16.8	d16, d16
+@ CHECK: vrev16.8	q8, q8          @ encoding: [0xf0,0xff,0x60,0x01]
+	vrev16.8	q8, q8
diff --git a/test/MC/ARM/neont2-satshift-encoding.s b/test/MC/ARM/neont2-satshift-encoding.s
new file mode 100644
index 000000000000..34e50f1e29e4
--- /dev/null
+++ b/test/MC/ARM/neont2-satshift-encoding.s
@@ -0,0 +1,152 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vqshl.s8	d16, d16, d17   @ encoding: [0x41,0xef,0xb0,0x04]
+	vqshl.s8	d16, d16, d17
+@ CHECK: vqshl.s16	d16, d16, d17   @ encoding: [0x51,0xef,0xb0,0x04]
+	vqshl.s16	d16, d16, d17
+@ CHECK: vqshl.s32	d16, d16, d17   @ encoding: [0x61,0xef,0xb0,0x04]
+	vqshl.s32	d16, d16, d17
+@ CHECK: vqshl.s64	d16, d16, d17   @ encoding: [0x71,0xef,0xb0,0x04]
+	vqshl.s64	d16, d16, d17
+@ CHECK: vqshl.u8	d16, d16, d17   @ encoding: [0x41,0xff,0xb0,0x04]
+	vqshl.u8	d16, d16, d17
+@ CHECK: vqshl.u16	d16, d16, d17   @ encoding: [0x51,0xff,0xb0,0x04]
+	vqshl.u16	d16, d16, d17
+@ CHECK: vqshl.u32	d16, d16, d17   @ encoding: [0x61,0xff,0xb0,0x04]
+	vqshl.u32	d16, d16, d17
+@ CHECK: vqshl.u64	d16, d16, d17   @ encoding: [0x71,0xff,0xb0,0x04]
+	vqshl.u64	d16, d16, d17
+@ CHECK: vqshl.s8	q8, q8, q9      @ encoding: [0x42,0xef,0xf0,0x04]
+	vqshl.s8	q8, q8, q9
+@ CHECK: vqshl.s16	q8, q8, q9      @ encoding: [0x52,0xef,0xf0,0x04]
+	vqshl.s16	q8, q8, q9
+@ CHECK: vqshl.s32	q8, q8, q9      @ encoding: [0x62,0xef,0xf0,0x04]
+	vqshl.s32	q8, q8, q9
+@ CHECK: vqshl.s64	q8, q8, q9      @ encoding: [0x72,0xef,0xf0,0x04]
+	vqshl.s64	q8, q8, q9
+@ CHECK: vqshl.u8	q8, q8, q9      @ encoding: [0x42,0xff,0xf0,0x04]
+	vqshl.u8	q8, q8, q9
+@ CHECK: vqshl.u16	q8, q8, q9      @ encoding: [0x52,0xff,0xf0,0x04]
+	vqshl.u16	q8, q8, q9
+@ CHECK: vqshl.u32	q8, q8, q9      @ encoding: [0x62,0xff,0xf0,0x04]
+	vqshl.u32	q8, q8, q9
+@ CHECK: vqshl.u64	q8, q8, q9      @ encoding: [0x72,0xff,0xf0,0x04]
+	vqshl.u64	q8, q8, q9
+@ CHECK: vqshl.s8	d16, d16, #7    @ encoding: [0xcf,0xef,0x30,0x07]
+	vqshl.s8	d16, d16, #7
+@ CHECK: vqshl.s16	d16, d16, #15   @ encoding: [0xdf,0xef,0x30,0x07]
+	vqshl.s16	d16, d16, #15
+@ CHECK: vqshl.s32	d16, d16, #31   @ encoding: [0xff,0xef,0x30,0x07]
+	vqshl.s32	d16, d16, #31
+@ CHECK: vqshl.s64	d16, d16, #63   @ encoding: [0xff,0xef,0xb0,0x07]
+	vqshl.s64	d16, d16, #63
+@ CHECK: vqshl.u8	d16, d16, #7    @ encoding: [0xcf,0xff,0x30,0x07]
+	vqshl.u8	d16, d16, #7
+@ CHECK: vqshl.u16	d16, d16, #15   @ encoding: [0xdf,0xff,0x30,0x07]
+	vqshl.u16	d16, d16, #15
+@ CHECK: vqshl.u32	d16, d16, #31   @ encoding: [0xff,0xff,0x30,0x07]
+	vqshl.u32	d16, d16, #31
+@ CHECK: vqshl.u64	d16, d16, #63   @ encoding: [0xff,0xff,0xb0,0x07]
+	vqshl.u64	d16, d16, #63
+@ CHECK: vqshlu.s8	d16, d16, #7    @ encoding: [0xcf,0xff,0x30,0x06]
+	vqshlu.s8	d16, d16, #7
+@ CHECK: vqshlu.s16	d16, d16, #15   @ encoding: [0xdf,0xff,0x30,0x06]
+	vqshlu.s16	d16, d16, #15
+@ CHECK: vqshlu.s32	d16, d16, #31   @ encoding: [0xff,0xff,0x30,0x06]
+	vqshlu.s32	d16, d16, #31
+@ CHECK: vqshlu.s64	d16, d16, #63   @ encoding: [0xff,0xff,0xb0,0x06]
+	vqshlu.s64	d16, d16, #63
+@ CHECK: vqshl.s8	q8, q8, #7      @ encoding: [0xcf,0xef,0x70,0x07]
+	vqshl.s8	q8, q8, #7
+@ CHECK: vqshl.s16	q8, q8, #15     @ encoding: [0xdf,0xef,0x70,0x07]
+	vqshl.s16	q8, q8, #15
+@ CHECK: vqshl.s32	q8, q8, #31     @ encoding: [0xff,0xef,0x70,0x07]
+	vqshl.s32	q8, q8, #31
+@ CHECK: vqshl.s64	q8, q8, #63     @ encoding: [0xff,0xef,0xf0,0x07]
+	vqshl.s64	q8, q8, #63
+@ CHECK: vqshl.u8	q8, q8, #7      @ encoding: [0xcf,0xff,0x70,0x07]
+	vqshl.u8	q8, q8, #7
+@ CHECK: vqshl.u16	q8, q8, #15     @ encoding: [0xdf,0xff,0x70,0x07]
+	vqshl.u16	q8, q8, #15
+@ CHECK: vqshl.u32	q8, q8, #31     @ encoding: [0xff,0xff,0x70,0x07]
+	vqshl.u32	q8, q8, #31
+@ CHECK: vqshl.u64	q8, q8, #63     @ encoding: [0xff,0xff,0xf0,0x07]
+	vqshl.u64	q8, q8, #63
+@ CHECK: vqshlu.s8	q8, q8, #7      @ encoding: [0xcf,0xff,0x70,0x06]
+	vqshlu.s8	q8, q8, #7
+@ CHECK: vqshlu.s16	q8, q8, #15     @ encoding: [0xdf,0xff,0x70,0x06]
+	vqshlu.s16	q8, q8, #15
+@ CHECK: vqshlu.s32	q8, q8, #31     @ encoding: [0xff,0xff,0x70,0x06]
+	vqshlu.s32	q8, q8, #31
+@ CHECK: vqshlu.s64	q8, q8, #63     @ encoding: [0xff,0xff,0xf0,0x06]
+	vqshlu.s64	q8, q8, #63
+@ CHECK:   vqrshl.s8	d16, d16, d17   @ encoding: [0x41,0xef,0xb0,0x05]
+	vqrshl.s8	d16, d16, d17
+@ CHECK: vqrshl.s16	d16, d16, d17   @ encoding: [0x51,0xef,0xb0,0x05]
+	vqrshl.s16	d16, d16, d17
+@ CHECK: vqrshl.s32	d16, d16, d17   @ encoding: [0x61,0xef,0xb0,0x05]
+	vqrshl.s32	d16, d16, d17
+@ CHECK: vqrshl.s64	d16, d16, d17   @ encoding: [0x71,0xef,0xb0,0x05]
+	vqrshl.s64	d16, d16, d17
+@ CHECK: vqrshl.u8	d16, d16, d17   @ encoding: [0x41,0xff,0xb0,0x05]
+	vqrshl.u8	d16, d16, d17
+@ CHECK: vqrshl.u16	d16, d16, d17   @ encoding: [0x51,0xff,0xb0,0x05]
+	vqrshl.u16	d16, d16, d17
+@ CHECK: vqrshl.u32	d16, d16, d17   @ encoding: [0x61,0xff,0xb0,0x05]
+	vqrshl.u32	d16, d16, d17
+@ CHECK: vqrshl.u64	d16, d16, d17   @ encoding: [0x71,0xff,0xb0,0x05]
+	vqrshl.u64	d16, d16, d17
+@ CHECK: vqrshl.s8	q8, q8, q9      @ encoding: [0x42,0xef,0xf0,0x05]
+	vqrshl.s8	q8, q8, q9
+@ CHECK: vqrshl.s16	q8, q8, q9      @ encoding: [0x52,0xef,0xf0,0x05]
+	vqrshl.s16	q8, q8, q9
+@ CHECK: vqrshl.s32	q8, q8, q9      @ encoding: [0x62,0xef,0xf0,0x05]
+	vqrshl.s32	q8, q8, q9
+@ CHECK: vqrshl.s64	q8, q8, q9      @ encoding: [0x72,0xef,0xf0,0x05]
+	vqrshl.s64	q8, q8, q9
+@ CHECK: vqrshl.u8	q8, q8, q9      @ encoding: [0x42,0xff,0xf0,0x05]
+	vqrshl.u8	q8, q8, q9
+@ CHECK: vqrshl.u16	q8, q8, q9      @ encoding: [0x52,0xff,0xf0,0x05]
+	vqrshl.u16	q8, q8, q9
+@ CHECK: vqrshl.u32	q8, q8, q9      @ encoding: [0x62,0xff,0xf0,0x05]
+	vqrshl.u32	q8, q8, q9
+@ CHECK: vqrshl.u64	q8, q8, q9      @ encoding: [0x72,0xff,0xf0,0x05]
+	vqrshl.u64	q8, q8, q9
+@ CHECK: vqshrn.s16	d16, q8, #8     @ encoding: [0xc8,0xef,0x30,0x09]
+	vqshrn.s16	d16, q8, #8
+@ CHECK: vqshrn.s32	d16, q8, #16    @ encoding: [0xd0,0xef,0x30,0x09]
+	vqshrn.s32	d16, q8, #16
+@ CHECK: vqshrn.s64	d16, q8, #32    @ encoding: [0xe0,0xef,0x30,0x09]
+	vqshrn.s64	d16, q8, #32
+@ CHECK: vqshrn.u16	d16, q8, #8     @ encoding: [0xc8,0xff,0x30,0x09]
+	vqshrn.u16	d16, q8, #8
+@ CHECK: vqshrn.u32	d16, q8, #16    @ encoding: [0xd0,0xff,0x30,0x09]
+	vqshrn.u32	d16, q8, #16
+@ CHECK: vqshrn.u64	d16, q8, #32    @ encoding: [0xe0,0xff,0x30,0x09]
+	vqshrn.u64	d16, q8, #32
+@ CHECK: vqshrun.s16	d16, q8, #8     @ encoding: [0xc8,0xff,0x30,0x08]
+	vqshrun.s16	d16, q8, #8
+@ CHECK: vqshrun.s32	d16, q8, #16    @ encoding: [0xd0,0xff,0x30,0x08]
+	vqshrun.s32	d16, q8, #16
+@ CHECK: vqshrun.s64	d16, q8, #32    @ encoding: [0xe0,0xff,0x30,0x08]
+	vqshrun.s64	d16, q8, #32
+@ CHECK: vqrshrn.s16	d16, q8, #8     @ encoding: [0xc8,0xef,0x70,0x09]
+	vqrshrn.s16	d16, q8, #8
+@ CHECK: vqrshrn.s32	d16, q8, #16    @ encoding: [0xd0,0xef,0x70,0x09]
+	vqrshrn.s32	d16, q8, #16
+@ CHECK: vqrshrn.s64	d16, q8, #32    @ encoding: [0xe0,0xef,0x70,0x09]
+	vqrshrn.s64	d16, q8, #32
+@ CHECK: vqrshrn.u16	d16, q8, #8     @ encoding: [0xc8,0xff,0x70,0x09]
+	vqrshrn.u16	d16, q8, #8
+@ CHECK: vqrshrn.u32	d16, q8, #16    @ encoding: [0xd0,0xff,0x70,0x09]
+	vqrshrn.u32	d16, q8, #16
+@ CHECK: vqrshrn.u64	d16, q8, #32    @ encoding: [0xe0,0xff,0x70,0x09]
+	vqrshrn.u64	d16, q8, #32
+@ CHECK: vqrshrun.s16	d16, q8, #8     @ encoding: [0xc8,0xff,0x70,0x08]
+	vqrshrun.s16	d16, q8, #8
+@ CHECK: vqrshrun.s32	d16, q8, #16    @ encoding: [0xd0,0xff,0x70,0x08]
+	vqrshrun.s32	d16, q8, #16
+@ CHECK: vqrshrun.s64	d16, q8, #32    @ encoding: [0xe0,0xff,0x70,0x08]
+	vqrshrun.s64	d16, q8, #32
diff --git a/test/MC/ARM/neont2-shift-encoding.s b/test/MC/ARM/neont2-shift-encoding.s
new file mode 100644
index 000000000000..d098f543c9cd
--- /dev/null
+++ b/test/MC/ARM/neont2-shift-encoding.s
@@ -0,0 +1,162 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vshl.u8	d16, d17, d16           @ encoding: [0x40,0xff,0xa1,0x04]
+	vshl.u8	d16, d17, d16
+@ CHECK: vshl.u16	d16, d17, d16   @ encoding: [0x50,0xff,0xa1,0x04]
+	vshl.u16	d16, d17, d16
+@ CHECK: vshl.u32	d16, d17, d16   @ encoding: [0x60,0xff,0xa1,0x04]
+	vshl.u32	d16, d17, d16
+@ CHECK: vshl.u64	d16, d17, d16   @ encoding: [0x70,0xff,0xa1,0x04]
+	vshl.u64	d16, d17, d16
+@ CHECK: vshl.i8	d16, d16, #7            @ encoding: [0xcf,0xef,0x30,0x05]
+	vshl.i8	d16, d16, #7
+@ CHECK: vshl.i16	d16, d16, #15   @ encoding: [0xdf,0xef,0x30,0x05]
+	vshl.i16	d16, d16, #15
+@ CHECK: vshl.i32	d16, d16, #31   @ encoding: [0xff,0xef,0x30,0x05]
+	vshl.i32	d16, d16, #31
+@ CHECK: vshl.i64	d16, d16, #63   @ encoding: [0xff,0xef,0xb0,0x05]
+	vshl.i64	d16, d16, #63
+@ CHECK: vshl.u8	q8, q9, q8              @ encoding: [0x40,0xff,0xe2,0x04]
+	vshl.u8	q8, q9, q8
+@ CHECK: vshl.u16	q8, q9, q8      @ encoding: [0x50,0xff,0xe2,0x04]
+	vshl.u16	q8, q9, q8
+@ CHECK: vshl.u32	q8, q9, q8      @ encoding: [0x60,0xff,0xe2,0x04]
+	vshl.u32	q8, q9, q8
+@ CHECK: vshl.u64	q8, q9, q8      @ encoding: [0x70,0xff,0xe2,0x04]
+	vshl.u64	q8, q9, q8
+@ CHECK: vshl.i8	q8, q8, #7              @ encoding: [0xcf,0xef,0x70,0x05]
+	vshl.i8	q8, q8, #7
+@ CHECK: vshl.i16	q8, q8, #15     @ encoding: [0xdf,0xef,0x70,0x05]
+	vshl.i16	q8, q8, #15
+@ CHECK: vshl.i32	q8, q8, #31     @ encoding: [0xff,0xef,0x70,0x05]
+	vshl.i32	q8, q8, #31
+@ CHECK: vshl.i64	q8, q8, #63     @ encoding: [0xff,0xef,0xf0,0x05]
+	vshl.i64	q8, q8, #63
+@ CHECK: vshr.u8	d16, d16, #8            @ encoding: [0xc8,0xff,0x30,0x00]
+	vshr.u8	d16, d16, #8
+@ CHECK: vshr.u16	d16, d16, #16   @ encoding: [0xd0,0xff,0x30,0x00]
+	vshr.u16	d16, d16, #16
+@ CHECK: vshr.u32	d16, d16, #32   @ encoding: [0xe0,0xff,0x30,0x00]
+	vshr.u32	d16, d16, #32
+@ CHECK: vshr.u64	d16, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x00]
+	vshr.u64	d16, d16, #64
+@ CHECK: vshr.u8	q8, q8, #8              @ encoding: [0xc8,0xff,0x70,0x00]
+	vshr.u8	q8, q8, #8
+@ CHECK: vshr.u16	q8, q8, #16     @ encoding: [0xd0,0xff,0x70,0x00]
+	vshr.u16	q8, q8, #16
+@ CHECK: vshr.u32	q8, q8, #32     @ encoding: [0xe0,0xff,0x70,0x00]
+	vshr.u32	q8, q8, #32
+@ CHECK: vshr.u64	q8, q8, #64     @ encoding: [0xc0,0xff,0xf0,0x00]
+	vshr.u64	q8, q8, #64
+@ CHECK: vshr.s8	d16, d16, #8            @ encoding: [0xc8,0xef,0x30,0x00]
+	vshr.s8	d16, d16, #8
+@ CHECK: vshr.s16	d16, d16, #16   @ encoding: [0xd0,0xef,0x30,0x00]
+	vshr.s16	d16, d16, #16
+@ CHECK: vshr.s32	d16, d16, #32   @ encoding: [0xe0,0xef,0x30,0x00]
+	vshr.s32	d16, d16, #32
+@ CHECK: vshr.s64	d16, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x00]
+	vshr.s64	d16, d16, #64
+@ CHECK: vshr.s8	q8, q8, #8              @ encoding: [0xc8,0xef,0x70,0x00]
+	vshr.s8	q8, q8, #8
+@ CHECK: vshr.s16	q8, q8, #16     @ encoding: [0xd0,0xef,0x70,0x00]
+	vshr.s16	q8, q8, #16
+@ CHECK: vshr.s32	q8, q8, #32     @ encoding: [0xe0,0xef,0x70,0x00]
+	vshr.s32	q8, q8, #32
+@ CHECK: vshr.s64	q8, q8, #64     @ encoding: [0xc0,0xef,0xf0,0x00]
+	vshr.s64	q8, q8, #64
+@ CHECK: vshll.s8	q8, d16, #7     @ encoding: [0xcf,0xef,0x30,0x0a]
+	vshll.s8	q8, d16, #7
+@ CHECK: vshll.s16	q8, d16, #15    @ encoding: [0xdf,0xef,0x30,0x0a]
+	vshll.s16	q8, d16, #15
+@ CHECK: vshll.s32	q8, d16, #31    @ encoding: [0xff,0xef,0x30,0x0a]
+	vshll.s32	q8, d16, #31
+@ CHECK: vshll.u8	q8, d16, #7     @ encoding: [0xcf,0xff,0x30,0x0a]
+	vshll.u8	q8, d16, #7
+@ CHECK: vshll.u16	q8, d16, #15    @ encoding: [0xdf,0xff,0x30,0x0a]
+	vshll.u16	q8, d16, #15
+@ CHECK: vshll.u32	q8, d16, #31    @ encoding: [0xff,0xff,0x30,0x0a]
+	vshll.u32	q8, d16, #31
+@ CHECK: vshll.i8	q8, d16, #8     @ encoding: [0xf2,0xff,0x20,0x03]
+	vshll.i8	q8, d16, #8
+@ CHECK: vshll.i16	q8, d16, #16    @ encoding: [0xf6,0xff,0x20,0x03]
+	vshll.i16	q8, d16, #16
+@ CHECK: vshll.i32	q8, d16, #32    @ encoding: [0xfa,0xff,0x20,0x03]
+	vshll.i32	q8, d16, #32
+@ CHECK: vshrn.i16	d16, q8, #8     @ encoding: [0xc8,0xef,0x30,0x08]
+	vshrn.i16	d16, q8, #8
+@ CHECK: vshrn.i32	d16, q8, #16    @ encoding: [0xd0,0xef,0x30,0x08]
+	vshrn.i32	d16, q8, #16
+@ CHECK: vshrn.i64	d16, q8, #32    @ encoding: [0xe0,0xef,0x30,0x08]
+	vshrn.i64	d16, q8, #32
+@ CHECK: vrshl.s8	d16, d17, d16   @ encoding: [0x40,0xef,0xa1,0x05]
+	vrshl.s8	d16, d17, d16
+@ CHECK: vrshl.s16	d16, d17, d16   @ encoding: [0x50,0xef,0xa1,0x05]
+	vrshl.s16	d16, d17, d16
+@ CHECK: vrshl.s32	d16, d17, d16   @ encoding: [0x60,0xef,0xa1,0x05]
+	vrshl.s32	d16, d17, d16
+@ CHECK: vrshl.s64	d16, d17, d16   @ encoding: [0x70,0xef,0xa1,0x05]
+	vrshl.s64	d16, d17, d16
+@ CHECK: vrshl.u8	d16, d17, d16   @ encoding: [0x40,0xff,0xa1,0x05]
+	vrshl.u8	d16, d17, d16
+@ CHECK: vrshl.u16	d16, d17, d16   @ encoding: [0x50,0xff,0xa1,0x05]
+	vrshl.u16	d16, d17, d16
+@ CHECK: vrshl.u32	d16, d17, d16   @ encoding: [0x60,0xff,0xa1,0x05]
+	vrshl.u32	d16, d17, d16
+@ CHECK: vrshl.u64	d16, d17, d16   @ encoding: [0x70,0xff,0xa1,0x05]
+	vrshl.u64	d16, d17, d16
+@ CHECK: vrshl.s8	q8, q9, q8      @ encoding: [0x40,0xef,0xe2,0x05]
+	vrshl.s8	q8, q9, q8
+@ CHECK: vrshl.s16	q8, q9, q8      @ encoding: [0x50,0xef,0xe2,0x05]
+	vrshl.s16	q8, q9, q8
+@ CHECK: vrshl.s32	q8, q9, q8      @ encoding: [0x60,0xef,0xe2,0x05]
+	vrshl.s32	q8, q9, q8
+@ CHECK: vrshl.s64	q8, q9, q8      @ encoding: [0x70,0xef,0xe2,0x05]
+	vrshl.s64	q8, q9, q8
+@ CHECK: vrshl.u8	q8, q9, q8      @ encoding: [0x40,0xff,0xe2,0x05]
+	vrshl.u8	q8, q9, q8
+@ CHECK: vrshl.u16	q8, q9, q8      @ encoding: [0x50,0xff,0xe2,0x05]
+	vrshl.u16	q8, q9, q8
+@ CHECK: vrshl.u32	q8, q9, q8      @ encoding: [0x60,0xff,0xe2,0x05]
+	vrshl.u32	q8, q9, q8
+@ CHECK: vrshl.u64	q8, q9, q8      @ encoding: [0x70,0xff,0xe2,0x05]
+	vrshl.u64	q8, q9, q8
+@ CHECK: vrshr.s8	d16, d16, #8    @ encoding: [0xc8,0xef,0x30,0x02]
+	vrshr.s8	d16, d16, #8
+@ CHECK: vrshr.s16	d16, d16, #16   @ encoding: [0xd0,0xef,0x30,0x02]
+	vrshr.s16	d16, d16, #16
+@ CHECK: vrshr.s32	d16, d16, #32   @ encoding: [0xe0,0xef,0x30,0x02]
+	vrshr.s32	d16, d16, #32
+@ CHECK: vrshr.s64	d16, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x02]
+	vrshr.s64	d16, d16, #64
+@ CHECK: vrshr.u8	d16, d16, #8    @ encoding: [0xc8,0xff,0x30,0x02]
+	vrshr.u8	d16, d16, #8
+@ CHECK: vrshr.u16	d16, d16, #16   @ encoding: [0xd0,0xff,0x30,0x02]
+	vrshr.u16	d16, d16, #16
+@ CHECK: vrshr.u32	d16, d16, #32   @ encoding: [0xe0,0xff,0x30,0x02]
+	vrshr.u32	d16, d16, #32
+@ CHECK: vrshr.u64	d16, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x02]
+	vrshr.u64	d16, d16, #64
+@ CHECK: vrshr.s8	q8, q8, #8      @ encoding: [0xc8,0xef,0x70,0x02]
+	vrshr.s8	q8, q8, #8
+@ CHECK: vrshr.s16	q8, q8, #16     @ encoding: [0xd0,0xef,0x70,0x02]
+	vrshr.s16	q8, q8, #16
+@ CHECK: vrshr.s32	q8, q8, #32     @ encoding: [0xe0,0xef,0x70,0x02]
+	vrshr.s32	q8, q8, #32
+@ CHECK: vrshr.s64	q8, q8, #64     @ encoding: [0xc0,0xef,0xf0,0x02]
+	vrshr.s64	q8, q8, #64
+@ CHECK: vrshr.u8	q8, q8, #8      @ encoding: [0xc8,0xff,0x70,0x02]
+	vrshr.u8	q8, q8, #8
+@ CHECK: vrshr.u16	q8, q8, #16     @ encoding: [0xd0,0xff,0x70,0x02]
+	vrshr.u16	q8, q8, #16
+@ CHECK: vrshr.u32	q8, q8, #32     @ encoding: [0xe0,0xff,0x70,0x02]
+	vrshr.u32	q8, q8, #32
+@ CHECK: vrshr.u64	q8, q8, #64     @ encoding: [0xc0,0xff,0xf0,0x02]
+	vrshr.u64	q8, q8, #64
+@ CHECK: vrshrn.i16	d16, q8, #8     @ encoding: [0xc8,0xef,0x70,0x08]
+	vrshrn.i16	d16, q8, #8
+@ CHECK: vrshrn.i32	d16, q8, #16    @ encoding: [0xd0,0xef,0x70,0x08]
+	vrshrn.i32	d16, q8, #16
+@ CHECK: vrshrn.i64	d16, q8, #32    @ encoding: [0xe0,0xef,0x70,0x08]
+	vrshrn.i64	d16, q8, #32
diff --git a/test/MC/ARM/neont2-shiftaccum-encoding.s b/test/MC/ARM/neont2-shiftaccum-encoding.s
new file mode 100644
index 000000000000..a3a18fcee87e
--- /dev/null
+++ b/test/MC/ARM/neont2-shiftaccum-encoding.s
@@ -0,0 +1,100 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vsra.s8	d17, d16, #8            @ encoding: [0xc8,0xef,0x30,0x11]
+	vsra.s8	d17, d16, #8
+@ CHECK: vsra.s16	d17, d16, #16   @ encoding: [0xd0,0xef,0x30,0x11]
+	vsra.s16	d17, d16, #16
+@ CHECK: vsra.s32	d17, d16, #32   @ encoding: [0xe0,0xef,0x30,0x11]
+	vsra.s32	d17, d16, #32
+@ CHECK: vsra.s64	d17, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x11]
+	vsra.s64	d17, d16, #64
+@ CHECK: vsra.s8	q8, q9, #8              @ encoding: [0xc8,0xef,0x72,0x01]
+	vsra.s8	q8, q9, #8
+@ CHECK: vsra.s16	q8, q9, #16     @ encoding: [0xd0,0xef,0x72,0x01]
+	vsra.s16	q8, q9, #16
+@ CHECK: vsra.s32	q8, q9, #32     @ encoding: [0xe0,0xef,0x72,0x01]
+	vsra.s32	q8, q9, #32
+@ CHECK: vsra.s64	q8, q9, #64     @ encoding: [0xc0,0xef,0xf2,0x01]
+	vsra.s64	q8, q9, #64
+@ CHECK: vsra.u8	d17, d16, #8            @ encoding: [0xc8,0xff,0x30,0x11]
+	vsra.u8	d17, d16, #8
+@ CHECK: vsra.u16	d17, d16, #16   @ encoding: [0xd0,0xff,0x30,0x11]
+	vsra.u16	d17, d16, #16
+@ CHECK: vsra.u32	d17, d16, #32   @ encoding: [0xe0,0xff,0x30,0x11]
+	vsra.u32	d17, d16, #32
+@ CHECK: vsra.u64	d17, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x11]
+	vsra.u64	d17, d16, #64
+@ CHECK: vsra.u8	q8, q9, #8              @ encoding: [0xc8,0xff,0x72,0x01]
+	vsra.u8	q8, q9, #8
+@ CHECK: vsra.u16	q8, q9, #16     @ encoding: [0xd0,0xff,0x72,0x01]
+	vsra.u16	q8, q9, #16
+@ CHECK: vsra.u32	q8, q9, #32     @ encoding: [0xe0,0xff,0x72,0x01]
+	vsra.u32	q8, q9, #32
+@ CHECK: vsra.u64	q8, q9, #64     @ encoding: [0xc0,0xff,0xf2,0x01]
+	vsra.u64	q8, q9, #64
+@ CHECK: vrsra.s8	d17, d16, #8    @ encoding: [0xc8,0xef,0x30,0x13]
+	vrsra.s8	d17, d16, #8
+@ CHECK: vrsra.s16	d17, d16, #16   @ encoding: [0xd0,0xef,0x30,0x13]
+	vrsra.s16	d17, d16, #16
+@ CHECK: vrsra.s32	d17, d16, #32   @ encoding: [0xe0,0xef,0x30,0x13]
+	vrsra.s32	d17, d16, #32
+@ CHECK: vrsra.s64	d17, d16, #64   @ encoding: [0xc0,0xef,0xb0,0x13]
+	vrsra.s64	d17, d16, #64
+@ CHECK: vrsra.u8	d17, d16, #8    @ encoding: [0xc8,0xff,0x30,0x13]
+	vrsra.u8	d17, d16, #8
+@ CHECK: vrsra.u16	d17, d16, #16   @ encoding: [0xd0,0xff,0x30,0x13]
+	vrsra.u16	d17, d16, #16
+@ CHECK: vrsra.u32	d17, d16, #32   @ encoding: [0xe0,0xff,0x30,0x13]
+	vrsra.u32	d17, d16, #32
+@ CHECK: vrsra.u64	d17, d16, #64   @ encoding: [0xc0,0xff,0xb0,0x13]
+	vrsra.u64	d17, d16, #64
+@ CHECK: vrsra.s8	q8, q9, #8      @ encoding: [0xc8,0xef,0x72,0x03]
+	vrsra.s8	q8, q9, #8
+@ CHECK: vrsra.s16	q8, q9, #16     @ encoding: [0xd0,0xef,0x72,0x03]
+	vrsra.s16	q8, q9, #16
+@ CHECK: vrsra.s32	q8, q9, #32     @ encoding: [0xe0,0xef,0x72,0x03]
+	vrsra.s32	q8, q9, #32
+@ CHECK: vrsra.s64	q8, q9, #64     @ encoding: [0xc0,0xef,0xf2,0x03]
+	vrsra.s64	q8, q9, #64
+@ CHECK: vrsra.u8	q8, q9, #8      @ encoding: [0xc8,0xff,0x72,0x03]
+	vrsra.u8	q8, q9, #8
+@ CHECK: vrsra.u16	q8, q9, #16     @ encoding: [0xd0,0xff,0x72,0x03]
+	vrsra.u16	q8, q9, #16
+@ CHECK: vrsra.u32	q8, q9, #32     @ encoding: [0xe0,0xff,0x72,0x03]
+	vrsra.u32	q8, q9, #32
+@ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xc0,0xff,0xf2,0x03]
+	vrsra.u64	q8, q9, #64
+@ CHECK: vsli.8	d17, d16, #7            @ encoding: [0xcf,0xff,0x30,0x15]
+	vsli.8	d17, d16, #7
+@ CHECK: vsli.16	d17, d16, #15           @ encoding: [0xdf,0xff,0x30,0x15]
+	vsli.16	d17, d16, #15
+@ CHECK: vsli.32	d17, d16, #31           @ encoding: [0xff,0xff,0x30,0x15]
+	vsli.32	d17, d16, #31
+@ CHECK: vsli.64	d17, d16, #63           @ encoding: [0xff,0xff,0xb0,0x15]
+	vsli.64	d17, d16, #63
+@ CHECK: vsli.8	q9, q8, #7              @ encoding: [0xcf,0xff,0x70,0x25]
+	vsli.8	q9, q8, #7
+@ CHECK: vsli.16	q9, q8, #15             @ encoding: [0xdf,0xff,0x70,0x25]
+	vsli.16	q9, q8, #15
+@ CHECK: vsli.32	q9, q8, #31             @ encoding: [0xff,0xff,0x70,0x25]
+	vsli.32	q9, q8, #31
+@ CHECK: vsli.64	q9, q8, #63             @ encoding: [0xff,0xff,0xf0,0x25]
+	vsli.64	q9, q8, #63
+@ CHECK: vsri.8	d17, d16, #8            @ encoding: [0xc8,0xff,0x30,0x14]
+	vsri.8	d17, d16, #8
+@ CHECK: vsri.16	d17, d16, #16           @ encoding: [0xd0,0xff,0x30,0x14]
+	vsri.16	d17, d16, #16
+@ CHECK: vsri.32	d17, d16, #32           @ encoding: [0xe0,0xff,0x30,0x14]
+	vsri.32	d17, d16, #32
+@ CHECK: vsri.64	d17, d16, #64           @ encoding: [0xc0,0xff,0xb0,0x14]
+	vsri.64	d17, d16, #64
+@ CHECK: vsri.8	q9, q8, #8              @ encoding: [0xc8,0xff,0x70,0x24]
+	vsri.8	q9, q8, #8
+@ CHECK: vsri.16	q9, q8, #16             @ encoding: [0xd0,0xff,0x70,0x24]
+	vsri.16	q9, q8, #16
+@ CHECK: vsri.32	q9, q8, #32             @ encoding: [0xe0,0xff,0x70,0x24]
+	vsri.32	q9, q8, #32
+@ CHECK: vsri.64	q9, q8, #64             @ encoding: [0xc0,0xff,0xf0,0x24]
+	vsri.64	q9, q8, #64
diff --git a/test/MC/ARM/neont2-shuffle-encoding.s b/test/MC/ARM/neont2-shuffle-encoding.s
new file mode 100644
index 000000000000..f471a2b54178
--- /dev/null
+++ b/test/MC/ARM/neont2-shuffle-encoding.s
@@ -0,0 +1,48 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: vext.8	d16, d17, d16, #3       @ encoding: [0xf1,0xef,0xa0,0x03]
+	vext.8	d16, d17, d16, #3
+@ CHECK: vext.8	d16, d17, d16, #5       @ encoding: [0xf1,0xef,0xa0,0x05]
+	vext.8	d16, d17, d16, #5
+@ CHECK: vext.8	q8, q9, q8, #3          @ encoding: [0xf2,0xef,0xe0,0x03]
+	vext.8	q8, q9, q8, #3
+@ CHECK: vext.8	q8, q9, q8, #7          @ encoding: [0xf2,0xef,0xe0,0x07]
+	vext.8	q8, q9, q8, #7
+@ CHECK: vext.16	d16, d17, d16, #3       @ encoding: [0xf1,0xef,0xa0,0x06]
+	vext.16	d16, d17, d16, #3
+@ CHECK: vext.32	q8, q9, q8, #3          @ encoding: [0xf2,0xef,0xe0,0x0c]
+	vext.32	q8, q9, q8, #3
+@ CHECK: vtrn.8	d17, d16                @ encoding: [0xf2,0xff,0xa0,0x10]
+	vtrn.8	d17, d16
+@ CHECK: vtrn.16	d17, d16                @ encoding: [0xf6,0xff,0xa0,0x10]
+	vtrn.16	d17, d16
+@ CHECK: vtrn.32	d17, d16                @ encoding: [0xfa,0xff,0xa0,0x10]
+	vtrn.32	d17, d16
+@ CHECK: vtrn.8	q9, q8                  @ encoding: [0xf2,0xff,0xe0,0x20]
+	vtrn.8	q9, q8
+@ CHECK: vtrn.16	q9, q8                  @ encoding: [0xf6,0xff,0xe0,0x20]
+	vtrn.16	q9, q8
+@ CHECK: vtrn.32	q9, q8                  @ encoding: [0xfa,0xff,0xe0,0x20]
+	vtrn.32	q9, q8
+@ CHECK: vuzp.8	d17, d16                @ encoding: [0xf2,0xff,0x20,0x11]
+	vuzp.8	d17, d16
+@ CHECK: vuzp.16	d17, d16                @ encoding: [0xf6,0xff,0x20,0x11]
+	vuzp.16	d17, d16
+@ CHECK: vuzp.8	q9, q8                  @ encoding: [0xf2,0xff,0x60,0x21]
+	vuzp.8	q9, q8
+@ CHECK: vuzp.16	q9, q8                  @ encoding: [0xf6,0xff,0x60,0x21]
+	vuzp.16	q9, q8
+@ CHECK: vuzp.32	q9, q8                  @ encoding: [0xfa,0xff,0x60,0x21]
+	vuzp.32	q9, q8
+@ CHECK: vzip.8	d17, d16                @ encoding: [0xf2,0xff,0xa0,0x11]
+	vzip.8	d17, d16
+@ CHECK: vzip.16	d17, d16                @ encoding: [0xf6,0xff,0xa0,0x11]
+	vzip.16	d17, d16
+@ CHECK: vzip.8	q9, q8                  @ encoding: [0xf2,0xff,0xe0,0x21]
+	vzip.8	q9, q8
+@ CHECK: vzip.16	q9, q8                  @ encoding: [0xf6,0xff,0xe0,0x21]
+	vzip.16	q9, q8
+@ CHECK: vzip.32	q9, q8                  @ encoding: [0xfa,0xff,0xe0,0x21]
+	vzip.32	q9, q8
diff --git a/test/MC/ARM/neont2-sub-encoding.s b/test/MC/ARM/neont2-sub-encoding.s
new file mode 100644
index 000000000000..fa9d145df92c
--- /dev/null
+++ b/test/MC/ARM/neont2-sub-encoding.s
@@ -0,0 +1,46 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+@ CHECK: vext.8	d16, d17, d16, #3       @ encoding: [0xf1,0xef,0xa0,0x03]
+	vext.8	d16, d17, d16, #3
+@ CHECK: vext.8	d16, d17, d16, #5       @ encoding: [0xf1,0xef,0xa0,0x05]
+	vext.8	d16, d17, d16, #5
+@ CHECK: vext.8	q8, q9, q8, #3          @ encoding: [0xf2,0xef,0xe0,0x03]
+	vext.8	q8, q9, q8, #3
+@ CHECK: vext.8	q8, q9, q8, #7          @ encoding: [0xf2,0xef,0xe0,0x07]
+	vext.8	q8, q9, q8, #7
+@ CHECK: vext.16	d16, d17, d16, #3       @ encoding: [0xf1,0xef,0xa0,0x06]
+	vext.16	d16, d17, d16, #3
+@ CHECK: vext.32	q8, q9, q8, #3          @ encoding: [0xf2,0xef,0xe0,0x0c]
+	vext.32	q8, q9, q8, #3
+@ CHECK: vtrn.8	d17, d16                @ encoding: [0xf2,0xff,0xa0,0x10]
+	vtrn.8	d17, d16
+@ CHECK: vtrn.16	d17, d16                @ encoding: [0xf6,0xff,0xa0,0x10]
+	vtrn.16	d17, d16
+@ CHECK: vtrn.32	d17, d16                @ encoding: [0xfa,0xff,0xa0,0x10]
+	vtrn.32	d17, d16
+@ CHECK: vtrn.8	q9, q8                  @ encoding: [0xf2,0xff,0xe0,0x20]
+	vtrn.8	q9, q8
+@ CHECK: vtrn.16	q9, q8                  @ encoding: [0xf6,0xff,0xe0,0x20]
+	vtrn.16	q9, q8
+@ CHECK: vtrn.32	q9, q8                  @ encoding: [0xfa,0xff,0xe0,0x20]
+	vtrn.32	q9, q8
+@ CHECK: vuzp.8	d17, d16                @ encoding: [0xf2,0xff,0x20,0x11]
+	vuzp.8	d17, d16
+@ CHECK: vuzp.16	d17, d16                @ encoding: [0xf6,0xff,0x20,0x11]
+	vuzp.16	d17, d16
+@ CHECK: vuzp.8	q9, q8                  @ encoding: [0xf2,0xff,0x60,0x21]
+	vuzp.8	q9, q8
+@ CHECK: vuzp.16	q9, q8                  @ encoding: [0xf6,0xff,0x60,0x21]
+	vuzp.16	q9, q8
+@ CHECK: vuzp.32	q9, q8                  @ encoding: [0xfa,0xff,0x60,0x21]
+	vuzp.32	q9, q8
+@ CHECK: vzip.8	d17, d16                @ encoding: [0xf2,0xff,0xa0,0x11]
+	vzip.8	d17, d16
+@ CHECK: vzip.16	d17, d16                @ encoding: [0xf6,0xff,0xa0,0x11]
+	vzip.16	d17, d16
+@ CHECK: vzip.8	q9, q8                  @ encoding: [0xf2,0xff,0xe0,0x21]
+	vzip.8	q9, q8
+@ CHECK: vzip.16	q9, q8                  @ encoding: [0xf6,0xff,0xe0,0x21]
+	vzip.16	q9, q8
+@ CHECK: vzip.32	q9, q8                  @ encoding: [0xfa,0xff,0xe0,0x21]
+	vzip.32	q9, q8
diff --git a/test/MC/ARM/neont2-table-encoding.s b/test/MC/ARM/neont2-table-encoding.s
new file mode 100644
index 000000000000..46fb9345fbb6
--- /dev/null
+++ b/test/MC/ARM/neont2-table-encoding.s
@@ -0,0 +1,21 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vtbl.8	d16, {d17}, d16         @ encoding: [0xa0,0x08,0xf1,0xff]
+	vtbl.8	d16, {d17}, d16
+@ CHECK: vtbl.8	d16, {d16, d17}, d18    @ encoding: [0xa2,0x09,0xf0,0xff]
+	vtbl.8	d16, {d16, d17}, d18
+@ CHECK: vtbl.8	d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xff]
+	vtbl.8	d16, {d16, d17, d18}, d20
+@ CHECK: vtbl.8	d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xff]
+	vtbl.8	d16, {d16, d17, d18, d19}, d20
+@ CHECK: vtbx.8	d18, {d16}, d17         @ encoding: [0xe1,0x28,0xf0,0xff]
+	vtbx.8	d18, {d16}, d17
+@ CHECK: vtbx.8	d19, {d16, d17}, d18    @ encoding: [0xe2,0x39,0xf0,0xff]
+	vtbx.8	d19, {d16, d17}, d18
+@ CHECK: vtbx.8	d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xff]
+	vtbx.8	d20, {d16, d17, d18}, d21
+@ CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xff]
+	vtbx.8	d20, {d16, d17, d18, d19}, d21
diff --git a/test/MC/ARM/neont2-vld-encoding.s b/test/MC/ARM/neont2-vld-encoding.s
new file mode 100644
index 000000000000..031205a5cc8a
--- /dev/null
+++ b/test/MC/ARM/neont2-vld-encoding.s
@@ -0,0 +1,112 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vld1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf9]
+	vld1.8	{d16}, [r0, :64]
+@ CHECK: vld1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x60,0xf9]
+  vld1.16	{d16}, [r0]
+@ CHECK: vld1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x60,0xf9]
+  vld1.32	{d16}, [r0]
+@ CHECK: vld1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x60,0xf9]
+  vld1.64	{d16}, [r0]
+@ CHECK: vld1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf9]
+  vld1.8	{d16, d17}, [r0, :64]
+@ CHECK: vld1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x60,0xf9]
+  vld1.16	{d16, d17}, [r0, :128]
+@ CHECK: vld1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x60,0xf9]
+  vld1.32	{d16, d17}, [r0]
+@ CHECK: vld1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x60,0xf9]
+  vld1.64	{d16, d17}, [r0]
+
+@ CHECK: vld2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x60,0xf9]
+  vld2.8	{d16, d17}, [r0, :64]
+@ CHECK: vld2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x60,0xf9]
+  vld2.16	{d16, d17}, [r0, :128]
+@ CHECK: vld2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x60,0xf9]
+  vld2.32	{d16, d17}, [r0]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf9]
+  vld2.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf9]
+  vld2.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf9]
+  vld2.32	{d16, d17, d18, d19}, [r0, :256]
+
+@ CHECK: vld3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf9]
+  vld3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vld3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x60,0xf9]
+  vld3.16	{d16, d17, d18}, [r0]
+@ CHECK: vld3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x60,0xf9]
+  vld3.32	{d16, d17, d18}, [r0]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf9]
+  vld3.8	{d16, d18, d20}, [r0, :64]!
+@ CHECK: vld3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf9]
+  vld3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vld3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x60,0xf9] 
+  vld3.16	{d16, d18, d20}, [r0]!
+@ CHECK: vld3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x60,0xf9]
+  vld3.16	{d17, d19, d21}, [r0]!
+@ CHECK: vld3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x60,0xf9]
+  vld3.32	{d16, d18, d20}, [r0]!
+@ CHECK: vld3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x60,0xf9]
+  vld3.32	{d17, d19, d21}, [r0]!
+
+@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf9]
+  vld4.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf9]
+  vld4.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf9]
+  vld4.32	{d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf9]
+  vld4.8	{d16, d18, d20, d22}, [r0, :256]!
+@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf9]
+  vld4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vld4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf9]
+  vld4.16	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf9]
+  vld4.16	{d17, d19, d21, d23}, [r0]!
+@ CHECK: vld4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf9]
+  vld4.32	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vld4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf9]
+  vld4.32	{d17, d19, d21, d23}, [r0]!
+
+@ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf9]
+  vld1.8	{d16[3]}, [r0]
+@ CHECK: vld1.16	{d16[2]}, [r0, :16]     @ encoding: [0x9f,0x04,0xe0,0xf9]
+  vld1.16	{d16[2]}, [r0, :16]
+@ CHECK: vld1.32	{d16[1]}, [r0, :32]     @ encoding: [0xbf,0x08,0xe0,0xf9]
+  vld1.32	{d16[1]}, [r0, :32]
+
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf9]
+  vld2.8	{d16[1], d17[1]}, [r0, :16]
+@ CHECK: vld2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf9]
+  vld2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vld2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf9]
+  vld2.32	{d16[1], d17[1]}, [r0]
+@ CHECK: vld2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf9]
+  vld2.16	{d17[1], d19[1]}, [r0]
+@ CHECK: vld2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf9]
+  vld2.32	{d17[0], d19[0]}, [r0, :64]
+
+@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf9]
+  vld3.8	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xe0,0xf9]
+  vld3.16	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xe0,0xf9]
+  vld3.32	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vld3.16	{d16[1], d18[1], d20[1]}, [r0] @ encoding: [0x6f,0x06,0xe0,0xf9]
+  vld3.16	{d16[1], d18[1], d20[1]}, [r0]
+@ CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xcf,0x1a,0xe0,0xf9]
+  vld3.32	{d17[1], d19[1], d21[1]}, [r0]
+
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf9]
+  vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf9]
+  vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
+@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
+  vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf9]
+  vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+@ CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf9]
+  vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
new file mode 100644
index 000000000000..1722f12a00f6
--- /dev/null
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -0,0 +1,103 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+
+.code 16
+
+@ CHECK: vst1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x40,0xf9]
+  vst1.8	{d16}, [r0, :64]
+@ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x40,0xf9]
+  vst1.16	{d16}, [r0]
+@ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x40,0xf9]
+  vst1.32	{d16}, [r0]
+@ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x40,0xf9]
+  vst1.64	{d16}, [r0]
+@ CHECK: vst1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x40,0xf9]
+  vst1.8	{d16, d17}, [r0, :64]
+@ CHECK: vst1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x40,0xf9]
+  vst1.16	{d16, d17}, [r0, :128]
+@ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x40,0xf9]
+  vst1.32	{d16, d17}, [r0]
+@ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x40,0xf9]
+  vst1.64	{d16, d17}, [r0]
+
+@ CHECK: vst2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x40,0xf9]
+  vst2.8	{d16, d17}, [r0, :64]
+@ CHECK: vst2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x40,0xf9]
+  vst2.16	{d16, d17}, [r0, :128]
+@ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x40,0xf9]
+  vst2.32	{d16, d17}, [r0]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf9]
+  vst2.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf9]
+  vst2.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf9]
+  vst2.32	{d16, d17, d18, d19}, [r0, :256]
+
+@ CHECK: vst3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf9]
+  vst3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x40,0xf9]
+  vst3.16	{d16, d17, d18}, [r0]
+@ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x40,0xf9]
+  vst3.32	{d16, d17, d18}, [r0]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf9]
+  vst3.8	{d16, d18, d20}, [r0, :64]!
+@ CHECK: vst3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf9]
+  vst3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x40,0xf9]
+  vst3.16	{d16, d18, d20}, [r0]!
+@ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x40,0xf9]
+  vst3.16	{d17, d19, d21}, [r0]!
+@ CHECK: vst3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x40,0xf9]
+  vst3.32	{d16, d18, d20}, [r0]!
+@ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x40,0xf9]
+  vst3.32	{d17, d19, d21}, [r0]!
+
+@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf9]
+  vst4.8	{d16, d17, d18, d19}, [r0, :64]
+@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf9]
+  vst4.16	{d16, d17, d18, d19}, [r0, :128]
+@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf9]
+  vst4.8	{d16, d18, d20, d22}, [r0, :256]!
+@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf9]
+  vst4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf9]
+  vst4.16	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf9]
+  vst4.16	{d17, d19, d21, d23}, [r0]!
+@ CHECK: vst4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf9]
+  vst4.32	{d16, d18, d20, d22}, [r0]!
+@ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf9]
+  vst4.32	{d17, d19, d21, d23}, [r0]!
+
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf9]
+  vst2.8	{d16[1], d17[1]}, [r0, :16]
+@ CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf9]
+  vst2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf9]
+  vst2.32	{d16[1], d17[1]}, [r0]
+@ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf9]
+  vst2.16	{d17[1], d19[1]}, [r0]
+@ CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf9]
+  vst2.32	{d17[0], d19[0]}, [r0, :64]
+
+@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf9]
+  vst3.8	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf9]
+  vst3.16	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf9]
+  vst3.32	{d16[1], d17[1], d18[1]}, [r0]
+@ CHECK: vst3.16	{d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf9]
+  vst3.16	{d17[2], d19[2], d21[2]}, [r0]
+@ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf9]
+  vst3.32	{d16[0], d18[0], d20[0]}, [r0]
+
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf9]
+  vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf9]
+  vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
+@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
+  vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf9]
+  vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+@ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
+  vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/prefetch.ll b/test/MC/ARM/prefetch.ll
new file mode 100644
index 000000000000..674b8f323f04
--- /dev/null
+++ b/test/MC/ARM/prefetch.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   -mattr=+v7a,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7a     -show-mc-encoding | FileCheck %s -check-prefix=T2
+; rdar://8924681
+
+define void @t1(i8* %ptr) nounwind  {
+entry:
+; ARM: t1:
+; ARM: pldw [r0]                        @ encoding: [0x00,0xf0,0x90,0xf5]
+; ARM: pld [r0]                         @ encoding: [0x00,0xf0,0xd0,0xf5]
+
+; T2: t1:
+; T2: pld [r0]                      @ encoding: [0x90,0xf8,0x00,0xf0]
+  tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
+  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+  ret void
+}
+
+define void @t2(i8* %ptr) nounwind  {
+entry:
+; ARM: t2:
+; ARM: pld [r0, #1023]                  @ encoding: [0xff,0xf3,0xd0,0xf5]
+
+; T2: t2:
+; T2: pld [r0, #1023]               @ encoding: [0x90,0xf8,0xff,0xf3]
+  %tmp = getelementptr i8* %ptr, i32 1023
+  tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
+  ret void
+}
+
+define void @t3(i32 %base, i32 %offset) nounwind  {
+entry:
+; ARM: t3:
+; ARM: pld [r0, r1, lsr #2]             @ encoding: [0x21,0xf1,0xd0,0xf7]
+
+; T2: t3:
+; T2: pld [r0, r1]                  @ encoding: [0x10,0xf8,0x01,0xf0]
+  %tmp1 = lshr i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i8*
+  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+  ret void
+}
+
+define void @t4(i32 %base, i32 %offset) nounwind  {
+entry:
+; ARM: t4:
+; ARM: pld [r0, r1, lsl #2]             @ encoding: [0x01,0xf1,0xd0,0xf7]
+
+; T2: t4:
+; T2: pld [r0, r1, lsl #2]          @ encoding: [0x10,0xf8,0x21,0xf0]
+  %tmp1 = shl i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i8*
+  tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind 
diff --git a/test/MC/ARM/reg-list.s b/test/MC/ARM/reg-list.s
new file mode 100644
index 000000000000..4dd392e83791
--- /dev/null
+++ b/test/MC/ARM/reg-list.s
@@ -0,0 +1,8 @@
+@ RUN: llvm-mc -triple thumb-apple-darwin10 -show-encoding < %s 2> %t | FileCheck %s
+@ RUN: FileCheck --check-prefix=CHECK-WARNINGS < %t %s
+        
+        push    {r7, lr}
+@ CHECK-WARNINGS: register not in ascending order in register list
+
+        push	{lr, r7}
+@ CHECK: push {lr, r7}
diff --git a/test/MC/ARM/simple-encoding.ll b/test/MC/ARM/simple-encoding.ll
new file mode 100644
index 000000000000..0877e8e30c6f
--- /dev/null
+++ b/test/MC/ARM/simple-encoding.ll
@@ -0,0 +1,237 @@
+;RUN: llc -mtriple=armv7-apple-darwin -show-mc-encoding < %s | FileCheck %s
+
+
+;FIXME: Once the ARM integrated assembler is up and going, these sorts of tests
+;       should run on .s source files rather than using llc to generate the
+;       assembly. There's also a large number of instruction encodings the
+;       compiler never generates, so we need the integrated assembler to be
+;       able to test those at all.
+
+declare void @llvm.trap() nounwind
+declare i32 @llvm.ctlz.i32(i32)
+
+define i32 @foo(i32 %a, i32 %b) {
+; CHECK: foo
+; CHECK: trap                         @ encoding: [0xfe,0xde,0xff,0xe7]
+; CHECK: bx lr                        @ encoding: [0x1e,0xff,0x2f,0xe1]
+
+  tail call void @llvm.trap()
+  ret i32 undef
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2
+; CHECK: add  r0, r1, r0              @ encoding: [0x00,0x00,0x81,0xe0]
+; CHECK: bx lr                        @ encoding: [0x1e,0xff,0x2f,0xe1]
+  %add = add nsw i32 %b, %a
+  ret i32 %add
+}
+
+
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3
+; CHECK: add  r0, r0, r1, lsl #3      @ encoding: [0x81,0x01,0x80,0xe0]
+; CHECK: bx lr                        @ encoding: [0x1e,0xff,0x2f,0xe1]
+  %mul = shl i32 %b, 3
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4
+; CHECK: add r0, r0, #254, 28         @ encoding: [0xfe,0x0e,0x80,0xe2]
+; CHECK:                              @ 4064
+; CHECK: bx lr                        @ encoding: [0x1e,0xff,0x2f,0xe1]
+  %add = add nsw i32 %a, 4064
+  ret i32 %add
+}
+
+define i32 @f5(i32 %a, i32 %b, i32 %c) {
+; CHECK: f5
+; CHECK: cmp r0, r1                   @ encoding: [0x01,0x00,0x50,0xe1]
+; CHECK: mov r0, r2                   @ encoding: [0x02,0x00,0xa0,0xe1]
+; CHECK: movgt r0, r1                 @ encoding: [0x01,0x00,0xa0,0xc1]
+  %cmp = icmp sgt i32 %a, %b
+  %retval.0 = select i1 %cmp, i32 %b, i32 %c
+  ret i32 %retval.0
+}
+
+define i64 @f6(i64 %a, i64 %b, i64 %c) {
+; CHECK: f6
+; CHECK: adds r0, r2, r0              @ encoding: [0x00,0x00,0x92,0xe0]
+; CHECK: adc r1, r3, r1               @ encoding: [0x01,0x10,0xa3,0xe0]
+  %add = add nsw i64 %b, %a
+  ret i64 %add
+}
+
+define i32 @f7(i32 %a, i32 %b) {
+; CHECK: f7
+; CHECK: uxtab  r0, r0, r1            @ encoding: [0x71,0x00,0xe0,0xe6]
+  %and = and i32 %b, 255
+  %add = add i32 %and, %a
+  ret i32 %add
+}
+
+define i32 @f8(i32 %a) {
+; CHECK: f8
+; CHECK: movt r0, #42405              @ encoding: [0xa5,0x05,0x4a,0xe3]
+  %and = and i32 %a, 65535
+  %or = or i32 %and, -1515913216
+  ret i32 %or
+}
+
+define i32 @f9() {
+; CHECK: f9
+; CHECK: movw r0, #42405              @ encoding: [0xa5,0x05,0x0a,0xe3]
+  ret i32 42405
+}
+
+define i64 @f10(i64 %a) {
+; CHECK: f10
+; CHECK: asrs  r1, r1, #1             @ encoding: [0xc1,0x10,0xb0,0xe1]
+; CHECK: rrx r0, r0                   @ encoding: [0x60,0x00,0xa0,0xe1]
+  %shr = ashr i64 %a, 1
+  ret i64 %shr
+}
+
+define i32 @f11([1 x i32] %A.coerce0, [1 x i32] %B.coerce0) {
+; CHECK: f11
+; CHECK: ubfx  r1, r1, #8, #5         @ encoding: [0x51,0x14,0xe4,0xe7]
+; CHECK: sbfx  r0, r0, #13, #7        @ encoding: [0xd0,0x06,0xa6,0xe7]
+  %tmp1 = extractvalue [1 x i32] %A.coerce0, 0
+  %tmp2 = extractvalue [1 x i32] %B.coerce0, 0
+  %tmp3 = shl i32 %tmp1, 12
+  %bf.val.sext = ashr i32 %tmp3, 25
+  %tmp4 = lshr i32 %tmp2, 8
+  %bf.clear2 = and i32 %tmp4, 31
+  %mul = mul nsw i32 %bf.val.sext, %bf.clear2
+  ret i32 %mul
+}
+
+define i32 @f12(i32 %a) {
+; CHECK: f12:
+; CHECK: bfc  r0, #4, #20             @ encoding: [0x1f,0x02,0xd7,0xe7]
+    %tmp = and i32 %a, 4278190095
+    ret i32 %tmp
+}
+
+define i64 @f13() {
+; CHECK: f13:
+; CHECK: mvn r0, #0                   @ encoding: [0x00,0x00,0xe0,0xe3]
+; CHECK: mvn r1, #2, 2                @ encoding: [0x02,0x11,0xe0,0xe3]
+        ret i64 9223372036854775807
+}
+
+define i32 @f14(i32 %x, i32 %y) {
+; CHECK: f14:
+; CHECK: smmul  r0, r1, r0            @ encoding: [0x11,0xf0,0x50,0xe7]
+        %tmp = sext i32 %x to i64
+        %tmp1 = sext i32 %y to i64
+        %tmp2 = mul i64 %tmp1, %tmp
+        %tmp3 = lshr i64 %tmp2, 32
+        %tmp3.upgrd.1 = trunc i64 %tmp3 to i32
+        ret i32 %tmp3.upgrd.1
+}
+
+define i32 @f15(i32 %x, i32 %y) {
+; CHECK: f15:
+; CHECK: umull  r1, r0, r1, r0        @ encoding: [0x91,0x10,0x80,0xe0]
+        %tmp = zext i32 %x to i64
+        %tmp1 = zext i32 %y to i64
+        %tmp2 = mul i64 %tmp1, %tmp
+        %tmp3 = lshr i64 %tmp2, 32
+        %tmp3.upgrd.2 = trunc i64 %tmp3 to i32
+        ret i32 %tmp3.upgrd.2
+}
+
+define i32 @f16(i16 %x, i32 %y) {
+; CHECK: f16:
+; CHECK: smulbt r0, r0, r1            @ encoding: [0xc0,0x01,0x60,0xe1]
+        %tmp1 = add i16 %x, 2
+        %tmp2 = sext i16 %tmp1 to i32
+        %tmp3 = ashr i32 %y, 16
+        %tmp4 = mul i32 %tmp2, %tmp3
+        ret i32 %tmp4
+}
+
+define i32 @f17(i32 %x, i32 %y) {
+; CHECK: f17:
+; CHECK: smultt r0, r1, r0            @ encoding: [0xe1,0x00,0x60,0xe1]
+        %tmp1 = ashr i32 %x, 16
+        %tmp3 = ashr i32 %y, 16
+        %tmp4 = mul i32 %tmp3, %tmp1
+        ret i32 %tmp4
+}
+
+define i32 @f18(i32 %a, i16 %x, i32 %y) {
+; CHECK: f18:
+; CHECK: smlabt r0, r1, r2, r0        @ encoding: [0xc1,0x02,0x00,0xe1]
+        %tmp = sext i16 %x to i32
+        %tmp2 = ashr i32 %y, 16
+        %tmp3 = mul i32 %tmp2, %tmp
+        %tmp5 = add i32 %tmp3, %a
+        ret i32 %tmp5
+}
+
+define i32 @f19(i32 %x) {
+; CHECK: f19
+; CHECK: clz r0, r0                   @ encoding: [0x10,0x0f,0x6f,0xe1]
+        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+        ret i32 %tmp.1
+}
+
+define i32 @f20(i32 %X) {
+; CHECK: f20
+; CHECK: rev16 r0, r0                 @ encoding: [0xb0,0x0f,0xbf,0xe6]
+        %tmp1 = lshr i32 %X, 8
+        %X15 = bitcast i32 %X to i32
+        %tmp4 = shl i32 %X15, 8
+        %tmp2 = and i32 %tmp1, 16711680
+        %tmp5 = and i32 %tmp4, -16777216
+        %tmp9 = and i32 %tmp1, 255
+        %tmp13 = and i32 %tmp4, 65280
+        %tmp6 = or i32 %tmp5, %tmp2
+        %tmp10 = or i32 %tmp6, %tmp13
+        %tmp14 = or i32 %tmp10, %tmp9
+        ret i32 %tmp14
+}
+
+define i32 @f21(i32 %X) {
+; CHECK: f21
+; CHECK: revsh r0, r0                 @ encoding: [0xb0,0x0f,0xff,0xe6]
+        %tmp1 = lshr i32 %X, 8
+        %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
+        %tmp3 = trunc i32 %X to i16
+        %tmp2 = and i16 %tmp1.upgrd.1, 255
+        %tmp4 = shl i16 %tmp3, 8
+        %tmp5 = or i16 %tmp2, %tmp4
+        %tmp5.upgrd.2 = sext i16 %tmp5 to i32
+        ret i32 %tmp5.upgrd.2
+}
+
+define i32 @f22(i32 %X, i32 %Y) {
+; CHECK: f22
+; CHECK: pkhtb   r0, r0, r1, asr #22  @ encoding: [0x51,0x0b,0x80,0xe6]
+	%tmp1 = and i32 %X, -65536
+	%tmp2 = lshr i32 %Y, 22
+	%tmp3 = or i32 %tmp2, %tmp1
+	ret i32 %tmp3
+}
+
+define i32 @f23(i32 %X, i32 %Y) {
+; CHECK: f23
+; CHECK: pkhbt   r0, r0, r1, lsl #18  @ encoding: [0x11,0x09,0x80,0xe6]
+	%tmp1 = and i32 %X, 65535
+	%tmp2 = shl i32 %Y, 18
+	%tmp3 = or i32 %tmp1, %tmp2
+	ret i32 %tmp3
+}
+
+define void @f24(i32 %a) {
+; CHECK: f24
+; CHECK: cmp r0, #1, 16               @ encoding: [0x01,0x08,0x50,0xe3]
+        %b = icmp ugt i32 %a, 65536
+        br i1 %b, label %r, label %r
+r:
+        ret void
+}
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
new file mode 100644
index 000000000000..891738085a2d
--- /dev/null
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -0,0 +1,236 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+
+@ CHECK: vadd.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x71,0xee]
+        vadd.f64        d16, d17, d16
+        
+@ CHECK: vadd.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x30,0xee]
+        vadd.f32        s0, s1, s0
+
+@ CHECK: vsub.f64 d16, d17, d16      @ encoding: [0xe0,0x0b,0x71,0xee]
+        vsub.f64        d16, d17, d16
+
+@ CHECK: vsub.f32 s0, s1, s0         @ encoding: [0xc0,0x0a,0x30,0xee]
+        vsub.f32        s0, s1, s0
+
+@ CHECK: vdiv.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0xc1,0xee]
+        vdiv.f64        d16, d17, d16
+
+@ CHECK: vdiv.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x80,0xee]
+        vdiv.f32        s0, s1, s0
+
+@ CHECK: vmul.f64 d16, d17, d16      @ encoding: [0xa0,0x0b,0x61,0xee]
+        vmul.f64        d16, d17, d16
+
+@ CHECK: vmul.f32 s0, s1, s0         @ encoding: [0x80,0x0a,0x20,0xee]
+        vmul.f32        s0, s1, s0
+
+@ CHECK: vnmul.f64 d16, d17, d16     @ encoding: [0xe0,0x0b,0x61,0xee]
+        vnmul.f64       d16, d17, d16
+
+@ CHECK: vnmul.f32 s0, s1, s0        @ encoding: [0xc0,0x0a,0x20,0xee]
+        vnmul.f32       s0, s1, s0
+
+@ CHECK: vcmpe.f64 d17, d16          @ encoding: [0xe0,0x1b,0xf4,0xee]
+        vcmpe.f64       d17, d16
+
+@ CHECK: vcmpe.f32 s1, s0            @ encoding: [0xc0,0x0a,0xf4,0xee]
+        vcmpe.f32       s1, s0
+
+@ FIXME: vcmpe.f64 d16, #0           @ encoding: [0xc0,0x0b,0xf5,0xee]
+@        vcmpe.f64       d16, #0
+
+@ FIXME: vcmpe.f32 s0, #0            @ encoding: [0xc0,0x0a,0xb5,0xee]
+@        vcmpe.f32       s0, #0
+
+@ CHECK: vabs.f64 d16, d16           @ encoding: [0xe0,0x0b,0xf0,0xee]
+        vabs.f64        d16, d16
+
+@ CHECK: vabs.f32 s0, s0             @ encoding: [0xc0,0x0a,0xb0,0xee]
+        vabs.f32        s0, s0
+        
+@ CHECK: vcvt.f32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xb7,0xee]
+        vcvt.f32.f64    s0, d16
+
+@ CHECK: vcvt.f64.f32 d16, s0        @ encoding: [0xc0,0x0a,0xf7,0xee]
+        vcvt.f64.f32    d16, s0
+
+@ CHECK: vneg.f64 d16, d16           @ encoding: [0x60,0x0b,0xf1,0xee]
+        vneg.f64        d16, d16
+
+@ CHECK: vneg.f32 s0, s0             @ encoding: [0x40,0x0a,0xb1,0xee]
+        vneg.f32        s0, s0
+
+@ CHECK: vsqrt.f64 d16, d16          @ encoding: [0xe0,0x0b,0xf1,0xee]
+        vsqrt.f64       d16, d16
+
+@ CHECK: vsqrt.f32 s0, s0            @ encoding: [0xc0,0x0a,0xb1,0xee]
+        vsqrt.f32       s0, s0
+
+@ CHECK: vcvt.f64.s32 d16, s0        @ encoding: [0xc0,0x0b,0xf8,0xee]
+        vcvt.f64.s32    d16, s0
+
+@ CHECK: vcvt.f32.s32 s0, s0         @ encoding: [0xc0,0x0a,0xb8,0xee]
+        vcvt.f32.s32    s0, s0
+
+@ CHECK: vcvt.f64.u32 d16, s0        @ encoding: [0x40,0x0b,0xf8,0xee]
+        vcvt.f64.u32    d16, s0
+
+@ CHECK: vcvt.f32.u32 s0, s0         @ encoding: [0x40,0x0a,0xb8,0xee]
+        vcvt.f32.u32    s0, s0
+
+@ CHECK: vcvt.s32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbd,0xee]
+        vcvt.s32.f64    s0, d16
+
+@ CHECK: vcvt.s32.f32 s0, s0         @ encoding: [0xc0,0x0a,0xbd,0xee]
+        vcvt.s32.f32    s0, s0
+
+@ CHECK: vcvt.u32.f64 s0, d16        @ encoding: [0xe0,0x0b,0xbc,0xee]
+        vcvt.u32.f64    s0, d16
+
+@ CHECK: vcvt.u32.f32 s0, s0         @ encoding: [0xc0,0x0a,0xbc,0xee]
+        vcvt.u32.f32    s0, s0
+
+@ CHECK: vmla.f64 d16, d18, d17      @ encoding: [0xa1,0x0b,0x42,0xee]
+        vmla.f64        d16, d18, d17
+
+@ CHECK: vmla.f32 s1, s2, s0         @ encoding: [0x00,0x0a,0x41,0xee]
+        vmla.f32        s1, s2, s0
+
+@ CHECK: vmls.f64 d16, d18, d17      @ encoding: [0xe1,0x0b,0x42,0xee]
+        vmls.f64        d16, d18, d17
+
+@ CHECK: vmls.f32 s1, s2, s0         @ encoding: [0x40,0x0a,0x41,0xee]
+        vmls.f32        s1, s2, s0
+
+@ CHECK: vnmla.f64 d16, d18, d17     @ encoding: [0xe1,0x0b,0x52,0xee]
+        vnmla.f64       d16, d18, d17
+
+@ CHECK: vnmla.f32 s1, s2, s0        @ encoding: [0x40,0x0a,0x51,0xee]
+        vnmla.f32       s1, s2, s0
+
+@ CHECK: vnmls.f64 d16, d18, d17     @ encoding: [0xa1,0x0b,0x52,0xee]
+        vnmls.f64       d16, d18, d17
+
+@ CHECK: vnmls.f32 s1, s2, s0        @ encoding: [0x00,0x0a,0x51,0xee]
+        vnmls.f32       s1, s2, s0
+
+@ FIXME: vmrs apsr_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@        vmrs    apsr_nzcv, fpscr
+        
+@ CHECK: vnegne.f64 d16, d16         @ encoding: [0x60,0x0b,0xf1,0x1e]
+        vnegne.f64      d16, d16
+
+@ CHECK: vmovne s0, r0               @ encoding: [0x10,0x0a,0x00,0x1e]
+@ CHECK: vmoveq s0, r1               @ encoding: [0x10,0x1a,0x00,0x0e]
+        vmovne  s0, r0
+        vmoveq  s0, r1
+
+@ CHECK: vmrs r0, fpscr              @ encoding: [0x10,0x0a,0xf1,0xee]
+        vmrs    r0, fpscr
+@ CHECK: vmrs  r0, fpexc             @ encoding: [0x10,0x0a,0xf8,0xee]
+        vmrs  r0, fpexc
+@ CHECK: vmrs  r0, fpsid             @ encoding: [0x10,0x0a,0xf0,0xee]
+        vmrs  r0, fpsid
+
+@ CHECK: vmsr fpscr, r0              @ encoding: [0x10,0x0a,0xe1,0xee]
+        vmsr    fpscr, r0
+@ CHECK: vmsr  fpexc, r0             @ encoding: [0x10,0x0a,0xe8,0xee]
+        vmsr  fpexc, r0
+@ CHECK: vmsr  fpsid, r0             @ encoding: [0x10,0x0a,0xe0,0xee]
+        vmsr  fpsid, r0
+
+@ FIXME: vmov.f64 d16, #3.000000e+00 @ encoding: [0x08,0x0b,0xf0,0xee]
+@        vmov.f64        d16, #3.000000e+00
+
+@ FIXME: vmov.f32 s0, #3.000000e+00  @ encoding: [0x08,0x0a,0xb0,0xee]
+@        vmov.f32        s0, #3.000000e+00
+
+@ CHECK: vmov s0, r0                 @ encoding: [0x10,0x0a,0x00,0xee]
+@ CHECK: vmov s1, r1                 @ encoding: [0x90,0x1a,0x00,0xee]
+@ CHECK: vmov s2, r2                 @ encoding: [0x10,0x2a,0x01,0xee]
+@ CHECK: vmov s3, r3                 @ encoding: [0x90,0x3a,0x01,0xee]
+        vmov    s0, r0
+        vmov    s1, r1
+        vmov    s2, r2
+        vmov    s3, r3
+
+@ CHECK: vmov r0, s0                 @ encoding: [0x10,0x0a,0x10,0xee]
+@ CHECK: vmov r1, s1                 @ encoding: [0x90,0x1a,0x10,0xee]
+@ CHECK: vmov r2, s2                 @ encoding: [0x10,0x2a,0x11,0xee]
+@ CHECK: vmov r3, s3                 @ encoding: [0x90,0x3a,0x11,0xee]
+        vmov    r0, s0
+        vmov    r1, s1
+        vmov    r2, s2
+        vmov    r3, s3
+
+@ CHECK: vmov r0, r1, d16            @ encoding: [0x30,0x0b,0x51,0xec]
+        vmov    r0, r1, d16
+
+@ CHECK: vldr.64 d17, [r0]           @ encoding: [0x00,0x1b,0xd0,0xed]
+        vldr.64	d17, [r0]
+
+@ CHECK: vldr.64 d1, [r2, #32]       @ encoding: [0x08,0x1b,0x92,0xed]
+@ CHECK: vldr.64 d1, [r2, #-32]      @ encoding: [0x08,0x1b,0x12,0xed]
+        vldr.64	d1, [r2, #32]
+        vldr.64	d1, [r2, #-32]
+        
+@ CHECK: vldr.64 d2, [r3]            @ encoding: [0x00,0x2b,0x93,0xed]
+        vldr.64 d2, [r3]
+
+@ CHECK: vldr.64 d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr.64 d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr.64 d3, [pc]            @ encoding: [0x00,0x3b,0x9f,0xed]
+        vldr.64 d3, [pc]
+        vldr.64 d3, [pc,#0]
+        vldr.64 d3, [pc,#-0]
+
+@ CHECK: vldr.32 s13, [r0]           @ encoding: [0x00,0x6a,0xd0,0xed]
+        vldr.32	s13, [r0]
+
+@ CHECK: vldr.32 s1, [r2, #32]       @ encoding: [0x08,0x0a,0xd2,0xed]
+@ CHECK: vldr.32 s1, [r2, #-32]      @ encoding: [0x08,0x0a,0x52,0xed]
+        vldr.32	s1, [r2, #32]
+        vldr.32	s1, [r2, #-32]
+        
+@ CHECK: vldr.32 s2, [r3]            @ encoding: [0x00,0x1a,0x93,0xed]
+        vldr.32 s2, [r3]
+
+@ CHECK: vldr.32 s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr.32 s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr.32 s5, [pc]            @ encoding: [0x00,0x2a,0xdf,0xed]
+        vldr.32 s5, [pc]
+        vldr.32 s5, [pc,#0]
+        vldr.32 s5, [pc,#-0]
+
+@ CHECK: vstr.64 d4, [r1]            @ encoding: [0x00,0x4b,0x81,0xed]
+@ CHECK: vstr.64 d4, [r1, #24]       @ encoding: [0x06,0x4b,0x81,0xed]
+@ CHECK: vstr.64 d4, [r1, #-24]      @ encoding: [0x06,0x4b,0x01,0xed]
+        vstr.64 d4, [r1]
+        vstr.64 d4, [r1, #24]
+        vstr.64 d4, [r1, #-24]
+
+@ CHECK: vstr.32 s4, [r1]            @ encoding: [0x00,0x2a,0x81,0xed]
+@ CHECK: vstr.32 s4, [r1, #24]       @ encoding: [0x06,0x2a,0x81,0xed]
+@ CHECK: vstr.32 s4, [r1, #-24]      @ encoding: [0x06,0x2a,0x01,0xed]
+        vstr.32 s4, [r1]
+        vstr.32 s4, [r1, #24]
+        vstr.32 s4, [r1, #-24]
+
+@ CHECK: vldmia r1, {d2, d3, d4, d5, d6, d7} @ encoding: [0x0c,0x2b,0x91,0xec]
+@ CHECK: vldmia r1, {s2, s3, s4, s5, s6, s7} @ encoding: [0x06,0x1a,0x91,0xec]
+        vldmia  r1, {d2,d3-d6,d7}
+        vldmia  r1, {s2,s3-s6,s7}
+
+@ CHECK: vstmia r1, {d2, d3, d4, d5, d6, d7} @ encoding: [0x0c,0x2b,0x81,0xec]
+@ CHECK: vstmia	r1, {s2, s3, s4, s5, s6, s7} @ encoding: [0x06,0x1a,0x81,0xec]
+        vstmia  r1, {d2,d3-d6,d7}
+        vstmia  r1, {s2,s3-s6,s7}
+
+@ CHECK: vcvtr.s32.f64  s0, d0 @ encoding: [0x40,0x0b,0xbd,0xee]
+@ CHECK: vcvtr.s32.f32  s0, s1 @ encoding: [0x60,0x0a,0xbd,0xee]
+@ CHECK: vcvtr.u32.f64  s0, d0 @ encoding: [0x40,0x0b,0xbc,0xee]
+@ CHECK: vcvtr.u32.f32  s0, s1 @ encoding: [0x60,0x0a,0xbc,0xee]
+        vcvtr.s32.f64  s0, d0
+        vcvtr.s32.f32  s0, s1
+        vcvtr.u32.f64  s0, d0
+        vcvtr.u32.f32  s0, s1
diff --git a/test/MC/ARM/thumb.s b/test/MC/ARM/thumb.s
new file mode 100644
index 000000000000..342a390d81ac
--- /dev/null
+++ b/test/MC/ARM/thumb.s
@@ -0,0 +1,70 @@
+@ RUN: llvm-mc -triple thumbv6-apple-darwin -show-encoding < %s | FileCheck %s
+        .code 16
+
+@ CHECK: cmp	r1, r2               @ encoding: [0x91,0x42]
+        cmp     r1, r2
+
+@ CHECK: pop    {r1, r2, r4}         @ encoding: [0x16,0xbc]
+        pop     {r1, r2, r4}
+
+@ CHECK: trap                        @ encoding: [0xfe,0xde]
+        trap
+
+@ CHECK: blx	r9                   @ encoding: [0xc8,0x47]
+	blx	r9
+
+@ CHECK: rev	r2, r3               @ encoding: [0x1a,0xba]
+@ CHECK: rev16	r3, r4               @ encoding: [0x63,0xba]
+@ CHECK: revsh	r5, r6               @ encoding: [0xf5,0xba]
+        rev     r2, r3
+        rev16   r3, r4
+        revsh   r5, r6
+
+@ CHECK: sxtb	r2, r3               @ encoding: [0x5a,0xb2]
+@ CHECK: sxth	r2, r3               @ encoding: [0x1a,0xb2]
+	sxtb	r2, r3
+	sxth	r2, r3
+
+@ CHECK: tst	r4, r5               @ encoding: [0x2c,0x42]
+	tst	r4, r5
+
+@ CHECK: uxtb	r3, r6               @ encoding: [0xf3,0xb2]
+@ CHECK: uxth	r3, r6               @ encoding: [0xb3,0xb2]
+	uxtb	r3, r6
+	uxth	r3, r6
+
+@ CHECK: ldr	r3, [r1, r2]         @ encoding: [0x8b,0x58]
+	ldr	r3, [r1, r2]
+
+@ CHECK: bkpt  #2                  @ encoding: [0x02,0xbe]
+         bkpt  #2
+
+@ CHECK: mcr  p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xee,0x91,0x57]
+        mcr  p7, #1, r5, c1, c1, #4
+
+@ CHECK: mrc  p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
+        mrc  p14, #0, r1, c1, c2, #4
+
+@ CHECK: mcrr  p7, #1, r5, r4, c1 @ encoding: [0x44,0xec,0x11,0x57]
+        mcrr  p7, #1, r5, r4, c1
+
+@ CHECK: mrrc  p7, #1, r5, r4, c1 @ encoding: [0x54,0xec,0x11,0x57]
+        mrrc  p7, #1, r5, r4, c1
+
+@ CHECK: cdp  p7, #1, c1, c1, c1, #4 @ encoding: [0x11,0xee,0x81,0x17]
+        cdp  p7, #1, c1, c1, c1, #4
+
+@ CHECK: nop @ encoding: [0x00,0xbf]
+        nop
+
+@ CHECK: yield @ encoding: [0x10,0xbf]
+        yield
+
+@ CHECK: wfe @ encoding: [0x20,0xbf]
+        wfe
+
+@ CHECK: wfi @ encoding: [0x30,0xbf]
+        wfi
+
+@ CHECK: cpsie aif @ encoding: [0x67,0xb6]
+        cpsie aif
diff --git a/test/MC/ARM/thumb2.s b/test/MC/ARM/thumb2.s
new file mode 100644
index 000000000000..cd093119e58a
--- /dev/null
+++ b/test/MC/ARM/thumb2.s
@@ -0,0 +1,286 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+@ XFAIL: *
+.code 16
+
+@ CHECK: adc	r1, r1, #171            @ encoding: [0xab,0x01,0x41,0xf1]
+  adc	r1, r1, #171
+@ CHECK: adc	r1, r1, #1179666        @ encoding: [0x12,0x11,0x41,0xf1]
+  adc	r1, r1, #1179666
+@ CHECK: adc	r1, r1, #872428544      @ encoding: [0x34,0x21,0x41,0xf1]
+  adc	r1, r1, #872428544
+@ CHECK: adc	r1, r1, #1448498774     @ encoding: [0x56,0x31,0x41,0xf1]
+  adc	r1, r1, #1448498774
+@ CHECK: adc	r1, r1, #66846720       @ encoding: [0x7f,0x71,0x41,0xf1]
+  adc	r1, r1, #66846720
+
+@ CHECK: mvn	r0, #187                @ encoding: [0xbb,0x00,0x6f,0xf0]
+  mvn	r0, #187
+@ CHECK: mvn	r0, #11141290           @ encoding: [0xaa,0x10,0x6f,0xf0]
+  mvn	r0, #11141290
+@ CHECK: mvn	r0, #-872363008         @ encoding: [0xcc,0x20,0x6f,0xf0]
+  mvn	r0, #-872363008
+@ CHECK: mvn	r0, #1114112            @ encoding: [0x88,0x10,0x6f,0xf4]
+  mvn	r0, #1114112
+
+@ CHECK: cmp.w	r0, #11141290           @ encoding: [0xaa,0x1f,0xb0,0xf1]
+  cmp.w	r0, #11141290
+@ CHECK: cmp.w	r0, #-872363008         @ encoding: [0xcc,0x2f,0xb0,0xf1]
+  cmp.w	r0, #-872363008
+@ CHECK: cmp.w	r0, #-572662307         @ encoding: [0xdd,0x3f,0xb0,0xf1]
+  cmp.w	r0, #-572662307
+@ CHECK: cmp.w	r0, #1114112            @ encoding: [0x88,0x1f,0xb0,0xf5]
+  cmp.w	r0, #1114112
+@ CHECK: cmp.w	r0, r1, lsl #5          @ encoding: [0x41,0x1f,0xb0,0xeb]
+  cmp.w	r0, r1, lsl #5
+
+@ CHECK: sxtab	r0, r1, r0              @ encoding: [0x80,0xf0,0x41,0xfa]
+  sxtab	r0, r1, r0              @ encoding: [0x80,0xf0,0x41,0xfa]
+  
+@ CHECK: movw	r0, #65535              @ encoding: [0xff,0x70,0x4f,0xf6]
+  movw	r0, #65535
+@ CHECK: movw	r1, #43777              @ encoding: [0x01,0x31,0x4a,0xf6]
+  movw	r1, #43777
+@ CHECK: movt	r1, #427                @ encoding: [0xab,0x11,0xc0,0xf2]
+  movt	r1, #427
+@ CHECK: movw	r1, #43792              @ encoding: [0x10,0x31,0x4a,0xf6]
+  movw	r1, #43792
+@ CHECK: movt	r1, #4267               @ encoding: [0xab,0x01,0xc0,0xf2]
+  movt	r1, #4267
+@ CHECK: mov.w	r0, #66846720           @ encoding: [0x7f,0x70,0x4f,0xf0]
+  mov.w	r0, #66846720
+
+@ CHECK: rrx	r0, r0                  @ encoding: [0x30,0x00,0x4f,0xea]
+  rrx	r0, r0
+
+@ CHECK: bfc	r0, #4, #20             @ encoding: [0x17,0x10,0x6f,0xf3]
+  bfc	r0, #4, #20
+@ CHECK: bfc	r0, #0, #23             @ encoding: [0x16,0x00,0x6f,0xf3]
+  bfc	r0, #0, #23
+@ CHECK: bfc	r0, #12, #20            @ encoding: [0x1f,0x30,0x6f,0xf3]
+  bfc	r0, #12, #20
+
+@ CHECK: sbfx	r0, r0, #7, #11         @ encoding: [0xca,0x10,0x40,0xf3]
+  sbfx	r0, r0, #7, #11
+@ CHECK: ubfx	r0, r0, #7, #11         @ encoding: [0xca,0x10,0xc0,0xf3]
+  ubfx	r0, r0, #7, #11
+
+@ CHECK: mla	r0, r0, r1, r2          @ encoding: [0x01,0x20,0x00,0xfb]
+  mla	r0, r0, r1, r2
+@ CHECK: mls	r0, r0, r1, r2          @ encoding: [0x11,0x20,0x00,0xfb]
+  mls	r0, r0, r1, r2
+
+@ CHECK: smlabt	r0, r1, r2, r0          @ encoding: [0x12,0x00,0x11,0xfb]
+  smlabt	r0, r1, r2, r0
+
+@ CHECK: clz	r0, r0                  @ encoding: [0x80,0xf0,0xb0,0xfa]
+  clz	r0, r0
+
+@ CHECK: pkhbt	r0, r0, r1, lsl #16     @ encoding: [0x01,0x40,0xc0,0xea]
+  pkhbt	r0, r0, r1, lsl #16
+@ CHECK: pkhbt	r0, r0, r1, lsl #12     @ encoding: [0x01,0x30,0xc0,0xea]
+  pkhbt	r0, r0, r1, lsl #16
+@ CHECK: pkhbt	r0, r0, r1, lsl #18     @ encoding: [0x81,0x40,0xc0,0xea]
+  pkhbt	r0, r0, r1, lsl #18
+@ CHECK: pkhbt	r0, r0, r1              @ encoding: [0x01,0x00,0xc0,0xea]
+  pkhbt	r0, r0, r1
+@ CHECK: pkhtb	r0, r0, r1, asr #16     @ encoding: [0x21,0x40,0xc0,0xea]
+  pkhtb	r0, r0, r1, asr #16
+@ CHECK: pkhtb	r0, r0, r1, asr #12     @ encoding: [0x21,0x30,0xc0,0xea]
+  pkhtb	r0, r0, r1, asr #12
+@ CHECK: pkhtb	r0, r0, r1, asr #18     @ encoding: [0xa1,0x40,0xc0,0xea]
+  pkhtb	r0, r0, r1, asr #18
+@ CHECK: pkhtb	r0, r0, r1, asr #22     @ encoding: [0xa1,0x50,0xc0,0xea]
+  pkhtb	r0, r0, r1, asr #22
+
+@ CHECK: str.w	r0, [r1, #4092]         @ encoding: [0xfc,0x0f,0xc1,0xf8]
+  str.w	r0, [r1, #4092]
+@ CHECK: str	r0, [r1, #-128]         @ encoding: [0x80,0x0c,0x41,0xf8]
+  str	r0, [r1, #-128]
+@ CHECK: str.w	r0, [r1, r2, lsl #2]    @ encoding: [0x22,0x00,0x41,0xf8
+  str.w	r0, [r1, r2, lsl #2]
+
+@ CHECK: ldr.w	r0, [r0, #4092]         @ encoding: [0xfc,0x0f,0xd0,0xf8]
+  ldr.w	r0, [r0, #4092]
+@ CHECK: ldr	r0, [r0, #-128]         @ encoding: [0x80,0x0c,0x50,0xf8]
+  ldr	r0, [r0, #-128]
+@ CHECK: ldr.w	r0, [r0, r1, lsl #2]    @ encoding: [0x21,0x00,0x50,0xf8]
+  ldr.w	r0, [r0, r1, lsl #2]
+
+@ CHECK: str	r1, [r0, #16]!          @ encoding: [0x10,0x1f,0x40,0xf8]
+  str	r1, [r0, #16]!
+@ CHECK: strh	r1, [r0, #8]!           @ encoding: [0x08,0x1f,0x20,0xf8]
+  strh	r1, [r0, #8]!
+@ CHECK: strh	r2, [r0], #-4           @ encoding: [0x04,0x29,0x20,0xf8]
+  strh	r2, [r0], #-4
+@ CHECK: str	r2, [r0], #-4           @ encoding: [0x04,0x29,0x40,0xf8]
+  str	r2, [r0], #-4
+
+@ CHECK: ldr	r2, [r0, #16]!          @ encoding: [0x10,0x2f,0x50,0xf8]
+  ldr	r2, [r0, #16]!
+@ CHECK: ldr	r2, [r0, #-64]!         @ encoding: [0x40,0x2d,0x50,0xf8]
+  ldr	r2, [r0, #-64]!
+@ CHECK: ldrsb	r2, [r0, #4]!           @ encoding: [0x04,0x2f,0x10,0xf9]
+  ldrsb	r2, [r0, #4]!
+
+@ CHECK: strb.w	r0, [r1, #4092]         @ encoding: [0xfc,0x0f,0x81,0xf8]
+  strb.w	r0, [r1, #4092]
+@ CHECK: strb	r0, [r1, #-128]         @ encoding: [0x80,0x0c,0x01,0xf8]
+  strb	r0, [r1, #-128]
+@ CHECK: strb.w	r0, [r1, r2, lsl #2]    @ encoding: [0x22,0x00,0x01,0xf8]
+  strb.w	r0, [r1, r2, lsl #2]
+@ CHECK: strh.w	r0, [r1, #4092]         @ encoding: [0xfc,0x0f,0xa1,0xf8]
+  strh.w	r0, [r1, #4092]
+@ CHECK: strh	r0, [r1, #-128]         @ encoding: [0x80,0x0c,0x21,0xf8]
+  strh	r0, [r1, #-128]
+@ CHECK: strh	r0, [r1, #-128]         @ encoding: [0x80,0x0c,0x21,0xf8]
+  strh	r0, [r1, #-128]
+@ CHECK: strh.w	r0, [r1, r2, lsl #2]    @ encoding: [0x22,0x00,0x21,0xf8]
+  strh.w	r0, [r1, r2, lsl #2]
+
+@ CHECK: ldrb	r0, [r0, #-1]           @ encoding: [0x01,0x0c,0x10,0xf8]
+  ldrb	r0, [r0, #-1]
+@ CHECK: ldrb	r0, [r0, #-128]         @ encoding: [0x80,0x0c,0x10,0xf8]
+  ldrb	r0, [r0, #-128]
+@ CHECK: ldrb.w	r0, [r0, r1, lsl #2]    @ encoding: [0x21,0x00,0x10,0xf8]
+  ldrb.w	r0, [r0, r1, lsl #2]
+@ CHECK: ldrh.w	r0, [r0, #2046]         @ encoding: [0xfe,0x07,0xb0,0xf8]
+  ldrh.w	r0, [r0, #2046]
+@ CHECK: ldrh	r0, [r0, #-128]         @ encoding: [0x80,0x0c,0x30,0xf8]
+  ldrh	r0, [r0, #-128]
+@ CHECK: ldrh.w	r0, [r0, r1, lsl #2]    @ encoding: [0x21,0x00,0x30,0xf8]
+  ldrh.w	r0, [r0, r1, lsl #2]
+@ CHECK: ldrsb.w	r0, [r0]                @ encoding: [0x00,0x00,0x90,0xf9]
+  ldrsb.w	r0, [r0]
+@ CHECK: ldrsh.w	r0, [r0]                @ encoding: [0x00,0x00,0xb0,0xf9]
+  ldrsh.w	r0, [r0]
+@ CHECK: bfi  r0, r0, #5, #7 @ encoding: [0x60,0xf3,0x4b,0x10]
+  bfi  r0, r0, #5, #7
+@ CHECK: isb @ encoding: [0xbf,0xf3,0x6f,0x8f]
+  isb
+@ CHECK: mrs  r0, cpsr @ encoding: [0xef,0xf3,0x00,0x80]
+  mrs  r0, cpsr
+@ CHECK: vmrs  r0, fpscr @ encoding: [0xf1,0xee,0x10,0x0a]
+  vmrs  r0, fpscr
+@ CHECK: vmrs  r0, fpexc @ encoding: [0xf8,0xee,0x10,0x0a]
+  vmrs  r0, fpexc
+@ CHECK: vmrs  r0, fpsid @ encoding: [0xf0,0xee,0x10,0x0a]
+  vmrs  r0, fpsid
+
+@ CHECK: vmsr  fpscr, r0 @ encoding: [0xe1,0xee,0x10,0x0a]
+  vmsr  fpscr, r0
+@ CHECK: vmsr  fpexc, r0 @ encoding: [0xe8,0xee,0x10,0x0a]
+  vmsr  fpexc, r0
+@ CHECK: vmsr  fpsid, r0 @ encoding: [0xe0,0xee,0x10,0x0a]
+  vmsr  fpsid, r0
+
+@ CHECK: mcr2  p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xfe,0x91,0x57]
+        mcr2  p7, #1, r5, c1, c1, #4
+
+@ CHECK: mrc2  p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xfe,0x92,0x1e]
+        mrc2  p14, #0, r1, c1, c2, #4
+
+@ CHECK: mcrr2  p7, #1, r5, r4, c1 @ encoding: [0x44,0xfc,0x11,0x57]
+        mcrr2  p7, #1, r5, r4, c1
+
+@ CHECK: mrrc2  p7, #1, r5, r4, c1 @ encoding: [0x54,0xfc,0x11,0x57]
+        mrrc2  p7, #1, r5, r4, c1
+
+@ CHECK: cdp2  p7, #1, c1, c1, c1, #4 @ encoding: [0x11,0xfe,0x81,0x17]
+        cdp2  p7, #1, c1, c1, c1, #4
+
+@ CHECK: clrex @ encoding: [0xbf,0xf3,0x2f,0x8f]
+        clrex
+
+@ CHECK: clz  r9, r0 @ encoding: [0xb0,0xfa,0x80,0xf9]
+        clz  r9, r0
+
+@ CHECK: qadd  r1, r2, r3 @ encoding: [0x83,0xfa,0x82,0xf1]
+        qadd  r1, r2, r3
+
+@ CHECK: qsub  r1, r2, r3 @ encoding: [0x83,0xfa,0xa2,0xf1]
+        qsub  r1, r2, r3
+
+@ CHECK: qdadd  r1, r2, r3 @ encoding: [0x83,0xfa,0x92,0xf1]
+        qdadd  r1, r2, r3
+
+@ CHECK: qdsub  r1, r2, r3 @ encoding: [0x83,0xfa,0xb2,0xf1]
+        qdsub  r1, r2, r3
+
+@ CHECK: nop.w @ encoding: [0xaf,0xf3,0x00,0x80]
+        nop.w
+
+@ CHECK: yield.w @ encoding: [0xaf,0xf3,0x01,0x80]
+        yield.w
+
+@ CHECK: wfe.w @ encoding: [0xaf,0xf3,0x02,0x80]
+        wfe.w
+
+@ CHECK: wfi.w @ encoding: [0xaf,0xf3,0x03,0x80]
+        wfi.w
+
+@ CHECK: dmb  sy @ encoding: [0xbf,0xf3,0x5f,0x8f]
+  dmb  sy
+@ CHECK: dmb  st @ encoding: [0xbf,0xf3,0x5e,0x8f]
+  dmb  st
+@ CHECK: dmb  ish @ encoding: [0xbf,0xf3,0x5b,0x8f]
+  dmb  ish
+@ CHECK: dmb  ishst @ encoding: [0xbf,0xf3,0x5a,0x8f]
+  dmb  ishst
+@ CHECK: dmb  nsh @ encoding: [0xbf,0xf3,0x57,0x8f]
+  dmb  nsh
+@ CHECK: dmb  nshst @ encoding: [0xbf,0xf3,0x56,0x8f]
+  dmb  nshst
+@ CHECK: dmb  osh @ encoding: [0xbf,0xf3,0x53,0x8f]
+  dmb  osh
+@ CHECK: dmb  oshst @ encoding: [0xbf,0xf3,0x52,0x8f]
+  dmb  oshst
+
+@ CHECK: dsb  sy @ encoding: [0xbf,0xf3,0x4f,0x8f]
+  dsb  sy
+@ CHECK: dsb  st @ encoding: [0xbf,0xf3,0x4e,0x8f]
+  dsb  st
+@ CHECK: dsb  ish @ encoding: [0xbf,0xf3,0x4b,0x8f]
+  dsb  ish
+@ CHECK: dsb  ishst @ encoding: [0xbf,0xf3,0x4a,0x8f]
+  dsb  ishst
+@ CHECK: dsb  nsh @ encoding: [0xbf,0xf3,0x47,0x8f]
+  dsb  nsh
+@ CHECK: dsb  nshst @ encoding: [0xbf,0xf3,0x46,0x8f]
+  dsb  nshst
+@ CHECK: dsb  osh @ encoding: [0xbf,0xf3,0x43,0x8f]
+  dsb  osh
+@ CHECK: dsb  oshst @ encoding: [0xbf,0xf3,0x42,0x8f]
+  dsb  oshst
+
+@ CHECK: cpsie.w  aif @ encoding: [0xaf,0xf3,0xe0,0x84]
+  cpsie.w  aif
+@ CHECK: cps  #15 @ encoding: [0xaf,0xf3,0x0f,0x81]
+  cps  #15
+@ CHECK: cpsie.w  if, #10 @ encoding: [0xaf,0xf3,0x6a,0x85]
+  cpsie.w  if, #10
+
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x80,0xf3,0x00,0x89]
+  msr  apsr, r0
+@ CHECK: msr  cpsr_s, r0 @ encoding: [0x80,0xf3,0x00,0x84]
+  msr  apsr_g, r0
+@ CHECK: msr  cpsr_f, r0 @ encoding: [0x80,0xf3,0x00,0x88]
+  msr  apsr_nzcvq, r0
+@ CHECK: msr  cpsr_fs, r0 @ encoding: [0x80,0xf3,0x00,0x8c]
+  msr  apsr_nzcvqg, r0
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x80,0xf3,0x00,0x89]
+  msr  cpsr_fc, r0
+@ CHECK: msr  cpsr_c, r0 @ encoding: [0x80,0xf3,0x00,0x81]
+  msr  cpsr_c, r0
+@ CHECK: msr  cpsr_x, r0 @ encoding: [0x80,0xf3,0x00,0x82]
+  msr  cpsr_x, r0
+@ CHECK: msr  cpsr_fc, r0 @ encoding: [0x80,0xf3,0x00,0x89]
+  msr  cpsr_fc, r0
+@ CHECK: msr  cpsr_fsx, r0 @ encoding: [0x80,0xf3,0x00,0x8e]
+  msr  cpsr_fsx, r0
+@ CHECK: msr  spsr_fc, r0 @ encoding: [0x90,0xf3,0x00,0x89]
+  msr  spsr_fc, r0
+@ CHECK: msr  spsr_fsxc, r0 @ encoding: [0x90,0xf3,0x00,0x8f]
+  msr  spsr_fsxc, r0
+@ CHECK: msr  cpsr_fsxc, r0 @ encoding: [0x80,0xf3,0x00,0x8f]
+  msr  cpsr_fsxc, r0
+
diff --git a/test/MC/ARM/thumb2_instructions.s b/test/MC/ARM/thumb2_instructions.s
new file mode 100644
index 000000000000..71cd4aea2f85
--- /dev/null
+++ b/test/MC/ARM/thumb2_instructions.s
@@ -0,0 +1,12 @@
+@ RUN: llvm-mc -triple thumbv7-unknown-unknown -show-encoding %s > %t
+@ RUN: FileCheck < %t %s
+
+	.syntax unified
+	.text
+
+@ FIXME: This is not the correct instruction representation, but at least we are
+@ parsing the ldr to something.
+@
+@ CHECK: ldr r0, [r7, #258]
+	ldr	r0, [r7, #-8]
+        
diff --git a/test/MC/AsmParser/ARM/arm_instructions.s b/test/MC/AsmParser/ARM/arm_instructions.s
deleted file mode 100644
index 8632cb0cefd8..000000000000
--- a/test/MC/AsmParser/ARM/arm_instructions.s
+++ /dev/null
@@ -1,8 +0,0 @@
-@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s
-
-@ CHECK: nop
-        nop
-
-@ CHECK: nopeq
-        nopeq
-
diff --git a/test/MC/AsmParser/ARM/arm_word_directive.s b/test/MC/AsmParser/ARM/arm_word_directive.s
deleted file mode 100644
index 78336913169f..000000000000
--- a/test/MC/AsmParser/ARM/arm_word_directive.s
+++ /dev/null
@@ -1,6 +0,0 @@
-@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s
-
-@ CHECK: TEST0:
-@ CHECK: .long 3
-TEST0:  
-        .word 3
diff --git a/test/MC/AsmParser/ARM/dg.exp b/test/MC/AsmParser/ARM/dg.exp
deleted file mode 100644
index 3ff359aab39b..000000000000
--- a/test/MC/AsmParser/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/MC/AsmParser/ELF/dg.exp b/test/MC/AsmParser/ELF/dg.exp
deleted file mode 100644
index ca6aefe9c53d..000000000000
--- a/test/MC/AsmParser/ELF/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
-
diff --git a/test/MC/AsmParser/ELF/directive_previous.s b/test/MC/AsmParser/ELF/directive_previous.s
deleted file mode 100644
index 5db1eac03d39..000000000000
--- a/test/MC/AsmParser/ELF/directive_previous.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
-
-.bss
-# CHECK: .bss
-
-.text
-# CHECK: .text
-
-.previous
-# CHECK: .bss
-
-.previous
-# CHECK: .text
diff --git a/test/MC/AsmParser/ELF/directive_section.s b/test/MC/AsmParser/ELF/directive_section.s
deleted file mode 100644
index 9531c026e674..000000000000
--- a/test/MC/AsmParser/ELF/directive_section.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
-
-	.bss
-# CHECK: .bss
-
-	.data.rel.ro
-# CHECK: .data.rel.ro
-
-	.data.rel
-# CHECK: .data.rel
-
-	.eh_frame
-# CHECK: .eh_frame
-
-	.rodata
-# CHECK: .rodata
-
-	.tbss
-# CHECK: .tbss
-
-	.tdata
-# CHECK: .tdata
-
diff --git a/test/MC/AsmParser/X86/dg.exp b/test/MC/AsmParser/X86/dg.exp
deleted file mode 100644
index ec87b695b7ef..000000000000
--- a/test/MC/AsmParser/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s
deleted file mode 100644
index 47bf980894d0..000000000000
--- a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
-          vpclmulhqhqdq %xmm2, %xmm5, %xmm1
-
-// CHECK: vpclmulqdq  $17, (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
-          vpclmulhqhqdq (%eax), %xmm5, %xmm3
-
-// CHECK: vpclmulqdq  $1, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01]
-          vpclmulhqlqdq %xmm2, %xmm5, %xmm1
-
-// CHECK: vpclmulqdq  $1, (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01]
-          vpclmulhqlqdq (%eax), %xmm5, %xmm3
-
-// CHECK: vpclmulqdq  $16, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10]
-          vpclmullqhqdq %xmm2, %xmm5, %xmm1
-
-// CHECK: vpclmulqdq  $16, (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10]
-          vpclmullqhqdq (%eax), %xmm5, %xmm3
-
-// CHECK: vpclmulqdq  $0, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00]
-          vpclmullqlqdq %xmm2, %xmm5, %xmm1
-
-// CHECK: vpclmulqdq  $0, (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00]
-          vpclmullqlqdq (%eax), %xmm5, %xmm3
-
-// CHECK: vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
-          vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
-
-// CHECK: vpclmulqdq  $17, (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
-          vpclmulqdq  $17, (%eax), %xmm5, %xmm3
-
diff --git a/test/MC/AsmParser/X86/x86_32-avx-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-encoding.s
deleted file mode 100644
index b7ade6670a01..000000000000
--- a/test/MC/AsmParser/X86/x86_32-avx-encoding.s
+++ /dev/null
@@ -1,3241 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vaddss  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xca,0x58,0xd4]
-          vaddss  %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulss  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xca,0x59,0xd4]
-          vmulss  %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubss  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xca,0x5c,0xd4]
-          vsubss  %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivss  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xca,0x5e,0xd4]
-          vdivss  %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddsd  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xcb,0x58,0xd4]
-          vaddsd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulsd  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xcb,0x59,0xd4]
-          vmulsd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubsd  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xcb,0x5c,0xd4]
-          vsubsd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivsd  %xmm4, %xmm6, %xmm2
-// CHECK:  encoding: [0xc5,0xcb,0x5e,0xd4]
-          vdivsd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vaddss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vsubss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vmulss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vdivss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vaddsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vsubsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vmulsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK:  encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vdivsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddps  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x58,0xd4]
-          vaddps  %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubps  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4]
-          vsubps  %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulps  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x59,0xd4]
-          vmulps  %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivps  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4]
-          vdivps  %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddpd  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x58,0xd4]
-          vaddpd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubpd  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4]
-          vsubpd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulpd  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x59,0xd4]
-          vmulpd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivpd  %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4]
-          vdivpd  %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vaddps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vsubps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vmulps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vdivps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vaddpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vsubpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vmulpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          vdivpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: vmaxss  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5f,0xf2]
-          vmaxss  %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxsd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2]
-          vmaxsd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vminss  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5d,0xf2]
-          vminss  %xmm2, %xmm4, %xmm6
-
-// CHECK: vminsd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2]
-          vminsd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxss  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc]
-          vmaxss  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc]
-          vmaxsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminss  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc]
-          vminss  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
-          vminsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
-          vmaxps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
-          vmaxpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vminps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
-          vminps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vminpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
-          vminpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
-          vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
-          vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
-          vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
-          vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
-          vandps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vandpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
-          vandpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
-          vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
-          vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vorps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
-          vorps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vorpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
-          vorpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
-          vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
-          vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vxorps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
-          vxorps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vxorpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
-          vxorpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
-          vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
-          vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandnps  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
-          vandnps  %xmm2, %xmm4, %xmm6
-
-// CHECK: vandnpd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
-          vandnpd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
-          vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
-          vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmovss  -4(%ebx,%ecx,8), %xmm5
-// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
-          vmovss  -4(%ebx,%ecx,8), %xmm5
-
-// CHECK: vmovss  %xmm4, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x10,0xec]
-          vmovss  %xmm4, %xmm2, %xmm5
-
-// CHECK: vmovsd  -4(%ebx,%ecx,8), %xmm5
-// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
-          vmovsd  -4(%ebx,%ecx,8), %xmm5
-
-// CHECK: vmovsd  %xmm4, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
-          vmovsd  %xmm4, %xmm2, %xmm5
-
-// CHECK: vunpckhps  %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
-          vunpckhps  %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpckhpd  %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
-          vunpckhpd  %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpcklps  %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
-          vunpcklps  %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpcklpd  %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
-          vunpcklpd  %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
-          vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
-          vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
-          vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
-          vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vcmpps  $0, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
-          vcmpps  $0, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmpps  $0, (%eax), %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
-          vcmpps  $0, (%eax), %xmm6, %xmm1
-
-// CHECK: vcmpps  $7, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
-          vcmpps  $7, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmppd  $0, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
-          vcmppd  $0, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmppd  $0, (%eax), %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
-          vcmppd  $0, (%eax), %xmm6, %xmm1
-
-// CHECK: vcmppd  $7, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
-          vcmppd  $7, %xmm0, %xmm6, %xmm1
-
-// CHECK: vshufps  $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
-          vshufps  $8, %xmm1, %xmm2, %xmm3
-
-// CHECK: vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
-          vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vshufpd  $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
-          vshufpd  $8, %xmm1, %xmm2, %xmm3
-
-// CHECK: vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
-          vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
-          vcmpeqps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
-          vcmpleps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
-          vcmpltps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
-          vcmpneqps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
-          vcmpnleps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
-          vcmpnltps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
-          vcmpordps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
-          vcmpunordps   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmpleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordps   -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordps   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
-          vcmpeqpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
-          vcmplepd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
-          vcmpltpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
-          vcmpneqpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
-          vcmpnlepd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
-          vcmpnltpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
-          vcmpordpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
-          vcmpunordpd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmplepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnlepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordpd   -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vmovmskps  %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
-          vmovmskps  %xmm2, %eax
-
-// CHECK: vmovmskpd  %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
-          vmovmskpd  %xmm2, %eax
-
-// CHECK: vcmpss  $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
-          vcmpeqss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
-          vcmpless   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
-          vcmpltss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
-          vcmpneqss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
-          vcmpnless   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
-          vcmpnltss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
-          vcmpordss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
-          vcmpunordss   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmpless   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnless   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordss   -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpss  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordss   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
-          vcmpeqsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
-          vcmplesd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
-          vcmpltsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
-          vcmpneqsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
-          vcmpnlesd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
-          vcmpnltsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
-          vcmpordsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
-          vcmpunordsd   %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmplesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnlesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordsd   -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpsd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vucomiss  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
-          vucomiss  %xmm1, %xmm2
-
-// CHECK: vucomiss  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
-          vucomiss  (%eax), %xmm2
-
-// CHECK: vcomiss  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
-          vcomiss  %xmm1, %xmm2
-
-// CHECK: vcomiss  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
-          vcomiss  (%eax), %xmm2
-
-// CHECK: vucomisd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
-          vucomisd  %xmm1, %xmm2
-
-// CHECK: vucomisd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
-          vucomisd  (%eax), %xmm2
-
-// CHECK: vcomisd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
-          vcomisd  %xmm1, %xmm2
-
-// CHECK: vcomisd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
-          vcomisd  (%eax), %xmm2
-
-// CHECK: vcvttss2si  %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
-          vcvttss2si  %xmm1, %eax
-
-// CHECK: vcvttss2si  (%ecx), %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
-          vcvttss2si  (%ecx), %eax
-
-// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
-          vcvtsi2ss  (%eax), %xmm1, %xmm2
-
-// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
-          vcvtsi2ss  (%eax), %xmm1, %xmm2
-
-// CHECK: vcvttsd2si  %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
-          vcvttsd2si  %xmm1, %eax
-
-// CHECK: vcvttsd2si  (%ecx), %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
-          vcvttsd2si  (%ecx), %eax
-
-// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
-          vcvtsi2sd  (%eax), %xmm1, %xmm2
-
-// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
-          vcvtsi2sd  (%eax), %xmm1, %xmm2
-
-// CHECK: vmovaps  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
-          vmovaps  (%eax), %xmm2
-
-// CHECK: vmovaps  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
-          vmovaps  %xmm1, %xmm2
-
-// CHECK: vmovaps  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
-          vmovaps  %xmm1, (%eax)
-
-// CHECK: vmovapd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
-          vmovapd  (%eax), %xmm2
-
-// CHECK: vmovapd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
-          vmovapd  %xmm1, %xmm2
-
-// CHECK: vmovapd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
-          vmovapd  %xmm1, (%eax)
-
-// CHECK: vmovups  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
-          vmovups  (%eax), %xmm2
-
-// CHECK: vmovups  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
-          vmovups  %xmm1, %xmm2
-
-// CHECK: vmovups  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
-          vmovups  %xmm1, (%eax)
-
-// CHECK: vmovupd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
-          vmovupd  (%eax), %xmm2
-
-// CHECK: vmovupd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
-          vmovupd  %xmm1, %xmm2
-
-// CHECK: vmovupd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
-          vmovupd  %xmm1, (%eax)
-
-// CHECK: vmovlps  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
-          vmovlps  %xmm1, (%eax)
-
-// CHECK: vmovlps  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
-          vmovlps  (%eax), %xmm2, %xmm3
-
-// CHECK: vmovlpd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
-          vmovlpd  %xmm1, (%eax)
-
-// CHECK: vmovlpd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
-          vmovlpd  (%eax), %xmm2, %xmm3
-
-// CHECK: vmovhps  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
-          vmovhps  %xmm1, (%eax)
-
-// CHECK: vmovhps  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
-          vmovhps  (%eax), %xmm2, %xmm3
-
-// CHECK: vmovhpd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
-          vmovhpd  %xmm1, (%eax)
-
-// CHECK: vmovhpd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
-          vmovhpd  (%eax), %xmm2, %xmm3
-
-// CHECK: vmovlhps  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
-          vmovlhps  %xmm1, %xmm2, %xmm3
-
-// CHECK: vmovhlps  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
-          vmovhlps  %xmm1, %xmm2, %xmm3
-
-// CHECK: vcvtss2sil  %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
-          vcvtss2si  %xmm1, %eax
-
-// CHECK: vcvtss2sil  (%eax), %ebx
-// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
-          vcvtss2si  (%eax), %ebx
-
-// CHECK: vcvtdq2ps  %xmm5, %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
-          vcvtdq2ps  %xmm5, %xmm6
-
-// CHECK: vcvtdq2ps  (%eax), %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
-          vcvtdq2ps  (%eax), %xmm6
-
-// CHECK: vcvtsd2ss  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
-          vcvtsd2ss  %xmm2, %xmm4, %xmm6
-
-// CHECK: vcvtsd2ss  (%eax), %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
-          vcvtsd2ss  (%eax), %xmm4, %xmm6
-
-// CHECK: vcvtps2dq  %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
-          vcvtps2dq  %xmm2, %xmm3
-
-// CHECK: vcvtps2dq  (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
-          vcvtps2dq  (%eax), %xmm3
-
-// CHECK: vcvtss2sd  %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
-          vcvtss2sd  %xmm2, %xmm4, %xmm6
-
-// CHECK: vcvtss2sd  (%eax), %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
-          vcvtss2sd  (%eax), %xmm4, %xmm6
-
-// CHECK: vcvtdq2ps  %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
-          vcvtdq2ps  %xmm4, %xmm6
-
-// CHECK: vcvtdq2ps  (%ecx), %xmm4
-// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
-          vcvtdq2ps  (%ecx), %xmm4
-
-// CHECK: vcvttps2dq  %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
-          vcvttps2dq  %xmm2, %xmm3
-
-// CHECK: vcvttps2dq  (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
-          vcvttps2dq  (%eax), %xmm3
-
-// CHECK: vcvtps2pd  %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
-          vcvtps2pd  %xmm2, %xmm3
-
-// CHECK: vcvtps2pd  (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
-          vcvtps2pd  (%eax), %xmm3
-
-// CHECK: vcvtpd2ps  %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
-          vcvtpd2ps  %xmm2, %xmm3
-
-// CHECK: vsqrtpd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
-          vsqrtpd  %xmm1, %xmm2
-
-// CHECK: vsqrtpd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
-          vsqrtpd  (%eax), %xmm2
-
-// CHECK: vsqrtps  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
-          vsqrtps  %xmm1, %xmm2
-
-// CHECK: vsqrtps  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
-          vsqrtps  (%eax), %xmm2
-
-// CHECK: vsqrtsd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
-          vsqrtsd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vsqrtsd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
-          vsqrtsd  (%eax), %xmm2, %xmm3
-
-// CHECK: vsqrtss  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
-          vsqrtss  %xmm1, %xmm2, %xmm3
-
-// CHECK: vsqrtss  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x51,0x18]
-          vsqrtss  (%eax), %xmm2, %xmm3
-
-// CHECK: vrsqrtps  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
-          vrsqrtps  %xmm1, %xmm2
-
-// CHECK: vrsqrtps  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
-          vrsqrtps  (%eax), %xmm2
-
-// CHECK: vrsqrtss  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
-          vrsqrtss  %xmm1, %xmm2, %xmm3
-
-// CHECK: vrsqrtss  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x52,0x18]
-          vrsqrtss  (%eax), %xmm2, %xmm3
-
-// CHECK: vrcpps  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
-          vrcpps  %xmm1, %xmm2
-
-// CHECK: vrcpps  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
-          vrcpps  (%eax), %xmm2
-
-// CHECK: vrcpss  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
-          vrcpss  %xmm1, %xmm2, %xmm3
-
-// CHECK: vrcpss  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x53,0x18]
-          vrcpss  (%eax), %xmm2, %xmm3
-
-// CHECK: vmovntdq  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
-          vmovntdq  %xmm1, (%eax)
-
-// CHECK: vmovntpd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
-          vmovntpd  %xmm1, (%eax)
-
-// CHECK: vmovntps  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
-          vmovntps  %xmm1, (%eax)
-
-// CHECK: vldmxcsr  (%eax)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
-          vldmxcsr  (%eax)
-
-// CHECK: vstmxcsr  (%eax)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
-          vstmxcsr  (%eax)
-
-// CHECK: vldmxcsr  3735928559
-// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
-          vldmxcsr  0xdeadbeef
-
-// CHECK: vstmxcsr  3735928559
-// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
-          vstmxcsr  0xdeadbeef
-
-// CHECK: vpsubb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
-          vpsubb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
-          vpsubb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
-          vpsubw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
-          vpsubw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
-          vpsubd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
-          vpsubd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
-          vpsubq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
-          vpsubq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubsb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
-          vpsubsb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubsb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
-          vpsubsb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
-          vpsubsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
-          vpsubsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubusb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
-          vpsubusb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubusb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
-          vpsubusb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubusw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
-          vpsubusw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubusw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
-          vpsubusw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
-          vpaddb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
-          vpaddb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
-          vpaddw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
-          vpaddw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
-          vpaddd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
-          vpaddd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
-          vpaddq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
-          vpaddq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddsb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
-          vpaddsb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddsb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
-          vpaddsb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
-          vpaddsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
-          vpaddsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddusb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
-          vpaddusb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddusb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
-          vpaddusb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddusw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
-          vpaddusw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddusw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
-          vpaddusw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhuw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
-          vpmulhuw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhuw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
-          vpmulhuw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
-          vpmulhw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
-          vpmulhw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmullw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
-          vpmullw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmullw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
-          vpmullw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmuludq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
-          vpmuludq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmuludq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
-          vpmuludq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpavgb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
-          vpavgb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpavgb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
-          vpavgb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpavgw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
-          vpavgw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpavgw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
-          vpavgw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
-          vpminsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpminsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
-          vpminsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminub  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
-          vpminub  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpminub  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
-          vpminub  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
-          vpmaxsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaxsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
-          vpmaxsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxub  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
-          vpmaxub  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaxub  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
-          vpmaxub  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsadbw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
-          vpsadbw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsadbw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
-          vpsadbw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsllw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
-          vpsllw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsllw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
-          vpsllw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpslld  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
-          vpslld  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpslld  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
-          vpslld  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsllq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
-          vpsllq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsllq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
-          vpsllq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsraw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
-          vpsraw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsraw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
-          vpsraw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrad  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
-          vpsrad  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrad  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
-          vpsrad  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrlw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
-          vpsrlw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrlw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
-          vpsrlw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrld  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
-          vpsrld  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrld  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
-          vpsrld  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrlq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
-          vpsrlq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrlq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
-          vpsrlq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpslld  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
-          vpslld  $10, %xmm2, %xmm3
-
-// CHECK: vpslldq  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
-          vpslldq  $10, %xmm2, %xmm3
-
-// CHECK: vpsllq  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
-          vpsllq  $10, %xmm2, %xmm3
-
-// CHECK: vpsllw  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
-          vpsllw  $10, %xmm2, %xmm3
-
-// CHECK: vpsrad  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
-          vpsrad  $10, %xmm2, %xmm3
-
-// CHECK: vpsraw  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
-          vpsraw  $10, %xmm2, %xmm3
-
-// CHECK: vpsrld  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
-          vpsrld  $10, %xmm2, %xmm3
-
-// CHECK: vpsrldq  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
-          vpsrldq  $10, %xmm2, %xmm3
-
-// CHECK: vpsrlq  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
-          vpsrlq  $10, %xmm2, %xmm3
-
-// CHECK: vpsrlw  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
-          vpsrlw  $10, %xmm2, %xmm3
-
-// CHECK: vpslld  $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
-          vpslld  $10, %xmm2, %xmm3
-
-// CHECK: vpand  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
-          vpand  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpand  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
-          vpand  (%eax), %xmm2, %xmm3
-
-// CHECK: vpor  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
-          vpor  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpor  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
-          vpor  (%eax), %xmm2, %xmm3
-
-// CHECK: vpxor  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
-          vpxor  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpxor  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
-          vpxor  (%eax), %xmm2, %xmm3
-
-// CHECK: vpandn  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
-          vpandn  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpandn  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
-          vpandn  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
-          vpcmpeqb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
-          vpcmpeqb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
-          vpcmpeqw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
-          vpcmpeqw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
-          vpcmpeqd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
-          vpcmpeqd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
-          vpcmpgtb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
-          vpcmpgtb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
-          vpcmpgtw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
-          vpcmpgtw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
-          vpcmpgtd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
-          vpcmpgtd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpacksswb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
-          vpacksswb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpacksswb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
-          vpacksswb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpackssdw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
-          vpackssdw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpackssdw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
-          vpackssdw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpackuswb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
-          vpackuswb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpackuswb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
-          vpackuswb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpshufd  $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
-          vpshufd  $4, %xmm2, %xmm3
-
-// CHECK: vpshufd  $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
-          vpshufd  $4, (%eax), %xmm3
-
-// CHECK: vpshufhw  $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
-          vpshufhw  $4, %xmm2, %xmm3
-
-// CHECK: vpshufhw  $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
-          vpshufhw  $4, (%eax), %xmm3
-
-// CHECK: vpshuflw  $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
-          vpshuflw  $4, %xmm2, %xmm3
-
-// CHECK: vpshuflw  $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
-          vpshuflw  $4, (%eax), %xmm3
-
-// CHECK: vpunpcklbw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
-          vpunpcklbw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklbw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
-          vpunpcklbw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpcklwd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
-          vpunpcklwd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklwd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
-          vpunpcklwd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckldq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
-          vpunpckldq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckldq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
-          vpunpckldq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpcklqdq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
-          vpunpcklqdq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklqdq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
-          vpunpcklqdq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhbw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
-          vpunpckhbw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhbw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
-          vpunpckhbw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhwd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
-          vpunpckhwd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhwd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
-          vpunpckhwd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhdq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
-          vpunpckhdq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhdq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
-          vpunpckhdq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhqdq  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
-          vpunpckhqdq  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhqdq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
-          vpunpckhqdq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpinsrw  $7, %eax, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
-          vpinsrw  $7, %eax, %xmm2, %xmm3
-
-// CHECK: vpinsrw  $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
-          vpinsrw  $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vpextrw  $7, %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
-          vpextrw  $7, %xmm2, %eax
-
-// CHECK: vpmovmskb  %xmm1, %eax
-// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
-          vpmovmskb  %xmm1, %eax
-
-// CHECK: vmaskmovdqu  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
-          vmaskmovdqu  %xmm1, %xmm2
-
-// CHECK: vmovd  %xmm1, %eax
-// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
-          vmovd  %xmm1, %eax
-
-// CHECK: vmovd  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
-          vmovd  %xmm1, (%eax)
-
-// CHECK: vmovd  %eax, %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
-          vmovd  %eax, %xmm1
-
-// CHECK: vmovd  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
-          vmovd  (%eax), %xmm1
-
-// CHECK: vmovq  %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
-          vmovq  %xmm1, (%eax)
-
-// CHECK: vmovq  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
-          vmovq  %xmm1, %xmm2
-
-// CHECK: vmovq  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
-          vmovq  (%eax), %xmm1
-
-// CHECK: vcvtpd2dq  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
-          vcvtpd2dq  %xmm1, %xmm2
-
-// CHECK: vcvtdq2pd  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
-          vcvtdq2pd  %xmm1, %xmm2
-
-// CHECK: vcvtdq2pd  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
-          vcvtdq2pd  (%eax), %xmm2
-
-// CHECK: vmovshdup  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
-          vmovshdup  %xmm1, %xmm2
-
-// CHECK: vmovshdup  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
-          vmovshdup  (%eax), %xmm2
-
-// CHECK: vmovsldup  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
-          vmovsldup  %xmm1, %xmm2
-
-// CHECK: vmovsldup  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
-          vmovsldup  (%eax), %xmm2
-
-// CHECK: vmovddup  %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
-          vmovddup  %xmm1, %xmm2
-
-// CHECK: vmovddup  (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
-          vmovddup  (%eax), %xmm2
-
-// CHECK: vaddsubps  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
-          vaddsubps  %xmm1, %xmm2, %xmm3
-
-// CHECK: vaddsubps  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
-          vaddsubps  (%eax), %xmm1, %xmm2
-
-// CHECK: vaddsubpd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
-          vaddsubpd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vaddsubpd  (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
-          vaddsubpd  (%eax), %xmm1, %xmm2
-
-// CHECK: vhaddps  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
-          vhaddps  %xmm1, %xmm2, %xmm3
-
-// CHECK: vhaddps  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
-          vhaddps  (%eax), %xmm2, %xmm3
-
-// CHECK: vhaddpd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
-          vhaddpd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vhaddpd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
-          vhaddpd  (%eax), %xmm2, %xmm3
-
-// CHECK: vhsubps  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
-          vhsubps  %xmm1, %xmm2, %xmm3
-
-// CHECK: vhsubps  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
-          vhsubps  (%eax), %xmm2, %xmm3
-
-// CHECK: vhsubpd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
-          vhsubpd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vhsubpd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
-          vhsubpd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpabsb  %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
-          vpabsb  %xmm1, %xmm2
-
-// CHECK: vpabsb  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
-          vpabsb  (%eax), %xmm2
-
-// CHECK: vpabsw  %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
-          vpabsw  %xmm1, %xmm2
-
-// CHECK: vpabsw  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
-          vpabsw  (%eax), %xmm2
-
-// CHECK: vpabsd  %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
-          vpabsd  %xmm1, %xmm2
-
-// CHECK: vpabsd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
-          vpabsd  (%eax), %xmm2
-
-// CHECK: vphaddw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
-          vphaddw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
-          vphaddw  (%eax), %xmm2, %xmm3
-
-// CHECK: vphaddd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
-          vphaddd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
-          vphaddd  (%eax), %xmm2, %xmm3
-
-// CHECK: vphaddsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
-          vphaddsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
-          vphaddsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
-          vphsubw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
-          vphsubw  (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
-          vphsubd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
-          vphsubd  (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
-          vphsubsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
-          vphsubsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaddubsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
-          vpmaddubsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaddubsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
-          vpmaddubsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpshufb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
-          vpshufb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpshufb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
-          vpshufb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignb  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
-          vpsignb  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
-          vpsignb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
-          vpsignw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
-          vpsignw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignd  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
-          vpsignd  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
-          vpsignd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhrsw  %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
-          vpmulhrsw  %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhrsw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
-          vpmulhrsw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpalignr  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
-          vpalignr  $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vpalignr  $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
-          vpalignr  $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundsd  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07]
-          vroundsd  $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vroundsd  $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07]
-          vroundsd  $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundss  $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07]
-          vroundss  $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vroundss  $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07]
-          vroundss  $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundpd  $7, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07]
-          vroundpd  $7, %xmm2, %xmm3
-
-// CHECK: vroundpd  $7, (%eax), %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07]
-          vroundpd  $7, (%eax), %xmm3
-
-// CHECK: vroundps  $7, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07]
-          vroundps  $7, %xmm2, %xmm3
-
-// CHECK: vroundps  $7, (%eax), %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07]
-          vroundps  $7, (%eax), %xmm3
-
-// CHECK: vphminposuw  %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda]
-          vphminposuw  %xmm2, %xmm3
-
-// CHECK: vphminposuw  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
-          vphminposuw  (%eax), %xmm2
-
-// CHECK: vpackusdw  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
-          vpackusdw  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpackusdw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
-          vpackusdw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqq  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
-          vpcmpeqq  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpcmpeqq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
-          vpcmpeqq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsb  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
-          vpminsb  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminsb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
-          vpminsb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsd  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
-          vpminsd  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminsd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
-          vpminsd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminud  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
-          vpminud  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminud  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
-          vpminud  (%eax), %xmm2, %xmm3
-
-// CHECK: vpminuw  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
-          vpminuw  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminuw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
-          vpminuw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsb  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
-          vpmaxsb  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxsb  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
-          vpmaxsb  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsd  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
-          vpmaxsd  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxsd  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
-          vpmaxsd  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxud  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
-          vpmaxud  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxud  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
-          vpmaxud  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxuw  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
-          vpmaxuw  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxuw  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
-          vpmaxuw  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmuldq  %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
-          vpmuldq  %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmuldq  (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
-          vpmuldq  (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulld  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca]
-          vpmulld  %xmm2, %xmm5, %xmm1
-
-// CHECK: vpmulld  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18]
-          vpmulld  (%eax), %xmm5, %xmm3
-
-// CHECK: vblendps  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03]
-          vblendps  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vblendps  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03]
-          vblendps  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vblendpd  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03]
-          vblendpd  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vblendpd  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03]
-          vblendpd  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vpblendw  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03]
-          vpblendw  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vpblendw  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03]
-          vpblendw  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vmpsadbw  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03]
-          vmpsadbw  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vmpsadbw  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03]
-          vmpsadbw  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vdpps  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03]
-          vdpps  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vdpps  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03]
-          vdpps  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vdppd  $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03]
-          vdppd  $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vdppd  $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03]
-          vdppd  $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20]
-          vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20]
-          vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20]
-          vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vblendvps  %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20]
-          vblendvps  %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20]
-          vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20]
-          vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vpmovsxbw  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea]
-          vpmovsxbw  %xmm2, %xmm5
-
-// CHECK: vpmovsxbw  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10]
-          vpmovsxbw  (%eax), %xmm2
-
-// CHECK: vpmovsxwd  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea]
-          vpmovsxwd  %xmm2, %xmm5
-
-// CHECK: vpmovsxwd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10]
-          vpmovsxwd  (%eax), %xmm2
-
-// CHECK: vpmovsxdq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea]
-          vpmovsxdq  %xmm2, %xmm5
-
-// CHECK: vpmovsxdq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10]
-          vpmovsxdq  (%eax), %xmm2
-
-// CHECK: vpmovzxbw  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea]
-          vpmovzxbw  %xmm2, %xmm5
-
-// CHECK: vpmovzxbw  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10]
-          vpmovzxbw  (%eax), %xmm2
-
-// CHECK: vpmovzxwd  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea]
-          vpmovzxwd  %xmm2, %xmm5
-
-// CHECK: vpmovzxwd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10]
-          vpmovzxwd  (%eax), %xmm2
-
-// CHECK: vpmovzxdq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea]
-          vpmovzxdq  %xmm2, %xmm5
-
-// CHECK: vpmovzxdq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10]
-          vpmovzxdq  (%eax), %xmm2
-
-// CHECK: vpmovsxbq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea]
-          vpmovsxbq  %xmm2, %xmm5
-
-// CHECK: vpmovsxbq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10]
-          vpmovsxbq  (%eax), %xmm2
-
-// CHECK: vpmovzxbq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea]
-          vpmovzxbq  %xmm2, %xmm5
-
-// CHECK: vpmovzxbq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10]
-          vpmovzxbq  (%eax), %xmm2
-
-// CHECK: vpmovsxbd  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea]
-          vpmovsxbd  %xmm2, %xmm5
-
-// CHECK: vpmovsxbd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10]
-          vpmovsxbd  (%eax), %xmm2
-
-// CHECK: vpmovsxwq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea]
-          vpmovsxwq  %xmm2, %xmm5
-
-// CHECK: vpmovsxwq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10]
-          vpmovsxwq  (%eax), %xmm2
-
-// CHECK: vpmovzxbd  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea]
-          vpmovzxbd  %xmm2, %xmm5
-
-// CHECK: vpmovzxbd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10]
-          vpmovzxbd  (%eax), %xmm2
-
-// CHECK: vpmovzxwq  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea]
-          vpmovzxwq  %xmm2, %xmm5
-
-// CHECK: vpmovzxwq  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10]
-          vpmovzxwq  (%eax), %xmm2
-
-// CHECK: vpextrw  $7, %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
-          vpextrw  $7, %xmm2, %eax
-
-// CHECK: vpextrw  $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07]
-          vpextrw  $7, %xmm2, (%eax)
-
-// CHECK: vpextrd  $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07]
-          vpextrd  $7, %xmm2, %eax
-
-// CHECK: vpextrd  $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07]
-          vpextrd  $7, %xmm2, (%eax)
-
-// CHECK: vpextrb  $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07]
-          vpextrb  $7, %xmm2, %eax
-
-// CHECK: vpextrb  $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07]
-          vpextrb  $7, %xmm2, (%eax)
-
-// CHECK: vextractps  $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07]
-          vextractps  $7, %xmm2, (%eax)
-
-// CHECK: vextractps  $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07]
-          vextractps  $7, %xmm2, %eax
-
-// CHECK: vpinsrw  $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07]
-          vpinsrw  $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrw  $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07]
-          vpinsrw  $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vpinsrb  $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07]
-          vpinsrb  $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrb  $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07]
-          vpinsrb  $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vpinsrd  $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07]
-          vpinsrd  $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrd  $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
-          vpinsrd  $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vinsertps  $7, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07]
-          vinsertps  $7, %xmm2, %xmm5, %xmm1
-
-// CHECK: vinsertps  $7, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07]
-          vinsertps  $7, (%eax), %xmm5, %xmm1
-
-// CHECK: vptest  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea]
-          vptest  %xmm2, %xmm5
-
-// CHECK: vptest  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10]
-          vptest  (%eax), %xmm2
-
-// CHECK: vmovntdqa  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10]
-          vmovntdqa  (%eax), %xmm2
-
-// CHECK: vpcmpgtq  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca]
-          vpcmpgtq  %xmm2, %xmm5, %xmm1
-
-// CHECK: vpcmpgtq  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18]
-          vpcmpgtq  (%eax), %xmm5, %xmm3
-
-// CHECK: vpcmpistrm  $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07]
-          vpcmpistrm  $7, %xmm2, %xmm5
-
-// CHECK: vpcmpistrm  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07]
-          vpcmpistrm  $7, (%eax), %xmm5
-
-// CHECK: vpcmpestrm  $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07]
-          vpcmpestrm  $7, %xmm2, %xmm5
-
-// CHECK: vpcmpestrm  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07]
-          vpcmpestrm  $7, (%eax), %xmm5
-
-// CHECK: vpcmpistri  $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07]
-          vpcmpistri  $7, %xmm2, %xmm5
-
-// CHECK: vpcmpistri  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07]
-          vpcmpistri  $7, (%eax), %xmm5
-
-// CHECK: vpcmpestri  $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07]
-          vpcmpestri  $7, %xmm2, %xmm5
-
-// CHECK: vpcmpestri  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07]
-          vpcmpestri  $7, (%eax), %xmm5
-
-// CHECK: vaesimc  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea]
-          vaesimc  %xmm2, %xmm5
-
-// CHECK: vaesimc  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10]
-          vaesimc  (%eax), %xmm2
-
-// CHECK: vaesenc  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca]
-          vaesenc  %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesenc  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18]
-          vaesenc  (%eax), %xmm5, %xmm3
-
-// CHECK: vaesenclast  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca]
-          vaesenclast  %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesenclast  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18]
-          vaesenclast  (%eax), %xmm5, %xmm3
-
-// CHECK: vaesdec  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca]
-          vaesdec  %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesdec  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18]
-          vaesdec  (%eax), %xmm5, %xmm3
-
-// CHECK: vaesdeclast  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca]
-          vaesdeclast  %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesdeclast  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18]
-          vaesdeclast  (%eax), %xmm5, %xmm3
-
-// CHECK: vaeskeygenassist  $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07]
-          vaeskeygenassist  $7, %xmm2, %xmm5
-
-// CHECK: vaeskeygenassist  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07]
-          vaeskeygenassist  $7, (%eax), %xmm5
-
-// CHECK: vcmpps  $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08]
-          vcmpeq_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $9, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09]
-          vcmpngeps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $10, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a]
-          vcmpngtps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $11, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b]
-          vcmpfalseps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $12, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c]
-          vcmpneq_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $13, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d]
-          vcmpgeps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $14, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e]
-          vcmpgtps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $15, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f]
-          vcmptrueps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $16, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10]
-          vcmpeq_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $17, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11]
-          vcmplt_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $18, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12]
-          vcmple_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $19, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13]
-          vcmpunord_sps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $20, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14]
-          vcmpneq_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $21, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15]
-          vcmpnlt_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $22, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16]
-          vcmpnle_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $23, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17]
-          vcmpord_sps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $24, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18]
-          vcmpeq_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $25, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19]
-          vcmpnge_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $26, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a]
-          vcmpngt_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $27, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b]
-          vcmpfalse_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $28, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c]
-          vcmpneq_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $29, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d]
-          vcmpge_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $30, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e]
-          vcmpgt_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps  $31, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f]
-          vcmptrue_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vmovaps  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x28,0x10]
-          vmovaps  (%eax), %ymm2
-
-// CHECK: vmovaps  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x28,0xd1]
-          vmovaps  %ymm1, %ymm2
-
-// CHECK: vmovaps  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x29,0x08]
-          vmovaps  %ymm1, (%eax)
-
-// CHECK: vmovapd  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x28,0x10]
-          vmovapd  (%eax), %ymm2
-
-// CHECK: vmovapd  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x28,0xd1]
-          vmovapd  %ymm1, %ymm2
-
-// CHECK: vmovapd  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x29,0x08]
-          vmovapd  %ymm1, (%eax)
-
-// CHECK: vmovups  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x10,0x10]
-          vmovups  (%eax), %ymm2
-
-// CHECK: vmovups  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x10,0xd1]
-          vmovups  %ymm1, %ymm2
-
-// CHECK: vmovups  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x11,0x08]
-          vmovups  %ymm1, (%eax)
-
-// CHECK: vmovupd  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x10,0x10]
-          vmovupd  (%eax), %ymm2
-
-// CHECK: vmovupd  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x10,0xd1]
-          vmovupd  %ymm1, %ymm2
-
-// CHECK: vmovupd  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
-          vmovupd  %ymm1, (%eax)
-
-// CHECK: vunpckhps  %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xec,0x15,0xe1]
-          vunpckhps  %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpckhpd  %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xed,0x15,0xe1]
-          vunpckhpd  %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpcklps  %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xec,0x14,0xe1]
-          vunpcklps  %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpcklpd  %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xed,0x14,0xe1]
-          vunpcklpd  %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpckhps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc]
-          vunpckhps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpckhpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc]
-          vunpckhpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpcklps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc]
-          vunpcklps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpcklpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc]
-          vunpcklpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vmovntdq  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0xe7,0x08]
-          vmovntdq  %ymm1, (%eax)
-
-// CHECK: vmovntpd  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x2b,0x08]
-          vmovntpd  %ymm1, (%eax)
-
-// CHECK: vmovntps  %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x2b,0x08]
-          vmovntps  %ymm1, (%eax)
-
-// CHECK: vmovmskps  %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
-          vmovmskps  %xmm2, %eax
-
-// CHECK: vmovmskpd  %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
-          vmovmskpd  %xmm2, %eax
-
-// CHECK: vmaxps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2]
-          vmaxps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vmaxpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2]
-          vmaxpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vminps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2]
-          vminps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vminpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2]
-          vminpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vsubps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2]
-          vsubps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vsubpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2]
-          vsubpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vdivps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2]
-          vdivps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vdivpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2]
-          vdivpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vaddps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0xf2]
-          vaddps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vaddpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0xf2]
-          vaddpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vmulps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0xf2]
-          vmulps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vmulpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0xf2]
-          vmulpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vmaxps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
-          vmaxps  (%eax), %ymm4, %ymm6
-
-// CHECK: vmaxpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
-          vmaxpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vminps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
-          vminps  (%eax), %ymm4, %ymm6
-
-// CHECK: vminpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
-          vminpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vsubps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
-          vsubps  (%eax), %ymm4, %ymm6
-
-// CHECK: vsubpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
-          vsubpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vdivps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
-          vdivps  (%eax), %ymm4, %ymm6
-
-// CHECK: vdivpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
-          vdivpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vaddps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
-          vaddps  (%eax), %ymm4, %ymm6
-
-// CHECK: vaddpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
-          vaddpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vmulps  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
-          vmulps  (%eax), %ymm4, %ymm6
-
-// CHECK: vmulpd  (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
-          vmulpd  (%eax), %ymm4, %ymm6
-
-// CHECK: vsqrtpd  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x51,0xd1]
-          vsqrtpd  %ymm1, %ymm2
-
-// CHECK: vsqrtpd  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x51,0x10]
-          vsqrtpd  (%eax), %ymm2
-
-// CHECK: vsqrtps  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x51,0xd1]
-          vsqrtps  %ymm1, %ymm2
-
-// CHECK: vsqrtps  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x51,0x10]
-          vsqrtps  (%eax), %ymm2
-
-// CHECK: vrsqrtps  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x52,0xd1]
-          vrsqrtps  %ymm1, %ymm2
-
-// CHECK: vrsqrtps  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x52,0x10]
-          vrsqrtps  (%eax), %ymm2
-
-// CHECK: vrcpps  %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x53,0xd1]
-          vrcpps  %ymm1, %ymm2
-
-// CHECK: vrcpps  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x53,0x10]
-          vrcpps  (%eax), %ymm2
-
-// CHECK: vandps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x54,0xf2]
-          vandps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vandpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x54,0xf2]
-          vandpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vandps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc]
-          vandps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc]
-          vandpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vorps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x56,0xf2]
-          vorps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vorpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x56,0xf2]
-          vorpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc]
-          vorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc]
-          vorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vxorps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x57,0xf2]
-          vxorps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vxorpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x57,0xf2]
-          vxorpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vxorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc]
-          vxorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vxorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc]
-          vxorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandnps  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x55,0xf2]
-          vandnps  %ymm2, %ymm4, %ymm6
-
-// CHECK: vandnpd  %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x55,0xf2]
-          vandnpd  %ymm2, %ymm4, %ymm6
-
-// CHECK: vandnps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc]
-          vandnps  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandnpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc]
-          vandnpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vcvtps2pd  %xmm3, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3]
-          vcvtps2pd  %xmm3, %ymm2
-
-// CHECK: vcvtps2pd  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5a,0x10]
-          vcvtps2pd  (%eax), %ymm2
-
-// CHECK: vcvtdq2pd  %xmm3, %ymm2
-// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3]
-          vcvtdq2pd  %xmm3, %ymm2
-
-// CHECK: vcvtdq2pd  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfe,0xe6,0x10]
-          vcvtdq2pd  (%eax), %ymm2
-
-// CHECK: vcvtdq2ps  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfc,0x5b,0xea]
-          vcvtdq2ps  %ymm2, %ymm5
-
-// CHECK: vcvtdq2ps  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5b,0x10]
-          vcvtdq2ps  (%eax), %ymm2
-
-// CHECK: vcvtps2dq  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfd,0x5b,0xea]
-          vcvtps2dq  %ymm2, %ymm5
-
-// CHECK: vcvtps2dq  (%eax), %ymm5
-// CHECK: encoding: [0xc5,0xfd,0x5b,0x28]
-          vcvtps2dq  (%eax), %ymm5
-
-// CHECK: vcvttps2dq  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x5b,0xea]
-          vcvttps2dq  %ymm2, %ymm5
-
-// CHECK: vcvttps2dq  (%eax), %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x5b,0x28]
-          vcvttps2dq  (%eax), %ymm5
-
-// CHECK: vcvttpd2dq  %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
-          vcvttpd2dq  %xmm1, %xmm5
-
-// CHECK: vcvttpd2dq  %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
-          vcvttpd2dq  %ymm2, %xmm5
-
-// CHECK: vcvttpd2dqx  %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
-          vcvttpd2dqx  %xmm1, %xmm5
-
-// CHECK: vcvttpd2dqx  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0xe6,0x08]
-          vcvttpd2dqx  (%eax), %xmm1
-
-// CHECK: vcvttpd2dqy  %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xfd,0xe6,0xca]
-          vcvttpd2dqy  %ymm2, %xmm1
-
-// CHECK: vcvttpd2dqy  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
-          vcvttpd2dqy  (%eax), %xmm1
-
-// CHECK: vcvtpd2ps  %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
-          vcvtpd2ps  %ymm2, %xmm5
-
-// CHECK: vcvtpd2psx  %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
-          vcvtpd2psx  %xmm1, %xmm5
-
-// CHECK: vcvtpd2psx  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x5a,0x08]
-          vcvtpd2psx  (%eax), %xmm1
-
-// CHECK: vcvtpd2psy  %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xfd,0x5a,0xca]
-          vcvtpd2psy  %ymm2, %xmm1
-
-// CHECK: vcvtpd2psy  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
-          vcvtpd2psy  (%eax), %xmm1
-
-// CHECK: vcvtpd2dq  %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xff,0xe6,0xea]
-          vcvtpd2dq  %ymm2, %xmm5
-
-// CHECK: vcvtpd2dqy  %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xff,0xe6,0xca]
-          vcvtpd2dqy  %ymm2, %xmm1
-
-// CHECK: vcvtpd2dqy  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xff,0xe6,0x08]
-          vcvtpd2dqy  (%eax), %xmm1
-
-// CHECK: vcvtpd2dqx  %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
-          vcvtpd2dqx  %xmm1, %xmm5
-
-// CHECK: vcvtpd2dqx  (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfb,0xe6,0x08]
-          vcvtpd2dqx  (%eax), %xmm1
-
-// CHECK: vcmpps  $0, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00]
-          vcmpeqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $2, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02]
-          vcmpleps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $1, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01]
-          vcmpltps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $4, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04]
-          vcmpneqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $6, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06]
-          vcmpnleps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $5, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05]
-          vcmpnltps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $7, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07]
-          vcmpordps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $3, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03]
-          vcmpunordps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
-// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2
-
-// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $0, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00]
-          vcmpeqpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $2, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02]
-          vcmplepd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $1, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01]
-          vcmpltpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $4, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04]
-          vcmpneqpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $6, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06]
-          vcmpnlepd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $5, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05]
-          vcmpnltpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $7, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07]
-          vcmpordpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $3, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03]
-          vcmpunordpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00]
-          vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02]
-          vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01]
-          vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04]
-          vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06]
-          vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05]
-          vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
-// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2
-
-// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03]
-          vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps  $8, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08]
-          vcmpeq_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $9, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09]
-          vcmpngeps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $10, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a]
-          vcmpngtps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $11, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b]
-          vcmpfalseps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $12, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c]
-          vcmpneq_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $13, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d]
-          vcmpgeps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $14, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e]
-          vcmpgtps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $15, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f]
-          vcmptrueps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $16, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10]
-          vcmpeq_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $17, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11]
-          vcmplt_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $18, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12]
-          vcmple_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $19, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13]
-          vcmpunord_sps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $20, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14]
-          vcmpneq_usps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $21, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15]
-          vcmpnlt_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $22, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16]
-          vcmpnle_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $23, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17]
-          vcmpord_sps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $24, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18]
-          vcmpeq_usps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $25, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19]
-          vcmpnge_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $26, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a]
-          vcmpngt_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $27, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b]
-          vcmpfalse_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $28, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c]
-          vcmpneq_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $29, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d]
-          vcmpge_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $30, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e]
-          vcmpgt_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps  $31, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
-          vcmptrue_usps %ymm1, %ymm2, %ymm3
-
-// CHECK: vaddsubps  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xef,0xd0,0xd9]
-          vaddsubps  %ymm1, %ymm2, %ymm3
-
-// CHECK: vaddsubps  (%eax), %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xf7,0xd0,0x10]
-          vaddsubps  (%eax), %ymm1, %ymm2
-
-// CHECK: vaddsubpd  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xd0,0xd9]
-          vaddsubpd  %ymm1, %ymm2, %ymm3
-
-// CHECK: vaddsubpd  (%eax), %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xf5,0xd0,0x10]
-          vaddsubpd  (%eax), %ymm1, %ymm2
-
-// CHECK: vhaddps  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xef,0x7c,0xd9]
-          vhaddps  %ymm1, %ymm2, %ymm3
-
-// CHECK: vhaddps  (%eax), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xef,0x7c,0x18]
-          vhaddps  (%eax), %ymm2, %ymm3
-
-// CHECK: vhaddpd  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0x7c,0xd9]
-          vhaddpd  %ymm1, %ymm2, %ymm3
-
-// CHECK: vhaddpd  (%eax), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0x7c,0x18]
-          vhaddpd  (%eax), %ymm2, %ymm3
-
-// CHECK: vhsubps  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xef,0x7d,0xd9]
-          vhsubps  %ymm1, %ymm2, %ymm3
-
-// CHECK: vhsubps  (%eax), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xef,0x7d,0x18]
-          vhsubps  (%eax), %ymm2, %ymm3
-
-// CHECK: vhsubpd  %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0x7d,0xd9]
-          vhsubpd  %ymm1, %ymm2, %ymm3
-
-// CHECK: vhsubpd  (%eax), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0x7d,0x18]
-          vhsubpd  (%eax), %ymm2, %ymm3
-
-// CHECK: vblendps  $3, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03]
-          vblendps  $3, %ymm2, %ymm5, %ymm1
-
-// CHECK: vblendps  $3, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03]
-          vblendps  $3, (%eax), %ymm5, %ymm1
-
-// CHECK: vblendpd  $3, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03]
-          vblendpd  $3, %ymm2, %ymm5, %ymm1
-
-// CHECK: vblendpd  $3, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03]
-          vblendpd  $3, (%eax), %ymm5, %ymm1
-
-// CHECK: vdpps  $3, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03]
-          vdpps  $3, %ymm2, %ymm5, %ymm1
-
-// CHECK: vdpps  $3, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03]
-          vdpps  $3, (%eax), %ymm5, %ymm1
-
-// CHECK: vbroadcastf128  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10]
-          vbroadcastf128  (%eax), %ymm2
-
-// CHECK: vbroadcastsd  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10]
-          vbroadcastsd  (%eax), %ymm2
-
-// CHECK: vbroadcastss  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10]
-          vbroadcastss  (%eax), %xmm2
-
-// CHECK: vbroadcastss  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10]
-          vbroadcastss  (%eax), %ymm2
-
-// CHECK: vinsertf128  $7, %xmm2, %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07]
-          vinsertf128  $7, %xmm2, %ymm2, %ymm5
-
-// CHECK: vinsertf128  $7, (%eax), %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0x28,0x07]
-          vinsertf128  $7, (%eax), %ymm2, %ymm5
-
-// CHECK: vextractf128  $7, %ymm2, %xmm2
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07]
-          vextractf128  $7, %ymm2, %xmm2
-
-// CHECK: vextractf128  $7, %ymm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07]
-          vextractf128  $7, %ymm2, (%eax)
-
-// CHECK: vmaskmovpd  %xmm2, %xmm5, (%eax)
-// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10]
-          vmaskmovpd  %xmm2, %xmm5, (%eax)
-
-// CHECK: vmaskmovpd  %ymm2, %ymm5, (%eax)
-// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10]
-          vmaskmovpd  %ymm2, %ymm5, (%eax)
-
-// CHECK: vmaskmovpd  (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28]
-          vmaskmovpd  (%eax), %xmm2, %xmm5
-
-// CHECK: vmaskmovpd  (%eax), %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28]
-          vmaskmovpd  (%eax), %ymm2, %ymm5
-
-// CHECK: vmaskmovps  %xmm2, %xmm5, (%eax)
-// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10]
-          vmaskmovps  %xmm2, %xmm5, (%eax)
-
-// CHECK: vmaskmovps  %ymm2, %ymm5, (%eax)
-// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10]
-          vmaskmovps  %ymm2, %ymm5, (%eax)
-
-// CHECK: vmaskmovps  (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28]
-          vmaskmovps  (%eax), %xmm2, %xmm5
-
-// CHECK: vmaskmovps  (%eax), %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28]
-          vmaskmovps  (%eax), %ymm2, %ymm5
-
-// CHECK: vpermilps  $7, %xmm1, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07]
-          vpermilps  $7, %xmm1, %xmm5
-
-// CHECK: vpermilps  $7, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07]
-          vpermilps  $7, %ymm5, %ymm1
-
-// CHECK: vpermilps  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07]
-          vpermilps  $7, (%eax), %xmm5
-
-// CHECK: vpermilps  $7, (%eax), %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07]
-          vpermilps  $7, (%eax), %ymm5
-
-// CHECK: vpermilps  %xmm1, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9]
-          vpermilps  %xmm1, %xmm5, %xmm1
-
-// CHECK: vpermilps  %ymm1, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9]
-          vpermilps  %ymm1, %ymm5, %ymm1
-
-// CHECK: vpermilps  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18]
-          vpermilps  (%eax), %xmm5, %xmm3
-
-// CHECK: vpermilps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08]
-          vpermilps  (%eax), %ymm5, %ymm1
-
-// CHECK: vpermilpd  $7, %xmm1, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07]
-          vpermilpd  $7, %xmm1, %xmm5
-
-// CHECK: vpermilpd  $7, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07]
-          vpermilpd  $7, %ymm5, %ymm1
-
-// CHECK: vpermilpd  $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07]
-          vpermilpd  $7, (%eax), %xmm5
-
-// CHECK: vpermilpd  $7, (%eax), %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07]
-          vpermilpd  $7, (%eax), %ymm5
-
-// CHECK: vpermilpd  %xmm1, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9]
-          vpermilpd  %xmm1, %xmm5, %xmm1
-
-// CHECK: vpermilpd  %ymm1, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9]
-          vpermilpd  %ymm1, %ymm5, %ymm1
-
-// CHECK: vpermilpd  (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18]
-          vpermilpd  (%eax), %xmm5, %xmm3
-
-// CHECK: vpermilpd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08]
-          vpermilpd  (%eax), %ymm5, %ymm1
-
-// CHECK: vperm2f128  $7, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07]
-          vperm2f128  $7, %ymm2, %ymm5, %ymm1
-
-// CHECK: vperm2f128  $7, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07]
-          vperm2f128  $7, (%eax), %ymm5, %ymm1
-
-// CHECK: vzeroall
-// CHECK: encoding: [0xc5,0xfc,0x77]
-          vzeroall
-
-// CHECK: vzeroupper
-// CHECK: encoding: [0xc5,0xf8,0x77]
-          vzeroupper
-
-// CHECK: vcvtsd2si  %xmm4, %ecx
-// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
-          vcvtsd2si  %xmm4, %ecx
-
-// CHECK: vcvtsd2si  (%ecx), %ecx
-// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
-          vcvtsd2si  (%ecx), %ecx
-
-// CHECK: vcvtsi2sdl  (%ebp), %xmm0, %xmm7
-// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
-          vcvtsi2sdl  (%ebp), %xmm0, %xmm7
-
-// CHECK: vcvtsi2sdl  (%esp), %xmm0, %xmm7
-// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
-          vcvtsi2sdl  (%esp), %xmm0, %xmm7
-
-// CHECK: vlddqu  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xff,0xf0,0x10]
-          vlddqu  (%eax), %ymm2
-
-// CHECK: vmovddup  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xff,0x12,0xea]
-          vmovddup  %ymm2, %ymm5
-
-// CHECK: vmovddup  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xff,0x12,0x10]
-          vmovddup  (%eax), %ymm2
-
-// CHECK: vmovdqa  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfd,0x6f,0xea]
-          vmovdqa  %ymm2, %ymm5
-
-// CHECK: vmovdqa  %ymm2, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x7f,0x10]
-          vmovdqa  %ymm2, (%eax)
-
-// CHECK: vmovdqa  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x6f,0x10]
-          vmovdqa  (%eax), %ymm2
-
-// CHECK: vmovdqu  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x6f,0xea]
-          vmovdqu  %ymm2, %ymm5
-
-// CHECK: vmovdqu  %ymm2, (%eax)
-// CHECK: encoding: [0xc5,0xfe,0x7f,0x10]
-          vmovdqu  %ymm2, (%eax)
-
-// CHECK: vmovdqu  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfe,0x6f,0x10]
-          vmovdqu  (%eax), %ymm2
-
-// CHECK: vmovshdup  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x16,0xea]
-          vmovshdup  %ymm2, %ymm5
-
-// CHECK: vmovshdup  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfe,0x16,0x10]
-          vmovshdup  (%eax), %ymm2
-
-// CHECK: vmovsldup  %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x12,0xea]
-          vmovsldup  %ymm2, %ymm5
-
-// CHECK: vmovsldup  (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfe,0x12,0x10]
-          vmovsldup  (%eax), %ymm2
-
-// CHECK: vptest  %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea]
-          vptest  %ymm2, %ymm5
-
-// CHECK: vptest  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10]
-          vptest  (%eax), %ymm2
-
-// CHECK: vroundpd  $7, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07]
-          vroundpd  $7, %ymm5, %ymm1
-
-// CHECK: vroundpd  $7, (%eax), %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07]
-          vroundpd  $7, (%eax), %ymm5
-
-// CHECK: vroundps  $7, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07]
-          vroundps  $7, %ymm5, %ymm1
-
-// CHECK: vroundps  $7, (%eax), %ymm5
-// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07]
-          vroundps  $7, (%eax), %ymm5
-
-// CHECK: vshufpd  $7, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07]
-          vshufpd  $7, %ymm2, %ymm5, %ymm1
-
-// CHECK: vshufpd  $7, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07]
-          vshufpd  $7, (%eax), %ymm5, %ymm1
-
-// CHECK: vshufps  $7, %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07]
-          vshufps  $7, %ymm2, %ymm5, %ymm1
-
-// CHECK: vshufps  $7, (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07]
-          vshufps  $7, (%eax), %ymm5, %ymm1
-
-// CHECK: vtestpd  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea]
-          vtestpd  %xmm2, %xmm5
-
-// CHECK: vtestpd  %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea]
-          vtestpd  %ymm2, %ymm5
-
-// CHECK: vtestpd  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10]
-          vtestpd  (%eax), %xmm2
-
-// CHECK: vtestpd  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10]
-          vtestpd  (%eax), %ymm2
-
-// CHECK: vtestps  %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea]
-          vtestps  %xmm2, %xmm5
-
-// CHECK: vtestps  %ymm2, %ymm5
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea]
-          vtestps  %ymm2, %ymm5
-
-// CHECK: vtestps  (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10]
-          vtestps  (%eax), %xmm2
-
-// CHECK: vtestps  (%eax), %ymm2
-// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10]
-          vtestps  (%eax), %ymm2
-
-// CHECK: vblendvpd  %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2
-// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00]
-          vblendvpd  %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2
-
diff --git a/test/MC/AsmParser/X86/x86_32-bit.s b/test/MC/AsmParser/X86/x86_32-bit.s
deleted file mode 100644
index ca0b26bef62a..000000000000
--- a/test/MC/AsmParser/X86/x86_32-bit.s
+++ /dev/null
@@ -1,1631 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-// XFAIL: *
-
-// CHECK: 	movb	$127, 3735928559(%ebx,%ecx,8)
-        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movw	$31438, 3735928559(%ebx,%ecx,8)
-        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movl	$324478056, 3735928559(%ebx,%ecx,8)
-        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movsbl	3735928559(%ebx,%ecx,8), %ecx
-        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movswl	3735928559(%ebx,%ecx,8), %ecx
-        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movzbl	3735928559(%ebx,%ecx,8), %ecx  # NOREX
-        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movzwl	3735928559(%ebx,%ecx,8), %ecx
-        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	pushl	3735928559(%ebx,%ecx,8)
-        	pushl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	popl	3735928559(%ebx,%ecx,8)
-        	popl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	lahf
-        	lahf
-
-// CHECK: 	sahf
-        	sahf
-
-// CHECK: 	addb	$254, 3735928559(%ebx,%ecx,8)
-        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addb	$127, 3735928559(%ebx,%ecx,8)
-        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addw	$31438, 3735928559(%ebx,%ecx,8)
-        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addl	$324478056, 3735928559(%ebx,%ecx,8)
-        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	incl	3735928559(%ebx,%ecx,8)
-        	incl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subb	$254, 3735928559(%ebx,%ecx,8)
-        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subb	$127, 3735928559(%ebx,%ecx,8)
-        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subw	$31438, 3735928559(%ebx,%ecx,8)
-        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subl	$324478056, 3735928559(%ebx,%ecx,8)
-        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	decl	3735928559(%ebx,%ecx,8)
-        	decl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbw	$31438, 3735928559(%ebx,%ecx,8)
-        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbl	$324478056, 3735928559(%ebx,%ecx,8)
-        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpb	$254, 3735928559(%ebx,%ecx,8)
-        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpb	$127, 3735928559(%ebx,%ecx,8)
-        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpw	$31438, 3735928559(%ebx,%ecx,8)
-        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpl	$324478056, 3735928559(%ebx,%ecx,8)
-        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testb	$127, 3735928559(%ebx,%ecx,8)
-        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testw	$31438, 3735928559(%ebx,%ecx,8)
-        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testl	$324478056, 3735928559(%ebx,%ecx,8)
-        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andb	$254, 3735928559(%ebx,%ecx,8)
-        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andb	$127, 3735928559(%ebx,%ecx,8)
-        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andw	$31438, 3735928559(%ebx,%ecx,8)
-        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andl	$324478056, 3735928559(%ebx,%ecx,8)
-        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orb	$254, 3735928559(%ebx,%ecx,8)
-        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orb	$127, 3735928559(%ebx,%ecx,8)
-        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orw	$31438, 3735928559(%ebx,%ecx,8)
-        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orl	$324478056, 3735928559(%ebx,%ecx,8)
-        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorb	$254, 3735928559(%ebx,%ecx,8)
-        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorb	$127, 3735928559(%ebx,%ecx,8)
-        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorw	$31438, 3735928559(%ebx,%ecx,8)
-        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorl	$324478056, 3735928559(%ebx,%ecx,8)
-        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcb	$254, 3735928559(%ebx,%ecx,8)
-        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcb	$127, 3735928559(%ebx,%ecx,8)
-        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcw	$31438, 3735928559(%ebx,%ecx,8)
-        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcl	$324478056, 3735928559(%ebx,%ecx,8)
-        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	negl	3735928559(%ebx,%ecx,8)
-        	negl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	notl	3735928559(%ebx,%ecx,8)
-        	notl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cbtw
-        	cbtw
-
-// CHECK: 	cwtl
-        	cwtl
-
-// CHECK: 	cwtd
-        	cwtd
-
-// CHECK: 	cltd
-        	cltd
-
-// CHECK: 	mull	3735928559(%ebx,%ecx,8)
-        	mull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	imull	3735928559(%ebx,%ecx,8)
-        	imull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	divl	3735928559(%ebx,%ecx,8)
-        	divl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	idivl	3735928559(%ebx,%ecx,8)
-        	idivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	roll	$0, 3735928559(%ebx,%ecx,8)
-        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rolb	$127, 3735928559(%ebx,%ecx,8)
-        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	roll	3735928559(%ebx,%ecx,8)
-        	roll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorl	$0, 3735928559(%ebx,%ecx,8)
-        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorb	$127, 3735928559(%ebx,%ecx,8)
-        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorl	3735928559(%ebx,%ecx,8)
-        	rorl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
-        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
-        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shll	3735928559(%ebx,%ecx,8)
-        	shll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrl	$0, 3735928559(%ebx,%ecx,8)
-        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrb	$127, 3735928559(%ebx,%ecx,8)
-        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrl	3735928559(%ebx,%ecx,8)
-        	shrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarl	$0, 3735928559(%ebx,%ecx,8)
-        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarb	$127, 3735928559(%ebx,%ecx,8)
-        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarl	3735928559(%ebx,%ecx,8)
-        	sarl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	call	*%ecx
-        	call	*%ecx
-
-// CHECK: 	call	*3735928559(%ebx,%ecx,8)
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	call	*3735928559(%ebx,%ecx,8)
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	jmp	*3735928559(%ebx,%ecx,8)  # TAILCALL
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	jmp	*3735928559(%ebx,%ecx,8)  # TAILCALL
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ljmpl	*3735928559(%ebx,%ecx,8)
-        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	lret
-        	lret
-
-// CHECK: 	leave
-        	leave
-
-// CHECK: 	seto	%bl
-        	seto	%bl
-
-// CHECK: 	seto	3735928559(%ebx,%ecx,8)
-        	seto	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setno	%bl
-        	setno	%bl
-
-// CHECK: 	setno	3735928559(%ebx,%ecx,8)
-        	setno	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setb	%bl
-        	setb	%bl
-
-// CHECK: 	setb	3735928559(%ebx,%ecx,8)
-        	setb	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setae	%bl
-        	setae	%bl
-
-// CHECK: 	setae	3735928559(%ebx,%ecx,8)
-        	setae	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sete	%bl
-        	sete	%bl
-
-// CHECK: 	sete	3735928559(%ebx,%ecx,8)
-        	sete	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setne	%bl
-        	setne	%bl
-
-// CHECK: 	setne	3735928559(%ebx,%ecx,8)
-        	setne	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setbe	%bl
-        	setbe	%bl
-
-// CHECK: 	setbe	3735928559(%ebx,%ecx,8)
-        	setbe	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	seta	%bl
-        	seta	%bl
-
-// CHECK: 	seta	3735928559(%ebx,%ecx,8)
-        	seta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sets	%bl
-        	sets	%bl
-
-// CHECK: 	sets	3735928559(%ebx,%ecx,8)
-        	sets	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setns	%bl
-        	setns	%bl
-
-// CHECK: 	setns	3735928559(%ebx,%ecx,8)
-        	setns	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setp	%bl
-        	setp	%bl
-
-// CHECK: 	setp	3735928559(%ebx,%ecx,8)
-        	setp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setnp	%bl
-        	setnp	%bl
-
-// CHECK: 	setnp	3735928559(%ebx,%ecx,8)
-        	setnp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setl	%bl
-        	setl	%bl
-
-// CHECK: 	setl	3735928559(%ebx,%ecx,8)
-        	setl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setge	%bl
-        	setge	%bl
-
-// CHECK: 	setge	3735928559(%ebx,%ecx,8)
-        	setge	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setle	%bl
-        	setle	%bl
-
-// CHECK: 	setle	3735928559(%ebx,%ecx,8)
-        	setle	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setg	%bl
-        	setg	%bl
-
-// CHECK: 	setg	3735928559(%ebx,%ecx,8)
-        	setg	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	nopl	3735928559(%ebx,%ecx,8)
-        	nopl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	nop
-        	nop
-
-// CHECK: 	fldl	3735928559(%ebx,%ecx,8)
-        	fldl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fildl	3735928559(%ebx,%ecx,8)
-        	fildl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fildll	3735928559(%ebx,%ecx,8)
-        	fildll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fldt	3735928559(%ebx,%ecx,8)
-        	fldt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fbld	3735928559(%ebx,%ecx,8)
-        	fbld	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstl	3735928559(%ebx,%ecx,8)
-        	fstl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistl	3735928559(%ebx,%ecx,8)
-        	fistl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstpl	3735928559(%ebx,%ecx,8)
-        	fstpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistpl	3735928559(%ebx,%ecx,8)
-        	fistpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistpll	3735928559(%ebx,%ecx,8)
-        	fistpll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstpt	3735928559(%ebx,%ecx,8)
-        	fstpt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fbstp	3735928559(%ebx,%ecx,8)
-        	fbstp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ficoml	3735928559(%ebx,%ecx,8)
-        	ficoml	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ficompl	3735928559(%ebx,%ecx,8)
-        	ficompl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fucompp
-        	fucompp
-
-// CHECK: 	ftst
-        	ftst
-
-// CHECK: 	fld1
-        	fld1
-
-// CHECK: 	fldz
-        	fldz
-
-// CHECK: 	faddl	3735928559(%ebx,%ecx,8)
-        	faddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fiaddl	3735928559(%ebx,%ecx,8)
-        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fsubl	3735928559(%ebx,%ecx,8)
-        	fsubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fisubl	3735928559(%ebx,%ecx,8)
-        	fisubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fsubrl	3735928559(%ebx,%ecx,8)
-        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fisubrl	3735928559(%ebx,%ecx,8)
-        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fmull	3735928559(%ebx,%ecx,8)
-        	fmull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fimull	3735928559(%ebx,%ecx,8)
-        	fimull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fdivl	3735928559(%ebx,%ecx,8)
-        	fdivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fidivl	3735928559(%ebx,%ecx,8)
-        	fidivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fdivrl	3735928559(%ebx,%ecx,8)
-        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fidivrl	3735928559(%ebx,%ecx,8)
-        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fsqrt
-        	fsqrt
-
-// CHECK: 	fsin
-        	fsin
-
-// CHECK: 	fcos
-        	fcos
-
-// CHECK: 	fchs
-        	fchs
-
-// CHECK: 	fabs
-        	fabs
-
-// CHECK: 	fldcw	3735928559(%ebx,%ecx,8)
-        	fldcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fnstcw	3735928559(%ebx,%ecx,8)
-        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rdtsc
-        	rdtsc
-
-// CHECK: 	sysenter
-        	sysenter
-
-// CHECK: 	sysexit
-        	sysexit
-
-// CHECK: 	ud2
-        	ud2
-
-// CHECK: 	movnti	%ecx, 3735928559(%ebx,%ecx,8)
-        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	clflush	3735928559(%ebx,%ecx,8)
-        	clflush	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	emms
-        	emms
-
-// CHECK: 	movd	%ecx, %mm3
-        	movd	%ecx,%mm3
-
-// CHECK: 	movd	3735928559(%ebx,%ecx,8), %mm3
-        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	movd	%ecx, %xmm5
-        	movd	%ecx,%xmm5
-
-// CHECK: 	movd	3735928559(%ebx,%ecx,8), %xmm5
-        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movd	%xmm5, %ecx
-        	movd	%xmm5,%ecx
-
-// CHECK: 	movd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movq	3735928559(%ebx,%ecx,8), %mm3
-        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	movq	%mm3, %mm3
-        	movq	%mm3,%mm3
-
-// CHECK: 	movq	%mm3, %mm3
-        	movq	%mm3,%mm3
-
-// CHECK: 	movq	%xmm5, %xmm5
-        	movq	%xmm5,%xmm5
-
-// CHECK: 	movq	%xmm5, %xmm5
-        	movq	%xmm5,%xmm5
-
-// CHECK: 	packssdw	%mm3, %mm3
-        	packssdw	%mm3,%mm3
-
-// CHECK: 	packssdw	%xmm5, %xmm5
-        	packssdw	%xmm5,%xmm5
-
-// CHECK: 	packsswb	%mm3, %mm3
-        	packsswb	%mm3,%mm3
-
-// CHECK: 	packsswb	%xmm5, %xmm5
-        	packsswb	%xmm5,%xmm5
-
-// CHECK: 	packuswb	%mm3, %mm3
-        	packuswb	%mm3,%mm3
-
-// CHECK: 	packuswb	%xmm5, %xmm5
-        	packuswb	%xmm5,%xmm5
-
-// CHECK: 	paddb	%mm3, %mm3
-        	paddb	%mm3,%mm3
-
-// CHECK: 	paddb	%xmm5, %xmm5
-        	paddb	%xmm5,%xmm5
-
-// CHECK: 	paddw	%mm3, %mm3
-        	paddw	%mm3,%mm3
-
-// CHECK: 	paddw	%xmm5, %xmm5
-        	paddw	%xmm5,%xmm5
-
-// CHECK: 	paddd	%mm3, %mm3
-        	paddd	%mm3,%mm3
-
-// CHECK: 	paddd	%xmm5, %xmm5
-        	paddd	%xmm5,%xmm5
-
-// CHECK: 	paddq	%mm3, %mm3
-        	paddq	%mm3,%mm3
-
-// CHECK: 	paddq	%xmm5, %xmm5
-        	paddq	%xmm5,%xmm5
-
-// CHECK: 	paddsb	%mm3, %mm3
-        	paddsb	%mm3,%mm3
-
-// CHECK: 	paddsb	%xmm5, %xmm5
-        	paddsb	%xmm5,%xmm5
-
-// CHECK: 	paddsw	%mm3, %mm3
-        	paddsw	%mm3,%mm3
-
-// CHECK: 	paddsw	%xmm5, %xmm5
-        	paddsw	%xmm5,%xmm5
-
-// CHECK: 	paddusb	%mm3, %mm3
-        	paddusb	%mm3,%mm3
-
-// CHECK: 	paddusb	%xmm5, %xmm5
-        	paddusb	%xmm5,%xmm5
-
-// CHECK: 	paddusw	%mm3, %mm3
-        	paddusw	%mm3,%mm3
-
-// CHECK: 	paddusw	%xmm5, %xmm5
-        	paddusw	%xmm5,%xmm5
-
-// CHECK: 	pand	%mm3, %mm3
-        	pand	%mm3,%mm3
-
-// CHECK: 	pand	%xmm5, %xmm5
-        	pand	%xmm5,%xmm5
-
-// CHECK: 	pandn	%mm3, %mm3
-        	pandn	%mm3,%mm3
-
-// CHECK: 	pandn	%xmm5, %xmm5
-        	pandn	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqb	%mm3, %mm3
-        	pcmpeqb	%mm3,%mm3
-
-// CHECK: 	pcmpeqb	%xmm5, %xmm5
-        	pcmpeqb	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqw	%mm3, %mm3
-        	pcmpeqw	%mm3,%mm3
-
-// CHECK: 	pcmpeqw	%xmm5, %xmm5
-        	pcmpeqw	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqd	%mm3, %mm3
-        	pcmpeqd	%mm3,%mm3
-
-// CHECK: 	pcmpeqd	%xmm5, %xmm5
-        	pcmpeqd	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtb	%mm3, %mm3
-        	pcmpgtb	%mm3,%mm3
-
-// CHECK: 	pcmpgtb	%xmm5, %xmm5
-        	pcmpgtb	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtw	%mm3, %mm3
-        	pcmpgtw	%mm3,%mm3
-
-// CHECK: 	pcmpgtw	%xmm5, %xmm5
-        	pcmpgtw	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtd	%mm3, %mm3
-        	pcmpgtd	%mm3,%mm3
-
-// CHECK: 	pcmpgtd	%xmm5, %xmm5
-        	pcmpgtd	%xmm5,%xmm5
-
-// CHECK: 	pmaddwd	%mm3, %mm3
-        	pmaddwd	%mm3,%mm3
-
-// CHECK: 	pmaddwd	%xmm5, %xmm5
-        	pmaddwd	%xmm5,%xmm5
-
-// CHECK: 	pmulhw	%mm3, %mm3
-        	pmulhw	%mm3,%mm3
-
-// CHECK: 	pmulhw	%xmm5, %xmm5
-        	pmulhw	%xmm5,%xmm5
-
-// CHECK: 	pmullw	%mm3, %mm3
-        	pmullw	%mm3,%mm3
-
-// CHECK: 	pmullw	%xmm5, %xmm5
-        	pmullw	%xmm5,%xmm5
-
-// CHECK: 	por	%mm3, %mm3
-        	por	%mm3,%mm3
-
-// CHECK: 	por	%xmm5, %xmm5
-        	por	%xmm5,%xmm5
-
-// CHECK: 	psllw	%mm3, %mm3
-        	psllw	%mm3,%mm3
-
-// CHECK: 	psllw	%xmm5, %xmm5
-        	psllw	%xmm5,%xmm5
-
-// CHECK: 	psllw	$127, %mm3
-        	psllw	$0x7f,%mm3
-
-// CHECK: 	psllw	$127, %xmm5
-        	psllw	$0x7f,%xmm5
-
-// CHECK: 	pslld	%mm3, %mm3
-        	pslld	%mm3,%mm3
-
-// CHECK: 	pslld	%xmm5, %xmm5
-        	pslld	%xmm5,%xmm5
-
-// CHECK: 	pslld	$127, %mm3
-        	pslld	$0x7f,%mm3
-
-// CHECK: 	pslld	$127, %xmm5
-        	pslld	$0x7f,%xmm5
-
-// CHECK: 	psllq	%mm3, %mm3
-        	psllq	%mm3,%mm3
-
-// CHECK: 	psllq	%xmm5, %xmm5
-        	psllq	%xmm5,%xmm5
-
-// CHECK: 	psllq	$127, %mm3
-        	psllq	$0x7f,%mm3
-
-// CHECK: 	psllq	$127, %xmm5
-        	psllq	$0x7f,%xmm5
-
-// CHECK: 	psraw	%mm3, %mm3
-        	psraw	%mm3,%mm3
-
-// CHECK: 	psraw	%xmm5, %xmm5
-        	psraw	%xmm5,%xmm5
-
-// CHECK: 	psraw	$127, %mm3
-        	psraw	$0x7f,%mm3
-
-// CHECK: 	psraw	$127, %xmm5
-        	psraw	$0x7f,%xmm5
-
-// CHECK: 	psrad	%mm3, %mm3
-        	psrad	%mm3,%mm3
-
-// CHECK: 	psrad	%xmm5, %xmm5
-        	psrad	%xmm5,%xmm5
-
-// CHECK: 	psrad	$127, %mm3
-        	psrad	$0x7f,%mm3
-
-// CHECK: 	psrad	$127, %xmm5
-        	psrad	$0x7f,%xmm5
-
-// CHECK: 	psrlw	%mm3, %mm3
-        	psrlw	%mm3,%mm3
-
-// CHECK: 	psrlw	%xmm5, %xmm5
-        	psrlw	%xmm5,%xmm5
-
-// CHECK: 	psrlw	$127, %mm3
-        	psrlw	$0x7f,%mm3
-
-// CHECK: 	psrlw	$127, %xmm5
-        	psrlw	$0x7f,%xmm5
-
-// CHECK: 	psrld	%mm3, %mm3
-        	psrld	%mm3,%mm3
-
-// CHECK: 	psrld	%xmm5, %xmm5
-        	psrld	%xmm5,%xmm5
-
-// CHECK: 	psrld	$127, %mm3
-        	psrld	$0x7f,%mm3
-
-// CHECK: 	psrld	$127, %xmm5
-        	psrld	$0x7f,%xmm5
-
-// CHECK: 	psrlq	%mm3, %mm3
-        	psrlq	%mm3,%mm3
-
-// CHECK: 	psrlq	%xmm5, %xmm5
-        	psrlq	%xmm5,%xmm5
-
-// CHECK: 	psrlq	$127, %mm3
-        	psrlq	$0x7f,%mm3
-
-// CHECK: 	psrlq	$127, %xmm5
-        	psrlq	$0x7f,%xmm5
-
-// CHECK: 	psubb	%mm3, %mm3
-        	psubb	%mm3,%mm3
-
-// CHECK: 	psubb	%xmm5, %xmm5
-        	psubb	%xmm5,%xmm5
-
-// CHECK: 	psubw	%mm3, %mm3
-        	psubw	%mm3,%mm3
-
-// CHECK: 	psubw	%xmm5, %xmm5
-        	psubw	%xmm5,%xmm5
-
-// CHECK: 	psubd	%mm3, %mm3
-        	psubd	%mm3,%mm3
-
-// CHECK: 	psubd	%xmm5, %xmm5
-        	psubd	%xmm5,%xmm5
-
-// CHECK: 	psubq	%mm3, %mm3
-        	psubq	%mm3,%mm3
-
-// CHECK: 	psubq	%xmm5, %xmm5
-        	psubq	%xmm5,%xmm5
-
-// CHECK: 	psubsb	%mm3, %mm3
-        	psubsb	%mm3,%mm3
-
-// CHECK: 	psubsb	%xmm5, %xmm5
-        	psubsb	%xmm5,%xmm5
-
-// CHECK: 	psubsw	%mm3, %mm3
-        	psubsw	%mm3,%mm3
-
-// CHECK: 	psubsw	%xmm5, %xmm5
-        	psubsw	%xmm5,%xmm5
-
-// CHECK: 	psubusb	%mm3, %mm3
-        	psubusb	%mm3,%mm3
-
-// CHECK: 	psubusb	%xmm5, %xmm5
-        	psubusb	%xmm5,%xmm5
-
-// CHECK: 	psubusw	%mm3, %mm3
-        	psubusw	%mm3,%mm3
-
-// CHECK: 	psubusw	%xmm5, %xmm5
-        	psubusw	%xmm5,%xmm5
-
-// CHECK: 	punpckhbw	%mm3, %mm3
-        	punpckhbw	%mm3,%mm3
-
-// CHECK: 	punpckhbw	%xmm5, %xmm5
-        	punpckhbw	%xmm5,%xmm5
-
-// CHECK: 	punpckhwd	%mm3, %mm3
-        	punpckhwd	%mm3,%mm3
-
-// CHECK: 	punpckhwd	%xmm5, %xmm5
-        	punpckhwd	%xmm5,%xmm5
-
-// CHECK: 	punpckhdq	%mm3, %mm3
-        	punpckhdq	%mm3,%mm3
-
-// CHECK: 	punpckhdq	%xmm5, %xmm5
-        	punpckhdq	%xmm5,%xmm5
-
-// CHECK: 	punpcklbw	%mm3, %mm3
-        	punpcklbw	%mm3,%mm3
-
-// CHECK: 	punpcklbw	%xmm5, %xmm5
-        	punpcklbw	%xmm5,%xmm5
-
-// CHECK: 	punpcklwd	%mm3, %mm3
-        	punpcklwd	%mm3,%mm3
-
-// CHECK: 	punpcklwd	%xmm5, %xmm5
-        	punpcklwd	%xmm5,%xmm5
-
-// CHECK: 	punpckldq	%mm3, %mm3
-        	punpckldq	%mm3,%mm3
-
-// CHECK: 	punpckldq	%xmm5, %xmm5
-        	punpckldq	%xmm5,%xmm5
-
-// CHECK: 	pxor	%mm3, %mm3
-        	pxor	%mm3,%mm3
-
-// CHECK: 	pxor	%xmm5, %xmm5
-        	pxor	%xmm5,%xmm5
-
-// CHECK: 	addps	%xmm5, %xmm5
-        	addps	%xmm5,%xmm5
-
-// CHECK: 	addss	%xmm5, %xmm5
-        	addss	%xmm5,%xmm5
-
-// CHECK: 	andnps	%xmm5, %xmm5
-        	andnps	%xmm5,%xmm5
-
-// CHECK: 	andps	%xmm5, %xmm5
-        	andps	%xmm5,%xmm5
-
-// CHECK: 	cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpi2ps	%mm3, %xmm5
-        	cvtpi2ps	%mm3,%xmm5
-
-// CHECK: 	cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvtps2pi	%xmm5, %mm3
-        	cvtps2pi	%xmm5,%mm3
-
-// CHECK: 	cvtsi2ss	%ecx, %xmm5
-        	cvtsi2ss	%ecx,%xmm5
-
-// CHECK: 	cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvttps2pi	%xmm5, %mm3
-        	cvttps2pi	%xmm5,%mm3
-
-// CHECK: 	cvttss2si	3735928559(%ebx,%ecx,8), %ecx
-        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	cvttss2si	%xmm5, %ecx
-        	cvttss2si	%xmm5,%ecx
-
-// CHECK: 	divps	%xmm5, %xmm5
-        	divps	%xmm5,%xmm5
-
-// CHECK: 	divss	%xmm5, %xmm5
-        	divss	%xmm5,%xmm5
-
-// CHECK: 	ldmxcsr	3735928559(%ebx,%ecx,8)
-        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	maskmovq	%mm3, %mm3
-        	maskmovq	%mm3,%mm3
-
-// CHECK: 	maxps	%xmm5, %xmm5
-        	maxps	%xmm5,%xmm5
-
-// CHECK: 	maxss	%xmm5, %xmm5
-        	maxss	%xmm5,%xmm5
-
-// CHECK: 	minps	%xmm5, %xmm5
-        	minps	%xmm5,%xmm5
-
-// CHECK: 	minss	%xmm5, %xmm5
-        	minss	%xmm5,%xmm5
-
-// CHECK: 	movaps	3735928559(%ebx,%ecx,8), %xmm5
-        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movaps	%xmm5, %xmm5
-        	movaps	%xmm5,%xmm5
-
-// CHECK: 	movaps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movaps	%xmm5, %xmm5
-        	movaps	%xmm5,%xmm5
-
-// CHECK: 	movhlps	%xmm5, %xmm5
-        	movhlps	%xmm5,%xmm5
-
-// CHECK: 	movhps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movlhps	%xmm5, %xmm5
-        	movlhps	%xmm5,%xmm5
-
-// CHECK: 	movlps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movmskps	%xmm5, %ecx
-        	movmskps	%xmm5,%ecx
-
-// CHECK: 	movntps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntq	%mm3, 3735928559(%ebx,%ecx,8)
-        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movss	3735928559(%ebx,%ecx,8), %xmm5
-        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movss	%xmm5, %xmm5
-        	movss	%xmm5,%xmm5
-
-// CHECK: 	movss	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movss	%xmm5, %xmm5
-        	movss	%xmm5,%xmm5
-
-// CHECK: 	movups	3735928559(%ebx,%ecx,8), %xmm5
-        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movups	%xmm5, %xmm5
-        	movups	%xmm5,%xmm5
-
-// CHECK: 	movups	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movups	%xmm5, %xmm5
-        	movups	%xmm5,%xmm5
-
-// CHECK: 	mulps	%xmm5, %xmm5
-        	mulps	%xmm5,%xmm5
-
-// CHECK: 	mulss	%xmm5, %xmm5
-        	mulss	%xmm5,%xmm5
-
-// CHECK: 	orps	%xmm5, %xmm5
-        	orps	%xmm5,%xmm5
-
-// CHECK: 	pavgb	%mm3, %mm3
-        	pavgb	%mm3,%mm3
-
-// CHECK: 	pavgb	%xmm5, %xmm5
-        	pavgb	%xmm5,%xmm5
-
-// CHECK: 	pavgw	%mm3, %mm3
-        	pavgw	%mm3,%mm3
-
-// CHECK: 	pavgw	%xmm5, %xmm5
-        	pavgw	%xmm5,%xmm5
-
-// CHECK: 	pmaxsw	%mm3, %mm3
-        	pmaxsw	%mm3,%mm3
-
-// CHECK: 	pmaxsw	%xmm5, %xmm5
-        	pmaxsw	%xmm5,%xmm5
-
-// CHECK: 	pmaxub	%mm3, %mm3
-        	pmaxub	%mm3,%mm3
-
-// CHECK: 	pmaxub	%xmm5, %xmm5
-        	pmaxub	%xmm5,%xmm5
-
-// CHECK: 	pminsw	%mm3, %mm3
-        	pminsw	%mm3,%mm3
-
-// CHECK: 	pminsw	%xmm5, %xmm5
-        	pminsw	%xmm5,%xmm5
-
-// CHECK: 	pminub	%mm3, %mm3
-        	pminub	%mm3,%mm3
-
-// CHECK: 	pminub	%xmm5, %xmm5
-        	pminub	%xmm5,%xmm5
-
-// CHECK: 	pmovmskb	%mm3, %ecx
-        	pmovmskb	%mm3,%ecx
-
-// CHECK: 	pmovmskb	%xmm5, %ecx
-        	pmovmskb	%xmm5,%ecx
-
-// CHECK: 	pmulhuw	%mm3, %mm3
-        	pmulhuw	%mm3,%mm3
-
-// CHECK: 	pmulhuw	%xmm5, %xmm5
-        	pmulhuw	%xmm5,%xmm5
-
-// CHECK: 	prefetchnta	3735928559(%ebx,%ecx,8)
-        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht0	3735928559(%ebx,%ecx,8)
-        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht1	3735928559(%ebx,%ecx,8)
-        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht2	3735928559(%ebx,%ecx,8)
-        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	psadbw	%mm3, %mm3
-        	psadbw	%mm3,%mm3
-
-// CHECK: 	psadbw	%xmm5, %xmm5
-        	psadbw	%xmm5,%xmm5
-
-// CHECK: 	rcpps	3735928559(%ebx,%ecx,8), %xmm5
-        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rcpps	%xmm5, %xmm5
-        	rcpps	%xmm5,%xmm5
-
-// CHECK: 	rcpss	3735928559(%ebx,%ecx,8), %xmm5
-        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rcpss	%xmm5, %xmm5
-        	rcpss	%xmm5,%xmm5
-
-// CHECK: 	rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
-        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rsqrtps	%xmm5, %xmm5
-        	rsqrtps	%xmm5,%xmm5
-
-// CHECK: 	rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
-        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rsqrtss	%xmm5, %xmm5
-        	rsqrtss	%xmm5,%xmm5
-
-// CHECK: 	sqrtps	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtps	%xmm5, %xmm5
-        	sqrtps	%xmm5,%xmm5
-
-// CHECK: 	sqrtss	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtss	%xmm5, %xmm5
-        	sqrtss	%xmm5,%xmm5
-
-// CHECK: 	stmxcsr	3735928559(%ebx,%ecx,8)
-        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subps	%xmm5, %xmm5
-        	subps	%xmm5,%xmm5
-
-// CHECK: 	subss	%xmm5, %xmm5
-        	subss	%xmm5,%xmm5
-
-// CHECK: 	ucomiss	3735928559(%ebx,%ecx,8), %xmm5
-        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ucomiss	%xmm5, %xmm5
-        	ucomiss	%xmm5,%xmm5
-
-// CHECK: 	unpckhps	%xmm5, %xmm5
-        	unpckhps	%xmm5,%xmm5
-
-// CHECK: 	unpcklps	%xmm5, %xmm5
-        	unpcklps	%xmm5,%xmm5
-
-// CHECK: 	xorps	%xmm5, %xmm5
-        	xorps	%xmm5,%xmm5
-
-// CHECK: 	addpd	%xmm5, %xmm5
-        	addpd	%xmm5,%xmm5
-
-// CHECK: 	addsd	%xmm5, %xmm5
-        	addsd	%xmm5,%xmm5
-
-// CHECK: 	andnpd	%xmm5, %xmm5
-        	andnpd	%xmm5,%xmm5
-
-// CHECK: 	andpd	%xmm5, %xmm5
-        	andpd	%xmm5,%xmm5
-
-// CHECK: 	comisd	3735928559(%ebx,%ecx,8), %xmm5
-        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	comisd	%xmm5, %xmm5
-        	comisd	%xmm5,%xmm5
-
-// CHECK: 	cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpi2pd	%mm3, %xmm5
-        	cvtpi2pd	%mm3,%xmm5
-
-// CHECK: 	cvtsi2sd	%ecx, %xmm5
-        	cvtsi2sd	%ecx,%xmm5
-
-// CHECK: 	cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	divpd	%xmm5, %xmm5
-        	divpd	%xmm5,%xmm5
-
-// CHECK: 	divsd	%xmm5, %xmm5
-        	divsd	%xmm5,%xmm5
-
-// CHECK: 	maxpd	%xmm5, %xmm5
-        	maxpd	%xmm5,%xmm5
-
-// CHECK: 	maxsd	%xmm5, %xmm5
-        	maxsd	%xmm5,%xmm5
-
-// CHECK: 	minpd	%xmm5, %xmm5
-        	minpd	%xmm5,%xmm5
-
-// CHECK: 	minsd	%xmm5, %xmm5
-        	minsd	%xmm5,%xmm5
-
-// CHECK: 	movapd	3735928559(%ebx,%ecx,8), %xmm5
-        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movapd	%xmm5, %xmm5
-        	movapd	%xmm5,%xmm5
-
-// CHECK: 	movapd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movapd	%xmm5, %xmm5
-        	movapd	%xmm5,%xmm5
-
-// CHECK: 	movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movmskpd	%xmm5, %ecx
-        	movmskpd	%xmm5,%ecx
-
-// CHECK: 	movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movsd	3735928559(%ebx,%ecx,8), %xmm5
-        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movsd	%xmm5, %xmm5
-        	movsd	%xmm5,%xmm5
-
-// CHECK: 	movsd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movsd	%xmm5, %xmm5
-        	movsd	%xmm5,%xmm5
-
-// CHECK: 	movupd	3735928559(%ebx,%ecx,8), %xmm5
-        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movupd	%xmm5, %xmm5
-        	movupd	%xmm5,%xmm5
-
-// CHECK: 	movupd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movupd	%xmm5, %xmm5
-        	movupd	%xmm5,%xmm5
-
-// CHECK: 	mulpd	%xmm5, %xmm5
-        	mulpd	%xmm5,%xmm5
-
-// CHECK: 	mulsd	%xmm5, %xmm5
-        	mulsd	%xmm5,%xmm5
-
-// CHECK: 	orpd	%xmm5, %xmm5
-        	orpd	%xmm5,%xmm5
-
-// CHECK: 	sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtpd	%xmm5, %xmm5
-        	sqrtpd	%xmm5,%xmm5
-
-// CHECK: 	sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtsd	%xmm5, %xmm5
-        	sqrtsd	%xmm5,%xmm5
-
-// CHECK: 	subpd	%xmm5, %xmm5
-        	subpd	%xmm5,%xmm5
-
-// CHECK: 	subsd	%xmm5, %xmm5
-        	subsd	%xmm5,%xmm5
-
-// CHECK: 	ucomisd	3735928559(%ebx,%ecx,8), %xmm5
-        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ucomisd	%xmm5, %xmm5
-        	ucomisd	%xmm5,%xmm5
-
-// CHECK: 	unpckhpd	%xmm5, %xmm5
-        	unpckhpd	%xmm5,%xmm5
-
-// CHECK: 	unpcklpd	%xmm5, %xmm5
-        	unpcklpd	%xmm5,%xmm5
-
-// CHECK: 	xorpd	%xmm5, %xmm5
-        	xorpd	%xmm5,%xmm5
-
-// CHECK: 	cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtdq2pd	%xmm5, %xmm5
-        	cvtdq2pd	%xmm5,%xmm5
-
-// CHECK: 	cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpd2dq	%xmm5, %xmm5
-        	cvtpd2dq	%xmm5,%xmm5
-
-// CHECK: 	cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtdq2ps	%xmm5, %xmm5
-        	cvtdq2ps	%xmm5,%xmm5
-
-// CHECK: 	cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvtpd2pi	%xmm5, %mm3
-        	cvtpd2pi	%xmm5,%mm3
-
-// CHECK: 	cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtps2dq	%xmm5, %xmm5
-        	cvtps2dq	%xmm5,%xmm5
-
-// CHECK: 	cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtsd2ss	%xmm5, %xmm5
-        	cvtsd2ss	%xmm5,%xmm5
-
-// CHECK: 	cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtss2sd	%xmm5, %xmm5
-        	cvtss2sd	%xmm5,%xmm5
-
-// CHECK: 	cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvttpd2pi	%xmm5, %mm3
-        	cvttpd2pi	%xmm5,%mm3
-
-// CHECK: 	cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
-        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	cvttsd2si	%xmm5, %ecx
-        	cvttsd2si	%xmm5,%ecx
-
-// CHECK: 	maskmovdqu	%xmm5, %xmm5
-        	maskmovdqu	%xmm5,%xmm5
-
-// CHECK: 	movdqa	3735928559(%ebx,%ecx,8), %xmm5
-        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movdqa	%xmm5, %xmm5
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: 	movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movdqa	%xmm5, %xmm5
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: 	movdqu	3735928559(%ebx,%ecx,8), %xmm5
-        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movdq2q	%xmm5, %mm3
-        	movdq2q	%xmm5,%mm3
-
-// CHECK: 	movq2dq	%mm3, %xmm5
-        	movq2dq	%mm3,%xmm5
-
-// CHECK: 	pmuludq	%mm3, %mm3
-        	pmuludq	%mm3,%mm3
-
-// CHECK: 	pmuludq	%xmm5, %xmm5
-        	pmuludq	%xmm5,%xmm5
-
-// CHECK: 	pslldq	$127, %xmm5
-        	pslldq	$0x7f,%xmm5
-
-// CHECK: 	psrldq	$127, %xmm5
-        	psrldq	$0x7f,%xmm5
-
-// CHECK: 	punpckhqdq	%xmm5, %xmm5
-        	punpckhqdq	%xmm5,%xmm5
-
-// CHECK: 	punpcklqdq	%xmm5, %xmm5
-        	punpcklqdq	%xmm5,%xmm5
-
-// CHECK: 	addsubpd	%xmm5, %xmm5
-        	addsubpd	%xmm5,%xmm5
-
-// CHECK: 	addsubps	%xmm5, %xmm5
-        	addsubps	%xmm5,%xmm5
-
-// CHECK: 	haddpd	%xmm5, %xmm5
-        	haddpd	%xmm5,%xmm5
-
-// CHECK: 	haddps	%xmm5, %xmm5
-        	haddps	%xmm5,%xmm5
-
-// CHECK: 	hsubpd	%xmm5, %xmm5
-        	hsubpd	%xmm5,%xmm5
-
-// CHECK: 	hsubps	%xmm5, %xmm5
-        	hsubps	%xmm5,%xmm5
-
-// CHECK: 	lddqu	3735928559(%ebx,%ecx,8), %xmm5
-        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movddup	3735928559(%ebx,%ecx,8), %xmm5
-        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movddup	%xmm5, %xmm5
-        	movddup	%xmm5,%xmm5
-
-// CHECK: 	movshdup	3735928559(%ebx,%ecx,8), %xmm5
-        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movshdup	%xmm5, %xmm5
-        	movshdup	%xmm5,%xmm5
-
-// CHECK: 	movsldup	3735928559(%ebx,%ecx,8), %xmm5
-        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movsldup	%xmm5, %xmm5
-        	movsldup	%xmm5,%xmm5
-
-// CHECK: 	phaddw	%mm3, %mm3
-        	phaddw	%mm3,%mm3
-
-// CHECK: 	phaddw	%xmm5, %xmm5
-        	phaddw	%xmm5,%xmm5
-
-// CHECK: 	phaddd	%mm3, %mm3
-        	phaddd	%mm3,%mm3
-
-// CHECK: 	phaddd	%xmm5, %xmm5
-        	phaddd	%xmm5,%xmm5
-
-// CHECK: 	phaddsw	%mm3, %mm3
-        	phaddsw	%mm3,%mm3
-
-// CHECK: 	phaddsw	%xmm5, %xmm5
-        	phaddsw	%xmm5,%xmm5
-
-// CHECK: 	phsubw	%mm3, %mm3
-        	phsubw	%mm3,%mm3
-
-// CHECK: 	phsubw	%xmm5, %xmm5
-        	phsubw	%xmm5,%xmm5
-
-// CHECK: 	phsubd	%mm3, %mm3
-        	phsubd	%mm3,%mm3
-
-// CHECK: 	phsubd	%xmm5, %xmm5
-        	phsubd	%xmm5,%xmm5
-
-// CHECK: 	phsubsw	%mm3, %mm3
-        	phsubsw	%mm3,%mm3
-
-// CHECK: 	phsubsw	%xmm5, %xmm5
-        	phsubsw	%xmm5,%xmm5
-
-// CHECK: 	pmaddubsw	%mm3, %mm3
-        	pmaddubsw	%mm3,%mm3
-
-// CHECK: 	pmaddubsw	%xmm5, %xmm5
-        	pmaddubsw	%xmm5,%xmm5
-
-// CHECK: 	pmulhrsw	%mm3, %mm3
-        	pmulhrsw	%mm3,%mm3
-
-// CHECK: 	pmulhrsw	%xmm5, %xmm5
-        	pmulhrsw	%xmm5,%xmm5
-
-// CHECK: 	pshufb	%mm3, %mm3
-        	pshufb	%mm3,%mm3
-
-// CHECK: 	pshufb	%xmm5, %xmm5
-        	pshufb	%xmm5,%xmm5
-
-// CHECK: 	psignb	%mm3, %mm3
-        	psignb	%mm3,%mm3
-
-// CHECK: 	psignb	%xmm5, %xmm5
-        	psignb	%xmm5,%xmm5
-
-// CHECK: 	psignw	%mm3, %mm3
-        	psignw	%mm3,%mm3
-
-// CHECK: 	psignw	%xmm5, %xmm5
-        	psignw	%xmm5,%xmm5
-
-// CHECK: 	psignd	%mm3, %mm3
-        	psignd	%mm3,%mm3
-
-// CHECK: 	psignd	%xmm5, %xmm5
-        	psignd	%xmm5,%xmm5
-
-// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %mm3
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsb	%mm3, %mm3
-        	pabsb	%mm3,%mm3
-
-// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsb	%xmm5, %xmm5
-        	pabsb	%xmm5,%xmm5
-
-// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %mm3
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsw	%mm3, %mm3
-        	pabsw	%mm3,%mm3
-
-// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsw	%xmm5, %xmm5
-        	pabsw	%xmm5,%xmm5
-
-// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %mm3
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsd	%mm3, %mm3
-        	pabsd	%mm3,%mm3
-
-// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsd	%xmm5, %xmm5
-        	pabsd	%xmm5,%xmm5
-
-// CHECK: 	femms
-        	femms
-
-// CHECK: 	packusdw	%xmm5, %xmm5
-        	packusdw	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqq	%xmm5, %xmm5
-        	pcmpeqq	%xmm5,%xmm5
-
-// CHECK: 	phminposuw	3735928559(%ebx,%ecx,8), %xmm5
-        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phminposuw	%xmm5, %xmm5
-        	phminposuw	%xmm5,%xmm5
-
-// CHECK: 	pmaxsb	%xmm5, %xmm5
-        	pmaxsb	%xmm5,%xmm5
-
-// CHECK: 	pmaxsd	%xmm5, %xmm5
-        	pmaxsd	%xmm5,%xmm5
-
-// CHECK: 	pmaxud	%xmm5, %xmm5
-        	pmaxud	%xmm5,%xmm5
-
-// CHECK: 	pmaxuw	%xmm5, %xmm5
-        	pmaxuw	%xmm5,%xmm5
-
-// CHECK: 	pminsb	%xmm5, %xmm5
-        	pminsb	%xmm5,%xmm5
-
-// CHECK: 	pminsd	%xmm5, %xmm5
-        	pminsd	%xmm5,%xmm5
-
-// CHECK: 	pminud	%xmm5, %xmm5
-        	pminud	%xmm5,%xmm5
-
-// CHECK: 	pminuw	%xmm5, %xmm5
-        	pminuw	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbw	%xmm5, %xmm5
-        	pmovsxbw	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbd	%xmm5, %xmm5
-        	pmovsxbd	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbq	%xmm5, %xmm5
-        	pmovsxbq	%xmm5,%xmm5
-
-// CHECK: 	pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxwd	%xmm5, %xmm5
-        	pmovsxwd	%xmm5,%xmm5
-
-// CHECK: 	pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxwq	%xmm5, %xmm5
-        	pmovsxwq	%xmm5,%xmm5
-
-// CHECK: 	pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxdq	%xmm5, %xmm5
-        	pmovsxdq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbw	%xmm5, %xmm5
-        	pmovzxbw	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbd	%xmm5, %xmm5
-        	pmovzxbd	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbq	%xmm5, %xmm5
-        	pmovzxbq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxwd	%xmm5, %xmm5
-        	pmovzxwd	%xmm5,%xmm5
-
-// CHECK: 	pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxwq	%xmm5, %xmm5
-        	pmovzxwq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxdq	%xmm5, %xmm5
-        	pmovzxdq	%xmm5,%xmm5
-
-// CHECK: 	pmuldq	%xmm5, %xmm5
-        	pmuldq	%xmm5,%xmm5
-
-// CHECK: 	pmulld	%xmm5, %xmm5
-        	pmulld	%xmm5,%xmm5
-
-// CHECK: 	ptest 	3735928559(%ebx,%ecx,8), %xmm5
-        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ptest 	%xmm5, %xmm5
-        	ptest	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtq	%xmm5, %xmm5
-        	pcmpgtq	%xmm5,%xmm5
diff --git a/test/MC/AsmParser/X86/x86_32-bit_cat.s b/test/MC/AsmParser/X86/x86_32-bit_cat.s
deleted file mode 100644
index 9002c604887b..000000000000
--- a/test/MC/AsmParser/X86/x86_32-bit_cat.s
+++ /dev/null
@@ -1,7862 +0,0 @@
-// This is the current set of tests that can pass though llvm-mc as it were a
-// logical cat(1) and then reassemble to the same instruction.  All of these
-// will not yet encode correctly.  The subset that will encode correctly are in
-// the file x86_32-encoding.s (and other tests that encode are in x86_32-bit.s).
-
-// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-// XFAIL: *
-
-
-// CHECK: 	movb	$127, 3735928559(%ebx,%ecx,8)
-        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movb	$127, 69
-        	movb	$0x7f,0x45
-
-// CHECK: 	movb	$127, 32493
-        	movb	$0x7f,0x7eed
-
-// CHECK: 	movb	$127, 3133065982
-        	movb	$0x7f,0xbabecafe
-
-// CHECK: 	movb	$127, 305419896
-        	movb	$0x7f,0x12345678
-
-// CHECK: 	movw	$31438, 3735928559(%ebx,%ecx,8)
-        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movw	$31438, 69
-        	movw	$0x7ace,0x45
-
-// CHECK: 	movw	$31438, 32493
-        	movw	$0x7ace,0x7eed
-
-// CHECK: 	movw	$31438, 3133065982
-        	movw	$0x7ace,0xbabecafe
-
-// CHECK: 	movw	$31438, 305419896
-        	movw	$0x7ace,0x12345678
-
-// CHECK: 	movl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movl	$2063514302, 69
-        	movl	$0x7afebabe,0x45
-
-// CHECK: 	movl	$2063514302, 32493
-        	movl	$0x7afebabe,0x7eed
-
-// CHECK: 	movl	$2063514302, 3133065982
-        	movl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	movl	$2063514302, 305419896
-        	movl	$0x7afebabe,0x12345678
-
-// CHECK: 	movl	$324478056, 3735928559(%ebx,%ecx,8)
-        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movl	$324478056, 69
-        	movl	$0x13572468,0x45
-
-// CHECK: 	movl	$324478056, 32493
-        	movl	$0x13572468,0x7eed
-
-// CHECK: 	movl	$324478056, 3133065982
-        	movl	$0x13572468,0xbabecafe
-
-// CHECK: 	movl	$324478056, 305419896
-        	movl	$0x13572468,0x12345678
-
-// CHECK: 	movsbl	3735928559(%ebx,%ecx,8), %ecx
-        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movsbl	69, %ecx
-        	movsbl	0x45,%ecx
-
-// CHECK: 	movsbl	32493, %ecx
-        	movsbl	0x7eed,%ecx
-
-// CHECK: 	movsbl	3133065982, %ecx
-        	movsbl	0xbabecafe,%ecx
-
-// CHECK: 	movsbl	305419896, %ecx
-        	movsbl	0x12345678,%ecx
-
-// CHECK: 	movsbw	3735928559(%ebx,%ecx,8), %bx
-        	movsbw	0xdeadbeef(%ebx,%ecx,8),%bx
-
-// CHECK: 	movsbw	69, %bx
-        	movsbw	0x45,%bx
-
-// CHECK: 	movsbw	32493, %bx
-        	movsbw	0x7eed,%bx
-
-// CHECK: 	movsbw	3133065982, %bx
-        	movsbw	0xbabecafe,%bx
-
-// CHECK: 	movsbw	305419896, %bx
-        	movsbw	0x12345678,%bx
-
-// CHECK: 	movswl	3735928559(%ebx,%ecx,8), %ecx
-        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movswl	69, %ecx
-        	movswl	0x45,%ecx
-
-// CHECK: 	movswl	32493, %ecx
-        	movswl	0x7eed,%ecx
-
-// CHECK: 	movswl	3133065982, %ecx
-        	movswl	0xbabecafe,%ecx
-
-// CHECK: 	movswl	305419896, %ecx
-        	movswl	0x12345678,%ecx
-
-// CHECK: 	movzbl	3735928559(%ebx,%ecx,8), %ecx  # NOREX
-        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movzbl	69, %ecx  # NOREX
-        	movzbl	0x45,%ecx
-
-// CHECK: 	movzbl	32493, %ecx  # NOREX
-        	movzbl	0x7eed,%ecx
-
-// CHECK: 	movzbl	3133065982, %ecx  # NOREX
-        	movzbl	0xbabecafe,%ecx
-
-// CHECK: 	movzbl	305419896, %ecx  # NOREX
-        	movzbl	0x12345678,%ecx
-
-// CHECK: 	movzbw	3735928559(%ebx,%ecx,8), %bx
-        	movzbw	0xdeadbeef(%ebx,%ecx,8),%bx
-
-// CHECK: 	movzbw	69, %bx
-        	movzbw	0x45,%bx
-
-// CHECK: 	movzbw	32493, %bx
-        	movzbw	0x7eed,%bx
-
-// CHECK: 	movzbw	3133065982, %bx
-        	movzbw	0xbabecafe,%bx
-
-// CHECK: 	movzbw	305419896, %bx
-        	movzbw	0x12345678,%bx
-
-// CHECK: 	movzwl	3735928559(%ebx,%ecx,8), %ecx
-        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	movzwl	69, %ecx
-        	movzwl	0x45,%ecx
-
-// CHECK: 	movzwl	32493, %ecx
-        	movzwl	0x7eed,%ecx
-
-// CHECK: 	movzwl	3133065982, %ecx
-        	movzwl	0xbabecafe,%ecx
-
-// CHECK: 	movzwl	305419896, %ecx
-        	movzwl	0x12345678,%ecx
-
-// CHECK: 	pushw	32493
-        	pushw	0x7eed
-
-// CHECK: 	popw	32493
-        	popw	0x7eed
-
-// CHECK: 	pushf
-        	pushfl
-
-// CHECK: 	pushfl
-        	pushfl
-
-// CHECK: 	popf
-        	popfl
-
-// CHECK: 	popfl
-        	popfl
-
-// CHECK: 	clc
-        	clc
-
-// CHECK: 	cld
-        	cld
-
-// CHECK: 	cli
-        	cli
-
-// CHECK: 	clts
-        	clts
-
-// CHECK: 	cmc
-        	cmc
-
-// CHECK: 	lahf
-        	lahf
-
-// CHECK: 	sahf
-        	sahf
-
-// CHECK: 	stc
-        	stc
-
-// CHECK: 	std
-        	std
-
-// CHECK: 	sti
-        	sti
-
-// CHECK: 	addb	$254, 3735928559(%ebx,%ecx,8)
-        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addb	$254, 69
-        	addb	$0xfe,0x45
-
-// CHECK: 	addb	$254, 32493
-        	addb	$0xfe,0x7eed
-
-// CHECK: 	addb	$254, 3133065982
-        	addb	$0xfe,0xbabecafe
-
-// CHECK: 	addb	$254, 305419896
-        	addb	$0xfe,0x12345678
-
-// CHECK: 	addb	$127, 3735928559(%ebx,%ecx,8)
-        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addb	$127, 69
-        	addb	$0x7f,0x45
-
-// CHECK: 	addb	$127, 32493
-        	addb	$0x7f,0x7eed
-
-// CHECK: 	addb	$127, 3133065982
-        	addb	$0x7f,0xbabecafe
-
-// CHECK: 	addb	$127, 305419896
-        	addb	$0x7f,0x12345678
-
-// CHECK: 	addw	$31438, 3735928559(%ebx,%ecx,8)
-        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addw	$31438, 69
-        	addw	$0x7ace,0x45
-
-// CHECK: 	addw	$31438, 32493
-        	addw	$0x7ace,0x7eed
-
-// CHECK: 	addw	$31438, 3133065982
-        	addw	$0x7ace,0xbabecafe
-
-// CHECK: 	addw	$31438, 305419896
-        	addw	$0x7ace,0x12345678
-
-// CHECK: 	addl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addl	$2063514302, 69
-        	addl	$0x7afebabe,0x45
-
-// CHECK: 	addl	$2063514302, 32493
-        	addl	$0x7afebabe,0x7eed
-
-// CHECK: 	addl	$2063514302, 3133065982
-        	addl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	addl	$2063514302, 305419896
-        	addl	$0x7afebabe,0x12345678
-
-// CHECK: 	addl	$324478056, 3735928559(%ebx,%ecx,8)
-        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	addl	$324478056, 69
-        	addl	$0x13572468,0x45
-
-// CHECK: 	addl	$324478056, 32493
-        	addl	$0x13572468,0x7eed
-
-// CHECK: 	addl	$324478056, 3133065982
-        	addl	$0x13572468,0xbabecafe
-
-// CHECK: 	addl	$324478056, 305419896
-        	addl	$0x13572468,0x12345678
-
-// CHECK: 	incl	3735928559(%ebx,%ecx,8)
-        	incl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	incw	32493
-        	incw	0x7eed
-
-// CHECK: 	incl	3133065982
-        	incl	0xbabecafe
-
-// CHECK: 	incl	305419896
-        	incl	0x12345678
-
-// CHECK: 	subb	$254, 3735928559(%ebx,%ecx,8)
-        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subb	$254, 69
-        	subb	$0xfe,0x45
-
-// CHECK: 	subb	$254, 32493
-        	subb	$0xfe,0x7eed
-
-// CHECK: 	subb	$254, 3133065982
-        	subb	$0xfe,0xbabecafe
-
-// CHECK: 	subb	$254, 305419896
-        	subb	$0xfe,0x12345678
-
-// CHECK: 	subb	$127, 3735928559(%ebx,%ecx,8)
-        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subb	$127, 69
-        	subb	$0x7f,0x45
-
-// CHECK: 	subb	$127, 32493
-        	subb	$0x7f,0x7eed
-
-// CHECK: 	subb	$127, 3133065982
-        	subb	$0x7f,0xbabecafe
-
-// CHECK: 	subb	$127, 305419896
-        	subb	$0x7f,0x12345678
-
-// CHECK: 	subw	$31438, 3735928559(%ebx,%ecx,8)
-        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subw	$31438, 69
-        	subw	$0x7ace,0x45
-
-// CHECK: 	subw	$31438, 32493
-        	subw	$0x7ace,0x7eed
-
-// CHECK: 	subw	$31438, 3133065982
-        	subw	$0x7ace,0xbabecafe
-
-// CHECK: 	subw	$31438, 305419896
-        	subw	$0x7ace,0x12345678
-
-// CHECK: 	subl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subl	$2063514302, 69
-        	subl	$0x7afebabe,0x45
-
-// CHECK: 	subl	$2063514302, 32493
-        	subl	$0x7afebabe,0x7eed
-
-// CHECK: 	subl	$2063514302, 3133065982
-        	subl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	subl	$2063514302, 305419896
-        	subl	$0x7afebabe,0x12345678
-
-// CHECK: 	subl	$324478056, 3735928559(%ebx,%ecx,8)
-        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	subl	$324478056, 69
-        	subl	$0x13572468,0x45
-
-// CHECK: 	subl	$324478056, 32493
-        	subl	$0x13572468,0x7eed
-
-// CHECK: 	subl	$324478056, 3133065982
-        	subl	$0x13572468,0xbabecafe
-
-// CHECK: 	subl	$324478056, 305419896
-        	subl	$0x13572468,0x12345678
-
-// CHECK: 	decl	3735928559(%ebx,%ecx,8)
-        	decl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	decw	32493
-        	decw	0x7eed
-
-// CHECK: 	decl	3133065982
-        	decl	0xbabecafe
-
-// CHECK: 	decl	305419896
-        	decl	0x12345678
-
-// CHECK: 	sbbb	$254, 3735928559(%ebx,%ecx,8)
-        	sbbb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbb	$254, 69
-        	sbbb	$0xfe,0x45
-
-// CHECK: 	sbbb	$254, 32493
-        	sbbb	$0xfe,0x7eed
-
-// CHECK: 	sbbb	$254, 3133065982
-        	sbbb	$0xfe,0xbabecafe
-
-// CHECK: 	sbbb	$254, 305419896
-        	sbbb	$0xfe,0x12345678
-
-// CHECK: 	sbbb	$127, 3735928559(%ebx,%ecx,8)
-        	sbbb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbb	$127, 69
-        	sbbb	$0x7f,0x45
-
-// CHECK: 	sbbb	$127, 32493
-        	sbbb	$0x7f,0x7eed
-
-// CHECK: 	sbbb	$127, 3133065982
-        	sbbb	$0x7f,0xbabecafe
-
-// CHECK: 	sbbb	$127, 305419896
-        	sbbb	$0x7f,0x12345678
-
-// CHECK: 	sbbw	$31438, 3735928559(%ebx,%ecx,8)
-        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbw	$31438, 69
-        	sbbw	$0x7ace,0x45
-
-// CHECK: 	sbbw	$31438, 32493
-        	sbbw	$0x7ace,0x7eed
-
-// CHECK: 	sbbw	$31438, 3133065982
-        	sbbw	$0x7ace,0xbabecafe
-
-// CHECK: 	sbbw	$31438, 305419896
-        	sbbw	$0x7ace,0x12345678
-
-// CHECK: 	sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbl	$2063514302, 69
-        	sbbl	$0x7afebabe,0x45
-
-// CHECK: 	sbbl	$2063514302, 32493
-        	sbbl	$0x7afebabe,0x7eed
-
-// CHECK: 	sbbl	$2063514302, 3133065982
-        	sbbl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	sbbl	$2063514302, 305419896
-        	sbbl	$0x7afebabe,0x12345678
-
-// CHECK: 	sbbl	$324478056, 3735928559(%ebx,%ecx,8)
-        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sbbl	$324478056, 69
-        	sbbl	$0x13572468,0x45
-
-// CHECK: 	sbbl	$324478056, 32493
-        	sbbl	$0x13572468,0x7eed
-
-// CHECK: 	sbbl	$324478056, 3133065982
-        	sbbl	$0x13572468,0xbabecafe
-
-// CHECK: 	sbbl	$324478056, 305419896
-        	sbbl	$0x13572468,0x12345678
-
-// CHECK: 	cmpb	$254, 3735928559(%ebx,%ecx,8)
-        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpb	$254, 69
-        	cmpb	$0xfe,0x45
-
-// CHECK: 	cmpb	$254, 32493
-        	cmpb	$0xfe,0x7eed
-
-// CHECK: 	cmpb	$254, 3133065982
-        	cmpb	$0xfe,0xbabecafe
-
-// CHECK: 	cmpb	$254, 305419896
-        	cmpb	$0xfe,0x12345678
-
-// CHECK: 	cmpb	$127, 3735928559(%ebx,%ecx,8)
-        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpb	$127, 69
-        	cmpb	$0x7f,0x45
-
-// CHECK: 	cmpb	$127, 32493
-        	cmpb	$0x7f,0x7eed
-
-// CHECK: 	cmpb	$127, 3133065982
-        	cmpb	$0x7f,0xbabecafe
-
-// CHECK: 	cmpb	$127, 305419896
-        	cmpb	$0x7f,0x12345678
-
-// CHECK: 	cmpw	$31438, 3735928559(%ebx,%ecx,8)
-        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpw	$31438, 69
-        	cmpw	$0x7ace,0x45
-
-// CHECK: 	cmpw	$31438, 32493
-        	cmpw	$0x7ace,0x7eed
-
-// CHECK: 	cmpw	$31438, 3133065982
-        	cmpw	$0x7ace,0xbabecafe
-
-// CHECK: 	cmpw	$31438, 305419896
-        	cmpw	$0x7ace,0x12345678
-
-// CHECK: 	cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpl	$2063514302, 69
-        	cmpl	$0x7afebabe,0x45
-
-// CHECK: 	cmpl	$2063514302, 32493
-        	cmpl	$0x7afebabe,0x7eed
-
-// CHECK: 	cmpl	$2063514302, 3133065982
-        	cmpl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	cmpl	$2063514302, 305419896
-        	cmpl	$0x7afebabe,0x12345678
-
-// CHECK: 	cmpl	$324478056, 3735928559(%ebx,%ecx,8)
-        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpl	$324478056, 69
-        	cmpl	$0x13572468,0x45
-
-// CHECK: 	cmpl	$324478056, 32493
-        	cmpl	$0x13572468,0x7eed
-
-// CHECK: 	cmpl	$324478056, 3133065982
-        	cmpl	$0x13572468,0xbabecafe
-
-// CHECK: 	cmpl	$324478056, 305419896
-        	cmpl	$0x13572468,0x12345678
-
-// CHECK: 	testb	$127, 3735928559(%ebx,%ecx,8)
-        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testb	$127, 69
-        	testb	$0x7f,0x45
-
-// CHECK: 	testb	$127, 32493
-        	testb	$0x7f,0x7eed
-
-// CHECK: 	testb	$127, 3133065982
-        	testb	$0x7f,0xbabecafe
-
-// CHECK: 	testb	$127, 305419896
-        	testb	$0x7f,0x12345678
-
-// CHECK: 	testw	$31438, 3735928559(%ebx,%ecx,8)
-        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testw	$31438, 69
-        	testw	$0x7ace,0x45
-
-// CHECK: 	testw	$31438, 32493
-        	testw	$0x7ace,0x7eed
-
-// CHECK: 	testw	$31438, 3133065982
-        	testw	$0x7ace,0xbabecafe
-
-// CHECK: 	testw	$31438, 305419896
-        	testw	$0x7ace,0x12345678
-
-// CHECK: 	testl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testl	$2063514302, 69
-        	testl	$0x7afebabe,0x45
-
-// CHECK: 	testl	$2063514302, 32493
-        	testl	$0x7afebabe,0x7eed
-
-// CHECK: 	testl	$2063514302, 3133065982
-        	testl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	testl	$2063514302, 305419896
-        	testl	$0x7afebabe,0x12345678
-
-// CHECK: 	testl	$324478056, 3735928559(%ebx,%ecx,8)
-        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	testl	$324478056, 69
-        	testl	$0x13572468,0x45
-
-// CHECK: 	testl	$324478056, 32493
-        	testl	$0x13572468,0x7eed
-
-// CHECK: 	testl	$324478056, 3133065982
-        	testl	$0x13572468,0xbabecafe
-
-// CHECK: 	testl	$324478056, 305419896
-        	testl	$0x13572468,0x12345678
-
-// CHECK: 	andb	$254, 3735928559(%ebx,%ecx,8)
-        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andb	$254, 69
-        	andb	$0xfe,0x45
-
-// CHECK: 	andb	$254, 32493
-        	andb	$0xfe,0x7eed
-
-// CHECK: 	andb	$254, 3133065982
-        	andb	$0xfe,0xbabecafe
-
-// CHECK: 	andb	$254, 305419896
-        	andb	$0xfe,0x12345678
-
-// CHECK: 	andb	$127, 3735928559(%ebx,%ecx,8)
-        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andb	$127, 69
-        	andb	$0x7f,0x45
-
-// CHECK: 	andb	$127, 32493
-        	andb	$0x7f,0x7eed
-
-// CHECK: 	andb	$127, 3133065982
-        	andb	$0x7f,0xbabecafe
-
-// CHECK: 	andb	$127, 305419896
-        	andb	$0x7f,0x12345678
-
-// CHECK: 	andw	$31438, 3735928559(%ebx,%ecx,8)
-        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andw	$31438, 69
-        	andw	$0x7ace,0x45
-
-// CHECK: 	andw	$31438, 32493
-        	andw	$0x7ace,0x7eed
-
-// CHECK: 	andw	$31438, 3133065982
-        	andw	$0x7ace,0xbabecafe
-
-// CHECK: 	andw	$31438, 305419896
-        	andw	$0x7ace,0x12345678
-
-// CHECK: 	andl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andl	$2063514302, 69
-        	andl	$0x7afebabe,0x45
-
-// CHECK: 	andl	$2063514302, 32493
-        	andl	$0x7afebabe,0x7eed
-
-// CHECK: 	andl	$2063514302, 3133065982
-        	andl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	andl	$2063514302, 305419896
-        	andl	$0x7afebabe,0x12345678
-
-// CHECK: 	andl	$324478056, 3735928559(%ebx,%ecx,8)
-        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	andl	$324478056, 69
-        	andl	$0x13572468,0x45
-
-// CHECK: 	andl	$324478056, 32493
-        	andl	$0x13572468,0x7eed
-
-// CHECK: 	andl	$324478056, 3133065982
-        	andl	$0x13572468,0xbabecafe
-
-// CHECK: 	andl	$324478056, 305419896
-        	andl	$0x13572468,0x12345678
-
-// CHECK: 	orb	$254, 3735928559(%ebx,%ecx,8)
-        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orb	$254, 69
-        	orb	$0xfe,0x45
-
-// CHECK: 	orb	$254, 32493
-        	orb	$0xfe,0x7eed
-
-// CHECK: 	orb	$254, 3133065982
-        	orb	$0xfe,0xbabecafe
-
-// CHECK: 	orb	$254, 305419896
-        	orb	$0xfe,0x12345678
-
-// CHECK: 	orb	$127, 3735928559(%ebx,%ecx,8)
-        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orb	$127, 69
-        	orb	$0x7f,0x45
-
-// CHECK: 	orb	$127, 32493
-        	orb	$0x7f,0x7eed
-
-// CHECK: 	orb	$127, 3133065982
-        	orb	$0x7f,0xbabecafe
-
-// CHECK: 	orb	$127, 305419896
-        	orb	$0x7f,0x12345678
-
-// CHECK: 	orw	$31438, 3735928559(%ebx,%ecx,8)
-        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orw	$31438, 69
-        	orw	$0x7ace,0x45
-
-// CHECK: 	orw	$31438, 32493
-        	orw	$0x7ace,0x7eed
-
-// CHECK: 	orw	$31438, 3133065982
-        	orw	$0x7ace,0xbabecafe
-
-// CHECK: 	orw	$31438, 305419896
-        	orw	$0x7ace,0x12345678
-
-// CHECK: 	orl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orl	$2063514302, 69
-        	orl	$0x7afebabe,0x45
-
-// CHECK: 	orl	$2063514302, 32493
-        	orl	$0x7afebabe,0x7eed
-
-// CHECK: 	orl	$2063514302, 3133065982
-        	orl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	orl	$2063514302, 305419896
-        	orl	$0x7afebabe,0x12345678
-
-// CHECK: 	orl	$324478056, 3735928559(%ebx,%ecx,8)
-        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	orl	$324478056, 69
-        	orl	$0x13572468,0x45
-
-// CHECK: 	orl	$324478056, 32493
-        	orl	$0x13572468,0x7eed
-
-// CHECK: 	orl	$324478056, 3133065982
-        	orl	$0x13572468,0xbabecafe
-
-// CHECK: 	orl	$324478056, 305419896
-        	orl	$0x13572468,0x12345678
-
-// CHECK: 	xorb	$254, 3735928559(%ebx,%ecx,8)
-        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorb	$254, 69
-        	xorb	$0xfe,0x45
-
-// CHECK: 	xorb	$254, 32493
-        	xorb	$0xfe,0x7eed
-
-// CHECK: 	xorb	$254, 3133065982
-        	xorb	$0xfe,0xbabecafe
-
-// CHECK: 	xorb	$254, 305419896
-        	xorb	$0xfe,0x12345678
-
-// CHECK: 	xorb	$127, 3735928559(%ebx,%ecx,8)
-        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorb	$127, 69
-        	xorb	$0x7f,0x45
-
-// CHECK: 	xorb	$127, 32493
-        	xorb	$0x7f,0x7eed
-
-// CHECK: 	xorb	$127, 3133065982
-        	xorb	$0x7f,0xbabecafe
-
-// CHECK: 	xorb	$127, 305419896
-        	xorb	$0x7f,0x12345678
-
-// CHECK: 	xorw	$31438, 3735928559(%ebx,%ecx,8)
-        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorw	$31438, 69
-        	xorw	$0x7ace,0x45
-
-// CHECK: 	xorw	$31438, 32493
-        	xorw	$0x7ace,0x7eed
-
-// CHECK: 	xorw	$31438, 3133065982
-        	xorw	$0x7ace,0xbabecafe
-
-// CHECK: 	xorw	$31438, 305419896
-        	xorw	$0x7ace,0x12345678
-
-// CHECK: 	xorl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorl	$2063514302, 69
-        	xorl	$0x7afebabe,0x45
-
-// CHECK: 	xorl	$2063514302, 32493
-        	xorl	$0x7afebabe,0x7eed
-
-// CHECK: 	xorl	$2063514302, 3133065982
-        	xorl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	xorl	$2063514302, 305419896
-        	xorl	$0x7afebabe,0x12345678
-
-// CHECK: 	xorl	$324478056, 3735928559(%ebx,%ecx,8)
-        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	xorl	$324478056, 69
-        	xorl	$0x13572468,0x45
-
-// CHECK: 	xorl	$324478056, 32493
-        	xorl	$0x13572468,0x7eed
-
-// CHECK: 	xorl	$324478056, 3133065982
-        	xorl	$0x13572468,0xbabecafe
-
-// CHECK: 	xorl	$324478056, 305419896
-        	xorl	$0x13572468,0x12345678
-
-// CHECK: 	adcb	$254, 3735928559(%ebx,%ecx,8)
-        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcb	$254, 69
-        	adcb	$0xfe,0x45
-
-// CHECK: 	adcb	$254, 32493
-        	adcb	$0xfe,0x7eed
-
-// CHECK: 	adcb	$254, 3133065982
-        	adcb	$0xfe,0xbabecafe
-
-// CHECK: 	adcb	$254, 305419896
-        	adcb	$0xfe,0x12345678
-
-// CHECK: 	adcb	$127, 3735928559(%ebx,%ecx,8)
-        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcb	$127, 69
-        	adcb	$0x7f,0x45
-
-// CHECK: 	adcb	$127, 32493
-        	adcb	$0x7f,0x7eed
-
-// CHECK: 	adcb	$127, 3133065982
-        	adcb	$0x7f,0xbabecafe
-
-// CHECK: 	adcb	$127, 305419896
-        	adcb	$0x7f,0x12345678
-
-// CHECK: 	adcw	$31438, 3735928559(%ebx,%ecx,8)
-        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcw	$31438, 69
-        	adcw	$0x7ace,0x45
-
-// CHECK: 	adcw	$31438, 32493
-        	adcw	$0x7ace,0x7eed
-
-// CHECK: 	adcw	$31438, 3133065982
-        	adcw	$0x7ace,0xbabecafe
-
-// CHECK: 	adcw	$31438, 305419896
-        	adcw	$0x7ace,0x12345678
-
-// CHECK: 	adcl	$2063514302, 3735928559(%ebx,%ecx,8)
-        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcl	$2063514302, 69
-        	adcl	$0x7afebabe,0x45
-
-// CHECK: 	adcl	$2063514302, 32493
-        	adcl	$0x7afebabe,0x7eed
-
-// CHECK: 	adcl	$2063514302, 3133065982
-        	adcl	$0x7afebabe,0xbabecafe
-
-// CHECK: 	adcl	$2063514302, 305419896
-        	adcl	$0x7afebabe,0x12345678
-
-// CHECK: 	adcl	$324478056, 3735928559(%ebx,%ecx,8)
-        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	adcl	$324478056, 69
-        	adcl	$0x13572468,0x45
-
-// CHECK: 	adcl	$324478056, 32493
-        	adcl	$0x13572468,0x7eed
-
-// CHECK: 	adcl	$324478056, 3133065982
-        	adcl	$0x13572468,0xbabecafe
-
-// CHECK: 	adcl	$324478056, 305419896
-        	adcl	$0x13572468,0x12345678
-
-// CHECK: 	negl	3735928559(%ebx,%ecx,8)
-        	negl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	negw	32493
-        	negw	0x7eed
-
-// CHECK: 	negl	3133065982
-        	negl	0xbabecafe
-
-// CHECK: 	negl	305419896
-        	negl	0x12345678
-
-// CHECK: 	notl	3735928559(%ebx,%ecx,8)
-        	notl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	notw	32493
-        	notw	0x7eed
-
-// CHECK: 	notl	3133065982
-        	notl	0xbabecafe
-
-// CHECK: 	notl	305419896
-        	notl	0x12345678
-
-// CHECK: 	cbtw
-        	cbtw
-
-// CHECK: 	cwtl
-        	cwtl
-
-// CHECK: 	cwtd
-        	cwtd
-
-// CHECK: 	cltd
-        	cltd
-
-// CHECK: 	mull	3735928559(%ebx,%ecx,8)
-        	mull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	mulw	32493
-        	mulw	0x7eed
-
-// CHECK: 	mull	3133065982
-        	mull	0xbabecafe
-
-// CHECK: 	mull	305419896
-        	mull	0x12345678
-
-// CHECK: 	imull	3735928559(%ebx,%ecx,8)
-        	imull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	imulw	32493
-        	imulw	0x7eed
-
-// CHECK: 	imull	3133065982
-        	imull	0xbabecafe
-
-// CHECK: 	imull	305419896
-        	imull	0x12345678
-
-// CHECK: 	divl	3735928559(%ebx,%ecx,8)
-        	divl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	divw	32493
-        	divw	0x7eed
-
-// CHECK: 	divl	3133065982
-        	divl	0xbabecafe
-
-// CHECK: 	divl	305419896
-        	divl	0x12345678
-
-// CHECK: 	idivl	3735928559(%ebx,%ecx,8)
-        	idivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	idivw	32493
-        	idivw	0x7eed
-
-// CHECK: 	idivl	3133065982
-        	idivl	0xbabecafe
-
-// CHECK: 	idivl	305419896
-        	idivl	0x12345678
-
-// CHECK: 	roll	$0, 3735928559(%ebx,%ecx,8)
-        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	roll	$0, 69
-        	roll	$0,0x45
-
-// CHECK: 	roll	$0, 32493
-        	roll	$0,0x7eed
-
-// CHECK: 	roll	$0, 3133065982
-        	roll	$0,0xbabecafe
-
-// CHECK: 	roll	$0, 305419896
-        	roll	$0,0x12345678
-
-// CHECK: 	rolb	$127, 3735928559(%ebx,%ecx,8)
-        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rolb	$127, 69
-        	rolb	$0x7f,0x45
-
-// CHECK: 	rolb	$127, 32493
-        	rolb	$0x7f,0x7eed
-
-// CHECK: 	rolb	$127, 3133065982
-        	rolb	$0x7f,0xbabecafe
-
-// CHECK: 	rolb	$127, 305419896
-        	rolb	$0x7f,0x12345678
-
-// CHECK: 	roll	3735928559(%ebx,%ecx,8)
-        	roll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rolw	32493
-        	rolw	0x7eed
-
-// CHECK: 	roll	3133065982
-        	roll	0xbabecafe
-
-// CHECK: 	roll	305419896
-        	roll	0x12345678
-
-// CHECK: 	rorl	$0, 3735928559(%ebx,%ecx,8)
-        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorl	$0, 69
-        	rorl	$0,0x45
-
-// CHECK: 	rorl	$0, 32493
-        	rorl	$0,0x7eed
-
-// CHECK: 	rorl	$0, 3133065982
-        	rorl	$0,0xbabecafe
-
-// CHECK: 	rorl	$0, 305419896
-        	rorl	$0,0x12345678
-
-// CHECK: 	rorb	$127, 3735928559(%ebx,%ecx,8)
-        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorb	$127, 69
-        	rorb	$0x7f,0x45
-
-// CHECK: 	rorb	$127, 32493
-        	rorb	$0x7f,0x7eed
-
-// CHECK: 	rorb	$127, 3133065982
-        	rorb	$0x7f,0xbabecafe
-
-// CHECK: 	rorb	$127, 305419896
-        	rorb	$0x7f,0x12345678
-
-// CHECK: 	rorl	3735928559(%ebx,%ecx,8)
-        	rorl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rorw	32493
-        	rorw	0x7eed
-
-// CHECK: 	rorl	3133065982
-        	rorl	0xbabecafe
-
-// CHECK: 	rorl	305419896
-        	rorl	0x12345678
-
-// CHECK: 	rcll	$0, 3735928559(%ebx,%ecx,8)
-        	rcll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rcll	$0, 69
-        	rcll	$0,0x45
-
-// CHECK: 	rcll	$0, 32493
-        	rcll	$0,0x7eed
-
-// CHECK: 	rcll	$0, 3133065982
-        	rcll	$0,0xbabecafe
-
-// CHECK: 	rcll	$0, 305419896
-        	rcll	$0,0x12345678
-
-// CHECK: 	rclb	$127, 3735928559(%ebx,%ecx,8)
-        	rclb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rclb	$127, 69
-        	rclb	$0x7f,0x45
-
-// CHECK: 	rclb	$127, 32493
-        	rclb	$0x7f,0x7eed
-
-// CHECK: 	rclb	$127, 3133065982
-        	rclb	$0x7f,0xbabecafe
-
-// CHECK: 	rclb	$127, 305419896
-        	rclb	$0x7f,0x12345678
-
-// CHECK: 	rcrl	$0, 3735928559(%ebx,%ecx,8)
-        	rcrl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rcrl	$0, 69
-        	rcrl	$0,0x45
-
-// CHECK: 	rcrl	$0, 32493
-        	rcrl	$0,0x7eed
-
-// CHECK: 	rcrl	$0, 3133065982
-        	rcrl	$0,0xbabecafe
-
-// CHECK: 	rcrl	$0, 305419896
-        	rcrl	$0,0x12345678
-
-// CHECK: 	rcrb	$127, 3735928559(%ebx,%ecx,8)
-        	rcrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	rcrb	$127, 69
-        	rcrb	$0x7f,0x45
-
-// CHECK: 	rcrb	$127, 32493
-        	rcrb	$0x7f,0x7eed
-
-// CHECK: 	rcrb	$127, 3133065982
-        	rcrb	$0x7f,0xbabecafe
-
-// CHECK: 	rcrb	$127, 305419896
-        	rcrb	$0x7f,0x12345678
-
-// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
-        	sall	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shll	$0, 69
-        	sall	$0,0x45
-
-// CHECK: 	shll	$0, 32493
-        	sall	$0,0x7eed
-
-// CHECK: 	shll	$0, 3133065982
-        	sall	$0,0xbabecafe
-
-// CHECK: 	shll	$0, 305419896
-        	sall	$0,0x12345678
-
-// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
-        	salb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shlb	$127, 69
-        	salb	$0x7f,0x45
-
-// CHECK: 	shlb	$127, 32493
-        	salb	$0x7f,0x7eed
-
-// CHECK: 	shlb	$127, 3133065982
-        	salb	$0x7f,0xbabecafe
-
-// CHECK: 	shlb	$127, 305419896
-        	salb	$0x7f,0x12345678
-
-// CHECK: 	shll	3735928559(%ebx,%ecx,8)
-        	sall	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shlw	32493
-        	salw	0x7eed
-
-// CHECK: 	shll	3133065982
-        	sall	0xbabecafe
-
-// CHECK: 	shll	305419896
-        	sall	0x12345678
-
-// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
-        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shll	$0, 69
-        	shll	$0,0x45
-
-// CHECK: 	shll	$0, 32493
-        	shll	$0,0x7eed
-
-// CHECK: 	shll	$0, 3133065982
-        	shll	$0,0xbabecafe
-
-// CHECK: 	shll	$0, 305419896
-        	shll	$0,0x12345678
-
-// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
-        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shlb	$127, 69
-        	shlb	$0x7f,0x45
-
-// CHECK: 	shlb	$127, 32493
-        	shlb	$0x7f,0x7eed
-
-// CHECK: 	shlb	$127, 3133065982
-        	shlb	$0x7f,0xbabecafe
-
-// CHECK: 	shlb	$127, 305419896
-        	shlb	$0x7f,0x12345678
-
-// CHECK: 	shll	3735928559(%ebx,%ecx,8)
-        	shll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shlw	32493
-        	shlw	0x7eed
-
-// CHECK: 	shll	3133065982
-        	shll	0xbabecafe
-
-// CHECK: 	shll	305419896
-        	shll	0x12345678
-
-// CHECK: 	shrl	$0, 3735928559(%ebx,%ecx,8)
-        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrl	$0, 69
-        	shrl	$0,0x45
-
-// CHECK: 	shrl	$0, 32493
-        	shrl	$0,0x7eed
-
-// CHECK: 	shrl	$0, 3133065982
-        	shrl	$0,0xbabecafe
-
-// CHECK: 	shrl	$0, 305419896
-        	shrl	$0,0x12345678
-
-// CHECK: 	shrb	$127, 3735928559(%ebx,%ecx,8)
-        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrb	$127, 69
-        	shrb	$0x7f,0x45
-
-// CHECK: 	shrb	$127, 32493
-        	shrb	$0x7f,0x7eed
-
-// CHECK: 	shrb	$127, 3133065982
-        	shrb	$0x7f,0xbabecafe
-
-// CHECK: 	shrb	$127, 305419896
-        	shrb	$0x7f,0x12345678
-
-// CHECK: 	shrl	3735928559(%ebx,%ecx,8)
-        	shrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	shrw	32493
-        	shrw	0x7eed
-
-// CHECK: 	shrl	3133065982
-        	shrl	0xbabecafe
-
-// CHECK: 	shrl	305419896
-        	shrl	0x12345678
-
-// CHECK: 	sarl	$0, 3735928559(%ebx,%ecx,8)
-        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarl	$0, 69
-        	sarl	$0,0x45
-
-// CHECK: 	sarl	$0, 32493
-        	sarl	$0,0x7eed
-
-// CHECK: 	sarl	$0, 3133065982
-        	sarl	$0,0xbabecafe
-
-// CHECK: 	sarl	$0, 305419896
-        	sarl	$0,0x12345678
-
-// CHECK: 	sarb	$127, 3735928559(%ebx,%ecx,8)
-        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarb	$127, 69
-        	sarb	$0x7f,0x45
-
-// CHECK: 	sarb	$127, 32493
-        	sarb	$0x7f,0x7eed
-
-// CHECK: 	sarb	$127, 3133065982
-        	sarb	$0x7f,0xbabecafe
-
-// CHECK: 	sarb	$127, 305419896
-        	sarb	$0x7f,0x12345678
-
-// CHECK: 	sarl	3735928559(%ebx,%ecx,8)
-        	sarl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sarw	32493
-        	sarw	0x7eed
-
-// CHECK: 	sarl	3133065982
-        	sarl	0xbabecafe
-
-// CHECK: 	sarl	305419896
-        	sarl	0x12345678
-
-// CHECK: 	call	3133065982
-        	call	0xbabecafe
-
-// CHECK: 	call	*3735928559(%ebx,%ecx,8)
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	call	3133065982
-        	call	0xbabecafe
-
-// CHECK: 	call	305419896
-        	call	0x12345678
-
-// CHECK: 	call	*3135175374
-        	call	*0xbadeface
-
-// CHECK: 	call	*3735928559(%ebx,%ecx,8)
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	call	32493
-        	call	0x7eed
-
-// CHECK: 	call	3133065982
-        	call	0xbabecafe
-
-// CHECK: 	call	305419896
-        	call	0x12345678
-
-// CHECK: 	call	*3135175374
-        	call	*0xbadeface
-
-// CHECK: 	lcallw	*32493
-        	lcallw	*0x7eed
-
-// CHECK: 	jmp	32493
-        	jmp	0x7eed
-
-// CHECK: 	jmp	3133065982
-        	jmp	0xbabecafe
-
-// CHECK: 	jmp	305419896
-        	jmp	0x12345678
-
-// CHECK: 	jmp	-77129852792157442
-        	jmp	0xfeedfacebabecafe
-
-// CHECK: 	jmp	*3735928559(%ebx,%ecx,8)
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	jmp	32493
-        	jmp	0x7eed
-
-// CHECK: 	jmp	3133065982
-        	jmp	0xbabecafe
-
-// CHECK: 	jmp	305419896
-        	jmp	0x12345678
-
-// CHECK: 	jmp	*3135175374
-        	jmp	*0xbadeface
-
-// CHECK: 	jmp	*3735928559(%ebx,%ecx,8)
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	jmp	32493
-        	jmp	0x7eed
-
-// CHECK: 	jmp	3133065982
-        	jmp	0xbabecafe
-
-// CHECK: 	jmp	305419896
-        	jmp	0x12345678
-
-// CHECK: 	jmp	*3135175374
-        	jmp	*0xbadeface
-
-// CHECK: 	ljmpl	*3735928559(%ebx,%ecx,8)
-        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ljmpw	*32493
-        	ljmpw	*0x7eed
-
-// CHECK: 	ljmpl	*3133065982
-        	ljmpl	*0xbabecafe
-
-// CHECK: 	ljmpl	*305419896
-        	ljmpl	*0x12345678
-
-// CHECK: 	ret
-        	ret
-
-// CHECK: 	lret
-        	lret
-
-// CHECK: 	enter	$31438, $127
-        	enter	$0x7ace,$0x7f
-
-// CHECK: 	leave
-        	leave
-
-// CHECK: 	jo	32493
-        	jo	0x7eed
-
-// CHECK: 	jo	3133065982
-        	jo	0xbabecafe
-
-// CHECK: 	jo	305419896
-        	jo	0x12345678
-
-// CHECK: 	jo	-77129852792157442
-        	jo	0xfeedfacebabecafe
-
-// CHECK: 	jno	32493
-        	jno	0x7eed
-
-// CHECK: 	jno	3133065982
-        	jno	0xbabecafe
-
-// CHECK: 	jno	305419896
-        	jno	0x12345678
-
-// CHECK: 	jno	-77129852792157442
-        	jno	0xfeedfacebabecafe
-
-// CHECK: 	jb	32493
-        	jb	0x7eed
-
-// CHECK: 	jb	3133065982
-        	jb	0xbabecafe
-
-// CHECK: 	jb	305419896
-        	jb	0x12345678
-
-// CHECK: 	jb	-77129852792157442
-        	jb	0xfeedfacebabecafe
-
-// CHECK: 	jae	32493
-        	jae	0x7eed
-
-// CHECK: 	jae	3133065982
-        	jae	0xbabecafe
-
-// CHECK: 	jae	305419896
-        	jae	0x12345678
-
-// CHECK: 	jae	-77129852792157442
-        	jae	0xfeedfacebabecafe
-
-// CHECK: 	je	32493
-        	je	0x7eed
-
-// CHECK: 	je	3133065982
-        	je	0xbabecafe
-
-// CHECK: 	je	305419896
-        	je	0x12345678
-
-// CHECK: 	je	-77129852792157442
-        	je	0xfeedfacebabecafe
-
-// CHECK: 	jne	32493
-        	jne	0x7eed
-
-// CHECK: 	jne	3133065982
-        	jne	0xbabecafe
-
-// CHECK: 	jne	305419896
-        	jne	0x12345678
-
-// CHECK: 	jne	-77129852792157442
-        	jne	0xfeedfacebabecafe
-
-// CHECK: 	jbe	32493
-        	jbe	0x7eed
-
-// CHECK: 	jbe	3133065982
-        	jbe	0xbabecafe
-
-// CHECK: 	jbe	305419896
-        	jbe	0x12345678
-
-// CHECK: 	jbe	-77129852792157442
-        	jbe	0xfeedfacebabecafe
-
-// CHECK: 	ja	32493
-        	ja	0x7eed
-
-// CHECK: 	ja	3133065982
-        	ja	0xbabecafe
-
-// CHECK: 	ja	305419896
-        	ja	0x12345678
-
-// CHECK: 	ja	-77129852792157442
-        	ja	0xfeedfacebabecafe
-
-// CHECK: 	js	32493
-        	js	0x7eed
-
-// CHECK: 	js	3133065982
-        	js	0xbabecafe
-
-// CHECK: 	js	305419896
-        	js	0x12345678
-
-// CHECK: 	js	-77129852792157442
-        	js	0xfeedfacebabecafe
-
-// CHECK: 	jns	32493
-        	jns	0x7eed
-
-// CHECK: 	jns	3133065982
-        	jns	0xbabecafe
-
-// CHECK: 	jns	305419896
-        	jns	0x12345678
-
-// CHECK: 	jns	-77129852792157442
-        	jns	0xfeedfacebabecafe
-
-// CHECK: 	jp	32493
-        	jp	0x7eed
-
-// CHECK: 	jp	3133065982
-        	jp	0xbabecafe
-
-// CHECK: 	jp	305419896
-        	jp	0x12345678
-
-// CHECK: 	jp	-77129852792157442
-        	jp	0xfeedfacebabecafe
-
-// CHECK: 	jnp	32493
-        	jnp	0x7eed
-
-// CHECK: 	jnp	3133065982
-        	jnp	0xbabecafe
-
-// CHECK: 	jnp	305419896
-        	jnp	0x12345678
-
-// CHECK: 	jnp	-77129852792157442
-        	jnp	0xfeedfacebabecafe
-
-// CHECK: 	jl	32493
-        	jl	0x7eed
-
-// CHECK: 	jl	3133065982
-        	jl	0xbabecafe
-
-// CHECK: 	jl	305419896
-        	jl	0x12345678
-
-// CHECK: 	jl	-77129852792157442
-        	jl	0xfeedfacebabecafe
-
-// CHECK: 	jge	32493
-        	jge	0x7eed
-
-// CHECK: 	jge	3133065982
-        	jge	0xbabecafe
-
-// CHECK: 	jge	305419896
-        	jge	0x12345678
-
-// CHECK: 	jge	-77129852792157442
-        	jge	0xfeedfacebabecafe
-
-// CHECK: 	jle	32493
-        	jle	0x7eed
-
-// CHECK: 	jle	3133065982
-        	jle	0xbabecafe
-
-// CHECK: 	jle	305419896
-        	jle	0x12345678
-
-// CHECK: 	jle	-77129852792157442
-        	jle	0xfeedfacebabecafe
-
-// CHECK: 	jg	32493
-        	jg	0x7eed
-
-// CHECK: 	jg	3133065982
-        	jg	0xbabecafe
-
-// CHECK: 	jg	305419896
-        	jg	0x12345678
-
-// CHECK: 	jg	-77129852792157442
-        	jg	0xfeedfacebabecafe
-
-// CHECK: 	seto	%bl
-        	seto	%bl
-
-// CHECK: 	seto	3735928559(%ebx,%ecx,8)
-        	seto	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	seto	32493
-        	seto	0x7eed
-
-// CHECK: 	seto	3133065982
-        	seto	0xbabecafe
-
-// CHECK: 	seto	305419896
-        	seto	0x12345678
-
-// CHECK: 	setno	%bl
-        	setno	%bl
-
-// CHECK: 	setno	3735928559(%ebx,%ecx,8)
-        	setno	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setno	32493
-        	setno	0x7eed
-
-// CHECK: 	setno	3133065982
-        	setno	0xbabecafe
-
-// CHECK: 	setno	305419896
-        	setno	0x12345678
-
-// CHECK: 	setb	%bl
-        	setb	%bl
-
-// CHECK: 	setb	3735928559(%ebx,%ecx,8)
-        	setb	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setb	32493
-        	setb	0x7eed
-
-// CHECK: 	setb	3133065982
-        	setb	0xbabecafe
-
-// CHECK: 	setb	305419896
-        	setb	0x12345678
-
-// CHECK: 	setae	%bl
-        	setae	%bl
-
-// CHECK: 	setae	3735928559(%ebx,%ecx,8)
-        	setae	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setae	32493
-        	setae	0x7eed
-
-// CHECK: 	setae	3133065982
-        	setae	0xbabecafe
-
-// CHECK: 	setae	305419896
-        	setae	0x12345678
-
-// CHECK: 	sete	%bl
-        	sete	%bl
-
-// CHECK: 	sete	3735928559(%ebx,%ecx,8)
-        	sete	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sete	32493
-        	sete	0x7eed
-
-// CHECK: 	sete	3133065982
-        	sete	0xbabecafe
-
-// CHECK: 	sete	305419896
-        	sete	0x12345678
-
-// CHECK: 	setne	%bl
-        	setne	%bl
-
-// CHECK: 	setne	3735928559(%ebx,%ecx,8)
-        	setne	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setne	32493
-        	setne	0x7eed
-
-// CHECK: 	setne	3133065982
-        	setne	0xbabecafe
-
-// CHECK: 	setne	305419896
-        	setne	0x12345678
-
-// CHECK: 	setbe	%bl
-        	setbe	%bl
-
-// CHECK: 	setbe	3735928559(%ebx,%ecx,8)
-        	setbe	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setbe	32493
-        	setbe	0x7eed
-
-// CHECK: 	setbe	3133065982
-        	setbe	0xbabecafe
-
-// CHECK: 	setbe	305419896
-        	setbe	0x12345678
-
-// CHECK: 	seta	%bl
-        	seta	%bl
-
-// CHECK: 	seta	3735928559(%ebx,%ecx,8)
-        	seta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	seta	32493
-        	seta	0x7eed
-
-// CHECK: 	seta	3133065982
-        	seta	0xbabecafe
-
-// CHECK: 	seta	305419896
-        	seta	0x12345678
-
-// CHECK: 	sets	%bl
-        	sets	%bl
-
-// CHECK: 	sets	3735928559(%ebx,%ecx,8)
-        	sets	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	sets	32493
-        	sets	0x7eed
-
-// CHECK: 	sets	3133065982
-        	sets	0xbabecafe
-
-// CHECK: 	sets	305419896
-        	sets	0x12345678
-
-// CHECK: 	setns	%bl
-        	setns	%bl
-
-// CHECK: 	setns	3735928559(%ebx,%ecx,8)
-        	setns	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setns	32493
-        	setns	0x7eed
-
-// CHECK: 	setns	3133065982
-        	setns	0xbabecafe
-
-// CHECK: 	setns	305419896
-        	setns	0x12345678
-
-// CHECK: 	setp	%bl
-        	setp	%bl
-
-// CHECK: 	setp	3735928559(%ebx,%ecx,8)
-        	setp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setp	32493
-        	setp	0x7eed
-
-// CHECK: 	setp	3133065982
-        	setp	0xbabecafe
-
-// CHECK: 	setp	305419896
-        	setp	0x12345678
-
-// CHECK: 	setnp	%bl
-        	setnp	%bl
-
-// CHECK: 	setnp	3735928559(%ebx,%ecx,8)
-        	setnp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setnp	32493
-        	setnp	0x7eed
-
-// CHECK: 	setnp	3133065982
-        	setnp	0xbabecafe
-
-// CHECK: 	setnp	305419896
-        	setnp	0x12345678
-
-// CHECK: 	setl	%bl
-        	setl	%bl
-
-// CHECK: 	setl	3735928559(%ebx,%ecx,8)
-        	setl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setl	32493
-        	setl	0x7eed
-
-// CHECK: 	setl	3133065982
-        	setl	0xbabecafe
-
-// CHECK: 	setl	305419896
-        	setl	0x12345678
-
-// CHECK: 	setge	%bl
-        	setge	%bl
-
-// CHECK: 	setge	3735928559(%ebx,%ecx,8)
-        	setge	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setge	32493
-        	setge	0x7eed
-
-// CHECK: 	setge	3133065982
-        	setge	0xbabecafe
-
-// CHECK: 	setge	305419896
-        	setge	0x12345678
-
-// CHECK: 	setle	%bl
-        	setle	%bl
-
-// CHECK: 	setle	3735928559(%ebx,%ecx,8)
-        	setle	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setle	32493
-        	setle	0x7eed
-
-// CHECK: 	setle	3133065982
-        	setle	0xbabecafe
-
-// CHECK: 	setle	305419896
-        	setle	0x12345678
-
-// CHECK: 	setg	%bl
-        	setg	%bl
-
-// CHECK: 	setg	3735928559(%ebx,%ecx,8)
-        	setg	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	setg	32493
-        	setg	0x7eed
-
-// CHECK: 	setg	3133065982
-        	setg	0xbabecafe
-
-// CHECK: 	setg	305419896
-        	setg	0x12345678
-
-// CHECK: 	int	$127
-        	int	$0x7f
-
-// CHECK: 	rsm
-        	rsm
-
-// CHECK: 	hlt
-        	hlt
-
-// CHECK: 	nopl	3735928559(%ebx,%ecx,8)
-        	nopl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	nopw	32493
-        	nopw	0x7eed
-
-// CHECK: 	nopl	3133065982
-        	nopl	0xbabecafe
-
-// CHECK: 	nopl	305419896
-        	nopl	0x12345678
-
-// CHECK: 	nop
-        	nop
-
-// CHECK: 	lldtw	32493
-        	lldtw	0x7eed
-
-// CHECK: 	lmsww	32493
-        	lmsww	0x7eed
-
-// CHECK: 	ltrw	32493
-        	ltrw	0x7eed
-
-// CHECK: 	sldtw	32493
-        	sldtw	0x7eed
-
-// CHECK: 	smsww	32493
-        	smsww	0x7eed
-
-// CHECK: 	strw	32493
-        	strw	0x7eed
-
-// CHECK: 	verr	%bx
-        	verr	%bx
-
-// CHECK: 	verr	3735928559(%ebx,%ecx,8)
-        	verr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	verr	3133065982
-        	verr	0xbabecafe
-
-// CHECK: 	verr	305419896
-        	verr	0x12345678
-
-// CHECK: 	verw	%bx
-        	verw	%bx
-
-// CHECK: 	verw	3735928559(%ebx,%ecx,8)
-        	verw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	verw	3133065982
-        	verw	0xbabecafe
-
-// CHECK: 	verw	305419896
-        	verw	0x12345678
-
-// CHECK: 	fld	%st(2)
-        	fld	%st(2)
-
-// CHECK: 	fldl	3735928559(%ebx,%ecx,8)
-        	fldl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fldl	3133065982
-        	fldl	0xbabecafe
-
-// CHECK: 	fldl	305419896
-        	fldl	0x12345678
-
-// CHECK: 	fld	%st(2)
-        	fld	%st(2)
-
-// CHECK: 	fildl	3735928559(%ebx,%ecx,8)
-        	fildl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fildl	3133065982
-        	fildl	0xbabecafe
-
-// CHECK: 	fildl	305419896
-        	fildl	0x12345678
-
-// CHECK: 	fildll	3735928559(%ebx,%ecx,8)
-        	fildll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fildll	32493
-        	fildll	0x7eed
-
-// CHECK: 	fildll	3133065982
-        	fildll	0xbabecafe
-
-// CHECK: 	fildll	305419896
-        	fildll	0x12345678
-
-// CHECK: 	fldt	3735928559(%ebx,%ecx,8)
-        	fldt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fldt	32493
-        	fldt	0x7eed
-
-// CHECK: 	fldt	3133065982
-        	fldt	0xbabecafe
-
-// CHECK: 	fldt	305419896
-        	fldt	0x12345678
-
-// CHECK: 	fbld	3735928559(%ebx,%ecx,8)
-        	fbld	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fbld	32493
-        	fbld	0x7eed
-
-// CHECK: 	fbld	3133065982
-        	fbld	0xbabecafe
-
-// CHECK: 	fbld	305419896
-        	fbld	0x12345678
-
-// CHECK: 	fst	%st(2)
-        	fst	%st(2)
-
-// CHECK: 	fstl	3735928559(%ebx,%ecx,8)
-        	fstl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstl	3133065982
-        	fstl	0xbabecafe
-
-// CHECK: 	fstl	305419896
-        	fstl	0x12345678
-
-// CHECK: 	fst	%st(2)
-        	fst	%st(2)
-
-// CHECK: 	fistl	3735928559(%ebx,%ecx,8)
-        	fistl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistl	3133065982
-        	fistl	0xbabecafe
-
-// CHECK: 	fistl	305419896
-        	fistl	0x12345678
-
-// CHECK: 	fstp	%st(2)
-        	fstp	%st(2)
-
-// CHECK: 	fstpl	3735928559(%ebx,%ecx,8)
-        	fstpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstpl	3133065982
-        	fstpl	0xbabecafe
-
-// CHECK: 	fstpl	305419896
-        	fstpl	0x12345678
-
-// CHECK: 	fstp	%st(2)
-        	fstp	%st(2)
-
-// CHECK: 	fistpl	3735928559(%ebx,%ecx,8)
-        	fistpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistpl	3133065982
-        	fistpl	0xbabecafe
-
-// CHECK: 	fistpl	305419896
-        	fistpl	0x12345678
-
-// CHECK: 	fistpll	3735928559(%ebx,%ecx,8)
-        	fistpll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fistpll	32493
-        	fistpll	0x7eed
-
-// CHECK: 	fistpll	3133065982
-        	fistpll	0xbabecafe
-
-// CHECK: 	fistpll	305419896
-        	fistpll	0x12345678
-
-// CHECK: 	fstpt	3735928559(%ebx,%ecx,8)
-        	fstpt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fstpt	32493
-        	fstpt	0x7eed
-
-// CHECK: 	fstpt	3133065982
-        	fstpt	0xbabecafe
-
-// CHECK: 	fstpt	305419896
-        	fstpt	0x12345678
-
-// CHECK: 	fbstp	3735928559(%ebx,%ecx,8)
-        	fbstp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fbstp	32493
-        	fbstp	0x7eed
-
-// CHECK: 	fbstp	3133065982
-        	fbstp	0xbabecafe
-
-// CHECK: 	fbstp	305419896
-        	fbstp	0x12345678
-
-// CHECK: 	fxch	%st(2)
-        	fxch	%st(2)
-
-// CHECK: 	fcom	%st(2)
-        	fcom	%st(2)
-
-// CHECK: 	fcoml	3735928559(%ebx,%ecx,8)
-        	fcoml	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fcoml	3133065982
-        	fcoml	0xbabecafe
-
-// CHECK: 	fcoml	305419896
-        	fcoml	0x12345678
-
-// CHECK: 	fcom	%st(2)
-        	fcom	%st(2)
-
-// CHECK: 	ficoml	3735928559(%ebx,%ecx,8)
-        	ficoml	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ficoml	3133065982
-        	ficoml	0xbabecafe
-
-// CHECK: 	ficoml	305419896
-        	ficoml	0x12345678
-
-// CHECK: 	fcomp	%st(2)
-        	fcomp	%st(2)
-
-// CHECK: 	fcompl	3735928559(%ebx,%ecx,8)
-        	fcompl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fcompl	3133065982
-        	fcompl	0xbabecafe
-
-// CHECK: 	fcompl	305419896
-        	fcompl	0x12345678
-
-// CHECK: 	fcomp	%st(2)
-        	fcomp	%st(2)
-
-// CHECK: 	ficompl	3735928559(%ebx,%ecx,8)
-        	ficompl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ficompl	3133065982
-        	ficompl	0xbabecafe
-
-// CHECK: 	ficompl	305419896
-        	ficompl	0x12345678
-
-// CHECK: 	fcompp
-        	fcompp
-
-// CHECK: 	fucom	%st(2)
-        	fucom	%st(2)
-
-// CHECK: 	fucomp	%st(2)
-        	fucomp	%st(2)
-
-// CHECK: 	fucompp
-        	fucompp
-
-// CHECK: 	ftst
-        	ftst
-
-// CHECK: 	fxam
-        	fxam
-
-// CHECK: 	fld1
-        	fld1
-
-// CHECK: 	fldl2t
-        	fldl2t
-
-// CHECK: 	fldl2e
-        	fldl2e
-
-// CHECK: 	fldpi
-        	fldpi
-
-// CHECK: 	fldlg2
-        	fldlg2
-
-// CHECK: 	fldln2
-        	fldln2
-
-// CHECK: 	fldz
-        	fldz
-
-// CHECK: 	fadd	%st(2)
-        	fadd	%st(2)
-
-// CHECK: 	faddl	3735928559(%ebx,%ecx,8)
-        	faddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	faddl	3133065982
-        	faddl	0xbabecafe
-
-// CHECK: 	faddl	305419896
-        	faddl	0x12345678
-
-// CHECK: 	fiaddl	3735928559(%ebx,%ecx,8)
-        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fiaddl	3133065982
-        	fiaddl	0xbabecafe
-
-// CHECK: 	fiaddl	305419896
-        	fiaddl	0x12345678
-
-// CHECK: 	faddp	%st(2)
-        	faddp	%st(2)
-
-// CHECK: 	fsub	%st(2)
-        	fsub	%st(2)
-
-// CHECK: 	fsubl	3735928559(%ebx,%ecx,8)
-        	fsubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fsubl	3133065982
-        	fsubl	0xbabecafe
-
-// CHECK: 	fsubl	305419896
-        	fsubl	0x12345678
-
-// CHECK: 	fisubl	3735928559(%ebx,%ecx,8)
-        	fisubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fisubl	3133065982
-        	fisubl	0xbabecafe
-
-// CHECK: 	fisubl	305419896
-        	fisubl	0x12345678
-
-// CHECK: 	fsubp	%st(2)
-        	fsubp	%st(2)
-
-// CHECK: 	fsubr	%st(2)
-        	fsubr	%st(2)
-
-// CHECK: 	fsubrl	3735928559(%ebx,%ecx,8)
-        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fsubrl	3133065982
-        	fsubrl	0xbabecafe
-
-// CHECK: 	fsubrl	305419896
-        	fsubrl	0x12345678
-
-// CHECK: 	fisubrl	3735928559(%ebx,%ecx,8)
-        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fisubrl	3133065982
-        	fisubrl	0xbabecafe
-
-// CHECK: 	fisubrl	305419896
-        	fisubrl	0x12345678
-
-// CHECK: 	fsubrp	%st(2)
-        	fsubrp	%st(2)
-
-// CHECK: 	fmul	%st(2)
-        	fmul	%st(2)
-
-// CHECK: 	fmull	3735928559(%ebx,%ecx,8)
-        	fmull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fmull	3133065982
-        	fmull	0xbabecafe
-
-// CHECK: 	fmull	305419896
-        	fmull	0x12345678
-
-// CHECK: 	fimull	3735928559(%ebx,%ecx,8)
-        	fimull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fimull	3133065982
-        	fimull	0xbabecafe
-
-// CHECK: 	fimull	305419896
-        	fimull	0x12345678
-
-// CHECK: 	fmulp	%st(2)
-        	fmulp	%st(2)
-
-// CHECK: 	fdiv	%st(2)
-        	fdiv	%st(2)
-
-// CHECK: 	fdivl	3735928559(%ebx,%ecx,8)
-        	fdivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fdivl	3133065982
-        	fdivl	0xbabecafe
-
-// CHECK: 	fdivl	305419896
-        	fdivl	0x12345678
-
-// CHECK: 	fidivl	3735928559(%ebx,%ecx,8)
-        	fidivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fidivl	3133065982
-        	fidivl	0xbabecafe
-
-// CHECK: 	fidivl	305419896
-        	fidivl	0x12345678
-
-// CHECK: 	fdivp	%st(2)
-        	fdivp	%st(2)
-
-// CHECK: 	fdivr	%st(2)
-        	fdivr	%st(2)
-
-// CHECK: 	fdivrl	3735928559(%ebx,%ecx,8)
-        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fdivrl	3133065982
-        	fdivrl	0xbabecafe
-
-// CHECK: 	fdivrl	305419896
-        	fdivrl	0x12345678
-
-// CHECK: 	fidivrl	3735928559(%ebx,%ecx,8)
-        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fidivrl	3133065982
-        	fidivrl	0xbabecafe
-
-// CHECK: 	fidivrl	305419896
-        	fidivrl	0x12345678
-
-// CHECK: 	fdivrp	%st(2)
-        	fdivrp	%st(2)
-
-// CHECK: 	f2xm1
-        	f2xm1
-
-// CHECK: 	fyl2x
-        	fyl2x
-
-// CHECK: 	fptan
-        	fptan
-
-// CHECK: 	fpatan
-        	fpatan
-
-// CHECK: 	fxtract
-        	fxtract
-
-// CHECK: 	fprem1
-        	fprem1
-
-// CHECK: 	fdecstp
-        	fdecstp
-
-// CHECK: 	fincstp
-        	fincstp
-
-// CHECK: 	fprem
-        	fprem
-
-// CHECK: 	fyl2xp1
-        	fyl2xp1
-
-// CHECK: 	fsqrt
-        	fsqrt
-
-// CHECK: 	fsincos
-        	fsincos
-
-// CHECK: 	frndint
-        	frndint
-
-// CHECK: 	fscale
-        	fscale
-
-// CHECK: 	fsin
-        	fsin
-
-// CHECK: 	fcos
-        	fcos
-
-// CHECK: 	fchs
-        	fchs
-
-// CHECK: 	fabs
-        	fabs
-
-// CHECK: 	fninit
-        	fninit
-
-// CHECK: 	fldcw	3735928559(%ebx,%ecx,8)
-        	fldcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fldcw	3133065982
-        	fldcw	0xbabecafe
-
-// CHECK: 	fldcw	305419896
-        	fldcw	0x12345678
-
-// CHECK: 	fnstcw	3735928559(%ebx,%ecx,8)
-        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fnstcw	3133065982
-        	fnstcw	0xbabecafe
-
-// CHECK: 	fnstcw	305419896
-        	fnstcw	0x12345678
-
-// CHECK: 	fnstsw	3735928559(%ebx,%ecx,8)
-        	fnstsw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fnstsw	3133065982
-        	fnstsw	0xbabecafe
-
-// CHECK: 	fnstsw	305419896
-        	fnstsw	0x12345678
-
-// CHECK: 	fnclex
-        	fnclex
-
-// CHECK: 	fnstenv	32493
-        	fnstenv	0x7eed
-
-// CHECK: 	fldenv	32493
-        	fldenv	0x7eed
-
-// CHECK: 	fnsave	32493
-        	fnsave	0x7eed
-
-// CHECK: 	frstor	32493
-        	frstor	0x7eed
-
-// CHECK: 	ffree	%st(2)
-        	ffree	%st(2)
-
-// CHECK: 	fnop
-        	fnop
-
-// CHECK: 	invd
-        	invd
-
-// CHECK: 	wbinvd
-        	wbinvd
-
-// CHECK: 	cpuid
-        	cpuid
-
-// CHECK: 	wrmsr
-        	wrmsr
-
-// CHECK: 	rdtsc
-        	rdtsc
-
-// CHECK: 	rdmsr
-        	rdmsr
-
-// CHECK: 	cmpxchg8b	3735928559(%ebx,%ecx,8)
-        	cmpxchg8b	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	cmpxchg8b	32493
-        	cmpxchg8b	0x7eed
-
-// CHECK: 	cmpxchg8b	3133065982
-        	cmpxchg8b	0xbabecafe
-
-// CHECK: 	cmpxchg8b	305419896
-        	cmpxchg8b	0x12345678
-
-// CHECK: 	sysenter
-        	sysenter
-
-// CHECK: 	sysexit
-        	sysexit
-
-// CHECK: 	fxsave	3735928559(%ebx,%ecx,8)
-        	fxsave	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fxsave	32493
-        	fxsave	0x7eed
-
-// CHECK: 	fxsave	3133065982
-        	fxsave	0xbabecafe
-
-// CHECK: 	fxsave	305419896
-        	fxsave	0x12345678
-
-// CHECK: 	fxrstor	3735928559(%ebx,%ecx,8)
-        	fxrstor	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fxrstor	32493
-        	fxrstor	0x7eed
-
-// CHECK: 	fxrstor	3133065982
-        	fxrstor	0xbabecafe
-
-// CHECK: 	fxrstor	305419896
-        	fxrstor	0x12345678
-
-// CHECK: 	rdpmc
-        	rdpmc
-
-// CHECK: 	ud2
-        	ud2
-
-// CHECK: 	fcmovb	%st(2), %st(0)
-        	fcmovb	%st(2),%st
-
-// CHECK: 	fcmove	%st(2), %st(0)
-        	fcmove	%st(2),%st
-
-// CHECK: 	fcmovbe	%st(2), %st(0)
-        	fcmovbe	%st(2),%st
-
-// CHECK: 	fcmovu	 %st(2), %st(0)
-        	fcmovu	%st(2),%st
-
-// CHECK: 	fcmovnb	%st(2), %st(0)
-        	fcmovnb	%st(2),%st
-
-// CHECK: 	fcmovne	%st(2), %st(0)
-        	fcmovne	%st(2),%st
-
-// CHECK: 	fcmovnbe	%st(2), %st(0)
-        	fcmovnbe	%st(2),%st
-
-// CHECK: 	fcmovnu	%st(2), %st(0)
-        	fcmovnu	%st(2),%st
-
-// CHECK: 	fcomi	%st(2), %st(0)
-        	fcomi	%st(2),%st
-
-// CHECK: 	fucomi	%st(2), %st(0)
-        	fucomi	%st(2),%st
-
-// CHECK: 	fcomip	%st(2), %st(0)
-        	fcomip	%st(2),%st
-
-// CHECK: 	fucomip	%st(2), %st(0)
-        	fucomip	%st(2),%st
-
-// CHECK: 	movnti	%ecx, 3735928559(%ebx,%ecx,8)
-        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movnti	%ecx, 69
-        	movnti	%ecx,0x45
-
-// CHECK: 	movnti	%ecx, 32493
-        	movnti	%ecx,0x7eed
-
-// CHECK: 	movnti	%ecx, 3133065982
-        	movnti	%ecx,0xbabecafe
-
-// CHECK: 	movnti	%ecx, 305419896
-        	movnti	%ecx,0x12345678
-
-// CHECK: 	clflush	3735928559(%ebx,%ecx,8)
-        	clflush	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	clflush	32493
-        	clflush	0x7eed
-
-// CHECK: 	clflush	3133065982
-        	clflush	0xbabecafe
-
-// CHECK: 	clflush	305419896
-        	clflush	0x12345678
-
-// CHECK: 	pause
-        	pause
-
-// CHECK: 	sfence
-        	sfence
-
-// CHECK: 	lfence
-        	lfence
-
-// CHECK: 	mfence
-        	mfence
-
-// CHECK: 	emms
-        	emms
-
-// CHECK: 	movd	%ecx, %mm3
-        	movd	%ecx,%mm3
-
-// CHECK: 	movd	3735928559(%ebx,%ecx,8), %mm3
-        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	movd	69, %mm3
-        	movd	0x45,%mm3
-
-// CHECK: 	movd	32493, %mm3
-        	movd	0x7eed,%mm3
-
-// CHECK: 	movd	3133065982, %mm3
-        	movd	0xbabecafe,%mm3
-
-// CHECK: 	movd	305419896, %mm3
-        	movd	0x12345678,%mm3
-
-// CHECK: 	movd	%mm3, %ecx
-        	movd	%mm3,%ecx
-
-// CHECK: 	movd	%mm3, 3735928559(%ebx,%ecx,8)
-        	movd	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movd	%mm3, 69
-        	movd	%mm3,0x45
-
-// CHECK: 	movd	%mm3, 32493
-        	movd	%mm3,0x7eed
-
-// CHECK: 	movd	%mm3, 3133065982
-        	movd	%mm3,0xbabecafe
-
-// CHECK: 	movd	%mm3, 305419896
-        	movd	%mm3,0x12345678
-
-// CHECK: 	movd	%ecx, %xmm5
-        	movd	%ecx,%xmm5
-
-// CHECK: 	movd	3735928559(%ebx,%ecx,8), %xmm5
-        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movd	69, %xmm5
-        	movd	0x45,%xmm5
-
-// CHECK: 	movd	32493, %xmm5
-        	movd	0x7eed,%xmm5
-
-// CHECK: 	movd	3133065982, %xmm5
-        	movd	0xbabecafe,%xmm5
-
-// CHECK: 	movd	305419896, %xmm5
-        	movd	0x12345678,%xmm5
-
-// CHECK: 	movd	%xmm5, %ecx
-        	movd	%xmm5,%ecx
-
-// CHECK: 	movd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movd	%xmm5, 69
-        	movd	%xmm5,0x45
-
-// CHECK: 	movd	%xmm5, 32493
-        	movd	%xmm5,0x7eed
-
-// CHECK: 	movd	%xmm5, 3133065982
-        	movd	%xmm5,0xbabecafe
-
-// CHECK: 	movd	%xmm5, 305419896
-        	movd	%xmm5,0x12345678
-
-// CHECK: 	movq	3735928559(%ebx,%ecx,8), %mm3
-        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	movq	69, %mm3
-        	movq	0x45,%mm3
-
-// CHECK: 	movq	32493, %mm3
-        	movq	0x7eed,%mm3
-
-// CHECK: 	movq	3133065982, %mm3
-        	movq	0xbabecafe,%mm3
-
-// CHECK: 	movq	305419896, %mm3
-        	movq	0x12345678,%mm3
-
-// CHECK: 	movq	%mm3, %mm3
-        	movq	%mm3,%mm3
-
-// CHECK: 	movq	%mm3, 3735928559(%ebx,%ecx,8)
-        	movq	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movq	%mm3, 69
-        	movq	%mm3,0x45
-
-// CHECK: 	movq	%mm3, 32493
-        	movq	%mm3,0x7eed
-
-// CHECK: 	movq	%mm3, 3133065982
-        	movq	%mm3,0xbabecafe
-
-// CHECK: 	movq	%mm3, 305419896
-        	movq	%mm3,0x12345678
-
-// CHECK: 	movq	%mm3, %mm3
-        	movq	%mm3,%mm3
-
-// CHECK: 	movq	3735928559(%ebx,%ecx,8), %xmm5
-        	movq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movq	69, %xmm5
-        	movq	0x45,%xmm5
-
-// CHECK: 	movq	32493, %xmm5
-        	movq	0x7eed,%xmm5
-
-// CHECK: 	movq	3133065982, %xmm5
-        	movq	0xbabecafe,%xmm5
-
-// CHECK: 	movq	305419896, %xmm5
-        	movq	0x12345678,%xmm5
-
-// CHECK: 	movq	%xmm5, %xmm5
-        	movq	%xmm5,%xmm5
-
-// CHECK: 	movq	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movq	%xmm5, 69
-        	movq	%xmm5,0x45
-
-// CHECK: 	movq	%xmm5, 32493
-        	movq	%xmm5,0x7eed
-
-// CHECK: 	movq	%xmm5, 3133065982
-        	movq	%xmm5,0xbabecafe
-
-// CHECK: 	movq	%xmm5, 305419896
-        	movq	%xmm5,0x12345678
-
-// CHECK: 	movq	%xmm5, %xmm5
-        	movq	%xmm5,%xmm5
-
-// CHECK: 	packssdw	3735928559(%ebx,%ecx,8), %mm3
-        	packssdw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	packssdw	69, %mm3
-        	packssdw	0x45,%mm3
-
-// CHECK: 	packssdw	32493, %mm3
-        	packssdw	0x7eed,%mm3
-
-// CHECK: 	packssdw	3133065982, %mm3
-        	packssdw	0xbabecafe,%mm3
-
-// CHECK: 	packssdw	305419896, %mm3
-        	packssdw	0x12345678,%mm3
-
-// CHECK: 	packssdw	%mm3, %mm3
-        	packssdw	%mm3,%mm3
-
-// CHECK: 	packssdw	3735928559(%ebx,%ecx,8), %xmm5
-        	packssdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	packssdw	69, %xmm5
-        	packssdw	0x45,%xmm5
-
-// CHECK: 	packssdw	32493, %xmm5
-        	packssdw	0x7eed,%xmm5
-
-// CHECK: 	packssdw	3133065982, %xmm5
-        	packssdw	0xbabecafe,%xmm5
-
-// CHECK: 	packssdw	305419896, %xmm5
-        	packssdw	0x12345678,%xmm5
-
-// CHECK: 	packssdw	%xmm5, %xmm5
-        	packssdw	%xmm5,%xmm5
-
-// CHECK: 	packsswb	3735928559(%ebx,%ecx,8), %mm3
-        	packsswb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	packsswb	69, %mm3
-        	packsswb	0x45,%mm3
-
-// CHECK: 	packsswb	32493, %mm3
-        	packsswb	0x7eed,%mm3
-
-// CHECK: 	packsswb	3133065982, %mm3
-        	packsswb	0xbabecafe,%mm3
-
-// CHECK: 	packsswb	305419896, %mm3
-        	packsswb	0x12345678,%mm3
-
-// CHECK: 	packsswb	%mm3, %mm3
-        	packsswb	%mm3,%mm3
-
-// CHECK: 	packsswb	3735928559(%ebx,%ecx,8), %xmm5
-        	packsswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	packsswb	69, %xmm5
-        	packsswb	0x45,%xmm5
-
-// CHECK: 	packsswb	32493, %xmm5
-        	packsswb	0x7eed,%xmm5
-
-// CHECK: 	packsswb	3133065982, %xmm5
-        	packsswb	0xbabecafe,%xmm5
-
-// CHECK: 	packsswb	305419896, %xmm5
-        	packsswb	0x12345678,%xmm5
-
-// CHECK: 	packsswb	%xmm5, %xmm5
-        	packsswb	%xmm5,%xmm5
-
-// CHECK: 	packuswb	3735928559(%ebx,%ecx,8), %mm3
-        	packuswb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	packuswb	69, %mm3
-        	packuswb	0x45,%mm3
-
-// CHECK: 	packuswb	32493, %mm3
-        	packuswb	0x7eed,%mm3
-
-// CHECK: 	packuswb	3133065982, %mm3
-        	packuswb	0xbabecafe,%mm3
-
-// CHECK: 	packuswb	305419896, %mm3
-        	packuswb	0x12345678,%mm3
-
-// CHECK: 	packuswb	%mm3, %mm3
-        	packuswb	%mm3,%mm3
-
-// CHECK: 	packuswb	3735928559(%ebx,%ecx,8), %xmm5
-        	packuswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	packuswb	69, %xmm5
-        	packuswb	0x45,%xmm5
-
-// CHECK: 	packuswb	32493, %xmm5
-        	packuswb	0x7eed,%xmm5
-
-// CHECK: 	packuswb	3133065982, %xmm5
-        	packuswb	0xbabecafe,%xmm5
-
-// CHECK: 	packuswb	305419896, %xmm5
-        	packuswb	0x12345678,%xmm5
-
-// CHECK: 	packuswb	%xmm5, %xmm5
-        	packuswb	%xmm5,%xmm5
-
-// CHECK: 	paddb	3735928559(%ebx,%ecx,8), %mm3
-        	paddb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddb	69, %mm3
-        	paddb	0x45,%mm3
-
-// CHECK: 	paddb	32493, %mm3
-        	paddb	0x7eed,%mm3
-
-// CHECK: 	paddb	3133065982, %mm3
-        	paddb	0xbabecafe,%mm3
-
-// CHECK: 	paddb	305419896, %mm3
-        	paddb	0x12345678,%mm3
-
-// CHECK: 	paddb	%mm3, %mm3
-        	paddb	%mm3,%mm3
-
-// CHECK: 	paddb	3735928559(%ebx,%ecx,8), %xmm5
-        	paddb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddb	69, %xmm5
-        	paddb	0x45,%xmm5
-
-// CHECK: 	paddb	32493, %xmm5
-        	paddb	0x7eed,%xmm5
-
-// CHECK: 	paddb	3133065982, %xmm5
-        	paddb	0xbabecafe,%xmm5
-
-// CHECK: 	paddb	305419896, %xmm5
-        	paddb	0x12345678,%xmm5
-
-// CHECK: 	paddb	%xmm5, %xmm5
-        	paddb	%xmm5,%xmm5
-
-// CHECK: 	paddw	3735928559(%ebx,%ecx,8), %mm3
-        	paddw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddw	69, %mm3
-        	paddw	0x45,%mm3
-
-// CHECK: 	paddw	32493, %mm3
-        	paddw	0x7eed,%mm3
-
-// CHECK: 	paddw	3133065982, %mm3
-        	paddw	0xbabecafe,%mm3
-
-// CHECK: 	paddw	305419896, %mm3
-        	paddw	0x12345678,%mm3
-
-// CHECK: 	paddw	%mm3, %mm3
-        	paddw	%mm3,%mm3
-
-// CHECK: 	paddw	3735928559(%ebx,%ecx,8), %xmm5
-        	paddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddw	69, %xmm5
-        	paddw	0x45,%xmm5
-
-// CHECK: 	paddw	32493, %xmm5
-        	paddw	0x7eed,%xmm5
-
-// CHECK: 	paddw	3133065982, %xmm5
-        	paddw	0xbabecafe,%xmm5
-
-// CHECK: 	paddw	305419896, %xmm5
-        	paddw	0x12345678,%xmm5
-
-// CHECK: 	paddw	%xmm5, %xmm5
-        	paddw	%xmm5,%xmm5
-
-// CHECK: 	paddd	3735928559(%ebx,%ecx,8), %mm3
-        	paddd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddd	69, %mm3
-        	paddd	0x45,%mm3
-
-// CHECK: 	paddd	32493, %mm3
-        	paddd	0x7eed,%mm3
-
-// CHECK: 	paddd	3133065982, %mm3
-        	paddd	0xbabecafe,%mm3
-
-// CHECK: 	paddd	305419896, %mm3
-        	paddd	0x12345678,%mm3
-
-// CHECK: 	paddd	%mm3, %mm3
-        	paddd	%mm3,%mm3
-
-// CHECK: 	paddd	3735928559(%ebx,%ecx,8), %xmm5
-        	paddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddd	69, %xmm5
-        	paddd	0x45,%xmm5
-
-// CHECK: 	paddd	32493, %xmm5
-        	paddd	0x7eed,%xmm5
-
-// CHECK: 	paddd	3133065982, %xmm5
-        	paddd	0xbabecafe,%xmm5
-
-// CHECK: 	paddd	305419896, %xmm5
-        	paddd	0x12345678,%xmm5
-
-// CHECK: 	paddd	%xmm5, %xmm5
-        	paddd	%xmm5,%xmm5
-
-// CHECK: 	paddq	3735928559(%ebx,%ecx,8), %mm3
-        	paddq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddq	69, %mm3
-        	paddq	0x45,%mm3
-
-// CHECK: 	paddq	32493, %mm3
-        	paddq	0x7eed,%mm3
-
-// CHECK: 	paddq	3133065982, %mm3
-        	paddq	0xbabecafe,%mm3
-
-// CHECK: 	paddq	305419896, %mm3
-        	paddq	0x12345678,%mm3
-
-// CHECK: 	paddq	%mm3, %mm3
-        	paddq	%mm3,%mm3
-
-// CHECK: 	paddq	3735928559(%ebx,%ecx,8), %xmm5
-        	paddq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddq	69, %xmm5
-        	paddq	0x45,%xmm5
-
-// CHECK: 	paddq	32493, %xmm5
-        	paddq	0x7eed,%xmm5
-
-// CHECK: 	paddq	3133065982, %xmm5
-        	paddq	0xbabecafe,%xmm5
-
-// CHECK: 	paddq	305419896, %xmm5
-        	paddq	0x12345678,%xmm5
-
-// CHECK: 	paddq	%xmm5, %xmm5
-        	paddq	%xmm5,%xmm5
-
-// CHECK: 	paddsb	3735928559(%ebx,%ecx,8), %mm3
-        	paddsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddsb	69, %mm3
-        	paddsb	0x45,%mm3
-
-// CHECK: 	paddsb	32493, %mm3
-        	paddsb	0x7eed,%mm3
-
-// CHECK: 	paddsb	3133065982, %mm3
-        	paddsb	0xbabecafe,%mm3
-
-// CHECK: 	paddsb	305419896, %mm3
-        	paddsb	0x12345678,%mm3
-
-// CHECK: 	paddsb	%mm3, %mm3
-        	paddsb	%mm3,%mm3
-
-// CHECK: 	paddsb	3735928559(%ebx,%ecx,8), %xmm5
-        	paddsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddsb	69, %xmm5
-        	paddsb	0x45,%xmm5
-
-// CHECK: 	paddsb	32493, %xmm5
-        	paddsb	0x7eed,%xmm5
-
-// CHECK: 	paddsb	3133065982, %xmm5
-        	paddsb	0xbabecafe,%xmm5
-
-// CHECK: 	paddsb	305419896, %xmm5
-        	paddsb	0x12345678,%xmm5
-
-// CHECK: 	paddsb	%xmm5, %xmm5
-        	paddsb	%xmm5,%xmm5
-
-// CHECK: 	paddsw	3735928559(%ebx,%ecx,8), %mm3
-        	paddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddsw	69, %mm3
-        	paddsw	0x45,%mm3
-
-// CHECK: 	paddsw	32493, %mm3
-        	paddsw	0x7eed,%mm3
-
-// CHECK: 	paddsw	3133065982, %mm3
-        	paddsw	0xbabecafe,%mm3
-
-// CHECK: 	paddsw	305419896, %mm3
-        	paddsw	0x12345678,%mm3
-
-// CHECK: 	paddsw	%mm3, %mm3
-        	paddsw	%mm3,%mm3
-
-// CHECK: 	paddsw	3735928559(%ebx,%ecx,8), %xmm5
-        	paddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddsw	69, %xmm5
-        	paddsw	0x45,%xmm5
-
-// CHECK: 	paddsw	32493, %xmm5
-        	paddsw	0x7eed,%xmm5
-
-// CHECK: 	paddsw	3133065982, %xmm5
-        	paddsw	0xbabecafe,%xmm5
-
-// CHECK: 	paddsw	305419896, %xmm5
-        	paddsw	0x12345678,%xmm5
-
-// CHECK: 	paddsw	%xmm5, %xmm5
-        	paddsw	%xmm5,%xmm5
-
-// CHECK: 	paddusb	3735928559(%ebx,%ecx,8), %mm3
-        	paddusb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddusb	69, %mm3
-        	paddusb	0x45,%mm3
-
-// CHECK: 	paddusb	32493, %mm3
-        	paddusb	0x7eed,%mm3
-
-// CHECK: 	paddusb	3133065982, %mm3
-        	paddusb	0xbabecafe,%mm3
-
-// CHECK: 	paddusb	305419896, %mm3
-        	paddusb	0x12345678,%mm3
-
-// CHECK: 	paddusb	%mm3, %mm3
-        	paddusb	%mm3,%mm3
-
-// CHECK: 	paddusb	3735928559(%ebx,%ecx,8), %xmm5
-        	paddusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddusb	69, %xmm5
-        	paddusb	0x45,%xmm5
-
-// CHECK: 	paddusb	32493, %xmm5
-        	paddusb	0x7eed,%xmm5
-
-// CHECK: 	paddusb	3133065982, %xmm5
-        	paddusb	0xbabecafe,%xmm5
-
-// CHECK: 	paddusb	305419896, %xmm5
-        	paddusb	0x12345678,%xmm5
-
-// CHECK: 	paddusb	%xmm5, %xmm5
-        	paddusb	%xmm5,%xmm5
-
-// CHECK: 	paddusw	3735928559(%ebx,%ecx,8), %mm3
-        	paddusw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	paddusw	69, %mm3
-        	paddusw	0x45,%mm3
-
-// CHECK: 	paddusw	32493, %mm3
-        	paddusw	0x7eed,%mm3
-
-// CHECK: 	paddusw	3133065982, %mm3
-        	paddusw	0xbabecafe,%mm3
-
-// CHECK: 	paddusw	305419896, %mm3
-        	paddusw	0x12345678,%mm3
-
-// CHECK: 	paddusw	%mm3, %mm3
-        	paddusw	%mm3,%mm3
-
-// CHECK: 	paddusw	3735928559(%ebx,%ecx,8), %xmm5
-        	paddusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	paddusw	69, %xmm5
-        	paddusw	0x45,%xmm5
-
-// CHECK: 	paddusw	32493, %xmm5
-        	paddusw	0x7eed,%xmm5
-
-// CHECK: 	paddusw	3133065982, %xmm5
-        	paddusw	0xbabecafe,%xmm5
-
-// CHECK: 	paddusw	305419896, %xmm5
-        	paddusw	0x12345678,%xmm5
-
-// CHECK: 	paddusw	%xmm5, %xmm5
-        	paddusw	%xmm5,%xmm5
-
-// CHECK: 	pand	3735928559(%ebx,%ecx,8), %mm3
-        	pand	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pand	69, %mm3
-        	pand	0x45,%mm3
-
-// CHECK: 	pand	32493, %mm3
-        	pand	0x7eed,%mm3
-
-// CHECK: 	pand	3133065982, %mm3
-        	pand	0xbabecafe,%mm3
-
-// CHECK: 	pand	305419896, %mm3
-        	pand	0x12345678,%mm3
-
-// CHECK: 	pand	%mm3, %mm3
-        	pand	%mm3,%mm3
-
-// CHECK: 	pand	3735928559(%ebx,%ecx,8), %xmm5
-        	pand	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pand	69, %xmm5
-        	pand	0x45,%xmm5
-
-// CHECK: 	pand	32493, %xmm5
-        	pand	0x7eed,%xmm5
-
-// CHECK: 	pand	3133065982, %xmm5
-        	pand	0xbabecafe,%xmm5
-
-// CHECK: 	pand	305419896, %xmm5
-        	pand	0x12345678,%xmm5
-
-// CHECK: 	pand	%xmm5, %xmm5
-        	pand	%xmm5,%xmm5
-
-// CHECK: 	pandn	3735928559(%ebx,%ecx,8), %mm3
-        	pandn	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pandn	69, %mm3
-        	pandn	0x45,%mm3
-
-// CHECK: 	pandn	32493, %mm3
-        	pandn	0x7eed,%mm3
-
-// CHECK: 	pandn	3133065982, %mm3
-        	pandn	0xbabecafe,%mm3
-
-// CHECK: 	pandn	305419896, %mm3
-        	pandn	0x12345678,%mm3
-
-// CHECK: 	pandn	%mm3, %mm3
-        	pandn	%mm3,%mm3
-
-// CHECK: 	pandn	3735928559(%ebx,%ecx,8), %xmm5
-        	pandn	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pandn	69, %xmm5
-        	pandn	0x45,%xmm5
-
-// CHECK: 	pandn	32493, %xmm5
-        	pandn	0x7eed,%xmm5
-
-// CHECK: 	pandn	3133065982, %xmm5
-        	pandn	0xbabecafe,%xmm5
-
-// CHECK: 	pandn	305419896, %xmm5
-        	pandn	0x12345678,%xmm5
-
-// CHECK: 	pandn	%xmm5, %xmm5
-        	pandn	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqb	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpeqb	69, %mm3
-        	pcmpeqb	0x45,%mm3
-
-// CHECK: 	pcmpeqb	32493, %mm3
-        	pcmpeqb	0x7eed,%mm3
-
-// CHECK: 	pcmpeqb	3133065982, %mm3
-        	pcmpeqb	0xbabecafe,%mm3
-
-// CHECK: 	pcmpeqb	305419896, %mm3
-        	pcmpeqb	0x12345678,%mm3
-
-// CHECK: 	pcmpeqb	%mm3, %mm3
-        	pcmpeqb	%mm3,%mm3
-
-// CHECK: 	pcmpeqb	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpeqb	69, %xmm5
-        	pcmpeqb	0x45,%xmm5
-
-// CHECK: 	pcmpeqb	32493, %xmm5
-        	pcmpeqb	0x7eed,%xmm5
-
-// CHECK: 	pcmpeqb	3133065982, %xmm5
-        	pcmpeqb	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpeqb	305419896, %xmm5
-        	pcmpeqb	0x12345678,%xmm5
-
-// CHECK: 	pcmpeqb	%xmm5, %xmm5
-        	pcmpeqb	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqw	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpeqw	69, %mm3
-        	pcmpeqw	0x45,%mm3
-
-// CHECK: 	pcmpeqw	32493, %mm3
-        	pcmpeqw	0x7eed,%mm3
-
-// CHECK: 	pcmpeqw	3133065982, %mm3
-        	pcmpeqw	0xbabecafe,%mm3
-
-// CHECK: 	pcmpeqw	305419896, %mm3
-        	pcmpeqw	0x12345678,%mm3
-
-// CHECK: 	pcmpeqw	%mm3, %mm3
-        	pcmpeqw	%mm3,%mm3
-
-// CHECK: 	pcmpeqw	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpeqw	69, %xmm5
-        	pcmpeqw	0x45,%xmm5
-
-// CHECK: 	pcmpeqw	32493, %xmm5
-        	pcmpeqw	0x7eed,%xmm5
-
-// CHECK: 	pcmpeqw	3133065982, %xmm5
-        	pcmpeqw	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpeqw	305419896, %xmm5
-        	pcmpeqw	0x12345678,%xmm5
-
-// CHECK: 	pcmpeqw	%xmm5, %xmm5
-        	pcmpeqw	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqd	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpeqd	69, %mm3
-        	pcmpeqd	0x45,%mm3
-
-// CHECK: 	pcmpeqd	32493, %mm3
-        	pcmpeqd	0x7eed,%mm3
-
-// CHECK: 	pcmpeqd	3133065982, %mm3
-        	pcmpeqd	0xbabecafe,%mm3
-
-// CHECK: 	pcmpeqd	305419896, %mm3
-        	pcmpeqd	0x12345678,%mm3
-
-// CHECK: 	pcmpeqd	%mm3, %mm3
-        	pcmpeqd	%mm3,%mm3
-
-// CHECK: 	pcmpeqd	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpeqd	69, %xmm5
-        	pcmpeqd	0x45,%xmm5
-
-// CHECK: 	pcmpeqd	32493, %xmm5
-        	pcmpeqd	0x7eed,%xmm5
-
-// CHECK: 	pcmpeqd	3133065982, %xmm5
-        	pcmpeqd	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpeqd	305419896, %xmm5
-        	pcmpeqd	0x12345678,%xmm5
-
-// CHECK: 	pcmpeqd	%xmm5, %xmm5
-        	pcmpeqd	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtb	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpgtb	69, %mm3
-        	pcmpgtb	0x45,%mm3
-
-// CHECK: 	pcmpgtb	32493, %mm3
-        	pcmpgtb	0x7eed,%mm3
-
-// CHECK: 	pcmpgtb	3133065982, %mm3
-        	pcmpgtb	0xbabecafe,%mm3
-
-// CHECK: 	pcmpgtb	305419896, %mm3
-        	pcmpgtb	0x12345678,%mm3
-
-// CHECK: 	pcmpgtb	%mm3, %mm3
-        	pcmpgtb	%mm3,%mm3
-
-// CHECK: 	pcmpgtb	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpgtb	69, %xmm5
-        	pcmpgtb	0x45,%xmm5
-
-// CHECK: 	pcmpgtb	32493, %xmm5
-        	pcmpgtb	0x7eed,%xmm5
-
-// CHECK: 	pcmpgtb	3133065982, %xmm5
-        	pcmpgtb	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpgtb	305419896, %xmm5
-        	pcmpgtb	0x12345678,%xmm5
-
-// CHECK: 	pcmpgtb	%xmm5, %xmm5
-        	pcmpgtb	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtw	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpgtw	69, %mm3
-        	pcmpgtw	0x45,%mm3
-
-// CHECK: 	pcmpgtw	32493, %mm3
-        	pcmpgtw	0x7eed,%mm3
-
-// CHECK: 	pcmpgtw	3133065982, %mm3
-        	pcmpgtw	0xbabecafe,%mm3
-
-// CHECK: 	pcmpgtw	305419896, %mm3
-        	pcmpgtw	0x12345678,%mm3
-
-// CHECK: 	pcmpgtw	%mm3, %mm3
-        	pcmpgtw	%mm3,%mm3
-
-// CHECK: 	pcmpgtw	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpgtw	69, %xmm5
-        	pcmpgtw	0x45,%xmm5
-
-// CHECK: 	pcmpgtw	32493, %xmm5
-        	pcmpgtw	0x7eed,%xmm5
-
-// CHECK: 	pcmpgtw	3133065982, %xmm5
-        	pcmpgtw	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpgtw	305419896, %xmm5
-        	pcmpgtw	0x12345678,%xmm5
-
-// CHECK: 	pcmpgtw	%xmm5, %xmm5
-        	pcmpgtw	%xmm5,%xmm5
-
-// CHECK: 	pcmpgtd	3735928559(%ebx,%ecx,8), %mm3
-        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pcmpgtd	69, %mm3
-        	pcmpgtd	0x45,%mm3
-
-// CHECK: 	pcmpgtd	32493, %mm3
-        	pcmpgtd	0x7eed,%mm3
-
-// CHECK: 	pcmpgtd	3133065982, %mm3
-        	pcmpgtd	0xbabecafe,%mm3
-
-// CHECK: 	pcmpgtd	305419896, %mm3
-        	pcmpgtd	0x12345678,%mm3
-
-// CHECK: 	pcmpgtd	%mm3, %mm3
-        	pcmpgtd	%mm3,%mm3
-
-// CHECK: 	pcmpgtd	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpgtd	69, %xmm5
-        	pcmpgtd	0x45,%xmm5
-
-// CHECK: 	pcmpgtd	32493, %xmm5
-        	pcmpgtd	0x7eed,%xmm5
-
-// CHECK: 	pcmpgtd	3133065982, %xmm5
-        	pcmpgtd	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpgtd	305419896, %xmm5
-        	pcmpgtd	0x12345678,%xmm5
-
-// CHECK: 	pcmpgtd	%xmm5, %xmm5
-        	pcmpgtd	%xmm5,%xmm5
-
-// CHECK: 	pmaddwd	3735928559(%ebx,%ecx,8), %mm3
-        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmaddwd	69, %mm3
-        	pmaddwd	0x45,%mm3
-
-// CHECK: 	pmaddwd	32493, %mm3
-        	pmaddwd	0x7eed,%mm3
-
-// CHECK: 	pmaddwd	3133065982, %mm3
-        	pmaddwd	0xbabecafe,%mm3
-
-// CHECK: 	pmaddwd	305419896, %mm3
-        	pmaddwd	0x12345678,%mm3
-
-// CHECK: 	pmaddwd	%mm3, %mm3
-        	pmaddwd	%mm3,%mm3
-
-// CHECK: 	pmaddwd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaddwd	69, %xmm5
-        	pmaddwd	0x45,%xmm5
-
-// CHECK: 	pmaddwd	32493, %xmm5
-        	pmaddwd	0x7eed,%xmm5
-
-// CHECK: 	pmaddwd	3133065982, %xmm5
-        	pmaddwd	0xbabecafe,%xmm5
-
-// CHECK: 	pmaddwd	305419896, %xmm5
-        	pmaddwd	0x12345678,%xmm5
-
-// CHECK: 	pmaddwd	%xmm5, %xmm5
-        	pmaddwd	%xmm5,%xmm5
-
-// CHECK: 	pmulhw	3735928559(%ebx,%ecx,8), %mm3
-        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmulhw	69, %mm3
-        	pmulhw	0x45,%mm3
-
-// CHECK: 	pmulhw	32493, %mm3
-        	pmulhw	0x7eed,%mm3
-
-// CHECK: 	pmulhw	3133065982, %mm3
-        	pmulhw	0xbabecafe,%mm3
-
-// CHECK: 	pmulhw	305419896, %mm3
-        	pmulhw	0x12345678,%mm3
-
-// CHECK: 	pmulhw	%mm3, %mm3
-        	pmulhw	%mm3,%mm3
-
-// CHECK: 	pmulhw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmulhw	69, %xmm5
-        	pmulhw	0x45,%xmm5
-
-// CHECK: 	pmulhw	32493, %xmm5
-        	pmulhw	0x7eed,%xmm5
-
-// CHECK: 	pmulhw	3133065982, %xmm5
-        	pmulhw	0xbabecafe,%xmm5
-
-// CHECK: 	pmulhw	305419896, %xmm5
-        	pmulhw	0x12345678,%xmm5
-
-// CHECK: 	pmulhw	%xmm5, %xmm5
-        	pmulhw	%xmm5,%xmm5
-
-// CHECK: 	pmullw	3735928559(%ebx,%ecx,8), %mm3
-        	pmullw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmullw	69, %mm3
-        	pmullw	0x45,%mm3
-
-// CHECK: 	pmullw	32493, %mm3
-        	pmullw	0x7eed,%mm3
-
-// CHECK: 	pmullw	3133065982, %mm3
-        	pmullw	0xbabecafe,%mm3
-
-// CHECK: 	pmullw	305419896, %mm3
-        	pmullw	0x12345678,%mm3
-
-// CHECK: 	pmullw	%mm3, %mm3
-        	pmullw	%mm3,%mm3
-
-// CHECK: 	pmullw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmullw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmullw	69, %xmm5
-        	pmullw	0x45,%xmm5
-
-// CHECK: 	pmullw	32493, %xmm5
-        	pmullw	0x7eed,%xmm5
-
-// CHECK: 	pmullw	3133065982, %xmm5
-        	pmullw	0xbabecafe,%xmm5
-
-// CHECK: 	pmullw	305419896, %xmm5
-        	pmullw	0x12345678,%xmm5
-
-// CHECK: 	pmullw	%xmm5, %xmm5
-        	pmullw	%xmm5,%xmm5
-
-// CHECK: 	por	3735928559(%ebx,%ecx,8), %mm3
-        	por	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	por	69, %mm3
-        	por	0x45,%mm3
-
-// CHECK: 	por	32493, %mm3
-        	por	0x7eed,%mm3
-
-// CHECK: 	por	3133065982, %mm3
-        	por	0xbabecafe,%mm3
-
-// CHECK: 	por	305419896, %mm3
-        	por	0x12345678,%mm3
-
-// CHECK: 	por	%mm3, %mm3
-        	por	%mm3,%mm3
-
-// CHECK: 	por	3735928559(%ebx,%ecx,8), %xmm5
-        	por	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	por	69, %xmm5
-        	por	0x45,%xmm5
-
-// CHECK: 	por	32493, %xmm5
-        	por	0x7eed,%xmm5
-
-// CHECK: 	por	3133065982, %xmm5
-        	por	0xbabecafe,%xmm5
-
-// CHECK: 	por	305419896, %xmm5
-        	por	0x12345678,%xmm5
-
-// CHECK: 	por	%xmm5, %xmm5
-        	por	%xmm5,%xmm5
-
-// CHECK: 	psllw	3735928559(%ebx,%ecx,8), %mm3
-        	psllw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psllw	69, %mm3
-        	psllw	0x45,%mm3
-
-// CHECK: 	psllw	32493, %mm3
-        	psllw	0x7eed,%mm3
-
-// CHECK: 	psllw	3133065982, %mm3
-        	psllw	0xbabecafe,%mm3
-
-// CHECK: 	psllw	305419896, %mm3
-        	psllw	0x12345678,%mm3
-
-// CHECK: 	psllw	%mm3, %mm3
-        	psllw	%mm3,%mm3
-
-// CHECK: 	psllw	3735928559(%ebx,%ecx,8), %xmm5
-        	psllw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psllw	69, %xmm5
-        	psllw	0x45,%xmm5
-
-// CHECK: 	psllw	32493, %xmm5
-        	psllw	0x7eed,%xmm5
-
-// CHECK: 	psllw	3133065982, %xmm5
-        	psllw	0xbabecafe,%xmm5
-
-// CHECK: 	psllw	305419896, %xmm5
-        	psllw	0x12345678,%xmm5
-
-// CHECK: 	psllw	%xmm5, %xmm5
-        	psllw	%xmm5,%xmm5
-
-// CHECK: 	psllw	$127, %mm3
-        	psllw	$0x7f,%mm3
-
-// CHECK: 	psllw	$127, %xmm5
-        	psllw	$0x7f,%xmm5
-
-// CHECK: 	pslld	3735928559(%ebx,%ecx,8), %mm3
-        	pslld	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pslld	69, %mm3
-        	pslld	0x45,%mm3
-
-// CHECK: 	pslld	32493, %mm3
-        	pslld	0x7eed,%mm3
-
-// CHECK: 	pslld	3133065982, %mm3
-        	pslld	0xbabecafe,%mm3
-
-// CHECK: 	pslld	305419896, %mm3
-        	pslld	0x12345678,%mm3
-
-// CHECK: 	pslld	%mm3, %mm3
-        	pslld	%mm3,%mm3
-
-// CHECK: 	pslld	3735928559(%ebx,%ecx,8), %xmm5
-        	pslld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pslld	69, %xmm5
-        	pslld	0x45,%xmm5
-
-// CHECK: 	pslld	32493, %xmm5
-        	pslld	0x7eed,%xmm5
-
-// CHECK: 	pslld	3133065982, %xmm5
-        	pslld	0xbabecafe,%xmm5
-
-// CHECK: 	pslld	305419896, %xmm5
-        	pslld	0x12345678,%xmm5
-
-// CHECK: 	pslld	%xmm5, %xmm5
-        	pslld	%xmm5,%xmm5
-
-// CHECK: 	pslld	$127, %mm3
-        	pslld	$0x7f,%mm3
-
-// CHECK: 	pslld	$127, %xmm5
-        	pslld	$0x7f,%xmm5
-
-// CHECK: 	psllq	3735928559(%ebx,%ecx,8), %mm3
-        	psllq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psllq	69, %mm3
-        	psllq	0x45,%mm3
-
-// CHECK: 	psllq	32493, %mm3
-        	psllq	0x7eed,%mm3
-
-// CHECK: 	psllq	3133065982, %mm3
-        	psllq	0xbabecafe,%mm3
-
-// CHECK: 	psllq	305419896, %mm3
-        	psllq	0x12345678,%mm3
-
-// CHECK: 	psllq	%mm3, %mm3
-        	psllq	%mm3,%mm3
-
-// CHECK: 	psllq	3735928559(%ebx,%ecx,8), %xmm5
-        	psllq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psllq	69, %xmm5
-        	psllq	0x45,%xmm5
-
-// CHECK: 	psllq	32493, %xmm5
-        	psllq	0x7eed,%xmm5
-
-// CHECK: 	psllq	3133065982, %xmm5
-        	psllq	0xbabecafe,%xmm5
-
-// CHECK: 	psllq	305419896, %xmm5
-        	psllq	0x12345678,%xmm5
-
-// CHECK: 	psllq	%xmm5, %xmm5
-        	psllq	%xmm5,%xmm5
-
-// CHECK: 	psllq	$127, %mm3
-        	psllq	$0x7f,%mm3
-
-// CHECK: 	psllq	$127, %xmm5
-        	psllq	$0x7f,%xmm5
-
-// CHECK: 	psraw	3735928559(%ebx,%ecx,8), %mm3
-        	psraw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psraw	69, %mm3
-        	psraw	0x45,%mm3
-
-// CHECK: 	psraw	32493, %mm3
-        	psraw	0x7eed,%mm3
-
-// CHECK: 	psraw	3133065982, %mm3
-        	psraw	0xbabecafe,%mm3
-
-// CHECK: 	psraw	305419896, %mm3
-        	psraw	0x12345678,%mm3
-
-// CHECK: 	psraw	%mm3, %mm3
-        	psraw	%mm3,%mm3
-
-// CHECK: 	psraw	3735928559(%ebx,%ecx,8), %xmm5
-        	psraw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psraw	69, %xmm5
-        	psraw	0x45,%xmm5
-
-// CHECK: 	psraw	32493, %xmm5
-        	psraw	0x7eed,%xmm5
-
-// CHECK: 	psraw	3133065982, %xmm5
-        	psraw	0xbabecafe,%xmm5
-
-// CHECK: 	psraw	305419896, %xmm5
-        	psraw	0x12345678,%xmm5
-
-// CHECK: 	psraw	%xmm5, %xmm5
-        	psraw	%xmm5,%xmm5
-
-// CHECK: 	psraw	$127, %mm3
-        	psraw	$0x7f,%mm3
-
-// CHECK: 	psraw	$127, %xmm5
-        	psraw	$0x7f,%xmm5
-
-// CHECK: 	psrad	3735928559(%ebx,%ecx,8), %mm3
-        	psrad	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psrad	69, %mm3
-        	psrad	0x45,%mm3
-
-// CHECK: 	psrad	32493, %mm3
-        	psrad	0x7eed,%mm3
-
-// CHECK: 	psrad	3133065982, %mm3
-        	psrad	0xbabecafe,%mm3
-
-// CHECK: 	psrad	305419896, %mm3
-        	psrad	0x12345678,%mm3
-
-// CHECK: 	psrad	%mm3, %mm3
-        	psrad	%mm3,%mm3
-
-// CHECK: 	psrad	3735928559(%ebx,%ecx,8), %xmm5
-        	psrad	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psrad	69, %xmm5
-        	psrad	0x45,%xmm5
-
-// CHECK: 	psrad	32493, %xmm5
-        	psrad	0x7eed,%xmm5
-
-// CHECK: 	psrad	3133065982, %xmm5
-        	psrad	0xbabecafe,%xmm5
-
-// CHECK: 	psrad	305419896, %xmm5
-        	psrad	0x12345678,%xmm5
-
-// CHECK: 	psrad	%xmm5, %xmm5
-        	psrad	%xmm5,%xmm5
-
-// CHECK: 	psrad	$127, %mm3
-        	psrad	$0x7f,%mm3
-
-// CHECK: 	psrad	$127, %xmm5
-        	psrad	$0x7f,%xmm5
-
-// CHECK: 	psrlw	3735928559(%ebx,%ecx,8), %mm3
-        	psrlw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psrlw	69, %mm3
-        	psrlw	0x45,%mm3
-
-// CHECK: 	psrlw	32493, %mm3
-        	psrlw	0x7eed,%mm3
-
-// CHECK: 	psrlw	3133065982, %mm3
-        	psrlw	0xbabecafe,%mm3
-
-// CHECK: 	psrlw	305419896, %mm3
-        	psrlw	0x12345678,%mm3
-
-// CHECK: 	psrlw	%mm3, %mm3
-        	psrlw	%mm3,%mm3
-
-// CHECK: 	psrlw	3735928559(%ebx,%ecx,8), %xmm5
-        	psrlw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psrlw	69, %xmm5
-        	psrlw	0x45,%xmm5
-
-// CHECK: 	psrlw	32493, %xmm5
-        	psrlw	0x7eed,%xmm5
-
-// CHECK: 	psrlw	3133065982, %xmm5
-        	psrlw	0xbabecafe,%xmm5
-
-// CHECK: 	psrlw	305419896, %xmm5
-        	psrlw	0x12345678,%xmm5
-
-// CHECK: 	psrlw	%xmm5, %xmm5
-        	psrlw	%xmm5,%xmm5
-
-// CHECK: 	psrlw	$127, %mm3
-        	psrlw	$0x7f,%mm3
-
-// CHECK: 	psrlw	$127, %xmm5
-        	psrlw	$0x7f,%xmm5
-
-// CHECK: 	psrld	3735928559(%ebx,%ecx,8), %mm3
-        	psrld	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psrld	69, %mm3
-        	psrld	0x45,%mm3
-
-// CHECK: 	psrld	32493, %mm3
-        	psrld	0x7eed,%mm3
-
-// CHECK: 	psrld	3133065982, %mm3
-        	psrld	0xbabecafe,%mm3
-
-// CHECK: 	psrld	305419896, %mm3
-        	psrld	0x12345678,%mm3
-
-// CHECK: 	psrld	%mm3, %mm3
-        	psrld	%mm3,%mm3
-
-// CHECK: 	psrld	3735928559(%ebx,%ecx,8), %xmm5
-        	psrld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psrld	69, %xmm5
-        	psrld	0x45,%xmm5
-
-// CHECK: 	psrld	32493, %xmm5
-        	psrld	0x7eed,%xmm5
-
-// CHECK: 	psrld	3133065982, %xmm5
-        	psrld	0xbabecafe,%xmm5
-
-// CHECK: 	psrld	305419896, %xmm5
-        	psrld	0x12345678,%xmm5
-
-// CHECK: 	psrld	%xmm5, %xmm5
-        	psrld	%xmm5,%xmm5
-
-// CHECK: 	psrld	$127, %mm3
-        	psrld	$0x7f,%mm3
-
-// CHECK: 	psrld	$127, %xmm5
-        	psrld	$0x7f,%xmm5
-
-// CHECK: 	psrlq	3735928559(%ebx,%ecx,8), %mm3
-        	psrlq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psrlq	69, %mm3
-        	psrlq	0x45,%mm3
-
-// CHECK: 	psrlq	32493, %mm3
-        	psrlq	0x7eed,%mm3
-
-// CHECK: 	psrlq	3133065982, %mm3
-        	psrlq	0xbabecafe,%mm3
-
-// CHECK: 	psrlq	305419896, %mm3
-        	psrlq	0x12345678,%mm3
-
-// CHECK: 	psrlq	%mm3, %mm3
-        	psrlq	%mm3,%mm3
-
-// CHECK: 	psrlq	3735928559(%ebx,%ecx,8), %xmm5
-        	psrlq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psrlq	69, %xmm5
-        	psrlq	0x45,%xmm5
-
-// CHECK: 	psrlq	32493, %xmm5
-        	psrlq	0x7eed,%xmm5
-
-// CHECK: 	psrlq	3133065982, %xmm5
-        	psrlq	0xbabecafe,%xmm5
-
-// CHECK: 	psrlq	305419896, %xmm5
-        	psrlq	0x12345678,%xmm5
-
-// CHECK: 	psrlq	%xmm5, %xmm5
-        	psrlq	%xmm5,%xmm5
-
-// CHECK: 	psrlq	$127, %mm3
-        	psrlq	$0x7f,%mm3
-
-// CHECK: 	psrlq	$127, %xmm5
-        	psrlq	$0x7f,%xmm5
-
-// CHECK: 	psubb	3735928559(%ebx,%ecx,8), %mm3
-        	psubb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubb	69, %mm3
-        	psubb	0x45,%mm3
-
-// CHECK: 	psubb	32493, %mm3
-        	psubb	0x7eed,%mm3
-
-// CHECK: 	psubb	3133065982, %mm3
-        	psubb	0xbabecafe,%mm3
-
-// CHECK: 	psubb	305419896, %mm3
-        	psubb	0x12345678,%mm3
-
-// CHECK: 	psubb	%mm3, %mm3
-        	psubb	%mm3,%mm3
-
-// CHECK: 	psubb	3735928559(%ebx,%ecx,8), %xmm5
-        	psubb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubb	69, %xmm5
-        	psubb	0x45,%xmm5
-
-// CHECK: 	psubb	32493, %xmm5
-        	psubb	0x7eed,%xmm5
-
-// CHECK: 	psubb	3133065982, %xmm5
-        	psubb	0xbabecafe,%xmm5
-
-// CHECK: 	psubb	305419896, %xmm5
-        	psubb	0x12345678,%xmm5
-
-// CHECK: 	psubb	%xmm5, %xmm5
-        	psubb	%xmm5,%xmm5
-
-// CHECK: 	psubw	3735928559(%ebx,%ecx,8), %mm3
-        	psubw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubw	69, %mm3
-        	psubw	0x45,%mm3
-
-// CHECK: 	psubw	32493, %mm3
-        	psubw	0x7eed,%mm3
-
-// CHECK: 	psubw	3133065982, %mm3
-        	psubw	0xbabecafe,%mm3
-
-// CHECK: 	psubw	305419896, %mm3
-        	psubw	0x12345678,%mm3
-
-// CHECK: 	psubw	%mm3, %mm3
-        	psubw	%mm3,%mm3
-
-// CHECK: 	psubw	3735928559(%ebx,%ecx,8), %xmm5
-        	psubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubw	69, %xmm5
-        	psubw	0x45,%xmm5
-
-// CHECK: 	psubw	32493, %xmm5
-        	psubw	0x7eed,%xmm5
-
-// CHECK: 	psubw	3133065982, %xmm5
-        	psubw	0xbabecafe,%xmm5
-
-// CHECK: 	psubw	305419896, %xmm5
-        	psubw	0x12345678,%xmm5
-
-// CHECK: 	psubw	%xmm5, %xmm5
-        	psubw	%xmm5,%xmm5
-
-// CHECK: 	psubd	3735928559(%ebx,%ecx,8), %mm3
-        	psubd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubd	69, %mm3
-        	psubd	0x45,%mm3
-
-// CHECK: 	psubd	32493, %mm3
-        	psubd	0x7eed,%mm3
-
-// CHECK: 	psubd	3133065982, %mm3
-        	psubd	0xbabecafe,%mm3
-
-// CHECK: 	psubd	305419896, %mm3
-        	psubd	0x12345678,%mm3
-
-// CHECK: 	psubd	%mm3, %mm3
-        	psubd	%mm3,%mm3
-
-// CHECK: 	psubd	3735928559(%ebx,%ecx,8), %xmm5
-        	psubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubd	69, %xmm5
-        	psubd	0x45,%xmm5
-
-// CHECK: 	psubd	32493, %xmm5
-        	psubd	0x7eed,%xmm5
-
-// CHECK: 	psubd	3133065982, %xmm5
-        	psubd	0xbabecafe,%xmm5
-
-// CHECK: 	psubd	305419896, %xmm5
-        	psubd	0x12345678,%xmm5
-
-// CHECK: 	psubd	%xmm5, %xmm5
-        	psubd	%xmm5,%xmm5
-
-// CHECK: 	psubq	3735928559(%ebx,%ecx,8), %mm3
-        	psubq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubq	69, %mm3
-        	psubq	0x45,%mm3
-
-// CHECK: 	psubq	32493, %mm3
-        	psubq	0x7eed,%mm3
-
-// CHECK: 	psubq	3133065982, %mm3
-        	psubq	0xbabecafe,%mm3
-
-// CHECK: 	psubq	305419896, %mm3
-        	psubq	0x12345678,%mm3
-
-// CHECK: 	psubq	%mm3, %mm3
-        	psubq	%mm3,%mm3
-
-// CHECK: 	psubq	3735928559(%ebx,%ecx,8), %xmm5
-        	psubq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubq	69, %xmm5
-        	psubq	0x45,%xmm5
-
-// CHECK: 	psubq	32493, %xmm5
-        	psubq	0x7eed,%xmm5
-
-// CHECK: 	psubq	3133065982, %xmm5
-        	psubq	0xbabecafe,%xmm5
-
-// CHECK: 	psubq	305419896, %xmm5
-        	psubq	0x12345678,%xmm5
-
-// CHECK: 	psubq	%xmm5, %xmm5
-        	psubq	%xmm5,%xmm5
-
-// CHECK: 	psubsb	3735928559(%ebx,%ecx,8), %mm3
-        	psubsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubsb	69, %mm3
-        	psubsb	0x45,%mm3
-
-// CHECK: 	psubsb	32493, %mm3
-        	psubsb	0x7eed,%mm3
-
-// CHECK: 	psubsb	3133065982, %mm3
-        	psubsb	0xbabecafe,%mm3
-
-// CHECK: 	psubsb	305419896, %mm3
-        	psubsb	0x12345678,%mm3
-
-// CHECK: 	psubsb	%mm3, %mm3
-        	psubsb	%mm3,%mm3
-
-// CHECK: 	psubsb	3735928559(%ebx,%ecx,8), %xmm5
-        	psubsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubsb	69, %xmm5
-        	psubsb	0x45,%xmm5
-
-// CHECK: 	psubsb	32493, %xmm5
-        	psubsb	0x7eed,%xmm5
-
-// CHECK: 	psubsb	3133065982, %xmm5
-        	psubsb	0xbabecafe,%xmm5
-
-// CHECK: 	psubsb	305419896, %xmm5
-        	psubsb	0x12345678,%xmm5
-
-// CHECK: 	psubsb	%xmm5, %xmm5
-        	psubsb	%xmm5,%xmm5
-
-// CHECK: 	psubsw	3735928559(%ebx,%ecx,8), %mm3
-        	psubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubsw	69, %mm3
-        	psubsw	0x45,%mm3
-
-// CHECK: 	psubsw	32493, %mm3
-        	psubsw	0x7eed,%mm3
-
-// CHECK: 	psubsw	3133065982, %mm3
-        	psubsw	0xbabecafe,%mm3
-
-// CHECK: 	psubsw	305419896, %mm3
-        	psubsw	0x12345678,%mm3
-
-// CHECK: 	psubsw	%mm3, %mm3
-        	psubsw	%mm3,%mm3
-
-// CHECK: 	psubsw	3735928559(%ebx,%ecx,8), %xmm5
-        	psubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubsw	69, %xmm5
-        	psubsw	0x45,%xmm5
-
-// CHECK: 	psubsw	32493, %xmm5
-        	psubsw	0x7eed,%xmm5
-
-// CHECK: 	psubsw	3133065982, %xmm5
-        	psubsw	0xbabecafe,%xmm5
-
-// CHECK: 	psubsw	305419896, %xmm5
-        	psubsw	0x12345678,%xmm5
-
-// CHECK: 	psubsw	%xmm5, %xmm5
-        	psubsw	%xmm5,%xmm5
-
-// CHECK: 	psubusb	3735928559(%ebx,%ecx,8), %mm3
-        	psubusb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubusb	69, %mm3
-        	psubusb	0x45,%mm3
-
-// CHECK: 	psubusb	32493, %mm3
-        	psubusb	0x7eed,%mm3
-
-// CHECK: 	psubusb	3133065982, %mm3
-        	psubusb	0xbabecafe,%mm3
-
-// CHECK: 	psubusb	305419896, %mm3
-        	psubusb	0x12345678,%mm3
-
-// CHECK: 	psubusb	%mm3, %mm3
-        	psubusb	%mm3,%mm3
-
-// CHECK: 	psubusb	3735928559(%ebx,%ecx,8), %xmm5
-        	psubusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubusb	69, %xmm5
-        	psubusb	0x45,%xmm5
-
-// CHECK: 	psubusb	32493, %xmm5
-        	psubusb	0x7eed,%xmm5
-
-// CHECK: 	psubusb	3133065982, %xmm5
-        	psubusb	0xbabecafe,%xmm5
-
-// CHECK: 	psubusb	305419896, %xmm5
-        	psubusb	0x12345678,%xmm5
-
-// CHECK: 	psubusb	%xmm5, %xmm5
-        	psubusb	%xmm5,%xmm5
-
-// CHECK: 	psubusw	3735928559(%ebx,%ecx,8), %mm3
-        	psubusw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psubusw	69, %mm3
-        	psubusw	0x45,%mm3
-
-// CHECK: 	psubusw	32493, %mm3
-        	psubusw	0x7eed,%mm3
-
-// CHECK: 	psubusw	3133065982, %mm3
-        	psubusw	0xbabecafe,%mm3
-
-// CHECK: 	psubusw	305419896, %mm3
-        	psubusw	0x12345678,%mm3
-
-// CHECK: 	psubusw	%mm3, %mm3
-        	psubusw	%mm3,%mm3
-
-// CHECK: 	psubusw	3735928559(%ebx,%ecx,8), %xmm5
-        	psubusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psubusw	69, %xmm5
-        	psubusw	0x45,%xmm5
-
-// CHECK: 	psubusw	32493, %xmm5
-        	psubusw	0x7eed,%xmm5
-
-// CHECK: 	psubusw	3133065982, %xmm5
-        	psubusw	0xbabecafe,%xmm5
-
-// CHECK: 	psubusw	305419896, %xmm5
-        	psubusw	0x12345678,%xmm5
-
-// CHECK: 	psubusw	%xmm5, %xmm5
-        	psubusw	%xmm5,%xmm5
-
-// CHECK: 	punpckhbw	3735928559(%ebx,%ecx,8), %mm3
-        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpckhbw	69, %mm3
-        	punpckhbw	0x45,%mm3
-
-// CHECK: 	punpckhbw	32493, %mm3
-        	punpckhbw	0x7eed,%mm3
-
-// CHECK: 	punpckhbw	3133065982, %mm3
-        	punpckhbw	0xbabecafe,%mm3
-
-// CHECK: 	punpckhbw	305419896, %mm3
-        	punpckhbw	0x12345678,%mm3
-
-// CHECK: 	punpckhbw	%mm3, %mm3
-        	punpckhbw	%mm3,%mm3
-
-// CHECK: 	punpckhbw	3735928559(%ebx,%ecx,8), %xmm5
-        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpckhbw	69, %xmm5
-        	punpckhbw	0x45,%xmm5
-
-// CHECK: 	punpckhbw	32493, %xmm5
-        	punpckhbw	0x7eed,%xmm5
-
-// CHECK: 	punpckhbw	3133065982, %xmm5
-        	punpckhbw	0xbabecafe,%xmm5
-
-// CHECK: 	punpckhbw	305419896, %xmm5
-        	punpckhbw	0x12345678,%xmm5
-
-// CHECK: 	punpckhbw	%xmm5, %xmm5
-        	punpckhbw	%xmm5,%xmm5
-
-// CHECK: 	punpckhwd	3735928559(%ebx,%ecx,8), %mm3
-        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpckhwd	69, %mm3
-        	punpckhwd	0x45,%mm3
-
-// CHECK: 	punpckhwd	32493, %mm3
-        	punpckhwd	0x7eed,%mm3
-
-// CHECK: 	punpckhwd	3133065982, %mm3
-        	punpckhwd	0xbabecafe,%mm3
-
-// CHECK: 	punpckhwd	305419896, %mm3
-        	punpckhwd	0x12345678,%mm3
-
-// CHECK: 	punpckhwd	%mm3, %mm3
-        	punpckhwd	%mm3,%mm3
-
-// CHECK: 	punpckhwd	3735928559(%ebx,%ecx,8), %xmm5
-        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpckhwd	69, %xmm5
-        	punpckhwd	0x45,%xmm5
-
-// CHECK: 	punpckhwd	32493, %xmm5
-        	punpckhwd	0x7eed,%xmm5
-
-// CHECK: 	punpckhwd	3133065982, %xmm5
-        	punpckhwd	0xbabecafe,%xmm5
-
-// CHECK: 	punpckhwd	305419896, %xmm5
-        	punpckhwd	0x12345678,%xmm5
-
-// CHECK: 	punpckhwd	%xmm5, %xmm5
-        	punpckhwd	%xmm5,%xmm5
-
-// CHECK: 	punpckhdq	3735928559(%ebx,%ecx,8), %mm3
-        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpckhdq	69, %mm3
-        	punpckhdq	0x45,%mm3
-
-// CHECK: 	punpckhdq	32493, %mm3
-        	punpckhdq	0x7eed,%mm3
-
-// CHECK: 	punpckhdq	3133065982, %mm3
-        	punpckhdq	0xbabecafe,%mm3
-
-// CHECK: 	punpckhdq	305419896, %mm3
-        	punpckhdq	0x12345678,%mm3
-
-// CHECK: 	punpckhdq	%mm3, %mm3
-        	punpckhdq	%mm3,%mm3
-
-// CHECK: 	punpckhdq	3735928559(%ebx,%ecx,8), %xmm5
-        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpckhdq	69, %xmm5
-        	punpckhdq	0x45,%xmm5
-
-// CHECK: 	punpckhdq	32493, %xmm5
-        	punpckhdq	0x7eed,%xmm5
-
-// CHECK: 	punpckhdq	3133065982, %xmm5
-        	punpckhdq	0xbabecafe,%xmm5
-
-// CHECK: 	punpckhdq	305419896, %xmm5
-        	punpckhdq	0x12345678,%xmm5
-
-// CHECK: 	punpckhdq	%xmm5, %xmm5
-        	punpckhdq	%xmm5,%xmm5
-
-// CHECK: 	punpcklbw	3735928559(%ebx,%ecx,8), %mm3
-        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpcklbw	69, %mm3
-        	punpcklbw	0x45,%mm3
-
-// CHECK: 	punpcklbw	32493, %mm3
-        	punpcklbw	0x7eed,%mm3
-
-// CHECK: 	punpcklbw	3133065982, %mm3
-        	punpcklbw	0xbabecafe,%mm3
-
-// CHECK: 	punpcklbw	305419896, %mm3
-        	punpcklbw	0x12345678,%mm3
-
-// CHECK: 	punpcklbw	%mm3, %mm3
-        	punpcklbw	%mm3,%mm3
-
-// CHECK: 	punpcklbw	3735928559(%ebx,%ecx,8), %xmm5
-        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpcklbw	69, %xmm5
-        	punpcklbw	0x45,%xmm5
-
-// CHECK: 	punpcklbw	32493, %xmm5
-        	punpcklbw	0x7eed,%xmm5
-
-// CHECK: 	punpcklbw	3133065982, %xmm5
-        	punpcklbw	0xbabecafe,%xmm5
-
-// CHECK: 	punpcklbw	305419896, %xmm5
-        	punpcklbw	0x12345678,%xmm5
-
-// CHECK: 	punpcklbw	%xmm5, %xmm5
-        	punpcklbw	%xmm5,%xmm5
-
-// CHECK: 	punpcklwd	3735928559(%ebx,%ecx,8), %mm3
-        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpcklwd	69, %mm3
-        	punpcklwd	0x45,%mm3
-
-// CHECK: 	punpcklwd	32493, %mm3
-        	punpcklwd	0x7eed,%mm3
-
-// CHECK: 	punpcklwd	3133065982, %mm3
-        	punpcklwd	0xbabecafe,%mm3
-
-// CHECK: 	punpcklwd	305419896, %mm3
-        	punpcklwd	0x12345678,%mm3
-
-// CHECK: 	punpcklwd	%mm3, %mm3
-        	punpcklwd	%mm3,%mm3
-
-// CHECK: 	punpcklwd	3735928559(%ebx,%ecx,8), %xmm5
-        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpcklwd	69, %xmm5
-        	punpcklwd	0x45,%xmm5
-
-// CHECK: 	punpcklwd	32493, %xmm5
-        	punpcklwd	0x7eed,%xmm5
-
-// CHECK: 	punpcklwd	3133065982, %xmm5
-        	punpcklwd	0xbabecafe,%xmm5
-
-// CHECK: 	punpcklwd	305419896, %xmm5
-        	punpcklwd	0x12345678,%xmm5
-
-// CHECK: 	punpcklwd	%xmm5, %xmm5
-        	punpcklwd	%xmm5,%xmm5
-
-// CHECK: 	punpckldq	3735928559(%ebx,%ecx,8), %mm3
-        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	punpckldq	69, %mm3
-        	punpckldq	0x45,%mm3
-
-// CHECK: 	punpckldq	32493, %mm3
-        	punpckldq	0x7eed,%mm3
-
-// CHECK: 	punpckldq	3133065982, %mm3
-        	punpckldq	0xbabecafe,%mm3
-
-// CHECK: 	punpckldq	305419896, %mm3
-        	punpckldq	0x12345678,%mm3
-
-// CHECK: 	punpckldq	%mm3, %mm3
-        	punpckldq	%mm3,%mm3
-
-// CHECK: 	punpckldq	3735928559(%ebx,%ecx,8), %xmm5
-        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpckldq	69, %xmm5
-        	punpckldq	0x45,%xmm5
-
-// CHECK: 	punpckldq	32493, %xmm5
-        	punpckldq	0x7eed,%xmm5
-
-// CHECK: 	punpckldq	3133065982, %xmm5
-        	punpckldq	0xbabecafe,%xmm5
-
-// CHECK: 	punpckldq	305419896, %xmm5
-        	punpckldq	0x12345678,%xmm5
-
-// CHECK: 	punpckldq	%xmm5, %xmm5
-        	punpckldq	%xmm5,%xmm5
-
-// CHECK: 	pxor	3735928559(%ebx,%ecx,8), %mm3
-        	pxor	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pxor	69, %mm3
-        	pxor	0x45,%mm3
-
-// CHECK: 	pxor	32493, %mm3
-        	pxor	0x7eed,%mm3
-
-// CHECK: 	pxor	3133065982, %mm3
-        	pxor	0xbabecafe,%mm3
-
-// CHECK: 	pxor	305419896, %mm3
-        	pxor	0x12345678,%mm3
-
-// CHECK: 	pxor	%mm3, %mm3
-        	pxor	%mm3,%mm3
-
-// CHECK: 	pxor	3735928559(%ebx,%ecx,8), %xmm5
-        	pxor	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pxor	69, %xmm5
-        	pxor	0x45,%xmm5
-
-// CHECK: 	pxor	32493, %xmm5
-        	pxor	0x7eed,%xmm5
-
-// CHECK: 	pxor	3133065982, %xmm5
-        	pxor	0xbabecafe,%xmm5
-
-// CHECK: 	pxor	305419896, %xmm5
-        	pxor	0x12345678,%xmm5
-
-// CHECK: 	pxor	%xmm5, %xmm5
-        	pxor	%xmm5,%xmm5
-
-// CHECK: 	addps	3735928559(%ebx,%ecx,8), %xmm5
-        	addps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addps	69, %xmm5
-        	addps	0x45,%xmm5
-
-// CHECK: 	addps	32493, %xmm5
-        	addps	0x7eed,%xmm5
-
-// CHECK: 	addps	3133065982, %xmm5
-        	addps	0xbabecafe,%xmm5
-
-// CHECK: 	addps	305419896, %xmm5
-        	addps	0x12345678,%xmm5
-
-// CHECK: 	addps	%xmm5, %xmm5
-        	addps	%xmm5,%xmm5
-
-// CHECK: 	addss	3735928559(%ebx,%ecx,8), %xmm5
-        	addss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addss	69, %xmm5
-        	addss	0x45,%xmm5
-
-// CHECK: 	addss	32493, %xmm5
-        	addss	0x7eed,%xmm5
-
-// CHECK: 	addss	3133065982, %xmm5
-        	addss	0xbabecafe,%xmm5
-
-// CHECK: 	addss	305419896, %xmm5
-        	addss	0x12345678,%xmm5
-
-// CHECK: 	addss	%xmm5, %xmm5
-        	addss	%xmm5,%xmm5
-
-// CHECK: 	andnps	3735928559(%ebx,%ecx,8), %xmm5
-        	andnps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	andnps	69, %xmm5
-        	andnps	0x45,%xmm5
-
-// CHECK: 	andnps	32493, %xmm5
-        	andnps	0x7eed,%xmm5
-
-// CHECK: 	andnps	3133065982, %xmm5
-        	andnps	0xbabecafe,%xmm5
-
-// CHECK: 	andnps	305419896, %xmm5
-        	andnps	0x12345678,%xmm5
-
-// CHECK: 	andnps	%xmm5, %xmm5
-        	andnps	%xmm5,%xmm5
-
-// CHECK: 	andps	3735928559(%ebx,%ecx,8), %xmm5
-        	andps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	andps	69, %xmm5
-        	andps	0x45,%xmm5
-
-// CHECK: 	andps	32493, %xmm5
-        	andps	0x7eed,%xmm5
-
-// CHECK: 	andps	3133065982, %xmm5
-        	andps	0xbabecafe,%xmm5
-
-// CHECK: 	andps	305419896, %xmm5
-        	andps	0x12345678,%xmm5
-
-// CHECK: 	andps	%xmm5, %xmm5
-        	andps	%xmm5,%xmm5
-
-// CHECK: 	comiss	3735928559(%ebx,%ecx,8), %xmm5
-        	comiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	comiss	69, %xmm5
-        	comiss	0x45,%xmm5
-
-// CHECK: 	comiss	32493, %xmm5
-        	comiss	0x7eed,%xmm5
-
-// CHECK: 	comiss	3133065982, %xmm5
-        	comiss	0xbabecafe,%xmm5
-
-// CHECK: 	comiss	305419896, %xmm5
-        	comiss	0x12345678,%xmm5
-
-// CHECK: 	comiss	%xmm5, %xmm5
-        	comiss	%xmm5,%xmm5
-
-// CHECK: 	cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpi2ps	69, %xmm5
-        	cvtpi2ps	0x45,%xmm5
-
-// CHECK: 	cvtpi2ps	32493, %xmm5
-        	cvtpi2ps	0x7eed,%xmm5
-
-// CHECK: 	cvtpi2ps	3133065982, %xmm5
-        	cvtpi2ps	0xbabecafe,%xmm5
-
-// CHECK: 	cvtpi2ps	305419896, %xmm5
-        	cvtpi2ps	0x12345678,%xmm5
-
-// CHECK: 	cvtpi2ps	%mm3, %xmm5
-        	cvtpi2ps	%mm3,%xmm5
-
-// CHECK: 	cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvtps2pi	69, %mm3
-        	cvtps2pi	0x45,%mm3
-
-// CHECK: 	cvtps2pi	32493, %mm3
-        	cvtps2pi	0x7eed,%mm3
-
-// CHECK: 	cvtps2pi	3133065982, %mm3
-        	cvtps2pi	0xbabecafe,%mm3
-
-// CHECK: 	cvtps2pi	305419896, %mm3
-        	cvtps2pi	0x12345678,%mm3
-
-// CHECK: 	cvtps2pi	%xmm5, %mm3
-        	cvtps2pi	%xmm5,%mm3
-
-// CHECK: 	cvtsi2ss	%ecx, %xmm5
-        	cvtsi2ss	%ecx,%xmm5
-
-// CHECK: 	cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtsi2ss	69, %xmm5
-        	cvtsi2ss	0x45,%xmm5
-
-// CHECK: 	cvtsi2ss	32493, %xmm5
-        	cvtsi2ss	0x7eed,%xmm5
-
-// CHECK: 	cvtsi2ss	3133065982, %xmm5
-        	cvtsi2ss	0xbabecafe,%xmm5
-
-// CHECK: 	cvtsi2ss	305419896, %xmm5
-        	cvtsi2ss	0x12345678,%xmm5
-
-// CHECK: 	cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvttps2pi	69, %mm3
-        	cvttps2pi	0x45,%mm3
-
-// CHECK: 	cvttps2pi	32493, %mm3
-        	cvttps2pi	0x7eed,%mm3
-
-// CHECK: 	cvttps2pi	3133065982, %mm3
-        	cvttps2pi	0xbabecafe,%mm3
-
-// CHECK: 	cvttps2pi	305419896, %mm3
-        	cvttps2pi	0x12345678,%mm3
-
-// CHECK: 	cvttps2pi	%xmm5, %mm3
-        	cvttps2pi	%xmm5,%mm3
-
-// CHECK: 	cvttss2si	3735928559(%ebx,%ecx,8), %ecx
-        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	cvttss2si	69, %ecx
-        	cvttss2si	0x45,%ecx
-
-// CHECK: 	cvttss2si	32493, %ecx
-        	cvttss2si	0x7eed,%ecx
-
-// CHECK: 	cvttss2si	3133065982, %ecx
-        	cvttss2si	0xbabecafe,%ecx
-
-// CHECK: 	cvttss2si	305419896, %ecx
-        	cvttss2si	0x12345678,%ecx
-
-// CHECK: 	cvttss2si	%xmm5, %ecx
-        	cvttss2si	%xmm5,%ecx
-
-// CHECK: 	divps	3735928559(%ebx,%ecx,8), %xmm5
-        	divps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	divps	69, %xmm5
-        	divps	0x45,%xmm5
-
-// CHECK: 	divps	32493, %xmm5
-        	divps	0x7eed,%xmm5
-
-// CHECK: 	divps	3133065982, %xmm5
-        	divps	0xbabecafe,%xmm5
-
-// CHECK: 	divps	305419896, %xmm5
-        	divps	0x12345678,%xmm5
-
-// CHECK: 	divps	%xmm5, %xmm5
-        	divps	%xmm5,%xmm5
-
-// CHECK: 	divss	3735928559(%ebx,%ecx,8), %xmm5
-        	divss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	divss	69, %xmm5
-        	divss	0x45,%xmm5
-
-// CHECK: 	divss	32493, %xmm5
-        	divss	0x7eed,%xmm5
-
-// CHECK: 	divss	3133065982, %xmm5
-        	divss	0xbabecafe,%xmm5
-
-// CHECK: 	divss	305419896, %xmm5
-        	divss	0x12345678,%xmm5
-
-// CHECK: 	divss	%xmm5, %xmm5
-        	divss	%xmm5,%xmm5
-
-// CHECK: 	ldmxcsr	3735928559(%ebx,%ecx,8)
-        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	ldmxcsr	32493
-        	ldmxcsr	0x7eed
-
-// CHECK: 	ldmxcsr	3133065982
-        	ldmxcsr	0xbabecafe
-
-// CHECK: 	ldmxcsr	305419896
-        	ldmxcsr	0x12345678
-
-// CHECK: 	maskmovq	%mm3, %mm3
-        	maskmovq	%mm3,%mm3
-
-// CHECK: 	maxps	3735928559(%ebx,%ecx,8), %xmm5
-        	maxps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	maxps	69, %xmm5
-        	maxps	0x45,%xmm5
-
-// CHECK: 	maxps	32493, %xmm5
-        	maxps	0x7eed,%xmm5
-
-// CHECK: 	maxps	3133065982, %xmm5
-        	maxps	0xbabecafe,%xmm5
-
-// CHECK: 	maxps	305419896, %xmm5
-        	maxps	0x12345678,%xmm5
-
-// CHECK: 	maxps	%xmm5, %xmm5
-        	maxps	%xmm5,%xmm5
-
-// CHECK: 	maxss	3735928559(%ebx,%ecx,8), %xmm5
-        	maxss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	maxss	69, %xmm5
-        	maxss	0x45,%xmm5
-
-// CHECK: 	maxss	32493, %xmm5
-        	maxss	0x7eed,%xmm5
-
-// CHECK: 	maxss	3133065982, %xmm5
-        	maxss	0xbabecafe,%xmm5
-
-// CHECK: 	maxss	305419896, %xmm5
-        	maxss	0x12345678,%xmm5
-
-// CHECK: 	maxss	%xmm5, %xmm5
-        	maxss	%xmm5,%xmm5
-
-// CHECK: 	minps	3735928559(%ebx,%ecx,8), %xmm5
-        	minps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	minps	69, %xmm5
-        	minps	0x45,%xmm5
-
-// CHECK: 	minps	32493, %xmm5
-        	minps	0x7eed,%xmm5
-
-// CHECK: 	minps	3133065982, %xmm5
-        	minps	0xbabecafe,%xmm5
-
-// CHECK: 	minps	305419896, %xmm5
-        	minps	0x12345678,%xmm5
-
-// CHECK: 	minps	%xmm5, %xmm5
-        	minps	%xmm5,%xmm5
-
-// CHECK: 	minss	3735928559(%ebx,%ecx,8), %xmm5
-        	minss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	minss	69, %xmm5
-        	minss	0x45,%xmm5
-
-// CHECK: 	minss	32493, %xmm5
-        	minss	0x7eed,%xmm5
-
-// CHECK: 	minss	3133065982, %xmm5
-        	minss	0xbabecafe,%xmm5
-
-// CHECK: 	minss	305419896, %xmm5
-        	minss	0x12345678,%xmm5
-
-// CHECK: 	minss	%xmm5, %xmm5
-        	minss	%xmm5,%xmm5
-
-// CHECK: 	movaps	3735928559(%ebx,%ecx,8), %xmm5
-        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movaps	69, %xmm5
-        	movaps	0x45,%xmm5
-
-// CHECK: 	movaps	32493, %xmm5
-        	movaps	0x7eed,%xmm5
-
-// CHECK: 	movaps	3133065982, %xmm5
-        	movaps	0xbabecafe,%xmm5
-
-// CHECK: 	movaps	305419896, %xmm5
-        	movaps	0x12345678,%xmm5
-
-// CHECK: 	movaps	%xmm5, %xmm5
-        	movaps	%xmm5,%xmm5
-
-// CHECK: 	movaps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movaps	%xmm5, 69
-        	movaps	%xmm5,0x45
-
-// CHECK: 	movaps	%xmm5, 32493
-        	movaps	%xmm5,0x7eed
-
-// CHECK: 	movaps	%xmm5, 3133065982
-        	movaps	%xmm5,0xbabecafe
-
-// CHECK: 	movaps	%xmm5, 305419896
-        	movaps	%xmm5,0x12345678
-
-// CHECK: 	movaps	%xmm5, %xmm5
-        	movaps	%xmm5,%xmm5
-
-// CHECK: 	movhlps	%xmm5, %xmm5
-        	movhlps	%xmm5,%xmm5
-
-// CHECK: 	movhps	3735928559(%ebx,%ecx,8), %xmm5
-        	movhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movhps	69, %xmm5
-        	movhps	0x45,%xmm5
-
-// CHECK: 	movhps	32493, %xmm5
-        	movhps	0x7eed,%xmm5
-
-// CHECK: 	movhps	3133065982, %xmm5
-        	movhps	0xbabecafe,%xmm5
-
-// CHECK: 	movhps	305419896, %xmm5
-        	movhps	0x12345678,%xmm5
-
-// CHECK: 	movhps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movhps	%xmm5, 69
-        	movhps	%xmm5,0x45
-
-// CHECK: 	movhps	%xmm5, 32493
-        	movhps	%xmm5,0x7eed
-
-// CHECK: 	movhps	%xmm5, 3133065982
-        	movhps	%xmm5,0xbabecafe
-
-// CHECK: 	movhps	%xmm5, 305419896
-        	movhps	%xmm5,0x12345678
-
-// CHECK: 	movlhps	%xmm5, %xmm5
-        	movlhps	%xmm5,%xmm5
-
-// CHECK: 	movlps	3735928559(%ebx,%ecx,8), %xmm5
-        	movlps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movlps	69, %xmm5
-        	movlps	0x45,%xmm5
-
-// CHECK: 	movlps	32493, %xmm5
-        	movlps	0x7eed,%xmm5
-
-// CHECK: 	movlps	3133065982, %xmm5
-        	movlps	0xbabecafe,%xmm5
-
-// CHECK: 	movlps	305419896, %xmm5
-        	movlps	0x12345678,%xmm5
-
-// CHECK: 	movlps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movlps	%xmm5, 69
-        	movlps	%xmm5,0x45
-
-// CHECK: 	movlps	%xmm5, 32493
-        	movlps	%xmm5,0x7eed
-
-// CHECK: 	movlps	%xmm5, 3133065982
-        	movlps	%xmm5,0xbabecafe
-
-// CHECK: 	movlps	%xmm5, 305419896
-        	movlps	%xmm5,0x12345678
-
-// CHECK: 	movmskps	%xmm5, %ecx
-        	movmskps	%xmm5,%ecx
-
-// CHECK: 	movntps	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntps	%xmm5, 69
-        	movntps	%xmm5,0x45
-
-// CHECK: 	movntps	%xmm5, 32493
-        	movntps	%xmm5,0x7eed
-
-// CHECK: 	movntps	%xmm5, 3133065982
-        	movntps	%xmm5,0xbabecafe
-
-// CHECK: 	movntps	%xmm5, 305419896
-        	movntps	%xmm5,0x12345678
-
-// CHECK: 	movntq	%mm3, 3735928559(%ebx,%ecx,8)
-        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntq	%mm3, 69
-        	movntq	%mm3,0x45
-
-// CHECK: 	movntq	%mm3, 32493
-        	movntq	%mm3,0x7eed
-
-// CHECK: 	movntq	%mm3, 3133065982
-        	movntq	%mm3,0xbabecafe
-
-// CHECK: 	movntq	%mm3, 305419896
-        	movntq	%mm3,0x12345678
-
-// CHECK: 	movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntdq	%xmm5, 69
-        	movntdq	%xmm5,0x45
-
-// CHECK: 	movntdq	%xmm5, 32493
-        	movntdq	%xmm5,0x7eed
-
-// CHECK: 	movntdq	%xmm5, 3133065982
-        	movntdq	%xmm5,0xbabecafe
-
-// CHECK: 	movntdq	%xmm5, 305419896
-        	movntdq	%xmm5,0x12345678
-
-// CHECK: 	movss	3735928559(%ebx,%ecx,8), %xmm5
-        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movss	69, %xmm5
-        	movss	0x45,%xmm5
-
-// CHECK: 	movss	32493, %xmm5
-        	movss	0x7eed,%xmm5
-
-// CHECK: 	movss	3133065982, %xmm5
-        	movss	0xbabecafe,%xmm5
-
-// CHECK: 	movss	305419896, %xmm5
-        	movss	0x12345678,%xmm5
-
-// CHECK: 	movss	%xmm5, %xmm5
-        	movss	%xmm5,%xmm5
-
-// CHECK: 	movss	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movss	%xmm5, 69
-        	movss	%xmm5,0x45
-
-// CHECK: 	movss	%xmm5, 32493
-        	movss	%xmm5,0x7eed
-
-// CHECK: 	movss	%xmm5, 3133065982
-        	movss	%xmm5,0xbabecafe
-
-// CHECK: 	movss	%xmm5, 305419896
-        	movss	%xmm5,0x12345678
-
-// CHECK: 	movss	%xmm5, %xmm5
-        	movss	%xmm5,%xmm5
-
-// CHECK: 	movups	3735928559(%ebx,%ecx,8), %xmm5
-        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movups	69, %xmm5
-        	movups	0x45,%xmm5
-
-// CHECK: 	movups	32493, %xmm5
-        	movups	0x7eed,%xmm5
-
-// CHECK: 	movups	3133065982, %xmm5
-        	movups	0xbabecafe,%xmm5
-
-// CHECK: 	movups	305419896, %xmm5
-        	movups	0x12345678,%xmm5
-
-// CHECK: 	movups	%xmm5, %xmm5
-        	movups	%xmm5,%xmm5
-
-// CHECK: 	movups	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movups	%xmm5, 69
-        	movups	%xmm5,0x45
-
-// CHECK: 	movups	%xmm5, 32493
-        	movups	%xmm5,0x7eed
-
-// CHECK: 	movups	%xmm5, 3133065982
-        	movups	%xmm5,0xbabecafe
-
-// CHECK: 	movups	%xmm5, 305419896
-        	movups	%xmm5,0x12345678
-
-// CHECK: 	movups	%xmm5, %xmm5
-        	movups	%xmm5,%xmm5
-
-// CHECK: 	mulps	3735928559(%ebx,%ecx,8), %xmm5
-        	mulps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	mulps	69, %xmm5
-        	mulps	0x45,%xmm5
-
-// CHECK: 	mulps	32493, %xmm5
-        	mulps	0x7eed,%xmm5
-
-// CHECK: 	mulps	3133065982, %xmm5
-        	mulps	0xbabecafe,%xmm5
-
-// CHECK: 	mulps	305419896, %xmm5
-        	mulps	0x12345678,%xmm5
-
-// CHECK: 	mulps	%xmm5, %xmm5
-        	mulps	%xmm5,%xmm5
-
-// CHECK: 	mulss	3735928559(%ebx,%ecx,8), %xmm5
-        	mulss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	mulss	69, %xmm5
-        	mulss	0x45,%xmm5
-
-// CHECK: 	mulss	32493, %xmm5
-        	mulss	0x7eed,%xmm5
-
-// CHECK: 	mulss	3133065982, %xmm5
-        	mulss	0xbabecafe,%xmm5
-
-// CHECK: 	mulss	305419896, %xmm5
-        	mulss	0x12345678,%xmm5
-
-// CHECK: 	mulss	%xmm5, %xmm5
-        	mulss	%xmm5,%xmm5
-
-// CHECK: 	orps	3735928559(%ebx,%ecx,8), %xmm5
-        	orps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	orps	69, %xmm5
-        	orps	0x45,%xmm5
-
-// CHECK: 	orps	32493, %xmm5
-        	orps	0x7eed,%xmm5
-
-// CHECK: 	orps	3133065982, %xmm5
-        	orps	0xbabecafe,%xmm5
-
-// CHECK: 	orps	305419896, %xmm5
-        	orps	0x12345678,%xmm5
-
-// CHECK: 	orps	%xmm5, %xmm5
-        	orps	%xmm5,%xmm5
-
-// CHECK: 	pavgb	3735928559(%ebx,%ecx,8), %mm3
-        	pavgb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pavgb	69, %mm3
-        	pavgb	0x45,%mm3
-
-// CHECK: 	pavgb	32493, %mm3
-        	pavgb	0x7eed,%mm3
-
-// CHECK: 	pavgb	3133065982, %mm3
-        	pavgb	0xbabecafe,%mm3
-
-// CHECK: 	pavgb	305419896, %mm3
-        	pavgb	0x12345678,%mm3
-
-// CHECK: 	pavgb	%mm3, %mm3
-        	pavgb	%mm3,%mm3
-
-// CHECK: 	pavgb	3735928559(%ebx,%ecx,8), %xmm5
-        	pavgb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pavgb	69, %xmm5
-        	pavgb	0x45,%xmm5
-
-// CHECK: 	pavgb	32493, %xmm5
-        	pavgb	0x7eed,%xmm5
-
-// CHECK: 	pavgb	3133065982, %xmm5
-        	pavgb	0xbabecafe,%xmm5
-
-// CHECK: 	pavgb	305419896, %xmm5
-        	pavgb	0x12345678,%xmm5
-
-// CHECK: 	pavgb	%xmm5, %xmm5
-        	pavgb	%xmm5,%xmm5
-
-// CHECK: 	pavgw	3735928559(%ebx,%ecx,8), %mm3
-        	pavgw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pavgw	69, %mm3
-        	pavgw	0x45,%mm3
-
-// CHECK: 	pavgw	32493, %mm3
-        	pavgw	0x7eed,%mm3
-
-// CHECK: 	pavgw	3133065982, %mm3
-        	pavgw	0xbabecafe,%mm3
-
-// CHECK: 	pavgw	305419896, %mm3
-        	pavgw	0x12345678,%mm3
-
-// CHECK: 	pavgw	%mm3, %mm3
-        	pavgw	%mm3,%mm3
-
-// CHECK: 	pavgw	3735928559(%ebx,%ecx,8), %xmm5
-        	pavgw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pavgw	69, %xmm5
-        	pavgw	0x45,%xmm5
-
-// CHECK: 	pavgw	32493, %xmm5
-        	pavgw	0x7eed,%xmm5
-
-// CHECK: 	pavgw	3133065982, %xmm5
-        	pavgw	0xbabecafe,%xmm5
-
-// CHECK: 	pavgw	305419896, %xmm5
-        	pavgw	0x12345678,%xmm5
-
-// CHECK: 	pavgw	%xmm5, %xmm5
-        	pavgw	%xmm5,%xmm5
-
-// CHECK: 	pmaxsw	3735928559(%ebx,%ecx,8), %mm3
-        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmaxsw	69, %mm3
-        	pmaxsw	0x45,%mm3
-
-// CHECK: 	pmaxsw	32493, %mm3
-        	pmaxsw	0x7eed,%mm3
-
-// CHECK: 	pmaxsw	3133065982, %mm3
-        	pmaxsw	0xbabecafe,%mm3
-
-// CHECK: 	pmaxsw	305419896, %mm3
-        	pmaxsw	0x12345678,%mm3
-
-// CHECK: 	pmaxsw	%mm3, %mm3
-        	pmaxsw	%mm3,%mm3
-
-// CHECK: 	pmaxsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxsw	69, %xmm5
-        	pmaxsw	0x45,%xmm5
-
-// CHECK: 	pmaxsw	32493, %xmm5
-        	pmaxsw	0x7eed,%xmm5
-
-// CHECK: 	pmaxsw	3133065982, %xmm5
-        	pmaxsw	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxsw	305419896, %xmm5
-        	pmaxsw	0x12345678,%xmm5
-
-// CHECK: 	pmaxsw	%xmm5, %xmm5
-        	pmaxsw	%xmm5,%xmm5
-
-// CHECK: 	pmaxub	3735928559(%ebx,%ecx,8), %mm3
-        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmaxub	69, %mm3
-        	pmaxub	0x45,%mm3
-
-// CHECK: 	pmaxub	32493, %mm3
-        	pmaxub	0x7eed,%mm3
-
-// CHECK: 	pmaxub	3133065982, %mm3
-        	pmaxub	0xbabecafe,%mm3
-
-// CHECK: 	pmaxub	305419896, %mm3
-        	pmaxub	0x12345678,%mm3
-
-// CHECK: 	pmaxub	%mm3, %mm3
-        	pmaxub	%mm3,%mm3
-
-// CHECK: 	pmaxub	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxub	69, %xmm5
-        	pmaxub	0x45,%xmm5
-
-// CHECK: 	pmaxub	32493, %xmm5
-        	pmaxub	0x7eed,%xmm5
-
-// CHECK: 	pmaxub	3133065982, %xmm5
-        	pmaxub	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxub	305419896, %xmm5
-        	pmaxub	0x12345678,%xmm5
-
-// CHECK: 	pmaxub	%xmm5, %xmm5
-        	pmaxub	%xmm5,%xmm5
-
-// CHECK: 	pminsw	3735928559(%ebx,%ecx,8), %mm3
-        	pminsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pminsw	69, %mm3
-        	pminsw	0x45,%mm3
-
-// CHECK: 	pminsw	32493, %mm3
-        	pminsw	0x7eed,%mm3
-
-// CHECK: 	pminsw	3133065982, %mm3
-        	pminsw	0xbabecafe,%mm3
-
-// CHECK: 	pminsw	305419896, %mm3
-        	pminsw	0x12345678,%mm3
-
-// CHECK: 	pminsw	%mm3, %mm3
-        	pminsw	%mm3,%mm3
-
-// CHECK: 	pminsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pminsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminsw	69, %xmm5
-        	pminsw	0x45,%xmm5
-
-// CHECK: 	pminsw	32493, %xmm5
-        	pminsw	0x7eed,%xmm5
-
-// CHECK: 	pminsw	3133065982, %xmm5
-        	pminsw	0xbabecafe,%xmm5
-
-// CHECK: 	pminsw	305419896, %xmm5
-        	pminsw	0x12345678,%xmm5
-
-// CHECK: 	pminsw	%xmm5, %xmm5
-        	pminsw	%xmm5,%xmm5
-
-// CHECK: 	pminub	3735928559(%ebx,%ecx,8), %mm3
-        	pminub	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pminub	69, %mm3
-        	pminub	0x45,%mm3
-
-// CHECK: 	pminub	32493, %mm3
-        	pminub	0x7eed,%mm3
-
-// CHECK: 	pminub	3133065982, %mm3
-        	pminub	0xbabecafe,%mm3
-
-// CHECK: 	pminub	305419896, %mm3
-        	pminub	0x12345678,%mm3
-
-// CHECK: 	pminub	%mm3, %mm3
-        	pminub	%mm3,%mm3
-
-// CHECK: 	pminub	3735928559(%ebx,%ecx,8), %xmm5
-        	pminub	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminub	69, %xmm5
-        	pminub	0x45,%xmm5
-
-// CHECK: 	pminub	32493, %xmm5
-        	pminub	0x7eed,%xmm5
-
-// CHECK: 	pminub	3133065982, %xmm5
-        	pminub	0xbabecafe,%xmm5
-
-// CHECK: 	pminub	305419896, %xmm5
-        	pminub	0x12345678,%xmm5
-
-// CHECK: 	pminub	%xmm5, %xmm5
-        	pminub	%xmm5,%xmm5
-
-// CHECK: 	pmovmskb	%mm3, %ecx
-        	pmovmskb	%mm3,%ecx
-
-// CHECK: 	pmovmskb	%xmm5, %ecx
-        	pmovmskb	%xmm5,%ecx
-
-// CHECK: 	pmulhuw	3735928559(%ebx,%ecx,8), %mm3
-        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmulhuw	69, %mm3
-        	pmulhuw	0x45,%mm3
-
-// CHECK: 	pmulhuw	32493, %mm3
-        	pmulhuw	0x7eed,%mm3
-
-// CHECK: 	pmulhuw	3133065982, %mm3
-        	pmulhuw	0xbabecafe,%mm3
-
-// CHECK: 	pmulhuw	305419896, %mm3
-        	pmulhuw	0x12345678,%mm3
-
-// CHECK: 	pmulhuw	%mm3, %mm3
-        	pmulhuw	%mm3,%mm3
-
-// CHECK: 	pmulhuw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmulhuw	69, %xmm5
-        	pmulhuw	0x45,%xmm5
-
-// CHECK: 	pmulhuw	32493, %xmm5
-        	pmulhuw	0x7eed,%xmm5
-
-// CHECK: 	pmulhuw	3133065982, %xmm5
-        	pmulhuw	0xbabecafe,%xmm5
-
-// CHECK: 	pmulhuw	305419896, %xmm5
-        	pmulhuw	0x12345678,%xmm5
-
-// CHECK: 	pmulhuw	%xmm5, %xmm5
-        	pmulhuw	%xmm5,%xmm5
-
-// CHECK: 	prefetchnta	3735928559(%ebx,%ecx,8)
-        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetchnta	32493
-        	prefetchnta	0x7eed
-
-// CHECK: 	prefetchnta	3133065982
-        	prefetchnta	0xbabecafe
-
-// CHECK: 	prefetchnta	305419896
-        	prefetchnta	0x12345678
-
-// CHECK: 	prefetcht0	3735928559(%ebx,%ecx,8)
-        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht0	32493
-        	prefetcht0	0x7eed
-
-// CHECK: 	prefetcht0	3133065982
-        	prefetcht0	0xbabecafe
-
-// CHECK: 	prefetcht0	305419896
-        	prefetcht0	0x12345678
-
-// CHECK: 	prefetcht1	3735928559(%ebx,%ecx,8)
-        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht1	32493
-        	prefetcht1	0x7eed
-
-// CHECK: 	prefetcht1	3133065982
-        	prefetcht1	0xbabecafe
-
-// CHECK: 	prefetcht1	305419896
-        	prefetcht1	0x12345678
-
-// CHECK: 	prefetcht2	3735928559(%ebx,%ecx,8)
-        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	prefetcht2	32493
-        	prefetcht2	0x7eed
-
-// CHECK: 	prefetcht2	3133065982
-        	prefetcht2	0xbabecafe
-
-// CHECK: 	prefetcht2	305419896
-        	prefetcht2	0x12345678
-
-// CHECK: 	psadbw	3735928559(%ebx,%ecx,8), %mm3
-        	psadbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psadbw	69, %mm3
-        	psadbw	0x45,%mm3
-
-// CHECK: 	psadbw	32493, %mm3
-        	psadbw	0x7eed,%mm3
-
-// CHECK: 	psadbw	3133065982, %mm3
-        	psadbw	0xbabecafe,%mm3
-
-// CHECK: 	psadbw	305419896, %mm3
-        	psadbw	0x12345678,%mm3
-
-// CHECK: 	psadbw	%mm3, %mm3
-        	psadbw	%mm3,%mm3
-
-// CHECK: 	psadbw	3735928559(%ebx,%ecx,8), %xmm5
-        	psadbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psadbw	69, %xmm5
-        	psadbw	0x45,%xmm5
-
-// CHECK: 	psadbw	32493, %xmm5
-        	psadbw	0x7eed,%xmm5
-
-// CHECK: 	psadbw	3133065982, %xmm5
-        	psadbw	0xbabecafe,%xmm5
-
-// CHECK: 	psadbw	305419896, %xmm5
-        	psadbw	0x12345678,%xmm5
-
-// CHECK: 	psadbw	%xmm5, %xmm5
-        	psadbw	%xmm5,%xmm5
-
-// CHECK: 	rcpps	3735928559(%ebx,%ecx,8), %xmm5
-        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rcpps	69, %xmm5
-        	rcpps	0x45,%xmm5
-
-// CHECK: 	rcpps	32493, %xmm5
-        	rcpps	0x7eed,%xmm5
-
-// CHECK: 	rcpps	3133065982, %xmm5
-        	rcpps	0xbabecafe,%xmm5
-
-// CHECK: 	rcpps	305419896, %xmm5
-        	rcpps	0x12345678,%xmm5
-
-// CHECK: 	rcpps	%xmm5, %xmm5
-        	rcpps	%xmm5,%xmm5
-
-// CHECK: 	rcpss	3735928559(%ebx,%ecx,8), %xmm5
-        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rcpss	69, %xmm5
-        	rcpss	0x45,%xmm5
-
-// CHECK: 	rcpss	32493, %xmm5
-        	rcpss	0x7eed,%xmm5
-
-// CHECK: 	rcpss	3133065982, %xmm5
-        	rcpss	0xbabecafe,%xmm5
-
-// CHECK: 	rcpss	305419896, %xmm5
-        	rcpss	0x12345678,%xmm5
-
-// CHECK: 	rcpss	%xmm5, %xmm5
-        	rcpss	%xmm5,%xmm5
-
-// CHECK: 	rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
-        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rsqrtps	69, %xmm5
-        	rsqrtps	0x45,%xmm5
-
-// CHECK: 	rsqrtps	32493, %xmm5
-        	rsqrtps	0x7eed,%xmm5
-
-// CHECK: 	rsqrtps	3133065982, %xmm5
-        	rsqrtps	0xbabecafe,%xmm5
-
-// CHECK: 	rsqrtps	305419896, %xmm5
-        	rsqrtps	0x12345678,%xmm5
-
-// CHECK: 	rsqrtps	%xmm5, %xmm5
-        	rsqrtps	%xmm5,%xmm5
-
-// CHECK: 	rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
-        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	rsqrtss	69, %xmm5
-        	rsqrtss	0x45,%xmm5
-
-// CHECK: 	rsqrtss	32493, %xmm5
-        	rsqrtss	0x7eed,%xmm5
-
-// CHECK: 	rsqrtss	3133065982, %xmm5
-        	rsqrtss	0xbabecafe,%xmm5
-
-// CHECK: 	rsqrtss	305419896, %xmm5
-        	rsqrtss	0x12345678,%xmm5
-
-// CHECK: 	rsqrtss	%xmm5, %xmm5
-        	rsqrtss	%xmm5,%xmm5
-
-// CHECK: 	sfence
-        	sfence
-
-// CHECK: 	sqrtps	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtps	69, %xmm5
-        	sqrtps	0x45,%xmm5
-
-// CHECK: 	sqrtps	32493, %xmm5
-        	sqrtps	0x7eed,%xmm5
-
-// CHECK: 	sqrtps	3133065982, %xmm5
-        	sqrtps	0xbabecafe,%xmm5
-
-// CHECK: 	sqrtps	305419896, %xmm5
-        	sqrtps	0x12345678,%xmm5
-
-// CHECK: 	sqrtps	%xmm5, %xmm5
-        	sqrtps	%xmm5,%xmm5
-
-// CHECK: 	sqrtss	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtss	69, %xmm5
-        	sqrtss	0x45,%xmm5
-
-// CHECK: 	sqrtss	32493, %xmm5
-        	sqrtss	0x7eed,%xmm5
-
-// CHECK: 	sqrtss	3133065982, %xmm5
-        	sqrtss	0xbabecafe,%xmm5
-
-// CHECK: 	sqrtss	305419896, %xmm5
-        	sqrtss	0x12345678,%xmm5
-
-// CHECK: 	sqrtss	%xmm5, %xmm5
-        	sqrtss	%xmm5,%xmm5
-
-// CHECK: 	stmxcsr	3735928559(%ebx,%ecx,8)
-        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	stmxcsr	32493
-        	stmxcsr	0x7eed
-
-// CHECK: 	stmxcsr	3133065982
-        	stmxcsr	0xbabecafe
-
-// CHECK: 	stmxcsr	305419896
-        	stmxcsr	0x12345678
-
-// CHECK: 	subps	3735928559(%ebx,%ecx,8), %xmm5
-        	subps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	subps	69, %xmm5
-        	subps	0x45,%xmm5
-
-// CHECK: 	subps	32493, %xmm5
-        	subps	0x7eed,%xmm5
-
-// CHECK: 	subps	3133065982, %xmm5
-        	subps	0xbabecafe,%xmm5
-
-// CHECK: 	subps	305419896, %xmm5
-        	subps	0x12345678,%xmm5
-
-// CHECK: 	subps	%xmm5, %xmm5
-        	subps	%xmm5,%xmm5
-
-// CHECK: 	subss	3735928559(%ebx,%ecx,8), %xmm5
-        	subss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	subss	69, %xmm5
-        	subss	0x45,%xmm5
-
-// CHECK: 	subss	32493, %xmm5
-        	subss	0x7eed,%xmm5
-
-// CHECK: 	subss	3133065982, %xmm5
-        	subss	0xbabecafe,%xmm5
-
-// CHECK: 	subss	305419896, %xmm5
-        	subss	0x12345678,%xmm5
-
-// CHECK: 	subss	%xmm5, %xmm5
-        	subss	%xmm5,%xmm5
-
-// CHECK: 	ucomiss	3735928559(%ebx,%ecx,8), %xmm5
-        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ucomiss	69, %xmm5
-        	ucomiss	0x45,%xmm5
-
-// CHECK: 	ucomiss	32493, %xmm5
-        	ucomiss	0x7eed,%xmm5
-
-// CHECK: 	ucomiss	3133065982, %xmm5
-        	ucomiss	0xbabecafe,%xmm5
-
-// CHECK: 	ucomiss	305419896, %xmm5
-        	ucomiss	0x12345678,%xmm5
-
-// CHECK: 	ucomiss	%xmm5, %xmm5
-        	ucomiss	%xmm5,%xmm5
-
-// CHECK: 	unpckhps	3735928559(%ebx,%ecx,8), %xmm5
-        	unpckhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	unpckhps	69, %xmm5
-        	unpckhps	0x45,%xmm5
-
-// CHECK: 	unpckhps	32493, %xmm5
-        	unpckhps	0x7eed,%xmm5
-
-// CHECK: 	unpckhps	3133065982, %xmm5
-        	unpckhps	0xbabecafe,%xmm5
-
-// CHECK: 	unpckhps	305419896, %xmm5
-        	unpckhps	0x12345678,%xmm5
-
-// CHECK: 	unpckhps	%xmm5, %xmm5
-        	unpckhps	%xmm5,%xmm5
-
-// CHECK: 	unpcklps	3735928559(%ebx,%ecx,8), %xmm5
-        	unpcklps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	unpcklps	69, %xmm5
-        	unpcklps	0x45,%xmm5
-
-// CHECK: 	unpcklps	32493, %xmm5
-        	unpcklps	0x7eed,%xmm5
-
-// CHECK: 	unpcklps	3133065982, %xmm5
-        	unpcklps	0xbabecafe,%xmm5
-
-// CHECK: 	unpcklps	305419896, %xmm5
-        	unpcklps	0x12345678,%xmm5
-
-// CHECK: 	unpcklps	%xmm5, %xmm5
-        	unpcklps	%xmm5,%xmm5
-
-// CHECK: 	xorps	3735928559(%ebx,%ecx,8), %xmm5
-        	xorps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	xorps	69, %xmm5
-        	xorps	0x45,%xmm5
-
-// CHECK: 	xorps	32493, %xmm5
-        	xorps	0x7eed,%xmm5
-
-// CHECK: 	xorps	3133065982, %xmm5
-        	xorps	0xbabecafe,%xmm5
-
-// CHECK: 	xorps	305419896, %xmm5
-        	xorps	0x12345678,%xmm5
-
-// CHECK: 	xorps	%xmm5, %xmm5
-        	xorps	%xmm5,%xmm5
-
-// CHECK: 	addpd	3735928559(%ebx,%ecx,8), %xmm5
-        	addpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addpd	69, %xmm5
-        	addpd	0x45,%xmm5
-
-// CHECK: 	addpd	32493, %xmm5
-        	addpd	0x7eed,%xmm5
-
-// CHECK: 	addpd	3133065982, %xmm5
-        	addpd	0xbabecafe,%xmm5
-
-// CHECK: 	addpd	305419896, %xmm5
-        	addpd	0x12345678,%xmm5
-
-// CHECK: 	addpd	%xmm5, %xmm5
-        	addpd	%xmm5,%xmm5
-
-// CHECK: 	addsd	3735928559(%ebx,%ecx,8), %xmm5
-        	addsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addsd	69, %xmm5
-        	addsd	0x45,%xmm5
-
-// CHECK: 	addsd	32493, %xmm5
-        	addsd	0x7eed,%xmm5
-
-// CHECK: 	addsd	3133065982, %xmm5
-        	addsd	0xbabecafe,%xmm5
-
-// CHECK: 	addsd	305419896, %xmm5
-        	addsd	0x12345678,%xmm5
-
-// CHECK: 	addsd	%xmm5, %xmm5
-        	addsd	%xmm5,%xmm5
-
-// CHECK: 	andnpd	3735928559(%ebx,%ecx,8), %xmm5
-        	andnpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	andnpd	69, %xmm5
-        	andnpd	0x45,%xmm5
-
-// CHECK: 	andnpd	32493, %xmm5
-        	andnpd	0x7eed,%xmm5
-
-// CHECK: 	andnpd	3133065982, %xmm5
-        	andnpd	0xbabecafe,%xmm5
-
-// CHECK: 	andnpd	305419896, %xmm5
-        	andnpd	0x12345678,%xmm5
-
-// CHECK: 	andnpd	%xmm5, %xmm5
-        	andnpd	%xmm5,%xmm5
-
-// CHECK: 	andpd	3735928559(%ebx,%ecx,8), %xmm5
-        	andpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	andpd	69, %xmm5
-        	andpd	0x45,%xmm5
-
-// CHECK: 	andpd	32493, %xmm5
-        	andpd	0x7eed,%xmm5
-
-// CHECK: 	andpd	3133065982, %xmm5
-        	andpd	0xbabecafe,%xmm5
-
-// CHECK: 	andpd	305419896, %xmm5
-        	andpd	0x12345678,%xmm5
-
-// CHECK: 	andpd	%xmm5, %xmm5
-        	andpd	%xmm5,%xmm5
-
-// CHECK: 	comisd	3735928559(%ebx,%ecx,8), %xmm5
-        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	comisd	69, %xmm5
-        	comisd	0x45,%xmm5
-
-// CHECK: 	comisd	32493, %xmm5
-        	comisd	0x7eed,%xmm5
-
-// CHECK: 	comisd	3133065982, %xmm5
-        	comisd	0xbabecafe,%xmm5
-
-// CHECK: 	comisd	305419896, %xmm5
-        	comisd	0x12345678,%xmm5
-
-// CHECK: 	comisd	%xmm5, %xmm5
-        	comisd	%xmm5,%xmm5
-
-// CHECK: 	cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpi2pd	69, %xmm5
-        	cvtpi2pd	0x45,%xmm5
-
-// CHECK: 	cvtpi2pd	32493, %xmm5
-        	cvtpi2pd	0x7eed,%xmm5
-
-// CHECK: 	cvtpi2pd	3133065982, %xmm5
-        	cvtpi2pd	0xbabecafe,%xmm5
-
-// CHECK: 	cvtpi2pd	305419896, %xmm5
-        	cvtpi2pd	0x12345678,%xmm5
-
-// CHECK: 	cvtpi2pd	%mm3, %xmm5
-        	cvtpi2pd	%mm3,%xmm5
-
-// CHECK: 	cvtsi2sd	%ecx, %xmm5
-        	cvtsi2sd	%ecx,%xmm5
-
-// CHECK: 	cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtsi2sd	69, %xmm5
-        	cvtsi2sd	0x45,%xmm5
-
-// CHECK: 	cvtsi2sd	32493, %xmm5
-        	cvtsi2sd	0x7eed,%xmm5
-
-// CHECK: 	cvtsi2sd	3133065982, %xmm5
-        	cvtsi2sd	0xbabecafe,%xmm5
-
-// CHECK: 	cvtsi2sd	305419896, %xmm5
-        	cvtsi2sd	0x12345678,%xmm5
-
-// CHECK: 	divpd	3735928559(%ebx,%ecx,8), %xmm5
-        	divpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	divpd	69, %xmm5
-        	divpd	0x45,%xmm5
-
-// CHECK: 	divpd	32493, %xmm5
-        	divpd	0x7eed,%xmm5
-
-// CHECK: 	divpd	3133065982, %xmm5
-        	divpd	0xbabecafe,%xmm5
-
-// CHECK: 	divpd	305419896, %xmm5
-        	divpd	0x12345678,%xmm5
-
-// CHECK: 	divpd	%xmm5, %xmm5
-        	divpd	%xmm5,%xmm5
-
-// CHECK: 	divsd	3735928559(%ebx,%ecx,8), %xmm5
-        	divsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	divsd	69, %xmm5
-        	divsd	0x45,%xmm5
-
-// CHECK: 	divsd	32493, %xmm5
-        	divsd	0x7eed,%xmm5
-
-// CHECK: 	divsd	3133065982, %xmm5
-        	divsd	0xbabecafe,%xmm5
-
-// CHECK: 	divsd	305419896, %xmm5
-        	divsd	0x12345678,%xmm5
-
-// CHECK: 	divsd	%xmm5, %xmm5
-        	divsd	%xmm5,%xmm5
-
-// CHECK: 	maxpd	3735928559(%ebx,%ecx,8), %xmm5
-        	maxpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	maxpd	69, %xmm5
-        	maxpd	0x45,%xmm5
-
-// CHECK: 	maxpd	32493, %xmm5
-        	maxpd	0x7eed,%xmm5
-
-// CHECK: 	maxpd	3133065982, %xmm5
-        	maxpd	0xbabecafe,%xmm5
-
-// CHECK: 	maxpd	305419896, %xmm5
-        	maxpd	0x12345678,%xmm5
-
-// CHECK: 	maxpd	%xmm5, %xmm5
-        	maxpd	%xmm5,%xmm5
-
-// CHECK: 	maxsd	3735928559(%ebx,%ecx,8), %xmm5
-        	maxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	maxsd	69, %xmm5
-        	maxsd	0x45,%xmm5
-
-// CHECK: 	maxsd	32493, %xmm5
-        	maxsd	0x7eed,%xmm5
-
-// CHECK: 	maxsd	3133065982, %xmm5
-        	maxsd	0xbabecafe,%xmm5
-
-// CHECK: 	maxsd	305419896, %xmm5
-        	maxsd	0x12345678,%xmm5
-
-// CHECK: 	maxsd	%xmm5, %xmm5
-        	maxsd	%xmm5,%xmm5
-
-// CHECK: 	minpd	3735928559(%ebx,%ecx,8), %xmm5
-        	minpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	minpd	69, %xmm5
-        	minpd	0x45,%xmm5
-
-// CHECK: 	minpd	32493, %xmm5
-        	minpd	0x7eed,%xmm5
-
-// CHECK: 	minpd	3133065982, %xmm5
-        	minpd	0xbabecafe,%xmm5
-
-// CHECK: 	minpd	305419896, %xmm5
-        	minpd	0x12345678,%xmm5
-
-// CHECK: 	minpd	%xmm5, %xmm5
-        	minpd	%xmm5,%xmm5
-
-// CHECK: 	minsd	3735928559(%ebx,%ecx,8), %xmm5
-        	minsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	minsd	69, %xmm5
-        	minsd	0x45,%xmm5
-
-// CHECK: 	minsd	32493, %xmm5
-        	minsd	0x7eed,%xmm5
-
-// CHECK: 	minsd	3133065982, %xmm5
-        	minsd	0xbabecafe,%xmm5
-
-// CHECK: 	minsd	305419896, %xmm5
-        	minsd	0x12345678,%xmm5
-
-// CHECK: 	minsd	%xmm5, %xmm5
-        	minsd	%xmm5,%xmm5
-
-// CHECK: 	movapd	3735928559(%ebx,%ecx,8), %xmm5
-        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movapd	69, %xmm5
-        	movapd	0x45,%xmm5
-
-// CHECK: 	movapd	32493, %xmm5
-        	movapd	0x7eed,%xmm5
-
-// CHECK: 	movapd	3133065982, %xmm5
-        	movapd	0xbabecafe,%xmm5
-
-// CHECK: 	movapd	305419896, %xmm5
-        	movapd	0x12345678,%xmm5
-
-// CHECK: 	movapd	%xmm5, %xmm5
-        	movapd	%xmm5,%xmm5
-
-// CHECK: 	movapd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movapd	%xmm5, 69
-        	movapd	%xmm5,0x45
-
-// CHECK: 	movapd	%xmm5, 32493
-        	movapd	%xmm5,0x7eed
-
-// CHECK: 	movapd	%xmm5, 3133065982
-        	movapd	%xmm5,0xbabecafe
-
-// CHECK: 	movapd	%xmm5, 305419896
-        	movapd	%xmm5,0x12345678
-
-// CHECK: 	movapd	%xmm5, %xmm5
-        	movapd	%xmm5,%xmm5
-
-// CHECK: 	movhpd	3735928559(%ebx,%ecx,8), %xmm5
-        	movhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movhpd	69, %xmm5
-        	movhpd	0x45,%xmm5
-
-// CHECK: 	movhpd	32493, %xmm5
-        	movhpd	0x7eed,%xmm5
-
-// CHECK: 	movhpd	3133065982, %xmm5
-        	movhpd	0xbabecafe,%xmm5
-
-// CHECK: 	movhpd	305419896, %xmm5
-        	movhpd	0x12345678,%xmm5
-
-// CHECK: 	movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movhpd	%xmm5, 69
-        	movhpd	%xmm5,0x45
-
-// CHECK: 	movhpd	%xmm5, 32493
-        	movhpd	%xmm5,0x7eed
-
-// CHECK: 	movhpd	%xmm5, 3133065982
-        	movhpd	%xmm5,0xbabecafe
-
-// CHECK: 	movhpd	%xmm5, 305419896
-        	movhpd	%xmm5,0x12345678
-
-// CHECK: 	movlpd	3735928559(%ebx,%ecx,8), %xmm5
-        	movlpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movlpd	69, %xmm5
-        	movlpd	0x45,%xmm5
-
-// CHECK: 	movlpd	32493, %xmm5
-        	movlpd	0x7eed,%xmm5
-
-// CHECK: 	movlpd	3133065982, %xmm5
-        	movlpd	0xbabecafe,%xmm5
-
-// CHECK: 	movlpd	305419896, %xmm5
-        	movlpd	0x12345678,%xmm5
-
-// CHECK: 	movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movlpd	%xmm5, 69
-        	movlpd	%xmm5,0x45
-
-// CHECK: 	movlpd	%xmm5, 32493
-        	movlpd	%xmm5,0x7eed
-
-// CHECK: 	movlpd	%xmm5, 3133065982
-        	movlpd	%xmm5,0xbabecafe
-
-// CHECK: 	movlpd	%xmm5, 305419896
-        	movlpd	%xmm5,0x12345678
-
-// CHECK: 	movmskpd	%xmm5, %ecx
-        	movmskpd	%xmm5,%ecx
-
-// CHECK: 	movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movntpd	%xmm5, 69
-        	movntpd	%xmm5,0x45
-
-// CHECK: 	movntpd	%xmm5, 32493
-        	movntpd	%xmm5,0x7eed
-
-// CHECK: 	movntpd	%xmm5, 3133065982
-        	movntpd	%xmm5,0xbabecafe
-
-// CHECK: 	movntpd	%xmm5, 305419896
-        	movntpd	%xmm5,0x12345678
-
-// CHECK: 	movsd	3735928559(%ebx,%ecx,8), %xmm5
-        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movsd	69, %xmm5
-        	movsd	0x45,%xmm5
-
-// CHECK: 	movsd	32493, %xmm5
-        	movsd	0x7eed,%xmm5
-
-// CHECK: 	movsd	3133065982, %xmm5
-        	movsd	0xbabecafe,%xmm5
-
-// CHECK: 	movsd	305419896, %xmm5
-        	movsd	0x12345678,%xmm5
-
-// CHECK: 	movsd	%xmm5, %xmm5
-        	movsd	%xmm5,%xmm5
-
-// CHECK: 	movsd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movsd	%xmm5, 69
-        	movsd	%xmm5,0x45
-
-// CHECK: 	movsd	%xmm5, 32493
-        	movsd	%xmm5,0x7eed
-
-// CHECK: 	movsd	%xmm5, 3133065982
-        	movsd	%xmm5,0xbabecafe
-
-// CHECK: 	movsd	%xmm5, 305419896
-        	movsd	%xmm5,0x12345678
-
-// CHECK: 	movsd	%xmm5, %xmm5
-        	movsd	%xmm5,%xmm5
-
-// CHECK: 	movupd	3735928559(%ebx,%ecx,8), %xmm5
-        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movupd	69, %xmm5
-        	movupd	0x45,%xmm5
-
-// CHECK: 	movupd	32493, %xmm5
-        	movupd	0x7eed,%xmm5
-
-// CHECK: 	movupd	3133065982, %xmm5
-        	movupd	0xbabecafe,%xmm5
-
-// CHECK: 	movupd	305419896, %xmm5
-        	movupd	0x12345678,%xmm5
-
-// CHECK: 	movupd	%xmm5, %xmm5
-        	movupd	%xmm5,%xmm5
-
-// CHECK: 	movupd	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movupd	%xmm5, 69
-        	movupd	%xmm5,0x45
-
-// CHECK: 	movupd	%xmm5, 32493
-        	movupd	%xmm5,0x7eed
-
-// CHECK: 	movupd	%xmm5, 3133065982
-        	movupd	%xmm5,0xbabecafe
-
-// CHECK: 	movupd	%xmm5, 305419896
-        	movupd	%xmm5,0x12345678
-
-// CHECK: 	movupd	%xmm5, %xmm5
-        	movupd	%xmm5,%xmm5
-
-// CHECK: 	mulpd	3735928559(%ebx,%ecx,8), %xmm5
-        	mulpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	mulpd	69, %xmm5
-        	mulpd	0x45,%xmm5
-
-// CHECK: 	mulpd	32493, %xmm5
-        	mulpd	0x7eed,%xmm5
-
-// CHECK: 	mulpd	3133065982, %xmm5
-        	mulpd	0xbabecafe,%xmm5
-
-// CHECK: 	mulpd	305419896, %xmm5
-        	mulpd	0x12345678,%xmm5
-
-// CHECK: 	mulpd	%xmm5, %xmm5
-        	mulpd	%xmm5,%xmm5
-
-// CHECK: 	mulsd	3735928559(%ebx,%ecx,8), %xmm5
-        	mulsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	mulsd	69, %xmm5
-        	mulsd	0x45,%xmm5
-
-// CHECK: 	mulsd	32493, %xmm5
-        	mulsd	0x7eed,%xmm5
-
-// CHECK: 	mulsd	3133065982, %xmm5
-        	mulsd	0xbabecafe,%xmm5
-
-// CHECK: 	mulsd	305419896, %xmm5
-        	mulsd	0x12345678,%xmm5
-
-// CHECK: 	mulsd	%xmm5, %xmm5
-        	mulsd	%xmm5,%xmm5
-
-// CHECK: 	orpd	3735928559(%ebx,%ecx,8), %xmm5
-        	orpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	orpd	69, %xmm5
-        	orpd	0x45,%xmm5
-
-// CHECK: 	orpd	32493, %xmm5
-        	orpd	0x7eed,%xmm5
-
-// CHECK: 	orpd	3133065982, %xmm5
-        	orpd	0xbabecafe,%xmm5
-
-// CHECK: 	orpd	305419896, %xmm5
-        	orpd	0x12345678,%xmm5
-
-// CHECK: 	orpd	%xmm5, %xmm5
-        	orpd	%xmm5,%xmm5
-
-// CHECK: 	sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtpd	69, %xmm5
-        	sqrtpd	0x45,%xmm5
-
-// CHECK: 	sqrtpd	32493, %xmm5
-        	sqrtpd	0x7eed,%xmm5
-
-// CHECK: 	sqrtpd	3133065982, %xmm5
-        	sqrtpd	0xbabecafe,%xmm5
-
-// CHECK: 	sqrtpd	305419896, %xmm5
-        	sqrtpd	0x12345678,%xmm5
-
-// CHECK: 	sqrtpd	%xmm5, %xmm5
-        	sqrtpd	%xmm5,%xmm5
-
-// CHECK: 	sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
-        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	sqrtsd	69, %xmm5
-        	sqrtsd	0x45,%xmm5
-
-// CHECK: 	sqrtsd	32493, %xmm5
-        	sqrtsd	0x7eed,%xmm5
-
-// CHECK: 	sqrtsd	3133065982, %xmm5
-        	sqrtsd	0xbabecafe,%xmm5
-
-// CHECK: 	sqrtsd	305419896, %xmm5
-        	sqrtsd	0x12345678,%xmm5
-
-// CHECK: 	sqrtsd	%xmm5, %xmm5
-        	sqrtsd	%xmm5,%xmm5
-
-// CHECK: 	subpd	3735928559(%ebx,%ecx,8), %xmm5
-        	subpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	subpd	69, %xmm5
-        	subpd	0x45,%xmm5
-
-// CHECK: 	subpd	32493, %xmm5
-        	subpd	0x7eed,%xmm5
-
-// CHECK: 	subpd	3133065982, %xmm5
-        	subpd	0xbabecafe,%xmm5
-
-// CHECK: 	subpd	305419896, %xmm5
-        	subpd	0x12345678,%xmm5
-
-// CHECK: 	subpd	%xmm5, %xmm5
-        	subpd	%xmm5,%xmm5
-
-// CHECK: 	subsd	3735928559(%ebx,%ecx,8), %xmm5
-        	subsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	subsd	69, %xmm5
-        	subsd	0x45,%xmm5
-
-// CHECK: 	subsd	32493, %xmm5
-        	subsd	0x7eed,%xmm5
-
-// CHECK: 	subsd	3133065982, %xmm5
-        	subsd	0xbabecafe,%xmm5
-
-// CHECK: 	subsd	305419896, %xmm5
-        	subsd	0x12345678,%xmm5
-
-// CHECK: 	subsd	%xmm5, %xmm5
-        	subsd	%xmm5,%xmm5
-
-// CHECK: 	ucomisd	3735928559(%ebx,%ecx,8), %xmm5
-        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ucomisd	69, %xmm5
-        	ucomisd	0x45,%xmm5
-
-// CHECK: 	ucomisd	32493, %xmm5
-        	ucomisd	0x7eed,%xmm5
-
-// CHECK: 	ucomisd	3133065982, %xmm5
-        	ucomisd	0xbabecafe,%xmm5
-
-// CHECK: 	ucomisd	305419896, %xmm5
-        	ucomisd	0x12345678,%xmm5
-
-// CHECK: 	ucomisd	%xmm5, %xmm5
-        	ucomisd	%xmm5,%xmm5
-
-// CHECK: 	unpckhpd	3735928559(%ebx,%ecx,8), %xmm5
-        	unpckhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	unpckhpd	69, %xmm5
-        	unpckhpd	0x45,%xmm5
-
-// CHECK: 	unpckhpd	32493, %xmm5
-        	unpckhpd	0x7eed,%xmm5
-
-// CHECK: 	unpckhpd	3133065982, %xmm5
-        	unpckhpd	0xbabecafe,%xmm5
-
-// CHECK: 	unpckhpd	305419896, %xmm5
-        	unpckhpd	0x12345678,%xmm5
-
-// CHECK: 	unpckhpd	%xmm5, %xmm5
-        	unpckhpd	%xmm5,%xmm5
-
-// CHECK: 	unpcklpd	3735928559(%ebx,%ecx,8), %xmm5
-        	unpcklpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	unpcklpd	69, %xmm5
-        	unpcklpd	0x45,%xmm5
-
-// CHECK: 	unpcklpd	32493, %xmm5
-        	unpcklpd	0x7eed,%xmm5
-
-// CHECK: 	unpcklpd	3133065982, %xmm5
-        	unpcklpd	0xbabecafe,%xmm5
-
-// CHECK: 	unpcklpd	305419896, %xmm5
-        	unpcklpd	0x12345678,%xmm5
-
-// CHECK: 	unpcklpd	%xmm5, %xmm5
-        	unpcklpd	%xmm5,%xmm5
-
-// CHECK: 	xorpd	3735928559(%ebx,%ecx,8), %xmm5
-        	xorpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	xorpd	69, %xmm5
-        	xorpd	0x45,%xmm5
-
-// CHECK: 	xorpd	32493, %xmm5
-        	xorpd	0x7eed,%xmm5
-
-// CHECK: 	xorpd	3133065982, %xmm5
-        	xorpd	0xbabecafe,%xmm5
-
-// CHECK: 	xorpd	305419896, %xmm5
-        	xorpd	0x12345678,%xmm5
-
-// CHECK: 	xorpd	%xmm5, %xmm5
-        	xorpd	%xmm5,%xmm5
-
-// CHECK: 	cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtdq2pd	69, %xmm5
-        	cvtdq2pd	0x45,%xmm5
-
-// CHECK: 	cvtdq2pd	32493, %xmm5
-        	cvtdq2pd	0x7eed,%xmm5
-
-// CHECK: 	cvtdq2pd	3133065982, %xmm5
-        	cvtdq2pd	0xbabecafe,%xmm5
-
-// CHECK: 	cvtdq2pd	305419896, %xmm5
-        	cvtdq2pd	0x12345678,%xmm5
-
-// CHECK: 	cvtdq2pd	%xmm5, %xmm5
-        	cvtdq2pd	%xmm5,%xmm5
-
-// CHECK: 	cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpd2dq	69, %xmm5
-        	cvtpd2dq	0x45,%xmm5
-
-// CHECK: 	cvtpd2dq	32493, %xmm5
-        	cvtpd2dq	0x7eed,%xmm5
-
-// CHECK: 	cvtpd2dq	3133065982, %xmm5
-        	cvtpd2dq	0xbabecafe,%xmm5
-
-// CHECK: 	cvtpd2dq	305419896, %xmm5
-        	cvtpd2dq	0x12345678,%xmm5
-
-// CHECK: 	cvtpd2dq	%xmm5, %xmm5
-        	cvtpd2dq	%xmm5,%xmm5
-
-// CHECK: 	cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtdq2ps	69, %xmm5
-        	cvtdq2ps	0x45,%xmm5
-
-// CHECK: 	cvtdq2ps	32493, %xmm5
-        	cvtdq2ps	0x7eed,%xmm5
-
-// CHECK: 	cvtdq2ps	3133065982, %xmm5
-        	cvtdq2ps	0xbabecafe,%xmm5
-
-// CHECK: 	cvtdq2ps	305419896, %xmm5
-        	cvtdq2ps	0x12345678,%xmm5
-
-// CHECK: 	cvtdq2ps	%xmm5, %xmm5
-        	cvtdq2ps	%xmm5,%xmm5
-
-// CHECK: 	cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvtpd2pi	69, %mm3
-        	cvtpd2pi	0x45,%mm3
-
-// CHECK: 	cvtpd2pi	32493, %mm3
-        	cvtpd2pi	0x7eed,%mm3
-
-// CHECK: 	cvtpd2pi	3133065982, %mm3
-        	cvtpd2pi	0xbabecafe,%mm3
-
-// CHECK: 	cvtpd2pi	305419896, %mm3
-        	cvtpd2pi	0x12345678,%mm3
-
-// CHECK: 	cvtpd2pi	%xmm5, %mm3
-        	cvtpd2pi	%xmm5,%mm3
-
-// CHECK: 	cvtpd2ps	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtpd2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtpd2ps	69, %xmm5
-        	cvtpd2ps	0x45,%xmm5
-
-// CHECK: 	cvtpd2ps	32493, %xmm5
-        	cvtpd2ps	0x7eed,%xmm5
-
-// CHECK: 	cvtpd2ps	3133065982, %xmm5
-        	cvtpd2ps	0xbabecafe,%xmm5
-
-// CHECK: 	cvtpd2ps	305419896, %xmm5
-        	cvtpd2ps	0x12345678,%xmm5
-
-// CHECK: 	cvtpd2ps	%xmm5, %xmm5
-        	cvtpd2ps	%xmm5,%xmm5
-
-// CHECK: 	cvtps2pd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtps2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtps2pd	69, %xmm5
-        	cvtps2pd	0x45,%xmm5
-
-// CHECK: 	cvtps2pd	32493, %xmm5
-        	cvtps2pd	0x7eed,%xmm5
-
-// CHECK: 	cvtps2pd	3133065982, %xmm5
-        	cvtps2pd	0xbabecafe,%xmm5
-
-// CHECK: 	cvtps2pd	305419896, %xmm5
-        	cvtps2pd	0x12345678,%xmm5
-
-// CHECK: 	cvtps2pd	%xmm5, %xmm5
-        	cvtps2pd	%xmm5,%xmm5
-
-// CHECK: 	cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtps2dq	69, %xmm5
-        	cvtps2dq	0x45,%xmm5
-
-// CHECK: 	cvtps2dq	32493, %xmm5
-        	cvtps2dq	0x7eed,%xmm5
-
-// CHECK: 	cvtps2dq	3133065982, %xmm5
-        	cvtps2dq	0xbabecafe,%xmm5
-
-// CHECK: 	cvtps2dq	305419896, %xmm5
-        	cvtps2dq	0x12345678,%xmm5
-
-// CHECK: 	cvtps2dq	%xmm5, %xmm5
-        	cvtps2dq	%xmm5,%xmm5
-
-// CHECK: 	cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtsd2ss	69, %xmm5
-        	cvtsd2ss	0x45,%xmm5
-
-// CHECK: 	cvtsd2ss	32493, %xmm5
-        	cvtsd2ss	0x7eed,%xmm5
-
-// CHECK: 	cvtsd2ss	3133065982, %xmm5
-        	cvtsd2ss	0xbabecafe,%xmm5
-
-// CHECK: 	cvtsd2ss	305419896, %xmm5
-        	cvtsd2ss	0x12345678,%xmm5
-
-// CHECK: 	cvtsd2ss	%xmm5, %xmm5
-        	cvtsd2ss	%xmm5,%xmm5
-
-// CHECK: 	cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
-        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvtss2sd	69, %xmm5
-        	cvtss2sd	0x45,%xmm5
-
-// CHECK: 	cvtss2sd	32493, %xmm5
-        	cvtss2sd	0x7eed,%xmm5
-
-// CHECK: 	cvtss2sd	3133065982, %xmm5
-        	cvtss2sd	0xbabecafe,%xmm5
-
-// CHECK: 	cvtss2sd	305419896, %xmm5
-        	cvtss2sd	0x12345678,%xmm5
-
-// CHECK: 	cvtss2sd	%xmm5, %xmm5
-        	cvtss2sd	%xmm5,%xmm5
-
-// CHECK: 	cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
-        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	cvttpd2pi	69, %mm3
-        	cvttpd2pi	0x45,%mm3
-
-// CHECK: 	cvttpd2pi	32493, %mm3
-        	cvttpd2pi	0x7eed,%mm3
-
-// CHECK: 	cvttpd2pi	3133065982, %mm3
-        	cvttpd2pi	0xbabecafe,%mm3
-
-// CHECK: 	cvttpd2pi	305419896, %mm3
-        	cvttpd2pi	0x12345678,%mm3
-
-// CHECK: 	cvttpd2pi	%xmm5, %mm3
-        	cvttpd2pi	%xmm5,%mm3
-
-// CHECK: 	cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
-        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	cvttsd2si	69, %ecx
-        	cvttsd2si	0x45,%ecx
-
-// CHECK: 	cvttsd2si	32493, %ecx
-        	cvttsd2si	0x7eed,%ecx
-
-// CHECK: 	cvttsd2si	3133065982, %ecx
-        	cvttsd2si	0xbabecafe,%ecx
-
-// CHECK: 	cvttsd2si	305419896, %ecx
-        	cvttsd2si	0x12345678,%ecx
-
-// CHECK: 	cvttsd2si	%xmm5, %ecx
-        	cvttsd2si	%xmm5,%ecx
-
-// CHECK: 	cvttps2dq	3735928559(%ebx,%ecx,8), %xmm5
-        	cvttps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	cvttps2dq	69, %xmm5
-        	cvttps2dq	0x45,%xmm5
-
-// CHECK: 	cvttps2dq	32493, %xmm5
-        	cvttps2dq	0x7eed,%xmm5
-
-// CHECK: 	cvttps2dq	3133065982, %xmm5
-        	cvttps2dq	0xbabecafe,%xmm5
-
-// CHECK: 	cvttps2dq	305419896, %xmm5
-        	cvttps2dq	0x12345678,%xmm5
-
-// CHECK: 	cvttps2dq	%xmm5, %xmm5
-        	cvttps2dq	%xmm5,%xmm5
-
-// CHECK: 	maskmovdqu	%xmm5, %xmm5
-        	maskmovdqu	%xmm5,%xmm5
-
-// CHECK: 	movdqa	3735928559(%ebx,%ecx,8), %xmm5
-        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movdqa	69, %xmm5
-        	movdqa	0x45,%xmm5
-
-// CHECK: 	movdqa	32493, %xmm5
-        	movdqa	0x7eed,%xmm5
-
-// CHECK: 	movdqa	3133065982, %xmm5
-        	movdqa	0xbabecafe,%xmm5
-
-// CHECK: 	movdqa	305419896, %xmm5
-        	movdqa	0x12345678,%xmm5
-
-// CHECK: 	movdqa	%xmm5, %xmm5
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: 	movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movdqa	%xmm5, 69
-        	movdqa	%xmm5,0x45
-
-// CHECK: 	movdqa	%xmm5, 32493
-        	movdqa	%xmm5,0x7eed
-
-// CHECK: 	movdqa	%xmm5, 3133065982
-        	movdqa	%xmm5,0xbabecafe
-
-// CHECK: 	movdqa	%xmm5, 305419896
-        	movdqa	%xmm5,0x12345678
-
-// CHECK: 	movdqa	%xmm5, %xmm5
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: 	movdqu	3735928559(%ebx,%ecx,8), %xmm5
-        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movdqu	69, %xmm5
-        	movdqu	0x45,%xmm5
-
-// CHECK: 	movdqu	32493, %xmm5
-        	movdqu	0x7eed,%xmm5
-
-// CHECK: 	movdqu	3133065982, %xmm5
-        	movdqu	0xbabecafe,%xmm5
-
-// CHECK: 	movdqu	305419896, %xmm5
-        	movdqu	0x12345678,%xmm5
-
-// CHECK: 	movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
-        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	movdqu	%xmm5, 69
-        	movdqu	%xmm5,0x45
-
-// CHECK: 	movdqu	%xmm5, 32493
-        	movdqu	%xmm5,0x7eed
-
-// CHECK: 	movdqu	%xmm5, 3133065982
-        	movdqu	%xmm5,0xbabecafe
-
-// CHECK: 	movdqu	%xmm5, 305419896
-        	movdqu	%xmm5,0x12345678
-
-// CHECK: 	movdq2q	%xmm5, %mm3
-        	movdq2q	%xmm5,%mm3
-
-// CHECK: 	movq2dq	%mm3, %xmm5
-        	movq2dq	%mm3,%xmm5
-
-// CHECK: 	pmuludq	3735928559(%ebx,%ecx,8), %mm3
-        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmuludq	69, %mm3
-        	pmuludq	0x45,%mm3
-
-// CHECK: 	pmuludq	32493, %mm3
-        	pmuludq	0x7eed,%mm3
-
-// CHECK: 	pmuludq	3133065982, %mm3
-        	pmuludq	0xbabecafe,%mm3
-
-// CHECK: 	pmuludq	305419896, %mm3
-        	pmuludq	0x12345678,%mm3
-
-// CHECK: 	pmuludq	%mm3, %mm3
-        	pmuludq	%mm3,%mm3
-
-// CHECK: 	pmuludq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmuludq	69, %xmm5
-        	pmuludq	0x45,%xmm5
-
-// CHECK: 	pmuludq	32493, %xmm5
-        	pmuludq	0x7eed,%xmm5
-
-// CHECK: 	pmuludq	3133065982, %xmm5
-        	pmuludq	0xbabecafe,%xmm5
-
-// CHECK: 	pmuludq	305419896, %xmm5
-        	pmuludq	0x12345678,%xmm5
-
-// CHECK: 	pmuludq	%xmm5, %xmm5
-        	pmuludq	%xmm5,%xmm5
-
-// CHECK: 	pslldq	$127, %xmm5
-        	pslldq	$0x7f,%xmm5
-
-// CHECK: 	psrldq	$127, %xmm5
-        	psrldq	$0x7f,%xmm5
-
-// CHECK: 	punpckhqdq	3735928559(%ebx,%ecx,8), %xmm5
-        	punpckhqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpckhqdq	69, %xmm5
-        	punpckhqdq	0x45,%xmm5
-
-// CHECK: 	punpckhqdq	32493, %xmm5
-        	punpckhqdq	0x7eed,%xmm5
-
-// CHECK: 	punpckhqdq	3133065982, %xmm5
-        	punpckhqdq	0xbabecafe,%xmm5
-
-// CHECK: 	punpckhqdq	305419896, %xmm5
-        	punpckhqdq	0x12345678,%xmm5
-
-// CHECK: 	punpckhqdq	%xmm5, %xmm5
-        	punpckhqdq	%xmm5,%xmm5
-
-// CHECK: 	punpcklqdq	3735928559(%ebx,%ecx,8), %xmm5
-        	punpcklqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	punpcklqdq	69, %xmm5
-        	punpcklqdq	0x45,%xmm5
-
-// CHECK: 	punpcklqdq	32493, %xmm5
-        	punpcklqdq	0x7eed,%xmm5
-
-// CHECK: 	punpcklqdq	3133065982, %xmm5
-        	punpcklqdq	0xbabecafe,%xmm5
-
-// CHECK: 	punpcklqdq	305419896, %xmm5
-        	punpcklqdq	0x12345678,%xmm5
-
-// CHECK: 	punpcklqdq	%xmm5, %xmm5
-        	punpcklqdq	%xmm5,%xmm5
-
-// CHECK: 	addsubpd	3735928559(%ebx,%ecx,8), %xmm5
-        	addsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addsubpd	69, %xmm5
-        	addsubpd	0x45,%xmm5
-
-// CHECK: 	addsubpd	32493, %xmm5
-        	addsubpd	0x7eed,%xmm5
-
-// CHECK: 	addsubpd	3133065982, %xmm5
-        	addsubpd	0xbabecafe,%xmm5
-
-// CHECK: 	addsubpd	305419896, %xmm5
-        	addsubpd	0x12345678,%xmm5
-
-// CHECK: 	addsubpd	%xmm5, %xmm5
-        	addsubpd	%xmm5,%xmm5
-
-// CHECK: 	addsubps	3735928559(%ebx,%ecx,8), %xmm5
-        	addsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	addsubps	69, %xmm5
-        	addsubps	0x45,%xmm5
-
-// CHECK: 	addsubps	32493, %xmm5
-        	addsubps	0x7eed,%xmm5
-
-// CHECK: 	addsubps	3133065982, %xmm5
-        	addsubps	0xbabecafe,%xmm5
-
-// CHECK: 	addsubps	305419896, %xmm5
-        	addsubps	0x12345678,%xmm5
-
-// CHECK: 	addsubps	%xmm5, %xmm5
-        	addsubps	%xmm5,%xmm5
-
-// CHECK: 	fisttpl	3735928559(%ebx,%ecx,8)
-        	fisttpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	fisttpl	3133065982
-        	fisttpl	0xbabecafe
-
-// CHECK: 	fisttpl	305419896
-        	fisttpl	0x12345678
-
-// CHECK: 	haddpd	3735928559(%ebx,%ecx,8), %xmm5
-        	haddpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	haddpd	69, %xmm5
-        	haddpd	0x45,%xmm5
-
-// CHECK: 	haddpd	32493, %xmm5
-        	haddpd	0x7eed,%xmm5
-
-// CHECK: 	haddpd	3133065982, %xmm5
-        	haddpd	0xbabecafe,%xmm5
-
-// CHECK: 	haddpd	305419896, %xmm5
-        	haddpd	0x12345678,%xmm5
-
-// CHECK: 	haddpd	%xmm5, %xmm5
-        	haddpd	%xmm5,%xmm5
-
-// CHECK: 	haddps	3735928559(%ebx,%ecx,8), %xmm5
-        	haddps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	haddps	69, %xmm5
-        	haddps	0x45,%xmm5
-
-// CHECK: 	haddps	32493, %xmm5
-        	haddps	0x7eed,%xmm5
-
-// CHECK: 	haddps	3133065982, %xmm5
-        	haddps	0xbabecafe,%xmm5
-
-// CHECK: 	haddps	305419896, %xmm5
-        	haddps	0x12345678,%xmm5
-
-// CHECK: 	haddps	%xmm5, %xmm5
-        	haddps	%xmm5,%xmm5
-
-// CHECK: 	hsubpd	3735928559(%ebx,%ecx,8), %xmm5
-        	hsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	hsubpd	69, %xmm5
-        	hsubpd	0x45,%xmm5
-
-// CHECK: 	hsubpd	32493, %xmm5
-        	hsubpd	0x7eed,%xmm5
-
-// CHECK: 	hsubpd	3133065982, %xmm5
-        	hsubpd	0xbabecafe,%xmm5
-
-// CHECK: 	hsubpd	305419896, %xmm5
-        	hsubpd	0x12345678,%xmm5
-
-// CHECK: 	hsubpd	%xmm5, %xmm5
-        	hsubpd	%xmm5,%xmm5
-
-// CHECK: 	hsubps	3735928559(%ebx,%ecx,8), %xmm5
-        	hsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	hsubps	69, %xmm5
-        	hsubps	0x45,%xmm5
-
-// CHECK: 	hsubps	32493, %xmm5
-        	hsubps	0x7eed,%xmm5
-
-// CHECK: 	hsubps	3133065982, %xmm5
-        	hsubps	0xbabecafe,%xmm5
-
-// CHECK: 	hsubps	305419896, %xmm5
-        	hsubps	0x12345678,%xmm5
-
-// CHECK: 	hsubps	%xmm5, %xmm5
-        	hsubps	%xmm5,%xmm5
-
-// CHECK: 	lddqu	3735928559(%ebx,%ecx,8), %xmm5
-        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	lddqu	69, %xmm5
-        	lddqu	0x45,%xmm5
-
-// CHECK: 	lddqu	32493, %xmm5
-        	lddqu	0x7eed,%xmm5
-
-// CHECK: 	lddqu	3133065982, %xmm5
-        	lddqu	0xbabecafe,%xmm5
-
-// CHECK: 	lddqu	305419896, %xmm5
-        	lddqu	0x12345678,%xmm5
-
-// CHECK: 	monitor
-        	monitor
-
-// CHECK: 	movddup	3735928559(%ebx,%ecx,8), %xmm5
-        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movddup	69, %xmm5
-        	movddup	0x45,%xmm5
-
-// CHECK: 	movddup	32493, %xmm5
-        	movddup	0x7eed,%xmm5
-
-// CHECK: 	movddup	3133065982, %xmm5
-        	movddup	0xbabecafe,%xmm5
-
-// CHECK: 	movddup	305419896, %xmm5
-        	movddup	0x12345678,%xmm5
-
-// CHECK: 	movddup	%xmm5, %xmm5
-        	movddup	%xmm5,%xmm5
-
-// CHECK: 	movshdup	3735928559(%ebx,%ecx,8), %xmm5
-        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movshdup	69, %xmm5
-        	movshdup	0x45,%xmm5
-
-// CHECK: 	movshdup	32493, %xmm5
-        	movshdup	0x7eed,%xmm5
-
-// CHECK: 	movshdup	3133065982, %xmm5
-        	movshdup	0xbabecafe,%xmm5
-
-// CHECK: 	movshdup	305419896, %xmm5
-        	movshdup	0x12345678,%xmm5
-
-// CHECK: 	movshdup	%xmm5, %xmm5
-        	movshdup	%xmm5,%xmm5
-
-// CHECK: 	movsldup	3735928559(%ebx,%ecx,8), %xmm5
-        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movsldup	69, %xmm5
-        	movsldup	0x45,%xmm5
-
-// CHECK: 	movsldup	32493, %xmm5
-        	movsldup	0x7eed,%xmm5
-
-// CHECK: 	movsldup	3133065982, %xmm5
-        	movsldup	0xbabecafe,%xmm5
-
-// CHECK: 	movsldup	305419896, %xmm5
-        	movsldup	0x12345678,%xmm5
-
-// CHECK: 	movsldup	%xmm5, %xmm5
-        	movsldup	%xmm5,%xmm5
-
-// CHECK: 	mwait
-        	mwait
-
-// CHECK: 	vmcall
-        	vmcall
-
-// CHECK: 	vmclear	3735928559(%ebx,%ecx,8)
-        	vmclear	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	vmclear	32493
-        	vmclear	0x7eed
-
-// CHECK: 	vmclear	3133065982
-        	vmclear	0xbabecafe
-
-// CHECK: 	vmclear	305419896
-        	vmclear	0x12345678
-
-// CHECK: 	vmlaunch
-        	vmlaunch
-
-// CHECK: 	vmresume
-        	vmresume
-
-// CHECK: 	vmptrld	3735928559(%ebx,%ecx,8)
-        	vmptrld	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	vmptrld	32493
-        	vmptrld	0x7eed
-
-// CHECK: 	vmptrld	3133065982
-        	vmptrld	0xbabecafe
-
-// CHECK: 	vmptrld	305419896
-        	vmptrld	0x12345678
-
-// CHECK: 	vmptrst	3735928559(%ebx,%ecx,8)
-        	vmptrst	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	vmptrst	32493
-        	vmptrst	0x7eed
-
-// CHECK: 	vmptrst	3133065982
-        	vmptrst	0xbabecafe
-
-// CHECK: 	vmptrst	305419896
-        	vmptrst	0x12345678
-
-// CHECK: 	vmxoff
-        	vmxoff
-
-// CHECK: 	vmxon	3735928559(%ebx,%ecx,8)
-        	vmxon	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: 	vmxon	32493
-        	vmxon	0x7eed
-
-// CHECK: 	vmxon	3133065982
-        	vmxon	0xbabecafe
-
-// CHECK: 	vmxon	305419896
-        	vmxon	0x12345678
-
-// CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %mm3
-        	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phaddw	69, %mm3
-        	phaddw	0x45,%mm3
-
-// CHECK: 	phaddw	32493, %mm3
-        	phaddw	0x7eed,%mm3
-
-// CHECK: 	phaddw	3133065982, %mm3
-        	phaddw	0xbabecafe,%mm3
-
-// CHECK: 	phaddw	305419896, %mm3
-        	phaddw	0x12345678,%mm3
-
-// CHECK: 	phaddw	%mm3, %mm3
-        	phaddw	%mm3,%mm3
-
-// CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %xmm5
-        	phaddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phaddw	69, %xmm5
-        	phaddw	0x45,%xmm5
-
-// CHECK: 	phaddw	32493, %xmm5
-        	phaddw	0x7eed,%xmm5
-
-// CHECK: 	phaddw	3133065982, %xmm5
-        	phaddw	0xbabecafe,%xmm5
-
-// CHECK: 	phaddw	305419896, %xmm5
-        	phaddw	0x12345678,%xmm5
-
-// CHECK: 	phaddw	%xmm5, %xmm5
-        	phaddw	%xmm5,%xmm5
-
-// CHECK: 	phaddd	3735928559(%ebx,%ecx,8), %mm3
-        	phaddd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phaddd	69, %mm3
-        	phaddd	0x45,%mm3
-
-// CHECK: 	phaddd	32493, %mm3
-        	phaddd	0x7eed,%mm3
-
-// CHECK: 	phaddd	3133065982, %mm3
-        	phaddd	0xbabecafe,%mm3
-
-// CHECK: 	phaddd	305419896, %mm3
-        	phaddd	0x12345678,%mm3
-
-// CHECK: 	phaddd	%mm3, %mm3
-        	phaddd	%mm3,%mm3
-
-// CHECK: 	phaddd	3735928559(%ebx,%ecx,8), %xmm5
-        	phaddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phaddd	69, %xmm5
-        	phaddd	0x45,%xmm5
-
-// CHECK: 	phaddd	32493, %xmm5
-        	phaddd	0x7eed,%xmm5
-
-// CHECK: 	phaddd	3133065982, %xmm5
-        	phaddd	0xbabecafe,%xmm5
-
-// CHECK: 	phaddd	305419896, %xmm5
-        	phaddd	0x12345678,%xmm5
-
-// CHECK: 	phaddd	%xmm5, %xmm5
-        	phaddd	%xmm5,%xmm5
-
-// CHECK: 	phaddsw	3735928559(%ebx,%ecx,8), %mm3
-        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phaddsw	69, %mm3
-        	phaddsw	0x45,%mm3
-
-// CHECK: 	phaddsw	32493, %mm3
-        	phaddsw	0x7eed,%mm3
-
-// CHECK: 	phaddsw	3133065982, %mm3
-        	phaddsw	0xbabecafe,%mm3
-
-// CHECK: 	phaddsw	305419896, %mm3
-        	phaddsw	0x12345678,%mm3
-
-// CHECK: 	phaddsw	%mm3, %mm3
-        	phaddsw	%mm3,%mm3
-
-// CHECK: 	phaddsw	3735928559(%ebx,%ecx,8), %xmm5
-        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phaddsw	69, %xmm5
-        	phaddsw	0x45,%xmm5
-
-// CHECK: 	phaddsw	32493, %xmm5
-        	phaddsw	0x7eed,%xmm5
-
-// CHECK: 	phaddsw	3133065982, %xmm5
-        	phaddsw	0xbabecafe,%xmm5
-
-// CHECK: 	phaddsw	305419896, %xmm5
-        	phaddsw	0x12345678,%xmm5
-
-// CHECK: 	phaddsw	%xmm5, %xmm5
-        	phaddsw	%xmm5,%xmm5
-
-// CHECK: 	phsubw	3735928559(%ebx,%ecx,8), %mm3
-        	phsubw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phsubw	69, %mm3
-        	phsubw	0x45,%mm3
-
-// CHECK: 	phsubw	32493, %mm3
-        	phsubw	0x7eed,%mm3
-
-// CHECK: 	phsubw	3133065982, %mm3
-        	phsubw	0xbabecafe,%mm3
-
-// CHECK: 	phsubw	305419896, %mm3
-        	phsubw	0x12345678,%mm3
-
-// CHECK: 	phsubw	%mm3, %mm3
-        	phsubw	%mm3,%mm3
-
-// CHECK: 	phsubw	3735928559(%ebx,%ecx,8), %xmm5
-        	phsubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phsubw	69, %xmm5
-        	phsubw	0x45,%xmm5
-
-// CHECK: 	phsubw	32493, %xmm5
-        	phsubw	0x7eed,%xmm5
-
-// CHECK: 	phsubw	3133065982, %xmm5
-        	phsubw	0xbabecafe,%xmm5
-
-// CHECK: 	phsubw	305419896, %xmm5
-        	phsubw	0x12345678,%xmm5
-
-// CHECK: 	phsubw	%xmm5, %xmm5
-        	phsubw	%xmm5,%xmm5
-
-// CHECK: 	phsubd	3735928559(%ebx,%ecx,8), %mm3
-        	phsubd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phsubd	69, %mm3
-        	phsubd	0x45,%mm3
-
-// CHECK: 	phsubd	32493, %mm3
-        	phsubd	0x7eed,%mm3
-
-// CHECK: 	phsubd	3133065982, %mm3
-        	phsubd	0xbabecafe,%mm3
-
-// CHECK: 	phsubd	305419896, %mm3
-        	phsubd	0x12345678,%mm3
-
-// CHECK: 	phsubd	%mm3, %mm3
-        	phsubd	%mm3,%mm3
-
-// CHECK: 	phsubd	3735928559(%ebx,%ecx,8), %xmm5
-        	phsubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phsubd	69, %xmm5
-        	phsubd	0x45,%xmm5
-
-// CHECK: 	phsubd	32493, %xmm5
-        	phsubd	0x7eed,%xmm5
-
-// CHECK: 	phsubd	3133065982, %xmm5
-        	phsubd	0xbabecafe,%xmm5
-
-// CHECK: 	phsubd	305419896, %xmm5
-        	phsubd	0x12345678,%xmm5
-
-// CHECK: 	phsubd	%xmm5, %xmm5
-        	phsubd	%xmm5,%xmm5
-
-// CHECK: 	phsubsw	3735928559(%ebx,%ecx,8), %mm3
-        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	phsubsw	69, %mm3
-        	phsubsw	0x45,%mm3
-
-// CHECK: 	phsubsw	32493, %mm3
-        	phsubsw	0x7eed,%mm3
-
-// CHECK: 	phsubsw	3133065982, %mm3
-        	phsubsw	0xbabecafe,%mm3
-
-// CHECK: 	phsubsw	305419896, %mm3
-        	phsubsw	0x12345678,%mm3
-
-// CHECK: 	phsubsw	%mm3, %mm3
-        	phsubsw	%mm3,%mm3
-
-// CHECK: 	phsubsw	3735928559(%ebx,%ecx,8), %xmm5
-        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phsubsw	69, %xmm5
-        	phsubsw	0x45,%xmm5
-
-// CHECK: 	phsubsw	32493, %xmm5
-        	phsubsw	0x7eed,%xmm5
-
-// CHECK: 	phsubsw	3133065982, %xmm5
-        	phsubsw	0xbabecafe,%xmm5
-
-// CHECK: 	phsubsw	305419896, %xmm5
-        	phsubsw	0x12345678,%xmm5
-
-// CHECK: 	phsubsw	%xmm5, %xmm5
-        	phsubsw	%xmm5,%xmm5
-
-// CHECK: 	pmaddubsw	3735928559(%ebx,%ecx,8), %mm3
-        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmaddubsw	69, %mm3
-        	pmaddubsw	0x45,%mm3
-
-// CHECK: 	pmaddubsw	32493, %mm3
-        	pmaddubsw	0x7eed,%mm3
-
-// CHECK: 	pmaddubsw	3133065982, %mm3
-        	pmaddubsw	0xbabecafe,%mm3
-
-// CHECK: 	pmaddubsw	305419896, %mm3
-        	pmaddubsw	0x12345678,%mm3
-
-// CHECK: 	pmaddubsw	%mm3, %mm3
-        	pmaddubsw	%mm3,%mm3
-
-// CHECK: 	pmaddubsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaddubsw	69, %xmm5
-        	pmaddubsw	0x45,%xmm5
-
-// CHECK: 	pmaddubsw	32493, %xmm5
-        	pmaddubsw	0x7eed,%xmm5
-
-// CHECK: 	pmaddubsw	3133065982, %xmm5
-        	pmaddubsw	0xbabecafe,%xmm5
-
-// CHECK: 	pmaddubsw	305419896, %xmm5
-        	pmaddubsw	0x12345678,%xmm5
-
-// CHECK: 	pmaddubsw	%xmm5, %xmm5
-        	pmaddubsw	%xmm5,%xmm5
-
-// CHECK: 	pmulhrsw	3735928559(%ebx,%ecx,8), %mm3
-        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pmulhrsw	69, %mm3
-        	pmulhrsw	0x45,%mm3
-
-// CHECK: 	pmulhrsw	32493, %mm3
-        	pmulhrsw	0x7eed,%mm3
-
-// CHECK: 	pmulhrsw	3133065982, %mm3
-        	pmulhrsw	0xbabecafe,%mm3
-
-// CHECK: 	pmulhrsw	305419896, %mm3
-        	pmulhrsw	0x12345678,%mm3
-
-// CHECK: 	pmulhrsw	%mm3, %mm3
-        	pmulhrsw	%mm3,%mm3
-
-// CHECK: 	pmulhrsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmulhrsw	69, %xmm5
-        	pmulhrsw	0x45,%xmm5
-
-// CHECK: 	pmulhrsw	32493, %xmm5
-        	pmulhrsw	0x7eed,%xmm5
-
-// CHECK: 	pmulhrsw	3133065982, %xmm5
-        	pmulhrsw	0xbabecafe,%xmm5
-
-// CHECK: 	pmulhrsw	305419896, %xmm5
-        	pmulhrsw	0x12345678,%xmm5
-
-// CHECK: 	pmulhrsw	%xmm5, %xmm5
-        	pmulhrsw	%xmm5,%xmm5
-
-// CHECK: 	pshufb	3735928559(%ebx,%ecx,8), %mm3
-        	pshufb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pshufb	69, %mm3
-        	pshufb	0x45,%mm3
-
-// CHECK: 	pshufb	32493, %mm3
-        	pshufb	0x7eed,%mm3
-
-// CHECK: 	pshufb	3133065982, %mm3
-        	pshufb	0xbabecafe,%mm3
-
-// CHECK: 	pshufb	305419896, %mm3
-        	pshufb	0x12345678,%mm3
-
-// CHECK: 	pshufb	%mm3, %mm3
-        	pshufb	%mm3,%mm3
-
-// CHECK: 	pshufb	3735928559(%ebx,%ecx,8), %xmm5
-        	pshufb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pshufb	69, %xmm5
-        	pshufb	0x45,%xmm5
-
-// CHECK: 	pshufb	32493, %xmm5
-        	pshufb	0x7eed,%xmm5
-
-// CHECK: 	pshufb	3133065982, %xmm5
-        	pshufb	0xbabecafe,%xmm5
-
-// CHECK: 	pshufb	305419896, %xmm5
-        	pshufb	0x12345678,%xmm5
-
-// CHECK: 	pshufb	%xmm5, %xmm5
-        	pshufb	%xmm5,%xmm5
-
-// CHECK: 	psignb	3735928559(%ebx,%ecx,8), %mm3
-        	psignb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psignb	69, %mm3
-        	psignb	0x45,%mm3
-
-// CHECK: 	psignb	32493, %mm3
-        	psignb	0x7eed,%mm3
-
-// CHECK: 	psignb	3133065982, %mm3
-        	psignb	0xbabecafe,%mm3
-
-// CHECK: 	psignb	305419896, %mm3
-        	psignb	0x12345678,%mm3
-
-// CHECK: 	psignb	%mm3, %mm3
-        	psignb	%mm3,%mm3
-
-// CHECK: 	psignb	3735928559(%ebx,%ecx,8), %xmm5
-        	psignb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psignb	69, %xmm5
-        	psignb	0x45,%xmm5
-
-// CHECK: 	psignb	32493, %xmm5
-        	psignb	0x7eed,%xmm5
-
-// CHECK: 	psignb	3133065982, %xmm5
-        	psignb	0xbabecafe,%xmm5
-
-// CHECK: 	psignb	305419896, %xmm5
-        	psignb	0x12345678,%xmm5
-
-// CHECK: 	psignb	%xmm5, %xmm5
-        	psignb	%xmm5,%xmm5
-
-// CHECK: 	psignw	3735928559(%ebx,%ecx,8), %mm3
-        	psignw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psignw	69, %mm3
-        	psignw	0x45,%mm3
-
-// CHECK: 	psignw	32493, %mm3
-        	psignw	0x7eed,%mm3
-
-// CHECK: 	psignw	3133065982, %mm3
-        	psignw	0xbabecafe,%mm3
-
-// CHECK: 	psignw	305419896, %mm3
-        	psignw	0x12345678,%mm3
-
-// CHECK: 	psignw	%mm3, %mm3
-        	psignw	%mm3,%mm3
-
-// CHECK: 	psignw	3735928559(%ebx,%ecx,8), %xmm5
-        	psignw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psignw	69, %xmm5
-        	psignw	0x45,%xmm5
-
-// CHECK: 	psignw	32493, %xmm5
-        	psignw	0x7eed,%xmm5
-
-// CHECK: 	psignw	3133065982, %xmm5
-        	psignw	0xbabecafe,%xmm5
-
-// CHECK: 	psignw	305419896, %xmm5
-        	psignw	0x12345678,%xmm5
-
-// CHECK: 	psignw	%xmm5, %xmm5
-        	psignw	%xmm5,%xmm5
-
-// CHECK: 	psignd	3735928559(%ebx,%ecx,8), %mm3
-        	psignd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	psignd	69, %mm3
-        	psignd	0x45,%mm3
-
-// CHECK: 	psignd	32493, %mm3
-        	psignd	0x7eed,%mm3
-
-// CHECK: 	psignd	3133065982, %mm3
-        	psignd	0xbabecafe,%mm3
-
-// CHECK: 	psignd	305419896, %mm3
-        	psignd	0x12345678,%mm3
-
-// CHECK: 	psignd	%mm3, %mm3
-        	psignd	%mm3,%mm3
-
-// CHECK: 	psignd	3735928559(%ebx,%ecx,8), %xmm5
-        	psignd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	psignd	69, %xmm5
-        	psignd	0x45,%xmm5
-
-// CHECK: 	psignd	32493, %xmm5
-        	psignd	0x7eed,%xmm5
-
-// CHECK: 	psignd	3133065982, %xmm5
-        	psignd	0xbabecafe,%xmm5
-
-// CHECK: 	psignd	305419896, %xmm5
-        	psignd	0x12345678,%xmm5
-
-// CHECK: 	psignd	%xmm5, %xmm5
-        	psignd	%xmm5,%xmm5
-
-// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %mm3
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsb	69, %mm3
-        	pabsb	0x45,%mm3
-
-// CHECK: 	pabsb	32493, %mm3
-        	pabsb	0x7eed,%mm3
-
-// CHECK: 	pabsb	3133065982, %mm3
-        	pabsb	0xbabecafe,%mm3
-
-// CHECK: 	pabsb	305419896, %mm3
-        	pabsb	0x12345678,%mm3
-
-// CHECK: 	pabsb	%mm3, %mm3
-        	pabsb	%mm3,%mm3
-
-// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsb	69, %xmm5
-        	pabsb	0x45,%xmm5
-
-// CHECK: 	pabsb	32493, %xmm5
-        	pabsb	0x7eed,%xmm5
-
-// CHECK: 	pabsb	3133065982, %xmm5
-        	pabsb	0xbabecafe,%xmm5
-
-// CHECK: 	pabsb	305419896, %xmm5
-        	pabsb	0x12345678,%xmm5
-
-// CHECK: 	pabsb	%xmm5, %xmm5
-        	pabsb	%xmm5,%xmm5
-
-// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %mm3
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsw	69, %mm3
-        	pabsw	0x45,%mm3
-
-// CHECK: 	pabsw	32493, %mm3
-        	pabsw	0x7eed,%mm3
-
-// CHECK: 	pabsw	3133065982, %mm3
-        	pabsw	0xbabecafe,%mm3
-
-// CHECK: 	pabsw	305419896, %mm3
-        	pabsw	0x12345678,%mm3
-
-// CHECK: 	pabsw	%mm3, %mm3
-        	pabsw	%mm3,%mm3
-
-// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsw	69, %xmm5
-        	pabsw	0x45,%xmm5
-
-// CHECK: 	pabsw	32493, %xmm5
-        	pabsw	0x7eed,%xmm5
-
-// CHECK: 	pabsw	3133065982, %xmm5
-        	pabsw	0xbabecafe,%xmm5
-
-// CHECK: 	pabsw	305419896, %xmm5
-        	pabsw	0x12345678,%xmm5
-
-// CHECK: 	pabsw	%xmm5, %xmm5
-        	pabsw	%xmm5,%xmm5
-
-// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %mm3
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: 	pabsd	69, %mm3
-        	pabsd	0x45,%mm3
-
-// CHECK: 	pabsd	32493, %mm3
-        	pabsd	0x7eed,%mm3
-
-// CHECK: 	pabsd	3133065982, %mm3
-        	pabsd	0xbabecafe,%mm3
-
-// CHECK: 	pabsd	305419896, %mm3
-        	pabsd	0x12345678,%mm3
-
-// CHECK: 	pabsd	%mm3, %mm3
-        	pabsd	%mm3,%mm3
-
-// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %xmm5
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pabsd	69, %xmm5
-        	pabsd	0x45,%xmm5
-
-// CHECK: 	pabsd	32493, %xmm5
-        	pabsd	0x7eed,%xmm5
-
-// CHECK: 	pabsd	3133065982, %xmm5
-        	pabsd	0xbabecafe,%xmm5
-
-// CHECK: 	pabsd	305419896, %xmm5
-        	pabsd	0x12345678,%xmm5
-
-// CHECK: 	pabsd	%xmm5, %xmm5
-        	pabsd	%xmm5,%xmm5
-
-// CHECK: 	femms
-        	femms
-
-// CHECK: 	movntdqa	3735928559(%ebx,%ecx,8), %xmm5
-        	movntdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	movntdqa	69, %xmm5
-        	movntdqa	0x45,%xmm5
-
-// CHECK: 	movntdqa	32493, %xmm5
-        	movntdqa	0x7eed,%xmm5
-
-// CHECK: 	movntdqa	3133065982, %xmm5
-        	movntdqa	0xbabecafe,%xmm5
-
-// CHECK: 	movntdqa	305419896, %xmm5
-        	movntdqa	0x12345678,%xmm5
-
-// CHECK: 	packusdw	3735928559(%ebx,%ecx,8), %xmm5
-        	packusdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	packusdw	69, %xmm5
-        	packusdw	0x45,%xmm5
-
-// CHECK: 	packusdw	32493, %xmm5
-        	packusdw	0x7eed,%xmm5
-
-// CHECK: 	packusdw	3133065982, %xmm5
-        	packusdw	0xbabecafe,%xmm5
-
-// CHECK: 	packusdw	305419896, %xmm5
-        	packusdw	0x12345678,%xmm5
-
-// CHECK: 	packusdw	%xmm5, %xmm5
-        	packusdw	%xmm5,%xmm5
-
-// CHECK: 	pcmpeqq	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpeqq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpeqq	69, %xmm5
-        	pcmpeqq	0x45,%xmm5
-
-// CHECK: 	pcmpeqq	32493, %xmm5
-        	pcmpeqq	0x7eed,%xmm5
-
-// CHECK: 	pcmpeqq	3133065982, %xmm5
-        	pcmpeqq	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpeqq	305419896, %xmm5
-        	pcmpeqq	0x12345678,%xmm5
-
-// CHECK: 	pcmpeqq	%xmm5, %xmm5
-        	pcmpeqq	%xmm5,%xmm5
-
-// CHECK: 	phminposuw	3735928559(%ebx,%ecx,8), %xmm5
-        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	phminposuw	69, %xmm5
-        	phminposuw	0x45,%xmm5
-
-// CHECK: 	phminposuw	32493, %xmm5
-        	phminposuw	0x7eed,%xmm5
-
-// CHECK: 	phminposuw	3133065982, %xmm5
-        	phminposuw	0xbabecafe,%xmm5
-
-// CHECK: 	phminposuw	305419896, %xmm5
-        	phminposuw	0x12345678,%xmm5
-
-// CHECK: 	phminposuw	%xmm5, %xmm5
-        	phminposuw	%xmm5,%xmm5
-
-// CHECK: 	pmaxsb	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxsb	69, %xmm5
-        	pmaxsb	0x45,%xmm5
-
-// CHECK: 	pmaxsb	32493, %xmm5
-        	pmaxsb	0x7eed,%xmm5
-
-// CHECK: 	pmaxsb	3133065982, %xmm5
-        	pmaxsb	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxsb	305419896, %xmm5
-        	pmaxsb	0x12345678,%xmm5
-
-// CHECK: 	pmaxsb	%xmm5, %xmm5
-        	pmaxsb	%xmm5,%xmm5
-
-// CHECK: 	pmaxsd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxsd	69, %xmm5
-        	pmaxsd	0x45,%xmm5
-
-// CHECK: 	pmaxsd	32493, %xmm5
-        	pmaxsd	0x7eed,%xmm5
-
-// CHECK: 	pmaxsd	3133065982, %xmm5
-        	pmaxsd	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxsd	305419896, %xmm5
-        	pmaxsd	0x12345678,%xmm5
-
-// CHECK: 	pmaxsd	%xmm5, %xmm5
-        	pmaxsd	%xmm5,%xmm5
-
-// CHECK: 	pmaxud	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxud	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxud	69, %xmm5
-        	pmaxud	0x45,%xmm5
-
-// CHECK: 	pmaxud	32493, %xmm5
-        	pmaxud	0x7eed,%xmm5
-
-// CHECK: 	pmaxud	3133065982, %xmm5
-        	pmaxud	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxud	305419896, %xmm5
-        	pmaxud	0x12345678,%xmm5
-
-// CHECK: 	pmaxud	%xmm5, %xmm5
-        	pmaxud	%xmm5,%xmm5
-
-// CHECK: 	pmaxuw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmaxuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmaxuw	69, %xmm5
-        	pmaxuw	0x45,%xmm5
-
-// CHECK: 	pmaxuw	32493, %xmm5
-        	pmaxuw	0x7eed,%xmm5
-
-// CHECK: 	pmaxuw	3133065982, %xmm5
-        	pmaxuw	0xbabecafe,%xmm5
-
-// CHECK: 	pmaxuw	305419896, %xmm5
-        	pmaxuw	0x12345678,%xmm5
-
-// CHECK: 	pmaxuw	%xmm5, %xmm5
-        	pmaxuw	%xmm5,%xmm5
-
-// CHECK: 	pminsb	3735928559(%ebx,%ecx,8), %xmm5
-        	pminsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminsb	69, %xmm5
-        	pminsb	0x45,%xmm5
-
-// CHECK: 	pminsb	32493, %xmm5
-        	pminsb	0x7eed,%xmm5
-
-// CHECK: 	pminsb	3133065982, %xmm5
-        	pminsb	0xbabecafe,%xmm5
-
-// CHECK: 	pminsb	305419896, %xmm5
-        	pminsb	0x12345678,%xmm5
-
-// CHECK: 	pminsb	%xmm5, %xmm5
-        	pminsb	%xmm5,%xmm5
-
-// CHECK: 	pminsd	3735928559(%ebx,%ecx,8), %xmm5
-        	pminsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminsd	69, %xmm5
-        	pminsd	0x45,%xmm5
-
-// CHECK: 	pminsd	32493, %xmm5
-        	pminsd	0x7eed,%xmm5
-
-// CHECK: 	pminsd	3133065982, %xmm5
-        	pminsd	0xbabecafe,%xmm5
-
-// CHECK: 	pminsd	305419896, %xmm5
-        	pminsd	0x12345678,%xmm5
-
-// CHECK: 	pminsd	%xmm5, %xmm5
-        	pminsd	%xmm5,%xmm5
-
-// CHECK: 	pminud	3735928559(%ebx,%ecx,8), %xmm5
-        	pminud	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminud	69, %xmm5
-        	pminud	0x45,%xmm5
-
-// CHECK: 	pminud	32493, %xmm5
-        	pminud	0x7eed,%xmm5
-
-// CHECK: 	pminud	3133065982, %xmm5
-        	pminud	0xbabecafe,%xmm5
-
-// CHECK: 	pminud	305419896, %xmm5
-        	pminud	0x12345678,%xmm5
-
-// CHECK: 	pminud	%xmm5, %xmm5
-        	pminud	%xmm5,%xmm5
-
-// CHECK: 	pminuw	3735928559(%ebx,%ecx,8), %xmm5
-        	pminuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pminuw	69, %xmm5
-        	pminuw	0x45,%xmm5
-
-// CHECK: 	pminuw	32493, %xmm5
-        	pminuw	0x7eed,%xmm5
-
-// CHECK: 	pminuw	3133065982, %xmm5
-        	pminuw	0xbabecafe,%xmm5
-
-// CHECK: 	pminuw	305419896, %xmm5
-        	pminuw	0x12345678,%xmm5
-
-// CHECK: 	pminuw	%xmm5, %xmm5
-        	pminuw	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbw	69, %xmm5
-        	pmovsxbw	0x45,%xmm5
-
-// CHECK: 	pmovsxbw	32493, %xmm5
-        	pmovsxbw	0x7eed,%xmm5
-
-// CHECK: 	pmovsxbw	3133065982, %xmm5
-        	pmovsxbw	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxbw	305419896, %xmm5
-        	pmovsxbw	0x12345678,%xmm5
-
-// CHECK: 	pmovsxbw	%xmm5, %xmm5
-        	pmovsxbw	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbd	69, %xmm5
-        	pmovsxbd	0x45,%xmm5
-
-// CHECK: 	pmovsxbd	32493, %xmm5
-        	pmovsxbd	0x7eed,%xmm5
-
-// CHECK: 	pmovsxbd	3133065982, %xmm5
-        	pmovsxbd	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxbd	305419896, %xmm5
-        	pmovsxbd	0x12345678,%xmm5
-
-// CHECK: 	pmovsxbd	%xmm5, %xmm5
-        	pmovsxbd	%xmm5,%xmm5
-
-// CHECK: 	pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxbq	69, %xmm5
-        	pmovsxbq	0x45,%xmm5
-
-// CHECK: 	pmovsxbq	32493, %xmm5
-        	pmovsxbq	0x7eed,%xmm5
-
-// CHECK: 	pmovsxbq	3133065982, %xmm5
-        	pmovsxbq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxbq	305419896, %xmm5
-        	pmovsxbq	0x12345678,%xmm5
-
-// CHECK: 	pmovsxbq	%xmm5, %xmm5
-        	pmovsxbq	%xmm5,%xmm5
-
-// CHECK: 	pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxwd	69, %xmm5
-        	pmovsxwd	0x45,%xmm5
-
-// CHECK: 	pmovsxwd	32493, %xmm5
-        	pmovsxwd	0x7eed,%xmm5
-
-// CHECK: 	pmovsxwd	3133065982, %xmm5
-        	pmovsxwd	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxwd	305419896, %xmm5
-        	pmovsxwd	0x12345678,%xmm5
-
-// CHECK: 	pmovsxwd	%xmm5, %xmm5
-        	pmovsxwd	%xmm5,%xmm5
-
-// CHECK: 	pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxwq	69, %xmm5
-        	pmovsxwq	0x45,%xmm5
-
-// CHECK: 	pmovsxwq	32493, %xmm5
-        	pmovsxwq	0x7eed,%xmm5
-
-// CHECK: 	pmovsxwq	3133065982, %xmm5
-        	pmovsxwq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxwq	305419896, %xmm5
-        	pmovsxwq	0x12345678,%xmm5
-
-// CHECK: 	pmovsxwq	%xmm5, %xmm5
-        	pmovsxwq	%xmm5,%xmm5
-
-// CHECK: 	pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovsxdq	69, %xmm5
-        	pmovsxdq	0x45,%xmm5
-
-// CHECK: 	pmovsxdq	32493, %xmm5
-        	pmovsxdq	0x7eed,%xmm5
-
-// CHECK: 	pmovsxdq	3133065982, %xmm5
-        	pmovsxdq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovsxdq	305419896, %xmm5
-        	pmovsxdq	0x12345678,%xmm5
-
-// CHECK: 	pmovsxdq	%xmm5, %xmm5
-        	pmovsxdq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbw	69, %xmm5
-        	pmovzxbw	0x45,%xmm5
-
-// CHECK: 	pmovzxbw	32493, %xmm5
-        	pmovzxbw	0x7eed,%xmm5
-
-// CHECK: 	pmovzxbw	3133065982, %xmm5
-        	pmovzxbw	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxbw	305419896, %xmm5
-        	pmovzxbw	0x12345678,%xmm5
-
-// CHECK: 	pmovzxbw	%xmm5, %xmm5
-        	pmovzxbw	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbd	69, %xmm5
-        	pmovzxbd	0x45,%xmm5
-
-// CHECK: 	pmovzxbd	32493, %xmm5
-        	pmovzxbd	0x7eed,%xmm5
-
-// CHECK: 	pmovzxbd	3133065982, %xmm5
-        	pmovzxbd	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxbd	305419896, %xmm5
-        	pmovzxbd	0x12345678,%xmm5
-
-// CHECK: 	pmovzxbd	%xmm5, %xmm5
-        	pmovzxbd	%xmm5,%xmm5
-
-// CHECK: 	pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxbq	69, %xmm5
-        	pmovzxbq	0x45,%xmm5
-
-// CHECK: 	pmovzxbq	32493, %xmm5
-        	pmovzxbq	0x7eed,%xmm5
-
-// CHECK: 	pmovzxbq	3133065982, %xmm5
-        	pmovzxbq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxbq	305419896, %xmm5
-        	pmovzxbq	0x12345678,%xmm5
-
-// CHECK: 	pmovzxbq	%xmm5, %xmm5
-        	pmovzxbq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxwd	69, %xmm5
-        	pmovzxwd	0x45,%xmm5
-
-// CHECK: 	pmovzxwd	32493, %xmm5
-        	pmovzxwd	0x7eed,%xmm5
-
-// CHECK: 	pmovzxwd	3133065982, %xmm5
-        	pmovzxwd	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxwd	305419896, %xmm5
-        	pmovzxwd	0x12345678,%xmm5
-
-// CHECK: 	pmovzxwd	%xmm5, %xmm5
-        	pmovzxwd	%xmm5,%xmm5
-
-// CHECK: 	pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxwq	69, %xmm5
-        	pmovzxwq	0x45,%xmm5
-
-// CHECK: 	pmovzxwq	32493, %xmm5
-        	pmovzxwq	0x7eed,%xmm5
-
-// CHECK: 	pmovzxwq	3133065982, %xmm5
-        	pmovzxwq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxwq	305419896, %xmm5
-        	pmovzxwq	0x12345678,%xmm5
-
-// CHECK: 	pmovzxwq	%xmm5, %xmm5
-        	pmovzxwq	%xmm5,%xmm5
-
-// CHECK: 	pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmovzxdq	69, %xmm5
-        	pmovzxdq	0x45,%xmm5
-
-// CHECK: 	pmovzxdq	32493, %xmm5
-        	pmovzxdq	0x7eed,%xmm5
-
-// CHECK: 	pmovzxdq	3133065982, %xmm5
-        	pmovzxdq	0xbabecafe,%xmm5
-
-// CHECK: 	pmovzxdq	305419896, %xmm5
-        	pmovzxdq	0x12345678,%xmm5
-
-// CHECK: 	pmovzxdq	%xmm5, %xmm5
-        	pmovzxdq	%xmm5,%xmm5
-
-// CHECK: 	pmuldq	3735928559(%ebx,%ecx,8), %xmm5
-        	pmuldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmuldq	69, %xmm5
-        	pmuldq	0x45,%xmm5
-
-// CHECK: 	pmuldq	32493, %xmm5
-        	pmuldq	0x7eed,%xmm5
-
-// CHECK: 	pmuldq	3133065982, %xmm5
-        	pmuldq	0xbabecafe,%xmm5
-
-// CHECK: 	pmuldq	305419896, %xmm5
-        	pmuldq	0x12345678,%xmm5
-
-// CHECK: 	pmuldq	%xmm5, %xmm5
-        	pmuldq	%xmm5,%xmm5
-
-// CHECK: 	pmulld	3735928559(%ebx,%ecx,8), %xmm5
-        	pmulld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pmulld	69, %xmm5
-        	pmulld	0x45,%xmm5
-
-// CHECK: 	pmulld	32493, %xmm5
-        	pmulld	0x7eed,%xmm5
-
-// CHECK: 	pmulld	3133065982, %xmm5
-        	pmulld	0xbabecafe,%xmm5
-
-// CHECK: 	pmulld	305419896, %xmm5
-        	pmulld	0x12345678,%xmm5
-
-// CHECK: 	pmulld	%xmm5, %xmm5
-        	pmulld	%xmm5,%xmm5
-
-// CHECK: 	ptest 	3735928559(%ebx,%ecx,8), %xmm5
-        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	ptest 	69, %xmm5
-        	ptest	0x45,%xmm5
-
-// CHECK: 	ptest 	32493, %xmm5
-        	ptest	0x7eed,%xmm5
-
-// CHECK: 	ptest 	3133065982, %xmm5
-        	ptest	0xbabecafe,%xmm5
-
-// CHECK: 	ptest 	305419896, %xmm5
-        	ptest	0x12345678,%xmm5
-
-// CHECK: 	ptest 	%xmm5, %xmm5
-        	ptest	%xmm5,%xmm5
-
-// CHECK: 	crc32b 	%bl, %eax
-                crc32b %bl, %eax
-
-// CHECK: 	crc32b 	4(%ebx), %eax
-                crc32b 4(%ebx), %eax
-
-// CHECK: 	crc32w 	%bx, %eax
-                crc32w %bx, %eax
-
-// CHECK: 	crc32w 	4(%ebx), %eax
-                crc32w 4(%ebx), %eax
-
-// CHECK: 	crc32l 	%ebx, %eax
-                crc32l %ebx, %eax
-
-// CHECK: 	crc32l 	4(%ebx), %eax
-                crc32l 4(%ebx), %eax
-
-// CHECK: 	crc32l 	3735928559(%ebx,%ecx,8), %ecx
-                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: 	crc32l 	69, %ecx
-                crc32l 0x45,%ecx
-
-// CHECK: 	crc32l 	32493, %ecx
-                crc32l 0x7eed,%ecx
-
-// CHECK: 	crc32l 	3133065982, %ecx
-                crc32l 0xbabecafe,%ecx
-
-// CHECK: 	crc32l 	%ecx, %ecx
-                crc32l %ecx,%ecx
-
-// CHECK: 	pcmpgtq	3735928559(%ebx,%ecx,8), %xmm5
-        	pcmpgtq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: 	pcmpgtq	69, %xmm5
-        	pcmpgtq	0x45,%xmm5
-
-// CHECK: 	pcmpgtq	32493, %xmm5
-        	pcmpgtq	0x7eed,%xmm5
-
-// CHECK: 	pcmpgtq	3133065982, %xmm5
-        	pcmpgtq	0xbabecafe,%xmm5
-
-// CHECK: 	pcmpgtq	305419896, %xmm5
-        	pcmpgtq	0x12345678,%xmm5
-
-// CHECK: 	pcmpgtq	%xmm5, %xmm5
-        	pcmpgtq	%xmm5,%xmm5
-
-// CHECK: 	aesimc	%xmm0, %xmm1
-                aesimc %xmm0,%xmm1
-
-// CHECK: 	aesimc	(%eax), %xmm1
-                aesimc (%eax),%xmm1
-
-// CHECK: 	aesenc	%xmm1, %xmm2
-                aesenc %xmm1,%xmm2
-
-// CHECK: 	aesenc	4(%ebx), %xmm2
-                aesenc 4(%ebx),%xmm2
-
-// CHECK: 	aesenclast	%xmm3, %xmm4
-                aesenclast %xmm3,%xmm4
-
-// CHECK: 	aesenclast	4(%edx,%edi), %xmm4
-                aesenclast 4(%edx,%edi),%xmm4
-
-// CHECK: 	aesdec	%xmm5, %xmm6
-                aesdec %xmm5,%xmm6
-
-// CHECK: 	aesdec	4(%ecx,%eax,8), %xmm6
-                aesdec 4(%ecx,%eax,8),%xmm6
-
-// CHECK: 	aesdeclast	%xmm7, %xmm0
-                aesdeclast %xmm7,%xmm0
-
-// CHECK: 	aesdeclast	3405691582, %xmm0
-                aesdeclast 0xcafebabe,%xmm0
-
-// CHECK: 	aeskeygenassist	$125, %xmm1, %xmm2
-                aeskeygenassist $125, %xmm1, %xmm2
-
-// CHECK: 	aeskeygenassist	$125, (%edx,%eax,4), %xmm2
-                aeskeygenassist $125, (%edx,%eax,4), %xmm2
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
deleted file mode 100644
index ef774239ffe8..000000000000
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ /dev/null
@@ -1,10069 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-// XFAIL: *
-
-
-// CHECK: movb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc6,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movb	$127, 69
-// CHECK:  encoding: [0xc6,0x05,0x45,0x00,0x00,0x00,0x7f]
-        	movb	$0x7f,0x45
-
-// CHECK: movb	$127, 32493
-// CHECK:  encoding: [0xc6,0x05,0xed,0x7e,0x00,0x00,0x7f]
-        	movb	$0x7f,0x7eed
-
-// CHECK: movb	$127, 3133065982
-// CHECK:  encoding: [0xc6,0x05,0xfe,0xca,0xbe,0xba,0x7f]
-        	movb	$0x7f,0xbabecafe
-
-// CHECK: movb	$127, 305419896
-// CHECK:  encoding: [0xc6,0x05,0x78,0x56,0x34,0x12,0x7f]
-        	movb	$0x7f,0x12345678
-
-// CHECK: movw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movw	$31438, 69
-// CHECK:  encoding: [0x66,0xc7,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	movw	$0x7ace,0x45
-
-// CHECK: movw	$31438, 32493
-// CHECK:  encoding: [0x66,0xc7,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	movw	$0x7ace,0x7eed
-
-// CHECK: movw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0xc7,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	movw	$0x7ace,0xbabecafe
-
-// CHECK: movw	$31438, 305419896
-// CHECK:  encoding: [0x66,0xc7,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	movw	$0x7ace,0x12345678
-
-// CHECK: movl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movl	$2063514302, 69
-// CHECK:  encoding: [0xc7,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	movl	$0x7afebabe,0x45
-
-// CHECK: movl	$2063514302, 32493
-// CHECK:  encoding: [0xc7,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	movl	$0x7afebabe,0x7eed
-
-// CHECK: movl	$2063514302, 3133065982
-// CHECK:  encoding: [0xc7,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	movl	$0x7afebabe,0xbabecafe
-
-// CHECK: movl	$2063514302, 305419896
-// CHECK:  encoding: [0xc7,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	movl	$0x7afebabe,0x12345678
-
-// CHECK: movl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movl	$324478056, 69
-// CHECK:  encoding: [0xc7,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	movl	$0x13572468,0x45
-
-// CHECK: movl	$324478056, 32493
-// CHECK:  encoding: [0xc7,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	movl	$0x13572468,0x7eed
-
-// CHECK: movl	$324478056, 3133065982
-// CHECK:  encoding: [0xc7,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	movl	$0x13572468,0xbabecafe
-
-// CHECK: movl	$324478056, 305419896
-// CHECK:  encoding: [0xc7,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	movl	$0x13572468,0x12345678
-
-// CHECK: movsbl	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0x0f,0xbe,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: movsbl	69, %ecx
-// CHECK:  encoding: [0x0f,0xbe,0x0d,0x45,0x00,0x00,0x00]
-        	movsbl	0x45,%ecx
-
-// CHECK: movsbl	32493, %ecx
-// CHECK:  encoding: [0x0f,0xbe,0x0d,0xed,0x7e,0x00,0x00]
-        	movsbl	0x7eed,%ecx
-
-// CHECK: movsbl	3133065982, %ecx
-// CHECK:  encoding: [0x0f,0xbe,0x0d,0xfe,0xca,0xbe,0xba]
-        	movsbl	0xbabecafe,%ecx
-
-// CHECK: movsbl	305419896, %ecx
-// CHECK:  encoding: [0x0f,0xbe,0x0d,0x78,0x56,0x34,0x12]
-        	movsbl	0x12345678,%ecx
-
-// CHECK: movsbw	3735928559(%ebx,%ecx,8), %bx
-// CHECK:  encoding: [0x66,0x0f,0xbe,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movsbw	0xdeadbeef(%ebx,%ecx,8),%bx
-
-// CHECK: movsbw	69, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0x45,0x00,0x00,0x00]
-        	movsbw	0x45,%bx
-
-// CHECK: movsbw	32493, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0xed,0x7e,0x00,0x00]
-        	movsbw	0x7eed,%bx
-
-// CHECK: movsbw	3133065982, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0xfe,0xca,0xbe,0xba]
-        	movsbw	0xbabecafe,%bx
-
-// CHECK: movsbw	305419896, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0x78,0x56,0x34,0x12]
-        	movsbw	0x12345678,%bx
-
-// CHECK: movswl	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0x0f,0xbf,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: movswl	69, %ecx
-// CHECK:  encoding: [0x0f,0xbf,0x0d,0x45,0x00,0x00,0x00]
-        	movswl	0x45,%ecx
-
-// CHECK: movswl	32493, %ecx
-// CHECK:  encoding: [0x0f,0xbf,0x0d,0xed,0x7e,0x00,0x00]
-        	movswl	0x7eed,%ecx
-
-// CHECK: movswl	3133065982, %ecx
-// CHECK:  encoding: [0x0f,0xbf,0x0d,0xfe,0xca,0xbe,0xba]
-        	movswl	0xbabecafe,%ecx
-
-// CHECK: movswl	305419896, %ecx
-// CHECK:  encoding: [0x0f,0xbf,0x0d,0x78,0x56,0x34,0x12]
-        	movswl	0x12345678,%ecx
-
-// CHECK: movzbl	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0x0f,0xb6,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: movzbl	69, %ecx
-// CHECK:  encoding: [0x0f,0xb6,0x0d,0x45,0x00,0x00,0x00]
-        	movzbl	0x45,%ecx
-
-// CHECK: movzbl	32493, %ecx
-// CHECK:  encoding: [0x0f,0xb6,0x0d,0xed,0x7e,0x00,0x00]
-        	movzbl	0x7eed,%ecx
-
-// CHECK: movzbl	3133065982, %ecx
-// CHECK:  encoding: [0x0f,0xb6,0x0d,0xfe,0xca,0xbe,0xba]
-        	movzbl	0xbabecafe,%ecx
-
-// CHECK: movzbl	305419896, %ecx
-// CHECK:  encoding: [0x0f,0xb6,0x0d,0x78,0x56,0x34,0x12]
-        	movzbl	0x12345678,%ecx
-
-// CHECK: movzbw	3735928559(%ebx,%ecx,8), %bx
-// CHECK:  encoding: [0x66,0x0f,0xb6,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movzbw	0xdeadbeef(%ebx,%ecx,8),%bx
-
-// CHECK: movzbw	69, %bx
-// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0x45,0x00,0x00,0x00]
-        	movzbw	0x45,%bx
-
-// CHECK: movzbw	32493, %bx
-// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0xed,0x7e,0x00,0x00]
-        	movzbw	0x7eed,%bx
-
-// CHECK: movzbw	3133065982, %bx
-// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0xfe,0xca,0xbe,0xba]
-        	movzbw	0xbabecafe,%bx
-
-// CHECK: movzbw	305419896, %bx
-// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0x78,0x56,0x34,0x12]
-        	movzbw	0x12345678,%bx
-
-// CHECK: movzwl	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0x0f,0xb7,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: movzwl	69, %ecx
-// CHECK:  encoding: [0x0f,0xb7,0x0d,0x45,0x00,0x00,0x00]
-        	movzwl	0x45,%ecx
-
-// CHECK: movzwl	32493, %ecx
-// CHECK:  encoding: [0x0f,0xb7,0x0d,0xed,0x7e,0x00,0x00]
-        	movzwl	0x7eed,%ecx
-
-// CHECK: movzwl	3133065982, %ecx
-// CHECK:  encoding: [0x0f,0xb7,0x0d,0xfe,0xca,0xbe,0xba]
-        	movzwl	0xbabecafe,%ecx
-
-// CHECK: movzwl	305419896, %ecx
-// CHECK:  encoding: [0x0f,0xb7,0x0d,0x78,0x56,0x34,0x12]
-        	movzwl	0x12345678,%ecx
-
-// CHECK: pushl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	pushl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: pushw	32493
-// CHECK:  encoding: [0x66,0xff,0x35,0xed,0x7e,0x00,0x00]
-        	pushw	0x7eed
-
-// CHECK: pushl	3133065982
-// CHECK:  encoding: [0xff,0x35,0xfe,0xca,0xbe,0xba]
-        	pushl	0xbabecafe
-
-// CHECK: pushl	305419896
-// CHECK:  encoding: [0xff,0x35,0x78,0x56,0x34,0x12]
-        	pushl	0x12345678
-
-// CHECK: popl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x8f,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	popl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: popw	32493
-// CHECK:  encoding: [0x66,0x8f,0x05,0xed,0x7e,0x00,0x00]
-        	popw	0x7eed
-
-// CHECK: popl	3133065982
-// CHECK:  encoding: [0x8f,0x05,0xfe,0xca,0xbe,0xba]
-        	popl	0xbabecafe
-
-// CHECK: popl	305419896
-// CHECK:  encoding: [0x8f,0x05,0x78,0x56,0x34,0x12]
-        	popl	0x12345678
-
-// CHECK: clc
-// CHECK:  encoding: [0xf8]
-        	clc
-
-// CHECK: cld
-// CHECK:  encoding: [0xfc]
-        	cld
-
-// CHECK: cli
-// CHECK:  encoding: [0xfa]
-        	cli
-
-// CHECK: clts
-// CHECK:  encoding: [0x0f,0x06]
-        	clts
-
-// CHECK: cmc
-// CHECK:  encoding: [0xf5]
-        	cmc
-
-// CHECK: lahf
-// CHECK:  encoding: [0x9f]
-        	lahf
-
-// CHECK: sahf
-// CHECK:  encoding: [0x9e]
-        	sahf
-
-// CHECK: stc
-// CHECK:  encoding: [0xf9]
-        	stc
-
-// CHECK: std
-// CHECK:  encoding: [0xfd]
-        	std
-
-// CHECK: sti
-// CHECK:  encoding: [0xfb]
-        	sti
-
-// CHECK: addb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x84,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: addb	$254, 69
-// CHECK:  encoding: [0x80,0x05,0x45,0x00,0x00,0x00,0xfe]
-        	addb	$0xfe,0x45
-
-// CHECK: addb	$254, 32493
-// CHECK:  encoding: [0x80,0x05,0xed,0x7e,0x00,0x00,0xfe]
-        	addb	$0xfe,0x7eed
-
-// CHECK: addb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x05,0xfe,0xca,0xbe,0xba,0xfe]
-        	addb	$0xfe,0xbabecafe
-
-// CHECK: addb	$254, 305419896
-// CHECK:  encoding: [0x80,0x05,0x78,0x56,0x34,0x12,0xfe]
-        	addb	$0xfe,0x12345678
-
-// CHECK: addb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: addb	$127, 69
-// CHECK:  encoding: [0x80,0x05,0x45,0x00,0x00,0x00,0x7f]
-        	addb	$0x7f,0x45
-
-// CHECK: addb	$127, 32493
-// CHECK:  encoding: [0x80,0x05,0xed,0x7e,0x00,0x00,0x7f]
-        	addb	$0x7f,0x7eed
-
-// CHECK: addb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x05,0xfe,0xca,0xbe,0xba,0x7f]
-        	addb	$0x7f,0xbabecafe
-
-// CHECK: addb	$127, 305419896
-// CHECK:  encoding: [0x80,0x05,0x78,0x56,0x34,0x12,0x7f]
-        	addb	$0x7f,0x12345678
-
-// CHECK: addw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: addw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	addw	$0x7ace,0x45
-
-// CHECK: addw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	addw	$0x7ace,0x7eed
-
-// CHECK: addw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	addw	$0x7ace,0xbabecafe
-
-// CHECK: addw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	addw	$0x7ace,0x12345678
-
-// CHECK: addl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: addl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	addl	$0x7afebabe,0x45
-
-// CHECK: addl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	addl	$0x7afebabe,0x7eed
-
-// CHECK: addl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	addl	$0x7afebabe,0xbabecafe
-
-// CHECK: addl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	addl	$0x7afebabe,0x12345678
-
-// CHECK: addl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: addl	$324478056, 69
-// CHECK:  encoding: [0x81,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	addl	$0x13572468,0x45
-
-// CHECK: addl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	addl	$0x13572468,0x7eed
-
-// CHECK: addl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	addl	$0x13572468,0xbabecafe
-
-// CHECK: addl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	addl	$0x13572468,0x12345678
-
-// CHECK: incl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	incl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: incw	32493
-// CHECK:  encoding: [0x66,0xff,0x05,0xed,0x7e,0x00,0x00]
-        	incw	0x7eed
-
-// CHECK: incl	3133065982
-// CHECK:  encoding: [0xff,0x05,0xfe,0xca,0xbe,0xba]
-        	incl	0xbabecafe
-
-// CHECK: incl	305419896
-// CHECK:  encoding: [0xff,0x05,0x78,0x56,0x34,0x12]
-        	incl	0x12345678
-
-// CHECK: subb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xac,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: subb	$254, 69
-// CHECK:  encoding: [0x80,0x2d,0x45,0x00,0x00,0x00,0xfe]
-        	subb	$0xfe,0x45
-
-// CHECK: subb	$254, 32493
-// CHECK:  encoding: [0x80,0x2d,0xed,0x7e,0x00,0x00,0xfe]
-        	subb	$0xfe,0x7eed
-
-// CHECK: subb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x2d,0xfe,0xca,0xbe,0xba,0xfe]
-        	subb	$0xfe,0xbabecafe
-
-// CHECK: subb	$254, 305419896
-// CHECK:  encoding: [0x80,0x2d,0x78,0x56,0x34,0x12,0xfe]
-        	subb	$0xfe,0x12345678
-
-// CHECK: subb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xac,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: subb	$127, 69
-// CHECK:  encoding: [0x80,0x2d,0x45,0x00,0x00,0x00,0x7f]
-        	subb	$0x7f,0x45
-
-// CHECK: subb	$127, 32493
-// CHECK:  encoding: [0x80,0x2d,0xed,0x7e,0x00,0x00,0x7f]
-        	subb	$0x7f,0x7eed
-
-// CHECK: subb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x2d,0xfe,0xca,0xbe,0xba,0x7f]
-        	subb	$0x7f,0xbabecafe
-
-// CHECK: subb	$127, 305419896
-// CHECK:  encoding: [0x80,0x2d,0x78,0x56,0x34,0x12,0x7f]
-        	subb	$0x7f,0x12345678
-
-// CHECK: subw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: subw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x2d,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	subw	$0x7ace,0x45
-
-// CHECK: subw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x2d,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	subw	$0x7ace,0x7eed
-
-// CHECK: subw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x2d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	subw	$0x7ace,0xbabecafe
-
-// CHECK: subw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x2d,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	subw	$0x7ace,0x12345678
-
-// CHECK: subl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: subl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x2d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	subl	$0x7afebabe,0x45
-
-// CHECK: subl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x2d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	subl	$0x7afebabe,0x7eed
-
-// CHECK: subl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x2d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	subl	$0x7afebabe,0xbabecafe
-
-// CHECK: subl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x2d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	subl	$0x7afebabe,0x12345678
-
-// CHECK: subl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: subl	$324478056, 69
-// CHECK:  encoding: [0x81,0x2d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	subl	$0x13572468,0x45
-
-// CHECK: subl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x2d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	subl	$0x13572468,0x7eed
-
-// CHECK: subl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x2d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	subl	$0x13572468,0xbabecafe
-
-// CHECK: subl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x2d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	subl	$0x13572468,0x12345678
-
-// CHECK: decl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	decl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: decw	32493
-// CHECK:  encoding: [0x66,0xff,0x0d,0xed,0x7e,0x00,0x00]
-        	decw	0x7eed
-
-// CHECK: decl	3133065982
-// CHECK:  encoding: [0xff,0x0d,0xfe,0xca,0xbe,0xba]
-        	decl	0xbabecafe
-
-// CHECK: decl	305419896
-// CHECK:  encoding: [0xff,0x0d,0x78,0x56,0x34,0x12]
-        	decl	0x12345678
-
-// CHECK: sbbb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	sbbb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sbbb	$254, 69
-// CHECK:  encoding: [0x80,0x1d,0x45,0x00,0x00,0x00,0xfe]
-        	sbbb	$0xfe,0x45
-
-// CHECK: sbbb	$254, 32493
-// CHECK:  encoding: [0x80,0x1d,0xed,0x7e,0x00,0x00,0xfe]
-        	sbbb	$0xfe,0x7eed
-
-// CHECK: sbbb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x1d,0xfe,0xca,0xbe,0xba,0xfe]
-        	sbbb	$0xfe,0xbabecafe
-
-// CHECK: sbbb	$254, 305419896
-// CHECK:  encoding: [0x80,0x1d,0x78,0x56,0x34,0x12,0xfe]
-        	sbbb	$0xfe,0x12345678
-
-// CHECK: sbbb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x9c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	sbbb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sbbb	$127, 69
-// CHECK:  encoding: [0x80,0x1d,0x45,0x00,0x00,0x00,0x7f]
-        	sbbb	$0x7f,0x45
-
-// CHECK: sbbb	$127, 32493
-// CHECK:  encoding: [0x80,0x1d,0xed,0x7e,0x00,0x00,0x7f]
-        	sbbb	$0x7f,0x7eed
-
-// CHECK: sbbb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x1d,0xfe,0xca,0xbe,0xba,0x7f]
-        	sbbb	$0x7f,0xbabecafe
-
-// CHECK: sbbb	$127, 305419896
-// CHECK:  encoding: [0x80,0x1d,0x78,0x56,0x34,0x12,0x7f]
-        	sbbb	$0x7f,0x12345678
-
-// CHECK: sbbw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sbbw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x1d,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	sbbw	$0x7ace,0x45
-
-// CHECK: sbbw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x1d,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	sbbw	$0x7ace,0x7eed
-
-// CHECK: sbbw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x1d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	sbbw	$0x7ace,0xbabecafe
-
-// CHECK: sbbw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x1d,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	sbbw	$0x7ace,0x12345678
-
-// CHECK: sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sbbl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x1d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	sbbl	$0x7afebabe,0x45
-
-// CHECK: sbbl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x1d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	sbbl	$0x7afebabe,0x7eed
-
-// CHECK: sbbl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x1d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	sbbl	$0x7afebabe,0xbabecafe
-
-// CHECK: sbbl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x1d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	sbbl	$0x7afebabe,0x12345678
-
-// CHECK: sbbl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sbbl	$324478056, 69
-// CHECK:  encoding: [0x81,0x1d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	sbbl	$0x13572468,0x45
-
-// CHECK: sbbl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x1d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	sbbl	$0x13572468,0x7eed
-
-// CHECK: sbbl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x1d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	sbbl	$0x13572468,0xbabecafe
-
-// CHECK: sbbl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x1d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	sbbl	$0x13572468,0x12345678
-
-// CHECK: cmpb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpb	$254, 69
-// CHECK:  encoding: [0x80,0x3d,0x45,0x00,0x00,0x00,0xfe]
-        	cmpb	$0xfe,0x45
-
-// CHECK: cmpb	$254, 32493
-// CHECK:  encoding: [0x80,0x3d,0xed,0x7e,0x00,0x00,0xfe]
-        	cmpb	$0xfe,0x7eed
-
-// CHECK: cmpb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x3d,0xfe,0xca,0xbe,0xba,0xfe]
-        	cmpb	$0xfe,0xbabecafe
-
-// CHECK: cmpb	$254, 305419896
-// CHECK:  encoding: [0x80,0x3d,0x78,0x56,0x34,0x12,0xfe]
-        	cmpb	$0xfe,0x12345678
-
-// CHECK: cmpb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpb	$127, 69
-// CHECK:  encoding: [0x80,0x3d,0x45,0x00,0x00,0x00,0x7f]
-        	cmpb	$0x7f,0x45
-
-// CHECK: cmpb	$127, 32493
-// CHECK:  encoding: [0x80,0x3d,0xed,0x7e,0x00,0x00,0x7f]
-        	cmpb	$0x7f,0x7eed
-
-// CHECK: cmpb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x3d,0xfe,0xca,0xbe,0xba,0x7f]
-        	cmpb	$0x7f,0xbabecafe
-
-// CHECK: cmpb	$127, 305419896
-// CHECK:  encoding: [0x80,0x3d,0x78,0x56,0x34,0x12,0x7f]
-        	cmpb	$0x7f,0x12345678
-
-// CHECK: cmpw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x3d,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	cmpw	$0x7ace,0x45
-
-// CHECK: cmpw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x3d,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	cmpw	$0x7ace,0x7eed
-
-// CHECK: cmpw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x3d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	cmpw	$0x7ace,0xbabecafe
-
-// CHECK: cmpw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x3d,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	cmpw	$0x7ace,0x12345678
-
-// CHECK: cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x3d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	cmpl	$0x7afebabe,0x45
-
-// CHECK: cmpl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x3d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	cmpl	$0x7afebabe,0x7eed
-
-// CHECK: cmpl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x3d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	cmpl	$0x7afebabe,0xbabecafe
-
-// CHECK: cmpl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x3d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	cmpl	$0x7afebabe,0x12345678
-
-// CHECK: cmpl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpl	$324478056, 69
-// CHECK:  encoding: [0x81,0x3d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	cmpl	$0x13572468,0x45
-
-// CHECK: cmpl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x3d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	cmpl	$0x13572468,0x7eed
-
-// CHECK: cmpl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x3d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	cmpl	$0x13572468,0xbabecafe
-
-// CHECK: cmpl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x3d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	cmpl	$0x13572468,0x12345678
-
-// CHECK: testb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf6,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: testb	$127, 69
-// CHECK:  encoding: [0xf6,0x05,0x45,0x00,0x00,0x00,0x7f]
-        	testb	$0x7f,0x45
-
-// CHECK: testb	$127, 32493
-// CHECK:  encoding: [0xf6,0x05,0xed,0x7e,0x00,0x00,0x7f]
-        	testb	$0x7f,0x7eed
-
-// CHECK: testb	$127, 3133065982
-// CHECK:  encoding: [0xf6,0x05,0xfe,0xca,0xbe,0xba,0x7f]
-        	testb	$0x7f,0xbabecafe
-
-// CHECK: testb	$127, 305419896
-// CHECK:  encoding: [0xf6,0x05,0x78,0x56,0x34,0x12,0x7f]
-        	testb	$0x7f,0x12345678
-
-// CHECK: testw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: testw	$31438, 69
-// CHECK:  encoding: [0x66,0xf7,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	testw	$0x7ace,0x45
-
-// CHECK: testw	$31438, 32493
-// CHECK:  encoding: [0x66,0xf7,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	testw	$0x7ace,0x7eed
-
-// CHECK: testw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0xf7,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	testw	$0x7ace,0xbabecafe
-
-// CHECK: testw	$31438, 305419896
-// CHECK:  encoding: [0x66,0xf7,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	testw	$0x7ace,0x12345678
-
-// CHECK: testl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: testl	$2063514302, 69
-// CHECK:  encoding: [0xf7,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	testl	$0x7afebabe,0x45
-
-// CHECK: testl	$2063514302, 32493
-// CHECK:  encoding: [0xf7,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	testl	$0x7afebabe,0x7eed
-
-// CHECK: testl	$2063514302, 3133065982
-// CHECK:  encoding: [0xf7,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	testl	$0x7afebabe,0xbabecafe
-
-// CHECK: testl	$2063514302, 305419896
-// CHECK:  encoding: [0xf7,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	testl	$0x7afebabe,0x12345678
-
-// CHECK: testl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: testl	$324478056, 69
-// CHECK:  encoding: [0xf7,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	testl	$0x13572468,0x45
-
-// CHECK: testl	$324478056, 32493
-// CHECK:  encoding: [0xf7,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	testl	$0x13572468,0x7eed
-
-// CHECK: testl	$324478056, 3133065982
-// CHECK:  encoding: [0xf7,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	testl	$0x13572468,0xbabecafe
-
-// CHECK: testl	$324478056, 305419896
-// CHECK:  encoding: [0xf7,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	testl	$0x13572468,0x12345678
-
-// CHECK: andb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: andb	$254, 69
-// CHECK:  encoding: [0x80,0x25,0x45,0x00,0x00,0x00,0xfe]
-        	andb	$0xfe,0x45
-
-// CHECK: andb	$254, 32493
-// CHECK:  encoding: [0x80,0x25,0xed,0x7e,0x00,0x00,0xfe]
-        	andb	$0xfe,0x7eed
-
-// CHECK: andb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x25,0xfe,0xca,0xbe,0xba,0xfe]
-        	andb	$0xfe,0xbabecafe
-
-// CHECK: andb	$254, 305419896
-// CHECK:  encoding: [0x80,0x25,0x78,0x56,0x34,0x12,0xfe]
-        	andb	$0xfe,0x12345678
-
-// CHECK: andb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: andb	$127, 69
-// CHECK:  encoding: [0x80,0x25,0x45,0x00,0x00,0x00,0x7f]
-        	andb	$0x7f,0x45
-
-// CHECK: andb	$127, 32493
-// CHECK:  encoding: [0x80,0x25,0xed,0x7e,0x00,0x00,0x7f]
-        	andb	$0x7f,0x7eed
-
-// CHECK: andb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x25,0xfe,0xca,0xbe,0xba,0x7f]
-        	andb	$0x7f,0xbabecafe
-
-// CHECK: andb	$127, 305419896
-// CHECK:  encoding: [0x80,0x25,0x78,0x56,0x34,0x12,0x7f]
-        	andb	$0x7f,0x12345678
-
-// CHECK: andw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: andw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x25,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	andw	$0x7ace,0x45
-
-// CHECK: andw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x25,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	andw	$0x7ace,0x7eed
-
-// CHECK: andw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x25,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	andw	$0x7ace,0xbabecafe
-
-// CHECK: andw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x25,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	andw	$0x7ace,0x12345678
-
-// CHECK: andl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: andl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x25,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	andl	$0x7afebabe,0x45
-
-// CHECK: andl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x25,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	andl	$0x7afebabe,0x7eed
-
-// CHECK: andl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x25,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	andl	$0x7afebabe,0xbabecafe
-
-// CHECK: andl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x25,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	andl	$0x7afebabe,0x12345678
-
-// CHECK: andl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: andl	$324478056, 69
-// CHECK:  encoding: [0x81,0x25,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	andl	$0x13572468,0x45
-
-// CHECK: andl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x25,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	andl	$0x13572468,0x7eed
-
-// CHECK: andl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x25,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	andl	$0x13572468,0xbabecafe
-
-// CHECK: andl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x25,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	andl	$0x13572468,0x12345678
-
-// CHECK: orb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: orb	$254, 69
-// CHECK:  encoding: [0x80,0x0d,0x45,0x00,0x00,0x00,0xfe]
-        	orb	$0xfe,0x45
-
-// CHECK: orb	$254, 32493
-// CHECK:  encoding: [0x80,0x0d,0xed,0x7e,0x00,0x00,0xfe]
-        	orb	$0xfe,0x7eed
-
-// CHECK: orb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x0d,0xfe,0xca,0xbe,0xba,0xfe]
-        	orb	$0xfe,0xbabecafe
-
-// CHECK: orb	$254, 305419896
-// CHECK:  encoding: [0x80,0x0d,0x78,0x56,0x34,0x12,0xfe]
-        	orb	$0xfe,0x12345678
-
-// CHECK: orb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: orb	$127, 69
-// CHECK:  encoding: [0x80,0x0d,0x45,0x00,0x00,0x00,0x7f]
-        	orb	$0x7f,0x45
-
-// CHECK: orb	$127, 32493
-// CHECK:  encoding: [0x80,0x0d,0xed,0x7e,0x00,0x00,0x7f]
-        	orb	$0x7f,0x7eed
-
-// CHECK: orb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x0d,0xfe,0xca,0xbe,0xba,0x7f]
-        	orb	$0x7f,0xbabecafe
-
-// CHECK: orb	$127, 305419896
-// CHECK:  encoding: [0x80,0x0d,0x78,0x56,0x34,0x12,0x7f]
-        	orb	$0x7f,0x12345678
-
-// CHECK: orw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: orw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x0d,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	orw	$0x7ace,0x45
-
-// CHECK: orw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x0d,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	orw	$0x7ace,0x7eed
-
-// CHECK: orw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x0d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	orw	$0x7ace,0xbabecafe
-
-// CHECK: orw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x0d,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	orw	$0x7ace,0x12345678
-
-// CHECK: orl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: orl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x0d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	orl	$0x7afebabe,0x45
-
-// CHECK: orl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x0d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	orl	$0x7afebabe,0x7eed
-
-// CHECK: orl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x0d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	orl	$0x7afebabe,0xbabecafe
-
-// CHECK: orl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x0d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	orl	$0x7afebabe,0x12345678
-
-// CHECK: orl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: orl	$324478056, 69
-// CHECK:  encoding: [0x81,0x0d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	orl	$0x13572468,0x45
-
-// CHECK: orl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x0d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	orl	$0x13572468,0x7eed
-
-// CHECK: orl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x0d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	orl	$0x13572468,0xbabecafe
-
-// CHECK: orl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x0d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	orl	$0x13572468,0x12345678
-
-// CHECK: xorb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: xorb	$254, 69
-// CHECK:  encoding: [0x80,0x35,0x45,0x00,0x00,0x00,0xfe]
-        	xorb	$0xfe,0x45
-
-// CHECK: xorb	$254, 32493
-// CHECK:  encoding: [0x80,0x35,0xed,0x7e,0x00,0x00,0xfe]
-        	xorb	$0xfe,0x7eed
-
-// CHECK: xorb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x35,0xfe,0xca,0xbe,0xba,0xfe]
-        	xorb	$0xfe,0xbabecafe
-
-// CHECK: xorb	$254, 305419896
-// CHECK:  encoding: [0x80,0x35,0x78,0x56,0x34,0x12,0xfe]
-        	xorb	$0xfe,0x12345678
-
-// CHECK: xorb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0xb4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: xorb	$127, 69
-// CHECK:  encoding: [0x80,0x35,0x45,0x00,0x00,0x00,0x7f]
-        	xorb	$0x7f,0x45
-
-// CHECK: xorb	$127, 32493
-// CHECK:  encoding: [0x80,0x35,0xed,0x7e,0x00,0x00,0x7f]
-        	xorb	$0x7f,0x7eed
-
-// CHECK: xorb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x35,0xfe,0xca,0xbe,0xba,0x7f]
-        	xorb	$0x7f,0xbabecafe
-
-// CHECK: xorb	$127, 305419896
-// CHECK:  encoding: [0x80,0x35,0x78,0x56,0x34,0x12,0x7f]
-        	xorb	$0x7f,0x12345678
-
-// CHECK: xorw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: xorw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x35,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	xorw	$0x7ace,0x45
-
-// CHECK: xorw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x35,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	xorw	$0x7ace,0x7eed
-
-// CHECK: xorw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x35,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	xorw	$0x7ace,0xbabecafe
-
-// CHECK: xorw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x35,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	xorw	$0x7ace,0x12345678
-
-// CHECK: xorl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: xorl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x35,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	xorl	$0x7afebabe,0x45
-
-// CHECK: xorl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x35,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	xorl	$0x7afebabe,0x7eed
-
-// CHECK: xorl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x35,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	xorl	$0x7afebabe,0xbabecafe
-
-// CHECK: xorl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x35,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	xorl	$0x7afebabe,0x12345678
-
-// CHECK: xorl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: xorl	$324478056, 69
-// CHECK:  encoding: [0x81,0x35,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	xorl	$0x13572468,0x45
-
-// CHECK: xorl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x35,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	xorl	$0x13572468,0x7eed
-
-// CHECK: xorl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x35,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	xorl	$0x13572468,0xbabecafe
-
-// CHECK: xorl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x35,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	xorl	$0x13572468,0x12345678
-
-// CHECK: adcb	$254, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x94,0xcb,0xef,0xbe,0xad,0xde,0xfe]
-        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: adcb	$254, 69
-// CHECK:  encoding: [0x80,0x15,0x45,0x00,0x00,0x00,0xfe]
-        	adcb	$0xfe,0x45
-
-// CHECK: adcb	$254, 32493
-// CHECK:  encoding: [0x80,0x15,0xed,0x7e,0x00,0x00,0xfe]
-        	adcb	$0xfe,0x7eed
-
-// CHECK: adcb	$254, 3133065982
-// CHECK:  encoding: [0x80,0x15,0xfe,0xca,0xbe,0xba,0xfe]
-        	adcb	$0xfe,0xbabecafe
-
-// CHECK: adcb	$254, 305419896
-// CHECK:  encoding: [0x80,0x15,0x78,0x56,0x34,0x12,0xfe]
-        	adcb	$0xfe,0x12345678
-
-// CHECK: adcb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x80,0x94,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: adcb	$127, 69
-// CHECK:  encoding: [0x80,0x15,0x45,0x00,0x00,0x00,0x7f]
-        	adcb	$0x7f,0x45
-
-// CHECK: adcb	$127, 32493
-// CHECK:  encoding: [0x80,0x15,0xed,0x7e,0x00,0x00,0x7f]
-        	adcb	$0x7f,0x7eed
-
-// CHECK: adcb	$127, 3133065982
-// CHECK:  encoding: [0x80,0x15,0xfe,0xca,0xbe,0xba,0x7f]
-        	adcb	$0x7f,0xbabecafe
-
-// CHECK: adcb	$127, 305419896
-// CHECK:  encoding: [0x80,0x15,0x78,0x56,0x34,0x12,0x7f]
-        	adcb	$0x7f,0x12345678
-
-// CHECK: adcw	$31438, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
-        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: adcw	$31438, 69
-// CHECK:  encoding: [0x66,0x81,0x15,0x45,0x00,0x00,0x00,0xce,0x7a]
-        	adcw	$0x7ace,0x45
-
-// CHECK: adcw	$31438, 32493
-// CHECK:  encoding: [0x66,0x81,0x15,0xed,0x7e,0x00,0x00,0xce,0x7a]
-        	adcw	$0x7ace,0x7eed
-
-// CHECK: adcw	$31438, 3133065982
-// CHECK:  encoding: [0x66,0x81,0x15,0xfe,0xca,0xbe,0xba,0xce,0x7a]
-        	adcw	$0x7ace,0xbabecafe
-
-// CHECK: adcw	$31438, 305419896
-// CHECK:  encoding: [0x66,0x81,0x15,0x78,0x56,0x34,0x12,0xce,0x7a]
-        	adcw	$0x7ace,0x12345678
-
-// CHECK: adcl	$2063514302, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
-        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: adcl	$2063514302, 69
-// CHECK:  encoding: [0x81,0x15,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	adcl	$0x7afebabe,0x45
-
-// CHECK: adcl	$2063514302, 32493
-// CHECK:  encoding: [0x81,0x15,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
-        	adcl	$0x7afebabe,0x7eed
-
-// CHECK: adcl	$2063514302, 3133065982
-// CHECK:  encoding: [0x81,0x15,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
-        	adcl	$0x7afebabe,0xbabecafe
-
-// CHECK: adcl	$2063514302, 305419896
-// CHECK:  encoding: [0x81,0x15,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
-        	adcl	$0x7afebabe,0x12345678
-
-// CHECK: adcl	$324478056, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
-        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: adcl	$324478056, 69
-// CHECK:  encoding: [0x81,0x15,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
-        	adcl	$0x13572468,0x45
-
-// CHECK: adcl	$324478056, 32493
-// CHECK:  encoding: [0x81,0x15,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
-        	adcl	$0x13572468,0x7eed
-
-// CHECK: adcl	$324478056, 3133065982
-// CHECK:  encoding: [0x81,0x15,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
-        	adcl	$0x13572468,0xbabecafe
-
-// CHECK: adcl	$324478056, 305419896
-// CHECK:  encoding: [0x81,0x15,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
-        	adcl	$0x13572468,0x12345678
-
-// CHECK: negl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	negl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: negw	32493
-// CHECK:  encoding: [0x66,0xf7,0x1d,0xed,0x7e,0x00,0x00]
-        	negw	0x7eed
-
-// CHECK: negl	3133065982
-// CHECK:  encoding: [0xf7,0x1d,0xfe,0xca,0xbe,0xba]
-        	negl	0xbabecafe
-
-// CHECK: negl	305419896
-// CHECK:  encoding: [0xf7,0x1d,0x78,0x56,0x34,0x12]
-        	negl	0x12345678
-
-// CHECK: notl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	notl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: notw	32493
-// CHECK:  encoding: [0x66,0xf7,0x15,0xed,0x7e,0x00,0x00]
-        	notw	0x7eed
-
-// CHECK: notl	3133065982
-// CHECK:  encoding: [0xf7,0x15,0xfe,0xca,0xbe,0xba]
-        	notl	0xbabecafe
-
-// CHECK: notl	305419896
-// CHECK:  encoding: [0xf7,0x15,0x78,0x56,0x34,0x12]
-        	notl	0x12345678
-
-// CHECK: cbtw
-// CHECK:  encoding: [0x66,0x98]
-        	cbtw
-
-// CHECK: cwtl
-// CHECK:  encoding: [0x98]
-        	cwtl
-
-// CHECK: cwtd
-// CHECK:  encoding: [0x66,0x99]
-        	cwtd
-
-// CHECK: cltd
-// CHECK:  encoding: [0x99]
-        	cltd
-
-// CHECK: mull	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	mull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: mulw	32493
-// CHECK:  encoding: [0x66,0xf7,0x25,0xed,0x7e,0x00,0x00]
-        	mulw	0x7eed
-
-// CHECK: mull	3133065982
-// CHECK:  encoding: [0xf7,0x25,0xfe,0xca,0xbe,0xba]
-        	mull	0xbabecafe
-
-// CHECK: mull	305419896
-// CHECK:  encoding: [0xf7,0x25,0x78,0x56,0x34,0x12]
-        	mull	0x12345678
-
-// CHECK: imull	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	imull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: imulw	32493
-// CHECK:  encoding: [0x66,0xf7,0x2d,0xed,0x7e,0x00,0x00]
-        	imulw	0x7eed
-
-// CHECK: imull	3133065982
-// CHECK:  encoding: [0xf7,0x2d,0xfe,0xca,0xbe,0xba]
-        	imull	0xbabecafe
-
-// CHECK: imull	305419896
-// CHECK:  encoding: [0xf7,0x2d,0x78,0x56,0x34,0x12]
-        	imull	0x12345678
-
-// CHECK: divl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	divl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: divw	32493
-// CHECK:  encoding: [0x66,0xf7,0x35,0xed,0x7e,0x00,0x00]
-        	divw	0x7eed
-
-// CHECK: divl	3133065982
-// CHECK:  encoding: [0xf7,0x35,0xfe,0xca,0xbe,0xba]
-        	divl	0xbabecafe
-
-// CHECK: divl	305419896
-// CHECK:  encoding: [0xf7,0x35,0x78,0x56,0x34,0x12]
-        	divl	0x12345678
-
-// CHECK: idivl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf7,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	idivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: idivw	32493
-// CHECK:  encoding: [0x66,0xf7,0x3d,0xed,0x7e,0x00,0x00]
-        	idivw	0x7eed
-
-// CHECK: idivl	3133065982
-// CHECK:  encoding: [0xf7,0x3d,0xfe,0xca,0xbe,0xba]
-        	idivl	0xbabecafe
-
-// CHECK: idivl	305419896
-// CHECK:  encoding: [0xf7,0x3d,0x78,0x56,0x34,0x12]
-        	idivl	0x12345678
-
-// CHECK: roll	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0x84,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: roll	$0, 69
-// CHECK:  encoding: [0xc1,0x05,0x45,0x00,0x00,0x00,0x00]
-        	roll	$0,0x45
-
-// CHECK: roll	$0, 32493
-// CHECK:  encoding: [0xc1,0x05,0xed,0x7e,0x00,0x00,0x00]
-        	roll	$0,0x7eed
-
-// CHECK: roll	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x05,0xfe,0xca,0xbe,0xba,0x00]
-        	roll	$0,0xbabecafe
-
-// CHECK: roll	$0, 305419896
-// CHECK:  encoding: [0xc1,0x05,0x78,0x56,0x34,0x12,0x00]
-        	roll	$0,0x12345678
-
-// CHECK: rolb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: rolb	$127, 69
-// CHECK:  encoding: [0xc0,0x05,0x45,0x00,0x00,0x00,0x7f]
-        	rolb	$0x7f,0x45
-
-// CHECK: rolb	$127, 32493
-// CHECK:  encoding: [0xc0,0x05,0xed,0x7e,0x00,0x00,0x7f]
-        	rolb	$0x7f,0x7eed
-
-// CHECK: rolb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x05,0xfe,0xca,0xbe,0xba,0x7f]
-        	rolb	$0x7f,0xbabecafe
-
-// CHECK: rolb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x05,0x78,0x56,0x34,0x12,0x7f]
-        	rolb	$0x7f,0x12345678
-
-// CHECK: roll	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	roll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: rolw	32493
-// CHECK:  encoding: [0x66,0xd1,0x05,0xed,0x7e,0x00,0x00]
-        	rolw	0x7eed
-
-// CHECK: roll	3133065982
-// CHECK:  encoding: [0xd1,0x05,0xfe,0xca,0xbe,0xba]
-        	roll	0xbabecafe
-
-// CHECK: roll	305419896
-// CHECK:  encoding: [0xd1,0x05,0x78,0x56,0x34,0x12]
-        	roll	0x12345678
-
-// CHECK: rorl	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: rorl	$0, 69
-// CHECK:  encoding: [0xc1,0x0d,0x45,0x00,0x00,0x00,0x00]
-        	rorl	$0,0x45
-
-// CHECK: rorl	$0, 32493
-// CHECK:  encoding: [0xc1,0x0d,0xed,0x7e,0x00,0x00,0x00]
-        	rorl	$0,0x7eed
-
-// CHECK: rorl	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x0d,0xfe,0xca,0xbe,0xba,0x00]
-        	rorl	$0,0xbabecafe
-
-// CHECK: rorl	$0, 305419896
-// CHECK:  encoding: [0xc1,0x0d,0x78,0x56,0x34,0x12,0x00]
-        	rorl	$0,0x12345678
-
-// CHECK: rorb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: rorb	$127, 69
-// CHECK:  encoding: [0xc0,0x0d,0x45,0x00,0x00,0x00,0x7f]
-        	rorb	$0x7f,0x45
-
-// CHECK: rorb	$127, 32493
-// CHECK:  encoding: [0xc0,0x0d,0xed,0x7e,0x00,0x00,0x7f]
-        	rorb	$0x7f,0x7eed
-
-// CHECK: rorb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x0d,0xfe,0xca,0xbe,0xba,0x7f]
-        	rorb	$0x7f,0xbabecafe
-
-// CHECK: rorb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x0d,0x78,0x56,0x34,0x12,0x7f]
-        	rorb	$0x7f,0x12345678
-
-// CHECK: rorl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	rorl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: rorw	32493
-// CHECK:  encoding: [0x66,0xd1,0x0d,0xed,0x7e,0x00,0x00]
-        	rorw	0x7eed
-
-// CHECK: rorl	3133065982
-// CHECK:  encoding: [0xd1,0x0d,0xfe,0xca,0xbe,0xba]
-        	rorl	0xbabecafe
-
-// CHECK: rorl	305419896
-// CHECK:  encoding: [0xd1,0x0d,0x78,0x56,0x34,0x12]
-        	rorl	0x12345678
-
-// CHECK: shll	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	sall	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shll	$0, 69
-// CHECK:  encoding: [0xc1,0x25,0x45,0x00,0x00,0x00,0x00]
-        	sall	$0,0x45
-
-// CHECK: shll	$0, 32493
-// CHECK:  encoding: [0xc1,0x25,0xed,0x7e,0x00,0x00,0x00]
-        	sall	$0,0x7eed
-
-// CHECK: shll	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x25,0xfe,0xca,0xbe,0xba,0x00]
-        	sall	$0,0xbabecafe
-
-// CHECK: shll	$0, 305419896
-// CHECK:  encoding: [0xc1,0x25,0x78,0x56,0x34,0x12,0x00]
-        	sall	$0,0x12345678
-
-// CHECK: shlb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	salb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shlb	$127, 69
-// CHECK:  encoding: [0xc0,0x25,0x45,0x00,0x00,0x00,0x7f]
-        	salb	$0x7f,0x45
-
-// CHECK: shlb	$127, 32493
-// CHECK:  encoding: [0xc0,0x25,0xed,0x7e,0x00,0x00,0x7f]
-        	salb	$0x7f,0x7eed
-
-// CHECK: shlb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x25,0xfe,0xca,0xbe,0xba,0x7f]
-        	salb	$0x7f,0xbabecafe
-
-// CHECK: shlb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x25,0x78,0x56,0x34,0x12,0x7f]
-        	salb	$0x7f,0x12345678
-
-// CHECK: shll	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	sall	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shlw	32493
-// CHECK:  encoding: [0x66,0xd1,0x25,0xed,0x7e,0x00,0x00]
-        	salw	0x7eed
-
-// CHECK: shll	3133065982
-// CHECK:  encoding: [0xd1,0x25,0xfe,0xca,0xbe,0xba]
-        	sall	0xbabecafe
-
-// CHECK: shll	305419896
-// CHECK:  encoding: [0xd1,0x25,0x78,0x56,0x34,0x12]
-        	sall	0x12345678
-
-// CHECK: shll	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shll	$0, 69
-// CHECK:  encoding: [0xc1,0x25,0x45,0x00,0x00,0x00,0x00]
-        	shll	$0,0x45
-
-// CHECK: shll	$0, 32493
-// CHECK:  encoding: [0xc1,0x25,0xed,0x7e,0x00,0x00,0x00]
-        	shll	$0,0x7eed
-
-// CHECK: shll	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x25,0xfe,0xca,0xbe,0xba,0x00]
-        	shll	$0,0xbabecafe
-
-// CHECK: shll	$0, 305419896
-// CHECK:  encoding: [0xc1,0x25,0x78,0x56,0x34,0x12,0x00]
-        	shll	$0,0x12345678
-
-// CHECK: shlb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shlb	$127, 69
-// CHECK:  encoding: [0xc0,0x25,0x45,0x00,0x00,0x00,0x7f]
-        	shlb	$0x7f,0x45
-
-// CHECK: shlb	$127, 32493
-// CHECK:  encoding: [0xc0,0x25,0xed,0x7e,0x00,0x00,0x7f]
-        	shlb	$0x7f,0x7eed
-
-// CHECK: shlb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x25,0xfe,0xca,0xbe,0xba,0x7f]
-        	shlb	$0x7f,0xbabecafe
-
-// CHECK: shlb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x25,0x78,0x56,0x34,0x12,0x7f]
-        	shlb	$0x7f,0x12345678
-
-// CHECK: shll	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	shll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shlw	32493
-// CHECK:  encoding: [0x66,0xd1,0x25,0xed,0x7e,0x00,0x00]
-        	shlw	0x7eed
-
-// CHECK: shll	3133065982
-// CHECK:  encoding: [0xd1,0x25,0xfe,0xca,0xbe,0xba]
-        	shll	0xbabecafe
-
-// CHECK: shll	305419896
-// CHECK:  encoding: [0xd1,0x25,0x78,0x56,0x34,0x12]
-        	shll	0x12345678
-
-// CHECK: shrl	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0xac,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shrl	$0, 69
-// CHECK:  encoding: [0xc1,0x2d,0x45,0x00,0x00,0x00,0x00]
-        	shrl	$0,0x45
-
-// CHECK: shrl	$0, 32493
-// CHECK:  encoding: [0xc1,0x2d,0xed,0x7e,0x00,0x00,0x00]
-        	shrl	$0,0x7eed
-
-// CHECK: shrl	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x2d,0xfe,0xca,0xbe,0xba,0x00]
-        	shrl	$0,0xbabecafe
-
-// CHECK: shrl	$0, 305419896
-// CHECK:  encoding: [0xc1,0x2d,0x78,0x56,0x34,0x12,0x00]
-        	shrl	$0,0x12345678
-
-// CHECK: shrb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0xac,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shrb	$127, 69
-// CHECK:  encoding: [0xc0,0x2d,0x45,0x00,0x00,0x00,0x7f]
-        	shrb	$0x7f,0x45
-
-// CHECK: shrb	$127, 32493
-// CHECK:  encoding: [0xc0,0x2d,0xed,0x7e,0x00,0x00,0x7f]
-        	shrb	$0x7f,0x7eed
-
-// CHECK: shrb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x2d,0xfe,0xca,0xbe,0xba,0x7f]
-        	shrb	$0x7f,0xbabecafe
-
-// CHECK: shrb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x2d,0x78,0x56,0x34,0x12,0x7f]
-        	shrb	$0x7f,0x12345678
-
-// CHECK: shrl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	shrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: shrw	32493
-// CHECK:  encoding: [0x66,0xd1,0x2d,0xed,0x7e,0x00,0x00]
-        	shrw	0x7eed
-
-// CHECK: shrl	3133065982
-// CHECK:  encoding: [0xd1,0x2d,0xfe,0xca,0xbe,0xba]
-        	shrl	0xbabecafe
-
-// CHECK: shrl	305419896
-// CHECK:  encoding: [0xd1,0x2d,0x78,0x56,0x34,0x12]
-        	shrl	0x12345678
-
-// CHECK: sarl	$0, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc1,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x00]
-        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sarl	$0, 69
-// CHECK:  encoding: [0xc1,0x3d,0x45,0x00,0x00,0x00,0x00]
-        	sarl	$0,0x45
-
-// CHECK: sarl	$0, 32493
-// CHECK:  encoding: [0xc1,0x3d,0xed,0x7e,0x00,0x00,0x00]
-        	sarl	$0,0x7eed
-
-// CHECK: sarl	$0, 3133065982
-// CHECK:  encoding: [0xc1,0x3d,0xfe,0xca,0xbe,0xba,0x00]
-        	sarl	$0,0xbabecafe
-
-// CHECK: sarl	$0, 305419896
-// CHECK:  encoding: [0xc1,0x3d,0x78,0x56,0x34,0x12,0x00]
-        	sarl	$0,0x12345678
-
-// CHECK: sarb	$127, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xc0,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x7f]
-        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sarb	$127, 69
-// CHECK:  encoding: [0xc0,0x3d,0x45,0x00,0x00,0x00,0x7f]
-        	sarb	$0x7f,0x45
-
-// CHECK: sarb	$127, 32493
-// CHECK:  encoding: [0xc0,0x3d,0xed,0x7e,0x00,0x00,0x7f]
-        	sarb	$0x7f,0x7eed
-
-// CHECK: sarb	$127, 3133065982
-// CHECK:  encoding: [0xc0,0x3d,0xfe,0xca,0xbe,0xba,0x7f]
-        	sarb	$0x7f,0xbabecafe
-
-// CHECK: sarb	$127, 305419896
-// CHECK:  encoding: [0xc0,0x3d,0x78,0x56,0x34,0x12,0x7f]
-        	sarb	$0x7f,0x12345678
-
-// CHECK: sarl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd1,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	sarl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sarw	32493
-// CHECK:  encoding: [0x66,0xd1,0x3d,0xed,0x7e,0x00,0x00]
-        	sarw	0x7eed
-
-// CHECK: sarl	3133065982
-// CHECK:  encoding: [0xd1,0x3d,0xfe,0xca,0xbe,0xba]
-        	sarl	0xbabecafe
-
-// CHECK: sarl	305419896
-// CHECK:  encoding: [0xd1,0x3d,0x78,0x56,0x34,0x12]
-        	sarl	0x12345678
-
-// CHECK: call	*%ecx
-// CHECK:  encoding: [0xff,0xd1]
-        	call	*%ecx
-
-// CHECK: call	*3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: call	*3135175374
-// CHECK:  encoding: [0xff,0x15,0xce,0xfa,0xde,0xba]
-        	call	*0xbadeface
-
-// CHECK: call	*3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	call	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: call	*3135175374
-// CHECK:  encoding: [0xff,0x15,0xce,0xfa,0xde,0xba]
-        	call	*0xbadeface
-
-// CHECK: lcallw	*32493
-// CHECK:  encoding: [0x66,0xff,0x1d,0xed,0x7e,0x00,0x00]
-        	lcallw	*0x7eed
-
-// CHECK: jmp	*3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: jmp	*3135175374
-// CHECK:  encoding: [0xff,0x25,0xce,0xfa,0xde,0xba]
-        	jmp	*0xbadeface
-
-// CHECK: jmp	*3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	jmp	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: jmp	*3135175374
-// CHECK:  encoding: [0xff,0x25,0xce,0xfa,0xde,0xba]
-        	jmp	*0xbadeface
-
-// CHECK: ljmpl	*3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xff,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: ljmpw	*32493
-// CHECK:  encoding: [0x66,0xff,0x2d,0xed,0x7e,0x00,0x00]
-        	ljmpw	*0x7eed
-
-// CHECK: ljmpl	*3133065982
-// CHECK:  encoding: [0xff,0x2d,0xfe,0xca,0xbe,0xba]
-        	ljmpl	*0xbabecafe
-
-// CHECK: ljmpl	*305419896
-// CHECK:  encoding: [0xff,0x2d,0x78,0x56,0x34,0x12]
-        	ljmpl	*0x12345678
-
-// CHECK: ret
-// CHECK:  encoding: [0xc3]
-        	ret
-
-// CHECK: lret
-// CHECK:  encoding: [0xcb]
-        	lret
-
-// CHECK: leave
-// CHECK:  encoding: [0xc9]
-        	leave
-
-// CHECK: seto	%bl
-// CHECK:  encoding: [0x0f,0x90,0xc3]
-        	seto	%bl
-
-// CHECK: seto	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x90,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	seto	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: seto	32493
-// CHECK:  encoding: [0x0f,0x90,0x05,0xed,0x7e,0x00,0x00]
-        	seto	0x7eed
-
-// CHECK: seto	3133065982
-// CHECK:  encoding: [0x0f,0x90,0x05,0xfe,0xca,0xbe,0xba]
-        	seto	0xbabecafe
-
-// CHECK: seto	305419896
-// CHECK:  encoding: [0x0f,0x90,0x05,0x78,0x56,0x34,0x12]
-        	seto	0x12345678
-
-// CHECK: setno	%bl
-// CHECK:  encoding: [0x0f,0x91,0xc3]
-        	setno	%bl
-
-// CHECK: setno	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x91,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setno	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setno	32493
-// CHECK:  encoding: [0x0f,0x91,0x05,0xed,0x7e,0x00,0x00]
-        	setno	0x7eed
-
-// CHECK: setno	3133065982
-// CHECK:  encoding: [0x0f,0x91,0x05,0xfe,0xca,0xbe,0xba]
-        	setno	0xbabecafe
-
-// CHECK: setno	305419896
-// CHECK:  encoding: [0x0f,0x91,0x05,0x78,0x56,0x34,0x12]
-        	setno	0x12345678
-
-// CHECK: setb	%bl
-// CHECK:  encoding: [0x0f,0x92,0xc3]
-        	setb	%bl
-
-// CHECK: setb	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x92,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setb	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setb	32493
-// CHECK:  encoding: [0x0f,0x92,0x05,0xed,0x7e,0x00,0x00]
-        	setb	0x7eed
-
-// CHECK: setb	3133065982
-// CHECK:  encoding: [0x0f,0x92,0x05,0xfe,0xca,0xbe,0xba]
-        	setb	0xbabecafe
-
-// CHECK: setb	305419896
-// CHECK:  encoding: [0x0f,0x92,0x05,0x78,0x56,0x34,0x12]
-        	setb	0x12345678
-
-// CHECK: setae	%bl
-// CHECK:  encoding: [0x0f,0x93,0xc3]
-        	setae	%bl
-
-// CHECK: setae	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x93,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setae	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setae	32493
-// CHECK:  encoding: [0x0f,0x93,0x05,0xed,0x7e,0x00,0x00]
-        	setae	0x7eed
-
-// CHECK: setae	3133065982
-// CHECK:  encoding: [0x0f,0x93,0x05,0xfe,0xca,0xbe,0xba]
-        	setae	0xbabecafe
-
-// CHECK: setae	305419896
-// CHECK:  encoding: [0x0f,0x93,0x05,0x78,0x56,0x34,0x12]
-        	setae	0x12345678
-
-// CHECK: sete	%bl
-// CHECK:  encoding: [0x0f,0x94,0xc3]
-        	sete	%bl
-
-// CHECK: sete	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x94,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	sete	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sete	32493
-// CHECK:  encoding: [0x0f,0x94,0x05,0xed,0x7e,0x00,0x00]
-        	sete	0x7eed
-
-// CHECK: sete	3133065982
-// CHECK:  encoding: [0x0f,0x94,0x05,0xfe,0xca,0xbe,0xba]
-        	sete	0xbabecafe
-
-// CHECK: sete	305419896
-// CHECK:  encoding: [0x0f,0x94,0x05,0x78,0x56,0x34,0x12]
-        	sete	0x12345678
-
-// CHECK: setne	%bl
-// CHECK:  encoding: [0x0f,0x95,0xc3]
-        	setne	%bl
-
-// CHECK: setne	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x95,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setne	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setne	32493
-// CHECK:  encoding: [0x0f,0x95,0x05,0xed,0x7e,0x00,0x00]
-        	setne	0x7eed
-
-// CHECK: setne	3133065982
-// CHECK:  encoding: [0x0f,0x95,0x05,0xfe,0xca,0xbe,0xba]
-        	setne	0xbabecafe
-
-// CHECK: setne	305419896
-// CHECK:  encoding: [0x0f,0x95,0x05,0x78,0x56,0x34,0x12]
-        	setne	0x12345678
-
-// CHECK: setbe	%bl
-// CHECK:  encoding: [0x0f,0x96,0xc3]
-        	setbe	%bl
-
-// CHECK: setbe	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x96,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setbe	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setbe	32493
-// CHECK:  encoding: [0x0f,0x96,0x05,0xed,0x7e,0x00,0x00]
-        	setbe	0x7eed
-
-// CHECK: setbe	3133065982
-// CHECK:  encoding: [0x0f,0x96,0x05,0xfe,0xca,0xbe,0xba]
-        	setbe	0xbabecafe
-
-// CHECK: setbe	305419896
-// CHECK:  encoding: [0x0f,0x96,0x05,0x78,0x56,0x34,0x12]
-        	setbe	0x12345678
-
-// CHECK: seta	%bl
-// CHECK:  encoding: [0x0f,0x97,0xc3]
-        	seta	%bl
-
-// CHECK: seta	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x97,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	seta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: seta	32493
-// CHECK:  encoding: [0x0f,0x97,0x05,0xed,0x7e,0x00,0x00]
-        	seta	0x7eed
-
-// CHECK: seta	3133065982
-// CHECK:  encoding: [0x0f,0x97,0x05,0xfe,0xca,0xbe,0xba]
-        	seta	0xbabecafe
-
-// CHECK: seta	305419896
-// CHECK:  encoding: [0x0f,0x97,0x05,0x78,0x56,0x34,0x12]
-        	seta	0x12345678
-
-// CHECK: sets	%bl
-// CHECK:  encoding: [0x0f,0x98,0xc3]
-        	sets	%bl
-
-// CHECK: sets	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x98,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	sets	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: sets	32493
-// CHECK:  encoding: [0x0f,0x98,0x05,0xed,0x7e,0x00,0x00]
-        	sets	0x7eed
-
-// CHECK: sets	3133065982
-// CHECK:  encoding: [0x0f,0x98,0x05,0xfe,0xca,0xbe,0xba]
-        	sets	0xbabecafe
-
-// CHECK: sets	305419896
-// CHECK:  encoding: [0x0f,0x98,0x05,0x78,0x56,0x34,0x12]
-        	sets	0x12345678
-
-// CHECK: setns	%bl
-// CHECK:  encoding: [0x0f,0x99,0xc3]
-        	setns	%bl
-
-// CHECK: setns	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x99,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setns	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setns	32493
-// CHECK:  encoding: [0x0f,0x99,0x05,0xed,0x7e,0x00,0x00]
-        	setns	0x7eed
-
-// CHECK: setns	3133065982
-// CHECK:  encoding: [0x0f,0x99,0x05,0xfe,0xca,0xbe,0xba]
-        	setns	0xbabecafe
-
-// CHECK: setns	305419896
-// CHECK:  encoding: [0x0f,0x99,0x05,0x78,0x56,0x34,0x12]
-        	setns	0x12345678
-
-// CHECK: setp	%bl
-// CHECK:  encoding: [0x0f,0x9a,0xc3]
-        	setp	%bl
-
-// CHECK: setp	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9a,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setp	32493
-// CHECK:  encoding: [0x0f,0x9a,0x05,0xed,0x7e,0x00,0x00]
-        	setp	0x7eed
-
-// CHECK: setp	3133065982
-// CHECK:  encoding: [0x0f,0x9a,0x05,0xfe,0xca,0xbe,0xba]
-        	setp	0xbabecafe
-
-// CHECK: setp	305419896
-// CHECK:  encoding: [0x0f,0x9a,0x05,0x78,0x56,0x34,0x12]
-        	setp	0x12345678
-
-// CHECK: setnp	%bl
-// CHECK:  encoding: [0x0f,0x9b,0xc3]
-        	setnp	%bl
-
-// CHECK: setnp	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9b,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setnp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setnp	32493
-// CHECK:  encoding: [0x0f,0x9b,0x05,0xed,0x7e,0x00,0x00]
-        	setnp	0x7eed
-
-// CHECK: setnp	3133065982
-// CHECK:  encoding: [0x0f,0x9b,0x05,0xfe,0xca,0xbe,0xba]
-        	setnp	0xbabecafe
-
-// CHECK: setnp	305419896
-// CHECK:  encoding: [0x0f,0x9b,0x05,0x78,0x56,0x34,0x12]
-        	setnp	0x12345678
-
-// CHECK: setl	%bl
-// CHECK:  encoding: [0x0f,0x9c,0xc3]
-        	setl	%bl
-
-// CHECK: setl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9c,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setl	32493
-// CHECK:  encoding: [0x0f,0x9c,0x05,0xed,0x7e,0x00,0x00]
-        	setl	0x7eed
-
-// CHECK: setl	3133065982
-// CHECK:  encoding: [0x0f,0x9c,0x05,0xfe,0xca,0xbe,0xba]
-        	setl	0xbabecafe
-
-// CHECK: setl	305419896
-// CHECK:  encoding: [0x0f,0x9c,0x05,0x78,0x56,0x34,0x12]
-        	setl	0x12345678
-
-// CHECK: setge	%bl
-// CHECK:  encoding: [0x0f,0x9d,0xc3]
-        	setge	%bl
-
-// CHECK: setge	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9d,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setge	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setge	32493
-// CHECK:  encoding: [0x0f,0x9d,0x05,0xed,0x7e,0x00,0x00]
-        	setge	0x7eed
-
-// CHECK: setge	3133065982
-// CHECK:  encoding: [0x0f,0x9d,0x05,0xfe,0xca,0xbe,0xba]
-        	setge	0xbabecafe
-
-// CHECK: setge	305419896
-// CHECK:  encoding: [0x0f,0x9d,0x05,0x78,0x56,0x34,0x12]
-        	setge	0x12345678
-
-// CHECK: setle	%bl
-// CHECK:  encoding: [0x0f,0x9e,0xc3]
-        	setle	%bl
-
-// CHECK: setle	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9e,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setle	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setle	32493
-// CHECK:  encoding: [0x0f,0x9e,0x05,0xed,0x7e,0x00,0x00]
-        	setle	0x7eed
-
-// CHECK: setle	3133065982
-// CHECK:  encoding: [0x0f,0x9e,0x05,0xfe,0xca,0xbe,0xba]
-        	setle	0xbabecafe
-
-// CHECK: setle	305419896
-// CHECK:  encoding: [0x0f,0x9e,0x05,0x78,0x56,0x34,0x12]
-        	setle	0x12345678
-
-// CHECK: setg	%bl
-// CHECK:  encoding: [0x0f,0x9f,0xc3]
-        	setg	%bl
-
-// CHECK: setg	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x9f,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	setg	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: setg	32493
-// CHECK:  encoding: [0x0f,0x9f,0x05,0xed,0x7e,0x00,0x00]
-        	setg	0x7eed
-
-// CHECK: setg	3133065982
-// CHECK:  encoding: [0x0f,0x9f,0x05,0xfe,0xca,0xbe,0xba]
-        	setg	0xbabecafe
-
-// CHECK: setg	305419896
-// CHECK:  encoding: [0x0f,0x9f,0x05,0x78,0x56,0x34,0x12]
-        	setg	0x12345678
-
-// CHECK: rsm
-// CHECK:  encoding: [0x0f,0xaa]
-        	rsm
-
-// CHECK: hlt
-// CHECK:  encoding: [0xf4]
-        	hlt
-
-// CHECK: nopl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x1f,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	nopl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: nopw	32493
-// CHECK:  encoding: [0x66,0x0f,0x1f,0x05,0xed,0x7e,0x00,0x00]
-        	nopw	0x7eed
-
-// CHECK: nopl	3133065982
-// CHECK:  encoding: [0x0f,0x1f,0x05,0xfe,0xca,0xbe,0xba]
-        	nopl	0xbabecafe
-
-// CHECK: nopl	305419896
-// CHECK:  encoding: [0x0f,0x1f,0x05,0x78,0x56,0x34,0x12]
-        	nopl	0x12345678
-
-// CHECK: nop
-// CHECK:  encoding: [0x90]
-        	nop
-
-// CHECK: lldtw	32493
-// CHECK:  encoding: [0x0f,0x00,0x15,0xed,0x7e,0x00,0x00]
-        	lldtw	0x7eed
-
-// CHECK: lmsww	32493
-// CHECK:  encoding: [0x0f,0x01,0x35,0xed,0x7e,0x00,0x00]
-        	lmsww	0x7eed
-
-// CHECK: ltrw	32493
-// CHECK:  encoding: [0x0f,0x00,0x1d,0xed,0x7e,0x00,0x00]
-        	ltrw	0x7eed
-
-// CHECK: sldtw	32493
-// CHECK:  encoding: [0x0f,0x00,0x05,0xed,0x7e,0x00,0x00]
-        	sldtw	0x7eed
-
-// CHECK: smsww	32493
-// CHECK:  encoding: [0x0f,0x01,0x25,0xed,0x7e,0x00,0x00]
-        	smsww	0x7eed
-
-// CHECK: strw	32493
-// CHECK:  encoding: [0x0f,0x00,0x0d,0xed,0x7e,0x00,0x00]
-        	strw	0x7eed
-
-// CHECK: verr	%bx
-// CHECK:  encoding: [0x0f,0x00,0xe3]
-        	verr	%bx
-
-// CHECK: verr	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x00,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	verr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: verr	3133065982
-// CHECK:  encoding: [0x0f,0x00,0x25,0xfe,0xca,0xbe,0xba]
-        	verr	0xbabecafe
-
-// CHECK: verr	305419896
-// CHECK:  encoding: [0x0f,0x00,0x25,0x78,0x56,0x34,0x12]
-        	verr	0x12345678
-
-// CHECK: verw	%bx
-// CHECK:  encoding: [0x0f,0x00,0xeb]
-        	verw	%bx
-
-// CHECK: verw	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x00,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	verw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: verw	3133065982
-// CHECK:  encoding: [0x0f,0x00,0x2d,0xfe,0xca,0xbe,0xba]
-        	verw	0xbabecafe
-
-// CHECK: verw	305419896
-// CHECK:  encoding: [0x0f,0x00,0x2d,0x78,0x56,0x34,0x12]
-        	verw	0x12345678
-
-// CHECK: fld	%st(2)
-// CHECK:  encoding: [0xd9,0xc2]
-        	fld	%st(2)
-
-// CHECK: fldl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdd,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	fldl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fldl	3133065982
-// CHECK:  encoding: [0xdd,0x05,0xfe,0xca,0xbe,0xba]
-        	fldl	0xbabecafe
-
-// CHECK: fldl	305419896
-// CHECK:  encoding: [0xdd,0x05,0x78,0x56,0x34,0x12]
-        	fldl	0x12345678
-
-// CHECK: fld	%st(2)
-// CHECK:  encoding: [0xd9,0xc2]
-        	fld	%st(2)
-
-// CHECK: fildl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	fildl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fildl	3133065982
-// CHECK:  encoding: [0xdb,0x05,0xfe,0xca,0xbe,0xba]
-        	fildl	0xbabecafe
-
-// CHECK: fildl	305419896
-// CHECK:  encoding: [0xdb,0x05,0x78,0x56,0x34,0x12]
-        	fildl	0x12345678
-
-// CHECK: fildll	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdf,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	fildll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fildll	32493
-// CHECK:  encoding: [0xdf,0x2d,0xed,0x7e,0x00,0x00]
-        	fildll	0x7eed
-
-// CHECK: fildll	3133065982
-// CHECK:  encoding: [0xdf,0x2d,0xfe,0xca,0xbe,0xba]
-        	fildll	0xbabecafe
-
-// CHECK: fildll	305419896
-// CHECK:  encoding: [0xdf,0x2d,0x78,0x56,0x34,0x12]
-        	fildll	0x12345678
-
-// CHECK: fldt	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	fldt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fldt	32493
-// CHECK:  encoding: [0xdb,0x2d,0xed,0x7e,0x00,0x00]
-        	fldt	0x7eed
-
-// CHECK: fldt	3133065982
-// CHECK:  encoding: [0xdb,0x2d,0xfe,0xca,0xbe,0xba]
-        	fldt	0xbabecafe
-
-// CHECK: fldt	305419896
-// CHECK:  encoding: [0xdb,0x2d,0x78,0x56,0x34,0x12]
-        	fldt	0x12345678
-
-// CHECK: fbld	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdf,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	fbld	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fbld	32493
-// CHECK:  encoding: [0xdf,0x25,0xed,0x7e,0x00,0x00]
-        	fbld	0x7eed
-
-// CHECK: fbld	3133065982
-// CHECK:  encoding: [0xdf,0x25,0xfe,0xca,0xbe,0xba]
-        	fbld	0xbabecafe
-
-// CHECK: fbld	305419896
-// CHECK:  encoding: [0xdf,0x25,0x78,0x56,0x34,0x12]
-        	fbld	0x12345678
-
-// CHECK: fst	%st(2)
-// CHECK:  encoding: [0xdd,0xd2]
-        	fst	%st(2)
-
-// CHECK: fstl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdd,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	fstl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fstl	3133065982
-// CHECK:  encoding: [0xdd,0x15,0xfe,0xca,0xbe,0xba]
-        	fstl	0xbabecafe
-
-// CHECK: fstl	305419896
-// CHECK:  encoding: [0xdd,0x15,0x78,0x56,0x34,0x12]
-        	fstl	0x12345678
-
-// CHECK: fst	%st(2)
-// CHECK:  encoding: [0xdd,0xd2]
-        	fst	%st(2)
-
-// CHECK: fistl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	fistl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fistl	3133065982
-// CHECK:  encoding: [0xdb,0x15,0xfe,0xca,0xbe,0xba]
-        	fistl	0xbabecafe
-
-// CHECK: fistl	305419896
-// CHECK:  encoding: [0xdb,0x15,0x78,0x56,0x34,0x12]
-        	fistl	0x12345678
-
-// CHECK: fstp	%st(2)
-// CHECK:  encoding: [0xdd,0xda]
-        	fstp	%st(2)
-
-// CHECK: fstpl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	fstpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fstpl	3133065982
-// CHECK:  encoding: [0xdd,0x1d,0xfe,0xca,0xbe,0xba]
-        	fstpl	0xbabecafe
-
-// CHECK: fstpl	305419896
-// CHECK:  encoding: [0xdd,0x1d,0x78,0x56,0x34,0x12]
-        	fstpl	0x12345678
-
-// CHECK: fstp	%st(2)
-// CHECK:  encoding: [0xdd,0xda]
-        	fstp	%st(2)
-
-// CHECK: fistpl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	fistpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fistpl	3133065982
-// CHECK:  encoding: [0xdb,0x1d,0xfe,0xca,0xbe,0xba]
-        	fistpl	0xbabecafe
-
-// CHECK: fistpl	305419896
-// CHECK:  encoding: [0xdb,0x1d,0x78,0x56,0x34,0x12]
-        	fistpl	0x12345678
-
-// CHECK: fistpll	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdf,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fistpll	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fistpll	32493
-// CHECK:  encoding: [0xdf,0x3d,0xed,0x7e,0x00,0x00]
-        	fistpll	0x7eed
-
-// CHECK: fistpll	3133065982
-// CHECK:  encoding: [0xdf,0x3d,0xfe,0xca,0xbe,0xba]
-        	fistpll	0xbabecafe
-
-// CHECK: fistpll	305419896
-// CHECK:  encoding: [0xdf,0x3d,0x78,0x56,0x34,0x12]
-        	fistpll	0x12345678
-
-// CHECK: fstpt	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fstpt	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fstpt	32493
-// CHECK:  encoding: [0xdb,0x3d,0xed,0x7e,0x00,0x00]
-        	fstpt	0x7eed
-
-// CHECK: fstpt	3133065982
-// CHECK:  encoding: [0xdb,0x3d,0xfe,0xca,0xbe,0xba]
-        	fstpt	0xbabecafe
-
-// CHECK: fstpt	305419896
-// CHECK:  encoding: [0xdb,0x3d,0x78,0x56,0x34,0x12]
-        	fstpt	0x12345678
-
-// CHECK: fbstp	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdf,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	fbstp	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fbstp	32493
-// CHECK:  encoding: [0xdf,0x35,0xed,0x7e,0x00,0x00]
-        	fbstp	0x7eed
-
-// CHECK: fbstp	3133065982
-// CHECK:  encoding: [0xdf,0x35,0xfe,0xca,0xbe,0xba]
-        	fbstp	0xbabecafe
-
-// CHECK: fbstp	305419896
-// CHECK:  encoding: [0xdf,0x35,0x78,0x56,0x34,0x12]
-        	fbstp	0x12345678
-
-// CHECK: fxch	%st(2)
-// CHECK:  encoding: [0xd9,0xca]
-        	fxch	%st(2)
-
-// CHECK: fcom	%st(2)
-// CHECK:  encoding: [0xd8,0xd2]
-        	fcom	%st(2)
-
-// CHECK: fcom	%st(2)
-// CHECK:  encoding: [0xd8,0xd2]
-        	fcom	%st(2)
-
-// CHECK: ficoml	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	ficoml	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: ficoml	3133065982
-// CHECK:  encoding: [0xda,0x15,0xfe,0xca,0xbe,0xba]
-        	ficoml	0xbabecafe
-
-// CHECK: ficoml	305419896
-// CHECK:  encoding: [0xda,0x15,0x78,0x56,0x34,0x12]
-        	ficoml	0x12345678
-
-// CHECK: fcomp	%st(2)
-// CHECK:  encoding: [0xd8,0xda]
-        	fcomp	%st(2)
-
-// CHECK: fcomp	%st(2)
-// CHECK:  encoding: [0xd8,0xda]
-        	fcomp	%st(2)
-
-// CHECK: ficompl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	ficompl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: ficompl	3133065982
-// CHECK:  encoding: [0xda,0x1d,0xfe,0xca,0xbe,0xba]
-        	ficompl	0xbabecafe
-
-// CHECK: ficompl	305419896
-// CHECK:  encoding: [0xda,0x1d,0x78,0x56,0x34,0x12]
-        	ficompl	0x12345678
-
-// CHECK: fcompp
-// CHECK:  encoding: [0xde,0xd9]
-        	fcompp
-
-// CHECK: fucom	%st(2)
-// CHECK:  encoding: [0xdd,0xe2]
-        	fucom	%st(2)
-
-// CHECK: fucomp	%st(2)
-// CHECK:  encoding: [0xdd,0xea]
-        	fucomp	%st(2)
-
-// CHECK: fucompp
-// CHECK:  encoding: [0xda,0xe9]
-        	fucompp
-
-// CHECK: ftst
-// CHECK:  encoding: [0xd9,0xe4]
-        	ftst
-
-// CHECK: fxam
-// CHECK:  encoding: [0xd9,0xe5]
-        	fxam
-
-// CHECK: fld1
-// CHECK:  encoding: [0xd9,0xe8]
-        	fld1
-
-// CHECK: fldl2t
-// CHECK:  encoding: [0xd9,0xe9]
-        	fldl2t
-
-// CHECK: fldl2e
-// CHECK:  encoding: [0xd9,0xea]
-        	fldl2e
-
-// CHECK: fldpi
-// CHECK:  encoding: [0xd9,0xeb]
-        	fldpi
-
-// CHECK: fldlg2
-// CHECK:  encoding: [0xd9,0xec]
-        	fldlg2
-
-// CHECK: fldln2
-// CHECK:  encoding: [0xd9,0xed]
-        	fldln2
-
-// CHECK: fldz
-// CHECK:  encoding: [0xd9,0xee]
-        	fldz
-
-// CHECK: fadd	%st(2)
-// CHECK:  encoding: [0xd8,0xc2]
-        	fadd	%st(2)
-
-// CHECK: faddl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	faddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: faddl	3133065982
-// CHECK:  encoding: [0xdc,0x05,0xfe,0xca,0xbe,0xba]
-        	faddl	0xbabecafe
-
-// CHECK: faddl	305419896
-// CHECK:  encoding: [0xdc,0x05,0x78,0x56,0x34,0x12]
-        	faddl	0x12345678
-
-// CHECK: fiaddl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fiaddl	3133065982
-// CHECK:  encoding: [0xda,0x05,0xfe,0xca,0xbe,0xba]
-        	fiaddl	0xbabecafe
-
-// CHECK: fiaddl	305419896
-// CHECK:  encoding: [0xda,0x05,0x78,0x56,0x34,0x12]
-        	fiaddl	0x12345678
-
-// CHECK: faddp	%st(2)
-// CHECK:  encoding: [0xde,0xc2]
-        	faddp	%st(2)
-
-// CHECK: fsub	%st(2)
-// CHECK:  encoding: [0xd8,0xe2]
-        	fsub	%st(2)
-
-// CHECK: fsubl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	fsubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fsubl	3133065982
-// CHECK:  encoding: [0xdc,0x25,0xfe,0xca,0xbe,0xba]
-        	fsubl	0xbabecafe
-
-// CHECK: fsubl	305419896
-// CHECK:  encoding: [0xdc,0x25,0x78,0x56,0x34,0x12]
-        	fsubl	0x12345678
-
-// CHECK: fisubl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-        	fisubl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fisubl	3133065982
-// CHECK:  encoding: [0xda,0x25,0xfe,0xca,0xbe,0xba]
-        	fisubl	0xbabecafe
-
-// CHECK: fisubl	305419896
-// CHECK:  encoding: [0xda,0x25,0x78,0x56,0x34,0x12]
-        	fisubl	0x12345678
-
-// CHECK: fsubp	%st(2)
-// CHECK:  encoding: [0xde,0xe2]
-        	fsubp	%st(2)
-
-// CHECK: fsubr	%st(2)
-// CHECK:  encoding: [0xd8,0xea]
-        	fsubr	%st(2)
-
-// CHECK: fsubrl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fsubrl	3133065982
-// CHECK:  encoding: [0xdc,0x2d,0xfe,0xca,0xbe,0xba]
-        	fsubrl	0xbabecafe
-
-// CHECK: fsubrl	305419896
-// CHECK:  encoding: [0xdc,0x2d,0x78,0x56,0x34,0x12]
-        	fsubrl	0x12345678
-
-// CHECK: fisubrl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fisubrl	3133065982
-// CHECK:  encoding: [0xda,0x2d,0xfe,0xca,0xbe,0xba]
-        	fisubrl	0xbabecafe
-
-// CHECK: fisubrl	305419896
-// CHECK:  encoding: [0xda,0x2d,0x78,0x56,0x34,0x12]
-        	fisubrl	0x12345678
-
-// CHECK: fsubrp	%st(2)
-// CHECK:  encoding: [0xde,0xea]
-        	fsubrp	%st(2)
-
-// CHECK: fmul	%st(2)
-// CHECK:  encoding: [0xd8,0xca]
-        	fmul	%st(2)
-
-// CHECK: fmull	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	fmull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fmull	3133065982
-// CHECK:  encoding: [0xdc,0x0d,0xfe,0xca,0xbe,0xba]
-        	fmull	0xbabecafe
-
-// CHECK: fmull	305419896
-// CHECK:  encoding: [0xdc,0x0d,0x78,0x56,0x34,0x12]
-        	fmull	0x12345678
-
-// CHECK: fimull	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	fimull	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fimull	3133065982
-// CHECK:  encoding: [0xda,0x0d,0xfe,0xca,0xbe,0xba]
-        	fimull	0xbabecafe
-
-// CHECK: fimull	305419896
-// CHECK:  encoding: [0xda,0x0d,0x78,0x56,0x34,0x12]
-        	fimull	0x12345678
-
-// CHECK: fmulp	%st(2)
-// CHECK:  encoding: [0xde,0xca]
-        	fmulp	%st(2)
-
-// CHECK: fdiv	%st(2)
-// CHECK:  encoding: [0xd8,0xf2]
-        	fdiv	%st(2)
-
-// CHECK: fdivl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	fdivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fdivl	3133065982
-// CHECK:  encoding: [0xdc,0x35,0xfe,0xca,0xbe,0xba]
-        	fdivl	0xbabecafe
-
-// CHECK: fdivl	305419896
-// CHECK:  encoding: [0xdc,0x35,0x78,0x56,0x34,0x12]
-        	fdivl	0x12345678
-
-// CHECK: fidivl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	fidivl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fidivl	3133065982
-// CHECK:  encoding: [0xda,0x35,0xfe,0xca,0xbe,0xba]
-        	fidivl	0xbabecafe
-
-// CHECK: fidivl	305419896
-// CHECK:  encoding: [0xda,0x35,0x78,0x56,0x34,0x12]
-        	fidivl	0x12345678
-
-// CHECK: fdivp	%st(2)
-// CHECK:  encoding: [0xde,0xf2]
-        	fdivp	%st(2)
-
-// CHECK: fdivr	%st(2)
-// CHECK:  encoding: [0xd8,0xfa]
-        	fdivr	%st(2)
-
-// CHECK: fdivrl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fdivrl	3133065982
-// CHECK:  encoding: [0xdc,0x3d,0xfe,0xca,0xbe,0xba]
-        	fdivrl	0xbabecafe
-
-// CHECK: fdivrl	305419896
-// CHECK:  encoding: [0xdc,0x3d,0x78,0x56,0x34,0x12]
-        	fdivrl	0x12345678
-
-// CHECK: fidivrl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fidivrl	3133065982
-// CHECK:  encoding: [0xda,0x3d,0xfe,0xca,0xbe,0xba]
-        	fidivrl	0xbabecafe
-
-// CHECK: fidivrl	305419896
-// CHECK:  encoding: [0xda,0x3d,0x78,0x56,0x34,0x12]
-        	fidivrl	0x12345678
-
-// CHECK: fdivrp	%st(2)
-// CHECK:  encoding: [0xde,0xfa]
-        	fdivrp	%st(2)
-
-// CHECK: f2xm1
-// CHECK:  encoding: [0xd9,0xf0]
-        	f2xm1
-
-// CHECK: fyl2x
-// CHECK:  encoding: [0xd9,0xf1]
-        	fyl2x
-
-// CHECK: fptan
-// CHECK:  encoding: [0xd9,0xf2]
-        	fptan
-
-// CHECK: fpatan
-// CHECK:  encoding: [0xd9,0xf3]
-        	fpatan
-
-// CHECK: fxtract
-// CHECK:  encoding: [0xd9,0xf4]
-        	fxtract
-
-// CHECK: fprem1
-// CHECK:  encoding: [0xd9,0xf5]
-        	fprem1
-
-// CHECK: fdecstp
-// CHECK:  encoding: [0xd9,0xf6]
-        	fdecstp
-
-// CHECK: fincstp
-// CHECK:  encoding: [0xd9,0xf7]
-        	fincstp
-
-// CHECK: fprem
-// CHECK:  encoding: [0xd9,0xf8]
-        	fprem
-
-// CHECK: fyl2xp1
-// CHECK:  encoding: [0xd9,0xf9]
-        	fyl2xp1
-
-// CHECK: fsqrt
-// CHECK:  encoding: [0xd9,0xfa]
-        	fsqrt
-
-// CHECK: fsincos
-// CHECK:  encoding: [0xd9,0xfb]
-        	fsincos
-
-// CHECK: frndint
-// CHECK:  encoding: [0xd9,0xfc]
-        	frndint
-
-// CHECK: fscale
-// CHECK:  encoding: [0xd9,0xfd]
-        	fscale
-
-// CHECK: fsin
-// CHECK:  encoding: [0xd9,0xfe]
-        	fsin
-
-// CHECK: fcos
-// CHECK:  encoding: [0xd9,0xff]
-        	fcos
-
-// CHECK: fchs
-// CHECK:  encoding: [0xd9,0xe0]
-        	fchs
-
-// CHECK: fabs
-// CHECK:  encoding: [0xd9,0xe1]
-        	fabs
-
-// CHECK: fninit
-// CHECK:  encoding: [0xdb,0xe3]
-        	fninit
-
-// CHECK: fldcw	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd9,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	fldcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fldcw	3133065982
-// CHECK:  encoding: [0xd9,0x2d,0xfe,0xca,0xbe,0xba]
-        	fldcw	0xbabecafe
-
-// CHECK: fldcw	305419896
-// CHECK:  encoding: [0xd9,0x2d,0x78,0x56,0x34,0x12]
-        	fldcw	0x12345678
-
-// CHECK: fnstcw	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xd9,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fnstcw	3133065982
-// CHECK:  encoding: [0xd9,0x3d,0xfe,0xca,0xbe,0xba]
-        	fnstcw	0xbabecafe
-
-// CHECK: fnstcw	305419896
-// CHECK:  encoding: [0xd9,0x3d,0x78,0x56,0x34,0x12]
-        	fnstcw	0x12345678
-
-// CHECK: fnstsw	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdd,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	fnstsw	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fnstsw	3133065982
-// CHECK:  encoding: [0xdd,0x3d,0xfe,0xca,0xbe,0xba]
-        	fnstsw	0xbabecafe
-
-// CHECK: fnstsw	305419896
-// CHECK:  encoding: [0xdd,0x3d,0x78,0x56,0x34,0x12]
-        	fnstsw	0x12345678
-
-// CHECK: fnclex
-// CHECK:  encoding: [0xdb,0xe2]
-        	fnclex
-
-// CHECK: fnstenv	32493
-// CHECK:  encoding: [0xd9,0x35,0xed,0x7e,0x00,0x00]
-        	fnstenv	0x7eed
-
-// CHECK: fldenv	32493
-// CHECK:  encoding: [0xd9,0x25,0xed,0x7e,0x00,0x00]
-        	fldenv	0x7eed
-
-// CHECK: fnsave	32493
-// CHECK:  encoding: [0xdd,0x35,0xed,0x7e,0x00,0x00]
-        	fnsave	0x7eed
-
-// CHECK: frstor	32493
-// CHECK:  encoding: [0xdd,0x25,0xed,0x7e,0x00,0x00]
-        	frstor	0x7eed
-
-// CHECK: ffree	%st(2)
-// CHECK:  encoding: [0xdd,0xc2]
-        	ffree	%st(2)
-
-// CHECK: fnop
-// CHECK:  encoding: [0xd9,0xd0]
-        	fnop
-
-// CHECK: invd
-// CHECK:  encoding: [0x0f,0x08]
-        	invd
-
-// CHECK: wbinvd
-// CHECK:  encoding: [0x0f,0x09]
-        	wbinvd
-
-// CHECK: cpuid
-// CHECK:  encoding: [0x0f,0xa2]
-        	cpuid
-
-// CHECK: wrmsr
-// CHECK:  encoding: [0x0f,0x30]
-        	wrmsr
-
-// CHECK: rdtsc
-// CHECK:  encoding: [0x0f,0x31]
-        	rdtsc
-
-// CHECK: rdmsr
-// CHECK:  encoding: [0x0f,0x32]
-        	rdmsr
-
-// CHECK: cmpxchg8b	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xc7,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	cmpxchg8b	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: cmpxchg8b	32493
-// CHECK:  encoding: [0x0f,0xc7,0x0d,0xed,0x7e,0x00,0x00]
-        	cmpxchg8b	0x7eed
-
-// CHECK: cmpxchg8b	3133065982
-// CHECK:  encoding: [0x0f,0xc7,0x0d,0xfe,0xca,0xbe,0xba]
-        	cmpxchg8b	0xbabecafe
-
-// CHECK: cmpxchg8b	305419896
-// CHECK:  encoding: [0x0f,0xc7,0x0d,0x78,0x56,0x34,0x12]
-        	cmpxchg8b	0x12345678
-
-// CHECK: sysenter
-// CHECK:  encoding: [0x0f,0x34]
-        	sysenter
-
-// CHECK: sysexit
-// CHECK:  encoding: [0x0f,0x35]
-        	sysexit
-
-// CHECK: fxsave	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xae,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	fxsave	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fxsave	32493
-// CHECK:  encoding: [0x0f,0xae,0x05,0xed,0x7e,0x00,0x00]
-        	fxsave	0x7eed
-
-// CHECK: fxsave	3133065982
-// CHECK:  encoding: [0x0f,0xae,0x05,0xfe,0xca,0xbe,0xba]
-        	fxsave	0xbabecafe
-
-// CHECK: fxsave	305419896
-// CHECK:  encoding: [0x0f,0xae,0x05,0x78,0x56,0x34,0x12]
-        	fxsave	0x12345678
-
-// CHECK: fxrstor	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xae,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	fxrstor	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fxrstor	32493
-// CHECK:  encoding: [0x0f,0xae,0x0d,0xed,0x7e,0x00,0x00]
-        	fxrstor	0x7eed
-
-// CHECK: fxrstor	3133065982
-// CHECK:  encoding: [0x0f,0xae,0x0d,0xfe,0xca,0xbe,0xba]
-        	fxrstor	0xbabecafe
-
-// CHECK: fxrstor	305419896
-// CHECK:  encoding: [0x0f,0xae,0x0d,0x78,0x56,0x34,0x12]
-        	fxrstor	0x12345678
-
-// CHECK: rdpmc
-// CHECK:  encoding: [0x0f,0x33]
-        	rdpmc
-
-// CHECK: ud2
-// CHECK:  encoding: [0x0f,0x0b]
-        	ud2
-
-// CHECK: fcmovb	%st(2), %st(0)
-// CHECK:  encoding: [0xda,0xc2]
-        	fcmovb	%st(2),%st
-
-// CHECK: fcmove	%st(2), %st(0)
-// CHECK:  encoding: [0xda,0xca]
-        	fcmove	%st(2),%st
-
-// CHECK: fcmovbe	%st(2), %st(0)
-// CHECK:  encoding: [0xda,0xd2]
-        	fcmovbe	%st(2),%st
-
-// CHECK: fcmovu	 %st(2), %st(0)
-// CHECK:  encoding: [0xda,0xda]
-        	fcmovu	%st(2),%st
-
-// CHECK: fcmovnb	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xc2]
-        	fcmovnb	%st(2),%st
-
-// CHECK: fcmovne	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xca]
-        	fcmovne	%st(2),%st
-
-// CHECK: fcmovnbe	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xd2]
-        	fcmovnbe	%st(2),%st
-
-// CHECK: fcmovnu	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xda]
-        	fcmovnu	%st(2),%st
-
-// CHECK: fcomi	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xf2]
-        	fcomi	%st(2),%st
-
-// CHECK: fucomi	%st(2), %st(0)
-// CHECK:  encoding: [0xdb,0xea]
-        	fucomi	%st(2),%st
-
-// CHECK: fcomip	%st(2), %st(0)
-// CHECK:  encoding: [0xdf,0xf2]
-        	fcomip	%st(2),%st
-
-// CHECK: fucomip	%st(2), %st(0)
-// CHECK:  encoding: [0xdf,0xea]
-        	fucomip	%st(2),%st
-
-// CHECK: movnti	%ecx, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xc3,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movnti	%ecx, 69
-// CHECK:  encoding: [0x0f,0xc3,0x0d,0x45,0x00,0x00,0x00]
-        	movnti	%ecx,0x45
-
-// CHECK: movnti	%ecx, 32493
-// CHECK:  encoding: [0x0f,0xc3,0x0d,0xed,0x7e,0x00,0x00]
-        	movnti	%ecx,0x7eed
-
-// CHECK: movnti	%ecx, 3133065982
-// CHECK:  encoding: [0x0f,0xc3,0x0d,0xfe,0xca,0xbe,0xba]
-        	movnti	%ecx,0xbabecafe
-
-// CHECK: movnti	%ecx, 305419896
-// CHECK:  encoding: [0x0f,0xc3,0x0d,0x78,0x56,0x34,0x12]
-        	movnti	%ecx,0x12345678
-
-// CHECK: clflush	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xae,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	clflush	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: clflush	32493
-// CHECK:  encoding: [0x0f,0xae,0x3d,0xed,0x7e,0x00,0x00]
-        	clflush	0x7eed
-
-// CHECK: clflush	3133065982
-// CHECK:  encoding: [0x0f,0xae,0x3d,0xfe,0xca,0xbe,0xba]
-        	clflush	0xbabecafe
-
-// CHECK: clflush	305419896
-// CHECK:  encoding: [0x0f,0xae,0x3d,0x78,0x56,0x34,0x12]
-        	clflush	0x12345678
-
-// CHECK: emms
-// CHECK:  encoding: [0x0f,0x77]
-        	emms
-
-// CHECK: movd	%ecx, %mm3
-// CHECK:  encoding: [0x0f,0x6e,0xd9]
-        	movd	%ecx,%mm3
-
-// CHECK: movd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x6e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: movd	69, %mm3
-// CHECK:  encoding: [0x0f,0x6e,0x1d,0x45,0x00,0x00,0x00]
-        	movd	0x45,%mm3
-
-// CHECK: movd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x6e,0x1d,0xed,0x7e,0x00,0x00]
-        	movd	0x7eed,%mm3
-
-// CHECK: movd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x6e,0x1d,0xfe,0xca,0xbe,0xba]
-        	movd	0xbabecafe,%mm3
-
-// CHECK: movd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x6e,0x1d,0x78,0x56,0x34,0x12]
-        	movd	0x12345678,%mm3
-
-// CHECK: movd	%mm3, %ecx
-// CHECK:  encoding: [0x0f,0x7e,0xd9]
-        	movd	%mm3,%ecx
-
-// CHECK: movd	%mm3, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x7e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movd	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movd	%mm3, 69
-// CHECK:  encoding: [0x0f,0x7e,0x1d,0x45,0x00,0x00,0x00]
-        	movd	%mm3,0x45
-
-// CHECK: movd	%mm3, 32493
-// CHECK:  encoding: [0x0f,0x7e,0x1d,0xed,0x7e,0x00,0x00]
-        	movd	%mm3,0x7eed
-
-// CHECK: movd	%mm3, 3133065982
-// CHECK:  encoding: [0x0f,0x7e,0x1d,0xfe,0xca,0xbe,0xba]
-        	movd	%mm3,0xbabecafe
-
-// CHECK: movd	%mm3, 305419896
-// CHECK:  encoding: [0x0f,0x7e,0x1d,0x78,0x56,0x34,0x12]
-        	movd	%mm3,0x12345678
-
-// CHECK: movd	%ecx, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0xe9]
-        	movd	%ecx,%xmm5
-
-// CHECK: movd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0x45,0x00,0x00,0x00]
-        	movd	0x45,%xmm5
-
-// CHECK: movd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0xed,0x7e,0x00,0x00]
-        	movd	0x7eed,%xmm5
-
-// CHECK: movd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0xfe,0xca,0xbe,0xba]
-        	movd	0xbabecafe,%xmm5
-
-// CHECK: movd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0x78,0x56,0x34,0x12]
-        	movd	0x12345678,%xmm5
-
-// CHECK: movd	%xmm5, %ecx
-// CHECK:  encoding: [0x66,0x0f,0x7e,0xe9]
-        	movd	%xmm5,%ecx
-
-// CHECK: movd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x7e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0x45,0x00,0x00,0x00]
-        	movd	%xmm5,0x45
-
-// CHECK: movd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0xed,0x7e,0x00,0x00]
-        	movd	%xmm5,0x7eed
-
-// CHECK: movd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0xfe,0xca,0xbe,0xba]
-        	movd	%xmm5,0xbabecafe
-
-// CHECK: movd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0x78,0x56,0x34,0x12]
-        	movd	%xmm5,0x12345678
-
-// CHECK: movq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x6f,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: movq	69, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0x1d,0x45,0x00,0x00,0x00]
-        	movq	0x45,%mm3
-
-// CHECK: movq	32493, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0x1d,0xed,0x7e,0x00,0x00]
-        	movq	0x7eed,%mm3
-
-// CHECK: movq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0x1d,0xfe,0xca,0xbe,0xba]
-        	movq	0xbabecafe,%mm3
-
-// CHECK: movq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0x1d,0x78,0x56,0x34,0x12]
-        	movq	0x12345678,%mm3
-
-// CHECK: movq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0xdb]
-        	movq	%mm3,%mm3
-
-// CHECK: movq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x6f,0xdb]
-        	movq	%mm3,%mm3
-
-// CHECK: movq	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x7e,0xed]
-        	movq	%xmm5,%xmm5
-
-// CHECK: movq	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0xd6,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movq	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0x45,0x00,0x00,0x00]
-        	movq	%xmm5,0x45
-
-// CHECK: movq	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0xed,0x7e,0x00,0x00]
-        	movq	%xmm5,0x7eed
-
-// CHECK: movq	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0xfe,0xca,0xbe,0xba]
-        	movq	%xmm5,0xbabecafe
-
-// CHECK: movq	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0x78,0x56,0x34,0x12]
-        	movq	%xmm5,0x12345678
-
-// CHECK: movq	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x7e,0xed]
-        	movq	%xmm5,%xmm5
-
-// CHECK: packssdw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x6b,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	packssdw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: packssdw	69, %mm3
-// CHECK:  encoding: [0x0f,0x6b,0x1d,0x45,0x00,0x00,0x00]
-        	packssdw	0x45,%mm3
-
-// CHECK: packssdw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x6b,0x1d,0xed,0x7e,0x00,0x00]
-        	packssdw	0x7eed,%mm3
-
-// CHECK: packssdw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x6b,0x1d,0xfe,0xca,0xbe,0xba]
-        	packssdw	0xbabecafe,%mm3
-
-// CHECK: packssdw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x6b,0x1d,0x78,0x56,0x34,0x12]
-        	packssdw	0x12345678,%mm3
-
-// CHECK: packssdw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x6b,0xdb]
-        	packssdw	%mm3,%mm3
-
-// CHECK: packssdw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	packssdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: packssdw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0x45,0x00,0x00,0x00]
-        	packssdw	0x45,%xmm5
-
-// CHECK: packssdw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0xed,0x7e,0x00,0x00]
-        	packssdw	0x7eed,%xmm5
-
-// CHECK: packssdw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0xfe,0xca,0xbe,0xba]
-        	packssdw	0xbabecafe,%xmm5
-
-// CHECK: packssdw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0x78,0x56,0x34,0x12]
-        	packssdw	0x12345678,%xmm5
-
-// CHECK: packssdw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6b,0xed]
-        	packssdw	%xmm5,%xmm5
-
-// CHECK: packsswb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x63,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	packsswb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: packsswb	69, %mm3
-// CHECK:  encoding: [0x0f,0x63,0x1d,0x45,0x00,0x00,0x00]
-        	packsswb	0x45,%mm3
-
-// CHECK: packsswb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x63,0x1d,0xed,0x7e,0x00,0x00]
-        	packsswb	0x7eed,%mm3
-
-// CHECK: packsswb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x63,0x1d,0xfe,0xca,0xbe,0xba]
-        	packsswb	0xbabecafe,%mm3
-
-// CHECK: packsswb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x63,0x1d,0x78,0x56,0x34,0x12]
-        	packsswb	0x12345678,%mm3
-
-// CHECK: packsswb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x63,0xdb]
-        	packsswb	%mm3,%mm3
-
-// CHECK: packsswb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	packsswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: packsswb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0x45,0x00,0x00,0x00]
-        	packsswb	0x45,%xmm5
-
-// CHECK: packsswb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0xed,0x7e,0x00,0x00]
-        	packsswb	0x7eed,%xmm5
-
-// CHECK: packsswb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0xfe,0xca,0xbe,0xba]
-        	packsswb	0xbabecafe,%xmm5
-
-// CHECK: packsswb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0x78,0x56,0x34,0x12]
-        	packsswb	0x12345678,%xmm5
-
-// CHECK: packsswb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x63,0xed]
-        	packsswb	%xmm5,%xmm5
-
-// CHECK: packuswb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x67,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	packuswb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: packuswb	69, %mm3
-// CHECK:  encoding: [0x0f,0x67,0x1d,0x45,0x00,0x00,0x00]
-        	packuswb	0x45,%mm3
-
-// CHECK: packuswb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x67,0x1d,0xed,0x7e,0x00,0x00]
-        	packuswb	0x7eed,%mm3
-
-// CHECK: packuswb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x67,0x1d,0xfe,0xca,0xbe,0xba]
-        	packuswb	0xbabecafe,%mm3
-
-// CHECK: packuswb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x67,0x1d,0x78,0x56,0x34,0x12]
-        	packuswb	0x12345678,%mm3
-
-// CHECK: packuswb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x67,0xdb]
-        	packuswb	%mm3,%mm3
-
-// CHECK: packuswb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	packuswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: packuswb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0x45,0x00,0x00,0x00]
-        	packuswb	0x45,%xmm5
-
-// CHECK: packuswb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0xed,0x7e,0x00,0x00]
-        	packuswb	0x7eed,%xmm5
-
-// CHECK: packuswb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0xfe,0xca,0xbe,0xba]
-        	packuswb	0xbabecafe,%xmm5
-
-// CHECK: packuswb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0x78,0x56,0x34,0x12]
-        	packuswb	0x12345678,%xmm5
-
-// CHECK: packuswb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x67,0xed]
-        	packuswb	%xmm5,%xmm5
-
-// CHECK: paddb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xfc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddb	69, %mm3
-// CHECK:  encoding: [0x0f,0xfc,0x1d,0x45,0x00,0x00,0x00]
-        	paddb	0x45,%mm3
-
-// CHECK: paddb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xfc,0x1d,0xed,0x7e,0x00,0x00]
-        	paddb	0x7eed,%mm3
-
-// CHECK: paddb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xfc,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddb	0xbabecafe,%mm3
-
-// CHECK: paddb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xfc,0x1d,0x78,0x56,0x34,0x12]
-        	paddb	0x12345678,%mm3
-
-// CHECK: paddb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xfc,0xdb]
-        	paddb	%mm3,%mm3
-
-// CHECK: paddb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0x45,0x00,0x00,0x00]
-        	paddb	0x45,%xmm5
-
-// CHECK: paddb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0xed,0x7e,0x00,0x00]
-        	paddb	0x7eed,%xmm5
-
-// CHECK: paddb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddb	0xbabecafe,%xmm5
-
-// CHECK: paddb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0x78,0x56,0x34,0x12]
-        	paddb	0x12345678,%xmm5
-
-// CHECK: paddb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfc,0xed]
-        	paddb	%xmm5,%xmm5
-
-// CHECK: paddw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xfd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddw	69, %mm3
-// CHECK:  encoding: [0x0f,0xfd,0x1d,0x45,0x00,0x00,0x00]
-        	paddw	0x45,%mm3
-
-// CHECK: paddw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xfd,0x1d,0xed,0x7e,0x00,0x00]
-        	paddw	0x7eed,%mm3
-
-// CHECK: paddw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xfd,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddw	0xbabecafe,%mm3
-
-// CHECK: paddw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xfd,0x1d,0x78,0x56,0x34,0x12]
-        	paddw	0x12345678,%mm3
-
-// CHECK: paddw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xfd,0xdb]
-        	paddw	%mm3,%mm3
-
-// CHECK: paddw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0x45,0x00,0x00,0x00]
-        	paddw	0x45,%xmm5
-
-// CHECK: paddw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0xed,0x7e,0x00,0x00]
-        	paddw	0x7eed,%xmm5
-
-// CHECK: paddw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddw	0xbabecafe,%xmm5
-
-// CHECK: paddw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0x78,0x56,0x34,0x12]
-        	paddw	0x12345678,%xmm5
-
-// CHECK: paddw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfd,0xed]
-        	paddw	%xmm5,%xmm5
-
-// CHECK: paddd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xfe,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddd	69, %mm3
-// CHECK:  encoding: [0x0f,0xfe,0x1d,0x45,0x00,0x00,0x00]
-        	paddd	0x45,%mm3
-
-// CHECK: paddd	32493, %mm3
-// CHECK:  encoding: [0x0f,0xfe,0x1d,0xed,0x7e,0x00,0x00]
-        	paddd	0x7eed,%mm3
-
-// CHECK: paddd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xfe,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddd	0xbabecafe,%mm3
-
-// CHECK: paddd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xfe,0x1d,0x78,0x56,0x34,0x12]
-        	paddd	0x12345678,%mm3
-
-// CHECK: paddd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xfe,0xdb]
-        	paddd	%mm3,%mm3
-
-// CHECK: paddd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0x45,0x00,0x00,0x00]
-        	paddd	0x45,%xmm5
-
-// CHECK: paddd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0xed,0x7e,0x00,0x00]
-        	paddd	0x7eed,%xmm5
-
-// CHECK: paddd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddd	0xbabecafe,%xmm5
-
-// CHECK: paddd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0x78,0x56,0x34,0x12]
-        	paddd	0x12345678,%xmm5
-
-// CHECK: paddd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfe,0xed]
-        	paddd	%xmm5,%xmm5
-
-// CHECK: paddq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddq	69, %mm3
-// CHECK:  encoding: [0x0f,0xd4,0x1d,0x45,0x00,0x00,0x00]
-        	paddq	0x45,%mm3
-
-// CHECK: paddq	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd4,0x1d,0xed,0x7e,0x00,0x00]
-        	paddq	0x7eed,%mm3
-
-// CHECK: paddq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd4,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddq	0xbabecafe,%mm3
-
-// CHECK: paddq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd4,0x1d,0x78,0x56,0x34,0x12]
-        	paddq	0x12345678,%mm3
-
-// CHECK: paddq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd4,0xdb]
-        	paddq	%mm3,%mm3
-
-// CHECK: paddq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0x45,0x00,0x00,0x00]
-        	paddq	0x45,%xmm5
-
-// CHECK: paddq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0xed,0x7e,0x00,0x00]
-        	paddq	0x7eed,%xmm5
-
-// CHECK: paddq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddq	0xbabecafe,%xmm5
-
-// CHECK: paddq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0x78,0x56,0x34,0x12]
-        	paddq	0x12345678,%xmm5
-
-// CHECK: paddq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd4,0xed]
-        	paddq	%xmm5,%xmm5
-
-// CHECK: paddsb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xec,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddsb	69, %mm3
-// CHECK:  encoding: [0x0f,0xec,0x1d,0x45,0x00,0x00,0x00]
-        	paddsb	0x45,%mm3
-
-// CHECK: paddsb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xec,0x1d,0xed,0x7e,0x00,0x00]
-        	paddsb	0x7eed,%mm3
-
-// CHECK: paddsb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xec,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddsb	0xbabecafe,%mm3
-
-// CHECK: paddsb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xec,0x1d,0x78,0x56,0x34,0x12]
-        	paddsb	0x12345678,%mm3
-
-// CHECK: paddsb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xec,0xdb]
-        	paddsb	%mm3,%mm3
-
-// CHECK: paddsb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddsb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0x45,0x00,0x00,0x00]
-        	paddsb	0x45,%xmm5
-
-// CHECK: paddsb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0xed,0x7e,0x00,0x00]
-        	paddsb	0x7eed,%xmm5
-
-// CHECK: paddsb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddsb	0xbabecafe,%xmm5
-
-// CHECK: paddsb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0x78,0x56,0x34,0x12]
-        	paddsb	0x12345678,%xmm5
-
-// CHECK: paddsb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xec,0xed]
-        	paddsb	%xmm5,%xmm5
-
-// CHECK: paddsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xed,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddsw	69, %mm3
-// CHECK:  encoding: [0x0f,0xed,0x1d,0x45,0x00,0x00,0x00]
-        	paddsw	0x45,%mm3
-
-// CHECK: paddsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xed,0x1d,0xed,0x7e,0x00,0x00]
-        	paddsw	0x7eed,%mm3
-
-// CHECK: paddsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xed,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddsw	0xbabecafe,%mm3
-
-// CHECK: paddsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xed,0x1d,0x78,0x56,0x34,0x12]
-        	paddsw	0x12345678,%mm3
-
-// CHECK: paddsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xed,0xdb]
-        	paddsw	%mm3,%mm3
-
-// CHECK: paddsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0x45,0x00,0x00,0x00]
-        	paddsw	0x45,%xmm5
-
-// CHECK: paddsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0xed,0x7e,0x00,0x00]
-        	paddsw	0x7eed,%xmm5
-
-// CHECK: paddsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddsw	0xbabecafe,%xmm5
-
-// CHECK: paddsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0x78,0x56,0x34,0x12]
-        	paddsw	0x12345678,%xmm5
-
-// CHECK: paddsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xed,0xed]
-        	paddsw	%xmm5,%xmm5
-
-// CHECK: paddusb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xdc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddusb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddusb	69, %mm3
-// CHECK:  encoding: [0x0f,0xdc,0x1d,0x45,0x00,0x00,0x00]
-        	paddusb	0x45,%mm3
-
-// CHECK: paddusb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xdc,0x1d,0xed,0x7e,0x00,0x00]
-        	paddusb	0x7eed,%mm3
-
-// CHECK: paddusb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xdc,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddusb	0xbabecafe,%mm3
-
-// CHECK: paddusb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xdc,0x1d,0x78,0x56,0x34,0x12]
-        	paddusb	0x12345678,%mm3
-
-// CHECK: paddusb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xdc,0xdb]
-        	paddusb	%mm3,%mm3
-
-// CHECK: paddusb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddusb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0x45,0x00,0x00,0x00]
-        	paddusb	0x45,%xmm5
-
-// CHECK: paddusb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0xed,0x7e,0x00,0x00]
-        	paddusb	0x7eed,%xmm5
-
-// CHECK: paddusb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddusb	0xbabecafe,%xmm5
-
-// CHECK: paddusb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0x78,0x56,0x34,0x12]
-        	paddusb	0x12345678,%xmm5
-
-// CHECK: paddusb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdc,0xed]
-        	paddusb	%xmm5,%xmm5
-
-// CHECK: paddusw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xdd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	paddusw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: paddusw	69, %mm3
-// CHECK:  encoding: [0x0f,0xdd,0x1d,0x45,0x00,0x00,0x00]
-        	paddusw	0x45,%mm3
-
-// CHECK: paddusw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xdd,0x1d,0xed,0x7e,0x00,0x00]
-        	paddusw	0x7eed,%mm3
-
-// CHECK: paddusw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xdd,0x1d,0xfe,0xca,0xbe,0xba]
-        	paddusw	0xbabecafe,%mm3
-
-// CHECK: paddusw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xdd,0x1d,0x78,0x56,0x34,0x12]
-        	paddusw	0x12345678,%mm3
-
-// CHECK: paddusw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xdd,0xdb]
-        	paddusw	%mm3,%mm3
-
-// CHECK: paddusw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	paddusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: paddusw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0x45,0x00,0x00,0x00]
-        	paddusw	0x45,%xmm5
-
-// CHECK: paddusw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0xed,0x7e,0x00,0x00]
-        	paddusw	0x7eed,%xmm5
-
-// CHECK: paddusw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0xfe,0xca,0xbe,0xba]
-        	paddusw	0xbabecafe,%xmm5
-
-// CHECK: paddusw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0x78,0x56,0x34,0x12]
-        	paddusw	0x12345678,%xmm5
-
-// CHECK: paddusw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdd,0xed]
-        	paddusw	%xmm5,%xmm5
-
-// CHECK: pand	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xdb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pand	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pand	69, %mm3
-// CHECK:  encoding: [0x0f,0xdb,0x1d,0x45,0x00,0x00,0x00]
-        	pand	0x45,%mm3
-
-// CHECK: pand	32493, %mm3
-// CHECK:  encoding: [0x0f,0xdb,0x1d,0xed,0x7e,0x00,0x00]
-        	pand	0x7eed,%mm3
-
-// CHECK: pand	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xdb,0x1d,0xfe,0xca,0xbe,0xba]
-        	pand	0xbabecafe,%mm3
-
-// CHECK: pand	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xdb,0x1d,0x78,0x56,0x34,0x12]
-        	pand	0x12345678,%mm3
-
-// CHECK: pand	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xdb,0xdb]
-        	pand	%mm3,%mm3
-
-// CHECK: pand	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pand	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pand	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0x45,0x00,0x00,0x00]
-        	pand	0x45,%xmm5
-
-// CHECK: pand	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0xed,0x7e,0x00,0x00]
-        	pand	0x7eed,%xmm5
-
-// CHECK: pand	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0xfe,0xca,0xbe,0xba]
-        	pand	0xbabecafe,%xmm5
-
-// CHECK: pand	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0x78,0x56,0x34,0x12]
-        	pand	0x12345678,%xmm5
-
-// CHECK: pand	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdb,0xed]
-        	pand	%xmm5,%xmm5
-
-// CHECK: pandn	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xdf,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pandn	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pandn	69, %mm3
-// CHECK:  encoding: [0x0f,0xdf,0x1d,0x45,0x00,0x00,0x00]
-        	pandn	0x45,%mm3
-
-// CHECK: pandn	32493, %mm3
-// CHECK:  encoding: [0x0f,0xdf,0x1d,0xed,0x7e,0x00,0x00]
-        	pandn	0x7eed,%mm3
-
-// CHECK: pandn	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xdf,0x1d,0xfe,0xca,0xbe,0xba]
-        	pandn	0xbabecafe,%mm3
-
-// CHECK: pandn	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xdf,0x1d,0x78,0x56,0x34,0x12]
-        	pandn	0x12345678,%mm3
-
-// CHECK: pandn	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xdf,0xdb]
-        	pandn	%mm3,%mm3
-
-// CHECK: pandn	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pandn	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pandn	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0x45,0x00,0x00,0x00]
-        	pandn	0x45,%xmm5
-
-// CHECK: pandn	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0xed,0x7e,0x00,0x00]
-        	pandn	0x7eed,%xmm5
-
-// CHECK: pandn	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0xfe,0xca,0xbe,0xba]
-        	pandn	0xbabecafe,%xmm5
-
-// CHECK: pandn	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0x78,0x56,0x34,0x12]
-        	pandn	0x12345678,%xmm5
-
-// CHECK: pandn	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xdf,0xed]
-        	pandn	%xmm5,%xmm5
-
-// CHECK: pcmpeqb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x74,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpeqb	69, %mm3
-// CHECK:  encoding: [0x0f,0x74,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpeqb	0x45,%mm3
-
-// CHECK: pcmpeqb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x74,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpeqb	0x7eed,%mm3
-
-// CHECK: pcmpeqb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x74,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqb	0xbabecafe,%mm3
-
-// CHECK: pcmpeqb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x74,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpeqb	0x12345678,%mm3
-
-// CHECK: pcmpeqb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x74,0xdb]
-        	pcmpeqb	%mm3,%mm3
-
-// CHECK: pcmpeqb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpeqb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpeqb	0x45,%xmm5
-
-// CHECK: pcmpeqb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpeqb	0x7eed,%xmm5
-
-// CHECK: pcmpeqb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqb	0xbabecafe,%xmm5
-
-// CHECK: pcmpeqb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpeqb	0x12345678,%xmm5
-
-// CHECK: pcmpeqb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x74,0xed]
-        	pcmpeqb	%xmm5,%xmm5
-
-// CHECK: pcmpeqw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x75,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpeqw	69, %mm3
-// CHECK:  encoding: [0x0f,0x75,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpeqw	0x45,%mm3
-
-// CHECK: pcmpeqw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x75,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpeqw	0x7eed,%mm3
-
-// CHECK: pcmpeqw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x75,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqw	0xbabecafe,%mm3
-
-// CHECK: pcmpeqw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x75,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpeqw	0x12345678,%mm3
-
-// CHECK: pcmpeqw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x75,0xdb]
-        	pcmpeqw	%mm3,%mm3
-
-// CHECK: pcmpeqw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpeqw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpeqw	0x45,%xmm5
-
-// CHECK: pcmpeqw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpeqw	0x7eed,%xmm5
-
-// CHECK: pcmpeqw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqw	0xbabecafe,%xmm5
-
-// CHECK: pcmpeqw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpeqw	0x12345678,%xmm5
-
-// CHECK: pcmpeqw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x75,0xed]
-        	pcmpeqw	%xmm5,%xmm5
-
-// CHECK: pcmpeqd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x76,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpeqd	69, %mm3
-// CHECK:  encoding: [0x0f,0x76,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpeqd	0x45,%mm3
-
-// CHECK: pcmpeqd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x76,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpeqd	0x7eed,%mm3
-
-// CHECK: pcmpeqd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x76,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqd	0xbabecafe,%mm3
-
-// CHECK: pcmpeqd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x76,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpeqd	0x12345678,%mm3
-
-// CHECK: pcmpeqd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x76,0xdb]
-        	pcmpeqd	%mm3,%mm3
-
-// CHECK: pcmpeqd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpeqd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpeqd	0x45,%xmm5
-
-// CHECK: pcmpeqd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpeqd	0x7eed,%xmm5
-
-// CHECK: pcmpeqd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqd	0xbabecafe,%xmm5
-
-// CHECK: pcmpeqd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpeqd	0x12345678,%xmm5
-
-// CHECK: pcmpeqd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x76,0xed]
-        	pcmpeqd	%xmm5,%xmm5
-
-// CHECK: pcmpgtb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x64,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpgtb	69, %mm3
-// CHECK:  encoding: [0x0f,0x64,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpgtb	0x45,%mm3
-
-// CHECK: pcmpgtb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x64,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpgtb	0x7eed,%mm3
-
-// CHECK: pcmpgtb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x64,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtb	0xbabecafe,%mm3
-
-// CHECK: pcmpgtb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x64,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpgtb	0x12345678,%mm3
-
-// CHECK: pcmpgtb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x64,0xdb]
-        	pcmpgtb	%mm3,%mm3
-
-// CHECK: pcmpgtb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpgtb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpgtb	0x45,%xmm5
-
-// CHECK: pcmpgtb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpgtb	0x7eed,%xmm5
-
-// CHECK: pcmpgtb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtb	0xbabecafe,%xmm5
-
-// CHECK: pcmpgtb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpgtb	0x12345678,%xmm5
-
-// CHECK: pcmpgtb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x64,0xed]
-        	pcmpgtb	%xmm5,%xmm5
-
-// CHECK: pcmpgtw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x65,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpgtw	69, %mm3
-// CHECK:  encoding: [0x0f,0x65,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpgtw	0x45,%mm3
-
-// CHECK: pcmpgtw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x65,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpgtw	0x7eed,%mm3
-
-// CHECK: pcmpgtw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x65,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtw	0xbabecafe,%mm3
-
-// CHECK: pcmpgtw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x65,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpgtw	0x12345678,%mm3
-
-// CHECK: pcmpgtw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x65,0xdb]
-        	pcmpgtw	%mm3,%mm3
-
-// CHECK: pcmpgtw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpgtw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpgtw	0x45,%xmm5
-
-// CHECK: pcmpgtw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpgtw	0x7eed,%xmm5
-
-// CHECK: pcmpgtw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtw	0xbabecafe,%xmm5
-
-// CHECK: pcmpgtw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpgtw	0x12345678,%xmm5
-
-// CHECK: pcmpgtw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x65,0xed]
-        	pcmpgtw	%xmm5,%xmm5
-
-// CHECK: pcmpgtd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x66,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pcmpgtd	69, %mm3
-// CHECK:  encoding: [0x0f,0x66,0x1d,0x45,0x00,0x00,0x00]
-        	pcmpgtd	0x45,%mm3
-
-// CHECK: pcmpgtd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x66,0x1d,0xed,0x7e,0x00,0x00]
-        	pcmpgtd	0x7eed,%mm3
-
-// CHECK: pcmpgtd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x66,0x1d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtd	0xbabecafe,%mm3
-
-// CHECK: pcmpgtd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x66,0x1d,0x78,0x56,0x34,0x12]
-        	pcmpgtd	0x12345678,%mm3
-
-// CHECK: pcmpgtd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x66,0xdb]
-        	pcmpgtd	%mm3,%mm3
-
-// CHECK: pcmpgtd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpgtd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpgtd	0x45,%xmm5
-
-// CHECK: pcmpgtd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpgtd	0x7eed,%xmm5
-
-// CHECK: pcmpgtd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtd	0xbabecafe,%xmm5
-
-// CHECK: pcmpgtd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpgtd	0x12345678,%xmm5
-
-// CHECK: pcmpgtd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x66,0xed]
-        	pcmpgtd	%xmm5,%xmm5
-
-// CHECK: pmaddwd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmaddwd	69, %mm3
-// CHECK:  encoding: [0x0f,0xf5,0x1d,0x45,0x00,0x00,0x00]
-        	pmaddwd	0x45,%mm3
-
-// CHECK: pmaddwd	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf5,0x1d,0xed,0x7e,0x00,0x00]
-        	pmaddwd	0x7eed,%mm3
-
-// CHECK: pmaddwd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf5,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmaddwd	0xbabecafe,%mm3
-
-// CHECK: pmaddwd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf5,0x1d,0x78,0x56,0x34,0x12]
-        	pmaddwd	0x12345678,%mm3
-
-// CHECK: pmaddwd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf5,0xdb]
-        	pmaddwd	%mm3,%mm3
-
-// CHECK: pmaddwd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaddwd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0x45,0x00,0x00,0x00]
-        	pmaddwd	0x45,%xmm5
-
-// CHECK: pmaddwd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaddwd	0x7eed,%xmm5
-
-// CHECK: pmaddwd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaddwd	0xbabecafe,%xmm5
-
-// CHECK: pmaddwd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0x78,0x56,0x34,0x12]
-        	pmaddwd	0x12345678,%xmm5
-
-// CHECK: pmaddwd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf5,0xed]
-        	pmaddwd	%xmm5,%xmm5
-
-// CHECK: pmulhw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmulhw	69, %mm3
-// CHECK:  encoding: [0x0f,0xe5,0x1d,0x45,0x00,0x00,0x00]
-        	pmulhw	0x45,%mm3
-
-// CHECK: pmulhw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe5,0x1d,0xed,0x7e,0x00,0x00]
-        	pmulhw	0x7eed,%mm3
-
-// CHECK: pmulhw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe5,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmulhw	0xbabecafe,%mm3
-
-// CHECK: pmulhw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe5,0x1d,0x78,0x56,0x34,0x12]
-        	pmulhw	0x12345678,%mm3
-
-// CHECK: pmulhw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe5,0xdb]
-        	pmulhw	%mm3,%mm3
-
-// CHECK: pmulhw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmulhw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0x45,0x00,0x00,0x00]
-        	pmulhw	0x45,%xmm5
-
-// CHECK: pmulhw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0xed,0x7e,0x00,0x00]
-        	pmulhw	0x7eed,%xmm5
-
-// CHECK: pmulhw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmulhw	0xbabecafe,%xmm5
-
-// CHECK: pmulhw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0x78,0x56,0x34,0x12]
-        	pmulhw	0x12345678,%xmm5
-
-// CHECK: pmulhw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe5,0xed]
-        	pmulhw	%xmm5,%xmm5
-
-// CHECK: pmullw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmullw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmullw	69, %mm3
-// CHECK:  encoding: [0x0f,0xd5,0x1d,0x45,0x00,0x00,0x00]
-        	pmullw	0x45,%mm3
-
-// CHECK: pmullw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd5,0x1d,0xed,0x7e,0x00,0x00]
-        	pmullw	0x7eed,%mm3
-
-// CHECK: pmullw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd5,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmullw	0xbabecafe,%mm3
-
-// CHECK: pmullw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd5,0x1d,0x78,0x56,0x34,0x12]
-        	pmullw	0x12345678,%mm3
-
-// CHECK: pmullw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd5,0xdb]
-        	pmullw	%mm3,%mm3
-
-// CHECK: pmullw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmullw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmullw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0x45,0x00,0x00,0x00]
-        	pmullw	0x45,%xmm5
-
-// CHECK: pmullw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0xed,0x7e,0x00,0x00]
-        	pmullw	0x7eed,%xmm5
-
-// CHECK: pmullw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmullw	0xbabecafe,%xmm5
-
-// CHECK: pmullw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0x78,0x56,0x34,0x12]
-        	pmullw	0x12345678,%xmm5
-
-// CHECK: pmullw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd5,0xed]
-        	pmullw	%xmm5,%xmm5
-
-// CHECK: por	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xeb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	por	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: por	69, %mm3
-// CHECK:  encoding: [0x0f,0xeb,0x1d,0x45,0x00,0x00,0x00]
-        	por	0x45,%mm3
-
-// CHECK: por	32493, %mm3
-// CHECK:  encoding: [0x0f,0xeb,0x1d,0xed,0x7e,0x00,0x00]
-        	por	0x7eed,%mm3
-
-// CHECK: por	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xeb,0x1d,0xfe,0xca,0xbe,0xba]
-        	por	0xbabecafe,%mm3
-
-// CHECK: por	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xeb,0x1d,0x78,0x56,0x34,0x12]
-        	por	0x12345678,%mm3
-
-// CHECK: por	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xeb,0xdb]
-        	por	%mm3,%mm3
-
-// CHECK: por	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	por	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: por	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0x45,0x00,0x00,0x00]
-        	por	0x45,%xmm5
-
-// CHECK: por	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0xed,0x7e,0x00,0x00]
-        	por	0x7eed,%xmm5
-
-// CHECK: por	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0xfe,0xca,0xbe,0xba]
-        	por	0xbabecafe,%xmm5
-
-// CHECK: por	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0x78,0x56,0x34,0x12]
-        	por	0x12345678,%xmm5
-
-// CHECK: por	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xeb,0xed]
-        	por	%xmm5,%xmm5
-
-// CHECK: psllw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psllw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psllw	69, %mm3
-// CHECK:  encoding: [0x0f,0xf1,0x1d,0x45,0x00,0x00,0x00]
-        	psllw	0x45,%mm3
-
-// CHECK: psllw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf1,0x1d,0xed,0x7e,0x00,0x00]
-        	psllw	0x7eed,%mm3
-
-// CHECK: psllw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf1,0x1d,0xfe,0xca,0xbe,0xba]
-        	psllw	0xbabecafe,%mm3
-
-// CHECK: psllw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf1,0x1d,0x78,0x56,0x34,0x12]
-        	psllw	0x12345678,%mm3
-
-// CHECK: psllw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf1,0xdb]
-        	psllw	%mm3,%mm3
-
-// CHECK: psllw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psllw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psllw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0x45,0x00,0x00,0x00]
-        	psllw	0x45,%xmm5
-
-// CHECK: psllw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0xed,0x7e,0x00,0x00]
-        	psllw	0x7eed,%xmm5
-
-// CHECK: psllw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0xfe,0xca,0xbe,0xba]
-        	psllw	0xbabecafe,%xmm5
-
-// CHECK: psllw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0x78,0x56,0x34,0x12]
-        	psllw	0x12345678,%xmm5
-
-// CHECK: psllw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf1,0xed]
-        	psllw	%xmm5,%xmm5
-
-// CHECK: psllw	$127, %mm3
-// CHECK:  encoding: [0x0f,0x71,0xf3,0x7f]
-        	psllw	$0x7f,%mm3
-
-// CHECK: psllw	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x71,0xf5,0x7f]
-        	psllw	$0x7f,%xmm5
-
-// CHECK: pslld	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pslld	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pslld	69, %mm3
-// CHECK:  encoding: [0x0f,0xf2,0x1d,0x45,0x00,0x00,0x00]
-        	pslld	0x45,%mm3
-
-// CHECK: pslld	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf2,0x1d,0xed,0x7e,0x00,0x00]
-        	pslld	0x7eed,%mm3
-
-// CHECK: pslld	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf2,0x1d,0xfe,0xca,0xbe,0xba]
-        	pslld	0xbabecafe,%mm3
-
-// CHECK: pslld	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf2,0x1d,0x78,0x56,0x34,0x12]
-        	pslld	0x12345678,%mm3
-
-// CHECK: pslld	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf2,0xdb]
-        	pslld	%mm3,%mm3
-
-// CHECK: pslld	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pslld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pslld	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0x45,0x00,0x00,0x00]
-        	pslld	0x45,%xmm5
-
-// CHECK: pslld	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0xed,0x7e,0x00,0x00]
-        	pslld	0x7eed,%xmm5
-
-// CHECK: pslld	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0xfe,0xca,0xbe,0xba]
-        	pslld	0xbabecafe,%xmm5
-
-// CHECK: pslld	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0x78,0x56,0x34,0x12]
-        	pslld	0x12345678,%xmm5
-
-// CHECK: pslld	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf2,0xed]
-        	pslld	%xmm5,%xmm5
-
-// CHECK: pslld	$127, %mm3
-// CHECK:  encoding: [0x0f,0x72,0xf3,0x7f]
-        	pslld	$0x7f,%mm3
-
-// CHECK: pslld	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x72,0xf5,0x7f]
-        	pslld	$0x7f,%xmm5
-
-// CHECK: psllq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psllq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psllq	69, %mm3
-// CHECK:  encoding: [0x0f,0xf3,0x1d,0x45,0x00,0x00,0x00]
-        	psllq	0x45,%mm3
-
-// CHECK: psllq	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf3,0x1d,0xed,0x7e,0x00,0x00]
-        	psllq	0x7eed,%mm3
-
-// CHECK: psllq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf3,0x1d,0xfe,0xca,0xbe,0xba]
-        	psllq	0xbabecafe,%mm3
-
-// CHECK: psllq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf3,0x1d,0x78,0x56,0x34,0x12]
-        	psllq	0x12345678,%mm3
-
-// CHECK: psllq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf3,0xdb]
-        	psllq	%mm3,%mm3
-
-// CHECK: psllq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psllq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psllq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0x45,0x00,0x00,0x00]
-        	psllq	0x45,%xmm5
-
-// CHECK: psllq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0xed,0x7e,0x00,0x00]
-        	psllq	0x7eed,%xmm5
-
-// CHECK: psllq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0xfe,0xca,0xbe,0xba]
-        	psllq	0xbabecafe,%xmm5
-
-// CHECK: psllq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0x78,0x56,0x34,0x12]
-        	psllq	0x12345678,%xmm5
-
-// CHECK: psllq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf3,0xed]
-        	psllq	%xmm5,%xmm5
-
-// CHECK: psllq	$127, %mm3
-// CHECK:  encoding: [0x0f,0x73,0xf3,0x7f]
-        	psllq	$0x7f,%mm3
-
-// CHECK: psllq	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x73,0xf5,0x7f]
-        	psllq	$0x7f,%xmm5
-
-// CHECK: psraw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psraw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psraw	69, %mm3
-// CHECK:  encoding: [0x0f,0xe1,0x1d,0x45,0x00,0x00,0x00]
-        	psraw	0x45,%mm3
-
-// CHECK: psraw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe1,0x1d,0xed,0x7e,0x00,0x00]
-        	psraw	0x7eed,%mm3
-
-// CHECK: psraw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe1,0x1d,0xfe,0xca,0xbe,0xba]
-        	psraw	0xbabecafe,%mm3
-
-// CHECK: psraw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe1,0x1d,0x78,0x56,0x34,0x12]
-        	psraw	0x12345678,%mm3
-
-// CHECK: psraw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe1,0xdb]
-        	psraw	%mm3,%mm3
-
-// CHECK: psraw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psraw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psraw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0x45,0x00,0x00,0x00]
-        	psraw	0x45,%xmm5
-
-// CHECK: psraw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0xed,0x7e,0x00,0x00]
-        	psraw	0x7eed,%xmm5
-
-// CHECK: psraw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0xfe,0xca,0xbe,0xba]
-        	psraw	0xbabecafe,%xmm5
-
-// CHECK: psraw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0x78,0x56,0x34,0x12]
-        	psraw	0x12345678,%xmm5
-
-// CHECK: psraw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe1,0xed]
-        	psraw	%xmm5,%xmm5
-
-// CHECK: psraw	$127, %mm3
-// CHECK:  encoding: [0x0f,0x71,0xe3,0x7f]
-        	psraw	$0x7f,%mm3
-
-// CHECK: psraw	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x71,0xe5,0x7f]
-        	psraw	$0x7f,%xmm5
-
-// CHECK: psrad	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psrad	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psrad	69, %mm3
-// CHECK:  encoding: [0x0f,0xe2,0x1d,0x45,0x00,0x00,0x00]
-        	psrad	0x45,%mm3
-
-// CHECK: psrad	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe2,0x1d,0xed,0x7e,0x00,0x00]
-        	psrad	0x7eed,%mm3
-
-// CHECK: psrad	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe2,0x1d,0xfe,0xca,0xbe,0xba]
-        	psrad	0xbabecafe,%mm3
-
-// CHECK: psrad	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe2,0x1d,0x78,0x56,0x34,0x12]
-        	psrad	0x12345678,%mm3
-
-// CHECK: psrad	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe2,0xdb]
-        	psrad	%mm3,%mm3
-
-// CHECK: psrad	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psrad	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psrad	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0x45,0x00,0x00,0x00]
-        	psrad	0x45,%xmm5
-
-// CHECK: psrad	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0xed,0x7e,0x00,0x00]
-        	psrad	0x7eed,%xmm5
-
-// CHECK: psrad	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0xfe,0xca,0xbe,0xba]
-        	psrad	0xbabecafe,%xmm5
-
-// CHECK: psrad	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0x78,0x56,0x34,0x12]
-        	psrad	0x12345678,%xmm5
-
-// CHECK: psrad	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe2,0xed]
-        	psrad	%xmm5,%xmm5
-
-// CHECK: psrad	$127, %mm3
-// CHECK:  encoding: [0x0f,0x72,0xe3,0x7f]
-        	psrad	$0x7f,%mm3
-
-// CHECK: psrad	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x72,0xe5,0x7f]
-        	psrad	$0x7f,%xmm5
-
-// CHECK: psrlw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psrlw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psrlw	69, %mm3
-// CHECK:  encoding: [0x0f,0xd1,0x1d,0x45,0x00,0x00,0x00]
-        	psrlw	0x45,%mm3
-
-// CHECK: psrlw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd1,0x1d,0xed,0x7e,0x00,0x00]
-        	psrlw	0x7eed,%mm3
-
-// CHECK: psrlw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd1,0x1d,0xfe,0xca,0xbe,0xba]
-        	psrlw	0xbabecafe,%mm3
-
-// CHECK: psrlw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd1,0x1d,0x78,0x56,0x34,0x12]
-        	psrlw	0x12345678,%mm3
-
-// CHECK: psrlw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd1,0xdb]
-        	psrlw	%mm3,%mm3
-
-// CHECK: psrlw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psrlw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psrlw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0x45,0x00,0x00,0x00]
-        	psrlw	0x45,%xmm5
-
-// CHECK: psrlw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0xed,0x7e,0x00,0x00]
-        	psrlw	0x7eed,%xmm5
-
-// CHECK: psrlw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0xfe,0xca,0xbe,0xba]
-        	psrlw	0xbabecafe,%xmm5
-
-// CHECK: psrlw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0x78,0x56,0x34,0x12]
-        	psrlw	0x12345678,%xmm5
-
-// CHECK: psrlw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd1,0xed]
-        	psrlw	%xmm5,%xmm5
-
-// CHECK: psrlw	$127, %mm3
-// CHECK:  encoding: [0x0f,0x71,0xd3,0x7f]
-        	psrlw	$0x7f,%mm3
-
-// CHECK: psrlw	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x71,0xd5,0x7f]
-        	psrlw	$0x7f,%xmm5
-
-// CHECK: psrld	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psrld	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psrld	69, %mm3
-// CHECK:  encoding: [0x0f,0xd2,0x1d,0x45,0x00,0x00,0x00]
-        	psrld	0x45,%mm3
-
-// CHECK: psrld	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd2,0x1d,0xed,0x7e,0x00,0x00]
-        	psrld	0x7eed,%mm3
-
-// CHECK: psrld	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd2,0x1d,0xfe,0xca,0xbe,0xba]
-        	psrld	0xbabecafe,%mm3
-
-// CHECK: psrld	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd2,0x1d,0x78,0x56,0x34,0x12]
-        	psrld	0x12345678,%mm3
-
-// CHECK: psrld	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd2,0xdb]
-        	psrld	%mm3,%mm3
-
-// CHECK: psrld	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psrld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psrld	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0x45,0x00,0x00,0x00]
-        	psrld	0x45,%xmm5
-
-// CHECK: psrld	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0xed,0x7e,0x00,0x00]
-        	psrld	0x7eed,%xmm5
-
-// CHECK: psrld	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0xfe,0xca,0xbe,0xba]
-        	psrld	0xbabecafe,%xmm5
-
-// CHECK: psrld	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0x78,0x56,0x34,0x12]
-        	psrld	0x12345678,%xmm5
-
-// CHECK: psrld	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd2,0xed]
-        	psrld	%xmm5,%xmm5
-
-// CHECK: psrld	$127, %mm3
-// CHECK:  encoding: [0x0f,0x72,0xd3,0x7f]
-        	psrld	$0x7f,%mm3
-
-// CHECK: psrld	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x72,0xd5,0x7f]
-        	psrld	$0x7f,%xmm5
-
-// CHECK: psrlq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psrlq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psrlq	69, %mm3
-// CHECK:  encoding: [0x0f,0xd3,0x1d,0x45,0x00,0x00,0x00]
-        	psrlq	0x45,%mm3
-
-// CHECK: psrlq	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd3,0x1d,0xed,0x7e,0x00,0x00]
-        	psrlq	0x7eed,%mm3
-
-// CHECK: psrlq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd3,0x1d,0xfe,0xca,0xbe,0xba]
-        	psrlq	0xbabecafe,%mm3
-
-// CHECK: psrlq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd3,0x1d,0x78,0x56,0x34,0x12]
-        	psrlq	0x12345678,%mm3
-
-// CHECK: psrlq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd3,0xdb]
-        	psrlq	%mm3,%mm3
-
-// CHECK: psrlq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psrlq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psrlq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0x45,0x00,0x00,0x00]
-        	psrlq	0x45,%xmm5
-
-// CHECK: psrlq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0xed,0x7e,0x00,0x00]
-        	psrlq	0x7eed,%xmm5
-
-// CHECK: psrlq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0xfe,0xca,0xbe,0xba]
-        	psrlq	0xbabecafe,%xmm5
-
-// CHECK: psrlq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0x78,0x56,0x34,0x12]
-        	psrlq	0x12345678,%xmm5
-
-// CHECK: psrlq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd3,0xed]
-        	psrlq	%xmm5,%xmm5
-
-// CHECK: psrlq	$127, %mm3
-// CHECK:  encoding: [0x0f,0x73,0xd3,0x7f]
-        	psrlq	$0x7f,%mm3
-
-// CHECK: psrlq	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x73,0xd5,0x7f]
-        	psrlq	$0x7f,%xmm5
-
-// CHECK: psubb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubb	69, %mm3
-// CHECK:  encoding: [0x0f,0xf8,0x1d,0x45,0x00,0x00,0x00]
-        	psubb	0x45,%mm3
-
-// CHECK: psubb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf8,0x1d,0xed,0x7e,0x00,0x00]
-        	psubb	0x7eed,%mm3
-
-// CHECK: psubb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf8,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubb	0xbabecafe,%mm3
-
-// CHECK: psubb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf8,0x1d,0x78,0x56,0x34,0x12]
-        	psubb	0x12345678,%mm3
-
-// CHECK: psubb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf8,0xdb]
-        	psubb	%mm3,%mm3
-
-// CHECK: psubb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0x45,0x00,0x00,0x00]
-        	psubb	0x45,%xmm5
-
-// CHECK: psubb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0xed,0x7e,0x00,0x00]
-        	psubb	0x7eed,%xmm5
-
-// CHECK: psubb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubb	0xbabecafe,%xmm5
-
-// CHECK: psubb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0x78,0x56,0x34,0x12]
-        	psubb	0x12345678,%xmm5
-
-// CHECK: psubb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf8,0xed]
-        	psubb	%xmm5,%xmm5
-
-// CHECK: psubw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubw	69, %mm3
-// CHECK:  encoding: [0x0f,0xf9,0x1d,0x45,0x00,0x00,0x00]
-        	psubw	0x45,%mm3
-
-// CHECK: psubw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf9,0x1d,0xed,0x7e,0x00,0x00]
-        	psubw	0x7eed,%mm3
-
-// CHECK: psubw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf9,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubw	0xbabecafe,%mm3
-
-// CHECK: psubw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf9,0x1d,0x78,0x56,0x34,0x12]
-        	psubw	0x12345678,%mm3
-
-// CHECK: psubw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf9,0xdb]
-        	psubw	%mm3,%mm3
-
-// CHECK: psubw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0x45,0x00,0x00,0x00]
-        	psubw	0x45,%xmm5
-
-// CHECK: psubw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0xed,0x7e,0x00,0x00]
-        	psubw	0x7eed,%xmm5
-
-// CHECK: psubw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubw	0xbabecafe,%xmm5
-
-// CHECK: psubw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0x78,0x56,0x34,0x12]
-        	psubw	0x12345678,%xmm5
-
-// CHECK: psubw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf9,0xed]
-        	psubw	%xmm5,%xmm5
-
-// CHECK: psubd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xfa,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubd	69, %mm3
-// CHECK:  encoding: [0x0f,0xfa,0x1d,0x45,0x00,0x00,0x00]
-        	psubd	0x45,%mm3
-
-// CHECK: psubd	32493, %mm3
-// CHECK:  encoding: [0x0f,0xfa,0x1d,0xed,0x7e,0x00,0x00]
-        	psubd	0x7eed,%mm3
-
-// CHECK: psubd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xfa,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubd	0xbabecafe,%mm3
-
-// CHECK: psubd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xfa,0x1d,0x78,0x56,0x34,0x12]
-        	psubd	0x12345678,%mm3
-
-// CHECK: psubd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xfa,0xdb]
-        	psubd	%mm3,%mm3
-
-// CHECK: psubd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0x45,0x00,0x00,0x00]
-        	psubd	0x45,%xmm5
-
-// CHECK: psubd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0xed,0x7e,0x00,0x00]
-        	psubd	0x7eed,%xmm5
-
-// CHECK: psubd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubd	0xbabecafe,%xmm5
-
-// CHECK: psubd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0x78,0x56,0x34,0x12]
-        	psubd	0x12345678,%xmm5
-
-// CHECK: psubd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfa,0xed]
-        	psubd	%xmm5,%xmm5
-
-// CHECK: psubq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xfb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubq	69, %mm3
-// CHECK:  encoding: [0x0f,0xfb,0x1d,0x45,0x00,0x00,0x00]
-        	psubq	0x45,%mm3
-
-// CHECK: psubq	32493, %mm3
-// CHECK:  encoding: [0x0f,0xfb,0x1d,0xed,0x7e,0x00,0x00]
-        	psubq	0x7eed,%mm3
-
-// CHECK: psubq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xfb,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubq	0xbabecafe,%mm3
-
-// CHECK: psubq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xfb,0x1d,0x78,0x56,0x34,0x12]
-        	psubq	0x12345678,%mm3
-
-// CHECK: psubq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xfb,0xdb]
-        	psubq	%mm3,%mm3
-
-// CHECK: psubq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0x45,0x00,0x00,0x00]
-        	psubq	0x45,%xmm5
-
-// CHECK: psubq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0xed,0x7e,0x00,0x00]
-        	psubq	0x7eed,%xmm5
-
-// CHECK: psubq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubq	0xbabecafe,%xmm5
-
-// CHECK: psubq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0x78,0x56,0x34,0x12]
-        	psubq	0x12345678,%xmm5
-
-// CHECK: psubq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xfb,0xed]
-        	psubq	%xmm5,%xmm5
-
-// CHECK: psubsb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubsb	69, %mm3
-// CHECK:  encoding: [0x0f,0xe8,0x1d,0x45,0x00,0x00,0x00]
-        	psubsb	0x45,%mm3
-
-// CHECK: psubsb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe8,0x1d,0xed,0x7e,0x00,0x00]
-        	psubsb	0x7eed,%mm3
-
-// CHECK: psubsb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe8,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubsb	0xbabecafe,%mm3
-
-// CHECK: psubsb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe8,0x1d,0x78,0x56,0x34,0x12]
-        	psubsb	0x12345678,%mm3
-
-// CHECK: psubsb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe8,0xdb]
-        	psubsb	%mm3,%mm3
-
-// CHECK: psubsb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubsb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0x45,0x00,0x00,0x00]
-        	psubsb	0x45,%xmm5
-
-// CHECK: psubsb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0xed,0x7e,0x00,0x00]
-        	psubsb	0x7eed,%xmm5
-
-// CHECK: psubsb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubsb	0xbabecafe,%xmm5
-
-// CHECK: psubsb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0x78,0x56,0x34,0x12]
-        	psubsb	0x12345678,%xmm5
-
-// CHECK: psubsb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe8,0xed]
-        	psubsb	%xmm5,%xmm5
-
-// CHECK: psubsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubsw	69, %mm3
-// CHECK:  encoding: [0x0f,0xe9,0x1d,0x45,0x00,0x00,0x00]
-        	psubsw	0x45,%mm3
-
-// CHECK: psubsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe9,0x1d,0xed,0x7e,0x00,0x00]
-        	psubsw	0x7eed,%mm3
-
-// CHECK: psubsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe9,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubsw	0xbabecafe,%mm3
-
-// CHECK: psubsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe9,0x1d,0x78,0x56,0x34,0x12]
-        	psubsw	0x12345678,%mm3
-
-// CHECK: psubsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe9,0xdb]
-        	psubsw	%mm3,%mm3
-
-// CHECK: psubsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0x45,0x00,0x00,0x00]
-        	psubsw	0x45,%xmm5
-
-// CHECK: psubsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0xed,0x7e,0x00,0x00]
-        	psubsw	0x7eed,%xmm5
-
-// CHECK: psubsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubsw	0xbabecafe,%xmm5
-
-// CHECK: psubsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0x78,0x56,0x34,0x12]
-        	psubsw	0x12345678,%xmm5
-
-// CHECK: psubsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe9,0xed]
-        	psubsw	%xmm5,%xmm5
-
-// CHECK: psubusb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubusb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubusb	69, %mm3
-// CHECK:  encoding: [0x0f,0xd8,0x1d,0x45,0x00,0x00,0x00]
-        	psubusb	0x45,%mm3
-
-// CHECK: psubusb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd8,0x1d,0xed,0x7e,0x00,0x00]
-        	psubusb	0x7eed,%mm3
-
-// CHECK: psubusb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd8,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubusb	0xbabecafe,%mm3
-
-// CHECK: psubusb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd8,0x1d,0x78,0x56,0x34,0x12]
-        	psubusb	0x12345678,%mm3
-
-// CHECK: psubusb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd8,0xdb]
-        	psubusb	%mm3,%mm3
-
-// CHECK: psubusb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubusb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0x45,0x00,0x00,0x00]
-        	psubusb	0x45,%xmm5
-
-// CHECK: psubusb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0xed,0x7e,0x00,0x00]
-        	psubusb	0x7eed,%xmm5
-
-// CHECK: psubusb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubusb	0xbabecafe,%xmm5
-
-// CHECK: psubusb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0x78,0x56,0x34,0x12]
-        	psubusb	0x12345678,%xmm5
-
-// CHECK: psubusb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd8,0xed]
-        	psubusb	%xmm5,%xmm5
-
-// CHECK: psubusw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xd9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psubusw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psubusw	69, %mm3
-// CHECK:  encoding: [0x0f,0xd9,0x1d,0x45,0x00,0x00,0x00]
-        	psubusw	0x45,%mm3
-
-// CHECK: psubusw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xd9,0x1d,0xed,0x7e,0x00,0x00]
-        	psubusw	0x7eed,%mm3
-
-// CHECK: psubusw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xd9,0x1d,0xfe,0xca,0xbe,0xba]
-        	psubusw	0xbabecafe,%mm3
-
-// CHECK: psubusw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xd9,0x1d,0x78,0x56,0x34,0x12]
-        	psubusw	0x12345678,%mm3
-
-// CHECK: psubusw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xd9,0xdb]
-        	psubusw	%mm3,%mm3
-
-// CHECK: psubusw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psubusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psubusw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0x45,0x00,0x00,0x00]
-        	psubusw	0x45,%xmm5
-
-// CHECK: psubusw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0xed,0x7e,0x00,0x00]
-        	psubusw	0x7eed,%xmm5
-
-// CHECK: psubusw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0xfe,0xca,0xbe,0xba]
-        	psubusw	0xbabecafe,%xmm5
-
-// CHECK: psubusw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0x78,0x56,0x34,0x12]
-        	psubusw	0x12345678,%xmm5
-
-// CHECK: psubusw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd9,0xed]
-        	psubusw	%xmm5,%xmm5
-
-// CHECK: punpckhbw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x68,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpckhbw	69, %mm3
-// CHECK:  encoding: [0x0f,0x68,0x1d,0x45,0x00,0x00,0x00]
-        	punpckhbw	0x45,%mm3
-
-// CHECK: punpckhbw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x68,0x1d,0xed,0x7e,0x00,0x00]
-        	punpckhbw	0x7eed,%mm3
-
-// CHECK: punpckhbw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x68,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpckhbw	0xbabecafe,%mm3
-
-// CHECK: punpckhbw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x68,0x1d,0x78,0x56,0x34,0x12]
-        	punpckhbw	0x12345678,%mm3
-
-// CHECK: punpckhbw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x68,0xdb]
-        	punpckhbw	%mm3,%mm3
-
-// CHECK: punpckhbw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpckhbw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0x45,0x00,0x00,0x00]
-        	punpckhbw	0x45,%xmm5
-
-// CHECK: punpckhbw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0xed,0x7e,0x00,0x00]
-        	punpckhbw	0x7eed,%xmm5
-
-// CHECK: punpckhbw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpckhbw	0xbabecafe,%xmm5
-
-// CHECK: punpckhbw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0x78,0x56,0x34,0x12]
-        	punpckhbw	0x12345678,%xmm5
-
-// CHECK: punpckhbw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x68,0xed]
-        	punpckhbw	%xmm5,%xmm5
-
-// CHECK: punpckhwd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x69,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpckhwd	69, %mm3
-// CHECK:  encoding: [0x0f,0x69,0x1d,0x45,0x00,0x00,0x00]
-        	punpckhwd	0x45,%mm3
-
-// CHECK: punpckhwd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x69,0x1d,0xed,0x7e,0x00,0x00]
-        	punpckhwd	0x7eed,%mm3
-
-// CHECK: punpckhwd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x69,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpckhwd	0xbabecafe,%mm3
-
-// CHECK: punpckhwd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x69,0x1d,0x78,0x56,0x34,0x12]
-        	punpckhwd	0x12345678,%mm3
-
-// CHECK: punpckhwd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x69,0xdb]
-        	punpckhwd	%mm3,%mm3
-
-// CHECK: punpckhwd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpckhwd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0x45,0x00,0x00,0x00]
-        	punpckhwd	0x45,%xmm5
-
-// CHECK: punpckhwd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0xed,0x7e,0x00,0x00]
-        	punpckhwd	0x7eed,%xmm5
-
-// CHECK: punpckhwd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpckhwd	0xbabecafe,%xmm5
-
-// CHECK: punpckhwd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0x78,0x56,0x34,0x12]
-        	punpckhwd	0x12345678,%xmm5
-
-// CHECK: punpckhwd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x69,0xed]
-        	punpckhwd	%xmm5,%xmm5
-
-// CHECK: punpckhdq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x6a,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpckhdq	69, %mm3
-// CHECK:  encoding: [0x0f,0x6a,0x1d,0x45,0x00,0x00,0x00]
-        	punpckhdq	0x45,%mm3
-
-// CHECK: punpckhdq	32493, %mm3
-// CHECK:  encoding: [0x0f,0x6a,0x1d,0xed,0x7e,0x00,0x00]
-        	punpckhdq	0x7eed,%mm3
-
-// CHECK: punpckhdq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x6a,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpckhdq	0xbabecafe,%mm3
-
-// CHECK: punpckhdq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x6a,0x1d,0x78,0x56,0x34,0x12]
-        	punpckhdq	0x12345678,%mm3
-
-// CHECK: punpckhdq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x6a,0xdb]
-        	punpckhdq	%mm3,%mm3
-
-// CHECK: punpckhdq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpckhdq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0x45,0x00,0x00,0x00]
-        	punpckhdq	0x45,%xmm5
-
-// CHECK: punpckhdq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0xed,0x7e,0x00,0x00]
-        	punpckhdq	0x7eed,%xmm5
-
-// CHECK: punpckhdq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpckhdq	0xbabecafe,%xmm5
-
-// CHECK: punpckhdq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0x78,0x56,0x34,0x12]
-        	punpckhdq	0x12345678,%xmm5
-
-// CHECK: punpckhdq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6a,0xed]
-        	punpckhdq	%xmm5,%xmm5
-
-// CHECK: punpcklbw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x60,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpcklbw	69, %mm3
-// CHECK:  encoding: [0x0f,0x60,0x1d,0x45,0x00,0x00,0x00]
-        	punpcklbw	0x45,%mm3
-
-// CHECK: punpcklbw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x60,0x1d,0xed,0x7e,0x00,0x00]
-        	punpcklbw	0x7eed,%mm3
-
-// CHECK: punpcklbw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x60,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpcklbw	0xbabecafe,%mm3
-
-// CHECK: punpcklbw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x60,0x1d,0x78,0x56,0x34,0x12]
-        	punpcklbw	0x12345678,%mm3
-
-// CHECK: punpcklbw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x60,0xdb]
-        	punpcklbw	%mm3,%mm3
-
-// CHECK: punpcklbw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpcklbw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0x45,0x00,0x00,0x00]
-        	punpcklbw	0x45,%xmm5
-
-// CHECK: punpcklbw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0xed,0x7e,0x00,0x00]
-        	punpcklbw	0x7eed,%xmm5
-
-// CHECK: punpcklbw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpcklbw	0xbabecafe,%xmm5
-
-// CHECK: punpcklbw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0x78,0x56,0x34,0x12]
-        	punpcklbw	0x12345678,%xmm5
-
-// CHECK: punpcklbw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x60,0xed]
-        	punpcklbw	%xmm5,%xmm5
-
-// CHECK: punpcklwd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x61,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpcklwd	69, %mm3
-// CHECK:  encoding: [0x0f,0x61,0x1d,0x45,0x00,0x00,0x00]
-        	punpcklwd	0x45,%mm3
-
-// CHECK: punpcklwd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x61,0x1d,0xed,0x7e,0x00,0x00]
-        	punpcklwd	0x7eed,%mm3
-
-// CHECK: punpcklwd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x61,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpcklwd	0xbabecafe,%mm3
-
-// CHECK: punpcklwd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x61,0x1d,0x78,0x56,0x34,0x12]
-        	punpcklwd	0x12345678,%mm3
-
-// CHECK: punpcklwd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x61,0xdb]
-        	punpcklwd	%mm3,%mm3
-
-// CHECK: punpcklwd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpcklwd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0x45,0x00,0x00,0x00]
-        	punpcklwd	0x45,%xmm5
-
-// CHECK: punpcklwd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0xed,0x7e,0x00,0x00]
-        	punpcklwd	0x7eed,%xmm5
-
-// CHECK: punpcklwd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpcklwd	0xbabecafe,%xmm5
-
-// CHECK: punpcklwd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0x78,0x56,0x34,0x12]
-        	punpcklwd	0x12345678,%xmm5
-
-// CHECK: punpcklwd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x61,0xed]
-        	punpcklwd	%xmm5,%xmm5
-
-// CHECK: punpckldq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x62,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: punpckldq	69, %mm3
-// CHECK:  encoding: [0x0f,0x62,0x1d,0x45,0x00,0x00,0x00]
-        	punpckldq	0x45,%mm3
-
-// CHECK: punpckldq	32493, %mm3
-// CHECK:  encoding: [0x0f,0x62,0x1d,0xed,0x7e,0x00,0x00]
-        	punpckldq	0x7eed,%mm3
-
-// CHECK: punpckldq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x62,0x1d,0xfe,0xca,0xbe,0xba]
-        	punpckldq	0xbabecafe,%mm3
-
-// CHECK: punpckldq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x62,0x1d,0x78,0x56,0x34,0x12]
-        	punpckldq	0x12345678,%mm3
-
-// CHECK: punpckldq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x62,0xdb]
-        	punpckldq	%mm3,%mm3
-
-// CHECK: punpckldq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpckldq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0x45,0x00,0x00,0x00]
-        	punpckldq	0x45,%xmm5
-
-// CHECK: punpckldq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0xed,0x7e,0x00,0x00]
-        	punpckldq	0x7eed,%xmm5
-
-// CHECK: punpckldq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpckldq	0xbabecafe,%xmm5
-
-// CHECK: punpckldq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0x78,0x56,0x34,0x12]
-        	punpckldq	0x12345678,%xmm5
-
-// CHECK: punpckldq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x62,0xed]
-        	punpckldq	%xmm5,%xmm5
-
-// CHECK: pxor	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xef,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pxor	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pxor	69, %mm3
-// CHECK:  encoding: [0x0f,0xef,0x1d,0x45,0x00,0x00,0x00]
-        	pxor	0x45,%mm3
-
-// CHECK: pxor	32493, %mm3
-// CHECK:  encoding: [0x0f,0xef,0x1d,0xed,0x7e,0x00,0x00]
-        	pxor	0x7eed,%mm3
-
-// CHECK: pxor	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xef,0x1d,0xfe,0xca,0xbe,0xba]
-        	pxor	0xbabecafe,%mm3
-
-// CHECK: pxor	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xef,0x1d,0x78,0x56,0x34,0x12]
-        	pxor	0x12345678,%mm3
-
-// CHECK: pxor	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xef,0xdb]
-        	pxor	%mm3,%mm3
-
-// CHECK: pxor	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pxor	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pxor	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0x45,0x00,0x00,0x00]
-        	pxor	0x45,%xmm5
-
-// CHECK: pxor	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0xed,0x7e,0x00,0x00]
-        	pxor	0x7eed,%xmm5
-
-// CHECK: pxor	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0xfe,0xca,0xbe,0xba]
-        	pxor	0xbabecafe,%xmm5
-
-// CHECK: pxor	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0x78,0x56,0x34,0x12]
-        	pxor	0x12345678,%xmm5
-
-// CHECK: pxor	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xef,0xed]
-        	pxor	%xmm5,%xmm5
-
-// CHECK: addps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
-        	addps	0x45,%xmm5
-
-// CHECK: addps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
-        	addps	0x7eed,%xmm5
-
-// CHECK: addps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
-        	addps	0xbabecafe,%xmm5
-
-// CHECK: addps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
-        	addps	0x12345678,%xmm5
-
-// CHECK: addps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x58,0xed]
-        	addps	%xmm5,%xmm5
-
-// CHECK: addss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
-        	addss	0x45,%xmm5
-
-// CHECK: addss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
-        	addss	0x7eed,%xmm5
-
-// CHECK: addss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
-        	addss	0xbabecafe,%xmm5
-
-// CHECK: addss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
-        	addss	0x12345678,%xmm5
-
-// CHECK: addss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x58,0xed]
-        	addss	%xmm5,%xmm5
-
-// CHECK: andnps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x55,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	andnps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: andnps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x55,0x2d,0x45,0x00,0x00,0x00]
-        	andnps	0x45,%xmm5
-
-// CHECK: andnps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x55,0x2d,0xed,0x7e,0x00,0x00]
-        	andnps	0x7eed,%xmm5
-
-// CHECK: andnps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x55,0x2d,0xfe,0xca,0xbe,0xba]
-        	andnps	0xbabecafe,%xmm5
-
-// CHECK: andnps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x55,0x2d,0x78,0x56,0x34,0x12]
-        	andnps	0x12345678,%xmm5
-
-// CHECK: andnps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x55,0xed]
-        	andnps	%xmm5,%xmm5
-
-// CHECK: andps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x54,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	andps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: andps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x54,0x2d,0x45,0x00,0x00,0x00]
-        	andps	0x45,%xmm5
-
-// CHECK: andps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x54,0x2d,0xed,0x7e,0x00,0x00]
-        	andps	0x7eed,%xmm5
-
-// CHECK: andps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x54,0x2d,0xfe,0xca,0xbe,0xba]
-        	andps	0xbabecafe,%xmm5
-
-// CHECK: andps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x54,0x2d,0x78,0x56,0x34,0x12]
-        	andps	0x12345678,%xmm5
-
-// CHECK: andps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x54,0xed]
-        	andps	%xmm5,%xmm5
-
-// CHECK: comiss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	comiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: comiss	69, %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0x2d,0x45,0x00,0x00,0x00]
-        	comiss	0x45,%xmm5
-
-// CHECK: comiss	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0x2d,0xed,0x7e,0x00,0x00]
-        	comiss	0x7eed,%xmm5
-
-// CHECK: comiss	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0x2d,0xfe,0xca,0xbe,0xba]
-        	comiss	0xbabecafe,%xmm5
-
-// CHECK: comiss	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0x2d,0x78,0x56,0x34,0x12]
-        	comiss	0x12345678,%xmm5
-
-// CHECK: comiss	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x2f,0xed]
-        	comiss	%xmm5,%xmm5
-
-// CHECK: cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtpi2ps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtpi2ps	0x45,%xmm5
-
-// CHECK: cvtpi2ps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtpi2ps	0x7eed,%xmm5
-
-// CHECK: cvtpi2ps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtpi2ps	0xbabecafe,%xmm5
-
-// CHECK: cvtpi2ps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtpi2ps	0x12345678,%xmm5
-
-// CHECK: cvtpi2ps	%mm3, %xmm5
-// CHECK:  encoding: [0x0f,0x2a,0xeb]
-        	cvtpi2ps	%mm3,%xmm5
-
-// CHECK: cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x2d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: cvtps2pi	69, %mm3
-// CHECK:  encoding: [0x0f,0x2d,0x1d,0x45,0x00,0x00,0x00]
-        	cvtps2pi	0x45,%mm3
-
-// CHECK: cvtps2pi	32493, %mm3
-// CHECK:  encoding: [0x0f,0x2d,0x1d,0xed,0x7e,0x00,0x00]
-        	cvtps2pi	0x7eed,%mm3
-
-// CHECK: cvtps2pi	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x2d,0x1d,0xfe,0xca,0xbe,0xba]
-        	cvtps2pi	0xbabecafe,%mm3
-
-// CHECK: cvtps2pi	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x2d,0x1d,0x78,0x56,0x34,0x12]
-        	cvtps2pi	0x12345678,%mm3
-
-// CHECK: cvtps2pi	%xmm5, %mm3
-// CHECK:  encoding: [0x0f,0x2d,0xdd]
-        	cvtps2pi	%xmm5,%mm3
-
-// CHECK: cvtsi2ss	%ecx, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0xe9]
-        	cvtsi2ss	%ecx,%xmm5
-
-// CHECK: cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtsi2ss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtsi2ss	0x45,%xmm5
-
-// CHECK: cvtsi2ss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtsi2ss	0x7eed,%xmm5
-
-// CHECK: cvtsi2ss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtsi2ss	0xbabecafe,%xmm5
-
-// CHECK: cvtsi2ss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtsi2ss	0x12345678,%xmm5
-
-// CHECK: cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x2c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: cvttps2pi	69, %mm3
-// CHECK:  encoding: [0x0f,0x2c,0x1d,0x45,0x00,0x00,0x00]
-        	cvttps2pi	0x45,%mm3
-
-// CHECK: cvttps2pi	32493, %mm3
-// CHECK:  encoding: [0x0f,0x2c,0x1d,0xed,0x7e,0x00,0x00]
-        	cvttps2pi	0x7eed,%mm3
-
-// CHECK: cvttps2pi	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x2c,0x1d,0xfe,0xca,0xbe,0xba]
-        	cvttps2pi	0xbabecafe,%mm3
-
-// CHECK: cvttps2pi	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x2c,0x1d,0x78,0x56,0x34,0x12]
-        	cvttps2pi	0x12345678,%mm3
-
-// CHECK: cvttps2pi	%xmm5, %mm3
-// CHECK:  encoding: [0x0f,0x2c,0xdd]
-        	cvttps2pi	%xmm5,%mm3
-
-// CHECK: cvttss2si	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: cvttss2si	69, %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0x45,0x00,0x00,0x00]
-        	cvttss2si	0x45,%ecx
-
-// CHECK: cvttss2si	32493, %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0xed,0x7e,0x00,0x00]
-        	cvttss2si	0x7eed,%ecx
-
-// CHECK: cvttss2si	3133065982, %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0xfe,0xca,0xbe,0xba]
-        	cvttss2si	0xbabecafe,%ecx
-
-// CHECK: cvttss2si	305419896, %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0x78,0x56,0x34,0x12]
-        	cvttss2si	0x12345678,%ecx
-
-// CHECK: cvttss2si	%xmm5, %ecx
-// CHECK:  encoding: [0xf3,0x0f,0x2c,0xcd]
-        	cvttss2si	%xmm5,%ecx
-
-// CHECK: divps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	divps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: divps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
-        	divps	0x45,%xmm5
-
-// CHECK: divps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
-        	divps	0x7eed,%xmm5
-
-// CHECK: divps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
-        	divps	0xbabecafe,%xmm5
-
-// CHECK: divps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
-        	divps	0x12345678,%xmm5
-
-// CHECK: divps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5e,0xed]
-        	divps	%xmm5,%xmm5
-
-// CHECK: divss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	divss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: divss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
-        	divss	0x45,%xmm5
-
-// CHECK: divss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
-        	divss	0x7eed,%xmm5
-
-// CHECK: divss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
-        	divss	0xbabecafe,%xmm5
-
-// CHECK: divss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
-        	divss	0x12345678,%xmm5
-
-// CHECK: divss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5e,0xed]
-        	divss	%xmm5,%xmm5
-
-// CHECK: ldmxcsr	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xae,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: ldmxcsr	32493
-// CHECK:  encoding: [0x0f,0xae,0x15,0xed,0x7e,0x00,0x00]
-        	ldmxcsr	0x7eed
-
-// CHECK: ldmxcsr	3133065982
-// CHECK:  encoding: [0x0f,0xae,0x15,0xfe,0xca,0xbe,0xba]
-        	ldmxcsr	0xbabecafe
-
-// CHECK: ldmxcsr	305419896
-// CHECK:  encoding: [0x0f,0xae,0x15,0x78,0x56,0x34,0x12]
-        	ldmxcsr	0x12345678
-
-// CHECK: maskmovq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf7,0xdb]
-        	maskmovq	%mm3,%mm3
-
-// CHECK: maxps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	maxps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: maxps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
-        	maxps	0x45,%xmm5
-
-// CHECK: maxps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
-        	maxps	0x7eed,%xmm5
-
-// CHECK: maxps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
-        	maxps	0xbabecafe,%xmm5
-
-// CHECK: maxps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
-        	maxps	0x12345678,%xmm5
-
-// CHECK: maxps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5f,0xed]
-        	maxps	%xmm5,%xmm5
-
-// CHECK: maxss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	maxss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: maxss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
-        	maxss	0x45,%xmm5
-
-// CHECK: maxss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
-        	maxss	0x7eed,%xmm5
-
-// CHECK: maxss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
-        	maxss	0xbabecafe,%xmm5
-
-// CHECK: maxss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
-        	maxss	0x12345678,%xmm5
-
-// CHECK: maxss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5f,0xed]
-        	maxss	%xmm5,%xmm5
-
-// CHECK: minps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	minps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: minps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
-        	minps	0x45,%xmm5
-
-// CHECK: minps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
-        	minps	0x7eed,%xmm5
-
-// CHECK: minps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
-        	minps	0xbabecafe,%xmm5
-
-// CHECK: minps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
-        	minps	0x12345678,%xmm5
-
-// CHECK: minps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5d,0xed]
-        	minps	%xmm5,%xmm5
-
-// CHECK: minss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	minss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: minss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
-        	minss	0x45,%xmm5
-
-// CHECK: minss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
-        	minss	0x7eed,%xmm5
-
-// CHECK: minss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
-        	minss	0xbabecafe,%xmm5
-
-// CHECK: minss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
-        	minss	0x12345678,%xmm5
-
-// CHECK: minss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5d,0xed]
-        	minss	%xmm5,%xmm5
-
-// CHECK: movaps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movaps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0x2d,0x45,0x00,0x00,0x00]
-        	movaps	0x45,%xmm5
-
-// CHECK: movaps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0x2d,0xed,0x7e,0x00,0x00]
-        	movaps	0x7eed,%xmm5
-
-// CHECK: movaps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0x2d,0xfe,0xca,0xbe,0xba]
-        	movaps	0xbabecafe,%xmm5
-
-// CHECK: movaps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0x2d,0x78,0x56,0x34,0x12]
-        	movaps	0x12345678,%xmm5
-
-// CHECK: movaps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0xed]
-        	movaps	%xmm5,%xmm5
-
-// CHECK: movaps	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movaps	%xmm5, 69
-// CHECK:  encoding: [0x0f,0x29,0x2d,0x45,0x00,0x00,0x00]
-        	movaps	%xmm5,0x45
-
-// CHECK: movaps	%xmm5, 32493
-// CHECK:  encoding: [0x0f,0x29,0x2d,0xed,0x7e,0x00,0x00]
-        	movaps	%xmm5,0x7eed
-
-// CHECK: movaps	%xmm5, 3133065982
-// CHECK:  encoding: [0x0f,0x29,0x2d,0xfe,0xca,0xbe,0xba]
-        	movaps	%xmm5,0xbabecafe
-
-// CHECK: movaps	%xmm5, 305419896
-// CHECK:  encoding: [0x0f,0x29,0x2d,0x78,0x56,0x34,0x12]
-        	movaps	%xmm5,0x12345678
-
-// CHECK: movaps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x28,0xed]
-        	movaps	%xmm5,%xmm5
-
-// CHECK: movhlps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x12,0xed]
-        	movhlps	%xmm5,%xmm5
-
-// CHECK: movhps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movhps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
-        	movhps	0x45,%xmm5
-
-// CHECK: movhps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
-        	movhps	0x7eed,%xmm5
-
-// CHECK: movhps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
-        	movhps	0xbabecafe,%xmm5
-
-// CHECK: movhps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
-        	movhps	0x12345678,%xmm5
-
-// CHECK: movhps	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movhps	%xmm5, 69
-// CHECK:  encoding: [0x0f,0x17,0x2d,0x45,0x00,0x00,0x00]
-        	movhps	%xmm5,0x45
-
-// CHECK: movhps	%xmm5, 32493
-// CHECK:  encoding: [0x0f,0x17,0x2d,0xed,0x7e,0x00,0x00]
-        	movhps	%xmm5,0x7eed
-
-// CHECK: movhps	%xmm5, 3133065982
-// CHECK:  encoding: [0x0f,0x17,0x2d,0xfe,0xca,0xbe,0xba]
-        	movhps	%xmm5,0xbabecafe
-
-// CHECK: movhps	%xmm5, 305419896
-// CHECK:  encoding: [0x0f,0x17,0x2d,0x78,0x56,0x34,0x12]
-        	movhps	%xmm5,0x12345678
-
-// CHECK: movlhps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x16,0xed]
-        	movlhps	%xmm5,%xmm5
-
-// CHECK: movlps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movlps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movlps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
-        	movlps	0x45,%xmm5
-
-// CHECK: movlps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
-        	movlps	0x7eed,%xmm5
-
-// CHECK: movlps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
-        	movlps	0xbabecafe,%xmm5
-
-// CHECK: movlps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
-        	movlps	0x12345678,%xmm5
-
-// CHECK: movlps	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x13,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movlps	%xmm5, 69
-// CHECK:  encoding: [0x0f,0x13,0x2d,0x45,0x00,0x00,0x00]
-        	movlps	%xmm5,0x45
-
-// CHECK: movlps	%xmm5, 32493
-// CHECK:  encoding: [0x0f,0x13,0x2d,0xed,0x7e,0x00,0x00]
-        	movlps	%xmm5,0x7eed
-
-// CHECK: movlps	%xmm5, 3133065982
-// CHECK:  encoding: [0x0f,0x13,0x2d,0xfe,0xca,0xbe,0xba]
-        	movlps	%xmm5,0xbabecafe
-
-// CHECK: movlps	%xmm5, 305419896
-// CHECK:  encoding: [0x0f,0x13,0x2d,0x78,0x56,0x34,0x12]
-        	movlps	%xmm5,0x12345678
-
-// CHECK: movmskps	%xmm5, %ecx
-// CHECK:  encoding: [0x0f,0x50,0xcd]
-        	movmskps	%xmm5,%ecx
-
-// CHECK: movntps	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movntps	%xmm5, 69
-// CHECK:  encoding: [0x0f,0x2b,0x2d,0x45,0x00,0x00,0x00]
-        	movntps	%xmm5,0x45
-
-// CHECK: movntps	%xmm5, 32493
-// CHECK:  encoding: [0x0f,0x2b,0x2d,0xed,0x7e,0x00,0x00]
-        	movntps	%xmm5,0x7eed
-
-// CHECK: movntps	%xmm5, 3133065982
-// CHECK:  encoding: [0x0f,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
-        	movntps	%xmm5,0xbabecafe
-
-// CHECK: movntps	%xmm5, 305419896
-// CHECK:  encoding: [0x0f,0x2b,0x2d,0x78,0x56,0x34,0x12]
-        	movntps	%xmm5,0x12345678
-
-// CHECK: movntq	%mm3, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xe7,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movntq	%mm3, 69
-// CHECK:  encoding: [0x0f,0xe7,0x1d,0x45,0x00,0x00,0x00]
-        	movntq	%mm3,0x45
-
-// CHECK: movntq	%mm3, 32493
-// CHECK:  encoding: [0x0f,0xe7,0x1d,0xed,0x7e,0x00,0x00]
-        	movntq	%mm3,0x7eed
-
-// CHECK: movntq	%mm3, 3133065982
-// CHECK:  encoding: [0x0f,0xe7,0x1d,0xfe,0xca,0xbe,0xba]
-        	movntq	%mm3,0xbabecafe
-
-// CHECK: movntq	%mm3, 305419896
-// CHECK:  encoding: [0x0f,0xe7,0x1d,0x78,0x56,0x34,0x12]
-        	movntq	%mm3,0x12345678
-
-// CHECK: movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0xe7,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movntdq	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0x45,0x00,0x00,0x00]
-        	movntdq	%xmm5,0x45
-
-// CHECK: movntdq	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0xed,0x7e,0x00,0x00]
-        	movntdq	%xmm5,0x7eed
-
-// CHECK: movntdq	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0xfe,0xca,0xbe,0xba]
-        	movntdq	%xmm5,0xbabecafe
-
-// CHECK: movntdq	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0x78,0x56,0x34,0x12]
-        	movntdq	%xmm5,0x12345678
-
-// CHECK: movss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
-        	movss	0x45,%xmm5
-
-// CHECK: movss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
-        	movss	0x7eed,%xmm5
-
-// CHECK: movss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
-        	movss	0xbabecafe,%xmm5
-
-// CHECK: movss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
-        	movss	0x12345678,%xmm5
-
-// CHECK: movss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0xed]
-        	movss	%xmm5,%xmm5
-
-// CHECK: movss	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf3,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movss	%xmm5, 69
-// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
-        	movss	%xmm5,0x45
-
-// CHECK: movss	%xmm5, 32493
-// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
-        	movss	%xmm5,0x7eed
-
-// CHECK: movss	%xmm5, 3133065982
-// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
-        	movss	%xmm5,0xbabecafe
-
-// CHECK: movss	%xmm5, 305419896
-// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
-        	movss	%xmm5,0x12345678
-
-// CHECK: movss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x10,0xed]
-        	movss	%xmm5,%xmm5
-
-// CHECK: movups	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movups	69, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
-        	movups	0x45,%xmm5
-
-// CHECK: movups	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
-        	movups	0x7eed,%xmm5
-
-// CHECK: movups	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
-        	movups	0xbabecafe,%xmm5
-
-// CHECK: movups	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
-        	movups	0x12345678,%xmm5
-
-// CHECK: movups	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0xed]
-        	movups	%xmm5,%xmm5
-
-// CHECK: movups	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movups	%xmm5, 69
-// CHECK:  encoding: [0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
-        	movups	%xmm5,0x45
-
-// CHECK: movups	%xmm5, 32493
-// CHECK:  encoding: [0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
-        	movups	%xmm5,0x7eed
-
-// CHECK: movups	%xmm5, 3133065982
-// CHECK:  encoding: [0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
-        	movups	%xmm5,0xbabecafe
-
-// CHECK: movups	%xmm5, 305419896
-// CHECK:  encoding: [0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
-        	movups	%xmm5,0x12345678
-
-// CHECK: movups	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x10,0xed]
-        	movups	%xmm5,%xmm5
-
-// CHECK: mulps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	mulps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: mulps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
-        	mulps	0x45,%xmm5
-
-// CHECK: mulps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
-        	mulps	0x7eed,%xmm5
-
-// CHECK: mulps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
-        	mulps	0xbabecafe,%xmm5
-
-// CHECK: mulps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
-        	mulps	0x12345678,%xmm5
-
-// CHECK: mulps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x59,0xed]
-        	mulps	%xmm5,%xmm5
-
-// CHECK: mulss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	mulss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: mulss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
-        	mulss	0x45,%xmm5
-
-// CHECK: mulss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
-        	mulss	0x7eed,%xmm5
-
-// CHECK: mulss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
-        	mulss	0xbabecafe,%xmm5
-
-// CHECK: mulss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
-        	mulss	0x12345678,%xmm5
-
-// CHECK: mulss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x59,0xed]
-        	mulss	%xmm5,%xmm5
-
-// CHECK: orps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x56,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	orps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: orps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x56,0x2d,0x45,0x00,0x00,0x00]
-        	orps	0x45,%xmm5
-
-// CHECK: orps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x56,0x2d,0xed,0x7e,0x00,0x00]
-        	orps	0x7eed,%xmm5
-
-// CHECK: orps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x56,0x2d,0xfe,0xca,0xbe,0xba]
-        	orps	0xbabecafe,%xmm5
-
-// CHECK: orps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x56,0x2d,0x78,0x56,0x34,0x12]
-        	orps	0x12345678,%xmm5
-
-// CHECK: orps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x56,0xed]
-        	orps	%xmm5,%xmm5
-
-// CHECK: pavgb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe0,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pavgb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pavgb	69, %mm3
-// CHECK:  encoding: [0x0f,0xe0,0x1d,0x45,0x00,0x00,0x00]
-        	pavgb	0x45,%mm3
-
-// CHECK: pavgb	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe0,0x1d,0xed,0x7e,0x00,0x00]
-        	pavgb	0x7eed,%mm3
-
-// CHECK: pavgb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe0,0x1d,0xfe,0xca,0xbe,0xba]
-        	pavgb	0xbabecafe,%mm3
-
-// CHECK: pavgb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe0,0x1d,0x78,0x56,0x34,0x12]
-        	pavgb	0x12345678,%mm3
-
-// CHECK: pavgb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe0,0xdb]
-        	pavgb	%mm3,%mm3
-
-// CHECK: pavgb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pavgb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pavgb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0x45,0x00,0x00,0x00]
-        	pavgb	0x45,%xmm5
-
-// CHECK: pavgb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0xed,0x7e,0x00,0x00]
-        	pavgb	0x7eed,%xmm5
-
-// CHECK: pavgb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0xfe,0xca,0xbe,0xba]
-        	pavgb	0xbabecafe,%xmm5
-
-// CHECK: pavgb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0x78,0x56,0x34,0x12]
-        	pavgb	0x12345678,%xmm5
-
-// CHECK: pavgb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe0,0xed]
-        	pavgb	%xmm5,%xmm5
-
-// CHECK: pavgw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pavgw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pavgw	69, %mm3
-// CHECK:  encoding: [0x0f,0xe3,0x1d,0x45,0x00,0x00,0x00]
-        	pavgw	0x45,%mm3
-
-// CHECK: pavgw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe3,0x1d,0xed,0x7e,0x00,0x00]
-        	pavgw	0x7eed,%mm3
-
-// CHECK: pavgw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe3,0x1d,0xfe,0xca,0xbe,0xba]
-        	pavgw	0xbabecafe,%mm3
-
-// CHECK: pavgw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe3,0x1d,0x78,0x56,0x34,0x12]
-        	pavgw	0x12345678,%mm3
-
-// CHECK: pavgw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe3,0xdb]
-        	pavgw	%mm3,%mm3
-
-// CHECK: pavgw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pavgw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pavgw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0x45,0x00,0x00,0x00]
-        	pavgw	0x45,%xmm5
-
-// CHECK: pavgw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0xed,0x7e,0x00,0x00]
-        	pavgw	0x7eed,%xmm5
-
-// CHECK: pavgw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0xfe,0xca,0xbe,0xba]
-        	pavgw	0xbabecafe,%xmm5
-
-// CHECK: pavgw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0x78,0x56,0x34,0x12]
-        	pavgw	0x12345678,%xmm5
-
-// CHECK: pavgw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe3,0xed]
-        	pavgw	%xmm5,%xmm5
-
-// CHECK: pmaxsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xee,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmaxsw	69, %mm3
-// CHECK:  encoding: [0x0f,0xee,0x1d,0x45,0x00,0x00,0x00]
-        	pmaxsw	0x45,%mm3
-
-// CHECK: pmaxsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xee,0x1d,0xed,0x7e,0x00,0x00]
-        	pmaxsw	0x7eed,%mm3
-
-// CHECK: pmaxsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xee,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmaxsw	0xbabecafe,%mm3
-
-// CHECK: pmaxsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xee,0x1d,0x78,0x56,0x34,0x12]
-        	pmaxsw	0x12345678,%mm3
-
-// CHECK: pmaxsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xee,0xdb]
-        	pmaxsw	%mm3,%mm3
-
-// CHECK: pmaxsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxsw	0x45,%xmm5
-
-// CHECK: pmaxsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxsw	0x7eed,%xmm5
-
-// CHECK: pmaxsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxsw	0xbabecafe,%xmm5
-
-// CHECK: pmaxsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxsw	0x12345678,%xmm5
-
-// CHECK: pmaxsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xee,0xed]
-        	pmaxsw	%xmm5,%xmm5
-
-// CHECK: pmaxub	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xde,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmaxub	69, %mm3
-// CHECK:  encoding: [0x0f,0xde,0x1d,0x45,0x00,0x00,0x00]
-        	pmaxub	0x45,%mm3
-
-// CHECK: pmaxub	32493, %mm3
-// CHECK:  encoding: [0x0f,0xde,0x1d,0xed,0x7e,0x00,0x00]
-        	pmaxub	0x7eed,%mm3
-
-// CHECK: pmaxub	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xde,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmaxub	0xbabecafe,%mm3
-
-// CHECK: pmaxub	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xde,0x1d,0x78,0x56,0x34,0x12]
-        	pmaxub	0x12345678,%mm3
-
-// CHECK: pmaxub	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xde,0xdb]
-        	pmaxub	%mm3,%mm3
-
-// CHECK: pmaxub	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxub	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxub	0x45,%xmm5
-
-// CHECK: pmaxub	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxub	0x7eed,%xmm5
-
-// CHECK: pmaxub	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxub	0xbabecafe,%xmm5
-
-// CHECK: pmaxub	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxub	0x12345678,%xmm5
-
-// CHECK: pmaxub	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xde,0xed]
-        	pmaxub	%xmm5,%xmm5
-
-// CHECK: pminsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xea,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pminsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pminsw	69, %mm3
-// CHECK:  encoding: [0x0f,0xea,0x1d,0x45,0x00,0x00,0x00]
-        	pminsw	0x45,%mm3
-
-// CHECK: pminsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xea,0x1d,0xed,0x7e,0x00,0x00]
-        	pminsw	0x7eed,%mm3
-
-// CHECK: pminsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xea,0x1d,0xfe,0xca,0xbe,0xba]
-        	pminsw	0xbabecafe,%mm3
-
-// CHECK: pminsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xea,0x1d,0x78,0x56,0x34,0x12]
-        	pminsw	0x12345678,%mm3
-
-// CHECK: pminsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xea,0xdb]
-        	pminsw	%mm3,%mm3
-
-// CHECK: pminsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0x45,0x00,0x00,0x00]
-        	pminsw	0x45,%xmm5
-
-// CHECK: pminsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0xed,0x7e,0x00,0x00]
-        	pminsw	0x7eed,%xmm5
-
-// CHECK: pminsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminsw	0xbabecafe,%xmm5
-
-// CHECK: pminsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0x78,0x56,0x34,0x12]
-        	pminsw	0x12345678,%xmm5
-
-// CHECK: pminsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xea,0xed]
-        	pminsw	%xmm5,%xmm5
-
-// CHECK: pminub	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pminub	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pminub	69, %mm3
-// CHECK:  encoding: [0x0f,0xda,0x1d,0x45,0x00,0x00,0x00]
-        	pminub	0x45,%mm3
-
-// CHECK: pminub	32493, %mm3
-// CHECK:  encoding: [0x0f,0xda,0x1d,0xed,0x7e,0x00,0x00]
-        	pminub	0x7eed,%mm3
-
-// CHECK: pminub	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xda,0x1d,0xfe,0xca,0xbe,0xba]
-        	pminub	0xbabecafe,%mm3
-
-// CHECK: pminub	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xda,0x1d,0x78,0x56,0x34,0x12]
-        	pminub	0x12345678,%mm3
-
-// CHECK: pminub	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xda,0xdb]
-        	pminub	%mm3,%mm3
-
-// CHECK: pminub	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminub	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminub	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0x45,0x00,0x00,0x00]
-        	pminub	0x45,%xmm5
-
-// CHECK: pminub	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0xed,0x7e,0x00,0x00]
-        	pminub	0x7eed,%xmm5
-
-// CHECK: pminub	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminub	0xbabecafe,%xmm5
-
-// CHECK: pminub	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0x78,0x56,0x34,0x12]
-        	pminub	0x12345678,%xmm5
-
-// CHECK: pminub	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xda,0xed]
-        	pminub	%xmm5,%xmm5
-
-// CHECK: pmovmskb	%mm3, %ecx
-// CHECK:  encoding: [0x0f,0xd7,0xcb]
-        	pmovmskb	%mm3,%ecx
-
-// CHECK: pmovmskb	%xmm5, %ecx
-// CHECK:  encoding: [0x66,0x0f,0xd7,0xcd]
-        	pmovmskb	%xmm5,%ecx
-
-// CHECK: pmulhuw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xe4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmulhuw	69, %mm3
-// CHECK:  encoding: [0x0f,0xe4,0x1d,0x45,0x00,0x00,0x00]
-        	pmulhuw	0x45,%mm3
-
-// CHECK: pmulhuw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xe4,0x1d,0xed,0x7e,0x00,0x00]
-        	pmulhuw	0x7eed,%mm3
-
-// CHECK: pmulhuw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xe4,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmulhuw	0xbabecafe,%mm3
-
-// CHECK: pmulhuw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xe4,0x1d,0x78,0x56,0x34,0x12]
-        	pmulhuw	0x12345678,%mm3
-
-// CHECK: pmulhuw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xe4,0xdb]
-        	pmulhuw	%mm3,%mm3
-
-// CHECK: pmulhuw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmulhuw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0x45,0x00,0x00,0x00]
-        	pmulhuw	0x45,%xmm5
-
-// CHECK: pmulhuw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0xed,0x7e,0x00,0x00]
-        	pmulhuw	0x7eed,%xmm5
-
-// CHECK: pmulhuw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmulhuw	0xbabecafe,%xmm5
-
-// CHECK: pmulhuw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0x78,0x56,0x34,0x12]
-        	pmulhuw	0x12345678,%xmm5
-
-// CHECK: pmulhuw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xe4,0xed]
-        	pmulhuw	%xmm5,%xmm5
-
-// CHECK: prefetchnta	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x18,0x84,0xcb,0xef,0xbe,0xad,0xde]
-        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: prefetchnta	32493
-// CHECK:  encoding: [0x0f,0x18,0x05,0xed,0x7e,0x00,0x00]
-        	prefetchnta	0x7eed
-
-// CHECK: prefetchnta	3133065982
-// CHECK:  encoding: [0x0f,0x18,0x05,0xfe,0xca,0xbe,0xba]
-        	prefetchnta	0xbabecafe
-
-// CHECK: prefetchnta	305419896
-// CHECK:  encoding: [0x0f,0x18,0x05,0x78,0x56,0x34,0x12]
-        	prefetchnta	0x12345678
-
-// CHECK: prefetcht0	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x18,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: prefetcht0	32493
-// CHECK:  encoding: [0x0f,0x18,0x0d,0xed,0x7e,0x00,0x00]
-        	prefetcht0	0x7eed
-
-// CHECK: prefetcht0	3133065982
-// CHECK:  encoding: [0x0f,0x18,0x0d,0xfe,0xca,0xbe,0xba]
-        	prefetcht0	0xbabecafe
-
-// CHECK: prefetcht0	305419896
-// CHECK:  encoding: [0x0f,0x18,0x0d,0x78,0x56,0x34,0x12]
-        	prefetcht0	0x12345678
-
-// CHECK: prefetcht1	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x18,0x94,0xcb,0xef,0xbe,0xad,0xde]
-        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: prefetcht1	32493
-// CHECK:  encoding: [0x0f,0x18,0x15,0xed,0x7e,0x00,0x00]
-        	prefetcht1	0x7eed
-
-// CHECK: prefetcht1	3133065982
-// CHECK:  encoding: [0x0f,0x18,0x15,0xfe,0xca,0xbe,0xba]
-        	prefetcht1	0xbabecafe
-
-// CHECK: prefetcht1	305419896
-// CHECK:  encoding: [0x0f,0x18,0x15,0x78,0x56,0x34,0x12]
-        	prefetcht1	0x12345678
-
-// CHECK: prefetcht2	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x18,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: prefetcht2	32493
-// CHECK:  encoding: [0x0f,0x18,0x1d,0xed,0x7e,0x00,0x00]
-        	prefetcht2	0x7eed
-
-// CHECK: prefetcht2	3133065982
-// CHECK:  encoding: [0x0f,0x18,0x1d,0xfe,0xca,0xbe,0xba]
-        	prefetcht2	0xbabecafe
-
-// CHECK: prefetcht2	305419896
-// CHECK:  encoding: [0x0f,0x18,0x1d,0x78,0x56,0x34,0x12]
-        	prefetcht2	0x12345678
-
-// CHECK: psadbw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf6,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psadbw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psadbw	69, %mm3
-// CHECK:  encoding: [0x0f,0xf6,0x1d,0x45,0x00,0x00,0x00]
-        	psadbw	0x45,%mm3
-
-// CHECK: psadbw	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf6,0x1d,0xed,0x7e,0x00,0x00]
-        	psadbw	0x7eed,%mm3
-
-// CHECK: psadbw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf6,0x1d,0xfe,0xca,0xbe,0xba]
-        	psadbw	0xbabecafe,%mm3
-
-// CHECK: psadbw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf6,0x1d,0x78,0x56,0x34,0x12]
-        	psadbw	0x12345678,%mm3
-
-// CHECK: psadbw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf6,0xdb]
-        	psadbw	%mm3,%mm3
-
-// CHECK: psadbw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psadbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psadbw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0x45,0x00,0x00,0x00]
-        	psadbw	0x45,%xmm5
-
-// CHECK: psadbw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0xed,0x7e,0x00,0x00]
-        	psadbw	0x7eed,%xmm5
-
-// CHECK: psadbw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0xfe,0xca,0xbe,0xba]
-        	psadbw	0xbabecafe,%xmm5
-
-// CHECK: psadbw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0x78,0x56,0x34,0x12]
-        	psadbw	0x12345678,%xmm5
-
-// CHECK: psadbw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf6,0xed]
-        	psadbw	%xmm5,%xmm5
-
-// CHECK: rcpps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x53,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: rcpps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x53,0x2d,0x45,0x00,0x00,0x00]
-        	rcpps	0x45,%xmm5
-
-// CHECK: rcpps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x53,0x2d,0xed,0x7e,0x00,0x00]
-        	rcpps	0x7eed,%xmm5
-
-// CHECK: rcpps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x53,0x2d,0xfe,0xca,0xbe,0xba]
-        	rcpps	0xbabecafe,%xmm5
-
-// CHECK: rcpps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x53,0x2d,0x78,0x56,0x34,0x12]
-        	rcpps	0x12345678,%xmm5
-
-// CHECK: rcpps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x53,0xed]
-        	rcpps	%xmm5,%xmm5
-
-// CHECK: rcpss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: rcpss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0x45,0x00,0x00,0x00]
-        	rcpss	0x45,%xmm5
-
-// CHECK: rcpss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0xed,0x7e,0x00,0x00]
-        	rcpss	0x7eed,%xmm5
-
-// CHECK: rcpss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0xfe,0xca,0xbe,0xba]
-        	rcpss	0xbabecafe,%xmm5
-
-// CHECK: rcpss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0x78,0x56,0x34,0x12]
-        	rcpss	0x12345678,%xmm5
-
-// CHECK: rcpss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x53,0xed]
-        	rcpss	%xmm5,%xmm5
-
-// CHECK: rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x52,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: rsqrtps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x52,0x2d,0x45,0x00,0x00,0x00]
-        	rsqrtps	0x45,%xmm5
-
-// CHECK: rsqrtps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x52,0x2d,0xed,0x7e,0x00,0x00]
-        	rsqrtps	0x7eed,%xmm5
-
-// CHECK: rsqrtps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x52,0x2d,0xfe,0xca,0xbe,0xba]
-        	rsqrtps	0xbabecafe,%xmm5
-
-// CHECK: rsqrtps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x52,0x2d,0x78,0x56,0x34,0x12]
-        	rsqrtps	0x12345678,%xmm5
-
-// CHECK: rsqrtps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x52,0xed]
-        	rsqrtps	%xmm5,%xmm5
-
-// CHECK: rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: rsqrtss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0x45,0x00,0x00,0x00]
-        	rsqrtss	0x45,%xmm5
-
-// CHECK: rsqrtss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0xed,0x7e,0x00,0x00]
-        	rsqrtss	0x7eed,%xmm5
-
-// CHECK: rsqrtss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0xfe,0xca,0xbe,0xba]
-        	rsqrtss	0xbabecafe,%xmm5
-
-// CHECK: rsqrtss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0x78,0x56,0x34,0x12]
-        	rsqrtss	0x12345678,%xmm5
-
-// CHECK: rsqrtss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x52,0xed]
-        	rsqrtss	%xmm5,%xmm5
-
-// CHECK: sqrtps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: sqrtps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
-        	sqrtps	0x45,%xmm5
-
-// CHECK: sqrtps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
-        	sqrtps	0x7eed,%xmm5
-
-// CHECK: sqrtps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
-        	sqrtps	0xbabecafe,%xmm5
-
-// CHECK: sqrtps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
-        	sqrtps	0x12345678,%xmm5
-
-// CHECK: sqrtps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x51,0xed]
-        	sqrtps	%xmm5,%xmm5
-
-// CHECK: sqrtss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: sqrtss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
-        	sqrtss	0x45,%xmm5
-
-// CHECK: sqrtss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
-        	sqrtss	0x7eed,%xmm5
-
-// CHECK: sqrtss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
-        	sqrtss	0xbabecafe,%xmm5
-
-// CHECK: sqrtss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
-        	sqrtss	0x12345678,%xmm5
-
-// CHECK: sqrtss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x51,0xed]
-        	sqrtss	%xmm5,%xmm5
-
-// CHECK: stmxcsr	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xae,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: stmxcsr	32493
-// CHECK:  encoding: [0x0f,0xae,0x1d,0xed,0x7e,0x00,0x00]
-        	stmxcsr	0x7eed
-
-// CHECK: stmxcsr	3133065982
-// CHECK:  encoding: [0x0f,0xae,0x1d,0xfe,0xca,0xbe,0xba]
-        	stmxcsr	0xbabecafe
-
-// CHECK: stmxcsr	305419896
-// CHECK:  encoding: [0x0f,0xae,0x1d,0x78,0x56,0x34,0x12]
-        	stmxcsr	0x12345678
-
-// CHECK: subps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	subps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: subps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
-        	subps	0x45,%xmm5
-
-// CHECK: subps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
-        	subps	0x7eed,%xmm5
-
-// CHECK: subps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
-        	subps	0xbabecafe,%xmm5
-
-// CHECK: subps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
-        	subps	0x12345678,%xmm5
-
-// CHECK: subps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5c,0xed]
-        	subps	%xmm5,%xmm5
-
-// CHECK: subss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	subss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: subss	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
-        	subss	0x45,%xmm5
-
-// CHECK: subss	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
-        	subss	0x7eed,%xmm5
-
-// CHECK: subss	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
-        	subss	0xbabecafe,%xmm5
-
-// CHECK: subss	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
-        	subss	0x12345678,%xmm5
-
-// CHECK: subss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5c,0xed]
-        	subss	%xmm5,%xmm5
-
-// CHECK: ucomiss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: ucomiss	69, %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0x2d,0x45,0x00,0x00,0x00]
-        	ucomiss	0x45,%xmm5
-
-// CHECK: ucomiss	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0x2d,0xed,0x7e,0x00,0x00]
-        	ucomiss	0x7eed,%xmm5
-
-// CHECK: ucomiss	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0x2d,0xfe,0xca,0xbe,0xba]
-        	ucomiss	0xbabecafe,%xmm5
-
-// CHECK: ucomiss	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0x2d,0x78,0x56,0x34,0x12]
-        	ucomiss	0x12345678,%xmm5
-
-// CHECK: ucomiss	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x2e,0xed]
-        	ucomiss	%xmm5,%xmm5
-
-// CHECK: unpckhps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x15,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	unpckhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: unpckhps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x15,0x2d,0x45,0x00,0x00,0x00]
-        	unpckhps	0x45,%xmm5
-
-// CHECK: unpckhps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x15,0x2d,0xed,0x7e,0x00,0x00]
-        	unpckhps	0x7eed,%xmm5
-
-// CHECK: unpckhps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x15,0x2d,0xfe,0xca,0xbe,0xba]
-        	unpckhps	0xbabecafe,%xmm5
-
-// CHECK: unpckhps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x15,0x2d,0x78,0x56,0x34,0x12]
-        	unpckhps	0x12345678,%xmm5
-
-// CHECK: unpckhps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x15,0xed]
-        	unpckhps	%xmm5,%xmm5
-
-// CHECK: unpcklps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x14,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	unpcklps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: unpcklps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x14,0x2d,0x45,0x00,0x00,0x00]
-        	unpcklps	0x45,%xmm5
-
-// CHECK: unpcklps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x14,0x2d,0xed,0x7e,0x00,0x00]
-        	unpcklps	0x7eed,%xmm5
-
-// CHECK: unpcklps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x14,0x2d,0xfe,0xca,0xbe,0xba]
-        	unpcklps	0xbabecafe,%xmm5
-
-// CHECK: unpcklps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x14,0x2d,0x78,0x56,0x34,0x12]
-        	unpcklps	0x12345678,%xmm5
-
-// CHECK: unpcklps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x14,0xed]
-        	unpcklps	%xmm5,%xmm5
-
-// CHECK: xorps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x57,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	xorps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: xorps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x57,0x2d,0x45,0x00,0x00,0x00]
-        	xorps	0x45,%xmm5
-
-// CHECK: xorps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x57,0x2d,0xed,0x7e,0x00,0x00]
-        	xorps	0x7eed,%xmm5
-
-// CHECK: xorps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x57,0x2d,0xfe,0xca,0xbe,0xba]
-        	xorps	0xbabecafe,%xmm5
-
-// CHECK: xorps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x57,0x2d,0x78,0x56,0x34,0x12]
-        	xorps	0x12345678,%xmm5
-
-// CHECK: xorps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x57,0xed]
-        	xorps	%xmm5,%xmm5
-
-// CHECK: addpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
-        	addpd	0x45,%xmm5
-
-// CHECK: addpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
-        	addpd	0x7eed,%xmm5
-
-// CHECK: addpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
-        	addpd	0xbabecafe,%xmm5
-
-// CHECK: addpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
-        	addpd	0x12345678,%xmm5
-
-// CHECK: addpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x58,0xed]
-        	addpd	%xmm5,%xmm5
-
-// CHECK: addsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
-        	addsd	0x45,%xmm5
-
-// CHECK: addsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
-        	addsd	0x7eed,%xmm5
-
-// CHECK: addsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
-        	addsd	0xbabecafe,%xmm5
-
-// CHECK: addsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
-        	addsd	0x12345678,%xmm5
-
-// CHECK: addsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x58,0xed]
-        	addsd	%xmm5,%xmm5
-
-// CHECK: andnpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	andnpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: andnpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0x45,0x00,0x00,0x00]
-        	andnpd	0x45,%xmm5
-
-// CHECK: andnpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0xed,0x7e,0x00,0x00]
-        	andnpd	0x7eed,%xmm5
-
-// CHECK: andnpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0xfe,0xca,0xbe,0xba]
-        	andnpd	0xbabecafe,%xmm5
-
-// CHECK: andnpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0x78,0x56,0x34,0x12]
-        	andnpd	0x12345678,%xmm5
-
-// CHECK: andnpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x55,0xed]
-        	andnpd	%xmm5,%xmm5
-
-// CHECK: andpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	andpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: andpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0x45,0x00,0x00,0x00]
-        	andpd	0x45,%xmm5
-
-// CHECK: andpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0xed,0x7e,0x00,0x00]
-        	andpd	0x7eed,%xmm5
-
-// CHECK: andpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0xfe,0xca,0xbe,0xba]
-        	andpd	0xbabecafe,%xmm5
-
-// CHECK: andpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0x78,0x56,0x34,0x12]
-        	andpd	0x12345678,%xmm5
-
-// CHECK: andpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x54,0xed]
-        	andpd	%xmm5,%xmm5
-
-// CHECK: comisd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: comisd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0x45,0x00,0x00,0x00]
-        	comisd	0x45,%xmm5
-
-// CHECK: comisd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0xed,0x7e,0x00,0x00]
-        	comisd	0x7eed,%xmm5
-
-// CHECK: comisd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0xfe,0xca,0xbe,0xba]
-        	comisd	0xbabecafe,%xmm5
-
-// CHECK: comisd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0x78,0x56,0x34,0x12]
-        	comisd	0x12345678,%xmm5
-
-// CHECK: comisd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2f,0xed]
-        	comisd	%xmm5,%xmm5
-
-// CHECK: cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtpi2pd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtpi2pd	0x45,%xmm5
-
-// CHECK: cvtpi2pd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtpi2pd	0x7eed,%xmm5
-
-// CHECK: cvtpi2pd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtpi2pd	0xbabecafe,%xmm5
-
-// CHECK: cvtpi2pd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtpi2pd	0x12345678,%xmm5
-
-// CHECK: cvtpi2pd	%mm3, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2a,0xeb]
-        	cvtpi2pd	%mm3,%xmm5
-
-// CHECK: cvtsi2sd	%ecx, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0xe9]
-        	cvtsi2sd	%ecx,%xmm5
-
-// CHECK: cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtsi2sd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtsi2sd	0x45,%xmm5
-
-// CHECK: cvtsi2sd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtsi2sd	0x7eed,%xmm5
-
-// CHECK: cvtsi2sd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtsi2sd	0xbabecafe,%xmm5
-
-// CHECK: cvtsi2sd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtsi2sd	0x12345678,%xmm5
-
-// CHECK: divpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	divpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: divpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
-        	divpd	0x45,%xmm5
-
-// CHECK: divpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
-        	divpd	0x7eed,%xmm5
-
-// CHECK: divpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
-        	divpd	0xbabecafe,%xmm5
-
-// CHECK: divpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
-        	divpd	0x12345678,%xmm5
-
-// CHECK: divpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5e,0xed]
-        	divpd	%xmm5,%xmm5
-
-// CHECK: divsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	divsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: divsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
-        	divsd	0x45,%xmm5
-
-// CHECK: divsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
-        	divsd	0x7eed,%xmm5
-
-// CHECK: divsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
-        	divsd	0xbabecafe,%xmm5
-
-// CHECK: divsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
-        	divsd	0x12345678,%xmm5
-
-// CHECK: divsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5e,0xed]
-        	divsd	%xmm5,%xmm5
-
-// CHECK: maxpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	maxpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: maxpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
-        	maxpd	0x45,%xmm5
-
-// CHECK: maxpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
-        	maxpd	0x7eed,%xmm5
-
-// CHECK: maxpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
-        	maxpd	0xbabecafe,%xmm5
-
-// CHECK: maxpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
-        	maxpd	0x12345678,%xmm5
-
-// CHECK: maxpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5f,0xed]
-        	maxpd	%xmm5,%xmm5
-
-// CHECK: maxsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	maxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: maxsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
-        	maxsd	0x45,%xmm5
-
-// CHECK: maxsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
-        	maxsd	0x7eed,%xmm5
-
-// CHECK: maxsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
-        	maxsd	0xbabecafe,%xmm5
-
-// CHECK: maxsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
-        	maxsd	0x12345678,%xmm5
-
-// CHECK: maxsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5f,0xed]
-        	maxsd	%xmm5,%xmm5
-
-// CHECK: minpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	minpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: minpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
-        	minpd	0x45,%xmm5
-
-// CHECK: minpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
-        	minpd	0x7eed,%xmm5
-
-// CHECK: minpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
-        	minpd	0xbabecafe,%xmm5
-
-// CHECK: minpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
-        	minpd	0x12345678,%xmm5
-
-// CHECK: minpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5d,0xed]
-        	minpd	%xmm5,%xmm5
-
-// CHECK: minsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	minsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: minsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
-        	minsd	0x45,%xmm5
-
-// CHECK: minsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
-        	minsd	0x7eed,%xmm5
-
-// CHECK: minsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
-        	minsd	0xbabecafe,%xmm5
-
-// CHECK: minsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
-        	minsd	0x12345678,%xmm5
-
-// CHECK: minsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5d,0xed]
-        	minsd	%xmm5,%xmm5
-
-// CHECK: movapd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movapd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0x45,0x00,0x00,0x00]
-        	movapd	0x45,%xmm5
-
-// CHECK: movapd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0xed,0x7e,0x00,0x00]
-        	movapd	0x7eed,%xmm5
-
-// CHECK: movapd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0xfe,0xca,0xbe,0xba]
-        	movapd	0xbabecafe,%xmm5
-
-// CHECK: movapd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0x78,0x56,0x34,0x12]
-        	movapd	0x12345678,%xmm5
-
-// CHECK: movapd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0xed]
-        	movapd	%xmm5,%xmm5
-
-// CHECK: movapd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movapd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0x45,0x00,0x00,0x00]
-        	movapd	%xmm5,0x45
-
-// CHECK: movapd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0xed,0x7e,0x00,0x00]
-        	movapd	%xmm5,0x7eed
-
-// CHECK: movapd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0xfe,0xca,0xbe,0xba]
-        	movapd	%xmm5,0xbabecafe
-
-// CHECK: movapd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0x78,0x56,0x34,0x12]
-        	movapd	%xmm5,0x12345678
-
-// CHECK: movapd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x28,0xed]
-        	movapd	%xmm5,%xmm5
-
-// CHECK: movhpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movhpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
-        	movhpd	0x45,%xmm5
-
-// CHECK: movhpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
-        	movhpd	0x7eed,%xmm5
-
-// CHECK: movhpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
-        	movhpd	0xbabecafe,%xmm5
-
-// CHECK: movhpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
-        	movhpd	0x12345678,%xmm5
-
-// CHECK: movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movhpd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0x45,0x00,0x00,0x00]
-        	movhpd	%xmm5,0x45
-
-// CHECK: movhpd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0xed,0x7e,0x00,0x00]
-        	movhpd	%xmm5,0x7eed
-
-// CHECK: movhpd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0xfe,0xca,0xbe,0xba]
-        	movhpd	%xmm5,0xbabecafe
-
-// CHECK: movhpd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0x78,0x56,0x34,0x12]
-        	movhpd	%xmm5,0x12345678
-
-// CHECK: movlpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movlpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movlpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
-        	movlpd	0x45,%xmm5
-
-// CHECK: movlpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
-        	movlpd	0x7eed,%xmm5
-
-// CHECK: movlpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
-        	movlpd	0xbabecafe,%xmm5
-
-// CHECK: movlpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
-        	movlpd	0x12345678,%xmm5
-
-// CHECK: movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x13,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movlpd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0x45,0x00,0x00,0x00]
-        	movlpd	%xmm5,0x45
-
-// CHECK: movlpd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0xed,0x7e,0x00,0x00]
-        	movlpd	%xmm5,0x7eed
-
-// CHECK: movlpd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0xfe,0xca,0xbe,0xba]
-        	movlpd	%xmm5,0xbabecafe
-
-// CHECK: movlpd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0x78,0x56,0x34,0x12]
-        	movlpd	%xmm5,0x12345678
-
-// CHECK: movmskpd	%xmm5, %ecx
-// CHECK:  encoding: [0x66,0x0f,0x50,0xcd]
-        	movmskpd	%xmm5,%ecx
-
-// CHECK: movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movntpd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0x45,0x00,0x00,0x00]
-        	movntpd	%xmm5,0x45
-
-// CHECK: movntpd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0xed,0x7e,0x00,0x00]
-        	movntpd	%xmm5,0x7eed
-
-// CHECK: movntpd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
-        	movntpd	%xmm5,0xbabecafe
-
-// CHECK: movntpd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0x78,0x56,0x34,0x12]
-        	movntpd	%xmm5,0x12345678
-
-// CHECK: movsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
-        	movsd	0x45,%xmm5
-
-// CHECK: movsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
-        	movsd	0x7eed,%xmm5
-
-// CHECK: movsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
-        	movsd	0xbabecafe,%xmm5
-
-// CHECK: movsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
-        	movsd	0x12345678,%xmm5
-
-// CHECK: movsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0xed]
-        	movsd	%xmm5,%xmm5
-
-// CHECK: movsd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf2,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movsd	%xmm5, 69
-// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
-        	movsd	%xmm5,0x45
-
-// CHECK: movsd	%xmm5, 32493
-// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
-        	movsd	%xmm5,0x7eed
-
-// CHECK: movsd	%xmm5, 3133065982
-// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
-        	movsd	%xmm5,0xbabecafe
-
-// CHECK: movsd	%xmm5, 305419896
-// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
-        	movsd	%xmm5,0x12345678
-
-// CHECK: movsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x10,0xed]
-        	movsd	%xmm5,%xmm5
-
-// CHECK: movupd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movupd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
-        	movupd	0x45,%xmm5
-
-// CHECK: movupd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
-        	movupd	0x7eed,%xmm5
-
-// CHECK: movupd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
-        	movupd	0xbabecafe,%xmm5
-
-// CHECK: movupd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
-        	movupd	0x12345678,%xmm5
-
-// CHECK: movupd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0xed]
-        	movupd	%xmm5,%xmm5
-
-// CHECK: movupd	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movupd	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
-        	movupd	%xmm5,0x45
-
-// CHECK: movupd	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
-        	movupd	%xmm5,0x7eed
-
-// CHECK: movupd	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
-        	movupd	%xmm5,0xbabecafe
-
-// CHECK: movupd	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
-        	movupd	%xmm5,0x12345678
-
-// CHECK: movupd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x10,0xed]
-        	movupd	%xmm5,%xmm5
-
-// CHECK: mulpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	mulpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: mulpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
-        	mulpd	0x45,%xmm5
-
-// CHECK: mulpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
-        	mulpd	0x7eed,%xmm5
-
-// CHECK: mulpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
-        	mulpd	0xbabecafe,%xmm5
-
-// CHECK: mulpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
-        	mulpd	0x12345678,%xmm5
-
-// CHECK: mulpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x59,0xed]
-        	mulpd	%xmm5,%xmm5
-
-// CHECK: mulsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	mulsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: mulsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
-        	mulsd	0x45,%xmm5
-
-// CHECK: mulsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
-        	mulsd	0x7eed,%xmm5
-
-// CHECK: mulsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
-        	mulsd	0xbabecafe,%xmm5
-
-// CHECK: mulsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
-        	mulsd	0x12345678,%xmm5
-
-// CHECK: mulsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x59,0xed]
-        	mulsd	%xmm5,%xmm5
-
-// CHECK: orpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	orpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: orpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0x45,0x00,0x00,0x00]
-        	orpd	0x45,%xmm5
-
-// CHECK: orpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0xed,0x7e,0x00,0x00]
-        	orpd	0x7eed,%xmm5
-
-// CHECK: orpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0xfe,0xca,0xbe,0xba]
-        	orpd	0xbabecafe,%xmm5
-
-// CHECK: orpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0x78,0x56,0x34,0x12]
-        	orpd	0x12345678,%xmm5
-
-// CHECK: orpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x56,0xed]
-        	orpd	%xmm5,%xmm5
-
-// CHECK: sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: sqrtpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
-        	sqrtpd	0x45,%xmm5
-
-// CHECK: sqrtpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
-        	sqrtpd	0x7eed,%xmm5
-
-// CHECK: sqrtpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
-        	sqrtpd	0xbabecafe,%xmm5
-
-// CHECK: sqrtpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
-        	sqrtpd	0x12345678,%xmm5
-
-// CHECK: sqrtpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x51,0xed]
-        	sqrtpd	%xmm5,%xmm5
-
-// CHECK: sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: sqrtsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
-        	sqrtsd	0x45,%xmm5
-
-// CHECK: sqrtsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
-        	sqrtsd	0x7eed,%xmm5
-
-// CHECK: sqrtsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
-        	sqrtsd	0xbabecafe,%xmm5
-
-// CHECK: sqrtsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
-        	sqrtsd	0x12345678,%xmm5
-
-// CHECK: sqrtsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x51,0xed]
-        	sqrtsd	%xmm5,%xmm5
-
-// CHECK: subpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	subpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: subpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
-        	subpd	0x45,%xmm5
-
-// CHECK: subpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
-        	subpd	0x7eed,%xmm5
-
-// CHECK: subpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
-        	subpd	0xbabecafe,%xmm5
-
-// CHECK: subpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
-        	subpd	0x12345678,%xmm5
-
-// CHECK: subpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5c,0xed]
-        	subpd	%xmm5,%xmm5
-
-// CHECK: subsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	subsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: subsd	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
-        	subsd	0x45,%xmm5
-
-// CHECK: subsd	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
-        	subsd	0x7eed,%xmm5
-
-// CHECK: subsd	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
-        	subsd	0xbabecafe,%xmm5
-
-// CHECK: subsd	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
-        	subsd	0x12345678,%xmm5
-
-// CHECK: subsd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5c,0xed]
-        	subsd	%xmm5,%xmm5
-
-// CHECK: ucomisd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: ucomisd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0x45,0x00,0x00,0x00]
-        	ucomisd	0x45,%xmm5
-
-// CHECK: ucomisd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0xed,0x7e,0x00,0x00]
-        	ucomisd	0x7eed,%xmm5
-
-// CHECK: ucomisd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0xfe,0xca,0xbe,0xba]
-        	ucomisd	0xbabecafe,%xmm5
-
-// CHECK: ucomisd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0x78,0x56,0x34,0x12]
-        	ucomisd	0x12345678,%xmm5
-
-// CHECK: ucomisd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x2e,0xed]
-        	ucomisd	%xmm5,%xmm5
-
-// CHECK: unpckhpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	unpckhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: unpckhpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0x45,0x00,0x00,0x00]
-        	unpckhpd	0x45,%xmm5
-
-// CHECK: unpckhpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0xed,0x7e,0x00,0x00]
-        	unpckhpd	0x7eed,%xmm5
-
-// CHECK: unpckhpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0xfe,0xca,0xbe,0xba]
-        	unpckhpd	0xbabecafe,%xmm5
-
-// CHECK: unpckhpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0x78,0x56,0x34,0x12]
-        	unpckhpd	0x12345678,%xmm5
-
-// CHECK: unpckhpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x15,0xed]
-        	unpckhpd	%xmm5,%xmm5
-
-// CHECK: unpcklpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	unpcklpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: unpcklpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0x45,0x00,0x00,0x00]
-        	unpcklpd	0x45,%xmm5
-
-// CHECK: unpcklpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0xed,0x7e,0x00,0x00]
-        	unpcklpd	0x7eed,%xmm5
-
-// CHECK: unpcklpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0xfe,0xca,0xbe,0xba]
-        	unpcklpd	0xbabecafe,%xmm5
-
-// CHECK: unpcklpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0x78,0x56,0x34,0x12]
-        	unpcklpd	0x12345678,%xmm5
-
-// CHECK: unpcklpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x14,0xed]
-        	unpcklpd	%xmm5,%xmm5
-
-// CHECK: xorpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	xorpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: xorpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0x45,0x00,0x00,0x00]
-        	xorpd	0x45,%xmm5
-
-// CHECK: xorpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0xed,0x7e,0x00,0x00]
-        	xorpd	0x7eed,%xmm5
-
-// CHECK: xorpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0xfe,0xca,0xbe,0xba]
-        	xorpd	0xbabecafe,%xmm5
-
-// CHECK: xorpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0x78,0x56,0x34,0x12]
-        	xorpd	0x12345678,%xmm5
-
-// CHECK: xorpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x57,0xed]
-        	xorpd	%xmm5,%xmm5
-
-// CHECK: cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtdq2pd	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0x45,0x00,0x00,0x00]
-        	cvtdq2pd	0x45,%xmm5
-
-// CHECK: cvtdq2pd	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtdq2pd	0x7eed,%xmm5
-
-// CHECK: cvtdq2pd	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtdq2pd	0xbabecafe,%xmm5
-
-// CHECK: cvtdq2pd	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0x78,0x56,0x34,0x12]
-        	cvtdq2pd	0x12345678,%xmm5
-
-// CHECK: cvtdq2pd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xe6,0xed]
-        	cvtdq2pd	%xmm5,%xmm5
-
-// CHECK: cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtpd2dq	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0x45,0x00,0x00,0x00]
-        	cvtpd2dq	0x45,%xmm5
-
-// CHECK: cvtpd2dq	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtpd2dq	0x7eed,%xmm5
-
-// CHECK: cvtpd2dq	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtpd2dq	0xbabecafe,%xmm5
-
-// CHECK: cvtpd2dq	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0x78,0x56,0x34,0x12]
-        	cvtpd2dq	0x12345678,%xmm5
-
-// CHECK: cvtpd2dq	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xe6,0xed]
-        	cvtpd2dq	%xmm5,%xmm5
-
-// CHECK: cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtdq2ps	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
-        	cvtdq2ps	0x45,%xmm5
-
-// CHECK: cvtdq2ps	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtdq2ps	0x7eed,%xmm5
-
-// CHECK: cvtdq2ps	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtdq2ps	0xbabecafe,%xmm5
-
-// CHECK: cvtdq2ps	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
-        	cvtdq2ps	0x12345678,%xmm5
-
-// CHECK: cvtdq2ps	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5b,0xed]
-        	cvtdq2ps	%xmm5,%xmm5
-
-// CHECK: cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: cvtpd2pi	69, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0x45,0x00,0x00,0x00]
-        	cvtpd2pi	0x45,%mm3
-
-// CHECK: cvtpd2pi	32493, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0xed,0x7e,0x00,0x00]
-        	cvtpd2pi	0x7eed,%mm3
-
-// CHECK: cvtpd2pi	3133065982, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0xfe,0xca,0xbe,0xba]
-        	cvtpd2pi	0xbabecafe,%mm3
-
-// CHECK: cvtpd2pi	305419896, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0x78,0x56,0x34,0x12]
-        	cvtpd2pi	0x12345678,%mm3
-
-// CHECK: cvtpd2pi	%xmm5, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2d,0xdd]
-        	cvtpd2pi	%xmm5,%mm3
-
-// CHECK: cvtpd2ps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtpd2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtpd2ps	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtpd2ps	0x45,%xmm5
-
-// CHECK: cvtpd2ps	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtpd2ps	0x7eed,%xmm5
-
-// CHECK: cvtpd2ps	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtpd2ps	0xbabecafe,%xmm5
-
-// CHECK: cvtpd2ps	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtpd2ps	0x12345678,%xmm5
-
-// CHECK: cvtpd2ps	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5a,0xed]
-        	cvtpd2ps	%xmm5,%xmm5
-
-// CHECK: cvtps2pd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtps2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtps2pd	69, %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtps2pd	0x45,%xmm5
-
-// CHECK: cvtps2pd	32493, %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtps2pd	0x7eed,%xmm5
-
-// CHECK: cvtps2pd	3133065982, %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtps2pd	0xbabecafe,%xmm5
-
-// CHECK: cvtps2pd	305419896, %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtps2pd	0x12345678,%xmm5
-
-// CHECK: cvtps2pd	%xmm5, %xmm5
-// CHECK:  encoding: [0x0f,0x5a,0xed]
-        	cvtps2pd	%xmm5,%xmm5
-
-// CHECK: cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtps2dq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
-        	cvtps2dq	0x45,%xmm5
-
-// CHECK: cvtps2dq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtps2dq	0x7eed,%xmm5
-
-// CHECK: cvtps2dq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtps2dq	0xbabecafe,%xmm5
-
-// CHECK: cvtps2dq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
-        	cvtps2dq	0x12345678,%xmm5
-
-// CHECK: cvtps2dq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x5b,0xed]
-        	cvtps2dq	%xmm5,%xmm5
-
-// CHECK: cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtsd2ss	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtsd2ss	0x45,%xmm5
-
-// CHECK: cvtsd2ss	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtsd2ss	0x7eed,%xmm5
-
-// CHECK: cvtsd2ss	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtsd2ss	0xbabecafe,%xmm5
-
-// CHECK: cvtsd2ss	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtsd2ss	0x12345678,%xmm5
-
-// CHECK: cvtsd2ss	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x5a,0xed]
-        	cvtsd2ss	%xmm5,%xmm5
-
-// CHECK: cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvtss2sd	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
-        	cvtss2sd	0x45,%xmm5
-
-// CHECK: cvtss2sd	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
-        	cvtss2sd	0x7eed,%xmm5
-
-// CHECK: cvtss2sd	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvtss2sd	0xbabecafe,%xmm5
-
-// CHECK: cvtss2sd	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
-        	cvtss2sd	0x12345678,%xmm5
-
-// CHECK: cvtss2sd	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5a,0xed]
-        	cvtss2sd	%xmm5,%xmm5
-
-// CHECK: cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: cvttpd2pi	69, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0x45,0x00,0x00,0x00]
-        	cvttpd2pi	0x45,%mm3
-
-// CHECK: cvttpd2pi	32493, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0xed,0x7e,0x00,0x00]
-        	cvttpd2pi	0x7eed,%mm3
-
-// CHECK: cvttpd2pi	3133065982, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0xfe,0xca,0xbe,0xba]
-        	cvttpd2pi	0xbabecafe,%mm3
-
-// CHECK: cvttpd2pi	305419896, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0x78,0x56,0x34,0x12]
-        	cvttpd2pi	0x12345678,%mm3
-
-// CHECK: cvttpd2pi	%xmm5, %mm3
-// CHECK:  encoding: [0x66,0x0f,0x2c,0xdd]
-        	cvttpd2pi	%xmm5,%mm3
-
-// CHECK: cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: cvttsd2si	69, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0x45,0x00,0x00,0x00]
-        	cvttsd2si	0x45,%ecx
-
-// CHECK: cvttsd2si	32493, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0xed,0x7e,0x00,0x00]
-        	cvttsd2si	0x7eed,%ecx
-
-// CHECK: cvttsd2si	3133065982, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0xfe,0xca,0xbe,0xba]
-        	cvttsd2si	0xbabecafe,%ecx
-
-// CHECK: cvttsd2si	305419896, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0x78,0x56,0x34,0x12]
-        	cvttsd2si	0x12345678,%ecx
-
-// CHECK: cvttsd2si	%xmm5, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x2c,0xcd]
-        	cvttsd2si	%xmm5,%ecx
-
-// CHECK: cvttps2dq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	cvttps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: cvttps2dq	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
-        	cvttps2dq	0x45,%xmm5
-
-// CHECK: cvttps2dq	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
-        	cvttps2dq	0x7eed,%xmm5
-
-// CHECK: cvttps2dq	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
-        	cvttps2dq	0xbabecafe,%xmm5
-
-// CHECK: cvttps2dq	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
-        	cvttps2dq	0x12345678,%xmm5
-
-// CHECK: cvttps2dq	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x5b,0xed]
-        	cvttps2dq	%xmm5,%xmm5
-
-// CHECK: maskmovdqu	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf7,0xed]
-        	maskmovdqu	%xmm5,%xmm5
-
-// CHECK: movdqa	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movdqa	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0x45,0x00,0x00,0x00]
-        	movdqa	0x45,%xmm5
-
-// CHECK: movdqa	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0xed,0x7e,0x00,0x00]
-        	movdqa	0x7eed,%xmm5
-
-// CHECK: movdqa	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0xfe,0xca,0xbe,0xba]
-        	movdqa	0xbabecafe,%xmm5
-
-// CHECK: movdqa	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0x78,0x56,0x34,0x12]
-        	movdqa	0x12345678,%xmm5
-
-// CHECK: movdqa	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0xed]
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0x7f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movdqa	%xmm5, 69
-// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0x45,0x00,0x00,0x00]
-        	movdqa	%xmm5,0x45
-
-// CHECK: movdqa	%xmm5, 32493
-// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0xed,0x7e,0x00,0x00]
-        	movdqa	%xmm5,0x7eed
-
-// CHECK: movdqa	%xmm5, 3133065982
-// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0xfe,0xca,0xbe,0xba]
-        	movdqa	%xmm5,0xbabecafe
-
-// CHECK: movdqa	%xmm5, 305419896
-// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0x78,0x56,0x34,0x12]
-        	movdqa	%xmm5,0x12345678
-
-// CHECK: movdqa	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6f,0xed]
-        	movdqa	%xmm5,%xmm5
-
-// CHECK: movdqu	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x6f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movdqu	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0x45,0x00,0x00,0x00]
-        	movdqu	0x45,%xmm5
-
-// CHECK: movdqu	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0xed,0x7e,0x00,0x00]
-        	movdqu	0x7eed,%xmm5
-
-// CHECK: movdqu	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0xfe,0xca,0xbe,0xba]
-        	movdqu	0xbabecafe,%xmm5
-
-// CHECK: movdqu	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0x78,0x56,0x34,0x12]
-        	movdqu	0x12345678,%xmm5
-
-// CHECK: movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xf3,0x0f,0x7f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: movdqu	%xmm5, 69
-// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0x45,0x00,0x00,0x00]
-        	movdqu	%xmm5,0x45
-
-// CHECK: movdqu	%xmm5, 32493
-// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0xed,0x7e,0x00,0x00]
-        	movdqu	%xmm5,0x7eed
-
-// CHECK: movdqu	%xmm5, 3133065982
-// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0xfe,0xca,0xbe,0xba]
-        	movdqu	%xmm5,0xbabecafe
-
-// CHECK: movdqu	%xmm5, 305419896
-// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0x78,0x56,0x34,0x12]
-        	movdqu	%xmm5,0x12345678
-
-// CHECK: movdq2q	%xmm5, %mm3
-// CHECK:  encoding: [0xf2,0x0f,0xd6,0xdd]
-        	movdq2q	%xmm5,%mm3
-
-// CHECK: movq2dq	%mm3, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0xd6,0xeb]
-        	movq2dq	%mm3,%xmm5
-
-// CHECK: pmuludq	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0xf4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmuludq	69, %mm3
-// CHECK:  encoding: [0x0f,0xf4,0x1d,0x45,0x00,0x00,0x00]
-        	pmuludq	0x45,%mm3
-
-// CHECK: pmuludq	32493, %mm3
-// CHECK:  encoding: [0x0f,0xf4,0x1d,0xed,0x7e,0x00,0x00]
-        	pmuludq	0x7eed,%mm3
-
-// CHECK: pmuludq	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0xf4,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmuludq	0xbabecafe,%mm3
-
-// CHECK: pmuludq	305419896, %mm3
-// CHECK:  encoding: [0x0f,0xf4,0x1d,0x78,0x56,0x34,0x12]
-        	pmuludq	0x12345678,%mm3
-
-// CHECK: pmuludq	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0xf4,0xdb]
-        	pmuludq	%mm3,%mm3
-
-// CHECK: pmuludq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmuludq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0x45,0x00,0x00,0x00]
-        	pmuludq	0x45,%xmm5
-
-// CHECK: pmuludq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0xed,0x7e,0x00,0x00]
-        	pmuludq	0x7eed,%xmm5
-
-// CHECK: pmuludq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmuludq	0xbabecafe,%xmm5
-
-// CHECK: pmuludq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0x78,0x56,0x34,0x12]
-        	pmuludq	0x12345678,%xmm5
-
-// CHECK: pmuludq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xf4,0xed]
-        	pmuludq	%xmm5,%xmm5
-
-// CHECK: pslldq	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x73,0xfd,0x7f]
-        	pslldq	$0x7f,%xmm5
-
-// CHECK: psrldq	$127, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x73,0xdd,0x7f]
-        	psrldq	$0x7f,%xmm5
-
-// CHECK: punpckhqdq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpckhqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpckhqdq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0x45,0x00,0x00,0x00]
-        	punpckhqdq	0x45,%xmm5
-
-// CHECK: punpckhqdq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0xed,0x7e,0x00,0x00]
-        	punpckhqdq	0x7eed,%xmm5
-
-// CHECK: punpckhqdq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpckhqdq	0xbabecafe,%xmm5
-
-// CHECK: punpckhqdq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0x78,0x56,0x34,0x12]
-        	punpckhqdq	0x12345678,%xmm5
-
-// CHECK: punpckhqdq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6d,0xed]
-        	punpckhqdq	%xmm5,%xmm5
-
-// CHECK: punpcklqdq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	punpcklqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: punpcklqdq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0x45,0x00,0x00,0x00]
-        	punpcklqdq	0x45,%xmm5
-
-// CHECK: punpcklqdq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0xed,0x7e,0x00,0x00]
-        	punpcklqdq	0x7eed,%xmm5
-
-// CHECK: punpcklqdq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0xfe,0xca,0xbe,0xba]
-        	punpcklqdq	0xbabecafe,%xmm5
-
-// CHECK: punpcklqdq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0x78,0x56,0x34,0x12]
-        	punpcklqdq	0x12345678,%xmm5
-
-// CHECK: punpcklqdq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6c,0xed]
-        	punpcklqdq	%xmm5,%xmm5
-
-// CHECK: addsubpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addsubpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0x45,0x00,0x00,0x00]
-        	addsubpd	0x45,%xmm5
-
-// CHECK: addsubpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0xed,0x7e,0x00,0x00]
-        	addsubpd	0x7eed,%xmm5
-
-// CHECK: addsubpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0xfe,0xca,0xbe,0xba]
-        	addsubpd	0xbabecafe,%xmm5
-
-// CHECK: addsubpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0x78,0x56,0x34,0x12]
-        	addsubpd	0x12345678,%xmm5
-
-// CHECK: addsubpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0xd0,0xed]
-        	addsubpd	%xmm5,%xmm5
-
-// CHECK: addsubps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	addsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: addsubps	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0x45,0x00,0x00,0x00]
-        	addsubps	0x45,%xmm5
-
-// CHECK: addsubps	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0xed,0x7e,0x00,0x00]
-        	addsubps	0x7eed,%xmm5
-
-// CHECK: addsubps	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0xfe,0xca,0xbe,0xba]
-        	addsubps	0xbabecafe,%xmm5
-
-// CHECK: addsubps	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0x78,0x56,0x34,0x12]
-        	addsubps	0x12345678,%xmm5
-
-// CHECK: addsubps	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xd0,0xed]
-        	addsubps	%xmm5,%xmm5
-
-// CHECK: fisttpl	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdb,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	fisttpl	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: fisttpl	3133065982
-// CHECK:  encoding: [0xdb,0x0d,0xfe,0xca,0xbe,0xba]
-        	fisttpl	0xbabecafe
-
-// CHECK: fisttpl	305419896
-// CHECK:  encoding: [0xdb,0x0d,0x78,0x56,0x34,0x12]
-        	fisttpl	0x12345678
-
-// CHECK: haddpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	haddpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: haddpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0x45,0x00,0x00,0x00]
-        	haddpd	0x45,%xmm5
-
-// CHECK: haddpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0xed,0x7e,0x00,0x00]
-        	haddpd	0x7eed,%xmm5
-
-// CHECK: haddpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0xfe,0xca,0xbe,0xba]
-        	haddpd	0xbabecafe,%xmm5
-
-// CHECK: haddpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0x78,0x56,0x34,0x12]
-        	haddpd	0x12345678,%xmm5
-
-// CHECK: haddpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7c,0xed]
-        	haddpd	%xmm5,%xmm5
-
-// CHECK: haddps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	haddps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: haddps	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0x45,0x00,0x00,0x00]
-        	haddps	0x45,%xmm5
-
-// CHECK: haddps	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0xed,0x7e,0x00,0x00]
-        	haddps	0x7eed,%xmm5
-
-// CHECK: haddps	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0xfe,0xca,0xbe,0xba]
-        	haddps	0xbabecafe,%xmm5
-
-// CHECK: haddps	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0x78,0x56,0x34,0x12]
-        	haddps	0x12345678,%xmm5
-
-// CHECK: haddps	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7c,0xed]
-        	haddps	%xmm5,%xmm5
-
-// CHECK: hsubpd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	hsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: hsubpd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0x45,0x00,0x00,0x00]
-        	hsubpd	0x45,%xmm5
-
-// CHECK: hsubpd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0xed,0x7e,0x00,0x00]
-        	hsubpd	0x7eed,%xmm5
-
-// CHECK: hsubpd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0xfe,0xca,0xbe,0xba]
-        	hsubpd	0xbabecafe,%xmm5
-
-// CHECK: hsubpd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0x78,0x56,0x34,0x12]
-        	hsubpd	0x12345678,%xmm5
-
-// CHECK: hsubpd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x7d,0xed]
-        	hsubpd	%xmm5,%xmm5
-
-// CHECK: hsubps	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	hsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: hsubps	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0x45,0x00,0x00,0x00]
-        	hsubps	0x45,%xmm5
-
-// CHECK: hsubps	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0xed,0x7e,0x00,0x00]
-        	hsubps	0x7eed,%xmm5
-
-// CHECK: hsubps	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0xfe,0xca,0xbe,0xba]
-        	hsubps	0xbabecafe,%xmm5
-
-// CHECK: hsubps	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0x78,0x56,0x34,0x12]
-        	hsubps	0x12345678,%xmm5
-
-// CHECK: hsubps	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x7d,0xed]
-        	hsubps	%xmm5,%xmm5
-
-// CHECK: lddqu	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xf0,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: lddqu	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0x45,0x00,0x00,0x00]
-        	lddqu	0x45,%xmm5
-
-// CHECK: lddqu	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0xed,0x7e,0x00,0x00]
-        	lddqu	0x7eed,%xmm5
-
-// CHECK: lddqu	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0xfe,0xca,0xbe,0xba]
-        	lddqu	0xbabecafe,%xmm5
-
-// CHECK: lddqu	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0x78,0x56,0x34,0x12]
-        	lddqu	0x12345678,%xmm5
-
-// CHECK: movddup	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movddup	69, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
-        	movddup	0x45,%xmm5
-
-// CHECK: movddup	32493, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
-        	movddup	0x7eed,%xmm5
-
-// CHECK: movddup	3133065982, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
-        	movddup	0xbabecafe,%xmm5
-
-// CHECK: movddup	305419896, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
-        	movddup	0x12345678,%xmm5
-
-// CHECK: movddup	%xmm5, %xmm5
-// CHECK:  encoding: [0xf2,0x0f,0x12,0xed]
-        	movddup	%xmm5,%xmm5
-
-// CHECK: movshdup	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movshdup	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
-        	movshdup	0x45,%xmm5
-
-// CHECK: movshdup	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
-        	movshdup	0x7eed,%xmm5
-
-// CHECK: movshdup	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
-        	movshdup	0xbabecafe,%xmm5
-
-// CHECK: movshdup	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
-        	movshdup	0x12345678,%xmm5
-
-// CHECK: movshdup	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x16,0xed]
-        	movshdup	%xmm5,%xmm5
-
-// CHECK: movsldup	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movsldup	69, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
-        	movsldup	0x45,%xmm5
-
-// CHECK: movsldup	32493, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
-        	movsldup	0x7eed,%xmm5
-
-// CHECK: movsldup	3133065982, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
-        	movsldup	0xbabecafe,%xmm5
-
-// CHECK: movsldup	305419896, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
-        	movsldup	0x12345678,%xmm5
-
-// CHECK: movsldup	%xmm5, %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x12,0xed]
-        	movsldup	%xmm5,%xmm5
-
-// CHECK: vmclear	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x66,0x0f,0xc7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	vmclear	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: vmclear	32493
-// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0xed,0x7e,0x00,0x00]
-        	vmclear	0x7eed
-
-// CHECK: vmclear	3133065982
-// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0xfe,0xca,0xbe,0xba]
-        	vmclear	0xbabecafe
-
-// CHECK: vmclear	305419896
-// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0x78,0x56,0x34,0x12]
-        	vmclear	0x12345678
-
-// CHECK: vmptrld	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xc7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
-        	vmptrld	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: vmptrld	32493
-// CHECK:  encoding: [0x0f,0xc7,0x35,0xed,0x7e,0x00,0x00]
-        	vmptrld	0x7eed
-
-// CHECK: vmptrld	3133065982
-// CHECK:  encoding: [0x0f,0xc7,0x35,0xfe,0xca,0xbe,0xba]
-        	vmptrld	0xbabecafe
-
-// CHECK: vmptrld	305419896
-// CHECK:  encoding: [0x0f,0xc7,0x35,0x78,0x56,0x34,0x12]
-        	vmptrld	0x12345678
-
-// CHECK: vmptrst	3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0xc7,0xbc,0xcb,0xef,0xbe,0xad,0xde]
-        	vmptrst	0xdeadbeef(%ebx,%ecx,8)
-
-// CHECK: vmptrst	32493
-// CHECK:  encoding: [0x0f,0xc7,0x3d,0xed,0x7e,0x00,0x00]
-        	vmptrst	0x7eed
-
-// CHECK: vmptrst	3133065982
-// CHECK:  encoding: [0x0f,0xc7,0x3d,0xfe,0xca,0xbe,0xba]
-        	vmptrst	0xbabecafe
-
-// CHECK: vmptrst	305419896
-// CHECK:  encoding: [0x0f,0xc7,0x3d,0x78,0x56,0x34,0x12]
-        	vmptrst	0x12345678
-
-// CHECK: phaddw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phaddw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0x45,0x00,0x00,0x00]
-        	phaddw	0x45,%mm3
-
-// CHECK: phaddw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0xed,0x7e,0x00,0x00]
-        	phaddw	0x7eed,%mm3
-
-// CHECK: phaddw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0xfe,0xca,0xbe,0xba]
-        	phaddw	0xbabecafe,%mm3
-
-// CHECK: phaddw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0x78,0x56,0x34,0x12]
-        	phaddw	0x12345678,%mm3
-
-// CHECK: phaddw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x01,0xdb]
-        	phaddw	%mm3,%mm3
-
-// CHECK: phaddw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phaddw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0x45,0x00,0x00,0x00]
-        	phaddw	0x45,%xmm5
-
-// CHECK: phaddw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0xed,0x7e,0x00,0x00]
-        	phaddw	0x7eed,%xmm5
-
-// CHECK: phaddw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0xfe,0xca,0xbe,0xba]
-        	phaddw	0xbabecafe,%xmm5
-
-// CHECK: phaddw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0x78,0x56,0x34,0x12]
-        	phaddw	0x12345678,%xmm5
-
-// CHECK: phaddw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0xed]
-        	phaddw	%xmm5,%xmm5
-
-// CHECK: phaddd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phaddd	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0x45,0x00,0x00,0x00]
-        	phaddd	0x45,%mm3
-
-// CHECK: phaddd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0xed,0x7e,0x00,0x00]
-        	phaddd	0x7eed,%mm3
-
-// CHECK: phaddd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0xfe,0xca,0xbe,0xba]
-        	phaddd	0xbabecafe,%mm3
-
-// CHECK: phaddd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0x78,0x56,0x34,0x12]
-        	phaddd	0x12345678,%mm3
-
-// CHECK: phaddd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x02,0xdb]
-        	phaddd	%mm3,%mm3
-
-// CHECK: phaddd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phaddd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0x45,0x00,0x00,0x00]
-        	phaddd	0x45,%xmm5
-
-// CHECK: phaddd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0xed,0x7e,0x00,0x00]
-        	phaddd	0x7eed,%xmm5
-
-// CHECK: phaddd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0xfe,0xca,0xbe,0xba]
-        	phaddd	0xbabecafe,%xmm5
-
-// CHECK: phaddd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0x78,0x56,0x34,0x12]
-        	phaddd	0x12345678,%xmm5
-
-// CHECK: phaddd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0xed]
-        	phaddd	%xmm5,%xmm5
-
-// CHECK: phaddsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phaddsw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0x45,0x00,0x00,0x00]
-        	phaddsw	0x45,%mm3
-
-// CHECK: phaddsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0xed,0x7e,0x00,0x00]
-        	phaddsw	0x7eed,%mm3
-
-// CHECK: phaddsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0xfe,0xca,0xbe,0xba]
-        	phaddsw	0xbabecafe,%mm3
-
-// CHECK: phaddsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0x78,0x56,0x34,0x12]
-        	phaddsw	0x12345678,%mm3
-
-// CHECK: phaddsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x03,0xdb]
-        	phaddsw	%mm3,%mm3
-
-// CHECK: phaddsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phaddsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0x45,0x00,0x00,0x00]
-        	phaddsw	0x45,%xmm5
-
-// CHECK: phaddsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0xed,0x7e,0x00,0x00]
-        	phaddsw	0x7eed,%xmm5
-
-// CHECK: phaddsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0xfe,0xca,0xbe,0xba]
-        	phaddsw	0xbabecafe,%xmm5
-
-// CHECK: phaddsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0x78,0x56,0x34,0x12]
-        	phaddsw	0x12345678,%xmm5
-
-// CHECK: phaddsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0xed]
-        	phaddsw	%xmm5,%xmm5
-
-// CHECK: phsubw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phsubw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0x45,0x00,0x00,0x00]
-        	phsubw	0x45,%mm3
-
-// CHECK: phsubw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0xed,0x7e,0x00,0x00]
-        	phsubw	0x7eed,%mm3
-
-// CHECK: phsubw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0xfe,0xca,0xbe,0xba]
-        	phsubw	0xbabecafe,%mm3
-
-// CHECK: phsubw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0x78,0x56,0x34,0x12]
-        	phsubw	0x12345678,%mm3
-
-// CHECK: phsubw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x05,0xdb]
-        	phsubw	%mm3,%mm3
-
-// CHECK: phsubw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phsubw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0x45,0x00,0x00,0x00]
-        	phsubw	0x45,%xmm5
-
-// CHECK: phsubw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0xed,0x7e,0x00,0x00]
-        	phsubw	0x7eed,%xmm5
-
-// CHECK: phsubw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0xfe,0xca,0xbe,0xba]
-        	phsubw	0xbabecafe,%xmm5
-
-// CHECK: phsubw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0x78,0x56,0x34,0x12]
-        	phsubw	0x12345678,%xmm5
-
-// CHECK: phsubw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0xed]
-        	phsubw	%xmm5,%xmm5
-
-// CHECK: phsubd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phsubd	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0x45,0x00,0x00,0x00]
-        	phsubd	0x45,%mm3
-
-// CHECK: phsubd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0xed,0x7e,0x00,0x00]
-        	phsubd	0x7eed,%mm3
-
-// CHECK: phsubd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0xfe,0xca,0xbe,0xba]
-        	phsubd	0xbabecafe,%mm3
-
-// CHECK: phsubd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0x78,0x56,0x34,0x12]
-        	phsubd	0x12345678,%mm3
-
-// CHECK: phsubd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x06,0xdb]
-        	phsubd	%mm3,%mm3
-
-// CHECK: phsubd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phsubd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0x45,0x00,0x00,0x00]
-        	phsubd	0x45,%xmm5
-
-// CHECK: phsubd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0xed,0x7e,0x00,0x00]
-        	phsubd	0x7eed,%xmm5
-
-// CHECK: phsubd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0xfe,0xca,0xbe,0xba]
-        	phsubd	0xbabecafe,%xmm5
-
-// CHECK: phsubd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0x78,0x56,0x34,0x12]
-        	phsubd	0x12345678,%xmm5
-
-// CHECK: phsubd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0xed]
-        	phsubd	%xmm5,%xmm5
-
-// CHECK: phsubsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: phsubsw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0x45,0x00,0x00,0x00]
-        	phsubsw	0x45,%mm3
-
-// CHECK: phsubsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0xed,0x7e,0x00,0x00]
-        	phsubsw	0x7eed,%mm3
-
-// CHECK: phsubsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0xfe,0xca,0xbe,0xba]
-        	phsubsw	0xbabecafe,%mm3
-
-// CHECK: phsubsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0x78,0x56,0x34,0x12]
-        	phsubsw	0x12345678,%mm3
-
-// CHECK: phsubsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x07,0xdb]
-        	phsubsw	%mm3,%mm3
-
-// CHECK: phsubsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phsubsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0x45,0x00,0x00,0x00]
-        	phsubsw	0x45,%xmm5
-
-// CHECK: phsubsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0xed,0x7e,0x00,0x00]
-        	phsubsw	0x7eed,%xmm5
-
-// CHECK: phsubsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0xfe,0xca,0xbe,0xba]
-        	phsubsw	0xbabecafe,%xmm5
-
-// CHECK: phsubsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0x78,0x56,0x34,0x12]
-        	phsubsw	0x12345678,%xmm5
-
-// CHECK: phsubsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0xed]
-        	phsubsw	%xmm5,%xmm5
-
-// CHECK: pmaddubsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmaddubsw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0x45,0x00,0x00,0x00]
-        	pmaddubsw	0x45,%mm3
-
-// CHECK: pmaddubsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0xed,0x7e,0x00,0x00]
-        	pmaddubsw	0x7eed,%mm3
-
-// CHECK: pmaddubsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmaddubsw	0xbabecafe,%mm3
-
-// CHECK: pmaddubsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0x78,0x56,0x34,0x12]
-        	pmaddubsw	0x12345678,%mm3
-
-// CHECK: pmaddubsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x04,0xdb]
-        	pmaddubsw	%mm3,%mm3
-
-// CHECK: pmaddubsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaddubsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0x45,0x00,0x00,0x00]
-        	pmaddubsw	0x45,%xmm5
-
-// CHECK: pmaddubsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaddubsw	0x7eed,%xmm5
-
-// CHECK: pmaddubsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaddubsw	0xbabecafe,%xmm5
-
-// CHECK: pmaddubsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0x78,0x56,0x34,0x12]
-        	pmaddubsw	0x12345678,%xmm5
-
-// CHECK: pmaddubsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0xed]
-        	pmaddubsw	%xmm5,%xmm5
-
-// CHECK: pmulhrsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pmulhrsw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0x45,0x00,0x00,0x00]
-        	pmulhrsw	0x45,%mm3
-
-// CHECK: pmulhrsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0xed,0x7e,0x00,0x00]
-        	pmulhrsw	0x7eed,%mm3
-
-// CHECK: pmulhrsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0xfe,0xca,0xbe,0xba]
-        	pmulhrsw	0xbabecafe,%mm3
-
-// CHECK: pmulhrsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0x78,0x56,0x34,0x12]
-        	pmulhrsw	0x12345678,%mm3
-
-// CHECK: pmulhrsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0b,0xdb]
-        	pmulhrsw	%mm3,%mm3
-
-// CHECK: pmulhrsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmulhrsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0x45,0x00,0x00,0x00]
-        	pmulhrsw	0x45,%xmm5
-
-// CHECK: pmulhrsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0xed,0x7e,0x00,0x00]
-        	pmulhrsw	0x7eed,%xmm5
-
-// CHECK: pmulhrsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmulhrsw	0xbabecafe,%xmm5
-
-// CHECK: pmulhrsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0x78,0x56,0x34,0x12]
-        	pmulhrsw	0x12345678,%xmm5
-
-// CHECK: pmulhrsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0xed]
-        	pmulhrsw	%xmm5,%xmm5
-
-// CHECK: pshufb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pshufb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pshufb	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0x45,0x00,0x00,0x00]
-        	pshufb	0x45,%mm3
-
-// CHECK: pshufb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0xed,0x7e,0x00,0x00]
-        	pshufb	0x7eed,%mm3
-
-// CHECK: pshufb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0xfe,0xca,0xbe,0xba]
-        	pshufb	0xbabecafe,%mm3
-
-// CHECK: pshufb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0x78,0x56,0x34,0x12]
-        	pshufb	0x12345678,%mm3
-
-// CHECK: pshufb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x00,0xdb]
-        	pshufb	%mm3,%mm3
-
-// CHECK: pshufb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pshufb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pshufb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0x45,0x00,0x00,0x00]
-        	pshufb	0x45,%xmm5
-
-// CHECK: pshufb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0xed,0x7e,0x00,0x00]
-        	pshufb	0x7eed,%xmm5
-
-// CHECK: pshufb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0xfe,0xca,0xbe,0xba]
-        	pshufb	0xbabecafe,%xmm5
-
-// CHECK: pshufb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0x78,0x56,0x34,0x12]
-        	pshufb	0x12345678,%xmm5
-
-// CHECK: pshufb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0xed]
-        	pshufb	%xmm5,%xmm5
-
-// CHECK: psignb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psignb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psignb	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0x45,0x00,0x00,0x00]
-        	psignb	0x45,%mm3
-
-// CHECK: psignb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0xed,0x7e,0x00,0x00]
-        	psignb	0x7eed,%mm3
-
-// CHECK: psignb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0xfe,0xca,0xbe,0xba]
-        	psignb	0xbabecafe,%mm3
-
-// CHECK: psignb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0x78,0x56,0x34,0x12]
-        	psignb	0x12345678,%mm3
-
-// CHECK: psignb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x08,0xdb]
-        	psignb	%mm3,%mm3
-
-// CHECK: psignb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psignb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psignb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0x45,0x00,0x00,0x00]
-        	psignb	0x45,%xmm5
-
-// CHECK: psignb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0xed,0x7e,0x00,0x00]
-        	psignb	0x7eed,%xmm5
-
-// CHECK: psignb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0xfe,0xca,0xbe,0xba]
-        	psignb	0xbabecafe,%xmm5
-
-// CHECK: psignb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0x78,0x56,0x34,0x12]
-        	psignb	0x12345678,%xmm5
-
-// CHECK: psignb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0xed]
-        	psignb	%xmm5,%xmm5
-
-// CHECK: psignw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psignw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psignw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0x45,0x00,0x00,0x00]
-        	psignw	0x45,%mm3
-
-// CHECK: psignw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0xed,0x7e,0x00,0x00]
-        	psignw	0x7eed,%mm3
-
-// CHECK: psignw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0xfe,0xca,0xbe,0xba]
-        	psignw	0xbabecafe,%mm3
-
-// CHECK: psignw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0x78,0x56,0x34,0x12]
-        	psignw	0x12345678,%mm3
-
-// CHECK: psignw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x09,0xdb]
-        	psignw	%mm3,%mm3
-
-// CHECK: psignw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psignw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psignw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0x45,0x00,0x00,0x00]
-        	psignw	0x45,%xmm5
-
-// CHECK: psignw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0xed,0x7e,0x00,0x00]
-        	psignw	0x7eed,%xmm5
-
-// CHECK: psignw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0xfe,0xca,0xbe,0xba]
-        	psignw	0xbabecafe,%xmm5
-
-// CHECK: psignw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0x78,0x56,0x34,0x12]
-        	psignw	0x12345678,%xmm5
-
-// CHECK: psignw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0xed]
-        	psignw	%xmm5,%xmm5
-
-// CHECK: psignd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	psignd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: psignd	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0x45,0x00,0x00,0x00]
-        	psignd	0x45,%mm3
-
-// CHECK: psignd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0xed,0x7e,0x00,0x00]
-        	psignd	0x7eed,%mm3
-
-// CHECK: psignd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0xfe,0xca,0xbe,0xba]
-        	psignd	0xbabecafe,%mm3
-
-// CHECK: psignd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0x78,0x56,0x34,0x12]
-        	psignd	0x12345678,%mm3
-
-// CHECK: psignd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x0a,0xdb]
-        	psignd	%mm3,%mm3
-
-// CHECK: psignd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	psignd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: psignd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0x45,0x00,0x00,0x00]
-        	psignd	0x45,%xmm5
-
-// CHECK: psignd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0xed,0x7e,0x00,0x00]
-        	psignd	0x7eed,%xmm5
-
-// CHECK: psignd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0xfe,0xca,0xbe,0xba]
-        	psignd	0xbabecafe,%xmm5
-
-// CHECK: psignd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0x78,0x56,0x34,0x12]
-        	psignd	0x12345678,%xmm5
-
-// CHECK: psignd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0xed]
-        	psignd	%xmm5,%xmm5
-
-// CHECK: pabsb	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pabsb	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0x45,0x00,0x00,0x00]
-        	pabsb	0x45,%mm3
-
-// CHECK: pabsb	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0xed,0x7e,0x00,0x00]
-        	pabsb	0x7eed,%mm3
-
-// CHECK: pabsb	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0xfe,0xca,0xbe,0xba]
-        	pabsb	0xbabecafe,%mm3
-
-// CHECK: pabsb	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0x78,0x56,0x34,0x12]
-        	pabsb	0x12345678,%mm3
-
-// CHECK: pabsb	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1c,0xdb]
-        	pabsb	%mm3,%mm3
-
-// CHECK: pabsb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pabsb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0x45,0x00,0x00,0x00]
-        	pabsb	0x45,%xmm5
-
-// CHECK: pabsb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0xed,0x7e,0x00,0x00]
-        	pabsb	0x7eed,%xmm5
-
-// CHECK: pabsb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0xfe,0xca,0xbe,0xba]
-        	pabsb	0xbabecafe,%xmm5
-
-// CHECK: pabsb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0x78,0x56,0x34,0x12]
-        	pabsb	0x12345678,%xmm5
-
-// CHECK: pabsb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0xed]
-        	pabsb	%xmm5,%xmm5
-
-// CHECK: pabsw	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pabsw	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0x45,0x00,0x00,0x00]
-        	pabsw	0x45,%mm3
-
-// CHECK: pabsw	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0xed,0x7e,0x00,0x00]
-        	pabsw	0x7eed,%mm3
-
-// CHECK: pabsw	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0xfe,0xca,0xbe,0xba]
-        	pabsw	0xbabecafe,%mm3
-
-// CHECK: pabsw	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0x78,0x56,0x34,0x12]
-        	pabsw	0x12345678,%mm3
-
-// CHECK: pabsw	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1d,0xdb]
-        	pabsw	%mm3,%mm3
-
-// CHECK: pabsw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pabsw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0x45,0x00,0x00,0x00]
-        	pabsw	0x45,%xmm5
-
-// CHECK: pabsw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0xed,0x7e,0x00,0x00]
-        	pabsw	0x7eed,%xmm5
-
-// CHECK: pabsw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0xfe,0xca,0xbe,0xba]
-        	pabsw	0xbabecafe,%xmm5
-
-// CHECK: pabsw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0x78,0x56,0x34,0x12]
-        	pabsw	0x12345678,%xmm5
-
-// CHECK: pabsw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0xed]
-        	pabsw	%xmm5,%xmm5
-
-// CHECK: pabsd	3735928559(%ebx,%ecx,8), %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
-
-// CHECK: pabsd	69, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0x45,0x00,0x00,0x00]
-        	pabsd	0x45,%mm3
-
-// CHECK: pabsd	32493, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0xed,0x7e,0x00,0x00]
-        	pabsd	0x7eed,%mm3
-
-// CHECK: pabsd	3133065982, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0xfe,0xca,0xbe,0xba]
-        	pabsd	0xbabecafe,%mm3
-
-// CHECK: pabsd	305419896, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0x78,0x56,0x34,0x12]
-        	pabsd	0x12345678,%mm3
-
-// CHECK: pabsd	%mm3, %mm3
-// CHECK:  encoding: [0x0f,0x38,0x1e,0xdb]
-        	pabsd	%mm3,%mm3
-
-// CHECK: pabsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pabsd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0x45,0x00,0x00,0x00]
-        	pabsd	0x45,%xmm5
-
-// CHECK: pabsd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0xed,0x7e,0x00,0x00]
-        	pabsd	0x7eed,%xmm5
-
-// CHECK: pabsd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0xfe,0xca,0xbe,0xba]
-        	pabsd	0xbabecafe,%xmm5
-
-// CHECK: pabsd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0x78,0x56,0x34,0x12]
-        	pabsd	0x12345678,%xmm5
-
-// CHECK: pabsd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0xed]
-        	pabsd	%xmm5,%xmm5
-
-// CHECK: femms
-// CHECK:  encoding: [0x0f,0x0e]
-        	femms
-
-// CHECK: movntdqa	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	movntdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: movntdqa	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0x45,0x00,0x00,0x00]
-        	movntdqa	0x45,%xmm5
-
-// CHECK: movntdqa	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0xed,0x7e,0x00,0x00]
-        	movntdqa	0x7eed,%xmm5
-
-// CHECK: movntdqa	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
-        	movntdqa	0xbabecafe,%xmm5
-
-// CHECK: movntdqa	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0x78,0x56,0x34,0x12]
-        	movntdqa	0x12345678,%xmm5
-
-// CHECK: packusdw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	packusdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: packusdw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0x45,0x00,0x00,0x00]
-        	packusdw	0x45,%xmm5
-
-// CHECK: packusdw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0xed,0x7e,0x00,0x00]
-        	packusdw	0x7eed,%xmm5
-
-// CHECK: packusdw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
-        	packusdw	0xbabecafe,%xmm5
-
-// CHECK: packusdw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0x78,0x56,0x34,0x12]
-        	packusdw	0x12345678,%xmm5
-
-// CHECK: packusdw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0xed]
-        	packusdw	%xmm5,%xmm5
-
-// CHECK: pcmpeqq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpeqq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpeqq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpeqq	0x45,%xmm5
-
-// CHECK: pcmpeqq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpeqq	0x7eed,%xmm5
-
-// CHECK: pcmpeqq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpeqq	0xbabecafe,%xmm5
-
-// CHECK: pcmpeqq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpeqq	0x12345678,%xmm5
-
-// CHECK: pcmpeqq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0xed]
-        	pcmpeqq	%xmm5,%xmm5
-
-// CHECK: phminposuw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: phminposuw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0x45,0x00,0x00,0x00]
-        	phminposuw	0x45,%xmm5
-
-// CHECK: phminposuw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0xed,0x7e,0x00,0x00]
-        	phminposuw	0x7eed,%xmm5
-
-// CHECK: phminposuw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0xfe,0xca,0xbe,0xba]
-        	phminposuw	0xbabecafe,%xmm5
-
-// CHECK: phminposuw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0x78,0x56,0x34,0x12]
-        	phminposuw	0x12345678,%xmm5
-
-// CHECK: phminposuw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0xed]
-        	phminposuw	%xmm5,%xmm5
-
-// CHECK: pmaxsb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxsb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxsb	0x45,%xmm5
-
-// CHECK: pmaxsb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxsb	0x7eed,%xmm5
-
-// CHECK: pmaxsb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxsb	0xbabecafe,%xmm5
-
-// CHECK: pmaxsb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxsb	0x12345678,%xmm5
-
-// CHECK: pmaxsb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0xed]
-        	pmaxsb	%xmm5,%xmm5
-
-// CHECK: pmaxsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxsd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxsd	0x45,%xmm5
-
-// CHECK: pmaxsd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxsd	0x7eed,%xmm5
-
-// CHECK: pmaxsd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxsd	0xbabecafe,%xmm5
-
-// CHECK: pmaxsd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxsd	0x12345678,%xmm5
-
-// CHECK: pmaxsd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0xed]
-        	pmaxsd	%xmm5,%xmm5
-
-// CHECK: pmaxud	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxud	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxud	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxud	0x45,%xmm5
-
-// CHECK: pmaxud	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxud	0x7eed,%xmm5
-
-// CHECK: pmaxud	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxud	0xbabecafe,%xmm5
-
-// CHECK: pmaxud	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxud	0x12345678,%xmm5
-
-// CHECK: pmaxud	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0xed]
-        	pmaxud	%xmm5,%xmm5
-
-// CHECK: pmaxuw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmaxuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmaxuw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0x45,0x00,0x00,0x00]
-        	pmaxuw	0x45,%xmm5
-
-// CHECK: pmaxuw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0xed,0x7e,0x00,0x00]
-        	pmaxuw	0x7eed,%xmm5
-
-// CHECK: pmaxuw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmaxuw	0xbabecafe,%xmm5
-
-// CHECK: pmaxuw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0x78,0x56,0x34,0x12]
-        	pmaxuw	0x12345678,%xmm5
-
-// CHECK: pmaxuw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0xed]
-        	pmaxuw	%xmm5,%xmm5
-
-// CHECK: pminsb	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminsb	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0x45,0x00,0x00,0x00]
-        	pminsb	0x45,%xmm5
-
-// CHECK: pminsb	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0xed,0x7e,0x00,0x00]
-        	pminsb	0x7eed,%xmm5
-
-// CHECK: pminsb	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminsb	0xbabecafe,%xmm5
-
-// CHECK: pminsb	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0x78,0x56,0x34,0x12]
-        	pminsb	0x12345678,%xmm5
-
-// CHECK: pminsb	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0xed]
-        	pminsb	%xmm5,%xmm5
-
-// CHECK: pminsd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminsd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0x45,0x00,0x00,0x00]
-        	pminsd	0x45,%xmm5
-
-// CHECK: pminsd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0xed,0x7e,0x00,0x00]
-        	pminsd	0x7eed,%xmm5
-
-// CHECK: pminsd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminsd	0xbabecafe,%xmm5
-
-// CHECK: pminsd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0x78,0x56,0x34,0x12]
-        	pminsd	0x12345678,%xmm5
-
-// CHECK: pminsd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0xed]
-        	pminsd	%xmm5,%xmm5
-
-// CHECK: pminud	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminud	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminud	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0x45,0x00,0x00,0x00]
-        	pminud	0x45,%xmm5
-
-// CHECK: pminud	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0xed,0x7e,0x00,0x00]
-        	pminud	0x7eed,%xmm5
-
-// CHECK: pminud	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminud	0xbabecafe,%xmm5
-
-// CHECK: pminud	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0x78,0x56,0x34,0x12]
-        	pminud	0x12345678,%xmm5
-
-// CHECK: pminud	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0xed]
-        	pminud	%xmm5,%xmm5
-
-// CHECK: pminuw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pminuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pminuw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0x45,0x00,0x00,0x00]
-        	pminuw	0x45,%xmm5
-
-// CHECK: pminuw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0xed,0x7e,0x00,0x00]
-        	pminuw	0x7eed,%xmm5
-
-// CHECK: pminuw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0xfe,0xca,0xbe,0xba]
-        	pminuw	0xbabecafe,%xmm5
-
-// CHECK: pminuw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0x78,0x56,0x34,0x12]
-        	pminuw	0x12345678,%xmm5
-
-// CHECK: pminuw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0xed]
-        	pminuw	%xmm5,%xmm5
-
-// CHECK: pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxbw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxbw	0x45,%xmm5
-
-// CHECK: pmovsxbw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxbw	0x7eed,%xmm5
-
-// CHECK: pmovsxbw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxbw	0xbabecafe,%xmm5
-
-// CHECK: pmovsxbw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxbw	0x12345678,%xmm5
-
-// CHECK: pmovsxbw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0xed]
-        	pmovsxbw	%xmm5,%xmm5
-
-// CHECK: pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxbd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxbd	0x45,%xmm5
-
-// CHECK: pmovsxbd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxbd	0x7eed,%xmm5
-
-// CHECK: pmovsxbd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxbd	0xbabecafe,%xmm5
-
-// CHECK: pmovsxbd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxbd	0x12345678,%xmm5
-
-// CHECK: pmovsxbd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0xed]
-        	pmovsxbd	%xmm5,%xmm5
-
-// CHECK: pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxbq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxbq	0x45,%xmm5
-
-// CHECK: pmovsxbq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxbq	0x7eed,%xmm5
-
-// CHECK: pmovsxbq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxbq	0xbabecafe,%xmm5
-
-// CHECK: pmovsxbq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxbq	0x12345678,%xmm5
-
-// CHECK: pmovsxbq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0xed]
-        	pmovsxbq	%xmm5,%xmm5
-
-// CHECK: pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxwd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxwd	0x45,%xmm5
-
-// CHECK: pmovsxwd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxwd	0x7eed,%xmm5
-
-// CHECK: pmovsxwd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxwd	0xbabecafe,%xmm5
-
-// CHECK: pmovsxwd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxwd	0x12345678,%xmm5
-
-// CHECK: pmovsxwd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0xed]
-        	pmovsxwd	%xmm5,%xmm5
-
-// CHECK: pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxwq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxwq	0x45,%xmm5
-
-// CHECK: pmovsxwq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxwq	0x7eed,%xmm5
-
-// CHECK: pmovsxwq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxwq	0xbabecafe,%xmm5
-
-// CHECK: pmovsxwq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxwq	0x12345678,%xmm5
-
-// CHECK: pmovsxwq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0xed]
-        	pmovsxwq	%xmm5,%xmm5
-
-// CHECK: pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovsxdq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0x45,0x00,0x00,0x00]
-        	pmovsxdq	0x45,%xmm5
-
-// CHECK: pmovsxdq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovsxdq	0x7eed,%xmm5
-
-// CHECK: pmovsxdq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovsxdq	0xbabecafe,%xmm5
-
-// CHECK: pmovsxdq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0x78,0x56,0x34,0x12]
-        	pmovsxdq	0x12345678,%xmm5
-
-// CHECK: pmovsxdq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0xed]
-        	pmovsxdq	%xmm5,%xmm5
-
-// CHECK: pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxbw	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxbw	0x45,%xmm5
-
-// CHECK: pmovzxbw	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxbw	0x7eed,%xmm5
-
-// CHECK: pmovzxbw	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxbw	0xbabecafe,%xmm5
-
-// CHECK: pmovzxbw	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxbw	0x12345678,%xmm5
-
-// CHECK: pmovzxbw	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0xed]
-        	pmovzxbw	%xmm5,%xmm5
-
-// CHECK: pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxbd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxbd	0x45,%xmm5
-
-// CHECK: pmovzxbd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxbd	0x7eed,%xmm5
-
-// CHECK: pmovzxbd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxbd	0xbabecafe,%xmm5
-
-// CHECK: pmovzxbd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxbd	0x12345678,%xmm5
-
-// CHECK: pmovzxbd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0xed]
-        	pmovzxbd	%xmm5,%xmm5
-
-// CHECK: pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxbq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxbq	0x45,%xmm5
-
-// CHECK: pmovzxbq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxbq	0x7eed,%xmm5
-
-// CHECK: pmovzxbq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxbq	0xbabecafe,%xmm5
-
-// CHECK: pmovzxbq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxbq	0x12345678,%xmm5
-
-// CHECK: pmovzxbq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0xed]
-        	pmovzxbq	%xmm5,%xmm5
-
-// CHECK: pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxwd	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxwd	0x45,%xmm5
-
-// CHECK: pmovzxwd	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxwd	0x7eed,%xmm5
-
-// CHECK: pmovzxwd	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxwd	0xbabecafe,%xmm5
-
-// CHECK: pmovzxwd	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxwd	0x12345678,%xmm5
-
-// CHECK: pmovzxwd	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0xed]
-        	pmovzxwd	%xmm5,%xmm5
-
-// CHECK: pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxwq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxwq	0x45,%xmm5
-
-// CHECK: pmovzxwq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxwq	0x7eed,%xmm5
-
-// CHECK: pmovzxwq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxwq	0xbabecafe,%xmm5
-
-// CHECK: pmovzxwq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxwq	0x12345678,%xmm5
-
-// CHECK: pmovzxwq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0xed]
-        	pmovzxwq	%xmm5,%xmm5
-
-// CHECK: pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmovzxdq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0x45,0x00,0x00,0x00]
-        	pmovzxdq	0x45,%xmm5
-
-// CHECK: pmovzxdq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0xed,0x7e,0x00,0x00]
-        	pmovzxdq	0x7eed,%xmm5
-
-// CHECK: pmovzxdq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmovzxdq	0xbabecafe,%xmm5
-
-// CHECK: pmovzxdq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0x78,0x56,0x34,0x12]
-        	pmovzxdq	0x12345678,%xmm5
-
-// CHECK: pmovzxdq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0xed]
-        	pmovzxdq	%xmm5,%xmm5
-
-// CHECK: pmuldq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmuldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmuldq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0x45,0x00,0x00,0x00]
-        	pmuldq	0x45,%xmm5
-
-// CHECK: pmuldq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0xed,0x7e,0x00,0x00]
-        	pmuldq	0x7eed,%xmm5
-
-// CHECK: pmuldq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmuldq	0xbabecafe,%xmm5
-
-// CHECK: pmuldq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0x78,0x56,0x34,0x12]
-        	pmuldq	0x12345678,%xmm5
-
-// CHECK: pmuldq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0xed]
-        	pmuldq	%xmm5,%xmm5
-
-// CHECK: pmulld	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pmulld	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pmulld	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0x45,0x00,0x00,0x00]
-        	pmulld	0x45,%xmm5
-
-// CHECK: pmulld	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0xed,0x7e,0x00,0x00]
-        	pmulld	0x7eed,%xmm5
-
-// CHECK: pmulld	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0xfe,0xca,0xbe,0xba]
-        	pmulld	0xbabecafe,%xmm5
-
-// CHECK: pmulld	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0x78,0x56,0x34,0x12]
-        	pmulld	0x12345678,%xmm5
-
-// CHECK: pmulld	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0xed]
-        	pmulld	%xmm5,%xmm5
-
-// CHECK: ptest 	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: ptest 	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0x45,0x00,0x00,0x00]
-        	ptest	0x45,%xmm5
-
-// CHECK: ptest 	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0xed,0x7e,0x00,0x00]
-        	ptest	0x7eed,%xmm5
-
-// CHECK: ptest 	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0xfe,0xca,0xbe,0xba]
-        	ptest	0xbabecafe,%xmm5
-
-// CHECK: ptest 	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0x78,0x56,0x34,0x12]
-        	ptest	0x12345678,%xmm5
-
-// CHECK: ptest 	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0xed]
-        	ptest	%xmm5,%xmm5
-
-// CHECK: pcmpgtq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0xac,0xcb,0xef,0xbe,0xad,0xde]
-        	pcmpgtq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-
-// CHECK: pcmpgtq	69, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0x45,0x00,0x00,0x00]
-        	pcmpgtq	0x45,%xmm5
-
-// CHECK: pcmpgtq	32493, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0xed,0x7e,0x00,0x00]
-        	pcmpgtq	0x7eed,%xmm5
-
-// CHECK: pcmpgtq	3133065982, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0xfe,0xca,0xbe,0xba]
-        	pcmpgtq	0xbabecafe,%xmm5
-
-// CHECK: pcmpgtq	305419896, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0x78,0x56,0x34,0x12]
-        	pcmpgtq	0x12345678,%xmm5
-
-// CHECK: pcmpgtq	%xmm5, %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0xed]
-        	pcmpgtq	%xmm5,%xmm5
-
-// CHECK: crc32b 	%bl, %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
-                crc32b %bl, %eax
-
-// CHECK: crc32b 	4(%ebx), %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
-                crc32b 4(%ebx), %eax
-
-// CHECK: crc32w 	%bx, %eax
-// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
-                crc32w %bx, %eax
-
-// CHECK: crc32w 	4(%ebx), %eax
-// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
-                crc32w 4(%ebx), %eax
-
-// CHECK: crc32l 	%ebx, %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
-                crc32l %ebx, %eax
-
-// CHECK: crc32l 	4(%ebx), %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
-                crc32l 4(%ebx), %eax
-
-// CHECK: crc32l 	3735928559(%ebx,%ecx,8), %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
-
-// CHECK: crc32l 	69, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0x45,0x00,0x00,0x00]
-                crc32l 0x45,%ecx
-
-// CHECK: crc32l 	32493, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xed,0x7e,0x00,0x00]
-                crc32l 0x7eed,%ecx
-
-// CHECK: crc32l 	3133065982, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xfe,0xca,0xbe,0xba]
-                crc32l 0xbabecafe,%ecx
-
-// CHECK: crc32l 	%ecx, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
-                crc32l %ecx,%ecx
-
-// CHECK: pcmpistrm	$125, %xmm1, %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x3a,0x62,0xd1,0x7d]
-                pcmpistrm $125, %xmm1, %xmm2
-
-// CHECK: pcmpistrm	$125, (%edx,%eax,4), %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x3a,0x62,0x14,0x82,0x7d]
-                pcmpistrm $125, (%edx,%eax,4), %xmm2
-
-// CHECK: aesimc	%xmm0, %xmm1
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdb,0xc8]
-                aesimc %xmm0,%xmm1
-
-// CHECK: aesimc	(%eax), %xmm1
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdb,0x08]
-                aesimc (%eax),%xmm1
-
-// CHECK: aesenc	%xmm1, %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdc,0xd1]
-                aesenc %xmm1,%xmm2
-
-// CHECK: aesenc	4(%ebx), %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdc,0x53,0x04]
-                aesenc 4(%ebx),%xmm2
-
-// CHECK: aesenclast	%xmm3, %xmm4
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdd,0xe3]
-                aesenclast %xmm3,%xmm4
-
-// CHECK: aesenclast	4(%edx,%edi), %xmm4
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdd,0x64,0x3a,0x04]
-                aesenclast 4(%edx,%edi),%xmm4
-
-// CHECK: aesdec	%xmm5, %xmm6
-// CHECK:  encoding: [0x66,0x0f,0x38,0xde,0xf5]
-                aesdec %xmm5,%xmm6
-
-// CHECK: aesdec	4(%ecx,%eax,8), %xmm6
-// CHECK:  encoding: [0x66,0x0f,0x38,0xde,0x74,0xc1,0x04]
-                aesdec 4(%ecx,%eax,8),%xmm6
-
-// CHECK: aesdeclast	%xmm7, %xmm0
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdf,0xc7]
-                aesdeclast %xmm7,%xmm0
-
-// CHECK: aesdeclast	3405691582, %xmm0
-// CHECK:  encoding: [0x66,0x0f,0x38,0xdf,0x05,0xbe,0xba,0xfe,0xca]
-                aesdeclast 0xcafebabe,%xmm0
-
-// CHECK: aeskeygenassist	$125, %xmm1, %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0xd1,0x7d]
-                aeskeygenassist $125, %xmm1, %xmm2
-
-// CHECK: aeskeygenassist	$125, (%edx,%eax,4), %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x7d]
-                aeskeygenassist $125, (%edx,%eax,4), %xmm2
-
-// rdar://8017638
-// CHECK: aeskeygenassist	$128, %xmm1, %xmm2
-// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x80]
-		aeskeygenassist $128, %xmm1, %xmm2
-
-// rdar://7910087
-// CHECK: bsfw	%bx, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbc,0xdb]
-          bsfw  %bx, %bx
-
-// CHECK: bsfw	3735928559(%ebx,%ecx,8), %bx
-// CHECK:  encoding: [0x66,0x0f,0xbc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-          bsfw  3735928559(%ebx,%ecx,8), %bx
-
-// CHECK: bsrw	%bx, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbd,0xdb]
-          bsrw  %bx, %bx
-
-// CHECK: bsrw	305419896, %bx
-// CHECK:  encoding: [0x66,0x0f,0xbd,0x1d,0x78,0x56,0x34,0x12]
-          bsrw  305419896, %bx
-
-// radr://7901779
-// CHECK: pushl   $127
-// CHECK:  encoding: [0x6a,0xfe]
-          pushl   $127
-
-// CHECK: pushw   $254
-// CHECK:  encoding: [0x66,0x68,0xfe,0x00]
-          pushw   $254
-
-// CHECK: pushl   $254
-// CHECK:  encoding: [0x68,0xfe,0x00,0x00,0x00]
-          pushl   $254
-
-// radr://7928400
-// CHECK: movq    %mm3, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x7f,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-          movq    %mm3, 3735928559(%ebx,%ecx,8)
-
-// CHECK: movd    %mm3, 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0x0f,0x7e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-          movd    %mm3, 3735928559(%ebx,%ecx,8)
-
-// CHECK: movq    3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0xf3,0x0f,0x7e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          movq    3735928559(%ebx,%ecx,8), %xmm5
-
-// CHECK: movd    3735928559(%ebx,%ecx,8), %xmm5
-// CHECK:  encoding: [0x66,0x0f,0x6e,0xac,0xcb,0xef,0xbe,0xad,0xde]
-          movd    3735928559(%ebx,%ecx,8), %xmm5
-
-// radr://7914715
-// CHECK: fcoml   3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0x94,0xcb,0xef,0xbe,0xad,0xde]
-          fcoml   3735928559(%ebx,%ecx,8)
-
-// CHECK: fcoms   32493
-// CHECK:  encoding: [0xd8,0x15,0xed,0x7e,0x00,0x00]
-          fcoms   32493
-
-// CHECK: fcompl  3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xdc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-          fcompl  3735928559(%ebx,%ecx,8)
-
-// CHECK: fcomps  32493
-// CHECK:  encoding: [0xd8,0x1d,0xed,0x7e,0x00,0x00]
-          fcomps  32493
-
-// CHECK: ficoml  3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x94,0xcb,0xef,0xbe,0xad,0xde]
-          ficoml  3735928559(%ebx,%ecx,8)
-
-// CHECK: ficoms  32493
-// CHECK:  encoding: [0xde,0x15,0xed,0x7e,0x00,0x00]
-          ficoms  32493
-
-// CHECK: ficompl 3735928559(%ebx,%ecx,8)
-// CHECK:  encoding: [0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
-          ficompl 3735928559(%ebx,%ecx,8)
-
-// CHECK: ficomps 32493
-// CHECK:  encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00]
-          ficomps 32493
-
-// CHECK: movl  57005(,%eiz), %ebx
-// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
-          movl  57005(,%eiz), %ebx
-
-// CHECK: movl  48879(,%eiz), %eax
-// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
-          movl  48879(,%eiz), %eax
-
-// CHECK: movl  -4(,%eiz,8), %eax
-// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
-          movl  -4(,%eiz,8), %eax
-
-// CHECK: movl  (%ecx,%eiz), %eax
-// CHECK: encoding: [0x8b,0x04,0x21]
-          movl  (%ecx,%eiz), %eax
-
-// CHECK: movl  (%ecx,%eiz,8), %eax
-// CHECK: encoding: [0x8b,0x04,0xe1]
-          movl  (%ecx,%eiz,8), %eax
-
diff --git a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s
deleted file mode 100644
index db7efecfb51b..000000000000
--- a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s
+++ /dev/null
@@ -1,674 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vfmadd132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
-          vfmadd132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
-          vfmadd132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
-          vfmadd132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
-          vfmadd132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
-          vfmadd213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
-          vfmadd213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
-          vfmadd213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
-          vfmadd213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
-          vfmadd231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
-          vfmadd231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
-          vfmadd231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
-          vfmadd231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
-          vfmadd132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
-          vfmadd132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
-          vfmadd132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
-          vfmadd132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
-          vfmadd213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
-          vfmadd213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
-          vfmadd213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
-          vfmadd213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
-          vfmadd231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
-          vfmadd231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
-          vfmadd231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
-          vfmadd231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
-          vfmadd132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
-          vfmadd132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
-          vfmadd132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
-          vfmadd132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
-          vfmadd213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
-          vfmadd213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
-          vfmadd213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
-          vfmadd213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
-          vfmadd231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
-          vfmadd231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
-          vfmadd231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmadd231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
-          vfmadd231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca]
-          vfmaddsub132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08]
-          vfmaddsub132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca]
-          vfmaddsub132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08]
-          vfmaddsub132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca]
-          vfmaddsub213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08]
-          vfmaddsub213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca]
-          vfmaddsub213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08]
-          vfmaddsub213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca]
-          vfmaddsub231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08]
-          vfmaddsub231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmaddsub231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca]
-          vfmaddsub231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmaddsub231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08]
-          vfmaddsub231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca]
-          vfmsubadd132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08]
-          vfmsubadd132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca]
-          vfmsubadd132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08]
-          vfmsubadd132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca]
-          vfmsubadd213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08]
-          vfmsubadd213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca]
-          vfmsubadd213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08]
-          vfmsubadd213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca]
-          vfmsubadd231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08]
-          vfmsubadd231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsubadd231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca]
-          vfmsubadd231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsubadd231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08]
-          vfmsubadd231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca]
-          vfmsub132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08]
-          vfmsub132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca]
-          vfmsub132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08]
-          vfmsub132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca]
-          vfmsub213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08]
-          vfmsub213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca]
-          vfmsub213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08]
-          vfmsub213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca]
-          vfmsub231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08]
-          vfmsub231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmsub231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca]
-          vfmsub231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfmsub231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08]
-          vfmsub231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca]
-          vfnmadd132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0x08]
-          vfnmadd132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca]
-          vfnmadd132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08]
-          vfnmadd132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca]
-          vfnmadd213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08]
-          vfnmadd213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca]
-          vfnmadd213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08]
-          vfnmadd213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca]
-          vfnmadd231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08]
-          vfnmadd231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmadd231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca]
-          vfnmadd231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmadd231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08]
-          vfnmadd231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub132pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca]
-          vfnmsub132pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub132pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08]
-          vfnmsub132pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub132ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca]
-          vfnmsub132ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub132ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08]
-          vfnmsub132ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub213pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca]
-          vfnmsub213pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub213pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08]
-          vfnmsub213pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub213ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca]
-          vfnmsub213ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub213ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08]
-          vfnmsub213ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub231pd  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca]
-          vfnmsub231pd  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub231pd  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08]
-          vfnmsub231pd  (%eax), %xmm5, %xmm1
-
-// CHECK: vfnmsub231ps  %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca]
-          vfnmsub231ps  %xmm2, %xmm5, %xmm1
-
-// CHECK: vfnmsub231ps  (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08]
-          vfnmsub231ps  (%eax), %xmm5, %xmm1
-
-// CHECK: vfmadd132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
-          vfmadd132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
-          vfmadd132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
-          vfmadd132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
-          vfmadd132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
-          vfmadd213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
-          vfmadd213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
-          vfmadd213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
-          vfmadd213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
-          vfmadd231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
-          vfmadd231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmadd231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
-          vfmadd231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmadd231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
-          vfmadd231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca]
-          vfmaddsub132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08]
-          vfmaddsub132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca]
-          vfmaddsub132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08]
-          vfmaddsub132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca]
-          vfmaddsub213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08]
-          vfmaddsub213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca]
-          vfmaddsub213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08]
-          vfmaddsub213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca]
-          vfmaddsub231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08]
-          vfmaddsub231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmaddsub231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca]
-          vfmaddsub231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmaddsub231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0x08]
-          vfmaddsub231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca]
-          vfmsubadd132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08]
-          vfmsubadd132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca]
-          vfmsubadd132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08]
-          vfmsubadd132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca]
-          vfmsubadd213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08]
-          vfmsubadd213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca]
-          vfmsubadd213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08]
-          vfmsubadd213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca]
-          vfmsubadd231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08]
-          vfmsubadd231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsubadd231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca]
-          vfmsubadd231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsubadd231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08]
-          vfmsubadd231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca]
-          vfmsub132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08]
-          vfmsub132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0xca]
-          vfmsub132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08]
-          vfmsub132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca]
-          vfmsub213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08]
-          vfmsub213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca]
-          vfmsub213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08]
-          vfmsub213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca]
-          vfmsub231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08]
-          vfmsub231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfmsub231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca]
-          vfmsub231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfmsub231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08]
-          vfmsub231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca]
-          vfnmadd132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08]
-          vfnmadd132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca]
-          vfnmadd132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08]
-          vfnmadd132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca]
-          vfnmadd213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08]
-          vfnmadd213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca]
-          vfnmadd213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08]
-          vfnmadd213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca]
-          vfnmadd231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08]
-          vfnmadd231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmadd231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca]
-          vfnmadd231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmadd231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08]
-          vfnmadd231ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub132pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca]
-          vfnmsub132pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub132pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08]
-          vfnmsub132pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub132ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca]
-          vfnmsub132ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub132ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08]
-          vfnmsub132ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub213pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca]
-          vfnmsub213pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub213pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08]
-          vfnmsub213pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub213ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca]
-          vfnmsub213ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub213ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08]
-          vfnmsub213ps  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub231pd  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca]
-          vfnmsub231pd  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub231pd  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08]
-          vfnmsub231pd  (%eax), %ymm5, %ymm1
-
-// CHECK: vfnmsub231ps  %ymm2, %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca]
-          vfnmsub231ps  %ymm2, %ymm5, %ymm1
-
-// CHECK: vfnmsub231ps  (%eax), %ymm5, %ymm1
-// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08]
-          vfnmsub231ps  (%eax), %ymm5, %ymm1
-
diff --git a/test/MC/AsmParser/X86/x86_32-mismatched-add.s b/test/MC/AsmParser/X86/x86_32-mismatched-add.s
deleted file mode 100644
index 0840c65ca95a..000000000000
--- a/test/MC/AsmParser/X86/x86_32-mismatched-add.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-// XFAIL: *
-
-// CHECK: addl	$4294967295, %eax       # encoding: [0x83,0xc0,0xff]
-        addl $0xFFFFFFFF, %eax
-
-// CHECK: addl	$65535, %eax       # encoding: [0x66,0x83,0xc0,0xff]
-        addw $0xFFFF, %ax
diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s
deleted file mode 100644
index e3aa1887ef81..000000000000
--- a/test/MC/AsmParser/X86/x86_32-new-encoder.s
+++ /dev/null
@@ -1,425 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
-
-	pause
-// CHECK: pause
-// CHECK: encoding: [0xf3,0x90]
-	sfence
-// CHECK: sfence
-// CHECK: encoding: [0x0f,0xae,0xf8]
-	lfence
-// CHECK: lfence
-// CHECK: encoding: [0x0f,0xae,0xe8]
-	mfence
-// CHECK: mfence
-// CHECK: encoding: [0x0f,0xae,0xf0]
-	monitor
-// CHECK: monitor
-// CHECK: encoding: [0x0f,0x01,0xc8]
-	mwait
-// CHECK: mwait
-// CHECK: encoding: [0x0f,0x01,0xc9]
-
-	vmcall
-// CHECK: vmcall
-// CHECK: encoding: [0x0f,0x01,0xc1]
-	vmlaunch
-// CHECK: vmlaunch
-// CHECK: encoding: [0x0f,0x01,0xc2]
-	vmresume
-// CHECK: vmresume
-// CHECK: encoding: [0x0f,0x01,0xc3]
-	vmxoff
-// CHECK: vmxoff
-// CHECK: encoding: [0x0f,0x01,0xc4]
-	swapgs
-// CHECK: swapgs
-// CHECK: encoding: [0x0f,0x01,0xf8]
-
-rdtscp
-// CHECK: rdtscp
-// CHECK:  encoding: [0x0f,0x01,0xf9]
-
-
-// CHECK: movl	%eax, 16(%ebp)          # encoding: [0x89,0x45,0x10]
-	movl	%eax, 16(%ebp)
-// CHECK: movl	%eax, -16(%ebp)          # encoding: [0x89,0x45,0xf0]
-	movl	%eax, -16(%ebp)
-
-// CHECK: testb	%bl, %cl                # encoding: [0x84,0xcb]
-        testb %bl, %cl
-
-// CHECK: cmpl	%eax, %ebx              # encoding: [0x39,0xc3]
-        cmpl %eax, %ebx
-
-// CHECK: addw	%ax, %ax                # encoding: [0x66,0x01,0xc0]
-        addw %ax, %ax
-
-// CHECK: shrl	%eax                    # encoding: [0xd1,0xe8]
-        shrl $1, %eax
-
-// moffset forms of moves, rdar://7947184
-movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,A,A,A,A]
-movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,A,A,A,A]
-movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,A,A,A,A]
-
-// rdar://7973775
-into
-// CHECK: into
-// CHECK:  encoding: [0xce]
-int3
-// CHECK: int3
-// CHECK:  encoding: [0xcc]
-int $4
-// CHECK: int $4
-// CHECK:  encoding: [0xcd,0x04]
-int $255
-// CHECK: int $255
-// CHECK:  encoding: [0xcd,0xff]
-
-// CHECK: pushfl	# encoding: [0x9c]
-        pushf
-// CHECK: pushfl	# encoding: [0x9c]
-        pushfl
-// CHECK: popfl	        # encoding: [0x9d]
-        popf
-// CHECK: popfl	        # encoding: [0x9d]
-        popfl
-
-// rdar://8014869
-retl
-// CHECK: ret
-// CHECK:  encoding: [0xc3]
-
-// rdar://7973854
-// CHECK: cmoval	%eax, %edx
-// CHECK:  encoding: [0x0f,0x47,0xd0]
-        	cmoval	%eax,%edx
-
-// CHECK: cmovael	%eax, %edx
-// CHECK:  encoding: [0x0f,0x43,0xd0]
-        	cmovael	%eax,%edx
-
-// CHECK: cmovbel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x46,0xd0]
-        	cmovbel	%eax,%edx
-
-// CHECK: cmovbl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x42,0xd0]
-        	cmovbl	%eax,%edx
-
-// CHECK: cmovbel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x46,0xd0]
-        	cmovbel	%eax,%edx
-
-// CHECK: cmovbl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x42,0xd0]
-        	cmovcl	%eax,%edx
-
-// CHECK: cmovel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x44,0xd0]
-        	cmovel	%eax,%edx
-
-// CHECK: cmovgl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4f,0xd0]
-        	cmovgl	%eax,%edx
-
-// CHECK: cmovgel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4d,0xd0]
-        	cmovgel	%eax,%edx
-
-// CHECK: cmovll	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4c,0xd0]
-        	cmovll	%eax,%edx
-
-// CHECK: cmovlel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4e,0xd0]
-        	cmovlel	%eax,%edx
-
-// CHECK: cmovbel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x46,0xd0]
-        	cmovnal	%eax,%edx
-
-// CHECK: cmovnel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x45,0xd0]
-        	cmovnel	%eax,%edx
-
-// CHECK: cmovael	%eax, %edx
-// CHECK:  encoding: [0x0f,0x43,0xd0]
-        	cmovnbl	%eax,%edx
-
-// CHECK: cmoval	%eax, %edx
-// CHECK:  encoding: [0x0f,0x47,0xd0]
-        	cmovnbel	%eax,%edx
-
-// CHECK: cmovael	%eax, %edx
-// CHECK:  encoding: [0x0f,0x43,0xd0]
-        	cmovncl	%eax,%edx
-
-// CHECK: cmovnel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x45,0xd0]
-        	cmovnel	%eax,%edx
-
-// CHECK: cmovlel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4e,0xd0]
-        	cmovngl	%eax,%edx
-
-// CHECK: cmovgel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4d,0xd0]
-        	cmovnl	%eax,%edx
-
-// CHECK: cmovnel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x45,0xd0]
-        	cmovnel	%eax,%edx
-
-// CHECK: cmovlel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4e,0xd0]
-        	cmovngl	%eax,%edx
-
-// CHECK: cmovll	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4c,0xd0]
-        	cmovngel	%eax,%edx
-
-// CHECK: cmovgel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4d,0xd0]
-        	cmovnll	%eax,%edx
-
-// CHECK: cmovgl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4f,0xd0]
-        	cmovnlel	%eax,%edx
-
-// CHECK: cmovnol	%eax, %edx
-// CHECK:  encoding: [0x0f,0x41,0xd0]
-        	cmovnol	%eax,%edx
-
-// CHECK: cmovnpl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4b,0xd0]
-        	cmovnpl	%eax,%edx
-
-// CHECK: cmovnsl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x49,0xd0]
-        	cmovnsl	%eax,%edx
-
-// CHECK: cmovnel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x45,0xd0]
-        	cmovnzl	%eax,%edx
-
-// CHECK: cmovol	%eax, %edx
-// CHECK:  encoding: [0x0f,0x40,0xd0]
-        	cmovol	%eax,%edx
-
-// CHECK: cmovpl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x4a,0xd0]
-        	cmovpl	%eax,%edx
-
-// CHECK: cmovsl	%eax, %edx
-// CHECK:  encoding: [0x0f,0x48,0xd0]
-        	cmovsl	%eax,%edx
-
-// CHECK: cmovel	%eax, %edx
-// CHECK:  encoding: [0x0f,0x44,0xd0]
-        	cmovzl	%eax,%edx
-
-// CHECK: cmpps	$0, %xmm0, %xmm1
-// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
-        cmpps $0, %xmm0, %xmm1
-// CHECK:	cmpps	$0, (%eax), %xmm1
-// CHECK: encoding: [0x0f,0xc2,0x08,0x00]
-        cmpps $0, 0(%eax), %xmm1
-// CHECK:	cmppd	$0, %xmm0, %xmm1
-// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x00]
-        cmppd $0, %xmm0, %xmm1
-// CHECK:	cmppd	$0, (%eax), %xmm1
-// CHECK: encoding: [0x66,0x0f,0xc2,0x08,0x00]
-        cmppd $0, 0(%eax), %xmm1
-// CHECK:	cmpss	$0, %xmm0, %xmm1
-// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x00]
-        cmpss $0, %xmm0, %xmm1
-// CHECK:	cmpss	$0, (%eax), %xmm1
-// CHECK: encoding: [0xf3,0x0f,0xc2,0x08,0x00]
-        cmpss $0, 0(%eax), %xmm1
-// CHECK:	cmpsd	$0, %xmm0, %xmm1
-// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x00]
-        cmpsd $0, %xmm0, %xmm1
-// CHECK:	cmpsd	$0, (%eax), %xmm1
-// CHECK: encoding: [0xf2,0x0f,0xc2,0x08,0x00]
-        cmpsd $0, 0(%eax), %xmm1
-
-// Check matching of instructions which embed the SSE comparison code.
-
-// CHECK: cmpps $0, %xmm0, %xmm1
-// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
-        cmpeqps %xmm0, %xmm1
-
-// CHECK: cmppd $1, %xmm0, %xmm1
-// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x01]
-        cmpltpd %xmm0, %xmm1
-
-// CHECK: cmpss $2, %xmm0, %xmm1
-// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
-        cmpless %xmm0, %xmm1
-
-// CHECK: cmppd $3, %xmm0, %xmm1
-// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x03]
-        cmpunordpd %xmm0, %xmm1
-
-// CHECK: cmpps $4, %xmm0, %xmm1
-// CHECK: encoding: [0x0f,0xc2,0xc8,0x04]
-        cmpneqps %xmm0, %xmm1
-
-// CHECK: cmppd $5, %xmm0, %xmm1
-// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x05]
-        cmpnltpd %xmm0, %xmm1
-
-// CHECK: cmpss $6, %xmm0, %xmm1
-// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
-        cmpnless %xmm0, %xmm1
-
-// CHECK: cmpsd $7, %xmm0, %xmm1
-// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x07]
-        cmpordsd %xmm0, %xmm1
-
-// rdar://7995856
-// CHECK: fmul	%st(0)
-// CHECK:  encoding: [0xd8,0xc8]
-        fmul %st(0), %st
-
-// CHECK: fadd	%st(0)
-// CHECK:  encoding: [0xd8,0xc0]
-        fadd %st(0), %st
-
-// CHECK: fsub	%st(0)
-// CHECK:  encoding: [0xd8,0xe0]
-        fsub %st(0), %st
-
-// CHECK: fsubr	%st(0)
-// CHECK:  encoding: [0xd8,0xe8]
-        fsubr %st(0), %st
-
-// CHECK: fdivr	%st(0)
-// CHECK:  encoding: [0xd8,0xf8]
-        fdivr %st(0), %st
-
-// CHECK: fdiv	%st(0)
-// CHECK:  encoding: [0xd8,0xf0]
-        fdiv %st(0), %st
-
-// radr://8017519
-// CHECK: movl	%cs, %eax
-// CHECK:  encoding: [0x8c,0xc8]
-        movl %cs, %eax
-
-// CHECK: movw	%cs, %ax
-// CHECK:  encoding: [0x66,0x8c,0xc8]
-        movw %cs, %ax
-
-// CHECK: movl	%cs, (%eax)
-// CHECK:  encoding: [0x8c,0x08]
-        movl %cs, (%eax)
-
-// CHECK: movw	%cs, (%eax)
-// CHECK:  encoding: [0x66,0x8c,0x08]
-        movw %cs, (%eax)
-
-// CHECK: movl	%eax, %cs
-// CHECK:  encoding: [0x8e,0xc8]
-        movl %eax, %cs
-
-// CHECK: movl	(%eax), %cs
-// CHECK:  encoding: [0x8e,0x08]
-        movl (%eax), %cs
-
-// CHECK: movw	(%eax), %cs
-// CHECK:  encoding: [0x66,0x8e,0x08]
-        movw (%eax), %cs
-
-// radr://8033374
-// CHECK: movl	%cr0, %eax
-// CHECK:  encoding: [0x0f,0x20,0xc0]
-        movl %cr0,%eax
-
-// CHECK: movl	%cr1, %eax
-// CHECK:  encoding: [0x0f,0x20,0xc8]
-        movl %cr1,%eax
-
-// CHECK: movl	%cr2, %eax
-// CHECK:  encoding: [0x0f,0x20,0xd0]
-        movl %cr2,%eax
-
-// CHECK: movl	%cr3, %eax
-// CHECK:  encoding: [0x0f,0x20,0xd8]
-        movl %cr3,%eax
-
-// CHECK: movl	%cr4, %eax
-// CHECK:  encoding: [0x0f,0x20,0xe0]
-        movl %cr4,%eax
-
-// CHECK: movl	%dr0, %eax
-// CHECK:  encoding: [0x0f,0x21,0xc0]
-        movl %dr0,%eax
-
-// CHECK: movl	%dr1, %eax
-// CHECK:  encoding: [0x0f,0x21,0xc8]
-        movl %dr1,%eax
-
-// CHECK: movl	%dr1, %eax
-// CHECK:  encoding: [0x0f,0x21,0xc8]
-        movl %dr1,%eax
-
-// CHECK: movl	%dr2, %eax
-// CHECK:  encoding: [0x0f,0x21,0xd0]
-        movl %dr2,%eax
-
-// CHECK: movl	%dr3, %eax
-// CHECK:  encoding: [0x0f,0x21,0xd8]
-        movl %dr3,%eax
-
-// CHECK: movl	%dr4, %eax
-// CHECK:  encoding: [0x0f,0x21,0xe0]
-        movl %dr4,%eax
-
-// CHECK: movl	%dr5, %eax
-// CHECK:  encoding: [0x0f,0x21,0xe8]
-        movl %dr5,%eax
-
-// CHECK: movl	%dr6, %eax
-// CHECK:  encoding: [0x0f,0x21,0xf0]
-        movl %dr6,%eax
-
-// CHECK: movl	%dr7, %eax
-// CHECK:  encoding: [0x0f,0x21,0xf8]
-        movl %dr7,%eax
-
-// radr://8017522
-// CHECK: wait
-// CHECK:  encoding: [0x9b]
-	fwait
-
-// rdar://7873482
-// CHECK: [0x65,0x8b,0x05,0x7c,0x00,0x00,0x00]
-// FIXME: This is a correct bug poor encoding: Use 65 a1 7c 00 00 00 
-        movl	%gs:124, %eax
-
-// CHECK: pusha
-// CHECK:  encoding: [0x60]
-        	pusha
-
-// CHECK: popa
-// CHECK:  encoding: [0x61]
-        	popa
-
-// CHECK: pushal
-// CHECK:  encoding: [0x60]
-        	pushal
-
-// CHECK: popal
-// CHECK:  encoding: [0x61]
-        	popal
-
-// CHECK: jmpl *8(%eax)
-// CHECK:   encoding: [0xff,0x60,0x08]
-	jmp	*8(%eax)
-
-// PR7465
-// CHECK: lcalll $2, $4660
-// CHECK:   encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00]
-lcalll $0x2, $0x1234
diff --git a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s
deleted file mode 100644
index 67e82c6cd0d2..000000000000
--- a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s
+++ /dev/null
@@ -1,42 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
-          vpclmulhqhqdq %xmm12, %xmm10, %xmm11
-
-// CHECK: vpclmulqdq  $17, (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
-          vpclmulhqhqdq (%rax), %xmm10, %xmm13
-
-// CHECK: vpclmulqdq  $1, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01]
-          vpclmulhqlqdq %xmm12, %xmm10, %xmm11
-
-// CHECK: vpclmulqdq  $1, (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01]
-          vpclmulhqlqdq (%rax), %xmm10, %xmm13
-
-// CHECK: vpclmulqdq  $16, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10]
-          vpclmullqhqdq %xmm12, %xmm10, %xmm11
-
-// CHECK: vpclmulqdq  $16, (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x10]
-          vpclmullqhqdq (%rax), %xmm10, %xmm13
-
-// CHECK: vpclmulqdq  $0, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00]
-          vpclmullqlqdq %xmm12, %xmm10, %xmm11
-
-// CHECK: vpclmulqdq  $0, (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00]
-          vpclmullqlqdq (%rax), %xmm10, %xmm13
-
-// CHECK: vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
-          vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
-
-// CHECK: vpclmulqdq  $17, (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
-          vpclmulqdq  $17, (%rax), %xmm10, %xmm13
-
diff --git a/test/MC/AsmParser/X86/x86_64-avx-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-encoding.s
deleted file mode 100644
index 7a96bb5a2b48..000000000000
--- a/test/MC/AsmParser/X86/x86_64-avx-encoding.s
+++ /dev/null
@@ -1,3318 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vaddss  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x32,0x58,0xd0]
-vaddss  %xmm8, %xmm9, %xmm10
-
-// CHECK: vmulss  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x32,0x59,0xd0]
-vmulss  %xmm8, %xmm9, %xmm10
-
-// CHECK: vsubss  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x32,0x5c,0xd0]
-vsubss  %xmm8, %xmm9, %xmm10
-
-// CHECK: vdivss  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x32,0x5e,0xd0]
-vdivss  %xmm8, %xmm9, %xmm10
-
-// CHECK: vaddsd  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x33,0x58,0xd0]
-vaddsd  %xmm8, %xmm9, %xmm10
-
-// CHECK: vmulsd  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x33,0x59,0xd0]
-vmulsd  %xmm8, %xmm9, %xmm10
-
-// CHECK: vsubsd  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x33,0x5c,0xd0]
-vsubsd  %xmm8, %xmm9, %xmm10
-
-// CHECK: vdivsd  %xmm8, %xmm9, %xmm10
-// CHECK:  encoding: [0xc4,0x41,0x33,0x5e,0xd0]
-vdivsd  %xmm8, %xmm9, %xmm10
-
-// CHECK:   vaddss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc]
-vaddss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vsubss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc]
-vsubss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vmulss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc]
-vmulss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vdivss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc]
-vdivss  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vaddsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc]
-vaddsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vsubsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc]
-vsubsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vmulsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc]
-vmulsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK:   vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK:   encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc]
-vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vaddps  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa]
-vaddps  %xmm10, %xmm11, %xmm15
-
-// CHECK: vsubps  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa]
-vsubps  %xmm10, %xmm11, %xmm15
-
-// CHECK: vmulps  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa]
-vmulps  %xmm10, %xmm11, %xmm15
-
-// CHECK: vdivps  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa]
-vdivps  %xmm10, %xmm11, %xmm15
-
-// CHECK: vaddpd  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa]
-vaddpd  %xmm10, %xmm11, %xmm15
-
-// CHECK: vsubpd  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa]
-vsubpd  %xmm10, %xmm11, %xmm15
-
-// CHECK: vmulpd  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa]
-vmulpd  %xmm10, %xmm11, %xmm15
-
-// CHECK: vdivpd  %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa]
-vdivpd  %xmm10, %xmm11, %xmm15
-
-// CHECK: vaddps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc]
-vaddps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc]
-vsubps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc]
-vmulps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc]
-vdivps  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vaddpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc]
-vaddpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc]
-vsubpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc]
-vmulpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc]
-vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmaxss  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2]
-          vmaxss  %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxsd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2]
-          vmaxsd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vminss  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2]
-          vminss  %xmm10, %xmm14, %xmm12
-
-// CHECK: vminsd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2]
-          vminsd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxss  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc]
-          vmaxss  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc]
-          vmaxsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminss  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc]
-          vminss  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
-          vminsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
-          vmaxps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
-          vmaxpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vminps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
-          vminps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vminpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
-          vminpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
-          vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
-          vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
-          vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
-          vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
-          vandps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vandpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
-          vandpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
-          vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
-          vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vorps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
-          vorps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vorpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
-          vorpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
-          vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
-          vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vxorps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
-          vxorps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vxorpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
-          vxorpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
-          vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
-          vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandnps  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
-          vandnps  %xmm10, %xmm14, %xmm12
-
-// CHECK: vandnpd  %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
-          vandnpd  %xmm10, %xmm14, %xmm12
-
-// CHECK: vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
-          vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
-          vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmovss  -4(%rbx,%rcx,8), %xmm10
-// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
-          vmovss  -4(%rbx,%rcx,8), %xmm10
-
-// CHECK: vmovss  %xmm14, %xmm10, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
-          vmovss  %xmm14, %xmm10, %xmm15
-
-// CHECK: vmovsd  -4(%rbx,%rcx,8), %xmm10
-// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
-          vmovsd  -4(%rbx,%rcx,8), %xmm10
-
-// CHECK: vmovsd  %xmm14, %xmm10, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
-          vmovsd  %xmm14, %xmm10, %xmm15
-
-// CHECK: vunpckhps  %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
-          vunpckhps  %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpckhpd  %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
-          vunpckhpd  %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpcklps  %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
-          vunpcklps  %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpcklpd  %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
-          vunpcklpd  %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
-          vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
-          vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
-          vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
-          vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vcmpps  $0, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
-          vcmpps  $0, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmpps  $0, (%rax), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
-          vcmpps  $0, (%rax), %xmm12, %xmm15
-
-// CHECK: vcmpps  $7, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
-          vcmpps  $7, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmppd  $0, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
-          vcmppd  $0, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmppd  $0, (%rax), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
-          vcmppd  $0, (%rax), %xmm12, %xmm15
-
-// CHECK: vcmppd  $7, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
-          vcmppd  $7, %xmm10, %xmm12, %xmm15
-
-// CHECK: vshufps  $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
-          vshufps  $8, %xmm11, %xmm12, %xmm13
-
-// CHECK: vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
-          vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vshufpd  $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
-          vshufpd  $8, %xmm11, %xmm12, %xmm13
-
-// CHECK: vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
-          vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
-          vcmpeqps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
-          vcmpleps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
-          vcmpltps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
-          vcmpneqps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
-          vcmpnleps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
-          vcmpnltps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
-          vcmpordps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
-          vcmpunordps   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmpleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordps   -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordps   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
-          vcmpeqpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
-          vcmplepd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
-          vcmpltpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
-          vcmpneqpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
-          vcmpnlepd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
-          vcmpnltpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
-          vcmpordpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
-          vcmpunordpd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmplepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnlepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
-          vcmpeqss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
-          vcmpless   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
-          vcmpltss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
-          vcmpneqss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
-          vcmpnless   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
-          vcmpnltss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
-          vcmpordss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
-          vcmpunordss   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmpless   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnless   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordss   -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpss  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordss   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
-          vcmpeqsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
-          vcmplesd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
-          vcmpltsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
-          vcmpneqsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
-          vcmpnlesd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
-          vcmpnltsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
-          vcmpordsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
-          vcmpunordsd   %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmplesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnlesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
-          vcmpordsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpsd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vucomiss  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
-          vucomiss  %xmm11, %xmm12
-
-// CHECK: vucomiss  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
-          vucomiss  (%rax), %xmm12
-
-// CHECK: vcomiss  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
-          vcomiss  %xmm11, %xmm12
-
-// CHECK: vcomiss  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
-          vcomiss  (%rax), %xmm12
-
-// CHECK: vucomisd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
-          vucomisd  %xmm11, %xmm12
-
-// CHECK: vucomisd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
-          vucomisd  (%rax), %xmm12
-
-// CHECK: vcomisd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
-          vcomisd  %xmm11, %xmm12
-
-// CHECK: vcomisd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
-          vcomisd  (%rax), %xmm12
-
-// CHECK: vcvttss2si  (%rcx), %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
-          vcvttss2si  (%rcx), %eax
-
-// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
-          vcvtsi2ss  (%rax), %xmm11, %xmm12
-
-// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
-          vcvtsi2ss  (%rax), %xmm11, %xmm12
-
-// CHECK: vcvttsd2si  (%rcx), %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
-          vcvttsd2si  (%rcx), %eax
-
-// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
-          vcvtsi2sd  (%rax), %xmm11, %xmm12
-
-// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
-          vcvtsi2sd  (%rax), %xmm11, %xmm12
-
-// CHECK: vmovaps  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x28,0x20]
-          vmovaps  (%rax), %xmm12
-
-// CHECK: vmovaps  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
-          vmovaps  %xmm11, %xmm12
-
-// CHECK: vmovaps  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x29,0x18]
-          vmovaps  %xmm11, (%rax)
-
-// CHECK: vmovapd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x28,0x20]
-          vmovapd  (%rax), %xmm12
-
-// CHECK: vmovapd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
-          vmovapd  %xmm11, %xmm12
-
-// CHECK: vmovapd  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x29,0x18]
-          vmovapd  %xmm11, (%rax)
-
-// CHECK: vmovups  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x10,0x20]
-          vmovups  (%rax), %xmm12
-
-// CHECK: vmovups  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
-          vmovups  %xmm11, %xmm12
-
-// CHECK: vmovups  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x11,0x18]
-          vmovups  %xmm11, (%rax)
-
-// CHECK: vmovupd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x10,0x20]
-          vmovupd  (%rax), %xmm12
-
-// CHECK: vmovupd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
-          vmovupd  %xmm11, %xmm12
-
-// CHECK: vmovupd  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x11,0x18]
-          vmovupd  %xmm11, (%rax)
-
-// CHECK: vmovlps  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x13,0x18]
-          vmovlps  %xmm11, (%rax)
-
-// CHECK: vmovlps  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0x12,0x28]
-          vmovlps  (%rax), %xmm12, %xmm13
-
-// CHECK: vmovlpd  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x13,0x18]
-          vmovlpd  %xmm11, (%rax)
-
-// CHECK: vmovlpd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x12,0x28]
-          vmovlpd  (%rax), %xmm12, %xmm13
-
-// CHECK: vmovhps  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x17,0x18]
-          vmovhps  %xmm11, (%rax)
-
-// CHECK: vmovhps  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0x16,0x28]
-          vmovhps  (%rax), %xmm12, %xmm13
-
-// CHECK: vmovhpd  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x17,0x18]
-          vmovhpd  %xmm11, (%rax)
-
-// CHECK: vmovhpd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x16,0x28]
-          vmovhpd  (%rax), %xmm12, %xmm13
-
-// CHECK: vmovlhps  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
-          vmovlhps  %xmm11, %xmm12, %xmm13
-
-// CHECK: vmovhlps  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
-          vmovhlps  %xmm11, %xmm12, %xmm13
-
-// CHECK: vcvtss2sil  %xmm11, %eax
-// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
-          vcvtss2si  %xmm11, %eax
-
-// CHECK: vcvtss2sil  (%rax), %ebx
-// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
-          vcvtss2si  (%rax), %ebx
-
-// CHECK: vcvtdq2ps  %xmm10, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
-          vcvtdq2ps  %xmm10, %xmm12
-
-// CHECK: vcvtdq2ps  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
-          vcvtdq2ps  (%rax), %xmm12
-
-// CHECK: vcvtsd2ss  %xmm12, %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
-          vcvtsd2ss  %xmm12, %xmm13, %xmm10
-
-// CHECK: vcvtsd2ss  (%rax), %xmm13, %xmm10
-// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
-          vcvtsd2ss  (%rax), %xmm13, %xmm10
-
-// CHECK: vcvtps2dq  %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
-          vcvtps2dq  %xmm12, %xmm11
-
-// CHECK: vcvtps2dq  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
-          vcvtps2dq  (%rax), %xmm11
-
-// CHECK: vcvtss2sd  %xmm12, %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
-          vcvtss2sd  %xmm12, %xmm13, %xmm10
-
-// CHECK: vcvtss2sd  (%rax), %xmm13, %xmm10
-// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
-          vcvtss2sd  (%rax), %xmm13, %xmm10
-
-// CHECK: vcvtdq2ps  %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
-          vcvtdq2ps  %xmm13, %xmm10
-
-// CHECK: vcvtdq2ps  (%ecx), %xmm13
-// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
-          vcvtdq2ps  (%ecx), %xmm13
-
-// CHECK: vcvttps2dq  %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
-          vcvttps2dq  %xmm12, %xmm11
-
-// CHECK: vcvttps2dq  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
-          vcvttps2dq  (%rax), %xmm11
-
-// CHECK: vcvtps2pd  %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
-          vcvtps2pd  %xmm12, %xmm11
-
-// CHECK: vcvtps2pd  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
-          vcvtps2pd  (%rax), %xmm11
-
-// CHECK: vcvtpd2ps  %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
-          vcvtpd2ps  %xmm12, %xmm11
-
-// CHECK: vsqrtpd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
-          vsqrtpd  %xmm11, %xmm12
-
-// CHECK: vsqrtpd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x51,0x20]
-          vsqrtpd  (%rax), %xmm12
-
-// CHECK: vsqrtps  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
-          vsqrtps  %xmm11, %xmm12
-
-// CHECK: vsqrtps  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x51,0x20]
-          vsqrtps  (%rax), %xmm12
-
-// CHECK: vsqrtsd  %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
-          vsqrtsd  %xmm11, %xmm12, %xmm10
-
-// CHECK: vsqrtsd  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
-          vsqrtsd  (%rax), %xmm12, %xmm10
-
-// CHECK: vsqrtss  %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
-          vsqrtss  %xmm11, %xmm12, %xmm10
-
-// CHECK: vsqrtss  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
-          vsqrtss  (%rax), %xmm12, %xmm10
-
-// CHECK: vrsqrtps  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
-          vrsqrtps  %xmm11, %xmm12
-
-// CHECK: vrsqrtps  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x52,0x20]
-          vrsqrtps  (%rax), %xmm12
-
-// CHECK: vrsqrtss  %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
-          vrsqrtss  %xmm11, %xmm12, %xmm10
-
-// CHECK: vrsqrtss  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
-          vrsqrtss  (%rax), %xmm12, %xmm10
-
-// CHECK: vrcpps  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
-          vrcpps  %xmm11, %xmm12
-
-// CHECK: vrcpps  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x53,0x20]
-          vrcpps  (%rax), %xmm12
-
-// CHECK: vrcpss  %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
-          vrcpss  %xmm11, %xmm12, %xmm10
-
-// CHECK: vrcpss  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
-          vrcpss  (%rax), %xmm12, %xmm10
-
-// CHECK: vmovntdq  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
-          vmovntdq  %xmm11, (%rax)
-
-// CHECK: vmovntpd  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
-          vmovntpd  %xmm11, (%rax)
-
-// CHECK: vmovntps  %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
-          vmovntps  %xmm11, (%rax)
-
-// CHECK: vldmxcsr  -4(%rip)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
-          vldmxcsr  -4(%rip)
-
-// CHECK: vstmxcsr  -4(%rsp)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
-          vstmxcsr  -4(%rsp)
-
-// CHECK: vpsubb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
-          vpsubb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
-          vpsubb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
-          vpsubw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
-          vpsubw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
-          vpsubd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
-          vpsubd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
-          vpsubq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
-          vpsubq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubsb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
-          vpsubsb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubsb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
-          vpsubsb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
-          vpsubsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
-          vpsubsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubusb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
-          vpsubusb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubusb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
-          vpsubusb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubusw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
-          vpsubusw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubusw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
-          vpsubusw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
-          vpaddb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
-          vpaddb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
-          vpaddw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
-          vpaddw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
-          vpaddd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
-          vpaddd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
-          vpaddq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
-          vpaddq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddsb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
-          vpaddsb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddsb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xec,0x28]
-          vpaddsb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
-          vpaddsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xed,0x28]
-          vpaddsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddusb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
-          vpaddusb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddusb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
-          vpaddusb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddusw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
-          vpaddusw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddusw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
-          vpaddusw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhuw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
-          vpmulhuw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhuw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
-          vpmulhuw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
-          vpmulhw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
-          vpmulhw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmullw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
-          vpmullw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmullw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
-          vpmullw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmuludq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
-          vpmuludq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmuludq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
-          vpmuludq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpavgb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
-          vpavgb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpavgb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
-          vpavgb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpavgw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
-          vpavgw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpavgw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
-          vpavgw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
-          vpminsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpminsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xea,0x28]
-          vpminsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminub  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
-          vpminub  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpminub  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xda,0x28]
-          vpminub  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
-          vpmaxsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaxsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xee,0x28]
-          vpmaxsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxub  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
-          vpmaxub  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaxub  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xde,0x28]
-          vpmaxub  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsadbw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
-          vpsadbw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsadbw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
-          vpsadbw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsllw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
-          vpsllw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsllw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
-          vpsllw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpslld  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
-          vpslld  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpslld  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
-          vpslld  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsllq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
-          vpsllq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsllq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
-          vpsllq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsraw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
-          vpsraw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsraw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
-          vpsraw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrad  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
-          vpsrad  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrad  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
-          vpsrad  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrlw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
-          vpsrlw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrlw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
-          vpsrlw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrld  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
-          vpsrld  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrld  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
-          vpsrld  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrlq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
-          vpsrlq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrlq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
-          vpsrlq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpslld  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
-          vpslld  $10, %xmm12, %xmm13
-
-// CHECK: vpslldq  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
-          vpslldq  $10, %xmm12, %xmm13
-
-// CHECK: vpsllq  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
-          vpsllq  $10, %xmm12, %xmm13
-
-// CHECK: vpsllw  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
-          vpsllw  $10, %xmm12, %xmm13
-
-// CHECK: vpsrad  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
-          vpsrad  $10, %xmm12, %xmm13
-
-// CHECK: vpsraw  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
-          vpsraw  $10, %xmm12, %xmm13
-
-// CHECK: vpsrld  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
-          vpsrld  $10, %xmm12, %xmm13
-
-// CHECK: vpsrldq  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
-          vpsrldq  $10, %xmm12, %xmm13
-
-// CHECK: vpsrlq  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
-          vpsrlq  $10, %xmm12, %xmm13
-
-// CHECK: vpsrlw  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
-          vpsrlw  $10, %xmm12, %xmm13
-
-// CHECK: vpslld  $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
-          vpslld  $10, %xmm12, %xmm13
-
-// CHECK: vpand  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
-          vpand  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpand  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
-          vpand  (%rax), %xmm12, %xmm13
-
-// CHECK: vpor  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
-          vpor  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpor  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
-          vpor  (%rax), %xmm12, %xmm13
-
-// CHECK: vpxor  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
-          vpxor  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpxor  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xef,0x28]
-          vpxor  (%rax), %xmm12, %xmm13
-
-// CHECK: vpandn  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
-          vpandn  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpandn  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
-          vpandn  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
-          vpcmpeqb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x74,0x28]
-          vpcmpeqb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
-          vpcmpeqw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x75,0x28]
-          vpcmpeqw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
-          vpcmpeqd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x76,0x28]
-          vpcmpeqd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
-          vpcmpgtb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x64,0x28]
-          vpcmpgtb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
-          vpcmpgtw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x65,0x28]
-          vpcmpgtw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
-          vpcmpgtd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x66,0x28]
-          vpcmpgtd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpacksswb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
-          vpacksswb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpacksswb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x63,0x28]
-          vpacksswb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpackssdw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
-          vpackssdw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpackssdw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
-          vpackssdw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpackuswb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
-          vpackuswb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpackuswb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x67,0x28]
-          vpackuswb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpshufd  $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
-          vpshufd  $4, %xmm12, %xmm13
-
-// CHECK: vpshufd  $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
-          vpshufd  $4, (%rax), %xmm13
-
-// CHECK: vpshufhw  $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
-          vpshufhw  $4, %xmm12, %xmm13
-
-// CHECK: vpshufhw  $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
-          vpshufhw  $4, (%rax), %xmm13
-
-// CHECK: vpshuflw  $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
-          vpshuflw  $4, %xmm12, %xmm13
-
-// CHECK: vpshuflw  $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
-          vpshuflw  $4, (%rax), %xmm13
-
-// CHECK: vpunpcklbw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
-          vpunpcklbw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklbw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x60,0x28]
-          vpunpcklbw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpcklwd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
-          vpunpcklwd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklwd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x61,0x28]
-          vpunpcklwd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckldq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
-          vpunpckldq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckldq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x62,0x28]
-          vpunpckldq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpcklqdq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
-          vpunpcklqdq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklqdq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
-          vpunpcklqdq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhbw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
-          vpunpckhbw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhbw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x68,0x28]
-          vpunpckhbw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhwd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
-          vpunpckhwd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhwd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x69,0x28]
-          vpunpckhwd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhdq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
-          vpunpckhdq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhdq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
-          vpunpckhdq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhqdq  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
-          vpunpckhqdq  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhqdq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
-          vpunpckhqdq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpinsrw  $7, %eax, %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
-          vpinsrw  $7, %eax, %xmm12, %xmm13
-
-// CHECK: vpinsrw  $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
-          vpinsrw  $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vpextrw  $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
-          vpextrw  $7, %xmm12, %eax
-
-// CHECK: vpmovmskb  %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
-          vpmovmskb  %xmm12, %eax
-
-// CHECK: vmaskmovdqu  %xmm14, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
-          vmaskmovdqu  %xmm14, %xmm15
-
-// CHECK: vmovd  %eax, %xmm14
-// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
-          vmovd  %eax, %xmm14
-
-// CHECK: vmovd  (%rax), %xmm14
-// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
-          vmovd  (%rax), %xmm14
-
-// CHECK: vmovd  %xmm14, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
-          vmovd  %xmm14, (%rax)
-
-// CHECK: vmovd  %rax, %xmm14
-// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
-          vmovd  %rax, %xmm14
-
-// CHECK: vmovq  %xmm14, (%rax)
-// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
-          vmovq  %xmm14, (%rax)
-
-// CHECK: vmovq  %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
-          vmovq  %xmm14, %xmm12
-
-// CHECK: vmovq  (%rax), %xmm14
-// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
-          vmovq  (%rax), %xmm14
-
-// CHECK: vmovq  %rax, %xmm14
-// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
-          vmovq  %rax, %xmm14
-
-// CHECK: vmovq  %xmm14, %rax
-// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
-          vmovq  %xmm14, %rax
-
-// CHECK: vcvtpd2dq  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
-          vcvtpd2dq  %xmm11, %xmm12
-
-// CHECK: vcvtdq2pd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
-          vcvtdq2pd  %xmm11, %xmm12
-
-// CHECK: vcvtdq2pd  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
-          vcvtdq2pd  (%rax), %xmm12
-
-// CHECK: vmovshdup  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
-          vmovshdup  %xmm11, %xmm12
-
-// CHECK: vmovshdup  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
-          vmovshdup  (%rax), %xmm12
-
-// CHECK: vmovsldup  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
-          vmovsldup  %xmm11, %xmm12
-
-// CHECK: vmovsldup  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
-          vmovsldup  (%rax), %xmm12
-
-// CHECK: vmovddup  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
-          vmovddup  %xmm11, %xmm12
-
-// CHECK: vmovddup  (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
-          vmovddup  (%rax), %xmm12
-
-// CHECK: vaddsubps  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
-          vaddsubps  %xmm11, %xmm12, %xmm13
-
-// CHECK: vaddsubps  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
-          vaddsubps  (%rax), %xmm11, %xmm12
-
-// CHECK: vaddsubpd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
-          vaddsubpd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vaddsubpd  (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
-          vaddsubpd  (%rax), %xmm11, %xmm12
-
-// CHECK: vhaddps  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
-          vhaddps  %xmm11, %xmm12, %xmm13
-
-// CHECK: vhaddps  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
-          vhaddps  (%rax), %xmm12, %xmm13
-
-// CHECK: vhaddpd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
-          vhaddpd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vhaddpd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
-          vhaddpd  (%rax), %xmm12, %xmm13
-
-// CHECK: vhsubps  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
-          vhsubps  %xmm11, %xmm12, %xmm13
-
-// CHECK: vhsubps  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
-          vhsubps  (%rax), %xmm12, %xmm13
-
-// CHECK: vhsubpd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
-          vhsubpd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vhsubpd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
-          vhsubpd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpabsb  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
-          vpabsb  %xmm11, %xmm12
-
-// CHECK: vpabsb  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
-          vpabsb  (%rax), %xmm12
-
-// CHECK: vpabsw  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
-          vpabsw  %xmm11, %xmm12
-
-// CHECK: vpabsw  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
-          vpabsw  (%rax), %xmm12
-
-// CHECK: vpabsd  %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
-          vpabsd  %xmm11, %xmm12
-
-// CHECK: vpabsd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
-          vpabsd  (%rax), %xmm12
-
-// CHECK: vphaddw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
-          vphaddw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
-          vphaddw  (%rax), %xmm12, %xmm13
-
-// CHECK: vphaddd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
-          vphaddd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
-          vphaddd  (%rax), %xmm12, %xmm13
-
-// CHECK: vphaddsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
-          vphaddsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
-          vphaddsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
-          vphsubw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
-          vphsubw  (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
-          vphsubd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
-          vphsubd  (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
-          vphsubsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
-          vphsubsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaddubsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
-          vpmaddubsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaddubsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
-          vpmaddubsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpshufb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
-          vpshufb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpshufb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
-          vpshufb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignb  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
-          vpsignb  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
-          vpsignb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
-          vpsignw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
-          vpsignw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignd  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
-          vpsignd  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
-          vpsignd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhrsw  %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
-          vpmulhrsw  %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhrsw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
-          vpmulhrsw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpalignr  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
-          vpalignr  $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vpalignr  $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
-          vpalignr  $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundsd  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07]
-          vroundsd  $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vroundsd  $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07]
-          vroundsd  $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundss  $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07]
-          vroundss  $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vroundss  $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07]
-          vroundss  $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundpd  $7, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07]
-          vroundpd  $7, %xmm12, %xmm13
-
-// CHECK: vroundpd  $7, (%rax), %xmm13
-// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07]
-          vroundpd  $7, (%rax), %xmm13
-
-// CHECK: vroundps  $7, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07]
-          vroundps  $7, %xmm12, %xmm13
-
-// CHECK: vroundps  $7, (%rax), %xmm13
-// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07]
-          vroundps  $7, (%rax), %xmm13
-
-// CHECK: vphminposuw  %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec]
-          vphminposuw  %xmm12, %xmm13
-
-// CHECK: vphminposuw  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
-          vphminposuw  (%rax), %xmm12
-
-// CHECK: vpackusdw  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
-          vpackusdw  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpackusdw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
-          vpackusdw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqq  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
-          vpcmpeqq  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpcmpeqq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
-          vpcmpeqq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsb  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
-          vpminsb  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminsb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
-          vpminsb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsd  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
-          vpminsd  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminsd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
-          vpminsd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminud  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
-          vpminud  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminud  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
-          vpminud  (%rax), %xmm12, %xmm13
-
-// CHECK: vpminuw  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
-          vpminuw  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminuw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
-          vpminuw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsb  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
-          vpmaxsb  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxsb  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
-          vpmaxsb  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsd  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
-          vpmaxsd  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxsd  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
-          vpmaxsd  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxud  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
-          vpmaxud  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxud  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
-          vpmaxud  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxuw  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
-          vpmaxuw  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxuw  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
-          vpmaxuw  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmuldq  %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
-          vpmuldq  %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmuldq  (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
-          vpmuldq  (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulld  %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc]
-          vpmulld  %xmm12, %xmm5, %xmm11
-
-// CHECK: vpmulld  (%rax), %xmm5, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28]
-          vpmulld  (%rax), %xmm5, %xmm13
-
-// CHECK: vblendps  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03]
-          vblendps  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vblendps  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03]
-          vblendps  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vblendpd  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03]
-          vblendpd  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vblendpd  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03]
-          vblendpd  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vpblendw  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03]
-          vpblendw  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vpblendw  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03]
-          vpblendw  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vmpsadbw  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03]
-          vmpsadbw  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vmpsadbw  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03]
-          vmpsadbw  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vdpps  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03]
-          vdpps  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vdpps  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03]
-          vdpps  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vdppd  $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03]
-          vdppd  $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vdppd  $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03]
-          vdppd  $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0]
-          vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0]
-          vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0]
-          vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vblendvps  %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0]
-          vblendvps  %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0]
-          vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0]
-          vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vpmovsxbw  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4]
-          vpmovsxbw  %xmm12, %xmm10
-
-// CHECK: vpmovsxbw  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20]
-          vpmovsxbw  (%rax), %xmm12
-
-// CHECK: vpmovsxwd  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4]
-          vpmovsxwd  %xmm12, %xmm10
-
-// CHECK: vpmovsxwd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20]
-          vpmovsxwd  (%rax), %xmm12
-
-// CHECK: vpmovsxdq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4]
-          vpmovsxdq  %xmm12, %xmm10
-
-// CHECK: vpmovsxdq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20]
-          vpmovsxdq  (%rax), %xmm12
-
-// CHECK: vpmovzxbw  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4]
-          vpmovzxbw  %xmm12, %xmm10
-
-// CHECK: vpmovzxbw  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20]
-          vpmovzxbw  (%rax), %xmm12
-
-// CHECK: vpmovzxwd  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4]
-          vpmovzxwd  %xmm12, %xmm10
-
-// CHECK: vpmovzxwd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20]
-          vpmovzxwd  (%rax), %xmm12
-
-// CHECK: vpmovzxdq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4]
-          vpmovzxdq  %xmm12, %xmm10
-
-// CHECK: vpmovzxdq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20]
-          vpmovzxdq  (%rax), %xmm12
-
-// CHECK: vpmovsxbq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4]
-          vpmovsxbq  %xmm12, %xmm10
-
-// CHECK: vpmovsxbq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20]
-          vpmovsxbq  (%rax), %xmm12
-
-// CHECK: vpmovzxbq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4]
-          vpmovzxbq  %xmm12, %xmm10
-
-// CHECK: vpmovzxbq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20]
-          vpmovzxbq  (%rax), %xmm12
-
-// CHECK: vpmovsxbd  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4]
-          vpmovsxbd  %xmm12, %xmm10
-
-// CHECK: vpmovsxbd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20]
-          vpmovsxbd  (%rax), %xmm12
-
-// CHECK: vpmovsxwq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4]
-          vpmovsxwq  %xmm12, %xmm10
-
-// CHECK: vpmovsxwq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20]
-          vpmovsxwq  (%rax), %xmm12
-
-// CHECK: vpmovzxbd  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4]
-          vpmovzxbd  %xmm12, %xmm10
-
-// CHECK: vpmovzxbd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20]
-          vpmovzxbd  (%rax), %xmm12
-
-// CHECK: vpmovzxwq  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4]
-          vpmovzxwq  %xmm12, %xmm10
-
-// CHECK: vpmovzxwq  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20]
-          vpmovzxwq  (%rax), %xmm12
-
-// CHECK: vpextrw  $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
-          vpextrw  $7, %xmm12, %eax
-
-// CHECK: vpextrw  $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07]
-          vpextrw  $7, %xmm12, (%rax)
-
-// CHECK: vpextrd  $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07]
-          vpextrd  $7, %xmm12, %eax
-
-// CHECK: vpextrd  $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07]
-          vpextrd  $7, %xmm12, (%rax)
-
-// CHECK: vpextrb  $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07]
-          vpextrb  $7, %xmm12, %eax
-
-// CHECK: vpextrb  $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07]
-          vpextrb  $7, %xmm12, (%rax)
-
-// CHECK: vpextrq  $7, %xmm12, %rcx
-// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07]
-          vpextrq  $7, %xmm12, %rcx
-
-// CHECK: vpextrq  $7, %xmm12, (%rcx)
-// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07]
-          vpextrq  $7, %xmm12, (%rcx)
-
-// CHECK: vextractps  $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07]
-          vextractps  $7, %xmm12, (%rax)
-
-// CHECK: vextractps  $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07]
-          vextractps  $7, %xmm12, %eax
-
-// CHECK: vpinsrw  $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07]
-          vpinsrw  $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrw  $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07]
-          vpinsrw  $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrb  $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07]
-          vpinsrb  $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrb  $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07]
-          vpinsrb  $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrd  $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07]
-          vpinsrd  $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrd  $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07]
-          vpinsrd  $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrq  $7, %rax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07]
-          vpinsrq  $7, %rax, %xmm12, %xmm10
-
-// CHECK: vpinsrq  $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
-          vpinsrq  $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vinsertps  $7, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07]
-          vinsertps  $7, %xmm12, %xmm10, %xmm11
-
-// CHECK: vinsertps  $7, (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07]
-          vinsertps  $7, (%rax), %xmm10, %xmm11
-
-// CHECK: vptest  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4]
-          vptest  %xmm12, %xmm10
-
-// CHECK: vptest  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20]
-          vptest  (%rax), %xmm12
-
-// CHECK: vmovntdqa  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20]
-          vmovntdqa  (%rax), %xmm12
-
-// CHECK: vpcmpgtq  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc]
-          vpcmpgtq  %xmm12, %xmm10, %xmm11
-
-// CHECK: vpcmpgtq  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28]
-          vpcmpgtq  (%rax), %xmm10, %xmm13
-
-// CHECK: vpcmpistrm  $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07]
-          vpcmpistrm  $7, %xmm12, %xmm10
-
-// CHECK: vpcmpistrm  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07]
-          vpcmpistrm  $7, (%rax), %xmm10
-
-// CHECK: vpcmpestrm  $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07]
-          vpcmpestrm  $7, %xmm12, %xmm10
-
-// CHECK: vpcmpestrm  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07]
-          vpcmpestrm  $7, (%rax), %xmm10
-
-// CHECK: vpcmpistri  $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07]
-          vpcmpistri  $7, %xmm12, %xmm10
-
-// CHECK: vpcmpistri  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07]
-          vpcmpistri  $7, (%rax), %xmm10
-
-// CHECK: vpcmpestri  $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07]
-          vpcmpestri  $7, %xmm12, %xmm10
-
-// CHECK: vpcmpestri  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07]
-          vpcmpestri  $7, (%rax), %xmm10
-
-// CHECK: vaesimc  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4]
-          vaesimc  %xmm12, %xmm10
-
-// CHECK: vaesimc  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20]
-          vaesimc  (%rax), %xmm12
-
-// CHECK: vaesenc  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc]
-          vaesenc  %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesenc  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28]
-          vaesenc  (%rax), %xmm10, %xmm13
-
-// CHECK: vaesenclast  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc]
-          vaesenclast  %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesenclast  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28]
-          vaesenclast  (%rax), %xmm10, %xmm13
-
-// CHECK: vaesdec  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc]
-          vaesdec  %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesdec  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28]
-          vaesdec  (%rax), %xmm10, %xmm13
-
-// CHECK: vaesdeclast  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc]
-          vaesdeclast  %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesdeclast  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28]
-          vaesdeclast  (%rax), %xmm10, %xmm13
-
-// CHECK: vaeskeygenassist  $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07]
-          vaeskeygenassist  $7, %xmm12, %xmm10
-
-// CHECK: vaeskeygenassist  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07]
-          vaeskeygenassist  $7, (%rax), %xmm10
-
-// CHECK: vcmpps  $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
-          vcmpeq_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $9, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
-          vcmpngeps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $10, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
-          vcmpngtps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $11, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
-          vcmpfalseps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $12, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
-          vcmpneq_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $13, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
-          vcmpgeps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $14, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
-          vcmpgtps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $15, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
-          vcmptrueps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $16, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
-          vcmpeq_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $17, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
-          vcmplt_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $18, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
-          vcmple_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $19, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
-          vcmpunord_sps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $20, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
-          vcmpneq_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $21, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
-          vcmpnlt_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $22, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
-          vcmpnle_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $23, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
-          vcmpord_sps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $24, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
-          vcmpeq_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $25, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
-          vcmpnge_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $26, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
-          vcmpngt_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $27, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
-          vcmpfalse_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $28, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
-          vcmpneq_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $29, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
-          vcmpge_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $30, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
-          vcmpgt_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps  $31, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
-          vcmptrue_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vmovaps  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x28,0x20]
-          vmovaps  (%rax), %ymm12
-
-// CHECK: vmovaps  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3]
-          vmovaps  %ymm11, %ymm12
-
-// CHECK: vmovaps  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x29,0x18]
-          vmovaps  %ymm11, (%rax)
-
-// CHECK: vmovapd  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x28,0x20]
-          vmovapd  (%rax), %ymm12
-
-// CHECK: vmovapd  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3]
-          vmovapd  %ymm11, %ymm12
-
-// CHECK: vmovapd  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x29,0x18]
-          vmovapd  %ymm11, (%rax)
-
-// CHECK: vmovups  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x10,0x20]
-          vmovups  (%rax), %ymm12
-
-// CHECK: vmovups  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3]
-          vmovups  %ymm11, %ymm12
-
-// CHECK: vmovups  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x11,0x18]
-          vmovups  %ymm11, (%rax)
-
-// CHECK: vmovupd  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x10,0x20]
-          vmovupd  (%rax), %ymm12
-
-// CHECK: vmovupd  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3]
-          vmovupd  %ymm11, %ymm12
-
-// CHECK: vmovupd  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
-          vmovupd  %ymm11, (%rax)
-
-// CHECK: vunpckhps  %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3]
-          vunpckhps  %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpckhpd  %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3]
-          vunpckhpd  %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpcklps  %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3]
-          vunpcklps  %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpcklpd  %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3]
-          vunpcklpd  %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpckhps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc]
-          vunpckhps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpckhpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc]
-          vunpckhpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpcklps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc]
-          vunpcklps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpcklpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc]
-          vunpcklpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vmovntdq  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0xe7,0x18]
-          vmovntdq  %ymm11, (%rax)
-
-// CHECK: vmovntpd  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x2b,0x18]
-          vmovntpd  %ymm11, (%rax)
-
-// CHECK: vmovntps  %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x2b,0x18]
-          vmovntps  %ymm11, (%rax)
-
-// CHECK: vmovmskps  %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4]
-          vmovmskps  %xmm12, %eax
-
-// CHECK: vmovmskpd  %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4]
-          vmovmskpd  %xmm12, %eax
-
-// CHECK: vmaxps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4]
-          vmaxps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vmaxpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4]
-          vmaxpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vminps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4]
-          vminps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vminpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4]
-          vminpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vsubps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4]
-          vsubps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vsubpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4]
-          vsubpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vdivps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4]
-          vdivps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vdivpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
-          vdivpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vaddps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
-          vaddps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vaddpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
-          vaddpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vmulps  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
-          vmulps  %ymm12, %ymm4, %ymm6
-
-// CHECK: vmulpd  %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
-          vmulpd  %ymm12, %ymm4, %ymm6
-
-// CHECK: vmaxps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
-          vmaxps  (%rax), %ymm4, %ymm6
-
-// CHECK: vmaxpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
-          vmaxpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vminps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
-          vminps  (%rax), %ymm4, %ymm6
-
-// CHECK: vminpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
-          vminpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vsubps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
-          vsubps  (%rax), %ymm4, %ymm6
-
-// CHECK: vsubpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
-          vsubpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vdivps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
-          vdivps  (%rax), %ymm4, %ymm6
-
-// CHECK: vdivpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
-          vdivpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vaddps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
-          vaddps  (%rax), %ymm4, %ymm6
-
-// CHECK: vaddpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
-          vaddpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vmulps  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
-          vmulps  (%rax), %ymm4, %ymm6
-
-// CHECK: vmulpd  (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
-          vmulpd  (%rax), %ymm4, %ymm6
-
-// CHECK: vsqrtpd  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3]
-          vsqrtpd  %ymm11, %ymm12
-
-// CHECK: vsqrtpd  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x51,0x20]
-          vsqrtpd  (%rax), %ymm12
-
-// CHECK: vsqrtps  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3]
-          vsqrtps  %ymm11, %ymm12
-
-// CHECK: vsqrtps  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x51,0x20]
-          vsqrtps  (%rax), %ymm12
-
-// CHECK: vrsqrtps  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3]
-          vrsqrtps  %ymm11, %ymm12
-
-// CHECK: vrsqrtps  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x52,0x20]
-          vrsqrtps  (%rax), %ymm12
-
-// CHECK: vrcpps  %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3]
-          vrcpps  %ymm11, %ymm12
-
-// CHECK: vrcpps  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x53,0x20]
-          vrcpps  (%rax), %ymm12
-
-// CHECK: vandps  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc]
-          vandps  %ymm12, %ymm14, %ymm11
-
-// CHECK: vandpd  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc]
-          vandpd  %ymm12, %ymm14, %ymm11
-
-// CHECK: vandps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc]
-          vandps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc]
-          vandpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vorps  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc]
-          vorps  %ymm12, %ymm14, %ymm11
-
-// CHECK: vorpd  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc]
-          vorpd  %ymm12, %ymm14, %ymm11
-
-// CHECK: vorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc]
-          vorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc]
-          vorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vxorps  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc]
-          vxorps  %ymm12, %ymm14, %ymm11
-
-// CHECK: vxorpd  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc]
-          vxorpd  %ymm12, %ymm14, %ymm11
-
-// CHECK: vxorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc]
-          vxorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vxorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc]
-          vxorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandnps  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc]
-          vandnps  %ymm12, %ymm14, %ymm11
-
-// CHECK: vandnpd  %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc]
-          vandnpd  %ymm12, %ymm14, %ymm11
-
-// CHECK: vandnps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc]
-          vandnps  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandnpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc]
-          vandnpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vcvtps2pd  %xmm13, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5]
-          vcvtps2pd  %xmm13, %ymm12
-
-// CHECK: vcvtps2pd  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x5a,0x20]
-          vcvtps2pd  (%rax), %ymm12
-
-// CHECK: vcvtdq2pd  %xmm13, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5]
-          vcvtdq2pd  %xmm13, %ymm12
-
-// CHECK: vcvtdq2pd  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7e,0xe6,0x20]
-          vcvtdq2pd  (%rax), %ymm12
-
-// CHECK: vcvtdq2ps  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4]
-          vcvtdq2ps  %ymm12, %ymm10
-
-// CHECK: vcvtdq2ps  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x5b,0x20]
-          vcvtdq2ps  (%rax), %ymm12
-
-// CHECK: vcvtps2dq  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4]
-          vcvtps2dq  %ymm12, %ymm10
-
-// CHECK: vcvtps2dq  (%rax), %ymm10
-// CHECK: encoding: [0xc5,0x7d,0x5b,0x10]
-          vcvtps2dq  (%rax), %ymm10
-
-// CHECK: vcvttps2dq  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4]
-          vcvttps2dq  %ymm12, %ymm10
-
-// CHECK: vcvttps2dq  (%rax), %ymm10
-// CHECK: encoding: [0xc5,0x7e,0x5b,0x10]
-          vcvttps2dq  (%rax), %ymm10
-
-// CHECK: vcvttpd2dq  %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
-          vcvttpd2dq  %xmm11, %xmm10
-
-// CHECK: vcvttpd2dq  %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
-          vcvttpd2dq  %ymm12, %xmm10
-
-// CHECK: vcvttpd2dqx  %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
-          vcvttpd2dqx  %xmm11, %xmm10
-
-// CHECK: vcvttpd2dqx  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0xe6,0x18]
-          vcvttpd2dqx  (%rax), %xmm11
-
-// CHECK: vcvttpd2dqy  %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc]
-          vcvttpd2dqy  %ymm12, %xmm11
-
-// CHECK: vcvttpd2dqy  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
-          vcvttpd2dqy  (%rax), %xmm11
-
-// CHECK: vcvtpd2ps  %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
-          vcvtpd2ps  %ymm12, %xmm10
-
-// CHECK: vcvtpd2psx  %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
-          vcvtpd2psx  %xmm11, %xmm10
-
-// CHECK: vcvtpd2psx  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x5a,0x18]
-          vcvtpd2psx  (%rax), %xmm11
-
-// CHECK: vcvtpd2psy  %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc]
-          vcvtpd2psy  %ymm12, %xmm11
-
-// CHECK: vcvtpd2psy  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
-          vcvtpd2psy  (%rax), %xmm11
-
-// CHECK: vcvtpd2dq  %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
-          vcvtpd2dq  %ymm12, %xmm10
-
-// CHECK: vcvtpd2dqy  %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc]
-          vcvtpd2dqy  %ymm12, %xmm11
-
-// CHECK: vcvtpd2dqy  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
-          vcvtpd2dqy  (%rax), %xmm11
-
-// CHECK: vcvtpd2dqx  %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
-          vcvtpd2dqx  %xmm11, %xmm10
-
-// CHECK: vcvtpd2dqx  (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7b,0xe6,0x18]
-          vcvtpd2dqx  (%rax), %xmm11
-
-// CHECK: vcmpps  $0, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00]
-          vcmpeqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $2, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02]
-          vcmpleps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $1, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01]
-          vcmpltps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $4, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04]
-          vcmpneqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $6, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06]
-          vcmpnleps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $5, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05]
-          vcmpnltps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $7, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07]
-          vcmpordps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $3, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03]
-          vcmpunordps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
-// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07]
-          vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12
-
-// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $0, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00]
-          vcmpeqpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $2, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02]
-          vcmplepd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $1, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01]
-          vcmpltpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $4, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04]
-          vcmpneqpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $6, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06]
-          vcmpnlepd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $5, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05]
-          vcmpnltpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $7, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07]
-          vcmpordpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $3, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03]
-          vcmpunordpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00]
-          vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02]
-          vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01]
-          vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04]
-          vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06]
-          vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05]
-          vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
-// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07]
-          vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12
-
-// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03]
-          vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps  $8, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08]
-          vcmpeq_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $9, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09]
-          vcmpngeps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $10, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a]
-          vcmpngtps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $11, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b]
-          vcmpfalseps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $12, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c]
-          vcmpneq_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $13, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d]
-          vcmpgeps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $14, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e]
-          vcmpgtps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $15, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f]
-          vcmptrueps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $16, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10]
-          vcmpeq_osps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $17, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11]
-          vcmplt_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $18, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12]
-          vcmple_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $19, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13]
-          vcmpunord_sps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $20, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14]
-          vcmpneq_usps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $21, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15]
-          vcmpnlt_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $22, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16]
-          vcmpnle_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $23, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17]
-          vcmpord_sps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $24, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18]
-          vcmpeq_usps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $25, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19]
-          vcmpnge_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $26, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a]
-          vcmpngt_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $27, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b]
-          vcmpfalse_osps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $28, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c]
-          vcmpneq_osps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $29, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d]
-          vcmpge_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $30, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e]
-          vcmpgt_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps  $31, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
-          vcmptrue_usps %ymm11, %ymm12, %ymm13
-
-// CHECK: vaddsubps  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb]
-          vaddsubps  %ymm11, %ymm12, %ymm13
-
-// CHECK: vaddsubps  (%rax), %ymm11, %ymm12
-// CHECK: encoding: [0xc5,0x27,0xd0,0x20]
-          vaddsubps  (%rax), %ymm11, %ymm12
-
-// CHECK: vaddsubpd  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb]
-          vaddsubpd  %ymm11, %ymm12, %ymm13
-
-// CHECK: vaddsubpd  (%rax), %ymm11, %ymm12
-// CHECK: encoding: [0xc5,0x25,0xd0,0x20]
-          vaddsubpd  (%rax), %ymm11, %ymm12
-
-// CHECK: vhaddps  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb]
-          vhaddps  %ymm11, %ymm12, %ymm13
-
-// CHECK: vhaddps  (%rax), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1f,0x7c,0x28]
-          vhaddps  (%rax), %ymm12, %ymm13
-
-// CHECK: vhaddpd  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb]
-          vhaddpd  %ymm11, %ymm12, %ymm13
-
-// CHECK: vhaddpd  (%rax), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0x7c,0x28]
-          vhaddpd  (%rax), %ymm12, %ymm13
-
-// CHECK: vhsubps  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb]
-          vhsubps  %ymm11, %ymm12, %ymm13
-
-// CHECK: vhsubps  (%rax), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1f,0x7d,0x28]
-          vhsubps  (%rax), %ymm12, %ymm13
-
-// CHECK: vhsubpd  %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb]
-          vhsubpd  %ymm11, %ymm12, %ymm13
-
-// CHECK: vhsubpd  (%rax), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0x7d,0x28]
-          vhsubpd  (%rax), %ymm12, %ymm13
-
-// CHECK: vblendps  $3, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03]
-          vblendps  $3, %ymm12, %ymm10, %ymm11
-
-// CHECK: vblendps  $3, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03]
-          vblendps  $3, (%rax), %ymm10, %ymm11
-
-// CHECK: vblendpd  $3, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03]
-          vblendpd  $3, %ymm12, %ymm10, %ymm11
-
-// CHECK: vblendpd  $3, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03]
-          vblendpd  $3, (%rax), %ymm10, %ymm11
-
-// CHECK: vdpps  $3, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03]
-          vdpps  $3, %ymm12, %ymm10, %ymm11
-
-// CHECK: vdpps  $3, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03]
-          vdpps  $3, (%rax), %ymm10, %ymm11
-
-// CHECK: vbroadcastf128  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20]
-          vbroadcastf128  (%rax), %ymm12
-
-// CHECK: vbroadcastsd  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20]
-          vbroadcastsd  (%rax), %ymm12
-
-// CHECK: vbroadcastss  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20]
-          vbroadcastss  (%rax), %xmm12
-
-// CHECK: vbroadcastss  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20]
-          vbroadcastss  (%rax), %ymm12
-
-// CHECK: vinsertf128  $7, %xmm12, %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07]
-          vinsertf128  $7, %xmm12, %ymm12, %ymm10
-
-// CHECK: vinsertf128  $7, (%rax), %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07]
-          vinsertf128  $7, (%rax), %ymm12, %ymm10
-
-// CHECK: vextractf128  $7, %ymm12, %xmm12
-// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07]
-          vextractf128  $7, %ymm12, %xmm12
-
-// CHECK: vextractf128  $7, %ymm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07]
-          vextractf128  $7, %ymm12, (%rax)
-
-// CHECK: vmaskmovpd  %xmm12, %xmm10, (%rax)
-// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20]
-          vmaskmovpd  %xmm12, %xmm10, (%rax)
-
-// CHECK: vmaskmovpd  %ymm12, %ymm10, (%rax)
-// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20]
-          vmaskmovpd  %ymm12, %ymm10, (%rax)
-
-// CHECK: vmaskmovpd  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10]
-          vmaskmovpd  (%rax), %xmm12, %xmm10
-
-// CHECK: vmaskmovpd  (%rax), %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10]
-          vmaskmovpd  (%rax), %ymm12, %ymm10
-
-// CHECK: vmaskmovps  %xmm12, %xmm10, (%rax)
-// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20]
-          vmaskmovps  %xmm12, %xmm10, (%rax)
-
-// CHECK: vmaskmovps  %ymm12, %ymm10, (%rax)
-// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20]
-          vmaskmovps  %ymm12, %ymm10, (%rax)
-
-// CHECK: vmaskmovps  (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10]
-          vmaskmovps  (%rax), %xmm12, %xmm10
-
-// CHECK: vmaskmovps  (%rax), %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10]
-          vmaskmovps  (%rax), %ymm12, %ymm10
-
-// CHECK: vpermilps  $7, %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07]
-          vpermilps  $7, %xmm11, %xmm10
-
-// CHECK: vpermilps  $7, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07]
-          vpermilps  $7, %ymm10, %ymm11
-
-// CHECK: vpermilps  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07]
-          vpermilps  $7, (%rax), %xmm10
-
-// CHECK: vpermilps  $7, (%rax), %ymm10
-// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07]
-          vpermilps  $7, (%rax), %ymm10
-
-// CHECK: vpermilps  %xmm11, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb]
-          vpermilps  %xmm11, %xmm10, %xmm11
-
-// CHECK: vpermilps  %ymm11, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb]
-          vpermilps  %ymm11, %ymm10, %ymm11
-
-// CHECK: vpermilps  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28]
-          vpermilps  (%rax), %xmm10, %xmm13
-
-// CHECK: vpermilps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18]
-          vpermilps  (%rax), %ymm10, %ymm11
-
-// CHECK: vpermilpd  $7, %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07]
-          vpermilpd  $7, %xmm11, %xmm10
-
-// CHECK: vpermilpd  $7, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07]
-          vpermilpd  $7, %ymm10, %ymm11
-
-// CHECK: vpermilpd  $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07]
-          vpermilpd  $7, (%rax), %xmm10
-
-// CHECK: vpermilpd  $7, (%rax), %ymm10
-// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07]
-          vpermilpd  $7, (%rax), %ymm10
-
-// CHECK: vpermilpd  %xmm11, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb]
-          vpermilpd  %xmm11, %xmm10, %xmm11
-
-// CHECK: vpermilpd  %ymm11, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb]
-          vpermilpd  %ymm11, %ymm10, %ymm11
-
-// CHECK: vpermilpd  (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28]
-          vpermilpd  (%rax), %xmm10, %xmm13
-
-// CHECK: vpermilpd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18]
-          vpermilpd  (%rax), %ymm10, %ymm11
-
-// CHECK: vperm2f128  $7, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07]
-          vperm2f128  $7, %ymm12, %ymm10, %ymm11
-
-// CHECK: vperm2f128  $7, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
-          vperm2f128  $7, (%rax), %ymm10, %ymm11
-
-// CHECK: vcvtsd2si  %xmm8, %r8d
-// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
-          vcvtsd2si  %xmm8, %r8d
-
-// CHECK: vcvtsd2si  (%rcx), %ecx
-// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
-          vcvtsd2si  (%rcx), %ecx
-
-// CHECK: vcvtss2si  %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
-          vcvtss2si  %xmm4, %rcx
-
-// CHECK: vcvtss2si  (%rcx), %r8
-// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
-          vcvtss2si  (%rcx), %r8
-
-// CHECK: vcvtsi2sdl  %r8d, %xmm8, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
-          vcvtsi2sdl  %r8d, %xmm8, %xmm15
-
-// CHECK: vcvtsi2sdl  (%rbp), %xmm8, %xmm15
-// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
-          vcvtsi2sdl  (%rbp), %xmm8, %xmm15
-
-// CHECK: vcvtsi2sdq  %rcx, %xmm4, %xmm6
-// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
-          vcvtsi2sdq  %rcx, %xmm4, %xmm6
-
-// CHECK: vcvtsi2sdq  (%rcx), %xmm4, %xmm6
-// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31]
-          vcvtsi2sdq  (%rcx), %xmm4, %xmm6
-
-// CHECK: vcvtsi2ssq  %rcx, %xmm4, %xmm6
-// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1]
-          vcvtsi2ssq  %rcx, %xmm4, %xmm6
-
-// CHECK: vcvtsi2ssq  (%rcx), %xmm4, %xmm6
-// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
-          vcvtsi2ssq  (%rcx), %xmm4, %xmm6
-
-// CHECK: vcvttsd2si  %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
-          vcvttsd2si  %xmm4, %rcx
-
-// CHECK: vcvttsd2si  (%rcx), %rcx
-// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
-          vcvttsd2si  (%rcx), %rcx
-
-// CHECK: vcvttss2si  %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
-          vcvttss2si  %xmm4, %rcx
-
-// CHECK: vcvttss2si  (%rcx), %rcx
-// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
-          vcvttss2si  (%rcx), %rcx
-
-// CHECK: vlddqu  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
-          vlddqu  (%rax), %ymm12
-
-// CHECK: vmovddup  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4]
-          vmovddup  %ymm12, %ymm10
-
-// CHECK: vmovddup  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7f,0x12,0x20]
-          vmovddup  (%rax), %ymm12
-
-// CHECK: vmovdqa  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4]
-          vmovdqa  %ymm12, %ymm10
-
-// CHECK: vmovdqa  %ymm12, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x7f,0x20]
-          vmovdqa  %ymm12, (%rax)
-
-// CHECK: vmovdqa  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x6f,0x20]
-          vmovdqa  (%rax), %ymm12
-
-// CHECK: vmovdqu  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4]
-          vmovdqu  %ymm12, %ymm10
-
-// CHECK: vmovdqu  %ymm12, (%rax)
-// CHECK: encoding: [0xc5,0x7e,0x7f,0x20]
-          vmovdqu  %ymm12, (%rax)
-
-// CHECK: vmovdqu  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7e,0x6f,0x20]
-          vmovdqu  (%rax), %ymm12
-
-// CHECK: vmovshdup  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4]
-          vmovshdup  %ymm12, %ymm10
-
-// CHECK: vmovshdup  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7e,0x16,0x20]
-          vmovshdup  (%rax), %ymm12
-
-// CHECK: vmovsldup  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4]
-          vmovsldup  %ymm12, %ymm10
-
-// CHECK: vmovsldup  (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7e,0x12,0x20]
-          vmovsldup  (%rax), %ymm12
-
-// CHECK: vptest  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4]
-          vptest  %ymm12, %ymm10
-
-// CHECK: vptest  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20]
-          vptest  (%rax), %ymm12
-
-// CHECK: vroundpd  $7, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07]
-          vroundpd  $7, %ymm10, %ymm11
-
-// CHECK: vroundpd  $7, (%rax), %ymm10
-// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07]
-          vroundpd  $7, (%rax), %ymm10
-
-// CHECK: vroundps  $7, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07]
-          vroundps  $7, %ymm10, %ymm11
-
-// CHECK: vroundps  $7, (%rax), %ymm10
-// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07]
-          vroundps  $7, (%rax), %ymm10
-
-// CHECK: vshufpd  $7, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07]
-          vshufpd  $7, %ymm12, %ymm10, %ymm11
-
-// CHECK: vshufpd  $7, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07]
-          vshufpd  $7, (%rax), %ymm10, %ymm11
-
-// CHECK: vshufps  $7, %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07]
-          vshufps  $7, %ymm12, %ymm10, %ymm11
-
-// CHECK: vshufps  $7, (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07]
-          vshufps  $7, (%rax), %ymm10, %ymm11
-
-// CHECK: vtestpd  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4]
-          vtestpd  %xmm12, %xmm10
-
-// CHECK: vtestpd  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4]
-          vtestpd  %ymm12, %ymm10
-
-// CHECK: vtestpd  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20]
-          vtestpd  (%rax), %xmm12
-
-// CHECK: vtestpd  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20]
-          vtestpd  (%rax), %ymm12
-
-// CHECK: vtestps  %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4]
-          vtestps  %xmm12, %xmm10
-
-// CHECK: vtestps  %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4]
-          vtestps  %ymm12, %ymm10
-
-// CHECK: vtestps  (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20]
-          vtestps  (%rax), %xmm12
-
-// CHECK: vtestps  (%rax), %ymm12
-// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20]
-          vtestps  (%rax), %ymm12
-
-// CHECK: vextractps   $10, %xmm8, %r8
-// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a]
-          vextractps   $10, %xmm8, %r8
-
-// CHECK: vextractps   $7, %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07]
-          vextractps   $7, %xmm4, %rcx
-
-// CHECK: vmovd  %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1]
-          vmovd  %xmm4, %rcx
-
-// CHECK: vmovmskpd  %xmm4, %rcx
-// CHECK: encoding: [0xc5,0xf9,0x50,0xcc]
-          vmovmskpd  %xmm4, %rcx
-
-// CHECK: vmovmskpd  %ymm4, %rcx
-// CHECK: encoding: [0xc5,0xfd,0x50,0xcc]
-          vmovmskpd  %ymm4, %rcx
-
-// CHECK: vmovmskps  %xmm4, %rcx
-// CHECK: encoding: [0xc5,0xf8,0x50,0xcc]
-          vmovmskps  %xmm4, %rcx
-
-// CHECK: vmovmskps  %ymm4, %rcx
-// CHECK: encoding: [0xc5,0xfc,0x50,0xcc]
-          vmovmskps  %ymm4, %rcx
-
-// CHECK: vpextrb  $7, %xmm4, %rcx
-// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07]
-          vpextrb  $7, %xmm4, %rcx
-
-// CHECK: vpinsrw  $7, %r8, %xmm15, %xmm8
-// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07]
-          vpinsrw  $7, %r8, %xmm15, %xmm8
-
-// CHECK: vpinsrw  $7, %rcx, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07]
-          vpinsrw  $7, %rcx, %xmm4, %xmm6
-
-// CHECK: vpmovmskb  %xmm4, %rcx
-// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc]
-          vpmovmskb  %xmm4, %rcx
-
-// CHECK: vblendvpd  %ymm11, 57005(%rax,%riz), %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0]
-          vblendvpd  %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13
-
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
deleted file mode 100644
index f45b0a23d5e8..000000000000
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ /dev/null
@@ -1,142 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-// PR7195
-// CHECK: callw 42
-// CHECK: encoding: [0x66,0xe8,A,A]
-       callw 42
-
-// rdar://8127102
-// CHECK: movq	%gs:(%rdi), %rax
-// CHECK: encoding: [0x65,0x48,0x8b,0x07]
-movq	%gs:(%rdi), %rax
-
-// CHECK: crc32b 	%bl, %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
-        crc32b	%bl, %eax
-
-// CHECK: crc32b 	4(%rbx), %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
-        crc32b	4(%rbx), %eax
-
-// CHECK: crc32w 	%bx, %eax
-// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
-        crc32w	%bx, %eax
-
-// CHECK: crc32w 	4(%rbx), %eax
-// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
-        crc32w	4(%rbx), %eax
-
-// CHECK: crc32l 	%ebx, %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
-        crc32l	%ebx, %eax
-
-// CHECK: crc32l 	4(%rbx), %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
-        crc32l	4(%rbx), %eax
-
-// CHECK: crc32l 	3735928559(%rbx,%rcx,8), %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	crc32l   0xdeadbeef(%rbx,%rcx,8),%ecx
-
-// CHECK: crc32l 	69, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0x45,0x00,0x00,0x00]
-        	crc32l   0x45,%ecx
-
-// CHECK: crc32l 	32493, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xed,0x7e,0x00,0x00]
-        	crc32l   0x7eed,%ecx
-
-// CHECK: crc32l 	3133065982, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xfe,0xca,0xbe,0xba]
-        	crc32l   0xbabecafe,%ecx
-
-// CHECK: crc32l 	%ecx, %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
-        	crc32l   %ecx,%ecx
-
-// CHECK: crc32b 	%r11b, %eax
-// CHECK:  encoding: [0xf2,0x41,0x0f,0x38,0xf0,0xc3]
-        crc32b	%r11b, %eax
-
-// CHECK: crc32b 	4(%rbx), %eax
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
-        crc32b	4(%rbx), %eax
-
-// CHECK: crc32b 	%dil, %rax
-// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0xc7]
-        crc32b	%dil,%rax
-
-// CHECK: crc32b 	%r11b, %rax
-// CHECK:  encoding: [0xf2,0x49,0x0f,0x38,0xf0,0xc3]
-        crc32b	%r11b,%rax
-
-// CHECK: crc32b 	4(%rbx), %rax
-// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0x43,0x04]
-        crc32b	4(%rbx), %rax
-
-// CHECK: crc32q 	%rbx, %rax
-// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc3]
-        crc32q	%rbx, %rax
-
-// CHECK: crc32q 	4(%rbx), %rax
-// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04]
-        crc32q	4(%rbx), %rax
-
-// CHECK: movd %r8, %mm1
-// CHECK:  encoding: [0x49,0x0f,0x6e,0xc8]
-movd %r8, %mm1
-
-// CHECK: movd %r8d, %mm1
-// CHECK:  encoding: [0x41,0x0f,0x6e,0xc8]
-movd %r8d, %mm1
-
-// CHECK: movd %rdx, %mm1
-// CHECK:  encoding: [0x48,0x0f,0x6e,0xca]
-movd %rdx, %mm1
-
-// CHECK: movd %edx, %mm1
-// CHECK:  encoding: [0x0f,0x6e,0xca]
-movd %edx, %mm1
-
-// CHECK: movd %mm1, %r8
-// CHECK:  encoding: [0x49,0x0f,0x7e,0xc8]
-movd %mm1, %r8
-
-// CHECK: movd %mm1, %r8d
-// CHECK:  encoding: [0x41,0x0f,0x7e,0xc8]
-movd %mm1, %r8d
-
-// CHECK: movd %mm1, %rdx
-// CHECK:  encoding: [0x48,0x0f,0x7e,0xca]
-movd %mm1, %rdx
-
-// CHECK: movd %mm1, %edx
-// CHECK:  encoding: [0x0f,0x7e,0xca]
-movd %mm1, %edx
-
-// rdar://7840289
-// CHECK: pshufb	CPI1_0(%rip), %xmm1
-// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
-// CHECK:  fixup A - offset: 5, value: CPI1_0-4
-pshufb	CPI1_0(%rip), %xmm1
-
-// CHECK: movq  57005(,%riz), %rbx
-// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
-          movq  57005(,%riz), %rbx
-
-// CHECK: movq  48879(,%riz), %rax
-// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
-          movq  48879(,%riz), %rax
-
-// CHECK: movq  -4(,%riz,8), %rax
-// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
-          movq  -4(,%riz,8), %rax
-
-// CHECK: movq  (%rcx,%riz), %rax
-// CHECK: encoding: [0x48,0x8b,0x04,0x21]
-          movq  (%rcx,%riz), %rax
-
-// CHECK: movq  (%rcx,%riz,8), %rax
-// CHECK: encoding: [0x48,0x8b,0x04,0xe1]
-          movq  (%rcx,%riz,8), %rax
-
diff --git a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s
deleted file mode 100644
index d08a7329a09f..000000000000
--- a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s
+++ /dev/null
@@ -1,674 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-// CHECK: vfmadd132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
-          vfmadd132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
-          vfmadd132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
-          vfmadd132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
-          vfmadd132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
-          vfmadd213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
-          vfmadd213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
-          vfmadd213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
-          vfmadd213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
-          vfmadd231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
-          vfmadd231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
-          vfmadd231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
-          vfmadd231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
-          vfmadd132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
-          vfmadd132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
-          vfmadd132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
-          vfmadd132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
-          vfmadd213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
-          vfmadd213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
-          vfmadd213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
-          vfmadd213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
-          vfmadd231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
-          vfmadd231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
-          vfmadd231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
-          vfmadd231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
-          vfmadd132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
-          vfmadd132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
-          vfmadd132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
-          vfmadd132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
-          vfmadd213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
-          vfmadd213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
-          vfmadd213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
-          vfmadd213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
-          vfmadd231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
-          vfmadd231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
-          vfmadd231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmadd231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
-          vfmadd231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc]
-          vfmaddsub132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18]
-          vfmaddsub132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc]
-          vfmaddsub132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18]
-          vfmaddsub132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc]
-          vfmaddsub213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18]
-          vfmaddsub213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc]
-          vfmaddsub213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18]
-          vfmaddsub213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc]
-          vfmaddsub231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18]
-          vfmaddsub231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmaddsub231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc]
-          vfmaddsub231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmaddsub231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18]
-          vfmaddsub231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc]
-          vfmsubadd132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18]
-          vfmsubadd132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc]
-          vfmsubadd132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18]
-          vfmsubadd132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc]
-          vfmsubadd213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18]
-          vfmsubadd213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc]
-          vfmsubadd213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18]
-          vfmsubadd213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc]
-          vfmsubadd231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18]
-          vfmsubadd231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsubadd231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc]
-          vfmsubadd231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsubadd231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18]
-          vfmsubadd231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc]
-          vfmsub132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18]
-          vfmsub132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc]
-          vfmsub132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18]
-          vfmsub132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc]
-          vfmsub213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18]
-          vfmsub213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc]
-          vfmsub213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18]
-          vfmsub213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc]
-          vfmsub231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18]
-          vfmsub231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmsub231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc]
-          vfmsub231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfmsub231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18]
-          vfmsub231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc]
-          vfnmadd132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18]
-          vfnmadd132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc]
-          vfnmadd132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18]
-          vfnmadd132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc]
-          vfnmadd213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18]
-          vfnmadd213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc]
-          vfnmadd213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18]
-          vfnmadd213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc]
-          vfnmadd231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18]
-          vfnmadd231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmadd231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc]
-          vfnmadd231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmadd231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18]
-          vfnmadd231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub132pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc]
-          vfnmsub132pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub132pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18]
-          vfnmsub132pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub132ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc]
-          vfnmsub132ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub132ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18]
-          vfnmsub132ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub213pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc]
-          vfnmsub213pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub213pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18]
-          vfnmsub213pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub213ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc]
-          vfnmsub213ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub213ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18]
-          vfnmsub213ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub231pd  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc]
-          vfnmsub231pd  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub231pd  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18]
-          vfnmsub231pd  (%rax), %xmm10, %xmm11
-
-// CHECK: vfnmsub231ps  %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc]
-          vfnmsub231ps  %xmm12, %xmm10, %xmm11
-
-// CHECK: vfnmsub231ps  (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18]
-          vfnmsub231ps  (%rax), %xmm10, %xmm11
-
-// CHECK: vfmadd132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
-          vfmadd132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
-          vfmadd132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
-          vfmadd132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
-          vfmadd132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
-          vfmadd213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
-          vfmadd213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
-          vfmadd213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
-          vfmadd213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
-          vfmadd231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
-          vfmadd231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmadd231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
-          vfmadd231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmadd231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
-          vfmadd231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc]
-          vfmaddsub132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18]
-          vfmaddsub132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc]
-          vfmaddsub132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18]
-          vfmaddsub132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xa6,0xdc]
-          vfmaddsub213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18]
-          vfmaddsub213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc]
-          vfmaddsub213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18]
-          vfmaddsub213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc]
-          vfmaddsub231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18]
-          vfmaddsub231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmaddsub231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc]
-          vfmaddsub231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmaddsub231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18]
-          vfmaddsub231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc]
-          vfmsubadd132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18]
-          vfmsubadd132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc]
-          vfmsubadd132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18]
-          vfmsubadd132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc]
-          vfmsubadd213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18]
-          vfmsubadd213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc]
-          vfmsubadd213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18]
-          vfmsubadd213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc]
-          vfmsubadd231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18]
-          vfmsubadd231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsubadd231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc]
-          vfmsubadd231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsubadd231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18]
-          vfmsubadd231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc]
-          vfmsub132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18]
-          vfmsub132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc]
-          vfmsub132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18]
-          vfmsub132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc]
-          vfmsub213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18]
-          vfmsub213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc]
-          vfmsub213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18]
-          vfmsub213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc]
-          vfmsub231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xba,0x18]
-          vfmsub231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfmsub231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc]
-          vfmsub231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfmsub231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18]
-          vfmsub231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc]
-          vfnmadd132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18]
-          vfnmadd132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc]
-          vfnmadd132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18]
-          vfnmadd132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc]
-          vfnmadd213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18]
-          vfnmadd213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc]
-          vfnmadd213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18]
-          vfnmadd213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc]
-          vfnmadd231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18]
-          vfnmadd231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmadd231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc]
-          vfnmadd231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmadd231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18]
-          vfnmadd231ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub132pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc]
-          vfnmsub132pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub132pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18]
-          vfnmsub132pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub132ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc]
-          vfnmsub132ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub132ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18]
-          vfnmsub132ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub213pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc]
-          vfnmsub213pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub213pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18]
-          vfnmsub213pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub213ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc]
-          vfnmsub213ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub213ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18]
-          vfnmsub213ps  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub231pd  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc]
-          vfnmsub231pd  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub231pd  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18]
-          vfnmsub231pd  (%rax), %ymm10, %ymm11
-
-// CHECK: vfnmsub231ps  %ymm12, %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc]
-          vfnmsub231ps  %ymm12, %ymm10, %ymm11
-
-// CHECK: vfnmsub231ps  (%rax), %ymm10, %ymm11
-// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18]
-          vfnmsub231ps  (%rax), %ymm10, %ymm11
-
diff --git a/test/MC/AsmParser/X86/x86_64-imm-widths.s b/test/MC/AsmParser/X86/x86_64-imm-widths.s
deleted file mode 100644
index 6243717ba829..000000000000
--- a/test/MC/AsmParser/X86/x86_64-imm-widths.s
+++ /dev/null
@@ -1,105 +0,0 @@
-// RUN: llvm-mc -triple x86_64- --show-encoding %s | FileCheck %s
-
-// CHECK: addb $0, %al
-// CHECK: encoding: [0x04,0x00]
- 	addb $0x00, %al
-
-// CHECK: addb $127, %al
-// CHECK: encoding: [0x04,0x7f]
- 	addb $0x7F, %al
-
-// CHECK: addb $128, %al
-// CHECK: encoding: [0x04,0x80]
- 	addb $0x80, %al
-
-// CHECK: addb $255, %al
-// CHECK: encoding: [0x04,0xff]
- 	addb $0xFF, %al
-
-// CHECK: addw $0, %ax
-// CHECK: encoding: [0x66,0x83,0xc0,0x00]
- 	addw $0x0000, %ax
-
-// CHECK: addw $127, %ax
-// CHECK: encoding: [0x66,0x83,0xc0,0x7f]
- 	addw $0x007F, %ax
-
-// CHECK: addw $65408, %ax
-// CHECK: encoding: [0x66,0x83,0xc0,0x80]
- 	addw $0xFF80, %ax
-
-// CHECK: addw $65535, %ax
-// CHECK: encoding: [0x66,0x83,0xc0,0xff]
-	addw $0xFFFF, %ax
-
-// CHECK: addl $0, %eax
-// CHECK: encoding: [0x83,0xc0,0x00]
- 	addl $0x00000000, %eax
-
-// CHECK: addl $127, %eax
-// CHECK: encoding: [0x83,0xc0,0x7f]
- 	addl $0x0000007F, %eax
-
-// CHECK: addl $65408, %eax
-// CHECK: encoding: [0x05,0x80,0xff,0x00,0x00]
- 	addl $0xFF80, %eax
-
-// CHECK: addl $65535, %eax
-// CHECK: encoding: [0x05,0xff,0xff,0x00,0x00]
-	addl $0xFFFF, %eax
-
-// CHECK: addl $4294967168, %eax
-// CHECK: encoding: [0x83,0xc0,0x80]
- 	addl $0xFFFFFF80, %eax
-
-// CHECK: addl $4294967295, %eax
-// CHECK: encoding: [0x83,0xc0,0xff]
- 	addl $0xFFFFFFFF, %eax
-
-// CHECK: addq $0, %rax
-// CHECK: encoding: [0x48,0x83,0xc0,0x00]
- 	addq $0x0000000000000000, %rax
-
-// CHECK: addq $127, %rax
-// CHECK: encoding: [0x48,0x83,0xc0,0x7f]
- 	addq $0x000000000000007F, %rax
-
-// CHECK: addq $-128, %rax
-// CHECK: encoding: [0x48,0x83,0xc0,0x80]
- 	addq $0xFFFFFFFFFFFFFF80, %rax
-
-// CHECK: addq $-1, %rax
-// CHECK: encoding: [0x48,0x83,0xc0,0xff]
- 	addq $0xFFFFFFFFFFFFFFFF, %rax
-
-// CHECK: addq $0, %rax
-// CHECK: encoding: [0x48,0x83,0xc0,0x00]
- 	addq $0x0000000000000000, %rax
-
-// CHECK: addq $65408, %rax
-// CHECK: encoding: [0x48,0x05,0x80,0xff,0x00,0x00]
- 	addq $0xFF80, %rax
-
-// CHECK: addq $65535, %rax
-// CHECK: encoding: [0x48,0x05,0xff,0xff,0x00,0x00]
-	addq $0xFFFF, %rax
-
-// CHECK: movq $4294967168, %rax
-// CHECK: encoding: [0x48,0xb8,0x80,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
- 	movq $0xFFFFFF80, %rax
-
-// CHECK: movq $4294967295, %rax
-// CHECK: encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
-        movq $0xFFFFFFFF, %rax
-
-// CHECK: addq $2147483647, %rax
-// CHECK: encoding: [0x48,0x05,0xff,0xff,0xff,0x7f]
- 	addq $0x000000007FFFFFFF, %rax
-
-// CHECK: addq $-2147483648, %rax
-// CHECK: encoding: [0x48,0x05,0x00,0x00,0x00,0x80]
-	addq $0xFFFFFFFF80000000, %rax
-
-// CHECK: addq $-256, %rax
-// CHECK: encoding: [0x48,0x05,0x00,0xff,0xff,0xff]
- 	addq $0xFFFFFFFFFFFFFF00, %rax
diff --git a/test/MC/AsmParser/X86/x86_64-incl_decl.s b/test/MC/AsmParser/X86/x86_64-incl_decl.s
deleted file mode 100644
index 51315f83f2cc..000000000000
--- a/test/MC/AsmParser/X86/x86_64-incl_decl.s
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_32 %s
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck --check-prefix=CHECK-X86_64 %s
-
-# CHECK-X86_32:	incb	%al # encoding: [0xfe,0xc0]
-# CHECK-X86_64:	incb	%al # encoding: [0xfe,0xc0]
-	incb %al
-
-# CHECK-X86_32:	incw	%ax # encoding: [0x66,0x40]
-# CHECK-X86_64:	incw	%ax # encoding: [0x66,0xff,0xc0]
-	incw %ax
-
-# CHECK-X86_32:	incl	%eax # encoding: [0x40]
-# CHECK-X86_64:	incl	%eax # encoding: [0xff,0xc0]
-	incl %eax
-
-# CHECK-X86_32:	decb	%al # encoding: [0xfe,0xc8]
-# CHECK-X86_64:	decb	%al # encoding: [0xfe,0xc8]
-	decb %al
-
-# CHECK-X86_32:	decw	%ax # encoding: [0x66,0x48]
-# CHECK-X86_64:	decw	%ax # encoding: [0x66,0xff,0xc8]
-	decw %ax
-
-# CHECK-X86_32:	decl	%eax # encoding: [0x48]
-# CHECK-X86_64:	decl	%eax # encoding: [0xff,0xc8]
-	decl %eax
diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s
deleted file mode 100644
index 9f94d8404f42..000000000000
--- a/test/MC/AsmParser/X86/x86_64-new-encoder.s
+++ /dev/null
@@ -1,159 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-
-movl	foo(%rip), %eax
-// CHECK: movl	foo(%rip), %eax
-// CHECK: encoding: [0x8b,0x05,A,A,A,A]
-// CHECK: fixup A - offset: 2, value: foo-4, kind: reloc_riprel_4byte
-
-movb	$12, foo(%rip)
-// CHECK: movb	$12, foo(%rip)
-// CHECK: encoding: [0xc6,0x05,A,A,A,A,0x0c]
-// CHECK:    fixup A - offset: 2, value: foo-5, kind: reloc_riprel_4byte
-
-movw	$12, foo(%rip)
-// CHECK: movw	$12, foo(%rip)
-// CHECK: encoding: [0x66,0xc7,0x05,A,A,A,A,0x0c,0x00]
-// CHECK:    fixup A - offset: 3, value: foo-6, kind: reloc_riprel_4byte
-
-movl	$12, foo(%rip)
-// CHECK: movl	$12, foo(%rip)
-// CHECK: encoding: [0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
-// CHECK:    fixup A - offset: 2, value: foo-8, kind: reloc_riprel_4byte
-
-movq	$12, foo(%rip)
-// CHECK:  movq	$12, foo(%rip)
-// CHECK: encoding: [0x48,0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
-// CHECK:    fixup A - offset: 3, value: foo-8, kind: reloc_riprel_4byte
-
-// CHECK: addq	$-424, %rax
-// CHECK: encoding: [0x48,0x05,0x58,0xfe,0xff,0xff]
-addq $-424, %rax
-
-
-// CHECK: movq	_foo@GOTPCREL(%rip), %rax
-// CHECK:  encoding: [0x48,0x8b,0x05,A,A,A,A]
-// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
-movq _foo@GOTPCREL(%rip), %rax
-
-// CHECK: movq	_foo@GOTPCREL(%rip), %r14
-// CHECK:  encoding: [0x4c,0x8b,0x35,A,A,A,A]
-// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
-movq _foo@GOTPCREL(%rip), %r14
-
-
-// CHECK: movq	(%r13,%rax,8), %r13
-// CHECK:  encoding: [0x4d,0x8b,0x6c,0xc5,0x00]
-movq 0x00(%r13,%rax,8),%r13
-
-// CHECK: testq	%rax, %rbx
-// CHECK:  encoding: [0x48,0x85,0xd8]
-testq %rax, %rbx
-
-// CHECK: cmpq	%rbx, %r14
-// CHECK:   encoding: [0x49,0x39,0xde]
-        cmpq %rbx, %r14
-
-// rdar://7947167
-
-movsq
-// CHECK: movsq
-// CHECK:   encoding: [0x48,0xa5]
-
-movsl
-// CHECK: movsl
-// CHECK:   encoding: [0xa5]
-
-stosq
-// CHECK: stosq
-// CHECK:   encoding: [0x48,0xab]
-stosl
-// CHECK: stosl
-// CHECK:   encoding: [0xab]
-
-
-// Not moffset forms of moves, they are x86-32 only! rdar://7947184
-movb	0, %al    // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00]
-movw	0, %ax    // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
-movl	0, %eax   // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
-
-// CHECK: pushfq	# encoding: [0x9c]
-        pushf
-// CHECK: pushfq	# encoding: [0x9c]
-        pushfq
-// CHECK: popfq	        # encoding: [0x9d]
-        popf
-// CHECK: popfq	        # encoding: [0x9d]
-        popfq
-
-// CHECK: movabsq $-281474976710654, %rax
-// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
-        movabsq $0xFFFF000000000002, %rax
-
-// CHECK: movq $-281474976710654, %rax
-// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
-        movq $0xFFFF000000000002, %rax
-
-// CHECK: movq $-65536, %rax
-// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0x00,0xff,0xff]
-        movq $0xFFFFFFFFFFFF0000, %rax
-
-// CHECK: movq $-256, %rax
-// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0xff,0xff,0xff]
-        movq $0xFFFFFFFFFFFFFF00, %rax
-
-// CHECK: movq $10, %rax
-// CHECK: encoding: [0x48,0xc7,0xc0,0x0a,0x00,0x00,0x00]
-        movq $10, %rax
-
-// rdar://8014869
-//
-// CHECK: ret
-// CHECK:  encoding: [0xc3]
-        retq
-
-// CHECK: sete %al
-// CHECK: encoding: [0x0f,0x94,0xc0]
-        setz %al
-
-// CHECK: setne %al
-// CHECK: encoding: [0x0f,0x95,0xc0]
-        setnz %al
-
-// CHECK: je 0
-// CHECK: encoding: [0x74,A]
-        jz 0
-
-// CHECK: jne
-// CHECK: encoding: [0x75,A]
-        jnz 0
-
-// rdar://8017515
-btq $0x01,%rdx
-// CHECK: btq	$1, %rdx
-// CHECK:  encoding: [0x48,0x0f,0xba,0xe2,0x01]
-
-//rdar://8017633
-// CHECK: movzbl	%al, %esi
-// CHECK:  encoding: [0x0f,0xb6,0xf0]
-        movzx %al, %esi
-
-// CHECK: movzbq	%al, %rsi
-// CHECK:  encoding: [0x48,0x0f,0xb6,0xf0]
-        movzx %al, %rsi
-
-// CHECK: movzbq	(%rsp), %rsi
-// CHECK:  encoding: [0x48,0x0f,0xb6,0x34,0x24]
-        movzx 0(%rsp), %rsi
-
-
-// rdar://7873482
-// CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00]
-        movl	%gs:124, %eax
-
-// CHECK: jmpq *8(%rax)
-// CHECK:   encoding: [0xff,0x60,0x08]
-	jmp	*8(%rax)
-
-// CHECK: btq $61, -216(%rbp)
-// CHECK:   encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d]
-	btq	$61, -216(%rbp)
diff --git a/test/MC/AsmParser/X86/x86_64-operands.s b/test/MC/AsmParser/X86/x86_64-operands.s
deleted file mode 100644
index 88f1a64cc42f..000000000000
--- a/test/MC/AsmParser/X86/x86_64-operands.s
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
-
-# CHECK: callq a
-        callq a
-
-# CHECK: leaq	-40(%rbp), %r15
-	leaq	-40(%rbp), %r15
-
-
-
-// rdar://8013734 - Alias dr6=db6
-mov %dr6, %rax
-mov %db6, %rax
-# CHECK: movq	%dr6, %rax
-# CHECK: movq	%dr6, %rax
diff --git a/test/MC/AsmParser/X86/x86_64-suffix-matching.s b/test/MC/AsmParser/X86/x86_64-suffix-matching.s
deleted file mode 100644
index 0ae6fe502d1c..000000000000
--- a/test/MC/AsmParser/X86/x86_64-suffix-matching.s
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: llvm-mc -triple x86_64 -o - %s | FileCheck %s
-
-// CHECK: addl $0, %eax
-        add $0, %eax
-// CHECK: addb $255, %al
-        add $0xFF, %al
-// CHECK: orq %rax, %rdx
-        or %rax, %rdx
-// CHECK: shlq $3, %rax
-        shl $3, %rax
diff --git a/test/MC/AsmParser/X86/x86_instruction_errors.s b/test/MC/AsmParser/X86/x86_instruction_errors.s
deleted file mode 100644
index 183306be2c11..000000000000
--- a/test/MC/AsmParser/X86/x86_instruction_errors.s
+++ /dev/null
@@ -1,5 +0,0 @@
-// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err
-// RUN: FileCheck < %t.err %s
-
-// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq')
-cmp $0, 0(%eax)
diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s
deleted file mode 100644
index a82d2a1c0d41..000000000000
--- a/test/MC/AsmParser/X86/x86_instructions.s
+++ /dev/null
@@ -1,175 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err
-// RUN: FileCheck < %t %s
-// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s
-
-// CHECK: subb %al, %al
-        subb %al, %al
-
-// CHECK: addl $24, %eax
-        addl $24, %eax
-
-// CHECK: movl %eax, 10(%ebp)
-        movl %eax, 10(%ebp)
-// CHECK: movl %eax, 10(%ebp,%ebx)
-        movl %eax, 10(%ebp, %ebx)
-// CHECK: movl %eax, 10(%ebp,%ebx,4)
-        movl %eax, 10(%ebp, %ebx, 4)
-// CHECK: movl %eax, 10(,%ebx,4)
-        movl %eax, 10(, %ebx, 4)
-
-// CHECK: movl 0, %eax        
-        movl 0, %eax
-// CHECK: movl $0, %eax        
-        movl $0, %eax
-        
-// CHECK: ret
-        ret
-        
-// FIXME: Check that this matches SUB32ri8
-// CHECK: subl $1, %eax
-        subl $1, %eax
-        
-// FIXME: Check that this matches SUB32ri8
-// CHECK: subl $-1, %eax
-        subl $-1, %eax
-        
-// FIXME: Check that this matches SUB32ri
-// CHECK: subl $256, %eax
-        subl $256, %eax
-
-// FIXME: Check that this matches XOR64ri8
-// CHECK: xorq $1, %rax
-        xorq $1, %rax
-        
-// FIXME: Check that this matches XOR64ri32
-// CHECK: xorq $256, %rax
-        xorq $256, %rax
-
-// FIXME: Check that this matches SUB8rr
-// CHECK: subb %al, %bl
-        subb %al, %bl
-
-// FIXME: Check that this matches SUB16rr
-// CHECK: subw %ax, %bx
-        subw %ax, %bx
-        
-// FIXME: Check that this matches SUB32rr
-// CHECK: subl %eax, %ebx
-        subl %eax, %ebx
-        
-// FIXME: Check that this matches the correct instruction.
-// CHECK: callq *%rax
-        call *%rax
-
-// FIXME: Check that this matches the correct instruction.
-// CHECK: shldl %cl, %eax, %ebx
-        shldl %cl, %eax, %ebx
-
-// CHECK: shll $2, %eax
-        shll $2, %eax
-
-// CHECK: shll $2, %eax
-        sall $2, %eax
-
-// CHECK: rep
-// CHECK: insb
-        rep;insb
-
-// CHECK: rep
-// CHECK: outsb
-        rep;outsb
-
-// CHECK: rep
-// CHECK: movsb
-        rep;movsb
-
-// CHECK: rep
-// CHECK: lodsb
-        rep;lodsb
-
-// CHECK: rep
-// CHECK: stosb
-        rep;stosb
-
-// NOTE: repz and repe have the same opcode as rep
-// CHECK: rep
-// CHECK: cmpsb
-        repz;cmpsb
-
-// NOTE: repnz has the same opcode as repne
-// CHECK: repne
-// CHECK: cmpsb
-        repnz;cmpsb
-
-// NOTE: repe and repz have the same opcode as rep
-// CHECK: rep
-// CHECK: scasb
-        repe;scasb
-
-// CHECK: repne
-// CHECK: scasb
-        repne;scasb
-
-// CHECK: lock
-// CHECK: cmpxchgb %al, (%ebx)
-        lock;cmpxchgb %al, 0(%ebx)
-
-// CHECK: cs
-// CHECK: movb (%eax), %al
-        cs;movb 0(%eax), %al
-
-// CHECK: ss
-// CHECK: movb (%eax), %al
-        ss;movb 0(%eax), %al
-
-// CHECK: ds
-// CHECK: movb (%eax), %al
-        ds;movb 0(%eax), %al
-
-// CHECK: es
-// CHECK: movb (%eax), %al
-        es;movb 0(%eax), %al
-
-// CHECK: fs
-// CHECK: movb (%eax), %al
-        fs;movb 0(%eax), %al
-
-// CHECK: gs
-// CHECK: movb (%eax), %al
-        gs;movb 0(%eax), %al
-
-// CHECK: fadd %st(0)
-// CHECK: fadd %st(1)
-// CHECK: fadd %st(7)
-
-fadd %st(0)
-fadd %st(1)
-fadd %st(7)
-
-// CHECK: leal 0, %eax
-        leal 0, %eax
-
-// rdar://7986634 - Insensitivity on opcodes.
-// CHECK: int3
-INT3
-
-
-// Allow scale factor without index register.
-// CHECK: movaps	%xmm3, (%esi)
-// CHECK-STDERR: warning: scale factor without index register is ignored
-movaps %xmm3, (%esi, 2)
-
-// CHECK: imull $12, %eax, %eax
-imul $12, %eax
-
-// CHECK: imull %ecx, %eax
-imull %ecx, %eax
-
-// PR8114
-// CHECK: outb	%al, %dx
-// CHECK: outw	%ax, %dx
-// CHECK: outl	%eax, %dx
-
-out %al, (%dx)
-out %ax, (%dx)
-outl %eax, (%dx)
diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s
deleted file mode 100644
index ddadf7931895..000000000000
--- a/test/MC/AsmParser/X86/x86_operands.s
+++ /dev/null
@@ -1,58 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-
-# Immediates
-# CHECK: addl $1, %eax
-        addl $1, %eax
-# CHECK: addl $3, %eax
-        addl $(1+2), %eax
-# CHECK: addl $a, %eax
-        addl $a, %eax
-# CHECK: addl $3, %eax
-        addl $1 + 2, %eax
-        
-# Disambiguation
-
-# CHECK: addl $1, 8
-        addl $1, 4+4
-# CHECK: addl $1, 8
-        addl $1, (4+4)
-# CHECK: addl $1, 8(%eax)
-        addl $1, 4+4(%eax)
-# CHECK: addl $1, 8(%eax)
-        addl $1, (4+4)(%eax)
-# CHECK: addl $1, 8(%eax)
-        addl $1, 8(%eax)
-# CHECK: addl $1, (%eax)
-        addl $1, (%eax)
-# CHECK: addl $1, 8(,%eax)
-        addl $1, (4+4)(,%eax)
-        
-# Indirect Memory Operands
-# CHECK: addl $1, 1(%eax)
-        addl $1, 1(%eax)
-# CHECK: addl $1, 1(%eax,%ebx)
-        addl $1, 1(%eax,%ebx)
-# CHECK: addl $1, 1(%eax,%ebx)
-        addl $1, 1(%eax,%ebx,)
-# CHECK: addl $1, 1(%eax,%ebx,4)
-        addl $1, 1(%eax,%ebx,4)
-# CHECK: addl $1, 1(,%ebx)
-        addl $1, 1(,%ebx)
-# CHECK: addl $1, 1(,%ebx)
-        addl $1, 1(,%ebx,)
-# CHECK: addl $1, 1(,%ebx,4)
-        addl $1, 1(,%ebx,4)
-# CHECK: addl $1, 1(,%ebx,4)
-        addl $1, 1(,%ebx,(2+2))
-
-# '*'
-# CHECK: call a
-        call a
-# CHECK: call *%eax
-        call *%eax
-# CHECK: call *4(%eax)
-        call *4(%eax)
-
-# CHECK: movl	%gs:8, %eax
-movl %gs:8, %eax
-
diff --git a/test/MC/AsmParser/X86/x86_word_directive.s b/test/MC/AsmParser/X86/x86_word_directive.s
deleted file mode 100644
index 2950c8cd5f12..000000000000
--- a/test/MC/AsmParser/X86/x86_word_directive.s
+++ /dev/null
@@ -1,6 +0,0 @@
-# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-
-# CHECK: TEST0:
-# CHECK: .short 3
-TEST0:  
-        .word 3
diff --git a/test/MC/AsmParser/dash-n.s b/test/MC/AsmParser/dash-n.s
new file mode 100644
index 000000000000..b6243a4a01d0
--- /dev/null
+++ b/test/MC/AsmParser/dash-n.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -n -triple i386-unknown-unknown %s 2> %t
+// RUN: FileCheck < %t %s
+
+.globl a
+// CHECK: error: expected section directive before assembly directive
+.long 0
+        
diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s
index 1fd1f6e44a32..86e6267a7a1e 100644
--- a/test/MC/AsmParser/directive_abort.s
+++ b/test/MC/AsmParser/directive_abort.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t
 # RUN: FileCheck -input-file %t %s
 
 # CHECK: error: .abort 'please stop assembing'
diff --git a/test/MC/AsmParser/directive_ascii.s b/test/MC/AsmParser/directive_ascii.s
index 5bfc1e9d6eac..a7ba7bbd5da1 100644
--- a/test/MC/AsmParser/directive_ascii.s
+++ b/test/MC/AsmParser/directive_ascii.s
@@ -32,3 +32,10 @@ TEST4:
 TEST5:
         .ascii "\b\f\n\r\t\\\""
         
+# CHECK: TEST6:
+# CHECK: .byte 66
+# CHECK: .byte 0
+# CHECK: .byte 67
+# CHECK: .byte 0
+TEST6:
+        .string "B", "C"
diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s
index b122fdc2cf29..164d42a3fa7c 100644
--- a/test/MC/AsmParser/directive_loc.s
+++ b/test/MC/AsmParser/directive_loc.s
@@ -5,4 +5,4 @@
         .loc 1
         .loc 1 2
         .loc 1 2 3
-
+        .loc 1 2 discriminator 1
diff --git a/test/MC/AsmParser/directive_set.s b/test/MC/AsmParser/directive_set.s
index f1fc30a85df1..69abce0db2ff 100644
--- a/test/MC/AsmParser/directive_set.s
+++ b/test/MC/AsmParser/directive_set.s
@@ -5,3 +5,8 @@
 TEST0:  
         .set a, 0
         
+# CHECK: TEST1:
+# CHECK: a = 0
+TEST1:  
+        .equ a, 0
+
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index c7617a337e02..98259bdb29d5 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -36,3 +36,23 @@ TEST4:
 # CHECK: 	.quad	1075
 
 
+TEST5:
+        .value 8
+# CHECK: TEST5:
+# CHECK: .short 8
+
+TEST6:
+        .byte 'c'
+        .byte '\''
+        .byte '\\'
+        .byte '\#'
+        .byte '\t'
+        .byte '\n'
+
+# CHECK: TEST6
+# CHECK:        .byte   99
+# CHECK:        .byte   39
+# CHECK:        .byte   92
+# CHECK:        .byte   35
+# CHECK:        .byte   9
+# CHECK:        .byte   10
diff --git a/test/MC/AsmParser/equ.s b/test/MC/AsmParser/equ.s
new file mode 100644
index 000000000000..568f58fa1298
--- /dev/null
+++ b/test/MC/AsmParser/equ.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -n -triple i386-unknown-unknown %s 2> %t
+// RUN: FileCheck < %t %s
+
+.equ	a, 0
+.set	a, 1
+.equ	a, 2
+.equiv	a, 3
+// CHECK: error: redefinition of 'a'
+
diff --git a/test/MC/AsmParser/expr_symbol_modifiers.s b/test/MC/AsmParser/expr_symbol_modifiers.s
new file mode 100644
index 000000000000..7371c97cbdf4
--- /dev/null
+++ b/test/MC/AsmParser/expr_symbol_modifiers.s
@@ -0,0 +1,14 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err
+// RUN: FileCheck < %t %s
+// RUN: FileCheck -check-prefix=CHECK-STDERR < %t.err %s
+
+// CHECK: .long 1
+.long 1
+// CHECK-STDERR: invalid modifier 'GOTPCREL' (no symbols present)
+.long 10 + 4@GOTPCREL
+// CHECK: .long a@GOTPCREL+4
+.long a + 4@GOTPCREL
+// CHECK: .long a@GOTPCREL+b@GOTPCREL
+.long (a + b)@GOTPCREL
+// CHECK: .long (10+b@GOTPCREL)+4
+.long 10 + b + 4@GOTPCREL
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s
index 350360ce684e..0861922b603c 100644
--- a/test/MC/AsmParser/exprs.s
+++ b/test/MC/AsmParser/exprs.s
@@ -1,47 +1,54 @@
-// FIXME: For now this test just checks that llvm-mc -triple i386-unknown-unknown works. Once we have .macro,
-// .if, and .abort we can write a better test (without resorting to miles of
-// greps).
-        
 // RUN: llvm-mc -triple i386-unknown-unknown %s > %t
 
+.macro check_expr
+  .if ($0) != ($1)
+        .abort Unexpected $0 != $1.
+  .endif
+.endmacro
+        
         .text
 g:
 h:
 j:
 k:      
         .data
-        .byte !1 + 2
-        .byte !0
-        .byte ~0
-        .byte -1
-        .byte +1
-        .byte 1 + 2
-        .byte 1 & 3
-        .byte 4 / 2
-        .byte 4 / -2
-        .byte 1 == 1
-        .byte 1 == 0
-        .byte 1 > 0
-        .byte 1 >= 1
-        .byte 1 < 2
-        .byte 1 <= 1
-        .byte 4 % 3
-        .byte 2 * 2
-        .byte 2 != 2
-        .byte 2 <> 2
-        .byte 1 | 2
-        .byte 1 << 1
-        .byte 2 >> 1
-        .byte ~0 >> 1
-        .byte 3 - 2
-        .byte 1 ^ 3
-        .byte 1 && 2
-        .byte 3 && 0
-        .byte 1 || 2
-        .byte 0 || 0
-
+        check_expr !1 + 2, 2
+        check_expr !0, 1
+        check_expr ~0, -1
+        check_expr -1, ~0
+        check_expr +1, 1
+        check_expr 1 + 2, 3
+        check_expr 1 & 3, 1
+        check_expr 4 / 2, 2
+        check_expr 4 / -2, -2
+        check_expr 1 == 1, 1
+        check_expr 1 == 0, 0
+        check_expr 1 > 0, 1
+        check_expr 1 >= 1, 1
+        check_expr 1 < 2, 1
+        check_expr 1 <= 1, 1
+        check_expr 4 % 3, 1
+        check_expr 2 * 2, 4
+        check_expr 2 != 2, 0
+        check_expr 2 <> 2, 0
+        check_expr 1 | 2, 3
+        check_expr 1 << 1, 2
+        check_expr 2 >> 1, 1
+        check_expr (~0 >> 1), -1
+        check_expr [~0 >> 1], -1
+        check_expr 4 * [4 + (3 + [2 * 2] + 1)], 48
+        check_expr 3 - 2, 1
+        check_expr 1 ^ 3, 2
+        check_expr 1 && 2, 1
+        check_expr 3 && 0, 0
+        check_expr 0 && 1, 0
+        check_expr 1 || 2, 1
+        check_expr 0 || 1, 1
+        check_expr 0 || 0, 0
+        check_expr 1 + 2 < 3 + 4, 1
+        
         .set c, 10
-        .byte c + 1
+        check_expr c + 1, 11
 
         d = e + 10
         .long d
diff --git a/test/MC/AsmParser/floating-literals.s b/test/MC/AsmParser/floating-literals.s
new file mode 100644
index 000000000000..bd122a8cf0e3
--- /dev/null
+++ b/test/MC/AsmParser/floating-literals.s
@@ -0,0 +1,35 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: .long	1067412619
+# CHECK: .long	1075000115
+# CHECK: .long	1077936128
+# CHECK: .long	1082549862
+.single 1.2455, +2.3, 3, + 4.2
+
+# CHECK: .long  1067928519
+.float 1.307
+        
+# CHECK: .quad	4617315517961601024
+# CHECK: .quad	4597526701198935065
+# CHECK: .quad	-4600933674317040845
+.double 5, .232, -11.1
+
+# CHECK: .quad  0
+.double 0.0
+
+# CHECK: .quad  -4570379565595099136
+.double -1.2e3
+# CHECK: .quad  -4690170861623122860
+.double -1.2e-5
+# CHECK: .quad  -4465782973978902528
+.double -1.2e+10
+# CHECK: .quad  4681608360884174848
+.double 1e5
+# CHECK: .quad  4681608360884174848
+.double 1.e5
+# CHECK: .quad  4611686018427387904
+.double 2.
+
+// APFloat should reject these with an error, not crash:
+//.double -1.2e+
+//.double -1.2e
diff --git a/test/MC/AsmParser/full_line_comment.s b/test/MC/AsmParser/full_line_comment.s
new file mode 100644
index 000000000000..4c9198634830
--- /dev/null
+++ b/test/MC/AsmParser/full_line_comment.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple arm-apple-darwin10 %s | FileCheck %s
+# this is a full line comment starting at column 1
+ # this starting at column 2
+
+        .data
+// CHECK: .long 0
+.long 0
+# .long 1 this line is commented out
diff --git a/test/MC/AsmParser/ifdef.s b/test/MC/AsmParser/ifdef.s
new file mode 100644
index 000000000000..98bff6525b2b
--- /dev/null
+++ b/test/MC/AsmParser/ifdef.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifdef undefined
+	.byte 0
+.else
+	.byte 1
+.endif
+
+defined:
+
+# CHECK: .byte 1
+# CHECK-NOT: .byte 0
+.ifdef defined
+	.byte 1
+.else
+	.byte 0
+.endif
+
+	movl	%eax, undefined
+
+# CHECK-NOT: .byte 0
+# CHECK: .byte 1
+.ifdef undefined
+	.byte 0
+.else
+	.byte 1
+.endif
diff --git a/test/MC/AsmParser/ifndef.s b/test/MC/AsmParser/ifndef.s
new file mode 100644
index 000000000000..d9c9c5457a70
--- /dev/null
+++ b/test/MC/AsmParser/ifndef.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: .byte 1
+# CHECK-NOT: byte 0
+.ifndef undefined
+	.byte 1
+.else
+	.byte 0
+.endif
+
+defined:
+
+# CHECK-NOT: byte 0
+# CHECK: .byte 1
+.ifndef defined
+	.byte 0
+.else
+	.byte 1
+.endif
+
+	movl	%eax, undefined
+
+# CHECK: .byte 1
+# CHECK-NOT: byte 0
+.ifndef undefined
+	.byte 1
+.else
+	.byte 0
+.endif
diff --git a/test/MC/AsmParser/paren.s b/test/MC/AsmParser/paren.s
new file mode 100644
index 000000000000..702e309ddeeb
--- /dev/null
+++ b/test/MC/AsmParser/paren.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t1 > %t2
+// RUN: FileCheck < %t1 %s
+
+// CHECK: error: expected ']' in brackets expression
+.size	x, [.-x)
+
+// CHECK: error: expected ')' in parentheses expression
+.size	y, (.-y]
diff --git a/test/MC/AsmParser/rename.s b/test/MC/AsmParser/rename.s
new file mode 100644
index 000000000000..64ca51538128
--- /dev/null
+++ b/test/MC/AsmParser/rename.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+        .size bar, . - bar
+.Ltmp0:
+       .size foo, .Ltmp0 - foo
+
+// CHECK: .Ltmp0:
+// CHECK: .size  bar, .Ltmp0-bar
+// CHECK: .Ltmp01
+// CHECK: .size foo, .Ltmp01-foo
diff --git a/test/MC/AsmParser/section.s b/test/MC/AsmParser/section.s
new file mode 100644
index 000000000000..414fc6d5397f
--- /dev/null
+++ b/test/MC/AsmParser/section.s
@@ -0,0 +1,107 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
+# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+.section test1
+.byte 1
+.section test2
+.byte 2
+.previous
+.byte 1
+.section test2
+.byte 2
+.previous
+.byte 1
+.section test1
+.byte 1
+.previous
+.byte 1
+.section test2
+.byte 2
+.pushsection test3
+.byte 3
+.pushsection test4
+.byte 4
+.pushsection test5
+.byte 5
+.popsection
+.byte 4
+.popsection
+.byte 3
+.popsection
+.byte 2
+.pushsection test3
+.byte 3
+.pushsection test4
+.byte 4
+.previous
+.byte 3
+.popsection
+.byte 3
+.previous
+.byte 2
+.section test1
+.byte 1
+.popsection
+.byte 2
+.previous
+.byte 1
+.previous
+# CHECK:       (('sh_name', 0x00000012) # 'test1'
+# CHECK-NEXT:   ('sh_type', 0x00000001)
+# CHECK-NEXT:   ('sh_flags', 0x00000000)
+# CHECK-NEXT:   ('sh_addr', 0x00000000)
+# CHECK-NEXT:   ('sh_offset', 0x00000034)
+# CHECK-NEXT:   ('sh_size', 0x00000007)
+# CHECK-NEXT:   ('sh_link', 0x00000000)
+# CHECK-NEXT:   ('sh_info', 0x00000000)
+# CHECK-NEXT:   ('sh_addralign', 0x00000001)
+# CHECK-NEXT:   ('sh_entsize', 0x00000000)
+# CHECK-NEXT:   ('_section_data', '01010101 010101')
+# CHECK-NEXT:  ),
+# CHECK:       (('sh_name', 0x00000018) # 'test2'
+# CHECK-NEXT:   ('sh_type', 0x00000001)
+# CHECK-NEXT:   ('sh_flags', 0x00000000)
+# CHECK-NEXT:   ('sh_addr', 0x00000000)
+# CHECK-NEXT:   ('sh_offset', 0x0000003b)
+# CHECK-NEXT:   ('sh_size', 0x00000006)
+# CHECK-NEXT:   ('sh_link', 0x00000000)
+# CHECK-NEXT:   ('sh_info', 0x00000000)
+# CHECK-NEXT:   ('sh_addralign', 0x00000001)
+# CHECK-NEXT:   ('sh_entsize', 0x00000000)
+# CHECK-NEXT:   ('_section_data', '02020202 0202')
+# CHECK-NEXT:  ),
+# CHECK:       (('sh_name', 0x0000001e) # 'test3'
+# CHECK-NEXT:   ('sh_type', 0x00000001)
+# CHECK-NEXT:   ('sh_flags', 0x00000000)
+# CHECK-NEXT:   ('sh_addr', 0x00000000)
+# CHECK-NEXT:   ('sh_offset', 0x00000041)
+# CHECK-NEXT:   ('sh_size', 0x00000005)
+# CHECK-NEXT:   ('sh_link', 0x00000000)
+# CHECK-NEXT:   ('sh_info', 0x00000000)
+# CHECK-NEXT:   ('sh_addralign', 0x00000001)
+# CHECK-NEXT:   ('sh_entsize', 0x00000000)
+# CHECK-NEXT:   ('_section_data', '03030303 03')
+# CHECK-NEXT:  ),
+# CHECK:       (('sh_name', 0x00000024) # 'test4'
+# CHECK-NEXT:   ('sh_type', 0x00000001)
+# CHECK-NEXT:   ('sh_flags', 0x00000000)
+# CHECK-NEXT:   ('sh_addr', 0x00000000)
+# CHECK-NEXT:   ('sh_offset', 0x00000046)
+# CHECK-NEXT:   ('sh_size', 0x00000003)
+# CHECK-NEXT:   ('sh_link', 0x00000000)
+# CHECK-NEXT:   ('sh_info', 0x00000000)
+# CHECK-NEXT:   ('sh_addralign', 0x00000001)
+# CHECK-NEXT:   ('sh_entsize', 0x00000000)
+# CHECK-NEXT:   ('_section_data', '040404')
+# CHECK-NEXT:  ),
+# CHECK:       (('sh_name', 0x0000002a) # 'test5'
+# CHECK-NEXT:   ('sh_type', 0x00000001)
+# CHECK-NEXT:   ('sh_flags', 0x00000000)
+# CHECK-NEXT:   ('sh_addr', 0x00000000)
+# CHECK-NEXT:   ('sh_offset', 0x00000049)
+# CHECK-NEXT:   ('sh_size', 0x00000001)
+# CHECK-NEXT:   ('sh_link', 0x00000000)
+# CHECK-NEXT:   ('sh_info', 0x00000000)
+# CHECK-NEXT:   ('sh_addralign', 0x00000001)
+# CHECK-NEXT:   ('sh_entsize', 0x00000000)
+# CHECK-NEXT:   ('_section_data', '05')
+# CHECK-NEXT:  ),
diff --git a/test/MC/COFF/align-nops.s b/test/MC/COFF/align-nops.s
new file mode 100644
index 000000000000..2971ec67798e
--- /dev/null
+++ b/test/MC/COFF/align-nops.s
@@ -0,0 +1,50 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+
+// Test that we get optimal nops in text
+    .text
+f0:
+    .long 0
+    .align  8, 0x90
+    .long 0
+    .align  8
+
+// But not in another section
+    .data
+    .long 0
+    .align  8, 0x90
+    .long 0
+    .align  8
+
+//CHECK:         Name                     = .text
+//CHECK-NEXT:    VirtualSize
+//CHECK-NEXT:    VirtualAddress
+//CHECK-NEXT:    SizeOfRawData            = 16
+//CHECK-NEXT:    PointerToRawData
+//CHECK-NEXT:    PointerToRelocations
+//CHECK-NEXT:    PointerToLineNumbers
+//CHECK-NEXT:    NumberOfRelocations
+//CHECK-NEXT:    NumberOfLineNumbers
+//CHECK-NEXT:    Charateristics           = 0x60400020
+//CHECK-NEXT:        IMAGE_SCN_CNT_CODE
+//CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:        IMAGE_SCN_MEM_EXECUTE
+//CHECK-NEXT:        IMAGE_SCN_MEM_READ
+//CHECK-NEXT:      SectionData              =
+//CHECK-NEXT:        00 00 00 00 0F 1F 40 00 - 00 00 00 00 0F 1F 40 00
+
+//CHECK:         Name                     = .data
+//CHECK-NEXT:      VirtualSize
+//CHECK-NEXT:      VirtualAddress
+//CHECK-NEXT:      SizeOfRawData            = 16
+//CHECK-NEXT:      PointerToRawData
+//CHECK-NEXT:      PointerToRelocations
+//CHECK-NEXT:      PointerToLineNumbers
+//CHECK-NEXT:      NumberOfRelocations
+//CHECK-NEXT:      NumberOfLineNumbers
+//CHECK-NEXT:      Charateristics           = 0xC0400040
+//CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
+//CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:        IMAGE_SCN_MEM_READ
+//CHECK-NEXT:        IMAGE_SCN_MEM_WRITE
+//CHECK-NEXT:      SectionData              =
+//CHECK-NEXT:        00 00 00 00 90 90 90 90 - 00 00 00 00 00 00 00 00
diff --git a/test/MC/COFF/basic-coff.ll b/test/MC/COFF/basic-coff.ll
deleted file mode 100644
index 1e67db0e5c42..000000000000
--- a/test/MC/COFF/basic-coff.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; This test checks that the COFF object emitter works for the most basic
-; programs.
-
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
-; RUN: coff-dump.py %abs_tmp | FileCheck %s
-; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
-
-@.str = private constant [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
-
-define i32 @main() nounwind {
-entry:
-  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
-  ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-; CHECK: {
-; CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-; CHECK:   NumberOfSections         = 2
-; CHECK:   TimeDateStamp            = {{[0-9]+}}
-; CHECK:   PointerToSymbolTable     = 0x99
-; CHECK:   NumberOfSymbols          = 7
-; CHECK:   SizeOfOptionalHeader     = 0
-; CHECK:   Characteristics          = 0x0
-; CHECK:   Sections                 = [
-; CHECK:     0 = {
-; CHECK:       Name                     = .text
-; CHECK:       VirtualSize              = 0
-; CHECK:       VirtualAddress           = 0
-; CHECK:       SizeOfRawData            = 21
-; CHECK:       PointerToRawData         = 0x64
-; CHECK:       PointerToRelocations     = 0x79
-; CHECK:       PointerToLineNumbers     = 0x0
-; CHECK:       NumberOfRelocations      = 2
-; CHECK:       NumberOfLineNumbers      = 0
-; CHECK:       Charateristics           = 0x60500020
-; CHECK:         IMAGE_SCN_CNT_CODE
-; CHECK:         IMAGE_SCN_ALIGN_16BYTES
-; CHECK:         IMAGE_SCN_MEM_EXECUTE
-; CHECK:         IMAGE_SCN_MEM_READ
-; CHECK:       SectionData              =
-; CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 31 |.....$.........1|
-; CHECK:         C0 83 C4 04 C3                                    |.....|
-; CHECK:       Relocations              = [
-; CHECK:         0 = {
-; CHECK:           VirtualAddress           = 0x6
-; CHECK:           SymbolTableIndex         = 5
-; CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-; CHECK:           SymbolName               = _main
-; CHECK:         }
-; CHECK:         1 = {
-; CHECK:           VirtualAddress           = 0xB
-; CHECK:           SymbolTableIndex         = 6
-; CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-; CHECK:           SymbolName               = L_.str
-; CHECK:         }
-; CHECK:       ]
-; CHECK:     }
-; CHECK:     1 = {
-; CHECK:       Name                     = .data
-; CHECK:       VirtualSize              = 0
-; CHECK:       VirtualAddress           = 0
-; CHECK:       SizeOfRawData            = 12
-; CHECK:       PointerToRawData         = 0x8D
-; CHECK:       PointerToRelocations     = 0x0
-; CHECK:       PointerToLineNumbers     = 0x0
-; CHECK:       NumberOfRelocations      = 0
-; CHECK:       NumberOfLineNumbers      = 0
-; CHECK:       Charateristics           = 0xC0100040
-; CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-; CHECK:         IMAGE_SCN_ALIGN_1BYTES
-; CHECK:         IMAGE_SCN_MEM_READ
-; CHECK:         IMAGE_SCN_MEM_WRITE
-; CHECK:       SectionData              =
-; CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
-; CHECK:       Relocations              = None
-; CHECK:     }
-; CHECK:   ]
-; CHECK:   Symbols                  = [
-; CHECK:     0 = {
-; CHECK:       Name                     = .text
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 1
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 1
-; CHECK:       AuxillaryData            =
-; CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
-; CHECK:         00 00                                             |..|
-; CHECK:     }
-; CHECK:     1 = {
-; CHECK:       Name                     = .data
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 2
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 1
-; CHECK:       AuxillaryData            =
-; CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
-; CHECK:         00 00                                             |..|
-; CHECK:     }
-; CHECK:     2 = {
-; CHECK:       Name                     = _main
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 1
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-; CHECK:     }
-; CHECK:     3 = {
-; CHECK:       Name                     = L_.str
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 2
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-; CHECK:     }
-; CHECK:     4 = {
-; CHECK:       Name                     = _printf
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 0
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-; CHECK:     }
-; CHECK:   ]
-; CHECK: }
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
new file mode 100644
index 000000000000..0c8658258ed4
--- /dev/null
+++ b/test/MC/COFF/basic-coff.s
@@ -0,0 +1,133 @@
+// This test checks that the COFF object emitter works for the most basic
+// programs.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$L_.str, (%esp)
+	calll	_printf
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+L_.str:                                 # @.str
+	.asciz	 "Hello World"
+
+// CHECK: {
+// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   NumberOfSections         = 2
+// CHECK:   TimeDateStamp            = {{[0-9]+}}
+// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
+// CHECK:   NumberOfSymbols          = 6
+// CHECK:   SizeOfOptionalHeader     = 0
+// CHECK:   Characteristics          = 0x0
+// CHECK:   Sections                 = [
+// CHECK:     1 = {
+// CHECK:       Name                     = .text
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 2
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0x60500020
+// CHECK:         IMAGE_SCN_CNT_CODE
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_EXECUTE
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:       SectionData              =
+// CHECK:       Relocations              = [
+// CHECK:         0 = {
+// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         1 = {
+// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
+// CHECK:           SymbolTableIndex         = 5
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _printf
+// CHECK:         }
+// CHECK:       ]
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x0
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 0
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0xC0300040
+// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:         IMAGE_SCN_ALIGN_4BYTES
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:         IMAGE_SCN_MEM_WRITE
+// CHECK:       SectionData              =
+// CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
+// CHECK:       Relocations              = None
+// CHECK:     }
+// CHECK:   ]
+// CHECK:   Symbols                  = [
+// CHECK:     0 = {
+// CHECK:       Name                     = .text
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
+// CHECK:         00 00                                             |..|
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 2
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+// CHECK:         00 00                                             |..|
+// CHECK:     }
+// CHECK:     4 = {
+// CHECK:       Name                     = _main
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+// CHECK:     }
+// CHECK:     5 = {
+// CHECK:       Name                     = _printf
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+// CHECK:     }
+// CHECK:   ]
+// CHECK: }
diff --git a/test/MC/COFF/bss.s b/test/MC/COFF/bss.s
new file mode 100644
index 000000000000..f44225b5a336
--- /dev/null
+++ b/test/MC/COFF/bss.s
@@ -0,0 +1,15 @@
+// The purpose of this test is to verify that bss sections are emited correctly.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+    .bss
+    .globl _g0
+    .align 4
+_g0:
+    .long 0
+
+// CHECK:      Name           = .bss
+// CHECK-NEXT: VirtualSize    = 0
+// CHECK-NEXT: VirtualAddress = 0
+// CHECK-NEXT: SizeOfRawData  = 4
diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp
index 7b7bd4e73807..d46d700975e5 100644
--- a/test/MC/COFF/dg.exp
+++ b/test/MC/COFF/dg.exp
@@ -1,5 +1,5 @@
 load_lib llvm.exp
 
 if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
 }
diff --git a/test/MC/COFF/module-asm.ll b/test/MC/COFF/module-asm.ll
new file mode 100644
index 000000000000..9c6d00d2f503
--- /dev/null
+++ b/test/MC/COFF/module-asm.ll
@@ -0,0 +1,26 @@
+; The purpose of this test is to verify that various module level assembly
+; constructs work.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+
+module asm ".text"
+module asm "_foo:"
+module asm "  ret"
+
+; CHECK:            Name                     = .text
+; CHECK-NEXT:       VirtualSize              = 0
+; CHECK-NEXT:       VirtualAddress           = 0
+; CHECK-NEXT:       SizeOfRawData            = {{[0-9]+}}
+; CHECK-NEXT:       PointerToRawData         = 0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToRelocations     = 0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToLineNumbers     = 0x0
+; CHECK-NEXT:       NumberOfRelocations      = 0
+; CHECK-NEXT:       NumberOfLineNumbers      = 0
+; CHECK-NEXT:       Charateristics           = 0x60300020
+; CHECK-NEXT:         IMAGE_SCN_CNT_CODE
+; CHECK-NEXT:         IMAGE_SCN_ALIGN_4BYTES
+; CHECK-NEXT:         IMAGE_SCN_MEM_EXECUTE
+; CHECK-NEXT:         IMAGE_SCN_MEM_READ
+; CHECK-NEXT:       SectionData              =
+; CHECK-NEXT:         C3
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
new file mode 100644
index 000000000000..f86f4a9ff9e4
--- /dev/null
+++ b/test/MC/COFF/simple-fixups.s
@@ -0,0 +1,50 @@
+// The purpose of this test is to verify that we do not produce unneeded
+// relocations when symbols are in the same section and we know their offset.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+	.def	 _foo;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_foo
+	.align	16, 0x90
+_foo:                                   # @foo
+# BB#0:                                 # %e
+	.align	16, 0x90
+LBB0_1:                                 # %i
+                                        # =>This Inner Loop Header: Depth=1
+	jmp	LBB0_1
+
+	.def	 _bar;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	_bar
+	.align	16, 0x90
+_bar:                                   # @bar
+# BB#0:                                 # %e
+	.align	16, 0x90
+LBB1_1:                                 # %i
+                                        # =>This Inner Loop Header: Depth=1
+	jmp	LBB1_1
+
+	.def	 _baz;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	_baz
+	.align	16, 0x90
+_baz:                                   # @baz
+# BB#0:                                 # %e
+	subl	$4, %esp
+Ltmp0:
+	calll	_baz
+	addl	$4, %esp
+	ret
+
+// CHECK:     Sections = [
+// CHECK-NOT: NumberOfRelocations = {{[^0]}}
+// CHECK:     Symbols = [
diff --git a/test/MC/COFF/switch-relocations.ll b/test/MC/COFF/switch-relocations.ll
index 300c10732ec6..faf185fb7299 100644
--- a/test/MC/COFF/switch-relocations.ll
+++ b/test/MC/COFF/switch-relocations.ll
@@ -3,6 +3,9 @@
 
 ; This test case was reduced from Lua/lapi.c.
 
+; This test has yet to be converted to assembly becase llvm-mc cannot read
+; x86-64 COFF code yet.
+
 ; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
 ; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
 
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
new file mode 100644
index 000000000000..ede6b53c4511
--- /dev/null
+++ b/test/MC/COFF/symbol-alias.s
@@ -0,0 +1,62 @@
+// The purpose of this test is to verify that symbol aliases
+// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
+// They should be identical except for the name.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+
+	.def	 _foo;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_foo
+	.align	16, 0x90
+_foo:                                   # @foo
+# BB#0:                                 # %entry
+	ret
+
+	.data
+	.globl	_bar                    # @bar
+	.align	4
+_bar:
+	.long	0                       # 0x0
+
+
+	.globl	_foo_alias
+_foo_alias = _foo
+	.globl	_bar_alias
+_bar_alias = _bar
+
+// CHECK:      Name               = {{_?}}foo
+// CHECK-NEXT: Value              = [[FOO_VALUE:.*$]]
+// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
+
+// CHECK:      Name               = {{_?}}bar
+// CHECK-NEXT: Value              = [[BAR_VALUE:.*$]]
+// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
+
+// CHECK:      Name               = {{_?}}foo_alias
+// CHECK-NEXT: Value              = [[FOO_VALUE]]
+// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
+
+// CHECK:      Name               = {{_?}}bar_alias
+// CHECK-NEXT: Value              = [[BAR_VALUE]]
+// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+
diff --git a/test/MC/COFF/symbol-fragment-offset.ll b/test/MC/COFF/symbol-fragment-offset.ll
deleted file mode 100644
index af7ace19d59f..000000000000
--- a/test/MC/COFF/symbol-fragment-offset.ll
+++ /dev/null
@@ -1,182 +0,0 @@
-; The purpose of this test is to see if the COFF object writer is emitting the
-; proper relocations for multiple pieces of data in a single data fragment.
-
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
-; RUN: coff-dump.py %abs_tmp | FileCheck %s
-; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
-
-@.str = private constant [7 x i8] c"Hello \00"    ; <[7 x i8]*> [#uses=1]
-@str = internal constant [7 x i8] c"World!\00"    ; <[7 x i8]*> [#uses=1]
-
-define i32 @main() nounwind {
-entry:
-  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
-  %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
-  ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-declare i32 @puts(i8* nocapture) nounwind
-
-; CHECK: {
-; CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-; CHECK:   NumberOfSections         = 2
-; CHECK:   TimeDateStamp            = {{[0-9]+}}
-; CHECK:   PointerToSymbolTable     = 0xBB
-; CHECK:   NumberOfSymbols          = 9
-; CHECK:   SizeOfOptionalHeader     = 0
-; CHECK:   Characteristics          = 0x0
-; CHECK:   Sections                 = [
-; CHECK:     0 = {
-; CHECK:       Name                     = .text
-; CHECK:       VirtualSize              = 0
-; CHECK:       VirtualAddress           = 0
-; CHECK:       SizeOfRawData            = 33
-; CHECK:       PointerToRawData         = 0x64
-; CHECK:       PointerToRelocations     = 0x85
-; CHECK:       PointerToLineNumbers     = 0x0
-; CHECK:       NumberOfRelocations      = 4
-; CHECK:       NumberOfLineNumbers      = 0
-; CHECK:       Charateristics           = 0x60500020
-; CHECK:         IMAGE_SCN_CNT_CODE
-; CHECK:         IMAGE_SCN_ALIGN_16BYTES
-; CHECK:         IMAGE_SCN_MEM_EXECUTE
-; CHECK:         IMAGE_SCN_MEM_READ
-; CHECK:       SectionData              =
-; CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
-; CHECK:         04 24 00 00 00 00 E8 00 - 00 00 00 31 C0 83 C4 04 |.$.........1....|
-; CHECK:         C3                                                |.|
-
-; CHECK:       Relocations              = [
-; CHECK:         0 = {
-; CHECK:           VirtualAddress           = 0x6
-; CHECK:           SymbolTableIndex         = 5
-; CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-; CHECK:           SymbolName               = _main
-; CHECK:         }
-; CHECK:         1 = {
-; CHECK:           VirtualAddress           = 0xB
-; CHECK:           SymbolTableIndex         = 6
-; CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-; CHECK:           SymbolName               = L_.str
-; CHECK:         }
-; CHECK:         2 = {
-; CHECK:           VirtualAddress           = 0x12
-; CHECK:           SymbolTableIndex         = 7
-; CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-; CHECK:           SymbolName               = _printf
-; CHECK:         }
-; CHECK:         3 = {
-; CHECK:           VirtualAddress           = 0x17
-; CHECK:           SymbolTableIndex         = 8
-; CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-; CHECK:           SymbolName               = _str
-; CHECK:         }
-; CHECK:       ]
-; CHECK:     }
-; CHECK:     1 = {
-; CHECK:       Name                     = .data
-; CHECK:       VirtualSize              = 0
-; CHECK:       VirtualAddress           = 0
-; CHECK:       SizeOfRawData            = 14
-; CHECK:       PointerToRawData         = 0xAD
-; CHECK:       PointerToRelocations     = 0x0
-; CHECK:       PointerToLineNumbers     = 0x0
-; CHECK:       NumberOfRelocations      = 0
-; CHECK:       NumberOfLineNumbers      = 0
-; CHECK:       Charateristics           = 0xC0100040
-; CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-; CHECK:         IMAGE_SCN_ALIGN_1BYTES
-; CHECK:         IMAGE_SCN_MEM_READ
-; CHECK:         IMAGE_SCN_MEM_WRITE
-; CHECK:       SectionData              =
-; CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00       |Hello .World!.|
-
-; CHECK:       Relocations              = None
-; CHECK:     }
-; CHECK:   ]
-; CHECK:   Symbols                  = [
-; CHECK:     0 = {
-; CHECK:       Name                     = .text
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 1
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 1
-; CHECK:       AuxillaryData            =
-; CHECK:         21 00 00 00 04 00 00 00 - 00 00 00 00 01 00 00 00 |!...............|
-; CHECK:         00 00                                             |..|
-
-; CHECK:     }
-; CHECK:     1 = {
-; CHECK:       Name                     = .data
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 2
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 1
-; CHECK:       AuxillaryData            =
-; CHECK:         0E 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
-; CHECK:         00 00                                             |..|
-
-; CHECK:     }
-; CHECK:     2 = {
-; CHECK:       Name                     = _main
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 1
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-
-; CHECK:     }
-; CHECK:     3 = {
-; CHECK:       Name                     = L_.str
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 2
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-
-; CHECK:     }
-; CHECK:     4 = {
-; CHECK:       Name                     = _printf
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 0
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-
-; CHECK:     }
-; CHECK:     5 = {
-; CHECK:       Name                     = _str
-; CHECK:       Value                    = 7
-; CHECK:       SectionNumber            = 2
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-
-; CHECK:     }
-; CHECK:     6 = {
-; CHECK:       Name                     = _puts
-; CHECK:       Value                    = 0
-; CHECK:       SectionNumber            = 0
-; CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-; CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-; CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-; CHECK:       NumberOfAuxSymbols       = 0
-; CHECK:       AuxillaryData            =
-
-; CHECK:     }
-; CHECK:   ]
-; CHECK: }
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
new file mode 100644
index 000000000000..c314ac20f4db
--- /dev/null
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -0,0 +1,187 @@
+// The purpose of this test is to see if the COFF object writer is emitting the
+// proper relocations for multiple pieces of data in a single data fragment.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$L_.str0, (%esp)
+	calll	_printf
+	movl	$L_.str1, (%esp)
+	calll	_puts
+	movl	$L_.str2, (%esp)
+	calll	_puts
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+L_.str0:                                # @.str0
+	.asciz	 "Hello "
+
+L_.str1:                                # @.str1
+	.asciz	 "World!"
+
+	.align	16                      # @.str2
+L_.str2:
+	.asciz	 "I'm The Last Line."
+
+// CHECK: {
+// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   NumberOfSections         = 2
+// CHECK:   TimeDateStamp            = {{[0-9]+}}
+// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
+// CHECK:   NumberOfSymbols          = 7
+// CHECK:   SizeOfOptionalHeader     = 0
+// CHECK:   Characteristics          = 0x0
+// CHECK:   Sections                 = [
+// CHECK:     1 = {
+// CHECK:       Name                     = .text
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 6
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0x60500020
+// CHECK:         IMAGE_SCN_CNT_CODE
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_EXECUTE
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:       SectionData              =
+// CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+// CHECK:         04 24 07 00 00 00 E8 00 - 00 00 00 C7 04 24 10 00 |.$...........$..|
+// CHECK:         00 00 E8 00 00 00 00 31 - C0 83 C4 04 C3 |.......1.....|
+// CHECK:       Relocations              = [
+// CHECK:         0 = {
+// CHECK:           VirtualAddress           = 0x6
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         1 = {
+// CHECK:           VirtualAddress           = 0xB
+// CHECK:           SymbolTableIndex         = 5
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _printf
+// CHECK:         }
+// CHECK:         2 = {
+// CHECK:           VirtualAddress           = 0x12
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         3 = {
+// CHECK:           VirtualAddress           = 0x17
+// CHECK:           SymbolTableIndex         = 6
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _puts
+// CHECK:         }
+// CHECK:         4 = {
+// CHECK:           VirtualAddress           = 0x1E
+// CHECK:           SymbolTableIndex         = 2
+// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
+// CHECK:           SymbolName               = .data
+// CHECK:         }
+// CHECK:         5 = {
+// CHECK:           VirtualAddress           = 0x23
+// CHECK:           SymbolTableIndex         = 6
+// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
+// CHECK:           SymbolName               = _puts
+// CHECK:         }
+// CHECK:       ]
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       VirtualSize              = 0
+// CHECK:       VirtualAddress           = 0
+// CHECK:       SizeOfRawData            = {{[0-9]+}}
+// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
+// CHECK:       PointerToRelocations     = 0x0
+// CHECK:       PointerToLineNumbers     = 0x0
+// CHECK:       NumberOfRelocations      = 0
+// CHECK:       NumberOfLineNumbers      = 0
+// CHECK:       Charateristics           = 0xC0500040
+// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:         IMAGE_SCN_ALIGN_16BYTES
+// CHECK:         IMAGE_SCN_MEM_READ
+// CHECK:         IMAGE_SCN_MEM_WRITE
+// CHECK:       SectionData              =
+// CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 00 00 |Hello .World!...|
+// CHECK:         49 27 6D 20 54 68 65 20 - 4C 61 73 74 20 4C 69 6E |I'm The Last Lin|
+// CHECK:         65 2E 00                                          |e..|
+// CHECK:       Relocations              = None
+// CHECK:     }
+// CHECK:   ]
+// CHECK:   Symbols                  = [
+// CHECK:     0 = {
+// CHECK:       Name                     = .text
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         2D 00 00 00 06 00 00 00 - 00 00 00 00 01 00 00 00 |-...............|
+// CHECK:         00 00                                             |..|
+
+// CHECK:     }
+// CHECK:     2 = {
+// CHECK:       Name                     = .data
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 2
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
+// CHECK:       NumberOfAuxSymbols       = 1
+// CHECK:       AuxillaryData            =
+// CHECK:         23 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |#...............|
+// CHECK:         00 00                                             |..|
+
+// CHECK:     }
+// CHECK:     4 = {
+// CHECK:       Name                     = _main
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 1
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     5 = {
+// CHECK:       Name                     = _printf
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     }
+// CHECK:     6 = {
+// CHECK:       Name                     = _puts
+// CHECK:       Value                    = 0
+// CHECK:       SectionNumber            = 0
+// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
+// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK:       NumberOfAuxSymbols       = 0
+// CHECK:       AuxillaryData            =
+
+// CHECK:     }
+// CHECK:   ]
+// CHECK: }
diff --git a/test/MC/COFF/weak.s b/test/MC/COFF/weak.s
new file mode 100644
index 000000000000..a240d7152c76
--- /dev/null
+++ b/test/MC/COFF/weak.s
@@ -0,0 +1,51 @@
+// This tests that default-null weak symbols (a GNU extension) are created
+// properly via the .weak directive.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 < %s | coff-dump.py | FileCheck %s
+
+    .def    _main;
+    .scl    2;
+    .type   32;
+    .endef
+    .text
+    .globl  _main
+    .align  16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+    subl    $4, %esp
+    movl    $_test_weak, %eax
+    testl   %eax, %eax
+    je      LBB0_2
+# BB#1:                                 # %if.then
+    calll   _test_weak
+    movl    $1, %eax
+    addl    $4, %esp
+    ret
+LBB0_2:                                 # %return
+    xorl    %eax, %eax
+    addl    $4, %esp
+    ret
+
+    .weak   _test_weak
+
+// CHECK: Symbols = [
+
+// CHECK:      Name               = _test_weak
+// CHECK-NEXT: Value              = 0
+// CHECK-NEXT: SectionNumber      = 0
+// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
+// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_WEAK_EXTERNAL (105)
+// CHECK-NEXT: NumberOfAuxSymbols = 1
+// CHECK-NEXT: AuxillaryData      =
+// CHECK-NEXT: 05 00 00 00 02 00 00 00 - 00 00 00 00 00 00 00 00 |................|
+// CHECK-NEXT: 00 00                                             |..|
+
+// CHECK:      Name               = .weak._test_weak.default
+// CHECK-NEXT: Value              = 0
+// CHECK-NEXT: SectionNumber      = 65535
+// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
+// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
+// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_EXTERNAL (2)
+// CHECK-NEXT: NumberOfAuxSymbols = 0
+// CHECK-NEXT: AuxillaryData      =
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
new file mode 100644
index 000000000000..0f6aeb7052b9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -0,0 +1,132 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
+
+# CHECK:	b	#0
+0xfe 0xff 0xff 0xea
+
+# CHECK:	bfc	r8, #0, #16
+0x1f 0x80 0xcf 0xe7
+
+# CHECK:	bfi	r8, r0, #16, #1
+0x10 0x88 0xd0 0xe7
+
+# CHECK:	mov	pc, lr
+0x0e 0xf0 0xa0 0xe1
+
+# CHECK:	cmn	r0, #1
+0x01 0x00 0x70 0xe3
+
+# CHECK:	dmb
+0x5f 0xf0 0x7f 0xf5
+
+# CHECK:	dmb	nshst
+0x56 0xf0 0x7f 0xf5
+
+# CHECK:	dsb
+0x4f 0xf0 0x7f 0xf5
+
+# CHECK:	dsb	st
+0x4e 0xf0 0x7f 0xf5
+
+# CHECK:	isb
+0x6f 0xf0 0x7f 0xf5
+
+# CHECK:	ldclvc	p5, cr15, [r8], #-0
+0x00 0xf5 0x78 0x7c
+
+# CHECK:	ldr	r0, [r2], #15
+0x0f 0x00 0x92 0xe4
+
+# CHECK:	ldrh	r0, [r2], #0
+0xb0 0x00 0xd2 0xe0
+
+# CHECK:	ldrht	r0, [r2], #15
+0xbf 0x00 0xf2 0xe0
+
+# CHECK:	ldrsbtvs	lr, [r2], -r9
+0xd9 0xe9 0x32 0x60
+
+# CHECK:	lsls	r0, r2, #31
+0x82 0x0f 0xb0 0xe1
+
+# CHECK:	mcr2	p0, #0, r2, c1, c0, #7
+0xf0 0x20 0x01 0xfe
+
+# CHECK:	movt	r8, #65535
+0xff 0x8f 0x4f 0xe3
+
+# CHECK:	mvnspl	r7, #245, 2
+0xf5 0x71 0xf0 0x53
+
+# CHECK-NOT:	orr	r7, r8, r7, rrx #0
+# CHECK:	orr	r7, r8, r7, rrx
+0x67 0x70 0x88 0xe1
+
+# CHECK:	pkhbt	r8, r9, r10, lsl #4
+0x1a 0x82 0x89 0xe6
+
+# CHECK-NOT:	pkhbtls	pc, r11, r11, lsl #0
+# CHECK:	pkhbtls	pc, r11, r11
+0x1b 0xf0 0x8b 0x96
+
+# CHECK:	pop	{r0, r2, r4, r6, r8, r10}
+0x55 0x05 0xbd 0xe8
+
+# CHECK:	push	{r0, r2, r4, r6, r8, r10}
+0x55 0x05 0x2d 0xe9
+
+# CHECK:	qsax	r8, r9, r10
+0x5a 0x8f 0x29 0xe6
+
+# CHECK:	rfedb	r0!
+0x00 0x0a 0x30 0xf9
+
+# CHECK-NOT:	rsbeq	r0, r2, r0, lsl #0
+# CHECK:	rsbeq	r0, r2, r0
+0x00 0x00 0x62 0x00
+
+# CHECK-NOT:	rscseq	r0, r0, r1, lsl #0
+# CHECK:	rscseq	r0, r0, r1
+0x01 0x00 0xf0 0x00
+
+# CHECK:	sbcs	r0, pc, #1
+0x01 0x00 0xdf 0xe2
+
+# CHECK:	sbfx	r0, r1, #0, #8
+0x51 0x00 0xa7 0xe7
+
+# CHECK:	ssat	r8, #1, r10, lsl #8
+0x1a 0x84 0xa0 0xe6
+
+# CHECK-NOT:	ssatmi	r0, #17, r12, lsl #0
+# CHECK:	ssatmi	r0, #17, r12
+0x1c 0x00 0xb0 0x46
+
+# CHECK:	stmdb	r10!, {r4, r5, r6, r7, lr}
+0xf0 0x40 0x2a 0xe9
+
+# CHECK:	teq	r0, #31
+0x1f 0x00 0x30 0xe3
+
+# CHECK:	ubfx	r0, r0, #16, #1
+0x50 0x08 0xe0 0xe7
+
+# CHECK:	usat	r8, #0, r10, asr #32
+0x5a 0x80 0xe0 0xe6
+
+# CHECK:        setend be
+0x00 0x02 0x01 0xf1
+
+# CHECK:        setend le
+0x00 0x00 0x01 0xf1
+
+# CHECK: cpsie  aif
+0xc0 0x01 0x08 0xf1
+
+# CHECK: cps  #15
+0x0f 0x00 0x02 0xf1
+
+# CHECK: cpsie if, #10
+0xca 0x00 0x0a 0xf1
+
+# CHECK: msr cpsr_fc, r0
+0x00 0xf0 0x29 0xe1
diff --git a/test/MC/Disassembler/ARM/dg.exp b/test/MC/Disassembler/ARM/dg.exp
new file mode 100644
index 000000000000..fc2f17a6fbaa
--- /dev/null
+++ b/test/MC/Disassembler/ARM/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
+}
+
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
new file mode 100644
index 000000000000..eb9adb7b6c2f
--- /dev/null
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -0,0 +1,61 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
+
+# CHECK:	vbif	q15, q7, q0
+0x50 0xe1 0x7e 0xf3
+
+# CHECK:	vcvt.f32.s32	q15, q0, #1
+0x50 0xee 0xff 0xf2
+
+# CHECK:	vdup.32	q3, d1[0]
+0x41 0x6c 0xb4 0xf3
+
+# CHECK:	vld1.8	{d17, d18}, [r6], r5
+0x05 0x1a 0x66 0xf4
+
+# CHECK:        vld1.8  {d17, d18, d19}, [r6], r5
+0x05 0x16 0x66 0xf4
+
+# CHECK:	vld4.8	{d0, d1, d2, d3}, [r2], r7
+0x07 0x00 0x22 0xf4
+
+# CHECK:	vld4.8	{d4, d6, d8, d10}, [r2]
+0x0f 0x41 0x22 0xf4
+
+# CHECK:	vmov	d0, d15
+0x1f 0x01 0x2f 0xf2
+
+# CHECK:	vmov.i64	q6, #0xFF00FF00FF
+0x75 0xce 0x81 0xf2
+
+# CHECK:	vmvn.i32	d0, #0x0
+0x30 0x00 0x80 0xf2
+
+# CHECK:	vmul.f32	d0, d0, d6
+0x16 0x0d 0x00 0xf3
+
+# CHECK:	vneg.f32	q0, q0
+0xc0 0x07 0xb9 0xf3
+
+# CHECK:	vqrdmulh.s32	d0, d0, d3[1]
+0x63 0x0d 0xa0 0xf2
+
+# CHECK:	vrshr.s32	d0, d0, #16
+0x10 0x02 0xb0 0xf2
+
+# CHECK:	vshll.i16	q3, d1, #16
+0x01 0x63 0xb6 0xf3
+
+# CHECK:	vsri.32	q15, q0, #1
+0x50 0xe4 0xff 0xf3
+
+# CHECK:	vtbx.8	d18, {d4, d5, d6}, d7
+0x47 0x2a 0xf4 0xf3
+
+# CHECK: vmov.f32 s0, #5.000000e-01
+0x00 0x0a 0xb6 0xee
+
+# CHECK: vmov.f32 s0, #1.328125e-01
+0x01 0x0a 0xb4 0xee
+
+# CHECK: vmov.f64 d0, #5.000000e-01
+0x00 0x0b 0xb6 0xee
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
new file mode 100644
index 000000000000..6dab1237a118
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -0,0 +1,120 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
+
+# CHECK:	add	r5, sp, #68
+0x11 0xad
+
+# CHECK:	adcs	r0, r0, #1
+0x50 0xf1 0x01 0x00
+
+# CHECK:	b	#34
+0x0f 0xe0
+
+# CHECK:	b.w	#-12
+0xff 0xf7 0xf8 0xaf
+
+# CHECK:	bfi	r2, r10, #0, #1
+0x6a 0xf3 0x00 0x02
+
+# CHECK:	cbnz	r7, #20
+0x57 0xb9
+
+# CHECK:	cmp	r3, r4
+0xa3 0x42
+
+# CHECK:	cmn.w	r0, #31
+0x10 0xf1 0x1f 0x0f
+
+# CHECK:	ldmia	r0!, {r1}
+0x02 0xc8
+
+# CHECK:	ldrb.w	r8, #-24
+0x1f 0xf8 0x18 0x80
+
+# CHECK:	ldrd	r0, r1, [r7, #64]!
+0xf7 0xe9 0x10 0x01
+
+# CHECK:	lsls.w	r0, pc, #1
+0x5f 0xea 0x4f 0x00
+
+# CHECK:	mov	r11, r7
+0xbb 0x46
+
+# CHECK:	pkhtb	r2, r4, r6, asr #16
+0xc4 0xea 0x26 0x42
+
+# CHECK-NOT:	pkhbt	r2, r4, r6, lsl #0
+# CHECK:	pkhbt	r2, r4, r6
+0xc4 0xea 0x06 0x02
+
+# CHECK:	pop.w	{r2, r4, r6, r8, r10, r12}
+0xbd 0xe8 0x54 0x15
+
+# CHECK:	push.w	{r2, r4, r6, r8, r10, r12}
+0x2d 0xe9 0x54 0x15
+
+# CHECK:	rsbs	r0, r0, #0
+0x40 0x42
+
+# CHECK-NOT:	rsb	r0, r2, r0, lsl #0
+# CHECK:	rsb	r0, r2, r0
+0xc2 0xeb 0x00 0x00
+
+# CHECK-NOT:	ssat	r0, #17, r12, lsl #0
+# CHECK:	ssat	r0, #17, r12
+0x0c 0xf3 0x10 0x00
+
+# CHECK:	strd	r0, r1, [r7, #64]
+0xc7 0xe9 0x10 0x01
+
+# CHECK:	sub	sp, #60
+0x8f 0xb0
+
+# CHECK:	subw	r0, pc, #1
+0xaf 0xf2 0x01 0x00
+
+# CHECK:	subw	r0, sp, #835
+0xad 0xf2 0x43 0x30
+
+# CHECK:	uqadd16	r3, r4, r5
+0x94 0xfa 0x55 0xf3
+
+# CHECK:	usada8	r5, r4, r3, r2
+0x74 0xfb 0x03 0x25
+
+# CHECK:	uxtab16	r1, r2, r3, ror #8
+0x32 0xfa 0x93 0xf1
+
+# IT block begin
+# CHECK:	ittte	eq
+0x03 0xbf
+
+# CHECK:	moveq	r3, #3
+0x03 0x23
+
+# CHECK:	asreq	r1, r0, #5
+0x41 0x11
+
+# CHECK:	lsleq	r1, r0, #28
+0x01 0x07
+
+# CHECK:	stmiane	r0!, {r1, r2, r3}
+0x0e 0xc0
+
+# IT block end
+# CHECK:	rsbs	r1, r2, #0
+0x51 0x42
+
+# CHECK: cpsid.w  f
+0xaf 0xf3 0x20 0x86
+
+# CHECK: cps  #15
+0xaf 0xf3 0x0f 0x81
+
+# CHECK: cpsie.w  if, #10
+0xaf 0xf3 0x6a 0x85
+
+# CHECK: cpsie aif
+0x67 0xb6
+
+# CHECK: msr cpsr_fc, r0
+0x80 0xf3 0x00 0x89
diff --git a/test/MC/Disassembler/MBlaze/dg.exp b/test/MC/Disassembler/MBlaze/dg.exp
new file mode 100644
index 000000000000..0be99a34235d
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target MBlaze] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
+}
+
diff --git a/test/MC/Disassembler/MBlaze/mblaze_branch.txt b/test/MC/Disassembler/MBlaze/mblaze_branch.txt
new file mode 100644
index 000000000000..5f4051712fa6
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_branch.txt
@@ -0,0 +1,119 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Branch instructions
+################################################################################
+
+# CHECK:    beq     r2, r3
+0x9c 0x02 0x18 0x00
+
+# CHECK:    bge     r2, r3
+0x9c 0xa2 0x18 0x00
+
+# CHECK:    bgt     r2, r3
+0x9c 0x82 0x18 0x00
+
+# CHECK:    ble     r2, r3
+0x9c 0x62 0x18 0x00
+
+# CHECK:    blt     r2, r3
+0x9c 0x42 0x18 0x00
+
+# CHECK:    bne     r2, r3
+0x9c 0x22 0x18 0x00
+
+# CHECK:    beqd    r2, r3
+0x9e 0x02 0x18 0x00
+
+# CHECK:    bged    r2, r3
+0x9e 0xa2 0x18 0x00
+
+# CHECK:    bgtd    r2, r3
+0x9e 0x82 0x18 0x00
+
+# CHECK:    bled    r2, r3
+0x9e 0x62 0x18 0x00
+
+# CHECK:    bltd    r2, r3
+0x9e 0x42 0x18 0x00
+
+# CHECK:    bned    r2, r3
+0x9e 0x22 0x18 0x00
+
+# CHECK:    br      r3
+0x98 0x00 0x18 0x00
+
+# CHECK:    bra     r3
+0x98 0x08 0x18 0x00
+
+# CHECK:    brd     r3
+0x98 0x10 0x18 0x00
+
+# CHECK:    brad    r3
+0x98 0x18 0x18 0x00
+
+# CHECK:    brld    r15, r3
+0x99 0xf4 0x18 0x00
+
+# CHECK:    brald   r15, r3
+0x99 0xfc 0x18 0x00
+
+# CHECK:    brk     r15, r3
+0x99 0xec 0x18 0x00
+
+# CHECK:    beqi    r2, 0
+0xbc 0x02 0x00 0x00
+
+# CHECK:    bgei    r2, 0
+0xbc 0xa2 0x00 0x00
+
+# CHECK:    bgti    r2, 0
+0xbc 0x82 0x00 0x00
+
+ # CHECK:   blei    r2, 0
+0xbc 0x62 0x00 0x00
+
+# CHECK:    blti    r2, 0
+0xbc 0x42 0x00 0x00
+
+# CHECK:    bnei    r2, 0
+0xbc 0x22 0x00 0x00
+
+# CHECK:    beqid   r2, 0
+0xbe 0x02 0x00 0x00
+
+# CHECK:    bgeid   r2, 0
+0xbe 0xa2 0x00 0x00
+
+# CHECK:    bgtid   r2, 0
+0xbe 0x82 0x00 0x00
+
+# CHECK:    bleid   r2, 0
+0xbe 0x62 0x00 0x00
+
+# CHECK:    bltid   r2, 0
+0xbe 0x42 0x00 0x00
+
+# CHECK:    bneid   r2, 0
+0xbe 0x22 0x00 0x00
+
+# CHECK:    bri     0
+0xb8 0x00 0x00 0x00
+
+# CHECK:    brai    0
+0xb8 0x08 0x00 0x00
+
+# CHECK:    brid    0
+0xb8 0x10 0x00 0x00
+
+# CHECK:    braid   0
+0xb8 0x18 0x00 0x00
+
+# CHECK:    brlid   r15, 0
+0xb9 0xf4 0x00 0x00
+
+# CHECK:    bralid  r15, 0
+0xb9 0xfc 0x00 0x00
+
+# CHECK:    brki    r15, 0
+0xb9 0xec 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_fpu.txt b/test/MC/Disassembler/MBlaze/mblaze_fpu.txt
new file mode 100644
index 000000000000..0fb7abcdea54
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_fpu.txt
@@ -0,0 +1,47 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# FPU instructions
+################################################################################
+
+# CHECK:    fadd         r0, r1, r2
+0x58 0x01 0x10 0x00
+
+# CHECK:    frsub        r0, r1, r2
+0x58 0x01 0x10 0x80
+
+# CHECK:    fmul         r0, r1, r2
+0x58 0x01 0x11 0x00
+
+# CHECK:    fdiv         r0, r1, r2
+0x58 0x01 0x11 0x80
+
+# CHECK:    fsqrt        r0, r1
+0x58 0x01 0x03 0x80
+
+# CHECK:    fint         r0, r1
+0x58 0x01 0x03 0x00
+
+# CHECK:    flt          r0, r1
+0x58 0x01 0x02 0x80
+
+# CHECK:    fcmp.un     r0, r1, r2
+0x58 0x01 0x12 0x00
+
+# CHECK:    fcmp.lt     r0, r1, r2
+0x58 0x01 0x12 0x10
+
+# CHECK:    fcmp.eq     r0, r1, r2
+0x58 0x01 0x12 0x20
+
+# CHECK:    fcmp.le     r0, r1, r2
+0x58 0x01 0x12 0x30
+
+# CHECK:    fcmp.gt     r0, r1, r2
+0x58 0x01 0x12 0x40
+
+# CHECK:    fcmp.ne     r0, r1, r2
+0x58 0x01 0x12 0x50
+
+# CHECK:    fcmp.ge     r0, r1, r2
+0x58 0x01 0x12 0x60
diff --git a/test/MC/Disassembler/MBlaze/mblaze_fsl.txt b/test/MC/Disassembler/MBlaze/mblaze_fsl.txt
new file mode 100644
index 000000000000..a12b3b486e60
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_fsl.txt
@@ -0,0 +1,338 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# FSL instructions
+################################################################################
+
+# CHECK:    get         r0, rfsl0
+0x6c 0x00 0x00 0x00
+
+# CHECK:    nget        r0, rfsl0
+0x6c 0x00 0x40 0x00
+
+# CHECK:    cget        r0, rfsl0
+0x6c 0x00 0x20 0x00
+
+# CHECK:    ncget       r0, rfsl0
+0x6c 0x00 0x60 0x00
+
+# CHECK:    tget        r0, rfsl0
+0x6c 0x00 0x10 0x00
+
+# CHECK:    tnget       r0, rfsl0
+0x6c 0x00 0x50 0x00
+
+# CHECK:    tcget       r0, rfsl0
+0x6c 0x00 0x30 0x00
+
+# CHECK:    tncget      r0, rfsl0
+0x6c 0x00 0x70 0x00
+
+# CHECK:    aget        r0, rfsl0
+0x6c 0x00 0x08 0x00
+
+# CHECK:    naget       r0, rfsl0
+0x6c 0x00 0x48 0x00
+
+# CHECK:    caget       r0, rfsl0
+0x6c 0x00 0x28 0x00
+
+# CHECK:    ncaget      r0, rfsl0
+0x6c 0x00 0x68 0x00
+
+# CHECK:    taget       r0, rfsl0
+0x6c 0x00 0x18 0x00
+
+# CHECK:    tnaget      r0, rfsl0
+0x6c 0x00 0x58 0x00
+
+# CHECK:    tcaget      r0, rfsl0
+0x6c 0x00 0x38 0x00
+
+# CHECK:    tncaget     r0, rfsl0
+0x6c 0x00 0x78 0x00
+
+# CHECK:    eget        r0, rfsl0
+0x6c 0x00 0x04 0x00
+
+# CHECK:    neget       r0, rfsl0
+0x6c 0x00 0x44 0x00
+
+# CHECK:    ecget       r0, rfsl0
+0x6c 0x00 0x24 0x00
+
+# CHECK:    necget      r0, rfsl0
+0x6c 0x00 0x64 0x00
+
+# CHECK:    teget       r0, rfsl0
+0x6c 0x00 0x14 0x00
+
+# CHECK:    tneget      r0, rfsl0
+0x6c 0x00 0x54 0x00
+
+# CHECK:    tecget      r0, rfsl0
+0x6c 0x00 0x34 0x00
+
+# CHECK:    tnecget     r0, rfsl0
+0x6c 0x00 0x74 0x00
+
+# CHECK:    eaget       r0, rfsl0
+0x6c 0x00 0x0c 0x00
+
+# CHECK:    neaget      r0, rfsl0
+0x6c 0x00 0x4c 0x00
+
+# CHECK:    ecaget      r0, rfsl0
+0x6c 0x00 0x2c 0x00
+
+# CHECK:    necaget     r0, rfsl0
+0x6c 0x00 0x6c 0x00
+
+# CHECK:    teaget      r0, rfsl0
+0x6c 0x00 0x1c 0x00
+
+# CHECK:    tneaget     r0, rfsl0
+0x6c 0x00 0x5c 0x00
+
+# CHECK:    tecaget     r0, rfsl0
+0x6c 0x00 0x3c 0x00
+
+# CHECK:    tnecaget    r0, rfsl0
+0x6c 0x00 0x7c 0x00
+
+# CHECK:    getd        r0, r1
+0x4c 0x00 0x08 0x00
+
+# CHECK:    ngetd       r0, r1
+0x4c 0x00 0x0a 0x00
+
+# CHECK:    cgetd       r0, r1
+0x4c 0x00 0x09 0x00
+
+# CHECK:    ncgetd      r0, r1
+0x4c 0x00 0x0b 0x00
+
+# CHECK:    tgetd       r0, r1
+0x4c 0x00 0x08 0x80
+
+# CHECK:    tngetd      r0, r1
+0x4c 0x00 0x0a 0x80
+
+# CHECK:    tcgetd      r0, r1
+0x4c 0x00 0x09 0x80
+
+# CHECK:    tncgetd     r0, r1
+0x4c 0x00 0x0b 0x80
+
+# CHECK:    agetd       r0, r1
+0x4c 0x00 0x08 0x40
+
+# CHECK:    nagetd      r0, r1
+0x4c 0x00 0x0a 0x40
+
+# CHECK:    cagetd     r0, r1
+0x4c 0x00 0x09 0x40
+
+# CHECK:    ncagetd     r0, r1
+0x4c 0x00 0x0b 0x40
+
+# CHECK:    tagetd      r0, r1
+0x4c 0x00 0x08 0xc0
+
+# CHECK:    tnagetd     r0, r1
+0x4c 0x00 0x0a 0xc0
+
+# CHECK:    tcagetd     r0, r1
+0x4c 0x00 0x09 0xc0
+
+# CHECK:    tncagetd    r0, r1
+0x4c 0x00 0x0b 0xc0
+
+# CHECK:    egetd       r0, r1
+0x4c 0x00 0x08 0x20
+
+# CHECK:    negetd      r0, r1
+0x4c 0x00 0x0a 0x20
+
+# CHECK:    ecgetd      r0, r1
+0x4c 0x00 0x09 0x20
+
+# CHECK:    necgetd     r0, r1
+0x4c 0x00 0x0b 0x20
+
+# CHECK:    tegetd      r0, r1
+0x4c 0x00 0x08 0xa0
+
+# CHECK:    tnegetd     r0, r1
+0x4c 0x00 0x0a 0xa0
+
+# CHECK:    tecgetd     r0, r1
+0x4c 0x00 0x09 0xa0
+
+# CHECK:    tnecgetd    r0, r1
+0x4c 0x00 0x0b 0xa0
+
+# CHECK:    eagetd      r0, r1
+0x4c 0x00 0x08 0x60
+
+# CHECK:    neagetd     r0, r1
+0x4c 0x00 0x0a 0x60
+
+# CHECK:    ecagetd     r0, r1
+0x4c 0x00 0x09 0x60
+
+# CHECK:    necagetd    r0, r1
+0x4c 0x00 0x0b 0x60
+
+# CHECK:    teagetd     r0, r1
+0x4c 0x00 0x08 0xe0
+
+# CHECK:    tneagetd    r0, r1
+0x4c 0x00 0x0a 0xe0
+
+# CHECK:    tecagetd    r0, r1
+0x4c 0x00 0x09 0xe0
+
+# CHECK:    tnecagetd   r0, r1
+0x4c 0x00 0x0b 0xe0
+
+# CHECK:    put         r0, rfsl0
+0x6c 0x00 0x80 0x00
+
+# CHECK:    aput        r0, rfsl0
+0x6c 0x00 0x88 0x00
+
+# CHECK:    cput        r0, rfsl0
+0x6c 0x00 0xa0 0x00
+
+# CHECK:    caput       r0, rfsl0
+0x6c 0x00 0xa8 0x00
+
+# CHECK:    nput        r0, rfsl0
+0x6c 0x00 0xc0 0x00
+
+# CHECK:    naput       r0, rfsl0
+0x6c 0x00 0xc8 0x00
+
+# CHECK:    ncput       r0, rfsl0
+0x6c 0x00 0xe0 0x00
+
+# CHECK:    ncaput      r0, rfsl0
+0x6c 0x00 0xe8 0x00
+
+# CHECK:    tput        rfsl0
+0x6c 0x00 0x90 0x00
+
+# CHECK:    taput       rfsl0
+0x6c 0x00 0x98 0x00
+
+# CHECK:    tcput       rfsl0
+0x6c 0x00 0xb0 0x00
+
+# CHECK:    tcaput      rfsl0
+0x6c 0x00 0xb8 0x00
+
+# CHECK:    tnput       rfsl0
+0x6c 0x00 0xd0 0x00
+
+# CHECK:    tnaput      rfsl0
+0x6c 0x00 0xd8 0x00
+
+# CHECK:    tncput      rfsl0
+0x6c 0x00 0xf0 0x00
+
+# CHECK:    tncaput     rfsl0
+0x6c 0x00 0xf8 0x00
+
+# CHECK:    putd        r0, r1
+0x4c 0x00 0x0c 0x00
+
+# CHECK:    aputd       r0, r1
+0x4c 0x00 0x0c 0x40
+
+# CHECK:    cputd       r0, r1
+0x4c 0x00 0x0d 0x00
+
+# CHECK:    caputd      r0, r1
+0x4c 0x00 0x0d 0x40
+
+# CHECK:    nputd       r0, r1
+0x4c 0x00 0x0e 0x00
+
+# CHECK:    naputd      r0, r1
+0x4c 0x00 0x0e 0x40
+
+# CHECK:    ncputd      r0, r1
+0x4c 0x00 0x0f 0x00
+
+# CHECK:    ncaputd     r0, r1
+0x4c 0x00 0x0f 0x40
+
+# CHECK:    tputd       r1
+0x4c 0x00 0x0c 0x80
+
+# CHECK:    taputd      r1
+0x4c 0x00 0x0c 0xc0
+
+# CHECK:    tcputd      r1
+0x4c 0x00 0x0d 0x80
+
+# CHECK:    tcaputd     r1
+0x4c 0x00 0x0d 0xc0
+
+# CHECK:    tnputd      r1
+0x4c 0x00 0x0e 0x80
+
+# CHECK:    tnaputd     r1
+0x4c 0x00 0x0e 0xc0
+
+# CHECK:    tncputd     r1
+0x4c 0x00 0x0f 0x80
+
+# CHECK:    tncaputd    r1
+0x4c 0x00 0x0f 0xc0
+
+# CHECK:    get     r0, rfsl1
+0x6c 0x00 0x00 0x01
+
+# CHECK:    get     r0, rfsl2
+0x6c 0x00 0x00 0x02
+
+# CHECK:    get     r0, rfsl3
+0x6c 0x00 0x00 0x03
+
+# CHECK:    get     r0, rfsl4
+0x6c 0x00 0x00 0x04
+
+# CHECK:    get     r0, rfsl5
+0x6c 0x00 0x00 0x05
+
+# CHECK:    get     r0, rfsl6
+0x6c 0x00 0x00 0x06
+
+# CHECK:    get     r0, rfsl7
+0x6c 0x00 0x00 0x07
+
+# CHECK:    get     r0, rfsl8
+0x6c 0x00 0x00 0x08
+
+# CHECK:    get     r0, rfsl9
+0x6c 0x00 0x00 0x09
+
+# CHECK:    get     r0, rfsl10
+0x6c 0x00 0x00 0x0a
+
+# CHECK:    get     r0, rfsl11
+0x6c 0x00 0x00 0x0b
+
+# CHECK:    get     r0, rfsl12
+0x6c 0x00 0x00 0x0c
+
+# CHECK:    get     r0, rfsl13
+0x6c 0x00 0x00 0x0d
+
+# CHECK:    get     r0, rfsl14
+0x6c 0x00 0x00 0x0e
+
+# CHECK:    get     r0, rfsl15
+0x6c 0x00 0x00 0x0f
diff --git a/test/MC/Disassembler/MBlaze/mblaze_imm.txt b/test/MC/Disassembler/MBlaze/mblaze_imm.txt
new file mode 100644
index 000000000000..3833ea85d779
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_imm.txt
@@ -0,0 +1,121 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# IMM instruction processing
+################################################################################
+
+# CHECK:    addi    r0, r0, 0
+0x20 0x00 0x00 0x00
+
+# CHECK:    addi    r0, r0, 1
+0x20 0x00 0x00 0x01
+
+# CHECK:    addi    r0, r0, 2
+0x20 0x00 0x00 0x02
+
+# CHECK:    addi    r0, r0, 4
+0x20 0x00 0x00 0x04
+
+# CHECK:    addi    r0, r0, 8
+0x20 0x00 0x00 0x08
+
+# CHECK:    addi    r0, r0, 16
+0x20 0x00 0x00 0x10
+
+# CHECK:    addi    r0, r0, 32
+0x20 0x00 0x00 0x20
+
+# CHECK:    addi    r0, r0, 64
+0x20 0x00 0x00 0x40
+
+# CHECK:    addi    r0, r0, 128
+0x20 0x00 0x00 0x80
+
+# CHECK:    addi    r0, r0, 256
+0x20 0x00 0x01 0x00
+
+# CHECK:    addi    r0, r0, 512
+0x20 0x00 0x02 0x00
+
+# CHECK:    addi    r0, r0, 1024
+0x20 0x00 0x04 0x00
+
+# CHECK:    addi    r0, r0, 2048
+0x20 0x00 0x08 0x00
+
+# CHECK:    addi    r0, r0, 4096
+0x20 0x00 0x10 0x00
+
+# CHECK:    addi    r0, r0, 8192
+0x20 0x00 0x20 0x00
+
+# CHECK:    addi    r0, r0, 16384
+0x20 0x00 0x40 0x00
+
+# CHECK:    imm     0
+# CHECK:    addi    r0, r0, -32768
+0xb0 0x00 0x00 0x00 0x20 0x00 0x80 0x00
+
+# CHECK:    imm     1
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x01 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     2
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x02 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     4
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x04 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     8
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x08 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     16
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x10 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     32
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x20 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     64
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x40 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     128
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x00 0x80 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     256
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x01 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     512
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x02 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     1024
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x04 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     2048
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x08 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     4096
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x10 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     8192
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x20 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     16384
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x40 0x00 0x20 0x00 0x00 0x00
+
+# CHECK:    imm     -32768
+# CHECK:    addi    r0, r0, 0
+0xb0 0x00 0x80 0x00 0x20 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_memory.txt b/test/MC/Disassembler/MBlaze/mblaze_memory.txt
new file mode 100644
index 000000000000..584d61c47dcf
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_memory.txt
@@ -0,0 +1,65 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Memory instructions
+################################################################################
+
+# CHECK:    lbu     r1, r2, r3
+0xc0 0x22 0x18 0x00
+
+# CHECK:    lbur    r1, r2, r3
+0xc0 0x22 0x1a 0x00
+
+# CHECK:    lbui    r1, r2, 28
+0xe0 0x22 0x00 0x1c
+
+# CHECK:    lhu     r1, r2, r3
+0xc4 0x22 0x18 0x00
+
+# CHECK:    lhur    r1, r2, r3
+0xc4 0x22 0x1a 0x00
+
+# CHECK:    lhui    r1, r2, 28
+0xe4 0x22 0x00 0x1c
+
+# CHECK:    lw      r1, r2, r3
+0xc8 0x22 0x18 0x00
+
+# CHECK:    lwr    r1, r2, r3
+0xc8 0x22 0x1a 0x00
+
+# CHECK:    lwi     r1, r2, 28
+0xe8 0x22 0x00 0x1c
+
+# CHECK:    lwx      r1, r2, r3
+0xc8 0x22 0x1c 0x00
+
+# CHECK:    sb      r1, r2, r3
+0xd0 0x22 0x18 0x00
+
+# CHECK:    sbr     r1, r2, r3
+0xd0 0x22 0x1a 0x00
+
+# CHECK:    sbi     r1, r2, 28
+0xf0 0x22 0x00 0x1c
+
+# CHECK:    sh      r1, r2, r3
+0xd4 0x22 0x18 0x00
+
+# CHECK:    shr     r1, r2, r3
+0xd4 0x22 0x1a 0x00
+
+# CHECK:    shi     r1, r2, 28
+0xf4 0x22 0x00 0x1c
+
+# CHECK:    sw      r1, r2, r3
+0xd8 0x22 0x18 0x00
+
+# CHECK:    swr    r1, r2, r3
+0xd8 0x22 0x1a 0x00
+
+# CHECK:    swi     r1, r2, 28
+0xf8 0x22 0x00 0x1c
+
+# CHECK:    swx      r1, r2, r3
+0xd8 0x22 0x1c 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_operands.txt b/test/MC/Disassembler/MBlaze/mblaze_operands.txt
new file mode 100644
index 000000000000..f0304b12bd7b
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_operands.txt
@@ -0,0 +1,197 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Operands disassembly
+################################################################################
+
+# CHECK:    add     r0, r0, r0
+0x00 0x00 0x00 0x00
+
+# CHECK:    add     r1, r1, r1
+0x00 0x21 0x08 0x00
+
+# CHECK:    add     r2, r2, r2
+0x00 0x42 0x10 0x00
+
+# CHECK:    add     r3, r3, r3
+0x00 0x63 0x18 0x00
+
+# CHECK:    add     r4, r4, r4
+0x00 0x84 0x20 0x00
+
+# CHECK:    add     r5, r5, r5
+0x00 0xa5 0x28 0x00
+
+# CHECK:    add     r6, r6, r6
+0x00 0xc6 0x30 0x00
+
+# CHECK:    add     r7, r7, r7
+0x00 0xe7 0x38 0x00
+
+# CHECK:    add     r8, r8, r8
+0x01 0x08 0x40 0x00
+
+# CHECK:    add     r9, r9, r9
+0x01 0x29 0x48 0x00
+
+# CHECK:    add     r10, r10, r10
+0x01 0x4a 0x50 0x00
+
+# CHECK:    add     r11, r11, r11
+0x01 0x6b 0x58 0x00
+
+# CHECK:    add     r12, r12, r12
+0x01 0x8c 0x60 0x00
+
+# CHECK:    add     r13, r13, r13
+0x01 0xad 0x68 0x00
+
+# CHECK:    add     r14, r14, r14
+0x01 0xce 0x70 0x00
+
+# CHECK:    add     r15, r15, r15
+0x01 0xef 0x78 0x00
+
+# CHECK:    add     r16, r16, r16
+0x02 0x10 0x80 0x00
+
+# CHECK:    add     r17, r17, r17
+0x02 0x31 0x88 0x00
+
+# CHECK:    add     r18, r18, r18
+0x02 0x52 0x90 0x00
+
+# CHECK:    add     r19, r19, r19
+0x02 0x73 0x98 0x00
+
+# CHECK:    add     r20, r20, r20
+0x02 0x94 0xa0 0x00
+
+# CHECK:    add     r21, r21, r21
+0x02 0xb5 0xa8 0x00
+
+# CHECK:    add     r22, r22, r22
+0x02 0xd6 0xb0 0x00
+
+# CHECK:    add     r23, r23, r23
+0x02 0xf7 0xb8 0x00
+
+# CHECK:    add     r24, r24, r24
+0x03 0x18 0xc0 0x00
+
+# CHECK:    add     r25, r25, r25
+0x03 0x39 0xc8 0x00
+
+# CHECK:    add     r26, r26, r26
+0x03 0x5a 0xd0 0x00
+
+# CHECK:    add     r27, r27, r27
+0x03 0x7b 0xd8 0x00
+
+# CHECK:    add     r28, r28, r28
+0x03 0x9c 0xe0 0x00
+
+# CHECK:    add     r29, r29, r29
+0x03 0xbd 0xe8 0x00
+
+# CHECK:    add     r30, r30, r30
+0x03 0xde 0xf0 0x00
+
+# CHECK:    add     r31, r31, r31
+0x03 0xff 0xf8 0x00
+
+# CHECK:    addi    r0, r0, 0
+0x20 0x00 0x00 0x00
+
+# CHECK:    addi    r0, r0, 1
+0x20 0x00 0x00 0x01
+
+# CHECK:    addi    r0, r0, 2
+0x20 0x00 0x00 0x02
+
+# CHECK:    addi    r0, r0, 4
+0x20 0x00 0x00 0x04
+
+# CHECK:    addi    r0, r0, 8
+0x20 0x00 0x00 0x08
+
+# CHECK:    addi    r0, r0, 16
+0x20 0x00 0x00 0x10
+
+# CHECK:    addi    r0, r0, 32
+0x20 0x00 0x00 0x20
+
+# CHECK:    addi    r0, r0, 64
+0x20 0x00 0x00 0x40
+
+# CHECK:    addi    r0, r0, 128
+0x20 0x00 0x00 0x80
+
+# CHECK:    addi    r0, r0, 256
+0x20 0x00 0x01 0x00
+
+# CHECK:    addi    r0, r0, 512
+0x20 0x00 0x02 0x00
+
+# CHECK:    addi    r0, r0, 1024
+0x20 0x00 0x04 0x00
+
+# CHECK:    addi    r0, r0, 2048
+0x20 0x00 0x08 0x00
+
+# CHECK:    addi    r0, r0, 4096
+0x20 0x00 0x10 0x00
+
+# CHECK:    addi    r0, r0, 8192
+0x20 0x00 0x20 0x00
+
+# CHECK:    addi    r0, r0, 16384
+0x20 0x00 0x40 0x00
+
+# CHECK:    addi    r0, r0, -1
+0x20 0x00 0xff 0xff
+
+# CHECK:    addi    r0, r0, -2
+0x20 0x00 0xff 0xfe
+
+# CHECK:    addi    r0, r0, -4
+0x20 0x00 0xff 0xfc
+
+# CHECK:    addi    r0, r0, -8
+0x20 0x00 0xff 0xf8
+
+# CHECK:    addi    r0, r0, -16
+0x20 0x00 0xff 0xf0
+
+# CHECK:    addi    r0, r0, -32
+0x20 0x00 0xff 0xe0
+
+# CHECK:    addi    r0, r0, -64
+0x20 0x00 0xff 0xc0
+
+# CHECK:    addi    r0, r0, -128
+0x20 0x00 0xff 0x80
+
+# CHECK:    addi    r0, r0, -256
+0x20 0x00 0xff 0x00
+
+# CHECK:    addi    r0, r0, -512
+0x20 0x00 0xfe 0x00
+
+# CHECK:    addi    r0, r0, -1024
+0x20 0x00 0xfc 0x00
+
+# CHECK:    addi    r0, r0, -2048
+0x20 0x00 0xf8 0x00
+
+# CHECK:    addi    r0, r0, -4096
+0x20 0x00 0xf0 0x00
+
+# CHECK:    addi    r0, r0, -8192
+0x20 0x00 0xe0 0x00
+
+# CHECK:    addi    r0, r0, -16384
+0x20 0x00 0xc0 0x00
+
+# CHECK:    addi    r0, r0, -32768
+0x20 0x00 0x80 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
new file mode 100644
index 000000000000..1268378fa0f8
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Pattern instructions
+################################################################################
+
+# CHECK:    pcmpbf      r0, r1, r2
+0x80 0x01 0x14 0x00
+
+# CHECK:    pcmpne      r0, r1, r2
+0x8c 0x01 0x14 0x00
+
+# CHECK:    pcmpeq      r0, r1, r2
+0x88 0x01 0x14 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_shift.txt b/test/MC/Disassembler/MBlaze/mblaze_shift.txt
new file mode 100644
index 000000000000..2783ffcb3e72
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_shift.txt
@@ -0,0 +1,29 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Shift instructions
+################################################################################
+
+# CHECK:    bsrl    r1, r2, r3
+0x44 0x22 0x18 0x00
+
+# CHECK:    bsra    r1, r2, r3
+0x44 0x22 0x1a 0x00
+
+# CHECK:    bsll    r1, r2, r3
+0x44 0x22 0x1c 0x00
+
+# CHECK:    bsrli   r1, r2, 0
+0x64 0x22 0x00 0x00
+
+# CHECK:    bsrai   r1, r2, 0
+0x64 0x22 0x02 0x00
+
+# CHECK:    bslli   r1, r2, 0
+0x64 0x22 0x04 0x00
+
+# CHECK:    sra     r1, r2
+0x90 0x22 0x00 0x01
+
+# CHECK:    srl     r1, r2
+0x90 0x22 0x00 0x41
diff --git a/test/MC/Disassembler/MBlaze/mblaze_special.txt b/test/MC/Disassembler/MBlaze/mblaze_special.txt
new file mode 100644
index 000000000000..a808cc9ccfb3
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_special.txt
@@ -0,0 +1,105 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Special instructions
+################################################################################
+
+# CHECK:    mfs         r0, rpc
+0x94 0x00 0x80 0x00
+
+# CHECK:    msrclr      r0, 0
+0x94 0x11 0x00 0x00
+
+# CHECK:    msrset      r0, 0
+0x94 0x10 0x00 0x00
+
+# CHECK:    mts         rpc, r0
+0x94 0x00 0xc0 0x00
+
+# CHECK:    wdc         r0, r1
+0x90 0x00 0x08 0x64
+
+# CHECK:    wdc.clear   r0, r1
+0x90 0x00 0x08 0x66
+
+# CHECK:    wdc.flush   r0, r1
+0x90 0x00 0x08 0x74
+
+# CHECK:    wic         r0, r1
+0x90 0x00 0x08 0x68
+
+################################################################################
+# Special registers
+################################################################################
+
+# CHECK:    mfs         r1, rpc
+0x94 0x20 0x80 0x00
+
+# CHECK:    mfs         r1, rmsr
+0x94 0x20 0x80 0x01
+
+# CHECK:    mfs         r1, rear
+0x94 0x20 0x80 0x03
+
+# CHECK:    mfs         r1, resr
+0x94 0x20 0x80 0x05
+
+# CHECK:    mfs         r1, rfsr
+0x94 0x20 0x80 0x07
+
+# CHECK:    mfs         r1, rbtr
+0x94 0x20 0x80 0x0b
+
+# CHECK:    mfs         r1, redr
+0x94 0x20 0x80 0x0d
+
+# CHECK:    mfs         r1, rpid
+0x94 0x20 0x90 0x00
+
+# CHECK:    mfs         r1, rzpr
+0x94 0x20 0x90 0x01
+
+# CHECK:    mfs         r1, rtlbx
+0x94 0x20 0x90 0x02
+
+# CHECK:    mfs         r1, rtlbhi
+0x94 0x20 0x90 0x04
+
+# CHECK:    mfs         r1, rtlblo
+0x94 0x20 0x90 0x03
+
+# CHECK:    mfs         r1, rpvr0
+0x94 0x20 0xa0 0x00
+
+# CHECK:    mfs         r1, rpvr1
+0x94 0x20 0xa0 0x01
+
+# CHECK:    mfs         r1, rpvr2
+0x94 0x20 0xa0 0x02
+
+# CHECK:    mfs         r1, rpvr3
+0x94 0x20 0xa0 0x03
+
+# CHECK:    mfs         r1, rpvr4
+0x94 0x20 0xa0 0x04
+
+# CHECK:    mfs         r1, rpvr5
+0x94 0x20 0xa0 0x05
+
+# CHECK:    mfs         r1, rpvr6
+0x94 0x20 0xa0 0x06
+
+# CHECK:    mfs         r1, rpvr7
+0x94 0x20 0xa0 0x07
+
+# CHECK:    mfs         r1, rpvr8
+0x94 0x20 0xa0 0x08
+
+# CHECK:    mfs         r1, rpvr9
+0x94 0x20 0xa0 0x09
+
+# CHECK:    mfs         r1, rpvr10
+0x94 0x20 0xa0 0x0a
+
+# CHECK:    mfs         r1, rpvr11
+0x94 0x20 0xa0 0x0b
diff --git a/test/MC/Disassembler/MBlaze/mblaze_typea.txt b/test/MC/Disassembler/MBlaze/mblaze_typea.txt
new file mode 100644
index 000000000000..ce99950548bb
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_typea.txt
@@ -0,0 +1,74 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# TYPE A instructions
+################################################################################
+
+# CHECK:    add     r1, r2, r3
+0x00 0x22 0x18 0x00
+
+# CHECK:    addc    r1, r2, r3
+0x08 0x22 0x18 0x00
+
+# CHECK:    addk    r1, r2, r3
+0x10 0x22 0x18 0x00
+
+# CHECK:    addkc   r1, r2, r3
+0x18 0x22 0x18 0x00
+
+# CHECK:    and     r1, r2, r3
+0x84 0x22 0x18 0x00
+
+# CHECK:    andn    r1, r2, r3
+0x8c 0x22 0x18 0x00
+
+# CHECK:    cmp     r1, r2, r3
+0x14 0x22 0x18 0x01
+
+# CHECK:    cmpu    r1, r2, r3
+0x14 0x22 0x18 0x03
+
+# CHECK:    idiv    r1, r2, r3
+0x48 0x22 0x18 0x00
+
+# CHECK:    idivu   r1, r2, r3
+0x48 0x22 0x18 0x02
+
+# CHECK:    mul    r1, r2, r3
+0x40 0x22 0x18 0x00
+
+# CHECK:    mulh   r1, r2, r3
+0x40 0x22 0x18 0x01
+
+# CHECK:    mulhu  r1, r2, r3
+0x40 0x22 0x18 0x03
+
+# CHECK:    mulhsu r1, r2, r3
+0x40 0x22 0x18 0x02
+
+# CHECK:    or      r1, r2, r3
+0x80 0x22 0x18 0x00
+
+# CHECK:    rsub    r1, r2, r3
+0x04 0x22 0x18 0x00
+
+# CHECK:    rsubc   r1, r2, r3
+0x0c 0x22 0x18 0x00
+
+# CHECK:    rsubk   r1, r2, r3
+0x14 0x22 0x18 0x00
+
+# CHECK:    rsubkc  r1, r2, r3
+0x1c 0x22 0x18 0x00
+
+# CHECK:    sext16  r1, r2
+0x90 0x22 0x00 0x61
+
+# CHECK:    sext8   r1, r2
+0x90 0x22 0x00 0x60
+
+# CHECK:    xor     r1, r2, r3
+0x88 0x22 0x18 0x00
+
+# CHECK:    or      r0, r0, r0
+0x80 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_typeb.txt b/test/MC/Disassembler/MBlaze/mblaze_typeb.txt
new file mode 100644
index 000000000000..99782ac2c159
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_typeb.txt
@@ -0,0 +1,56 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# TYPE B instructions
+################################################################################
+
+# CHECK:    addi    r1, r2, 15
+0x20 0x22 0x00 0x0f
+
+# CHECK:    addic   r1, r2, 15
+0x28 0x22 0x00 0x0f
+
+# CHECK:    addik   r1, r2, 15
+0x30 0x22 0x00 0x0f
+
+# CHECK:    addikc  r1, r2, 15
+0x38 0x22 0x00 0x0f
+
+# CHECK:    andi    r1, r2, 15
+0xa4 0x22 0x00 0x0f
+
+# CHECK:    andni   r1, r2, 15
+0xac 0x22 0x00 0x0f
+
+# CHECK:    muli    r1, r2, 15
+0x60 0x22 0x00 0x0f
+
+# CHECK:    ori     r1, r2, 15
+0xa0 0x22 0x00 0x0f
+
+# CHECK:    rsubi   r1, r2, 15
+0x24 0x22 0x00 0x0f
+
+# CHECK:    rsubic  r1, r2, 15
+0x2c 0x22 0x00 0x0f
+
+# CHECK:    rsubik  r1, r2, 15
+0x34 0x22 0x00 0x0f
+
+# CHECK:    rsubikc r1, r2, 15
+0x3c 0x22 0x00 0x0f
+
+# CHECK:    rtbd r15, 15
+0xb6 0x4f 0x00 0x0f
+
+# CHECK:    rted r15, 15
+0xb6 0x8f 0x00 0x0f
+
+# CHECK:    rtid r15, 15
+0xb6 0x2f 0x00 0x0f
+
+# CHECK:    rtsd r15, 15
+0xb6 0x0f 0x00 0x0f
+
+# CHECK:    xori r1, r2, 15
+0xa8 0x22 0x00 0x0f
diff --git a/test/MC/Disassembler/X86/dg.exp b/test/MC/Disassembler/X86/dg.exp
new file mode 100644
index 000000000000..a4d0e7c718c8
--- /dev/null
+++ b/test/MC/Disassembler/X86/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
+}
+
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
new file mode 100644
index 000000000000..13a19d2ca4c3
--- /dev/null
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -0,0 +1,68 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: int	$33
+0xCD 0x21 
+
+# CHECK: int	$33
+0xCD 0x21
+
+
+# CHECK: addb	%al, (%rax)
+0 0
+
+# CHECK: callq	-1234
+0xe8 0x2e 0xfb 0xff 0xff
+
+# CHECK: lfence
+0x0f 0xae 0xe8
+
+# CHECK: mfence
+0x0f 0xae 0xf0
+
+# CHECK: monitor
+0x0f 0x01 0xc8
+
+# CHECK: mwait
+0x0f 0x01 0xc9
+
+# CHECK: vmcall
+0x0f 0x01 0xc1
+
+# CHECK: vmlaunch
+0x0f 0x01 0xc2
+
+# CHECK: vmresume
+0x0f 0x01 0xc3
+
+# CHECK: vmxoff
+0x0f 0x01 0xc4
+
+# CHECK: swapgs
+0x0f 0x01 0xf8
+
+# CHECK: rdtscp
+0x0f 0x01 0xf9
+
+# CHECK: vmxon
+0xf3 0x0f 0xc7 0x30
+
+# CHECK: vmptrld
+0x0f 0xc7 0x30
+
+# CHECK: vmptrst
+0x0f 0xc7 0x38
+
+# CHECK: movl $0, -4(%rbp)
+0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
+
+# CHECK: movq	%cr0, %rcx
+0x0f 0x20 0xc1
+
+# CHECK: leal	4(%rsp), %ecx
+0x8d 0x4c 0x24 0x04 
+
+# CHECK: enter	$1, $2
+0xc8 0x01 0x00 0x02
+
+# CHECK: movw	$47416, -66(%rbp)
+0x66 0xc7 0x45 0xbe 0x38 0xb9
diff --git a/test/MC/Disassembler/X86/truncated-input.txt b/test/MC/Disassembler/X86/truncated-input.txt
new file mode 100644
index 000000000000..34cf0382a74c
--- /dev/null
+++ b/test/MC/Disassembler/X86/truncated-input.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& FileCheck %s
+
+# CHECK: warning
+0x00
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt
deleted file mode 100644
index 0b4c2978fe3d..000000000000
--- a/test/MC/Disassembler/arm-tests.txt
+++ /dev/null
@@ -1,111 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-
-# CHECK:	b	#0
-0xfe 0xff 0xff 0xea
-
-# CHECK:	bfc	r8, #0, #16
-0x1f 0x80 0xcf 0xe7
-
-# CHECK:	bfi	r8, r0, #16, #1
-0x10 0x88 0xd0 0xe7
-
-# CHECK:	cmn	r0, #1
-0x01 0x00 0x70 0xe3
-
-# CHECK:	dmb
-0x5f 0xf0 0x7f 0xf5
-
-# CHECK:	dmb	nshst
-0x56 0xf0 0x7f 0xf5
-
-# CHECK:	dsb
-0x4f 0xf0 0x7f 0xf5
-
-# CHECK:	dsb	st
-0x4e 0xf0 0x7f 0xf5
-
-# CHECK:	isb
-0x6f 0xf0 0x7f 0xf5
-
-# CHECK:	ldclvc	p5, cr15, [r8], #-0
-0x00 0xf5 0x78 0x7c
-
-# CHECK:	ldr	r0, [r2], #15
-0x0f 0x00 0x92 0xe4
-
-# CHECK:	ldrh	r0, [r2], #0
-0xb0 0x00 0xd2 0xe0
-
-# CHECK:	ldrht	r0, [r2], #15
-0xbf 0x00 0xf2 0xe0
-
-# CHECK:	ldrsbtvs	lr, [r2], -r9
-0xd9 0xe9 0x32 0x60
-
-# CHECK:	lsls	r0, r2, #31
-0x82 0x0f 0xb0 0xe1
-
-# CHECK:	mcr2	p0, #0, r2, cr1, cr0, #7
-0xf0 0x20 0x01 0xfe
-
-# CHECK:	movt	r8, #65535
-0xff 0x8f 0x4f 0xe3
-
-# CHECK:	mvnpls	r7, #245, 2
-0xf5 0x71 0xf0 0x53
-
-# CHECK-NOT:	orr	r7, r8, r7, rrx #0
-# CHECK:	orr	r7, r8, r7, rrx
-0x67 0x70 0x88 0xe1
-
-# CHECK:	pkhbt	r8, r9, r10, lsl #4
-0x1a 0x82 0x89 0xe6
-
-# CHECK-NOT:	pkhbtls	pc, r11, r11, lsl #0
-# CHECK:	pkhbtls	pc, r11, r11
-0x1b 0xf0 0x8b 0x96
-
-# CHECK:	pop	{r0, r2, r4, r6, r8, r10}
-0x55 0x05 0xbd 0xe8
-
-# CHECK:	push	{r0, r2, r4, r6, r8, r10}
-0x55 0x05 0x2d 0xe9
-
-# CHECK:	qsax	r8, r9, r10
-0x5a 0x8f 0x29 0xe6
-
-# CHECK:	rfedb	r0!
-0x00 0x0a 0x30 0xf9
-
-# CHECK-NOT:	rsbeq	r0, r2, r0, lsl #0
-# CHECK:	rsbeq	r0, r2, r0
-0x00 0x00 0x62 0x00
-
-# CHECK-NOT:	rsceqs	r0, r0, r1, lsl #0
-# CHECK:	rsceqs	r0, r0, r1
-0x01 0x00 0xf0 0x00
-
-# CHECK:	sbcs	r0, pc, #1
-0x01 0x00 0xdf 0xe2
-
-# CHECK:	sbfx	r0, r1, #0, #8
-0x51 0x00 0xa7 0xe7
-
-# CHECK:	ssat	r8, #1, r10, lsl #8
-0x1a 0x84 0xa0 0xe6
-
-# CHECK-NOT:	ssatmi	r0, #17, r12, lsl #0
-# CHECK:	ssatmi	r0, #17, r12
-0x1c 0x00 0xb0 0x46
-
-# CHECK:	stmdb	r10!, {r4, r5, r6, r7, lr}
-0xf0 0x40 0x2a 0xe9
-
-# CHECK:	teq	r0, #31
-0x1f 0x00 0x30 0xe3
-
-# CHECK:	ubfx	r0, r0, #16, #1
-0x50 0x08 0xe0 0xe7
-
-# CHECK:	usat	r8, #0, r10, asr #32
-0x5a 0x80 0xe0 0xe6
diff --git a/test/MC/Disassembler/dg.exp b/test/MC/Disassembler/dg.exp
deleted file mode 100644
index fc2f17a6fbaa..000000000000
--- a/test/MC/Disassembler/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt
deleted file mode 100644
index 826ff2272efa..000000000000
--- a/test/MC/Disassembler/neon-tests.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-
-# CHECK:	vbif	q15, q7, q0
-0x50 0xe1 0x7e 0xf3
-
-# CHECK:	vcvt.f32.s32	q15, q0, #1
-0x50 0xee 0xff 0xf2
-
-# CHECK:	vdup.32	q3, d1[0]
-0x41 0x6c 0xb4 0xf3
-
-# VLD1q8_UPD (with ${dst:dregpair} operand)
-# CHECK:	vld1.8	{d17, d18}, [r6], r5
-0x05 0x1a 0x66 0xf4
-
-# CHECK:	vld4.8	{d0, d1, d2, d3}, [r2], r7
-0x07 0x00 0x22 0xf4
-
-# CHECK:	vld4.8	{d4, d6, d8, d10}, [r2]
-0x0f 0x41 0x22 0xf4
-
-# CHECK:	vmov	d0, d15
-0x1f 0x01 0x2f 0xf2
-
-# CHECK:	vmov.i64	q6, #0xFF00FF00FF
-0x75 0xce 0x81 0xf2
-
-# CHECK:	vmvn.i32	d0, #0x0
-0x30 0x00 0x80 0xf2
-
-# CHECK:	vmul.f32	d0, d0, d6
-0x16 0x0d 0x00 0xf3
-
-# CHECK:	vneg.f32	q0, q0
-0xc0 0x07 0xb9 0xf3
-
-# CHECK:	vqrdmulh.s32	d0, d0, d3[1]
-0x63 0x0d 0xa0 0xf2
-
-# CHECK:	vrshr.s32	d0, d0, #16
-0x10 0x02 0xb0 0xf2
-
-# CHECK:	vshll.i16	q3, d1, #16
-0x01 0x63 0xb6 0xf3
-
-# CHECK:	vsri.32	q15, q0, #1
-0x50 0xe4 0xff 0xf3
-
-# CHECK:	vtbx.8	d18, {d4, d5, d6}, d7
-0x47 0x2a 0xf4 0xf3
-
diff --git a/test/MC/Disassembler/simple-tests.txt b/test/MC/Disassembler/simple-tests.txt
deleted file mode 100644
index dcc3763b7b7d..000000000000
--- a/test/MC/Disassembler/simple-tests.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
-
-# CHECK: int	$33
-0xCD 0x21 
-
-# CHECK: int	$33
-0xCD 0x21
-
-
-# CHECK: addb	%al, (%rax)
-0 0
-
-# CHECK: callq	-1234
-0xe8 0x2e 0xfb 0xff 0xff
-
-# CHECK: lfence
-0x0f 0xae 0xe8
-
-# CHECK: mfence
-0x0f 0xae 0xf0
-
-# CHECK: monitor
-0x0f 0x01 0xc8
-
-# CHECK: mwait
-0x0f 0x01 0xc9
-
-# CHECK: vmcall
-0x0f 0x01 0xc1
-
-# CHECK: vmlaunch
-0x0f 0x01 0xc2
-
-# CHECK: vmresume
-0x0f 0x01 0xc3
-
-# CHECK: vmxoff
-0x0f 0x01 0xc4
-
-# CHECK: swapgs
-0x0f 0x01 0xf8
-
-# CHECK: rdtscp
-0x0f 0x01 0xf9
-
-# CHECK: vmxon
-0xf3 0x0f 0xc7 0x30
-
-# CHECK: vmptrld
-0x0f 0xc7 0x30
-
-# CHECK: vmptrst
-0x0f 0xc7 0x38
-
-# CHECK: movl $0, -4(%rbp)
-0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
-
-# CHECK: movq	%cr0, %rcx
-0x0f 0x20 0xc1
-
-# CHECK: leal	4(%rsp), %ecx
-0x8d 0x4c 0x24 0x04 
\ No newline at end of file
diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt
deleted file mode 100644
index 06d12fed87fb..000000000000
--- a/test/MC/Disassembler/thumb-tests.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
-
-# CHECK:	add	r5, sp, #68
-0x11 0xad
-
-# CHECK:	adcs	r0, r0, #1
-0x50 0xf1 0x01 0x00
-
-# CHECK:	b	#34
-0x0f 0xe0
-
-# CHECK:	b.w	#-12
-0xff 0xf7 0xf8 0xaf
-
-# CHECK:	bfi	r2, r10, #0, #1
-0x6a 0xf3 0x00 0x02
-
-# CHECK:	cbnz	r7, #20
-0x57 0xb9
-
-# CHECK:	cmp	r3, r4
-0xa3 0x42
-
-# CHECK:	cmn.w	r0, #31
-0x10 0xf1 0x1f 0x0f
-
-# CHECK:	ldmia	r0!, {r1}
-0x02 0xc8
-
-# CHECK:	ldrb.w	r8, #-24
-0x1f 0xf8 0x18 0x80
-
-# CHECK:	ldrd	r0, r1, [r7, #64]!
-0xf7 0xe9 0x10 0x01
-
-# CHECK:	lsls.w	r0, pc, #1
-0x5f 0xea 0x4f 0x00
-
-# CHECK:	mov	r11, r7
-0xbb 0x46
-
-# CHECK:	pkhtb	r2, r4, r6, asr #16
-0xc4 0xea 0x26 0x42
-
-# CHECK-NOT:	pkhbt	r2, r4, r6, lsl #0
-# CHECK:	pkhbt	r2, r4, r6
-0xc4 0xea 0x06 0x02
-
-# CHECK:	pop	{r2, r4, r6, r8, r10, r12}
-0xbd 0xe8 0x54 0x15
-
-# CHECK:	push	{r2, r4, r6, r8, r10, r12}
-0x2d 0xe9 0x54 0x15
-
-# CHECK:	rsbs	r0, r0, #0
-0x40 0x42
-
-# CHECK-NOT:	rsb	r0, r2, r0, lsl #0
-# CHECK:	rsb	r0, r2, r0
-0xc2 0xeb 0x00 0x00
-
-# CHECK-NOT:	ssat	r0, #17, r12, lsl #0
-# CHECK:	ssat	r0, #17, r12
-0x0c 0xf3 0x10 0x00
-
-# CHECK:	strd	r0, [r7, #64]
-0xc7 0xe9 0x10 0x01
-
-# CHECK:	sub	sp, #60
-0x8f 0xb0
-
-# CHECK:	subw	r0, pc, #1
-0xaf 0xf2 0x01 0x00
-
-# CHECK:	subw	r0, sp, #835
-0xad 0xf2 0x43 0x30
-
-# CHECK:	uqadd16	r3, r4, r5
-0x94 0xfa 0x55 0xf3
-
-# CHECK:	usada8	r5, r4, r3, r2
-0x74 0xfb 0x03 0x25
-
-# CHECK:	uxtab16	r1, r2, r3, ror #8
-0x32 0xfa 0x93 0xf1
-
-# IT block begin
-# CHECK:	ittte	eq
-0x03 0xbf
-
-# CHECK:	moveq	r3, #3
-0x03 0x23
-
-# CHECK:	asreq	r1, r0, #5
-0x41 0x11
-
-# CHECK:	lsleq	r1, r0, #28
-0x01 0x07
-
-# CHECK:	stmiane	r0!, {r1, r2, r3}
-0x0e 0xc0
-
-# IT block end
-# CHECK:	rsbs	r1, r2, #0
-0x51 0x42
diff --git a/test/MC/ELF/abs.s b/test/MC/ELF/abs.s
new file mode 100644
index 000000000000..c598b11e291d
--- /dev/null
+++ b/test/MC/ELF/abs.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that zed will be an ABS symbol
+
+.Lfoo:
+.Lbar:
+        zed = .Lfoo - .Lbar
+
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000001) # 'zed'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x0000fff1)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
diff --git a/test/MC/ELF/alias-reloc.s b/test/MC/ELF/alias-reloc.s
new file mode 100644
index 000000000000..c908c12404d5
--- /dev/null
+++ b/test/MC/ELF/alias-reloc.s
@@ -0,0 +1,52 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that this produces a R_X86_64_PLT32 with bar.
+
+        .globl foo
+foo:
+bar = foo
+        .section zed, "", @progbits
+        call bar@PLT
+
+
+// Test that this produres a relocation with bar2
+
+    .weak    foo2
+foo2:
+    .weak    bar2
+    .set    bar2,foo2
+    .quad    bar2
+
+// CHECK:       # Symbol 0x00000001
+// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+
+// CHECK:      # Symbol 0x00000006
+// CHECK-NEXT: (('st_name', 0x0000000e) # 'bar2'
+// CHECK-NEXT:  ('st_bind', 0x00000002)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
+// CHECK-NEXT:  ('st_value', 0x0000000000000005)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
+// CHECK:       # Relocation 0x00000000
+// CHECK-NEXT:  (('r_offset', 0x00000001)
+// CHECK-NEXT:   ('r_sym', 0x00000001)
+// CHECK-NEXT:   ('r_type', 0x00000004)
+// CHECK-NEXT:   ('r_addend', 0xfffffffc)
+// CHECK-NEXT:  ),
+
+// CHECK:      # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000005)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000001)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
new file mode 100644
index 000000000000..42d54bc431dc
--- /dev/null
+++ b/test/MC/ELF/alias.s
@@ -0,0 +1,85 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+foo:
+bar = foo
+
+        .globl	foo2
+foo2 = bar2
+
+foo3:
+	.globl	bar3
+bar3 = foo3
+
+// Test that bar4 is also a function
+        .type	foo4,@function
+foo4:
+bar4 = foo4
+
+        .long foo2
+// CHECK:       # Symbol 0x00000001
+// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x0000001d) # 'bar4'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000002)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Symbol 0x00000003
+// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Symbol 0x00000004
+// CHECK-NEXT:  (('st_name', 0x0000000e) # 'foo3'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000005
+// CHECK-NEXT: (('st_name', 0x00000018) # 'foo4'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000002)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000006
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000007
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000008
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000009
+// CHECK-NEXT:  (('st_name', 0x00000013) # 'bar3'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK:       # Symbol 0x0000000a
+// CHECK-NEXT:  (('st_name', 0x00000009) # 'bar2'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
diff --git a/test/MC/ELF/align-bss.s b/test/MC/ELF/align-bss.s
new file mode 100644
index 000000000000..4f73a29f8e2d
--- /dev/null
+++ b/test/MC/ELF/align-bss.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the bss section is correctly aligned
+
+	.local	foo
+	.comm	foo,2048,16
+
+// CHECK:        ('sh_name', 0x0000000d) # '.bss'
+// CHECK-NEXT:   ('sh_type', 0x00000008)
+// CHECK-NEXT:   ('sh_flags', 0x00000003)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000040)
+// CHECK-NEXT:   ('sh_size', 0x00000800)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000010)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
diff --git a/test/MC/ELF/align-nops.s b/test/MC/ELF/align-nops.s
new file mode 100644
index 000000000000..28d4b895f5d9
--- /dev/null
+++ b/test/MC/ELF/align-nops.s
@@ -0,0 +1,40 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+// Test that we get optimal nops in text
+    .text
+f0:
+    .long 0
+    .align  8, 0x00000090
+    .long 0
+    .align  8
+
+// But not in another section
+    .data
+    .long 0
+    .align  8, 0x00000090
+    .long 0
+    .align  8
+
+// CHECK: (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr',
+// CHECK-NEXT:  ('sh_offset',
+// CHECK-NEXT:  ('sh_size', 0x00000010)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '00000000 0f1f4000 00000000 0f1f4000')
+
+// CHECK: (('sh_name', 0x00000007) # '.data'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000003)
+// CHECK-NEXT:  ('sh_addr',
+// CHECK-NEXT:  ('sh_offset',
+// CHECK-NEXT:  ('sh_size', 0x00000010)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '00000000 90909090 00000000 00000000')
diff --git a/test/MC/ELF/align-size.s b/test/MC/ELF/align-size.s
new file mode 100644
index 000000000000..85331d7ae915
--- /dev/null
+++ b/test/MC/ELF/align-size.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the alignment does contribute to the size of the section.
+
+	.zero 4
+	.align	8
+
+// CHECK:      (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000040)
+// CHECK-NEXT:  ('sh_size', 0x00000008)
diff --git a/test/MC/ELF/align-text.s b/test/MC/ELF/align-text.s
new file mode 100644
index 000000000000..1d2dacb3a067
--- /dev/null
+++ b/test/MC/ELF/align-text.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the .text directive doesn't cause alignment.
+
+        .zero 1
+        .text
+        .zero 1
+
+// CHECK:      (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000040)
+// CHECK-NEXT:   ('sh_size', 0x00000002)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/align.s b/test/MC/ELF/align.s
new file mode 100644
index 000000000000..c3912a7c67b0
--- /dev/null
+++ b/test/MC/ELF/align.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the alignment of rodata doesn't force a alignment of the
+// previous section (.bss)
+
+	nop
+	.section	.rodata,"a",@progbits
+	.align	8
+
+// CHECK: # Section 0x00000003
+// CHECK-NEXT:  (('sh_name', 0x0000000d) # '.bss'
+// CHECK-NEXT:   ('sh_type', 0x00000008)
+// CHECK-NEXT:   ('sh_flags', 0x00000003)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000044)
+// CHECK-NEXT:   ('sh_size', 0x00000000)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000012) # '.rodata'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000048)
+// CHECK-NEXT:   ('sh_size', 0x00000000)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000008)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
diff --git a/test/MC/ELF/bad-section.s b/test/MC/ELF/bad-section.s
new file mode 100644
index 000000000000..73d89ce765a4
--- /dev/null
+++ b/test/MC/ELF/bad-section.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o /dev/null 2>%t
+// RUN: FileCheck --input-file=%t %s
+
+// CHECK: error: unexpected token in directive
+// CHECK: .section "foo"-bar
+
+// test that we don't accept this, as gas doesn't.
+
+.section "foo"-bar
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
new file mode 100644
index 000000000000..fa97da44d422
--- /dev/null
+++ b/test/MC/ELF/basic-elf-32.s
@@ -0,0 +1,78 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+
+	.text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+# BB#0:
+	subl	$4, %esp
+	movl	$.L.str1, (%esp)
+	calll	puts
+	movl	$.L.str2, (%esp)
+	calll	puts
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+.Ltmp0:
+	.size	main, .Ltmp0-main
+
+	.type	.L.str1,@object         # @.str1
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str1:
+	.asciz	 "Hello"
+	.size	.L.str1, 6
+
+	.type	.L.str2,@object         # @.str2
+.L.str2:
+	.asciz	 "World!"
+	.size	.L.str2, 7
+
+	.section	.note.GNU-stack,"",@progbits
+
+// CHECK: ('e_indent[EI_CLASS]', 0x00000001)
+// CHECK: ('e_indent[EI_DATA]', 0x00000001)
+// CHECK: ('e_indent[EI_VERSION]', 0x00000001)
+// CHECK: ('_sections', [
+// CHECK:   # Section 0
+// CHECK:   (('sh_name', 0x00000000) # ''
+
+// CHECK:   # '.text'
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK:   # 'main'
+// CHECK:   ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000002)
+
+// CHECK:   # 'puts'
+// CHECK:   ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000000)
+
+// CHECK:   # '.rel.text'
+
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0x00000000
+// CHECK:     (('r_offset', 0x00000006)
+// CHECK:      ('r_type', 0x00000001)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000001
+// CHECK:     (('r_offset', 0x0000000b)
+// CHECK:      ('r_type', 0x00000002)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000002
+// CHECK:     (('r_offset', 0x00000012)
+// CHECK:      ('r_type', 0x00000001)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000003
+// CHECK:     (('r_offset', 0x00000017)
+// CHECK:      ('r_type', 0x00000002)
+// CHECK:     ),
+// CHECK:   ])
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
new file mode 100644
index 000000000000..7fc40b790ff8
--- /dev/null
+++ b/test/MC/ELF/basic-elf-64.s
@@ -0,0 +1,82 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+
+        .text
+	.globl	main
+	.align	16, 0x90
+	.type	main,@function
+main:                                   # @main
+# BB#0:
+	subq	$8, %rsp
+	movl	$.L.str1, %edi
+	callq	puts
+	movl	$.L.str2, %edi
+	callq	puts
+	xorl	%eax, %eax
+	addq	$8, %rsp
+	ret
+.Ltmp0:
+	.size	main, .Ltmp0-main
+
+	.type	.L.str1,@object         # @.str1
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.L.str1:
+	.asciz	 "Hello"
+	.size	.L.str1, 6
+
+	.type	.L.str2,@object         # @.str2
+.L.str2:
+	.asciz	 "World!"
+	.size	.L.str2, 7
+
+	.section	.note.GNU-stack,"",@progbits
+
+// CHECK: ('e_indent[EI_CLASS]', 0x00000002)
+// CHECK: ('e_indent[EI_DATA]', 0x00000001)
+// CHECK: ('e_indent[EI_VERSION]', 0x00000001)
+// CHECK: ('_sections', [
+// CHECK:   # Section 0
+// CHECK:   (('sh_name', 0x00000000) # ''
+
+// CHECK:   # '.text'
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK: ('st_bind', 0x00000000)
+// CHECK: ('st_type', 0x00000003)
+
+// CHECK:   # 'main'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000002)
+
+// CHECK:   # 'puts'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000000)
+
+// CHECK:   # '.rela.text'
+
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0x00000000
+// CHECK:     (('r_offset', 0x00000005)
+// CHECK:      ('r_type', 0x0000000a)
+// CHECK:      ('r_addend', 0x00000000)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000001
+// CHECK:     (('r_offset', 0x0000000a)
+// CHECK:      ('r_type', 0x00000002)
+// CHECK:      ('r_addend', 0xfffffffc)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000002
+// CHECK:     (('r_offset', 0x0000000f)
+// CHECK:      ('r_type', 0x0000000a)
+// CHECK:      ('r_addend', 0x00000006)
+// CHECK:     ),
+// CHECK:     # Relocation 0x00000003
+// CHECK:     (('r_offset', 0x00000014)
+// CHECK:      ('r_type', 0x00000002)
+// CHECK:      ('r_addend', 0xfffffffc)
+// CHECK:     ),
+// CHECK:   ])
diff --git a/test/MC/ELF/call-abs.s b/test/MC/ELF/call-abs.s
new file mode 100644
index 000000000000..885c2d19bad2
--- /dev/null
+++ b/test/MC/ELF/call-abs.s
@@ -0,0 +1,24 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+
+	.text
+	.globl	f
+	.type	f,@function
+f:                                      # @f
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	calll	42
+	incl	%eax
+	addl	$4, %esp
+	ret
+.Ltmp0:
+	.size	f, .Ltmp0-f
+
+	.section	.note.GNU-stack,"",@progbits
+
+// CHECK:      ('_relocations', [
+// CHECK-NEXT:  # Relocation 0x00000000
+// CHECK-NEXT:  (('r_offset', 0x00000004)
+// CHECK-NEXT:   ('r_sym', 0x00000000)
+// CHECK-NEXT:   ('r_type', 0x00000002)
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ])
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
new file mode 100644
index 000000000000..3ffdd6cf0288
--- /dev/null
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -0,0 +1,45 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+// test that this produces a correctly encoded cfi_advance_loc2
+
+f:
+	.cfi_startproc
+        nop
+        .zero 255, 0x90
+	.cfi_def_cfa_offset 8
+        nop
+	.cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000148)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01010000 00030001 0e080000')
+// CHECK-NEXT: ),
+
+
+// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000258)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
new file mode 100644
index 000000000000..efefb8789dce
--- /dev/null
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+	subq	$8, %rsp
+	.cfi_def_cfa_offset 16
+        nop
+	addq	$8, %rsp
+	.cfi_def_cfa_offset 8
+	ret
+	.cfi_endproc
+
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000050)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 0a000000 00440e10 450e0800')
+// CHECK-NEXT: ),
+
+// CHECK:       # Section 0x00000008
+// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000160)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
new file mode 100644
index 000000000000..3df20218a273
--- /dev/null
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_def_cfa_register 6
+        nop
+	.cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000048)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410d06 00000000')
+// CHECK-NEXT: ),
+
+// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
new file mode 100644
index 000000000000..1ad427b310c3
--- /dev/null
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_def_cfa 7, 8
+        nop
+	.cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000048)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410c07 08000000')
+// CHECK-NEXT: ),
+
+
+// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
new file mode 100644
index 000000000000..2f7e7976fa57
--- /dev/null
+++ b/test/MC/ELF/cfi-offset.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_offset %ebp, -16
+        nop
+	.cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000048)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00418602 00000000')
+// CHECK-NEXT: ),
+
+
+// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
new file mode 100644
index 000000000000..b5b380368f06
--- /dev/null
+++ b/test/MC/ELF/cfi-remember.s
@@ -0,0 +1,45 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_remember_state
+        nop
+	.cfi_restore_state
+        nop
+	.cfi_endproc
+
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000048)
+// CHECK-NEXT:  ('sh_size', 0x00000030)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 03000000 00410a41 0b000000')
+// CHECK-NEXT: ),
+
+// CHECK:      # Section 0x00000008
+// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000158)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
new file mode 100644
index 000000000000..5585e296da54
--- /dev/null
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+// Test that we don't produce a DW_CFA_advance_loc 0
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_def_cfa_offset 16
+        nop
+	.cfi_remember_state
+	.cfi_def_cfa_offset 8
+        nop
+	.cfi_restore_state
+        nop
+	.cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000048)
+// CHECK-NEXT:  ('sh_size', 0x00000038)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 04000000 00410e10 410a0e08 410b0000 00000000')
+// CHECK-NEXT: ),
+
+// CHECK:      (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000160)
+// CHECK-NEXT:  ('sh_size', 0x00000018)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
new file mode 100644
index 000000000000..93fd2e792238
--- /dev/null
+++ b/test/MC/ELF/cfi.s
@@ -0,0 +1,674 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f1:
+        .cfi_startproc
+	.cfi_lsda 0x3, bar
+        nop
+        .cfi_endproc
+
+f2:
+        .cfi_startproc
+        .cfi_personality 0x00, foo
+	.cfi_lsda 0x3, bar
+        nop
+        .cfi_endproc
+
+f3:
+        .cfi_startproc
+	.cfi_lsda 0x3, bar
+        nop
+        .cfi_endproc
+
+f4:
+        .cfi_startproc
+        .cfi_personality 0x00, foo
+	.cfi_lsda 0x2, bar
+        nop
+        .cfi_endproc
+
+f5:
+        .cfi_startproc
+        .cfi_personality 0x02, foo
+        nop
+        .cfi_endproc
+
+f6:
+        .cfi_startproc
+        .cfi_personality 0x03, foo
+        nop
+        .cfi_endproc
+
+f7:
+        .cfi_startproc
+        .cfi_personality 0x04, foo
+        nop
+        .cfi_endproc
+
+f8:
+        .cfi_startproc
+        .cfi_personality 0x0a, foo
+        nop
+        .cfi_endproc
+
+f9:
+        .cfi_startproc
+        .cfi_personality 0x0b, foo
+        nop
+        .cfi_endproc
+
+f10:
+        .cfi_startproc
+        .cfi_personality 0x0c, foo
+        nop
+        .cfi_endproc
+
+f11:
+        .cfi_startproc
+        .cfi_personality 0x08, foo
+        nop
+        .cfi_endproc
+
+f12:
+        .cfi_startproc
+        .cfi_personality 0x10, foo
+        nop
+        .cfi_endproc
+
+f13:
+        .cfi_startproc
+        .cfi_personality 0x12, foo
+        nop
+        .cfi_endproc
+
+f14:
+        .cfi_startproc
+        .cfi_personality 0x13, foo
+        nop
+        .cfi_endproc
+
+f15:
+        .cfi_startproc
+        .cfi_personality 0x14, foo
+        nop
+        .cfi_endproc
+
+f16:
+        .cfi_startproc
+        .cfi_personality 0x1a, foo
+        nop
+        .cfi_endproc
+
+f17:
+        .cfi_startproc
+        .cfi_personality 0x1b, foo
+        nop
+        .cfi_endproc
+
+f18:
+        .cfi_startproc
+        .cfi_personality 0x1c, foo
+        nop
+        .cfi_endproc
+
+f19:
+        .cfi_startproc
+        .cfi_personality 0x18, foo
+        nop
+        .cfi_endproc
+
+f20:
+        .cfi_startproc
+        .cfi_personality 0x80, foo
+        nop
+        .cfi_endproc
+
+f21:
+        .cfi_startproc
+        .cfi_personality 0x82, foo
+        nop
+        .cfi_endproc
+
+f22:
+        .cfi_startproc
+        .cfi_personality 0x83, foo
+        nop
+        .cfi_endproc
+
+f23:
+        .cfi_startproc
+        .cfi_personality 0x84, foo
+        nop
+        .cfi_endproc
+
+f24:
+        .cfi_startproc
+        .cfi_personality 0x8a, foo
+        nop
+        .cfi_endproc
+
+f25:
+        .cfi_startproc
+        .cfi_personality 0x8b, foo
+        nop
+        .cfi_endproc
+
+f26:
+        .cfi_startproc
+        .cfi_personality 0x8c, foo
+        nop
+        .cfi_endproc
+
+f27:
+        .cfi_startproc
+        .cfi_personality 0x88, foo
+        nop
+        .cfi_endproc
+
+f28:
+        .cfi_startproc
+        .cfi_personality 0x90, foo
+        nop
+        .cfi_endproc
+
+f29:
+        .cfi_startproc
+        .cfi_personality 0x92, foo
+        nop
+        .cfi_endproc
+
+f30:
+        .cfi_startproc
+        .cfi_personality 0x93, foo
+        nop
+        .cfi_endproc
+
+f31:
+        .cfi_startproc
+        .cfi_personality 0x94, foo
+        nop
+        .cfi_endproc
+
+f32:
+        .cfi_startproc
+        .cfi_personality 0x9a, foo
+        nop
+        .cfi_endproc
+
+f33:
+        .cfi_startproc
+        .cfi_personality 0x9b, foo
+        nop
+        .cfi_endproc
+
+f34:
+        .cfi_startproc
+        .cfi_personality 0x9c, foo
+        nop
+        .cfi_endproc
+
+f36:
+        .cfi_startproc
+        .cfi_personality 0x98, foo
+        nop
+        .cfi_endproc
+
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x00000012) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000068)
+// CHECK-NEXT:  ('sh_size', 0x000006c8)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a4c52 00017810 02031b0c 07089001 14000000 1c000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000003 1b0c0708 90010000 14000000 28000000 00000000 01000000 04000000 00000000 14000000 70000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000002 1b0c0708 90010000 10000000 28000000 00000000 01000000 02000000 18000000 00000000 017a5052 00017810 04020000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06030000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a040000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 040a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 060b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a0c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a080000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a100000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04120000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06130000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a140000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 041a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 061b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a1c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a180000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a800000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04820000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06830000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a840000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 048a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 068b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a8c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a880000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a900000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04920000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06930000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a940000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 049a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 069b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a9c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a980000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000')
+// CHECK-NEXT: ),
+
+// CHECK:        # Section 0x00000008
+// CHECK-NEXT: (('sh_name', 0x00000036) # '.rela.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000004)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000bf8)
+// CHECK-NEXT:  ('sh_size', 0x000006c0)
+// CHECK-NEXT:  ('sh_link', 0x00000006)
+// CHECK-NEXT:  ('sh_info', 0x00000004)
+// CHECK-NEXT:  ('sh_addralign', 0x00000008)
+// CHECK-NEXT:  ('sh_entsize', 0x00000018)
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset', 0x00000020)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000001
+// CHECK-NEXT:   (('r_offset', 0x00000029)
+// CHECK-NEXT:    ('r_sym', 0x00000028)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000002
+// CHECK-NEXT:   (('r_offset', 0x00000043)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000003
+// CHECK-NEXT:   (('r_offset', 0x0000005c)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000001)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000004
+// CHECK-NEXT:   (('r_offset', 0x00000065)
+// CHECK-NEXT:    ('r_sym', 0x00000028)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000005
+// CHECK-NEXT:   (('r_offset', 0x00000074)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000002)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000006
+// CHECK-NEXT:   (('r_offset', 0x0000007d)
+// CHECK-NEXT:    ('r_sym', 0x00000028)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000007
+// CHECK-NEXT:   (('r_offset', 0x00000097)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000008
+// CHECK-NEXT:   (('r_offset', 0x000000b0)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000003)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000009
+// CHECK-NEXT:   (('r_offset', 0x000000b9)
+// CHECK-NEXT:    ('r_sym', 0x00000028)
+// CHECK-NEXT:    ('r_type', 0x0000000c)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000a
+// CHECK-NEXT:   (('r_offset', 0x000000ce)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000c)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000b
+// CHECK-NEXT:   (('r_offset', 0x000000e0)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000004)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000c
+// CHECK-NEXT:   (('r_offset', 0x000000fe)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000d
+// CHECK-NEXT:   (('r_offset', 0x00000110)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000005)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000e
+// CHECK-NEXT:   (('r_offset', 0x0000012e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000000f
+// CHECK-NEXT:   (('r_offset', 0x00000144)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000006)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000010
+// CHECK-NEXT:   (('r_offset', 0x00000162)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000c)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000011
+// CHECK-NEXT:   (('r_offset', 0x00000174)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000007)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000012
+// CHECK-NEXT:   (('r_offset', 0x00000192)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000013
+// CHECK-NEXT:   (('r_offset', 0x000001a4)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000008)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000014
+// CHECK-NEXT:   (('r_offset', 0x000001c2)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000015
+// CHECK-NEXT:   (('r_offset', 0x000001d8)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000009)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000016
+// CHECK-NEXT:   (('r_offset', 0x000001f6)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000017
+// CHECK-NEXT:   (('r_offset', 0x0000020c)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000a)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000018
+// CHECK-NEXT:   (('r_offset', 0x0000022a)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000019
+// CHECK-NEXT:   (('r_offset', 0x00000240)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000b)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001a
+// CHECK-NEXT:   (('r_offset', 0x0000025e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000d)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001b
+// CHECK-NEXT:   (('r_offset', 0x00000270)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000c)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001c
+// CHECK-NEXT:   (('r_offset', 0x0000028e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001d
+// CHECK-NEXT:   (('r_offset', 0x000002a0)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000d)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001e
+// CHECK-NEXT:   (('r_offset', 0x000002be)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000001f
+// CHECK-NEXT:   (('r_offset', 0x000002d4)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000e)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000020
+// CHECK-NEXT:   (('r_offset', 0x000002f2)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000d)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000021
+// CHECK-NEXT:   (('r_offset', 0x00000304)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000000f)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000022
+// CHECK-NEXT:   (('r_offset', 0x00000322)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000023
+// CHECK-NEXT:   (('r_offset', 0x00000334)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000010)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000024
+// CHECK-NEXT:   (('r_offset', 0x00000352)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000025
+// CHECK-NEXT:   (('r_offset', 0x00000368)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000011)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000026
+// CHECK-NEXT:   (('r_offset', 0x00000386)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000027
+// CHECK-NEXT:   (('r_offset', 0x0000039c)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000012)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000028
+// CHECK-NEXT:   (('r_offset', 0x000003ba)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000029
+// CHECK-NEXT:   (('r_offset', 0x000003d0)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000013)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002a
+// CHECK-NEXT:   (('r_offset', 0x000003ee)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000c)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002b
+// CHECK-NEXT:   (('r_offset', 0x00000400)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000014)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002c
+// CHECK-NEXT:   (('r_offset', 0x0000041e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002d
+// CHECK-NEXT:   (('r_offset', 0x00000430)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000015)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002e
+// CHECK-NEXT:   (('r_offset', 0x0000044e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000002f
+// CHECK-NEXT:   (('r_offset', 0x00000464)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000016)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000030
+// CHECK-NEXT:   (('r_offset', 0x00000482)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000c)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000031
+// CHECK-NEXT:   (('r_offset', 0x00000494)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000017)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000032
+// CHECK-NEXT:   (('r_offset', 0x000004b2)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000a)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000033
+// CHECK-NEXT:   (('r_offset', 0x000004c4)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000018)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000034
+// CHECK-NEXT:   (('r_offset', 0x000004e2)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000035
+// CHECK-NEXT:   (('r_offset', 0x000004f8)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000019)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000036
+// CHECK-NEXT:   (('r_offset', 0x00000516)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000001)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000037
+// CHECK-NEXT:   (('r_offset', 0x0000052c)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001a)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000038
+// CHECK-NEXT:   (('r_offset', 0x0000054a)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000039
+// CHECK-NEXT:   (('r_offset', 0x00000560)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001b)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003a
+// CHECK-NEXT:   (('r_offset', 0x0000057e)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000d)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003b
+// CHECK-NEXT:   (('r_offset', 0x00000590)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001c)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003c
+// CHECK-NEXT:   (('r_offset', 0x000005ae)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003d
+// CHECK-NEXT:   (('r_offset', 0x000005c0)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001d)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003e
+// CHECK-NEXT:   (('r_offset', 0x000005de)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x0000003f
+// CHECK-NEXT:   (('r_offset', 0x000005f4)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001e)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000040
+// CHECK-NEXT:   (('r_offset', 0x00000612)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x0000000d)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000041
+// CHECK-NEXT:   (('r_offset', 0x00000624)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x0000001f)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000042
+// CHECK-NEXT:   (('r_offset', 0x00000642)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000043
+// CHECK-NEXT:   (('r_offset', 0x00000654)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000020)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000044
+// CHECK-NEXT:   (('r_offset', 0x00000672)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000045
+// CHECK-NEXT:   (('r_offset', 0x00000688)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000021)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000046
+// CHECK-NEXT:   (('r_offset', 0x000006a6)
+// CHECK-NEXT:    ('r_sym', 0x00000029)
+// CHECK-NEXT:    ('r_type', 0x00000018)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:   # Relocation 0x00000047
+// CHECK-NEXT:   (('r_offset', 0x000006bc)
+// CHECK-NEXT:    ('r_sym', 0x00000024)
+// CHECK-NEXT:    ('r_type', 0x00000002)
+// CHECK-NEXT:    ('r_addend', 0x00000022)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
new file mode 100644
index 000000000000..0f1164e82068
--- /dev/null
+++ b/test/MC/ELF/comdat.s
@@ -0,0 +1,86 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+
+// Test that we produce the group sections and that they are a the beginning
+// of the file.
+
+// CHECK:       # Section 0x00000001
+// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:   ('sh_type', 0x00000011)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000040)
+// CHECK-NEXT:   ('sh_size', 0x0000000c)
+// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_info', 0x00000001)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000004)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000002
+// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:   ('sh_type', 0x00000011)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000004c)
+// CHECK-NEXT:   ('sh_size', 0x00000008)
+// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_info', 0x00000002)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000004)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 0x00000003
+// CHECK-NEXT:  (('sh_name', 0x00000026) # '.group'
+// CHECK-NEXT:   ('sh_type', 0x00000011)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000054)
+// CHECK-NEXT:   ('sh_size', 0x00000008)
+// CHECK-NEXT:   ('sh_link', 0x0000000c)
+// CHECK-NEXT:   ('sh_info', 0x0000000d)
+// CHECK-NEXT:   ('sh_addralign', 0x00000004)
+// CHECK-NEXT:   ('sh_entsize', 0x00000004)
+// CHECK-NEXT:  ),
+
+// Test that g1 and g2 are local, but g3 is an undefined global.
+
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000001) # 'g1'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000007)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x00000004) # 'g2'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000002)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
+// CHECK:      # Symbol 0x0000000d
+// CHECK-NEXT: (('st_name', 0x00000007) # 'g3'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
+
+	.section	.foo,"axG",@progbits,g1,comdat
+g1:
+        nop
+
+        .section	.bar,"axG",@progbits,g1,comdat
+        nop
+
+        .section	.zed,"axG",@progbits,g2,comdat
+        nop
+
+        .section	.baz,"axG",@progbits,g3,comdat
+        .long g3
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
new file mode 100644
index 000000000000..16b677b9e880
--- /dev/null
+++ b/test/MC/ELF/common.s
@@ -0,0 +1,88 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+
+	.text
+
+// Test that this produces a regular local symbol.
+	.type	common1,@object
+	.local	common1
+	.comm	common1,1,1
+
+// CHECK: ('st_name', 0x00000001) # 'common1'
+// CHECK-NEXT: ('st_bind', 0x00000000)
+// CHECK-NEXT: ('st_type', 0x00000001)
+// CHECK-NEXT: ('st_other', 0x00000000)
+// CHECK-NEXT: ('st_shndx',
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000001)
+
+
+// Same as common1, but with directives in a different order.
+	.local	common2
+	.type	common2,@object
+	.comm	common2,1,1
+
+// CHECK: ('st_name', 0x00000009) # 'common2'
+// CHECK-NEXT: ('st_bind', 0x00000000)
+// CHECK-NEXT: ('st_type', 0x00000001)
+// CHECK-NEXT: ('st_other', 0x00000000)
+// CHECK-NEXT: ('st_shndx',
+// CHECK-NEXT: ('st_value', 0x0000000000000001)
+// CHECK-NEXT: ('st_size', 0x0000000000000001)
+
+        .local	common6
+        .comm	common6,8,16
+
+// CHECK:      # Symbol 0x00000003
+// CHECK-NEXT: (('st_name', 0x00000011) # 'common6'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000001)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000003)
+// CHECK-NEXT:  ('st_value', 0x0000000000000010)
+// CHECK-NEXT:  ('st_size', 0x0000000000000008)
+// CHECK-NEXT: ),
+
+// Test that without an explicit .local we produce a global.
+	.type	common3,@object
+	.comm	common3,4,4
+
+// CHECK: ('st_name', 0x00000019) # 'common3'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000001)
+// CHECK-NEXT: ('st_other', 0x00000000)
+// CHECK-NEXT: ('st_shndx', 0x0000fff2)
+// CHECK-NEXT: ('st_value', 0x0000000000000004)
+// CHECK-NEXT: ('st_size', 0x0000000000000004)
+
+
+// Test that without an explicit .local we produce a global, even if the first
+// occurrence is not in a directive.
+	.globl	foo
+	.type	foo,@function
+foo:
+	movsbl	common4+3(%rip), %eax
+
+
+	.type	common4,@object
+	.comm	common4,40,16
+
+// CHECK: ('st_name', 0x00000025) # 'common4'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+// CHECK-NEXT: ('st_type', 0x00000001)
+// CHECK-NEXT: ('st_other', 0x00000000)
+// CHECK-NEXT: ('st_shndx', 0x0000fff2)
+// CHECK-NEXT: ('st_value', 0x0000000000000010)
+// CHECK-NEXT: ('st_size', 0x0000000000000028)
+
+        .comm	common5,4,4
+
+// CHECK:      # Symbol 0x00000009
+// CHECK-NEXT: (('st_name', 0x0000002d) # 'common5'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000001)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x0000fff2)
+// CHECK-NEXT:  ('st_value', 0x0000000000000004)
+// CHECK-NEXT:  ('st_size', 0x0000000000000004)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/common2.s b/test/MC/ELF/common2.s
new file mode 100644
index 000000000000..b54cdfe143fb
--- /dev/null
+++ b/test/MC/ELF/common2.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the common symbols are placed at the end of .bss. In this example
+// it causes .bss to have size 9 instead of 8.
+
+	.local	vimvardict
+	.comm	vimvardict,1,8
+	.bss
+        .zero 1
+	.align	8
+
+// CHECK:      (('sh_name', 0x0000000d) # '.bss'
+// CHECK-NEXT:  ('sh_type',
+// CHECK-NEXT:  ('sh_flags'
+// CHECK-NEXT:  ('sh_addr',
+// CHECK-NEXT:  ('sh_offset',
+// CHECK-NEXT:  ('sh_size', 0x00000009)
+// CHECK-NEXT:  ('sh_link',
+// CHECK-NEXT:  ('sh_info',
+// CHECK-NEXT:  ('sh_addralign',
+// CHECK-NEXT:  ('sh_entsize',
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
new file mode 100644
index 000000000000..2979ca28f1ec
--- /dev/null
+++ b/test/MC/ELF/debug-line.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+// Test that .debug_line is populated.
+
+// CHECK:     (('sh_name', 0x00000012) # '.debug_line'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000000)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000044)
+// CHECK-NEXT: ('sh_size', 0x00000037)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000001)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ('_section_data', '33000000 02001c00 00000101 fb0e0d00 01010101 00000001 00000100 666f6f2e 63000000 00000009 02000000 00000000 00150204 000101')
+
+	.section	.debug_line,"",@progbits
+	.text
+
+	.file 1 "foo.c"
+	.loc 1 4 0
+	subq	$8, %rsp
diff --git a/test/MC/ELF/debug-loc.s b/test/MC/ELF/debug-loc.s
new file mode 100644
index 000000000000..36ae485ef062
--- /dev/null
+++ b/test/MC/ELF/debug-loc.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that we don't regress on the size of the line info section. We used
+// to handle negative line diffs incorrectly which manifested as very
+// large integers being passed to DW_LNS_advance_line.
+
+// FIXME: This size is the same as gnu as, but we can probably do a bit better.
+// FIXME2: We need a debug_line dumper so that we can test the actual contents.
+
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x00000012) # '.debug_line'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000044)
+// CHECK-NEXT:  ('sh_size', 0x0000003d)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
+
+	.section	.debug_line,"",@progbits
+	.text
+foo:
+	.file 1 "Driver.ii"
+	.loc 1 2 0
+        nop
+	.loc 1 4 0
+        nop
+	.loc 1 3 0
+        nop
diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp
index 7b7bd4e73807..d46d700975e5 100644
--- a/test/MC/ELF/dg.exp
+++ b/test/MC/ELF/dg.exp
@@ -1,5 +1,5 @@
 load_lib llvm.exp
 
 if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
 }
diff --git a/test/MC/ELF/diff.s b/test/MC/ELF/diff.s
new file mode 100644
index 000000000000..1879a39e8b9a
--- /dev/null
+++ b/test/MC/ELF/diff.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+        .global zed
+foo:
+        nop
+bar:
+        nop
+zed:
+        mov zed+(bar-foo), %eax
+
+// CHECK:       # Relocation 0x00000000
+// CHECK-NEXT:  (('r_offset', 0x00000005)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend', 0x00000001)
diff --git a/test/MC/ELF/diff2.s b/test/MC/ELF/diff2.s
new file mode 100644
index 000000000000..4a9fbd1d83a7
--- /dev/null
+++ b/test/MC/ELF/diff2.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2> %t
+// RUN: FileCheck -input-file %t %s
+
+.global zed
+        .data
+foo:
+        .text
+        nop
+bar:
+        nop
+zed:
+// CHECK: expected relocatable expression
+        mov zed+(bar-foo), %eax
diff --git a/test/MC/ELF/elf_directive_previous.s b/test/MC/ELF/elf_directive_previous.s
new file mode 100644
index 000000000000..5db1eac03d39
--- /dev/null
+++ b/test/MC/ELF/elf_directive_previous.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
+
+.bss
+# CHECK: .bss
+
+.text
+# CHECK: .text
+
+.previous
+# CHECK: .bss
+
+.previous
+# CHECK: .text
diff --git a/test/MC/ELF/elf_directive_section.s b/test/MC/ELF/elf_directive_section.s
new file mode 100644
index 000000000000..9531c026e674
--- /dev/null
+++ b/test/MC/ELF/elf_directive_section.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
+
+	.bss
+# CHECK: .bss
+
+	.data.rel.ro
+# CHECK: .data.rel.ro
+
+	.data.rel
+# CHECK: .data.rel
+
+	.eh_frame
+# CHECK: .eh_frame
+
+	.rodata
+# CHECK: .rodata
+
+	.tbss
+# CHECK: .tbss
+
+	.tdata
+# CHECK: .tdata
+
diff --git a/test/MC/ELF/empty-dwarf-lines.s b/test/MC/ELF/empty-dwarf-lines.s
new file mode 100644
index 000000000000..0f791ae2aa3e
--- /dev/null
+++ b/test/MC/ELF/empty-dwarf-lines.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+
+// Test that the dwarf debug_line section contains no line directives.
+
+        .file   1 "test.c"
+        .globl  c
+c:
+        .asciz   "hi\n"
+
+// CHECK:      # Section 0x00000004
+// CHECK-NEXT: (('sh_name', 0x00000012) # '.debug_line'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000044)
+// CHECK-NEXT:  ('sh_size', 0x00000027)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
new file mode 100644
index 000000000000..e351936b901f
--- /dev/null
+++ b/test/MC/ELF/empty.s
@@ -0,0 +1,70 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+
+// Test that like gnu as we create text, data and bss by default. Also test
+// that shstrtab, symtab and strtab are listed in that order.
+
+// CHECK:      ('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000006)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000000)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+
+// CHECK:      ('sh_name', 0x00000007) # '.data'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000003)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000000)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+
+// CHECK:      ('sh_name', 0x0000000d) # '.bss'
+// CHECK-NEXT: ('sh_type', 0x00000008)
+// CHECK-NEXT: ('sh_flags', 0x00000003)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000000)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+
+// CHECK:      ('sh_name', 0x00000012) # '.shstrtab'
+// CHECK-NEXT: ('sh_type', 0x00000003)
+// CHECK-NEXT:    ('sh_flags', 0x00000000)
+// CHECK-NEXT:    ('sh_addr', 0x00000000)
+// CHECK-NEXT:    ('sh_offset', 0x00000040)
+// CHECK-NEXT:    ('sh_size', 0x0000002c)
+// CHECK-NEXT:    ('sh_link', 0x00000000)
+// CHECK-NEXT:    ('sh_info', 0x00000000)
+// CHECK-NEXT:    ('sh_addralign', 0x00000001)
+// CHECK-NEXT:    ('sh_entsize', 0x00000000)
+
+// CHECK: ('sh_name', 0x0000001c) # '.symtab'
+// CHECK-NEXT:    ('sh_type', 0x00000002)
+// CHECK-NEXT:    ('sh_flags', 0x00000000)
+// CHECK-NEXT:    ('sh_addr', 0x00000000)
+// CHECK-NEXT:    ('sh_offset',
+// CHECK-NEXT:    ('sh_size', 0x00000060)
+// CHECK-NEXT:    ('sh_link', 0x00000006)
+// CHECK-NEXT:    ('sh_info', 0x00000004)
+// CHECK-NEXT:    ('sh_addralign', 0x00000008)
+// CHECK-NEXT:    ('sh_entsize', 0x00000018)
+
+// CHECK: ('sh_name', 0x00000024) # '.strtab'
+// CHECK-NEXT:    ('sh_type', 0x00000003)
+// CHECK-NEXT:    ('sh_flags', 0x00000000)
+// CHECK-NEXT:    ('sh_addr', 0x00000000)
+// CHECK-NEXT:    ('sh_offset',
+// CHECK-NEXT:    ('sh_size', 0x00000001)
+// CHECK-NEXT:    ('sh_link', 0x00000000)
+// CHECK-NEXT:    ('sh_info', 0x00000000)
+// CHECK-NEXT:    ('sh_addralign', 0x00000001)
+// CHECK-NEXT:    ('sh_entsize', 0x00000000)
diff --git a/test/MC/ELF/entsize.ll b/test/MC/ELF/entsize.ll
new file mode 100644
index 000000000000..21179dfda9a3
--- /dev/null
+++ b/test/MC/ELF/entsize.ll
@@ -0,0 +1,44 @@
+; RUN: llc -filetype=obj -mtriple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck -check-prefix=64 %s
+
+; Test that constant mergeable strings have sh_entsize set.
+
+@.str1 = private unnamed_addr constant [6 x i8] c"tring\00"
+@.str2 = private unnamed_addr constant [7 x i8] c"String\00"
+@.c8a = private unnamed_addr constant [1 x i64] [i64 42]
+@.c8b = private unnamed_addr constant [1 x i64] [i64 42]
+
+define i32 @main() nounwind {
+  %1 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0))
+  %2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i32 0, i32 0))
+  call void @foo(i64* getelementptr inbounds ([1 x i64]* @.c8a, i32 0, i32 0))
+  call void @foo(i64* getelementptr inbounds ([1 x i64]* @.c8b, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+declare void @foo(i64* nocapture) nounwind
+
+;;;;;
+
+; 64: (('sh_name', 0x00000012) # '.rodata.str1.1'
+; 64-NEXT:   ('sh_type', 0x00000001)
+; 64-NEXT:   ('sh_flags', 0x00000032)
+; 64-NEXT:   ('sh_addr',
+; 64-NEXT:   ('sh_offset',
+; 64-NEXT:   ('sh_size', 0x0000000d)
+; 64-NEXT:   ('sh_link',
+; 64-NEXT:   ('sh_info',
+; 64-NEXT:   ('sh_addralign', 0x00000001)
+; 64-NEXT:   ('sh_entsize', 0x00000001)
+
+; 64: (('sh_name', 0x00000021) # '.rodata.cst8'
+; 64-NEXT:   ('sh_type', 0x00000001)
+; 64-NEXT:   ('sh_flags', 0x00000012)
+; 64-NEXT:   ('sh_addr',
+; 64-NEXT:   ('sh_offset',
+; 64-NEXT:   ('sh_size', 0x00000010)
+; 64-NEXT:   ('sh_link',
+; 64-NEXT:   ('sh_info',
+; 64-NEXT:   ('sh_addralign', 0x00000008)
+; 64-NEXT:   ('sh_entsize', 0x00000008)
+
diff --git a/test/MC/ELF/entsize.s b/test/MC/ELF/entsize.s
new file mode 100644
index 000000000000..e8eb62eb2ea5
--- /dev/null
+++ b/test/MC/ELF/entsize.s
@@ -0,0 +1,69 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that mergeable constants have sh_entsize set.
+
+// 1 byte strings
+    .section	.rodata.str1.1,"aMS",@progbits,1
+
+    .type	.L.str1,@object         # @.str1
+.L.str1:
+	.asciz	 "tring"
+	.size	.L.str1, 6
+
+	.type	.L.str2,@object         # @.str2
+.L.str2:
+	.asciz	 "String"
+	.size	.L.str2, 7
+
+// 2 byte strings
+    .section	.rodata.str2.1,"aMS",@progbits,2
+	.type	.L.str3,@object         # @.str3
+.L.str3:
+	.asciz	 "L\000o\000n\000g\000"
+	.size	.L.str3, 9
+
+	.type	.L.str4,@object         # @.str4
+.L.str4:
+	.asciz	 "o\000n\000g\000"
+	.size	.L.str4, 7
+
+ // 8 byte constants
+    .section	.rodata.cst8,"aM",@progbits,8
+    .quad 42
+    .quad 42
+
+// CHECK: # Section 0x00000004
+// CHECK-NEXT:   ('sh_name', 0x00000012) # '.rodata.str1.1'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000032)
+// CHECK-NEXT:   ('sh_addr',
+// CHECK-NEXT:   ('sh_offset',
+// CHECK-NEXT:   ('sh_size', 0x0000000d)
+// CHECK-NEXT:   ('sh_link',
+// CHECK-NEXT:   ('sh_info',
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000001)
+
+// CHECK: # Section 0x00000005
+// CHECK-NEXT:   ('sh_name', 0x00000021) # '.rodata.str2.1'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000032)
+// CHECK-NEXT:   ('sh_addr',
+// CHECK-NEXT:   ('sh_offset',
+// CHECK-NEXT:   ('sh_size', 0x00000010)
+// CHECK-NEXT:   ('sh_link',
+// CHECK-NEXT:   ('sh_info',
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000002)
+
+// CHECK: # Section 0x00000006
+// CHECK-NEXT:   ('sh_name', 0x00000030) # '.rodata.cst8
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000012)
+// CHECK-NEXT:   ('sh_addr',
+// CHECK-NEXT:   ('sh_offset',
+// CHECK-NEXT:   ('sh_size', 0x00000010)
+// CHECK-NEXT:   ('sh_link',
+// CHECK-NEXT:   ('sh_info',
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000008)
diff --git a/test/MC/ELF/file.s b/test/MC/ELF/file.s
new file mode 100644
index 000000000000..d8ccbe6e5aee
--- /dev/null
+++ b/test/MC/ELF/file.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+
+// Test that the STT_FILE symbol precedes the other local symbols.
+
+.file "foo"
+foa:
+// CHECK:    # Symbol 0x00000001
+// CHECK-NEXT:    (('st_name', 0x00000001) # 'foo'
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000004)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x0000fff1)
+// CHECK-NEXT:     ('st_value', 0x0000000000000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:    # Symbol 0x00000002
+// CHECK-NEXT:    (('st_name', 0x00000005) # 'foa'
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000000)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000001)
+// CHECK-NEXT:     ('st_value', 0x0000000000000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
new file mode 100644
index 000000000000..aa6328760d44
--- /dev/null
+++ b/test/MC/ELF/global-offset.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// We test that _GLOBAL_OFFSET_TABLE_ will account for the two bytes at the
+// start of the addl.
+
+        addl    $_GLOBAL_OFFSET_TABLE_, %ebx
+
+// CHECK:      ('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: ('sh_type',
+// CHECK-NEXT: ('sh_flags',
+// CHECK-NEXT: ('sh_addr',
+// CHECK-NEXT: ('sh_offset',
+// CHECK-NEXT: ('sh_size',
+// CHECK-NEXT: ('sh_link',
+// CHECK-NEXT: ('sh_info',
+// CHECK-NEXT: ('sh_addralign',
+// CHECK-NEXT: ('sh_entsize',
+// CHECK-NEXT: ('_section_data', '81c30200 0000')
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
new file mode 100644
index 000000000000..798150e0f5f9
--- /dev/null
+++ b/test/MC/ELF/got.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that this produces a R_X86_64_GOT32 and that we have an undefined
+// reference to _GLOBAL_OFFSET_TABLE_.
+
+        movl	foo@GOT, %eax
+        movl	foo@GOTPCREL(%rip), %eax
+
+// CHECK:     (('st_name', 0x00000005) # '_GLOBAL_OFFSET_TABLE_'
+// CHECK-NEXT: ('st_bind', 0x00000001)
+
+// CHECK:      ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset',
+// CHECK-NEXT:     ('r_sym',
+// CHECK-NEXT:     ('r_type', 0x00000003)
+// CHECK-NEXT:     ('r_addend',
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   # Relocation 0x00000001
+// CHECK-NEXT:    (('r_offset',
+// CHECK-NEXT:     ('r_sym',
+// CHECK-NEXT:     ('r_type', 0x00000009)
+// CHECK-NEXT:     ('r_addend',
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
diff --git a/test/MC/ELF/ident.s b/test/MC/ELF/ident.s
new file mode 100644
index 000000000000..f79458f34501
--- /dev/null
+++ b/test/MC/ELF/ident.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+// CHECK:       (('sh_name', 0x00000012) # '.comment'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000030)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000040)
+// CHECK-NEXT:   ('sh_size', 0x0000000d)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000001)
+// CHECK-NEXT:   ('_section_data', '00666f6f 00626172 007a6564 00')
+
+        .ident "foo"
+        .ident "bar"
+        .ident "zed"
diff --git a/test/MC/ELF/invalid-symver.s b/test/MC/ELF/invalid-symver.s
new file mode 100644
index 000000000000..3c4f8c084b94
--- /dev/null
+++ b/test/MC/ELF/invalid-symver.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t 2> %t.out
+// RUN: FileCheck --input-file=%t.out %s
+
+// CHECK: A @@ version cannot be undefined
+
+        .symver undefined, foo@@bar
+        .long undefined
diff --git a/test/MC/ELF/leb128.s b/test/MC/ELF/leb128.s
new file mode 100644
index 000000000000..e5f31f4834dd
--- /dev/null
+++ b/test/MC/ELF/leb128.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+        .sleb128 .Lfoo - .Lbar
+.Lfoo:
+        .uleb128 .Lbar - .Lfoo
+        .fill 126, 1, 0x90
+.Lbar:
+
+// CHECK:     (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000006)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000081)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ('_section_data', '817f7f90 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90')
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
new file mode 100644
index 000000000000..c2b477188cb2
--- /dev/null
+++ b/test/MC/ELF/local-reloc.s
@@ -0,0 +1,31 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that relocations with local symbols are represented as relocations
+// with the section. They should be equivalent, but gas behaves like this.
+
+	movl	foo, %r14d
+foo:
+
+// Section number 1 is .text
+// CHECK:        # Section 0x00000001
+// CHECK-next:  (('sh_name', 0x00000001) # '.text'
+
+// Symbol number 2 is section number 1
+// CHECK:    # Symbol 0x00000002
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000003)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000001)
+// CHECK-NEXT:     ('st_value', 0x0000000000000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+
+// Relocation refers to symbol number 2
+// CHECK:      ('_relocations', [
+// CHECK-NEXT:  # Relocation 0x00000000
+// CHECK-NEXT:   (('r_offset',
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type',
+// CHECK-NEXT:    ('r_addend',
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
new file mode 100644
index 000000000000..ec0222890071
--- /dev/null
+++ b/test/MC/ELF/merge.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that PIC relocations with local symbols in a mergeable section are done
+// with a reference to the symbol. Not sure if this is a linker limitation,
+// but this matches the behavior of gas.
+
+// Non-PIC relocations with 0 offset don't use the symbol.
+
+
+        movsd   .Lfoo(%rip), %xmm1
+        movl	$.Lfoo, %edi
+        movl	$.Lfoo+2, %edi
+        jmp	foo@PLT
+        movq 	foo@GOTPCREL, %rax
+        movq    zed, %rax
+
+        .section        .sec1,"aM",@progbits,16
+.Lfoo:
+zed:
+        .global zed
+
+        .section	bar,"ax",@progbits
+foo:
+
+// Section 4 is "sec1"
+// CHECK: # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000012) # '.sec1'
+
+// Symbol number 1 is .Lfoo
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000001) # '.Lfoo'
+
+// Symbol number 2 is foo
+// CHECK:      # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x00000007) # 'foo'
+
+// Symbol number 6 is section 4
+// CHECK:        # Symbol 0x00000006
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000003)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000004)
+
+// Symbol number 8 is zed
+// CHECK:        # Symbol 0x00000008
+// CHECK-NEXT:    (('st_name', 0x0000000b) # 'zed'
+
+// Relocation 0 refers to symbol 1
+// CHECK:       ('_relocations', [
+// CHECK-NEXT:   # Relocation 0
+// CHECK-NEXT:   (('r_offset',
+// CHECK-NEXT:    ('r_sym', 0x00000001)
+// CHECK-NEXT:    ('r_type', 0x00000002
+// CHECK-NEXT:    ('r_addend',
+// CHECK-NEXT:   ),
+
+// Relocation 1 refers to symbol 6
+// CHECK-NEXT:  # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend',
+// CHECK-NEXT: ),
+
+// Relocation 2 refers to symbol 1
+// CHECK-NEXT:   # Relocation 0x00000002
+// CHECK-NEXT:   (('r_offset',
+// CHECK-NEXT:    ('r_sym', 0x00000001)
+// CHECK-NEXT:    ('r_type', 0x0000000a
+// CHECK-NEXT:    ('r_addend',
+// CHECK-NEXT:   ),
+
+// Relocation 3 refers to symbol 2
+// CHECK-NEXT:   # Relocation 0x00000003
+// CHECK-NEXT:   (('r_offset',
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000004
+// CHECK-NEXT:    ('r_addend',
+// CHECK-NEXT:   ),
+
+// Relocation 4 refers to symbol 2
+// CHECK-NEXT:   # Relocation 0x00000004
+// CHECK-NEXT:   (('r_offset',
+// CHECK-NEXT:    ('r_sym', 0x00000002)
+// CHECK-NEXT:    ('r_type', 0x00000009
+// CHECK-NEXT:    ('r_addend',
+// CHECK-NEXT:   ),
+
+// Relocation 5 refers to symbol 8
+// CHECK-NEXT:   # Relocation 0x00000005
+// CHECK-NEXT:   (('r_offset', 0x00000023)
+// CHECK-NEXT:    ('r_sym', 0x00000008)
+// CHECK-NEXT:    ('r_type', 0x0000000b)
+// CHECK-NEXT:    ('r_addend', 0x00000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
diff --git a/test/MC/ELF/n_bytes.s b/test/MC/ELF/n_bytes.s
new file mode 100644
index 000000000000..59d67bfa71f5
--- /dev/null
+++ b/test/MC/ELF/n_bytes.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+        .2byte 42, 1, 2, 3
+        .4byte 42, 1, 2, 3
+        .8byte 42, 1, 2, 3
+        .int 42, 1, 2, 3
+
+// CHECK:      # Section 0x00000001
+// CHECK-NEXT: (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000040)
+// CHECK-NEXT:  ('sh_size', 0x00000048)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000004)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ('_section_data', '2a000100 02000300 2a000000 01000000 02000000 03000000 2a000000 00000000 01000000 00000000 02000000 00000000 03000000 00000000 2a000000 01000000 02000000 03000000')
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/no-fixup.s b/test/MC/ELF/no-fixup.s
new file mode 100644
index 000000000000..6e719bcc8c11
--- /dev/null
+++ b/test/MC/ELF/no-fixup.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -stats 2>%t.out
+// RUN: FileCheck --input-file=%t.out %s
+
+// Test that we create no fixups for this file since "a" and "b" are in the
+// same fragment.
+
+// CHECK:      assembler - Number of assembler layout and relaxation steps
+// CHECK-NEXT: assembler - Number of emitted assembler fragments
+// CHECK-NEXT: assembler - Number of emitted object file bytes
+// CHECK-NEXT: assembler - Number of fragment layouts
+// CHECK-NEXT: mcexpr    - Number of MCExpr evaluations
+
+a:
+  nop
+b:
+  .long b - a
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
new file mode 100644
index 000000000000..87b6f3aa85b8
--- /dev/null
+++ b/test/MC/ELF/noexec.s
@@ -0,0 +1,24 @@
+// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck  %s
+
+// CHECK:       # Section 0x00000004
+// CHECK-NEXT:  (('sh_name', 0x00000012) # '.note.GNU-stack'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000040)
+// CHECK-NEXT:   ('sh_size', 0x00000000)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ),
+
+// CHECK:       # Symbol 0x00000004
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000003)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000004)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/norelocation.s b/test/MC/ELF/norelocation.s
new file mode 100644
index 000000000000..0a0efe1ed6d6
--- /dev/null
+++ b/test/MC/ELF/norelocation.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+        call bar
+bar:
+
+// CHECK: ('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000006)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000005)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ('_section_data', 'e8000000 00')
+// CHECK-NOT: .rela.text
+// CHECK: shstrtab
diff --git a/test/MC/ELF/pic-diff.s b/test/MC/ELF/pic-diff.s
new file mode 100644
index 000000000000..d1fc909dba8a
--- /dev/null
+++ b/test/MC/ELF/pic-diff.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// CHECK:         # Symbol 0x00000005
+// CHECK-NEXT:    (('st_name', 0x00000005) # 'baz'
+// CHECK-NEXT:     ('st_bind', 0x00000001)
+// CHECK-NEXT:     ('st_type', 0x00000000)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x00000000)
+// CHECK-NEXT:     ('st_value', 0x0000000000000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+// CHECK-NEXT:    ),
+
+// CHECK:       ('_relocations', [
+// CHECK-NEXT:    # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset', 0x0000000c)
+// CHECK-NEXT:     ('r_sym', 0x00000005)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x00000008)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+
+.zero 4
+.data
+
+.zero 1
+.align 4
+foo:
+.zero 8
+.long baz - foo
diff --git a/test/MC/ELF/plt.s b/test/MC/ELF/plt.s
new file mode 100644
index 000000000000..7d0073c400d6
--- /dev/null
+++ b/test/MC/ELF/plt.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that this produces a R_X86_64_PLT32.
+
+	jmp	foo@PLT
+
+// CHECK:      ('_relocations', [
+// CHECK-NEXT:   # Relocation 0x00000000
+// CHECK-NEXT:    (('r_offset',
+// CHECK-NEXT:     ('r_sym',
+// CHECK-NEXT:     ('r_type', 0x00000004)
+// CHECK-NEXT:     ('r_addend',
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
new file mode 100644
index 000000000000..3236b41e532c
--- /dev/null
+++ b/test/MC/ELF/relax-arith.s
@@ -0,0 +1,75 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that we correctly relax these instructions into versions that use
+// 16 or 32 bit immediate values.
+
+bar:
+// CHECK: 'imul'
+// CHECK: ('_section_data', '6669db00 0066691c 25000000 00000069 db000000 00691c25 00000000 00000000 4869db00 00000048 691c2500 00000000 000000')
+        .section imul
+        imul $foo, %bx,  %bx
+        imul $foo, bar,  %bx
+        imul $foo, %ebx, %ebx
+        imul $foo, bar,  %ebx
+        imul $foo, %rbx, %rbx
+        imul $foo, bar,  %rbx
+
+// CHECK: and'
+// CHECK:('_section_data', '6681e300 00668124 25000000 00000081 e3000000 00812425 00000000 00000000 4881e300 00000048 81242500 00000000 000000')
+        .section and
+        and  $foo, %bx
+        andw $foo, bar
+        and  $foo, %ebx
+        andl $foo, bar
+        and  $foo, %rbx
+        andq $foo, bar
+
+// CHECK: 'or'
+// CHECK: ('_section_data', '6681cb00 0066810c 25000000 00000081 cb000000 00810c25 00000000 00000000 4881cb00 00000048 810c2500 00000000 000000')
+        .section or
+        or  $foo, %bx
+        orw $foo, bar
+        or  $foo, %ebx
+        orl $foo, bar
+        or  $foo, %rbx
+        orq $foo, bar
+
+// CHECK: 'xor'
+// CHECK: ('_section_data', '6681f300 00668134 25000000 00000081 f3000000 00813425 00000000 00000000 4881f300 00000048 81342500 00000000 000000')
+        .section xor
+        xor  $foo, %bx
+        xorw $foo, bar
+        xor  $foo, %ebx
+        xorl $foo, bar
+        xor  $foo, %rbx
+        xorq $foo, bar
+
+// CHECK: 'add'
+// CHECK: ('_section_data', '6681c300 00668104 25000000 00000081 c3000000 00810425 00000000 00000000 4881c300 00000048 81042500 00000000 000000')
+        .section add
+        add  $foo, %bx
+        addw $foo, bar
+        add  $foo, %ebx
+        addl $foo, bar
+        add  $foo, %rbx
+        addq $foo, bar
+
+// CHECK: 'sub'
+// CHECK: ('_section_data', '6681eb00 0066812c 25000000 00000081 eb000000 00812c25 00000000 00000000 4881eb00 00000048 812c2500 00000000 000000')
+        .section sub
+        sub  $foo, %bx
+        subw $foo, bar
+        sub  $foo, %ebx
+        subl $foo, bar
+        sub  $foo, %rbx
+        subq $foo, bar
+
+// CHECK: 'cmp'
+// CHECK: ('_section_data', '6681fb00 0066813c 25000000 00000081 fb000000 00813c25 00000000 00000000 4881fb00 00000048 813c2500 00000000 000000')
+        .section cmp
+        cmp  $foo, %bx
+        cmpw $foo, bar
+        cmp  $foo, %ebx
+        cmpl $foo, bar
+        cmp  $foo, %rbx
+        cmpq $foo, bar
diff --git a/test/MC/ELF/relax-crash.s b/test/MC/ELF/relax-crash.s
new file mode 100644
index 000000000000..442825db81a2
--- /dev/null
+++ b/test/MC/ELF/relax-crash.s
@@ -0,0 +1,11 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
+
+// This is a test that we don't crash. We used to do so by going in a infinite
+// recursion trying to compute the size of a MCDwarfLineAddrFragment.
+
+       .section        .debug_line,"",@progbits
+       .text
+       .file 1 "Disassembler.ii"
+       .section foo
+       .loc 1 1 0
+       ret
diff --git a/test/MC/ELF/relax.s b/test/MC/ELF/relax.s
new file mode 100644
index 000000000000..2c0e285db597
--- /dev/null
+++ b/test/MC/ELF/relax.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that we do not relax these.
+
+bar:
+.globl foo
+foo:
+        .set	zed,foo
+
+        jmp bar
+        jmp foo
+        jmp zed
+
+// CHECK: ('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x00000006)
+// CHECK-NEXT: ('sh_addr', 0x00000000)
+// CHECK-NEXT: ('sh_offset', 0x00000040)
+// CHECK-NEXT: ('sh_size', 0x00000006)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x00000004)
+// CHECK-NEXT: ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ('_section_data', 'ebfeebfc ebfa')
+
+// CHECK:       # Symbol 0x00000006
+// CHECK-NEXT: (('st_name', 0x00000005) # 'foo'
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
new file mode 100644
index 000000000000..f106f89b7083
--- /dev/null
+++ b/test/MC/ELF/relocation-386.s
@@ -0,0 +1,226 @@
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump | FileCheck  %s
+
+// Test that we produce the correct relocation types and that the relocations
+// correctly point to the section or the symbol.
+
+// Section 3 is bss
+// CHECK:      # Section 0x00000003
+// CHECK-NEXT: (('sh_name', 0x0000000d) # '.bss'
+
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000005) # '.Lfoo'
+
+// Symbol 4 is zed
+// CHECK:      # Symbol 0x00000004
+// CHECK-NEXT: (('st_name', 0x00000035) # 'zed'
+// CHECK-NEXT:  ('st_value', 0x00000000)
+// CHECK-NEXT:  ('st_size', 0x00000000)
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000006)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
+
+// Symbol 7 is section 3
+// CHECK:      # Symbol 0x00000007
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_value', 0x00000000)
+// CHECK-NEXT:  ('st_size', 0x00000000)
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000003)
+
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000002)
+// CHECK-NEXT:  ('r_sym', 0x00000001)
+// CHECK-NEXT:  ('r_type', 0x00000009)
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym',
+// CHECK-NEXT:  ('r_type', 0x00000004)
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Relocation 0x00000002
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym',
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT: ),
+
+// Relocation 3 (bar3@GOTOFF) is done with symbol 7 (bss)
+// CHECK-NEXT:  # Relocation 0x00000003
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym', 0x00000007
+// CHECK-NEXT:  ('r_type',
+// CHECK-NEXT: ),
+
+// Relocation 4 (bar2@GOT) is of type R_386_GOT32
+// CHECK-NEXT:  # Relocation 0x00000004
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym',
+// CHECK-NEXT:  ('r_type', 0x00000003
+// CHECK-NEXT: ),
+
+// Relocation 5 (foo@TLSGD) is of type R_386_TLS_GD
+// CHECK-NEXT: # Relocation 0x00000005
+// CHECK-NEXT: (('r_offset', 0x00000020)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000012)
+// CHECK-NEXT: ),
+
+// Relocation 6 ($foo@TPOFF) is of type R_386_TLS_LE_32
+// CHECK-NEXT: # Relocation 0x00000006
+// CHECK-NEXT: (('r_offset', 0x00000025)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000022)
+// CHECK-NEXT: ),
+
+// Relocation 7 (foo@INDNTPOFF) is of type R_386_TLS_IE
+// CHECK-NEXT: # Relocation 0x00000007
+// CHECK-NEXT: (('r_offset', 0x0000002b)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x0000000f)
+// CHECK-NEXT: ),
+
+// Relocation 8 (foo@NTPOFF) is of type R_386_TLS_LE
+// CHECK-NEXT: # Relocation 0x00000008
+// CHECK-NEXT: (('r_offset', 0x00000031)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000011)
+// CHECK-NEXT: ),
+
+// Relocation 9 (foo@GOTNTPOFF) is of type R_386_TLS_GOTIE
+// CHECK-NEXT: # Relocation 0x00000009
+// CHECK-NEXT: (('r_offset', 0x00000037)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000010)
+// CHECK-NEXT: ),
+
+// Relocation 10 (foo@TLSLDM) is of type R_386_TLS_LDM
+// CHECK-NEXT: # Relocation 0x0000000a
+// CHECK-NEXT: (('r_offset', 0x0000003d)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000013)
+// CHECK-NEXT: ),
+
+// Relocation 11 (foo@DTPOFF) is of type R_386_TLS_LDO_32
+// CHECK-NEXT: # Relocation 0x0000000b
+// CHECK-NEXT: (('r_offset', 0x00000043)
+// CHECK-NEXT:  ('r_sym', 0x0000000d)
+// CHECK-NEXT:  ('r_type', 0x00000020)
+// CHECK-NEXT: ),
+// Relocation 12 (calll 4096) is of type R_386_PC32
+// CHECK-NEXT: # Relocation 0x0000000c
+// CHECK-NEXT: (('r_offset', 0x00000048)
+// CHECK-NEXT:  ('r_sym', 0x00000000)
+// CHECK-NEXT:  ('r_type', 0x00000002)
+// CHECK-NEXT: ),
+// Relocation 13 (zed@GOT) is of type R_386_GOT32 and uses the symbol
+// CHECK-NEXT: # Relocation 0x0000000d
+// CHECK-NEXT: (('r_offset', 0x0000004e)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000003)
+// CHECK-NEXT: ),
+// Relocation 14 (zed@GOTOFF) is of type R_386_GOTOFF and uses the symbol
+// CHECK-NEXT: # Relocation 0x0000000e
+// CHECK-NEXT: (('r_offset', 0x00000054)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000009)
+// CHECK-NEXT: ),
+// Relocation 15 (zed@INDNTPOFF) is of type R_386_TLS_IE and uses the symbol
+// CHECK-NEXT: # Relocation 0x0000000f
+// CHECK-NEXT: (('r_offset', 0x0000005a)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x0000000f)
+// CHECK-NEXT: ),
+// Relocation 16 (zed@NTPOFF) is of type R_386_TLS_LE and uses the symbol
+// CHECK-NEXT: # Relocation 0x00000010
+// CHECK-NEXT: (('r_offset', 0x00000060)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000011)
+// CHECK-NEXT: ),
+// Relocation 17 (zed@GOTNTPOFF) is of type R_386_TLS_GOTIE and uses the symbol
+// CHECK-NEXT: # Relocation 0x00000011
+// CHECK-NEXT: (('r_offset', 0x00000066)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000010)
+// CHECK-NEXT: ),
+// Relocation 18 (zed@PLT) is of type R_386_PLT32 and uses the symbol
+// CHECK-NEXT: # Relocation 0x00000012
+// CHECK-NEXT: (('r_offset', 0x0000006b)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000004)
+// CHECK-NEXT: ),
+// Relocation 19 (zed@TLSGD) is of type R_386_TLS_GD and uses the symbol
+// CHECK-NEXT: # Relocation 0x00000013
+// CHECK-NEXT: (('r_offset', 0x00000071)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000012)
+// CHECK-NEXT: ),
+// Relocation 20 (zed@TLSLDM) is of type R_386_TLS_LDM and uses the symbol
+// CHECK-NEXT: # Relocation 0x00000014
+// CHECK-NEXT: (('r_offset', 0x00000077)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000013)
+// CHECK-NEXT: ),
+// Relocation 21 (zed@TPOFF) is of type R_386_TLS_LE_32 and uses the symbol
+// CHECK-NEXT:# Relocation 0x00000015
+// CHECK-NEXT: (('r_offset', 0x0000007d)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000022)
+// CHECK-NEXT: ),
+// Relocation 22 (zed@DTPOFF) is of type R_386_TLS_LDO_32 and uses the symbol
+// CHECK-NEXT: Relocation 0x00000016
+// CHECK-NEXT: (('r_offset', 0x00000083)
+// CHECK-NEXT:  ('r_sym', 0x00000004)
+// CHECK-NEXT:  ('r_type', 0x00000020)
+// CHECK-NEXT: ),
+// Relocation 23 ($bar) is of type R_386_32 and uses the section
+// CHECK-NEXT: Relocation 0x00000017
+// CHECK-NEXT: (('r_offset',
+// CHECK-NEXT:  ('r_sym',
+// CHECK-NEXT:  ('r_type', 0x00000001)
+// CHECK-NEXT: ),
+
+        .text
+bar:
+	leal	.Lfoo@GOTOFF(%ebx), %eax
+
+        .global bar2
+bar2:
+	calll	bar2@PLT
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	movb	bar3@GOTOFF(%ebx), %al
+
+	.type	bar3,@object
+	.local	bar3
+	.comm	bar3,1,1
+
+        movl	bar2j@GOT(%eax), %eax
+
+        leal foo@TLSGD(, %ebx,1), %eax
+        movl $foo@TPOFF, %edx
+        movl foo@INDNTPOFF, %ecx
+        addl foo@NTPOFF(%eax), %eax
+        addl foo@GOTNTPOFF(%ebx), %ecx
+        leal foo@TLSLDM(%ebx), %eax
+        leal foo@DTPOFF(%eax), %edx
+        calll 4096
+        movl zed@GOT(%eax), %eax
+        movl zed@GOTOFF(%eax), %eax
+        movl zed@INDNTPOFF(%eax), %eax
+        movl zed@NTPOFF(%eax), %eax
+        movl zed@GOTNTPOFF(%eax), %eax
+        call zed@PLT
+        movl zed@TLSGD(%eax), %eax
+        movl zed@TLSLDM(%eax), %eax
+        movl zed@TPOFF(%eax), %eax
+        movl zed@DTPOFF(%eax), %eax
+        pushl $bar
+
+        .section        zedsec,"awT",@progbits
+zed:
+        .long 0
+
+        .section	.rodata.str1.16,"aMS",@progbits,1
+.Lfoo:
+	.asciz	 "bool llvm::llvm_start_multithreaded()"
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
new file mode 100644
index 000000000000..dabe721d90bc
--- /dev/null
+++ b/test/MC/ELF/relocation.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that we produce the correct relocation.
+
+bar:
+        movl	$bar, %edx        # R_X86_64_32
+        movq	$bar, %rdx        # R_X86_64_32S
+        movq	$bar, bar(%rip)   # R_X86_64_32S
+        movl	bar, %edx         # R_X86_64_32S
+        movq	bar, %rdx         # R_X86_64_32S
+.long bar                         # R_X86_64_32
+        leaq	foo@GOTTPOFF(%rip), %rax # R_X86_64_GOTTPOFF
+        leaq	foo@TLSGD(%rip), %rax    # R_X86_64_TLSGD
+        leaq	foo@TPOFF(%rax), %rax    # R_X86_64_TPOFF32
+        leaq	foo@TLSLD(%rip), %rdi    # R_X86_64_TLSLD
+        leaq	foo@dtpoff(%rax), %rcx   # R_X86_64_DTPOFF32
+        pushq    $bar
+        movq	foo(%rip), %rdx
+        leaq    foo-bar(%r14),%r14
+
+
+// CHECK:  # Section 0x00000001
+// CHECK: (('sh_name', 0x00000001) # '.text'
+
+// CHECK:   # Symbol 0x00000002
+// CHECK: (('st_name', 0x00000000) # ''
+// CHECK:  ('st_bind', 0x00000000)
+// CHECK:  ('st_type', 0x00000003)
+// CHECK:  ('st_other', 0x00000000)
+// CHECK:  ('st_shndx', 0x00000001)
+
+// CHECK: # Relocation 0x00000000
+// CHECK-NEXT:  (('r_offset', 0x00000001)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000a)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000001
+// CHECK-NEXT:  (('r_offset', 0x00000008)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000002
+// CHECK-NEXT:  (('r_offset', 0x00000013)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000003
+// CHECK-NEXT:  (('r_offset', 0x0000001a)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000004
+// CHECK-NEXT:  (('r_offset', 0x00000022)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000005
+// CHECK-NEXT:  (('r_offset', 0x00000026)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000a)
+// CHECK-NEXT:   ('r_addend',
+
+// CHECK: # Relocation 0x00000006
+// CHECK-NEXT:  (('r_offset', 0x0000002d)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x00000016)
+// CHECK-NEXT:   ('r_addend', 0xfffffffc)
+
+// CHECK:  # Relocation 0x00000007
+// CHECK-NEXT:  (('r_offset', 0x00000034)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x00000013)
+// CHECK-NEXT:   ('r_addend', 0xfffffffc)
+
+// CHECK:  # Relocation 0x00000008
+// CHECK-NEXT:  (('r_offset', 0x0000003b)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x00000017)
+// CHECK-NEXT:   ('r_addend', 0x00000000)
+
+// CHECK:  # Relocation 0x00000009
+// CHECK-NEXT:  (('r_offset', 0x00000042)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x00000014)
+// CHECK-NEXT:   ('r_addend', 0xfffffffc)
+
+// CHECK:  # Relocation 0x0000000a
+// CHECK-NEXT:  (('r_offset', 0x00000049)
+// CHECK-NEXT:   ('r_sym', 0x00000006)
+// CHECK-NEXT:   ('r_type', 0x00000015)
+// CHECK-NEXT:   ('r_addend', 0x00000000)
+
+// CHECK: # Relocation 0x0000000b
+// CHECK-NEXT:  (('r_offset', 0x0000004e)
+// CHECK-NEXT:   ('r_sym', 0x00000002)
+// CHECK-NEXT:   ('r_type', 0x0000000b)
+// CHECK-NEXT:   ('r_addend', 0x00000000)
+
+// CHECK: # Relocation 0x0000000c
+// CHECK-NEXT: (('r_offset', 0x00000055)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000002)
+// CHECK-NEXT:  ('r_addend', 0xfffffffc)
+
+// CHECK: # Relocation 0x0000000d
+// CHECK-NEXT: (('r_offset', 0x0000005c)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000002)
+// CHECK-NEXT:  ('r_addend', 0x0000005c)
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
new file mode 100644
index 000000000000..36065603e784
--- /dev/null
+++ b/test/MC/ELF/rename.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// When doing a rename, all the checks for where the relocation should go
+// should be performed with the original symbol. Only if we decide to relocate
+// with the symbol we should then use the renamed one.
+
+// This is a regression test for a bug where we used bar5@@@zed when deciding
+// if we should relocate with the symbol or with the section and we would then
+// not produce a relocation with .text.
+
+defined1:
+defined3:
+        .symver defined3, bar5@@@zed
+        .long defined3
+
+        .global defined1
+
+// Section 1 is .text
+// CHECK:      # Section 0x00000001
+// CHECK-NEXT: (('sh_name', 0x00000001) # '.text'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000040)
+// CHECK-NEXT:  ('sh_size', 0x00000004)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000004)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+
+// Symbol 2 is section 1
+// CHECK:      # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+
+// The relocation uses symbol 2
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000000)
+// CHECK-NEXT:  ('r_sym', 0x00000002)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
new file mode 100644
index 000000000000..861dc4f057fe
--- /dev/null
+++ b/test/MC/ELF/section.s
@@ -0,0 +1,110 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that these names are accepted.
+
+.section	.note.GNU-stack,"",@progbits
+.section	.note.GNU-stack2,"",%progbits
+.section	.note.GNU-,"",@progbits
+.section	-.note.GNU,"",@progbits
+
+// CHECK: ('sh_name', 0x00000012) # '.note.GNU-stack'
+// CHECK: ('sh_name', 0x00000022) # '.note.GNU-stack2'
+// CHECK: ('sh_name', 0x00000033) # '.note.GNU-'
+// CHECK: ('sh_name', 0x0000003e) # '-.note.GNU'
+
+// Test that the defaults are used
+
+.section	.init
+.section	.fini
+.section	.rodata
+.section	zed, ""
+
+// CHECK:      (('sh_name', 0x00000049) # '.init'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000050)
+// CHECK-NEXT:  ('sh_size', 0x00000000)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 0x0000000b
+// CHECK-NEXT: (('sh_name', 0x0000004f) # '.fini'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000006)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000050)
+// CHECK-NEXT:  ('sh_size', 0x00000000)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 0x0000000c
+// CHECK-NEXT: (('sh_name', 0x00000055) # '.rodata'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000002)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000050)
+// CHECK-NEXT:  ('sh_size', 0x00000000)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 0x0000000d
+// CHECK-NEXT: (('sh_name', 0x0000005d) # 'zed'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x00000000)
+// CHECK-NEXT:  ('sh_addr', 0x00000000)
+// CHECK-NEXT:  ('sh_offset', 0x00000050)
+// CHECK-NEXT:  ('sh_size', 0x00000000)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x00000001)
+// CHECK-NEXT:  ('sh_entsize', 0x00000000)
+// CHECK-NEXT: ),
+
+.section	.note.test,"",@note
+// CHECK:       (('sh_name', 0x00000061) # '.note.test'
+// CHECK-NEXT:   ('sh_type', 0x00000007)
+// CHECK-NEXT:   ('sh_flags', 0x00000000)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000050)
+// CHECK-NEXT:   ('sh_size', 0x00000000)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ),
+
+// Test that we can parse these
+foo:
+bar:
+.section        .text.foo,"axG",@progbits,foo,comdat
+.section        .text.bar,"axMG",@progbits,42,bar,comdat
+
+// Test that the default values are not used
+
+.section .eh_frame,"a",@unwind
+
+// CHECK:       (('sh_name', 0x00000080) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x70000001)
+// CHECK-NEXT:   ('sh_flags', 0x00000002)
+// CHECK-NEXT:   ('sh_addr', 0x00000000)
+// CHECK-NEXT:   ('sh_offset', 0x00000050)
+// CHECK-NEXT:   ('sh_size', 0x00000000)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x00000001)
+// CHECK-NEXT:   ('sh_entsize', 0x00000000)
+// CHECK-NEXT:  ),
+
+// Test that we handle the strings like gas
+.section bar-"foo"
+.section "foo"
+
+// CHECK: ('sh_name', 0x0000008a) # 'bar-"foo"'
+// CHECK: ('sh_name', 0x00000094) # 'foo'
diff --git a/test/MC/ELF/set.s b/test/MC/ELF/set.s
new file mode 100644
index 000000000000..69d6c910636f
--- /dev/null
+++ b/test/MC/ELF/set.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that we emit the correct value.
+
+.set kernbase,0xffffffff80000000
+
+// CHECK:         (('st_name', 0x00000001) # 'kernbase'
+// CHECK-NEXT:     ('st_bind', 0x00000000)
+// CHECK-NEXT:     ('st_type', 0x00000000)
+// CHECK-NEXT:     ('st_other', 0x00000000)
+// CHECK-NEXT:     ('st_shndx', 0x0000fff1)
+// CHECK-NEXT:     ('st_value', 0xffffffff80000000)
+// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+// CHECK-NEXT:    ),
+
+// Test that we accept .set of a symbol after it has been used in a statement.
+
+        jmp foo
+        .set foo, bar
+
+// or a .quad
+
+        .quad	foo2
+	.set	foo2,bar2
+
+// Test that there is an undefined reference to bar
+// CHECK:      (('st_name', 0x0000000a) # 'bar'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/sleb.s b/test/MC/ELF/sleb.s
new file mode 100644
index 000000000000..00e5b4bf2821
--- /dev/null
+++ b/test/MC/ELF/sleb.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
+
+	.text
+foo:
+	.sleb128	0
+	.sleb128	1
+	.sleb128	-1
+	.sleb128	63
+	.sleb128	-64
+
+	.sleb128	64
+	.sleb128	-65
+
+	.sleb128	8191
+	.sleb128        -8192
+
+	.sleb128        8193
+
+// ELF_32: ('sh_name', 0x00000001) # '.text'
+// ELF_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// ELF_64: ('sh_name', 0x00000001) # '.text'
+// ELF_64: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// MACHO_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// MACHO_64: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
new file mode 100644
index 000000000000..b99e71b869bd
--- /dev/null
+++ b/test/MC/ELF/symref.s
@@ -0,0 +1,165 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+defined1:
+defined2:
+defined3:
+        .symver defined1, bar1@zed
+        .symver undefined1, bar2@zed
+
+        .symver defined2, bar3@@zed
+
+        .symver defined3, bar5@@@zed
+        .symver undefined3, bar6@@@zed
+
+        .long defined1
+        .long undefined1
+        .long defined2
+        .long defined3
+        .long undefined3
+
+        .global global1
+        .symver global1, g1@@zed
+global1:
+
+
+// CHECK:      # Symbol 0x00000001
+// CHECK-NEXT: (('st_name', 0x00000013) # 'bar1@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000002
+// CHECK-NEXT: (('st_name', 0x00000025) # 'bar3@@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000003
+// CHECK-NEXT: (('st_name', 0x0000002f) # 'bar5@@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000004
+// CHECK-NEXT: (('st_name', 0x00000001) # 'defined1'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000005
+// CHECK-NEXT: (('st_name', 0x0000000a) # 'defined2'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000006
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000007
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000002)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000008
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000003)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000003)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000009
+// CHECK-NEXT: (('st_name', 0x0000004a) # 'g1@@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000014)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x0000000a
+// CHECK-NEXT: (('st_name', 0x00000042) # 'global1'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000014)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x0000000b
+// CHECK-NEXT: (('st_name', 0x0000001c) # 'bar2@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x0000000c
+// CHECK-NEXT: (('st_name', 0x00000039) # 'bar6@zed'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT:])
+
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000000)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000004)
+// CHECK-NEXT:  ('r_sym', 0x0000000b)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000002
+// CHECK-NEXT: (('r_offset', 0x00000008)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000003
+// CHECK-NEXT: (('r_offset', 0x0000000c)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000004
+// CHECK-NEXT: (('r_offset', 0x00000010)
+// CHECK-NEXT:  ('r_sym', 0x0000000c)
+// CHECK-NEXT:  ('r_type', 0x0000000a)
+// CHECK-NEXT:  ('r_addend', 0x00000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT:])
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
new file mode 100644
index 000000000000..459d4cc69df9
--- /dev/null
+++ b/test/MC/ELF/tls-i386.s
@@ -0,0 +1,64 @@
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that all symbols are of type STT_TLS.
+
+        movl    foo1@NTPOFF(%eax), %eax
+        movl    foo2@GOTNTPOFF(%eax), %eax
+        movl    foo3@TLSGD(%eax), %eax
+        movl    foo4@TLSLDM(%eax), %eax
+        movl    foo5@TPOFF(%eax), %eax
+        movl    foo6@DTPOFF(%eax), %eax
+
+// CHECK:       (('st_name', 0x00000001) # 'foo1'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000006
+// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000007
+// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000008
+// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000009
+// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000a
+// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
new file mode 100644
index 000000000000..2517a5bddc1f
--- /dev/null
+++ b/test/MC/ELF/tls.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that all symbols are of type STT_TLS.
+
+	leaq	foo1@TLSGD(%rip), %rdi
+        leaq    foo2@GOTTPOFF(%rip), %rdi
+        leaq    foo3@TLSLD(%rip), %rdi
+
+	.section	.zed,"awT",@progbits
+foobar:
+	.long	43
+
+// CHECK:      (('st_name', 0x00000010) # 'foobar'
+// CHECK-NEXT:  ('st_bind', 0x00000000)
+// CHECK-NEXT:  ('st_type', 0x00000006)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000004)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
+// CHECK:       # Symbol 0x00000007
+// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo1'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000008
+// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000009
+// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000006)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
new file mode 100644
index 000000000000..4b98c02e97d5
--- /dev/null
+++ b/test/MC/ELF/type.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that both % and @ are accepted.
+        .global foo
+        .type foo,%function
+foo:
+
+        .global bar
+        .type bar,@object
+bar:
+
+// Test that gnu_unique_object is accepted.
+        .type zed,@gnu_unique_object
+
+// CHECK:      # Symbol 0x00000004
+// CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000001)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000005
+// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000002)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000001)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/uleb.s b/test/MC/ELF/uleb.s
new file mode 100644
index 000000000000..1e4734bcafc6
--- /dev/null
+++ b/test/MC/ELF/uleb.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
+
+	.text
+foo:
+	.uleb128	0
+	.uleb128	1
+	.uleb128	127
+	.uleb128	128
+	.uleb128	16383
+	.uleb128	16384
+
+// ELF_32: ('sh_name', 0x00000001) # '.text'
+// ELF_32: ('_section_data', '00017f80 01ff7f80 8001')
+// ELF_64: ('sh_name', 0x00000001) # '.text'
+// ELF_64: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001')
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
new file mode 100644
index 000000000000..fc3a2d23be39
--- /dev/null
+++ b/test/MC/ELF/undef.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test which symbols should be in the symbol table
+
+        .long	.Lsym1
+.Lsym2:
+.Lsym3:
+.Lsym4 = .Lsym2 - .Lsym3
+        .long	.Lsym4
+
+	.type	.Lsym5,@object
+        .type   sym6,@object
+        .long sym6
+
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.Lsym7:
+.Lsym8:
+
+        .text
+        movsd   .Lsym8(%rip), %xmm1
+
+// CHECK:      ('_symbols', [
+// CHECK-NEXT:  # Symbol 0x00000000
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000001
+// CHECK-NEXT:  (('st_name', 0x0000000d) # '.Lsym8'
+// CHECK:       # Symbol 0x00000002
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000003
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000004
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000005
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK:       # Symbol 0x00000006
+// CHECK-NEXT:  (('st_name', 0x00000001) # '.Lsym1'
+// CHECK:       # Symbol 0x00000007
+// CHECK-NEXT:  (('st_name', 0x00000008) # 'sym6'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000001)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
diff --git a/test/MC/ELF/undef2.s b/test/MC/ELF/undef2.s
new file mode 100644
index 000000000000..9544fbc42ac8
--- /dev/null
+++ b/test/MC/ELF/undef2.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that this produces an undefined reference to .Lfoo
+
+        je	.Lfoo
+
+// CHECK: ('_symbols', [
+// CHECK:      (('st_name', 0x00000001) # '.Lfoo'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK: (('sh_name', 0x00000024) # '.strtab'
diff --git a/test/MC/ELF/weak.s b/test/MC/ELF/weak.s
new file mode 100644
index 000000000000..67e9b188eb06
--- /dev/null
+++ b/test/MC/ELF/weak.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that this produces a weak undefined symbol.
+
+	.weak	foo
+        .long   foo
+
+// And that bar is after all local symbols and has non zero value.
+        .weak bar
+bar:
+
+//CHECK:        # Symbol 0x00000004
+//CHECK-NEXT:   (('st_name', 0x00000005) # 'bar'
+//CHECK-NEXT:    ('st_bind', 0x00000002)
+//CHECK-NEXT:    ('st_type', 0x00000000)
+//CHECK-NEXT:    ('st_other', 0x00000000)
+//CHECK-NEXT:    ('st_shndx', 0x00000001)
+//CHECK-NEXT:    ('st_value', 0x0000000000000004)
+//CHECK-NEXT:    ('st_size', 0x0000000000000000)
+//CHECK-NEXT:   ),
+//CHECK-NEXT:   # Symbol 0x00000005
+//CHECK:       (('st_name', 0x00000001) # 'foo'
+//CHECK-NEXT:   ('st_bind', 0x00000002)
+//CHECK-NEXT:   ('st_type', 0x00000000)
+//CHECK-NEXT:   ('st_other', 0x00000000)
+//CHECK-NEXT:   ('st_shndx', 0x00000000)
+//CHECK-NEXT:   ('st_value', 0x0000000000000000)
+//CHECK-NEXT:   ('st_size', 0x0000000000000000)
+//CHECK-NEXT:  ),
+//CHECK-NEXT: ])
diff --git a/test/MC/ELF/weakref-plt.s b/test/MC/ELF/weakref-plt.s
new file mode 100644
index 000000000000..26ba3f6df543
--- /dev/null
+++ b/test/MC/ELF/weakref-plt.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+	.weakref	bar,foo
+	call	bar@PLT
+
+// CHECK:      # Symbol 0x00000005
+// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
+// CHECK-NEXT:  ('st_bind', 0x00000002)
diff --git a/test/MC/ELF/weakref-reloc.s b/test/MC/ELF/weakref-reloc.s
new file mode 100644
index 000000000000..c7cd7649d6f0
--- /dev/null
+++ b/test/MC/ELF/weakref-reloc.s
@@ -0,0 +1,49 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// Test that the relocations point to the correct symbols. We used to get the
+// symbol index wrong for weakrefs when creating _GLOBAL_OFFSET_TABLE_.
+
+	.weakref	bar,foo
+        call    zed@PLT
+	call	bar
+
+// CHECK:      # Symbol 0x00000004
+// CHECK-NEXT: (('st_name', 0x00000009) # '_GLOBAL_OFFSET_TABLE_'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000005
+// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
+// CHECK-NEXT:  ('st_bind', 0x00000002)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 0x00000006
+// CHECK-NEXT: (('st_name', 0x00000005) # 'zed'
+// CHECK-NEXT:  ('st_bind', 0x00000001)
+// CHECK-NEXT:  ('st_type', 0x00000000)
+// CHECK-NEXT:  ('st_other', 0x00000000)
+// CHECK-NEXT:  ('st_shndx', 0x00000000)
+// CHECK-NEXT:  ('st_value', 0x0000000000000000)
+// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
+// CHECK:      # Relocation 0x00000000
+// CHECK-NEXT: (('r_offset', 0x00000001)
+// CHECK-NEXT:  ('r_sym', 0x00000006)
+// CHECK-NEXT:  ('r_type', 0x00000004)
+// CHECK-NEXT:  ('r_addend', 0xfffffffc)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 0x00000001
+// CHECK-NEXT: (('r_offset', 0x00000006)
+// CHECK-NEXT:  ('r_sym', 0x00000005)
+// CHECK-NEXT:  ('r_type', 0x00000002)
+// CHECK-NEXT:  ('r_addend', 0xfffffffc)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
new file mode 100644
index 000000000000..aea10d1d9294
--- /dev/null
+++ b/test/MC/ELF/weakref.s
@@ -0,0 +1,234 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+
+// This is a long test that checks that the aliases created by weakref are
+// never in the symbol table and that the only case it causes a symbol to
+// be output as a weak undefined symbol is if that variable is not defined
+// in this file and all the references to it are done via the alias.
+
+        .weakref foo1, bar1
+
+        .weakref foo2, bar2
+        .long bar2
+
+        .weakref foo3, bar3
+        .long foo3
+
+        .weakref foo4, bar4
+        .long foo4
+        .long bar4
+
+        .weakref foo5, bar5
+        .long bar5
+        .long foo5
+
+bar6:
+        .weakref foo6, bar6
+
+bar7:
+        .weakref foo7, bar7
+        .long bar7
+
+bar8:
+        .weakref foo8, bar8
+        .long foo8
+
+bar9:
+        .weakref foo9, bar9
+        .long foo9
+        .long bar9
+
+bar10:
+        .global bar10
+        .weakref foo10, bar10
+        .long bar10
+        .long foo10
+
+bar11:
+        .global bar11
+        .weakref foo11, bar11
+
+bar12:
+        .global bar12
+        .weakref foo12, bar12
+        .long bar12
+
+bar13:
+        .global bar13
+        .weakref foo13, bar13
+        .long foo13
+
+bar14:
+        .global bar14
+        .weakref foo14, bar14
+        .long foo14
+        .long bar14
+
+bar15:
+        .global bar15
+        .weakref foo15, bar15
+        .long bar15
+        .long foo15
+
+// CHECK:       # Symbol 0x00000000
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000001
+// CHECK-NEXT:  (('st_name', 0x00000015) # 'bar6'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000018)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000002
+// CHECK-NEXT:  (('st_name', 0x0000001a) # 'bar7'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000018)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000003
+// CHECK-NEXT:  (('st_name', 0x0000001f) # 'bar8'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x000000000000001c)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000004
+// CHECK-NEXT:  (('st_name', 0x00000024) # 'bar9'
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000020)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000005
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000003)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000006
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000003)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000002)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000007
+// CHECK-NEXT:  (('st_name', 0x00000000) # ''
+// CHECK-NEXT:   ('st_bind', 0x00000000)
+// CHECK-NEXT:   ('st_type', 0x00000003)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000003)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000008
+// CHECK-NEXT:  (('st_name', 0x00000029) # 'bar10'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000028)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000009
+// CHECK-NEXT:  (('st_name', 0x0000002f) # 'bar11'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000030)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000a
+// CHECK-NEXT:  (('st_name', 0x00000035) # 'bar12'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000030)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000b
+// CHECK-NEXT:  (('st_name', 0x0000003b) # 'bar13'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000034)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000c
+// CHECK-NEXT:  (('st_name', 0x00000041) # 'bar14'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000038)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000d
+// CHECK-NEXT:  (('st_name', 0x00000047) # 'bar15'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000001)
+// CHECK-NEXT:   ('st_value', 0x0000000000000040)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000e
+// CHECK-NEXT:  (('st_name', 0x00000001) # 'bar2'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x0000000f
+// CHECK-NEXT:  (('st_name', 0x00000006) # 'bar3'
+// CHECK-NEXT:   ('st_bind', 0x00000002)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000010
+// CHECK-NEXT:  (('st_name', 0x0000000b) # 'bar4'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 0x00000011
+// CHECK-NEXT:  (('st_name', 0x00000010) # 'bar5'
+// CHECK-NEXT:   ('st_bind', 0x00000001)
+// CHECK-NEXT:   ('st_type', 0x00000000)
+// CHECK-NEXT:   ('st_other', 0x00000000)
+// CHECK-NEXT:   ('st_shndx', 0x00000000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ])
diff --git a/test/MC/ELF/zero.s b/test/MC/ELF/zero.s
new file mode 100644
index 000000000000..adf21f8a6bee
--- /dev/null
+++ b/test/MC/ELF/zero.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+.zero 4
+.zero 1,42
+
+// CHECK: ('sh_name', 0x00000001) # '.text'
+// CHECK: ('sh_type', 0x00000001)
+// CHECK: ('sh_flags', 0x00000006)
+// CHECK: ('sh_addr', 0x00000000)
+// CHECK: ('sh_offset', 0x00000040)
+// CHECK: ('sh_size', 0x00000005)
+// CHECK: ('sh_link', 0x00000000)
+// CHECK: ('sh_info', 0x00000000)
+// CHECK: ('sh_addralign', 0x00000004)
+// CHECK: ('sh_entsize', 0x00000000)
+// CHECK: ('_section_data', '00000000 2a')
diff --git a/test/MC/MBlaze/dg.exp b/test/MC/MBlaze/dg.exp
new file mode 100644
index 000000000000..0c4e78e88dc4
--- /dev/null
+++ b/test/MC/MBlaze/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target MBlaze] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/MC/MBlaze/mblaze_branch.s b/test/MC/MBlaze/mblaze_branch.s
new file mode 100644
index 000000000000..2ec431926604
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_branch.s
@@ -0,0 +1,197 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to make sure that all of the TYPE-A instructions supported by
+# the Microblaze can be parsed by the assembly parser.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   000000 00000 00000 00000 00000000000
+
+# CHECK:    beq
+# BINARY:   100111 00000 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0x02,0x18,0x00]
+            beq     r2, r3
+
+# CHECK:    bge
+# BINARY:   100111 00101 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0xa2,0x18,0x00]
+            bge     r2, r3
+
+# CHECK:    bgt
+# BINARY:   100111 00100 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0x82,0x18,0x00]
+            bgt     r2, r3
+
+# CHECK:    ble
+# BINARY:   100111 00011 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0x62,0x18,0x00]
+            ble     r2, r3
+
+# CHECK:    blt
+# BINARY:   100111 00010 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0x42,0x18,0x00]
+            blt     r2, r3
+
+# CHECK:    bne
+# BINARY:   100111 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x9c,0x22,0x18,0x00]
+            bne     r2, r3
+
+# CHECK:    beqd
+# BINARY:   100111 10000 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0x02,0x18,0x00]
+            beqd    r2, r3
+
+# CHECK:    bged
+# BINARY:   100111 10101 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0xa2,0x18,0x00]
+            bged    r2, r3
+
+# CHECK:    bgtd
+# BINARY:   100111 10100 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0x82,0x18,0x00]
+            bgtd    r2, r3
+
+# CHECK:    bled
+# BINARY:   100111 10011 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0x62,0x18,0x00]
+            bled    r2, r3
+
+# CHECK:    bltd
+# BINARY:   100111 10010 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0x42,0x18,0x00]
+            bltd    r2, r3
+
+# CHECK:    bned
+# BINARY:   100111 10001 00010 00011 00000000000
+# CHECK:    encoding: [0x9e,0x22,0x18,0x00]
+            bned    r2, r3
+
+# CHECK:    br
+# BINARY:   100110 00000 00000 00011 00000000000
+# CHECK:    encoding: [0x98,0x00,0x18,0x00]
+            br      r3
+
+# CHECK:    bra
+# BINARY:   100110 00000 01000 00011 00000000000
+# CHECK:    encoding: [0x98,0x08,0x18,0x00]
+            bra     r3
+
+# CHECK:    brd
+# BINARY:   100110 00000 10000 00011 00000000000
+# CHECK:    encoding: [0x98,0x10,0x18,0x00]
+            brd     r3
+
+# CHECK:    brad
+# BINARY:   100110 00000 11000 00011 00000000000
+# CHECK:    encoding: [0x98,0x18,0x18,0x00]
+            brad    r3
+
+# CHECK:    brld
+# BINARY:   100110 01111 10100 00011 00000000000
+# CHECK:    encoding: [0x99,0xf4,0x18,0x00]
+            brld    r15, r3
+
+# CHECK:    brald
+# BINARY:   100110 01111 11100 00011 00000000000
+# CHECK:    encoding: [0x99,0xfc,0x18,0x00]
+            brald   r15, r3
+
+# CHECK:    brk
+# BINARY:   100110 01111 01100 00011 00000000000
+# CHECK:    encoding: [0x99,0xec,0x18,0x00]
+            brk     r15, r3
+
+# CHECK:    beqi
+# BINARY:   101111 00000 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0x02,0x00,0x00]
+            beqi    r2, 0
+
+# CHECK:    bgei
+# BINARY:   101111 00101 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0xa2,0x00,0x00]
+            bgei    r2, 0
+
+# CHECK:    bgti
+# BINARY:   101111 00100 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0x82,0x00,0x00]
+            bgti    r2, 0
+
+# CHECK:    blei
+# BINARY:   101111 00011 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0x62,0x00,0x00]
+            blei    r2, 0
+
+# CHECK:    blti
+# BINARY:   101111 00010 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0x42,0x00,0x00]
+            blti    r2, 0
+
+# CHECK:    bnei
+# BINARY:   101111 00001 00010 0000000000000000
+# CHECK:    encoding: [0xbc,0x22,0x00,0x00]
+            bnei    r2, 0
+
+# CHECK:    beqid
+# BINARY:   101111 10000 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0x02,0x00,0x00]
+            beqid   r2, 0
+
+# CHECK:    bgeid
+# BINARY:   101111 10101 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0xa2,0x00,0x00]
+            bgeid   r2, 0
+
+# CHECK:    bgtid
+# BINARY:   101111 10100 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0x82,0x00,0x00]
+            bgtid   r2, 0
+
+# CHECK:    bleid
+# BINARY:   101111 10011 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0x62,0x00,0x00]
+            bleid   r2, 0
+
+# CHECK:    bltid
+# BINARY:   101111 10010 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0x42,0x00,0x00]
+            bltid   r2, 0
+
+# CHECK:    bneid
+# BINARY:   101111 10001 00010 0000000000000000
+# CHECK:    encoding: [0xbe,0x22,0x00,0x00]
+            bneid   r2, 0
+
+# CHECK:    bri
+# BINARY:   101110 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb8,0x00,0x00,0x00]
+            bri     0
+
+# CHECK:    brai
+# BINARY:   101110 00000 01000 0000000000000000
+# CHECK:    encoding: [0xb8,0x08,0x00,0x00]
+            brai    0
+
+# CHECK:    brid
+# BINARY:   101110 00000 10000 0000000000000000
+# CHECK:    encoding: [0xb8,0x10,0x00,0x00]
+            brid    0
+
+# CHECK:    braid
+# BINARY:   101110 00000 11000 0000000000000000
+# CHECK:    encoding: [0xb8,0x18,0x00,0x00]
+            braid   0
+
+# CHECK:    brlid
+# BINARY:   101110 01111 10100 0000000000000000
+# CHECK:    encoding: [0xb9,0xf4,0x00,0x00]
+            brlid   r15, 0
+
+# CHECK:    bralid
+# BINARY:   101110 01111 11100 0000000000000000
+# CHECK:    encoding: [0xb9,0xfc,0x00,0x00]
+            bralid  r15, 0
+
+# CHECK:    brki
+# BINARY:   101110 01111 01100 0000000000000000
+# CHECK:    encoding: [0xb9,0xec,0x00,0x00]
+            brki    r15, 0
diff --git a/test/MC/MBlaze/mblaze_fpu.s b/test/MC/MBlaze/mblaze_fpu.s
new file mode 100644
index 000000000000..a3b683848513
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_fpu.s
@@ -0,0 +1,77 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to ensure that all FPU instructions can be parsed by the
+# assembly parser correctly.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   011011 00000 00000 00000 00000000000
+
+# CHECK:    fadd
+# BINARY:   010110 00000 00001 00010 00000000000
+# CHECK:    encoding: [0x58,0x01,0x10,0x00]
+            fadd         r0, r1, r2
+
+# CHECK:    frsub
+# BINARY:   010110 00000 00001 00010 00010000000
+# CHECK:    encoding: [0x58,0x01,0x10,0x80]
+            frsub        r0, r1, r2
+
+# CHECK:    fmul
+# BINARY:   010110 00000 00001 00010 00100000000
+# CHECK:    encoding: [0x58,0x01,0x11,0x00]
+            fmul         r0, r1, r2
+
+# CHECK:    fdiv
+# BINARY:   010110 00000 00001 00010 00110000000
+# CHECK:    encoding: [0x58,0x01,0x11,0x80]
+            fdiv         r0, r1, r2
+
+# CHECK:    fsqrt
+# BINARY:   010110 00000 00001 00000 01110000000
+# CHECK:    encoding: [0x58,0x01,0x03,0x80]
+            fsqrt        r0, r1
+
+# CHECK:    fint
+# BINARY:   010110 00000 00001 00000 01100000000
+# CHECK:    encoding: [0x58,0x01,0x03,0x00]
+            fint         r0, r1
+
+# CHECK:    flt
+# BINARY:   010110 00000 00001 00000 01010000000
+# CHECK:    encoding: [0x58,0x01,0x02,0x80]
+            flt          r0, r1
+
+# CHECK:    fcmp.un
+# BINARY:   010110 00000 00001 00010 01000000000
+# CHECK:    encoding: [0x58,0x01,0x12,0x00]
+            fcmp.un     r0, r1, r2
+
+# CHECK:    fcmp.lt
+# BINARY:   010110 00000 00001 00010 01000010000
+# CHECK:    encoding: [0x58,0x01,0x12,0x10]
+            fcmp.lt     r0, r1, r2
+
+# CHECK:    fcmp.eq
+# BINARY:   010110 00000 00001 00010 01000100000
+# CHECK:    encoding: [0x58,0x01,0x12,0x20]
+            fcmp.eq     r0, r1, r2
+
+# CHECK:    fcmp.le
+# BINARY:   010110 00000 00001 00010 01000110000
+# CHECK:    encoding: [0x58,0x01,0x12,0x30]
+            fcmp.le     r0, r1, r2
+
+# CHECK:    fcmp.gt
+# BINARY:   010110 00000 00001 00010 01001000000
+# CHECK:    encoding: [0x58,0x01,0x12,0x40]
+            fcmp.gt     r0, r1, r2
+
+# CHECK:    fcmp.ne
+# BINARY:   010110 00000 00001 00010 01001010000
+# CHECK:    encoding: [0x58,0x01,0x12,0x50]
+            fcmp.ne     r0, r1, r2
+
+# CHECK:    fcmp.ge
+# BINARY:   010110 00000 00001 00010 01001100000
+# CHECK:    encoding: [0x58,0x01,0x12,0x60]
+            fcmp.ge     r0, r1, r2
diff --git a/test/MC/MBlaze/mblaze_fsl.s b/test/MC/MBlaze/mblaze_fsl.s
new file mode 100644
index 000000000000..d0a42b34991f
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_fsl.s
@@ -0,0 +1,568 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to ensure that all FSL immediate operands and FSL instructions
+# can be parsed by the assembly parser correctly.
+
+# TYPE F:   OPCODE RD           NCTAE        FSL
+# BINARY:   011011 00000 000000 00000 000000 0000
+
+# TYPE FD:  OPCODE RD          RB      NCTAE
+# BINARY:   011011 00000 00000 00000 0 00000 00000
+
+# TYPE FP:  OPCODE       RA      NCTA         FSL
+#           000000 00000 00000 1 0000 0000000 0000
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x00,0x00]
+            get         r0, rfsl0
+
+# CHECK:    nget
+# BINARY:   011011 00000 000000 10000 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x40,0x00]
+            nget        r0, rfsl0
+
+# CHECK:    cget
+# BINARY:   011011 00000 000000 01000 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x20,0x00]
+            cget        r0, rfsl0
+
+# CHECK:    ncget
+# BINARY:   011011 00000 000000 11000 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x60,0x00]
+            ncget       r0, rfsl0
+
+# CHECK:    tget
+# BINARY:   011011 00000 000000 00100 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x10,0x00]
+            tget        r0, rfsl0
+
+# CHECK:    tnget
+# BINARY:   011011 00000 000000 10100 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x50,0x00]
+            tnget       r0, rfsl0
+
+# CHECK:    tcget
+# BINARY:   011011 00000 000000 01100 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x30,0x00]
+            tcget       r0, rfsl0
+
+# CHECK:    tncget
+# BINARY:   011011 00000 000000 11100 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x70,0x00]
+            tncget      r0, rfsl0
+
+# CHECK:    aget
+# BINARY:   011011 00000 000000 00010 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x08,0x00]
+            aget        r0, rfsl0
+
+# CHECK:    naget
+# BINARY:   011011 00000 000000 10010 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x48,0x00]
+            naget       r0, rfsl0
+
+# CHECK:    caget
+# BINARY:   011011 00000 000000 01010 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x28,0x00]
+            caget       r0, rfsl0
+
+# CHECK:    ncaget
+# BINARY:   011011 00000 000000 11010 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x68,0x00]
+            ncaget      r0, rfsl0
+
+# CHECK:    taget
+# BINARY:   011011 00000 000000 00110 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x18,0x00]
+            taget       r0, rfsl0
+
+# CHECK:    tnaget
+# BINARY:   011011 00000 000000 10110 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x58,0x00]
+            tnaget      r0, rfsl0
+
+# CHECK:    tcaget
+# BINARY:   011011 00000 000000 01110 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x38,0x00]
+            tcaget      r0, rfsl0
+
+# CHECK:    tncaget
+# BINARY:   011011 00000 000000 11110 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x78,0x00]
+            tncaget     r0, rfsl0
+
+# CHECK:    eget
+# BINARY:   011011 00000 000000 00001 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x04,0x00]
+            eget        r0, rfsl0
+
+# CHECK:    neget
+# BINARY:   011011 00000 000000 10001 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x44,0x00]
+            neget       r0, rfsl0
+
+# CHECK:    ecget
+# BINARY:   011011 00000 000000 01001 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x24,0x00]
+            ecget       r0, rfsl0
+
+# CHECK:    necget
+# BINARY:   011011 00000 000000 11001 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x64,0x00]
+            necget      r0, rfsl0
+
+# CHECK:    teget
+# BINARY:   011011 00000 000000 00101 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x14,0x00]
+            teget       r0, rfsl0
+
+# CHECK:    tneget
+# BINARY:   011011 00000 000000 10101 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x54,0x00]
+            tneget      r0, rfsl0
+
+# CHECK:    tecget
+# BINARY:   011011 00000 000000 01101 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x34,0x00]
+            tecget      r0, rfsl0
+
+# CHECK:    tnecget
+# BINARY:   011011 00000 000000 11101 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x74,0x00]
+            tnecget     r0, rfsl0
+
+# CHECK:    eaget
+# BINARY:   011011 00000 000000 00011 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x0c,0x00]
+            eaget       r0, rfsl0
+
+# CHECK:    neaget
+# BINARY:   011011 00000 000000 10011 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x4c,0x00]
+            neaget      r0, rfsl0
+
+# CHECK:    ecaget
+# BINARY:   011011 00000 000000 01011 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x2c,0x00]
+            ecaget      r0, rfsl0
+
+# CHECK:    necaget
+# BINARY:   011011 00000 000000 11011 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x6c,0x00]
+            necaget     r0, rfsl0
+
+# CHECK:    teaget
+# BINARY:   011011 00000 000000 00111 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x1c,0x00]
+            teaget      r0, rfsl0
+
+# CHECK:    tneaget
+# BINARY:   011011 00000 000000 10111 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x5c,0x00]
+            tneaget     r0, rfsl0
+
+# CHECK:    tecaget
+# BINARY:   011011 00000 000000 01111 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x3c,0x00]
+            tecaget     r0, rfsl0
+
+# CHECK:    tnecaget
+# BINARY:   011011 00000 000000 11111 000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x7c,0x00]
+            tnecaget    r0, rfsl0
+
+# CHECK:    getd
+# BINARY:   010011 00000 00000 00001 0 00000 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0x00]
+            getd        r0, r1
+
+# CHECK:    ngetd
+# BINARY:   010011 00000 00000 00001 0 10000 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0x00]
+            ngetd       r0, r1
+
+# CHECK:    cgetd
+# BINARY:   010011 00000 00000 00001 0 01000 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0x00]
+            cgetd       r0, r1
+
+# CHECK:    ncgetd
+# BINARY:   010011 00000 00000 00001 0 11000 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0x00]
+            ncgetd      r0, r1
+
+# CHECK:    tgetd
+# BINARY:   010011 00000 00000 00001 0 00100 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0x80]
+            tgetd       r0, r1
+
+# CHECK:    tngetd
+# BINARY:   010011 00000 00000 00001 0 10100 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0x80]
+            tngetd      r0, r1
+
+# CHECK:    tcgetd
+# BINARY:   010011 00000 00000 00001 0 01100 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0x80]
+            tcgetd      r0, r1
+
+# CHECK:    tncgetd
+# BINARY:   010011 00000 00000 00001 0 11100 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0x80]
+            tncgetd     r0, r1
+
+# CHECK:    agetd
+# BINARY:   010011 00000 00000 00001 0 00010 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0x40]
+            agetd       r0, r1
+
+# CHECK:    nagetd
+# BINARY:   010011 00000 00000 00001 0 10010 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0x40]
+            nagetd      r0, r1
+
+# CHECK:    cagetd
+# BINARY:   010011 00000 00000 00001 0 01010 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0x40]
+            cagetd     r0, r1
+
+# CHECK:    ncagetd
+# BINARY:   010011 00000 00000 00001 0 11010 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0x40]
+            ncagetd     r0, r1
+
+# CHECK:    tagetd
+# BINARY:   010011 00000 00000 00001 0 00110 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0xc0]
+            tagetd      r0, r1
+
+# CHECK:    tnagetd
+# BINARY:   010011 00000 00000 00001 0 10110 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0xc0]
+            tnagetd     r0, r1
+
+# CHECK:    tcagetd
+# BINARY:   010011 00000 00000 00001 0 01110 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0xc0]
+            tcagetd     r0, r1
+
+# CHECK:    tncagetd
+# BINARY:   010011 00000 00000 00001 0 11110 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0xc0]
+            tncagetd    r0, r1
+
+# CHECK:    egetd
+# BINARY:   010011 00000 00000 00001 0 00001 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0x20]
+            egetd       r0, r1
+
+# CHECK:    negetd
+# BINARY:   010011 00000 00000 00001 0 10001 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0x20]
+            negetd      r0, r1
+
+# CHECK:    ecgetd
+# BINARY:   010011 00000 00000 00001 0 01001 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0x20]
+            ecgetd      r0, r1
+
+# CHECK:    necgetd
+# BINARY:   010011 00000 00000 00001 0 11001 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0x20]
+            necgetd     r0, r1
+
+# CHECK:    tegetd
+# BINARY:   010011 00000 00000 00001 0 00101 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0xa0]
+            tegetd      r0, r1
+
+# CHECK:    tnegetd
+# BINARY:   010011 00000 00000 00001 0 10101 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0xa0]
+            tnegetd     r0, r1
+
+# CHECK:    tecgetd
+# BINARY:   010011 00000 00000 00001 0 01101 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0xa0]
+            tecgetd     r0, r1
+
+# CHECK:    tnecgetd
+# BINARY:   010011 00000 00000 00001 0 11101 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0xa0]
+            tnecgetd    r0, r1
+
+# CHECK:    eagetd
+# BINARY:   010011 00000 00000 00001 0 00011 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0x60]
+            eagetd      r0, r1
+
+# CHECK:    neagetd
+# BINARY:   010011 00000 00000 00001 0 10011 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0x60]
+            neagetd     r0, r1
+
+# CHECK:    ecagetd
+# BINARY:   010011 00000 00000 00001 0 01011 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0x60]
+            ecagetd     r0, r1
+
+# CHECK:    necagetd
+# BINARY:   010011 00000 00000 00001 0 11011 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0x60]
+            necagetd    r0, r1
+
+# CHECK:    teagetd
+# BINARY:   010011 00000 00000 00001 0 00111 00000
+# CHECK:    encoding: [0x4c,0x00,0x08,0xe0]
+            teagetd     r0, r1
+
+# CHECK:    tneagetd
+# BINARY:   010011 00000 00000 00001 0 10111 00000
+# CHECK:    encoding: [0x4c,0x00,0x0a,0xe0]
+            tneagetd    r0, r1
+
+# CHECK:    tecagetd
+# BINARY:   010011 00000 00000 00001 0 01111 00000
+# CHECK:    encoding: [0x4c,0x00,0x09,0xe0]
+            tecagetd    r0, r1
+
+# CHECK:    tnecagetd
+# BINARY:   010011 00000 00000 00001 0 11111 00000
+# CHECK:    encoding: [0x4c,0x00,0x0b,0xe0]
+            tnecagetd   r0, r1
+
+# CHECK:    put
+# BINARY:   011011 00000 00000 1 0000 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x80,0x00]
+            put         r0, rfsl0
+
+# CHECK:    aput
+# BINARY:   011011 00000 00000 1 0001 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x88,0x00]
+            aput        r0, rfsl0
+
+# CHECK:    cput
+# BINARY:   011011 00000 00000 1 0100 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xa0,0x00]
+            cput        r0, rfsl0
+
+# CHECK:    caput
+# BINARY:   011011 00000 00000 1 0101 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xa8,0x00]
+            caput       r0, rfsl0
+
+# CHECK:    nput
+# BINARY:   011011 00000 00000 1 1000 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xc0,0x00]
+            nput        r0, rfsl0
+
+# CHECK:    naput
+# BINARY:   011011 00000 00000 1 1001 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xc8,0x00]
+            naput       r0, rfsl0
+
+# CHECK:    ncput
+# BINARY:   011011 00000 00000 1 1100 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xe0,0x00]
+            ncput       r0, rfsl0
+
+# CHECK:    ncaput
+# BINARY:   011011 00000 00000 1 1101 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xe8,0x00]
+            ncaput      r0, rfsl0
+
+# CHECK:    tput
+# BINARY:   011011 00000 00000 1 0010 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x90,0x00]
+            tput        rfsl0
+
+# CHECK:    taput
+# BINARY:   011011 00000 00000 1 0011 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0x98,0x00]
+            taput       rfsl0
+
+# CHECK:    tcput
+# BINARY:   011011 00000 00000 1 0110 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xb0,0x00]
+            tcput       rfsl0
+
+# CHECK:    tcaput
+# BINARY:   011011 00000 00000 1 0111 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xb8,0x00]
+            tcaput      rfsl0
+
+# CHECK:    tnput
+# BINARY:   011011 00000 00000 1 1010 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xd0,0x00]
+            tnput       rfsl0
+
+# CHECK:    tnaput
+# BINARY:   011011 00000 00000 1 1011 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xd8,0x00]
+            tnaput      rfsl0
+
+# CHECK:    tncput
+# BINARY:   011011 00000 00000 1 1110 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xf0,0x00]
+            tncput      rfsl0
+
+# CHECK:    tncaput
+# BINARY:   011011 00000 00000 1 1111 0000000 0000
+# CHECK:    encoding: [0x6c,0x00,0xf8,0x00]
+            tncaput     rfsl0
+
+# CHECK:    putd
+# BINARY:   010011 00000 00000 00001 1 0000 000000
+# CHECK:    encoding: [0x4c,0x00,0x0c,0x00]
+            putd        r0, r1
+
+# CHECK:    aputd
+# BINARY:   010011 00000 00000 00001 1 0001 000000
+# CHECK:    encoding: [0x4c,0x00,0x0c,0x40]
+            aputd       r0, r1
+
+# CHECK:    cputd
+# BINARY:   010011 00000 00000 00001 1 0100 000000
+# CHECK:    encoding: [0x4c,0x00,0x0d,0x00]
+            cputd       r0, r1
+
+# CHECK:    caputd
+# BINARY:   010011 00000 00000 00001 1 0101 000000
+# CHECK:    encoding: [0x4c,0x00,0x0d,0x40]
+            caputd      r0, r1
+
+# CHECK:    nputd
+# BINARY:   010011 00000 00000 00001 1 1000 000000
+# CHECK:    encoding: [0x4c,0x00,0x0e,0x00]
+            nputd       r0, r1
+
+# CHECK:    naputd
+# BINARY:   010011 00000 00000 00001 1 1001 000000
+# CHECK:    encoding: [0x4c,0x00,0x0e,0x40]
+            naputd      r0, r1
+
+# CHECK:    ncputd
+# BINARY:   010011 00000 00000 00001 1 1100 000000
+# CHECK:    encoding: [0x4c,0x00,0x0f,0x00]
+            ncputd      r0, r1
+
+# CHECK:    ncaputd
+# BINARY:   010011 00000 00000 00001 1 1101 000000
+# CHECK:    encoding: [0x4c,0x00,0x0f,0x40]
+            ncaputd     r0, r1
+
+# CHECK:    tputd
+# BINARY:   010011 00000 00000 00001 1 0010 000000
+# CHECK:    encoding: [0x4c,0x00,0x0c,0x80]
+            tputd       r1
+
+# CHECK:    taputd
+# BINARY:   010011 00000 00000 00001 1 0011 000000
+# CHECK:    encoding: [0x4c,0x00,0x0c,0xc0]
+            taputd      r1
+
+# CHECK:    tcputd
+# BINARY:   010011 00000 00000 00001 1 0110 000000
+# CHECK:    encoding: [0x4c,0x00,0x0d,0x80]
+            tcputd      r1
+
+# CHECK:    tcaputd
+# BINARY:   010011 00000 00000 00001 1 0111 000000
+# CHECK:    encoding: [0x4c,0x00,0x0d,0xc0]
+            tcaputd     r1
+
+# CHECK:    tnputd
+# BINARY:   010011 00000 00000 00001 1 1010 000000
+# CHECK:    encoding: [0x4c,0x00,0x0e,0x80]
+            tnputd      r1
+
+# CHECK:    tnaputd
+# BINARY:   010011 00000 00000 00001 1 1011 000000
+# CHECK:    encoding: [0x4c,0x00,0x0e,0xc0]
+            tnaputd     r1
+
+# CHECK:    tncputd
+# BINARY:   010011 00000 00000 00001 1 1110 000000
+# CHECK:    encoding: [0x4c,0x00,0x0f,0x80]
+            tncputd     r1
+
+# CHECK:    tncaputd
+# BINARY:   010011 00000 00000 00001 1 1111 000000
+# CHECK:    encoding: [0x4c,0x00,0x0f,0xc0]
+            tncaputd    r1
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0001
+# CHECK:    encoding: [0x6c,0x00,0x00,0x01]
+            get     r0, rfsl1
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0010
+# CHECK:    encoding: [0x6c,0x00,0x00,0x02]
+            get     r0, rfsl2
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0011
+# CHECK:    encoding: [0x6c,0x00,0x00,0x03]
+            get     r0, rfsl3
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0100
+# CHECK:    encoding: [0x6c,0x00,0x00,0x04]
+            get     r0, rfsl4
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0101
+# CHECK:    encoding: [0x6c,0x00,0x00,0x05]
+            get     r0, rfsl5
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0110
+# CHECK:    encoding: [0x6c,0x00,0x00,0x06]
+            get     r0, rfsl6
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 0111
+# CHECK:    encoding: [0x6c,0x00,0x00,0x07]
+            get     r0, rfsl7
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1000
+# CHECK:    encoding: [0x6c,0x00,0x00,0x08]
+            get     r0, rfsl8
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1001
+# CHECK:    encoding: [0x6c,0x00,0x00,0x09]
+            get     r0, rfsl9
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1010
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0a]
+            get     r0, rfsl10
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1011
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0b]
+            get     r0, rfsl11
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1100
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0c]
+            get     r0, rfsl12
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1101
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0d]
+            get     r0, rfsl13
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1110
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0e]
+            get     r0, rfsl14
+
+# CHECK:    get
+# BINARY:   011011 00000 000000 00000 000000 1111
+# CHECK:    encoding: [0x6c,0x00,0x00,0x0f]
+            get     r0, rfsl15
diff --git a/test/MC/MBlaze/mblaze_imm.s b/test/MC/MBlaze/mblaze_imm.s
new file mode 100644
index 000000000000..08b8a0f26818
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_imm.s
@@ -0,0 +1,194 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# In the microblaze instruction set, any TYPE-B instruction with a
+# signed immediate value requiring more than 16-bits must be prefixed
+# with an IMM instruction that contains the high 16-bits. The higher
+# 16-bits are then combined with the lower 16-bits in the original
+# instruction to form a 32-bit immediate value.
+#
+# The generation of IMM instructions is handled automatically by the
+# code emitter. Test to ensure that IMM instructions are generated
+# when they are suppose to and are not generated when they are not
+# needed.
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00000000
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000001
+# CHECK:    encoding: [0x20,0x00,0x00,0x01]
+            addi    r0, r0, 0x00000001
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000010
+# CHECK:    encoding: [0x20,0x00,0x00,0x02]
+            addi    r0, r0, 0x00000002
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000100
+# CHECK:    encoding: [0x20,0x00,0x00,0x04]
+            addi    r0, r0, 0x00000004
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000001000
+# CHECK:    encoding: [0x20,0x00,0x00,0x08]
+            addi    r0, r0, 0x00000008
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000010000
+# CHECK:    encoding: [0x20,0x00,0x00,0x10]
+            addi    r0, r0, 0x00000010
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000100000
+# CHECK:    encoding: [0x20,0x00,0x00,0x20]
+            addi    r0, r0, 0x00000020
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000001000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x40]
+            addi    r0, r0, 0x00000040
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000010000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x80]
+            addi    r0, r0, 0x00000080
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000100000000
+# CHECK:    encoding: [0x20,0x00,0x01,0x00]
+            addi    r0, r0, 0x00000100
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000001000000000
+# CHECK:    encoding: [0x20,0x00,0x02,0x00]
+            addi    r0, r0, 0x00000200
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000010000000000
+# CHECK:    encoding: [0x20,0x00,0x04,0x00]
+            addi    r0, r0, 0x00000400
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000100000000000
+# CHECK:    encoding: [0x20,0x00,0x08,0x00]
+            addi    r0, r0, 0x00000800
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0001000000000000
+# CHECK:    encoding: [0x20,0x00,0x10,0x00]
+            addi    r0, r0, 0x00001000
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0010000000000000
+# CHECK:    encoding: [0x20,0x00,0x20,0x00]
+            addi    r0, r0, 0x00002000
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0100000000000000
+# CHECK:    encoding: [0x20,0x00,0x40,0x00]
+            addi    r0, r0, 0x00004000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000000000
+# BINARY:   001000 00000 00000 1000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x00,0x20,0x00,0x80,0x00]
+            addi    r0, r0, 0x00008000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000000001
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x01,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00010000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000000010
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x02,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00020000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000000100
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x04,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00040000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000001000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x08,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00080000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000010000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x10,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00100000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000000100000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x20,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00200000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000001000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x40,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00400000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000010000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x00,0x80,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x00800000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000000100000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x01,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x01000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000001000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x02,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x02000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000010000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x04,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x04000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0000100000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x08,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x08000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0001000000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x10,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x10000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0010000000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x20,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x20000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 0100000000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x40,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x40000000
+
+# CHECK:    addi
+# BINARY:   101100 00000 00000 1000000000000000
+#           001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0xb0,0x00,0x80,0x00,0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0x80000000
diff --git a/test/MC/MBlaze/mblaze_memory.s b/test/MC/MBlaze/mblaze_memory.s
new file mode 100644
index 000000000000..fe744753ee4d
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_memory.s
@@ -0,0 +1,107 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to make sure that all of the TYPE-A instructions supported by
+# the Microblaze can be parsed by the assembly parser.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   000000 00000 00000 00000 00000000000
+
+# CHECK:    lbu
+# BINARY:   110000 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xc0,0x22,0x18,0x00]
+            lbu     r1, r2, r3
+
+# CHECK:    lbur
+# BINARY:   110000 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xc0,0x22,0x1a,0x00]
+            lbur    r1, r2, r3
+
+# CHECK:    lbui
+# BINARY:   111000 00001 00010 0000000000011100
+# CHECK:    encoding: [0xe0,0x22,0x00,0x1c]
+            lbui    r1, r2, 28
+
+# CHECK:    lhu
+# BINARY:   110001 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xc4,0x22,0x18,0x00]
+            lhu     r1, r2, r3
+
+# CHECK:    lhur
+# BINARY:   110001 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xc4,0x22,0x1a,0x00]
+            lhur    r1, r2, r3
+
+# CHECK:    lhui
+# BINARY:   111001 00001 00010 0000000000011100
+# CHECK:    encoding: [0xe4,0x22,0x00,0x1c]
+            lhui    r1, r2, 28
+
+# CHECK:    lw
+# BINARY:   110010 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xc8,0x22,0x18,0x00]
+            lw      r1, r2, r3
+
+# CHECK:    lwr
+# BINARY:   110010 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xc8,0x22,0x1a,0x00]
+            lwr    r1, r2, r3
+
+# CHECK:    lwi
+# BINARY:   111010 00001 00010 0000000000011100
+# CHECK:    encoding: [0xe8,0x22,0x00,0x1c]
+            lwi     r1, r2, 28
+
+# CHECK:    lwx
+# BINARY:   110010 00001 00010 00011 10000000000
+# CHECK:    encoding: [0xc8,0x22,0x1c,0x00]
+            lwx      r1, r2, r3
+
+# CHECK:    sb
+# BINARY:   110100 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xd0,0x22,0x18,0x00]
+            sb      r1, r2, r3
+
+# CHECK:    sbr
+# BINARY:   110100 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xd0,0x22,0x1a,0x00]
+            sbr     r1, r2, r3
+
+# CHECK:    sbi
+# BINARY:   111100 00001 00010 0000000000011100
+# CHECK:    encoding: [0xf0,0x22,0x00,0x1c]
+            sbi     r1, r2, 28
+
+# CHECK:    sh
+# BINARY:   110101 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xd4,0x22,0x18,0x00]
+            sh      r1, r2, r3
+
+# CHECK:    shr
+# BINARY:   110101 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xd4,0x22,0x1a,0x00]
+            shr     r1, r2, r3
+
+# CHECK:    shi
+# BINARY:   111101 00001 00010 0000000000011100
+# CHECK:    encoding: [0xf4,0x22,0x00,0x1c]
+            shi     r1, r2, 28
+
+# CHECK:    sw
+# BINARY:   110110 00001 00010 00011 00000000000
+# CHECK:    encoding: [0xd8,0x22,0x18,0x00]
+            sw      r1, r2, r3
+
+# CHECK:    swr
+# BINARY:   110110 00001 00010 00011 01000000000
+# CHECK:    encoding: [0xd8,0x22,0x1a,0x00]
+            swr    r1, r2, r3
+
+# CHECK:    swi
+# BINARY:   111110 00001 00010 0000000000011100
+# CHECK:    encoding: [0xf8,0x22,0x00,0x1c]
+            swi     r1, r2, 28
+
+# CHECK:    swx
+# BINARY:   110110 00001 00010 00011 10000000000
+# CHECK:    encoding: [0xd8,0x22,0x1c,0x00]
+            swx      r1, r2, r3
diff --git a/test/MC/MBlaze/mblaze_operands.s b/test/MC/MBlaze/mblaze_operands.s
new file mode 100644
index 000000000000..d5f1d8059f37
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_operands.s
@@ -0,0 +1,328 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to ensure that all register and immediate operands can be parsed by
+# the assembly parser correctly. Testing the parsing of FSL immediate
+# values is done in a different test.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   000000 00000 00000 00000 00000000000
+
+# CHECK:    add
+# BINARY:   000000 00000 00000 00000 00000000000
+# CHECK:    encoding: [0x00,0x00,0x00,0x00]
+            add     r0, r0, r0
+
+# CHECK:    add
+# BINARY:   000000 00001 00001 00001 00000000000
+# CHECK:    encoding: [0x00,0x21,0x08,0x00]
+            add     r1, r1, r1
+
+# CHECK:    add
+# BINARY:   000000 00010 00010 00010 00000000000
+# CHECK:    encoding: [0x00,0x42,0x10,0x00]
+            add     r2, r2, r2
+
+# CHECK:    add
+# BINARY:   000000 00011 00011 00011 00000000000
+# CHECK:    encoding: [0x00,0x63,0x18,0x00]
+            add     r3, r3, r3
+
+# CHECK:    add
+# BINARY:   000000 00100 00100 00100 00000000000
+# CHECK:    encoding: [0x00,0x84,0x20,0x00]
+            add     r4, r4, r4
+
+# CHECK:    add
+# BINARY:   000000 00101 00101 00101 00000000000
+# CHECK:    encoding: [0x00,0xa5,0x28,0x00]
+            add     r5, r5, r5
+
+# CHECK:    add
+# BINARY:   000000 00110 00110 00110 00000000000
+# CHECK:    encoding: [0x00,0xc6,0x30,0x00]
+            add     r6, r6, r6
+
+# CHECK:    add
+# BINARY:   000000 00111 00111 00111 00000000000
+# CHECK:    encoding: [0x00,0xe7,0x38,0x00]
+            add     r7, r7, r7
+
+# CHECK:    add
+# BINARY:   000000 01000 01000 01000 00000000000
+# CHECK:    encoding: [0x01,0x08,0x40,0x00]
+            add     r8, r8, r8
+
+# CHECK:    add
+# BINARY:   000000 01001 01001 01001 00000000000
+# CHECK:    encoding: [0x01,0x29,0x48,0x00]
+            add     r9, r9, r9
+
+# CHECK:    add
+# BINARY:   000000 01010 01010 01010 00000000000
+# CHECK:    encoding: [0x01,0x4a,0x50,0x00]
+            add     r10, r10, r10
+
+# CHECK:    add
+# BINARY:   000000 01011 01011 01011 00000000000
+# CHECK:    encoding: [0x01,0x6b,0x58,0x00]
+            add     r11, r11, r11
+
+# CHECK:    add
+# BINARY:   000000 01100 01100 01100 00000000000
+# CHECK:    encoding: [0x01,0x8c,0x60,0x00]
+            add     r12, r12, r12
+
+# CHECK:    add
+# BINARY:   000000 01101 01101 01101 00000000000
+# CHECK:    encoding: [0x01,0xad,0x68,0x00]
+            add     r13, r13, r13
+
+# CHECK:    add
+# BINARY:   000000 01110 01110 01110 00000000000
+# CHECK:    encoding: [0x01,0xce,0x70,0x00]
+            add     r14, r14, r14
+
+# CHECK:    add
+# BINARY:   000000 01111 01111 01111 00000000000
+# CHECK:    encoding: [0x01,0xef,0x78,0x00]
+            add     r15, r15, r15
+
+# CHECK:    add
+# BINARY:   000000 10000 10000 10000 00000000000
+# CHECK:    encoding: [0x02,0x10,0x80,0x00]
+            add     r16, r16, r16
+
+# CHECK:    add
+# BINARY:   000000 10001 10001 10001 00000000000
+# CHECK:    encoding: [0x02,0x31,0x88,0x00]
+            add     r17, r17, r17
+
+# CHECK:    add
+# BINARY:   000000 10010 10010 10010 00000000000
+# CHECK:    encoding: [0x02,0x52,0x90,0x00]
+            add     r18, r18, r18
+
+# CHECK:    add
+# BINARY:   000000 10011 10011 10011 00000000000
+# CHECK:    encoding: [0x02,0x73,0x98,0x00]
+            add     r19, r19, r19
+
+# CHECK:    add
+# BINARY:   000000 10100 10100 10100 00000000000
+# CHECK:    encoding: [0x02,0x94,0xa0,0x00]
+            add     r20, r20, r20
+
+# CHECK:    add
+# BINARY:   000000 10101 10101 10101 00000000000
+# CHECK:    encoding: [0x02,0xb5,0xa8,0x00]
+            add     r21, r21, r21
+
+# CHECK:    add
+# BINARY:   000000 10110 10110 10110 00000000000
+# CHECK:    encoding: [0x02,0xd6,0xb0,0x00]
+            add     r22, r22, r22
+
+# CHECK:    add
+# BINARY:   000000 10111 10111 10111 00000000000
+# CHECK:    encoding: [0x02,0xf7,0xb8,0x00]
+            add     r23, r23, r23
+
+# CHECK:    add
+# BINARY:   000000 11000 11000 11000 00000000000
+# CHECK:    encoding: [0x03,0x18,0xc0,0x00]
+            add     r24, r24, r24
+
+# CHECK:    add
+# BINARY:   000000 11001 11001 11001 00000000000
+# CHECK:    encoding: [0x03,0x39,0xc8,0x00]
+            add     r25, r25, r25
+
+# CHECK:    add
+# BINARY:   000000 11010 11010 11010 00000000000
+# CHECK:    encoding: [0x03,0x5a,0xd0,0x00]
+            add     r26, r26, r26
+
+# CHECK:    add
+# BINARY:   000000 11011 11011 11011 00000000000
+# CHECK:    encoding: [0x03,0x7b,0xd8,0x00]
+            add     r27, r27, r27
+
+# CHECK:    add
+# BINARY:   000000 11100 11100 11100 00000000000
+# CHECK:    encoding: [0x03,0x9c,0xe0,0x00]
+            add     r28, r28, r28
+
+# CHECK:    add
+# BINARY:   000000 11101 11101 11101 00000000000
+# CHECK:    encoding: [0x03,0xbd,0xe8,0x00]
+            add     r29, r29, r29
+
+# CHECK:    add
+# BINARY:   000000 11110 11110 11110 00000000000
+# CHECK:    encoding: [0x03,0xde,0xf0,0x00]
+            add     r30, r30, r30
+
+# CHECK:    add
+# BINARY:   000000 11111 11111 11111 00000000000
+# CHECK:    encoding: [0x03,0xff,0xf8,0x00]
+            add     r31, r31, r31
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x00]
+            addi    r0, r0, 0
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000001
+# CHECK:    encoding: [0x20,0x00,0x00,0x01]
+            addi    r0, r0, 1
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000010
+# CHECK:    encoding: [0x20,0x00,0x00,0x02]
+            addi    r0, r0, 2
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000000100
+# CHECK:    encoding: [0x20,0x00,0x00,0x04]
+            addi    r0, r0, 4
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000001000
+# CHECK:    encoding: [0x20,0x00,0x00,0x08]
+            addi    r0, r0, 8
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000010000
+# CHECK:    encoding: [0x20,0x00,0x00,0x10]
+            addi    r0, r0, 16
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000000100000
+# CHECK:    encoding: [0x20,0x00,0x00,0x20]
+            addi    r0, r0, 32
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000001000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x40]
+            addi    r0, r0, 64
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000010000000
+# CHECK:    encoding: [0x20,0x00,0x00,0x80]
+            addi    r0, r0, 128
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000000100000000
+# CHECK:    encoding: [0x20,0x00,0x01,0x00]
+            addi    r0, r0, 256
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000001000000000
+# CHECK:    encoding: [0x20,0x00,0x02,0x00]
+            addi    r0, r0, 512
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000010000000000
+# CHECK:    encoding: [0x20,0x00,0x04,0x00]
+            addi    r0, r0, 1024
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0000100000000000
+# CHECK:    encoding: [0x20,0x00,0x08,0x00]
+            addi    r0, r0, 2048
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0001000000000000
+# CHECK:    encoding: [0x20,0x00,0x10,0x00]
+            addi    r0, r0, 4096
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0010000000000000
+# CHECK:    encoding: [0x20,0x00,0x20,0x00]
+            addi    r0, r0, 8192
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 0100000000000000
+# CHECK:    encoding: [0x20,0x00,0x40,0x00]
+            addi    r0, r0, 16384
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111111111
+# CHECK:    encoding: [0x20,0x00,0xff,0xff]
+            addi    r0, r0, -1
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111111110
+# CHECK:    encoding: [0x20,0x00,0xff,0xfe]
+            addi    r0, r0, -2
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111111100
+# CHECK:    encoding: [0x20,0x00,0xff,0xfc]
+            addi    r0, r0, -4
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111111000
+# CHECK:    encoding: [0x20,0x00,0xff,0xf8]
+            addi    r0, r0, -8
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111110000
+# CHECK:    encoding: [0x20,0x00,0xff,0xf0]
+            addi    r0, r0, -16
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111100000
+# CHECK:    encoding: [0x20,0x00,0xff,0xe0]
+            addi    r0, r0, -32
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111111000000
+# CHECK:    encoding: [0x20,0x00,0xff,0xc0]
+            addi    r0, r0, -64
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111110000000
+# CHECK:    encoding: [0x20,0x00,0xff,0x80]
+            addi    r0, r0, -128
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111100000000
+# CHECK:    encoding: [0x20,0x00,0xff,0x00]
+            addi    r0, r0, -256
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111111000000000
+# CHECK:    encoding: [0x20,0x00,0xfe,0x00]
+            addi    r0, r0, -512
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111110000000000
+# CHECK:    encoding: [0x20,0x00,0xfc,0x00]
+            addi    r0, r0, -1024
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111100000000000
+# CHECK:    encoding: [0x20,0x00,0xf8,0x00]
+            addi    r0, r0, -2048
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1111000000000000
+# CHECK:    encoding: [0x20,0x00,0xf0,0x00]
+            addi    r0, r0, -4096
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1110000000000000
+# CHECK:    encoding: [0x20,0x00,0xe0,0x00]
+            addi    r0, r0, -8192
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1100000000000000
+# CHECK:    encoding: [0x20,0x00,0xc0,0x00]
+            addi    r0, r0, -16384
+
+# CHECK:    addi
+# BINARY:   001000 00000 00000 1000000000000000
+# CHECK:    encoding: [0x20,0x00,0x80,0x00]
+            addi    r0, r0, -32768
diff --git a/test/MC/MBlaze/mblaze_pattern.s b/test/MC/MBlaze/mblaze_pattern.s
new file mode 100644
index 000000000000..6bbc234e3d43
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_pattern.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to ensure that all FPU instructions can be parsed by the
+# assembly parser correctly.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   011011 00000 00000 00000 00000000000
+
+# CHECK:    pcmpbf
+# BINARY:   100000 00000 00001 00010 10000000000
+# CHECK:    encoding: [0x80,0x01,0x14,0x00]
+            pcmpbf      r0, r1, r2
+
+# CHECK:    pcmpne
+# BINARY:   100011 00000 00001 00010 10000000000
+# CHECK:    encoding: [0x8c,0x01,0x14,0x00]
+            pcmpne      r0, r1, r2
+
+# CHECK:    pcmpeq
+# BINARY:   100010 00000 00001 00010 10000000000
+# CHECK:    encoding: [0x88,0x01,0x14,0x00]
+            pcmpeq      r0, r1, r2
diff --git a/test/MC/MBlaze/mblaze_shift.s b/test/MC/MBlaze/mblaze_shift.s
new file mode 100644
index 000000000000..a25502b35094
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_shift.s
@@ -0,0 +1,47 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to make sure that all of the TYPE-A instructions supported by
+# the Microblaze can be parsed by the assembly parser.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   000000 00000 00000 00000 00000000000
+
+# CHECK:    bsrl
+# BINARY:   010001 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x44,0x22,0x18,0x00]
+            bsrl    r1, r2, r3
+
+# CHECK:    bsra
+# BINARY:   010001 00001 00010 00011 01000000000
+# CHECK:    encoding: [0x44,0x22,0x1a,0x00]
+            bsra    r1, r2, r3
+
+# CHECK:    bsll
+# BINARY:   010001 00001 00010 00011 10000000000
+# CHECK:    encoding: [0x44,0x22,0x1c,0x00]
+            bsll    r1, r2, r3
+
+# CHECK:    bsrli
+# BINARY:   011001 00001 00010 0000000000000000
+# CHECK:    encoding: [0x64,0x22,0x00,0x00]
+            bsrli   r1, r2, 0
+
+# CHECK:    bsrai
+# BINARY:   011001 00001 00010 0000001000000000
+# CHECK:    encoding: [0x64,0x22,0x02,0x00]
+            bsrai   r1, r2, 0
+
+# CHECK:    bslli
+# BINARY:   011001 00001 00010 0000010000000000
+# CHECK:    encoding: [0x64,0x22,0x04,0x00]
+            bslli   r1, r2, 0
+
+# CHECK:    sra
+# BINARY:   100100 00001 00010 00000 00000000001
+# CHECK:    encoding: [0x90,0x22,0x00,0x01]
+            sra     r1, r2
+
+# CHECK:    srl
+# BINARY:   100100 00001 00010 00000 00001000001
+# CHECK:    encoding: [0x90,0x22,0x00,0x41]
+            srl     r1, r2
diff --git a/test/MC/MBlaze/mblaze_special.s b/test/MC/MBlaze/mblaze_special.s
new file mode 100644
index 000000000000..c55ec277c18a
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_special.s
@@ -0,0 +1,167 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to ensure that all special instructions and special registers can be
+# parsed by the assembly parser correctly.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   011011 00000 00000 00000 00000000000
+
+# CHECK:    mfs
+# BINARY:   100101 00000 00000 10000 00000000000
+# CHECK:    encoding: [0x94,0x00,0x80,0x00]
+            mfs         r0, rpc
+
+# CHECK:    msrclr
+# BINARY:   100101 00000 100010 000000000000000
+# CHECK:    encoding: [0x94,0x11,0x00,0x00]
+            msrclr      r0, 0x0
+
+# CHECK:    msrset
+# BINARY:   100101 00000 100000 000000000000000
+# CHECK:    encoding: [0x94,0x10,0x00,0x00]
+            msrset      r0, 0x0
+
+# CHECK:    mts
+# BINARY:   100101 00000 00000 11 00000000000000
+# CHECK:    encoding: [0x94,0x00,0xc0,0x00]
+            mts         rpc, r0
+
+# CHECK:    wdc
+# BINARY:   100100 00000 00000 00001 00001100100
+# CHECK:    encoding: [0x90,0x00,0x08,0x64]
+            wdc         r0, r1
+
+# CHECK:    wdc.clear
+# BINARY:   100100 00000 00000 00001 00001100110
+# CHECK:    encoding: [0x90,0x00,0x08,0x66]
+            wdc.clear   r0, r1
+
+# CHECK:    wdc.flush
+# BINARY:   100100 00000 00000 00001 00001110100
+# CHECK:    encoding: [0x90,0x00,0x08,0x74]
+            wdc.flush   r0, r1
+
+# CHECK:    wic
+# BINARY:   100100 00000 00000 00001 00001101000
+# CHECK:    encoding: [0x90,0x00,0x08,0x68]
+            wic         r0, r1
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000000000
+# CHECK:    encoding: [0x94,0x20,0x80,0x00]
+            mfs         r1, rpc
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000000001
+# CHECK:    encoding: [0x94,0x20,0x80,0x01]
+            mfs         r1, rmsr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000000011
+# CHECK:    encoding: [0x94,0x20,0x80,0x03]
+            mfs         r1, rear
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000000101
+# CHECK:    encoding: [0x94,0x20,0x80,0x05]
+            mfs         r1, resr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000000111
+# CHECK:    encoding: [0x94,0x20,0x80,0x07]
+            mfs         r1, rfsr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000001011
+# CHECK:    encoding: [0x94,0x20,0x80,0x0b]
+            mfs         r1, rbtr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10000 00000001101
+# CHECK:    encoding: [0x94,0x20,0x80,0x0d]
+            mfs         r1, redr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10010 00000000000
+# CHECK:    encoding: [0x94,0x20,0x90,0x00]
+            mfs         r1, rpid
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10010 00000000001
+# CHECK:    encoding: [0x94,0x20,0x90,0x01]
+            mfs         r1, rzpr
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10010 00000000010
+# CHECK:    encoding: [0x94,0x20,0x90,0x02]
+            mfs         r1, rtlbx
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10010 00000000100
+# CHECK:    encoding: [0x94,0x20,0x90,0x04]
+            mfs         r1, rtlbhi
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10010 00000000011
+# CHECK:    encoding: [0x94,0x20,0x90,0x03]
+            mfs         r1, rtlblo
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000000
+# CHECK:    encoding: [0x94,0x20,0xa0,0x00]
+            mfs         r1, rpvr0
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000001
+# CHECK:    encoding: [0x94,0x20,0xa0,0x01]
+            mfs         r1, rpvr1
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000010
+# CHECK:    encoding: [0x94,0x20,0xa0,0x02]
+            mfs         r1, rpvr2
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000011
+# CHECK:    encoding: [0x94,0x20,0xa0,0x03]
+            mfs         r1, rpvr3
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000100
+# CHECK:    encoding: [0x94,0x20,0xa0,0x04]
+            mfs         r1, rpvr4
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000101
+# CHECK:    encoding: [0x94,0x20,0xa0,0x05]
+            mfs         r1, rpvr5
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000110
+# CHECK:    encoding: [0x94,0x20,0xa0,0x06]
+            mfs         r1, rpvr6
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000000111
+# CHECK:    encoding: [0x94,0x20,0xa0,0x07]
+            mfs         r1, rpvr7
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000001000
+# CHECK:    encoding: [0x94,0x20,0xa0,0x08]
+            mfs         r1, rpvr8
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000001001
+# CHECK:    encoding: [0x94,0x20,0xa0,0x09]
+            mfs         r1, rpvr9
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000001010
+# CHECK:    encoding: [0x94,0x20,0xa0,0x0a]
+            mfs         r1, rpvr10
+
+# CHECK:    mfs
+# BINARY:   100101 00001 00000 10100 00000001011
+# CHECK:    encoding: [0x94,0x20,0xa0,0x0b]
+            mfs         r1, rpvr11
diff --git a/test/MC/MBlaze/mblaze_typea.s b/test/MC/MBlaze/mblaze_typea.s
new file mode 100644
index 000000000000..a0735e482cbb
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_typea.s
@@ -0,0 +1,122 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to make sure that all of the TYPE-A instructions supported by
+# the Microblaze can be parsed by the assembly parser.
+
+# TYPE A:   OPCODE RD    RA    RB    FLAGS
+# BINARY:   000000 00000 00000 00000 00000000000
+
+# CHECK:    add
+# BINARY:   000000 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x00,0x22,0x18,0x00]
+            add     r1, r2, r3
+
+# CHECK:    addc
+# BINARY:   000010 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x08,0x22,0x18,0x00]
+            addc    r1, r2, r3
+
+# CHECK:    addk
+# BINARY:   000100 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x10,0x22,0x18,0x00]
+            addk    r1, r2, r3
+
+# CHECK:    addkc
+# BINARY:   000110 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x18,0x22,0x18,0x00]
+            addkc   r1, r2, r3
+
+# CHECK:    and
+# BINARY:   100001 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x84,0x22,0x18,0x00]
+            and     r1, r2, r3
+
+# CHECK:    andn
+# BINARY:   100011 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x8c,0x22,0x18,0x00]
+            andn    r1, r2, r3
+
+# CHECK:    cmp
+# BINARY:   000101 00001 00010 00011 00000000001
+# CHECK:    encoding: [0x14,0x22,0x18,0x01]
+            cmp     r1, r2, r3
+
+# CHECK:    cmpu
+# BINARY:   000101 00001 00010 00011 00000000011
+# CHECK:    encoding: [0x14,0x22,0x18,0x03]
+            cmpu    r1, r2, r3
+
+# CHECK:    idiv
+# BINARY:   010010 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x48,0x22,0x18,0x00]
+            idiv    r1, r2, r3
+
+# CHECK:    idivu
+# BINARY:   010010 00001 00010 00011 00000000010
+# CHECK:    encoding: [0x48,0x22,0x18,0x02]
+            idivu   r1, r2, r3
+
+# CHECK:    mul
+# BINARY:   010000 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x40,0x22,0x18,0x00]
+            mul    r1, r2, r3
+
+# CHECK:    mulh
+# BINARY:   010000 00001 00010 00011 00000000001
+# CHECK:    encoding: [0x40,0x22,0x18,0x01]
+            mulh   r1, r2, r3
+
+# CHECK:    mulhu
+# BINARY:   010000 00001 00010 00011 00000000011
+# CHECK:    encoding: [0x40,0x22,0x18,0x03]
+            mulhu  r1, r2, r3
+
+# CHECK:    mulhsu
+# BINARY:   010000 00001 00010 00011 00000000010
+# CHECK:    encoding: [0x40,0x22,0x18,0x02]
+            mulhsu r1, r2, r3
+
+# CHECK:    or
+# BINARY:   100000 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x80,0x22,0x18,0x00]
+            or      r1, r2, r3
+
+# CHECK:    rsub
+# BINARY:   000001 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x04,0x22,0x18,0x00]
+            rsub    r1, r2, r3
+
+# CHECK:    rsubc
+# BINARY:   000011 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x0c,0x22,0x18,0x00]
+            rsubc   r1, r2, r3
+
+# CHECK:    rsubk
+# BINARY:   000101 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x14,0x22,0x18,0x00]
+            rsubk   r1, r2, r3
+
+# CHECK:    rsubkc
+# BINARY:   000111 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x1c,0x22,0x18,0x00]
+            rsubkc  r1, r2, r3
+
+# CHECK:    sext16
+# BINARY:   100100 00001 00010 00000 00001100001
+# CHECK:    encoding: [0x90,0x22,0x00,0x61]
+            sext16  r1, r2
+
+# CHECK:    sext8
+# BINARY:   100100 00001 00010 00000 00001100000
+# CHECK:    encoding: [0x90,0x22,0x00,0x60]
+            sext8   r1, r2
+
+# CHECK:    xor
+# BINARY:   100010 00001 00010 00011 00000000000
+# CHECK:    encoding: [0x88,0x22,0x18,0x00]
+            xor     r1, r2, r3
+
+# CHECK:    nop
+# BINARY:   100000 00000 00000 00000 00000000000
+# CHECK:    encoding: [0x80,0x00,0x00,0x00]
+        nop
diff --git a/test/MC/MBlaze/mblaze_typeb.s b/test/MC/MBlaze/mblaze_typeb.s
new file mode 100644
index 000000000000..ac4f1e2932a7
--- /dev/null
+++ b/test/MC/MBlaze/mblaze_typeb.s
@@ -0,0 +1,92 @@
+# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
+
+# Test to make sure that all of the TYPE-B instructions supported by
+# the Microblaze can be parsed by the assembly parser.
+
+# TYPE B:   OPCODE RD    RA    IMMEDIATE
+#           000000 00000 00000 0000000000000000
+
+# CHECK:    addi
+# BINARY:   001000 00001 00010 0000000000001111
+# CHECK:    encoding: [0x20,0x22,0x00,0x0f]
+            addi    r1, r2, 0x000F
+
+# CHECK:    addic
+# BINARY:   001010 00001 00010 0000000000001111
+# CHECK:    encoding: [0x28,0x22,0x00,0x0f]
+            addic   r1, r2, 0x000F
+
+# CHECK:    addik
+# BINARY:   001100 00001 00010 0000000000001111
+# CHECK:    encoding: [0x30,0x22,0x00,0x0f]
+            addik   r1, r2, 0x000F
+
+# CHECK:    addikc
+# BINARY:   001110 00001 00010 0000000000001111
+# CHECK:    encoding: [0x38,0x22,0x00,0x0f]
+            addikc  r1, r2, 0x000F
+
+# CHECK:    andi
+# BINARY:   101001 00001 00010 0000000000001111
+# CHECK:    encoding: [0xa4,0x22,0x00,0x0f]
+            andi    r1, r2, 0x000F
+
+# CHECK:    andni
+# BINARY:   101011 00001 00010 0000000000001111
+# CHECK:    encoding: [0xac,0x22,0x00,0x0f]
+            andni   r1, r2, 0x000F
+
+# CHECK:    muli
+# BINARY:   011000 00001 00010 0000000000001111
+# CHECK:    encoding: [0x60,0x22,0x00,0x0f]
+            muli    r1, r2, 0x000F
+
+# CHECK:    ori
+# BINARY:   101000 00001 00010 0000000000001111
+# CHECK:    encoding: [0xa0,0x22,0x00,0x0f]
+            ori     r1, r2, 0x000F
+
+# CHECK:    rsubi
+# BINARY:   001001 00001 00010 0000000000001111
+# CHECK:    encoding: [0x24,0x22,0x00,0x0f]
+            rsubi   r1, r2, 0x000F
+
+# CHECK:    rsubic
+# BINARY:   001011 00001 00010 0000000000001111
+# CHECK:    encoding: [0x2c,0x22,0x00,0x0f]
+            rsubic  r1, r2, 0x000F
+
+# CHECK:    rsubik
+# BINARY:   001101 00001 00010 0000000000001111
+# CHECK:    encoding: [0x34,0x22,0x00,0x0f]
+            rsubik  r1, r2, 0x000F
+
+# CHECK:    rsubikc
+# BINARY:   001111 00001 00010 0000000000001111
+# CHECK:    encoding: [0x3c,0x22,0x00,0x0f]
+            rsubikc r1, r2, 0x000F
+
+# CHECK:    rtbd
+# BINARY:   101101 10010 01111 0000000000001111
+# CHECK:    encoding: [0xb6,0x4f,0x00,0x0f]
+            rtbd r15, 0x000F
+
+# CHECK:    rted
+# BINARY:   101101 10001 01111 0000000000001111
+# CHECK:    encoding: [0xb6,0x8f,0x00,0x0f]
+            rted r15, 0x000F
+
+# CHECK:    rtid
+# BINARY:   101101 10001 01111 0000000000001111
+# CHECK:    encoding: [0xb6,0x2f,0x00,0x0f]
+            rtid r15, 0x000F
+
+# CHECK:    rtsd
+# BINARY:   101101 10000 01111 0000000000001111
+# CHECK:    encoding: [0xb6,0x0f,0x00,0x0f]
+            rtsd r15, 0x000F
+
+# CHECK:    xori
+# BINARY:   101010 00001 00010 0000000000001111
+# CHECK:    encoding: [0xa8,0x22,0x00,0x0f]
+            xori r1, r2, 0x000F
diff --git a/test/MC/MachO/absolutize.s b/test/MC/MachO/absolutize.s
index 76acd5bccfbe..39571dddebd6 100644
--- a/test/MC/MachO/absolutize.s
+++ b/test/MC/MachO/absolutize.s
@@ -50,7 +50,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -103,7 +103,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:     (('word-0', 0xa1000000),
 // CHECK:      ('word-1', 0x2)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '1\xc01\xc01\xc01\xc0\xb8\xfe\xff\xff\xff\xb8\xfe\xff\xff\xff\xb8\x02\x00\x00\x00\xb8\x02\x00\x00\x00\xb8\x02\x00\x00\x00\xb8\x02\x00\x00\x00\xb8\xfe\xff\xff\xff')
+// CHECK:   ('_section_data', '31c031c0 31c031c0 b8feffff ffb8feff ffffb802 000000b8 02000000 b8020000 00b80200 0000b8fe ffffff')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -140,7 +140,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:     (('word-0', 0xa1000000),
 // CHECK:      ('word-1', 0x2f)),
 // CHECK:   ])
-// CHECK:   ('_section_data', "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfc\xff\xff\xff\xfc\xff\xff\xff\x04\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00'\x00\x00\x00")
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 fcffffff fcffffff 04000000 04000000 04000000 04000000 27000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/comm-1.s b/test/MC/MachO/comm-1.s
index e979fb139204..5ffa979eb342 100644
--- a/test/MC/MachO/comm-1.s
+++ b/test/MC/MachO/comm-1.s
@@ -10,7 +10,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 228)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/darwin-ARM-reloc.s b/test/MC/MachO/darwin-ARM-reloc.s
new file mode 100644
index 000000000000..86b45e07bf38
--- /dev/null
+++ b/test/MC/MachO/darwin-ARM-reloc.s
@@ -0,0 +1,171 @@
+@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+	.syntax unified
+        .text
+_f0:
+        bl _printf
+
+_f1:
+        bl _f0
+
+        .data
+_d0:
+Ld0_0:  
+        .long Lsc0_0 - Ld0_0
+        
+	.section	__TEXT,__cstring,cstring_literals
+Lsc0_0:
+        .long 0
+
+@ CHECK: ('cputype', 12)
+@ CHECK: ('cpusubtype', 9)
+@ CHECK: ('filetype', 1)
+@ CHECK: ('num_load_commands', 3)
+@ CHECK: ('load_commands_size', 364)
+@ CHECK: ('flag', 0)
+@ CHECK: ('load_commands', [
+@ CHECK:   # Load Command 0
+@ CHECK:  (('command', 1)
+@ CHECK:   ('size', 260)
+@ CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:   ('vm_addr', 0)
+@ CHECK:   ('vm_size', 16)
+@ CHECK:   ('file_offset', 392)
+@ CHECK:   ('file_size', 16)
+@ CHECK:   ('maxprot', 7)
+@ CHECK:   ('initprot', 7)
+@ CHECK:   ('num_sections', 3)
+@ CHECK:   ('flags', 0)
+@ CHECK:   ('sections', [
+@ CHECK:     # Section 0
+@ CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 0)
+@ CHECK:     ('size', 8)
+@ CHECK:     ('offset', 392)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 408)
+@ CHECK:     ('num_reloc', 2)
+@ CHECK:     ('flags', 0x80000400)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:     # Relocation 0
+@ CHECK:     (('word-0', 0x4),
+@ CHECK:      ('word-1', 0x55000001)),
+@ CHECK:     # Relocation 1
+@ CHECK:     (('word-0', 0x0),
+@ CHECK:      ('word-1', 0x5d000003)),
+@ CHECK:   ])
+@ CHECK:   ('_section_data', 'feffffeb fdffffeb')
+@ CHECK:     # Section 1
+@ CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 8)
+@ CHECK:     ('size', 4)
+@ CHECK:     ('offset', 400)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 424)
+@ CHECK:     ('num_reloc', 2)
+@ CHECK:     ('flags', 0x0)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:     # Relocation 0
+@ CHECK:     (('word-0', 0xa2000000),
+@ CHECK:      ('word-1', 0xc)),
+@ CHECK:     # Relocation 1
+@ CHECK:     (('word-0', 0xa1000000),
+@ CHECK:      ('word-1', 0x8)),
+@ CHECK:   ])
+@ CHECK:   ('_section_data', '04000000')
+@ CHECK:     # Section 2
+@ CHECK:    (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 12)
+@ CHECK:     ('size', 4)
+@ CHECK:     ('offset', 404)
+@ CHECK:     ('alignment', 0)
+@ CHECK:     ('reloc_offset', 0)
+@ CHECK:     ('num_reloc', 0)
+@ CHECK:     ('flags', 0x2)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:   ])
+@ CHECK:   ('_section_data', '00000000')
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 1
+@ CHECK:  (('command', 2)
+@ CHECK:   ('size', 24)
+@ CHECK:   ('symoff', 440)
+@ CHECK:   ('nsyms', 4)
+@ CHECK:   ('stroff', 488)
+@ CHECK:   ('strsize', 24)
+@ CHECK:   ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
+@ CHECK:   ('_symbols', [
+@ CHECK:     # Symbol 0
+@ CHECK:    (('n_strx', 9)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 1)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_f0')
+@ CHECK:    ),
+@ CHECK:     # Symbol 1
+@ CHECK:    (('n_strx', 13)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 1)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 4)
+@ CHECK:     ('_string', '_f1')
+@ CHECK:    ),
+@ CHECK:     # Symbol 2
+@ CHECK:    (('n_strx', 17)
+@ CHECK:     ('n_type', 0xe)
+@ CHECK:     ('n_sect', 2)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 8)
+@ CHECK:     ('_string', '_d0')
+@ CHECK:    ),
+@ CHECK:     # Symbol 3
+@ CHECK:    (('n_strx', 1)
+@ CHECK:     ('n_type', 0x1)
+@ CHECK:     ('n_sect', 0)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_printf')
+@ CHECK:    ),
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 2
+@ CHECK:  (('command', 11)
+@ CHECK:   ('size', 80)
+@ CHECK:   ('ilocalsym', 0)
+@ CHECK:   ('nlocalsym', 3)
+@ CHECK:   ('iextdefsym', 3)
+@ CHECK:   ('nextdefsym', 0)
+@ CHECK:   ('iundefsym', 3)
+@ CHECK:   ('nundefsym', 1)
+@ CHECK:   ('tocoff', 0)
+@ CHECK:   ('ntoc', 0)
+@ CHECK:   ('modtaboff', 0)
+@ CHECK:   ('nmodtab', 0)
+@ CHECK:   ('extrefsymoff', 0)
+@ CHECK:   ('nextrefsyms', 0)
+@ CHECK:   ('indirectsymoff', 0)
+@ CHECK:   ('nindirectsyms', 0)
+@ CHECK:   ('extreloff', 0)
+@ CHECK:   ('nextrel', 0)
+@ CHECK:   ('locreloff', 0)
+@ CHECK:   ('nlocrel', 0)
+@ CHECK:   ('_indirect_symbols', [
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK: ])
diff --git a/test/MC/MachO/darwin-Thumb-reloc.s b/test/MC/MachO/darwin-Thumb-reloc.s
new file mode 100644
index 000000000000..567573d9ef19
--- /dev/null
+++ b/test/MC/MachO/darwin-Thumb-reloc.s
@@ -0,0 +1,139 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+	.syntax unified
+	.section	__TEXT,__text,regular,pure_instructions
+	.globl	_main
+	.align	2
+	.code	16
+	.thumb_func	_main
+_main:
+LPC0_0:
+	blx	_printf
+	.align	2
+LCPI0_0:
+	.long	L_.str-(LPC0_0+4)
+
+	.section	__TEXT,__cstring,cstring_literals
+	.align	2
+L_.str:
+	.asciz	 "s0"
+
+.subsections_via_symbols
+
+@ CHECK: ('cputype', 12)
+@ CHECK: ('cpusubtype', 9)
+@ CHECK: ('filetype', 1)
+@ CHECK: ('num_load_commands', 3)
+@ CHECK: ('load_commands_size', 296)
+@ CHECK: ('flag', 8192)
+@ CHECK: ('load_commands', [
+@ CHECK:   # Load Command 0
+@ CHECK:  (('command', 1)
+@ CHECK:   ('size', 192)
+@ CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:   ('vm_addr', 0)
+@ CHECK:   ('vm_size', 11)
+@ CHECK:   ('file_offset', 324)
+@ CHECK:   ('file_size', 11)
+@ CHECK:   ('maxprot', 7)
+@ CHECK:   ('initprot', 7)
+@ CHECK:   ('num_sections', 2)
+@ CHECK:   ('flags', 0)
+@ CHECK:   ('sections', [
+@ CHECK:     # Section 0
+@ CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 0)
+@ CHECK:     ('size', 8)
+@ CHECK:     ('offset', 324)
+@ CHECK:     ('alignment', 2)
+@ CHECK:     ('reloc_offset', 336)
+@ CHECK:     ('num_reloc', 3)
+@ CHECK:     ('flags', 0x80000400)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:     # Relocation 0
+@ CHECK:     (('word-0', 0xa2000004),
+@ CHECK:      ('word-1', 0x8)),
+@ CHECK:     # Relocation 1
+@ CHECK:     (('word-0', 0xa1000000),
+@ CHECK:      ('word-1', 0x0)),
+@ CHECK:     # Relocation 2
+@ CHECK:     (('word-0', 0x0),
+@ CHECK:      ('word-1', 0x6d000001)),
+@ CHECK:   ])
+@ CHECK-FIXME:   ('_section_data', 'fff7feef 04000000')
+@ CHECK:     # Section 1
+@ CHECK:    (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK:     ('address', 8)
+@ CHECK:     ('size', 3)
+@ CHECK:     ('offset', 332)
+@ CHECK:     ('alignment', 2)
+@ CHECK:     ('reloc_offset', 0)
+@ CHECK:     ('num_reloc', 0)
+@ CHECK:     ('flags', 0x2)
+@ CHECK:     ('reserved1', 0)
+@ CHECK:     ('reserved2', 0)
+@ CHECK:    ),
+@ CHECK:   ('_relocations', [
+@ CHECK:   ])
+@ CHECK:   ('_section_data', '733000')
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 1
+@ CHECK:  (('command', 2)
+@ CHECK:   ('size', 24)
+@ CHECK:   ('symoff', 360)
+@ CHECK:   ('nsyms', 2)
+@ CHECK:   ('stroff', 384)
+@ CHECK:   ('strsize', 16)
+@ CHECK:   ('_string_data', '\x00_main\x00_printf\x00\x00')
+@ CHECK:   ('_symbols', [
+@ CHECK:     # Symbol 0
+@ CHECK:    (('n_strx', 1)
+@ CHECK:     ('n_type', 0xf)
+@ CHECK:     ('n_sect', 1)
+@ CHECK:     ('n_desc', 8)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_main')
+@ CHECK:    ),
+@ CHECK:     # Symbol 1
+@ CHECK:    (('n_strx', 7)
+@ CHECK:     ('n_type', 0x1)
+@ CHECK:     ('n_sect', 0)
+@ CHECK:     ('n_desc', 0)
+@ CHECK:     ('n_value', 0)
+@ CHECK:     ('_string', '_printf')
+@ CHECK:    ),
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK:   # Load Command 2
+@ CHECK:  (('command', 11)
+@ CHECK:   ('size', 80)
+@ CHECK:   ('ilocalsym', 0)
+@ CHECK:   ('nlocalsym', 0)
+@ CHECK:   ('iextdefsym', 0)
+@ CHECK:   ('nextdefsym', 1)
+@ CHECK:   ('iundefsym', 1)
+@ CHECK:   ('nundefsym', 1)
+@ CHECK:   ('tocoff', 0)
+@ CHECK:   ('ntoc', 0)
+@ CHECK:   ('modtaboff', 0)
+@ CHECK:   ('nmodtab', 0)
+@ CHECK:   ('extrefsymoff', 0)
+@ CHECK:   ('nextrefsyms', 0)
+@ CHECK:   ('indirectsymoff', 0)
+@ CHECK:   ('nindirectsyms', 0)
+@ CHECK:   ('extreloff', 0)
+@ CHECK:   ('nextrel', 0)
+@ CHECK:   ('locreloff', 0)
+@ CHECK:   ('nlocrel', 0)
+@ CHECK:   ('_indirect_symbols', [
+@ CHECK:   ])
+@ CHECK:  ),
+@ CHECK: ])
diff --git a/test/MC/MachO/darwin-complex-difference.s b/test/MC/MachO/darwin-complex-difference.s
new file mode 100644
index 000000000000..e66bd096711f
--- /dev/null
+++ b/test/MC/MachO/darwin-complex-difference.s
@@ -0,0 +1,129 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o %t.o
+// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: FileCheck < %t.dump %s
+        
+_a:
+L0:     
+        .long 1
+L1:     
+        .long 2
+        .long _c - _d + 4
+        .long (_c - L0) - (_d - L1) // == (_c - _d) + (L1 - L0)
+                                    // == (_c - _d + 4)
+_c:
+        .long 0
+_d:
+        .long 0
+
+// CHECK: ('cputype', 16777223)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 3)
+// CHECK: ('load_commands_size', 256)
+// CHECK: ('flag', 0)
+// CHECK: ('reserved', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 25)
+// CHECK:   ('size', 152)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 24)
+// CHECK:   ('file_offset', 288)
+// CHECK:   ('file_size', 24)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 24)
+// CHECK:     ('offset', 288)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 312)
+// CHECK:     ('num_reloc', 4)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:     ('reserved3', 0)
+// CHECK:    ),
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0xc),
+// CHECK:      ('word-1', 0x5c000002)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0xc),
+// CHECK:      ('word-1', 0xc000001)),
+// CHECK:     # Relocation 2
+// CHECK:     (('word-0', 0x8),
+// CHECK:      ('word-1', 0x5c000002)),
+// CHECK:     # Relocation 3
+// CHECK:     (('word-0', 0x8),
+// CHECK:      ('word-1', 0xc000001)),
+// CHECK:   ])
+// CHECK:   ('_section_data', '01000000 02000000 04000000 04000000 00000000 00000000')
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 344)
+// CHECK:   ('nsyms', 3)
+// CHECK:   ('stroff', 392)
+// CHECK:   ('strsize', 12)
+// CHECK:   ('_string_data', '\x00_a\x00_c\x00_d\x00\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', '_a')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 4)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 16)
+// CHECK:     ('_string', '_c')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 7)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 20)
+// CHECK:     ('_string', '_d')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 3)
+// CHECK:   ('iextdefsym', 3)
+// CHECK:   ('nextdefsym', 0)
+// CHECK:   ('iundefsym', 3)
+// CHECK:   ('nundefsym', 0)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s
index 38fa074fde22..449d2f593e73 100644
--- a/test/MC/MachO/darwin-x86_64-diff-relocs.s
+++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s
@@ -120,7 +120,7 @@ L3:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
diff --git a/test/MC/MachO/darwin-x86_64-reloc-offsets.s b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
index ab6820e4a82a..f748064b2bf9 100644
--- a/test/MC/MachO/darwin-x86_64-reloc-offsets.s
+++ b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
@@ -117,7 +117,7 @@ L1:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -277,7 +277,7 @@ L1:
 // CHECK:     (('word-0', 0x2),
 // CHECK:      ('word-1', 0x6d000000)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xc6\x05\xff\xff\xff\xff\x12\xc6\x05\x00\x00\x00\x00\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\xc7\x05\xfd\xff\xff\xffxV4\x12\xc7\x05\xfe\xff\xff\xffxV4\x12\xc7\x05\xff\xff\xff\xffxV4\x12\xc7\x05\x00\x00\x00\x00xV4\x12\x88\x05\x00\x00\x00\x00\x88\x05\x01\x00\x00\x00\x89\x05\x00\x00\x00\x00\x89\x05\x01\x00\x00\x00\x89\x05\x02\x00\x00\x00\x89\x05\x03\x00\x00\x00\x89\x05\x04\x00\x00\x00\xc6\x05\xdd\x00\x00\x00\x12\xc6\x05\xd7\x00\x00\x00\x12\xc7\x05\xcc\x00\x00\x00xV4\x12\xc7\x05\xc3\x00\x00\x00xV4\x12\xc7\x05\xba\x00\x00\x00xV4\x12\xc7\x05\xb1\x00\x00\x00xV4\x12\xc7\x05\xa8\x00\x00\x00xV4\x12\x88\x05\x9e\x00\x00\x00\x88\x05\x99\x00\x00\x00\x89\x05\x92\x00\x00\x00\x89\x05\x8d\x00\x00\x00\x89\x05\x88\x00\x00\x00\x89\x05\x83\x00\x00\x00\x89\x05~\x00\x00\x00\xc6\x05\x03\x00\x00\x00\x12\xc6\x05\x04\x00\x00\x00\x12\xc7\x05\x00\x00\x00\x00xV4\x12\xc7\x05\x01\x00\x00\x00xV4\x12\xc7\x05\x02\x00\x00\x00xV4\x12\xc7\x05\x03\x00\x00\x00xV4\x12\xc7\x05\x04\x00\x00\x00xV4\x12\x88\x05\x04\x00\x00\x00\x88\x05\x05\x00\x00\x00\x89\x05\x04\x00\x00\x00\x89\x05\x05\x00\x00\x00\x89\x05\x06\x00\x00\x00\x89\x05\x07\x00\x00\x00\x89\x05\x08\x00\x00\x00')
+// CHECK:   ('_section_data', 'c605ffff ffff12c6 05000000 0012c705 fcffffff 78563412 c705fdff ffff7856 3412c705 feffffff 78563412 c705ffff ffff7856 3412c705 00000000 78563412 88050000 00008805 01000000 89050000 00008905 01000000 89050200 00008905 03000000 89050400 0000c605 dd000000 12c605d7 00000012 c705cc00 00007856 3412c705 c3000000 78563412 c705ba00 00007856 3412c705 b1000000 78563412 c705a800 00007856 34128805 9e000000 88059900 00008905 92000000 89058d00 00008905 88000000 89058300 00008905 7e000000 c6050300 000012c6 05040000 0012c705 00000000 78563412 c7050100 00007856 3412c705 02000000 78563412 c7050300 00007856 3412c705 04000000 78563412 88050400 00008805 05000000 89050400 00008905 05000000 89050600 00008905 07000000 89050800 0000')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -294,7 +294,7 @@ L1:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/darwin-x86_64-reloc.s b/test/MC/MachO/darwin-x86_64-reloc.s
index 19b434565a29..83c0de788f86 100644
--- a/test/MC/MachO/darwin-x86_64-reloc.s
+++ b/test/MC/MachO/darwin-x86_64-reloc.s
@@ -90,7 +90,7 @@ L6:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 496)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -139,7 +139,7 @@ L6:
 // CHECK:     (('word-0', 0x4),
 // CHECK:      ('word-1', 0x4d000008)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x1f\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/\x01\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 04000000 04000000 00000000 1f010000 00000000 00000000 00000000 2f010000 00000000')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -252,7 +252,7 @@ L6:
 // CHECK:     (('word-0', 0x2),
 // CHECK:      ('word-1', 0x2d000000)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x05\x1f\xff\xff\xff\x8b\x05,\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x050\x00\x00\x00H\x83=\xff\xff\xff\xff\x00')
+// CHECK:   ('_section_data', 'c3e80000 0000e804 00000048 8b050000 0000ff35 00000000 8b050000 00008b05 04000000 c605ffff ffff12c7 05fcffff ff785634 12000000 00000000 00040000 00000000 00000000 00000000 00040000 00000000 00000000 00488d05 2c000000 488d0514 00000083 05130000 00066681 05120000 00f40181 05100000 00f40100 00909090 90909090 90909090 902c0000 00000000 00140000 00000000 00e4ffff ffffffff ffd4ffff ffffffff ff2c0000 00000000 0083c000 03042503 0000008b 051fffff ff8b052c 0000008b 05000000 008b0530 00000048 833dffff ffff00')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__debug_frame\x00\x00\x00')
 // CHECK:     ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -275,7 +275,7 @@ L6:
 // CHECK:     (('word-0', 0x0),
 // CHECK:      ('word-1', 0x6000002)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xd5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', 'd5000000 00000000 00000000 00000000')
 // CHECK:     # Section 3
 // CHECK:    (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -292,7 +292,7 @@ L6:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/diff-with-two-sections.s b/test/MC/MachO/diff-with-two-sections.s
new file mode 100644
index 000000000000..b5e09885f318
--- /dev/null
+++ b/test/MC/MachO/diff-with-two-sections.s
@@ -0,0 +1,64 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+	.section	__TEXT,__text,regular,pure_instructions
+Leh_func_begin0:
+	.section	__TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+Ltmp3:
+Ltmp4 = Leh_func_begin0-Ltmp3
+	.long	Ltmp4
+
+// CHECK:      ('cputype', 7)
+// CHECK-NEXT: ('cpusubtype', 3)
+// CHECK-NEXT: ('filetype', 1)
+// CHECK-NEXT: ('num_load_commands', 1)
+// CHECK-NEXT: ('load_commands_size', 192)
+// CHECK-NEXT: ('flag', 0)
+// CHECK-NEXT: ('load_commands', [
+// CHECK-NEXT:   # Load Command 0
+// CHECK-NEXT:  (('command', 1)
+// CHECK-NEXT:   ('size', 192)
+// CHECK-NEXT:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:   ('vm_addr', 0)
+// CHECK-NEXT:   ('vm_size', 4)
+// CHECK-NEXT:   ('file_offset', 220)
+// CHECK-NEXT:   ('file_size', 4)
+// CHECK-NEXT:   ('maxprot', 7)
+// CHECK-NEXT:   ('initprot', 7)
+// CHECK-NEXT:   ('num_sections', 2)
+// CHECK-NEXT:   ('flags', 0)
+// CHECK-NEXT:   ('sections', [
+// CHECK-NEXT:     # Section 0
+// CHECK-NEXT:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 0)
+// CHECK-NEXT:     ('size', 0)
+// CHECK-NEXT:     ('offset', 220)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 0)
+// CHECK-NEXT:     ('num_reloc', 0)
+// CHECK-NEXT:     ('flags', 0x80000000)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '')
+// CHECK-NEXT:     # Section 1
+// CHECK-NEXT:    (('section_name', '__eh_frame\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 0)
+// CHECK-NEXT:     ('size', 4)
+// CHECK-NEXT:     ('offset', 220)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 0)
+// CHECK-NEXT:     ('num_reloc', 0)
+// CHECK-NEXT:     ('flags', 0x6800000b)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '00000000')
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ])
diff --git a/test/MC/MachO/direction_labels.s b/test/MC/MachO/direction_labels.s
index 5a68f7f51dff..e224ed3a1473 100644
--- a/test/MC/MachO/direction_labels.s
+++ b/test/MC/MachO/direction_labels.s
@@ -14,7 +14,7 @@ direction_labels:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 228)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -46,7 +46,7 @@ direction_labels:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x90\xeb\xfd\x90u\x00\x90u\xfd\xeb\x00\x90\xc3')
+// CHECK:   ('_section_data', '90ebfd90 75009075 fdeb0090 c3')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/empty-dwarf-lines.s b/test/MC/MachO/empty-dwarf-lines.s
new file mode 100644
index 000000000000..4bdc16b55f5c
--- /dev/null
+++ b/test/MC/MachO/empty-dwarf-lines.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+// This tests that when producing files for darwin9 or older we make sure
+// that debug_line sections are of a minimum size to avoid the linker bug
+// described in PR8715.
+
+        .section        __DATA,__data
+        .file   1 "test.c"
+        .globl  _c                      ## @c
+_c:
+        .asciz   "hi\n"
+
+// CHECK:      (('section_name', '__debug_line\x00\x00\x00\x00')
+// CHECK-NEXT:  ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:  ('address', 4)
+// CHECK-NEXT:  ('size', 44)
+// CHECK-NEXT:  ('offset', 452)
+// CHECK-NEXT:  ('alignment', 0)
+// CHECK-NEXT:  ('reloc_offset', 0)
+// CHECK-NEXT:  ('num_reloc', 0)
+// CHECK-NEXT:  ('flags', 0x2000000)
+// CHECK-NEXT:  ('reserved1', 0)
+// CHECK-NEXT:  ('reserved2', 0)
+// CHECK-NEXT:  ('reserved3', 0)
+// CHECK-NEXT: ),
diff --git a/test/MC/MachO/indirect-symbols.s b/test/MC/MachO/indirect-symbols.s
index 45a62f6bb2cd..90fd23154754 100644
--- a/test/MC/MachO/indirect-symbols.s
+++ b/test/MC/MachO/indirect-symbols.s
@@ -23,7 +23,7 @@ _e:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 364)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -71,7 +71,7 @@ _e:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4\xf4')
+// CHECK:   ('_section_data', 'f4f4f4f4 f4f4f4f4 f4f4f4f4 f4f4f4')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__pointers\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__IMPORT\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -87,7 +87,7 @@ _e:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/jcc.s b/test/MC/MachO/jcc.s
index 7640429a79de..2288a20fa273 100644
--- a/test/MC/MachO/jcc.s
+++ b/test/MC/MachO/jcc.s
@@ -10,7 +10,7 @@
 1: nop
    jc 1f
 1: nop
-   jcxz 1f
+   jecxz 1f
 1: nop
    jecxz 1f
 1: nop
@@ -100,7 +100,7 @@
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', 'w\x00\x90s\x00\x90r\x00\x90v\x00\x90r\x00\x90\xe3\x00\x90\xe3\x00\x90t\x00\x90\x7f\x00\x90}\x00\x90|\x00\x90~\x00\x90v\x00\x90r\x00\x90s\x00\x90w\x00\x90s\x00\x90u\x00\x90~\x00\x90|\x00\x90}\x00\x90\x7f\x00\x90q\x00\x90{\x00\x90y\x00\x90u\x00\x90p\x00\x90z\x00\x90z\x00\x90{\x00\x90x\x00\x90t\x00\x90')
+// CHECK:   ('_section_data', '77009073 00907200 90760090 720090e3 0090e300 90740090 7f00907d 00907c00 907e0090 76009072 00907300 90770090 73009075 00907e00 907c0090 7d00907f 00907100 907b0090 79009075 00907000 907a0090 7a00907b 00907800 90740090')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK: ])
diff --git a/test/MC/MachO/lcomm-attributes.s b/test/MC/MachO/lcomm-attributes.s
index 2685395e1ede..1e9592438fe0 100644
--- a/test/MC/MachO/lcomm-attributes.s
+++ b/test/MC/MachO/lcomm-attributes.s
@@ -17,7 +17,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/loc.s b/test/MC/MachO/loc.s
new file mode 100644
index 000000000000..6e7faa3bf9aa
--- /dev/null
+++ b/test/MC/MachO/loc.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+        .file	1 "foo"
+	.loc	1 64 0
+        nop
+
+// CHECK:         # Section 1
+// CHECK-NEXT:   (('section_name', '__debug_line\x00\x00\x00\x00')
+// CHECK-NEXT:    ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:    ('address', 1)
+// CHECK-NEXT:    ('size', 51)
+// CHECK-NEXT:    ('offset', 221)
+// CHECK-NEXT:    ('alignment', 0)
+// CHECK-NEXT:    ('reloc_offset', 272)
+// CHECK-NEXT:    ('num_reloc', 1)
+// CHECK-NEXT:    ('flags', 0x2000000)
+// CHECK-NEXT:    ('reserved1', 0)
+// CHECK-NEXT:    ('reserved2', 0)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('word-0', 0x27),
+// CHECK-NEXT:     ('word-1', 0x4000001)),
+// CHECK-NEXT:  ])
+// CHECK-NEXT:  ('_section_data', '2f000000 02001a00 00000101 fb0e0d00 01010101 00000001 00000100 666f6f00 00000000 00050200 00000003 3f010201 000101')
diff --git a/test/MC/MachO/pcrel-to-other-section.s b/test/MC/MachO/pcrel-to-other-section.s
new file mode 100644
index 000000000000..22a7822d9576
--- /dev/null
+++ b/test/MC/MachO/pcrel-to-other-section.s
@@ -0,0 +1,107 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+nop
+	.section	__TEXT,__StaticInit,regular,pure_instructions
+	calll	foo
+
+// CHECK:      ('cputype', 7)
+// CHECK-NEXT: ('cpusubtype', 3)
+// CHECK-NEXT: ('filetype', 1)
+// CHECK-NEXT: ('num_load_commands', 3)
+// CHECK-NEXT: ('load_commands_size', 296)
+// CHECK-NEXT: ('flag', 0)
+// CHECK-NEXT: ('load_commands', [
+// CHECK-NEXT:   # Load Command 0
+// CHECK-NEXT:  (('command', 1)
+// CHECK-NEXT:   ('size', 192)
+// CHECK-NEXT:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:   ('vm_addr', 0)
+// CHECK-NEXT:   ('vm_size', 6)
+// CHECK-NEXT:   ('file_offset', 324)
+// CHECK-NEXT:   ('file_size', 6)
+// CHECK-NEXT:   ('maxprot', 7)
+// CHECK-NEXT:   ('initprot', 7)
+// CHECK-NEXT:   ('num_sections', 2)
+// CHECK-NEXT:   ('flags', 0)
+// CHECK-NEXT:   ('sections', [
+// CHECK-NEXT:     # Section 0
+// CHECK-NEXT:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 0)
+// CHECK-NEXT:     ('size', 1)
+// CHECK-NEXT:     ('offset', 324)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 0)
+// CHECK-NEXT:     ('num_reloc', 0)
+// CHECK-NEXT:     ('flags', 0x80000400)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '90')
+// CHECK-NEXT:     # Section 1
+// CHECK-NEXT:    (('section_name', '__StaticInit\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 1)
+// CHECK-NEXT:     ('size', 5)
+// CHECK-NEXT:     ('offset', 325)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 332)
+// CHECK-NEXT:     ('num_reloc', 1)
+// CHECK-NEXT:     ('flags', 0x80000400)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:     # Relocation 0
+// CHECK-NEXT:     (('word-0', 0x1),
+// CHECK-NEXT:      ('word-1', 0xd000000)),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', 'e8faffff ff')
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT:   # Load Command 1
+// CHECK-NEXT:  (('command', 2)
+// CHECK-NEXT:   ('size', 24)
+// CHECK-NEXT:   ('symoff', 340)
+// CHECK-NEXT:   ('nsyms', 1)
+// CHECK-NEXT:   ('stroff', 352)
+// CHECK-NEXT:   ('strsize', 8)
+// CHECK-NEXT:   ('_string_data', '\x00foo\x00\x00\x00\x00')
+// CHECK-NEXT:   ('_symbols', [
+// CHECK-NEXT:     # Symbol 0
+// CHECK-NEXT:    (('n_strx', 1)
+// CHECK-NEXT:     ('n_type', 0x1)
+// CHECK-NEXT:     ('n_sect', 0)
+// CHECK-NEXT:     ('n_desc', 0)
+// CHECK-NEXT:     ('n_value', 0)
+// CHECK-NEXT:     ('_string', 'foo')
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT:   # Load Command 2
+// CHECK-NEXT:  (('command', 11)
+// CHECK-NEXT:   ('size', 80)
+// CHECK-NEXT:   ('ilocalsym', 0)
+// CHECK-NEXT:   ('nlocalsym', 0)
+// CHECK-NEXT:   ('iextdefsym', 0)
+// CHECK-NEXT:   ('nextdefsym', 0)
+// CHECK-NEXT:   ('iundefsym', 0)
+// CHECK-NEXT:   ('nundefsym', 1)
+// CHECK-NEXT:   ('tocoff', 0)
+// CHECK-NEXT:   ('ntoc', 0)
+// CHECK-NEXT:   ('modtaboff', 0)
+// CHECK-NEXT:   ('nmodtab', 0)
+// CHECK-NEXT:   ('extrefsymoff', 0)
+// CHECK-NEXT:   ('nextrefsyms', 0)
+// CHECK-NEXT:   ('indirectsymoff', 0)
+// CHECK-NEXT:   ('nindirectsyms', 0)
+// CHECK-NEXT:   ('extreloff', 0)
+// CHECK-NEXT:   ('nextrel', 0)
+// CHECK-NEXT:   ('locreloff', 0)
+// CHECK-NEXT:   ('nlocrel', 0)
+// CHECK-NEXT:   ('_indirect_symbols', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ])
diff --git a/test/MC/MachO/relax-jumps.s b/test/MC/MachO/relax-jumps.s
index 9c58aa768258..65a51e92b37c 100644
--- a/test/MC/MachO/relax-jumps.s
+++ b/test/MC/MachO/relax-jumps.s
@@ -10,9 +10,9 @@
 //    diff $f.{as,mc}.dump)
 // to examine the results in a more sensible fashion.
 
-// CHECK: ('_section_data', '\x90
-// CHECK: \x0f\x842\xff\xff\xff\x0f\x82\xe6\x00\x00\x00\x0f\x87&\xff\xff\xff\x0f\x8f\xda\x00\x00\x00\x0f\x88\x1a\xff\xff\xff\x0f\x83\xce\x00\x00\x00\x0f\x89\x0e\xff\xff\xff\x90
-// CHECK: \x901\xc0')
+// CHECK: ('_section_data', '90
+// CHECK: 0f8432ff ffff0f82 e6000000 0f8726ff ffff0f8f da000000 0f881aff ffff0f83 ce000000 0f890eff ffff90
+// CHECK: 9031c0')
 
 L1:
         .space 200, 0x90
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index 46dc3a9c57d8..e0f12bf4ba2f 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -4,7 +4,7 @@
 // CHECK: (('word-0', 0x1),
 // CHECK: ('word-1', 0x5000002)),
 // CHECK-NEXT: ])
-// CHECK: ('_section_data', '\xe8\xfb\xff\xff\xff')
+// CHECK: ('_section_data', 'e8fbffff ff')
 
         .data
         .long 0
diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s
index f91f42559b0f..f6a3446b51b4 100644
--- a/test/MC/MachO/reloc.s
+++ b/test/MC/MachO/reloc.s
@@ -37,6 +37,7 @@ L0:
         .text
 _f0:
 L1:
+        jmp	0xbabecafe
         jmp L0
         jmp L1
         ret
@@ -55,7 +56,7 @@ _f1:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 364)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -64,9 +65,9 @@ _f1:
 // CHECK:   ('size', 260)
 // CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 71)
+// CHECK:   ('vm_size', 76)
 // CHECK:   ('file_offset', 392)
-// CHECK:   ('file_size', 71)
+// CHECK:   ('file_size', 76)
 // CHECK:   ('maxprot', 7)
 // CHECK:   ('initprot', 7)
 // CHECK:   ('num_sections', 3)
@@ -76,29 +77,32 @@ _f1:
 // CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('address', 0)
-// CHECK:     ('size', 8)
+// CHECK:     ('size', 13)
 // CHECK:     ('offset', 392)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 464)
-// CHECK:     ('num_reloc', 1)
+// CHECK:     ('reloc_offset', 468)
+// CHECK:     ('num_reloc', 2)
 // CHECK:     ('flags', 0x80000400)
 // CHECK:     ('reserved1', 0)
 // CHECK:     ('reserved2', 0)
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x1),
+// CHECK:     (('word-0', 0x6),
 // CHECK:      ('word-1', 0x5000003)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x1),
+// CHECK:      ('word-1', 0x5000000)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xe9:\x00\x00\x00\xeb\xf9\xc3')
+// CHECK:   ('_section_data', 'e9f9cabe bae93a00 0000ebf4 c3')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 8)
+// CHECK:     ('address', 13)
 // CHECK:     ('size', 51)
-// CHECK:     ('offset', 400)
+// CHECK:     ('offset', 405)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 472)
+// CHECK:     ('reloc_offset', 484)
 // CHECK:     ('num_reloc', 11)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
@@ -113,22 +117,22 @@ _f1:
 // CHECK:      ('word-1', 0xc000007)),
 // CHECK:     # Relocation 2
 // CHECK:     (('word-0', 0x8000002a),
-// CHECK:      ('word-1', 0x18)),
+// CHECK:      ('word-1', 0x1d)),
 // CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0x90000028),
-// CHECK:      ('word-1', 0x18)),
+// CHECK:      ('word-1', 0x1d)),
 // CHECK:     # Relocation 4
 // CHECK:     (('word-0', 0xa0000024),
-// CHECK:      ('word-1', 0x18)),
+// CHECK:      ('word-1', 0x1d)),
 // CHECK:     # Relocation 5
 // CHECK:     (('word-0', 0xa0000020),
-// CHECK:      ('word-1', 0x18)),
+// CHECK:      ('word-1', 0x1d)),
 // CHECK:     # Relocation 6
 // CHECK:     (('word-0', 0xa4000014),
-// CHECK:      ('word-1', 0x1c)),
+// CHECK:      ('word-1', 0x21)),
 // CHECK:     # Relocation 7
 // CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x24)),
+// CHECK:      ('word-1', 0x29)),
 // CHECK:     # Relocation 8
 // CHECK:     (('word-0', 0x8),
 // CHECK:      ('word-1', 0x4000002)),
@@ -139,15 +143,15 @@ _f1:
 // CHECK:     (('word-0', 0x0),
 // CHECK:      ('word-1', 0xc000009)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q\x00\x00\x00\x00\x04\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 04000000 15000000 00000000 00000000 ed000000 00000000 00000000 1e000000 27000000 31007600 00000004 000000')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 59)
+// CHECK:     ('address', 64)
 // CHECK:     ('size', 12)
-// CHECK:     ('offset', 451)
+// CHECK:     ('offset', 456)
 // CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 560)
+// CHECK:     ('reloc_offset', 572)
 // CHECK:     ('num_reloc', 4)
 // CHECK:     ('flags', 0x0)
 // CHECK:     ('reserved1', 0)
@@ -162,20 +166,20 @@ _f1:
 // CHECK:      ('word-1', 0x4000003)),
 // CHECK:     # Relocation 2
 // CHECK:     (('word-0', 0xa4000000),
-// CHECK:      ('word-1', 0x18)),
+// CHECK:      ('word-1', 0x1d)),
 // CHECK:     # Relocation 3
 // CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x3b)),
+// CHECK:      ('word-1', 0x40)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xfe\xff\xff\xff?\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', 'feffffff 44000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
-// CHECK:   ('symoff', 592)
+// CHECK:   ('symoff', 604)
 // CHECK:   ('nsyms', 10)
-// CHECK:   ('stroff', 712)
+// CHECK:   ('stroff', 724)
 // CHECK:   ('strsize', 88)
 // CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00.objc_class_name_A\x00_f1\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00\x00')
 // CHECK:   ('_symbols', [
@@ -184,7 +188,7 @@ _f1:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 20)
+// CHECK:     ('n_value', 25)
 // CHECK:     ('_string', 'local_a')
 // CHECK:    ),
 // CHECK:     # Symbol 1
@@ -192,7 +196,7 @@ _f1:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 24)
+// CHECK:     ('n_value', 29)
 // CHECK:     ('_string', 'local_a_elt')
 // CHECK:    ),
 // CHECK:     # Symbol 2
@@ -200,7 +204,7 @@ _f1:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 28)
+// CHECK:     ('n_value', 33)
 // CHECK:     ('_string', 'local_b')
 // CHECK:    ),
 // CHECK:     # Symbol 3
@@ -208,7 +212,7 @@ _f1:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 36)
+// CHECK:     ('n_value', 41)
 // CHECK:     ('_string', 'local_c')
 // CHECK:    ),
 // CHECK:     # Symbol 4
@@ -216,7 +220,7 @@ _f1:
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 59)
+// CHECK:     ('n_value', 64)
 // CHECK:     ('_string', 'bar')
 // CHECK:    ),
 // CHECK:     # Symbol 5
@@ -240,7 +244,7 @@ _f1:
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 128)
-// CHECK:     ('n_value', 8)
+// CHECK:     ('n_value', 13)
 // CHECK:     ('_string', '_f1')
 // CHECK:    ),
 // CHECK:     # Symbol 8
@@ -248,7 +252,7 @@ _f1:
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 16)
+// CHECK:     ('n_value', 21)
 // CHECK:     ('_string', 'local_a_ext')
 // CHECK:    ),
 // CHECK:     # Symbol 9
diff --git a/test/MC/MachO/section-align-1.s b/test/MC/MachO/section-align-1.s
index 6a5e247c938e..360c0a842313 100644
--- a/test/MC/MachO/section-align-1.s
+++ b/test/MC/MachO/section-align-1.s
@@ -9,7 +9,7 @@ name:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 228)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/section-align-2.s b/test/MC/MachO/section-align-2.s
index e0704734a8e6..e0d7b8df8f48 100644
--- a/test/MC/MachO/section-align-2.s
+++ b/test/MC/MachO/section-align-2.s
@@ -17,7 +17,7 @@ baz:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 364)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/string-table.s b/test/MC/MachO/string-table.s
index b811a0b82fcf..179528eaae52 100644
--- a/test/MC/MachO/string-table.s
+++ b/test/MC/MachO/string-table.s
@@ -5,7 +5,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 228)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -43,7 +43,7 @@
 // CHECK:     (('word-0', 0x2),
 // CHECK:      ('word-1', 0xc000001)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xc7\x05\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', 'c7050000 00000000 0000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/symbol-diff.s b/test/MC/MachO/symbol-diff.s
new file mode 100644
index 000000000000..1483df16333c
--- /dev/null
+++ b/test/MC/MachO/symbol-diff.s
@@ -0,0 +1,122 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+_g:
+LFB2:
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+_g.eh:
+	.quad	LFB2-.
+
+// CHECK:      ('cputype', 16777223)
+// CHECK-NEXT: ('cpusubtype', 3)
+// CHECK-NEXT: ('filetype', 1)
+// CHECK-NEXT: ('num_load_commands', 3)
+// CHECK-NEXT: ('load_commands_size', 336)
+// CHECK-NEXT: ('flag', 0)
+// CHECK-NEXT: ('reserved', 0)
+// CHECK-NEXT: ('load_commands', [
+// CHECK-NEXT:   # Load Command 0
+// CHECK-NEXT:  (('command', 25)
+// CHECK-NEXT:   ('size', 232)
+// CHECK-NEXT:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:   ('vm_addr', 0)
+// CHECK-NEXT:   ('vm_size', 8)
+// CHECK-NEXT:   ('file_offset', 368)
+// CHECK-NEXT:   ('file_size', 8)
+// CHECK-NEXT:   ('maxprot', 7)
+// CHECK-NEXT:   ('initprot', 7)
+// CHECK-NEXT:   ('num_sections', 2)
+// CHECK-NEXT:   ('flags', 0)
+// CHECK-NEXT:   ('sections', [
+// CHECK-NEXT:    # Section 0
+// CHECK-NEXT:   (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:    ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:    ('address', 0)
+// CHECK-NEXT:    ('size', 0)
+// CHECK-NEXT:    ('offset', 368)
+// CHECK-NEXT:    ('alignment', 0)
+// CHECK-NEXT:    ('reloc_offset', 0)
+// CHECK-NEXT:    ('num_reloc', 0)
+// CHECK-NEXT:    ('flags', 0x80000000)
+// CHECK-NEXT:    ('reserved1', 0)
+// CHECK-NEXT:    ('reserved2', 0)
+// CHECK-NEXT:    ('reserved3', 0)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:  ])
+// CHECK-NEXT:  ('_section_data', '')
+// CHECK-NEXT:    # Section 1
+// CHECK-NEXT:   (('section_name', '__eh_frame\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:    ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:    ('address', 0)
+// CHECK-NEXT:    ('size', 8)
+// CHECK-NEXT:    ('offset', 368)
+// CHECK-NEXT:    ('alignment', 0)
+// CHECK-NEXT:    ('reloc_offset', 376)
+// CHECK-NEXT:    ('num_reloc', 2)
+// CHECK-NEXT:    ('flags', 0x6800000b)
+// CHECK-NEXT:    ('reserved1', 0)
+// CHECK-NEXT:    ('reserved2', 0)
+// CHECK-NEXT:    ('reserved3', 0)
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('word-0', 0x0),
+// CHECK-NEXT:     ('word-1', 0x5e000001)),
+// CHECK-NEXT:    # Relocation 1
+// CHECK-NEXT:    (('word-0', 0x0),
+// CHECK-NEXT:     ('word-1', 0xe000000)),
+// CHECK-NEXT:  ])
+// CHECK-NEXT:  ('_section_data', '00000000 00000000')
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Load Command 1
+// CHECK-NEXT: (('command', 2)
+// CHECK-NEXT:  ('size', 24)
+// CHECK-NEXT:  ('symoff', 392)
+// CHECK-NEXT:  ('nsyms', 2)
+// CHECK-NEXT:  ('stroff', 424)
+// CHECK-NEXT:  ('strsize', 12)
+// CHECK-NEXT:  ('_string_data', '\x00_g\x00_g.eh\x00\x00\x00')
+// CHECK-NEXT:  ('_symbols', [
+// CHECK-NEXT:    # Symbol 0
+// CHECK-NEXT:   (('n_strx', 1)
+// CHECK-NEXT:    ('n_type', 0xe)
+// CHECK-NEXT:    ('n_sect', 1)
+// CHECK-NEXT:    ('n_desc', 0)
+// CHECK-NEXT:    ('n_value', 0)
+// CHECK-NEXT:    ('_string', '_g')
+// CHECK-NEXT:   ),
+// CHECK-NEXT:    # Symbol 1
+// CHECK-NEXT:   (('n_strx', 4)
+// CHECK-NEXT:    ('n_type', 0xe)
+// CHECK-NEXT:    ('n_sect', 2)
+// CHECK-NEXT:    ('n_desc', 0)
+// CHECK-NEXT:    ('n_value', 0)
+// CHECK-NEXT:    ('_string', '_g.eh')
+// CHECK-NEXT:   ),
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
+// CHECK-NEXT:  # Load Command 2
+// CHECK-NEXT: (('command', 11)
+// CHECK-NEXT:  ('size', 80)
+// CHECK-NEXT:  ('ilocalsym', 0)
+// CHECK-NEXT:  ('nlocalsym', 2)
+// CHECK-NEXT:  ('iextdefsym', 2)
+// CHECK-NEXT:  ('nextdefsym', 0)
+// CHECK-NEXT:  ('iundefsym', 2)
+// CHECK-NEXT:  ('nundefsym', 0)
+// CHECK-NEXT:  ('tocoff', 0)
+// CHECK-NEXT:  ('ntoc', 0)
+// CHECK-NEXT:  ('modtaboff', 0)
+// CHECK-NEXT:  ('nmodtab', 0)
+// CHECK-NEXT:  ('extrefsymoff', 0)
+// CHECK-NEXT:  ('nextrefsyms', 0)
+// CHECK-NEXT:  ('indirectsymoff', 0)
+// CHECK-NEXT:  ('nindirectsyms', 0)
+// CHECK-NEXT:  ('extreloff', 0)
+// CHECK-NEXT:  ('nextrel', 0)
+// CHECK-NEXT:  ('locreloff', 0)
+// CHECK-NEXT:  ('nlocrel', 0)
+// CHECK-NEXT:  ('_indirect_symbols', [
+// CHECK-NEXT:  ])
+// CHECK-NEXT: ),
+// CHECK-NEXT:])
diff --git a/test/MC/MachO/symbol-flags.s b/test/MC/MachO/symbol-flags.s
index 705fa39fad5d..7a4f8e4031ae 100644
--- a/test/MC/MachO/symbol-flags.s
+++ b/test/MC/MachO/symbol-flags.s
@@ -49,6 +49,9 @@ sym_private_ext_C:
 
         .no_dead_strip sym_no_dead_strip_A
 
+sym_symbol_resolver_A:
+	.symbol_resolver sym_symbol_resolver_A
+
         .reference sym_ref_A
         .desc sym_ref_A, 1
         .desc sym_ref_A, 0x1234
@@ -59,7 +62,7 @@ sym_desc_flags:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -112,10 +115,10 @@ sym_desc_flags:
 // CHECK:  (('command', 2)
 // CHECK:   ('size', 24)
 // CHECK:   ('symoff', 324)
-// CHECK:   ('nsyms', 23)
-// CHECK:   ('stroff', 600)
-// CHECK:   ('strsize', 368)
-// CHECK:   ('_string_data', '\x00sym_ref_A\x00sym_ref_def_D\x00sym_ref_def_E\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_weak_def_B\x00sym_weak_def_C\x00sym_lazy_ref_A\x00sym_lazy_ref_D\x00sym_lazy_ref_E\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_private_ext_E\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_desc_flags\x00\x00\x00\x00')
+// CHECK:   ('nsyms', 24)
+// CHECK:   ('stroff', 612)
+// CHECK:   ('strsize', 388)
+// CHECK:   ('_string_data', '\x00sym_ref_A\x00sym_ref_def_D\x00sym_ref_def_E\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_weak_def_B\x00sym_weak_def_C\x00sym_lazy_ref_A\x00sym_lazy_ref_D\x00sym_lazy_ref_E\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_private_ext_E\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_symbol_resolver_A\x00sym_desc_flags\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
 // CHECK:    (('n_strx', 254)
@@ -169,11 +172,19 @@ sym_desc_flags:
 // CHECK:    (('n_strx', 350)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
+// CHECK:     ('n_desc', 256)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_symbol_resolver_A')
+// CHECK:    ),
+// CHECK:     # Symbol 7
+// CHECK:    (('n_strx', 372)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 64)
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_desc_flags')
 // CHECK:    ),
-// CHECK:     # Symbol 7
+// CHECK:     # Symbol 8
 // CHECK:    (('n_strx', 162)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
@@ -181,7 +192,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_B')
 // CHECK:    ),
-// CHECK:     # Symbol 8
+// CHECK:     # Symbol 9
 // CHECK:    (('n_strx', 180)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
@@ -189,7 +200,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_C')
 // CHECK:    ),
-// CHECK:     # Symbol 9
+// CHECK:     # Symbol 10
 // CHECK:    (('n_strx', 54)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
@@ -197,7 +208,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_weak_def_A')
 // CHECK:    ),
-// CHECK:     # Symbol 10
+// CHECK:     # Symbol 11
 // CHECK:    (('n_strx', 69)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
@@ -205,7 +216,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_weak_def_B')
 // CHECK:    ),
-// CHECK:     # Symbol 11
+// CHECK:     # Symbol 12
 // CHECK:    (('n_strx', 84)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
@@ -213,7 +224,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_weak_def_C')
 // CHECK:    ),
-// CHECK:     # Symbol 12
+// CHECK:     # Symbol 13
 // CHECK:    (('n_strx', 99)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -221,7 +232,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_lazy_ref_A')
 // CHECK:    ),
-// CHECK:     # Symbol 13
+// CHECK:     # Symbol 14
 // CHECK:    (('n_strx', 114)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -229,7 +240,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_lazy_ref_D')
 // CHECK:    ),
-// CHECK:     # Symbol 14
+// CHECK:     # Symbol 15
 // CHECK:    (('n_strx', 129)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -237,7 +248,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_lazy_ref_E')
 // CHECK:    ),
-// CHECK:     # Symbol 15
+// CHECK:     # Symbol 16
 // CHECK:    (('n_strx', 234)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -245,7 +256,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_no_dead_strip_A')
 // CHECK:    ),
-// CHECK:     # Symbol 16
+// CHECK:     # Symbol 17
 // CHECK:    (('n_strx', 144)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
@@ -253,7 +264,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_A')
 // CHECK:    ),
-// CHECK:     # Symbol 17
+// CHECK:     # Symbol 18
 // CHECK:    (('n_strx', 198)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
@@ -261,7 +272,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_D')
 // CHECK:    ),
-// CHECK:     # Symbol 18
+// CHECK:     # Symbol 19
 // CHECK:    (('n_strx', 216)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
@@ -269,7 +280,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_private_ext_E')
 // CHECK:    ),
-// CHECK:     # Symbol 19
+// CHECK:     # Symbol 20
 // CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -277,7 +288,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_ref_A')
 // CHECK:    ),
-// CHECK:     # Symbol 20
+// CHECK:     # Symbol 21
 // CHECK:    (('n_strx', 11)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -285,7 +296,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_ref_def_D')
 // CHECK:    ),
-// CHECK:     # Symbol 21
+// CHECK:     # Symbol 22
 // CHECK:    (('n_strx', 25)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -293,7 +304,7 @@ sym_desc_flags:
 // CHECK:     ('n_value', 0)
 // CHECK:     ('_string', 'sym_ref_def_E')
 // CHECK:    ),
-// CHECK:     # Symbol 22
+// CHECK:     # Symbol 23
 // CHECK:    (('n_strx', 39)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
@@ -307,10 +318,10 @@ sym_desc_flags:
 // CHECK:  (('command', 11)
 // CHECK:   ('size', 80)
 // CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 7)
-// CHECK:   ('iextdefsym', 7)
+// CHECK:   ('nlocalsym', 8)
+// CHECK:   ('iextdefsym', 8)
 // CHECK:   ('nextdefsym', 5)
-// CHECK:   ('iundefsym', 12)
+// CHECK:   ('iundefsym', 13)
 // CHECK:   ('nundefsym', 11)
 // CHECK:   ('tocoff', 0)
 // CHECK:   ('ntoc', 0)
diff --git a/test/MC/MachO/symbol-indirect.s b/test/MC/MachO/symbol-indirect.s
index 461291a3c909..2412970322bf 100644
--- a/test/MC/MachO/symbol-indirect.s
+++ b/test/MC/MachO/symbol-indirect.s
@@ -72,7 +72,7 @@ sym_nlp_G:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 364)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/symbols-1.s b/test/MC/MachO/symbols-1.s
index 623e5285fe5b..cf05afa7509f 100644
--- a/test/MC/MachO/symbols-1.s
+++ b/test/MC/MachO/symbols-1.s
@@ -20,7 +20,7 @@ Lsym_asm_temp:
 // CHECK-X86_32: ('cputype', 7)
 // CHECK-X86_32: ('cpusubtype', 3)
 // CHECK-X86_32: ('filetype', 1)
-// CHECK-X86_32: ('num_load_commands', 1)
+// CHECK-X86_32: ('num_load_commands', 3)
 // CHECK-X86_32: ('load_commands_size', 228)
 // CHECK-X86_32: ('flag', 0)
 // CHECK-X86_32: ('load_commands', [
@@ -164,7 +164,7 @@ Lsym_asm_temp:
 // CHECK-X86_64: ('cputype', 16777223)
 // CHECK-X86_64: ('cpusubtype', 3)
 // CHECK-X86_64: ('filetype', 1)
-// CHECK-X86_64: ('num_load_commands', 1)
+// CHECK-X86_64: ('num_load_commands', 3)
 // CHECK-X86_64: ('load_commands_size', 256)
 // CHECK-X86_64: ('flag', 0)
 // CHECK-X86_64: ('reserved', 0)
diff --git a/test/MC/MachO/tbss.s b/test/MC/MachO/tbss.s
index 2131ea469cda..8eae14296865 100644
--- a/test/MC/MachO/tbss.s
+++ b/test/MC/MachO/tbss.s
@@ -6,7 +6,7 @@
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -57,7 +57,7 @@
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01\x03\x00\x00\x00')
+// CHECK:   ('_section_data', 'cffaedfe 07000001 03000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/tdata.s b/test/MC/MachO/tdata.s
index 64f88b514eb2..4829ca73a519 100644
--- a/test/MC/MachO/tdata.s
+++ b/test/MC/MachO/tdata.s
@@ -6,7 +6,7 @@ _a$tlv$init:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -57,7 +57,7 @@ _a$tlv$init:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x04\x00\x00\x00')
+// CHECK:   ('_section_data', '04000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/thread_init_func.s b/test/MC/MachO/thread_init_func.s
index eeab6e1b2945..d3ead83fd255 100644
--- a/test/MC/MachO/thread_init_func.s
+++ b/test/MC/MachO/thread_init_func.s
@@ -57,7 +57,7 @@
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK: ])
diff --git a/test/MC/MachO/tls.s b/test/MC/MachO/tls.s
index 07ea0aa16a33..44b61beeb489 100644
--- a/test/MC/MachO/tls.s
+++ b/test/MC/MachO/tls.s
@@ -48,7 +48,7 @@ _b:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 496)
 // CHECK: ('flag', 8192)
 // CHECK: ('reserved', 0)
@@ -99,7 +99,7 @@ _b:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x04\x00\x00\x00\x05\x00\x00\x00')
+// CHECK:   ('_section_data', '04000000 05000000')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__thread_vars\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -140,7 +140,7 @@ _b:
 // CHECK:     (('word-0', 0x0),
 // CHECK:      ('word-1', 0xe000008)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
 // CHECK:     # Section 3
 // CHECK:    (('section_name', '__thread_bss\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -157,7 +157,7 @@ _b:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01')
+// CHECK:   ('_section_data', 'cffaedfe 07000001')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/tlv-reloc.s b/test/MC/MachO/tlv-reloc.s
index 04fc7aeda550..d11124184723 100644
--- a/test/MC/MachO/tlv-reloc.s
+++ b/test/MC/MachO/tlv-reloc.s
@@ -24,7 +24,7 @@ _foo:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 416)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -61,7 +61,7 @@ _foo:
 // CHECK:     (('word-0', 0x3),
 // CHECK:      ('word-1', 0x9d000001)),
 // CHECK:   ])
-// CHECK:   ('_section_data', 'H\x8b=\x00\x00\x00\x00\xff\x17\xc3')
+// CHECK:   ('_section_data', '488b3d00 000000ff 17c3')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__thread_data\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -78,7 +78,7 @@ _foo:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x04\x00\x00\x00')
+// CHECK:   ('_section_data', '04000000')
 // CHECK:     # Section 2
 // CHECK:    (('section_name', '__thread_vars\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -101,7 +101,7 @@ _foo:
 // CHECK:     (('word-0', 0x0),
 // CHECK:      ('word-1', 0xe000003)),
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/tlv.s b/test/MC/MachO/tlv.s
index 7dd739014f69..0fe028e7d501 100644
--- a/test/MC/MachO/tlv.s
+++ b/test/MC/MachO/tlv.s
@@ -10,7 +10,7 @@ _a:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -61,7 +61,7 @@ _a:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/values.s b/test/MC/MachO/values.s
index 2a472ab60711..96115990636e 100644
--- a/test/MC/MachO/values.s
+++ b/test/MC/MachO/values.s
@@ -20,7 +20,7 @@ data_def_ext:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/weakdef.s b/test/MC/MachO/weakdef.s
new file mode 100644
index 000000000000..494079df5004
--- /dev/null
+++ b/test/MC/MachO/weakdef.s
@@ -0,0 +1,141 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+	.section	__DATA,__datacoal_nt,coalesced
+	.section	__TEXT,__const_coal,coalesced
+	.globl	__ZTS3optIbE            ## @_ZTS3optIbE
+	.weak_definition	__ZTS3optIbE
+__ZTS3optIbE:
+
+
+	.section	__DATA,__datacoal_nt,coalesced
+	.globl	__ZTI3optIbE            ## @_ZTI3optIbE
+	.weak_definition	__ZTI3optIbE
+
+__ZTI3optIbE:
+	.long	__ZTS3optIbE
+
+// CHECK:      ('cputype', 7)
+// CHECK-NEXT: ('cpusubtype', 3)
+// CHECK-NEXT: ('filetype', 1)
+// CHECK-NEXT: ('num_load_commands', 3)
+// CHECK-NEXT: ('load_commands_size', 364)
+// CHECK-NEXT: ('flag', 0)
+// CHECK-NEXT: ('load_commands', [
+// CHECK-NEXT:   # Load Command 0
+// CHECK-NEXT:  (('command', 1)
+// CHECK-NEXT:   ('size', 260)
+// CHECK-NEXT:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:   ('vm_addr', 0)
+// CHECK-NEXT:   ('vm_size', 4)
+// CHECK-NEXT:   ('file_offset', 392)
+// CHECK-NEXT:   ('file_size', 4)
+// CHECK-NEXT:   ('maxprot', 7)
+// CHECK-NEXT:   ('initprot', 7)
+// CHECK-NEXT:   ('num_sections', 3)
+// CHECK-NEXT:   ('flags', 0)
+// CHECK-NEXT:   ('sections', [
+// CHECK-NEXT:     # Section 0
+// CHECK-NEXT:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 0)
+// CHECK-NEXT:     ('size', 0)
+// CHECK-NEXT:     ('offset', 392)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 0)
+// CHECK-NEXT:     ('num_reloc', 0)
+// CHECK-NEXT:     ('flags', 0x80000000)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '')
+// CHECK-NEXT:     # Section 1
+// CHECK-NEXT:    (('section_name', '__datacoal_nt\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 0)
+// CHECK-NEXT:     ('size', 4)
+// CHECK-NEXT:     ('offset', 392)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 396)
+// CHECK-NEXT:     ('num_reloc', 1)
+// CHECK-NEXT:     ('flags', 0xb)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:     # Relocation 0
+// CHECK-NEXT:     (('word-0', 0x0),
+// CHECK-NEXT:      ('word-1', 0xc000001)),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '00000000')
+// CHECK-NEXT:     # Section 2
+// CHECK-NEXT:    (('section_name', '__const_coal\x00\x00\x00\x00')
+// CHECK-NEXT:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT:     ('address', 4)
+// CHECK-NEXT:     ('size', 0)
+// CHECK-NEXT:     ('offset', 396)
+// CHECK-NEXT:     ('alignment', 0)
+// CHECK-NEXT:     ('reloc_offset', 0)
+// CHECK-NEXT:     ('num_reloc', 0)
+// CHECK-NEXT:     ('flags', 0xb)
+// CHECK-NEXT:     ('reserved1', 0)
+// CHECK-NEXT:     ('reserved2', 0)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:   ('_section_data', '')
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT:   # Load Command 1
+// CHECK-NEXT:  (('command', 2)
+// CHECK-NEXT:   ('size', 24)
+// CHECK-NEXT:   ('symoff', 404)
+// CHECK-NEXT:   ('nsyms', 2)
+// CHECK-NEXT:   ('stroff', 428)
+// CHECK-NEXT:   ('strsize', 28)
+// CHECK-NEXT:   ('_string_data', '\x00__ZTS3optIbE\x00__ZTI3optIbE\x00\x00')
+// CHECK-NEXT:   ('_symbols', [
+// CHECK-NEXT:     # Symbol 0
+// CHECK-NEXT:    (('n_strx', 14)
+// CHECK-NEXT:     ('n_type', 0xf)
+// CHECK-NEXT:     ('n_sect', 2)
+// CHECK-NEXT:     ('n_desc', 128)
+// CHECK-NEXT:     ('n_value', 0)
+// CHECK-NEXT:     ('_string', '__ZTI3optIbE')
+// CHECK-NEXT:    ),
+// CHECK-NEXT:     # Symbol 1
+// CHECK-NEXT:    (('n_strx', 1)
+// CHECK-NEXT:     ('n_type', 0xf)
+// CHECK-NEXT:     ('n_sect', 3)
+// CHECK-NEXT:     ('n_desc', 128)
+// CHECK-NEXT:     ('n_value', 4)
+// CHECK-NEXT:     ('_string', '__ZTS3optIbE')
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT:   # Load Command 2
+// CHECK-NEXT:  (('command', 11)
+// CHECK-NEXT:   ('size', 80)
+// CHECK-NEXT:   ('ilocalsym', 0)
+// CHECK-NEXT:   ('nlocalsym', 0)
+// CHECK-NEXT:   ('iextdefsym', 0)
+// CHECK-NEXT:   ('nextdefsym', 2)
+// CHECK-NEXT:   ('iundefsym', 2)
+// CHECK-NEXT:   ('nundefsym', 0)
+// CHECK-NEXT:   ('tocoff', 0)
+// CHECK-NEXT:   ('ntoc', 0)
+// CHECK-NEXT:   ('modtaboff', 0)
+// CHECK-NEXT:   ('nmodtab', 0)
+// CHECK-NEXT:   ('extrefsymoff', 0)
+// CHECK-NEXT:   ('nextrefsyms', 0)
+// CHECK-NEXT:   ('indirectsymoff', 0)
+// CHECK-NEXT:   ('nindirectsyms', 0)
+// CHECK-NEXT:   ('extreloff', 0)
+// CHECK-NEXT:   ('nextrel', 0)
+// CHECK-NEXT:   ('locreloff', 0)
+// CHECK-NEXT:   ('nlocrel', 0)
+// CHECK-NEXT:   ('_indirect_symbols', [
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
+// CHECK-NEXT: ])
diff --git a/test/MC/MachO/x86_32-optimal_nop.s b/test/MC/MachO/x86_32-optimal_nop.s
index e6d41d79bfca..24751409bdb4 100644
--- a/test/MC/MachO/x86_32-optimal_nop.s
+++ b/test/MC/MachO/x86_32-optimal_nop.s
@@ -104,7 +104,7 @@
         ret
         ret
         # nopw %cs:0L(%[re]ax,%[re]ax,1)
-        # 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
+        # 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
         .align 4, 0x90
         ret
 # 12 byte nop test
@@ -160,7 +160,7 @@ f0:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
@@ -192,7 +192,7 @@ f0:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xc3\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x90\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\x0f\x1f@\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\xc3\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\xc3\xc3f\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3\xc3\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3\xc3f\x0f\x1fD\x00\x00f\x0f\x1fD\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\xc3f\x0f\x1fD\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x80\x00\x00\x00\x00\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x0f\x1f\x80\x00\x00\x00\x00\x0f\x1f\x84\x00\x00\x00\x00\x00\xc3')
+// CHECK:   ('_section_data', 'c390c300 00000000 00000000 00000000 c3c36690 c3000000 00000000 00000000 c30f1f00 c3000000 00000000 00000000 c3c3c3c3 0f1f4000 c3000000 00000000 c3c3c30f 1f440000 c3000000 00000000 c3c3660f 1f440000 c3000000 00000000 c30f1f80 00000000 c3000000 00000000 c3c3c3c3 c3c3c3c3 c3000000 00000000 c3c3c3c3 c3c3c366 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 c3c3c366 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 c366662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c366 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c36666 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3666666 6666662e 0f1f8400 00000000 c3')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -208,7 +208,7 @@ f0:
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x00\x00\x00\x00')
+// CHECK:   ('_section_data', '00909090 90909090 90909090 90909090 00000000')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/MachO/x86_32-symbols.s b/test/MC/MachO/x86_32-symbols.s
index 629ba7de7ced..35ada354d29e 100644
--- a/test/MC/MachO/x86_32-symbols.s
+++ b/test/MC/MachO/x86_32-symbols.s
@@ -124,7 +124,7 @@ D39:
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 2608)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/x86_64-symbols.s b/test/MC/MachO/x86_64-symbols.s
index c5c39a6fd47f..804cee84791e 100644
--- a/test/MC/MachO/x86_64-symbols.s
+++ b/test/MC/MachO/x86_64-symbols.s
@@ -124,7 +124,7 @@ D38:
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 2656)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
diff --git a/test/MC/MachO/zerofill-1.s b/test/MC/MachO/zerofill-1.s
index a175d4c4d01e..805a7861e1fe 100644
--- a/test/MC/MachO/zerofill-1.s
+++ b/test/MC/MachO/zerofill-1.s
@@ -11,7 +11,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 364)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/zerofill-2.s b/test/MC/MachO/zerofill-2.s
index e76de8453cbd..16577e41d03a 100644
--- a/test/MC/MachO/zerofill-2.s
+++ b/test/MC/MachO/zerofill-2.s
@@ -8,7 +8,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/zerofill-3.s b/test/MC/MachO/zerofill-3.s
index e7f4c7b44418..cc81fa863420 100644
--- a/test/MC/MachO/zerofill-3.s
+++ b/test/MC/MachO/zerofill-3.s
@@ -22,7 +22,7 @@
 // CHECK: ('cputype', 7)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 296)
 // CHECK: ('flag', 0)
 // CHECK: ('load_commands', [
diff --git a/test/MC/MachO/zerofill-5.s b/test/MC/MachO/zerofill-5.s
index 3074f60d3f57..91f251b9983c 100644
--- a/test/MC/MachO/zerofill-5.s
+++ b/test/MC/MachO/zerofill-5.s
@@ -9,7 +9,7 @@
 // CHECK: ('cputype', 16777223)
 // CHECK: ('cpusubtype', 3)
 // CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
+// CHECK: ('num_load_commands', 3)
 // CHECK: ('load_commands_size', 336)
 // CHECK: ('flag', 0)
 // CHECK: ('reserved', 0)
@@ -43,7 +43,7 @@
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\x02\x00\x00\x00')
+// CHECK:   ('_section_data', '02000000')
 // CHECK:     # Section 1
 // CHECK:    (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
@@ -60,7 +60,7 @@
 // CHECK:    ),
 // CHECK:   ('_relocations', [
 // CHECK:   ])
-// CHECK:   ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01')
+// CHECK:   ('_section_data', 'cffaedfe 07000001')
 // CHECK:   ])
 // CHECK:  ),
 // CHECK:   # Load Command 1
diff --git a/test/MC/X86/3DNow.s b/test/MC/X86/3DNow.s
new file mode 100644
index 000000000000..4dc68aecf4fa
--- /dev/null
+++ b/test/MC/X86/3DNow.s
@@ -0,0 +1,92 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// PR8283
+
+// CHECK: pavgusb %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xbf]
+pavgusb	%mm2, %mm1
+
+// CHECK: pavgusb 9(%esi,%edx), %mm3 # encoding: [0x0f,0x0f,0x5c,0x16,0x09,0xbf]
+pavgusb	9(%esi,%edx), %mm3
+
+        
+// CHECK: pf2id %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x1d]
+pf2id	%mm2, %mm1
+
+// CHECK: pf2id 9(%esi,%edx), %mm3 # encoding: [0x0f,0x0f,0x5c,0x16,0x09,0x1d]
+pf2id	9(%esi,%edx), %mm3
+
+// CHECK: pfacc %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xae]
+pfacc	%mm2, %mm1
+
+// CHECK: pfadd %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x9e]
+pfadd	%mm2, %mm1
+
+// CHECK: pfcmpeq %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xb0]
+pfcmpeq	%mm2, %mm1
+
+// CHECK: pfcmpge %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x90]
+pfcmpge	%mm2, %mm1
+
+// CHECK: pfcmpgt %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xa0]
+pfcmpgt	%mm2, %mm1
+
+// CHECK: pfmax %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xa4]
+pfmax	%mm2, %mm1
+
+// CHECK: pfmin %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x94]
+pfmin	%mm2, %mm1
+
+// CHECK: pfmul %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xb4]
+pfmul	%mm2, %mm1
+
+// CHECK: pfrcp %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x96]
+pfrcp	%mm2, %mm1
+
+// CHECK: pfrcpit1 %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xa6]
+pfrcpit1	%mm2, %mm1
+
+// CHECK: pfrcpit2 %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xb6]
+pfrcpit2	%mm2, %mm1
+
+// CHECK: pfrsqit1 %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xa7]
+pfrsqit1	%mm2, %mm1
+
+// CHECK: pfrsqrt %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x97]
+pfrsqrt	%mm2, %mm1
+
+// CHECK: pfsub %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x9a]
+pfsub	%mm2, %mm1
+
+// CHECK: pfsubr %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xaa]
+pfsubr	%mm2, %mm1
+
+// CHECK: pi2fd %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x0d]
+pi2fd	%mm2, %mm1
+
+// CHECK: pmulhrw %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xb7]
+pmulhrw	%mm2, %mm1
+
+
+// CHECK: femms # encoding: [0x0f,0x0e]
+femms
+
+// CHECK: prefetch (%rax)   # encoding: [0x0f,0x0d,0x00]
+// CHECK: prefetchw (%rax)  # encoding: [0x0f,0x0d,0x08]
+prefetch (%rax)
+prefetchw (%rax)
+
+
+// CHECK: pf2iw %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x1c]
+pf2iw %mm2, %mm1
+
+// CHECK: pi2fw %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x0c]
+pi2fw %mm2, %mm1
+
+// CHECK: pfnacc %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x8a]
+pfnacc %mm2, %mm1
+
+// CHECK: pfpnacc %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0x8e]
+pfpnacc %mm2, %mm1
+
+// CHECK: pswapd %mm2, %mm1  # encoding: [0x0f,0x0f,0xca,0xbb]
+pswapd %mm2, %mm1
diff --git a/test/MC/X86/dg.exp b/test/MC/X86/dg.exp
new file mode 100644
index 000000000000..ec87b695b7ef
--- /dev/null
+++ b/test/MC/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/MC/X86/x86-32-avx.s b/test/MC/X86/x86-32-avx.s
new file mode 100644
index 000000000000..1927e4e7a6b9
--- /dev/null
+++ b/test/MC/X86/x86-32-avx.s
@@ -0,0 +1,3283 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vaddss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x58,0xd4]
+          vaddss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x59,0xd4]
+          vmulss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x5c,0xd4]
+          vsubss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivss  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xca,0x5e,0xd4]
+          vdivss  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x58,0xd4]
+          vaddsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x59,0xd4]
+          vmulsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x5c,0xd4]
+          vsubsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivsd  %xmm4, %xmm6, %xmm2
+// CHECK:  encoding: [0xc5,0xcb,0x5e,0xd4]
+          vdivsd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vaddss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vsubss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vmulss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vdivss  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vaddsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vsubsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vmulsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK:  encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vdivsd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x58,0xd4]
+          vaddps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4]
+          vsubps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x59,0xd4]
+          vmulps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivps  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4]
+          vdivps  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x58,0xd4]
+          vaddpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4]
+          vsubpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x59,0xd4]
+          vmulpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivpd  %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4]
+          vdivpd  %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vaddps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vsubps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vmulps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vdivps  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vaddpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vsubpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vmulpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          vdivpd  3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: vmaxss  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5f,0xf2]
+          vmaxss  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxsd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2]
+          vmaxsd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminss  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5d,0xf2]
+          vminss  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminsd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2]
+          vminsd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxss  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc]
+          vmaxss  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc]
+          vmaxsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminss  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc]
+          vminss  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
+          vminsd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
+          vmaxps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
+          vmaxpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
+          vminps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vminpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
+          vminpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
+          vmaxps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
+          vmaxpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
+          vminps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
+          vminpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
+          vandps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
+          vandpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
+          vandps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
+          vandpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
+          vorps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vorpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
+          vorpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
+          vorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
+          vorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
+          vxorps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
+          vxorpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
+          vxorps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
+          vxorpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnps  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
+          vandnps  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnpd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
+          vandnpd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
+          vandnps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
+          vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmovss  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
+          vmovss  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovss  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x10,0xec]
+          vmovss  %xmm4, %xmm2, %xmm5
+
+// CHECK: vmovsd  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
+          vmovsd  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovsd  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
+          vmovsd  %xmm4, %xmm2, %xmm5
+
+// CHECK: vunpckhps  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
+          vunpckhps  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhpd  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
+          vunpckhpd  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklps  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
+          vunpcklps  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklpd  %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
+          vunpcklpd  %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
+          vunpckhps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
+          vunpckhpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
+          vunpcklps  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
+          vunpcklpd  -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vcmpps  $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
+          vcmpps  $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmpps  $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
+          vcmpps  $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmpps  $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
+          vcmpps  $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd  $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
+          vcmppd  $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd  $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
+          vcmppd  $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmppd  $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
+          vcmppd  $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vshufps  $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
+          vshufps  $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
+          vshufps  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vshufpd  $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
+          vshufpd  $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
+          vshufpd  $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
+          vcmpeqps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
+          vcmpleps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
+          vcmpltps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
+          vcmpneqps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
+          vcmpnleps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
+          vcmpnltps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
+          vcmpordps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
+          vcmpunordps   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnleps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordps   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
+          vcmpeqpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
+          vcmplepd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
+          vcmpltpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
+          vcmpneqpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
+          vcmpnlepd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
+          vcmpnltpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
+          vcmpordpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
+          vcmpunordpd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlepd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordpd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vmovmskps  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+          vmovmskps  %xmm2, %eax
+
+// CHECK: vmovmskpd  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+          vmovmskpd  %xmm2, %eax
+
+// CHECK: vcmpss  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
+          vcmpeqss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
+          vcmpless   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
+          vcmpltss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
+          vcmpneqss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
+          vcmpnless   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
+          vcmpnltss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
+          vcmpordss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
+          vcmpunordss   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpless   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnless   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordss   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordss   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
+          vcmpeqsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
+          vcmplesd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
+          vcmpltsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
+          vcmpneqsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
+          vcmpnlesd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
+          vcmpnltsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
+          vcmpordsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
+          vcmpunordsd   %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd  $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlesd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd  $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordsd   -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordsd   -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vucomiss  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
+          vucomiss  %xmm1, %xmm2
+
+// CHECK: vucomiss  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
+          vucomiss  (%eax), %xmm2
+
+// CHECK: vcomiss  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
+          vcomiss  %xmm1, %xmm2
+
+// CHECK: vcomiss  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
+          vcomiss  (%eax), %xmm2
+
+// CHECK: vucomisd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
+          vucomisd  %xmm1, %xmm2
+
+// CHECK: vucomisd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
+          vucomisd  (%eax), %xmm2
+
+// CHECK: vcomisd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
+          vcomisd  %xmm1, %xmm2
+
+// CHECK: vcomisd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
+          vcomisd  (%eax), %xmm2
+
+// CHECK: vcvttss2si  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
+          vcvttss2si  %xmm1, %eax
+
+// CHECK: vcvttss2si  (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+          vcvttss2si  (%ecx), %eax
+
+// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+          vcvtsi2ss  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2ss  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+          vcvtsi2ss  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvttsd2si  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
+          vcvttsd2si  %xmm1, %eax
+
+// CHECK: vcvttsd2si  (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+          vcvttsd2si  (%ecx), %eax
+
+// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+          vcvtsi2sd  (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2sd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+          vcvtsi2sd  (%eax), %xmm1, %xmm2
+
+// CHECK: vmovaps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
+          vmovaps  (%eax), %xmm2
+
+// CHECK: vmovaps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
+          vmovaps  %xmm1, %xmm2
+
+// CHECK: vmovaps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
+          vmovaps  %xmm1, (%eax)
+
+// CHECK: vmovapd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
+          vmovapd  (%eax), %xmm2
+
+// CHECK: vmovapd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
+          vmovapd  %xmm1, %xmm2
+
+// CHECK: vmovapd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
+          vmovapd  %xmm1, (%eax)
+
+// CHECK: vmovups  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
+          vmovups  (%eax), %xmm2
+
+// CHECK: vmovups  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
+          vmovups  %xmm1, %xmm2
+
+// CHECK: vmovups  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
+          vmovups  %xmm1, (%eax)
+
+// CHECK: vmovupd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
+          vmovupd  (%eax), %xmm2
+
+// CHECK: vmovupd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
+          vmovupd  %xmm1, %xmm2
+
+// CHECK: vmovupd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
+          vmovupd  %xmm1, (%eax)
+
+// CHECK: vmovlps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
+          vmovlps  %xmm1, (%eax)
+
+// CHECK: vmovlps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
+          vmovlps  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
+          vmovlpd  %xmm1, (%eax)
+
+// CHECK: vmovlpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
+          vmovlpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
+          vmovhps  %xmm1, (%eax)
+
+// CHECK: vmovhps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
+          vmovhps  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
+          vmovhpd  %xmm1, (%eax)
+
+// CHECK: vmovhpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
+          vmovhpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlhps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
+          vmovlhps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovhlps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
+          vmovhlps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vcvtss2sil  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
+          vcvtss2si  %xmm1, %eax
+
+// CHECK: vcvtss2sil  (%eax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+          vcvtss2si  (%eax), %ebx
+
+// CHECK: vcvtdq2ps  %xmm5, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
+          vcvtdq2ps  %xmm5, %xmm6
+
+// CHECK: vcvtdq2ps  (%eax), %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
+          vcvtdq2ps  (%eax), %xmm6
+
+// CHECK: vcvtsd2ss  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
+          vcvtsd2ss  %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtsd2ss  (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
+          vcvtsd2ss  (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtps2dq  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
+          vcvtps2dq  %xmm2, %xmm3
+
+// CHECK: vcvtps2dq  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
+          vcvtps2dq  (%eax), %xmm3
+
+// CHECK: vcvtss2sd  %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
+          vcvtss2sd  %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtss2sd  (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
+          vcvtss2sd  (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps  %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
+          vcvtdq2ps  %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps  (%ecx), %xmm4
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
+          vcvtdq2ps  (%ecx), %xmm4
+
+// CHECK: vcvttps2dq  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
+          vcvttps2dq  %xmm2, %xmm3
+
+// CHECK: vcvttps2dq  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
+          vcvttps2dq  (%eax), %xmm3
+
+// CHECK: vcvtps2pd  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
+          vcvtps2pd  %xmm2, %xmm3
+
+// CHECK: vcvtps2pd  (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
+          vcvtps2pd  (%eax), %xmm3
+
+// CHECK: vcvtpd2ps  %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
+          vcvtpd2ps  %xmm2, %xmm3
+
+// CHECK: vsqrtpd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
+          vsqrtpd  %xmm1, %xmm2
+
+// CHECK: vsqrtpd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
+          vsqrtpd  (%eax), %xmm2
+
+// CHECK: vsqrtps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
+          vsqrtps  %xmm1, %xmm2
+
+// CHECK: vsqrtps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
+          vsqrtps  (%eax), %xmm2
+
+// CHECK: vsqrtsd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
+          vsqrtsd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
+          vsqrtsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vsqrtss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
+          vsqrtss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0x18]
+          vsqrtss  (%eax), %xmm2, %xmm3
+
+// CHECK: vrsqrtps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
+          vrsqrtps  %xmm1, %xmm2
+
+// CHECK: vrsqrtps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
+          vrsqrtps  (%eax), %xmm2
+
+// CHECK: vrsqrtss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
+          vrsqrtss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vrsqrtss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0x18]
+          vrsqrtss  (%eax), %xmm2, %xmm3
+
+// CHECK: vrcpps  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
+          vrcpps  %xmm1, %xmm2
+
+// CHECK: vrcpps  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
+          vrcpps  (%eax), %xmm2
+
+// CHECK: vrcpss  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
+          vrcpss  %xmm1, %xmm2, %xmm3
+
+// CHECK: vrcpss  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0x18]
+          vrcpss  (%eax), %xmm2, %xmm3
+
+// CHECK: vmovntdq  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
+          vmovntdq  %xmm1, (%eax)
+
+// CHECK: vmovntpd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
+          vmovntpd  %xmm1, (%eax)
+
+// CHECK: vmovntps  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
+          vmovntps  %xmm1, (%eax)
+
+// CHECK: vldmxcsr  (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
+          vldmxcsr  (%eax)
+
+// CHECK: vstmxcsr  (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
+          vstmxcsr  (%eax)
+
+// CHECK: vldmxcsr  3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
+          vldmxcsr  0xdeadbeef
+
+// CHECK: vstmxcsr  3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
+          vstmxcsr  0xdeadbeef
+
+// CHECK: vpsubb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
+          vpsubb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
+          vpsubb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
+          vpsubw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
+          vpsubw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
+          vpsubd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
+          vpsubd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
+          vpsubq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
+          vpsubq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
+          vpsubsb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
+          vpsubsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
+          vpsubsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
+          vpsubsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
+          vpsubusb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
+          vpsubusb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
+          vpsubusw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
+          vpsubusw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
+          vpaddb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
+          vpaddb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
+          vpaddw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
+          vpaddw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
+          vpaddd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
+          vpaddd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
+          vpaddq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
+          vpaddq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
+          vpaddsb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
+          vpaddsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
+          vpaddsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
+          vpaddsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
+          vpaddusb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
+          vpaddusb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
+          vpaddusw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
+          vpaddusw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhuw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
+          vpmulhuw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
+          vpmulhuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
+          vpmulhw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
+          vpmulhw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmullw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
+          vpmullw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmullw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
+          vpmullw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuludq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
+          vpmuludq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmuludq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
+          vpmuludq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
+          vpavgb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
+          vpavgb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
+          vpavgw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
+          vpavgw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
+          vpminsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
+          vpminsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminub  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
+          vpminub  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminub  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
+          vpminub  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
+          vpmaxsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
+          vpmaxsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxub  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
+          vpmaxub  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxub  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
+          vpmaxub  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsadbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
+          vpsadbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsadbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
+          vpsadbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
+          vpsllw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
+          vpsllw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
+          vpslld  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpslld  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
+          vpslld  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
+          vpsllq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
+          vpsllq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsraw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
+          vpsraw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsraw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
+          vpsraw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrad  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
+          vpsrad  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrad  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
+          vpsrad  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
+          vpsrlw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
+          vpsrlw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrld  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
+          vpsrld  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrld  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
+          vpsrld  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
+          vpsrlq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
+          vpsrlq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+          vpslld  $10, %xmm2, %xmm3
+
+// CHECK: vpslldq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
+          vpslldq  $10, %xmm2, %xmm3
+
+// CHECK: vpsllq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
+          vpsllq  $10, %xmm2, %xmm3
+
+// CHECK: vpsllw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
+          vpsllw  $10, %xmm2, %xmm3
+
+// CHECK: vpsrad  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
+          vpsrad  $10, %xmm2, %xmm3
+
+// CHECK: vpsraw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
+          vpsraw  $10, %xmm2, %xmm3
+
+// CHECK: vpsrld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
+          vpsrld  $10, %xmm2, %xmm3
+
+// CHECK: vpsrldq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
+          vpsrldq  $10, %xmm2, %xmm3
+
+// CHECK: vpsrlq  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
+          vpsrlq  $10, %xmm2, %xmm3
+
+// CHECK: vpsrlw  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
+          vpsrlw  $10, %xmm2, %xmm3
+
+// CHECK: vpslld  $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+          vpslld  $10, %xmm2, %xmm3
+
+// CHECK: vpand  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
+          vpand  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpand  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
+          vpand  (%eax), %xmm2, %xmm3
+
+// CHECK: vpor  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
+          vpor  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpor  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
+          vpor  (%eax), %xmm2, %xmm3
+
+// CHECK: vpxor  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
+          vpxor  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpxor  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
+          vpxor  (%eax), %xmm2, %xmm3
+
+// CHECK: vpandn  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
+          vpandn  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpandn  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
+          vpandn  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
+          vpcmpeqb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
+          vpcmpeqb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
+          vpcmpeqw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
+          vpcmpeqw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
+          vpcmpeqd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
+          vpcmpeqd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
+          vpcmpgtb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
+          vpcmpgtb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
+          vpcmpgtw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
+          vpcmpgtw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
+          vpcmpgtd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
+          vpcmpgtd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpacksswb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
+          vpacksswb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpacksswb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
+          vpacksswb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpackssdw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
+          vpackssdw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackssdw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
+          vpackssdw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpackuswb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
+          vpackuswb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackuswb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
+          vpackuswb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufd  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
+          vpshufd  $4, %xmm2, %xmm3
+
+// CHECK: vpshufd  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
+          vpshufd  $4, (%eax), %xmm3
+
+// CHECK: vpshufhw  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
+          vpshufhw  $4, %xmm2, %xmm3
+
+// CHECK: vpshufhw  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
+          vpshufhw  $4, (%eax), %xmm3
+
+// CHECK: vpshuflw  $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
+          vpshuflw  $4, %xmm2, %xmm3
+
+// CHECK: vpshuflw  $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
+          vpshuflw  $4, (%eax), %xmm3
+
+// CHECK: vpunpcklbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
+          vpunpcklbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
+          vpunpcklbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklwd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
+          vpunpcklwd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklwd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
+          vpunpcklwd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckldq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
+          vpunpckldq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckldq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
+          vpunpckldq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
+          vpunpcklqdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
+          vpunpcklqdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhbw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
+          vpunpckhbw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhbw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
+          vpunpckhbw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhwd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
+          vpunpckhwd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhwd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
+          vpunpckhwd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
+          vpunpckhdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
+          vpunpckhdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
+          vpunpckhqdq  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
+          vpunpckhqdq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpinsrw  $7, %eax, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
+          vpinsrw  $7, %eax, %xmm2, %xmm3
+
+// CHECK: vpinsrw  $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
+          vpinsrw  $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vpextrw  $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+          vpextrw  $7, %xmm2, %eax
+
+// CHECK: vpmovmskb  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
+          vpmovmskb  %xmm1, %eax
+
+// CHECK: vmaskmovdqu  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
+          vmaskmovdqu  %xmm1, %xmm2
+
+// CHECK: vmovd  %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+          vmovd  %xmm1, %eax
+
+// CHECK: vmovd  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
+          vmovd  %xmm1, (%eax)
+
+// CHECK: vmovd  %eax, %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
+          vmovd  %eax, %xmm1
+
+// CHECK: vmovd  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
+          vmovd  (%eax), %xmm1
+
+// CHECK: vmovq  %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
+          vmovq  %xmm1, (%eax)
+
+// CHECK: vmovq  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
+          vmovq  %xmm1, %xmm2
+
+// CHECK: vmovq  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
+          vmovq  (%eax), %xmm1
+
+// CHECK: vcvtpd2dq  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
+          vcvtpd2dq  %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
+          vcvtdq2pd  %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
+          vcvtdq2pd  (%eax), %xmm2
+
+// CHECK: vmovshdup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
+          vmovshdup  %xmm1, %xmm2
+
+// CHECK: vmovshdup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
+          vmovshdup  (%eax), %xmm2
+
+// CHECK: vmovsldup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
+          vmovsldup  %xmm1, %xmm2
+
+// CHECK: vmovsldup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
+          vmovsldup  (%eax), %xmm2
+
+// CHECK: vmovddup  %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
+          vmovddup  %xmm1, %xmm2
+
+// CHECK: vmovddup  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
+          vmovddup  (%eax), %xmm2
+
+// CHECK: vaddsubps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
+          vaddsubps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubps  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
+          vaddsubps  (%eax), %xmm1, %xmm2
+
+// CHECK: vaddsubpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
+          vaddsubpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubpd  (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
+          vaddsubpd  (%eax), %xmm1, %xmm2
+
+// CHECK: vhaddps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
+          vhaddps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
+          vhaddps  (%eax), %xmm2, %xmm3
+
+// CHECK: vhaddpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
+          vhaddpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
+          vhaddpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubps  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
+          vhsubps  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubps  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
+          vhsubps  (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubpd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
+          vhsubpd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubpd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
+          vhsubpd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpabsb  %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
+          vpabsb  %xmm1, %xmm2
+
+// CHECK: vpabsb  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
+          vpabsb  (%eax), %xmm2
+
+// CHECK: vpabsw  %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
+          vpabsw  %xmm1, %xmm2
+
+// CHECK: vpabsw  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
+          vpabsw  (%eax), %xmm2
+
+// CHECK: vpabsd  %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
+          vpabsd  %xmm1, %xmm2
+
+// CHECK: vpabsd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
+          vpabsd  (%eax), %xmm2
+
+// CHECK: vphaddw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
+          vphaddw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
+          vphaddw  (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
+          vphaddd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
+          vphaddd  (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
+          vphaddsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
+          vphaddsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
+          vphsubw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
+          vphsubw  (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
+          vphsubd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
+          vphsubd  (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
+          vphsubsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
+          vphsubsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaddubsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
+          vpmaddubsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaddubsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
+          vpmaddubsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
+          vpshufb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpshufb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
+          vpshufb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignb  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
+          vpsignb  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
+          vpsignb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
+          vpsignw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
+          vpsignw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignd  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
+          vpsignd  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
+          vpsignd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhrsw  %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
+          vpmulhrsw  %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhrsw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
+          vpmulhrsw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpalignr  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
+          vpalignr  $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vpalignr  $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
+          vpalignr  $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundsd  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07]
+          vroundsd  $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vroundsd  $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07]
+          vroundsd  $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundss  $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07]
+          vroundss  $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vroundss  $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07]
+          vroundss  $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundpd  $7, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07]
+          vroundpd  $7, %xmm2, %xmm3
+
+// CHECK: vroundpd  $7, (%eax), %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07]
+          vroundpd  $7, (%eax), %xmm3
+
+// CHECK: vroundps  $7, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07]
+          vroundps  $7, %xmm2, %xmm3
+
+// CHECK: vroundps  $7, (%eax), %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07]
+          vroundps  $7, (%eax), %xmm3
+
+// CHECK: vphminposuw  %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda]
+          vphminposuw  %xmm2, %xmm3
+
+// CHECK: vphminposuw  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
+          vphminposuw  (%eax), %xmm2
+
+// CHECK: vpackusdw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
+          vpackusdw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpackusdw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
+          vpackusdw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqq  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
+          vpcmpeqq  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpcmpeqq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
+          vpcmpeqq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsb  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
+          vpminsb  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
+          vpminsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsd  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
+          vpminsd  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
+          vpminsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminud  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
+          vpminud  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminud  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
+          vpminud  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminuw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
+          vpminuw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
+          vpminuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsb  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
+          vpmaxsb  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
+          vpmaxsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsd  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
+          vpmaxsd  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
+          vpmaxsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxud  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
+          vpmaxud  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxud  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
+          vpmaxud  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxuw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
+          vpmaxuw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
+          vpmaxuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuldq  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
+          vpmuldq  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmuldq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
+          vpmuldq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulld  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca]
+          vpmulld  %xmm2, %xmm5, %xmm1
+
+// CHECK: vpmulld  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18]
+          vpmulld  (%eax), %xmm5, %xmm3
+
+// CHECK: vblendps  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03]
+          vblendps  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vblendps  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03]
+          vblendps  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vblendpd  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03]
+          vblendpd  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vblendpd  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03]
+          vblendpd  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vpblendw  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03]
+          vpblendw  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vpblendw  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03]
+          vpblendw  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vmpsadbw  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03]
+          vmpsadbw  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vmpsadbw  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03]
+          vmpsadbw  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vdpps  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03]
+          vdpps  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vdpps  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03]
+          vdpps  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vdppd  $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03]
+          vdppd  $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vdppd  $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03]
+          vdppd  $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20]
+          vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20]
+          vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20]
+          vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvps  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20]
+          vblendvps  %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20]
+          vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20]
+          vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vpmovsxbw  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea]
+          vpmovsxbw  %xmm2, %xmm5
+
+// CHECK: vpmovsxbw  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10]
+          vpmovsxbw  (%eax), %xmm2
+
+// CHECK: vpmovsxwd  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea]
+          vpmovsxwd  %xmm2, %xmm5
+
+// CHECK: vpmovsxwd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10]
+          vpmovsxwd  (%eax), %xmm2
+
+// CHECK: vpmovsxdq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea]
+          vpmovsxdq  %xmm2, %xmm5
+
+// CHECK: vpmovsxdq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10]
+          vpmovsxdq  (%eax), %xmm2
+
+// CHECK: vpmovzxbw  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea]
+          vpmovzxbw  %xmm2, %xmm5
+
+// CHECK: vpmovzxbw  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10]
+          vpmovzxbw  (%eax), %xmm2
+
+// CHECK: vpmovzxwd  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea]
+          vpmovzxwd  %xmm2, %xmm5
+
+// CHECK: vpmovzxwd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10]
+          vpmovzxwd  (%eax), %xmm2
+
+// CHECK: vpmovzxdq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea]
+          vpmovzxdq  %xmm2, %xmm5
+
+// CHECK: vpmovzxdq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10]
+          vpmovzxdq  (%eax), %xmm2
+
+// CHECK: vpmovsxbq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea]
+          vpmovsxbq  %xmm2, %xmm5
+
+// CHECK: vpmovsxbq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10]
+          vpmovsxbq  (%eax), %xmm2
+
+// CHECK: vpmovzxbq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea]
+          vpmovzxbq  %xmm2, %xmm5
+
+// CHECK: vpmovzxbq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10]
+          vpmovzxbq  (%eax), %xmm2
+
+// CHECK: vpmovsxbd  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea]
+          vpmovsxbd  %xmm2, %xmm5
+
+// CHECK: vpmovsxbd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10]
+          vpmovsxbd  (%eax), %xmm2
+
+// CHECK: vpmovsxwq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea]
+          vpmovsxwq  %xmm2, %xmm5
+
+// CHECK: vpmovsxwq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10]
+          vpmovsxwq  (%eax), %xmm2
+
+// CHECK: vpmovzxbd  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea]
+          vpmovzxbd  %xmm2, %xmm5
+
+// CHECK: vpmovzxbd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10]
+          vpmovzxbd  (%eax), %xmm2
+
+// CHECK: vpmovzxwq  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea]
+          vpmovzxwq  %xmm2, %xmm5
+
+// CHECK: vpmovzxwq  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10]
+          vpmovzxwq  (%eax), %xmm2
+
+// CHECK: vpextrw  $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+          vpextrw  $7, %xmm2, %eax
+
+// CHECK: vpextrw  $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07]
+          vpextrw  $7, %xmm2, (%eax)
+
+// CHECK: vpextrd  $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07]
+          vpextrd  $7, %xmm2, %eax
+
+// CHECK: vpextrd  $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07]
+          vpextrd  $7, %xmm2, (%eax)
+
+// CHECK: vpextrb  $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07]
+          vpextrb  $7, %xmm2, %eax
+
+// CHECK: vpextrb  $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07]
+          vpextrb  $7, %xmm2, (%eax)
+
+// CHECK: vextractps  $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07]
+          vextractps  $7, %xmm2, (%eax)
+
+// CHECK: vextractps  $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07]
+          vextractps  $7, %xmm2, %eax
+
+// CHECK: vpinsrw  $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07]
+          vpinsrw  $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrw  $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07]
+          vpinsrw  $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vpinsrb  $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07]
+          vpinsrb  $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrb  $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07]
+          vpinsrb  $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vpinsrd  $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07]
+          vpinsrd  $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrd  $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
+          vpinsrd  $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vinsertps  $7, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07]
+          vinsertps  $7, %xmm2, %xmm5, %xmm1
+
+// CHECK: vinsertps  $7, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07]
+          vinsertps  $7, (%eax), %xmm5, %xmm1
+
+// CHECK: vptest  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea]
+          vptest  %xmm2, %xmm5
+
+// CHECK: vptest  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10]
+          vptest  (%eax), %xmm2
+
+// CHECK: vmovntdqa  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10]
+          vmovntdqa  (%eax), %xmm2
+
+// CHECK: vpcmpgtq  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca]
+          vpcmpgtq  %xmm2, %xmm5, %xmm1
+
+// CHECK: vpcmpgtq  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18]
+          vpcmpgtq  (%eax), %xmm5, %xmm3
+
+// CHECK: vpcmpistrm  $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07]
+          vpcmpistrm  $7, %xmm2, %xmm5
+
+// CHECK: vpcmpistrm  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07]
+          vpcmpistrm  $7, (%eax), %xmm5
+
+// CHECK: vpcmpestrm  $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07]
+          vpcmpestrm  $7, %xmm2, %xmm5
+
+// CHECK: vpcmpestrm  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07]
+          vpcmpestrm  $7, (%eax), %xmm5
+
+// CHECK: vpcmpistri  $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07]
+          vpcmpistri  $7, %xmm2, %xmm5
+
+// CHECK: vpcmpistri  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07]
+          vpcmpistri  $7, (%eax), %xmm5
+
+// CHECK: vpcmpestri  $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07]
+          vpcmpestri  $7, %xmm2, %xmm5
+
+// CHECK: vpcmpestri  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07]
+          vpcmpestri  $7, (%eax), %xmm5
+
+// CHECK: vaesimc  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea]
+          vaesimc  %xmm2, %xmm5
+
+// CHECK: vaesimc  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10]
+          vaesimc  (%eax), %xmm2
+
+// CHECK: vaesenc  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca]
+          vaesenc  %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesenc  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18]
+          vaesenc  (%eax), %xmm5, %xmm3
+
+// CHECK: vaesenclast  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca]
+          vaesenclast  %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesenclast  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18]
+          vaesenclast  (%eax), %xmm5, %xmm3
+
+// CHECK: vaesdec  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca]
+          vaesdec  %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesdec  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18]
+          vaesdec  (%eax), %xmm5, %xmm3
+
+// CHECK: vaesdeclast  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca]
+          vaesdeclast  %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesdeclast  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18]
+          vaesdeclast  (%eax), %xmm5, %xmm3
+
+// CHECK: vaeskeygenassist  $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07]
+          vaeskeygenassist  $7, %xmm2, %xmm5
+
+// CHECK: vaeskeygenassist  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07]
+          vaeskeygenassist  $7, (%eax), %xmm5
+
+// CHECK: vcmpps  $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08]
+          vcmpeq_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $9, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09]
+          vcmpngeps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $10, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a]
+          vcmpngtps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $11, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b]
+          vcmpfalseps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $12, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c]
+          vcmpneq_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $13, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d]
+          vcmpgeps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $14, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e]
+          vcmpgtps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $15, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f]
+          vcmptrueps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $16, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10]
+          vcmpeq_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $17, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11]
+          vcmplt_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $18, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12]
+          vcmple_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $19, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13]
+          vcmpunord_sps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $20, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14]
+          vcmpneq_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $21, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15]
+          vcmpnlt_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $22, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16]
+          vcmpnle_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $23, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17]
+          vcmpord_sps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $24, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18]
+          vcmpeq_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $25, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19]
+          vcmpnge_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $26, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a]
+          vcmpngt_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $27, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b]
+          vcmpfalse_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $28, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c]
+          vcmpneq_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $29, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d]
+          vcmpge_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $30, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e]
+          vcmpgt_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps  $31, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f]
+          vcmptrue_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovaps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x28,0x10]
+          vmovaps  (%eax), %ymm2
+
+// CHECK: vmovaps  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x28,0xd1]
+          vmovaps  %ymm1, %ymm2
+
+// CHECK: vmovaps  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x29,0x08]
+          vmovaps  %ymm1, (%eax)
+
+// CHECK: vmovapd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x28,0x10]
+          vmovapd  (%eax), %ymm2
+
+// CHECK: vmovapd  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x28,0xd1]
+          vmovapd  %ymm1, %ymm2
+
+// CHECK: vmovapd  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x29,0x08]
+          vmovapd  %ymm1, (%eax)
+
+// CHECK: vmovups  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x10,0x10]
+          vmovups  (%eax), %ymm2
+
+// CHECK: vmovups  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x10,0xd1]
+          vmovups  %ymm1, %ymm2
+
+// CHECK: vmovups  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x11,0x08]
+          vmovups  %ymm1, (%eax)
+
+// CHECK: vmovupd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x10,0x10]
+          vmovupd  (%eax), %ymm2
+
+// CHECK: vmovupd  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x10,0xd1]
+          vmovupd  %ymm1, %ymm2
+
+// CHECK: vmovupd  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
+          vmovupd  %ymm1, (%eax)
+
+// CHECK: vunpckhps  %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x15,0xe1]
+          vunpckhps  %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhpd  %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x15,0xe1]
+          vunpckhpd  %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklps  %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x14,0xe1]
+          vunpcklps  %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklpd  %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x14,0xe1]
+          vunpcklpd  %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc]
+          vunpckhps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpckhpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc]
+          vunpckhpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc]
+          vunpcklps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc]
+          vunpcklpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vmovntdq  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0xe7,0x08]
+          vmovntdq  %ymm1, (%eax)
+
+// CHECK: vmovntpd  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x2b,0x08]
+          vmovntpd  %ymm1, (%eax)
+
+// CHECK: vmovntps  %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x2b,0x08]
+          vmovntps  %ymm1, (%eax)
+
+// CHECK: vmovmskps  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+          vmovmskps  %xmm2, %eax
+
+// CHECK: vmovmskpd  %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+          vmovmskpd  %xmm2, %eax
+
+// CHECK: vmaxps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2]
+          vmaxps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vmaxpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2]
+          vmaxpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vminps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2]
+          vminps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vminpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2]
+          vminpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vsubps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2]
+          vsubps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vsubpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2]
+          vsubpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vdivps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2]
+          vdivps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vdivpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2]
+          vdivpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vaddps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0xf2]
+          vaddps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vaddpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0xf2]
+          vaddpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vmulps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0xf2]
+          vmulps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vmulpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0xf2]
+          vmulpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vmaxps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
+          vmaxps  (%eax), %ymm4, %ymm6
+
+// CHECK: vmaxpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
+          vmaxpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vminps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
+          vminps  (%eax), %ymm4, %ymm6
+
+// CHECK: vminpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
+          vminpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vsubps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
+          vsubps  (%eax), %ymm4, %ymm6
+
+// CHECK: vsubpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
+          vsubpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vdivps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
+          vdivps  (%eax), %ymm4, %ymm6
+
+// CHECK: vdivpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
+          vdivpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vaddps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
+          vaddps  (%eax), %ymm4, %ymm6
+
+// CHECK: vaddpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
+          vaddpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vmulps  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
+          vmulps  (%eax), %ymm4, %ymm6
+
+// CHECK: vmulpd  (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
+          vmulpd  (%eax), %ymm4, %ymm6
+
+// CHECK: vsqrtpd  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x51,0xd1]
+          vsqrtpd  %ymm1, %ymm2
+
+// CHECK: vsqrtpd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x51,0x10]
+          vsqrtpd  (%eax), %ymm2
+
+// CHECK: vsqrtps  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x51,0xd1]
+          vsqrtps  %ymm1, %ymm2
+
+// CHECK: vsqrtps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x51,0x10]
+          vsqrtps  (%eax), %ymm2
+
+// CHECK: vrsqrtps  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x52,0xd1]
+          vrsqrtps  %ymm1, %ymm2
+
+// CHECK: vrsqrtps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x52,0x10]
+          vrsqrtps  (%eax), %ymm2
+
+// CHECK: vrcpps  %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x53,0xd1]
+          vrcpps  %ymm1, %ymm2
+
+// CHECK: vrcpps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x53,0x10]
+          vrcpps  (%eax), %ymm2
+
+// CHECK: vandps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x54,0xf2]
+          vandps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vandpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x54,0xf2]
+          vandpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vandps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc]
+          vandps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc]
+          vandpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vorps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x56,0xf2]
+          vorps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vorpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x56,0xf2]
+          vorpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc]
+          vorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc]
+          vorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vxorps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x57,0xf2]
+          vxorps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vxorpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x57,0xf2]
+          vxorpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vxorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc]
+          vxorps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vxorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc]
+          vxorpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandnps  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x55,0xf2]
+          vandnps  %ymm2, %ymm4, %ymm6
+
+// CHECK: vandnpd  %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x55,0xf2]
+          vandnpd  %ymm2, %ymm4, %ymm6
+
+// CHECK: vandnps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc]
+          vandnps  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandnpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc]
+          vandnpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vcvtps2pd  %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3]
+          vcvtps2pd  %xmm3, %ymm2
+
+// CHECK: vcvtps2pd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0x10]
+          vcvtps2pd  (%eax), %ymm2
+
+// CHECK: vcvtdq2pd  %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3]
+          vcvtdq2pd  %xmm3, %ymm2
+
+// CHECK: vcvtdq2pd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0x10]
+          vcvtdq2pd  (%eax), %ymm2
+
+// CHECK: vcvtdq2ps  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfc,0x5b,0xea]
+          vcvtdq2ps  %ymm2, %ymm5
+
+// CHECK: vcvtdq2ps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5b,0x10]
+          vcvtdq2ps  (%eax), %ymm2
+
+// CHECK: vcvtps2dq  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0xea]
+          vcvtps2dq  %ymm2, %ymm5
+
+// CHECK: vcvtps2dq  (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0x28]
+          vcvtps2dq  (%eax), %ymm5
+
+// CHECK: vcvttps2dq  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0xea]
+          vcvttps2dq  %ymm2, %ymm5
+
+// CHECK: vcvttps2dq  (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0x28]
+          vcvttps2dq  (%eax), %ymm5
+
+// CHECK: vcvttpd2dq  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+          vcvttpd2dq  %xmm1, %xmm5
+
+// CHECK: vcvttpd2dq  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
+          vcvttpd2dq  %ymm2, %xmm5
+
+// CHECK: vcvttpd2dqx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+          vcvttpd2dqx  %xmm1, %xmm5
+
+// CHECK: vcvttpd2dqx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0xe6,0x08]
+          vcvttpd2dqx  (%eax), %xmm1
+
+// CHECK: vcvttpd2dqy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xca]
+          vcvttpd2dqy  %ymm2, %xmm1
+
+// CHECK: vcvttpd2dqy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
+          vcvttpd2dqy  (%eax), %xmm1
+
+// CHECK: vcvtpd2ps  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
+          vcvtpd2ps  %ymm2, %xmm5
+
+// CHECK: vcvtpd2psx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
+          vcvtpd2psx  %xmm1, %xmm5
+
+// CHECK: vcvtpd2psx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x5a,0x08]
+          vcvtpd2psx  (%eax), %xmm1
+
+// CHECK: vcvtpd2psy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xca]
+          vcvtpd2psy  %ymm2, %xmm1
+
+// CHECK: vcvtpd2psy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
+          vcvtpd2psy  (%eax), %xmm1
+
+// CHECK: vcvtpd2dq  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xff,0xe6,0xea]
+          vcvtpd2dq  %ymm2, %xmm5
+
+// CHECK: vcvtpd2dqy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0xca]
+          vcvtpd2dqy  %ymm2, %xmm1
+
+// CHECK: vcvtpd2dqy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0x08]
+          vcvtpd2dqy  (%eax), %xmm1
+
+// CHECK: vcvtpd2dqx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
+          vcvtpd2dqx  %xmm1, %xmm5
+
+// CHECK: vcvtpd2dqx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfb,0xe6,0x08]
+          vcvtpd2dqx  (%eax), %xmm1
+
+// CHECK: vcmpps  $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00]
+          vcmpeqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02]
+          vcmpleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01]
+          vcmpltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04]
+          vcmpneqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06]
+          vcmpnleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05]
+          vcmpnltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07]
+          vcmpordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03]
+          vcmpunordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00]
+          vcmpeqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02]
+          vcmplepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01]
+          vcmpltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04]
+          vcmpneqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06]
+          vcmpnlepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05]
+          vcmpnltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07]
+          vcmpordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03]
+          vcmpunordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $8, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08]
+          vcmpeq_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $9, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09]
+          vcmpngeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $10, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a]
+          vcmpngtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $11, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b]
+          vcmpfalseps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $12, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c]
+          vcmpneq_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $13, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d]
+          vcmpgeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $14, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e]
+          vcmpgtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $15, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f]
+          vcmptrueps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $16, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10]
+          vcmpeq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $17, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11]
+          vcmplt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $18, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12]
+          vcmple_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $19, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13]
+          vcmpunord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $20, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14]
+          vcmpneq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $21, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15]
+          vcmpnlt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $22, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16]
+          vcmpnle_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $23, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17]
+          vcmpord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $24, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18]
+          vcmpeq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $25, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19]
+          vcmpnge_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $26, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a]
+          vcmpngt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $27, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b]
+          vcmpfalse_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $28, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c]
+          vcmpneq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $29, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d]
+          vcmpge_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $30, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e]
+          vcmpgt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $31, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
+          vcmptrue_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubps  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0xd0,0xd9]
+          vaddsubps  %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubps  (%eax), %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xf7,0xd0,0x10]
+          vaddsubps  (%eax), %ymm1, %ymm2
+
+// CHECK: vaddsubpd  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xd0,0xd9]
+          vaddsubpd  %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubpd  (%eax), %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xf5,0xd0,0x10]
+          vaddsubpd  (%eax), %ymm1, %ymm2
+
+// CHECK: vhaddps  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7c,0xd9]
+          vhaddps  %ymm1, %ymm2, %ymm3
+
+// CHECK: vhaddps  (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7c,0x18]
+          vhaddps  (%eax), %ymm2, %ymm3
+
+// CHECK: vhaddpd  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7c,0xd9]
+          vhaddpd  %ymm1, %ymm2, %ymm3
+
+// CHECK: vhaddpd  (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7c,0x18]
+          vhaddpd  (%eax), %ymm2, %ymm3
+
+// CHECK: vhsubps  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7d,0xd9]
+          vhsubps  %ymm1, %ymm2, %ymm3
+
+// CHECK: vhsubps  (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7d,0x18]
+          vhsubps  (%eax), %ymm2, %ymm3
+
+// CHECK: vhsubpd  %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7d,0xd9]
+          vhsubpd  %ymm1, %ymm2, %ymm3
+
+// CHECK: vhsubpd  (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7d,0x18]
+          vhsubpd  (%eax), %ymm2, %ymm3
+
+// CHECK: vblendps  $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03]
+          vblendps  $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vblendps  $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03]
+          vblendps  $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vblendpd  $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03]
+          vblendpd  $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vblendpd  $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03]
+          vblendpd  $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vdpps  $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03]
+          vdpps  $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vdpps  $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03]
+          vdpps  $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vbroadcastf128  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10]
+          vbroadcastf128  (%eax), %ymm2
+
+// CHECK: vbroadcastsd  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10]
+          vbroadcastsd  (%eax), %ymm2
+
+// CHECK: vbroadcastss  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10]
+          vbroadcastss  (%eax), %xmm2
+
+// CHECK: vbroadcastss  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10]
+          vbroadcastss  (%eax), %ymm2
+
+// CHECK: vinsertf128  $7, %xmm2, %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07]
+          vinsertf128  $7, %xmm2, %ymm2, %ymm5
+
+// CHECK: vinsertf128  $7, (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0x28,0x07]
+          vinsertf128  $7, (%eax), %ymm2, %ymm5
+
+// CHECK: vextractf128  $7, %ymm2, %xmm2
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07]
+          vextractf128  $7, %ymm2, %xmm2
+
+// CHECK: vextractf128  $7, %ymm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07]
+          vextractf128  $7, %ymm2, (%eax)
+
+// CHECK: vmaskmovpd  %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10]
+          vmaskmovpd  %xmm2, %xmm5, (%eax)
+
+// CHECK: vmaskmovpd  %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10]
+          vmaskmovpd  %ymm2, %ymm5, (%eax)
+
+// CHECK: vmaskmovpd  (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28]
+          vmaskmovpd  (%eax), %xmm2, %xmm5
+
+// CHECK: vmaskmovpd  (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28]
+          vmaskmovpd  (%eax), %ymm2, %ymm5
+
+// CHECK: vmaskmovps  %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10]
+          vmaskmovps  %xmm2, %xmm5, (%eax)
+
+// CHECK: vmaskmovps  %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10]
+          vmaskmovps  %ymm2, %ymm5, (%eax)
+
+// CHECK: vmaskmovps  (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28]
+          vmaskmovps  (%eax), %xmm2, %xmm5
+
+// CHECK: vmaskmovps  (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28]
+          vmaskmovps  (%eax), %ymm2, %ymm5
+
+// CHECK: vpermilps  $7, %xmm1, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07]
+          vpermilps  $7, %xmm1, %xmm5
+
+// CHECK: vpermilps  $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07]
+          vpermilps  $7, %ymm5, %ymm1
+
+// CHECK: vpermilps  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07]
+          vpermilps  $7, (%eax), %xmm5
+
+// CHECK: vpermilps  $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07]
+          vpermilps  $7, (%eax), %ymm5
+
+// CHECK: vpermilps  %xmm1, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9]
+          vpermilps  %xmm1, %xmm5, %xmm1
+
+// CHECK: vpermilps  %ymm1, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9]
+          vpermilps  %ymm1, %ymm5, %ymm1
+
+// CHECK: vpermilps  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18]
+          vpermilps  (%eax), %xmm5, %xmm3
+
+// CHECK: vpermilps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08]
+          vpermilps  (%eax), %ymm5, %ymm1
+
+// CHECK: vpermilpd  $7, %xmm1, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07]
+          vpermilpd  $7, %xmm1, %xmm5
+
+// CHECK: vpermilpd  $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07]
+          vpermilpd  $7, %ymm5, %ymm1
+
+// CHECK: vpermilpd  $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07]
+          vpermilpd  $7, (%eax), %xmm5
+
+// CHECK: vpermilpd  $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07]
+          vpermilpd  $7, (%eax), %ymm5
+
+// CHECK: vpermilpd  %xmm1, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9]
+          vpermilpd  %xmm1, %xmm5, %xmm1
+
+// CHECK: vpermilpd  %ymm1, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9]
+          vpermilpd  %ymm1, %ymm5, %ymm1
+
+// CHECK: vpermilpd  (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18]
+          vpermilpd  (%eax), %xmm5, %xmm3
+
+// CHECK: vpermilpd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08]
+          vpermilpd  (%eax), %ymm5, %ymm1
+
+// CHECK: vperm2f128  $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07]
+          vperm2f128  $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vperm2f128  $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07]
+          vperm2f128  $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vzeroall
+// CHECK: encoding: [0xc5,0xfc,0x77]
+          vzeroall
+
+// CHECK: vzeroupper
+// CHECK: encoding: [0xc5,0xf8,0x77]
+          vzeroupper
+
+// CHECK: vcvtsd2si  %xmm4, %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
+          vcvtsd2si  %xmm4, %ecx
+
+// CHECK: vcvtsd2si  (%ecx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+          vcvtsd2si  (%ecx), %ecx
+
+// CHECK: vcvtsi2sdl  (%ebp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
+          vcvtsi2sdl  (%ebp), %xmm0, %xmm7
+
+// CHECK: vcvtsi2sdl  (%esp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
+          vcvtsi2sdl  (%esp), %xmm0, %xmm7
+
+// CHECK: vlddqu  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0xf0,0x10]
+          vlddqu  (%eax), %ymm2
+
+// CHECK: vmovddup  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xff,0x12,0xea]
+          vmovddup  %ymm2, %ymm5
+
+// CHECK: vmovddup  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0x12,0x10]
+          vmovddup  (%eax), %ymm2
+
+// CHECK: vmovdqa  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x6f,0xea]
+          vmovdqa  %ymm2, %ymm5
+
+// CHECK: vmovdqa  %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x7f,0x10]
+          vmovdqa  %ymm2, (%eax)
+
+// CHECK: vmovdqa  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x6f,0x10]
+          vmovdqa  (%eax), %ymm2
+
+// CHECK: vmovdqu  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x6f,0xea]
+          vmovdqu  %ymm2, %ymm5
+
+// CHECK: vmovdqu  %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfe,0x7f,0x10]
+          vmovdqu  %ymm2, (%eax)
+
+// CHECK: vmovdqu  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x6f,0x10]
+          vmovdqu  (%eax), %ymm2
+
+// CHECK: vmovshdup  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x16,0xea]
+          vmovshdup  %ymm2, %ymm5
+
+// CHECK: vmovshdup  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x16,0x10]
+          vmovshdup  (%eax), %ymm2
+
+// CHECK: vmovsldup  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x12,0xea]
+          vmovsldup  %ymm2, %ymm5
+
+// CHECK: vmovsldup  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x12,0x10]
+          vmovsldup  (%eax), %ymm2
+
+// CHECK: vptest  %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea]
+          vptest  %ymm2, %ymm5
+
+// CHECK: vptest  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10]
+          vptest  (%eax), %ymm2
+
+// CHECK: vroundpd  $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07]
+          vroundpd  $7, %ymm5, %ymm1
+
+// CHECK: vroundpd  $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07]
+          vroundpd  $7, (%eax), %ymm5
+
+// CHECK: vroundps  $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07]
+          vroundps  $7, %ymm5, %ymm1
+
+// CHECK: vroundps  $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07]
+          vroundps  $7, (%eax), %ymm5
+
+// CHECK: vshufpd  $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07]
+          vshufpd  $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufpd  $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07]
+          vshufpd  $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vshufps  $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07]
+          vshufps  $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufps  $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07]
+          vshufps  $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vtestpd  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea]
+          vtestpd  %xmm2, %xmm5
+
+// CHECK: vtestpd  %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea]
+          vtestpd  %ymm2, %ymm5
+
+// CHECK: vtestpd  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10]
+          vtestpd  (%eax), %xmm2
+
+// CHECK: vtestpd  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10]
+          vtestpd  (%eax), %ymm2
+
+// CHECK: vtestps  %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea]
+          vtestps  %xmm2, %xmm5
+
+// CHECK: vtestps  %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea]
+          vtestps  %ymm2, %ymm5
+
+// CHECK: vtestps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10]
+          vtestps  (%eax), %xmm2
+
+// CHECK: vtestps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10]
+          vtestps  (%eax), %ymm2
+
+// CHECK: vblendvpd  %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2
+// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00]
+          vblendvpd  %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2
+
+
+
+// CHECK: vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
+          vpclmulhqhqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq  $17, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
+          vpclmulhqhqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq  $1, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01]
+          vpclmulhqlqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq  $1, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01]
+          vpclmulhqlqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq  $16, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10]
+          vpclmullqhqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq  $16, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10]
+          vpclmullqhqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq  $0, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00]
+          vpclmullqlqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq  $0, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00]
+          vpclmullqlqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
+          vpclmulqdq  $17, %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq  $17, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
+          vpclmulqdq  $17, (%eax), %xmm5, %xmm3
+
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
new file mode 100644
index 000000000000..4ec9fcdb1ee5
--- /dev/null
+++ b/test/MC/X86/x86-32-coverage.s
@@ -0,0 +1,19564 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s --show-encoding  | FileCheck %s
+
+// CHECK: 	movb	$127, 3735928559(%ebx,%ecx,8)
+        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movw	$31438, 3735928559(%ebx,%ecx,8)
+        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movl	$324478056, 3735928559(%ebx,%ecx,8)
+        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movsbl	3735928559(%ebx,%ecx,8), %ecx
+        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movswl	3735928559(%ebx,%ecx,8), %ecx
+        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movzbl	3735928559(%ebx,%ecx,8), %ecx
+        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movzwl	3735928559(%ebx,%ecx,8), %ecx
+        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	pushl	3735928559(%ebx,%ecx,8)
+        	pushl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	popl	3735928559(%ebx,%ecx,8)
+        	popl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	lahf
+        	lahf
+
+// CHECK: 	sahf
+        	sahf
+
+// CHECK: 	addb	$254, 3735928559(%ebx,%ecx,8)
+        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addb	$127, 3735928559(%ebx,%ecx,8)
+        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addw	$31438, 3735928559(%ebx,%ecx,8)
+        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addl	$324478056, 3735928559(%ebx,%ecx,8)
+        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	incl	3735928559(%ebx,%ecx,8)
+        	incl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subb	$254, 3735928559(%ebx,%ecx,8)
+        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subb	$127, 3735928559(%ebx,%ecx,8)
+        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subw	$31438, 3735928559(%ebx,%ecx,8)
+        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subl	$324478056, 3735928559(%ebx,%ecx,8)
+        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	decl	3735928559(%ebx,%ecx,8)
+        	decl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbw	$31438, 3735928559(%ebx,%ecx,8)
+        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbl	$324478056, 3735928559(%ebx,%ecx,8)
+        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpb	$254, 3735928559(%ebx,%ecx,8)
+        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpb	$127, 3735928559(%ebx,%ecx,8)
+        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpw	$31438, 3735928559(%ebx,%ecx,8)
+        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpl	$324478056, 3735928559(%ebx,%ecx,8)
+        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testb	$127, 3735928559(%ebx,%ecx,8)
+        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testw	$31438, 3735928559(%ebx,%ecx,8)
+        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testl	$324478056, 3735928559(%ebx,%ecx,8)
+        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andb	$254, 3735928559(%ebx,%ecx,8)
+        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andb	$127, 3735928559(%ebx,%ecx,8)
+        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andw	$31438, 3735928559(%ebx,%ecx,8)
+        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andl	$324478056, 3735928559(%ebx,%ecx,8)
+        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orb	$254, 3735928559(%ebx,%ecx,8)
+        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orb	$127, 3735928559(%ebx,%ecx,8)
+        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orw	$31438, 3735928559(%ebx,%ecx,8)
+        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orl	$324478056, 3735928559(%ebx,%ecx,8)
+        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorb	$254, 3735928559(%ebx,%ecx,8)
+        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorb	$127, 3735928559(%ebx,%ecx,8)
+        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorw	$31438, 3735928559(%ebx,%ecx,8)
+        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorl	$324478056, 3735928559(%ebx,%ecx,8)
+        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcb	$254, 3735928559(%ebx,%ecx,8)
+        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcb	$127, 3735928559(%ebx,%ecx,8)
+        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcw	$31438, 3735928559(%ebx,%ecx,8)
+        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcl	$324478056, 3735928559(%ebx,%ecx,8)
+        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	negl	3735928559(%ebx,%ecx,8)
+        	negl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	notl	3735928559(%ebx,%ecx,8)
+        	notl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cbtw
+        	cbtw
+
+// CHECK: 	cwtl
+        	cwtl
+
+// CHECK: 	cwtd
+        	cwtd
+
+// CHECK: 	cltd
+        	cltd
+
+// CHECK: 	mull	3735928559(%ebx,%ecx,8)
+        	mull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	imull	3735928559(%ebx,%ecx,8)
+        	imull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	divl	3735928559(%ebx,%ecx,8)
+        	divl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	idivl	3735928559(%ebx,%ecx,8)
+        	idivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	roll	$0, 3735928559(%ebx,%ecx,8)
+        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rolb	$127, 3735928559(%ebx,%ecx,8)
+        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	roll	3735928559(%ebx,%ecx,8)
+        	roll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorl	$0, 3735928559(%ebx,%ecx,8)
+        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorb	$127, 3735928559(%ebx,%ecx,8)
+        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorl	3735928559(%ebx,%ecx,8)
+        	rorl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
+        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
+        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shll	3735928559(%ebx,%ecx,8)
+        	shll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrl	$0, 3735928559(%ebx,%ecx,8)
+        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrb	$127, 3735928559(%ebx,%ecx,8)
+        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrl	3735928559(%ebx,%ecx,8)
+        	shrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarl	$0, 3735928559(%ebx,%ecx,8)
+        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarb	$127, 3735928559(%ebx,%ecx,8)
+        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarl	3735928559(%ebx,%ecx,8)
+        	sarl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	calll	*%ecx
+        	call	*%ecx
+
+// CHECK: 	calll	*3735928559(%ebx,%ecx,8)
+        	call	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	calll	*3735928559(%ebx,%ecx,8)
+        	call	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	jmpl	*3735928559(%ebx,%ecx,8)
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	jmpl	*3735928559(%ebx,%ecx,8)
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ljmpl	*3735928559(%ebx,%ecx,8)
+        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	lret
+        	lret
+
+// CHECK: 	leave
+        	leave
+
+// CHECK: 	leave
+        	leavel
+
+// CHECK: 	seto	%bl
+        	seto	%bl
+
+// CHECK: 	seto	3735928559(%ebx,%ecx,8)
+        	seto	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setno	%bl
+        	setno	%bl
+
+// CHECK: 	setno	3735928559(%ebx,%ecx,8)
+        	setno	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setb	%bl
+        	setb	%bl
+
+// CHECK: 	setb	3735928559(%ebx,%ecx,8)
+        	setb	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setae	%bl
+        	setae	%bl
+
+// CHECK: 	setae	3735928559(%ebx,%ecx,8)
+        	setae	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sete	%bl
+        	sete	%bl
+
+// CHECK: 	sete	3735928559(%ebx,%ecx,8)
+        	sete	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setne	%bl
+        	setne	%bl
+
+// CHECK: 	setne	3735928559(%ebx,%ecx,8)
+        	setne	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setbe	%bl
+        	setbe	%bl
+
+// CHECK: 	setbe	3735928559(%ebx,%ecx,8)
+        	setbe	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	seta	%bl
+        	seta	%bl
+
+// CHECK: 	seta	3735928559(%ebx,%ecx,8)
+        	seta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sets	%bl
+        	sets	%bl
+
+// CHECK: 	sets	3735928559(%ebx,%ecx,8)
+        	sets	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setns	%bl
+        	setns	%bl
+
+// CHECK: 	setns	3735928559(%ebx,%ecx,8)
+        	setns	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setp	%bl
+        	setp	%bl
+
+// CHECK: 	setp	3735928559(%ebx,%ecx,8)
+        	setp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setnp	%bl
+        	setnp	%bl
+
+// CHECK: 	setnp	3735928559(%ebx,%ecx,8)
+        	setnp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setl	%bl
+        	setl	%bl
+
+// CHECK: 	setl	3735928559(%ebx,%ecx,8)
+        	setl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setge	%bl
+        	setge	%bl
+
+// CHECK: 	setge	3735928559(%ebx,%ecx,8)
+        	setge	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setle	%bl
+        	setle	%bl
+
+// CHECK: 	setle	3735928559(%ebx,%ecx,8)
+        	setle	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setg	%bl
+        	setg	%bl
+
+// CHECK: 	setg	3735928559(%ebx,%ecx,8)
+        	setg	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	nopl	3735928559(%ebx,%ecx,8)
+        	nopl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	nop
+        	nop
+
+// CHECK: 	fldl	3735928559(%ebx,%ecx,8)
+        	fldl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fildl	3735928559(%ebx,%ecx,8)
+        	fildl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fildll	3735928559(%ebx,%ecx,8)
+        	fildll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fldt	3735928559(%ebx,%ecx,8)
+        	fldt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fbld	3735928559(%ebx,%ecx,8)
+        	fbld	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstl	3735928559(%ebx,%ecx,8)
+        	fstl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistl	3735928559(%ebx,%ecx,8)
+        	fistl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstpl	3735928559(%ebx,%ecx,8)
+        	fstpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistpl	3735928559(%ebx,%ecx,8)
+        	fistpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistpll	3735928559(%ebx,%ecx,8)
+        	fistpll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstpt	3735928559(%ebx,%ecx,8)
+        	fstpt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fbstp	3735928559(%ebx,%ecx,8)
+        	fbstp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ficoml	3735928559(%ebx,%ecx,8)
+        	ficoml	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ficompl	3735928559(%ebx,%ecx,8)
+        	ficompl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fucompp
+        	fucompp
+
+// CHECK: 	ftst
+        	ftst
+
+// CHECK: 	fld1
+        	fld1
+
+// CHECK: 	fldz
+        	fldz
+
+// CHECK: 	faddl	3735928559(%ebx,%ecx,8)
+        	faddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fiaddl	3735928559(%ebx,%ecx,8)
+        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fsubl	3735928559(%ebx,%ecx,8)
+        	fsubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fisubl	3735928559(%ebx,%ecx,8)
+        	fisubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fsubrl	3735928559(%ebx,%ecx,8)
+        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fisubrl	3735928559(%ebx,%ecx,8)
+        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fmull	3735928559(%ebx,%ecx,8)
+        	fmull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fimull	3735928559(%ebx,%ecx,8)
+        	fimull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fdivl	3735928559(%ebx,%ecx,8)
+        	fdivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fidivl	3735928559(%ebx,%ecx,8)
+        	fidivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fdivrl	3735928559(%ebx,%ecx,8)
+        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fidivrl	3735928559(%ebx,%ecx,8)
+        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fsqrt
+        	fsqrt
+
+// CHECK: 	fsin
+        	fsin
+
+// CHECK: 	fcos
+        	fcos
+
+// CHECK: 	fchs
+        	fchs
+
+// CHECK: 	fabs
+        	fabs
+
+// CHECK: 	fldcw	3735928559(%ebx,%ecx,8)
+        	fldcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fnstcw	3735928559(%ebx,%ecx,8)
+        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rdtsc
+        	rdtsc
+
+// CHECK: 	sysenter
+        	sysenter
+
+// CHECK: 	sysexit
+        	sysexit
+
+// CHECK: 	ud2
+        	ud2
+
+// CHECK: 	movnti	%ecx, 3735928559(%ebx,%ecx,8)
+        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	clflush	3735928559(%ebx,%ecx,8)
+        	clflush	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	emms
+        	emms
+
+// CHECK: 	movd	%ecx, %mm3
+        	movd	%ecx,%mm3
+
+// CHECK: 	movd	3735928559(%ebx,%ecx,8), %mm3
+        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	movd	%ecx, %xmm5
+        	movd	%ecx,%xmm5
+
+// CHECK: 	movd	3735928559(%ebx,%ecx,8), %xmm5
+        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movd	%xmm5, %ecx
+        	movd	%xmm5,%ecx
+
+// CHECK: 	movd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movq	3735928559(%ebx,%ecx,8), %mm3
+        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	movq	%mm3, %mm3
+        	movq	%mm3,%mm3
+
+// CHECK: 	movq	%mm3, %mm3
+        	movq	%mm3,%mm3
+
+// CHECK: 	movq	%xmm5, %xmm5
+        	movq	%xmm5,%xmm5
+
+// CHECK: 	movq	%xmm5, %xmm5
+        	movq	%xmm5,%xmm5
+
+// CHECK: 	packssdw	%mm3, %mm3
+        	packssdw	%mm3,%mm3
+
+// CHECK: 	packssdw	%xmm5, %xmm5
+        	packssdw	%xmm5,%xmm5
+
+// CHECK: 	packsswb	%mm3, %mm3
+        	packsswb	%mm3,%mm3
+
+// CHECK: 	packsswb	%xmm5, %xmm5
+        	packsswb	%xmm5,%xmm5
+
+// CHECK: 	packuswb	%mm3, %mm3
+        	packuswb	%mm3,%mm3
+
+// CHECK: 	packuswb	%xmm5, %xmm5
+        	packuswb	%xmm5,%xmm5
+
+// CHECK: 	paddb	%mm3, %mm3
+        	paddb	%mm3,%mm3
+
+// CHECK: 	paddb	%xmm5, %xmm5
+        	paddb	%xmm5,%xmm5
+
+// CHECK: 	paddw	%mm3, %mm3
+        	paddw	%mm3,%mm3
+
+// CHECK: 	paddw	%xmm5, %xmm5
+        	paddw	%xmm5,%xmm5
+
+// CHECK: 	paddd	%mm3, %mm3
+        	paddd	%mm3,%mm3
+
+// CHECK: 	paddd	%xmm5, %xmm5
+        	paddd	%xmm5,%xmm5
+
+// CHECK: 	paddq	%mm3, %mm3
+        	paddq	%mm3,%mm3
+
+// CHECK: 	paddq	%xmm5, %xmm5
+        	paddq	%xmm5,%xmm5
+
+// CHECK: 	paddsb	%mm3, %mm3
+        	paddsb	%mm3,%mm3
+
+// CHECK: 	paddsb	%xmm5, %xmm5
+        	paddsb	%xmm5,%xmm5
+
+// CHECK: 	paddsw	%mm3, %mm3
+        	paddsw	%mm3,%mm3
+
+// CHECK: 	paddsw	%xmm5, %xmm5
+        	paddsw	%xmm5,%xmm5
+
+// CHECK: 	paddusb	%mm3, %mm3
+        	paddusb	%mm3,%mm3
+
+// CHECK: 	paddusb	%xmm5, %xmm5
+        	paddusb	%xmm5,%xmm5
+
+// CHECK: 	paddusw	%mm3, %mm3
+        	paddusw	%mm3,%mm3
+
+// CHECK: 	paddusw	%xmm5, %xmm5
+        	paddusw	%xmm5,%xmm5
+
+// CHECK: 	pand	%mm3, %mm3
+        	pand	%mm3,%mm3
+
+// CHECK: 	pand	%xmm5, %xmm5
+        	pand	%xmm5,%xmm5
+
+// CHECK: 	pandn	%mm3, %mm3
+        	pandn	%mm3,%mm3
+
+// CHECK: 	pandn	%xmm5, %xmm5
+        	pandn	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqb	%mm3, %mm3
+        	pcmpeqb	%mm3,%mm3
+
+// CHECK: 	pcmpeqb	%xmm5, %xmm5
+        	pcmpeqb	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqw	%mm3, %mm3
+        	pcmpeqw	%mm3,%mm3
+
+// CHECK: 	pcmpeqw	%xmm5, %xmm5
+        	pcmpeqw	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqd	%mm3, %mm3
+        	pcmpeqd	%mm3,%mm3
+
+// CHECK: 	pcmpeqd	%xmm5, %xmm5
+        	pcmpeqd	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtb	%mm3, %mm3
+        	pcmpgtb	%mm3,%mm3
+
+// CHECK: 	pcmpgtb	%xmm5, %xmm5
+        	pcmpgtb	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtw	%mm3, %mm3
+        	pcmpgtw	%mm3,%mm3
+
+// CHECK: 	pcmpgtw	%xmm5, %xmm5
+        	pcmpgtw	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtd	%mm3, %mm3
+        	pcmpgtd	%mm3,%mm3
+
+// CHECK: 	pcmpgtd	%xmm5, %xmm5
+        	pcmpgtd	%xmm5,%xmm5
+
+// CHECK: 	pmaddwd	%mm3, %mm3
+        	pmaddwd	%mm3,%mm3
+
+// CHECK: 	pmaddwd	%xmm5, %xmm5
+        	pmaddwd	%xmm5,%xmm5
+
+// CHECK: 	pmulhw	%mm3, %mm3
+        	pmulhw	%mm3,%mm3
+
+// CHECK: 	pmulhw	%xmm5, %xmm5
+        	pmulhw	%xmm5,%xmm5
+
+// CHECK: 	pmullw	%mm3, %mm3
+        	pmullw	%mm3,%mm3
+
+// CHECK: 	pmullw	%xmm5, %xmm5
+        	pmullw	%xmm5,%xmm5
+
+// CHECK: 	por	%mm3, %mm3
+        	por	%mm3,%mm3
+
+// CHECK: 	por	%xmm5, %xmm5
+        	por	%xmm5,%xmm5
+
+// CHECK: 	psllw	%mm3, %mm3
+        	psllw	%mm3,%mm3
+
+// CHECK: 	psllw	%xmm5, %xmm5
+        	psllw	%xmm5,%xmm5
+
+// CHECK: 	psllw	$127, %mm3
+        	psllw	$0x7f,%mm3
+
+// CHECK: 	psllw	$127, %xmm5
+        	psllw	$0x7f,%xmm5
+
+// CHECK: 	pslld	%mm3, %mm3
+        	pslld	%mm3,%mm3
+
+// CHECK: 	pslld	%xmm5, %xmm5
+        	pslld	%xmm5,%xmm5
+
+// CHECK: 	pslld	$127, %mm3
+        	pslld	$0x7f,%mm3
+
+// CHECK: 	pslld	$127, %xmm5
+        	pslld	$0x7f,%xmm5
+
+// CHECK: 	psllq	%mm3, %mm3
+        	psllq	%mm3,%mm3
+
+// CHECK: 	psllq	%xmm5, %xmm5
+        	psllq	%xmm5,%xmm5
+
+// CHECK: 	psllq	$127, %mm3
+        	psllq	$0x7f,%mm3
+
+// CHECK: 	psllq	$127, %xmm5
+        	psllq	$0x7f,%xmm5
+
+// CHECK: 	psraw	%mm3, %mm3
+        	psraw	%mm3,%mm3
+
+// CHECK: 	psraw	%xmm5, %xmm5
+        	psraw	%xmm5,%xmm5
+
+// CHECK: 	psraw	$127, %mm3
+        	psraw	$0x7f,%mm3
+
+// CHECK: 	psraw	$127, %xmm5
+        	psraw	$0x7f,%xmm5
+
+// CHECK: 	psrad	%mm3, %mm3
+        	psrad	%mm3,%mm3
+
+// CHECK: 	psrad	%xmm5, %xmm5
+        	psrad	%xmm5,%xmm5
+
+// CHECK: 	psrad	$127, %mm3
+        	psrad	$0x7f,%mm3
+
+// CHECK: 	psrad	$127, %xmm5
+        	psrad	$0x7f,%xmm5
+
+// CHECK: 	psrlw	%mm3, %mm3
+        	psrlw	%mm3,%mm3
+
+// CHECK: 	psrlw	%xmm5, %xmm5
+        	psrlw	%xmm5,%xmm5
+
+// CHECK: 	psrlw	$127, %mm3
+        	psrlw	$0x7f,%mm3
+
+// CHECK: 	psrlw	$127, %xmm5
+        	psrlw	$0x7f,%xmm5
+
+// CHECK: 	psrld	%mm3, %mm3
+        	psrld	%mm3,%mm3
+
+// CHECK: 	psrld	%xmm5, %xmm5
+        	psrld	%xmm5,%xmm5
+
+// CHECK: 	psrld	$127, %mm3
+        	psrld	$0x7f,%mm3
+
+// CHECK: 	psrld	$127, %xmm5
+        	psrld	$0x7f,%xmm5
+
+// CHECK: 	psrlq	%mm3, %mm3
+        	psrlq	%mm3,%mm3
+
+// CHECK: 	psrlq	%xmm5, %xmm5
+        	psrlq	%xmm5,%xmm5
+
+// CHECK: 	psrlq	$127, %mm3
+        	psrlq	$0x7f,%mm3
+
+// CHECK: 	psrlq	$127, %xmm5
+        	psrlq	$0x7f,%xmm5
+
+// CHECK: 	psubb	%mm3, %mm3
+        	psubb	%mm3,%mm3
+
+// CHECK: 	psubb	%xmm5, %xmm5
+        	psubb	%xmm5,%xmm5
+
+// CHECK: 	psubw	%mm3, %mm3
+        	psubw	%mm3,%mm3
+
+// CHECK: 	psubw	%xmm5, %xmm5
+        	psubw	%xmm5,%xmm5
+
+// CHECK: 	psubd	%mm3, %mm3
+        	psubd	%mm3,%mm3
+
+// CHECK: 	psubd	%xmm5, %xmm5
+        	psubd	%xmm5,%xmm5
+
+// CHECK: 	psubq	%mm3, %mm3
+        	psubq	%mm3,%mm3
+
+// CHECK: 	psubq	%xmm5, %xmm5
+        	psubq	%xmm5,%xmm5
+
+// CHECK: 	psubsb	%mm3, %mm3
+        	psubsb	%mm3,%mm3
+
+// CHECK: 	psubsb	%xmm5, %xmm5
+        	psubsb	%xmm5,%xmm5
+
+// CHECK: 	psubsw	%mm3, %mm3
+        	psubsw	%mm3,%mm3
+
+// CHECK: 	psubsw	%xmm5, %xmm5
+        	psubsw	%xmm5,%xmm5
+
+// CHECK: 	psubusb	%mm3, %mm3
+        	psubusb	%mm3,%mm3
+
+// CHECK: 	psubusb	%xmm5, %xmm5
+        	psubusb	%xmm5,%xmm5
+
+// CHECK: 	psubusw	%mm3, %mm3
+        	psubusw	%mm3,%mm3
+
+// CHECK: 	psubusw	%xmm5, %xmm5
+        	psubusw	%xmm5,%xmm5
+
+// CHECK: 	punpckhbw	%mm3, %mm3
+        	punpckhbw	%mm3,%mm3
+
+// CHECK: 	punpckhbw	%xmm5, %xmm5
+        	punpckhbw	%xmm5,%xmm5
+
+// CHECK: 	punpckhwd	%mm3, %mm3
+        	punpckhwd	%mm3,%mm3
+
+// CHECK: 	punpckhwd	%xmm5, %xmm5
+        	punpckhwd	%xmm5,%xmm5
+
+// CHECK: 	punpckhdq	%mm3, %mm3
+        	punpckhdq	%mm3,%mm3
+
+// CHECK: 	punpckhdq	%xmm5, %xmm5
+        	punpckhdq	%xmm5,%xmm5
+
+// CHECK: 	punpcklbw	%mm3, %mm3
+        	punpcklbw	%mm3,%mm3
+
+// CHECK: 	punpcklbw	%xmm5, %xmm5
+        	punpcklbw	%xmm5,%xmm5
+
+// CHECK: 	punpcklwd	%mm3, %mm3
+        	punpcklwd	%mm3,%mm3
+
+// CHECK: 	punpcklwd	%xmm5, %xmm5
+        	punpcklwd	%xmm5,%xmm5
+
+// CHECK: 	punpckldq	%mm3, %mm3
+        	punpckldq	%mm3,%mm3
+
+// CHECK: 	punpckldq	%xmm5, %xmm5
+        	punpckldq	%xmm5,%xmm5
+
+// CHECK: 	pxor	%mm3, %mm3
+        	pxor	%mm3,%mm3
+
+// CHECK: 	pxor	%xmm5, %xmm5
+        	pxor	%xmm5,%xmm5
+
+// CHECK: 	addps	%xmm5, %xmm5
+        	addps	%xmm5,%xmm5
+
+// CHECK: 	addss	%xmm5, %xmm5
+        	addss	%xmm5,%xmm5
+
+// CHECK: 	andnps	%xmm5, %xmm5
+        	andnps	%xmm5,%xmm5
+
+// CHECK: 	andps	%xmm5, %xmm5
+        	andps	%xmm5,%xmm5
+
+// CHECK: 	cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpi2ps	%mm3, %xmm5
+        	cvtpi2ps	%mm3,%xmm5
+
+// CHECK: 	cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvtps2pi	%xmm5, %mm3
+        	cvtps2pi	%xmm5,%mm3
+
+// CHECK: 	cvtsi2ss	%ecx, %xmm5
+        	cvtsi2ss	%ecx,%xmm5
+
+// CHECK: 	cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvttps2pi	%xmm5, %mm3
+        	cvttps2pi	%xmm5,%mm3
+
+// CHECK: 	cvttss2si	3735928559(%ebx,%ecx,8), %ecx
+        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	cvttss2si	%xmm5, %ecx
+        	cvttss2si	%xmm5,%ecx
+
+// CHECK: 	divps	%xmm5, %xmm5
+        	divps	%xmm5,%xmm5
+
+// CHECK: 	divss	%xmm5, %xmm5
+        	divss	%xmm5,%xmm5
+
+// CHECK: 	ldmxcsr	3735928559(%ebx,%ecx,8)
+        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	maskmovq	%mm3, %mm3
+        	maskmovq	%mm3,%mm3
+
+// CHECK: 	maxps	%xmm5, %xmm5
+        	maxps	%xmm5,%xmm5
+
+// CHECK: 	maxss	%xmm5, %xmm5
+        	maxss	%xmm5,%xmm5
+
+// CHECK: 	minps	%xmm5, %xmm5
+        	minps	%xmm5,%xmm5
+
+// CHECK: 	minss	%xmm5, %xmm5
+        	minss	%xmm5,%xmm5
+
+// CHECK: 	movaps	3735928559(%ebx,%ecx,8), %xmm5
+        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movaps	%xmm5, %xmm5
+        	movaps	%xmm5,%xmm5
+
+// CHECK: 	movaps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movaps	%xmm5, %xmm5
+        	movaps	%xmm5,%xmm5
+
+// CHECK: 	movhlps	%xmm5, %xmm5
+        	movhlps	%xmm5,%xmm5
+
+// CHECK: 	movhps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movlhps	%xmm5, %xmm5
+        	movlhps	%xmm5,%xmm5
+
+// CHECK: 	movlps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movmskps	%xmm5, %ecx
+        	movmskps	%xmm5,%ecx
+
+// CHECK: 	movntps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntq	%mm3, 3735928559(%ebx,%ecx,8)
+        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movss	3735928559(%ebx,%ecx,8), %xmm5
+        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movss	%xmm5, %xmm5
+        	movss	%xmm5,%xmm5
+
+// CHECK: 	movss	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movss	%xmm5, %xmm5
+        	movss	%xmm5,%xmm5
+
+// CHECK: 	movups	3735928559(%ebx,%ecx,8), %xmm5
+        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movups	%xmm5, %xmm5
+        	movups	%xmm5,%xmm5
+
+// CHECK: 	movups	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movups	%xmm5, %xmm5
+        	movups	%xmm5,%xmm5
+
+// CHECK: 	mulps	%xmm5, %xmm5
+        	mulps	%xmm5,%xmm5
+
+// CHECK: 	mulss	%xmm5, %xmm5
+        	mulss	%xmm5,%xmm5
+
+// CHECK: 	orps	%xmm5, %xmm5
+        	orps	%xmm5,%xmm5
+
+// CHECK: 	pavgb	%mm3, %mm3
+        	pavgb	%mm3,%mm3
+
+// CHECK: 	pavgb	%xmm5, %xmm5
+        	pavgb	%xmm5,%xmm5
+
+// CHECK: 	pavgw	%mm3, %mm3
+        	pavgw	%mm3,%mm3
+
+// CHECK: 	pavgw	%xmm5, %xmm5
+        	pavgw	%xmm5,%xmm5
+
+// CHECK: 	pmaxsw	%mm3, %mm3
+        	pmaxsw	%mm3,%mm3
+
+// CHECK: 	pmaxsw	%xmm5, %xmm5
+        	pmaxsw	%xmm5,%xmm5
+
+// CHECK: 	pmaxub	%mm3, %mm3
+        	pmaxub	%mm3,%mm3
+
+// CHECK: 	pmaxub	%xmm5, %xmm5
+        	pmaxub	%xmm5,%xmm5
+
+// CHECK: 	pminsw	%mm3, %mm3
+        	pminsw	%mm3,%mm3
+
+// CHECK: 	pminsw	%xmm5, %xmm5
+        	pminsw	%xmm5,%xmm5
+
+// CHECK: 	pminub	%mm3, %mm3
+        	pminub	%mm3,%mm3
+
+// CHECK: 	pminub	%xmm5, %xmm5
+        	pminub	%xmm5,%xmm5
+
+// CHECK: 	pmovmskb	%mm3, %ecx
+        	pmovmskb	%mm3,%ecx
+
+// CHECK: 	pmovmskb	%xmm5, %ecx
+        	pmovmskb	%xmm5,%ecx
+
+// CHECK: 	pmulhuw	%mm3, %mm3
+        	pmulhuw	%mm3,%mm3
+
+// CHECK: 	pmulhuw	%xmm5, %xmm5
+        	pmulhuw	%xmm5,%xmm5
+
+// CHECK: 	prefetchnta	3735928559(%ebx,%ecx,8)
+        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht0	3735928559(%ebx,%ecx,8)
+        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht1	3735928559(%ebx,%ecx,8)
+        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht2	3735928559(%ebx,%ecx,8)
+        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	psadbw	%mm3, %mm3
+        	psadbw	%mm3,%mm3
+
+// CHECK: 	psadbw	%xmm5, %xmm5
+        	psadbw	%xmm5,%xmm5
+
+// CHECK: 	rcpps	3735928559(%ebx,%ecx,8), %xmm5
+        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rcpps	%xmm5, %xmm5
+        	rcpps	%xmm5,%xmm5
+
+// CHECK: 	rcpss	3735928559(%ebx,%ecx,8), %xmm5
+        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rcpss	%xmm5, %xmm5
+        	rcpss	%xmm5,%xmm5
+
+// CHECK: 	rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
+        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rsqrtps	%xmm5, %xmm5
+        	rsqrtps	%xmm5,%xmm5
+
+// CHECK: 	rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
+        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rsqrtss	%xmm5, %xmm5
+        	rsqrtss	%xmm5,%xmm5
+
+// CHECK: 	sqrtps	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtps	%xmm5, %xmm5
+        	sqrtps	%xmm5,%xmm5
+
+// CHECK: 	sqrtss	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtss	%xmm5, %xmm5
+        	sqrtss	%xmm5,%xmm5
+
+// CHECK: 	stmxcsr	3735928559(%ebx,%ecx,8)
+        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subps	%xmm5, %xmm5
+        	subps	%xmm5,%xmm5
+
+// CHECK: 	subss	%xmm5, %xmm5
+        	subss	%xmm5,%xmm5
+
+// CHECK: 	ucomiss	3735928559(%ebx,%ecx,8), %xmm5
+        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ucomiss	%xmm5, %xmm5
+        	ucomiss	%xmm5,%xmm5
+
+// CHECK: 	unpckhps	%xmm5, %xmm5
+        	unpckhps	%xmm5,%xmm5
+
+// CHECK: 	unpcklps	%xmm5, %xmm5
+        	unpcklps	%xmm5,%xmm5
+
+// CHECK: 	xorps	%xmm5, %xmm5
+        	xorps	%xmm5,%xmm5
+
+// CHECK: 	addpd	%xmm5, %xmm5
+        	addpd	%xmm5,%xmm5
+
+// CHECK: 	addsd	%xmm5, %xmm5
+        	addsd	%xmm5,%xmm5
+
+// CHECK: 	andnpd	%xmm5, %xmm5
+        	andnpd	%xmm5,%xmm5
+
+// CHECK: 	andpd	%xmm5, %xmm5
+        	andpd	%xmm5,%xmm5
+
+// CHECK: 	comisd	3735928559(%ebx,%ecx,8), %xmm5
+        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	comisd	%xmm5, %xmm5
+        	comisd	%xmm5,%xmm5
+
+// CHECK: 	cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpi2pd	%mm3, %xmm5
+        	cvtpi2pd	%mm3,%xmm5
+
+// CHECK: 	cvtsi2sd	%ecx, %xmm5
+        	cvtsi2sd	%ecx,%xmm5
+
+// CHECK: 	cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	divpd	%xmm5, %xmm5
+        	divpd	%xmm5,%xmm5
+
+// CHECK: 	divsd	%xmm5, %xmm5
+        	divsd	%xmm5,%xmm5
+
+// CHECK: 	maxpd	%xmm5, %xmm5
+        	maxpd	%xmm5,%xmm5
+
+// CHECK: 	maxsd	%xmm5, %xmm5
+        	maxsd	%xmm5,%xmm5
+
+// CHECK: 	minpd	%xmm5, %xmm5
+        	minpd	%xmm5,%xmm5
+
+// CHECK: 	minsd	%xmm5, %xmm5
+        	minsd	%xmm5,%xmm5
+
+// CHECK: 	movapd	3735928559(%ebx,%ecx,8), %xmm5
+        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movapd	%xmm5, %xmm5
+        	movapd	%xmm5,%xmm5
+
+// CHECK: 	movapd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movapd	%xmm5, %xmm5
+        	movapd	%xmm5,%xmm5
+
+// CHECK: 	movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movmskpd	%xmm5, %ecx
+        	movmskpd	%xmm5,%ecx
+
+// CHECK: 	movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movsd	3735928559(%ebx,%ecx,8), %xmm5
+        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movsd	%xmm5, %xmm5
+        	movsd	%xmm5,%xmm5
+
+// CHECK: 	movsd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movsd	%xmm5, %xmm5
+        	movsd	%xmm5,%xmm5
+
+// CHECK: 	movupd	3735928559(%ebx,%ecx,8), %xmm5
+        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movupd	%xmm5, %xmm5
+        	movupd	%xmm5,%xmm5
+
+// CHECK: 	movupd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movupd	%xmm5, %xmm5
+        	movupd	%xmm5,%xmm5
+
+// CHECK: 	mulpd	%xmm5, %xmm5
+        	mulpd	%xmm5,%xmm5
+
+// CHECK: 	mulsd	%xmm5, %xmm5
+        	mulsd	%xmm5,%xmm5
+
+// CHECK: 	orpd	%xmm5, %xmm5
+        	orpd	%xmm5,%xmm5
+
+// CHECK: 	sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtpd	%xmm5, %xmm5
+        	sqrtpd	%xmm5,%xmm5
+
+// CHECK: 	sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtsd	%xmm5, %xmm5
+        	sqrtsd	%xmm5,%xmm5
+
+// CHECK: 	subpd	%xmm5, %xmm5
+        	subpd	%xmm5,%xmm5
+
+// CHECK: 	subsd	%xmm5, %xmm5
+        	subsd	%xmm5,%xmm5
+
+// CHECK: 	ucomisd	3735928559(%ebx,%ecx,8), %xmm5
+        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ucomisd	%xmm5, %xmm5
+        	ucomisd	%xmm5,%xmm5
+
+// CHECK: 	unpckhpd	%xmm5, %xmm5
+        	unpckhpd	%xmm5,%xmm5
+
+// CHECK: 	unpcklpd	%xmm5, %xmm5
+        	unpcklpd	%xmm5,%xmm5
+
+// CHECK: 	xorpd	%xmm5, %xmm5
+        	xorpd	%xmm5,%xmm5
+
+// CHECK: 	cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtdq2pd	%xmm5, %xmm5
+        	cvtdq2pd	%xmm5,%xmm5
+
+// CHECK: 	cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpd2dq	%xmm5, %xmm5
+        	cvtpd2dq	%xmm5,%xmm5
+
+// CHECK: 	cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtdq2ps	%xmm5, %xmm5
+        	cvtdq2ps	%xmm5,%xmm5
+
+// CHECK: 	cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvtpd2pi	%xmm5, %mm3
+        	cvtpd2pi	%xmm5,%mm3
+
+// CHECK: 	cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtps2dq	%xmm5, %xmm5
+        	cvtps2dq	%xmm5,%xmm5
+
+// CHECK: 	cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtsd2ss	%xmm5, %xmm5
+        	cvtsd2ss	%xmm5,%xmm5
+
+// CHECK: 	cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtss2sd	%xmm5, %xmm5
+        	cvtss2sd	%xmm5,%xmm5
+
+// CHECK: 	cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvttpd2pi	%xmm5, %mm3
+        	cvttpd2pi	%xmm5,%mm3
+
+// CHECK: 	cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
+        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	cvttsd2si	%xmm5, %ecx
+        	cvttsd2si	%xmm5,%ecx
+
+// CHECK: 	maskmovdqu	%xmm5, %xmm5
+        	maskmovdqu	%xmm5,%xmm5
+
+// CHECK: 	movdqa	3735928559(%ebx,%ecx,8), %xmm5
+        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movdqa	%xmm5, %xmm5
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: 	movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movdqa	%xmm5, %xmm5
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: 	movdqu	3735928559(%ebx,%ecx,8), %xmm5
+        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movdq2q	%xmm5, %mm3
+        	movdq2q	%xmm5,%mm3
+
+// CHECK: 	movq2dq	%mm3, %xmm5
+        	movq2dq	%mm3,%xmm5
+
+// CHECK: 	pmuludq	%mm3, %mm3
+        	pmuludq	%mm3,%mm3
+
+// CHECK: 	pmuludq	%xmm5, %xmm5
+        	pmuludq	%xmm5,%xmm5
+
+// CHECK: 	pslldq	$127, %xmm5
+        	pslldq	$0x7f,%xmm5
+
+// CHECK: 	psrldq	$127, %xmm5
+        	psrldq	$0x7f,%xmm5
+
+// CHECK: 	punpckhqdq	%xmm5, %xmm5
+        	punpckhqdq	%xmm5,%xmm5
+
+// CHECK: 	punpcklqdq	%xmm5, %xmm5
+        	punpcklqdq	%xmm5,%xmm5
+
+// CHECK: 	addsubpd	%xmm5, %xmm5
+        	addsubpd	%xmm5,%xmm5
+
+// CHECK: 	addsubps	%xmm5, %xmm5
+        	addsubps	%xmm5,%xmm5
+
+// CHECK: 	haddpd	%xmm5, %xmm5
+        	haddpd	%xmm5,%xmm5
+
+// CHECK: 	haddps	%xmm5, %xmm5
+        	haddps	%xmm5,%xmm5
+
+// CHECK: 	hsubpd	%xmm5, %xmm5
+        	hsubpd	%xmm5,%xmm5
+
+// CHECK: 	hsubps	%xmm5, %xmm5
+        	hsubps	%xmm5,%xmm5
+
+// CHECK: 	lddqu	3735928559(%ebx,%ecx,8), %xmm5
+        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movddup	3735928559(%ebx,%ecx,8), %xmm5
+        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movddup	%xmm5, %xmm5
+        	movddup	%xmm5,%xmm5
+
+// CHECK: 	movshdup	3735928559(%ebx,%ecx,8), %xmm5
+        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movshdup	%xmm5, %xmm5
+        	movshdup	%xmm5,%xmm5
+
+// CHECK: 	movsldup	3735928559(%ebx,%ecx,8), %xmm5
+        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movsldup	%xmm5, %xmm5
+        	movsldup	%xmm5,%xmm5
+
+// CHECK: 	phaddw	%mm3, %mm3
+        	phaddw	%mm3,%mm3
+
+// CHECK: 	phaddw	%xmm5, %xmm5
+        	phaddw	%xmm5,%xmm5
+
+// CHECK: 	phaddd	%mm3, %mm3
+        	phaddd	%mm3,%mm3
+
+// CHECK: 	phaddd	%xmm5, %xmm5
+        	phaddd	%xmm5,%xmm5
+
+// CHECK: 	phaddsw	%mm3, %mm3
+        	phaddsw	%mm3,%mm3
+
+// CHECK: 	phaddsw	%xmm5, %xmm5
+        	phaddsw	%xmm5,%xmm5
+
+// CHECK: 	phsubw	%mm3, %mm3
+        	phsubw	%mm3,%mm3
+
+// CHECK: 	phsubw	%xmm5, %xmm5
+        	phsubw	%xmm5,%xmm5
+
+// CHECK: 	phsubd	%mm3, %mm3
+        	phsubd	%mm3,%mm3
+
+// CHECK: 	phsubd	%xmm5, %xmm5
+        	phsubd	%xmm5,%xmm5
+
+// CHECK: 	phsubsw	%mm3, %mm3
+        	phsubsw	%mm3,%mm3
+
+// CHECK: 	phsubsw	%xmm5, %xmm5
+        	phsubsw	%xmm5,%xmm5
+
+// CHECK: 	pmaddubsw	%mm3, %mm3
+        	pmaddubsw	%mm3,%mm3
+
+// CHECK: 	pmaddubsw	%xmm5, %xmm5
+        	pmaddubsw	%xmm5,%xmm5
+
+// CHECK: 	pmulhrsw	%mm3, %mm3
+        	pmulhrsw	%mm3,%mm3
+
+// CHECK: 	pmulhrsw	%xmm5, %xmm5
+        	pmulhrsw	%xmm5,%xmm5
+
+// CHECK: 	pshufb	%mm3, %mm3
+        	pshufb	%mm3,%mm3
+
+// CHECK: 	pshufb	%xmm5, %xmm5
+        	pshufb	%xmm5,%xmm5
+
+// CHECK: 	psignb	%mm3, %mm3
+        	psignb	%mm3,%mm3
+
+// CHECK: 	psignb	%xmm5, %xmm5
+        	psignb	%xmm5,%xmm5
+
+// CHECK: 	psignw	%mm3, %mm3
+        	psignw	%mm3,%mm3
+
+// CHECK: 	psignw	%xmm5, %xmm5
+        	psignw	%xmm5,%xmm5
+
+// CHECK: 	psignd	%mm3, %mm3
+        	psignd	%mm3,%mm3
+
+// CHECK: 	psignd	%xmm5, %xmm5
+        	psignd	%xmm5,%xmm5
+
+// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %mm3
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsb	%mm3, %mm3
+        	pabsb	%mm3,%mm3
+
+// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsb	%xmm5, %xmm5
+        	pabsb	%xmm5,%xmm5
+
+// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %mm3
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsw	%mm3, %mm3
+        	pabsw	%mm3,%mm3
+
+// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsw	%xmm5, %xmm5
+        	pabsw	%xmm5,%xmm5
+
+// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %mm3
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsd	%mm3, %mm3
+        	pabsd	%mm3,%mm3
+
+// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsd	%xmm5, %xmm5
+        	pabsd	%xmm5,%xmm5
+
+// CHECK: 	femms
+        	femms
+
+// CHECK: 	packusdw	%xmm5, %xmm5
+        	packusdw	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqq	%xmm5, %xmm5
+        	pcmpeqq	%xmm5,%xmm5
+
+// CHECK: 	phminposuw	3735928559(%ebx,%ecx,8), %xmm5
+        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phminposuw	%xmm5, %xmm5
+        	phminposuw	%xmm5,%xmm5
+
+// CHECK: 	pmaxsb	%xmm5, %xmm5
+        	pmaxsb	%xmm5,%xmm5
+
+// CHECK: 	pmaxsd	%xmm5, %xmm5
+        	pmaxsd	%xmm5,%xmm5
+
+// CHECK: 	pmaxud	%xmm5, %xmm5
+        	pmaxud	%xmm5,%xmm5
+
+// CHECK: 	pmaxuw	%xmm5, %xmm5
+        	pmaxuw	%xmm5,%xmm5
+
+// CHECK: 	pminsb	%xmm5, %xmm5
+        	pminsb	%xmm5,%xmm5
+
+// CHECK: 	pminsd	%xmm5, %xmm5
+        	pminsd	%xmm5,%xmm5
+
+// CHECK: 	pminud	%xmm5, %xmm5
+        	pminud	%xmm5,%xmm5
+
+// CHECK: 	pminuw	%xmm5, %xmm5
+        	pminuw	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbw	%xmm5, %xmm5
+        	pmovsxbw	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbd	%xmm5, %xmm5
+        	pmovsxbd	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbq	%xmm5, %xmm5
+        	pmovsxbq	%xmm5,%xmm5
+
+// CHECK: 	pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxwd	%xmm5, %xmm5
+        	pmovsxwd	%xmm5,%xmm5
+
+// CHECK: 	pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxwq	%xmm5, %xmm5
+        	pmovsxwq	%xmm5,%xmm5
+
+// CHECK: 	pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxdq	%xmm5, %xmm5
+        	pmovsxdq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbw	%xmm5, %xmm5
+        	pmovzxbw	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbd	%xmm5, %xmm5
+        	pmovzxbd	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbq	%xmm5, %xmm5
+        	pmovzxbq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxwd	%xmm5, %xmm5
+        	pmovzxwd	%xmm5,%xmm5
+
+// CHECK: 	pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxwq	%xmm5, %xmm5
+        	pmovzxwq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxdq	%xmm5, %xmm5
+        	pmovzxdq	%xmm5,%xmm5
+
+// CHECK: 	pmuldq	%xmm5, %xmm5
+        	pmuldq	%xmm5,%xmm5
+
+// CHECK: 	pmulld	%xmm5, %xmm5
+        	pmulld	%xmm5,%xmm5
+
+// CHECK: 	ptest 	3735928559(%ebx,%ecx,8), %xmm5
+        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ptest 	%xmm5, %xmm5
+        	ptest	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtq	%xmm5, %xmm5
+        	pcmpgtq	%xmm5,%xmm5
+
+
+// CHECK: movb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc6,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movb	$127, 69
+// CHECK:  encoding: [0xc6,0x05,0x45,0x00,0x00,0x00,0x7f]
+        	movb	$0x7f,0x45
+
+// CHECK: movb	$127, 32493
+// CHECK:  encoding: [0xc6,0x05,0xed,0x7e,0x00,0x00,0x7f]
+        	movb	$0x7f,0x7eed
+
+// CHECK: movb	$127, 3133065982
+// CHECK:  encoding: [0xc6,0x05,0xfe,0xca,0xbe,0xba,0x7f]
+        	movb	$0x7f,0xbabecafe
+
+// CHECK: movb	$127, 305419896
+// CHECK:  encoding: [0xc6,0x05,0x78,0x56,0x34,0x12,0x7f]
+        	movb	$0x7f,0x12345678
+
+// CHECK: movw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movw	$31438, 69
+// CHECK:  encoding: [0x66,0xc7,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	movw	$0x7ace,0x45
+
+// CHECK: movw	$31438, 32493
+// CHECK:  encoding: [0x66,0xc7,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	movw	$0x7ace,0x7eed
+
+// CHECK: movw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0xc7,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	movw	$0x7ace,0xbabecafe
+
+// CHECK: movw	$31438, 305419896
+// CHECK:  encoding: [0x66,0xc7,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	movw	$0x7ace,0x12345678
+
+// CHECK: movl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movl	$2063514302, 69
+// CHECK:  encoding: [0xc7,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	movl	$0x7afebabe,0x45
+
+// CHECK: movl	$2063514302, 32493
+// CHECK:  encoding: [0xc7,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	movl	$0x7afebabe,0x7eed
+
+// CHECK: movl	$2063514302, 3133065982
+// CHECK:  encoding: [0xc7,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	movl	$0x7afebabe,0xbabecafe
+
+// CHECK: movl	$2063514302, 305419896
+// CHECK:  encoding: [0xc7,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	movl	$0x7afebabe,0x12345678
+
+// CHECK: movl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc7,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movl	$324478056, 69
+// CHECK:  encoding: [0xc7,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	movl	$0x13572468,0x45
+
+// CHECK: movl	$324478056, 32493
+// CHECK:  encoding: [0xc7,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	movl	$0x13572468,0x7eed
+
+// CHECK: movl	$324478056, 3133065982
+// CHECK:  encoding: [0xc7,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	movl	$0x13572468,0xbabecafe
+
+// CHECK: movl	$324478056, 305419896
+// CHECK:  encoding: [0xc7,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	movl	$0x13572468,0x12345678
+
+// CHECK: movsbl	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0x0f,0xbe,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: movsbl	69, %ecx
+// CHECK:  encoding: [0x0f,0xbe,0x0d,0x45,0x00,0x00,0x00]
+        	movsbl	0x45,%ecx
+
+// CHECK: movsbl	32493, %ecx
+// CHECK:  encoding: [0x0f,0xbe,0x0d,0xed,0x7e,0x00,0x00]
+        	movsbl	0x7eed,%ecx
+
+// CHECK: movsbl	3133065982, %ecx
+// CHECK:  encoding: [0x0f,0xbe,0x0d,0xfe,0xca,0xbe,0xba]
+        	movsbl	0xbabecafe,%ecx
+
+// CHECK: movsbl	305419896, %ecx
+// CHECK:  encoding: [0x0f,0xbe,0x0d,0x78,0x56,0x34,0x12]
+        	movsbl	0x12345678,%ecx
+
+// CHECK: movsbw	3735928559(%ebx,%ecx,8), %bx
+// CHECK:  encoding: [0x66,0x0f,0xbe,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movsbw	0xdeadbeef(%ebx,%ecx,8),%bx
+
+// CHECK: movsbw	69, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0x45,0x00,0x00,0x00]
+        	movsbw	0x45,%bx
+
+// CHECK: movsbw	32493, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0xed,0x7e,0x00,0x00]
+        	movsbw	0x7eed,%bx
+
+// CHECK: movsbw	3133065982, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0xfe,0xca,0xbe,0xba]
+        	movsbw	0xbabecafe,%bx
+
+// CHECK: movsbw	305419896, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbe,0x1d,0x78,0x56,0x34,0x12]
+        	movsbw	0x12345678,%bx
+
+// CHECK: movswl	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0x0f,0xbf,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: movswl	69, %ecx
+// CHECK:  encoding: [0x0f,0xbf,0x0d,0x45,0x00,0x00,0x00]
+        	movswl	0x45,%ecx
+
+// CHECK: movswl	32493, %ecx
+// CHECK:  encoding: [0x0f,0xbf,0x0d,0xed,0x7e,0x00,0x00]
+        	movswl	0x7eed,%ecx
+
+// CHECK: movswl	3133065982, %ecx
+// CHECK:  encoding: [0x0f,0xbf,0x0d,0xfe,0xca,0xbe,0xba]
+        	movswl	0xbabecafe,%ecx
+
+// CHECK: movswl	305419896, %ecx
+// CHECK:  encoding: [0x0f,0xbf,0x0d,0x78,0x56,0x34,0x12]
+        	movswl	0x12345678,%ecx
+
+// CHECK: movzbl	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0x0f,0xb6,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: movzbl	69, %ecx
+// CHECK:  encoding: [0x0f,0xb6,0x0d,0x45,0x00,0x00,0x00]
+        	movzbl	0x45,%ecx
+
+// CHECK: movzbl	32493, %ecx
+// CHECK:  encoding: [0x0f,0xb6,0x0d,0xed,0x7e,0x00,0x00]
+        	movzbl	0x7eed,%ecx
+
+// CHECK: movzbl	3133065982, %ecx
+// CHECK:  encoding: [0x0f,0xb6,0x0d,0xfe,0xca,0xbe,0xba]
+        	movzbl	0xbabecafe,%ecx
+
+// CHECK: movzbl	305419896, %ecx
+// CHECK:  encoding: [0x0f,0xb6,0x0d,0x78,0x56,0x34,0x12]
+        	movzbl	0x12345678,%ecx
+
+// CHECK: movzbw	3735928559(%ebx,%ecx,8), %bx
+// CHECK:  encoding: [0x66,0x0f,0xb6,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movzbw	0xdeadbeef(%ebx,%ecx,8),%bx
+
+// CHECK: movzbw	69, %bx
+// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0x45,0x00,0x00,0x00]
+        	movzbw	0x45,%bx
+
+// CHECK: movzbw	32493, %bx
+// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0xed,0x7e,0x00,0x00]
+        	movzbw	0x7eed,%bx
+
+// CHECK: movzbw	3133065982, %bx
+// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0xfe,0xca,0xbe,0xba]
+        	movzbw	0xbabecafe,%bx
+
+// CHECK: movzbw	305419896, %bx
+// CHECK:  encoding: [0x66,0x0f,0xb6,0x1d,0x78,0x56,0x34,0x12]
+        	movzbw	0x12345678,%bx
+
+// CHECK: movzwl	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0x0f,0xb7,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: movzwl	69, %ecx
+// CHECK:  encoding: [0x0f,0xb7,0x0d,0x45,0x00,0x00,0x00]
+        	movzwl	0x45,%ecx
+
+// CHECK: movzwl	32493, %ecx
+// CHECK:  encoding: [0x0f,0xb7,0x0d,0xed,0x7e,0x00,0x00]
+        	movzwl	0x7eed,%ecx
+
+// CHECK: movzwl	3133065982, %ecx
+// CHECK:  encoding: [0x0f,0xb7,0x0d,0xfe,0xca,0xbe,0xba]
+        	movzwl	0xbabecafe,%ecx
+
+// CHECK: movzwl	305419896, %ecx
+// CHECK:  encoding: [0x0f,0xb7,0x0d,0x78,0x56,0x34,0x12]
+        	movzwl	0x12345678,%ecx
+
+// CHECK: pushl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	pushl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: pushw	32493
+// CHECK:  encoding: [0x66,0xff,0x35,0xed,0x7e,0x00,0x00]
+        	pushw	0x7eed
+
+// CHECK: pushl	3133065982
+// CHECK:  encoding: [0xff,0x35,0xfe,0xca,0xbe,0xba]
+        	pushl	0xbabecafe
+
+// CHECK: pushl	305419896
+// CHECK:  encoding: [0xff,0x35,0x78,0x56,0x34,0x12]
+        	pushl	0x12345678
+
+// CHECK: popl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x8f,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	popl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: popw	32493
+// CHECK:  encoding: [0x66,0x8f,0x05,0xed,0x7e,0x00,0x00]
+        	popw	0x7eed
+
+// CHECK: popl	3133065982
+// CHECK:  encoding: [0x8f,0x05,0xfe,0xca,0xbe,0xba]
+        	popl	0xbabecafe
+
+// CHECK: popl	305419896
+// CHECK:  encoding: [0x8f,0x05,0x78,0x56,0x34,0x12]
+        	popl	0x12345678
+
+// CHECK: clc
+// CHECK:  encoding: [0xf8]
+        	clc
+
+// CHECK: cld
+// CHECK:  encoding: [0xfc]
+        	cld
+
+// CHECK: cli
+// CHECK:  encoding: [0xfa]
+        	cli
+
+// CHECK: clts
+// CHECK:  encoding: [0x0f,0x06]
+        	clts
+
+// CHECK: cmc
+// CHECK:  encoding: [0xf5]
+        	cmc
+
+// CHECK: lahf
+// CHECK:  encoding: [0x9f]
+        	lahf
+
+// CHECK: sahf
+// CHECK:  encoding: [0x9e]
+        	sahf
+
+// CHECK: stc
+// CHECK:  encoding: [0xf9]
+        	stc
+
+// CHECK: std
+// CHECK:  encoding: [0xfd]
+        	std
+
+// CHECK: sti
+// CHECK:  encoding: [0xfb]
+        	sti
+
+// CHECK: addb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x84,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: addb	$254, 69
+// CHECK:  encoding: [0x80,0x05,0x45,0x00,0x00,0x00,0xfe]
+        	addb	$0xfe,0x45
+
+// CHECK: addb	$254, 32493
+// CHECK:  encoding: [0x80,0x05,0xed,0x7e,0x00,0x00,0xfe]
+        	addb	$0xfe,0x7eed
+
+// CHECK: addb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x05,0xfe,0xca,0xbe,0xba,0xfe]
+        	addb	$0xfe,0xbabecafe
+
+// CHECK: addb	$254, 305419896
+// CHECK:  encoding: [0x80,0x05,0x78,0x56,0x34,0x12,0xfe]
+        	addb	$0xfe,0x12345678
+
+// CHECK: addb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: addb	$127, 69
+// CHECK:  encoding: [0x80,0x05,0x45,0x00,0x00,0x00,0x7f]
+        	addb	$0x7f,0x45
+
+// CHECK: addb	$127, 32493
+// CHECK:  encoding: [0x80,0x05,0xed,0x7e,0x00,0x00,0x7f]
+        	addb	$0x7f,0x7eed
+
+// CHECK: addb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x05,0xfe,0xca,0xbe,0xba,0x7f]
+        	addb	$0x7f,0xbabecafe
+
+// CHECK: addb	$127, 305419896
+// CHECK:  encoding: [0x80,0x05,0x78,0x56,0x34,0x12,0x7f]
+        	addb	$0x7f,0x12345678
+
+// CHECK: addw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: addw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	addw	$0x7ace,0x45
+
+// CHECK: addw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	addw	$0x7ace,0x7eed
+
+// CHECK: addw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	addw	$0x7ace,0xbabecafe
+
+// CHECK: addw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	addw	$0x7ace,0x12345678
+
+// CHECK: addl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: addl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	addl	$0x7afebabe,0x45
+
+// CHECK: addl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	addl	$0x7afebabe,0x7eed
+
+// CHECK: addl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	addl	$0x7afebabe,0xbabecafe
+
+// CHECK: addl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	addl	$0x7afebabe,0x12345678
+
+// CHECK: addl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: addl	$324478056, 69
+// CHECK:  encoding: [0x81,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	addl	$0x13572468,0x45
+
+// CHECK: addl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	addl	$0x13572468,0x7eed
+
+// CHECK: addl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	addl	$0x13572468,0xbabecafe
+
+// CHECK: addl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	addl	$0x13572468,0x12345678
+
+// CHECK: incl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	incl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: incw	32493
+// CHECK:  encoding: [0x66,0xff,0x05,0xed,0x7e,0x00,0x00]
+        	incw	0x7eed
+
+// CHECK: incl	3133065982
+// CHECK:  encoding: [0xff,0x05,0xfe,0xca,0xbe,0xba]
+        	incl	0xbabecafe
+
+// CHECK: incl	305419896
+// CHECK:  encoding: [0xff,0x05,0x78,0x56,0x34,0x12]
+        	incl	0x12345678
+
+// CHECK: subb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xac,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: subb	$254, 69
+// CHECK:  encoding: [0x80,0x2d,0x45,0x00,0x00,0x00,0xfe]
+        	subb	$0xfe,0x45
+
+// CHECK: subb	$254, 32493
+// CHECK:  encoding: [0x80,0x2d,0xed,0x7e,0x00,0x00,0xfe]
+        	subb	$0xfe,0x7eed
+
+// CHECK: subb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x2d,0xfe,0xca,0xbe,0xba,0xfe]
+        	subb	$0xfe,0xbabecafe
+
+// CHECK: subb	$254, 305419896
+// CHECK:  encoding: [0x80,0x2d,0x78,0x56,0x34,0x12,0xfe]
+        	subb	$0xfe,0x12345678
+
+// CHECK: subb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xac,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: subb	$127, 69
+// CHECK:  encoding: [0x80,0x2d,0x45,0x00,0x00,0x00,0x7f]
+        	subb	$0x7f,0x45
+
+// CHECK: subb	$127, 32493
+// CHECK:  encoding: [0x80,0x2d,0xed,0x7e,0x00,0x00,0x7f]
+        	subb	$0x7f,0x7eed
+
+// CHECK: subb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x2d,0xfe,0xca,0xbe,0xba,0x7f]
+        	subb	$0x7f,0xbabecafe
+
+// CHECK: subb	$127, 305419896
+// CHECK:  encoding: [0x80,0x2d,0x78,0x56,0x34,0x12,0x7f]
+        	subb	$0x7f,0x12345678
+
+// CHECK: subw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: subw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x2d,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	subw	$0x7ace,0x45
+
+// CHECK: subw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x2d,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	subw	$0x7ace,0x7eed
+
+// CHECK: subw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x2d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	subw	$0x7ace,0xbabecafe
+
+// CHECK: subw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x2d,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	subw	$0x7ace,0x12345678
+
+// CHECK: subl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: subl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x2d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	subl	$0x7afebabe,0x45
+
+// CHECK: subl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x2d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	subl	$0x7afebabe,0x7eed
+
+// CHECK: subl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x2d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	subl	$0x7afebabe,0xbabecafe
+
+// CHECK: subl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x2d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	subl	$0x7afebabe,0x12345678
+
+// CHECK: subl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xac,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: subl	$324478056, 69
+// CHECK:  encoding: [0x81,0x2d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	subl	$0x13572468,0x45
+
+// CHECK: subl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x2d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	subl	$0x13572468,0x7eed
+
+// CHECK: subl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x2d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	subl	$0x13572468,0xbabecafe
+
+// CHECK: subl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x2d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	subl	$0x13572468,0x12345678
+
+// CHECK: decl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	decl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: decw	32493
+// CHECK:  encoding: [0x66,0xff,0x0d,0xed,0x7e,0x00,0x00]
+        	decw	0x7eed
+
+// CHECK: decl	3133065982
+// CHECK:  encoding: [0xff,0x0d,0xfe,0xca,0xbe,0xba]
+        	decl	0xbabecafe
+
+// CHECK: decl	305419896
+// CHECK:  encoding: [0xff,0x0d,0x78,0x56,0x34,0x12]
+        	decl	0x12345678
+
+// CHECK: sbbb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	sbbb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sbbb	$254, 69
+// CHECK:  encoding: [0x80,0x1d,0x45,0x00,0x00,0x00,0xfe]
+        	sbbb	$0xfe,0x45
+
+// CHECK: sbbb	$254, 32493
+// CHECK:  encoding: [0x80,0x1d,0xed,0x7e,0x00,0x00,0xfe]
+        	sbbb	$0xfe,0x7eed
+
+// CHECK: sbbb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x1d,0xfe,0xca,0xbe,0xba,0xfe]
+        	sbbb	$0xfe,0xbabecafe
+
+// CHECK: sbbb	$254, 305419896
+// CHECK:  encoding: [0x80,0x1d,0x78,0x56,0x34,0x12,0xfe]
+        	sbbb	$0xfe,0x12345678
+
+// CHECK: sbbb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x9c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	sbbb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sbbb	$127, 69
+// CHECK:  encoding: [0x80,0x1d,0x45,0x00,0x00,0x00,0x7f]
+        	sbbb	$0x7f,0x45
+
+// CHECK: sbbb	$127, 32493
+// CHECK:  encoding: [0x80,0x1d,0xed,0x7e,0x00,0x00,0x7f]
+        	sbbb	$0x7f,0x7eed
+
+// CHECK: sbbb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x1d,0xfe,0xca,0xbe,0xba,0x7f]
+        	sbbb	$0x7f,0xbabecafe
+
+// CHECK: sbbb	$127, 305419896
+// CHECK:  encoding: [0x80,0x1d,0x78,0x56,0x34,0x12,0x7f]
+        	sbbb	$0x7f,0x12345678
+
+// CHECK: sbbw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sbbw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x1d,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	sbbw	$0x7ace,0x45
+
+// CHECK: sbbw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x1d,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	sbbw	$0x7ace,0x7eed
+
+// CHECK: sbbw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x1d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	sbbw	$0x7ace,0xbabecafe
+
+// CHECK: sbbw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x1d,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	sbbw	$0x7ace,0x12345678
+
+// CHECK: sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sbbl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x1d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	sbbl	$0x7afebabe,0x45
+
+// CHECK: sbbl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x1d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	sbbl	$0x7afebabe,0x7eed
+
+// CHECK: sbbl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x1d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	sbbl	$0x7afebabe,0xbabecafe
+
+// CHECK: sbbl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x1d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	sbbl	$0x7afebabe,0x12345678
+
+// CHECK: sbbl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x9c,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sbbl	$324478056, 69
+// CHECK:  encoding: [0x81,0x1d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	sbbl	$0x13572468,0x45
+
+// CHECK: sbbl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x1d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	sbbl	$0x13572468,0x7eed
+
+// CHECK: sbbl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x1d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	sbbl	$0x13572468,0xbabecafe
+
+// CHECK: sbbl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x1d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	sbbl	$0x13572468,0x12345678
+
+// CHECK: cmpb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpb	$254, 69
+// CHECK:  encoding: [0x80,0x3d,0x45,0x00,0x00,0x00,0xfe]
+        	cmpb	$0xfe,0x45
+
+// CHECK: cmpb	$254, 32493
+// CHECK:  encoding: [0x80,0x3d,0xed,0x7e,0x00,0x00,0xfe]
+        	cmpb	$0xfe,0x7eed
+
+// CHECK: cmpb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x3d,0xfe,0xca,0xbe,0xba,0xfe]
+        	cmpb	$0xfe,0xbabecafe
+
+// CHECK: cmpb	$254, 305419896
+// CHECK:  encoding: [0x80,0x3d,0x78,0x56,0x34,0x12,0xfe]
+        	cmpb	$0xfe,0x12345678
+
+// CHECK: cmpb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpb	$127, 69
+// CHECK:  encoding: [0x80,0x3d,0x45,0x00,0x00,0x00,0x7f]
+        	cmpb	$0x7f,0x45
+
+// CHECK: cmpb	$127, 32493
+// CHECK:  encoding: [0x80,0x3d,0xed,0x7e,0x00,0x00,0x7f]
+        	cmpb	$0x7f,0x7eed
+
+// CHECK: cmpb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x3d,0xfe,0xca,0xbe,0xba,0x7f]
+        	cmpb	$0x7f,0xbabecafe
+
+// CHECK: cmpb	$127, 305419896
+// CHECK:  encoding: [0x80,0x3d,0x78,0x56,0x34,0x12,0x7f]
+        	cmpb	$0x7f,0x12345678
+
+// CHECK: cmpw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x3d,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	cmpw	$0x7ace,0x45
+
+// CHECK: cmpw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x3d,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	cmpw	$0x7ace,0x7eed
+
+// CHECK: cmpw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x3d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	cmpw	$0x7ace,0xbabecafe
+
+// CHECK: cmpw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x3d,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	cmpw	$0x7ace,0x12345678
+
+// CHECK: cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x3d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	cmpl	$0x7afebabe,0x45
+
+// CHECK: cmpl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x3d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	cmpl	$0x7afebabe,0x7eed
+
+// CHECK: cmpl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x3d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	cmpl	$0x7afebabe,0xbabecafe
+
+// CHECK: cmpl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x3d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	cmpl	$0x7afebabe,0x12345678
+
+// CHECK: cmpl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpl	$324478056, 69
+// CHECK:  encoding: [0x81,0x3d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	cmpl	$0x13572468,0x45
+
+// CHECK: cmpl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x3d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	cmpl	$0x13572468,0x7eed
+
+// CHECK: cmpl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x3d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	cmpl	$0x13572468,0xbabecafe
+
+// CHECK: cmpl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x3d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	cmpl	$0x13572468,0x12345678
+
+// CHECK: testb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf6,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: testb	$127, 69
+// CHECK:  encoding: [0xf6,0x05,0x45,0x00,0x00,0x00,0x7f]
+        	testb	$0x7f,0x45
+
+// CHECK: testb	$127, 32493
+// CHECK:  encoding: [0xf6,0x05,0xed,0x7e,0x00,0x00,0x7f]
+        	testb	$0x7f,0x7eed
+
+// CHECK: testb	$127, 3133065982
+// CHECK:  encoding: [0xf6,0x05,0xfe,0xca,0xbe,0xba,0x7f]
+        	testb	$0x7f,0xbabecafe
+
+// CHECK: testb	$127, 305419896
+// CHECK:  encoding: [0xf6,0x05,0x78,0x56,0x34,0x12,0x7f]
+        	testb	$0x7f,0x12345678
+
+// CHECK: testw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: testw	$31438, 69
+// CHECK:  encoding: [0x66,0xf7,0x05,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	testw	$0x7ace,0x45
+
+// CHECK: testw	$31438, 32493
+// CHECK:  encoding: [0x66,0xf7,0x05,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	testw	$0x7ace,0x7eed
+
+// CHECK: testw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0xf7,0x05,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	testw	$0x7ace,0xbabecafe
+
+// CHECK: testw	$31438, 305419896
+// CHECK:  encoding: [0x66,0xf7,0x05,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	testw	$0x7ace,0x12345678
+
+// CHECK: testl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: testl	$2063514302, 69
+// CHECK:  encoding: [0xf7,0x05,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	testl	$0x7afebabe,0x45
+
+// CHECK: testl	$2063514302, 32493
+// CHECK:  encoding: [0xf7,0x05,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	testl	$0x7afebabe,0x7eed
+
+// CHECK: testl	$2063514302, 3133065982
+// CHECK:  encoding: [0xf7,0x05,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	testl	$0x7afebabe,0xbabecafe
+
+// CHECK: testl	$2063514302, 305419896
+// CHECK:  encoding: [0xf7,0x05,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	testl	$0x7afebabe,0x12345678
+
+// CHECK: testl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0x84,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: testl	$324478056, 69
+// CHECK:  encoding: [0xf7,0x05,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	testl	$0x13572468,0x45
+
+// CHECK: testl	$324478056, 32493
+// CHECK:  encoding: [0xf7,0x05,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	testl	$0x13572468,0x7eed
+
+// CHECK: testl	$324478056, 3133065982
+// CHECK:  encoding: [0xf7,0x05,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	testl	$0x13572468,0xbabecafe
+
+// CHECK: testl	$324478056, 305419896
+// CHECK:  encoding: [0xf7,0x05,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	testl	$0x13572468,0x12345678
+
+// CHECK: andb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: andb	$254, 69
+// CHECK:  encoding: [0x80,0x25,0x45,0x00,0x00,0x00,0xfe]
+        	andb	$0xfe,0x45
+
+// CHECK: andb	$254, 32493
+// CHECK:  encoding: [0x80,0x25,0xed,0x7e,0x00,0x00,0xfe]
+        	andb	$0xfe,0x7eed
+
+// CHECK: andb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x25,0xfe,0xca,0xbe,0xba,0xfe]
+        	andb	$0xfe,0xbabecafe
+
+// CHECK: andb	$254, 305419896
+// CHECK:  encoding: [0x80,0x25,0x78,0x56,0x34,0x12,0xfe]
+        	andb	$0xfe,0x12345678
+
+// CHECK: andb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: andb	$127, 69
+// CHECK:  encoding: [0x80,0x25,0x45,0x00,0x00,0x00,0x7f]
+        	andb	$0x7f,0x45
+
+// CHECK: andb	$127, 32493
+// CHECK:  encoding: [0x80,0x25,0xed,0x7e,0x00,0x00,0x7f]
+        	andb	$0x7f,0x7eed
+
+// CHECK: andb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x25,0xfe,0xca,0xbe,0xba,0x7f]
+        	andb	$0x7f,0xbabecafe
+
+// CHECK: andb	$127, 305419896
+// CHECK:  encoding: [0x80,0x25,0x78,0x56,0x34,0x12,0x7f]
+        	andb	$0x7f,0x12345678
+
+// CHECK: andw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: andw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x25,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	andw	$0x7ace,0x45
+
+// CHECK: andw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x25,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	andw	$0x7ace,0x7eed
+
+// CHECK: andw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x25,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	andw	$0x7ace,0xbabecafe
+
+// CHECK: andw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x25,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	andw	$0x7ace,0x12345678
+
+// CHECK: andl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: andl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x25,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	andl	$0x7afebabe,0x45
+
+// CHECK: andl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x25,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	andl	$0x7afebabe,0x7eed
+
+// CHECK: andl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x25,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	andl	$0x7afebabe,0xbabecafe
+
+// CHECK: andl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x25,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	andl	$0x7afebabe,0x12345678
+
+// CHECK: andl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: andl	$324478056, 69
+// CHECK:  encoding: [0x81,0x25,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	andl	$0x13572468,0x45
+
+// CHECK: andl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x25,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	andl	$0x13572468,0x7eed
+
+// CHECK: andl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x25,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	andl	$0x13572468,0xbabecafe
+
+// CHECK: andl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x25,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	andl	$0x13572468,0x12345678
+
+// CHECK: orb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: orb	$254, 69
+// CHECK:  encoding: [0x80,0x0d,0x45,0x00,0x00,0x00,0xfe]
+        	orb	$0xfe,0x45
+
+// CHECK: orb	$254, 32493
+// CHECK:  encoding: [0x80,0x0d,0xed,0x7e,0x00,0x00,0xfe]
+        	orb	$0xfe,0x7eed
+
+// CHECK: orb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x0d,0xfe,0xca,0xbe,0xba,0xfe]
+        	orb	$0xfe,0xbabecafe
+
+// CHECK: orb	$254, 305419896
+// CHECK:  encoding: [0x80,0x0d,0x78,0x56,0x34,0x12,0xfe]
+        	orb	$0xfe,0x12345678
+
+// CHECK: orb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: orb	$127, 69
+// CHECK:  encoding: [0x80,0x0d,0x45,0x00,0x00,0x00,0x7f]
+        	orb	$0x7f,0x45
+
+// CHECK: orb	$127, 32493
+// CHECK:  encoding: [0x80,0x0d,0xed,0x7e,0x00,0x00,0x7f]
+        	orb	$0x7f,0x7eed
+
+// CHECK: orb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x0d,0xfe,0xca,0xbe,0xba,0x7f]
+        	orb	$0x7f,0xbabecafe
+
+// CHECK: orb	$127, 305419896
+// CHECK:  encoding: [0x80,0x0d,0x78,0x56,0x34,0x12,0x7f]
+        	orb	$0x7f,0x12345678
+
+// CHECK: orw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: orw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x0d,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	orw	$0x7ace,0x45
+
+// CHECK: orw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x0d,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	orw	$0x7ace,0x7eed
+
+// CHECK: orw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x0d,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	orw	$0x7ace,0xbabecafe
+
+// CHECK: orw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x0d,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	orw	$0x7ace,0x12345678
+
+// CHECK: orl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: orl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x0d,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	orl	$0x7afebabe,0x45
+
+// CHECK: orl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x0d,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	orl	$0x7afebabe,0x7eed
+
+// CHECK: orl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x0d,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	orl	$0x7afebabe,0xbabecafe
+
+// CHECK: orl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x0d,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	orl	$0x7afebabe,0x12345678
+
+// CHECK: orl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: orl	$324478056, 69
+// CHECK:  encoding: [0x81,0x0d,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	orl	$0x13572468,0x45
+
+// CHECK: orl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x0d,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	orl	$0x13572468,0x7eed
+
+// CHECK: orl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x0d,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	orl	$0x13572468,0xbabecafe
+
+// CHECK: orl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x0d,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	orl	$0x13572468,0x12345678
+
+// CHECK: xorb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: xorb	$254, 69
+// CHECK:  encoding: [0x80,0x35,0x45,0x00,0x00,0x00,0xfe]
+        	xorb	$0xfe,0x45
+
+// CHECK: xorb	$254, 32493
+// CHECK:  encoding: [0x80,0x35,0xed,0x7e,0x00,0x00,0xfe]
+        	xorb	$0xfe,0x7eed
+
+// CHECK: xorb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x35,0xfe,0xca,0xbe,0xba,0xfe]
+        	xorb	$0xfe,0xbabecafe
+
+// CHECK: xorb	$254, 305419896
+// CHECK:  encoding: [0x80,0x35,0x78,0x56,0x34,0x12,0xfe]
+        	xorb	$0xfe,0x12345678
+
+// CHECK: xorb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0xb4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: xorb	$127, 69
+// CHECK:  encoding: [0x80,0x35,0x45,0x00,0x00,0x00,0x7f]
+        	xorb	$0x7f,0x45
+
+// CHECK: xorb	$127, 32493
+// CHECK:  encoding: [0x80,0x35,0xed,0x7e,0x00,0x00,0x7f]
+        	xorb	$0x7f,0x7eed
+
+// CHECK: xorb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x35,0xfe,0xca,0xbe,0xba,0x7f]
+        	xorb	$0x7f,0xbabecafe
+
+// CHECK: xorb	$127, 305419896
+// CHECK:  encoding: [0x80,0x35,0x78,0x56,0x34,0x12,0x7f]
+        	xorb	$0x7f,0x12345678
+
+// CHECK: xorw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: xorw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x35,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	xorw	$0x7ace,0x45
+
+// CHECK: xorw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x35,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	xorw	$0x7ace,0x7eed
+
+// CHECK: xorw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x35,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	xorw	$0x7ace,0xbabecafe
+
+// CHECK: xorw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x35,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	xorw	$0x7ace,0x12345678
+
+// CHECK: xorl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: xorl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x35,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	xorl	$0x7afebabe,0x45
+
+// CHECK: xorl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x35,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	xorl	$0x7afebabe,0x7eed
+
+// CHECK: xorl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x35,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	xorl	$0x7afebabe,0xbabecafe
+
+// CHECK: xorl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x35,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	xorl	$0x7afebabe,0x12345678
+
+// CHECK: xorl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0xb4,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: xorl	$324478056, 69
+// CHECK:  encoding: [0x81,0x35,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	xorl	$0x13572468,0x45
+
+// CHECK: xorl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x35,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	xorl	$0x13572468,0x7eed
+
+// CHECK: xorl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x35,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	xorl	$0x13572468,0xbabecafe
+
+// CHECK: xorl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x35,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	xorl	$0x13572468,0x12345678
+
+// CHECK: adcb	$254, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x94,0xcb,0xef,0xbe,0xad,0xde,0xfe]
+        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: adcb	$254, 69
+// CHECK:  encoding: [0x80,0x15,0x45,0x00,0x00,0x00,0xfe]
+        	adcb	$0xfe,0x45
+
+// CHECK: adcb	$254, 32493
+// CHECK:  encoding: [0x80,0x15,0xed,0x7e,0x00,0x00,0xfe]
+        	adcb	$0xfe,0x7eed
+
+// CHECK: adcb	$254, 3133065982
+// CHECK:  encoding: [0x80,0x15,0xfe,0xca,0xbe,0xba,0xfe]
+        	adcb	$0xfe,0xbabecafe
+
+// CHECK: adcb	$254, 305419896
+// CHECK:  encoding: [0x80,0x15,0x78,0x56,0x34,0x12,0xfe]
+        	adcb	$0xfe,0x12345678
+
+// CHECK: adcb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x80,0x94,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: adcb	$127, 69
+// CHECK:  encoding: [0x80,0x15,0x45,0x00,0x00,0x00,0x7f]
+        	adcb	$0x7f,0x45
+
+// CHECK: adcb	$127, 32493
+// CHECK:  encoding: [0x80,0x15,0xed,0x7e,0x00,0x00,0x7f]
+        	adcb	$0x7f,0x7eed
+
+// CHECK: adcb	$127, 3133065982
+// CHECK:  encoding: [0x80,0x15,0xfe,0xca,0xbe,0xba,0x7f]
+        	adcb	$0x7f,0xbabecafe
+
+// CHECK: adcb	$127, 305419896
+// CHECK:  encoding: [0x80,0x15,0x78,0x56,0x34,0x12,0x7f]
+        	adcb	$0x7f,0x12345678
+
+// CHECK: adcw	$31438, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0xce,0x7a]
+        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: adcw	$31438, 69
+// CHECK:  encoding: [0x66,0x81,0x15,0x45,0x00,0x00,0x00,0xce,0x7a]
+        	adcw	$0x7ace,0x45
+
+// CHECK: adcw	$31438, 32493
+// CHECK:  encoding: [0x66,0x81,0x15,0xed,0x7e,0x00,0x00,0xce,0x7a]
+        	adcw	$0x7ace,0x7eed
+
+// CHECK: adcw	$31438, 3133065982
+// CHECK:  encoding: [0x66,0x81,0x15,0xfe,0xca,0xbe,0xba,0xce,0x7a]
+        	adcw	$0x7ace,0xbabecafe
+
+// CHECK: adcw	$31438, 305419896
+// CHECK:  encoding: [0x66,0x81,0x15,0x78,0x56,0x34,0x12,0xce,0x7a]
+        	adcw	$0x7ace,0x12345678
+
+// CHECK: adcl	$2063514302, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0xbe,0xba,0xfe,0x7a]
+        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: adcl	$2063514302, 69
+// CHECK:  encoding: [0x81,0x15,0x45,0x00,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	adcl	$0x7afebabe,0x45
+
+// CHECK: adcl	$2063514302, 32493
+// CHECK:  encoding: [0x81,0x15,0xed,0x7e,0x00,0x00,0xbe,0xba,0xfe,0x7a]
+        	adcl	$0x7afebabe,0x7eed
+
+// CHECK: adcl	$2063514302, 3133065982
+// CHECK:  encoding: [0x81,0x15,0xfe,0xca,0xbe,0xba,0xbe,0xba,0xfe,0x7a]
+        	adcl	$0x7afebabe,0xbabecafe
+
+// CHECK: adcl	$2063514302, 305419896
+// CHECK:  encoding: [0x81,0x15,0x78,0x56,0x34,0x12,0xbe,0xba,0xfe,0x7a]
+        	adcl	$0x7afebabe,0x12345678
+
+// CHECK: adcl	$324478056, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x81,0x94,0xcb,0xef,0xbe,0xad,0xde,0x68,0x24,0x57,0x13]
+        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: adcl	$324478056, 69
+// CHECK:  encoding: [0x81,0x15,0x45,0x00,0x00,0x00,0x68,0x24,0x57,0x13]
+        	adcl	$0x13572468,0x45
+
+// CHECK: adcl	$324478056, 32493
+// CHECK:  encoding: [0x81,0x15,0xed,0x7e,0x00,0x00,0x68,0x24,0x57,0x13]
+        	adcl	$0x13572468,0x7eed
+
+// CHECK: adcl	$324478056, 3133065982
+// CHECK:  encoding: [0x81,0x15,0xfe,0xca,0xbe,0xba,0x68,0x24,0x57,0x13]
+        	adcl	$0x13572468,0xbabecafe
+
+// CHECK: adcl	$324478056, 305419896
+// CHECK:  encoding: [0x81,0x15,0x78,0x56,0x34,0x12,0x68,0x24,0x57,0x13]
+        	adcl	$0x13572468,0x12345678
+
+// CHECK: negl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	negl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: negw	32493
+// CHECK:  encoding: [0x66,0xf7,0x1d,0xed,0x7e,0x00,0x00]
+        	negw	0x7eed
+
+// CHECK: negl	3133065982
+// CHECK:  encoding: [0xf7,0x1d,0xfe,0xca,0xbe,0xba]
+        	negl	0xbabecafe
+
+// CHECK: negl	305419896
+// CHECK:  encoding: [0xf7,0x1d,0x78,0x56,0x34,0x12]
+        	negl	0x12345678
+
+// CHECK: notl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	notl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: notw	32493
+// CHECK:  encoding: [0x66,0xf7,0x15,0xed,0x7e,0x00,0x00]
+        	notw	0x7eed
+
+// CHECK: notl	3133065982
+// CHECK:  encoding: [0xf7,0x15,0xfe,0xca,0xbe,0xba]
+        	notl	0xbabecafe
+
+// CHECK: notl	305419896
+// CHECK:  encoding: [0xf7,0x15,0x78,0x56,0x34,0x12]
+        	notl	0x12345678
+
+// CHECK: cbtw
+// CHECK:  encoding: [0x66,0x98]
+        	cbtw
+
+// CHECK: cwtl
+// CHECK:  encoding: [0x98]
+        	cwtl
+
+// CHECK: cwtd
+// CHECK:  encoding: [0x66,0x99]
+        	cwtd
+
+// CHECK: cltd
+// CHECK:  encoding: [0x99]
+        	cltd
+
+// CHECK: mull	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	mull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: mulw	32493
+// CHECK:  encoding: [0x66,0xf7,0x25,0xed,0x7e,0x00,0x00]
+        	mulw	0x7eed
+
+// CHECK: mull	3133065982
+// CHECK:  encoding: [0xf7,0x25,0xfe,0xca,0xbe,0xba]
+        	mull	0xbabecafe
+
+// CHECK: mull	305419896
+// CHECK:  encoding: [0xf7,0x25,0x78,0x56,0x34,0x12]
+        	mull	0x12345678
+
+// CHECK: imull	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	imull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: imulw	32493
+// CHECK:  encoding: [0x66,0xf7,0x2d,0xed,0x7e,0x00,0x00]
+        	imulw	0x7eed
+
+// CHECK: imull	3133065982
+// CHECK:  encoding: [0xf7,0x2d,0xfe,0xca,0xbe,0xba]
+        	imull	0xbabecafe
+
+// CHECK: imull	305419896
+// CHECK:  encoding: [0xf7,0x2d,0x78,0x56,0x34,0x12]
+        	imull	0x12345678
+
+// CHECK: divl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	divl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: divw	32493
+// CHECK:  encoding: [0x66,0xf7,0x35,0xed,0x7e,0x00,0x00]
+        	divw	0x7eed
+
+// CHECK: divl	3133065982
+// CHECK:  encoding: [0xf7,0x35,0xfe,0xca,0xbe,0xba]
+        	divl	0xbabecafe
+
+// CHECK: divl	305419896
+// CHECK:  encoding: [0xf7,0x35,0x78,0x56,0x34,0x12]
+        	divl	0x12345678
+
+// CHECK: idivl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf7,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	idivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: idivw	32493
+// CHECK:  encoding: [0x66,0xf7,0x3d,0xed,0x7e,0x00,0x00]
+        	idivw	0x7eed
+
+// CHECK: idivl	3133065982
+// CHECK:  encoding: [0xf7,0x3d,0xfe,0xca,0xbe,0xba]
+        	idivl	0xbabecafe
+
+// CHECK: idivl	305419896
+// CHECK:  encoding: [0xf7,0x3d,0x78,0x56,0x34,0x12]
+        	idivl	0x12345678
+
+// CHECK: roll	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0x84,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: roll	$0, 69
+// CHECK:  encoding: [0xc1,0x05,0x45,0x00,0x00,0x00,0x00]
+        	roll	$0,0x45
+
+// CHECK: roll	$0, 32493
+// CHECK:  encoding: [0xc1,0x05,0xed,0x7e,0x00,0x00,0x00]
+        	roll	$0,0x7eed
+
+// CHECK: roll	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x05,0xfe,0xca,0xbe,0xba,0x00]
+        	roll	$0,0xbabecafe
+
+// CHECK: roll	$0, 305419896
+// CHECK:  encoding: [0xc1,0x05,0x78,0x56,0x34,0x12,0x00]
+        	roll	$0,0x12345678
+
+// CHECK: rolb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0x84,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: rolb	$127, 69
+// CHECK:  encoding: [0xc0,0x05,0x45,0x00,0x00,0x00,0x7f]
+        	rolb	$0x7f,0x45
+
+// CHECK: rolb	$127, 32493
+// CHECK:  encoding: [0xc0,0x05,0xed,0x7e,0x00,0x00,0x7f]
+        	rolb	$0x7f,0x7eed
+
+// CHECK: rolb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x05,0xfe,0xca,0xbe,0xba,0x7f]
+        	rolb	$0x7f,0xbabecafe
+
+// CHECK: rolb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x05,0x78,0x56,0x34,0x12,0x7f]
+        	rolb	$0x7f,0x12345678
+
+// CHECK: roll	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	roll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: rolw	32493
+// CHECK:  encoding: [0x66,0xd1,0x05,0xed,0x7e,0x00,0x00]
+        	rolw	0x7eed
+
+// CHECK: roll	3133065982
+// CHECK:  encoding: [0xd1,0x05,0xfe,0xca,0xbe,0xba]
+        	roll	0xbabecafe
+
+// CHECK: roll	305419896
+// CHECK:  encoding: [0xd1,0x05,0x78,0x56,0x34,0x12]
+        	roll	0x12345678
+
+// CHECK: rorl	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: rorl	$0, 69
+// CHECK:  encoding: [0xc1,0x0d,0x45,0x00,0x00,0x00,0x00]
+        	rorl	$0,0x45
+
+// CHECK: rorl	$0, 32493
+// CHECK:  encoding: [0xc1,0x0d,0xed,0x7e,0x00,0x00,0x00]
+        	rorl	$0,0x7eed
+
+// CHECK: rorl	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x0d,0xfe,0xca,0xbe,0xba,0x00]
+        	rorl	$0,0xbabecafe
+
+// CHECK: rorl	$0, 305419896
+// CHECK:  encoding: [0xc1,0x0d,0x78,0x56,0x34,0x12,0x00]
+        	rorl	$0,0x12345678
+
+// CHECK: rorb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0x8c,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: rorb	$127, 69
+// CHECK:  encoding: [0xc0,0x0d,0x45,0x00,0x00,0x00,0x7f]
+        	rorb	$0x7f,0x45
+
+// CHECK: rorb	$127, 32493
+// CHECK:  encoding: [0xc0,0x0d,0xed,0x7e,0x00,0x00,0x7f]
+        	rorb	$0x7f,0x7eed
+
+// CHECK: rorb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x0d,0xfe,0xca,0xbe,0xba,0x7f]
+        	rorb	$0x7f,0xbabecafe
+
+// CHECK: rorb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x0d,0x78,0x56,0x34,0x12,0x7f]
+        	rorb	$0x7f,0x12345678
+
+// CHECK: rorl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	rorl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: rorw	32493
+// CHECK:  encoding: [0x66,0xd1,0x0d,0xed,0x7e,0x00,0x00]
+        	rorw	0x7eed
+
+// CHECK: rorl	3133065982
+// CHECK:  encoding: [0xd1,0x0d,0xfe,0xca,0xbe,0xba]
+        	rorl	0xbabecafe
+
+// CHECK: rorl	305419896
+// CHECK:  encoding: [0xd1,0x0d,0x78,0x56,0x34,0x12]
+        	rorl	0x12345678
+
+// CHECK: shll	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	sall	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shll	$0, 69
+// CHECK:  encoding: [0xc1,0x25,0x45,0x00,0x00,0x00,0x00]
+        	sall	$0,0x45
+
+// CHECK: shll	$0, 32493
+// CHECK:  encoding: [0xc1,0x25,0xed,0x7e,0x00,0x00,0x00]
+        	sall	$0,0x7eed
+
+// CHECK: shll	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x25,0xfe,0xca,0xbe,0xba,0x00]
+        	sall	$0,0xbabecafe
+
+// CHECK: shll	$0, 305419896
+// CHECK:  encoding: [0xc1,0x25,0x78,0x56,0x34,0x12,0x00]
+        	sall	$0,0x12345678
+
+// CHECK: shlb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	salb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shlb	$127, 69
+// CHECK:  encoding: [0xc0,0x25,0x45,0x00,0x00,0x00,0x7f]
+        	salb	$0x7f,0x45
+
+// CHECK: shlb	$127, 32493
+// CHECK:  encoding: [0xc0,0x25,0xed,0x7e,0x00,0x00,0x7f]
+        	salb	$0x7f,0x7eed
+
+// CHECK: shlb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x25,0xfe,0xca,0xbe,0xba,0x7f]
+        	salb	$0x7f,0xbabecafe
+
+// CHECK: shlb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x25,0x78,0x56,0x34,0x12,0x7f]
+        	salb	$0x7f,0x12345678
+
+// CHECK: shll	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	sall	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shlw	32493
+// CHECK:  encoding: [0x66,0xd1,0x25,0xed,0x7e,0x00,0x00]
+        	salw	0x7eed
+
+// CHECK: shll	3133065982
+// CHECK:  encoding: [0xd1,0x25,0xfe,0xca,0xbe,0xba]
+        	sall	0xbabecafe
+
+// CHECK: shll	305419896
+// CHECK:  encoding: [0xd1,0x25,0x78,0x56,0x34,0x12]
+        	sall	0x12345678
+
+// CHECK: shll	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shll	$0, 69
+// CHECK:  encoding: [0xc1,0x25,0x45,0x00,0x00,0x00,0x00]
+        	shll	$0,0x45
+
+// CHECK: shll	$0, 32493
+// CHECK:  encoding: [0xc1,0x25,0xed,0x7e,0x00,0x00,0x00]
+        	shll	$0,0x7eed
+
+// CHECK: shll	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x25,0xfe,0xca,0xbe,0xba,0x00]
+        	shll	$0,0xbabecafe
+
+// CHECK: shll	$0, 305419896
+// CHECK:  encoding: [0xc1,0x25,0x78,0x56,0x34,0x12,0x00]
+        	shll	$0,0x12345678
+
+// CHECK: shlb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0xa4,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shlb	$127, 69
+// CHECK:  encoding: [0xc0,0x25,0x45,0x00,0x00,0x00,0x7f]
+        	shlb	$0x7f,0x45
+
+// CHECK: shlb	$127, 32493
+// CHECK:  encoding: [0xc0,0x25,0xed,0x7e,0x00,0x00,0x7f]
+        	shlb	$0x7f,0x7eed
+
+// CHECK: shlb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x25,0xfe,0xca,0xbe,0xba,0x7f]
+        	shlb	$0x7f,0xbabecafe
+
+// CHECK: shlb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x25,0x78,0x56,0x34,0x12,0x7f]
+        	shlb	$0x7f,0x12345678
+
+// CHECK: shll	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	shll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shlw	32493
+// CHECK:  encoding: [0x66,0xd1,0x25,0xed,0x7e,0x00,0x00]
+        	shlw	0x7eed
+
+// CHECK: shll	3133065982
+// CHECK:  encoding: [0xd1,0x25,0xfe,0xca,0xbe,0xba]
+        	shll	0xbabecafe
+
+// CHECK: shll	305419896
+// CHECK:  encoding: [0xd1,0x25,0x78,0x56,0x34,0x12]
+        	shll	0x12345678
+
+// CHECK: shrl	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0xac,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shrl	$0, 69
+// CHECK:  encoding: [0xc1,0x2d,0x45,0x00,0x00,0x00,0x00]
+        	shrl	$0,0x45
+
+// CHECK: shrl	$0, 32493
+// CHECK:  encoding: [0xc1,0x2d,0xed,0x7e,0x00,0x00,0x00]
+        	shrl	$0,0x7eed
+
+// CHECK: shrl	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x2d,0xfe,0xca,0xbe,0xba,0x00]
+        	shrl	$0,0xbabecafe
+
+// CHECK: shrl	$0, 305419896
+// CHECK:  encoding: [0xc1,0x2d,0x78,0x56,0x34,0x12,0x00]
+        	shrl	$0,0x12345678
+
+// CHECK: shrb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0xac,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shrb	$127, 69
+// CHECK:  encoding: [0xc0,0x2d,0x45,0x00,0x00,0x00,0x7f]
+        	shrb	$0x7f,0x45
+
+// CHECK: shrb	$127, 32493
+// CHECK:  encoding: [0xc0,0x2d,0xed,0x7e,0x00,0x00,0x7f]
+        	shrb	$0x7f,0x7eed
+
+// CHECK: shrb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x2d,0xfe,0xca,0xbe,0xba,0x7f]
+        	shrb	$0x7f,0xbabecafe
+
+// CHECK: shrb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x2d,0x78,0x56,0x34,0x12,0x7f]
+        	shrb	$0x7f,0x12345678
+
+// CHECK: shrl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	shrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: shrw	32493
+// CHECK:  encoding: [0x66,0xd1,0x2d,0xed,0x7e,0x00,0x00]
+        	shrw	0x7eed
+
+// CHECK: shrl	3133065982
+// CHECK:  encoding: [0xd1,0x2d,0xfe,0xca,0xbe,0xba]
+        	shrl	0xbabecafe
+
+// CHECK: shrl	305419896
+// CHECK:  encoding: [0xd1,0x2d,0x78,0x56,0x34,0x12]
+        	shrl	0x12345678
+
+// CHECK: sarl	$0, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc1,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x00]
+        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sarl	$0, 69
+// CHECK:  encoding: [0xc1,0x3d,0x45,0x00,0x00,0x00,0x00]
+        	sarl	$0,0x45
+
+// CHECK: sarl	$0, 32493
+// CHECK:  encoding: [0xc1,0x3d,0xed,0x7e,0x00,0x00,0x00]
+        	sarl	$0,0x7eed
+
+// CHECK: sarl	$0, 3133065982
+// CHECK:  encoding: [0xc1,0x3d,0xfe,0xca,0xbe,0xba,0x00]
+        	sarl	$0,0xbabecafe
+
+// CHECK: sarl	$0, 305419896
+// CHECK:  encoding: [0xc1,0x3d,0x78,0x56,0x34,0x12,0x00]
+        	sarl	$0,0x12345678
+
+// CHECK: sarb	$127, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xc0,0xbc,0xcb,0xef,0xbe,0xad,0xde,0x7f]
+        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sarb	$127, 69
+// CHECK:  encoding: [0xc0,0x3d,0x45,0x00,0x00,0x00,0x7f]
+        	sarb	$0x7f,0x45
+
+// CHECK: sarb	$127, 32493
+// CHECK:  encoding: [0xc0,0x3d,0xed,0x7e,0x00,0x00,0x7f]
+        	sarb	$0x7f,0x7eed
+
+// CHECK: sarb	$127, 3133065982
+// CHECK:  encoding: [0xc0,0x3d,0xfe,0xca,0xbe,0xba,0x7f]
+        	sarb	$0x7f,0xbabecafe
+
+// CHECK: sarb	$127, 305419896
+// CHECK:  encoding: [0xc0,0x3d,0x78,0x56,0x34,0x12,0x7f]
+        	sarb	$0x7f,0x12345678
+
+// CHECK: sarl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd1,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	sarl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sarw	32493
+// CHECK:  encoding: [0x66,0xd1,0x3d,0xed,0x7e,0x00,0x00]
+        	sarw	0x7eed
+
+// CHECK: sarl	3133065982
+// CHECK:  encoding: [0xd1,0x3d,0xfe,0xca,0xbe,0xba]
+        	sarl	0xbabecafe
+
+// CHECK: sarl	305419896
+// CHECK:  encoding: [0xd1,0x3d,0x78,0x56,0x34,0x12]
+        	sarl	0x12345678
+
+// CHECK: calll	*%ecx
+// CHECK:  encoding: [0xff,0xd1]
+        	call	*%ecx
+
+// CHECK: calll	*3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	call	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: calll	*3135175374
+// CHECK:  encoding: [0xff,0x15,0xce,0xfa,0xde,0xba]
+        	call	*0xbadeface
+
+// CHECK: calll	*3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	call	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: calll	*3135175374
+// CHECK:  encoding: [0xff,0x15,0xce,0xfa,0xde,0xba]
+        	call	*0xbadeface
+
+// CHECK: lcallw	*32493
+// CHECK:  encoding: [0x66,0xff,0x1d,0xed,0x7e,0x00,0x00]
+        	lcallw	*0x7eed
+
+// CHECK: jmpl	*3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: jmpl	*3135175374
+// CHECK:  encoding: [0xff,0x25,0xce,0xfa,0xde,0xba]
+        	jmp	*0xbadeface
+
+// CHECK: jmpl	*3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: jmpl	*3135175374
+// CHECK:  encoding: [0xff,0x25,0xce,0xfa,0xde,0xba]
+        	jmp	*0xbadeface
+
+// CHECK: ljmpl	*3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xff,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: ljmpw	*32493
+// CHECK:  encoding: [0x66,0xff,0x2d,0xed,0x7e,0x00,0x00]
+        	ljmpw	*0x7eed
+
+// CHECK: ljmpl	*3133065982
+// CHECK:  encoding: [0xff,0x2d,0xfe,0xca,0xbe,0xba]
+        	ljmpl	*0xbabecafe
+
+// CHECK: ljmpl	*305419896
+// CHECK:  encoding: [0xff,0x2d,0x78,0x56,0x34,0x12]
+        	ljmpl	*0x12345678
+
+// CHECK: ret
+// CHECK:  encoding: [0xc3]
+        	ret
+
+// CHECK: lret
+// CHECK:  encoding: [0xcb]
+        	lret
+
+// CHECK: leave
+// CHECK:  encoding: [0xc9]
+        	leave
+
+// CHECK: leave
+// CHECK:  encoding: [0xc9]
+        	leavel
+
+// CHECK: seto	%bl
+// CHECK:  encoding: [0x0f,0x90,0xc3]
+        	seto	%bl
+
+// CHECK: seto	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x90,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	seto	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: seto	32493
+// CHECK:  encoding: [0x0f,0x90,0x05,0xed,0x7e,0x00,0x00]
+        	seto	0x7eed
+
+// CHECK: seto	3133065982
+// CHECK:  encoding: [0x0f,0x90,0x05,0xfe,0xca,0xbe,0xba]
+        	seto	0xbabecafe
+
+// CHECK: seto	305419896
+// CHECK:  encoding: [0x0f,0x90,0x05,0x78,0x56,0x34,0x12]
+        	seto	0x12345678
+
+// CHECK: setno	%bl
+// CHECK:  encoding: [0x0f,0x91,0xc3]
+        	setno	%bl
+
+// CHECK: setno	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x91,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setno	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setno	32493
+// CHECK:  encoding: [0x0f,0x91,0x05,0xed,0x7e,0x00,0x00]
+        	setno	0x7eed
+
+// CHECK: setno	3133065982
+// CHECK:  encoding: [0x0f,0x91,0x05,0xfe,0xca,0xbe,0xba]
+        	setno	0xbabecafe
+
+// CHECK: setno	305419896
+// CHECK:  encoding: [0x0f,0x91,0x05,0x78,0x56,0x34,0x12]
+        	setno	0x12345678
+
+// CHECK: setb	%bl
+// CHECK:  encoding: [0x0f,0x92,0xc3]
+        	setb	%bl
+
+// CHECK: setb	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x92,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setb	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setb	32493
+// CHECK:  encoding: [0x0f,0x92,0x05,0xed,0x7e,0x00,0x00]
+        	setb	0x7eed
+
+// CHECK: setb	3133065982
+// CHECK:  encoding: [0x0f,0x92,0x05,0xfe,0xca,0xbe,0xba]
+        	setb	0xbabecafe
+
+// CHECK: setb	305419896
+// CHECK:  encoding: [0x0f,0x92,0x05,0x78,0x56,0x34,0x12]
+        	setb	0x12345678
+
+// CHECK: setae	%bl
+// CHECK:  encoding: [0x0f,0x93,0xc3]
+        	setae	%bl
+
+// CHECK: setae	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x93,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setae	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setae	32493
+// CHECK:  encoding: [0x0f,0x93,0x05,0xed,0x7e,0x00,0x00]
+        	setae	0x7eed
+
+// CHECK: setae	3133065982
+// CHECK:  encoding: [0x0f,0x93,0x05,0xfe,0xca,0xbe,0xba]
+        	setae	0xbabecafe
+
+// CHECK: setae	305419896
+// CHECK:  encoding: [0x0f,0x93,0x05,0x78,0x56,0x34,0x12]
+        	setae	0x12345678
+
+// CHECK: sete	%bl
+// CHECK:  encoding: [0x0f,0x94,0xc3]
+        	sete	%bl
+
+// CHECK: sete	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x94,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	sete	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sete	32493
+// CHECK:  encoding: [0x0f,0x94,0x05,0xed,0x7e,0x00,0x00]
+        	sete	0x7eed
+
+// CHECK: sete	3133065982
+// CHECK:  encoding: [0x0f,0x94,0x05,0xfe,0xca,0xbe,0xba]
+        	sete	0xbabecafe
+
+// CHECK: sete	305419896
+// CHECK:  encoding: [0x0f,0x94,0x05,0x78,0x56,0x34,0x12]
+        	sete	0x12345678
+
+// CHECK: setne	%bl
+// CHECK:  encoding: [0x0f,0x95,0xc3]
+        	setne	%bl
+
+// CHECK: setne	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x95,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setne	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setne	32493
+// CHECK:  encoding: [0x0f,0x95,0x05,0xed,0x7e,0x00,0x00]
+        	setne	0x7eed
+
+// CHECK: setne	3133065982
+// CHECK:  encoding: [0x0f,0x95,0x05,0xfe,0xca,0xbe,0xba]
+        	setne	0xbabecafe
+
+// CHECK: setne	305419896
+// CHECK:  encoding: [0x0f,0x95,0x05,0x78,0x56,0x34,0x12]
+        	setne	0x12345678
+
+// CHECK: setbe	%bl
+// CHECK:  encoding: [0x0f,0x96,0xc3]
+        	setbe	%bl
+
+// CHECK: setbe	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x96,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setbe	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setbe	32493
+// CHECK:  encoding: [0x0f,0x96,0x05,0xed,0x7e,0x00,0x00]
+        	setbe	0x7eed
+
+// CHECK: setbe	3133065982
+// CHECK:  encoding: [0x0f,0x96,0x05,0xfe,0xca,0xbe,0xba]
+        	setbe	0xbabecafe
+
+// CHECK: setbe	305419896
+// CHECK:  encoding: [0x0f,0x96,0x05,0x78,0x56,0x34,0x12]
+        	setbe	0x12345678
+
+// CHECK: seta	%bl
+// CHECK:  encoding: [0x0f,0x97,0xc3]
+        	seta	%bl
+
+// CHECK: seta	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x97,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	seta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: seta	32493
+// CHECK:  encoding: [0x0f,0x97,0x05,0xed,0x7e,0x00,0x00]
+        	seta	0x7eed
+
+// CHECK: seta	3133065982
+// CHECK:  encoding: [0x0f,0x97,0x05,0xfe,0xca,0xbe,0xba]
+        	seta	0xbabecafe
+
+// CHECK: seta	305419896
+// CHECK:  encoding: [0x0f,0x97,0x05,0x78,0x56,0x34,0x12]
+        	seta	0x12345678
+
+// CHECK: sets	%bl
+// CHECK:  encoding: [0x0f,0x98,0xc3]
+        	sets	%bl
+
+// CHECK: sets	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x98,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	sets	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: sets	32493
+// CHECK:  encoding: [0x0f,0x98,0x05,0xed,0x7e,0x00,0x00]
+        	sets	0x7eed
+
+// CHECK: sets	3133065982
+// CHECK:  encoding: [0x0f,0x98,0x05,0xfe,0xca,0xbe,0xba]
+        	sets	0xbabecafe
+
+// CHECK: sets	305419896
+// CHECK:  encoding: [0x0f,0x98,0x05,0x78,0x56,0x34,0x12]
+        	sets	0x12345678
+
+// CHECK: setns	%bl
+// CHECK:  encoding: [0x0f,0x99,0xc3]
+        	setns	%bl
+
+// CHECK: setns	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x99,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setns	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setns	32493
+// CHECK:  encoding: [0x0f,0x99,0x05,0xed,0x7e,0x00,0x00]
+        	setns	0x7eed
+
+// CHECK: setns	3133065982
+// CHECK:  encoding: [0x0f,0x99,0x05,0xfe,0xca,0xbe,0xba]
+        	setns	0xbabecafe
+
+// CHECK: setns	305419896
+// CHECK:  encoding: [0x0f,0x99,0x05,0x78,0x56,0x34,0x12]
+        	setns	0x12345678
+
+// CHECK: setp	%bl
+// CHECK:  encoding: [0x0f,0x9a,0xc3]
+        	setp	%bl
+
+// CHECK: setp	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9a,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setp	32493
+// CHECK:  encoding: [0x0f,0x9a,0x05,0xed,0x7e,0x00,0x00]
+        	setp	0x7eed
+
+// CHECK: setp	3133065982
+// CHECK:  encoding: [0x0f,0x9a,0x05,0xfe,0xca,0xbe,0xba]
+        	setp	0xbabecafe
+
+// CHECK: setp	305419896
+// CHECK:  encoding: [0x0f,0x9a,0x05,0x78,0x56,0x34,0x12]
+        	setp	0x12345678
+
+// CHECK: setnp	%bl
+// CHECK:  encoding: [0x0f,0x9b,0xc3]
+        	setnp	%bl
+
+// CHECK: setnp	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9b,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setnp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setnp	32493
+// CHECK:  encoding: [0x0f,0x9b,0x05,0xed,0x7e,0x00,0x00]
+        	setnp	0x7eed
+
+// CHECK: setnp	3133065982
+// CHECK:  encoding: [0x0f,0x9b,0x05,0xfe,0xca,0xbe,0xba]
+        	setnp	0xbabecafe
+
+// CHECK: setnp	305419896
+// CHECK:  encoding: [0x0f,0x9b,0x05,0x78,0x56,0x34,0x12]
+        	setnp	0x12345678
+
+// CHECK: setl	%bl
+// CHECK:  encoding: [0x0f,0x9c,0xc3]
+        	setl	%bl
+
+// CHECK: setl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9c,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setl	32493
+// CHECK:  encoding: [0x0f,0x9c,0x05,0xed,0x7e,0x00,0x00]
+        	setl	0x7eed
+
+// CHECK: setl	3133065982
+// CHECK:  encoding: [0x0f,0x9c,0x05,0xfe,0xca,0xbe,0xba]
+        	setl	0xbabecafe
+
+// CHECK: setl	305419896
+// CHECK:  encoding: [0x0f,0x9c,0x05,0x78,0x56,0x34,0x12]
+        	setl	0x12345678
+
+// CHECK: setge	%bl
+// CHECK:  encoding: [0x0f,0x9d,0xc3]
+        	setge	%bl
+
+// CHECK: setge	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9d,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setge	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setge	32493
+// CHECK:  encoding: [0x0f,0x9d,0x05,0xed,0x7e,0x00,0x00]
+        	setge	0x7eed
+
+// CHECK: setge	3133065982
+// CHECK:  encoding: [0x0f,0x9d,0x05,0xfe,0xca,0xbe,0xba]
+        	setge	0xbabecafe
+
+// CHECK: setge	305419896
+// CHECK:  encoding: [0x0f,0x9d,0x05,0x78,0x56,0x34,0x12]
+        	setge	0x12345678
+
+// CHECK: setle	%bl
+// CHECK:  encoding: [0x0f,0x9e,0xc3]
+        	setle	%bl
+
+// CHECK: setle	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9e,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setle	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setle	32493
+// CHECK:  encoding: [0x0f,0x9e,0x05,0xed,0x7e,0x00,0x00]
+        	setle	0x7eed
+
+// CHECK: setle	3133065982
+// CHECK:  encoding: [0x0f,0x9e,0x05,0xfe,0xca,0xbe,0xba]
+        	setle	0xbabecafe
+
+// CHECK: setle	305419896
+// CHECK:  encoding: [0x0f,0x9e,0x05,0x78,0x56,0x34,0x12]
+        	setle	0x12345678
+
+// CHECK: setg	%bl
+// CHECK:  encoding: [0x0f,0x9f,0xc3]
+        	setg	%bl
+
+// CHECK: setg	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x9f,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	setg	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: setg	32493
+// CHECK:  encoding: [0x0f,0x9f,0x05,0xed,0x7e,0x00,0x00]
+        	setg	0x7eed
+
+// CHECK: setg	3133065982
+// CHECK:  encoding: [0x0f,0x9f,0x05,0xfe,0xca,0xbe,0xba]
+        	setg	0xbabecafe
+
+// CHECK: setg	305419896
+// CHECK:  encoding: [0x0f,0x9f,0x05,0x78,0x56,0x34,0x12]
+        	setg	0x12345678
+
+// CHECK: rsm
+// CHECK:  encoding: [0x0f,0xaa]
+        	rsm
+
+// CHECK: hlt
+// CHECK:  encoding: [0xf4]
+        	hlt
+
+// CHECK: nopl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x1f,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	nopl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: nopw	32493
+// CHECK:  encoding: [0x66,0x0f,0x1f,0x05,0xed,0x7e,0x00,0x00]
+        	nopw	0x7eed
+
+// CHECK: nopl	3133065982
+// CHECK:  encoding: [0x0f,0x1f,0x05,0xfe,0xca,0xbe,0xba]
+        	nopl	0xbabecafe
+
+// CHECK: nopl	305419896
+// CHECK:  encoding: [0x0f,0x1f,0x05,0x78,0x56,0x34,0x12]
+        	nopl	0x12345678
+
+// CHECK: nop
+// CHECK:  encoding: [0x90]
+        	nop
+
+// CHECK: lldtw	32493
+// CHECK:  encoding: [0x0f,0x00,0x15,0xed,0x7e,0x00,0x00]
+        	lldtw	0x7eed
+
+// CHECK: lmsww	32493
+// CHECK:  encoding: [0x0f,0x01,0x35,0xed,0x7e,0x00,0x00]
+        	lmsww	0x7eed
+
+// CHECK: ltrw	32493
+// CHECK:  encoding: [0x0f,0x00,0x1d,0xed,0x7e,0x00,0x00]
+        	ltrw	0x7eed
+
+// CHECK: sldtw	32493
+// CHECK:  encoding: [0x0f,0x00,0x05,0xed,0x7e,0x00,0x00]
+        	sldtw	0x7eed
+
+// CHECK: smsww	32493
+// CHECK:  encoding: [0x0f,0x01,0x25,0xed,0x7e,0x00,0x00]
+        	smsww	0x7eed
+
+// CHECK: strw	32493
+// CHECK:  encoding: [0x0f,0x00,0x0d,0xed,0x7e,0x00,0x00]
+        	strw	0x7eed
+
+// CHECK: verr	%bx
+// CHECK:  encoding: [0x0f,0x00,0xe3]
+        	verr	%bx
+
+// CHECK: verr	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x00,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	verr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: verr	3133065982
+// CHECK:  encoding: [0x0f,0x00,0x25,0xfe,0xca,0xbe,0xba]
+        	verr	0xbabecafe
+
+// CHECK: verr	305419896
+// CHECK:  encoding: [0x0f,0x00,0x25,0x78,0x56,0x34,0x12]
+        	verr	0x12345678
+
+// CHECK: verw	%bx
+// CHECK:  encoding: [0x0f,0x00,0xeb]
+        	verw	%bx
+
+// CHECK: verw	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x00,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	verw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: verw	3133065982
+// CHECK:  encoding: [0x0f,0x00,0x2d,0xfe,0xca,0xbe,0xba]
+        	verw	0xbabecafe
+
+// CHECK: verw	305419896
+// CHECK:  encoding: [0x0f,0x00,0x2d,0x78,0x56,0x34,0x12]
+        	verw	0x12345678
+
+// CHECK: fld	%st(2)
+// CHECK:  encoding: [0xd9,0xc2]
+        	fld	%st(2)
+
+// CHECK: fldl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdd,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	fldl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fldl	3133065982
+// CHECK:  encoding: [0xdd,0x05,0xfe,0xca,0xbe,0xba]
+        	fldl	0xbabecafe
+
+// CHECK: fldl	305419896
+// CHECK:  encoding: [0xdd,0x05,0x78,0x56,0x34,0x12]
+        	fldl	0x12345678
+
+// CHECK: fld	%st(2)
+// CHECK:  encoding: [0xd9,0xc2]
+        	fld	%st(2)
+
+// CHECK: fildl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	fildl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fildl	3133065982
+// CHECK:  encoding: [0xdb,0x05,0xfe,0xca,0xbe,0xba]
+        	fildl	0xbabecafe
+
+// CHECK: fildl	305419896
+// CHECK:  encoding: [0xdb,0x05,0x78,0x56,0x34,0x12]
+        	fildl	0x12345678
+
+// CHECK: fildll	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdf,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	fildll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fildll	32493
+// CHECK:  encoding: [0xdf,0x2d,0xed,0x7e,0x00,0x00]
+        	fildll	0x7eed
+
+// CHECK: fildll	3133065982
+// CHECK:  encoding: [0xdf,0x2d,0xfe,0xca,0xbe,0xba]
+        	fildll	0xbabecafe
+
+// CHECK: fildll	305419896
+// CHECK:  encoding: [0xdf,0x2d,0x78,0x56,0x34,0x12]
+        	fildll	0x12345678
+
+// CHECK: fldt	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	fldt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fldt	32493
+// CHECK:  encoding: [0xdb,0x2d,0xed,0x7e,0x00,0x00]
+        	fldt	0x7eed
+
+// CHECK: fldt	3133065982
+// CHECK:  encoding: [0xdb,0x2d,0xfe,0xca,0xbe,0xba]
+        	fldt	0xbabecafe
+
+// CHECK: fldt	305419896
+// CHECK:  encoding: [0xdb,0x2d,0x78,0x56,0x34,0x12]
+        	fldt	0x12345678
+
+// CHECK: fbld	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdf,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	fbld	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fbld	32493
+// CHECK:  encoding: [0xdf,0x25,0xed,0x7e,0x00,0x00]
+        	fbld	0x7eed
+
+// CHECK: fbld	3133065982
+// CHECK:  encoding: [0xdf,0x25,0xfe,0xca,0xbe,0xba]
+        	fbld	0xbabecafe
+
+// CHECK: fbld	305419896
+// CHECK:  encoding: [0xdf,0x25,0x78,0x56,0x34,0x12]
+        	fbld	0x12345678
+
+// CHECK: fst	%st(2)
+// CHECK:  encoding: [0xdd,0xd2]
+        	fst	%st(2)
+
+// CHECK: fstl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdd,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	fstl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fstl	3133065982
+// CHECK:  encoding: [0xdd,0x15,0xfe,0xca,0xbe,0xba]
+        	fstl	0xbabecafe
+
+// CHECK: fstl	305419896
+// CHECK:  encoding: [0xdd,0x15,0x78,0x56,0x34,0x12]
+        	fstl	0x12345678
+
+// CHECK: fst	%st(2)
+// CHECK:  encoding: [0xdd,0xd2]
+        	fst	%st(2)
+
+// CHECK: fistl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	fistl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fistl	3133065982
+// CHECK:  encoding: [0xdb,0x15,0xfe,0xca,0xbe,0xba]
+        	fistl	0xbabecafe
+
+// CHECK: fistl	305419896
+// CHECK:  encoding: [0xdb,0x15,0x78,0x56,0x34,0x12]
+        	fistl	0x12345678
+
+// CHECK: fstp	%st(2)
+// CHECK:  encoding: [0xdd,0xda]
+        	fstp	%st(2)
+
+// CHECK: fstpl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	fstpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fstpl	3133065982
+// CHECK:  encoding: [0xdd,0x1d,0xfe,0xca,0xbe,0xba]
+        	fstpl	0xbabecafe
+
+// CHECK: fstpl	305419896
+// CHECK:  encoding: [0xdd,0x1d,0x78,0x56,0x34,0x12]
+        	fstpl	0x12345678
+
+// CHECK: fstp	%st(2)
+// CHECK:  encoding: [0xdd,0xda]
+        	fstp	%st(2)
+
+// CHECK: fistpl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	fistpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fistpl	3133065982
+// CHECK:  encoding: [0xdb,0x1d,0xfe,0xca,0xbe,0xba]
+        	fistpl	0xbabecafe
+
+// CHECK: fistpl	305419896
+// CHECK:  encoding: [0xdb,0x1d,0x78,0x56,0x34,0x12]
+        	fistpl	0x12345678
+
+// CHECK: fistpll	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdf,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fistpll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fistpll	32493
+// CHECK:  encoding: [0xdf,0x3d,0xed,0x7e,0x00,0x00]
+        	fistpll	0x7eed
+
+// CHECK: fistpll	3133065982
+// CHECK:  encoding: [0xdf,0x3d,0xfe,0xca,0xbe,0xba]
+        	fistpll	0xbabecafe
+
+// CHECK: fistpll	305419896
+// CHECK:  encoding: [0xdf,0x3d,0x78,0x56,0x34,0x12]
+        	fistpll	0x12345678
+
+// CHECK: fstpt	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fstpt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fstpt	32493
+// CHECK:  encoding: [0xdb,0x3d,0xed,0x7e,0x00,0x00]
+        	fstpt	0x7eed
+
+// CHECK: fstpt	3133065982
+// CHECK:  encoding: [0xdb,0x3d,0xfe,0xca,0xbe,0xba]
+        	fstpt	0xbabecafe
+
+// CHECK: fstpt	305419896
+// CHECK:  encoding: [0xdb,0x3d,0x78,0x56,0x34,0x12]
+        	fstpt	0x12345678
+
+// CHECK: fbstp	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdf,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	fbstp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fbstp	32493
+// CHECK:  encoding: [0xdf,0x35,0xed,0x7e,0x00,0x00]
+        	fbstp	0x7eed
+
+// CHECK: fbstp	3133065982
+// CHECK:  encoding: [0xdf,0x35,0xfe,0xca,0xbe,0xba]
+        	fbstp	0xbabecafe
+
+// CHECK: fbstp	305419896
+// CHECK:  encoding: [0xdf,0x35,0x78,0x56,0x34,0x12]
+        	fbstp	0x12345678
+
+// CHECK: fxch	%st(2)
+// CHECK:  encoding: [0xd9,0xca]
+        	fxch	%st(2)
+
+// CHECK: fcom	%st(2)
+// CHECK:  encoding: [0xd8,0xd2]
+        	fcom	%st(2)
+
+// CHECK: fcom	%st(2)
+// CHECK:  encoding: [0xd8,0xd2]
+        	fcom	%st(2)
+
+// CHECK: ficoml	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	ficoml	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: ficoml	3133065982
+// CHECK:  encoding: [0xda,0x15,0xfe,0xca,0xbe,0xba]
+        	ficoml	0xbabecafe
+
+// CHECK: ficoml	305419896
+// CHECK:  encoding: [0xda,0x15,0x78,0x56,0x34,0x12]
+        	ficoml	0x12345678
+
+// CHECK: fcomp	%st(2)
+// CHECK:  encoding: [0xd8,0xda]
+        	fcomp	%st(2)
+
+// CHECK: fcomp	%st(2)
+// CHECK:  encoding: [0xd8,0xda]
+        	fcomp	%st(2)
+
+// CHECK: ficompl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	ficompl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: ficompl	3133065982
+// CHECK:  encoding: [0xda,0x1d,0xfe,0xca,0xbe,0xba]
+        	ficompl	0xbabecafe
+
+// CHECK: ficompl	305419896
+// CHECK:  encoding: [0xda,0x1d,0x78,0x56,0x34,0x12]
+        	ficompl	0x12345678
+
+// CHECK: fcompp
+// CHECK:  encoding: [0xde,0xd9]
+        	fcompp
+
+// CHECK: fucom	%st(2)
+// CHECK:  encoding: [0xdd,0xe2]
+        	fucom	%st(2)
+
+// CHECK: fucomp	%st(2)
+// CHECK:  encoding: [0xdd,0xea]
+        	fucomp	%st(2)
+
+// CHECK: fucompp
+// CHECK:  encoding: [0xda,0xe9]
+        	fucompp
+
+// CHECK: ftst
+// CHECK:  encoding: [0xd9,0xe4]
+        	ftst
+
+// CHECK: fxam
+// CHECK:  encoding: [0xd9,0xe5]
+        	fxam
+
+// CHECK: fld1
+// CHECK:  encoding: [0xd9,0xe8]
+        	fld1
+
+// CHECK: fldl2t
+// CHECK:  encoding: [0xd9,0xe9]
+        	fldl2t
+
+// CHECK: fldl2e
+// CHECK:  encoding: [0xd9,0xea]
+        	fldl2e
+
+// CHECK: fldpi
+// CHECK:  encoding: [0xd9,0xeb]
+        	fldpi
+
+// CHECK: fldlg2
+// CHECK:  encoding: [0xd9,0xec]
+        	fldlg2
+
+// CHECK: fldln2
+// CHECK:  encoding: [0xd9,0xed]
+        	fldln2
+
+// CHECK: fldz
+// CHECK:  encoding: [0xd9,0xee]
+        	fldz
+
+// CHECK: fadd	%st(2)
+// CHECK:  encoding: [0xd8,0xc2]
+        	fadd	%st(2)
+
+// CHECK: faddl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	faddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: faddl	3133065982
+// CHECK:  encoding: [0xdc,0x05,0xfe,0xca,0xbe,0xba]
+        	faddl	0xbabecafe
+
+// CHECK: faddl	305419896
+// CHECK:  encoding: [0xdc,0x05,0x78,0x56,0x34,0x12]
+        	faddl	0x12345678
+
+// CHECK: fiaddl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fiaddl	3133065982
+// CHECK:  encoding: [0xda,0x05,0xfe,0xca,0xbe,0xba]
+        	fiaddl	0xbabecafe
+
+// CHECK: fiaddl	305419896
+// CHECK:  encoding: [0xda,0x05,0x78,0x56,0x34,0x12]
+        	fiaddl	0x12345678
+
+// CHECK: faddp	%st(2)
+// CHECK:  encoding: [0xde,0xc2]
+        	faddp	%st(2)
+
+// CHECK: fsub	%st(2)
+// CHECK:  encoding: [0xd8,0xe2]
+        	fsub	%st(2)
+
+// CHECK: fsubl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	fsubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fsubl	3133065982
+// CHECK:  encoding: [0xdc,0x25,0xfe,0xca,0xbe,0xba]
+        	fsubl	0xbabecafe
+
+// CHECK: fsubl	305419896
+// CHECK:  encoding: [0xdc,0x25,0x78,0x56,0x34,0x12]
+        	fsubl	0x12345678
+
+// CHECK: fisubl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0xa4,0xcb,0xef,0xbe,0xad,0xde]
+        	fisubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fisubl	3133065982
+// CHECK:  encoding: [0xda,0x25,0xfe,0xca,0xbe,0xba]
+        	fisubl	0xbabecafe
+
+// CHECK: fisubl	305419896
+// CHECK:  encoding: [0xda,0x25,0x78,0x56,0x34,0x12]
+        	fisubl	0x12345678
+
+// CHECK: fsubp	%st(2)
+// CHECK:  encoding: [0xde,0xe2]
+        	fsubp	%st(2)
+
+// CHECK: fsubr	%st(2)
+// CHECK:  encoding: [0xd8,0xea]
+        	fsubr	%st(2)
+
+// CHECK: fsubrl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fsubrl	3133065982
+// CHECK:  encoding: [0xdc,0x2d,0xfe,0xca,0xbe,0xba]
+        	fsubrl	0xbabecafe
+
+// CHECK: fsubrl	305419896
+// CHECK:  encoding: [0xdc,0x2d,0x78,0x56,0x34,0x12]
+        	fsubrl	0x12345678
+
+// CHECK: fisubrl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fisubrl	3133065982
+// CHECK:  encoding: [0xda,0x2d,0xfe,0xca,0xbe,0xba]
+        	fisubrl	0xbabecafe
+
+// CHECK: fisubrl	305419896
+// CHECK:  encoding: [0xda,0x2d,0x78,0x56,0x34,0x12]
+        	fisubrl	0x12345678
+
+// CHECK: fsubrp	%st(2)
+// CHECK:  encoding: [0xde,0xea]
+        	fsubrp	%st(2)
+
+// CHECK: fmul	%st(2)
+// CHECK:  encoding: [0xd8,0xca]
+        	fmul	%st(2)
+
+// CHECK: fmull	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	fmull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fmull	3133065982
+// CHECK:  encoding: [0xdc,0x0d,0xfe,0xca,0xbe,0xba]
+        	fmull	0xbabecafe
+
+// CHECK: fmull	305419896
+// CHECK:  encoding: [0xdc,0x0d,0x78,0x56,0x34,0x12]
+        	fmull	0x12345678
+
+// CHECK: fimull	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	fimull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fimull	3133065982
+// CHECK:  encoding: [0xda,0x0d,0xfe,0xca,0xbe,0xba]
+        	fimull	0xbabecafe
+
+// CHECK: fimull	305419896
+// CHECK:  encoding: [0xda,0x0d,0x78,0x56,0x34,0x12]
+        	fimull	0x12345678
+
+// CHECK: fmulp	%st(2)
+// CHECK:  encoding: [0xde,0xca]
+        	fmulp	%st(2)
+
+// CHECK: fdiv	%st(2)
+// CHECK:  encoding: [0xd8,0xf2]
+        	fdiv	%st(2)
+
+// CHECK: fdivl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	fdivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fdivl	3133065982
+// CHECK:  encoding: [0xdc,0x35,0xfe,0xca,0xbe,0xba]
+        	fdivl	0xbabecafe
+
+// CHECK: fdivl	305419896
+// CHECK:  encoding: [0xdc,0x35,0x78,0x56,0x34,0x12]
+        	fdivl	0x12345678
+
+// CHECK: fidivl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	fidivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fidivl	3133065982
+// CHECK:  encoding: [0xda,0x35,0xfe,0xca,0xbe,0xba]
+        	fidivl	0xbabecafe
+
+// CHECK: fidivl	305419896
+// CHECK:  encoding: [0xda,0x35,0x78,0x56,0x34,0x12]
+        	fidivl	0x12345678
+
+// CHECK: fdivp	%st(2)
+// CHECK:  encoding: [0xde,0xf2]
+        	fdivp	%st(2)
+
+// CHECK: fdivr	%st(2)
+// CHECK:  encoding: [0xd8,0xfa]
+        	fdivr	%st(2)
+
+// CHECK: fdivrl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fdivrl	3133065982
+// CHECK:  encoding: [0xdc,0x3d,0xfe,0xca,0xbe,0xba]
+        	fdivrl	0xbabecafe
+
+// CHECK: fdivrl	305419896
+// CHECK:  encoding: [0xdc,0x3d,0x78,0x56,0x34,0x12]
+        	fdivrl	0x12345678
+
+// CHECK: fidivrl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fidivrl	3133065982
+// CHECK:  encoding: [0xda,0x3d,0xfe,0xca,0xbe,0xba]
+        	fidivrl	0xbabecafe
+
+// CHECK: fidivrl	305419896
+// CHECK:  encoding: [0xda,0x3d,0x78,0x56,0x34,0x12]
+        	fidivrl	0x12345678
+
+// CHECK: fdivrp	%st(2)
+// CHECK:  encoding: [0xde,0xfa]
+        	fdivrp	%st(2)
+
+// CHECK: f2xm1
+// CHECK:  encoding: [0xd9,0xf0]
+        	f2xm1
+
+// CHECK: fyl2x
+// CHECK:  encoding: [0xd9,0xf1]
+        	fyl2x
+
+// CHECK: fptan
+// CHECK:  encoding: [0xd9,0xf2]
+        	fptan
+
+// CHECK: fpatan
+// CHECK:  encoding: [0xd9,0xf3]
+        	fpatan
+
+// CHECK: fxtract
+// CHECK:  encoding: [0xd9,0xf4]
+        	fxtract
+
+// CHECK: fprem1
+// CHECK:  encoding: [0xd9,0xf5]
+        	fprem1
+
+// CHECK: fdecstp
+// CHECK:  encoding: [0xd9,0xf6]
+        	fdecstp
+
+// CHECK: fincstp
+// CHECK:  encoding: [0xd9,0xf7]
+        	fincstp
+
+// CHECK: fprem
+// CHECK:  encoding: [0xd9,0xf8]
+        	fprem
+
+// CHECK: fyl2xp1
+// CHECK:  encoding: [0xd9,0xf9]
+        	fyl2xp1
+
+// CHECK: fsqrt
+// CHECK:  encoding: [0xd9,0xfa]
+        	fsqrt
+
+// CHECK: fsincos
+// CHECK:  encoding: [0xd9,0xfb]
+        	fsincos
+
+// CHECK: frndint
+// CHECK:  encoding: [0xd9,0xfc]
+        	frndint
+
+// CHECK: fscale
+// CHECK:  encoding: [0xd9,0xfd]
+        	fscale
+
+// CHECK: fsin
+// CHECK:  encoding: [0xd9,0xfe]
+        	fsin
+
+// CHECK: fcos
+// CHECK:  encoding: [0xd9,0xff]
+        	fcos
+
+// CHECK: fchs
+// CHECK:  encoding: [0xd9,0xe0]
+        	fchs
+
+// CHECK: fabs
+// CHECK:  encoding: [0xd9,0xe1]
+        	fabs
+
+// CHECK: fninit
+// CHECK:  encoding: [0xdb,0xe3]
+        	fninit
+
+// CHECK: fldcw	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd9,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	fldcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fldcw	3133065982
+// CHECK:  encoding: [0xd9,0x2d,0xfe,0xca,0xbe,0xba]
+        	fldcw	0xbabecafe
+
+// CHECK: fldcw	305419896
+// CHECK:  encoding: [0xd9,0x2d,0x78,0x56,0x34,0x12]
+        	fldcw	0x12345678
+
+// CHECK: fnstcw	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xd9,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fnstcw	3133065982
+// CHECK:  encoding: [0xd9,0x3d,0xfe,0xca,0xbe,0xba]
+        	fnstcw	0xbabecafe
+
+// CHECK: fnstcw	305419896
+// CHECK:  encoding: [0xd9,0x3d,0x78,0x56,0x34,0x12]
+        	fnstcw	0x12345678
+
+// CHECK: fnstsw	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdd,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	fnstsw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fnstsw	3133065982
+// CHECK:  encoding: [0xdd,0x3d,0xfe,0xca,0xbe,0xba]
+        	fnstsw	0xbabecafe
+
+// CHECK: fnstsw	305419896
+// CHECK:  encoding: [0xdd,0x3d,0x78,0x56,0x34,0x12]
+        	fnstsw	0x12345678
+
+// CHECK: fnclex
+// CHECK:  encoding: [0xdb,0xe2]
+        	fnclex
+
+// CHECK: fnstenv	32493
+// CHECK:  encoding: [0xd9,0x35,0xed,0x7e,0x00,0x00]
+        	fnstenv	0x7eed
+
+// CHECK: fldenv	32493
+// CHECK:  encoding: [0xd9,0x25,0xed,0x7e,0x00,0x00]
+        	fldenv	0x7eed
+
+// CHECK: fnsave	32493
+// CHECK:  encoding: [0xdd,0x35,0xed,0x7e,0x00,0x00]
+        	fnsave	0x7eed
+
+// CHECK: frstor	32493
+// CHECK:  encoding: [0xdd,0x25,0xed,0x7e,0x00,0x00]
+        	frstor	0x7eed
+
+// CHECK: ffree	%st(2)
+// CHECK:  encoding: [0xdd,0xc2]
+        	ffree	%st(2)
+
+// CHECK: fnop
+// CHECK:  encoding: [0xd9,0xd0]
+        	fnop
+
+// CHECK: invd
+// CHECK:  encoding: [0x0f,0x08]
+        	invd
+
+// CHECK: wbinvd
+// CHECK:  encoding: [0x0f,0x09]
+        	wbinvd
+
+// CHECK: cpuid
+// CHECK:  encoding: [0x0f,0xa2]
+        	cpuid
+
+// CHECK: wrmsr
+// CHECK:  encoding: [0x0f,0x30]
+        	wrmsr
+
+// CHECK: rdtsc
+// CHECK:  encoding: [0x0f,0x31]
+        	rdtsc
+
+// CHECK: rdmsr
+// CHECK:  encoding: [0x0f,0x32]
+        	rdmsr
+
+// CHECK: cmpxchg8b	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xc7,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	cmpxchg8b	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: cmpxchg8b	32493
+// CHECK:  encoding: [0x0f,0xc7,0x0d,0xed,0x7e,0x00,0x00]
+        	cmpxchg8b	0x7eed
+
+// CHECK: cmpxchg8b	3133065982
+// CHECK:  encoding: [0x0f,0xc7,0x0d,0xfe,0xca,0xbe,0xba]
+        	cmpxchg8b	0xbabecafe
+
+// CHECK: cmpxchg8b	305419896
+// CHECK:  encoding: [0x0f,0xc7,0x0d,0x78,0x56,0x34,0x12]
+        	cmpxchg8b	0x12345678
+
+// CHECK: sysenter
+// CHECK:  encoding: [0x0f,0x34]
+        	sysenter
+
+// CHECK: sysexit
+// CHECK:  encoding: [0x0f,0x35]
+        	sysexit
+
+// CHECK: fxsave	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xae,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	fxsave	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fxsave	32493
+// CHECK:  encoding: [0x0f,0xae,0x05,0xed,0x7e,0x00,0x00]
+        	fxsave	0x7eed
+
+// CHECK: fxsave	3133065982
+// CHECK:  encoding: [0x0f,0xae,0x05,0xfe,0xca,0xbe,0xba]
+        	fxsave	0xbabecafe
+
+// CHECK: fxsave	305419896
+// CHECK:  encoding: [0x0f,0xae,0x05,0x78,0x56,0x34,0x12]
+        	fxsave	0x12345678
+
+// CHECK: fxrstor	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xae,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	fxrstor	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fxrstor	32493
+// CHECK:  encoding: [0x0f,0xae,0x0d,0xed,0x7e,0x00,0x00]
+        	fxrstor	0x7eed
+
+// CHECK: fxrstor	3133065982
+// CHECK:  encoding: [0x0f,0xae,0x0d,0xfe,0xca,0xbe,0xba]
+        	fxrstor	0xbabecafe
+
+// CHECK: fxrstor	305419896
+// CHECK:  encoding: [0x0f,0xae,0x0d,0x78,0x56,0x34,0x12]
+        	fxrstor	0x12345678
+
+// CHECK: rdpmc
+// CHECK:  encoding: [0x0f,0x33]
+        	rdpmc
+
+// CHECK: ud2
+// CHECK:  encoding: [0x0f,0x0b]
+        	ud2
+
+// CHECK: fcmovb	%st(2), %st(0)
+// CHECK:  encoding: [0xda,0xc2]
+        	fcmovb	%st(2),%st
+
+// CHECK: fcmove	%st(2), %st(0)
+// CHECK:  encoding: [0xda,0xca]
+        	fcmove	%st(2),%st
+
+// CHECK: fcmovbe	%st(2), %st(0)
+// CHECK:  encoding: [0xda,0xd2]
+        	fcmovbe	%st(2),%st
+
+// CHECK: fcmovu	 %st(2), %st(0)
+// CHECK:  encoding: [0xda,0xda]
+        	fcmovu	%st(2),%st
+
+// CHECK: fcmovnb	%st(2), %st(0)
+// CHECK:  encoding: [0xdb,0xc2]
+        	fcmovnb	%st(2),%st
+
+// CHECK: fcmovne	%st(2), %st(0)
+// CHECK:  encoding: [0xdb,0xca]
+        	fcmovne	%st(2),%st
+
+// CHECK: fcmovnbe	%st(2), %st(0)
+// CHECK:  encoding: [0xdb,0xd2]
+        	fcmovnbe	%st(2),%st
+
+// CHECK: fcmovnu	%st(2), %st(0)
+// CHECK:  encoding: [0xdb,0xda]
+        	fcmovnu	%st(2),%st
+
+// CHECK: fcomi	%st(2)
+// CHECK:  encoding: [0xdb,0xf2]
+        	fcomi	%st(2),%st
+
+// CHECK: fucomi	%st(2)
+// CHECK:  encoding: [0xdb,0xea]
+        	fucomi	%st(2),%st
+
+// CHECK: fcompi	%st(2)
+// CHECK:  encoding: [0xdf,0xf2]
+        	fcomip	%st(2),%st
+
+// CHECK: fucompi	%st(2)
+// CHECK:  encoding: [0xdf,0xea]
+        	fucomip	%st(2),%st
+
+// CHECK: movnti	%ecx, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xc3,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movnti	%ecx, 69
+// CHECK:  encoding: [0x0f,0xc3,0x0d,0x45,0x00,0x00,0x00]
+        	movnti	%ecx,0x45
+
+// CHECK: movnti	%ecx, 32493
+// CHECK:  encoding: [0x0f,0xc3,0x0d,0xed,0x7e,0x00,0x00]
+        	movnti	%ecx,0x7eed
+
+// CHECK: movnti	%ecx, 3133065982
+// CHECK:  encoding: [0x0f,0xc3,0x0d,0xfe,0xca,0xbe,0xba]
+        	movnti	%ecx,0xbabecafe
+
+// CHECK: movnti	%ecx, 305419896
+// CHECK:  encoding: [0x0f,0xc3,0x0d,0x78,0x56,0x34,0x12]
+        	movnti	%ecx,0x12345678
+
+// CHECK: clflush	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xae,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	clflush	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: clflush	32493
+// CHECK:  encoding: [0x0f,0xae,0x3d,0xed,0x7e,0x00,0x00]
+        	clflush	0x7eed
+
+// CHECK: clflush	3133065982
+// CHECK:  encoding: [0x0f,0xae,0x3d,0xfe,0xca,0xbe,0xba]
+        	clflush	0xbabecafe
+
+// CHECK: clflush	305419896
+// CHECK:  encoding: [0x0f,0xae,0x3d,0x78,0x56,0x34,0x12]
+        	clflush	0x12345678
+
+// CHECK: emms
+// CHECK:  encoding: [0x0f,0x77]
+        	emms
+
+// CHECK: movd	%ecx, %mm3
+// CHECK:  encoding: [0x0f,0x6e,0xd9]
+        	movd	%ecx,%mm3
+
+// CHECK: movd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x6e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: movd	69, %mm3
+// CHECK:  encoding: [0x0f,0x6e,0x1d,0x45,0x00,0x00,0x00]
+        	movd	0x45,%mm3
+
+// CHECK: movd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x6e,0x1d,0xed,0x7e,0x00,0x00]
+        	movd	0x7eed,%mm3
+
+// CHECK: movd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x6e,0x1d,0xfe,0xca,0xbe,0xba]
+        	movd	0xbabecafe,%mm3
+
+// CHECK: movd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x6e,0x1d,0x78,0x56,0x34,0x12]
+        	movd	0x12345678,%mm3
+
+// CHECK: movd	%mm3, %ecx
+// CHECK:  encoding: [0x0f,0x7e,0xd9]
+        	movd	%mm3,%ecx
+
+// CHECK: movd	%mm3, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x7e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movd	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movd	%mm3, 69
+// CHECK:  encoding: [0x0f,0x7e,0x1d,0x45,0x00,0x00,0x00]
+        	movd	%mm3,0x45
+
+// CHECK: movd	%mm3, 32493
+// CHECK:  encoding: [0x0f,0x7e,0x1d,0xed,0x7e,0x00,0x00]
+        	movd	%mm3,0x7eed
+
+// CHECK: movd	%mm3, 3133065982
+// CHECK:  encoding: [0x0f,0x7e,0x1d,0xfe,0xca,0xbe,0xba]
+        	movd	%mm3,0xbabecafe
+
+// CHECK: movd	%mm3, 305419896
+// CHECK:  encoding: [0x0f,0x7e,0x1d,0x78,0x56,0x34,0x12]
+        	movd	%mm3,0x12345678
+
+// CHECK: movd	%ecx, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0xe9]
+        	movd	%ecx,%xmm5
+
+// CHECK: movd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0x45,0x00,0x00,0x00]
+        	movd	0x45,%xmm5
+
+// CHECK: movd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0xed,0x7e,0x00,0x00]
+        	movd	0x7eed,%xmm5
+
+// CHECK: movd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0xfe,0xca,0xbe,0xba]
+        	movd	0xbabecafe,%xmm5
+
+// CHECK: movd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0x2d,0x78,0x56,0x34,0x12]
+        	movd	0x12345678,%xmm5
+
+// CHECK: movd	%xmm5, %ecx
+// CHECK:  encoding: [0x66,0x0f,0x7e,0xe9]
+        	movd	%xmm5,%ecx
+
+// CHECK: movd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x7e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0x45,0x00,0x00,0x00]
+        	movd	%xmm5,0x45
+
+// CHECK: movd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0xed,0x7e,0x00,0x00]
+        	movd	%xmm5,0x7eed
+
+// CHECK: movd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0xfe,0xca,0xbe,0xba]
+        	movd	%xmm5,0xbabecafe
+
+// CHECK: movd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x7e,0x2d,0x78,0x56,0x34,0x12]
+        	movd	%xmm5,0x12345678
+
+// CHECK: movq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x6f,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: movq	69, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0x1d,0x45,0x00,0x00,0x00]
+        	movq	0x45,%mm3
+
+// CHECK: movq	32493, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0x1d,0xed,0x7e,0x00,0x00]
+        	movq	0x7eed,%mm3
+
+// CHECK: movq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0x1d,0xfe,0xca,0xbe,0xba]
+        	movq	0xbabecafe,%mm3
+
+// CHECK: movq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0x1d,0x78,0x56,0x34,0x12]
+        	movq	0x12345678,%mm3
+
+// CHECK: movq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0xdb]
+        	movq	%mm3,%mm3
+
+// CHECK: movq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x6f,0xdb]
+        	movq	%mm3,%mm3
+
+// CHECK: movq	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x7e,0xed]
+        	movq	%xmm5,%xmm5
+
+// CHECK: movq	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0xd6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movq	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0x45,0x00,0x00,0x00]
+        	movq	%xmm5,0x45
+
+// CHECK: movq	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0xed,0x7e,0x00,0x00]
+        	movq	%xmm5,0x7eed
+
+// CHECK: movq	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0xfe,0xca,0xbe,0xba]
+        	movq	%xmm5,0xbabecafe
+
+// CHECK: movq	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0xd6,0x2d,0x78,0x56,0x34,0x12]
+        	movq	%xmm5,0x12345678
+
+// CHECK: movq	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x7e,0xed]
+        	movq	%xmm5,%xmm5
+
+// CHECK: packssdw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x6b,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	packssdw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: packssdw	69, %mm3
+// CHECK:  encoding: [0x0f,0x6b,0x1d,0x45,0x00,0x00,0x00]
+        	packssdw	0x45,%mm3
+
+// CHECK: packssdw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x6b,0x1d,0xed,0x7e,0x00,0x00]
+        	packssdw	0x7eed,%mm3
+
+// CHECK: packssdw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x6b,0x1d,0xfe,0xca,0xbe,0xba]
+        	packssdw	0xbabecafe,%mm3
+
+// CHECK: packssdw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x6b,0x1d,0x78,0x56,0x34,0x12]
+        	packssdw	0x12345678,%mm3
+
+// CHECK: packssdw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x6b,0xdb]
+        	packssdw	%mm3,%mm3
+
+// CHECK: packssdw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	packssdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: packssdw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0x45,0x00,0x00,0x00]
+        	packssdw	0x45,%xmm5
+
+// CHECK: packssdw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0xed,0x7e,0x00,0x00]
+        	packssdw	0x7eed,%xmm5
+
+// CHECK: packssdw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0xfe,0xca,0xbe,0xba]
+        	packssdw	0xbabecafe,%xmm5
+
+// CHECK: packssdw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0x2d,0x78,0x56,0x34,0x12]
+        	packssdw	0x12345678,%xmm5
+
+// CHECK: packssdw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6b,0xed]
+        	packssdw	%xmm5,%xmm5
+
+// CHECK: packsswb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x63,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	packsswb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: packsswb	69, %mm3
+// CHECK:  encoding: [0x0f,0x63,0x1d,0x45,0x00,0x00,0x00]
+        	packsswb	0x45,%mm3
+
+// CHECK: packsswb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x63,0x1d,0xed,0x7e,0x00,0x00]
+        	packsswb	0x7eed,%mm3
+
+// CHECK: packsswb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x63,0x1d,0xfe,0xca,0xbe,0xba]
+        	packsswb	0xbabecafe,%mm3
+
+// CHECK: packsswb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x63,0x1d,0x78,0x56,0x34,0x12]
+        	packsswb	0x12345678,%mm3
+
+// CHECK: packsswb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x63,0xdb]
+        	packsswb	%mm3,%mm3
+
+// CHECK: packsswb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	packsswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: packsswb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0x45,0x00,0x00,0x00]
+        	packsswb	0x45,%xmm5
+
+// CHECK: packsswb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0xed,0x7e,0x00,0x00]
+        	packsswb	0x7eed,%xmm5
+
+// CHECK: packsswb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0xfe,0xca,0xbe,0xba]
+        	packsswb	0xbabecafe,%xmm5
+
+// CHECK: packsswb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0x2d,0x78,0x56,0x34,0x12]
+        	packsswb	0x12345678,%xmm5
+
+// CHECK: packsswb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x63,0xed]
+        	packsswb	%xmm5,%xmm5
+
+// CHECK: packuswb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x67,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	packuswb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: packuswb	69, %mm3
+// CHECK:  encoding: [0x0f,0x67,0x1d,0x45,0x00,0x00,0x00]
+        	packuswb	0x45,%mm3
+
+// CHECK: packuswb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x67,0x1d,0xed,0x7e,0x00,0x00]
+        	packuswb	0x7eed,%mm3
+
+// CHECK: packuswb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x67,0x1d,0xfe,0xca,0xbe,0xba]
+        	packuswb	0xbabecafe,%mm3
+
+// CHECK: packuswb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x67,0x1d,0x78,0x56,0x34,0x12]
+        	packuswb	0x12345678,%mm3
+
+// CHECK: packuswb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x67,0xdb]
+        	packuswb	%mm3,%mm3
+
+// CHECK: packuswb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	packuswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: packuswb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0x45,0x00,0x00,0x00]
+        	packuswb	0x45,%xmm5
+
+// CHECK: packuswb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0xed,0x7e,0x00,0x00]
+        	packuswb	0x7eed,%xmm5
+
+// CHECK: packuswb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0xfe,0xca,0xbe,0xba]
+        	packuswb	0xbabecafe,%xmm5
+
+// CHECK: packuswb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0x2d,0x78,0x56,0x34,0x12]
+        	packuswb	0x12345678,%xmm5
+
+// CHECK: packuswb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x67,0xed]
+        	packuswb	%xmm5,%xmm5
+
+// CHECK: paddb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xfc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddb	69, %mm3
+// CHECK:  encoding: [0x0f,0xfc,0x1d,0x45,0x00,0x00,0x00]
+        	paddb	0x45,%mm3
+
+// CHECK: paddb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xfc,0x1d,0xed,0x7e,0x00,0x00]
+        	paddb	0x7eed,%mm3
+
+// CHECK: paddb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xfc,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddb	0xbabecafe,%mm3
+
+// CHECK: paddb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xfc,0x1d,0x78,0x56,0x34,0x12]
+        	paddb	0x12345678,%mm3
+
+// CHECK: paddb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xfc,0xdb]
+        	paddb	%mm3,%mm3
+
+// CHECK: paddb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0x45,0x00,0x00,0x00]
+        	paddb	0x45,%xmm5
+
+// CHECK: paddb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0xed,0x7e,0x00,0x00]
+        	paddb	0x7eed,%xmm5
+
+// CHECK: paddb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddb	0xbabecafe,%xmm5
+
+// CHECK: paddb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0x2d,0x78,0x56,0x34,0x12]
+        	paddb	0x12345678,%xmm5
+
+// CHECK: paddb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfc,0xed]
+        	paddb	%xmm5,%xmm5
+
+// CHECK: paddw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xfd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddw	69, %mm3
+// CHECK:  encoding: [0x0f,0xfd,0x1d,0x45,0x00,0x00,0x00]
+        	paddw	0x45,%mm3
+
+// CHECK: paddw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xfd,0x1d,0xed,0x7e,0x00,0x00]
+        	paddw	0x7eed,%mm3
+
+// CHECK: paddw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xfd,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddw	0xbabecafe,%mm3
+
+// CHECK: paddw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xfd,0x1d,0x78,0x56,0x34,0x12]
+        	paddw	0x12345678,%mm3
+
+// CHECK: paddw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xfd,0xdb]
+        	paddw	%mm3,%mm3
+
+// CHECK: paddw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0x45,0x00,0x00,0x00]
+        	paddw	0x45,%xmm5
+
+// CHECK: paddw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0xed,0x7e,0x00,0x00]
+        	paddw	0x7eed,%xmm5
+
+// CHECK: paddw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddw	0xbabecafe,%xmm5
+
+// CHECK: paddw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0x2d,0x78,0x56,0x34,0x12]
+        	paddw	0x12345678,%xmm5
+
+// CHECK: paddw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfd,0xed]
+        	paddw	%xmm5,%xmm5
+
+// CHECK: paddd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xfe,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddd	69, %mm3
+// CHECK:  encoding: [0x0f,0xfe,0x1d,0x45,0x00,0x00,0x00]
+        	paddd	0x45,%mm3
+
+// CHECK: paddd	32493, %mm3
+// CHECK:  encoding: [0x0f,0xfe,0x1d,0xed,0x7e,0x00,0x00]
+        	paddd	0x7eed,%mm3
+
+// CHECK: paddd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xfe,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddd	0xbabecafe,%mm3
+
+// CHECK: paddd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xfe,0x1d,0x78,0x56,0x34,0x12]
+        	paddd	0x12345678,%mm3
+
+// CHECK: paddd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xfe,0xdb]
+        	paddd	%mm3,%mm3
+
+// CHECK: paddd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0x45,0x00,0x00,0x00]
+        	paddd	0x45,%xmm5
+
+// CHECK: paddd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0xed,0x7e,0x00,0x00]
+        	paddd	0x7eed,%xmm5
+
+// CHECK: paddd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddd	0xbabecafe,%xmm5
+
+// CHECK: paddd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0x2d,0x78,0x56,0x34,0x12]
+        	paddd	0x12345678,%xmm5
+
+// CHECK: paddd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfe,0xed]
+        	paddd	%xmm5,%xmm5
+
+// CHECK: paddq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddq	69, %mm3
+// CHECK:  encoding: [0x0f,0xd4,0x1d,0x45,0x00,0x00,0x00]
+        	paddq	0x45,%mm3
+
+// CHECK: paddq	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd4,0x1d,0xed,0x7e,0x00,0x00]
+        	paddq	0x7eed,%mm3
+
+// CHECK: paddq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd4,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddq	0xbabecafe,%mm3
+
+// CHECK: paddq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd4,0x1d,0x78,0x56,0x34,0x12]
+        	paddq	0x12345678,%mm3
+
+// CHECK: paddq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd4,0xdb]
+        	paddq	%mm3,%mm3
+
+// CHECK: paddq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0x45,0x00,0x00,0x00]
+        	paddq	0x45,%xmm5
+
+// CHECK: paddq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0xed,0x7e,0x00,0x00]
+        	paddq	0x7eed,%xmm5
+
+// CHECK: paddq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddq	0xbabecafe,%xmm5
+
+// CHECK: paddq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0x2d,0x78,0x56,0x34,0x12]
+        	paddq	0x12345678,%xmm5
+
+// CHECK: paddq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd4,0xed]
+        	paddq	%xmm5,%xmm5
+
+// CHECK: paddsb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xec,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddsb	69, %mm3
+// CHECK:  encoding: [0x0f,0xec,0x1d,0x45,0x00,0x00,0x00]
+        	paddsb	0x45,%mm3
+
+// CHECK: paddsb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xec,0x1d,0xed,0x7e,0x00,0x00]
+        	paddsb	0x7eed,%mm3
+
+// CHECK: paddsb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xec,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddsb	0xbabecafe,%mm3
+
+// CHECK: paddsb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xec,0x1d,0x78,0x56,0x34,0x12]
+        	paddsb	0x12345678,%mm3
+
+// CHECK: paddsb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xec,0xdb]
+        	paddsb	%mm3,%mm3
+
+// CHECK: paddsb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddsb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0x45,0x00,0x00,0x00]
+        	paddsb	0x45,%xmm5
+
+// CHECK: paddsb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0xed,0x7e,0x00,0x00]
+        	paddsb	0x7eed,%xmm5
+
+// CHECK: paddsb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddsb	0xbabecafe,%xmm5
+
+// CHECK: paddsb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0x2d,0x78,0x56,0x34,0x12]
+        	paddsb	0x12345678,%xmm5
+
+// CHECK: paddsb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xec,0xed]
+        	paddsb	%xmm5,%xmm5
+
+// CHECK: paddsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xed,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddsw	69, %mm3
+// CHECK:  encoding: [0x0f,0xed,0x1d,0x45,0x00,0x00,0x00]
+        	paddsw	0x45,%mm3
+
+// CHECK: paddsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xed,0x1d,0xed,0x7e,0x00,0x00]
+        	paddsw	0x7eed,%mm3
+
+// CHECK: paddsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xed,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddsw	0xbabecafe,%mm3
+
+// CHECK: paddsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xed,0x1d,0x78,0x56,0x34,0x12]
+        	paddsw	0x12345678,%mm3
+
+// CHECK: paddsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xed,0xdb]
+        	paddsw	%mm3,%mm3
+
+// CHECK: paddsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0x45,0x00,0x00,0x00]
+        	paddsw	0x45,%xmm5
+
+// CHECK: paddsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0xed,0x7e,0x00,0x00]
+        	paddsw	0x7eed,%xmm5
+
+// CHECK: paddsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddsw	0xbabecafe,%xmm5
+
+// CHECK: paddsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0x2d,0x78,0x56,0x34,0x12]
+        	paddsw	0x12345678,%xmm5
+
+// CHECK: paddsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xed,0xed]
+        	paddsw	%xmm5,%xmm5
+
+// CHECK: paddusb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xdc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddusb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddusb	69, %mm3
+// CHECK:  encoding: [0x0f,0xdc,0x1d,0x45,0x00,0x00,0x00]
+        	paddusb	0x45,%mm3
+
+// CHECK: paddusb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xdc,0x1d,0xed,0x7e,0x00,0x00]
+        	paddusb	0x7eed,%mm3
+
+// CHECK: paddusb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xdc,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddusb	0xbabecafe,%mm3
+
+// CHECK: paddusb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xdc,0x1d,0x78,0x56,0x34,0x12]
+        	paddusb	0x12345678,%mm3
+
+// CHECK: paddusb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xdc,0xdb]
+        	paddusb	%mm3,%mm3
+
+// CHECK: paddusb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddusb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0x45,0x00,0x00,0x00]
+        	paddusb	0x45,%xmm5
+
+// CHECK: paddusb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0xed,0x7e,0x00,0x00]
+        	paddusb	0x7eed,%xmm5
+
+// CHECK: paddusb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddusb	0xbabecafe,%xmm5
+
+// CHECK: paddusb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0x2d,0x78,0x56,0x34,0x12]
+        	paddusb	0x12345678,%xmm5
+
+// CHECK: paddusb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdc,0xed]
+        	paddusb	%xmm5,%xmm5
+
+// CHECK: paddusw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xdd,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	paddusw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: paddusw	69, %mm3
+// CHECK:  encoding: [0x0f,0xdd,0x1d,0x45,0x00,0x00,0x00]
+        	paddusw	0x45,%mm3
+
+// CHECK: paddusw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xdd,0x1d,0xed,0x7e,0x00,0x00]
+        	paddusw	0x7eed,%mm3
+
+// CHECK: paddusw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xdd,0x1d,0xfe,0xca,0xbe,0xba]
+        	paddusw	0xbabecafe,%mm3
+
+// CHECK: paddusw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xdd,0x1d,0x78,0x56,0x34,0x12]
+        	paddusw	0x12345678,%mm3
+
+// CHECK: paddusw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xdd,0xdb]
+        	paddusw	%mm3,%mm3
+
+// CHECK: paddusw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	paddusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: paddusw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0x45,0x00,0x00,0x00]
+        	paddusw	0x45,%xmm5
+
+// CHECK: paddusw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0xed,0x7e,0x00,0x00]
+        	paddusw	0x7eed,%xmm5
+
+// CHECK: paddusw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0xfe,0xca,0xbe,0xba]
+        	paddusw	0xbabecafe,%xmm5
+
+// CHECK: paddusw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0x2d,0x78,0x56,0x34,0x12]
+        	paddusw	0x12345678,%xmm5
+
+// CHECK: paddusw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdd,0xed]
+        	paddusw	%xmm5,%xmm5
+
+// CHECK: pand	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xdb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pand	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pand	69, %mm3
+// CHECK:  encoding: [0x0f,0xdb,0x1d,0x45,0x00,0x00,0x00]
+        	pand	0x45,%mm3
+
+// CHECK: pand	32493, %mm3
+// CHECK:  encoding: [0x0f,0xdb,0x1d,0xed,0x7e,0x00,0x00]
+        	pand	0x7eed,%mm3
+
+// CHECK: pand	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xdb,0x1d,0xfe,0xca,0xbe,0xba]
+        	pand	0xbabecafe,%mm3
+
+// CHECK: pand	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xdb,0x1d,0x78,0x56,0x34,0x12]
+        	pand	0x12345678,%mm3
+
+// CHECK: pand	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xdb,0xdb]
+        	pand	%mm3,%mm3
+
+// CHECK: pand	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pand	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pand	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0x45,0x00,0x00,0x00]
+        	pand	0x45,%xmm5
+
+// CHECK: pand	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0xed,0x7e,0x00,0x00]
+        	pand	0x7eed,%xmm5
+
+// CHECK: pand	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0xfe,0xca,0xbe,0xba]
+        	pand	0xbabecafe,%xmm5
+
+// CHECK: pand	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0x2d,0x78,0x56,0x34,0x12]
+        	pand	0x12345678,%xmm5
+
+// CHECK: pand	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdb,0xed]
+        	pand	%xmm5,%xmm5
+
+// CHECK: pandn	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xdf,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pandn	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pandn	69, %mm3
+// CHECK:  encoding: [0x0f,0xdf,0x1d,0x45,0x00,0x00,0x00]
+        	pandn	0x45,%mm3
+
+// CHECK: pandn	32493, %mm3
+// CHECK:  encoding: [0x0f,0xdf,0x1d,0xed,0x7e,0x00,0x00]
+        	pandn	0x7eed,%mm3
+
+// CHECK: pandn	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xdf,0x1d,0xfe,0xca,0xbe,0xba]
+        	pandn	0xbabecafe,%mm3
+
+// CHECK: pandn	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xdf,0x1d,0x78,0x56,0x34,0x12]
+        	pandn	0x12345678,%mm3
+
+// CHECK: pandn	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xdf,0xdb]
+        	pandn	%mm3,%mm3
+
+// CHECK: pandn	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pandn	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pandn	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0x45,0x00,0x00,0x00]
+        	pandn	0x45,%xmm5
+
+// CHECK: pandn	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0xed,0x7e,0x00,0x00]
+        	pandn	0x7eed,%xmm5
+
+// CHECK: pandn	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0xfe,0xca,0xbe,0xba]
+        	pandn	0xbabecafe,%xmm5
+
+// CHECK: pandn	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0x2d,0x78,0x56,0x34,0x12]
+        	pandn	0x12345678,%xmm5
+
+// CHECK: pandn	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xdf,0xed]
+        	pandn	%xmm5,%xmm5
+
+// CHECK: pcmpeqb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x74,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpeqb	69, %mm3
+// CHECK:  encoding: [0x0f,0x74,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpeqb	0x45,%mm3
+
+// CHECK: pcmpeqb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x74,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpeqb	0x7eed,%mm3
+
+// CHECK: pcmpeqb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x74,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqb	0xbabecafe,%mm3
+
+// CHECK: pcmpeqb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x74,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpeqb	0x12345678,%mm3
+
+// CHECK: pcmpeqb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x74,0xdb]
+        	pcmpeqb	%mm3,%mm3
+
+// CHECK: pcmpeqb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpeqb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpeqb	0x45,%xmm5
+
+// CHECK: pcmpeqb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpeqb	0x7eed,%xmm5
+
+// CHECK: pcmpeqb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqb	0xbabecafe,%xmm5
+
+// CHECK: pcmpeqb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpeqb	0x12345678,%xmm5
+
+// CHECK: pcmpeqb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x74,0xed]
+        	pcmpeqb	%xmm5,%xmm5
+
+// CHECK: pcmpeqw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x75,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpeqw	69, %mm3
+// CHECK:  encoding: [0x0f,0x75,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpeqw	0x45,%mm3
+
+// CHECK: pcmpeqw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x75,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpeqw	0x7eed,%mm3
+
+// CHECK: pcmpeqw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x75,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqw	0xbabecafe,%mm3
+
+// CHECK: pcmpeqw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x75,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpeqw	0x12345678,%mm3
+
+// CHECK: pcmpeqw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x75,0xdb]
+        	pcmpeqw	%mm3,%mm3
+
+// CHECK: pcmpeqw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpeqw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpeqw	0x45,%xmm5
+
+// CHECK: pcmpeqw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpeqw	0x7eed,%xmm5
+
+// CHECK: pcmpeqw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqw	0xbabecafe,%xmm5
+
+// CHECK: pcmpeqw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpeqw	0x12345678,%xmm5
+
+// CHECK: pcmpeqw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x75,0xed]
+        	pcmpeqw	%xmm5,%xmm5
+
+// CHECK: pcmpeqd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x76,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpeqd	69, %mm3
+// CHECK:  encoding: [0x0f,0x76,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpeqd	0x45,%mm3
+
+// CHECK: pcmpeqd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x76,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpeqd	0x7eed,%mm3
+
+// CHECK: pcmpeqd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x76,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqd	0xbabecafe,%mm3
+
+// CHECK: pcmpeqd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x76,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpeqd	0x12345678,%mm3
+
+// CHECK: pcmpeqd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x76,0xdb]
+        	pcmpeqd	%mm3,%mm3
+
+// CHECK: pcmpeqd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpeqd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpeqd	0x45,%xmm5
+
+// CHECK: pcmpeqd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpeqd	0x7eed,%xmm5
+
+// CHECK: pcmpeqd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqd	0xbabecafe,%xmm5
+
+// CHECK: pcmpeqd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpeqd	0x12345678,%xmm5
+
+// CHECK: pcmpeqd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x76,0xed]
+        	pcmpeqd	%xmm5,%xmm5
+
+// CHECK: pcmpgtb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x64,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpgtb	69, %mm3
+// CHECK:  encoding: [0x0f,0x64,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpgtb	0x45,%mm3
+
+// CHECK: pcmpgtb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x64,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpgtb	0x7eed,%mm3
+
+// CHECK: pcmpgtb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x64,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtb	0xbabecafe,%mm3
+
+// CHECK: pcmpgtb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x64,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpgtb	0x12345678,%mm3
+
+// CHECK: pcmpgtb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x64,0xdb]
+        	pcmpgtb	%mm3,%mm3
+
+// CHECK: pcmpgtb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpgtb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpgtb	0x45,%xmm5
+
+// CHECK: pcmpgtb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpgtb	0x7eed,%xmm5
+
+// CHECK: pcmpgtb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtb	0xbabecafe,%xmm5
+
+// CHECK: pcmpgtb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpgtb	0x12345678,%xmm5
+
+// CHECK: pcmpgtb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x64,0xed]
+        	pcmpgtb	%xmm5,%xmm5
+
+// CHECK: pcmpgtw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x65,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpgtw	69, %mm3
+// CHECK:  encoding: [0x0f,0x65,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpgtw	0x45,%mm3
+
+// CHECK: pcmpgtw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x65,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpgtw	0x7eed,%mm3
+
+// CHECK: pcmpgtw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x65,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtw	0xbabecafe,%mm3
+
+// CHECK: pcmpgtw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x65,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpgtw	0x12345678,%mm3
+
+// CHECK: pcmpgtw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x65,0xdb]
+        	pcmpgtw	%mm3,%mm3
+
+// CHECK: pcmpgtw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpgtw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpgtw	0x45,%xmm5
+
+// CHECK: pcmpgtw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpgtw	0x7eed,%xmm5
+
+// CHECK: pcmpgtw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtw	0xbabecafe,%xmm5
+
+// CHECK: pcmpgtw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpgtw	0x12345678,%xmm5
+
+// CHECK: pcmpgtw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x65,0xed]
+        	pcmpgtw	%xmm5,%xmm5
+
+// CHECK: pcmpgtd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x66,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pcmpgtd	69, %mm3
+// CHECK:  encoding: [0x0f,0x66,0x1d,0x45,0x00,0x00,0x00]
+        	pcmpgtd	0x45,%mm3
+
+// CHECK: pcmpgtd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x66,0x1d,0xed,0x7e,0x00,0x00]
+        	pcmpgtd	0x7eed,%mm3
+
+// CHECK: pcmpgtd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x66,0x1d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtd	0xbabecafe,%mm3
+
+// CHECK: pcmpgtd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x66,0x1d,0x78,0x56,0x34,0x12]
+        	pcmpgtd	0x12345678,%mm3
+
+// CHECK: pcmpgtd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x66,0xdb]
+        	pcmpgtd	%mm3,%mm3
+
+// CHECK: pcmpgtd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpgtd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpgtd	0x45,%xmm5
+
+// CHECK: pcmpgtd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpgtd	0x7eed,%xmm5
+
+// CHECK: pcmpgtd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtd	0xbabecafe,%xmm5
+
+// CHECK: pcmpgtd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpgtd	0x12345678,%xmm5
+
+// CHECK: pcmpgtd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x66,0xed]
+        	pcmpgtd	%xmm5,%xmm5
+
+// CHECK: pmaddwd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmaddwd	69, %mm3
+// CHECK:  encoding: [0x0f,0xf5,0x1d,0x45,0x00,0x00,0x00]
+        	pmaddwd	0x45,%mm3
+
+// CHECK: pmaddwd	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf5,0x1d,0xed,0x7e,0x00,0x00]
+        	pmaddwd	0x7eed,%mm3
+
+// CHECK: pmaddwd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf5,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmaddwd	0xbabecafe,%mm3
+
+// CHECK: pmaddwd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf5,0x1d,0x78,0x56,0x34,0x12]
+        	pmaddwd	0x12345678,%mm3
+
+// CHECK: pmaddwd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf5,0xdb]
+        	pmaddwd	%mm3,%mm3
+
+// CHECK: pmaddwd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaddwd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0x45,0x00,0x00,0x00]
+        	pmaddwd	0x45,%xmm5
+
+// CHECK: pmaddwd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaddwd	0x7eed,%xmm5
+
+// CHECK: pmaddwd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaddwd	0xbabecafe,%xmm5
+
+// CHECK: pmaddwd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0x2d,0x78,0x56,0x34,0x12]
+        	pmaddwd	0x12345678,%xmm5
+
+// CHECK: pmaddwd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf5,0xed]
+        	pmaddwd	%xmm5,%xmm5
+
+// CHECK: pmulhw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmulhw	69, %mm3
+// CHECK:  encoding: [0x0f,0xe5,0x1d,0x45,0x00,0x00,0x00]
+        	pmulhw	0x45,%mm3
+
+// CHECK: pmulhw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe5,0x1d,0xed,0x7e,0x00,0x00]
+        	pmulhw	0x7eed,%mm3
+
+// CHECK: pmulhw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe5,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmulhw	0xbabecafe,%mm3
+
+// CHECK: pmulhw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe5,0x1d,0x78,0x56,0x34,0x12]
+        	pmulhw	0x12345678,%mm3
+
+// CHECK: pmulhw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe5,0xdb]
+        	pmulhw	%mm3,%mm3
+
+// CHECK: pmulhw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmulhw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0x45,0x00,0x00,0x00]
+        	pmulhw	0x45,%xmm5
+
+// CHECK: pmulhw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0xed,0x7e,0x00,0x00]
+        	pmulhw	0x7eed,%xmm5
+
+// CHECK: pmulhw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmulhw	0xbabecafe,%xmm5
+
+// CHECK: pmulhw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0x2d,0x78,0x56,0x34,0x12]
+        	pmulhw	0x12345678,%xmm5
+
+// CHECK: pmulhw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe5,0xed]
+        	pmulhw	%xmm5,%xmm5
+
+// CHECK: pmullw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd5,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmullw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmullw	69, %mm3
+// CHECK:  encoding: [0x0f,0xd5,0x1d,0x45,0x00,0x00,0x00]
+        	pmullw	0x45,%mm3
+
+// CHECK: pmullw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd5,0x1d,0xed,0x7e,0x00,0x00]
+        	pmullw	0x7eed,%mm3
+
+// CHECK: pmullw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd5,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmullw	0xbabecafe,%mm3
+
+// CHECK: pmullw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd5,0x1d,0x78,0x56,0x34,0x12]
+        	pmullw	0x12345678,%mm3
+
+// CHECK: pmullw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd5,0xdb]
+        	pmullw	%mm3,%mm3
+
+// CHECK: pmullw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmullw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmullw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0x45,0x00,0x00,0x00]
+        	pmullw	0x45,%xmm5
+
+// CHECK: pmullw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0xed,0x7e,0x00,0x00]
+        	pmullw	0x7eed,%xmm5
+
+// CHECK: pmullw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmullw	0xbabecafe,%xmm5
+
+// CHECK: pmullw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0x2d,0x78,0x56,0x34,0x12]
+        	pmullw	0x12345678,%xmm5
+
+// CHECK: pmullw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd5,0xed]
+        	pmullw	%xmm5,%xmm5
+
+// CHECK: por	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xeb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	por	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: por	69, %mm3
+// CHECK:  encoding: [0x0f,0xeb,0x1d,0x45,0x00,0x00,0x00]
+        	por	0x45,%mm3
+
+// CHECK: por	32493, %mm3
+// CHECK:  encoding: [0x0f,0xeb,0x1d,0xed,0x7e,0x00,0x00]
+        	por	0x7eed,%mm3
+
+// CHECK: por	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xeb,0x1d,0xfe,0xca,0xbe,0xba]
+        	por	0xbabecafe,%mm3
+
+// CHECK: por	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xeb,0x1d,0x78,0x56,0x34,0x12]
+        	por	0x12345678,%mm3
+
+// CHECK: por	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xeb,0xdb]
+        	por	%mm3,%mm3
+
+// CHECK: por	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	por	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: por	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0x45,0x00,0x00,0x00]
+        	por	0x45,%xmm5
+
+// CHECK: por	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0xed,0x7e,0x00,0x00]
+        	por	0x7eed,%xmm5
+
+// CHECK: por	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0xfe,0xca,0xbe,0xba]
+        	por	0xbabecafe,%xmm5
+
+// CHECK: por	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0x2d,0x78,0x56,0x34,0x12]
+        	por	0x12345678,%xmm5
+
+// CHECK: por	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xeb,0xed]
+        	por	%xmm5,%xmm5
+
+// CHECK: psllw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psllw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psllw	69, %mm3
+// CHECK:  encoding: [0x0f,0xf1,0x1d,0x45,0x00,0x00,0x00]
+        	psllw	0x45,%mm3
+
+// CHECK: psllw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf1,0x1d,0xed,0x7e,0x00,0x00]
+        	psllw	0x7eed,%mm3
+
+// CHECK: psllw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf1,0x1d,0xfe,0xca,0xbe,0xba]
+        	psllw	0xbabecafe,%mm3
+
+// CHECK: psllw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf1,0x1d,0x78,0x56,0x34,0x12]
+        	psllw	0x12345678,%mm3
+
+// CHECK: psllw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf1,0xdb]
+        	psllw	%mm3,%mm3
+
+// CHECK: psllw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psllw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psllw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0x45,0x00,0x00,0x00]
+        	psllw	0x45,%xmm5
+
+// CHECK: psllw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0xed,0x7e,0x00,0x00]
+        	psllw	0x7eed,%xmm5
+
+// CHECK: psllw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0xfe,0xca,0xbe,0xba]
+        	psllw	0xbabecafe,%xmm5
+
+// CHECK: psllw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0x2d,0x78,0x56,0x34,0x12]
+        	psllw	0x12345678,%xmm5
+
+// CHECK: psllw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf1,0xed]
+        	psllw	%xmm5,%xmm5
+
+// CHECK: psllw	$127, %mm3
+// CHECK:  encoding: [0x0f,0x71,0xf3,0x7f]
+        	psllw	$0x7f,%mm3
+
+// CHECK: psllw	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x71,0xf5,0x7f]
+        	psllw	$0x7f,%xmm5
+
+// CHECK: pslld	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pslld	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pslld	69, %mm3
+// CHECK:  encoding: [0x0f,0xf2,0x1d,0x45,0x00,0x00,0x00]
+        	pslld	0x45,%mm3
+
+// CHECK: pslld	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf2,0x1d,0xed,0x7e,0x00,0x00]
+        	pslld	0x7eed,%mm3
+
+// CHECK: pslld	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf2,0x1d,0xfe,0xca,0xbe,0xba]
+        	pslld	0xbabecafe,%mm3
+
+// CHECK: pslld	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf2,0x1d,0x78,0x56,0x34,0x12]
+        	pslld	0x12345678,%mm3
+
+// CHECK: pslld	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf2,0xdb]
+        	pslld	%mm3,%mm3
+
+// CHECK: pslld	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pslld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pslld	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0x45,0x00,0x00,0x00]
+        	pslld	0x45,%xmm5
+
+// CHECK: pslld	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0xed,0x7e,0x00,0x00]
+        	pslld	0x7eed,%xmm5
+
+// CHECK: pslld	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0xfe,0xca,0xbe,0xba]
+        	pslld	0xbabecafe,%xmm5
+
+// CHECK: pslld	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0x2d,0x78,0x56,0x34,0x12]
+        	pslld	0x12345678,%xmm5
+
+// CHECK: pslld	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf2,0xed]
+        	pslld	%xmm5,%xmm5
+
+// CHECK: pslld	$127, %mm3
+// CHECK:  encoding: [0x0f,0x72,0xf3,0x7f]
+        	pslld	$0x7f,%mm3
+
+// CHECK: pslld	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x72,0xf5,0x7f]
+        	pslld	$0x7f,%xmm5
+
+// CHECK: psllq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psllq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psllq	69, %mm3
+// CHECK:  encoding: [0x0f,0xf3,0x1d,0x45,0x00,0x00,0x00]
+        	psllq	0x45,%mm3
+
+// CHECK: psllq	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf3,0x1d,0xed,0x7e,0x00,0x00]
+        	psllq	0x7eed,%mm3
+
+// CHECK: psllq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf3,0x1d,0xfe,0xca,0xbe,0xba]
+        	psllq	0xbabecafe,%mm3
+
+// CHECK: psllq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf3,0x1d,0x78,0x56,0x34,0x12]
+        	psllq	0x12345678,%mm3
+
+// CHECK: psllq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf3,0xdb]
+        	psllq	%mm3,%mm3
+
+// CHECK: psllq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psllq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psllq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0x45,0x00,0x00,0x00]
+        	psllq	0x45,%xmm5
+
+// CHECK: psllq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0xed,0x7e,0x00,0x00]
+        	psllq	0x7eed,%xmm5
+
+// CHECK: psllq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0xfe,0xca,0xbe,0xba]
+        	psllq	0xbabecafe,%xmm5
+
+// CHECK: psllq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0x2d,0x78,0x56,0x34,0x12]
+        	psllq	0x12345678,%xmm5
+
+// CHECK: psllq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf3,0xed]
+        	psllq	%xmm5,%xmm5
+
+// CHECK: psllq	$127, %mm3
+// CHECK:  encoding: [0x0f,0x73,0xf3,0x7f]
+        	psllq	$0x7f,%mm3
+
+// CHECK: psllq	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x73,0xf5,0x7f]
+        	psllq	$0x7f,%xmm5
+
+// CHECK: psraw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psraw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psraw	69, %mm3
+// CHECK:  encoding: [0x0f,0xe1,0x1d,0x45,0x00,0x00,0x00]
+        	psraw	0x45,%mm3
+
+// CHECK: psraw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe1,0x1d,0xed,0x7e,0x00,0x00]
+        	psraw	0x7eed,%mm3
+
+// CHECK: psraw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe1,0x1d,0xfe,0xca,0xbe,0xba]
+        	psraw	0xbabecafe,%mm3
+
+// CHECK: psraw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe1,0x1d,0x78,0x56,0x34,0x12]
+        	psraw	0x12345678,%mm3
+
+// CHECK: psraw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe1,0xdb]
+        	psraw	%mm3,%mm3
+
+// CHECK: psraw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psraw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psraw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0x45,0x00,0x00,0x00]
+        	psraw	0x45,%xmm5
+
+// CHECK: psraw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0xed,0x7e,0x00,0x00]
+        	psraw	0x7eed,%xmm5
+
+// CHECK: psraw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0xfe,0xca,0xbe,0xba]
+        	psraw	0xbabecafe,%xmm5
+
+// CHECK: psraw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0x2d,0x78,0x56,0x34,0x12]
+        	psraw	0x12345678,%xmm5
+
+// CHECK: psraw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe1,0xed]
+        	psraw	%xmm5,%xmm5
+
+// CHECK: psraw	$127, %mm3
+// CHECK:  encoding: [0x0f,0x71,0xe3,0x7f]
+        	psraw	$0x7f,%mm3
+
+// CHECK: psraw	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x71,0xe5,0x7f]
+        	psraw	$0x7f,%xmm5
+
+// CHECK: psrad	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psrad	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psrad	69, %mm3
+// CHECK:  encoding: [0x0f,0xe2,0x1d,0x45,0x00,0x00,0x00]
+        	psrad	0x45,%mm3
+
+// CHECK: psrad	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe2,0x1d,0xed,0x7e,0x00,0x00]
+        	psrad	0x7eed,%mm3
+
+// CHECK: psrad	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe2,0x1d,0xfe,0xca,0xbe,0xba]
+        	psrad	0xbabecafe,%mm3
+
+// CHECK: psrad	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe2,0x1d,0x78,0x56,0x34,0x12]
+        	psrad	0x12345678,%mm3
+
+// CHECK: psrad	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe2,0xdb]
+        	psrad	%mm3,%mm3
+
+// CHECK: psrad	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psrad	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psrad	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0x45,0x00,0x00,0x00]
+        	psrad	0x45,%xmm5
+
+// CHECK: psrad	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0xed,0x7e,0x00,0x00]
+        	psrad	0x7eed,%xmm5
+
+// CHECK: psrad	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0xfe,0xca,0xbe,0xba]
+        	psrad	0xbabecafe,%xmm5
+
+// CHECK: psrad	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0x2d,0x78,0x56,0x34,0x12]
+        	psrad	0x12345678,%xmm5
+
+// CHECK: psrad	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe2,0xed]
+        	psrad	%xmm5,%xmm5
+
+// CHECK: psrad	$127, %mm3
+// CHECK:  encoding: [0x0f,0x72,0xe3,0x7f]
+        	psrad	$0x7f,%mm3
+
+// CHECK: psrad	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x72,0xe5,0x7f]
+        	psrad	$0x7f,%xmm5
+
+// CHECK: psrlw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd1,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psrlw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psrlw	69, %mm3
+// CHECK:  encoding: [0x0f,0xd1,0x1d,0x45,0x00,0x00,0x00]
+        	psrlw	0x45,%mm3
+
+// CHECK: psrlw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd1,0x1d,0xed,0x7e,0x00,0x00]
+        	psrlw	0x7eed,%mm3
+
+// CHECK: psrlw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd1,0x1d,0xfe,0xca,0xbe,0xba]
+        	psrlw	0xbabecafe,%mm3
+
+// CHECK: psrlw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd1,0x1d,0x78,0x56,0x34,0x12]
+        	psrlw	0x12345678,%mm3
+
+// CHECK: psrlw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd1,0xdb]
+        	psrlw	%mm3,%mm3
+
+// CHECK: psrlw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psrlw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psrlw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0x45,0x00,0x00,0x00]
+        	psrlw	0x45,%xmm5
+
+// CHECK: psrlw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0xed,0x7e,0x00,0x00]
+        	psrlw	0x7eed,%xmm5
+
+// CHECK: psrlw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0xfe,0xca,0xbe,0xba]
+        	psrlw	0xbabecafe,%xmm5
+
+// CHECK: psrlw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0x2d,0x78,0x56,0x34,0x12]
+        	psrlw	0x12345678,%xmm5
+
+// CHECK: psrlw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd1,0xed]
+        	psrlw	%xmm5,%xmm5
+
+// CHECK: psrlw	$127, %mm3
+// CHECK:  encoding: [0x0f,0x71,0xd3,0x7f]
+        	psrlw	$0x7f,%mm3
+
+// CHECK: psrlw	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x71,0xd5,0x7f]
+        	psrlw	$0x7f,%xmm5
+
+// CHECK: psrld	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd2,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psrld	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psrld	69, %mm3
+// CHECK:  encoding: [0x0f,0xd2,0x1d,0x45,0x00,0x00,0x00]
+        	psrld	0x45,%mm3
+
+// CHECK: psrld	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd2,0x1d,0xed,0x7e,0x00,0x00]
+        	psrld	0x7eed,%mm3
+
+// CHECK: psrld	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd2,0x1d,0xfe,0xca,0xbe,0xba]
+        	psrld	0xbabecafe,%mm3
+
+// CHECK: psrld	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd2,0x1d,0x78,0x56,0x34,0x12]
+        	psrld	0x12345678,%mm3
+
+// CHECK: psrld	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd2,0xdb]
+        	psrld	%mm3,%mm3
+
+// CHECK: psrld	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psrld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psrld	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0x45,0x00,0x00,0x00]
+        	psrld	0x45,%xmm5
+
+// CHECK: psrld	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0xed,0x7e,0x00,0x00]
+        	psrld	0x7eed,%xmm5
+
+// CHECK: psrld	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0xfe,0xca,0xbe,0xba]
+        	psrld	0xbabecafe,%xmm5
+
+// CHECK: psrld	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0x2d,0x78,0x56,0x34,0x12]
+        	psrld	0x12345678,%xmm5
+
+// CHECK: psrld	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd2,0xed]
+        	psrld	%xmm5,%xmm5
+
+// CHECK: psrld	$127, %mm3
+// CHECK:  encoding: [0x0f,0x72,0xd3,0x7f]
+        	psrld	$0x7f,%mm3
+
+// CHECK: psrld	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x72,0xd5,0x7f]
+        	psrld	$0x7f,%xmm5
+
+// CHECK: psrlq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psrlq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psrlq	69, %mm3
+// CHECK:  encoding: [0x0f,0xd3,0x1d,0x45,0x00,0x00,0x00]
+        	psrlq	0x45,%mm3
+
+// CHECK: psrlq	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd3,0x1d,0xed,0x7e,0x00,0x00]
+        	psrlq	0x7eed,%mm3
+
+// CHECK: psrlq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd3,0x1d,0xfe,0xca,0xbe,0xba]
+        	psrlq	0xbabecafe,%mm3
+
+// CHECK: psrlq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd3,0x1d,0x78,0x56,0x34,0x12]
+        	psrlq	0x12345678,%mm3
+
+// CHECK: psrlq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd3,0xdb]
+        	psrlq	%mm3,%mm3
+
+// CHECK: psrlq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psrlq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psrlq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0x45,0x00,0x00,0x00]
+        	psrlq	0x45,%xmm5
+
+// CHECK: psrlq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0xed,0x7e,0x00,0x00]
+        	psrlq	0x7eed,%xmm5
+
+// CHECK: psrlq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0xfe,0xca,0xbe,0xba]
+        	psrlq	0xbabecafe,%xmm5
+
+// CHECK: psrlq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0x2d,0x78,0x56,0x34,0x12]
+        	psrlq	0x12345678,%xmm5
+
+// CHECK: psrlq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd3,0xed]
+        	psrlq	%xmm5,%xmm5
+
+// CHECK: psrlq	$127, %mm3
+// CHECK:  encoding: [0x0f,0x73,0xd3,0x7f]
+        	psrlq	$0x7f,%mm3
+
+// CHECK: psrlq	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x73,0xd5,0x7f]
+        	psrlq	$0x7f,%xmm5
+
+// CHECK: psubb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubb	69, %mm3
+// CHECK:  encoding: [0x0f,0xf8,0x1d,0x45,0x00,0x00,0x00]
+        	psubb	0x45,%mm3
+
+// CHECK: psubb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf8,0x1d,0xed,0x7e,0x00,0x00]
+        	psubb	0x7eed,%mm3
+
+// CHECK: psubb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf8,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubb	0xbabecafe,%mm3
+
+// CHECK: psubb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf8,0x1d,0x78,0x56,0x34,0x12]
+        	psubb	0x12345678,%mm3
+
+// CHECK: psubb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf8,0xdb]
+        	psubb	%mm3,%mm3
+
+// CHECK: psubb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0x45,0x00,0x00,0x00]
+        	psubb	0x45,%xmm5
+
+// CHECK: psubb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0xed,0x7e,0x00,0x00]
+        	psubb	0x7eed,%xmm5
+
+// CHECK: psubb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubb	0xbabecafe,%xmm5
+
+// CHECK: psubb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0x2d,0x78,0x56,0x34,0x12]
+        	psubb	0x12345678,%xmm5
+
+// CHECK: psubb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf8,0xed]
+        	psubb	%xmm5,%xmm5
+
+// CHECK: psubw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubw	69, %mm3
+// CHECK:  encoding: [0x0f,0xf9,0x1d,0x45,0x00,0x00,0x00]
+        	psubw	0x45,%mm3
+
+// CHECK: psubw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf9,0x1d,0xed,0x7e,0x00,0x00]
+        	psubw	0x7eed,%mm3
+
+// CHECK: psubw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf9,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubw	0xbabecafe,%mm3
+
+// CHECK: psubw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf9,0x1d,0x78,0x56,0x34,0x12]
+        	psubw	0x12345678,%mm3
+
+// CHECK: psubw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf9,0xdb]
+        	psubw	%mm3,%mm3
+
+// CHECK: psubw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0x45,0x00,0x00,0x00]
+        	psubw	0x45,%xmm5
+
+// CHECK: psubw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0xed,0x7e,0x00,0x00]
+        	psubw	0x7eed,%xmm5
+
+// CHECK: psubw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubw	0xbabecafe,%xmm5
+
+// CHECK: psubw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0x2d,0x78,0x56,0x34,0x12]
+        	psubw	0x12345678,%xmm5
+
+// CHECK: psubw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf9,0xed]
+        	psubw	%xmm5,%xmm5
+
+// CHECK: psubd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xfa,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubd	69, %mm3
+// CHECK:  encoding: [0x0f,0xfa,0x1d,0x45,0x00,0x00,0x00]
+        	psubd	0x45,%mm3
+
+// CHECK: psubd	32493, %mm3
+// CHECK:  encoding: [0x0f,0xfa,0x1d,0xed,0x7e,0x00,0x00]
+        	psubd	0x7eed,%mm3
+
+// CHECK: psubd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xfa,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubd	0xbabecafe,%mm3
+
+// CHECK: psubd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xfa,0x1d,0x78,0x56,0x34,0x12]
+        	psubd	0x12345678,%mm3
+
+// CHECK: psubd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xfa,0xdb]
+        	psubd	%mm3,%mm3
+
+// CHECK: psubd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0x45,0x00,0x00,0x00]
+        	psubd	0x45,%xmm5
+
+// CHECK: psubd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0xed,0x7e,0x00,0x00]
+        	psubd	0x7eed,%xmm5
+
+// CHECK: psubd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubd	0xbabecafe,%xmm5
+
+// CHECK: psubd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0x2d,0x78,0x56,0x34,0x12]
+        	psubd	0x12345678,%xmm5
+
+// CHECK: psubd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfa,0xed]
+        	psubd	%xmm5,%xmm5
+
+// CHECK: psubq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xfb,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubq	69, %mm3
+// CHECK:  encoding: [0x0f,0xfb,0x1d,0x45,0x00,0x00,0x00]
+        	psubq	0x45,%mm3
+
+// CHECK: psubq	32493, %mm3
+// CHECK:  encoding: [0x0f,0xfb,0x1d,0xed,0x7e,0x00,0x00]
+        	psubq	0x7eed,%mm3
+
+// CHECK: psubq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xfb,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubq	0xbabecafe,%mm3
+
+// CHECK: psubq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xfb,0x1d,0x78,0x56,0x34,0x12]
+        	psubq	0x12345678,%mm3
+
+// CHECK: psubq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xfb,0xdb]
+        	psubq	%mm3,%mm3
+
+// CHECK: psubq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0x45,0x00,0x00,0x00]
+        	psubq	0x45,%xmm5
+
+// CHECK: psubq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0xed,0x7e,0x00,0x00]
+        	psubq	0x7eed,%xmm5
+
+// CHECK: psubq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubq	0xbabecafe,%xmm5
+
+// CHECK: psubq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0x2d,0x78,0x56,0x34,0x12]
+        	psubq	0x12345678,%xmm5
+
+// CHECK: psubq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xfb,0xed]
+        	psubq	%xmm5,%xmm5
+
+// CHECK: psubsb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubsb	69, %mm3
+// CHECK:  encoding: [0x0f,0xe8,0x1d,0x45,0x00,0x00,0x00]
+        	psubsb	0x45,%mm3
+
+// CHECK: psubsb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe8,0x1d,0xed,0x7e,0x00,0x00]
+        	psubsb	0x7eed,%mm3
+
+// CHECK: psubsb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe8,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubsb	0xbabecafe,%mm3
+
+// CHECK: psubsb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe8,0x1d,0x78,0x56,0x34,0x12]
+        	psubsb	0x12345678,%mm3
+
+// CHECK: psubsb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe8,0xdb]
+        	psubsb	%mm3,%mm3
+
+// CHECK: psubsb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubsb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0x45,0x00,0x00,0x00]
+        	psubsb	0x45,%xmm5
+
+// CHECK: psubsb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0xed,0x7e,0x00,0x00]
+        	psubsb	0x7eed,%xmm5
+
+// CHECK: psubsb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubsb	0xbabecafe,%xmm5
+
+// CHECK: psubsb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0x2d,0x78,0x56,0x34,0x12]
+        	psubsb	0x12345678,%xmm5
+
+// CHECK: psubsb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe8,0xed]
+        	psubsb	%xmm5,%xmm5
+
+// CHECK: psubsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubsw	69, %mm3
+// CHECK:  encoding: [0x0f,0xe9,0x1d,0x45,0x00,0x00,0x00]
+        	psubsw	0x45,%mm3
+
+// CHECK: psubsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe9,0x1d,0xed,0x7e,0x00,0x00]
+        	psubsw	0x7eed,%mm3
+
+// CHECK: psubsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe9,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubsw	0xbabecafe,%mm3
+
+// CHECK: psubsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe9,0x1d,0x78,0x56,0x34,0x12]
+        	psubsw	0x12345678,%mm3
+
+// CHECK: psubsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe9,0xdb]
+        	psubsw	%mm3,%mm3
+
+// CHECK: psubsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0x45,0x00,0x00,0x00]
+        	psubsw	0x45,%xmm5
+
+// CHECK: psubsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0xed,0x7e,0x00,0x00]
+        	psubsw	0x7eed,%xmm5
+
+// CHECK: psubsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubsw	0xbabecafe,%xmm5
+
+// CHECK: psubsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0x2d,0x78,0x56,0x34,0x12]
+        	psubsw	0x12345678,%xmm5
+
+// CHECK: psubsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe9,0xed]
+        	psubsw	%xmm5,%xmm5
+
+// CHECK: psubusb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd8,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubusb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubusb	69, %mm3
+// CHECK:  encoding: [0x0f,0xd8,0x1d,0x45,0x00,0x00,0x00]
+        	psubusb	0x45,%mm3
+
+// CHECK: psubusb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd8,0x1d,0xed,0x7e,0x00,0x00]
+        	psubusb	0x7eed,%mm3
+
+// CHECK: psubusb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd8,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubusb	0xbabecafe,%mm3
+
+// CHECK: psubusb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd8,0x1d,0x78,0x56,0x34,0x12]
+        	psubusb	0x12345678,%mm3
+
+// CHECK: psubusb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd8,0xdb]
+        	psubusb	%mm3,%mm3
+
+// CHECK: psubusb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubusb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0x45,0x00,0x00,0x00]
+        	psubusb	0x45,%xmm5
+
+// CHECK: psubusb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0xed,0x7e,0x00,0x00]
+        	psubusb	0x7eed,%xmm5
+
+// CHECK: psubusb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubusb	0xbabecafe,%xmm5
+
+// CHECK: psubusb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0x2d,0x78,0x56,0x34,0x12]
+        	psubusb	0x12345678,%xmm5
+
+// CHECK: psubusb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd8,0xed]
+        	psubusb	%xmm5,%xmm5
+
+// CHECK: psubusw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xd9,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psubusw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psubusw	69, %mm3
+// CHECK:  encoding: [0x0f,0xd9,0x1d,0x45,0x00,0x00,0x00]
+        	psubusw	0x45,%mm3
+
+// CHECK: psubusw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xd9,0x1d,0xed,0x7e,0x00,0x00]
+        	psubusw	0x7eed,%mm3
+
+// CHECK: psubusw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xd9,0x1d,0xfe,0xca,0xbe,0xba]
+        	psubusw	0xbabecafe,%mm3
+
+// CHECK: psubusw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xd9,0x1d,0x78,0x56,0x34,0x12]
+        	psubusw	0x12345678,%mm3
+
+// CHECK: psubusw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xd9,0xdb]
+        	psubusw	%mm3,%mm3
+
+// CHECK: psubusw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psubusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psubusw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0x45,0x00,0x00,0x00]
+        	psubusw	0x45,%xmm5
+
+// CHECK: psubusw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0xed,0x7e,0x00,0x00]
+        	psubusw	0x7eed,%xmm5
+
+// CHECK: psubusw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0xfe,0xca,0xbe,0xba]
+        	psubusw	0xbabecafe,%xmm5
+
+// CHECK: psubusw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0x2d,0x78,0x56,0x34,0x12]
+        	psubusw	0x12345678,%xmm5
+
+// CHECK: psubusw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd9,0xed]
+        	psubusw	%xmm5,%xmm5
+
+// CHECK: punpckhbw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x68,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpckhbw	69, %mm3
+// CHECK:  encoding: [0x0f,0x68,0x1d,0x45,0x00,0x00,0x00]
+        	punpckhbw	0x45,%mm3
+
+// CHECK: punpckhbw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x68,0x1d,0xed,0x7e,0x00,0x00]
+        	punpckhbw	0x7eed,%mm3
+
+// CHECK: punpckhbw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x68,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpckhbw	0xbabecafe,%mm3
+
+// CHECK: punpckhbw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x68,0x1d,0x78,0x56,0x34,0x12]
+        	punpckhbw	0x12345678,%mm3
+
+// CHECK: punpckhbw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x68,0xdb]
+        	punpckhbw	%mm3,%mm3
+
+// CHECK: punpckhbw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpckhbw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0x45,0x00,0x00,0x00]
+        	punpckhbw	0x45,%xmm5
+
+// CHECK: punpckhbw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0xed,0x7e,0x00,0x00]
+        	punpckhbw	0x7eed,%xmm5
+
+// CHECK: punpckhbw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpckhbw	0xbabecafe,%xmm5
+
+// CHECK: punpckhbw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0x2d,0x78,0x56,0x34,0x12]
+        	punpckhbw	0x12345678,%xmm5
+
+// CHECK: punpckhbw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x68,0xed]
+        	punpckhbw	%xmm5,%xmm5
+
+// CHECK: punpckhwd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x69,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpckhwd	69, %mm3
+// CHECK:  encoding: [0x0f,0x69,0x1d,0x45,0x00,0x00,0x00]
+        	punpckhwd	0x45,%mm3
+
+// CHECK: punpckhwd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x69,0x1d,0xed,0x7e,0x00,0x00]
+        	punpckhwd	0x7eed,%mm3
+
+// CHECK: punpckhwd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x69,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpckhwd	0xbabecafe,%mm3
+
+// CHECK: punpckhwd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x69,0x1d,0x78,0x56,0x34,0x12]
+        	punpckhwd	0x12345678,%mm3
+
+// CHECK: punpckhwd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x69,0xdb]
+        	punpckhwd	%mm3,%mm3
+
+// CHECK: punpckhwd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpckhwd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0x45,0x00,0x00,0x00]
+        	punpckhwd	0x45,%xmm5
+
+// CHECK: punpckhwd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0xed,0x7e,0x00,0x00]
+        	punpckhwd	0x7eed,%xmm5
+
+// CHECK: punpckhwd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpckhwd	0xbabecafe,%xmm5
+
+// CHECK: punpckhwd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0x2d,0x78,0x56,0x34,0x12]
+        	punpckhwd	0x12345678,%xmm5
+
+// CHECK: punpckhwd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x69,0xed]
+        	punpckhwd	%xmm5,%xmm5
+
+// CHECK: punpckhdq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x6a,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpckhdq	69, %mm3
+// CHECK:  encoding: [0x0f,0x6a,0x1d,0x45,0x00,0x00,0x00]
+        	punpckhdq	0x45,%mm3
+
+// CHECK: punpckhdq	32493, %mm3
+// CHECK:  encoding: [0x0f,0x6a,0x1d,0xed,0x7e,0x00,0x00]
+        	punpckhdq	0x7eed,%mm3
+
+// CHECK: punpckhdq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x6a,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpckhdq	0xbabecafe,%mm3
+
+// CHECK: punpckhdq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x6a,0x1d,0x78,0x56,0x34,0x12]
+        	punpckhdq	0x12345678,%mm3
+
+// CHECK: punpckhdq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x6a,0xdb]
+        	punpckhdq	%mm3,%mm3
+
+// CHECK: punpckhdq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpckhdq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0x45,0x00,0x00,0x00]
+        	punpckhdq	0x45,%xmm5
+
+// CHECK: punpckhdq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0xed,0x7e,0x00,0x00]
+        	punpckhdq	0x7eed,%xmm5
+
+// CHECK: punpckhdq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpckhdq	0xbabecafe,%xmm5
+
+// CHECK: punpckhdq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0x2d,0x78,0x56,0x34,0x12]
+        	punpckhdq	0x12345678,%xmm5
+
+// CHECK: punpckhdq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6a,0xed]
+        	punpckhdq	%xmm5,%xmm5
+
+// CHECK: punpcklbw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x60,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpcklbw	69, %mm3
+// CHECK:  encoding: [0x0f,0x60,0x1d,0x45,0x00,0x00,0x00]
+        	punpcklbw	0x45,%mm3
+
+// CHECK: punpcklbw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x60,0x1d,0xed,0x7e,0x00,0x00]
+        	punpcklbw	0x7eed,%mm3
+
+// CHECK: punpcklbw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x60,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpcklbw	0xbabecafe,%mm3
+
+// CHECK: punpcklbw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x60,0x1d,0x78,0x56,0x34,0x12]
+        	punpcklbw	0x12345678,%mm3
+
+// CHECK: punpcklbw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x60,0xdb]
+        	punpcklbw	%mm3,%mm3
+
+// CHECK: punpcklbw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpcklbw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0x45,0x00,0x00,0x00]
+        	punpcklbw	0x45,%xmm5
+
+// CHECK: punpcklbw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0xed,0x7e,0x00,0x00]
+        	punpcklbw	0x7eed,%xmm5
+
+// CHECK: punpcklbw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpcklbw	0xbabecafe,%xmm5
+
+// CHECK: punpcklbw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0x2d,0x78,0x56,0x34,0x12]
+        	punpcklbw	0x12345678,%xmm5
+
+// CHECK: punpcklbw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x60,0xed]
+        	punpcklbw	%xmm5,%xmm5
+
+// CHECK: punpcklwd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x61,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpcklwd	69, %mm3
+// CHECK:  encoding: [0x0f,0x61,0x1d,0x45,0x00,0x00,0x00]
+        	punpcklwd	0x45,%mm3
+
+// CHECK: punpcklwd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x61,0x1d,0xed,0x7e,0x00,0x00]
+        	punpcklwd	0x7eed,%mm3
+
+// CHECK: punpcklwd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x61,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpcklwd	0xbabecafe,%mm3
+
+// CHECK: punpcklwd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x61,0x1d,0x78,0x56,0x34,0x12]
+        	punpcklwd	0x12345678,%mm3
+
+// CHECK: punpcklwd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x61,0xdb]
+        	punpcklwd	%mm3,%mm3
+
+// CHECK: punpcklwd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpcklwd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0x45,0x00,0x00,0x00]
+        	punpcklwd	0x45,%xmm5
+
+// CHECK: punpcklwd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0xed,0x7e,0x00,0x00]
+        	punpcklwd	0x7eed,%xmm5
+
+// CHECK: punpcklwd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpcklwd	0xbabecafe,%xmm5
+
+// CHECK: punpcklwd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0x2d,0x78,0x56,0x34,0x12]
+        	punpcklwd	0x12345678,%xmm5
+
+// CHECK: punpcklwd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x61,0xed]
+        	punpcklwd	%xmm5,%xmm5
+
+// CHECK: punpckldq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x62,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: punpckldq	69, %mm3
+// CHECK:  encoding: [0x0f,0x62,0x1d,0x45,0x00,0x00,0x00]
+        	punpckldq	0x45,%mm3
+
+// CHECK: punpckldq	32493, %mm3
+// CHECK:  encoding: [0x0f,0x62,0x1d,0xed,0x7e,0x00,0x00]
+        	punpckldq	0x7eed,%mm3
+
+// CHECK: punpckldq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x62,0x1d,0xfe,0xca,0xbe,0xba]
+        	punpckldq	0xbabecafe,%mm3
+
+// CHECK: punpckldq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x62,0x1d,0x78,0x56,0x34,0x12]
+        	punpckldq	0x12345678,%mm3
+
+// CHECK: punpckldq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x62,0xdb]
+        	punpckldq	%mm3,%mm3
+
+// CHECK: punpckldq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpckldq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0x45,0x00,0x00,0x00]
+        	punpckldq	0x45,%xmm5
+
+// CHECK: punpckldq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0xed,0x7e,0x00,0x00]
+        	punpckldq	0x7eed,%xmm5
+
+// CHECK: punpckldq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpckldq	0xbabecafe,%xmm5
+
+// CHECK: punpckldq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0x2d,0x78,0x56,0x34,0x12]
+        	punpckldq	0x12345678,%xmm5
+
+// CHECK: punpckldq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x62,0xed]
+        	punpckldq	%xmm5,%xmm5
+
+// CHECK: pxor	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xef,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pxor	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pxor	69, %mm3
+// CHECK:  encoding: [0x0f,0xef,0x1d,0x45,0x00,0x00,0x00]
+        	pxor	0x45,%mm3
+
+// CHECK: pxor	32493, %mm3
+// CHECK:  encoding: [0x0f,0xef,0x1d,0xed,0x7e,0x00,0x00]
+        	pxor	0x7eed,%mm3
+
+// CHECK: pxor	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xef,0x1d,0xfe,0xca,0xbe,0xba]
+        	pxor	0xbabecafe,%mm3
+
+// CHECK: pxor	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xef,0x1d,0x78,0x56,0x34,0x12]
+        	pxor	0x12345678,%mm3
+
+// CHECK: pxor	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xef,0xdb]
+        	pxor	%mm3,%mm3
+
+// CHECK: pxor	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pxor	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pxor	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0x45,0x00,0x00,0x00]
+        	pxor	0x45,%xmm5
+
+// CHECK: pxor	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0xed,0x7e,0x00,0x00]
+        	pxor	0x7eed,%xmm5
+
+// CHECK: pxor	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0xfe,0xca,0xbe,0xba]
+        	pxor	0xbabecafe,%xmm5
+
+// CHECK: pxor	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0x2d,0x78,0x56,0x34,0x12]
+        	pxor	0x12345678,%xmm5
+
+// CHECK: pxor	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xef,0xed]
+        	pxor	%xmm5,%xmm5
+
+// CHECK: addps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
+        	addps	0x45,%xmm5
+
+// CHECK: addps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
+        	addps	0x7eed,%xmm5
+
+// CHECK: addps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
+        	addps	0xbabecafe,%xmm5
+
+// CHECK: addps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
+        	addps	0x12345678,%xmm5
+
+// CHECK: addps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x58,0xed]
+        	addps	%xmm5,%xmm5
+
+// CHECK: addss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
+        	addss	0x45,%xmm5
+
+// CHECK: addss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
+        	addss	0x7eed,%xmm5
+
+// CHECK: addss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
+        	addss	0xbabecafe,%xmm5
+
+// CHECK: addss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
+        	addss	0x12345678,%xmm5
+
+// CHECK: addss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x58,0xed]
+        	addss	%xmm5,%xmm5
+
+// CHECK: andnps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x55,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	andnps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: andnps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x55,0x2d,0x45,0x00,0x00,0x00]
+        	andnps	0x45,%xmm5
+
+// CHECK: andnps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x55,0x2d,0xed,0x7e,0x00,0x00]
+        	andnps	0x7eed,%xmm5
+
+// CHECK: andnps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x55,0x2d,0xfe,0xca,0xbe,0xba]
+        	andnps	0xbabecafe,%xmm5
+
+// CHECK: andnps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x55,0x2d,0x78,0x56,0x34,0x12]
+        	andnps	0x12345678,%xmm5
+
+// CHECK: andnps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x55,0xed]
+        	andnps	%xmm5,%xmm5
+
+// CHECK: andps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x54,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	andps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: andps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x54,0x2d,0x45,0x00,0x00,0x00]
+        	andps	0x45,%xmm5
+
+// CHECK: andps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x54,0x2d,0xed,0x7e,0x00,0x00]
+        	andps	0x7eed,%xmm5
+
+// CHECK: andps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x54,0x2d,0xfe,0xca,0xbe,0xba]
+        	andps	0xbabecafe,%xmm5
+
+// CHECK: andps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x54,0x2d,0x78,0x56,0x34,0x12]
+        	andps	0x12345678,%xmm5
+
+// CHECK: andps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x54,0xed]
+        	andps	%xmm5,%xmm5
+
+// CHECK: comiss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	comiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: comiss	69, %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0x2d,0x45,0x00,0x00,0x00]
+        	comiss	0x45,%xmm5
+
+// CHECK: comiss	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0x2d,0xed,0x7e,0x00,0x00]
+        	comiss	0x7eed,%xmm5
+
+// CHECK: comiss	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0x2d,0xfe,0xca,0xbe,0xba]
+        	comiss	0xbabecafe,%xmm5
+
+// CHECK: comiss	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0x2d,0x78,0x56,0x34,0x12]
+        	comiss	0x12345678,%xmm5
+
+// CHECK: comiss	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x2f,0xed]
+        	comiss	%xmm5,%xmm5
+
+// CHECK: cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtpi2ps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtpi2ps	0x45,%xmm5
+
+// CHECK: cvtpi2ps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtpi2ps	0x7eed,%xmm5
+
+// CHECK: cvtpi2ps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtpi2ps	0xbabecafe,%xmm5
+
+// CHECK: cvtpi2ps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtpi2ps	0x12345678,%xmm5
+
+// CHECK: cvtpi2ps	%mm3, %xmm5
+// CHECK:  encoding: [0x0f,0x2a,0xeb]
+        	cvtpi2ps	%mm3,%xmm5
+
+// CHECK: cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x2d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: cvtps2pi	69, %mm3
+// CHECK:  encoding: [0x0f,0x2d,0x1d,0x45,0x00,0x00,0x00]
+        	cvtps2pi	0x45,%mm3
+
+// CHECK: cvtps2pi	32493, %mm3
+// CHECK:  encoding: [0x0f,0x2d,0x1d,0xed,0x7e,0x00,0x00]
+        	cvtps2pi	0x7eed,%mm3
+
+// CHECK: cvtps2pi	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x2d,0x1d,0xfe,0xca,0xbe,0xba]
+        	cvtps2pi	0xbabecafe,%mm3
+
+// CHECK: cvtps2pi	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x2d,0x1d,0x78,0x56,0x34,0x12]
+        	cvtps2pi	0x12345678,%mm3
+
+// CHECK: cvtps2pi	%xmm5, %mm3
+// CHECK:  encoding: [0x0f,0x2d,0xdd]
+        	cvtps2pi	%xmm5,%mm3
+
+// CHECK: cvtsi2ss	%ecx, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0xe9]
+        	cvtsi2ss	%ecx,%xmm5
+
+// CHECK: cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtsi2ss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtsi2ss	0x45,%xmm5
+
+// CHECK: cvtsi2ss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtsi2ss	0x7eed,%xmm5
+
+// CHECK: cvtsi2ss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtsi2ss	0xbabecafe,%xmm5
+
+// CHECK: cvtsi2ss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtsi2ss	0x12345678,%xmm5
+
+// CHECK: cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x2c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: cvttps2pi	69, %mm3
+// CHECK:  encoding: [0x0f,0x2c,0x1d,0x45,0x00,0x00,0x00]
+        	cvttps2pi	0x45,%mm3
+
+// CHECK: cvttps2pi	32493, %mm3
+// CHECK:  encoding: [0x0f,0x2c,0x1d,0xed,0x7e,0x00,0x00]
+        	cvttps2pi	0x7eed,%mm3
+
+// CHECK: cvttps2pi	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x2c,0x1d,0xfe,0xca,0xbe,0xba]
+        	cvttps2pi	0xbabecafe,%mm3
+
+// CHECK: cvttps2pi	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x2c,0x1d,0x78,0x56,0x34,0x12]
+        	cvttps2pi	0x12345678,%mm3
+
+// CHECK: cvttps2pi	%xmm5, %mm3
+// CHECK:  encoding: [0x0f,0x2c,0xdd]
+        	cvttps2pi	%xmm5,%mm3
+
+// CHECK: cvttss2si	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: cvttss2si	69, %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0x45,0x00,0x00,0x00]
+        	cvttss2si	0x45,%ecx
+
+// CHECK: cvttss2si	32493, %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0xed,0x7e,0x00,0x00]
+        	cvttss2si	0x7eed,%ecx
+
+// CHECK: cvttss2si	3133065982, %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0xfe,0xca,0xbe,0xba]
+        	cvttss2si	0xbabecafe,%ecx
+
+// CHECK: cvttss2si	305419896, %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0x0d,0x78,0x56,0x34,0x12]
+        	cvttss2si	0x12345678,%ecx
+
+// CHECK: cvttss2si	%xmm5, %ecx
+// CHECK:  encoding: [0xf3,0x0f,0x2c,0xcd]
+        	cvttss2si	%xmm5,%ecx
+
+// CHECK: divps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	divps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: divps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
+        	divps	0x45,%xmm5
+
+// CHECK: divps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
+        	divps	0x7eed,%xmm5
+
+// CHECK: divps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
+        	divps	0xbabecafe,%xmm5
+
+// CHECK: divps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
+        	divps	0x12345678,%xmm5
+
+// CHECK: divps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5e,0xed]
+        	divps	%xmm5,%xmm5
+
+// CHECK: divss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	divss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: divss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
+        	divss	0x45,%xmm5
+
+// CHECK: divss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
+        	divss	0x7eed,%xmm5
+
+// CHECK: divss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
+        	divss	0xbabecafe,%xmm5
+
+// CHECK: divss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
+        	divss	0x12345678,%xmm5
+
+// CHECK: divss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5e,0xed]
+        	divss	%xmm5,%xmm5
+
+// CHECK: ldmxcsr	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xae,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: ldmxcsr	32493
+// CHECK:  encoding: [0x0f,0xae,0x15,0xed,0x7e,0x00,0x00]
+        	ldmxcsr	0x7eed
+
+// CHECK: ldmxcsr	3133065982
+// CHECK:  encoding: [0x0f,0xae,0x15,0xfe,0xca,0xbe,0xba]
+        	ldmxcsr	0xbabecafe
+
+// CHECK: ldmxcsr	305419896
+// CHECK:  encoding: [0x0f,0xae,0x15,0x78,0x56,0x34,0x12]
+        	ldmxcsr	0x12345678
+
+// CHECK: maskmovq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf7,0xdb]
+        	maskmovq	%mm3,%mm3
+
+// CHECK: maxps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	maxps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: maxps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
+        	maxps	0x45,%xmm5
+
+// CHECK: maxps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
+        	maxps	0x7eed,%xmm5
+
+// CHECK: maxps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
+        	maxps	0xbabecafe,%xmm5
+
+// CHECK: maxps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
+        	maxps	0x12345678,%xmm5
+
+// CHECK: maxps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5f,0xed]
+        	maxps	%xmm5,%xmm5
+
+// CHECK: maxss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	maxss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: maxss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
+        	maxss	0x45,%xmm5
+
+// CHECK: maxss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
+        	maxss	0x7eed,%xmm5
+
+// CHECK: maxss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
+        	maxss	0xbabecafe,%xmm5
+
+// CHECK: maxss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
+        	maxss	0x12345678,%xmm5
+
+// CHECK: maxss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5f,0xed]
+        	maxss	%xmm5,%xmm5
+
+// CHECK: minps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	minps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: minps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
+        	minps	0x45,%xmm5
+
+// CHECK: minps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
+        	minps	0x7eed,%xmm5
+
+// CHECK: minps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
+        	minps	0xbabecafe,%xmm5
+
+// CHECK: minps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
+        	minps	0x12345678,%xmm5
+
+// CHECK: minps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5d,0xed]
+        	minps	%xmm5,%xmm5
+
+// CHECK: minss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	minss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: minss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
+        	minss	0x45,%xmm5
+
+// CHECK: minss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
+        	minss	0x7eed,%xmm5
+
+// CHECK: minss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
+        	minss	0xbabecafe,%xmm5
+
+// CHECK: minss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
+        	minss	0x12345678,%xmm5
+
+// CHECK: minss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5d,0xed]
+        	minss	%xmm5,%xmm5
+
+// CHECK: movaps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movaps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0x2d,0x45,0x00,0x00,0x00]
+        	movaps	0x45,%xmm5
+
+// CHECK: movaps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0x2d,0xed,0x7e,0x00,0x00]
+        	movaps	0x7eed,%xmm5
+
+// CHECK: movaps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0x2d,0xfe,0xca,0xbe,0xba]
+        	movaps	0xbabecafe,%xmm5
+
+// CHECK: movaps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0x2d,0x78,0x56,0x34,0x12]
+        	movaps	0x12345678,%xmm5
+
+// CHECK: movaps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0xed]
+        	movaps	%xmm5,%xmm5
+
+// CHECK: movaps	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movaps	%xmm5, 69
+// CHECK:  encoding: [0x0f,0x29,0x2d,0x45,0x00,0x00,0x00]
+        	movaps	%xmm5,0x45
+
+// CHECK: movaps	%xmm5, 32493
+// CHECK:  encoding: [0x0f,0x29,0x2d,0xed,0x7e,0x00,0x00]
+        	movaps	%xmm5,0x7eed
+
+// CHECK: movaps	%xmm5, 3133065982
+// CHECK:  encoding: [0x0f,0x29,0x2d,0xfe,0xca,0xbe,0xba]
+        	movaps	%xmm5,0xbabecafe
+
+// CHECK: movaps	%xmm5, 305419896
+// CHECK:  encoding: [0x0f,0x29,0x2d,0x78,0x56,0x34,0x12]
+        	movaps	%xmm5,0x12345678
+
+// CHECK: movaps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x28,0xed]
+        	movaps	%xmm5,%xmm5
+
+// CHECK: movhlps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x12,0xed]
+        	movhlps	%xmm5,%xmm5
+
+// CHECK: movhps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movhps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
+        	movhps	0x45,%xmm5
+
+// CHECK: movhps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
+        	movhps	0x7eed,%xmm5
+
+// CHECK: movhps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
+        	movhps	0xbabecafe,%xmm5
+
+// CHECK: movhps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
+        	movhps	0x12345678,%xmm5
+
+// CHECK: movhps	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movhps	%xmm5, 69
+// CHECK:  encoding: [0x0f,0x17,0x2d,0x45,0x00,0x00,0x00]
+        	movhps	%xmm5,0x45
+
+// CHECK: movhps	%xmm5, 32493
+// CHECK:  encoding: [0x0f,0x17,0x2d,0xed,0x7e,0x00,0x00]
+        	movhps	%xmm5,0x7eed
+
+// CHECK: movhps	%xmm5, 3133065982
+// CHECK:  encoding: [0x0f,0x17,0x2d,0xfe,0xca,0xbe,0xba]
+        	movhps	%xmm5,0xbabecafe
+
+// CHECK: movhps	%xmm5, 305419896
+// CHECK:  encoding: [0x0f,0x17,0x2d,0x78,0x56,0x34,0x12]
+        	movhps	%xmm5,0x12345678
+
+// CHECK: movlhps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x16,0xed]
+        	movlhps	%xmm5,%xmm5
+
+// CHECK: movlps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movlps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movlps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
+        	movlps	0x45,%xmm5
+
+// CHECK: movlps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
+        	movlps	0x7eed,%xmm5
+
+// CHECK: movlps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
+        	movlps	0xbabecafe,%xmm5
+
+// CHECK: movlps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
+        	movlps	0x12345678,%xmm5
+
+// CHECK: movlps	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x13,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movlps	%xmm5, 69
+// CHECK:  encoding: [0x0f,0x13,0x2d,0x45,0x00,0x00,0x00]
+        	movlps	%xmm5,0x45
+
+// CHECK: movlps	%xmm5, 32493
+// CHECK:  encoding: [0x0f,0x13,0x2d,0xed,0x7e,0x00,0x00]
+        	movlps	%xmm5,0x7eed
+
+// CHECK: movlps	%xmm5, 3133065982
+// CHECK:  encoding: [0x0f,0x13,0x2d,0xfe,0xca,0xbe,0xba]
+        	movlps	%xmm5,0xbabecafe
+
+// CHECK: movlps	%xmm5, 305419896
+// CHECK:  encoding: [0x0f,0x13,0x2d,0x78,0x56,0x34,0x12]
+        	movlps	%xmm5,0x12345678
+
+// CHECK: movmskps	%xmm5, %ecx
+// CHECK:  encoding: [0x0f,0x50,0xcd]
+        	movmskps	%xmm5,%ecx
+
+// CHECK: movntps	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movntps	%xmm5, 69
+// CHECK:  encoding: [0x0f,0x2b,0x2d,0x45,0x00,0x00,0x00]
+        	movntps	%xmm5,0x45
+
+// CHECK: movntps	%xmm5, 32493
+// CHECK:  encoding: [0x0f,0x2b,0x2d,0xed,0x7e,0x00,0x00]
+        	movntps	%xmm5,0x7eed
+
+// CHECK: movntps	%xmm5, 3133065982
+// CHECK:  encoding: [0x0f,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
+        	movntps	%xmm5,0xbabecafe
+
+// CHECK: movntps	%xmm5, 305419896
+// CHECK:  encoding: [0x0f,0x2b,0x2d,0x78,0x56,0x34,0x12]
+        	movntps	%xmm5,0x12345678
+
+// CHECK: movntq	%mm3, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xe7,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movntq	%mm3, 69
+// CHECK:  encoding: [0x0f,0xe7,0x1d,0x45,0x00,0x00,0x00]
+        	movntq	%mm3,0x45
+
+// CHECK: movntq	%mm3, 32493
+// CHECK:  encoding: [0x0f,0xe7,0x1d,0xed,0x7e,0x00,0x00]
+        	movntq	%mm3,0x7eed
+
+// CHECK: movntq	%mm3, 3133065982
+// CHECK:  encoding: [0x0f,0xe7,0x1d,0xfe,0xca,0xbe,0xba]
+        	movntq	%mm3,0xbabecafe
+
+// CHECK: movntq	%mm3, 305419896
+// CHECK:  encoding: [0x0f,0xe7,0x1d,0x78,0x56,0x34,0x12]
+        	movntq	%mm3,0x12345678
+
+// CHECK: movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0xe7,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movntdq	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0x45,0x00,0x00,0x00]
+        	movntdq	%xmm5,0x45
+
+// CHECK: movntdq	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0xed,0x7e,0x00,0x00]
+        	movntdq	%xmm5,0x7eed
+
+// CHECK: movntdq	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0xfe,0xca,0xbe,0xba]
+        	movntdq	%xmm5,0xbabecafe
+
+// CHECK: movntdq	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0xe7,0x2d,0x78,0x56,0x34,0x12]
+        	movntdq	%xmm5,0x12345678
+
+// CHECK: movss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
+        	movss	0x45,%xmm5
+
+// CHECK: movss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
+        	movss	0x7eed,%xmm5
+
+// CHECK: movss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
+        	movss	0xbabecafe,%xmm5
+
+// CHECK: movss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
+        	movss	0x12345678,%xmm5
+
+// CHECK: movss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0xed]
+        	movss	%xmm5,%xmm5
+
+// CHECK: movss	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf3,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movss	%xmm5, 69
+// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
+        	movss	%xmm5,0x45
+
+// CHECK: movss	%xmm5, 32493
+// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
+        	movss	%xmm5,0x7eed
+
+// CHECK: movss	%xmm5, 3133065982
+// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
+        	movss	%xmm5,0xbabecafe
+
+// CHECK: movss	%xmm5, 305419896
+// CHECK:  encoding: [0xf3,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
+        	movss	%xmm5,0x12345678
+
+// CHECK: movss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x10,0xed]
+        	movss	%xmm5,%xmm5
+
+// CHECK: movups	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movups	69, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
+        	movups	0x45,%xmm5
+
+// CHECK: movups	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
+        	movups	0x7eed,%xmm5
+
+// CHECK: movups	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
+        	movups	0xbabecafe,%xmm5
+
+// CHECK: movups	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
+        	movups	0x12345678,%xmm5
+
+// CHECK: movups	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0xed]
+        	movups	%xmm5,%xmm5
+
+// CHECK: movups	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movups	%xmm5, 69
+// CHECK:  encoding: [0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
+        	movups	%xmm5,0x45
+
+// CHECK: movups	%xmm5, 32493
+// CHECK:  encoding: [0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
+        	movups	%xmm5,0x7eed
+
+// CHECK: movups	%xmm5, 3133065982
+// CHECK:  encoding: [0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
+        	movups	%xmm5,0xbabecafe
+
+// CHECK: movups	%xmm5, 305419896
+// CHECK:  encoding: [0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
+        	movups	%xmm5,0x12345678
+
+// CHECK: movups	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x10,0xed]
+        	movups	%xmm5,%xmm5
+
+// CHECK: mulps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	mulps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: mulps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
+        	mulps	0x45,%xmm5
+
+// CHECK: mulps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
+        	mulps	0x7eed,%xmm5
+
+// CHECK: mulps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
+        	mulps	0xbabecafe,%xmm5
+
+// CHECK: mulps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
+        	mulps	0x12345678,%xmm5
+
+// CHECK: mulps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x59,0xed]
+        	mulps	%xmm5,%xmm5
+
+// CHECK: mulss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	mulss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: mulss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
+        	mulss	0x45,%xmm5
+
+// CHECK: mulss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
+        	mulss	0x7eed,%xmm5
+
+// CHECK: mulss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
+        	mulss	0xbabecafe,%xmm5
+
+// CHECK: mulss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
+        	mulss	0x12345678,%xmm5
+
+// CHECK: mulss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x59,0xed]
+        	mulss	%xmm5,%xmm5
+
+// CHECK: orps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x56,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	orps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: orps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x56,0x2d,0x45,0x00,0x00,0x00]
+        	orps	0x45,%xmm5
+
+// CHECK: orps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x56,0x2d,0xed,0x7e,0x00,0x00]
+        	orps	0x7eed,%xmm5
+
+// CHECK: orps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x56,0x2d,0xfe,0xca,0xbe,0xba]
+        	orps	0xbabecafe,%xmm5
+
+// CHECK: orps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x56,0x2d,0x78,0x56,0x34,0x12]
+        	orps	0x12345678,%xmm5
+
+// CHECK: orps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x56,0xed]
+        	orps	%xmm5,%xmm5
+
+// CHECK: pavgb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe0,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pavgb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pavgb	69, %mm3
+// CHECK:  encoding: [0x0f,0xe0,0x1d,0x45,0x00,0x00,0x00]
+        	pavgb	0x45,%mm3
+
+// CHECK: pavgb	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe0,0x1d,0xed,0x7e,0x00,0x00]
+        	pavgb	0x7eed,%mm3
+
+// CHECK: pavgb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe0,0x1d,0xfe,0xca,0xbe,0xba]
+        	pavgb	0xbabecafe,%mm3
+
+// CHECK: pavgb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe0,0x1d,0x78,0x56,0x34,0x12]
+        	pavgb	0x12345678,%mm3
+
+// CHECK: pavgb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe0,0xdb]
+        	pavgb	%mm3,%mm3
+
+// CHECK: pavgb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pavgb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pavgb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0x45,0x00,0x00,0x00]
+        	pavgb	0x45,%xmm5
+
+// CHECK: pavgb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0xed,0x7e,0x00,0x00]
+        	pavgb	0x7eed,%xmm5
+
+// CHECK: pavgb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0xfe,0xca,0xbe,0xba]
+        	pavgb	0xbabecafe,%xmm5
+
+// CHECK: pavgb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0x2d,0x78,0x56,0x34,0x12]
+        	pavgb	0x12345678,%xmm5
+
+// CHECK: pavgb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe0,0xed]
+        	pavgb	%xmm5,%xmm5
+
+// CHECK: pavgw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe3,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pavgw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pavgw	69, %mm3
+// CHECK:  encoding: [0x0f,0xe3,0x1d,0x45,0x00,0x00,0x00]
+        	pavgw	0x45,%mm3
+
+// CHECK: pavgw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe3,0x1d,0xed,0x7e,0x00,0x00]
+        	pavgw	0x7eed,%mm3
+
+// CHECK: pavgw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe3,0x1d,0xfe,0xca,0xbe,0xba]
+        	pavgw	0xbabecafe,%mm3
+
+// CHECK: pavgw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe3,0x1d,0x78,0x56,0x34,0x12]
+        	pavgw	0x12345678,%mm3
+
+// CHECK: pavgw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe3,0xdb]
+        	pavgw	%mm3,%mm3
+
+// CHECK: pavgw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pavgw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pavgw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0x45,0x00,0x00,0x00]
+        	pavgw	0x45,%xmm5
+
+// CHECK: pavgw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0xed,0x7e,0x00,0x00]
+        	pavgw	0x7eed,%xmm5
+
+// CHECK: pavgw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0xfe,0xca,0xbe,0xba]
+        	pavgw	0xbabecafe,%xmm5
+
+// CHECK: pavgw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0x2d,0x78,0x56,0x34,0x12]
+        	pavgw	0x12345678,%xmm5
+
+// CHECK: pavgw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe3,0xed]
+        	pavgw	%xmm5,%xmm5
+
+// CHECK: pmaxsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xee,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmaxsw	69, %mm3
+// CHECK:  encoding: [0x0f,0xee,0x1d,0x45,0x00,0x00,0x00]
+        	pmaxsw	0x45,%mm3
+
+// CHECK: pmaxsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xee,0x1d,0xed,0x7e,0x00,0x00]
+        	pmaxsw	0x7eed,%mm3
+
+// CHECK: pmaxsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xee,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmaxsw	0xbabecafe,%mm3
+
+// CHECK: pmaxsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xee,0x1d,0x78,0x56,0x34,0x12]
+        	pmaxsw	0x12345678,%mm3
+
+// CHECK: pmaxsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xee,0xdb]
+        	pmaxsw	%mm3,%mm3
+
+// CHECK: pmaxsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxsw	0x45,%xmm5
+
+// CHECK: pmaxsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxsw	0x7eed,%xmm5
+
+// CHECK: pmaxsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxsw	0xbabecafe,%xmm5
+
+// CHECK: pmaxsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxsw	0x12345678,%xmm5
+
+// CHECK: pmaxsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xee,0xed]
+        	pmaxsw	%xmm5,%xmm5
+
+// CHECK: pmaxub	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xde,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmaxub	69, %mm3
+// CHECK:  encoding: [0x0f,0xde,0x1d,0x45,0x00,0x00,0x00]
+        	pmaxub	0x45,%mm3
+
+// CHECK: pmaxub	32493, %mm3
+// CHECK:  encoding: [0x0f,0xde,0x1d,0xed,0x7e,0x00,0x00]
+        	pmaxub	0x7eed,%mm3
+
+// CHECK: pmaxub	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xde,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmaxub	0xbabecafe,%mm3
+
+// CHECK: pmaxub	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xde,0x1d,0x78,0x56,0x34,0x12]
+        	pmaxub	0x12345678,%mm3
+
+// CHECK: pmaxub	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xde,0xdb]
+        	pmaxub	%mm3,%mm3
+
+// CHECK: pmaxub	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxub	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxub	0x45,%xmm5
+
+// CHECK: pmaxub	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxub	0x7eed,%xmm5
+
+// CHECK: pmaxub	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxub	0xbabecafe,%xmm5
+
+// CHECK: pmaxub	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxub	0x12345678,%xmm5
+
+// CHECK: pmaxub	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xde,0xed]
+        	pmaxub	%xmm5,%xmm5
+
+// CHECK: pminsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xea,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pminsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pminsw	69, %mm3
+// CHECK:  encoding: [0x0f,0xea,0x1d,0x45,0x00,0x00,0x00]
+        	pminsw	0x45,%mm3
+
+// CHECK: pminsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xea,0x1d,0xed,0x7e,0x00,0x00]
+        	pminsw	0x7eed,%mm3
+
+// CHECK: pminsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xea,0x1d,0xfe,0xca,0xbe,0xba]
+        	pminsw	0xbabecafe,%mm3
+
+// CHECK: pminsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xea,0x1d,0x78,0x56,0x34,0x12]
+        	pminsw	0x12345678,%mm3
+
+// CHECK: pminsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xea,0xdb]
+        	pminsw	%mm3,%mm3
+
+// CHECK: pminsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0x45,0x00,0x00,0x00]
+        	pminsw	0x45,%xmm5
+
+// CHECK: pminsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0xed,0x7e,0x00,0x00]
+        	pminsw	0x7eed,%xmm5
+
+// CHECK: pminsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminsw	0xbabecafe,%xmm5
+
+// CHECK: pminsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0x2d,0x78,0x56,0x34,0x12]
+        	pminsw	0x12345678,%xmm5
+
+// CHECK: pminsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xea,0xed]
+        	pminsw	%xmm5,%xmm5
+
+// CHECK: pminub	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pminub	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pminub	69, %mm3
+// CHECK:  encoding: [0x0f,0xda,0x1d,0x45,0x00,0x00,0x00]
+        	pminub	0x45,%mm3
+
+// CHECK: pminub	32493, %mm3
+// CHECK:  encoding: [0x0f,0xda,0x1d,0xed,0x7e,0x00,0x00]
+        	pminub	0x7eed,%mm3
+
+// CHECK: pminub	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xda,0x1d,0xfe,0xca,0xbe,0xba]
+        	pminub	0xbabecafe,%mm3
+
+// CHECK: pminub	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xda,0x1d,0x78,0x56,0x34,0x12]
+        	pminub	0x12345678,%mm3
+
+// CHECK: pminub	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xda,0xdb]
+        	pminub	%mm3,%mm3
+
+// CHECK: pminub	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminub	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminub	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0x45,0x00,0x00,0x00]
+        	pminub	0x45,%xmm5
+
+// CHECK: pminub	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0xed,0x7e,0x00,0x00]
+        	pminub	0x7eed,%xmm5
+
+// CHECK: pminub	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminub	0xbabecafe,%xmm5
+
+// CHECK: pminub	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0x2d,0x78,0x56,0x34,0x12]
+        	pminub	0x12345678,%xmm5
+
+// CHECK: pminub	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xda,0xed]
+        	pminub	%xmm5,%xmm5
+
+// CHECK: pmovmskb	%mm3, %ecx
+// CHECK:  encoding: [0x0f,0xd7,0xcb]
+        	pmovmskb	%mm3,%ecx
+
+// CHECK: pmovmskb	%xmm5, %ecx
+// CHECK:  encoding: [0x66,0x0f,0xd7,0xcd]
+        	pmovmskb	%xmm5,%ecx
+
+// CHECK: pmulhuw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xe4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmulhuw	69, %mm3
+// CHECK:  encoding: [0x0f,0xe4,0x1d,0x45,0x00,0x00,0x00]
+        	pmulhuw	0x45,%mm3
+
+// CHECK: pmulhuw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xe4,0x1d,0xed,0x7e,0x00,0x00]
+        	pmulhuw	0x7eed,%mm3
+
+// CHECK: pmulhuw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xe4,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmulhuw	0xbabecafe,%mm3
+
+// CHECK: pmulhuw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xe4,0x1d,0x78,0x56,0x34,0x12]
+        	pmulhuw	0x12345678,%mm3
+
+// CHECK: pmulhuw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xe4,0xdb]
+        	pmulhuw	%mm3,%mm3
+
+// CHECK: pmulhuw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmulhuw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0x45,0x00,0x00,0x00]
+        	pmulhuw	0x45,%xmm5
+
+// CHECK: pmulhuw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0xed,0x7e,0x00,0x00]
+        	pmulhuw	0x7eed,%xmm5
+
+// CHECK: pmulhuw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmulhuw	0xbabecafe,%xmm5
+
+// CHECK: pmulhuw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0x2d,0x78,0x56,0x34,0x12]
+        	pmulhuw	0x12345678,%xmm5
+
+// CHECK: pmulhuw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xe4,0xed]
+        	pmulhuw	%xmm5,%xmm5
+
+// CHECK: prefetchnta	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x18,0x84,0xcb,0xef,0xbe,0xad,0xde]
+        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: prefetchnta	32493
+// CHECK:  encoding: [0x0f,0x18,0x05,0xed,0x7e,0x00,0x00]
+        	prefetchnta	0x7eed
+
+// CHECK: prefetchnta	3133065982
+// CHECK:  encoding: [0x0f,0x18,0x05,0xfe,0xca,0xbe,0xba]
+        	prefetchnta	0xbabecafe
+
+// CHECK: prefetchnta	305419896
+// CHECK:  encoding: [0x0f,0x18,0x05,0x78,0x56,0x34,0x12]
+        	prefetchnta	0x12345678
+
+// CHECK: prefetcht0	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x18,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: prefetcht0	32493
+// CHECK:  encoding: [0x0f,0x18,0x0d,0xed,0x7e,0x00,0x00]
+        	prefetcht0	0x7eed
+
+// CHECK: prefetcht0	3133065982
+// CHECK:  encoding: [0x0f,0x18,0x0d,0xfe,0xca,0xbe,0xba]
+        	prefetcht0	0xbabecafe
+
+// CHECK: prefetcht0	305419896
+// CHECK:  encoding: [0x0f,0x18,0x0d,0x78,0x56,0x34,0x12]
+        	prefetcht0	0x12345678
+
+// CHECK: prefetcht1	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x18,0x94,0xcb,0xef,0xbe,0xad,0xde]
+        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: prefetcht1	32493
+// CHECK:  encoding: [0x0f,0x18,0x15,0xed,0x7e,0x00,0x00]
+        	prefetcht1	0x7eed
+
+// CHECK: prefetcht1	3133065982
+// CHECK:  encoding: [0x0f,0x18,0x15,0xfe,0xca,0xbe,0xba]
+        	prefetcht1	0xbabecafe
+
+// CHECK: prefetcht1	305419896
+// CHECK:  encoding: [0x0f,0x18,0x15,0x78,0x56,0x34,0x12]
+        	prefetcht1	0x12345678
+
+// CHECK: prefetcht2	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x18,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: prefetcht2	32493
+// CHECK:  encoding: [0x0f,0x18,0x1d,0xed,0x7e,0x00,0x00]
+        	prefetcht2	0x7eed
+
+// CHECK: prefetcht2	3133065982
+// CHECK:  encoding: [0x0f,0x18,0x1d,0xfe,0xca,0xbe,0xba]
+        	prefetcht2	0xbabecafe
+
+// CHECK: prefetcht2	305419896
+// CHECK:  encoding: [0x0f,0x18,0x1d,0x78,0x56,0x34,0x12]
+        	prefetcht2	0x12345678
+
+// CHECK: psadbw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf6,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psadbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psadbw	69, %mm3
+// CHECK:  encoding: [0x0f,0xf6,0x1d,0x45,0x00,0x00,0x00]
+        	psadbw	0x45,%mm3
+
+// CHECK: psadbw	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf6,0x1d,0xed,0x7e,0x00,0x00]
+        	psadbw	0x7eed,%mm3
+
+// CHECK: psadbw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf6,0x1d,0xfe,0xca,0xbe,0xba]
+        	psadbw	0xbabecafe,%mm3
+
+// CHECK: psadbw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf6,0x1d,0x78,0x56,0x34,0x12]
+        	psadbw	0x12345678,%mm3
+
+// CHECK: psadbw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf6,0xdb]
+        	psadbw	%mm3,%mm3
+
+// CHECK: psadbw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psadbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psadbw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0x45,0x00,0x00,0x00]
+        	psadbw	0x45,%xmm5
+
+// CHECK: psadbw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0xed,0x7e,0x00,0x00]
+        	psadbw	0x7eed,%xmm5
+
+// CHECK: psadbw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0xfe,0xca,0xbe,0xba]
+        	psadbw	0xbabecafe,%xmm5
+
+// CHECK: psadbw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0x2d,0x78,0x56,0x34,0x12]
+        	psadbw	0x12345678,%xmm5
+
+// CHECK: psadbw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf6,0xed]
+        	psadbw	%xmm5,%xmm5
+
+// CHECK: rcpps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x53,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: rcpps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x53,0x2d,0x45,0x00,0x00,0x00]
+        	rcpps	0x45,%xmm5
+
+// CHECK: rcpps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x53,0x2d,0xed,0x7e,0x00,0x00]
+        	rcpps	0x7eed,%xmm5
+
+// CHECK: rcpps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x53,0x2d,0xfe,0xca,0xbe,0xba]
+        	rcpps	0xbabecafe,%xmm5
+
+// CHECK: rcpps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x53,0x2d,0x78,0x56,0x34,0x12]
+        	rcpps	0x12345678,%xmm5
+
+// CHECK: rcpps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x53,0xed]
+        	rcpps	%xmm5,%xmm5
+
+// CHECK: rcpss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: rcpss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0x45,0x00,0x00,0x00]
+        	rcpss	0x45,%xmm5
+
+// CHECK: rcpss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0xed,0x7e,0x00,0x00]
+        	rcpss	0x7eed,%xmm5
+
+// CHECK: rcpss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0xfe,0xca,0xbe,0xba]
+        	rcpss	0xbabecafe,%xmm5
+
+// CHECK: rcpss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0x2d,0x78,0x56,0x34,0x12]
+        	rcpss	0x12345678,%xmm5
+
+// CHECK: rcpss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x53,0xed]
+        	rcpss	%xmm5,%xmm5
+
+// CHECK: rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x52,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: rsqrtps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x52,0x2d,0x45,0x00,0x00,0x00]
+        	rsqrtps	0x45,%xmm5
+
+// CHECK: rsqrtps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x52,0x2d,0xed,0x7e,0x00,0x00]
+        	rsqrtps	0x7eed,%xmm5
+
+// CHECK: rsqrtps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x52,0x2d,0xfe,0xca,0xbe,0xba]
+        	rsqrtps	0xbabecafe,%xmm5
+
+// CHECK: rsqrtps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x52,0x2d,0x78,0x56,0x34,0x12]
+        	rsqrtps	0x12345678,%xmm5
+
+// CHECK: rsqrtps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x52,0xed]
+        	rsqrtps	%xmm5,%xmm5
+
+// CHECK: rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: rsqrtss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0x45,0x00,0x00,0x00]
+        	rsqrtss	0x45,%xmm5
+
+// CHECK: rsqrtss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0xed,0x7e,0x00,0x00]
+        	rsqrtss	0x7eed,%xmm5
+
+// CHECK: rsqrtss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0xfe,0xca,0xbe,0xba]
+        	rsqrtss	0xbabecafe,%xmm5
+
+// CHECK: rsqrtss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0x2d,0x78,0x56,0x34,0x12]
+        	rsqrtss	0x12345678,%xmm5
+
+// CHECK: rsqrtss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x52,0xed]
+        	rsqrtss	%xmm5,%xmm5
+
+// CHECK: sqrtps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: sqrtps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
+        	sqrtps	0x45,%xmm5
+
+// CHECK: sqrtps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
+        	sqrtps	0x7eed,%xmm5
+
+// CHECK: sqrtps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
+        	sqrtps	0xbabecafe,%xmm5
+
+// CHECK: sqrtps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
+        	sqrtps	0x12345678,%xmm5
+
+// CHECK: sqrtps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x51,0xed]
+        	sqrtps	%xmm5,%xmm5
+
+// CHECK: sqrtss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: sqrtss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
+        	sqrtss	0x45,%xmm5
+
+// CHECK: sqrtss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
+        	sqrtss	0x7eed,%xmm5
+
+// CHECK: sqrtss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
+        	sqrtss	0xbabecafe,%xmm5
+
+// CHECK: sqrtss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
+        	sqrtss	0x12345678,%xmm5
+
+// CHECK: sqrtss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x51,0xed]
+        	sqrtss	%xmm5,%xmm5
+
+// CHECK: stmxcsr	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xae,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: stmxcsr	32493
+// CHECK:  encoding: [0x0f,0xae,0x1d,0xed,0x7e,0x00,0x00]
+        	stmxcsr	0x7eed
+
+// CHECK: stmxcsr	3133065982
+// CHECK:  encoding: [0x0f,0xae,0x1d,0xfe,0xca,0xbe,0xba]
+        	stmxcsr	0xbabecafe
+
+// CHECK: stmxcsr	305419896
+// CHECK:  encoding: [0x0f,0xae,0x1d,0x78,0x56,0x34,0x12]
+        	stmxcsr	0x12345678
+
+// CHECK: subps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	subps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: subps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
+        	subps	0x45,%xmm5
+
+// CHECK: subps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
+        	subps	0x7eed,%xmm5
+
+// CHECK: subps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
+        	subps	0xbabecafe,%xmm5
+
+// CHECK: subps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
+        	subps	0x12345678,%xmm5
+
+// CHECK: subps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5c,0xed]
+        	subps	%xmm5,%xmm5
+
+// CHECK: subss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	subss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: subss	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
+        	subss	0x45,%xmm5
+
+// CHECK: subss	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
+        	subss	0x7eed,%xmm5
+
+// CHECK: subss	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
+        	subss	0xbabecafe,%xmm5
+
+// CHECK: subss	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
+        	subss	0x12345678,%xmm5
+
+// CHECK: subss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5c,0xed]
+        	subss	%xmm5,%xmm5
+
+// CHECK: ucomiss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: ucomiss	69, %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0x2d,0x45,0x00,0x00,0x00]
+        	ucomiss	0x45,%xmm5
+
+// CHECK: ucomiss	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0x2d,0xed,0x7e,0x00,0x00]
+        	ucomiss	0x7eed,%xmm5
+
+// CHECK: ucomiss	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0x2d,0xfe,0xca,0xbe,0xba]
+        	ucomiss	0xbabecafe,%xmm5
+
+// CHECK: ucomiss	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0x2d,0x78,0x56,0x34,0x12]
+        	ucomiss	0x12345678,%xmm5
+
+// CHECK: ucomiss	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x2e,0xed]
+        	ucomiss	%xmm5,%xmm5
+
+// CHECK: unpckhps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x15,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	unpckhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: unpckhps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x15,0x2d,0x45,0x00,0x00,0x00]
+        	unpckhps	0x45,%xmm5
+
+// CHECK: unpckhps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x15,0x2d,0xed,0x7e,0x00,0x00]
+        	unpckhps	0x7eed,%xmm5
+
+// CHECK: unpckhps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x15,0x2d,0xfe,0xca,0xbe,0xba]
+        	unpckhps	0xbabecafe,%xmm5
+
+// CHECK: unpckhps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x15,0x2d,0x78,0x56,0x34,0x12]
+        	unpckhps	0x12345678,%xmm5
+
+// CHECK: unpckhps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x15,0xed]
+        	unpckhps	%xmm5,%xmm5
+
+// CHECK: unpcklps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x14,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	unpcklps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: unpcklps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x14,0x2d,0x45,0x00,0x00,0x00]
+        	unpcklps	0x45,%xmm5
+
+// CHECK: unpcklps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x14,0x2d,0xed,0x7e,0x00,0x00]
+        	unpcklps	0x7eed,%xmm5
+
+// CHECK: unpcklps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x14,0x2d,0xfe,0xca,0xbe,0xba]
+        	unpcklps	0xbabecafe,%xmm5
+
+// CHECK: unpcklps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x14,0x2d,0x78,0x56,0x34,0x12]
+        	unpcklps	0x12345678,%xmm5
+
+// CHECK: unpcklps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x14,0xed]
+        	unpcklps	%xmm5,%xmm5
+
+// CHECK: xorps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x57,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	xorps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: xorps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x57,0x2d,0x45,0x00,0x00,0x00]
+        	xorps	0x45,%xmm5
+
+// CHECK: xorps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x57,0x2d,0xed,0x7e,0x00,0x00]
+        	xorps	0x7eed,%xmm5
+
+// CHECK: xorps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x57,0x2d,0xfe,0xca,0xbe,0xba]
+        	xorps	0xbabecafe,%xmm5
+
+// CHECK: xorps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x57,0x2d,0x78,0x56,0x34,0x12]
+        	xorps	0x12345678,%xmm5
+
+// CHECK: xorps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x57,0xed]
+        	xorps	%xmm5,%xmm5
+
+// CHECK: addpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
+        	addpd	0x45,%xmm5
+
+// CHECK: addpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
+        	addpd	0x7eed,%xmm5
+
+// CHECK: addpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
+        	addpd	0xbabecafe,%xmm5
+
+// CHECK: addpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
+        	addpd	0x12345678,%xmm5
+
+// CHECK: addpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x58,0xed]
+        	addpd	%xmm5,%xmm5
+
+// CHECK: addsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0x45,0x00,0x00,0x00]
+        	addsd	0x45,%xmm5
+
+// CHECK: addsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0xed,0x7e,0x00,0x00]
+        	addsd	0x7eed,%xmm5
+
+// CHECK: addsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0xfe,0xca,0xbe,0xba]
+        	addsd	0xbabecafe,%xmm5
+
+// CHECK: addsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0x2d,0x78,0x56,0x34,0x12]
+        	addsd	0x12345678,%xmm5
+
+// CHECK: addsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x58,0xed]
+        	addsd	%xmm5,%xmm5
+
+// CHECK: andnpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	andnpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: andnpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0x45,0x00,0x00,0x00]
+        	andnpd	0x45,%xmm5
+
+// CHECK: andnpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0xed,0x7e,0x00,0x00]
+        	andnpd	0x7eed,%xmm5
+
+// CHECK: andnpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0xfe,0xca,0xbe,0xba]
+        	andnpd	0xbabecafe,%xmm5
+
+// CHECK: andnpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0x2d,0x78,0x56,0x34,0x12]
+        	andnpd	0x12345678,%xmm5
+
+// CHECK: andnpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x55,0xed]
+        	andnpd	%xmm5,%xmm5
+
+// CHECK: andpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	andpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: andpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0x45,0x00,0x00,0x00]
+        	andpd	0x45,%xmm5
+
+// CHECK: andpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0xed,0x7e,0x00,0x00]
+        	andpd	0x7eed,%xmm5
+
+// CHECK: andpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0xfe,0xca,0xbe,0xba]
+        	andpd	0xbabecafe,%xmm5
+
+// CHECK: andpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0x2d,0x78,0x56,0x34,0x12]
+        	andpd	0x12345678,%xmm5
+
+// CHECK: andpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x54,0xed]
+        	andpd	%xmm5,%xmm5
+
+// CHECK: comisd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: comisd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0x45,0x00,0x00,0x00]
+        	comisd	0x45,%xmm5
+
+// CHECK: comisd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0xed,0x7e,0x00,0x00]
+        	comisd	0x7eed,%xmm5
+
+// CHECK: comisd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0xfe,0xca,0xbe,0xba]
+        	comisd	0xbabecafe,%xmm5
+
+// CHECK: comisd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0x2d,0x78,0x56,0x34,0x12]
+        	comisd	0x12345678,%xmm5
+
+// CHECK: comisd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2f,0xed]
+        	comisd	%xmm5,%xmm5
+
+// CHECK: cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtpi2pd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtpi2pd	0x45,%xmm5
+
+// CHECK: cvtpi2pd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtpi2pd	0x7eed,%xmm5
+
+// CHECK: cvtpi2pd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtpi2pd	0xbabecafe,%xmm5
+
+// CHECK: cvtpi2pd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtpi2pd	0x12345678,%xmm5
+
+// CHECK: cvtpi2pd	%mm3, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2a,0xeb]
+        	cvtpi2pd	%mm3,%xmm5
+
+// CHECK: cvtsi2sd	%ecx, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0xe9]
+        	cvtsi2sd	%ecx,%xmm5
+
+// CHECK: cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtsi2sd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtsi2sd	0x45,%xmm5
+
+// CHECK: cvtsi2sd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtsi2sd	0x7eed,%xmm5
+
+// CHECK: cvtsi2sd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtsi2sd	0xbabecafe,%xmm5
+
+// CHECK: cvtsi2sd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtsi2sd	0x12345678,%xmm5
+
+// CHECK: divpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	divpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: divpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
+        	divpd	0x45,%xmm5
+
+// CHECK: divpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
+        	divpd	0x7eed,%xmm5
+
+// CHECK: divpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
+        	divpd	0xbabecafe,%xmm5
+
+// CHECK: divpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
+        	divpd	0x12345678,%xmm5
+
+// CHECK: divpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5e,0xed]
+        	divpd	%xmm5,%xmm5
+
+// CHECK: divsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	divsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: divsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0x45,0x00,0x00,0x00]
+        	divsd	0x45,%xmm5
+
+// CHECK: divsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0xed,0x7e,0x00,0x00]
+        	divsd	0x7eed,%xmm5
+
+// CHECK: divsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0xfe,0xca,0xbe,0xba]
+        	divsd	0xbabecafe,%xmm5
+
+// CHECK: divsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0x2d,0x78,0x56,0x34,0x12]
+        	divsd	0x12345678,%xmm5
+
+// CHECK: divsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5e,0xed]
+        	divsd	%xmm5,%xmm5
+
+// CHECK: maxpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	maxpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: maxpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
+        	maxpd	0x45,%xmm5
+
+// CHECK: maxpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
+        	maxpd	0x7eed,%xmm5
+
+// CHECK: maxpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
+        	maxpd	0xbabecafe,%xmm5
+
+// CHECK: maxpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
+        	maxpd	0x12345678,%xmm5
+
+// CHECK: maxpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5f,0xed]
+        	maxpd	%xmm5,%xmm5
+
+// CHECK: maxsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	maxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: maxsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0x45,0x00,0x00,0x00]
+        	maxsd	0x45,%xmm5
+
+// CHECK: maxsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0xed,0x7e,0x00,0x00]
+        	maxsd	0x7eed,%xmm5
+
+// CHECK: maxsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0xfe,0xca,0xbe,0xba]
+        	maxsd	0xbabecafe,%xmm5
+
+// CHECK: maxsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0x2d,0x78,0x56,0x34,0x12]
+        	maxsd	0x12345678,%xmm5
+
+// CHECK: maxsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5f,0xed]
+        	maxsd	%xmm5,%xmm5
+
+// CHECK: minpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	minpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: minpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
+        	minpd	0x45,%xmm5
+
+// CHECK: minpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
+        	minpd	0x7eed,%xmm5
+
+// CHECK: minpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
+        	minpd	0xbabecafe,%xmm5
+
+// CHECK: minpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
+        	minpd	0x12345678,%xmm5
+
+// CHECK: minpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5d,0xed]
+        	minpd	%xmm5,%xmm5
+
+// CHECK: minsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	minsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: minsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0x45,0x00,0x00,0x00]
+        	minsd	0x45,%xmm5
+
+// CHECK: minsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0xed,0x7e,0x00,0x00]
+        	minsd	0x7eed,%xmm5
+
+// CHECK: minsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0xfe,0xca,0xbe,0xba]
+        	minsd	0xbabecafe,%xmm5
+
+// CHECK: minsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0x2d,0x78,0x56,0x34,0x12]
+        	minsd	0x12345678,%xmm5
+
+// CHECK: minsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5d,0xed]
+        	minsd	%xmm5,%xmm5
+
+// CHECK: movapd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movapd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0x45,0x00,0x00,0x00]
+        	movapd	0x45,%xmm5
+
+// CHECK: movapd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0xed,0x7e,0x00,0x00]
+        	movapd	0x7eed,%xmm5
+
+// CHECK: movapd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0xfe,0xca,0xbe,0xba]
+        	movapd	0xbabecafe,%xmm5
+
+// CHECK: movapd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0x2d,0x78,0x56,0x34,0x12]
+        	movapd	0x12345678,%xmm5
+
+// CHECK: movapd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0xed]
+        	movapd	%xmm5,%xmm5
+
+// CHECK: movapd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movapd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0x45,0x00,0x00,0x00]
+        	movapd	%xmm5,0x45
+
+// CHECK: movapd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0xed,0x7e,0x00,0x00]
+        	movapd	%xmm5,0x7eed
+
+// CHECK: movapd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0xfe,0xca,0xbe,0xba]
+        	movapd	%xmm5,0xbabecafe
+
+// CHECK: movapd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x29,0x2d,0x78,0x56,0x34,0x12]
+        	movapd	%xmm5,0x12345678
+
+// CHECK: movapd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x28,0xed]
+        	movapd	%xmm5,%xmm5
+
+// CHECK: movhpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movhpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
+        	movhpd	0x45,%xmm5
+
+// CHECK: movhpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
+        	movhpd	0x7eed,%xmm5
+
+// CHECK: movhpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
+        	movhpd	0xbabecafe,%xmm5
+
+// CHECK: movhpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
+        	movhpd	0x12345678,%xmm5
+
+// CHECK: movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movhpd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0x45,0x00,0x00,0x00]
+        	movhpd	%xmm5,0x45
+
+// CHECK: movhpd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0xed,0x7e,0x00,0x00]
+        	movhpd	%xmm5,0x7eed
+
+// CHECK: movhpd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0xfe,0xca,0xbe,0xba]
+        	movhpd	%xmm5,0xbabecafe
+
+// CHECK: movhpd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x17,0x2d,0x78,0x56,0x34,0x12]
+        	movhpd	%xmm5,0x12345678
+
+// CHECK: movlpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movlpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movlpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
+        	movlpd	0x45,%xmm5
+
+// CHECK: movlpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
+        	movlpd	0x7eed,%xmm5
+
+// CHECK: movlpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
+        	movlpd	0xbabecafe,%xmm5
+
+// CHECK: movlpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
+        	movlpd	0x12345678,%xmm5
+
+// CHECK: movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x13,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movlpd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0x45,0x00,0x00,0x00]
+        	movlpd	%xmm5,0x45
+
+// CHECK: movlpd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0xed,0x7e,0x00,0x00]
+        	movlpd	%xmm5,0x7eed
+
+// CHECK: movlpd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0xfe,0xca,0xbe,0xba]
+        	movlpd	%xmm5,0xbabecafe
+
+// CHECK: movlpd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x13,0x2d,0x78,0x56,0x34,0x12]
+        	movlpd	%xmm5,0x12345678
+
+// CHECK: movmskpd	%xmm5, %ecx
+// CHECK:  encoding: [0x66,0x0f,0x50,0xcd]
+        	movmskpd	%xmm5,%ecx
+
+// CHECK: movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movntpd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0x45,0x00,0x00,0x00]
+        	movntpd	%xmm5,0x45
+
+// CHECK: movntpd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0xed,0x7e,0x00,0x00]
+        	movntpd	%xmm5,0x7eed
+
+// CHECK: movntpd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
+        	movntpd	%xmm5,0xbabecafe
+
+// CHECK: movntpd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x2b,0x2d,0x78,0x56,0x34,0x12]
+        	movntpd	%xmm5,0x12345678
+
+// CHECK: movsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
+        	movsd	0x45,%xmm5
+
+// CHECK: movsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
+        	movsd	0x7eed,%xmm5
+
+// CHECK: movsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
+        	movsd	0xbabecafe,%xmm5
+
+// CHECK: movsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
+        	movsd	0x12345678,%xmm5
+
+// CHECK: movsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0xed]
+        	movsd	%xmm5,%xmm5
+
+// CHECK: movsd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf2,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movsd	%xmm5, 69
+// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
+        	movsd	%xmm5,0x45
+
+// CHECK: movsd	%xmm5, 32493
+// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
+        	movsd	%xmm5,0x7eed
+
+// CHECK: movsd	%xmm5, 3133065982
+// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
+        	movsd	%xmm5,0xbabecafe
+
+// CHECK: movsd	%xmm5, 305419896
+// CHECK:  encoding: [0xf2,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
+        	movsd	%xmm5,0x12345678
+
+// CHECK: movsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x10,0xed]
+        	movsd	%xmm5,%xmm5
+
+// CHECK: movupd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movupd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0x45,0x00,0x00,0x00]
+        	movupd	0x45,%xmm5
+
+// CHECK: movupd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0xed,0x7e,0x00,0x00]
+        	movupd	0x7eed,%xmm5
+
+// CHECK: movupd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0xfe,0xca,0xbe,0xba]
+        	movupd	0xbabecafe,%xmm5
+
+// CHECK: movupd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0x2d,0x78,0x56,0x34,0x12]
+        	movupd	0x12345678,%xmm5
+
+// CHECK: movupd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0xed]
+        	movupd	%xmm5,%xmm5
+
+// CHECK: movupd	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x11,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movupd	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0x45,0x00,0x00,0x00]
+        	movupd	%xmm5,0x45
+
+// CHECK: movupd	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0xed,0x7e,0x00,0x00]
+        	movupd	%xmm5,0x7eed
+
+// CHECK: movupd	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0xfe,0xca,0xbe,0xba]
+        	movupd	%xmm5,0xbabecafe
+
+// CHECK: movupd	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x11,0x2d,0x78,0x56,0x34,0x12]
+        	movupd	%xmm5,0x12345678
+
+// CHECK: movupd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x10,0xed]
+        	movupd	%xmm5,%xmm5
+
+// CHECK: mulpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	mulpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: mulpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
+        	mulpd	0x45,%xmm5
+
+// CHECK: mulpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
+        	mulpd	0x7eed,%xmm5
+
+// CHECK: mulpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
+        	mulpd	0xbabecafe,%xmm5
+
+// CHECK: mulpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
+        	mulpd	0x12345678,%xmm5
+
+// CHECK: mulpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x59,0xed]
+        	mulpd	%xmm5,%xmm5
+
+// CHECK: mulsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	mulsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: mulsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0x45,0x00,0x00,0x00]
+        	mulsd	0x45,%xmm5
+
+// CHECK: mulsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0xed,0x7e,0x00,0x00]
+        	mulsd	0x7eed,%xmm5
+
+// CHECK: mulsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0xfe,0xca,0xbe,0xba]
+        	mulsd	0xbabecafe,%xmm5
+
+// CHECK: mulsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0x2d,0x78,0x56,0x34,0x12]
+        	mulsd	0x12345678,%xmm5
+
+// CHECK: mulsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x59,0xed]
+        	mulsd	%xmm5,%xmm5
+
+// CHECK: orpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	orpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: orpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0x45,0x00,0x00,0x00]
+        	orpd	0x45,%xmm5
+
+// CHECK: orpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0xed,0x7e,0x00,0x00]
+        	orpd	0x7eed,%xmm5
+
+// CHECK: orpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0xfe,0xca,0xbe,0xba]
+        	orpd	0xbabecafe,%xmm5
+
+// CHECK: orpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0x2d,0x78,0x56,0x34,0x12]
+        	orpd	0x12345678,%xmm5
+
+// CHECK: orpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x56,0xed]
+        	orpd	%xmm5,%xmm5
+
+// CHECK: sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: sqrtpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
+        	sqrtpd	0x45,%xmm5
+
+// CHECK: sqrtpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
+        	sqrtpd	0x7eed,%xmm5
+
+// CHECK: sqrtpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
+        	sqrtpd	0xbabecafe,%xmm5
+
+// CHECK: sqrtpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
+        	sqrtpd	0x12345678,%xmm5
+
+// CHECK: sqrtpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x51,0xed]
+        	sqrtpd	%xmm5,%xmm5
+
+// CHECK: sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: sqrtsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0x45,0x00,0x00,0x00]
+        	sqrtsd	0x45,%xmm5
+
+// CHECK: sqrtsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0xed,0x7e,0x00,0x00]
+        	sqrtsd	0x7eed,%xmm5
+
+// CHECK: sqrtsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0xfe,0xca,0xbe,0xba]
+        	sqrtsd	0xbabecafe,%xmm5
+
+// CHECK: sqrtsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0x2d,0x78,0x56,0x34,0x12]
+        	sqrtsd	0x12345678,%xmm5
+
+// CHECK: sqrtsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x51,0xed]
+        	sqrtsd	%xmm5,%xmm5
+
+// CHECK: subpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	subpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: subpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
+        	subpd	0x45,%xmm5
+
+// CHECK: subpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
+        	subpd	0x7eed,%xmm5
+
+// CHECK: subpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
+        	subpd	0xbabecafe,%xmm5
+
+// CHECK: subpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
+        	subpd	0x12345678,%xmm5
+
+// CHECK: subpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5c,0xed]
+        	subpd	%xmm5,%xmm5
+
+// CHECK: subsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	subsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: subsd	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0x45,0x00,0x00,0x00]
+        	subsd	0x45,%xmm5
+
+// CHECK: subsd	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0xed,0x7e,0x00,0x00]
+        	subsd	0x7eed,%xmm5
+
+// CHECK: subsd	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0xfe,0xca,0xbe,0xba]
+        	subsd	0xbabecafe,%xmm5
+
+// CHECK: subsd	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0x2d,0x78,0x56,0x34,0x12]
+        	subsd	0x12345678,%xmm5
+
+// CHECK: subsd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5c,0xed]
+        	subsd	%xmm5,%xmm5
+
+// CHECK: ucomisd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: ucomisd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0x45,0x00,0x00,0x00]
+        	ucomisd	0x45,%xmm5
+
+// CHECK: ucomisd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0xed,0x7e,0x00,0x00]
+        	ucomisd	0x7eed,%xmm5
+
+// CHECK: ucomisd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0xfe,0xca,0xbe,0xba]
+        	ucomisd	0xbabecafe,%xmm5
+
+// CHECK: ucomisd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0x2d,0x78,0x56,0x34,0x12]
+        	ucomisd	0x12345678,%xmm5
+
+// CHECK: ucomisd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x2e,0xed]
+        	ucomisd	%xmm5,%xmm5
+
+// CHECK: unpckhpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	unpckhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: unpckhpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0x45,0x00,0x00,0x00]
+        	unpckhpd	0x45,%xmm5
+
+// CHECK: unpckhpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0xed,0x7e,0x00,0x00]
+        	unpckhpd	0x7eed,%xmm5
+
+// CHECK: unpckhpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0xfe,0xca,0xbe,0xba]
+        	unpckhpd	0xbabecafe,%xmm5
+
+// CHECK: unpckhpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0x2d,0x78,0x56,0x34,0x12]
+        	unpckhpd	0x12345678,%xmm5
+
+// CHECK: unpckhpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x15,0xed]
+        	unpckhpd	%xmm5,%xmm5
+
+// CHECK: unpcklpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	unpcklpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: unpcklpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0x45,0x00,0x00,0x00]
+        	unpcklpd	0x45,%xmm5
+
+// CHECK: unpcklpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0xed,0x7e,0x00,0x00]
+        	unpcklpd	0x7eed,%xmm5
+
+// CHECK: unpcklpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0xfe,0xca,0xbe,0xba]
+        	unpcklpd	0xbabecafe,%xmm5
+
+// CHECK: unpcklpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0x2d,0x78,0x56,0x34,0x12]
+        	unpcklpd	0x12345678,%xmm5
+
+// CHECK: unpcklpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x14,0xed]
+        	unpcklpd	%xmm5,%xmm5
+
+// CHECK: xorpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	xorpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: xorpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0x45,0x00,0x00,0x00]
+        	xorpd	0x45,%xmm5
+
+// CHECK: xorpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0xed,0x7e,0x00,0x00]
+        	xorpd	0x7eed,%xmm5
+
+// CHECK: xorpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0xfe,0xca,0xbe,0xba]
+        	xorpd	0xbabecafe,%xmm5
+
+// CHECK: xorpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0x2d,0x78,0x56,0x34,0x12]
+        	xorpd	0x12345678,%xmm5
+
+// CHECK: xorpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x57,0xed]
+        	xorpd	%xmm5,%xmm5
+
+// CHECK: cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtdq2pd	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0x45,0x00,0x00,0x00]
+        	cvtdq2pd	0x45,%xmm5
+
+// CHECK: cvtdq2pd	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtdq2pd	0x7eed,%xmm5
+
+// CHECK: cvtdq2pd	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtdq2pd	0xbabecafe,%xmm5
+
+// CHECK: cvtdq2pd	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0x2d,0x78,0x56,0x34,0x12]
+        	cvtdq2pd	0x12345678,%xmm5
+
+// CHECK: cvtdq2pd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xe6,0xed]
+        	cvtdq2pd	%xmm5,%xmm5
+
+// CHECK: cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtpd2dq	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0x45,0x00,0x00,0x00]
+        	cvtpd2dq	0x45,%xmm5
+
+// CHECK: cvtpd2dq	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtpd2dq	0x7eed,%xmm5
+
+// CHECK: cvtpd2dq	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtpd2dq	0xbabecafe,%xmm5
+
+// CHECK: cvtpd2dq	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0x2d,0x78,0x56,0x34,0x12]
+        	cvtpd2dq	0x12345678,%xmm5
+
+// CHECK: cvtpd2dq	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xe6,0xed]
+        	cvtpd2dq	%xmm5,%xmm5
+
+// CHECK: cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtdq2ps	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
+        	cvtdq2ps	0x45,%xmm5
+
+// CHECK: cvtdq2ps	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtdq2ps	0x7eed,%xmm5
+
+// CHECK: cvtdq2ps	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtdq2ps	0xbabecafe,%xmm5
+
+// CHECK: cvtdq2ps	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
+        	cvtdq2ps	0x12345678,%xmm5
+
+// CHECK: cvtdq2ps	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5b,0xed]
+        	cvtdq2ps	%xmm5,%xmm5
+
+// CHECK: cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: cvtpd2pi	69, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0x45,0x00,0x00,0x00]
+        	cvtpd2pi	0x45,%mm3
+
+// CHECK: cvtpd2pi	32493, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0xed,0x7e,0x00,0x00]
+        	cvtpd2pi	0x7eed,%mm3
+
+// CHECK: cvtpd2pi	3133065982, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0xfe,0xca,0xbe,0xba]
+        	cvtpd2pi	0xbabecafe,%mm3
+
+// CHECK: cvtpd2pi	305419896, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0x1d,0x78,0x56,0x34,0x12]
+        	cvtpd2pi	0x12345678,%mm3
+
+// CHECK: cvtpd2pi	%xmm5, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2d,0xdd]
+        	cvtpd2pi	%xmm5,%mm3
+
+// CHECK: cvtpd2ps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtpd2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtpd2ps	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtpd2ps	0x45,%xmm5
+
+// CHECK: cvtpd2ps	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtpd2ps	0x7eed,%xmm5
+
+// CHECK: cvtpd2ps	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtpd2ps	0xbabecafe,%xmm5
+
+// CHECK: cvtpd2ps	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtpd2ps	0x12345678,%xmm5
+
+// CHECK: cvtpd2ps	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5a,0xed]
+        	cvtpd2ps	%xmm5,%xmm5
+
+// CHECK: cvtps2pd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtps2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtps2pd	69, %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtps2pd	0x45,%xmm5
+
+// CHECK: cvtps2pd	32493, %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtps2pd	0x7eed,%xmm5
+
+// CHECK: cvtps2pd	3133065982, %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtps2pd	0xbabecafe,%xmm5
+
+// CHECK: cvtps2pd	305419896, %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtps2pd	0x12345678,%xmm5
+
+// CHECK: cvtps2pd	%xmm5, %xmm5
+// CHECK:  encoding: [0x0f,0x5a,0xed]
+        	cvtps2pd	%xmm5,%xmm5
+
+// CHECK: cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtps2dq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
+        	cvtps2dq	0x45,%xmm5
+
+// CHECK: cvtps2dq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtps2dq	0x7eed,%xmm5
+
+// CHECK: cvtps2dq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtps2dq	0xbabecafe,%xmm5
+
+// CHECK: cvtps2dq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
+        	cvtps2dq	0x12345678,%xmm5
+
+// CHECK: cvtps2dq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x5b,0xed]
+        	cvtps2dq	%xmm5,%xmm5
+
+// CHECK: cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtsd2ss	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtsd2ss	0x45,%xmm5
+
+// CHECK: cvtsd2ss	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtsd2ss	0x7eed,%xmm5
+
+// CHECK: cvtsd2ss	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtsd2ss	0xbabecafe,%xmm5
+
+// CHECK: cvtsd2ss	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtsd2ss	0x12345678,%xmm5
+
+// CHECK: cvtsd2ss	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x5a,0xed]
+        	cvtsd2ss	%xmm5,%xmm5
+
+// CHECK: cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvtss2sd	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0x45,0x00,0x00,0x00]
+        	cvtss2sd	0x45,%xmm5
+
+// CHECK: cvtss2sd	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0xed,0x7e,0x00,0x00]
+        	cvtss2sd	0x7eed,%xmm5
+
+// CHECK: cvtss2sd	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvtss2sd	0xbabecafe,%xmm5
+
+// CHECK: cvtss2sd	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0x2d,0x78,0x56,0x34,0x12]
+        	cvtss2sd	0x12345678,%xmm5
+
+// CHECK: cvtss2sd	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5a,0xed]
+        	cvtss2sd	%xmm5,%xmm5
+
+// CHECK: cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: cvttpd2pi	69, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0x45,0x00,0x00,0x00]
+        	cvttpd2pi	0x45,%mm3
+
+// CHECK: cvttpd2pi	32493, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0xed,0x7e,0x00,0x00]
+        	cvttpd2pi	0x7eed,%mm3
+
+// CHECK: cvttpd2pi	3133065982, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0xfe,0xca,0xbe,0xba]
+        	cvttpd2pi	0xbabecafe,%mm3
+
+// CHECK: cvttpd2pi	305419896, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0x1d,0x78,0x56,0x34,0x12]
+        	cvttpd2pi	0x12345678,%mm3
+
+// CHECK: cvttpd2pi	%xmm5, %mm3
+// CHECK:  encoding: [0x66,0x0f,0x2c,0xdd]
+        	cvttpd2pi	%xmm5,%mm3
+
+// CHECK: cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: cvttsd2si	69, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0x45,0x00,0x00,0x00]
+        	cvttsd2si	0x45,%ecx
+
+// CHECK: cvttsd2si	32493, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0xed,0x7e,0x00,0x00]
+        	cvttsd2si	0x7eed,%ecx
+
+// CHECK: cvttsd2si	3133065982, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0xfe,0xca,0xbe,0xba]
+        	cvttsd2si	0xbabecafe,%ecx
+
+// CHECK: cvttsd2si	305419896, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0x0d,0x78,0x56,0x34,0x12]
+        	cvttsd2si	0x12345678,%ecx
+
+// CHECK: cvttsd2si	%xmm5, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x2c,0xcd]
+        	cvttsd2si	%xmm5,%ecx
+
+// CHECK: cvttps2dq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	cvttps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: cvttps2dq	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0x45,0x00,0x00,0x00]
+        	cvttps2dq	0x45,%xmm5
+
+// CHECK: cvttps2dq	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0xed,0x7e,0x00,0x00]
+        	cvttps2dq	0x7eed,%xmm5
+
+// CHECK: cvttps2dq	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0xfe,0xca,0xbe,0xba]
+        	cvttps2dq	0xbabecafe,%xmm5
+
+// CHECK: cvttps2dq	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0x2d,0x78,0x56,0x34,0x12]
+        	cvttps2dq	0x12345678,%xmm5
+
+// CHECK: cvttps2dq	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x5b,0xed]
+        	cvttps2dq	%xmm5,%xmm5
+
+// CHECK: maskmovdqu	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf7,0xed]
+        	maskmovdqu	%xmm5,%xmm5
+
+// CHECK: movdqa	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movdqa	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0x45,0x00,0x00,0x00]
+        	movdqa	0x45,%xmm5
+
+// CHECK: movdqa	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0xed,0x7e,0x00,0x00]
+        	movdqa	0x7eed,%xmm5
+
+// CHECK: movdqa	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0xfe,0xca,0xbe,0xba]
+        	movdqa	0xbabecafe,%xmm5
+
+// CHECK: movdqa	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0x2d,0x78,0x56,0x34,0x12]
+        	movdqa	0x12345678,%xmm5
+
+// CHECK: movdqa	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0xed]
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0x7f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movdqa	%xmm5, 69
+// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0x45,0x00,0x00,0x00]
+        	movdqa	%xmm5,0x45
+
+// CHECK: movdqa	%xmm5, 32493
+// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0xed,0x7e,0x00,0x00]
+        	movdqa	%xmm5,0x7eed
+
+// CHECK: movdqa	%xmm5, 3133065982
+// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0xfe,0xca,0xbe,0xba]
+        	movdqa	%xmm5,0xbabecafe
+
+// CHECK: movdqa	%xmm5, 305419896
+// CHECK:  encoding: [0x66,0x0f,0x7f,0x2d,0x78,0x56,0x34,0x12]
+        	movdqa	%xmm5,0x12345678
+
+// CHECK: movdqa	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6f,0xed]
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: movdqu	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x6f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movdqu	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0x45,0x00,0x00,0x00]
+        	movdqu	0x45,%xmm5
+
+// CHECK: movdqu	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0xed,0x7e,0x00,0x00]
+        	movdqu	0x7eed,%xmm5
+
+// CHECK: movdqu	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0xfe,0xca,0xbe,0xba]
+        	movdqu	0xbabecafe,%xmm5
+
+// CHECK: movdqu	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x6f,0x2d,0x78,0x56,0x34,0x12]
+        	movdqu	0x12345678,%xmm5
+
+// CHECK: movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xf3,0x0f,0x7f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: movdqu	%xmm5, 69
+// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0x45,0x00,0x00,0x00]
+        	movdqu	%xmm5,0x45
+
+// CHECK: movdqu	%xmm5, 32493
+// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0xed,0x7e,0x00,0x00]
+        	movdqu	%xmm5,0x7eed
+
+// CHECK: movdqu	%xmm5, 3133065982
+// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0xfe,0xca,0xbe,0xba]
+        	movdqu	%xmm5,0xbabecafe
+
+// CHECK: movdqu	%xmm5, 305419896
+// CHECK:  encoding: [0xf3,0x0f,0x7f,0x2d,0x78,0x56,0x34,0x12]
+        	movdqu	%xmm5,0x12345678
+
+// CHECK: movdq2q	%xmm5, %mm3
+// CHECK:  encoding: [0xf2,0x0f,0xd6,0xdd]
+        	movdq2q	%xmm5,%mm3
+
+// CHECK: movq2dq	%mm3, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0xd6,0xeb]
+        	movq2dq	%mm3,%xmm5
+
+// CHECK: pmuludq	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0xf4,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmuludq	69, %mm3
+// CHECK:  encoding: [0x0f,0xf4,0x1d,0x45,0x00,0x00,0x00]
+        	pmuludq	0x45,%mm3
+
+// CHECK: pmuludq	32493, %mm3
+// CHECK:  encoding: [0x0f,0xf4,0x1d,0xed,0x7e,0x00,0x00]
+        	pmuludq	0x7eed,%mm3
+
+// CHECK: pmuludq	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0xf4,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmuludq	0xbabecafe,%mm3
+
+// CHECK: pmuludq	305419896, %mm3
+// CHECK:  encoding: [0x0f,0xf4,0x1d,0x78,0x56,0x34,0x12]
+        	pmuludq	0x12345678,%mm3
+
+// CHECK: pmuludq	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0xf4,0xdb]
+        	pmuludq	%mm3,%mm3
+
+// CHECK: pmuludq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmuludq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0x45,0x00,0x00,0x00]
+        	pmuludq	0x45,%xmm5
+
+// CHECK: pmuludq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0xed,0x7e,0x00,0x00]
+        	pmuludq	0x7eed,%xmm5
+
+// CHECK: pmuludq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmuludq	0xbabecafe,%xmm5
+
+// CHECK: pmuludq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0x2d,0x78,0x56,0x34,0x12]
+        	pmuludq	0x12345678,%xmm5
+
+// CHECK: pmuludq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xf4,0xed]
+        	pmuludq	%xmm5,%xmm5
+
+// CHECK: pslldq	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x73,0xfd,0x7f]
+        	pslldq	$0x7f,%xmm5
+
+// CHECK: psrldq	$127, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x73,0xdd,0x7f]
+        	psrldq	$0x7f,%xmm5
+
+// CHECK: punpckhqdq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpckhqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpckhqdq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0x45,0x00,0x00,0x00]
+        	punpckhqdq	0x45,%xmm5
+
+// CHECK: punpckhqdq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0xed,0x7e,0x00,0x00]
+        	punpckhqdq	0x7eed,%xmm5
+
+// CHECK: punpckhqdq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpckhqdq	0xbabecafe,%xmm5
+
+// CHECK: punpckhqdq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0x2d,0x78,0x56,0x34,0x12]
+        	punpckhqdq	0x12345678,%xmm5
+
+// CHECK: punpckhqdq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6d,0xed]
+        	punpckhqdq	%xmm5,%xmm5
+
+// CHECK: punpcklqdq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	punpcklqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: punpcklqdq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0x45,0x00,0x00,0x00]
+        	punpcklqdq	0x45,%xmm5
+
+// CHECK: punpcklqdq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0xed,0x7e,0x00,0x00]
+        	punpcklqdq	0x7eed,%xmm5
+
+// CHECK: punpcklqdq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0xfe,0xca,0xbe,0xba]
+        	punpcklqdq	0xbabecafe,%xmm5
+
+// CHECK: punpcklqdq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0x2d,0x78,0x56,0x34,0x12]
+        	punpcklqdq	0x12345678,%xmm5
+
+// CHECK: punpcklqdq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6c,0xed]
+        	punpcklqdq	%xmm5,%xmm5
+
+// CHECK: addsubpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addsubpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0x45,0x00,0x00,0x00]
+        	addsubpd	0x45,%xmm5
+
+// CHECK: addsubpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0xed,0x7e,0x00,0x00]
+        	addsubpd	0x7eed,%xmm5
+
+// CHECK: addsubpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0xfe,0xca,0xbe,0xba]
+        	addsubpd	0xbabecafe,%xmm5
+
+// CHECK: addsubpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0x2d,0x78,0x56,0x34,0x12]
+        	addsubpd	0x12345678,%xmm5
+
+// CHECK: addsubpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0xd0,0xed]
+        	addsubpd	%xmm5,%xmm5
+
+// CHECK: addsubps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	addsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: addsubps	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0x45,0x00,0x00,0x00]
+        	addsubps	0x45,%xmm5
+
+// CHECK: addsubps	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0xed,0x7e,0x00,0x00]
+        	addsubps	0x7eed,%xmm5
+
+// CHECK: addsubps	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0xfe,0xca,0xbe,0xba]
+        	addsubps	0xbabecafe,%xmm5
+
+// CHECK: addsubps	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0x2d,0x78,0x56,0x34,0x12]
+        	addsubps	0x12345678,%xmm5
+
+// CHECK: addsubps	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xd0,0xed]
+        	addsubps	%xmm5,%xmm5
+
+// CHECK: fisttpl	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdb,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	fisttpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: fisttpl	3133065982
+// CHECK:  encoding: [0xdb,0x0d,0xfe,0xca,0xbe,0xba]
+        	fisttpl	0xbabecafe
+
+// CHECK: fisttpl	305419896
+// CHECK:  encoding: [0xdb,0x0d,0x78,0x56,0x34,0x12]
+        	fisttpl	0x12345678
+
+// CHECK: haddpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	haddpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: haddpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0x45,0x00,0x00,0x00]
+        	haddpd	0x45,%xmm5
+
+// CHECK: haddpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0xed,0x7e,0x00,0x00]
+        	haddpd	0x7eed,%xmm5
+
+// CHECK: haddpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0xfe,0xca,0xbe,0xba]
+        	haddpd	0xbabecafe,%xmm5
+
+// CHECK: haddpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0x2d,0x78,0x56,0x34,0x12]
+        	haddpd	0x12345678,%xmm5
+
+// CHECK: haddpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7c,0xed]
+        	haddpd	%xmm5,%xmm5
+
+// CHECK: haddps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	haddps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: haddps	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0x45,0x00,0x00,0x00]
+        	haddps	0x45,%xmm5
+
+// CHECK: haddps	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0xed,0x7e,0x00,0x00]
+        	haddps	0x7eed,%xmm5
+
+// CHECK: haddps	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0xfe,0xca,0xbe,0xba]
+        	haddps	0xbabecafe,%xmm5
+
+// CHECK: haddps	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0x2d,0x78,0x56,0x34,0x12]
+        	haddps	0x12345678,%xmm5
+
+// CHECK: haddps	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7c,0xed]
+        	haddps	%xmm5,%xmm5
+
+// CHECK: hsubpd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	hsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: hsubpd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0x45,0x00,0x00,0x00]
+        	hsubpd	0x45,%xmm5
+
+// CHECK: hsubpd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0xed,0x7e,0x00,0x00]
+        	hsubpd	0x7eed,%xmm5
+
+// CHECK: hsubpd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0xfe,0xca,0xbe,0xba]
+        	hsubpd	0xbabecafe,%xmm5
+
+// CHECK: hsubpd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0x2d,0x78,0x56,0x34,0x12]
+        	hsubpd	0x12345678,%xmm5
+
+// CHECK: hsubpd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x7d,0xed]
+        	hsubpd	%xmm5,%xmm5
+
+// CHECK: hsubps	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	hsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: hsubps	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0x45,0x00,0x00,0x00]
+        	hsubps	0x45,%xmm5
+
+// CHECK: hsubps	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0xed,0x7e,0x00,0x00]
+        	hsubps	0x7eed,%xmm5
+
+// CHECK: hsubps	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0xfe,0xca,0xbe,0xba]
+        	hsubps	0xbabecafe,%xmm5
+
+// CHECK: hsubps	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0x2d,0x78,0x56,0x34,0x12]
+        	hsubps	0x12345678,%xmm5
+
+// CHECK: hsubps	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x7d,0xed]
+        	hsubps	%xmm5,%xmm5
+
+// CHECK: lddqu	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xf0,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: lddqu	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0x45,0x00,0x00,0x00]
+        	lddqu	0x45,%xmm5
+
+// CHECK: lddqu	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0xed,0x7e,0x00,0x00]
+        	lddqu	0x7eed,%xmm5
+
+// CHECK: lddqu	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0xfe,0xca,0xbe,0xba]
+        	lddqu	0xbabecafe,%xmm5
+
+// CHECK: lddqu	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0xf0,0x2d,0x78,0x56,0x34,0x12]
+        	lddqu	0x12345678,%xmm5
+
+// CHECK: movddup	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movddup	69, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
+        	movddup	0x45,%xmm5
+
+// CHECK: movddup	32493, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
+        	movddup	0x7eed,%xmm5
+
+// CHECK: movddup	3133065982, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
+        	movddup	0xbabecafe,%xmm5
+
+// CHECK: movddup	305419896, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
+        	movddup	0x12345678,%xmm5
+
+// CHECK: movddup	%xmm5, %xmm5
+// CHECK:  encoding: [0xf2,0x0f,0x12,0xed]
+        	movddup	%xmm5,%xmm5
+
+// CHECK: movshdup	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movshdup	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0x45,0x00,0x00,0x00]
+        	movshdup	0x45,%xmm5
+
+// CHECK: movshdup	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0xed,0x7e,0x00,0x00]
+        	movshdup	0x7eed,%xmm5
+
+// CHECK: movshdup	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0xfe,0xca,0xbe,0xba]
+        	movshdup	0xbabecafe,%xmm5
+
+// CHECK: movshdup	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0x2d,0x78,0x56,0x34,0x12]
+        	movshdup	0x12345678,%xmm5
+
+// CHECK: movshdup	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x16,0xed]
+        	movshdup	%xmm5,%xmm5
+
+// CHECK: movsldup	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movsldup	69, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0x45,0x00,0x00,0x00]
+        	movsldup	0x45,%xmm5
+
+// CHECK: movsldup	32493, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0xed,0x7e,0x00,0x00]
+        	movsldup	0x7eed,%xmm5
+
+// CHECK: movsldup	3133065982, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0xfe,0xca,0xbe,0xba]
+        	movsldup	0xbabecafe,%xmm5
+
+// CHECK: movsldup	305419896, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0x2d,0x78,0x56,0x34,0x12]
+        	movsldup	0x12345678,%xmm5
+
+// CHECK: movsldup	%xmm5, %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x12,0xed]
+        	movsldup	%xmm5,%xmm5
+
+// CHECK: vmclear	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x66,0x0f,0xc7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	vmclear	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: vmclear	32493
+// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0xed,0x7e,0x00,0x00]
+        	vmclear	0x7eed
+
+// CHECK: vmclear	3133065982
+// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0xfe,0xca,0xbe,0xba]
+        	vmclear	0xbabecafe
+
+// CHECK: vmclear	305419896
+// CHECK:  encoding: [0x66,0x0f,0xc7,0x35,0x78,0x56,0x34,0x12]
+        	vmclear	0x12345678
+
+// CHECK: vmptrld	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xc7,0xb4,0xcb,0xef,0xbe,0xad,0xde]
+        	vmptrld	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: vmptrld	32493
+// CHECK:  encoding: [0x0f,0xc7,0x35,0xed,0x7e,0x00,0x00]
+        	vmptrld	0x7eed
+
+// CHECK: vmptrld	3133065982
+// CHECK:  encoding: [0x0f,0xc7,0x35,0xfe,0xca,0xbe,0xba]
+        	vmptrld	0xbabecafe
+
+// CHECK: vmptrld	305419896
+// CHECK:  encoding: [0x0f,0xc7,0x35,0x78,0x56,0x34,0x12]
+        	vmptrld	0x12345678
+
+// CHECK: vmptrst	3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0xc7,0xbc,0xcb,0xef,0xbe,0xad,0xde]
+        	vmptrst	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: vmptrst	32493
+// CHECK:  encoding: [0x0f,0xc7,0x3d,0xed,0x7e,0x00,0x00]
+        	vmptrst	0x7eed
+
+// CHECK: vmptrst	3133065982
+// CHECK:  encoding: [0x0f,0xc7,0x3d,0xfe,0xca,0xbe,0xba]
+        	vmptrst	0xbabecafe
+
+// CHECK: vmptrst	305419896
+// CHECK:  encoding: [0x0f,0xc7,0x3d,0x78,0x56,0x34,0x12]
+        	vmptrst	0x12345678
+
+// CHECK: phaddw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phaddw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0x45,0x00,0x00,0x00]
+        	phaddw	0x45,%mm3
+
+// CHECK: phaddw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0xed,0x7e,0x00,0x00]
+        	phaddw	0x7eed,%mm3
+
+// CHECK: phaddw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0xfe,0xca,0xbe,0xba]
+        	phaddw	0xbabecafe,%mm3
+
+// CHECK: phaddw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0x1d,0x78,0x56,0x34,0x12]
+        	phaddw	0x12345678,%mm3
+
+// CHECK: phaddw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x01,0xdb]
+        	phaddw	%mm3,%mm3
+
+// CHECK: phaddw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phaddw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0x45,0x00,0x00,0x00]
+        	phaddw	0x45,%xmm5
+
+// CHECK: phaddw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0xed,0x7e,0x00,0x00]
+        	phaddw	0x7eed,%xmm5
+
+// CHECK: phaddw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0xfe,0xca,0xbe,0xba]
+        	phaddw	0xbabecafe,%xmm5
+
+// CHECK: phaddw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0x2d,0x78,0x56,0x34,0x12]
+        	phaddw	0x12345678,%xmm5
+
+// CHECK: phaddw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x01,0xed]
+        	phaddw	%xmm5,%xmm5
+
+// CHECK: phaddd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phaddd	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0x45,0x00,0x00,0x00]
+        	phaddd	0x45,%mm3
+
+// CHECK: phaddd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0xed,0x7e,0x00,0x00]
+        	phaddd	0x7eed,%mm3
+
+// CHECK: phaddd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0xfe,0xca,0xbe,0xba]
+        	phaddd	0xbabecafe,%mm3
+
+// CHECK: phaddd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0x1d,0x78,0x56,0x34,0x12]
+        	phaddd	0x12345678,%mm3
+
+// CHECK: phaddd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x02,0xdb]
+        	phaddd	%mm3,%mm3
+
+// CHECK: phaddd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phaddd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0x45,0x00,0x00,0x00]
+        	phaddd	0x45,%xmm5
+
+// CHECK: phaddd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0xed,0x7e,0x00,0x00]
+        	phaddd	0x7eed,%xmm5
+
+// CHECK: phaddd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0xfe,0xca,0xbe,0xba]
+        	phaddd	0xbabecafe,%xmm5
+
+// CHECK: phaddd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0x2d,0x78,0x56,0x34,0x12]
+        	phaddd	0x12345678,%xmm5
+
+// CHECK: phaddd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x02,0xed]
+        	phaddd	%xmm5,%xmm5
+
+// CHECK: phaddsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phaddsw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0x45,0x00,0x00,0x00]
+        	phaddsw	0x45,%mm3
+
+// CHECK: phaddsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0xed,0x7e,0x00,0x00]
+        	phaddsw	0x7eed,%mm3
+
+// CHECK: phaddsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0xfe,0xca,0xbe,0xba]
+        	phaddsw	0xbabecafe,%mm3
+
+// CHECK: phaddsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0x1d,0x78,0x56,0x34,0x12]
+        	phaddsw	0x12345678,%mm3
+
+// CHECK: phaddsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x03,0xdb]
+        	phaddsw	%mm3,%mm3
+
+// CHECK: phaddsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phaddsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0x45,0x00,0x00,0x00]
+        	phaddsw	0x45,%xmm5
+
+// CHECK: phaddsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0xed,0x7e,0x00,0x00]
+        	phaddsw	0x7eed,%xmm5
+
+// CHECK: phaddsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0xfe,0xca,0xbe,0xba]
+        	phaddsw	0xbabecafe,%xmm5
+
+// CHECK: phaddsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0x2d,0x78,0x56,0x34,0x12]
+        	phaddsw	0x12345678,%xmm5
+
+// CHECK: phaddsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x03,0xed]
+        	phaddsw	%xmm5,%xmm5
+
+// CHECK: phsubw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phsubw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0x45,0x00,0x00,0x00]
+        	phsubw	0x45,%mm3
+
+// CHECK: phsubw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0xed,0x7e,0x00,0x00]
+        	phsubw	0x7eed,%mm3
+
+// CHECK: phsubw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0xfe,0xca,0xbe,0xba]
+        	phsubw	0xbabecafe,%mm3
+
+// CHECK: phsubw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0x1d,0x78,0x56,0x34,0x12]
+        	phsubw	0x12345678,%mm3
+
+// CHECK: phsubw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x05,0xdb]
+        	phsubw	%mm3,%mm3
+
+// CHECK: phsubw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phsubw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0x45,0x00,0x00,0x00]
+        	phsubw	0x45,%xmm5
+
+// CHECK: phsubw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0xed,0x7e,0x00,0x00]
+        	phsubw	0x7eed,%xmm5
+
+// CHECK: phsubw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0xfe,0xca,0xbe,0xba]
+        	phsubw	0xbabecafe,%xmm5
+
+// CHECK: phsubw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0x2d,0x78,0x56,0x34,0x12]
+        	phsubw	0x12345678,%xmm5
+
+// CHECK: phsubw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x05,0xed]
+        	phsubw	%xmm5,%xmm5
+
+// CHECK: phsubd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phsubd	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0x45,0x00,0x00,0x00]
+        	phsubd	0x45,%mm3
+
+// CHECK: phsubd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0xed,0x7e,0x00,0x00]
+        	phsubd	0x7eed,%mm3
+
+// CHECK: phsubd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0xfe,0xca,0xbe,0xba]
+        	phsubd	0xbabecafe,%mm3
+
+// CHECK: phsubd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0x1d,0x78,0x56,0x34,0x12]
+        	phsubd	0x12345678,%mm3
+
+// CHECK: phsubd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x06,0xdb]
+        	phsubd	%mm3,%mm3
+
+// CHECK: phsubd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phsubd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0x45,0x00,0x00,0x00]
+        	phsubd	0x45,%xmm5
+
+// CHECK: phsubd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0xed,0x7e,0x00,0x00]
+        	phsubd	0x7eed,%xmm5
+
+// CHECK: phsubd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0xfe,0xca,0xbe,0xba]
+        	phsubd	0xbabecafe,%xmm5
+
+// CHECK: phsubd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0x2d,0x78,0x56,0x34,0x12]
+        	phsubd	0x12345678,%xmm5
+
+// CHECK: phsubd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x06,0xed]
+        	phsubd	%xmm5,%xmm5
+
+// CHECK: phsubsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: phsubsw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0x45,0x00,0x00,0x00]
+        	phsubsw	0x45,%mm3
+
+// CHECK: phsubsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0xed,0x7e,0x00,0x00]
+        	phsubsw	0x7eed,%mm3
+
+// CHECK: phsubsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0xfe,0xca,0xbe,0xba]
+        	phsubsw	0xbabecafe,%mm3
+
+// CHECK: phsubsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0x1d,0x78,0x56,0x34,0x12]
+        	phsubsw	0x12345678,%mm3
+
+// CHECK: phsubsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x07,0xdb]
+        	phsubsw	%mm3,%mm3
+
+// CHECK: phsubsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phsubsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0x45,0x00,0x00,0x00]
+        	phsubsw	0x45,%xmm5
+
+// CHECK: phsubsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0xed,0x7e,0x00,0x00]
+        	phsubsw	0x7eed,%xmm5
+
+// CHECK: phsubsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0xfe,0xca,0xbe,0xba]
+        	phsubsw	0xbabecafe,%xmm5
+
+// CHECK: phsubsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0x2d,0x78,0x56,0x34,0x12]
+        	phsubsw	0x12345678,%xmm5
+
+// CHECK: phsubsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x07,0xed]
+        	phsubsw	%xmm5,%xmm5
+
+// CHECK: pmaddubsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmaddubsw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0x45,0x00,0x00,0x00]
+        	pmaddubsw	0x45,%mm3
+
+// CHECK: pmaddubsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0xed,0x7e,0x00,0x00]
+        	pmaddubsw	0x7eed,%mm3
+
+// CHECK: pmaddubsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmaddubsw	0xbabecafe,%mm3
+
+// CHECK: pmaddubsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0x1d,0x78,0x56,0x34,0x12]
+        	pmaddubsw	0x12345678,%mm3
+
+// CHECK: pmaddubsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x04,0xdb]
+        	pmaddubsw	%mm3,%mm3
+
+// CHECK: pmaddubsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaddubsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0x45,0x00,0x00,0x00]
+        	pmaddubsw	0x45,%xmm5
+
+// CHECK: pmaddubsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaddubsw	0x7eed,%xmm5
+
+// CHECK: pmaddubsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaddubsw	0xbabecafe,%xmm5
+
+// CHECK: pmaddubsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0x2d,0x78,0x56,0x34,0x12]
+        	pmaddubsw	0x12345678,%xmm5
+
+// CHECK: pmaddubsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x04,0xed]
+        	pmaddubsw	%xmm5,%xmm5
+
+// CHECK: pmulhrsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pmulhrsw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0x45,0x00,0x00,0x00]
+        	pmulhrsw	0x45,%mm3
+
+// CHECK: pmulhrsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0xed,0x7e,0x00,0x00]
+        	pmulhrsw	0x7eed,%mm3
+
+// CHECK: pmulhrsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0xfe,0xca,0xbe,0xba]
+        	pmulhrsw	0xbabecafe,%mm3
+
+// CHECK: pmulhrsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0x1d,0x78,0x56,0x34,0x12]
+        	pmulhrsw	0x12345678,%mm3
+
+// CHECK: pmulhrsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0b,0xdb]
+        	pmulhrsw	%mm3,%mm3
+
+// CHECK: pmulhrsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmulhrsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0x45,0x00,0x00,0x00]
+        	pmulhrsw	0x45,%xmm5
+
+// CHECK: pmulhrsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0xed,0x7e,0x00,0x00]
+        	pmulhrsw	0x7eed,%xmm5
+
+// CHECK: pmulhrsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmulhrsw	0xbabecafe,%xmm5
+
+// CHECK: pmulhrsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0x2d,0x78,0x56,0x34,0x12]
+        	pmulhrsw	0x12345678,%xmm5
+
+// CHECK: pmulhrsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0b,0xed]
+        	pmulhrsw	%xmm5,%xmm5
+
+// CHECK: pshufb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pshufb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pshufb	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0x45,0x00,0x00,0x00]
+        	pshufb	0x45,%mm3
+
+// CHECK: pshufb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0xed,0x7e,0x00,0x00]
+        	pshufb	0x7eed,%mm3
+
+// CHECK: pshufb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0xfe,0xca,0xbe,0xba]
+        	pshufb	0xbabecafe,%mm3
+
+// CHECK: pshufb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0x1d,0x78,0x56,0x34,0x12]
+        	pshufb	0x12345678,%mm3
+
+// CHECK: pshufb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x00,0xdb]
+        	pshufb	%mm3,%mm3
+
+// CHECK: pshufb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pshufb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pshufb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0x45,0x00,0x00,0x00]
+        	pshufb	0x45,%xmm5
+
+// CHECK: pshufb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0xed,0x7e,0x00,0x00]
+        	pshufb	0x7eed,%xmm5
+
+// CHECK: pshufb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0xfe,0xca,0xbe,0xba]
+        	pshufb	0xbabecafe,%xmm5
+
+// CHECK: pshufb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x2d,0x78,0x56,0x34,0x12]
+        	pshufb	0x12345678,%xmm5
+
+// CHECK: pshufb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0xed]
+        	pshufb	%xmm5,%xmm5
+
+// CHECK: psignb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psignb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psignb	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0x45,0x00,0x00,0x00]
+        	psignb	0x45,%mm3
+
+// CHECK: psignb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0xed,0x7e,0x00,0x00]
+        	psignb	0x7eed,%mm3
+
+// CHECK: psignb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0xfe,0xca,0xbe,0xba]
+        	psignb	0xbabecafe,%mm3
+
+// CHECK: psignb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0x1d,0x78,0x56,0x34,0x12]
+        	psignb	0x12345678,%mm3
+
+// CHECK: psignb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x08,0xdb]
+        	psignb	%mm3,%mm3
+
+// CHECK: psignb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psignb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psignb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0x45,0x00,0x00,0x00]
+        	psignb	0x45,%xmm5
+
+// CHECK: psignb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0xed,0x7e,0x00,0x00]
+        	psignb	0x7eed,%xmm5
+
+// CHECK: psignb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0xfe,0xca,0xbe,0xba]
+        	psignb	0xbabecafe,%xmm5
+
+// CHECK: psignb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0x2d,0x78,0x56,0x34,0x12]
+        	psignb	0x12345678,%xmm5
+
+// CHECK: psignb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x08,0xed]
+        	psignb	%xmm5,%xmm5
+
+// CHECK: psignw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psignw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psignw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0x45,0x00,0x00,0x00]
+        	psignw	0x45,%mm3
+
+// CHECK: psignw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0xed,0x7e,0x00,0x00]
+        	psignw	0x7eed,%mm3
+
+// CHECK: psignw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0xfe,0xca,0xbe,0xba]
+        	psignw	0xbabecafe,%mm3
+
+// CHECK: psignw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0x1d,0x78,0x56,0x34,0x12]
+        	psignw	0x12345678,%mm3
+
+// CHECK: psignw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x09,0xdb]
+        	psignw	%mm3,%mm3
+
+// CHECK: psignw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psignw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psignw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0x45,0x00,0x00,0x00]
+        	psignw	0x45,%xmm5
+
+// CHECK: psignw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0xed,0x7e,0x00,0x00]
+        	psignw	0x7eed,%xmm5
+
+// CHECK: psignw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0xfe,0xca,0xbe,0xba]
+        	psignw	0xbabecafe,%xmm5
+
+// CHECK: psignw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0x2d,0x78,0x56,0x34,0x12]
+        	psignw	0x12345678,%xmm5
+
+// CHECK: psignw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x09,0xed]
+        	psignw	%xmm5,%xmm5
+
+// CHECK: psignd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	psignd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: psignd	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0x45,0x00,0x00,0x00]
+        	psignd	0x45,%mm3
+
+// CHECK: psignd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0xed,0x7e,0x00,0x00]
+        	psignd	0x7eed,%mm3
+
+// CHECK: psignd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0xfe,0xca,0xbe,0xba]
+        	psignd	0xbabecafe,%mm3
+
+// CHECK: psignd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0x1d,0x78,0x56,0x34,0x12]
+        	psignd	0x12345678,%mm3
+
+// CHECK: psignd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x0a,0xdb]
+        	psignd	%mm3,%mm3
+
+// CHECK: psignd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	psignd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: psignd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0x45,0x00,0x00,0x00]
+        	psignd	0x45,%xmm5
+
+// CHECK: psignd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0xed,0x7e,0x00,0x00]
+        	psignd	0x7eed,%xmm5
+
+// CHECK: psignd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0xfe,0xca,0xbe,0xba]
+        	psignd	0xbabecafe,%xmm5
+
+// CHECK: psignd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0x2d,0x78,0x56,0x34,0x12]
+        	psignd	0x12345678,%xmm5
+
+// CHECK: psignd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x0a,0xed]
+        	psignd	%xmm5,%xmm5
+
+// CHECK: pabsb	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pabsb	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0x45,0x00,0x00,0x00]
+        	pabsb	0x45,%mm3
+
+// CHECK: pabsb	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0xed,0x7e,0x00,0x00]
+        	pabsb	0x7eed,%mm3
+
+// CHECK: pabsb	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0xfe,0xca,0xbe,0xba]
+        	pabsb	0xbabecafe,%mm3
+
+// CHECK: pabsb	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0x1d,0x78,0x56,0x34,0x12]
+        	pabsb	0x12345678,%mm3
+
+// CHECK: pabsb	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1c,0xdb]
+        	pabsb	%mm3,%mm3
+
+// CHECK: pabsb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pabsb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0x45,0x00,0x00,0x00]
+        	pabsb	0x45,%xmm5
+
+// CHECK: pabsb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0xed,0x7e,0x00,0x00]
+        	pabsb	0x7eed,%xmm5
+
+// CHECK: pabsb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0xfe,0xca,0xbe,0xba]
+        	pabsb	0xbabecafe,%xmm5
+
+// CHECK: pabsb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0x2d,0x78,0x56,0x34,0x12]
+        	pabsb	0x12345678,%xmm5
+
+// CHECK: pabsb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1c,0xed]
+        	pabsb	%xmm5,%xmm5
+
+// CHECK: pabsw	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pabsw	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0x45,0x00,0x00,0x00]
+        	pabsw	0x45,%mm3
+
+// CHECK: pabsw	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0xed,0x7e,0x00,0x00]
+        	pabsw	0x7eed,%mm3
+
+// CHECK: pabsw	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0xfe,0xca,0xbe,0xba]
+        	pabsw	0xbabecafe,%mm3
+
+// CHECK: pabsw	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0x1d,0x78,0x56,0x34,0x12]
+        	pabsw	0x12345678,%mm3
+
+// CHECK: pabsw	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1d,0xdb]
+        	pabsw	%mm3,%mm3
+
+// CHECK: pabsw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pabsw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0x45,0x00,0x00,0x00]
+        	pabsw	0x45,%xmm5
+
+// CHECK: pabsw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0xed,0x7e,0x00,0x00]
+        	pabsw	0x7eed,%xmm5
+
+// CHECK: pabsw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0xfe,0xca,0xbe,0xba]
+        	pabsw	0xbabecafe,%xmm5
+
+// CHECK: pabsw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0x2d,0x78,0x56,0x34,0x12]
+        	pabsw	0x12345678,%xmm5
+
+// CHECK: pabsw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1d,0xed]
+        	pabsw	%xmm5,%xmm5
+
+// CHECK: pabsd	3735928559(%ebx,%ecx,8), %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: pabsd	69, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0x45,0x00,0x00,0x00]
+        	pabsd	0x45,%mm3
+
+// CHECK: pabsd	32493, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0xed,0x7e,0x00,0x00]
+        	pabsd	0x7eed,%mm3
+
+// CHECK: pabsd	3133065982, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0xfe,0xca,0xbe,0xba]
+        	pabsd	0xbabecafe,%mm3
+
+// CHECK: pabsd	305419896, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0x1d,0x78,0x56,0x34,0x12]
+        	pabsd	0x12345678,%mm3
+
+// CHECK: pabsd	%mm3, %mm3
+// CHECK:  encoding: [0x0f,0x38,0x1e,0xdb]
+        	pabsd	%mm3,%mm3
+
+// CHECK: pabsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pabsd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0x45,0x00,0x00,0x00]
+        	pabsd	0x45,%xmm5
+
+// CHECK: pabsd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0xed,0x7e,0x00,0x00]
+        	pabsd	0x7eed,%xmm5
+
+// CHECK: pabsd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0xfe,0xca,0xbe,0xba]
+        	pabsd	0xbabecafe,%xmm5
+
+// CHECK: pabsd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0x2d,0x78,0x56,0x34,0x12]
+        	pabsd	0x12345678,%xmm5
+
+// CHECK: pabsd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x1e,0xed]
+        	pabsd	%xmm5,%xmm5
+
+// CHECK: femms
+// CHECK:  encoding: [0x0f,0x0e]
+        	femms
+
+// CHECK: movntdqa	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	movntdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: movntdqa	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0x45,0x00,0x00,0x00]
+        	movntdqa	0x45,%xmm5
+
+// CHECK: movntdqa	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0xed,0x7e,0x00,0x00]
+        	movntdqa	0x7eed,%xmm5
+
+// CHECK: movntdqa	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
+        	movntdqa	0xbabecafe,%xmm5
+
+// CHECK: movntdqa	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2a,0x2d,0x78,0x56,0x34,0x12]
+        	movntdqa	0x12345678,%xmm5
+
+// CHECK: packusdw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	packusdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: packusdw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0x45,0x00,0x00,0x00]
+        	packusdw	0x45,%xmm5
+
+// CHECK: packusdw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0xed,0x7e,0x00,0x00]
+        	packusdw	0x7eed,%xmm5
+
+// CHECK: packusdw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0xfe,0xca,0xbe,0xba]
+        	packusdw	0xbabecafe,%xmm5
+
+// CHECK: packusdw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0x2d,0x78,0x56,0x34,0x12]
+        	packusdw	0x12345678,%xmm5
+
+// CHECK: packusdw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x2b,0xed]
+        	packusdw	%xmm5,%xmm5
+
+// CHECK: pcmpeqq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpeqq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpeqq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpeqq	0x45,%xmm5
+
+// CHECK: pcmpeqq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpeqq	0x7eed,%xmm5
+
+// CHECK: pcmpeqq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpeqq	0xbabecafe,%xmm5
+
+// CHECK: pcmpeqq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpeqq	0x12345678,%xmm5
+
+// CHECK: pcmpeqq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x29,0xed]
+        	pcmpeqq	%xmm5,%xmm5
+
+// CHECK: phminposuw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: phminposuw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0x45,0x00,0x00,0x00]
+        	phminposuw	0x45,%xmm5
+
+// CHECK: phminposuw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0xed,0x7e,0x00,0x00]
+        	phminposuw	0x7eed,%xmm5
+
+// CHECK: phminposuw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0xfe,0xca,0xbe,0xba]
+        	phminposuw	0xbabecafe,%xmm5
+
+// CHECK: phminposuw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0x2d,0x78,0x56,0x34,0x12]
+        	phminposuw	0x12345678,%xmm5
+
+// CHECK: phminposuw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x41,0xed]
+        	phminposuw	%xmm5,%xmm5
+
+// CHECK: pmaxsb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxsb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxsb	0x45,%xmm5
+
+// CHECK: pmaxsb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxsb	0x7eed,%xmm5
+
+// CHECK: pmaxsb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxsb	0xbabecafe,%xmm5
+
+// CHECK: pmaxsb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxsb	0x12345678,%xmm5
+
+// CHECK: pmaxsb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3c,0xed]
+        	pmaxsb	%xmm5,%xmm5
+
+// CHECK: pmaxsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxsd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxsd	0x45,%xmm5
+
+// CHECK: pmaxsd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxsd	0x7eed,%xmm5
+
+// CHECK: pmaxsd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxsd	0xbabecafe,%xmm5
+
+// CHECK: pmaxsd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxsd	0x12345678,%xmm5
+
+// CHECK: pmaxsd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3d,0xed]
+        	pmaxsd	%xmm5,%xmm5
+
+// CHECK: pmaxud	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxud	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxud	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxud	0x45,%xmm5
+
+// CHECK: pmaxud	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxud	0x7eed,%xmm5
+
+// CHECK: pmaxud	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxud	0xbabecafe,%xmm5
+
+// CHECK: pmaxud	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxud	0x12345678,%xmm5
+
+// CHECK: pmaxud	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3f,0xed]
+        	pmaxud	%xmm5,%xmm5
+
+// CHECK: pmaxuw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmaxuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmaxuw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0x45,0x00,0x00,0x00]
+        	pmaxuw	0x45,%xmm5
+
+// CHECK: pmaxuw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0xed,0x7e,0x00,0x00]
+        	pmaxuw	0x7eed,%xmm5
+
+// CHECK: pmaxuw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmaxuw	0xbabecafe,%xmm5
+
+// CHECK: pmaxuw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0x2d,0x78,0x56,0x34,0x12]
+        	pmaxuw	0x12345678,%xmm5
+
+// CHECK: pmaxuw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3e,0xed]
+        	pmaxuw	%xmm5,%xmm5
+
+// CHECK: pminsb	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminsb	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0x45,0x00,0x00,0x00]
+        	pminsb	0x45,%xmm5
+
+// CHECK: pminsb	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0xed,0x7e,0x00,0x00]
+        	pminsb	0x7eed,%xmm5
+
+// CHECK: pminsb	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminsb	0xbabecafe,%xmm5
+
+// CHECK: pminsb	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0x2d,0x78,0x56,0x34,0x12]
+        	pminsb	0x12345678,%xmm5
+
+// CHECK: pminsb	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x38,0xed]
+        	pminsb	%xmm5,%xmm5
+
+// CHECK: pminsd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminsd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0x45,0x00,0x00,0x00]
+        	pminsd	0x45,%xmm5
+
+// CHECK: pminsd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0xed,0x7e,0x00,0x00]
+        	pminsd	0x7eed,%xmm5
+
+// CHECK: pminsd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminsd	0xbabecafe,%xmm5
+
+// CHECK: pminsd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0x2d,0x78,0x56,0x34,0x12]
+        	pminsd	0x12345678,%xmm5
+
+// CHECK: pminsd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x39,0xed]
+        	pminsd	%xmm5,%xmm5
+
+// CHECK: pminud	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminud	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminud	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0x45,0x00,0x00,0x00]
+        	pminud	0x45,%xmm5
+
+// CHECK: pminud	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0xed,0x7e,0x00,0x00]
+        	pminud	0x7eed,%xmm5
+
+// CHECK: pminud	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminud	0xbabecafe,%xmm5
+
+// CHECK: pminud	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0x2d,0x78,0x56,0x34,0x12]
+        	pminud	0x12345678,%xmm5
+
+// CHECK: pminud	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3b,0xed]
+        	pminud	%xmm5,%xmm5
+
+// CHECK: pminuw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pminuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pminuw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0x45,0x00,0x00,0x00]
+        	pminuw	0x45,%xmm5
+
+// CHECK: pminuw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0xed,0x7e,0x00,0x00]
+        	pminuw	0x7eed,%xmm5
+
+// CHECK: pminuw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0xfe,0xca,0xbe,0xba]
+        	pminuw	0xbabecafe,%xmm5
+
+// CHECK: pminuw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0x2d,0x78,0x56,0x34,0x12]
+        	pminuw	0x12345678,%xmm5
+
+// CHECK: pminuw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x3a,0xed]
+        	pminuw	%xmm5,%xmm5
+
+// CHECK: pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxbw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxbw	0x45,%xmm5
+
+// CHECK: pmovsxbw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxbw	0x7eed,%xmm5
+
+// CHECK: pmovsxbw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxbw	0xbabecafe,%xmm5
+
+// CHECK: pmovsxbw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxbw	0x12345678,%xmm5
+
+// CHECK: pmovsxbw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x20,0xed]
+        	pmovsxbw	%xmm5,%xmm5
+
+// CHECK: pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxbd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxbd	0x45,%xmm5
+
+// CHECK: pmovsxbd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxbd	0x7eed,%xmm5
+
+// CHECK: pmovsxbd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxbd	0xbabecafe,%xmm5
+
+// CHECK: pmovsxbd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxbd	0x12345678,%xmm5
+
+// CHECK: pmovsxbd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x21,0xed]
+        	pmovsxbd	%xmm5,%xmm5
+
+// CHECK: pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxbq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxbq	0x45,%xmm5
+
+// CHECK: pmovsxbq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxbq	0x7eed,%xmm5
+
+// CHECK: pmovsxbq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxbq	0xbabecafe,%xmm5
+
+// CHECK: pmovsxbq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxbq	0x12345678,%xmm5
+
+// CHECK: pmovsxbq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x22,0xed]
+        	pmovsxbq	%xmm5,%xmm5
+
+// CHECK: pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxwd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxwd	0x45,%xmm5
+
+// CHECK: pmovsxwd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxwd	0x7eed,%xmm5
+
+// CHECK: pmovsxwd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxwd	0xbabecafe,%xmm5
+
+// CHECK: pmovsxwd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxwd	0x12345678,%xmm5
+
+// CHECK: pmovsxwd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x23,0xed]
+        	pmovsxwd	%xmm5,%xmm5
+
+// CHECK: pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxwq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxwq	0x45,%xmm5
+
+// CHECK: pmovsxwq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxwq	0x7eed,%xmm5
+
+// CHECK: pmovsxwq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxwq	0xbabecafe,%xmm5
+
+// CHECK: pmovsxwq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxwq	0x12345678,%xmm5
+
+// CHECK: pmovsxwq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x24,0xed]
+        	pmovsxwq	%xmm5,%xmm5
+
+// CHECK: pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovsxdq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0x45,0x00,0x00,0x00]
+        	pmovsxdq	0x45,%xmm5
+
+// CHECK: pmovsxdq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovsxdq	0x7eed,%xmm5
+
+// CHECK: pmovsxdq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovsxdq	0xbabecafe,%xmm5
+
+// CHECK: pmovsxdq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0x2d,0x78,0x56,0x34,0x12]
+        	pmovsxdq	0x12345678,%xmm5
+
+// CHECK: pmovsxdq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x25,0xed]
+        	pmovsxdq	%xmm5,%xmm5
+
+// CHECK: pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxbw	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxbw	0x45,%xmm5
+
+// CHECK: pmovzxbw	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxbw	0x7eed,%xmm5
+
+// CHECK: pmovzxbw	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxbw	0xbabecafe,%xmm5
+
+// CHECK: pmovzxbw	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxbw	0x12345678,%xmm5
+
+// CHECK: pmovzxbw	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x30,0xed]
+        	pmovzxbw	%xmm5,%xmm5
+
+// CHECK: pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxbd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxbd	0x45,%xmm5
+
+// CHECK: pmovzxbd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxbd	0x7eed,%xmm5
+
+// CHECK: pmovzxbd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxbd	0xbabecafe,%xmm5
+
+// CHECK: pmovzxbd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxbd	0x12345678,%xmm5
+
+// CHECK: pmovzxbd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x31,0xed]
+        	pmovzxbd	%xmm5,%xmm5
+
+// CHECK: pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxbq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxbq	0x45,%xmm5
+
+// CHECK: pmovzxbq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxbq	0x7eed,%xmm5
+
+// CHECK: pmovzxbq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxbq	0xbabecafe,%xmm5
+
+// CHECK: pmovzxbq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxbq	0x12345678,%xmm5
+
+// CHECK: pmovzxbq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x32,0xed]
+        	pmovzxbq	%xmm5,%xmm5
+
+// CHECK: pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxwd	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxwd	0x45,%xmm5
+
+// CHECK: pmovzxwd	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxwd	0x7eed,%xmm5
+
+// CHECK: pmovzxwd	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxwd	0xbabecafe,%xmm5
+
+// CHECK: pmovzxwd	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxwd	0x12345678,%xmm5
+
+// CHECK: pmovzxwd	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x33,0xed]
+        	pmovzxwd	%xmm5,%xmm5
+
+// CHECK: pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxwq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxwq	0x45,%xmm5
+
+// CHECK: pmovzxwq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxwq	0x7eed,%xmm5
+
+// CHECK: pmovzxwq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxwq	0xbabecafe,%xmm5
+
+// CHECK: pmovzxwq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxwq	0x12345678,%xmm5
+
+// CHECK: pmovzxwq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x34,0xed]
+        	pmovzxwq	%xmm5,%xmm5
+
+// CHECK: pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmovzxdq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0x45,0x00,0x00,0x00]
+        	pmovzxdq	0x45,%xmm5
+
+// CHECK: pmovzxdq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0xed,0x7e,0x00,0x00]
+        	pmovzxdq	0x7eed,%xmm5
+
+// CHECK: pmovzxdq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmovzxdq	0xbabecafe,%xmm5
+
+// CHECK: pmovzxdq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0x2d,0x78,0x56,0x34,0x12]
+        	pmovzxdq	0x12345678,%xmm5
+
+// CHECK: pmovzxdq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x35,0xed]
+        	pmovzxdq	%xmm5,%xmm5
+
+// CHECK: pmuldq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmuldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmuldq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0x45,0x00,0x00,0x00]
+        	pmuldq	0x45,%xmm5
+
+// CHECK: pmuldq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0xed,0x7e,0x00,0x00]
+        	pmuldq	0x7eed,%xmm5
+
+// CHECK: pmuldq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmuldq	0xbabecafe,%xmm5
+
+// CHECK: pmuldq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0x2d,0x78,0x56,0x34,0x12]
+        	pmuldq	0x12345678,%xmm5
+
+// CHECK: pmuldq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x28,0xed]
+        	pmuldq	%xmm5,%xmm5
+
+// CHECK: pmulld	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pmulld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pmulld	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0x45,0x00,0x00,0x00]
+        	pmulld	0x45,%xmm5
+
+// CHECK: pmulld	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0xed,0x7e,0x00,0x00]
+        	pmulld	0x7eed,%xmm5
+
+// CHECK: pmulld	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0xfe,0xca,0xbe,0xba]
+        	pmulld	0xbabecafe,%xmm5
+
+// CHECK: pmulld	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0x2d,0x78,0x56,0x34,0x12]
+        	pmulld	0x12345678,%xmm5
+
+// CHECK: pmulld	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x40,0xed]
+        	pmulld	%xmm5,%xmm5
+
+// CHECK: ptest 	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: ptest 	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0x45,0x00,0x00,0x00]
+        	ptest	0x45,%xmm5
+
+// CHECK: ptest 	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0xed,0x7e,0x00,0x00]
+        	ptest	0x7eed,%xmm5
+
+// CHECK: ptest 	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0xfe,0xca,0xbe,0xba]
+        	ptest	0xbabecafe,%xmm5
+
+// CHECK: ptest 	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0x2d,0x78,0x56,0x34,0x12]
+        	ptest	0x12345678,%xmm5
+
+// CHECK: ptest 	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x17,0xed]
+        	ptest	%xmm5,%xmm5
+
+// CHECK: pcmpgtq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0xac,0xcb,0xef,0xbe,0xad,0xde]
+        	pcmpgtq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: pcmpgtq	69, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0x45,0x00,0x00,0x00]
+        	pcmpgtq	0x45,%xmm5
+
+// CHECK: pcmpgtq	32493, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0xed,0x7e,0x00,0x00]
+        	pcmpgtq	0x7eed,%xmm5
+
+// CHECK: pcmpgtq	3133065982, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0xfe,0xca,0xbe,0xba]
+        	pcmpgtq	0xbabecafe,%xmm5
+
+// CHECK: pcmpgtq	305419896, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0x2d,0x78,0x56,0x34,0x12]
+        	pcmpgtq	0x12345678,%xmm5
+
+// CHECK: pcmpgtq	%xmm5, %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x38,0x37,0xed]
+        	pcmpgtq	%xmm5,%xmm5
+
+// CHECK: crc32b 	%bl, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
+                crc32b %bl, %eax
+
+// CHECK: crc32b 	4(%ebx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+                crc32b 4(%ebx), %eax
+
+// CHECK: crc32w 	%bx, %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
+                crc32w %bx, %eax
+
+// CHECK: crc32w 	4(%ebx), %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
+                crc32w 4(%ebx), %eax
+
+// CHECK: crc32l 	%ebx, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
+                crc32l %ebx, %eax
+
+// CHECK: crc32l 	4(%ebx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
+                crc32l 4(%ebx), %eax
+
+// CHECK: crc32l 	3735928559(%ebx,%ecx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: crc32l 	69, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0x45,0x00,0x00,0x00]
+                crc32l 0x45,%ecx
+
+// CHECK: crc32l 	32493, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xed,0x7e,0x00,0x00]
+                crc32l 0x7eed,%ecx
+
+// CHECK: crc32l 	3133065982, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0d,0xfe,0xca,0xbe,0xba]
+                crc32l 0xbabecafe,%ecx
+
+// CHECK: crc32l 	%ecx, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
+                crc32l %ecx,%ecx
+
+// CHECK: pcmpistrm	$125, %xmm1, %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0x62,0xd1,0x7d]
+                pcmpistrm $125, %xmm1, %xmm2
+
+// CHECK: pcmpistrm	$125, (%edx,%eax,4), %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0x62,0x14,0x82,0x7d]
+                pcmpistrm $125, (%edx,%eax,4), %xmm2
+
+// CHECK: aesimc	%xmm0, %xmm1
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdb,0xc8]
+                aesimc %xmm0,%xmm1
+
+// CHECK: aesimc	(%eax), %xmm1
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdb,0x08]
+                aesimc (%eax),%xmm1
+
+// CHECK: aesenc	%xmm1, %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdc,0xd1]
+                aesenc %xmm1,%xmm2
+
+// CHECK: aesenc	4(%ebx), %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdc,0x53,0x04]
+                aesenc 4(%ebx),%xmm2
+
+// CHECK: aesenclast	%xmm3, %xmm4
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdd,0xe3]
+                aesenclast %xmm3,%xmm4
+
+// CHECK: aesenclast	4(%edx,%edi), %xmm4
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdd,0x64,0x3a,0x04]
+                aesenclast 4(%edx,%edi),%xmm4
+
+// CHECK: aesdec	%xmm5, %xmm6
+// CHECK:  encoding: [0x66,0x0f,0x38,0xde,0xf5]
+                aesdec %xmm5,%xmm6
+
+// CHECK: aesdec	4(%ecx,%eax,8), %xmm6
+// CHECK:  encoding: [0x66,0x0f,0x38,0xde,0x74,0xc1,0x04]
+                aesdec 4(%ecx,%eax,8),%xmm6
+
+// CHECK: aesdeclast	%xmm7, %xmm0
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdf,0xc7]
+                aesdeclast %xmm7,%xmm0
+
+// CHECK: aesdeclast	3405691582, %xmm0
+// CHECK:  encoding: [0x66,0x0f,0x38,0xdf,0x05,0xbe,0xba,0xfe,0xca]
+                aesdeclast 0xcafebabe,%xmm0
+
+// CHECK: aeskeygenassist	$125, %xmm1, %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0xd1,0x7d]
+                aeskeygenassist $125, %xmm1, %xmm2
+
+// CHECK: aeskeygenassist	$125, (%edx,%eax,4), %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0x14,0x82,0x7d]
+                aeskeygenassist $125, (%edx,%eax,4), %xmm2
+
+// rdar://8017638
+// CHECK: aeskeygenassist	$128, %xmm1, %xmm2
+// CHECK:  encoding: [0x66,0x0f,0x3a,0xdf,0xd1,0x80]
+		aeskeygenassist $128, %xmm1, %xmm2
+
+// rdar://7910087
+// CHECK: bsfw	%bx, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbc,0xdb]
+          bsfw  %bx, %bx
+
+// CHECK: bsfw	3735928559(%ebx,%ecx,8), %bx
+// CHECK:  encoding: [0x66,0x0f,0xbc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+          bsfw  3735928559(%ebx,%ecx,8), %bx
+
+// CHECK: bsrw	%bx, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbd,0xdb]
+          bsrw  %bx, %bx
+
+// CHECK: bsrw	305419896, %bx
+// CHECK:  encoding: [0x66,0x0f,0xbd,0x1d,0x78,0x56,0x34,0x12]
+          bsrw  305419896, %bx
+
+// radr://7901779
+// CHECK: pushl   $127
+// CHECK:  encoding: [0x6a,0x7f]
+          pushl   $127
+
+// CHECK: pushw   $254
+// CHECK:  encoding: [0x66,0x68,0xfe,0x00]
+          pushw   $254
+
+// CHECK: pushl   $254
+// CHECK:  encoding: [0x68,0xfe,0x00,0x00,0x00]
+          pushl   $254
+
+// radr://7928400
+// CHECK: movq    %mm3, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x7f,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+          movq    %mm3, 3735928559(%ebx,%ecx,8)
+
+// CHECK: movd    %mm3, 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0x0f,0x7e,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+          movd    %mm3, 3735928559(%ebx,%ecx,8)
+
+// CHECK: movq    3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0xf3,0x0f,0x7e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          movq    3735928559(%ebx,%ecx,8), %xmm5
+
+// CHECK: movd    3735928559(%ebx,%ecx,8), %xmm5
+// CHECK:  encoding: [0x66,0x0f,0x6e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+          movd    3735928559(%ebx,%ecx,8), %xmm5
+
+// radr://7914715
+// CHECK: fcoml   3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0x94,0xcb,0xef,0xbe,0xad,0xde]
+          fcoml   3735928559(%ebx,%ecx,8)
+
+// CHECK: fcoms   32493
+// CHECK:  encoding: [0xd8,0x15,0xed,0x7e,0x00,0x00]
+          fcoms   32493
+
+// CHECK: fcompl  3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xdc,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+          fcompl  3735928559(%ebx,%ecx,8)
+
+// CHECK: fcomps  32493
+// CHECK:  encoding: [0xd8,0x1d,0xed,0x7e,0x00,0x00]
+          fcomps  32493
+
+// CHECK: ficoml  3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x94,0xcb,0xef,0xbe,0xad,0xde]
+          ficoml  3735928559(%ebx,%ecx,8)
+
+// CHECK: ficoms  32493
+// CHECK:  encoding: [0xde,0x15,0xed,0x7e,0x00,0x00]
+          ficoms  32493
+
+// CHECK: ficompl 3735928559(%ebx,%ecx,8)
+// CHECK:  encoding: [0xda,0x9c,0xcb,0xef,0xbe,0xad,0xde]
+          ficompl 3735928559(%ebx,%ecx,8)
+
+// CHECK: ficomps 32493
+// CHECK:  encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00]
+          ficomps 32493
+
+// CHECK: movl  57005(,%eiz), %ebx
+// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
+          movl  57005(,%eiz), %ebx
+
+// CHECK: movl  48879(,%eiz), %eax
+// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
+          movl  48879(,%eiz), %eax
+
+// CHECK: movl  -4(,%eiz,8), %eax
+// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
+          movl  -4(,%eiz,8), %eax
+
+// CHECK: movl  (%ecx,%eiz), %eax
+// CHECK: encoding: [0x8b,0x04,0x21]
+          movl  (%ecx,%eiz), %eax
+
+// CHECK: movl  (%ecx,%eiz,8), %eax
+// CHECK: encoding: [0x8b,0x04,0xe1]
+          movl  (%ecx,%eiz,8), %eax
+
+// CHECK: addl	$4294967295, %eax       # encoding: [0x83,0xc0,0xff]
+        addl $0xFFFFFFFF, %eax
+
+// CHECK: addw	$65535, %ax       # encoding: [0x66,0x83,0xc0,0xff]
+        addw $0xFFFF, %ax
+
+
+// CHECK: 	movb	$127, 3735928559(%ebx,%ecx,8)
+        	movb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movb	$127, 69
+        	movb	$0x7f,0x45
+
+// CHECK: 	movb	$127, 32493
+        	movb	$0x7f,0x7eed
+
+// CHECK: 	movb	$127, 3133065982
+        	movb	$0x7f,0xbabecafe
+
+// CHECK: 	movb	$127, 305419896
+        	movb	$0x7f,0x12345678
+
+// CHECK: 	movw	$31438, 3735928559(%ebx,%ecx,8)
+        	movw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movw	$31438, 69
+        	movw	$0x7ace,0x45
+
+// CHECK: 	movw	$31438, 32493
+        	movw	$0x7ace,0x7eed
+
+// CHECK: 	movw	$31438, 3133065982
+        	movw	$0x7ace,0xbabecafe
+
+// CHECK: 	movw	$31438, 305419896
+        	movw	$0x7ace,0x12345678
+
+// CHECK: 	movl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	movl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movl	$2063514302, 69
+        	movl	$0x7afebabe,0x45
+
+// CHECK: 	movl	$2063514302, 32493
+        	movl	$0x7afebabe,0x7eed
+
+// CHECK: 	movl	$2063514302, 3133065982
+        	movl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	movl	$2063514302, 305419896
+        	movl	$0x7afebabe,0x12345678
+
+// CHECK: 	movl	$324478056, 3735928559(%ebx,%ecx,8)
+        	movl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movl	$324478056, 69
+        	movl	$0x13572468,0x45
+
+// CHECK: 	movl	$324478056, 32493
+        	movl	$0x13572468,0x7eed
+
+// CHECK: 	movl	$324478056, 3133065982
+        	movl	$0x13572468,0xbabecafe
+
+// CHECK: 	movl	$324478056, 305419896
+        	movl	$0x13572468,0x12345678
+
+// CHECK: 	movsbl	3735928559(%ebx,%ecx,8), %ecx
+        	movsbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movsbl	69, %ecx
+        	movsbl	0x45,%ecx
+
+// CHECK: 	movsbl	32493, %ecx
+        	movsbl	0x7eed,%ecx
+
+// CHECK: 	movsbl	3133065982, %ecx
+        	movsbl	0xbabecafe,%ecx
+
+// CHECK: 	movsbl	305419896, %ecx
+        	movsbl	0x12345678,%ecx
+
+// CHECK: 	movsbw	3735928559(%ebx,%ecx,8), %bx
+        	movsbw	0xdeadbeef(%ebx,%ecx,8),%bx
+
+// CHECK: 	movsbw	69, %bx
+        	movsbw	0x45,%bx
+
+// CHECK: 	movsbw	32493, %bx
+        	movsbw	0x7eed,%bx
+
+// CHECK: 	movsbw	3133065982, %bx
+        	movsbw	0xbabecafe,%bx
+
+// CHECK: 	movsbw	305419896, %bx
+        	movsbw	0x12345678,%bx
+
+// CHECK: 	movswl	3735928559(%ebx,%ecx,8), %ecx
+        	movswl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movswl	69, %ecx
+        	movswl	0x45,%ecx
+
+// CHECK: 	movswl	32493, %ecx
+        	movswl	0x7eed,%ecx
+
+// CHECK: 	movswl	3133065982, %ecx
+        	movswl	0xbabecafe,%ecx
+
+// CHECK: 	movswl	305419896, %ecx
+        	movswl	0x12345678,%ecx
+
+// CHECK: 	movzbl	3735928559(%ebx,%ecx,8), %ecx
+        	movzbl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movzbl	69, %ecx
+        	movzbl	0x45,%ecx
+
+// CHECK: 	movzbl	32493, %ecx
+        	movzbl	0x7eed,%ecx
+
+// CHECK: 	movzbl	3133065982, %ecx
+        	movzbl	0xbabecafe,%ecx
+
+// CHECK: 	movzbl	305419896, %ecx
+        	movzbl	0x12345678,%ecx
+
+// CHECK: 	movzbw	3735928559(%ebx,%ecx,8), %bx
+        	movzbw	0xdeadbeef(%ebx,%ecx,8),%bx
+
+// CHECK: 	movzbw	69, %bx
+        	movzbw	0x45,%bx
+
+// CHECK: 	movzbw	32493, %bx
+        	movzbw	0x7eed,%bx
+
+// CHECK: 	movzbw	3133065982, %bx
+        	movzbw	0xbabecafe,%bx
+
+// CHECK: 	movzbw	305419896, %bx
+        	movzbw	0x12345678,%bx
+
+// CHECK: 	movzwl	3735928559(%ebx,%ecx,8), %ecx
+        	movzwl	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	movzwl	69, %ecx
+        	movzwl	0x45,%ecx
+
+// CHECK: 	movzwl	32493, %ecx
+        	movzwl	0x7eed,%ecx
+
+// CHECK: 	movzwl	3133065982, %ecx
+        	movzwl	0xbabecafe,%ecx
+
+// CHECK: 	movzwl	305419896, %ecx
+        	movzwl	0x12345678,%ecx
+
+// CHECK: 	pushw	32493
+        	pushw	0x7eed
+
+// CHECK: 	popw	32493
+        	popw	0x7eed
+
+// CHECK: 	pushf
+        	pushfl
+
+// CHECK: 	pushfl
+        	pushfl
+
+// CHECK: 	popf
+        	popfl
+
+// CHECK: 	popfl
+        	popfl
+
+// CHECK: 	clc
+        	clc
+
+// CHECK: 	cld
+        	cld
+
+// CHECK: 	cli
+        	cli
+
+// CHECK: 	clts
+        	clts
+
+// CHECK: 	cmc
+        	cmc
+
+// CHECK: 	lahf
+        	lahf
+
+// CHECK: 	sahf
+        	sahf
+
+// CHECK: 	stc
+        	stc
+
+// CHECK: 	std
+        	std
+
+// CHECK: 	sti
+        	sti
+
+// CHECK: 	addb	$254, 3735928559(%ebx,%ecx,8)
+        	addb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addb	$254, 69
+        	addb	$0xfe,0x45
+
+// CHECK: 	addb	$254, 32493
+        	addb	$0xfe,0x7eed
+
+// CHECK: 	addb	$254, 3133065982
+        	addb	$0xfe,0xbabecafe
+
+// CHECK: 	addb	$254, 305419896
+        	addb	$0xfe,0x12345678
+
+// CHECK: 	addb	$127, 3735928559(%ebx,%ecx,8)
+        	addb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addb	$127, 69
+        	addb	$0x7f,0x45
+
+// CHECK: 	addb	$127, 32493
+        	addb	$0x7f,0x7eed
+
+// CHECK: 	addb	$127, 3133065982
+        	addb	$0x7f,0xbabecafe
+
+// CHECK: 	addb	$127, 305419896
+        	addb	$0x7f,0x12345678
+
+// CHECK: 	addw	$31438, 3735928559(%ebx,%ecx,8)
+        	addw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addw	$31438, 69
+        	addw	$0x7ace,0x45
+
+// CHECK: 	addw	$31438, 32493
+        	addw	$0x7ace,0x7eed
+
+// CHECK: 	addw	$31438, 3133065982
+        	addw	$0x7ace,0xbabecafe
+
+// CHECK: 	addw	$31438, 305419896
+        	addw	$0x7ace,0x12345678
+
+// CHECK: 	addl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	addl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addl	$2063514302, 69
+        	addl	$0x7afebabe,0x45
+
+// CHECK: 	addl	$2063514302, 32493
+        	addl	$0x7afebabe,0x7eed
+
+// CHECK: 	addl	$2063514302, 3133065982
+        	addl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	addl	$2063514302, 305419896
+        	addl	$0x7afebabe,0x12345678
+
+// CHECK: 	addl	$324478056, 3735928559(%ebx,%ecx,8)
+        	addl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	addl	$324478056, 69
+        	addl	$0x13572468,0x45
+
+// CHECK: 	addl	$324478056, 32493
+        	addl	$0x13572468,0x7eed
+
+// CHECK: 	addl	$324478056, 3133065982
+        	addl	$0x13572468,0xbabecafe
+
+// CHECK: 	addl	$324478056, 305419896
+        	addl	$0x13572468,0x12345678
+
+// CHECK: 	incl	3735928559(%ebx,%ecx,8)
+        	incl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	incw	32493
+        	incw	0x7eed
+
+// CHECK: 	incl	3133065982
+        	incl	0xbabecafe
+
+// CHECK: 	incl	305419896
+        	incl	0x12345678
+
+// CHECK: 	subb	$254, 3735928559(%ebx,%ecx,8)
+        	subb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subb	$254, 69
+        	subb	$0xfe,0x45
+
+// CHECK: 	subb	$254, 32493
+        	subb	$0xfe,0x7eed
+
+// CHECK: 	subb	$254, 3133065982
+        	subb	$0xfe,0xbabecafe
+
+// CHECK: 	subb	$254, 305419896
+        	subb	$0xfe,0x12345678
+
+// CHECK: 	subb	$127, 3735928559(%ebx,%ecx,8)
+        	subb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subb	$127, 69
+        	subb	$0x7f,0x45
+
+// CHECK: 	subb	$127, 32493
+        	subb	$0x7f,0x7eed
+
+// CHECK: 	subb	$127, 3133065982
+        	subb	$0x7f,0xbabecafe
+
+// CHECK: 	subb	$127, 305419896
+        	subb	$0x7f,0x12345678
+
+// CHECK: 	subw	$31438, 3735928559(%ebx,%ecx,8)
+        	subw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subw	$31438, 69
+        	subw	$0x7ace,0x45
+
+// CHECK: 	subw	$31438, 32493
+        	subw	$0x7ace,0x7eed
+
+// CHECK: 	subw	$31438, 3133065982
+        	subw	$0x7ace,0xbabecafe
+
+// CHECK: 	subw	$31438, 305419896
+        	subw	$0x7ace,0x12345678
+
+// CHECK: 	subl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	subl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subl	$2063514302, 69
+        	subl	$0x7afebabe,0x45
+
+// CHECK: 	subl	$2063514302, 32493
+        	subl	$0x7afebabe,0x7eed
+
+// CHECK: 	subl	$2063514302, 3133065982
+        	subl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	subl	$2063514302, 305419896
+        	subl	$0x7afebabe,0x12345678
+
+// CHECK: 	subl	$324478056, 3735928559(%ebx,%ecx,8)
+        	subl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	subl	$324478056, 69
+        	subl	$0x13572468,0x45
+
+// CHECK: 	subl	$324478056, 32493
+        	subl	$0x13572468,0x7eed
+
+// CHECK: 	subl	$324478056, 3133065982
+        	subl	$0x13572468,0xbabecafe
+
+// CHECK: 	subl	$324478056, 305419896
+        	subl	$0x13572468,0x12345678
+
+// CHECK: 	decl	3735928559(%ebx,%ecx,8)
+        	decl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	decw	32493
+        	decw	0x7eed
+
+// CHECK: 	decl	3133065982
+        	decl	0xbabecafe
+
+// CHECK: 	decl	305419896
+        	decl	0x12345678
+
+// CHECK: 	sbbb	$254, 3735928559(%ebx,%ecx,8)
+        	sbbb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbb	$254, 69
+        	sbbb	$0xfe,0x45
+
+// CHECK: 	sbbb	$254, 32493
+        	sbbb	$0xfe,0x7eed
+
+// CHECK: 	sbbb	$254, 3133065982
+        	sbbb	$0xfe,0xbabecafe
+
+// CHECK: 	sbbb	$254, 305419896
+        	sbbb	$0xfe,0x12345678
+
+// CHECK: 	sbbb	$127, 3735928559(%ebx,%ecx,8)
+        	sbbb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbb	$127, 69
+        	sbbb	$0x7f,0x45
+
+// CHECK: 	sbbb	$127, 32493
+        	sbbb	$0x7f,0x7eed
+
+// CHECK: 	sbbb	$127, 3133065982
+        	sbbb	$0x7f,0xbabecafe
+
+// CHECK: 	sbbb	$127, 305419896
+        	sbbb	$0x7f,0x12345678
+
+// CHECK: 	sbbw	$31438, 3735928559(%ebx,%ecx,8)
+        	sbbw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbw	$31438, 69
+        	sbbw	$0x7ace,0x45
+
+// CHECK: 	sbbw	$31438, 32493
+        	sbbw	$0x7ace,0x7eed
+
+// CHECK: 	sbbw	$31438, 3133065982
+        	sbbw	$0x7ace,0xbabecafe
+
+// CHECK: 	sbbw	$31438, 305419896
+        	sbbw	$0x7ace,0x12345678
+
+// CHECK: 	sbbl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	sbbl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbl	$2063514302, 69
+        	sbbl	$0x7afebabe,0x45
+
+// CHECK: 	sbbl	$2063514302, 32493
+        	sbbl	$0x7afebabe,0x7eed
+
+// CHECK: 	sbbl	$2063514302, 3133065982
+        	sbbl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	sbbl	$2063514302, 305419896
+        	sbbl	$0x7afebabe,0x12345678
+
+// CHECK: 	sbbl	$324478056, 3735928559(%ebx,%ecx,8)
+        	sbbl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sbbl	$324478056, 69
+        	sbbl	$0x13572468,0x45
+
+// CHECK: 	sbbl	$324478056, 32493
+        	sbbl	$0x13572468,0x7eed
+
+// CHECK: 	sbbl	$324478056, 3133065982
+        	sbbl	$0x13572468,0xbabecafe
+
+// CHECK: 	sbbl	$324478056, 305419896
+        	sbbl	$0x13572468,0x12345678
+
+// CHECK: 	cmpb	$254, 3735928559(%ebx,%ecx,8)
+        	cmpb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpb	$254, 69
+        	cmpb	$0xfe,0x45
+
+// CHECK: 	cmpb	$254, 32493
+        	cmpb	$0xfe,0x7eed
+
+// CHECK: 	cmpb	$254, 3133065982
+        	cmpb	$0xfe,0xbabecafe
+
+// CHECK: 	cmpb	$254, 305419896
+        	cmpb	$0xfe,0x12345678
+
+// CHECK: 	cmpb	$127, 3735928559(%ebx,%ecx,8)
+        	cmpb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpb	$127, 69
+        	cmpb	$0x7f,0x45
+
+// CHECK: 	cmpb	$127, 32493
+        	cmpb	$0x7f,0x7eed
+
+// CHECK: 	cmpb	$127, 3133065982
+        	cmpb	$0x7f,0xbabecafe
+
+// CHECK: 	cmpb	$127, 305419896
+        	cmpb	$0x7f,0x12345678
+
+// CHECK: 	cmpw	$31438, 3735928559(%ebx,%ecx,8)
+        	cmpw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpw	$31438, 69
+        	cmpw	$0x7ace,0x45
+
+// CHECK: 	cmpw	$31438, 32493
+        	cmpw	$0x7ace,0x7eed
+
+// CHECK: 	cmpw	$31438, 3133065982
+        	cmpw	$0x7ace,0xbabecafe
+
+// CHECK: 	cmpw	$31438, 305419896
+        	cmpw	$0x7ace,0x12345678
+
+// CHECK: 	cmpl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	cmpl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpl	$2063514302, 69
+        	cmpl	$0x7afebabe,0x45
+
+// CHECK: 	cmpl	$2063514302, 32493
+        	cmpl	$0x7afebabe,0x7eed
+
+// CHECK: 	cmpl	$2063514302, 3133065982
+        	cmpl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	cmpl	$2063514302, 305419896
+        	cmpl	$0x7afebabe,0x12345678
+
+// CHECK: 	cmpl	$324478056, 3735928559(%ebx,%ecx,8)
+        	cmpl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpl	$324478056, 69
+        	cmpl	$0x13572468,0x45
+
+// CHECK: 	cmpl	$324478056, 32493
+        	cmpl	$0x13572468,0x7eed
+
+// CHECK: 	cmpl	$324478056, 3133065982
+        	cmpl	$0x13572468,0xbabecafe
+
+// CHECK: 	cmpl	$324478056, 305419896
+        	cmpl	$0x13572468,0x12345678
+
+// CHECK: 	testb	$127, 3735928559(%ebx,%ecx,8)
+        	testb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testb	$127, 69
+        	testb	$0x7f,0x45
+
+// CHECK: 	testb	$127, 32493
+        	testb	$0x7f,0x7eed
+
+// CHECK: 	testb	$127, 3133065982
+        	testb	$0x7f,0xbabecafe
+
+// CHECK: 	testb	$127, 305419896
+        	testb	$0x7f,0x12345678
+
+// CHECK: 	testw	$31438, 3735928559(%ebx,%ecx,8)
+        	testw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testw	$31438, 69
+        	testw	$0x7ace,0x45
+
+// CHECK: 	testw	$31438, 32493
+        	testw	$0x7ace,0x7eed
+
+// CHECK: 	testw	$31438, 3133065982
+        	testw	$0x7ace,0xbabecafe
+
+// CHECK: 	testw	$31438, 305419896
+        	testw	$0x7ace,0x12345678
+
+// CHECK: 	testl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	testl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testl	$2063514302, 69
+        	testl	$0x7afebabe,0x45
+
+// CHECK: 	testl	$2063514302, 32493
+        	testl	$0x7afebabe,0x7eed
+
+// CHECK: 	testl	$2063514302, 3133065982
+        	testl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	testl	$2063514302, 305419896
+        	testl	$0x7afebabe,0x12345678
+
+// CHECK: 	testl	$324478056, 3735928559(%ebx,%ecx,8)
+        	testl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	testl	$324478056, 69
+        	testl	$0x13572468,0x45
+
+// CHECK: 	testl	$324478056, 32493
+        	testl	$0x13572468,0x7eed
+
+// CHECK: 	testl	$324478056, 3133065982
+        	testl	$0x13572468,0xbabecafe
+
+// CHECK: 	testl	$324478056, 305419896
+        	testl	$0x13572468,0x12345678
+
+// CHECK: 	andb	$254, 3735928559(%ebx,%ecx,8)
+        	andb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andb	$254, 69
+        	andb	$0xfe,0x45
+
+// CHECK: 	andb	$254, 32493
+        	andb	$0xfe,0x7eed
+
+// CHECK: 	andb	$254, 3133065982
+        	andb	$0xfe,0xbabecafe
+
+// CHECK: 	andb	$254, 305419896
+        	andb	$0xfe,0x12345678
+
+// CHECK: 	andb	$127, 3735928559(%ebx,%ecx,8)
+        	andb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andb	$127, 69
+        	andb	$0x7f,0x45
+
+// CHECK: 	andb	$127, 32493
+        	andb	$0x7f,0x7eed
+
+// CHECK: 	andb	$127, 3133065982
+        	andb	$0x7f,0xbabecafe
+
+// CHECK: 	andb	$127, 305419896
+        	andb	$0x7f,0x12345678
+
+// CHECK: 	andw	$31438, 3735928559(%ebx,%ecx,8)
+        	andw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andw	$31438, 69
+        	andw	$0x7ace,0x45
+
+// CHECK: 	andw	$31438, 32493
+        	andw	$0x7ace,0x7eed
+
+// CHECK: 	andw	$31438, 3133065982
+        	andw	$0x7ace,0xbabecafe
+
+// CHECK: 	andw	$31438, 305419896
+        	andw	$0x7ace,0x12345678
+
+// CHECK: 	andl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	andl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andl	$2063514302, 69
+        	andl	$0x7afebabe,0x45
+
+// CHECK: 	andl	$2063514302, 32493
+        	andl	$0x7afebabe,0x7eed
+
+// CHECK: 	andl	$2063514302, 3133065982
+        	andl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	andl	$2063514302, 305419896
+        	andl	$0x7afebabe,0x12345678
+
+// CHECK: 	andl	$324478056, 3735928559(%ebx,%ecx,8)
+        	andl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	andl	$324478056, 69
+        	andl	$0x13572468,0x45
+
+// CHECK: 	andl	$324478056, 32493
+        	andl	$0x13572468,0x7eed
+
+// CHECK: 	andl	$324478056, 3133065982
+        	andl	$0x13572468,0xbabecafe
+
+// CHECK: 	andl	$324478056, 305419896
+        	andl	$0x13572468,0x12345678
+
+// CHECK: 	orb	$254, 3735928559(%ebx,%ecx,8)
+        	orb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orb	$254, 69
+        	orb	$0xfe,0x45
+
+// CHECK: 	orb	$254, 32493
+        	orb	$0xfe,0x7eed
+
+// CHECK: 	orb	$254, 3133065982
+        	orb	$0xfe,0xbabecafe
+
+// CHECK: 	orb	$254, 305419896
+        	orb	$0xfe,0x12345678
+
+// CHECK: 	orb	$127, 3735928559(%ebx,%ecx,8)
+        	orb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orb	$127, 69
+        	orb	$0x7f,0x45
+
+// CHECK: 	orb	$127, 32493
+        	orb	$0x7f,0x7eed
+
+// CHECK: 	orb	$127, 3133065982
+        	orb	$0x7f,0xbabecafe
+
+// CHECK: 	orb	$127, 305419896
+        	orb	$0x7f,0x12345678
+
+// CHECK: 	orw	$31438, 3735928559(%ebx,%ecx,8)
+        	orw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orw	$31438, 69
+        	orw	$0x7ace,0x45
+
+// CHECK: 	orw	$31438, 32493
+        	orw	$0x7ace,0x7eed
+
+// CHECK: 	orw	$31438, 3133065982
+        	orw	$0x7ace,0xbabecafe
+
+// CHECK: 	orw	$31438, 305419896
+        	orw	$0x7ace,0x12345678
+
+// CHECK: 	orl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	orl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orl	$2063514302, 69
+        	orl	$0x7afebabe,0x45
+
+// CHECK: 	orl	$2063514302, 32493
+        	orl	$0x7afebabe,0x7eed
+
+// CHECK: 	orl	$2063514302, 3133065982
+        	orl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	orl	$2063514302, 305419896
+        	orl	$0x7afebabe,0x12345678
+
+// CHECK: 	orl	$324478056, 3735928559(%ebx,%ecx,8)
+        	orl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	orl	$324478056, 69
+        	orl	$0x13572468,0x45
+
+// CHECK: 	orl	$324478056, 32493
+        	orl	$0x13572468,0x7eed
+
+// CHECK: 	orl	$324478056, 3133065982
+        	orl	$0x13572468,0xbabecafe
+
+// CHECK: 	orl	$324478056, 305419896
+        	orl	$0x13572468,0x12345678
+
+// CHECK: 	xorb	$254, 3735928559(%ebx,%ecx,8)
+        	xorb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorb	$254, 69
+        	xorb	$0xfe,0x45
+
+// CHECK: 	xorb	$254, 32493
+        	xorb	$0xfe,0x7eed
+
+// CHECK: 	xorb	$254, 3133065982
+        	xorb	$0xfe,0xbabecafe
+
+// CHECK: 	xorb	$254, 305419896
+        	xorb	$0xfe,0x12345678
+
+// CHECK: 	xorb	$127, 3735928559(%ebx,%ecx,8)
+        	xorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorb	$127, 69
+        	xorb	$0x7f,0x45
+
+// CHECK: 	xorb	$127, 32493
+        	xorb	$0x7f,0x7eed
+
+// CHECK: 	xorb	$127, 3133065982
+        	xorb	$0x7f,0xbabecafe
+
+// CHECK: 	xorb	$127, 305419896
+        	xorb	$0x7f,0x12345678
+
+// CHECK: 	xorw	$31438, 3735928559(%ebx,%ecx,8)
+        	xorw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorw	$31438, 69
+        	xorw	$0x7ace,0x45
+
+// CHECK: 	xorw	$31438, 32493
+        	xorw	$0x7ace,0x7eed
+
+// CHECK: 	xorw	$31438, 3133065982
+        	xorw	$0x7ace,0xbabecafe
+
+// CHECK: 	xorw	$31438, 305419896
+        	xorw	$0x7ace,0x12345678
+
+// CHECK: 	xorl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	xorl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorl	$2063514302, 69
+        	xorl	$0x7afebabe,0x45
+
+// CHECK: 	xorl	$2063514302, 32493
+        	xorl	$0x7afebabe,0x7eed
+
+// CHECK: 	xorl	$2063514302, 3133065982
+        	xorl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	xorl	$2063514302, 305419896
+        	xorl	$0x7afebabe,0x12345678
+
+// CHECK: 	xorl	$324478056, 3735928559(%ebx,%ecx,8)
+        	xorl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	xorl	$324478056, 69
+        	xorl	$0x13572468,0x45
+
+// CHECK: 	xorl	$324478056, 32493
+        	xorl	$0x13572468,0x7eed
+
+// CHECK: 	xorl	$324478056, 3133065982
+        	xorl	$0x13572468,0xbabecafe
+
+// CHECK: 	xorl	$324478056, 305419896
+        	xorl	$0x13572468,0x12345678
+
+// CHECK: 	adcb	$254, 3735928559(%ebx,%ecx,8)
+        	adcb	$0xfe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcb	$254, 69
+        	adcb	$0xfe,0x45
+
+// CHECK: 	adcb	$254, 32493
+        	adcb	$0xfe,0x7eed
+
+// CHECK: 	adcb	$254, 3133065982
+        	adcb	$0xfe,0xbabecafe
+
+// CHECK: 	adcb	$254, 305419896
+        	adcb	$0xfe,0x12345678
+
+// CHECK: 	adcb	$127, 3735928559(%ebx,%ecx,8)
+        	adcb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcb	$127, 69
+        	adcb	$0x7f,0x45
+
+// CHECK: 	adcb	$127, 32493
+        	adcb	$0x7f,0x7eed
+
+// CHECK: 	adcb	$127, 3133065982
+        	adcb	$0x7f,0xbabecafe
+
+// CHECK: 	adcb	$127, 305419896
+        	adcb	$0x7f,0x12345678
+
+// CHECK: 	adcw	$31438, 3735928559(%ebx,%ecx,8)
+        	adcw	$0x7ace,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcw	$31438, 69
+        	adcw	$0x7ace,0x45
+
+// CHECK: 	adcw	$31438, 32493
+        	adcw	$0x7ace,0x7eed
+
+// CHECK: 	adcw	$31438, 3133065982
+        	adcw	$0x7ace,0xbabecafe
+
+// CHECK: 	adcw	$31438, 305419896
+        	adcw	$0x7ace,0x12345678
+
+// CHECK: 	adcl	$2063514302, 3735928559(%ebx,%ecx,8)
+        	adcl	$0x7afebabe,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcl	$2063514302, 69
+        	adcl	$0x7afebabe,0x45
+
+// CHECK: 	adcl	$2063514302, 32493
+        	adcl	$0x7afebabe,0x7eed
+
+// CHECK: 	adcl	$2063514302, 3133065982
+        	adcl	$0x7afebabe,0xbabecafe
+
+// CHECK: 	adcl	$2063514302, 305419896
+        	adcl	$0x7afebabe,0x12345678
+
+// CHECK: 	adcl	$324478056, 3735928559(%ebx,%ecx,8)
+        	adcl	$0x13572468,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	adcl	$324478056, 69
+        	adcl	$0x13572468,0x45
+
+// CHECK: 	adcl	$324478056, 32493
+        	adcl	$0x13572468,0x7eed
+
+// CHECK: 	adcl	$324478056, 3133065982
+        	adcl	$0x13572468,0xbabecafe
+
+// CHECK: 	adcl	$324478056, 305419896
+        	adcl	$0x13572468,0x12345678
+
+// CHECK: 	negl	3735928559(%ebx,%ecx,8)
+        	negl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	negw	32493
+        	negw	0x7eed
+
+// CHECK: 	negl	3133065982
+        	negl	0xbabecafe
+
+// CHECK: 	negl	305419896
+        	negl	0x12345678
+
+// CHECK: 	notl	3735928559(%ebx,%ecx,8)
+        	notl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	notw	32493
+        	notw	0x7eed
+
+// CHECK: 	notl	3133065982
+        	notl	0xbabecafe
+
+// CHECK: 	notl	305419896
+        	notl	0x12345678
+
+// CHECK: 	cbtw
+        	cbtw
+
+// CHECK: 	cwtl
+        	cwtl
+
+// CHECK: 	cwtd
+        	cwtd
+
+// CHECK: 	cltd
+        	cltd
+
+// CHECK: 	mull	3735928559(%ebx,%ecx,8)
+        	mull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	mulw	32493
+        	mulw	0x7eed
+
+// CHECK: 	mull	3133065982
+        	mull	0xbabecafe
+
+// CHECK: 	mull	305419896
+        	mull	0x12345678
+
+// CHECK: 	imull	3735928559(%ebx,%ecx,8)
+        	imull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	imulw	32493
+        	imulw	0x7eed
+
+// CHECK: 	imull	3133065982
+        	imull	0xbabecafe
+
+// CHECK: 	imull	305419896
+        	imull	0x12345678
+
+// CHECK: 	divl	3735928559(%ebx,%ecx,8)
+        	divl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	divw	32493
+        	divw	0x7eed
+
+// CHECK: 	divl	3133065982
+        	divl	0xbabecafe
+
+// CHECK: 	divl	305419896
+        	divl	0x12345678
+
+// CHECK: 	idivl	3735928559(%ebx,%ecx,8)
+        	idivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	idivw	32493
+        	idivw	0x7eed
+
+// CHECK: 	idivl	3133065982
+        	idivl	0xbabecafe
+
+// CHECK: 	idivl	305419896
+        	idivl	0x12345678
+
+// CHECK: 	roll	$0, 3735928559(%ebx,%ecx,8)
+        	roll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	roll	$0, 69
+        	roll	$0,0x45
+
+// CHECK: 	roll	$0, 32493
+        	roll	$0,0x7eed
+
+// CHECK: 	roll	$0, 3133065982
+        	roll	$0,0xbabecafe
+
+// CHECK: 	roll	$0, 305419896
+        	roll	$0,0x12345678
+
+// CHECK: 	rolb	$127, 3735928559(%ebx,%ecx,8)
+        	rolb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rolb	$127, 69
+        	rolb	$0x7f,0x45
+
+// CHECK: 	rolb	$127, 32493
+        	rolb	$0x7f,0x7eed
+
+// CHECK: 	rolb	$127, 3133065982
+        	rolb	$0x7f,0xbabecafe
+
+// CHECK: 	rolb	$127, 305419896
+        	rolb	$0x7f,0x12345678
+
+// CHECK: 	roll	3735928559(%ebx,%ecx,8)
+        	roll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rolw	32493
+        	rolw	0x7eed
+
+// CHECK: 	roll	3133065982
+        	roll	0xbabecafe
+
+// CHECK: 	roll	305419896
+        	roll	0x12345678
+
+// CHECK: 	rorl	$0, 3735928559(%ebx,%ecx,8)
+        	rorl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorl	$0, 69
+        	rorl	$0,0x45
+
+// CHECK: 	rorl	$0, 32493
+        	rorl	$0,0x7eed
+
+// CHECK: 	rorl	$0, 3133065982
+        	rorl	$0,0xbabecafe
+
+// CHECK: 	rorl	$0, 305419896
+        	rorl	$0,0x12345678
+
+// CHECK: 	rorb	$127, 3735928559(%ebx,%ecx,8)
+        	rorb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorb	$127, 69
+        	rorb	$0x7f,0x45
+
+// CHECK: 	rorb	$127, 32493
+        	rorb	$0x7f,0x7eed
+
+// CHECK: 	rorb	$127, 3133065982
+        	rorb	$0x7f,0xbabecafe
+
+// CHECK: 	rorb	$127, 305419896
+        	rorb	$0x7f,0x12345678
+
+// CHECK: 	rorl	3735928559(%ebx,%ecx,8)
+        	rorl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rorw	32493
+        	rorw	0x7eed
+
+// CHECK: 	rorl	3133065982
+        	rorl	0xbabecafe
+
+// CHECK: 	rorl	305419896
+        	rorl	0x12345678
+
+// CHECK: 	rcll	$0, 3735928559(%ebx,%ecx,8)
+        	rcll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rcll	$0, 69
+        	rcll	$0,0x45
+
+// CHECK: 	rcll	$0, 32493
+        	rcll	$0,0x7eed
+
+// CHECK: 	rcll	$0, 3133065982
+        	rcll	$0,0xbabecafe
+
+// CHECK: 	rcll	$0, 305419896
+        	rcll	$0,0x12345678
+
+// CHECK: 	rclb	$127, 3735928559(%ebx,%ecx,8)
+        	rclb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rclb	$127, 69
+        	rclb	$0x7f,0x45
+
+// CHECK: 	rclb	$127, 32493
+        	rclb	$0x7f,0x7eed
+
+// CHECK: 	rclb	$127, 3133065982
+        	rclb	$0x7f,0xbabecafe
+
+// CHECK: 	rclb	$127, 305419896
+        	rclb	$0x7f,0x12345678
+
+// CHECK: 	rcrl	$0, 3735928559(%ebx,%ecx,8)
+        	rcrl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rcrl	$0, 69
+        	rcrl	$0,0x45
+
+// CHECK: 	rcrl	$0, 32493
+        	rcrl	$0,0x7eed
+
+// CHECK: 	rcrl	$0, 3133065982
+        	rcrl	$0,0xbabecafe
+
+// CHECK: 	rcrl	$0, 305419896
+        	rcrl	$0,0x12345678
+
+// CHECK: 	rcrb	$127, 3735928559(%ebx,%ecx,8)
+        	rcrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	rcrb	$127, 69
+        	rcrb	$0x7f,0x45
+
+// CHECK: 	rcrb	$127, 32493
+        	rcrb	$0x7f,0x7eed
+
+// CHECK: 	rcrb	$127, 3133065982
+        	rcrb	$0x7f,0xbabecafe
+
+// CHECK: 	rcrb	$127, 305419896
+        	rcrb	$0x7f,0x12345678
+
+// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
+        	sall	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shll	$0, 69
+        	sall	$0,0x45
+
+// CHECK: 	shll	$0, 32493
+        	sall	$0,0x7eed
+
+// CHECK: 	shll	$0, 3133065982
+        	sall	$0,0xbabecafe
+
+// CHECK: 	shll	$0, 305419896
+        	sall	$0,0x12345678
+
+// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
+        	salb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shlb	$127, 69
+        	salb	$0x7f,0x45
+
+// CHECK: 	shlb	$127, 32493
+        	salb	$0x7f,0x7eed
+
+// CHECK: 	shlb	$127, 3133065982
+        	salb	$0x7f,0xbabecafe
+
+// CHECK: 	shlb	$127, 305419896
+        	salb	$0x7f,0x12345678
+
+// CHECK: 	shll	3735928559(%ebx,%ecx,8)
+        	sall	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shlw	32493
+        	salw	0x7eed
+
+// CHECK: 	shll	3133065982
+        	sall	0xbabecafe
+
+// CHECK: 	shll	305419896
+        	sall	0x12345678
+
+// CHECK: 	shll	$0, 3735928559(%ebx,%ecx,8)
+        	shll	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shll	$0, 69
+        	shll	$0,0x45
+
+// CHECK: 	shll	$0, 32493
+        	shll	$0,0x7eed
+
+// CHECK: 	shll	$0, 3133065982
+        	shll	$0,0xbabecafe
+
+// CHECK: 	shll	$0, 305419896
+        	shll	$0,0x12345678
+
+// CHECK: 	shlb	$127, 3735928559(%ebx,%ecx,8)
+        	shlb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shlb	$127, 69
+        	shlb	$0x7f,0x45
+
+// CHECK: 	shlb	$127, 32493
+        	shlb	$0x7f,0x7eed
+
+// CHECK: 	shlb	$127, 3133065982
+        	shlb	$0x7f,0xbabecafe
+
+// CHECK: 	shlb	$127, 305419896
+        	shlb	$0x7f,0x12345678
+
+// CHECK: 	shll	3735928559(%ebx,%ecx,8)
+        	shll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shlw	32493
+        	shlw	0x7eed
+
+// CHECK: 	shll	3133065982
+        	shll	0xbabecafe
+
+// CHECK: 	shll	305419896
+        	shll	0x12345678
+
+// CHECK: 	shrl	$0, 3735928559(%ebx,%ecx,8)
+        	shrl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrl	$0, 69
+        	shrl	$0,0x45
+
+// CHECK: 	shrl	$0, 32493
+        	shrl	$0,0x7eed
+
+// CHECK: 	shrl	$0, 3133065982
+        	shrl	$0,0xbabecafe
+
+// CHECK: 	shrl	$0, 305419896
+        	shrl	$0,0x12345678
+
+// CHECK: 	shrb	$127, 3735928559(%ebx,%ecx,8)
+        	shrb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrb	$127, 69
+        	shrb	$0x7f,0x45
+
+// CHECK: 	shrb	$127, 32493
+        	shrb	$0x7f,0x7eed
+
+// CHECK: 	shrb	$127, 3133065982
+        	shrb	$0x7f,0xbabecafe
+
+// CHECK: 	shrb	$127, 305419896
+        	shrb	$0x7f,0x12345678
+
+// CHECK: 	shrl	3735928559(%ebx,%ecx,8)
+        	shrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	shrw	32493
+        	shrw	0x7eed
+
+// CHECK: 	shrl	3133065982
+        	shrl	0xbabecafe
+
+// CHECK: 	shrl	305419896
+        	shrl	0x12345678
+
+// CHECK: 	sarl	$0, 3735928559(%ebx,%ecx,8)
+        	sarl	$0,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarl	$0, 69
+        	sarl	$0,0x45
+
+// CHECK: 	sarl	$0, 32493
+        	sarl	$0,0x7eed
+
+// CHECK: 	sarl	$0, 3133065982
+        	sarl	$0,0xbabecafe
+
+// CHECK: 	sarl	$0, 305419896
+        	sarl	$0,0x12345678
+
+// CHECK: 	sarb	$127, 3735928559(%ebx,%ecx,8)
+        	sarb	$0x7f,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarb	$127, 69
+        	sarb	$0x7f,0x45
+
+// CHECK: 	sarb	$127, 32493
+        	sarb	$0x7f,0x7eed
+
+// CHECK: 	sarb	$127, 3133065982
+        	sarb	$0x7f,0xbabecafe
+
+// CHECK: 	sarb	$127, 305419896
+        	sarb	$0x7f,0x12345678
+
+// CHECK: 	sarl	3735928559(%ebx,%ecx,8)
+        	sarl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sarw	32493
+        	sarw	0x7eed
+
+// CHECK: 	sarl	3133065982
+        	sarl	0xbabecafe
+
+// CHECK: 	sarl	305419896
+        	sarl	0x12345678
+
+// CHECK: 	calll	3133065982
+        	calll	0xbabecafe
+
+// CHECK: 	calll	*3735928559(%ebx,%ecx,8)
+        	calll	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	calll	3133065982
+        	calll	0xbabecafe
+
+// CHECK: 	calll	305419896
+        	calll	0x12345678
+
+// CHECK: 	calll	*3135175374
+        	call	*0xbadeface
+
+// CHECK: 	calll	*3735928559(%ebx,%ecx,8)
+        	call	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	calll	32493
+        	call	0x7eed
+
+// CHECK: 	calll	3133065982
+        	call	0xbabecafe
+
+// CHECK: 	calll	305419896
+        	call	0x12345678
+
+// CHECK: 	calll	*3135175374
+        	call	*0xbadeface
+
+// CHECK: 	lcallw	*32493
+        	lcallw	*0x7eed
+
+// CHECK: 	jmp	32493
+        	jmp	0x7eed
+
+// CHECK: 	jmp	3133065982
+        	jmp	0xbabecafe
+
+// CHECK: 	jmp	305419896
+        	jmp	0x12345678
+
+// CHECK: 	jmp	-77129852792157442
+        	jmp	0xfeedfacebabecafe
+
+// CHECK: 	jmpl	*3735928559(%ebx,%ecx,8)
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	jmp	32493
+        	jmp	0x7eed
+
+// CHECK: 	jmp	3133065982
+        	jmp	0xbabecafe
+
+// CHECK: 	jmp	305419896
+        	jmp	0x12345678
+
+// CHECK: 	jmpl	*3135175374
+        	jmp	*0xbadeface
+
+// CHECK: 	jmpl	*3735928559(%ebx,%ecx,8)
+        	jmp	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	jmp	32493
+        	jmp	0x7eed
+
+// CHECK: 	jmp	3133065982
+        	jmp	0xbabecafe
+
+// CHECK: 	jmp	305419896
+        	jmp	0x12345678
+
+// CHECK: 	jmpl	*3135175374
+        	jmp	*0xbadeface
+
+// CHECK: 	ljmpl	*3735928559(%ebx,%ecx,8)
+        	ljmpl	*0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ljmpw	*32493
+        	ljmpw	*0x7eed
+
+// CHECK: 	ljmpl	*3133065982
+        	ljmpl	*0xbabecafe
+
+// CHECK: 	ljmpl	*305419896
+        	ljmpl	*0x12345678
+
+// CHECK: 	ret
+        	ret
+
+// CHECK: 	lret
+        	lret
+
+// CHECK: 	enter	$31438, $127
+        	enter	$0x7ace,$0x7f
+
+// CHECK: 	leave
+        	leave
+
+// CHECK: 	jo	32493
+        	jo	0x7eed
+
+// CHECK: 	jo	3133065982
+        	jo	0xbabecafe
+
+// CHECK: 	jo	305419896
+        	jo	0x12345678
+
+// CHECK: 	jo	-77129852792157442
+        	jo	0xfeedfacebabecafe
+
+// CHECK: 	jno	32493
+        	jno	0x7eed
+
+// CHECK: 	jno	3133065982
+        	jno	0xbabecafe
+
+// CHECK: 	jno	305419896
+        	jno	0x12345678
+
+// CHECK: 	jno	-77129852792157442
+        	jno	0xfeedfacebabecafe
+
+// CHECK: 	jb	32493
+        	jb	0x7eed
+
+// CHECK: 	jb	3133065982
+        	jb	0xbabecafe
+
+// CHECK: 	jb	305419896
+        	jb	0x12345678
+
+// CHECK: 	jb	-77129852792157442
+        	jb	0xfeedfacebabecafe
+
+// CHECK: 	jae	32493
+        	jae	0x7eed
+
+// CHECK: 	jae	3133065982
+        	jae	0xbabecafe
+
+// CHECK: 	jae	305419896
+        	jae	0x12345678
+
+// CHECK: 	jae	-77129852792157442
+        	jae	0xfeedfacebabecafe
+
+// CHECK: 	je	32493
+        	je	0x7eed
+
+// CHECK: 	je	3133065982
+        	je	0xbabecafe
+
+// CHECK: 	je	305419896
+        	je	0x12345678
+
+// CHECK: 	je	-77129852792157442
+        	je	0xfeedfacebabecafe
+
+// CHECK: 	jne	32493
+        	jne	0x7eed
+
+// CHECK: 	jne	3133065982
+        	jne	0xbabecafe
+
+// CHECK: 	jne	305419896
+        	jne	0x12345678
+
+// CHECK: 	jne	-77129852792157442
+        	jne	0xfeedfacebabecafe
+
+// CHECK: 	jbe	32493
+        	jbe	0x7eed
+
+// CHECK: 	jbe	3133065982
+        	jbe	0xbabecafe
+
+// CHECK: 	jbe	305419896
+        	jbe	0x12345678
+
+// CHECK: 	jbe	-77129852792157442
+        	jbe	0xfeedfacebabecafe
+
+// CHECK: 	ja	32493
+        	ja	0x7eed
+
+// CHECK: 	ja	3133065982
+        	ja	0xbabecafe
+
+// CHECK: 	ja	305419896
+        	ja	0x12345678
+
+// CHECK: 	ja	-77129852792157442
+        	ja	0xfeedfacebabecafe
+
+// CHECK: 	js	32493
+        	js	0x7eed
+
+// CHECK: 	js	3133065982
+        	js	0xbabecafe
+
+// CHECK: 	js	305419896
+        	js	0x12345678
+
+// CHECK: 	js	-77129852792157442
+        	js	0xfeedfacebabecafe
+
+// CHECK: 	jns	32493
+        	jns	0x7eed
+
+// CHECK: 	jns	3133065982
+        	jns	0xbabecafe
+
+// CHECK: 	jns	305419896
+        	jns	0x12345678
+
+// CHECK: 	jns	-77129852792157442
+        	jns	0xfeedfacebabecafe
+
+// CHECK: 	jp	32493
+        	jp	0x7eed
+
+// CHECK: 	jp	3133065982
+        	jp	0xbabecafe
+
+// CHECK: 	jp	305419896
+        	jp	0x12345678
+
+// CHECK: 	jp	-77129852792157442
+        	jp	0xfeedfacebabecafe
+
+// CHECK: 	jnp	32493
+        	jnp	0x7eed
+
+// CHECK: 	jnp	3133065982
+        	jnp	0xbabecafe
+
+// CHECK: 	jnp	305419896
+        	jnp	0x12345678
+
+// CHECK: 	jnp	-77129852792157442
+        	jnp	0xfeedfacebabecafe
+
+// CHECK: 	jl	32493
+        	jl	0x7eed
+
+// CHECK: 	jl	3133065982
+        	jl	0xbabecafe
+
+// CHECK: 	jl	305419896
+        	jl	0x12345678
+
+// CHECK: 	jl	-77129852792157442
+        	jl	0xfeedfacebabecafe
+
+// CHECK: 	jge	32493
+        	jge	0x7eed
+
+// CHECK: 	jge	3133065982
+        	jge	0xbabecafe
+
+// CHECK: 	jge	305419896
+        	jge	0x12345678
+
+// CHECK: 	jge	-77129852792157442
+        	jge	0xfeedfacebabecafe
+
+// CHECK: 	jle	32493
+        	jle	0x7eed
+
+// CHECK: 	jle	3133065982
+        	jle	0xbabecafe
+
+// CHECK: 	jle	305419896
+        	jle	0x12345678
+
+// CHECK: 	jle	-77129852792157442
+        	jle	0xfeedfacebabecafe
+
+// CHECK: 	jg	32493
+        	jg	0x7eed
+
+// CHECK: 	jg	3133065982
+        	jg	0xbabecafe
+
+// CHECK: 	jg	305419896
+        	jg	0x12345678
+
+// CHECK: 	jg	-77129852792157442
+        	jg	0xfeedfacebabecafe
+
+// CHECK: 	seto	%bl
+        	seto	%bl
+
+// CHECK: 	seto	3735928559(%ebx,%ecx,8)
+        	seto	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	seto	32493
+        	seto	0x7eed
+
+// CHECK: 	seto	3133065982
+        	seto	0xbabecafe
+
+// CHECK: 	seto	305419896
+        	seto	0x12345678
+
+// CHECK: 	setno	%bl
+        	setno	%bl
+
+// CHECK: 	setno	3735928559(%ebx,%ecx,8)
+        	setno	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setno	32493
+        	setno	0x7eed
+
+// CHECK: 	setno	3133065982
+        	setno	0xbabecafe
+
+// CHECK: 	setno	305419896
+        	setno	0x12345678
+
+// CHECK: 	setb	%bl
+        	setb	%bl
+
+// CHECK: 	setb	3735928559(%ebx,%ecx,8)
+        	setb	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setb	32493
+        	setb	0x7eed
+
+// CHECK: 	setb	3133065982
+        	setb	0xbabecafe
+
+// CHECK: 	setb	305419896
+        	setb	0x12345678
+
+// CHECK: 	setae	%bl
+        	setae	%bl
+
+// CHECK: 	setae	3735928559(%ebx,%ecx,8)
+        	setae	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setae	32493
+        	setae	0x7eed
+
+// CHECK: 	setae	3133065982
+        	setae	0xbabecafe
+
+// CHECK: 	setae	305419896
+        	setae	0x12345678
+
+// CHECK: 	sete	%bl
+        	sete	%bl
+
+// CHECK: 	sete	3735928559(%ebx,%ecx,8)
+        	sete	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sete	32493
+        	sete	0x7eed
+
+// CHECK: 	sete	3133065982
+        	sete	0xbabecafe
+
+// CHECK: 	sete	305419896
+        	sete	0x12345678
+
+// CHECK: 	setne	%bl
+        	setne	%bl
+
+// CHECK: 	setne	3735928559(%ebx,%ecx,8)
+        	setne	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setne	32493
+        	setne	0x7eed
+
+// CHECK: 	setne	3133065982
+        	setne	0xbabecafe
+
+// CHECK: 	setne	305419896
+        	setne	0x12345678
+
+// CHECK: 	setbe	%bl
+        	setbe	%bl
+
+// CHECK: 	setbe	3735928559(%ebx,%ecx,8)
+        	setbe	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setbe	32493
+        	setbe	0x7eed
+
+// CHECK: 	setbe	3133065982
+        	setbe	0xbabecafe
+
+// CHECK: 	setbe	305419896
+        	setbe	0x12345678
+
+// CHECK: 	seta	%bl
+        	seta	%bl
+
+// CHECK: 	seta	3735928559(%ebx,%ecx,8)
+        	seta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	seta	32493
+        	seta	0x7eed
+
+// CHECK: 	seta	3133065982
+        	seta	0xbabecafe
+
+// CHECK: 	seta	305419896
+        	seta	0x12345678
+
+// CHECK: 	sets	%bl
+        	sets	%bl
+
+// CHECK: 	sets	3735928559(%ebx,%ecx,8)
+        	sets	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	sets	32493
+        	sets	0x7eed
+
+// CHECK: 	sets	3133065982
+        	sets	0xbabecafe
+
+// CHECK: 	sets	305419896
+        	sets	0x12345678
+
+// CHECK: 	setns	%bl
+        	setns	%bl
+
+// CHECK: 	setns	3735928559(%ebx,%ecx,8)
+        	setns	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setns	32493
+        	setns	0x7eed
+
+// CHECK: 	setns	3133065982
+        	setns	0xbabecafe
+
+// CHECK: 	setns	305419896
+        	setns	0x12345678
+
+// CHECK: 	setp	%bl
+        	setp	%bl
+
+// CHECK: 	setp	3735928559(%ebx,%ecx,8)
+        	setp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setp	32493
+        	setp	0x7eed
+
+// CHECK: 	setp	3133065982
+        	setp	0xbabecafe
+
+// CHECK: 	setp	305419896
+        	setp	0x12345678
+
+// CHECK: 	setnp	%bl
+        	setnp	%bl
+
+// CHECK: 	setnp	3735928559(%ebx,%ecx,8)
+        	setnp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setnp	32493
+        	setnp	0x7eed
+
+// CHECK: 	setnp	3133065982
+        	setnp	0xbabecafe
+
+// CHECK: 	setnp	305419896
+        	setnp	0x12345678
+
+// CHECK: 	setl	%bl
+        	setl	%bl
+
+// CHECK: 	setl	3735928559(%ebx,%ecx,8)
+        	setl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setl	32493
+        	setl	0x7eed
+
+// CHECK: 	setl	3133065982
+        	setl	0xbabecafe
+
+// CHECK: 	setl	305419896
+        	setl	0x12345678
+
+// CHECK: 	setge	%bl
+        	setge	%bl
+
+// CHECK: 	setge	3735928559(%ebx,%ecx,8)
+        	setge	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setge	32493
+        	setge	0x7eed
+
+// CHECK: 	setge	3133065982
+        	setge	0xbabecafe
+
+// CHECK: 	setge	305419896
+        	setge	0x12345678
+
+// CHECK: 	setle	%bl
+        	setle	%bl
+
+// CHECK: 	setle	3735928559(%ebx,%ecx,8)
+        	setle	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setle	32493
+        	setle	0x7eed
+
+// CHECK: 	setle	3133065982
+        	setle	0xbabecafe
+
+// CHECK: 	setle	305419896
+        	setle	0x12345678
+
+// CHECK: 	setg	%bl
+        	setg	%bl
+
+// CHECK: 	setg	3735928559(%ebx,%ecx,8)
+        	setg	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	setg	32493
+        	setg	0x7eed
+
+// CHECK: 	setg	3133065982
+        	setg	0xbabecafe
+
+// CHECK: 	setg	305419896
+        	setg	0x12345678
+
+// CHECK: 	int	$127
+        	int	$0x7f
+
+// CHECK: 	rsm
+        	rsm
+
+// CHECK: 	hlt
+        	hlt
+
+// CHECK: 	nopl	3735928559(%ebx,%ecx,8)
+        	nopl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	nopw	32493
+        	nopw	0x7eed
+
+// CHECK: 	nopl	3133065982
+        	nopl	0xbabecafe
+
+// CHECK: 	nopl	305419896
+        	nopl	0x12345678
+
+// CHECK: 	nop
+        	nop
+
+// CHECK: 	lldtw	32493
+        	lldtw	0x7eed
+
+// CHECK: 	lmsww	32493
+        	lmsww	0x7eed
+
+// CHECK: 	ltrw	32493
+        	ltrw	0x7eed
+
+// CHECK: 	sldtw	32493
+        	sldtw	0x7eed
+
+// CHECK: 	smsww	32493
+        	smsww	0x7eed
+
+// CHECK: 	strw	32493
+        	strw	0x7eed
+
+// CHECK: 	verr	%bx
+        	verr	%bx
+
+// CHECK: 	verr	3735928559(%ebx,%ecx,8)
+        	verr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	verr	3133065982
+        	verr	0xbabecafe
+
+// CHECK: 	verr	305419896
+        	verr	0x12345678
+
+// CHECK: 	verw	%bx
+        	verw	%bx
+
+// CHECK: 	verw	3735928559(%ebx,%ecx,8)
+        	verw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	verw	3133065982
+        	verw	0xbabecafe
+
+// CHECK: 	verw	305419896
+        	verw	0x12345678
+
+// CHECK: 	fld	%st(2)
+        	fld	%st(2)
+
+// CHECK: 	fldl	3735928559(%ebx,%ecx,8)
+        	fldl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fldl	3133065982
+        	fldl	0xbabecafe
+
+// CHECK: 	fldl	305419896
+        	fldl	0x12345678
+
+// CHECK: 	fld	%st(2)
+        	fld	%st(2)
+
+// CHECK: 	fildl	3735928559(%ebx,%ecx,8)
+        	fildl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fildl	3133065982
+        	fildl	0xbabecafe
+
+// CHECK: 	fildl	305419896
+        	fildl	0x12345678
+
+// CHECK: 	fildll	3735928559(%ebx,%ecx,8)
+        	fildll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fildll	32493
+        	fildll	0x7eed
+
+// CHECK: 	fildll	3133065982
+        	fildll	0xbabecafe
+
+// CHECK: 	fildll	305419896
+        	fildll	0x12345678
+
+// CHECK: 	fldt	3735928559(%ebx,%ecx,8)
+        	fldt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fldt	32493
+        	fldt	0x7eed
+
+// CHECK: 	fldt	3133065982
+        	fldt	0xbabecafe
+
+// CHECK: 	fldt	305419896
+        	fldt	0x12345678
+
+// CHECK: 	fbld	3735928559(%ebx,%ecx,8)
+        	fbld	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fbld	32493
+        	fbld	0x7eed
+
+// CHECK: 	fbld	3133065982
+        	fbld	0xbabecafe
+
+// CHECK: 	fbld	305419896
+        	fbld	0x12345678
+
+// CHECK: 	fst	%st(2)
+        	fst	%st(2)
+
+// CHECK: 	fstl	3735928559(%ebx,%ecx,8)
+        	fstl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstl	3133065982
+        	fstl	0xbabecafe
+
+// CHECK: 	fstl	305419896
+        	fstl	0x12345678
+
+// CHECK: 	fst	%st(2)
+        	fst	%st(2)
+
+// CHECK: 	fistl	3735928559(%ebx,%ecx,8)
+        	fistl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistl	3133065982
+        	fistl	0xbabecafe
+
+// CHECK: 	fistl	305419896
+        	fistl	0x12345678
+
+// CHECK: 	fstp	%st(2)
+        	fstp	%st(2)
+
+// CHECK: 	fstpl	3735928559(%ebx,%ecx,8)
+        	fstpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstpl	3133065982
+        	fstpl	0xbabecafe
+
+// CHECK: 	fstpl	305419896
+        	fstpl	0x12345678
+
+// CHECK: 	fstp	%st(2)
+        	fstp	%st(2)
+
+// CHECK: 	fistpl	3735928559(%ebx,%ecx,8)
+        	fistpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistpl	3133065982
+        	fistpl	0xbabecafe
+
+// CHECK: 	fistpl	305419896
+        	fistpl	0x12345678
+
+// CHECK: 	fistpll	3735928559(%ebx,%ecx,8)
+        	fistpll	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fistpll	32493
+        	fistpll	0x7eed
+
+// CHECK: 	fistpll	3133065982
+        	fistpll	0xbabecafe
+
+// CHECK: 	fistpll	305419896
+        	fistpll	0x12345678
+
+// CHECK: 	fstpt	3735928559(%ebx,%ecx,8)
+        	fstpt	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fstpt	32493
+        	fstpt	0x7eed
+
+// CHECK: 	fstpt	3133065982
+        	fstpt	0xbabecafe
+
+// CHECK: 	fstpt	305419896
+        	fstpt	0x12345678
+
+// CHECK: 	fbstp	3735928559(%ebx,%ecx,8)
+        	fbstp	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fbstp	32493
+        	fbstp	0x7eed
+
+// CHECK: 	fbstp	3133065982
+        	fbstp	0xbabecafe
+
+// CHECK: 	fbstp	305419896
+        	fbstp	0x12345678
+
+// CHECK: 	fxch	%st(2)
+        	fxch	%st(2)
+
+// CHECK: 	fcom	%st(2)
+        	fcom	%st(2)
+
+// CHECK: 	fcoml	3735928559(%ebx,%ecx,8)
+        	fcoml	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fcoml	3133065982
+        	fcoml	0xbabecafe
+
+// CHECK: 	fcoml	305419896
+        	fcoml	0x12345678
+
+// CHECK: 	fcom	%st(2)
+        	fcom	%st(2)
+
+// CHECK: 	ficoml	3735928559(%ebx,%ecx,8)
+        	ficoml	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ficoml	3133065982
+        	ficoml	0xbabecafe
+
+// CHECK: 	ficoml	305419896
+        	ficoml	0x12345678
+
+// CHECK: 	fcomp	%st(2)
+        	fcomp	%st(2)
+
+// CHECK: 	fcompl	3735928559(%ebx,%ecx,8)
+        	fcompl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fcompl	3133065982
+        	fcompl	0xbabecafe
+
+// CHECK: 	fcompl	305419896
+        	fcompl	0x12345678
+
+// CHECK: 	fcomp	%st(2)
+        	fcomp	%st(2)
+
+// CHECK: 	ficompl	3735928559(%ebx,%ecx,8)
+        	ficompl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ficompl	3133065982
+        	ficompl	0xbabecafe
+
+// CHECK: 	ficompl	305419896
+        	ficompl	0x12345678
+
+// CHECK: 	fcompp
+        	fcompp
+
+// CHECK: 	fucom	%st(2)
+        	fucom	%st(2)
+
+// CHECK: 	fucomp	%st(2)
+        	fucomp	%st(2)
+
+// CHECK: 	fucompp
+        	fucompp
+
+// CHECK: 	ftst
+        	ftst
+
+// CHECK: 	fxam
+        	fxam
+
+// CHECK: 	fld1
+        	fld1
+
+// CHECK: 	fldl2t
+        	fldl2t
+
+// CHECK: 	fldl2e
+        	fldl2e
+
+// CHECK: 	fldpi
+        	fldpi
+
+// CHECK: 	fldlg2
+        	fldlg2
+
+// CHECK: 	fldln2
+        	fldln2
+
+// CHECK: 	fldz
+        	fldz
+
+// CHECK: 	fadd	%st(2)
+        	fadd	%st(2)
+
+// CHECK: 	faddl	3735928559(%ebx,%ecx,8)
+        	faddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	faddl	3133065982
+        	faddl	0xbabecafe
+
+// CHECK: 	faddl	305419896
+        	faddl	0x12345678
+
+// CHECK: 	fiaddl	3735928559(%ebx,%ecx,8)
+        	fiaddl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fiaddl	3133065982
+        	fiaddl	0xbabecafe
+
+// CHECK: 	fiaddl	305419896
+        	fiaddl	0x12345678
+
+// CHECK: 	faddp	%st(2)
+        	faddp	%st(2)
+
+// CHECK: 	fsub	%st(2)
+        	fsub	%st(2)
+
+// CHECK: 	fsubl	3735928559(%ebx,%ecx,8)
+        	fsubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fsubl	3133065982
+        	fsubl	0xbabecafe
+
+// CHECK: 	fsubl	305419896
+        	fsubl	0x12345678
+
+// CHECK: 	fisubl	3735928559(%ebx,%ecx,8)
+        	fisubl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fisubl	3133065982
+        	fisubl	0xbabecafe
+
+// CHECK: 	fisubl	305419896
+        	fisubl	0x12345678
+
+// CHECK: 	fsubp	%st(2)
+        	fsubp	%st(2)
+
+// CHECK: 	fsubr	%st(2)
+        	fsubr	%st(2)
+
+// CHECK: 	fsubrl	3735928559(%ebx,%ecx,8)
+        	fsubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fsubrl	3133065982
+        	fsubrl	0xbabecafe
+
+// CHECK: 	fsubrl	305419896
+        	fsubrl	0x12345678
+
+// CHECK: 	fisubrl	3735928559(%ebx,%ecx,8)
+        	fisubrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fisubrl	3133065982
+        	fisubrl	0xbabecafe
+
+// CHECK: 	fisubrl	305419896
+        	fisubrl	0x12345678
+
+// CHECK: 	fsubrp	%st(2)
+        	fsubrp	%st(2)
+
+// CHECK: 	fmul	%st(2)
+        	fmul	%st(2)
+
+// CHECK: 	fmull	3735928559(%ebx,%ecx,8)
+        	fmull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fmull	3133065982
+        	fmull	0xbabecafe
+
+// CHECK: 	fmull	305419896
+        	fmull	0x12345678
+
+// CHECK: 	fimull	3735928559(%ebx,%ecx,8)
+        	fimull	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fimull	3133065982
+        	fimull	0xbabecafe
+
+// CHECK: 	fimull	305419896
+        	fimull	0x12345678
+
+// CHECK: 	fmulp	%st(2)
+        	fmulp	%st(2)
+
+// CHECK: 	fdiv	%st(2)
+        	fdiv	%st(2)
+
+// CHECK: 	fdivl	3735928559(%ebx,%ecx,8)
+        	fdivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fdivl	3133065982
+        	fdivl	0xbabecafe
+
+// CHECK: 	fdivl	305419896
+        	fdivl	0x12345678
+
+// CHECK: 	fidivl	3735928559(%ebx,%ecx,8)
+        	fidivl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fidivl	3133065982
+        	fidivl	0xbabecafe
+
+// CHECK: 	fidivl	305419896
+        	fidivl	0x12345678
+
+// CHECK: 	fdivp	%st(2)
+        	fdivp	%st(2)
+
+// CHECK: 	fdivr	%st(2)
+        	fdivr	%st(2)
+
+// CHECK: 	fdivrl	3735928559(%ebx,%ecx,8)
+        	fdivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fdivrl	3133065982
+        	fdivrl	0xbabecafe
+
+// CHECK: 	fdivrl	305419896
+        	fdivrl	0x12345678
+
+// CHECK: 	fidivrl	3735928559(%ebx,%ecx,8)
+        	fidivrl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fidivrl	3133065982
+        	fidivrl	0xbabecafe
+
+// CHECK: 	fidivrl	305419896
+        	fidivrl	0x12345678
+
+// CHECK: 	fdivrp	%st(2)
+        	fdivrp	%st(2)
+
+// CHECK: 	f2xm1
+        	f2xm1
+
+// CHECK: 	fyl2x
+        	fyl2x
+
+// CHECK: 	fptan
+        	fptan
+
+// CHECK: 	fpatan
+        	fpatan
+
+// CHECK: 	fxtract
+        	fxtract
+
+// CHECK: 	fprem1
+        	fprem1
+
+// CHECK: 	fdecstp
+        	fdecstp
+
+// CHECK: 	fincstp
+        	fincstp
+
+// CHECK: 	fprem
+        	fprem
+
+// CHECK: 	fyl2xp1
+        	fyl2xp1
+
+// CHECK: 	fsqrt
+        	fsqrt
+
+// CHECK: 	fsincos
+        	fsincos
+
+// CHECK: 	frndint
+        	frndint
+
+// CHECK: 	fscale
+        	fscale
+
+// CHECK: 	fsin
+        	fsin
+
+// CHECK: 	fcos
+        	fcos
+
+// CHECK: 	fchs
+        	fchs
+
+// CHECK: 	fabs
+        	fabs
+
+// CHECK: 	fninit
+        	fninit
+
+// CHECK: 	fldcw	3735928559(%ebx,%ecx,8)
+        	fldcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fldcw	3133065982
+        	fldcw	0xbabecafe
+
+// CHECK: 	fldcw	305419896
+        	fldcw	0x12345678
+
+// CHECK: 	fnstcw	3735928559(%ebx,%ecx,8)
+        	fnstcw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fnstcw	3133065982
+        	fnstcw	0xbabecafe
+
+// CHECK: 	fnstcw	305419896
+        	fnstcw	0x12345678
+
+// CHECK: 	fnstsw	3735928559(%ebx,%ecx,8)
+        	fnstsw	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fnstsw	3133065982
+        	fnstsw	0xbabecafe
+
+// CHECK: 	fnstsw	305419896
+        	fnstsw	0x12345678
+
+// CHECK: 	fnclex
+        	fnclex
+
+// CHECK: 	fnstenv	32493
+        	fnstenv	0x7eed
+
+// CHECK: 	fldenv	32493
+        	fldenv	0x7eed
+
+// CHECK: 	fnsave	32493
+        	fnsave	0x7eed
+
+// CHECK: 	frstor	32493
+        	frstor	0x7eed
+
+// CHECK: 	ffree	%st(2)
+        	ffree	%st(2)
+
+// CHECK: 	fnop
+        	fnop
+
+// CHECK: 	invd
+        	invd
+
+// CHECK: 	wbinvd
+        	wbinvd
+
+// CHECK: 	cpuid
+        	cpuid
+
+// CHECK: 	wrmsr
+        	wrmsr
+
+// CHECK: 	rdtsc
+        	rdtsc
+
+// CHECK: 	rdmsr
+        	rdmsr
+
+// CHECK: 	cmpxchg8b	3735928559(%ebx,%ecx,8)
+        	cmpxchg8b	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	cmpxchg8b	32493
+        	cmpxchg8b	0x7eed
+
+// CHECK: 	cmpxchg8b	3133065982
+        	cmpxchg8b	0xbabecafe
+
+// CHECK: 	cmpxchg8b	305419896
+        	cmpxchg8b	0x12345678
+
+// CHECK: 	sysenter
+        	sysenter
+
+// CHECK: 	sysexit
+        	sysexit
+
+// CHECK: 	fxsave	3735928559(%ebx,%ecx,8)
+        	fxsave	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fxsave	32493
+        	fxsave	0x7eed
+
+// CHECK: 	fxsave	3133065982
+        	fxsave	0xbabecafe
+
+// CHECK: 	fxsave	305419896
+        	fxsave	0x12345678
+
+// CHECK: 	fxrstor	3735928559(%ebx,%ecx,8)
+        	fxrstor	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fxrstor	32493
+        	fxrstor	0x7eed
+
+// CHECK: 	fxrstor	3133065982
+        	fxrstor	0xbabecafe
+
+// CHECK: 	fxrstor	305419896
+        	fxrstor	0x12345678
+
+// CHECK: 	rdpmc
+        	rdpmc
+
+// CHECK: 	ud2
+        	ud2
+
+// CHECK: 	fcmovb	%st(2), %st(0)
+        	fcmovb	%st(2),%st
+
+// CHECK: 	fcmove	%st(2), %st(0)
+        	fcmove	%st(2),%st
+
+// CHECK: 	fcmovbe	%st(2), %st(0)
+        	fcmovbe	%st(2),%st
+
+// CHECK: 	fcmovu	 %st(2), %st(0)
+        	fcmovu	%st(2),%st
+
+// CHECK: 	fcmovnb	%st(2), %st(0)
+        	fcmovnb	%st(2),%st
+
+// CHECK: 	fcmovne	%st(2), %st(0)
+        	fcmovne	%st(2),%st
+
+// CHECK: 	fcmovnbe	%st(2), %st(0)
+        	fcmovnbe	%st(2),%st
+
+// CHECK: 	fcmovnu	%st(2), %st(0)
+        	fcmovnu	%st(2),%st
+
+// CHECK: 	fcomi	%st(2)
+        	fcomi	%st(2),%st
+
+// CHECK: 	fucomi	%st(2)
+        	fucomi	%st(2),%st
+
+// CHECK: 	fcompi	%st(2)
+        	fcomip	%st(2),%st
+
+// CHECK: 	fucompi	%st(2)
+        	fucomip	%st(2),%st
+
+// CHECK: 	movnti	%ecx, 3735928559(%ebx,%ecx,8)
+        	movnti	%ecx,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movnti	%ecx, 69
+        	movnti	%ecx,0x45
+
+// CHECK: 	movnti	%ecx, 32493
+        	movnti	%ecx,0x7eed
+
+// CHECK: 	movnti	%ecx, 3133065982
+        	movnti	%ecx,0xbabecafe
+
+// CHECK: 	movnti	%ecx, 305419896
+        	movnti	%ecx,0x12345678
+
+// CHECK: 	clflush	3735928559(%ebx,%ecx,8)
+        	clflush	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	clflush	32493
+        	clflush	0x7eed
+
+// CHECK: 	clflush	3133065982
+        	clflush	0xbabecafe
+
+// CHECK: 	clflush	305419896
+        	clflush	0x12345678
+
+// CHECK: 	pause
+        	pause
+
+// CHECK: 	sfence
+        	sfence
+
+// CHECK: 	lfence
+        	lfence
+
+// CHECK: 	mfence
+        	mfence
+
+// CHECK: 	emms
+        	emms
+
+// CHECK: 	movd	%ecx, %mm3
+        	movd	%ecx,%mm3
+
+// CHECK: 	movd	3735928559(%ebx,%ecx,8), %mm3
+        	movd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	movd	69, %mm3
+        	movd	0x45,%mm3
+
+// CHECK: 	movd	32493, %mm3
+        	movd	0x7eed,%mm3
+
+// CHECK: 	movd	3133065982, %mm3
+        	movd	0xbabecafe,%mm3
+
+// CHECK: 	movd	305419896, %mm3
+        	movd	0x12345678,%mm3
+
+// CHECK: 	movd	%mm3, %ecx
+        	movd	%mm3,%ecx
+
+// CHECK: 	movd	%mm3, 3735928559(%ebx,%ecx,8)
+        	movd	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movd	%mm3, 69
+        	movd	%mm3,0x45
+
+// CHECK: 	movd	%mm3, 32493
+        	movd	%mm3,0x7eed
+
+// CHECK: 	movd	%mm3, 3133065982
+        	movd	%mm3,0xbabecafe
+
+// CHECK: 	movd	%mm3, 305419896
+        	movd	%mm3,0x12345678
+
+// CHECK: 	movd	%ecx, %xmm5
+        	movd	%ecx,%xmm5
+
+// CHECK: 	movd	3735928559(%ebx,%ecx,8), %xmm5
+        	movd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movd	69, %xmm5
+        	movd	0x45,%xmm5
+
+// CHECK: 	movd	32493, %xmm5
+        	movd	0x7eed,%xmm5
+
+// CHECK: 	movd	3133065982, %xmm5
+        	movd	0xbabecafe,%xmm5
+
+// CHECK: 	movd	305419896, %xmm5
+        	movd	0x12345678,%xmm5
+
+// CHECK: 	movd	%xmm5, %ecx
+        	movd	%xmm5,%ecx
+
+// CHECK: 	movd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movd	%xmm5, 69
+        	movd	%xmm5,0x45
+
+// CHECK: 	movd	%xmm5, 32493
+        	movd	%xmm5,0x7eed
+
+// CHECK: 	movd	%xmm5, 3133065982
+        	movd	%xmm5,0xbabecafe
+
+// CHECK: 	movd	%xmm5, 305419896
+        	movd	%xmm5,0x12345678
+
+// CHECK: 	movq	3735928559(%ebx,%ecx,8), %mm3
+        	movq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	movq	69, %mm3
+        	movq	0x45,%mm3
+
+// CHECK: 	movq	32493, %mm3
+        	movq	0x7eed,%mm3
+
+// CHECK: 	movq	3133065982, %mm3
+        	movq	0xbabecafe,%mm3
+
+// CHECK: 	movq	305419896, %mm3
+        	movq	0x12345678,%mm3
+
+// CHECK: 	movq	%mm3, %mm3
+        	movq	%mm3,%mm3
+
+// CHECK: 	movq	%mm3, 3735928559(%ebx,%ecx,8)
+        	movq	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movq	%mm3, 69
+        	movq	%mm3,0x45
+
+// CHECK: 	movq	%mm3, 32493
+        	movq	%mm3,0x7eed
+
+// CHECK: 	movq	%mm3, 3133065982
+        	movq	%mm3,0xbabecafe
+
+// CHECK: 	movq	%mm3, 305419896
+        	movq	%mm3,0x12345678
+
+// CHECK: 	movq	%mm3, %mm3
+        	movq	%mm3,%mm3
+
+// CHECK: 	movq	3735928559(%ebx,%ecx,8), %xmm5
+        	movq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movq	69, %xmm5
+        	movq	0x45,%xmm5
+
+// CHECK: 	movq	32493, %xmm5
+        	movq	0x7eed,%xmm5
+
+// CHECK: 	movq	3133065982, %xmm5
+        	movq	0xbabecafe,%xmm5
+
+// CHECK: 	movq	305419896, %xmm5
+        	movq	0x12345678,%xmm5
+
+// CHECK: 	movq	%xmm5, %xmm5
+        	movq	%xmm5,%xmm5
+
+// CHECK: 	movq	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movq	%xmm5, 69
+        	movq	%xmm5,0x45
+
+// CHECK: 	movq	%xmm5, 32493
+        	movq	%xmm5,0x7eed
+
+// CHECK: 	movq	%xmm5, 3133065982
+        	movq	%xmm5,0xbabecafe
+
+// CHECK: 	movq	%xmm5, 305419896
+        	movq	%xmm5,0x12345678
+
+// CHECK: 	movq	%xmm5, %xmm5
+        	movq	%xmm5,%xmm5
+
+// CHECK: 	packssdw	3735928559(%ebx,%ecx,8), %mm3
+        	packssdw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	packssdw	69, %mm3
+        	packssdw	0x45,%mm3
+
+// CHECK: 	packssdw	32493, %mm3
+        	packssdw	0x7eed,%mm3
+
+// CHECK: 	packssdw	3133065982, %mm3
+        	packssdw	0xbabecafe,%mm3
+
+// CHECK: 	packssdw	305419896, %mm3
+        	packssdw	0x12345678,%mm3
+
+// CHECK: 	packssdw	%mm3, %mm3
+        	packssdw	%mm3,%mm3
+
+// CHECK: 	packssdw	3735928559(%ebx,%ecx,8), %xmm5
+        	packssdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	packssdw	69, %xmm5
+        	packssdw	0x45,%xmm5
+
+// CHECK: 	packssdw	32493, %xmm5
+        	packssdw	0x7eed,%xmm5
+
+// CHECK: 	packssdw	3133065982, %xmm5
+        	packssdw	0xbabecafe,%xmm5
+
+// CHECK: 	packssdw	305419896, %xmm5
+        	packssdw	0x12345678,%xmm5
+
+// CHECK: 	packssdw	%xmm5, %xmm5
+        	packssdw	%xmm5,%xmm5
+
+// CHECK: 	packsswb	3735928559(%ebx,%ecx,8), %mm3
+        	packsswb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	packsswb	69, %mm3
+        	packsswb	0x45,%mm3
+
+// CHECK: 	packsswb	32493, %mm3
+        	packsswb	0x7eed,%mm3
+
+// CHECK: 	packsswb	3133065982, %mm3
+        	packsswb	0xbabecafe,%mm3
+
+// CHECK: 	packsswb	305419896, %mm3
+        	packsswb	0x12345678,%mm3
+
+// CHECK: 	packsswb	%mm3, %mm3
+        	packsswb	%mm3,%mm3
+
+// CHECK: 	packsswb	3735928559(%ebx,%ecx,8), %xmm5
+        	packsswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	packsswb	69, %xmm5
+        	packsswb	0x45,%xmm5
+
+// CHECK: 	packsswb	32493, %xmm5
+        	packsswb	0x7eed,%xmm5
+
+// CHECK: 	packsswb	3133065982, %xmm5
+        	packsswb	0xbabecafe,%xmm5
+
+// CHECK: 	packsswb	305419896, %xmm5
+        	packsswb	0x12345678,%xmm5
+
+// CHECK: 	packsswb	%xmm5, %xmm5
+        	packsswb	%xmm5,%xmm5
+
+// CHECK: 	packuswb	3735928559(%ebx,%ecx,8), %mm3
+        	packuswb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	packuswb	69, %mm3
+        	packuswb	0x45,%mm3
+
+// CHECK: 	packuswb	32493, %mm3
+        	packuswb	0x7eed,%mm3
+
+// CHECK: 	packuswb	3133065982, %mm3
+        	packuswb	0xbabecafe,%mm3
+
+// CHECK: 	packuswb	305419896, %mm3
+        	packuswb	0x12345678,%mm3
+
+// CHECK: 	packuswb	%mm3, %mm3
+        	packuswb	%mm3,%mm3
+
+// CHECK: 	packuswb	3735928559(%ebx,%ecx,8), %xmm5
+        	packuswb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	packuswb	69, %xmm5
+        	packuswb	0x45,%xmm5
+
+// CHECK: 	packuswb	32493, %xmm5
+        	packuswb	0x7eed,%xmm5
+
+// CHECK: 	packuswb	3133065982, %xmm5
+        	packuswb	0xbabecafe,%xmm5
+
+// CHECK: 	packuswb	305419896, %xmm5
+        	packuswb	0x12345678,%xmm5
+
+// CHECK: 	packuswb	%xmm5, %xmm5
+        	packuswb	%xmm5,%xmm5
+
+// CHECK: 	paddb	3735928559(%ebx,%ecx,8), %mm3
+        	paddb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddb	69, %mm3
+        	paddb	0x45,%mm3
+
+// CHECK: 	paddb	32493, %mm3
+        	paddb	0x7eed,%mm3
+
+// CHECK: 	paddb	3133065982, %mm3
+        	paddb	0xbabecafe,%mm3
+
+// CHECK: 	paddb	305419896, %mm3
+        	paddb	0x12345678,%mm3
+
+// CHECK: 	paddb	%mm3, %mm3
+        	paddb	%mm3,%mm3
+
+// CHECK: 	paddb	3735928559(%ebx,%ecx,8), %xmm5
+        	paddb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddb	69, %xmm5
+        	paddb	0x45,%xmm5
+
+// CHECK: 	paddb	32493, %xmm5
+        	paddb	0x7eed,%xmm5
+
+// CHECK: 	paddb	3133065982, %xmm5
+        	paddb	0xbabecafe,%xmm5
+
+// CHECK: 	paddb	305419896, %xmm5
+        	paddb	0x12345678,%xmm5
+
+// CHECK: 	paddb	%xmm5, %xmm5
+        	paddb	%xmm5,%xmm5
+
+// CHECK: 	paddw	3735928559(%ebx,%ecx,8), %mm3
+        	paddw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddw	69, %mm3
+        	paddw	0x45,%mm3
+
+// CHECK: 	paddw	32493, %mm3
+        	paddw	0x7eed,%mm3
+
+// CHECK: 	paddw	3133065982, %mm3
+        	paddw	0xbabecafe,%mm3
+
+// CHECK: 	paddw	305419896, %mm3
+        	paddw	0x12345678,%mm3
+
+// CHECK: 	paddw	%mm3, %mm3
+        	paddw	%mm3,%mm3
+
+// CHECK: 	paddw	3735928559(%ebx,%ecx,8), %xmm5
+        	paddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddw	69, %xmm5
+        	paddw	0x45,%xmm5
+
+// CHECK: 	paddw	32493, %xmm5
+        	paddw	0x7eed,%xmm5
+
+// CHECK: 	paddw	3133065982, %xmm5
+        	paddw	0xbabecafe,%xmm5
+
+// CHECK: 	paddw	305419896, %xmm5
+        	paddw	0x12345678,%xmm5
+
+// CHECK: 	paddw	%xmm5, %xmm5
+        	paddw	%xmm5,%xmm5
+
+// CHECK: 	paddd	3735928559(%ebx,%ecx,8), %mm3
+        	paddd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddd	69, %mm3
+        	paddd	0x45,%mm3
+
+// CHECK: 	paddd	32493, %mm3
+        	paddd	0x7eed,%mm3
+
+// CHECK: 	paddd	3133065982, %mm3
+        	paddd	0xbabecafe,%mm3
+
+// CHECK: 	paddd	305419896, %mm3
+        	paddd	0x12345678,%mm3
+
+// CHECK: 	paddd	%mm3, %mm3
+        	paddd	%mm3,%mm3
+
+// CHECK: 	paddd	3735928559(%ebx,%ecx,8), %xmm5
+        	paddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddd	69, %xmm5
+        	paddd	0x45,%xmm5
+
+// CHECK: 	paddd	32493, %xmm5
+        	paddd	0x7eed,%xmm5
+
+// CHECK: 	paddd	3133065982, %xmm5
+        	paddd	0xbabecafe,%xmm5
+
+// CHECK: 	paddd	305419896, %xmm5
+        	paddd	0x12345678,%xmm5
+
+// CHECK: 	paddd	%xmm5, %xmm5
+        	paddd	%xmm5,%xmm5
+
+// CHECK: 	paddq	3735928559(%ebx,%ecx,8), %mm3
+        	paddq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddq	69, %mm3
+        	paddq	0x45,%mm3
+
+// CHECK: 	paddq	32493, %mm3
+        	paddq	0x7eed,%mm3
+
+// CHECK: 	paddq	3133065982, %mm3
+        	paddq	0xbabecafe,%mm3
+
+// CHECK: 	paddq	305419896, %mm3
+        	paddq	0x12345678,%mm3
+
+// CHECK: 	paddq	%mm3, %mm3
+        	paddq	%mm3,%mm3
+
+// CHECK: 	paddq	3735928559(%ebx,%ecx,8), %xmm5
+        	paddq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddq	69, %xmm5
+        	paddq	0x45,%xmm5
+
+// CHECK: 	paddq	32493, %xmm5
+        	paddq	0x7eed,%xmm5
+
+// CHECK: 	paddq	3133065982, %xmm5
+        	paddq	0xbabecafe,%xmm5
+
+// CHECK: 	paddq	305419896, %xmm5
+        	paddq	0x12345678,%xmm5
+
+// CHECK: 	paddq	%xmm5, %xmm5
+        	paddq	%xmm5,%xmm5
+
+// CHECK: 	paddsb	3735928559(%ebx,%ecx,8), %mm3
+        	paddsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddsb	69, %mm3
+        	paddsb	0x45,%mm3
+
+// CHECK: 	paddsb	32493, %mm3
+        	paddsb	0x7eed,%mm3
+
+// CHECK: 	paddsb	3133065982, %mm3
+        	paddsb	0xbabecafe,%mm3
+
+// CHECK: 	paddsb	305419896, %mm3
+        	paddsb	0x12345678,%mm3
+
+// CHECK: 	paddsb	%mm3, %mm3
+        	paddsb	%mm3,%mm3
+
+// CHECK: 	paddsb	3735928559(%ebx,%ecx,8), %xmm5
+        	paddsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddsb	69, %xmm5
+        	paddsb	0x45,%xmm5
+
+// CHECK: 	paddsb	32493, %xmm5
+        	paddsb	0x7eed,%xmm5
+
+// CHECK: 	paddsb	3133065982, %xmm5
+        	paddsb	0xbabecafe,%xmm5
+
+// CHECK: 	paddsb	305419896, %xmm5
+        	paddsb	0x12345678,%xmm5
+
+// CHECK: 	paddsb	%xmm5, %xmm5
+        	paddsb	%xmm5,%xmm5
+
+// CHECK: 	paddsw	3735928559(%ebx,%ecx,8), %mm3
+        	paddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddsw	69, %mm3
+        	paddsw	0x45,%mm3
+
+// CHECK: 	paddsw	32493, %mm3
+        	paddsw	0x7eed,%mm3
+
+// CHECK: 	paddsw	3133065982, %mm3
+        	paddsw	0xbabecafe,%mm3
+
+// CHECK: 	paddsw	305419896, %mm3
+        	paddsw	0x12345678,%mm3
+
+// CHECK: 	paddsw	%mm3, %mm3
+        	paddsw	%mm3,%mm3
+
+// CHECK: 	paddsw	3735928559(%ebx,%ecx,8), %xmm5
+        	paddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddsw	69, %xmm5
+        	paddsw	0x45,%xmm5
+
+// CHECK: 	paddsw	32493, %xmm5
+        	paddsw	0x7eed,%xmm5
+
+// CHECK: 	paddsw	3133065982, %xmm5
+        	paddsw	0xbabecafe,%xmm5
+
+// CHECK: 	paddsw	305419896, %xmm5
+        	paddsw	0x12345678,%xmm5
+
+// CHECK: 	paddsw	%xmm5, %xmm5
+        	paddsw	%xmm5,%xmm5
+
+// CHECK: 	paddusb	3735928559(%ebx,%ecx,8), %mm3
+        	paddusb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddusb	69, %mm3
+        	paddusb	0x45,%mm3
+
+// CHECK: 	paddusb	32493, %mm3
+        	paddusb	0x7eed,%mm3
+
+// CHECK: 	paddusb	3133065982, %mm3
+        	paddusb	0xbabecafe,%mm3
+
+// CHECK: 	paddusb	305419896, %mm3
+        	paddusb	0x12345678,%mm3
+
+// CHECK: 	paddusb	%mm3, %mm3
+        	paddusb	%mm3,%mm3
+
+// CHECK: 	paddusb	3735928559(%ebx,%ecx,8), %xmm5
+        	paddusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddusb	69, %xmm5
+        	paddusb	0x45,%xmm5
+
+// CHECK: 	paddusb	32493, %xmm5
+        	paddusb	0x7eed,%xmm5
+
+// CHECK: 	paddusb	3133065982, %xmm5
+        	paddusb	0xbabecafe,%xmm5
+
+// CHECK: 	paddusb	305419896, %xmm5
+        	paddusb	0x12345678,%xmm5
+
+// CHECK: 	paddusb	%xmm5, %xmm5
+        	paddusb	%xmm5,%xmm5
+
+// CHECK: 	paddusw	3735928559(%ebx,%ecx,8), %mm3
+        	paddusw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	paddusw	69, %mm3
+        	paddusw	0x45,%mm3
+
+// CHECK: 	paddusw	32493, %mm3
+        	paddusw	0x7eed,%mm3
+
+// CHECK: 	paddusw	3133065982, %mm3
+        	paddusw	0xbabecafe,%mm3
+
+// CHECK: 	paddusw	305419896, %mm3
+        	paddusw	0x12345678,%mm3
+
+// CHECK: 	paddusw	%mm3, %mm3
+        	paddusw	%mm3,%mm3
+
+// CHECK: 	paddusw	3735928559(%ebx,%ecx,8), %xmm5
+        	paddusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	paddusw	69, %xmm5
+        	paddusw	0x45,%xmm5
+
+// CHECK: 	paddusw	32493, %xmm5
+        	paddusw	0x7eed,%xmm5
+
+// CHECK: 	paddusw	3133065982, %xmm5
+        	paddusw	0xbabecafe,%xmm5
+
+// CHECK: 	paddusw	305419896, %xmm5
+        	paddusw	0x12345678,%xmm5
+
+// CHECK: 	paddusw	%xmm5, %xmm5
+        	paddusw	%xmm5,%xmm5
+
+// CHECK: 	pand	3735928559(%ebx,%ecx,8), %mm3
+        	pand	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pand	69, %mm3
+        	pand	0x45,%mm3
+
+// CHECK: 	pand	32493, %mm3
+        	pand	0x7eed,%mm3
+
+// CHECK: 	pand	3133065982, %mm3
+        	pand	0xbabecafe,%mm3
+
+// CHECK: 	pand	305419896, %mm3
+        	pand	0x12345678,%mm3
+
+// CHECK: 	pand	%mm3, %mm3
+        	pand	%mm3,%mm3
+
+// CHECK: 	pand	3735928559(%ebx,%ecx,8), %xmm5
+        	pand	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pand	69, %xmm5
+        	pand	0x45,%xmm5
+
+// CHECK: 	pand	32493, %xmm5
+        	pand	0x7eed,%xmm5
+
+// CHECK: 	pand	3133065982, %xmm5
+        	pand	0xbabecafe,%xmm5
+
+// CHECK: 	pand	305419896, %xmm5
+        	pand	0x12345678,%xmm5
+
+// CHECK: 	pand	%xmm5, %xmm5
+        	pand	%xmm5,%xmm5
+
+// CHECK: 	pandn	3735928559(%ebx,%ecx,8), %mm3
+        	pandn	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pandn	69, %mm3
+        	pandn	0x45,%mm3
+
+// CHECK: 	pandn	32493, %mm3
+        	pandn	0x7eed,%mm3
+
+// CHECK: 	pandn	3133065982, %mm3
+        	pandn	0xbabecafe,%mm3
+
+// CHECK: 	pandn	305419896, %mm3
+        	pandn	0x12345678,%mm3
+
+// CHECK: 	pandn	%mm3, %mm3
+        	pandn	%mm3,%mm3
+
+// CHECK: 	pandn	3735928559(%ebx,%ecx,8), %xmm5
+        	pandn	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pandn	69, %xmm5
+        	pandn	0x45,%xmm5
+
+// CHECK: 	pandn	32493, %xmm5
+        	pandn	0x7eed,%xmm5
+
+// CHECK: 	pandn	3133065982, %xmm5
+        	pandn	0xbabecafe,%xmm5
+
+// CHECK: 	pandn	305419896, %xmm5
+        	pandn	0x12345678,%xmm5
+
+// CHECK: 	pandn	%xmm5, %xmm5
+        	pandn	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqb	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpeqb	69, %mm3
+        	pcmpeqb	0x45,%mm3
+
+// CHECK: 	pcmpeqb	32493, %mm3
+        	pcmpeqb	0x7eed,%mm3
+
+// CHECK: 	pcmpeqb	3133065982, %mm3
+        	pcmpeqb	0xbabecafe,%mm3
+
+// CHECK: 	pcmpeqb	305419896, %mm3
+        	pcmpeqb	0x12345678,%mm3
+
+// CHECK: 	pcmpeqb	%mm3, %mm3
+        	pcmpeqb	%mm3,%mm3
+
+// CHECK: 	pcmpeqb	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpeqb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpeqb	69, %xmm5
+        	pcmpeqb	0x45,%xmm5
+
+// CHECK: 	pcmpeqb	32493, %xmm5
+        	pcmpeqb	0x7eed,%xmm5
+
+// CHECK: 	pcmpeqb	3133065982, %xmm5
+        	pcmpeqb	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpeqb	305419896, %xmm5
+        	pcmpeqb	0x12345678,%xmm5
+
+// CHECK: 	pcmpeqb	%xmm5, %xmm5
+        	pcmpeqb	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqw	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpeqw	69, %mm3
+        	pcmpeqw	0x45,%mm3
+
+// CHECK: 	pcmpeqw	32493, %mm3
+        	pcmpeqw	0x7eed,%mm3
+
+// CHECK: 	pcmpeqw	3133065982, %mm3
+        	pcmpeqw	0xbabecafe,%mm3
+
+// CHECK: 	pcmpeqw	305419896, %mm3
+        	pcmpeqw	0x12345678,%mm3
+
+// CHECK: 	pcmpeqw	%mm3, %mm3
+        	pcmpeqw	%mm3,%mm3
+
+// CHECK: 	pcmpeqw	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpeqw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpeqw	69, %xmm5
+        	pcmpeqw	0x45,%xmm5
+
+// CHECK: 	pcmpeqw	32493, %xmm5
+        	pcmpeqw	0x7eed,%xmm5
+
+// CHECK: 	pcmpeqw	3133065982, %xmm5
+        	pcmpeqw	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpeqw	305419896, %xmm5
+        	pcmpeqw	0x12345678,%xmm5
+
+// CHECK: 	pcmpeqw	%xmm5, %xmm5
+        	pcmpeqw	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqd	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpeqd	69, %mm3
+        	pcmpeqd	0x45,%mm3
+
+// CHECK: 	pcmpeqd	32493, %mm3
+        	pcmpeqd	0x7eed,%mm3
+
+// CHECK: 	pcmpeqd	3133065982, %mm3
+        	pcmpeqd	0xbabecafe,%mm3
+
+// CHECK: 	pcmpeqd	305419896, %mm3
+        	pcmpeqd	0x12345678,%mm3
+
+// CHECK: 	pcmpeqd	%mm3, %mm3
+        	pcmpeqd	%mm3,%mm3
+
+// CHECK: 	pcmpeqd	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpeqd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpeqd	69, %xmm5
+        	pcmpeqd	0x45,%xmm5
+
+// CHECK: 	pcmpeqd	32493, %xmm5
+        	pcmpeqd	0x7eed,%xmm5
+
+// CHECK: 	pcmpeqd	3133065982, %xmm5
+        	pcmpeqd	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpeqd	305419896, %xmm5
+        	pcmpeqd	0x12345678,%xmm5
+
+// CHECK: 	pcmpeqd	%xmm5, %xmm5
+        	pcmpeqd	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtb	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpgtb	69, %mm3
+        	pcmpgtb	0x45,%mm3
+
+// CHECK: 	pcmpgtb	32493, %mm3
+        	pcmpgtb	0x7eed,%mm3
+
+// CHECK: 	pcmpgtb	3133065982, %mm3
+        	pcmpgtb	0xbabecafe,%mm3
+
+// CHECK: 	pcmpgtb	305419896, %mm3
+        	pcmpgtb	0x12345678,%mm3
+
+// CHECK: 	pcmpgtb	%mm3, %mm3
+        	pcmpgtb	%mm3,%mm3
+
+// CHECK: 	pcmpgtb	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpgtb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpgtb	69, %xmm5
+        	pcmpgtb	0x45,%xmm5
+
+// CHECK: 	pcmpgtb	32493, %xmm5
+        	pcmpgtb	0x7eed,%xmm5
+
+// CHECK: 	pcmpgtb	3133065982, %xmm5
+        	pcmpgtb	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpgtb	305419896, %xmm5
+        	pcmpgtb	0x12345678,%xmm5
+
+// CHECK: 	pcmpgtb	%xmm5, %xmm5
+        	pcmpgtb	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtw	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpgtw	69, %mm3
+        	pcmpgtw	0x45,%mm3
+
+// CHECK: 	pcmpgtw	32493, %mm3
+        	pcmpgtw	0x7eed,%mm3
+
+// CHECK: 	pcmpgtw	3133065982, %mm3
+        	pcmpgtw	0xbabecafe,%mm3
+
+// CHECK: 	pcmpgtw	305419896, %mm3
+        	pcmpgtw	0x12345678,%mm3
+
+// CHECK: 	pcmpgtw	%mm3, %mm3
+        	pcmpgtw	%mm3,%mm3
+
+// CHECK: 	pcmpgtw	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpgtw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpgtw	69, %xmm5
+        	pcmpgtw	0x45,%xmm5
+
+// CHECK: 	pcmpgtw	32493, %xmm5
+        	pcmpgtw	0x7eed,%xmm5
+
+// CHECK: 	pcmpgtw	3133065982, %xmm5
+        	pcmpgtw	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpgtw	305419896, %xmm5
+        	pcmpgtw	0x12345678,%xmm5
+
+// CHECK: 	pcmpgtw	%xmm5, %xmm5
+        	pcmpgtw	%xmm5,%xmm5
+
+// CHECK: 	pcmpgtd	3735928559(%ebx,%ecx,8), %mm3
+        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pcmpgtd	69, %mm3
+        	pcmpgtd	0x45,%mm3
+
+// CHECK: 	pcmpgtd	32493, %mm3
+        	pcmpgtd	0x7eed,%mm3
+
+// CHECK: 	pcmpgtd	3133065982, %mm3
+        	pcmpgtd	0xbabecafe,%mm3
+
+// CHECK: 	pcmpgtd	305419896, %mm3
+        	pcmpgtd	0x12345678,%mm3
+
+// CHECK: 	pcmpgtd	%mm3, %mm3
+        	pcmpgtd	%mm3,%mm3
+
+// CHECK: 	pcmpgtd	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpgtd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpgtd	69, %xmm5
+        	pcmpgtd	0x45,%xmm5
+
+// CHECK: 	pcmpgtd	32493, %xmm5
+        	pcmpgtd	0x7eed,%xmm5
+
+// CHECK: 	pcmpgtd	3133065982, %xmm5
+        	pcmpgtd	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpgtd	305419896, %xmm5
+        	pcmpgtd	0x12345678,%xmm5
+
+// CHECK: 	pcmpgtd	%xmm5, %xmm5
+        	pcmpgtd	%xmm5,%xmm5
+
+// CHECK: 	pmaddwd	3735928559(%ebx,%ecx,8), %mm3
+        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmaddwd	69, %mm3
+        	pmaddwd	0x45,%mm3
+
+// CHECK: 	pmaddwd	32493, %mm3
+        	pmaddwd	0x7eed,%mm3
+
+// CHECK: 	pmaddwd	3133065982, %mm3
+        	pmaddwd	0xbabecafe,%mm3
+
+// CHECK: 	pmaddwd	305419896, %mm3
+        	pmaddwd	0x12345678,%mm3
+
+// CHECK: 	pmaddwd	%mm3, %mm3
+        	pmaddwd	%mm3,%mm3
+
+// CHECK: 	pmaddwd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaddwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaddwd	69, %xmm5
+        	pmaddwd	0x45,%xmm5
+
+// CHECK: 	pmaddwd	32493, %xmm5
+        	pmaddwd	0x7eed,%xmm5
+
+// CHECK: 	pmaddwd	3133065982, %xmm5
+        	pmaddwd	0xbabecafe,%xmm5
+
+// CHECK: 	pmaddwd	305419896, %xmm5
+        	pmaddwd	0x12345678,%xmm5
+
+// CHECK: 	pmaddwd	%xmm5, %xmm5
+        	pmaddwd	%xmm5,%xmm5
+
+// CHECK: 	pmulhw	3735928559(%ebx,%ecx,8), %mm3
+        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmulhw	69, %mm3
+        	pmulhw	0x45,%mm3
+
+// CHECK: 	pmulhw	32493, %mm3
+        	pmulhw	0x7eed,%mm3
+
+// CHECK: 	pmulhw	3133065982, %mm3
+        	pmulhw	0xbabecafe,%mm3
+
+// CHECK: 	pmulhw	305419896, %mm3
+        	pmulhw	0x12345678,%mm3
+
+// CHECK: 	pmulhw	%mm3, %mm3
+        	pmulhw	%mm3,%mm3
+
+// CHECK: 	pmulhw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmulhw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmulhw	69, %xmm5
+        	pmulhw	0x45,%xmm5
+
+// CHECK: 	pmulhw	32493, %xmm5
+        	pmulhw	0x7eed,%xmm5
+
+// CHECK: 	pmulhw	3133065982, %xmm5
+        	pmulhw	0xbabecafe,%xmm5
+
+// CHECK: 	pmulhw	305419896, %xmm5
+        	pmulhw	0x12345678,%xmm5
+
+// CHECK: 	pmulhw	%xmm5, %xmm5
+        	pmulhw	%xmm5,%xmm5
+
+// CHECK: 	pmullw	3735928559(%ebx,%ecx,8), %mm3
+        	pmullw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmullw	69, %mm3
+        	pmullw	0x45,%mm3
+
+// CHECK: 	pmullw	32493, %mm3
+        	pmullw	0x7eed,%mm3
+
+// CHECK: 	pmullw	3133065982, %mm3
+        	pmullw	0xbabecafe,%mm3
+
+// CHECK: 	pmullw	305419896, %mm3
+        	pmullw	0x12345678,%mm3
+
+// CHECK: 	pmullw	%mm3, %mm3
+        	pmullw	%mm3,%mm3
+
+// CHECK: 	pmullw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmullw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmullw	69, %xmm5
+        	pmullw	0x45,%xmm5
+
+// CHECK: 	pmullw	32493, %xmm5
+        	pmullw	0x7eed,%xmm5
+
+// CHECK: 	pmullw	3133065982, %xmm5
+        	pmullw	0xbabecafe,%xmm5
+
+// CHECK: 	pmullw	305419896, %xmm5
+        	pmullw	0x12345678,%xmm5
+
+// CHECK: 	pmullw	%xmm5, %xmm5
+        	pmullw	%xmm5,%xmm5
+
+// CHECK: 	por	3735928559(%ebx,%ecx,8), %mm3
+        	por	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	por	69, %mm3
+        	por	0x45,%mm3
+
+// CHECK: 	por	32493, %mm3
+        	por	0x7eed,%mm3
+
+// CHECK: 	por	3133065982, %mm3
+        	por	0xbabecafe,%mm3
+
+// CHECK: 	por	305419896, %mm3
+        	por	0x12345678,%mm3
+
+// CHECK: 	por	%mm3, %mm3
+        	por	%mm3,%mm3
+
+// CHECK: 	por	3735928559(%ebx,%ecx,8), %xmm5
+        	por	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	por	69, %xmm5
+        	por	0x45,%xmm5
+
+// CHECK: 	por	32493, %xmm5
+        	por	0x7eed,%xmm5
+
+// CHECK: 	por	3133065982, %xmm5
+        	por	0xbabecafe,%xmm5
+
+// CHECK: 	por	305419896, %xmm5
+        	por	0x12345678,%xmm5
+
+// CHECK: 	por	%xmm5, %xmm5
+        	por	%xmm5,%xmm5
+
+// CHECK: 	psllw	3735928559(%ebx,%ecx,8), %mm3
+        	psllw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psllw	69, %mm3
+        	psllw	0x45,%mm3
+
+// CHECK: 	psllw	32493, %mm3
+        	psllw	0x7eed,%mm3
+
+// CHECK: 	psllw	3133065982, %mm3
+        	psllw	0xbabecafe,%mm3
+
+// CHECK: 	psllw	305419896, %mm3
+        	psllw	0x12345678,%mm3
+
+// CHECK: 	psllw	%mm3, %mm3
+        	psllw	%mm3,%mm3
+
+// CHECK: 	psllw	3735928559(%ebx,%ecx,8), %xmm5
+        	psllw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psllw	69, %xmm5
+        	psllw	0x45,%xmm5
+
+// CHECK: 	psllw	32493, %xmm5
+        	psllw	0x7eed,%xmm5
+
+// CHECK: 	psllw	3133065982, %xmm5
+        	psllw	0xbabecafe,%xmm5
+
+// CHECK: 	psllw	305419896, %xmm5
+        	psllw	0x12345678,%xmm5
+
+// CHECK: 	psllw	%xmm5, %xmm5
+        	psllw	%xmm5,%xmm5
+
+// CHECK: 	psllw	$127, %mm3
+        	psllw	$0x7f,%mm3
+
+// CHECK: 	psllw	$127, %xmm5
+        	psllw	$0x7f,%xmm5
+
+// CHECK: 	pslld	3735928559(%ebx,%ecx,8), %mm3
+        	pslld	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pslld	69, %mm3
+        	pslld	0x45,%mm3
+
+// CHECK: 	pslld	32493, %mm3
+        	pslld	0x7eed,%mm3
+
+// CHECK: 	pslld	3133065982, %mm3
+        	pslld	0xbabecafe,%mm3
+
+// CHECK: 	pslld	305419896, %mm3
+        	pslld	0x12345678,%mm3
+
+// CHECK: 	pslld	%mm3, %mm3
+        	pslld	%mm3,%mm3
+
+// CHECK: 	pslld	3735928559(%ebx,%ecx,8), %xmm5
+        	pslld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pslld	69, %xmm5
+        	pslld	0x45,%xmm5
+
+// CHECK: 	pslld	32493, %xmm5
+        	pslld	0x7eed,%xmm5
+
+// CHECK: 	pslld	3133065982, %xmm5
+        	pslld	0xbabecafe,%xmm5
+
+// CHECK: 	pslld	305419896, %xmm5
+        	pslld	0x12345678,%xmm5
+
+// CHECK: 	pslld	%xmm5, %xmm5
+        	pslld	%xmm5,%xmm5
+
+// CHECK: 	pslld	$127, %mm3
+        	pslld	$0x7f,%mm3
+
+// CHECK: 	pslld	$127, %xmm5
+        	pslld	$0x7f,%xmm5
+
+// CHECK: 	psllq	3735928559(%ebx,%ecx,8), %mm3
+        	psllq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psllq	69, %mm3
+        	psllq	0x45,%mm3
+
+// CHECK: 	psllq	32493, %mm3
+        	psllq	0x7eed,%mm3
+
+// CHECK: 	psllq	3133065982, %mm3
+        	psllq	0xbabecafe,%mm3
+
+// CHECK: 	psllq	305419896, %mm3
+        	psllq	0x12345678,%mm3
+
+// CHECK: 	psllq	%mm3, %mm3
+        	psllq	%mm3,%mm3
+
+// CHECK: 	psllq	3735928559(%ebx,%ecx,8), %xmm5
+        	psllq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psllq	69, %xmm5
+        	psllq	0x45,%xmm5
+
+// CHECK: 	psllq	32493, %xmm5
+        	psllq	0x7eed,%xmm5
+
+// CHECK: 	psllq	3133065982, %xmm5
+        	psllq	0xbabecafe,%xmm5
+
+// CHECK: 	psllq	305419896, %xmm5
+        	psllq	0x12345678,%xmm5
+
+// CHECK: 	psllq	%xmm5, %xmm5
+        	psllq	%xmm5,%xmm5
+
+// CHECK: 	psllq	$127, %mm3
+        	psllq	$0x7f,%mm3
+
+// CHECK: 	psllq	$127, %xmm5
+        	psllq	$0x7f,%xmm5
+
+// CHECK: 	psraw	3735928559(%ebx,%ecx,8), %mm3
+        	psraw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psraw	69, %mm3
+        	psraw	0x45,%mm3
+
+// CHECK: 	psraw	32493, %mm3
+        	psraw	0x7eed,%mm3
+
+// CHECK: 	psraw	3133065982, %mm3
+        	psraw	0xbabecafe,%mm3
+
+// CHECK: 	psraw	305419896, %mm3
+        	psraw	0x12345678,%mm3
+
+// CHECK: 	psraw	%mm3, %mm3
+        	psraw	%mm3,%mm3
+
+// CHECK: 	psraw	3735928559(%ebx,%ecx,8), %xmm5
+        	psraw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psraw	69, %xmm5
+        	psraw	0x45,%xmm5
+
+// CHECK: 	psraw	32493, %xmm5
+        	psraw	0x7eed,%xmm5
+
+// CHECK: 	psraw	3133065982, %xmm5
+        	psraw	0xbabecafe,%xmm5
+
+// CHECK: 	psraw	305419896, %xmm5
+        	psraw	0x12345678,%xmm5
+
+// CHECK: 	psraw	%xmm5, %xmm5
+        	psraw	%xmm5,%xmm5
+
+// CHECK: 	psraw	$127, %mm3
+        	psraw	$0x7f,%mm3
+
+// CHECK: 	psraw	$127, %xmm5
+        	psraw	$0x7f,%xmm5
+
+// CHECK: 	psrad	3735928559(%ebx,%ecx,8), %mm3
+        	psrad	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psrad	69, %mm3
+        	psrad	0x45,%mm3
+
+// CHECK: 	psrad	32493, %mm3
+        	psrad	0x7eed,%mm3
+
+// CHECK: 	psrad	3133065982, %mm3
+        	psrad	0xbabecafe,%mm3
+
+// CHECK: 	psrad	305419896, %mm3
+        	psrad	0x12345678,%mm3
+
+// CHECK: 	psrad	%mm3, %mm3
+        	psrad	%mm3,%mm3
+
+// CHECK: 	psrad	3735928559(%ebx,%ecx,8), %xmm5
+        	psrad	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psrad	69, %xmm5
+        	psrad	0x45,%xmm5
+
+// CHECK: 	psrad	32493, %xmm5
+        	psrad	0x7eed,%xmm5
+
+// CHECK: 	psrad	3133065982, %xmm5
+        	psrad	0xbabecafe,%xmm5
+
+// CHECK: 	psrad	305419896, %xmm5
+        	psrad	0x12345678,%xmm5
+
+// CHECK: 	psrad	%xmm5, %xmm5
+        	psrad	%xmm5,%xmm5
+
+// CHECK: 	psrad	$127, %mm3
+        	psrad	$0x7f,%mm3
+
+// CHECK: 	psrad	$127, %xmm5
+        	psrad	$0x7f,%xmm5
+
+// CHECK: 	psrlw	3735928559(%ebx,%ecx,8), %mm3
+        	psrlw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psrlw	69, %mm3
+        	psrlw	0x45,%mm3
+
+// CHECK: 	psrlw	32493, %mm3
+        	psrlw	0x7eed,%mm3
+
+// CHECK: 	psrlw	3133065982, %mm3
+        	psrlw	0xbabecafe,%mm3
+
+// CHECK: 	psrlw	305419896, %mm3
+        	psrlw	0x12345678,%mm3
+
+// CHECK: 	psrlw	%mm3, %mm3
+        	psrlw	%mm3,%mm3
+
+// CHECK: 	psrlw	3735928559(%ebx,%ecx,8), %xmm5
+        	psrlw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psrlw	69, %xmm5
+        	psrlw	0x45,%xmm5
+
+// CHECK: 	psrlw	32493, %xmm5
+        	psrlw	0x7eed,%xmm5
+
+// CHECK: 	psrlw	3133065982, %xmm5
+        	psrlw	0xbabecafe,%xmm5
+
+// CHECK: 	psrlw	305419896, %xmm5
+        	psrlw	0x12345678,%xmm5
+
+// CHECK: 	psrlw	%xmm5, %xmm5
+        	psrlw	%xmm5,%xmm5
+
+// CHECK: 	psrlw	$127, %mm3
+        	psrlw	$0x7f,%mm3
+
+// CHECK: 	psrlw	$127, %xmm5
+        	psrlw	$0x7f,%xmm5
+
+// CHECK: 	psrld	3735928559(%ebx,%ecx,8), %mm3
+        	psrld	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psrld	69, %mm3
+        	psrld	0x45,%mm3
+
+// CHECK: 	psrld	32493, %mm3
+        	psrld	0x7eed,%mm3
+
+// CHECK: 	psrld	3133065982, %mm3
+        	psrld	0xbabecafe,%mm3
+
+// CHECK: 	psrld	305419896, %mm3
+        	psrld	0x12345678,%mm3
+
+// CHECK: 	psrld	%mm3, %mm3
+        	psrld	%mm3,%mm3
+
+// CHECK: 	psrld	3735928559(%ebx,%ecx,8), %xmm5
+        	psrld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psrld	69, %xmm5
+        	psrld	0x45,%xmm5
+
+// CHECK: 	psrld	32493, %xmm5
+        	psrld	0x7eed,%xmm5
+
+// CHECK: 	psrld	3133065982, %xmm5
+        	psrld	0xbabecafe,%xmm5
+
+// CHECK: 	psrld	305419896, %xmm5
+        	psrld	0x12345678,%xmm5
+
+// CHECK: 	psrld	%xmm5, %xmm5
+        	psrld	%xmm5,%xmm5
+
+// CHECK: 	psrld	$127, %mm3
+        	psrld	$0x7f,%mm3
+
+// CHECK: 	psrld	$127, %xmm5
+        	psrld	$0x7f,%xmm5
+
+// CHECK: 	psrlq	3735928559(%ebx,%ecx,8), %mm3
+        	psrlq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psrlq	69, %mm3
+        	psrlq	0x45,%mm3
+
+// CHECK: 	psrlq	32493, %mm3
+        	psrlq	0x7eed,%mm3
+
+// CHECK: 	psrlq	3133065982, %mm3
+        	psrlq	0xbabecafe,%mm3
+
+// CHECK: 	psrlq	305419896, %mm3
+        	psrlq	0x12345678,%mm3
+
+// CHECK: 	psrlq	%mm3, %mm3
+        	psrlq	%mm3,%mm3
+
+// CHECK: 	psrlq	3735928559(%ebx,%ecx,8), %xmm5
+        	psrlq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psrlq	69, %xmm5
+        	psrlq	0x45,%xmm5
+
+// CHECK: 	psrlq	32493, %xmm5
+        	psrlq	0x7eed,%xmm5
+
+// CHECK: 	psrlq	3133065982, %xmm5
+        	psrlq	0xbabecafe,%xmm5
+
+// CHECK: 	psrlq	305419896, %xmm5
+        	psrlq	0x12345678,%xmm5
+
+// CHECK: 	psrlq	%xmm5, %xmm5
+        	psrlq	%xmm5,%xmm5
+
+// CHECK: 	psrlq	$127, %mm3
+        	psrlq	$0x7f,%mm3
+
+// CHECK: 	psrlq	$127, %xmm5
+        	psrlq	$0x7f,%xmm5
+
+// CHECK: 	psubb	3735928559(%ebx,%ecx,8), %mm3
+        	psubb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubb	69, %mm3
+        	psubb	0x45,%mm3
+
+// CHECK: 	psubb	32493, %mm3
+        	psubb	0x7eed,%mm3
+
+// CHECK: 	psubb	3133065982, %mm3
+        	psubb	0xbabecafe,%mm3
+
+// CHECK: 	psubb	305419896, %mm3
+        	psubb	0x12345678,%mm3
+
+// CHECK: 	psubb	%mm3, %mm3
+        	psubb	%mm3,%mm3
+
+// CHECK: 	psubb	3735928559(%ebx,%ecx,8), %xmm5
+        	psubb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubb	69, %xmm5
+        	psubb	0x45,%xmm5
+
+// CHECK: 	psubb	32493, %xmm5
+        	psubb	0x7eed,%xmm5
+
+// CHECK: 	psubb	3133065982, %xmm5
+        	psubb	0xbabecafe,%xmm5
+
+// CHECK: 	psubb	305419896, %xmm5
+        	psubb	0x12345678,%xmm5
+
+// CHECK: 	psubb	%xmm5, %xmm5
+        	psubb	%xmm5,%xmm5
+
+// CHECK: 	psubw	3735928559(%ebx,%ecx,8), %mm3
+        	psubw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubw	69, %mm3
+        	psubw	0x45,%mm3
+
+// CHECK: 	psubw	32493, %mm3
+        	psubw	0x7eed,%mm3
+
+// CHECK: 	psubw	3133065982, %mm3
+        	psubw	0xbabecafe,%mm3
+
+// CHECK: 	psubw	305419896, %mm3
+        	psubw	0x12345678,%mm3
+
+// CHECK: 	psubw	%mm3, %mm3
+        	psubw	%mm3,%mm3
+
+// CHECK: 	psubw	3735928559(%ebx,%ecx,8), %xmm5
+        	psubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubw	69, %xmm5
+        	psubw	0x45,%xmm5
+
+// CHECK: 	psubw	32493, %xmm5
+        	psubw	0x7eed,%xmm5
+
+// CHECK: 	psubw	3133065982, %xmm5
+        	psubw	0xbabecafe,%xmm5
+
+// CHECK: 	psubw	305419896, %xmm5
+        	psubw	0x12345678,%xmm5
+
+// CHECK: 	psubw	%xmm5, %xmm5
+        	psubw	%xmm5,%xmm5
+
+// CHECK: 	psubd	3735928559(%ebx,%ecx,8), %mm3
+        	psubd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubd	69, %mm3
+        	psubd	0x45,%mm3
+
+// CHECK: 	psubd	32493, %mm3
+        	psubd	0x7eed,%mm3
+
+// CHECK: 	psubd	3133065982, %mm3
+        	psubd	0xbabecafe,%mm3
+
+// CHECK: 	psubd	305419896, %mm3
+        	psubd	0x12345678,%mm3
+
+// CHECK: 	psubd	%mm3, %mm3
+        	psubd	%mm3,%mm3
+
+// CHECK: 	psubd	3735928559(%ebx,%ecx,8), %xmm5
+        	psubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubd	69, %xmm5
+        	psubd	0x45,%xmm5
+
+// CHECK: 	psubd	32493, %xmm5
+        	psubd	0x7eed,%xmm5
+
+// CHECK: 	psubd	3133065982, %xmm5
+        	psubd	0xbabecafe,%xmm5
+
+// CHECK: 	psubd	305419896, %xmm5
+        	psubd	0x12345678,%xmm5
+
+// CHECK: 	psubd	%xmm5, %xmm5
+        	psubd	%xmm5,%xmm5
+
+// CHECK: 	psubq	3735928559(%ebx,%ecx,8), %mm3
+        	psubq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubq	69, %mm3
+        	psubq	0x45,%mm3
+
+// CHECK: 	psubq	32493, %mm3
+        	psubq	0x7eed,%mm3
+
+// CHECK: 	psubq	3133065982, %mm3
+        	psubq	0xbabecafe,%mm3
+
+// CHECK: 	psubq	305419896, %mm3
+        	psubq	0x12345678,%mm3
+
+// CHECK: 	psubq	%mm3, %mm3
+        	psubq	%mm3,%mm3
+
+// CHECK: 	psubq	3735928559(%ebx,%ecx,8), %xmm5
+        	psubq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubq	69, %xmm5
+        	psubq	0x45,%xmm5
+
+// CHECK: 	psubq	32493, %xmm5
+        	psubq	0x7eed,%xmm5
+
+// CHECK: 	psubq	3133065982, %xmm5
+        	psubq	0xbabecafe,%xmm5
+
+// CHECK: 	psubq	305419896, %xmm5
+        	psubq	0x12345678,%xmm5
+
+// CHECK: 	psubq	%xmm5, %xmm5
+        	psubq	%xmm5,%xmm5
+
+// CHECK: 	psubsb	3735928559(%ebx,%ecx,8), %mm3
+        	psubsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubsb	69, %mm3
+        	psubsb	0x45,%mm3
+
+// CHECK: 	psubsb	32493, %mm3
+        	psubsb	0x7eed,%mm3
+
+// CHECK: 	psubsb	3133065982, %mm3
+        	psubsb	0xbabecafe,%mm3
+
+// CHECK: 	psubsb	305419896, %mm3
+        	psubsb	0x12345678,%mm3
+
+// CHECK: 	psubsb	%mm3, %mm3
+        	psubsb	%mm3,%mm3
+
+// CHECK: 	psubsb	3735928559(%ebx,%ecx,8), %xmm5
+        	psubsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubsb	69, %xmm5
+        	psubsb	0x45,%xmm5
+
+// CHECK: 	psubsb	32493, %xmm5
+        	psubsb	0x7eed,%xmm5
+
+// CHECK: 	psubsb	3133065982, %xmm5
+        	psubsb	0xbabecafe,%xmm5
+
+// CHECK: 	psubsb	305419896, %xmm5
+        	psubsb	0x12345678,%xmm5
+
+// CHECK: 	psubsb	%xmm5, %xmm5
+        	psubsb	%xmm5,%xmm5
+
+// CHECK: 	psubsw	3735928559(%ebx,%ecx,8), %mm3
+        	psubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubsw	69, %mm3
+        	psubsw	0x45,%mm3
+
+// CHECK: 	psubsw	32493, %mm3
+        	psubsw	0x7eed,%mm3
+
+// CHECK: 	psubsw	3133065982, %mm3
+        	psubsw	0xbabecafe,%mm3
+
+// CHECK: 	psubsw	305419896, %mm3
+        	psubsw	0x12345678,%mm3
+
+// CHECK: 	psubsw	%mm3, %mm3
+        	psubsw	%mm3,%mm3
+
+// CHECK: 	psubsw	3735928559(%ebx,%ecx,8), %xmm5
+        	psubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubsw	69, %xmm5
+        	psubsw	0x45,%xmm5
+
+// CHECK: 	psubsw	32493, %xmm5
+        	psubsw	0x7eed,%xmm5
+
+// CHECK: 	psubsw	3133065982, %xmm5
+        	psubsw	0xbabecafe,%xmm5
+
+// CHECK: 	psubsw	305419896, %xmm5
+        	psubsw	0x12345678,%xmm5
+
+// CHECK: 	psubsw	%xmm5, %xmm5
+        	psubsw	%xmm5,%xmm5
+
+// CHECK: 	psubusb	3735928559(%ebx,%ecx,8), %mm3
+        	psubusb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubusb	69, %mm3
+        	psubusb	0x45,%mm3
+
+// CHECK: 	psubusb	32493, %mm3
+        	psubusb	0x7eed,%mm3
+
+// CHECK: 	psubusb	3133065982, %mm3
+        	psubusb	0xbabecafe,%mm3
+
+// CHECK: 	psubusb	305419896, %mm3
+        	psubusb	0x12345678,%mm3
+
+// CHECK: 	psubusb	%mm3, %mm3
+        	psubusb	%mm3,%mm3
+
+// CHECK: 	psubusb	3735928559(%ebx,%ecx,8), %xmm5
+        	psubusb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubusb	69, %xmm5
+        	psubusb	0x45,%xmm5
+
+// CHECK: 	psubusb	32493, %xmm5
+        	psubusb	0x7eed,%xmm5
+
+// CHECK: 	psubusb	3133065982, %xmm5
+        	psubusb	0xbabecafe,%xmm5
+
+// CHECK: 	psubusb	305419896, %xmm5
+        	psubusb	0x12345678,%xmm5
+
+// CHECK: 	psubusb	%xmm5, %xmm5
+        	psubusb	%xmm5,%xmm5
+
+// CHECK: 	psubusw	3735928559(%ebx,%ecx,8), %mm3
+        	psubusw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psubusw	69, %mm3
+        	psubusw	0x45,%mm3
+
+// CHECK: 	psubusw	32493, %mm3
+        	psubusw	0x7eed,%mm3
+
+// CHECK: 	psubusw	3133065982, %mm3
+        	psubusw	0xbabecafe,%mm3
+
+// CHECK: 	psubusw	305419896, %mm3
+        	psubusw	0x12345678,%mm3
+
+// CHECK: 	psubusw	%mm3, %mm3
+        	psubusw	%mm3,%mm3
+
+// CHECK: 	psubusw	3735928559(%ebx,%ecx,8), %xmm5
+        	psubusw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psubusw	69, %xmm5
+        	psubusw	0x45,%xmm5
+
+// CHECK: 	psubusw	32493, %xmm5
+        	psubusw	0x7eed,%xmm5
+
+// CHECK: 	psubusw	3133065982, %xmm5
+        	psubusw	0xbabecafe,%xmm5
+
+// CHECK: 	psubusw	305419896, %xmm5
+        	psubusw	0x12345678,%xmm5
+
+// CHECK: 	psubusw	%xmm5, %xmm5
+        	psubusw	%xmm5,%xmm5
+
+// CHECK: 	punpckhbw	3735928559(%ebx,%ecx,8), %mm3
+        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpckhbw	69, %mm3
+        	punpckhbw	0x45,%mm3
+
+// CHECK: 	punpckhbw	32493, %mm3
+        	punpckhbw	0x7eed,%mm3
+
+// CHECK: 	punpckhbw	3133065982, %mm3
+        	punpckhbw	0xbabecafe,%mm3
+
+// CHECK: 	punpckhbw	305419896, %mm3
+        	punpckhbw	0x12345678,%mm3
+
+// CHECK: 	punpckhbw	%mm3, %mm3
+        	punpckhbw	%mm3,%mm3
+
+// CHECK: 	punpckhbw	3735928559(%ebx,%ecx,8), %xmm5
+        	punpckhbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpckhbw	69, %xmm5
+        	punpckhbw	0x45,%xmm5
+
+// CHECK: 	punpckhbw	32493, %xmm5
+        	punpckhbw	0x7eed,%xmm5
+
+// CHECK: 	punpckhbw	3133065982, %xmm5
+        	punpckhbw	0xbabecafe,%xmm5
+
+// CHECK: 	punpckhbw	305419896, %xmm5
+        	punpckhbw	0x12345678,%xmm5
+
+// CHECK: 	punpckhbw	%xmm5, %xmm5
+        	punpckhbw	%xmm5,%xmm5
+
+// CHECK: 	punpckhwd	3735928559(%ebx,%ecx,8), %mm3
+        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpckhwd	69, %mm3
+        	punpckhwd	0x45,%mm3
+
+// CHECK: 	punpckhwd	32493, %mm3
+        	punpckhwd	0x7eed,%mm3
+
+// CHECK: 	punpckhwd	3133065982, %mm3
+        	punpckhwd	0xbabecafe,%mm3
+
+// CHECK: 	punpckhwd	305419896, %mm3
+        	punpckhwd	0x12345678,%mm3
+
+// CHECK: 	punpckhwd	%mm3, %mm3
+        	punpckhwd	%mm3,%mm3
+
+// CHECK: 	punpckhwd	3735928559(%ebx,%ecx,8), %xmm5
+        	punpckhwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpckhwd	69, %xmm5
+        	punpckhwd	0x45,%xmm5
+
+// CHECK: 	punpckhwd	32493, %xmm5
+        	punpckhwd	0x7eed,%xmm5
+
+// CHECK: 	punpckhwd	3133065982, %xmm5
+        	punpckhwd	0xbabecafe,%xmm5
+
+// CHECK: 	punpckhwd	305419896, %xmm5
+        	punpckhwd	0x12345678,%xmm5
+
+// CHECK: 	punpckhwd	%xmm5, %xmm5
+        	punpckhwd	%xmm5,%xmm5
+
+// CHECK: 	punpckhdq	3735928559(%ebx,%ecx,8), %mm3
+        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpckhdq	69, %mm3
+        	punpckhdq	0x45,%mm3
+
+// CHECK: 	punpckhdq	32493, %mm3
+        	punpckhdq	0x7eed,%mm3
+
+// CHECK: 	punpckhdq	3133065982, %mm3
+        	punpckhdq	0xbabecafe,%mm3
+
+// CHECK: 	punpckhdq	305419896, %mm3
+        	punpckhdq	0x12345678,%mm3
+
+// CHECK: 	punpckhdq	%mm3, %mm3
+        	punpckhdq	%mm3,%mm3
+
+// CHECK: 	punpckhdq	3735928559(%ebx,%ecx,8), %xmm5
+        	punpckhdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpckhdq	69, %xmm5
+        	punpckhdq	0x45,%xmm5
+
+// CHECK: 	punpckhdq	32493, %xmm5
+        	punpckhdq	0x7eed,%xmm5
+
+// CHECK: 	punpckhdq	3133065982, %xmm5
+        	punpckhdq	0xbabecafe,%xmm5
+
+// CHECK: 	punpckhdq	305419896, %xmm5
+        	punpckhdq	0x12345678,%xmm5
+
+// CHECK: 	punpckhdq	%xmm5, %xmm5
+        	punpckhdq	%xmm5,%xmm5
+
+// CHECK: 	punpcklbw	3735928559(%ebx,%ecx,8), %mm3
+        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpcklbw	69, %mm3
+        	punpcklbw	0x45,%mm3
+
+// CHECK: 	punpcklbw	32493, %mm3
+        	punpcklbw	0x7eed,%mm3
+
+// CHECK: 	punpcklbw	3133065982, %mm3
+        	punpcklbw	0xbabecafe,%mm3
+
+// CHECK: 	punpcklbw	305419896, %mm3
+        	punpcklbw	0x12345678,%mm3
+
+// CHECK: 	punpcklbw	%mm3, %mm3
+        	punpcklbw	%mm3,%mm3
+
+// CHECK: 	punpcklbw	3735928559(%ebx,%ecx,8), %xmm5
+        	punpcklbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpcklbw	69, %xmm5
+        	punpcklbw	0x45,%xmm5
+
+// CHECK: 	punpcklbw	32493, %xmm5
+        	punpcklbw	0x7eed,%xmm5
+
+// CHECK: 	punpcklbw	3133065982, %xmm5
+        	punpcklbw	0xbabecafe,%xmm5
+
+// CHECK: 	punpcklbw	305419896, %xmm5
+        	punpcklbw	0x12345678,%xmm5
+
+// CHECK: 	punpcklbw	%xmm5, %xmm5
+        	punpcklbw	%xmm5,%xmm5
+
+// CHECK: 	punpcklwd	3735928559(%ebx,%ecx,8), %mm3
+        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpcklwd	69, %mm3
+        	punpcklwd	0x45,%mm3
+
+// CHECK: 	punpcklwd	32493, %mm3
+        	punpcklwd	0x7eed,%mm3
+
+// CHECK: 	punpcklwd	3133065982, %mm3
+        	punpcklwd	0xbabecafe,%mm3
+
+// CHECK: 	punpcklwd	305419896, %mm3
+        	punpcklwd	0x12345678,%mm3
+
+// CHECK: 	punpcklwd	%mm3, %mm3
+        	punpcklwd	%mm3,%mm3
+
+// CHECK: 	punpcklwd	3735928559(%ebx,%ecx,8), %xmm5
+        	punpcklwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpcklwd	69, %xmm5
+        	punpcklwd	0x45,%xmm5
+
+// CHECK: 	punpcklwd	32493, %xmm5
+        	punpcklwd	0x7eed,%xmm5
+
+// CHECK: 	punpcklwd	3133065982, %xmm5
+        	punpcklwd	0xbabecafe,%xmm5
+
+// CHECK: 	punpcklwd	305419896, %xmm5
+        	punpcklwd	0x12345678,%xmm5
+
+// CHECK: 	punpcklwd	%xmm5, %xmm5
+        	punpcklwd	%xmm5,%xmm5
+
+// CHECK: 	punpckldq	3735928559(%ebx,%ecx,8), %mm3
+        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	punpckldq	69, %mm3
+        	punpckldq	0x45,%mm3
+
+// CHECK: 	punpckldq	32493, %mm3
+        	punpckldq	0x7eed,%mm3
+
+// CHECK: 	punpckldq	3133065982, %mm3
+        	punpckldq	0xbabecafe,%mm3
+
+// CHECK: 	punpckldq	305419896, %mm3
+        	punpckldq	0x12345678,%mm3
+
+// CHECK: 	punpckldq	%mm3, %mm3
+        	punpckldq	%mm3,%mm3
+
+// CHECK: 	punpckldq	3735928559(%ebx,%ecx,8), %xmm5
+        	punpckldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpckldq	69, %xmm5
+        	punpckldq	0x45,%xmm5
+
+// CHECK: 	punpckldq	32493, %xmm5
+        	punpckldq	0x7eed,%xmm5
+
+// CHECK: 	punpckldq	3133065982, %xmm5
+        	punpckldq	0xbabecafe,%xmm5
+
+// CHECK: 	punpckldq	305419896, %xmm5
+        	punpckldq	0x12345678,%xmm5
+
+// CHECK: 	punpckldq	%xmm5, %xmm5
+        	punpckldq	%xmm5,%xmm5
+
+// CHECK: 	pxor	3735928559(%ebx,%ecx,8), %mm3
+        	pxor	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pxor	69, %mm3
+        	pxor	0x45,%mm3
+
+// CHECK: 	pxor	32493, %mm3
+        	pxor	0x7eed,%mm3
+
+// CHECK: 	pxor	3133065982, %mm3
+        	pxor	0xbabecafe,%mm3
+
+// CHECK: 	pxor	305419896, %mm3
+        	pxor	0x12345678,%mm3
+
+// CHECK: 	pxor	%mm3, %mm3
+        	pxor	%mm3,%mm3
+
+// CHECK: 	pxor	3735928559(%ebx,%ecx,8), %xmm5
+        	pxor	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pxor	69, %xmm5
+        	pxor	0x45,%xmm5
+
+// CHECK: 	pxor	32493, %xmm5
+        	pxor	0x7eed,%xmm5
+
+// CHECK: 	pxor	3133065982, %xmm5
+        	pxor	0xbabecafe,%xmm5
+
+// CHECK: 	pxor	305419896, %xmm5
+        	pxor	0x12345678,%xmm5
+
+// CHECK: 	pxor	%xmm5, %xmm5
+        	pxor	%xmm5,%xmm5
+
+// CHECK: 	addps	3735928559(%ebx,%ecx,8), %xmm5
+        	addps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addps	69, %xmm5
+        	addps	0x45,%xmm5
+
+// CHECK: 	addps	32493, %xmm5
+        	addps	0x7eed,%xmm5
+
+// CHECK: 	addps	3133065982, %xmm5
+        	addps	0xbabecafe,%xmm5
+
+// CHECK: 	addps	305419896, %xmm5
+        	addps	0x12345678,%xmm5
+
+// CHECK: 	addps	%xmm5, %xmm5
+        	addps	%xmm5,%xmm5
+
+// CHECK: 	addss	3735928559(%ebx,%ecx,8), %xmm5
+        	addss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addss	69, %xmm5
+        	addss	0x45,%xmm5
+
+// CHECK: 	addss	32493, %xmm5
+        	addss	0x7eed,%xmm5
+
+// CHECK: 	addss	3133065982, %xmm5
+        	addss	0xbabecafe,%xmm5
+
+// CHECK: 	addss	305419896, %xmm5
+        	addss	0x12345678,%xmm5
+
+// CHECK: 	addss	%xmm5, %xmm5
+        	addss	%xmm5,%xmm5
+
+// CHECK: 	andnps	3735928559(%ebx,%ecx,8), %xmm5
+        	andnps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	andnps	69, %xmm5
+        	andnps	0x45,%xmm5
+
+// CHECK: 	andnps	32493, %xmm5
+        	andnps	0x7eed,%xmm5
+
+// CHECK: 	andnps	3133065982, %xmm5
+        	andnps	0xbabecafe,%xmm5
+
+// CHECK: 	andnps	305419896, %xmm5
+        	andnps	0x12345678,%xmm5
+
+// CHECK: 	andnps	%xmm5, %xmm5
+        	andnps	%xmm5,%xmm5
+
+// CHECK: 	andps	3735928559(%ebx,%ecx,8), %xmm5
+        	andps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	andps	69, %xmm5
+        	andps	0x45,%xmm5
+
+// CHECK: 	andps	32493, %xmm5
+        	andps	0x7eed,%xmm5
+
+// CHECK: 	andps	3133065982, %xmm5
+        	andps	0xbabecafe,%xmm5
+
+// CHECK: 	andps	305419896, %xmm5
+        	andps	0x12345678,%xmm5
+
+// CHECK: 	andps	%xmm5, %xmm5
+        	andps	%xmm5,%xmm5
+
+// CHECK: 	comiss	3735928559(%ebx,%ecx,8), %xmm5
+        	comiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	comiss	69, %xmm5
+        	comiss	0x45,%xmm5
+
+// CHECK: 	comiss	32493, %xmm5
+        	comiss	0x7eed,%xmm5
+
+// CHECK: 	comiss	3133065982, %xmm5
+        	comiss	0xbabecafe,%xmm5
+
+// CHECK: 	comiss	305419896, %xmm5
+        	comiss	0x12345678,%xmm5
+
+// CHECK: 	comiss	%xmm5, %xmm5
+        	comiss	%xmm5,%xmm5
+
+// CHECK: 	cvtpi2ps	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpi2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpi2ps	69, %xmm5
+        	cvtpi2ps	0x45,%xmm5
+
+// CHECK: 	cvtpi2ps	32493, %xmm5
+        	cvtpi2ps	0x7eed,%xmm5
+
+// CHECK: 	cvtpi2ps	3133065982, %xmm5
+        	cvtpi2ps	0xbabecafe,%xmm5
+
+// CHECK: 	cvtpi2ps	305419896, %xmm5
+        	cvtpi2ps	0x12345678,%xmm5
+
+// CHECK: 	cvtpi2ps	%mm3, %xmm5
+        	cvtpi2ps	%mm3,%xmm5
+
+// CHECK: 	cvtps2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvtps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvtps2pi	69, %mm3
+        	cvtps2pi	0x45,%mm3
+
+// CHECK: 	cvtps2pi	32493, %mm3
+        	cvtps2pi	0x7eed,%mm3
+
+// CHECK: 	cvtps2pi	3133065982, %mm3
+        	cvtps2pi	0xbabecafe,%mm3
+
+// CHECK: 	cvtps2pi	305419896, %mm3
+        	cvtps2pi	0x12345678,%mm3
+
+// CHECK: 	cvtps2pi	%xmm5, %mm3
+        	cvtps2pi	%xmm5,%mm3
+
+// CHECK: 	cvtsi2ss	%ecx, %xmm5
+        	cvtsi2ss	%ecx,%xmm5
+
+// CHECK: 	cvtsi2ss	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsi2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtsi2ss	69, %xmm5
+        	cvtsi2ss	0x45,%xmm5
+
+// CHECK: 	cvtsi2ss	32493, %xmm5
+        	cvtsi2ss	0x7eed,%xmm5
+
+// CHECK: 	cvtsi2ss	3133065982, %xmm5
+        	cvtsi2ss	0xbabecafe,%xmm5
+
+// CHECK: 	cvtsi2ss	305419896, %xmm5
+        	cvtsi2ss	0x12345678,%xmm5
+
+// CHECK: 	cvttps2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvttps2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvttps2pi	69, %mm3
+        	cvttps2pi	0x45,%mm3
+
+// CHECK: 	cvttps2pi	32493, %mm3
+        	cvttps2pi	0x7eed,%mm3
+
+// CHECK: 	cvttps2pi	3133065982, %mm3
+        	cvttps2pi	0xbabecafe,%mm3
+
+// CHECK: 	cvttps2pi	305419896, %mm3
+        	cvttps2pi	0x12345678,%mm3
+
+// CHECK: 	cvttps2pi	%xmm5, %mm3
+        	cvttps2pi	%xmm5,%mm3
+
+// CHECK: 	cvttss2si	3735928559(%ebx,%ecx,8), %ecx
+        	cvttss2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	cvttss2si	69, %ecx
+        	cvttss2si	0x45,%ecx
+
+// CHECK: 	cvttss2si	32493, %ecx
+        	cvttss2si	0x7eed,%ecx
+
+// CHECK: 	cvttss2si	3133065982, %ecx
+        	cvttss2si	0xbabecafe,%ecx
+
+// CHECK: 	cvttss2si	305419896, %ecx
+        	cvttss2si	0x12345678,%ecx
+
+// CHECK: 	cvttss2si	%xmm5, %ecx
+        	cvttss2si	%xmm5,%ecx
+
+// CHECK: 	divps	3735928559(%ebx,%ecx,8), %xmm5
+        	divps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	divps	69, %xmm5
+        	divps	0x45,%xmm5
+
+// CHECK: 	divps	32493, %xmm5
+        	divps	0x7eed,%xmm5
+
+// CHECK: 	divps	3133065982, %xmm5
+        	divps	0xbabecafe,%xmm5
+
+// CHECK: 	divps	305419896, %xmm5
+        	divps	0x12345678,%xmm5
+
+// CHECK: 	divps	%xmm5, %xmm5
+        	divps	%xmm5,%xmm5
+
+// CHECK: 	divss	3735928559(%ebx,%ecx,8), %xmm5
+        	divss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	divss	69, %xmm5
+        	divss	0x45,%xmm5
+
+// CHECK: 	divss	32493, %xmm5
+        	divss	0x7eed,%xmm5
+
+// CHECK: 	divss	3133065982, %xmm5
+        	divss	0xbabecafe,%xmm5
+
+// CHECK: 	divss	305419896, %xmm5
+        	divss	0x12345678,%xmm5
+
+// CHECK: 	divss	%xmm5, %xmm5
+        	divss	%xmm5,%xmm5
+
+// CHECK: 	ldmxcsr	3735928559(%ebx,%ecx,8)
+        	ldmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	ldmxcsr	32493
+        	ldmxcsr	0x7eed
+
+// CHECK: 	ldmxcsr	3133065982
+        	ldmxcsr	0xbabecafe
+
+// CHECK: 	ldmxcsr	305419896
+        	ldmxcsr	0x12345678
+
+// CHECK: 	maskmovq	%mm3, %mm3
+        	maskmovq	%mm3,%mm3
+
+// CHECK: 	maxps	3735928559(%ebx,%ecx,8), %xmm5
+        	maxps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	maxps	69, %xmm5
+        	maxps	0x45,%xmm5
+
+// CHECK: 	maxps	32493, %xmm5
+        	maxps	0x7eed,%xmm5
+
+// CHECK: 	maxps	3133065982, %xmm5
+        	maxps	0xbabecafe,%xmm5
+
+// CHECK: 	maxps	305419896, %xmm5
+        	maxps	0x12345678,%xmm5
+
+// CHECK: 	maxps	%xmm5, %xmm5
+        	maxps	%xmm5,%xmm5
+
+// CHECK: 	maxss	3735928559(%ebx,%ecx,8), %xmm5
+        	maxss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	maxss	69, %xmm5
+        	maxss	0x45,%xmm5
+
+// CHECK: 	maxss	32493, %xmm5
+        	maxss	0x7eed,%xmm5
+
+// CHECK: 	maxss	3133065982, %xmm5
+        	maxss	0xbabecafe,%xmm5
+
+// CHECK: 	maxss	305419896, %xmm5
+        	maxss	0x12345678,%xmm5
+
+// CHECK: 	maxss	%xmm5, %xmm5
+        	maxss	%xmm5,%xmm5
+
+// CHECK: 	minps	3735928559(%ebx,%ecx,8), %xmm5
+        	minps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	minps	69, %xmm5
+        	minps	0x45,%xmm5
+
+// CHECK: 	minps	32493, %xmm5
+        	minps	0x7eed,%xmm5
+
+// CHECK: 	minps	3133065982, %xmm5
+        	minps	0xbabecafe,%xmm5
+
+// CHECK: 	minps	305419896, %xmm5
+        	minps	0x12345678,%xmm5
+
+// CHECK: 	minps	%xmm5, %xmm5
+        	minps	%xmm5,%xmm5
+
+// CHECK: 	minss	3735928559(%ebx,%ecx,8), %xmm5
+        	minss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	minss	69, %xmm5
+        	minss	0x45,%xmm5
+
+// CHECK: 	minss	32493, %xmm5
+        	minss	0x7eed,%xmm5
+
+// CHECK: 	minss	3133065982, %xmm5
+        	minss	0xbabecafe,%xmm5
+
+// CHECK: 	minss	305419896, %xmm5
+        	minss	0x12345678,%xmm5
+
+// CHECK: 	minss	%xmm5, %xmm5
+        	minss	%xmm5,%xmm5
+
+// CHECK: 	movaps	3735928559(%ebx,%ecx,8), %xmm5
+        	movaps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movaps	69, %xmm5
+        	movaps	0x45,%xmm5
+
+// CHECK: 	movaps	32493, %xmm5
+        	movaps	0x7eed,%xmm5
+
+// CHECK: 	movaps	3133065982, %xmm5
+        	movaps	0xbabecafe,%xmm5
+
+// CHECK: 	movaps	305419896, %xmm5
+        	movaps	0x12345678,%xmm5
+
+// CHECK: 	movaps	%xmm5, %xmm5
+        	movaps	%xmm5,%xmm5
+
+// CHECK: 	movaps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movaps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movaps	%xmm5, 69
+        	movaps	%xmm5,0x45
+
+// CHECK: 	movaps	%xmm5, 32493
+        	movaps	%xmm5,0x7eed
+
+// CHECK: 	movaps	%xmm5, 3133065982
+        	movaps	%xmm5,0xbabecafe
+
+// CHECK: 	movaps	%xmm5, 305419896
+        	movaps	%xmm5,0x12345678
+
+// CHECK: 	movaps	%xmm5, %xmm5
+        	movaps	%xmm5,%xmm5
+
+// CHECK: 	movhlps	%xmm5, %xmm5
+        	movhlps	%xmm5,%xmm5
+
+// CHECK: 	movhps	3735928559(%ebx,%ecx,8), %xmm5
+        	movhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movhps	69, %xmm5
+        	movhps	0x45,%xmm5
+
+// CHECK: 	movhps	32493, %xmm5
+        	movhps	0x7eed,%xmm5
+
+// CHECK: 	movhps	3133065982, %xmm5
+        	movhps	0xbabecafe,%xmm5
+
+// CHECK: 	movhps	305419896, %xmm5
+        	movhps	0x12345678,%xmm5
+
+// CHECK: 	movhps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movhps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movhps	%xmm5, 69
+        	movhps	%xmm5,0x45
+
+// CHECK: 	movhps	%xmm5, 32493
+        	movhps	%xmm5,0x7eed
+
+// CHECK: 	movhps	%xmm5, 3133065982
+        	movhps	%xmm5,0xbabecafe
+
+// CHECK: 	movhps	%xmm5, 305419896
+        	movhps	%xmm5,0x12345678
+
+// CHECK: 	movlhps	%xmm5, %xmm5
+        	movlhps	%xmm5,%xmm5
+
+// CHECK: 	movlps	3735928559(%ebx,%ecx,8), %xmm5
+        	movlps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movlps	69, %xmm5
+        	movlps	0x45,%xmm5
+
+// CHECK: 	movlps	32493, %xmm5
+        	movlps	0x7eed,%xmm5
+
+// CHECK: 	movlps	3133065982, %xmm5
+        	movlps	0xbabecafe,%xmm5
+
+// CHECK: 	movlps	305419896, %xmm5
+        	movlps	0x12345678,%xmm5
+
+// CHECK: 	movlps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movlps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movlps	%xmm5, 69
+        	movlps	%xmm5,0x45
+
+// CHECK: 	movlps	%xmm5, 32493
+        	movlps	%xmm5,0x7eed
+
+// CHECK: 	movlps	%xmm5, 3133065982
+        	movlps	%xmm5,0xbabecafe
+
+// CHECK: 	movlps	%xmm5, 305419896
+        	movlps	%xmm5,0x12345678
+
+// CHECK: 	movmskps	%xmm5, %ecx
+        	movmskps	%xmm5,%ecx
+
+// CHECK: 	movntps	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntps	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntps	%xmm5, 69
+        	movntps	%xmm5,0x45
+
+// CHECK: 	movntps	%xmm5, 32493
+        	movntps	%xmm5,0x7eed
+
+// CHECK: 	movntps	%xmm5, 3133065982
+        	movntps	%xmm5,0xbabecafe
+
+// CHECK: 	movntps	%xmm5, 305419896
+        	movntps	%xmm5,0x12345678
+
+// CHECK: 	movntq	%mm3, 3735928559(%ebx,%ecx,8)
+        	movntq	%mm3,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntq	%mm3, 69
+        	movntq	%mm3,0x45
+
+// CHECK: 	movntq	%mm3, 32493
+        	movntq	%mm3,0x7eed
+
+// CHECK: 	movntq	%mm3, 3133065982
+        	movntq	%mm3,0xbabecafe
+
+// CHECK: 	movntq	%mm3, 305419896
+        	movntq	%mm3,0x12345678
+
+// CHECK: 	movntdq	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntdq	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntdq	%xmm5, 69
+        	movntdq	%xmm5,0x45
+
+// CHECK: 	movntdq	%xmm5, 32493
+        	movntdq	%xmm5,0x7eed
+
+// CHECK: 	movntdq	%xmm5, 3133065982
+        	movntdq	%xmm5,0xbabecafe
+
+// CHECK: 	movntdq	%xmm5, 305419896
+        	movntdq	%xmm5,0x12345678
+
+// CHECK: 	movss	3735928559(%ebx,%ecx,8), %xmm5
+        	movss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movss	69, %xmm5
+        	movss	0x45,%xmm5
+
+// CHECK: 	movss	32493, %xmm5
+        	movss	0x7eed,%xmm5
+
+// CHECK: 	movss	3133065982, %xmm5
+        	movss	0xbabecafe,%xmm5
+
+// CHECK: 	movss	305419896, %xmm5
+        	movss	0x12345678,%xmm5
+
+// CHECK: 	movss	%xmm5, %xmm5
+        	movss	%xmm5,%xmm5
+
+// CHECK: 	movss	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movss	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movss	%xmm5, 69
+        	movss	%xmm5,0x45
+
+// CHECK: 	movss	%xmm5, 32493
+        	movss	%xmm5,0x7eed
+
+// CHECK: 	movss	%xmm5, 3133065982
+        	movss	%xmm5,0xbabecafe
+
+// CHECK: 	movss	%xmm5, 305419896
+        	movss	%xmm5,0x12345678
+
+// CHECK: 	movss	%xmm5, %xmm5
+        	movss	%xmm5,%xmm5
+
+// CHECK: 	movups	3735928559(%ebx,%ecx,8), %xmm5
+        	movups	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movups	69, %xmm5
+        	movups	0x45,%xmm5
+
+// CHECK: 	movups	32493, %xmm5
+        	movups	0x7eed,%xmm5
+
+// CHECK: 	movups	3133065982, %xmm5
+        	movups	0xbabecafe,%xmm5
+
+// CHECK: 	movups	305419896, %xmm5
+        	movups	0x12345678,%xmm5
+
+// CHECK: 	movups	%xmm5, %xmm5
+        	movups	%xmm5,%xmm5
+
+// CHECK: 	movups	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movups	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movups	%xmm5, 69
+        	movups	%xmm5,0x45
+
+// CHECK: 	movups	%xmm5, 32493
+        	movups	%xmm5,0x7eed
+
+// CHECK: 	movups	%xmm5, 3133065982
+        	movups	%xmm5,0xbabecafe
+
+// CHECK: 	movups	%xmm5, 305419896
+        	movups	%xmm5,0x12345678
+
+// CHECK: 	movups	%xmm5, %xmm5
+        	movups	%xmm5,%xmm5
+
+// CHECK: 	mulps	3735928559(%ebx,%ecx,8), %xmm5
+        	mulps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	mulps	69, %xmm5
+        	mulps	0x45,%xmm5
+
+// CHECK: 	mulps	32493, %xmm5
+        	mulps	0x7eed,%xmm5
+
+// CHECK: 	mulps	3133065982, %xmm5
+        	mulps	0xbabecafe,%xmm5
+
+// CHECK: 	mulps	305419896, %xmm5
+        	mulps	0x12345678,%xmm5
+
+// CHECK: 	mulps	%xmm5, %xmm5
+        	mulps	%xmm5,%xmm5
+
+// CHECK: 	mulss	3735928559(%ebx,%ecx,8), %xmm5
+        	mulss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	mulss	69, %xmm5
+        	mulss	0x45,%xmm5
+
+// CHECK: 	mulss	32493, %xmm5
+        	mulss	0x7eed,%xmm5
+
+// CHECK: 	mulss	3133065982, %xmm5
+        	mulss	0xbabecafe,%xmm5
+
+// CHECK: 	mulss	305419896, %xmm5
+        	mulss	0x12345678,%xmm5
+
+// CHECK: 	mulss	%xmm5, %xmm5
+        	mulss	%xmm5,%xmm5
+
+// CHECK: 	orps	3735928559(%ebx,%ecx,8), %xmm5
+        	orps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	orps	69, %xmm5
+        	orps	0x45,%xmm5
+
+// CHECK: 	orps	32493, %xmm5
+        	orps	0x7eed,%xmm5
+
+// CHECK: 	orps	3133065982, %xmm5
+        	orps	0xbabecafe,%xmm5
+
+// CHECK: 	orps	305419896, %xmm5
+        	orps	0x12345678,%xmm5
+
+// CHECK: 	orps	%xmm5, %xmm5
+        	orps	%xmm5,%xmm5
+
+// CHECK: 	pavgb	3735928559(%ebx,%ecx,8), %mm3
+        	pavgb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pavgb	69, %mm3
+        	pavgb	0x45,%mm3
+
+// CHECK: 	pavgb	32493, %mm3
+        	pavgb	0x7eed,%mm3
+
+// CHECK: 	pavgb	3133065982, %mm3
+        	pavgb	0xbabecafe,%mm3
+
+// CHECK: 	pavgb	305419896, %mm3
+        	pavgb	0x12345678,%mm3
+
+// CHECK: 	pavgb	%mm3, %mm3
+        	pavgb	%mm3,%mm3
+
+// CHECK: 	pavgb	3735928559(%ebx,%ecx,8), %xmm5
+        	pavgb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pavgb	69, %xmm5
+        	pavgb	0x45,%xmm5
+
+// CHECK: 	pavgb	32493, %xmm5
+        	pavgb	0x7eed,%xmm5
+
+// CHECK: 	pavgb	3133065982, %xmm5
+        	pavgb	0xbabecafe,%xmm5
+
+// CHECK: 	pavgb	305419896, %xmm5
+        	pavgb	0x12345678,%xmm5
+
+// CHECK: 	pavgb	%xmm5, %xmm5
+        	pavgb	%xmm5,%xmm5
+
+// CHECK: 	pavgw	3735928559(%ebx,%ecx,8), %mm3
+        	pavgw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pavgw	69, %mm3
+        	pavgw	0x45,%mm3
+
+// CHECK: 	pavgw	32493, %mm3
+        	pavgw	0x7eed,%mm3
+
+// CHECK: 	pavgw	3133065982, %mm3
+        	pavgw	0xbabecafe,%mm3
+
+// CHECK: 	pavgw	305419896, %mm3
+        	pavgw	0x12345678,%mm3
+
+// CHECK: 	pavgw	%mm3, %mm3
+        	pavgw	%mm3,%mm3
+
+// CHECK: 	pavgw	3735928559(%ebx,%ecx,8), %xmm5
+        	pavgw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pavgw	69, %xmm5
+        	pavgw	0x45,%xmm5
+
+// CHECK: 	pavgw	32493, %xmm5
+        	pavgw	0x7eed,%xmm5
+
+// CHECK: 	pavgw	3133065982, %xmm5
+        	pavgw	0xbabecafe,%xmm5
+
+// CHECK: 	pavgw	305419896, %xmm5
+        	pavgw	0x12345678,%xmm5
+
+// CHECK: 	pavgw	%xmm5, %xmm5
+        	pavgw	%xmm5,%xmm5
+
+// CHECK: 	pmaxsw	3735928559(%ebx,%ecx,8), %mm3
+        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmaxsw	69, %mm3
+        	pmaxsw	0x45,%mm3
+
+// CHECK: 	pmaxsw	32493, %mm3
+        	pmaxsw	0x7eed,%mm3
+
+// CHECK: 	pmaxsw	3133065982, %mm3
+        	pmaxsw	0xbabecafe,%mm3
+
+// CHECK: 	pmaxsw	305419896, %mm3
+        	pmaxsw	0x12345678,%mm3
+
+// CHECK: 	pmaxsw	%mm3, %mm3
+        	pmaxsw	%mm3,%mm3
+
+// CHECK: 	pmaxsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxsw	69, %xmm5
+        	pmaxsw	0x45,%xmm5
+
+// CHECK: 	pmaxsw	32493, %xmm5
+        	pmaxsw	0x7eed,%xmm5
+
+// CHECK: 	pmaxsw	3133065982, %xmm5
+        	pmaxsw	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxsw	305419896, %xmm5
+        	pmaxsw	0x12345678,%xmm5
+
+// CHECK: 	pmaxsw	%xmm5, %xmm5
+        	pmaxsw	%xmm5,%xmm5
+
+// CHECK: 	pmaxub	3735928559(%ebx,%ecx,8), %mm3
+        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmaxub	69, %mm3
+        	pmaxub	0x45,%mm3
+
+// CHECK: 	pmaxub	32493, %mm3
+        	pmaxub	0x7eed,%mm3
+
+// CHECK: 	pmaxub	3133065982, %mm3
+        	pmaxub	0xbabecafe,%mm3
+
+// CHECK: 	pmaxub	305419896, %mm3
+        	pmaxub	0x12345678,%mm3
+
+// CHECK: 	pmaxub	%mm3, %mm3
+        	pmaxub	%mm3,%mm3
+
+// CHECK: 	pmaxub	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxub	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxub	69, %xmm5
+        	pmaxub	0x45,%xmm5
+
+// CHECK: 	pmaxub	32493, %xmm5
+        	pmaxub	0x7eed,%xmm5
+
+// CHECK: 	pmaxub	3133065982, %xmm5
+        	pmaxub	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxub	305419896, %xmm5
+        	pmaxub	0x12345678,%xmm5
+
+// CHECK: 	pmaxub	%xmm5, %xmm5
+        	pmaxub	%xmm5,%xmm5
+
+// CHECK: 	pminsw	3735928559(%ebx,%ecx,8), %mm3
+        	pminsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pminsw	69, %mm3
+        	pminsw	0x45,%mm3
+
+// CHECK: 	pminsw	32493, %mm3
+        	pminsw	0x7eed,%mm3
+
+// CHECK: 	pminsw	3133065982, %mm3
+        	pminsw	0xbabecafe,%mm3
+
+// CHECK: 	pminsw	305419896, %mm3
+        	pminsw	0x12345678,%mm3
+
+// CHECK: 	pminsw	%mm3, %mm3
+        	pminsw	%mm3,%mm3
+
+// CHECK: 	pminsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pminsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminsw	69, %xmm5
+        	pminsw	0x45,%xmm5
+
+// CHECK: 	pminsw	32493, %xmm5
+        	pminsw	0x7eed,%xmm5
+
+// CHECK: 	pminsw	3133065982, %xmm5
+        	pminsw	0xbabecafe,%xmm5
+
+// CHECK: 	pminsw	305419896, %xmm5
+        	pminsw	0x12345678,%xmm5
+
+// CHECK: 	pminsw	%xmm5, %xmm5
+        	pminsw	%xmm5,%xmm5
+
+// CHECK: 	pminub	3735928559(%ebx,%ecx,8), %mm3
+        	pminub	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pminub	69, %mm3
+        	pminub	0x45,%mm3
+
+// CHECK: 	pminub	32493, %mm3
+        	pminub	0x7eed,%mm3
+
+// CHECK: 	pminub	3133065982, %mm3
+        	pminub	0xbabecafe,%mm3
+
+// CHECK: 	pminub	305419896, %mm3
+        	pminub	0x12345678,%mm3
+
+// CHECK: 	pminub	%mm3, %mm3
+        	pminub	%mm3,%mm3
+
+// CHECK: 	pminub	3735928559(%ebx,%ecx,8), %xmm5
+        	pminub	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminub	69, %xmm5
+        	pminub	0x45,%xmm5
+
+// CHECK: 	pminub	32493, %xmm5
+        	pminub	0x7eed,%xmm5
+
+// CHECK: 	pminub	3133065982, %xmm5
+        	pminub	0xbabecafe,%xmm5
+
+// CHECK: 	pminub	305419896, %xmm5
+        	pminub	0x12345678,%xmm5
+
+// CHECK: 	pminub	%xmm5, %xmm5
+        	pminub	%xmm5,%xmm5
+
+// CHECK: 	pmovmskb	%mm3, %ecx
+        	pmovmskb	%mm3,%ecx
+
+// CHECK: 	pmovmskb	%xmm5, %ecx
+        	pmovmskb	%xmm5,%ecx
+
+// CHECK: 	pmulhuw	3735928559(%ebx,%ecx,8), %mm3
+        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmulhuw	69, %mm3
+        	pmulhuw	0x45,%mm3
+
+// CHECK: 	pmulhuw	32493, %mm3
+        	pmulhuw	0x7eed,%mm3
+
+// CHECK: 	pmulhuw	3133065982, %mm3
+        	pmulhuw	0xbabecafe,%mm3
+
+// CHECK: 	pmulhuw	305419896, %mm3
+        	pmulhuw	0x12345678,%mm3
+
+// CHECK: 	pmulhuw	%mm3, %mm3
+        	pmulhuw	%mm3,%mm3
+
+// CHECK: 	pmulhuw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmulhuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmulhuw	69, %xmm5
+        	pmulhuw	0x45,%xmm5
+
+// CHECK: 	pmulhuw	32493, %xmm5
+        	pmulhuw	0x7eed,%xmm5
+
+// CHECK: 	pmulhuw	3133065982, %xmm5
+        	pmulhuw	0xbabecafe,%xmm5
+
+// CHECK: 	pmulhuw	305419896, %xmm5
+        	pmulhuw	0x12345678,%xmm5
+
+// CHECK: 	pmulhuw	%xmm5, %xmm5
+        	pmulhuw	%xmm5,%xmm5
+
+// CHECK: 	prefetchnta	3735928559(%ebx,%ecx,8)
+        	prefetchnta	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetchnta	32493
+        	prefetchnta	0x7eed
+
+// CHECK: 	prefetchnta	3133065982
+        	prefetchnta	0xbabecafe
+
+// CHECK: 	prefetchnta	305419896
+        	prefetchnta	0x12345678
+
+// CHECK: 	prefetcht0	3735928559(%ebx,%ecx,8)
+        	prefetcht0	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht0	32493
+        	prefetcht0	0x7eed
+
+// CHECK: 	prefetcht0	3133065982
+        	prefetcht0	0xbabecafe
+
+// CHECK: 	prefetcht0	305419896
+        	prefetcht0	0x12345678
+
+// CHECK: 	prefetcht1	3735928559(%ebx,%ecx,8)
+        	prefetcht1	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht1	32493
+        	prefetcht1	0x7eed
+
+// CHECK: 	prefetcht1	3133065982
+        	prefetcht1	0xbabecafe
+
+// CHECK: 	prefetcht1	305419896
+        	prefetcht1	0x12345678
+
+// CHECK: 	prefetcht2	3735928559(%ebx,%ecx,8)
+        	prefetcht2	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	prefetcht2	32493
+        	prefetcht2	0x7eed
+
+// CHECK: 	prefetcht2	3133065982
+        	prefetcht2	0xbabecafe
+
+// CHECK: 	prefetcht2	305419896
+        	prefetcht2	0x12345678
+
+// CHECK: 	psadbw	3735928559(%ebx,%ecx,8), %mm3
+        	psadbw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psadbw	69, %mm3
+        	psadbw	0x45,%mm3
+
+// CHECK: 	psadbw	32493, %mm3
+        	psadbw	0x7eed,%mm3
+
+// CHECK: 	psadbw	3133065982, %mm3
+        	psadbw	0xbabecafe,%mm3
+
+// CHECK: 	psadbw	305419896, %mm3
+        	psadbw	0x12345678,%mm3
+
+// CHECK: 	psadbw	%mm3, %mm3
+        	psadbw	%mm3,%mm3
+
+// CHECK: 	psadbw	3735928559(%ebx,%ecx,8), %xmm5
+        	psadbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psadbw	69, %xmm5
+        	psadbw	0x45,%xmm5
+
+// CHECK: 	psadbw	32493, %xmm5
+        	psadbw	0x7eed,%xmm5
+
+// CHECK: 	psadbw	3133065982, %xmm5
+        	psadbw	0xbabecafe,%xmm5
+
+// CHECK: 	psadbw	305419896, %xmm5
+        	psadbw	0x12345678,%xmm5
+
+// CHECK: 	psadbw	%xmm5, %xmm5
+        	psadbw	%xmm5,%xmm5
+
+// CHECK: 	rcpps	3735928559(%ebx,%ecx,8), %xmm5
+        	rcpps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rcpps	69, %xmm5
+        	rcpps	0x45,%xmm5
+
+// CHECK: 	rcpps	32493, %xmm5
+        	rcpps	0x7eed,%xmm5
+
+// CHECK: 	rcpps	3133065982, %xmm5
+        	rcpps	0xbabecafe,%xmm5
+
+// CHECK: 	rcpps	305419896, %xmm5
+        	rcpps	0x12345678,%xmm5
+
+// CHECK: 	rcpps	%xmm5, %xmm5
+        	rcpps	%xmm5,%xmm5
+
+// CHECK: 	rcpss	3735928559(%ebx,%ecx,8), %xmm5
+        	rcpss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rcpss	69, %xmm5
+        	rcpss	0x45,%xmm5
+
+// CHECK: 	rcpss	32493, %xmm5
+        	rcpss	0x7eed,%xmm5
+
+// CHECK: 	rcpss	3133065982, %xmm5
+        	rcpss	0xbabecafe,%xmm5
+
+// CHECK: 	rcpss	305419896, %xmm5
+        	rcpss	0x12345678,%xmm5
+
+// CHECK: 	rcpss	%xmm5, %xmm5
+        	rcpss	%xmm5,%xmm5
+
+// CHECK: 	rsqrtps	3735928559(%ebx,%ecx,8), %xmm5
+        	rsqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rsqrtps	69, %xmm5
+        	rsqrtps	0x45,%xmm5
+
+// CHECK: 	rsqrtps	32493, %xmm5
+        	rsqrtps	0x7eed,%xmm5
+
+// CHECK: 	rsqrtps	3133065982, %xmm5
+        	rsqrtps	0xbabecafe,%xmm5
+
+// CHECK: 	rsqrtps	305419896, %xmm5
+        	rsqrtps	0x12345678,%xmm5
+
+// CHECK: 	rsqrtps	%xmm5, %xmm5
+        	rsqrtps	%xmm5,%xmm5
+
+// CHECK: 	rsqrtss	3735928559(%ebx,%ecx,8), %xmm5
+        	rsqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	rsqrtss	69, %xmm5
+        	rsqrtss	0x45,%xmm5
+
+// CHECK: 	rsqrtss	32493, %xmm5
+        	rsqrtss	0x7eed,%xmm5
+
+// CHECK: 	rsqrtss	3133065982, %xmm5
+        	rsqrtss	0xbabecafe,%xmm5
+
+// CHECK: 	rsqrtss	305419896, %xmm5
+        	rsqrtss	0x12345678,%xmm5
+
+// CHECK: 	rsqrtss	%xmm5, %xmm5
+        	rsqrtss	%xmm5,%xmm5
+
+// CHECK: 	sfence
+        	sfence
+
+// CHECK: 	sqrtps	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtps	69, %xmm5
+        	sqrtps	0x45,%xmm5
+
+// CHECK: 	sqrtps	32493, %xmm5
+        	sqrtps	0x7eed,%xmm5
+
+// CHECK: 	sqrtps	3133065982, %xmm5
+        	sqrtps	0xbabecafe,%xmm5
+
+// CHECK: 	sqrtps	305419896, %xmm5
+        	sqrtps	0x12345678,%xmm5
+
+// CHECK: 	sqrtps	%xmm5, %xmm5
+        	sqrtps	%xmm5,%xmm5
+
+// CHECK: 	sqrtss	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtss	69, %xmm5
+        	sqrtss	0x45,%xmm5
+
+// CHECK: 	sqrtss	32493, %xmm5
+        	sqrtss	0x7eed,%xmm5
+
+// CHECK: 	sqrtss	3133065982, %xmm5
+        	sqrtss	0xbabecafe,%xmm5
+
+// CHECK: 	sqrtss	305419896, %xmm5
+        	sqrtss	0x12345678,%xmm5
+
+// CHECK: 	sqrtss	%xmm5, %xmm5
+        	sqrtss	%xmm5,%xmm5
+
+// CHECK: 	stmxcsr	3735928559(%ebx,%ecx,8)
+        	stmxcsr	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	stmxcsr	32493
+        	stmxcsr	0x7eed
+
+// CHECK: 	stmxcsr	3133065982
+        	stmxcsr	0xbabecafe
+
+// CHECK: 	stmxcsr	305419896
+        	stmxcsr	0x12345678
+
+// CHECK: 	subps	3735928559(%ebx,%ecx,8), %xmm5
+        	subps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	subps	69, %xmm5
+        	subps	0x45,%xmm5
+
+// CHECK: 	subps	32493, %xmm5
+        	subps	0x7eed,%xmm5
+
+// CHECK: 	subps	3133065982, %xmm5
+        	subps	0xbabecafe,%xmm5
+
+// CHECK: 	subps	305419896, %xmm5
+        	subps	0x12345678,%xmm5
+
+// CHECK: 	subps	%xmm5, %xmm5
+        	subps	%xmm5,%xmm5
+
+// CHECK: 	subss	3735928559(%ebx,%ecx,8), %xmm5
+        	subss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	subss	69, %xmm5
+        	subss	0x45,%xmm5
+
+// CHECK: 	subss	32493, %xmm5
+        	subss	0x7eed,%xmm5
+
+// CHECK: 	subss	3133065982, %xmm5
+        	subss	0xbabecafe,%xmm5
+
+// CHECK: 	subss	305419896, %xmm5
+        	subss	0x12345678,%xmm5
+
+// CHECK: 	subss	%xmm5, %xmm5
+        	subss	%xmm5,%xmm5
+
+// CHECK: 	ucomiss	3735928559(%ebx,%ecx,8), %xmm5
+        	ucomiss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ucomiss	69, %xmm5
+        	ucomiss	0x45,%xmm5
+
+// CHECK: 	ucomiss	32493, %xmm5
+        	ucomiss	0x7eed,%xmm5
+
+// CHECK: 	ucomiss	3133065982, %xmm5
+        	ucomiss	0xbabecafe,%xmm5
+
+// CHECK: 	ucomiss	305419896, %xmm5
+        	ucomiss	0x12345678,%xmm5
+
+// CHECK: 	ucomiss	%xmm5, %xmm5
+        	ucomiss	%xmm5,%xmm5
+
+// CHECK: 	unpckhps	3735928559(%ebx,%ecx,8), %xmm5
+        	unpckhps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	unpckhps	69, %xmm5
+        	unpckhps	0x45,%xmm5
+
+// CHECK: 	unpckhps	32493, %xmm5
+        	unpckhps	0x7eed,%xmm5
+
+// CHECK: 	unpckhps	3133065982, %xmm5
+        	unpckhps	0xbabecafe,%xmm5
+
+// CHECK: 	unpckhps	305419896, %xmm5
+        	unpckhps	0x12345678,%xmm5
+
+// CHECK: 	unpckhps	%xmm5, %xmm5
+        	unpckhps	%xmm5,%xmm5
+
+// CHECK: 	unpcklps	3735928559(%ebx,%ecx,8), %xmm5
+        	unpcklps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	unpcklps	69, %xmm5
+        	unpcklps	0x45,%xmm5
+
+// CHECK: 	unpcklps	32493, %xmm5
+        	unpcklps	0x7eed,%xmm5
+
+// CHECK: 	unpcklps	3133065982, %xmm5
+        	unpcklps	0xbabecafe,%xmm5
+
+// CHECK: 	unpcklps	305419896, %xmm5
+        	unpcklps	0x12345678,%xmm5
+
+// CHECK: 	unpcklps	%xmm5, %xmm5
+        	unpcklps	%xmm5,%xmm5
+
+// CHECK: 	xorps	3735928559(%ebx,%ecx,8), %xmm5
+        	xorps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	xorps	69, %xmm5
+        	xorps	0x45,%xmm5
+
+// CHECK: 	xorps	32493, %xmm5
+        	xorps	0x7eed,%xmm5
+
+// CHECK: 	xorps	3133065982, %xmm5
+        	xorps	0xbabecafe,%xmm5
+
+// CHECK: 	xorps	305419896, %xmm5
+        	xorps	0x12345678,%xmm5
+
+// CHECK: 	xorps	%xmm5, %xmm5
+        	xorps	%xmm5,%xmm5
+
+// CHECK: 	addpd	3735928559(%ebx,%ecx,8), %xmm5
+        	addpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addpd	69, %xmm5
+        	addpd	0x45,%xmm5
+
+// CHECK: 	addpd	32493, %xmm5
+        	addpd	0x7eed,%xmm5
+
+// CHECK: 	addpd	3133065982, %xmm5
+        	addpd	0xbabecafe,%xmm5
+
+// CHECK: 	addpd	305419896, %xmm5
+        	addpd	0x12345678,%xmm5
+
+// CHECK: 	addpd	%xmm5, %xmm5
+        	addpd	%xmm5,%xmm5
+
+// CHECK: 	addsd	3735928559(%ebx,%ecx,8), %xmm5
+        	addsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addsd	69, %xmm5
+        	addsd	0x45,%xmm5
+
+// CHECK: 	addsd	32493, %xmm5
+        	addsd	0x7eed,%xmm5
+
+// CHECK: 	addsd	3133065982, %xmm5
+        	addsd	0xbabecafe,%xmm5
+
+// CHECK: 	addsd	305419896, %xmm5
+        	addsd	0x12345678,%xmm5
+
+// CHECK: 	addsd	%xmm5, %xmm5
+        	addsd	%xmm5,%xmm5
+
+// CHECK: 	andnpd	3735928559(%ebx,%ecx,8), %xmm5
+        	andnpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	andnpd	69, %xmm5
+        	andnpd	0x45,%xmm5
+
+// CHECK: 	andnpd	32493, %xmm5
+        	andnpd	0x7eed,%xmm5
+
+// CHECK: 	andnpd	3133065982, %xmm5
+        	andnpd	0xbabecafe,%xmm5
+
+// CHECK: 	andnpd	305419896, %xmm5
+        	andnpd	0x12345678,%xmm5
+
+// CHECK: 	andnpd	%xmm5, %xmm5
+        	andnpd	%xmm5,%xmm5
+
+// CHECK: 	andpd	3735928559(%ebx,%ecx,8), %xmm5
+        	andpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	andpd	69, %xmm5
+        	andpd	0x45,%xmm5
+
+// CHECK: 	andpd	32493, %xmm5
+        	andpd	0x7eed,%xmm5
+
+// CHECK: 	andpd	3133065982, %xmm5
+        	andpd	0xbabecafe,%xmm5
+
+// CHECK: 	andpd	305419896, %xmm5
+        	andpd	0x12345678,%xmm5
+
+// CHECK: 	andpd	%xmm5, %xmm5
+        	andpd	%xmm5,%xmm5
+
+// CHECK: 	comisd	3735928559(%ebx,%ecx,8), %xmm5
+        	comisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	comisd	69, %xmm5
+        	comisd	0x45,%xmm5
+
+// CHECK: 	comisd	32493, %xmm5
+        	comisd	0x7eed,%xmm5
+
+// CHECK: 	comisd	3133065982, %xmm5
+        	comisd	0xbabecafe,%xmm5
+
+// CHECK: 	comisd	305419896, %xmm5
+        	comisd	0x12345678,%xmm5
+
+// CHECK: 	comisd	%xmm5, %xmm5
+        	comisd	%xmm5,%xmm5
+
+// CHECK: 	cvtpi2pd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpi2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpi2pd	69, %xmm5
+        	cvtpi2pd	0x45,%xmm5
+
+// CHECK: 	cvtpi2pd	32493, %xmm5
+        	cvtpi2pd	0x7eed,%xmm5
+
+// CHECK: 	cvtpi2pd	3133065982, %xmm5
+        	cvtpi2pd	0xbabecafe,%xmm5
+
+// CHECK: 	cvtpi2pd	305419896, %xmm5
+        	cvtpi2pd	0x12345678,%xmm5
+
+// CHECK: 	cvtpi2pd	%mm3, %xmm5
+        	cvtpi2pd	%mm3,%xmm5
+
+// CHECK: 	cvtsi2sd	%ecx, %xmm5
+        	cvtsi2sd	%ecx,%xmm5
+
+// CHECK: 	cvtsi2sd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsi2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtsi2sd	69, %xmm5
+        	cvtsi2sd	0x45,%xmm5
+
+// CHECK: 	cvtsi2sd	32493, %xmm5
+        	cvtsi2sd	0x7eed,%xmm5
+
+// CHECK: 	cvtsi2sd	3133065982, %xmm5
+        	cvtsi2sd	0xbabecafe,%xmm5
+
+// CHECK: 	cvtsi2sd	305419896, %xmm5
+        	cvtsi2sd	0x12345678,%xmm5
+
+// CHECK: 	divpd	3735928559(%ebx,%ecx,8), %xmm5
+        	divpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	divpd	69, %xmm5
+        	divpd	0x45,%xmm5
+
+// CHECK: 	divpd	32493, %xmm5
+        	divpd	0x7eed,%xmm5
+
+// CHECK: 	divpd	3133065982, %xmm5
+        	divpd	0xbabecafe,%xmm5
+
+// CHECK: 	divpd	305419896, %xmm5
+        	divpd	0x12345678,%xmm5
+
+// CHECK: 	divpd	%xmm5, %xmm5
+        	divpd	%xmm5,%xmm5
+
+// CHECK: 	divsd	3735928559(%ebx,%ecx,8), %xmm5
+        	divsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	divsd	69, %xmm5
+        	divsd	0x45,%xmm5
+
+// CHECK: 	divsd	32493, %xmm5
+        	divsd	0x7eed,%xmm5
+
+// CHECK: 	divsd	3133065982, %xmm5
+        	divsd	0xbabecafe,%xmm5
+
+// CHECK: 	divsd	305419896, %xmm5
+        	divsd	0x12345678,%xmm5
+
+// CHECK: 	divsd	%xmm5, %xmm5
+        	divsd	%xmm5,%xmm5
+
+// CHECK: 	maxpd	3735928559(%ebx,%ecx,8), %xmm5
+        	maxpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	maxpd	69, %xmm5
+        	maxpd	0x45,%xmm5
+
+// CHECK: 	maxpd	32493, %xmm5
+        	maxpd	0x7eed,%xmm5
+
+// CHECK: 	maxpd	3133065982, %xmm5
+        	maxpd	0xbabecafe,%xmm5
+
+// CHECK: 	maxpd	305419896, %xmm5
+        	maxpd	0x12345678,%xmm5
+
+// CHECK: 	maxpd	%xmm5, %xmm5
+        	maxpd	%xmm5,%xmm5
+
+// CHECK: 	maxsd	3735928559(%ebx,%ecx,8), %xmm5
+        	maxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	maxsd	69, %xmm5
+        	maxsd	0x45,%xmm5
+
+// CHECK: 	maxsd	32493, %xmm5
+        	maxsd	0x7eed,%xmm5
+
+// CHECK: 	maxsd	3133065982, %xmm5
+        	maxsd	0xbabecafe,%xmm5
+
+// CHECK: 	maxsd	305419896, %xmm5
+        	maxsd	0x12345678,%xmm5
+
+// CHECK: 	maxsd	%xmm5, %xmm5
+        	maxsd	%xmm5,%xmm5
+
+// CHECK: 	minpd	3735928559(%ebx,%ecx,8), %xmm5
+        	minpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	minpd	69, %xmm5
+        	minpd	0x45,%xmm5
+
+// CHECK: 	minpd	32493, %xmm5
+        	minpd	0x7eed,%xmm5
+
+// CHECK: 	minpd	3133065982, %xmm5
+        	minpd	0xbabecafe,%xmm5
+
+// CHECK: 	minpd	305419896, %xmm5
+        	minpd	0x12345678,%xmm5
+
+// CHECK: 	minpd	%xmm5, %xmm5
+        	minpd	%xmm5,%xmm5
+
+// CHECK: 	minsd	3735928559(%ebx,%ecx,8), %xmm5
+        	minsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	minsd	69, %xmm5
+        	minsd	0x45,%xmm5
+
+// CHECK: 	minsd	32493, %xmm5
+        	minsd	0x7eed,%xmm5
+
+// CHECK: 	minsd	3133065982, %xmm5
+        	minsd	0xbabecafe,%xmm5
+
+// CHECK: 	minsd	305419896, %xmm5
+        	minsd	0x12345678,%xmm5
+
+// CHECK: 	minsd	%xmm5, %xmm5
+        	minsd	%xmm5,%xmm5
+
+// CHECK: 	movapd	3735928559(%ebx,%ecx,8), %xmm5
+        	movapd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movapd	69, %xmm5
+        	movapd	0x45,%xmm5
+
+// CHECK: 	movapd	32493, %xmm5
+        	movapd	0x7eed,%xmm5
+
+// CHECK: 	movapd	3133065982, %xmm5
+        	movapd	0xbabecafe,%xmm5
+
+// CHECK: 	movapd	305419896, %xmm5
+        	movapd	0x12345678,%xmm5
+
+// CHECK: 	movapd	%xmm5, %xmm5
+        	movapd	%xmm5,%xmm5
+
+// CHECK: 	movapd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movapd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movapd	%xmm5, 69
+        	movapd	%xmm5,0x45
+
+// CHECK: 	movapd	%xmm5, 32493
+        	movapd	%xmm5,0x7eed
+
+// CHECK: 	movapd	%xmm5, 3133065982
+        	movapd	%xmm5,0xbabecafe
+
+// CHECK: 	movapd	%xmm5, 305419896
+        	movapd	%xmm5,0x12345678
+
+// CHECK: 	movapd	%xmm5, %xmm5
+        	movapd	%xmm5,%xmm5
+
+// CHECK: 	movhpd	3735928559(%ebx,%ecx,8), %xmm5
+        	movhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movhpd	69, %xmm5
+        	movhpd	0x45,%xmm5
+
+// CHECK: 	movhpd	32493, %xmm5
+        	movhpd	0x7eed,%xmm5
+
+// CHECK: 	movhpd	3133065982, %xmm5
+        	movhpd	0xbabecafe,%xmm5
+
+// CHECK: 	movhpd	305419896, %xmm5
+        	movhpd	0x12345678,%xmm5
+
+// CHECK: 	movhpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movhpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movhpd	%xmm5, 69
+        	movhpd	%xmm5,0x45
+
+// CHECK: 	movhpd	%xmm5, 32493
+        	movhpd	%xmm5,0x7eed
+
+// CHECK: 	movhpd	%xmm5, 3133065982
+        	movhpd	%xmm5,0xbabecafe
+
+// CHECK: 	movhpd	%xmm5, 305419896
+        	movhpd	%xmm5,0x12345678
+
+// CHECK: 	movlpd	3735928559(%ebx,%ecx,8), %xmm5
+        	movlpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movlpd	69, %xmm5
+        	movlpd	0x45,%xmm5
+
+// CHECK: 	movlpd	32493, %xmm5
+        	movlpd	0x7eed,%xmm5
+
+// CHECK: 	movlpd	3133065982, %xmm5
+        	movlpd	0xbabecafe,%xmm5
+
+// CHECK: 	movlpd	305419896, %xmm5
+        	movlpd	0x12345678,%xmm5
+
+// CHECK: 	movlpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movlpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movlpd	%xmm5, 69
+        	movlpd	%xmm5,0x45
+
+// CHECK: 	movlpd	%xmm5, 32493
+        	movlpd	%xmm5,0x7eed
+
+// CHECK: 	movlpd	%xmm5, 3133065982
+        	movlpd	%xmm5,0xbabecafe
+
+// CHECK: 	movlpd	%xmm5, 305419896
+        	movlpd	%xmm5,0x12345678
+
+// CHECK: 	movmskpd	%xmm5, %ecx
+        	movmskpd	%xmm5,%ecx
+
+// CHECK: 	movntpd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movntpd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movntpd	%xmm5, 69
+        	movntpd	%xmm5,0x45
+
+// CHECK: 	movntpd	%xmm5, 32493
+        	movntpd	%xmm5,0x7eed
+
+// CHECK: 	movntpd	%xmm5, 3133065982
+        	movntpd	%xmm5,0xbabecafe
+
+// CHECK: 	movntpd	%xmm5, 305419896
+        	movntpd	%xmm5,0x12345678
+
+// CHECK: 	movsd	3735928559(%ebx,%ecx,8), %xmm5
+        	movsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movsd	69, %xmm5
+        	movsd	0x45,%xmm5
+
+// CHECK: 	movsd	32493, %xmm5
+        	movsd	0x7eed,%xmm5
+
+// CHECK: 	movsd	3133065982, %xmm5
+        	movsd	0xbabecafe,%xmm5
+
+// CHECK: 	movsd	305419896, %xmm5
+        	movsd	0x12345678,%xmm5
+
+// CHECK: 	movsd	%xmm5, %xmm5
+        	movsd	%xmm5,%xmm5
+
+// CHECK: 	movsd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movsd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movsd	%xmm5, 69
+        	movsd	%xmm5,0x45
+
+// CHECK: 	movsd	%xmm5, 32493
+        	movsd	%xmm5,0x7eed
+
+// CHECK: 	movsd	%xmm5, 3133065982
+        	movsd	%xmm5,0xbabecafe
+
+// CHECK: 	movsd	%xmm5, 305419896
+        	movsd	%xmm5,0x12345678
+
+// CHECK: 	movsd	%xmm5, %xmm5
+        	movsd	%xmm5,%xmm5
+
+// CHECK: 	movupd	3735928559(%ebx,%ecx,8), %xmm5
+        	movupd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movupd	69, %xmm5
+        	movupd	0x45,%xmm5
+
+// CHECK: 	movupd	32493, %xmm5
+        	movupd	0x7eed,%xmm5
+
+// CHECK: 	movupd	3133065982, %xmm5
+        	movupd	0xbabecafe,%xmm5
+
+// CHECK: 	movupd	305419896, %xmm5
+        	movupd	0x12345678,%xmm5
+
+// CHECK: 	movupd	%xmm5, %xmm5
+        	movupd	%xmm5,%xmm5
+
+// CHECK: 	movupd	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movupd	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movupd	%xmm5, 69
+        	movupd	%xmm5,0x45
+
+// CHECK: 	movupd	%xmm5, 32493
+        	movupd	%xmm5,0x7eed
+
+// CHECK: 	movupd	%xmm5, 3133065982
+        	movupd	%xmm5,0xbabecafe
+
+// CHECK: 	movupd	%xmm5, 305419896
+        	movupd	%xmm5,0x12345678
+
+// CHECK: 	movupd	%xmm5, %xmm5
+        	movupd	%xmm5,%xmm5
+
+// CHECK: 	mulpd	3735928559(%ebx,%ecx,8), %xmm5
+        	mulpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	mulpd	69, %xmm5
+        	mulpd	0x45,%xmm5
+
+// CHECK: 	mulpd	32493, %xmm5
+        	mulpd	0x7eed,%xmm5
+
+// CHECK: 	mulpd	3133065982, %xmm5
+        	mulpd	0xbabecafe,%xmm5
+
+// CHECK: 	mulpd	305419896, %xmm5
+        	mulpd	0x12345678,%xmm5
+
+// CHECK: 	mulpd	%xmm5, %xmm5
+        	mulpd	%xmm5,%xmm5
+
+// CHECK: 	mulsd	3735928559(%ebx,%ecx,8), %xmm5
+        	mulsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	mulsd	69, %xmm5
+        	mulsd	0x45,%xmm5
+
+// CHECK: 	mulsd	32493, %xmm5
+        	mulsd	0x7eed,%xmm5
+
+// CHECK: 	mulsd	3133065982, %xmm5
+        	mulsd	0xbabecafe,%xmm5
+
+// CHECK: 	mulsd	305419896, %xmm5
+        	mulsd	0x12345678,%xmm5
+
+// CHECK: 	mulsd	%xmm5, %xmm5
+        	mulsd	%xmm5,%xmm5
+
+// CHECK: 	orpd	3735928559(%ebx,%ecx,8), %xmm5
+        	orpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	orpd	69, %xmm5
+        	orpd	0x45,%xmm5
+
+// CHECK: 	orpd	32493, %xmm5
+        	orpd	0x7eed,%xmm5
+
+// CHECK: 	orpd	3133065982, %xmm5
+        	orpd	0xbabecafe,%xmm5
+
+// CHECK: 	orpd	305419896, %xmm5
+        	orpd	0x12345678,%xmm5
+
+// CHECK: 	orpd	%xmm5, %xmm5
+        	orpd	%xmm5,%xmm5
+
+// CHECK: 	sqrtpd	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtpd	69, %xmm5
+        	sqrtpd	0x45,%xmm5
+
+// CHECK: 	sqrtpd	32493, %xmm5
+        	sqrtpd	0x7eed,%xmm5
+
+// CHECK: 	sqrtpd	3133065982, %xmm5
+        	sqrtpd	0xbabecafe,%xmm5
+
+// CHECK: 	sqrtpd	305419896, %xmm5
+        	sqrtpd	0x12345678,%xmm5
+
+// CHECK: 	sqrtpd	%xmm5, %xmm5
+        	sqrtpd	%xmm5,%xmm5
+
+// CHECK: 	sqrtsd	3735928559(%ebx,%ecx,8), %xmm5
+        	sqrtsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	sqrtsd	69, %xmm5
+        	sqrtsd	0x45,%xmm5
+
+// CHECK: 	sqrtsd	32493, %xmm5
+        	sqrtsd	0x7eed,%xmm5
+
+// CHECK: 	sqrtsd	3133065982, %xmm5
+        	sqrtsd	0xbabecafe,%xmm5
+
+// CHECK: 	sqrtsd	305419896, %xmm5
+        	sqrtsd	0x12345678,%xmm5
+
+// CHECK: 	sqrtsd	%xmm5, %xmm5
+        	sqrtsd	%xmm5,%xmm5
+
+// CHECK: 	subpd	3735928559(%ebx,%ecx,8), %xmm5
+        	subpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	subpd	69, %xmm5
+        	subpd	0x45,%xmm5
+
+// CHECK: 	subpd	32493, %xmm5
+        	subpd	0x7eed,%xmm5
+
+// CHECK: 	subpd	3133065982, %xmm5
+        	subpd	0xbabecafe,%xmm5
+
+// CHECK: 	subpd	305419896, %xmm5
+        	subpd	0x12345678,%xmm5
+
+// CHECK: 	subpd	%xmm5, %xmm5
+        	subpd	%xmm5,%xmm5
+
+// CHECK: 	subsd	3735928559(%ebx,%ecx,8), %xmm5
+        	subsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	subsd	69, %xmm5
+        	subsd	0x45,%xmm5
+
+// CHECK: 	subsd	32493, %xmm5
+        	subsd	0x7eed,%xmm5
+
+// CHECK: 	subsd	3133065982, %xmm5
+        	subsd	0xbabecafe,%xmm5
+
+// CHECK: 	subsd	305419896, %xmm5
+        	subsd	0x12345678,%xmm5
+
+// CHECK: 	subsd	%xmm5, %xmm5
+        	subsd	%xmm5,%xmm5
+
+// CHECK: 	ucomisd	3735928559(%ebx,%ecx,8), %xmm5
+        	ucomisd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ucomisd	69, %xmm5
+        	ucomisd	0x45,%xmm5
+
+// CHECK: 	ucomisd	32493, %xmm5
+        	ucomisd	0x7eed,%xmm5
+
+// CHECK: 	ucomisd	3133065982, %xmm5
+        	ucomisd	0xbabecafe,%xmm5
+
+// CHECK: 	ucomisd	305419896, %xmm5
+        	ucomisd	0x12345678,%xmm5
+
+// CHECK: 	ucomisd	%xmm5, %xmm5
+        	ucomisd	%xmm5,%xmm5
+
+// CHECK: 	unpckhpd	3735928559(%ebx,%ecx,8), %xmm5
+        	unpckhpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	unpckhpd	69, %xmm5
+        	unpckhpd	0x45,%xmm5
+
+// CHECK: 	unpckhpd	32493, %xmm5
+        	unpckhpd	0x7eed,%xmm5
+
+// CHECK: 	unpckhpd	3133065982, %xmm5
+        	unpckhpd	0xbabecafe,%xmm5
+
+// CHECK: 	unpckhpd	305419896, %xmm5
+        	unpckhpd	0x12345678,%xmm5
+
+// CHECK: 	unpckhpd	%xmm5, %xmm5
+        	unpckhpd	%xmm5,%xmm5
+
+// CHECK: 	unpcklpd	3735928559(%ebx,%ecx,8), %xmm5
+        	unpcklpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	unpcklpd	69, %xmm5
+        	unpcklpd	0x45,%xmm5
+
+// CHECK: 	unpcklpd	32493, %xmm5
+        	unpcklpd	0x7eed,%xmm5
+
+// CHECK: 	unpcklpd	3133065982, %xmm5
+        	unpcklpd	0xbabecafe,%xmm5
+
+// CHECK: 	unpcklpd	305419896, %xmm5
+        	unpcklpd	0x12345678,%xmm5
+
+// CHECK: 	unpcklpd	%xmm5, %xmm5
+        	unpcklpd	%xmm5,%xmm5
+
+// CHECK: 	xorpd	3735928559(%ebx,%ecx,8), %xmm5
+        	xorpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	xorpd	69, %xmm5
+        	xorpd	0x45,%xmm5
+
+// CHECK: 	xorpd	32493, %xmm5
+        	xorpd	0x7eed,%xmm5
+
+// CHECK: 	xorpd	3133065982, %xmm5
+        	xorpd	0xbabecafe,%xmm5
+
+// CHECK: 	xorpd	305419896, %xmm5
+        	xorpd	0x12345678,%xmm5
+
+// CHECK: 	xorpd	%xmm5, %xmm5
+        	xorpd	%xmm5,%xmm5
+
+// CHECK: 	cvtdq2pd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtdq2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtdq2pd	69, %xmm5
+        	cvtdq2pd	0x45,%xmm5
+
+// CHECK: 	cvtdq2pd	32493, %xmm5
+        	cvtdq2pd	0x7eed,%xmm5
+
+// CHECK: 	cvtdq2pd	3133065982, %xmm5
+        	cvtdq2pd	0xbabecafe,%xmm5
+
+// CHECK: 	cvtdq2pd	305419896, %xmm5
+        	cvtdq2pd	0x12345678,%xmm5
+
+// CHECK: 	cvtdq2pd	%xmm5, %xmm5
+        	cvtdq2pd	%xmm5,%xmm5
+
+// CHECK: 	cvtpd2dq	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpd2dq	69, %xmm5
+        	cvtpd2dq	0x45,%xmm5
+
+// CHECK: 	cvtpd2dq	32493, %xmm5
+        	cvtpd2dq	0x7eed,%xmm5
+
+// CHECK: 	cvtpd2dq	3133065982, %xmm5
+        	cvtpd2dq	0xbabecafe,%xmm5
+
+// CHECK: 	cvtpd2dq	305419896, %xmm5
+        	cvtpd2dq	0x12345678,%xmm5
+
+// CHECK: 	cvtpd2dq	%xmm5, %xmm5
+        	cvtpd2dq	%xmm5,%xmm5
+
+// CHECK: 	cvtdq2ps	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtdq2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtdq2ps	69, %xmm5
+        	cvtdq2ps	0x45,%xmm5
+
+// CHECK: 	cvtdq2ps	32493, %xmm5
+        	cvtdq2ps	0x7eed,%xmm5
+
+// CHECK: 	cvtdq2ps	3133065982, %xmm5
+        	cvtdq2ps	0xbabecafe,%xmm5
+
+// CHECK: 	cvtdq2ps	305419896, %xmm5
+        	cvtdq2ps	0x12345678,%xmm5
+
+// CHECK: 	cvtdq2ps	%xmm5, %xmm5
+        	cvtdq2ps	%xmm5,%xmm5
+
+// CHECK: 	cvtpd2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvtpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvtpd2pi	69, %mm3
+        	cvtpd2pi	0x45,%mm3
+
+// CHECK: 	cvtpd2pi	32493, %mm3
+        	cvtpd2pi	0x7eed,%mm3
+
+// CHECK: 	cvtpd2pi	3133065982, %mm3
+        	cvtpd2pi	0xbabecafe,%mm3
+
+// CHECK: 	cvtpd2pi	305419896, %mm3
+        	cvtpd2pi	0x12345678,%mm3
+
+// CHECK: 	cvtpd2pi	%xmm5, %mm3
+        	cvtpd2pi	%xmm5,%mm3
+
+// CHECK: 	cvtpd2ps	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtpd2ps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtpd2ps	69, %xmm5
+        	cvtpd2ps	0x45,%xmm5
+
+// CHECK: 	cvtpd2ps	32493, %xmm5
+        	cvtpd2ps	0x7eed,%xmm5
+
+// CHECK: 	cvtpd2ps	3133065982, %xmm5
+        	cvtpd2ps	0xbabecafe,%xmm5
+
+// CHECK: 	cvtpd2ps	305419896, %xmm5
+        	cvtpd2ps	0x12345678,%xmm5
+
+// CHECK: 	cvtpd2ps	%xmm5, %xmm5
+        	cvtpd2ps	%xmm5,%xmm5
+
+// CHECK: 	cvtps2pd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtps2pd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtps2pd	69, %xmm5
+        	cvtps2pd	0x45,%xmm5
+
+// CHECK: 	cvtps2pd	32493, %xmm5
+        	cvtps2pd	0x7eed,%xmm5
+
+// CHECK: 	cvtps2pd	3133065982, %xmm5
+        	cvtps2pd	0xbabecafe,%xmm5
+
+// CHECK: 	cvtps2pd	305419896, %xmm5
+        	cvtps2pd	0x12345678,%xmm5
+
+// CHECK: 	cvtps2pd	%xmm5, %xmm5
+        	cvtps2pd	%xmm5,%xmm5
+
+// CHECK: 	cvtps2dq	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtps2dq	69, %xmm5
+        	cvtps2dq	0x45,%xmm5
+
+// CHECK: 	cvtps2dq	32493, %xmm5
+        	cvtps2dq	0x7eed,%xmm5
+
+// CHECK: 	cvtps2dq	3133065982, %xmm5
+        	cvtps2dq	0xbabecafe,%xmm5
+
+// CHECK: 	cvtps2dq	305419896, %xmm5
+        	cvtps2dq	0x12345678,%xmm5
+
+// CHECK: 	cvtps2dq	%xmm5, %xmm5
+        	cvtps2dq	%xmm5,%xmm5
+
+// CHECK: 	cvtsd2ss	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtsd2ss	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtsd2ss	69, %xmm5
+        	cvtsd2ss	0x45,%xmm5
+
+// CHECK: 	cvtsd2ss	32493, %xmm5
+        	cvtsd2ss	0x7eed,%xmm5
+
+// CHECK: 	cvtsd2ss	3133065982, %xmm5
+        	cvtsd2ss	0xbabecafe,%xmm5
+
+// CHECK: 	cvtsd2ss	305419896, %xmm5
+        	cvtsd2ss	0x12345678,%xmm5
+
+// CHECK: 	cvtsd2ss	%xmm5, %xmm5
+        	cvtsd2ss	%xmm5,%xmm5
+
+// CHECK: 	cvtss2sd	3735928559(%ebx,%ecx,8), %xmm5
+        	cvtss2sd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvtss2sd	69, %xmm5
+        	cvtss2sd	0x45,%xmm5
+
+// CHECK: 	cvtss2sd	32493, %xmm5
+        	cvtss2sd	0x7eed,%xmm5
+
+// CHECK: 	cvtss2sd	3133065982, %xmm5
+        	cvtss2sd	0xbabecafe,%xmm5
+
+// CHECK: 	cvtss2sd	305419896, %xmm5
+        	cvtss2sd	0x12345678,%xmm5
+
+// CHECK: 	cvtss2sd	%xmm5, %xmm5
+        	cvtss2sd	%xmm5,%xmm5
+
+// CHECK: 	cvttpd2pi	3735928559(%ebx,%ecx,8), %mm3
+        	cvttpd2pi	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	cvttpd2pi	69, %mm3
+        	cvttpd2pi	0x45,%mm3
+
+// CHECK: 	cvttpd2pi	32493, %mm3
+        	cvttpd2pi	0x7eed,%mm3
+
+// CHECK: 	cvttpd2pi	3133065982, %mm3
+        	cvttpd2pi	0xbabecafe,%mm3
+
+// CHECK: 	cvttpd2pi	305419896, %mm3
+        	cvttpd2pi	0x12345678,%mm3
+
+// CHECK: 	cvttpd2pi	%xmm5, %mm3
+        	cvttpd2pi	%xmm5,%mm3
+
+// CHECK: 	cvttsd2si	3735928559(%ebx,%ecx,8), %ecx
+        	cvttsd2si	0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	cvttsd2si	69, %ecx
+        	cvttsd2si	0x45,%ecx
+
+// CHECK: 	cvttsd2si	32493, %ecx
+        	cvttsd2si	0x7eed,%ecx
+
+// CHECK: 	cvttsd2si	3133065982, %ecx
+        	cvttsd2si	0xbabecafe,%ecx
+
+// CHECK: 	cvttsd2si	305419896, %ecx
+        	cvttsd2si	0x12345678,%ecx
+
+// CHECK: 	cvttsd2si	%xmm5, %ecx
+        	cvttsd2si	%xmm5,%ecx
+
+// CHECK: 	cvttps2dq	3735928559(%ebx,%ecx,8), %xmm5
+        	cvttps2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	cvttps2dq	69, %xmm5
+        	cvttps2dq	0x45,%xmm5
+
+// CHECK: 	cvttps2dq	32493, %xmm5
+        	cvttps2dq	0x7eed,%xmm5
+
+// CHECK: 	cvttps2dq	3133065982, %xmm5
+        	cvttps2dq	0xbabecafe,%xmm5
+
+// CHECK: 	cvttps2dq	305419896, %xmm5
+        	cvttps2dq	0x12345678,%xmm5
+
+// CHECK: 	cvttps2dq	%xmm5, %xmm5
+        	cvttps2dq	%xmm5,%xmm5
+
+// CHECK: 	maskmovdqu	%xmm5, %xmm5
+        	maskmovdqu	%xmm5,%xmm5
+
+// CHECK: 	movdqa	3735928559(%ebx,%ecx,8), %xmm5
+        	movdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movdqa	69, %xmm5
+        	movdqa	0x45,%xmm5
+
+// CHECK: 	movdqa	32493, %xmm5
+        	movdqa	0x7eed,%xmm5
+
+// CHECK: 	movdqa	3133065982, %xmm5
+        	movdqa	0xbabecafe,%xmm5
+
+// CHECK: 	movdqa	305419896, %xmm5
+        	movdqa	0x12345678,%xmm5
+
+// CHECK: 	movdqa	%xmm5, %xmm5
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: 	movdqa	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movdqa	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movdqa	%xmm5, 69
+        	movdqa	%xmm5,0x45
+
+// CHECK: 	movdqa	%xmm5, 32493
+        	movdqa	%xmm5,0x7eed
+
+// CHECK: 	movdqa	%xmm5, 3133065982
+        	movdqa	%xmm5,0xbabecafe
+
+// CHECK: 	movdqa	%xmm5, 305419896
+        	movdqa	%xmm5,0x12345678
+
+// CHECK: 	movdqa	%xmm5, %xmm5
+        	movdqa	%xmm5,%xmm5
+
+// CHECK: 	movdqu	3735928559(%ebx,%ecx,8), %xmm5
+        	movdqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movdqu	69, %xmm5
+        	movdqu	0x45,%xmm5
+
+// CHECK: 	movdqu	32493, %xmm5
+        	movdqu	0x7eed,%xmm5
+
+// CHECK: 	movdqu	3133065982, %xmm5
+        	movdqu	0xbabecafe,%xmm5
+
+// CHECK: 	movdqu	305419896, %xmm5
+        	movdqu	0x12345678,%xmm5
+
+// CHECK: 	movdqu	%xmm5, 3735928559(%ebx,%ecx,8)
+        	movdqu	%xmm5,0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	movdqu	%xmm5, 69
+        	movdqu	%xmm5,0x45
+
+// CHECK: 	movdqu	%xmm5, 32493
+        	movdqu	%xmm5,0x7eed
+
+// CHECK: 	movdqu	%xmm5, 3133065982
+        	movdqu	%xmm5,0xbabecafe
+
+// CHECK: 	movdqu	%xmm5, 305419896
+        	movdqu	%xmm5,0x12345678
+
+// CHECK: 	movdq2q	%xmm5, %mm3
+        	movdq2q	%xmm5,%mm3
+
+// CHECK: 	movq2dq	%mm3, %xmm5
+        	movq2dq	%mm3,%xmm5
+
+// CHECK: 	pmuludq	3735928559(%ebx,%ecx,8), %mm3
+        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmuludq	69, %mm3
+        	pmuludq	0x45,%mm3
+
+// CHECK: 	pmuludq	32493, %mm3
+        	pmuludq	0x7eed,%mm3
+
+// CHECK: 	pmuludq	3133065982, %mm3
+        	pmuludq	0xbabecafe,%mm3
+
+// CHECK: 	pmuludq	305419896, %mm3
+        	pmuludq	0x12345678,%mm3
+
+// CHECK: 	pmuludq	%mm3, %mm3
+        	pmuludq	%mm3,%mm3
+
+// CHECK: 	pmuludq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmuludq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmuludq	69, %xmm5
+        	pmuludq	0x45,%xmm5
+
+// CHECK: 	pmuludq	32493, %xmm5
+        	pmuludq	0x7eed,%xmm5
+
+// CHECK: 	pmuludq	3133065982, %xmm5
+        	pmuludq	0xbabecafe,%xmm5
+
+// CHECK: 	pmuludq	305419896, %xmm5
+        	pmuludq	0x12345678,%xmm5
+
+// CHECK: 	pmuludq	%xmm5, %xmm5
+        	pmuludq	%xmm5,%xmm5
+
+// CHECK: 	pslldq	$127, %xmm5
+        	pslldq	$0x7f,%xmm5
+
+// CHECK: 	psrldq	$127, %xmm5
+        	psrldq	$0x7f,%xmm5
+
+// CHECK: 	punpckhqdq	3735928559(%ebx,%ecx,8), %xmm5
+        	punpckhqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpckhqdq	69, %xmm5
+        	punpckhqdq	0x45,%xmm5
+
+// CHECK: 	punpckhqdq	32493, %xmm5
+        	punpckhqdq	0x7eed,%xmm5
+
+// CHECK: 	punpckhqdq	3133065982, %xmm5
+        	punpckhqdq	0xbabecafe,%xmm5
+
+// CHECK: 	punpckhqdq	305419896, %xmm5
+        	punpckhqdq	0x12345678,%xmm5
+
+// CHECK: 	punpckhqdq	%xmm5, %xmm5
+        	punpckhqdq	%xmm5,%xmm5
+
+// CHECK: 	punpcklqdq	3735928559(%ebx,%ecx,8), %xmm5
+        	punpcklqdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	punpcklqdq	69, %xmm5
+        	punpcklqdq	0x45,%xmm5
+
+// CHECK: 	punpcklqdq	32493, %xmm5
+        	punpcklqdq	0x7eed,%xmm5
+
+// CHECK: 	punpcklqdq	3133065982, %xmm5
+        	punpcklqdq	0xbabecafe,%xmm5
+
+// CHECK: 	punpcklqdq	305419896, %xmm5
+        	punpcklqdq	0x12345678,%xmm5
+
+// CHECK: 	punpcklqdq	%xmm5, %xmm5
+        	punpcklqdq	%xmm5,%xmm5
+
+// CHECK: 	addsubpd	3735928559(%ebx,%ecx,8), %xmm5
+        	addsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addsubpd	69, %xmm5
+        	addsubpd	0x45,%xmm5
+
+// CHECK: 	addsubpd	32493, %xmm5
+        	addsubpd	0x7eed,%xmm5
+
+// CHECK: 	addsubpd	3133065982, %xmm5
+        	addsubpd	0xbabecafe,%xmm5
+
+// CHECK: 	addsubpd	305419896, %xmm5
+        	addsubpd	0x12345678,%xmm5
+
+// CHECK: 	addsubpd	%xmm5, %xmm5
+        	addsubpd	%xmm5,%xmm5
+
+// CHECK: 	addsubps	3735928559(%ebx,%ecx,8), %xmm5
+        	addsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	addsubps	69, %xmm5
+        	addsubps	0x45,%xmm5
+
+// CHECK: 	addsubps	32493, %xmm5
+        	addsubps	0x7eed,%xmm5
+
+// CHECK: 	addsubps	3133065982, %xmm5
+        	addsubps	0xbabecafe,%xmm5
+
+// CHECK: 	addsubps	305419896, %xmm5
+        	addsubps	0x12345678,%xmm5
+
+// CHECK: 	addsubps	%xmm5, %xmm5
+        	addsubps	%xmm5,%xmm5
+
+// CHECK: 	fisttpl	3735928559(%ebx,%ecx,8)
+        	fisttpl	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	fisttpl	3133065982
+        	fisttpl	0xbabecafe
+
+// CHECK: 	fisttpl	305419896
+        	fisttpl	0x12345678
+
+// CHECK: 	haddpd	3735928559(%ebx,%ecx,8), %xmm5
+        	haddpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	haddpd	69, %xmm5
+        	haddpd	0x45,%xmm5
+
+// CHECK: 	haddpd	32493, %xmm5
+        	haddpd	0x7eed,%xmm5
+
+// CHECK: 	haddpd	3133065982, %xmm5
+        	haddpd	0xbabecafe,%xmm5
+
+// CHECK: 	haddpd	305419896, %xmm5
+        	haddpd	0x12345678,%xmm5
+
+// CHECK: 	haddpd	%xmm5, %xmm5
+        	haddpd	%xmm5,%xmm5
+
+// CHECK: 	haddps	3735928559(%ebx,%ecx,8), %xmm5
+        	haddps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	haddps	69, %xmm5
+        	haddps	0x45,%xmm5
+
+// CHECK: 	haddps	32493, %xmm5
+        	haddps	0x7eed,%xmm5
+
+// CHECK: 	haddps	3133065982, %xmm5
+        	haddps	0xbabecafe,%xmm5
+
+// CHECK: 	haddps	305419896, %xmm5
+        	haddps	0x12345678,%xmm5
+
+// CHECK: 	haddps	%xmm5, %xmm5
+        	haddps	%xmm5,%xmm5
+
+// CHECK: 	hsubpd	3735928559(%ebx,%ecx,8), %xmm5
+        	hsubpd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	hsubpd	69, %xmm5
+        	hsubpd	0x45,%xmm5
+
+// CHECK: 	hsubpd	32493, %xmm5
+        	hsubpd	0x7eed,%xmm5
+
+// CHECK: 	hsubpd	3133065982, %xmm5
+        	hsubpd	0xbabecafe,%xmm5
+
+// CHECK: 	hsubpd	305419896, %xmm5
+        	hsubpd	0x12345678,%xmm5
+
+// CHECK: 	hsubpd	%xmm5, %xmm5
+        	hsubpd	%xmm5,%xmm5
+
+// CHECK: 	hsubps	3735928559(%ebx,%ecx,8), %xmm5
+        	hsubps	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	hsubps	69, %xmm5
+        	hsubps	0x45,%xmm5
+
+// CHECK: 	hsubps	32493, %xmm5
+        	hsubps	0x7eed,%xmm5
+
+// CHECK: 	hsubps	3133065982, %xmm5
+        	hsubps	0xbabecafe,%xmm5
+
+// CHECK: 	hsubps	305419896, %xmm5
+        	hsubps	0x12345678,%xmm5
+
+// CHECK: 	hsubps	%xmm5, %xmm5
+        	hsubps	%xmm5,%xmm5
+
+// CHECK: 	lddqu	3735928559(%ebx,%ecx,8), %xmm5
+        	lddqu	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	lddqu	69, %xmm5
+        	lddqu	0x45,%xmm5
+
+// CHECK: 	lddqu	32493, %xmm5
+        	lddqu	0x7eed,%xmm5
+
+// CHECK: 	lddqu	3133065982, %xmm5
+        	lddqu	0xbabecafe,%xmm5
+
+// CHECK: 	lddqu	305419896, %xmm5
+        	lddqu	0x12345678,%xmm5
+
+// CHECK: 	monitor
+        	monitor
+
+// CHECK: 	movddup	3735928559(%ebx,%ecx,8), %xmm5
+        	movddup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movddup	69, %xmm5
+        	movddup	0x45,%xmm5
+
+// CHECK: 	movddup	32493, %xmm5
+        	movddup	0x7eed,%xmm5
+
+// CHECK: 	movddup	3133065982, %xmm5
+        	movddup	0xbabecafe,%xmm5
+
+// CHECK: 	movddup	305419896, %xmm5
+        	movddup	0x12345678,%xmm5
+
+// CHECK: 	movddup	%xmm5, %xmm5
+        	movddup	%xmm5,%xmm5
+
+// CHECK: 	movshdup	3735928559(%ebx,%ecx,8), %xmm5
+        	movshdup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movshdup	69, %xmm5
+        	movshdup	0x45,%xmm5
+
+// CHECK: 	movshdup	32493, %xmm5
+        	movshdup	0x7eed,%xmm5
+
+// CHECK: 	movshdup	3133065982, %xmm5
+        	movshdup	0xbabecafe,%xmm5
+
+// CHECK: 	movshdup	305419896, %xmm5
+        	movshdup	0x12345678,%xmm5
+
+// CHECK: 	movshdup	%xmm5, %xmm5
+        	movshdup	%xmm5,%xmm5
+
+// CHECK: 	movsldup	3735928559(%ebx,%ecx,8), %xmm5
+        	movsldup	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movsldup	69, %xmm5
+        	movsldup	0x45,%xmm5
+
+// CHECK: 	movsldup	32493, %xmm5
+        	movsldup	0x7eed,%xmm5
+
+// CHECK: 	movsldup	3133065982, %xmm5
+        	movsldup	0xbabecafe,%xmm5
+
+// CHECK: 	movsldup	305419896, %xmm5
+        	movsldup	0x12345678,%xmm5
+
+// CHECK: 	movsldup	%xmm5, %xmm5
+        	movsldup	%xmm5,%xmm5
+
+// CHECK: 	mwait
+        	mwait
+
+// CHECK: 	vmcall
+        	vmcall
+
+// CHECK: 	vmclear	3735928559(%ebx,%ecx,8)
+        	vmclear	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	vmclear	32493
+        	vmclear	0x7eed
+
+// CHECK: 	vmclear	3133065982
+        	vmclear	0xbabecafe
+
+// CHECK: 	vmclear	305419896
+        	vmclear	0x12345678
+
+// CHECK: 	vmlaunch
+        	vmlaunch
+
+// CHECK: 	vmresume
+        	vmresume
+
+// CHECK: 	vmptrld	3735928559(%ebx,%ecx,8)
+        	vmptrld	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	vmptrld	32493
+        	vmptrld	0x7eed
+
+// CHECK: 	vmptrld	3133065982
+        	vmptrld	0xbabecafe
+
+// CHECK: 	vmptrld	305419896
+        	vmptrld	0x12345678
+
+// CHECK: 	vmptrst	3735928559(%ebx,%ecx,8)
+        	vmptrst	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	vmptrst	32493
+        	vmptrst	0x7eed
+
+// CHECK: 	vmptrst	3133065982
+        	vmptrst	0xbabecafe
+
+// CHECK: 	vmptrst	305419896
+        	vmptrst	0x12345678
+
+// CHECK: 	vmxoff
+        	vmxoff
+
+// CHECK: 	vmxon	3735928559(%ebx,%ecx,8)
+        	vmxon	0xdeadbeef(%ebx,%ecx,8)
+
+// CHECK: 	vmxon	32493
+        	vmxon	0x7eed
+
+// CHECK: 	vmxon	3133065982
+        	vmxon	0xbabecafe
+
+// CHECK: 	vmxon	305419896
+        	vmxon	0x12345678
+
+// CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %mm3
+        	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phaddw	69, %mm3
+        	phaddw	0x45,%mm3
+
+// CHECK: 	phaddw	32493, %mm3
+        	phaddw	0x7eed,%mm3
+
+// CHECK: 	phaddw	3133065982, %mm3
+        	phaddw	0xbabecafe,%mm3
+
+// CHECK: 	phaddw	305419896, %mm3
+        	phaddw	0x12345678,%mm3
+
+// CHECK: 	phaddw	%mm3, %mm3
+        	phaddw	%mm3,%mm3
+
+// CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %xmm5
+        	phaddw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phaddw	69, %xmm5
+        	phaddw	0x45,%xmm5
+
+// CHECK: 	phaddw	32493, %xmm5
+        	phaddw	0x7eed,%xmm5
+
+// CHECK: 	phaddw	3133065982, %xmm5
+        	phaddw	0xbabecafe,%xmm5
+
+// CHECK: 	phaddw	305419896, %xmm5
+        	phaddw	0x12345678,%xmm5
+
+// CHECK: 	phaddw	%xmm5, %xmm5
+        	phaddw	%xmm5,%xmm5
+
+// CHECK: 	phaddd	3735928559(%ebx,%ecx,8), %mm3
+        	phaddd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phaddd	69, %mm3
+        	phaddd	0x45,%mm3
+
+// CHECK: 	phaddd	32493, %mm3
+        	phaddd	0x7eed,%mm3
+
+// CHECK: 	phaddd	3133065982, %mm3
+        	phaddd	0xbabecafe,%mm3
+
+// CHECK: 	phaddd	305419896, %mm3
+        	phaddd	0x12345678,%mm3
+
+// CHECK: 	phaddd	%mm3, %mm3
+        	phaddd	%mm3,%mm3
+
+// CHECK: 	phaddd	3735928559(%ebx,%ecx,8), %xmm5
+        	phaddd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phaddd	69, %xmm5
+        	phaddd	0x45,%xmm5
+
+// CHECK: 	phaddd	32493, %xmm5
+        	phaddd	0x7eed,%xmm5
+
+// CHECK: 	phaddd	3133065982, %xmm5
+        	phaddd	0xbabecafe,%xmm5
+
+// CHECK: 	phaddd	305419896, %xmm5
+        	phaddd	0x12345678,%xmm5
+
+// CHECK: 	phaddd	%xmm5, %xmm5
+        	phaddd	%xmm5,%xmm5
+
+// CHECK: 	phaddsw	3735928559(%ebx,%ecx,8), %mm3
+        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phaddsw	69, %mm3
+        	phaddsw	0x45,%mm3
+
+// CHECK: 	phaddsw	32493, %mm3
+        	phaddsw	0x7eed,%mm3
+
+// CHECK: 	phaddsw	3133065982, %mm3
+        	phaddsw	0xbabecafe,%mm3
+
+// CHECK: 	phaddsw	305419896, %mm3
+        	phaddsw	0x12345678,%mm3
+
+// CHECK: 	phaddsw	%mm3, %mm3
+        	phaddsw	%mm3,%mm3
+
+// CHECK: 	phaddsw	3735928559(%ebx,%ecx,8), %xmm5
+        	phaddsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phaddsw	69, %xmm5
+        	phaddsw	0x45,%xmm5
+
+// CHECK: 	phaddsw	32493, %xmm5
+        	phaddsw	0x7eed,%xmm5
+
+// CHECK: 	phaddsw	3133065982, %xmm5
+        	phaddsw	0xbabecafe,%xmm5
+
+// CHECK: 	phaddsw	305419896, %xmm5
+        	phaddsw	0x12345678,%xmm5
+
+// CHECK: 	phaddsw	%xmm5, %xmm5
+        	phaddsw	%xmm5,%xmm5
+
+// CHECK: 	phsubw	3735928559(%ebx,%ecx,8), %mm3
+        	phsubw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phsubw	69, %mm3
+        	phsubw	0x45,%mm3
+
+// CHECK: 	phsubw	32493, %mm3
+        	phsubw	0x7eed,%mm3
+
+// CHECK: 	phsubw	3133065982, %mm3
+        	phsubw	0xbabecafe,%mm3
+
+// CHECK: 	phsubw	305419896, %mm3
+        	phsubw	0x12345678,%mm3
+
+// CHECK: 	phsubw	%mm3, %mm3
+        	phsubw	%mm3,%mm3
+
+// CHECK: 	phsubw	3735928559(%ebx,%ecx,8), %xmm5
+        	phsubw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phsubw	69, %xmm5
+        	phsubw	0x45,%xmm5
+
+// CHECK: 	phsubw	32493, %xmm5
+        	phsubw	0x7eed,%xmm5
+
+// CHECK: 	phsubw	3133065982, %xmm5
+        	phsubw	0xbabecafe,%xmm5
+
+// CHECK: 	phsubw	305419896, %xmm5
+        	phsubw	0x12345678,%xmm5
+
+// CHECK: 	phsubw	%xmm5, %xmm5
+        	phsubw	%xmm5,%xmm5
+
+// CHECK: 	phsubd	3735928559(%ebx,%ecx,8), %mm3
+        	phsubd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phsubd	69, %mm3
+        	phsubd	0x45,%mm3
+
+// CHECK: 	phsubd	32493, %mm3
+        	phsubd	0x7eed,%mm3
+
+// CHECK: 	phsubd	3133065982, %mm3
+        	phsubd	0xbabecafe,%mm3
+
+// CHECK: 	phsubd	305419896, %mm3
+        	phsubd	0x12345678,%mm3
+
+// CHECK: 	phsubd	%mm3, %mm3
+        	phsubd	%mm3,%mm3
+
+// CHECK: 	phsubd	3735928559(%ebx,%ecx,8), %xmm5
+        	phsubd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phsubd	69, %xmm5
+        	phsubd	0x45,%xmm5
+
+// CHECK: 	phsubd	32493, %xmm5
+        	phsubd	0x7eed,%xmm5
+
+// CHECK: 	phsubd	3133065982, %xmm5
+        	phsubd	0xbabecafe,%xmm5
+
+// CHECK: 	phsubd	305419896, %xmm5
+        	phsubd	0x12345678,%xmm5
+
+// CHECK: 	phsubd	%xmm5, %xmm5
+        	phsubd	%xmm5,%xmm5
+
+// CHECK: 	phsubsw	3735928559(%ebx,%ecx,8), %mm3
+        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	phsubsw	69, %mm3
+        	phsubsw	0x45,%mm3
+
+// CHECK: 	phsubsw	32493, %mm3
+        	phsubsw	0x7eed,%mm3
+
+// CHECK: 	phsubsw	3133065982, %mm3
+        	phsubsw	0xbabecafe,%mm3
+
+// CHECK: 	phsubsw	305419896, %mm3
+        	phsubsw	0x12345678,%mm3
+
+// CHECK: 	phsubsw	%mm3, %mm3
+        	phsubsw	%mm3,%mm3
+
+// CHECK: 	phsubsw	3735928559(%ebx,%ecx,8), %xmm5
+        	phsubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phsubsw	69, %xmm5
+        	phsubsw	0x45,%xmm5
+
+// CHECK: 	phsubsw	32493, %xmm5
+        	phsubsw	0x7eed,%xmm5
+
+// CHECK: 	phsubsw	3133065982, %xmm5
+        	phsubsw	0xbabecafe,%xmm5
+
+// CHECK: 	phsubsw	305419896, %xmm5
+        	phsubsw	0x12345678,%xmm5
+
+// CHECK: 	phsubsw	%xmm5, %xmm5
+        	phsubsw	%xmm5,%xmm5
+
+// CHECK: 	pmaddubsw	3735928559(%ebx,%ecx,8), %mm3
+        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmaddubsw	69, %mm3
+        	pmaddubsw	0x45,%mm3
+
+// CHECK: 	pmaddubsw	32493, %mm3
+        	pmaddubsw	0x7eed,%mm3
+
+// CHECK: 	pmaddubsw	3133065982, %mm3
+        	pmaddubsw	0xbabecafe,%mm3
+
+// CHECK: 	pmaddubsw	305419896, %mm3
+        	pmaddubsw	0x12345678,%mm3
+
+// CHECK: 	pmaddubsw	%mm3, %mm3
+        	pmaddubsw	%mm3,%mm3
+
+// CHECK: 	pmaddubsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaddubsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaddubsw	69, %xmm5
+        	pmaddubsw	0x45,%xmm5
+
+// CHECK: 	pmaddubsw	32493, %xmm5
+        	pmaddubsw	0x7eed,%xmm5
+
+// CHECK: 	pmaddubsw	3133065982, %xmm5
+        	pmaddubsw	0xbabecafe,%xmm5
+
+// CHECK: 	pmaddubsw	305419896, %xmm5
+        	pmaddubsw	0x12345678,%xmm5
+
+// CHECK: 	pmaddubsw	%xmm5, %xmm5
+        	pmaddubsw	%xmm5,%xmm5
+
+// CHECK: 	pmulhrsw	3735928559(%ebx,%ecx,8), %mm3
+        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pmulhrsw	69, %mm3
+        	pmulhrsw	0x45,%mm3
+
+// CHECK: 	pmulhrsw	32493, %mm3
+        	pmulhrsw	0x7eed,%mm3
+
+// CHECK: 	pmulhrsw	3133065982, %mm3
+        	pmulhrsw	0xbabecafe,%mm3
+
+// CHECK: 	pmulhrsw	305419896, %mm3
+        	pmulhrsw	0x12345678,%mm3
+
+// CHECK: 	pmulhrsw	%mm3, %mm3
+        	pmulhrsw	%mm3,%mm3
+
+// CHECK: 	pmulhrsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmulhrsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmulhrsw	69, %xmm5
+        	pmulhrsw	0x45,%xmm5
+
+// CHECK: 	pmulhrsw	32493, %xmm5
+        	pmulhrsw	0x7eed,%xmm5
+
+// CHECK: 	pmulhrsw	3133065982, %xmm5
+        	pmulhrsw	0xbabecafe,%xmm5
+
+// CHECK: 	pmulhrsw	305419896, %xmm5
+        	pmulhrsw	0x12345678,%xmm5
+
+// CHECK: 	pmulhrsw	%xmm5, %xmm5
+        	pmulhrsw	%xmm5,%xmm5
+
+// CHECK: 	pshufb	3735928559(%ebx,%ecx,8), %mm3
+        	pshufb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pshufb	69, %mm3
+        	pshufb	0x45,%mm3
+
+// CHECK: 	pshufb	32493, %mm3
+        	pshufb	0x7eed,%mm3
+
+// CHECK: 	pshufb	3133065982, %mm3
+        	pshufb	0xbabecafe,%mm3
+
+// CHECK: 	pshufb	305419896, %mm3
+        	pshufb	0x12345678,%mm3
+
+// CHECK: 	pshufb	%mm3, %mm3
+        	pshufb	%mm3,%mm3
+
+// CHECK: 	pshufb	3735928559(%ebx,%ecx,8), %xmm5
+        	pshufb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pshufb	69, %xmm5
+        	pshufb	0x45,%xmm5
+
+// CHECK: 	pshufb	32493, %xmm5
+        	pshufb	0x7eed,%xmm5
+
+// CHECK: 	pshufb	3133065982, %xmm5
+        	pshufb	0xbabecafe,%xmm5
+
+// CHECK: 	pshufb	305419896, %xmm5
+        	pshufb	0x12345678,%xmm5
+
+// CHECK: 	pshufb	%xmm5, %xmm5
+        	pshufb	%xmm5,%xmm5
+
+// CHECK: 	psignb	3735928559(%ebx,%ecx,8), %mm3
+        	psignb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psignb	69, %mm3
+        	psignb	0x45,%mm3
+
+// CHECK: 	psignb	32493, %mm3
+        	psignb	0x7eed,%mm3
+
+// CHECK: 	psignb	3133065982, %mm3
+        	psignb	0xbabecafe,%mm3
+
+// CHECK: 	psignb	305419896, %mm3
+        	psignb	0x12345678,%mm3
+
+// CHECK: 	psignb	%mm3, %mm3
+        	psignb	%mm3,%mm3
+
+// CHECK: 	psignb	3735928559(%ebx,%ecx,8), %xmm5
+        	psignb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psignb	69, %xmm5
+        	psignb	0x45,%xmm5
+
+// CHECK: 	psignb	32493, %xmm5
+        	psignb	0x7eed,%xmm5
+
+// CHECK: 	psignb	3133065982, %xmm5
+        	psignb	0xbabecafe,%xmm5
+
+// CHECK: 	psignb	305419896, %xmm5
+        	psignb	0x12345678,%xmm5
+
+// CHECK: 	psignb	%xmm5, %xmm5
+        	psignb	%xmm5,%xmm5
+
+// CHECK: 	psignw	3735928559(%ebx,%ecx,8), %mm3
+        	psignw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psignw	69, %mm3
+        	psignw	0x45,%mm3
+
+// CHECK: 	psignw	32493, %mm3
+        	psignw	0x7eed,%mm3
+
+// CHECK: 	psignw	3133065982, %mm3
+        	psignw	0xbabecafe,%mm3
+
+// CHECK: 	psignw	305419896, %mm3
+        	psignw	0x12345678,%mm3
+
+// CHECK: 	psignw	%mm3, %mm3
+        	psignw	%mm3,%mm3
+
+// CHECK: 	psignw	3735928559(%ebx,%ecx,8), %xmm5
+        	psignw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psignw	69, %xmm5
+        	psignw	0x45,%xmm5
+
+// CHECK: 	psignw	32493, %xmm5
+        	psignw	0x7eed,%xmm5
+
+// CHECK: 	psignw	3133065982, %xmm5
+        	psignw	0xbabecafe,%xmm5
+
+// CHECK: 	psignw	305419896, %xmm5
+        	psignw	0x12345678,%xmm5
+
+// CHECK: 	psignw	%xmm5, %xmm5
+        	psignw	%xmm5,%xmm5
+
+// CHECK: 	psignd	3735928559(%ebx,%ecx,8), %mm3
+        	psignd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	psignd	69, %mm3
+        	psignd	0x45,%mm3
+
+// CHECK: 	psignd	32493, %mm3
+        	psignd	0x7eed,%mm3
+
+// CHECK: 	psignd	3133065982, %mm3
+        	psignd	0xbabecafe,%mm3
+
+// CHECK: 	psignd	305419896, %mm3
+        	psignd	0x12345678,%mm3
+
+// CHECK: 	psignd	%mm3, %mm3
+        	psignd	%mm3,%mm3
+
+// CHECK: 	psignd	3735928559(%ebx,%ecx,8), %xmm5
+        	psignd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	psignd	69, %xmm5
+        	psignd	0x45,%xmm5
+
+// CHECK: 	psignd	32493, %xmm5
+        	psignd	0x7eed,%xmm5
+
+// CHECK: 	psignd	3133065982, %xmm5
+        	psignd	0xbabecafe,%xmm5
+
+// CHECK: 	psignd	305419896, %xmm5
+        	psignd	0x12345678,%xmm5
+
+// CHECK: 	psignd	%xmm5, %xmm5
+        	psignd	%xmm5,%xmm5
+
+// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %mm3
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsb	69, %mm3
+        	pabsb	0x45,%mm3
+
+// CHECK: 	pabsb	32493, %mm3
+        	pabsb	0x7eed,%mm3
+
+// CHECK: 	pabsb	3133065982, %mm3
+        	pabsb	0xbabecafe,%mm3
+
+// CHECK: 	pabsb	305419896, %mm3
+        	pabsb	0x12345678,%mm3
+
+// CHECK: 	pabsb	%mm3, %mm3
+        	pabsb	%mm3,%mm3
+
+// CHECK: 	pabsb	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsb	69, %xmm5
+        	pabsb	0x45,%xmm5
+
+// CHECK: 	pabsb	32493, %xmm5
+        	pabsb	0x7eed,%xmm5
+
+// CHECK: 	pabsb	3133065982, %xmm5
+        	pabsb	0xbabecafe,%xmm5
+
+// CHECK: 	pabsb	305419896, %xmm5
+        	pabsb	0x12345678,%xmm5
+
+// CHECK: 	pabsb	%xmm5, %xmm5
+        	pabsb	%xmm5,%xmm5
+
+// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %mm3
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsw	69, %mm3
+        	pabsw	0x45,%mm3
+
+// CHECK: 	pabsw	32493, %mm3
+        	pabsw	0x7eed,%mm3
+
+// CHECK: 	pabsw	3133065982, %mm3
+        	pabsw	0xbabecafe,%mm3
+
+// CHECK: 	pabsw	305419896, %mm3
+        	pabsw	0x12345678,%mm3
+
+// CHECK: 	pabsw	%mm3, %mm3
+        	pabsw	%mm3,%mm3
+
+// CHECK: 	pabsw	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsw	69, %xmm5
+        	pabsw	0x45,%xmm5
+
+// CHECK: 	pabsw	32493, %xmm5
+        	pabsw	0x7eed,%xmm5
+
+// CHECK: 	pabsw	3133065982, %xmm5
+        	pabsw	0xbabecafe,%xmm5
+
+// CHECK: 	pabsw	305419896, %xmm5
+        	pabsw	0x12345678,%xmm5
+
+// CHECK: 	pabsw	%xmm5, %xmm5
+        	pabsw	%xmm5,%xmm5
+
+// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %mm3
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%mm3
+
+// CHECK: 	pabsd	69, %mm3
+        	pabsd	0x45,%mm3
+
+// CHECK: 	pabsd	32493, %mm3
+        	pabsd	0x7eed,%mm3
+
+// CHECK: 	pabsd	3133065982, %mm3
+        	pabsd	0xbabecafe,%mm3
+
+// CHECK: 	pabsd	305419896, %mm3
+        	pabsd	0x12345678,%mm3
+
+// CHECK: 	pabsd	%mm3, %mm3
+        	pabsd	%mm3,%mm3
+
+// CHECK: 	pabsd	3735928559(%ebx,%ecx,8), %xmm5
+        	pabsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pabsd	69, %xmm5
+        	pabsd	0x45,%xmm5
+
+// CHECK: 	pabsd	32493, %xmm5
+        	pabsd	0x7eed,%xmm5
+
+// CHECK: 	pabsd	3133065982, %xmm5
+        	pabsd	0xbabecafe,%xmm5
+
+// CHECK: 	pabsd	305419896, %xmm5
+        	pabsd	0x12345678,%xmm5
+
+// CHECK: 	pabsd	%xmm5, %xmm5
+        	pabsd	%xmm5,%xmm5
+
+// CHECK: 	femms
+        	femms
+
+// CHECK: 	movntdqa	3735928559(%ebx,%ecx,8), %xmm5
+        	movntdqa	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	movntdqa	69, %xmm5
+        	movntdqa	0x45,%xmm5
+
+// CHECK: 	movntdqa	32493, %xmm5
+        	movntdqa	0x7eed,%xmm5
+
+// CHECK: 	movntdqa	3133065982, %xmm5
+        	movntdqa	0xbabecafe,%xmm5
+
+// CHECK: 	movntdqa	305419896, %xmm5
+        	movntdqa	0x12345678,%xmm5
+
+// CHECK: 	packusdw	3735928559(%ebx,%ecx,8), %xmm5
+        	packusdw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	packusdw	69, %xmm5
+        	packusdw	0x45,%xmm5
+
+// CHECK: 	packusdw	32493, %xmm5
+        	packusdw	0x7eed,%xmm5
+
+// CHECK: 	packusdw	3133065982, %xmm5
+        	packusdw	0xbabecafe,%xmm5
+
+// CHECK: 	packusdw	305419896, %xmm5
+        	packusdw	0x12345678,%xmm5
+
+// CHECK: 	packusdw	%xmm5, %xmm5
+        	packusdw	%xmm5,%xmm5
+
+// CHECK: 	pcmpeqq	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpeqq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpeqq	69, %xmm5
+        	pcmpeqq	0x45,%xmm5
+
+// CHECK: 	pcmpeqq	32493, %xmm5
+        	pcmpeqq	0x7eed,%xmm5
+
+// CHECK: 	pcmpeqq	3133065982, %xmm5
+        	pcmpeqq	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpeqq	305419896, %xmm5
+        	pcmpeqq	0x12345678,%xmm5
+
+// CHECK: 	pcmpeqq	%xmm5, %xmm5
+        	pcmpeqq	%xmm5,%xmm5
+
+// CHECK: 	phminposuw	3735928559(%ebx,%ecx,8), %xmm5
+        	phminposuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	phminposuw	69, %xmm5
+        	phminposuw	0x45,%xmm5
+
+// CHECK: 	phminposuw	32493, %xmm5
+        	phminposuw	0x7eed,%xmm5
+
+// CHECK: 	phminposuw	3133065982, %xmm5
+        	phminposuw	0xbabecafe,%xmm5
+
+// CHECK: 	phminposuw	305419896, %xmm5
+        	phminposuw	0x12345678,%xmm5
+
+// CHECK: 	phminposuw	%xmm5, %xmm5
+        	phminposuw	%xmm5,%xmm5
+
+// CHECK: 	pmaxsb	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxsb	69, %xmm5
+        	pmaxsb	0x45,%xmm5
+
+// CHECK: 	pmaxsb	32493, %xmm5
+        	pmaxsb	0x7eed,%xmm5
+
+// CHECK: 	pmaxsb	3133065982, %xmm5
+        	pmaxsb	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxsb	305419896, %xmm5
+        	pmaxsb	0x12345678,%xmm5
+
+// CHECK: 	pmaxsb	%xmm5, %xmm5
+        	pmaxsb	%xmm5,%xmm5
+
+// CHECK: 	pmaxsd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxsd	69, %xmm5
+        	pmaxsd	0x45,%xmm5
+
+// CHECK: 	pmaxsd	32493, %xmm5
+        	pmaxsd	0x7eed,%xmm5
+
+// CHECK: 	pmaxsd	3133065982, %xmm5
+        	pmaxsd	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxsd	305419896, %xmm5
+        	pmaxsd	0x12345678,%xmm5
+
+// CHECK: 	pmaxsd	%xmm5, %xmm5
+        	pmaxsd	%xmm5,%xmm5
+
+// CHECK: 	pmaxud	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxud	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxud	69, %xmm5
+        	pmaxud	0x45,%xmm5
+
+// CHECK: 	pmaxud	32493, %xmm5
+        	pmaxud	0x7eed,%xmm5
+
+// CHECK: 	pmaxud	3133065982, %xmm5
+        	pmaxud	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxud	305419896, %xmm5
+        	pmaxud	0x12345678,%xmm5
+
+// CHECK: 	pmaxud	%xmm5, %xmm5
+        	pmaxud	%xmm5,%xmm5
+
+// CHECK: 	pmaxuw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmaxuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmaxuw	69, %xmm5
+        	pmaxuw	0x45,%xmm5
+
+// CHECK: 	pmaxuw	32493, %xmm5
+        	pmaxuw	0x7eed,%xmm5
+
+// CHECK: 	pmaxuw	3133065982, %xmm5
+        	pmaxuw	0xbabecafe,%xmm5
+
+// CHECK: 	pmaxuw	305419896, %xmm5
+        	pmaxuw	0x12345678,%xmm5
+
+// CHECK: 	pmaxuw	%xmm5, %xmm5
+        	pmaxuw	%xmm5,%xmm5
+
+// CHECK: 	pminsb	3735928559(%ebx,%ecx,8), %xmm5
+        	pminsb	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminsb	69, %xmm5
+        	pminsb	0x45,%xmm5
+
+// CHECK: 	pminsb	32493, %xmm5
+        	pminsb	0x7eed,%xmm5
+
+// CHECK: 	pminsb	3133065982, %xmm5
+        	pminsb	0xbabecafe,%xmm5
+
+// CHECK: 	pminsb	305419896, %xmm5
+        	pminsb	0x12345678,%xmm5
+
+// CHECK: 	pminsb	%xmm5, %xmm5
+        	pminsb	%xmm5,%xmm5
+
+// CHECK: 	pminsd	3735928559(%ebx,%ecx,8), %xmm5
+        	pminsd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminsd	69, %xmm5
+        	pminsd	0x45,%xmm5
+
+// CHECK: 	pminsd	32493, %xmm5
+        	pminsd	0x7eed,%xmm5
+
+// CHECK: 	pminsd	3133065982, %xmm5
+        	pminsd	0xbabecafe,%xmm5
+
+// CHECK: 	pminsd	305419896, %xmm5
+        	pminsd	0x12345678,%xmm5
+
+// CHECK: 	pminsd	%xmm5, %xmm5
+        	pminsd	%xmm5,%xmm5
+
+// CHECK: 	pminud	3735928559(%ebx,%ecx,8), %xmm5
+        	pminud	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminud	69, %xmm5
+        	pminud	0x45,%xmm5
+
+// CHECK: 	pminud	32493, %xmm5
+        	pminud	0x7eed,%xmm5
+
+// CHECK: 	pminud	3133065982, %xmm5
+        	pminud	0xbabecafe,%xmm5
+
+// CHECK: 	pminud	305419896, %xmm5
+        	pminud	0x12345678,%xmm5
+
+// CHECK: 	pminud	%xmm5, %xmm5
+        	pminud	%xmm5,%xmm5
+
+// CHECK: 	pminuw	3735928559(%ebx,%ecx,8), %xmm5
+        	pminuw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pminuw	69, %xmm5
+        	pminuw	0x45,%xmm5
+
+// CHECK: 	pminuw	32493, %xmm5
+        	pminuw	0x7eed,%xmm5
+
+// CHECK: 	pminuw	3133065982, %xmm5
+        	pminuw	0xbabecafe,%xmm5
+
+// CHECK: 	pminuw	305419896, %xmm5
+        	pminuw	0x12345678,%xmm5
+
+// CHECK: 	pminuw	%xmm5, %xmm5
+        	pminuw	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbw	69, %xmm5
+        	pmovsxbw	0x45,%xmm5
+
+// CHECK: 	pmovsxbw	32493, %xmm5
+        	pmovsxbw	0x7eed,%xmm5
+
+// CHECK: 	pmovsxbw	3133065982, %xmm5
+        	pmovsxbw	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxbw	305419896, %xmm5
+        	pmovsxbw	0x12345678,%xmm5
+
+// CHECK: 	pmovsxbw	%xmm5, %xmm5
+        	pmovsxbw	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbd	69, %xmm5
+        	pmovsxbd	0x45,%xmm5
+
+// CHECK: 	pmovsxbd	32493, %xmm5
+        	pmovsxbd	0x7eed,%xmm5
+
+// CHECK: 	pmovsxbd	3133065982, %xmm5
+        	pmovsxbd	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxbd	305419896, %xmm5
+        	pmovsxbd	0x12345678,%xmm5
+
+// CHECK: 	pmovsxbd	%xmm5, %xmm5
+        	pmovsxbd	%xmm5,%xmm5
+
+// CHECK: 	pmovsxbq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxbq	69, %xmm5
+        	pmovsxbq	0x45,%xmm5
+
+// CHECK: 	pmovsxbq	32493, %xmm5
+        	pmovsxbq	0x7eed,%xmm5
+
+// CHECK: 	pmovsxbq	3133065982, %xmm5
+        	pmovsxbq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxbq	305419896, %xmm5
+        	pmovsxbq	0x12345678,%xmm5
+
+// CHECK: 	pmovsxbq	%xmm5, %xmm5
+        	pmovsxbq	%xmm5,%xmm5
+
+// CHECK: 	pmovsxwd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxwd	69, %xmm5
+        	pmovsxwd	0x45,%xmm5
+
+// CHECK: 	pmovsxwd	32493, %xmm5
+        	pmovsxwd	0x7eed,%xmm5
+
+// CHECK: 	pmovsxwd	3133065982, %xmm5
+        	pmovsxwd	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxwd	305419896, %xmm5
+        	pmovsxwd	0x12345678,%xmm5
+
+// CHECK: 	pmovsxwd	%xmm5, %xmm5
+        	pmovsxwd	%xmm5,%xmm5
+
+// CHECK: 	pmovsxwq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxwq	69, %xmm5
+        	pmovsxwq	0x45,%xmm5
+
+// CHECK: 	pmovsxwq	32493, %xmm5
+        	pmovsxwq	0x7eed,%xmm5
+
+// CHECK: 	pmovsxwq	3133065982, %xmm5
+        	pmovsxwq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxwq	305419896, %xmm5
+        	pmovsxwq	0x12345678,%xmm5
+
+// CHECK: 	pmovsxwq	%xmm5, %xmm5
+        	pmovsxwq	%xmm5,%xmm5
+
+// CHECK: 	pmovsxdq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovsxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovsxdq	69, %xmm5
+        	pmovsxdq	0x45,%xmm5
+
+// CHECK: 	pmovsxdq	32493, %xmm5
+        	pmovsxdq	0x7eed,%xmm5
+
+// CHECK: 	pmovsxdq	3133065982, %xmm5
+        	pmovsxdq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovsxdq	305419896, %xmm5
+        	pmovsxdq	0x12345678,%xmm5
+
+// CHECK: 	pmovsxdq	%xmm5, %xmm5
+        	pmovsxdq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbw	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbw	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbw	69, %xmm5
+        	pmovzxbw	0x45,%xmm5
+
+// CHECK: 	pmovzxbw	32493, %xmm5
+        	pmovzxbw	0x7eed,%xmm5
+
+// CHECK: 	pmovzxbw	3133065982, %xmm5
+        	pmovzxbw	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxbw	305419896, %xmm5
+        	pmovzxbw	0x12345678,%xmm5
+
+// CHECK: 	pmovzxbw	%xmm5, %xmm5
+        	pmovzxbw	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbd	69, %xmm5
+        	pmovzxbd	0x45,%xmm5
+
+// CHECK: 	pmovzxbd	32493, %xmm5
+        	pmovzxbd	0x7eed,%xmm5
+
+// CHECK: 	pmovzxbd	3133065982, %xmm5
+        	pmovzxbd	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxbd	305419896, %xmm5
+        	pmovzxbd	0x12345678,%xmm5
+
+// CHECK: 	pmovzxbd	%xmm5, %xmm5
+        	pmovzxbd	%xmm5,%xmm5
+
+// CHECK: 	pmovzxbq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxbq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxbq	69, %xmm5
+        	pmovzxbq	0x45,%xmm5
+
+// CHECK: 	pmovzxbq	32493, %xmm5
+        	pmovzxbq	0x7eed,%xmm5
+
+// CHECK: 	pmovzxbq	3133065982, %xmm5
+        	pmovzxbq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxbq	305419896, %xmm5
+        	pmovzxbq	0x12345678,%xmm5
+
+// CHECK: 	pmovzxbq	%xmm5, %xmm5
+        	pmovzxbq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxwd	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxwd	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxwd	69, %xmm5
+        	pmovzxwd	0x45,%xmm5
+
+// CHECK: 	pmovzxwd	32493, %xmm5
+        	pmovzxwd	0x7eed,%xmm5
+
+// CHECK: 	pmovzxwd	3133065982, %xmm5
+        	pmovzxwd	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxwd	305419896, %xmm5
+        	pmovzxwd	0x12345678,%xmm5
+
+// CHECK: 	pmovzxwd	%xmm5, %xmm5
+        	pmovzxwd	%xmm5,%xmm5
+
+// CHECK: 	pmovzxwq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxwq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxwq	69, %xmm5
+        	pmovzxwq	0x45,%xmm5
+
+// CHECK: 	pmovzxwq	32493, %xmm5
+        	pmovzxwq	0x7eed,%xmm5
+
+// CHECK: 	pmovzxwq	3133065982, %xmm5
+        	pmovzxwq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxwq	305419896, %xmm5
+        	pmovzxwq	0x12345678,%xmm5
+
+// CHECK: 	pmovzxwq	%xmm5, %xmm5
+        	pmovzxwq	%xmm5,%xmm5
+
+// CHECK: 	pmovzxdq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmovzxdq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmovzxdq	69, %xmm5
+        	pmovzxdq	0x45,%xmm5
+
+// CHECK: 	pmovzxdq	32493, %xmm5
+        	pmovzxdq	0x7eed,%xmm5
+
+// CHECK: 	pmovzxdq	3133065982, %xmm5
+        	pmovzxdq	0xbabecafe,%xmm5
+
+// CHECK: 	pmovzxdq	305419896, %xmm5
+        	pmovzxdq	0x12345678,%xmm5
+
+// CHECK: 	pmovzxdq	%xmm5, %xmm5
+        	pmovzxdq	%xmm5,%xmm5
+
+// CHECK: 	pmuldq	3735928559(%ebx,%ecx,8), %xmm5
+        	pmuldq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmuldq	69, %xmm5
+        	pmuldq	0x45,%xmm5
+
+// CHECK: 	pmuldq	32493, %xmm5
+        	pmuldq	0x7eed,%xmm5
+
+// CHECK: 	pmuldq	3133065982, %xmm5
+        	pmuldq	0xbabecafe,%xmm5
+
+// CHECK: 	pmuldq	305419896, %xmm5
+        	pmuldq	0x12345678,%xmm5
+
+// CHECK: 	pmuldq	%xmm5, %xmm5
+        	pmuldq	%xmm5,%xmm5
+
+// CHECK: 	pmulld	3735928559(%ebx,%ecx,8), %xmm5
+        	pmulld	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pmulld	69, %xmm5
+        	pmulld	0x45,%xmm5
+
+// CHECK: 	pmulld	32493, %xmm5
+        	pmulld	0x7eed,%xmm5
+
+// CHECK: 	pmulld	3133065982, %xmm5
+        	pmulld	0xbabecafe,%xmm5
+
+// CHECK: 	pmulld	305419896, %xmm5
+        	pmulld	0x12345678,%xmm5
+
+// CHECK: 	pmulld	%xmm5, %xmm5
+        	pmulld	%xmm5,%xmm5
+
+// CHECK: 	ptest 	3735928559(%ebx,%ecx,8), %xmm5
+        	ptest	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	ptest 	69, %xmm5
+        	ptest	0x45,%xmm5
+
+// CHECK: 	ptest 	32493, %xmm5
+        	ptest	0x7eed,%xmm5
+
+// CHECK: 	ptest 	3133065982, %xmm5
+        	ptest	0xbabecafe,%xmm5
+
+// CHECK: 	ptest 	305419896, %xmm5
+        	ptest	0x12345678,%xmm5
+
+// CHECK: 	ptest 	%xmm5, %xmm5
+        	ptest	%xmm5,%xmm5
+
+// CHECK: 	crc32b 	%bl, %eax
+                crc32b %bl, %eax
+
+// CHECK: 	crc32b 	4(%ebx), %eax
+                crc32b 4(%ebx), %eax
+
+// CHECK: 	crc32w 	%bx, %eax
+                crc32w %bx, %eax
+
+// CHECK: 	crc32w 	4(%ebx), %eax
+                crc32w 4(%ebx), %eax
+
+// CHECK: 	crc32l 	%ebx, %eax
+                crc32l %ebx, %eax
+
+// CHECK: 	crc32l 	4(%ebx), %eax
+                crc32l 4(%ebx), %eax
+
+// CHECK: 	crc32l 	3735928559(%ebx,%ecx,8), %ecx
+                crc32l 0xdeadbeef(%ebx,%ecx,8),%ecx
+
+// CHECK: 	crc32l 	69, %ecx
+                crc32l 0x45,%ecx
+
+// CHECK: 	crc32l 	32493, %ecx
+                crc32l 0x7eed,%ecx
+
+// CHECK: 	crc32l 	3133065982, %ecx
+                crc32l 0xbabecafe,%ecx
+
+// CHECK: 	crc32l 	%ecx, %ecx
+                crc32l %ecx,%ecx
+
+// CHECK: 	pcmpgtq	3735928559(%ebx,%ecx,8), %xmm5
+        	pcmpgtq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+
+// CHECK: 	pcmpgtq	69, %xmm5
+        	pcmpgtq	0x45,%xmm5
+
+// CHECK: 	pcmpgtq	32493, %xmm5
+        	pcmpgtq	0x7eed,%xmm5
+
+// CHECK: 	pcmpgtq	3133065982, %xmm5
+        	pcmpgtq	0xbabecafe,%xmm5
+
+// CHECK: 	pcmpgtq	305419896, %xmm5
+        	pcmpgtq	0x12345678,%xmm5
+
+// CHECK: 	pcmpgtq	%xmm5, %xmm5
+        	pcmpgtq	%xmm5,%xmm5
+
+// CHECK: 	aesimc	%xmm0, %xmm1
+                aesimc %xmm0,%xmm1
+
+// CHECK: 	aesimc	(%eax), %xmm1
+                aesimc (%eax),%xmm1
+
+// CHECK: 	aesenc	%xmm1, %xmm2
+                aesenc %xmm1,%xmm2
+
+// CHECK: 	aesenc	4(%ebx), %xmm2
+                aesenc 4(%ebx),%xmm2
+
+// CHECK: 	aesenclast	%xmm3, %xmm4
+                aesenclast %xmm3,%xmm4
+
+// CHECK: 	aesenclast	4(%edx,%edi), %xmm4
+                aesenclast 4(%edx,%edi),%xmm4
+
+// CHECK: 	aesdec	%xmm5, %xmm6
+                aesdec %xmm5,%xmm6
+
+// CHECK: 	aesdec	4(%ecx,%eax,8), %xmm6
+                aesdec 4(%ecx,%eax,8),%xmm6
+
+// CHECK: 	aesdeclast	%xmm7, %xmm0
+                aesdeclast %xmm7,%xmm0
+
+// CHECK: 	aesdeclast	3405691582, %xmm0
+                aesdeclast 0xcafebabe,%xmm0
+
+// CHECK: 	aeskeygenassist	$125, %xmm1, %xmm2
+                aeskeygenassist $125, %xmm1, %xmm2
+
+// CHECK: 	aeskeygenassist	$125, (%edx,%eax,4), %xmm2
+                aeskeygenassist $125, (%edx,%eax,4), %xmm2
diff --git a/test/MC/X86/x86-32-fma3.s b/test/MC/X86/x86-32-fma3.s
new file mode 100644
index 000000000000..db7efecfb51b
--- /dev/null
+++ b/test/MC/X86/x86-32-fma3.s
@@ -0,0 +1,674 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmadd132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
+          vfmadd132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
+          vfmadd132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
+          vfmadd132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
+          vfmadd132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
+          vfmadd213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
+          vfmadd213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
+          vfmadd213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
+          vfmadd213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
+          vfmadd231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
+          vfmadd231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
+          vfmadd231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
+          vfmadd231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
+          vfmadd132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
+          vfmadd132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
+          vfmadd132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
+          vfmadd132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
+          vfmadd213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
+          vfmadd213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
+          vfmadd213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
+          vfmadd213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
+          vfmadd231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
+          vfmadd231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
+          vfmadd231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
+          vfmadd231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
+          vfmadd132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
+          vfmadd132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
+          vfmadd132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
+          vfmadd132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
+          vfmadd213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
+          vfmadd213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
+          vfmadd213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
+          vfmadd213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
+          vfmadd231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
+          vfmadd231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
+          vfmadd231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
+          vfmadd231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca]
+          vfmaddsub132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08]
+          vfmaddsub132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca]
+          vfmaddsub132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08]
+          vfmaddsub132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca]
+          vfmaddsub213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08]
+          vfmaddsub213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca]
+          vfmaddsub213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08]
+          vfmaddsub213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca]
+          vfmaddsub231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08]
+          vfmaddsub231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca]
+          vfmaddsub231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08]
+          vfmaddsub231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca]
+          vfmsubadd132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08]
+          vfmsubadd132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca]
+          vfmsubadd132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08]
+          vfmsubadd132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca]
+          vfmsubadd213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08]
+          vfmsubadd213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca]
+          vfmsubadd213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08]
+          vfmsubadd213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca]
+          vfmsubadd231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08]
+          vfmsubadd231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca]
+          vfmsubadd231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08]
+          vfmsubadd231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca]
+          vfmsub132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08]
+          vfmsub132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca]
+          vfmsub132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08]
+          vfmsub132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca]
+          vfmsub213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08]
+          vfmsub213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca]
+          vfmsub213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08]
+          vfmsub213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca]
+          vfmsub231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08]
+          vfmsub231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca]
+          vfmsub231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08]
+          vfmsub231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca]
+          vfnmadd132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0x08]
+          vfnmadd132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca]
+          vfnmadd132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08]
+          vfnmadd132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca]
+          vfnmadd213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08]
+          vfnmadd213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca]
+          vfnmadd213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08]
+          vfnmadd213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca]
+          vfnmadd231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08]
+          vfnmadd231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca]
+          vfnmadd231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08]
+          vfnmadd231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub132pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca]
+          vfnmsub132pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub132pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08]
+          vfnmsub132pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub132ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca]
+          vfnmsub132ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub132ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08]
+          vfnmsub132ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub213pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca]
+          vfnmsub213pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub213pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08]
+          vfnmsub213pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub213ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca]
+          vfnmsub213ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub213ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08]
+          vfnmsub213ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub231pd  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca]
+          vfnmsub231pd  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub231pd  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08]
+          vfnmsub231pd  (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub231ps  %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca]
+          vfnmsub231ps  %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub231ps  (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08]
+          vfnmsub231ps  (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
+          vfmadd132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
+          vfmadd132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
+          vfmadd132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
+          vfmadd132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
+          vfmadd213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
+          vfmadd213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
+          vfmadd213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
+          vfmadd213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
+          vfmadd231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
+          vfmadd231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
+          vfmadd231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
+          vfmadd231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca]
+          vfmaddsub132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08]
+          vfmaddsub132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca]
+          vfmaddsub132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08]
+          vfmaddsub132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca]
+          vfmaddsub213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08]
+          vfmaddsub213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca]
+          vfmaddsub213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08]
+          vfmaddsub213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca]
+          vfmaddsub231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08]
+          vfmaddsub231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca]
+          vfmaddsub231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0x08]
+          vfmaddsub231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca]
+          vfmsubadd132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08]
+          vfmsubadd132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca]
+          vfmsubadd132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08]
+          vfmsubadd132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca]
+          vfmsubadd213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08]
+          vfmsubadd213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca]
+          vfmsubadd213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08]
+          vfmsubadd213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca]
+          vfmsubadd231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08]
+          vfmsubadd231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca]
+          vfmsubadd231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08]
+          vfmsubadd231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca]
+          vfmsub132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08]
+          vfmsub132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0xca]
+          vfmsub132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08]
+          vfmsub132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca]
+          vfmsub213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08]
+          vfmsub213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca]
+          vfmsub213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08]
+          vfmsub213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca]
+          vfmsub231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08]
+          vfmsub231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca]
+          vfmsub231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08]
+          vfmsub231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca]
+          vfnmadd132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08]
+          vfnmadd132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca]
+          vfnmadd132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08]
+          vfnmadd132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca]
+          vfnmadd213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08]
+          vfnmadd213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca]
+          vfnmadd213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08]
+          vfnmadd213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca]
+          vfnmadd231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08]
+          vfnmadd231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca]
+          vfnmadd231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08]
+          vfnmadd231ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub132pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca]
+          vfnmsub132pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub132pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08]
+          vfnmsub132pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub132ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca]
+          vfnmsub132ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub132ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08]
+          vfnmsub132ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub213pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca]
+          vfnmsub213pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub213pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08]
+          vfnmsub213pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub213ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca]
+          vfnmsub213ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub213ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08]
+          vfnmsub213ps  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub231pd  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca]
+          vfnmsub231pd  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub231pd  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08]
+          vfnmsub231pd  (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub231ps  %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca]
+          vfnmsub231ps  %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub231ps  (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08]
+          vfnmsub231ps  (%eax), %ymm5, %ymm1
+
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
new file mode 100644
index 000000000000..de6b96389ead
--- /dev/null
+++ b/test/MC/X86/x86-32.s
@@ -0,0 +1,810 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+	pause
+// CHECK: pause
+// CHECK: encoding: [0xf3,0x90]
+	sfence
+// CHECK: sfence
+// CHECK: encoding: [0x0f,0xae,0xf8]
+	lfence
+// CHECK: lfence
+// CHECK: encoding: [0x0f,0xae,0xe8]
+	mfence
+// CHECK: mfence
+// CHECK: encoding: [0x0f,0xae,0xf0]
+	monitor
+// CHECK: monitor
+// CHECK: encoding: [0x0f,0x01,0xc8]
+	monitor %eax, %ecx, %edx
+// CHECK: monitor
+// CHECK: encoding: [0x0f,0x01,0xc8]
+	mwait
+// CHECK: mwait
+// CHECK: encoding: [0x0f,0x01,0xc9]
+	mwait %eax, %ecx
+// CHECK: mwait
+// CHECK: encoding: [0x0f,0x01,0xc9]
+
+	vmcall
+// CHECK: vmcall
+// CHECK: encoding: [0x0f,0x01,0xc1]
+	vmlaunch
+// CHECK: vmlaunch
+// CHECK: encoding: [0x0f,0x01,0xc2]
+	vmresume
+// CHECK: vmresume
+// CHECK: encoding: [0x0f,0x01,0xc3]
+	vmxoff
+// CHECK: vmxoff
+// CHECK: encoding: [0x0f,0x01,0xc4]
+	swapgs
+// CHECK: swapgs
+// CHECK: encoding: [0x0f,0x01,0xf8]
+
+rdtscp
+// CHECK: rdtscp
+// CHECK:  encoding: [0x0f,0x01,0xf9]
+
+
+// CHECK: movl	%eax, 16(%ebp)          # encoding: [0x89,0x45,0x10]
+	movl	%eax, 16(%ebp)
+// CHECK: movl	%eax, -16(%ebp)          # encoding: [0x89,0x45,0xf0]
+	movl	%eax, -16(%ebp)
+
+// CHECK: testb	%bl, %cl                # encoding: [0x84,0xcb]
+        testb %bl, %cl
+
+// CHECK: cmpl	%eax, %ebx              # encoding: [0x39,0xc3]
+        cmpl %eax, %ebx
+
+// CHECK: addw	%ax, %ax                # encoding: [0x66,0x01,0xc0]
+        addw %ax, %ax
+
+// CHECK: shrl	%eax                    # encoding: [0xd1,0xe8]
+        shrl $1, %eax
+
+// CHECK: shll	%eax                    # encoding: [0xd1,0xe0]
+        sall $1, %eax
+// CHECK: shll	%eax                    # encoding: [0xd1,0xe0]
+        sal $1, %eax
+
+// moffset forms of moves, rdar://7947184
+movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,A,A,A,A]
+movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,A,A,A,A]
+movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,A,A,A,A]
+
+// rdar://7973775
+into
+// CHECK: into
+// CHECK:  encoding: [0xce]
+int3
+// CHECK: int3
+// CHECK:  encoding: [0xcc]
+int $4
+// CHECK: int $4
+// CHECK:  encoding: [0xcd,0x04]
+int $255
+// CHECK: int $255
+// CHECK:  encoding: [0xcd,0xff]
+
+// CHECK: pushfl	# encoding: [0x9c]
+        pushf
+// CHECK: pushfl	# encoding: [0x9c]
+        pushfl
+// CHECK: popfl	        # encoding: [0x9d]
+        popf
+// CHECK: popfl	        # encoding: [0x9d]
+        popfl
+
+// rdar://8014869
+retl
+// CHECK: ret
+// CHECK:  encoding: [0xc3]
+
+// rdar://7973854
+// CHECK: cmoval	%eax, %edx
+// CHECK:  encoding: [0x0f,0x47,0xd0]
+        	cmoval	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovael	%eax,%edx
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovbel	%eax,%edx
+
+// CHECK: cmovbl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x42,0xd0]
+        	cmovbl	%eax,%edx
+
+// CHECK: cmovbw %bx, %bx
+cmovnae	%bx,%bx
+
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovbel	%eax,%edx
+
+// CHECK: cmovbl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x42,0xd0]
+        	cmovcl	%eax,%edx
+
+// CHECK: cmovel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x44,0xd0]
+        	cmovel	%eax,%edx
+
+// CHECK: cmovgl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4f,0xd0]
+        	cmovgl	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovgel	%eax,%edx
+
+// CHECK: cmovll	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4c,0xd0]
+        	cmovll	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovlel	%eax,%edx
+
+// CHECK: cmovbel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x46,0xd0]
+        	cmovnal	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovnbl	%eax,%edx
+
+// CHECK: cmoval	%eax, %edx
+// CHECK:  encoding: [0x0f,0x47,0xd0]
+        	cmovnbel	%eax,%edx
+
+// CHECK: cmovael	%eax, %edx
+// CHECK:  encoding: [0x0f,0x43,0xd0]
+        	cmovncl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovngl	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovnl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnel	%eax,%edx
+
+// CHECK: cmovlel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4e,0xd0]
+        	cmovngl	%eax,%edx
+
+// CHECK: cmovll	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4c,0xd0]
+        	cmovngel	%eax,%edx
+
+// CHECK: cmovgel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4d,0xd0]
+        	cmovnll	%eax,%edx
+
+// CHECK: cmovgl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4f,0xd0]
+        	cmovnlel	%eax,%edx
+
+// CHECK: cmovnol	%eax, %edx
+// CHECK:  encoding: [0x0f,0x41,0xd0]
+        	cmovnol	%eax,%edx
+
+// CHECK: cmovnpl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4b,0xd0]
+        	cmovnpl	%eax,%edx
+
+// CHECK: cmovnsl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x49,0xd0]
+        	cmovnsl	%eax,%edx
+
+// CHECK: cmovnel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x45,0xd0]
+        	cmovnzl	%eax,%edx
+
+// CHECK: cmovol	%eax, %edx
+// CHECK:  encoding: [0x0f,0x40,0xd0]
+        	cmovol	%eax,%edx
+
+// CHECK: cmovpl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x4a,0xd0]
+        	cmovpl	%eax,%edx
+
+// CHECK: cmovsl	%eax, %edx
+// CHECK:  encoding: [0x0f,0x48,0xd0]
+        	cmovsl	%eax,%edx
+
+// CHECK: cmovel	%eax, %edx
+// CHECK:  encoding: [0x0f,0x44,0xd0]
+        	cmovzl	%eax,%edx
+
+// CHECK: cmpps	$0, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
+        cmpps $0, %xmm0, %xmm1
+// CHECK:	cmpps	$0, (%eax), %xmm1
+// CHECK: encoding: [0x0f,0xc2,0x08,0x00]
+        cmpps $0, 0(%eax), %xmm1
+// CHECK:	cmppd	$0, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x00]
+        cmppd $0, %xmm0, %xmm1
+// CHECK:	cmppd	$0, (%eax), %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0x08,0x00]
+        cmppd $0, 0(%eax), %xmm1
+// CHECK:	cmpss	$0, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x00]
+        cmpss $0, %xmm0, %xmm1
+// CHECK:	cmpss	$0, (%eax), %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0x08,0x00]
+        cmpss $0, 0(%eax), %xmm1
+// CHECK:	cmpsd	$0, %xmm0, %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x00]
+        cmpsd $0, %xmm0, %xmm1
+// CHECK:	cmpsd	$0, (%eax), %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0x08,0x00]
+        cmpsd $0, 0(%eax), %xmm1
+
+// Check matching of instructions which embed the SSE comparison code.
+
+// CHECK: cmpps $0, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x00]
+        cmpeqps %xmm0, %xmm1
+
+// CHECK: cmppd $1, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x01]
+        cmpltpd %xmm0, %xmm1
+
+// CHECK: cmpss $2, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
+        cmpless %xmm0, %xmm1
+
+// CHECK: cmppd $3, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x03]
+        cmpunordpd %xmm0, %xmm1
+
+// CHECK: cmpps $4, %xmm0, %xmm1
+// CHECK: encoding: [0x0f,0xc2,0xc8,0x04]
+        cmpneqps %xmm0, %xmm1
+
+// CHECK: cmppd $5, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0xc2,0xc8,0x05]
+        cmpnltpd %xmm0, %xmm1
+
+// CHECK: cmpss $6, %xmm0, %xmm1
+// CHECK: encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
+        cmpnless %xmm0, %xmm1
+
+// CHECK: cmpsd $7, %xmm0, %xmm1
+// CHECK: encoding: [0xf2,0x0f,0xc2,0xc8,0x07]
+        cmpordsd %xmm0, %xmm1
+
+// rdar://7995856
+// CHECK: fmul	%st(0)
+// CHECK:  encoding: [0xd8,0xc8]
+        fmul %st(0), %st
+
+// CHECK: fadd	%st(0)
+// CHECK:  encoding: [0xd8,0xc0]
+        fadd %st(0), %st
+
+// CHECK: fsub	%st(0)
+// CHECK:  encoding: [0xd8,0xe0]
+        fsub %st(0), %st
+
+// CHECK: fsubr	%st(0)
+// CHECK:  encoding: [0xd8,0xe8]
+        fsubr %st(0), %st
+
+// CHECK: fdivr	%st(0)
+// CHECK:  encoding: [0xd8,0xf8]
+        fdivr %st(0), %st
+
+// CHECK: fdiv	%st(0)
+// CHECK:  encoding: [0xd8,0xf0]
+        fdiv %st(0), %st
+
+// radr://8017519
+// CHECK: movl	%cs, %eax
+// CHECK:  encoding: [0x8c,0xc8]
+        movl %cs, %eax
+
+// CHECK: movw	%cs, %ax
+// CHECK:  encoding: [0x66,0x8c,0xc8]
+        movw %cs, %ax
+
+// CHECK: movl	%cs, (%eax)
+// CHECK:  encoding: [0x8c,0x08]
+        movl %cs, (%eax)
+
+// CHECK: movw	%cs, (%eax)
+// CHECK:  encoding: [0x66,0x8c,0x08]
+        movw %cs, (%eax)
+
+// CHECK: movl	%eax, %cs
+// CHECK:  encoding: [0x8e,0xc8]
+        movl %eax, %cs
+
+// CHECK: movl	(%eax), %cs
+// CHECK:  encoding: [0x8e,0x08]
+        movl (%eax), %cs
+
+// CHECK: movw	(%eax), %cs
+// CHECK:  encoding: [0x66,0x8e,0x08]
+        movw (%eax), %cs
+
+// radr://8033374
+// CHECK: movl	%cr0, %eax
+// CHECK:  encoding: [0x0f,0x20,0xc0]
+        movl %cr0,%eax
+
+// CHECK: movl	%cr1, %eax
+// CHECK:  encoding: [0x0f,0x20,0xc8]
+        movl %cr1,%eax
+
+// CHECK: movl	%cr2, %eax
+// CHECK:  encoding: [0x0f,0x20,0xd0]
+        movl %cr2,%eax
+
+// CHECK: movl	%cr3, %eax
+// CHECK:  encoding: [0x0f,0x20,0xd8]
+        movl %cr3,%eax
+
+// CHECK: movl	%cr4, %eax
+// CHECK:  encoding: [0x0f,0x20,0xe0]
+        movl %cr4,%eax
+
+// CHECK: movl	%dr0, %eax
+// CHECK:  encoding: [0x0f,0x21,0xc0]
+        movl %dr0,%eax
+
+// CHECK: movl	%dr1, %eax
+// CHECK:  encoding: [0x0f,0x21,0xc8]
+        movl %dr1,%eax
+
+// CHECK: movl	%dr1, %eax
+// CHECK:  encoding: [0x0f,0x21,0xc8]
+        movl %dr1,%eax
+
+// CHECK: movl	%dr2, %eax
+// CHECK:  encoding: [0x0f,0x21,0xd0]
+        movl %dr2,%eax
+
+// CHECK: movl	%dr3, %eax
+// CHECK:  encoding: [0x0f,0x21,0xd8]
+        movl %dr3,%eax
+
+// CHECK: movl	%dr4, %eax
+// CHECK:  encoding: [0x0f,0x21,0xe0]
+        movl %dr4,%eax
+
+// CHECK: movl	%dr5, %eax
+// CHECK:  encoding: [0x0f,0x21,0xe8]
+        movl %dr5,%eax
+
+// CHECK: movl	%dr6, %eax
+// CHECK:  encoding: [0x0f,0x21,0xf0]
+        movl %dr6,%eax
+
+// CHECK: movl	%dr7, %eax
+// CHECK:  encoding: [0x0f,0x21,0xf8]
+        movl %dr7,%eax
+
+// radr://8017522
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+	fwait
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x05,0x7c,0x00,0x00,0x00]
+// FIXME: This is a correct bug poor encoding: Use 65 a1 7c 00 00 00 
+        movl	%gs:124, %eax
+
+// CHECK: pusha
+// CHECK:  encoding: [0x60]
+        	pusha
+
+// CHECK: popa
+// CHECK:  encoding: [0x61]
+        	popa
+
+// CHECK: pushal
+// CHECK:  encoding: [0x60]
+        	pushal
+
+// CHECK: popal
+// CHECK:  encoding: [0x61]
+        	popal
+
+// CHECK: jmpl *8(%eax)
+// CHECK:   encoding: [0xff,0x60,0x08]
+	jmp	*8(%eax)
+
+// PR7465
+// CHECK: lcalll $2, $4660
+// CHECK:   encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00]
+lcalll $0x2, $0x1234
+
+
+// rdar://8061602
+L1:
+  jcxz L1
+// CHECK: jcxz L1
+// CHECK:   encoding: [0x67,0xe3,A]
+  jecxz L1
+// CHECK: jecxz L1
+// CHECK:   encoding: [0xe3,A]
+
+// rdar://8403974
+iret
+// CHECK: iretl
+// CHECK: encoding: [0xcf]
+iretw
+// CHECK: iretw
+// CHECK: encoding: [0x66,0xcf]
+iretl
+// CHECK: iretl
+// CHECK: encoding: [0xcf]
+
+// rdar://8403907
+sysret
+// CHECK: sysretl
+// CHECK: encoding: [0x0f,0x07]
+sysretl
+// CHECK: sysretl
+// CHECK: encoding: [0x0f,0x07]
+
+// rdar://8018260
+testl	%ecx, -24(%ebp)
+// CHECK: testl	-24(%ebp), %ecx
+testl	-24(%ebp), %ecx
+// CHECK: testl	-24(%ebp), %ecx
+
+
+// rdar://8407242
+push %cs
+// CHECK: pushl	%cs
+// CHECK: encoding: [0x0e]
+push %ds
+// CHECK: pushl	%ds
+// CHECK: encoding: [0x1e]
+push %ss
+// CHECK: pushl	%ss
+// CHECK: encoding: [0x16]
+push %es
+// CHECK: pushl	%es
+// CHECK: encoding: [0x06]
+push %fs
+// CHECK: pushl	%fs
+// CHECK: encoding: [0x0f,0xa0]
+push %gs
+// CHECK: pushl	%gs
+// CHECK: encoding: [0x0f,0xa8]
+
+pushw %cs
+// CHECK: pushw	%cs
+// CHECK: encoding: [0x66,0x0e]
+pushw %ds
+// CHECK: pushw	%ds
+// CHECK: encoding: [0x66,0x1e]
+pushw %ss
+// CHECK: pushw	%ss
+// CHECK: encoding: [0x66,0x16]
+pushw %es
+// CHECK: pushw	%es
+// CHECK: encoding: [0x66,0x06]
+pushw %fs
+// CHECK: pushw	%fs
+// CHECK: encoding: [0x66,0x0f,0xa0]
+pushw %gs
+// CHECK: pushw	%gs
+// CHECK: encoding: [0x66,0x0f,0xa8]
+
+pop %ss
+// CHECK: popl	%ss
+// CHECK: encoding: [0x17]
+pop %ds
+// CHECK: popl	%ds
+// CHECK: encoding: [0x1f]
+pop %es
+// CHECK: popl	%es
+// CHECK: encoding: [0x07]
+
+// rdar://8408129
+pushfd
+// CHECK: pushfl
+popfd
+// CHECK: popfl
+pushfl
+// CHECK: pushfl
+popfl
+// CHECK: popfl
+
+
+// rdar://8416805
+	setc	%bl
+	setnae	%bl
+	setnb	%bl
+	setnc	%bl
+	setna	%bl
+	setnbe	%bl
+	setpe	%bl
+	setpo	%bl
+	setnge	%bl
+	setnl	%bl
+	setng	%bl
+	setnle	%bl
+
+// PR8686
+        setneb  %cl // CHECK: setne %cl
+	setcb	%bl // CHECK: setb %bl
+	setnaeb	%bl // CHECK: setb %bl
+
+
+// CHECK: lcalll	$31438, $31438
+// CHECK: lcalll	$31438, $31438
+// CHECK: ljmpl	$31438, $31438
+// CHECK: ljmpl	$31438, $31438
+
+calll	$0x7ace,$0x7ace
+lcalll	$0x7ace,$0x7ace
+jmpl	$0x7ace,$0x7ace
+ljmpl	$0x7ace,$0x7ace
+
+// CHECK: lcalll	$31438, $31438
+// CHECK: lcalll	$31438, $31438
+// CHECK: ljmpl	$31438, $31438
+// CHECK: ljmpl	$31438, $31438
+
+call	$0x7ace,$0x7ace
+lcall	$0x7ace,$0x7ace
+jmp	$0x7ace,$0x7ace
+ljmp	$0x7ace,$0x7ace
+
+// rdar://8456370
+// CHECK: calll a
+ calll a
+
+// CHECK:	incb	%al # encoding: [0xfe,0xc0]
+	incb %al
+
+// CHECK:	incw	%ax # encoding: [0x66,0x40]
+	incw %ax
+
+// CHECK:	incl	%eax # encoding: [0x40]
+	incl %eax
+
+// CHECK:	decb	%al # encoding: [0xfe,0xc8]
+	decb %al
+
+// CHECK:	decw	%ax # encoding: [0x66,0x48]
+	decw %ax
+
+// CHECK:	decl	%eax # encoding: [0x48]
+	decl %eax
+
+// CHECK: pshufw $14, %mm4, %mm0 # encoding: [0x0f,0x70,0xc4,0x0e]
+pshufw $14, %mm4, %mm0
+
+// CHECK: pshufw $90, %mm4, %mm0 # encoding: [0x0f,0x70,0xc4,0x5a]
+// PR8288
+pshufw $90, %mm4, %mm0
+
+// rdar://8416805
+// CHECK: aaa
+// CHECK:  encoding: [0x37]
+        	aaa
+
+// CHECK: aad	$1
+// CHECK:  encoding: [0xd5,0x01]
+        	aad	$1
+
+// CHECK: aad	$10
+// CHECK:  encoding: [0xd5,0x0a]
+        	aad	$0xA
+
+// CHECK: aad	$10
+// CHECK:  encoding: [0xd5,0x0a]
+        	aad
+
+// CHECK: aam	$2
+// CHECK:  encoding: [0xd4,0x02]
+        	aam	$2
+
+// CHECK: aam	$10
+// CHECK:  encoding: [0xd4,0x0a]
+        	aam	$0xA
+
+// CHECK: aam	$10
+// CHECK:  encoding: [0xd4,0x0a]
+        	aam
+
+// CHECK: aas
+// CHECK:  encoding: [0x3f]
+        	aas
+
+// CHECK: daa
+// CHECK:  encoding: [0x27]
+        	daa
+
+// CHECK: das
+// CHECK:  encoding: [0x2f]
+        	das
+
+// CHECK: retw	$31438
+// CHECK:  encoding: [0x66,0xc2,0xce,0x7a]
+        	retw	$0x7ace
+
+// CHECK: lretw	$31438
+// CHECK:  encoding: [0x66,0xca,0xce,0x7a]
+        	lretw	$0x7ace
+
+// CHECK: bound	2(%eax), %bx
+// CHECK:  encoding: [0x66,0x62,0x58,0x02]
+        	bound	2(%eax),%bx
+
+// CHECK: bound	4(%ebx), %ecx
+// CHECK:  encoding: [0x62,0x4b,0x04]
+        	bound	4(%ebx),%ecx
+
+// CHECK: arpl	%bx, %bx
+// CHECK:  encoding: [0x63,0xdb]
+        	arpl	%bx,%bx
+
+// CHECK: arpl	%bx, 6(%ecx)
+// CHECK:  encoding: [0x63,0x59,0x06]
+        	arpl	%bx,6(%ecx)
+
+// CHECK: lgdtw	4(%eax)
+// CHECK:  encoding: [0x66,0x0f,0x01,0x50,0x04]
+        	lgdtw	4(%eax)
+
+// CHECK: lgdt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+        	lgdt	4(%eax)
+
+// CHECK: lgdt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+        	lgdtl	4(%eax)
+
+// CHECK: lidtw	4(%eax)
+// CHECK:  encoding: [0x66,0x0f,0x01,0x58,0x04]
+        	lidtw	4(%eax)
+
+// CHECK: lidt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x58,0x04]
+        	lidt	4(%eax)
+
+// CHECK: lidt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x58,0x04]
+        	lidtl	4(%eax)
+
+// CHECK: sgdtw	4(%eax)
+// CHECK:  encoding: [0x66,0x0f,0x01,0x40,0x04]
+        	sgdtw	4(%eax)
+
+// CHECK: sgdt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x40,0x04]
+        	sgdt	4(%eax)
+
+// CHECK: sgdt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x40,0x04]
+        	sgdtl	4(%eax)
+
+// CHECK: sidtw	4(%eax)
+// CHECK:  encoding: [0x66,0x0f,0x01,0x48,0x04]
+        	sidtw	4(%eax)
+
+// CHECK: sidt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x48,0x04]
+        	sidt	4(%eax)
+
+// CHECK: sidt	4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x48,0x04]
+        	sidtl	4(%eax)
+
+// CHECK: fcompi	%st(2)
+// CHECK:  encoding: [0xdf,0xf2]
+        	fcompi	%st(2), %st
+
+// CHECK: fcompi	%st(2)
+// CHECK:  encoding: [0xdf,0xf2]
+        	fcompi	%st(2)
+
+// CHECK: fcompi	%st(1)
+// CHECK:  encoding: [0xdf,0xf1]
+        	fcompi
+
+// CHECK: fucompi	%st(2)
+// CHECK:  encoding: [0xdf,0xea]
+        	fucompi	%st(2),%st
+
+// CHECK: fucompi	%st(2)
+// CHECK:  encoding: [0xdf,0xea]
+        	fucompi	%st(2)
+
+// CHECK: fucompi	%st(1)
+// CHECK:  encoding: [0xdf,0xe9]
+        	fucompi
+
+// CHECK: fldcw	32493
+// CHECK:  encoding: [0xd9,0x2d,0xed,0x7e,0x00,0x00]
+        	fldcww	0x7eed
+
+// CHECK: fldcw	32493
+// CHECK:  encoding: [0xd9,0x2d,0xed,0x7e,0x00,0x00]
+        	fldcw	0x7eed
+
+// CHECK: fnstcw	32493
+// CHECK:  encoding: [0xd9,0x3d,0xed,0x7e,0x00,0x00]
+        	fnstcww	0x7eed
+
+// CHECK: fnstcw	32493
+// CHECK:  encoding: [0xd9,0x3d,0xed,0x7e,0x00,0x00]
+        	fnstcw	0x7eed
+
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+        	fstcww	0x7eed
+
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+        	fstcw	0x7eed
+
+// CHECK: fnstsw	32493
+// CHECK:  encoding: [0xdd,0x3d,0xed,0x7e,0x00,0x00]
+        	fnstsww	0x7eed
+
+// CHECK: fnstsw	32493
+// CHECK:  encoding: [0xdd,0x3d,0xed,0x7e,0x00,0x00]
+        	fnstsw	0x7eed
+
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+        	fstsww	0x7eed
+
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+        	fstsw	0x7eed
+
+// CHECK: verr	32493
+// CHECK:  encoding: [0x0f,0x00,0x25,0xed,0x7e,0x00,0x00]
+        	verrw	0x7eed
+
+// CHECK: verr	32493
+// CHECK:  encoding: [0x0f,0x00,0x25,0xed,0x7e,0x00,0x00]
+        	verr	0x7eed
+
+// CHECK: wait
+// CHECK:  encoding: [0x9b]
+        	fclex
+
+// CHECK: fnclex
+// CHECK:  encoding: [0xdb,0xe2]
+        	fnclex
+
+// CHECK: ud2
+// CHECK:  encoding: [0x0f,0x0b]
+        	ud2
+
+// CHECK: ud2
+// CHECK:  encoding: [0x0f,0x0b]
+        	ud2a
+
+// CHECK: ud2b
+// CHECK:  encoding: [0x0f,0xb9]
+        	ud2b
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
new file mode 100644
index 000000000000..c8b6414d59a6
--- /dev/null
+++ b/test/MC/X86/x86-64.s
@@ -0,0 +1,944 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s > %t 2> %t.err
+// RUN: FileCheck < %t %s
+// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s
+
+	monitor
+// CHECK: monitor
+// CHECK: encoding: [0x0f,0x01,0xc8]
+	monitor %rax, %rcx, %rdx
+// CHECK: monitor
+// CHECK: encoding: [0x0f,0x01,0xc8]
+	mwait
+// CHECK: mwait
+// CHECK: encoding: [0x0f,0x01,0xc9]
+	mwait %rax, %rcx
+// CHECK: mwait
+// CHECK: encoding: [0x0f,0x01,0xc9]
+
+// Suffix inference:
+
+// CHECK: addl $0, %eax
+        add $0, %eax
+// CHECK: addb $255, %al
+        add $0xFF, %al
+// CHECK: orq %rax, %rdx
+        or %rax, %rdx
+// CHECK: shlq $3, %rax
+        shl $3, %rax
+
+
+// CHECK: subb %al, %al
+        subb %al, %al
+
+// CHECK: addl $24, %eax
+        addl $24, %eax
+
+// CHECK: movl %eax, 10(%ebp)
+        movl %eax, 10(%ebp)
+// CHECK: movl %eax, 10(%ebp,%ebx)
+        movl %eax, 10(%ebp, %ebx)
+// CHECK: movl %eax, 10(%ebp,%ebx,4)
+        movl %eax, 10(%ebp, %ebx, 4)
+// CHECK: movl %eax, 10(,%ebx,4)
+        movl %eax, 10(, %ebx, 4)
+
+// CHECK: movl 0, %eax        
+        movl 0, %eax
+// CHECK: movl $0, %eax        
+        movl $0, %eax
+        
+// CHECK: ret
+        ret
+        
+// FIXME: Check that this matches SUB32ri8
+// CHECK: subl $1, %eax
+        subl $1, %eax
+        
+// FIXME: Check that this matches SUB32ri8
+// CHECK: subl $-1, %eax
+        subl $-1, %eax
+        
+// FIXME: Check that this matches SUB32ri
+// CHECK: subl $256, %eax
+        subl $256, %eax
+
+// FIXME: Check that this matches XOR64ri8
+// CHECK: xorq $1, %rax
+        xorq $1, %rax
+        
+// FIXME: Check that this matches XOR64ri32
+// CHECK: xorq $256, %rax
+        xorq $256, %rax
+
+// FIXME: Check that this matches SUB8rr
+// CHECK: subb %al, %bl
+        subb %al, %bl
+
+// FIXME: Check that this matches SUB16rr
+// CHECK: subw %ax, %bx
+        subw %ax, %bx
+        
+// FIXME: Check that this matches SUB32rr
+// CHECK: subl %eax, %ebx
+        subl %eax, %ebx
+        
+// FIXME: Check that this matches the correct instruction.
+// CHECK: callq *%rax
+        call *%rax
+
+// FIXME: Check that this matches the correct instruction.
+// CHECK: shldl %cl, %eax, %ebx
+        shldl %cl, %eax, %ebx
+
+// CHECK: shll $2, %eax
+        shll $2, %eax
+
+// CHECK: shll $2, %eax
+        sall $2, %eax
+
+// CHECK: rep
+// CHECK: insb
+        rep;insb
+
+// CHECK: rep
+// CHECK: outsb
+        rep;outsb
+
+// CHECK: rep
+// CHECK: movsb
+        rep;movsb
+
+
+// rdar://8470918
+smovb // CHECK: movsb
+smovw // CHECK: movsw
+smovl // CHECK: movsl
+smovq // CHECK: movsq
+
+// rdar://8456361
+// CHECK: rep
+// CHECK: movsl
+        rep movsd
+
+// CHECK: rep
+// CHECK: lodsb
+        rep;lodsb
+
+// CHECK: rep
+// CHECK: stosb
+        rep;stosb
+
+// NOTE: repz and repe have the same opcode as rep
+// CHECK: rep
+// CHECK: cmpsb
+        repz;cmpsb
+
+// NOTE: repnz has the same opcode as repne
+// CHECK: repne
+// CHECK: cmpsb
+        repnz;cmpsb
+
+// NOTE: repe and repz have the same opcode as rep
+// CHECK: rep
+// CHECK: scasb
+        repe;scasb
+
+// CHECK: repne
+// CHECK: scasb
+        repne;scasb
+
+// CHECK: lock
+// CHECK: cmpxchgb %al, (%ebx)
+        lock;cmpxchgb %al, 0(%ebx)
+
+// CHECK: cs
+// CHECK: movb (%eax), %al
+        cs;movb 0(%eax), %al
+
+// CHECK: ss
+// CHECK: movb (%eax), %al
+        ss;movb 0(%eax), %al
+
+// CHECK: ds
+// CHECK: movb (%eax), %al
+        ds;movb 0(%eax), %al
+
+// CHECK: es
+// CHECK: movb (%eax), %al
+        es;movb 0(%eax), %al
+
+// CHECK: fs
+// CHECK: movb (%eax), %al
+        fs;movb 0(%eax), %al
+
+// CHECK: gs
+// CHECK: movb (%eax), %al
+        gs;movb 0(%eax), %al
+
+// CHECK: fadd %st(0)
+// CHECK: fadd %st(1)
+// CHECK: fadd %st(7)
+
+fadd %st(0)
+fadd %st(1)
+fadd %st(7)
+
+// CHECK: leal 0, %eax
+        leal 0, %eax
+
+// rdar://7986634 - Insensitivity on opcodes.
+// CHECK: int3
+INT3
+
+
+// Allow scale factor without index register.
+// CHECK: movaps	%xmm3, (%esi)
+// CHECK-STDERR: warning: scale factor without index register is ignored
+movaps %xmm3, (%esi, 2)
+
+// CHECK: imull $12, %eax, %eax
+imul $12, %eax
+
+// CHECK: imull %ecx, %eax
+imull %ecx, %eax
+
+
+// rdar://8208481
+// CHECK: outb	%al, $161
+outb	%al, $161
+// CHECK: outw	%ax, $128
+outw	%ax, $128
+// CHECK: inb	$161, %al
+inb	$161, %al
+
+// rdar://8017621
+// CHECK: pushq	$1
+push $1
+
+// rdar://8017530
+// CHECK: sldtw	4
+sldt	4
+
+// rdar://8208499
+// CHECK: cmovnew	%bx, %ax
+cmovnz %bx, %ax
+// CHECK: cmovneq	%rbx, %rax
+cmovnzq %rbx, %rax
+
+
+// rdar://8407928
+// CHECK: inb	$127, %al
+// CHECK: inw	%dx, %ax
+// CHECK: outb	%al, $127
+// CHECK: outw	%ax, %dx
+// CHECK: inl	%dx, %eax
+inb	$0x7f
+inw	%dx
+outb	$0x7f
+outw	%dx
+inl	%dx
+
+
+// PR8114
+// CHECK: outb	%al, %dx
+// CHECK: outw	%ax, %dx
+// CHECK: outl	%eax, %dx
+
+out %al, (%dx)
+out %ax, (%dx)
+outl %eax, (%dx)
+
+
+// rdar://8431422
+
+// CHECK: fxch	%st(1)
+// CHECK: fucom	%st(1)
+// CHECK: fucomp	%st(1)
+// CHECK: faddp	%st(1)
+// CHECK: faddp	%st(0)
+// CHECK: fsubp	%st(1)
+// CHECK: fsubrp	%st(1)
+// CHECK: fmulp	%st(1)
+// CHECK: fdivp	%st(1)
+// CHECK: fdivrp	%st(1)
+
+fxch
+fucom
+fucomp
+faddp
+faddp %st
+fsubp
+fsubrp
+fmulp
+fdivp
+fdivrp
+
+// CHECK: fcomi	%st(1)
+// CHECK: fcomi	%st(2)
+// CHECK: fucomi	%st(1)
+// CHECK: fucomi	%st(2)
+// CHECK: fucomi	%st(2)
+
+fcomi
+fcomi	%st(2)
+fucomi
+fucomi	%st(2)
+fucomi	%st(2), %st
+
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+
+fnstsw
+fnstsw %ax
+fnstsw %eax
+fnstsw %al
+
+// rdar://8431880
+// CHECK: rclb	%bl
+// CHECK: rcll	3735928559(%ebx,%ecx,8)
+// CHECK: rcrl	%ecx
+// CHECK: rcrl	305419896
+rcl	%bl
+rcll	0xdeadbeef(%ebx,%ecx,8)
+rcr	%ecx
+rcrl	0x12345678
+
+rclb	%bl       // CHECK: rclb %bl     # encoding: [0xd0,0xd3]
+rclb	$1, %bl   // CHECK: rclb %bl     # encoding: [0xd0,0xd3]
+rclb	$2, %bl   // CHECK: rclb $2, %bl # encoding: [0xc0,0xd3,0x02]
+
+// rdar://8418316
+// CHECK: shldw	$1, %bx, %bx
+// CHECK: shldw	$1, %bx, %bx
+// CHECK: shrdw	$1, %bx, %bx
+// CHECK: shrdw	$1, %bx, %bx
+
+shld	%bx,%bx
+shld	$1, %bx,%bx
+shrd	%bx,%bx
+shrd	$1, %bx,%bx
+
+// CHECK: sldtl	%ecx
+// CHECK: encoding: [0x0f,0x00,0xc1]
+// CHECK: sldtw	%cx
+// CHECK: encoding: [0x66,0x0f,0x00,0xc1]
+
+sldt	%ecx
+sldt	%cx
+
+// CHECK: lcalll	*3135175374 
+// CHECK: ljmpl	*3135175374
+lcall	*0xbadeface
+ljmp	*0xbadeface
+
+
+// rdar://8444631
+// CHECK: enter	$31438, $0
+// CHECK: encoding: [0xc8,0xce,0x7a,0x00]
+// CHECK: enter	$31438, $1
+// CHECK: encoding: [0xc8,0xce,0x7a,0x01]
+// CHECK: enter	$31438, $127
+// CHECK: encoding: [0xc8,0xce,0x7a,0x7f]
+enter $0x7ace,$0
+enter $0x7ace,$1
+enter $0x7ace,$0x7f
+
+
+// rdar://8456364
+// CHECK: movw	%cs, %ax
+mov %CS, %ax
+
+// rdar://8456391
+fcmovb %st(1), %st(0)   // CHECK: fcmovb	%st(1), %st(0)
+fcmove %st(1), %st(0)   // CHECK: fcmove	%st(1), %st(0)
+fcmovbe %st(1), %st(0)  // CHECK: fcmovbe	%st(1), %st(0)
+fcmovu %st(1), %st(0)   // CHECK: fcmovu	 %st(1), %st(0)
+
+fcmovnb %st(1), %st(0)  // CHECK: fcmovnb	%st(1), %st(0)
+fcmovne %st(1), %st(0)  // CHECK: fcmovne	%st(1), %st(0)
+fcmovnbe %st(1), %st(0) // CHECK: fcmovnbe	%st(1), %st(0)
+fcmovnu %st(1), %st(0)  // CHECK: fcmovnu	%st(1), %st(0)
+
+fcmovnae %st(1), %st(0) // CHECK: fcmovb	%st(1), %st(0)
+fcmovna %st(1), %st(0)  // CHECK: fcmovbe	%st(1), %st(0)
+
+fcmovae %st(1), %st(0)  // CHECK: fcmovnb	%st(1), %st(0)
+fcmova %st(1), %st(0)   // CHECK: fcmovnbe	%st(1), %st(0)
+
+// rdar://8456417
+.byte 88 + 1 & 15  // CHECK: .byte	9
+
+// rdar://8456412
+mov %rdx, %cr0
+// CHECK: movq	%rdx, %cr0
+// CHECK: encoding: [0x0f,0x22,0xc2]
+mov %rdx, %cr4
+// CHECK: movq	%rdx, %cr4
+// CHECK: encoding: [0x0f,0x22,0xe2]
+mov %rdx, %cr8
+// CHECK: movq	%rdx, %cr8
+// CHECK: encoding: [0x44,0x0f,0x22,0xc2]
+mov %rdx, %cr15
+// CHECK: movq	%rdx, %cr15
+// CHECK: encoding: [0x44,0x0f,0x22,0xfa]
+
+// rdar://8456371 - Handle commutable instructions written backward.
+// CHECK: 	faddp	%st(1)
+// CHECK:	fmulp	%st(2)
+faddp %st, %st(1)
+fmulp %st, %st(2)
+
+// rdar://8468087 - Encode these accurately, they are not synonyms.
+// CHECK: fmul	%st(0), %st(1)
+// CHECK: encoding: [0xdc,0xc9]
+// CHECK: fmul	%st(1)
+// CHECK: encoding: [0xd8,0xc9]
+fmul %st, %st(1)
+fmul %st(1), %st
+
+// CHECK: fadd	%st(0), %st(1)
+// CHECK: encoding: [0xdc,0xc1]
+// CHECK: fadd	%st(1)
+// CHECK: encoding: [0xd8,0xc1]
+fadd %st, %st(1)
+fadd %st(1), %st
+
+
+// rdar://8416805
+// CHECK: xorb	%al, %al
+// CHECK: encoding: [0x30,0xc0]
+// CHECK: xorw	%di, %di
+// CHECK: encoding: [0x66,0x31,0xff]
+// CHECK: xorl	%esi, %esi
+// CHECK: encoding: [0x31,0xf6]
+// CHECK: xorq	%rsi, %rsi
+// CHECK: encoding: [0x48,0x31,0xf6]
+clrb    %al
+clr    %di
+clr    %esi
+clr    %rsi
+
+// rdar://8456378
+cltq  // CHECK: cltq
+cdqe  // CHECK: cltq
+cwde  // CHECK: cwtl
+cwtl  // CHECK: cwtl
+
+// rdar://8416805
+cbw   // CHECK: cbtw
+cwd   // CHECK: cwtd
+cdq   // CHECK: cltd
+
+// rdar://8456378 and PR7557 - fstsw
+fstsw %ax
+// CHECK: wait
+// CHECK: fnstsw %ax
+fstsw (%rax)
+// CHECK: wait
+// CHECK: fnstsw (%rax)
+
+// PR8259
+fstcw (%rsp)
+// CHECK: wait
+// CHECK: fnstcw (%rsp)
+
+// PR8259
+fstcw (%rsp)
+// CHECK: wait
+// CHECK: fnstcw (%rsp)
+
+// PR8258
+finit
+// CHECK: wait
+// CHECK: fninit
+
+fsave	32493
+// CHECK: wait
+// CHECK: fnsave 32493
+
+
+// rdar://8456382 - cvtsd2si support.
+cvtsd2si	%xmm1, %rax
+// CHECK: cvtsd2siq	%xmm1, %rax
+// CHECK: encoding: [0xf2,0x48,0x0f,0x2d,0xc1]
+cvtsd2si	%xmm1, %eax
+// CHECK: cvtsd2sil	%xmm1, %eax
+// CHECK: encoding: [0xf2,0x0f,0x2d,0xc1]
+
+cvtsd2siq %xmm0, %rax // CHECK: cvtsd2siq	%xmm0, %rax
+cvtsd2sil %xmm0, %eax // CHECK: cvtsd2sil	%xmm0, %eax
+cvtsd2si %xmm0, %rax  // CHECK: cvtsd2siq	%xmm0, %rax
+
+
+cvttpd2dq %xmm1, %xmm0  // CHECK: cvttpd2dq %xmm1, %xmm0
+cvttpd2dq (%rax), %xmm0 // CHECK: cvttpd2dq (%rax), %xmm0
+
+cvttps2dq %xmm1, %xmm0  // CHECK: cvttps2dq %xmm1, %xmm0
+cvttps2dq (%rax), %xmm0 // CHECK: cvttps2dq (%rax), %xmm0
+
+// rdar://8456376 - llvm-mc rejects 'roundss'
+roundss $0xE, %xmm0, %xmm0 // CHECK: encoding: [0x66,0x0f,0x3a,0x0a,0xc0,0x0e]
+roundps $0xE, %xmm0, %xmm0 // CHECK: encoding: [0x66,0x0f,0x3a,0x08,0xc0,0x0e]
+roundsd $0xE, %xmm0, %xmm0 // CHECK: encoding: [0x66,0x0f,0x3a,0x0b,0xc0,0x0e]
+roundpd $0xE, %xmm0, %xmm0 // CHECK: encoding: [0x66,0x0f,0x3a,0x09,0xc0,0x0e]
+
+
+// rdar://8482675 - 32-bit mem operand support in 64-bit mode (0x67 prefix)
+leal	8(%eax), %esi
+// CHECK: leal	8(%eax), %esi
+// CHECK: encoding: [0x67,0x8d,0x70,0x08]
+leaq	8(%eax), %rsi
+// CHECK: leaq	8(%eax), %rsi
+// CHECK: encoding: [0x67,0x48,0x8d,0x70,0x08]
+leaq	8(%rax), %rsi
+// CHECK: leaq	8(%rax), %rsi
+// CHECK: encoding: [0x48,0x8d,0x70,0x08]
+
+
+cvttpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvttpd2dq	3735928559(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0x67,0x66,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+
+// rdar://8490728 - llvm-mc rejects 'movmskpd'
+movmskpd	%xmm6, %rax
+// CHECK: movmskpd	%xmm6, %rax
+// CHECK: encoding: [0x66,0x48,0x0f,0x50,0xc6]
+movmskpd	%xmm6, %eax
+// CHECK: movmskpd	%xmm6, %eax
+// CHECK: encoding: [0x66,0x0f,0x50,0xc6]
+
+// rdar://8491845 - Gas supports commuted forms of non-commutable instructions.
+fdivrp %st(0), %st(1) // CHECK: encoding: [0xde,0xf9]
+fdivrp %st(1), %st(0) // CHECK: encoding: [0xde,0xf9]
+
+fsubrp %ST(0), %ST(1) // CHECK: encoding: [0xde,0xe9]
+fsubrp %ST(1), %ST(0) // CHECK: encoding: [0xde,0xe9]
+
+// also PR8861
+fdivp %st(0), %st(1) // CHECK: encoding: [0xde,0xf1]
+fdivp %st(1), %st(0) // CHECK: encoding: [0xde,0xf1]
+
+
+movl	foo(%rip), %eax
+// CHECK: movl	foo(%rip), %eax
+// CHECK: encoding: [0x8b,0x05,A,A,A,A]
+// CHECK: fixup A - offset: 2, value: foo-4, kind: reloc_riprel_4byte
+
+movb	$12, foo(%rip)
+// CHECK: movb	$12, foo(%rip)
+// CHECK: encoding: [0xc6,0x05,A,A,A,A,0x0c]
+// CHECK:    fixup A - offset: 2, value: foo-5, kind: reloc_riprel_4byte
+
+movw	$12, foo(%rip)
+// CHECK: movw	$12, foo(%rip)
+// CHECK: encoding: [0x66,0xc7,0x05,A,A,A,A,0x0c,0x00]
+// CHECK:    fixup A - offset: 3, value: foo-6, kind: reloc_riprel_4byte
+
+movl	$12, foo(%rip)
+// CHECK: movl	$12, foo(%rip)
+// CHECK: encoding: [0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
+// CHECK:    fixup A - offset: 2, value: foo-8, kind: reloc_riprel_4byte
+
+movq	$12, foo(%rip)
+// CHECK:  movq	$12, foo(%rip)
+// CHECK: encoding: [0x48,0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
+// CHECK:    fixup A - offset: 3, value: foo-8, kind: reloc_riprel_4byte
+
+// CHECK: addq	$-424, %rax
+// CHECK: encoding: [0x48,0x05,0x58,0xfe,0xff,0xff]
+addq $-424, %rax
+
+
+// CHECK: movq	_foo@GOTPCREL(%rip), %rax
+// CHECK:  encoding: [0x48,0x8b,0x05,A,A,A,A]
+// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+movq _foo@GOTPCREL(%rip), %rax
+
+// CHECK: movq	_foo@GOTPCREL(%rip), %r14
+// CHECK:  encoding: [0x4c,0x8b,0x35,A,A,A,A]
+// CHECK:  fixup A - offset: 3, value: _foo@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+movq _foo@GOTPCREL(%rip), %r14
+
+
+// CHECK: movq	(%r13,%rax,8), %r13
+// CHECK:  encoding: [0x4d,0x8b,0x6c,0xc5,0x00]
+movq 0x00(%r13,%rax,8),%r13
+
+// CHECK: testq	%rax, %rbx
+// CHECK:  encoding: [0x48,0x85,0xd8]
+testq %rax, %rbx
+
+// CHECK: cmpq	%rbx, %r14
+// CHECK:   encoding: [0x49,0x39,0xde]
+        cmpq %rbx, %r14
+
+// rdar://7947167
+
+movsq
+// CHECK: movsq
+// CHECK:   encoding: [0x48,0xa5]
+
+movsl
+// CHECK: movsl
+// CHECK:   encoding: [0xa5]
+
+stosq
+// CHECK: stosq
+// CHECK:   encoding: [0x48,0xab]
+stosl
+// CHECK: stosl
+// CHECK:   encoding: [0xab]
+
+
+// Not moffset forms of moves, they are x86-32 only! rdar://7947184
+movb	0, %al    // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00]
+movw	0, %ax    // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
+movl	0, %eax   // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
+
+// CHECK: pushfq	# encoding: [0x9c]
+        pushf
+// CHECK: pushfq	# encoding: [0x9c]
+        pushfq
+// CHECK: popfq	        # encoding: [0x9d]
+        popf
+// CHECK: popfq	        # encoding: [0x9d]
+        popfq
+
+// CHECK: movabsq $-281474976710654, %rax
+// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
+        movabsq $0xFFFF000000000002, %rax
+
+// CHECK: movabsq $-281474976710654, %rax
+// CHECK: encoding: [0x48,0xb8,0x02,0x00,0x00,0x00,0x00,0x00,0xff,0xff]
+        movq $0xFFFF000000000002, %rax
+
+// CHECK: movq $-65536, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0x00,0xff,0xff]
+        movq $0xFFFFFFFFFFFF0000, %rax
+
+// CHECK: movq $-256, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x00,0xff,0xff,0xff]
+        movq $0xFFFFFFFFFFFFFF00, %rax
+
+// CHECK: movq $10, %rax
+// CHECK: encoding: [0x48,0xc7,0xc0,0x0a,0x00,0x00,0x00]
+        movq $10, %rax
+
+// rdar://8014869
+//
+// CHECK: ret
+// CHECK:  encoding: [0xc3]
+        retq
+
+// CHECK: sete %al
+// CHECK: encoding: [0x0f,0x94,0xc0]
+        setz %al
+
+// CHECK: setne %al
+// CHECK: encoding: [0x0f,0x95,0xc0]
+        setnz %al
+
+// CHECK: je 0
+// CHECK: encoding: [0x74,A]
+        jz 0
+
+// CHECK: jne
+// CHECK: encoding: [0x75,A]
+        jnz 0
+
+// PR9264
+btl	$1, 0 // CHECK: btl $1, 0 # encoding: [0x0f,0xba,0x24,0x25,0x00,0x00,0x00,0x00,0x01]
+bt	$1, 0 // CHECK: btl $1, 0 # encoding: [0x0f,0xba,0x24,0x25,0x00,0x00,0x00,0x00,0x01]
+
+// rdar://8017515
+btq $0x01,%rdx
+// CHECK: btq	$1, %rdx
+// CHECK:  encoding: [0x48,0x0f,0xba,0xe2,0x01]
+
+//rdar://8017633
+// CHECK: movzbl	%al, %esi
+// CHECK:  encoding: [0x0f,0xb6,0xf0]
+        movzx %al, %esi
+
+// CHECK: movzbq	%al, %rsi
+// CHECK:  encoding: [0x48,0x0f,0xb6,0xf0]
+        movzx %al, %rsi
+
+// CHECK: movsbw	%al, %ax
+// CHECK: encoding: [0x66,0x0f,0xbe,0xc0]
+movsx %al, %ax
+
+// CHECK: movsbl	%al, %eax
+// CHECK: encoding: [0x0f,0xbe,0xc0]
+movsx %al, %eax
+
+// CHECK: movswl	%ax, %eax
+// CHECK: encoding: [0x0f,0xbf,0xc0]
+movsx %ax, %eax
+
+// CHECK: movsbq	%bl, %rax
+// CHECK: encoding: [0x48,0x0f,0xbe,0xc3]
+movsx %bl, %rax
+
+// CHECK: movswq %cx, %rax
+// CHECK: encoding: [0x48,0x0f,0xbf,0xc1]
+movsx %cx, %rax
+
+// CHECK: movslq	%edi, %rax
+// CHECK: encoding: [0x48,0x63,0xc7]
+movsx %edi, %rax
+
+// CHECK: movzbw	%al, %ax
+// CHECK: encoding: [0x66,0x0f,0xb6,0xc0]
+movzx %al, %ax
+
+// CHECK: movzbl	%al, %eax
+// CHECK: encoding: [0x0f,0xb6,0xc0]
+movzx %al, %eax
+
+// CHECK: movzwl	%ax, %eax
+// CHECK: encoding: [0x0f,0xb7,0xc0]
+movzx %ax, %eax
+
+// CHECK: movzbq	%bl, %rax
+// CHECK: encoding: [0x48,0x0f,0xb6,0xc3]
+movzx %bl, %rax
+
+// CHECK: movzwq	%cx, %rax
+// CHECK: encoding: [0x48,0x0f,0xb7,0xc1]
+movzx %cx, %rax
+
+// CHECK: movsbw	(%rax), %ax
+// CHECK: encoding: [0x66,0x0f,0xbe,0x00]
+movsx (%rax), %ax
+
+// CHECK: movzbw	(%rax), %ax
+// CHECK: encoding: [0x66,0x0f,0xb6,0x00]
+movzx (%rax), %ax
+
+
+// rdar://7873482
+// CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00]
+        movl	%gs:124, %eax
+
+// CHECK: jmpq *8(%rax)
+// CHECK:   encoding: [0xff,0x60,0x08]
+	jmp	*8(%rax)
+
+// CHECK: btq $61, -216(%rbp)
+// CHECK:   encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d]
+	btq	$61, -216(%rbp)
+
+
+// rdar://8061602
+L1:
+  jecxz L1
+// CHECK: jecxz L1
+// CHECK:   encoding: [0x67,0xe3,A]
+  jrcxz L1
+// CHECK: jrcxz L1
+// CHECK:   encoding: [0xe3,A]
+
+// PR8061
+xchgl   368(%rax),%ecx
+// CHECK: xchgl	%ecx, 368(%rax)
+xchgl   %ecx, 368(%rax)
+// CHECK: xchgl	%ecx, 368(%rax)
+
+// rdar://8407548
+xchg	0xdeadbeef(%rbx,%rcx,8),%bl
+// CHECK: xchgb	%bl, 3735928559(%rbx,%rcx,8)
+
+
+
+// PR7254
+lock  incl 1(%rsp)
+// CHECK: lock
+// CHECK: incl 1(%rsp)
+
+// rdar://8741045
+lock/incl 1(%rsp)
+// CHECK: lock
+// CHECK: incl 1(%rsp)
+
+// rdar://8033482
+rep movsl
+// CHECK: rep
+// CHECK: encoding: [0xf3]
+// CHECK: movsl
+// CHECK: encoding: [0xa5]
+
+
+// rdar://8403974
+iret
+// CHECK: iretl
+// CHECK: encoding: [0xcf]
+iretw
+// CHECK: iretw
+// CHECK: encoding: [0x66,0xcf]
+iretl
+// CHECK: iretl
+// CHECK: encoding: [0xcf]
+iretq
+// CHECK: iretq
+// CHECK: encoding: [0x48,0xcf]
+
+// rdar://8416805
+// CHECK: retw	$31438
+// CHECK:  encoding: [0x66,0xc2,0xce,0x7a]
+        	retw	$0x7ace
+
+// CHECK: lretw	$31438
+// CHECK:  encoding: [0x66,0xca,0xce,0x7a]
+        	lretw	$0x7ace
+
+// PR8592
+lretq  // CHECK: lretq # encoding: [0x48,0xcb]
+lretl  // CHECK: lretl # encoding: [0xcb]
+lret   // CHECK: lretl # encoding: [0xcb]
+
+// rdar://8403907
+sysret
+// CHECK: sysretl
+// CHECK: encoding: [0x0f,0x07]
+sysretl
+// CHECK: sysretl
+// CHECK: encoding: [0x0f,0x07]
+sysretq
+// CHECK: sysretq
+// CHECK: encoding: [0x48,0x0f,0x07]
+
+// rdar://8407242
+push %fs
+// CHECK: pushq	%fs
+// CHECK: encoding: [0x0f,0xa0]
+push %gs
+// CHECK: pushq	%gs
+// CHECK: encoding: [0x0f,0xa8]
+
+pushw %fs
+// CHECK: pushw	%fs
+// CHECK: encoding: [0x66,0x0f,0xa0]
+pushw %gs
+// CHECK: pushw	%gs
+// CHECK: encoding: [0x66,0x0f,0xa8]
+
+
+pop %fs
+// CHECK: popq	%fs
+// CHECK: encoding: [0x0f,0xa1]
+pop %gs
+// CHECK: popq	%gs
+// CHECK: encoding: [0x0f,0xa9]
+
+popw %fs
+// CHECK: popw	%fs
+// CHECK: encoding: [0x66,0x0f,0xa1]
+popw %gs
+// CHECK: popw	%gs
+// CHECK: encoding: [0x66,0x0f,0xa9]
+
+// rdar://8438816
+fildq -8(%rsp)
+fildll -8(%rsp)
+// CHECK: fildll	-8(%rsp)
+// CHECK: encoding: [0xdf,0x6c,0x24,0xf8]
+// CHECK: fildll	-8(%rsp)
+// CHECK: encoding: [0xdf,0x6c,0x24,0xf8]
+
+// CHECK: callq a
+        callq a
+
+// CHECK: leaq	-40(%rbp), %r15
+	leaq	-40(%rbp), %r15
+
+
+
+// rdar://8013734 - Alias dr6=db6
+mov %dr6, %rax
+mov %db6, %rax
+// CHECK: movq	%dr6, %rax
+// CHECK: movq	%dr6, %rax
+
+
+// INC/DEC encodings.
+incb %al  // CHECK:	incb	%al # encoding: [0xfe,0xc0]
+incw %ax  // CHECK:	incw	%ax # encoding: [0x66,0xff,0xc0]
+incl %eax // CHECK:	incl	%eax # encoding: [0xff,0xc0]
+decb %al  // CHECK:	decb	%al # encoding: [0xfe,0xc8]
+decw %ax  // CHECK:	decw	%ax # encoding: [0x66,0xff,0xc8]
+decl %eax // CHECK:	decl	%eax # encoding: [0xff,0xc8]
+
+// rdar://8416805
+// CHECK: lgdt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+        	lgdt	4(%rax)
+
+// CHECK: lgdt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+        	lgdtq	4(%rax)
+
+// CHECK: lidt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x58,0x04]
+        	lidt	4(%rax)
+
+// CHECK: lidt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x58,0x04]
+        	lidtq	4(%rax)
+
+// CHECK: sgdt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x40,0x04]
+        	sgdt	4(%rax)
+
+// CHECK: sgdt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x40,0x04]
+        	sgdtq	4(%rax)
+
+// CHECK: sidt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x48,0x04]
+        	sidt	4(%rax)
+
+// CHECK: sidt	4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x48,0x04]
+        	sidtq	4(%rax)
+
+
+// rdar://8208615
+mov (%rsi), %gs  // CHECK: movl	(%rsi), %gs # encoding: [0x8e,0x2e]
+mov %gs, (%rsi)  // CHECK: movl	%gs, (%rsi) # encoding: [0x8c,0x2e]
+
+
+// rdar://8431864
+	div	%bl,%al
+	div	%bx,%ax
+	div	%ecx,%eax
+	div	0xdeadbeef(%ebx,%ecx,8),%eax
+	div	0x45,%eax
+	div	0x7eed,%eax
+	div	0xbabecafe,%eax
+	div	0x12345678,%eax
+	idiv	%bl,%al
+	idiv	%bx,%ax
+	idiv	%ecx,%eax
+	idiv	0xdeadbeef(%ebx,%ecx,8),%eax
+	idiv	0x45,%eax
+	idiv	0x7eed,%eax
+	idiv	0xbabecafe,%eax
+	idiv	0x12345678,%eax
+
+// PR8524
+movd	%rax, %mm5 // CHECK: movd %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
+movd	%mm5, %rbx // CHECK: movd %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
+movq	%rax, %mm5 // CHECK: movd %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
+movq	%mm5, %rbx // CHECK: movd %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
+
+rex64 // CHECK: rex64 # encoding: [0x48]
+data16 // CHECK: data16 # encoding: [0x66]
+
+// PR8855
+movq 18446744073709551615,%rbx   // CHECK: movq	-1, %rbx
+
+// PR8946
+movdqu	%xmm0, %xmm1 // CHECK: movdqu	%xmm0, %xmm1 # encoding: [0xf3,0x0f,0x6f,0xc8]
diff --git a/test/MC/X86/x86_64-avx-clmul-encoding.s b/test/MC/X86/x86_64-avx-clmul-encoding.s
new file mode 100644
index 000000000000..67e82c6cd0d2
--- /dev/null
+++ b/test/MC/X86/x86_64-avx-clmul-encoding.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
+          vpclmulhqhqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq  $17, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
+          vpclmulhqhqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq  $1, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01]
+          vpclmulhqlqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq  $1, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01]
+          vpclmulhqlqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq  $16, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10]
+          vpclmullqhqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq  $16, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x10]
+          vpclmullqhqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq  $0, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00]
+          vpclmullqlqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq  $0, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00]
+          vpclmullqlqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
+          vpclmulqdq  $17, %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq  $17, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
+          vpclmulqdq  $17, (%rax), %xmm10, %xmm13
+
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
new file mode 100644
index 000000000000..7a96bb5a2b48
--- /dev/null
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -0,0 +1,3318 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vaddss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x58,0xd0]
+vaddss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x59,0xd0]
+vmulss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x5c,0xd0]
+vsubss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivss  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x32,0x5e,0xd0]
+vdivss  %xmm8, %xmm9, %xmm10
+
+// CHECK: vaddsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x58,0xd0]
+vaddsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x59,0xd0]
+vmulsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x5c,0xd0]
+vsubsd  %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivsd  %xmm8, %xmm9, %xmm10
+// CHECK:  encoding: [0xc4,0x41,0x33,0x5e,0xd0]
+vdivsd  %xmm8, %xmm9, %xmm10
+
+// CHECK:   vaddss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc]
+vaddss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vsubss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc]
+vsubss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vmulss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc]
+vmulss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vdivss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc]
+vdivss  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vaddsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc]
+vaddsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vsubsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc]
+vsubsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vmulsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc]
+vmulsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK:   vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK:   encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc]
+vdivsd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa]
+vaddps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa]
+vsubps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa]
+vmulps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivps  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa]
+vdivps  %xmm10, %xmm11, %xmm15
+
+// CHECK: vaddpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa]
+vaddpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa]
+vsubpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa]
+vmulpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivpd  %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa]
+vdivpd  %xmm10, %xmm11, %xmm15
+
+// CHECK: vaddps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc]
+vaddps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc]
+vsubps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc]
+vmulps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc]
+vdivps  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc]
+vaddpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc]
+vsubpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc]
+vmulpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc]
+vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmaxss  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2]
+          vmaxss  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxsd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2]
+          vmaxsd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminss  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2]
+          vminss  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminsd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2]
+          vminsd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxss  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc]
+          vmaxss  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc]
+          vmaxsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminss  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc]
+          vminss  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
+          vminsd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
+          vmaxps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
+          vmaxpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
+          vminps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vminpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
+          vminpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
+          vmaxps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
+          vmaxpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
+          vminps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
+          vminpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
+          vandps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
+          vandpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
+          vandps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
+          vandpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
+          vorps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vorpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
+          vorpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
+          vorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
+          vorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
+          vxorps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
+          vxorpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
+          vxorps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
+          vxorpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnps  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
+          vandnps  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnpd  %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
+          vandnpd  %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
+          vandnps  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
+          vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmovss  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
+          vmovss  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovss  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
+          vmovss  %xmm14, %xmm10, %xmm15
+
+// CHECK: vmovsd  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
+          vmovsd  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovsd  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
+          vmovsd  %xmm14, %xmm10, %xmm15
+
+// CHECK: vunpckhps  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
+          vunpckhps  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhpd  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
+          vunpckhpd  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklps  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
+          vunpcklps  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklpd  %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
+          vunpcklpd  %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
+          vunpckhps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
+          vunpckhpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
+          vunpcklps  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
+          vunpcklpd  -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vcmpps  $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
+          vcmpps  $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmpps  $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
+          vcmpps  $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmpps  $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
+          vcmpps  $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd  $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
+          vcmppd  $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd  $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
+          vcmppd  $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmppd  $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
+          vcmppd  $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vshufps  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
+          vshufps  $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
+          vshufps  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vshufpd  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
+          vshufpd  $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
+          vshufpd  $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
+          vcmpeqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
+          vcmpleps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
+          vcmpltps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
+          vcmpneqps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
+          vcmpnleps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
+          vcmpnltps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
+          vcmpordps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
+          vcmpunordps   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnleps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordps   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
+          vcmpeqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
+          vcmplepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
+          vcmpltpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
+          vcmpneqpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
+          vcmpnlepd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
+          vcmpnltpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
+          vcmpordpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
+          vcmpunordpd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlepd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordpd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
+          vcmpeqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
+          vcmpless   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
+          vcmpltss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
+          vcmpneqss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
+          vcmpnless   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
+          vcmpnltss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
+          vcmpordss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
+          vcmpunordss   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpless   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnless   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordss   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordss   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
+          vcmpeqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
+          vcmplesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
+          vcmpltsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
+          vcmpneqsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
+          vcmpnlesd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
+          vcmpnltsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
+          vcmpordsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
+          vcmpunordsd   %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd  $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlesd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd  $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordsd   -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd  $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordsd   -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vucomiss  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
+          vucomiss  %xmm11, %xmm12
+
+// CHECK: vucomiss  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
+          vucomiss  (%rax), %xmm12
+
+// CHECK: vcomiss  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
+          vcomiss  %xmm11, %xmm12
+
+// CHECK: vcomiss  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
+          vcomiss  (%rax), %xmm12
+
+// CHECK: vucomisd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
+          vucomisd  %xmm11, %xmm12
+
+// CHECK: vucomisd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
+          vucomisd  (%rax), %xmm12
+
+// CHECK: vcomisd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
+          vcomisd  %xmm11, %xmm12
+
+// CHECK: vcomisd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
+          vcomisd  (%rax), %xmm12
+
+// CHECK: vcvttss2si  (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+          vcvttss2si  (%rcx), %eax
+
+// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+          vcvtsi2ss  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2ss  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+          vcvtsi2ss  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvttsd2si  (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+          vcvttsd2si  (%rcx), %eax
+
+// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+          vcvtsi2sd  (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2sd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+          vcvtsi2sd  (%rax), %xmm11, %xmm12
+
+// CHECK: vmovaps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x28,0x20]
+          vmovaps  (%rax), %xmm12
+
+// CHECK: vmovaps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
+          vmovaps  %xmm11, %xmm12
+
+// CHECK: vmovaps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x29,0x18]
+          vmovaps  %xmm11, (%rax)
+
+// CHECK: vmovapd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x28,0x20]
+          vmovapd  (%rax), %xmm12
+
+// CHECK: vmovapd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
+          vmovapd  %xmm11, %xmm12
+
+// CHECK: vmovapd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x29,0x18]
+          vmovapd  %xmm11, (%rax)
+
+// CHECK: vmovups  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x10,0x20]
+          vmovups  (%rax), %xmm12
+
+// CHECK: vmovups  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
+          vmovups  %xmm11, %xmm12
+
+// CHECK: vmovups  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x11,0x18]
+          vmovups  %xmm11, (%rax)
+
+// CHECK: vmovupd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x10,0x20]
+          vmovupd  (%rax), %xmm12
+
+// CHECK: vmovupd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
+          vmovupd  %xmm11, %xmm12
+
+// CHECK: vmovupd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x11,0x18]
+          vmovupd  %xmm11, (%rax)
+
+// CHECK: vmovlps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x13,0x18]
+          vmovlps  %xmm11, (%rax)
+
+// CHECK: vmovlps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x12,0x28]
+          vmovlps  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x13,0x18]
+          vmovlpd  %xmm11, (%rax)
+
+// CHECK: vmovlpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x12,0x28]
+          vmovlpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x17,0x18]
+          vmovhps  %xmm11, (%rax)
+
+// CHECK: vmovhps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x16,0x28]
+          vmovhps  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x17,0x18]
+          vmovhpd  %xmm11, (%rax)
+
+// CHECK: vmovhpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x16,0x28]
+          vmovhpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlhps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
+          vmovlhps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovhlps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
+          vmovhlps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vcvtss2sil  %xmm11, %eax
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
+          vcvtss2si  %xmm11, %eax
+
+// CHECK: vcvtss2sil  (%rax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+          vcvtss2si  (%rax), %ebx
+
+// CHECK: vcvtdq2ps  %xmm10, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
+          vcvtdq2ps  %xmm10, %xmm12
+
+// CHECK: vcvtdq2ps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
+          vcvtdq2ps  (%rax), %xmm12
+
+// CHECK: vcvtsd2ss  %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
+          vcvtsd2ss  %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtsd2ss  (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
+          vcvtsd2ss  (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtps2dq  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
+          vcvtps2dq  %xmm12, %xmm11
+
+// CHECK: vcvtps2dq  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
+          vcvtps2dq  (%rax), %xmm11
+
+// CHECK: vcvtss2sd  %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
+          vcvtss2sd  %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtss2sd  (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
+          vcvtss2sd  (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps  %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
+          vcvtdq2ps  %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps  (%ecx), %xmm13
+// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
+          vcvtdq2ps  (%ecx), %xmm13
+
+// CHECK: vcvttps2dq  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
+          vcvttps2dq  %xmm12, %xmm11
+
+// CHECK: vcvttps2dq  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
+          vcvttps2dq  (%rax), %xmm11
+
+// CHECK: vcvtps2pd  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
+          vcvtps2pd  %xmm12, %xmm11
+
+// CHECK: vcvtps2pd  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
+          vcvtps2pd  (%rax), %xmm11
+
+// CHECK: vcvtpd2ps  %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
+          vcvtpd2ps  %xmm12, %xmm11
+
+// CHECK: vsqrtpd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
+          vsqrtpd  %xmm11, %xmm12
+
+// CHECK: vsqrtpd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x51,0x20]
+          vsqrtpd  (%rax), %xmm12
+
+// CHECK: vsqrtps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
+          vsqrtps  %xmm11, %xmm12
+
+// CHECK: vsqrtps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x51,0x20]
+          vsqrtps  (%rax), %xmm12
+
+// CHECK: vsqrtsd  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
+          vsqrtsd  %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtsd  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
+          vsqrtsd  (%rax), %xmm12, %xmm10
+
+// CHECK: vsqrtss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
+          vsqrtss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
+          vsqrtss  (%rax), %xmm12, %xmm10
+
+// CHECK: vrsqrtps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
+          vrsqrtps  %xmm11, %xmm12
+
+// CHECK: vrsqrtps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x52,0x20]
+          vrsqrtps  (%rax), %xmm12
+
+// CHECK: vrsqrtss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
+          vrsqrtss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vrsqrtss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
+          vrsqrtss  (%rax), %xmm12, %xmm10
+
+// CHECK: vrcpps  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
+          vrcpps  %xmm11, %xmm12
+
+// CHECK: vrcpps  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x53,0x20]
+          vrcpps  (%rax), %xmm12
+
+// CHECK: vrcpss  %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
+          vrcpss  %xmm11, %xmm12, %xmm10
+
+// CHECK: vrcpss  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
+          vrcpss  (%rax), %xmm12, %xmm10
+
+// CHECK: vmovntdq  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
+          vmovntdq  %xmm11, (%rax)
+
+// CHECK: vmovntpd  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
+          vmovntpd  %xmm11, (%rax)
+
+// CHECK: vmovntps  %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
+          vmovntps  %xmm11, (%rax)
+
+// CHECK: vldmxcsr  -4(%rip)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
+          vldmxcsr  -4(%rip)
+
+// CHECK: vstmxcsr  -4(%rsp)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+          vstmxcsr  -4(%rsp)
+
+// CHECK: vpsubb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
+          vpsubb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
+          vpsubb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
+          vpsubw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
+          vpsubw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
+          vpsubd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
+          vpsubd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
+          vpsubq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
+          vpsubq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
+          vpsubsb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
+          vpsubsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
+          vpsubsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
+          vpsubsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
+          vpsubusb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
+          vpsubusb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
+          vpsubusw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
+          vpsubusw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
+          vpaddb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
+          vpaddb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
+          vpaddw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
+          vpaddw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
+          vpaddd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
+          vpaddd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
+          vpaddq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
+          vpaddq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
+          vpaddsb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xec,0x28]
+          vpaddsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
+          vpaddsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xed,0x28]
+          vpaddsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
+          vpaddusb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
+          vpaddusb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
+          vpaddusw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
+          vpaddusw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhuw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
+          vpmulhuw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
+          vpmulhuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
+          vpmulhw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
+          vpmulhw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmullw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
+          vpmullw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmullw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
+          vpmullw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuludq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
+          vpmuludq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmuludq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
+          vpmuludq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
+          vpavgb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
+          vpavgb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
+          vpavgw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
+          vpavgw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
+          vpminsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xea,0x28]
+          vpminsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminub  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
+          vpminub  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminub  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xda,0x28]
+          vpminub  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
+          vpmaxsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xee,0x28]
+          vpmaxsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxub  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
+          vpmaxub  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxub  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xde,0x28]
+          vpmaxub  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsadbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
+          vpsadbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsadbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
+          vpsadbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
+          vpsllw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
+          vpsllw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
+          vpslld  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpslld  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
+          vpslld  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
+          vpsllq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
+          vpsllq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsraw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
+          vpsraw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsraw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
+          vpsraw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrad  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
+          vpsrad  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrad  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
+          vpsrad  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
+          vpsrlw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
+          vpsrlw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrld  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
+          vpsrld  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrld  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
+          vpsrld  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
+          vpsrlq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
+          vpsrlq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+          vpslld  $10, %xmm12, %xmm13
+
+// CHECK: vpslldq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
+          vpslldq  $10, %xmm12, %xmm13
+
+// CHECK: vpsllq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
+          vpsllq  $10, %xmm12, %xmm13
+
+// CHECK: vpsllw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
+          vpsllw  $10, %xmm12, %xmm13
+
+// CHECK: vpsrad  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
+          vpsrad  $10, %xmm12, %xmm13
+
+// CHECK: vpsraw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
+          vpsraw  $10, %xmm12, %xmm13
+
+// CHECK: vpsrld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
+          vpsrld  $10, %xmm12, %xmm13
+
+// CHECK: vpsrldq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
+          vpsrldq  $10, %xmm12, %xmm13
+
+// CHECK: vpsrlq  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
+          vpsrlq  $10, %xmm12, %xmm13
+
+// CHECK: vpsrlw  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
+          vpsrlw  $10, %xmm12, %xmm13
+
+// CHECK: vpslld  $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+          vpslld  $10, %xmm12, %xmm13
+
+// CHECK: vpand  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
+          vpand  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpand  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
+          vpand  (%rax), %xmm12, %xmm13
+
+// CHECK: vpor  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
+          vpor  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpor  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
+          vpor  (%rax), %xmm12, %xmm13
+
+// CHECK: vpxor  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
+          vpxor  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpxor  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xef,0x28]
+          vpxor  (%rax), %xmm12, %xmm13
+
+// CHECK: vpandn  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
+          vpandn  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpandn  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
+          vpandn  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
+          vpcmpeqb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x74,0x28]
+          vpcmpeqb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
+          vpcmpeqw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x75,0x28]
+          vpcmpeqw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
+          vpcmpeqd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x76,0x28]
+          vpcmpeqd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
+          vpcmpgtb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x64,0x28]
+          vpcmpgtb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
+          vpcmpgtw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x65,0x28]
+          vpcmpgtw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
+          vpcmpgtd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x66,0x28]
+          vpcmpgtd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpacksswb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
+          vpacksswb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpacksswb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x63,0x28]
+          vpacksswb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpackssdw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
+          vpackssdw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackssdw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
+          vpackssdw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpackuswb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
+          vpackuswb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackuswb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x67,0x28]
+          vpackuswb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufd  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
+          vpshufd  $4, %xmm12, %xmm13
+
+// CHECK: vpshufd  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
+          vpshufd  $4, (%rax), %xmm13
+
+// CHECK: vpshufhw  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
+          vpshufhw  $4, %xmm12, %xmm13
+
+// CHECK: vpshufhw  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
+          vpshufhw  $4, (%rax), %xmm13
+
+// CHECK: vpshuflw  $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
+          vpshuflw  $4, %xmm12, %xmm13
+
+// CHECK: vpshuflw  $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
+          vpshuflw  $4, (%rax), %xmm13
+
+// CHECK: vpunpcklbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
+          vpunpcklbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x60,0x28]
+          vpunpcklbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklwd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
+          vpunpcklwd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklwd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x61,0x28]
+          vpunpcklwd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckldq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
+          vpunpckldq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckldq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x62,0x28]
+          vpunpckldq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
+          vpunpcklqdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
+          vpunpcklqdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhbw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
+          vpunpckhbw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhbw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x68,0x28]
+          vpunpckhbw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhwd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
+          vpunpckhwd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhwd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x69,0x28]
+          vpunpckhwd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
+          vpunpckhdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
+          vpunpckhdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
+          vpunpckhqdq  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
+          vpunpckhqdq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpinsrw  $7, %eax, %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
+          vpinsrw  $7, %eax, %xmm12, %xmm13
+
+// CHECK: vpinsrw  $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
+          vpinsrw  $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vpextrw  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+          vpextrw  $7, %xmm12, %eax
+
+// CHECK: vpmovmskb  %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
+          vpmovmskb  %xmm12, %eax
+
+// CHECK: vmaskmovdqu  %xmm14, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
+          vmaskmovdqu  %xmm14, %xmm15
+
+// CHECK: vmovd  %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+          vmovd  %eax, %xmm14
+
+// CHECK: vmovd  (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
+          vmovd  (%rax), %xmm14
+
+// CHECK: vmovd  %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
+          vmovd  %xmm14, (%rax)
+
+// CHECK: vmovd  %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovd  %rax, %xmm14
+
+// CHECK: vmovq  %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
+          vmovq  %xmm14, (%rax)
+
+// CHECK: vmovq  %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
+          vmovq  %xmm14, %xmm12
+
+// CHECK: vmovq  (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
+          vmovq  (%rax), %xmm14
+
+// CHECK: vmovq  %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+          vmovq  %rax, %xmm14
+
+// CHECK: vmovq  %xmm14, %rax
+// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
+          vmovq  %xmm14, %rax
+
+// CHECK: vcvtpd2dq  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
+          vcvtpd2dq  %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
+          vcvtdq2pd  %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
+          vcvtdq2pd  (%rax), %xmm12
+
+// CHECK: vmovshdup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
+          vmovshdup  %xmm11, %xmm12
+
+// CHECK: vmovshdup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
+          vmovshdup  (%rax), %xmm12
+
+// CHECK: vmovsldup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
+          vmovsldup  %xmm11, %xmm12
+
+// CHECK: vmovsldup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
+          vmovsldup  (%rax), %xmm12
+
+// CHECK: vmovddup  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
+          vmovddup  %xmm11, %xmm12
+
+// CHECK: vmovddup  (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
+          vmovddup  (%rax), %xmm12
+
+// CHECK: vaddsubps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
+          vaddsubps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubps  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
+          vaddsubps  (%rax), %xmm11, %xmm12
+
+// CHECK: vaddsubpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
+          vaddsubpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubpd  (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
+          vaddsubpd  (%rax), %xmm11, %xmm12
+
+// CHECK: vhaddps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
+          vhaddps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
+          vhaddps  (%rax), %xmm12, %xmm13
+
+// CHECK: vhaddpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
+          vhaddpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
+          vhaddpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubps  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
+          vhsubps  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubps  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
+          vhsubps  (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubpd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
+          vhsubpd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubpd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
+          vhsubpd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpabsb  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
+          vpabsb  %xmm11, %xmm12
+
+// CHECK: vpabsb  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
+          vpabsb  (%rax), %xmm12
+
+// CHECK: vpabsw  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
+          vpabsw  %xmm11, %xmm12
+
+// CHECK: vpabsw  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
+          vpabsw  (%rax), %xmm12
+
+// CHECK: vpabsd  %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
+          vpabsd  %xmm11, %xmm12
+
+// CHECK: vpabsd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
+          vpabsd  (%rax), %xmm12
+
+// CHECK: vphaddw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
+          vphaddw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
+          vphaddw  (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
+          vphaddd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
+          vphaddd  (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
+          vphaddsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
+          vphaddsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
+          vphsubw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
+          vphsubw  (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
+          vphsubd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
+          vphsubd  (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
+          vphsubsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
+          vphsubsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaddubsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
+          vpmaddubsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaddubsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
+          vpmaddubsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
+          vpshufb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpshufb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
+          vpshufb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignb  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
+          vpsignb  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
+          vpsignb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
+          vpsignw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
+          vpsignw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignd  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
+          vpsignd  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
+          vpsignd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhrsw  %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
+          vpmulhrsw  %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhrsw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
+          vpmulhrsw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpalignr  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
+          vpalignr  $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vpalignr  $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
+          vpalignr  $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundsd  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07]
+          vroundsd  $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vroundsd  $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07]
+          vroundsd  $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundss  $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07]
+          vroundss  $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vroundss  $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07]
+          vroundss  $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundpd  $7, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07]
+          vroundpd  $7, %xmm12, %xmm13
+
+// CHECK: vroundpd  $7, (%rax), %xmm13
+// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07]
+          vroundpd  $7, (%rax), %xmm13
+
+// CHECK: vroundps  $7, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07]
+          vroundps  $7, %xmm12, %xmm13
+
+// CHECK: vroundps  $7, (%rax), %xmm13
+// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07]
+          vroundps  $7, (%rax), %xmm13
+
+// CHECK: vphminposuw  %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec]
+          vphminposuw  %xmm12, %xmm13
+
+// CHECK: vphminposuw  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
+          vphminposuw  (%rax), %xmm12
+
+// CHECK: vpackusdw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
+          vpackusdw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpackusdw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
+          vpackusdw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqq  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
+          vpcmpeqq  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpcmpeqq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
+          vpcmpeqq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsb  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
+          vpminsb  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
+          vpminsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsd  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
+          vpminsd  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
+          vpminsd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminud  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
+          vpminud  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminud  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
+          vpminud  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminuw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
+          vpminuw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
+          vpminuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsb  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
+          vpmaxsb  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
+          vpmaxsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsd  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
+          vpmaxsd  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
+          vpmaxsd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxud  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
+          vpmaxud  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxud  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
+          vpmaxud  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxuw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
+          vpmaxuw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
+          vpmaxuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuldq  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
+          vpmuldq  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmuldq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
+          vpmuldq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulld  %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc]
+          vpmulld  %xmm12, %xmm5, %xmm11
+
+// CHECK: vpmulld  (%rax), %xmm5, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28]
+          vpmulld  (%rax), %xmm5, %xmm13
+
+// CHECK: vblendps  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03]
+          vblendps  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vblendps  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03]
+          vblendps  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vblendpd  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03]
+          vblendpd  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vblendpd  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03]
+          vblendpd  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vpblendw  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03]
+          vpblendw  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vpblendw  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03]
+          vpblendw  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vmpsadbw  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03]
+          vmpsadbw  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vmpsadbw  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03]
+          vmpsadbw  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vdpps  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03]
+          vdpps  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vdpps  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03]
+          vdpps  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vdppd  $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03]
+          vdppd  $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vdppd  $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03]
+          vdppd  $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0]
+          vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0]
+          vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0]
+          vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvps  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0]
+          vblendvps  %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0]
+          vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0]
+          vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vpmovsxbw  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4]
+          vpmovsxbw  %xmm12, %xmm10
+
+// CHECK: vpmovsxbw  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20]
+          vpmovsxbw  (%rax), %xmm12
+
+// CHECK: vpmovsxwd  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4]
+          vpmovsxwd  %xmm12, %xmm10
+
+// CHECK: vpmovsxwd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20]
+          vpmovsxwd  (%rax), %xmm12
+
+// CHECK: vpmovsxdq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4]
+          vpmovsxdq  %xmm12, %xmm10
+
+// CHECK: vpmovsxdq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20]
+          vpmovsxdq  (%rax), %xmm12
+
+// CHECK: vpmovzxbw  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4]
+          vpmovzxbw  %xmm12, %xmm10
+
+// CHECK: vpmovzxbw  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20]
+          vpmovzxbw  (%rax), %xmm12
+
+// CHECK: vpmovzxwd  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4]
+          vpmovzxwd  %xmm12, %xmm10
+
+// CHECK: vpmovzxwd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20]
+          vpmovzxwd  (%rax), %xmm12
+
+// CHECK: vpmovzxdq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4]
+          vpmovzxdq  %xmm12, %xmm10
+
+// CHECK: vpmovzxdq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20]
+          vpmovzxdq  (%rax), %xmm12
+
+// CHECK: vpmovsxbq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4]
+          vpmovsxbq  %xmm12, %xmm10
+
+// CHECK: vpmovsxbq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20]
+          vpmovsxbq  (%rax), %xmm12
+
+// CHECK: vpmovzxbq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4]
+          vpmovzxbq  %xmm12, %xmm10
+
+// CHECK: vpmovzxbq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20]
+          vpmovzxbq  (%rax), %xmm12
+
+// CHECK: vpmovsxbd  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4]
+          vpmovsxbd  %xmm12, %xmm10
+
+// CHECK: vpmovsxbd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20]
+          vpmovsxbd  (%rax), %xmm12
+
+// CHECK: vpmovsxwq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4]
+          vpmovsxwq  %xmm12, %xmm10
+
+// CHECK: vpmovsxwq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20]
+          vpmovsxwq  (%rax), %xmm12
+
+// CHECK: vpmovzxbd  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4]
+          vpmovzxbd  %xmm12, %xmm10
+
+// CHECK: vpmovzxbd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20]
+          vpmovzxbd  (%rax), %xmm12
+
+// CHECK: vpmovzxwq  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4]
+          vpmovzxwq  %xmm12, %xmm10
+
+// CHECK: vpmovzxwq  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20]
+          vpmovzxwq  (%rax), %xmm12
+
+// CHECK: vpextrw  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+          vpextrw  $7, %xmm12, %eax
+
+// CHECK: vpextrw  $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07]
+          vpextrw  $7, %xmm12, (%rax)
+
+// CHECK: vpextrd  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07]
+          vpextrd  $7, %xmm12, %eax
+
+// CHECK: vpextrd  $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07]
+          vpextrd  $7, %xmm12, (%rax)
+
+// CHECK: vpextrb  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07]
+          vpextrb  $7, %xmm12, %eax
+
+// CHECK: vpextrb  $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07]
+          vpextrb  $7, %xmm12, (%rax)
+
+// CHECK: vpextrq  $7, %xmm12, %rcx
+// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07]
+          vpextrq  $7, %xmm12, %rcx
+
+// CHECK: vpextrq  $7, %xmm12, (%rcx)
+// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07]
+          vpextrq  $7, %xmm12, (%rcx)
+
+// CHECK: vextractps  $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07]
+          vextractps  $7, %xmm12, (%rax)
+
+// CHECK: vextractps  $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07]
+          vextractps  $7, %xmm12, %eax
+
+// CHECK: vpinsrw  $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07]
+          vpinsrw  $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrw  $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07]
+          vpinsrw  $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrb  $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07]
+          vpinsrb  $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrb  $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07]
+          vpinsrb  $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrd  $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07]
+          vpinsrd  $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrd  $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07]
+          vpinsrd  $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrq  $7, %rax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07]
+          vpinsrq  $7, %rax, %xmm12, %xmm10
+
+// CHECK: vpinsrq  $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
+          vpinsrq  $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vinsertps  $7, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07]
+          vinsertps  $7, %xmm12, %xmm10, %xmm11
+
+// CHECK: vinsertps  $7, (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07]
+          vinsertps  $7, (%rax), %xmm10, %xmm11
+
+// CHECK: vptest  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4]
+          vptest  %xmm12, %xmm10
+
+// CHECK: vptest  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20]
+          vptest  (%rax), %xmm12
+
+// CHECK: vmovntdqa  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20]
+          vmovntdqa  (%rax), %xmm12
+
+// CHECK: vpcmpgtq  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc]
+          vpcmpgtq  %xmm12, %xmm10, %xmm11
+
+// CHECK: vpcmpgtq  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28]
+          vpcmpgtq  (%rax), %xmm10, %xmm13
+
+// CHECK: vpcmpistrm  $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07]
+          vpcmpistrm  $7, %xmm12, %xmm10
+
+// CHECK: vpcmpistrm  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07]
+          vpcmpistrm  $7, (%rax), %xmm10
+
+// CHECK: vpcmpestrm  $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07]
+          vpcmpestrm  $7, %xmm12, %xmm10
+
+// CHECK: vpcmpestrm  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07]
+          vpcmpestrm  $7, (%rax), %xmm10
+
+// CHECK: vpcmpistri  $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07]
+          vpcmpistri  $7, %xmm12, %xmm10
+
+// CHECK: vpcmpistri  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07]
+          vpcmpistri  $7, (%rax), %xmm10
+
+// CHECK: vpcmpestri  $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07]
+          vpcmpestri  $7, %xmm12, %xmm10
+
+// CHECK: vpcmpestri  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07]
+          vpcmpestri  $7, (%rax), %xmm10
+
+// CHECK: vaesimc  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4]
+          vaesimc  %xmm12, %xmm10
+
+// CHECK: vaesimc  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20]
+          vaesimc  (%rax), %xmm12
+
+// CHECK: vaesenc  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc]
+          vaesenc  %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesenc  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28]
+          vaesenc  (%rax), %xmm10, %xmm13
+
+// CHECK: vaesenclast  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc]
+          vaesenclast  %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesenclast  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28]
+          vaesenclast  (%rax), %xmm10, %xmm13
+
+// CHECK: vaesdec  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc]
+          vaesdec  %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesdec  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28]
+          vaesdec  (%rax), %xmm10, %xmm13
+
+// CHECK: vaesdeclast  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc]
+          vaesdeclast  %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesdeclast  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28]
+          vaesdeclast  (%rax), %xmm10, %xmm13
+
+// CHECK: vaeskeygenassist  $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07]
+          vaeskeygenassist  $7, %xmm12, %xmm10
+
+// CHECK: vaeskeygenassist  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07]
+          vaeskeygenassist  $7, (%rax), %xmm10
+
+// CHECK: vcmpps  $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
+          vcmpeq_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
+          vcmpngeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
+          vcmpngtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
+          vcmpfalseps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
+          vcmpneq_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
+          vcmpgeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
+          vcmpgtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
+          vcmptrueps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
+          vcmpeq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
+          vcmplt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
+          vcmple_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
+          vcmpunord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
+          vcmpneq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
+          vcmpnlt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
+          vcmpnle_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
+          vcmpord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
+          vcmpeq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
+          vcmpnge_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
+          vcmpngt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
+          vcmpfalse_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
+          vcmpneq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
+          vcmpge_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
+          vcmpgt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps  $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
+          vcmptrue_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovaps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x28,0x20]
+          vmovaps  (%rax), %ymm12
+
+// CHECK: vmovaps  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3]
+          vmovaps  %ymm11, %ymm12
+
+// CHECK: vmovaps  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x29,0x18]
+          vmovaps  %ymm11, (%rax)
+
+// CHECK: vmovapd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x28,0x20]
+          vmovapd  (%rax), %ymm12
+
+// CHECK: vmovapd  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3]
+          vmovapd  %ymm11, %ymm12
+
+// CHECK: vmovapd  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x29,0x18]
+          vmovapd  %ymm11, (%rax)
+
+// CHECK: vmovups  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x10,0x20]
+          vmovups  (%rax), %ymm12
+
+// CHECK: vmovups  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3]
+          vmovups  %ymm11, %ymm12
+
+// CHECK: vmovups  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x11,0x18]
+          vmovups  %ymm11, (%rax)
+
+// CHECK: vmovupd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x10,0x20]
+          vmovupd  (%rax), %ymm12
+
+// CHECK: vmovupd  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3]
+          vmovupd  %ymm11, %ymm12
+
+// CHECK: vmovupd  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
+          vmovupd  %ymm11, (%rax)
+
+// CHECK: vunpckhps  %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3]
+          vunpckhps  %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhpd  %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3]
+          vunpckhpd  %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklps  %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3]
+          vunpcklps  %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklpd  %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3]
+          vunpcklpd  %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc]
+          vunpckhps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpckhpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc]
+          vunpckhpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc]
+          vunpcklps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc]
+          vunpcklpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vmovntdq  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0xe7,0x18]
+          vmovntdq  %ymm11, (%rax)
+
+// CHECK: vmovntpd  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x2b,0x18]
+          vmovntpd  %ymm11, (%rax)
+
+// CHECK: vmovntps  %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x2b,0x18]
+          vmovntps  %ymm11, (%rax)
+
+// CHECK: vmovmskps  %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4]
+          vmovmskps  %xmm12, %eax
+
+// CHECK: vmovmskpd  %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4]
+          vmovmskpd  %xmm12, %eax
+
+// CHECK: vmaxps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4]
+          vmaxps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vmaxpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4]
+          vmaxpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vminps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4]
+          vminps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vminpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4]
+          vminpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vsubps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4]
+          vsubps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vsubpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4]
+          vsubpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vdivps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4]
+          vdivps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vdivpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
+          vdivpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vaddps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
+          vaddps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vaddpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
+          vaddpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vmulps  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
+          vmulps  %ymm12, %ymm4, %ymm6
+
+// CHECK: vmulpd  %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
+          vmulpd  %ymm12, %ymm4, %ymm6
+
+// CHECK: vmaxps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
+          vmaxps  (%rax), %ymm4, %ymm6
+
+// CHECK: vmaxpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
+          vmaxpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vminps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
+          vminps  (%rax), %ymm4, %ymm6
+
+// CHECK: vminpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
+          vminpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vsubps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
+          vsubps  (%rax), %ymm4, %ymm6
+
+// CHECK: vsubpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
+          vsubpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vdivps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
+          vdivps  (%rax), %ymm4, %ymm6
+
+// CHECK: vdivpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
+          vdivpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vaddps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
+          vaddps  (%rax), %ymm4, %ymm6
+
+// CHECK: vaddpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
+          vaddpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vmulps  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
+          vmulps  (%rax), %ymm4, %ymm6
+
+// CHECK: vmulpd  (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
+          vmulpd  (%rax), %ymm4, %ymm6
+
+// CHECK: vsqrtpd  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3]
+          vsqrtpd  %ymm11, %ymm12
+
+// CHECK: vsqrtpd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x51,0x20]
+          vsqrtpd  (%rax), %ymm12
+
+// CHECK: vsqrtps  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3]
+          vsqrtps  %ymm11, %ymm12
+
+// CHECK: vsqrtps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x51,0x20]
+          vsqrtps  (%rax), %ymm12
+
+// CHECK: vrsqrtps  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3]
+          vrsqrtps  %ymm11, %ymm12
+
+// CHECK: vrsqrtps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x52,0x20]
+          vrsqrtps  (%rax), %ymm12
+
+// CHECK: vrcpps  %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3]
+          vrcpps  %ymm11, %ymm12
+
+// CHECK: vrcpps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x53,0x20]
+          vrcpps  (%rax), %ymm12
+
+// CHECK: vandps  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc]
+          vandps  %ymm12, %ymm14, %ymm11
+
+// CHECK: vandpd  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc]
+          vandpd  %ymm12, %ymm14, %ymm11
+
+// CHECK: vandps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc]
+          vandps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc]
+          vandpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vorps  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc]
+          vorps  %ymm12, %ymm14, %ymm11
+
+// CHECK: vorpd  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc]
+          vorpd  %ymm12, %ymm14, %ymm11
+
+// CHECK: vorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc]
+          vorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc]
+          vorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vxorps  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc]
+          vxorps  %ymm12, %ymm14, %ymm11
+
+// CHECK: vxorpd  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc]
+          vxorpd  %ymm12, %ymm14, %ymm11
+
+// CHECK: vxorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc]
+          vxorps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vxorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc]
+          vxorpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandnps  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc]
+          vandnps  %ymm12, %ymm14, %ymm11
+
+// CHECK: vandnpd  %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc]
+          vandnpd  %ymm12, %ymm14, %ymm11
+
+// CHECK: vandnps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc]
+          vandnps  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandnpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc]
+          vandnpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vcvtps2pd  %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5]
+          vcvtps2pd  %xmm13, %ymm12
+
+// CHECK: vcvtps2pd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5a,0x20]
+          vcvtps2pd  (%rax), %ymm12
+
+// CHECK: vcvtdq2pd  %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5]
+          vcvtdq2pd  %xmm13, %ymm12
+
+// CHECK: vcvtdq2pd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0xe6,0x20]
+          vcvtdq2pd  (%rax), %ymm12
+
+// CHECK: vcvtdq2ps  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4]
+          vcvtdq2ps  %ymm12, %ymm10
+
+// CHECK: vcvtdq2ps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5b,0x20]
+          vcvtdq2ps  (%rax), %ymm12
+
+// CHECK: vcvtps2dq  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4]
+          vcvtps2dq  %ymm12, %ymm10
+
+// CHECK: vcvtps2dq  (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7d,0x5b,0x10]
+          vcvtps2dq  (%rax), %ymm10
+
+// CHECK: vcvttps2dq  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4]
+          vcvttps2dq  %ymm12, %ymm10
+
+// CHECK: vcvttps2dq  (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7e,0x5b,0x10]
+          vcvttps2dq  (%rax), %ymm10
+
+// CHECK: vcvttpd2dq  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+          vcvttpd2dq  %xmm11, %xmm10
+
+// CHECK: vcvttpd2dq  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
+          vcvttpd2dq  %ymm12, %xmm10
+
+// CHECK: vcvttpd2dqx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+          vcvttpd2dqx  %xmm11, %xmm10
+
+// CHECK: vcvttpd2dqx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0xe6,0x18]
+          vcvttpd2dqx  (%rax), %xmm11
+
+// CHECK: vcvttpd2dqy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc]
+          vcvttpd2dqy  %ymm12, %xmm11
+
+// CHECK: vcvttpd2dqy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
+          vcvttpd2dqy  (%rax), %xmm11
+
+// CHECK: vcvtpd2ps  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
+          vcvtpd2ps  %ymm12, %xmm10
+
+// CHECK: vcvtpd2psx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
+          vcvtpd2psx  %xmm11, %xmm10
+
+// CHECK: vcvtpd2psx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5a,0x18]
+          vcvtpd2psx  (%rax), %xmm11
+
+// CHECK: vcvtpd2psy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc]
+          vcvtpd2psy  %ymm12, %xmm11
+
+// CHECK: vcvtpd2psy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
+          vcvtpd2psy  (%rax), %xmm11
+
+// CHECK: vcvtpd2dq  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
+          vcvtpd2dq  %ymm12, %xmm10
+
+// CHECK: vcvtpd2dqy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc]
+          vcvtpd2dqy  %ymm12, %xmm11
+
+// CHECK: vcvtpd2dqy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
+          vcvtpd2dqy  (%rax), %xmm11
+
+// CHECK: vcvtpd2dqx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
+          vcvtpd2dqx  %xmm11, %xmm10
+
+// CHECK: vcvtpd2dqx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7b,0xe6,0x18]
+          vcvtpd2dqx  (%rax), %xmm11
+
+// CHECK: vcmpps  $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00]
+          vcmpeqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02]
+          vcmpleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01]
+          vcmpltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04]
+          vcmpneqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06]
+          vcmpnleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05]
+          vcmpnltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07]
+          vcmpordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03]
+          vcmpunordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07]
+          vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00]
+          vcmpeqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02]
+          vcmplepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01]
+          vcmpltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04]
+          vcmpneqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06]
+          vcmpnlepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05]
+          vcmpnltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07]
+          vcmpordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03]
+          vcmpunordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07]
+          vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $8, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08]
+          vcmpeq_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $9, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09]
+          vcmpngeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $10, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a]
+          vcmpngtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $11, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b]
+          vcmpfalseps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $12, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c]
+          vcmpneq_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $13, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d]
+          vcmpgeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $14, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e]
+          vcmpgtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $15, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f]
+          vcmptrueps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $16, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10]
+          vcmpeq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $17, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11]
+          vcmplt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $18, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12]
+          vcmple_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $19, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13]
+          vcmpunord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $20, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14]
+          vcmpneq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $21, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15]
+          vcmpnlt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $22, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16]
+          vcmpnle_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $23, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17]
+          vcmpord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $24, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18]
+          vcmpeq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $25, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19]
+          vcmpnge_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $26, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a]
+          vcmpngt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $27, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b]
+          vcmpfalse_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $28, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c]
+          vcmpneq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $29, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d]
+          vcmpge_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $30, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e]
+          vcmpgt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $31, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
+          vcmptrue_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubps  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb]
+          vaddsubps  %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubps  (%rax), %ymm11, %ymm12
+// CHECK: encoding: [0xc5,0x27,0xd0,0x20]
+          vaddsubps  (%rax), %ymm11, %ymm12
+
+// CHECK: vaddsubpd  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb]
+          vaddsubpd  %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubpd  (%rax), %ymm11, %ymm12
+// CHECK: encoding: [0xc5,0x25,0xd0,0x20]
+          vaddsubpd  (%rax), %ymm11, %ymm12
+
+// CHECK: vhaddps  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb]
+          vhaddps  %ymm11, %ymm12, %ymm13
+
+// CHECK: vhaddps  (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1f,0x7c,0x28]
+          vhaddps  (%rax), %ymm12, %ymm13
+
+// CHECK: vhaddpd  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb]
+          vhaddpd  %ymm11, %ymm12, %ymm13
+
+// CHECK: vhaddpd  (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0x7c,0x28]
+          vhaddpd  (%rax), %ymm12, %ymm13
+
+// CHECK: vhsubps  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb]
+          vhsubps  %ymm11, %ymm12, %ymm13
+
+// CHECK: vhsubps  (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1f,0x7d,0x28]
+          vhsubps  (%rax), %ymm12, %ymm13
+
+// CHECK: vhsubpd  %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb]
+          vhsubpd  %ymm11, %ymm12, %ymm13
+
+// CHECK: vhsubpd  (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0x7d,0x28]
+          vhsubpd  (%rax), %ymm12, %ymm13
+
+// CHECK: vblendps  $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03]
+          vblendps  $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vblendps  $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03]
+          vblendps  $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vblendpd  $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03]
+          vblendpd  $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vblendpd  $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03]
+          vblendpd  $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vdpps  $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03]
+          vdpps  $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vdpps  $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03]
+          vdpps  $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vbroadcastf128  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20]
+          vbroadcastf128  (%rax), %ymm12
+
+// CHECK: vbroadcastsd  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20]
+          vbroadcastsd  (%rax), %ymm12
+
+// CHECK: vbroadcastss  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20]
+          vbroadcastss  (%rax), %xmm12
+
+// CHECK: vbroadcastss  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20]
+          vbroadcastss  (%rax), %ymm12
+
+// CHECK: vinsertf128  $7, %xmm12, %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07]
+          vinsertf128  $7, %xmm12, %ymm12, %ymm10
+
+// CHECK: vinsertf128  $7, (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07]
+          vinsertf128  $7, (%rax), %ymm12, %ymm10
+
+// CHECK: vextractf128  $7, %ymm12, %xmm12
+// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07]
+          vextractf128  $7, %ymm12, %xmm12
+
+// CHECK: vextractf128  $7, %ymm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07]
+          vextractf128  $7, %ymm12, (%rax)
+
+// CHECK: vmaskmovpd  %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20]
+          vmaskmovpd  %xmm12, %xmm10, (%rax)
+
+// CHECK: vmaskmovpd  %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20]
+          vmaskmovpd  %ymm12, %ymm10, (%rax)
+
+// CHECK: vmaskmovpd  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10]
+          vmaskmovpd  (%rax), %xmm12, %xmm10
+
+// CHECK: vmaskmovpd  (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10]
+          vmaskmovpd  (%rax), %ymm12, %ymm10
+
+// CHECK: vmaskmovps  %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20]
+          vmaskmovps  %xmm12, %xmm10, (%rax)
+
+// CHECK: vmaskmovps  %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20]
+          vmaskmovps  %ymm12, %ymm10, (%rax)
+
+// CHECK: vmaskmovps  (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10]
+          vmaskmovps  (%rax), %xmm12, %xmm10
+
+// CHECK: vmaskmovps  (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10]
+          vmaskmovps  (%rax), %ymm12, %ymm10
+
+// CHECK: vpermilps  $7, %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07]
+          vpermilps  $7, %xmm11, %xmm10
+
+// CHECK: vpermilps  $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07]
+          vpermilps  $7, %ymm10, %ymm11
+
+// CHECK: vpermilps  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07]
+          vpermilps  $7, (%rax), %xmm10
+
+// CHECK: vpermilps  $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07]
+          vpermilps  $7, (%rax), %ymm10
+
+// CHECK: vpermilps  %xmm11, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb]
+          vpermilps  %xmm11, %xmm10, %xmm11
+
+// CHECK: vpermilps  %ymm11, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb]
+          vpermilps  %ymm11, %ymm10, %ymm11
+
+// CHECK: vpermilps  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28]
+          vpermilps  (%rax), %xmm10, %xmm13
+
+// CHECK: vpermilps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18]
+          vpermilps  (%rax), %ymm10, %ymm11
+
+// CHECK: vpermilpd  $7, %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07]
+          vpermilpd  $7, %xmm11, %xmm10
+
+// CHECK: vpermilpd  $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07]
+          vpermilpd  $7, %ymm10, %ymm11
+
+// CHECK: vpermilpd  $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07]
+          vpermilpd  $7, (%rax), %xmm10
+
+// CHECK: vpermilpd  $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07]
+          vpermilpd  $7, (%rax), %ymm10
+
+// CHECK: vpermilpd  %xmm11, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb]
+          vpermilpd  %xmm11, %xmm10, %xmm11
+
+// CHECK: vpermilpd  %ymm11, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb]
+          vpermilpd  %ymm11, %ymm10, %ymm11
+
+// CHECK: vpermilpd  (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28]
+          vpermilpd  (%rax), %xmm10, %xmm13
+
+// CHECK: vpermilpd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18]
+          vpermilpd  (%rax), %ymm10, %ymm11
+
+// CHECK: vperm2f128  $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07]
+          vperm2f128  $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vperm2f128  $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
+          vperm2f128  $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vcvtsd2si  %xmm8, %r8d
+// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
+          vcvtsd2si  %xmm8, %r8d
+
+// CHECK: vcvtsd2si  (%rcx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+          vcvtsd2si  (%rcx), %ecx
+
+// CHECK: vcvtss2si  %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
+          vcvtss2si  %xmm4, %rcx
+
+// CHECK: vcvtss2si  (%rcx), %r8
+// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
+          vcvtss2si  (%rcx), %r8
+
+// CHECK: vcvtsi2sdl  %r8d, %xmm8, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
+          vcvtsi2sdl  %r8d, %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdl  (%rbp), %xmm8, %xmm15
+// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
+          vcvtsi2sdl  (%rbp), %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdq  %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
+          vcvtsi2sdq  %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2sdq  (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31]
+          vcvtsi2sdq  (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq  %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1]
+          vcvtsi2ssq  %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq  (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
+          vcvtsi2ssq  (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvttsd2si  %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
+          vcvttsd2si  %xmm4, %rcx
+
+// CHECK: vcvttsd2si  (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
+          vcvttsd2si  (%rcx), %rcx
+
+// CHECK: vcvttss2si  %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
+          vcvttss2si  %xmm4, %rcx
+
+// CHECK: vcvttss2si  (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
+          vcvttss2si  (%rcx), %rcx
+
+// CHECK: vlddqu  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
+          vlddqu  (%rax), %ymm12
+
+// CHECK: vmovddup  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4]
+          vmovddup  %ymm12, %ymm10
+
+// CHECK: vmovddup  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0x12,0x20]
+          vmovddup  (%rax), %ymm12
+
+// CHECK: vmovdqa  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4]
+          vmovdqa  %ymm12, %ymm10
+
+// CHECK: vmovdqa  %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x7f,0x20]
+          vmovdqa  %ymm12, (%rax)
+
+// CHECK: vmovdqa  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x6f,0x20]
+          vmovdqa  (%rax), %ymm12
+
+// CHECK: vmovdqu  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4]
+          vmovdqu  %ymm12, %ymm10
+
+// CHECK: vmovdqu  %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7e,0x7f,0x20]
+          vmovdqu  %ymm12, (%rax)
+
+// CHECK: vmovdqu  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x6f,0x20]
+          vmovdqu  (%rax), %ymm12
+
+// CHECK: vmovshdup  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4]
+          vmovshdup  %ymm12, %ymm10
+
+// CHECK: vmovshdup  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x16,0x20]
+          vmovshdup  (%rax), %ymm12
+
+// CHECK: vmovsldup  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4]
+          vmovsldup  %ymm12, %ymm10
+
+// CHECK: vmovsldup  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x12,0x20]
+          vmovsldup  (%rax), %ymm12
+
+// CHECK: vptest  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4]
+          vptest  %ymm12, %ymm10
+
+// CHECK: vptest  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20]
+          vptest  (%rax), %ymm12
+
+// CHECK: vroundpd  $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07]
+          vroundpd  $7, %ymm10, %ymm11
+
+// CHECK: vroundpd  $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07]
+          vroundpd  $7, (%rax), %ymm10
+
+// CHECK: vroundps  $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07]
+          vroundps  $7, %ymm10, %ymm11
+
+// CHECK: vroundps  $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07]
+          vroundps  $7, (%rax), %ymm10
+
+// CHECK: vshufpd  $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07]
+          vshufpd  $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufpd  $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07]
+          vshufpd  $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vshufps  $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07]
+          vshufps  $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufps  $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07]
+          vshufps  $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vtestpd  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4]
+          vtestpd  %xmm12, %xmm10
+
+// CHECK: vtestpd  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4]
+          vtestpd  %ymm12, %ymm10
+
+// CHECK: vtestpd  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20]
+          vtestpd  (%rax), %xmm12
+
+// CHECK: vtestpd  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20]
+          vtestpd  (%rax), %ymm12
+
+// CHECK: vtestps  %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4]
+          vtestps  %xmm12, %xmm10
+
+// CHECK: vtestps  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4]
+          vtestps  %ymm12, %ymm10
+
+// CHECK: vtestps  (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20]
+          vtestps  (%rax), %xmm12
+
+// CHECK: vtestps  (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20]
+          vtestps  (%rax), %ymm12
+
+// CHECK: vextractps   $10, %xmm8, %r8
+// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a]
+          vextractps   $10, %xmm8, %r8
+
+// CHECK: vextractps   $7, %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07]
+          vextractps   $7, %xmm4, %rcx
+
+// CHECK: vmovd  %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1]
+          vmovd  %xmm4, %rcx
+
+// CHECK: vmovmskpd  %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf9,0x50,0xcc]
+          vmovmskpd  %xmm4, %rcx
+
+// CHECK: vmovmskpd  %ymm4, %rcx
+// CHECK: encoding: [0xc5,0xfd,0x50,0xcc]
+          vmovmskpd  %ymm4, %rcx
+
+// CHECK: vmovmskps  %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf8,0x50,0xcc]
+          vmovmskps  %xmm4, %rcx
+
+// CHECK: vmovmskps  %ymm4, %rcx
+// CHECK: encoding: [0xc5,0xfc,0x50,0xcc]
+          vmovmskps  %ymm4, %rcx
+
+// CHECK: vpextrb  $7, %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07]
+          vpextrb  $7, %xmm4, %rcx
+
+// CHECK: vpinsrw  $7, %r8, %xmm15, %xmm8
+// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07]
+          vpinsrw  $7, %r8, %xmm15, %xmm8
+
+// CHECK: vpinsrw  $7, %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07]
+          vpinsrw  $7, %rcx, %xmm4, %xmm6
+
+// CHECK: vpmovmskb  %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc]
+          vpmovmskb  %xmm4, %rcx
+
+// CHECK: vblendvpd  %ymm11, 57005(%rax,%riz), %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0]
+          vblendvpd  %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13
+
diff --git a/test/MC/X86/x86_64-encoding.s b/test/MC/X86/x86_64-encoding.s
new file mode 100644
index 000000000000..756da4dc352d
--- /dev/null
+++ b/test/MC/X86/x86_64-encoding.s
@@ -0,0 +1,157 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// PR7195
+// CHECK: callw 42
+// CHECK: encoding: [0x66,0xe8,A,A]
+       callw 42
+
+// rdar://8127102
+// CHECK: movq	%gs:(%rdi), %rax
+// CHECK: encoding: [0x65,0x48,0x8b,0x07]
+movq	%gs:(%rdi), %rax
+
+// CHECK: crc32b 	%bl, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0xc3]
+        crc32b	%bl, %eax
+
+// CHECK: crc32b 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %eax
+
+// CHECK: crc32w 	%bx, %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc3]
+        crc32w	%bx, %eax
+
+// CHECK: crc32w 	4(%rbx), %eax
+// CHECK:  encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x43,0x04]
+        crc32w	4(%rbx), %eax
+
+// CHECK: crc32l 	%ebx, %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc3]
+        crc32l	%ebx, %eax
+
+// CHECK: crc32l 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
+        crc32l	4(%rbx), %eax
+
+// CHECK: crc32l 	3735928559(%rbx,%rcx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
+        	crc32l   0xdeadbeef(%rbx,%rcx,8),%ecx
+
+// CHECK: crc32l 	69, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0x45,0x00,0x00,0x00]
+        	crc32l   0x45,%ecx
+
+// CHECK: crc32l 	32493, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xed,0x7e,0x00,0x00]
+        	crc32l   0x7eed,%ecx
+
+// CHECK: crc32l 	3133065982, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0xfe,0xca,0xbe,0xba]
+        	crc32l   0xbabecafe,%ecx
+
+// CHECK: crc32l 	%ecx, %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0xc9]
+        	crc32l   %ecx,%ecx
+
+// CHECK: crc32b 	%r11b, %eax
+// CHECK:  encoding: [0xf2,0x41,0x0f,0x38,0xf0,0xc3]
+        crc32b	%r11b, %eax
+
+// CHECK: crc32b 	4(%rbx), %eax
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %eax
+
+// CHECK: crc32b 	%dil, %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0xc7]
+        crc32b	%dil,%rax
+
+// CHECK: crc32b 	%r11b, %rax
+// CHECK:  encoding: [0xf2,0x49,0x0f,0x38,0xf0,0xc3]
+        crc32b	%r11b,%rax
+
+// CHECK: crc32b 	4(%rbx), %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf0,0x43,0x04]
+        crc32b	4(%rbx), %rax
+
+// CHECK: crc32q 	%rbx, %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc3]
+        crc32q	%rbx, %rax
+
+// CHECK: crc32q 	4(%rbx), %rax
+// CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04]
+        crc32q	4(%rbx), %rax
+
+// CHECK: movd %r8, %mm1
+// CHECK:  encoding: [0x49,0x0f,0x6e,0xc8]
+movd %r8, %mm1
+
+// CHECK: movd %r8d, %mm1
+// CHECK:  encoding: [0x41,0x0f,0x6e,0xc8]
+movd %r8d, %mm1
+
+// CHECK: movd %rdx, %mm1
+// CHECK:  encoding: [0x48,0x0f,0x6e,0xca]
+movd %rdx, %mm1
+
+// CHECK: movd %edx, %mm1
+// CHECK:  encoding: [0x0f,0x6e,0xca]
+movd %edx, %mm1
+
+// CHECK: movd %mm1, %r8
+// CHECK:  encoding: [0x49,0x0f,0x7e,0xc8]
+movd %mm1, %r8
+
+// CHECK: movd %mm1, %r8d
+// CHECK:  encoding: [0x41,0x0f,0x7e,0xc8]
+movd %mm1, %r8d
+
+// CHECK: movd %mm1, %rdx
+// CHECK:  encoding: [0x48,0x0f,0x7e,0xca]
+movd %mm1, %rdx
+
+// CHECK: movd %mm1, %edx
+// CHECK:  encoding: [0x0f,0x7e,0xca]
+movd %mm1, %edx
+
+// rdar://7840289
+// CHECK: pshufb	CPI1_0(%rip), %xmm1
+// CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
+// CHECK:  fixup A - offset: 5, value: CPI1_0-4
+pshufb	CPI1_0(%rip), %xmm1
+
+// CHECK: movq  57005(,%riz), %rbx
+// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
+          movq  57005(,%riz), %rbx
+
+// CHECK: movq  48879(,%riz), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
+          movq  48879(,%riz), %rax
+
+// CHECK: movq  -4(,%riz,8), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
+          movq  -4(,%riz,8), %rax
+
+// CHECK: movq  (%rcx,%riz), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0x21]
+          movq  (%rcx,%riz), %rax
+
+// CHECK: movq  (%rcx,%riz,8), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0xe1]
+          movq  (%rcx,%riz,8), %rax
+
+// CHECK: fxsaveq (%rax)
+// CHECK: encoding: [0x48,0x0f,0xae,0x00]
+          fxsaveq (%rax)
+
+// CHECK: fxrstorq (%rax)
+// CHECK: encoding: [0x48,0x0f,0xae,0x08]
+          fxrstorq (%rax)
+
+// CHECK: leave
+// CHECK:  encoding: [0xc9]
+        	leave
+
+// CHECK: leave
+// CHECK:  encoding: [0xc9]
+        	leaveq
diff --git a/test/MC/X86/x86_64-fma3-encoding.s b/test/MC/X86/x86_64-fma3-encoding.s
new file mode 100644
index 000000000000..d08a7329a09f
--- /dev/null
+++ b/test/MC/X86/x86_64-fma3-encoding.s
@@ -0,0 +1,674 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmadd132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
+          vfmadd132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
+          vfmadd132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
+          vfmadd132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
+          vfmadd132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
+          vfmadd213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
+          vfmadd213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
+          vfmadd213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
+          vfmadd213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
+          vfmadd231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
+          vfmadd231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
+          vfmadd231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
+          vfmadd231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
+          vfmadd132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
+          vfmadd132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
+          vfmadd132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
+          vfmadd132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
+          vfmadd213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
+          vfmadd213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
+          vfmadd213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
+          vfmadd213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
+          vfmadd231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
+          vfmadd231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
+          vfmadd231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
+          vfmadd231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
+          vfmadd132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
+          vfmadd132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
+          vfmadd132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
+          vfmadd132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
+          vfmadd213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
+          vfmadd213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
+          vfmadd213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
+          vfmadd213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
+          vfmadd231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
+          vfmadd231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
+          vfmadd231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
+          vfmadd231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc]
+          vfmaddsub132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18]
+          vfmaddsub132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc]
+          vfmaddsub132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18]
+          vfmaddsub132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc]
+          vfmaddsub213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18]
+          vfmaddsub213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc]
+          vfmaddsub213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18]
+          vfmaddsub213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc]
+          vfmaddsub231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18]
+          vfmaddsub231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc]
+          vfmaddsub231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18]
+          vfmaddsub231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc]
+          vfmsubadd132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18]
+          vfmsubadd132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc]
+          vfmsubadd132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18]
+          vfmsubadd132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc]
+          vfmsubadd213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18]
+          vfmsubadd213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc]
+          vfmsubadd213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18]
+          vfmsubadd213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc]
+          vfmsubadd231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18]
+          vfmsubadd231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc]
+          vfmsubadd231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18]
+          vfmsubadd231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc]
+          vfmsub132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18]
+          vfmsub132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc]
+          vfmsub132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18]
+          vfmsub132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc]
+          vfmsub213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18]
+          vfmsub213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc]
+          vfmsub213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18]
+          vfmsub213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc]
+          vfmsub231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18]
+          vfmsub231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc]
+          vfmsub231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18]
+          vfmsub231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc]
+          vfnmadd132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18]
+          vfnmadd132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc]
+          vfnmadd132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18]
+          vfnmadd132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc]
+          vfnmadd213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18]
+          vfnmadd213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc]
+          vfnmadd213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18]
+          vfnmadd213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc]
+          vfnmadd231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18]
+          vfnmadd231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc]
+          vfnmadd231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18]
+          vfnmadd231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub132pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc]
+          vfnmsub132pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub132pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18]
+          vfnmsub132pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub132ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc]
+          vfnmsub132ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub132ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18]
+          vfnmsub132ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub213pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc]
+          vfnmsub213pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub213pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18]
+          vfnmsub213pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub213ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc]
+          vfnmsub213ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub213ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18]
+          vfnmsub213ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub231pd  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc]
+          vfnmsub231pd  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub231pd  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18]
+          vfnmsub231pd  (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub231ps  %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc]
+          vfnmsub231ps  %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub231ps  (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18]
+          vfnmsub231ps  (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
+          vfmadd132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
+          vfmadd132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
+          vfmadd132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
+          vfmadd132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
+          vfmadd213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
+          vfmadd213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
+          vfmadd213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
+          vfmadd213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
+          vfmadd231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
+          vfmadd231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
+          vfmadd231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
+          vfmadd231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc]
+          vfmaddsub132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18]
+          vfmaddsub132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc]
+          vfmaddsub132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18]
+          vfmaddsub132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa6,0xdc]
+          vfmaddsub213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18]
+          vfmaddsub213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc]
+          vfmaddsub213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18]
+          vfmaddsub213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc]
+          vfmaddsub231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18]
+          vfmaddsub231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc]
+          vfmaddsub231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18]
+          vfmaddsub231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc]
+          vfmsubadd132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18]
+          vfmsubadd132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc]
+          vfmsubadd132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18]
+          vfmsubadd132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc]
+          vfmsubadd213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18]
+          vfmsubadd213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc]
+          vfmsubadd213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18]
+          vfmsubadd213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc]
+          vfmsubadd231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18]
+          vfmsubadd231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc]
+          vfmsubadd231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18]
+          vfmsubadd231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc]
+          vfmsub132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18]
+          vfmsub132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc]
+          vfmsub132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18]
+          vfmsub132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc]
+          vfmsub213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18]
+          vfmsub213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc]
+          vfmsub213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18]
+          vfmsub213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc]
+          vfmsub231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xba,0x18]
+          vfmsub231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc]
+          vfmsub231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18]
+          vfmsub231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc]
+          vfnmadd132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18]
+          vfnmadd132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc]
+          vfnmadd132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18]
+          vfnmadd132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc]
+          vfnmadd213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18]
+          vfnmadd213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc]
+          vfnmadd213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18]
+          vfnmadd213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc]
+          vfnmadd231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18]
+          vfnmadd231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc]
+          vfnmadd231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18]
+          vfnmadd231ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub132pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc]
+          vfnmsub132pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub132pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18]
+          vfnmsub132pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub132ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc]
+          vfnmsub132ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub132ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18]
+          vfnmsub132ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub213pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc]
+          vfnmsub213pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub213pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18]
+          vfnmsub213pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub213ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc]
+          vfnmsub213ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub213ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18]
+          vfnmsub213ps  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub231pd  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc]
+          vfnmsub231pd  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub231pd  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18]
+          vfnmsub231pd  (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub231ps  %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc]
+          vfnmsub231ps  %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub231ps  (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18]
+          vfnmsub231ps  (%rax), %ymm10, %ymm11
+
diff --git a/test/MC/X86/x86_64-imm-widths.s b/test/MC/X86/x86_64-imm-widths.s
new file mode 100644
index 000000000000..97b60ff3aff8
--- /dev/null
+++ b/test/MC/X86/x86_64-imm-widths.s
@@ -0,0 +1,105 @@
+// RUN: llvm-mc -triple x86_64- --show-encoding %s | FileCheck %s
+
+// CHECK: addb $0, %al
+// CHECK: encoding: [0x04,0x00]
+ 	addb $0x00, %al
+
+// CHECK: addb $127, %al
+// CHECK: encoding: [0x04,0x7f]
+ 	addb $0x7F, %al
+
+// CHECK: addb $128, %al
+// CHECK: encoding: [0x04,0x80]
+ 	addb $0x80, %al
+
+// CHECK: addb $255, %al
+// CHECK: encoding: [0x04,0xff]
+ 	addb $0xFF, %al
+
+// CHECK: addw $0, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x00]
+ 	addw $0x0000, %ax
+
+// CHECK: addw $127, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x7f]
+ 	addw $0x007F, %ax
+
+// CHECK: addw $65408, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0x80]
+ 	addw $0xFF80, %ax
+
+// CHECK: addw $65535, %ax
+// CHECK: encoding: [0x66,0x83,0xc0,0xff]
+	addw $0xFFFF, %ax
+
+// CHECK: addl $0, %eax
+// CHECK: encoding: [0x83,0xc0,0x00]
+ 	addl $0x00000000, %eax
+
+// CHECK: addl $127, %eax
+// CHECK: encoding: [0x83,0xc0,0x7f]
+ 	addl $0x0000007F, %eax
+
+// CHECK: addl $65408, %eax
+// CHECK: encoding: [0x05,0x80,0xff,0x00,0x00]
+ 	addl $0xFF80, %eax
+
+// CHECK: addl $65535, %eax
+// CHECK: encoding: [0x05,0xff,0xff,0x00,0x00]
+	addl $0xFFFF, %eax
+
+// CHECK: addl $4294967168, %eax
+// CHECK: encoding: [0x83,0xc0,0x80]
+ 	addl $0xFFFFFF80, %eax
+
+// CHECK: addl $4294967295, %eax
+// CHECK: encoding: [0x83,0xc0,0xff]
+ 	addl $0xFFFFFFFF, %eax
+
+// CHECK: addq $0, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x00]
+ 	addq $0x0000000000000000, %rax
+
+// CHECK: addq $127, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x7f]
+ 	addq $0x000000000000007F, %rax
+
+// CHECK: addq $-128, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x80]
+ 	addq $0xFFFFFFFFFFFFFF80, %rax
+
+// CHECK: addq $-1, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0xff]
+ 	addq $0xFFFFFFFFFFFFFFFF, %rax
+
+// CHECK: addq $0, %rax
+// CHECK: encoding: [0x48,0x83,0xc0,0x00]
+ 	addq $0x0000000000000000, %rax
+
+// CHECK: addq $65408, %rax
+// CHECK: encoding: [0x48,0x05,0x80,0xff,0x00,0x00]
+ 	addq $0xFF80, %rax
+
+// CHECK: addq $65535, %rax
+// CHECK: encoding: [0x48,0x05,0xff,0xff,0x00,0x00]
+	addq $0xFFFF, %rax
+
+// CHECK: movabsq $4294967168, %rax
+// CHECK: encoding: [0x48,0xb8,0x80,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+ 	movq $0xFFFFFF80, %rax
+
+// CHECK: movabsq $4294967295, %rax
+// CHECK: encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+        movq $0xFFFFFFFF, %rax
+
+// CHECK: addq $2147483647, %rax
+// CHECK: encoding: [0x48,0x05,0xff,0xff,0xff,0x7f]
+ 	addq $0x000000007FFFFFFF, %rax
+
+// CHECK: addq $-2147483648, %rax
+// CHECK: encoding: [0x48,0x05,0x00,0x00,0x00,0x80]
+	addq $0xFFFFFFFF80000000, %rax
+
+// CHECK: addq $-256, %rax
+// CHECK: encoding: [0x48,0x05,0x00,0xff,0xff,0xff]
+ 	addq $0xFFFFFFFFFFFFFF00, %rax
diff --git a/test/MC/X86/x86_directives.s b/test/MC/X86/x86_directives.s
new file mode 100644
index 000000000000..2950c8cd5f12
--- /dev/null
+++ b/test/MC/X86/x86_directives.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# CHECK: TEST0:
+# CHECK: .short 3
+TEST0:  
+        .word 3
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
new file mode 100644
index 000000000000..183306be2c11
--- /dev/null
+++ b/test/MC/X86/x86_errors.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err
+// RUN: FileCheck < %t.err %s
+
+// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq')
+cmp $0, 0(%eax)
diff --git a/test/MC/X86/x86_operands.s b/test/MC/X86/x86_operands.s
new file mode 100644
index 000000000000..b34713db8637
--- /dev/null
+++ b/test/MC/X86/x86_operands.s
@@ -0,0 +1,58 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+# Immediates
+# CHECK: addl $1, %eax
+        addl $1, %eax
+# CHECK: addl $3, %eax
+        addl $(1+2), %eax
+# CHECK: addl $a, %eax
+        addl $a, %eax
+# CHECK: addl $3, %eax
+        addl $1 + 2, %eax
+        
+# Disambiguation
+
+# CHECK: addl $1, 8
+        addl $1, 4+4
+# CHECK: addl $1, 8
+        addl $1, (4+4)
+# CHECK: addl $1, 8(%eax)
+        addl $1, 4+4(%eax)
+# CHECK: addl $1, 8(%eax)
+        addl $1, (4+4)(%eax)
+# CHECK: addl $1, 8(%eax)
+        addl $1, 8(%eax)
+# CHECK: addl $1, (%eax)
+        addl $1, (%eax)
+# CHECK: addl $1, 8(,%eax)
+        addl $1, (4+4)(,%eax)
+        
+# Indirect Memory Operands
+# CHECK: addl $1, 1(%eax)
+        addl $1, 1(%eax)
+# CHECK: addl $1, 1(%eax,%ebx)
+        addl $1, 1(%eax,%ebx)
+# CHECK: addl $1, 1(%eax,%ebx)
+        addl $1, 1(%eax,%ebx,)
+# CHECK: addl $1, 1(%eax,%ebx,4)
+        addl $1, 1(%eax,%ebx,4)
+# CHECK: addl $1, 1(,%ebx)
+        addl $1, 1(,%ebx)
+# CHECK: addl $1, 1(,%ebx)
+        addl $1, 1(,%ebx,)
+# CHECK: addl $1, 1(,%ebx,4)
+        addl $1, 1(,%ebx,4)
+# CHECK: addl $1, 1(,%ebx,4)
+        addl $1, 1(,%ebx,(2+2))
+
+# '*'
+# CHECK: calll a
+        call a
+# CHECK: calll *%eax
+        call *%eax
+# CHECK: calll *4(%eax)
+        call *4(%eax)
+
+# CHECK: movl	%gs:8, %eax
+movl %gs:8, %eax
+
diff --git a/test/Makefile b/test/Makefile
index 7ca46beccc3f..b37bbfcc6f2a 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -33,6 +33,11 @@ else
 LIT_ARGS := -s -v
 endif
 
+# -jN causes crash on Cygwin's python.
+ifneq (,$(filter $(HOST_OS),Cygwin))
+  LIT_ARGS += -j1
+endif
+
 ifdef TESTSUITE
 LIT_TESTSUITE := $(TESTSUITE)
 CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE))
@@ -59,7 +64,7 @@ LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test
 
 # Force creation of Clang's lit.site.cfg.
 clang-lit-site-cfg: FORCE
-	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg Unit/lit.site.cfg
 extra-lit-site-cfgs:: clang-lit-site-cfg
 endif
 endif
@@ -138,6 +143,7 @@ site.exp: FORCE
 	@echo 'set llvmgcc_langs "$(LLVMGCC_LANGS)"' >> site.tmp
 	@echo 'set llvmtoolsdir "$(ToolDir)"' >>site.tmp
 	@echo 'set llvmlibsdir "$(LibDir)"' >>site.tmp
+	@echo 'set llvmshlibdir "$(SharedLibDir)"' >>site.tmp
 	@echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp
 	@echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp
 	@echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp
@@ -157,6 +163,7 @@ site.exp: FORCE
 	@echo 'set grep "$(GREP)"' >>site.tmp
 	@echo 'set gas "$(GAS)"' >>site.tmp
 	@echo 'set llvmdsymutil "$(DSYMUTIL)"' >>site.tmp
+	@echo 'set emitir "$(LLVMCC_EMITIR_FLAG)"' >>site.tmp
 	@echo '## All variables above are generated by configure. Do Not Edit ## ' >>site.tmp
 	@test ! -f site.exp || \
 	sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp
@@ -170,6 +177,7 @@ lit.site.cfg: site.exp
 	     -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
 	     -e "s#@LLVM_TOOLS_DIR@#$(ToolDir)#g" \
 	     -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
+	     -e "s#@ENABLE_SHARED@#$(ENABLE_SHARED)#g" \
 	     $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 
 Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
@@ -180,6 +188,7 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
 	     -e "s#@LLVMGCCDIR@#$(LLVMGCCDIR)#g" \
 	     -e "s#@LLVM_BUILD_MODE@#$(BuildMode)#g" \
 	     -e "s#@ENABLE_SHARED@#$(ENABLE_SHARED)#g" \
+	     -e "s#@SHLIBDIR@#$(SharedLibDir)#g" \
 	     -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \
 	     $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
 
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-i386 b/test/Object/TestObjectFiles/trivial-object-test.coff-i386
new file mode 100644
index 000000000000..8cfd9949b1bd
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.coff-i386 differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64 b/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64
new file mode 100644
index 000000000000..077591482cea
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64 differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-i386 b/test/Object/TestObjectFiles/trivial-object-test.elf-i386
new file mode 100644
index 000000000000..1a0ea40dfe12
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.elf-i386 differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64 b/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64
new file mode 100644
index 000000000000..889f5d96a699
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64 differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-i386 b/test/Object/TestObjectFiles/trivial-object-test.macho-i386
new file mode 100644
index 000000000000..099bd1ebf23b
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.macho-i386 differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64 b/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64
new file mode 100644
index 000000000000..93eeb5deceb0
Binary files /dev/null and b/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64 differ
diff --git a/test/Object/dg.exp b/test/Object/dg.exp
new file mode 100644
index 000000000000..be82c513920e
--- /dev/null
+++ b/test/Object/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{test}]]
diff --git a/test/Object/nm-trivial-object.test-broken b/test/Object/nm-trivial-object.test-broken
new file mode 100644
index 000000000000..6de1780a1608
--- /dev/null
+++ b/test/Object/nm-trivial-object.test-broken
@@ -0,0 +1,19 @@
+RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN:         | FileCheck %s -check-prefix COFF
+RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN:         | FileCheck %s -check-prefix COFF
+RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF
+
+COFF: 00000000 d .data
+COFF: 00000000 t .text
+COFF: 00000000 d L{{_?}}.str
+COFF:          U {{_?}}SomeOtherFunction
+COFF: 00000000 T {{_?}}main
+COFF:          U {{_?}}puts
+
+ELF:          U SomeOtherFunction
+ELF: 00000000 T main
+ELF:          U puts
diff --git a/test/Object/objdump-trivial-object.test-broken b/test/Object/objdump-trivial-object.test-broken
new file mode 100644
index 000000000000..c4855fdfd682
--- /dev/null
+++ b/test/Object/objdump-trivial-object.test-broken
@@ -0,0 +1,54 @@
+RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN:              | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN:              | FileCheck %s -check-prefix COFF-x86-64
+RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN:              | FileCheck %s -check-prefix ELF-x86-64
+
+COFF-i386: file format COFF-i386
+COFF-i386: Disassembly of section .text:
+COFF-i386:        0:       83 ec 0c                                        subl    $12, %esp
+COFF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+COFF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
+COFF-i386:       12:       e8 00 00 00 00                                  calll   0
+COFF-i386:       17:       e8 00 00 00 00                                  calll   0
+COFF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+COFF-i386:       20:       83 c4 0c                                        addl    $12, %esp
+COFF-i386:       23:       c3                                              ret
+
+COFF-x86-64: file format COFF-x86-64
+COFF-x86-64: Disassembly of section .text:
+COFF-x86-64:        0:       48 83 ec 28                                     subq    $40, %rsp
+COFF-x86-64:        4:       c7 44 24 24 00 00 00 00                         movl    $0, 36(%rsp)
+COFF-x86-64:        c:       48 8d 0d 00 00 00 00                            leaq    (%rip), %rcx
+COFF-x86-64:       13:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:       18:       e8 00 00 00 00                                  callq   0
+COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
+COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
+COFF-x86-64:       25:       c3                                              ret
+
+
+ELF-i386: file format ELF32-i386
+ELF-i386: Disassembly of section .text:
+ELF-i386:        0:       83 ec 0c                                        subl    $12, %esp
+ELF-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+ELF-i386:        b:       c7 04 24 00 00 00 00                            movl    $0, (%esp)
+ELF-i386:       12:       e8 fc ff ff ff                                  calll   -4
+ELF-i386:       17:       e8 fc ff ff ff                                  calll   -4
+ELF-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+ELF-i386:       20:       83 c4 0c                                        addl    $12, %esp
+ELF-i386:       23:       c3                                              ret
+
+ELF-x86-64: file format ELF64-x86-64
+ELF-x86-64: Disassembly of section .text:
+ELF-x86-64:        0:       48 83 ec 08                                     subq    $8, %rsp
+ELF-x86-64:        4:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
+ELF-x86-64:        c:       bf 00 00 00 00                                  movl    $0, %edi
+ELF-x86-64:       11:       e8 00 00 00 00                                  callq   0
+ELF-x86-64:       16:       30 c0                                           xorb    %al, %al
+ELF-x86-64:       18:       e8 00 00 00 00                                  callq   0
+ELF-x86-64:       1d:       8b 44 24 04                                     movl    4(%rsp), %eax
+ELF-x86-64:       21:       48 83 c4 08                                     addq    $8, %rsp
+ELF-x86-64:       25:       c3                                              ret
diff --git a/test/Other/2008-08-14-PassManager.ll b/test/Other/2008-08-14-PassManager.ll
deleted file mode 100644
index 8d6a6d825f10..000000000000
--- a/test/Other/2008-08-14-PassManager.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: opt < %s -loop-deletion -loop-index-split -disable-output
-; PR2640
-define i32 @test1() {
-       ret i32 0
-}
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
index 60fab3df0d9f..40a01cc3143e 100644
--- a/test/Other/close-stderr.ll
+++ b/test/Other/close-stderr.ll
@@ -4,6 +4,8 @@
 ; RUN:       " | FileCheck %s
 ; CHECK: {{^1$}}
 ; CHECK: {{^0$}}
+; XFAIL: vg_leak
+; REQUIRES: shell
 
 ; Test that the error handling when writing to stderr fails exits the
 ; program cleanly rather than aborting.
diff --git a/test/Other/extract.ll b/test/Other/extract.ll
new file mode 100644
index 000000000000..57573ed76f9a
--- /dev/null
+++ b/test/Other/extract.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-extract -func foo -S < %s | FileCheck %s
+; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
+; RUN: llvm-as < %s > %t
+; RUN: llvm-extract -func foo -S %t | FileCheck %s
+; RUN: llvm-extract -delete -func foo -S %t | FileCheck --check-prefix=DELETE %s
+
+; llvm-extract uses lazy bitcode loading, so make sure it correctly reads
+; from bitcode files in addition to assembly files.
+
+; CHECK: define void @foo() {
+; CHECK:   ret void
+; CHECK: }
+
+; The linkonce_odr linkage for foo() should be changed to external linkage.
+; DELETE: declare void @foo()
+; DELETE: define void @bar() {
+; DELETE:   call void @foo()
+; DELETE:   ret void
+; DELETE: }
+
+define linkonce_odr void @foo() {
+  ret void
+}
+define void @bar() {
+  call void @foo()
+  ret void
+}
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index fcef7ee2d571..4aa984e2e1b7 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -1,4 +1,4 @@
-; RUN: opt -lint -disable-output < %s |& FileCheck %s
+; RUN: opt -basicaa -lint -disable-output < %s |& FileCheck %s
 target datalayout = "e-p:64:64:64"
 
 declare fastcc void @bar()
diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py
index 0af3d368d5d0..36ec53932c63 100755
--- a/test/Scripts/coff-dump.py
+++ b/test/Scripts/coff-dump.py
@@ -13,216 +13,217 @@
 #
 
 def string_table_entry (offset):
-	return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s'))
+  return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s'))
 
 def secname(value):
-	if value[0] == '/':
-		return string_table_entry (value [1:].rstrip('\0'))
-	else:
-		return '%s'
+  if value[0] == '/':
+    return string_table_entry(value[1:].rstrip('\0'))
+  else:
+    return '%s'
 
 def symname(value):
-	parts = struct.unpack("<2L", value)
-	if parts [0] == 0:
-		return string_table_entry (parts [1])
-	else:
-		return '%s'
+  parts = struct.unpack("<2L", value)
+  if parts[0] == 0:
+    return string_table_entry(parts[1])
+  else:
+    return '%s'
 
 file = ('struct', [
-	('MachineType', ('enum', '<H', '0x%X', {
-		0x0:    'IMAGE_FILE_MACHINE_UNKNOWN',
-		0x1d3:  'IMAGE_FILE_MACHINE_AM33',
-		0x8664: 'IMAGE_FILE_MACHINE_AMD64',
-		0x1c0:  'IMAGE_FILE_MACHINE_ARM',
-		0xebc:  'IMAGE_FILE_MACHINE_EBC',
-		0x14c:  'IMAGE_FILE_MACHINE_I386',
-		0x200:  'IMAGE_FILE_MACHINE_IA64',
-		0x904:  'IMAGE_FILE_MACHINE_M32R',
-		0x266:  'IMAGE_FILE_MACHINE_MIPS16',
-		0x366:  'IMAGE_FILE_MACHINE_MIPSFPU',
-		0x466:  'IMAGE_FILE_MACHINE_MIPSFPU16',
-		0x1f0:  'IMAGE_FILE_MACHINE_POWERPC',
-		0x1f1:  'IMAGE_FILE_MACHINE_POWERPCFP',
-		0x166:  'IMAGE_FILE_MACHINE_R4000',
-		0x1a2:  'IMAGE_FILE_MACHINE_SH3',
-		0x1a3:  'IMAGE_FILE_MACHINE_SH3DSP',
-		0x1a6:  'IMAGE_FILE_MACHINE_SH4',
-		0x1a8:  'IMAGE_FILE_MACHINE_SH5',
-		0x1c2:  'IMAGE_FILE_MACHINE_THUMB',
-		0x169:  'IMAGE_FILE_MACHINE_WCEMIPSV2',
-	})),
-	('NumberOfSections',     ('scalar',  '<H', '%d')),
-	('TimeDateStamp',        ('scalar',  '<L', '%d')),
-	('PointerToSymbolTable', ('scalar',  '<L', '0x%0X')),
-	('NumberOfSymbols',      ('scalar',  '<L', '%d')),
-	('SizeOfOptionalHeader', ('scalar',  '<H', '%d')),
-	('Characteristics',      ('flags',   '<H', '0x%x', [
-		(0x0001,      'IMAGE_FILE_RELOCS_STRIPPED',         ),
-		(0x0002,      'IMAGE_FILE_EXECUTABLE_IMAGE',        ),
-		(0x0004,      'IMAGE_FILE_LINE_NUMS_STRIPPED',      ),
-		(0x0008,      'IMAGE_FILE_LOCAL_SYMS_STRIPPED',     ),
-		(0x0010,      'IMAGE_FILE_AGGRESSIVE_WS_TRIM',      ),
-		(0x0020,      'IMAGE_FILE_LARGE_ADDRESS_AWARE',     ),
-		(0x0080,      'IMAGE_FILE_BYTES_REVERSED_LO',       ),
-		(0x0100,      'IMAGE_FILE_32BIT_MACHINE',           ),
-		(0x0200,      'IMAGE_FILE_DEBUG_STRIPPED',          ),
-		(0x0400,      'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ),
-		(0x0800,      'IMAGE_FILE_NET_RUN_FROM_SWAP',       ),
-		(0x1000,      'IMAGE_FILE_SYSTEM',                  ),
-		(0x2000,      'IMAGE_FILE_DLL',                     ),
-		(0x4000,      'IMAGE_FILE_UP_SYSTEM_ONLY',          ),
-		(0x8000,      'IMAGE_FILE_BYTES_REVERSED_HI',       ),
-	])),
-	('Sections', ('array', 'NumberOfSections', ('struct', [
-		('Name',                 ('scalar',  '<8s', secname)),
-		('VirtualSize',          ('scalar',  '<L',  '%d'   )),
-		('VirtualAddress',       ('scalar',  '<L',  '%d'   )),
-		('SizeOfRawData',        ('scalar',  '<L',  '%d'   )),
-		('PointerToRawData',     ('scalar',  '<L',  '0x%X' )),
-		('PointerToRelocations', ('scalar',  '<L',  '0x%X' )),
-		('PointerToLineNumbers', ('scalar',  '<L',  '0x%X' )),
-		('NumberOfRelocations',  ('scalar',  '<H',  '%d'   )),
-		('NumberOfLineNumbers',  ('scalar',  '<H',  '%d'   )),
-		('Charateristics',       ('flags',   '<L',  '0x%X', [
-			(0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'),
-			(0x00000020, 'IMAGE_SCN_CNT_CODE'),
-			(0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'),
-			(0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'),
-			(0x00000100, 'IMAGE_SCN_LNK_OTHER'),
-			(0x00000200, 'IMAGE_SCN_LNK_INFO'),
-			(0x00000800, 'IMAGE_SCN_LNK_REMOVE'),
-			(0x00001000, 'IMAGE_SCN_LNK_COMDAT'),
-			(0x00008000, 'IMAGE_SCN_GPREL'),
-			(0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'),
-			(0x00020000, 'IMAGE_SCN_MEM_16BIT'),
-			(0x00040000, 'IMAGE_SCN_MEM_LOCKED'),
-			(0x00080000, 'IMAGE_SCN_MEM_PRELOAD'),
-			(0x00F00000, 'IMAGE_SCN_ALIGN', {
-				0x00100000: 'IMAGE_SCN_ALIGN_1BYTES',
-				0x00200000: 'IMAGE_SCN_ALIGN_2BYTES',
-				0x00300000: 'IMAGE_SCN_ALIGN_4BYTES',
-				0x00400000: 'IMAGE_SCN_ALIGN_8BYTES',
-				0x00500000: 'IMAGE_SCN_ALIGN_16BYTES',
-				0x00600000: 'IMAGE_SCN_ALIGN_32BYTES',
-				0x00700000: 'IMAGE_SCN_ALIGN_64BYTES',
-				0x00800000: 'IMAGE_SCN_ALIGN_128BYTES',
-				0x00900000: 'IMAGE_SCN_ALIGN_256BYTES',
-				0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES',
-				0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES',
-				0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES',
-				0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES',
-				0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES',
-			}),
-			(0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'),
-			(0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'),
-			(0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'),
-			(0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'),
-			(0x10000000, 'IMAGE_SCN_MEM_SHARED'),
-			(0x20000000, 'IMAGE_SCN_MEM_EXECUTE'),
-			(0x40000000, 'IMAGE_SCN_MEM_READ'),
-			(0x80000000, 'IMAGE_SCN_MEM_WRITE'),
-		])),
-		('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))),
-		('Relocations', ('ptr', 'PointerToRelocations', ('array', 'NumberOfRelocations', ('struct', [
-			('VirtualAddress',   ('scalar', '<L', '0x%X')),
-			('SymbolTableIndex', ('scalar', '<L', '%d'  )),
-			('Type',             ('enum', '<H', '%d', ('MachineType', {
-				0x14c: {
-					0x0000: 'IMAGE_REL_I386_ABSOLUTE',
-					0x0001: 'IMAGE_REL_I386_DIR16',
-					0x0002: 'IMAGE_REL_I386_REL16',
-					0x0006: 'IMAGE_REL_I386_DIR32',
-					0x0007: 'IMAGE_REL_I386_DIR32NB',
-					0x0009: 'IMAGE_REL_I386_SEG12',
-					0x000A: 'IMAGE_REL_I386_SECTION',
-					0x000B: 'IMAGE_REL_I386_SECREL',
-					0x000C: 'IMAGE_REL_I386_TOKEN',
-					0x000D: 'IMAGE_REL_I386_SECREL7',
-					0x0014: 'IMAGE_REL_I386_REL32',
-				},
-				0x8664: {
-					0x0000: 'IMAGE_REL_AMD64_ABSOLUTE',
-					0x0001: 'IMAGE_REL_AMD64_ADDR64',
-					0x0002: 'IMAGE_REL_AMD64_ADDR32',
-					0x0003: 'IMAGE_REL_AMD64_ADDR32NB',
-					0x0004: 'IMAGE_REL_AMD64_REL32',
-					0x0005: 'IMAGE_REL_AMD64_REL32_1',
-					0x0006: 'IMAGE_REL_AMD64_REL32_2',
-					0x0007: 'IMAGE_REL_AMD64_REL32_3',
-					0x0008: 'IMAGE_REL_AMD64_REL32_4',
-					0x0009: 'IMAGE_REL_AMD64_REL32_5',
-					0x000A: 'IMAGE_REL_AMD64_SECTION',
-					0x000B: 'IMAGE_REL_AMD64_SECREL',
-					0x000C: 'IMAGE_REL_AMD64_SECREL7',
-					0x000D: 'IMAGE_REL_AMD64_TOKEN',
-					0x000E: 'IMAGE_REL_AMD64_SREL32',
-					0x000F: 'IMAGE_REL_AMD64_PAIR',
-					0x0010: 'IMAGE_REL_AMD64_SSPAN32',
-				},
-			}))),
-			('SymbolName',       ('ptr', '+ PointerToSymbolTable * - SymbolTableIndex 1 18', ('scalar',  '<8s', symname)))
-		])))),
-	]))),
-	('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '* NumberOfSymbols 18',  ('struct', [
-		('Name',                ('scalar',  '<8s', symname)),
-		('Value',               ('scalar',  '<L',  '%d'   )),
-		('SectionNumber',       ('scalar',  '<H',  '%d'   )),
-		('SimpleType',          ('enum',    '<B',  '%d', {
-			0: 'IMAGE_SYM_TYPE_NULL',
-			1: 'IMAGE_SYM_TYPE_VOID',
-			2: 'IMAGE_SYM_TYPE_CHAR',
-			3: 'IMAGE_SYM_TYPE_SHORT',
-			4: 'IMAGE_SYM_TYPE_INT',
-			5: 'IMAGE_SYM_TYPE_LONG',
-			6: 'IMAGE_SYM_TYPE_FLOAT',
-			7: 'IMAGE_SYM_TYPE_DOUBLE',
-			8: 'IMAGE_SYM_TYPE_STRUCT',
-			9: 'IMAGE_SYM_TYPE_UNION',
-			10: 'IMAGE_SYM_TYPE_ENUM',
-			11: 'IMAGE_SYM_TYPE_MOE',
-			12: 'IMAGE_SYM_TYPE_BYTE',
-			13: 'IMAGE_SYM_TYPE_WORD',
-			14: 'IMAGE_SYM_TYPE_UINT',
-			15: 'IMAGE_SYM_TYPE_DWORD',
-		})),
-		('ComplexType',         ('enum',    '<B',  '%d', {
-			0: 'IMAGE_SYM_DTYPE_NULL',
-			1: 'IMAGE_SYM_DTYPE_POINTER',
-			2: 'IMAGE_SYM_DTYPE_FUNCTION',
-			3: 'IMAGE_SYM_DTYPE_ARRAY',
-		})),
-		('StorageClass',        ('enum',    '<B',  '%d', {
-			-1:	'IMAGE_SYM_CLASS_END_OF_FUNCTION',
-			0: 'IMAGE_SYM_CLASS_NULL',
-			1: 'IMAGE_SYM_CLASS_AUTOMATIC',
-			2: 'IMAGE_SYM_CLASS_EXTERNAL',
-			3: 'IMAGE_SYM_CLASS_STATIC',
-			4: 'IMAGE_SYM_CLASS_REGISTER',
-			5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF',
-			6: 'IMAGE_SYM_CLASS_LABEL',
-			7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL',
-			8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT',
-			9: 'IMAGE_SYM_CLASS_ARGUMENT',
-			10: 'IMAGE_SYM_CLASS_STRUCT_TAG',
-			11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION',
-			12: 'IMAGE_SYM_CLASS_UNION_TAG',
-			13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION',
-			14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC',
-			15: 'IMAGE_SYM_CLASS_ENUM_TAG',
-			16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM',
-			17: 'IMAGE_SYM_CLASS_REGISTER_PARAM',
-			18: 'IMAGE_SYM_CLASS_BIT_FIELD',
-			100: 'IMAGE_SYM_CLASS_BLOCK',
-			101: 'IMAGE_SYM_CLASS_FUNCTION',
-			102: 'IMAGE_SYM_CLASS_END_OF_STRUCT',
-			103: 'IMAGE_SYM_CLASS_FILE',
-			104: 'IMAGE_SYM_CLASS_SECTION',
-			105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL',
-			107: 'IMAGE_SYM_CLASS_CLR_TOKEN',
-		})),
-		('NumberOfAuxSymbols',  ('scalar',  '<B',  '%d'  )),
-		('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')),
-	])))),
+  ('MachineType', ('enum', '<H', '0x%X', {
+    0x0:    'IMAGE_FILE_MACHINE_UNKNOWN',
+    0x1d3:  'IMAGE_FILE_MACHINE_AM33',
+    0x8664: 'IMAGE_FILE_MACHINE_AMD64',
+    0x1c0:  'IMAGE_FILE_MACHINE_ARM',
+    0xebc:  'IMAGE_FILE_MACHINE_EBC',
+    0x14c:  'IMAGE_FILE_MACHINE_I386',
+    0x200:  'IMAGE_FILE_MACHINE_IA64',
+    0x904:  'IMAGE_FILE_MACHINE_M32R',
+    0x266:  'IMAGE_FILE_MACHINE_MIPS16',
+    0x366:  'IMAGE_FILE_MACHINE_MIPSFPU',
+    0x466:  'IMAGE_FILE_MACHINE_MIPSFPU16',
+    0x1f0:  'IMAGE_FILE_MACHINE_POWERPC',
+    0x1f1:  'IMAGE_FILE_MACHINE_POWERPCFP',
+    0x166:  'IMAGE_FILE_MACHINE_R4000',
+    0x1a2:  'IMAGE_FILE_MACHINE_SH3',
+    0x1a3:  'IMAGE_FILE_MACHINE_SH3DSP',
+    0x1a6:  'IMAGE_FILE_MACHINE_SH4',
+    0x1a8:  'IMAGE_FILE_MACHINE_SH5',
+    0x1c2:  'IMAGE_FILE_MACHINE_THUMB',
+    0x169:  'IMAGE_FILE_MACHINE_WCEMIPSV2',
+  })),
+  ('NumberOfSections',     ('scalar',  '<H', '%d')),
+  ('TimeDateStamp',        ('scalar',  '<L', '%d')),
+  ('PointerToSymbolTable', ('scalar',  '<L', '0x%0X')),
+  ('NumberOfSymbols',      ('scalar',  '<L', '%d')),
+  ('SizeOfOptionalHeader', ('scalar',  '<H', '%d')),
+  ('Characteristics',      ('flags',   '<H', '0x%x', [
+    (0x0001,      'IMAGE_FILE_RELOCS_STRIPPED',         ),
+    (0x0002,      'IMAGE_FILE_EXECUTABLE_IMAGE',        ),
+    (0x0004,      'IMAGE_FILE_LINE_NUMS_STRIPPED',      ),
+    (0x0008,      'IMAGE_FILE_LOCAL_SYMS_STRIPPED',     ),
+    (0x0010,      'IMAGE_FILE_AGGRESSIVE_WS_TRIM',      ),
+    (0x0020,      'IMAGE_FILE_LARGE_ADDRESS_AWARE',     ),
+    (0x0080,      'IMAGE_FILE_BYTES_REVERSED_LO',       ),
+    (0x0100,      'IMAGE_FILE_32BIT_MACHINE',           ),
+    (0x0200,      'IMAGE_FILE_DEBUG_STRIPPED',          ),
+    (0x0400,      'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ),
+    (0x0800,      'IMAGE_FILE_NET_RUN_FROM_SWAP',       ),
+    (0x1000,      'IMAGE_FILE_SYSTEM',                  ),
+    (0x2000,      'IMAGE_FILE_DLL',                     ),
+    (0x4000,      'IMAGE_FILE_UP_SYSTEM_ONLY',          ),
+    (0x8000,      'IMAGE_FILE_BYTES_REVERSED_HI',       ),
+  ])),
+  ('Sections', ('array', '1', 'NumberOfSections', ('struct', [
+    ('Name',                 ('scalar',  '<8s', secname)),
+    ('VirtualSize',          ('scalar',  '<L',  '%d'   )),
+    ('VirtualAddress',       ('scalar',  '<L',  '%d'   )),
+    ('SizeOfRawData',        ('scalar',  '<L',  '%d'   )),
+    ('PointerToRawData',     ('scalar',  '<L',  '0x%X' )),
+    ('PointerToRelocations', ('scalar',  '<L',  '0x%X' )),
+    ('PointerToLineNumbers', ('scalar',  '<L',  '0x%X' )),
+    ('NumberOfRelocations',  ('scalar',  '<H',  '%d'   )),
+    ('NumberOfLineNumbers',  ('scalar',  '<H',  '%d'   )),
+    ('Charateristics',       ('flags',   '<L',  '0x%X', [
+      (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'),
+      (0x00000020, 'IMAGE_SCN_CNT_CODE'),
+      (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'),
+      (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'),
+      (0x00000100, 'IMAGE_SCN_LNK_OTHER'),
+      (0x00000200, 'IMAGE_SCN_LNK_INFO'),
+      (0x00000800, 'IMAGE_SCN_LNK_REMOVE'),
+      (0x00001000, 'IMAGE_SCN_LNK_COMDAT'),
+      (0x00008000, 'IMAGE_SCN_GPREL'),
+      (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'),
+      (0x00020000, 'IMAGE_SCN_MEM_16BIT'),
+      (0x00040000, 'IMAGE_SCN_MEM_LOCKED'),
+      (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'),
+      (0x00F00000, 'IMAGE_SCN_ALIGN', {
+        0x00100000: 'IMAGE_SCN_ALIGN_1BYTES',
+        0x00200000: 'IMAGE_SCN_ALIGN_2BYTES',
+        0x00300000: 'IMAGE_SCN_ALIGN_4BYTES',
+        0x00400000: 'IMAGE_SCN_ALIGN_8BYTES',
+        0x00500000: 'IMAGE_SCN_ALIGN_16BYTES',
+        0x00600000: 'IMAGE_SCN_ALIGN_32BYTES',
+        0x00700000: 'IMAGE_SCN_ALIGN_64BYTES',
+        0x00800000: 'IMAGE_SCN_ALIGN_128BYTES',
+        0x00900000: 'IMAGE_SCN_ALIGN_256BYTES',
+        0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES',
+        0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES',
+        0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES',
+        0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES',
+        0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES',
+      }),
+      (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'),
+      (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'),
+      (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'),
+      (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'),
+      (0x10000000, 'IMAGE_SCN_MEM_SHARED'),
+      (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'),
+      (0x40000000, 'IMAGE_SCN_MEM_READ'),
+      (0x80000000, 'IMAGE_SCN_MEM_WRITE'),
+    ])),
+    ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))),
+    ('Relocations', ('ptr', 'PointerToRelocations', ('array', '0', 'NumberOfRelocations', ('struct', [
+      ('VirtualAddress',   ('scalar', '<L', '0x%X')),
+      ('SymbolTableIndex', ('scalar', '<L', '%d'  )),
+      ('Type',             ('enum', '<H', '%d', ('MachineType', {
+        0x14c: {
+          0x0000: 'IMAGE_REL_I386_ABSOLUTE',
+          0x0001: 'IMAGE_REL_I386_DIR16',
+          0x0002: 'IMAGE_REL_I386_REL16',
+          0x0006: 'IMAGE_REL_I386_DIR32',
+          0x0007: 'IMAGE_REL_I386_DIR32NB',
+          0x0009: 'IMAGE_REL_I386_SEG12',
+          0x000A: 'IMAGE_REL_I386_SECTION',
+          0x000B: 'IMAGE_REL_I386_SECREL',
+          0x000C: 'IMAGE_REL_I386_TOKEN',
+          0x000D: 'IMAGE_REL_I386_SECREL7',
+          0x0014: 'IMAGE_REL_I386_REL32',
+        },
+        0x8664: {
+          0x0000: 'IMAGE_REL_AMD64_ABSOLUTE',
+          0x0001: 'IMAGE_REL_AMD64_ADDR64',
+          0x0002: 'IMAGE_REL_AMD64_ADDR32',
+          0x0003: 'IMAGE_REL_AMD64_ADDR32NB',
+          0x0004: 'IMAGE_REL_AMD64_REL32',
+          0x0005: 'IMAGE_REL_AMD64_REL32_1',
+          0x0006: 'IMAGE_REL_AMD64_REL32_2',
+          0x0007: 'IMAGE_REL_AMD64_REL32_3',
+          0x0008: 'IMAGE_REL_AMD64_REL32_4',
+          0x0009: 'IMAGE_REL_AMD64_REL32_5',
+          0x000A: 'IMAGE_REL_AMD64_SECTION',
+          0x000B: 'IMAGE_REL_AMD64_SECREL',
+          0x000C: 'IMAGE_REL_AMD64_SECREL7',
+          0x000D: 'IMAGE_REL_AMD64_TOKEN',
+          0x000E: 'IMAGE_REL_AMD64_SREL32',
+          0x000F: 'IMAGE_REL_AMD64_PAIR',
+          0x0010: 'IMAGE_REL_AMD64_SSPAN32',
+        },
+      }))),
+      ('SymbolName',       ('ptr', '+ PointerToSymbolTable * SymbolTableIndex 18', ('scalar',  '<8s', symname)))
+    ])))),
+  ]))),
+  ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '18', '* NumberOfSymbols 18',  ('struct', [
+    ('Name',                ('scalar',  '<8s', symname)),
+    ('Value',               ('scalar',  '<L',  '%d'   )),
+    ('SectionNumber',       ('scalar',  '<H',  '%d'   )),
+    ('_Type',               ('scalar',  '<H',  None   )),
+    ('SimpleType',          ('enum',    '& _Type 15',  '%d', {
+      0: 'IMAGE_SYM_TYPE_NULL',
+      1: 'IMAGE_SYM_TYPE_VOID',
+      2: 'IMAGE_SYM_TYPE_CHAR',
+      3: 'IMAGE_SYM_TYPE_SHORT',
+      4: 'IMAGE_SYM_TYPE_INT',
+      5: 'IMAGE_SYM_TYPE_LONG',
+      6: 'IMAGE_SYM_TYPE_FLOAT',
+      7: 'IMAGE_SYM_TYPE_DOUBLE',
+      8: 'IMAGE_SYM_TYPE_STRUCT',
+      9: 'IMAGE_SYM_TYPE_UNION',
+      10: 'IMAGE_SYM_TYPE_ENUM',
+      11: 'IMAGE_SYM_TYPE_MOE',
+      12: 'IMAGE_SYM_TYPE_BYTE',
+      13: 'IMAGE_SYM_TYPE_WORD',
+      14: 'IMAGE_SYM_TYPE_UINT',
+      15: 'IMAGE_SYM_TYPE_DWORD',
+    })),                                # (Type & 0xF0) >> 4
+    ('ComplexType',         ('enum',    '>> & _Type 240 4',  '%d', {
+      0: 'IMAGE_SYM_DTYPE_NULL',
+      1: 'IMAGE_SYM_DTYPE_POINTER',
+      2: 'IMAGE_SYM_DTYPE_FUNCTION',
+      3: 'IMAGE_SYM_DTYPE_ARRAY',
+    })),
+    ('StorageClass',        ('enum',    '<B',  '%d', {
+      -1:  'IMAGE_SYM_CLASS_END_OF_FUNCTION',
+      0: 'IMAGE_SYM_CLASS_NULL',
+      1: 'IMAGE_SYM_CLASS_AUTOMATIC',
+      2: 'IMAGE_SYM_CLASS_EXTERNAL',
+      3: 'IMAGE_SYM_CLASS_STATIC',
+      4: 'IMAGE_SYM_CLASS_REGISTER',
+      5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF',
+      6: 'IMAGE_SYM_CLASS_LABEL',
+      7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL',
+      8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT',
+      9: 'IMAGE_SYM_CLASS_ARGUMENT',
+      10: 'IMAGE_SYM_CLASS_STRUCT_TAG',
+      11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION',
+      12: 'IMAGE_SYM_CLASS_UNION_TAG',
+      13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION',
+      14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC',
+      15: 'IMAGE_SYM_CLASS_ENUM_TAG',
+      16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM',
+      17: 'IMAGE_SYM_CLASS_REGISTER_PARAM',
+      18: 'IMAGE_SYM_CLASS_BIT_FIELD',
+      100: 'IMAGE_SYM_CLASS_BLOCK',
+      101: 'IMAGE_SYM_CLASS_FUNCTION',
+      102: 'IMAGE_SYM_CLASS_END_OF_STRUCT',
+      103: 'IMAGE_SYM_CLASS_FILE',
+      104: 'IMAGE_SYM_CLASS_SECTION',
+      105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL',
+      107: 'IMAGE_SYM_CLASS_CLR_TOKEN',
+    })),
+    ('NumberOfAuxSymbols',  ('scalar',  '<B',  '%d'  )),
+    ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')),
+  ])))),
 ])
 
 #
@@ -239,328 +240,351 @@ Indent = 0
 NewLine = True
 
 def indent():
-	global Indent
-	Indent += 1
+  global Indent
+  Indent += 1
 
 def dedent():
-	global Indent
-	Indent -= 1
+  global Indent
+  Indent -= 1
 
 def write(input):
-	global NewLine
-	output = ""
-	
-	for char in input:
-		
-		if NewLine:
-			output += Indent * '  '
-			NewLine = False
-			
-		output += char
-		
-		if char == '\n':
-			NewLine = True
-	
-	sys.stdout.write (output)
+  global NewLine
+  output = ""
+
+  for char in input:
+
+    if NewLine:
+      output += Indent * '  '
+      NewLine = False
+
+    output += char
+
+    if char == '\n':
+      NewLine = True
+
+  sys.stdout.write(output)
 
 def read(format):
-	return struct.unpack (format, Input.read(struct.calcsize(format)))
-
-def read_cstr ():
-	output = ""
-	while True:
-		char = Input.read (1)
-		if len (char) == 0:
-			raise RuntimeError ("EOF while reading cstr")
-		if char == '\0':
-			break
-		output += char
-	return output
+  return struct.unpack(format, Input.read(struct.calcsize(format)))
+
+def read_cstr():
+  output = ""
+  while True:
+    char = Input.read(1)
+    if len(char) == 0:
+      raise RuntimeError ("EOF while reading cstr")
+    if char == '\0':
+      break
+    output += char
+  return output
 
 def push_pos(seek_to = None):
-	Stack [0:0] = [Input.tell ()]
-	if seek_to:
-		Input.seek (seek_to)
+  Stack [0:0] = [Input.tell()]
+  if seek_to:
+    Input.seek(seek_to)
 
 def pop_pos():
-	assert(len (Stack) > 0)
-	Input.seek (Stack [0])
-	del Stack [0]
+  assert(len(Stack) > 0)
+  Input.seek(Stack[0])
+  del Stack[0]
 
 def print_binary_data(size):
-	value = ""
-	while size > 0:
-		if size >= 16:
-			data = Input.read(16)
-			size -= 16
-		else:
-			data = Input.read(size)
-			size = 0
-		value += data
-		bytes = ""
-		text = ""
-		for index in xrange (16):
-			if index < len (data):
-				if index == 8:
-					bytes += "- "
-				ch = ord (data [index])
-				bytes += "%02X " % ch
-				if ch >= 0x20 and ch <= 0x7F:
-					text += data [index]
-				else:
-					text += "."
-			else:
-				if index == 8:
-					bytes += "  "
-				bytes += "   "
-		
-		write ("%s|%s|\n" % (bytes, text))
-	return value
-
-idlit = re.compile ("[a-zA-Z][a-zA-Z0-9_-]*")
-numlit = re.compile ("[0-9]+")
+  value = ""
+  while size > 0:
+    if size >= 16:
+      data = Input.read(16)
+      size -= 16
+    else:
+      data = Input.read(size)
+      size = 0
+    value += data
+    bytes = ""
+    text = ""
+    for index in xrange(16):
+      if index < len(data):
+        if index == 8:
+          bytes += "- "
+        ch = ord(data[index])
+        bytes += "%02X " % ch
+        if ch >= 0x20 and ch <= 0x7F:
+          text += data[index]
+        else:
+          text += "."
+      else:
+        if index == 8:
+          bytes += "  "
+        bytes += "   "
+
+    write("%s|%s|\n" % (bytes, text))
+  return value
+
+idlit = re.compile("[a-zA-Z_][a-zA-Z0-9_-]*")
+numlit = re.compile("[0-9]+")
 
 def read_value(expr):
 
-	input = iter (expr.split ())
-	
-	def eval():
-		
-		token = input.next ()
-		
-		if expr == 'cstr':
-			return read_cstr ()
-		if expr == 'true':
-			return True
-		if expr == 'false':
-			return False
-		
-		if len (token) > 1 and token [0] in ('=', '@', '<', '!', '>'):
-			val = read(expr)
-			assert (len (val) == 1)
-			return val [0]
-		
-		if token == '+':
-			return eval () + eval ()
-		if token == '-':
-			return eval () - eval ()
-		if token == '*':
-			return eval () * eval ()
-		if token == '/':
-			return eval () / eval ()
-		
-		if idlit.match (token):
-			return Fields [token]
-		if numlit.match (token):
-			return int (token)
-		
-		raise RuntimeError ("unexpected token %s" % repr(token))
-	
-	value = eval ()
-	
-	try:
-		input.next ()
-	except StopIteration:
-		return value
-	raise RuntimeError("unexpected input at end of expression")
+  input = iter(expr.split())
+
+  def eval():
+
+    token = input.next()
+
+    if expr == 'cstr':
+      return read_cstr()
+    if expr == 'true':
+      return True
+    if expr == 'false':
+      return False
+
+    if token == '+':
+      return eval() + eval()
+    if token == '-':
+      return eval() - eval()
+    if token == '*':
+      return eval() * eval()
+    if token == '/':
+      return eval() / eval()
+    if token == '&':
+      return eval() & eval()
+    if token == '|':
+      return eval() | eval()
+    if token == '>>':
+      return eval() >> eval()
+    if token == '<<':
+      return eval() << eval()
+
+    if len(token) > 1 and token[0] in ('=', '@', '<', '!', '>'):
+      val = read(expr)
+      assert(len(val) == 1)
+      return val[0]
+
+    if idlit.match(token):
+      return Fields[token]
+    if numlit.match(token):
+      return int(token)
+
+    raise RuntimeError("unexpected token %s" % repr(token))
+
+  value = eval()
+
+  try:
+    input.next()
+  except StopIteration:
+    return value
+  raise RuntimeError("unexpected input at end of expression")
 
 def write_value(format,value):
-	format_type = type (format)
-	if format_type is types.StringType:
-		write (format%value)
-	elif format_type is types.FunctionType:
-		write_value (format (value), value)
-	elif format_type is types.TupleType:
-		Fields ['this'] = value
-		handle_element (format)
-	else:
-		raise RuntimeError("unexpected type: %s" % repr(format_type))
+  format_type = type(format)
+  if format_type is types.StringType:
+    write(format % value)
+  elif format_type is types.FunctionType:
+    write_value(format(value), value)
+  elif format_type is types.TupleType:
+    Fields['this'] = value
+    handle_element(format)
+  elif format_type is types.NoneType:
+    pass
+  else:
+    raise RuntimeError("unexpected type: %s" % repr(format_type))
 
 def handle_scalar(entry):
-	iformat = entry [1]
-	oformat = entry [2]
-	
-	value = read_value (iformat)
-	
-	write_value (oformat, value)
-	
-	return value
+  iformat = entry[1]
+  oformat = entry[2]
+
+  value = read_value(iformat)
+
+  write_value(oformat, value)
+
+  return value
 
 def handle_enum(entry):
-	iformat = entry [1]
-	oformat = entry [2]
-	definitions = entry [3]
-	
-	value = read_value (iformat)
-	
-	if type (definitions) is types.TupleType:
-		selector = read_value (definitions [0])
-		definitions = definitions [1] [selector]
-	
-	if value in definitions:
-		description = definitions[value]
-	else:
-		description = "unknown"
-
-	write ("%s (" % description)
-	write_value (oformat, value)
-	write (")")
-	
-	return value
+  iformat = entry[1]
+  oformat = entry[2]
+  definitions = entry[3]
+
+  value = read_value(iformat)
+
+  if type(definitions) is types.TupleType:
+    selector = read_value(definitions[0])
+    definitions = definitions[1][selector]
+
+  if value in definitions:
+    description = definitions[value]
+  else:
+    description = "unknown"
+
+  write("%s (" % description)
+  write_value(oformat, value)
+  write(")")
+
+  return value
 
 def handle_flags(entry):
-	iformat = entry [1]
-	oformat = entry [2]
-	definitions = entry [3]
-	
-	value = read_value (iformat)
-	
-	write_value (oformat, value)
-	
-	indent ()
-	for entry in definitions:
-		mask = entry [0]
-		name = entry [1]
-		if len (entry) == 3:
-			map = entry [2]
-			selection = value & mask
-			if selection in map:
-				write("\n%s" % map[selection])
-			else:
-				write("\n%s <%d>" % (name, selection))
-		elif len (entry) == 2:
-			if value & mask != 0:
-				write("\n%s" % name)
-	dedent ()
-	
-	return value
+  iformat = entry[1]
+  oformat = entry[2]
+  definitions = entry[3]
+
+  value = read_value(iformat)
+
+  write_value(oformat, value)
+
+  indent()
+  for entry in definitions:
+    mask = entry[0]
+    name = entry[1]
+    if len (entry) == 3:
+      map = entry[2]
+      selection = value & mask
+      if selection in map:
+        write("\n%s" % map[selection])
+      else:
+        write("\n%s <%d>" % (name, selection))
+    elif len(entry) == 2:
+      if value & mask != 0:
+        write("\n%s" % name)
+  dedent()
+
+  return value
 
 def handle_struct(entry):
-	global Fields
-	members = entry [1]
-	
-	newFields = {}
-	
-	write ("{\n");
-	indent ()
-	
-	for member in members:
-		name = member [0]
-		type = member [1]
-		
-		write("%s = "%name.ljust(24))
-		
-		value = handle_element(type)
-		
-		write("\n")
-		
-		Fields [name] = value
-		newFields [name] = value
-	
-	dedent ()
-	write ("}")
-	
-	return newFields
+  global Fields
+  members = entry[1]
+
+  newFields = {}
+
+  write("{\n");
+  indent()
+
+  for member in members:
+    name = member[0]
+    type = member[1]
+
+    if name[0] != "_":
+      write("%s = " % name.ljust(24))
+
+    value = handle_element(type)
+
+    if name[0] != "_":
+      write("\n")
+
+    Fields[name] = value
+    newFields[name] = value
+
+  dedent()
+  write("}")
+
+  return newFields
 
 def handle_array(entry):
-	length = entry [1]
-	element = entry [2]
-	
-	newItems = []
-	
-	write ("[\n")
-	indent ()
-	
-	value = read_value (length)
-	
-	for index in xrange (value):
-		write ("%d = "%index)
-		value = handle_element(element)
-		write ("\n")
-		newItems.append (value)
-	
-	dedent ()
-	write ("]")
-	
-	return newItems
+  start_index = entry[1]
+  length = entry[2]
+  element = entry[3]
+
+  newItems = []
+
+  write("[\n")
+  indent()
+
+  start_index = read_value(start_index)
+  value = read_value(length)
+
+  for index in xrange(value):
+    write("%d = " % (index + start_index))
+    value = handle_element(element)
+    write("\n")
+    newItems.append(value)
+
+  dedent()
+  write("]")
+
+  return newItems
 
 def handle_byte_array(entry):
-	length = entry [1]
-	element = entry [2]
-	
-	newItems = []
-	
-	write ("[\n")
-	indent ()
-	
-	value = read_value (length)
-	end_of_array = Input.tell () + value
-
-	index = 0
-	while Input.tell () < end_of_array:
-		write ("%d = "%index)
-		value = handle_element(element)
-		write ("\n")
-		newItems.append (value)
-		index += 1
-	
-	dedent ()
-	write ("]")
-	
-	return newItems
+  ent_size = entry[1]
+  length = entry[2]
+  element = entry[3]
+
+  newItems = []
+
+  write("[\n")
+  indent()
+
+  item_size = read_value(ent_size)
+  value = read_value(length)
+  end_of_array = Input.tell() + value
+
+  prev_loc = Input.tell()
+  index = 0
+  while Input.tell() < end_of_array:
+    write("%d = " % index)
+    value = handle_element(element)
+    write("\n")
+    newItems.append(value)
+    index += (Input.tell() - prev_loc) / item_size
+    prev_loc = Input.tell()
+
+  dedent()
+  write("]")
+
+  return newItems
 
 def handle_ptr(entry):
-	offset = entry[1]
-	element = entry [2]
-	
-	value = None
-	offset = read_value (offset)
-	
-	if offset != 0:
-		
-		push_pos (offset)
-		
-		value = handle_element (element)
-		
-		pop_pos ()
-	
-	else:
-		write ("None")
-	
-	return value
+  offset = entry[1]
+  element = entry[2]
+
+  value = None
+  offset = read_value(offset)
+
+  if offset != 0:
+
+    push_pos(offset)
+
+    value = handle_element(element)
+
+    pop_pos()
+
+  else:
+    write("None")
+
+  return value
 
 def handle_blob(entry):
-	length = entry [1]
-	
-	write ("\n")
-	indent ()
-	
-	value = print_binary_data (read_value (length))
-	
-	dedent ()
-	
-	return value
+  length = entry[1]
+
+  write("\n")
+  indent()
+
+  value = print_binary_data(read_value(length))
+
+  dedent()
+
+  return value
 
 def handle_element(entry):
-	handlers = {
-		'struct':      handle_struct,
-		'scalar':      handle_scalar,
-		'enum':        handle_enum,
-		'flags':       handle_flags,
-		'ptr':         handle_ptr,
-		'blob':        handle_blob,
-		'array':       handle_array,
-		'byte-array':  handle_byte_array,
-	}
-	
-	if not entry [0] in handlers:
-		raise RuntimeError ("unexpected type '%s'" % str (entry[0]))
-	
-	return handlers [entry [0]] (entry)
-
-Input = open (sys.argv [1], "rb")
+  handlers = {
+    'struct':      handle_struct,
+    'scalar':      handle_scalar,
+    'enum':        handle_enum,
+    'flags':       handle_flags,
+    'ptr':         handle_ptr,
+    'blob':        handle_blob,
+    'array':       handle_array,
+    'byte-array':  handle_byte_array,
+  }
+
+  if not entry[0] in handlers:
+    raise RuntimeError ("unexpected type '%s'" % str (entry[0]))
+
+  return handlers[entry[0]](entry)
+
+if len(sys.argv) <= 1 or sys.argv[1] == '-':
+  import StringIO
+  Input = StringIO.StringIO(sys.stdin.read())
+else:
+  Input = open (sys.argv[1], "rb")
+
 try:
-	handle_element (file)
+  handle_element(file)
 finally:
-	Input.close ()
-	Input = None
+  Input.close()
+  Input = None
diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat
index cc83eba1c446..56428e1a605c 100644
--- a/test/Scripts/coff-dump.py.bat
+++ b/test/Scripts/coff-dump.py.bat
@@ -1,4 +1,7 @@
 @echo off
 
-%PYTHON_EXECUTABLE% %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9
+@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
+@rem this in code, but I haven't found a way to do this in 2.6 yet.
+
+%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9
 
diff --git a/test/Scripts/common_dump.py b/test/Scripts/common_dump.py
new file mode 100644
index 000000000000..3d69c3fb27f2
--- /dev/null
+++ b/test/Scripts/common_dump.py
@@ -0,0 +1,46 @@
+def dataToHex(d):
+    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
+    """
+    bytes = []
+    for i,c in enumerate(d):
+        byte = ord(c)
+        hex_byte = hex(byte)[2:]
+        if byte <= 0xf:
+            hex_byte = '0' + hex_byte
+        if i % 4 == 3:
+            hex_byte += ' '
+        bytes.append(hex_byte)
+    return ''.join(bytes).strip()
+
+def dataToHexUnified(d):
+    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
+    Each 4byte number is prefixed with 0x for easy sed/rx
+    Fixme: convert all MC tests to use this routine instead of the above
+    """
+    bytes = []
+    for i,c in enumerate(d):
+        byte = ord(c)
+        hex_byte = hex(byte)[2:]
+        if byte <= 0xf:
+            hex_byte = '0' + hex_byte
+        if i % 4 == 0:
+            hex_byte = '0x' + hex_byte
+        if i % 4 == 3:
+            hex_byte += ' '
+        bytes.append(hex_byte)
+    return ''.join(bytes).strip()
+
+
+def HexDump(val, numBits=32):
+    """
+    1. do not print 'L'
+    2. Handle negatives and large numbers by mod (2^numBits)
+    3. print fixed length, prepend with zeros.
+       Length is exactly 2+(numBits/4)
+    4. Do print 0x Why?
+       so that they can be easily distinguished using sed/rx
+    """
+    val = val & (( 1 << numBits) - 1)
+    newFmt = "0x%0" + "%d" % (numBits / 4) + "x"
+    return newFmt % val
+
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
new file mode 100755
index 000000000000..76cdbf91c73f
--- /dev/null
+++ b/test/Scripts/elf-dump
@@ -0,0 +1,231 @@
+#!/usr/bin/env python
+
+import struct
+import sys
+import StringIO
+
+import common_dump
+
+class Reader:
+    def __init__(self, path):
+        if path == "-":
+            # Snarf all the data so we can seek.
+            self.file = StringIO.StringIO(sys.stdin.read())
+        else:
+            self.file = open(path, "rb")
+        self.isLSB = None
+        self.is64Bit = None
+
+    def seek(self, pos):
+        self.file.seek(pos)
+
+    def read(self, N):
+        data = self.file.read(N)
+        if len(data) != N:
+            raise ValueError, "Out of data!"
+        return data
+
+    def read8(self):
+        return ord(self.read(1))
+
+    def read16(self):
+        return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
+
+    def read32(self):
+        return struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
+
+    def read32S(self):
+        return struct.unpack('><'[self.isLSB] + 'i', self.read(4))[0]
+
+    def read64(self):
+        return struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
+
+    def read64S(self):
+        return struct.unpack('><'[self.isLSB] + 'q', self.read(8))[0]
+
+    def readWord(self):
+        if self.is64Bit:
+            return self.read64()
+        else:
+            return self.read32()
+
+    def readWordS(self):
+        if self.is64Bit:
+            return self.read64S()
+        else:
+            return self.read32S()
+
+class StringTable:
+    def __init__(self, strings):
+       self.string_table = strings
+
+    def __getitem__(self, index):
+       end = self.string_table.index('\x00', index)
+       return self.string_table[index:end]
+
+class Section:
+    def __init__(self, f):
+        self.sh_name = f.read32()
+        self.sh_type = f.read32()
+        self.sh_flags = f.readWord()
+        self.sh_addr = f.readWord()
+        self.sh_offset = f.readWord()
+        self.sh_size = f.readWord()
+        self.sh_link = f.read32()
+        self.sh_info = f.read32()
+        self.sh_addralign = f.readWord()
+        self.sh_entsize = f.readWord()
+
+    def dump(self, shstrtab, f, strtab, dumpdata):
+        print "  (('sh_name', %s)" % common_dump.HexDump(self.sh_name), "# %r" % shstrtab[self.sh_name]
+        print "   ('sh_type', %s)" % common_dump.HexDump(self.sh_type)
+        print "   ('sh_flags', %s)" % common_dump.HexDump(self.sh_flags)
+        print "   ('sh_addr', %s)" % common_dump.HexDump(self.sh_addr)
+        print "   ('sh_offset', %s)" % common_dump.HexDump(self.sh_offset)
+        print "   ('sh_size', %s)" % common_dump.HexDump(self.sh_size)
+        print "   ('sh_link', %s)" % common_dump.HexDump(self.sh_link)
+        print "   ('sh_info', %s)" % common_dump.HexDump(self.sh_info)
+        print "   ('sh_addralign', %s)" % common_dump.HexDump(self.sh_addralign)
+        print "   ('sh_entsize', %s)" % common_dump.HexDump(self.sh_entsize)
+        if self.sh_type == 2: # SHT_SYMTAB
+            print "   ('_symbols', ["
+            dumpSymtab(f, self, strtab)
+            print "   ])"
+        elif self.sh_type == 4 or self.sh_type == 9: # SHT_RELA / SHT_REL
+            print "   ('_relocations', ["
+            dumpRel(f, self, self.sh_type == 4)
+            print "   ])"
+        elif dumpdata:
+            f.seek(self.sh_offset)
+            if self.sh_type != 8: # != SHT_NOBITS
+                data = f.read(self.sh_size)
+                print "   ('_section_data', '%s')" % common_dump.dataToHex(data)
+            else:
+                print "   ('_section_data', '')" 
+        print "  ),"
+
+def dumpSymtab(f, section, strtab):
+    entries = section.sh_size // section.sh_entsize
+
+    for index in range(entries):
+        f.seek(section.sh_offset + index * section.sh_entsize)
+        print "    # Symbol %s" % common_dump.HexDump(index)
+        name = f.read32()
+        print "    (('st_name', %s)" % common_dump.HexDump(name), "# %r" % strtab[name]
+        if not f.is64Bit:
+            print "     ('st_value', %s)" % common_dump.HexDump(f.read32())
+            print "     ('st_size', %s)" % common_dump.HexDump(f.read32())
+        st_info = f.read8()
+        print "     ('st_bind', %s)" % common_dump.HexDump((st_info >> 4))
+        print "     ('st_type', %s)" % common_dump.HexDump((st_info & 0xf))
+        print "     ('st_other', %s)" % common_dump.HexDump(f.read8())
+        print "     ('st_shndx', %s)" % common_dump.HexDump(f.read16())
+        if f.is64Bit:
+            print "     ('st_value', %s)" % common_dump.HexDump(f.read64(), 64)
+            print "     ('st_size', %s)" % common_dump.HexDump(f.read64(), 64)
+        print "    ),"
+
+def dumpRel(f, section, dumprela = False):
+    entries = section.sh_size // section.sh_entsize
+
+    for index in range(entries):
+        f.seek(section.sh_offset + index * section.sh_entsize)
+        print "    # Relocation %s" % common_dump.HexDump(index)
+        print "    (('r_offset', %s)" % common_dump.HexDump(f.readWord())
+        r_info = f.readWord()
+        if f.is64Bit:
+            print "     ('r_sym', %s)" % common_dump.HexDump((r_info >> 32))
+            print "     ('r_type', %s)" % common_dump.HexDump((r_info & 0xffffffff))
+        else:
+            print "     ('r_sym', %s)" % common_dump.HexDump((r_info >> 8))
+            print "     ('r_type', %s)" % common_dump.HexDump((r_info & 0xff))
+        if dumprela:
+            print "     ('r_addend', %s)" % common_dump.HexDump(f.readWordS())
+        print "    ),"
+
+def dumpELF(path, opts):
+    f = Reader(path)
+
+    magic = f.read(4)
+    assert magic == '\x7FELF'
+
+    fileclass = f.read8()
+    if fileclass == 1: # ELFCLASS32
+        f.is64Bit = False
+    elif fileclass == 2: # ELFCLASS64
+        f.is64Bit = True
+    else:
+        raise ValueError, "Unknown file class %s" % common_dump.HexDump(fileclass)
+    print "('e_indent[EI_CLASS]', %s)" % common_dump.HexDump(fileclass)
+
+    byteordering = f.read8()
+    if byteordering == 1: # ELFDATA2LSB
+        f.isLSB = True
+    elif byteordering == 2: # ELFDATA2MSB
+        f.isLSB = False
+    else:
+        raise ValueError, "Unknown byte ordering %s" % common_dump.HexDump(byteordering)
+    print "('e_indent[EI_DATA]', %s)" % common_dump.HexDump(byteordering)
+
+    print "('e_indent[EI_VERSION]', %s)" % common_dump.HexDump(f.read8())
+    print "('e_indent[EI_OSABI]', %s)" % common_dump.HexDump(f.read8())
+    print "('e_indent[EI_ABIVERSION]', %s)" % common_dump.HexDump(f.read8())
+
+    f.seek(16) # Seek to end of e_ident.
+
+    print "('e_type', %s)" % common_dump.HexDump(f.read16())
+    print "('e_machine', %s)" % common_dump.HexDump(f.read16())
+    print "('e_version', %s)" % common_dump.HexDump(f.read32())
+    print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
+    print "('e_phoff', %s)" % common_dump.HexDump(f.readWord())
+    e_shoff = f.readWord()
+    print "('e_shoff', %s)" % common_dump.HexDump(e_shoff)
+    print "('e_flags', %s)" % common_dump.HexDump(f.read32())
+    print "('e_ehsize', %s)" % common_dump.HexDump(f.read16())
+    print "('e_phentsize', %s)" % common_dump.HexDump(f.read16())
+    print "('e_phnum', %s)" % common_dump.HexDump(f.read16())
+    e_shentsize = f.read16()
+    print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize)
+    e_shnum = f.read16()
+    print "('e_shnum', %s)" % common_dump.HexDump(e_shnum)
+    e_shstrndx = f.read16()
+    print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx)
+
+    # Read all section headers
+    sections = []
+    for index in range(e_shnum):
+        f.seek(e_shoff + index * e_shentsize)
+        s = Section(f)
+        sections.append(s)
+
+    # Read .shstrtab so we can resolve section names
+    f.seek(sections[e_shstrndx].sh_offset)
+    shstrtab = StringTable(f.read(sections[e_shstrndx].sh_size))
+
+    # Get the symbol string table
+    strtab = None
+    for section in sections:
+        if shstrtab[section.sh_name] == ".strtab":
+            f.seek(section.sh_offset)
+            strtab = StringTable(f.read(section.sh_size))
+            break
+
+    print "('_sections', ["
+    for index in range(e_shnum):
+        print "  # Section %s" % common_dump.HexDump(index)
+        sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData)
+    print "])"
+
+if __name__ == "__main__":
+    from optparse import OptionParser, OptionGroup
+    parser = OptionParser("usage: %prog [options] {files}")
+    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
+                      help="Dump the contents of sections",
+                      action="store_true", default=False)
+    (opts, args) = parser.parse_args()
+
+    if not args:
+        args.append('-')
+
+    for arg in args:
+        dumpELF(arg, opts)
diff --git a/test/Scripts/elf-dump.bat b/test/Scripts/elf-dump.bat
new file mode 100644
index 000000000000..9c708083b302
--- /dev/null
+++ b/test/Scripts/elf-dump.bat
@@ -0,0 +1,7 @@
+@echo off
+
+@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
+@rem this in code, but I haven't found a way to do this in 2.6 yet.
+
+%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\elf-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
+
diff --git a/test/Scripts/macho-dump b/test/Scripts/macho-dump
deleted file mode 100755
index 72f833975d04..000000000000
--- a/test/Scripts/macho-dump
+++ /dev/null
@@ -1,289 +0,0 @@
-#!/usr/bin/env python
-
-import struct
-import sys
-import StringIO
-
-class Reader:
-   def __init__(self, path):
-      if path == '-':
-         # Snarf all the data so we can seek.
-         self.file = StringIO.StringIO(sys.stdin.read())
-      else:
-         self.file = open(path,'rb')
-      self.isLSB = None
-      self.is64Bit = None
-
-      self.string_table = None
-
-   def tell(self):
-      return self.file.tell()
-
-   def seek(self, pos):
-      self.file.seek(pos)
-
-   def read(self, N):
-      data = self.file.read(N)
-      if len(data) != N:
-         raise ValueError,"Out of data!"
-      return data
-
-   def read8(self):
-      return ord(self.read(1))
-
-   def read16(self):
-      return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
-
-   def read32(self):
-      # Force to 32-bit, if possible; otherwise these might be long ints on a
-      # big-endian platform. FIXME: Why???
-      Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
-      return int(Value)
-
-   def read64(self):
-      return struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
-
-   def registerStringTable(self, strings):
-      if self.string_table is not None:
-         raise ValueError,"%s: warning: multiple string tables" % sys.argv[0]
-
-      self.string_table = strings
-
-   def getString(self, index):
-      if self.string_table is None:
-         raise ValueError,"%s: warning: no string table registered" % sys.argv[0]
-      
-      end = self.string_table.index('\x00', index)
-      return self.string_table[index:end]
-
-def dumpmacho(path, opts):
-   f = Reader(path)
-
-   magic = f.read(4)
-   if magic == '\xFE\xED\xFA\xCE':
-      f.isLSB, f.is64Bit = False, False
-   elif magic == '\xCE\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, False
-   elif magic == '\xFE\xED\xFA\xCF':
-      f.isLSB, f.is64Bit = False, True
-   elif magic == '\xCF\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, True
-   else:
-      raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
-
-   print "('cputype', %r)" % f.read32()
-   print "('cpusubtype', %r)" % f.read32()
-   filetype = f.read32()
-   print "('filetype', %r)" % filetype
-   
-   numLoadCommands = f.read32()
-   print "('num_load_commands', %r)" % filetype
-
-   loadCommandsSize = f.read32()
-   print "('load_commands_size', %r)" % loadCommandsSize
-
-   print "('flag', %r)" % f.read32()
-
-   if f.is64Bit:
-      print "('reserved', %r)" % f.read32()
-
-   start = f.tell()
-
-   print "('load_commands', ["
-   for i in range(numLoadCommands):
-      dumpLoadCommand(f, i, opts)
-   print "])"
-
-   if f.tell() - start != loadCommandsSize:
-      raise ValueError,"%s: warning: invalid load commands size: %r" % (
-         sys.argv[0], loadCommandsSize)
-
-def dumpLoadCommand(f, i, opts):
-   start = f.tell()
-
-   print "  # Load Command %r" % i
-   cmd = f.read32()
-   print " (('command', %r)" % cmd
-   cmdSize = f.read32()
-   print "  ('size', %r)" % cmdSize
-
-   if cmd == 1:
-      dumpSegmentLoadCommand(f, opts, False)
-   elif cmd == 2:
-      dumpSymtabCommand(f, opts)
-   elif cmd == 11:
-      dumpDysymtabCommand(f, opts)
-   elif cmd == 25:
-      dumpSegmentLoadCommand(f, opts, True)
-   elif cmd == 27:
-      import uuid
-      print "  ('uuid', %s)" % uuid.UUID(bytes=f.read(16))
-   else:
-      print >>sys.stderr,"%s: warning: unknown load command: %r" % (
-         sys.argv[0], cmd)
-      f.read(cmdSize - 8)
-   print " ),"
-
-   if f.tell() - start != cmdSize:
-      raise ValueError,"%s: warning: invalid load command size: %r" % (
-         sys.argv[0], cmdSize)
-
-def dumpSegmentLoadCommand(f, opts, is64Bit):
-   print "  ('segment_name', %r)" % f.read(16) 
-   if is64Bit:
-      print "  ('vm_addr', %r)" % f.read64()
-      print "  ('vm_size', %r)" % f.read64()
-      print "  ('file_offset', %r)" % f.read64()
-      print "  ('file_size', %r)" % f.read64()
-   else:
-      print "  ('vm_addr', %r)" % f.read32()
-      print "  ('vm_size', %r)" % f.read32()
-      print "  ('file_offset', %r)" % f.read32()
-      print "  ('file_size', %r)" % f.read32()
-   print "  ('maxprot', %r)" % f.read32()
-   print "  ('initprot', %r)" % f.read32()
-   numSections = f.read32()
-   print "  ('num_sections', %r)" % numSections
-   print "  ('flags', %r)" % f.read32()
-
-   print "  ('sections', ["
-   for i in range(numSections):
-      dumpSection(f, i, opts, is64Bit)
-   print "  ])"
-
-def dumpSymtabCommand(f, opts):
-   symoff = f.read32()
-   print "  ('symoff', %r)" % symoff
-   nsyms = f.read32()
-   print "  ('nsyms', %r)" % nsyms
-   stroff = f.read32()
-   print "  ('stroff', %r)" % stroff
-   strsize = f.read32()
-   print "  ('strsize', %r)" % strsize
-
-   prev_pos = f.tell()
-
-   f.seek(stroff)
-   string_data = f.read(strsize)
-   print "  ('_string_data', %r)" % string_data
-
-   f.registerStringTable(string_data)
-
-   f.seek(symoff)
-   print "  ('_symbols', ["
-   for i in range(nsyms):
-      dumpNlist32(f, i, opts)
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpNlist32(f, i, opts):
-   print "    # Symbol %r" % i
-   n_strx = f.read32()
-   print "   (('n_strx', %r)" % n_strx
-   n_type = f.read8()
-   print "    ('n_type', %#x)" % n_type
-   n_sect = f.read8()
-   print "    ('n_sect', %r)" % n_sect
-   n_desc = f.read16()
-   print "    ('n_desc', %r)" % n_desc
-   if f.is64Bit:
-      n_value = f.read64()
-      print "    ('n_value', %r)" % n_value
-   else:
-      n_value = f.read32()
-      print "    ('n_value', %r)" % n_value
-   print "    ('_string', %r)" % f.getString(n_strx)
-   print "   ),"
-
-def dumpDysymtabCommand(f, opts):   
-   print "  ('ilocalsym', %r)" % f.read32()
-   print "  ('nlocalsym', %r)" % f.read32()
-   print "  ('iextdefsym', %r)" % f.read32()
-   print "  ('nextdefsym', %r)" % f.read32()
-   print "  ('iundefsym', %r)" % f.read32()
-   print "  ('nundefsym', %r)" % f.read32()
-   print "  ('tocoff', %r)" % f.read32()
-   print "  ('ntoc', %r)" % f.read32()
-   print "  ('modtaboff', %r)" % f.read32()
-   print "  ('nmodtab', %r)" % f.read32()
-   print "  ('extrefsymoff', %r)" % f.read32()
-   print "  ('nextrefsyms', %r)" % f.read32()
-   indirectsymoff = f.read32()
-   print "  ('indirectsymoff', %r)" % indirectsymoff
-   nindirectsyms = f.read32()
-   print "  ('nindirectsyms', %r)" % nindirectsyms
-   print "  ('extreloff', %r)" % f.read32()
-   print "  ('nextrel', %r)" % f.read32()
-   print "  ('locreloff', %r)" % f.read32()
-   print "  ('nlocrel', %r)" % f.read32()
-
-   prev_pos = f.tell()
-
-   f.seek(indirectsymoff)
-   print "  ('_indirect_symbols', ["
-   for i in range(nindirectsyms):
-      print "    # Indirect Symbol %r" % i
-      print "    (('symbol_index', %#x),)," % f.read32()
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpSection(f, i, opts, is64Bit):
-   print "    # Section %r" % i
-   print "   (('section_name', %r)" % f.read(16)
-   print "    ('segment_name', %r)" % f.read(16)
-   if is64Bit:
-      print "    ('address', %r)" % f.read64()
-      size = f.read64()
-      print "    ('size', %r)" % size
-   else:
-      print "    ('address', %r)" % f.read32()
-      size = f.read32()
-      print "    ('size', %r)" % size
-   offset = f.read32()
-   print "    ('offset', %r)" % offset
-   print "    ('alignment', %r)" % f.read32()   
-   reloc_offset = f.read32()
-   print "    ('reloc_offset', %r)" % reloc_offset
-   num_reloc = f.read32()
-   print "    ('num_reloc', %r)" % num_reloc
-   print "    ('flags', %#x)" % f.read32()
-   print "    ('reserved1', %r)" % f.read32()
-   print "    ('reserved2', %r)" % f.read32()
-   if is64Bit:
-      print "    ('reserved3', %r)" % f.read32()
-   print "   ),"
-
-   prev_pos = f.tell()
-
-   f.seek(reloc_offset)
-   print "  ('_relocations', ["
-   for i in range(num_reloc):
-      print "    # Relocation %r" % i
-      print "    (('word-0', %#x)," % f.read32()
-      print "     ('word-1', %#x))," % f.read32()
-   print "  ])"
-
-   if opts.dumpSectionData:
-      f.seek(offset)
-      print "  ('_section_data', %r)" % f.read(size)
-      
-   f.seek(prev_pos)
-   
-def main():
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {files}")
-    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
-                      help="Dump the contents of sections",
-                      action="store_true", default=False)    
-    (opts, args) = parser.parse_args()
-
-    if not args:
-       args.append('-')
-
-    for arg in args:
-       dumpmacho(arg, opts)
-
-if __name__ == '__main__':
-   main()
diff --git a/test/Scripts/macho-dump.bat b/test/Scripts/macho-dump.bat
deleted file mode 100644
index 81484f67d702..000000000000
--- a/test/Scripts/macho-dump.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\macho-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Scripts/macho-dumpx b/test/Scripts/macho-dumpx
new file mode 100755
index 000000000000..71e06d837b90
--- /dev/null
+++ b/test/Scripts/macho-dumpx
@@ -0,0 +1,294 @@
+#!/usr/bin/env python
+
+import struct
+import sys
+import StringIO
+
+import common_dump
+
+class Reader:
+   def __init__(self, path):
+      if path == '-':
+         # Snarf all the data so we can seek.
+         self.file = StringIO.StringIO(sys.stdin.read())
+      else:
+         self.file = open(path,'rb')
+      self.isLSB = None
+      self.is64Bit = None
+
+      self.string_table = None
+
+   def tell(self):
+      return self.file.tell()
+
+   def seek(self, pos):
+      self.file.seek(pos)
+
+   def read(self, N):
+      data = self.file.read(N)
+      if len(data) != N:
+         raise ValueError,"Out of data!"
+      return data
+
+   def read8(self):
+      return ord(self.read(1))
+
+   def read16(self):
+      return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
+
+   def read32(self):
+      # Force to 32-bit, if possible; otherwise these might be long ints on a
+      # big-endian platform. FIXME: Why???
+      Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
+      return int(Value)
+
+   def read64(self):
+      Value = struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
+      if Value == int(Value):
+         Value = int(Value)
+      return Value
+
+   def registerStringTable(self, strings):
+      if self.string_table is not None:
+         raise ValueError,"%s: warning: multiple string tables" % sys.argv[0]
+
+      self.string_table = strings
+
+   def getString(self, index):
+      if self.string_table is None:
+         raise ValueError,"%s: warning: no string table registered" % sys.argv[0]
+      
+      end = self.string_table.index('\x00', index)
+      return self.string_table[index:end]
+
+def dumpmacho(path, opts):
+   f = Reader(path)
+
+   magic = f.read(4)
+   if magic == '\xFE\xED\xFA\xCE':
+      f.isLSB, f.is64Bit = False, False
+   elif magic == '\xCE\xFA\xED\xFE':
+      f.isLSB, f.is64Bit = True, False
+   elif magic == '\xFE\xED\xFA\xCF':
+      f.isLSB, f.is64Bit = False, True
+   elif magic == '\xCF\xFA\xED\xFE':
+      f.isLSB, f.is64Bit = True, True
+   else:
+      raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
+
+   print "('cputype', %r)" % f.read32()
+   print "('cpusubtype', %r)" % f.read32()
+   filetype = f.read32()
+   print "('filetype', %r)" % filetype
+   
+   numLoadCommands = f.read32()
+   print "('num_load_commands', %r)" % numLoadCommands
+
+   loadCommandsSize = f.read32()
+   print "('load_commands_size', %r)" % loadCommandsSize
+
+   print "('flag', %r)" % f.read32()
+
+   if f.is64Bit:
+      print "('reserved', %r)" % f.read32()
+
+   start = f.tell()
+
+   print "('load_commands', ["
+   for i in range(numLoadCommands):
+      dumpLoadCommand(f, i, opts)
+   print "])"
+
+   if f.tell() - start != loadCommandsSize:
+      raise ValueError,"%s: warning: invalid load commands size: %r" % (
+         sys.argv[0], loadCommandsSize)
+
+def dumpLoadCommand(f, i, opts):
+   start = f.tell()
+
+   print "  # Load Command %r" % i
+   cmd = f.read32()
+   print " (('command', %r)" % cmd
+   cmdSize = f.read32()
+   print "  ('size', %r)" % cmdSize
+
+   if cmd == 1:
+      dumpSegmentLoadCommand(f, opts, False)
+   elif cmd == 2:
+      dumpSymtabCommand(f, opts)
+   elif cmd == 11:
+      dumpDysymtabCommand(f, opts)
+   elif cmd == 25:
+      dumpSegmentLoadCommand(f, opts, True)
+   elif cmd == 27:
+      import uuid
+      print "  ('uuid', %s)" % uuid.UUID(bytes=f.read(16))
+   else:
+      print >>sys.stderr,"%s: warning: unknown load command: %r" % (
+         sys.argv[0], cmd)
+      f.read(cmdSize - 8)
+   print " ),"
+
+   if f.tell() - start != cmdSize:
+      raise ValueError,"%s: warning: invalid load command size: %r" % (
+         sys.argv[0], cmdSize)
+
+def dumpSegmentLoadCommand(f, opts, is64Bit):
+   print "  ('segment_name', %r)" % f.read(16) 
+   if is64Bit:
+      print "  ('vm_addr', %r)" % f.read64()
+      print "  ('vm_size', %r)" % f.read64()
+      print "  ('file_offset', %r)" % f.read64()
+      print "  ('file_size', %r)" % f.read64()
+   else:
+      print "  ('vm_addr', %r)" % f.read32()
+      print "  ('vm_size', %r)" % f.read32()
+      print "  ('file_offset', %r)" % f.read32()
+      print "  ('file_size', %r)" % f.read32()
+   print "  ('maxprot', %r)" % f.read32()
+   print "  ('initprot', %r)" % f.read32()
+   numSections = f.read32()
+   print "  ('num_sections', %r)" % numSections
+   print "  ('flags', %r)" % f.read32()
+
+   print "  ('sections', ["
+   for i in range(numSections):
+      dumpSection(f, i, opts, is64Bit)
+   print "  ])"
+
+def dumpSymtabCommand(f, opts):
+   symoff = f.read32()
+   print "  ('symoff', %r)" % symoff
+   nsyms = f.read32()
+   print "  ('nsyms', %r)" % nsyms
+   stroff = f.read32()
+   print "  ('stroff', %r)" % stroff
+   strsize = f.read32()
+   print "  ('strsize', %r)" % strsize
+
+   prev_pos = f.tell()
+
+   f.seek(stroff)
+   string_data = f.read(strsize)
+   print "  ('_string_data', %r)" % string_data
+
+   f.registerStringTable(string_data)
+
+   f.seek(symoff)
+   print "  ('_symbols', ["
+   for i in range(nsyms):
+      dumpNlist32(f, i, opts)
+   print "  ])"
+      
+   f.seek(prev_pos)
+
+def dumpNlist32(f, i, opts):
+   print "    # Symbol %r" % i
+   n_strx = f.read32()
+   print "   (('n_strx', %r)" % n_strx
+   n_type = f.read8()
+   print "    ('n_type', %#x)" % n_type
+   n_sect = f.read8()
+   print "    ('n_sect', %r)" % n_sect
+   n_desc = f.read16()
+   print "    ('n_desc', %r)" % n_desc
+   if f.is64Bit:
+      n_value = f.read64()
+      print "    ('n_value', %r)" % n_value
+   else:
+      n_value = f.read32()
+      print "    ('n_value', %r)" % n_value
+   print "    ('_string', %r)" % f.getString(n_strx)
+   print "   ),"
+
+def dumpDysymtabCommand(f, opts):   
+   print "  ('ilocalsym', %r)" % f.read32()
+   print "  ('nlocalsym', %r)" % f.read32()
+   print "  ('iextdefsym', %r)" % f.read32()
+   print "  ('nextdefsym', %r)" % f.read32()
+   print "  ('iundefsym', %r)" % f.read32()
+   print "  ('nundefsym', %r)" % f.read32()
+   print "  ('tocoff', %r)" % f.read32()
+   print "  ('ntoc', %r)" % f.read32()
+   print "  ('modtaboff', %r)" % f.read32()
+   print "  ('nmodtab', %r)" % f.read32()
+   print "  ('extrefsymoff', %r)" % f.read32()
+   print "  ('nextrefsyms', %r)" % f.read32()
+   indirectsymoff = f.read32()
+   print "  ('indirectsymoff', %r)" % indirectsymoff
+   nindirectsyms = f.read32()
+   print "  ('nindirectsyms', %r)" % nindirectsyms
+   print "  ('extreloff', %r)" % f.read32()
+   print "  ('nextrel', %r)" % f.read32()
+   print "  ('locreloff', %r)" % f.read32()
+   print "  ('nlocrel', %r)" % f.read32()
+
+   prev_pos = f.tell()
+
+   f.seek(indirectsymoff)
+   print "  ('_indirect_symbols', ["
+   for i in range(nindirectsyms):
+      print "    # Indirect Symbol %r" % i
+      print "    (('symbol_index', %#x),)," % f.read32()
+   print "  ])"
+      
+   f.seek(prev_pos)
+
+def dumpSection(f, i, opts, is64Bit):
+   print "    # Section %r" % i
+   print "   (('section_name', %r)" % f.read(16)
+   print "    ('segment_name', %r)" % f.read(16)
+   if is64Bit:
+      print "    ('address', %r)" % f.read64()
+      size = f.read64()
+      print "    ('size', %r)" % size
+   else:
+      print "    ('address', %r)" % f.read32()
+      size = f.read32()
+      print "    ('size', %r)" % size
+   offset = f.read32()
+   print "    ('offset', %r)" % offset
+   print "    ('alignment', %r)" % f.read32()   
+   reloc_offset = f.read32()
+   print "    ('reloc_offset', %r)" % reloc_offset
+   num_reloc = f.read32()
+   print "    ('num_reloc', %r)" % num_reloc
+   print "    ('flags', %#x)" % f.read32()
+   print "    ('reserved1', %r)" % f.read32()
+   print "    ('reserved2', %r)" % f.read32()
+   if is64Bit:
+      print "    ('reserved3', %r)" % f.read32()
+   print "   ),"
+
+   prev_pos = f.tell()
+
+   f.seek(reloc_offset)
+   print "  ('_relocations', ["
+   for i in range(num_reloc):
+      print "    # Relocation %r" % i
+      print "    (('word-0', %#x)," % f.read32()
+      print "     ('word-1', %#x))," % f.read32()
+   print "  ])"
+
+   if opts.dumpSectionData:
+      f.seek(offset)
+      print "  ('_section_data', '%s')" % common_dump.dataToHex(f.read(size))
+      
+   f.seek(prev_pos)
+   
+def main():
+    from optparse import OptionParser, OptionGroup
+    parser = OptionParser("usage: %prog [options] {files}")
+    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
+                      help="Dump the contents of sections",
+                      action="store_true", default=False)    
+    (opts, args) = parser.parse_args()
+
+    if not args:
+       args.append('-')
+
+    for arg in args:
+       dumpmacho(arg, opts)
+
+if __name__ == '__main__':
+   main()
diff --git a/test/Scripts/macho-dumpx.bat b/test/Scripts/macho-dumpx.bat
new file mode 100644
index 000000000000..81484f67d702
--- /dev/null
+++ b/test/Scripts/macho-dumpx.bat
@@ -0,0 +1,7 @@
+@echo off
+
+@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
+@rem this in code, but I haven't found a way to do this in 2.6 yet.
+
+%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\macho-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
+
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
new file mode 100644
index 000000000000..d3481a550c34
--- /dev/null
+++ b/test/TableGen/Dag.td
@@ -0,0 +1,71 @@
+// RUN: tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+//===----------------------------------------------------------------------===//
+// Substitution of an int.
+def X1;
+
+class C1<int N> {
+  dag d = (X1 N);
+}
+
+def VAL1 : C1<13>;
+
+// CHECK: def VAL1 {
+// CHECK-NEXT: dag d = (X1 13)
+
+
+//===----------------------------------------------------------------------===//
+// Substitution of a DAG.
+def X2;
+
+class yclass;
+def Y2 : yclass;
+
+class C2<yclass N> {
+  dag d = (X2 N);
+  dag e = (N X2);
+}
+
+def VAL2 : C2<Y2>;
+
+// CHECK: def VAL2 {
+// CHECK-NEXT: dag d = (X2 Y2)
+// CHECK-NEXT: dag e = (Y2 X2)
+
+
+//===----------------------------------------------------------------------===//
+// Complex dag operator (F.TheOp).
+
+class operator;
+def somedef1 : operator;
+def somedef2 : operator;
+
+class foo<operator a> {
+ operator TheOp = a;
+}
+
+class bar<foo F, operator a> {
+  dag Dag1 = (somedef1 1);
+  dag Dag2 = (a 2);
+  dag Dag3 = (F.TheOp 2);
+}
+
+def foo1 : foo<somedef1>;
+def foo2 : foo<somedef2>;
+
+def VAL3 : bar<foo1, somedef1>;
+
+// CHECK:     def VAL3 {	// bar
+// CHECK-NEXT:  dag Dag1 = (somedef1 1);
+// CHECK-NEXT:  dag Dag2 = (somedef1 2);
+// CHECK-NEXT:  dag Dag3 = (somedef1 2);
+// CHECK-NEXT: }
+
+
+def VAL4 : bar<foo2, somedef2>;
+// CHECK:      def VAL4 {
+// CHECK-NEXT:  dag Dag1 = (somedef1 1);
+// CHECK-NEXT:  dag Dag2 = (somedef2 2);
+// CHECK-NEXT:  dag Dag3 = (somedef2 2);
+// CHECK-NEXT: }
diff --git a/test/TableGen/DagDefSubst.td b/test/TableGen/DagDefSubst.td
deleted file mode 100644
index 92a207f41829..000000000000
--- a/test/TableGen/DagDefSubst.td
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: tblgen %s | grep {dag d = (X Y)}
-// RUN: tblgen %s | grep {dag e = (Y X)}
-// XFAIL: vg_leak
-def X;
-
-class yclass;
-def Y : yclass;
-
-class C<yclass N> {
-  dag d = (X N);
-  dag e = (N X);
-}
-
-def VAL : C<Y>;
-
-
diff --git a/test/TableGen/DagIntSubst.td b/test/TableGen/DagIntSubst.td
deleted file mode 100644
index 00fde694e7dc..000000000000
--- a/test/TableGen/DagIntSubst.td
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: tblgen %s | grep {dag d = (X 13)}
-// XFAIL: vg_leak
-def X;
-
-class C<int N> {
-  dag d = (X N);
-}
-
-def VAL : C<13>;
-
-
diff --git a/test/TableGen/FieldAccess.td b/test/TableGen/FieldAccess.td
index ad652e79ea7c..8b4dc83e0a5b 100644
--- a/test/TableGen/FieldAccess.td
+++ b/test/TableGen/FieldAccess.td
@@ -1,4 +1,6 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
+
 class Bla<string t>
 {
   string blu = t;
diff --git a/test/TableGen/ListManip.td b/test/TableGen/ListManip.td
index c221bb1335b6..6b1e491cd25f 100644
--- a/test/TableGen/ListManip.td
+++ b/test/TableGen/ListManip.td
@@ -1,10 +1,12 @@
 // RUN: tblgen %s
+// XFAIL: vg_leak
+
 class Bli<string _t>
 {
   string t = _t;
 }
 
 class Bla<list<Bli> _bli>
-: Bli<!car(_bli).t>
+: Bli<!head(_bli).t>
 {
 }
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index 22bf7fbfe8cf..13d9da2b9fd6 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -66,19 +66,19 @@ def not : SDNode;
 multiclass scalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
   def SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
                   !strconcat(asmstr, "\t$dst, $src"),
-                  !if(!null(patterns),[]<dag>,patterns[0])>;
+                  !if(!empty(patterns),[]<dag>,patterns[0])>;
   def SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
                   !strconcat(asmstr, "\t$dst, $src"),
-                  !if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
+                  !if(!empty(patterns),[]<dag>,!if(!empty(!tail(patterns)),patterns[0],patterns[1]))>;
 }
 
 multiclass vscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> {
   def V#NAME#SSrr : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
                   !strconcat(asmstr, "\t$dst, $src"),
-                  !if(!null(patterns),[]<dag>,patterns[0])>;
+                  !if(!empty(patterns),[]<dag>,patterns[0])>;
   def V#NAME#SSrm : Inst<opcode, (outs FR32:$dst), (ins FR32:$src),
                   !strconcat(asmstr, "\t$dst, $src"),
-                  !if(!null(patterns),[]<dag>,!if(!null(!cdr(patterns)),patterns[0],patterns[1]))>;
+                  !if(!empty(patterns),[]<dag>,!if(!empty(!tail(patterns)),patterns[0],patterns[1]))>;
 }
 
 multiclass myscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns = []> :
diff --git a/test/TableGen/defmclass.td b/test/TableGen/defmclass.td
index 55482da4d072..57972b6dae54 100644
--- a/test/TableGen/defmclass.td
+++ b/test/TableGen/defmclass.td
@@ -36,3 +36,15 @@ multiclass Y {
 
 // CHECK: int check = 0;
 defm Instr : Y, VEX;
+
+
+// Anonymous defm.
+
+multiclass SomeAnonymous<int x> {
+  def rm;
+  def mr;
+}
+
+// These multiclasses shouldn't conflict.
+defm : SomeAnonymous<1>;
+defm : SomeAnonymous<2>;
\ No newline at end of file
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index 0bac0bac3e98..c4d953ea2245 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -1,20 +1,44 @@
-// RUN: tblgen %s | grep {\\\[1, 2, 3\\\]} | count 4
-// RUN: tblgen %s | grep {\\\[4, 5, 6\\\]} | count 2
+// RUN: tblgen %s | FileCheck %s
 // XFAIL: vg_leak
 
+// Support for an `!if' operator as part of a `let' statement.
+// CHECK:      class C
+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, !if({ C:x{2} }, 0, 1), !if({ C:x{2} }, 1, 1), !if({ C:x{2} }, 0, 0), !if({ C:x{1} }, C:y{3}, 0), !if({ C:x{1} }, C:y{2}, 1), !if({ C:x{0} }, C:y{3}, C:z), !if({ C:x{0} }, C:y{2}, C:y{2}), !if({ C:x{0} }, C:y{1}, C:y{1}), !if({ C:x{0} }, C:y{0}, C:y{0}) };
+class C<bits<3> x, bits<4> y, bit z> {
+  bits<16> n;
+
+  let n{8-6} = !if(x{2}, 0b010, 0b110);
+  let n{5-4} = !if(x{1}, y{3-2}, {0, 1});
+  let n{3-0} = !if(x{0}, y{3-0}, {z, y{2}, y{1}, y{0}});
+}
+
+// CHECK:      def One
+// CHECK-NEXT: list<int> first = [1, 2, 3];
+// CHECK-NEXT: list<int> rest = [1, 2, 3];
+
+// CHECK:      def OneB
+// CHECK-NEXT: list<int> vals = [1, 2, 3];
+
+// CHECK:      def Two
+// CHECK-NEXT: list<int> first = [1, 2, 3];
+// CHECK-NEXT: list<int> rest = [4, 5, 6];
+
+// CHECK:      def TwoB
+// CHECK-NEXT: list<int> vals = [4, 5, 6];
+
 class A<list<list<int>> vals> {
   list<int> first = vals[0];
-  list<int> rest  = !if(!null(!cdr(vals)), vals[0], vals[1]);
+  list<int> rest  = !if(!empty(!tail(vals)), vals[0], vals[1]);
 }
 
 def One : A<[[1,2,3]]>;
-def Two : A<[[1,2,3],[4,5,6]]>;
+def Two : A<[[1,2,3], [4,5,6]]>;
 
 class B<list<int> v> {
   list<int> vals = v;
 }
 
-class BB<list<list<int>> vals> : B<!if(!null(!cdr(vals)), vals[0], vals[1])>;
+class BB<list<list<int>> vals> : B<!if(!empty(!tail(vals)), vals[0], vals[1])>;
 class BBB<list<list<int>> vals> : BB<vals>;
 
 def OneB : BBB<[[1,2,3]]>;
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index b521e04c8913..bbed8690dd5d 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -15,7 +15,7 @@ class CDR<list<string> r, int n> {
 }
 
 class NameList<list<string> Names> :
-  List<Names>, CAR<!car(Names)>, CDR<!cdr(Names), !null(!cdr(Names))>;
+  List<Names>, CAR<!head(Names)>, CDR<!tail(Names), !empty(!tail(Names))>;
 
 def Three : NameList<["Tom", "Dick", "Harry"]>;
 
diff --git a/test/TableGen/nameconcat.td b/test/TableGen/nameconcat.td
deleted file mode 100644
index fd2880a80dff..000000000000
--- a/test/TableGen/nameconcat.td
+++ /dev/null
@@ -1,91 +0,0 @@
-// RUN: tblgen %s | grep {add_ps} | count 3
-// XFAIL: vg_leak
-
-class ValueType<int size, int value> {
-  int Size = size;
-  int Value = value;
-}
-
-def v2i64  : ValueType<128, 22>;   //  2 x i64 vector value
-def v2f64  : ValueType<128, 28>;   //  2 x f64 vector value
-
-class Intrinsic<string name> {
-  string Name = name;
-}
-
-class Inst<bits<8> opcode, dag oopnds, dag iopnds, string asmstr, 
-           list<dag> pattern> {
-  bits<8> Opcode = opcode;
-  dag OutOperands = oopnds;
-  dag InOperands = iopnds;
-  string AssemblyString = asmstr;
-  list<dag> Pattern = pattern;
-}
-
-def ops;
-def outs;
-def ins;
-
-def set;
-
-// Define registers
-class Register<string n> {
-  string Name = n;
-}
-
-class RegisterClass<list<ValueType> regTypes, list<Register> regList> {
-  list<ValueType> RegTypes = regTypes;
-  list<Register> MemberList = regList;
-}
-
-def XMM0: Register<"xmm0">;
-def XMM1: Register<"xmm1">;
-def XMM2: Register<"xmm2">;
-def XMM3: Register<"xmm3">;
-def XMM4: Register<"xmm4">;
-def XMM5: Register<"xmm5">;
-def XMM6: Register<"xmm6">;
-def XMM7: Register<"xmm7">;
-def XMM8:  Register<"xmm8">;
-def XMM9:  Register<"xmm9">;
-def XMM10: Register<"xmm10">;
-def XMM11: Register<"xmm11">;
-def XMM12: Register<"xmm12">;
-def XMM13: Register<"xmm13">;
-def XMM14: Register<"xmm14">;
-def XMM15: Register<"xmm15">;
-
-def VR128 : RegisterClass<[v2i64, v2f64],
-                          [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                           XMM8, XMM9, XMM10, XMM11,
-                           XMM12, XMM13, XMM14, XMM15]>;
-
-// Define intrinsics
-def int_x86_sse2_add_ps : Intrinsic<"addps">;
-def int_x86_sse2_add_pd : Intrinsic<"addpd">;
-
-multiclass arith<bits<8> opcode, string asmstr, string Intr> {
-  def PS : Inst<opcode, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                 !strconcat(asmstr, "\t$dst, $src1, $src2"),
-                 [(set VR128:$dst, (!nameconcat<Intrinsic>(Intr, "_ps") VR128:$src1, VR128:$src2))]>;
-
-  def PD : Inst<opcode, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                 !strconcat(asmstr, "\t$dst, $src1, $src2"),
-                 [(set VR128:$dst, (!nameconcat<Intrinsic>(Intr, "_pd") VR128:$src1, VR128:$src2))]>;
-}
-
-defm ADD : arith<0x58, "add", "int_x86_sse2_add">;
-
-class IntInst<bits<8> opcode, string asmstr, Intrinsic Intr> :
-  Inst<opcode,(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-       !strconcat(asmstr, "\t$dst, $src1, $src2"),
-       [(set VR128:$dst, (Intr VR128:$src1, VR128:$src2))]>;
-
-
-multiclass arith_int<bits<8> opcode, string asmstr, string Intr> {
-  def PS_Int : IntInst<opcode, asmstr, !nameconcat<Intrinsic>(Intr, "_ps")>;
-
-  def PD_Int : IntInst<opcode, asmstr, !nameconcat<Intrinsic>(Intr, "_pd")>;
-}
-
-defm ADD : arith_int<0x58, "add", "int_x86_sse2_add">;
diff --git a/test/Transforms/ArgumentPromotion/basictest.ll b/test/Transforms/ArgumentPromotion/basictest.ll
index ac9d7bf5abb6..d3d21fcabee1 100644
--- a/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/test/Transforms/ArgumentPromotion/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -argpromotion -mem2reg -S | not grep alloca
+; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | not grep alloca
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 define internal i32 @test(i32* %X, i32* %Y) {
         %A = load i32* %X               ; <i32> [#uses=1]
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
index e2d3d4de9edb..fed002aa98a9 100644
--- a/test/Transforms/ArgumentPromotion/crash.ll
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -36,3 +36,24 @@ entry:
   ret i1 undef
 }
 
+
+; PR8932 - infinite promotion.
+%0 = type { %0* }
+
+define i32 @test2(i32 %a) {
+init:
+  %0 = alloca %0
+  %1 = alloca %0
+  %2 = call i32 @"clay_assign(Chain, Chain)"(%0* %0, %0* %1)
+  ret i32 0
+}
+
+define internal i32 @"clay_assign(Chain, Chain)"(%0* %c, %0* %d) {
+init:
+  %0 = getelementptr %0* %d, i32 0, i32 0
+  %1 = load %0** %0
+  %2 = getelementptr %0* %c, i32 0, i32 0
+  %3 = load %0** %2
+  %4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1)
+  ret i32 0
+}
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
new file mode 100644
index 000000000000..3b1fca328c5b
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -0,0 +1,29 @@
+; RUN: opt -codegenprepare %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: @test1
+; objectsize should fold to a constant, which causes the branch to fold to an
+; uncond branch.
+; rdar://8785296
+define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
+entry:
+  %0 = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+  %1 = icmp ugt i64 %0, 3
+  br i1 %1, label %T, label %trap
+
+; CHECK: entry:
+; HECK-NEXT: ret i32 4
+
+trap:                                             ; preds = %0, %entry
+  tail call void @llvm.trap() noreturn nounwind
+  unreachable
+
+T:
+  ret i32 4
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+
+declare void @llvm.trap() nounwind
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index 14580c1eab28..df57fb6870b8 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -31,3 +31,12 @@ bb:
 ; CHECK: @test2
 ; CHECK: ret i1 true
 }
+
+define i1 @TNAN() {
+; CHECK: @TNAN
+; CHECK: ret i1 true
+  %A = fcmp uno double 0x7FF8000000000000, 1.000000e+00
+  %B = fcmp uno double 1.230000e+02, 1.000000e+00
+  %C = or i1 %A, %B
+  ret i1 %C
+}
diff --git a/test/Transforms/ConstProp/bitcast.ll b/test/Transforms/ConstProp/bitcast.ll
index bf943c971f3a..53239c7e4fe0 100644
--- a/test/Transforms/ConstProp/bitcast.ll
+++ b/test/Transforms/ConstProp/bitcast.ll
@@ -1,2 +1,10 @@
-; RUN: llvm-as < %s | llvm-dis | grep 0x36A0000000000000
-@A = global float 0x36A0000000000000            ; <float*> [#uses=0]
+; RUN: opt < %s -constprop -S | FileCheck %s
+; PR2165
+
+define <1 x i64> @test1() {
+  %A = bitcast i64 63 to <1 x i64>
+  ret <1 x i64> %A
+; CHECK: @test1
+; CHECK: ret <1 x i64> <i64 63>
+}
+
diff --git a/test/Transforms/ConstProp/bitcast2.ll b/test/Transforms/ConstProp/bitcast2.ll
deleted file mode 100644
index 5c5eab1879c3..000000000000
--- a/test/Transforms/ConstProp/bitcast2.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep bitcast
-; PR2165
-
-define <1 x i64> @test() {
-  %A = bitcast i64 63 to <1 x i64>
-  ret <1 x i64> %A
-}
-
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 3c266fe62ed8..82d73245ad15 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -constprop -S | not grep call
+; RUN: opt < %s -constprop -S | FileCheck %s
 
 declare double @cos(double)
 
@@ -8,23 +8,49 @@ declare double @tan(double)
 
 declare double @sqrt(double)
 
-declare i1 @llvm.isunordered.f64(double, double)
-
 define double @T() {
-        %A = call double @cos( double 0.000000e+00 )            ; <double> [#uses=1]
-        %B = call double @sin( double 0.000000e+00 )            ; <double> [#uses=1]
-        %a = fadd double %A, %B          ; <double> [#uses=1]
-        %C = call double @tan( double 0.000000e+00 )            ; <double> [#uses=1]
-        %b = fadd double %a, %C          ; <double> [#uses=1]
-        %D = call double @sqrt( double 4.000000e+00 )           ; <double> [#uses=1]
-        %c = fadd double %b, %D          ; <double> [#uses=1]
-        ret double %c
+; CHECK: @T
+; CHECK-NOT: call
+; CHECK: ret
+  %A = call double @cos(double 0.000000e+00)
+  %B = call double @sin(double 0.000000e+00)
+  %a = fadd double %A, %B
+  %C = call double @tan(double 0.000000e+00)
+  %b = fadd double %a, %C
+  %D = call double @sqrt(double 4.000000e+00)
+  %c = fadd double %b, %D
+  ret double %c
 }
 
-define i1 @TNAN() {
-        %A = fcmp uno double 0x7FF8000000000000, 1.000000e+00           ; <i1> [#uses=1]
-        %B = fcmp uno double 1.230000e+02, 1.000000e+00         ; <i1> [#uses=1]
-        %C = or i1 %A, %B               ; <i1> [#uses=1]
-        ret i1 %C
+define i1 @test_sse_cvt() nounwind readnone {
+; CHECK: @test_sse_cvt
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i1 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i2 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i4 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
+  %i5 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
+  %i6 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+  %i7 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+  %sum11 = add i32 %i0, %i1
+  %sum12 = add i32 %i4, %i5
+  %sum1 = add i32 %sum11, %sum12
+  %sum21 = add i64 %i2, %i3
+  %sum22 = add i64 %i6, %i7
+  %sum2 = add i64 %sum21, %sum22
+  %sum1.sext = sext i32 %sum1 to i64
+  %b = icmp eq i64 %sum1.sext, %sum2
+  ret i1 %b
 }
 
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
diff --git a/test/Transforms/ConstProp/constant-expr.ll b/test/Transforms/ConstProp/constant-expr.ll
index 556ed1f652af..1088fa6959ad 100644
--- a/test/Transforms/ConstProp/constant-expr.ll
+++ b/test/Transforms/ConstProp/constant-expr.ll
@@ -65,3 +65,47 @@
 
 ; No check line. This used to crash llvm-as.
 @T6 = global <2 x i1> fcmp ole (<2 x float> fdiv (<2 x float> undef, <2 x float> <float 1.000000e+00, float 1.000000e+00>), <2 x float> zeroinitializer)
+
+
+; PR9011
+
+@pr9011_1 = constant <4 x i32> zext (<4 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_1 = constant <4 x i32> zeroinitializer
+@pr9011_2 = constant <4 x i32> sext (<4 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_2 = constant <4 x i32> zeroinitializer
+@pr9011_3 = constant <4 x i32> bitcast (<16 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_3 = constant <4 x i32> zeroinitializer
+@pr9011_4 = constant <4 x float> uitofp (<4 x i8> zeroinitializer to <4 x float>)
+; CHECK: pr9011_4 = constant <4 x float> zeroinitializer
+@pr9011_5 = constant <4 x float> sitofp (<4 x i8> zeroinitializer to <4 x float>)
+; CHECK: pr9011_5 = constant <4 x float> zeroinitializer
+@pr9011_6 = constant <4 x i32> fptosi (<4 x float> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_6 = constant <4 x i32> zeroinitializer
+@pr9011_7 = constant <4 x i32> fptoui (<4 x float> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_7 = constant <4 x i32> zeroinitializer
+@pr9011_8 = constant <4 x float> fptrunc (<4 x double> zeroinitializer to <4 x float>)
+; CHECK: pr9011_8 = constant <4 x float> zeroinitializer
+@pr9011_9 = constant <4 x double> fpext (<4 x float> zeroinitializer to <4 x double>)
+; CHECK: pr9011_9 = constant <4 x double> zeroinitializer
+
+@pr9011_10 = constant <4 x double> bitcast (i256 0 to <4 x double>)
+; CHECK: pr9011_10 = constant <4 x double> zeroinitializer
+@pr9011_11 = constant <4 x float> bitcast (i128 0 to <4 x float>)
+; CHECK: pr9011_11 = constant <4 x float> zeroinitializer
+@pr9011_12 = constant <4 x i32> bitcast (i128 0 to <4 x i32>)
+; CHECK: pr9011_12 = constant <4 x i32> zeroinitializer
+@pr9011_13 = constant i256 bitcast (<4 x double> zeroinitializer to i256)
+; CHECK: pr9011_13 = constant i256 0
+@pr9011_14 = constant i128 bitcast (<4 x float> zeroinitializer to i128)
+; CHECK: pr9011_14 = constant i128 0
+@pr9011_15 = constant i128 bitcast (<4 x i32> zeroinitializer to i128)
+; CHECK: pr9011_15 = constant i128 0
+
+@select = internal constant
+          i32 select (i1 icmp ult (i32 ptrtoint (i8* @X to i32),
+                                   i32 ptrtoint (i8* @Y to i32)),
+            i32 select (i1 icmp ult (i32 ptrtoint (i8* @X to i32),
+                                     i32 ptrtoint (i8* @Y to i32)),
+               i32 10, i32 20),
+            i32 30)
+; CHECK: select = internal constant i32 select {{.*}} i32 10, i32 30
diff --git a/test/Transforms/ConstProp/extractvalue.ll b/test/Transforms/ConstProp/extractvalue.ll
new file mode 100644
index 000000000000..32d529181b1f
--- /dev/null
+++ b/test/Transforms/ConstProp/extractvalue.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+%struct = type { i32, [4 x i8] }
+%array = type [3 x %struct]
+
+define i32 @test1() {
+  %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 0
+  ret i32 %A
+; CHECK: @test1
+; CHECK: ret i32 2
+}
+
+define i8 @test2() {
+  %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 1, 2
+  ret i8 %A
+; CHECK: @test2
+; CHECK: ret i8 111
+}
+
+define i32 @test3() {
+  %A = extractvalue %array [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], 1, 0
+  ret i32 %A
+; CHECK: @test3
+; CHECK: ret i32 1
+}
+
+define i32 @zeroinitializer-test1() {
+  %A = extractvalue %struct zeroinitializer, 0
+  ret i32 %A
+; CHECK: @zeroinitializer-test1
+; CHECK: ret i32 0
+}
+
+define i8 @zeroinitializer-test2() {
+  %A = extractvalue %struct zeroinitializer, 1, 2
+  ret i8 %A
+; CHECK: @zeroinitializer-test2
+; CHECK: ret i8 0
+}
+
+define i32 @zeroinitializer-test3() {
+  %A = extractvalue %array zeroinitializer, 1, 0
+  ret i32 %A
+; CHECK: @zeroinitializer-test3
+; CHECK: ret i32 0
+}
+
+define i32 @undef-test1() {
+  %A = extractvalue %struct undef, 0
+  ret i32 %A
+; CHECK: @undef-test1
+; CHECK: ret i32 undef
+}
+
+define i8 @undef-test2() {
+  %A = extractvalue %struct undef, 1, 2
+  ret i8 %A
+; CHECK: @undef-test2
+; CHECK: ret i8 undef
+}
+
+define i32 @undef-test3() {
+  %A = extractvalue %array undef, 1, 0
+  ret i32 %A
+; CHECK: @undef-test3
+; CHECK: ret i32 undef
+}
+
diff --git a/test/Transforms/ConstProp/insertvalue.ll b/test/Transforms/ConstProp/insertvalue.ll
new file mode 100644
index 000000000000..f0eb553b2144
--- /dev/null
+++ b/test/Transforms/ConstProp/insertvalue.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+%struct = type { i32, [4 x i8] }
+%array = type [3 x %struct]
+
+define %struct @test1() {
+  %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i32 1, 0
+  ret %struct %A
+; CHECK: @test1
+; CHECK: ret %struct { i32 1, [4 x i8] c"foo\00" }
+}
+
+define %struct @test2() {
+  %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i8 1, 1, 2
+  ret %struct %A
+; CHECK: @test2
+; CHECK: ret %struct { i32 2, [4 x i8] c"fo\01\00" }
+}
+
+define %array @test3() {
+  %A = insertvalue %array [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], i32 -1, 1, 0
+  ret %array %A
+; CHECK: @test3
+; CHECK:ret %array [%struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 -1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" }]
+}
+
+define %struct @zeroinitializer-test1() {
+  %A = insertvalue %struct zeroinitializer, i32 1, 0
+  ret %struct %A
+; CHECK: @zeroinitializer-test1
+; CHECK: ret %struct { i32 1, [4 x i8] zeroinitializer }
+}
+
+define %struct @zeroinitializer-test2() {
+  %A = insertvalue %struct zeroinitializer, i8 1, 1, 2
+  ret %struct %A
+; CHECK: @zeroinitializer-test2
+; CHECK: ret %struct { i32 0, [4 x i8] c"\00\00\01\00" }
+}
+
+define %array @zeroinitializer-test3() {
+  %A = insertvalue %array zeroinitializer, i32 1, 1, 0
+  ret %array %A
+; CHECK: @zeroinitializer-test3
+; CHECK: ret %array [%struct zeroinitializer, %struct { i32 1, [4 x i8] zeroinitializer }, %struct zeroinitializer]
+}
+
+define %struct @undef-test1() {
+  %A = insertvalue %struct undef, i32 1, 0
+  ret %struct %A
+; CHECK: @undef-test1
+; CHECK: ret %struct { i32 1, [4 x i8] undef }
+}
+
+define %struct @undef-test2() {
+  %A = insertvalue %struct undef, i8 0, 1, 2
+  ret %struct %A
+; CHECK: @undef-test2
+; CHECK: ret %struct { i32 undef, [4 x i8] [i8 undef, i8 undef, i8 0, i8 undef] }
+}
+
+define %array @undef-test3() {
+  %A = insertvalue %array undef, i32 0, 1, 0
+  ret %array %A
+; CHECK: @undef-test3
+; CHECK: ret %array [%struct undef, %struct { i32 0, [4 x i8] undef }, %struct undef]
+}
+
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 9fbba2b35528..74d80aa18729 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -120,3 +120,20 @@ define i1 @test13() {
 ; CHECK: @test13
 ; CHECK: ret i1 false
 }
+
+@g6 = constant [2 x i8*] [i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*)]
+define i64 @test14() nounwind {
+entry:
+  %tmp = load i64* bitcast ([2 x i8*]* @g6 to i64*)
+  ret i64 %tmp
+; CHECK: @test14
+; CHECK: ret i64 1
+}
+
+define i64 @test15() nounwind {
+entry:
+  %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
+  ret i64 %tmp
+; CHECK: @test15
+; CHECK: ret i64 2
+}
diff --git a/test/Transforms/ConstProp/logicaltest.ll b/test/Transforms/ConstProp/logicaltest.ll
index 7a90a7199f8d..c74296aa2c0c 100644
--- a/test/Transforms/ConstProp/logicaltest.ll
+++ b/test/Transforms/ConstProp/logicaltest.ll
@@ -1,7 +1,7 @@
 ; Ensure constant propogation of logical instructions is working correctly.
 
-; RUN: opt < %s -constprop -die -S | \
-; RUN:   not egrep {and|or|xor}
+; RUN: opt < %s -constprop -die -S | FileCheck %s
+; CHECK-NOT:     {{and|or|xor}}
 
 define i32 @test1() {
         %R = and i32 4, 1234            ; <i32> [#uses=1]
diff --git a/test/Transforms/ConstProp/nottest.ll b/test/Transforms/ConstProp/nottest.ll
deleted file mode 100644
index 799cecaae60a..000000000000
--- a/test/Transforms/ConstProp/nottest.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Ensure constant propogation of 'not' instructions is working correctly.
-
-; RUN: opt < %s -constprop -die -S | not grep xor
-
-define i32 @test1() {
-        %R = xor i32 4, -1              ; <i32> [#uses=1]
-        ret i32 %R
-}
-
-define i32 @test2() {
-        %R = xor i32 -23, -1            ; <i32> [#uses=1]
-        ret i32 %R
-}
-
-define i1 @test3() {
-        %R = xor i1 true, true          ; <i1> [#uses=1]
-        ret i1 %R
-}
-
diff --git a/test/Transforms/ConstProp/overflow-ops.ll b/test/Transforms/ConstProp/overflow-ops.ll
index 1547a4d0f5b8..5587e9b62330 100644
--- a/test/Transforms/ConstProp/overflow-ops.ll
+++ b/test/Transforms/ConstProp/overflow-ops.ll
@@ -170,3 +170,14 @@ declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
 
 declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
 declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
+
+; rdar://8501501
+define {i8, i1} @smul_1() nounwind {
+entry:
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 -20, i8 -10)
+  ret {i8, i1} %t
+
+; CHECK: @smul_1
+; CHECK: ret %i8i1 { i8 -56, i1 true }
+}
diff --git a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
new file mode 100644
index 000000000000..f561daf66781
--- /dev/null
+++ b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
@@ -0,0 +1,18 @@
+; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; PR8978
+
+declare i32 @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+; CHECK: bar.d
+@bar.d =  unnamed_addr constant %struct.foobar zeroinitializer, align 4
+; CHECK-NOT: foo.d
+@foo.d = internal constant %struct.foobar zeroinitializer, align 4
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: bar.d
+  %call2 = tail call i32 @zed(%struct.foobar* @foo.d, %struct.foobar* @bar.d)
+nounwind
+  ret i32 0
+}
+
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
new file mode 100644
index 000000000000..0282f464aeee
--- /dev/null
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -0,0 +1,26 @@
+; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; Test that in one run var3 is merged into var2 and var1 into var4.
+
+declare void @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+
+@var1 = internal constant %struct.foobar { i32 2 }
+@var2 = unnamed_addr constant %struct.foobar { i32 2 }
+@var3 = internal constant %struct.foobar { i32 2 }
+@var4 = unnamed_addr constant %struct.foobar { i32 2 }
+
+; CHECK:      %struct.foobar = type { i32 }
+; CHECK-NOT: @
+; CHECK: @var2 = constant %struct.foobar { i32 2 }
+; CHECK-NEXT: @var4 = constant %struct.foobar { i32 2 }
+; CHECK-NOT: @
+; CHECK: declare void @zed(%struct.foobar*, %struct.foobar*)
+
+define i32 @main() {
+entry:
+  call void @zed(%struct.foobar* @var1, %struct.foobar* @var2)
+  call void @zed(%struct.foobar* @var3, %struct.foobar* @var4)
+  ret i32 0
+}
+
diff --git a/test/Transforms/ConstantMerge/unnamed-addr.ll b/test/Transforms/ConstantMerge/unnamed-addr.ll
new file mode 100644
index 000000000000..24100837aabd
--- /dev/null
+++ b/test/Transforms/ConstantMerge/unnamed-addr.ll
@@ -0,0 +1,40 @@
+; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; Test which corresponding x and y are merged and that unnamed_addr
+; is correctly set.
+
+declare void @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+
+@test1.x = internal constant %struct.foobar { i32 1 }
+@test1.y = constant %struct.foobar { i32 1 }
+
+@test2.x = internal constant %struct.foobar { i32 2 }
+@test2.y = unnamed_addr constant %struct.foobar { i32 2 }
+
+@test3.x = internal unnamed_addr constant %struct.foobar { i32 3 }
+@test3.y = constant %struct.foobar { i32 3 }
+
+@test4.x = internal unnamed_addr constant %struct.foobar { i32 4 }
+@test4.y = unnamed_addr constant %struct.foobar { i32 4 }
+
+
+; CHECK:      %struct.foobar = type { i32 }
+; CHECK-NOT: @
+; CHECK: @test1.x = internal constant %struct.foobar { i32 1 }
+; CHECK-NEXT: @test1.y = constant %struct.foobar { i32 1 }
+; CHECK-NEXT: @test2.y = constant %struct.foobar { i32 2 }
+; CHECK-NEXT: @test3.y = constant %struct.foobar { i32 3 }
+; CHECK-NEXT: @test4.y = unnamed_addr constant %struct.foobar { i32 4 }
+; CHECK-NOT: @
+; CHECK: declare void @zed(%struct.foobar*, %struct.foobar*)
+
+define i32 @main() {
+entry:
+  call void @zed(%struct.foobar* @test1.x, %struct.foobar* @test1.y)
+  call void @zed(%struct.foobar* @test2.x, %struct.foobar* @test2.y)
+  call void @zed(%struct.foobar* @test3.x, %struct.foobar* @test3.y)
+  call void @zed(%struct.foobar* @test4.x, %struct.foobar* @test4.y)
+  ret i32 0
+}
+
diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll
new file mode 100644
index 000000000000..9ccc7870631e
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -jump-threading -correlated-propagation
+
+%struct.S2 = type {}
+
+@g_128 = external global %struct.S2, align 1
+@g_106 = external global i16, align 2
+
+define void @int328(i16 signext %p_82) noreturn nounwind ssp {
+entry:
+  %tobool3 = icmp eq i16 %p_82, 0
+  br label %for.cond.outer
+
+for.cond.outer:                                   ; preds = %for.cond.loopexit, %entry
+  br label %for.cond
+
+for.cond.loopexit:                                ; preds = %bb.nph, %for.cond9.preheader
+  br label %for.cond.outer
+
+for.cond.loopexit4.us-lcssa:                      ; preds = %if.then
+  br label %for.cond.loopexit4
+
+for.cond.loopexit4:                               ; preds = %for.cond.loopexit4.us-lcssa.us, %for.cond.loopexit4.us-lcssa
+  br label %for.cond.backedge
+
+for.cond:                                         ; preds = %for.cond.backedge, %for.cond.outer
+  br i1 %tobool3, label %for.cond.split.us, label %for.cond.for.cond.split_crit_edge
+
+for.cond.for.cond.split_crit_edge:                ; preds = %for.cond
+  br label %lbl_133
+
+for.cond.split.us:                                ; preds = %for.cond
+  br label %lbl_133.us
+
+lbl_133.us:                                       ; preds = %lbl_134.us, %for.cond.split.us
+  br i1 undef, label %if.else14.us-lcssa.us, label %if.then.us
+
+lbl_134.us:                                       ; preds = %if.then.us
+  br i1 icmp eq (i16 ptrtoint (%struct.S2* @g_128 to i16), i16 0), label %for.cond9.preheader.us-lcssa.us, label %lbl_133.us
+
+if.then.us:                                       ; preds = %lbl_133.us
+  br i1 true, label %for.cond.loopexit4.us-lcssa.us, label %lbl_134.us
+
+if.else14.us-lcssa.us:                            ; preds = %lbl_133.us
+  br label %if.else14
+
+for.cond9.preheader.us-lcssa.us:                  ; preds = %lbl_134.us
+  br label %for.cond9.preheader
+
+for.cond.loopexit4.us-lcssa.us:                   ; preds = %if.then.us
+  br label %for.cond.loopexit4
+
+lbl_133:                                          ; preds = %lbl_134, %for.cond.for.cond.split_crit_edge
+  %l_109.0 = phi i16 [ 0, %for.cond.for.cond.split_crit_edge ], [ ptrtoint (%struct.S2* @g_128 to i16), %lbl_134 ]
+  %tobool = icmp eq i32 undef, 0
+  br i1 %tobool, label %if.else14.us-lcssa, label %if.then
+
+if.then:                                          ; preds = %lbl_133
+  br i1 false, label %for.cond.loopexit4.us-lcssa, label %lbl_134
+
+lbl_134:                                          ; preds = %if.then
+  br i1 icmp eq (i16 ptrtoint (%struct.S2* @g_128 to i16), i16 0), label %for.cond9.preheader.us-lcssa, label %lbl_133
+
+for.cond9.preheader.us-lcssa:                     ; preds = %lbl_134
+  br label %for.cond9.preheader
+
+for.cond9.preheader:                              ; preds = %for.cond9.preheader.us-lcssa, %for.cond9.preheader.us-lcssa.us
+  br i1 undef, label %bb.nph, label %for.cond.loopexit
+
+bb.nph:                                           ; preds = %for.cond9.preheader
+  br label %for.cond.loopexit
+
+if.else14.us-lcssa:                               ; preds = %lbl_133
+  br label %if.else14
+
+if.else14:                                        ; preds = %if.else14.us-lcssa, %if.else14.us-lcssa.us
+  %l_109.0.lcssa = phi i16 [ %l_109.0, %if.else14.us-lcssa ], [ 0, %if.else14.us-lcssa.us ]
+  store i16 undef, i16* @g_106, align 2
+  br label %for.cond.backedge
+
+for.cond.backedge:                                ; preds = %if.else14, %for.cond.loopexit4
+  br label %for.cond
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 24666e901e9e..270c048e2f98 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -52,9 +52,8 @@ bb:             ; preds = %entry
         ret i8 0
 
 bb2:            ; preds = %entry
-; CHECK-NOT: load i8* %a
+; CHECK: %should_be_const = load i8* @gv
         %should_be_const = load i8* %a
-; CHECK: ret i8 7
         ret i8 %should_be_const
 }
 
diff --git a/test/Transforms/CorrelatedValuePropagation/crash.ll b/test/Transforms/CorrelatedValuePropagation/crash.ll
new file mode 100644
index 000000000000..80c43d0f1da5
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/crash.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -correlated-propagation
+
+; PR8161
+define void @test1() nounwind ssp {
+entry:
+  br label %for.end
+
+for.cond.us.us:                                   ; preds = %for.cond.us.us
+  %cmp6.i.us.us = icmp sgt i32 1, 0
+  %lor.ext.i.us.us = zext i1 %cmp6.i.us.us to i32
+  %lor.ext.add.i.us.us = select i1 %cmp6.i.us.us, i32 %lor.ext.i.us.us, i32 undef
+  %conv.i.us.us = trunc i32 %lor.ext.add.i.us.us to i16
+  %sext.us.us = shl i16 %conv.i.us.us, 8
+  %conv6.us.us = ashr i16 %sext.us.us, 8
+  %and.us.us = and i16 %conv6.us.us, %and.us.us
+  br i1 false, label %for.end, label %for.cond.us.us
+
+for.end:                                          ; preds = %for.cond.us, %for.cond.us.us, %entry
+  ret void
+}
+
+; PR 8790
+define void @test2() nounwind ssp {
+entry:
+  br label %func_29.exit
+
+sdf.exit.i:
+  %l_44.1.mux.i = select i1 %tobool5.not.i, i8 %l_44.1.mux.i, i8 1
+  br label %srf.exit.i
+
+srf.exit.i:
+  %tobool5.not.i = icmp ne i8 undef, 0
+  br i1 %tobool5.not.i, label %sdf.exit.i, label %func_29.exit
+
+func_29.exit:
+  ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/non-null.ll b/test/Transforms/CorrelatedValuePropagation/non-null.ll
new file mode 100644
index 000000000000..b14abd83694a
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -0,0 +1,103 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+define void @test1(i8* %ptr) {
+; CHECK: test1
+  %A = load i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+define void @test2(i8* %ptr) {
+; CHECK: test2
+  store i8 0, i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+define void @test3() {
+; CHECK: test3
+  %ptr = alloca i8
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+define void @test4(i8* %dest, i8* %src) {
+; CHECK: test4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+define void @test5(i8* %dest, i8* %src) {
+; CHECK: test5
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+define void @test6(i8* %dest) {
+; CHECK: test6
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+; CHECK-NOT: icmp
+  ret void
+}
+
+define void @test7(i8* %dest, i8* %src, i32 %len) {
+; CHECK: test7
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8* %dest, null
+; CHECK: KEEP1
+  %KEEP2 = icmp ne i8* %src, null
+; CHECK: KEEP2
+  ret void
+}
+
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i32, i1)
+define void @test8(i8 addrspace(1) * %dest, i8 addrspace(1) * %src) {
+; CHECK: test8
+  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i32 1, i1 false)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8 addrspace(1) * %dest, null
+; CHECK: KEEP1
+  %KEEP2 = icmp ne i8 addrspace(1) * %src, null
+; CHECK: KEEP2
+  ret void
+}
+
+define void @test9(i8* %dest, i8* %src) {
+; CHECK: test9
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 true)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8* %dest, null
+; CHECK: KEEP1
+  %KEEP2 = icmp ne i8* %src, null
+; CHECK: KEEP2
+  ret void
+}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index 7256b93af1a2..5a80aba6e2db 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -deadargelim -S %s | FileCheck %s
-; XFAIL: *
 
 define void @test(i32) {
   ret void
@@ -11,3 +10,30 @@ define void @foo() {
 ; CHECK: @foo
 ; CHECK: i32 undef
 }
+
+define void @f(i32 %X) {
+entry:
+  tail call void @sideeffect() nounwind
+  ret void
+}
+
+declare void @sideeffect()
+
+define void @g(i32 %n) {
+entry:
+  %add = add nsw i32 %n, 1
+; CHECK: tail call void @f(i32 undef)
+  tail call void @f(i32 %add)
+  ret void
+}
+
+define void @h() {
+entry:
+  %i = alloca i32, align 4
+  volatile store i32 10, i32* %i, align 4
+; CHECK: %tmp = volatile load i32* %i, align 4
+; CHECK-next: call void @f(i32 undef)
+  %tmp = volatile load i32* %i, align 4
+  call void @f(i32 %tmp)
+  ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll b/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
deleted file mode 100644
index d1a9dd8fb59a..000000000000
--- a/test/Transforms/DeadStoreElimination/2004-11-28-LiveStoreDeleted.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -dse -scalarrepl -instcombine | \
-; RUN:   llvm-dis | not grep {ret i32 undef}
-
-define i32 @test(double %__x) {
-        %__u = alloca { [3 x i32] }             ; <{ [3 x i32] }*> [#uses=2]
-        %tmp.1 = bitcast { [3 x i32] }* %__u to double*         ; <double*> [#uses=1]
-        store double %__x, double* %tmp.1
-        %tmp.4 = getelementptr { [3 x i32] }* %__u, i32 0, i32 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.5 = load i32* %tmp.4               ; <i32> [#uses=1]
-        %tmp.6 = icmp slt i32 %tmp.5, 0         ; <i1> [#uses=1]
-        %tmp.7 = zext i1 %tmp.6 to i32          ; <i32> [#uses=1]
-        ret i32 %tmp.7
-}
-
diff --git a/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll b/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
deleted file mode 100644
index cae2a6f50a50..000000000000
--- a/test/Transforms/DeadStoreElimination/2004-12-28-PartialStore.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -dse -S | \
-; RUN:    grep {store i32 1234567}
-
-; Do not delete stores that are only partially killed.
-
-define i32 @test() {
-        %V = alloca i32         ; <i32*> [#uses=3]
-        store i32 1234567, i32* %V
-        %V2 = bitcast i32* %V to i8*            ; <i8*> [#uses=1]
-        store i8 0, i8* %V2
-        %X = load i32* %V               ; <i32> [#uses=1]
-        ret i32 %X
-}
diff --git a/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll b/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
deleted file mode 100644
index 147ec84efe55..000000000000
--- a/test/Transforms/DeadStoreElimination/2005-11-30-vaarg.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -dse -S | grep store
-
-define double @foo(i8* %X) {
-        %X_addr = alloca i8*            ; <i8**> [#uses=2]
-        store i8* %X, i8** %X_addr
-        %tmp.0 = va_arg i8** %X_addr, double            ; <double> [#uses=1]
-        ret double %tmp.0
-}
-
diff --git a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll b/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
deleted file mode 100644
index 0b0830685f44..000000000000
--- a/test/Transforms/DeadStoreElimination/2006-06-27-AST-Remove.ll
+++ /dev/null
@@ -1,1113 +0,0 @@
-; RUN: opt < %s -globalsmodref-aa -dse -disable-output
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8"
-	%struct.ECacheType = type { i32, i32, i32 }
-	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-	%struct.QTType = type { i8, i8, i16, i32, i32, i32 }
-	%struct.TType = type { i8, i8, i8, i8, i16, i32, i32, i32 }
-	%struct._RuneEntry = type { i32, i32, i32, i32* }
-	%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32 }
-	%struct._RuneRange = type { i32, %struct._RuneEntry* }
-	%struct.__sFILEX = type opaque
-	%struct.__sbuf = type { i8*, i32 }
-	%struct.move_s = type { i32, i32, i32, i32, i32, i32 }
-	%struct.move_x = type { i32, i32, i32, i32 }
-	%struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
-	%struct.see_data = type { i32, i32 }
-@rook_o.2925 = internal global [4 x i32] [ i32 12, i32 -12, i32 1, i32 -1 ]		; <[4 x i32]*> [#uses=0]
-@bishop_o.2926 = internal global [4 x i32] [ i32 11, i32 -11, i32 13, i32 -13 ]		; <[4 x i32]*> [#uses=0]
-@knight_o.2927 = internal global [8 x i32] [ i32 10, i32 -10, i32 14, i32 -14, i32 23, i32 -23, i32 25, i32 -25 ]		; <[8 x i32]*> [#uses=0]
-@board = internal global [144 x i32] zeroinitializer		; <[144 x i32]*> [#uses=0]
-@holding = internal global [2 x [16 x i32]] zeroinitializer		; <[2 x [16 x i32]]*> [#uses=0]
-@hold_hash = internal global i32 0		; <i32*> [#uses=0]
-@white_hand_eval = internal global i32 0		; <i32*> [#uses=0]
-@black_hand_eval = internal global i32 0		; <i32*> [#uses=0]
-@num_holding = internal global [2 x i32] zeroinitializer		; <[2 x i32]*> [#uses=0]
-@zobrist = internal global [14 x [144 x i32]] zeroinitializer		; <[14 x [144 x i32]]*> [#uses=0]
-@Variant = internal global i32 0		; <i32*> [#uses=7]
-@userealholdings.b = internal global i1 false		; <i1*> [#uses=1]
-@realholdings = internal global [255 x i8] zeroinitializer		; <[255 x i8]*> [#uses=0]
-@comp_color = internal global i32 0		; <i32*> [#uses=0]
-@C.97.3177 = internal global [13 x i32] [ i32 0, i32 2, i32 1, i32 4, i32 3, i32 0, i32 0, i32 8, i32 7, i32 10, i32 9, i32 12, i32 11 ]		; <[13 x i32]*> [#uses=0]
-@str = internal global [30 x i8] c"%s:%u: failed assertion `%s'\0A\00"		; <[30 x i8]*> [#uses=0]
-@str.upgrd.1 = internal global [81 x i8] c"/Volumes/Stuff/src/speccpu2006-091-llvm/benchspec//CPU2006/458.sjeng/src/crazy.c\00"		; <[81 x i8]*> [#uses=0]
-@str.upgrd.2 = internal global [32 x i8] c"piece > frame && piece < npiece\00"		; <[32 x i8]*> [#uses=0]
-@C.101.3190 = internal global [13 x i32] [ i32 0, i32 2, i32 1, i32 2, i32 1, i32 0, i32 0, i32 2, i32 1, i32 2, i32 1, i32 2, i32 1 ]		; <[13 x i32]*> [#uses=0]
-@hand_value = internal global [13 x i32] [ i32 0, i32 100, i32 -100, i32 210, i32 -210, i32 0, i32 0, i32 250, i32 -250, i32 450, i32 -450, i32 230, i32 -230 ]		; <[13 x i32]*> [#uses=0]
-@material = internal global [14 x i32] zeroinitializer		; <[14 x i32]*> [#uses=0]
-@Material = internal global i32 0		; <i32*> [#uses=0]
-@str.upgrd.3 = internal global [23 x i8] c"holding[who][what] > 0\00"		; <[23 x i8]*> [#uses=0]
-@str.upgrd.4 = internal global [24 x i8] c"holding[who][what] < 20\00"		; <[24 x i8]*> [#uses=0]
-@fifty = internal global i32 0		; <i32*> [#uses=0]
-@move_number = internal global i32 0		; <i32*> [#uses=1]
-@ply = internal global i32 0		; <i32*> [#uses=2]
-@hash_history = internal global [600 x i32] zeroinitializer		; <[600 x i32]*> [#uses=1]
-@hash = internal global i32 0		; <i32*> [#uses=1]
-@ECacheSize.b = internal global i1 false		; <i1*> [#uses=1]
-@ECache = internal global %struct.ECacheType* null		; <%struct.ECacheType**> [#uses=1]
-@ECacheProbes = internal global i32 0		; <i32*> [#uses=1]
-@ECacheHits = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.5 = internal global [34 x i8] c"Out of memory allocating ECache.\0A\00"		; <[34 x i8]*> [#uses=0]
-@rankoffsets.2930 = internal global [8 x i32] [ i32 110, i32 98, i32 86, i32 74, i32 62, i32 50, i32 38, i32 26 ]		; <[8 x i32]*> [#uses=0]
-@white_castled = internal global i32 0		; <i32*> [#uses=0]
-@black_castled = internal global i32 0		; <i32*> [#uses=0]
-@book_ply = internal global i32 0		; <i32*> [#uses=0]
-@bking_loc = internal global i32 0		; <i32*> [#uses=1]
-@wking_loc = internal global i32 0		; <i32*> [#uses=1]
-@white_to_move = internal global i32 0		; <i32*> [#uses=3]
-@moved = internal global [144 x i32] zeroinitializer		; <[144 x i32]*> [#uses=0]
-@ep_square = internal global i32 0		; <i32*> [#uses=0]
-@_DefaultRuneLocale = external global %struct._RuneLocale		; <%struct._RuneLocale*> [#uses=0]
-@str.upgrd.6 = internal global [3 x i8] c"bm\00"		; <[3 x i8]*> [#uses=0]
-@str1 = internal global [3 x i8] c"am\00"		; <[3 x i8]*> [#uses=0]
-@str1.upgrd.7 = internal global [34 x i8] c"No best-move or avoid-move found!\00"		; <[34 x i8]*> [#uses=0]
-@str.upgrd.8 = internal global [25 x i8] c"\0AName of EPD testsuite: \00"		; <[25 x i8]*> [#uses=0]
-@__sF = external global [0 x %struct.FILE]		; <[0 x %struct.FILE]*> [#uses=0]
-@str.upgrd.9 = internal global [21 x i8] c"\0ATime per move (s): \00"		; <[21 x i8]*> [#uses=0]
-@str.upgrd.10 = internal global [2 x i8] c"\0A\00"		; <[2 x i8]*> [#uses=0]
-@str2 = internal global [2 x i8] c"r\00"		; <[2 x i8]*> [#uses=0]
-@root_to_move = internal global i32 0		; <i32*> [#uses=1]
-@forcedwin.b = internal global i1 false		; <i1*> [#uses=2]
-@fixed_time = internal global i32 0		; <i32*> [#uses=1]
-@nodes = internal global i32 0		; <i32*> [#uses=1]
-@qnodes = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.11 = internal global [29 x i8] c"\0ANodes: %i (%0.2f%% qnodes)\0A\00"		; <[29 x i8]*> [#uses=0]
-@str.upgrd.12 = internal global [54 x i8] c"ECacheProbes : %u   ECacheHits : %u   HitRate : %f%%\0A\00"		; <[54 x i8]*> [#uses=0]
-@TTStores = internal global i32 0		; <i32*> [#uses=1]
-@TTProbes = internal global i32 0		; <i32*> [#uses=1]
-@TTHits = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.13 = internal global [60 x i8] c"TTStores : %u TTProbes : %u   TTHits : %u   HitRate : %f%%\0A\00"		; <[60 x i8]*> [#uses=0]
-@NTries = internal global i32 0		; <i32*> [#uses=1]
-@NCuts = internal global i32 0		; <i32*> [#uses=1]
-@TExt = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.14 = internal global [51 x i8] c"NTries : %u  NCuts : %u  CutRate : %f%%  TExt: %u\0A\00"		; <[51 x i8]*> [#uses=0]
-@ext_check = internal global i32 0		; <i32*> [#uses=1]
-@razor_drop = internal global i32 0		; <i32*> [#uses=1]
-@razor_material = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.15 = internal global [61 x i8] c"Check extensions: %u  Razor drops : %u  Razor Material : %u\0A\00"		; <[61 x i8]*> [#uses=0]
-@FHF = internal global i32 0		; <i32*> [#uses=1]
-@FH = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.16 = internal global [22 x i8] c"Move ordering : %f%%\0A\00"		; <[22 x i8]*> [#uses=0]
-@maxposdiff = internal global i32 0		; <i32*> [#uses=1]
-@str.upgrd.17 = internal global [47 x i8] c"Material score: %d  Eval : %d  MaxPosDiff: %d\0A\00"		; <[47 x i8]*> [#uses=0]
-@str.upgrd.18 = internal global [17 x i8] c"Solution found.\0A\00"		; <[17 x i8]*> [#uses=0]
-@str3 = internal global [21 x i8] c"Solution not found.\0A\00"		; <[21 x i8]*> [#uses=0]
-@str.upgrd.19 = internal global [15 x i8] c"Solved: %d/%d\0A\00"		; <[15 x i8]*> [#uses=0]
-@str.upgrd.20 = internal global [9 x i8] c"EPD: %s\0A\00"		; <[9 x i8]*> [#uses=0]
-@str4 = internal global [21 x i8] c"Searching to %d ply\0A\00"		; <[21 x i8]*> [#uses=0]
-@maxdepth = internal global i32 0		; <i32*> [#uses=0]
-@std_material = internal global [14 x i32] [ i32 0, i32 100, i32 -100, i32 310, i32 -310, i32 4000, i32 -4000, i32 500, i32 -500, i32 900, i32 -900, i32 325, i32 -325, i32 0 ]		; <[14 x i32]*> [#uses=0]
-@zh_material = internal global [14 x i32] [ i32 0, i32 100, i32 -100, i32 210, i32 -210, i32 4000, i32 -4000, i32 250, i32 -250, i32 450, i32 -450, i32 230, i32 -230, i32 0 ]		; <[14 x i32]*> [#uses=0]
-@suicide_material = internal global [14 x i32] [ i32 0, i32 15, i32 -15, i32 150, i32 -150, i32 500, i32 -500, i32 150, i32 -150, i32 50, i32 -50, i32 0, i32 0, i32 0 ]		; <[14 x i32]*> [#uses=0]
-@losers_material = internal global [14 x i32] [ i32 0, i32 80, i32 -80, i32 320, i32 -320, i32 1000, i32 -1000, i32 350, i32 -350, i32 400, i32 -400, i32 270, i32 -270, i32 0 ]		; <[14 x i32]*> [#uses=0]
-@Xfile = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@Xrank = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 0, i32 0, i32 0, i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 0, i32 0, i32 0, i32 0, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@Xdiagl = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 0, i32 0, i32 0, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 0, i32 0, i32 0, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@Xdiagr = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 0, i32 0, i32 0, i32 0, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 13, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 12, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 11, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 0, i32 0, i32 0, i32 10, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0, i32 0, i32 0, i32 0, i32 9, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@sqcolor = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@pcsqbishop = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -5, i32 -10, i32 -5, i32 -5, i32 -10, i32 -5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 10, i32 5, i32 10, i32 10, i32 5, i32 10, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 5, i32 6, i32 15, i32 15, i32 6, i32 5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 3, i32 15, i32 10, i32 10, i32 15, i32 3, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 3, i32 15, i32 10, i32 10, i32 15, i32 3, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 5, i32 6, i32 15, i32 15, i32 6, i32 5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 10, i32 5, i32 10, i32 10, i32 5, i32 10, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -5, i32 -10, i32 -5, i32 -5, i32 -10, i32 -5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@black_knight = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 15, i32 25, i32 25, i32 25, i32 25, i32 15, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 15, i32 25, i32 35, i32 35, i32 35, i32 15, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 10, i32 25, i32 20, i32 25, i32 25, i32 10, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 20, i32 20, i32 20, i32 20, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 15, i32 15, i32 15, i32 15, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 3, i32 3, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -35, i32 -10, i32 -10, i32 -10, i32 -10, i32 -35, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@white_knight = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -35, i32 -10, i32 -10, i32 -10, i32 -10, i32 -35, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 3, i32 3, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 15, i32 15, i32 15, i32 15, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 20, i32 20, i32 20, i32 20, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 10, i32 25, i32 20, i32 25, i32 25, i32 10, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 15, i32 25, i32 35, i32 35, i32 35, i32 15, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 15, i32 25, i32 25, i32 25, i32 25, i32 15, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@white_pawn = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 25, i32 35, i32 5, i32 5, i32 50, i32 45, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 7, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 20, i32 20, i32 10, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 12, i32 18, i32 18, i32 27, i32 27, i32 18, i32 18, i32 18, i32 0, i32 0, i32 0, i32 0, i32 25, i32 30, i32 30, i32 35, i32 35, i32 35, i32 30, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@black_pawn = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 30, i32 30, i32 30, i32 35, i32 35, i32 35, i32 30, i32 25, i32 0, i32 0, i32 0, i32 0, i32 12, i32 18, i32 18, i32 27, i32 27, i32 18, i32 18, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 20, i32 20, i32 10, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 7, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 25, i32 35, i32 5, i32 5, i32 50, i32 45, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@white_king = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -100, i32 7, i32 4, i32 0, i32 10, i32 4, i32 7, i32 -100, i32 0, i32 0, i32 0, i32 0, i32 -250, i32 -200, i32 -150, i32 -100, i32 -100, i32 -150, i32 -200, i32 -250, i32 0, i32 0, i32 0, i32 0, i32 -350, i32 -300, i32 -300, i32 -250, i32 -250, i32 -300, i32 -300, i32 -350, i32 0, i32 0, i32 0, i32 0, i32 -400, i32 -400, i32 -400, i32 -350, i32 -350, i32 -400, i32 -400, i32 -400, i32 0, i32 0, i32 0, i32 0, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@black_king = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 -500, i32 0, i32 0, i32 0, i32 0, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 -450, i32 0, i32 0, i32 0, i32 0, i32 -400, i32 -400, i32 -400, i32 -350, i32 -350, i32 -400, i32 -400, i32 -400, i32 0, i32 0, i32 0, i32 0, i32 -350, i32 -300, i32 -300, i32 -250, i32 -250, i32 -300, i32 -300, i32 -350, i32 0, i32 0, i32 0, i32 0, i32 -250, i32 -200, i32 -150, i32 -100, i32 -100, i32 -150, i32 -200, i32 -250, i32 0, i32 0, i32 0, i32 0, i32 -100, i32 7, i32 4, i32 0, i32 10, i32 4, i32 7, i32 -100, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@black_queen = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 5, i32 5, i32 10, i32 10, i32 5, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 0, i32 0, i32 0, i32 0, i32 -60, i32 -40, i32 -40, i32 -60, i32 -60, i32 -40, i32 -40, i32 -60, i32 0, i32 0, i32 0, i32 0, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 -15, i32 -15, i32 -10, i32 -10, i32 -15, i32 -15, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 10, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@white_queen = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 10, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 -15, i32 -15, i32 -10, i32 -10, i32 -15, i32 -15, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 -40, i32 0, i32 0, i32 0, i32 0, i32 -60, i32 -40, i32 -40, i32 -60, i32 -60, i32 -40, i32 -40, i32 -60, i32 0, i32 0, i32 0, i32 0, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 -30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 5, i32 5, i32 10, i32 10, i32 5, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@black_rook = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 15, i32 20, i32 25, i32 25, i32 20, i32 15, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 15, i32 20, i32 20, i32 15, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -30, i32 -30, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 -15, i32 -15, i32 -10, i32 -10, i32 -15, i32 -15, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@white_rook = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 -15, i32 -15, i32 -10, i32 -10, i32 -15, i32 -15, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -30, i32 -30, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 15, i32 20, i32 20, i32 15, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 15, i32 20, i32 25, i32 25, i32 20, i32 15, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@upscale = internal global [64 x i32] [ i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117 ]		; <[64 x i32]*> [#uses=0]
-@pre_p_tropism = internal global [9 x i32] [ i32 9999, i32 40, i32 20, i32 10, i32 3, i32 1, i32 1, i32 0, i32 9999 ]		; <[9 x i32]*> [#uses=0]
-@pre_r_tropism = internal global [9 x i32] [ i32 9999, i32 50, i32 40, i32 15, i32 5, i32 1, i32 1, i32 0, i32 9999 ]		; <[9 x i32]*> [#uses=0]
-@pre_n_tropism = internal global [9 x i32] [ i32 9999, i32 50, i32 70, i32 35, i32 10, i32 2, i32 1, i32 0, i32 9999 ]		; <[9 x i32]*> [#uses=0]
-@pre_q_tropism = internal global [9 x i32] [ i32 9999, i32 100, i32 60, i32 20, i32 5, i32 2, i32 0, i32 0, i32 9999 ]		; <[9 x i32]*> [#uses=0]
-@pre_b_tropism = internal global [9 x i32] [ i32 9999, i32 50, i32 25, i32 15, i32 5, i32 2, i32 2, i32 2, i32 9999 ]		; <[9 x i32]*> [#uses=0]
-@rookdistance = internal global [144 x [144 x i32]] zeroinitializer		; <[144 x [144 x i32]]*> [#uses=0]
-@distance = internal global [144 x [144 x i32]] zeroinitializer		; <[144 x [144 x i32]]*> [#uses=0]
-@p_tropism = internal global [144 x [144 x i8]] zeroinitializer		; <[144 x [144 x i8]]*> [#uses=0]
-@b_tropism = internal global [144 x [144 x i8]] zeroinitializer		; <[144 x [144 x i8]]*> [#uses=0]
-@n_tropism = internal global [144 x [144 x i8]] zeroinitializer		; <[144 x [144 x i8]]*> [#uses=0]
-@r_tropism = internal global [144 x [144 x i8]] zeroinitializer		; <[144 x [144 x i8]]*> [#uses=0]
-@q_tropism = internal global [144 x [144 x i8]] zeroinitializer		; <[144 x [144 x i8]]*> [#uses=0]
-@cfg_devscale.b = internal global i1 false		; <i1*> [#uses=0]
-@pieces = internal global [62 x i32] zeroinitializer		; <[62 x i32]*> [#uses=0]
-@piece_count = internal global i32 0		; <i32*> [#uses=1]
-@cfg_smarteval.b = internal global i1 false		; <i1*> [#uses=0]
-@lcentral = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -15, i32 -15, i32 -15, i32 -15, i32 -15, i32 -15, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 3, i32 5, i32 5, i32 3, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 15, i32 15, i32 15, i32 15, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 15, i32 30, i32 30, i32 15, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 15, i32 30, i32 30, i32 15, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 15, i32 15, i32 15, i32 15, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -15, i32 0, i32 3, i32 5, i32 5, i32 3, i32 0, i32 -15, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -15, i32 -15, i32 -15, i32 -15, i32 -15, i32 -15, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@str3.upgrd.21 = internal global [81 x i8] c"/Volumes/Stuff/src/speccpu2006-091-llvm/benchspec//CPU2006/458.sjeng/src/leval.c\00"		; <[81 x i8]*> [#uses=0]
-@str5 = internal global [21 x i8] c"(i > 0) && (i < 145)\00"		; <[21 x i8]*> [#uses=0]
-@kingcap.b = internal global i1 false		; <i1*> [#uses=0]
-@numb_moves = internal global i32 0		; <i32*> [#uses=2]
-@genfor = internal global %struct.move_s* null		; <%struct.move_s**> [#uses=0]
-@captures = internal global i32 0		; <i32*> [#uses=1]
-@fcaptures.b = internal global i1 false		; <i1*> [#uses=0]
-@gfrom = internal global i32 0		; <i32*> [#uses=0]
-@Giveaway.b = internal global i1 false		; <i1*> [#uses=0]
-@path_x = internal global [300 x %struct.move_x] zeroinitializer		; <[300 x %struct.move_x]*> [#uses=0]
-@str7 = internal global [81 x i8] c"/Volumes/Stuff/src/speccpu2006-091-llvm/benchspec//CPU2006/458.sjeng/src/moves.c\00"		; <[81 x i8]*> [#uses=0]
-@str8 = internal global [15 x i8] c"find_slot < 63\00"		; <[15 x i8]*> [#uses=0]
-@is_promoted = internal global [62 x i32] zeroinitializer		; <[62 x i32]*> [#uses=0]
-@squares = internal global [144 x i32] zeroinitializer		; <[144 x i32]*> [#uses=0]
-@str.upgrd.22 = internal global [38 x i8] c"promoted > frame && promoted < npiece\00"		; <[38 x i8]*> [#uses=0]
-@str1.upgrd.23 = internal global [38 x i8] c"promoted < npiece && promoted > frame\00"		; <[38 x i8]*> [#uses=0]
-@evalRoutines = internal global [7 x i32 (i32, i32)*] [ i32 (i32, i32)* @ErrorIt, i32 (i32, i32)* @Pawn, i32 (i32, i32)* @Knight, i32 (i32, i32)* @King, i32 (i32, i32)* @Rook, i32 (i32, i32)* @Queen, i32 (i32, i32)* @Bishop ]		; <[7 x i32 (i32, i32)*]*> [#uses=0]
-@sbishop = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 8, i32 5, i32 5, i32 5, i32 5, i32 8, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 3, i32 3, i32 5, i32 5, i32 3, i32 3, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 2, i32 5, i32 4, i32 4, i32 5, i32 2, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 2, i32 5, i32 4, i32 4, i32 5, i32 2, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 3, i32 3, i32 5, i32 5, i32 3, i32 3, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 8, i32 5, i32 5, i32 5, i32 5, i32 8, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 -2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@sknight = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 3, i32 3, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 5, i32 5, i32 5, i32 5, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 5, i32 10, i32 10, i32 5, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 5, i32 10, i32 10, i32 5, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 5, i32 5, i32 5, i32 5, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 3, i32 3, i32 0, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@swhite_pawn = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 10, i32 10, i32 3, i32 2, i32 1, i32 0, i32 0, i32 0, i32 0, i32 2, i32 4, i32 6, i32 12, i32 12, i32 6, i32 4, i32 2, i32 0, i32 0, i32 0, i32 0, i32 3, i32 6, i32 9, i32 14, i32 14, i32 9, i32 6, i32 3, i32 0, i32 0, i32 0, i32 0, i32 10, i32 12, i32 14, i32 16, i32 16, i32 14, i32 12, i32 10, i32 0, i32 0, i32 0, i32 0, i32 20, i32 22, i32 24, i32 26, i32 26, i32 24, i32 22, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@sblack_pawn = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 22, i32 24, i32 26, i32 26, i32 24, i32 22, i32 20, i32 0, i32 0, i32 0, i32 0, i32 10, i32 12, i32 14, i32 16, i32 16, i32 14, i32 12, i32 10, i32 0, i32 0, i32 0, i32 0, i32 3, i32 6, i32 9, i32 14, i32 14, i32 9, i32 6, i32 3, i32 0, i32 0, i32 0, i32 0, i32 2, i32 4, i32 6, i32 12, i32 12, i32 6, i32 4, i32 2, i32 0, i32 0, i32 0, i32 0, i32 1, i32 2, i32 3, i32 10, i32 10, i32 3, i32 2, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@swhite_king = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 14, i32 0, i32 0, i32 0, i32 9, i32 14, i32 2, i32 0, i32 0, i32 0, i32 0, i32 -3, i32 -5, i32 -6, i32 -6, i32 -6, i32 -6, i32 -5, i32 -3, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -5, i32 -8, i32 -8, i32 -8, i32 -8, i32 -5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -8, i32 -8, i32 -13, i32 -13, i32 -13, i32 -13, i32 -8, i32 -8, i32 0, i32 0, i32 0, i32 0, i32 -13, i32 -13, i32 -21, i32 -21, i32 -21, i32 -21, i32 -13, i32 -13, i32 0, i32 0, i32 0, i32 0, i32 -21, i32 -21, i32 -34, i32 -34, i32 -34, i32 -34, i32 -21, i32 -21, i32 0, i32 0, i32 0, i32 0, i32 -34, i32 -34, i32 -55, i32 -55, i32 -55, i32 -55, i32 -34, i32 -34, i32 0, i32 0, i32 0, i32 0, i32 -55, i32 -55, i32 -89, i32 -89, i32 -89, i32 -89, i32 -55, i32 -55, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@sblack_king = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -55, i32 -55, i32 -89, i32 -89, i32 -89, i32 -89, i32 -55, i32 -55, i32 0, i32 0, i32 0, i32 0, i32 -34, i32 -34, i32 -55, i32 -55, i32 -55, i32 -55, i32 -34, i32 -34, i32 0, i32 0, i32 0, i32 0, i32 -21, i32 -21, i32 -34, i32 -34, i32 -34, i32 -34, i32 -21, i32 -21, i32 0, i32 0, i32 0, i32 0, i32 -13, i32 -13, i32 -21, i32 -21, i32 -21, i32 -21, i32 -13, i32 -13, i32 0, i32 0, i32 0, i32 0, i32 -8, i32 -8, i32 -13, i32 -13, i32 -13, i32 -13, i32 -8, i32 -8, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -5, i32 -8, i32 -8, i32 -8, i32 -8, i32 -5, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -3, i32 -5, i32 -6, i32 -6, i32 -6, i32 -6, i32 -5, i32 -3, i32 0, i32 0, i32 0, i32 0, i32 2, i32 14, i32 0, i32 0, i32 0, i32 9, i32 14, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@send_king = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -3, i32 -1, i32 0, i32 0, i32 -1, i32 -3, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 -3, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 -3, i32 0, i32 0, i32 0, i32 0, i32 -1, i32 10, i32 25, i32 25, i32 25, i32 25, i32 10, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 25, i32 50, i32 50, i32 25, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 25, i32 50, i32 50, i32 25, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -1, i32 10, i32 25, i32 25, i32 25, i32 25, i32 10, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 -3, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 -3, i32 0, i32 0, i32 0, i32 0, i32 -5, i32 -3, i32 -1, i32 0, i32 0, i32 -1, i32 -3, i32 -5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@srev_rank = internal global [9 x i32] [ i32 0, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ]		; <[9 x i32]*> [#uses=0]
-@std_p_tropism = internal global [8 x i32] [ i32 9999, i32 15, i32 10, i32 7, i32 2, i32 0, i32 0, i32 0 ]		; <[8 x i32]*> [#uses=0]
-@std_own_p_tropism = internal global [8 x i32] [ i32 9999, i32 30, i32 10, i32 2, i32 0, i32 0, i32 0, i32 0 ]		; <[8 x i32]*> [#uses=0]
-@std_r_tropism = internal global [16 x i32] [ i32 9999, i32 0, i32 15, i32 5, i32 2, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[16 x i32]*> [#uses=0]
-@std_n_tropism = internal global [8 x i32] [ i32 9999, i32 14, i32 9, i32 6, i32 1, i32 0, i32 0, i32 0 ]		; <[8 x i32]*> [#uses=0]
-@std_q_tropism = internal global [8 x i32] [ i32 9999, i32 200, i32 50, i32 15, i32 3, i32 2, i32 1, i32 0 ]		; <[8 x i32]*> [#uses=0]
-@std_b_tropism = internal global [8 x i32] [ i32 9999, i32 12, i32 7, i32 5, i32 0, i32 0, i32 0, i32 0 ]		; <[8 x i32]*> [#uses=0]
-@phase = internal global i32 0		; <i32*> [#uses=1]
-@dir.3001 = internal global [4 x i32] [ i32 -13, i32 -11, i32 11, i32 13 ]		; <[4 x i32]*> [#uses=0]
-@dir.3021 = internal global [4 x i32] [ i32 -1, i32 1, i32 12, i32 -12 ]		; <[4 x i32]*> [#uses=0]
-@king_locs = internal global [2 x i32] zeroinitializer		; <[2 x i32]*> [#uses=0]
-@square_d1.3081 = internal global [2 x i32] [ i32 29, i32 113 ]		; <[2 x i32]*> [#uses=0]
-@wmat = internal global i32 0		; <i32*> [#uses=0]
-@bmat = internal global i32 0		; <i32*> [#uses=0]
-@str.upgrd.24 = internal global [35 x i8] c"Illegal piece detected sq=%i c=%i\0A\00"		; <[35 x i8]*> [#uses=0]
-@str10 = internal global [81 x i8] c"/Volumes/Stuff/src/speccpu2006-091-llvm/benchspec//CPU2006/458.sjeng/src/neval.c\00"		; <[81 x i8]*> [#uses=0]
-@std_hand_value = internal global [13 x i32] [ i32 0, i32 100, i32 -100, i32 210, i32 -210, i32 0, i32 0, i32 250, i32 -250, i32 450, i32 -450, i32 230, i32 -230 ]		; <[13 x i32]*> [#uses=0]
-@xb_mode = internal global i32 0		; <i32*> [#uses=0]
-@str.upgrd.25 = internal global [69 x i8] c"tellics ptell Hello! I am Sjeng and hope you enjoy playing with me.\0A\00"		; <[69 x i8]*> [#uses=0]
-@str.upgrd.26 = internal global [76 x i8] c"tellics ptell For help on some commands that I understand, ptell me 'help'\0A\00"		; <[76 x i8]*> [#uses=0]
-@str12 = internal global [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=0]
-@my_partner = internal global [256 x i8] zeroinitializer		; <[256 x i8]*> [#uses=0]
-@str13 = internal global [25 x i8] c"tellics set f5 bughouse\0A\00"		; <[25 x i8]*> [#uses=0]
-@str.upgrd.27 = internal global [16 x i8] c"tellics unseek\0A\00"		; <[16 x i8]*> [#uses=0]
-@str.upgrd.28 = internal global [20 x i8] c"tellics set f5 1=1\0A\00"		; <[20 x i8]*> [#uses=0]
-@str.upgrd.29 = internal global [80 x i8] c"is...uh...what did you say?\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"		; <[80 x i8]*> [#uses=0]
-@str.upgrd.30 = internal global [5 x i8] c"help\00"		; <[5 x i8]*> [#uses=0]
-@str.upgrd.31 = internal global [147 x i8] c"tellics ptell Commands that I understand are : sit, go, fast, slow, abort, flag, +/++/+++/-/--/---{p,n,b,r,q,d,h,trades}, x, dead, formula, help.\0A\00"		; <[147 x i8]*> [#uses=0]
-@str.upgrd.32 = internal global [6 x i8] c"sorry\00"		; <[6 x i8]*> [#uses=0]
-@str.upgrd.33 = internal global [59 x i8] c"tellics ptell Sorry, but I'm not playing a bughouse game.\0A\00"		; <[59 x i8]*> [#uses=0]
-@str.upgrd.34 = internal global [4 x i8] c"sit\00"		; <[4 x i8]*> [#uses=0]
-@str.upgrd.35 = internal global [56 x i8] c"tellics ptell Ok, I sit next move. Tell me when to go.\0A\00"		; <[56 x i8]*> [#uses=0]
-@must_sit.b = internal global i1 false		; <i1*> [#uses=0]
-@str114 = internal global [3 x i8] c"go\00"		; <[3 x i8]*> [#uses=0]
-@str2.upgrd.36 = internal global [5 x i8] c"move\00"		; <[5 x i8]*> [#uses=0]
-@str.upgrd.37 = internal global [31 x i8] c"tellics ptell Ok, I'm moving.\0A\00"		; <[31 x i8]*> [#uses=0]
-@str3.upgrd.38 = internal global [5 x i8] c"fast\00"		; <[5 x i8]*> [#uses=0]
-@str4.upgrd.39 = internal global [5 x i8] c"time\00"		; <[5 x i8]*> [#uses=0]
-@str15 = internal global [35 x i8] c"tellics ptell Ok, I'm going FAST!\0A\00"		; <[35 x i8]*> [#uses=0]
-@go_fast.b = internal global i1 false		; <i1*> [#uses=0]
-@str5.upgrd.40 = internal global [5 x i8] c"slow\00"		; <[5 x i8]*> [#uses=0]
-@str16 = internal global [36 x i8] c"tellics ptell Ok, moving normally.\0A\00"		; <[36 x i8]*> [#uses=0]
-@str6 = internal global [6 x i8] c"abort\00"		; <[6 x i8]*> [#uses=0]
-@str7.upgrd.41 = internal global [35 x i8] c"tellics ptell Requesting abort...\0A\00"		; <[35 x i8]*> [#uses=0]
-@str17 = internal global [15 x i8] c"tellics abort\0A\00"		; <[15 x i8]*> [#uses=0]
-@str8.upgrd.42 = internal global [5 x i8] c"flag\00"		; <[5 x i8]*> [#uses=0]
-@str.upgrd.43 = internal global [27 x i8] c"tellics ptell Flagging...\0A\00"		; <[27 x i8]*> [#uses=0]
-@str.upgrd.44 = internal global [14 x i8] c"tellics flag\0A\00"		; <[14 x i8]*> [#uses=0]
-@str18 = internal global [2 x i8] c"+\00"		; <[2 x i8]*> [#uses=0]
-@str9 = internal global [6 x i8] c"trade\00"		; <[6 x i8]*> [#uses=0]
-@str10.upgrd.45 = internal global [35 x i8] c"tellics ptell Ok, trading is GOOD\0A\00"		; <[35 x i8]*> [#uses=0]
-@str11 = internal global [4 x i8] c"+++\00"		; <[4 x i8]*> [#uses=0]
-@str12.upgrd.46 = internal global [6 x i8] c"mates\00"		; <[6 x i8]*> [#uses=0]
-@str13.upgrd.47 = internal global [3 x i8] c"++\00"		; <[3 x i8]*> [#uses=0]
-@str.upgrd.48 = internal global [49 x i8] c"is VERY good (ptell me 'x' to play normal again)\00"		; <[49 x i8]*> [#uses=0]
-@str.upgrd.49 = internal global [44 x i8] c"is good (ptell me 'x' to play normal again)\00"		; <[44 x i8]*> [#uses=0]
-@str19 = internal global [29 x i8] c"tellics ptell Ok, Knight %s\0A\00"		; <[29 x i8]*> [#uses=0]
-@str14 = internal global [29 x i8] c"tellics ptell Ok, Bishop %s\0A\00"		; <[29 x i8]*> [#uses=0]
-@str15.upgrd.50 = internal global [27 x i8] c"tellics ptell Ok, Rook %s\0A\00"		; <[27 x i8]*> [#uses=0]
-@str.upgrd.51 = internal global [28 x i8] c"tellics ptell Ok, Queen %s\0A\00"		; <[28 x i8]*> [#uses=0]
-@str16.upgrd.52 = internal global [27 x i8] c"tellics ptell Ok, Pawn %s\0A\00"		; <[27 x i8]*> [#uses=0]
-@str17.upgrd.53 = internal global [31 x i8] c"tellics ptell Ok, Diagonal %s\0A\00"		; <[31 x i8]*> [#uses=0]
-@str18.upgrd.54 = internal global [28 x i8] c"tellics ptell Ok, Heavy %s\0A\00"		; <[28 x i8]*> [#uses=0]
-@str20 = internal global [34 x i8] c"tellics ptell Ok, trading is BAD\0A\00"		; <[34 x i8]*> [#uses=0]
-@str20.upgrd.55 = internal global [4 x i8] c"---\00"		; <[4 x i8]*> [#uses=0]
-@str.upgrd.56 = internal global [53 x i8] c"mates you (ptell me 'x' when it no longer mates you)\00"		; <[53 x i8]*> [#uses=0]
-@str21 = internal global [3 x i8] c"--\00"		; <[3 x i8]*> [#uses=0]
-@str.upgrd.57 = internal global [52 x i8] c"is VERY bad (ptell me 'x' when it is no longer bad)\00"		; <[52 x i8]*> [#uses=0]
-@str21.upgrd.58 = internal global [47 x i8] c"is bad (ptell me 'x' when it is no longer bad)\00"		; <[47 x i8]*> [#uses=0]
-@str23 = internal global [16 x i8] c"mate me anymore\00"		; <[16 x i8]*> [#uses=0]
-@str24 = internal global [6 x i8] c"never\00"		; <[6 x i8]*> [#uses=0]
-@str25 = internal global [5 x i8] c"mind\00"		; <[5 x i8]*> [#uses=0]
-@str22 = internal global [9 x i8] c"ptell me\00"		; <[9 x i8]*> [#uses=0]
-@str.upgrd.59 = internal global [55 x i8] c"tellics ptell Ok, reverting to STANDARD piece values!\0A\00"		; <[55 x i8]*> [#uses=0]
-@partnerdead.b = internal global i1 false		; <i1*> [#uses=0]
-@piecedead.b = internal global i1 false		; <i1*> [#uses=0]
-@str.upgrd.60 = internal global [26 x i8] c"i'll have to sit...(dead)\00"		; <[26 x i8]*> [#uses=0]
-@str27 = internal global [5 x i8] c"dead\00"		; <[5 x i8]*> [#uses=0]
-@str28 = internal global [27 x i8] c"i'll have to sit...(piece)\00"		; <[27 x i8]*> [#uses=0]
-@str29 = internal global [3 x i8] c"ok\00"		; <[3 x i8]*> [#uses=0]
-@str30 = internal global [3 x i8] c"hi\00"		; <[3 x i8]*> [#uses=0]
-@str31 = internal global [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=0]
-@str32 = internal global [26 x i8] c"tellics ptell Greetings.\0A\00"		; <[26 x i8]*> [#uses=0]
-@str.upgrd.61 = internal global [8 x i8] c"formula\00"		; <[8 x i8]*> [#uses=0]
-@str.upgrd.62 = internal global [87 x i8] c"tellics ptell Setting formula, if you are still interrupted, complain to my operator.\0A\00"		; <[87 x i8]*> [#uses=0]
-@str33 = internal global [59 x i8] c"tellics ptell Sorry, but I don't understand that command.\0A\00"		; <[59 x i8]*> [#uses=0]
-@pawnmated.3298 = internal global i32 0		; <i32*> [#uses=0]
-@knightmated.3299 = internal global i32 0		; <i32*> [#uses=0]
-@bishopmated.3300 = internal global i32 0		; <i32*> [#uses=0]
-@rookmated.3301 = internal global i32 0		; <i32*> [#uses=0]
-@queenmated.3302 = internal global i32 0		; <i32*> [#uses=0]
-@str.upgrd.63 = internal global [41 x i8] c"tellics ptell p doesn't mate me anymore\0A\00"		; <[41 x i8]*> [#uses=0]
-@str34 = internal global [41 x i8] c"tellics ptell n doesn't mate me anymore\0A\00"		; <[41 x i8]*> [#uses=0]
-@str35 = internal global [41 x i8] c"tellics ptell b doesn't mate me anymore\0A\00"		; <[41 x i8]*> [#uses=0]
-@str36 = internal global [41 x i8] c"tellics ptell r doesn't mate me anymore\0A\00"		; <[41 x i8]*> [#uses=0]
-@str37 = internal global [41 x i8] c"tellics ptell q doesn't mate me anymore\0A\00"		; <[41 x i8]*> [#uses=0]
-@str38 = internal global [20 x i8] c"tellics ptell ---p\0A\00"		; <[20 x i8]*> [#uses=0]
-@str39 = internal global [20 x i8] c"tellics ptell ---n\0A\00"		; <[20 x i8]*> [#uses=0]
-@str40 = internal global [20 x i8] c"tellics ptell ---b\0A\00"		; <[20 x i8]*> [#uses=0]
-@str41 = internal global [20 x i8] c"tellics ptell ---r\0A\00"		; <[20 x i8]*> [#uses=0]
-@str42 = internal global [20 x i8] c"tellics ptell ---q\0A\00"		; <[20 x i8]*> [#uses=0]
-@str23.upgrd.64 = internal global [17 x i8] c"tellics ptell x\0A\00"		; <[17 x i8]*> [#uses=0]
-@str.upgrd.65 = internal global [18 x i8] c"tellics ptell go\0A\00"		; <[18 x i8]*> [#uses=0]
-@bufftop = internal global i32 0		; <i32*> [#uses=2]
-@membuff = internal global i8* null		; <i8**> [#uses=3]
-@maxply = internal global i32 0		; <i32*> [#uses=1]
-@forwards = internal global i32 0		; <i32*> [#uses=1]
-@nodecount = internal global i32 0		; <i32*> [#uses=1]
-@frees = internal global i32 0		; <i32*> [#uses=0]
-@PBSize.b = internal global i1 false		; <i1*> [#uses=1]
-@alllosers.b = internal global i1 false		; <i1*> [#uses=1]
-@rootlosers = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=1]
-@pn_move = internal global %struct.move_s zeroinitializer		; <%struct.move_s*> [#uses=7]
-@iters = internal global i32 0		; <i32*> [#uses=1]
-@kibitzed.b = internal global i1 false		; <i1*> [#uses=0]
-@str24.upgrd.66 = internal global [28 x i8] c"tellics kibitz Forced win!\0A\00"		; <[28 x i8]*> [#uses=0]
-@str25.upgrd.67 = internal global [34 x i8] c"tellics kibitz Forced win! (alt)\0A\00"		; <[34 x i8]*> [#uses=0]
-@pn_time = internal global i32 0		; <i32*> [#uses=1]
-@post = internal global i32 0		; <i32*> [#uses=0]
-@str.upgrd.68 = internal global [94 x i8] c"tellics whisper proof %d, disproof %d, %d losers, highest depth %d, primary %d, secondary %d\0A\00"		; <[94 x i8]*> [#uses=0]
-@str26 = internal global [30 x i8] c"tellics whisper Forced reply\0A\00"		; <[30 x i8]*> [#uses=0]
-@str27.upgrd.69 = internal global [60 x i8] c"P: %d D: %d N: %d S: %d Mem: %2.2fM Iters: %d MaxDepth: %d\0A\00"		; <[60 x i8]*> [#uses=0]
-@str.upgrd.70 = internal global [90 x i8] c"tellics whisper proof %d, disproof %d, %d nodes, %d forwards, %d iters, highest depth %d\0A\00"		; <[90 x i8]*> [#uses=0]
-@str.upgrd.71 = internal global [11 x i8] c"Time : %f\0A\00"		; <[11 x i8]*> [#uses=0]
-@str28.upgrd.72 = internal global [23 x i8] c"This position is WON.\0A\00"		; <[23 x i8]*> [#uses=0]
-@str29.upgrd.73 = internal global [5 x i8] c"PV: \00"		; <[5 x i8]*> [#uses=0]
-@str30.upgrd.74 = internal global [4 x i8] c"%s \00"		; <[4 x i8]*> [#uses=0]
-@str31.upgrd.75 = internal global [2 x i8] c" \00"		; <[2 x i8]*> [#uses=0]
-@str32.upgrd.76 = internal global [41 x i8] c"\0Atellics kibitz Forced win in %d moves.\0A\00"		; <[41 x i8]*> [#uses=0]
-@str33.upgrd.77 = internal global [20 x i8] c"\0A1-0 {White mates}\0A\00"		; <[20 x i8]*> [#uses=0]
-@result = internal global i32 0		; <i32*> [#uses=4]
-@str1.upgrd.78 = internal global [20 x i8] c"\0A0-1 {Black mates}\0A\00"		; <[20 x i8]*> [#uses=0]
-@str35.upgrd.79 = internal global [24 x i8] c"This position is LOST.\0A\00"		; <[24 x i8]*> [#uses=0]
-@str36.upgrd.80 = internal global [27 x i8] c"This position is UNKNOWN.\0A\00"		; <[27 x i8]*> [#uses=0]
-@str37.upgrd.81 = internal global [47 x i8] c"P: %d D: %d N: %d S: %d Mem: %2.2fM Iters: %d\0A\00"		; <[47 x i8]*> [#uses=0]
-@s_threat.b = internal global i1 false		; <i1*> [#uses=0]
-@TTSize.b = internal global i1 false		; <i1*> [#uses=3]
-@cfg_razordrop.b = internal global i1 false		; <i1*> [#uses=0]
-@cfg_futprune.b = internal global i1 false		; <i1*> [#uses=0]
-@cfg_onerep.b = internal global i1 false		; <i1*> [#uses=0]
-@setcode = internal global [30 x i8] zeroinitializer		; <[30 x i8]*> [#uses=0]
-@str38.upgrd.82 = internal global [3 x i8] c"%u\00"		; <[3 x i8]*> [#uses=0]
-@searching_pv.b = internal global i1 false		; <i1*> [#uses=0]
-@pv = internal global [300 x [300 x %struct.move_s]] zeroinitializer		; <[300 x [300 x %struct.move_s]]*> [#uses=0]
-@i_depth = internal global i32 0		; <i32*> [#uses=0]
-@history_h = internal global [144 x [144 x i32]] zeroinitializer		; <[144 x [144 x i32]]*> [#uses=0]
-@killer1 = internal global [300 x %struct.move_s] zeroinitializer		; <[300 x %struct.move_s]*> [#uses=0]
-@killer2 = internal global [300 x %struct.move_s] zeroinitializer		; <[300 x %struct.move_s]*> [#uses=0]
-@killer3 = internal global [300 x %struct.move_s] zeroinitializer		; <[300 x %struct.move_s]*> [#uses=0]
-@rootnodecount = internal global [512 x i32] zeroinitializer		; <[512 x i32]*> [#uses=0]
-@raw_nodes = internal global i32 0		; <i32*> [#uses=0]
-@pv_length = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@time_exit.b = internal global i1 false		; <i1*> [#uses=0]
-@time_for_move = internal global i32 0		; <i32*> [#uses=3]
-@failed = internal global i32 0		; <i32*> [#uses=0]
-@extendedtime.b = internal global i1 false		; <i1*> [#uses=1]
-@time_left = internal global i32 0		; <i32*> [#uses=0]
-@str39.upgrd.83 = internal global [38 x i8] c"Extended from %d to %d, time left %d\0A\00"		; <[38 x i8]*> [#uses=0]
-@checks = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@singular = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@recaps = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@ext_onerep = internal global i32 0		; <i32*> [#uses=1]
-@FULL = internal global i32 0		; <i32*> [#uses=1]
-@PVS = internal global i32 0		; <i32*> [#uses=1]
-@PVSF = internal global i32 0		; <i32*> [#uses=1]
-@killer_scores = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@killer_scores2 = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@killer_scores3 = internal global [300 x i32] zeroinitializer		; <[300 x i32]*> [#uses=0]
-@time_failure.b = internal global i1 false		; <i1*> [#uses=0]
-@cur_score = internal global i32 0		; <i32*> [#uses=0]
-@legals = internal global i32 0		; <i32*> [#uses=3]
-@movetotal = internal global i32 0		; <i32*> [#uses=0]
-@searching_move = internal global [20 x i8] zeroinitializer		; <[20 x i8]*> [#uses=0]
-@is_pondering.b = internal global i1 false		; <i1*> [#uses=6]
-@true_i_depth = internal global i8 0		; <i8*> [#uses=1]
-@is_analyzing.b = internal global i1 false		; <i1*> [#uses=0]
-@inc = internal global i32 0		; <i32*> [#uses=1]
-@time_cushion = internal global i32 0		; <i32*> [#uses=2]
-@str40.upgrd.84 = internal global [16 x i8] c"Opening phase.\0A\00"		; <[16 x i8]*> [#uses=1]
-@str.upgrd.85 = internal global [19 x i8] c"Middlegame phase.\0A\00"		; <[19 x i8]*> [#uses=1]
-@str1.upgrd.86 = internal global [16 x i8] c"Endgame phase.\0A\00"		; <[16 x i8]*> [#uses=1]
-@str43 = internal global [20 x i8] c"Time for move : %d\0A\00"		; <[20 x i8]*> [#uses=1]
-@postpv = internal global [256 x i8] zeroinitializer		; <[256 x i8]*> [#uses=0]
-@str44 = internal global [49 x i8] c"tellics whisper %d restart(s), ended up with %s\0A\00"		; <[49 x i8]*> [#uses=0]
-@moves_to_tc = internal global i32 0		; <i32*> [#uses=0]
-@str45 = internal global [27 x i8] c"tellics kibitz Mate in %d\0A\00"		; <[27 x i8]*> [#uses=0]
-@str46 = internal global [52 x i8] c"tellics ptell Mate in %d, give him no more pieces.\0A\00"		; <[52 x i8]*> [#uses=0]
-@tradefreely.b = internal global i1 false		; <i1*> [#uses=0]
-@str.upgrd.87 = internal global [37 x i8] c"tellics ptell You can trade freely.\0A\00"		; <[37 x i8]*> [#uses=0]
-@str47 = internal global [25 x i8] c"tellics ptell ---trades\0A\00"		; <[25 x i8]*> [#uses=0]
-@str2.upgrd.88 = internal global [49 x i8] c"tellics kibitz Both players dead...resigning...\0A\00"		; <[49 x i8]*> [#uses=0]
-@str3.upgrd.89 = internal global [16 x i8] c"tellics resign\0A\00"		; <[16 x i8]*> [#uses=0]
-@str48 = internal global [81 x i8] c"tellics ptell I am forcedly mated (dead). Tell me 'go' to start moving into it.\0A\00"		; <[81 x i8]*> [#uses=0]
-@str.upgrd.90 = internal global [62 x i8] c"tellics ptell I'll have to sit...(lose piece that mates you)\0A\00"		; <[62 x i8]*> [#uses=0]
-@see_num_attackers = internal global [2 x i32] zeroinitializer		; <[2 x i32]*> [#uses=0]
-@see_attackers = internal global [2 x [16 x %struct.see_data]] zeroinitializer		; <[2 x [16 x %struct.see_data]]*> [#uses=0]
-@scentral = internal global [144 x i32] [ i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 3, i32 5, i32 5, i32 3, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 2, i32 15, i32 15, i32 15, i32 15, i32 2, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 7, i32 15, i32 25, i32 25, i32 15, i32 7, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 7, i32 15, i32 25, i32 25, i32 15, i32 7, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 2, i32 15, i32 15, i32 15, i32 15, i32 2, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -10, i32 0, i32 3, i32 5, i32 5, i32 3, i32 0, i32 -10, i32 0, i32 0, i32 0, i32 0, i32 -20, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -10, i32 -20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 ]		; <[144 x i32]*> [#uses=0]
-@str51 = internal global [81 x i8] c"/Volumes/Stuff/src/speccpu2006-091-llvm/benchspec//CPU2006/458.sjeng/src/seval.c\00"		; <[81 x i8]*> [#uses=0]
-@divider = internal global [50 x i8] c"-------------------------------------------------\00"		; <[50 x i8]*> [#uses=0]
-@min_per_game = internal global i32 0		; <i32*> [#uses=0]
-@opp_rating = internal global i32 0		; <i32*> [#uses=0]
-@my_rating = internal global i32 0		; <i32*> [#uses=0]
-@str53 = internal global [15 x i8] c"SPEC Workload\0A\00"		; <[15 x i8]*> [#uses=0]
-@opening_history = internal global [256 x i8] zeroinitializer		; <[256 x i8]*> [#uses=0]
-@str60 = internal global [81 x i8] c"Material score: %d   Eval : %d  MaxPosDiff: %d  White hand: %d  Black hand : %d\0A\00"		; <[81 x i8]*> [#uses=0]
-@str61 = internal global [26 x i8] c"Hash : %X  HoldHash : %X\0A\00"		; <[26 x i8]*> [#uses=0]
-@str62 = internal global [9 x i8] c"move %s\0A\00"		; <[9 x i8]*> [#uses=0]
-@str63 = internal global [5 x i8] c"\0A%s\0A\00"		; <[5 x i8]*> [#uses=0]
-@str64 = internal global [19 x i8] c"0-1 {Black Mates}\0A\00"		; <[19 x i8]*> [#uses=0]
-@str1.upgrd.91 = internal global [19 x i8] c"1-0 {White Mates}\0A\00"		; <[19 x i8]*> [#uses=0]
-@str65 = internal global [27 x i8] c"1/2-1/2 {Fifty move rule}\0A\00"		; <[27 x i8]*> [#uses=0]
-@str2.upgrd.92 = internal global [29 x i8] c"1/2-1/2 {3 fold repetition}\0A\00"		; <[29 x i8]*> [#uses=0]
-@str66 = internal global [16 x i8] c"1/2-1/2 {Draw}\0A\00"		; <[16 x i8]*> [#uses=0]
-@str68 = internal global [8 x i8] c"Sjeng: \00"		; <[8 x i8]*> [#uses=0]
-@str69 = internal global [18 x i8] c"Illegal move: %s\0A\00"		; <[18 x i8]*> [#uses=0]
-@str3.upgrd.93 = internal global [9 x i8] c"setboard\00"		; <[9 x i8]*> [#uses=0]
-@str470 = internal global [5 x i8] c"quit\00"		; <[5 x i8]*> [#uses=0]
-@str571 = internal global [5 x i8] c"exit\00"		; <[5 x i8]*> [#uses=0]
-@str6.upgrd.94 = internal global [8 x i8] c"diagram\00"		; <[8 x i8]*> [#uses=0]
-@str7.upgrd.95 = internal global [2 x i8] c"d\00"		; <[2 x i8]*> [#uses=0]
-@str72 = internal global [6 x i8] c"perft\00"		; <[6 x i8]*> [#uses=0]
-@str73 = internal global [3 x i8] c"%d\00"		; <[3 x i8]*> [#uses=0]
-@str74 = internal global [28 x i8] c"Raw nodes for depth %d: %i\0A\00"		; <[28 x i8]*> [#uses=0]
-@str.upgrd.96 = internal global [13 x i8] c"Time : %.2f\0A\00"		; <[13 x i8]*> [#uses=0]
-@str75 = internal global [4 x i8] c"new\00"		; <[4 x i8]*> [#uses=0]
-@str.upgrd.97 = internal global [40 x i8] c"tellics set 1 Sjeng SPEC 1.0 (SPEC/%s)\0A\00"		; <[40 x i8]*> [#uses=0]
-@str.upgrd.98 = internal global [7 x i8] c"xboard\00"		; <[7 x i8]*> [#uses=0]
-@str8.upgrd.99 = internal global [6 x i8] c"nodes\00"		; <[6 x i8]*> [#uses=0]
-@str77 = internal global [38 x i8] c"Number of nodes: %i (%0.2f%% qnodes)\0A\00"		; <[38 x i8]*> [#uses=0]
-@str9.upgrd.100 = internal global [5 x i8] c"post\00"		; <[5 x i8]*> [#uses=0]
-@str10.upgrd.101 = internal global [7 x i8] c"nopost\00"		; <[7 x i8]*> [#uses=0]
-@str11.upgrd.102 = internal global [7 x i8] c"random\00"		; <[7 x i8]*> [#uses=0]
-@str12.upgrd.103 = internal global [5 x i8] c"hard\00"		; <[5 x i8]*> [#uses=0]
-@str13.upgrd.104 = internal global [5 x i8] c"easy\00"		; <[5 x i8]*> [#uses=0]
-@str14.upgrd.105 = internal global [2 x i8] c"?\00"		; <[2 x i8]*> [#uses=0]
-@str15.upgrd.106 = internal global [6 x i8] c"white\00"		; <[6 x i8]*> [#uses=0]
-@str16.upgrd.107 = internal global [6 x i8] c"black\00"		; <[6 x i8]*> [#uses=0]
-@str17.upgrd.108 = internal global [6 x i8] c"force\00"		; <[6 x i8]*> [#uses=0]
-@str18.upgrd.109 = internal global [5 x i8] c"eval\00"		; <[5 x i8]*> [#uses=0]
-@str.upgrd.110 = internal global [10 x i8] c"Eval: %d\0A\00"		; <[10 x i8]*> [#uses=0]
-@str2178 = internal global [3 x i8] c"%i\00"		; <[3 x i8]*> [#uses=0]
-@str22.upgrd.111 = internal global [5 x i8] c"otim\00"		; <[5 x i8]*> [#uses=0]
-@opp_time = internal global i32 0		; <i32*> [#uses=0]
-@str23.upgrd.112 = internal global [6 x i8] c"level\00"		; <[6 x i8]*> [#uses=0]
-@str.upgrd.113 = internal global [12 x i8] c"%i %i:%i %i\00"		; <[12 x i8]*> [#uses=0]
-@sec_per_game = internal global i32 0		; <i32*> [#uses=0]
-@str24.upgrd.114 = internal global [9 x i8] c"%i %i %i\00"		; <[9 x i8]*> [#uses=0]
-@str25.upgrd.115 = internal global [7 x i8] c"rating\00"		; <[7 x i8]*> [#uses=0]
-@str26.upgrd.116 = internal global [6 x i8] c"%i %i\00"		; <[6 x i8]*> [#uses=0]
-@str27.upgrd.117 = internal global [8 x i8] c"holding\00"		; <[8 x i8]*> [#uses=0]
-@str28.upgrd.118 = internal global [8 x i8] c"variant\00"		; <[8 x i8]*> [#uses=0]
-@str29.upgrd.119 = internal global [7 x i8] c"normal\00"		; <[7 x i8]*> [#uses=0]
-@str79 = internal global [11 x i8] c"crazyhouse\00"		; <[11 x i8]*> [#uses=0]
-@str30.upgrd.120 = internal global [9 x i8] c"bughouse\00"		; <[9 x i8]*> [#uses=0]
-@str31.upgrd.121 = internal global [8 x i8] c"suicide\00"		; <[8 x i8]*> [#uses=0]
-@str32.upgrd.122 = internal global [9 x i8] c"giveaway\00"		; <[9 x i8]*> [#uses=0]
-@str33.upgrd.123 = internal global [7 x i8] c"losers\00"		; <[7 x i8]*> [#uses=0]
-@str34.upgrd.124 = internal global [8 x i8] c"analyze\00"		; <[8 x i8]*> [#uses=0]
-@str35.upgrd.125 = internal global [5 x i8] c"undo\00"		; <[5 x i8]*> [#uses=0]
-@str36.upgrd.126 = internal global [18 x i8] c"Move number : %d\0A\00"		; <[18 x i8]*> [#uses=0]
-@str37.upgrd.127 = internal global [7 x i8] c"remove\00"		; <[7 x i8]*> [#uses=0]
-@str38.upgrd.128 = internal global [5 x i8] c"edit\00"		; <[5 x i8]*> [#uses=0]
-@str41.upgrd.129 = internal global [2 x i8] c"#\00"		; <[2 x i8]*> [#uses=0]
-@str42.upgrd.130 = internal global [8 x i8] c"partner\00"		; <[8 x i8]*> [#uses=0]
-@str43.upgrd.131 = internal global [9 x i8] c"$partner\00"		; <[9 x i8]*> [#uses=0]
-@str44.upgrd.132 = internal global [6 x i8] c"ptell\00"		; <[6 x i8]*> [#uses=0]
-@str45.upgrd.133 = internal global [5 x i8] c"test\00"		; <[5 x i8]*> [#uses=0]
-@str46.upgrd.134 = internal global [3 x i8] c"st\00"		; <[3 x i8]*> [#uses=0]
-@str47.upgrd.135 = internal global [7 x i8] c"result\00"		; <[7 x i8]*> [#uses=0]
-@str48.upgrd.136 = internal global [6 x i8] c"prove\00"		; <[6 x i8]*> [#uses=0]
-@str49 = internal global [26 x i8] c"\0AMax time to search (s): \00"		; <[26 x i8]*> [#uses=0]
-@str50 = internal global [5 x i8] c"ping\00"		; <[5 x i8]*> [#uses=0]
-@str51.upgrd.137 = internal global [9 x i8] c"pong %d\0A\00"		; <[9 x i8]*> [#uses=0]
-@str52 = internal global [6 x i8] c"fritz\00"		; <[6 x i8]*> [#uses=0]
-@str53.upgrd.138 = internal global [6 x i8] c"reset\00"		; <[6 x i8]*> [#uses=0]
-@str54 = internal global [3 x i8] c"sd\00"		; <[3 x i8]*> [#uses=0]
-@str55 = internal global [26 x i8] c"New max depth set to: %d\0A\00"		; <[26 x i8]*> [#uses=0]
-@str56 = internal global [5 x i8] c"auto\00"		; <[5 x i8]*> [#uses=0]
-@str57 = internal global [9 x i8] c"protover\00"		; <[9 x i8]*> [#uses=0]
-@str.upgrd.139 = internal global [63 x i8] c"feature ping=0 setboard=1 playother=0 san=0 usermove=0 time=1\0A\00"		; <[63 x i8]*> [#uses=0]
-@str80 = internal global [53 x i8] c"feature draw=0 sigint=0 sigterm=0 reuse=1 analyze=0\0A\00"		; <[53 x i8]*> [#uses=0]
-@str.upgrd.140 = internal global [33 x i8] c"feature myname=\22Sjeng SPEC 1.0\22\0A\00"		; <[33 x i8]*> [#uses=0]
-@str.upgrd.141 = internal global [71 x i8] c"feature variants=\22normal,bughouse,crazyhouse,suicide,giveaway,losers\22\0A\00"		; <[71 x i8]*> [#uses=0]
-@str.upgrd.142 = internal global [46 x i8] c"feature colors=1 ics=0 name=0 pause=0 done=1\0A\00"		; <[46 x i8]*> [#uses=0]
-@str58 = internal global [9 x i8] c"accepted\00"		; <[9 x i8]*> [#uses=0]
-@str59 = internal global [9 x i8] c"rejected\00"		; <[9 x i8]*> [#uses=0]
-@str.upgrd.143 = internal global [65 x i8] c"Interface does not support a required feature...expect trouble.\0A\00"		; <[65 x i8]*> [#uses=0]
-@str61.upgrd.144 = internal global [6 x i8] c"\0A%s\0A\0A\00"		; <[6 x i8]*> [#uses=0]
-@str81 = internal global [41 x i8] c"diagram/d:       toggle diagram display\0A\00"		; <[41 x i8]*> [#uses=0]
-@str82 = internal global [34 x i8] c"exit/quit:       terminate Sjeng\0A\00"		; <[34 x i8]*> [#uses=0]
-@str62.upgrd.145 = internal global [51 x i8] c"go:              make Sjeng play the side to move\0A\00"		; <[51 x i8]*> [#uses=0]
-@str83 = internal global [35 x i8] c"new:             start a new game\0A\00"		; <[35 x i8]*> [#uses=0]
-@str84 = internal global [55 x i8] c"level <x>:       the xboard style command to set time\0A\00"		; <[55 x i8]*> [#uses=0]
-@str85 = internal global [49 x i8] c"  <x> should be in the form: <a> <b> <c> where:\0A\00"		; <[49 x i8]*> [#uses=0]
-@str63.upgrd.146 = internal global [49 x i8] c"  a -> moves to TC (0 if using an ICS style TC)\0A\00"		; <[49 x i8]*> [#uses=0]
-@str86 = internal global [25 x i8] c"  b -> minutes per game\0A\00"		; <[25 x i8]*> [#uses=0]
-@str64.upgrd.147 = internal global [29 x i8] c"  c -> increment in seconds\0A\00"		; <[29 x i8]*> [#uses=0]
-@str65.upgrd.148 = internal global [55 x i8] c"nodes:           outputs the number of nodes searched\0A\00"		; <[55 x i8]*> [#uses=0]
-@str87 = internal global [47 x i8] c"perft <x>:       compute raw nodes to depth x\0A\00"		; <[47 x i8]*> [#uses=0]
-@str.upgrd.149 = internal global [42 x i8] c"post:            toggles thinking output\0A\00"		; <[42 x i8]*> [#uses=0]
-@str.upgrd.150 = internal global [45 x i8] c"xboard:          put Sjeng into xboard mode\0A\00"		; <[45 x i8]*> [#uses=0]
-@str.upgrd.151 = internal global [39 x i8] c"test:            run an EPD testsuite\0A\00"		; <[39 x i8]*> [#uses=0]
-@str88 = internal global [52 x i8] c"speed:           test movegen and evaluation speed\0A\00"		; <[52 x i8]*> [#uses=0]
-@str89 = internal global [59 x i8] c"proof:           try to prove or disprove the current pos\0A\00"		; <[59 x i8]*> [#uses=0]
-@str90 = internal global [44 x i8] c"sd <x>:          limit thinking to depth x\0A\00"		; <[44 x i8]*> [#uses=0]
-@str66.upgrd.152 = internal global [51 x i8] c"st <x>:          limit thinking to x centiseconds\0A\00"		; <[51 x i8]*> [#uses=0]
-@str67 = internal global [54 x i8] c"setboard <FEN>:  set board to a specified FEN string\0A\00"		; <[54 x i8]*> [#uses=0]
-@str68.upgrd.153 = internal global [38 x i8] c"undo:            back up a half move\0A\00"		; <[38 x i8]*> [#uses=0]
-@str69.upgrd.154 = internal global [38 x i8] c"remove:          back up a full move\0A\00"		; <[38 x i8]*> [#uses=0]
-@str70 = internal global [42 x i8] c"force:           disable computer moving\0A\00"		; <[42 x i8]*> [#uses=0]
-@str71 = internal global [44 x i8] c"auto:            computer plays both sides\0A\00"		; <[44 x i8]*> [#uses=0]
-@DP_TTable = internal global %struct.TType* null		; <%struct.TType**> [#uses=1]
-@AS_TTable = internal global %struct.TType* null		; <%struct.TType**> [#uses=1]
-@QS_TTable = internal global %struct.QTType* null		; <%struct.QTType**> [#uses=1]
-@str93 = internal global [38 x i8] c"Out of memory allocating hashtables.\0A\00"		; <[38 x i8]*> [#uses=0]
-@type_to_char.3058 = internal global [14 x i32] [ i32 70, i32 80, i32 80, i32 78, i32 78, i32 75, i32 75, i32 82, i32 82, i32 81, i32 81, i32 66, i32 66, i32 69 ]		; <[14 x i32]*> [#uses=0]
-@str94 = internal global [8 x i8] c"%c@%c%d\00"		; <[8 x i8]*> [#uses=0]
-@str95 = internal global [5 x i8] c"%c%d\00"		; <[5 x i8]*> [#uses=0]
-@str1.upgrd.155 = internal global [8 x i8] c"%c%d=%c\00"		; <[8 x i8]*> [#uses=0]
-@str2.upgrd.156 = internal global [8 x i8] c"%cx%c%d\00"		; <[8 x i8]*> [#uses=0]
-@str96 = internal global [11 x i8] c"%cx%c%d=%c\00"		; <[11 x i8]*> [#uses=0]
-@str97 = internal global [4 x i8] c"O-O\00"		; <[4 x i8]*> [#uses=0]
-@str98 = internal global [6 x i8] c"O-O-O\00"		; <[6 x i8]*> [#uses=0]
-@str99 = internal global [9 x i8] c"%c%c%c%d\00"		; <[9 x i8]*> [#uses=0]
-@str3100 = internal global [9 x i8] c"%c%d%c%d\00"		; <[9 x i8]*> [#uses=0]
-@str101 = internal global [10 x i8] c"%c%cx%c%d\00"		; <[10 x i8]*> [#uses=0]
-@str4.upgrd.157 = internal global [10 x i8] c"%c%dx%c%d\00"		; <[10 x i8]*> [#uses=0]
-@str102 = internal global [7 x i8] c"%c%c%d\00"		; <[7 x i8]*> [#uses=0]
-@str5103 = internal global [5 x i8] c"illg\00"		; <[5 x i8]*> [#uses=0]
-@type_to_char.3190 = internal global [14 x i32] [ i32 70, i32 80, i32 112, i32 78, i32 110, i32 75, i32 107, i32 82, i32 114, i32 81, i32 113, i32 66, i32 98, i32 69 ]		; <[14 x i32]*> [#uses=0]
-@str7.upgrd.158 = internal global [10 x i8] c"%c%d%c%dn\00"		; <[10 x i8]*> [#uses=0]
-@str8.upgrd.159 = internal global [10 x i8] c"%c%d%c%dr\00"		; <[10 x i8]*> [#uses=0]
-@str9.upgrd.160 = internal global [10 x i8] c"%c%d%c%db\00"		; <[10 x i8]*> [#uses=0]
-@str10.upgrd.161 = internal global [10 x i8] c"%c%d%c%dk\00"		; <[10 x i8]*> [#uses=0]
-@str11.upgrd.162 = internal global [10 x i8] c"%c%d%c%dq\00"		; <[10 x i8]*> [#uses=0]
-@C.88.3251 = internal global [14 x i8*] [ i8* getelementptr ([3 x i8]* @str105, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str12106, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str13107, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str141, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str152, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str163, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str174, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str185, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str19108, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str206, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str21109, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str227, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str238, i32 0, i32 0), i8* getelementptr ([3 x i8]* @str249, i32 0, i32 0) ]		; <[14 x i8*]*> [#uses=0]
-@str105 = internal global [3 x i8] c"!!\00"		; <[3 x i8]*> [#uses=1]
-@str12106 = internal global [3 x i8] c" P\00"		; <[3 x i8]*> [#uses=1]
-@str13107 = internal global [3 x i8] c"*P\00"		; <[3 x i8]*> [#uses=1]
-@str141 = internal global [3 x i8] c" N\00"		; <[3 x i8]*> [#uses=1]
-@str152 = internal global [3 x i8] c"*N\00"		; <[3 x i8]*> [#uses=1]
-@str163 = internal global [3 x i8] c" K\00"		; <[3 x i8]*> [#uses=1]
-@str174 = internal global [3 x i8] c"*K\00"		; <[3 x i8]*> [#uses=1]
-@str185 = internal global [3 x i8] c" R\00"		; <[3 x i8]*> [#uses=1]
-@str19108 = internal global [3 x i8] c"*R\00"		; <[3 x i8]*> [#uses=1]
-@str206 = internal global [3 x i8] c" Q\00"		; <[3 x i8]*> [#uses=1]
-@str21109 = internal global [3 x i8] c"*Q\00"		; <[3 x i8]*> [#uses=1]
-@str227 = internal global [3 x i8] c" B\00"		; <[3 x i8]*> [#uses=1]
-@str238 = internal global [3 x i8] c"*B\00"		; <[3 x i8]*> [#uses=1]
-@str249 = internal global [3 x i8] c"  \00"		; <[3 x i8]*> [#uses=1]
-@str110 = internal global [42 x i8] c"+----+----+----+----+----+----+----+----+\00"		; <[42 x i8]*> [#uses=0]
-@str25.upgrd.163 = internal global [6 x i8] c"  %s\0A\00"		; <[6 x i8]*> [#uses=0]
-@str26.upgrd.164 = internal global [5 x i8] c"%d |\00"		; <[5 x i8]*> [#uses=0]
-@str27.upgrd.165 = internal global [6 x i8] c" %s |\00"		; <[6 x i8]*> [#uses=0]
-@str28.upgrd.166 = internal global [7 x i8] c"\0A  %s\0A\00"		; <[7 x i8]*> [#uses=0]
-@str111 = internal global [45 x i8] c"\0A     a    b    c    d    e    f    g    h\0A\0A\00"		; <[45 x i8]*> [#uses=0]
-@str29.upgrd.167 = internal global [45 x i8] c"\0A     h    g    f    e    d    c    b    a\0A\0A\00"		; <[45 x i8]*> [#uses=0]
-@str33.upgrd.168 = internal global [2 x i8] c"<\00"		; <[2 x i8]*> [#uses=0]
-@str34.upgrd.169 = internal global [3 x i8] c"> \00"		; <[3 x i8]*> [#uses=0]
-@str114.upgrd.170 = internal global [18 x i8] c"%2i %7i %5i %8i  \00"		; <[18 x i8]*> [#uses=0]
-@str115 = internal global [20 x i8] c"%2i %c%1i.%02i %9i \00"		; <[20 x i8]*> [#uses=0]
-@str39.upgrd.171 = internal global [5 x i8] c"%s !\00"		; <[5 x i8]*> [#uses=0]
-@str40.upgrd.172 = internal global [6 x i8] c"%s !!\00"		; <[6 x i8]*> [#uses=0]
-@str41.upgrd.173 = internal global [6 x i8] c"%s ??\00"		; <[6 x i8]*> [#uses=0]
-@str124 = internal global [71 x i8] c"\0ASjeng version SPEC 1.0, Copyright (C) 2000-2005 Gian-Carlo Pascutto\0A\0A\00"		; <[71 x i8]*> [#uses=0]
-@state = internal global [625 x i32] zeroinitializer		; <[625 x i32]*> [#uses=0]
-
-declare fastcc i32 @calc_attackers(i32, i32)
-
-declare fastcc i32 @is_attacked(i32, i32)
-
-declare fastcc void @ProcessHoldings(i8*)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
-declare i8* @strncpy(i8*, i8*, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @__eprintf(i8*, i8*, i32, i8*)
-
-declare fastcc void @addHolding(i32, i32)
-
-declare fastcc void @removeHolding(i32, i32)
-
-declare fastcc void @DropremoveHolding(i32, i32)
-
-declare i32 @printf(i8*, ...)
-
-declare fastcc i32 @is_draw()
-
-declare void @exit(i32)
-
-declare fastcc void @setup_epd_line(i8*)
-
-declare i32 @atoi(i8*)
-
-declare fastcc void @reset_piece_square()
-
-declare fastcc void @initialize_hash()
-
-declare i32 @__maskrune(i32, i32)
-
-declare fastcc void @comp_to_san(i64, i64, i64, i8*)
-
-declare i8* @strstr(i8*, i8*)
-
-declare i32 @atol(i8*)
-
-declare %struct.FILE* @fopen(i8*, i8*)
-
-declare fastcc void @display_board(i32)
-
-define internal void @think(%struct.move_s* sret  %agg.result) {
-entry:
-	%output.i = alloca [8 x i8], align 8		; <[8 x i8]*> [#uses=0]
-	%comp_move = alloca %struct.move_s, align 16		; <%struct.move_s*> [#uses=7]
-	%temp_move = alloca %struct.move_s, align 16		; <%struct.move_s*> [#uses=6]
-	%moves = alloca [512 x %struct.move_s], align 16		; <[512 x %struct.move_s]*> [#uses=7]
-	%output = alloca [8 x i8], align 8		; <[8 x i8]*> [#uses=1]
-	store i1 false, i1* @userealholdings.b
-	%tmp = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0		; <%struct.move_s*> [#uses=3]
-	%tmp362 = getelementptr %struct.move_s* %comp_move, i32 0, i32 0		; <i32*> [#uses=0]
-	%tmp365 = getelementptr %struct.move_s* %comp_move, i32 0, i32 1		; <i32*> [#uses=0]
-	%tmp368 = getelementptr %struct.move_s* %comp_move, i32 0, i32 2		; <i32*> [#uses=0]
-	%tmp371 = getelementptr %struct.move_s* %comp_move, i32 0, i32 3		; <i32*> [#uses=0]
-	%tmp374 = getelementptr %struct.move_s* %comp_move, i32 0, i32 4		; <i32*> [#uses=0]
-	%tmp377 = getelementptr %struct.move_s* %comp_move, i32 0, i32 5		; <i32*> [#uses=0]
-	%tmp.upgrd.174 = bitcast %struct.move_s* %comp_move to { i64, i64, i64 }*		; <{ i64, i64, i64 }*> [#uses=3]
-	%tmp.upgrd.175 = getelementptr { i64, i64, i64 }* %tmp.upgrd.174, i32 0, i32 0		; <i64*> [#uses=0]
-	%tmp829 = getelementptr { i64, i64, i64 }* %tmp.upgrd.174, i32 0, i32 1		; <i64*> [#uses=0]
-	%tmp832 = getelementptr { i64, i64, i64 }* %tmp.upgrd.174, i32 0, i32 2		; <i64*> [#uses=0]
-	%output.upgrd.176 = getelementptr [8 x i8]* %output, i32 0, i32 0		; <i8*> [#uses=0]
-	%tmp573 = getelementptr %struct.move_s* %temp_move, i32 0, i32 0		; <i32*> [#uses=0]
-	%tmp576 = getelementptr %struct.move_s* %temp_move, i32 0, i32 1		; <i32*> [#uses=0]
-	%tmp579 = getelementptr %struct.move_s* %temp_move, i32 0, i32 2		; <i32*> [#uses=0]
-	%tmp582 = getelementptr %struct.move_s* %temp_move, i32 0, i32 3		; <i32*> [#uses=0]
-	%tmp585 = getelementptr %struct.move_s* %temp_move, i32 0, i32 4		; <i32*> [#uses=0]
-	%tmp588 = getelementptr %struct.move_s* %temp_move, i32 0, i32 5		; <i32*> [#uses=0]
-	%pn_restart.0.ph = bitcast i32 0 to i32		; <i32> [#uses=2]
-	%tmp21362 = icmp eq i32 0, 0		; <i1> [#uses=2]
-	%tmp216 = sitofp i32 %pn_restart.0.ph to float		; <float> [#uses=1]
-	%tmp216.upgrd.177 = fpext float %tmp216 to double		; <double> [#uses=1]
-	%tmp217 = fadd double %tmp216.upgrd.177, 1.000000e+00		; <double> [#uses=1]
-	%tmp835 = icmp sgt i32 %pn_restart.0.ph, 9		; <i1> [#uses=0]
-	store i32 0, i32* @nodes
-	store i32 0, i32* @qnodes
-	store i32 1, i32* @ply
-	store i32 0, i32* @ECacheProbes
-	store i32 0, i32* @ECacheHits
-	store i32 0, i32* @TTProbes
-	store i32 0, i32* @TTHits
-	store i32 0, i32* @TTStores
-	store i32 0, i32* @NCuts
-	store i32 0, i32* @NTries
-	store i32 0, i32* @TExt
-	store i32 0, i32* @FH
-	store i32 0, i32* @FHF
-	store i32 0, i32* @PVS
-	store i32 0, i32* @FULL
-	store i32 0, i32* @PVSF
-	store i32 0, i32* @ext_check
-	store i32 0, i32* @ext_onerep
-	store i32 0, i32* @razor_drop
-	store i32 0, i32* @razor_material
-	store i1 false, i1* @extendedtime.b
-	store i1 false, i1* @forcedwin.b
-	store i32 200, i32* @maxposdiff
-	store i8 0, i8* @true_i_depth
-	store i32 0, i32* @legals
-	%tmp48 = load i32* @Variant		; <i32> [#uses=1]
-	%tmp49 = icmp eq i32 %tmp48, 4		; <i1> [#uses=1]
-	%storemerge = zext i1 %tmp49 to i32		; <i32> [#uses=1]
-	store i32 %storemerge, i32* @captures
-	call fastcc void @gen( %struct.move_s* %tmp )
-	%tmp53 = load i32* @numb_moves		; <i32> [#uses=1]
-	%tmp.i = load i32* @Variant		; <i32> [#uses=1]
-	%tmp.i.upgrd.178 = icmp eq i32 %tmp.i, 3		; <i1> [#uses=1]
-	br i1 %tmp.i.upgrd.178, label %in_check.exit, label %cond_next.i
-
-cond_next.i:		; preds = %entry
-	%tmp2.i5 = load i32* @white_to_move		; <i32> [#uses=1]
-	%tmp3.i = icmp eq i32 %tmp2.i5, 1		; <i1> [#uses=0]
-	ret void
-
-in_check.exit:		; preds = %entry
-	%tmp7637 = icmp sgt i32 %tmp53, 0		; <i1> [#uses=1]
-	br i1 %tmp7637, label %cond_true77, label %bb80
-
-cond_true77:		; preds = %in_check.exit
-	%l.1.0 = bitcast i32 0 to i32		; <i32> [#uses=2]
-	call fastcc void @make( %struct.move_s* %tmp, i32 %l.1.0 )
-	%tmp61 = call fastcc i32 @check_legal( %struct.move_s* %tmp, i32 %l.1.0, i32 0 )		; <i32> [#uses=1]
-	%tmp62 = icmp eq i32 %tmp61, 0		; <i1> [#uses=0]
-	ret void
-
-bb80:		; preds = %in_check.exit
-	%tmp81 = load i32* @Variant		; <i32> [#uses=1]
-	%tmp82 = icmp eq i32 %tmp81, 4		; <i1> [#uses=1]
-	br i1 %tmp82, label %cond_true83, label %cond_next118
-
-cond_true83:		; preds = %bb80
-	%tmp84 = load i32* @legals		; <i32> [#uses=1]
-	%tmp85 = icmp eq i32 %tmp84, 0		; <i1> [#uses=0]
-	ret void
-
-cond_next118:		; preds = %bb80
-	%tmp119 = load i32* @Variant		; <i32> [#uses=1]
-	%tmp120 = icmp eq i32 %tmp119, 1		; <i1> [#uses=1]
-	br i1 %tmp120, label %cond_next176, label %cond_true121
-
-cond_true121:		; preds = %cond_next118
-	%tmp122.b = load i1* @is_pondering.b		; <i1> [#uses=1]
-	br i1 %tmp122.b, label %cond_next176, label %cond_true124
-
-cond_true124:		; preds = %cond_true121
-	%tmp125 = load i32* @legals		; <i32> [#uses=1]
-	%tmp126 = icmp eq i32 %tmp125, 1		; <i1> [#uses=1]
-	br i1 %tmp126, label %cond_true127, label %cond_next176
-
-cond_true127:		; preds = %cond_true124
-	%tmp128 = load i32* @inc		; <i32> [#uses=1]
-	%tmp129 = mul i32 %tmp128, 100		; <i32> [#uses=1]
-	%tmp130 = load i32* @time_cushion		; <i32> [#uses=1]
-	%tmp131 = add i32 %tmp129, %tmp130		; <i32> [#uses=1]
-	store i32 %tmp131, i32* @time_cushion
-	%tmp134 = getelementptr %struct.move_s* %agg.result, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp135 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp136 = load i32* %tmp135		; <i32> [#uses=1]
-	store i32 %tmp136, i32* %tmp134
-	%tmp137 = getelementptr %struct.move_s* %agg.result, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp138 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp139 = load i32* %tmp138		; <i32> [#uses=1]
-	store i32 %tmp139, i32* %tmp137
-	%tmp140 = getelementptr %struct.move_s* %agg.result, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp141 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp142 = load i32* %tmp141		; <i32> [#uses=1]
-	store i32 %tmp142, i32* %tmp140
-	%tmp143 = getelementptr %struct.move_s* %agg.result, i32 0, i32 3		; <i32*> [#uses=1]
-	%tmp144 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 3		; <i32*> [#uses=1]
-	%tmp145 = load i32* %tmp144		; <i32> [#uses=1]
-	store i32 %tmp145, i32* %tmp143
-	%tmp146 = getelementptr %struct.move_s* %agg.result, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp147 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp148 = load i32* %tmp147		; <i32> [#uses=1]
-	store i32 %tmp148, i32* %tmp146
-	%tmp149 = getelementptr %struct.move_s* %agg.result, i32 0, i32 5		; <i32*> [#uses=1]
-	%tmp150 = getelementptr [512 x %struct.move_s]* %moves, i32 0, i32 0, i32 5		; <i32*> [#uses=1]
-	%tmp151 = load i32* %tmp150		; <i32> [#uses=1]
-	store i32 %tmp151, i32* %tmp149
-	ret void
-
-cond_next176:		; preds = %cond_true124, %cond_true121, %cond_next118
-	call fastcc void @check_phase( )
-	%tmp177 = load i32* @phase		; <i32> [#uses=1]
-	switch i32 %tmp177, label %bb187 [
-		 i32 0, label %bb178
-		 i32 1, label %bb180
-		 i32 2, label %bb183
-	]
-
-bb178:		; preds = %cond_next176
-	%tmp179 = call i32 (i8*, ...)* @printf( i8* getelementptr ([16 x i8]* @str40.upgrd.84, i32 0, i64 0) )		; <i32> [#uses=0]
-	%tmp18854.b = load i1* @is_pondering.b		; <i1> [#uses=1]
-	br i1 %tmp18854.b, label %cond_false210, label %cond_true190
-
-bb180:		; preds = %cond_next176
-	%tmp182 = call i32 (i8*, ...)* @printf( i8* getelementptr ([19 x i8]* @str.upgrd.85, i32 0, i64 0) )		; <i32> [#uses=0]
-	%tmp18856.b = load i1* @is_pondering.b		; <i1> [#uses=0]
-	ret void
-
-bb183:		; preds = %cond_next176
-	%tmp185 = call i32 (i8*, ...)* @printf( i8* getelementptr ([16 x i8]* @str1.upgrd.86, i32 0, i64 0) )		; <i32> [#uses=0]
-	%tmp18858.b = load i1* @is_pondering.b		; <i1> [#uses=0]
-	ret void
-
-bb187:		; preds = %cond_next176
-	%tmp188.b = load i1* @is_pondering.b		; <i1> [#uses=0]
-	ret void
-
-cond_true190:		; preds = %bb178
-	%tmp191 = load i32* @fixed_time		; <i32> [#uses=1]
-	%tmp192 = icmp eq i32 %tmp191, 0		; <i1> [#uses=0]
-	ret void
-
-cond_false210:		; preds = %bb178
-	store i32 999999, i32* @time_for_move
-	br i1 %tmp21362, label %cond_true226.critedge, label %bb287.critedge
-
-cond_true226.critedge:		; preds = %cond_false210
-	%tmp223.c = call i32 (i8*, ...)* @printf( i8* getelementptr ([20 x i8]* @str43, i32 0, i64 0), i32 999999 )		; <i32> [#uses=0]
-	%tmp.i.upgrd.179 = load %struct.TType** @DP_TTable		; <%struct.TType*> [#uses=1]
-	%tmp.i7.b = load i1* @TTSize.b		; <i1> [#uses=1]
-	%tmp1.i = select i1 %tmp.i7.b, i32 60000000, i32 0		; <i32> [#uses=1]
-	%tmp.i.sb = getelementptr %struct.TType* %tmp.i.upgrd.179, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %tmp.i.sb, i8 0, i32 %tmp1.i, i32 4 )
-	%tmp2.i = load %struct.TType** @AS_TTable		; <%struct.TType*> [#uses=1]
-	%tmp3.i8.b = load i1* @TTSize.b		; <i1> [#uses=1]
-	%tmp4.i = select i1 %tmp3.i8.b, i32 60000000, i32 0		; <i32> [#uses=1]
-	%tmp2.i.upgrd.180 = getelementptr %struct.TType* %tmp2.i, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %tmp2.i.upgrd.180, i8 0, i32 %tmp4.i, i32 4 )
-	%tmp.i.QTT = load %struct.QTType** @QS_TTable		; <%struct.QTType*> [#uses=1]
-	%tmp5.i9.b = load i1* @TTSize.b		; <i1> [#uses=1]
-	%tmp6.i10 = select i1 %tmp5.i9.b, i32 48000000, i32 0		; <i32> [#uses=1]
-	%tmp7.i = getelementptr %struct.QTType* %tmp.i.QTT, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %tmp7.i, i8 0, i32 %tmp6.i10, i32 4 )
-	%tmp.i.ECache = load %struct.ECacheType** @ECache		; <%struct.ECacheType*> [#uses=1]
-	%tmp.i14.b = load i1* @ECacheSize.b		; <i1> [#uses=1]
-	%tmp1.i16 = select i1 %tmp.i14.b, i32 12000000, i32 0		; <i32> [#uses=1]
-	%tmp.i17 = bitcast %struct.ECacheType* %tmp.i.ECache to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %tmp.i17, i8 0, i32 %tmp1.i16, i32 4 )
-	call void @llvm.memset.i32( i8* bitcast ([300 x i32]* @rootlosers to i8*), i8 0, i32 1200, i32 4 )
-	%tmp234.b = load i1* @is_pondering.b		; <i1> [#uses=1]
-	br i1 %tmp234.b, label %bb263, label %cond_next238
-
-cond_next238:		; preds = %cond_true226.critedge
-	%tmp239 = load i32* @Variant		; <i32> [#uses=2]
-	switch i32 %tmp239, label %bb263 [
-		 i32 3, label %bb249
-		 i32 4, label %bb249
-	]
-
-bb249:		; preds = %cond_next238, %cond_next238
-	%tmp250 = load i32* @piece_count		; <i32> [#uses=1]
-	%tmp251 = icmp sgt i32 %tmp250, 3		; <i1> [#uses=1]
-	%tmp240.not = icmp ne i32 %tmp239, 3		; <i1> [#uses=1]
-	%brmerge = or i1 %tmp251, %tmp240.not		; <i1> [#uses=1]
-	br i1 %brmerge, label %bb260, label %bb263
-
-bb260:		; preds = %bb249
-	%tmp261 = load i32* @time_for_move		; <i32> [#uses=1]
-	%tmp261.upgrd.181 = sitofp i32 %tmp261 to float		; <float> [#uses=1]
-	%tmp261.upgrd.182 = fpext float %tmp261.upgrd.181 to double		; <double> [#uses=1]
-	%tmp262 = fdiv double %tmp261.upgrd.182, 3.000000e+00		; <double> [#uses=1]
-	%tmp262.upgrd.183 = fptosi double %tmp262 to i32		; <i32> [#uses=1]
-	store i32 %tmp262.upgrd.183, i32* @pn_time
-	%tmp1.b.i = load i1* @PBSize.b		; <i1> [#uses=1]
-	%tmp1.i1 = select i1 %tmp1.b.i, i32 200000, i32 0		; <i32> [#uses=1]
-	%tmp.i2 = call i8* @calloc( i32 %tmp1.i1, i32 44 )		; <i8*> [#uses=1]
-	%tmp.i.ub = bitcast i8* %tmp.i2 to i8*		; <i8*> [#uses=1]
-	store i8* %tmp.i.ub, i8** @membuff
-	%tmp2.i3 = call i8* @calloc( i32 1, i32 44 )		; <i8*> [#uses=3]
-	%tmp2.i.upgrd.184 = bitcast i8* %tmp2.i3 to %struct.node_t*		; <%struct.node_t*> [#uses=6]
-	%tmp.i.move_s = getelementptr [512 x %struct.move_s]* null, i32 0, i32 0		; <%struct.move_s*> [#uses=3]
-	call fastcc void @gen( %struct.move_s* %tmp.i.move_s )
-	%tmp3.i4 = load i32* @numb_moves		; <i32> [#uses=4]
-	%tmp3.i5 = bitcast i32 %tmp3.i4 to i32		; <i32> [#uses=0]
-	store i1 false, i1* @alllosers.b
-	call void @llvm.memset.i32( i8* bitcast ([300 x i32]* @rootlosers to i8*), i8 0, i32 1200, i32 4 )
-	%nodesspent.i = bitcast [512 x i32]* null to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %nodesspent.i, i8 0, i32 2048, i32 16 )
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 0)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 1)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 2)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 3)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 4)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 5)
-	%tmp.i.i = load i32* @Variant		; <i32> [#uses=1]
-	%tmp.i.i.upgrd.185 = icmp eq i32 %tmp.i.i, 3		; <i1> [#uses=1]
-	br i1 %tmp.i.i.upgrd.185, label %in_check.exit.i, label %cond_next.i.i
-
-cond_next.i.i:		; preds = %bb260
-	%tmp2.i.i = load i32* @white_to_move		; <i32> [#uses=1]
-	%tmp3.i.i = icmp eq i32 %tmp2.i.i, 1		; <i1> [#uses=1]
-	br i1 %tmp3.i.i, label %cond_true4.i.i, label %cond_false12.i.i
-
-cond_true4.i.i:		; preds = %cond_next.i.i
-	%tmp5.i.i = load i32* @wking_loc		; <i32> [#uses=1]
-	%tmp6.i.i = call fastcc i32 @is_attacked( i32 %tmp5.i.i, i32 0 )		; <i32> [#uses=1]
-	%not.tmp7.i.i = icmp ne i32 %tmp6.i.i, 0		; <i1> [#uses=1]
-	%tmp217.i = zext i1 %not.tmp7.i.i to i32		; <i32> [#uses=1]
-	%tmp4219.i = icmp sgt i32 %tmp3.i4, 0		; <i1> [#uses=1]
-	br i1 %tmp4219.i, label %cond_true43.i, label %bb46.i
-
-cond_false12.i.i:		; preds = %cond_next.i.i
-	%tmp13.i.i = load i32* @bking_loc		; <i32> [#uses=1]
-	%tmp14.i.i = call fastcc i32 @is_attacked( i32 %tmp13.i.i, i32 1 )		; <i32> [#uses=1]
-	%not.tmp15.i.i = icmp ne i32 %tmp14.i.i, 0		; <i1> [#uses=1]
-	%tmp2120.i = zext i1 %not.tmp15.i.i to i32		; <i32> [#uses=1]
-	%tmp4222.i = icmp sgt i32 %tmp3.i4, 0		; <i1> [#uses=1]
-	br i1 %tmp4222.i, label %cond_true43.i, label %bb46.i
-
-in_check.exit.i:		; preds = %bb260
-	%tmp4224.i = icmp sgt i32 %tmp3.i4, 0		; <i1> [#uses=0]
-	ret void
-
-cond_true43.i:		; preds = %cond_false12.i.i, %cond_true4.i.i
-	%tmp21.0.ph.i = phi i32 [ %tmp217.i, %cond_true4.i.i ], [ %tmp2120.i, %cond_false12.i.i ]		; <i32> [#uses=1]
-	%i.0.0.i = bitcast i32 0 to i32		; <i32> [#uses=2]
-	call fastcc void @make( %struct.move_s* %tmp.i.move_s, i32 %i.0.0.i )
-	%tmp27.i = call fastcc i32 @check_legal( %struct.move_s* %tmp.i.move_s, i32 %i.0.0.i, i32 %tmp21.0.ph.i )		; <i32> [#uses=1]
-	%tmp.i6 = icmp eq i32 %tmp27.i, 0		; <i1> [#uses=0]
-	ret void
-
-bb46.i:		; preds = %cond_false12.i.i, %cond_true4.i.i
-	%tmp48.i = icmp eq i32 0, 0		; <i1> [#uses=1]
-	br i1 %tmp48.i, label %cond_true49.i, label %cond_next53.i
-
-cond_true49.i:		; preds = %bb46.i
-	store i32 0, i32* @bufftop
-	%tmp50.i = load i8** @membuff		; <i8*> [#uses=1]
-	free i8* %tmp50.i
-	free i8* %tmp2.i3
-	ret void
-
-cond_next53.i:		; preds = %bb46.i
-	store i32 1, i32* @nodecount
-	store i32 0, i32* @iters
-	store i32 0, i32* @maxply
-	store i32 0, i32* @forwards
-	%tmp54.i = load i32* @move_number		; <i32> [#uses=1]
-	%tmp55.i = load i32* @ply		; <i32> [#uses=1]
-	%tmp56.i = add i32 %tmp54.i, -1		; <i32> [#uses=1]
-	%tmp57.i = add i32 %tmp56.i, %tmp55.i		; <i32> [#uses=1]
-	%tmp58.i = load i32* @hash		; <i32> [#uses=1]
-	%tmp.i.upgrd.186 = getelementptr [600 x i32]* @hash_history, i32 0, i32 %tmp57.i		; <i32*> [#uses=1]
-	store i32 %tmp58.i, i32* %tmp.i.upgrd.186
-	%tmp59.i = load i32* @white_to_move		; <i32> [#uses=1]
-	%tmp60.i = icmp eq i32 %tmp59.i, 0		; <i1> [#uses=1]
-	%tmp60.i.upgrd.187 = zext i1 %tmp60.i to i32		; <i32> [#uses=1]
-	store i32 %tmp60.i.upgrd.187, i32* @root_to_move
-	%tmp.i4.i = load i32* @Variant		; <i32> [#uses=2]
-	%tmp.i5.i = icmp eq i32 %tmp.i4.i, 3		; <i1> [#uses=1]
-	br i1 %tmp.i5.i, label %cond_true.i.i, label %cond_false.i.i
-
-cond_true.i.i:		; preds = %cond_next53.i
-	call fastcc void @suicide_pn_eval( %struct.node_t* %tmp2.i.upgrd.184 )
-	%tmp6328.i = getelementptr %struct.node_t* %tmp2.i.upgrd.184, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp29.i = load i8* %tmp6328.i		; <i8> [#uses=1]
-	%tmp6430.i = icmp eq i8 %tmp29.i, 1		; <i1> [#uses=0]
-	ret void
-
-cond_false.i.i:		; preds = %cond_next53.i
-	%tmp2.i.i.upgrd.188 = icmp eq i32 %tmp.i4.i, 4		; <i1> [#uses=1]
-	%tmp63.i = getelementptr %struct.node_t* %tmp2.i.upgrd.184, i32 0, i32 0		; <i8*> [#uses=2]
-	br i1 %tmp2.i.i.upgrd.188, label %cond_true3.i.i, label %cond_false5.i.i
-
-cond_true3.i.i:		; preds = %cond_false.i.i
-	call fastcc void @losers_pn_eval( %struct.node_t* %tmp2.i.upgrd.184 )
-	%tmp31.i = load i8* %tmp63.i		; <i8> [#uses=1]
-	%tmp6432.i = icmp eq i8 %tmp31.i, 1		; <i1> [#uses=1]
-	br i1 %tmp6432.i, label %bb75.i, label %cond_next67.i
-
-cond_false5.i.i:		; preds = %cond_false.i.i
-	call fastcc void @std_pn_eval( %struct.node_t* %tmp2.i.upgrd.184 )
-	%tmp.i.upgrd.189 = load i8* %tmp63.i		; <i8> [#uses=1]
-	%tmp64.i = icmp eq i8 %tmp.i.upgrd.189, 1		; <i1> [#uses=0]
-	ret void
-
-cond_next67.i:		; preds = %cond_true3.i.i
-	%tmp69.i = getelementptr %struct.node_t* %tmp2.i.upgrd.184, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp70.i = load i8* %tmp69.i		; <i8> [#uses=1]
-	%tmp71.i = icmp eq i8 %tmp70.i, 0		; <i1> [#uses=0]
-	ret void
-
-bb75.i:		; preds = %cond_true3.i.i
-	store i32 0, i32* @bufftop
-	%tmp76.i = load i8** @membuff		; <i8*> [#uses=1]
-	free i8* %tmp76.i
-	free i8* %tmp2.i3
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 0)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 1)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 2)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 3)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 4)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 5)
-	%tmp28869 = load i32* @result		; <i32> [#uses=1]
-	%tmp28970 = icmp eq i32 %tmp28869, 0		; <i1> [#uses=1]
-	br i1 %tmp28970, label %cond_next337, label %cond_true290
-
-bb263:		; preds = %bb249, %cond_next238, %cond_true226.critedge
-	br i1 %tmp21362, label %cond_true266, label %bb287
-
-cond_true266:		; preds = %bb263
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 0)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 1)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 2)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 3)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 4)
-	store i32 0, i32* getelementptr (%struct.move_s* @pn_move, i64 0, i32 5)
-	%tmp28871 = load i32* @result		; <i32> [#uses=1]
-	%tmp28972 = icmp eq i32 %tmp28871, 0		; <i1> [#uses=0]
-	ret void
-
-bb287.critedge:		; preds = %cond_false210
-	%tmp218.c = fdiv double 1.999998e+06, %tmp217		; <double> [#uses=1]
-	%tmp218.c.upgrd.190 = fptosi double %tmp218.c to i32		; <i32> [#uses=2]
-	store i32 %tmp218.c.upgrd.190, i32* @time_for_move
-	%tmp22367.c = call i32 (i8*, ...)* @printf( i8* getelementptr ([20 x i8]* @str43, i32 0, i64 0), i32 %tmp218.c.upgrd.190 )		; <i32> [#uses=0]
-	ret void
-
-bb287:		; preds = %bb263
-	%tmp288 = load i32* @result		; <i32> [#uses=1]
-	%tmp289 = icmp eq i32 %tmp288, 0		; <i1> [#uses=0]
-	ret void
-
-cond_true290:		; preds = %bb75.i
-	%tmp292 = load i32* getelementptr (%struct.move_s* @pn_move, i32 0, i32 1)		; <i32> [#uses=1]
-	%tmp295 = icmp eq i32 %tmp292, 0		; <i1> [#uses=0]
-	ret void
-
-cond_next337:		; preds = %bb75.i
-	%tmp338.b = load i1* @forcedwin.b		; <i1> [#uses=1]
-	br i1 %tmp338.b, label %bb348, label %cond_next342
-
-cond_next342:		; preds = %cond_next337
-	%tmp343 = load i32* @result		; <i32> [#uses=1]
-	%tmp344 = icmp eq i32 %tmp343, 0		; <i1> [#uses=0]
-	ret void
-
-bb348:		; preds = %cond_next337
-	%tmp350 = load i32* getelementptr (%struct.move_s* @pn_move, i32 0, i32 1)		; <i32> [#uses=1]
-	%tmp353 = icmp eq i32 %tmp350, 0		; <i1> [#uses=0]
-	ret void
-}
-
-declare fastcc i32 @eval(i32, i32)
-
-declare i8* @fgets(i8*, i32, %struct.FILE*)
-
-declare i32 @fclose(%struct.FILE*)
-
-declare fastcc i32 @losers_eval()
-
-declare fastcc i32 @l_bishop_mobility(i32)
-
-declare fastcc i32 @l_rook_mobility(i32)
-
-declare fastcc i32 @check_legal(%struct.move_s*, i32, i32)
-
-declare fastcc void @gen(%struct.move_s*)
-
-declare fastcc void @push_pawn(i32, i32)
-
-declare fastcc void @push_knighT(i32)
-
-declare fastcc void @push_slidE(i32)
-
-declare fastcc void @push_king(i32)
-
-declare fastcc i32 @f_in_check(%struct.move_s*, i32)
-
-declare fastcc void @make(%struct.move_s*, i32)
-
-declare fastcc void @add_capture(i32, i32, i32)
-
-declare fastcc void @unmake(%struct.move_s*, i32)
-
-declare i32 @ErrorIt(i32, i32)
-
-declare i32 @Pawn(i32, i32)
-
-declare i32 @Knight(i32, i32)
-
-declare i32 @King(i32, i32)
-
-declare i32 @Rook(i32, i32)
-
-declare i32 @Queen(i32, i32)
-
-declare i32 @Bishop(i32, i32)
-
-declare fastcc void @check_phase()
-
-declare fastcc i32 @bishop_mobility(i32)
-
-declare fastcc i32 @rook_mobility(i32)
-
-declare i32 @sscanf(i8*, i8*, ...)
-
-declare i32 @strncmp(i8*, i8*, i32)
-
-declare i8* @strchr(i8*, i32)
-
-declare fastcc void @CheckBadFlow(i32)
-
-declare fastcc void @suicide_pn_eval(%struct.node_t*)
-
-declare fastcc void @losers_pn_eval(%struct.node_t*)
-
-declare fastcc void @std_pn_eval(%struct.node_t*)
-
-declare fastcc %struct.node_t* @select_most_proving(%struct.node_t*)
-
-declare fastcc void @set_proof_and_disproof_numbers(%struct.node_t*)
-
-declare fastcc void @StoreTT(i32, i32, i32, i32, i32, i32)
-
-declare fastcc void @develop_node(%struct.node_t*)
-
-declare fastcc void @update_ancestors(%struct.node_t*)
-
-declare i8* @calloc(i32, i32)
-
-declare fastcc void @comp_to_coord(i64, i64, i64, i8*)
-
-declare i8* @strcat(i8*, i8*)
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-declare fastcc void @order_moves(%struct.move_s*, i32*, i32*, i32, i32)
-
-declare fastcc i32 @see(i32, i32, i32)
-
-declare fastcc void @perft(i32)
-
-declare fastcc i32 @qsearch(i32, i32, i32)
-
-declare fastcc i32 @allocate_time()
-
-declare fastcc void @QStoreTT(i32, i32, i32, i32)
-
-declare fastcc i32 @search(i32, i32, i32, i32)
-
-declare fastcc i32 @ProbeTT(i32*, i32, i32*, i32*, i32*, i32)
-
-declare void @search_root(%struct.move_s* sret , i32, i32, i32)
-
-declare fastcc void @post_fh_thinking(i32, %struct.move_s*)
-
-declare fastcc void @post_thinking(i32)
-
-declare i32 @fprintf(%struct.FILE*, i8*, ...)
-
-declare fastcc i32 @s_bishop_mobility(i32)
-
-declare fastcc i32 @s_rook_mobility(i32)
-
-declare fastcc i32 @suicide_mid_eval()
-
-declare i32 @main(i32, i8**)
-
-declare fastcc void @init_game()
-
-declare void @setbuf(%struct.FILE*, i8*)
-
-declare i8* @strcpy(i8*, i8*)
-
-declare i32 @__tolower(i32)
-
-declare i32 @strcmp(i8*, i8*)
-
-declare void (i32)* @signal(i32, void (i32)*)
-
-declare fastcc void @hash_extract_pv(i32, i8*)
-
-declare double @difftime(i32, i32)
-
-declare i32 @getc(%struct.FILE*)
-
-declare i32 @strlen(i8*)
-
-declare i32 @fwrite(i8*, i32, i32, %struct.FILE*)
diff --git a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll b/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
deleted file mode 100644
index 9fcbf078c8b4..000000000000
--- a/test/Transforms/DeadStoreElimination/2008-07-28-load-store.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -dse -S | not grep tmp5
-; PR2599
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-define void @foo({ i32, i32 }* %x) nounwind  {
-entry:
-	%tmp4 = getelementptr { i32, i32 }* %x, i32 0, i32 0		; <i32*> [#uses=2]
-	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
-	%tmp7 = getelementptr { i32, i32 }* %x, i32 0, i32 1		; <i32*> [#uses=2]
-	%tmp8 = load i32* %tmp7, align 4		; <i32> [#uses=1]
-	%tmp17 = sub i32 0, %tmp8		; <i32> [#uses=1]
-	store i32 %tmp5, i32* %tmp4, align 4
-	store i32 %tmp17, i32* %tmp7, align 4
-	ret void
-}
diff --git a/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll b/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
deleted file mode 100644
index 5958c6c50800..000000000000
--- a/test/Transforms/DeadStoreElimination/2008-11-28-MemDepUpdate.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -dse | llvm-dis
-; PR3141
-	%struct.ada__tags__dispatch_table = type { [1 x i32] }
-	%struct.f393a00_1__object = type { %struct.ada__tags__dispatch_table*, i8 }
-	%struct.f393a00_2__windmill = type { %struct.f393a00_1__object, i16 }
-
-define void @f393a00_2__swap(%struct.f393a00_2__windmill* %a, %struct.f393a00_2__windmill* %b) {
-entry:
-	%t = alloca %struct.f393a00_2__windmill		; <%struct.f393a00_2__windmill*> [#uses=1]
-	%0 = getelementptr %struct.f393a00_2__windmill* %t, i32 0, i32 0, i32 0		; <%struct.ada__tags__dispatch_table**> [#uses=1]
-	%1 = load %struct.ada__tags__dispatch_table** null, align 4		; <%struct.ada__tags__dispatch_table*> [#uses=1]
-	%2 = load %struct.ada__tags__dispatch_table** %0, align 8		; <%struct.ada__tags__dispatch_table*> [#uses=1]
-	store %struct.ada__tags__dispatch_table* %2, %struct.ada__tags__dispatch_table** null, align 4
-	store %struct.ada__tags__dispatch_table* %1, %struct.ada__tags__dispatch_table** null, align 4
-	ret void
-}
diff --git a/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll b/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
deleted file mode 100644
index c320a3eb6f7a..000000000000
--- a/test/Transforms/DeadStoreElimination/2008-11-29-OffEndOfBlock.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -dse | llvm-dis
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-	%struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
-	%struct.cab_file = type { i32, i16, i64, i8*, i32, i32, i32, %struct.cab_folder*, %struct.cab_file*, %struct.cab_archive*, %struct.cab_state* }
-	%struct.cab_folder = type { i16, i16, %struct.cab_archive*, i64, %struct.cab_folder* }
-	%struct.cab_state = type { i8*, i8*, [38912 x i8], i16, i16, i8*, i16 }
-	%struct.lzx_stream = type { i32, i32, i8, i64, i64, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, i8, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, [84 x i8], [720 x i8], [314 x i8], [72 x i8], [104 x i16], [5408 x i16], [4596 x i16], [144 x i16], [51 x i32], [51 x i8], [32768 x i8], %struct.cab_file*, i32 (%struct.cab_file*, i8*, i32)* }
-
-declare fastcc i32 @lzx_read_lens(%struct.lzx_stream*, i8*, i32, i32) nounwind
-
-define i32 @lzx_decompress(%struct.lzx_stream* %lzx, i64 %out_bytes) nounwind {
-bb13:		; preds = %entry
-	%0 = getelementptr %struct.lzx_stream* %lzx, i32 0, i32 25		; <i8**> [#uses=2]
-	%1 = getelementptr %struct.lzx_stream* %lzx, i32 0, i32 26		; <i8**> [#uses=2]
-	%2 = getelementptr %struct.lzx_stream* %lzx, i32 0, i32 29		; <i32*> [#uses=0]
-	br label %bb14
-
-bb14:		; preds = %bb13
-	%3 = load i8** %0, align 4		; <i8*> [#uses=1]
-	%4 = load i8** %1, align 4		; <i8*> [#uses=1]
-	store i8* %3, i8** %0, align 4
-	store i8* %4, i8** %1, align 4
-	%5 = call fastcc i32 @lzx_read_lens(%struct.lzx_stream* %lzx, i8* null, i32 256, i32 0) nounwind		; <i32> [#uses=0]
-	unreachable
-}
diff --git a/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll b/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll
deleted file mode 100644
index 9a943b476029..000000000000
--- a/test/Transforms/DeadStoreElimination/2009-11-10-Trampoline.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt -S -dse < %s | FileCheck %s
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-declare void @f()
-
-define void @unused_trampoline() {
-; CHECK: @unused_trampoline
-	%storage = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
-; CHECK-NOT: alloca
-	%cast = getelementptr [10 x i8]* %storage, i32 0, i32 0		; <i8*> [#uses=1]
-	%tramp = call i8* @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @f to i8*), i8* null )		; <i8*> [#uses=1]
-; CHECK-NOT: trampoline
-	ret void
-; CHECK: ret void
-}
diff --git a/test/Transforms/DeadStoreElimination/PartialStore.ll b/test/Transforms/DeadStoreElimination/PartialStore.ll
index ab1edf5b4731..7ac1e0844ed4 100644
--- a/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -1,14 +1,71 @@
-; RUN: opt < %s -dse -S | \
-; RUN:    not grep {store i8}
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
 ; Ensure that the dead store is deleted in this case.  It is wholely
 ; overwritten by the second store.
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-define i32 @test() {
-        %V = alloca i32         ; <i32*> [#uses=3]
+define void @test1(i32 *%V) {
         %V2 = bitcast i32* %V to i8*            ; <i8*> [#uses=1]
         store i8 0, i8* %V2
         store i32 1234567, i32* %V
-        %X = load i32* %V               ; <i32> [#uses=1]
-        ret i32 %X
+        ret void
+; CHECK: @test1
+; CHECK-NEXT: store i32 1234567
+}
+
+; Note that we could do better by merging the two stores into one.
+define void @test2(i32* %P) {
+; CHECK: @test2
+  store i32 0, i32* %P
+; CHECK: store i32
+  %Q = bitcast i32* %P to i16*
+  store i16 1, i16* %Q
+; CHECK: store i16
+  ret void
+}
+
+
+define i32 @test3(double %__x) {
+; CHECK: @test3
+; CHECK: store double
+  %__u = alloca { [3 x i32] }
+  %tmp.1 = bitcast { [3 x i32] }* %__u to double*
+  store double %__x, double* %tmp.1
+  %tmp.4 = getelementptr { [3 x i32] }* %__u, i32 0, i32 0, i32 1
+  %tmp.5 = load i32* %tmp.4
+  %tmp.6 = icmp slt i32 %tmp.5, 0
+  %tmp.7 = zext i1 %tmp.6 to i32
+  ret i32 %tmp.7
+}
+
+; PR6043
+define void @test4(i8* %P) {
+; CHECK: @test4
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: store double
+
+  store i8 19, i8* %P  ;; dead
+  %A = getelementptr i8* %P, i32 3
+  
+  store i8 42, i8* %A  ;; dead
+  
+  %Q = bitcast i8* %P to double*
+  store double 0.0, double* %Q
+  ret void
 }
 
+; PR8657
+declare void @test5a(i32*)
+define void @test5(i32 %i) nounwind ssp {
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*
+  %C = getelementptr i8* %B, i32 %i
+  store i8 10, i8* %C        ;; Dead store to variable index.
+  store i32 20, i32* %A
+  
+  call void @test5a(i32* %A)
+  ret void
+; CHECK: @test5(
+; CHECK-NEXT: alloca
+; CHECK-NEXT: store i32 20
+; CHECK-NEXT: call void @test5a
+}
diff --git a/test/Transforms/DeadStoreElimination/alloca.ll b/test/Transforms/DeadStoreElimination/alloca.ll
deleted file mode 100644
index b6818eb3824f..000000000000
--- a/test/Transforms/DeadStoreElimination/alloca.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -dse -S | not grep DEAD
-
-define void @test(i32* %Q) {
-        %P = alloca i32         ; <i32*> [#uses=1]
-        %DEAD = load i32* %Q            ; <i32> [#uses=1]
-        store i32 %DEAD, i32* %P
-        ret void
-}
-
diff --git a/test/Transforms/DeadStoreElimination/byval.ll b/test/Transforms/DeadStoreElimination/byval.ll
deleted file mode 100644
index fa651b1ee382..000000000000
--- a/test/Transforms/DeadStoreElimination/byval.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -dse -S | not grep store
-
-%struct.x = type { i32, i32, i32, i32 }
-
-define i32 @foo(%struct.x* byval  %a) nounwind  {
-entry:
-	%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0
-	store i32 1, i32* %tmp2, align 4
-	ret i32 1
-}
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index 728a118944d6..7d57804631da 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -dse -S | FileCheck %s
+; RUN: opt %s -basicaa -dse -S | FileCheck %s
 
 %t = type { i32 }
 
diff --git a/test/Transforms/DeadStoreElimination/context-sensitive.ll b/test/Transforms/DeadStoreElimination/context-sensitive.ll
deleted file mode 100644
index 7954310f56bd..000000000000
--- a/test/Transforms/DeadStoreElimination/context-sensitive.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -dse -S | not grep DEAD
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-declare void @ext()
-
-define i32* @caller() {
-        %P = malloc i32         ; <i32*> [#uses=4]
-        %DEAD = load i32* %P            ; <i32> [#uses=1]
-        %DEAD2 = add i32 %DEAD, 1               ; <i32> [#uses=1]
-        store i32 %DEAD2, i32* %P
-        call void @ext( )
-        store i32 0, i32* %P
-        ret i32* %P
-}
-
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
index 5aac877a9ecf..bb279cdb97f0 100644
--- a/test/Transforms/DeadStoreElimination/crash.ll
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -dse -S
+; RUN: opt < %s -basicaa -dse -S
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
@@ -55,3 +55,20 @@ dead:
   store i32 4, i32* %Q2
   br label %dead
 }
+
+
+; PR3141
+%struct.ada__tags__dispatch_table = type { [1 x i32] }
+%struct.f393a00_1__object = type { %struct.ada__tags__dispatch_table*, i8 }
+%struct.f393a00_2__windmill = type { %struct.f393a00_1__object, i16 }
+
+define void @test4(%struct.f393a00_2__windmill* %a, %struct.f393a00_2__windmill* %b) {
+entry:
+	%t = alloca %struct.f393a00_2__windmill		; <%struct.f393a00_2__windmill*> [#uses=1]
+	%0 = getelementptr %struct.f393a00_2__windmill* %t, i32 0, i32 0, i32 0		; <%struct.ada__tags__dispatch_table**> [#uses=1]
+	%1 = load %struct.ada__tags__dispatch_table** null, align 4		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	%2 = load %struct.ada__tags__dispatch_table** %0, align 8		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	store %struct.ada__tags__dispatch_table* %2, %struct.ada__tags__dispatch_table** null, align 4
+	store %struct.ada__tags__dispatch_table* %1, %struct.ada__tags__dispatch_table** null, align 4
+	ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index 8b81ee353033..3c980ccac6b4 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -1,5 +1,11 @@
-; RUN: opt < %s -dse -S | not grep DEAD
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
 
+target datalayout = "e-p:64:64:64"
+
+; CHECK: @test
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @free
+; CHECK-NEXT: ret void
 define void @test(i32* %Q, i32* %P) {
         %DEAD = load i32* %Q            ; <i32> [#uses=1]
         store i32 %DEAD, i32* %P
@@ -7,9 +13,28 @@ define void @test(i32* %Q, i32* %P) {
         ret void
 }
 
+; CHECK: @test2
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @free
+; CHECK-NEXT: ret void
 define void @test2({i32, i32}* %P) {
 	%Q = getelementptr {i32, i32} *%P, i32 0, i32 1
 	store i32 4, i32* %Q
 	free {i32,i32}* %P
 	ret void
 }
+
+; CHECK: @test4
+; CHECK-NOT: store
+; CHECK: ret void
+define void @test4() {
+  %m = call i8* @malloc(i64 24)
+  store i8 0, i8* %m
+  %m1 = getelementptr i8* %m, i64 1
+  store i8 1, i8* %m1
+  call void @free(i8* %m)
+  ret void
+}
+
+declare void @free(i8*)
+declare i8* @malloc(i64)
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
index fd127d9f51bc..2b5cc5aedb7f 100644
--- a/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dse < %s | FileCheck %s
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
diff --git a/test/Transforms/DeadStoreElimination/memcpy.ll b/test/Transforms/DeadStoreElimination/memcpy.ll
deleted file mode 100644
index 8d996318ba8c..000000000000
--- a/test/Transforms/DeadStoreElimination/memcpy.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: opt < %s -dse -S | not grep alloca
-; ModuleID = 'placeholder.adb'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-	%struct.placeholder__T5b = type { i32, [1 x i32] }
-	%struct.placeholder__an_interval___PAD = type { %struct.placeholder__interval, [4 x i32] }
-	%struct.placeholder__interval = type { i32, i32 }
-	%struct.placeholder__s__s__the_interval___PAD = type { %struct.placeholder__interval }
-
-define void @_ada_placeholder() nounwind  {
-entry:
-	%an_interval = alloca %struct.placeholder__an_interval___PAD		; <%struct.placeholder__an_interval___PAD*> [#uses=3]
-	%tmp34 = bitcast %struct.placeholder__an_interval___PAD* %an_interval to %struct.placeholder__T5b*		; <%struct.placeholder__T5b*> [#uses=1]
-	%tmp5 = getelementptr %struct.placeholder__an_interval___PAD* %an_interval, i32 0, i32 0, i32 0		; <i32*> [#uses=2]
-	store i32 1, i32* %tmp5, align 8
-	%tmp10 = getelementptr %struct.placeholder__T5b* %tmp34, i32 0, i32 1, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp10, align 4
-	%tmp82 = load i32* %tmp5, align 8		; <i32> [#uses=5]
-	%tmp83 = icmp slt i32 %tmp82, 6		; <i1> [#uses=1]
-	%min84 = select i1 %tmp83, i32 %tmp82, i32 5		; <i32> [#uses=3]
-	%tmp85 = icmp sgt i32 %min84, -1		; <i1> [#uses=2]
-	%min84.cast193 = zext i32 %min84 to i64		; <i64> [#uses=1]
-	%min84.cast193.op = shl i64 %min84.cast193, 33		; <i64> [#uses=1]
-	%tmp104 = icmp sgt i32 %tmp82, -1		; <i1> [#uses=2]
-	%tmp103.cast192 = zext i32 %tmp82 to i64		; <i64> [#uses=1]
-	%tmp103.cast192.op = shl i64 %tmp103.cast192, 33		; <i64> [#uses=1]
-	%min84.cast193.op.op = ashr i64 %min84.cast193.op, 28		; <i64> [#uses=1]
-	%sextr121 = select i1 %tmp85, i64 %min84.cast193.op.op, i64 0		; <i64> [#uses=2]
-	%tmp103.cast192.op.op = ashr i64 %tmp103.cast192.op, 28		; <i64> [#uses=1]
-	%sextr123 = select i1 %tmp104, i64 %tmp103.cast192.op.op, i64 0		; <i64> [#uses=2]
-	%tmp124 = icmp sle i64 %sextr121, %sextr123		; <i1> [#uses=1]
-	%min125 = select i1 %tmp124, i64 %sextr121, i64 %sextr123		; <i64> [#uses=1]
-	%sextr131194 = and i64 %min125, 34359738336		; <i64> [#uses=1]
-	%tmp134 = add i64 %sextr131194, 63		; <i64> [#uses=1]
-	lshr i64 %tmp134, 3		; <i64>:0 [#uses=1]
-	%tmp150188.shrunk = trunc i64 %0 to i32		; <i32> [#uses=1]
-	%tmp159 = and i32 %tmp150188.shrunk, -4		; <i32> [#uses=1]
-	%tmp161 = alloca i8, i32 %tmp159		; <i8*> [#uses=1]
-	%min167.op = shl i32 %min84, 2		; <i32> [#uses=1]
-	%tmp170 = select i1 %tmp85, i32 %min167.op, i32 0		; <i32> [#uses=2]
-	%tmp173.op = shl i32 %tmp82, 2		; <i32> [#uses=1]
-	%tmp176 = select i1 %tmp104, i32 %tmp173.op, i32 0		; <i32> [#uses=2]
-	%tmp177 = icmp sle i32 %tmp170, %tmp176		; <i1> [#uses=1]
-	%min178 = select i1 %tmp177, i32 %tmp170, i32 %tmp176		; <i32> [#uses=1]
-	%tmp179 = add i32 %min178, 7		; <i32> [#uses=1]
-	%tmp180 = and i32 %tmp179, -4		; <i32> [#uses=1]
-	%tmp183185 = bitcast %struct.placeholder__an_interval___PAD* %an_interval to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp161, i8* %tmp183185, i32 %tmp180, i32 4 )
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 7e8f52a085bf..6c7f940316a0 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -dse -S | FileCheck %s
+; RUN: opt %s -basicaa -dse -S | FileCheck %s
 
 declare void @test1f()
 
diff --git a/test/Transforms/DeadStoreElimination/partial-overwrite.ll b/test/Transforms/DeadStoreElimination/partial-overwrite.ll
deleted file mode 100644
index 048d4645fcc5..000000000000
--- a/test/Transforms/DeadStoreElimination/partial-overwrite.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt -dse -S %s | FileCheck %s
-; Note that we could do better by merging the two stores into one.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @test(i32* %P) {
-  store i32 0, i32* %P
-; CHECK: store i32
-  %Q = bitcast i32* %P to i16*
-  store i16 1, i16* %Q
-; CHECK: store i16
-  ret void
-}
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index d8596401b30c..a61eac9729e8 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,10 +1,238 @@
-; RUN: opt < %s -dse -S | not grep DEAD
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-define void @test(i32* %Q, i32* %P) {
-        %DEAD = load i32* %Q            ; <i32> [#uses=1]
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
+
+define void @test1(i32* %Q, i32* %P) {
+        %DEAD = load i32* %Q
         store i32 %DEAD, i32* %P
         store i32 0, i32* %P
         ret void
+; CHECK: @test1
+; CHECK-NEXT: store i32 0, i32* %P
+; CHECK-NEXT: ret void
+}
+
+; PR8576 - Should delete store of 10 even though p/q are may aliases.
+define void @test2(i32 *%p, i32 *%q) {
+  store i32 10, i32* %p, align 4
+  store i32 20, i32* %q, align 4
+  store i32 30, i32* %p, align 4
+  ret void
+; CHECK: @test2
+; CHECK-NEXT: store i32 20
+}
+
+
+; PR8677
+@g = global i32 1
+
+define i32 @test3(i32* %g_addr) nounwind {
+; CHECK: @test3
+; CHECK: load i32* %g_addr
+  %g_value = load i32* %g_addr, align 4
+  store i32 -1, i32* @g, align 4
+  store i32 %g_value, i32* %g_addr, align 4
+  %tmp3 = load i32* @g, align 4
+  ret i32 %tmp3
+}
+
+
+define void @test4(i32* %Q) {
+        %a = load i32* %Q
+        volatile store i32 %a, i32* %Q
+        ret void
+; CHECK: @test4
+; CHECK-NEXT: load i32
+; CHECK-NEXT: volatile store
+; CHECK-NEXT: ret void
+}
+
+define void @test5(i32* %Q) {
+        %a = volatile load i32* %Q
+        store i32 %a, i32* %Q
+        ret void
+; CHECK: @test5
+; CHECK-NEXT: volatile load
+; CHECK-NEXT: ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6(i32 *%p, i8 *%q) {
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memset.i64(i8* %q, i8 42, i64 900, i32 1)
+  store i32 30, i32* %p, align 4
+  ret void
+; CHECK: @test6
+; CHECK-NEXT: call void @llvm.memset
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memcpy.i64(i8* %q, i8* %r, i64 900, i32 1)
+  store i32 30, i32* %p, align 4
+  ret void
+; CHECK: @test7
+; CHECK-NEXT: call void @llvm.memcpy
+}
+
+; Do not delete stores that are only partially killed.
+define i32 @test8() {
+        %V = alloca i32
+        store i32 1234567, i32* %V
+        %V2 = bitcast i32* %V to i8*
+        store i8 0, i8* %V2
+        %X = load i32* %V
+        ret i32 %X
+        
+; CHECK: @test8
+; CHECK: store i32 1234567
+}
+
+
+; Test for byval handling.
+%struct.x = type { i32, i32, i32, i32 }
+define void @test9(%struct.x* byval  %a) nounwind  {
+	%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0
+	store i32 1, i32* %tmp2, align 4
+	ret void
+; CHECK: @test9
+; CHECK-NEXT: ret void
+}
+
+; va_arg has fuzzy dependence, the store shouldn't be zapped.
+define double @test10(i8* %X) {
+        %X_addr = alloca i8*
+        store i8* %X, i8** %X_addr
+        %tmp.0 = va_arg i8** %X_addr, double
+        ret double %tmp.0
+; CHECK: @test10
+; CHECK: store
+}
+
+
+; DSE should delete the dead trampoline.
+declare void @test11f()
+define void @test11() {
+; CHECK: @test11
+	%storage = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
+; CHECK-NOT: alloca
+	%cast = getelementptr [10 x i8]* %storage, i32 0, i32 0		; <i8*> [#uses=1]
+	%tramp = call i8* @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null )		; <i8*> [#uses=1]
+; CHECK-NOT: trampoline
+	ret void
+; CHECK: ret void
+}
+
+
+; PR2599 - load -> store to same address.
+define void @test12({ i32, i32 }* %x) nounwind  {
+	%tmp4 = getelementptr { i32, i32 }* %x, i32 0, i32 0
+	%tmp5 = load i32* %tmp4, align 4
+	%tmp7 = getelementptr { i32, i32 }* %x, i32 0, i32 1
+	%tmp8 = load i32* %tmp7, align 4
+	%tmp17 = sub i32 0, %tmp8
+	store i32 %tmp5, i32* %tmp4, align 4
+	store i32 %tmp17, i32* %tmp7, align 4
+	ret void
+; CHECK: @test12
+; CHECK-NOT: tmp5
+; CHECK: ret void
+}
+
+
+; %P doesn't escape, the DEAD instructions should be removed.
+declare void @test13f()
+define i32* @test13() {
+        %p = tail call i8* @malloc(i32 4)
+        %P = bitcast i8* %p to i32*
+        %DEAD = load i32* %P
+        %DEAD2 = add i32 %DEAD, 1
+        store i32 %DEAD2, i32* %P
+        call void @test13f( )
+        store i32 0, i32* %P
+        ret i32* %P
+; CHECK: @test13()
+; CHECK-NEXT: malloc
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void
+}
+
+declare noalias i8* @malloc(i32)
+
+
+
+define void @test14(i32* %Q) {
+        %P = alloca i32
+        %DEAD = load i32* %Q
+        store i32 %DEAD, i32* %P
+        ret void
+
+; CHECK: @test14
+; CHECK-NEXT: ret void
 }
 
+
+; PR8701
+
+;; Fully dead overwrite of memcpy.
+define void @test15(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  ret void
+; CHECK: @test15
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret
+}
+
+;; Full overwrite of smaller memcpy.
+define void @test16(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 8, i32 1)
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  ret void
+; CHECK: @test16
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret
+}
+
+;; Overwrite of memset by memcpy.
+define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
+  tail call void @llvm.memset.i64(i8* %P, i8 42, i64 8, i32 1)
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  ret void
+; CHECK: @test17
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret
+}
+
+; Should not delete the volatile memset.
+define void @test17v(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 true)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+; CHECK: @test17v
+; CHECK-NEXT: call void @llvm.memset
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret
+}
+
+; PR8728
+; Do not delete instruction where possible situation is:
+; A = B
+; A = A
+define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.i64(i8* %P, i8* %R, i64 12, i32 1)
+  ret void
+; CHECK: @test18
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/DeadStoreElimination/volatile-load.ll b/test/Transforms/DeadStoreElimination/volatile-load.ll
deleted file mode 100644
index 59a1129348b9..000000000000
--- a/test/Transforms/DeadStoreElimination/volatile-load.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -dse -S | grep {volatile load}
-
-@g_1 = global i32 0
-
-define void @foo() nounwind  {
-	%t = volatile load i32* @g_1
-	ret void
-}
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
new file mode 100644
index 000000000000..e3c75f97dcfc
--- /dev/null
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -0,0 +1,121 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+
+; CHECK: @test1
+define void @test1(i8 %V, i32 *%P) {
+  %A = bitcast i64 42 to double  ;; dead
+  %B = add i32 4, 19             ;; constant folds
+  store i32 %B, i32* %P
+  ; CHECK-NEXT: store i32 23, i32* %P
+  
+  %C = zext i8 %V to i32
+  %D = zext i8 %V to i32  ;; CSE
+  volatile store i32 %C, i32* %P
+  volatile store i32 %D, i32* %P
+  ; CHECK-NEXT: %C = zext i8 %V to i32
+  ; CHECK-NEXT: volatile store i32 %C
+  ; CHECK-NEXT: volatile store i32 %C
+  
+  %E = add i32 %C, %C
+  %F = add i32 %C, %C
+  volatile store i32 %E, i32* %P
+  volatile store i32 %F, i32* %P
+  ; CHECK-NEXT: %E = add i32 %C, %C
+  ; CHECK-NEXT: volatile store i32 %E
+  ; CHECK-NEXT: volatile store i32 %E
+
+  %G = add nuw i32 %C, %C         ;; not a CSE with E
+  volatile store i32 %G, i32* %P
+  ; CHECK-NEXT: %G = add nuw i32 %C, %C
+  ; CHECK-NEXT: volatile store i32 %G
+  ret void
+}
+
+
+;; Simple load value numbering.
+; CHECK: @test2
+define i32 @test2(i32 *%P) {
+  %V1 = load i32* %P
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: ret i32 0
+}
+
+;; Cross block load value numbering.
+; CHECK: @test3
+define i32 @test3(i32 *%P, i1 %Cond) {
+  %V1 = load i32* %P
+  br i1 %Cond, label %T, label %F
+T:
+  store i32 4, i32* %P
+  ret i32 42
+F:
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: F:
+  ; CHECK: ret i32 0
+}
+
+;; Cross block load value numbering stops when stores happen.
+; CHECK: @test4
+define i32 @test4(i32 *%P, i1 %Cond) {
+  %V1 = load i32* %P
+  br i1 %Cond, label %T, label %F
+T:
+  ret i32 42
+F:
+  ; Clobbers V1
+  store i32 42, i32* %P
+  
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: F:
+  ; CHECK: ret i32 %Diff
+}
+
+declare i32 @func(i32 *%P) readonly
+
+;; Simple call CSE'ing.
+; CHECK: @test5
+define i32 @test5(i32 *%P) {
+  %V1 = call i32 @func(i32* %P)
+  %V2 = call i32 @func(i32* %P)
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: ret i32 0
+}
+
+;; Trivial Store->load forwarding
+; CHECK: @test6
+define i32 @test6(i32 *%P) {
+  store i32 42, i32* %P
+  %V1 = load i32* %P
+  ret i32 %V1
+  ; CHECK: ret i32 42
+}
+
+;; Trivial dead store elimination.
+; CHECK: @test7
+define void @test7(i32 *%P) {
+  store i32 42, i32* %P
+  store i32 45, i32* %P
+  ret void
+  ; CHECK-NEXT: store i32 45
+  ; CHECK-NEXT: ret void
+}
+
+;; Readnone functions aren't invalidated by stores.
+; CHECK: @test8
+define i32 @test8(i32 *%P) {
+  %V1 = call i32 @func(i32* %P) readnone
+  store i32 4, i32* %P
+  %V2 = call i32 @func(i32* %P) readnone
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: ret i32 0
+}
+
+
diff --git a/test/Transforms/EarlyCSE/dg.exp b/test/Transforms/EarlyCSE/dg.exp
new file mode 100644
index 000000000000..de42dad163fd
--- /dev/null
+++ b/test/Transforms/EarlyCSE/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 535a1d0fba6b..946453f586ed 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -functionattrs -S | grep readnone | count 4
+; RUN: opt < %s -basicaa -functionattrs -S | grep readnone | count 4
 @x = global i32 0
 
 declare i32 @e() readnone
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index b455fdd8c3ec..22eca1320415 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -functionattrs -S | grep readonly | count 2
+; RUN: opt < %s -basicaa -functionattrs -S | grep readonly | count 2
 
 define i32 @f() {
 entry:
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
index c6c2e13e33e1..faac1184b601 100644
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -functionattrs -S | FileCheck %s
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 
 %struct.X = type { i32*, i32* }
 
diff --git a/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll b/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
index 672b5e1392b7..9655da45c646 100644
--- a/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
+++ b/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -functionattrs -S | grep readnone
+; RUN: opt < %s -basicaa -functionattrs -S | grep readnone
 
 @s = external constant i8		; <i8*> [#uses=1]
 
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
new file mode 100644
index 000000000000..f21fabc493c7
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+; PR8279
+
+@g = constant i32 1
+
+define void @foo() {
+; CHECK: void @foo() {
+  %tmp = volatile load i32* @g
+  ret void
+}
diff --git a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 2e0a1015caf5..9983374b154e 100644
--- a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep {tmp10 =}
+; RUN: opt < %s -basicaa -gvn -S | not grep {tmp10 =}
 
 	%struct.INT2 = type { i32, i32 }
 @blkshifts = external global %struct.INT2*		; <%struct.INT2**> [#uses=2]
diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index 0be33791f617..14cb91b53167 100644
--- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -1,30 +1,33 @@
-; RUN: opt < %s -gvn -S | grep {tmp17625.* = phi i32. }
-; RUN: opt < %s -gvn -S | grep {tmp17631.* = phi i32. }
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
-@last = external global [65 x i32*]		; <[65 x i32*]*> [#uses=1]
+@last = external global [65 x i32*]
 
 define i32 @NextRootMove(i32 %wtm) {
-cond_next95:		; preds = %cond_true85, %cond_true79, %cond_true73, %bb68
-	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4		; <i32*> [#uses=0]
+entry:
+	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+; CHECK: entry:
+; CHECK-NEXT: %tmp17618 = load
+; CHECK-NOT: load
+; CHECK-NOT: phi
 	br label %cond_true116
 
-cond_true116:		; preds = %cond_true111
+cond_true116:
 	br i1 false, label %cond_true128, label %cond_true145
 
-cond_true128:		; preds = %cond_true121
-	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4		; <i32*> [#uses=0]
+cond_true128:
+	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
 	br i1 false, label %bb98.backedge, label %return.loopexit
 
-bb98.backedge:		; preds = %bb171, %cond_true145, %cond_true128
+bb98.backedge:
 	br label %cond_true116
 
-cond_true145:		; preds = %cond_false
-	%tmp17631 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4		; <i32*> [#uses=0]
+cond_true145:
+	%tmp17631 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
 	br i1 false, label %bb98.backedge, label %return.loopexit
 
-return.loopexit:		; preds = %bb171, %cond_true145, %cond_true128
+return.loopexit:
 	br label %return
 
-return:		; preds = %return.loopexit, %cond_next95, %cond_true85
+return:
 	ret i32 0
 }
diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index faa1157dd8be..f2c001296f63 100644
--- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep {tmp47 = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep {tmp47 = phi i32 }
 
 	%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
 @debug = external constant i32		; <i32*> [#uses=0]
diff --git a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
index 0d1d8bced000..a570e3571ee0 100644
--- a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
+++ b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep {tmp701 =}
+; RUN: opt < %s -basicaa -gvn -S | not grep {tmp701 =}
 
 @img_width = external global i16		; <i16*> [#uses=2]
 
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index 361c1557f24f..be69cfc0319c 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep {ret i8 \[%\]tmp3}
+; RUN: opt < %s -basicaa -gvn -S | grep {ret i8 \[%\]tmp3}
 ; PR2503
 
 @g_3 = external global i8		; <i8*> [#uses=2]
diff --git a/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll b/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
index 066e3038b087..d6e1c6b76d0b 100644
--- a/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
+++ b/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -gvn -enable-full-load-pre -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+
+; CHECK-NOT: load
+; CHECK-NOT: phi
 
 define i8* @cat(i8* %s1, ...) nounwind {
 entry:
@@ -29,18 +32,11 @@ bb10:                                             ; preds = %bb8
   br label %bb11
 
 bb11:                                             ; preds = %bb10, %bb9
-; CHECK: bb11:
-; CHECK: phi
-; CHECK-NOT: phi
   br label %bb12
 
 bb12:                                             ; preds = %bb11, %bb6
-; CHECK: bb12:
-; CHECK: phi
-; CHECK-NOT: phi
   br i1 undef, label %bb8, label %bb13
 
 bb13:                                             ; preds = %bb12
-; CHECK: bb13:
   ret i8* undef
 }
diff --git a/test/Transforms/GVN/2010-11-13-Simplify.ll b/test/Transforms/GVN/2010-11-13-Simplify.ll
new file mode 100644
index 000000000000..07585a20b6ca
--- /dev/null
+++ b/test/Transforms/GVN/2010-11-13-Simplify.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+
+declare i32 @foo(i32) readnone
+
+define i1 @bar() {
+; CHECK: @bar
+  %a = call i32 @foo (i32 0) readnone
+  %b = call i32 @foo (i32 0) readnone
+  %c = and i32 %a, %b
+  %x = call i32 @foo (i32 %a) readnone
+  %y = call i32 @foo (i32 %c) readnone
+  %z = icmp eq i32 %x, %y
+  ret i1 %z
+; CHECK: ret i1 true
+} 
diff --git a/test/Transforms/GVN/calls-nonlocal.ll b/test/Transforms/GVN/calls-nonlocal.ll
index f0edf09bff98..24ef2e9ec412 100644
--- a/test/Transforms/GVN/calls-nonlocal.ll
+++ b/test/Transforms/GVN/calls-nonlocal.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep strlen | count 2
+; RUN: opt < %s -basicaa -gvn -S | grep strlen | count 2
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
 
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index e212d791ae5a..be6c3498fe40 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -1,52 +1,55 @@
-; RUN: opt < %s -gvn -S | grep {br i1 false}
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 @a = external global i32		; <i32*> [#uses=7]
 
+; CHECK: @foo
 define i32 @foo() nounwind {
 entry:
-	%0 = load i32* @a, align 4		; <i32> [#uses=1]
-	%1 = icmp eq i32 %0, 4		; <i1> [#uses=1]
+	%0 = load i32* @a, align 4
+	%1 = icmp eq i32 %0, 4
 	br i1 %1, label %bb, label %bb1
 
 bb:		; preds = %entry
 	br label %bb8
 
 bb1:		; preds = %entry
-	%2 = load i32* @a, align 4		; <i32> [#uses=1]
-	%3 = icmp eq i32 %2, 5		; <i1> [#uses=1]
+	%2 = load i32* @a, align 4
+	%3 = icmp eq i32 %2, 5
 	br i1 %3, label %bb2, label %bb3
 
 bb2:		; preds = %bb1
 	br label %bb8
 
 bb3:		; preds = %bb1
-	%4 = load i32* @a, align 4		; <i32> [#uses=1]
-	%5 = icmp eq i32 %4, 4		; <i1> [#uses=1]
+	%4 = load i32* @a, align 4
+	%5 = icmp eq i32 %4, 4
+; CHECK: br i1 false, label %bb4, label %bb5
 	br i1 %5, label %bb4, label %bb5
 
 bb4:		; preds = %bb3
-	%6 = load i32* @a, align 4		; <i32> [#uses=1]
-	%7 = add i32 %6, 5		; <i32> [#uses=1]
+	%6 = load i32* @a, align 4
+	%7 = add i32 %6, 5
 	br label %bb8
 
 bb5:		; preds = %bb3
-	%8 = load i32* @a, align 4		; <i32> [#uses=1]
-	%9 = icmp eq i32 %8, 5		; <i1> [#uses=1]
+	%8 = load i32* @a, align 4
+	%9 = icmp eq i32 %8, 5
+; CHECK: br i1 false, label %bb6, label %bb7
 	br i1 %9, label %bb6, label %bb7
 
 bb6:		; preds = %bb5
-	%10 = load i32* @a, align 4		; <i32> [#uses=1]
-	%11 = add i32 %10, 4		; <i32> [#uses=1]
+	%10 = load i32* @a, align 4
+	%11 = add i32 %10, 4
 	br label %bb8
 
 bb7:		; preds = %bb5
-	%12 = load i32* @a, align 4		; <i32> [#uses=1]
+	%12 = load i32* @a, align 4
 	br label %bb8
 
 bb8:		; preds = %bb7, %bb6, %bb4, %bb2, %bb
-	%.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ]		; <i32> [#uses=1]
+	%.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ]
 	br label %return
 
 return:		; preds = %bb8
 	ret i32 %.0
-}
+}
\ No newline at end of file
diff --git a/test/Transforms/GVN/invariant-simple.ll b/test/Transforms/GVN/invariant-simple.ll
index 0a4182c410ae..98ea48cdde32 100644
--- a/test/Transforms/GVN/invariant-simple.ll
+++ b/test/Transforms/GVN/invariant-simple.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GVN/lifetime-simple.ll b/test/Transforms/GVN/lifetime-simple.ll
index 48e5bc8bb63f..02f7bcc9e857 100644
--- a/test/Transforms/GVN/lifetime-simple.ll
+++ b/test/Transforms/GVN/lifetime-simple.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
diff --git a/test/Transforms/GVN/load-constant-mem.ll b/test/Transforms/GVN/load-constant-mem.ll
index 87f33eaadadb..314c8069cace 100644
--- a/test/Transforms/GVN/load-constant-mem.ll
+++ b/test/Transforms/GVN/load-constant-mem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -instcombine -S | grep {ret i32 0}
+; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0}
 ; PR4189
 @G = external constant [4 x i32]
 
diff --git a/test/Transforms/GVN/load-pre-licm.ll b/test/Transforms/GVN/load-pre-licm.ll
new file mode 100644
index 000000000000..63541ad181d1
--- /dev/null
+++ b/test/Transforms/GVN/load-pre-licm.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+@sortlist = external global [5001 x i32], align 4
+
+define void @Bubble() nounwind noinline {
+; CHECK: entry:
+; CHECK-NEXT: %tmp7.pre = load i32
+entry:
+  br label %while.body5
+
+; CHECK: while.body5:
+; CHECK: %tmp7 = phi i32
+; CHECK-NOT: %tmp7 = load i32
+while.body5:
+  %indvar = phi i32 [ 0, %entry ], [ %tmp6, %if.end ]
+  %tmp5 = add i32 %indvar, 2
+  %arrayidx9 = getelementptr [5001 x i32]* @sortlist, i32 0, i32 %tmp5
+  %tmp6 = add i32 %indvar, 1
+  %arrayidx = getelementptr [5001 x i32]* @sortlist, i32 0, i32 %tmp6
+  %tmp7 = load i32* %arrayidx, align 4
+  %tmp10 = load i32* %arrayidx9, align 4
+  %cmp11 = icmp sgt i32 %tmp7, %tmp10
+  br i1 %cmp11, label %if.then, label %if.end
+
+; CHECK: if.then:
+if.then:
+  store i32 %tmp10, i32* %arrayidx, align 4
+  store i32 %tmp7, i32* %arrayidx9, align 4
+  br label %if.end
+
+if.end:
+  %exitcond = icmp eq i32 %tmp6, 100
+  br i1 %exitcond, label %while.end.loopexit, label %while.body5
+
+while.end.loopexit:
+  ret void
+}
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/lpre-call-wrap-2.ll
index 79512a33d993..e39f3ed87d1c 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -enable-load-pre %s | FileCheck %s
 ;
 ; The partially redundant load in bb1 should be hoisted to "bb".  This comes
 ; from this C code (GCC PR 23455):
diff --git a/test/Transforms/GVN/mixed.ll b/test/Transforms/GVN/mixed.ll
index 5152f68f0efb..6bfada2f4d5c 100644
--- a/test/Transforms/GVN/mixed.ll
+++ b/test/Transforms/GVN/mixed.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -S | not grep DEADLOAD
-; RUN: opt < %s -gvn -S | not grep DEADGEP
+; RUN: opt < %s -basicaa -gvn -S | not grep DEADLOAD
+; RUN: opt < %s -basicaa -gvn -S | not grep DEADGEP
 
 define i32 @main(i32** %p) {
 block1:
diff --git a/test/Transforms/GVN/non-local-offset.ll b/test/Transforms/GVN/non-local-offset.ll
new file mode 100644
index 000000000000..8eaa99933ab9
--- /dev/null
+++ b/test/Transforms/GVN/non-local-offset.ll
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; GVN should ignore the store to p[1] to see that the load from p[0] is
+; fully redundant.
+
+; CHECK: @yes
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+
+define void @yes(i1 %c, i32* %p, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr inbounds i32* %p, i64 1
+  store i32 1, i32* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  ret void
+}
+
+; GVN should ignore the store to p[1] to see that the first load from p[0] is
+; fully redundant. However, the second load is larger, so it's not a simple
+; redundancy.
+
+; CHECK: @watch_out_for_size_change
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+; CHECK: if.else:
+; CHECK: load i64* %pc
+; CHECK: store i64
+
+define void @watch_out_for_size_change(i1 %c, i32* %p, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr inbounds i32* %p, i64 1
+  store i32 1, i32* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  %pc = bitcast i32* %p to i64*
+  %qc = bitcast i32* %q to i64*
+  %t64 = load i64* %pc
+  store i64 %t64, i64* %qc
+  ret void
+}
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index 5a42d9536ca1..1d50205c6851 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -stats -disable-output |& grep {Number of loads deleted}
+; RUN: opt < %s -basicaa -gvn -stats -disable-output |& grep {Number of loads deleted}
 ; rdar://7363102
 
 ; GVN should be able to eliminate load %tmp22.i, because it is redundant with
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
index 4d533bbc4064..9e4ae18c710c 100644
--- a/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S | FileCheck %s
+; RUN: opt %s -basicaa -gvn -S | FileCheck %s
 
 %t = type { i32 }
 declare void @test1f(i8*)
diff --git a/test/Transforms/GVN/phi-translate.ll b/test/Transforms/GVN/phi-translate.ll
new file mode 100644
index 000000000000..f10537e0c930
--- /dev/null
+++ b/test/Transforms/GVN/phi-translate.ll
@@ -0,0 +1,31 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; CHECK: @foo
+; CHECK: entry.end_crit_edge:
+; CHECK:   %n.pre = load i32* %q.phi.trans.insert
+; CHECK: then:
+; CHECK:   store i32 %z
+; CHECK: end:
+; CHECK:   %n = phi i32 [ %n.pre, %entry.end_crit_edge ], [ %z, %then ]
+; CHECK:   ret i32 %n
+
+@G = external global [100 x i32]
+define i32 @foo(i32 %x, i32 %z) {
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %end, label %then
+
+then:
+  %i = sext i32 %x to i64
+  %p = getelementptr [100 x i32]* @G, i64 0, i64 %i
+  store i32 %z, i32* %p
+  br label %end
+
+end:
+  %j = sext i32 %x to i64
+  %q = getelementptr [100 x i32]* @G, i64 0, i64 %j
+  %n = load i32* %q
+  ret i32 %n
+}
diff --git a/test/Transforms/GVN/pre-load.ll b/test/Transforms/GVN/pre-load.ll
index d40a467bf677..bf4add42e80b 100644
--- a/test/Transforms/GVN/pre-load.ll
+++ b/test/Transforms/GVN/pre-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32* %p, i1 %C) {
diff --git a/test/Transforms/GVN/pre-single-pred.ll b/test/Transforms/GVN/pre-single-pred.ll
index 706a16b7bdd2..f1f5c71a93ab 100644
--- a/test/Transforms/GVN/pre-single-pred.ll
+++ b/test/Transforms/GVN/pre-single-pred.ll
@@ -1,4 +1,13 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | not grep {tmp3 = load}
+; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
+; This testcase assumed we'll PRE the load into %for.cond, but we don't actually
+; verify that doing so is safe.  If there didn't _happen_ to be a load in
+; %for.end, we would actually be lengthening the execution on some paths, and
+; we were never actually checking that case.  Now we actually do perform some
+; conservative checking to make sure we don't make paths longer, but we don't
+; currently get this case, which we got lucky on previously.
+;
+; Now that that faulty assumption is corrected, test that we DON'T incorrectly
+; hoist the load.  Doing the right thing for the wrong reasons is still a bug.
 
 @p = external global i32
 define i32 @f(i32 %n) nounwind {
@@ -13,6 +22,8 @@ for.cond:		; preds = %for.inc, %entry
 for.cond.for.end_crit_edge:		; preds = %for.cond
 	br label %for.end
 
+; CHECK: for.body:
+; CHECK-NEXT: %tmp3 = load i32* @p
 for.body:		; preds = %for.cond
 	%tmp3 = load i32* @p		; <i32> [#uses=1]
 	%dec = add i32 %tmp3, -1		; <i32> [#uses=2]
@@ -20,6 +31,7 @@ for.body:		; preds = %for.cond
 	%cmp6 = icmp slt i32 %dec, 0		; <i1> [#uses=1]
 	br i1 %cmp6, label %for.body.for.end_crit_edge, label %for.inc
 
+; CHECK: for.body.for.end_crit_edge:
 for.body.for.end_crit_edge:		; preds = %for.body
 	br label %for.end
 
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll
new file mode 100644
index 000000000000..2fcfc471c5d2
--- /dev/null
+++ b/test/Transforms/GVN/preserve-tbaa.ll
@@ -0,0 +1,28 @@
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; GVN should preserve the TBAA tag on loads when doing PRE.
+
+; CHECK: @test
+; CHECK: %tmp33.pre = load i16* undef, align 2, !tbaa !0
+; CHECK: br label %for.body
+define void @test() nounwind {
+entry:
+  br i1 undef, label %bb.nph, label %for.end
+
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %bb.nph
+  %tmp33 = load i16* undef, align 2, !tbaa !0
+  store i16 undef, i16* undef, align 2, !tbaa !0
+  br i1 false, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll
index d61eb81b0691..479724063e0b 100644
--- a/test/Transforms/GVN/rle-must-alias.ll
+++ b/test/Transforms/GVN/rle-must-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep {DEAD = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 }
 
 ; GVN should eliminate the fully redundant %9 GEP which 
 ; allows DEAD to be removed.  This is PR3198.
diff --git a/test/Transforms/GVN/rle-nonlocal.ll b/test/Transforms/GVN/rle-nonlocal.ll
index 5c73dad399e6..6b74e9a946d1 100644
--- a/test/Transforms/GVN/rle-nonlocal.ll
+++ b/test/Transforms/GVN/rle-nonlocal.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 define i32 @main(i32** %p) {
 block1:
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
index 04e8c3856845..c6cd1fdc00c5 100644
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ b/test/Transforms/GVN/rle-semidominated.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep {DEAD = phi i32 }
+; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 }
 
 define i32 @main(i32* %p) {
 block1:
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index d656c1a5fcc7..2e4332175070 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
 ; 32-bit little endian target.
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index cfc9f302ff00..5b06fea5d92c 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -2,9 +2,9 @@
 ; alignments.  Elements 0 and 2 must be 16-byte aligned, and element 
 ; 1 must be at least 8 byte aligned (but could be more). 
 
-; RUN: opt < %s -globalopt -S | grep {@G.0 = internal global .*align 16}
-; RUN: opt < %s -globalopt -S | grep {@G.1 = internal global .*align 8}
-; RUN: opt < %s -globalopt -S | grep {@G.2 = internal global .*align 16}
+; RUN: opt < %s -globalopt -S | grep {@G.0 = internal unnamed_addr global .*align 16}
+; RUN: opt < %s -globalopt -S | grep {@G.1 = internal unnamed_addr global .*align 8}
+; RUN: opt < %s -globalopt -S | grep {@G.2 = internal unnamed_addr global .*align 16}
 ; rdar://5891920
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
index e024fc27ecb6..d645ce49438b 100644
--- a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
+++ b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalopt -S | grep {@X = internal global i32}
+; RUN: opt < %s -globalopt -S | grep {@X = internal unnamed_addr global i32}
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 @X = internal global i32* null		; <i32**> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll b/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
index c43565af635b..b73f62ba148b 100644
--- a/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
+++ b/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0"
 
 @TOP = internal global i64* null                    ; <i64**> [#uses=2]
-; CHECK: @TOP = internal global i64* null
+; CHECK: @TOP = internal unnamed_addr global i64* null
 @channelColumns = internal global i64 0             ; <i64*> [#uses=2]
 
 ; Derived from @DescribeChannel() in yacr2
diff --git a/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll b/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
new file mode 100644
index 000000000000..ad5b440a5abe
--- /dev/null
+++ b/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; PR8389: Globals with weak_odr linkage type must not be modified
+
+; CHECK: weak_odr global i32 0
+
+@SomeVar = weak_odr global i32 0
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR } ]
+
+define internal void @CTOR() {
+  store i32 23, i32* @SomeVar
+  ret void
+}
+
+
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index bb1fc84f46f9..9da5a5e3c935 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -55,3 +55,12 @@ entry:
   ret void
 }
 
+
+
+
+@data8 = internal global [8000 x i8] zeroinitializer, align 16
+define void @memset_with_strange_user() ssp {
+  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8000 x i8]* @data8, i64 0, i64 0), i8 undef, i64 ptrtoint (i8* getelementptr ([8000 x i8]* @data8, i64 1, i64 sub (i64 0, i64 ptrtoint ([8000 x i8]* @data8 to i64))) to i64), i32 16, i1 false)
+  ret void
+}
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
new file mode 100644
index 000000000000..204f979ed3e1
--- /dev/null
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -0,0 +1,23 @@
+; RUN: opt -globalopt %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type { i32, void ()* }
+%struct.foo = type { i32* }
+
+@G = global i32 0, align 4
+@H = global i32 0, align 4
+@X = global %struct.foo zeroinitializer, align 8
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @init }]
+
+; PR8710 - GlobalOpt shouldn't change the global's initializer to have this
+; arbitrary constant expression, the code generator can't handle it.
+define internal void @init() {
+entry:
+  %tmp = getelementptr inbounds %struct.foo* @X, i32 0, i32 0
+  store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
+  ret void
+}
+
+; CHECK: @init
+; CHECK: store i32*
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt.ll b/test/Transforms/GlobalOpt/ctor-list-opt.ll
index 887e7ee643c5..542c786762ea 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -S | not grep CTOR
-@llvm.global_ctors = appending global [10 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR2 }, { i32, void ()* } { i32 65535, void ()* @CTOR3 }, { i32, void ()* } { i32 65535, void ()* @CTOR4 }, { i32, void ()* } { i32 65535, void ()* @CTOR5 }, { i32, void ()* } { i32 65535, void ()* @CTOR6 }, { i32, void ()* } { i32 65535, void ()* @CTOR7 }, { i32, void ()* } { i32 65535, void ()* @CTOR8 }, { i32, void ()* } { i32 2147483647, void ()* null } ]		; <[10 x { i32, void ()* }]*> [#uses=0]
+@llvm.global_ctors = appending global [11 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR2 }, { i32, void ()* } { i32 65535, void ()* @CTOR3 }, { i32, void ()* } { i32 65535, void ()* @CTOR4 }, { i32, void ()* } { i32 65535, void ()* @CTOR5 }, { i32, void ()* } { i32 65535, void ()* @CTOR6 }, { i32, void ()* } { i32 65535, void ()* @CTOR7 }, { i32, void ()* } { i32 65535, void ()* @CTOR8 }, { i32, void ()* } { i32 65535, void ()* @CTOR9 }, { i32, void ()* } { i32 2147483647, void ()* null } ]		; <[10 x { i32, void ()* }]*> [#uses=0]
 @G = global i32 0		; <i32*> [#uses=1]
 @G2 = global i32 0		; <i32*> [#uses=1]
 @G3 = global i32 -123		; <i32*> [#uses=2]
@@ -74,7 +74,7 @@ define void @setto(i32* %P, i32 %V) {
 declare double @cos(double)
 
 define internal void @CTOR8() {
-	%X = call double @cos( double 1.000000e+00 )		; <double> [#uses=1]
+	%X = call double @cos( double 0.000000e+00 )		; <double> [#uses=1]
 	store double %X, double* @D
 	ret void
 }
@@ -83,3 +83,18 @@ define i1 @accessor() {
 	%V = load i1* @CTORGV		; <i1> [#uses=1]
 	ret i1 %V
 }
+
+%struct.A = type { i32 }
+%struct.B = type { i32 (...)**, i8*, [4 x i8] }
+@GV1 = global %struct.B zeroinitializer, align 8
+@GV2 =  constant [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* bitcast ({ i8*, i8*, i32, i32, i8*, i64 }* null to i8*)]
+; CHECK-NOT: CTOR9
+define internal void @CTOR9() {
+entry:
+  %0 = bitcast %struct.B* @GV1 to i8*
+  %1 = getelementptr inbounds i8* %0, i64 16
+  %2 = bitcast i8* %1 to %struct.A*
+  %3 = bitcast %struct.B* @GV1 to i8***
+  store i8** getelementptr inbounds ([3 x i8*]* @GV2, i64 1, i64 0), i8*** %3
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
index 9397a1236551..1e0db6a998bd 100644
--- a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
+++ b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -S > %t
-; RUN: grep {@Y = internal global \\\[3 x \[%\]struct.X\\\] zeroinitializer} %t
+; RUN: grep {@Y = internal unnamed_addr global \\\[3 x \[%\]struct.X\\\] zeroinitializer} %t
 ; RUN: grep load %t | count 6
 ; RUN: grep {add i32 \[%\]a, \[%\]b} %t | count 3
 
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 335f5ec3a368..8f063a2fe420 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -globalopt -S | \
-; RUN:   grep {G1 = internal constant}
+; RUN:   grep {G1 = internal unnamed_addr constant}
 
 @G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
new file mode 100644
index 000000000000..be0282122775
--- /dev/null
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -0,0 +1,54 @@
+; RUN: opt %s -globalopt -S | FileCheck %s
+
+@a = internal global i32 0, align 4
+@b = internal global i32 0, align 4
+@c = internal global i32 0, align 4
+@d = internal constant [4 x i8] c"foo\00", align 1
+
+; CHECK: @a = internal global i32 0, align 4
+; CHECK: @b = internal global i32 0, align 4
+; CHECK: @c = internal unnamed_addr global i32 0, align 4
+; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1
+
+define i1 @bah(i64 %i) nounwind readonly optsize ssp {
+entry:
+  %arrayidx4 = getelementptr inbounds [4 x i8]* @d, i64 0, i64 %i
+  %tmp5 = load i8* %arrayidx4, align 1
+  %cmp = icmp eq i8 %tmp5, 42
+  ret i1 %cmp
+}
+
+define void @baz(i32 %x) {
+entry:
+  store i32 %x, i32* @a, align 4
+  store i32 %x, i32* @b, align 4
+  store i32 %x, i32* @c, align 4
+  ret void
+}
+
+define i32 @foo(i32* %x) nounwind readnone optsize ssp {
+entry:
+  %cmp = icmp eq i32* %x, @a
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @bar() {
+entry:
+  switch i64 ptrtoint (i32* @b to i64), label %sw.epilog [
+    i64 1, label %return
+    i64 0, label %return
+  ]
+
+sw.epilog:
+  ret i32 0
+
+return:
+  ret i32 1
+}
+
+define i32 @zed() {
+entry:
+  %tmp1 = load i32* @c, align 4
+  ret i32 %tmp1
+}
diff --git a/test/Transforms/IndVarSimplify/loop-invariant-step.ll b/test/Transforms/IndVarSimplify/loop-invariant-step.ll
deleted file mode 100644
index 2d2d1fe264ab..000000000000
--- a/test/Transforms/IndVarSimplify/loop-invariant-step.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -loop-index-split -instcombine -indvars -disable-output
-; PR4455
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-declare i8* @fast_memcpy(i8*, i8*, i64)
-
-define void @dvdsub_decode() nounwind {
-entry:		; preds = %bb1
-	br label %LoopA
-
-LoopA:		; preds = %LoopA, %entry
-	%x1.0.i17 = phi i32 [ %t0, %LoopA ], [ 0, %entry ]		; <i32> [#uses=2]
-	%t0 = add i32 %x1.0.i17, 1		; <i32> [#uses=1]
-	br i1 undef, label %LoopA, label %middle
-
-middle:		; preds = %LoopA
-	%t1 = sub i32 0, %x1.0.i17		; <i32> [#uses=1]
-	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
-	br label %LoopB
-
-LoopB:		; preds = %LoopB, %bb.nph.i27
-	%y.029.i = phi i32 [ 0, %middle ], [ %t7, %LoopB ]		; <i32> [#uses=2]
-	%t3 = mul i32 %y.029.i, %t2		; <i32> [#uses=1]
-	%t4 = sext i32 %t3 to i64		; <i64> [#uses=1]
-	%t5 = getelementptr i8* null, i64 %t4		; <i8*> [#uses=1]
-	%t6 = call i8* @fast_memcpy(i8* %t5, i8* undef, i64 undef) nounwind		; <i8*> [#uses=0]
-	%t7 = add i32 %y.029.i, 1		; <i32> [#uses=1]
-	br i1 undef, label %LoopB, label %exit
-
-exit:
-	ret void
-}
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 6531b9e277e1..609a3d4e153e 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -43,5 +43,5 @@ define i32 @test2(i1 %cond) {
   
 ; CHECK: @test2(
 ; CHECK-NOT: = alloca
-; CHECK: ret i32 42
+; CHECK: ret i32
 }
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll
index c3552f696433..e601faf2bb37 100644
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | grep {llvm.memcpy}
+; RUN: opt < %s -inline -S | FileCheck %s
 
 ; Inlining a byval struct should cause an explicit copy into an alloca.
 
@@ -16,7 +16,7 @@ entry:
 
 declare i32 @printf(i8*, ...) nounwind 
 
-define i32 @main() nounwind  {
+define i32 @test1() nounwind  {
 entry:
 	%S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
 	%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
@@ -25,4 +25,82 @@ entry:
 	store i64 2, i64* %tmp4, align 4
 	call void @f( %struct.ss* byval  %S ) nounwind 
 	ret i32 0
+; CHECK: @test1()
+; CHECK: %S1 = alloca %struct.ss
+; CHECK: %S = alloca %struct.ss
+; CHECK: call void @llvm.memcpy
+; CHECK: ret i32 0
 }
+
+; Inlining a byval struct should NOT cause an explicit copy 
+; into an alloca if the function is readonly
+
+define internal i32 @f2(%struct.ss* byval  %b) nounwind readonly {
+entry:
+	%tmp = getelementptr %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
+	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
+	ret i32 %tmp2
+}
+
+define i32 @test2() nounwind  {
+entry:
+	%S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
+	%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
+	store i32 1, i32* %tmp1, align 8
+	%tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1		; <i64*> [#uses=1]
+	store i64 2, i64* %tmp4, align 4
+	%X = call i32 @f2( %struct.ss* byval  %S ) nounwind 
+	ret i32 %X
+; CHECK: @test2()
+; CHECK: %S = alloca %struct.ss
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret i32
+}
+
+
+; Inlining a byval with an explicit alignment needs to use *at least* that
+; alignment on the generated alloca.
+; PR8769
+declare void @g3(%struct.ss* %p)
+
+define internal void @f3(%struct.ss* byval align 64 %b) nounwind {
+   call void @g3(%struct.ss* %b)  ;; Could make alignment assumptions!
+   ret void
+}
+
+define void @test3() nounwind  {
+entry:
+	%S = alloca %struct.ss, align 1  ;; May not be aligned.
+	call void @f3( %struct.ss* byval align 64 %S) nounwind 
+	ret void
+; CHECK: @test3()
+; CHECK: %S1 = alloca %struct.ss, align 64
+; CHECK: %S = alloca %struct.ss
+; CHECK: call void @llvm.memcpy
+; CHECK: call void @g3(%struct.ss* %S1)
+; CHECK: ret void
+}
+
+
+; Inlining a byval struct should NOT cause an explicit copy 
+; into an alloca if the function is readonly, but should increase an alloca's
+; alignment to satisfy an explicit alignment request.
+
+define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly {
+        call void @g3(%struct.ss* %b)
+	ret i32 4
+}
+
+define i32 @test4() nounwind  {
+entry:
+	%S = alloca %struct.ss, align 2		; <%struct.ss*> [#uses=4]
+	%X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind 
+	ret i32 %X
+; CHECK: @test4()
+; CHECK: %S = alloca %struct.ss, align 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: call void @g3
+; CHECK: ret i32 4
+}
+
diff --git a/test/Transforms/Inline/byval2.ll b/test/Transforms/Inline/byval2.ll
deleted file mode 100644
index a7ab77cb562c..000000000000
--- a/test/Transforms/Inline/byval2.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -inline -S | not grep {llvm.memcpy}
-
-; Inlining a byval struct should NOT cause an explicit copy 
-; into an alloca if the function is readonly
-
-	%struct.ss = type { i32, i64 }
-@.str = internal constant [10 x i8] c"%d, %lld\0A\00"		; <[10 x i8]*> [#uses=1]
-
-define internal i32 @f(%struct.ss* byval  %b) nounwind readonly {
-entry:
-	%tmp = getelementptr %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
-	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
-	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
-	ret i32 %tmp2
-}
-
-declare i32 @printf(i8*, ...) nounwind 
-
-define i32 @main() nounwind  {
-entry:
-	%S = alloca %struct.ss		; <%struct.ss*> [#uses=4]
-	%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp1, align 8
-	%tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1		; <i64*> [#uses=1]
-	store i64 2, i64* %tmp4, align 4
-	%X = call i32 @f( %struct.ss* byval  %S ) nounwind 
-	ret i32 %X
-}
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
index 0a50786498df..c32be4e024a3 100644
--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
 ; PR5009
 
 ; CHECK: define i32 @main() 
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index 9ed4b6958c38..51ea4baa3866 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
+; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
index 32144d4ebba5..e502fd5777d5 100644
--- a/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
+; RUN: opt -basicaa -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
 ; rdar://6295824 and PR6724
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index 6d227547c8d5..c02d33ccc1b7 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -1,7 +1,6 @@
 ; This testcase can be simplified by "realizing" that alloca can never return 
 ; null.
-; RUN: opt < %s -instcombine -simplifycfg | \
-; RUN:    llvm-dis | not grep br
+; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
 
 declare i32 @bitmap_clear(...)
 
diff --git a/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll b/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
index c337ea781e4b..baaafefa81de 100644
--- a/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
+++ b/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
@@ -6,7 +6,7 @@ define i64 @test(i64 %X) {
         %Y = shl i64 %X, 16             ; <i64> [#uses=1]
 ; CHECK: %Y = shl i64 %X, 16
         %Z = ashr i64 %Y, 16            ; <i64> [#uses=1]
-; CHECK: %Z = ashr i64 %Y, 16
+; CHECK: %Z = ashr exact i64 %Y, 16
         ret i64 %Z
 ; CHECK: ret i64 %Z
 }
diff --git a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
index 5bcb54306065..807efcf29f19 100644
--- a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
+++ b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
@@ -1,7 +1,6 @@
 ; PR1271
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep {ashr i32 %.mp137, 2}
-; END.
+; RUN:    grep {ashr exact i32 %.mp137, 2}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
index b2774d6522df..43af190abcea 100644
--- a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
+++ b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
@@ -1,34 +1,67 @@
-; RUN: opt < %s -instcombine -S > %t
-; RUN: grep urem %t | count 3
-; RUN: grep srem %t | count 1
-; RUN: grep sub %t | count 2
-; RUN: grep add %t | count 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR3103
 
 define i8 @test1(i8 %x, i8 %y) {
+; CHECK: @test1
   %A = udiv i8 %x, %y
+; CHECK-NEXT: urem
   %B = mul i8 %A, %y
   %C = sub i8 %x, %B
   ret i8 %C
+; CHECK-NEXT: ret
 }
 
 define i8 @test2(i8 %x, i8 %y) {
+; CHECK: @test2
   %A = sdiv i8 %x, %y
+; CHECK-NEXT: srem
   %B = mul i8 %A, %y
   %C = sub i8 %x, %B
   ret i8 %C
+; CHECK-NEXT: ret
 }
 
 define i8 @test3(i8 %x, i8 %y) {
+; CHECK: @test3
   %A = udiv i8 %x, %y
+; CHECK-NEXT: urem
   %B = mul i8 %A, %y
   %C = sub i8 %B, %x
+; CHECK-NEXT: sub
   ret i8 %C
+; CHECK-NEXT: ret
 }
 
 define i8 @test4(i8 %x) {
+; CHECK: @test4
   %A = udiv i8 %x, 3
+; CHECK-NEXT: urem
   %B = mul i8 %A, -3
+; CHECK-NEXT: sub
   %C = sub i8 %x, %B
+; CHECK-NEXT: add
   ret i8 %C
+; CHECK-NEXT: ret
+}
+
+define i32 @test5(i32 %x, i32 %y) {
+; CHECK: @test5
+; (((X / Y) * Y) / Y) -> X / Y
+  %div = sdiv i32 %x, %y
+; CHECK-NEXT: sdiv
+  %mul = mul i32 %div, %y
+  %r = sdiv i32 %mul, %y
+  ret i32 %r
+; CHECK-NEXT: ret
+}
+
+define i32 @test6(i32 %x, i32 %y) {
+; CHECK: @test6
+; (((X / Y) * Y) / Y) -> X / Y
+  %div = udiv i32 %x, %y
+; CHECK-NEXT: udiv
+  %mul = mul i32 %div, %y
+  %r = udiv i32 %mul, %y
+  ret i32 %r
+; CHECK-NEXT: ret
 }
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
new file mode 100644
index 000000000000..441d5f9b0b64
--- /dev/null
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -0,0 +1,46 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; <rdar://problem/8606771>
+; CHECK: @main
+define i32 @main(i32 %argc) nounwind ssp {
+entry:
+  %tmp3151 = trunc i32 %argc to i8
+; CHECK: %tmp3162 = shl i8 %tmp3151, 5
+; CHECK: and i8 %tmp3162, 64
+; CHECK-NOT: shl
+; CHECK-NOT: shr
+  %tmp3161 = or i8 %tmp3151, -17
+  %tmp3162 = and i8 %tmp3151, 122
+  %tmp3163 = xor i8 %tmp3162, -17
+  %tmp4114 = shl i8 %tmp3163, 6
+  %tmp4115 = xor i8 %tmp4114, %tmp3163
+  %tmp4120 = xor i8 %tmp3161, %tmp4115
+  %tmp4126 = lshr i8 %tmp4120, 7
+  %tmp4127 = mul i8 %tmp4126, 64
+  %tmp4086 = zext i8 %tmp4127 to i32
+; CHECK: ret i32
+  ret i32 %tmp4086
+}
+
+; rdar://8739316
+; CHECK: @foo
+define i8 @foo(i8 %arg, i8 %arg1) nounwind {
+bb:
+  %tmp = shl i8 %arg, 7
+  %tmp2 = and i8 %arg1, 84
+  %tmp3 = and i8 %arg1, -118
+  %tmp4 = and i8 %arg1, 33
+  %tmp5 = sub i8 -88, %tmp2
+  %tmp6 = and i8 %tmp5, 84
+  %tmp7 = or i8 %tmp4, %tmp6
+  %tmp8 = xor i8 %tmp, %tmp3
+  %tmp9 = or i8 %tmp7, %tmp8
+  %tmp10 = lshr i8 %tmp8, 7
+  %tmp11 = shl i8 %tmp10, 5
+
+; CHECK: %0 = lshr i8 %tmp8, 2
+; CHECK: %tmp11 = and i8 %0, 32
+
+  %tmp12 = xor i8 %tmp11, %tmp9
+  ret i8 %tmp12
+}
diff --git a/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll b/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
new file mode 100644
index 000000000000..720365c4d6b2
--- /dev/null
+++ b/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define {}* @foo({}* %x, i32 %n) {
+; CHECK: @foo
+; CHECK-NOT: getelementptr
+  %p = getelementptr {}* %x, i32 %n
+  ret {}* %p
+}
+
+define i8* @bar(i64 %n, {{}, [0 x {[0 x i8]}]}* %p) {
+; CHECK: @bar
+  %g = getelementptr {{}, [0 x {[0 x i8]}]}* %p, i64 %n, i32 1, i64 %n, i32 0, i64 %n
+; CHECK: %p, i64 0, i32 1, i64 0, i32 0, i64 %n
+  ret i8* %g
+}
diff --git a/test/Transforms/InstCombine/2010-11-23-Distributed.ll b/test/Transforms/InstCombine/2010-11-23-Distributed.ll
new file mode 100644
index 000000000000..4f8e8dc713b4
--- /dev/null
+++ b/test/Transforms/InstCombine/2010-11-23-Distributed.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+define i32 @foo(i32 %x, i32 %y) {
+; CHECK: @foo
+  %add = add nsw i32 %y, %x
+  %mul = mul nsw i32 %add, %y
+  %square = mul nsw i32 %y, %y
+  %res = sub i32 %mul, %square
+  ret i32 %res
+; CHECK-NEXT: mul i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i1 @bar(i64 %x, i64 %y) {
+; CHECK: @bar
+  %a = and i64 %y, %x
+; CHECK: and
+; CHECK-NOT: and
+  %not = xor i64 %a, -1
+  %b = and i64 %y, %not
+  %r = icmp eq i64 %b, 0
+  ret i1 %r
+; CHECK: ret i1
+}
diff --git a/test/Transforms/InstCombine/2011-02-14-InfLoop.ll b/test/Transforms/InstCombine/2011-02-14-InfLoop.ll
new file mode 100644
index 000000000000..6d8a7ddbe46f
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-02-14-InfLoop.ll
@@ -0,0 +1,19 @@
+; This testcase causes an infinite loop in the instruction combiner,
+; because it changes a pattern and the original pattern is almost
+; identical to the newly-generated pattern.
+; RUN: opt < %s -instcombine -disable-output
+
+;PR PR9216
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @m_387(i8* noalias nocapture %A, i8* nocapture %B, <4 x i1> %C) nounwind {
+entry:
+  %movcsext20 = sext <4 x i1> %C to <4 x i32>
+  %tmp2389 = xor <4 x i32> %movcsext20, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %movcand25 = and <4 x i32> %tmp2389, <i32 undef, i32 undef, i32 undef, i32 -1>
+  %movcor26 = or <4 x i32> %movcand25, zeroinitializer
+  %L2 = bitcast <4 x i32> %movcor26 to <4 x float>
+  %L3 = shufflevector <4 x float> zeroinitializer, <4 x float> %L2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %L3
+}
diff --git a/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll b/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
new file mode 100644
index 000000000000..2f6034e158a3
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR9218
+
+%vec2x2 = type { <2 x double>, <2 x double> }
+
+define %vec2x2 @split(double) nounwind alwaysinline {
+; CHECK: @split
+; CHECK: ret %vec2x2 undef
+  %vba = insertelement <2 x double> undef, double %0, i32 2
+  ret <2 x double> %vba, <2 x double> %vba
+}
diff --git a/test/Transforms/InstCombine/add.ll b/test/Transforms/InstCombine/add.ll
index 4719809d6d36..a316d06894ac 100644
--- a/test/Transforms/InstCombine/add.ll
+++ b/test/Transforms/InstCombine/add.ll
@@ -275,3 +275,27 @@ define i32 @test36(i32 %a) {
 	%q = and i32 %z, 1  ; always zero
 	ret i32 %q
 }
+
+define i1 @test37(i32 %a, i32 %b) nounwind readnone {
+  %add = add i32 %a, %b
+  %cmp = icmp eq i32 %add, %a
+  ret i1 %cmp
+}
+
+define i1 @test38(i32 %a, i32 %b) nounwind readnone {
+  %add = add i32 %a, %b
+  %cmp = icmp eq i32 %add, %b
+  ret i1 %cmp
+}
+
+define i1 @test39(i32 %a, i32 %b) nounwind readnone {
+  %add = add i32 %b, %a
+  %cmp = icmp eq i32 %add, %a
+  ret i1 %cmp
+}
+
+define i1 @test40(i32 %a, i32 %b) nounwind readnone {
+  %add = add i32 %b, %a
+  %cmp = icmp eq i32 %add, %b
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 1cbdd3a3cd54..c5109c528857 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | not grep add
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i64 @test1(i64 %A, i32 %B) {
         %tmp12 = zext i32 %B to i64
@@ -6,19 +6,38 @@ define i64 @test1(i64 %A, i32 %B) {
         %tmp5 = add i64 %tmp3, %A
         %tmp6 = and i64 %tmp5, 123
         ret i64 %tmp6
+; CHECK: @test1
+; CHECK-NEXT: and i64 %A, 123
+; CHECK-NEXT: ret i64
 }
 
-define i32 @test3(i32 %A) {
+define i32 @test2(i32 %A) {
   %B = and i32 %A, 7
   %C = and i32 %A, 32
   %F = add i32 %B, %C
   ret i32 %F
+; CHECK: @test2
+; CHECK-NEXT: and i32 %A, 39
+; CHECK-NEXT: ret i32
 }
 
-define i32 @test4(i32 %A) {
+define i32 @test3(i32 %A) {
   %B = and i32 %A, 128
   %C = lshr i32 %A, 30
   %F = add i32 %B, %C
   ret i32 %F
+; CHECK: @test3
+; CHECK-NEXT: and
+; CHECK-NEXT: lshr
+; CHECK-NEXT: or i32 %B, %C
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test4(i32 %A) {
+  %B = add nuw i32 %A, %A
+  ret i32 %B
+; CHECK: @test4
+; CHECK-NEXT: %B = shl nuw i32 %A, 1
+; CHECK-NEXT: ret i32 %B
 }
 
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index a5a6574e9f46..d898ea3b037a 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -8,3 +8,21 @@ define i1 @test1(double %X, double %Y) {
         ret i1 %bothcond
 ; CHECK:  fcmp ord double %Y, %X
 }
+
+define i1 @test2(i1 %X, i1 %Y) {
+  %a = and i1 %X, %Y
+  %b = and i1 %a, %X
+  ret i1 %b
+; CHECK: @test2
+; CHECK-NEXT: and i1 %X, %Y
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+  %a = and i32 %X, %Y
+  %b = and i32 %Y, %a
+  ret i32 %b
+; CHECK: @test3
+; CHECK-NEXT: and i32 %X, %Y
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll
index d774c0972def..79a096ff0f19 100644
--- a/test/Transforms/InstCombine/bit-checks.ll
+++ b/test/Transforms/InstCombine/bit-checks.ll
@@ -23,4 +23,350 @@ entry:
   %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
   %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
   ret i32 %storemerge
-}
\ No newline at end of file
+}
+
+; tests to check combining (icmp eq (A & B), C) & (icmp eq (A & D), E)
+; tests to check if (icmp eq (A & B), 0) is treated like (icmp eq (A & B), B)
+; if B is a single bit constant
+
+; (icmp eq (A & B), 0) & (icmp eq (A & D), 0) -> (icmp eq (A & (B|D)), 0)
+define i32 @main3(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 0                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main3b(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 16                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 0                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), 0) | (icmp ne (A & D), 0) -> (icmp ne (A & (B|D)), 0)
+define i32 @main3c(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 0                 ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main3d(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 16                ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 0                 ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), B) & (icmp eq (A & D), D) -> (icmp eq (A & (B|D)), (B|D))
+define i32 @main4(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 48                ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main4b(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 0                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, %argc2              ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, %argc3            ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), B) | (icmp ne (A & D), D) -> (icmp ne (A & (B|D)), (B|D))
+define i32 @main4c(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 48                ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main4d(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 0                 ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, %argc2              ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, %argc3            ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), A) & (icmp eq (A & D), A) -> (icmp eq (A & (B&D)), A)
+define i32 @main5_like(i32 %argc, i32 %argc2, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc2, 7                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 7                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, %argc               ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, %argc             ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), A) | (icmp ne (A & D), A) -> (icmp ne (A & (B&D)), A)
+define i32 @main5c_like(i32 %argc, i32 %argc2, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 7                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc2, 7                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 7                 ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, %argc2                    ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, %argc               ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, %argc             ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), C) & (icmp eq (A & D), E) -> (icmp eq (A & (B|D)), (C|E))
+; if B, C, D, E are constant, and it's possible
+define i32 @main6(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 3                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 16                ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main6b(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and, 3                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 0                 ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), C) | (icmp ne (A & D), E) -> (icmp ne (A & (B|D)), (C|E))
+; if B, C, D, E are constant, and it's possible
+define i32 @main6c(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 3                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 48                       ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 16                ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+define i32 @main6d(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+  %and = and i32 %argc, 7                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 3                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 16                       ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, 0                 ; <i1> [#uses=1]
+  %or.cond = or i1 %tobool, %tobool3              ; <i1> [#uses=1]
+  %storemerge = select i1 %or.cond, i32 0, i32 1  ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; test parameter permutations
+; (B & A) == B & (D & A) == D
+define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and1 = and i32 %argc2, %argc                   ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and1, %argc2              ; <i1> [#uses=1]
+  %and2 = and i32 %argc3, %argc                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, %argc3            ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; B == (A & B) & D == (A & D)
+define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and1 = and i32 %argc, %argc2                   ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %argc2, %and1             ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %argc3                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %argc3, %and2            ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; B == (B & A) & D == (D & A)
+define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %and1 = and i32 %argc2, %argc                   ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %argc2, %and1             ; <i1> [#uses=1]
+  %and2 = and i32 %argc3, %argc                   ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %argc3, %and2            ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (A & (B & C)) == (B & C) & (A & (D & E)) == (D & E)
+define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3,
+                   i32 %argc4, i32 %argc5, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %bc = and i32 %argc2, %argc4                    ; <i32> [#uses=1]
+  %de = and i32 %argc3, %argc5                    ; <i32> [#uses=1]
+  %and1 = and i32 %argc, %bc                      ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and1, %bc                ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %de                      ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, %de               ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; ((B & C) & A) == (B & C) & ((D & E) & A) == (D & E)
+define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3,
+                   i32 %argc4, i32 %argc5, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %bc = and i32 %argc2, %argc4                    ; <i32> [#uses=1]
+  %de = and i32 %argc3, %argc5                    ; <i32> [#uses=1]
+  %and1 = and i32 %bc, %argc                      ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %and1, %bc                ; <i1> [#uses=1]
+  %and2 = and i32 %de, %argc                      ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %and2, %de               ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (B & C) == (A & (B & C)) & (D & E) == (A & (D & E))
+define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3,
+                   i32 %argc4, i32 %argc5, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %bc = and i32 %argc2, %argc4                    ; <i32> [#uses=1]
+  %de = and i32 %argc3, %argc5                    ; <i32> [#uses=1]
+  %and1 = and i32 %argc, %bc                      ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %bc, %and1                ; <i1> [#uses=1]
+  %and2 = and i32 %argc, %de                      ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %de, %and2               ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
+
+; (B & C) == ((B & C) & A) & (D & E) == ((D & E) & A)
+define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3,
+                   i32 %argc4, i32 %argc5, i8** nocapture %argv)
+           nounwind readnone ssp {
+entry:
+  %bc = and i32 %argc2, %argc4                    ; <i32> [#uses=1]
+  %de = and i32 %argc3, %argc5                    ; <i32> [#uses=1]
+  %and1 = and i32 %bc, %argc                      ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %bc, %and1                ; <i1> [#uses=1]
+  %and2 = and i32 %de, %argc                      ; <i32> [#uses=1]
+  %tobool3 = icmp eq i32 %de, %and2               ; <i1> [#uses=1]
+  %and.cond = and i1 %tobool, %tobool3            ; <i1> [#uses=1]
+  %storemerge = select i1 %and.cond, i32 0, i32 1 ; <i32> [#uses=1]
+  ret i32 %storemerge
+}
diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll
new file mode 100644
index 000000000000..e4a61e98e4f5
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-store.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Instcombine should preserve metadata and alignment while
+; folding a bitcast into a store.
+
+; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.A = type { i32 (...)** }
+
+@G = external constant [5 x i8*]
+
+define void @foo(%struct.A* %a) nounwind {
+entry:
+  %0 = bitcast %struct.A* %a to i8***
+  store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0
+  ret void
+}
+
+!0 = metadata !{metadata !"hello"}
diff --git a/test/Transforms/InstCombine/bitcast-vec-uniform.ll b/test/Transforms/InstCombine/bitcast-vec-uniform.ll
new file mode 100644
index 000000000000..5975f1ec396e
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-vec-uniform.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: @a
+; CHECK-NOT: bitcast
+; CHECK: ret
+define <4 x i32> @a(<1 x i64> %y) {
+  %c = bitcast <2 x i64> <i64 0, i64 0> to <4 x i32>
+  ret <4 x i32> %c
+}
+
+; CHECK: @b
+; CHECK-NOT: bitcast
+; CHECK: ret
+
+define <4 x i32> @b(<1 x i64> %y) {
+  %c = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
+  ret <4 x i32> %c
+}
+
+; CHECK: @foo
+; CHECK-NOT: bitcast
+; CHECK: ret
+
+; from MultiSource/Benchmarks/Bullet
+define <2 x float> @foo() {
+  %cast = bitcast i64 -1 to <2 x float>
+  ret <2 x float> %cast
+}
+
+
+; CHECK: @foo2
+; CHECK-NOT: bitcast
+; CHECK: ret
+define <2 x double> @foo2() {
+  %cast = bitcast i128 -1 to <2 x double>
+  ret <2 x double> %cast
+}
+
+; CHECK: @foo3
+; CHECK-NOT: bitcast
+; CHECK: ret
+define <1 x float> @foo3() {
+  %cast = bitcast i32 -1 to <1 x float>
+  ret <1 x float> %cast
+}
+
+; CHECK: @foo4
+; CHECK-NOT: bitcast
+; CHECK: ret
+define float @foo4() {
+  %cast = bitcast <1 x i32 ><i32 -1> to float
+  ret float %cast
+}
+
+; CHECK: @foo5
+; CHECK-NOT: bitcast
+; CHECK: ret
+define double @foo5() {
+  %cast = bitcast <2 x i32 ><i32 -1, i32 -1> to double
+  ret double %cast
+}
+
+
+; CHECK: @foo6
+; CHECK-NOT: bitcast
+; CHECK: ret
+define <2 x double> @foo6() {
+  %cast = bitcast <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1> to <2 x double>
+  ret <2 x double> %cast
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 102d2f048f1c..bc5e3655c19a 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -437,8 +437,8 @@ define i64 @test47(i8 %A) {
  ret i64 %E
 ; CHECK: @test47
 ; CHECK-NEXT:   %B = sext i8 %A to i64
-; CHECK-NEXT: %C = or i64 %B, 42
-; CHECK-NEXT:  %E = and i64 %C, 4294967295
+; CHECK-NEXT: %C = and i64 %B, 4294967253
+; CHECK-NEXT:  %E = or i64 %C, 42
 ; CHECK-NEXT:  ret i64 %E
 }
 
@@ -452,7 +452,7 @@ define i64 @test48(i8 %A, i8 %a) {
 ; CHECK: @test48
 ; CHECK-NEXT: %b = zext i8 %a to i64
 ; CHECK-NEXT: %B = zext i8 %A to i64
-; CHECK-NEXT: %C = shl i64 %B, 8
+; CHECK-NEXT: %C = shl nuw nsw i64 %B, 8
 ; CHECK-NEXT: %D = or i64 %C, %b
 ; CHECK-NEXT: ret i64 %D
 }
@@ -464,7 +464,7 @@ define i64 @test49(i64 %A) {
  ret i64 %D
 ; CHECK: @test49
 ; CHECK-NEXT: %C = shl i64 %A, 32
-; CHECK-NEXT: ashr i64 %C, 32
+; CHECK-NEXT: ashr exact i64 %C, 32
 ; CHECK-NEXT: %D = or i64 {{.*}}, 1
 ; CHECK-NEXT: ret i64 %D
 }
@@ -478,8 +478,8 @@ define i64 @test50(i64 %A) {
 ; CHECK: @test50
 ; CHECK-NEXT: shl i64 %A, 30
 ; CHECK-NEXT: add i64 {{.*}}, -4294967296
-; CHECK-NEXT: %E = ashr i64 {{.*}}, 32
-; CHECK-NEXT: ret i64 %E
+; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
+; CHECK-NEXT: ret i64 %sext
 }
 
 define i64 @test51(i64 %A, i1 %cond) {
@@ -508,8 +508,8 @@ define i32 @test52(i64 %A) {
   ret i32 %E
 ; CHECK: @test52
 ; CHECK-NEXT: %B = trunc i64 %A to i32
-; CHECK-NEXT: %C = or i32 %B, 32962
-; CHECK-NEXT: %D = and i32 %C, 40186
+; CHECK-NEXT: %C = and i32 %B, 7224
+; CHECK-NEXT: %D = or i32 %C, 32962
 ; CHECK-NEXT: ret i32 %D
 }
 
@@ -521,8 +521,8 @@ define i64 @test53(i32 %A) {
   ret i64 %E
 ; CHECK: @test53
 ; CHECK-NEXT: %B = zext i32 %A to i64
-; CHECK-NEXT: %C = or i64 %B, 32962
-; CHECK-NEXT: %D = and i64 %C, 40186
+; CHECK-NEXT: %C = and i64 %B, 7224
+; CHECK-NEXT: %D = or i64 %C, 32962
 ; CHECK-NEXT: ret i64 %D
 }
 
@@ -534,8 +534,8 @@ define i32 @test54(i64 %A) {
   ret i32 %E
 ; CHECK: @test54
 ; CHECK-NEXT: %B = trunc i64 %A to i32
-; CHECK-NEXT: %C = or i32 %B, -32574
-; CHECK-NEXT: %D = and i32 %C, -25350
+; CHECK-NEXT: %C = and i32 %B, 7224
+; CHECK-NEXT: %D = or i32 %C, -32574
 ; CHECK-NEXT: ret i32 %D
 }
 
@@ -547,8 +547,8 @@ define i64 @test55(i32 %A) {
   ret i64 %E
 ; CHECK: @test55
 ; CHECK-NEXT: %B = zext i32 %A to i64
-; CHECK-NEXT: %C = or i64 %B, -32574
-; CHECK-NEXT: %D = and i64 %C, -25350
+; CHECK-NEXT: %C = and i64 %B, 7224
+; CHECK-NEXT: %D = or i64 %C, -32574
 ; CHECK-NEXT: ret i64 %D
 }
 
@@ -584,8 +584,8 @@ define i64 @test58(i64 %A) nounwind {
  
 ; CHECK: @test58
 ; CHECK-NEXT:   %C = lshr i64 %A, 8
-; CHECK-NEXT:   %D = or i64 %C, 128
-; CHECK-NEXT:   %E = and i64 %D, 16777215
+; CHECK-NEXT:   %D = and i64 %C, 16777087
+; CHECK-NEXT:   %E = or i64 %D, 128
 ; CHECK-NEXT:   ret i64 %E
 }
 
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index 4be1a9c838d2..c679226d4a9d 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -53,3 +53,22 @@ define void @frob() {
   store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
   ret void
 }
+
+
+; PR8883 - Constant fold exotic gep subtract
+; CHECK: @test2
+@X = global [1000 x i8] zeroinitializer, align 16
+
+define i64 @test2() {
+entry:
+  %A = bitcast i8* getelementptr inbounds ([1000 x i8]* @X, i64 1, i64 0) to i8*
+  %B = bitcast i8* getelementptr inbounds ([1000 x i8]* @X, i64 0, i64 0) to i8*
+
+  %B2 = ptrtoint i8* %B to i64
+  %C = sub i64 0, %B2
+  %D = getelementptr i8* %A, i64 %C
+  %E = ptrtoint i8* %D to i64
+
+  ret i64 %E
+  ; CHECK: ret i64 1000
+}
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index 30d467e0ca85..e17774d7b0e2 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -252,3 +252,121 @@ entry:
   %conv6 = zext i1 %cmp5 to i32                   ; <i32> [#uses=0]
   ret void
 }
+
+%s1 = type { %s2, %s2, [6 x %s2], i32, i32, i32, [1 x i32], [0 x i8] }
+%s2 = type { i64 }
+define void @test13() nounwind ssp {
+entry:
+  %0 = getelementptr inbounds %s1* null, i64 0, i32 2, i64 0, i32 0
+  %1 = bitcast i64* %0 to i32*
+  %2 = getelementptr inbounds %s1* null, i64 0, i32 2, i64 1, i32 0
+  %.pre = load i32* %1, align 8
+  %3 = lshr i32 %.pre, 19
+  %brmerge = or i1 undef, undef
+  %4 = and i32 %3, 3
+  %5 = add nsw i32 %4, 1
+  %6 = shl i32 %5, 19
+  %7 = add i32 %6, 1572864
+  %8 = and i32 %7, 1572864
+  %9 = load i64* %2, align 8
+  %trunc156 = trunc i64 %9 to i32
+  %10 = and i32 %trunc156, -1537
+  %11 = and i32 %10, -6145
+  %12 = or i32 %11, 2048
+  %13 = and i32 %12, -24577
+  %14 = or i32 %13, 16384
+  %15 = or i32 %14, 98304
+  store i32 %15, i32* undef, align 8
+  %16 = and i32 %15, -1572865
+  %17 = or i32 %16, %8
+  store i32 %17, i32* undef, align 8
+  %18 = and i32 %17, -449
+  %19 = or i32 %18, 64
+  store i32 %19, i32* undef, align 8
+  unreachable
+}
+
+
+; PR8807
+declare i32 @test14f(i8* (i8*)*) nounwind
+
+define void @test14() nounwind readnone {
+entry:
+  %tmp = bitcast i32 (i8* (i8*)*)* @test14f to i32 (i32*)*
+  %call10 = call i32 %tmp(i32* byval undef)
+  ret void
+}
+
+
+; PR8896
+@g_54 = external global [7 x i16]
+
+define void @test15(i32* %p_92) nounwind {
+entry:
+%0 = load i32* %p_92, align 4
+%1 = icmp ne i32 %0, 0
+%2 = zext i1 %1 to i32
+%3 = call i32 @func_14() nounwind
+%4 = trunc i32 %3 to i16
+%5 = sext i16 %4 to i32
+%6 = trunc i32 %5 to i16
+br i1 undef, label %"3", label %"5"
+
+"3":                                              ; preds = %entry
+%7 = sext i16 %6 to i32
+%8 = ashr i32 %7, -1649554541
+%9 = trunc i32 %8 to i16
+br label %"5"
+
+"5":                                              ; preds = %"3", %entry
+%10 = phi i16 [ %9, %"3" ], [ %6, %entry ]
+%11 = sext i16 %10 to i32
+%12 = xor i32 %2, %11
+%13 = sext i32 %12 to i64
+%14 = icmp ne i64 %13, 0
+br i1 %14, label %return, label %"7"
+
+"7":                                              ; preds = %"5"
+ret void
+
+return:                                           ; preds = %"5"
+ret void
+}
+
+declare i32 @func_14()
+
+
+define double @test16(i32 %a) nounwind {
+  %cmp = icmp slt i32 %a, 2
+  %select = select i1 %cmp, double 2.000000e+00, double 3.141592e+00
+  ret double %select
+}
+
+
+; PR8983
+%struct.basic_ios = type { i8 }
+
+define %struct.basic_ios *@test17() ssp {
+entry:
+  %add.ptr.i = getelementptr i8* null, i64 undef
+  %0 = bitcast i8* %add.ptr.i to %struct.basic_ios*
+  ret %struct.basic_ios* %0
+}
+
+; PR9013
+define void @test18() nounwind ssp {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %l_197.0 = phi i32 [ 0, %entry ], [ %sub.i, %for.inc ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.cond
+  %conv = and i32 %l_197.0, 255
+  %sub.i = add nsw i32 %conv, -1
+  br label %for.cond
+
+return:                                           ; No predecessors!
+  ret void
+}
diff --git a/test/Transforms/InstCombine/div-cmp-overflow.ll b/test/Transforms/InstCombine/div-cmp-overflow.ll
deleted file mode 100644
index 6f63adcd2e17..000000000000
--- a/test/Transforms/InstCombine/div-cmp-overflow.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep sdiv
-; PR2740
-
-define i1 @func_75(i32 %i2) nounwind {
-	%i3 = sdiv i32 %i2, -1328634635
-	%i4 = icmp eq i32 %i3, -1
-	ret i1 %i4
-}
diff --git a/test/Transforms/InstCombine/exact-sdiv.ll b/test/Transforms/InstCombine/exact-sdiv.ll
deleted file mode 100644
index e5677541e952..000000000000
--- a/test/Transforms/InstCombine/exact-sdiv.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; CHECK: define i32 @foo
-; CHECK: sdiv i32 %x, 8
-define i32 @foo(i32 %x) {
-  %y = sdiv i32 %x, 8
-  ret i32 %y
-}
-
-; CHECK: define i32 @bar
-; CHECK: ashr i32 %x, 3
-define i32 @bar(i32 %x) {
-  %y = sdiv exact i32 %x, 8
-  ret i32 %y
-}
-
-; CHECK: i32 @a0
-; CHECK: %y = srem i32 %x, 3
-; CHECK: %z = sub i32 %x, %y
-; CHECK: ret i32 %z
-define i32 @a0(i32 %x) {
-  %y = sdiv i32 %x, 3
-  %z = mul i32 %y, 3
-  ret i32 %z
-}
-
-; CHECK: i32 @b0
-; CHECK: ret i32 %x
-define i32 @b0(i32 %x) {
-  %y = sdiv exact i32 %x, 3
-  %z = mul i32 %y, 3
-  ret i32 %z
-}
-
-; CHECK: i32 @a1
-; CHECK: %y = srem i32 %x, 3
-; CHECK: %z = sub i32 %y, %x
-; CHECK: ret i32 %z
-define i32 @a1(i32 %x) {
-  %y = sdiv i32 %x, 3
-  %z = mul i32 %y, -3
-  ret i32 %z
-}
-
-; CHECK: i32 @b1
-; CHECK: %z = sub i32 0, %x
-; CHECK: ret i32 %z
-define i32 @b1(i32 %x) {
-  %y = sdiv exact i32 %x, 3
-  %z = mul i32 %y, -3
-  ret i32 %z
-}
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
new file mode 100644
index 000000000000..58f8b5d5bcd8
--- /dev/null
+++ b/test/Transforms/InstCombine/exact.ll
@@ -0,0 +1,154 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: @sdiv1
+; CHECK: sdiv i32 %x, 8
+define i32 @sdiv1(i32 %x) {
+  %y = sdiv i32 %x, 8
+  ret i32 %y
+}
+
+; CHECK: @sdiv2
+; CHECK: ashr exact i32 %x, 3
+define i32 @sdiv2(i32 %x) {
+  %y = sdiv exact i32 %x, 8
+  ret i32 %y
+}
+
+; CHECK: @sdiv3
+; CHECK: %y = srem i32 %x, 3
+; CHECK: %z = sub i32 %x, %y
+; CHECK: ret i32 %z
+define i32 @sdiv3(i32 %x) {
+  %y = sdiv i32 %x, 3
+  %z = mul i32 %y, 3
+  ret i32 %z
+}
+
+; CHECK: @sdiv4
+; CHECK: ret i32 %x
+define i32 @sdiv4(i32 %x) {
+  %y = sdiv exact i32 %x, 3
+  %z = mul i32 %y, 3
+  ret i32 %z
+}
+
+; CHECK: i32 @sdiv5
+; CHECK: %y = srem i32 %x, 3
+; CHECK: %z = sub i32 %y, %x
+; CHECK: ret i32 %z
+define i32 @sdiv5(i32 %x) {
+  %y = sdiv i32 %x, 3
+  %z = mul i32 %y, -3
+  ret i32 %z
+}
+
+; CHECK: @sdiv6
+; CHECK: %z = sub i32 0, %x
+; CHECK: ret i32 %z
+define i32 @sdiv6(i32 %x) {
+  %y = sdiv exact i32 %x, 3
+  %z = mul i32 %y, -3
+  ret i32 %z
+}
+
+; CHECK: @udiv1
+; CHECK: ret i32 %x
+define i32 @udiv1(i32 %x, i32 %w) {
+  %y = udiv exact i32 %x, %w
+  %z = mul i32 %y, %w
+  ret i32 %z
+}
+
+; CHECK: @udiv2
+; CHECK: %z = lshr exact i32 %x, %w
+; CHECK: ret i32 %z
+define i32 @udiv2(i32 %x, i32 %w) {
+  %y = shl i32 1, %w
+  %z = udiv exact i32 %x, %y
+  ret i32 %z
+}
+
+; CHECK: @ashr1
+; CHECK: %B = ashr exact i64 %A, 2
+; CHECK: ret i64 %B
+define i64 @ashr1(i64 %X) nounwind {
+  %A = shl i64 %X, 8
+  %B = ashr i64 %A, 2   ; X/4
+  ret i64 %B
+}
+
+; PR9120
+; CHECK: @ashr_icmp1
+; CHECK: %B = icmp eq i64 %X, 0
+; CHECK: ret i1 %B
+define i1 @ashr_icmp1(i64 %X) nounwind {
+  %A = ashr exact i64 %X, 2   ; X/4
+  %B = icmp eq i64 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @ashr_icmp2
+; CHECK: %Z = icmp slt i64 %X, 16
+; CHECK: ret i1 %Z
+define i1 @ashr_icmp2(i64 %X) nounwind {
+ %Y = ashr exact i64 %X, 2  ; x / 4
+ %Z = icmp slt i64 %Y, 4    ; x < 16
+ ret i1 %Z
+}
+
+; CHECK: @udiv_icmp1
+; CHECK: icmp ne i64 %X, 0
+define i1 @udiv_icmp1(i64 %X) nounwind {
+  %A = udiv exact i64 %X, 5   ; X/5
+  %B = icmp ne i64 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp1
+; CHECK: icmp eq i64 %X, 0
+define i1 @sdiv_icmp1(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, 5   ; X/5 == 0 --> x == 0
+  %B = icmp eq i64 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp2
+; CHECK: icmp eq i64 %X, 5
+define i1 @sdiv_icmp2(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, 5   ; X/5 == 1 --> x == 5
+  %B = icmp eq i64 %A, 1
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp3
+; CHECK: icmp eq i64 %X, -5
+define i1 @sdiv_icmp3(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, 5   ; X/5 == -1 --> x == -5
+  %B = icmp eq i64 %A, -1
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp4
+; CHECK: icmp eq i64 %X, 0
+define i1 @sdiv_icmp4(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, -5   ; X/-5 == 0 --> x == 0
+  %B = icmp eq i64 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp5
+; CHECK: icmp eq i64 %X, -5
+define i1 @sdiv_icmp5(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, -5   ; X/-5 == 1 --> x == -5
+  %B = icmp eq i64 %A, 1
+  ret i1 %B
+}
+
+; CHECK: @sdiv_icmp6
+; CHECK: icmp eq i64 %X, 5
+define i1 @sdiv_icmp6(i64 %X) nounwind {
+  %A = sdiv exact i64 %X, -5   ; X/-5 == 1 --> x == 5
+  %B = icmp eq i64 %A, -1
+  ret i1 %B
+}
+
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index 875f860b3d7f..64edc18d4507 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -1,13 +1,16 @@
-; RUN: opt < %s -instcombine -S | not grep extractvalue
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; Instcombine should fold various combinations of insertvalue and extractvalue
-; together
 declare void @bar({i32, i32} %a)
+declare i32 @baz(i32 %a)
 
-define i32 @foo() {
+; CHECK: define i32 @foo
+; CHECK-NOT: extractvalue
+define i32 @foo(i32 %a, i32 %b) {
+; Instcombine should fold various combinations of insertvalue and extractvalue
+; together
         ; Build a simple struct and pull values out again
-        %s1.1 = insertvalue {i32, i32} undef, i32 0, 0
-        %s1 = insertvalue {i32, i32} %s1.1, i32 1, 1
+        %s1.1 = insertvalue {i32, i32} undef, i32 %a, 0
+        %s1 = insertvalue {i32, i32} %s1.1, i32 %b, 1
         %v1 = extractvalue {i32, i32} %s1, 0
         %v2 = extractvalue {i32, i32} %s1, 1
         
@@ -36,3 +39,69 @@ define i32 @foo() {
         ret i32 %v5
 }
 
+; CHECK: define i32 @extract2gep
+; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}* %pair, i32 0, i32 1
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
+; CHECK-NEXT: store
+; CHECK-NEXT: br label %loop
+; CHECK-NOT: extractvalue
+; CHECK: call {{.*}}(i32 [[LOAD]])
+; CHECK-NOT: extractvalue
+; CHECK: ret i32 [[LOAD]]
+define i32 @extract2gep({i32, i32}* %pair, i32* %P) {
+        ; The load + extractvalue should be converted
+        ; to an inbounds gep + smaller load.
+        ; The new load should be in the same spot as the old load.
+        %L = load {i32, i32}* %pair
+        store i32 0, i32* %P
+        br label %loop
+
+loop:
+        %E = extractvalue {i32, i32} %L, 1
+        %C = call i32 @baz(i32 %E)
+        store i32 %C, i32* %P
+        %cond = icmp eq i32 %C, 0
+        br i1 %cond, label %end, label %loop
+
+end:
+        ret i32 %E
+}
+
+; CHECK: define i32 @doubleextract2gep
+; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}* %arg, i32 0, i32 1, i32 1
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
+; CHECK-NEXT: ret i32 [[LOAD]]
+define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
+        ; The load + extractvalues should be converted
+        ; to a 3-index inbounds gep + smaller load.
+        %L = load {i32, {i32, i32}}* %arg
+        %E1 = extractvalue {i32, {i32, i32}} %L, 1
+        %E2 = extractvalue {i32, i32} %E1, 1
+        ret i32 %E2
+}
+
+; CHECK: define i32 @nogep-multiuse
+; CHECK-NEXT: load {{.*}} %pair
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+define i32 @nogep-multiuse({i32, i32}* %pair) {
+        ; The load should be left unchanged since both parts are needed.
+        %L = volatile load {i32, i32}* %pair
+        %LHS = extractvalue {i32, i32} %L, 0
+        %RHS = extractvalue {i32, i32} %L, 1
+        %R = add i32 %LHS, %RHS
+        ret i32 %R
+}
+
+; CHECK: define i32 @nogep-volatile
+; CHECK-NEXT: volatile load {{.*}} %pair
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: ret
+define i32 @nogep-volatile({i32, i32}* %pair) {
+        ; The volatile load should be left unchanged.
+        %L = volatile load {i32, i32}* %pair
+        %E = extractvalue {i32, i32} %L, 1
+        ret i32 %E
+}
diff --git a/test/Transforms/InstCombine/fold-calls.ll b/test/Transforms/InstCombine/fold-calls.ll
new file mode 100644
index 000000000000..504f874beaeb
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-calls.ll
@@ -0,0 +1,19 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This shouldn't fold, because sin(inf) is invalid.
+; CHECK: @foo
+; CHECK:   %t = call double @sin(double 0x7FF0000000000000)
+define double @foo() {
+  %t = call double @sin(double 0x7FF0000000000000)
+  ret double %t
+}
+
+; This should fold.
+; CHECK: @bar
+; CHECK:   ret double 0.0
+define double @bar() {
+  %t = call double @sin(double 0.0)
+  ret double %t
+}
+
+declare double @sin(double)
diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll
new file mode 100644
index 000000000000..3f22522a6ce4
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-vector-select.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -S | not grep select
+
+define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D) {
+ %r = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> zeroinitializer
+ %g = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>,  <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 6, i32 9, i32 1>
+ %b = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>,  <4 x i32> zeroinitializer, <4 x i32> <i32 7, i32 1, i32 4, i32 9>
+ %a = select <4 x i1> zeroinitializer,  <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 8, i32 5>
+ store <4 x i32> %r, <4 x i32>* %A
+ store <4 x i32> %g, <4 x i32>* %B
+ store <4 x i32> %b, <4 x i32>* %C
+ store <4 x i32> %a, <4 x i32>* %D
+ ret void
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 802957f47b3a..3150883e7d71 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
 define i32 @test1(i32 %X) {
 entry:
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
@@ -154,3 +157,223 @@ entry:
 ; CHECK: @test16
 ; CHECK: ret i1 undef
 }
+
+define i1 @test17(i32 %x) nounwind {
+  %shl = shl i32 1, %x
+  %and = and i32 %shl, 8
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+; CHECK: @test17
+; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
+}
+
+
+define i1 @test18(i32 %x) nounwind {
+  %sh = lshr i32 8, %x
+  %and = and i32 %sh, 1
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+; CHECK: @test18
+; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
+}
+
+define i1 @test19(i32 %x) nounwind {
+  %shl = shl i32 1, %x
+  %and = and i32 %shl, 8
+  %cmp = icmp eq i32 %and, 8
+  ret i1 %cmp
+; CHECK: @test19
+; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
+}
+
+define i1 @test20(i32 %x) nounwind {
+  %shl = shl i32 1, %x
+  %and = and i32 %shl, 8
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+; CHECK: @test20
+; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
+}
+
+define i1 @test21(i8 %x, i8 %y) {
+; CHECK: @test21
+; CHECK-NOT: or i8
+; CHECK: icmp ugt
+  %A = or i8 %x, 1
+  %B = icmp ugt i8 %A, 3
+  ret i1 %B
+}
+
+define i1 @test22(i8 %x, i8 %y) {
+; CHECK: @test22
+; CHECK-NOT: or i8
+; CHECK: icmp ult
+  %A = or i8 %x, 1
+  %B = icmp ult i8 %A, 4
+  ret i1 %B
+}
+
+; PR2740
+; CHECK: @test23
+; CHECK: icmp sgt i32 %x, 1328634634
+define i1 @test23(i32 %x) nounwind {
+	%i3 = sdiv i32 %x, -1328634635
+	%i4 = icmp eq i32 %i3, -1
+	ret i1 %i4
+}
+
+@X = global [1000 x i32] zeroinitializer
+
+; PR8882
+; CHECK: @test24
+; CHECK:    %cmp = icmp eq i64 %i, 1000
+; CHECK:   ret i1 %cmp
+define i1 @test24(i64 %i) {
+  %p1 = getelementptr inbounds i32* getelementptr inbounds ([1000 x i32]* @X, i64 0, i64 0), i64 %i
+  %cmp = icmp eq i32* %p1, getelementptr inbounds ([1000 x i32]* @X, i64 1, i64 0)
+  ret i1 %cmp
+}
+
+; CHECK: @test25
+; X + Z > Y + Z -> X > Y if there is no overflow.
+; CHECK: %c = icmp sgt i32 %x, %y
+; CHECK: ret i1 %c
+define i1 @test25(i32 %x, i32 %y, i32 %z) {
+  %lhs = add nsw i32 %x, %z
+  %rhs = add nsw i32 %y, %z
+  %c = icmp sgt i32 %lhs, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test26
+; X + Z > Y + Z -> X > Y if there is no overflow.
+; CHECK: %c = icmp ugt i32 %x, %y
+; CHECK: ret i1 %c
+define i1 @test26(i32 %x, i32 %y, i32 %z) {
+  %lhs = add nuw i32 %x, %z
+  %rhs = add nuw i32 %y, %z
+  %c = icmp ugt i32 %lhs, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test27
+; X - Z > Y - Z -> X > Y if there is no overflow.
+; CHECK: %c = icmp sgt i32 %x, %y
+; CHECK: ret i1 %c
+define i1 @test27(i32 %x, i32 %y, i32 %z) {
+  %lhs = sub nsw i32 %x, %z
+  %rhs = sub nsw i32 %y, %z
+  %c = icmp sgt i32 %lhs, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test28
+; X - Z > Y - Z -> X > Y if there is no overflow.
+; CHECK: %c = icmp ugt i32 %x, %y
+; CHECK: ret i1 %c
+define i1 @test28(i32 %x, i32 %y, i32 %z) {
+  %lhs = sub nuw i32 %x, %z
+  %rhs = sub nuw i32 %y, %z
+  %c = icmp ugt i32 %lhs, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test29
+; X + Y > X -> Y > 0 if there is no overflow.
+; CHECK: %c = icmp sgt i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test29(i32 %x, i32 %y) {
+  %lhs = add nsw i32 %x, %y
+  %c = icmp sgt i32 %lhs, %x
+  ret i1 %c
+}
+
+; CHECK: @test30
+; X + Y > X -> Y > 0 if there is no overflow.
+; CHECK: %c = icmp ne i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test30(i32 %x, i32 %y) {
+  %lhs = add nuw i32 %x, %y
+  %c = icmp ugt i32 %lhs, %x
+  ret i1 %c
+}
+
+; CHECK: @test31
+; X > X + Y -> 0 > Y if there is no overflow.
+; CHECK: %c = icmp slt i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test31(i32 %x, i32 %y) {
+  %rhs = add nsw i32 %x, %y
+  %c = icmp sgt i32 %x, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test32
+; X > X + Y -> 0 > Y if there is no overflow.
+; CHECK: ret i1 false
+define i1 @test32(i32 %x, i32 %y) {
+  %rhs = add nuw i32 %x, %y
+  %c = icmp ugt i32 %x, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test33
+; X - Y > X -> 0 > Y if there is no overflow.
+; CHECK: %c = icmp slt i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test33(i32 %x, i32 %y) {
+  %lhs = sub nsw i32 %x, %y
+  %c = icmp sgt i32 %lhs, %x
+  ret i1 %c
+}
+
+; CHECK: @test34
+; X - Y > X -> 0 > Y if there is no overflow.
+; CHECK: ret i1 false
+define i1 @test34(i32 %x, i32 %y) {
+  %lhs = sub nuw i32 %x, %y
+  %c = icmp ugt i32 %lhs, %x
+  ret i1 %c
+}
+
+; CHECK: @test35
+; X > X - Y -> Y > 0 if there is no overflow.
+; CHECK: %c = icmp sgt i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test35(i32 %x, i32 %y) {
+  %rhs = sub nsw i32 %x, %y
+  %c = icmp sgt i32 %x, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test36
+; X > X - Y -> Y > 0 if there is no overflow.
+; CHECK: %c = icmp ne i32 %y, 0
+; CHECK: ret i1 %c
+define i1 @test36(i32 %x, i32 %y) {
+  %rhs = sub nuw i32 %x, %y
+  %c = icmp ugt i32 %x, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test37
+; X - Y > X - Z -> Z > Y if there is no overflow.
+; CHECK: %c = icmp sgt i32 %z, %y
+; CHECK: ret i1 %c
+define i1 @test37(i32 %x, i32 %y, i32 %z) {
+  %lhs = sub nsw i32 %x, %y
+  %rhs = sub nsw i32 %x, %z
+  %c = icmp sgt i32 %lhs, %rhs
+  ret i1 %c
+}
+
+; CHECK: @test38
+; X - Y > X - Z -> Z > Y if there is no overflow.
+; CHECK: %c = icmp ugt i32 %z, %y
+; CHECK: ret i1 %c
+define i1 @test38(i32 %x, i32 %y, i32 %z) {
+  %lhs = sub nuw i32 %x, %y
+  %rhs = sub nuw i32 %x, %z
+  %c = icmp ugt i32 %lhs, %rhs
+  ret i1 %c
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index d672d8c1535e..50e7f1f7c92d 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -71,6 +71,25 @@ define i8 @uaddtest5(i8 %A, i1* %overflowPtr) {
 ; CHECK: ret i8 %A
 }
 
+define i1 @uaddtest6(i8 %A, i8 %B) {
+  %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 -4)
+  %z = extractvalue %overflow.result %x, 1
+  ret i1 %z
+; CHECK: @uaddtest6
+; CHECK-NEXT: %z = icmp ugt i8 %A, 3
+; CHECK-NEXT: ret i1 %z
+}
+
+define i8 @uaddtest7(i8 %A, i8 %B) {
+  %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
+  %z = extractvalue %overflow.result %x, 0
+  ret i8 %z
+; CHECK: @uaddtest7
+; CHECK-NEXT: %z = add i8 %A, %B
+; CHECK-NEXT: ret i8 %z
+}
+
+
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
   %y = extractvalue %overflow.result %x, 0
diff --git a/test/Transforms/InstCombine/memcpy.ll b/test/Transforms/InstCombine/memcpy.ll
index 2e7b2c0bb41d..8a2e3aaad027 100644
--- a/test/Transforms/InstCombine/memcpy.ll
+++ b/test/Transforms/InstCombine/memcpy.ll
@@ -1,10 +1,19 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
-define void @test4(i8* %a) {
-        tail call void @llvm.memcpy.i32( i8* %a, i8* %a, i32 100, i32 1 )
+define void @test1(i8* %a) {
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
         ret void
-}
-; CHECK: define void @test4
+; CHECK: define void @test1
 ; CHECK-NEXT: ret void
+}
+
+
+; PR8267
+define void @test2(i8* %a) {
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 true)
+        ret void
+; CHECK: define void @test2
+; CHECK-NEXT: call void @llvm.memcpy
+}
diff --git a/test/Transforms/InstCombine/memset2.ll b/test/Transforms/InstCombine/memset2.ll
new file mode 100644
index 000000000000..87639f0d6a22
--- /dev/null
+++ b/test/Transforms/InstCombine/memset2.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test to check that instcombine doesn't drop the address space when optimizing
+; memset.
+%struct.Moves = type { [9 x i8], i8, i8, i8, [5 x i8] }
+
+define i32 @test(%struct.Moves addrspace(1)* nocapture %moves) {
+entry:
+; CHECK: bitcast i8 addrspace(1)* %gep to i64 addrspace(1)*
+	%gep = getelementptr inbounds %struct.Moves addrspace(1)* %moves, i32 1, i32 0, i32 9
+	 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i32 1, i1 false)                                                                     
+	ret i32 0
+}
+
+declare void @llvm.memset.p1i8.i64(i8addrspace(1)* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/neon-intrinsics.ll b/test/Transforms/InstCombine/neon-intrinsics.ll
new file mode 100644
index 000000000000..3ad09cc6c694
--- /dev/null
+++ b/test/Transforms/InstCombine/neon-intrinsics.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; The alignment arguments for NEON load/store intrinsics can be increased
+; by instcombine.  Check for this.
+
+; CHECK: vld4.v2i32({{.*}}, i32 32)
+; CHECK: vst4.v2i32({{.*}}, i32 16)
+
+@x = common global [8 x i32] zeroinitializer, align 32
+@y = common global [8 x i32] zeroinitializer, align 16
+
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+define void @test() nounwind ssp {
+  %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
+  %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
+  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
+  call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
+  ret void
+}
+
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/Transforms/InstCombine/nsw.ll b/test/Transforms/InstCombine/nsw.ll
index 821cebed30bb..681bdc234b7d 100644
--- a/test/Transforms/InstCombine/nsw.ll
+++ b/test/Transforms/InstCombine/nsw.ll
@@ -1,20 +1,39 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: define i32 @foo
-; %y = sub i32 0, %x
-; %z = sdiv i32 %y, 337
-; ret i32 %y
-define i32 @foo(i32 %x) {
+; CHECK: @sub1
+; CHECK: %y = sub i32 0, %x
+; CHECK: %z = sdiv i32 %y, 337
+; CHECK: ret i32 %z
+define i32 @sub1(i32 %x) {
   %y = sub i32 0, %x
   %z = sdiv i32 %y, 337
-  ret i32 %y
+  ret i32 %z
 }
 
-; CHECK: define i32 @bar
-; %y = sdiv i32 %x, -337
-; ret i32 %y
-define i32 @bar(i32 %x) {
+; CHECK: @sub2
+; CHECK: %z = sdiv i32 %x, -337
+; CHECK: ret i32 %z
+define i32 @sub2(i32 %x) {
   %y = sub nsw i32 0, %x
   %z = sdiv i32 %y, 337
-  ret i32 %y
+  ret i32 %z
+}
+
+; CHECK: @shl_icmp
+; CHECK: %B = icmp eq i64 %X, 0
+; CHECK: ret i1 %B
+define i1 @shl_icmp(i64 %X) nounwind {
+  %A = shl nuw i64 %X, 2   ; X/4
+  %B = icmp eq i64 %A, 0
+  ret i1 %B
+}
+
+; CHECK: @shl1
+; CHECK: %B = shl nuw nsw i64 %A, 8
+; CHECK: ret i64 %B
+define i64 @shl1(i64 %X, i64* %P) nounwind {
+  %A = and i64 %X, 312
+  store i64 %A, i64* %P  ; multiple uses of A.
+  %B = shl i64 %A, 8
+  ret i64 %B
 }
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 664701bf2114..043525b75556 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -150,3 +150,13 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 declare noalias i8* @malloc(i32) nounwind
 
 declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+
+define i32 @test7() {
+; CHECK: @test7
+  %alloc = call noalias i8* @malloc(i32 48) nounwind
+  %gep = getelementptr inbounds i8* %alloc, i32 16
+  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+; CHECK-NEXT: ret i32 32
+  ret i32 %objsize
+}
+
diff --git a/test/Transforms/InstCombine/or-fcmp.ll b/test/Transforms/InstCombine/or-fcmp.ll
index 9692bfcc5970..09a3c994d93e 100644
--- a/test/Transforms/InstCombine/or-fcmp.ll
+++ b/test/Transforms/InstCombine/or-fcmp.ll
@@ -1,34 +1,58 @@
-; RUN: opt < %s -instcombine -S | grep fcmp | count 3
-; RUN: opt < %s -instcombine -S | grep ret | grep 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
+; CHECK: @t1
 define zeroext i8 @t1(float %x, float %y) nounwind {
        %a = fcmp ueq float %x, %y             ; <i1> [#uses=1]
        %b = fcmp uno float %x, %y               ; <i1> [#uses=1]
        %c = or i1 %a, %b
+; CHECK-NOT: fcmp uno
+; CHECK: fcmp ueq
        %retval = zext i1 %c to i8
        ret i8 %retval
 }
 
+; CHECK: @t2
 define zeroext i8 @t2(float %x, float %y) nounwind {
        %a = fcmp olt float %x, %y             ; <i1> [#uses=1]
        %b = fcmp oeq float %x, %y               ; <i1> [#uses=1]
+; CHECK-NOT: fcmp olt
+; CHECK-NOT: fcmp oeq
+; CHECK: fcmp ole
        %c = or i1 %a, %b
        %retval = zext i1 %c to i8
        ret i8 %retval
 }
 
+; CHECK: @t3
 define zeroext i8 @t3(float %x, float %y) nounwind {
        %a = fcmp ult float %x, %y             ; <i1> [#uses=1]
        %b = fcmp uge float %x, %y               ; <i1> [#uses=1]
        %c = or i1 %a, %b
        %retval = zext i1 %c to i8
+; CHECK: ret i8 1
        ret i8 %retval
 }
 
+; CHECK: @t4
 define zeroext i8 @t4(float %x, float %y) nounwind {
        %a = fcmp ult float %x, %y             ; <i1> [#uses=1]
        %b = fcmp ugt float %x, %y               ; <i1> [#uses=1]
        %c = or i1 %a, %b
+; CHECK-NOT: fcmp ult
+; CHECK-NOT: fcmp ugt
+; CHECK: fcmp une
+       %retval = zext i1 %c to i8
+       ret i8 %retval
+}
+
+; CHECK: @t5
+define zeroext i8 @t5(float %x, float %y) nounwind {
+       %a = fcmp olt float %x, %y             ; <i1> [#uses=1]
+       %b = fcmp oge float %x, %y               ; <i1> [#uses=1]
+       %c = or i1 %a, %b
+; CHECK-NOT: fcmp olt
+; CHECK-NOT: fcmp oge
+; CHECK: fcmp ord
        %retval = zext i1 %c to i8
        ret i8 %retval
 }
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index c3526b77f6a5..f82f9faab2d5 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -316,8 +316,8 @@ entry:
   %E = or i32 %D, %C
   ret i32 %E
 ; CHECK: @test30
-; CHECK: %B = or i32 %A, 32962
-; CHECK: %E = and i32 %B, -25350
+; CHECK: %D = and i32 %A, -58312
+; CHECK: %E = or i32 %D, 32962
 ; CHECK: ret i32 %E
 }
 
@@ -332,8 +332,8 @@ define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
   %F = or i64 %D, %E
   ret i64 %F
 ; CHECK: @test31
-; CHECK-NEXT: %bitfield = or i64 %A, 32962
-; CHECK-NEXT: %F = and i64 %bitfield, 4294941946
+; CHECK-NEXT: %E1 = and i64 %A, 4294908984
+; CHECK-NEXT: %F = or i64 %E1, 32962
 ; CHECK-NEXT: ret i64 %F
 }
 
@@ -350,3 +350,43 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
 ; CHECK: or <4 x i32> %and.i, %and.i129
 }
 
+define i1 @test33(i1 %X, i1 %Y) {
+  %a = or i1 %X, %Y
+  %b = or i1 %a, %X
+  ret i1 %b
+; CHECK: @test33
+; CHECK-NEXT: or i1 %X, %Y
+; CHECK-NEXT: ret
+}
+
+define i32 @test34(i32 %X, i32 %Y) {
+  %a = or i32 %X, %Y
+  %b = or i32 %Y, %a
+  ret i32 %b
+; CHECK: @test34
+; CHECK-NEXT: or i32 %X, %Y
+; CHECK-NEXT: ret
+}
+
+define i32 @test35(i32 %a, i32 %b) {
+  %1 = or i32 %a, 1135
+  %2 = or i32 %1, %b
+  ret i32 %2
+  ; CHECK: @test35
+  ; CHECK-NEXT: or i32 %a, %b
+  ; CHECK-NEXT: or i32 %1, 1135
+}
+
+define i1 @test36(i32 %x) {
+  %cmp1 = icmp eq i32 %x, 23
+  %cmp2 = icmp eq i32 %x, 24
+  %ret1 = or i1 %cmp1, %cmp2
+  %cmp3 = icmp eq i32 %x, 25
+  %ret2 = or i1 %ret1, %cmp3
+  ret i1 %ret2
+; CHECK: @test36
+; CHECK-NEXT: %x.off = add i32 %x, -23
+; CHECK-NEXT: icmp ult i32 %x.off, 3
+; CHECK-NEXT: ret i1
+}
+
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
new file mode 100644
index 000000000000..9123283988de
--- /dev/null
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -0,0 +1,133 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; <rdar://problem/8558713>
+
+declare void @throwAnExceptionOrWhatever()
+
+; CHECK: @test1
+define i32 @test1(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK-NOT: sext
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %add = add nsw i64 %conv2, %conv
+  %add.off = add i64 %add, 2147483648
+; CHECK: llvm.sadd.with.overflow.i32
+  %0 = icmp ugt i64 %add.off, 4294967295
+  br i1 %0, label %if.then, label %if.end
+
+if.then:
+  tail call void @throwAnExceptionOrWhatever() nounwind
+  br label %if.end
+
+if.end:
+; CHECK-NOT: trunc
+  %conv9 = trunc i64 %add to i32
+; CHECK: ret i32
+  ret i32 %conv9
+}
+
+; CHECK: @test2
+; This form should not be promoted for two reasons: 1) it is unprofitable to
+; promote it since the add.off instruction has another use, and 2) it is unsafe
+; because the add-with-off makes the high bits of the original add live.
+define i32 @test2(i32 %a, i32 %b, i64* %P) nounwind ssp {
+entry:
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %add = add nsw i64 %conv2, %conv
+  %add.off = add i64 %add, 2147483648
+  
+  store i64 %add.off, i64* %P
+  
+; CHECK-NOT: llvm.sadd.with.overflow
+  %0 = icmp ugt i64 %add.off, 4294967295
+  br i1 %0, label %if.then, label %if.end
+
+if.then:
+  tail call void @throwAnExceptionOrWhatever() nounwind
+  br label %if.end
+
+if.end:
+  %conv9 = trunc i64 %add to i32
+; CHECK: ret i32
+  ret i32 %conv9
+}
+
+; CHECK: test3
+; PR8816
+; This is illegal to transform because the high bits of the original add are
+; live out.
+define i64 @test3(i32 %a, i32 %b) nounwind ssp {
+entry:
+  %conv = sext i32 %a to i64
+  %conv2 = sext i32 %b to i64
+  %add = add nsw i64 %conv2, %conv
+  %add.off = add i64 %add, 2147483648
+; CHECK-NOT: llvm.sadd.with.overflow
+  %0 = icmp ugt i64 %add.off, 4294967295
+  br i1 %0, label %if.then, label %if.end
+
+if.then:
+  tail call void @throwAnExceptionOrWhatever() nounwind
+  br label %if.end
+
+if.end:
+  ret i64 %add
+; CHECK: ret i64
+}
+
+; CHECK: @test4
+; Should be able to form an i8 sadd computed in an i32.
+define zeroext i8 @test4(i8 signext %a, i8 signext %b) nounwind ssp {
+entry:
+  %conv = sext i8 %a to i32
+  %conv2 = sext i8 %b to i32
+  %add = add nsw i32 %conv2, %conv
+  %add4 = add nsw i32 %add, 128
+  %cmp = icmp ugt i32 %add4, 255
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK: llvm.sadd.with.overflow.i8
+if.then:                                          ; preds = %entry
+  tail call void @throwAnExceptionOrWhatever() nounwind
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %conv7 = trunc i32 %add to i8
+  ret i8 %conv7
+; CHECK: ret i8
+}
+
+; CHECK: @test5
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test5(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK: @test6
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test6(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK: @test7
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test7(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index c6c3f2ff6a68..62c6a63a7e58 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -125,7 +125,7 @@ Exit:           ; preds = %Loop
 
 define i32* @test8({ i32, i32 } *%A, i1 %b) {
 BB0:
-        %X = getelementptr { i32, i32 } *%A, i32 0, i32 1
+        %X = getelementptr inbounds { i32, i32 } *%A, i32 0, i32 1
         br i1 %b, label %BB1, label %BB2
 
 BB1:
@@ -139,7 +139,7 @@ BB2:
 ; CHECK: @test8
 ; CHECK-NOT: phi
 ; CHECK: BB2:
-; CHECK-NEXT: %B = getelementptr 
+; CHECK-NEXT: %B = getelementptr %0 
 ; CHECK-NEXT: ret i32* %B
 }
 
@@ -423,3 +423,124 @@ bb2:        ; preds = %bb1, %entry
     ret i32 %res
 }
 
+define i1 @test18(i1 %cond) {
+  %zero = alloca i32
+  %one = alloca i32
+  br i1 %cond, label %true, label %false
+true:
+  br label %ret
+false:
+  br label %ret
+ret:
+  %ptr = phi i32* [ %zero, %true ] , [ %one, %false ]
+  %isnull = icmp eq i32* %ptr, null
+  ret i1 %isnull
+; CHECK: @test18
+; CHECK: ret i1 false
+}
+
+define i1 @test19(i1 %cond, double %x) {
+  br i1 %cond, label %true, label %false
+true:
+  br label %ret
+false:
+  br label %ret
+ret:
+  %p = phi double [ %x, %true ], [ 0x7FF0000000000000, %false ]; RHS = +infty
+  %cmp = fcmp ule double %x, %p
+  ret i1 %cmp
+; CHECK: @test19
+; CHECK: ret i1 true
+}
+
+define i1 @test20(i1 %cond) {
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  br i1 %cond, label %true, label %false
+true:
+  br label %ret
+false:
+  br label %ret
+ret:
+  %p = phi i32* [ %a, %true ], [ %b, %false ]
+  %r = icmp eq i32* %p, %c
+  ret i1 %r
+; CHECK: @test20
+; CHECK: ret i1 false
+}
+
+define i1 @test21(i1 %c1, i1 %c2) {
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  br i1 %c1, label %true, label %false
+true:
+  br label %loop
+false:
+  br label %loop
+loop:
+  %p = phi i32* [ %a, %true ], [ %b, %false ], [ %p, %loop ]
+  %r = icmp eq i32* %p, %c
+  br i1 %c2, label %ret, label %loop
+ret:
+  ret i1 %r
+; CHECK: @test21
+; CHECK: ret i1 false
+}
+
+define void @test22() {
+; CHECK: @test22
+entry:
+  br label %loop
+loop:
+  %phi = phi i32 [ 0, %entry ], [ %y, %loop ]
+  %y = add i32 %phi, 1
+  %o = or i32 %y, %phi
+  %e = icmp eq i32 %o, %y
+  br i1 %e, label %loop, label %ret
+; CHECK: br i1 %e
+ret:
+  ret void
+}
+
+define i32 @test23(i32 %A, i1 %b, i32 * %P) {
+BB0:
+        br label %Loop
+
+Loop:           ; preds = %Loop, %BB0
+        ; PHI has same value always.
+        %B = phi i32 [ %A, %BB0 ], [ 42, %Loop ]
+        %D = add i32 %B, 19
+        store i32 %D, i32* %P
+        br i1 %b, label %Loop, label %Exit
+
+Exit:           ; preds = %Loop
+        %E = add i32 %B, 19
+        ret i32 %E
+; CHECK: @test23
+; CHECK: %phitmp = add i32 %A, 19
+; CHECK: Loop:
+; CHECK-NEXT: %B = phi i32 [ %phitmp, %BB0 ], [ 61, %Loop ]
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test24(i32 %A, i1 %cond) {
+BB0:
+        %X = add nuw nsw i32 %A, 1
+        br i1 %cond, label %BB1, label %BB2
+
+BB1:
+        %Y = add nuw i32 %A, 1
+        br label %BB2
+
+BB2:
+        %C = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]
+        ret i32 %C
+; CHECK: @test24
+; CHECK-NOT: phi
+; CHECK: BB2:
+; CHECK-NEXT: %C = add nuw i32 %A, 1
+; CHECK-NEXT: ret i32 %C
+}
diff --git a/test/Transforms/InstCombine/pr8547.ll b/test/Transforms/InstCombine/pr8547.ll
new file mode 100644
index 000000000000..485f4d9644f3
--- /dev/null
+++ b/test/Transforms/InstCombine/pr8547.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Converting the 2 shifts to SHL 6 without the AND is wrong.  PR 8547.
+
+@g_2 = global i32 0, align 4
+@.str = constant [10 x i8] c"g_2 = %d\0A\00"
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() nounwind {
+codeRepl:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %codeRepl
+  %storemerge = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
+  store i32 %storemerge, i32* @g_2, align 4
+  %shl = shl i32 %storemerge, 30
+  %conv2 = lshr i32 %shl, 24
+; CHECK:  %0 = shl nuw nsw i32 %storemerge, 6
+; CHECK:  %conv2 = and i32 %0, 64
+  %tobool = icmp eq i32 %conv2, 0
+  br i1 %tobool, label %for.cond, label %codeRepl2
+
+codeRepl2:                                        ; preds = %for.cond
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i32 %conv2) nounwind
+  ret i32 0
+}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index bac248e58d7a..b421b7c0e8b4 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -81,3 +81,8 @@ define i32 @test12(i32 %i) {
 	%tmp.5 = srem i32 %tmp.1, 2
 	ret i32 %tmp.5
 }
+
+define i32 @test13(i32 %i) {
+	%x = srem i32 %i, %i
+	ret i32 %x
+}
diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll
new file mode 100644
index 000000000000..8ee33690d8ce
--- /dev/null
+++ b/test/Transforms/InstCombine/select-crash.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Formerly crashed, PR8490.
+
+define fastcc double @gimp_operation_color_balance_map(float %value, double %highlights) nounwind readnone inlinehint {
+entry:
+; CHECK: gimp_operation_color_balance_map
+; CHECK: fsub double -0.000000
+  %conv = fpext float %value to double
+  %div = fdiv double %conv, 1.600000e+01
+  %add = fadd double %div, 1.000000e+00
+  %div1 = fdiv double 1.000000e+00, %add
+  %sub = fsub double 1.075000e+00, %div1
+  %sub24 = fsub double 1.000000e+00, %sub
+  %add26 = fadd double %sub, 1.000000e+00
+  %cmp86 = fcmp ogt double %highlights, 0.000000e+00
+  %cond90 = select i1 %cmp86, double %sub24, double %add26
+  %mul91 = fmul double %highlights, %cond90
+  %add94 = fadd double undef, %mul91
+  ret double %add94
+}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 246a7bc59783..ba9d99c97dd5 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -223,6 +223,81 @@ define i32 @test15d(i32 %X) {
 ; CHECK: ret i32 %t1
 }
 
+;; (a & 128) ? 256 : 0
+define i32 @test15e(i32 %X) {
+        %t1 = and i32 %X, 128
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 256, i32 0
+        ret i32 %t3
+; CHECK: @test15e
+; CHECK: %t1 = shl i32 %X, 1
+; CHECK: and i32 %t1, 256
+; CHECK: ret i32
+}
+
+;; (a & 128) ? 0 : 256
+define i32 @test15f(i32 %X) {
+        %t1 = and i32 %X, 128
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 0, i32 256
+        ret i32 %t3
+; CHECK: @test15f
+; CHECK: %t1 = shl i32 %X, 1
+; CHECK: and i32 %t1, 256
+; CHECK: xor i32 %{{.*}}, 256
+; CHECK: ret i32
+}
+
+;; (a & 8) ? -1 : -9
+define i32 @test15g(i32 %X) {
+        %t1 = and i32 %X, 8
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 -1, i32 -9
+        ret i32 %t3
+; CHECK: @test15g
+; CHECK-NEXT: %1 = or i32 %X, -9
+; CHECK-NEXT: ret i32 %1
+}
+
+;; (a & 8) ? -9 : -1
+define i32 @test15h(i32 %X) {
+        %t1 = and i32 %X, 8
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 -9, i32 -1
+        ret i32 %t3
+; CHECK: @test15h
+; CHECK-NEXT: %1 = or i32 %X, -9
+; CHECK-NEXT: %2 = xor i32 %1, 8
+; CHECK-NEXT: ret i32 %2
+}
+
+;; (a & 2) ? 577 : 1089
+define i32 @test15i(i32 %X) {
+        %t1 = and i32 %X, 2
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 577, i32 1089
+        ret i32 %t3
+; CHECK: @test15i
+; CHECK-NEXT: %t1 = shl i32 %X, 8
+; CHECK-NEXT: %1 = and i32 %t1, 512
+; CHECK-NEXT: %2 = xor i32 %1, 512
+; CHECK-NEXT: %3 = add i32 %2, 577
+; CHECK-NEXT: ret i32 %3
+}
+
+;; (a & 2) ? 1089 : 577
+define i32 @test15j(i32 %X) {
+        %t1 = and i32 %X, 2
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 1089, i32 577
+        ret i32 %t3
+; CHECK: @test15j
+; CHECK-NEXT: %t1 = shl i32 %X, 8
+; CHECK-NEXT: %1 = and i32 %t1, 512
+; CHECK-NEXT: %2 = add i32 %1, 577
+; CHECK-NEXT: ret i32 %2
+}
+
 define i32 @test16(i1 %C, i32* %P) {
         %P2 = select i1 %C, i32* %P, i32* null          
         %V = load i32* %P2              
@@ -470,3 +545,172 @@ define i32 @test37(i32 %x) {
 ; CHECK: or i32 {{.*}}, 1
 ; CHECK: ret
 }
+
+define i1 @test38(i1 %cond) {
+  %zero = alloca i32
+  %one = alloca i32
+  %ptr = select i1 %cond, i32* %zero, i32* %one
+  %isnull = icmp eq i32* %ptr, null
+  ret i1 %isnull
+; CHECK: @test38
+; CHECK: ret i1 false
+}
+
+define i1 @test39(i1 %cond, double %x) {
+  %s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
+  %cmp = fcmp ule double %x, %s
+  ret i1 %cmp
+; CHECK: @test39
+; CHECK: ret i1 true
+}
+
+define i1 @test40(i1 %cond) {
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  %s = select i1 %cond, i32* %a, i32* %b
+  %r = icmp eq i32* %s, %c
+  ret i1 %r
+; CHECK: @test40
+; CHECK: ret i1 false
+}
+
+define i32 @test41(i1 %cond, i32 %x, i32 %y) {
+  %z = and i32 %x, %y
+  %s = select i1 %cond, i32 %y, i32 %z
+  %r = and i32 %x, %s
+  ret i32 %r
+; CHECK: @test41
+; CHECK-NEXT: and i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test42(i32 %x, i32 %y) {
+  %b = add i32 %y, -1
+  %cond = icmp eq i32 %x, 0
+  %c = select i1 %cond, i32 %b, i32 %y
+  ret i32 %c
+; CHECK: @test42
+; CHECK-NEXT: %cond = icmp eq i32 %x, 0
+; CHECK-NEXT: %b = sext i1 %cond to i32
+; CHECK-NEXT: %c = add i32 %b, %y
+; CHECK-NEXT: ret i32 %c
+}
+
+define i64 @test43(i32 %a) nounwind {
+	%a_ext = sext i32 %a to i64
+	%is_a_nonnegative = icmp sgt i32 %a, -1
+	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 0
+	ret i64 %max
+; CHECK: @test43
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonnegative = icmp slt i64 %a_ext, 0
+; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 0, i64 %a_ext
+; CHECK-NEXT: ret i64 %max
+}
+
+define i64 @test44(i32 %a) nounwind {
+	%a_ext = sext i32 %a to i64
+	%is_a_nonpositive = icmp slt i32 %a, 1
+	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 0
+	ret i64 %min
+; CHECK: @test44
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonpositive = icmp sgt i64 %a_ext, 0
+; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 0, i64 %a_ext
+; CHECK-NEXT: ret i64 %min
+}
+define i64 @test45(i32 %a) nounwind {
+	%a_ext = zext i32 %a to i64
+	%is_a_nonnegative = icmp ugt i32 %a, 2
+	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 3
+	ret i64 %max
+; CHECK: @test45
+; CHECK-NEXT: %a_ext = zext i32 %a to i64
+; CHECK-NEXT: %is_a_nonnegative = icmp ult i64 %a_ext, 3
+; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 3, i64 %a_ext
+; CHECK-NEXT: ret i64 %max
+}
+
+define i64 @test46(i32 %a) nounwind {
+	%a_ext = zext i32 %a to i64
+	%is_a_nonpositive = icmp ult i32 %a, 3
+	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
+	ret i64 %min
+; CHECK: @test46
+; CHECK-NEXT: %a_ext = zext i32 %a to i64
+; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
+; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
+; CHECK-NEXT: ret i64 %min
+}
+define i64 @test47(i32 %a) nounwind {
+	%a_ext = sext i32 %a to i64
+	%is_a_nonnegative = icmp ugt i32 %a, 2
+	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 3
+	ret i64 %max
+; CHECK: @test47
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonnegative = icmp ult i64 %a_ext, 3
+; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 3, i64 %a_ext
+; CHECK-NEXT: ret i64 %max
+}
+
+define i64 @test48(i32 %a) nounwind {
+	%a_ext = sext i32 %a to i64
+	%is_a_nonpositive = icmp ult i32 %a, 3
+	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
+	ret i64 %min
+; CHECK: @test48
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
+; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
+; CHECK-NEXT: ret i64 %min
+}
+
+define i64 @test49(i32 %a) nounwind {
+	%a_ext = sext i32 %a to i64
+	%is_a_nonpositive = icmp ult i32 %a, 3
+	%min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
+	ret i64 %min
+; CHECK: @test49
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
+; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
+; CHECK-NEXT: ret i64 %min
+}
+define i64 @test50(i32 %a) nounwind {
+	%is_a_nonpositive = icmp ult i32 %a, 3
+	%a_ext = sext i32 %a to i64
+	%min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
+	ret i64 %min
+; CHECK: @test50
+; CHECK-NEXT: %a_ext = sext i32 %a to i64
+; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
+; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
+; CHECK-NEXT: ret i64 %min
+}
+
+; PR8994
+
+; This select instruction can't be eliminated because trying to do so would
+; change the number of vector elements. This used to assert.
+define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
+; CHECK: @test51
+  %select = select <3 x i1> %icmp, <3 x i16> zeroinitializer, <3 x i16> %tmp
+; CHECK: select <3 x i1>
+  %tmp2 = bitcast <3 x i16> %select to i48
+  ret i48 %tmp2
+}
+
+; PR8575
+
+define i32 @test52(i32 %n, i32 %m) nounwind {
+; CHECK: @test52
+  %cmp = icmp sgt i32 %n, %m
+  %. = select i1 %cmp, i32 1, i32 3
+  %add = add nsw i32 %., 3
+  %storemerge = select i1 %cmp, i32 %., i32 %add
+; CHECK: select i1 %cmp, i32 1, i32 6
+  ret i32 %storemerge
+}
+
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index 6deee1ff5653..60669b7a109f 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -110,7 +110,7 @@ entry:
         ret i32 %b
 ; CHECK: @test10
 ; CHECK:  shl i32 %i, 30
-; CHECK-NEXT: ashr i32
+; CHECK-NEXT: ashr exact i32
 ; CHECK-NEXT: ret i32
 }
 
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 871e9fe070e7..7fab1d2cab54 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -35,18 +35,32 @@ define i32 @test4(i8 %A) {
 
 define i32 @test5(i32 %A) {
 ; CHECK: @test5
-; CHECK: ret i32 0
+; CHECK: ret i32 undef
         %B = lshr i32 %A, 32  ;; shift all bits out 
         ret i32 %B
 }
 
 define i32 @test5a(i32 %A) {
 ; CHECK: @test5a
-; CHECK: ret i32 0
+; CHECK: ret i32 undef
         %B = shl i32 %A, 32     ;; shift all bits out 
         ret i32 %B
 }
 
+define i32 @test5b() {
+; CHECK: @test5b
+; CHECK: ret i32 -1
+        %B = ashr i32 undef, 2  ;; top two bits must be equal, so not undef
+        ret i32 %B
+}
+
+define i32 @test5b2(i32 %A) {
+; CHECK: @test5b2
+; CHECK: ret i32 -1
+        %B = ashr i32 undef, %A  ;; top %A bits must be equal, so not undef
+        ret i32 %B
+}
+
 define i32 @test6(i32 %A) {
 ; CHECK: @test6
 ; CHECK-NEXT: mul i32 %A, 6
@@ -437,7 +451,37 @@ entry:
   ret i64 %tmp46
   
 ; CHECK: @test37
-; CHECK:  %tmp23 = shl i128 %tmp22, 32
+; CHECK:  %tmp23 = shl nuw nsw i128 %tmp22, 32
 ; CHECK:  %ins = or i128 %tmp23, %A
 ; CHECK:  %tmp46 = trunc i128 %ins to i64
 }
+
+define i32 @test38(i32 %x) nounwind readnone {
+  %rem = srem i32 %x, 32
+  %shl = shl i32 1, %rem
+  ret i32 %shl
+; CHECK: @test38
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: shl i32 1
+; CHECK-NEXT: ret i32
+}
+
+; <rdar://problem/8756731>
+; CHECK: @test39
+define i8 @test39(i32 %a0) {
+entry:
+  %tmp4 = trunc i32 %a0 to i8
+; CHECK: and i8 %tmp49, 64
+  %tmp5 = shl i8 %tmp4, 5
+  %tmp48 = and i8 %tmp5, 32
+  %tmp49 = lshr i8 %tmp48, 5
+  %tmp50 = mul i8 %tmp49, 64
+  %tmp51 = xor i8 %tmp50, %tmp5
+; CHECK: and i8 %0, 16
+  %tmp52 = and i8 %tmp51, -128
+  %tmp53 = lshr i8 %tmp52, 7
+  %tmp54 = mul i8 %tmp53, 16
+  %tmp55 = xor i8 %tmp54, %tmp51
+; CHECK: ret i8 %tmp551
+  ret i8 %tmp55
+}
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index 49384d6275e8..ecee9830cd57 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 %x) {
         ret i32 %tmp.3
 ; CHECK: @test1
 ; CHECK: %sext = shl i32 %x, 16
-; CHECK: %tmp.3 = ashr i32 %sext, 16
+; CHECK: %tmp.3 = ashr exact i32 %sext, 16
 ; CHECK: ret i32 %tmp.3
 }
 
@@ -20,7 +20,7 @@ define i32 @test2(i32 %x) {
         ret i32 %tmp.3
 ; CHECK: @test2
 ; CHECK: %sext = shl i32 %x, 16
-; CHECK: %tmp.3 = ashr i32 %sext, 16
+; CHECK: %tmp.3 = ashr exact i32 %sext, 16
 ; CHECK: ret i32 %tmp.3
 }
 
@@ -51,7 +51,7 @@ define i32 @test5(i32 %x) {
         ret i32 %tmp.3
 ; CHECK: @test5
 ; CHECK: %sext = shl i32 %x, 24
-; CHECK: %tmp.3 = ashr i32 %sext, 24
+; CHECK: %tmp.3 = ashr exact i32 %sext, 24
 ; CHECK: ret i32 %tmp.3
 }
 
@@ -61,7 +61,7 @@ define i32 @test6(i32 %x) {
         ret i32 %tmp.4
 ; CHECK: @test6
 ; CHECK: %tmp.2 = shl i32 %x, 16
-; CHECK: %tmp.4 = ashr i32 %tmp.2, 16
+; CHECK: %tmp.4 = ashr exact i32 %tmp.2, 16
 ; CHECK: ret i32 %tmp.4
 }
 
@@ -82,6 +82,6 @@ entry:
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
 ; CHECK: @test8
-; CHECK: %sub = ashr i32 %x, 5
-; CHECK: ret i32 %sub
+; CHECK: %shr = ashr i32 %x, 5
+; CHECK: ret i32 %shr
 }
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 29bd7be2ff84..9656a7e862a8 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -40,7 +40,7 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) {
 	%E = sub i32 %A, %D	
 	ret i32 %E
 ; CHECK: @test5
-; CHECK: %D = sub i32 %C, %B
+; CHECK: %D1 = sub i32 %C, %B
 ; CHECK: %E = add
 ; CHECK: ret i32 %E
 }
@@ -209,7 +209,7 @@ define i1 @test22(i32 %a, i32 %b) zeroext nounwind  {
 	%tmp5 = icmp eq i32 %tmp2, %tmp4	
 	ret i1 %tmp5
 ; CHECK: @test22
-; CHECK: %tmp5 = icmp eq i32 %a, %b
+; CHECK: %tmp5 = icmp eq i32 %b, %a
 ; CHECK: ret i1 %tmp5
 }
 
@@ -256,7 +256,7 @@ define i64 @test24b(i8* %P, i64 %A){
   %G = sub i64 %C, ptrtoint ([42 x i16]* @Arr to i64)
   ret i64 %G
 ; CHECK: @test24b
-; CHECK-NEXT: shl i64 %A, 1
+; CHECK-NEXT: shl nuw i64 %A, 1
 ; CHECK-NEXT: ret i64 
 }
 
@@ -267,7 +267,7 @@ define i64 @test25(i8* %P, i64 %A){
   %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16]* @Arr, i64 1, i64 0) to i64)
   ret i64 %G
 ; CHECK: @test25
-; CHECK-NEXT: shl i64 %A, 1
+; CHECK-NEXT: shl nuw i64 %A, 1
 ; CHECK-NEXT: add i64 {{.*}}, -84
 ; CHECK-NEXT: ret i64 
 }
@@ -281,3 +281,23 @@ define i32 @test26(i32 %x) {
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @test27(i32 %x, i32 %y) {
+  %mul = mul i32 %y, -8
+  %sub = sub i32 %x, %mul
+  ret i32 %sub
+; CHECK: @test27
+; CHECK-NEXT: shl i32 %y, 3
+; CHECK-NEXT: add i32
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test28(i32 %x, i32 %y, i32 %z) {
+  %neg = sub i32 0, %z
+  %mul = mul i32 %neg, %y
+  %sub = sub i32 %x, %mul
+  ret i32 %sub
+; CHECK: @test28
+; CHECK-NEXT: mul i32 %z, %y
+; CHECK-NEXT: add i32
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index f98bfd9236cd..6ec342a4f5c1 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -24,7 +24,7 @@ define i64 @test2(i64 %a) {
   ret i64 %d
 ; CHECK: @test2
 ; CHECK: shl i64 %a, 36
-; CHECK: %d = ashr i64 {{.*}}, 36
+; CHECK: %d = ashr exact i64 {{.*}}, 36
 ; CHECK: ret i64 %d
 }
 define i64 @test3(i64 %a) {
@@ -92,8 +92,28 @@ define i64 @test8(i32 %A, i32 %B) {
 ; CHECK: @test8
 ; CHECK:   %tmp38 = zext i32 %A to i64
 ; CHECK:   %tmp32 = zext i32 %B to i64
-; CHECK:   %tmp33 = shl i64 %tmp32, 32
+; CHECK:   %tmp33 = shl nuw i64 %tmp32, 32
 ; CHECK:   %ins35 = or i64 %tmp33, %tmp38
 ; CHECK:   ret i64 %ins35
 }
 
+define i8 @test9(i32 %X) {
+  %Y = and i32 %X, 42
+  %Z = trunc i32 %Y to i8
+  ret i8 %Z
+; CHECK: @test9
+; CHECK: trunc
+; CHECK: and
+; CHECK: ret
+}
+
+; rdar://8808586
+define i8 @test10(i32 %X) {
+  %Y = trunc i32 %X to i8
+  %Z = and i8 %Y, 42
+  ret i8 %Z
+; CHECK: @test10
+; CHECK: trunc
+; CHECK: and
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts-2.ll b/test/Transforms/InstCombine/vec_demanded_elts-2.ll
deleted file mode 100644
index 41593612e55f..000000000000
--- a/test/Transforms/InstCombine/vec_demanded_elts-2.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep extractelement
-
-define void @get_image() nounwind {
-entry:
-        %0 = call i32 @fgetc(i8* null) nounwind               ; <i32> [#uses=1]
-        %1 = trunc i32 %0 to i8         ; <i8> [#uses=1]
-        %tmp2 = insertelement <100 x i8> zeroinitializer, i8 %1, i32 1          ; <<100 x i8>> [#uses=1]
-        %tmp1 = extractelement <100 x i8> %tmp2, i32 0          ; <i8> [#uses=1]
-        %2 = icmp eq i8 %tmp1, 80               ; <i1> [#uses=1]
-        br i1 %2, label %bb2, label %bb3
-
-bb2:            ; preds = %entry
-        br label %bb3
-
-bb3:            ; preds = %bb2, %entry
-        unreachable
-}
-
-declare i32 @fgetc(i8*)
diff --git a/test/Transforms/InstCombine/vec_demanded_elts-3.ll b/test/Transforms/InstCombine/vec_demanded_elts-3.ll
deleted file mode 100644
index 62e43701d24e..000000000000
--- a/test/Transforms/InstCombine/vec_demanded_elts-3.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep load
-; PR4340
-
-define void @vac(<4 x float>* nocapture %a) nounwind {
-entry:
-	%tmp1 = load <4 x float>* %a		; <<4 x float>> [#uses=1]
-	%vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
-	%vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
-	%vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
-	%vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
-	store <4 x float> %vecins8, <4 x float>* %a
-	ret void
-}
-
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 2009a776b151..9f308aa093ed 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,17 +1,13 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {fadd float}
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep {fmul float}
-; RUN: opt < %s -instcombine -S | \
-; RUN:   not grep {insertelement.*0.00}
-; RUN: opt < %s -instcombine -S | \
-; RUN:   not grep {call.*llvm.x86.sse.mul}
-; RUN: opt < %s -instcombine -S | \
-; RUN:   not grep {call.*llvm.x86.sse.sub}
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i16 @test1(float %f) {
 entry:
+; CHECK: @test1
+; CHECK: fmul float
+; CHECK-NOT: insertelement {{.*}} 0.00
+; CHECK-NOT: call {{.*}} @llvm.x86.sse.mul
+; CHECK-NOT: call {{.*}} @llvm.x86.sse.sub
+; CHECK: ret
 	%tmp = insertelement <4 x float> undef, float %f, i32 0		; <<4 x float>> [#uses=1]
 	%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
 	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
@@ -26,16 +22,104 @@ entry:
 }
 
 define i32 @test2(float %f) {
-        %tmp5 = fmul float %f, %f
-        %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0             
-        %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1    
-        %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2  
-        %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 
-        %tmp19 = bitcast <4 x float> %tmp12 to <4 x i32>  
-        %tmp21 = extractelement <4 x i32> %tmp19, i32 0  
-        ret i32 %tmp21
+; CHECK: @test2
+; CHECK-NOT: insertelement
+; CHECK-NOT: extractelement
+; CHECK: ret
+  %tmp5 = fmul float %f, %f
+  %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
+  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+  %tmp19 = bitcast <4 x float> %tmp12 to <4 x i32>
+  %tmp21 = extractelement <4 x i32> %tmp19, i32 0
+  ret i32 %tmp21
 }
 
+define i64 @test3(float %f, double %d) {
+; CHECK: @test3
+; CHECK-NOT: insertelement {{.*}} 0.00
+; CHECK: ret
+entry:
+  %v00 = insertelement <4 x float> undef, float %f, i32 0
+  %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+  %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+  %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+  %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
+  %v10 = insertelement <4 x float> undef, float %f, i32 0
+  %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+  %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+  %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+  %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
+  %v20 = insertelement <4 x float> undef, float %f, i32 0
+  %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+  %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+  %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+  %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
+  %v30 = insertelement <4 x float> undef, float %f, i32 0
+  %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+  %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+  %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+  %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
+  %v40 = insertelement <2 x double> undef, double %d, i32 0
+  %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+  %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
+  %v50 = insertelement <2 x double> undef, double %d, i32 0
+  %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+  %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
+  %v60 = insertelement <2 x double> undef, double %d, i32 0
+  %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+  %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
+  %v70 = insertelement <2 x double> undef, double %d, i32 0
+  %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+  %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
+  %tmp8 = add i32 %tmp0, %tmp2
+  %tmp9 = add i32 %tmp4, %tmp6
+  %tmp10 = add i32 %tmp8, %tmp9
+  %tmp11 = sext i32 %tmp10 to i64
+  %tmp12 = add i64 %tmp1, %tmp3
+  %tmp13 = add i64 %tmp5, %tmp7
+  %tmp14 = add i64 %tmp12, %tmp13
+  %tmp15 = add i64 %tmp11, %tmp14
+  ret i64 %tmp15
+}
+
+define void @get_image() nounwind {
+; CHECK: @get_image
+; CHECK-NOT: extractelement
+; CHECK: unreachable
+entry:
+  %0 = call i32 @fgetc(i8* null) nounwind               ; <i32> [#uses=1]
+  %1 = trunc i32 %0 to i8         ; <i8> [#uses=1]
+  %tmp2 = insertelement <100 x i8> zeroinitializer, i8 %1, i32 1          ; <<100 x i8>> [#uses=1]
+  %tmp1 = extractelement <100 x i8> %tmp2, i32 0          ; <i8> [#uses=1]
+  %2 = icmp eq i8 %tmp1, 80               ; <i1> [#uses=1]
+  br i1 %2, label %bb2, label %bb3
+
+bb2:            ; preds = %entry
+  br label %bb3
+
+bb3:            ; preds = %bb2, %entry
+  unreachable
+}
+
+; PR4340
+define void @vac(<4 x float>* nocapture %a) nounwind {
+; CHECK: @vac
+; CHECK-NOT: load
+; CHECK: ret
+entry:
+	%tmp1 = load <4 x float>* %a		; <<4 x float>> [#uses=1]
+	%vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
+	%vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
+	%vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
+	%vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
+	store <4 x float> %vecins8, <4 x float>* %a
+	ret void
+}
+
+declare i32 @fgetc(i8*)
+
 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
 
 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
@@ -44,4 +128,11 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
 
 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
 
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
diff --git a/test/Transforms/InstCombine/vec_sext.ll b/test/Transforms/InstCombine/vec_sext.ll
new file mode 100644
index 000000000000..d7ab96b9cfd8
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_sext.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x i32> @psignd_3(<4 x i32> %a, <4 x i32> %b) nounwind ssp {
+entry:
+  %cmp = icmp slt <4 x i32> %b, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %sub = sub nsw <4 x i32> zeroinitializer, %a
+  %0 = icmp slt <4 x i32> %sext, zeroinitializer
+  %sext3 = sext <4 x i1> %0 to <4 x i32>
+  %1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %2 = and <4 x i32> %a, %1
+  %3 = and <4 x i32> %sext3, %sub
+  %cond = or <4 x i32> %2, %3
+  ret <4 x i32> %cond
+
+; CHECK:   ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+; CHECK:   sub nsw <4 x i32> zeroinitializer, %a
+; CHECK:   xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK:   and <4 x i32> %a, %0
+; CHECK:   and <4 x i32> %b.lobit, %sub
+; CHECK:   or <4 x i32> %1, %2
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 5132a8ff9bd1..bd36e9ecb1eb 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -87,3 +87,26 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind {
 	%tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >		; <<4 x i8>> [#uses=1]
 	ret <4 x i8> %tmp9
 }
+
+; Same as test9, but make sure that "undef" mask values are not confused with
+; mask values of 2*N, where N is the mask length.  These shuffles should not
+; be folded (because [8,9,4,8] may not be a mask supported by the target).
+define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind {
+; CHECK: @test9a
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+	%tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 >		; <<4 x i8>> [#uses=1]
+	%tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >		; <<4 x i8>> [#uses=1]
+	ret <4 x i8> %tmp9
+}
+
+; Redundant vector splats should be removed.  Radar 8597790.
+define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+  %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %tmp7 = shufflevector <4 x i32> %tmp6, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %tmp7
+}
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 24bd04dcb2db..e931dc79ef41 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -121,3 +121,31 @@ define <2 x double> @fc(<2 x double> %t) {
   %b = sitofp <2 x i64> %a to <2 x double>
   ret <2 x double> %b
 }
+
+; PR9228
+; This was a crasher, so no CHECK statements.
+define <4 x float> @f(i32 %a) nounwind alwaysinline {
+; CHECK: @f
+entry:
+  %dim = insertelement <4 x i32> undef, i32 %a, i32 0
+  %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
+  %dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
+  %dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3
+
+  %offset_ptr = getelementptr <4 x float>* null, i32 1
+  %offset_int = ptrtoint <4 x float>* %offset_ptr to i64
+  %sizeof32 = trunc i64 %offset_int to i32
+
+  %smearinsert33 = insertelement <4 x i32> undef, i32 %sizeof32, i32 0
+  %smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
+  %smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
+  %smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3
+
+  %delta_scale = mul <4 x i32> %dim32, %smearinsert36
+  %offset_delta = add <4 x i32> zeroinitializer, %delta_scale
+
+  %offset_varying_delta = add <4 x i32> %offset_delta, undef
+
+  ret <4 x float> undef
+}
+
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 67f05efa23d4..89f00bd68475 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -34,7 +34,7 @@ define i32 @test2(i32 %tmp1) {
 define i32 @test3(i32 %tmp1) {
 ; CHECK:      @test3
 ; CHECK-NEXT:   and i32 %tmp1, 32
-; CHECK-NEXT:   or i32 %tmp, 8
+; CHECK-NEXT:   or i32 %ovm, 8
 ; CHECK-NEXT:   ret i32
   %ovm = or i32 %tmp1, 145 
   %ov31 = and i32 %ovm, 177
diff --git a/test/Transforms/InstSimplify/2010-12-20-Boolean.ll b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
new file mode 100644
index 000000000000..3aa1bd60cfd6
--- /dev/null
+++ b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i1 @add(i1 %x) {
+; CHECK: @add
+  %z = add i1 %x, %x
+  ret i1 %z
+; CHECK: ret i1 false
+}
+
+define i1 @sub(i1 %x) {
+; CHECK: @sub
+  %z = sub i1 false, %x
+  ret i1 %z
+; CHECK: ret i1 %x
+}
+
+define i1 @mul(i1 %x) {
+; CHECK: @mul
+  %z = mul i1 %x, %x
+  ret i1 %z
+; CHECK: ret i1 %x
+}
+
+define i1 @ne(i1 %x) {
+; CHECK: @ne
+  %z = icmp ne i1 %x, 0
+  ret i1 %z
+; CHECK: ret i1 %x
+}
diff --git a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll b/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
new file mode 100644
index 000000000000..d20abd68c200
--- /dev/null
+++ b/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @factorize(i32 %x, i32 %y) {
+; CHECK: @factorize
+; (X | 1) & (X | 2) -> X | (1 & 2) -> X
+  %l = or i32 %x, 1
+  %r = or i32 %x, 2
+  %z = and i32 %l, %r
+  ret i32 %z
+; CHECK: ret i32 %x
+}
+
+define i32 @factorize2(i32 %x) {
+; CHECK: @factorize2
+; 3*X - 2*X -> X
+  %l = mul i32 3, %x
+  %r = mul i32 2, %x
+  %z = sub i32 %l, %r
+  ret i32 %z
+; CHECK: ret i32 %x
+}
+
+define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
+; CHECK: @factorize3
+; (X | (A|B)) & (X | B) -> X | ((A|B) & B) -> X | B
+  %aORb = or i32 %a, %b
+  %l = or i32 %x, %aORb
+  %r = or i32 %x, %b
+  %z = and i32 %l, %r
+  ret i32 %z
+; CHECK: ret i32 %r
+}
+
+define i32 @factorize4(i32 %x, i32 %y) {
+; CHECK: @factorize4
+  %sh = shl i32 %y, 1
+  %ml = mul i32 %sh, %x
+  %mr = mul i32 %x, %y
+  %s = sub i32 %ml, %mr
+  ret i32 %s
+; CHECK: ret i32 %mr
+}
+
+define i32 @factorize5(i32 %x, i32 %y) {
+; CHECK: @factorize5
+  %sh = mul i32 %y, 2
+  %ml = mul i32 %sh, %x
+  %mr = mul i32 %x, %y
+  %s = sub i32 %ml, %mr
+  ret i32 %s
+; CHECK: ret i32 %mr
+}
+
+define i32 @expand(i32 %x) {
+; CHECK: @expand
+; ((X & 1) | 2) & 1 -> ((X & 1) & 1) | (2 & 1) -> (X & 1) | 0 -> X & 1
+  %a = and i32 %x, 1
+  %b = or i32 %a, 2
+  %c = and i32 %b, 1
+  ret i32 %c
+; CHECK: ret i32 %a
+}
diff --git a/test/Transforms/InstSimplify/2011-01-14-Thread.ll b/test/Transforms/InstSimplify/2011-01-14-Thread.ll
new file mode 100644
index 000000000000..8fc4dc5d5bb7
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-01-14-Thread.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @shift_select(i1 %cond) {
+; CHECK: @shift_select
+  %s = select i1 %cond, i32 0, i32 1
+  %r = lshr i32 %s, 1
+  ret i32 %r
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstSimplify/2011-02-01-Vector.ll b/test/Transforms/InstSimplify/2011-02-01-Vector.ll
new file mode 100644
index 000000000000..3039a663fa45
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-02-01-Vector.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define <2 x i32> @sdiv(<2 x i32> %x) {
+; CHECK: @sdiv
+  %div = sdiv <2 x i32> %x, <i32 1, i32 1>
+  ret <2 x i32> %div
+; CHECK: ret <2 x i32> %x
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
new file mode 100644
index 000000000000..250e44ce340f
--- /dev/null
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -0,0 +1,189 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "p:32:32"
+
+define i1 @ptrtoint() {
+; CHECK: @ptrtoint
+  %a = alloca i8
+  %tmp = ptrtoint i8* %a to i32
+  %r = icmp eq i32 %tmp, 0
+  ret i1 %r
+; CHECK: ret i1 false
+}
+
+define i1 @zext(i32 %x) {
+; CHECK: @zext
+  %e1 = zext i32 %x to i64
+  %e2 = zext i32 %x to i64
+  %r = icmp eq i64 %e1, %e2
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @zext2(i1 %x) {
+; CHECK: @zext2
+  %e = zext i1 %x to i32
+  %c = icmp ne i32 %e, 0
+  ret i1 %c
+; CHECK: ret i1 %x
+}
+
+define i1 @zext3() {
+; CHECK: @zext3
+  %e = zext i1 1 to i32
+  %c = icmp ne i32 %e, 0
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @sext(i32 %x) {
+; CHECK: @sext
+  %e1 = sext i32 %x to i64
+  %e2 = sext i32 %x to i64
+  %r = icmp eq i64 %e1, %e2
+  ret i1 %r
+; CHECK: ret i1 true
+}
+
+define i1 @sext2(i1 %x) {
+; CHECK: @sext2
+  %e = sext i1 %x to i32
+  %c = icmp ne i32 %e, 0
+  ret i1 %c
+; CHECK: ret i1 %x
+}
+
+define i1 @sext3() {
+; CHECK: @sext3
+  %e = sext i1 1 to i32
+  %c = icmp ne i32 %e, 0
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @add(i32 %x, i32 %y) {
+; CHECK: @add
+  %l = lshr i32 %x, 1
+  %q = lshr i32 %y, 1
+  %r = or i32 %q, 1
+  %s = add i32 %l, %r
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @add2(i8 %x, i8 %y) {
+; CHECK: @add2
+  %l = or i8 %x, 128
+  %r = or i8 %y, 129
+  %s = add i8 %l, %r
+  %c = icmp eq i8 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @add3(i8 %x, i8 %y) {
+; CHECK: @add3
+  %l = zext i8 %x to i32
+  %r = zext i8 %y to i32
+  %s = add i32 %l, %r
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 %c
+}
+
+define i1 @add4(i32 %x, i32 %y) {
+; CHECK: @add4
+  %z = add nsw i32 %y, 1
+  %s1 = add nsw i32 %x, %y
+  %s2 = add nsw i32 %x, %z
+  %c = icmp slt i32 %s1, %s2
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @add5(i32 %x, i32 %y) {
+; CHECK: @add5
+  %z = add nuw i32 %y, 1
+  %s1 = add nuw i32 %x, %z
+  %s2 = add nuw i32 %x, %y
+  %c = icmp ugt i32 %s1, %s2
+  ret i1 %c
+; CHECK: ret i1 true
+}
+
+define i1 @addpowtwo(i32 %x, i32 %y) {
+; CHECK: @addpowtwo
+  %l = lshr i32 %x, 1
+  %r = shl i32 1, %y
+  %s = add i32 %l, %r
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @or(i32 %x) {
+; CHECK: @or
+  %o = or i32 %x, 1
+  %c = icmp eq i32 %o, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @shl(i32 %x) {
+; CHECK: @shl
+  %s = shl i32 1, %x
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @lshr(i32 %x) {
+; CHECK: @lshr
+  %s = lshr i32 -1, %x
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @ashr(i32 %x) {
+; CHECK: @ashr
+  %s = ashr i32 -1, %x
+  %c = icmp eq i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 false
+}
+
+define i1 @select1(i1 %cond) {
+; CHECK: @select1
+  %s = select i1 %cond, i32 1, i32 0
+  %c = icmp eq i32 %s, 1
+  ret i1 %c
+; CHECK: ret i1 %cond
+}
+
+define i1 @select2(i1 %cond) {
+; CHECK: @select2
+  %x = zext i1 %cond to i32
+  %s = select i1 %cond, i32 %x, i32 0
+  %c = icmp ne i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 %cond
+}
+
+define i1 @select3(i1 %cond) {
+; CHECK: @select3
+  %x = zext i1 %cond to i32
+  %s = select i1 %cond, i32 1, i32 %x
+  %c = icmp ne i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 %cond
+}
+
+define i1 @select4(i1 %cond) {
+; CHECK: @select4
+  %invert = xor i1 %cond, 1
+  %s = select i1 %invert, i32 0, i32 1
+  %c = icmp ne i32 %s, 0
+  ret i1 %c
+; CHECK: ret i1 %cond
+}
diff --git a/test/Transforms/InstSimplify/dg.exp b/test/Transforms/InstSimplify/dg.exp
new file mode 100644
index 000000000000..f2005891a59a
--- /dev/null
+++ b/test/Transforms/InstSimplify/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstSimplify/exact-nsw-nuw.ll b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
new file mode 100644
index 000000000000..f3a804eb5b5e
--- /dev/null
+++ b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; PR8862
+
+; CHECK: @shift1
+; CHECK: ret i32 %A
+define i32 @shift1(i32 %A, i32 %B) {
+  %C = lshr exact i32 %A, %B
+  %D = shl nuw i32 %C, %B
+  ret i32 %D
+}
+
+; CHECK: @shift2
+; CHECK: lshr
+; CHECK: ret i32 %D
+define i32 @shift2(i32 %A, i32 %B) {
+  %C = lshr i32 %A, %B
+  %D = shl nuw i32 %C, %B
+  ret i32 %D
+}
+
+; CHECK: @shift3
+; CHECK: ret i32 %A
+define i32 @shift3(i32 %A, i32 %B) {
+  %C = ashr exact i32 %A, %B
+  %D = shl nuw i32 %C, %B
+  ret i32 %D
+}
+
+; CHECK: @shift4
+; CHECK: ret i32 %A
+define i32 @shift4(i32 %A, i32 %B) {
+  %C = shl nuw i32 %A, %B
+  %D = lshr i32 %C, %B
+  ret i32 %D
+}
+
+; CHECK: @shift5
+; CHECK: ret i32 %A
+define i32 @shift5(i32 %A, i32 %B) {
+  %C = shl nsw i32 %A, %B
+  %D = ashr i32 %C, %B
+  ret i32 %D
+}
diff --git a/test/Transforms/InstSimplify/fdiv.ll b/test/Transforms/InstSimplify/fdiv.ll
new file mode 100644
index 000000000000..9d85154b240f
--- /dev/null
+++ b/test/Transforms/InstSimplify/fdiv.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define double @fdiv_of_undef(double %X) {
+; CHECK: @fdiv_of_undef
+; undef / X -> undef
+  %r = fdiv double undef, %X
+  ret double %r
+; CHECK: ret double undef
+}
+
+define double @fdiv_by_undef(double %X) {
+; CHECK: @fdiv_by_undef
+; X / undef -> undef
+  %r = fdiv double %X, undef
+  ret double %r
+; CHECK: ret double undef
+}
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
new file mode 100644
index 000000000000..3c8169e5e283
--- /dev/null
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -0,0 +1,186 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @add1(i32 %x) {
+; CHECK: @add1
+; (X + -1) + 1 -> X
+  %l = add i32 %x, -1
+  %r = add i32 %l, 1
+  ret i32 %r
+; CHECK: ret i32 %x
+}
+
+define i32 @and1(i32 %x, i32 %y) {
+; CHECK: @and1
+; (X & Y) & X -> X & Y
+  %l = and i32 %x, %y
+  %r = and i32 %l, %x
+  ret i32 %r
+; CHECK: ret i32 %l
+}
+
+define i32 @and2(i32 %x, i32 %y) {
+; CHECK: @and2
+; X & (X & Y) -> X & Y
+  %r = and i32 %x, %y
+  %l = and i32 %x, %r
+  ret i32 %l
+; CHECK: ret i32 %r
+}
+
+define i32 @or1(i32 %x, i32 %y) {
+; CHECK: @or1
+; (X | Y) | X -> X | Y
+  %l = or i32 %x, %y
+  %r = or i32 %l, %x
+  ret i32 %r
+; CHECK: ret i32 %l
+}
+
+define i32 @or2(i32 %x, i32 %y) {
+; CHECK: @or2
+; X | (X | Y) -> X | Y
+  %r = or i32 %x, %y
+  %l = or i32 %x, %r
+  ret i32 %l
+; CHECK: ret i32 %r
+}
+
+define i32 @xor1(i32 %x, i32 %y) {
+; CHECK: @xor1
+; (X ^ Y) ^ X = Y
+  %l = xor i32 %x, %y
+  %r = xor i32 %l, %x
+  ret i32 %r
+; CHECK: ret i32 %y
+}
+
+define i32 @xor2(i32 %x, i32 %y) {
+; CHECK: @xor2
+; X ^ (X ^ Y) = Y
+  %r = xor i32 %x, %y
+  %l = xor i32 %x, %r
+  ret i32 %l
+; CHECK: ret i32 %y
+}
+
+define i32 @sub1(i32 %x, i32 %y) {
+; CHECK: @sub1
+  %d = sub i32 %x, %y
+  %r = sub i32 %x, %d
+  ret i32 %r
+; CHECK: ret i32 %y
+}
+
+define i32 @sub2(i32 %x) {
+; CHECK: @sub2
+; X - (X + 1) -> -1
+  %xp1 = add i32 %x, 1
+  %r = sub i32 %x, %xp1
+  ret i32 %r
+; CHECK: ret i32 -1
+}
+
+define i32 @sub3(i32 %x, i32 %y) {
+; CHECK: @sub3
+; ((X + 1) + Y) - (Y + 1) -> X
+  %xp1 = add i32 %x, 1
+  %lhs = add i32 %xp1, %y
+  %rhs = add i32 %y, 1
+  %r = sub i32 %lhs, %rhs
+  ret i32 %r
+; CHECK: ret i32 %x
+}
+
+define i32 @sdiv1(i32 %x, i32 %y) {
+; CHECK: @sdiv1
+; (no overflow X * Y) / Y -> X
+  %mul = mul nsw i32 %x, %y
+  %r = sdiv i32 %mul, %y
+  ret i32 %r
+; CHECK: ret i32 %x
+}
+
+define i32 @sdiv2(i32 %x, i32 %y) {
+; CHECK: @sdiv2
+; (((X / Y) * Y) / Y) -> X / Y
+  %div = sdiv i32 %x, %y
+  %mul = mul i32 %div, %y
+  %r = sdiv i32 %mul, %y
+  ret i32 %r
+; CHECK: ret i32 %div
+}
+
+define i32 @sdiv3(i32 %x, i32 %y) {
+; CHECK: @sdiv3
+; (X rem Y) / Y -> 0
+  %rem = srem i32 %x, %y
+  %div = sdiv i32 %rem, %y
+  ret i32 %div
+; CHECK: ret i32 0
+}
+
+define i32 @sdiv4(i32 %x, i32 %y) {
+; CHECK: @sdiv4
+; (X / Y) * Y -> X if the division is exact
+  %div = sdiv exact i32 %x, %y
+  %mul = mul i32 %div, %y
+  ret i32 %mul
+; CHECK: ret i32 %x
+}
+
+define i32 @sdiv5(i32 %x, i32 %y) {
+; CHECK: @sdiv5
+; Y * (X / Y) -> X if the division is exact
+  %div = sdiv exact i32 %x, %y
+  %mul = mul i32 %y, %div
+  ret i32 %mul
+; CHECK: ret i32 %x
+}
+
+
+define i32 @udiv1(i32 %x, i32 %y) {
+; CHECK: @udiv1
+; (no overflow X * Y) / Y -> X
+  %mul = mul nuw i32 %x, %y
+  %r = udiv i32 %mul, %y
+  ret i32 %r
+; CHECK: ret i32 %x
+}
+
+define i32 @udiv2(i32 %x, i32 %y) {
+; CHECK: @udiv2
+; (((X / Y) * Y) / Y) -> X / Y
+  %div = udiv i32 %x, %y
+  %mul = mul i32 %div, %y
+  %r = udiv i32 %mul, %y
+  ret i32 %r
+; CHECK: ret i32 %div
+}
+
+define i32 @udiv3(i32 %x, i32 %y) {
+; CHECK: @udiv3
+; (X rem Y) / Y -> 0
+  %rem = urem i32 %x, %y
+  %div = udiv i32 %rem, %y
+  ret i32 %div
+; CHECK: ret i32 0
+}
+
+define i32 @udiv4(i32 %x, i32 %y) {
+; CHECK: @udiv4
+; (X / Y) * Y -> X if the division is exact
+  %div = udiv exact i32 %x, %y
+  %mul = mul i32 %div, %y
+  ret i32 %mul
+; CHECK: ret i32 %x
+}
+
+define i32 @udiv5(i32 %x, i32 %y) {
+; CHECK: @udiv5
+; Y * (X / Y) -> X if the division is exact
+  %div = udiv exact i32 %x, %y
+  %mul = mul i32 %y, %div
+  ret i32 %mul
+; CHECK: ret i32 %x
+}
+
diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll
index 17a0aba2faef..2d6caf752e09 100644
--- a/test/Transforms/JumpThreading/2010-08-26-and.ll
+++ b/test/Transforms/JumpThreading/2010-08-26-and.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -enable-jump-threading-lvi -S < %s | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/JumpThreading/and-and-cond.ll b/test/Transforms/JumpThreading/and-and-cond.ll
index e6db9ee5a325..765d940cc7c3 100644
--- a/test/Transforms/JumpThreading/and-and-cond.ll
+++ b/test/Transforms/JumpThreading/and-and-cond.ll
@@ -1,14 +1,14 @@
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | grep {ret i32 %v1}
-; There should be no uncond branches left.
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | not grep {br label}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | FileCheck %s
 
 declare i32 @f1()
 declare i32 @f2()
 declare void @f3()
 
 define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) {
+; CHECK: test
 	br i1 %cond, label %T1, label %F1
 
+; CHECK-NOT: T1:
 T1:
 	%v1 = call i32 @f1()
 	br label %Merge
@@ -18,6 +18,10 @@ F1:
 	br label %Merge
 
 Merge:
+; CHECK: Merge:
+; CHECK: %v1 = call i32 @f1()
+; CHECK-NEXT: %D = and i1 %cond2, %cond3
+; CHECK-NEXT: br i1 %D
 	%A = phi i1 [true, %T1], [false, %F1]
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	%C = and i1 %A, %cond2
diff --git a/test/Transforms/JumpThreading/and-cond.ll b/test/Transforms/JumpThreading/and-cond.ll
index 58dbec72a76e..0159bb3bb761 100644
--- a/test/Transforms/JumpThreading/and-cond.ll
+++ b/test/Transforms/JumpThreading/and-cond.ll
@@ -1,14 +1,14 @@
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | grep {ret i32 %v1}
-; There should be no uncond branches left.
-; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | not grep {br label}
+; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg  -S | FileCheck %s
 
 declare i32 @f1()
 declare i32 @f2()
 declare void @f3()
 
 define i32 @test(i1 %cond, i1 %cond2) {
+; CHECK: test
 	br i1 %cond, label %T1, label %F1
 
+; CHECK-NOT: T1
 T1:
 	%v1 = call i32 @f1()
 	br label %Merge
@@ -18,6 +18,9 @@ F1:
 	br label %Merge
 
 Merge:
+; CHECK: Merge:
+; CHECK: %v1 = call i32 @f1()
+; CHECK-NEXT: br i1 %cond2
 	%A = phi i1 [true, %T1], [false, %F1]
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	%C = and i1 %A, %cond2
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index cd274e78c9fc..46271379bd0d 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -jump-threading -S -enable-jump-threading-lvi | FileCheck %s
+; RUN: opt %s -jump-threading -S | FileCheck %s
 
 declare i32 @f1()
 declare i32 @f2()
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 751bc6518a1a..aed51a1c9ee9 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -484,3 +484,30 @@ bb269.us.us:
 bb288.bb289.loopexit_crit_edge:
   unreachable
 }
+
+; PR 8247
+%struct.S1 = type { i8, i8 }
+@func_89.l_245 = internal constant %struct.S1 { i8 33, i8 6 }, align 1
+define void @func_89(i16 zeroext %p_90, %struct.S1* nocapture %p_91, i32* nocapture %p_92) nounwind ssp {
+entry:
+  store i32 0, i32* %p_92, align 4
+  br i1 false, label %lbl_260, label %if.else
+
+if.else:                                          ; preds = %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %lbl_260, %if.else
+  %l_245.0 = phi i16 [ %l_245.1, %lbl_260 ], [ 33, %if.else ]
+  %l_261.0 = phi i32 [ %and, %lbl_260 ], [ 255, %if.else ]
+  %tobool21 = icmp ult i16 %l_245.0, 256
+  br i1 %tobool21, label %if.end, label %lbl_260
+
+lbl_260:                                          ; preds = %for.cond, %entry
+  %l_245.1 = phi i16 [ 1569, %entry ], [ %l_245.0, %for.cond ]
+  %l_261.1 = phi i32 [ 255, %entry ], [ %l_261.0, %for.cond ]
+  %and = and i32 %l_261.1, 1
+  br label %for.cond
+
+if.end:                                           ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/JumpThreading/degenerate-phi.ll b/test/Transforms/JumpThreading/degenerate-phi.ll
new file mode 100644
index 000000000000..35d9fdec4281
--- /dev/null
+++ b/test/Transforms/JumpThreading/degenerate-phi.ll
@@ -0,0 +1,24 @@
+; RUN: opt -jump-threading -disable-output %s
+; PR9112
+
+; This is actually a test for value tracking. Jump threading produces
+; "%phi = phi i16" when it removes all edges leading to %unreachable.
+; The .ll parser won't let us write that directly since it's invalid code.
+
+define void @func() nounwind {
+entry:
+  br label %bb
+
+bb:
+  br label %bb
+
+unreachable:
+  %phi = phi i16 [ %add, %unreachable ], [ 0, %next ]
+  %add = add i16 0, %phi
+  %cmp = icmp slt i16 %phi, 0
+  br i1 %cmp, label %unreachable, label %next
+
+next:
+  br label %unreachable
+}
+
diff --git a/test/Transforms/JumpThreading/indirectbr.ll b/test/Transforms/JumpThreading/indirectbr.ll
new file mode 100644
index 000000000000..141277fec626
--- /dev/null
+++ b/test/Transforms/JumpThreading/indirectbr.ll
@@ -0,0 +1,94 @@
+; RUN: opt -S < %s -jump-threading | FileCheck %s
+
+; Keep block addresses alive.
+@addresses = constant [4 x i8*] [
+  i8* blockaddress(@test1, %L1), i8* blockaddress(@test1, %L2),
+  i8* blockaddress(@test2, %L1), i8* blockaddress(@test2, %L2)
+]
+
+declare void @bar()
+declare void @baz()
+
+
+
+; Check basic jump threading for indirectbr instructions.
+
+; CHECK: void @test1
+; CHECK: br i1 %tobool, label %L1, label %indirectgoto
+; CHECK-NOT: if.else:
+; CHECK: L1:
+; CHECK: indirectbr i8* %address, [label %L1, label %L2]
+define void @test1(i32 %i, i8* %address) nounwind {
+entry:
+  %rem = srem i32 %i, 2
+  %tobool = icmp ne i32 %rem, 0
+  br i1 %tobool, label %indirectgoto, label %if.else
+
+if.else:                                          ; preds = %entry
+  br label %indirectgoto
+
+L1:                                               ; preds = %indirectgoto
+  call void @bar()
+  ret void
+
+L2:                                               ; preds = %indirectgoto
+  call void @baz()
+  ret void
+
+indirectgoto:                                     ; preds = %if.else, %entry
+  %indirect.goto.dest = phi i8* [ %address, %if.else ], [ blockaddress(@test1, %L1), %entry ]
+  indirectbr i8* %indirect.goto.dest, [label %L1, label %L2]
+}
+
+
+; Check constant folding of indirectbr
+
+; CHECK: void @test2
+; CHECK: entry:
+; CHECK-NEXT: br label %L1
+; CHECK: L1:
+; CHECK-NEXT: call void @bar
+; CHECK-NEXT: ret void
+define void @test2() nounwind {
+entry:
+  indirectbr i8* blockaddress(@test2, %L1), [label %L1, label %L2]
+
+L1:                                               ; preds = %indirectgoto
+  call void @bar()
+  ret void
+
+L2:                                               ; preds = %indirectgoto
+  call void @baz()
+  ret void
+}
+
+
+; PR4151
+; Don't merge address-taken blocks.
+@.str = private unnamed_addr constant [4 x i8] c"%p\0A\00"
+
+; CHECK: @test3
+; CHECK: __here:
+; CHECK: blockaddress(@test3, %__here)
+; CHECK: __here1:
+; CHECK: blockaddress(@test3, %__here1)
+; CHECK: __here3:
+; CHECK: blockaddress(@test3, %__here3)
+define void @test3() nounwind ssp noredzone {
+entry:
+  br label %__here
+
+__here:                                           ; preds = %entry
+  %call = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here) to i64)) nounwind noredzone
+  br label %__here1
+
+__here1:                                          ; preds = %__here
+  %call2 = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here1) to i64)) nounwind noredzone
+  br label %__here3
+
+__here3:                                          ; preds = %__here1
+  %call4 = call i32 (...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i64 ptrtoint (i8* blockaddress(@test3, %__here3) to i64)) nounwind noredzone
+  ret void
+}
+
+declare i32 @printf(...) noredzone
diff --git a/test/Transforms/JumpThreading/lvi-load.ll b/test/Transforms/JumpThreading/lvi-load.ll
index 0bf4187d544b..2a4cf925102f 100644
--- a/test/Transforms/JumpThreading/lvi-load.ll
+++ b/test/Transforms/JumpThreading/lvi-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -jump-threading -enable-jump-threading-lvi -dce < %s | FileCheck %s
+; RUN: opt -S -jump-threading -dce < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.4"
 
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
new file mode 100644
index 000000000000..8a81857736a7
--- /dev/null
+++ b/test/Transforms/JumpThreading/select.ll
@@ -0,0 +1,123 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+declare void @baz()
+declare void @quux()
+
+
+; Jump threading of branch with select as condition.
+; Mostly theoretical since instruction combining simplifies all selects of
+; booleans where at least one operand is true/false/undef.
+
+; CHECK: @test_br
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1,
+define void @test_br(i1 %cond, i1 %value) nounwind {
+entry:
+  br i1 %cond, label %L0, label %L3
+L0:
+  %expr = select i1 %cond, i1 true, i1 %value
+  br i1 %expr, label %L1, label %L2
+
+L1:
+  call void @foo()
+  ret void
+L2:
+  call void @bar()
+  ret void
+L3:
+  call void @baz()
+  br label %L0
+}
+
+
+; Jump threading of switch with select as condition.
+
+; CHECK: @test_switch
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1,
+define void @test_switch(i1 %cond, i8 %value) nounwind {
+entry:
+  br i1 %cond, label %L0, label %L4
+L0:
+  %expr = select i1 %cond, i8 1, i8 %value
+  switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2]
+
+L1:
+  call void @foo()
+  ret void
+L2:
+  call void @bar()
+  ret void
+L3:
+  call void @baz()
+  ret void
+L4:
+  call void @quux()
+  br label %L0
+}
+
+; Make sure the blocks in the indirectbr test aren't trivially removable as
+; successors by taking their addresses.
+@anchor = constant [3 x i8*] [
+  i8* blockaddress(@test_indirectbr, %L1),
+  i8* blockaddress(@test_indirectbr, %L2),
+  i8* blockaddress(@test_indirectbr, %L3)
+]
+
+
+; Jump threading of indirectbr with select as address.
+
+; CHECK: @test_indirectbr
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1, label %L3
+define void @test_indirectbr(i1 %cond, i8* %address) nounwind {
+entry:
+  br i1 %cond, label %L0, label %L3
+L0:
+  %indirect.goto.dest = select i1 %cond, i8* blockaddress(@test_indirectbr, %L1), i8* %address
+  indirectbr i8* %indirect.goto.dest, [label %L1, label %L2, label %L3]
+
+L1:
+  call void @foo()
+  ret void
+L2:
+  call void @bar()
+  ret void
+L3:
+  call void @baz()
+  ret void
+}
+
+
+; A more complicated case: the condition is a select based on a comparison.
+
+; CHECK: @test_switch_cmp
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L0, label %[[THREADED:[A-Za-z.0-9]+]]
+; CHECK: [[THREADED]]:
+; CHECK-NEXT: call void @quux
+; CHECK-NEXT: br label %L1
+define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind {
+entry:
+  br i1 %cond, label %L0, label %L4
+L0:
+  %val.phi = phi i32 [%val, %entry], [-1, %L4]
+  %cmp = icmp slt i32 %val.phi, 0
+  %expr = select i1 %cmp, i8 1, i8 %value
+  switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2]
+
+L1:
+  call void @foo()
+  ret void
+L2:
+  call void @bar()
+  ret void
+L3:
+  call void @baz()
+  ret void
+L4:
+  call void @quux()
+  br label %L0
+}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index 96ba701046d8..cce23ea319c8 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
+; RUN: opt < %s -jump-threading -S | FileCheck %s
 ; rdar://6402033
 
 ; Test that we can thread through the block with the partially redundant load (%2).
@@ -6,12 +6,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define i32 @foo(i32* %P) nounwind {
+; CHECK: foo
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
 	br i1 %1, label %bb1, label %bb
 
 bb:		; preds = %entry
+; CHECK: bb1.thread:
+; CHECK: store
+; CHECK: br label %bb3
 	store i32 42, i32* %P, align 4
 	br label %bb1
 
@@ -26,6 +30,9 @@ bb2:		; preds = %bb1
 	ret i32 %res.0
 
 bb3:		; preds = %bb1
+; CHECK: bb3:
+; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.01
 	ret i32 %res.0
 }
 
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index 5381c88aea63..7a80f8052b0d 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -loopsimplify -lcssa -S | \
+; RUN: opt < %s -loop-simplify -lcssa -S | \
 ; RUN:   grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
-; RUN: opt < %s -loopsimplify -lcssa -S | \
+; RUN: opt < %s -loop-simplify -lcssa -S | \
 ; RUN:   grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry}
 
         %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
diff --git a/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll b/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
index 4782bd17f893..4559e3101792 100644
--- a/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
+++ b/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll
@@ -1,6 +1,6 @@
 ; Exit blocks need to be updated for all nested loops...
 
-; RUN: opt < %s -loopsimplify
+; RUN: opt < %s -loop-simplify
 
 define i32 @yyparse() {
 bb0:
diff --git a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
index 10b00bab8fd9..d4df26e67abc 100644
--- a/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
+++ b/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm -S | FileCheck %s
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
 
 @a = external constant float*
 
diff --git a/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll b/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
deleted file mode 100644
index d1fe48c2357a..000000000000
--- a/test/Transforms/LICM/2009-03-25-AliasSetTracker.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-
-; RUN: opt < %s -licm -loop-index-split -instcombine -disable-output
-
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-@"\01LC81" = external constant [4 x i8]		; <[4 x i8]*> [#uses=1]
-
-define fastcc void @hex_dump_internal(i8* %avcl, %struct.FILE* %f, i32 %level, i8* nocapture %buf, i32 %size) nounwind {
-entry:
-	br i1 false, label %bb4, label %return
-
-bb4:		; preds = %bb30, %entry
-	br label %bb6
-
-bb6:		; preds = %bb15, %bb4
-	%j.0.reg2mem.0 = phi i32 [ %2, %bb15 ], [ 0, %bb4 ]		; <i32> [#uses=2]
-	%0 = icmp slt i32 %j.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %0, label %bb7, label %bb13
-
-bb7:		; preds = %bb6
-	br label %bb15
-
-bb13:		; preds = %bb6
-	%1 = tail call i32 @fwrite(i8* getelementptr ([4 x i8]* @"\01LC81", i32 0, i32 0), i32 1, i32 3, i8* null) nounwind		; <i32> [#uses=0]
-	br label %bb15
-
-bb15:		; preds = %bb13, %bb7
-	%2 = add i32 %j.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%3 = icmp sgt i32 %2, 15		; <i1> [#uses=1]
-	br i1 %3, label %bb30, label %bb6
-
-bb30:		; preds = %bb15
-	br i1 false, label %bb4, label %return
-
-return:		; preds = %bb30, %entry
-	ret void
-}
-
-declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index 88be5c41ccc5..ff7fa0b19a82 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -59,3 +59,16 @@ for.end:                                          ; preds = %for.cond, %entry
 }
 
 declare i32* @test3helper(i32*)
+
+
+; PR8602
+@g_47 = external global i32, align 4
+
+define void @test4() noreturn nounwind {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  volatile store i32* @g_47, i32** undef, align 8
+  store i32 undef, i32* @g_47, align 4
+  br label %1
+}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index c1d2b24b0bba..d8acdc1a3ad7 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -licm -S | FileCheck %s
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @X = global i32 7		; <i32*> [#uses=4]
@@ -118,3 +118,33 @@ exit:
   ret void
 }
 
+define void @test5(i32 %i, i32** noalias %P2) {
+Entry:
+	br label %Loop
+; CHECK: @test5
+; CHECK: Entry:
+; CHECK-NEXT:   load i32* @X
+; CHECK-NEXT:   br label %Loop
+
+
+Loop:		; preds = %Loop, %0
+	%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]		; <i32> [#uses=1]
+	%x = load i32* @X		; <i32> [#uses=1]
+	%x2 = add i32 %x, 1		; <i32> [#uses=1]
+	store i32 %x2, i32* @X
+        
+        volatile store i32* @X, i32** %P2
+        
+	%Next = add i32 %j, 1		; <i32> [#uses=2]
+	%cond = icmp eq i32 %Next, 0		; <i1> [#uses=1]
+	br i1 %cond, label %Out, label %Loop
+
+Out:	
+	ret void
+; CHECK: Out:
+; CHECK-NEXT:   store i32 %x2, i32* @X
+; CHECK-NEXT:   ret void
+
+}
+
+
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
new file mode 100644
index 000000000000..485114c8d480
--- /dev/null
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -0,0 +1,349 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @test1(i8* %Base, i64 %Size) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  store i8 0, i8* %I.0.014, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+}
+
+; This is a loop that was rotated but where the blocks weren't merged.  This
+; shouldn't perturb us.
+define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  store i8 0, i8* %I.0.014, align 1
+  %indvar.next = add i64 %indvar, 1
+  br label %for.body.cont
+for.body.cont:
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test1a
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+}
+
+
+define void @test2(i32* %Base, i64 %Size) nounwind ssp {
+entry:
+  %cmp10 = icmp eq i64 %Size, 0
+  br i1 %cmp10, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
+  store i32 16843009, i32* %add.ptr.i, align 4
+  %inc = add nsw i64 %i.011, 1
+  %exitcond = icmp eq i64 %inc, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test2
+; CHECK: br i1 %cmp10,
+; CHECK: %tmp = mul i64 %Size, 4
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %tmp, i32 4, i1 false)
+; CHECK-NOT: store
+}
+
+; This is a case where there is an extra may-aliased store in the loop, we can't
+; promote the memset.
+define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
+  store i32 16843009, i32* %add.ptr.i, align 4
+  
+  store i8 42, i8* %MayAlias
+  %inc = add nsw i64 %i.011, 1
+  %exitcond = icmp eq i64 %inc, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+; CHECK: @test3
+; CHECK-NOT: memset
+; CHECK: ret void
+}
+
+
+;; TODO: We should be able to promote this memset.  Not yet though.
+define void @test4(i8* %Base) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  %Base100 = getelementptr i8* %Base, i64 1000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  store i8 0, i8* %I.0.014, align 1
+  
+  ;; Store beyond the range memset, should be safe to promote.
+  store i8 42, i8* %Base100
+  
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK-TODO: @test4
+; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
+; CHECK-TODO-NOT: store
+}
+
+; This can't be promoted: the memset is a store of a loop variant value.
+define void @test5(i8* %Base, i64 %Size) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  
+  %V = trunc i64 %indvar to i8
+  store i8 %V, i8* %I.0.014, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test5
+; CHECK-NOT: memset
+; CHECK: ret void
+}
+
+
+;; memcpy formation
+define void @test6(i64 %Size) nounwind ssp {
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  %DestI = getelementptr i8* %Dest, i64 %indvar
+  %V = load i8* %I.0.014, align 1
+  store i8 %V, i8* %DestI, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test6
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+
+; This is a loop that was rotated but where the blocks weren't merged.  This
+; shouldn't perturb us.
+define void @test7(i8* %Base, i64 %Size) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
+  br label %for.body.cont
+for.body.cont:
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  store i8 0, i8* %I.0.014, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test7
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK-NOT: store
+}
+
+; This is a loop should not be transformed, it only executes one iteration.
+define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
+bb.nph:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %PI = getelementptr i64* %Ptr, i64 %indvar
+  store i64 0, i64 *%PI
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test8
+; CHECK: store i64 0, i64* %PI
+}
+
+declare i8* @external(i8*)
+
+;; This cannot be transformed into a memcpy, because the read-from location is
+;; mutated by the loop.
+define void @test9(i64 %Size) nounwind ssp {
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  
+  %BaseAlias = call i8* @external(i8* %Base)
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8* %Base, i64 %indvar
+  %DestI = getelementptr i8* %Dest, i64 %indvar
+  %V = load i8* %I.0.014, align 1
+  store i8 %V, i8* %DestI, align 1
+
+  ;; This store can clobber the input.
+  store i8 4, i8* %BaseAlias
+ 
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: @test9
+; CHECK-NOT: llvm.memcpy
+; CHECK: ret void
+}
+
+; Two dimensional nested loop should be promoted to one big memset.
+define void @test10(i8* %X) nounwind ssp {
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry, %for.inc10
+  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
+  br label %for.body5
+
+for.body5:                                        ; preds = %for.body5, %bb.nph
+  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
+  %mul = mul nsw i32 %i.04, 100
+  %add = add nsw i32 %j.02, %mul
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %inc = add nsw i32 %j.02, 1
+  %cmp4 = icmp eq i32 %inc, 100
+  br i1 %cmp4, label %for.inc10, label %for.body5
+
+for.inc10:                                        ; preds = %for.body5
+  %inc12 = add nsw i32 %i.04, 1
+  %cmp = icmp eq i32 %inc12, 100
+  br i1 %cmp, label %for.end13, label %bb.nph
+
+for.end13:                                        ; preds = %for.inc10
+  ret void
+; CHECK: @test10
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+; On darwin10 (which is the triple in this .ll file) this loop can be turned
+; into a memset_pattern call.
+; rdar://9009151
+define void @test11_pattern(i32* nocapture %P) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32* %P, i64 %indvar
+  store i32 1, i32* %arrayidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test11_pattern
+; CHECK-NEXT: entry:
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: memset_pattern
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+; Store of null should turn into memset of zero.
+define void @test12(i32** nocapture %P) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32** %P, i64 %indvar
+  store i32* null, i32** %arrayidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test12
+; CHECK-NEXT: entry:
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+@G = global i32 5
+
+; This store-of-address loop can be turned into a memset_pattern call.
+; rdar://9009151
+define void @test13_pattern(i32** nocapture %P) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32** %P, i64 %indvar
+  store i32* @G, i32** %arrayidx, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test13_pattern
+; CHECK-NEXT: entry:
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: memset_pattern
+; CHECK-NOT: store
+; CHECK: ret void
+}
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
new file mode 100644
index 000000000000..f2005891a59a
--- /dev/null
+++ b/test/Transforms/LoopIdiom/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll b/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
deleted file mode 100644
index d922ecbd4f5f..000000000000
--- a/test/Transforms/LoopIndexSplit/2007-09-21-LoopBound.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; PR1692
-; RUN: opt < %s -loop-index-split -disable-output 
-	%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
-	%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
-	%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
-	%struct.anon = type { %struct.LIST_NODE* }
-	%struct.st = type { %struct.subst*, %struct.LIST_NODE*, %struct.LIST_NODE*, i16, i16 }
-	%struct.subst = type { %struct.subst*, i32, %struct.term* }
-	%struct.term = type { i32, %struct.anon, %struct.LIST_NODE*, i32, i32 }
-
-define %struct.LIST_NODE* @inf_HyperResolvents(%struct.CLAUSE_HELP* %Clause, %struct.subst* %Subst, %struct.LIST_NODE* %Restlits, i32 %GlobalMaxVar, %struct.LIST_NODE* %FoundMap, i32 %StrictlyMaximal, { %struct.st*, [3001 x %struct.term*], [4000 x %struct.term*], i32 }* %Index, i32* %Flags, i32* %Precedence) {
-entry:
-	br i1 false, label %cond_next44, label %bb37
-
-bb37:		; preds = %entry
-	ret %struct.LIST_NODE* null
-
-cond_next44:		; preds = %entry
-	br i1 false, label %bb29.i, label %bb.i31
-
-bb.i31:		; preds = %cond_next44
-	ret %struct.LIST_NODE* null
-
-bb29.i:		; preds = %cond_next44
-	br i1 false, label %cond_next89.i, label %bb34.i
-
-bb34.i:		; preds = %bb29.i
-	ret %struct.LIST_NODE* null
-
-cond_next89.i:		; preds = %bb29.i
-	br i1 false, label %clause_LiteralGetIndex.exit70.i, label %bb.i59.i
-
-bb.i59.i:		; preds = %cond_next89.i
-	ret %struct.LIST_NODE* null
-
-clause_LiteralGetIndex.exit70.i:		; preds = %cond_next89.i
-	br label %bb3.i.i
-
-bb3.i.i:		; preds = %bb3.i.i, %clause_LiteralGetIndex.exit70.i
-	br i1 false, label %bb40.i.i, label %bb3.i.i
-
-subst_Apply.exit.i.i:		; preds = %bb40.i.i
-	%tmp21.i.i = icmp sgt i32 %j.0.i.i, 0		; <i1> [#uses=1]
-	br i1 %tmp21.i.i, label %cond_false.i47.i, label %cond_true24.i.i
-
-cond_true24.i.i:		; preds = %subst_Apply.exit.i.i
-	br label %cond_next37.i.i
-
-cond_false.i47.i:		; preds = %subst_Apply.exit.i.i
-	br label %cond_next37.i.i
-
-cond_next37.i.i:		; preds = %cond_false.i47.i, %cond_true24.i.i
-	%tmp39.i.i = add i32 %j.0.i.i, 1		; <i32> [#uses=1]
-	br label %bb40.i.i
-
-bb40.i.i:		; preds = %cond_next37.i.i, %bb3.i.i
-	%j.0.i.i = phi i32 [ %tmp39.i.i, %cond_next37.i.i ], [ 0, %bb3.i.i ]		; <i32> [#uses=3]
-	%tmp43.i.i = icmp sgt i32 %j.0.i.i, 0		; <i1> [#uses=1]
-	br i1 %tmp43.i.i, label %inf_CopyHyperElectron.exit.i, label %subst_Apply.exit.i.i
-
-inf_CopyHyperElectron.exit.i:		; preds = %bb40.i.i
-	ret %struct.LIST_NODE* null
-}
diff --git a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll b/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
deleted file mode 100644
index 3ebd9b3401f5..000000000000
--- a/test/Transforms/LoopIndexSplit/2007-09-24-UpdateIterationSpace.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-
-; Update loop iteraton space to eliminate condition inside loop.
-; RUN: opt < %s -loop-index-split -S | not grep bothcond
-define void @test(float* %x, i32 %ndat, float** %y, float %xcen, i32 %xmin, i32 %xmax, float %sigmal, float %contribution) {
-entry:
-	%tmp519 = icmp sgt i32 %xmin, %xmax		; <i1> [#uses=1]
-	br i1 %tmp519, label %return, label %bb.preheader
-
-bb.preheader:		; preds = %entry
-	%tmp3031 = fpext float %contribution to double		; <double> [#uses=1]
-	%tmp32 = fmul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
-	%tmp3839 = fpext float %sigmal to double		; <double> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb.preheader, %cond_next45
-	%i.01.0 = phi i32 [ %tmp47, %cond_next45 ], [ 0, %bb.preheader ]		; <i32> [#uses=6]
-	%tmp2 = icmp sgt i32 %i.01.0, -1		; <i1> [#uses=1]
-	%tmp6 = icmp slt i32 %i.01.0, %ndat		; <i1> [#uses=1]
-	%bothcond = and i1 %tmp2, %tmp6		; <i1> [#uses=1]
-	br i1 %bothcond, label %cond_true9, label %cond_next45
-
-cond_true9:		; preds = %bb
-	%tmp12 = getelementptr float* %x, i32 %i.01.0		; <float*> [#uses=1]
-	%tmp13 = load float* %tmp12, align 4		; <float> [#uses=1]
-	%tmp15 = fsub float %xcen, %tmp13		; <float> [#uses=1]
-	%tmp16 = tail call float @fabsf( float %tmp15 )		; <float> [#uses=1]
-	%tmp18 = fdiv float %tmp16, %sigmal		; <float> [#uses=1]
-	%tmp21 = load float** %y, align 4		; <float*> [#uses=2]
-	%tmp27 = getelementptr float* %tmp21, i32 %i.01.0		; <float*> [#uses=1]
-	%tmp28 = load float* %tmp27, align 4		; <float> [#uses=1]
-	%tmp2829 = fpext float %tmp28 to double		; <double> [#uses=1]
-	%tmp34 = fsub float -0.000000e+00, %tmp18		; <float> [#uses=1]
-	%tmp3435 = fpext float %tmp34 to double		; <double> [#uses=1]
-	%tmp36 = tail call double @exp( double %tmp3435 )		; <double> [#uses=1]
-	%tmp37 = fmul double %tmp32, %tmp36		; <double> [#uses=1]
-	%tmp40 = fdiv double %tmp37, %tmp3839		; <double> [#uses=1]
-	%tmp41 = fadd double %tmp2829, %tmp40		; <double> [#uses=1]
-	%tmp4142 = fptrunc double %tmp41 to float		; <float> [#uses=1]
-	%tmp44 = getelementptr float* %tmp21, i32 %i.01.0		; <float*> [#uses=1]
-	store float %tmp4142, float* %tmp44, align 4
-	br label %cond_next45
-
-cond_next45:		; preds = %bb, %cond_true9
-	%tmp47 = add i32 %i.01.0, 1		; <i32> [#uses=2]
-	%tmp51 = icmp sgt i32 %tmp47, %xmax		; <i1> [#uses=1]
-	br i1 %tmp51, label %return.loopexit, label %bb
-
-return.loopexit:		; preds = %cond_next45
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
-
-declare float @fabsf(float)
-
-declare double @exp(double)
diff --git a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll b/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
deleted file mode 100644
index 8f4ee24c1233..000000000000
--- a/test/Transforms/LoopIndexSplit/2007-09-25-UpdateIterationSpace-2.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; PR714
-; Update loop iteraton space to eliminate condition inside loop.
-; RUN: opt < %s -loop-index-split -S | not grep bothcond
-
-define void @test(float* %x, i32 %ndat, float** %y, float %xcen, i32 %xmin, i32 %xmax, float %sigmal, float %contribution) {
-entry:
-	%tmp5310 = icmp sgt i32 %xmin, %xmax		; <i1> [#uses=1]
-	br i1 %tmp5310, label %return, label %bb.preheader
-
-bb.preheader:		; preds = %entry
-	%tmp3031 = fpext float %contribution to double		; <double> [#uses=1]
-	%tmp32 = fmul double %tmp3031, 5.000000e-01		; <double> [#uses=1]
-	%tmp3839 = fpext float %sigmal to double		; <double> [#uses=1]
-	br label %bb
-
-bb:		; preds = %cond_next45, %bb.preheader
-	%k.06.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %cond_next45 ]		; <i32> [#uses=4]
-	%i.01.0 = add i32 %k.06.0, %xmin		; <i32> [#uses=4]
-	%tmp2 = icmp sgt i32 %i.01.0, -1		; <i1> [#uses=1]
-	%tmp6 = icmp slt i32 %i.01.0, %ndat		; <i1> [#uses=1]
-	%bothcond = and i1 %tmp2, %tmp6		; <i1> [#uses=1]
-	br i1 %bothcond, label %cond_true9, label %cond_next45
-
-cond_true9:		; preds = %bb
-	%tmp12 = getelementptr float* %x, i32 %i.01.0		; <float*> [#uses=1]
-	%tmp13 = load float* %tmp12, align 4		; <float> [#uses=1]
-	%tmp15 = fsub float %xcen, %tmp13		; <float> [#uses=1]
-	%tmp16 = tail call float @fabsf(float %tmp15)		; <float> [#uses=1]
-	%tmp18 = fdiv float %tmp16, %sigmal		; <float> [#uses=1]
-	%tmp21 = load float** %y, align 4		; <float*> [#uses=2]
-	%tmp27 = getelementptr float* %tmp21, i32 %k.06.0		; <float*> [#uses=1]
-	%tmp28 = load float* %tmp27, align 4		; <float> [#uses=1]
-	%tmp2829 = fpext float %tmp28 to double		; <double> [#uses=1]
-	%tmp34 = fsub float -0.000000e+00, %tmp18		; <float> [#uses=1]
-	%tmp3435 = fpext float %tmp34 to double		; <double> [#uses=1]
-	%tmp36 = tail call double @exp(double %tmp3435)		; <double> [#uses=1]
-	%tmp37 = fmul double %tmp32, %tmp36		; <double> [#uses=1]
-	%tmp40 = fdiv double %tmp37, %tmp3839		; <double> [#uses=1]
-	%tmp41 = fadd double %tmp2829, %tmp40		; <double> [#uses=1]
-	%tmp4142 = fptrunc double %tmp41 to float		; <float> [#uses=1]
-	%tmp44 = getelementptr float* %tmp21, i32 %k.06.0		; <float*> [#uses=1]
-	store float %tmp4142, float* %tmp44, align 4
-	br label %cond_next45
-
-cond_next45:		; preds = %cond_true9, %bb
-	%tmp47 = add i32 %i.01.0, 1		; <i32> [#uses=1]
-	%tmp53 = icmp sgt i32 %tmp47, %xmax		; <i1> [#uses=1]
-	%indvar.next = add i32 %k.06.0, 1		; <i32> [#uses=1]
-	br i1 %tmp53, label %return.loopexit, label %bb
-
-return.loopexit:		; preds = %cond_next45
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
-
-declare float @fabsf(float)
-
-declare double @exp(double)
diff --git a/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll b/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
deleted file mode 100644
index 1550bc7abb5d..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-01-28-IndDecrement.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output -stats |& \
-; RUN: not grep "loop-index-split" 
-
-; Induction variable decrement is not yet handled.
-; pr1912.bc
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
-	%struct.cset = type { i8*, i8, i8, i32, i8* }
-	%struct.parse = type { i8*, i8*, i32, i32*, i32, i32, i32, %struct.re_guts*, [10 x i32], [10 x i32] }
-	%struct.re_guts = type { i32, i32*, i32, i32, %struct.cset*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, [1 x i8] }
-
-define fastcc void @p_bracket(%struct.parse* %p) {
-entry:
-	br i1 false, label %bb160, label %bb195
-
-bb160:		; preds = %entry
-	br i1 false, label %bb.i169, label %bb9.i
-
-bb195:		; preds = %entry
-	ret void
-
-bb.i169:		; preds = %bb160
-	br i1 false, label %bb372, label %bb565
-
-bb9.i:		; preds = %bb160
-	ret void
-
-bb372:		; preds = %bb418, %bb.i169
-	%i1.0.reg2mem.0 = phi i32 [ %i1.0, %bb418 ], [ 0, %bb.i169 ]		; <i32> [#uses=2]
-	%tmp3.i.i.i170 = icmp ult i32 %i1.0.reg2mem.0, 128		; <i1> [#uses=1]
-	br i1 %tmp3.i.i.i170, label %bb.i.i173, label %bb13.i.i
-
-bb.i.i173:		; preds = %bb372
-	br label %bb418
-
-bb13.i.i:		; preds = %bb372
-	br label %bb418
-
-bb418:		; preds = %bb13.i.i, %bb.i.i173
-	%i1.0 = add i32 %i1.0.reg2mem.0, -1		; <i32> [#uses=2]
-	%tmp420 = icmp sgt i32 %i1.0, -1		; <i1> [#uses=1]
-	br i1 %tmp420, label %bb372, label %bb565
-
-bb565:		; preds = %bb418, %bb.i169
-	ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll b/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
deleted file mode 100644
index 3cfd6c96b7b2..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; PR 1995
-
-define void @add_blkdev_randomness(i32 %major) nounwind  {
-entry:
-	br label %bb
-
-bb:		; preds = %bb39, %entry
-	%A.0.reg2mem.0 = phi i32 [ undef, %entry ], [ %TEMP.0, %bb39 ]		; <i32> [#uses=1]
-	%D.0.reg2mem.0 = phi i32 [ undef, %entry ], [ %C.0.reg2mem.0, %bb39 ]		; <i32> [#uses=3]
-	%C.0.reg2mem.0 = phi i32 [ undef, %entry ], [ %tmp34, %bb39 ]		; <i32> [#uses=4]
-	%TEMP.1.reg2mem.0 = phi i32 [ undef, %entry ], [ %TEMP.0, %bb39 ]		; <i32> [#uses=1]
-	%i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp38, %bb39 ]		; <i32> [#uses=3]
-	%B.0.reg2mem.0 = phi i32 [ undef, %entry ], [ %A.0.reg2mem.0, %bb39 ]		; <i32> [#uses=5]
-	%tmp1 = icmp slt i32 %i.0.reg2mem.0, 40		; <i1> [#uses=1]
-	br i1 %tmp1, label %bb3, label %bb12
-
-bb3:		; preds = %bb
-	%tmp6 = xor i32 %C.0.reg2mem.0, %D.0.reg2mem.0		; <i32> [#uses=1]
-	%tmp8 = and i32 %B.0.reg2mem.0, %tmp6		; <i32> [#uses=1]
-	%tmp10 = xor i32 %tmp8, %D.0.reg2mem.0		; <i32> [#uses=1]
-	%tmp11 = add i32 %tmp10, 1518500249		; <i32> [#uses=1]
-	br label %bb39
-
-bb12:		; preds = %bb
-	%tmp14 = icmp slt i32 %i.0.reg2mem.0, 60		; <i1> [#uses=1]
-	br i1 %tmp14, label %bb17, label %bb39
-
-bb17:		; preds = %bb12
-	%tmp20 = and i32 %B.0.reg2mem.0, %C.0.reg2mem.0		; <i32> [#uses=1]
-	%tmp23 = xor i32 %B.0.reg2mem.0, %C.0.reg2mem.0		; <i32> [#uses=1]
-	%tmp25 = and i32 %tmp23, %D.0.reg2mem.0		; <i32> [#uses=1]
-	%tmp26 = add i32 %tmp20, -1894007588		; <i32> [#uses=1]
-	%tmp27 = add i32 %tmp26, %tmp25		; <i32> [#uses=1]
-	br label %bb39
-
-bb39:		; preds = %bb12, %bb3, %bb17
-	%TEMP.0 = phi i32 [ %tmp27, %bb17 ], [ %tmp11, %bb3 ], [ %TEMP.1.reg2mem.0, %bb12 ]		; <i32> [#uses=2]
-	%tmp31 = lshr i32 %B.0.reg2mem.0, 2		; <i32> [#uses=1]
-	%tmp33 = shl i32 %B.0.reg2mem.0, 30		; <i32> [#uses=1]
-	%tmp34 = or i32 %tmp31, %tmp33		; <i32> [#uses=1]
-	%tmp38 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp41 = icmp slt i32 %tmp38, 80		; <i1> [#uses=1]
-	br i1 %tmp41, label %bb, label %return
-
-return:		; preds = %bb39
-	ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll b/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
deleted file mode 100644
index 980a42f20aa9..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-02-13-ExitValueNum.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: opt < %s -disable-output -loop-index-split
-; PR 2011
-	%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
-	%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
-	%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
-	%struct.anon = type { %struct.LIST_NODE* }
-	%struct.st = type { %struct.subst*, %struct.LIST_NODE*, %struct.LIST_NODE*, i16, i16 }
-	%struct.subst = type { %struct.subst*, i32, %struct.term* }
-	%struct.term = type { i32, %struct.anon, %struct.LIST_NODE*, i32, i32 }
-
-define fastcc %struct.LIST_NODE* @inf_HyperResolvents(%struct.CLAUSE_HELP* %Clause, %struct.subst* %Subst, %struct.LIST_NODE* %Restlits, i32 %GlobalMaxVar, %struct.LIST_NODE* %FoundMap, i32 %StrictlyMaximal, { %struct.st*, [3001 x %struct.term*], [4000 x %struct.term*], i32 }* %Index, i32* %Flags, i32* %Precedence) nounwind  {
-entry:
-	br i1 false, label %bb960, label %bb885
-
-bb885:		; preds = %entry
-	ret %struct.LIST_NODE* null
-
-bb960:		; preds = %entry
-	br i1 false, label %bb1097, label %bb1005.preheader
-
-bb1005.preheader:		; preds = %bb960
-	ret %struct.LIST_NODE* null
-
-bb1097:		; preds = %bb960
-	br i1 false, label %bb1269.preheader, label %bb1141.preheader
-
-bb1141.preheader:		; preds = %bb1097
-	ret %struct.LIST_NODE* null
-
-bb1269.preheader:		; preds = %bb1097
-	br i1 false, label %bb1318, label %bb1281
-
-bb1281:		; preds = %bb1269.preheader
-	ret %struct.LIST_NODE* null
-
-bb1318:		; preds = %bb1269.preheader
-	br i1 false, label %bb1459, label %bb.nph52
-
-bb.nph52:		; preds = %bb1318
-	ret %struct.LIST_NODE* null
-
-bb1459:		; preds = %bb1318
-	br i1 false, label %bb1553, label %bb.nph62
-
-bb.nph62:		; preds = %bb1459
-	ret %struct.LIST_NODE* null
-
-bb1553:		; preds = %bb1669, %bb1459
-	%j295.0.reg2mem.0 = phi i32 [ %storemerge110, %bb1669 ], [ 0, %bb1459 ]		; <i32> [#uses=2]
-	%tmp1629 = icmp sgt i32 %j295.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %tmp1629, label %bb1649, label %bb1632
-
-bb1632:		; preds = %bb1553
-	br label %bb1669
-
-bb1649:		; preds = %bb1553
-	br label %bb1669
-
-bb1669:		; preds = %bb1649, %bb1632
-	%storemerge110 = add i32 %j295.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp1672 = icmp sgt i32 %storemerge110, 0		; <i1> [#uses=1]
-	br i1 %tmp1672, label %bb1678, label %bb1553
-
-bb1678:		; preds = %bb1669
-	ret %struct.LIST_NODE* null
-}
-
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
deleted file mode 100644
index 9351cafcf646..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatch.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; PR 2011
-	%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
-	%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
-	%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
-	%struct.anon = type { %struct.LIST_NODE* }
-	%struct.st = type { %struct.subst*, %struct.LIST_NODE*, %struct.LIST_NODE*, i16, i16 }
-	%struct.subst = type { %struct.subst*, i32, %struct.term* }
-	%struct.term = type { i32, %struct.anon, %struct.LIST_NODE*, i32, i32 }
-
-define fastcc %struct.LIST_NODE* @inf_HyperResolvents(%struct.CLAUSE_HELP* %Clause, %struct.subst* %Subst, %struct.LIST_NODE* %Restlits, i32 %GlobalMaxVar, %struct.LIST_NODE* %FoundMap, i32 %StrictlyMaximal, { %struct.st*, [3001 x %struct.term*], [4000 x %struct.term*], i32 }* %Index, i32* %Flags, i32* %Precedence) nounwind  {
-entry:
-	br i1 false, label %bb960, label %bb885
-
-bb885:		; preds = %entry
-	ret %struct.LIST_NODE* null
-
-bb960:		; preds = %entry
-	br i1 false, label %bb1097, label %bb1005.preheader
-
-bb1005.preheader:		; preds = %bb960
-	ret %struct.LIST_NODE* null
-
-bb1097:		; preds = %bb960
-	br i1 false, label %bb1269.preheader, label %bb1141.preheader
-
-bb1141.preheader:		; preds = %bb1097
-	ret %struct.LIST_NODE* null
-
-bb1269.preheader:		; preds = %bb1097
-	br i1 false, label %bb1318, label %bb1281
-
-bb1281:		; preds = %bb1269.preheader
-	ret %struct.LIST_NODE* null
-
-bb1318:		; preds = %bb1269.preheader
-	br i1 false, label %bb1459, label %bb.nph52
-
-bb.nph52:		; preds = %bb1318
-	ret %struct.LIST_NODE* null
-
-bb1459:		; preds = %bb1318
-	br i1 false, label %bb1553, label %bb.nph62
-
-bb.nph62:		; preds = %bb1459
-	ret %struct.LIST_NODE* null
-
-bb1553:		; preds = %bb1669, %bb1459
-	%j295.0.reg2mem.0 = phi i32 [ %storemerge110, %bb1669 ], [ 0, %bb1459 ]		; <i32> [#uses=2]
-	br i1 false, label %bb1588, label %bb1616
-
-bb1588:		; preds = %bb1553
-	br label %bb1616
-
-bb1616:		; preds = %bb1588, %bb1553
-	%tmp1629 = icmp sgt i32 %j295.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %tmp1629, label %bb1649, label %bb1632
-
-bb1632:		; preds = %bb1616
-	br label %bb1669
-
-bb1649:		; preds = %bb1616
-	br label %bb1669
-
-bb1669:		; preds = %bb1649, %bb1632
-	%storemerge110 = add i32 %j295.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp1672 = icmp sgt i32 %storemerge110, 0		; <i1> [#uses=1]
-	br i1 %tmp1672, label %bb1678, label %bb1553
-
-bb1678:		; preds = %bb1669
-	ret %struct.LIST_NODE* null
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll b/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
deleted file mode 100644
index 6d6defa85de0..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-02-13-LoopLatchPHI.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; PR 2011
-	%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_NODE*, %struct.LIST_NODE*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
-	%struct.LIST_NODE = type { %struct.LIST_NODE*, i8* }
-	%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
-	%struct.anon = type { %struct.LIST_NODE* }
-	%struct.st = type { %struct.subst*, %struct.LIST_NODE*, %struct.LIST_NODE*, i16, i16 }
-	%struct.subst = type { %struct.subst*, i32, %struct.term* }
-	%struct.term = type { i32, %struct.anon, %struct.LIST_NODE*, i32, i32 }
-
-define fastcc %struct.LIST_NODE* @inf_HyperResolvents(%struct.CLAUSE_HELP* %Clause, %struct.subst* %Subst, %struct.LIST_NODE* %Restlits, i32 %GlobalMaxVar, %struct.LIST_NODE* %FoundMap, i32 %StrictlyMaximal, { %struct.st*, [3001 x %struct.term*], [4000 x %struct.term*], i32 }* %Index, i32* %Flags, i32* %Precedence) nounwind  {
-entry:
-	br i1 false, label %bb960, label %bb885
-
-bb885:		; preds = %entry
-	ret %struct.LIST_NODE* null
-
-bb960:		; preds = %entry
-	br i1 false, label %bb1097, label %bb1005.preheader
-
-bb1005.preheader:		; preds = %bb960
-	ret %struct.LIST_NODE* null
-
-bb1097:		; preds = %bb960
-	br i1 false, label %bb1269.preheader, label %bb1141.preheader
-
-bb1141.preheader:		; preds = %bb1097
-	ret %struct.LIST_NODE* null
-
-bb1269.preheader:		; preds = %bb1097
-	br i1 false, label %bb1318, label %bb1281
-
-bb1281:		; preds = %bb1269.preheader
-	ret %struct.LIST_NODE* null
-
-bb1318:		; preds = %bb1269.preheader
-	br i1 false, label %bb1459, label %bb.nph52
-
-bb.nph52:		; preds = %bb1318
-	ret %struct.LIST_NODE* null
-
-bb1459:		; preds = %bb1318
-	br i1 false, label %bb1553, label %bb.nph62
-
-bb.nph62:		; preds = %bb1459
-	ret %struct.LIST_NODE* null
-
-bb1553:		; preds = %bb1669, %bb1459
-	%j295.0.reg2mem.0 = phi i32 [ %storemerge110, %bb1669 ], [ 0, %bb1459 ]		; <i32> [#uses=2]
-	%Constraint403.2.reg2mem.0 = phi %struct.LIST_NODE* [ %Constraint403.1.reg2mem.0, %bb1669 ], [ null, %bb1459 ]		; <%struct.LIST_NODE*> [#uses=1]
-	br i1 false, label %bb1588, label %bb1616
-
-bb1588:		; preds = %bb1553
-	br label %bb1616
-
-bb1616:		; preds = %bb1588, %bb1553
-	%tmp1629 = icmp sgt i32 %j295.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %tmp1629, label %bb1649, label %bb1632
-
-bb1632:		; preds = %bb1616
-	br label %bb1669
-
-bb1649:		; preds = %bb1616
-	br label %bb1669
-
-bb1669:		; preds = %bb1649, %bb1632
-	%Constraint403.1.reg2mem.0 = phi %struct.LIST_NODE* [ null, %bb1632 ], [ %Constraint403.2.reg2mem.0, %bb1649 ]		; <%struct.LIST_NODE*> [#uses=1]
-	%storemerge110 = add i32 %j295.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp1672 = icmp sgt i32 %storemerge110, 0		; <i1> [#uses=1]
-	br i1 %tmp1672, label %bb1678, label %bb1553
-
-bb1678:		; preds = %bb1669
-	ret %struct.LIST_NODE* null
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll b/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
deleted file mode 100644
index f1a03e2f18a9..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll
+++ /dev/null
@@ -1,464 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; PR 2030
-	%struct.FULL = type { i32, i32, [1000 x float*] }
-
-define i32 @matgen(%struct.FULL* %a, float** %x, float** %b, float** %bt, i32** %ipvt, i32 %test_case, i32 %scale) {
-entry:
-	br i1 false, label %bb, label %entry.bb30_crit_edge
-
-entry.bb30_crit_edge:		; preds = %entry
-	br label %bb30
-
-bb:		; preds = %entry
-	br label %bb14
-
-bb6:		; preds = %bb14
-	br label %bb14
-
-bb14:		; preds = %bb6, %bb
-	br i1 false, label %bb6, label %bb22
-
-bb22:		; preds = %bb14
-	br label %bb30
-
-bb30:		; preds = %bb22, %entry.bb30_crit_edge
-	switch i32 %test_case, label %bb648 [
-		 i32 1, label %bb30.bb32_crit_edge
-		 i32 2, label %bb30.bb32_crit_edge1
-		 i32 3, label %bb30.bb32_crit_edge2
-		 i32 4, label %bb30.bb108_crit_edge
-		 i32 5, label %bb30.bb108_crit_edge3
-		 i32 6, label %bb30.bb142_crit_edge
-		 i32 7, label %bb30.bb142_crit_edge4
-		 i32 8, label %bb30.bb142_crit_edge5
-		 i32 9, label %bb234
-		 i32 10, label %bb292
-		 i32 11, label %bb353
-		 i32 12, label %bb419
-		 i32 13, label %bb485
-		 i32 14, label %bb567
-	]
-
-bb30.bb142_crit_edge5:		; preds = %bb30
-	br label %bb142
-
-bb30.bb142_crit_edge4:		; preds = %bb30
-	br label %bb142
-
-bb30.bb142_crit_edge:		; preds = %bb30
-	br label %bb142
-
-bb30.bb108_crit_edge3:		; preds = %bb30
-	br label %bb108
-
-bb30.bb108_crit_edge:		; preds = %bb30
-	br label %bb108
-
-bb30.bb32_crit_edge2:		; preds = %bb30
-	br label %bb32
-
-bb30.bb32_crit_edge1:		; preds = %bb30
-	br label %bb32
-
-bb30.bb32_crit_edge:		; preds = %bb30
-	br label %bb32
-
-bb32:		; preds = %bb30.bb32_crit_edge, %bb30.bb32_crit_edge1, %bb30.bb32_crit_edge2
-	br i1 false, label %bb53, label %bb52
-
-bb52:		; preds = %bb32
-	br label %bb739
-
-bb53:		; preds = %bb32
-	br label %bb101
-
-bb58:		; preds = %bb101
-	br label %bb92
-
-bb64:		; preds = %bb92
-	br i1 false, label %bb64.bb87_crit_edge, label %bb72
-
-bb64.bb87_crit_edge:		; preds = %bb64
-	br label %bb87
-
-bb72:		; preds = %bb64
-	br i1 false, label %bb72.bb87_crit_edge, label %bb79
-
-bb72.bb87_crit_edge:		; preds = %bb72
-	br label %bb87
-
-bb79:		; preds = %bb72
-	br label %bb87
-
-bb87:		; preds = %bb79, %bb72.bb87_crit_edge, %bb64.bb87_crit_edge
-	br label %bb92
-
-bb92:		; preds = %bb87, %bb58
-	br i1 false, label %bb64, label %bb98
-
-bb98:		; preds = %bb92
-	br label %bb101
-
-bb101:		; preds = %bb98, %bb53
-	br i1 false, label %bb58, label %bb107
-
-bb107:		; preds = %bb101
-	br label %bb651
-
-bb108:		; preds = %bb30.bb108_crit_edge, %bb30.bb108_crit_edge3
-	br i1 false, label %bb125, label %bb124
-
-bb124:		; preds = %bb108
-	br label %bb739
-
-bb125:		; preds = %bb108
-	br i1 false, label %bb138, label %bb139
-
-bb138:		; preds = %bb125
-	br label %bb140
-
-bb139:		; preds = %bb125
-	br label %bb140
-
-bb140:		; preds = %bb139, %bb138
-	br label %bb651
-
-bb142:		; preds = %bb30.bb142_crit_edge, %bb30.bb142_crit_edge4, %bb30.bb142_crit_edge5
-	br i1 false, label %bb161, label %bb160
-
-bb160:		; preds = %bb142
-	br label %bb739
-
-bb161:		; preds = %bb142
-	br i1 false, label %bb170, label %bb161.bb171_crit_edge
-
-bb161.bb171_crit_edge:		; preds = %bb161
-	br label %bb171
-
-bb170:		; preds = %bb161
-	br label %bb171
-
-bb171:		; preds = %bb170, %bb161.bb171_crit_edge
-	br i1 false, label %bb176, label %bb171.bb177_crit_edge
-
-bb171.bb177_crit_edge:		; preds = %bb171
-	br label %bb177
-
-bb176:		; preds = %bb171
-	br label %bb177
-
-bb177:		; preds = %bb176, %bb171.bb177_crit_edge
-	br label %bb227
-
-bb178:		; preds = %bb227
-	br label %bb218
-
-bb184:		; preds = %bb218
-	br i1 false, label %bb191, label %bb193
-
-bb191:		; preds = %bb184
-	br label %bb213
-
-bb193:		; preds = %bb184
-	br i1 false, label %bb200, label %bb203
-
-bb200:		; preds = %bb193
-	br label %bb213
-
-bb203:		; preds = %bb193
-	br i1 false, label %bb210, label %bb203.bb213_crit_edge
-
-bb203.bb213_crit_edge:		; preds = %bb203
-	br label %bb213
-
-bb210:		; preds = %bb203
-	br label %bb213
-
-bb213:		; preds = %bb210, %bb203.bb213_crit_edge, %bb200, %bb191
-	br label %bb218
-
-bb218:		; preds = %bb213, %bb178
-	br i1 false, label %bb184, label %bb224
-
-bb224:		; preds = %bb218
-	br label %bb227
-
-bb227:		; preds = %bb224, %bb177
-	br i1 false, label %bb178, label %bb233
-
-bb233:		; preds = %bb227
-	br label %bb651
-
-bb234:		; preds = %bb30
-	br i1 false, label %bb253, label %bb252
-
-bb252:		; preds = %bb234
-	br label %bb739
-
-bb253:		; preds = %bb234
-	br label %bb285
-
-bb258:		; preds = %bb285
-	br label %bb276
-
-bb264:		; preds = %bb276
-	br label %bb276
-
-bb276:		; preds = %bb264, %bb258
-	br i1 false, label %bb264, label %bb282
-
-bb282:		; preds = %bb276
-	br label %bb285
-
-bb285:		; preds = %bb282, %bb253
-	br i1 false, label %bb258, label %bb291
-
-bb291:		; preds = %bb285
-	br label %bb651
-
-bb292:		; preds = %bb30
-	br i1 false, label %bb311, label %bb310
-
-bb310:		; preds = %bb292
-	br label %bb739
-
-bb311:		; preds = %bb292
-	br label %bb346
-
-bb316:		; preds = %bb346
-	br label %bb337
-
-bb322:		; preds = %bb337
-	br label %bb337
-
-bb337:		; preds = %bb322, %bb316
-	br i1 false, label %bb322, label %bb343
-
-bb343:		; preds = %bb337
-	br label %bb346
-
-bb346:		; preds = %bb343, %bb311
-	br i1 false, label %bb316, label %bb352
-
-bb352:		; preds = %bb346
-	br label %bb651
-
-bb353:		; preds = %bb30
-	br i1 false, label %bb372, label %bb371
-
-bb371:		; preds = %bb353
-	br label %bb739
-
-bb372:		; preds = %bb353
-	br label %bb412
-
-bb377:		; preds = %bb412
-	br label %bb403
-
-bb383:		; preds = %bb403
-	br i1 false, label %bb395, label %bb389
-
-bb389:		; preds = %bb383
-	br label %bb396
-
-bb395:		; preds = %bb383
-	br label %bb396
-
-bb396:		; preds = %bb395, %bb389
-	br label %bb403
-
-bb403:		; preds = %bb396, %bb377
-	br i1 false, label %bb383, label %bb409
-
-bb409:		; preds = %bb403
-	br label %bb412
-
-bb412:		; preds = %bb409, %bb372
-	br i1 false, label %bb377, label %bb418
-
-bb418:		; preds = %bb412
-	br label %bb651
-
-bb419:		; preds = %bb30
-	br i1 false, label %bb438, label %bb437
-
-bb437:		; preds = %bb419
-	br label %bb739
-
-bb438:		; preds = %bb419
-	br label %bb478
-
-bb443:		; preds = %bb478
-	br label %bb469
-
-bb449:		; preds = %bb469
-	br i1 false, label %bb461, label %bb455
-
-bb455:		; preds = %bb449
-	br label %bb462
-
-bb461:		; preds = %bb449
-	br label %bb462
-
-bb462:		; preds = %bb461, %bb455
-	br label %bb469
-
-bb469:		; preds = %bb462, %bb443
-	br i1 false, label %bb449, label %bb475
-
-bb475:		; preds = %bb469
-	br label %bb478
-
-bb478:		; preds = %bb475, %bb438
-	br i1 false, label %bb443, label %bb484
-
-bb484:		; preds = %bb478
-	br label %bb651
-
-bb485:		; preds = %bb30
-	br i1 false, label %bb504, label %bb503
-
-bb503:		; preds = %bb485
-	br label %bb739
-
-bb504:		; preds = %bb485
-	br label %bb560
-
-bb513:		; preds = %bb560
-	br label %bb551
-
-bb519:		; preds = %bb551
-	br i1 false, label %bb528, label %bb532
-
-bb528:		; preds = %bb519
-	br label %bb536
-
-bb532:		; preds = %bb519
-	br label %bb536
-
-bb536:		; preds = %bb532, %bb528
-	br label %bb551
-
-bb551:		; preds = %bb536, %bb513
-	br i1 false, label %bb519, label %bb557
-
-bb557:		; preds = %bb551
-	br label %bb560
-
-bb560:		; preds = %bb557, %bb504
-	br i1 false, label %bb513, label %bb566
-
-bb566:		; preds = %bb560
-	br label %bb651
-
-bb567:		; preds = %bb30
-	br i1 false, label %bb586, label %bb585
-
-bb585:		; preds = %bb567
-	br label %bb739
-
-bb586:		; preds = %bb567
-	br label %bb641
-
-bb595:		; preds = %bb641
-	br label %bb632
-
-bb601:		; preds = %bb632
-	%tmp604 = icmp sgt i32 %i.7, 0		; <i1> [#uses=1]
-	br i1 %tmp604, label %bb607, label %bb611
-
-bb607:		; preds = %bb601
-	br label %bb615
-
-bb611:		; preds = %bb601
-	br label %bb615
-
-bb615:		; preds = %bb611, %bb607
-	%tmp629 = add i32 %i.7, 1		; <i32> [#uses=1]
-	%tmp631 = getelementptr float* %col.7, i32 1		; <float*> [#uses=1]
-	br label %bb632
-
-bb632:		; preds = %bb615, %bb595
-	%col.7 = phi float* [ null, %bb595 ], [ %tmp631, %bb615 ]		; <float*> [#uses=1]
-	%i.7 = phi i32 [ 0, %bb595 ], [ %tmp629, %bb615 ]		; <i32> [#uses=3]
-	%tmp635 = icmp slt i32 %i.7, 0		; <i1> [#uses=1]
-	br i1 %tmp635, label %bb601, label %bb638
-
-bb638:		; preds = %bb632
-	br label %bb641
-
-bb641:		; preds = %bb638, %bb586
-	br i1 false, label %bb595, label %bb647
-
-bb647:		; preds = %bb641
-	br label %bb651
-
-bb648:		; preds = %bb30
-	br label %bb739
-
-bb651:		; preds = %bb647, %bb566, %bb484, %bb418, %bb352, %bb291, %bb233, %bb140, %bb107
-	br i1 false, label %bb658, label %bb651.bb661_crit_edge
-
-bb651.bb661_crit_edge:		; preds = %bb651
-	br label %bb661
-
-bb658:		; preds = %bb651
-	br label %bb661
-
-bb661:		; preds = %bb658, %bb651.bb661_crit_edge
-	br i1 false, label %bb666, label %bb661.bb686_crit_edge
-
-bb661.bb686_crit_edge:		; preds = %bb661
-	br label %bb686
-
-bb666:		; preds = %bb661
-	br label %bb680
-
-bb670:		; preds = %bb680
-	br label %bb680
-
-bb680:		; preds = %bb670, %bb666
-	br i1 false, label %bb670, label %bb680.bb686_crit_edge
-
-bb680.bb686_crit_edge:		; preds = %bb680
-	br label %bb686
-
-bb686:		; preds = %bb680.bb686_crit_edge, %bb661.bb686_crit_edge
-	br i1 false, label %bb699, label %bb696
-
-bb696:		; preds = %bb686
-	br label %bb739
-
-bb699:		; preds = %bb686
-	br i1 false, label %bb712, label %bb709
-
-bb709:		; preds = %bb699
-	br label %bb739
-
-bb712:		; preds = %bb699
-	br i1 false, label %bb717, label %bb712.bb720_crit_edge
-
-bb712.bb720_crit_edge:		; preds = %bb712
-	br label %bb720
-
-bb717:		; preds = %bb712
-	br label %bb720
-
-bb720:		; preds = %bb717, %bb712.bb720_crit_edge
-	br i1 false, label %bb725, label %bb720.bb738_crit_edge
-
-bb720.bb738_crit_edge:		; preds = %bb720
-	br label %bb738
-
-bb725:		; preds = %bb720
-	br label %bb738
-
-bb738:		; preds = %bb725, %bb720.bb738_crit_edge
-	br label %bb739
-
-bb739:		; preds = %bb738, %bb709, %bb696, %bb648, %bb585, %bb503, %bb437, %bb371, %bb310, %bb252, %bb160, %bb124, %bb52
-	br label %return
-
-return:		; preds = %bb739
-	ret i32 0
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll b/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
deleted file mode 100644
index ca22e50eadfc..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-03-24-ExitPhi.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; Handle Exit block phis that do not have any use inside the loop.
-
-	%struct.ATOM = type { double, double, double, double, double, double, i32, double, double, double, double, i8*, i8, [9 x i8], double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, [200 x i8*], [32 x i8*], [32 x i8], i32 }
-	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-	%struct.__sFILEX = type opaque
-	%struct.__sbuf = type { i8*, i32 }
-
-define i32 @math([80 x i8]* %tokens, double* %fvalue, i32* %ivalue, %struct.FILE* %ip, %struct.FILE* %op, i32 %echo) nounwind  {
-entry:
-	br i1 false, label %bb.i, label %bb35.i
-bb.i:		; preds = %entry
-	br i1 false, label %bb6.i, label %bb9.i
-bb9.i:		; preds = %bb.i
-	ret i32 0
-bb35.i:		; preds = %entry
-	ret i32 0
-bb6.i:		; preds = %bb.i
-	br i1 false, label %a_l2_f.exit, label %bb16.i
-bb16.i:		; preds = %bb6.i
-	ret i32 0
-a_l2_f.exit:		; preds = %bb6.i
-	br i1 false, label %bb7.i97, label %bb6.i71
-bb6.i71:		; preds = %a_l2_f.exit
-	ret i32 0
-bb7.i97:		; preds = %a_l2_f.exit
-	br i1 false, label %bb, label %bb18.i102
-bb18.i102:		; preds = %bb7.i97
-	ret i32 0
-bb:		; preds = %bb7.i97
-	br i1 false, label %bb38, label %AFOUND
-bb38:		; preds = %bb
-	br i1 false, label %bb111, label %bb7.i120
-AFOUND:		; preds = %bb
-	ret i32 0
-bb7.i120:		; preds = %bb38
-	ret i32 0
-bb111:		; preds = %bb38
-	switch i32 0, label %bb574 [
-		 i32 1, label %bb158
-		 i32 0, label %bb166
-	]
-bb158:		; preds = %bb111
-	ret i32 0
-bb166:		; preds = %bb111
-	ret i32 0
-bb574:		; preds = %bb111
-	br i1 false, label %bb11.i249, label %bb600
-bb11.i249:		; preds = %bb574
-	br i1 false, label %bb11.i265, label %bb596
-bb11.i265:		; preds = %bb590, %bb11.i249
-	%i.1.reg2mem.0 = phi i32 [ %tmp589.reg2mem.0, %bb590 ], [ 0, %bb11.i249 ]		; <i32> [#uses=2]
-	%tmp13.i264 = icmp slt i32 %i.1.reg2mem.0, 1		; <i1> [#uses=1]
-	br i1 %tmp13.i264, label %bb16.i267, label %bb30.i279
-bb16.i267:		; preds = %bb11.i265
-	br label %bb590
-bb30.i279:		; preds = %bb11.i265
-	br label %bb590
-bb590:		; preds = %bb30.i279, %bb16.i267
-	%tmp5876282.reg2mem.0 = phi %struct.ATOM* [ null, %bb30.i279 ], [ null, %bb16.i267 ]		; <%struct.ATOM*> [#uses=1]
-	%tmp589.reg2mem.0 = add i32 %i.1.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp593 = icmp slt i32 %tmp589.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %tmp593, label %bb11.i265, label %bb596
-bb596:		; preds = %bb590, %bb11.i249
-	%ap.0.reg2mem.0 = phi %struct.ATOM* [ null, %bb11.i249 ], [ %tmp5876282.reg2mem.0, %bb590 ]		; <%struct.ATOM*> [#uses=0]
-	ret i32 0
-bb600:		; preds = %bb574
-	ret i32 0
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll b/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
deleted file mode 100644
index 7447e6d4d4f0..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-05-19-IndVar.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "loop-index-split"
-;PR2294
-@g_2 = external global i16		; <i16*> [#uses=4]
-@g_5 = external global i32		; <i32*> [#uses=1]
-@.str = external constant [4 x i8]		; <[4 x i8]*> [#uses=1]
-
-declare void @func_1() nounwind 
-
-define i32 @main() nounwind  {
-entry:
-	%tmp101.i = load i16* @g_2, align 2		; <i16> [#uses=1]
-	%tmp112.i = icmp sgt i16 %tmp101.i, 0		; <i1> [#uses=1]
-	br i1 %tmp112.i, label %bb.preheader.i, label %func_1.exit
-bb.preheader.i:		; preds = %entry
-	%g_2.promoted.i = load i16* @g_2		; <i16> [#uses=1]
-	br label %bb.i
-bb.i:		; preds = %bb6.i, %bb.preheader.i
-	%g_2.tmp.0.i = phi i16 [ %g_2.promoted.i, %bb.preheader.i ], [ %tmp8.i, %bb6.i ]		; <i16> [#uses=2]
-	%tmp2.i = icmp eq i16 %g_2.tmp.0.i, 0		; <i1> [#uses=1]
-	br i1 %tmp2.i, label %bb4.i, label %bb6.i
-bb4.i:		; preds = %bb.i
-	%tmp5.i = volatile load i32* @g_5, align 4		; <i32> [#uses=0]
-	br label %bb6.i
-bb6.i:		; preds = %bb4.i, %bb.i
-	%tmp8.i = add i16 %g_2.tmp.0.i, 1		; <i16> [#uses=3]
-	%tmp11.i = icmp sgt i16 %tmp8.i, 42		; <i1> [#uses=1]
-	br i1 %tmp11.i, label %bb.i, label %return.loopexit.i
-return.loopexit.i:		; preds = %bb6.i
-	%tmp8.i.lcssa = phi i16 [ %tmp8.i, %bb6.i ]		; <i16> [#uses=1]
-	store i16 %tmp8.i.lcssa, i16* @g_2
-	br label %func_1.exit
-func_1.exit:		; preds = %return.loopexit.i, %entry
-	%tmp1 = load i16* @g_2, align 2		; <i16> [#uses=1]
-	%tmp12 = sext i16 %tmp1 to i32		; <i32> [#uses=1]
-	%tmp3 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp12 ) nounwind 		; <i32> [#uses=0]
-	ret i32 0
-}
-
-declare i32 @printf(i8*, ...) nounwind 
-
diff --git a/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll b/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
deleted file mode 100644
index 6f691de537b1..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-06-03-DomFrontier.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt < %s -loop-rotate -loop-unswitch -loop-index-split -instcombine -disable-output
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9"
-	%struct.__CFData = type opaque
-	%struct.__CFString = type opaque
-
-define %struct.__CFData* @WirelessCreatePSK(%struct.__CFString* %inPassphrase, %struct.__CFData* %inSSID) nounwind  {
-entry:
-	br label %bb52
-
-bb52:		; preds = %bb142, %bb52, %entry
-	br i1 false, label %bb142, label %bb52
-
-bb63:		; preds = %bb142, %bb131
-	%t.0.reg2mem.0 = phi i32 [ %tmp133, %bb131 ], [ 0, %bb142 ]		; <i32> [#uses=2]
-	%tmp65 = icmp ult i32 %t.0.reg2mem.0, 16		; <i1> [#uses=1]
-	br i1 %tmp65, label %bb68, label %bb89
-
-bb68:		; preds = %bb63
-	br label %bb131
-
-bb89:		; preds = %bb63
-	br label %bb131
-
-bb131:		; preds = %bb89, %bb68
-	%tmp133 = add i32 %t.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%tmp136 = icmp ult i32 %tmp133, 80		; <i1> [#uses=1]
-	br i1 %tmp136, label %bb63, label %bb142
-
-bb142:		; preds = %bb131, %bb52
-	br i1 undef, label %bb63, label %bb52
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll b/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
deleted file mode 100644
index 1fcd960e0511..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-07-08-MisCompilation.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "1 loop-index-split"
-; PR 2487
-@g_6 = external global i32		; <i32*> [#uses=1]
-
-define void @func_1() nounwind  {
-entry:
-	br label %bb
-
-bb:		; preds = %bb4, %entry
-	%l_3.0 = phi i8 [ 0, %entry ], [ %tmp6, %bb4 ]		; <i8> [#uses=2]
-	%tmp1 = icmp eq i8 %l_3.0, 0		; <i1> [#uses=1]
-	br i1 %tmp1, label %bb3, label %bb4
-
-bb3:		; preds = %bb
-	store i32 1, i32* @g_6, align 4
-	br label %bb4
-
-bb4:		; preds = %bb3, %bb
-	%tmp6 = add i8 %l_3.0, 1		; <i8> [#uses=2]
-	%tmp9 = icmp sgt i8 %tmp6, -1		; <i1> [#uses=1]
-	br i1 %tmp9, label %bb, label %return
-
-return:		; preds = %bb4
-	ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll b/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
deleted file mode 100644
index ee8e7a3eb863..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt < %s -loop-index-split -stats -disable-output | not grep "loop-index-split"
-; PR 2791
-@g_40 = common global i32 0		; <i32*> [#uses=1]
-@g_192 = common global i32 0		; <i32*> [#uses=2]
-@"\01LC" = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
-
-define void @func_29() nounwind {
-entry:
-	%0 = load i32* @g_40, align 4		; <i32> [#uses=1]
-	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
-	%g_192.promoted = load i32* @g_192		; <i32> [#uses=0]
-	br i1 %1, label %entry.split.us, label %entry.split
-
-entry.split.us:		; preds = %entry
-	br label %bb.us
-
-bb.us:		; preds = %bb5.us, %entry.split.us
-	%i.0.reg2mem.0.us = phi i32 [ 0, %entry.split.us ], [ %3, %bb5.us ]		; <i32> [#uses=2]
-	%2 = icmp eq i32 %i.0.reg2mem.0.us, 0		; <i1> [#uses=1]
-	br i1 %2, label %bb1.us, label %bb5.us
-
-bb5.us:		; preds = %bb1.us, %bb4.us, %bb.us
-	%iftmp.0.0.us = phi i32 [ 0, %bb4.us ], [ 1, %bb.us ], [ 1, %bb1.us ]		; <i32> [#uses=1]
-	%3 = add i32 %i.0.reg2mem.0.us, 1		; <i32> [#uses=3]
-	%4 = icmp ult i32 %3, 10		; <i1> [#uses=1]
-	br i1 %4, label %bb.us, label %bb8.us
-
-bb4.us:		; preds = %bb1.us
-	br label %bb5.us
-
-bb1.us:		; preds = %bb.us
-	br i1 true, label %bb4.us, label %bb5.us
-
-bb8.us:		; preds = %bb5.us
-	%iftmp.0.0.lcssa.us = phi i32 [ %iftmp.0.0.us, %bb5.us ]		; <i32> [#uses=1]
-	%.lcssa.us = phi i32 [ %3, %bb5.us ]		; <i32> [#uses=1]
-	br label %bb8.split
-
-entry.split:		; preds = %entry
-	br label %bb
-
-bb:		; preds = %bb5, %entry.split
-	%i.0.reg2mem.0 = phi i32 [ 0, %entry.split ], [ %6, %bb5 ]		; <i32> [#uses=2]
-	%5 = icmp eq i32 %i.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb1, label %bb5
-
-bb1:		; preds = %bb
-	br i1 false, label %bb4, label %bb5
-
-bb4:		; preds = %bb1
-	br label %bb5
-
-bb5:		; preds = %bb1, %bb, %bb4
-	%iftmp.0.0 = phi i32 [ 0, %bb4 ], [ 1, %bb ], [ 1, %bb1 ]		; <i32> [#uses=1]
-	%6 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=3]
-	%7 = icmp ult i32 %6, 10		; <i1> [#uses=1]
-	br i1 %7, label %bb, label %bb8
-
-bb8:		; preds = %bb5
-	%iftmp.0.0.lcssa = phi i32 [ %iftmp.0.0, %bb5 ]		; <i32> [#uses=1]
-	%.lcssa = phi i32 [ %6, %bb5 ]		; <i32> [#uses=1]
-	br label %bb8.split
-
-bb8.split:		; preds = %bb8.us, %bb8
-	%iftmp.0.0.lcssa.us-lcssa = phi i32 [ %iftmp.0.0.lcssa, %bb8 ], [ %iftmp.0.0.lcssa.us, %bb8.us ]		; <i32> [#uses=1]
-	%.lcssa.us-lcssa = phi i32 [ %.lcssa, %bb8 ], [ %.lcssa.us, %bb8.us ]		; <i32> [#uses=1]
-	store i32 %iftmp.0.0.lcssa.us-lcssa, i32* @g_192
-	%8 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %.lcssa.us-lcssa ) nounwind		; <i32> [#uses=0]
-	ret void
-}
-
-declare i32 @printf(i8*, ...) nounwind
-
-define i32 @main() nounwind {
-entry:
-	call void @func_29( ) nounwind
-	ret i32 0
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll b/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
deleted file mode 100644
index ef677369cc47..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-; PR 2805
-@g_330 = common global i32 0		; <i32*> [#uses=1]
-
-define i32 @func_45(i32 %p_47) nounwind {
-entry:
-	br label %bb
-
-bb:		; preds = %bb3, %entry
-	%p_47_addr.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %2, %bb3 ]		; <i32> [#uses=2]
-	%0 = icmp eq i32 %p_47_addr.0.reg2mem.0, 0		; <i1> [#uses=1]
-	br i1 %0, label %bb2, label %bb1
-
-bb1:		; preds = %bb
-	%1 = tail call i32 (...)* @func_70( i32 1 ) nounwind		; <i32> [#uses=0]
-	br label %bb3
-
-bb2:		; preds = %bb
-	store i32 1, i32* @g_330, align 4
-	br label %bb3
-
-bb3:		; preds = %bb2, %bb1
-	%2 = add i32 %p_47_addr.0.reg2mem.0, 1		; <i32> [#uses=3]
-	%3 = icmp ult i32 %2, 22		; <i1> [#uses=1]
-	br i1 %3, label %bb, label %bb6
-
-bb6:		; preds = %bb3
-	%.lcssa = phi i32 [ %2, %bb3 ]		; <i32> [#uses=1]
-	%4 = tail call i32 (...)* @func_95( i32 %.lcssa ) nounwind		; <i32> [#uses=1]
-	%5 = tail call i32 (...)* @func_56( i32 %4 ) nounwind		; <i32> [#uses=0]
-	ret i32 undef
-}
-
-declare i32 @func_70(...)
-
-declare i32 @func_95(...)
-
-declare i32 @func_56(...)
diff --git a/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll b/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
deleted file mode 100644
index cca54adb1955..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-	%struct.RExC_state_t = type { i32, i8*, %struct.regexp*, i8*, i8*, i8*, i32, %struct.regnode*, %struct.regnode*, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.SV = type { i8*, i32, i32 }
-	%struct.reg_data = type { i32, i8*, [1 x i8*] }
-	%struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] }
-	%struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* }
-	%struct.regexp = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] }
-	%struct.regnode = type { i8, i8, i16 }
-
-define fastcc %struct.regnode* @S_regclass(%struct.RExC_state_t* %pRExC_state) nounwind {
-entry:
-	br label %bb439
-
-bb439:		; preds = %bb444, %entry
-	%value23.16.reg2mem.0 = phi i32 [ %3, %bb444 ], [ 0, %entry ]		; <i32> [#uses=3]
-	%0 = icmp ugt i32 %value23.16.reg2mem.0, 31		; <i1> [#uses=1]
-	%1 = icmp ne i32 %value23.16.reg2mem.0, 127		; <i1> [#uses=1]
-	%2 = and i1 %0, %1		; <i1> [#uses=1]
-	br i1 %2, label %bb443, label %bb444
-
-bb443:		; preds = %bb439
-	br label %bb444
-
-bb444:		; preds = %bb443, %bb439
-	%3 = add i32 %value23.16.reg2mem.0, 1		; <i32> [#uses=2]
-	%4 = icmp ugt i32 %3, 255		; <i1> [#uses=1]
-	br i1 %4, label %bb675, label %bb439
-
-bb675:		; preds = %bb444
-	unreachable
-}
diff --git a/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll b/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
deleted file mode 100644
index 372fee51a09b..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-10-10-OneIteration.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; RUN: opt < %s -loop-index-split -stats -disable-output |& grep "1 loop-index-split" 
-; PR 2869
-
-@w = external global [2 x [2 x i32]]		; <[2 x [2 x i32]]*> [#uses=5]
-
-declare i32 @f() nounwind
-
-define i32 @main() noreturn nounwind {
-entry:
-	br label %bb1.i.outer
-
-bb1.i.outer:		; preds = %bb5.i, %entry
-	%i.0.reg2mem.0.ph.i.ph = phi i32 [ 0, %entry ], [ %indvar.next1, %bb5.i ]		; <i32> [#uses=3]
-	br label %bb1.i
-
-bb1.i:		; preds = %bb3.i, %bb1.i.outer
-	%j.0.reg2mem.0.i = phi i32 [ 0, %bb1.i.outer ], [ %indvar.next, %bb3.i ]		; <i32> [#uses=3]
-	%0 = icmp eq i32 %i.0.reg2mem.0.ph.i.ph, %j.0.reg2mem.0.i		; <i1> [#uses=1]
-	br i1 %0, label %bb2.i, label %bb3.i
-
-bb2.i:		; preds = %bb1.i
-	%1 = getelementptr [2 x [2 x i32]]* @w, i32 0, i32 %i.0.reg2mem.0.ph.i.ph, i32 %j.0.reg2mem.0.i		; <i32*> [#uses=1]
-	store i32 1, i32* %1, align 4
-	br label %bb3.i
-
-bb3.i:		; preds = %bb2.i, %bb1.i
-	%indvar.next = add i32 %j.0.reg2mem.0.i, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, 2		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb5.i, label %bb1.i
-
-bb5.i:		; preds = %bb3.i
-	%indvar.next1 = add i32 %i.0.reg2mem.0.ph.i.ph, 1		; <i32> [#uses=2]
-	%exitcond2 = icmp eq i32 %indvar.next1, 2		; <i1> [#uses=1]
-	br i1 %exitcond2, label %f.exit, label %bb1.i.outer
-
-f.exit:		; preds = %bb5.i
-	%2 = load i32* getelementptr ([2 x [2 x i32]]* @w, i32 0, i32 0, i32 0), align 4		; <i32> [#uses=1]
-	%3 = icmp eq i32 %2, 1		; <i1> [#uses=1]
-	br i1 %3, label %bb, label %bb3
-
-bb:		; preds = %f.exit
-	%4 = load i32* getelementptr ([2 x [2 x i32]]* @w, i32 0, i32 1, i32 1), align 4		; <i32> [#uses=1]
-	%5 = icmp eq i32 %4, 1		; <i1> [#uses=1]
-	br i1 %5, label %bb1, label %bb3
-
-bb1:		; preds = %bb
-	%6 = load i32* getelementptr ([2 x [2 x i32]]* @w, i32 0, i32 1, i32 0), align 4		; <i32> [#uses=1]
-	%7 = icmp eq i32 %6, 0		; <i1> [#uses=1]
-	br i1 %7, label %bb2, label %bb3
-
-bb2:		; preds = %bb1
-	%8 = load i32* getelementptr ([2 x [2 x i32]]* @w, i32 0, i32 0, i32 1), align 4		; <i32> [#uses=1]
-	%9 = icmp eq i32 %8, 0		; <i1> [#uses=1]
-	br i1 %9, label %bb4, label %bb3
-
-bb3:		; preds = %bb2, %bb1, %bb, %f.exit
-	tail call void @abort() noreturn nounwind
-	unreachable
-
-bb4:		; preds = %bb2
-	ret i32 0
-}
-
-declare void @abort() noreturn nounwind
-
-declare void @exit(i32) noreturn nounwind
diff --git a/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll b/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
deleted file mode 100644
index 217ff52bb2c5..000000000000
--- a/test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: opt < %s -loop-index-split -stats | not grep "loop-index-split"
-; PR3029
-
-@g_138 = common global i32 0		; <i32*> [#uses=3]
-@g_188 = common global i32 0		; <i32*> [#uses=4]
-@g_207 = common global i32 0		; <i32*> [#uses=3]
-@"\01LC" = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
-@g_102 = common global i32 0		; <i32*> [#uses=0]
-
-define i32 @func_119() nounwind {
-entry:
-	%0 = volatile load i32* @g_138, align 4		; <i32> [#uses=1]
-	ret i32 %0
-}
-
-define void @func_110(i32 %p_111) nounwind {
-entry:
-	%0 = load i32* @g_188, align 4		; <i32> [#uses=1]
-	%1 = icmp ugt i32 %0, -1572397472		; <i1> [#uses=1]
-	br i1 %1, label %bb, label %bb1
-
-bb:		; preds = %entry
-	%2 = volatile load i32* @g_138, align 4		; <i32> [#uses=0]
-	ret void
-
-bb1:		; preds = %entry
-	store i32 1, i32* @g_207, align 4
-	ret void
-}
-
-define void @func_34() nounwind {
-entry:
-	store i32 0, i32* @g_188
-	%g_188.promoted = load i32* @g_188		; <i32> [#uses=1]
-	br label %bb
-
-bb:		; preds = %func_110.exit, %entry
-	%g_188.tmp.0 = phi i32 [ %g_188.promoted, %entry ], [ %2, %func_110.exit ]		; <i32> [#uses=2]
-	%0 = icmp ugt i32 %g_188.tmp.0, -1572397472		; <i1> [#uses=1]
-	br i1 %0, label %bb.i, label %bb1.i
-
-bb.i:		; preds = %bb
-	%1 = volatile load i32* @g_138, align 4		; <i32> [#uses=0]
-	br label %func_110.exit
-
-bb1.i:		; preds = %bb
-	store i32 1, i32* @g_207, align 4
-	br label %func_110.exit
-
-func_110.exit:		; preds = %bb.i, %bb1.i
-	%2 = add i32 %g_188.tmp.0, 1		; <i32> [#uses=3]
-	%3 = icmp sgt i32 %2, 1		; <i1> [#uses=1]
-	br i1 %3, label %return, label %bb
-
-return:		; preds = %func_110.exit
-	%.lcssa = phi i32 [ %2, %func_110.exit ]		; <i32> [#uses=1]
-	store i32 %.lcssa, i32* @g_188
-	ret void
-}
-
-define i32 @main() nounwind {
-entry:
-	call void @func_34() nounwind
-	%0 = load i32* @g_207, align 4		; <i32> [#uses=1]
-	%1 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %0) nounwind		; <i32> [#uses=0]
-	ret i32 0
-}
-
-declare i32 @printf(i8*, ...) nounwind
diff --git a/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll b/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
deleted file mode 100644
index 9acf3915c0a2..000000000000
--- a/test/Transforms/LoopIndexSplit/2009-03-02-UpdateIterationSpace-crash.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output
-	%struct.CGPoint = type { double, double }
-	%struct.IBCFMutableDictionary = type { %struct.NSMutableArray, %struct.__CFDictionary*, %struct.NSSortDescriptor*, %struct.NSSortDescriptor* }
-	%struct.IBInspectorMode = type opaque
-	%struct.IBInspectorModeView = type { %struct.NSView, %struct.NSArray*, %struct.IBCFMutableDictionary*, %struct.IBInspectorMode*, %struct.IBInspectorMode*, %struct.IBInspectorMode*, %struct.objc_selector*, %struct.NSObject* }
-	%struct.NSArray = type { %struct.NSObject }
-	%struct.NSImage = type { %struct.NSObject, %struct.NSArray*, %struct.CGPoint, %struct.__imageFlags, %struct.NSObject*, %struct._NSImageAuxiliary* }
-	%struct.NSMutableArray = type { %struct.NSArray }
-	%struct.NSObject = type { %struct.objc_class* }
-	%struct.NSRect = type { %struct.CGPoint, %struct.CGPoint }
-	%struct.NSResponder = type { %struct.NSObject, %struct.NSObject* }
-	%struct.NSSortDescriptor = type { %struct.NSObject, i64, %struct.NSArray*, %struct.objc_selector*, %struct.NSObject* }
-	%struct.NSURL = type { %struct.NSObject, %struct.NSArray*, %struct.NSURL*, i8*, i8* }
-	%struct.NSView = type { %struct.NSResponder, %struct.NSRect, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSWindow*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct._NSViewAuxiliary*, %struct._VFlags, %struct.__VFlags2 }
-	%struct.NSWindow = type { %struct.NSResponder, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSResponder*, %struct.NSView*, %struct.NSView*, %struct.NSObject*, %struct.NSObject*, i32, i64, i32, %struct.NSArray*, %struct.NSObject*, i8, i8, i8, i8, i8*, i8*, %struct.NSImage*, i32, %struct.NSMutableArray*, %struct.NSURL*, %struct.CGPoint*, %struct.NSArray*, %struct.NSArray*, %struct.__wFlags, %struct.NSObject*, %struct.NSView*, %struct.NSWindowAuxiliary* }
-	%struct.NSWindowAuxiliary = type opaque
-	%struct._NSImageAuxiliary = type opaque
-	%struct._NSViewAuxiliary = type opaque
-	%struct._VFlags = type <{ i8, i8, i8, i8 }>
-	%struct.__CFDictionary = type opaque
-	%struct.__VFlags2 = type <{ i32 }>
-	%struct.__imageFlags = type <{ i8, [3 x i8] }>
-	%struct.__wFlags = type <{ i8, i8, i8, i8, i8, i8, i8, i8 }>
-	%struct.objc_class = type opaque
-	%struct.objc_selector = type opaque
-
-define %struct.NSArray* @"\01-[IBInspectorModeView calculateModeRects]"(%struct.IBInspectorModeView* %self, %struct.objc_selector* %_cmd) optsize ssp {
-entry:
-	br i1 false, label %bb7, label %bb
-
-bb:		; preds = %entry
-	br i1 false, label %bb.nph, label %bb7.loopexit
-
-bb.nph:		; preds = %bb
-	br label %bb1
-
-bb1:		; preds = %bb6, %bb.nph
-	%midx.01 = phi i64 [ %3, %bb6 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
-	%0 = icmp sge i64 %midx.01, 0		; <i1> [#uses=1]
-	%1 = icmp sle i64 %midx.01, 0		; <i1> [#uses=1]
-	%2 = and i1 %0, %1		; <i1> [#uses=1]
-	br i1 %2, label %bb4, label %bb5
-
-bb4:		; preds = %bb1
-	br label %bb5
-
-bb5:		; preds = %bb4, %bb1
-	%modeWidth.0 = phi double [ 0.000000e+00, %bb1 ], [ 0.000000e+00, %bb4 ]		; <double> [#uses=0]
-	%3 = add i64 %midx.01, 1		; <i64> [#uses=1]
-	br label %bb6
-
-bb6:		; preds = %bb5
-	%4 = icmp slt i64 0, 0		; <i1> [#uses=1]
-	br i1 %4, label %bb1, label %bb6.bb7.loopexit_crit_edge
-
-bb6.bb7.loopexit_crit_edge:		; preds = %bb6
-	br label %bb7.loopexit
-
-bb7.loopexit:		; preds = %bb6.bb7.loopexit_crit_edge, %bb
-	br label %bb7
-
-bb7:		; preds = %bb7.loopexit, %entry
-	ret %struct.NSArray* null
-}
diff --git a/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll b/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
deleted file mode 100644
index deef94128973..000000000000
--- a/test/Transforms/LoopIndexSplit/2009-03-30-undef.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -loop-index-split -S | not grep undef
-define i32 @main() {
-entry:
-	br label %header
-
-header:
-	%r = phi i32 [ 0, %entry ], [ %r3, %skip ]
-	%i = phi i32 [ 0, %entry ], [ %i1, %skip ]
-        %i99 = add i32 %i, 99
-	%cond = icmp eq i32 %i99, 3
-        br i1 %cond, label %body, label %skip
-
-body:
-        br label %skip
-
-skip:
-        %r3 = phi i32 [ %r, %header ], [ 3, %body ]
-        %i1 = add i32 %i, 1
-        %exitcond = icmp eq i32 %i1, 10
-        br i1 %exitcond, label %exit, label %header
-
-exit:
-        ret i32 %r3
-}
diff --git a/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll b/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
deleted file mode 100644
index ad2b794218cc..000000000000
--- a/test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output 
-
-        %struct._edit_script = type { %struct._edit_script*, i32, i8 }
-
-define void @align_path(i8* %seq1, i8* %seq2, i32 %i1, i32 %j1, i32 %i2, i32 %j2, i32 %dist, %struct._edit_script** %head, %struct._edit_script** %tail, i32 %M, i32 %N) {
-entry:
-        br label %bb354
-
-bb354:          ; preds = %bb511, %entry
-        br i1 false, label %bb495, label %bb368
-
-bb368:          ; preds = %bb354
-        ret void
-
-bb495:          ; preds = %bb495, %bb354
-        br i1 false, label %bb511, label %bb495
-
-bb511:          ; preds = %bb495
-        br i1 false, label %xmalloc.exit69, label %bb354
-
-xmalloc.exit69:         ; preds = %bb511
-        br i1 false, label %bb556, label %bb542.preheader
-
-bb542.preheader:                ; preds = %xmalloc.exit69
-        ret void
-
-bb556:          ; preds = %xmalloc.exit69
-        br label %bb583
-
-bb583:          ; preds = %cond_next693, %bb556
-        %k.4342.0 = phi i32 [ %tmp707, %cond_next693 ], [ 0, %bb556 ]           ; <i32> [#uses=2]
-        %tmp586 = icmp eq i32 %k.4342.0, 0              ; <i1> [#uses=1]
-        br i1 %tmp586, label %cond_true589, label %cond_false608
-
-cond_true589:           ; preds = %bb583
-        br label %cond_next693
-
-cond_false608:          ; preds = %bb583
-        br i1 false, label %cond_next661, label %cond_next693
-
-cond_next661:           ; preds = %cond_false608
-        br label %cond_next693
-
-cond_next693:           ; preds = %cond_next661, %cond_false608, %cond_true589
-        %tmp705 = getelementptr i32* null, i32 0                ; <i32*> [#uses=0]
-        %tmp707 = add i32 %k.4342.0, 1          ; <i32> [#uses=2]
-        %tmp711 = icmp sgt i32 %tmp707, 0               ; <i1> [#uses=1]
-        br i1 %tmp711, label %bb726.preheader, label %bb583
-
-bb726.preheader:                ; preds = %cond_next693
-        ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll b/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
deleted file mode 100644
index 187484ad0bd0..000000000000
--- a/test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output 
-; PR1828.bc
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-	%RPyOpaque_RuntimeTypeInfo = type opaque*
-	%arraytype_Char_1 = type { i32, [0 x i8] }
-	%arraytype_Signed = type { i32, [0 x i32] }
-	%functiontype_11 = type %structtype_object* ()
-	%functiontype_360 = type %structtype_rpy_string* (%structtype_pypy.rlib.rbigint.rbigint*, %structtype_rpy_string*, %structtype_rpy_string*, %structtype_rpy_string*)
-	%structtype_list_18 = type { i32, %arraytype_Signed* }
-	%structtype_object = type { %structtype_object_vtable* }
-	%structtype_object_vtable = type { i32, i32, %RPyOpaque_RuntimeTypeInfo*, %arraytype_Char_1*, %functiontype_11* }
-	%structtype_pypy.rlib.rbigint.rbigint = type { %structtype_object, %structtype_list_18*, i32 }
-	%structtype_rpy_string = type { i32, %arraytype_Char_1 }
-
-define fastcc %structtype_rpy_string* @pypy__format(%structtype_pypy.rlib.rbigint.rbigint* %a_1, %structtype_rpy_string* %digits_0, %structtype_rpy_string* %prefix_3, %structtype_rpy_string* %suffix_0) {
-block0:
-	br i1 false, label %block67, label %block13
-
-block13:		; preds = %block0
-	ret %structtype_rpy_string* null
-
-block31:		; preds = %block67, %block44
-	ret %structtype_rpy_string* null
-
-block42:		; preds = %block67, %block44
-	%j_167.reg2mem.0 = phi i32 [ %v63822, %block44 ], [ 0, %block67 ]		; <i32> [#uses=1]
-	%v63822 = add i32 %j_167.reg2mem.0, -1		; <i32> [#uses=3]
-	%v63823 = icmp slt i32 %v63822, 0		; <i1> [#uses=1]
-	br i1 %v63823, label %block46, label %block43
-
-block43:		; preds = %block42
-	br label %block44
-
-block44:		; preds = %block46, %block43
-	%v6377959 = icmp sgt i32 %v63822, 0		; <i1> [#uses=1]
-	br i1 %v6377959, label %block42, label %block31
-
-block46:		; preds = %block42
-	br label %block44
-
-block67:		; preds = %block0
-	br i1 false, label %block42, label %block31
-}
diff --git a/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll b/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
deleted file mode 100644
index 098e407a3300..000000000000
--- a/test/Transforms/LoopIndexSplit/Crash2-2007-08-17.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output 
-
-        %struct._edit_script = type { %struct._edit_script*, i32, i8 }
-
-define void @align_path(i8* %seq1, i8* %seq2, i32 %i1, i32 %j1, i32 %i2, i32 %j2, i32 %dist, %struct._edit_script** %head, %struct._edit_script** %tail, i32 %M, i32 %N) {
-entry:
-        br label %bb354
-
-bb354:          ; preds = %bb511, %entry
-        br i1 false, label %bb495, label %bb368
-
-bb368:          ; preds = %bb354
-        ret void
-
-bb495:          ; preds = %bb495, %bb354
-        br i1 false, label %bb511, label %bb495
-
-bb511:          ; preds = %bb495
-        br i1 false, label %xmalloc.exit69, label %bb354
-
-xmalloc.exit69:         ; preds = %bb511
-        br i1 false, label %bb556, label %bb542.preheader
-
-bb542.preheader:                ; preds = %xmalloc.exit69
-        ret void
-
-bb556:          ; preds = %xmalloc.exit69
-        br label %bb583
-
-bb583:          ; preds = %cond_next693, %bb556
-        %k.4342.0 = phi i32 [ %tmp707, %cond_next693 ], [ 0, %bb556 ]           ; <i32> [#uses=2]
-        %tmp586 = icmp eq i32 %k.4342.0, 0              ; <i1> [#uses=1]
-        br i1 %tmp586, label %cond_true589, label %cond_false608
-
-cond_true589:           ; preds = %bb583
-        br label %cond_next693
-
-cond_false608:          ; preds = %bb583
-        br i1 false, label %bb645, label %cond_next693
-
-bb645:          ; preds = %cond_false608
-        br i1 false, label %bb684, label %cond_next661
-
-cond_next661:           ; preds = %bb645
-        br i1 false, label %bb684, label %cond_next693
-
-bb684:          ; preds = %cond_next661, %bb645
-        br label %cond_next693
-
-cond_next693:           ; preds = %bb684, %cond_next661, %cond_false608, %cond_true589
-        %tmp705 = getelementptr i32* null, i32 0                ; <i32*> [#uses=0]
-        %tmp707 = add i32 %k.4342.0, 1          ; <i32> [#uses=2]
-        %tmp711 = icmp sgt i32 %tmp707, 0               ; <i1> [#uses=1]
-        br i1 %tmp711, label %bb726.preheader, label %bb583
-
-bb726.preheader:                ; preds = %cond_next693
-        ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll b/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
deleted file mode 100644
index a04715a7e952..000000000000
--- a/test/Transforms/LoopIndexSplit/ExitCondition-2007-09-10.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt < %s -loop-index-split -disable-output 
-
-@k = external global i32		; <i32*> [#uses=2]
-
-define void @foobar(i32 %a, i32 %b) {
-entry:
-	br label %bb
-
-bb:		; preds = %cond_next16, %entry
-	%i.01.0 = phi i32 [ 0, %entry ], [ %tmp18, %cond_next16 ]		; <i32> [#uses=5]
-	%tsum.18.0 = phi i32 [ 42, %entry ], [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=3]
-	%tmp1 = icmp slt i32 %i.01.0, 50		; <i1> [#uses=1]
-	br i1 %tmp1, label %cond_true, label %cond_false
-
-cond_true:		; preds = %bb
-	%tmp4 = tail call i32 @foo( i32 %i.01.0 )		; <i32> [#uses=1]
-	%tmp6 = add i32 %tmp4, %tsum.18.0		; <i32> [#uses=2]
-	%tmp914 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp1015 = icmp eq i32 %tmp914, 0		; <i1> [#uses=1]
-	br i1 %tmp1015, label %cond_next16, label %cond_true13
-
-cond_false:		; preds = %bb
-	%tmp8 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	%tmp9 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp10 = icmp eq i32 %tmp9, 0		; <i1> [#uses=1]
-	br i1 %tmp10, label %cond_next16, label %cond_true13
-
-cond_true13:		; preds = %cond_false, %cond_true
-	%tsum.013.0 = phi i32 [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=1]
-	%tmp15 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	br label %cond_next16
-
-cond_next16:		; preds = %cond_false, %cond_true, %cond_true13
-	%tsum.013.1 = phi i32 [ %tsum.013.0, %cond_true13 ], [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=2]
-	%tmp18 = add i32 %i.01.0, 1		; <i32> [#uses=3]
-	%tmp21 = icmp eq i32 %tmp18, 100		; <i1> [#uses=1]
-	br i1 %tmp21, label %bb, label %bb24
-
-bb24:		; preds = %cond_next16
-	%tmp18.lcssa = phi i32 [ %tmp18, %cond_next16 ]		; <i32> [#uses=1]
-	%tsum.013.1.lcssa = phi i32 [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=1]
-	%tmp27 = tail call i32 @t( i32 %tmp18.lcssa, i32 %tsum.013.1.lcssa )		; <i32> [#uses=0]
-	ret void
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-declare i32 @t(i32, i32)
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
deleted file mode 100644
index d18b3b71aeda..000000000000
--- a/test/Transforms/LoopIndexSplit/OneIterLoop-2007-08-17.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; Loop is elimianted
-; RUN: opt < %s -loop-index-split -disable-output -stats |& \
-; RUN: grep "loop-index-split" | count 1
-	%struct.anon = type { i32 }
-@S1 = external global i32		; <i32*> [#uses=1]
-@W1 = external global i32		; <i32*> [#uses=1]
-@Y = weak global [100 x %struct.anon] zeroinitializer, align 32		; <[100 x %struct.anon]*> [#uses=1]
-@ti = external global i32		; <i32*> [#uses=1]
-@T2 = external global [100 x [100 x i32]]		; <[100 x [100 x i32]]*> [#uses=1]
-@d = external global i32		; <i32*> [#uses=1]
-@T1 = external global i32		; <i32*> [#uses=2]
-@N2 = external global i32		; <i32*> [#uses=2]
-
-define void @foo() {
-entry:
-	%tmp = load i32* @S1, align 4		; <i32> [#uses=4]
-	%tmp266 = load i32* @N2, align 4		; <i32> [#uses=1]
-	%tmp288 = icmp ult i32 %tmp, %tmp266		; <i1> [#uses=1]
-	br i1 %tmp288, label %bb.preheader, label %return
-
-bb.preheader:		; preds = %entry
-	%tmp1 = load i32* @W1, align 4		; <i32> [#uses=1]
-	%tmp13 = load i32* @ti, align 4		; <i32> [#uses=1]
-	%tmp18 = load i32* @d, align 4		; <i32> [#uses=1]
-	%tmp26 = load i32* @N2, align 4		; <i32> [#uses=2]
-	%T1.promoted = load i32* @T1		; <i32> [#uses=1]
-	%tmp2 = add i32 %tmp, 1		; <i32> [#uses=2]
-	%tmp4 = icmp ugt i32 %tmp2, %tmp26		; <i1> [#uses=1]
-	%umax = select i1 %tmp4, i32 %tmp2, i32 %tmp26		; <i32> [#uses=1]
-	%tmp5 = sub i32 0, %tmp		; <i32> [#uses=1]
-	%tmp6 = add i32 %umax, %tmp5		; <i32> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb25, %bb.preheader
-	%indvar = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb25 ]		; <i32> [#uses=2]
-	%T1.tmp.1 = phi i32 [ %T1.promoted, %bb.preheader ], [ %T1.tmp.0, %bb25 ]		; <i32> [#uses=3]
-	%tj.01.0 = add i32 %indvar, %tmp		; <i32> [#uses=3]
-	%tmp3 = icmp eq i32 %tj.01.0, %tmp1		; <i1> [#uses=1]
-	br i1 %tmp3, label %cond_true, label %bb25
-
-cond_true:		; preds = %bb
-	%tmp7 = getelementptr [100 x %struct.anon]* @Y, i32 0, i32 %tj.01.0, i32 0		; <i32*> [#uses=1]
-	%tmp8 = load i32* %tmp7, align 4		; <i32> [#uses=1]
-	%tmp9 = icmp sgt i32 %tmp8, 0		; <i1> [#uses=1]
-	br i1 %tmp9, label %cond_true12, label %bb25
-
-cond_true12:		; preds = %cond_true
-	%tmp16 = getelementptr [100 x [100 x i32]]* @T2, i32 0, i32 %tmp13, i32 %tj.01.0		; <i32*> [#uses=1]
-	%tmp17 = load i32* %tmp16, align 4		; <i32> [#uses=1]
-	%tmp19 = mul i32 %tmp18, %tmp17		; <i32> [#uses=1]
-	%tmp21 = add i32 %tmp19, %T1.tmp.1		; <i32> [#uses=1]
-	br label %bb25
-
-bb25:		; preds = %cond_true12, %cond_true, %bb
-	%T1.tmp.0 = phi i32 [ %T1.tmp.1, %bb ], [ %T1.tmp.1, %cond_true ], [ %tmp21, %cond_true12 ]		; <i32> [#uses=2]
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %tmp6		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb, label %return.loopexit
-
-return.loopexit:		; preds = %bb25
-	%T1.tmp.0.lcssa = phi i32 [ %T1.tmp.0, %bb25 ]		; <i32> [#uses=1]
-	store i32 %T1.tmp.0.lcssa, i32* @T1
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
deleted file mode 100644
index ff73a5b44b3e..000000000000
--- a/test/Transforms/LoopIndexSplit/OneIterLoop2-2007-08-17.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; Loop is elimianted. Save last value assignment.
-; RUN: opt < %s -loop-index-split -disable-output -stats |& \
-; RUN: grep "loop-index-split" | count 1
-
-	%struct.anon = type { i32 }
-@S1 = external global i32		; <i32*> [#uses=1]
-@W1 = external global i32		; <i32*> [#uses=1]
-@Y = weak global [100 x %struct.anon] zeroinitializer, align 32		; <[100 x %struct.anon]*> [#uses=1]
-@ti = external global i32		; <i32*> [#uses=1]
-@T2 = external global [100 x [100 x i32]]		; <[100 x [100 x i32]]*> [#uses=1]
-@d = external global i32		; <i32*> [#uses=1]
-@T1 = external global i32		; <i32*> [#uses=2]
-@N1 = external global i32		; <i32*> [#uses=2]
-
-define void @foo() {
-entry:
-	%tmp = load i32* @S1, align 4		; <i32> [#uses=4]
-	%tmp266 = load i32* @N1, align 4		; <i32> [#uses=1]
-	%tmp288 = icmp ult i32 %tmp, %tmp266		; <i1> [#uses=1]
-	br i1 %tmp288, label %bb.preheader, label %return
-
-bb.preheader:		; preds = %entry
-	%tmp1 = load i32* @W1, align 4		; <i32> [#uses=1]
-	%tmp13 = load i32* @ti, align 4		; <i32> [#uses=1]
-	%tmp18 = load i32* @d, align 4		; <i32> [#uses=1]
-	%tmp26 = load i32* @N1, align 4		; <i32> [#uses=2]
-	%T1.promoted = load i32* @T1		; <i32> [#uses=1]
-	%tmp2 = add i32 %tmp, 1		; <i32> [#uses=2]
-	%tmp4 = icmp ugt i32 %tmp2, %tmp26		; <i1> [#uses=1]
-	%umax = select i1 %tmp4, i32 %tmp2, i32 %tmp26		; <i32> [#uses=1]
-	%tmp5 = sub i32 0, %tmp		; <i32> [#uses=1]
-	%tmp6 = add i32 %umax, %tmp5		; <i32> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb25, %bb.preheader
-	%indvar = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb25 ]		; <i32> [#uses=2]
-	%T1.tmp.1 = phi i32 [ %T1.promoted, %bb.preheader ], [ %T1.tmp.0, %bb25 ]		; <i32> [#uses=3]
-	%tj.01.0 = add i32 %indvar, %tmp		; <i32> [#uses=3]
-	%tmp24 = add i32 %tj.01.0, 1		; <i32> [#uses=1]
-	%tmp3 = icmp eq i32 %tmp24, %tmp1		; <i1> [#uses=1]
-	br i1 %tmp3, label %cond_true, label %bb25
-
-cond_true:		; preds = %bb
-	%tmp7 = getelementptr [100 x %struct.anon]* @Y, i32 0, i32 %tj.01.0, i32 0		; <i32*> [#uses=1]
-	%tmp8 = load i32* %tmp7, align 4		; <i32> [#uses=1]
-	%tmp9 = icmp sgt i32 %tmp8, 0		; <i1> [#uses=1]
-	br i1 %tmp9, label %cond_true12, label %bb25
-
-cond_true12:		; preds = %cond_true
-	%tmp16 = getelementptr [100 x [100 x i32]]* @T2, i32 0, i32 %tmp13, i32 %tj.01.0		; <i32*> [#uses=1]
-	%tmp17 = load i32* %tmp16, align 4		; <i32> [#uses=1]
-	%tmp19 = mul i32 %tmp18, %tmp17		; <i32> [#uses=1]
-	%tmp21 = add i32 %tmp19, %T1.tmp.1		; <i32> [#uses=1]
-	br label %bb25
-
-bb25:		; preds = %cond_true12, %cond_true, %bb
-	%T1.tmp.0 = phi i32 [ %T1.tmp.1, %bb ], [ %T1.tmp.1, %cond_true ], [ %tmp21, %cond_true12 ]		; <i32> [#uses=2]
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %tmp6		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb, label %return.loopexit
-
-return.loopexit:		; preds = %bb25
-	%T1.tmp.0.lcssa = phi i32 [ %T1.tmp.0, %bb25 ]		; <i32> [#uses=1]
-	store i32 %T1.tmp.0.lcssa, i32* @T1
-	br label %return
-
-return:		; preds = %return.loopexit, %entry
-	ret void
-}
diff --git a/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll b/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
deleted file mode 100644
index 6adb26877680..000000000000
--- a/test/Transforms/LoopIndexSplit/OneIterLoop3-2007-08-17.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; Loop is elimianted. Save last value assignments, including induction variable.
-; RUN: opt < %s -loop-index-split -disable-output -stats | not grep "loop-index-split"
-
-declare i32 @foo(i32)
-declare i32 @bar(i32, i32)
-
-define void @foobar(i32 %a, i32 %b) {
-entry:
-	br label %bb
-
-bb:		; preds = %cond_next, %entry
-	%i.01.0 = phi i32 [ 0, %entry ], [ %tmp8, %cond_next ]		; <i32> [#uses=3]
-	%tsum.16.0 = phi i32 [ 42, %entry ], [ %tsum.0, %cond_next ]		; <i32> [#uses=2]
-	%tmp1 = icmp eq i32 %i.01.0, 50		; <i1> [#uses=1]
-	br i1 %tmp1, label %cond_true, label %cond_next
-
-cond_true:		; preds = %bb
-	%tmp4 = tail call i32 @foo( i32 %i.01.0 )		; <i32> [#uses=1]
-	%tmp6 = add i32 %tmp4, %tsum.16.0		; <i32> [#uses=1]
-	br label %cond_next
-
-cond_next:		; preds = %bb, %cond_true
-	%tsum.0 = phi i32 [ %tmp6, %cond_true ], [ %tsum.16.0, %bb ]		; <i32> [#uses=2]
-	%tmp8 = add i32 %i.01.0, 1		; <i32> [#uses=3]
-	%tmp11 = icmp slt i32 %tmp8, 100		; <i1> [#uses=1]
-	br i1 %tmp11, label %bb, label %bb14
-
-bb14:		; preds = %cond_next
-	%tmp8.lcssa = phi i32 [ %tmp8, %cond_next ]		; <i32> [#uses=1]
-	%tsum.0.lcssa = phi i32 [ %tsum.0, %cond_next ]		; <i32> [#uses=1]
-	%tmp17 = tail call i32 @bar( i32 %tmp8.lcssa, i32 %tsum.0.lcssa )		; <i32> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/LoopIndexSplit/PR3913.ll b/test/Transforms/LoopIndexSplit/PR3913.ll
deleted file mode 100644
index a2bf57c05162..000000000000
--- a/test/Transforms/LoopIndexSplit/PR3913.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -loop-index-split -S | not grep "icmp ne"
-
-define i32 @main() {
-entry:
-	br label %header
-
-header:
-	%r = phi i32 [ 0, %entry ], [ %r3, %skip ]
-	%i = phi i32 [ 0, %entry ], [ %i1, %skip ]
-	%cond = icmp eq i32 %i, 99
-	br i1 %cond, label %body, label %skip
-
-body:
-	br label %skip
-
-skip:
-	%r3 = phi i32 [ %r, %header ], [ 3, %body ]
-	%i1 = add i32 %i, 1
-	%exitcond = icmp eq i32 %i1, 10
-	br i1 %exitcond, label %exit, label %header
-
-exit:
-	ret i32 %r3
-}
diff --git a/test/Transforms/LoopIndexSplit/PR4174-2.ll b/test/Transforms/LoopIndexSplit/PR4174-2.ll
deleted file mode 100644
index cc17bc0a9337..000000000000
--- a/test/Transforms/LoopIndexSplit/PR4174-2.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep clone
-
-declare void @f()
-
-define fastcc i32 @main() nounwind {
-entry:
-        br label %bb1552
-
-bb1552:
-        %j295.0.reg2mem.0 = phi i32 [ %storemerge110, %bb1669 ], [ 0, %entry ]
-        br label %bb1553
-
-bb1553:
-        call void @f()
-        %tmp1628 = icmp sgt i32 %j295.0.reg2mem.0, -3
-        br i1 %tmp1628, label %bb1588, label %bb1616
-
-bb1588:
-        br label %bb1616
-
-bb1616:
-        %tmp1629 = icmp sgt i32 %j295.0.reg2mem.0, -3
-        br i1 %tmp1629, label %bb1649, label %bb1632
-
-bb1632:
-        br label %bb1669
-
-bb1649:
-        br label %bb1669
-
-bb1669:
-        %storemerge110 = add i32 %j295.0.reg2mem.0, 1
-        %tmp1672 = icmp sgt i32 %storemerge110, 3
-        br i1 %tmp1672, label %bb1678, label %bb1552
-
-bb1678:
-        ret i32 0
-}
diff --git a/test/Transforms/LoopIndexSplit/PR4174.ll b/test/Transforms/LoopIndexSplit/PR4174.ll
deleted file mode 100644
index e8f5a737f05b..000000000000
--- a/test/Transforms/LoopIndexSplit/PR4174.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as < %s | opt -loop-index-split | llvm-dis | not grep clone
-
-declare void @f()
-
-define i32 @main() {
-entry:
-        br label %head
-head:
-        %i = phi i32 [0, %entry], [%i1, %tail]
-        call void @f()
-        %splitcond = icmp slt i32 %i, 2
-        br i1 %splitcond, label %yes, label %no
-yes:
-        br label %tail
-no:
-        br label %tail
-tail:
-        %i1 = add i32 %i, 1
-        %exitcond = icmp slt i32 %i1, 4
-        br i1 %exitcond, label %head, label %exit
-exit:
-        ret i32 0
-}
diff --git a/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll b/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
deleted file mode 100644
index fc7d9e9862cb..000000000000
--- a/test/Transforms/LoopIndexSplit/SaveLastValue-2007-08-17.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; Split loop. Save last value.
-; RUN: opt < %s -loop-index-split -disable-output -stats |& \
-; RUN: grep "loop-index-split" | count 1
-
-@k = external global i32		; <i32*> [#uses=2]
-
-define void @foobar(i32 %a, i32 %b) {
-entry:
-	br label %bb
-
-bb:		; preds = %cond_next16, %entry
-	%i.01.0 = phi i32 [ 0, %entry ], [ %tmp18, %cond_next16 ]		; <i32> [#uses=5]
-	%tsum.18.0 = phi i32 [ 42, %entry ], [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=3]
-	%tmp1 = icmp slt i32 %i.01.0, 50		; <i1> [#uses=1]
-	br i1 %tmp1, label %cond_true, label %cond_false
-
-cond_true:		; preds = %bb
-	%tmp4 = tail call i32 @foo( i32 %i.01.0 )		; <i32> [#uses=1]
-	%tmp6 = add i32 %tmp4, %tsum.18.0		; <i32> [#uses=2]
-	%tmp914 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp1015 = icmp eq i32 %tmp914, 0		; <i1> [#uses=1]
-	br i1 %tmp1015, label %cond_next16, label %cond_true13
-
-cond_false:		; preds = %bb
-	%tmp8 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	%tmp9 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp10 = icmp eq i32 %tmp9, 0		; <i1> [#uses=1]
-	br i1 %tmp10, label %cond_next16, label %cond_true13
-
-cond_true13:		; preds = %cond_false, %cond_true
-	%tsum.013.0 = phi i32 [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=1]
-	%tmp15 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	br label %cond_next16
-
-cond_next16:		; preds = %cond_false, %cond_true, %cond_true13
-	%tsum.013.1 = phi i32 [ %tsum.013.0, %cond_true13 ], [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=2]
-	%tmp18 = add i32 %i.01.0, 1		; <i32> [#uses=3]
-	%tmp21 = icmp slt i32 %tmp18, 100		; <i1> [#uses=1]
-	br i1 %tmp21, label %bb, label %bb24
-
-bb24:		; preds = %cond_next16
-	%tmp18.lcssa = phi i32 [ %tmp18, %cond_next16 ]		; <i32> [#uses=1]
-	%tsum.013.1.lcssa = phi i32 [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=1]
-	%tmp27 = tail call i32 @t( i32 %tmp18.lcssa, i32 %tsum.013.1.lcssa )		; <i32> [#uses=0]
-	ret void
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-declare i32 @t(i32, i32)
diff --git a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll b/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
deleted file mode 100644
index f61d9671409f..000000000000
--- a/test/Transforms/LoopIndexSplit/SplitValue-2007-08-24.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; Split loop. Save last value. Split value is off by one in this example.
-; RUN: opt < %s -loop-index-split -disable-output -stats |& \
-; RUN: grep "loop-index-split" | count 1
-
-@k = external global i32		; <i32*> [#uses=2]
-
-define void @foobar(i32 %a, i32 %b) {
-entry:
-	br label %bb
-
-bb:		; preds = %cond_next16, %entry
-	%i.01.0 = phi i32 [ 0, %entry ], [ %tmp18, %cond_next16 ]		; <i32> [#uses=5]
-	%tsum.18.0 = phi i32 [ 42, %entry ], [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=3]
-	%tmp1 = icmp sgt i32 %i.01.0, 50		; <i1> [#uses=1]
-	br i1 %tmp1, label %cond_true, label %cond_false
-
-cond_true:		; preds = %bb
-	%tmp4 = tail call i32 @foo( i32 %i.01.0 )		; <i32> [#uses=1]
-	%tmp6 = add i32 %tmp4, %tsum.18.0		; <i32> [#uses=2]
-	%tmp914 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp1015 = icmp eq i32 %tmp914, 0		; <i1> [#uses=1]
-	br i1 %tmp1015, label %cond_next16, label %cond_true13
-
-cond_false:		; preds = %bb
-	%tmp8 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	%tmp9 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp10 = icmp eq i32 %tmp9, 0		; <i1> [#uses=1]
-	br i1 %tmp10, label %cond_next16, label %cond_true13
-
-cond_true13:		; preds = %cond_false, %cond_true
-	%tsum.013.0 = phi i32 [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=1]
-	%tmp15 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	br label %cond_next16
-
-cond_next16:		; preds = %cond_false, %cond_true, %cond_true13
-	%tsum.013.1 = phi i32 [ %tsum.013.0, %cond_true13 ], [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=2]
-	%tmp18 = add i32 %i.01.0, 1		; <i32> [#uses=3]
-	%tmp21 = icmp slt i32 %tmp18, 100		; <i1> [#uses=1]
-	br i1 %tmp21, label %bb, label %bb24
-
-bb24:		; preds = %cond_next16
-	%tmp18.lcssa = phi i32 [ %tmp18, %cond_next16 ]		; <i32> [#uses=1]
-	%tsum.013.1.lcssa = phi i32 [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=1]
-	%tmp27 = tail call i32 @t( i32 %tmp18.lcssa, i32 %tsum.013.1.lcssa )		; <i32> [#uses=0]
-	ret void
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-declare i32 @t(i32, i32)
diff --git a/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll b/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
deleted file mode 100644
index 17f75d7509e7..000000000000
--- a/test/Transforms/LoopIndexSplit/UpperBound-2007-08-24.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; Split loop. Split value is a constant and greater then exit value. 
-; Check whether optimizer inserts proper checkfor split value or not.
-; RUN: opt < %s -loop-index-split -S | grep select
-
-@k = external global i32		; <i32*> [#uses=2]
-
-define void @foobar(i32 %a, i32 %b) {
-entry:
-	br label %bb
-
-bb:		; preds = %cond_next16, %entry
-	%i.01.0 = phi i32 [ 0, %entry ], [ %tmp18, %cond_next16 ]		; <i32> [#uses=5]
-	%tsum.18.0 = phi i32 [ 42, %entry ], [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=3]
-	%tmp1 = icmp slt i32 %i.01.0, 500		; <i1> [#uses=1]
-	br i1 %tmp1, label %cond_true, label %cond_false
-
-cond_true:		; preds = %bb
-	%tmp4 = tail call i32 @foo( i32 %i.01.0 )		; <i32> [#uses=1]
-	%tmp6 = add i32 %tmp4, %tsum.18.0		; <i32> [#uses=2]
-	%tmp914 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp1015 = icmp eq i32 %tmp914, 0		; <i1> [#uses=1]
-	br i1 %tmp1015, label %cond_next16, label %cond_true13
-
-cond_false:		; preds = %bb
-	%tmp8 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	%tmp9 = load i32* @k, align 4		; <i32> [#uses=1]
-	%tmp10 = icmp eq i32 %tmp9, 0		; <i1> [#uses=1]
-	br i1 %tmp10, label %cond_next16, label %cond_true13
-
-cond_true13:		; preds = %cond_false, %cond_true
-	%tsum.013.0 = phi i32 [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=1]
-	%tmp15 = tail call i32 @bar( i32 %i.01.0 )		; <i32> [#uses=0]
-	br label %cond_next16
-
-cond_next16:		; preds = %cond_false, %cond_true, %cond_true13
-	%tsum.013.1 = phi i32 [ %tsum.013.0, %cond_true13 ], [ %tmp6, %cond_true ], [ %tsum.18.0, %cond_false ]		; <i32> [#uses=2]
-	%tmp18 = add i32 %i.01.0, 1		; <i32> [#uses=3]
-	%tmp21 = icmp slt i32 %tmp18, 100		; <i1> [#uses=1]
-	br i1 %tmp21, label %bb, label %bb24
-
-bb24:		; preds = %cond_next16
-	%tmp18.lcssa = phi i32 [ %tmp18, %cond_next16 ]		; <i32> [#uses=1]
-	%tsum.013.1.lcssa = phi i32 [ %tsum.013.1, %cond_next16 ]		; <i32> [#uses=1]
-	%tmp27 = tail call i32 @t( i32 %tmp18.lcssa, i32 %tsum.013.1.lcssa )		; <i32> [#uses=0]
-	ret void
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-declare i32 @t(i32, i32)
diff --git a/test/Transforms/LoopIndexSplit/dg.exp b/test/Transforms/LoopIndexSplit/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopIndexSplit/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll b/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
deleted file mode 100644
index 6eed98177d09..000000000000
--- a/test/Transforms/LoopIndexSplit/non-iv-cmp-operand.ll
+++ /dev/null
@@ -1,195 +0,0 @@
-; RUN: opt < %s -inline -reassociate -loop-rotate -loop-index-split -indvars -simplifycfg -verify
-; PR4471
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] }
-	%struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base }
-	%struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] }
-	%struct.VEC_edge_gc = type { %struct.VEC_edge_base }
-	%struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
-	%struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
-	%struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] }
-	%struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base }
-	%struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] }
-	%struct.VEC_tree_gc = type { %struct.VEC_tree_base }
-	%struct.__sbuf = type { i8*, i32 }
-	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
-	%struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 }
-	%struct.basic_block_il_dependent = type { %struct.rtl_bb_info* }
-	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] }
-	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
-	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
-	%struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
-	%struct.case_node = type { %struct.case_node*, %struct.case_node*, %struct.case_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
-	%struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 }
-	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.__sbuf*, i32, i32, i64, i32 }
-	%struct.edge_def_insns = type { %struct.rtx_def* }
-	%struct.edge_prediction = type opaque
-	%struct.eh_status = type opaque
-	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.__sbuf, i32, i8*, %struct.rtx_def** }
-	%struct.et_node = type opaque
-	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
-	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, %struct.__sbuf, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8 }
-	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 }
-	%struct.initial_value_struct = type opaque
-	%struct.lang_decl = type opaque
-	%struct.language_function = type opaque
-	%struct.loop = type opaque
-	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, [4 x i32], i32, i32, i32 }
-	%struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
-	%struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
-	%struct.omp_clause_subcode = type { i32 }
-	%struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 }
-	%struct.rtunion = type { i8* }
-	%struct.rtx_def = type { i16, i8, i8, %struct.u }
-	%struct.section = type { %struct.unnamed_section }
-	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
-	%struct.stack_local_entry = type opaque
-	%struct.temp_slot = type opaque
-	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8 }
-	%struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
-	%struct.tree_decl_minimal = type { %struct.tree_common, %struct.__sbuf, i32, %struct.tree_node*, %struct.tree_node* }
-	%struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
-	%struct.tree_decl_u1 = type { i64 }
-	%struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def* }
-	%struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 }
-	%struct.tree_function_decl = type { %struct.tree_decl_non_common, i8, i8, %struct.function* }
-	%struct.tree_node = type { %struct.tree_function_decl }
-	%struct.u = type { %struct.block_symbol }
-	%struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
-	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
-	%struct.varasm_status = type opaque
-	%union.tree_ann_d = type opaque
-
-define void @emit_case_bit_tests(%struct.tree_node* %index_type, %struct.tree_node* %index_expr, %struct.tree_node* %minval, %struct.tree_node* %range, %struct.case_node* %nodes, %struct.rtx_def* %default_label) nounwind {
-entry:
-	br label %bb17
-
-bb:		; preds = %bb17
-	%0 = call i64 @tree_low_cst(%struct.tree_node* undef, i32 1) nounwind		; <i64> [#uses=1]
-	%1 = trunc i64 %0 to i32		; <i32> [#uses=1]
-	br label %bb15
-
-bb10:		; preds = %bb15
-	%2 = icmp ugt i32 %j.0, 63		; <i1> [#uses=1]
-	br i1 %2, label %bb11, label %bb12
-
-bb11:		; preds = %bb10
-	%3 = zext i32 0 to i64		; <i64> [#uses=0]
-	br label %bb14
-
-bb12:		; preds = %bb10
-	%4 = or i64 undef, undef		; <i64> [#uses=0]
-	br label %bb14
-
-bb14:		; preds = %bb12, %bb11
-	%5 = add i32 %j.0, 1		; <i32> [#uses=1]
-	br label %bb15
-
-bb15:		; preds = %bb14, %bb
-	%j.0 = phi i32 [ %1, %bb ], [ %5, %bb14 ]		; <i32> [#uses=3]
-	%6 = icmp ugt i32 %j.0, undef		; <i1> [#uses=1]
-	br i1 %6, label %bb16, label %bb10
-
-bb16:		; preds = %bb15
-	br label %bb17
-
-bb17:		; preds = %bb16, %entry
-	br i1 undef, label %bb18, label %bb
-
-bb18:		; preds = %bb17
-	unreachable
-}
-
-declare i64 @tree_low_cst(%struct.tree_node*, i32)
-
-define void @expand_case(%struct.tree_node* %exp) nounwind {
-entry:
-	br i1 undef, label %bb2, label %bb
-
-bb:		; preds = %entry
-	unreachable
-
-bb2:		; preds = %entry
-	br i1 undef, label %bb3, label %bb4
-
-bb3:		; preds = %bb2
-	unreachable
-
-bb4:		; preds = %bb2
-	br i1 undef, label %bb127, label %bb5
-
-bb5:		; preds = %bb4
-	br i1 undef, label %bb6, label %bb7
-
-bb6:		; preds = %bb5
-	unreachable
-
-bb7:		; preds = %bb5
-	br i1 undef, label %bb9, label %bb8
-
-bb8:		; preds = %bb7
-	unreachable
-
-bb9:		; preds = %bb7
-	br i1 undef, label %bb11, label %bb10
-
-bb10:		; preds = %bb9
-	unreachable
-
-bb11:		; preds = %bb9
-	br i1 undef, label %bb37, label %bb21
-
-bb21:		; preds = %bb11
-	unreachable
-
-bb37:		; preds = %bb11
-	br i1 undef, label %bb38, label %bb39
-
-bb38:		; preds = %bb37
-	ret void
-
-bb39:		; preds = %bb37
-	br i1 undef, label %bb59, label %bb40
-
-bb40:		; preds = %bb39
-	br i1 undef, label %bb41, label %bb59
-
-bb41:		; preds = %bb40
-	br i1 undef, label %bb42, label %bb59
-
-bb42:		; preds = %bb41
-	br i1 undef, label %bb43, label %bb59
-
-bb43:		; preds = %bb42
-	br i1 undef, label %bb59, label %bb44
-
-bb44:		; preds = %bb43
-	br i1 undef, label %bb56, label %bb58
-
-bb56:		; preds = %bb44
-	unreachable
-
-bb58:		; preds = %bb44
-	call void @emit_case_bit_tests(%struct.tree_node* undef, %struct.tree_node* undef, %struct.tree_node* null, %struct.tree_node* undef, %struct.case_node* undef, %struct.rtx_def* undef) nounwind
-	br i1 undef, label %bb126, label %bb125
-
-bb59:		; preds = %bb43, %bb42, %bb41, %bb40, %bb39
-	br i1 undef, label %bb70, label %bb60
-
-bb60:		; preds = %bb59
-	unreachable
-
-bb70:		; preds = %bb59
-	unreachable
-
-bb125:		; preds = %bb58
-	unreachable
-
-bb126:		; preds = %bb58
-	unreachable
-
-bb127:		; preds = %bb4
-	ret void
-}
diff --git a/test/Transforms/LoopRotate/LRCrash-1.ll b/test/Transforms/LoopRotate/LRCrash-1.ll
deleted file mode 100644
index f16dd0487d23..000000000000
--- a/test/Transforms/LoopRotate/LRCrash-1.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-
-	%struct.relation = type { [4 x i16], i32, [4 x i16], i32, i32 }
-
-define void @findAllPairs() {
-entry:
-	br i1 false, label %bb139, label %bb10.i44
-bb10.i44:		; preds = %entry
-	ret void
-bb127:		; preds = %bb139
-	br label %bb139
-bb139:		; preds = %bb127, %entry
-	br i1 false, label %bb127, label %bb142
-bb142:		; preds = %bb139
-	%r91.0.lcssa = phi %struct.relation* [ null, %bb139 ]		; <%struct.relation*> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/LoopRotate/LRCrash-2.ll b/test/Transforms/LoopRotate/LRCrash-2.ll
deleted file mode 100644
index 0a10989ae103..000000000000
--- a/test/Transforms/LoopRotate/LRCrash-2.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-
-define void @findAllPairs() {
-entry:
-	br i1 false, label %bb139, label %cond_true
-cond_true:		; preds = %entry
-	ret void
-bb90:		; preds = %bb139
-	br i1 false, label %bb136, label %cond_next121
-cond_next121:		; preds = %bb90
-	br i1 false, label %bb136, label %bb127
-bb127:		; preds = %cond_next121
-	br label %bb136
-bb136:		; preds = %bb127, %cond_next121, %bb90
-	%changes.1 = phi i32 [ %changes.2, %bb90 ], [ %changes.2, %cond_next121 ], [ 1, %bb127 ]		; <i32> [#uses=1]
-	br label %bb139
-bb139:		; preds = %bb136, %entry
-	%changes.2 = phi i32 [ %changes.1, %bb136 ], [ 0, %entry ]		; <i32> [#uses=3]
-	br i1 false, label %bb90, label %bb142
-bb142:		; preds = %bb139
-	%changes.2.lcssa = phi i32 [ %changes.2, %bb139 ]		; <i32> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/LoopRotate/LRCrash-3.ll b/test/Transforms/LoopRotate/LRCrash-3.ll
deleted file mode 100644
index 79f21fb40df7..000000000000
--- a/test/Transforms/LoopRotate/LRCrash-3.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-
-define void @_ZN9Classfile4readEv() {
-entry:
-	br i1 false, label %cond_false485, label %bb405
-bb405:		; preds = %entry
-	ret void
-cond_false485:		; preds = %entry
-	br label %bb830
-bb511:		; preds = %bb830
-	br i1 false, label %bb816, label %bb830
-cond_next667:		; preds = %bb816
-	br i1 false, label %cond_next695, label %bb680
-bb676:		; preds = %bb680
-	br label %bb680
-bb680:		; preds = %bb676, %cond_next667
-	%iftmp.68.0 = zext i1 false to i8		; <i8> [#uses=1]
-	br i1 false, label %bb676, label %cond_next695
-cond_next695:		; preds = %bb680, %cond_next667
-	%iftmp.68.2 = phi i8 [ %iftmp.68.0, %bb680 ], [ undef, %cond_next667 ]		; <i8> [#uses=0]
-	ret void
-bb816:		; preds = %bb816, %bb511
-	br i1 false, label %cond_next667, label %bb816
-bb830:		; preds = %bb511, %cond_false485
-	br i1 false, label %bb511, label %bb835
-bb835:		; preds = %bb830
-	ret void
-}
-
diff --git a/test/Transforms/LoopRotate/LRCrash-4.ll b/test/Transforms/LoopRotate/LRCrash-4.ll
deleted file mode 100644
index 7d35c16f337a..000000000000
--- a/test/Transforms/LoopRotate/LRCrash-4.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-
-define void @InterpretSEIMessage(i8* %msg) {
-entry:
-	br label %bb15
-bb6:		; preds = %bb15
-	%gep.upgrd.1 = zext i32 %offset.1 to i64		; <i64> [#uses=1]
-	%tmp11 = getelementptr i8* %msg, i64 %gep.upgrd.1		; <i8*> [#uses=0]
-	br label %bb15
-bb15:		; preds = %bb6, %entry
-	%offset.1 = add i32 0, 1		; <i32> [#uses=2]
-	br i1 false, label %bb6, label %bb17
-bb17:		; preds = %bb15
-	%offset.1.lcssa = phi i32 [ %offset.1, %bb15 ]		; <i32> [#uses=0]
-	%payload_type.1.lcssa = phi i32 [ 0, %bb15 ]		; <i32> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/LoopRotate/LRCrash-5.ll b/test/Transforms/LoopRotate/LRCrash-5.ll
deleted file mode 100644
index 6643cc176c72..000000000000
--- a/test/Transforms/LoopRotate/LRCrash-5.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9"
-	%struct.NSArray = type { %struct.NSObject }
-	%struct.NSObject = type { %struct.objc_class* }
-	%struct.NSRange = type { i64, i64 }
-	%struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* }
-	%struct.objc_class = type opaque
-	%struct.objc_selector = type opaque
-@"\01L_OBJC_MESSAGE_REF_26" = external global %struct._message_ref_t		; <%struct._message_ref_t*> [#uses=1]
-
-define %struct.NSArray* @"-[NSString(DocSetPrivateAddition) _dsa_stringAsPathComponent]"(%struct.NSArray* %self, %struct._message_ref_t* %_cmd) {
-entry:
-	br label %bb116
-
-bb116:		; preds = %bb131, %entry
-	%tmp123 = call %struct.NSRange null( %struct.NSObject* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_26", %struct.NSArray* null )		; <%struct.NSRange> [#uses=1]
-	br i1 false, label %bb141, label %bb131
-
-bb131:		; preds = %bb116
-	%mrv_gr125 = getresult %struct.NSRange %tmp123, 1		; <i64> [#uses=0]
-	br label %bb116
-
-bb141:		; preds = %bb116
-	ret %struct.NSArray* null
-}
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
new file mode 100644
index 000000000000..b7bcb21d56f8
--- /dev/null
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -loop-rotate %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; PR5319 - The "arrayidx" gep should be hoisted, not duplicated.  We should
+; end up with one phi node.
+define void @test1() nounwind ssp {
+; CHECK: @test1
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 100
+  %arrayidx = getelementptr inbounds [20 x i32]* %array, i64 0, i64 0
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK-NEXT: phi i32 [ 0
+; CHECK-NEXT: store i32 0
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %arrayidx, align 16
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+  call void @g(i32* %arrayidx.lcssa) nounwind
+  ret void
+}
+
+declare void @g(i32*)
+
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
new file mode 100644
index 000000000000..9dc9862d150a
--- /dev/null
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -0,0 +1,139 @@
+; RUN: opt -loop-rotate %s -disable-output -verify-dom-info -verify-loop-info
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; PR8955 - Rotating an outer loop that has a condbr for a latch block.
+define void @test1() nounwind ssp {
+entry:
+  br label %lbl_283
+
+lbl_283:                                          ; preds = %if.end, %entry
+  br i1 undef, label %if.else, label %if.then
+
+if.then:                                          ; preds = %lbl_283
+  br i1 undef, label %if.end, label %for.condthread-pre-split
+
+for.condthread-pre-split:                         ; preds = %if.then
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %for.condthread-pre-split
+  br i1 undef, label %lbl_281, label %for.cond
+
+lbl_281:                                          ; preds = %if.end, %for.cond
+  br label %if.end
+
+if.end:                                           ; preds = %lbl_281, %if.then
+  br i1 undef, label %lbl_283, label %lbl_281
+
+if.else:                                          ; preds = %lbl_283
+  ret void
+}
+
+        %struct.relation = type { [4 x i16], i32, [4 x i16], i32, i32 }
+
+define void @test2() {
+entry:
+        br i1 false, label %bb139, label %bb10.i44
+bb10.i44:               ; preds = %entry
+        ret void
+bb127:          ; preds = %bb139
+        br label %bb139
+bb139:          ; preds = %bb127, %entry
+        br i1 false, label %bb127, label %bb142
+bb142:          ; preds = %bb139
+        %r91.0.lcssa = phi %struct.relation* [ null, %bb139 ]           ; <%struct.relation*> [#uses=0]
+        ret void
+}
+
+
+define void @test3() {
+entry:
+	br i1 false, label %bb139, label %cond_true
+cond_true:		; preds = %entry
+	ret void
+bb90:		; preds = %bb139
+	br i1 false, label %bb136, label %cond_next121
+cond_next121:		; preds = %bb90
+	br i1 false, label %bb136, label %bb127
+bb127:		; preds = %cond_next121
+	br label %bb136
+bb136:		; preds = %bb127, %cond_next121, %bb90
+	%changes.1 = phi i32 [ %changes.2, %bb90 ], [ %changes.2, %cond_next121 ], [ 1, %bb127 ]		; <i32> [#uses=1]
+	br label %bb139
+bb139:		; preds = %bb136, %entry
+	%changes.2 = phi i32 [ %changes.1, %bb136 ], [ 0, %entry ]		; <i32> [#uses=3]
+	br i1 false, label %bb90, label %bb142
+bb142:		; preds = %bb139
+	%changes.2.lcssa = phi i32 [ %changes.2, %bb139 ]		; <i32> [#uses=0]
+	ret void
+}
+
+define void @test4() {
+entry:
+	br i1 false, label %cond_false485, label %bb405
+bb405:		; preds = %entry
+	ret void
+cond_false485:		; preds = %entry
+	br label %bb830
+bb511:		; preds = %bb830
+	br i1 false, label %bb816, label %bb830
+cond_next667:		; preds = %bb816
+	br i1 false, label %cond_next695, label %bb680
+bb676:		; preds = %bb680
+	br label %bb680
+bb680:		; preds = %bb676, %cond_next667
+	%iftmp.68.0 = zext i1 false to i8		; <i8> [#uses=1]
+	br i1 false, label %bb676, label %cond_next695
+cond_next695:		; preds = %bb680, %cond_next667
+	%iftmp.68.2 = phi i8 [ %iftmp.68.0, %bb680 ], [ undef, %cond_next667 ]	; <i8> [#uses=0]
+	ret void
+bb816:		; preds = %bb816, %bb511
+	br i1 false, label %cond_next667, label %bb816
+bb830:		; preds = %bb511, %cond_false485
+	br i1 false, label %bb511, label %bb835
+bb835:		; preds = %bb830
+	ret void
+}
+
+	%struct.NSArray = type { %struct.NSObject }
+	%struct.NSObject = type { %struct.objc_class* }
+	%struct.NSRange = type { i64, i64 }
+	%struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* }
+	%struct.objc_class = type opaque
+	%struct.objc_selector = type opaque
+@"\01L_OBJC_MESSAGE_REF_26" = external global %struct._message_ref_t		; <%struct._message_ref_t*> [#uses=1]
+
+define %struct.NSArray* @test5(%struct.NSArray* %self, %struct._message_ref_t* %_cmd) {
+entry:
+	br label %bb116
+
+bb116:		; preds = %bb131, %entry
+	%tmp123 = call %struct.NSRange null( %struct.NSObject* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_26", %struct.NSArray* null )		; <%struct.NSRange> [#uses=1]
+	br i1 false, label %bb141, label %bb131
+
+bb131:		; preds = %bb116
+	%mrv_gr125 = getresult %struct.NSRange %tmp123, 1		; <i64> [#uses=0]
+	br label %bb116
+
+bb141:		; preds = %bb116
+	ret %struct.NSArray* null
+}
+
+define void @test6(i8* %msg) {
+entry:
+	br label %bb15
+bb6:		; preds = %bb15
+	%gep.upgrd.1 = zext i32 %offset.1 to i64		; <i64> [#uses=1]
+	%tmp11 = getelementptr i8* %msg, i64 %gep.upgrd.1		; <i8*> [#uses=0]
+	br label %bb15
+bb15:		; preds = %bb6, %entry
+	%offset.1 = add i32 0, 1		; <i32> [#uses=2]
+	br i1 false, label %bb6, label %bb17
+bb17:		; preds = %bb15
+	%offset.1.lcssa = phi i32 [ %offset.1, %bb15 ]		; <i32> [#uses=0]
+	%payload_type.1.lcssa = phi i32 [ 0, %bb15 ]		; <i32> [#uses=0]
+	ret void
+}
+
+
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
new file mode 100644
index 000000000000..92871780a4da
--- /dev/null
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -loop-rotate  %s  | FileCheck %s
+
+; CHECK: entry
+; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then, %entry
+  %x.tr = phi i32 [ %x, %entry ], [ %call, %if.then ]
+  %y.tr = phi i32 [ %y, %entry ], [ %call9, %if.then ]
+  %z.tr = phi i32 [ %z, %entry ], [ %call14, %if.then ]
+  tail call void @llvm.dbg.value(metadata !{i32 %x.tr}, i64 0, metadata !6), !dbg !7
+  tail call void @llvm.dbg.value(metadata !{i32 %y.tr}, i64 0, metadata !8), !dbg !9
+  tail call void @llvm.dbg.value(metadata !{i32 %z.tr}, i64 0, metadata !10), !dbg !11
+  %cmp = icmp slt i32 %y.tr, %x.tr, !dbg !12
+  br i1 %cmp, label %if.then, label %if.end, !dbg !12
+
+if.then:                                          ; preds = %tailrecurse
+  %sub = sub nsw i32 %x.tr, 1, !dbg !14
+  %call = tail call i32 @tak(i32 %sub, i32 %y.tr, i32 %z.tr), !dbg !14
+  %sub6 = sub nsw i32 %y.tr, 1, !dbg !14
+  %call9 = tail call i32 @tak(i32 %sub6, i32 %z.tr, i32 %x.tr), !dbg !14
+  %sub11 = sub nsw i32 %z.tr, 1, !dbg !14
+  %call14 = tail call i32 @tak(i32 %sub11, i32 %x.tr, i32 %y.tr), !dbg !14
+  br label %tailrecurse
+
+if.end:                                           ; preds = %tailrecurse
+  br label %return, !dbg !16
+
+return:                                           ; preds = %if.end
+  ret i32 %z.tr, !dbg !17
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"tak", metadata !"tak", metadata !"", metadata !1, i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32, i32)* @tak} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame", metadata !"clang version 2.9 (trunk 125492)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 32, i32 13, metadata !0, null}
+!8 = metadata !{i32 590081, metadata !0, metadata !"y", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 32, i32 20, metadata !0, null}
+!10 = metadata !{i32 590081, metadata !0, metadata !"z", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 32, i32 27, metadata !0, null}
+!12 = metadata !{i32 33, i32 3, metadata !13, null}
+!13 = metadata !{i32 589835, metadata !0, i32 32, i32 30, metadata !1, i32 6} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 34, i32 5, metadata !15, null}
+!15 = metadata !{i32 589835, metadata !13, i32 33, i32 14, metadata !1, i32 7} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 36, i32 3, metadata !13, null}
+!17 = metadata !{i32 37, i32 1, metadata !13, null}
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 5403e723ee15..737283092250 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -28,20 +28,13 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-; Should only end up with one phi. Also, the original for.cond block should
-; be moved to the end of the loop so that the new loop header pleasantly
-; ends up at the top.
-
+; Should only end up with one phi.
 ; CHECK:      define void @test
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   icmp slt i64
-; CHECK-NEXT:   br i1
-; CHECK-NOT:  :
-; CHECK:      bb.nph:
 ; CHECK-NEXT:   br label %for.body
-; CHECK-NOT:  :
 ; CHECK:      for.body:
-; CHECK-NEXT:   %j.02 = phi i64
-; CHECK-NOT:    phi
-; CHECK:        ret void
-; CHECK-NEXT: }
+; CHECK-NEXT:   %j.01 = phi i64
+; CHECK-NOT:  br
+; CHECK:   br i1 %cmp, label %for.body, label %for.end
+; CHECK:      for.end:
+; CHECK-NEXT:        ret void
diff --git a/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll b/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
index bf862f69e94b..66bf1a0caa97 100644
--- a/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
+++ b/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll
@@ -1,7 +1,7 @@
 ; This testcase exposed a problem with the loop identification pass (LoopInfo).
 ; Basically, it was incorrectly calculating the loop nesting information.
 ;
-; RUN: opt < %s -loopsimplify
+; RUN: opt < %s -loop-simplify
 
 define i32 @yylex() {
 	br label %loopentry.0
diff --git a/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll b/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
index cd9749bbf6d0..2b2afae36613 100644
--- a/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
+++ b/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll
@@ -2,7 +2,7 @@
 ; inserted for the "fail" loop, but the exit block of a loop is not updated
 ; to be the preheader instead of the exit loop itself.
 
-; RUN: opt < %s -loopsimplify
+; RUN: opt < %s -loop-simplify
 define i32 @re_match_2() {
 	br label %loopentry.1
 loopentry.1:		; preds = %endif.82, %0
diff --git a/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll b/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
index a5d0ba7ad76a..aae8476c8300 100644
--- a/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
+++ b/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -verify -licm -disable-output
+; RUN: opt < %s -loop-simplify -verify -licm -disable-output
 
 define void @.subst_48() {
 entry:
diff --git a/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll b/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
index dc5c31354641..3e7661ecb572 100644
--- a/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
+++ b/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -licm -disable-output
+; RUN: opt < %s -loop-simplify -licm -disable-output
 define void @main() {
 entry:
 	br i1 false, label %Out, label %loop
diff --git a/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll b/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
index 721f9b3a0340..c29383764af6 100644
--- a/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
+++ b/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -licm -disable-output
+; RUN: opt < %s -loop-simplify -licm -disable-output
 
 ; This is PR306
 
diff --git a/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll b/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
index cbdfe8bbc0ae..c522ec9463b7 100644
--- a/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
+++ b/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -disable-output
+; RUN: opt < %s -loop-simplify -disable-output
 
 define void @test() {
 loopentry.0:
diff --git a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
index 4fe6e2156f97..5818808ae0cd 100644
--- a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
+++ b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -loopsimplify -licm -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt < %s -scalarrepl -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
 
 define void @inflate() {
 entry:
diff --git a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
index 10202dcf98ce..e73fff18bc5d 100644
--- a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
+++ b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -disable-output
+; RUN: opt < %s -loop-simplify -disable-output
 ; PR1752
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
 target triple = "i686-pc-mingw32"
diff --git a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll
index 2a1ee7d1a72f..f179da234cb1 100644
--- a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll
+++ b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -domfrontier -loopsimplify -domfrontier -verify-dom-info -analyze 
+; RUN: opt < %s -domfrontier -loop-simplify -domfrontier -verify-dom-info -analyze 
 
 
 define void @a() nounwind {
diff --git a/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll b/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll
new file mode 100644
index 000000000000..00f520bf7975
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-simplify -S
+; PR8702
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-freebsd9.0"
+
+declare void @foo(i32 %x)
+
+define fastcc void @inm_merge() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %while.cond36.i, %entry
+  br i1 undef, label %do.body, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  br i1 undef, label %while.cond36.i, label %if.end44
+
+if.end44:                                         ; preds = %for.body
+  %call49 = call fastcc i32 @inm_get_source()
+  br i1 undef, label %if.end54, label %for.cond64
+
+if.end54:                                         ; preds = %if.end44
+  br label %while.cond36.i
+
+while.cond36.i:                                   ; preds = %if.end54, %for.body
+  br label %for.cond
+
+for.cond64:                                       ; preds = %if.end88, %for.cond64, %if.end44
+  %error.161 = phi i32 [ %error.161, %for.cond64 ], [ %error.161, %if.end88 ], [ %call49, %if.end44 ]
+  call void @foo(i32 %error.161)
+  br i1 undef, label %for.cond64, label %if.end88
+
+if.end88:                                         ; preds = %for.cond64
+  br i1 undef, label %for.cond64, label %if.end98
+
+if.end98:                                         ; preds = %if.end88
+  unreachable
+
+do.body:                                          ; preds = %for.cond
+  unreachable
+}
+
+declare fastcc i32 @inm_get_source() nounwind
diff --git a/test/Transforms/LoopSimplify/basictest.ll b/test/Transforms/LoopSimplify/basictest.ll
index 4241d8ad0895..6b31848a94bd 100644
--- a/test/Transforms/LoopSimplify/basictest.ll
+++ b/test/Transforms/LoopSimplify/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify
+; RUN: opt < %s -loop-simplify
 
 ; This function should get a preheader inserted before BB3, that is jumped
 ; to by BB1 & BB2
diff --git a/test/Transforms/LoopSimplify/hardertest.ll b/test/Transforms/LoopSimplify/hardertest.ll
index e0a7f81603b0..1ccb396490ca 100644
--- a/test/Transforms/LoopSimplify/hardertest.ll
+++ b/test/Transforms/LoopSimplify/hardertest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify
+; RUN: opt < %s -loop-simplify
 
 define void @foo(i1 %C) {
 	br i1 %C, label %T, label %F
diff --git a/test/Transforms/LoopSimplify/indirectbr-backedge.ll b/test/Transforms/LoopSimplify/indirectbr-backedge.ll
index ca6e47fcecd3..7eabc09cd7da 100644
--- a/test/Transforms/LoopSimplify/indirectbr-backedge.ll
+++ b/test/Transforms/LoopSimplify/indirectbr-backedge.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loopsimplify -S < %s | FileCheck %s
+; RUN: opt -loop-simplify -S < %s | FileCheck %s
 
 ; LoopSimplify shouldn't split loop backedges that use indirectbr.
 
diff --git a/test/Transforms/LoopSimplify/indirectbr.ll b/test/Transforms/LoopSimplify/indirectbr.ll
index 2e4549d1e9c0..9814d4ad93f1 100644
--- a/test/Transforms/LoopSimplify/indirectbr.ll
+++ b/test/Transforms/LoopSimplify/indirectbr.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -lcssa -verify-loop-info -verify-dom-info -S \
+; RUN: opt < %s -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S \
 ; RUN:   | grep -F {indirectbr i8* %x, \[label %L0, label %L1\]} \
 ; RUN:   | count 6
 
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 0e15f081a864..93a224744ca3 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loopsimplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
+; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
 ; RUN: not grep sext %t
 ; RUN: grep {phi i64} %t | count 1
 
diff --git a/test/Transforms/LoopSimplify/phi-node-simplify.ll b/test/Transforms/LoopSimplify/phi-node-simplify.ll
index 5e957ccbd880..8eb63d9111a7 100644
--- a/test/Transforms/LoopSimplify/phi-node-simplify.ll
+++ b/test/Transforms/LoopSimplify/phi-node-simplify.ll
@@ -1,5 +1,5 @@
 ; Loop Simplify should turn phi nodes like X = phi [X, Y]  into just Y, eliminating them.
-; RUN: opt < %s -loopsimplify -S | grep phi | count 6
+; RUN: opt < %s -loop-simplify -S | grep phi | count 6
 
 @A = weak global [3000000 x i32] zeroinitializer		; <[3000000 x i32]*> [#uses=1]
 @B = weak global [20000 x i32] zeroinitializer		; <[20000 x i32]*> [#uses=1]
diff --git a/test/Transforms/LoopSimplify/unreachable-loop-pred.ll b/test/Transforms/LoopSimplify/unreachable-loop-pred.ll
index faaaf97d72fa..76b7bb21e468 100644
--- a/test/Transforms/LoopSimplify/unreachable-loop-pred.ll
+++ b/test/Transforms/LoopSimplify/unreachable-loop-pred.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loopsimplify -disable-output -verify-loop-info -verify-dom-info < %s
+; RUN: opt -S -loop-simplify -disable-output -verify-loop-info -verify-dom-info < %s
 ; PR5235
 
 ; When loopsimplify inserts a preheader for this loop, it should add the new
diff --git a/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll b/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll
new file mode 100644
index 000000000000..7982fbc8c4a5
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-reduce -verify
+target triple = "x86_64-apple-darwin10"
+
+define void @myquicksort(i8* %a) nounwind ssp {
+entry:
+  br i1 undef, label %loop1, label %return
+
+loop1:                                            ; preds = %bb13.loopexit, %entry
+  %indvar419 = phi i64 [ %indvar.next420, %loop2.exit ], [ 0, %entry ]
+  %tmp474 = shl i64 %indvar419, 2
+  %tmp484 = add i64 %tmp474, 4
+  br label %loop2
+
+loop2:                                            ; preds = %loop1, %loop2.backedge
+  %indvar414 = phi i64 [ %indvar.next415, %loop2.backedge ], [ 0, %loop1 ]
+  %tmp473 = mul i64 %indvar414, -4
+  %tmp485 = add i64 %tmp484, %tmp473
+  %storemerge4 = getelementptr i8* %a, i64 %tmp485
+  %0 = icmp ugt i8* %storemerge4, %a
+  br i1 false, label %loop2.exit, label %loop2.backedge
+
+loop2.backedge:                                   ; preds = %loop2
+  %indvar.next415 = add i64 %indvar414, 1
+  br label %loop2
+
+loop2.exit:                                       ; preds = %loop2
+  %indvar.next420 = add i64 %indvar419, 1
+  br i1 undef, label %loop1, label %return
+
+return:                                           ; preds = %loop2.exit, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/pr2570.ll b/test/Transforms/LoopStrengthReduce/pr2570.ll
index aafd24ebba1e..80efb9f87e53 100644
--- a/test/Transforms/LoopStrengthReduce/pr2570.ll
+++ b/test/Transforms/LoopStrengthReduce/pr2570.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | grep {phi\\>} | count 10
+; RUN: opt < %s -loop-reduce -S | grep {phi\\>} | count 8
 ; PR2570
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll b/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
index a26346b2eb40..374f46d10cb3 100644
--- a/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
+++ b/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-unroll -loopsimplify -disable-output
+; RUN: opt < %s -loop-unroll -loop-simplify -disable-output
 
 define void @print_board() {
 entry:
diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll
new file mode 100644
index 000000000000..eeb3e9a57b06
--- /dev/null
+++ b/test/Transforms/LoopUnroll/basic.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+
+
+; This should not unroll since the address of the loop header is taken.
+
+; CHECK: @test1
+; CHECK: store i8* blockaddress(@test1, %l1), i8** %P
+; CHECK: l1:
+; CHECK-NEXT: phi i32
+; rdar://8287027
+define i32 @test1(i8** %P) nounwind ssp {
+entry:
+  store i8* blockaddress(@test1, %l1), i8** %P
+  br label %l1
+
+l1:                                               ; preds = %l1, %entry
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %l2, label %l1
+
+l2:                                               ; preds = %l1
+  ret i32 0
+}
diff --git a/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll b/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
new file mode 100644
index 000000000000..a976d18d4445
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch
+; PR8622
+@g_38 = external global i32, align 4
+
+define void @func_67(i32 %p_68.coerce) nounwind {
+entry:
+  br i1 true, label %for.end12, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %g_38.promoted = load i32* @g_38
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %bb.nph
+  %tobool.i = icmp eq i32 %p_68.coerce, 1
+  %xor4.i = xor i32 %p_68.coerce, 1
+  %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body
+  br i1 true, label %for.cond.for.end12_crit_edge, label %for.body
+
+for.cond.for.end12_crit_edge:                     ; preds = %for.cond
+  store i32 %call1, i32* @g_38
+  br label %for.end12
+
+for.end12:                                        ; preds = %for.cond.for.end12_crit_edge, %entry
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 30c27137d909..9f1e28046707 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -memcpyopt -dse -S | grep {call.*initialize} | not grep memtmp
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*initialize} | not grep memtmp
 ; PR2077
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 38a727148e57..418761e9361c 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -memcpyopt -S | not grep {call.*memcpy.}
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 	%a = type { i32 }
 	%b = type { float }
diff --git a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll b/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
deleted file mode 100644
index 4fec169fd06a..000000000000
--- a/test/Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | grep {call.*memcpy.*agg.result}
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-@x = external global { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-
-define void @foo({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
-entry:
-	%x.0 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-	%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
-	%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index b0ae5f413a5d..b1f900d9da4c 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -2,9 +2,9 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 ; The resulting memset is only 4-byte aligned, despite containing
-; a 16-byte alignmed store in the middle.
+; a 16-byte aligned store in the middle.
 
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
 
 define void @foo(i32* %p) {
   %a0 = getelementptr i32* %p, i64 0
diff --git a/test/Transforms/MemCpyOpt/crash.ll b/test/Transforms/MemCpyOpt/crash.ll
index bf5b23467577..cc3a6b05278a 100644
--- a/test/Transforms/MemCpyOpt/crash.ll
+++ b/test/Transforms/MemCpyOpt/crash.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -memcpyopt -disable-output
-; PR4882
+; RUN: opt < %s -basicaa -memcpyopt -disable-output
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-eabi"
@@ -7,7 +6,8 @@ target triple = "armv7-eabi"
 %struct.qw = type { [4 x float] }
 %struct.bar = type { %struct.qw, %struct.qw, %struct.qw, %struct.qw, %struct.qw, float, float}
 
-define arm_aapcs_vfpcc void @test1(%struct.bar* %this) {
+; PR4882
+define void @test1(%struct.bar* %this) {
 entry:
   %0 = getelementptr inbounds %struct.bar* %this, i32 0, i32 0, i32 0, i32 0
   store float 0.000000e+00, float* %0, align 4
@@ -43,3 +43,16 @@ entry:
   store float 0.000000e+00, float* %15, align 4
   unreachable
 }
+
+; PR8753
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
+i1) nounwind
+
+define void @test2(i32 %cmd) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 20, i32 1, i1
+false) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i32 1, i1
+false) nounwind
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index eb8dbe3a614f..1ac97e9e6b91 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -1,12 +1,11 @@
-; RUN: opt < %s -memcpyopt -S | not grep store
-; RUN: opt < %s -memcpyopt -S | grep {call.*llvm.memset}
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
 
 ; All the stores in this example should be merged into a single memset.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
-define void @foo(i8 signext  %c) nounwind  {
+define void @test1(i8 signext  %c) nounwind  {
 entry:
 	%x = alloca [19 x i8]		; <[19 x i8]*> [#uses=20]
 	%tmp = getelementptr [19 x i8]* %x, i32 0, i32 0		; <i8*> [#uses=1]
@@ -47,9 +46,177 @@ entry:
 	store i8 %c, i8* %tmp69, align 1
 	%tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18		; <i8*> [#uses=1]
 	store i8 %c, i8* %tmp73, align 1
-	%tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind 		; <i32> [#uses=0]
+	%tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind
 	ret void
+; CHECK: @test1
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: store
+; CHECK: ret
 }
 
 declare i32 @bar(...)
 
+
+	%struct.MV = type { i16, i16 }
+
+define void @test2() nounwind  {
+entry:
+	%ref_idx = alloca [8 x i8]		; <[8 x i8]*> [#uses=8]
+	%left_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
+	%up_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
+	%tmp20 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 7		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp20, align 1
+	%tmp23 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 6		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp23, align 1
+	%tmp26 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 5		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp26, align 1
+	%tmp29 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 4		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp29, align 1
+	%tmp32 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 3		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp32, align 1
+	%tmp35 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp35, align 1
+	%tmp38 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 1		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp38, align 1
+	%tmp41 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 0		; <i8*> [#uses=2]
+	store i8 -1, i8* %tmp41, align 1
+	%tmp43 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp43, align 2
+	%tmp46 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp46, align 2
+	%tmp57 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp57, align 2
+	%tmp60 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp60, align 2
+	%tmp71 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp71, align 2
+	%tmp74 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp74, align 2
+	%tmp85 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp85, align 2
+	%tmp88 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp88, align 2
+	%tmp99 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp99, align 2
+	%tmp102 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp102, align 2
+	%tmp113 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp113, align 2
+	%tmp116 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp116, align 2
+	%tmp127 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp127, align 2
+	%tmp130 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp130, align 2
+	%tmp141 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp141, align 8
+	%tmp144 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp144, align 2
+	%tmp148 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp148, align 2
+	%tmp151 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp151, align 2
+	%tmp162 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp162, align 2
+	%tmp165 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp165, align 2
+	%tmp176 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp176, align 2
+	%tmp179 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp179, align 2
+	%tmp190 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp190, align 2
+	%tmp193 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp193, align 2
+	%tmp204 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp204, align 2
+	%tmp207 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp207, align 2
+	%tmp218 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp218, align 2
+	%tmp221 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp221, align 2
+	%tmp232 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp232, align 2
+	%tmp235 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp235, align 2
+	%tmp246 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp246, align 8
+	%tmp249 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp249, align 2
+	%up_mvd252 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
+	%left_mvd253 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
+	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind 
+	ret void
+        
+; CHECK: @test2
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
+; CHECK-NOT: store
+; CHECK: ret
+}
+
+declare void @foo(%struct.MV*, %struct.MV*, i8*)
+
+
+; Store followed by memset.
+define void @test3(i32* nocapture %P) nounwind ssp {
+entry:
+  %arrayidx = getelementptr inbounds i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4
+  %add.ptr = getelementptr inbounds i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  ret void
+; CHECK: @test3
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+}
+
+; store followed by memset, different offset scenario
+define void @test4(i32* nocapture %P) nounwind ssp {
+entry:
+  store i32 0, i32* %P, align 4
+  %add.ptr = getelementptr inbounds i32* %P, i64 1
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  ret void
+; CHECK: @test4
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; Memset followed by store.
+define void @test5(i32* nocapture %P) nounwind ssp {
+entry:
+  %add.ptr = getelementptr inbounds i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4
+  ret void
+; CHECK: @test5
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+}
+
+;; Memset followed by memset.
+define void @test6(i32* nocapture %P) nounwind ssp {
+entry:
+  %0 = bitcast i32* %P to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
+  %add.ptr = getelementptr inbounds i32* %P, i64 3
+  %1 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
+  ret void
+; CHECK: @test6
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
+}
+
diff --git a/test/Transforms/MemCpyOpt/form-memset2.ll b/test/Transforms/MemCpyOpt/form-memset2.ll
deleted file mode 100644
index c90af9c73ca3..000000000000
--- a/test/Transforms/MemCpyOpt/form-memset2.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: opt < %s -memcpyopt -S | not grep store
-; RUN: opt < %s -memcpyopt -S | grep {call.*llvm.memset} | count 3
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-	%struct.MV = type { i16, i16 }
-
-define i32 @t() nounwind  {
-entry:
-	%ref_idx = alloca [8 x i8]		; <[8 x i8]*> [#uses=8]
-	%left_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
-	%up_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
-	%tmp20 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 7		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp20, align 1
-	%tmp23 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 6		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp23, align 1
-	%tmp26 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 5		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp26, align 1
-	%tmp29 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 4		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp29, align 1
-	%tmp32 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 3		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp32, align 1
-	%tmp35 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 2		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp35, align 1
-	%tmp38 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 1		; <i8*> [#uses=1]
-	store i8 -1, i8* %tmp38, align 1
-	%tmp41 = getelementptr [8 x i8]* %ref_idx, i32 0, i32 0		; <i8*> [#uses=2]
-	store i8 -1, i8* %tmp41, align 1
-	%tmp43 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp43, align 2
-	%tmp46 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp46, align 2
-	%tmp57 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp57, align 2
-	%tmp60 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp60, align 2
-	%tmp71 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp71, align 2
-	%tmp74 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp74, align 2
-	%tmp85 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp85, align 2
-	%tmp88 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp88, align 2
-	%tmp99 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp99, align 2
-	%tmp102 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp102, align 2
-	%tmp113 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp113, align 2
-	%tmp116 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp116, align 2
-	%tmp127 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp127, align 2
-	%tmp130 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp130, align 2
-	%tmp141 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp141, align 8
-	%tmp144 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp144, align 2
-	%tmp148 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp148, align 2
-	%tmp151 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp151, align 2
-	%tmp162 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp162, align 2
-	%tmp165 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp165, align 2
-	%tmp176 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp176, align 2
-	%tmp179 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp179, align 2
-	%tmp190 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp190, align 2
-	%tmp193 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp193, align 2
-	%tmp204 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp204, align 2
-	%tmp207 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp207, align 2
-	%tmp218 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp218, align 2
-	%tmp221 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp221, align 2
-	%tmp232 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp232, align 2
-	%tmp235 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp235, align 2
-	%tmp246 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp246, align 8
-	%tmp249 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
-	store i16 0, i16* %tmp249, align 2
-	%up_mvd252 = getelementptr [8 x %struct.MV]* %up_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
-	%left_mvd253 = getelementptr [8 x %struct.MV]* %left_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
-	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind 
-	ret i32 undef
-}
-
-declare void @foo(%struct.MV*, %struct.MV*, i8*)
diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll
new file mode 100644
index 000000000000..67e7137e7e49
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -basicaa -memcpyopt | FileCheck %s
+; <rdar://problem/8536696>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK: @_Z3foov
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
+_ZNSt8auto_ptrIiED1Ev.exit:
+  %temp.lvalue = alloca %"class.std::auto_ptr", align 8
+; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
+  call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
+  %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+; CHECK-NOT: load
+  %tmp2.i.i = load i32** %tmp.i.i, align 8
+  %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+; CHECK-NOT: store
+  store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8
+; CHECK: ret void
+  ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* sret)
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
new file mode 100644
index 000000000000..b18d176f0030
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
@@ -0,0 +1,19 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+
+@cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @foo(i32*) nounwind
+
+define void @test1() nounwind {
+  %arr = alloca [3 x i32], align 4
+  %arr_i8 = bitcast [3 x i32]* %arr to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false)
+  %arraydecay = getelementptr inbounds [3 x i32]* %arr, i64 0, i64 0
+  call void @foo(i32* %arraydecay) nounwind
+  ret void
+; CHECK: @test1
+; CHECK: call void @llvm.memset
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 724acfab4750..b387d32a7d5e 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -memcpyopt -dse -S | grep {call.*memcpy} | count 1
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
+define void @test1({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
 entry:
 	%tmp2 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
 	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
@@ -14,9 +14,98 @@ entry:
 	call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
 	%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+
+; Check that one of the memcpy's are removed.
+;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
+
+; CHECK: @test1
+; CHECK: call void @ccoshl
+; CHECK: call void @llvm.memcpy
+; CHECK-NOT: llvm.memcpy
+; CHECK: ret void
 	ret void
 }
 
 declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind 
 
 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be related with a memmove.
+define void @test2(i8* %P, i8* %Q) nounwind  {
+	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16
+	%R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
+	call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
+	call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
+        ret void
+        
+; CHECK: @test2
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
+; CHECK-NEXT: ret void
+}
+
+
+
+
+@x = external global { x86_fp80, x86_fp80 }
+
+define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
+	%x.0 = alloca { x86_fp80, x86_fp80 }
+	%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
+	call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
+	%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
+	call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
+	ret void
+; CHECK: @test3
+; CHECK-NEXT: %agg.result2 = bitcast 
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret void
+}
+
+
+; PR8644
+define void @test4(i8 *%P) {
+  %A = alloca {i32, i32}
+  %a = bitcast {i32, i32}* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
+  call void @test4a(i8* byval align 1 %a) 
+  ret void
+; CHECK: @test4
+; CHECK-NEXT: call void @test4a(
+}
+
+declare void @test4a(i8* byval align 1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+%struct.S = type { i128, [4 x i8]}
+
+@sS = external global %struct.S, align 16
+
+declare void @test5a(%struct.S* byval align 16) nounwind ssp
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; rdar://8713376 - This memcpy can't be eliminated.
+define i32 @test5(i32 %x) nounwind ssp {
+entry:
+  %y = alloca %struct.S, align 16
+  %tmp = bitcast %struct.S* %y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
+  %a = getelementptr %struct.S* %y, i64 0, i32 1, i64 0
+  store i8 4, i8* %a
+  call void @test5a(%struct.S* byval align 16 %y)
+  ret i32 0
+  ; CHECK: @test5(
+  ; CHECK: store i8 4
+  ; CHECK: call void @test5a(%struct.S* byval align 16 %y)
+}
+
+;; Noop memcpy should be zapped.
+define void @test6(i8 *%P) {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false)
+  ret void
+; CHECK: @test6
+; CHECK-NEXT: ret void
+}
+
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 73bbf0bd2e4e..8babb04e4b38 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -memcpyopt -S | FileCheck %s
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
 ; These memmoves should get optimized to memcpys.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/MemCpyOpt/smaller.ll b/test/Transforms/MemCpyOpt/smaller.ll
new file mode 100644
index 000000000000..1d35582d993e
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/smaller.ll
@@ -0,0 +1,28 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+; rdar://8875553
+
+; Memcpyopt shouldn't optimize the second memcpy using the first
+; because the first has a smaller size.
+
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
+
+target datalayout = "e-p:32:32:32"
+
+%struct.s = type { [11 x i8], i32 }
+
+@.str = private constant [11 x i8] c"0123456789\00"
+@cell = external global %struct.s
+
+declare void @check(%struct.s* byval %p) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @foo() nounwind {
+entry:
+  %agg.tmp = alloca %struct.s, align 4
+  store i32 99, i32* getelementptr inbounds (%struct.s* @cell, i32 0, i32 1), align 4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1, i1 false)
+  %tmp = getelementptr inbounds %struct.s* %agg.tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
+  call void @check(%struct.s* byval %agg.tmp)
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index 5002875ae328..ddfd0fd1fcce 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -memcpyopt -S | not grep {call.*memcpy}
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy}
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  %z) nounwind  {
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  align 8 %z) nounwind  {
 entry:
 	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
 	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
@@ -16,7 +16,7 @@ entry:
 	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp3, x86_fp80* %real, align 16
 	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval  %iz ) nounwind 
+	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind 
 	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
 	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
new file mode 100644
index 000000000000..201903e99545
--- /dev/null
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -0,0 +1,276 @@
+; RUN: opt -mergefunc %s -disable-output
+; This used to crash.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
+%"struct.kc::impl_CexpressionDQ" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_CexpressionDQ"* }
+%"struct.kc::impl_Ctext" = type { %"struct.kc::impl_Ccode_option", i32, %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_Ctext_elem"*, %"struct.kc::impl_Ctext"* }
+%"struct.kc::impl_Ctext_elem" = type { %"struct.kc::impl_abstract_phylum", i32, %"struct.kc::impl_casestring__Str"* }
+%"struct.kc::impl_ID" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_casestring__Str"*, i32, %"struct.kc::impl_casestring__Str"* }
+%"struct.kc::impl_abstract_phylum" = type { i32 (...)** }
+%"struct.kc::impl_ac_abstract_declarator_AcAbsdeclDirdecl" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_Ccode_option"* }
+%"struct.kc::impl_casestring__Str" = type { %"struct.kc::impl_abstract_phylum", i8* }
+%"struct.kc::impl_elem_patternrepresentation" = type { %"struct.kc::impl_abstract_phylum", i32, %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_ID"* }
+%"struct.kc::impl_fileline" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_casestring__Str"*, i32 }
+%"struct.kc::impl_fileline_FileLine" = type { %"struct.kc::impl_fileline" }
+%"struct.kc::impl_outmostpatterns" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_elem_patternrepresentation"*, %"struct.kc::impl_outmostpatterns"* }
+%"struct.kc::impl_withcaseinfo_Withcaseinfo" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_outmostpatterns"*, %"struct.kc::impl_outmostpatterns"*, %"struct.kc::impl_Ctext"* }
+
+@_ZTVN2kc13impl_filelineE = external constant [13 x i32 (...)*], align 32
+@.str = external constant [1 x i8], align 1
+@_ZTVN2kc22impl_fileline_FileLineE = external constant [13 x i32 (...)*], align 32
+
+define void @_ZN2kc22impl_fileline_FileLineC2EPNS_20impl_casestring__StrEi(%"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_casestring__Str"* %_file, i32 %_line) align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline_FileLine"*, align 4
+  %_file_addr = alloca %"struct.kc::impl_casestring__Str"*, align 4
+  %_line_addr = alloca i32, align 4
+  %save_filt.150 = alloca i32
+  %save_eptr.149 = alloca i8*
+  %iftmp.99 = alloca %"struct.kc::impl_casestring__Str"*
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
+  store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
+  store i32 %_line, i32* %_line_addr
+  %0 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
+  call void @_ZN2kc13impl_filelineC2Ev() nounwind
+  %2 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
+  %4 = getelementptr inbounds %"struct.kc::impl_fileline"* %3, i32 0, i32 0
+  %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
+  br i1 %7, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %8 = invoke %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()
+          to label %invcont unwind label %lpad
+
+invcont:                                          ; preds = %bb
+  store %"struct.kc::impl_casestring__Str"* %8, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %entry
+  %9 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %invcont
+  %10 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
+  %12 = getelementptr inbounds %"struct.kc::impl_fileline"* %11, i32 0, i32 1
+  %13 = load %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
+  %16 = getelementptr inbounds %"struct.kc::impl_fileline"* %15, i32 0, i32 2
+  %17 = load i32* %_line_addr, align 4
+  store i32 %17, i32* %16, align 4
+  ret void
+
+lpad:                                             ; preds = %bb
+  %eh_ptr = call i8* @llvm.eh.exception()
+  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr4 = load i8** %eh_exception
+  %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+  store i32 %eh_select5, i32* %eh_selector
+  %eh_select = load i32* %eh_selector
+  store i32 %eh_select, i32* %save_filt.150, align 4
+  %eh_value = load i8** %eh_exception
+  store i8* %eh_value, i8** %save_eptr.149, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
+  call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
+  %20 = load i8** %save_eptr.149, align 4
+  store i8* %20, i8** %eh_exception, align 4
+  %21 = load i32* %save_filt.150, align 4
+  store i32 %21, i32* %eh_selector, align 4
+  %eh_ptr6 = load i8** %eh_exception
+  call void @_Unwind_Resume_or_Rethrow()
+  unreachable
+}
+
+declare void @_ZN2kc13impl_filelineC2Ev() nounwind align 2
+
+define void @_ZN2kc13impl_filelineD1Ev(%"struct.kc::impl_fileline"* %this) nounwind align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
+  %0 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline"* %0, i32 0, i32 0
+  %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
+  %3 = trunc i32 0 to i8
+  %toBool = icmp ne i8 %3, 0
+  br i1 %toBool, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  %4 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
+  call void @_ZdlPv() nounwind
+  br label %return
+
+return:                                           ; preds = %bb1, %entry
+  ret void
+}
+
+declare void @_ZdlPv() nounwind
+
+define void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %this) nounwind align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
+  %0 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline"* %0, i32 0, i32 0
+  %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
+  %3 = trunc i32 0 to i8
+  %toBool = icmp ne i8 %3, 0
+  br i1 %toBool, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  %4 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
+  call void @_ZdlPv() nounwind
+  br label %return
+
+return:                                           ; preds = %bb1, %entry
+  ret void
+}
+
+define void @_ZN2kc22impl_fileline_FileLineC1EPNS_20impl_casestring__StrEi(%"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_casestring__Str"* %_file, i32 %_line) align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline_FileLine"*, align 4
+  %_file_addr = alloca %"struct.kc::impl_casestring__Str"*, align 4
+  %_line_addr = alloca i32, align 4
+  %save_filt.148 = alloca i32
+  %save_eptr.147 = alloca i8*
+  %iftmp.99 = alloca %"struct.kc::impl_casestring__Str"*
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
+  store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
+  store i32 %_line, i32* %_line_addr
+  %0 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
+  call void @_ZN2kc13impl_filelineC2Ev() nounwind
+  %2 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
+  %4 = getelementptr inbounds %"struct.kc::impl_fileline"* %3, i32 0, i32 0
+  %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
+  br i1 %7, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %8 = invoke %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()
+          to label %invcont unwind label %lpad
+
+invcont:                                          ; preds = %bb
+  store %"struct.kc::impl_casestring__Str"* %8, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %entry
+  %9 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %invcont
+  %10 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
+  %12 = getelementptr inbounds %"struct.kc::impl_fileline"* %11, i32 0, i32 1
+  %13 = load %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
+  %16 = getelementptr inbounds %"struct.kc::impl_fileline"* %15, i32 0, i32 2
+  %17 = load i32* %_line_addr, align 4
+  store i32 %17, i32* %16, align 4
+  ret void
+
+lpad:                                             ; preds = %bb
+  %eh_ptr = call i8* @llvm.eh.exception()
+  store i8* %eh_ptr, i8** %eh_exception
+  %eh_ptr4 = load i8** %eh_exception
+  %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+  store i32 %eh_select5, i32* %eh_selector
+  %eh_select = load i32* %eh_selector
+  store i32 %eh_select, i32* %save_filt.148, align 4
+  %eh_value = load i8** %eh_exception
+  store i8* %eh_value, i8** %save_eptr.147, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
+  call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
+  %20 = load i8** %save_eptr.147, align 4
+  store i8* %20, i8** %eh_exception, align 4
+  %21 = load i32* %save_filt.148, align 4
+  store i32 %21, i32* %eh_selector, align 4
+  %eh_ptr6 = load i8** %eh_exception
+  call void @_Unwind_Resume_or_Rethrow()
+  unreachable
+}
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Unwind_Resume_or_Rethrow()
+
+define void @_ZN2kc21printer_functor_classC2Ev(%"struct.kc::impl_abstract_phylum"* %this) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define %"struct.kc::impl_Ccode_option"* @_ZN2kc11phylum_castIPNS_17impl_withcaseinfoES1_EET_PT0_(%"struct.kc::impl_Ccode_option"* %t) nounwind {
+entry:
+  ret %"struct.kc::impl_Ccode_option"* null
+}
+
+define %"struct.kc::impl_abstract_phylum"* @_ZNK2kc43impl_ac_direct_declarator_AcDirectDeclProto9subphylumEi(%"struct.kc::impl_ac_abstract_declarator_AcAbsdeclDirdecl"* %this, i32 %no) nounwind align 2 {
+entry:
+  ret %"struct.kc::impl_abstract_phylum"* undef
+}
+
+define void @_ZN2kc30impl_withcaseinfo_WithcaseinfoD0Ev(%"struct.kc::impl_withcaseinfo_Withcaseinfo"* %this) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc30impl_withcaseinfo_WithcaseinfoC1EPNS_26impl_patternrepresentationES2_PNS_10impl_CtextE(%"struct.kc::impl_withcaseinfo_Withcaseinfo"* %this, %"struct.kc::impl_outmostpatterns"* %_patternrepresentation_1, %"struct.kc::impl_outmostpatterns"* %_patternrepresentation_2, %"struct.kc::impl_Ctext"* %_Ctext_1) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc21impl_rewriteviewsinfoC2EPNS_20impl_rewriteviewinfoEPS0_(%"struct.kc::impl_CexpressionDQ"* %this, %"struct.kc::impl_Ccode_option"* %p1, %"struct.kc::impl_CexpressionDQ"* %p2) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define %"struct.kc::impl_Ctext_elem"* @_ZN2kc11phylum_castIPNS_9impl_termENS_20impl_abstract_phylumEEET_PT0_(%"struct.kc::impl_abstract_phylum"* %t) nounwind {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc27impl_ac_parameter_type_listD2Ev(%"struct.kc::impl_Ccode_option"* %this) nounwind align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN2kc21impl_ac_operator_nameD2Ev(%"struct.kc::impl_Ctext_elem"* %this) nounwind align 2 {
+entry:
+  ret void
+}
+
+declare %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()
diff --git a/test/Transforms/MergeFunc/fold-weak.ll b/test/Transforms/MergeFunc/fold-weak.ll
index e12473125c78..23e4d33c3a9f 100644
--- a/test/Transforms/MergeFunc/fold-weak.ll
+++ b/test/Transforms/MergeFunc/fold-weak.ll
@@ -1,6 +1,10 @@
 ; RUN: opt < %s -mergefunc -S > %t
 ; RUN: grep {define weak} %t | count 2
 ; RUN: grep {call} %t | count 2
+; XFAIL: *
+
+; This test is off for a bit as we change this particular sort of folding to
+; only apply on ELF systems and not Mach-O systems.
 
 define weak i32 @sum(i32 %x, i32 %y) {
   %sum = add i32 %x, %y
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll
new file mode 100644
index 000000000000..6954fcec3daf
--- /dev/null
+++ b/test/Transforms/MergeFunc/vector.ll
@@ -0,0 +1,76 @@
+; RUN: opt -mergefunc -stats -disable-output < %s |& grep {functions merged}
+
+; This test is checks whether we can merge
+;   vector<intptr_t>::push_back(0)
+; and
+;   vector<void *>::push_back(0)
+; .
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { i32, void ()* }
+%1 = type { i64, i1 }
+%"class.std::vector" = type { [24 x i8] }
+
+@vi = global %"class.std::vector" zeroinitializer, align 8
+@__dso_handle = external unnamed_addr global i8*
+@vp = global %"class.std::vector" zeroinitializer, align 8
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define linkonce_odr void @_ZNSt6vectorIlSaIlEED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
+entry:
+  %tmp2.i.i = bitcast %"class.std::vector"* %this to i64**
+  %tmp3.i.i = load i64** %tmp2.i.i, align 8, !tbaa !0
+  %tobool.i.i.i = icmp eq i64* %tmp3.i.i, null
+  br i1 %tobool.i.i.i, label %_ZNSt6vectorIlSaIlEED2Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %entry
+  %0 = bitcast i64* %tmp3.i.i to i8*
+  tail call void @_ZdlPv(i8* %0) nounwind
+  ret void
+
+_ZNSt6vectorIlSaIlEED2Ev.exit:                    ; preds = %entry
+  ret void
+}
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+
+define linkonce_odr void @_ZNSt6vectorIPvSaIS0_EED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
+entry:
+  %tmp2.i.i = bitcast %"class.std::vector"* %this to i8***
+  %tmp3.i.i = load i8*** %tmp2.i.i, align 8, !tbaa !0
+  %tobool.i.i.i = icmp eq i8** %tmp3.i.i, null
+  br i1 %tobool.i.i.i, label %_ZNSt6vectorIPvSaIS0_EED2Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %entry
+  %0 = bitcast i8** %tmp3.i.i to i8*
+  tail call void @_ZdlPv(i8* %0) nounwind
+  ret void
+
+_ZNSt6vectorIPvSaIS0_EED2Ev.exit:                 ; preds = %entry
+  ret void
+}
+
+declare void @_Z1fv()
+
+declare void @_ZNSt6vectorIPvSaIS0_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector"* nocapture %this, i8** %__position.coerce, i8** nocapture %__x) align 2
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare void @_ZSt17__throw_bad_allocv() noreturn
+
+declare noalias i8* @_Znwm(i64)
+
+declare void @_ZNSt6vectorIlSaIlEE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPlS1_EERKl(%"class.std::vector"* nocapture %this, i64* %__position.coerce, i64* nocapture %__x) align 2
+
+declare void @_GLOBAL__I_a()
+
+declare %1 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/Transforms/PartialSpecialize/dg.exp b/test/Transforms/PartialSpecialize/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/PartialSpecialize/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PartialSpecialize/two-specializations.ll b/test/Transforms/PartialSpecialize/two-specializations.ll
deleted file mode 100644
index bc3da22e1855..000000000000
--- a/test/Transforms/PartialSpecialize/two-specializations.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; If there are two specializations of a function, make sure each callsite
-; calls the right one.
-;
-; RUN: opt -S -partialspecialization -disable-inlining %s | opt -S -inline | FileCheck %s -check-prefix=CORRECT
-; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s 
-declare void @callback1()
-declare void @callback2()
-
-define internal void @UseCallback(void()* %pCallback) {
-  call void %pCallback()
-  ret void
-}
-
-define void @foo(void()* %pNonConstCallback)
-{
-Entry:
-; CORRECT: Entry
-; CORRECT-NEXT: call void @callback1()
-; CORRECT-NEXT: call void @callback1()
-; CORRECT-NEXT: call void @callback2()
-; CORRECT-NEXT: call void %pNonConstCallback()
-; CORRECT-NEXT: call void @callback1()
-; CORRECT-NEXT: call void @callback2()
-; CORRECT-NEXT: call void @callback2()
-; CHECK: Entry
-; CHECK-NOT: call void @UseCallback(void ()* @callback1)
-; CHECK-NOT: call void @UseCallback(void ()* @callback2)
-; CHECK: ret void
-  call void @UseCallback(void()* @callback1)
-  call void @UseCallback(void()* @callback1)
-  call void @UseCallback(void()* @callback2)
-  call void @UseCallback(void()* %pNonConstCallback)
-  call void @UseCallback(void()* @callback1)
-  call void @UseCallback(void()* @callback2)
-  call void @UseCallback(void()* @callback2)
-  ret void
-}
diff --git a/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll b/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
new file mode 100644
index 000000000000..e6c76b344486
--- /dev/null
+++ b/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -reassociate
+; PR9039
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-gnu-linux"
+
+%ada__tags__T15s = type void ()
+
+define void @exp_averages_intraday__deviation() {
+entry:
+  %0 = load i32* undef, align 4
+  %1 = shl i32 %0, 2
+  %2 = add nsw i32 undef, %1
+  %3 = add nsw i32 %2, undef
+  %4 = mul nsw i32 %0, 12
+  %5 = add nsw i32 %3, %4
+  %6 = add nsw i32 %5, %4
+  %7 = add nsw i32 %6, undef
+  br i1 false, label %"4", label %"12"
+
+"4":                                              ; preds = %entry
+  br i1 undef, label %"5", label %"8"
+
+"5":                                              ; preds = %"4"
+  unreachable
+
+"8":                                              ; preds = %"4"
+  %8 = getelementptr inbounds i8* undef, i32 %6
+  br i1 undef, label %"13", label %"12"
+
+"12":                                             ; preds = %"8", %entry
+  ret void
+
+"13":                                             ; preds = %"8"
+  ret void
+}
diff --git a/test/Transforms/Reassociate/optional-flags.ll b/test/Transforms/Reassociate/optional-flags.ll
new file mode 100644
index 000000000000..40f7d5bf5b80
--- /dev/null
+++ b/test/Transforms/Reassociate/optional-flags.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+; rdar://8944681
+
+; Reassociate should clear optional flags like nsw when reassociating.
+
+; CHECK: @test0
+; CHECK: %y = add i64 %b, %a
+; CHECK: %z = add i64 %y, %c
+define i64 @test0(i64 %a, i64 %b, i64 %c) {
+  %y = add nsw i64 %c, %b
+  %z = add i64 %y, %a
+  ret i64 %z
+}
+
+; CHECK: @test1
+; CHECK: %y = add i64 %b, %a
+; CHECK: %z = add i64 %y, %c
+define i64 @test1(i64 %a, i64 %b, i64 %c) {
+  %y = add i64 %c, %b
+  %z = add nsw i64 %y, %a
+  ret i64 %z
+}
+
+; PR9215
+; CHECK: %s = add nsw i32 %y, %x
+define i32 @test2(i32 %x, i32 %y) {
+  %s = add nsw i32 %x, %y
+  ret i32 %s
+}
diff --git a/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll b/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
deleted file mode 100644
index b147ec960807..000000000000
--- a/test/Transforms/ScalarRepl/2003-05-30-InvalidIndices.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -scalarrepl
-
-define void @main() {
-	%E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } }	; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1]
-	%tmp.151 = getelementptr { { i32, float, double, i64 }, { i32, float, double, i64 } }* %E, i64 0, i32 1, i32 3		; <i64*> [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll b/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
deleted file mode 100644
index 89c0b05333a2..000000000000
--- a/test/Transforms/ScalarRepl/2003-05-30-MultiLevel.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -scalarrepl
-
-define i32 @test() {
-	%X = alloca { [4 x i32] }		; <{ [4 x i32] }*> [#uses=1]
-	%Y = getelementptr { [4 x i32] }* %X, i64 0, i32 0, i64 2		; <i32*> [#uses=2]
-	store i32 4, i32* %Y
-	%Z = load i32* %Y		; <i32> [#uses=1]
-	ret i32 %Z
-}
-
diff --git a/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll b/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
deleted file mode 100644
index ea23c3114682..000000000000
--- a/test/Transforms/ScalarRepl/2005-12-14-UnionPromoteCrash.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-
-target datalayout = "E-p:32:32"
-	%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] }
-	%union.rtunion_def = type { i32 }
-
-define void @find_reloads() {
-entry:
-	%c_addr.i = alloca i8		; <i8*> [#uses=1]
-	switch i32 0, label %return [
-		 i32 36, label %label.7
-		 i32 34, label %label.7
-		 i32 41, label %label.5
-	]
-label.5:		; preds = %entry
-	ret void
-label.7:		; preds = %entry, %entry
-	br i1 false, label %then.4, label %switchexit.0
-then.4:		; preds = %label.7
-	%tmp.0.i = bitcast i8* %c_addr.i to i32*		; <i32*> [#uses=1]
-	store i32 44, i32* %tmp.0.i
-	ret void
-switchexit.0:		; preds = %label.7
-	ret void
-return:		; preds = %entry
-	ret void
-}
-
diff --git a/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll b/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
deleted file mode 100644
index 03c7452115c1..000000000000
--- a/test/Transforms/ScalarRepl/2006-01-24-IllegalUnionPromoteCrash.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-
-target datalayout = "E-p:32:32"
-
-define i32 @test(i64 %L) {
-	%X = alloca i32		; <i32*> [#uses=2]
-	%Y = bitcast i32* %X to i64*		; <i64*> [#uses=1]
-	store i64 0, i64* %Y
-	%Z = load i32* %X		; <i32> [#uses=1]
-	ret i32 %Z
-}
-
diff --git a/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll b/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
deleted file mode 100644
index 63840f1b3c03..000000000000
--- a/test/Transforms/ScalarRepl/2006-04-20-PromoteCrash.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-
-define void @output_toc() {
-entry:
-	%buf = alloca [256 x i8], align 16		; <[256 x i8]*> [#uses=1]
-	%name = alloca i8*, align 4		; <i8**> [#uses=1]
-	%real_name = alloca i8*, align 4		; <i8**> [#uses=0]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%buf.upgrd.1 = bitcast [256 x i8]* %buf to i8*		; <i8*> [#uses=1]
-	store i8* %buf.upgrd.1, i8** %name
-	call void @abort( )
-	unreachable
-return:		; No predecessors!
-	ret void
-}
-
-declare void @abort()
-
diff --git a/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll b/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
deleted file mode 100644
index dcd7e534f10b..000000000000
--- a/test/Transforms/ScalarRepl/2006-10-23-PointerUnionCrash.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-; END.
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8.7.2"
-
-define void @glgProcessColor() {
-entry:
-	%source_ptr = alloca i8*, align 4		; <i8**> [#uses=2]
-	br i1 false, label %bb1357, label %cond_next583
-cond_next583:		; preds = %entry
-	ret void
-bb1357:		; preds = %entry
-	br i1 false, label %bb1365, label %bb27055
-bb1365:		; preds = %bb1357
-	switch i32 0, label %cond_next10377 [
-		 i32 0, label %bb4679
-		 i32 1, label %bb4679
-		 i32 2, label %bb4679
-		 i32 3, label %bb4679
-		 i32 4, label %bb5115
-		 i32 5, label %bb6651
-		 i32 6, label %bb7147
-		 i32 7, label %bb8683
-		 i32 8, label %bb9131
-		 i32 9, label %bb9875
-		 i32 10, label %bb4679
-		 i32 11, label %bb4859
-		 i32 12, label %bb4679
-		 i32 16, label %bb10249
-	]
-bb4679:		; preds = %bb1365, %bb1365, %bb1365, %bb1365, %bb1365, %bb1365
-	ret void
-bb4859:		; preds = %bb1365
-	ret void
-bb5115:		; preds = %bb1365
-	ret void
-bb6651:		; preds = %bb1365
-	ret void
-bb7147:		; preds = %bb1365
-	ret void
-bb8683:		; preds = %bb1365
-	ret void
-bb9131:		; preds = %bb1365
-	ret void
-bb9875:		; preds = %bb1365
-	%source_ptr9884 = bitcast i8** %source_ptr to i8**		; <i8**> [#uses=1]
-	%tmp9885 = load i8** %source_ptr9884		; <i8*> [#uses=0]
-	ret void
-bb10249:		; preds = %bb1365
-	%source_ptr10257 = bitcast i8** %source_ptr to i16**		; <i16**> [#uses=1]
-	%tmp10258 = load i16** %source_ptr10257		; <i16*> [#uses=0]
-	ret void
-cond_next10377:		; preds = %bb1365
-	ret void
-bb27055:		; preds = %bb1357
-	ret void
-}
diff --git a/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll b/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
deleted file mode 100644
index 260620352f31..000000000000
--- a/test/Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-; PR1045
-
-target datalayout = "e-p:32:32"
-target triple = "i686-pc-linux-gnu"
-	%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>" = type { %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"* }
-	%"struct.__gnu_cxx::bitmap_allocator<char>" = type { i8 }
-	%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block" = type { [8 x i8] }
-
-define void @_ZN9__gnu_cxx16bitmap_allocatorIwE27_M_deallocate_single_objectEPw() {
-entry:
-	%this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*		; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"**> [#uses=3]
-	%tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", align 4		; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-	store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i
-	%tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i		; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-	%tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator<char>"*		; <%"struct.__gnu_cxx::bitmap_allocator<char>"*> [#uses=0]
-	%tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i		; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-	%tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp1.i, i32 0, i32 0		; <%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"**> [#uses=0]
-	unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll b/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
deleted file mode 100644
index bd4910682f1a..000000000000
--- a/test/Transforms/ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-apple-darwin8"
-	%struct.CGPoint = type { float, float }
-	%struct.aal_big_range_t = type { i32, i32 }
-	%struct.aal_callback_t = type { i8* (i8*, i32)*, void (i8*, i8*)* }
-	%struct.aal_edge_pool_t = type { %struct.aal_edge_pool_t*, i32, i32, [0 x %struct.aal_edge_t] }
-	%struct.aal_edge_t = type { %struct.CGPoint, %struct.CGPoint, i32 }
-	%struct.aal_range_t = type { i16, i16 }
-	%struct.aal_span_pool_t = type { %struct.aal_span_pool_t*, [341 x %struct.aal_span_t] }
-	%struct.aal_span_t = type { %struct.aal_span_t*, %struct.aal_big_range_t }
-	%struct.aal_spanarray_t = type { [2 x %struct.aal_range_t] }
-	%struct.aal_spanbucket_t = type { i16, [2 x i8], %struct.anon }
-	%struct.aal_state_t = type { %struct.CGPoint, %struct.CGPoint, %struct.CGPoint, i32, float, float, float, float, %struct.CGPoint, %struct.CGPoint, float, float, float, float, i32, i32, i32, i32, float, float, i8*, i32, i32, %struct.aal_edge_pool_t*, %struct.aal_edge_pool_t*, i8*, %struct.aal_callback_t*, i32, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_pool_t*, i8, float, i8, i32 }
-	%struct.anon = type { %struct.aal_spanarray_t }
-
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define fastcc void @aal_insert_span() {
-entry:
-	%SB = alloca %struct.aal_spanbucket_t, align 4		; <%struct.aal_spanbucket_t*> [#uses=2]
-	br i1 false, label %cond_true, label %cond_next79
-
-cond_true:		; preds = %entry
-	br i1 false, label %cond_next, label %cond_next114.i
-
-cond_next114.i:		; preds = %cond_true
-	ret void
-
-cond_next:		; preds = %cond_true
-	%SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %SB19, i8* null, i32 12, i32 0 )
-	br i1 false, label %cond_next34, label %cond_next79
-
-cond_next34:		; preds = %cond_next
-	%i.2.reload22 = load i32* null		; <i32> [#uses=1]
-	%tmp51 = getelementptr %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1		; <i16*> [#uses=0]
-	ret void
-
-cond_next79:		; preds = %cond_next, %entry
-	ret void
-}
diff --git a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll b/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
deleted file mode 100644
index 62f7d19dbdab..000000000000
--- a/test/Transforms/ScalarRepl/2009-01-09-scalarrepl-empty.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-; PR3304
-
-       %struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN }
-        %struct.c37304a__vrec___disc___XVN = type {
-%struct.c37304a__vrec___disc___XVN___O }
-        %struct.c37304a__vrec___disc___XVN___O = type {  }
-
-define void @_ada_c37304a() {
-entry:
-        %v = alloca %struct.c37304a__vrec
-        %0 = getelementptr %struct.c37304a__vrec* %v, i32 0, i32 0             
-        store i8 8, i8* %0, align 1
-        unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll b/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
deleted file mode 100644
index c5ebf8eb5237..000000000000
--- a/test/Transforms/ScalarRepl/2009-04-21-ZeroLengthMemSet.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-; rdar://6808691
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "x86_64-apple-darwin9.0"
-	type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>		
-
-define i32 @foo() {
-entry:
-	%.compoundliteral = alloca %0		
-	%tmp228 = getelementptr %0* %.compoundliteral, i32 0, i32 7
-	%tmp229 = bitcast [0 x i16]* %tmp228 to i8*		
-	call void @llvm.memset.i64(i8* %tmp229, i8 0, i64 0, i32 2)
-	unreachable
-}
-
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll b/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
deleted file mode 100644
index aa3487b27707..000000000000
--- a/test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-; PR4146
-
- %wrapper = type { i1 }
-
-define void @f() {
-entry:
-        %w = alloca %wrapper, align 8           ; <%wrapper*> [#uses=1]
-        %0 = getelementptr %wrapper* %w, i64 0, i32 0           ; <i1*>
-        store i1 true, i1* %0
-        ret void
-}
diff --git a/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll b/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
deleted file mode 100644
index cecbdd4b881a..000000000000
--- a/test/Transforms/ScalarRepl/2009-06-01-BitcastIntPadding.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -scalarrepl
-; PR4286
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "x86_64-undermydesk-freebsd8.0"
-	%struct.singlebool = type <{ i8 }>
-
-define zeroext i8 @doit() nounwind {
-entry:
-	%a = alloca %struct.singlebool, align 1		; <%struct.singlebool*> [#uses=2]
-	%storetmp.i = bitcast %struct.singlebool* %a to i1*		; <i1*> [#uses=1]
-	store i1 true, i1* %storetmp.i
-	%tmp = getelementptr %struct.singlebool* %a, i64 0, i32 0		; <i8*> [#uses=1]
-	%tmp1 = load i8* %tmp		; <i8> [#uses=1]
-	ret i8 %tmp1
-}
-
diff --git a/test/Transforms/ScalarRepl/2009-08-16-VLA.ll b/test/Transforms/ScalarRepl/2009-08-16-VLA.ll
deleted file mode 100644
index d69af114ad90..000000000000
--- a/test/Transforms/ScalarRepl/2009-08-16-VLA.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-opt
-
-	%struct.Item = type { [4 x i16], %struct.rule* }
-	%struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 }
-	%struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* }
-	%struct.plankMap = type { %struct.list*, i32, %struct.stateMap* }
-	%struct.list = type { i8*, %struct.list* }
-	%struct.stateMap = type { i8*, %struct.plank*, i32, i16* }
-	%struct.plank = type { i8*, %struct.list*, i32 }
-	%struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] }
-	%struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* }
-	%struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** }
-	%struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* }
-	%struct.Index_Map = type { i32, %struct.item_set** }
-	%struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* }
-	%struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** }
-
-define void @addHP_2_0() {
-bb4.i:
-	%0 = malloc [0 x %struct.Item]		; <[0 x %struct.Item]*> [#uses=1]
-	%.sub.i.c.i = getelementptr [0 x %struct.Item]* %0, i32 0, i32 0		; <%struct.Item*> [#uses=0]
-	unreachable
-}
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
index a26b62d0ad76..9676873c30c0 100644
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@@ -1,11 +1,30 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-define i32 @test() {
+define i32 @test1() {
 	%X = alloca { i32, float }		; <{ i32, float }*> [#uses=1]
 	%Y = getelementptr { i32, float }* %X, i64 0, i32 0		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
 	%Z = load i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+}
+
+; PR8980
+define i64 @test2(i64 %X) {
+	%A = alloca [8 x i8]
+        %B = bitcast [8 x i8]* %A to i64*
+        
+	store i64 %X, i64* %B
+        br label %L2
+        
+L2:
+	%Z = load i64* %B		; <i32> [#uses=1]
+	ret i64 %Z
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
 }
 
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index 29924138762f..997da4bdb2fc 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -1,9 +1,11 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 ; PR3290
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 ;; Store of integer to whole alloca struct.
 define i32 @test1(i64 %V) nounwind {
+; CHECK: test1
+; CHECK-NOT: alloca
 	%X = alloca {{i32, i32}}
 	%Y = bitcast {{i32,i32}}* %X to i64*
 	store i64 %V, i64* %Y
@@ -18,6 +20,8 @@ define i32 @test1(i64 %V) nounwind {
 
 ;; Store of integer to whole struct/array alloca.
 define float @test2(i128 %V) nounwind {
+; CHECK: test2
+; CHECK-NOT: alloca
 	%X = alloca {[4 x float]}
 	%Y = bitcast {[4 x float]}* %X to i128*
 	store i128 %V, i128* %Y
@@ -32,6 +36,8 @@ define float @test2(i128 %V) nounwind {
 
 ;; Load of whole alloca struct as integer
 define i64 @test3(i32 %a, i32 %b) nounwind {
+; CHECK: test3
+; CHECK-NOT: alloca
 	%X = alloca {{i32, i32}}
 
 	%A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0
@@ -46,6 +52,8 @@ define i64 @test3(i32 %a, i32 %b) nounwind {
 
 ;; load of integer from whole struct/array alloca.
 define i128 @test4(float %a, float %b) nounwind {
+; CHECK: test4
+; CHECK-NOT: alloca
 	%X = alloca {[4 x float]}
 	%A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0
 	%B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
@@ -56,3 +64,45 @@ define i128 @test4(float %a, float %b) nounwind {
 	%V = load i128* %Y
 	ret i128 %V
 }
+
+;; If the elements of a struct or array alloca contain padding, SROA can still
+;; split up the alloca as long as there is no padding between the elements.
+%padded = type { i16, i8 }
+%arr = type [4 x %padded]
+define void @test5(%arr* %p, %arr* %q) {
+entry:
+; CHECK: test5
+; CHECK-NOT: i128
+  %var = alloca %arr, align 4
+  %vari8 = bitcast %arr* %var to i8*
+  %pi8 = bitcast %arr* %p to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
+  %qi8 = bitcast %arr* %q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
+  ret void
+}
+
+;; Check that an array alloca can be split up when it is also accessed with
+;; a load or store as a homogeneous structure with the same element type and
+;; number of elements as the array.
+%homogeneous = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%wrapped_array = type { [3 x <8 x i16>] }
+define void @test6(i8* %p, %wrapped_array* %arr) {
+entry:
+; CHECK: test6
+; CHECK: store <8 x i16>
+; CHECK: store <8 x i16>
+; CHECK: store <8 x i16>
+  %var = alloca %wrapped_array, align 16
+  %res = call %homogeneous @test6callee(i8* %p)
+  %varcast = bitcast %wrapped_array* %var to %homogeneous*
+  store %homogeneous %res, %homogeneous* %varcast
+  %tmp1 = bitcast %wrapped_array* %arr to i8*
+  %tmp2 = bitcast %wrapped_array* %var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false)
+  ret void
+}
+
+declare %homogeneous @test6callee(i8* nocapture) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
new file mode 100644
index 000000000000..7b62f09e86db
--- /dev/null
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -0,0 +1,260 @@
+; RUN: opt -scalarrepl %s -disable-output
+; RUN: opt -scalarrepl-ssa %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; PR9017
+define void @test1() nounwind readnone ssp {
+entry:
+  %l_72 = alloca i32*, align 8
+  unreachable
+
+for.cond:                                         ; preds = %for.cond
+  %tmp1.i = load i32** %l_72, align 8
+  store i32* %tmp1.i, i32** %l_72, align 8
+  br label %for.cond
+
+if.end:                                           ; No predecessors!
+  ret void
+}
+
+
+define void @test2() {
+  %E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } }        ; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1]
+  %tmp.151 = getelementptr { { i32, float, double, i64 }, { i32, float, double, i64 } }* %E, i64 0, i32 1, i32 3          ; <i64*> [#uses=0]
+  ret void
+}
+
+define i32 @test3() {
+        %X = alloca { [4 x i32] }               ; <{ [4 x i32] }*> [#uses=1]
+        %Y = getelementptr { [4 x i32] }* %X, i64 0, i32 0, i64 2               ; <i32*> [#uses=2]
+        store i32 4, i32* %Y
+        %Z = load i32* %Y               ; <i32> [#uses=1]
+        ret i32 %Z
+}
+
+
+%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] }
+%union.rtunion_def = type { i32 }
+
+define void @test4() {
+entry:
+        %c_addr.i = alloca i8           ; <i8*> [#uses=1]
+        switch i32 0, label %return [
+                 i32 36, label %label.7
+                 i32 34, label %label.7
+                 i32 41, label %label.5
+        ]
+label.5:                ; preds = %entry
+        ret void
+label.7:                ; preds = %entry, %entry
+        br i1 false, label %then.4, label %switchexit.0
+then.4:         ; preds = %label.7
+        %tmp.0.i = bitcast i8* %c_addr.i to i32*                ; <i32*> [#uses=1]
+        store i32 44, i32* %tmp.0.i
+        ret void
+switchexit.0:           ; preds = %label.7
+        ret void
+return:         ; preds = %entry
+        ret void
+}
+
+
+define void @test5() {
+entry:
+        %source_ptr = alloca i8*, align 4               ; <i8**> [#uses=2]
+        br i1 false, label %bb1357, label %cond_next583
+cond_next583:           ; preds = %entry
+        ret void
+bb1357:         ; preds = %entry
+        br i1 false, label %bb1365, label %bb27055
+bb1365:         ; preds = %bb1357
+        switch i32 0, label %cond_next10377 [
+                 i32 0, label %bb4679
+                 i32 1, label %bb4679
+                 i32 2, label %bb4679
+                 i32 3, label %bb4679
+                 i32 4, label %bb5115
+                 i32 5, label %bb6651
+                 i32 6, label %bb7147
+                 i32 7, label %bb8683
+                 i32 8, label %bb9131
+                 i32 9, label %bb9875
+                 i32 10, label %bb4679
+                 i32 11, label %bb4859
+                 i32 12, label %bb4679
+                 i32 16, label %bb10249
+        ]
+bb4679:         ; preds = %bb1365, %bb1365, %bb1365, %bb1365, %bb1365, %bb1365
+        ret void
+bb4859:         ; preds = %bb1365
+        ret void
+bb5115:         ; preds = %bb1365
+        ret void
+bb6651:         ; preds = %bb1365
+        ret void
+bb7147:         ; preds = %bb1365
+        ret void
+bb8683:         ; preds = %bb1365
+        ret void
+bb9131:         ; preds = %bb1365
+        ret void
+bb9875:         ; preds = %bb1365
+        %source_ptr9884 = bitcast i8** %source_ptr to i8**              ; <i8**> [#uses=1]
+        %tmp9885 = load i8** %source_ptr9884            ; <i8*> [#uses=0]
+        ret void
+bb10249:                ; preds = %bb1365
+        %source_ptr10257 = bitcast i8** %source_ptr to i16**            ; <i16**> [#uses=1]
+        %tmp10258 = load i16** %source_ptr10257         ; <i16*> [#uses=0]
+        ret void
+cond_next10377:         ; preds = %bb1365
+        ret void
+bb27055:                ; preds = %bb1357
+        ret void
+}
+
+
+        %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>" = type { %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"* }
+        %"struct.__gnu_cxx::bitmap_allocator<char>" = type { i8 }
+        %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block" = type { [8 x i8] }
+
+; PR1045
+define void @test6() {
+entry:
+        %this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"**> [#uses=3]
+        %tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", align 4                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
+        store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i
+        %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i          ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
+        %tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator<char>"*              ; <%"struct.__gnu_cxx::bitmap_allocator<char>"*> [#uses=0]
+        %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i         ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
+        %tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp1.i, i32 0, i32 0         ; <%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"**> [#uses=0]
+        unreachable
+}
+
+        %struct.CGPoint = type { float, float }
+        %struct.aal_big_range_t = type { i32, i32 }        %struct.aal_callback_t = type { i8* (i8*, i32)*, void (i8*, i8*)* }        %struct.aal_edge_pool_t = type { %struct.aal_edge_pool_t*, i32, i32, [0 x %struct.aal_edge_t] }        %struct.aal_edge_t = type { %struct.CGPoint, %struct.CGPoint, i32 }
+        %struct.aal_range_t = type { i16, i16 }
+        %struct.aal_span_pool_t = type { %struct.aal_span_pool_t*, [341 x %struct.aal_span_t] }
+        %struct.aal_span_t = type { %struct.aal_span_t*, %struct.aal_big_range_t }
+        %struct.aal_spanarray_t = type { [2 x %struct.aal_range_t] }
+        %struct.aal_spanbucket_t = type { i16, [2 x i8], %struct.anon }
+        %struct.aal_state_t = type { %struct.CGPoint, %struct.CGPoint, %struct.CGPoint, i32, float, float, float, float, %struct.CGPoint, %struct.CGPoint, float, float, float, float, i32, i32, i32, i32, float, float, i8*, i32, i32, %struct.aal_edge_pool_t*, %struct.aal_edge_pool_t*, i8*, %struct.aal_callback_t*, i32, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_pool_t*, i8, float, i8, i32 }
+        %struct.anon = type { %struct.aal_spanarray_t }
+
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+define fastcc void @test7() {
+entry:
+        %SB = alloca %struct.aal_spanbucket_t, align 4          ; <%struct.aal_spanbucket_t*> [#uses=2]
+        br i1 false, label %cond_true, label %cond_next79
+
+cond_true:              ; preds = %entry
+        br i1 false, label %cond_next, label %cond_next114.i
+
+cond_next114.i:         ; preds = %cond_true
+        ret void
+
+cond_next:              ; preds = %cond_true
+        %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
+        call void @llvm.memcpy.i32( i8* %SB19, i8* null, i32 12, i32 0 )
+        br i1 false, label %cond_next34, label %cond_next79
+
+cond_next34:            ; preds = %cond_next
+        %i.2.reload22 = load i32* null          ; <i32> [#uses=1]
+        %tmp51 = getelementptr %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1      
+        ; <i16*> [#uses=0]
+        ret void
+
+cond_next79:            ; preds = %cond_next, %entry
+        ret void
+}
+
+
+       %struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN }
+        %struct.c37304a__vrec___disc___XVN = type {
+%struct.c37304a__vrec___disc___XVN___O }
+        %struct.c37304a__vrec___disc___XVN___O = type {  }
+
+; PR3304
+define void @test8() {
+entry:
+        %v = alloca %struct.c37304a__vrec
+        %0 = getelementptr %struct.c37304a__vrec* %v, i32 0, i32 0             
+        store i8 8, i8* %0, align 1
+        unreachable
+}
+
+
+
+; rdar://6808691 - ZeroLengthMemSet
+        type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>           
+
+define i32 @test9() {
+entry:
+        %.compoundliteral = alloca %0           
+        %tmp228 = getelementptr %0* %.compoundliteral, i32 0, i32 7
+        %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
+        call void @llvm.memset.i64(i8* %tmp229, i8 0, i64 0, i32 2)
+        unreachable
+}
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+
+
+; PR4146 - i1 handling
+%wrapper = type { i1 }
+define void @test10() {
+entry:
+        %w = alloca %wrapper, align 8           ; <%wrapper*> [#uses=1]
+        %0 = getelementptr %wrapper* %w, i64 0, i32 0           ; <i1*>
+        store i1 true, i1* %0
+        ret void
+}
+
+
+        %struct.singlebool = type <{ i8 }>
+; PR4286
+define zeroext i8 @test11() nounwind {
+entry:
+        %a = alloca %struct.singlebool, align 1         ; <%struct.singlebool*> [#uses=2]
+        %storetmp.i = bitcast %struct.singlebool* %a to i1*             ; <i1*> [#uses=1]
+        store i1 true, i1* %storetmp.i
+        %tmp = getelementptr %struct.singlebool* %a, i64 0, i32 0               ; <i8*> [#uses=1]
+        %tmp1 = load i8* %tmp           ; <i8> [#uses=1]
+        ret i8 %tmp1
+}
+
+
+       %struct.Item = type { [4 x i16], %struct.rule* }
+        %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 }
+        %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* }
+        %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* }
+        %struct.list = type { i8*, %struct.list* }
+        %struct.stateMap = type { i8*, %struct.plank*, i32, i16* }
+        %struct.plank = type { i8*, %struct.list*, i32 }
+        %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] }
+        %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* }
+        %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** }
+        %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* }
+        %struct.Index_Map = type { i32, %struct.item_set** }
+        %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* }
+        %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** }
+
+; VLAs.
+define void @test12() {
+bb4.i:
+        %0 = malloc [0 x %struct.Item]          ; <[0 x %struct.Item]*> [#uses=1]
+        %.sub.i.c.i = getelementptr [0 x %struct.Item]* %0, i32 0, i32 0                ; <%struct.Item*> [#uses=0]
+        unreachable
+}
+
+; PR8680
+define void @test13() nounwind {
+entry:
+  %memtmp = alloca i32, align 4
+  %0 = bitcast i32* %memtmp to void ()*
+  call void %0() nounwind
+  ret void
+}
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index 81527853c53b..5b258645344d 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -1,12 +1,19 @@
-; RUN: opt < %s -scalarrepl -S | not grep {call.*memcpy}
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 @C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32		; <[128 x float]*> [#uses=1]
 
-define float @grad4(i32 %hash, float %x, float %y, float %z, float %w) {
+define float @test1(i32 %hash, float %x, float %y, float %z, float %w) {
 entry:
 	%lookupTable = alloca [128 x float], align 16		; <[128 x float]*> [#uses=5]
 	%lookupTable1 = bitcast [128 x float]* %lookupTable to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32( i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i32 512, i32 16 )
+        
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK-NOT: call{{.*}}@llvm.memcpy
+; CHECK: %lookupTable1 = bitcast [128 x float]* @C.0.1248 to i8*
+; CHECK-NOT: call{{.*}}@llvm.memcpy
+        
 	%tmp3 = shl i32 %hash, 2		; <i32> [#uses=1]
 	%tmp5 = and i32 %tmp3, 124		; <i32> [#uses=4]
 	%tmp753 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp5		; <float*> [#uses=1]
@@ -32,3 +39,58 @@ entry:
 }
 
 declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+%T = type { i8, [123 x i8] }
+
+@G = constant %T {i8 1, [123 x i8] zeroinitializer }
+
+define void @test2() {
+  %A = alloca %T
+  %B = alloca %T
+  %a = bitcast %T* %A to i8*
+  %b = bitcast %T* %B to i8*
+
+; CHECK: @test2
+
+; %A alloca is deleted
+; CHECK-NEXT: %B = alloca %T
+
+; use @G instead of %A
+; CHECK-NEXT: %a = bitcast %T* @G to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
+  call void @bar(i8* %b)
+  ret void
+}
+
+declare void @bar(i8*)
+
+
+;; Should be able to eliminate the alloca.
+define void @test3() {
+  %A = alloca %T
+  %a = bitcast %T* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @bar(i8* %a) readonly
+; CHECK: @test3
+; CHECK-NEXT: %a = bitcast %T* @G to i8*
+; CHECK-NEXT: call void @bar(i8* %a)
+  ret void
+}
+
+define void @test4() {
+  %A = alloca %T
+  %a = bitcast %T* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @baz(i8* byval %a) 
+; CHECK: @test4
+; CHECK-NEXT: %a = bitcast %T* @G to i8*
+; CHECK-NEXT: call void @baz(i8* byval %a)
+  ret void
+}
+
+declare void @baz(i8* byval)
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
new file mode 100644
index 000000000000..fa3972de90a4
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -0,0 +1,153 @@
+; RUN: opt %s -scalarrepl -S | FileCheck %s
+; Test promotion of allocas that have phis and select users.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+%struct.X = type { i32 }
+%PairTy = type {i32, i32}
+
+; CHECK: @test1
+; CHECK: %a.0 = alloca i32
+; CHECK: %b.0 = alloca i32
+define i32 @test1(i32 %x) nounwind readnone ssp {
+entry:
+  %a = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
+  %b = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
+  %0 = getelementptr inbounds %struct.X* %a, i64 0, i32 0 ; <i32*> [#uses=1]
+  store i32 1, i32* %0, align 8
+  %1 = getelementptr inbounds %struct.X* %b, i64 0, i32 0 ; <i32*> [#uses=1]
+  store i32 2, i32* %1, align 8
+  %2 = icmp eq i32 %x, 0                          ; <i1> [#uses=1]
+  %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1]
+  %3 = getelementptr inbounds %struct.X* %p.0, i64 0, i32 0 ; <i32*> [#uses=1]
+  %4 = load i32* %3, align 8                      ; <i32> [#uses=1]
+  ret i32 %4
+}
+
+; CHECK: @test2
+; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ]
+; CHECK-NEXT: ret i32 %X.ld
+define i32 @test2(i1 %c) {
+entry:
+  %A = alloca {i32, i32}
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  br i1 %c, label %T, label %F
+T:
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %C
+  br label %F
+F:
+  %X = phi i32* [%B, %entry], [%C, %T]
+  %Q = load i32* %X
+  ret i32 %Q
+}
+
+; CHECK: @test3
+; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2
+; CHECK-NEXT: ret i32 %Q
+; rdar://8904039
+define i32 @test3(i1 %c) {
+  %A = alloca {i32, i32}
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %C
+  
+  %X = select i1 %c, i32* %B, i32* %C
+  %Q = load i32* %X
+  ret i32 %Q
+}
+
+;; We can't scalarize this, a use of the select is not an element access.
+define i64 @test4(i1 %c) {
+entry:
+  %A = alloca %PairTy
+  ; CHECK: @test4
+  ; CHECK: %A = alloca %PairTy
+  %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+  store i32 1, i32* %B
+  %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+  store i32 2, i32* %B
+  
+  %X = select i1 %c, i32* %B, i32* %C
+  %Y = bitcast i32* %X to i64*
+  %Q = load i64* %Y
+  ret i64 %Q
+}
+
+
+;;
+;; Tests for promoting allocas used by selects.
+;; rdar://7339113
+;;
+
+define i32 @test5(i32 *%P) nounwind readnone ssp {
+entry:
+  %b = alloca i32, align 8 
+  store i32 2, i32* %b, align 8
+  
+  ;; Select on constant condition should be folded.
+  %p.0 = select i1 false, i32* %b, i32* %P
+  store i32 123, i32* %p.0
+  
+  %r = load i32* %b, align 8
+  ret i32 %r
+  
+; CHECK: @test5
+; CHECK: store i32 123, i32* %P
+; CHECK: ret i32 2
+}
+
+define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp {
+  %a = alloca i32, align 8
+  %b = alloca i32, align 8
+  store i32 1, i32* %a, align 8
+  store i32 2, i32* %b, align 8
+  %p.0 = select i1 %c, i32* %b, i32* %a
+  %r = load i32* %p.0, align 8
+  ret i32 %r
+; CHECK: @test6
+; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1
+; CHECK-NEXT: ret i32 %r
+}
+
+; Verify that the loads happen where the loads are, not where the select is.
+define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
+  %a = alloca i32, align 8
+  %b = alloca i32, align 8
+  store i32 1, i32* %a
+  store i32 2, i32* %b
+  %p.0 = select i1 %c, i32* %b, i32* %a
+  
+  store i32 0, i32* %a
+  
+  %r = load i32* %p.0, align 8
+  ret i32 %r
+; CHECK: @test7
+; CHECK-NOT: alloca i32
+; CHECK: %r = select i1 %c, i32 2, i32 0
+; CHECK: ret i32 %r
+}
+
+;; Promote allocs that are PHI'd together by moving the loads.
+define i32 @test8(i32 %x) nounwind readnone ssp {
+; CHECK: @test8
+; CHECK-NOT: load i32
+; CHECK-NOT: store i32
+; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
+; CHECK-NEXT: ret i32 %p.0.ld
+entry:
+  %a = alloca i32, align 8
+  %b = alloca i32, align 8
+  store i32 1, i32* %a, align 8
+  store i32 2, i32* %b, align 8
+  %c = icmp eq i32 %x, 0 
+  br i1 %c, label %T, label %Cont
+T:
+  br label %Cont
+Cont:
+  %p.0 = phi i32* [%b, %entry],[%a, %T]
+  %r = load i32* %p.0, align 8
+  ret i32 %r
+}
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index fe55426b24a1..37cb49f539d6 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -87,8 +87,6 @@ define i32 @test5(float %X) {  ;; should turn into bitcast.
 }
 
 
-;; should not turn into <1 x i64> - It is a banned MMX datatype.
-;; rdar://8380055
 define i64 @test6(<2 x float> %X) {
 	%X_addr = alloca <2 x float>
         store <2 x float> %X, <2 x float>* %X_addr
@@ -96,7 +94,7 @@ define i64 @test6(<2 x float> %X) {
 	%tmp = load i64* %P
 	ret i64 %tmp
 ; CHECK: @test6
-; CHECK-NEXT: bitcast <2 x float> %X to i64
-; CHECK-NEXT: ret i64
+; CHECK: bitcast <2 x float> %X to <1 x i64>
+; CHECK: ret i64
 }
 
diff --git a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
new file mode 100644
index 000000000000..ebacf2fe9a9e
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
@@ -0,0 +1,6 @@
+; RUN: opt %s -simplifycfg -disable-output
+; PR8445
+
+define void @test() {
+      unwind
+}
diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll
index 54e5b14880c0..93b9a276eac4 100644
--- a/test/Transforms/SimplifyCFG/MagicPointer.ll
+++ b/test/Transforms/SimplifyCFG/MagicPointer.ll
@@ -8,7 +8,6 @@
 ; CHECK: i64 2, label
 ; CHECK: i64 3, label
 ; CHECK: i64 4, label
-; CHECK-NOT: br
 ; CHECK: }
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
index a648efd1740e..c28d0bac3759 100644
--- a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
@@ -1,12 +1,14 @@
 ; Test merging of blocks that only have PHI nodes in them
 ;
-; RUN: opt < %s -simplifycfg -S | not grep N:
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 ;
 
 define i32 @test(i1 %a, i1 %b) {
-; <label>:0
+; CHECK: br i1 %a
         br i1 %a, label %M, label %O
 O:              ; preds = %0
+; CHECK: select i1 %b, i32 0, i32 1
+; CHECK-NOT: phi
         br i1 %b, label %N, label %Q
 Q:              ; preds = %O
         br label %N
@@ -15,6 +17,7 @@ N:              ; preds = %Q, %O
         %Wp = phi i32 [ 0, %O ], [ 1, %Q ]              ; <i32> [#uses=1]
         br label %M
 M:              ; preds = %N, %0
+; CHECK: %W = phi i32
         %W = phi i32 [ %Wp, %N ], [ 2, %0 ]             ; <i32> [#uses=1]
         %R = add i32 %W, 1              ; <i32> [#uses=1]
         ret i32 %R
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate.ll b/test/Transforms/SimplifyCFG/PhiEliminate.ll
index 73cf466a4f74..d5ce9a7e6bc8 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate.ll
@@ -11,20 +11,6 @@ declare void @use(i1)
 
 declare void @use.upgrd.1(i32)
 
-define void @test2(i1 %c, i1 %d, i32 %V, i32 %V2) {
-; <label>:0
-        br i1 %d, label %X, label %F
-X:              ; preds = %0
-        br i1 %c, label %T, label %F
-T:              ; preds = %X
-        br label %F
-F:              ; preds = %T, %X, %0
-        %B1 = phi i1 [ true, %0 ], [ false, %T ], [ false, %X ]         ; <i1> [#uses=1]
-        %I7 = phi i32 [ %V, %0 ], [ %V2, %T ], [ %V2, %X ]              ; <i32> [#uses=1]
-        call void @use( i1 %B1 )
-        call void @use.upgrd.1( i32 %I7 )
-        ret void
-}
 
 define void @test(i1 %c, i32 %V, i32 %V2) {
 ; <label>:0
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 7315ff66bd12..052e10667da5 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -3,8 +3,7 @@
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define void @test1() {
-        br label %BB1
-BB1:            ; preds = %0
+        br label %1
         ret void
 ; CHECK: @test1
 ; CHECK-NEXT: ret void
@@ -12,7 +11,6 @@ BB1:            ; preds = %0
 
 define void @test2() {
         ret void
-BB1:            ; No predecessors!
         ret void
 ; CHECK: @test2
 ; CHECK-NEXT: ret void
@@ -20,35 +18,22 @@ BB1:            ; No predecessors!
 }
 
 define void @test3(i1 %T) {
-        br i1 %T, label %BB1, label %BB1
-BB1:            ; preds = %0, %0
+        br i1 %T, label %1, label %1
         ret void
 ; CHECK: @test3
 ; CHECK-NEXT: ret void
 }
 
 
-define void @test4() {
-  br label %return
-return:
-  ret void
-; CHECK: @test4
-; CHECK-NEXT: ret void
-}
-@test4g = global i8* blockaddress(@test4, %return)
-
-
 ; PR5795
 define void @test5(i32 %A) {
   switch i32 %A, label %return [
-    i32 2, label %bb
-    i32 10, label %bb1
+    i32 2, label %1
+    i32 10, label %2
   ]
 
-bb:                                               ; preds = %entry
   ret void
 
-bb1:                                              ; preds = %entry
   ret void
 
 return:                                           ; preds = %entry
diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll
index de4f5b607551..7fb4def5b932 100644
--- a/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -62,3 +62,121 @@ entry:
 BB0:
   ret void
 }
+
+
+; Make sure the blocks in the next few tests aren't trivially removable as
+; successors by taking their addresses.
+
+@anchor = constant [13 x i8*] [
+  i8* blockaddress(@indbrtest3, %L1), i8* blockaddress(@indbrtest3, %L2), i8* blockaddress(@indbrtest3, %L3),
+  i8* blockaddress(@indbrtest4, %L1), i8* blockaddress(@indbrtest4, %L2), i8* blockaddress(@indbrtest4, %L3),
+  i8* blockaddress(@indbrtest5, %L1), i8* blockaddress(@indbrtest5, %L2), i8* blockaddress(@indbrtest5, %L3), i8* blockaddress(@indbrtest5, %L4),
+  i8* blockaddress(@indbrtest6, %L1), i8* blockaddress(@indbrtest6, %L2), i8* blockaddress(@indbrtest6, %L3)
+]
+
+; SimplifyCFG should turn the indirectbr into a conditional branch on the
+; condition of the select.
+
+; CHECK: @indbrtest3
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1, label %L2
+; CHECK-NOT: indirectbr
+; CHECK-NOT: br
+; CHECK-NOT: L3:
+define void @indbrtest3(i1 %cond, i8* %address) nounwind {
+entry:
+  %indirect.goto.dest = select i1 %cond, i8* blockaddress(@indbrtest3, %L1), i8* blockaddress(@indbrtest3, %L2)
+  indirectbr i8* %indirect.goto.dest, [label %L1, label %L2, label %L3]
+
+L1:
+  call void @A()
+  ret void
+L2:
+  call void @C()
+  ret void
+L3:
+  call void @foo()
+  ret void
+}
+
+; SimplifyCFG should turn the indirectbr into an unconditional branch to the
+; only possible destination.
+; As in @indbrtest1, it should really remove the branch entirely, but it doesn't
+; because it's in the entry block.
+
+; CHECK: @indbrtest4
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %L1
+define void @indbrtest4(i1 %cond) nounwind {
+entry:
+  %indirect.goto.dest = select i1 %cond, i8* blockaddress(@indbrtest4, %L1), i8* blockaddress(@indbrtest4, %L1)
+  indirectbr i8* %indirect.goto.dest, [label %L1, label %L2, label %L3]
+
+L1:
+  call void @A()
+  ret void
+L2:
+  call void @C()
+  ret void
+L3:
+  call void @foo()
+  ret void
+}
+
+; SimplifyCFG should turn the indirectbr into an unreachable because neither
+; destination is listed as a successor.
+
+; CHECK: @indbrtest5
+; CHECK-NEXT: entry:
+; CHECK-NEXT: unreachable
+; CHECK-NEXT: }
+define void @indbrtest5(i1 %cond, i8* %anchor) nounwind {
+entry:
+  %indirect.goto.dest = select i1 %cond, i8* blockaddress(@indbrtest5, %L1), i8* blockaddress(@indbrtest5, %L2)
+; This needs to have more than one successor for this test, otherwise it gets
+; replaced with an unconditional branch to the single successor.
+  indirectbr i8* %indirect.goto.dest, [label %L3, label %L4]
+
+L1:
+  call void @A()
+  ret void
+L2:
+  call void @C()
+  ret void
+L3:
+  call void @foo()
+  ret void
+L4:
+  call void @foo()
+
+; This keeps blockaddresses not otherwise listed as successors from being zapped
+; before SimplifyCFG even looks at the indirectbr.
+  indirectbr i8* %anchor, [label %L1, label %L2]
+}
+
+; The same as above, except the selected addresses are equal.
+
+; CHECK: @indbrtest6
+; CHECK-NEXT: entry:
+; CHECK-NEXT: unreachable
+; CHECK-NEXT: }
+define void @indbrtest6(i1 %cond, i8* %anchor) nounwind {
+entry:
+  %indirect.goto.dest = select i1 %cond, i8* blockaddress(@indbrtest6, %L1), i8* blockaddress(@indbrtest6, %L1)
+; This needs to have more than one successor for this test, otherwise it gets
+; replaced with an unconditional branch to the single successor.
+  indirectbr i8* %indirect.goto.dest, [label %L2, label %L3]
+
+L1:
+  call void @A()
+  ret void
+L2:
+  call void @C()
+  ret void
+L3:
+  call void @foo()
+
+; This keeps blockaddresses not otherwise listed as successors from being zapped
+; before SimplifyCFG even looks at the indirectbr.
+  indirectbr i8* %anchor, [label %L1, label %L2]
+}
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
index bbd779beb489..73faa952fba9 100644
--- a/test/Transforms/SimplifyCFG/invoke_unwind.ll
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -9,10 +9,9 @@ define i32 @test1() {
 ; CHECK-NEXT: call void @bar()
 ; CHECK-NEXT: ret i32 0
         invoke void @bar( )
-                        to label %Ok unwind label %Rethrow
-Ok:             ; preds = %0
+                        to label %1 unwind label %Rethrow
         ret i32 0
-Rethrow:                ; preds = %0
+Rethrow:
         unwind
 }
 
diff --git a/test/Transforms/SimplifyCFG/speculate-with-offset.ll b/test/Transforms/SimplifyCFG/speculate-with-offset.ll
new file mode 100644
index 000000000000..a737d5602e84
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-with-offset.ll
@@ -0,0 +1,94 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+; This load is safe to speculate, as it's from a safe offset
+; within an alloca.
+
+; CHECK: @yes
+; CHECK-NOT: br
+
+define void @yes(i1 %c) nounwind {
+entry:
+  %a = alloca [4 x i64*], align 8
+  %__a.addr = getelementptr [4 x i64*]* %a, i64 0, i64 3
+  call void @frob(i64** %__a.addr)
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %tmp5 = load i64** %__a.addr, align 8
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %storemerge = phi i64* [ undef, %if.then ], [ %tmp5, %if.end ]
+  ret void
+}
+
+; CHECK: @no0
+; CHECK: br i1 %c
+
+define void @no0(i1 %c) nounwind {
+entry:
+  %a = alloca [4 x i64*], align 8
+  %__a.addr = getelementptr [4 x i64*]* %a, i64 0, i64 4
+  call void @frob(i64** %__a.addr)
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %tmp5 = load i64** %__a.addr, align 8
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %storemerge = phi i64* [ undef, %if.then ], [ %tmp5, %if.end ]
+  ret void
+}
+
+; CHECK: @no1
+; CHECK: br i1 %c
+
+define void @no1(i1 %c, i64 %n) nounwind {
+entry:
+  %a = alloca [4 x i64*], align 8
+  %__a.addr = getelementptr [4 x i64*]* %a, i64 0, i64 %n
+  call void @frob(i64** %__a.addr)
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %tmp5 = load i64** %__a.addr, align 8
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %storemerge = phi i64* [ undef, %if.then ], [ %tmp5, %if.end ]
+  ret void
+}
+
+; CHECK: @no2
+; CHECK: br i1 %c
+
+define void @no2(i1 %c, i64 %n) nounwind {
+entry:
+  %a = alloca [4 x i64*], align 8
+  %__a.addr = getelementptr [4 x i64*]* %a, i64 1, i64 0
+  call void @frob(i64** %__a.addr)
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %tmp5 = load i64** %__a.addr, align 8
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %storemerge = phi i64* [ undef, %if.then ], [ %tmp5, %if.end ]
+  ret void
+}
+
+declare void @frob(i64** nocapture %p)
diff --git a/test/Transforms/SimplifyCFG/switch-to-icmp.ll b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
new file mode 100644
index 000000000000..414f8475bc23
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+define zeroext i1 @test1(i32 %x) nounwind readnone ssp noredzone {
+entry:
+ switch i32 %x, label %lor.rhs [
+   i32 2, label %lor.end
+   i32 1, label %lor.end
+   i32 3, label %lor.end
+ ]
+
+lor.rhs:
+ br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
+ ret i1 %0
+
+; CHECK: @test1
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
+}
+
+define zeroext i1 @test2(i32 %x) nounwind readnone ssp noredzone {
+entry:
+ switch i32 %x, label %lor.rhs [
+   i32 0, label %lor.end
+   i32 1, label %lor.end
+ ]
+
+lor.rhs:
+ br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ]
+ ret i1 %0
+
+; CHECK: @test2
+; CHECK: %switch = icmp ult i32 %x, 2
+}
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 9b3aaf7f20de..546cc75f2973 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @foo1()
 
@@ -15,6 +15,11 @@ T:              ; preds = %0
 F:              ; preds = %0
         call void @foo2( )
         ret void
+; CHECK: @test1
+; CHECK:  switch i32 %V, label %F [
+; CHECK:    i32 17, label %T
+; CHECK:    i32 4, label %T
+; CHECK:  ]
 }
 
 define void @test2(i32 %V) {
@@ -28,6 +33,11 @@ T:              ; preds = %0
 F:              ; preds = %0
         call void @foo2( )
         ret void
+; CHECK: @test2
+; CHECK:  switch i32 %V, label %T [
+; CHECK:    i32 17, label %F
+; CHECK:    i32 4, label %F
+; CHECK:  ]
 }
 
 define void @test3(i32 %V) {
@@ -42,6 +52,430 @@ T:              ; preds = %N, %0
 F:              ; preds = %N
         call void @foo2( )
         ret void
+
+; CHECK: @test3
+; CHECK: switch i32 %V, label %F [
+; CHECK:     i32 4, label %T
+; CHECK:     i32 17, label %T
+; CHECK:   ]
 }
 
 
+
+define i32 @test4(i8 zeroext %c) nounwind ssp noredzone {
+entry:
+  %cmp = icmp eq i8 %c, 62
+  br i1 %cmp, label %lor.end, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp4 = icmp eq i8 %c, 34
+  br i1 %cmp4, label %lor.end, label %lor.rhs
+
+lor.rhs:                                          ; preds = %lor.lhs.false
+  %cmp8 = icmp eq i8 %c, 92
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %lor.lhs.false, %entry
+  %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ]
+  %lor.ext = zext i1 %0 to i32
+  ret i32 %lor.ext
+  
+; CHECK: @test4
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
+}
+
+define i32 @test5(i8 zeroext %c) nounwind ssp noredzone {
+entry:
+  switch i8 %c, label %lor.rhs [
+    i8 62, label %lor.end
+    i8 34, label %lor.end
+    i8 92, label %lor.end
+  ]
+
+lor.rhs:                                          ; preds = %entry
+  %V = icmp eq i8 %c, 92
+  br label %lor.end
+
+lor.end:                                          ; preds = %entry, %entry, %entry, %lor.rhs
+  %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
+  %lor.ext = zext i1 %0 to i32
+  ret i32 %lor.ext
+; CHECK: @test5
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
+}
+
+
+define i1 @test6({ i32, i32 }* %I) {
+entry:
+        %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
+        %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
+        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
+        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
+shortcirc_next.0:               ; preds = %entry
+        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
+        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
+shortcirc_next.1:               ; preds = %shortcirc_next.0
+        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
+        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
+shortcirc_next.2:               ; preds = %shortcirc_next.1
+        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
+        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
+shortcirc_next.3:               ; preds = %shortcirc_next.2
+        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
+        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
+shortcirc_next.4:               ; preds = %shortcirc_next.3
+        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
+        br label %UnifiedReturnBlock
+shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
+        br label %UnifiedReturnBlock
+UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
+        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
+        ret i1 %UnifiedRetVal
+        
+; CHECK: @test6
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
+}
+
+define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
+entry:
+  %cmp = icmp ult i32 %x, 32
+  %cmp4 = icmp eq i8 %c, 97
+  %or.cond = or i1 %cmp, %cmp4
+  %cmp9 = icmp eq i8 %c, 99
+  %or.cond11 = or i1 %or.cond, %cmp9
+  br i1 %or.cond11, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @foo1() nounwind noredzone
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+  
+; CHECK: @test7
+; CHECK:   %cmp = icmp ult i32 %x, 32
+; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
+}
+
+define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone {
+entry:
+  br i1 %C, label %N, label %if.then
+N:
+  %cmp = icmp ult i32 %x, 32
+  %cmp4 = icmp eq i8 %c, 97
+  %or.cond = or i1 %cmp, %cmp4
+  %cmp9 = icmp eq i8 %c, 99
+  %or.cond11 = or i1 %or.cond, %cmp9
+  br i1 %or.cond11, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %A = phi i32 [0, %entry], [42, %N]
+  tail call void @foo1() nounwind noredzone
+  ret i32 %A
+
+if.end:                                           ; preds = %entry
+  ret i32 0
+  
+; CHECK: @test8
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
+; CHECK:   %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ]
+}
+
+;; This is "Example 7" from http://blog.regehr.org/archives/320
+define i32 @test9(i8 zeroext %c) nounwind ssp noredzone {
+entry:
+  %cmp = icmp ult i8 %c, 33
+  br i1 %cmp, label %lor.end, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp4 = icmp eq i8 %c, 46
+  br i1 %cmp4, label %lor.end, label %lor.lhs.false6
+
+lor.lhs.false6:                                   ; preds = %lor.lhs.false
+  %cmp9 = icmp eq i8 %c, 44
+  br i1 %cmp9, label %lor.end, label %lor.lhs.false11
+
+lor.lhs.false11:                                  ; preds = %lor.lhs.false6
+  %cmp14 = icmp eq i8 %c, 58
+  br i1 %cmp14, label %lor.end, label %lor.lhs.false16
+
+lor.lhs.false16:                                  ; preds = %lor.lhs.false11
+  %cmp19 = icmp eq i8 %c, 59
+  br i1 %cmp19, label %lor.end, label %lor.lhs.false21
+
+lor.lhs.false21:                                  ; preds = %lor.lhs.false16
+  %cmp24 = icmp eq i8 %c, 60
+  br i1 %cmp24, label %lor.end, label %lor.lhs.false26
+
+lor.lhs.false26:                                  ; preds = %lor.lhs.false21
+  %cmp29 = icmp eq i8 %c, 62
+  br i1 %cmp29, label %lor.end, label %lor.lhs.false31
+
+lor.lhs.false31:                                  ; preds = %lor.lhs.false26
+  %cmp34 = icmp eq i8 %c, 34
+  br i1 %cmp34, label %lor.end, label %lor.lhs.false36
+
+lor.lhs.false36:                                  ; preds = %lor.lhs.false31
+  %cmp39 = icmp eq i8 %c, 92
+  br i1 %cmp39, label %lor.end, label %lor.rhs
+
+lor.rhs:                                          ; preds = %lor.lhs.false36
+  %cmp43 = icmp eq i8 %c, 39
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %lor.lhs.false36, %lor.lhs.false31, %lor.lhs.false26, %lor.lhs.false21, %lor.lhs.false16, %lor.lhs.false11, %lor.lhs.false6, %lor.lhs.false, %entry
+  %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ]
+  %conv46 = zext i1 %0 to i32
+  ret i32 %conv46
+  
+; CHECK: @test9
+; CHECK:   %cmp = icmp ult i8 %c, 33
+; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
+
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %lor.rhs [
+; CHECK:     i8 92, label %lor.end
+; CHECK:     i8 62, label %lor.end
+; CHECK:     i8 60, label %lor.end
+; CHECK:     i8 59, label %lor.end
+; CHECK:     i8 58, label %lor.end
+; CHECK:     i8 46, label %lor.end
+; CHECK:     i8 44, label %lor.end
+; CHECK:     i8 34, label %lor.end
+; CHECK:     i8 39, label %lor.end
+; CHECK:   ]
+}
+
+define i32 @test10(i32 %mode, i1 %Cond) {
+  %A = icmp ne i32 %mode, 0
+  %B = icmp ne i32 %mode, 51
+  %C = and i1 %A, %B
+  %D = and i1 %C, %Cond
+  br i1 %D, label %T, label %F
+T:
+  ret i32 123
+F:
+  ret i32 324
+
+; CHECK: @test10
+; CHECK:  br i1 %Cond, label %switch.early.test, label %F
+; CHECK:switch.early.test:
+; CHECK:  switch i32 %mode, label %T [
+; CHECK:    i32 51, label %F
+; CHECK:    i32 0, label %F
+; CHECK:  ]
+}
+
+; PR8780
+define i32 @test11(i32 %bar) nounwind {
+entry:
+  %cmp = icmp eq i32 %bar, 4
+  %cmp2 = icmp eq i32 %bar, 35
+  %or.cond = or i1 %cmp, %cmp2
+  %cmp5 = icmp eq i32 %bar, 53
+  %or.cond1 = or i1 %or.cond, %cmp5
+  %cmp8 = icmp eq i32 %bar, 24
+  %or.cond2 = or i1 %or.cond1, %cmp8
+  %cmp11 = icmp eq i32 %bar, 23
+  %or.cond3 = or i1 %or.cond2, %cmp11
+  %cmp14 = icmp eq i32 %bar, 55
+  %or.cond4 = or i1 %or.cond3, %cmp14
+  %cmp17 = icmp eq i32 %bar, 12
+  %or.cond5 = or i1 %or.cond4, %cmp17
+  %cmp20 = icmp eq i32 %bar, 35
+  %or.cond6 = or i1 %or.cond5, %cmp20
+  br i1 %or.cond6, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
+  ret i32 %retval.0
+
+; CHECK: @test11
+; CHECK: switch i32 %bar, label %if.end [
+; CHECK:   i32 55, label %return
+; CHECK:   i32 53, label %return
+; CHECK:   i32 35, label %return
+; CHECK:   i32 24, label %return
+; CHECK:   i32 23, label %return
+; CHECK:   i32 12, label %return
+; CHECK:   i32 4, label %return
+; CHECK: ]
+}
+
+define void @test12() nounwind {
+entry:
+  br label %bb49.us.us
+
+bb49.us.us:
+  %A = icmp eq i32 undef, undef
+  br i1 %A, label %bb55.us.us, label %malformed
+
+bb48.us.us:
+  %B = icmp ugt i32 undef, undef
+  br i1 %B, label %bb55.us.us, label %bb49.us.us
+
+bb55.us.us:
+  br label %bb48.us.us
+
+malformed:
+  ret void
+; CHECK: @test12
+
+}
+
+; test13 - handle switch formation with ult.
+define void @test13(i32 %x) nounwind ssp noredzone {
+entry:
+  %cmp = icmp ult i32 %x, 2
+  br i1 %cmp, label %if.then, label %lor.lhs.false3
+
+lor.lhs.false3:                                   ; preds = %lor.lhs.false
+  %cmp5 = icmp eq i32 %x, 3
+  br i1 %cmp5, label %if.then, label %lor.lhs.false6
+
+lor.lhs.false6:                                   ; preds = %lor.lhs.false3
+  %cmp8 = icmp eq i32 %x, 4
+  br i1 %cmp8, label %if.then, label %lor.lhs.false9
+
+lor.lhs.false9:                                   ; preds = %lor.lhs.false6
+  %cmp11 = icmp eq i32 %x, 6
+  br i1 %cmp11, label %if.then, label %if.end
+
+if.then:                                          ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry
+  call void @foo1() noredzone
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %lor.lhs.false9
+  ret void
+; CHECK: @test13
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
+}
+
+; test14 - handle switch formation with ult.
+define void @test14(i32 %x) nounwind ssp noredzone {
+entry:
+  %cmp = icmp ugt i32 %x, 2
+  br i1 %cmp, label %lor.lhs.false3, label %if.then
+
+lor.lhs.false3:                                   ; preds = %lor.lhs.false
+  %cmp5 = icmp ne i32 %x, 3
+  br i1 %cmp5, label %lor.lhs.false6, label %if.then
+
+lor.lhs.false6:                                   ; preds = %lor.lhs.false3
+  %cmp8 = icmp ne i32 %x, 4
+  br i1 %cmp8, label %lor.lhs.false9, label %if.then
+
+lor.lhs.false9:                                   ; preds = %lor.lhs.false6
+  %cmp11 = icmp ne i32 %x, 6
+  br i1 %cmp11, label %if.end, label %if.then
+
+if.then:                                          ; preds = %lor.lhs.false9, %lor.lhs.false6, %lor.lhs.false3, %lor.lhs.false, %entry
+  call void @foo1() noredzone
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %lor.lhs.false9
+  ret void
+; CHECK: @test14
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
+}
+
+; Don't crash on ginormous ranges.
+define void @test15(i128 %x) nounwind {
+  %cmp = icmp ugt i128 %x, 2
+  br i1 %cmp, label %if.end, label %lor.false
+
+lor.false:
+  %cmp2 = icmp ne i128 %x, 100000000000000000000
+  br i1 %cmp2, label %if.end, label %if.then
+
+if.then:
+  call void @foo1() noredzone
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK: @test15
+; CHECK-NOT: switch
+; CHECK: ret void
+}
+
+; PR8675
+; rdar://5134905
+define zeroext i1 @test16(i32 %x) nounwind {
+entry:
+; CHECK: @test16
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
+  %cmp.i = icmp eq i32 %x, 1
+  br i1 %cmp.i, label %lor.end, label %lor.lhs.false
+
+lor.lhs.false:
+  %cmp.i2 = icmp eq i32 %x, 2
+  br i1 %cmp.i2, label %lor.end, label %lor.rhs
+
+lor.rhs:
+  %cmp.i1 = icmp eq i32 %x, 3
+  br label %lor.end
+
+lor.end:
+  %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
+  ret i1 %0
+}
+
+; Check that we don't turn an icmp into a switch where it's not useful.
+define void @test17(i32 %x, i32 %y) {
+  %cmp = icmp ult i32 %x, 3
+  %switch = icmp ult i32 %y, 2
+  %or.cond775 = or i1 %cmp, %switch
+  br i1 %or.cond775, label %lor.lhs.false8, label %return
+
+lor.lhs.false8:
+  tail call void @foo1()
+  ret void
+
+return:
+  ret void
+
+; CHECK: @test17
+; CHECK-NOT: switch.early.test
+; CHECK-NOT: switch i32
+; CHECK: ret void
+}
+
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
index f1c820ec43be..2723ec608e1d 100644
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
         %llvm.dbg.anchor.type = type { i32, i32 }
         %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
@@ -13,7 +12,10 @@
 
 declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
 
-define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
+define i1 @t({ i32, i32 }* %I) {
+; CHECK: @t
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 entry:
         %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
         %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
diff --git a/test/Transforms/SimplifyCFG/switch_formation.ll b/test/Transforms/SimplifyCFG/switch_formation.ll
deleted file mode 100644
index 787904a5d4f8..000000000000
--- a/test/Transforms/SimplifyCFG/switch_formation.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
-entry:
-        %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
-        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
-shortcirc_next.0:               ; preds = %entry
-        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
-shortcirc_next.1:               ; preds = %shortcirc_next.0
-        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
-shortcirc_next.2:               ; preds = %shortcirc_next.1
-        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
-shortcirc_next.3:               ; preds = %shortcirc_next.2
-        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
-shortcirc_next.4:               ; preds = %shortcirc_next.3
-        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-        br label %UnifiedReturnBlock
-shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-        br label %UnifiedReturnBlock
-UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-        ret i1 %UnifiedRetVal
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
index cb9819cacea6..f8a0c88d2f89 100644
--- a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -simplify-libcalls -S > %t
 ; RUN: grep nocapture %t | count 2
 ; RUN: grep null %t | grep nocapture | count 1
-; RUN: grep null %t | grep call | grep readonly | count 1
+; RUN: grep null %t | grep call | not grep readonly
 
 ; Test that we add nocapture to the declaration, and to the second call only.
 
diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll
new file mode 100644
index 000000000000..1f72ede79614
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/FPuts.ll
@@ -0,0 +1,29 @@
+; Test that the FPutsOptimizer works correctly
+; RUN: opt < %s -simplify-libcalls -S | \
+; RUN:   not grep {call.*fputs}
+
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "-p:64:64:64"
+
+	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=1]
+@empty = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
+@len1 = constant [2 x i8] c"A\00"		; <[2 x i8]*> [#uses=1]
+@long = constant [7 x i8] c"hello\0A\00"		; <[7 x i8]*> [#uses=1]
+
+declare i32 @fputs(i8*, %struct._IO_FILE*)
+
+define i32 @main() {
+entry:
+	%out = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=3]
+	%s1 = getelementptr [1 x i8]* @empty, i32 0, i32 0		; <i8*> [#uses=1]
+	%s2 = getelementptr [2 x i8]* @len1, i32 0, i32 0		; <i8*> [#uses=1]
+	%s3 = getelementptr [7 x i8]* @long, i32 0, i32 0		; <i8*> [#uses=1]
+	%a = call i32 @fputs( i8* %s1, %struct._IO_FILE* %out )		; <i32> [#uses=0]
+	%b = call i32 @fputs( i8* %s2, %struct._IO_FILE* %out )		; <i32> [#uses=0]
+	%c = call i32 @fputs( i8* %s3, %struct._IO_FILE* %out )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index 858a09c96f6f..caea311ba142 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -1,21 +1,36 @@
-; RUN: opt < %s -simplify-libcalls -S | grep putchar
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*printf}
+; RUN: opt < %s -simplify-libcalls -S -o %t
+; RUN: FileCheck < %t %s
 
 @str = internal constant [13 x i8] c"hello world\0A\00"         ; <[13 x i8]*> [#uses=1]
 @str1 = internal constant [2 x i8] c"h\00"              ; <[2 x i8]*> [#uses=1]
 
-define void @foo() {
+declare i32 @printf(i8*, ...)
+
+; CHECK: define void @f0
+; CHECK-NOT: printf
+; CHECK: }
+define void @f0() {
 entry:
         %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str, i32 0, i32 0) )         ; <i32> [#uses=0]
         ret void
 }
 
-declare i32 @printf(i8*, ...)
-
-define void @bar() {
+; CHECK: define void @f1
+; CHECK-NOT: printf
+; CHECK: }
+define void @f1() {
 entry:
         %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([2 x i8]* @str1, i32 0, i32 0) )         ; <i32> [#uses=0]
         ret void
 }
 
+; Verify that we don't turn this into a putchar call (thus changing the return
+; value).
+;
+; CHECK: define i32 @f2
+; CHECK: printf
+; CHECK: }
+define i32 @f2() {
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @str1, i32 0, i32 0))
+  ret i32 %call
+}
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
index 47a33c2d98d5..48431434cc61 100644
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ b/test/Transforms/SimplifyLibCalls/Puts.ll
@@ -1,29 +1,15 @@
-; Test that the PutsCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*fputs}
+; Test that the PutsOptimizer works correctly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
 target datalayout = "-p:64:64:64"
 
-	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=1]
-@empty = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@len1 = constant [2 x i8] c"A\00"		; <[2 x i8]*> [#uses=1]
-@long = constant [7 x i8] c"hello\0A\00"		; <[7 x i8]*> [#uses=1]
+@.str = private constant [1 x i8] zeroinitializer
 
-declare i32 @fputs(i8*, %struct._IO_FILE*)
+declare i32 @puts(i8*)
 
-define i32 @main() {
+define void @foo() {
 entry:
-	%out = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=3]
-	%s1 = getelementptr [1 x i8]* @empty, i32 0, i32 0		; <i8*> [#uses=1]
-	%s2 = getelementptr [2 x i8]* @len1, i32 0, i32 0		; <i8*> [#uses=1]
-	%s3 = getelementptr [7 x i8]* @long, i32 0, i32 0		; <i8*> [#uses=1]
-	%a = call i32 @fputs( i8* %s1, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%b = call i32 @fputs( i8* %s2, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%c = call i32 @fputs( i8* %s3, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	ret i32 0
+; CHECK: call i32 @putchar(i32 10)
+  %call = call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0))
+  ret void
 }
-
diff --git a/test/Transforms/SimplifyLibCalls/StrChr.ll b/test/Transforms/SimplifyLibCalls/StrChr.ll
index 50ca0a6edbf8..eaabeb2feb8f 100644
--- a/test/Transforms/SimplifyLibCalls/StrChr.ll
+++ b/test/Transforms/SimplifyLibCalls/StrChr.ll
@@ -1,26 +1,26 @@
 ; Test that the StrChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep {call.*@strchr}
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
 ; This transformation requires the pointer size, as it assumes that size_t is
 ; the size of a pointer.
 target datalayout = "-p:64:64:64"
 
-@hello = constant [14 x i8] c"hello world\5Cn\00"		; <[14 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
 
 declare i8* @strchr(i8*, i32)
 
-declare i32 @puts(i8*)
-
-define i32 @main() {
-	%hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=2]
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%world = call i8* @strchr( i8* %hello_p, i32 119 )		; <i8*> [#uses=1]
-	%ignore = call i8* @strchr( i8* %null_p, i32 119 )		; <i8*> [#uses=0]
-	%len = call i32 @puts( i8* %world )		; <i32> [#uses=1]
-	%index = add i32 %len, 112		; <i32> [#uses=2]
-	%result = call i8* @strchr( i8* %hello_p, i32 %index )		; <i8*> [#uses=0]
+define i32 @foo(i32 %index) {
+	%hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
+	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+	%world = call i8* @strchr(i8* %hello_p, i32 119)
+; CHECK: getelementptr i8* %hello_p, i64 6
+	%ignore = call i8* @strchr(i8* %null_p, i32 119)
+; CHECK-NOT: call i8* strchr
+	%null = call i8* @strchr(i8* %hello_p, i32 0)
+; CHECK: getelementptr i8* %hello_p, i64 13
+	%result = call i8* @strchr(i8* %hello_p, i32 %index)
+; CHECK: call i8* @memchr(i8* %hello_p, i32 %index, i64 14)
 	ret i32 %index
 }
 
diff --git a/test/Transforms/SimplifyLibCalls/StrPBrk.ll b/test/Transforms/SimplifyLibCalls/StrPBrk.ll
new file mode 100644
index 000000000000..29c3b7477b47
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/StrPBrk.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "-p:64:64:64"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i8* @strpbrk(i8*, i8*)
+
+define void @test(i8* %s1, i8* %s2) {
+	%hello_p = getelementptr [12 x i8]* @hello, i32 0, i32 0
+	%w_p = getelementptr [2 x i8]* @w, i32 0, i32 0
+	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+	%test1 = call i8* @strpbrk(i8* %null_p, i8* %s2)
+	%test2 = call i8* @strpbrk(i8* %s1, i8* %null_p)
+; CHECK-NOT: call i8* @strpbrk
+	%test3 = call i8* @strpbrk(i8* %s1, i8* %w_p)
+; CHECK: call i8* @strchr(i8* %s1, i32 119)
+	%test4 = call i8* @strpbrk(i8* %hello_p, i8* %w_p)
+; CHECK: getelementptr i8* %hello_p, i64 6
+	%test5 = call i8* @strpbrk(i8* %s1, i8* %s2)
+; CHECK: call i8* @strpbrk(i8* %s1, i8* %s2)
+	ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/StrRChr.ll b/test/Transforms/SimplifyLibCalls/StrRChr.ll
new file mode 100644
index 000000000000..2259fc0289fb
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/StrRChr.ll
@@ -0,0 +1,23 @@
+; Test that the StrRChrOptimizer works correctly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "-p:64:64:64"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i8* @strrchr(i8*, i32)
+
+define void @foo(i8* %bar) {
+	%hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
+	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+	%world = call i8* @strrchr(i8* %hello_p, i32 119)
+; CHECK: getelementptr i8* %hello_p, i64 6
+	%ignore = call i8* @strrchr(i8* %null_p, i32 119)
+; CHECK-NOT: call i8* strrchr
+	%null = call i8* @strrchr(i8* %hello_p, i32 0)
+; CHECK: getelementptr i8* %hello_p, i64 13
+	%strchr = call i8* @strrchr(i8* %bar, i32 0)
+; CHECK: call i8* @strchr(i8* %bar, i32 0)
+	ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/StrSpn.ll b/test/Transforms/SimplifyLibCalls/StrSpn.ll
new file mode 100644
index 000000000000..800c19088337
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/StrSpn.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "-p:64:64:64"
+
+@abcba = constant [6 x i8] c"abcba\00"
+@abc = constant [4 x i8] c"abc\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i64 @strspn(i8*, i8*)
+
+define i64 @testspn(i8* %s1, i8* %s2) {
+  	%abcba_p = getelementptr [6 x i8]* @abcba, i32 0, i32 0
+	%abc_p = getelementptr [4 x i8]* @abc, i32 0, i32 0
+	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+	%test1 = call i64 @strspn(i8* %s1, i8* %null_p)
+	%test2 = call i64 @strspn(i8* %null_p, i8* %s2)
+	%test3 = call i64 @strspn(i8* %abcba_p, i8* %abc_p)
+; CHECK-NOT: call i64 @strspn
+	%test4 = call i64 @strspn(i8* %s1, i8* %s2)
+; CHECK: call i64 @strspn(i8* %s1, i8* %s2)
+	ret i64 %test3
+; CHECK: ret i64 5
+}
+
+declare i64 @strcspn(i8*, i8*)
+
+define i64 @testcspn(i8* %s1, i8* %s2) {
+  	%abcba_p = getelementptr [6 x i8]* @abcba, i32 0, i32 0
+	%abc_p = getelementptr [4 x i8]* @abc, i32 0, i32 0
+	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+	%test1 = call i64 @strcspn(i8* %s1, i8* %null_p)
+; CHECK: call i64 @strlen(i8* %s1)
+	%test2 = call i64 @strcspn(i8* %null_p, i8* %s2)
+	%test3 = call i64 @strcspn(i8* %abcba_p, i8* %abc_p)
+; CHECK-NOT: call i64 @strcspn
+	%test4 = call i64 @strcspn(i8* %s1, i8* %s2)
+; CHECK: call i64 @strcspn(i8* %s1, i8* %s2)
+        %add0 = add i64 %test1, %test3
+; CHECK: add i64 %{{.+}}, 0
+	ret i64 %add0
+}
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
index a7af5a968639..8780e32e0a0c 100644
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ b/test/Transforms/SimplifyLibCalls/floor.ll
@@ -29,6 +29,8 @@ define float @test_ceil(float %C) {
 	ret float %F
 }
 
+; PR8466
+; XFAIL: win32
 define float @test_nearbyint(float %C) {
 	%D = fpext float %C to double		; <double> [#uses=1]
 	; --> nearbyintf
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index beb9481c840f..54b7f1369de4 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sink -S | FileCheck %s
+; RUN: opt < %s -basicaa -sink -S | FileCheck %s
 
 @A = external global i32
 @B = external global i32
diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll
new file mode 100644
index 000000000000..93638804f92e
--- /dev/null
+++ b/test/Transforms/TailCallElim/dup_tail.ll
@@ -0,0 +1,23 @@
+; Duplicate the return into if.end to enable TCE.
+; RUN: opt %s -tailcallelim -stats -disable-output |& grep {Number of return duplicated}
+
+define i32 @fib(i32 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %n, 2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %sub = add nsw i32 %n, -2
+  %call = call i32 @fib(i32 %sub)
+  %sub3 = add nsw i32 %n, -1
+  %call4 = call i32 @fib(i32 %sub3)
+  %add = add nsw i32 %call, %call4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ %add, %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 5fe07324a1ba..35092964a5cd 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -21,15 +21,21 @@ if llvm_obj_root is not None:
 llvm_build_mode = getattr(config, 'llvm_build_mode', "Debug")
 config.test_format = lit.formats.GoogleTest(llvm_build_mode, 'Tests')
 
+# Propagate the temp directory. Windows requires this because it uses \Windows\
+# if none of these are present.
+if 'TMP' in os.environ:
+    config.environment['TMP'] = os.environ['TMP']
+if 'TEMP' in os.environ:
+    config.environment['TEMP'] = os.environ['TEMP']
+
 ###
 
 # If necessary, point the dynamic loader at libLLVM.so.
 if config.enable_shared:
-    libdir = os.path.join(config.llvm_obj_root, config.llvm_build_mode, 'lib')
     shlibpath = config.environment.get(config.shlibpath_var,'')
     if shlibpath:
-        shlibpath = ':' + shlibpath
-    shlibpath = libdir + shlibpath
+        shlibpath = os.pathsep + shlibpath
+    shlibpath = config.shlibdir + shlibpath
     config.environment[config.shlibpath_var] = shlibpath
 
 # Check that the object root is known.
diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in
index 51b5bc416f69..9643507b1640 100644
--- a/test/Unit/lit.site.cfg.in
+++ b/test/Unit/lit.site.cfg.in
@@ -6,7 +6,17 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
 config.llvmgcc_dir = "@LLVMGCCDIR@"
 config.llvm_build_mode = "@LLVM_BUILD_MODE@"
 config.enable_shared = @ENABLE_SHARED@
+config.shlibdir = "@SHLIBDIR@"
 config.shlibpath_var = "@SHLIBPATH_VAR@"
 
+# Support substitution of the tools_dir and build_mode with user parameters.
+# This is used when we can't determine the tool dir at configuration time.
+try:
+    config.llvm_tools_dir = config.llvm_tools_dir % lit.params
+    config.llvm_build_mode = config.llvm_build_mode % lit.params
+except KeyError,e:
+    key, = e.args
+    lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+
 # Let the main config do the real work.
 lit.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg")
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
index 19a27296044b..d92503a37337 100644
--- a/test/lib/llvm.exp
+++ b/test/lib/llvm.exp
@@ -47,7 +47,7 @@ proc execOneLine { test PRS outcome lineno line } {
 # cases.
 proc substitute { line test tmpFile } {
   global srcroot objroot srcdir objdir subdir target_triplet
-  global llvmgcc llvmgxx ocamlopt
+  global llvmgcc llvmgxx emitir ocamlopt
   global gccpath gxxpath compile_c compile_cxx link shlibext llvmlibsdir
   global llvmdsymutil valgrind grep gas bugpoint_topts
   set path [file join $srcdir $subdir]
@@ -60,9 +60,9 @@ proc substitute { line test tmpFile } {
   #replace %llvmgcc_only with actual path to llvmgcc
   regsub -all {%llvmgcc_only} $new_line "$llvmgcc" new_line
   #replace %llvmgcc with actual path to llvmgcc
-  regsub -all {%llvmgcc} $new_line "$llvmgcc -emit-llvm -w" new_line
+  regsub -all {%llvmgcc} $new_line "$llvmgcc $emitir -w" new_line
   #replace %llvmgxx with actual path to llvmg++
-  regsub -all {%llvmgxx} $new_line "$llvmgxx -emit-llvm -w" new_line
+  regsub -all {%llvmgxx} $new_line "$llvmgxx $emitir -w" new_line
   #replace %compile_cxx with C++ compilation command
   regsub -all {%compile_cxx} $new_line "$compile_cxx" new_line
   #replace %compile_c with C compilation command
diff --git a/test/lit.cfg b/test/lit.cfg
index f15777c99912..21b0a48be9f1 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -3,6 +3,8 @@
 # Configuration file for the 'lit' test runner.
 
 import os
+import sys
+import re
 
 # name: The name of this test suite.
 config.name = 'LLVM'
@@ -17,6 +19,18 @@ config.suffixes = []
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
 
+# Tweak PATH for Win32
+if sys.platform in ['win32']:
+    # Seek sane tools in directories and set to $PATH.
+    path = getattr(config, 'lit_tools_dir', None)
+    path = lit.getToolsPath(path,
+                            config.environment['PATH'],
+                            ['cmp.exe', 'grep.exe', 'sed.exe'])
+    if path is not None:
+        path = os.path.pathsep.join((path,
+                                     config.environment['PATH']))
+        config.environment['PATH'] = path
+
 # test_exec_root: The root path where tests should be run.
 llvm_obj_root = getattr(config, 'llvm_obj_root', None)
 if llvm_obj_root is not None:
@@ -25,6 +39,18 @@ if llvm_obj_root is not None:
 # Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
 # dir (if available).
 if llvm_obj_root is not None:
+    # Include llvm-gcc first, as the llvm-gcc binaryies will not appear
+    # neither in the tools nor in the scripts dir. However it might be
+    # possible, that some old llvm tools are in the llvm-gcc dir. Adding
+    # llvm-gcc dir first ensures, that those will always be overwritten
+    # by the new tools in llvm_tools_dir. So now outdated tools are used
+      # for testing
+    llvmgcc_dir = getattr(config, 'llvmgcc_dir', None)
+    if llvmgcc_dir:
+        path = os.path.pathsep.join((os.path.join(llvmgcc_dir, 'bin'),
+                                     config.environment['PATH']))
+        config.environment['PATH'] = path
+
     llvm_src_root = getattr(config, 'llvm_src_root', None)
     if not llvm_src_root:
         lit.fatal('No LLVM source root set!')
@@ -39,28 +65,29 @@ if llvm_obj_root is not None:
     path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
     config.environment['PATH'] = path
 
-    llvmgcc_dir = getattr(config, 'llvmgcc_dir', None)
-    if llvmgcc_dir:
-        path = os.path.pathsep.join((os.path.join(llvmgcc_dir, 'bin'),
-                                     config.environment['PATH']))
-        config.environment['PATH'] = path
-
-# Propogate 'HOME' through the environment.
+# Propagate 'HOME' through the environment.
 if 'HOME' in os.environ:
     config.environment['HOME'] = os.environ['HOME']
 
-# Propogate 'INCLUDE' through the environment.
+# Propagate 'INCLUDE' through the environment.
 if 'INCLUDE' in os.environ:
     config.environment['INCLUDE'] = os.environ['INCLUDE']
 
-# Propogate 'LIB' through the environment.
+# Propagate 'LIB' through the environment.
 if 'LIB' in os.environ:
     config.environment['LIB'] = os.environ['LIB']
 
-# Propogate LLVM_SRC_ROOT into the environment.
+# Propagate the temp directory. Windows requires this because it uses \Windows\
+# if none of these are present.
+if 'TMP' in os.environ:
+    config.environment['TMP'] = os.environ['TMP']
+if 'TEMP' in os.environ:
+    config.environment['TEMP'] = os.environ['TEMP']
+
+# Propagate LLVM_SRC_ROOT into the environment.
 config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
 
-# Propogate PYTHON_EXEUTABLE into the environment
+# Propagate PYTHON_EXECUTABLE into the environment
 config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable',
                                                   '')
 
@@ -125,12 +152,13 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
 
 # Add substitutions.
 config.substitutions.append(('%llvmgcc_only', site_exp['llvmgcc']))
-for sub in ['llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
+for sub in ['llvmgcc', 'llvmgxx', 'emitir', 'compile_cxx', 'compile_c',
             'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
+            'llvmshlibdir',
             'bugpoint_topts']:
     if sub in ('llvmgcc', 'llvmgxx'):
         config.substitutions.append(('%' + sub,
-                                     site_exp[sub] + ' -emit-llvm -w'))
+                                     site_exp[sub] + ' %emitir -w'))
     # FIXME: This is a hack to avoid LLVMC tests failing due to a clang driver
     #        warning when passing in "-fexceptions -fno-exceptions".
     elif sub == 'compile_cxx':
@@ -139,6 +167,45 @@ for sub in ['llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
     else:
         config.substitutions.append(('%' + sub, site_exp[sub]))
 
+# For each occurrence of an llvm tool name as its own word, replace it
+# with the full path to the build directory holding that tool.  This
+# ensures that we are testing the tools just built and not some random
+# tools that might happen to be in the user's PATH.  Thus this list
+# includes every tool placed in $(LLVM_OBJ_ROOT)/$(BuildMode)/bin
+# (llvm_tools_dir in lit parlance).
+                # Don't match 'bugpoint-' or 'clang-'.
+                                        # Don't match '/clang'.
+for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/)\bclang\b(?!-)",
+                r"\bedis\b",            r"\bgold\b",
+                r"\bllc\b",             r"\blli\b",
+                r"\bllvm-ar\b",         r"\bllvm-as\b",
+                r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
+                r"\bllvm-diff\b",       r"\bllvm-dis\b",
+                r"\bllvm-extract\b",    r"\bllvm-ld\b",
+                r"\bllvm-link\b",       r"\bllvm-mc\b",
+                r"\bllvm-nm\b",         r"\bllvm-prof\b",
+                r"\bllvm-ranlib\b",     r"\bllvm-shlib\b",
+                r"\bllvm-stub\b",       r"\bllvm2cpp\b",
+                # Don't match '-llvmc'.
+                r"(?<!-)\bllvmc\b",     r"\blto\b",
+                                        # Don't match '.opt', '-opt',
+                                        # '^opt' or '/opt'.
+                r"\bmacho-dump\b",      r"(?<!\.|-|\^|/)\bopt\b",
+                r"\btblgen\b",          r"\bFileCheck\b",
+                r"\bFileUpdate\b",      r"\bc-index-test\b",
+                r"\bfpcmp\b",           r"\bllvm-PerfectShuffle\b",
+                # Handle these specially as they are strings searched
+                # for during testing.
+                r"\| \bcount\b",         r"\| \bnot\b"]:
+    # Extract the tool name from the pattern.  This relies on the tool
+    # name being surrounded by \b word match operators.  If the
+    # pattern starts with "| ", include it in the string to be
+    # substituted.
+    substitution = re.sub(r"^(\\)?((\| )?)\W+b([0-9A-Za-z-_]+)\\b\W*$",
+                          r"\2" + llvm_tools_dir + "/" + r"\4",
+                          pattern)
+    config.substitutions.append((pattern, substitution))
+
 excludes = []
 
 # Provide target_triple for use in XFAIL and XTARGET.
@@ -214,3 +281,19 @@ def on_clone(parent, cfg, for_path):
     lit.error('unable to understand %r:\n%s' % (libPath, lib))
 
 config.on_clone = on_clone
+
+### Features
+
+# Shell execution
+if sys.platform not in ['win32'] or lit.getBashPath() != '':
+    config.available_features.add('shell')
+
+# Loadable module
+# FIXME: This should be supplied by Makefile or autoconf.
+if sys.platform in ['win32', 'cygwin']:
+    loadable_module = (config.enable_shared == 1)
+else:
+    loadable_module = True
+
+if loadable_module:
+    config.available_features.add('loadable_module')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 79b2c602a07c..3588aa6245d7 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -4,7 +4,17 @@ config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
 config.llvmgcc_dir = "@LLVMGCCDIR@"
+config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
+config.enable_shared = @ENABLE_SHARED@
+
+# Support substitution of the tools_dir with user parameters. This is
+# used when we can't determine the tool dir at configuration time.
+try:
+    config.llvm_tools_dir = config.llvm_tools_dir % lit.params
+except KeyError,e:
+    key, = e.args
+    lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
 
 # Let the main config do the real work.
 lit.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg")
diff --git a/test/site.exp.in b/test/site.exp.in
index c760c2c1992c..277d54995f6b 100644
--- a/test/site.exp.in
+++ b/test/site.exp.in
@@ -5,6 +5,7 @@ set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
 set llvmgcc_langs "@LLVMGCC_LANGS@"
 set llvmtoolsdir "@LLVM_TOOLS_DIR@"
 set llvmlibsdir "@LLVM_LIBS_DIR@"
+set llvmshlibdir "@SHLIBDIR@"
 set llvm_bindings "@LLVM_BINDINGS@"
 set srcroot "@LLVM_SOURCE_DIR@"
 set objroot "@LLVM_BINARY_DIR@"
@@ -24,3 +25,4 @@ set valgrind "@VALGRIND@"
 set grep "@GREP@"
 set gas "@AS@"
 set llvmdsymutil "@DSYMUTIL@"
+set emitir "@LLVMCC_EMITIR_FLAG@"
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 7ed10e9729de..2f37911d2518 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,6 +1,14 @@
-# NOTE: The tools are organized into five groups of four consisting of one
-# large and three small executables. This is done to minimize memory load
-# in parallel builds.  Please retain this ordering.
+# NOTE: The tools are organized into groups of four consisting of one large and
+# three small executables. This is done to minimize memory load in parallel
+# builds.  Please retain this ordering.
+
+# If polly exists and is not disabled compile it and add it to the LLVM tools.
+option(LLVM_BUILD_POLLY "Compile polly" ON)
+if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
+  if (LLVM_BUILD_POLLY)
+    add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/polly)
+  endif (LLVM_BUILD_POLLY)
+endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
 
 if( NOT WIN32 OR MSYS OR CYGWIN )
   # It is useful to build llvm-config before the other tools, so we
@@ -28,6 +36,8 @@ add_subdirectory(lli)
 
 add_subdirectory(llvm-extract)
 add_subdirectory(llvm-diff)
+add_subdirectory(macho-dump)
+add_subdirectory(llvm-objdump)
 
 add_subdirectory(bugpoint)
 add_subdirectory(bugpoint-passes)
diff --git a/tools/Makefile b/tools/Makefile
index aa07a2b1b77f..731024763b3a 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -12,6 +12,10 @@ LEVEL := ..
 # Build clang if present.
 OPTIONAL_PARALLEL_DIRS := clang
 
+# Build LLDB if present. Note LLDB must be built last as it depends on the
+# wider LLVM infrastructure (including Clang). 
+OPTIONAL_DIRS := lldb
+
 # NOTE: The tools are organized into five groups of four consisting of one
 # large and three small executables. This is done to minimize memory load
 # in parallel builds.  Please retain this ordering.
@@ -21,12 +25,13 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
                  llvm-ld llvm-prof llvm-link \
                  lli llvm-extract llvm-mc \
                  bugpoint llvm-bcanalyzer llvm-stub \
-                 llvmc llvm-diff
+                 llvmc llvm-diff macho-dump llvm-objdump
 
 # Let users override the set of tools to build from the command line.
 ifdef ONLY_TOOLS
   OPTIONAL_PARALLEL_DIRS :=
-  PARALLEL_DIRS := $(ONLY_TOOLS)
+  OPTIONAL_DIRS := $(findstring lldb,$(ONLY_TOOLS))
+  PARALLEL_DIRS := $(filter-out lldb,$(ONLY_TOOLS))
 endif
 
 include $(LEVEL)/Makefile.config
@@ -34,26 +39,37 @@ include $(LEVEL)/Makefile.config
 
 # These libraries build as dynamic libraries (.dylib /.so), they can only be
 # built if ENABLE_PIC is set.
+ifndef ONLY_TOOLS
 ifeq ($(ENABLE_PIC),1)
-  # No support for dynamic libraries on windows targets.
-  ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
-    # gold only builds if binutils is around.  It requires "lto" to build before
-    # it so it is added to DIRS.
-    ifdef BINUTILS_INCDIR
-      DIRS += lto gold
-    else
-      PARALLEL_DIRS += lto
-    endif
+  # gold only builds if binutils is around.  It requires "lto" to build before
+  # it so it is added to DIRS.
+  ifdef BINUTILS_INCDIR
+    DIRS += lto gold
+  else
+    PARALLEL_DIRS += lto
+  endif
 
-    PARALLEL_DIRS += bugpoint-passes
+  PARALLEL_DIRS += bugpoint-passes
 
-    # The edis library is only supported if ARM and/or X86 are enabled, and if
-    # LLVM is being built PIC on platforms that support dylibs.
-    ifneq ($(DISABLE_EDIS),1)
+  # The edis library is only supported if ARM and/or X86 are enabled, and if
+  # LLVM is being built PIC on platforms that support dylibs.
+  ifneq ($(DISABLE_EDIS),1)
     ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),)
       PARALLEL_DIRS += edis
     endif
-    endif
+  endif
+endif
+
+ifdef LLVM_HAS_POLLY
+  PARALLEL_DIRS += polly
+endif
+endif
+
+# On Win32, loadable modules can be built with ENABLE_SHARED.
+ifneq ($(ENABLE_SHARED),1)
+  ifneq (,$(filter $(HOST_OS), Cygwin MingW))
+    PARALLEL_DIRS := $(filter-out bugpoint-passes, \
+                        $(PARALLEL_DIRS))
   endif
 endif
 
diff --git a/tools/bugpoint-passes/CMakeLists.txt b/tools/bugpoint-passes/CMakeLists.txt
index 50109a52c249..b2f1bb5d1350 100644
--- a/tools/bugpoint-passes/CMakeLists.txt
+++ b/tools/bugpoint-passes/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_llvm_loadable_module( BugpointPasses
   TestPasses.cpp
   )
+
+add_dependencies(BugpointPasses bugpoint)
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 6966671f9cb2..1cbf6328b363 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -23,7 +23,7 @@
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index e48806aee6bc..cc78489e3d90 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -17,6 +17,7 @@
 #define BUGDRIVER_H
 
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 #include <vector>
 #include <string>
 
@@ -322,7 +323,7 @@ void DeleteFunctionBody(Function *F);
 /// module, split the functions OUT of the specified module, and place them in
 /// the new module.
 Module *SplitFunctionsOutOfModule(Module *M, const std::vector<Function*> &F,
-                                  ValueMap<const Value*, Value*> &VMap);
+                                  ValueToValueMapTy &VMap);
 
 } // End llvm namespace
 
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index 57dc1c830c10..f19ef6222f56 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -130,7 +130,7 @@ bool
 ReduceCrashingGlobalVariables::TestGlobalVariables(
                               std::vector<GlobalVariable*> &GVs) {
   // Clone the program to try hacking it apart...
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
@@ -204,7 +204,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
     return false;
 
   // Clone the program to try hacking it apart...
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
@@ -271,7 +271,7 @@ namespace {
 
 bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
   // Clone the program to try hacking it apart...
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
@@ -381,7 +381,7 @@ namespace {
 bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
                                            &Insts) {
   // Clone the program to try hacking it apart...
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *M = CloneModule(BD.getProgram(), VMap);
 
   // Convert list to set for fast lookup...
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 731248493247..f1601cdb8457 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -28,7 +28,8 @@ namespace {
   // for miscompilation.
   //
   enum OutputType {
-    AutoPick, RunLLI, RunJIT, RunLLC, RunLLCIA, RunCBE, CBE_bug, LLC_Safe,Custom
+    AutoPick, RunLLI, RunJIT, RunLLC, RunLLCIA, RunCBE, CBE_bug, LLC_Safe,
+    CompileCustom, Custom
   };
 
   cl::opt<double>
@@ -50,6 +51,9 @@ namespace {
                             clEnumValN(RunCBE, "run-cbe", "Compile with CBE"),
                             clEnumValN(CBE_bug,"cbe-bug", "Find CBE bugs"),
                             clEnumValN(LLC_Safe, "llc-safe", "Use LLC for all"),
+                            clEnumValN(CompileCustom, "compile-custom",
+                            "Use -compile-command to define a command to "
+                            "compile the bitcode. Useful to avoid linking."),
                             clEnumValN(Custom, "run-custom",
                             "Use -exec-command to define a command to execute "
                             "the bitcode. Useful for cross-compilation."),
@@ -87,9 +91,14 @@ namespace {
                          "into executing programs"));
 
   cl::list<std::string>
-  AdditionalLinkerArgs("Xlinker", 
+  AdditionalLinkerArgs("Xlinker",
       cl::desc("Additional arguments to pass to the linker"));
 
+  cl::opt<std::string>
+  CustomCompileCommand("compile-command", cl::init("llc"),
+      cl::desc("Command to compile the bitcode (use with -compile-custom) "
+               "(default: llc)"));
+
   cl::opt<std::string>
   CustomExecCommand("exec-command", cl::init("simulate"),
       cl::desc("Command to execute the bitcode (use with -run-custom) "
@@ -119,7 +128,7 @@ namespace {
                cl::ZeroOrMore, cl::PositionalEatsArgs);
 
   cl::opt<std::string>
-  GCCBinary("gcc", cl::init("gcc"), 
+  GCCBinary("gcc", cl::init("gcc"),
               cl::desc("The gcc binary to use. (default 'gcc')"));
 
   cl::list<std::string>
@@ -157,7 +166,7 @@ bool BugDriver::initializeExecutionEnvironment() {
     if (!Interpreter) {
       InterpreterSel = RunLLC;
       Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
-                                                   GCCBinary, &ToolArgv, 
+                                                   GCCBinary, &ToolArgv,
                                                    &GCCToolArgv);
     }
     if (!Interpreter) {
@@ -178,7 +187,7 @@ bool BugDriver::initializeExecutionEnvironment() {
   case RunLLCIA:
   case LLC_Safe:
     Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
-                                                 GCCBinary, &ToolArgv, 
+                                                 GCCBinary, &ToolArgv,
                                                  &GCCToolArgv,
                                                  InterpreterSel == RunLLCIA);
     break;
@@ -189,11 +198,16 @@ bool BugDriver::initializeExecutionEnvironment() {
   case RunCBE:
   case CBE_bug:
     Interpreter = AbstractInterpreter::createCBE(getToolName(), Message,
-                                                 GCCBinary, &ToolArgv, 
+                                                 GCCBinary, &ToolArgv,
                                                  &GCCToolArgv);
     break;
+  case CompileCustom:
+    Interpreter =
+      AbstractInterpreter::createCustomCompiler(Message, CustomCompileCommand);
+    break;
   case Custom:
-    Interpreter = AbstractInterpreter::createCustom(Message, CustomExecCommand);
+    Interpreter =
+      AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
     break;
   default:
     Message = "Sorry, this back-end is not supported by bugpoint right now!\n";
@@ -216,7 +230,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
-                                                       GCCBinary, 
+                                                       GCCBinary,
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -227,7 +241,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
-                                                       GCCBinary, 
+                                                       GCCBinary,
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -249,7 +263,7 @@ bool BugDriver::initializeExecutionEnvironment() {
       SafeInterpreterSel = RunLLC;
       SafeToolArgs.push_back("--relocation-model=pic");
       SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
-                                                       GCCBinary, 
+                                                       GCCBinary,
                                                        &SafeToolArgs,
                                                        &GCCToolArgv);
     }
@@ -272,8 +286,8 @@ bool BugDriver::initializeExecutionEnvironment() {
                                                      &GCCToolArgv);
     break;
   case Custom:
-    SafeInterpreter = AbstractInterpreter::createCustom(Message,
-                                                        CustomExecCommand);
+    SafeInterpreter =
+      AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
     break;
   default:
     Message = "Sorry, this back-end is not supported by bugpoint as the "
@@ -281,7 +295,7 @@ bool BugDriver::initializeExecutionEnvironment() {
     break;
   }
   if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); }
-  
+
   gcc = GCC::create(Message, GCCBinary, &GCCToolArgv);
   if (!gcc) { outs() << Message << "\nExiting.\n"; exit(1); }
 
@@ -298,7 +312,7 @@ void BugDriver::compileProgram(Module *M, std::string *Error) const {
   sys::Path BitcodeFile (OutputPrefix + "-test-program.bc");
   std::string ErrMsg;
   if (BitcodeFile.makeUnique(true, &ErrMsg)) {
-    errs() << ToolName << ": Error making unique filename: " << ErrMsg 
+    errs() << ToolName << ": Error making unique filename: " << ErrMsg
            << "\n";
     exit(1);
   }
@@ -432,7 +446,7 @@ std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
 }
 
 /// createReferenceFile - calls compileProgram and then records the output
-/// into ReferenceOutputFile. Returns true if reference file created, false 
+/// into ReferenceOutputFile. Returns true if reference file created, false
 /// otherwise. Note: initializeExecutionEnvironment should be called BEFORE
 /// this function.
 ///
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 524f130ba751..593765cb70fc 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -29,9 +29,9 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
 #include <set>
 using namespace llvm;
 
@@ -193,7 +193,7 @@ static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
 /// static ctors/dtors, we need to add an llvm.global_[cd]tors global to M2, and
 /// prune appropriate entries out of M1s list.
 static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
-                                ValueMap<const Value*, Value*> &VMap) {
+                                ValueToValueMapTy &VMap) {
   GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
   if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
       !GV->use_empty()) return;
@@ -256,7 +256,7 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
 Module *
 llvm::SplitFunctionsOutOfModule(Module *M,
                                 const std::vector<Function*> &F,
-                                ValueMap<const Value*, Value*> &VMap) {
+                                ValueToValueMapTy &VMap) {
   // Make sure functions & globals are all external so that linkage
   // between the two modules will work.
   for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
@@ -268,7 +268,7 @@ llvm::SplitFunctionsOutOfModule(Module *M,
     I->setLinkage(GlobalValue::ExternalLinkage);
   }
 
-  ValueMap<const Value*, Value*> NewVMap;
+  ValueToValueMapTy NewVMap;
   Module *New = CloneModule(M, NewVMap);
 
   // Make sure global initializers exist only in the safe module (CBE->.so)
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 3f2b6968718b..3a5f143ace64 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -261,7 +261,7 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
   //   a function, we want to continue with the original function. Otherwise
   //   we can conclude that a function triggers the bug when in fact one
   //   needs a larger set of original functions to do so.
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *Clone = CloneModule(BD.getProgram(), VMap);
   Module *Orig = BD.swapProgramIn(Clone);
 
@@ -310,7 +310,7 @@ static bool ExtractLoops(BugDriver &BD,
   while (1) {
     if (BugpointIsInterrupted) return MadeChange;
     
-    ValueMap<const Value*, Value*> VMap;
+    ValueToValueMapTy VMap;
     Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
     Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
                                                    MiscompiledFunctions,
@@ -476,7 +476,7 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
   outs() << '\n';
 
   // Split the module into the two halves of the program we want.
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *Clone = CloneModule(BD.getProgram(), VMap);
   Module *Orig = BD.swapProgramIn(Clone);
   std::vector<Function*> FuncsOnClone;
@@ -551,7 +551,7 @@ static bool ExtractBlocks(BugDriver &BD,
       return false;
   }
 
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *ProgClone = CloneModule(BD.getProgram(), VMap);
   Module *ToExtract = SplitFunctionsOutOfModule(ProgClone,
                                                 MiscompiledFunctions,
@@ -738,7 +738,7 @@ void BugDriver::debugMiscompilation(std::string *Error) {
 
   // Output a bunch of bitcode files for the user...
   outs() << "Outputting reduced bitcode files which expose the problem:\n";
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *ToNotOptimize = CloneModule(getProgram(), VMap);
   Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
                                                  MiscompiledFunctions,
@@ -1011,7 +1011,7 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
     return true;
 
   // Split the module into the two halves of the program we want.
-  ValueMap<const Value*, Value*> VMap;
+  ValueToValueMapTy VMap;
   Module *ToNotCodeGen = CloneModule(getProgram(), VMap);
   Module *ToCodeGen = SplitFunctionsOutOfModule(ToNotCodeGen, Funcs, VMap);
 
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 3600ca6a81e3..2471cc1866ca 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -15,10 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Note: as a short term hack, the old Unix-specific code and platform-
-// independent code co-exist via conditional compilation until it is verified
-// that the new code works correctly on Unix.
-
 #include "BugDriver.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
@@ -29,9 +25,9 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
 
 #define DONT_GET_PLUGIN_LOADER_OPTION
 #include "llvm/Support/PluginLoader.h"
@@ -130,12 +126,12 @@ bool BugDriver::runPasses(Module *Program,
            << ErrMsg << "\n";
     return(1);
   }
-  
+
   std::string ErrInfo;
   tool_output_file InFile(inputFilename.c_str(), ErrInfo,
                           raw_fd_ostream::F_Binary);
-  
-  
+
+
   if (!ErrInfo.empty()) {
     errs() << "Error opening bitcode file: " << inputFilename.str() << "\n";
     return 1;
@@ -147,11 +143,19 @@ bool BugDriver::runPasses(Module *Program,
     InFile.os().clear_error();
     return 1;
   }
+
+  sys::Path tool = PrependMainExecutablePath("opt", getToolName(),
+                                             (void*)"opt");
+  if (tool.empty()) {
+    errs() << "Cannot find `opt' in executable directory!\n";
+    return 1;
+  }
+
+  // Ok, everything that could go wrong before running opt is done.
   InFile.keep();
 
   // setup the child process' arguments
   SmallVector<const char*, 8> Args;
-  sys::Path tool = FindExecutable("opt", getToolName(), (void*)"opt");
   std::string Opt = tool.str();
   if (UseValgrind) {
     Args.push_back("valgrind");
@@ -192,7 +196,7 @@ bool BugDriver::runPasses(Module *Program,
     prog = sys::Program::FindProgramByName("valgrind");
   else
     prog = tool;
-  
+
   // Redirect stdout and stderr to nowhere if SilencePasses is given
   sys::Path Nowhere;
   const sys::Path *Redirects[3] = {0, &Nowhere, &Nowhere};
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 36dbe144c1c5..37cc9028e073 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -13,7 +13,7 @@
 
 #define DEBUG_TYPE "toolrunner"
 #include "ToolRunner.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
@@ -59,7 +59,8 @@ static int RunProgramWithTimeout(const sys::Path &ProgramPath,
                                  const sys::Path &StdOutFile,
                                  const sys::Path &StdErrFile,
                                  unsigned NumSeconds = 0,
-                                 unsigned MemoryLimit = 0) {
+                                 unsigned MemoryLimit = 0,
+                                 std::string *ErrMsg = 0) {
   const sys::Path* redirects[3];
   redirects[0] = &StdInFile;
   redirects[1] = &StdOutFile;
@@ -76,7 +77,7 @@ static int RunProgramWithTimeout(const sys::Path &ProgramPath,
 
   return
     sys::Program::ExecuteAndWait(ProgramPath, Args, 0, redirects,
-                                 NumSeconds, MemoryLimit);
+                                 NumSeconds, MemoryLimit, ErrMsg);
 }
 
 /// RunProgramRemotelyWithTimeout - This function runs the given program
@@ -141,7 +142,7 @@ static std::string ProcessFailure(sys::Path ProgPath, const char** Args,
   for (const char **Arg = Args; *Arg; ++Arg)
     OS << " " << *Arg;
   OS << "\n";
-  
+
   // Rerun the compiler, capturing any error messages to print them.
   sys::Path ErrorFilename("bugpoint.program_error_messages");
   std::string ErrMsg;
@@ -206,7 +207,8 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
   LLIArgs.push_back(LLIPath.c_str());
   LLIArgs.push_back("-force-interpreter=true");
 
-  for (std::vector<std::string>::const_iterator i = SharedLibs.begin(), e = SharedLibs.end(); i != e; ++i) {
+  for (std::vector<std::string>::const_iterator i = SharedLibs.begin(),
+         e = SharedLibs.end(); i != e; ++i) {
     LLIArgs.push_back("-load");
     LLIArgs.push_back((*i).c_str());
   }
@@ -229,7 +231,7 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
         );
   return RunProgramWithTimeout(sys::Path(LLIPath), &LLIArgs[0],
       sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
-      Timeout, MemoryLimit);
+      Timeout, MemoryLimit, Error);
 }
 
 // LLI create method - Try to find the LLI executable
@@ -237,21 +239,82 @@ AbstractInterpreter *AbstractInterpreter::createLLI(const char *Argv0,
                                                     std::string &Message,
                                      const std::vector<std::string> *ToolArgs) {
   std::string LLIPath =
-    FindExecutable("lli", Argv0, (void *)(intptr_t)&createLLI).str();
+    PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createLLI).str();
   if (!LLIPath.empty()) {
     Message = "Found lli: " + LLIPath + "\n";
     return new LLI(LLIPath, ToolArgs);
   }
 
-  Message = "Cannot find `lli' in executable directory or PATH!\n";
+  Message = "Cannot find `lli' in executable directory!\n";
   return 0;
 }
 
+//===---------------------------------------------------------------------===//
+// Custom compiler command implementation of AbstractIntepreter interface
+//
+// Allows using a custom command for compiling the bitcode, thus allows, for
+// example, to compile a bitcode fragment without linking or executing, then
+// using a custom wrapper script to check for compiler errors.
+namespace {
+  class CustomCompiler : public AbstractInterpreter {
+    std::string CompilerCommand;
+    std::vector<std::string> CompilerArgs;
+  public:
+    CustomCompiler(
+      const std::string &CompilerCmd, std::vector<std::string> CompArgs) :
+      CompilerCommand(CompilerCmd), CompilerArgs(CompArgs) {}
+
+    virtual void compileProgram(const std::string &Bitcode,
+                                std::string *Error,
+                                unsigned Timeout = 0,
+                                unsigned MemoryLimit = 0);
+
+    virtual int ExecuteProgram(const std::string &Bitcode,
+                               const std::vector<std::string> &Args,
+                               const std::string &InputFile,
+                               const std::string &OutputFile,
+                               std::string *Error,
+                               const std::vector<std::string> &GCCArgs =
+                               std::vector<std::string>(),
+                               const std::vector<std::string> &SharedLibs =
+                               std::vector<std::string>(),
+                               unsigned Timeout = 0,
+                               unsigned MemoryLimit = 0) {
+      *Error = "Execution not supported with -compile-custom";
+      return -1;
+    }
+  };
+}
+
+void CustomCompiler::compileProgram(const std::string &Bitcode,
+                                    std::string *Error,
+                                    unsigned Timeout,
+                                    unsigned MemoryLimit) {
+
+  std::vector<const char*> ProgramArgs;
+  ProgramArgs.push_back(CompilerCommand.c_str());
+
+  for (std::size_t i = 0; i < CompilerArgs.size(); ++i)
+    ProgramArgs.push_back(CompilerArgs.at(i).c_str());
+  ProgramArgs.push_back(Bitcode.c_str());
+  ProgramArgs.push_back(0);
+
+  // Add optional parameters to the running program from Argv
+  for (unsigned i = 0, e = CompilerArgs.size(); i != e; ++i)
+    ProgramArgs.push_back(CompilerArgs[i].c_str());
+
+  if (RunProgramWithTimeout( sys::Path(CompilerCommand), &ProgramArgs[0],
+                             sys::Path(), sys::Path(), sys::Path(),
+                             Timeout, MemoryLimit, Error))
+    *Error = ProcessFailure(sys::Path(CompilerCommand), &ProgramArgs[0],
+                           Timeout, MemoryLimit);
+}
+
 //===---------------------------------------------------------------------===//
 // Custom execution command implementation of AbstractIntepreter interface
 //
 // Allows using a custom command for executing the bitcode, thus allows,
-// for example, to invoke a cross compiler for code generation followed by 
+// for example, to invoke a cross compiler for code generation followed by
 // a simulator that executes the generated binary.
 namespace {
   class CustomExecutor : public AbstractInterpreter {
@@ -299,55 +362,78 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
 
   return RunProgramWithTimeout(
     sys::Path(ExecutionCommand),
-    &ProgramArgs[0], sys::Path(InputFile), sys::Path(OutputFile), 
-    sys::Path(OutputFile), Timeout, MemoryLimit);
+    &ProgramArgs[0], sys::Path(InputFile), sys::Path(OutputFile),
+    sys::Path(OutputFile), Timeout, MemoryLimit, Error);
 }
 
-// Custom execution environment create method, takes the execution command
-// as arguments
-AbstractInterpreter *AbstractInterpreter::createCustom(
-                    std::string &Message,
-                    const std::string &ExecCommandLine) {
+// Tokenize the CommandLine to the command and the args to allow
+// defining a full command line as the command instead of just the
+// executed program. We cannot just pass the whole string after the command
+// as a single argument because then program sees only a single
+// command line argument (with spaces in it: "foo bar" instead
+// of "foo" and "bar").
+//
+// code borrowed from:
+// http://oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html
+static void lexCommand(std::string &Message, const std::string &CommandLine,
+                       std::string &CmdPath, std::vector<std::string> Args) {
 
   std::string Command = "";
-  std::vector<std::string> Args;
   std::string delimiters = " ";
 
-  // Tokenize the ExecCommandLine to the command and the args to allow
-  // defining a full command line as the command instead of just the
-  // executed program. We cannot just pass the whole string after the command
-  // as a single argument because then program sees only a single
-  // command line argument (with spaces in it: "foo bar" instead 
-  // of "foo" and "bar").
-
-  // code borrowed from: 
-  // http://oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html
-  std::string::size_type lastPos = 
-    ExecCommandLine.find_first_not_of(delimiters, 0);
-  std::string::size_type pos = 
-    ExecCommandLine.find_first_of(delimiters, lastPos);
+  std::string::size_type lastPos = CommandLine.find_first_not_of(delimiters, 0);
+  std::string::size_type pos = CommandLine.find_first_of(delimiters, lastPos);
 
   while (std::string::npos != pos || std::string::npos != lastPos) {
-    std::string token = ExecCommandLine.substr(lastPos, pos - lastPos);
+    std::string token = CommandLine.substr(lastPos, pos - lastPos);
     if (Command == "")
        Command = token;
     else
        Args.push_back(token);
     // Skip delimiters.  Note the "not_of"
-    lastPos = ExecCommandLine.find_first_not_of(delimiters, pos);
+    lastPos = CommandLine.find_first_not_of(delimiters, pos);
     // Find next "non-delimiter"
-    pos = ExecCommandLine.find_first_of(delimiters, lastPos);
+    pos = CommandLine.find_first_of(delimiters, lastPos);
   }
 
-  std::string CmdPath = sys::Program::FindProgramByName(Command).str();
+  CmdPath = sys::Program::FindProgramByName(Command).str();
   if (CmdPath.empty()) {
-    Message = 
-      std::string("Cannot find '") + Command + 
-      "' in executable directory or PATH!\n";
-    return 0;
+    Message =
+      std::string("Cannot find '") + Command +
+      "' in PATH!\n";
+    return;
   }
 
   Message = "Found command in: " + CmdPath + "\n";
+}
+
+// Custom execution environment create method, takes the execution command
+// as arguments
+AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
+                    std::string &Message,
+                    const std::string &CompileCommandLine) {
+
+  std::string CmdPath;
+  std::vector<std::string> Args;
+  lexCommand(Message, CompileCommandLine, CmdPath, Args);
+  if (CmdPath.empty())
+    return 0;
+
+  return new CustomCompiler(CmdPath, Args);
+}
+
+// Custom execution environment create method, takes the execution command
+// as arguments
+AbstractInterpreter *AbstractInterpreter::createCustomExecutor(
+                    std::string &Message,
+                    const std::string &ExecCommandLine) {
+
+
+  std::string CmdPath;
+  std::vector<std::string> Args;
+  lexCommand(Message, ExecCommandLine, CmdPath, Args);
+  if (CmdPath.empty())
+    return 0;
 
   return new CustomExecutor(CmdPath, Args);
 }
@@ -355,7 +441,7 @@ AbstractInterpreter *AbstractInterpreter::createCustom(
 //===----------------------------------------------------------------------===//
 // LLC Implementation of AbstractIntepreter interface
 //
-GCC::FileType LLC::OutputCode(const std::string &Bitcode, 
+GCC::FileType LLC::OutputCode(const std::string &Bitcode,
                               sys::Path &OutputAsmFile, std::string &Error,
                               unsigned Timeout, unsigned MemoryLimit) {
   const char *Suffix = (UseIntegratedAssembler ? ".llc.o" : ".llc.s");
@@ -376,10 +462,10 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
   LLCArgs.push_back("-o");
   LLCArgs.push_back(OutputAsmFile.c_str()); // Output to the Asm file
   LLCArgs.push_back(Bitcode.c_str());      // This is the input bitcode
-  
+
   if (UseIntegratedAssembler)
     LLCArgs.push_back("-filetype=obj");
-  
+
   LLCArgs.push_back (0);
 
   outs() << (UseIntegratedAssembler ? "<llc-ia>" : "<llc>");
@@ -394,7 +480,7 @@ GCC::FileType LLC::OutputCode(const std::string &Bitcode,
                             Timeout, MemoryLimit))
     Error = ProcessFailure(sys::Path(LLCPath), &LLCArgs[0],
                            Timeout, MemoryLimit);
-  return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;  
+  return UseIntegratedAssembler ? GCC::ObjectFile : GCC::AsmFile;
 }
 
 void LLC::compileProgram(const std::string &Bitcode, std::string *Error,
@@ -437,9 +523,9 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0,
                                     const std::vector<std::string> *GCCArgs,
                                     bool UseIntegratedAssembler) {
   std::string LLCPath =
-    FindExecutable("llc", Argv0, (void *)(intptr_t)&createLLC).str();
+    PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createLLC).str();
   if (LLCPath.empty()) {
-    Message = "Cannot find `llc' in executable directory or PATH!\n";
+    Message = "Cannot find `llc' in executable directory!\n";
     return 0;
   }
 
@@ -474,7 +560,7 @@ namespace {
                                const std::vector<std::string> &GCCArgs =
                                  std::vector<std::string>(),
                                const std::vector<std::string> &SharedLibs =
-                                 std::vector<std::string>(), 
+                                 std::vector<std::string>(),
                                unsigned Timeout = 0,
                                unsigned MemoryLimit = 0);
   };
@@ -517,7 +603,7 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
   DEBUG(errs() << "\nSending output to " << OutputFile << "\n");
   return RunProgramWithTimeout(sys::Path(LLIPath), &JITArgs[0],
       sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
-      Timeout, MemoryLimit);
+      Timeout, MemoryLimit, Error);
 }
 
 /// createJIT - Try to find the LLI executable
@@ -525,13 +611,13 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
 AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
                    std::string &Message, const std::vector<std::string> *Args) {
   std::string LLIPath =
-    FindExecutable("lli", Argv0, (void *)(intptr_t)&createJIT).str();
+    PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createJIT).str();
   if (!LLIPath.empty()) {
     Message = "Found lli: " + LLIPath + "\n";
     return new JIT(LLIPath, Args);
   }
 
-  Message = "Cannot find `lli' in executable directory or PATH!\n";
+  Message = "Cannot find `lli' in executable directory!\n";
   return 0;
 }
 
@@ -603,14 +689,14 @@ int CBE::ExecuteProgram(const std::string &Bitcode,
 ///
 CBE *AbstractInterpreter::createCBE(const char *Argv0,
                                     std::string &Message,
-                                    const std::string &GCCBinary, 
+                                    const std::string &GCCBinary,
                                     const std::vector<std::string> *Args,
                                     const std::vector<std::string> *GCCArgs) {
   sys::Path LLCPath =
-    FindExecutable("llc", Argv0, (void *)(intptr_t)&createCBE);
+    PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createCBE);
   if (LLCPath.isEmpty()) {
     Message =
-      "Cannot find `llc' in executable directory or PATH!\n";
+      "Cannot find `llc' in executable directory!\n";
     return 0;
   }
 
@@ -677,9 +763,9 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
         GCCArgs.push_back("-force_cpusubtype_ALL");
     }
   }
-  
+
   GCCArgs.push_back(ProgramFile.c_str());  // Specify the input filename.
-  
+
   GCCArgs.push_back("-x");
   GCCArgs.push_back("none");
   GCCArgs.push_back("-o");
@@ -771,7 +857,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
     DEBUG(errs() << "<run locally>");
     return RunProgramWithTimeout(OutputBinary, &ProgramArgs[0],
         sys::Path(InputFile), sys::Path(OutputFile), sys::Path(OutputFile),
-        Timeout, MemoryLimit);
+        Timeout, MemoryLimit, Error);
   } else {
     outs() << "<run remotely>"; outs().flush();
     return RunProgramRemotelyWithTimeout(sys::Path(RemoteClientPath),
@@ -793,7 +879,7 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
   OutputFile = uniqueFilename.str();
 
   std::vector<const char*> GCCArgs;
-  
+
   GCCArgs.push_back(GCCPath.c_str());
 
   if (TargetTriple.getArch() == Triple::x86)
@@ -816,7 +902,7 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
     GCCArgs.push_back("-G");       // Compile a shared library, `-G' for Sparc
   else if (TargetTriple.getOS() == Triple::Darwin) {
     // link all source files into a single module in data segment, rather than
-    // generating blocks. dynamic_lookup requires that you set 
+    // generating blocks. dynamic_lookup requires that you set
     // MACOSX_DEPLOYMENT_TARGET=10.3 in your env.  FIXME: it would be better for
     // bugpoint to just pass that in the environment of GCC.
     GCCArgs.push_back("-single_module");
@@ -837,8 +923,8 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
   GCCArgs.push_back(OutputFile.c_str()); // Output to the right filename.
   GCCArgs.push_back("-O2");              // Optimize the program a bit.
 
-  
-  
+
+
   // Add any arguments intended for GCC. We locate them here because this is
   // most likely -L and -l options that need to come before other libraries but
   // after the source. Other options won't be sensitive to placement on the
@@ -847,7 +933,7 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
     GCCArgs.push_back(ArgsForGCC[i].c_str());
   GCCArgs.push_back(0);                    // NULL terminator
 
-  
+
 
   outs() << "<gcc>"; outs().flush();
   DEBUG(errs() << "\nAbout to run:\t";
@@ -870,7 +956,7 @@ GCC *GCC::create(std::string &Message,
                  const std::vector<std::string> *Args) {
   sys::Path GCCPath = sys::Program::FindProgramByName(GCCBinary);
   if (GCCPath.isEmpty()) {
-    Message = "Cannot find `"+ GCCBinary +"' in executable directory or PATH!\n";
+    Message = "Cannot find `"+ GCCBinary +"' in PATH!\n";
     return 0;
   }
 
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index cda0ddfa71d9..cfa8acf6b240 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -21,7 +21,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include <exception>
 #include <vector>
 
@@ -66,7 +66,7 @@ public:
                      const std::string &OutputFile,
                      std::string *Error = 0,
                      const std::vector<std::string> &GCCArgs =
-                         std::vector<std::string>(), 
+                         std::vector<std::string>(),
                      unsigned Timeout = 0,
                      unsigned MemoryLimit = 0);
 
@@ -103,8 +103,13 @@ public:
   static AbstractInterpreter* createJIT(const char *Argv0, std::string &Message,
                                         const std::vector<std::string> *Args=0);
 
-  static AbstractInterpreter* createCustom(std::string &Message,
-                                           const std::string &ExecCommandLine);
+  static AbstractInterpreter*
+  createCustomCompiler(std::string &Message,
+                       const std::string &CompileCommandLine);
+
+  static AbstractInterpreter*
+  createCustomExecutor(std::string &Message,
+                       const std::string &ExecCommandLine);
 
 
   virtual ~AbstractInterpreter() {}
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 79cf563ec6fd..f9c9e18099ef 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -23,10 +23,14 @@
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/StandardPasses.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Valgrind.h"
 #include "llvm/LinkAllVMCore.h"
+
+//Enable this macro to debug bugpoint itself.
+//#define DEBUG_BUGPOINT 1
+
 using namespace llvm;
 
 static cl::opt<bool> 
@@ -71,9 +75,11 @@ OverrideTriple("mtriple", cl::desc("Override target triple for module"));
 /// BugpointIsInterrupted - Set to true when the user presses ctrl-c.
 bool llvm::BugpointIsInterrupted = false;
 
+#ifndef DEBUG_BUGPOINT
 static void BugpointInterruptFunction() {
   BugpointIsInterrupted = true;
 }
+#endif
 
 // Hack to capture a pass list.
 namespace {
@@ -91,14 +97,31 @@ namespace {
 }
 
 int main(int argc, char **argv) {
+#ifndef DEBUG_BUGPOINT
   llvm::sys::PrintStackTraceOnErrorSignal();
   llvm::PrettyStackTraceProgram X(argc, argv);
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+#endif
+  
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeIPA(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeInstrumentation(Registry);
+  initializeTarget(Registry);
+  
   cl::ParseCommandLineOptions(argc, argv,
                               "LLVM automatic testcase reducer. See\nhttp://"
                               "llvm.org/cmds/bugpoint.html"
                               " for more information.\n");
+#ifndef DEBUG_BUGPOINT
   sys::SetInterruptFunction(BugpointInterruptFunction);
+#endif
 
   LLVMContext& Context = getGlobalContext();
   // If we have an override, set it and then track the triple we want Modules
@@ -147,7 +170,9 @@ int main(int argc, char **argv) {
 
   // Bugpoint has the ability of generating a plethora of core files, so to
   // avoid filling up the disk, we prevent it
+#ifndef DEBUG_BUGPOINT
   sys::Process::PreventCoreFiles();
+#endif
 
   std::string Error;
   bool Failure = D.run(Error);
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
index 2019995dcb79..5037f9f6f8b5 100644
--- a/tools/edis/CMakeLists.txt
+++ b/tools/edis/CMakeLists.txt
@@ -1,5 +1,3 @@
-set(LLVM_NO_RTTI 1)
-
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
 add_llvm_library(EnhancedDisassembly
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
index 92484bf0ac4d..b5557fcb915d 100644
--- a/tools/edis/Makefile
+++ b/tools/edis/Makefile
@@ -1,4 +1,4 @@
-##===- tools/ed/Makefile -----------------------------------*- Makefile -*-===##
+##===- tools/edis/Makefile -----------------------------------*- Makefile -*-===##
 # 
 #                     The LLVM Compiler Infrastructure
 #
@@ -26,7 +26,7 @@ ifneq ($(filter $(TARGETS_TO_BUILD), X86),)
 LINK_COMPONENTS += x86asmprinter x86disassembler
 endif
 
-# If the X86 target is enabled, link in the asmprinter and disassembler.
+# If the ARM target is enabled, link in the asmprinter and disassembler.
 ifneq ($(filter $(TARGETS_TO_BUILD), ARM),)
 LINK_COMPONENTS += armasmprinter armdisassembler
 endif
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index 162734681070..66a0271fa257 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -19,10 +19,9 @@ include $(LEVEL)/Makefile.config
 
 LINK_LIBS_IN_SHARED=1
 SHARED_LIBRARY = 1
-BUILD_ARCHIVE = 0
 LOADABLE_MODULE = 1
 
-LINK_COMPONENTS := support system
+LINK_COMPONENTS := support
 LIBS += -llto
 
 # Because off_t is used in the public API, the largefile parts are required for
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 4b58fae96de4..ad2774a64ef4 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -17,10 +17,10 @@
 
 #include "llvm-c/lto.h"
 
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Errno.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
 
 #include <cerrno>
 #include <cstdlib>
@@ -29,6 +29,13 @@
 #include <list>
 #include <vector>
 
+// Support Windows/MinGW crazyness.
+#ifdef _WIN32
+# include <io.h>
+# define lseek _lseek
+# define read _read
+#endif
+
 using namespace llvm;
 
 namespace {
@@ -49,7 +56,6 @@ namespace {
   int gold_version = 0;
 
   struct claimed_file {
-    lto_module_t M;
     void *handle;
     std::vector<ld_plugin_symbol> syms;
   };
@@ -58,6 +64,7 @@ namespace {
   std::string output_name = "";
   std::list<claimed_file> Modules;
   std::vector<sys::Path> Cleanup;
+  lto_code_gen_t code_gen;
 }
 
 namespace options {
@@ -65,6 +72,7 @@ namespace options {
   static bool generate_api_file = false;
   static generate_bc generate_bc_file = BC_NO;
   static std::string bc_path;
+  static std::string obj_path;
   static std::string as_path;
   static std::vector<std::string> as_args;
   static std::vector<std::string> pass_through;
@@ -105,6 +113,8 @@ namespace options {
       pass_through.push_back(item.str());
     } else if (opt.startswith("mtriple=")) {
       triple = opt.substr(strlen("mtriple="));
+    } else if (opt.startswith("obj-path=")) {
+      obj_path = opt.substr(strlen("obj-path="));
     } else if (opt == "emit-llvm") {
       generate_bc_file = BC_ONLY;
     } else if (opt == "also-emit-llvm") {
@@ -226,6 +236,8 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
     return LDPS_ERR;
   }
 
+  code_gen = lto_codegen_create();
+
   return LDPS_OK;
 }
 
@@ -234,7 +246,8 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
 /// with add_symbol if possible.
 static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
                                         int *claimed) {
-  void *buf = NULL;
+  lto_module_t M;
+
   if (file->offset) {
     // Gold has found what might be IR part-way inside of a file, such as
     // an .a archive.
@@ -245,7 +258,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
                  file->offset, sys::StrError(errno).c_str());
       return LDPS_ERR;
     }
-    buf = malloc(file->filesize);
+    void *buf = malloc(file->filesize);
     if (!buf) {
       (*message)(LDPL_ERROR,
                  "Failed to allocate buffer for archive member of size: %d\n",
@@ -265,37 +278,50 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
       free(buf);
       return LDPS_OK;
     }
-  } else if (!lto_module_is_object_file(file->name))
-    return LDPS_OK;
+    M = lto_module_create_from_memory(buf, file->filesize);
+    if (!M) {
+      (*message)(LDPL_ERROR, "Failed to create LLVM module: %s",
+                 lto_get_error_message());
+      return LDPS_ERR;
+    }
+    free(buf);
+  } else {
+    // FIXME: We should not need to pass -1 as the file size, but there
+    // is a bug in BFD that causes it to pass 0 to us. Remove this once
+    // that is fixed.
+    off_t size = file->filesize ? file->filesize : -1;
+
+    // FIXME: We should not need to reset the position in the file, but there
+    // is a bug in BFD. Remove this once that is fixed.
+    off_t old_pos = lseek(file->fd, 0, SEEK_CUR);
+
+    lseek(file->fd, 0, SEEK_SET);
+    M = lto_module_create_from_fd(file->fd, file->name, size);
+
+    lseek(file->fd, old_pos, SEEK_SET);
+    if (!M)
+      return LDPS_OK;
+  }
 
   *claimed = 1;
   Modules.resize(Modules.size() + 1);
   claimed_file &cf = Modules.back();
 
-  cf.M = buf ? lto_module_create_from_memory(buf, file->filesize) :
-               lto_module_create(file->name);
-  free(buf);
-  if (!cf.M) {
-    (*message)(LDPL_ERROR, "Failed to create LLVM module: %s",
-               lto_get_error_message());
-    return LDPS_ERR;
-  }
-
   if (!options::triple.empty())
-    lto_module_set_target_triple(cf.M, options::triple.c_str());
+    lto_module_set_target_triple(M, options::triple.c_str());
 
   cf.handle = file->handle;
-  unsigned sym_count = lto_module_get_num_symbols(cf.M);
+  unsigned sym_count = lto_module_get_num_symbols(M);
   cf.syms.reserve(sym_count);
 
   for (unsigned i = 0; i != sym_count; ++i) {
-    lto_symbol_attributes attrs = lto_module_get_symbol_attribute(cf.M, i);
+    lto_symbol_attributes attrs = lto_module_get_symbol_attribute(M, i);
     if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL)
       continue;
 
     cf.syms.push_back(ld_plugin_symbol());
     ld_plugin_symbol &sym = cf.syms.back();
-    sym.name = const_cast<char *>(lto_module_get_symbol_name(cf.M, i));
+    sym.name = const_cast<char *>(lto_module_get_symbol_name(M, i));
     sym.version = NULL;
 
     int scope = attrs & LTO_SYMBOL_SCOPE_MASK;
@@ -316,6 +342,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
     }
 
     int definition = attrs & LTO_SYMBOL_DEFINITION_MASK;
+    sym.comdat_key = NULL;
     switch (definition) {
       case LTO_SYMBOL_DEFINITION_REGULAR:
         sym.def = LDPK_DEF;
@@ -327,6 +354,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
         sym.def = LDPK_COMMON;
         break;
       case LTO_SYMBOL_DEFINITION_WEAK:
+        sym.comdat_key = sym.name;
         sym.def = LDPK_WEAKDEF;
         break;
       case LTO_SYMBOL_DEFINITION_WEAKUNDEF:
@@ -337,9 +365,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
         return LDPS_ERR;
     }
 
-    // LLVM never emits COMDAT.
     sym.size = 0;
-    sym.comdat_key = NULL;
 
     sym.resolution = LDPR_UNKNOWN;
   }
@@ -353,6 +379,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
     }
   }
 
+  lto_codegen_add_module(code_gen, M);
   return LDPS_OK;
 }
 
@@ -361,12 +388,6 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
 /// been overridden by a native object file. Then, perform optimization and
 /// codegen.
 static ld_plugin_status all_symbols_read_hook(void) {
-  lto_code_gen_t cg = lto_codegen_create();
-
-  for (std::list<claimed_file>::iterator I = Modules.begin(),
-       E = Modules.end(); I != E; ++I)
-    lto_codegen_add_module(cg, I->M);
-
   std::ofstream api_file;
   if (options::generate_api_file) {
     api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc);
@@ -384,7 +405,7 @@ static ld_plugin_status all_symbols_read_hook(void) {
     (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]);
     for (unsigned i = 0, e = I->syms.size(); i != e; i++) {
       if (I->syms[i].resolution == LDPR_PREVAILING_DEF) {
-        lto_codegen_add_must_preserve_symbol(cg, I->syms[i].name);
+        lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name);
         anySymbolsPreserved = true;
 
         if (options::generate_api_file)
@@ -398,15 +419,15 @@ static ld_plugin_status all_symbols_read_hook(void) {
 
   if (!anySymbolsPreserved) {
     // All of the IL is unnecessary!
-    lto_codegen_dispose(cg);
+    lto_codegen_dispose(code_gen);
     return LDPS_OK;
   }
 
-  lto_codegen_set_pic_model(cg, output_type);
-  lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF);
+  lto_codegen_set_pic_model(code_gen, output_type);
+  lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF);
   if (!options::as_path.empty()) {
     sys::Path p = sys::Program::FindProgramByName(options::as_path);
-    lto_codegen_set_assembler_path(cg, p.c_str());
+    lto_codegen_set_assembler_path(code_gen, p.c_str());
   }
   if (!options::as_args.empty()) {
     std::vector<const char *> as_args_p;
@@ -414,20 +435,19 @@ static ld_plugin_status all_symbols_read_hook(void) {
            E = options::as_args.end(); I != E; ++I) {
       as_args_p.push_back(I->c_str());
     }
-    lto_codegen_set_assembler_args(cg, &as_args_p[0], as_args_p.size());
+    lto_codegen_set_assembler_args(code_gen, &as_args_p[0], as_args_p.size());
   }
   if (!options::mcpu.empty())
-    lto_codegen_set_cpu(cg, options::mcpu.c_str());
+    lto_codegen_set_cpu(code_gen, options::mcpu.c_str());
 
   // Pass through extra options to the code generator.
   if (!options::extra.empty()) {
     for (std::vector<std::string>::iterator it = options::extra.begin();
          it != options::extra.end(); ++it) {
-      lto_codegen_debug_options(cg, (*it).c_str());
+      lto_codegen_debug_options(code_gen, (*it).c_str());
     }
   }
 
-
   if (options::generate_bc_file != options::BC_NO) {
     std::string path;
     if (options::generate_bc_file == options::BC_ONLY)
@@ -436,45 +456,51 @@ static ld_plugin_status all_symbols_read_hook(void) {
       path = options::bc_path;
     else
       path = output_name + ".bc";
-    bool err = lto_codegen_write_merged_modules(cg, path.c_str());
+    bool err = lto_codegen_write_merged_modules(code_gen, path.c_str());
     if (err)
       (*message)(LDPL_FATAL, "Failed to write the output file.");
     if (options::generate_bc_file == options::BC_ONLY)
       exit(0);
   }
   size_t bufsize = 0;
-  const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
+  const char *buffer = static_cast<const char *>(lto_codegen_compile(code_gen,
                                                                      &bufsize));
 
   std::string ErrMsg;
 
-  sys::Path uniqueObjPath("/tmp/llvmgold.o");
-  if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) {
-    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
-    return LDPS_ERR;
-  }
-  tool_output_file objFile(uniqueObjPath.c_str(), ErrMsg,
-                           raw_fd_ostream::F_Binary);
-  if (!ErrMsg.empty()) {
-    (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
-    return LDPS_ERR;
+  const char *objPath;
+  if (!options::obj_path.empty()) {
+    objPath = options::obj_path.c_str();
+  } else {
+    sys::Path uniqueObjPath("/tmp/llvmgold.o");
+    if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) {
+      (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
+      return LDPS_ERR;
+    }
+    objPath = uniqueObjPath.c_str();
   }
+  tool_output_file objFile(objPath, ErrMsg,
+                             raw_fd_ostream::F_Binary);
+    if (!ErrMsg.empty()) {
+      (*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
+      return LDPS_ERR;
+    }
 
   objFile.os().write(buffer, bufsize);
   objFile.os().close();
   if (objFile.os().has_error()) {
     (*message)(LDPL_ERROR, "Error writing output file '%s'",
-               uniqueObjPath.c_str());
+               objPath);
     objFile.os().clear_error();
     return LDPS_ERR;
   }
   objFile.keep();
 
-  lto_codegen_dispose(cg);
+  lto_codegen_dispose(code_gen);
 
-  if ((*add_input_file)(uniqueObjPath.c_str()) != LDPS_OK) {
+  if ((*add_input_file)(objPath) != LDPS_OK) {
     (*message)(LDPL_ERROR, "Unable to add .o file to the link.");
-    (*message)(LDPL_ERROR, "File left behind in: %s", uniqueObjPath.c_str());
+    (*message)(LDPL_ERROR, "File left behind in: %s", objPath);
     return LDPS_ERR;
   }
 
@@ -502,7 +528,8 @@ static ld_plugin_status all_symbols_read_hook(void) {
     }
   }
 
-  Cleanup.push_back(uniqueObjPath);
+  if (options::obj_path.empty())
+    Cleanup.push_back(sys::Path(objPath));
 
   return LDPS_OK;
 }
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 8bcc2d8d27e9..bb426a9de8c2 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -28,8 +28,9 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Target/SubtargetFeature.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
@@ -95,6 +96,8 @@ FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
 cl::opt<bool> NoVerify("disable-verify", cl::Hidden,
                        cl::desc("Do not verify input module"));
 
+cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
+                            cl::desc("Do not use .loc entries"));
 
 static cl::opt<bool>
 DisableRedZone("disable-red-zone",
@@ -273,6 +276,21 @@ int main(int argc, char **argv) {
   assert(target.get() && "Could not allocate target machine!");
   TargetMachine &Target = *target.get();
 
+  if (DisableDotLoc)
+    Target.setMCUseLoc(false);
+  if (TheTriple.getOS() == Triple::Darwin) {
+    switch (TheTriple.getDarwinMajorNumber()) {
+    case 7:
+    case 8:
+    case 9:
+      // disable .loc support for older darwin OS.
+      Target.setMCUseLoc(false);
+      break;
+    default:
+      break;
+    }
+  }
+
   // Figure out where we are going to send the output...
   OwningPtr<tool_output_file> Out
     (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]));
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index ce70d46e4a5e..9378ef255466 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen bitreader selectiondag)
+set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
 
 add_llvm_tool(lli
   lli.cpp
diff --git a/tools/lli/Makefile b/tools/lli/Makefile
index 8f6eeed486bb..80aa82b4d681 100644
--- a/tools/lli/Makefile
+++ b/tools/lli/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL    := ../..
 TOOLNAME := lli
-LINK_COMPONENTS := jit interpreter nativecodegen bitreader selectiondag
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag
 
 # Enable JIT support
 include $(LEVEL)/Makefile.common
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 4c377805e6a3..a756459ecc23 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -23,16 +23,26 @@
 #include "llvm/ExecutionEngine/Interpreter.h"
 #include "llvm/ExecutionEngine/JIT.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Target/TargetSelect.h"
 #include <cerrno>
+
+#ifdef __CYGWIN__
+#include <cygwin/version.h>
+#if defined(CYGWIN_VERSION_DLL_MAJOR) && CYGWIN_VERSION_DLL_MAJOR<1007
+#define DO_NOTHING_ATEXIT 1
+#endif
+#endif
+
 using namespace llvm;
 
 namespace {
@@ -46,6 +56,10 @@ namespace {
                                  cl::desc("Force interpretation: disable JIT"),
                                  cl::init(false));
 
+  cl::opt<bool> UseMCJIT(
+    "use-mcjit", cl::desc("Enable use of the MC-based JIT (if available)"),
+    cl::init(false));
+
   // Determine optimization level.
   cl::opt<char>
   OptLevel("O",
@@ -99,8 +113,11 @@ namespace {
 static ExecutionEngine *EE = 0;
 
 static void do_shutdown() {
+  // Cygwin-1.5 invokes DLL's dtors before atexit handler.
+#ifndef DO_NOTHING_ATEXIT
   delete EE;
   llvm_shutdown();
+#endif
 }
 
 //===----------------------------------------------------------------------===//
@@ -125,20 +142,15 @@ int main(int argc, char **argv, char * const *envp) {
     sys::Process::PreventCoreFiles();
   
   // Load the bitcode...
-  std::string ErrorMsg;
-  Module *Mod = NULL;
-  if (MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFile,&ErrorMsg)){
-    Mod = getLazyBitcodeModule(Buffer, Context, &ErrorMsg);
-    if (!Mod) delete Buffer;
-  }
-  
+  SMDiagnostic Err;
+  Module *Mod = ParseIRFile(InputFile, Err, Context);
   if (!Mod) {
-    errs() << argv[0] << ": error loading program '" << InputFile << "': "
-           << ErrorMsg << "\n";
-    exit(1);
+    Err.Print(argv[0], errs());
+    return 1;
   }
 
   // If not jitting lazily, load the whole bitcode file eagerly too.
+  std::string ErrorMsg;
   if (NoLazyCompilation) {
     if (Mod->MaterializeAllPermanently(&ErrorMsg)) {
       errs() << argv[0] << ": bitcode didn't read correctly.\n";
@@ -160,6 +172,10 @@ int main(int argc, char **argv, char * const *envp) {
   if (!TargetTriple.empty())
     Mod->setTargetTriple(Triple::normalize(TargetTriple));
 
+  // Enable MCJIT, if desired.
+  if (UseMCJIT)
+    builder.setUseMCJIT(true);
+
   CodeGenOpt::Level OLvl = CodeGenOpt::Default;
   switch (OptLevel) {
   default:
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index 021a3691e850..c1c8b2474e79 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -16,13 +16,13 @@
 #include "llvm/Module.h"
 #include "llvm/Bitcode/Archive.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include <iostream>
+#include "llvm/Support/Signals.h"
 #include <algorithm>
-#include <iomanip>
 #include <memory>
 #include <fstream>
 using namespace llvm;
@@ -274,7 +274,7 @@ ArchiveOperation parseCommandLine() {
 // finds with all the files in that directory (recursively). It uses the
 // sys::Path::getDirectoryContent method to perform the actual directory scans.
 bool
-recurseDirectories(const sys::Path& path, 
+recurseDirectories(const sys::Path& path,
                    std::set<sys::Path>& result, std::string* ErrMsg) {
   result.clear();
   if (RecurseDirectories) {
@@ -311,7 +311,8 @@ bool buildPaths(bool checkExistence, std::string* ErrMsg) {
     if (!aPath.set(Members[i]))
       throw std::string("File member name invalid: ") + Members[i];
     if (checkExistence) {
-      if (!aPath.exists())
+      bool Exists;
+      if (sys::fs::exists(aPath.str(), Exists) || !Exists)
         throw std::string("File does not exist: ") + Members[i];
       std::string Err;
       sys::PathWithStatus PwS(aPath);
@@ -335,12 +336,12 @@ bool buildPaths(bool checkExistence, std::string* ErrMsg) {
 
 // printSymbolTable - print out the archive's symbol table.
 void printSymbolTable() {
-  std::cout << "\nArchive Symbol Table:\n";
+  outs() << "\nArchive Symbol Table:\n";
   const Archive::SymTabType& symtab = TheArchive->getSymbolTable();
   for (Archive::SymTabType::const_iterator I=symtab.begin(), E=symtab.end();
        I != E; ++I ) {
     unsigned offset = TheArchive->getFirstFileOffset() + I->second;
-    std::cout << " " << std::setw(9) << offset << "\t" << I->first <<"\n";
+    outs() << " " << format("%9u", offset) << "\t" << I->first <<"\n";
   }
 }
 
@@ -365,10 +366,10 @@ bool doPrint(std::string* ErrMsg) {
           continue;
 
         if (Verbose)
-          std::cout << "Printing " << I->getPath().str() << "\n";
+          outs() << "Printing " << I->getPath().str() << "\n";
 
         unsigned len = I->getSize();
-        std::cout.write(data, len);
+        outs().write(data, len);
       } else {
         countDown--;
       }
@@ -379,27 +380,27 @@ bool doPrint(std::string* ErrMsg) {
 
 // putMode - utility function for printing out the file mode when the 't'
 // operation is in verbose mode.
-void 
+void
 printMode(unsigned mode) {
   if (mode & 004)
-    std::cout << "r";
+    outs() << "r";
   else
-    std::cout << "-";
+    outs() << "-";
   if (mode & 002)
-    std::cout << "w";
+    outs() << "w";
   else
-    std::cout << "-";
+    outs() << "-";
   if (mode & 001)
-    std::cout << "x";
+    outs() << "x";
   else
-    std::cout << "-";
+    outs() << "-";
 }
 
 // doDisplayTable - Implement the 't' operation. This function prints out just
 // the file names of each of the members. However, if verbose mode is requested
 // ('v' modifier) then the file type, permission mode, user, group, size, and
 // modification time are also printed.
-bool 
+bool
 doDisplayTable(std::string* ErrMsg) {
   if (buildPaths(false, ErrMsg))
     return true;
@@ -411,22 +412,22 @@ doDisplayTable(std::string* ErrMsg) {
         // FIXME: Output should be this format:
         // Zrw-r--r--  500/ 500    525 Nov  8 17:42 2004 Makefile
         if (I->isBitcode())
-          std::cout << "b";
+          outs() << "b";
         else if (I->isCompressed())
-          std::cout << "Z";
+          outs() << "Z";
         else
-          std::cout << " ";
+          outs() << " ";
         unsigned mode = I->getMode();
         printMode((mode >> 6) & 007);
         printMode((mode >> 3) & 007);
         printMode(mode & 007);
-        std::cout << " " << std::setw(4) << I->getUser();
-        std::cout << "/" << std::setw(4) << I->getGroup();
-        std::cout << " " << std::setw(8) << I->getSize();
-        std::cout << " " << std::setw(20) << I->getModTime().str().substr(4);
-        std::cout << " " << I->getPath().str() << "\n";
+        outs() << " " << format("%4u", I->getUser());
+        outs() << "/" << format("%4u", I->getGroup());
+        outs() << " " << format("%8u", I->getSize());
+        outs() << " " << format("%20s", I->getModTime().str().substr(4).c_str());
+        outs() << " " << I->getPath().str() << "\n";
       } else {
-        std::cout << I->getPath().str() << "\n";
+        outs() << I->getPath().str() << "\n";
       }
     }
   }
@@ -437,7 +438,7 @@ doDisplayTable(std::string* ErrMsg) {
 
 // doExtract - Implement the 'x' operation. This function extracts files back to
 // the file system, making sure to uncompress any that were compressed
-bool 
+bool
 doExtract(std::string* ErrMsg) {
   if (buildPaths(false, ErrMsg))
     return true;
@@ -450,7 +451,7 @@ doExtract(std::string* ErrMsg) {
       if (I->hasPath()) {
         sys::Path dirs(I->getPath());
         dirs.eraseComponent();
-        if (dirs.createDirectoryOnDisk(/*create_parents=*/true, ErrMsg)) 
+        if (dirs.createDirectoryOnDisk(/*create_parents=*/true, ErrMsg))
           return true;
       }
 
@@ -480,11 +481,11 @@ doExtract(std::string* ErrMsg) {
 // members from the archive. Note that if the count is specified, there should
 // be no more than one path in the Paths list or else this algorithm breaks.
 // That check is enforced in parseCommandLine (above).
-bool 
+bool
 doDelete(std::string* ErrMsg) {
   if (buildPaths(false, ErrMsg))
     return true;
-  if (Paths.empty()) 
+  if (Paths.empty())
     return false;
   unsigned countDown = Count;
   for (Archive::iterator I = TheArchive->begin(), E = TheArchive->end();
@@ -513,9 +514,9 @@ doDelete(std::string* ErrMsg) {
 // order of the archive members so that when the archive is written the move
 // of the members is accomplished. Note the use of the RelPos variable to
 // determine where the items should be moved to.
-bool 
+bool
 doMove(std::string* ErrMsg) {
-  if (buildPaths(false, ErrMsg)) 
+  if (buildPaths(false, ErrMsg))
     return true;
 
   // By default and convention the place to move members to is the end of the
@@ -566,12 +567,12 @@ doMove(std::string* ErrMsg) {
 
 // doQuickAppend - Implements the 'q' operation. This function just
 // indiscriminantly adds the members to the archive and rebuilds it.
-bool 
+bool
 doQuickAppend(std::string* ErrMsg) {
   // Get the list of paths to append.
   if (buildPaths(true, ErrMsg))
     return true;
-  if (Paths.empty()) 
+  if (Paths.empty())
     return false;
 
   // Append them quickly.
@@ -591,13 +592,13 @@ doQuickAppend(std::string* ErrMsg) {
 
 // doReplaceOrInsert - Implements the 'r' operation. This function will replace
 // any existing files or insert new ones into the archive.
-bool 
+bool
 doReplaceOrInsert(std::string* ErrMsg) {
 
   // Build the list of files to be added/replaced.
   if (buildPaths(true, ErrMsg))
     return true;
-  if (Paths.empty()) 
+  if (Paths.empty())
     return false;
 
   // Keep track of the paths that remain to be inserted.
@@ -637,7 +638,7 @@ doReplaceOrInsert(std::string* ErrMsg) {
 
     if (found != remaining.end()) {
       std::string Err;
-      sys::PathWithStatus PwS(*found); 
+      sys::PathWithStatus PwS(*found);
       const sys::FileStatus *si = PwS.getFileStatus(false, &Err);
       if (!si)
         return true;
@@ -716,7 +717,8 @@ int main(int argc, char **argv) {
       throw std::string("Archive name invalid: ") + ArchiveName;
 
     // Create or open the archive object.
-    if (!ArchivePath.exists()) {
+    bool Exists;
+    if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) {
       // Produce a warning if we should and we're creating the archive
       if (!Create)
         errs() << argv[0] << ": creating " << ArchivePath.str() << "\n";
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 1eaa4b3bea44..c1661cdcb196 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -25,8 +25,8 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 9c0d675793d3..980f278d4a31 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -27,6 +27,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
@@ -37,7 +38,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include <cstdio>
 #include <map>
 #include <algorithm>
@@ -57,15 +59,22 @@ static cl::opt<bool> NoHistogram("disable-histogram",
 
 static cl::opt<bool>
 NonSymbolic("non-symbolic",
-            cl::desc("Emit numberic info in dump even if"
+            cl::desc("Emit numeric info in dump even if"
                      " symbolic info is available"));
 
-/// CurStreamType - If we can sniff the flavor of this stream, we can produce
-/// better dump info.
-static enum {
+namespace {
+
+/// CurStreamTypeType - A type for CurStreamType
+enum CurStreamTypeType {
   UnknownBitstream,
   LLVMIRBitstream
-} CurStreamType;
+};
+
+}
+
+/// CurStreamType - If we can sniff the flavor of this stream, we can produce
+/// better dump info.
+static CurStreamTypeType CurStreamType;
 
 
 /// GetBlockName - Return a symbolic block name if known, otherwise return
@@ -254,6 +263,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
     switch(CodeID) {
     default:return 0;
     case bitc::METADATA_ATTACHMENT:  return "METADATA_ATTACHMENT";
+    case bitc::METADATA_ATTACHMENT2: return "METADATA_ATTACHMENT2";
     }
   case bitc::METADATA_BLOCK_ID:
     switch(CodeID) {
@@ -268,7 +278,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
     case bitc::METADATA_NODE2:       return "METADATA_NODE2";
     case bitc::METADATA_FN_NODE2:    return "METADATA_FN_NODE2";
     case bitc::METADATA_NAMED_NODE2: return "METADATA_NAMED_NODE2";
-    case bitc::METADATA_ATTACHMENT2: return "METADATA_ATTACHMENT2";
     }
   }
 }
@@ -473,10 +482,11 @@ static void PrintSize(uint64_t Bits) {
 /// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
 static int AnalyzeBitcode() {
   // Read the input file.
-  MemoryBuffer *MemBuf = MemoryBuffer::getFileOrSTDIN(InputFilename.c_str());
+  OwningPtr<MemoryBuffer> MemBuf;
 
-  if (MemBuf == 0)
-    return Error("Error reading '" + InputFilename + "'.");
+  if (error_code ec =
+        MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf))
+    return Error("Error reading '" + InputFilename + "': " + ec.message());
 
   if (MemBuf->getBufferSize() & 3)
     return Error("Bitcode stream should be a multiple of 4 bytes in length");
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 663cae5ed2d4..d33ff0dad843 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -70,6 +70,8 @@ if( NOT NM_PATH )
   message(FATAL_ERROR "`nm' not found")
 endif()
 
+get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
+
 add_custom_command(OUTPUT ${LIBDEPS_TMP}
   COMMAND ${PERL_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/GenLibDeps.pl -flat ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR} ${NM_PATH} > ${LIBDEPS_TMP}
   DEPENDS ${llvm_libs}
@@ -80,8 +82,11 @@ add_custom_command(OUTPUT ${LIBDEPS}
   DEPENDS ${LIBDEPS_TMP}
   COMMENT "Updating ${LIBDEPS} if necessary...")
 
+# This must stop the build if find-cycles.pl returns error:
 add_custom_command(OUTPUT ${FINAL_LIBDEPS}
-  COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/find-cycles.pl < ${LIBDEPS} > ${FINAL_LIBDEPS} || ${CMAKE_COMMAND} -E remove -f ${FINAL_LIBDEPS}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${FINAL_LIBDEPS} ${FINAL_LIBDEPS}.tmp
+  COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/find-cycles.pl < ${LIBDEPS} > ${FINAL_LIBDEPS}.tmp
+  COMMAND ${CMAKE_COMMAND} -E copy ${FINAL_LIBDEPS}.tmp ${FINAL_LIBDEPS}
   DEPENDS ${LIBDEPS}
   COMMENT "Checking for cyclic dependencies between LLVM libraries.")
 
@@ -89,6 +94,17 @@ set(C_FLGS "${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
 set(CXX_FLGS "${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
 set(CPP_FLGS "${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
 
+# We don't want certain flags on the output of
+# llvm-config --cflags --cxxflags
+macro(remove_option_from_llvm_config option)
+  llvm_replace_compiler_option(C_FLGS "${option}" "")
+  llvm_replace_compiler_option(CXX_FLGS "${option}" "")
+  llvm_replace_compiler_option(CPP_FLGS "${option}" "")
+endmacro(remove_option_from_llvm_config)
+remove_option_from_llvm_config("-pedantic")
+remove_option_from_llvm_config("-Wall")
+remove_option_from_llvm_config("-W")
+
 add_custom_command(OUTPUT ${LLVM_CONFIG}
   COMMAND echo 's!@LLVM_CPPFLAGS@!${CPP_FLGS}!' > temp.sed
   COMMAND echo 's!@LLVM_CFLAGS@!${C_FLGS}!' >> temp.sed
@@ -108,6 +124,7 @@ add_custom_command(OUTPUT ${LLVM_CONFIG}
 add_custom_target(llvm-config.target ALL
   DEPENDS ${LLVM_CONFIG})
 
+get_property(llvm_lib_targets GLOBAL PROPERTY LLVM_LIB_TARGETS)
 add_dependencies(llvm-config.target ${llvm_lib_targets})
 
 # Make sure that llvm-config builds before the llvm tools, so we have
diff --git a/tools/llvm-config/llvm-config.in.in b/tools/llvm-config/llvm-config.in.in
index d435d57adf37..840a10e23a1e 100644
--- a/tools/llvm-config/llvm-config.in.in
+++ b/tools/llvm-config/llvm-config.in.in
@@ -197,7 +197,7 @@ Options:
 Typical components:
   all                All LLVM libraries (default).
   backend            Either a native backend or the C backend.
-  engine             Either a native JIT or a bytecode interpreter.
+  engine             Either a native JIT or a bitcode interpreter.
 __EOD__
     exit(1);
 }
@@ -320,6 +320,9 @@ sub build_name_map {
             $NAME_MAP{$target} = [$target.'info',
                                   $target.'asmprinter', 
                                   $target.'codegen']
+        } elsif (defined $NAME_MAP{$target.'codegen'}) {
+          $NAME_MAP{$target} = [$target.'info',
+                                $target.'codegen']
         } else {
             $NAME_MAP{$target} = [$target.'info',
                                   $NAME_MAP{$target}[0]]
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index 16a990fb2812..b932ccc7437c 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -17,13 +17,12 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRReader.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
@@ -34,47 +33,30 @@
 
 using namespace llvm;
 
-/// Reads a module from a file.  If the filename ends in .ll, it is
-/// interpreted as an assembly file;  otherwise, it is interpreted as
-/// bitcode.  On error, messages are written to stderr and null is
-/// returned.
+/// Reads a module from a file.  On error, messages are written to stderr
+/// and null is returned.
 static Module *ReadModule(LLVMContext &Context, StringRef Name) {
-  // LLVM assembly path.
-  if (Name.endswith(".ll")) {
-    SMDiagnostic Diag;
-    Module *M = ParseAssemblyFile(Name, Diag, Context);
-    if (M) return M;
-
+  SMDiagnostic Diag;
+  Module *M = ParseIRFile(Name, Diag, Context);
+  if (!M)
     Diag.Print("llvmdiff", errs());
-    return 0;
-  }
-
-  // Bitcode path.
-  MemoryBuffer *Buffer = MemoryBuffer::getFile(Name);
-
-  // ParseBitcodeFile takes ownership of the buffer if it succeeds.
-  std::string Error;
-  Module *M = ParseBitcodeFile(Buffer, Context, &Error);
-  if (M) return M;
-
-  errs() << "error parsing " << Name << ": " << Error;
-  delete Buffer;
-  return 0;
+  return M;
 }
 
 namespace {
-struct DiffContext {
-  DiffContext(Value *L, Value *R)
-    : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
-  Value *L;
-  Value *R;
-  bool Differences;
-  bool IsFunction;
-  DenseMap<Value*,unsigned> LNumbering;
-  DenseMap<Value*,unsigned> RNumbering;
-};
+  struct DiffContext {
+    DiffContext(Value *L, Value *R)
+      : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
+    Value *L;
+    Value *R;
+    bool Differences;
+    bool IsFunction;
+    DenseMap<Value*,unsigned> LNumbering;
+    DenseMap<Value*,unsigned> RNumbering;
+  };
+}
 
-void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering) {
+static void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering){
   unsigned IN = 0;
 
   // Arguments get the first numbers.
@@ -98,6 +80,7 @@ void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering) {
   assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
 }
 
+namespace {
 class DiffConsumer : public DifferenceEngine::Consumer {
 private:
   raw_ostream &out;
@@ -273,7 +256,7 @@ public:
   }
   
 };
-}
+} // end anonymous namespace
 
 static void diffGlobal(DifferenceEngine &Engine, Module *L, Module *R,
                        StringRef Name) {
@@ -292,14 +275,14 @@ static void diffGlobal(DifferenceEngine &Engine, Module *L, Module *R,
     errs() << "No function named @" << Name << " in right module\n";
 }
 
-cl::opt<std::string> LeftFilename(cl::Positional,
-                                  cl::desc("<first file>"),
-                                  cl::Required);
-cl::opt<std::string> RightFilename(cl::Positional,
-                                   cl::desc("<second file>"),
-                                   cl::Required);
-cl::list<std::string> GlobalsToCompare(cl::Positional,
-                                       cl::desc("<globals to compare>"));
+static cl::opt<std::string> LeftFilename(cl::Positional,
+                                         cl::desc("<first file>"),
+                                         cl::Required);
+static cl::opt<std::string> RightFilename(cl::Positional,
+                                          cl::desc("<second file>"),
+                                          cl::Required);
+static cl::list<std::string> GlobalsToCompare(cl::Positional,
+                                              cl::desc("<globals to compare>"));
 
 int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv);
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 9d2d31da164d..b4977ced5bdd 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -26,8 +26,9 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -48,7 +49,7 @@ ShowAnnotations("show-annotations",
                 cl::desc("Add informational comments to the .ll file"));
 
 namespace {
-  
+
 class CommentWriter : public AssemblyAnnotationWriter {
 public:
   void emitFunctionAnnot(const Function *F,
@@ -58,32 +59,34 @@ public:
   }
   void printInfoComment(const Value &V, formatted_raw_ostream &OS) {
     if (V.getType()->isVoidTy()) return;
-      
+
     OS.PadToColumn(50);
     OS << "; [#uses=" << V.getNumUses() << ']';  // Output # uses
   }
 };
-  
+
 } // end anon namespace
 
 int main(int argc, char **argv) {
   // Print a stack trace if we signal out.
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
-  
+
   LLVMContext &Context = getGlobalContext();
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
-  
-  
+
+
   cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
 
   std::string ErrorMessage;
   std::auto_ptr<Module> M;
- 
-  if (MemoryBuffer *Buffer
-         = MemoryBuffer::getFileOrSTDIN(InputFilename, &ErrorMessage)) {
-    M.reset(ParseBitcodeFile(Buffer, Context, &ErrorMessage));
-    delete Buffer;
+
+  {
+    OwningPtr<MemoryBuffer> BufferPtr;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
+      ErrorMessage = ec.message();
+    else
+      M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
   }
 
   if (M.get() == 0) {
@@ -94,11 +97,11 @@ int main(int argc, char **argv) {
       errs() << "bitcode didn't read correctly.\n";
     return 1;
   }
-  
+
   // Just use stdout.  We won't actually print anything on it.
   if (DontPrint)
     OutputFilename = "-";
-  
+
   if (OutputFilename.empty()) { // Unspecified output, infer it.
     if (InputFilename == "-") {
       OutputFilename = "-";
@@ -114,7 +117,7 @@ int main(int argc, char **argv) {
   }
 
   std::string ErrorInfo;
-  OwningPtr<tool_output_file> 
+  OwningPtr<tool_output_file>
   Out(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
                            raw_fd_ostream::F_Binary));
   if (!ErrorInfo.empty()) {
@@ -125,7 +128,7 @@ int main(int argc, char **argv) {
   OwningPtr<AssemblyAnnotationWriter> Annotator;
   if (ShowAnnotations)
     Annotator.reset(new CommentWriter());
-  
+
   // All that llvm-dis does is write the assembly to a file.
   if (!DontPrint)
     M->print(Out->os(), Annotator.get());
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 91a59e5a56da..8c2f43a4f7d2 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -23,9 +23,10 @@
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include <memory>
 using namespace llvm;
 
@@ -102,13 +103,39 @@ int main(int argc, char **argv) {
   }
 
   // Materialize requisite global values.
-  for (size_t i = 0, e = GVs.size(); i != e; ++i) {
-    GlobalValue *GV = GVs[i];
-    if (GV->isMaterializable()) {
-      std::string ErrInfo;
-      if (GV->Materialize(&ErrInfo)) {
-        errs() << argv[0] << ": error reading input: " << ErrInfo << "\n";
-        return 1;
+  if (!DeleteFn)
+    for (size_t i = 0, e = GVs.size(); i != e; ++i) {
+      GlobalValue *GV = GVs[i];
+      if (GV->isMaterializable()) {
+        std::string ErrInfo;
+        if (GV->Materialize(&ErrInfo)) {
+          errs() << argv[0] << ": error reading input: " << ErrInfo << "\n";
+          return 1;
+        }
+      }
+    }
+  else {
+    // Deleting. Materialize every GV that's *not* in GVs.
+    SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end());
+    for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+         I != E; ++I) {
+      GlobalVariable *G = I;
+      if (!GVSet.count(G) && G->isMaterializable()) {
+        std::string ErrInfo;
+        if (G->Materialize(&ErrInfo)) {
+          errs() << argv[0] << ": error reading input: " << ErrInfo << "\n";
+          return 1;
+        }
+      }
+    }
+    for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
+      Function *F = I;
+      if (!GVSet.count(F) && F->isMaterializable()) {
+        std::string ErrInfo;
+        if (F->Materialize(&ErrInfo)) {
+          errs() << argv[0] << ": error reading input: " << ErrInfo << "\n";
+          return 1;
+        }
       }
     }
   }
diff --git a/tools/llvm-ld/CMakeLists.txt b/tools/llvm-ld/CMakeLists.txt
index 2ae4a1dba386..370bcb4abf52 100644
--- a/tools/llvm-ld/CMakeLists.txt
+++ b/tools/llvm-ld/CMakeLists.txt
@@ -4,3 +4,5 @@ add_llvm_tool(llvm-ld
   Optimize.cpp
   llvm-ld.cpp
   )
+
+add_dependencies(llvm-ld llvm-stub)
diff --git a/tools/llvm-ld/Optimize.cpp b/tools/llvm-ld/Optimize.cpp
index 3fb0079dfede..ef4502bab8d4 100644
--- a/tools/llvm-ld/Optimize.cpp
+++ b/tools/llvm-ld/Optimize.cpp
@@ -16,7 +16,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/StandardPasses.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/PassNameParser.h"
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index 3bbea9dc7287..cd6ce256db7b 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -23,7 +23,7 @@
 #include "llvm/LinkAllVMCore.h"
 #include "llvm/Linker.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Bitcode/ReaderWriter.h"
@@ -35,8 +35,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Config/config.h"
 #include <memory>
 #include <cstring>
@@ -98,7 +98,7 @@ static cl::list<std::string> PostLinkOpts("post-link-opts",
 static cl::list<std::string> XLinker("Xlinker", cl::value_desc("option"),
   cl::desc("Pass options to the system linker"));
 
-// Compatibility options that llvm-ld ignores but are supported for 
+// Compatibility options that llvm-ld ignores but are supported for
 // compatibility with LD
 static cl::opt<std::string> CO3("soname", cl::Hidden,
   cl::desc("Compatibility option: ignored"));
@@ -112,13 +112,13 @@ static cl::opt<bool> CO5("eh-frame-hdr", cl::Hidden,
 static  cl::opt<std::string> CO6("h", cl::Hidden,
   cl::desc("Compatibility option: ignored"));
 
-static cl::opt<bool> CO7("start-group", cl::Hidden, 
+static cl::opt<bool> CO7("start-group", cl::Hidden,
   cl::desc("Compatibility option: ignored"));
 
-static cl::opt<bool> CO8("end-group", cl::Hidden, 
+static cl::opt<bool> CO8("end-group", cl::Hidden,
   cl::desc("Compatibility option: ignored"));
 
-static cl::opt<std::string> CO9("m", cl::Hidden, 
+static cl::opt<std::string> CO9("m", cl::Hidden,
   cl::desc("Compatibility option: ignored"));
 
 /// This is just for convenience so it doesn't have to be passed around
@@ -142,7 +142,7 @@ static void PrintAndExit(const std::string &Message, Module *M, int errcode = 1)
 }
 
 static void PrintCommand(const std::vector<const char*> &args) {
-  std::vector<const char*>::const_iterator I = args.begin(), E = args.end(); 
+  std::vector<const char*>::const_iterator I = args.begin(), E = args.end();
   for (; I != E; ++I)
     if (*I)
       errs() << "'" << *I << "'" << " ";
@@ -178,7 +178,7 @@ static char ** CopyEnv(char ** const envp) {
 
   // Allocate a new environment list.
   char **newenv = new char* [entries];
-  if ((newenv = new char* [entries]) == NULL)
+  if (newenv == NULL)
     return NULL;
 
   // Make a copy of the list.  Don't forget the NULL that ends the list.
@@ -384,7 +384,7 @@ static int GenerateNative(const std::string &OutputFilename,
     args.push_back("-framework");
     args.push_back(Frameworks[index]);
   }
-      
+
   // Now that "args" owns all the std::strings for the arguments, call the c_str
   // method to get the underlying string array.  We do this game so that the
   // std::string array is guaranteed to outlive the const char* array.
@@ -410,13 +410,13 @@ static int GenerateNative(const std::string &OutputFilename,
 static void EmitShellScript(char **argv, Module *M) {
   if (Verbose)
     errs() << "Emitting Shell Script\n";
-#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(_WIN32)
   // Windows doesn't support #!/bin/sh style shell scripts in .exe files.  To
   // support windows systems, we copy the llvm-stub.exe executable from the
   // build tree to the destination file.
-  std::string ErrMsg;  
-  sys::Path llvmstub = FindExecutable("llvm-stub.exe", argv[0],
-                                      (void *)(intptr_t)&Optimize);
+  std::string ErrMsg;
+  sys::Path llvmstub = PrependMainExecutablePath("llvm-stub", argv[0],
+                                                 (void *)(intptr_t)&Optimize);
   if (llvmstub.isEmpty())
     PrintAndExit("Could not find llvm-stub.exe executable!", M);
 
@@ -455,7 +455,7 @@ static void EmitShellScript(char **argv, Module *M) {
            E = LibPaths.end(); P != E; ++P) {
       FullLibraryPath = *P;
       FullLibraryPath.appendComponent("lib" + *i);
-      FullLibraryPath.appendSuffix(&(LTDL_SHLIB_EXT[1]));
+      FullLibraryPath.appendSuffix(sys::Path::GetDLLSuffix());
       if (!FullLibraryPath.isEmpty()) {
         if (!FullLibraryPath.isDynamicLibrary()) {
           // Not a native shared library; mark as invalid
@@ -513,9 +513,20 @@ int main(int argc, char **argv, char **envp) {
 
   LLVMContext &Context = getGlobalContext();
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
-  
+
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeIPA(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeTarget(Registry);
+
   // Initial global variable above for convenience printing of program name.
-  progname = sys::Path(argv[0]).getBasename();
+  progname = sys::path::stem(argv[0]);
 
   // Parse the command line options
   cl::ParseCommandLineOptions(argc, argv, "llvm linker\n");
@@ -527,11 +538,8 @@ int main(int argc, char **argv, char **envp) {
       OutputFilename = "a.exe";
 
     // If there is no suffix add an "exe" one.
-    sys::Path ExeFile( OutputFilename );
-    if (ExeFile.getSuffix() == "") {
-      ExeFile.appendSuffix("exe");
-      OutputFilename = ExeFile.str();
-    }
+    if (sys::path::extension(OutputFilename).empty())
+      OutputFilename.append(".exe");
   }
 #endif
 
@@ -653,8 +661,8 @@ int main(int argc, char **argv, char **envp) {
       sys::RemoveFileOnSignal(AssemblyFile);
 
       // Determine the locations of the llc and gcc programs.
-      sys::Path llc = FindExecutable("llc", argv[0],
-                                     (void *)(intptr_t)&Optimize);
+      sys::Path llc = PrependMainExecutablePath("llc", argv[0],
+                                                (void *)(intptr_t)&Optimize);
       if (llc.isEmpty())
         PrintAndExit("Failed to find llc", Composite.get());
 
@@ -680,8 +688,8 @@ int main(int argc, char **argv, char **envp) {
       sys::RemoveFileOnSignal(CFile);
 
       // Determine the locations of the llc and gcc programs.
-      sys::Path llc = FindExecutable("llc", argv[0],
-                                     (void *)(intptr_t)&Optimize);
+      sys::Path llc = PrependMainExecutablePath("llc", argv[0],
+                                                (void *)(intptr_t)&Optimize);
       if (llc.isEmpty())
         PrintAndExit("Failed to find llc", Composite.get());
 
@@ -694,7 +702,7 @@ int main(int argc, char **argv, char **envp) {
       if (GenerateCFile(CFile.str(), BitcodeOutputFilename, llc, ErrMsg))
         PrintAndExit(ErrMsg, Composite.get());
 
-      if (GenerateNative(OutputFilename, CFile.str(), 
+      if (GenerateNative(OutputFilename, CFile.str(),
                          NativeLinkItems, gcc, envp, ErrMsg))
         PrintAndExit(ErrMsg, Composite.get());
     } else {
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index e55d0de0f9b5..3fb7ba42cd05 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -20,11 +20,11 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/IRReader.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Path.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 13080b481f14..c29d82a2cb38 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,7 +44,7 @@ public:
   uint64_t getExtent() const { return Bytes.size(); }
 
   int readByte(uint64_t Addr, uint8_t *Byte) const {
-    if (Addr > getExtent())
+    if (Addr >= getExtent())
       return -1;
     *Byte = Bytes[Addr].first;
     return 0;
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index aef0a3dffa45..2c22bedf1c2d 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -23,6 +23,10 @@
 #include "llvm/Target/TargetAsmParser.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/SubtargetFeature.h" // FIXME.
+#include "llvm/Target/TargetAsmInfo.h"  // FIXME.
+#include "llvm/Target/TargetLowering.h"  // FIXME.
+#include "llvm/Target/TargetLoweringObjectFile.h"  // FIXME.
 #include "llvm/Target/TargetMachine.h"  // FIXME.
 #include "llvm/Target/TargetSelect.h"
 #include "llvm/ADT/OwningPtr.h"
@@ -33,9 +37,10 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include "Disassembler.h"
 using namespace llvm;
 
@@ -63,6 +68,9 @@ OutputAsmVariant("output-asm-variant",
 static cl::opt<bool>
 RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));
 
+static cl::opt<bool>
+NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack"));
+
 static cl::opt<bool>
 EnableLogging("enable-api-logging", cl::desc("Enable MC API logging"));
 
@@ -95,6 +103,12 @@ static cl::opt<std::string>
 TripleName("triple", cl::desc("Target triple to assemble for, "
                               "see -version for available targets"));
 
+static cl::opt<std::string>
+MCPU("mcpu",
+     cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+     cl::value_desc("cpu-name"),
+     cl::init(""));
+
 static cl::opt<bool>
 NoInitialTextSection("n", cl::desc(
                    "Don't assume assembly file starts in the text section"));
@@ -157,17 +171,12 @@ static tool_output_file *GetOutputStream() {
 }
 
 static int AsLexInput(const char *ProgName) {
-  std::string ErrorMessage;
-  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
-                                                      &ErrorMessage);
-  if (Buffer == 0) {
-    errs() << ProgName << ": ";
-    if (ErrorMessage.size())
-      errs() << ErrorMessage << "\n";
-    else
-      errs() << "input file didn't read correctly.\n";
+  OwningPtr<MemoryBuffer> BufferPtr;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr)) {
+    errs() << ProgName << ": " << ec.message() << '\n';
     return 1;
   }
+  MemoryBuffer *Buffer = BufferPtr.take();
 
   SourceMgr SrcMgr;
   
@@ -194,7 +203,9 @@ static int AsLexInput(const char *ProgName) {
 
   bool Error = false;
   while (Lexer.Lex().isNot(AsmToken::Eof)) {
-    switch (Lexer.getKind()) {
+    AsmToken Tok = Lexer.getTok();
+
+    switch (Tok.getKind()) {
     default:
       SrcMgr.PrintMessage(Lexer.getLoc(), "unknown token", "warning");
       Error = true;
@@ -203,45 +214,63 @@ static int AsLexInput(const char *ProgName) {
       Error = true; // error already printed.
       break;
     case AsmToken::Identifier:
-      Out->os() << "identifier: " << Lexer.getTok().getString() << '\n';
-      break;
-    case AsmToken::String:
-      Out->os() << "string: " << Lexer.getTok().getString() << '\n';
+      Out->os() << "identifier: " << Lexer.getTok().getString();
       break;
     case AsmToken::Integer:
-      Out->os() << "int: " << Lexer.getTok().getString() << '\n';
+      Out->os() << "int: " << Lexer.getTok().getString();
+      break;
+    case AsmToken::Real:
+      Out->os() << "real: " << Lexer.getTok().getString();
+      break;
+    case AsmToken::Register:
+      Out->os() << "register: " << Lexer.getTok().getRegVal();
+      break;
+    case AsmToken::String:
+      Out->os() << "string: " << Lexer.getTok().getString();
       break;
 
-    case AsmToken::Amp:            Out->os() << "Amp\n"; break;
-    case AsmToken::AmpAmp:         Out->os() << "AmpAmp\n"; break;
-    case AsmToken::Caret:          Out->os() << "Caret\n"; break;
-    case AsmToken::Colon:          Out->os() << "Colon\n"; break;
-    case AsmToken::Comma:          Out->os() << "Comma\n"; break;
-    case AsmToken::Dollar:         Out->os() << "Dollar\n"; break;
-    case AsmToken::EndOfStatement: Out->os() << "EndOfStatement\n"; break;
-    case AsmToken::Eof:            Out->os() << "Eof\n"; break;
-    case AsmToken::Equal:          Out->os() << "Equal\n"; break;
-    case AsmToken::EqualEqual:     Out->os() << "EqualEqual\n"; break;
-    case AsmToken::Exclaim:        Out->os() << "Exclaim\n"; break;
-    case AsmToken::ExclaimEqual:   Out->os() << "ExclaimEqual\n"; break;
-    case AsmToken::Greater:        Out->os() << "Greater\n"; break;
-    case AsmToken::GreaterEqual:   Out->os() << "GreaterEqual\n"; break;
-    case AsmToken::GreaterGreater: Out->os() << "GreaterGreater\n"; break;
-    case AsmToken::LParen:         Out->os() << "LParen\n"; break;
-    case AsmToken::Less:           Out->os() << "Less\n"; break;
-    case AsmToken::LessEqual:      Out->os() << "LessEqual\n"; break;
-    case AsmToken::LessGreater:    Out->os() << "LessGreater\n"; break;
-    case AsmToken::LessLess:       Out->os() << "LessLess\n"; break;
-    case AsmToken::Minus:          Out->os() << "Minus\n"; break;
-    case AsmToken::Percent:        Out->os() << "Percent\n"; break;
-    case AsmToken::Pipe:           Out->os() << "Pipe\n"; break;
-    case AsmToken::PipePipe:       Out->os() << "PipePipe\n"; break;
-    case AsmToken::Plus:           Out->os() << "Plus\n"; break;
-    case AsmToken::RParen:         Out->os() << "RParen\n"; break;
-    case AsmToken::Slash:          Out->os() << "Slash\n"; break;
-    case AsmToken::Star:           Out->os() << "Star\n"; break;
-    case AsmToken::Tilde:          Out->os() << "Tilde\n"; break;
+    case AsmToken::Amp:            Out->os() << "Amp"; break;
+    case AsmToken::AmpAmp:         Out->os() << "AmpAmp"; break;
+    case AsmToken::At:             Out->os() << "At"; break;
+    case AsmToken::Caret:          Out->os() << "Caret"; break;
+    case AsmToken::Colon:          Out->os() << "Colon"; break;
+    case AsmToken::Comma:          Out->os() << "Comma"; break;
+    case AsmToken::Dollar:         Out->os() << "Dollar"; break;
+    case AsmToken::Dot:            Out->os() << "Dot"; break;
+    case AsmToken::EndOfStatement: Out->os() << "EndOfStatement"; break;
+    case AsmToken::Eof:            Out->os() << "Eof"; break;
+    case AsmToken::Equal:          Out->os() << "Equal"; break;
+    case AsmToken::EqualEqual:     Out->os() << "EqualEqual"; break;
+    case AsmToken::Exclaim:        Out->os() << "Exclaim"; break;
+    case AsmToken::ExclaimEqual:   Out->os() << "ExclaimEqual"; break;
+    case AsmToken::Greater:        Out->os() << "Greater"; break;
+    case AsmToken::GreaterEqual:   Out->os() << "GreaterEqual"; break;
+    case AsmToken::GreaterGreater: Out->os() << "GreaterGreater"; break;
+    case AsmToken::Hash:           Out->os() << "Hash"; break;
+    case AsmToken::LBrac:          Out->os() << "LBrac"; break;
+    case AsmToken::LCurly:         Out->os() << "LCurly"; break;
+    case AsmToken::LParen:         Out->os() << "LParen"; break;
+    case AsmToken::Less:           Out->os() << "Less"; break;
+    case AsmToken::LessEqual:      Out->os() << "LessEqual"; break;
+    case AsmToken::LessGreater:    Out->os() << "LessGreater"; break;
+    case AsmToken::LessLess:       Out->os() << "LessLess"; break;
+    case AsmToken::Minus:          Out->os() << "Minus"; break;
+    case AsmToken::Percent:        Out->os() << "Percent"; break;
+    case AsmToken::Pipe:           Out->os() << "Pipe"; break;
+    case AsmToken::PipePipe:       Out->os() << "PipePipe"; break;
+    case AsmToken::Plus:           Out->os() << "Plus"; break;
+    case AsmToken::RBrac:          Out->os() << "RBrac"; break;
+    case AsmToken::RCurly:         Out->os() << "RCurly"; break;
+    case AsmToken::RParen:         Out->os() << "RParen"; break;
+    case AsmToken::Slash:          Out->os() << "Slash"; break;
+    case AsmToken::Star:           Out->os() << "Star"; break;
+    case AsmToken::Tilde:          Out->os() << "Tilde"; break;
     }
+
+    // Print the token string.
+    Out->os() << " (\"";
+    Out->os().write_escaped(Tok.getString());
+    Out->os() << "\")\n";
   }
 
   // Keep output if no errors.
@@ -255,16 +284,12 @@ static int AssembleInput(const char *ProgName) {
   if (!TheTarget)
     return 1;
 
-  std::string Error;
-  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, &Error);
-  if (Buffer == 0) {
-    errs() << ProgName << ": ";
-    if (Error.size())
-      errs() << Error << "\n";
-    else
-      errs() << "input file didn't read correctly.\n";
+  OwningPtr<MemoryBuffer> BufferPtr;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr)) {
+    errs() << ProgName << ": " << ec.message() << '\n';
     return 1;
   }
+  MemoryBuffer *Buffer = BufferPtr.take();
   
   SourceMgr SrcMgr;
   
@@ -279,10 +304,20 @@ static int AssembleInput(const char *ProgName) {
   llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName));
   assert(MAI && "Unable to create target asm info!");
   
-  MCContext Ctx(*MAI);
+  // Package up features to be passed to target/subtarget
+  std::string FeaturesStr;
+  if (MCPU.size()) {
+    SubtargetFeatures Features;
+    Features.setCPU(MCPU);
+    FeaturesStr = Features.getString();
+  }
 
   // FIXME: We shouldn't need to do this (and link in codegen).
-  OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName, ""));
+  //        When we split this out, we should do it in a way that makes
+  //        it straightforward to switch subtargets on the fly (.e.g,
+  //        the .cpu and .code16 directives).
+  OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
+                                                             FeaturesStr));
 
   if (!TM) {
     errs() << ProgName << ": error: could not create target for triple '"
@@ -290,6 +325,9 @@ static int AssembleInput(const char *ProgName) {
     return 1;
   }
 
+  const TargetAsmInfo *tai = new TargetAsmInfo(*TM);
+  MCContext Ctx(*MAI, tai);
+
   OwningPtr<tool_output_file> Out(GetOutputStream());
   if (!Out)
     return 1;
@@ -297,15 +335,23 @@ static int AssembleInput(const char *ProgName) {
   formatted_raw_ostream FOS(Out->os());
   OwningPtr<MCStreamer> Str;
 
+  const TargetLoweringObjectFile &TLOF =
+    TM->getTargetLowering()->getObjFileLowering();
+  const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(Ctx, *TM);
+
+  // FIXME: There is a bit of code duplication with addPassesToEmitFile.
   if (FileType == OFT_AssemblyFile) {
     MCInstPrinter *IP =
       TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI);
     MCCodeEmitter *CE = 0;
-    if (ShowEncoding)
+    TargetAsmBackend *TAB = 0;
+    if (ShowEncoding) {
       CE = TheTarget->createCodeEmitter(*TM, Ctx);
-    Str.reset(createAsmStreamer(Ctx, FOS,
-                                TM->getTargetData()->isLittleEndian(),
-                                /*asmverbose*/true, IP, CE, ShowInst));
+      TAB = TheTarget->createAsmBackend(TripleName);
+    }
+    Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
+                                           /*useLoc*/ true, IP, CE, TAB,
+                                           ShowInst));
   } else if (FileType == OFT_Null) {
     Str.reset(createNullStreamer(Ctx));
   } else {
@@ -313,7 +359,8 @@ static int AssembleInput(const char *ProgName) {
     MCCodeEmitter *CE = TheTarget->createCodeEmitter(*TM, Ctx);
     TargetAsmBackend *TAB = TheTarget->createAsmBackend(TripleName);
     Str.reset(TheTarget->createObjectStreamer(TripleName, Ctx, *TAB,
-                                              FOS, CE, RelaxAll));
+                                              FOS, CE, RelaxAll,
+                                              NoExecStack));
   }
 
   if (EnableLogging) {
@@ -344,18 +391,10 @@ static int DisassembleInput(const char *ProgName, bool Enhanced) {
   const Target *TheTarget = GetTarget(ProgName);
   if (!TheTarget)
     return 0;
-  
-  std::string ErrorMessage;
-  
-  MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
-                                                      &ErrorMessage);
-
-  if (Buffer == 0) {
-    errs() << ProgName << ": ";
-    if (ErrorMessage.size())
-      errs() << ErrorMessage << "\n";
-    else
-      errs() << "input file didn't read correctly.\n";
+
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, Buffer)) {
+    errs() << ProgName << ": " << ec.message() << '\n';
     return 1;
   }
   
@@ -365,9 +404,11 @@ static int DisassembleInput(const char *ProgName, bool Enhanced) {
 
   int Res;
   if (Enhanced)
-    Res = Disassembler::disassembleEnhanced(TripleName, *Buffer, Out->os());
+    Res =
+      Disassembler::disassembleEnhanced(TripleName, *Buffer.take(), Out->os());
   else
-    Res = Disassembler::disassemble(*TheTarget, TripleName, *Buffer, Out->os());
+    Res = Disassembler::disassemble(*TheTarget, TripleName,
+                                    *Buffer.take(), Out->os());
 
   // Keep output if no errors.
   if (Res == 0) Out->keep();
diff --git a/tools/llvm-nm/CMakeLists.txt b/tools/llvm-nm/CMakeLists.txt
index 45cf1b67f94d..b6cd80b477a9 100644
--- a/tools/llvm-nm/CMakeLists.txt
+++ b/tools/llvm-nm/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS archive bitreader)
+set(LLVM_LINK_COMPONENTS archive bitreader object)
 
 add_llvm_tool(llvm-nm
   llvm-nm.cpp
diff --git a/tools/llvm-nm/Makefile b/tools/llvm-nm/Makefile
index ecf5f8c3dcfa..6bb4cd4acc23 100644
--- a/tools/llvm-nm/Makefile
+++ b/tools/llvm-nm/Makefile
@@ -9,7 +9,7 @@
 LEVEL = ../..
 
 TOOLNAME = llvm-nm
-LINK_COMPONENTS = archive bitreader
+LINK_COMPONENTS = archive bitreader object
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index daa85712379a..1afa5032957c 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -20,17 +20,23 @@
 #include "llvm/Module.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Bitcode/Archive.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <cstring>
+#include <vector>
 using namespace llvm;
+using namespace object;
 
 namespace {
   enum OutputFormatTy { bsd, sysv, posix };
@@ -64,11 +70,148 @@ namespace {
   cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"));
   cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"));
 
+  cl::opt<bool> PrintFileName("print-file-name",
+    cl::desc("Precede each symbol with the object file it came from"));
+
+  cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
+                                cl::aliasopt(PrintFileName));
+  cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
+                                cl::aliasopt(PrintFileName));
+
+  cl::opt<bool> DebugSyms("debug-syms",
+    cl::desc("Show all symbols, even debugger only"));
+  cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
+                            cl::aliasopt(DebugSyms));
+
+  cl::opt<bool> NumericSort("numeric-sort",
+    cl::desc("Sort symbols by address"));
+  cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
+                              cl::aliasopt(NumericSort));
+  cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
+                              cl::aliasopt(NumericSort));
+
+  cl::opt<bool> NoSort("no-sort",
+    cl::desc("Show symbols in order encountered"));
+  cl::alias NoSortp("p", cl::desc("Alias for --no-sort"),
+                         cl::aliasopt(NoSort));
+
+  cl::opt<bool> PrintSize("print-size",
+    cl::desc("Show symbol size instead of address"));
+  cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
+                            cl::aliasopt(PrintSize));
+
+  cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"));
+
+  bool PrintAddress = true;
+
   bool MultipleFiles = false;
 
   std::string ToolName;
 }
 
+namespace {
+  struct NMSymbol {
+    uint64_t  Address;
+    uint64_t  Size;
+    char      TypeChar;
+    StringRef Name;
+  };
+
+  static bool CompareSymbolAddress(const NMSymbol &a, const NMSymbol &b) {
+    if (a.Address < b.Address)
+      return true;
+    else if (a.Address == b.Address && a.Name < b.Name)
+      return true;
+    else
+      return false;
+
+  }
+
+  static bool CompareSymbolSize(const NMSymbol &a, const NMSymbol &b) {
+    if (a.Size < b.Size)
+      return true;
+    else if (a.Size == b.Size && a.Name < b.Name)
+      return true;
+    else
+      return false;
+  }
+
+  static bool CompareSymbolName(const NMSymbol &a, const NMSymbol &b) {
+    return a.Name < b.Name;
+  }
+
+  StringRef CurrentFilename;
+  typedef std::vector<NMSymbol> SymbolListT;
+  SymbolListT SymbolList;
+}
+
+static void SortAndPrintSymbolList() {
+  if (!NoSort) {
+    if (NumericSort)
+      std::sort(SymbolList.begin(), SymbolList.end(), CompareSymbolAddress);
+    else if (SizeSort)
+      std::sort(SymbolList.begin(), SymbolList.end(), CompareSymbolSize);
+    else
+      std::sort(SymbolList.begin(), SymbolList.end(), CompareSymbolName);
+  }
+
+  if (OutputFormat == posix && MultipleFiles) {
+    outs() << '\n' << CurrentFilename << ":\n";
+  } else if (OutputFormat == bsd && MultipleFiles) {
+    outs() << "\n" << CurrentFilename << ":\n";
+  } else if (OutputFormat == sysv) {
+    outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n"
+           << "Name                  Value   Class        Type"
+           << "         Size   Line  Section\n";
+  }
+
+  for (SymbolListT::iterator i = SymbolList.begin(),
+                             e = SymbolList.end(); i != e; ++i) {
+    if ((i->TypeChar != 'U') && UndefinedOnly)
+      continue;
+    if ((i->TypeChar == 'U') && DefinedOnly)
+      continue;
+    if (SizeSort && !PrintAddress && i->Size == UnknownAddressOrSize)
+      continue;
+
+    char SymbolAddrStr[10] = "";
+    char SymbolSizeStr[10] = "";
+
+    if (OutputFormat == sysv || i->Address == object::UnknownAddressOrSize)
+      strcpy(SymbolAddrStr, "        ");
+    if (OutputFormat == sysv)
+      strcpy(SymbolSizeStr, "        ");
+
+    if (i->Address != object::UnknownAddressOrSize)
+      format("%08x", i->Address).print(SymbolAddrStr, sizeof(SymbolAddrStr));
+    if (i->Size != object::UnknownAddressOrSize)
+      format("%08x", i->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+
+    if (OutputFormat == posix) {
+      outs() << i->Name << " " << i->TypeChar << " "
+             << SymbolAddrStr << SymbolSizeStr << "\n";
+    } else if (OutputFormat == bsd) {
+      if (PrintAddress)
+        outs() << SymbolAddrStr << ' ';
+      if (PrintSize) {
+        outs() << SymbolSizeStr;
+        if (i->Size != object::UnknownAddressOrSize)
+          outs() << ' ';
+      }
+      outs() << i->TypeChar << " " << i->Name  << "\n";
+    } else if (OutputFormat == sysv) {
+      std::string PaddedName (i->Name);
+      while (PaddedName.length () < 20)
+        PaddedName += " ";
+      outs() << PaddedName << "|" << SymbolAddrStr << "|   "
+             << i->TypeChar
+             << "  |                  |" << SymbolSizeStr << "|     |\n";
+    }
+  }
+
+  SymbolList.clear();
+}
+
 static char TypeCharForSymbol(GlobalValue &GV) {
   if (GV.isDeclaration())                                  return 'U';
   if (GV.hasLinkOnceLinkage())                             return 'C';
@@ -94,57 +237,62 @@ static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
       GV.hasLinkerPrivateWeakDefAutoLinkage() ||
       GV.hasAvailableExternallyLinkage())
     return;
-
-  const std::string SymbolAddrStr = "        "; // Not used yet...
   char TypeChar = TypeCharForSymbol(GV);
-  if ((TypeChar != 'U') && UndefinedOnly)
-    return;
-  if ((TypeChar == 'U') && DefinedOnly)
-    return;
   if (GV.hasLocalLinkage () && ExternalOnly)
     return;
-  if (OutputFormat == posix) {
-    outs() << GV.getName () << " " << TypeCharForSymbol(GV) << " "
-           << SymbolAddrStr << "\n";
-  } else if (OutputFormat == bsd) {
-    outs() << SymbolAddrStr << " " << TypeCharForSymbol(GV) << " "
-           << GV.getName () << "\n";
-  } else if (OutputFormat == sysv) {
-    std::string PaddedName (GV.getName ());
-    while (PaddedName.length () < 20)
-      PaddedName += " ";
-    outs() << PaddedName << "|" << SymbolAddrStr << "|   "
-           << TypeCharForSymbol(GV)
-           << "  |                  |      |     |\n";
-  }
+
+  NMSymbol s;
+  s.Address = object::UnknownAddressOrSize;
+  s.Size = object::UnknownAddressOrSize;
+  s.TypeChar = TypeChar;
+  s.Name     = GV.getName();
+  SymbolList.push_back(s);
 }
 
 static void DumpSymbolNamesFromModule(Module *M) {
-  const std::string &Filename = M->getModuleIdentifier ();
-  if (OutputFormat == posix && MultipleFiles) {
-    outs() << Filename << ":\n";
-  } else if (OutputFormat == bsd && MultipleFiles) {
-    outs() << "\n" << Filename << ":\n";
-  } else if (OutputFormat == sysv) {
-    outs() << "\n\nSymbols from " << Filename << ":\n\n"
-           << "Name                  Value   Class        Type"
-           << "         Size   Line  Section\n";
-  }
+  CurrentFilename = M->getModuleIdentifier();
   std::for_each (M->begin(), M->end(), DumpSymbolNameForGlobalValue);
   std::for_each (M->global_begin(), M->global_end(),
                  DumpSymbolNameForGlobalValue);
   std::for_each (M->alias_begin(), M->alias_end(),
                  DumpSymbolNameForGlobalValue);
+
+  SortAndPrintSymbolList();
+}
+
+static void DumpSymbolNamesFromObject(ObjectFile *obj) {
+  for (ObjectFile::symbol_iterator i = obj->begin_symbols(),
+                                   e = obj->end_symbols(); i != e; ++i) {
+    if (!DebugSyms && i->isInternal())
+      continue;
+    NMSymbol s;
+    s.Size = object::UnknownAddressOrSize;
+    s.Address = object::UnknownAddressOrSize;
+    if (PrintSize || SizeSort)
+      s.Size = i->getSize();
+    if (PrintAddress)
+      s.Address = i->getAddress();
+    s.TypeChar = i->getNMTypeChar();
+    s.Name     = i->getName();
+    SymbolList.push_back(s);
+  }
+
+  CurrentFilename = obj->getFilename();
+  SortAndPrintSymbolList();
 }
 
 static void DumpSymbolNamesFromFile(std::string &Filename) {
   LLVMContext &Context = getGlobalContext();
   std::string ErrorMessage;
   sys::Path aPath(Filename);
+  bool exists;
+  if (sys::fs::exists(aPath.str(), exists) || !exists)
+    errs() << ToolName << ": '" << Filename << "': " << "No such file\n";
   // Note: Currently we do not support reading an archive from stdin.
   if (Filename == "-" || aPath.isBitcodeFile()) {
-    std::auto_ptr<MemoryBuffer> Buffer(
-                   MemoryBuffer::getFileOrSTDIN(Filename, &ErrorMessage));
+    OwningPtr<MemoryBuffer> Buffer;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buffer))
+      ErrorMessage = ec.message();
     Module *Result = 0;
     if (Buffer.get())
       Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
@@ -168,6 +316,14 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
     }
     MultipleFiles = true;
     std::for_each (Modules.begin(), Modules.end(), DumpSymbolNamesFromModule);
+  } else if (aPath.isObjectFile()) {
+    std::auto_ptr<ObjectFile> obj(ObjectFile::createObjectFile(aPath.str()));
+    if (!obj.get()) {
+      errs() << ToolName << ": " << Filename << ": "
+             << "Failed to open object file\n";
+      return;
+    }
+    DumpSymbolNamesFromObject(obj.get());
   } else {
     errs() << ToolName << ": " << Filename << ": "
            << "unrecognizable file type\n";
@@ -187,6 +343,12 @@ int main(int argc, char **argv) {
   if (BSDFormat) OutputFormat = bsd;
   if (POSIXFormat) OutputFormat = posix;
 
+  // The relative order of these is important. If you pass --size-sort it should
+  // only print out the size. However, if you pass -S --size-sort, it should
+  // print out both the size and address.
+  if (SizeSort && !PrintSize) PrintAddress = false;
+  if (OutputFormat == sysv || SizeSort) PrintSize = true;
+
   switch (InputFilenames.size()) {
   case 0: InputFilenames.push_back("-");
   case 1: break;
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
new file mode 100644
index 000000000000..4181b32bca67
--- /dev/null
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  MC
+  MCParser
+  MCDisassembler
+  Object
+  )
+
+add_llvm_tool(llvm-objdump
+  llvm-objdump.cpp
+  )
diff --git a/tools/llvm-objdump/Makefile b/tools/llvm-objdump/Makefile
new file mode 100644
index 000000000000..4d7cd34eac97
--- /dev/null
+++ b/tools/llvm-objdump/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-objdump/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+
+TOOLNAME = llvm-objdump
+LINK_COMPONENTS = $(TARGETS_TO_BUILD) MC MCParser MCDisassembler Object
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
new file mode 100644
index 000000000000..1fef8b6e2496
--- /dev/null
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -0,0 +1,255 @@
+//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a utility that works like binutils "objdump", that is, it
+// dumps out a plethora of information about an object file depending on the
+// flags.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+// This config must be included before llvm-config.h.
+#include "llvm/Config/config.h"
+#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
+#include "../../lib/MC/MCDisassembler/EDInst.h"
+#include "../../lib/MC/MCDisassembler/EDOperand.h"
+#include "../../lib/MC/MCDisassembler/EDToken.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSelect.h"
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <cstring>
+#include <vector>
+using namespace llvm;
+using namespace object;
+
+namespace {
+  cl::list<std::string>
+  InputFilenames(cl::Positional, cl::desc("<input object files>"),
+                 cl::ZeroOrMore);
+
+  cl::opt<bool>
+  Disassemble("disassemble",
+    cl::desc("Display assembler mnemonics for the machine instructions"));
+  cl::alias
+  Disassembled("d", cl::desc("Alias for --disassemble"),
+               cl::aliasopt(Disassemble));
+
+  cl::opt<std::string>
+  TripleName("triple", cl::desc("Target triple to disassemble for, "
+                                "see -version for available targets"));
+
+  cl::opt<std::string>
+  ArchName("arch", cl::desc("Target arch to disassemble for, "
+                            "see -version for available targets"));
+
+  StringRef ToolName;
+}
+
+static const Target *GetTarget(const ObjectFile *Obj = NULL) {
+  // Figure out the target triple.
+  llvm::Triple TT("unknown-unknown-unknown");
+  if (TripleName.empty()) {
+    if (Obj)
+      TT.setArch(Triple::ArchType(Obj->getArch()));
+  } else
+    TT.setTriple(Triple::normalize(TripleName));
+
+  if (!ArchName.empty())
+    TT.setArchName(ArchName);
+
+  TripleName = TT.str();
+
+  // Get the target specific parser.
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  if (TheTarget)
+    return TheTarget;
+
+  errs() << ToolName << ": error: unable to get target for '" << TripleName
+         << "', see --version and --triple.\n";
+  return 0;
+}
+
+namespace {
+class StringRefMemoryObject : public MemoryObject {
+private:
+  StringRef Bytes;
+public:
+  StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
+
+  uint64_t getBase() const { return 0; }
+  uint64_t getExtent() const { return Bytes.size(); }
+
+  int readByte(uint64_t Addr, uint8_t *Byte) const {
+    if (Addr > getExtent())
+      return -1;
+    *Byte = Bytes[Addr];
+    return 0;
+  }
+};
+}
+
+static void DumpBytes(StringRef bytes) {
+  static char hex_rep[] = "0123456789abcdef";
+  // FIXME: The real way to do this is to figure out the longest instruction
+  //        and align to that size before printing. I'll fix this when I get
+  //        around to outputting relocations.
+  // 15 is the longest x86 instruction
+  // 3 is for the hex rep of a byte + a space.
+  // 1 is for the null terminator.
+  enum { OutputSize = (15 * 3) + 1 };
+  char output[OutputSize];
+
+  assert(bytes.size() <= 15
+    && "DumpBytes only supports instructions of up to 15 bytes");
+  memset(output, ' ', sizeof(output));
+  unsigned index = 0;
+  for (StringRef::iterator i = bytes.begin(),
+                           e = bytes.end(); i != e; ++i) {
+    output[index] = hex_rep[(*i & 0xF0) >> 4];
+    output[index + 1] = hex_rep[*i & 0xF];
+    index += 3;
+  }
+
+  output[sizeof(output) - 1] = 0;
+  outs() << output;
+}
+
+static void DisassembleInput(const StringRef &Filename) {
+  OwningPtr<MemoryBuffer> Buff;
+
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
+    errs() << ToolName << ": " << Filename << ": " << ec.message() << "\n";
+    return;
+  }
+
+  OwningPtr<ObjectFile> Obj(ObjectFile::createObjectFile(Buff.take()));
+
+  const Target *TheTarget = GetTarget(Obj.get());
+  if (!TheTarget) {
+    // GetTarget prints out stuff.
+    return;
+  }
+
+  outs() << '\n';
+  outs() << Filename
+         << ":\tfile format " << Obj->getFileFormatName() << "\n\n\n";
+
+  for (ObjectFile::section_iterator i = Obj->begin_sections(),
+                                    e = Obj->end_sections();
+                                    i != e; ++i) {
+    if (!i->isText())
+      continue;
+    outs() << "Disassembly of section " << i->getName() << ":\n\n";
+
+    // Set up disassembler.
+    OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createAsmInfo(TripleName));
+
+    if (!AsmInfo) {
+      errs() << "error: no assembly info for target " << TripleName << "\n";
+      return;
+    }
+
+    OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler());
+    if (!DisAsm) {
+      errs() << "error: no disassembler for target " << TripleName << "\n";
+      return;
+    }
+
+    int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+    OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+                                  AsmPrinterVariant, *AsmInfo));
+    if (!IP) {
+      errs() << "error: no instruction printer for target " << TripleName << '\n';
+      return;
+    }
+
+    StringRef Bytes = i->getContents();
+    StringRefMemoryObject memoryObject(Bytes);
+    uint64_t Size;
+    uint64_t Index;
+
+    for (Index = 0; Index < Bytes.size(); Index += Size) {
+      MCInst Inst;
+
+#     ifndef NDEBUG
+      raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+#     else
+      raw_ostream &DebugOut = nulls();
+#     endif
+
+      if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
+        outs() << format("%8x:\t", i->getAddress() + Index);
+        DumpBytes(StringRef(Bytes.data() + Index, Size));
+        IP->printInst(&Inst, outs());
+        outs() << "\n";
+      } else {
+        errs() << ToolName << ": warning: invalid instruction encoding\n";
+        if (Size == 0)
+          Size = 1; // skip illegible bytes
+      }
+    }
+  }
+}
+
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  // Initialize targets and assembly printers/parsers.
+  llvm::InitializeAllTargetInfos();
+  // FIXME: We shouldn't need to initialize the Target(Machine)s.
+  llvm::InitializeAllTargets();
+  llvm::InitializeAllAsmPrinters();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
+  TripleName = Triple::normalize(TripleName);
+
+  ToolName = argv[0];
+
+  // Defaults to a.out if no filenames specified.
+  if (InputFilenames.size() == 0)
+    InputFilenames.push_back("a.out");
+
+  // -d is the only flag that is currently implemented, so just print help if
+  // it is not set.
+  if (!Disassemble) {
+    cl::PrintHelpMessage();
+    return 2;
+  }
+
+  std::for_each(InputFilenames.begin(), InputFilenames.end(),
+                DisassembleInput);
+
+  return 0;
+}
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 1c63d974eae5..9d0b46833bef 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -29,7 +29,8 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <iomanip>
 #include <map>
@@ -263,12 +264,13 @@ int main(int argc, char **argv) {
 
   // Read in the bitcode file...
   std::string ErrorMessage;
+  OwningPtr<MemoryBuffer> Buffer;
+  error_code ec;
   Module *M = 0;
-  if (MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(BitcodeFile,
-                                                          &ErrorMessage)) {
-    M = ParseBitcodeFile(Buffer, Context, &ErrorMessage);
-    delete Buffer;
-  }
+  if (!(ec = MemoryBuffer::getFileOrSTDIN(BitcodeFile, Buffer))) {
+    M = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
+  } else
+    ErrorMessage = ec.message();
   if (M == 0) {
     errs() << argv[0] << ": " << BitcodeFile << ": "
       << ErrorMessage << "\n";
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index dffe3ada5f10..64f795f7f63d 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -15,14 +15,13 @@
 #include "llvm/Module.h"
 #include "llvm/Bitcode/Archive.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include <iostream>
-#include <iomanip>
+#include "llvm/Support/Signals.h"
 #include <memory>
-
 using namespace llvm;
 
 // llvm-ar operation code and modifier flags
@@ -35,12 +34,12 @@ Verbose("verbose",cl::Optional,cl::init(false),
 
 // printSymbolTable - print out the archive's symbol table.
 void printSymbolTable(Archive* TheArchive) {
-  std::cout << "\nArchive Symbol Table:\n";
+  outs() << "\nArchive Symbol Table:\n";
   const Archive::SymTabType& symtab = TheArchive->getSymbolTable();
   for (Archive::SymTabType::const_iterator I=symtab.begin(), E=symtab.end();
        I != E; ++I ) {
     unsigned offset = TheArchive->getFirstFileOffset() + I->second;
-    std::cout << " " << std::setw(9) << offset << "\t" << I->first <<"\n";
+    outs() << " " << format("%9u", offset) << "\t" << I->first <<"\n";
   }
 }
 
@@ -71,7 +70,8 @@ int main(int argc, char **argv) {
       throw std::string("Archive name invalid: ") + ArchiveName;
 
     // Make sure it exists, we don't create empty archives
-    if (!ArchivePath.exists())
+    bool Exists;
+    if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists)
       throw std::string("Archive file does not exist");
 
     std::string err_msg;
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
index 52381304bb93..9e6facab7028 100644
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@@ -18,11 +18,12 @@ SHARED_LIBRARY = 1
 include $(LEVEL)/Makefile.config
 
 ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
-    EXPORTED_SYMBOL_FILE = $(ObjDir)/$(LIBRARYNAME).exports
+  EXPORTED_SYMBOL_FILE = $(ObjDir)/$(LIBRARYNAME).exports
 
+  ifeq (1,$(ENABLE_EMBED_STDCXX))
     # It is needed to force static-stdc++.a linked.
-    # FIXME: It should be omitted when configure detects system's stdc++.dll.
     SHLIB_FRAG_NAMES += stdc++.a.o
+  endif
 
 endif
 
@@ -61,14 +62,22 @@ ifeq ($(HOST_OS),Darwin)
     endif
 endif
 
-ifeq ($(HOST_OS), Linux)
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD))
     # Include everything from the .a's into the shared library.
     LLVMLibsOptions := -Wl,--whole-archive $(LLVMLibsOptions) \
                        -Wl,--no-whole-archive
+endif
+
+ifeq ($(HOST_OS),Linux)
     # Don't allow unresolved symbols.
     LLVMLibsOptions += -Wl,--no-undefined
 endif
 
+ifeq ($(HOST_OS),SunOS)
+    # add -z allextract ahead of other libraries on Solaris
+    LLVMLibsOptions := -Wl,-z -Wl,allextract $(LLVMLibsOptions)
+endif
+
 ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
 
 SHLIB_STUBS := $(addprefix $(ObjDir)/, $(SHLIB_FRAG_NAMES))
diff --git a/tools/llvm-stub/llvm-stub.c b/tools/llvm-stub/llvm-stub.c
index f2e478e69583..31c2d09c6b7e 100644
--- a/tools/llvm-stub/llvm-stub.c
+++ b/tools/llvm-stub/llvm-stub.c
@@ -1,10 +1,10 @@
 /*===- llvm-stub.c - Stub executable to run llvm bitcode files ------------===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This tool is used by the gccld program to enable transparent execution of
@@ -64,7 +64,11 @@ int main(int argc, char** argv) {
   memcpy((char **)Args+2, argv+1, sizeof(char*)*argc);
 
   /* Run the JIT. */
-  execvp(Interp, (char **)Args);
+#ifndef _WIN32
+  execvp(Interp, (char **)Args); /* POSIX execvp takes a char *const[]. */
+#else
+  execvp(Interp, Args); /* windows execvp takes a const char *const *. */
+#endif
   /* if _execv returns, the JIT could not be started. */
   fprintf(stderr, "Could not execute the LLVM JIT.  Either add 'lli' to your"
           " path, or set the\ninterpreter you want to use in the LLVMINTERP "
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index d160e758ea62..ec9098b90cd5 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -319,7 +319,8 @@ separate option groups syntactically.
 
    - ``alias_option`` - a special option type for creating aliases. Unlike other
      option types, aliases are not allowed to have any properties besides the
-     aliased option name. Usage example: ``(alias_option "preprocess", "E")``
+     aliased option name.
+     Usage example: ``(alias_option "preprocess", "E")``
 
    - ``switch_list_option`` - like ``switch_option`` with the ``zero_or_more``
      property, but remembers how many times the switch was turned on. Useful
@@ -456,22 +457,22 @@ use TableGen inheritance instead.
 * Possible tests are:
 
   - ``switch_on`` - Returns true if a given command-line switch is provided by
-    the user. Can be given a list as argument, in that case ``(switch_on ["foo",
-    "bar", "baz"])`` is equivalent to ``(and (switch_on "foo"), (switch_on
+    the user. Can be given multiple arguments, in that case ``(switch_on "foo",
+    "bar", "baz")`` is equivalent to ``(and (switch_on "foo"), (switch_on
     "bar"), (switch_on "baz"))``.
     Example: ``(switch_on "opt")``.
 
-  - ``any_switch_on`` - Given a list of switch options, returns true if any of
+  - ``any_switch_on`` - Given a number of switch options, returns true if any of
     the switches is turned on.
-    Example: ``(any_switch_on ["foo", "bar", "baz"])`` is equivalent to ``(or
+    Example: ``(any_switch_on "foo", "bar", "baz")`` is equivalent to ``(or
     (switch_on "foo"), (switch_on "bar"), (switch_on "baz"))``.
 
-  - ``parameter_equals`` - Returns true if a command-line parameter equals
-    a given value.
+  - ``parameter_equals`` - Returns true if a command-line parameter (first
+    argument) equals a given value (second argument).
     Example: ``(parameter_equals "W", "all")``.
 
-  - ``element_in_list`` - Returns true if a command-line parameter
-    list contains a given value.
+  - ``element_in_list`` - Returns true if a command-line parameter list (first
+    argument) contains a given value (second argument).
     Example: ``(element_in_list "l", "pthread")``.
 
   - ``input_languages_contain`` - Returns true if a given language
@@ -479,27 +480,27 @@ use TableGen inheritance instead.
     Example: ``(input_languages_contain "c++")``.
 
   - ``in_language`` - Evaluates to true if the input file language is equal to
-    the argument. At the moment works only with ``cmd_line`` and ``actions`` (on
+    the argument. At the moment works only with ``command`` and ``actions`` (on
     non-join nodes).
     Example: ``(in_language "c++")``.
 
   - ``not_empty`` - Returns true if a given option (which should be either a
     parameter or a parameter list) is set by the user. Like ``switch_on``, can
-    be also given a list as argument.
-    Example: ``(not_empty "o")``.
+    be also given multiple arguments.
+    Examples: ``(not_empty "o")``, ``(not_empty "o", "l")``.
 
   - ``any_not_empty`` - Returns true if ``not_empty`` returns true for any of
-    the options in the list.
-    Example: ``(any_not_empty ["foo", "bar", "baz"])`` is equivalent to ``(or
+    the provided options.
+    Example: ``(any_not_empty "foo", "bar", "baz")`` is equivalent to ``(or
     (not_empty "foo"), (not_empty "bar"), (not_empty "baz"))``.
 
   - ``empty`` - The opposite of ``not_empty``. Equivalent to ``(not (not_empty
-    X))``. Provided for convenience. Can be given a list as argument.
+    X))``. Can be given multiple arguments.
 
   - ``any_not_empty`` - Returns true if ``not_empty`` returns true for any of
-    the options in the list.
-    Example: ``(any_empty ["foo", "bar", "baz"])`` is equivalent to ``(not (and
-    (not_empty "foo"), (not_empty "bar"), (not_empty "baz")))``.
+    the provided options.
+    Example: ``(any_empty "foo", "bar", "baz")`` is equivalent to ``(or
+    (not_empty "foo"), (not_empty "bar"), (not_empty "baz"))``.
 
   - ``single_input_file`` - Returns true if there was only one input file
     provided on the command-line. Used without arguments:
@@ -511,16 +512,18 @@ use TableGen inheritance instead.
   - ``default`` - Always evaluates to true. Should always be the last
     test in the ``case`` expression.
 
-  - ``and`` - A standard binary logical combinator that returns true iff all of
+  - ``and`` - A standard logical combinator that returns true iff all of
     its arguments return true. Used like this: ``(and (test1), (test2),
     ... (testN))``. Nesting of ``and`` and ``or`` is allowed, but not
     encouraged.
 
-  - ``or`` - A binary logical combinator that returns true iff any of its
-    arguments returns true. Example: ``(or (test1), (test2), ... (testN))``.
+  - ``or`` - A logical combinator that returns true iff any of its arguments
+    return true.
+    Example: ``(or (test1), (test2), ... (testN))``.
 
   - ``not`` - Standard unary logical combinator that negates its
-    argument. Example: ``(not (or (test1), (test2), ... (testN)))``.
+    argument.
+    Example: ``(not (or (test1), (test2), ... (testN)))``.
 
 
 
@@ -549,10 +552,10 @@ The complete list of all currently implemented tool properties follows.
 
 * Possible tool properties:
 
-  - ``in_language`` - input language name. Can be either a string or a
-    list, in case the tool supports multiple input languages.
+  - ``in_language`` - input language name. Can be given multiple arguments, in
+    case the tool supports multiple input languages.
 
-  - ``out_language`` - output language name. Multiple output languages are not
+  - ``out_language`` - output language name. Multiple output languages are
     allowed.
 
   - ``output_suffix`` - output file suffix. Can also be changed
@@ -687,12 +690,12 @@ occasions. Example (adapted from the built-in Base plugin)::
 
 
     def Preprocess : OptionPreprocessor<
-    (case (not (any_switch_on ["O0", "O1", "O2", "O3"])),
+    (case (not (any_switch_on "O0", "O1", "O2", "O3")),
                (set_option "O2"),
-          (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
-               (unset_option ["O0", "O1", "O2"]),
-          (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
-               (unset_option ["O0", "O1"]),
+          (and (switch_on "O3"), (any_switch_on "O0", "O1", "O2")),
+               (unset_option "O0", "O1", "O2"),
+          (and (switch_on "O2"), (any_switch_on "O0", "O1")),
+               (unset_option "O0", "O1"),
           (and (switch_on "O1"), (switch_on "O0")),
                (unset_option "O0"))
     >;
@@ -709,10 +712,10 @@ set or unset a given option. To set an option with ``set_option``, use the
 two-argument form: ``(set_option "parameter", VALUE)``. Here, ``VALUE`` can be
 either a string, a string list, or a boolean constant.
 
-For convenience, ``set_option`` and ``unset_option`` also work on lists. That
-is, instead of ``[(unset_option "A"), (unset_option "B")]`` you can use
-``(unset_option ["A", "B"])``. Obviously, ``(set_option ["A", "B"])`` is valid
-only if both ``A`` and ``B`` are switches.
+For convenience, ``set_option`` and ``unset_option`` also work with multiple
+arguments. That is, instead of ``[(unset_option "A"), (unset_option "B")]`` you
+can use ``(unset_option "A", "B")``. Obviously, ``(set_option "A", "B")`` is
+only valid if both ``A`` and ``B`` are switches.
 
 
 More advanced topics
diff --git a/tools/llvmc/examples/mcc16/Hooks.cpp b/tools/llvmc/examples/mcc16/Hooks.cpp
index edb91e16aa90..95158efeece0 100644
--- a/tools/llvmc/examples/mcc16/Hooks.cpp
+++ b/tools/llvmc/examples/mcc16/Hooks.cpp
@@ -1,4 +1,4 @@
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/tools/llvmc/examples/mcc16/Main.cpp b/tools/llvmc/examples/mcc16/Main.cpp
index 55ae9128394a..5d4992dd9ce7 100644
--- a/tools/llvmc/examples/mcc16/Main.cpp
+++ b/tools/llvmc/examples/mcc16/Main.cpp
@@ -16,7 +16,7 @@
 #include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/Main.h"
 
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Config/config.h"
 
 #include <iostream>
diff --git a/tools/llvmc/src/Base.td.in b/tools/llvmc/src/Base.td.in
index 0c4de4c3fa50..50533f11fa4d 100644
--- a/tools/llvmc/src/Base.td.in
+++ b/tools/llvmc/src/Base.td.in
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 // Options
 
 def OptList : OptionList<[
@@ -35,8 +34,6 @@ def OptList : OptionList<[
     (help "Stop after compilation, do not assemble")),
  (switch_option "c",
     (help "Compile and assemble, but do not link")),
- (switch_option "pthread",
-    (help "Enable threads")),
  (switch_option "m32",
     (help "Generate code for a 32-bit environment"), (hidden)),
  (switch_option "m64",
@@ -45,22 +42,21 @@ def OptList : OptionList<[
     (help "Relocation model: PIC"), (hidden)),
  (switch_option "mdynamic-no-pic",
     (help "Relocation model: dynamic-no-pic"), (hidden)),
- (switch_option "shared",
-     (help "Create a DLL instead of the regular executable")),
  (parameter_option "linker",
     (help "Choose linker (possible values: gcc, g++)")),
  (parameter_option "mtune",
-    (help "Target a specific CPU type"), (hidden), (forward_not_split)),
-
- // TODO: Add a conditional compilation mechanism to make Darwin-only options
- // like '-arch' really Darwin-only.
-
- (parameter_option "arch",
-    (help "Compile for the specified target architecture"), (hidden)),
- (parameter_option "march",
-    (help "A synonym for -mtune"), (hidden), (forward_not_split)),
+    (help "Target a specific CPU type"), (forward_not_split)),
+ (parameter_list_option "march",
+    (help "Generate code for the specified machine type")),
  (parameter_option "mcpu",
     (help "A deprecated synonym for -mtune"), (hidden), (forward_not_split)),
+ (parameter_option "mfpu",
+    (help "Specify type of floating point unit"),
+    (hidden), (forward_not_split)),
+ (parameter_option "mabi",
+    (help "Generate code for the specified ABI"), (hidden)),
+ (parameter_option "mfloat-abi",
+    (help "Specifies which floating-point ABI to use"), (hidden)),
  (switch_option "mfix-and-continue",
     (help "Needed by gdb to load .o files dynamically"), (hidden)),
  (parameter_option "MF",
@@ -73,14 +69,6 @@ def OptList : OptionList<[
  (parameter_list_option "iquote",
     (help "Search dir only for files requested with #inlcude \"file\""),
     (hidden)),
- (parameter_list_option "framework",
-    (help "Specifies a framework to link against")),
- (parameter_list_option "weak_framework",
-    (help "Specifies a framework to weakly link against"), (hidden)),
- (parameter_option "filelist", (hidden),
-    (help "Link the files listed in file")),
- (prefix_list_option "F",
-    (help "Add a directory to framework search path")),
  (prefix_list_option "I",
     (help "Add a directory to include path")),
  (prefix_list_option "D",
@@ -93,10 +81,6 @@ def OptList : OptionList<[
     (help "Pass options to assembler")),
  (prefix_list_option "Wllc,", (comma_separated),
     (help "Pass options to llc")),
- (prefix_list_option "L",
-    (help "Add a directory to link path")),
- (prefix_list_option "l",
-    (help "Search a library when linking")),
  (prefix_list_option "Wl,",
     (help "Pass options to linker")),
  (parameter_list_option "Xlinker", (hidden),
@@ -105,7 +89,56 @@ def OptList : OptionList<[
     (help "Pass options to opt")),
  (prefix_list_option "m",
      (help "Enable or disable various extensions (-mmmx, -msse, etc.)"),
-     (hidden)),
+     (hidden))
+]>;
+
+def LinkerOptList : OptionList<[
+ (prefix_list_option "L",
+    (help "Add a directory to link path")),
+ (prefix_list_option "l",
+    (help "Search a library when linking")),
+ (parameter_option "filelist", (hidden),
+    (help "Link the files listed in file")),
+ (switch_option "nostartfiles",
+    (help "Do not use the standard system startup files when linking"),
+    (hidden)),
+ (switch_option "nodefaultlibs",
+    (help "Do not use the standard system libraries when linking"), (hidden)),
+ (switch_option "nostdlib",
+    (help
+    "Do not use the standard system startup files or libraries when linking"),
+    (hidden)),
+ (switch_option "pie",
+     (help "Produce a position independent executable"), (hidden)),
+ (switch_option "rdynamic",
+     (help "Add all symbols to the dynamic export table"), (hidden)),
+ (switch_option "s",
+    (help "Strip all symbols"), (hidden)),
+ (switch_option "static",
+     (help "Do not link against shared libraries"), (hidden)),
+ (switch_option "static-libgcc",
+     (help "Use static libgcc"), (hidden)),
+ (switch_option "shared",
+     (help "Create a DLL instead of the regular executable")),
+ (switch_option "shared-libgcc",
+     (help "Use shared libgcc"), (hidden)),
+ (parameter_option "T",
+     (help "Read linker script"), (hidden)),
+ (parameter_option "u",
+     (help "Start with undefined reference to SYMBOL"), (hidden)),
+ (switch_option "pthread",
+    (help "Enable threads")),
+
+ // TODO: Add a conditional compilation mechanism to make Darwin-only options
+ // like '-arch' really Darwin-only.
+ (parameter_option "arch",
+    (help "Compile for the specified target architecture"), (hidden)),
+ (prefix_list_option "F",
+    (help "Add a directory to framework search path")),
+ (parameter_list_option "framework",
+    (help "Specifies a framework to link against")),
+ (parameter_list_option "weak_framework",
+    (help "Specifies a framework to weakly link against"), (hidden)),
  (switch_option "dynamiclib", (hidden),
      (help "Produce a dynamic library")),
  (switch_option "prebind", (hidden),
@@ -125,24 +158,24 @@ def OptList : OptionList<[
 // Option preprocessor.
 
 def Preprocess : OptionPreprocessor<
-(case (not (any_switch_on ["O0", "O1", "O2", "O3"])),
+(case (not (any_switch_on "O0", "O1", "O2", "O3")),
            (set_option "O2"),
-      (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
-           (unset_option ["O0", "O1", "O2"]),
-      (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
-           (unset_option ["O0", "O1"]),
-      (switch_on ["O1", "O0"]),
+      (and (switch_on "O3"), (any_switch_on "O0", "O1", "O2")),
+           (unset_option "O0", "O1", "O2"),
+      (and (switch_on "O2"), (any_switch_on "O0", "O1")),
+           (unset_option "O0", "O1"),
+      (switch_on "O1", "O0"),
            (unset_option "O0"))
 >;
 
 // Tools
 
-class llvm_gcc_based <string cmd_prefix, string in_lang,
-                      string E_ext, string out_lang> : Tool<
+class llvm_gcc_based <string cmd, string in_lang, string E_ext, dag out_lang,
+                      string out_ext> : Tool<
 [(in_language in_lang),
- (out_language "llvm-bitcode"),
- (output_suffix out_lang),
- (command cmd_prefix),
+ out_lang,
+ (output_suffix out_ext),
+ (command cmd),
  (actions
      (case
          (and (not_empty "o"),
@@ -151,14 +184,20 @@ class llvm_gcc_based <string cmd_prefix, string in_lang,
          (switch_on "E"),
               [(forward "E"), (stop_compilation), (output_suffix E_ext)],
          (and (switch_on "E"), (empty "o")), (no_out_file),
-         (switch_on ["emit-llvm", "S"]),
-              [(output_suffix "ll"), (stop_compilation)],
-         (switch_on ["emit-llvm", "c"]), (stop_compilation),
+
+         // ('-emit-llvm') && !('opt') -> stop compilation
+         (and (switch_on "emit-llvm"), (not (switch_on "opt"))),
+              (stop_compilation),
+         // ('-S' && '-emit-llvm') && !('opt') -> output .ll
+         (and (switch_on "emit-llvm", "S"), (not (switch_on "opt"))),
+              [(forward "S"), (output_suffix "ll")],
+         // Ususally just output .bc
+         (not (switch_on "fsyntax-only")),
+              [(append_cmd "-c"), (append_cmd "-emit-llvm")],
+
+         // -fsyntax-only
          (switch_on "fsyntax-only"), [(forward "fsyntax-only"),
                                       (no_out_file), (stop_compilation)],
-         (switch_on ["S", "emit-llvm"]), [(forward "S"), (forward "emit-llvm")],
-         (not (or (switch_on ["S", "emit-llvm"]), (switch_on "fsyntax-only"))),
-             [(append_cmd "-c"), (append_cmd "-emit-llvm")],
 
          // Forwards
          (not_empty "Xpreprocessor"), (forward "Xpreprocessor"),
@@ -170,8 +209,11 @@ class llvm_gcc_based <string cmd_prefix, string in_lang,
          (not_empty "D"), (forward "D"),
          (not_empty "arch"), (forward "arch"),
          (not_empty "march"), (forward "march"),
-         (not_empty "mtune"), (forward "mtune"),
          (not_empty "mcpu"), (forward "mcpu"),
+         (not_empty "mtune"), (forward "mtune"),
+         (not_empty "mfpu"), (forward "mfpu"),
+         (not_empty "mabi"), (forward "mabi"),
+         (not_empty "mfloat-abi"), (forward "mfloat-abi"),
          (not_empty "m"), (forward "m"),
          (switch_on "mfix-and-continue"), (forward "mfix-and-continue"),
          (switch_on "m32"), (forward "m32"),
@@ -187,30 +229,40 @@ class llvm_gcc_based <string cmd_prefix, string in_lang,
  (sink)
 ]>;
 
-def llvm_gcc_c : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x c", "c", "i", "bc">;
-def llvm_gcc_cpp : llvm_gcc_based<"@LLVMGXXCOMMAND@ -x c++", "c++", "i", "bc">;
-def llvm_gcc_m : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c",
-                                                  "objective-c", "mi", "bc">;
-def llvm_gcc_mxx : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c++",
-                                  "objective-c++", "mi", "bc">;
+class llvm_gcc_comp_based <string cmd, string in_lang, string E_ext>
+: llvm_gcc_based<cmd, in_lang, E_ext,
+  (out_language "llvm-bitcode", "object-code"), "bc">;
+
+class llvm_gcc_pch_based <string cmd, string in_lang, string E_ext>
+: llvm_gcc_based<cmd, in_lang, E_ext,
+  (out_language "precompiled-header"), "gch">;
 
-def llvm_gcc_c_pch : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x c-header",
-                                                      "c-header", "i", "gch">;
-def llvm_gcc_cpp_pch : llvm_gcc_based<"@LLVMGXXCOMMAND@ -x c++-header",
-                                                        "c++-header",
-                                                        "i", "gch">;
-def llvm_gcc_m_pch : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c-header",
-                                                      "objective-c-header",
-                                                      "mi", "gch">;
-def llvm_gcc_mxx_pch
-    : llvm_gcc_based<"@LLVMGCCCOMMAND@ -x objective-c++-header",
-                                       "objective-c++-header", "mi", "gch">;
+def llvm_gcc_c : llvm_gcc_comp_based
+    <"@LLVMGCCCOMMAND@ -x c", "c", "i">;
+def llvm_gcc_cpp : llvm_gcc_comp_based
+    <"@LLVMGXXCOMMAND@ -x c++", "c++", "i">;
+def llvm_gcc_m : llvm_gcc_comp_based
+    <"@LLVMGCCCOMMAND@ -x objective-c", "objective-c", "mi">;
+def llvm_gcc_mxx : llvm_gcc_comp_based
+    <"@LLVMGCCCOMMAND@ -x objective-c++", "objective-c++", "mi">;
+
+def llvm_gcc_c_pch : llvm_gcc_pch_based
+    <"@LLVMGCCCOMMAND@ -x c-header", "c-header", "i">;
+def llvm_gcc_cpp_pch : llvm_gcc_pch_based
+    <"@LLVMGXXCOMMAND@ -x c++-header", "c++-header", "i">;
+def llvm_gcc_m_pch : llvm_gcc_pch_based
+    <"@LLVMGCCCOMMAND@ -x objective-c-header", "objective-c-header", "mi">;
+def llvm_gcc_mxx_pch : llvm_gcc_pch_based
+    <"@LLVMGCCCOMMAND@ -x objective-c++-header", "objective-c++-header", "mi">;
 
 def opt : Tool<
 [(in_language "llvm-bitcode"),
  (out_language "llvm-bitcode"),
- (output_suffix "bc"),
- (actions (case (not_empty "Wo,"), (forward_value "Wo,"),
+ (output_suffix "opt.bc"),
+ (actions (case (switch_on "emit-llvm"), (stop_compilation),
+                (switch_on "emit-llvm", "S"),
+                [(append_cmd "-S"), (output_suffix "ll")],
+                (not_empty "Wo,"), (forward_value "Wo,"),
                 (switch_on "O1"), (forward "O1"),
                 (switch_on "O2"), (forward "O2"),
                 (switch_on "O3"), (forward "O3"))),
@@ -222,7 +274,8 @@ def llvm_as : Tool<
  (out_language "llvm-bitcode"),
  (output_suffix "bc"),
  (command "llvm-as"),
- (actions (case (switch_on "emit-llvm"), (stop_compilation)))
+ (actions (case (and (switch_on "emit-llvm"), (not (switch_on "opt"))),
+                (stop_compilation)))
 ]>;
 
 def llvm_gcc_assembler : Tool<
@@ -234,13 +287,18 @@ def llvm_gcc_assembler : Tool<
           (switch_on "c"), (stop_compilation),
           (not_empty "arch"), (forward "arch"),
           (not_empty "Xassembler"), (forward "Xassembler"),
+          (not_empty "march"), (forward "march"),
+          (not_empty "mcpu"), (forward "mcpu"),
+          (not_empty "mtune"), (forward "mtune"),
+          (not_empty "mabi"), (forward "mabi"),
+          (not_empty "mfloat-abi"), (forward "mfloat-abi"),
           (switch_on "m32"), (forward "m32"),
           (switch_on "m64"), (forward "m64"),
           (not_empty "Wa,"), (forward "Wa,")))
 ]>;
 
 def llc : Tool<
-[(in_language ["llvm-bitcode", "llvm-assembler"]),
+[(in_language "llvm-bitcode", "llvm-assembler"),
  (out_language "assembler"),
  (output_suffix "s"),
  (command "llc"),
@@ -253,19 +311,22 @@ def llc : Tool<
           (switch_on "fPIC"), (append_cmd "-relocation-model=pic"),
           (switch_on "mdynamic-no-pic"),
                      (append_cmd "-relocation-model=dynamic-no-pic"),
-          (not_empty "march"), (forward_as "mtune", "-mcpu"),
-          (not_empty "mtune"), (forward_as "mtune", "-mcpu"),
-          (not_empty "mcpu"), (forward "mcpu"),
+          (not_empty "march"), (forward_transformed_value
+                                "march", "ConvertMArchToMAttr"),
+          (not_empty "mcpu"), (forward_transformed_value "mcpu", "ConvertMCpu"),
+          (and (not_empty "mtune"), (empty "mcpu")),
+                     (forward_as "mtune", "-mcpu"),
+          (not_empty "mfpu"), (forward_transformed_value "mfpu", "ConvertMFpu"),
           (not_empty "m"), (forward_transformed_value "m", "ConvertToMAttr"),
           (not_empty "Wllc,"), (forward_value "Wllc,")))
 ]>;
 
 // Base class for linkers
-class llvm_gcc_based_linker <string cmd_prefix, dag on_empty> : Tool<
-[(in_language ["object-code", "static-library", "dynamic-library"]),
+class llvm_gcc_based_linker <string cmd, dag on_empty> : Tool<
+[(in_language "object-code", "static-library", "dynamic-library"),
  (out_language "executable"),
  (output_suffix "out"),
- (command cmd_prefix),
+ (command cmd),
  (works_on_empty (case (and (not_empty "filelist"), on_empty), true,
                        (default), false)),
  (join),
@@ -277,12 +338,28 @@ class llvm_gcc_based_linker <string cmd_prefix, dag on_empty> : Tool<
           (not_empty "framework"), (forward "framework"),
           (not_empty "weak_framework"), (forward "weak_framework"),
           (not_empty "filelist"), (forward "filelist"),
+          (not_empty "march"), (forward "march"),
+          (not_empty "mcpu"), (forward "mcpu"),
+          (not_empty "mtune"), (forward "mtune"),
+          (not_empty "mabi"), (forward "mabi"),
+          (not_empty "mfloat-abi"), (forward "mfloat-abi"),
           (switch_on "m32"), (forward "m32"),
           (switch_on "m64"), (forward "m64"),
           (not_empty "l"), (forward "l"),
           (not_empty "Xlinker"), (forward "Xlinker"),
           (not_empty "Wl,"), (forward "Wl,"),
+          (switch_on "nostartfiles"), (forward "nostartfiles"),
+          (switch_on "nodefaultlibs"), (forward "nodefaultlibs"),
+          (switch_on "nostdlib"), (forward "nostdlib"),
+          (switch_on "pie"), (forward "pie"),
+          (switch_on "rdynamic"), (forward "rdynamic"),
+          (switch_on "s"), (forward "s"),
+          (switch_on "static"), (forward "static"),
+          (switch_on "static-libgcc"), (forward "static-libgcc"),
           (switch_on "shared"), (forward "shared"),
+          (switch_on "shared-libgcc"), (forward "shared-libgcc"),
+          (not_empty "T"), (forward "T"),
+          (not_empty "u"), (forward "u"),
           (switch_on "dynamiclib"), (forward "dynamiclib"),
           (switch_on "prebind"), (forward "prebind"),
           (switch_on "dead_strip"), (forward "dead_strip"),
@@ -305,6 +382,7 @@ def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@",
 // Language map
 
 def LanguageMap : LanguageMap<[
+    (lang_to_suffixes "precompiled-header", ["gch", "pch"]),
     (lang_to_suffixes "c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]),
     (lang_to_suffixes "c++-header", "hpp"),
     (lang_to_suffixes "c", "c"),
@@ -322,7 +400,7 @@ def LanguageMap : LanguageMap<[
     (lang_to_suffixes "object-code", ["o", "*empty*"]),
     (lang_to_suffixes "static-library", ["a", "lib"]),
     (lang_to_suffixes "dynamic-library", ["so", "dylib", "dll"]),
-    (lang_to_suffixes "executable", ["out"])
+    (lang_to_suffixes "executable", "out")
 ]>;
 
 // Compilation graph
@@ -347,7 +425,8 @@ def CompilationGraph : CompilationGraph<[
     (edge "llvm_as", "llc"),
 
     (optional_edge "root", "llvm_as",
-                   (case (switch_on "emit-llvm"), (inc_weight))),
+                   (case (or (switch_on "emit-llvm"),
+                             (switch_on "opt")), (inc_weight))),
     (optional_edge "llvm_gcc_c", "opt",
                    (case (switch_on "opt"), (inc_weight))),
     (optional_edge "llvm_gcc_cpp", "opt",
diff --git a/tools/llvmc/src/Clang.td b/tools/llvmc/src/Clang.td
index 1d75743f4fee..e2d32e88ff22 100644
--- a/tools/llvmc/src/Clang.td
+++ b/tools/llvmc/src/Clang.td
@@ -25,12 +25,12 @@ class clang_based<string language, string cmd, string ext_E> : Tool<
                     [(forward "E"), (stop_compilation), (output_suffix ext_E)],
                 (and (switch_on "E"), (empty "o")), (no_out_file),
                 (switch_on "fsyntax-only"), (stop_compilation),
-                (switch_on ["S", "emit-llvm"]),
+                (switch_on "S", "emit-llvm"),
                            [(append_cmd "-emit-llvm"),
                             (stop_compilation), (output_suffix "ll")],
-                (not (switch_on ["S", "emit-llvm"])),
+                (not (switch_on "S", "emit-llvm")),
                      (append_cmd "-emit-llvm-bc"),
-                (switch_on ["c", "emit-llvm"]),
+                (switch_on "c", "emit-llvm"),
                            (stop_compilation),
                 (not_empty "include"), (forward "include"),
                 (not_empty "I"), (forward "I"))),
diff --git a/tools/llvmc/src/Hooks.cpp b/tools/llvmc/src/Hooks.cpp
index 661a914489db..5aa250e512b9 100644
--- a/tools/llvmc/src/Hooks.cpp
+++ b/tools/llvmc/src/Hooks.cpp
@@ -1,14 +1,182 @@
+#include "llvm/ADT/StringMap.h"
+
 #include <string>
 #include <vector>
 
 namespace hooks {
+
+/// NUM_KEYS - Calculate the size of a const char* array.
+#define NUM_KEYS(Keys) sizeof(Keys) / sizeof(const char*)
+
+// See http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+inline unsigned NextHighestPowerOf2 (unsigned i) {
+  --i;
+  i |= i >> 1;
+  i |= i >> 2;
+  i |= i >> 4;
+  i |= i >> 8;
+  i |= i >> 16;
+  ++i;
+  return i;
+}
+
 typedef std::vector<std::string> StrVec;
+typedef llvm::StringMap<const char*> ArgMap;
+
+/// AddPlusOrMinus - Convert 'no-foo' to '-foo' and 'foo' to '+foo'.
+void AddPlusOrMinus (const std::string& Arg, std::string& out) {
+  if (Arg.find("no-") == 0 && Arg[3] != 0) {
+    out += '-';
+    out += Arg.c_str() + 3;
+  }
+  else {
+    out += '+';
+    out += Arg;
+  }
+}
+
+// -march values that need to be special-cased.
+const char* MArchKeysARM[] = { "armv4t", "armv5t", "armv5te", "armv6",
+                               "armv6-m", "armv6t2", "armv7-a", "armv7-m" };
+const char* MArchValuesARM[] = { "v4t", "v5t", "v5te", "v6", "v6m", "v6t2",
+                                 "v7a", "v7m" };
+const unsigned MArchNumKeysARM = NUM_KEYS(MArchKeysARM);
+const unsigned MArchMapSize = NextHighestPowerOf2(MArchNumKeysARM);
+
+// -march values that should be forwarded as -mcpu
+const char* MArchMCpuKeysARM[] = { "iwmmxt", "ep9312" };
+const char* MArchMCpuValuesARM[] = { "iwmmxt", "ep9312"};
+const unsigned MArchMCpuNumKeysARM = NUM_KEYS(MArchMCpuKeysARM);
+const unsigned MArchMCpuMapSize = NextHighestPowerOf2(MArchMCpuNumKeysARM);
+
+
+void FillInArgMap(ArgMap& Args, const char* Keys[],
+                  const char* Values[], unsigned NumKeys)
+{
+  for (unsigned i = 0; i < NumKeys; ++i) {
+    // Explicit cast to StringRef here is necessary to pick up the right
+    // overload.
+    Args.GetOrCreateValue(llvm::StringRef(Keys[i]), Values[i]);
+  }
+}
+
+/// ConvertMArchToMAttr - Convert -march from the gcc dialect to
+/// something llc can understand.
+std::string ConvertMArchToMAttr(const StrVec& Opts) {
+  static ArgMap MArchMap(MArchMapSize);
+  static ArgMap MArchMCpuMap(MArchMapSize);
+  static bool StaticDataInitialized = false;
+
+  if (!StaticDataInitialized) {
+    FillInArgMap(MArchMap, MArchKeysARM, MArchValuesARM, MArchNumKeysARM);
+    FillInArgMap(MArchMCpuMap, MArchMCpuKeysARM,
+                 MArchMCpuValuesARM, MArchMCpuNumKeysARM);
+    StaticDataInitialized = true;
+  }
+
+  std::string mattr("-mattr=");
+  std::string mcpu("-mcpu=");
+  bool mattrTouched = false;
+  bool mcpuTouched = false;
+
+  for (StrVec::const_iterator B = Opts.begin(), E = Opts.end(); B!=E; ++B) {
+    const std::string& Arg = *B;
+
+    // Check if the argument should be forwarded to -mcpu instead of -mattr.
+    {
+      ArgMap::const_iterator I = MArchMCpuMap.find(Arg);
+
+      if (I != MArchMCpuMap.end()) {
+        mcpuTouched = true;
+        mcpu += I->getValue();
+        continue;
+      }
+    }
+
+    if (mattrTouched)
+      mattr += ",";
+
+    // Check if the argument is a special case.
+    {
+      ArgMap::const_iterator I = MArchMap.find(Arg);
 
-/// ConvertToMAttr - Convert -m* and -mno-* to -mattr=+*,-*
+      if (I != MArchMap.end()) {
+        mattrTouched = true;
+        mattr += '+';
+        mattr += I->getValue();
+        continue;
+      }
+    }
+
+    AddPlusOrMinus(Arg, mattr);
+  }
+
+  std::string out;
+  if (mattrTouched)
+    out += mattr;
+  if (mcpuTouched)
+    out += (mattrTouched ? " " : "") + mcpu;
+
+  return out;
+}
+
+// -mcpu values that need to be special-cased.
+const char* MCpuKeysPPC[] = { "G3", "G4", "G5", "powerpc", "powerpc64"};
+const char* MCpuValuesPPC[] = { "g3", "g4", "g5", "ppc", "ppc64"};
+const unsigned MCpuNumKeysPPC = NUM_KEYS(MCpuKeysPPC);
+const unsigned MCpuMapSize = NextHighestPowerOf2(MCpuNumKeysPPC);
+
+/// ConvertMCpu - Convert -mcpu value from the gcc to the llc dialect.
+std::string ConvertMCpu(const char* Val) {
+  static ArgMap MCpuMap(MCpuMapSize);
+  static bool StaticDataInitialized = false;
+
+  if (!StaticDataInitialized) {
+    FillInArgMap(MCpuMap, MCpuKeysPPC, MCpuValuesPPC, MCpuNumKeysPPC);
+    StaticDataInitialized = true;
+  }
+
+  std::string ret = "-mcpu=";
+  ArgMap::const_iterator I = MCpuMap.find(Val);
+  if (I != MCpuMap.end()) {
+    return ret + I->getValue();
+  }
+  return ret + Val;
+}
+
+// -mfpu values that need to be special-cased.
+const char* MFpuKeysARM[] = { "vfp", "vfpv3",
+                              "vfpv3-fp16", "vfpv3-d16", "vfpv3-d16-fp16",
+                              "neon", "neon-fp16" };
+const char* MFpuValuesARM[] = { "vfp2", "vfp3",
+                                "+vfp3,+fp16", "+vfp3,+d16", "+vfp3,+d16,+fp16",
+                                "+neon", "+neon,+neonfp" };
+const unsigned MFpuNumKeysARM = NUM_KEYS(MFpuKeysARM);
+const unsigned MFpuMapSize = NextHighestPowerOf2(MFpuNumKeysARM);
+
+/// ConvertMFpu - Convert -mfpu value from the gcc to the llc dialect.
+std::string ConvertMFpu(const char* Val) {
+  static ArgMap MFpuMap(MFpuMapSize);
+  static bool StaticDataInitialized = false;
+
+  if (!StaticDataInitialized) {
+    FillInArgMap(MFpuMap, MFpuKeysARM, MFpuValuesARM, MFpuNumKeysARM);
+    StaticDataInitialized = true;
+  }
+
+  std::string ret = "-mattr=";
+  ArgMap::const_iterator I = MFpuMap.find(Val);
+  if (I != MFpuMap.end()) {
+    return ret + I->getValue();
+  }
+  return ret + '+' + Val;
+}
+
+/// ConvertToMAttr - Convert '-mfoo' and '-mno-bar' to '-mattr=+foo,-bar'.
 std::string ConvertToMAttr(const StrVec& Opts) {
   std::string out("-mattr=");
-
   bool firstIter = true;
+
   for (StrVec::const_iterator B = Opts.begin(), E = Opts.end(); B!=E; ++B) {
     const std::string& Arg = *B;
 
@@ -17,14 +185,7 @@ std::string ConvertToMAttr(const StrVec& Opts) {
     else
       out += ",";
 
-    if (Arg.find("no-") == 0 && Arg[3] != 0) {
-      out += '-';
-      out += Arg.c_str() + 3;
-    }
-    else {
-      out += '+';
-      out += Arg;
-    }
+    AddPlusOrMinus(Arg, out);
   }
 
   return out;
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 671348c8333b..adb7102b3c76 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -39,9 +39,11 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/StandardPasses.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include "llvm/Config/config.h"
 #include <cstdlib>
 #include <unistd.h>
@@ -183,7 +185,7 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
 {
     // make unique temp .s file to put generated assembly code
     sys::Path uniqueAsmPath("lto-llvm.s");
-    if ( uniqueAsmPath.createTemporaryFileOnDisk(true, &errMsg) )
+    if ( uniqueAsmPath.createTemporaryFileOnDisk(false, &errMsg) )
         return NULL;
     sys::RemoveFileOnSignal(uniqueAsmPath);
        
@@ -208,7 +210,7 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
     
     // make unique temp .o file to put generated object file
     sys::PathWithStatus uniqueObjPath("lto-llvm.o");
-    if ( uniqueObjPath.createTemporaryFileOnDisk(true, &errMsg) ) {
+    if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) {
         uniqueAsmPath.eraseFromDisk();
         return NULL;
     }
@@ -220,9 +222,12 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
     if ( !asmResult ) {
         // remove old buffer if compile() called twice
         delete _nativeObjectFile;
-        
+
         // read .o file into memory buffer
-        _nativeObjectFile = MemoryBuffer::getFile(uniqueObjStr.c_str(),&errMsg);
+        OwningPtr<MemoryBuffer> BuffPtr;
+        if (error_code ec = MemoryBuffer::getFile(uniqueObjStr.c_str(),BuffPtr))
+          errMsg = ec.message();
+        _nativeObjectFile = BuffPtr.take();
     }
 
     // remove temp files
@@ -342,21 +347,34 @@ void LTOCodeGenerator::applyScopeRestrictions() {
 
   // mark which symbols can not be internalized 
   if (!_mustPreserveSymbols.empty()) {
-    MCContext Context(*_target->getMCAsmInfo());
+    MCContext Context(*_target->getMCAsmInfo(), NULL);
     Mangler mangler(Context, *_target->getTargetData());
     std::vector<const char*> mustPreserveList;
+    SmallString<64> Buffer;
     for (Module::iterator f = mergedModule->begin(),
          e = mergedModule->end(); f != e; ++f) {
+      Buffer.clear();
+      mangler.getNameWithPrefix(Buffer, f, false);
       if (!f->isDeclaration() &&
-          _mustPreserveSymbols.count(mangler.getNameWithPrefix(f)))
+          _mustPreserveSymbols.count(Buffer))
         mustPreserveList.push_back(::strdup(f->getNameStr().c_str()));
     }
     for (Module::global_iterator v = mergedModule->global_begin(), 
          e = mergedModule->global_end(); v !=  e; ++v) {
+      Buffer.clear();
+      mangler.getNameWithPrefix(Buffer, v, false);
       if (!v->isDeclaration() &&
-          _mustPreserveSymbols.count(mangler.getNameWithPrefix(v)))
+          _mustPreserveSymbols.count(Buffer))
         mustPreserveList.push_back(::strdup(v->getNameStr().c_str()));
     }
+    for (Module::alias_iterator a = mergedModule->alias_begin(),
+         e = mergedModule->alias_end(); a != e; ++a) {
+      Buffer.clear();
+      mangler.getNameWithPrefix(Buffer, a, false);
+      if (!a->isDeclaration() &&
+          _mustPreserveSymbols.count(Buffer))
+        mustPreserveList.push_back(::strdup(a->getNameStr().c_str()));
+    }
     passes.add(createInternalizePass(mustPreserveList));
   }
   
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index c7cd585d03dd..8562f747d0ee 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -23,9 +23,10 @@
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/SubtargetFeature.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -56,23 +57,18 @@ bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
 
 bool LTOModule::isBitcodeFileForTarget(const char *path,
                                        const char *triplePrefix) {
-  MemoryBuffer *buffer = MemoryBuffer::getFile(path);
-  if (buffer == NULL)
+  OwningPtr<MemoryBuffer> buffer;
+  if (MemoryBuffer::getFile(path, buffer))
     return false;
-  return isTargetMatch(buffer, triplePrefix);
+  return isTargetMatch(buffer.take(), triplePrefix);
 }
 
 // Takes ownership of buffer.
 bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
-  OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext()));
-  // On success, m owns buffer and both are deleted at end of this method.
-  if (!m) {
-    delete buffer;
-    return false;
-  }
-  std::string actualTarget = m->getTargetTriple();
-  return (strncmp(actualTarget.c_str(), triplePrefix,
-                  strlen(triplePrefix)) == 0);
+  std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
+  delete buffer;
+  return (strncmp(Triple.c_str(), triplePrefix,
+ 		  strlen(triplePrefix)) == 0);
 }
 
 
@@ -83,9 +79,22 @@ LTOModule::LTOModule(Module *m, TargetMachine *t)
 
 LTOModule *LTOModule::makeLTOModule(const char *path,
                                     std::string &errMsg) {
-  OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg));
-  if (!buffer)
+  OwningPtr<MemoryBuffer> buffer;
+  if (error_code ec = MemoryBuffer::getFile(path, buffer)) {
+    errMsg = ec.message();
     return NULL;
+  }
+  return makeLTOModule(buffer.get(), errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
+                                    off_t size,
+                                    std::string &errMsg) {
+  OwningPtr<MemoryBuffer> buffer;
+  if (error_code ec = MemoryBuffer::getOpenFile(fd, path, buffer, size)) {
+    errMsg = ec.message();
+    return NULL;
+  }
   return makeLTOModule(buffer.get(), errMsg);
 }
 
@@ -306,8 +315,14 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
   if (def->getName().startswith("llvm."))
     return;
 
+  // ignore available_externally
+  if (def->hasAvailableExternallyLinkage())
+    return;
+
   // string is owned by _defines
-  const char *symbolName = ::strdup(mangler.getNameWithPrefix(def).c_str());
+  SmallString<64> Buffer;
+  mangler.getNameWithPrefix(Buffer, def, false);
+  const char *symbolName = ::strdup(Buffer.c_str());
 
   // set alignment part log2() can have rounding errors
   uint32_t align = def->getAlignment();
@@ -325,24 +340,26 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
   }
 
   // set definition part
-  if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) {
+  if (def->hasWeakLinkage() || def->hasLinkOnceLinkage() ||
+      def->hasLinkerPrivateWeakLinkage() ||
+      def->hasLinkerPrivateWeakDefAutoLinkage())
     attr |= LTO_SYMBOL_DEFINITION_WEAK;
-  }
-  else if (def->hasCommonLinkage()) {
+  else if (def->hasCommonLinkage())
     attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
-  }
-  else {
+  else
     attr |= LTO_SYMBOL_DEFINITION_REGULAR;
-  }
 
   // set scope part
   if (def->hasHiddenVisibility())
     attr |= LTO_SYMBOL_SCOPE_HIDDEN;
   else if (def->hasProtectedVisibility())
     attr |= LTO_SYMBOL_SCOPE_PROTECTED;
-  else if (def->hasExternalLinkage() || def->hasWeakLinkage()
-           || def->hasLinkOnceLinkage() || def->hasCommonLinkage())
+  else if (def->hasExternalLinkage() || def->hasWeakLinkage() ||
+           def->hasLinkOnceLinkage() || def->hasCommonLinkage() ||
+           def->hasLinkerPrivateWeakLinkage())
     attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+  else if (def->hasLinkerPrivateWeakDefAutoLinkage())
+    attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
   else
     attr |= LTO_SYMBOL_SCOPE_INTERNAL;
 
@@ -380,7 +397,8 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
   if (isa<GlobalAlias>(decl))
     return;
 
-  std::string name = mangler.getNameWithPrefix(decl);
+  SmallString<64> name;
+  mangler.getNameWithPrefix(name, decl, false);
 
   // we already have the symbol
   if (_undefines.find(name) != _undefines.end())
@@ -426,7 +444,7 @@ void LTOModule::lazyParseSymbols() {
   _symbolsParsed = true;
 
   // Use mangler to add GlobalPrefix to names to match linker names.
-  MCContext Context(*_target->getMCAsmInfo());
+  MCContext Context(*_target->getMCAsmInfo(), NULL);
   Mangler mangler(Context, *_target->getTargetData());
 
   // add functions
@@ -472,6 +490,15 @@ void LTOModule::lazyParseSymbols() {
     pos = inlineAsm.find(glbl, pend);
   }
 
+  // add aliases
+  for (Module::alias_iterator i = _module->alias_begin(),
+         e = _module->alias_end(); i != e; ++i) {
+    if (i->isDeclaration())
+      addPotentialUndefinedSymbol(i, mangler);
+    else
+      addDefinedDataSymbol(i, mangler);
+  }
+
   // make symbols for all undefines
   for (StringMap<NameAndAttributes>::iterator it=_undefines.begin();
        it != _undefines.end(); ++it) {
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index a19acc0d7378..1794d81c0a9c 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -51,6 +51,9 @@ struct LTOModule {
 
     static LTOModule*        makeLTOModule(const char* path,
                                           std::string& errMsg);
+    static LTOModule*        makeLTOModule(int fd, const char *path,
+                                           off_t size,
+                                           std::string& errMsg);
     static LTOModule*        makeLTOModule(const void* mem, size_t length,
                                            std::string& errMsg);
 
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index e157a4c48aca..294c81b9e734 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -24,6 +24,10 @@ LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader bitwriter
 
 include $(LEVEL)/Makefile.common
 
+ifdef LLVM_VERSION_INFO
+CXX.Flags += -DLLVM_VERSION_INFO='"$(LLVM_VERSION_INFO)"'
+endif
+
 ifeq ($(HOST_OS),Darwin)
     # Special hack to allow libLTO to have an offset version number.
     ifdef LLVM_LTO_VERSION_OFFSET
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index 3d7ef0a1cb94..7d4871d9253d 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -91,6 +91,14 @@ lto_module_t lto_module_create(const char* path)
      return LTOModule::makeLTOModule(path, sLastErrorString);
 }
 
+//
+// loads an object file from disk
+// returns NULL on error (check lto_get_error_message() for details)
+//
+lto_module_t lto_module_create_from_fd(int fd, const char *path, off_t size)
+{
+     return LTOModule::makeLTOModule(fd, path, size, sLastErrorString);
+}
 
 //
 // loads an object file from memory 
@@ -132,7 +140,7 @@ void lto_module_set_target_triple(lto_module_t mod, const char *triple)
 //
 // returns the number of symbols in the object module
 //
-uint32_t lto_module_get_num_symbols(lto_module_t mod)
+unsigned int lto_module_get_num_symbols(lto_module_t mod)
 {
     return mod->getSymbolCount();
 }
@@ -140,7 +148,7 @@ uint32_t lto_module_get_num_symbols(lto_module_t mod)
 //
 // returns the name of the ith symbol in the object module
 //
-const char* lto_module_get_symbol_name(lto_module_t mod, uint32_t index)
+const char* lto_module_get_symbol_name(lto_module_t mod, unsigned int index)
 {
     return mod->getSymbolName(index);
 }
@@ -150,7 +158,7 @@ const char* lto_module_get_symbol_name(lto_module_t mod, uint32_t index)
 // returns the attributes of the ith symbol in the object module
 //
 lto_symbol_attributes lto_module_get_symbol_attribute(lto_module_t mod, 
-                                                      uint32_t index)
+                                                      unsigned int index)
 {
     return mod->getSymbolAttributes(index);
 }
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 4dbf760d3882..a3740911edc1 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -1,6 +1,7 @@
 lto_get_error_message
 lto_get_version
 lto_module_create
+lto_module_create_from_fd
 lto_module_create_from_memory
 lto_module_get_num_symbols
 lto_module_get_symbol_attribute
diff --git a/tools/macho-dump/CMakeLists.txt b/tools/macho-dump/CMakeLists.txt
new file mode 100644
index 000000000000..d55e1d5c4139
--- /dev/null
+++ b/tools/macho-dump/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support object)
+
+add_llvm_tool(macho-dump
+  macho-dump.cpp
+  )
diff --git a/tools/macho-dump/Makefile b/tools/macho-dump/Makefile
new file mode 100644
index 000000000000..638015e9289a
--- /dev/null
+++ b/tools/macho-dump/Makefile
@@ -0,0 +1,23 @@
+##===- tools/macho-dump/Makefile ---------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = macho-dump
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Include this here so we can get the configuration of the targets
+# that have been configured for construction. We have to do this
+# early so we can set up LINK_COMPONENTS before including Makefile.rules
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := support object
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
new file mode 100644
index 000000000000..c4c558d9acd3
--- /dev/null
+++ b/tools/macho-dump/macho-dump.cpp
@@ -0,0 +1,391 @@
+//===-- macho-dump.cpp - Mach Object Dumping Tool -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a testing tool for use with the MC/Mach-O LLVM components.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+using namespace llvm::object;
+
+static cl::opt<std::string>
+InputFile(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+static cl::opt<bool>
+ShowSectionData("dump-section-data", cl::desc("Dump the contents of sections"),
+                cl::init(false));
+
+///
+
+static const char *ProgramName;
+
+static void Message(const char *Type, const Twine &Msg) {
+  errs() << ProgramName << ": " << Type << ": " << Msg << "\n";
+}
+
+static int Error(const Twine &Msg) {
+  Message("error", Msg);
+  return 1;
+}
+
+static void Warning(const Twine &Msg) {
+  Message("warning", Msg);
+}
+
+///
+
+static int DumpHeader(MachOObject &Obj) {
+  // Read the header.
+  const macho::Header &Hdr = Obj.getHeader();
+  outs() << "('cputype', " << Hdr.CPUType << ")\n";
+  outs() << "('cpusubtype', " << Hdr.CPUSubtype << ")\n";
+  outs() << "('filetype', " << Hdr.FileType << ")\n";
+  outs() << "('num_load_commands', " << Hdr.NumLoadCommands << ")\n";
+  outs() << "('load_commands_size', " << Hdr.SizeOfLoadCommands << ")\n";
+  outs() << "('flag', " << Hdr.Flags << ")\n";
+
+  // Print extended header if 64-bit.
+  if (Obj.is64Bit()) {
+    const macho::Header64Ext &Hdr64 = Obj.getHeader64Ext();
+    outs() << "('reserved', " << Hdr64.Reserved << ")\n";
+  }
+
+  return 0;
+}
+
+static void DumpSegmentCommandData(StringRef Name,
+                                   uint64_t VMAddr, uint64_t VMSize,
+                                   uint64_t FileOffset, uint64_t FileSize,
+                                   uint32_t MaxProt, uint32_t InitProt,
+                                   uint32_t NumSections, uint32_t Flags) {
+  outs() << "  ('segment_name', '";
+  outs().write_escaped(Name, /*UseHexEscapes=*/true) << "')\n";
+  outs() << "  ('vm_addr', " << VMAddr << ")\n";
+  outs() << "  ('vm_size', " << VMSize << ")\n";
+  outs() << "  ('file_offset', " << FileOffset << ")\n";
+  outs() << "  ('file_size', " << FileSize << ")\n";
+  outs() << "  ('maxprot', " << MaxProt << ")\n";
+  outs() << "  ('initprot', " << InitProt << ")\n";
+  outs() << "  ('num_sections', " << NumSections << ")\n";
+  outs() << "  ('flags', " << Flags << ")\n";
+}
+
+static int DumpSectionData(MachOObject &Obj, unsigned Index, StringRef Name,
+                           StringRef SegmentName, uint64_t Address,
+                           uint64_t Size, uint32_t Offset,
+                           uint32_t Align, uint32_t RelocationTableOffset,
+                           uint32_t NumRelocationTableEntries,
+                           uint32_t Flags, uint32_t Reserved1,
+                           uint32_t Reserved2, uint64_t Reserved3 = ~0ULL) {
+  outs() << "    # Section " << Index << "\n";
+  outs() << "   (('section_name', '";
+  outs().write_escaped(Name, /*UseHexEscapes=*/true) << "')\n";
+  outs() << "    ('segment_name', '";
+  outs().write_escaped(SegmentName, /*UseHexEscapes=*/true) << "')\n";
+  outs() << "    ('address', " << Address << ")\n";
+  outs() << "    ('size', " << Size << ")\n";
+  outs() << "    ('offset', " << Offset << ")\n";
+  outs() << "    ('alignment', " << Align << ")\n";
+  outs() << "    ('reloc_offset', " << RelocationTableOffset << ")\n";
+  outs() << "    ('num_reloc', " << NumRelocationTableEntries << ")\n";
+  outs() << "    ('flags', " << format("0x%x", Flags) << ")\n";
+  outs() << "    ('reserved1', " << Reserved1 << ")\n";
+  outs() << "    ('reserved2', " << Reserved2 << ")\n";
+  if (Reserved3 != ~0ULL)
+    outs() << "    ('reserved3', " << Reserved3 << ")\n";
+  outs() << "   ),\n";
+
+  // Dump the relocation entries.
+  int Res = 0;
+  outs() << "  ('_relocations', [\n";
+  for (unsigned i = 0; i != NumRelocationTableEntries; ++i) {
+    InMemoryStruct<macho::RelocationEntry> RE;
+    Obj.ReadRelocationEntry(RelocationTableOffset, i, RE);
+    if (!RE) {
+      Res = Error("unable to read relocation table entry '" + Twine(i) + "'");
+      break;
+    }
+    
+    outs() << "    # Relocation " << i << "\n";
+    outs() << "    (('word-0', " << format("0x%x", RE->Word0) << "),\n";
+    outs() << "     ('word-1', " << format("0x%x", RE->Word1) << ")),\n";
+  }
+  outs() << "  ])\n";
+
+  // Dump the section data, if requested.
+  if (ShowSectionData) {
+    outs() << "  ('_section_data', '";
+    StringRef Data = Obj.getData(Offset, Size);
+    for (unsigned i = 0; i != Data.size(); ++i) {
+      if (i && (i % 4) == 0)
+        outs() << ' ';
+      outs() << hexdigit((Data[i] >> 4) & 0xF, /*LowerCase=*/true);
+      outs() << hexdigit((Data[i] >> 0) & 0xF, /*LowerCase=*/true);
+    }
+    outs() << "')\n";
+  }
+
+  return Res;
+}
+
+static int DumpSegmentCommand(MachOObject &Obj,
+                               const MachOObject::LoadCommandInfo &LCI) {
+  InMemoryStruct<macho::SegmentLoadCommand> SLC;
+  Obj.ReadSegmentLoadCommand(LCI, SLC);
+  if (!SLC)
+    return Error("unable to read segment load command");
+
+  DumpSegmentCommandData(StringRef(SLC->Name, 16), SLC->VMAddress,
+                         SLC->VMSize, SLC->FileOffset, SLC->FileSize,
+                         SLC->MaxVMProtection, SLC->InitialVMProtection,
+                         SLC->NumSections, SLC->Flags);
+
+  // Dump the sections.
+  int Res = 0;
+  outs() << "  ('sections', [\n";
+  for (unsigned i = 0; i != SLC->NumSections; ++i) {
+    InMemoryStruct<macho::Section> Sect;
+    Obj.ReadSection(LCI, i, Sect);
+    if (!SLC) {
+      Res = Error("unable to read section '" + Twine(i) + "'");
+      break;
+    }
+
+    if ((Res = DumpSectionData(Obj, i, StringRef(Sect->Name, 16),
+                               StringRef(Sect->SegmentName, 16), Sect->Address,
+                               Sect->Size, Sect->Offset, Sect->Align,
+                               Sect->RelocationTableOffset,
+                               Sect->NumRelocationTableEntries, Sect->Flags,
+                               Sect->Reserved1, Sect->Reserved2)))
+      break;
+  }
+  outs() << "  ])\n";
+
+  return Res;
+}
+
+static int DumpSegment64Command(MachOObject &Obj,
+                               const MachOObject::LoadCommandInfo &LCI) {
+  InMemoryStruct<macho::Segment64LoadCommand> SLC;
+  Obj.ReadSegment64LoadCommand(LCI, SLC);
+  if (!SLC)
+    return Error("unable to read segment load command");
+
+  DumpSegmentCommandData(StringRef(SLC->Name, 16), SLC->VMAddress,
+                         SLC->VMSize, SLC->FileOffset, SLC->FileSize,
+                         SLC->MaxVMProtection, SLC->InitialVMProtection,
+                         SLC->NumSections, SLC->Flags);
+
+  // Dump the sections.
+  int Res = 0;
+  outs() << "  ('sections', [\n";
+  for (unsigned i = 0; i != SLC->NumSections; ++i) {
+    InMemoryStruct<macho::Section64> Sect;
+    Obj.ReadSection64(LCI, i, Sect);
+    if (!SLC) {
+      Res = Error("unable to read section '" + Twine(i) + "'");
+      break;
+    }
+
+    if ((Res = DumpSectionData(Obj, i, StringRef(Sect->Name, 16),
+                               StringRef(Sect->SegmentName, 16), Sect->Address,
+                               Sect->Size, Sect->Offset, Sect->Align,
+                               Sect->RelocationTableOffset,
+                               Sect->NumRelocationTableEntries, Sect->Flags,
+                               Sect->Reserved1, Sect->Reserved2,
+                               Sect->Reserved3)))
+      break;
+  }
+  outs() << "  ])\n";
+
+  return 0;
+}
+
+static void DumpSymbolTableEntryData(MachOObject &Obj,
+                                     unsigned Index, uint32_t StringIndex,
+                                     uint8_t Type, uint8_t SectionIndex,
+                                     uint16_t Flags, uint64_t Value) {
+  outs() << "    # Symbol " << Index << "\n";
+  outs() << "   (('n_strx', " << StringIndex << ")\n";
+  outs() << "    ('n_type', " << format("0x%x", Type) << ")\n";
+  outs() << "    ('n_sect', " << uint32_t(SectionIndex) << ")\n";
+  outs() << "    ('n_desc', " << Flags << ")\n";
+  outs() << "    ('n_value', " << Value << ")\n";
+  outs() << "    ('_string', '" << Obj.getStringAtIndex(StringIndex) << "')\n";
+  outs() << "   ),\n";
+}
+
+static int DumpSymtabCommand(MachOObject &Obj,
+                             const MachOObject::LoadCommandInfo &LCI) {
+  InMemoryStruct<macho::SymtabLoadCommand> SLC;
+  Obj.ReadSymtabLoadCommand(LCI, SLC);
+  if (!SLC)
+    return Error("unable to read segment load command");
+
+  outs() << "  ('symoff', " << SLC->SymbolTableOffset << ")\n";
+  outs() << "  ('nsyms', " << SLC->NumSymbolTableEntries << ")\n";
+  outs() << "  ('stroff', " << SLC->StringTableOffset << ")\n";
+  outs() << "  ('strsize', " << SLC->StringTableSize << ")\n";
+
+  // Cache the string table data.
+  Obj.RegisterStringTable(*SLC);
+
+  // Dump the string data.
+  outs() << "  ('_string_data', '";
+  outs().write_escaped(Obj.getStringTableData(),
+                       /*UseHexEscapes=*/true) << "')\n";
+
+  // Dump the symbol table.
+  int Res = 0;
+  outs() << "  ('_symbols', [\n";
+  for (unsigned i = 0; i != SLC->NumSymbolTableEntries; ++i) {
+    if (Obj.is64Bit()) {
+      InMemoryStruct<macho::Symbol64TableEntry> STE;
+      Obj.ReadSymbol64TableEntry(SLC->SymbolTableOffset, i, STE);
+      if (!STE) {
+        Res = Error("unable to read symbol: '" + Twine(i) + "'");
+        break;
+      }
+
+      DumpSymbolTableEntryData(Obj, i, STE->StringIndex, STE->Type,
+                               STE->SectionIndex, STE->Flags, STE->Value);
+    } else {
+      InMemoryStruct<macho::SymbolTableEntry> STE;
+      Obj.ReadSymbolTableEntry(SLC->SymbolTableOffset, i, STE);
+      if (!SLC) {
+        Res = Error("unable to read symbol: '" + Twine(i) + "'");
+        break;
+      }
+
+      DumpSymbolTableEntryData(Obj, i, STE->StringIndex, STE->Type,
+                               STE->SectionIndex, STE->Flags, STE->Value);
+    }
+  }
+  outs() << "  ])\n";
+
+  return Res;
+}
+
+static int DumpDysymtabCommand(MachOObject &Obj,
+                             const MachOObject::LoadCommandInfo &LCI) {
+  InMemoryStruct<macho::DysymtabLoadCommand> DLC;
+  Obj.ReadDysymtabLoadCommand(LCI, DLC);
+  if (!DLC)
+    return Error("unable to read segment load command");
+
+  outs() << "  ('ilocalsym', " << DLC->LocalSymbolsIndex << ")\n";
+  outs() << "  ('nlocalsym', " << DLC->NumLocalSymbols << ")\n";
+  outs() << "  ('iextdefsym', " << DLC->ExternalSymbolsIndex << ")\n";
+  outs() << "  ('nextdefsym', " << DLC->NumExternalSymbols << ")\n";
+  outs() << "  ('iundefsym', " << DLC->UndefinedSymbolsIndex << ")\n";
+  outs() << "  ('nundefsym', " << DLC->NumUndefinedSymbols << ")\n";
+  outs() << "  ('tocoff', " << DLC->TOCOffset << ")\n";
+  outs() << "  ('ntoc', " << DLC->NumTOCEntries << ")\n";
+  outs() << "  ('modtaboff', " << DLC->ModuleTableOffset << ")\n";
+  outs() << "  ('nmodtab', " << DLC->NumModuleTableEntries << ")\n";
+  outs() << "  ('extrefsymoff', " << DLC->ReferenceSymbolTableOffset << ")\n";
+  outs() << "  ('nextrefsyms', "
+         << DLC->NumReferencedSymbolTableEntries << ")\n";
+  outs() << "  ('indirectsymoff', " << DLC->IndirectSymbolTableOffset << ")\n";
+  outs() << "  ('nindirectsyms', "
+         << DLC->NumIndirectSymbolTableEntries << ")\n";
+  outs() << "  ('extreloff', " << DLC->ExternalRelocationTableOffset << ")\n";
+  outs() << "  ('nextrel', " << DLC->NumExternalRelocationTableEntries << ")\n";
+  outs() << "  ('locreloff', " << DLC->LocalRelocationTableOffset << ")\n";
+  outs() << "  ('nlocrel', " << DLC->NumLocalRelocationTableEntries << ")\n";
+
+  // Dump the indirect symbol table.
+  int Res = 0;
+  outs() << "  ('_indirect_symbols', [\n";
+  for (unsigned i = 0; i != DLC->NumIndirectSymbolTableEntries; ++i) {
+    InMemoryStruct<macho::IndirectSymbolTableEntry> ISTE;
+    Obj.ReadIndirectSymbolTableEntry(*DLC, i, ISTE);
+    if (!ISTE) {
+      Res = Error("unable to read segment load command");
+      break;
+    }
+
+    outs() << "    # Indirect Symbol " << i << "\n";
+    outs() << "    (('symbol_index', "
+           << format("0x%x", ISTE->Index) << "),),\n";
+  }
+  outs() << "  ])\n";
+
+  return Res;
+}
+
+static int DumpLoadCommand(MachOObject &Obj, unsigned Index) {
+  const MachOObject::LoadCommandInfo &LCI = Obj.getLoadCommandInfo(Index);
+  int Res = 0;
+
+  outs() << "  # Load Command " << Index << "\n"
+         << " (('command', " << LCI.Command.Type << ")\n"
+         << "  ('size', " << LCI.Command.Size << ")\n";
+  switch (LCI.Command.Type) {
+  case macho::LCT_Segment:
+    Res = DumpSegmentCommand(Obj, LCI);
+    break;
+  case macho::LCT_Segment64:
+    Res = DumpSegment64Command(Obj, LCI);
+    break;
+  case macho::LCT_Symtab:
+    Res = DumpSymtabCommand(Obj, LCI);
+    break;
+  case macho::LCT_Dysymtab:
+    Res = DumpDysymtabCommand(Obj, LCI);
+    break;
+  default:
+    Warning("unknown load command: " + Twine(LCI.Command.Type));
+    break;
+  }
+  outs() << " ),\n";
+
+  return Res;
+}
+
+int main(int argc, char **argv) {
+  ProgramName = argv[0];
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm Mach-O dumping tool\n");
+
+  // Load the input file.
+  std::string ErrorStr;
+  OwningPtr<MemoryBuffer> InputBuffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFile, InputBuffer))
+    return Error("unable to read input: '" + ec.message() + "'");
+
+  // Construct the Mach-O wrapper object.
+  OwningPtr<MachOObject> InputObject(
+    MachOObject::LoadFromBuffer(InputBuffer.take(), &ErrorStr));
+  if (!InputObject)
+    return Error("unable to load object: '" + ErrorStr + "'");
+
+  if (int Res = DumpHeader(*InputObject))
+    return Res;
+
+  // Print the load commands.
+  int Res = 0;
+  outs() << "('load_commands', [\n";
+  for (unsigned i = 0; i != InputObject->getHeader().NumLoadCommands; ++i)
+    if ((Res = DumpLoadCommand(*InputObject, i)))
+      break;
+  outs() << "])\n";
+
+  return Res;
+}
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index 9de7d6ac5459..791caf571c29 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -18,8 +18,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/ToolOutputFile.h"
 using namespace llvm;
 
 template<typename GraphType>
@@ -61,8 +61,7 @@ namespace llvm {
     static std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
       if (Node->getFunction())
         return ((Value*)Node->getFunction())->getName();
-      else
-        return "external node";
+      return "external node";
     }
   };
 }
@@ -109,10 +108,8 @@ namespace {
     }
 
     virtual bool runOnFunction(Function &F) {
-      DominatorTree &DT = getAnalysis<DominatorTree>();
-      DT.dump();
-      DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
-      DF.dump();
+      getAnalysis<DominatorTree>().dump();
+      getAnalysis<DominanceFrontier>().dump();
       return false;
     }
   };
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index d83718517918..e55b4b3e2afc 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -18,13 +18,17 @@
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/RegionPass.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Support/PassNameParser.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -32,7 +36,7 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/StandardPasses.h"
 #include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/LinkAllPasses.h"
 #include "llvm/LinkAllVMCore.h"
 #include <memory>
@@ -127,6 +131,10 @@ QuietA("quiet", cl::desc("Alias for -q"), cl::aliasopt(Quiet));
 static cl::opt<bool>
 AnalyzeOnly("analyze", cl::desc("Only perform analysis, no optimization"));
 
+static cl::opt<bool>
+PrintBreakpoints("print-breakpoints-for-testing", 
+                 cl::desc("Print select breakpoints location for testing"));
+
 static cl::opt<std::string>
 DefaultDataLayout("default-data-layout", 
           cl::desc("data layout string to use if not specified by module"),
@@ -139,25 +147,29 @@ struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
   static char ID;
   const PassInfo *PassToPrint;
   raw_ostream &Out;
+  std::string PassName;
+
   CallGraphSCCPassPrinter(const PassInfo *PI, raw_ostream &out) :
-    CallGraphSCCPass(ID), PassToPrint(PI), Out(out) {}
+    CallGraphSCCPass(ID), PassToPrint(PI), Out(out) {
+      std::string PassToPrintName =  PassToPrint->getPassName();
+      PassName = "CallGraphSCCPass Printer: " + PassToPrintName;
+    }
 
   virtual bool runOnSCC(CallGraphSCC &SCC) {
-    if (!Quiet) {
+    if (!Quiet)
       Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
 
-      for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
-        Function *F = (*I)->getFunction();
-        if (F)
-          getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, 
-                F->getParent());
-      }
-    }
     // Get and print pass...
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      Function *F = (*I)->getFunction();
+      if (F)
+        getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+                                                              F->getParent());
+    }
     return false;
   }
 
-  virtual const char *getPassName() const { return "'Pass' Printer"; }
+  virtual const char *getPassName() const { return PassName.c_str(); }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -171,20 +183,24 @@ struct ModulePassPrinter : public ModulePass {
   static char ID;
   const PassInfo *PassToPrint;
   raw_ostream &Out;
+  std::string PassName;
+
   ModulePassPrinter(const PassInfo *PI, raw_ostream &out)
-    : ModulePass(ID), PassToPrint(PI), Out(out) {}
+    : ModulePass(ID), PassToPrint(PI), Out(out) {
+      std::string PassToPrintName =  PassToPrint->getPassName();
+      PassName = "ModulePass Printer: " + PassToPrintName;
+    }
 
   virtual bool runOnModule(Module &M) {
-    if (!Quiet) {
+    if (!Quiet)
       Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
-      getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, &M);
-    }
 
     // Get and print pass...
+    getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, &M);
     return false;
   }
 
-  virtual const char *getPassName() const { return "'Pass' Printer"; }
+  virtual const char *getPassName() const { return PassName.c_str(); }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -197,21 +213,26 @@ struct FunctionPassPrinter : public FunctionPass {
   const PassInfo *PassToPrint;
   raw_ostream &Out;
   static char ID;
+  std::string PassName;
+
   FunctionPassPrinter(const PassInfo *PI, raw_ostream &out)
-    : FunctionPass(ID), PassToPrint(PI), Out(out) {}
+    : FunctionPass(ID), PassToPrint(PI), Out(out) {
+      std::string PassToPrintName =  PassToPrint->getPassName();
+      PassName = "FunctionPass Printer: " + PassToPrintName;
+    }
 
   virtual bool runOnFunction(Function &F) {
-    if (!Quiet) {
+    if (!Quiet)
       Out << "Printing analysis '" << PassToPrint->getPassName()
           << "' for function '" << F.getName() << "':\n";
-    }
+
     // Get and print pass...
     getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
             F.getParent());
     return false;
   }
 
-  virtual const char *getPassName() const { return "FunctionPass Printer"; }
+  virtual const char *getPassName() const { return PassName.c_str(); }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -225,20 +246,26 @@ struct LoopPassPrinter : public LoopPass {
   static char ID;
   const PassInfo *PassToPrint;
   raw_ostream &Out;
+  std::string PassName;
+
   LoopPassPrinter(const PassInfo *PI, raw_ostream &out) :
-    LoopPass(ID), PassToPrint(PI), Out(out) {}
+    LoopPass(ID), PassToPrint(PI), Out(out) {
+      std::string PassToPrintName =  PassToPrint->getPassName();
+      PassName = "LoopPass Printer: " + PassToPrintName;
+    }
+
 
   virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
-    if (!Quiet) {
+    if (!Quiet)
       Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
-      getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
-                                  L->getHeader()->getParent()->getParent());
-    }
+
     // Get and print pass...
+    getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+                        L->getHeader()->getParent()->getParent());
     return false;
   }
 
-  virtual const char *getPassName() const { return "'Pass' Printer"; }
+  virtual const char *getPassName() const { return PassName.c_str(); }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -248,18 +275,56 @@ struct LoopPassPrinter : public LoopPass {
 
 char LoopPassPrinter::ID = 0;
 
+struct RegionPassPrinter : public RegionPass {
+  static char ID;
+  const PassInfo *PassToPrint;
+  raw_ostream &Out;
+  std::string PassName;
+
+  RegionPassPrinter(const PassInfo *PI, raw_ostream &out) : RegionPass(ID),
+    PassToPrint(PI), Out(out) {
+    std::string PassToPrintName =  PassToPrint->getPassName();
+    PassName = "RegionPass Printer: " + PassToPrintName;
+  }
+
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+    if (!Quiet) {
+      Out << "Printing analysis '" << PassToPrint->getPassName() << "' for "
+        << "region: '" << R->getNameStr() << "' in function '"
+        << R->getEntry()->getParent()->getNameStr() << "':\n";
+    }
+    // Get and print pass...
+   getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+                       R->getEntry()->getParent()->getParent());
+    return false;
+  }
+
+  virtual const char *getPassName() const { return PassName.c_str(); }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequiredID(PassToPrint->getTypeInfo());
+    AU.setPreservesAll();
+  }
+};
+
+char RegionPassPrinter::ID = 0;
+
 struct BasicBlockPassPrinter : public BasicBlockPass {
   const PassInfo *PassToPrint;
   raw_ostream &Out;
   static char ID;
+  std::string PassName;
+
   BasicBlockPassPrinter(const PassInfo *PI, raw_ostream &out)
-    : BasicBlockPass(ID), PassToPrint(PI), Out(out) {}
+    : BasicBlockPass(ID), PassToPrint(PI), Out(out) {
+      std::string PassToPrintName =  PassToPrint->getPassName();
+      PassName = "BasicBlockPass Printer: " + PassToPrintName;
+    }
 
   virtual bool runOnBasicBlock(BasicBlock &BB) {
-    if (!Quiet) {
+    if (!Quiet)
       Out << "Printing Analysis info for BasicBlock '" << BB.getName()
           << "': Pass " << PassToPrint->getPassName() << ":\n";
-    }
 
     // Get and print pass...
     getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, 
@@ -267,7 +332,7 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
     return false;
   }
 
-  virtual const char *getPassName() const { return "BasicBlockPass Printer"; }
+  virtual const char *getPassName() const { return PassName.c_str(); }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -276,6 +341,39 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
 };
 
 char BasicBlockPassPrinter::ID = 0;
+
+struct BreakpointPrinter : public FunctionPass {
+  raw_ostream &Out;
+  static char ID;
+
+  BreakpointPrinter(raw_ostream &out)
+    : FunctionPass(ID), Out(out) {
+    }
+
+  virtual bool runOnFunction(Function &F) {
+    BasicBlock &EntryBB = F.getEntryBlock();
+    BasicBlock::const_iterator BI = EntryBB.end();
+    --BI;
+    do {
+      const Instruction *In = BI;
+      const DebugLoc DL = In->getDebugLoc();
+      if (!DL.isUnknown()) {
+        DIScope S(DL.getScope(getGlobalContext()));
+        Out << S.getFilename() << " " << DL.getLine() << "\n";
+        break;
+      }
+      --BI;
+    } while (BI != EntryBB.begin());
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+};
+
+char BreakpointPrinter::ID = 0;
+
 inline void addPass(PassManagerBase &PM, Pass *P) {
   // Add the pass to the pass manager...
   PM.add(P);
@@ -297,9 +395,9 @@ void AddOptimizationPasses(PassManagerBase &MPM, PassManagerBase &FPM,
   if (DisableInline) {
     // No inlining pass
   } else if (OptLevel) {
-    unsigned Threshold = 200;
+    unsigned Threshold = 225;
     if (OptLevel > 2)
-      Threshold = 250;
+      Threshold = 275;
     InliningPass = createFunctionInliningPass(Threshold);
   } else {
     InliningPass = createAlwaysInlinerPass();
@@ -331,7 +429,7 @@ void AddStandardCompilePasses(PassManagerBase &PM) {
                              /*OptimizeSize=*/ false,
                              /*UnitAtATime=*/ true,
                              /*UnrollLoops=*/ true,
-                             /*SimplifyLibCalls=*/ true,
+                             !DisableSimplifyLibCalls,
                              /*HaveExceptions=*/ true,
                              InliningPass);
 }
@@ -360,20 +458,32 @@ int main(int argc, char **argv) {
   sys::PrintStackTraceOnErrorSignal();
   llvm::PrettyStackTraceProgram X(argc, argv);
 
-  if (AnalyzeOnly && NoOutput) {
-    errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
-    return 1;
-  }
-  
   // Enable debug stream buffering.
   EnableDebugBuffering = true;
 
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
   LLVMContext &Context = getGlobalContext();
   
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeIPA(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeInstrumentation(Registry);
+  initializeTarget(Registry);
+  
   cl::ParseCommandLineOptions(argc, argv,
     "llvm .bc -> .bc modular optimizer and analysis printer\n");
 
+  if (AnalyzeOnly && NoOutput) {
+    errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
+    return 1;
+  }
+
   // Allocate a full target machine description only if necessary.
   // FIXME: The choice of target should be controllable on the command line.
   std::auto_ptr<TargetMachine> target;
@@ -417,11 +527,19 @@ int main(int argc, char **argv) {
       NoOutput = true;
 
   // Create a PassManager to hold and optimize the collection of passes we are
-  // about to build...
+  // about to build.
   //
   PassManager Passes;
 
-  // Add an appropriate TargetData instance for this module...
+  // Add an appropriate TargetLibraryInfo pass for the module's triple.
+  TargetLibraryInfo *TLI = new TargetLibraryInfo(Triple(M->getTargetTriple()));
+  
+  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
+  if (DisableSimplifyLibCalls)
+    TLI->disableAllFunctions();
+  Passes.add(TLI);
+  
+  // Add an appropriate TargetData instance for this module.
   TargetData *TD = 0;
   const std::string &ModuleDataLayout = M.get()->getDataLayout();
   if (!ModuleDataLayout.empty())
@@ -439,6 +557,24 @@ int main(int argc, char **argv) {
       FPasses->add(new TargetData(*TD));
   }
 
+  if (PrintBreakpoints) {
+    // Default to standard output.
+    if (!Out) {
+      if (OutputFilename.empty())
+        OutputFilename = "-";
+      
+      std::string ErrorInfo;
+      Out.reset(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+                                     raw_fd_ostream::F_Binary));
+      if (!ErrorInfo.empty()) {
+        errs() << ErrorInfo << '\n';
+        return 1;
+      }
+    }
+    Passes.add(new BreakpointPrinter(Out->os()));
+    NoOutput = true;
+  }
+
   // If the -strip-debug command line option was specified, add it.  If
   // -std-compile-opts was also specified, it will handle StripDebug.
   if (StripDebug && !StandardCompileOpts)
@@ -491,6 +627,9 @@ int main(int argc, char **argv) {
         case PT_BasicBlock:
           Passes.add(new BasicBlockPassPrinter(PassInf, Out->os()));
           break;
+        case PT_Region:
+          Passes.add(new RegionPassPrinter(PassInf, Out->os()));
+          break;
         case PT_Loop:
           Passes.add(new LoopPassPrinter(PassInf, Out->os()));
           break;
@@ -550,7 +689,7 @@ int main(int argc, char **argv) {
   Passes.run(*M.get());
 
   // Declare success.
-  if (!NoOutput)
+  if (!NoOutput || PrintBreakpoints)
     Out->keep();
 
   return 0;
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 0cb79963f505..557d835bacda 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -39,6 +39,9 @@ TEST(APIntTest, i128_NegativeCount) {
   EXPECT_EQ(-1, Minus1.getSExtValue());
 }
 
+// XFAIL this test on FreeBSD where the system gcc-4.2.1 seems to miscompile it.
+#if defined(__llvm__) || !defined(__FreeBSD__)
+
 TEST(APIntTest, i33_Count) {
   APInt i33minus2(33, static_cast<uint64_t>(-2), true);
   EXPECT_EQ(0u, i33minus2.countLeadingZeros());
@@ -50,9 +53,11 @@ TEST(APIntTest, i33_Count) {
   EXPECT_EQ(((uint64_t)-2)&((1ull<<33) -1), i33minus2.getZExtValue());
 }
 
+#endif
+
 TEST(APIntTest, i65_Count) {
   APInt i65minus(65, 0, true);
-  i65minus.set(64);
+  i65minus.setBit(64);
   EXPECT_EQ(0u, i65minus.countLeadingZeros());
   EXPECT_EQ(1u, i65minus.countLeadingOnes());
   EXPECT_EQ(65u, i65minus.getActiveBits());
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
index a9fc133c72e2..fa663121a8a6 100644
--- a/unittests/ADT/BitVectorTest.cpp
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -22,6 +22,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(0U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_TRUE(Vec.empty());
 
@@ -29,6 +30,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(5U, Vec.count());
   EXPECT_EQ(5U, Vec.size());
   EXPECT_TRUE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_FALSE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -36,6 +38,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(5U, Vec.count());
   EXPECT_EQ(11U, Vec.size());
   EXPECT_TRUE(Vec.any());
+  EXPECT_FALSE(Vec.all());
   EXPECT_FALSE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -43,6 +46,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(6U, Inv.count());
   EXPECT_EQ(11U, Inv.size());
   EXPECT_TRUE(Inv.any());
+  EXPECT_FALSE(Inv.all());
   EXPECT_FALSE(Inv.none());
   EXPECT_FALSE(Inv.empty());
 
@@ -123,6 +127,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(130U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_FALSE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -130,6 +135,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Inv.count());
   EXPECT_EQ(0U, Inv.size());
   EXPECT_FALSE(Inv.any());
+  EXPECT_TRUE(Inv.all());
   EXPECT_TRUE(Inv.none());
   EXPECT_TRUE(Inv.empty());
 
@@ -137,6 +143,7 @@ TEST(BitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(0U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_TRUE(Vec.empty());
 }
diff --git a/unittests/ADT/FoldingSet.cpp b/unittests/ADT/FoldingSet.cpp
new file mode 100644
index 000000000000..a18a0df95716
--- /dev/null
+++ b/unittests/ADT/FoldingSet.cpp
@@ -0,0 +1,39 @@
+//===- llvm/unittest/ADT/FoldingSetTest.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FoldingSet unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/FoldingSet.h"
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+// Unaligned string test.
+TEST(FoldingSetTest, UnalignedStringTest) {
+  SCOPED_TRACE("UnalignedStringTest");
+
+  FoldingSetNodeID a, b;
+  // An aligned string
+  std::string str1= "a test string";
+  a.AddString(str1);
+
+  // An unaligned string
+  std::string str2 = ">" + str1;
+  b.AddString(str2.c_str() + 1);
+
+  EXPECT_EQ(a.ComputeHash(), b.ComputeHash());
+}
+
+}
+
diff --git a/unittests/ADT/ImmutableSetTest.cpp b/unittests/ADT/ImmutableSetTest.cpp
index 1be510d599e3..febd441db166 100644
--- a/unittests/ADT/ImmutableSetTest.cpp
+++ b/unittests/ADT/ImmutableSetTest.cpp
@@ -37,11 +37,11 @@ char ImmutableSetTest::buffer[10];
 TEST_F(ImmutableSetTest, EmptyIntSetTest) {
   ImmutableSet<int>::Factory f;
 
-  EXPECT_TRUE(f.GetEmptySet() == f.GetEmptySet());
-  EXPECT_FALSE(f.GetEmptySet() != f.GetEmptySet());
-  EXPECT_TRUE(f.GetEmptySet().isEmpty());
+  EXPECT_TRUE(f.getEmptySet() == f.getEmptySet());
+  EXPECT_FALSE(f.getEmptySet() != f.getEmptySet());
+  EXPECT_TRUE(f.getEmptySet().isEmpty());
 
-  ImmutableSet<int> S = f.GetEmptySet();
+  ImmutableSet<int> S = f.getEmptySet();
   EXPECT_EQ(0u, S.getHeight());
   EXPECT_TRUE(S.begin() == S.end());
   EXPECT_FALSE(S.begin() != S.end());
@@ -50,9 +50,9 @@ TEST_F(ImmutableSetTest, EmptyIntSetTest) {
 
 TEST_F(ImmutableSetTest, OneElemIntSetTest) {
   ImmutableSet<int>::Factory f;
-  ImmutableSet<int> S = f.GetEmptySet();
+  ImmutableSet<int> S = f.getEmptySet();
 
-  ImmutableSet<int> S2 = f.Add(S, 3);
+  ImmutableSet<int> S2 = f.add(S, 3);
   EXPECT_TRUE(S.isEmpty());
   EXPECT_FALSE(S2.isEmpty());
   EXPECT_FALSE(S == S2);
@@ -62,7 +62,7 @@ TEST_F(ImmutableSetTest, OneElemIntSetTest) {
   EXPECT_FALSE(S2.begin() == S2.end());
   EXPECT_TRUE(S2.begin() != S2.end());
 
-  ImmutableSet<int> S3 = f.Add(S, 2);
+  ImmutableSet<int> S3 = f.add(S, 2);
   EXPECT_TRUE(S.isEmpty());
   EXPECT_FALSE(S3.isEmpty());
   EXPECT_FALSE(S == S3);
@@ -78,11 +78,11 @@ TEST_F(ImmutableSetTest, OneElemIntSetTest) {
 
 TEST_F(ImmutableSetTest, MultiElemIntSetTest) {
   ImmutableSet<int>::Factory f;
-  ImmutableSet<int> S = f.GetEmptySet();
+  ImmutableSet<int> S = f.getEmptySet();
 
-  ImmutableSet<int> S2 = f.Add(f.Add(f.Add(S, 3), 4), 5);
-  ImmutableSet<int> S3 = f.Add(f.Add(f.Add(S2, 9), 20), 43);
-  ImmutableSet<int> S4 = f.Add(S2, 9);
+  ImmutableSet<int> S2 = f.add(f.add(f.add(S, 3), 4), 5);
+  ImmutableSet<int> S3 = f.add(f.add(f.add(S2, 9), 20), 43);
+  ImmutableSet<int> S4 = f.add(S2, 9);
 
   EXPECT_TRUE(S.isEmpty());
   EXPECT_FALSE(S2.isEmpty());
@@ -116,11 +116,11 @@ TEST_F(ImmutableSetTest, MultiElemIntSetTest) {
 
 TEST_F(ImmutableSetTest, RemoveIntSetTest) {
   ImmutableSet<int>::Factory f;
-  ImmutableSet<int> S = f.GetEmptySet();
+  ImmutableSet<int> S = f.getEmptySet();
 
-  ImmutableSet<int> S2 = f.Add(f.Add(S, 4), 5);
-  ImmutableSet<int> S3 = f.Add(S2, 3);
-  ImmutableSet<int> S4 = f.Remove(S3, 3);
+  ImmutableSet<int> S2 = f.add(f.add(S, 4), 5);
+  ImmutableSet<int> S3 = f.add(S2, 3);
+  ImmutableSet<int> S4 = f.remove(S3, 3);
 
   EXPECT_TRUE(S3.contains(3));
   EXPECT_FALSE(S2.contains(3));
@@ -139,10 +139,10 @@ TEST_F(ImmutableSetTest, RemoveIntSetTest) {
 
 TEST_F(ImmutableSetTest, CallbackCharSetTest) {
   ImmutableSet<char>::Factory f;
-  ImmutableSet<char> S = f.GetEmptySet();
+  ImmutableSet<char> S = f.getEmptySet();
 
-  ImmutableSet<char> S2 = f.Add(f.Add(f.Add(S, 'a'), 'e'), 'i');
-  ImmutableSet<char> S3 = f.Add(f.Add(S2, 'o'), 'u');
+  ImmutableSet<char> S2 = f.add(f.add(f.add(S, 'a'), 'e'), 'i');
+  ImmutableSet<char> S3 = f.add(f.add(S2, 'o'), 'u');
 
   S3.foreach<MyIter>();
 
@@ -151,10 +151,10 @@ TEST_F(ImmutableSetTest, CallbackCharSetTest) {
 
 TEST_F(ImmutableSetTest, Callback2CharSetTest) {
   ImmutableSet<char>::Factory f;
-  ImmutableSet<char> S = f.GetEmptySet();
+  ImmutableSet<char> S = f.getEmptySet();
 
-  ImmutableSet<char> S2 = f.Add(f.Add(f.Add(S, 'b'), 'c'), 'd');
-  ImmutableSet<char> S3 = f.Add(f.Add(f.Add(S2, 'f'), 'g'), 'h');
+  ImmutableSet<char> S2 = f.add(f.add(f.add(S, 'b'), 'c'), 'd');
+  ImmutableSet<char> S3 = f.add(f.add(f.add(S2, 'f'), 'g'), 'h');
 
   MyIter obj;
   S3.foreach<MyIter>(obj);
@@ -174,10 +174,10 @@ TEST_F(ImmutableSetTest, Callback2CharSetTest) {
 
 TEST_F(ImmutableSetTest, IterLongSetTest) {
   ImmutableSet<long>::Factory f;
-  ImmutableSet<long> S = f.GetEmptySet();
+  ImmutableSet<long> S = f.getEmptySet();
 
-  ImmutableSet<long> S2 = f.Add(f.Add(f.Add(S, 0), 1), 2);
-  ImmutableSet<long> S3 = f.Add(f.Add(f.Add(S2, 3), 4), 5);
+  ImmutableSet<long> S2 = f.add(f.add(f.add(S, 0), 1), 2);
+  ImmutableSet<long> S3 = f.add(f.add(f.add(S2, 3), 4), 5);
 
   int i = 0;
   for (ImmutableSet<long>::iterator I = S.begin(), E = S.end(); I != E; ++I) {
diff --git a/unittests/ADT/IntEqClassesTest.cpp b/unittests/ADT/IntEqClassesTest.cpp
new file mode 100644
index 000000000000..fc908c1e8bff
--- /dev/null
+++ b/unittests/ADT/IntEqClassesTest.cpp
@@ -0,0 +1,107 @@
+//===---- ADT/IntEqClassesTest.cpp - IntEqClasses unit tests ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(IntEqClasses, Simple) {
+  IntEqClasses ec(10);
+
+  ec.join(0, 1);
+  ec.join(3, 2);
+  ec.join(4, 5);
+  ec.join(7, 6);
+
+  EXPECT_EQ(0u, ec.findLeader(0));
+  EXPECT_EQ(0u, ec.findLeader(1));
+  EXPECT_EQ(2u, ec.findLeader(2));
+  EXPECT_EQ(2u, ec.findLeader(3));
+  EXPECT_EQ(4u, ec.findLeader(4));
+  EXPECT_EQ(4u, ec.findLeader(5));
+  EXPECT_EQ(6u, ec.findLeader(6));
+  EXPECT_EQ(6u, ec.findLeader(7));
+  EXPECT_EQ(8u, ec.findLeader(8));
+  EXPECT_EQ(9u, ec.findLeader(9));
+
+  // join two non-leaders.
+  ec.join(1, 3);
+
+  EXPECT_EQ(0u, ec.findLeader(0));
+  EXPECT_EQ(0u, ec.findLeader(1));
+  EXPECT_EQ(0u, ec.findLeader(2));
+  EXPECT_EQ(0u, ec.findLeader(3));
+  EXPECT_EQ(4u, ec.findLeader(4));
+  EXPECT_EQ(4u, ec.findLeader(5));
+  EXPECT_EQ(6u, ec.findLeader(6));
+  EXPECT_EQ(6u, ec.findLeader(7));
+  EXPECT_EQ(8u, ec.findLeader(8));
+  EXPECT_EQ(9u, ec.findLeader(9));
+
+  // join two leaders.
+  ec.join(4, 8);
+
+  EXPECT_EQ(0u, ec.findLeader(0));
+  EXPECT_EQ(0u, ec.findLeader(1));
+  EXPECT_EQ(0u, ec.findLeader(2));
+  EXPECT_EQ(0u, ec.findLeader(3));
+  EXPECT_EQ(4u, ec.findLeader(4));
+  EXPECT_EQ(4u, ec.findLeader(5));
+  EXPECT_EQ(6u, ec.findLeader(6));
+  EXPECT_EQ(6u, ec.findLeader(7));
+  EXPECT_EQ(4u, ec.findLeader(8));
+  EXPECT_EQ(9u, ec.findLeader(9));
+
+  // join mixed.
+  ec.join(9, 1);
+
+  EXPECT_EQ(0u, ec.findLeader(0));
+  EXPECT_EQ(0u, ec.findLeader(1));
+  EXPECT_EQ(0u, ec.findLeader(2));
+  EXPECT_EQ(0u, ec.findLeader(3));
+  EXPECT_EQ(4u, ec.findLeader(4));
+  EXPECT_EQ(4u, ec.findLeader(5));
+  EXPECT_EQ(6u, ec.findLeader(6));
+  EXPECT_EQ(6u, ec.findLeader(7));
+  EXPECT_EQ(4u, ec.findLeader(8));
+  EXPECT_EQ(0u, ec.findLeader(9));
+
+  // compressed map.
+  ec.compress();
+  EXPECT_EQ(3u, ec.getNumClasses());
+
+  EXPECT_EQ(0u, ec[0]);
+  EXPECT_EQ(0u, ec[1]);
+  EXPECT_EQ(0u, ec[2]);
+  EXPECT_EQ(0u, ec[3]);
+  EXPECT_EQ(1u, ec[4]);
+  EXPECT_EQ(1u, ec[5]);
+  EXPECT_EQ(2u, ec[6]);
+  EXPECT_EQ(2u, ec[7]);
+  EXPECT_EQ(1u, ec[8]);
+  EXPECT_EQ(0u, ec[9]);
+
+  // uncompressed map.
+  ec.uncompress();
+  EXPECT_EQ(0u, ec.findLeader(0));
+  EXPECT_EQ(0u, ec.findLeader(1));
+  EXPECT_EQ(0u, ec.findLeader(2));
+  EXPECT_EQ(0u, ec.findLeader(3));
+  EXPECT_EQ(4u, ec.findLeader(4));
+  EXPECT_EQ(4u, ec.findLeader(5));
+  EXPECT_EQ(6u, ec.findLeader(6));
+  EXPECT_EQ(6u, ec.findLeader(7));
+  EXPECT_EQ(4u, ec.findLeader(8));
+  EXPECT_EQ(0u, ec.findLeader(9));
+}
+
+} // end anonymous namespace
diff --git a/unittests/ADT/IntervalMapTest.cpp b/unittests/ADT/IntervalMapTest.cpp
new file mode 100644
index 000000000000..b5556d265ae4
--- /dev/null
+++ b/unittests/ADT/IntervalMapTest.cpp
@@ -0,0 +1,716 @@
+//===---- ADT/IntervalMapTest.cpp - IntervalMap unit tests ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+typedef IntervalMap<unsigned, unsigned, 4> UUMap;
+
+// Empty map tests
+TEST(IntervalMapTest, EmptyMap) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+  EXPECT_TRUE(map.empty());
+
+  // Lookup on empty map.
+  EXPECT_EQ(0u, map.lookup(0));
+  EXPECT_EQ(7u, map.lookup(0, 7));
+  EXPECT_EQ(0u, map.lookup(~0u-1));
+  EXPECT_EQ(7u, map.lookup(~0u-1, 7));
+
+  // Iterators.
+  EXPECT_TRUE(map.begin() == map.begin());
+  EXPECT_TRUE(map.begin() == map.end());
+  EXPECT_TRUE(map.end() == map.end());
+  EXPECT_FALSE(map.begin() != map.begin());
+  EXPECT_FALSE(map.begin() != map.end());
+  EXPECT_FALSE(map.end() != map.end());
+  EXPECT_FALSE(map.begin().valid());
+  EXPECT_FALSE(map.end().valid());
+  UUMap::iterator I = map.begin();
+  EXPECT_FALSE(I.valid());
+  EXPECT_TRUE(I == map.end());
+
+  // Default constructor and cross-constness compares.
+  UUMap::const_iterator CI;
+  CI = map.begin();
+  EXPECT_TRUE(CI == I);
+  UUMap::iterator I2;
+  I2 = map.end();
+  EXPECT_TRUE(I2 == CI);
+}
+
+// Single entry map tests
+TEST(IntervalMapTest, SingleEntryMap) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+  map.insert(100, 150, 1);
+  EXPECT_FALSE(map.empty());
+
+  // Lookup around interval.
+  EXPECT_EQ(0u, map.lookup(0));
+  EXPECT_EQ(0u, map.lookup(99));
+  EXPECT_EQ(1u, map.lookup(100));
+  EXPECT_EQ(1u, map.lookup(101));
+  EXPECT_EQ(1u, map.lookup(125));
+  EXPECT_EQ(1u, map.lookup(149));
+  EXPECT_EQ(1u, map.lookup(150));
+  EXPECT_EQ(0u, map.lookup(151));
+  EXPECT_EQ(0u, map.lookup(200));
+  EXPECT_EQ(0u, map.lookup(~0u-1));
+
+  // Iterators.
+  EXPECT_TRUE(map.begin() == map.begin());
+  EXPECT_FALSE(map.begin() == map.end());
+  EXPECT_TRUE(map.end() == map.end());
+  EXPECT_TRUE(map.begin().valid());
+  EXPECT_FALSE(map.end().valid());
+
+  // Iter deref.
+  UUMap::iterator I = map.begin();
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  EXPECT_EQ(1u, I.value());
+
+  // Preincrement.
+  ++I;
+  EXPECT_FALSE(I.valid());
+  EXPECT_FALSE(I == map.begin());
+  EXPECT_TRUE(I == map.end());
+
+  // PreDecrement.
+  --I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  EXPECT_EQ(1u, I.value());
+  EXPECT_TRUE(I == map.begin());
+  EXPECT_FALSE(I == map.end());
+
+  // Change the value.
+  I.setValue(2);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  // Grow the bounds.
+  I.setStart(0);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(0u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  I.setStop(200);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(0u, I.start());
+  EXPECT_EQ(200u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  // Shrink the bounds.
+  I.setStart(150);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(150u, I.start());
+  EXPECT_EQ(200u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  I.setStop(160);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(150u, I.start());
+  EXPECT_EQ(160u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  // Erase last elem.
+  I.erase();
+  EXPECT_TRUE(map.empty());
+  EXPECT_EQ(0, std::distance(map.begin(), map.end()));
+}
+
+// Flat coalescing tests.
+TEST(IntervalMapTest, RootCoalescing) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+  map.insert(100, 150, 1);
+
+  // Coalesce from the left.
+  map.insert(90, 99, 1);
+  EXPECT_EQ(1, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(90u, map.start());
+  EXPECT_EQ(150u, map.stop());
+
+  // Coalesce from the right.
+  map.insert(151, 200, 1);
+  EXPECT_EQ(1, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(90u, map.start());
+  EXPECT_EQ(200u, map.stop());
+
+  // Non-coalesce from the left.
+  map.insert(60, 89, 2);
+  EXPECT_EQ(2, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(60u, map.start());
+  EXPECT_EQ(200u, map.stop());
+  EXPECT_EQ(2u, map.lookup(89));
+  EXPECT_EQ(1u, map.lookup(90));
+
+  UUMap::iterator I = map.begin();
+  EXPECT_EQ(60u, I.start());
+  EXPECT_EQ(89u, I.stop());
+  EXPECT_EQ(2u, I.value());
+  ++I;
+  EXPECT_EQ(90u, I.start());
+  EXPECT_EQ(200u, I.stop());
+  EXPECT_EQ(1u, I.value());
+  ++I;
+  EXPECT_FALSE(I.valid());
+
+  // Non-coalesce from the right.
+  map.insert(201, 210, 2);
+  EXPECT_EQ(3, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(60u, map.start());
+  EXPECT_EQ(210u, map.stop());
+  EXPECT_EQ(2u, map.lookup(201));
+  EXPECT_EQ(1u, map.lookup(200));
+
+  // Erase from the left.
+  map.begin().erase();
+  EXPECT_EQ(2, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(90u, map.start());
+  EXPECT_EQ(210u, map.stop());
+
+  // Erase from the right.
+  (--map.end()).erase();
+  EXPECT_EQ(1, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(90u, map.start());
+  EXPECT_EQ(200u, map.stop());
+
+  // Add non-coalescing, then trigger coalescing with setValue.
+  map.insert(80, 89, 2);
+  map.insert(201, 210, 2);
+  EXPECT_EQ(3, std::distance(map.begin(), map.end()));
+  (++map.begin()).setValue(2);
+  EXPECT_EQ(1, std::distance(map.begin(), map.end()));
+  I = map.begin();
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(80u, I.start());
+  EXPECT_EQ(210u, I.stop());
+  EXPECT_EQ(2u, I.value());
+}
+
+// Flat multi-coalescing tests.
+TEST(IntervalMapTest, RootMultiCoalescing) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+  map.insert(140, 150, 1);
+  map.insert(160, 170, 1);
+  map.insert(100, 110, 1);
+  map.insert(120, 130, 1);
+  EXPECT_EQ(4, std::distance(map.begin(), map.end()));
+  EXPECT_EQ(100u, map.start());
+  EXPECT_EQ(170u, map.stop());
+
+  // Verify inserts.
+  UUMap::iterator I = map.begin();
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(110u, I.stop());
+  ++I;
+  EXPECT_EQ(120u, I.start());
+  EXPECT_EQ(130u, I.stop());
+  ++I;
+  EXPECT_EQ(140u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  ++I;
+  EXPECT_EQ(160u, I.start());
+  EXPECT_EQ(170u, I.stop());
+  ++I;
+  EXPECT_FALSE(I.valid());
+
+  // Test advanceTo on flat tree.
+  I = map.begin();
+  I.advanceTo(135);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(140u, I.start());
+  EXPECT_EQ(150u, I.stop());
+
+  I.advanceTo(145);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(140u, I.start());
+  EXPECT_EQ(150u, I.stop());
+
+  I.advanceTo(200);
+  EXPECT_FALSE(I.valid());
+
+  I.advanceTo(300);
+  EXPECT_FALSE(I.valid());
+
+  // Coalesce left with followers.
+  // [100;110] [120;130] [140;150] [160;170]
+  map.insert(111, 115, 1);
+  I = map.begin();
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(115u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(120u, I.start());
+  EXPECT_EQ(130u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(140u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(160u, I.start());
+  EXPECT_EQ(170u, I.stop());
+  ++I;
+  EXPECT_FALSE(I.valid());
+
+  // Coalesce right with followers.
+  // [100;115] [120;130] [140;150] [160;170]
+  map.insert(135, 139, 1);
+  I = map.begin();
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(115u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(120u, I.start());
+  EXPECT_EQ(130u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(135u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(160u, I.start());
+  EXPECT_EQ(170u, I.stop());
+  ++I;
+  EXPECT_FALSE(I.valid());
+
+  // Coalesce left and right with followers.
+  // [100;115] [120;130] [135;150] [160;170]
+  map.insert(131, 134, 1);
+  I = map.begin();
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(100u, I.start());
+  EXPECT_EQ(115u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(120u, I.start());
+  EXPECT_EQ(150u, I.stop());
+  ++I;
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(160u, I.start());
+  EXPECT_EQ(170u, I.stop());
+  ++I;
+  EXPECT_FALSE(I.valid());
+
+  // Test clear() on non-branched map.
+  map.clear();
+  EXPECT_TRUE(map.empty());
+  EXPECT_TRUE(map.begin() == map.end());
+}
+
+// Branched, non-coalescing tests.
+TEST(IntervalMapTest, Branched) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+
+  // Insert enough intervals to force a branched tree.
+  // This creates 9 leaf nodes with 11 elements each, tree height = 1.
+  for (unsigned i = 1; i < 100; ++i) {
+    map.insert(10*i, 10*i+5, i);
+    EXPECT_EQ(10u, map.start());
+    EXPECT_EQ(10*i+5, map.stop());
+  }
+
+  // Tree limits.
+  EXPECT_FALSE(map.empty());
+  EXPECT_EQ(10u, map.start());
+  EXPECT_EQ(995u, map.stop());
+
+  // Tree lookup.
+  for (unsigned i = 1; i < 100; ++i) {
+    EXPECT_EQ(0u, map.lookup(10*i-1));
+    EXPECT_EQ(i, map.lookup(10*i));
+    EXPECT_EQ(i, map.lookup(10*i+5));
+    EXPECT_EQ(0u, map.lookup(10*i+6));
+  }
+
+  // Forward iteration.
+  UUMap::iterator I = map.begin();
+  for (unsigned i = 1; i < 100; ++i) {
+    ASSERT_TRUE(I.valid());
+    EXPECT_EQ(10*i, I.start());
+    EXPECT_EQ(10*i+5, I.stop());
+    EXPECT_EQ(i, *I);
+    ++I;
+  }
+  EXPECT_FALSE(I.valid());
+  EXPECT_TRUE(I == map.end());
+
+  // Backwards iteration.
+  for (unsigned i = 99; i; --i) {
+    --I;
+    ASSERT_TRUE(I.valid());
+    EXPECT_EQ(10*i, I.start());
+    EXPECT_EQ(10*i+5, I.stop());
+    EXPECT_EQ(i, *I);
+  }
+  EXPECT_TRUE(I == map.begin());
+
+  // Test advanceTo in same node.
+  I.advanceTo(20);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(25u, I.stop());
+
+  // Change value, no coalescing.
+  I.setValue(0);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(25u, I.stop());
+  EXPECT_EQ(0u, I.value());
+
+  // Close the gap right, no coalescing.
+  I.setStop(29);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(29u, I.stop());
+  EXPECT_EQ(0u, I.value());
+
+  // Change value, no coalescing.
+  I.setValue(2);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(29u, I.stop());
+  EXPECT_EQ(2u, I.value());
+
+  // Change value, now coalescing.
+  I.setValue(3);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(35u, I.stop());
+  EXPECT_EQ(3u, I.value());
+
+  // Close the gap, now coalescing.
+  I.setValue(4);
+  ASSERT_TRUE(I.valid());
+  I.setStop(39);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(45u, I.stop());
+  EXPECT_EQ(4u, I.value());
+
+  // advanceTo another node.
+  I.advanceTo(200);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(200u, I.start());
+  EXPECT_EQ(205u, I.stop());
+
+  // Close the gap left, no coalescing.
+  I.setStart(196);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(196u, I.start());
+  EXPECT_EQ(205u, I.stop());
+  EXPECT_EQ(20u, I.value());
+
+  // Change value, no coalescing.
+  I.setValue(0);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(196u, I.start());
+  EXPECT_EQ(205u, I.stop());
+  EXPECT_EQ(0u, I.value());
+
+  // Change value, now coalescing.
+  I.setValue(19);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(190u, I.start());
+  EXPECT_EQ(205u, I.stop());
+  EXPECT_EQ(19u, I.value());
+
+  // Close the gap, now coalescing.
+  I.setValue(18);
+  ASSERT_TRUE(I.valid());
+  I.setStart(186);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(180u, I.start());
+  EXPECT_EQ(205u, I.stop());
+  EXPECT_EQ(18u, I.value());
+
+  // Erase from the front.
+  I = map.begin();
+  for (unsigned i = 0; i != 20; ++i) {
+    I.erase();
+    EXPECT_TRUE(I == map.begin());
+    EXPECT_FALSE(map.empty());
+    EXPECT_EQ(I.start(), map.start());
+    EXPECT_EQ(995u, map.stop());
+  }
+
+  // Test clear() on branched map.
+  map.clear();
+  EXPECT_TRUE(map.empty());
+  EXPECT_TRUE(map.begin() == map.end());
+}
+
+// Branched, high, non-coalescing tests.
+TEST(IntervalMapTest, Branched2) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+
+  // Insert enough intervals to force a height >= 2 tree.
+  for (unsigned i = 1; i < 1000; ++i)
+    map.insert(10*i, 10*i+5, i);
+
+  // Tree limits.
+  EXPECT_FALSE(map.empty());
+  EXPECT_EQ(10u, map.start());
+  EXPECT_EQ(9995u, map.stop());
+
+  // Tree lookup.
+  for (unsigned i = 1; i < 1000; ++i) {
+    EXPECT_EQ(0u, map.lookup(10*i-1));
+    EXPECT_EQ(i, map.lookup(10*i));
+    EXPECT_EQ(i, map.lookup(10*i+5));
+    EXPECT_EQ(0u, map.lookup(10*i+6));
+  }
+
+  // Forward iteration.
+  UUMap::iterator I = map.begin();
+  for (unsigned i = 1; i < 1000; ++i) {
+    ASSERT_TRUE(I.valid());
+    EXPECT_EQ(10*i, I.start());
+    EXPECT_EQ(10*i+5, I.stop());
+    EXPECT_EQ(i, *I);
+    ++I;
+  }
+  EXPECT_FALSE(I.valid());
+  EXPECT_TRUE(I == map.end());
+
+  // Backwards iteration.
+  for (unsigned i = 999; i; --i) {
+    --I;
+    ASSERT_TRUE(I.valid());
+    EXPECT_EQ(10*i, I.start());
+    EXPECT_EQ(10*i+5, I.stop());
+    EXPECT_EQ(i, *I);
+  }
+  EXPECT_TRUE(I == map.begin());
+
+  // Test advanceTo in same node.
+  I.advanceTo(20);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(20u, I.start());
+  EXPECT_EQ(25u, I.stop());
+
+  // advanceTo sibling leaf node.
+  I.advanceTo(200);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(200u, I.start());
+  EXPECT_EQ(205u, I.stop());
+
+  // advanceTo further.
+  I.advanceTo(2000);
+  ASSERT_TRUE(I.valid());
+  EXPECT_EQ(2000u, I.start());
+  EXPECT_EQ(2005u, I.stop());
+
+  // advanceTo beyond end()
+  I.advanceTo(20000);
+  EXPECT_FALSE(I.valid());
+
+  // end().advanceTo() is valid as long as x > map.stop()
+  I.advanceTo(30000);
+  EXPECT_FALSE(I.valid());
+
+  // Test clear() on branched map.
+  map.clear();
+  EXPECT_TRUE(map.empty());
+  EXPECT_TRUE(map.begin() == map.end());
+}
+
+// Random insertions, coalescing to a single interval.
+TEST(IntervalMapTest, RandomCoalescing) {
+  UUMap::Allocator allocator;
+  UUMap map(allocator);
+
+  // This is a poor PRNG with maximal period:
+  // x_n = 5 x_{n-1} + 1 mod 2^N
+
+  unsigned x = 100;
+  for (unsigned i = 0; i != 4096; ++i) {
+    map.insert(10*x, 10*x+9, 1);
+    EXPECT_GE(10*x, map.start());
+    EXPECT_LE(10*x+9, map.stop());
+    x = (5*x+1)%4096;
+  }
+
+  // Map should be fully coalesced after that exercise.
+  EXPECT_FALSE(map.empty());
+  EXPECT_EQ(0u, map.start());
+  EXPECT_EQ(40959u, map.stop());
+  EXPECT_EQ(1, std::distance(map.begin(), map.end()));
+
+}
+
+TEST(IntervalMapOverlapsTest, SmallMaps) {
+  typedef IntervalMapOverlaps<UUMap,UUMap> UUOverlaps;
+  UUMap::Allocator allocator;
+  UUMap mapA(allocator);
+  UUMap mapB(allocator);
+
+  // empty, empty.
+  EXPECT_FALSE(UUOverlaps(mapA, mapB).valid());
+
+  mapA.insert(1, 2, 3);
+
+  // full, empty
+  EXPECT_FALSE(UUOverlaps(mapA, mapB).valid());
+  // empty, full
+  EXPECT_FALSE(UUOverlaps(mapB, mapA).valid());
+
+  mapB.insert(3, 4, 5);
+
+  // full, full, non-overlapping
+  EXPECT_FALSE(UUOverlaps(mapA, mapB).valid());
+  EXPECT_FALSE(UUOverlaps(mapB, mapA).valid());
+
+  // Add an overlapping segment.
+  mapA.insert(4, 5, 6);
+
+  UUOverlaps AB(mapA, mapB);
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(4u, AB.a().start());
+  EXPECT_EQ(3u, AB.b().start());
+  ++AB;
+  EXPECT_FALSE(AB.valid());
+
+  UUOverlaps BA(mapB, mapA);
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(3u, BA.a().start());
+  EXPECT_EQ(4u, BA.b().start());
+  // advance past end.
+  BA.advanceTo(6);
+  EXPECT_FALSE(BA.valid());
+  // advance an invalid iterator.
+  BA.advanceTo(7);
+  EXPECT_FALSE(BA.valid());
+}
+
+TEST(IntervalMapOverlapsTest, BigMaps) {
+  typedef IntervalMapOverlaps<UUMap,UUMap> UUOverlaps;
+  UUMap::Allocator allocator;
+  UUMap mapA(allocator);
+  UUMap mapB(allocator);
+
+  // [0;4] [10;14] [20;24] ...
+  for (unsigned n = 0; n != 100; ++n)
+    mapA.insert(10*n, 10*n+4, n);
+
+  // [5;6] [15;16] [25;26] ...
+  for (unsigned n = 10; n != 20; ++n)
+    mapB.insert(10*n+5, 10*n+6, n);
+
+  // [208;209] [218;219] ...
+  for (unsigned n = 20; n != 30; ++n)
+    mapB.insert(10*n+8, 10*n+9, n);
+
+  // insert some overlapping segments.
+  mapB.insert(400, 400, 400);
+  mapB.insert(401, 401, 401);
+  mapB.insert(402, 500, 402);
+  mapB.insert(600, 601, 402);
+
+  UUOverlaps AB(mapA, mapB);
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(400u, AB.a().start());
+  EXPECT_EQ(400u, AB.b().start());
+  ++AB;
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(400u, AB.a().start());
+  EXPECT_EQ(401u, AB.b().start());
+  ++AB;
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(400u, AB.a().start());
+  EXPECT_EQ(402u, AB.b().start());
+  ++AB;
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(410u, AB.a().start());
+  EXPECT_EQ(402u, AB.b().start());
+  ++AB;
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(420u, AB.a().start());
+  EXPECT_EQ(402u, AB.b().start());
+  AB.skipB();
+  ASSERT_TRUE(AB.valid());
+  EXPECT_EQ(600u, AB.a().start());
+  EXPECT_EQ(600u, AB.b().start());
+  ++AB;
+  EXPECT_FALSE(AB.valid());
+
+  // Test advanceTo.
+  UUOverlaps AB2(mapA, mapB);
+  AB2.advanceTo(410);
+  ASSERT_TRUE(AB2.valid());
+  EXPECT_EQ(410u, AB2.a().start());
+  EXPECT_EQ(402u, AB2.b().start());
+
+  // It is valid to advanceTo with any monotonic sequence.
+  AB2.advanceTo(411);
+  ASSERT_TRUE(AB2.valid());
+  EXPECT_EQ(410u, AB2.a().start());
+  EXPECT_EQ(402u, AB2.b().start());
+
+  // Check reversed maps.
+  UUOverlaps BA(mapB, mapA);
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(400u, BA.b().start());
+  EXPECT_EQ(400u, BA.a().start());
+  ++BA;
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(400u, BA.b().start());
+  EXPECT_EQ(401u, BA.a().start());
+  ++BA;
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(400u, BA.b().start());
+  EXPECT_EQ(402u, BA.a().start());
+  ++BA;
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(410u, BA.b().start());
+  EXPECT_EQ(402u, BA.a().start());
+  ++BA;
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(420u, BA.b().start());
+  EXPECT_EQ(402u, BA.a().start());
+  BA.skipA();
+  ASSERT_TRUE(BA.valid());
+  EXPECT_EQ(600u, BA.b().start());
+  EXPECT_EQ(600u, BA.a().start());
+  ++BA;
+  EXPECT_FALSE(BA.valid());
+
+  // Test advanceTo.
+  UUOverlaps BA2(mapB, mapA);
+  BA2.advanceTo(410);
+  ASSERT_TRUE(BA2.valid());
+  EXPECT_EQ(410u, BA2.b().start());
+  EXPECT_EQ(402u, BA2.a().start());
+
+  BA2.advanceTo(411);
+  ASSERT_TRUE(BA2.valid());
+  EXPECT_EQ(410u, BA2.b().start());
+  EXPECT_EQ(402u, BA2.a().start());
+}
+
+} // namespace
diff --git a/unittests/ADT/Makefile b/unittests/ADT/Makefile
index fe0832894d32..c255a0b44d09 100644
--- a/unittests/ADT/Makefile
+++ b/unittests/ADT/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TESTNAME = ADT
-LINK_COMPONENTS := core support
+LINK_COMPONENTS := support
 
 include $(LEVEL)/Makefile.config
 
diff --git a/unittests/ADT/SmallBitVectorTest.cpp b/unittests/ADT/SmallBitVectorTest.cpp
index 9c69aad5a973..c4dda9e88d44 100644
--- a/unittests/ADT/SmallBitVectorTest.cpp
+++ b/unittests/ADT/SmallBitVectorTest.cpp
@@ -19,6 +19,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(0U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_TRUE(Vec.empty());
 
@@ -26,6 +27,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(5U, Vec.count());
   EXPECT_EQ(5U, Vec.size());
   EXPECT_TRUE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_FALSE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -33,6 +35,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(5U, Vec.count());
   EXPECT_EQ(11U, Vec.size());
   EXPECT_TRUE(Vec.any());
+  EXPECT_FALSE(Vec.all());
   EXPECT_FALSE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -40,6 +43,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(6U, Inv.count());
   EXPECT_EQ(11U, Inv.size());
   EXPECT_TRUE(Inv.any());
+  EXPECT_FALSE(Inv.all());
   EXPECT_FALSE(Inv.none());
   EXPECT_FALSE(Inv.empty());
 
@@ -120,6 +124,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(130U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_FALSE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_FALSE(Vec.empty());
 
@@ -127,6 +132,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Inv.count());
   EXPECT_EQ(0U, Inv.size());
   EXPECT_FALSE(Inv.any());
+  EXPECT_TRUE(Inv.all());
   EXPECT_TRUE(Inv.none());
   EXPECT_TRUE(Inv.empty());
 
@@ -134,6 +140,7 @@ TEST(SmallBitVectorTest, TrivialOperation) {
   EXPECT_EQ(0U, Vec.count());
   EXPECT_EQ(0U, Vec.size());
   EXPECT_FALSE(Vec.any());
+  EXPECT_TRUE(Vec.all());
   EXPECT_TRUE(Vec.none());
   EXPECT_TRUE(Vec.empty());
 }
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 78dc393e5c18..f4da54dbca1a 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -77,7 +77,7 @@ public:
     return c0.getValue() == c1.getValue();
   }
 
-  friend bool ATTRIBUTE_UNUSED
+  friend bool LLVM_ATTRIBUTE_UNUSED
   operator!=(const Constructable & c0, const Constructable & c1) {
     return c0.getValue() != c1.getValue();
   }
diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp
index 413f068d4906..ea91348a5bdf 100644
--- a/unittests/ADT/StringMapTest.cpp
+++ b/unittests/ADT/StringMapTest.cpp
@@ -9,7 +9,7 @@
 
 #include "gtest/gtest.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index 7e4d0dcd413f..5731e4abaf15 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
 namespace llvm {
 
 std::ostream &operator<<(std::ostream &OS, const StringRef &S) {
-  OS << S;
+  OS << S.str();
   return OS;
 }
 
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 067f5e5116cd..160b69253b6f 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -79,16 +79,25 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::x86, T.getArch());
   EXPECT_EQ(Triple::Apple, T.getVendor());
   EXPECT_EQ(Triple::Darwin, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
 
   T = Triple("x86_64-pc-linux-gnu");
   EXPECT_EQ(Triple::x86_64, T.getArch());
   EXPECT_EQ(Triple::PC, T.getVendor());
   EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::GNU, T.getEnvironment());
 
   T = Triple("powerpc-dunno-notsure");
   EXPECT_EQ(Triple::ppc, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+  T = Triple("arm-none-none-eabi");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::EABI, T.getEnvironment());
 
   T = Triple("huh");
   EXPECT_EQ(Triple::UnknownArch, T.getArch());
@@ -105,6 +114,7 @@ static std::string Join(StringRef A, StringRef B, StringRef C, StringRef D) {
 }
 
 TEST(TripleTest, Normalization) {
+
   EXPECT_EQ("", Triple::normalize(""));
   EXPECT_EQ("-", Triple::normalize("-"));
   EXPECT_EQ("--", Triple::normalize("--"));
@@ -119,10 +129,12 @@ TEST(TripleTest, Normalization) {
   EXPECT_EQ("i386-b-c", Triple::normalize("i386-b-c"));
   EXPECT_EQ("i386-a-c", Triple::normalize("a-i386-c"));
   EXPECT_EQ("i386-a-b", Triple::normalize("a-b-i386"));
+  EXPECT_EQ("i386-a-b-c", Triple::normalize("a-b-c-i386"));
 
   EXPECT_EQ("a-pc-c", Triple::normalize("a-pc-c"));
   EXPECT_EQ("-pc-b-c", Triple::normalize("pc-b-c"));
   EXPECT_EQ("a-pc-b", Triple::normalize("a-b-pc"));
+  EXPECT_EQ("a-pc-b-c", Triple::normalize("a-b-c-pc"));
 
   EXPECT_EQ("a-b-linux", Triple::normalize("a-b-linux"));
   EXPECT_EQ("--linux-b-c", Triple::normalize("linux-b-c"));
@@ -137,10 +149,11 @@ TEST(TripleTest, Normalization) {
   EXPECT_EQ("-pc", Triple::normalize("pc"));
   EXPECT_EQ("--linux", Triple::normalize("linux"));
 
+  EXPECT_EQ("x86_64--linux-gnu", Triple::normalize("x86_64-gnu-linux"));
+
   // Check that normalizing a permutated set of valid components returns a
   // triple with the unpermuted components.
   StringRef C[4];
-  C[3] = "environment";
   for (int Arch = 1+Triple::UnknownArch; Arch < Triple::InvalidArch; ++Arch) {
     C[0] = Triple::getArchTypeName(Triple::ArchType(Arch));
     for (int Vendor = 1+Triple::UnknownVendor; Vendor <= Triple::PC;
@@ -149,46 +162,52 @@ TEST(TripleTest, Normalization) {
       for (int OS = 1+Triple::UnknownOS; OS <= Triple::Minix; ++OS) {
         C[2] = Triple::getOSTypeName(Triple::OSType(OS));
 
-        std::string E = Join(C[0], C[1], C[2]);
-        std::string F = Join(C[0], C[1], C[2], C[3]);
-        EXPECT_EQ(E, Triple::normalize(Join(C[0], C[1], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[2], C[3])));
-
         // If a value has multiple interpretations, then the permutation
         // test will inevitably fail.  Currently this is only the case for
         // "psp" which parses as both an architecture and an O/S.
         if (OS == Triple::Psp)
           continue;
 
+        std::string E = Join(C[0], C[1], C[2]);
+        EXPECT_EQ(E, Triple::normalize(Join(C[0], C[1], C[2])));
+
         EXPECT_EQ(E, Triple::normalize(Join(C[0], C[2], C[1])));
         EXPECT_EQ(E, Triple::normalize(Join(C[1], C[2], C[0])));
         EXPECT_EQ(E, Triple::normalize(Join(C[1], C[0], C[2])));
         EXPECT_EQ(E, Triple::normalize(Join(C[2], C[0], C[1])));
         EXPECT_EQ(E, Triple::normalize(Join(C[2], C[1], C[0])));
 
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[3], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[3], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[1], C[3])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[1], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[2], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[3], C[0])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[0], C[3])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[0], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[2], C[0])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[2], C[3])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[3], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[0], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[1], C[0])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[1], C[3])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[3], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[3], C[0])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[0], C[3])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[1], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[2], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[2], C[0])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[0], C[2])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[0], C[1])));
-        EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[1], C[0])));
+        for (int Env = 1+Triple::UnknownEnvironment; Env <= Triple::MachO;
+             ++Env) {
+          C[3] = Triple::getEnvironmentTypeName(Triple::EnvironmentType(Env));
+
+          std::string F = Join(C[0], C[1], C[2], C[3]);
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[2], C[3])));
+
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[3], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[3], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[1], C[3])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[1], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[2], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[3], C[0])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[0], C[3])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[0], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[2], C[0])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[2], C[3])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[3], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[0], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[1], C[0])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[1], C[3])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[3], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[3], C[0])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[0], C[3])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[1], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[2], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[2], C[0])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[0], C[2])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[0], C[1])));
+          EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[1], C[0])));
+        }
       }
     }
   }
@@ -203,6 +222,7 @@ TEST(TripleTest, Normalization) {
   EXPECT_EQ("i486--linux-gnu", Triple::normalize("i486-linux-gnu")); // i486-pc-linux-gnu
   EXPECT_EQ("i386-redhat-linux", Triple::normalize("i386-redhat-linux")); // i386-redhat-linux-gnu
   EXPECT_EQ("i686--linux", Triple::normalize("i686-linux")); // i686-pc-linux-gnu
+  EXPECT_EQ("arm-none--eabi", Triple::normalize("arm-none-eabi")); // arm-none-eabi
 }
 
 TEST(TripleTest, MutateName) {
@@ -210,6 +230,7 @@ TEST(TripleTest, MutateName) {
   EXPECT_EQ(Triple::UnknownArch, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
 
   T.setArchName("i386");
   EXPECT_EQ(Triple::x86, T.getArch());
@@ -243,6 +264,7 @@ TEST(TripleTest, MutateName) {
   EXPECT_EQ(Triple::PC, T.getVendor());
   EXPECT_EQ(Triple::Darwin, T.getOS());
   EXPECT_EQ("i386-pc-darwin", T.getTriple());
+
 }
 
 }
diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp
index 61e8a0ac37cb..57f54cb0060f 100644
--- a/unittests/ADT/TwineTest.cpp
+++ b/unittests/ADT/TwineTest.cpp
@@ -9,6 +9,7 @@
 
 #include "gtest/gtest.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -69,6 +70,13 @@ TEST(TwineTest, Concat) {
             repr(Twine("a").concat(Twine("b").concat(Twine("c")))));
 }
 
+TEST(TwineTest, toNullTerminatedStringRef) {
+  SmallString<8> storage;
+  EXPECT_EQ(0, *Twine("hello").toNullTerminatedStringRef(storage).end());
+  EXPECT_EQ(0,
+           *Twine(StringRef("hello")).toNullTerminatedStringRef(storage).end());
+}
+
   // I suppose linking in the entire code generator to add a unit test to check
   // the code size of the concat operation is overkill... :)
 
diff --git a/unittests/ADT/ValueMapTest.cpp b/unittests/ADT/ValueMapTest.cpp
deleted file mode 100644
index 152e8eaaf1f1..000000000000
--- a/unittests/ADT/ValueMapTest.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-//===- llvm/unittest/ADT/ValueMapTest.cpp - ValueMap unit tests -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Config/config.h"
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-
-// Test fixture
-template<typename T>
-class ValueMapTest : public testing::Test {
-protected:
-  Constant *ConstantV;
-  OwningPtr<BitCastInst> BitcastV;
-  OwningPtr<BinaryOperator> AddV;
-
-  ValueMapTest() :
-    ConstantV(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 0)),
-    BitcastV(new BitCastInst(ConstantV, Type::getInt32Ty(getGlobalContext()))),
-    AddV(BinaryOperator::CreateAdd(ConstantV, ConstantV)) {
-  }
-};
-
-// Run everything on Value*, a subtype to make sure that casting works as
-// expected, and a const subtype to make sure we cast const correctly.
-typedef ::testing::Types<Value, Instruction, const Instruction> KeyTypes;
-TYPED_TEST_CASE(ValueMapTest, KeyTypes);
-
-TYPED_TEST(ValueMapTest, Null) {
-  ValueMap<TypeParam*, int> VM1;
-  VM1[NULL] = 7;
-  EXPECT_EQ(7, VM1.lookup(NULL));
-}
-
-TYPED_TEST(ValueMapTest, FollowsValue) {
-  ValueMap<TypeParam*, int> VM;
-  VM[this->BitcastV.get()] = 7;
-  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
-  EXPECT_EQ(0, VM.count(this->AddV.get()));
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  EXPECT_EQ(7, VM.lookup(this->AddV.get()));
-  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
-  this->AddV.reset();
-  EXPECT_EQ(0, VM.count(this->AddV.get()));
-  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
-  EXPECT_EQ(0U, VM.size());
-}
-
-TYPED_TEST(ValueMapTest, OperationsWork) {
-  ValueMap<TypeParam*, int> VM;
-  ValueMap<TypeParam*, int> VM2(16);  (void)VM2;
-  typename ValueMapConfig<TypeParam*>::ExtraData Data;
-  ValueMap<TypeParam*, int> VM3(Data, 16);  (void)VM3;
-  EXPECT_TRUE(VM.empty());
-
-  VM[this->BitcastV.get()] = 7;
-
-  // Find:
-  typename ValueMap<TypeParam*, int>::iterator I =
-    VM.find(this->BitcastV.get());
-  ASSERT_TRUE(I != VM.end());
-  EXPECT_EQ(this->BitcastV.get(), I->first);
-  EXPECT_EQ(7, I->second);
-  EXPECT_TRUE(VM.find(this->AddV.get()) == VM.end());
-
-  // Const find:
-  const ValueMap<TypeParam*, int> &CVM = VM;
-  typename ValueMap<TypeParam*, int>::const_iterator CI =
-    CVM.find(this->BitcastV.get());
-  ASSERT_TRUE(CI != CVM.end());
-  EXPECT_EQ(this->BitcastV.get(), CI->first);
-  EXPECT_EQ(7, CI->second);
-  EXPECT_TRUE(CVM.find(this->AddV.get()) == CVM.end());
-
-  // Insert:
-  std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult1 =
-    VM.insert(std::make_pair(this->AddV.get(), 3));
-  EXPECT_EQ(this->AddV.get(), InsertResult1.first->first);
-  EXPECT_EQ(3, InsertResult1.first->second);
-  EXPECT_TRUE(InsertResult1.second);
-  EXPECT_EQ(true, VM.count(this->AddV.get()));
-  std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult2 =
-    VM.insert(std::make_pair(this->AddV.get(), 5));
-  EXPECT_EQ(this->AddV.get(), InsertResult2.first->first);
-  EXPECT_EQ(3, InsertResult2.first->second);
-  EXPECT_FALSE(InsertResult2.second);
-
-  // Erase:
-  VM.erase(InsertResult2.first);
-  EXPECT_EQ(0U, VM.count(this->AddV.get()));
-  EXPECT_EQ(1U, VM.count(this->BitcastV.get()));
-  VM.erase(this->BitcastV.get());
-  EXPECT_EQ(0U, VM.count(this->BitcastV.get()));
-  EXPECT_EQ(0U, VM.size());
-
-  // Range insert:
-  SmallVector<std::pair<Instruction*, int>, 2> Elems;
-  Elems.push_back(std::make_pair(this->AddV.get(), 1));
-  Elems.push_back(std::make_pair(this->BitcastV.get(), 2));
-  VM.insert(Elems.begin(), Elems.end());
-  EXPECT_EQ(1, VM.lookup(this->AddV.get()));
-  EXPECT_EQ(2, VM.lookup(this->BitcastV.get()));
-}
-
-template<typename ExpectedType, typename VarType>
-void CompileAssertHasType(VarType) {
-  typedef char assert[is_same<ExpectedType, VarType>::value ? 1 : -1];
-}
-
-TYPED_TEST(ValueMapTest, Iteration) {
-  ValueMap<TypeParam*, int> VM;
-  VM[this->BitcastV.get()] = 2;
-  VM[this->AddV.get()] = 3;
-  size_t size = 0;
-  for (typename ValueMap<TypeParam*, int>::iterator I = VM.begin(), E = VM.end();
-       I != E; ++I) {
-    ++size;
-    std::pair<TypeParam*, int> value = *I; (void)value;
-    CompileAssertHasType<TypeParam*>(I->first);
-    if (I->second == 2) {
-      EXPECT_EQ(this->BitcastV.get(), I->first);
-      I->second = 5;
-    } else if (I->second == 3) {
-      EXPECT_EQ(this->AddV.get(), I->first);
-      I->second = 6;
-    } else {
-      ADD_FAILURE() << "Iterated through an extra value.";
-    }
-  }
-  EXPECT_EQ(2U, size);
-  EXPECT_EQ(5, VM[this->BitcastV.get()]);
-  EXPECT_EQ(6, VM[this->AddV.get()]);
-
-  size = 0;
-  // Cast to const ValueMap to avoid a bug in DenseMap's iterators.
-  const ValueMap<TypeParam*, int>& CVM = VM;
-  for (typename ValueMap<TypeParam*, int>::const_iterator I = CVM.begin(),
-         E = CVM.end(); I != E; ++I) {
-    ++size;
-    std::pair<TypeParam*, int> value = *I;  (void)value;
-    CompileAssertHasType<TypeParam*>(I->first);
-    if (I->second == 5) {
-      EXPECT_EQ(this->BitcastV.get(), I->first);
-    } else if (I->second == 6) {
-      EXPECT_EQ(this->AddV.get(), I->first);
-    } else {
-      ADD_FAILURE() << "Iterated through an extra value.";
-    }
-  }
-  EXPECT_EQ(2U, size);
-}
-
-TYPED_TEST(ValueMapTest, DefaultCollisionBehavior) {
-  // By default, we overwrite the old value with the replaced value.
-  ValueMap<TypeParam*, int> VM;
-  VM[this->BitcastV.get()] = 7;
-  VM[this->AddV.get()] = 9;
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
-  EXPECT_EQ(9, VM.lookup(this->AddV.get()));
-}
-
-TYPED_TEST(ValueMapTest, ConfiguredCollisionBehavior) {
-  // TODO: Implement this when someone needs it.
-}
-
-template<typename KeyT>
-struct LockMutex : ValueMapConfig<KeyT> {
-  struct ExtraData {
-    sys::Mutex *M;
-    bool *CalledRAUW;
-    bool *CalledDeleted;
-  };
-  static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
-    *Data.CalledRAUW = true;
-    EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
-  }
-  static void onDelete(const ExtraData &Data, KeyT Old) {
-    *Data.CalledDeleted = true;
-    EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
-  }
-  static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
-};
-#if ENABLE_THREADS
-TYPED_TEST(ValueMapTest, LocksMutex) {
-  sys::Mutex M(false);  // Not recursive.
-  bool CalledRAUW = false, CalledDeleted = false;
-  typename LockMutex<TypeParam*>::ExtraData Data =
-    {&M, &CalledRAUW, &CalledDeleted};
-  ValueMap<TypeParam*, int, LockMutex<TypeParam*> > VM(Data);
-  VM[this->BitcastV.get()] = 7;
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  this->AddV.reset();
-  EXPECT_TRUE(CalledRAUW);
-  EXPECT_TRUE(CalledDeleted);
-}
-#endif
-
-template<typename KeyT>
-struct NoFollow : ValueMapConfig<KeyT> {
-  enum { FollowRAUW = false };
-};
-
-TYPED_TEST(ValueMapTest, NoFollowRAUW) {
-  ValueMap<TypeParam*, int, NoFollow<TypeParam*> > VM;
-  VM[this->BitcastV.get()] = 7;
-  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
-  EXPECT_EQ(0, VM.count(this->AddV.get()));
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
-  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
-  this->AddV.reset();
-  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
-  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
-  this->BitcastV.reset();
-  EXPECT_EQ(0, VM.lookup(this->BitcastV.get()));
-  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
-  EXPECT_EQ(0U, VM.size());
-}
-
-template<typename KeyT>
-struct CountOps : ValueMapConfig<KeyT> {
-  struct ExtraData {
-    int *Deletions;
-    int *RAUWs;
-  };
-
-  static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
-    ++*Data.RAUWs;
-  }
-  static void onDelete(const ExtraData &Data, KeyT Old) {
-    ++*Data.Deletions;
-  }
-};
-
-TYPED_TEST(ValueMapTest, CallsConfig) {
-  int Deletions = 0, RAUWs = 0;
-  typename CountOps<TypeParam*>::ExtraData Data = {&Deletions, &RAUWs};
-  ValueMap<TypeParam*, int, CountOps<TypeParam*> > VM(Data);
-  VM[this->BitcastV.get()] = 7;
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  EXPECT_EQ(0, Deletions);
-  EXPECT_EQ(1, RAUWs);
-  this->AddV.reset();
-  EXPECT_EQ(1, Deletions);
-  EXPECT_EQ(1, RAUWs);
-  this->BitcastV.reset();
-  EXPECT_EQ(1, Deletions);
-  EXPECT_EQ(1, RAUWs);
-}
-
-template<typename KeyT>
-struct ModifyingConfig : ValueMapConfig<KeyT> {
-  // We'll put a pointer here back to the ValueMap this key is in, so
-  // that we can modify it (and clobber *this) before the ValueMap
-  // tries to do the same modification.  In previous versions of
-  // ValueMap, that exploded.
-  typedef ValueMap<KeyT, int, ModifyingConfig<KeyT> > **ExtraData;
-
-  static void onRAUW(ExtraData Map, KeyT Old, KeyT New) {
-    (*Map)->erase(Old);
-  }
-  static void onDelete(ExtraData Map, KeyT Old) {
-    (*Map)->erase(Old);
-  }
-};
-TYPED_TEST(ValueMapTest, SurvivesModificationByConfig) {
-  ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > *MapAddress;
-  ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > VM(&MapAddress);
-  MapAddress = &VM;
-  // Now the ModifyingConfig can modify the Map inside a callback.
-  VM[this->BitcastV.get()] = 7;
-  this->BitcastV->replaceAllUsesWith(this->AddV.get());
-  EXPECT_FALSE(VM.count(this->BitcastV.get()));
-  EXPECT_FALSE(VM.count(this->AddV.get()));
-  VM[this->AddV.get()] = 7;
-  this->AddV.reset();
-  EXPECT_FALSE(VM.count(this->AddV.get()));
-}
-
-}
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
new file mode 100644
index 000000000000..5f09fa248357
--- /dev/null
+++ b/unittests/CMakeLists.txt
@@ -0,0 +1,142 @@
+function(add_llvm_unittest test_dirname)
+  string(REGEX MATCH "([^/]+)$" test_name ${test_dirname})
+  if (CMAKE_BUILD_TYPE)
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY
+      ${LLVM_BINARY_DIR}/unittests/${test_dirname}/${CMAKE_BUILD_TYPE})
+  else()
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY
+      ${LLVM_BINARY_DIR}/unittests/${test_dirname})
+  endif()
+  if( NOT LLVM_BUILD_TESTS )
+    set(EXCLUDE_FROM_ALL ON)
+  endif()
+  add_llvm_executable(${test_name}Tests ${ARGN})
+  add_dependencies(UnitTests ${test_name}Tests)
+endfunction()
+
+add_custom_target(UnitTests)
+
+include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include)
+add_definitions(-DGTEST_HAS_RTTI=0)
+if( CMAKE_COMPILER_IS_GNUCXX )
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS "-frtti" "-fno-rtti")
+elseif( MSVC )
+  llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/GR" "/GR-")
+endif()
+
+if (NOT LLVM_ENABLE_THREADS)
+  add_definitions(-DGTEST_HAS_PTHREAD=0)
+endif()
+
+if(SUPPORTS_NO_VARIADIC_MACROS_FLAG)
+  add_definitions("-Wno-variadic-macros")
+endif()
+
+set(LLVM_LINK_COMPONENTS
+  jit
+  interpreter
+  nativecodegen
+  BitWriter
+  BitReader
+  AsmParser
+  Core
+  Support
+  )
+
+set(LLVM_USED_LIBS
+  gtest
+  gtest_main
+  LLVMSupport # gtest needs it for raw_ostream.
+  )
+
+add_llvm_unittest(ADT
+  ADT/APFloatTest.cpp
+  ADT/APIntTest.cpp
+  ADT/BitVectorTest.cpp
+  ADT/DAGDeltaAlgorithmTest.cpp
+  ADT/DeltaAlgorithmTest.cpp
+  ADT/DenseMapTest.cpp
+  ADT/DenseSetTest.cpp
+  ADT/FoldingSet.cpp
+  ADT/ilistTest.cpp
+  ADT/ImmutableSetTest.cpp
+  ADT/IntEqClassesTest.cpp
+  ADT/IntervalMapTest.cpp
+  ADT/SmallBitVectorTest.cpp
+  ADT/SmallStringTest.cpp
+  ADT/SmallVectorTest.cpp
+  ADT/SparseBitVectorTest.cpp
+  ADT/StringMapTest.cpp
+  ADT/StringRefTest.cpp
+  ADT/TripleTest.cpp
+  ADT/TwineTest.cpp
+ )
+
+add_llvm_unittest(Analysis
+  Analysis/ScalarEvolutionTest.cpp
+  )
+
+add_llvm_unittest(ExecutionEngine
+  ExecutionEngine/ExecutionEngineTest.cpp
+  )
+
+set(JITTestsSources
+  ExecutionEngine/JIT/JITEventListenerTest.cpp
+  ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+  ExecutionEngine/JIT/JITTest.cpp
+  ExecutionEngine/JIT/MultiJITTest.cpp
+  )
+
+if(MSVC)
+  list(APPEND JITTestsSources ExecutionEngine/JIT/JITTests.def)
+endif()
+
+add_llvm_unittest(ExecutionEngine/JIT ${JITTestsSources})
+
+if(MINGW)
+  set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols)
+endif()
+
+add_llvm_unittest(Transforms/Utils
+  Transforms/Utils/Cloning.cpp
+  )
+
+set(VMCoreSources
+  VMCore/ConstantsTest.cpp
+  VMCore/DerivedTypesTest.cpp
+  VMCore/InstructionsTest.cpp
+  VMCore/MetadataTest.cpp
+  VMCore/PassManagerTest.cpp
+  VMCore/ValueMapTest.cpp
+  VMCore/VerifierTest.cpp
+  )
+
+# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug.
+# See issue#331418 in Visual Studio.
+if(MSVC AND MSVC_VERSION LESS 1600)
+  list(REMOVE_ITEM VMCoreSources VMCore/ValueMapTest.cpp)
+endif()
+
+add_llvm_unittest(VMCore ${VMCoreSources})
+
+set(LLVM_LINK_COMPONENTS
+  Support
+  Core
+  )
+
+add_llvm_unittest(Support
+  Support/AllocatorTest.cpp
+  Support/Casting.cpp
+  Support/CommandLineTest.cpp
+  Support/ConstantRangeTest.cpp
+  Support/EndianTest.cpp
+  Support/LeakDetectorTest.cpp
+  Support/MathExtrasTest.cpp
+  Support/Path.cpp
+  Support/raw_ostream_test.cpp
+  Support/RegexTest.cpp
+  Support/SwapByteOrderTest.cpp
+  Support/TimeValue.cpp
+  Support/TypeBuilderTest.cpp
+  Support/ValueHandleTest.cpp
+  )
diff --git a/unittests/ExecutionEngine/JIT/JITTests.def b/unittests/ExecutionEngine/JIT/JITTests.def
new file mode 100644
index 000000000000..17c91e87bcd3
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITTests.def
@@ -0,0 +1,4 @@
+EXPORTS
+getPointerToNamedFunction
+JITTest_AvailableExternallyFunction
+JITTest_AvailableExternallyGlobal
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 9a75b2c2eb8f..580ad7d71918 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -14,7 +14,9 @@
 # Set up variables for building a unit test.
 ifdef TESTNAME
 
+ifndef MAKEFILE_UNITTEST_NO_INCLUDE_COMMON
 include $(LEVEL)/Makefile.common
+endif
 
 LLVMUnitTestExe = $(BuildMode)/$(TESTNAME)Tests$(EXEEXT)
 
@@ -35,9 +37,11 @@ endif
 TESTLIBS = -lGoogleTest -lUnitTestMain
 
 ifeq ($(ENABLE_SHARED), 1)
-  # Add the absolute path to the dynamic library.  This is ok because
-  # we'll never install unittests.
-  LD.Flags += $(RPATH) -Wl,$(SharedLibDir)
+  ifneq (,$(RPATH))
+    # Add the absolute path to the dynamic library.  This is ok because
+    # we'll never install unittests.
+    LD.Flags += $(RPATH) -Wl,$(SharedLibDir)
+  endif
   # Also set {DYLD,LD}_LIBRARY_PATH because OSX ignores the rpath most
   # of the time.
   Run.Shared := $(SHLIBPATH_VAR)="$(SharedLibDir)$${$(SHLIBPATH_VAR):+:}$$$(SHLIBPATH_VAR)"
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 091ecd4aadeb..161e2cfb7e54 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ConstantRange.h"
+#include "llvm/Instructions.h"
 
 #include "gtest/gtest.h"
 
@@ -146,6 +147,22 @@ TEST_F(ConstantRangeTest, GetMinsAndMaxes) {
             APInt(4, 7));
 }
 
+TEST_F(ConstantRangeTest, SignWrapped) {
+  EXPECT_TRUE(Full.isSignWrappedSet());
+  EXPECT_FALSE(Empty.isSignWrappedSet());
+  EXPECT_FALSE(One.isSignWrappedSet());
+  EXPECT_FALSE(Some.isSignWrappedSet());
+  EXPECT_TRUE(Wrap.isSignWrappedSet());
+
+  EXPECT_FALSE(ConstantRange(APInt(8, 127), APInt(8, 128)).isSignWrappedSet());
+  EXPECT_TRUE(ConstantRange(APInt(8, 127), APInt(8, 129)).isSignWrappedSet());
+  EXPECT_FALSE(ConstantRange(APInt(8, 128), APInt(8, 129)).isSignWrappedSet());
+  EXPECT_TRUE(ConstantRange(APInt(8, 10), APInt(8, 9)).isSignWrappedSet());
+  EXPECT_TRUE(ConstantRange(APInt(8, 10), APInt(8, 250)).isSignWrappedSet());
+  EXPECT_FALSE(ConstantRange(APInt(8, 250), APInt(8, 10)).isSignWrappedSet());
+  EXPECT_FALSE(ConstantRange(APInt(8, 250), APInt(8, 251)).isSignWrappedSet());
+}
+
 TEST_F(ConstantRangeTest, Trunc) {
   ConstantRange TFull = Full.truncate(10);
   ConstantRange TEmpty = Empty.truncate(10);
@@ -154,8 +171,8 @@ TEST_F(ConstantRangeTest, Trunc) {
   ConstantRange TWrap = Wrap.truncate(10);
   EXPECT_TRUE(TFull.isFullSet());
   EXPECT_TRUE(TEmpty.isEmptySet());
-  EXPECT_EQ(TOne, ConstantRange(APInt(One.getLower()).trunc(10),
-                                APInt(One.getUpper()).trunc(10)));
+  EXPECT_EQ(TOne, ConstantRange(One.getLower().trunc(10),
+                                One.getUpper().trunc(10)));
   EXPECT_TRUE(TSome.isFullSet());
 }
 
@@ -167,12 +184,11 @@ TEST_F(ConstantRangeTest, ZExt) {
   ConstantRange ZWrap = Wrap.zeroExtend(20);
   EXPECT_EQ(ZFull, ConstantRange(APInt(20, 0), APInt(20, 0x10000)));
   EXPECT_TRUE(ZEmpty.isEmptySet());
-  EXPECT_EQ(ZOne, ConstantRange(APInt(One.getLower()).zext(20),
-                                APInt(One.getUpper()).zext(20)));
-  EXPECT_EQ(ZSome, ConstantRange(APInt(Some.getLower()).zext(20),
-                                 APInt(Some.getUpper()).zext(20)));
-  EXPECT_EQ(ZWrap, ConstantRange(APInt(Wrap.getLower()).zext(20),
-                                 APInt(Wrap.getUpper()).zext(20)));
+  EXPECT_EQ(ZOne, ConstantRange(One.getLower().zext(20),
+                                One.getUpper().zext(20)));
+  EXPECT_EQ(ZSome, ConstantRange(Some.getLower().zext(20),
+                                 Some.getUpper().zext(20)));
+  EXPECT_EQ(ZWrap, ConstantRange(APInt(20, 0), APInt(20, 0x10000)));
 }
 
 TEST_F(ConstantRangeTest, SExt) {
@@ -184,12 +200,15 @@ TEST_F(ConstantRangeTest, SExt) {
   EXPECT_EQ(SFull, ConstantRange(APInt(20, (uint64_t)INT16_MIN, true),
                                  APInt(20, INT16_MAX + 1, true)));
   EXPECT_TRUE(SEmpty.isEmptySet());
-  EXPECT_EQ(SOne, ConstantRange(APInt(One.getLower()).sext(20),
-                                APInt(One.getUpper()).sext(20)));
-  EXPECT_EQ(SSome, ConstantRange(APInt(Some.getLower()).sext(20),
-                                 APInt(Some.getUpper()).sext(20)));
-  EXPECT_EQ(SWrap, ConstantRange(APInt(Wrap.getLower()).sext(20),
-                                 APInt(Wrap.getUpper()).sext(20)));
+  EXPECT_EQ(SOne, ConstantRange(One.getLower().sext(20),
+                                One.getUpper().sext(20)));
+  EXPECT_EQ(SSome, ConstantRange(Some.getLower().sext(20),
+                                 Some.getUpper().sext(20)));
+  EXPECT_EQ(SWrap, ConstantRange(APInt(20, (uint64_t)INT16_MIN, true),
+                                 APInt(20, INT16_MAX + 1, true)));
+
+  EXPECT_EQ(ConstantRange(APInt(8, 120), APInt(8, 140)).signExtend(16),
+            ConstantRange(APInt(16, -128), APInt(16, 128)));
 }
 
 TEST_F(ConstantRangeTest, IntersectWith) {
@@ -411,4 +430,11 @@ TEST_F(ConstantRangeTest, Lshr) {
   EXPECT_EQ(Wrap.lshr(Wrap), Full);
 }
 
+TEST(ConstantRange, MakeICmpRegion) {
+  // PR8250
+  ConstantRange SMax = ConstantRange(APInt::getSignedMaxValue(32));
+  EXPECT_TRUE(ConstantRange::makeICmpRegion(ICmpInst::ICMP_SGT,
+                                            SMax).isEmptySet());
+}
+
 }  // anonymous namespace
diff --git a/unittests/Support/EndianTest.cpp b/unittests/Support/EndianTest.cpp
new file mode 100644
index 000000000000..6fe0247d46b0
--- /dev/null
+++ b/unittests/Support/EndianTest.cpp
@@ -0,0 +1,72 @@
+//===- unittests/Support/EndianTest.cpp - Endian.h tests ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/DataTypes.h"
+#include <cstdlib>
+#include <ctime>
+using namespace llvm;
+using namespace support;
+
+#undef max
+
+namespace {
+
+TEST(Endian, Read) {
+  // These are 5 bytes so we can be sure at least one of the reads is unaligned.
+  unsigned char big[] = {0x00, 0x01, 0x02, 0x03, 0x04};
+  unsigned char little[] = {0x00, 0x04, 0x03, 0x02, 0x01};
+  int32_t BigAsHost = 0x00010203;
+  EXPECT_EQ(BigAsHost, (endian::read_be<int32_t, unaligned>(big)));
+  int32_t LittleAsHost = 0x02030400;
+  EXPECT_EQ(LittleAsHost, (endian::read_le<int32_t, unaligned>(little)));
+
+  EXPECT_EQ((endian::read_be<int32_t, unaligned>(big + 1)),
+            (endian::read_le<int32_t, unaligned>(little + 1)));
+}
+
+TEST(Endian, Write) {
+  unsigned char data[5];
+  endian::write_be<int32_t, unaligned>(data, -1362446643);
+  EXPECT_EQ(data[0], 0xAE);
+  EXPECT_EQ(data[1], 0xCA);
+  EXPECT_EQ(data[2], 0xB6);
+  EXPECT_EQ(data[3], 0xCD);
+  endian::write_be<int32_t, unaligned>(data + 1, -1362446643);
+  EXPECT_EQ(data[1], 0xAE);
+  EXPECT_EQ(data[2], 0xCA);
+  EXPECT_EQ(data[3], 0xB6);
+  EXPECT_EQ(data[4], 0xCD);
+
+  endian::write_le<int32_t, unaligned>(data, -1362446643);
+  EXPECT_EQ(data[0], 0xCD);
+  EXPECT_EQ(data[1], 0xB6);
+  EXPECT_EQ(data[2], 0xCA);
+  EXPECT_EQ(data[3], 0xAE);
+  endian::write_le<int32_t, unaligned>(data + 1, -1362446643);
+  EXPECT_EQ(data[1], 0xCD);
+  EXPECT_EQ(data[2], 0xB6);
+  EXPECT_EQ(data[3], 0xCA);
+  EXPECT_EQ(data[4], 0xAE);
+}
+
+TEST(Endian, PackedEndianSpecificIntegral) {
+  // These are 5 bytes so we can be sure at least one of the reads is unaligned.
+  unsigned char big[] = {0x00, 0x01, 0x02, 0x03, 0x04};
+  unsigned char little[] = {0x00, 0x04, 0x03, 0x02, 0x01};
+  big32_t    *big_val    =
+    reinterpret_cast<big32_t *>(big + 1);
+  little32_t *little_val =
+    reinterpret_cast<little32_t *>(little + 1);
+
+  EXPECT_EQ(*big_val, *little_val);
+}
+
+}
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
new file mode 100644
index 000000000000..60d08bc92dbe
--- /dev/null
+++ b/unittests/Support/Path.cpp
@@ -0,0 +1,253 @@
+//===- llvm/unittest/Support/Path.cpp - Path tests ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::sys;
+
+#define ASSERT_NO_ERROR(x) \
+  if (error_code ASSERT_NO_ERROR_ec = x) { \
+    SmallString<128> MessageStorage; \
+    raw_svector_ostream Message(MessageStorage); \
+    Message << #x ": did not return errc::success.\n" \
+            << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n" \
+            << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n"; \
+    GTEST_FATAL_FAILURE_(MessageStorage.c_str()); \
+  } else {}
+
+namespace {
+
+TEST(is_separator, Works) {
+  EXPECT_TRUE(path::is_separator('/'));
+  EXPECT_FALSE(path::is_separator('\0'));
+  EXPECT_FALSE(path::is_separator('-'));
+  EXPECT_FALSE(path::is_separator(' '));
+
+#ifdef LLVM_ON_WIN32
+  EXPECT_TRUE(path::is_separator('\\'));
+#else
+  EXPECT_FALSE(path::is_separator('\\'));
+#endif
+}
+
+TEST(Support, Path) {
+  SmallVector<StringRef, 40> paths;
+  paths.push_back("");
+  paths.push_back(".");
+  paths.push_back("..");
+  paths.push_back("foo");
+  paths.push_back("/");
+  paths.push_back("/foo");
+  paths.push_back("foo/");
+  paths.push_back("/foo/");
+  paths.push_back("foo/bar");
+  paths.push_back("/foo/bar");
+  paths.push_back("//net");
+  paths.push_back("//net/foo");
+  paths.push_back("///foo///");
+  paths.push_back("///foo///bar");
+  paths.push_back("/.");
+  paths.push_back("./");
+  paths.push_back("/..");
+  paths.push_back("../");
+  paths.push_back("foo/.");
+  paths.push_back("foo/..");
+  paths.push_back("foo/./");
+  paths.push_back("foo/./bar");
+  paths.push_back("foo/..");
+  paths.push_back("foo/../");
+  paths.push_back("foo/../bar");
+  paths.push_back("c:");
+  paths.push_back("c:/");
+  paths.push_back("c:foo");
+  paths.push_back("c:/foo");
+  paths.push_back("c:foo/");
+  paths.push_back("c:/foo/");
+  paths.push_back("c:/foo/bar");
+  paths.push_back("prn:");
+  paths.push_back("c:\\");
+  paths.push_back("c:foo");
+  paths.push_back("c:\\foo");
+  paths.push_back("c:foo\\");
+  paths.push_back("c:\\foo\\");
+  paths.push_back("c:\\foo/");
+  paths.push_back("c:/foo\\bar");
+
+  for (SmallVector<StringRef, 40>::const_iterator i = paths.begin(),
+                                                  e = paths.end();
+                                                  i != e;
+                                                  ++i) {
+    for (sys::path::const_iterator ci = sys::path::begin(*i),
+                                   ce = sys::path::end(*i);
+                                   ci != ce;
+                                   ++ci) {
+      ASSERT_FALSE(ci->empty());
+    }
+
+#if 0 // Valgrind is whining about this.
+    outs() << "    Reverse Iteration: [";
+    for (sys::path::reverse_iterator ci = sys::path::rbegin(*i),
+                                     ce = sys::path::rend(*i);
+                                     ci != ce;
+                                     ++ci) {
+      outs() << *ci << ',';
+    }
+    outs() << "]\n";
+#endif
+
+    path::has_root_path(*i);
+    path::root_path(*i);
+    path::has_root_name(*i);
+    path::root_name(*i);
+    path::has_root_directory(*i);
+    path::root_directory(*i);
+    path::has_parent_path(*i);
+    path::parent_path(*i);
+    path::has_filename(*i);
+    path::filename(*i);
+    path::has_stem(*i);
+    path::stem(*i);
+    path::has_extension(*i);
+    path::extension(*i);
+    path::is_absolute(*i);
+    path::is_relative(*i);
+
+    SmallString<128> temp_store;
+    temp_store = *i;
+    ASSERT_NO_ERROR(fs::make_absolute(temp_store));
+    temp_store = *i;
+    path::remove_filename(temp_store);
+
+    temp_store = *i;
+    path::replace_extension(temp_store, "ext");
+    StringRef filename(temp_store.begin(), temp_store.size()), stem, ext;
+    stem = path::stem(filename);
+    ext  = path::extension(filename);
+    EXPECT_EQ(*(--sys::path::end(filename)), (stem + ext).str());
+
+    path::native(*i, temp_store);
+  }
+}
+
+class FileSystemTest : public testing::Test {
+protected:
+  /// Unique temporary directory in which all created filesystem entities must
+  /// be placed. It is recursively removed at the end of each test.
+  SmallString<128> TestDirectory;
+
+  virtual void SetUp() {
+    int fd;
+    ASSERT_NO_ERROR(
+      fs::unique_file("file-system-test-%%-%%-%%-%%/test-directory.anchor", fd,
+                      TestDirectory));
+    // We don't care about this specific file.
+    ::close(fd);
+    TestDirectory = path::parent_path(TestDirectory);
+    errs() << "Test Directory: " << TestDirectory << '\n';
+    errs().flush();
+  }
+
+  virtual void TearDown() {
+    uint32_t removed;
+    ASSERT_NO_ERROR(fs::remove_all(TestDirectory.str(), removed));
+  }
+};
+
+TEST_F(FileSystemTest, TempFiles) {
+  // Create a temp file.
+  int FileDescriptor;
+  SmallString<64> TempPath;
+  ASSERT_NO_ERROR(
+    fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath));
+
+  // Make sure it exists.
+  bool TempFileExists;
+  ASSERT_NO_ERROR(sys::fs::exists(Twine(TempPath), TempFileExists));
+  EXPECT_TRUE(TempFileExists);
+
+  // Create another temp tile.
+  int FD2;
+  SmallString<64> TempPath2;
+  ASSERT_NO_ERROR(fs::unique_file("%%-%%-%%-%%.temp", FD2, TempPath2));
+  ASSERT_NE(TempPath.str(), TempPath2.str());
+
+  // Try to copy the first to the second.
+  EXPECT_EQ(
+    fs::copy_file(Twine(TempPath), Twine(TempPath2)), errc::file_exists);
+
+  ::close(FD2);
+  // Try again with the proper options.
+  ASSERT_NO_ERROR(fs::copy_file(Twine(TempPath), Twine(TempPath2),
+                                fs::copy_option::overwrite_if_exists));
+  // Remove Temp2.
+  ASSERT_NO_ERROR(fs::remove(Twine(TempPath2), TempFileExists));
+  EXPECT_TRUE(TempFileExists);
+
+  // Make sure Temp2 doesn't exist.
+  ASSERT_NO_ERROR(fs::exists(Twine(TempPath2), TempFileExists));
+  EXPECT_FALSE(TempFileExists);
+
+  // Create a hard link to Temp1.
+  ASSERT_NO_ERROR(fs::create_hard_link(Twine(TempPath), Twine(TempPath2)));
+  bool equal;
+  ASSERT_NO_ERROR(fs::equivalent(Twine(TempPath), Twine(TempPath2), equal));
+  EXPECT_TRUE(equal);
+
+  // Remove Temp1.
+  ::close(FileDescriptor);
+  ASSERT_NO_ERROR(fs::remove(Twine(TempPath), TempFileExists));
+  EXPECT_TRUE(TempFileExists);
+
+  // Remove the hard link.
+  ASSERT_NO_ERROR(fs::remove(Twine(TempPath2), TempFileExists));
+  EXPECT_TRUE(TempFileExists);
+
+  // Make sure Temp1 doesn't exist.
+  ASSERT_NO_ERROR(fs::exists(Twine(TempPath), TempFileExists));
+  EXPECT_FALSE(TempFileExists);
+}
+
+TEST_F(FileSystemTest, DirectoryIteration) {
+  error_code ec;
+  for (fs::directory_iterator i(".", ec), e; i != e; i.increment(ec))
+    ASSERT_NO_ERROR(ec);
+}
+
+TEST_F(FileSystemTest, Magic) {
+  struct type {
+    const char *filename;
+    const char *magic_str;
+    size_t      magic_str_len;
+  } types [] = {{"magic.archive", "!<arch>\x0A", 8}};
+
+  // Create some files filled with magic.
+  for (type *i = types, *e = types + (sizeof(types) / sizeof(type)); i != e;
+                                                                     ++i) {
+    SmallString<128> file_pathname(TestDirectory);
+    path::append(file_pathname, i->filename);
+    std::string ErrMsg;
+    raw_fd_ostream file(file_pathname.c_str(), ErrMsg,
+                        raw_fd_ostream::F_Binary);
+    ASSERT_FALSE(file.has_error());
+    StringRef magic(i->magic_str, i->magic_str_len);
+    file << magic;
+    file.close();
+    bool res = false;
+    ASSERT_NO_ERROR(fs::has_magic(file_pathname.c_str(), magic, res));
+    EXPECT_TRUE(res);
+  }
+}
+
+} // anonymous namespace
diff --git a/unittests/Support/SwapByteOrderTest.cpp b/unittests/Support/SwapByteOrderTest.cpp
new file mode 100644
index 000000000000..c2a0c2793889
--- /dev/null
+++ b/unittests/Support/SwapByteOrderTest.cpp
@@ -0,0 +1,128 @@
+//===- unittests/Support/SwapByteOrderTest.cpp - swap byte order test -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include <cstdlib>
+#include <ctime>
+using namespace llvm;
+
+#undef max
+
+namespace {
+
+// In these first two tests all of the origional_uintx values are truncated
+// except for 64. We could avoid this, but there's really no point.
+
+TEST(SwapByteOrder, UnsignedRoundTrip) {
+  // The point of the bit twiddling of magic is to test with and without bits
+  // in every byte.
+  uint64_t value = 1;
+  for (std::size_t i = 0; i <= sizeof(value); ++i) {
+    uint8_t origional_uint8 = static_cast<uint8_t>(value);
+    EXPECT_EQ(origional_uint8,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_uint8)));
+
+    uint16_t origional_uint16 = static_cast<uint16_t>(value);
+    EXPECT_EQ(origional_uint16,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_uint16)));
+
+    uint32_t origional_uint32 = static_cast<uint32_t>(value);
+    EXPECT_EQ(origional_uint32,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_uint32)));
+
+    uint64_t origional_uint64 = static_cast<uint64_t>(value);
+    EXPECT_EQ(origional_uint64,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_uint64)));
+
+    value = (value << 8) | 0x55; // binary 0101 0101.
+  }
+}
+
+TEST(SwapByteOrder, SignedRoundTrip) {
+  // The point of the bit twiddling of magic is to test with and without bits
+  // in every byte.
+  uint64_t value = 1;
+  for (std::size_t i = 0; i <= sizeof(value); ++i) {
+    int8_t origional_int8 = static_cast<int8_t>(value);
+    EXPECT_EQ(origional_int8,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int8)));
+
+    int16_t origional_int16 = static_cast<int16_t>(value);
+    EXPECT_EQ(origional_int16,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int16)));
+
+    int32_t origional_int32 = static_cast<int32_t>(value);
+    EXPECT_EQ(origional_int32,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int32)));
+
+    int64_t origional_int64 = static_cast<int64_t>(value);
+    EXPECT_EQ(origional_int64,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int64)));
+
+    // Test other sign.
+    value *= -1;
+
+    origional_int8 = static_cast<int8_t>(value);
+    EXPECT_EQ(origional_int8,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int8)));
+
+    origional_int16 = static_cast<int16_t>(value);
+    EXPECT_EQ(origional_int16,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int16)));
+
+    origional_int32 = static_cast<int32_t>(value);
+    EXPECT_EQ(origional_int32,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int32)));
+
+    origional_int64 = static_cast<int64_t>(value);
+    EXPECT_EQ(origional_int64,
+      sys::SwapByteOrder(sys::SwapByteOrder(origional_int64)));
+
+    // Return to normal sign and twiddle.
+    value *= -1;
+    value = (value << 8) | 0x55; // binary 0101 0101.
+  }
+}
+
+TEST(SwapByteOrder, uint8_t) {
+  EXPECT_EQ(uint8_t(0x11), sys::SwapByteOrder(uint8_t(0x11)));
+}
+
+TEST(SwapByteOrder, uint16_t) {
+  EXPECT_EQ(uint16_t(0x1122), sys::SwapByteOrder(uint16_t(0x2211)));
+}
+
+TEST(SwapByteOrder, uint32_t) {
+  EXPECT_EQ(uint32_t(0x11223344), sys::SwapByteOrder(uint32_t(0x44332211)));
+}
+
+TEST(SwapByteOrder, uint64_t) {
+  EXPECT_EQ(uint64_t(0x1122334455667788ULL),
+    sys::SwapByteOrder(uint64_t(0x8877665544332211ULL)));
+}
+
+TEST(SwapByteOrder, int8_t) {
+  EXPECT_EQ(int8_t(0x11), sys::SwapByteOrder(int8_t(0x11)));
+}
+
+TEST(SwapByteOrder, int16_t) {
+  EXPECT_EQ(int16_t(0x1122), sys::SwapByteOrder(int16_t(0x2211)));
+}
+
+TEST(SwapByteOrder, int32_t) {
+  EXPECT_EQ(int32_t(0x11223344), sys::SwapByteOrder(int32_t(0x44332211)));
+}
+
+TEST(SwapByteOrder, int64_t) {
+  EXPECT_EQ(int64_t(0x1122334455667788LL),
+    sys::SwapByteOrder(int64_t(0x8877665544332211LL)));
+}
+
+}
diff --git a/unittests/Support/System.cpp b/unittests/Support/System.cpp
deleted file mode 100644
index b3dd17d380d2..000000000000
--- a/unittests/Support/System.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-//===- llvm/unittest/Support/System.cpp - System tests --===//
-#include "gtest/gtest.h"
-#include "llvm/System/TimeValue.h"
-#include <time.h>
-
-using namespace llvm;
-namespace {
-class SystemTest : public ::testing::Test {
-};
-
-TEST_F(SystemTest, TimeValue) {
-  sys::TimeValue now = sys::TimeValue::now();
-  time_t now_t = time(NULL);
-  EXPECT_TRUE(abs(now_t - now.toEpochTime()) < 2);
-}
-}
diff --git a/unittests/Support/TimeValue.cpp b/unittests/Support/TimeValue.cpp
new file mode 100644
index 000000000000..27883ae33564
--- /dev/null
+++ b/unittests/Support/TimeValue.cpp
@@ -0,0 +1,23 @@
+//===- llvm/unittest/Support/TimeValue.cpp - Time Value tests -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/Support/TimeValue.h"
+#include <time.h>
+
+using namespace llvm;
+namespace {
+
+TEST(Support, TimeValue) {
+  sys::TimeValue now = sys::TimeValue::now();
+  time_t now_t = time(NULL);
+  EXPECT_TRUE(abs(static_cast<long>(now_t - now.toEpochTime())) < 2);
+}
+
+}
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index ba610ea4ff9a..2e5e5b167c41 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -108,7 +108,7 @@ TEST_F(ValueHandle, WeakVH_NullOnDeletion) {
 TEST_F(ValueHandle, AssertingVH_BasicOperation) {
   AssertingVH<CastInst> AVH(BitcastV.get());
   CastInst *implicit_to_exact_type = AVH;
-  implicit_to_exact_type = implicit_to_exact_type;  // Avoid warning.
+  (void)implicit_to_exact_type;  // Avoid warning.
 
   AssertingVH<Value> GenericAVH(BitcastV.get());
   EXPECT_EQ(BitcastV.get(), GenericAVH);
@@ -125,7 +125,7 @@ TEST_F(ValueHandle, AssertingVH_Const) {
   const CastInst *ConstBitcast = BitcastV.get();
   AssertingVH<const CastInst> AVH(ConstBitcast);
   const CastInst *implicit_to_exact_type = AVH;
-  implicit_to_exact_type = implicit_to_exact_type;  // Avoid warning.
+  (void)implicit_to_exact_type;  // Avoid warning.
 }
 
 TEST_F(ValueHandle, AssertingVH_Comparisons) {
diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp
new file mode 100644
index 000000000000..e969e958a742
--- /dev/null
+++ b/unittests/Transforms/Utils/Local.cpp
@@ -0,0 +1,49 @@
+//===- Local.cpp - Unit tests for Local -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+TEST(Local, RecursivelyDeleteDeadPHINodes) {
+  LLVMContext &C(getGlobalContext());
+
+  IRBuilder<> builder(C);
+
+  // Make blocks
+  BasicBlock *bb0 = BasicBlock::Create(C);
+  BasicBlock *bb1 = BasicBlock::Create(C);
+
+  builder.SetInsertPoint(bb0);
+  PHINode    *phi = builder.CreatePHI(Type::getInt32Ty(C));
+  BranchInst *br0 = builder.CreateCondBr(builder.getTrue(), bb0, bb1);
+
+  builder.SetInsertPoint(bb1);
+  BranchInst *br1 = builder.CreateBr(bb0);
+
+  phi->addIncoming(phi, bb0);
+  phi->addIncoming(phi, bb1);
+
+  // The PHI will be removed
+  EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi));
+
+  // Make sure the blocks only contain the branches
+  EXPECT_EQ(&bb0->front(), br0);
+  EXPECT_EQ(&bb1->front(), br1);
+
+  bb0->dropAllReferences();
+  bb1->dropAllReferences();
+  delete bb0;
+  delete bb1;
+}
diff --git a/unittests/VMCore/ConstantsTest.cpp b/unittests/VMCore/ConstantsTest.cpp
index 8f28407b8dec..8277584ba24d 100644
--- a/unittests/VMCore/ConstantsTest.cpp
+++ b/unittests/VMCore/ConstantsTest.cpp
@@ -109,5 +109,14 @@ TEST(ConstantsTest, IntSigns) {
   EXPECT_EQ(0x3b, ConstantInt::get(Int8Ty, 0x13b)->getSExtValue());
 }
 
+TEST(ConstantsTest, FP128Test) {
+  const Type *FP128Ty = Type::getFP128Ty(getGlobalContext());
+
+  const IntegerType *Int128Ty = Type::getIntNTy(getGlobalContext(), 128);
+  Constant *Zero128 = Constant::getNullValue(Int128Ty);
+  Constant *X = ConstantExpr::getUIToFP(Zero128, FP128Ty);
+  EXPECT_TRUE(isa<ConstantFP>(X));
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index c9fe2a13166f..1d1127d863b8 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -99,23 +99,6 @@ TEST(InstructionsTest, BranchInst) {
 
   EXPECT_EQ(b, b1->op_end());
 
-  // shrink it
-  b1->setUnconditionalDest(bb1);
-
-  // check num operands
-  EXPECT_EQ(b1->getNumOperands(), 1U);
-
-  User::const_op_iterator c(b1->op_begin());
-  EXPECT_NE(c, b1->op_end());
-
-  // check THEN
-  EXPECT_EQ(*c, bb1);
-  EXPECT_EQ(b1->getOperand(0), bb1);
-  EXPECT_EQ(b1->getSuccessor(0), bb1);
-  ++c;
-
-  EXPECT_EQ(c, b1->op_end());
-
   // clean up
   delete b0;
   delete b1;
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 96ee5b458960..0073751e4cd6 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -32,7 +32,15 @@
 #include "llvm/Assembly/PrintModulePass.h"
 #include "gtest/gtest.h"
 
+using namespace llvm;
+
 namespace llvm {
+  void initializeModuleNDMPass(PassRegistry&);
+  void initializeFPassPass(PassRegistry&);
+  void initializeCGPassPass(PassRegistry&);
+  void initializeLPassPass(PassRegistry&);
+  void initializeBPassPass(PassRegistry&);
+  
   namespace {
     // ND = no deps
     // NM = no modifications
@@ -40,7 +48,7 @@ namespace llvm {
     public:
       static char run;
       static char ID;
-      ModuleNDNM() : ModulePass(ID) {}
+      ModuleNDNM() : ModulePass(ID) { }
       virtual bool runOnModule(Module &M) {
         run++;
         return false;
@@ -64,7 +72,6 @@ namespace llvm {
     };
     char ModuleNDM::ID=0;
     char ModuleNDM::run=0;
-    RegisterPass<ModuleNDM> X("mndm","mndm",false,false);
 
     struct ModuleNDM2 : public ModulePass {
     public:
@@ -83,7 +90,9 @@ namespace llvm {
     public:
       static char run;
       static char ID;
-      ModuleDNM() : ModulePass(ID) {}
+      ModuleDNM() : ModulePass(ID) {
+        initializeModuleNDMPass(*PassRegistry::getPassRegistry());
+      }
       virtual bool runOnModule(Module &M) {
         EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
         run++;
@@ -154,13 +163,15 @@ namespace llvm {
 
     struct CGPass : public PassTest<CallGraph, CallGraphSCCPass> {
     public:
+      CGPass() {
+        initializeCGPassPass(*PassRegistry::getPassRegistry());
+      }
       virtual bool runOnSCC(CallGraphSCC &SCMM) {
         EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
         run();
         return false;
       }
     };
-    RegisterPass<CGPass> X1("cgp","cgp");
 
     struct FPass : public PassTest<Module, FunctionPass> {
     public:
@@ -171,7 +182,6 @@ namespace llvm {
         return false;
       }
     };
-    RegisterPass<FPass> X2("fp","fp");
 
     struct LPass : public PassTestBase<LoopPass> {
     private:
@@ -179,6 +189,7 @@ namespace llvm {
       static int fincount;
     public:
       LPass() {
+        initializeLPassPass(*PassRegistry::getPassRegistry());
         initcount = 0; fincount=0;
         EXPECT_FALSE(initialized);
       }
@@ -205,7 +216,6 @@ namespace llvm {
     };
     int LPass::initcount=0;
     int LPass::fincount=0;
-    RegisterPass<LPass> X3("lp","lp");
 
     struct BPass : public PassTestBase<BasicBlockPass> {
     private:
@@ -248,12 +258,13 @@ namespace llvm {
     };
     int BPass::inited=0;
     int BPass::fin=0;
-    RegisterPass<BPass> X4("bp","bp");
 
     struct OnTheFlyTest: public ModulePass {
     public:
       static char ID;
-      OnTheFlyTest() : ModulePass(ID) {}
+      OnTheFlyTest() : ModulePass(ID) {
+        initializeFPassPass(*PassRegistry::getPassRegistry());
+      }
       virtual bool runOnModule(Module &M) {
         EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
         for (Module::iterator I=M.begin(),E=M.end(); I != E; ++I) {
@@ -525,3 +536,13 @@ namespace llvm {
 
   }
 }
+
+INITIALIZE_PASS(ModuleNDM, "mndm", "mndm", false, false)
+INITIALIZE_PASS_BEGIN(CGPass, "cgp","cgp", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(CGPass, "cgp","cgp", false, false)
+INITIALIZE_PASS(FPass, "fp","fp", false, false)
+INITIALIZE_PASS_BEGIN(LPass, "lp","lp", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LPass, "lp","lp", false, false)
+INITIALIZE_PASS(BPass, "bp","bp", false, false)
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
new file mode 100644
index 000000000000..152e8eaaf1f1
--- /dev/null
+++ b/unittests/VMCore/ValueMapTest.cpp
@@ -0,0 +1,294 @@
+//===- llvm/unittest/ADT/ValueMapTest.cpp - ValueMap unit tests -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Config/config.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+// Test fixture
+template<typename T>
+class ValueMapTest : public testing::Test {
+protected:
+  Constant *ConstantV;
+  OwningPtr<BitCastInst> BitcastV;
+  OwningPtr<BinaryOperator> AddV;
+
+  ValueMapTest() :
+    ConstantV(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 0)),
+    BitcastV(new BitCastInst(ConstantV, Type::getInt32Ty(getGlobalContext()))),
+    AddV(BinaryOperator::CreateAdd(ConstantV, ConstantV)) {
+  }
+};
+
+// Run everything on Value*, a subtype to make sure that casting works as
+// expected, and a const subtype to make sure we cast const correctly.
+typedef ::testing::Types<Value, Instruction, const Instruction> KeyTypes;
+TYPED_TEST_CASE(ValueMapTest, KeyTypes);
+
+TYPED_TEST(ValueMapTest, Null) {
+  ValueMap<TypeParam*, int> VM1;
+  VM1[NULL] = 7;
+  EXPECT_EQ(7, VM1.lookup(NULL));
+}
+
+TYPED_TEST(ValueMapTest, FollowsValue) {
+  ValueMap<TypeParam*, int> VM;
+  VM[this->BitcastV.get()] = 7;
+  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+  EXPECT_EQ(0, VM.count(this->AddV.get()));
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  EXPECT_EQ(7, VM.lookup(this->AddV.get()));
+  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+  this->AddV.reset();
+  EXPECT_EQ(0, VM.count(this->AddV.get()));
+  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+  EXPECT_EQ(0U, VM.size());
+}
+
+TYPED_TEST(ValueMapTest, OperationsWork) {
+  ValueMap<TypeParam*, int> VM;
+  ValueMap<TypeParam*, int> VM2(16);  (void)VM2;
+  typename ValueMapConfig<TypeParam*>::ExtraData Data;
+  ValueMap<TypeParam*, int> VM3(Data, 16);  (void)VM3;
+  EXPECT_TRUE(VM.empty());
+
+  VM[this->BitcastV.get()] = 7;
+
+  // Find:
+  typename ValueMap<TypeParam*, int>::iterator I =
+    VM.find(this->BitcastV.get());
+  ASSERT_TRUE(I != VM.end());
+  EXPECT_EQ(this->BitcastV.get(), I->first);
+  EXPECT_EQ(7, I->second);
+  EXPECT_TRUE(VM.find(this->AddV.get()) == VM.end());
+
+  // Const find:
+  const ValueMap<TypeParam*, int> &CVM = VM;
+  typename ValueMap<TypeParam*, int>::const_iterator CI =
+    CVM.find(this->BitcastV.get());
+  ASSERT_TRUE(CI != CVM.end());
+  EXPECT_EQ(this->BitcastV.get(), CI->first);
+  EXPECT_EQ(7, CI->second);
+  EXPECT_TRUE(CVM.find(this->AddV.get()) == CVM.end());
+
+  // Insert:
+  std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult1 =
+    VM.insert(std::make_pair(this->AddV.get(), 3));
+  EXPECT_EQ(this->AddV.get(), InsertResult1.first->first);
+  EXPECT_EQ(3, InsertResult1.first->second);
+  EXPECT_TRUE(InsertResult1.second);
+  EXPECT_EQ(true, VM.count(this->AddV.get()));
+  std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult2 =
+    VM.insert(std::make_pair(this->AddV.get(), 5));
+  EXPECT_EQ(this->AddV.get(), InsertResult2.first->first);
+  EXPECT_EQ(3, InsertResult2.first->second);
+  EXPECT_FALSE(InsertResult2.second);
+
+  // Erase:
+  VM.erase(InsertResult2.first);
+  EXPECT_EQ(0U, VM.count(this->AddV.get()));
+  EXPECT_EQ(1U, VM.count(this->BitcastV.get()));
+  VM.erase(this->BitcastV.get());
+  EXPECT_EQ(0U, VM.count(this->BitcastV.get()));
+  EXPECT_EQ(0U, VM.size());
+
+  // Range insert:
+  SmallVector<std::pair<Instruction*, int>, 2> Elems;
+  Elems.push_back(std::make_pair(this->AddV.get(), 1));
+  Elems.push_back(std::make_pair(this->BitcastV.get(), 2));
+  VM.insert(Elems.begin(), Elems.end());
+  EXPECT_EQ(1, VM.lookup(this->AddV.get()));
+  EXPECT_EQ(2, VM.lookup(this->BitcastV.get()));
+}
+
+template<typename ExpectedType, typename VarType>
+void CompileAssertHasType(VarType) {
+  typedef char assert[is_same<ExpectedType, VarType>::value ? 1 : -1];
+}
+
+TYPED_TEST(ValueMapTest, Iteration) {
+  ValueMap<TypeParam*, int> VM;
+  VM[this->BitcastV.get()] = 2;
+  VM[this->AddV.get()] = 3;
+  size_t size = 0;
+  for (typename ValueMap<TypeParam*, int>::iterator I = VM.begin(), E = VM.end();
+       I != E; ++I) {
+    ++size;
+    std::pair<TypeParam*, int> value = *I; (void)value;
+    CompileAssertHasType<TypeParam*>(I->first);
+    if (I->second == 2) {
+      EXPECT_EQ(this->BitcastV.get(), I->first);
+      I->second = 5;
+    } else if (I->second == 3) {
+      EXPECT_EQ(this->AddV.get(), I->first);
+      I->second = 6;
+    } else {
+      ADD_FAILURE() << "Iterated through an extra value.";
+    }
+  }
+  EXPECT_EQ(2U, size);
+  EXPECT_EQ(5, VM[this->BitcastV.get()]);
+  EXPECT_EQ(6, VM[this->AddV.get()]);
+
+  size = 0;
+  // Cast to const ValueMap to avoid a bug in DenseMap's iterators.
+  const ValueMap<TypeParam*, int>& CVM = VM;
+  for (typename ValueMap<TypeParam*, int>::const_iterator I = CVM.begin(),
+         E = CVM.end(); I != E; ++I) {
+    ++size;
+    std::pair<TypeParam*, int> value = *I;  (void)value;
+    CompileAssertHasType<TypeParam*>(I->first);
+    if (I->second == 5) {
+      EXPECT_EQ(this->BitcastV.get(), I->first);
+    } else if (I->second == 6) {
+      EXPECT_EQ(this->AddV.get(), I->first);
+    } else {
+      ADD_FAILURE() << "Iterated through an extra value.";
+    }
+  }
+  EXPECT_EQ(2U, size);
+}
+
+TYPED_TEST(ValueMapTest, DefaultCollisionBehavior) {
+  // By default, we overwrite the old value with the replaced value.
+  ValueMap<TypeParam*, int> VM;
+  VM[this->BitcastV.get()] = 7;
+  VM[this->AddV.get()] = 9;
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+  EXPECT_EQ(9, VM.lookup(this->AddV.get()));
+}
+
+TYPED_TEST(ValueMapTest, ConfiguredCollisionBehavior) {
+  // TODO: Implement this when someone needs it.
+}
+
+template<typename KeyT>
+struct LockMutex : ValueMapConfig<KeyT> {
+  struct ExtraData {
+    sys::Mutex *M;
+    bool *CalledRAUW;
+    bool *CalledDeleted;
+  };
+  static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
+    *Data.CalledRAUW = true;
+    EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
+  }
+  static void onDelete(const ExtraData &Data, KeyT Old) {
+    *Data.CalledDeleted = true;
+    EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
+  }
+  static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
+};
+#if ENABLE_THREADS
+TYPED_TEST(ValueMapTest, LocksMutex) {
+  sys::Mutex M(false);  // Not recursive.
+  bool CalledRAUW = false, CalledDeleted = false;
+  typename LockMutex<TypeParam*>::ExtraData Data =
+    {&M, &CalledRAUW, &CalledDeleted};
+  ValueMap<TypeParam*, int, LockMutex<TypeParam*> > VM(Data);
+  VM[this->BitcastV.get()] = 7;
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  this->AddV.reset();
+  EXPECT_TRUE(CalledRAUW);
+  EXPECT_TRUE(CalledDeleted);
+}
+#endif
+
+template<typename KeyT>
+struct NoFollow : ValueMapConfig<KeyT> {
+  enum { FollowRAUW = false };
+};
+
+TYPED_TEST(ValueMapTest, NoFollowRAUW) {
+  ValueMap<TypeParam*, int, NoFollow<TypeParam*> > VM;
+  VM[this->BitcastV.get()] = 7;
+  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+  EXPECT_EQ(0, VM.count(this->AddV.get()));
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+  this->AddV.reset();
+  EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+  this->BitcastV.reset();
+  EXPECT_EQ(0, VM.lookup(this->BitcastV.get()));
+  EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+  EXPECT_EQ(0U, VM.size());
+}
+
+template<typename KeyT>
+struct CountOps : ValueMapConfig<KeyT> {
+  struct ExtraData {
+    int *Deletions;
+    int *RAUWs;
+  };
+
+  static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
+    ++*Data.RAUWs;
+  }
+  static void onDelete(const ExtraData &Data, KeyT Old) {
+    ++*Data.Deletions;
+  }
+};
+
+TYPED_TEST(ValueMapTest, CallsConfig) {
+  int Deletions = 0, RAUWs = 0;
+  typename CountOps<TypeParam*>::ExtraData Data = {&Deletions, &RAUWs};
+  ValueMap<TypeParam*, int, CountOps<TypeParam*> > VM(Data);
+  VM[this->BitcastV.get()] = 7;
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  EXPECT_EQ(0, Deletions);
+  EXPECT_EQ(1, RAUWs);
+  this->AddV.reset();
+  EXPECT_EQ(1, Deletions);
+  EXPECT_EQ(1, RAUWs);
+  this->BitcastV.reset();
+  EXPECT_EQ(1, Deletions);
+  EXPECT_EQ(1, RAUWs);
+}
+
+template<typename KeyT>
+struct ModifyingConfig : ValueMapConfig<KeyT> {
+  // We'll put a pointer here back to the ValueMap this key is in, so
+  // that we can modify it (and clobber *this) before the ValueMap
+  // tries to do the same modification.  In previous versions of
+  // ValueMap, that exploded.
+  typedef ValueMap<KeyT, int, ModifyingConfig<KeyT> > **ExtraData;
+
+  static void onRAUW(ExtraData Map, KeyT Old, KeyT New) {
+    (*Map)->erase(Old);
+  }
+  static void onDelete(ExtraData Map, KeyT Old) {
+    (*Map)->erase(Old);
+  }
+};
+TYPED_TEST(ValueMapTest, SurvivesModificationByConfig) {
+  ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > *MapAddress;
+  ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > VM(&MapAddress);
+  MapAddress = &VM;
+  // Now the ModifyingConfig can modify the Map inside a callback.
+  VM[this->BitcastV.get()] = 7;
+  this->BitcastV->replaceAllUsesWith(this->AddV.get());
+  EXPECT_FALSE(VM.count(this->BitcastV.get()));
+  EXPECT_FALSE(VM.count(this->AddV.get()));
+  VM[this->AddV.get()] = 7;
+  this->AddV.reset();
+  EXPECT_FALSE(VM.count(this->AddV.get()));
+}
+
+}
diff --git a/unittests/VMCore/VerifierTest.cpp b/unittests/VMCore/VerifierTest.cpp
index 1173b2d18f76..1924661200b5 100644
--- a/unittests/VMCore/VerifierTest.cpp
+++ b/unittests/VMCore/VerifierTest.cpp
@@ -10,8 +10,11 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/Verifier.h"
 #include "gtest/gtest.h"
@@ -41,5 +44,21 @@ TEST(VerifierTest, Branch_i1) {
   EXPECT_TRUE(verifyFunction(*F, ReturnStatusAction));
 }
 
+TEST(VerifierTest, AliasUnnamedAddr) {
+  LLVMContext &C = getGlobalContext();
+  Module M("M", C);
+  const Type *Ty = Type::getInt8Ty(C);
+  Constant *Init = Constant::getNullValue(Ty);
+  GlobalVariable *Aliasee = new GlobalVariable(M, Ty, true,
+                                               GlobalValue::ExternalLinkage,
+                                               Init, "foo");
+  GlobalAlias *GA = new GlobalAlias(Type::getInt8PtrTy(C),
+                                    GlobalValue::ExternalLinkage,
+                                    "bar", Aliasee, &M);
+  GA->setUnnamedAddr(true);
+  std::string Error;
+  EXPECT_TRUE(verifyModule(M, ReturnStatusAction, &Error));
+  EXPECT_TRUE(StringRef(Error).startswith("Alias cannot have unnamed_addr"));
+}
 }
 }
diff --git a/utils/CollectDebugInfoUsingLLDB.py b/utils/CollectDebugInfoUsingLLDB.py
new file mode 100755
index 000000000000..4dbd19afa62b
--- /dev/null
+++ b/utils/CollectDebugInfoUsingLLDB.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+
+#----------------------------------------------------------------------
+# 
+# Be sure to add the python path that points to the LLDB shared library.
+# On MacOSX csh, tcsh:
+#   setenv PYTHONPATH /Developer/Library/PrivateFrameworks/LLDB.framework/Resources/Python
+# On MacOSX sh, bash:
+#   export PYTHONPATH=/Developer/Library/PrivateFrameworks/LLDB.framework/Resources/Python
+#
+# This script collect debugging information using LLDB. This script is
+# used by TEST=dbg in llvm testsuite to measure quality of debug info in
+# optimized builds.
+#
+# Usage:
+# export PYTHONPATH=...
+# ./CollectDebugInfUsingLLDB.py program bp_file out_file
+#     program - Executable program with debug info.
+#     bp_file - Simple text file listing breakpoints.
+#               <absolute file name> <line number>
+#     out_file - Output file where the debug info will be emitted.
+#----------------------------------------------------------------------
+
+import lldb
+import os
+import sys
+import time
+
+# AlreadyPrintedValues - A place to keep track of recursive values.
+AlreadyPrintedValues = {}
+
+# ISAlreadyPrinted - Return true if value is already printed.
+def IsAlreadyPrinted(value_name):
+        if AlreadyPrintedValues.get(value_name) is None:
+                AlreadyPrintedValues[value_name] = 1
+                return False
+        return True
+
+
+# print_var_value - Print a variable's value.
+def print_var_value (v, file, frame):
+        if v.IsValid() == False:
+                return
+        if IsAlreadyPrinted(v.GetName()):
+                return
+        total_children = v.GetNumChildren()
+        if total_children > 0:
+            c = 0
+            while (c < total_children) :
+                    child = v.GetChildAtIndex(c)
+                    if child is None:
+                        file.write("None")
+                    else:
+                        if (child.GetName()) is None:
+                                file.write("None")
+                        else:
+                                file.write(child.GetName())
+                                file.write('=')
+                                print_var_value(child, file, frame)
+                                file.write(',')
+                    c = c + 1
+        else:
+            if v.GetValue(frame) is None:
+                file.write("None")
+            else:
+                file.write(v.GetValue(frame))
+
+# print_vars - Print variable values in output file.
+def print_vars (tag, vars, fname, line, file, frame, target, thread):
+    # disable this thread.
+    count = thread.GetStopReasonDataCount()
+    bid = 0
+    tid = 0
+    for i in range(count):
+        id = thread.GetStopReasonDataAtIndex(i)
+        bp = target.FindBreakpointByID(id)
+        if bp.IsValid():
+            if bp.IsEnabled() == True:
+                    bid = bp.GetID()
+                    tid = bp.GetThreadID()
+                    bp.SetEnabled(False)
+        else:
+            bp_loc = bp.FindLocationByID(thread.GetStopReasonDataAtIndex(i+1))
+            if bp_loc.IsValid():
+                bid = bp_loc.GetBreakPoint().GetID()
+                tid = bp_loc.ThreadGetID()
+                bp_loc.SetEnabled(False);
+
+    for i in range(vars.GetSize()):
+            v = vars.GetValueAtIndex(i)
+            if v.GetName() is not None:
+                    file.write(tag)
+                    file.write(fname)
+                    file.write(':')
+                    file.write(str(line))
+                    file.write(' ')
+                    file.write(str(tid))
+                    file.write(':')
+                    file.write(str(bid))
+                    file.write(' ')
+                    file.write(v.GetName())
+                    file.write(' ')
+                    AlreadyPrintedValues.clear()
+                    print_var_value (v, file, frame)
+                    file.write('\n')
+
+# set_breakpoints - set breakpoints as listed in input file.
+def set_breakpoints (target, breakpoint_filename, file):
+    f = open(breakpoint_filename, "r")
+    lines = f.readlines()
+    for l in range(len(lines)):
+        c = lines[l].split()
+        # print "setting break point - ", c
+        bp = target.BreakpointCreateByLocation (str(c[0]), int(c[1]))
+        file.write("#Breakpoint ")
+        file.write(str(c[0]))
+        file.write(':')
+        file.write(str(c[1]))
+        file.write(' ')
+        file.write(str(bp.GetThreadID()))
+        file.write(':')
+        file.write(str(bp.GetID()))
+        file.write('\n')
+    f.close()
+
+# stopeed_at_breakpoint - Return True if process is stopeed at a
+# breakpoint.
+def stopped_at_breakpoint (process):
+    if process.IsValid():
+        state = process.GetState()
+        if state == lldb.eStateStopped:
+                thread = process.GetThreadAtIndex(0)
+                if thread.IsValid():
+                        if thread.GetStopReason() == lldb.eStopReasonBreakpoint:
+                                return True
+    return False
+
+# Create a new debugger instance
+debugger = lldb.SBDebugger.Create()
+
+# When we step or continue, don't return from the function until the process 
+# stops. We do this by setting the async mode to false.
+debugger.SetAsync (False)
+
+# Create a target from a file and arch
+##print "Creating a target for '%s'" % sys.argv[1]
+
+target = debugger.CreateTargetWithFileAndArch (sys.argv[1], lldb.LLDB_ARCH_DEFAULT)
+
+if target.IsValid():
+    #print "target is valid"
+    file=open(str(sys.argv[3]), 'w')    
+    set_breakpoints (target, sys.argv[2], file)
+
+    # Launch the process. Since we specified synchronous mode, we won't return
+    # from this function until we hit the breakpoint at main
+    sberror = lldb.SBError()
+    process = target.Launch (None, None, os.ctermid(), os.ctermid(), os.ctermid(), None, 0, False, sberror)
+    # Make sure the launch went ok
+    while stopped_at_breakpoint(process):
+        thread = process.GetThreadAtIndex (0)
+        frame = thread.GetFrameAtIndex (0)
+        if frame.IsValid():
+            # #Print some simple frame info
+            ##print frame
+            #print "frame is valid"
+            function = frame.GetFunction()
+            if function.IsValid():
+                fname = function.GetMangledName()
+                if fname is None:
+                    fname = function.GetName()
+                #print "function : ",fname
+                line = frame.GetLineEntry().GetLine()
+                vars = frame.GetVariables(1,0,0,0)
+                print_vars ("#Argument ", vars, fname, line, file, frame, target, thread)
+                # vars = frame.GetVariables(0,1,0,0)
+                # print_vars ("#Variables ", vars, fname, line, file, frame, target, thread)
+
+        process.Continue()
+    file.close()
+
+lldb.SBDebugger.Terminate()
diff --git a/utils/CompareDebugInfo.py b/utils/CompareDebugInfo.py
new file mode 100755
index 000000000000..2cd647e43a85
--- /dev/null
+++ b/utils/CompareDebugInfo.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+
+import os
+import sys
+
+DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".dbg.out"
+OPT_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".dbg.opt.out"
+LOG_FILE="Output/" + sys.argv[1] + ".log"
+NATIVE_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".native.dbg.out"
+NATIVE_OPT_DBG_OUTPUT_FILE="Output/" + sys.argv[1] + ".native.dbg.opt.out"
+NATIVE_LOG_FILE="Output/" + sys.argv[1] + ".native.log"
+REPORT_FILE="Output/" + sys.argv[1] + ".dbg.report.html"
+
+class BreakPoint:
+    def __init__(self, bp_name):
+        self.name = bp_name
+        self.values = {}
+        self.missing_args = []
+        self.matching_args = []
+        self.notmatching_args = []
+        self.missing_bp = False
+
+    def setMissing(self):
+        self.missing_bp = True
+
+    def getArgCount(self):
+        return len(self.values)
+
+    def getMissingArgCount(self):
+        if self.missing_bp == True:
+            return len(self.values)
+        return len(self.missing_args)
+
+    def getMatchingArgCount(self):
+        if self.missing_bp == True:
+            return 0
+        return len(self.matching_args)
+
+    def getNotMatchingArgCount(self):
+        if self.missing_bp == True:
+            return 0
+        return len(self.notmatching_args)
+
+    def recordArgument(self, arg_name, value):
+        self.values[arg_name] = value
+        
+    def __repr__(self):
+        print self.name
+        items = self.values.items()
+        for i in range(len(items)):
+            print items[i][0]," = ",items[i][1]
+        return ''
+
+    def compare_args(self, other, file):
+        myitems = self.values.items()
+        otheritems = other.values.items()
+        match = False
+        for i in range(len(myitems)):
+            if i >= len(otheritems):
+                match = True
+                self.missing_args.append(myitems[i][0])
+            elif cmp(myitems[i][1], otheritems[i][1]):
+                match = True
+                self.notmatching_args.append(myitems[i][0])
+            else:
+                self.matching_args.append(myitems[i][0])
+
+        self.print_list(self.matching_args, " Matching arguments ", file)
+        self.print_list(self.notmatching_args, " Not Matching arguments ", file)
+        self.print_list(self.missing_args, " Missing arguments ", file)
+        return match
+
+    def print_list(self, items, txt, pfile):
+        if len(items) == 0:
+            return
+        pfile.write(self.name)
+        pfile.write(txt)
+        for e in items:
+            pfile.write(e)
+            pfile.write(' ')
+        pfile.write('\n')
+
+def read_input(filename, dict):
+    f = open(filename, "r")
+    lines = f.readlines()
+    for l in range(len(lines)):
+        c = lines[l].split()
+        if c[0] == "#Breakpoint":
+            bp = dict.get(c[2])
+            if bp is None:
+                bp = BreakPoint(c[1])
+            dict[c[2]] = bp
+        if c[0] == "#Argument":
+            bp = dict.get(c[2])
+            if bp is None:
+                bp = BreakPoint(c[1])
+            dict[c[2]] = bp
+            bp.recordArgument(c[3], c[4])
+    return
+
+f1_breakpoints = {}
+read_input(DBG_OUTPUT_FILE, f1_breakpoints)
+f1_items = f1_breakpoints.items()
+
+f2_breakpoints = {}
+read_input(OPT_DBG_OUTPUT_FILE, f2_breakpoints)
+f2_items = f2_breakpoints.items()
+    
+f = open(LOG_FILE, "w")
+f.write("Log output\n")
+for f2bp in range(len(f2_items)):
+    id = f2_items[f2bp][0]
+    bp = f2_items[f2bp][1]
+    bp1 = f1_breakpoints.get(id)
+    if bp1 is None:
+        bp.setMissing()
+    else:
+        bp1.compare_args(bp,f)
+f.close()
+
+nf1_breakpoints = {}
+read_input(NATIVE_DBG_OUTPUT_FILE, nf1_breakpoints)
+nf1_items = nf1_breakpoints.items()
+
+nf2_breakpoints = {}
+read_input(NATIVE_OPT_DBG_OUTPUT_FILE, nf2_breakpoints)
+nf2_items = nf2_breakpoints.items()
+    
+nfl = open(NATIVE_LOG_FILE, "w")
+for nf2bp in range(len(nf2_items)):
+    id = nf2_items[nf2bp][0]
+    bp = nf2_items[nf2bp][1]
+    bp1 = nf1_breakpoints.get(id)
+    if bp1 is None:
+        bp.setMissing()
+    else:
+        bp1.compare_args(bp,nfl)
+nfl.close()
+
+f1_arg_count = 0
+f1_matching_arg_count = 0
+f1_notmatching_arg_count = 0
+f1_missing_arg_count = 0
+for idx in range(len(f1_items)):
+    bp = f1_items[idx][1]
+    f1_arg_count = f1_arg_count + bp.getArgCount()
+    f1_matching_arg_count = f1_matching_arg_count + bp.getMatchingArgCount()
+    f1_notmatching_arg_count = f1_notmatching_arg_count + bp.getNotMatchingArgCount()
+    f1_missing_arg_count = f1_missing_arg_count + bp.getMissingArgCount()
+
+nf1_arg_count = 0
+nf1_matching_arg_count = 0
+nf1_notmatching_arg_count = 0
+nf1_missing_arg_count = 0
+for idx in range(len(nf1_items)):
+    bp = nf1_items[idx][1]
+    nf1_arg_count = nf1_arg_count + bp.getArgCount()
+    nf1_matching_arg_count = nf1_matching_arg_count + bp.getMatchingArgCount()
+    nf1_notmatching_arg_count = nf1_notmatching_arg_count + bp.getNotMatchingArgCount()
+    nf1_missing_arg_count = nf1_missing_arg_count + bp.getMissingArgCount()
+
+rf = open(REPORT_FILE, "w")
+rf.write("<tr><td>")
+rf.write(str(sys.argv[1]))
+rf.write("</td><td>|</td><td>")
+rf.write(str(nf1_arg_count))
+rf.write("</td><td><b>")
+rf.write(str(nf1_matching_arg_count))
+rf.write("</b></td><td>")
+rf.write(str(nf1_notmatching_arg_count))
+rf.write("</td><td>")
+rf.write(str(nf1_missing_arg_count))
+rf.write("</td><td>|</td><td>")
+rf.write(str(f1_arg_count))
+rf.write("</td><td><b>")
+rf.write(str(f1_matching_arg_count))
+rf.write("</b></td><td>")
+rf.write(str(f1_notmatching_arg_count))
+rf.write("</td><td>")
+rf.write(str(f1_missing_arg_count))
+rf.write("\n")
+rf.close()
diff --git a/utils/FileCheck/CMakeLists.txt b/utils/FileCheck/CMakeLists.txt
index 8fee03fb57b0..54db453e70e8 100644
--- a/utils/FileCheck/CMakeLists.txt
+++ b/utils/FileCheck/CMakeLists.txt
@@ -2,7 +2,7 @@ add_executable(FileCheck
   FileCheck.cpp
   )
 
-target_link_libraries(FileCheck LLVMSupport LLVMSystem)
+target_link_libraries(FileCheck LLVMSupport)
 if( MINGW )
   target_link_libraries(FileCheck imagehlp psapi)
 endif( MINGW )
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index cd76d4404308..5d4cb0c0c5f0 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -16,13 +16,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include <algorithm>
@@ -50,6 +52,10 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
 class Pattern {
   SMLoc PatternLoc;
 
+  /// MatchEOF - When set, this pattern only matches the end of file. This is
+  /// used for trailing CHECK-NOTs.
+  bool MatchEOF;
+
   /// FixedStr - If non-empty, this pattern is a fixed string match with the
   /// specified fixed string.
   StringRef FixedStr;
@@ -71,7 +77,7 @@ class Pattern {
 
 public:
 
-  Pattern() { }
+  Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
 
   bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
 
@@ -271,6 +277,12 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
 /// there is a match, the size of the matched string is returned in MatchLen.
 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
                       StringMap<StringRef> &VariableTable) const {
+  // If this is the EOF pattern, match it immediately.
+  if (MatchEOF) {
+    MatchLen = 0;
+    return Buffer.size();
+  }
+
   // If this is a fixed string pattern, just match it now.
   if (!FixedStr.empty()) {
     MatchLen = FixedStr.size();
@@ -446,6 +458,11 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
 
   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
        Ptr != End; ++Ptr) {
+    // Eliminate trailing dosish \r.
+    if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
+      continue;
+    }
+
     // If C is not a horizontal whitespace, skip it.
     if (*Ptr != ' ' && *Ptr != '\t') {
       NewFile.push_back(*Ptr);
@@ -473,14 +490,14 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
 static bool ReadCheckFile(SourceMgr &SM,
                           std::vector<CheckString> &CheckStrings) {
   // Open the check file, and tell SourceMgr about it.
-  std::string ErrorStr;
-  MemoryBuffer *F =
-    MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
-  if (F == 0) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec =
+        MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) {
     errs() << "Could not open check file '" << CheckFilename << "': "
-           << ErrorStr << '\n';
+           << ec.message() << '\n';
     return true;
   }
+  MemoryBuffer *F = File.take();
 
   // If we want to canonicalize whitespace, strip excess whitespace from the
   // buffer containing the CHECK lines.
@@ -565,18 +582,20 @@ static bool ReadCheckFile(SourceMgr &SM,
     std::swap(NotMatches, CheckStrings.back().NotStrings);
   }
 
+  // Add an EOF pattern for any trailing CHECK-NOTs.
+  if (!NotMatches.empty()) {
+    CheckStrings.push_back(CheckString(Pattern(true),
+                                       SMLoc::getFromPointer(Buffer.data()),
+                                       false));
+    std::swap(NotMatches, CheckStrings.back().NotStrings);
+  }
+
   if (CheckStrings.empty()) {
     errs() << "error: no check strings found with prefix '" << CheckPrefix
            << ":'\n";
     return true;
   }
 
-  if (!NotMatches.empty()) {
-    errs() << "error: '" << CheckPrefix
-           << "-NOT:' not supported after last check line.\n";
-    return true;
-  }
-
   return false;
 }
 
@@ -631,15 +650,20 @@ int main(int argc, char **argv) {
     return 2;
 
   // Open the file to check and add it to SourceMgr.
-  std::string ErrorStr;
-  MemoryBuffer *F =
-    MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
-  if (F == 0) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec =
+        MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
     errs() << "Could not open input file '" << InputFilename << "': "
-           << ErrorStr << '\n';
+           << ec.message() << '\n';
     return true;
   }
+  MemoryBuffer *F = File.take();
 
+  if (F->getBufferSize() == 0) {
+    errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
+    return 1;
+  }
+  
   // Remove duplicate spaces in the input file if requested.
   if (!NoCanonicalizeWhiteSpace)
     F = CanonicalizeInputFile(F);
@@ -662,10 +686,11 @@ int main(int argc, char **argv) {
 
     // Find StrNo in the file.
     size_t MatchLen = 0;
-    Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable));
+    size_t MatchPos = CheckStr.Pat.Match(Buffer, MatchLen, VariableTable);
+    Buffer = Buffer.substr(MatchPos);
 
     // If we didn't find a match, reject the input.
-    if (Buffer.empty()) {
+    if (MatchPos == StringRef::npos) {
       PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
       return 1;
     }
diff --git a/utils/FileCheck/Makefile b/utils/FileCheck/Makefile
index f1af5b649e7a..268b7bc919a1 100644
--- a/utils/FileCheck/Makefile
+++ b/utils/FileCheck/Makefile
@@ -1,15 +1,15 @@
 ##===- utils/FileCheck/Makefile ----------------------------*- Makefile -*-===##
-# 
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
 TOOLNAME = FileCheck
-USEDLIBS = LLVMSupport.a LLVMSystem.a
+USEDLIBS = LLVMSupport.a
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/utils/FileUpdate/CMakeLists.txt b/utils/FileUpdate/CMakeLists.txt
index bacbd16b90f9..5dda49e0e4c5 100644
--- a/utils/FileUpdate/CMakeLists.txt
+++ b/utils/FileUpdate/CMakeLists.txt
@@ -2,7 +2,7 @@ add_executable(FileUpdate
   FileUpdate.cpp
   )
 
-target_link_libraries(FileUpdate LLVMSupport LLVMSystem)
+target_link_libraries(FileUpdate LLVMSupport)
 if( MINGW )
   target_link_libraries(FileUpdate imagehlp psapi)
 endif( MINGW )
diff --git a/utils/FileUpdate/FileUpdate.cpp b/utils/FileUpdate/FileUpdate.cpp
index 2cf366fa55f8..3ea1e4f306ee 100644
--- a/utils/FileUpdate/FileUpdate.cpp
+++ b/utils/FileUpdate/FileUpdate.cpp
@@ -15,9 +15,11 @@
 
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 static cl::opt<bool>
@@ -42,17 +44,16 @@ int main(int argc, char **argv) {
   }
 
   // Get the input data.
-  std::string ErrorStr;
-  MemoryBuffer *In =
-    MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
-  if (In == 0) {
+  OwningPtr<MemoryBuffer> In;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), In)) {
     errs() << argv[0] << ": error: Unable to get input '"
-           << InputFilename << "': " << ErrorStr << '\n';
+           << InputFilename << "': " << ec.message() << '\n';
     return 1;
   }
 
   // Get the output data.
-  MemoryBuffer *Out = MemoryBuffer::getFile(OutputFilename.c_str(), &ErrorStr);
+  OwningPtr<MemoryBuffer> Out;
+  MemoryBuffer::getFile(OutputFilename.c_str(), Out);
 
   // If the output exists and the contents match, we are done.
   if (Out && In->getBufferSize() == Out->getBufferSize() &&
@@ -64,12 +65,11 @@ int main(int argc, char **argv) {
     return 0;
   }
 
-  delete Out;
-
   // Otherwise, overwrite the output.
   if (!Quiet)
     errs() << argv[0] << ": Updating '" << OutputFilename
            << "', contents changed.\n";
+  std::string ErrorStr;
   tool_output_file OutStream(OutputFilename.c_str(), ErrorStr,
                              raw_fd_ostream::F_Binary);
   if (!ErrorStr.empty()) {
diff --git a/utils/FileUpdate/Makefile b/utils/FileUpdate/Makefile
index 5b545c207297..1e6c0a838c27 100644
--- a/utils/FileUpdate/Makefile
+++ b/utils/FileUpdate/Makefile
@@ -1,15 +1,15 @@
 ##===- utils/FileUpdate/Makefile ---------------------------*- Makefile -*-===##
-# 
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
 TOOLNAME = FileUpdate
-USEDLIBS = LLVMSupport.a LLVMSystem.a
+USEDLIBS = LLVMSupport.a
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/utils/GenLibDeps.pl b/utils/GenLibDeps.pl
index f1f7e72bc1a5..ca852adfcc0d 100755
--- a/utils/GenLibDeps.pl
+++ b/utils/GenLibDeps.pl
@@ -107,7 +107,6 @@ if ($PEROBJ) {
     $libpath =~ s/^TransformUtils/Transforms\/Utils/;
     $libpath =~ s/^ipa/Analysis\/IPA/;
     $libpath =~ s/^ipo/Transforms\/IPO/;
-    $libpath =~ s/^pic16passes/Target\/PIC16\/PIC16Passes/;
     $libpath = "lib/".$libpath."/";
     open DEFS, "$nmPath -sg $Directory/$lib|";
     while (<DEFS>) {
@@ -150,7 +149,6 @@ if ($PEROBJ) {
     $libpath =~ s/^TransformUtils/Transforms\/Utils/;
     $libpath =~ s/^ipa/Analysis\/IPA/;
     $libpath =~ s/^ipo/Transforms\/IPO/;
-    $libpath =~ s/^pic16passes/Target\/PIC16\/PIC16Passes/;
     $libpath = "lib/".$libpath."/";
     open UDEFS, "$nmPath -Aup $Directory/$lib|";
     while (<UDEFS>) {
diff --git a/utils/GetRepositoryPath b/utils/GetRepositoryPath
new file mode 100755
index 000000000000..326231c9e5d4
--- /dev/null
+++ b/utils/GetRepositoryPath
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+usage() {
+  echo "usage: $0 <source root>"
+  echo "  Prints the source control repository path of the given source"
+  echo "  directory, the exact format of the revision string depends on the"
+  echo "  source control system. If the source control system isn't known,"
+  echo "  the output is empty and the exit code is 1."
+  exit 1
+}
+
+if [ $# != 1 ] || [ ! -d $1 ]; then
+  usage;
+fi
+
+cd $1
+if [ -d .svn ]; then
+  svn info | grep 'URL:' | cut -d: -f2-
+elif [ -d .git/svn ]; then
+  git svn info | grep 'URL:' | cut -d: -f2-
+elif [ -d .git ]; then
+  git remote -v | grep 'fetch' | awk '{ print $2 }'
+else
+  exit 1;
+fi
+
+exit 0
diff --git a/utils/GetSourceVersion b/utils/GetSourceVersion
index b25f2f97736b..cbed7daf5b61 100755
--- a/utils/GetSourceVersion
+++ b/utils/GetSourceVersion
@@ -1,12 +1,12 @@
 #!/bin/sh
 
 usage() {
-   echo "usage: $0 <source root>"
-   echo "  Prints the source control revision of the given source directory,"
-   echo "  the exact format of the revision string depends on the source "
-   echo "  control system. If the source control system isn't known, the output"
-   echo "  is empty and the exit code is 1."
-   exit 1
+  echo "usage: $0 <source root>"
+  echo "  Prints the source control revision of the given source directory,"
+  echo "  the exact format of the revision string depends on the source "
+  echo "  control system. If the source control system isn't known, the output"
+  echo "  is empty and the exit code is 1."
+  exit 1
 }
 
 if [ $# != 1 ] || [ ! -d $1 ]; then
@@ -15,13 +15,13 @@ fi
 
 cd $1
 if [ -d .svn ]; then
-   svnversion
+  svnversion | sed -e "s#\([0-9]*\)[A-Z]*#\1#"
 elif [ -d .git/svn ]; then
-   git svn info | grep 'Revision:' | cut -d: -f2-
+  git svn info | grep 'Revision:' | cut -d: -f2-
 elif [ -d .git ]; then
-   git log -1 --pretty=format:%H
+  git log -1 --pretty=format:%H
 else
-   exit 1;
+  exit 1;
 fi
 
 exit 0
diff --git a/utils/KillTheDoctor/CMakeLists.txt b/utils/KillTheDoctor/CMakeLists.txt
new file mode 100644
index 000000000000..99c671e74af9
--- /dev/null
+++ b/utils/KillTheDoctor/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_executable(KillTheDoctor
+  KillTheDoctor.cpp
+  )
+
+target_link_libraries(KillTheDoctor LLVMSupport)
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
new file mode 100644
index 000000000000..7a89dd379b70
--- /dev/null
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -0,0 +1,596 @@
+//===- KillTheDoctor - Prevent Dr. Watson from stopping tests ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program provides an extremely hacky way to stop Dr. Watson from starting
+// due to unhandled exceptions in child processes.
+//
+// This simply starts the program named in the first positional argument with
+// the arguments following it under a debugger. All this debugger does is catch
+// any unhandled exceptions thrown in the child process and close the program
+// (and hopefully tells someone about it).
+//
+// This also provides another really hacky method to prevent assert dialog boxes
+// from popping up. When --no-user32 is passed, if any process loads user32.dll,
+// we assume it is trying to call MessageBoxEx and terminate it. The proper way
+// to do this would be to actually set a break point, but there's quite a bit
+// of code involved to get the address of MessageBoxEx in the remote process's
+// address space due to Address space layout randomization (ASLR). This can be
+// added if it's ever actually needed.
+//
+// If the subprocess exits for any reason other than successful termination, -1
+// is returned. If the process exits normally the value it returned is returned.
+//
+// I hate Windows.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/type_traits.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
+#include <algorithm>
+#include <cerrno>
+#include <cstdlib>
+#include <map>
+#include <string>
+#include <Windows.h>
+#include <WinError.h>
+#include <Dbghelp.h>
+#include <psapi.h>
+using namespace llvm;
+
+#undef max
+
+namespace {
+  cl::opt<std::string> ProgramToRun(cl::Positional,
+    cl::desc("<program to run>"));
+  cl::list<std::string>  Argv(cl::ConsumeAfter,
+    cl::desc("<program arguments>..."));
+  cl::opt<bool> TraceExecution("x",
+    cl::desc("Print detailed output about what is being run to stderr."));
+  cl::opt<unsigned> Timeout("t", cl::init(0),
+    cl::desc("Set maximum runtime in seconds. Defaults to infinite."));
+  cl::opt<bool> NoUser32("no-user32",
+    cl::desc("Terminate process if it loads user32.dll."));
+
+  StringRef ToolName;
+
+  template <typename HandleType>
+  class ScopedHandle {
+    typedef typename HandleType::handle_type handle_type;
+
+    handle_type Handle;
+
+  public:
+    ScopedHandle()
+      : Handle(HandleType::GetInvalidHandle()) {}
+
+    explicit ScopedHandle(handle_type handle)
+      : Handle(handle) {}
+
+    ~ScopedHandle() {
+      HandleType::Destruct(Handle);
+    }
+
+    ScopedHandle& operator=(handle_type handle) {
+      // Cleanup current handle.
+      if (!HandleType::isValid(Handle))
+        HandleType::Destruct(Handle);
+      Handle = handle;
+      return *this;
+    }
+
+    operator bool() const {
+      return HandleType::isValid(Handle);
+    }
+
+    operator handle_type() {
+      return Handle;
+    }
+  };
+
+  // This implements the most common handle in the Windows API.
+  struct CommonHandle {
+    typedef HANDLE handle_type;
+
+    static handle_type GetInvalidHandle() {
+      return INVALID_HANDLE_VALUE;
+    }
+
+    static void Destruct(handle_type Handle) {
+      ::CloseHandle(Handle);
+    }
+
+    static bool isValid(handle_type Handle) {
+      return Handle != GetInvalidHandle();
+    }
+  };
+
+  struct FileMappingHandle {
+    typedef HANDLE handle_type;
+
+    static handle_type GetInvalidHandle() {
+      return NULL;
+    }
+
+    static void Destruct(handle_type Handle) {
+      ::CloseHandle(Handle);
+    }
+
+    static bool isValid(handle_type Handle) {
+      return Handle != GetInvalidHandle();
+    }
+  };
+
+  struct MappedViewOfFileHandle {
+    typedef LPVOID handle_type;
+
+    static handle_type GetInvalidHandle() {
+      return NULL;
+    }
+
+    static void Destruct(handle_type Handle) {
+      ::UnmapViewOfFile(Handle);
+    }
+
+    static bool isValid(handle_type Handle) {
+      return Handle != GetInvalidHandle();
+    }
+  };
+
+  struct ProcessHandle : CommonHandle {};
+  struct ThreadHandle  : CommonHandle {};
+  struct TokenHandle   : CommonHandle {};
+  struct FileHandle    : CommonHandle {};
+
+  typedef ScopedHandle<FileMappingHandle>       FileMappingScopedHandle;
+  typedef ScopedHandle<MappedViewOfFileHandle>  MappedViewOfFileScopedHandle;
+  typedef ScopedHandle<ProcessHandle>           ProcessScopedHandle;
+  typedef ScopedHandle<ThreadHandle>            ThreadScopedHandle;
+  typedef ScopedHandle<TokenHandle>             TokenScopedHandle;
+  typedef ScopedHandle<FileHandle>              FileScopedHandle;
+}
+
+static error_code GetFileNameFromHandle(HANDLE FileHandle,
+                                        std::string& Name) {
+  char Filename[MAX_PATH+1];
+  bool Sucess = false;
+  Name.clear();
+
+  // Get the file size.
+  LARGE_INTEGER FileSize;
+  Sucess = ::GetFileSizeEx(FileHandle, &FileSize);
+
+  if (!Sucess)
+    return windows_error(::GetLastError());
+
+  // Create a file mapping object.
+  FileMappingScopedHandle FileMapping(
+    ::CreateFileMappingA(FileHandle,
+                         NULL,
+                         PAGE_READONLY,
+                         0,
+                         1,
+                         NULL));
+
+  if (!FileMapping)
+    return windows_error(::GetLastError());
+
+  // Create a file mapping to get the file name.
+  MappedViewOfFileScopedHandle MappedFile(
+    ::MapViewOfFile(FileMapping, FILE_MAP_READ, 0, 0, 1));
+
+  if (!MappedFile)
+    return windows_error(::GetLastError());
+
+  Sucess = ::GetMappedFileNameA(::GetCurrentProcess(),
+                                MappedFile,
+                                Filename,
+                                array_lengthof(Filename) - 1);
+
+  if (!Sucess)
+    return windows_error(::GetLastError());
+  else {
+    Name = Filename;
+    return windows_error::success;
+  }
+}
+
+static std::string QuoteProgramPathIfNeeded(StringRef Command) {
+  if (Command.find_first_of(' ') == StringRef::npos)
+    return Command;
+  else {
+    std::string ret;
+    ret.reserve(Command.size() + 3);
+    ret.push_back('"');
+    ret.append(Command.begin(), Command.end());
+    ret.push_back('"');
+    return ret;
+  }
+}
+
+/// @brief Find program using shell lookup rules.
+/// @param Program This is either an absolute path, relative path, or simple a
+///        program name. Look in PATH for any programs that match. If no
+///        extension is present, try all extensions in PATHEXT.
+/// @return If ec == errc::success, The absolute path to the program. Otherwise
+///         the return value is undefined.
+static std::string FindProgram(const std::string &Program, error_code &ec) {
+  char PathName[MAX_PATH + 1];
+  typedef SmallVector<StringRef, 12> pathext_t;
+  pathext_t pathext;
+  // Check for the program without an extension (in case it already has one).
+  pathext.push_back("");
+  SplitString(std::getenv("PATHEXT"), pathext, ";");
+
+  for (pathext_t::iterator i = pathext.begin(), e = pathext.end(); i != e; ++i){
+    SmallString<5> ext;
+    for (std::size_t ii = 0, e = i->size(); ii != e; ++ii)
+      ext.push_back(::tolower((*i)[ii]));
+    LPCSTR Extension = NULL;
+    if (ext.size() && ext[0] == '.')
+      Extension = ext.c_str();
+    DWORD length = ::SearchPathA(NULL,
+                                 Program.c_str(),
+                                 Extension,
+                                 array_lengthof(PathName),
+                                 PathName,
+                                 NULL);
+    if (length == 0)
+      ec = windows_error(::GetLastError());
+    else if (length > array_lengthof(PathName)) {
+      // This may have been the file, return with error.
+      ec = windows_error::buffer_overflow;
+      break;
+    } else {
+      // We found the path! Return it.
+      ec = windows_error::success;
+      break;
+    }
+  }
+
+  // Make sure PathName is valid.
+  PathName[MAX_PATH] = 0;
+  return PathName;
+}
+
+static error_code EnableDebugPrivileges() {
+  HANDLE TokenHandle;
+  BOOL success = ::OpenProcessToken(::GetCurrentProcess(),
+                                    TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
+                                    &TokenHandle);
+  if (!success)
+    return windows_error(::GetLastError());
+
+  TokenScopedHandle Token(TokenHandle);
+  TOKEN_PRIVILEGES  TokenPrivileges;
+  LUID              LocallyUniqueID;
+
+  success = ::LookupPrivilegeValueA(NULL,
+                                    SE_DEBUG_NAME,
+                                    &LocallyUniqueID);
+  if (!success)
+    return windows_error(::GetLastError());
+
+  TokenPrivileges.PrivilegeCount = 1;
+  TokenPrivileges.Privileges[0].Luid = LocallyUniqueID;
+  TokenPrivileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+  success = ::AdjustTokenPrivileges(Token,
+                                    FALSE,
+                                    &TokenPrivileges,
+                                    sizeof(TOKEN_PRIVILEGES),
+                                    NULL,
+                                    NULL);
+  // The value of success is basically useless. Either way we are just returning
+  // the value of ::GetLastError().
+  return windows_error(::GetLastError());
+}
+
+static StringRef ExceptionCodeToString(DWORD ExceptionCode) {
+  switch(ExceptionCode) {
+  case EXCEPTION_ACCESS_VIOLATION: return "EXCEPTION_ACCESS_VIOLATION";
+  case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+    return "EXCEPTION_ARRAY_BOUNDS_EXCEEDED";
+  case EXCEPTION_BREAKPOINT: return "EXCEPTION_BREAKPOINT";
+  case EXCEPTION_DATATYPE_MISALIGNMENT:
+    return "EXCEPTION_DATATYPE_MISALIGNMENT";
+  case EXCEPTION_FLT_DENORMAL_OPERAND: return "EXCEPTION_FLT_DENORMAL_OPERAND";
+  case EXCEPTION_FLT_DIVIDE_BY_ZERO: return "EXCEPTION_FLT_DIVIDE_BY_ZERO";
+  case EXCEPTION_FLT_INEXACT_RESULT: return "EXCEPTION_FLT_INEXACT_RESULT";
+  case EXCEPTION_FLT_INVALID_OPERATION:
+    return "EXCEPTION_FLT_INVALID_OPERATION";
+  case EXCEPTION_FLT_OVERFLOW: return "EXCEPTION_FLT_OVERFLOW";
+  case EXCEPTION_FLT_STACK_CHECK: return "EXCEPTION_FLT_STACK_CHECK";
+  case EXCEPTION_FLT_UNDERFLOW: return "EXCEPTION_FLT_UNDERFLOW";
+  case EXCEPTION_ILLEGAL_INSTRUCTION: return "EXCEPTION_ILLEGAL_INSTRUCTION";
+  case EXCEPTION_IN_PAGE_ERROR: return "EXCEPTION_IN_PAGE_ERROR";
+  case EXCEPTION_INT_DIVIDE_BY_ZERO: return "EXCEPTION_INT_DIVIDE_BY_ZERO";
+  case EXCEPTION_INT_OVERFLOW: return "EXCEPTION_INT_OVERFLOW";
+  case EXCEPTION_INVALID_DISPOSITION: return "EXCEPTION_INVALID_DISPOSITION";
+  case EXCEPTION_NONCONTINUABLE_EXCEPTION:
+    return "EXCEPTION_NONCONTINUABLE_EXCEPTION";
+  case EXCEPTION_PRIV_INSTRUCTION: return "EXCEPTION_PRIV_INSTRUCTION";
+  case EXCEPTION_SINGLE_STEP: return "EXCEPTION_SINGLE_STEP";
+  case EXCEPTION_STACK_OVERFLOW: return "EXCEPTION_STACK_OVERFLOW";
+  default: return "<unknown>";
+  }
+}
+
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  ToolName = argv[0];
+
+  cl::ParseCommandLineOptions(argc, argv, "Dr. Watson Assassin.\n");
+  if (ProgramToRun.size() == 0) {
+    cl::PrintHelpMessage();
+    return -1;
+  }
+
+  if (Timeout > std::numeric_limits<uint32_t>::max() / 1000) {
+    errs() << ToolName << ": Timeout value too large, must be less than: "
+                       << std::numeric_limits<uint32_t>::max() / 1000
+                       << '\n';
+    return -1;
+  }
+
+  std::string CommandLine(ProgramToRun);
+
+  error_code ec;
+  ProgramToRun = FindProgram(ProgramToRun, ec);
+  if (ec) {
+    errs() << ToolName << ": Failed to find program: '" << CommandLine
+           << "': " << ec.message() << '\n';
+    return -1;
+  }
+
+  if (TraceExecution)
+    errs() << ToolName << ": Found Program: " << ProgramToRun << '\n';
+
+  for (std::vector<std::string>::iterator i = Argv.begin(),
+                                          e = Argv.end();
+                                          i != e; ++i) {
+    CommandLine.push_back(' ');
+    CommandLine.append(*i);
+  }
+
+  if (TraceExecution)
+    errs() << ToolName << ": Program Image Path: " << ProgramToRun << '\n'
+           << ToolName << ": Command Line: " << CommandLine << '\n';
+
+  STARTUPINFO StartupInfo;
+  PROCESS_INFORMATION ProcessInfo;
+  std::memset(&StartupInfo, 0, sizeof(StartupInfo));
+  StartupInfo.cb = sizeof(StartupInfo);
+  std::memset(&ProcessInfo, 0, sizeof(ProcessInfo));
+
+  // Set error mode to not display any message boxes. The child process inherits
+  // this.
+  ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
+  ::_set_error_mode(_OUT_TO_STDERR);
+
+  BOOL success = ::CreateProcessA(ProgramToRun.c_str(),
+                            LPSTR(CommandLine.c_str()),
+                                  NULL,
+                                  NULL,
+                                  FALSE,
+                                  DEBUG_PROCESS,
+                                  NULL,
+                                  NULL,
+                                  &StartupInfo,
+                                  &ProcessInfo);
+  if (!success) {
+    errs() << ToolName << ": Failed to run program: '" << ProgramToRun
+           << "': " << error_code(windows_error(::GetLastError())).message()
+           << '\n';
+    return -1;
+  }
+
+  // Make sure ::CloseHandle is called on exit.
+  std::map<DWORD, HANDLE> ProcessIDToHandle;
+
+  DEBUG_EVENT DebugEvent;
+  std::memset(&DebugEvent, 0, sizeof(DebugEvent));
+  DWORD dwContinueStatus = DBG_CONTINUE;
+
+  // Run the program under the debugger until either it exits, or throws an
+  // exception.
+  if (TraceExecution)
+    errs() << ToolName << ": Debugging...\n";
+
+  while(true) {
+    DWORD TimeLeft = INFINITE;
+    if (Timeout > 0) {
+      FILETIME CreationTime, ExitTime, KernelTime, UserTime;
+      ULARGE_INTEGER a, b;
+      success = ::GetProcessTimes(ProcessInfo.hProcess,
+                                  &CreationTime,
+                                  &ExitTime,
+                                  &KernelTime,
+                                  &UserTime);
+      if (!success) {
+        ec = windows_error(::GetLastError());
+
+        errs() << ToolName << ": Failed to get process times: "
+               << ec.message() << '\n';
+        return -1;
+      }
+      a.LowPart = KernelTime.dwLowDateTime;
+      a.HighPart = KernelTime.dwHighDateTime;
+      b.LowPart = UserTime.dwLowDateTime;
+      b.HighPart = UserTime.dwHighDateTime;
+      // Convert 100-nanosecond units to milliseconds.
+      uint64_t TotalTimeMiliseconds = (a.QuadPart + b.QuadPart) / 10000;
+      // Handle the case where the process has been running for more than 49
+      // days.
+      if (TotalTimeMiliseconds > std::numeric_limits<uint32_t>::max()) {
+        errs() << ToolName << ": Timeout Failed: Process has been running for"
+                              "more than 49 days.\n";
+        return -1;
+      }
+
+      // We check with > instead of using Timeleft because if
+      // TotalTimeMiliseconds is greater than Timeout * 1000, TimeLeft would
+      // underflow.
+      if (TotalTimeMiliseconds > (Timeout * 1000)) {
+        errs() << ToolName << ": Process timed out.\n";
+        ::TerminateProcess(ProcessInfo.hProcess, -1);
+        // Otherwise other stuff starts failing...
+        return -1;
+      }
+
+      TimeLeft = (Timeout * 1000) - static_cast<uint32_t>(TotalTimeMiliseconds);
+    }
+    success = WaitForDebugEvent(&DebugEvent, TimeLeft);
+
+    if (!success) {
+      ec = windows_error(::GetLastError());
+
+      if (ec == errc::timed_out) {
+        errs() << ToolName << ": Process timed out.\n";
+        ::TerminateProcess(ProcessInfo.hProcess, -1);
+        // Otherwise other stuff starts failing...
+        return -1;
+      }
+
+      errs() << ToolName << ": Failed to wait for debug event in program: '"
+             << ProgramToRun << "': " << ec.message() << '\n';
+      return -1;
+    }
+
+    switch(DebugEvent.dwDebugEventCode) {
+    case CREATE_PROCESS_DEBUG_EVENT:
+      // Make sure we remove the handle on exit.
+      if (TraceExecution)
+        errs() << ToolName << ": Debug Event: CREATE_PROCESS_DEBUG_EVENT\n";
+      ProcessIDToHandle[DebugEvent.dwProcessId] =
+        DebugEvent.u.CreateProcessInfo.hProcess;
+      ::CloseHandle(DebugEvent.u.CreateProcessInfo.hFile);
+      break;
+    case EXIT_PROCESS_DEBUG_EVENT: {
+        if (TraceExecution)
+          errs() << ToolName << ": Debug Event: EXIT_PROCESS_DEBUG_EVENT\n";
+
+        // If this is the process we originally created, exit with its exit
+        // code.
+        if (DebugEvent.dwProcessId == ProcessInfo.dwProcessId)
+          return DebugEvent.u.ExitProcess.dwExitCode;
+
+        // Otherwise cleanup any resources we have for it.
+        std::map<DWORD, HANDLE>::iterator ExitingProcess =
+          ProcessIDToHandle.find(DebugEvent.dwProcessId);
+        if (ExitingProcess == ProcessIDToHandle.end()) {
+          errs() << ToolName << ": Got unknown process id!\n";
+          return -1;
+        }
+        ::CloseHandle(ExitingProcess->second);
+        ProcessIDToHandle.erase(ExitingProcess);
+      }
+      break;
+    case CREATE_THREAD_DEBUG_EVENT:
+      ::CloseHandle(DebugEvent.u.CreateThread.hThread);
+      break;
+    case LOAD_DLL_DEBUG_EVENT: {
+        // Cleanup the file handle.
+        FileScopedHandle DLLFile(DebugEvent.u.LoadDll.hFile);
+        std::string DLLName;
+        ec = GetFileNameFromHandle(DLLFile, DLLName);
+        if (ec) {
+          DLLName = "<failed to get file name from file handle> : ";
+          DLLName += ec.message();
+        }
+        if (TraceExecution) {
+          errs() << ToolName << ": Debug Event: LOAD_DLL_DEBUG_EVENT\n";
+          errs().indent(ToolName.size()) << ": DLL Name : " << DLLName << '\n';
+        }
+
+        if (NoUser32 && sys::path::stem(DLLName) == "user32") {
+          // Program is loading user32.dll, in the applications we are testing,
+          // this only happens if an assert has fired. By now the message has
+          // already been printed, so simply close the program.
+          errs() << ToolName << ": user32.dll loaded!\n";
+          errs().indent(ToolName.size())
+                 << ": This probably means that assert was called. Closing "
+                    "program to prevent message box from popping up.\n";
+          dwContinueStatus = DBG_CONTINUE;
+          ::TerminateProcess(ProcessIDToHandle[DebugEvent.dwProcessId], -1);
+          return -1;
+        }
+      }
+      break;
+    case EXCEPTION_DEBUG_EVENT: {
+        // Close the application if this exception will not be handled by the
+        // child application.
+        if (TraceExecution)
+          errs() << ToolName << ": Debug Event: EXCEPTION_DEBUG_EVENT\n";
+
+        EXCEPTION_DEBUG_INFO  &Exception = DebugEvent.u.Exception;
+        if (Exception.dwFirstChance > 0) {
+          if (TraceExecution) {
+            errs().indent(ToolName.size()) << ": Debug Info : ";
+            errs() << "First chance exception at "
+                   << Exception.ExceptionRecord.ExceptionAddress
+                   << ", exception code: "
+                   << ExceptionCodeToString(
+                        Exception.ExceptionRecord.ExceptionCode)
+                   << " (" << Exception.ExceptionRecord.ExceptionCode << ")\n";
+          }
+          dwContinueStatus = DBG_EXCEPTION_NOT_HANDLED;
+        } else {
+          errs() << ToolName << ": Unhandled exception in: " << ProgramToRun
+                 << "!\n";
+                 errs().indent(ToolName.size()) << ": location: ";
+                 errs() << Exception.ExceptionRecord.ExceptionAddress
+                        << ", exception code: "
+                        << ExceptionCodeToString(
+                            Exception.ExceptionRecord.ExceptionCode)
+                        << " (" << Exception.ExceptionRecord.ExceptionCode
+                        << ")\n";
+          dwContinueStatus = DBG_CONTINUE;
+          ::TerminateProcess(ProcessIDToHandle[DebugEvent.dwProcessId], -1);
+          return -1;
+        }
+      }
+      break;
+    default:
+      // Do nothing.
+      if (TraceExecution)
+        errs() << ToolName << ": Debug Event: <unknown>\n";
+      break;
+    }
+
+    success = ContinueDebugEvent(DebugEvent.dwProcessId,
+                                 DebugEvent.dwThreadId,
+                                 dwContinueStatus);
+    if (!success) {
+      ec = windows_error(::GetLastError());
+      errs() << ToolName << ": Failed to continue debugging program: '"
+             << ProgramToRun << "': " << ec.message() << '\n';
+      return -1;
+    }
+
+    dwContinueStatus = DBG_CONTINUE;
+  }
+
+  assert(0 && "Fell out of debug loop. This shouldn't be possible!");
+  return -1;
+}
diff --git a/utils/Makefile b/utils/Makefile
index 1a4dcca8c5ee..9d4dc5c2f90b 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -15,7 +15,7 @@ EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh \
               DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
 	      getsrcs.sh importNLT.pl llvmdo llvmgrep llvm-native-gcc \
 	      llvm-native-gxx makellvm NightlyTest.gnuplot NightlyTest.pl \
-	      NightlyTestTemplate.html NLT.schema OldenDataRecover.pl \
+	      NightlyTestTemplate.html NLT.schema \
 	      parseNLT.pl plotNLT.pl profile.pl \
 	      webNLT.pl vim
 
diff --git a/utils/OldenDataRecover.pl b/utils/OldenDataRecover.pl
deleted file mode 100644
index 767839488b34..000000000000
--- a/utils/OldenDataRecover.pl
+++ /dev/null
@@ -1,37 +0,0 @@
-#this script is intended to help recover the running graphs when
-#the nightly tester decides to eat them.
-
-#zgrep -E "(=========)|(TEST-RESULT-llc-time)" *-Olden-tests.txt* |perl this > file
-#zgrep -E "(=========)|(TEST-RESULT-compile.*bc)" *-Olden-tests.tx* |perl this >file
-
-while (<>) {
-  if (/(\d*-\d*-\d*)-.*=========.*\/(.*)\' Program/) {
-#    print "$1 $2\n";
-    $curP = $2;
-    $curD = $1;
-    $dates{$1} = 1;
-  } elsif (/(\d*-\d*-\d*)-.*TEST-RESULT-.*: program (\d*\.\d*)/) {
-#    print "$1 $2\n";
-    if ($curD eq $1) {
-      $$data{$curD}{$curP} = $2;
-    }
-  } elsif (/(\d*-\d*-\d*)-.*TEST-RESULT-.*: (\d*)/) {
-#    print "$1 $2\n";
-    if ($curD eq $1) {
-      $$data{$curD}{$curP} = $2;
-    }
-  }
-}
-@progs = ("bh", "em3d", "mst", "power", "tsp", "bisort", "health", "perimeter", "treeadd", "voronoi");
-
-foreach $date (sort keys %dates) {
-  print "$date: ";
-  foreach $prog (@progs) {
-    if ($$data{$date}{$prog}) {
-      print " $$data{$date}{$prog}";
-    } else {
-      print " 0";
-    }
-  }
-  print "\n";
-}
diff --git a/utils/PerfectShuffle/PerfectShuffle.cpp b/utils/PerfectShuffle/PerfectShuffle.cpp
index b94a7d326d19..98f8f4cc0cab 100644
--- a/utils/PerfectShuffle/PerfectShuffle.cpp
+++ b/utils/PerfectShuffle/PerfectShuffle.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <iostream>
+#include <iomanip>
 #include <vector>
 #include <cassert>
 #include <cstdlib>
@@ -400,7 +401,7 @@ int main() {
     // LHS, and 13 bits of RHS = 32 bits.
     unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
 
-    std::cout << "  " << Val << "U,\t// ";
+    std::cout << "  " << std::setw(10) << Val << "U, // ";
     PrintMask(i, std::cout);
     std::cout << ": Cost " << ShufTab[i].Cost;
     std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
index 03b01f6bdb74..a8de7452ea06 100644
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -221,7 +221,7 @@ typedef enum {
 #define BIT_WIDTH 32
 
 // Forward declaration.
-class FilterChooser;
+class ARMFilterChooser;
 
 // Representation of the instruction to work on.
 typedef bit_value_t insn_t[BIT_WIDTH];
@@ -240,7 +240,7 @@ typedef bit_value_t insn_t[BIT_WIDTH];
 /// the Filter/FilterChooser combo does not know how to distinguish among the
 /// Opcodes assigned.
 ///
-/// An example of a conflcit is 
+/// An example of a conflict is 
 ///
 /// Conflict:
 ///                     111101000.00........00010000....
@@ -262,9 +262,9 @@ typedef bit_value_t insn_t[BIT_WIDTH];
 /// decoder could try to decode the even/odd register numbering and assign to
 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
 /// version and return the Opcode since the two have the same Asm format string.
-class Filter {
+class ARMFilter {
 protected:
-  FilterChooser *Owner; // points to the FilterChooser who owns this filter
+  ARMFilterChooser *Owner; // points to the FilterChooser who owns this filter
   unsigned StartBit; // the starting bit position
   unsigned NumBits; // number of bits to filter
   bool Mixed; // a mixed region contains both set and unset bits
@@ -276,7 +276,7 @@ protected:
   std::vector<unsigned> VariableInstructions;
 
   // Map of well-known segment value to its delegate.
-  std::map<unsigned, FilterChooser*> FilterChooserMap;
+  std::map<unsigned, ARMFilterChooser*> FilterChooserMap;
 
   // Number of instructions which fall under FilteredInstructions category.
   unsigned NumFiltered;
@@ -296,16 +296,17 @@ public:
   }
   // Return the filter chooser for the group of instructions without constant
   // segment values.
-  FilterChooser &getVariableFC() {
+  ARMFilterChooser &getVariableFC() {
     assert(NumFiltered == 1);
     assert(FilterChooserMap.size() == 1);
     return *(FilterChooserMap.find((unsigned)-1)->second);
   }
 
-  Filter(const Filter &f);
-  Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
+  ARMFilter(const ARMFilter &f);
+  ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
+            bool mixed);
 
-  ~Filter();
+  ~ARMFilter();
 
   // Divides the decoding task into sub tasks and delegates them to the
   // inferior FilterChooser's.
@@ -333,7 +334,7 @@ typedef enum {
   ATTR_MIXED
 } bitAttr_t;
 
-/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
+/// ARMFilterChooser - FilterChooser chooses the best filter among a set of Filters
 /// in order to perform the decoding of instructions at the current level.
 ///
 /// Decoding proceeds from the top down.  Based on the well-known encoding bits
@@ -348,11 +349,11 @@ typedef enum {
 /// It is useful to think of a Filter as governing the switch stmts of the
 /// decoding tree.  And each case is delegated to an inferior FilterChooser to
 /// decide what further remaining bits to look at.
-class FilterChooser {
+class ARMFilterChooser {
   static TARGET_NAME_t TargetName;
 
 protected:
-  friend class Filter;
+  friend class ARMFilter;
 
   // Vector of codegen instructions to choose our filter.
   const std::vector<const CodeGenInstruction*> &AllInstructions;
@@ -361,14 +362,14 @@ protected:
   const std::vector<unsigned> Opcodes;
 
   // Vector of candidate filters.
-  std::vector<Filter> Filters;
+  std::vector<ARMFilter> Filters;
 
   // Array of bit values passed down from our parent.
   // Set to all BIT_UNFILTERED's for Parent == NULL.
   bit_value_t FilterBitValues[BIT_WIDTH];
 
   // Links to the FilterChooser above us in the decoding tree.
-  FilterChooser *Parent;
+  ARMFilterChooser *Parent;
   
   // Index of the best filter from Filters.
   int BestIndex;
@@ -376,13 +377,13 @@ protected:
 public:
   static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; }
 
-  FilterChooser(const FilterChooser &FC) :
+  ARMFilterChooser(const ARMFilterChooser &FC) :
       AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
       Filters(FC.Filters), Parent(FC.Parent), BestIndex(FC.BestIndex) {
     memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues));
   }
 
-  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+  ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
                 const std::vector<unsigned> &IDs) :
       AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(NULL),
       BestIndex(-1) {
@@ -392,10 +393,10 @@ public:
     doFilter();
   }
 
-  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
-                const std::vector<unsigned> &IDs,
-                bit_value_t (&ParentFilterBitValues)[BIT_WIDTH],
-                FilterChooser &parent) :
+  ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+                   const std::vector<unsigned> &IDs,
+                   bit_value_t (&ParentFilterBitValues)[BIT_WIDTH],
+                   ARMFilterChooser &parent) :
       AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(&parent),
       BestIndex(-1) {
     for (unsigned i = 0; i < BIT_WIDTH; ++i)
@@ -426,8 +427,9 @@ protected:
       Insn[i] = bitFromBits(Bits, i);
 
     // Set Inst{21} to 1 (wback) when IndexModeBits == IndexModeUpd.
-    if (getByteField(*AllInstructions[Opcode]->TheDef, "IndexModeBits")
-        == IndexModeUpd)
+    Record *R = AllInstructions[Opcode]->TheDef;
+    if (R->getValue("IndexModeBits") &&
+        getByteField(*R, "IndexModeBits") == IndexModeUpd)
       Insn[21] = BIT_TRUE;
   }
 
@@ -452,7 +454,7 @@ protected:
   /// dumpFilterArray on each filter chooser up to the top level one.
   void dumpStack(raw_ostream &o, const char *prefix);
 
-  Filter &bestFilter() {
+  ARMFilter &bestFilter() {
     assert(BestIndex != -1 && "BestIndex not set");
     return Filters[BestIndex];
   }
@@ -497,11 +499,12 @@ protected:
   bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
 
   // Emits code to decode the singleton, and then to decode the rest.
-  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,Filter &Best);
+  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+                            ARMFilter &Best);
 
   // Assign a single filter and run with it.
-  void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit,
-      bool mixed);
+  void runSingleFilter(ARMFilterChooser &owner, unsigned startBit,
+                       unsigned numBit, bool mixed);
 
   // reportRegion is a helper function for filterProcessor to mark a region as
   // eligible for use as a filter region.
@@ -530,7 +533,7 @@ protected:
 //                       //
 ///////////////////////////
 
-Filter::Filter(const Filter &f) :
+ARMFilter::ARMFilter(const ARMFilter &f) :
   Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
   FilteredInstructions(f.FilteredInstructions),
   VariableInstructions(f.VariableInstructions),
@@ -538,7 +541,7 @@ Filter::Filter(const Filter &f) :
   LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
 }
 
-Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
+ARMFilter::ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
     bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
                   Mixed(mixed) {
   assert(StartBit + NumBits - 1 < BIT_WIDTH);
@@ -575,8 +578,8 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
          && "Filter returns no instruction categories");
 }
 
-Filter::~Filter() {
-  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+ARMFilter::~ARMFilter() {
+  std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
   for (filterIterator = FilterChooserMap.begin();
        filterIterator != FilterChooserMap.end();
        filterIterator++) {
@@ -590,7 +593,7 @@ Filter::~Filter() {
 // A special case arises when there's only one entry in the filtered
 // instructions.  In order to unambiguously decode the singleton, we need to
 // match the remaining undecoded encoding bits against the singleton.
-void Filter::recurse() {
+void ARMFilter::recurse() {
   std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
 
   bit_value_t BitValueArray[BIT_WIDTH];
@@ -606,12 +609,12 @@ void Filter::recurse() {
 
     // Delegates to an inferior filter chooser for futher processing on this
     // group of instructions whose segment values are variable.
-    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+    FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
                               (unsigned)-1,
-                              new FilterChooser(Owner->AllInstructions,
-                                                VariableInstructions,
-                                                BitValueArray,
-                                                *Owner)
+                              new ARMFilterChooser(Owner->AllInstructions,
+                                                   VariableInstructions,
+                                                   BitValueArray,
+                                                   *Owner)
                               ));
   }
 
@@ -638,18 +641,18 @@ void Filter::recurse() {
 
     // Delegates to an inferior filter chooser for futher processing on this
     // category of instructions.
-    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+    FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
                               mapIterator->first,
-                              new FilterChooser(Owner->AllInstructions,
-                                                mapIterator->second,
-                                                BitValueArray,
-                                                *Owner)
+                              new ARMFilterChooser(Owner->AllInstructions,
+                                                   mapIterator->second,
+                                                   BitValueArray,
+                                                   *Owner)
                               ));
   }
 }
 
 // Emit code to decode instructions given a segment or segments of bits.
-void Filter::emit(raw_ostream &o, unsigned &Indentation) {
+void ARMFilter::emit(raw_ostream &o, unsigned &Indentation) {
   o.indent(Indentation) << "// Check Inst{";
 
   if (NumBits > 1)
@@ -660,7 +663,7 @@ void Filter::emit(raw_ostream &o, unsigned &Indentation) {
   o.indent(Indentation) << "switch (fieldFromInstruction(insn, "
                         << StartBit << ", " << NumBits << ")) {\n";
 
-  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+  std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
 
   bool DefaultCase = false;
   for (filterIterator = FilterChooserMap.begin();
@@ -709,7 +712,7 @@ void Filter::emit(raw_ostream &o, unsigned &Indentation) {
 
 // Returns the number of fanout produced by the filter.  More fanout implies
 // the filter distinguishes more categories of instructions.
-unsigned Filter::usefulness() const {
+unsigned ARMFilter::usefulness() const {
   if (VariableInstructions.size())
     return FilteredInstructions.size();
   else
@@ -723,10 +726,10 @@ unsigned Filter::usefulness() const {
 //////////////////////////////////
 
 // Define the symbol here.
-TARGET_NAME_t FilterChooser::TargetName;
+TARGET_NAME_t ARMFilterChooser::TargetName;
 
 // This provides an opportunity for target specific code emission.
-void FilterChooser::emitTopHook(raw_ostream &o) {
+void ARMFilterChooser::emitTopHook(raw_ostream &o) {
   if (TargetName == TARGET_ARM) {
     // Emit code that references the ARMFormat data type.
     o << "static const ARMFormat ARMFormats[] = {\n";
@@ -747,7 +750,7 @@ void FilterChooser::emitTopHook(raw_ostream &o) {
 }
 
 // Emit the top level typedef and decodeInstruction() function.
-void FilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
+void ARMFilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
   // Run the target specific emit hook.
   emitTopHook(o);
 
@@ -801,7 +804,7 @@ void FilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
 
   o << '\n';
 
-  o.indent(Indentation) << "static uint16_t decodeInstruction(field_t insn) {\n";
+  o.indent(Indentation) <<"static uint16_t decodeInstruction(field_t insn) {\n";
 
   ++Indentation; ++Indentation;
   // Emits code to decode the instructions.
@@ -818,7 +821,7 @@ void FilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
 
 // This provides an opportunity for target specific code emission after
 // emitTop().
-void FilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) {
+void ARMFilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) {
   if (TargetName != TARGET_THUMB) return;
 
   // Emit code that decodes the Thumb ISA.
@@ -843,7 +846,7 @@ void FilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) {
 //
 // Returns false if and on the first uninitialized bit value encountered.
 // Returns true, otherwise.
-bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
+bool ARMFilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
     unsigned StartBit, unsigned NumBits) const {
   Field = 0;
 
@@ -860,7 +863,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
 
 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
 /// filter array as a series of chars.
-void FilterChooser::dumpFilterArray(raw_ostream &o,
+void ARMFilterChooser::dumpFilterArray(raw_ostream &o,
     bit_value_t (&filter)[BIT_WIDTH]) {
   unsigned bitIndex;
 
@@ -884,8 +887,8 @@ void FilterChooser::dumpFilterArray(raw_ostream &o,
 
 /// dumpStack - dumpStack traverses the filter chooser chain and calls
 /// dumpFilterArray on each filter chooser up to the top level one.
-void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
-  FilterChooser *current = this;
+void ARMFilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
+  ARMFilterChooser *current = this;
 
   while (current) {
     o << prefix;
@@ -896,7 +899,7 @@ void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
 }
 
 // Called from Filter::recurse() when singleton exists.  For debug purpose.
-void FilterChooser::SingletonExists(unsigned Opc) {
+void ARMFilterChooser::SingletonExists(unsigned Opc) {
   insn_t Insn0;
   insnWithID(Insn0, Opc);
 
@@ -923,7 +926,7 @@ void FilterChooser::SingletonExists(unsigned Opc) {
 // This returns a list of undecoded bits of an instructions, for example,
 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
 // decoded bits in order to verify that the instruction matches the Opcode.
-unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
+unsigned ARMFilterChooser::getIslands(std::vector<unsigned> &StartBits,
     std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
     insn_t &Insn) {
   unsigned Num, BitNo;
@@ -983,7 +986,7 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
 
 // Emits code to decode the singleton.  Return true if we have matched all the
 // well-known bits.
-bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+bool ARMFilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
                                          unsigned Opc) {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
@@ -1046,8 +1049,9 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
 }
 
 // Emits code to decode the singleton, and then to decode the rest.
-void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-    Filter &Best) {
+void ARMFilterChooser::emitSingletonDecoder(raw_ostream &o,
+                                            unsigned &Indentation,
+                                            ARMFilter &Best) {
 
   unsigned Opc = Best.getSingletonOpc();
 
@@ -1063,10 +1067,11 @@ void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
 
 // Assign a single filter and run with it.  Top level API client can initialize
 // with a single filter to start the filtering process.
-void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
-    unsigned numBit, bool mixed) {
+void ARMFilterChooser::runSingleFilter(ARMFilterChooser &owner,
+                                       unsigned startBit,
+                                       unsigned numBit, bool mixed) {
   Filters.clear();
-  Filter F(*this, startBit, numBit, true);
+  ARMFilter F(*this, startBit, numBit, true);
   Filters.push_back(F);
   BestIndex = 0; // Sole Filter instance to choose from.
   bestFilter().recurse();
@@ -1074,18 +1079,18 @@ void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
 
 // reportRegion is a helper function for filterProcessor to mark a region as
 // eligible for use as a filter region.
-void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
-    unsigned BitIndex, bool AllowMixed) {
+void ARMFilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
+                                    unsigned BitIndex, bool AllowMixed) {
   if (RA == ATTR_MIXED && AllowMixed)
-    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));   
+    Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, true));   
   else if (RA == ATTR_ALL_SET && !AllowMixed)
-    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, false));
+    Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, false));
 }
 
 // FilterProcessor scans the well-known encoding bits of the instructions and
 // builds up a list of candidate filters.  It chooses the best filter and
 // recursively descends down the decoding tree.
-bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
+bool ARMFilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   Filters.clear();
   BestIndex = -1;
   unsigned numInstructions = Opcodes.size();
@@ -1317,7 +1322,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
 // Decides on the best configuration of filter(s) to use in order to decode
 // the instructions.  A conflict of instructions may occur, in which case we
 // dump the conflict set to the standard error.
-void FilterChooser::doFilter() {
+void ARMFilterChooser::doFilter() {
   unsigned Num = Opcodes.size();
   assert(Num && "FilterChooser created with no instructions");
 
@@ -1350,7 +1355,7 @@ void FilterChooser::doFilter() {
 // Emits code to decode our share of instructions.  Returns true if the
 // emitted code causes a return, which occurs if we know how to decode
 // the instruction at this level or the instruction is not decodeable.
-bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
+bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
   if (Opcodes.size() == 1)
     // There is only one instruction in the set, which is great!
     // Call emitSingletonDecoder() to see whether there are any remaining
@@ -1359,7 +1364,7 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
 
   // Choose the best filter to do the decodings!
   if (BestIndex != -1) {
-    Filter &Best = bestFilter();
+    ARMFilter &Best = bestFilter();
     if (Best.getNumFiltered() == 1)
       emitSingletonDecoder(o, Indentation, Best);
     else
@@ -1488,11 +1493,11 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
 
 class ARMDecoderEmitter::ARMDEBackend {
 public:
-  ARMDEBackend(ARMDecoderEmitter &frontend) :
+  ARMDEBackend(ARMDecoderEmitter &frontend, RecordKeeper &Records) :
     NumberedInstructions(),
     Opcodes(),
     Frontend(frontend),
-    Target(),
+    Target(Records),
     FC(NULL)
   {
     if (Target.getName() == "ARM")
@@ -1538,13 +1543,14 @@ protected:
   std::vector<unsigned> Opcodes2;
   ARMDecoderEmitter &Frontend;
   CodeGenTarget Target;
-  FilterChooser *FC;
+  ARMFilterChooser *FC;
 
   TARGET_NAME_t TargetName;
 };
 
-bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
-    const CodeGenInstruction &CGI, TARGET_NAME_t TN) {
+bool ARMDecoderEmitter::
+ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
+                                  TARGET_NAME_t TN) {
   const Record &Def = *CGI.TheDef;
   const StringRef Name = Def.getName();
   uint8_t Form = getByteField(Def, "Form");
@@ -1559,6 +1565,10 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
   // which is a better design and less fragile than the name matchings.
   if (Bits.allInComplete()) return false;
 
+  // Ignore "asm parser only" instructions.
+  if (Def.getValueAsBit("isAsmParserOnly"))
+    return false;
+
   if (TN == TARGET_ARM) {
     // FIXME: what about Int_MemBarrierV6 and Int_SyncBarrierV6?
     if ((Name != "Int_MemBarrierV7" && Name != "Int_SyncBarrierV7") &&
@@ -1566,13 +1576,6 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
       return false;
     if (thumbInstruction(Form))
       return false;
-    if (Name.find("CMPz") != std::string::npos /* ||
-        Name.find("CMNz") != std::string::npos */)
-      return false;
-
-    // Ignore pseudo instructions.
-    if (Name == "BXr9" || Name == "BMOVPCRX" || Name == "BMOVPCRXr9")
-      return false;
 
     // Tail calls are other patterns that generate existing instructions.
     if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" ||
@@ -1583,11 +1586,6 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
         Name == "MOVr_TC")
       return false;
 
-    // VLDMQ/VSTMQ can be handled with the more generic VLDMD/VSTMD.
-    if (Name == "VLDMQ" || Name == "VLDMQ_UPD" ||
-        Name == "VSTMQ" || Name == "VSTMQ_UPD")
-      return false;
-
     //
     // The following special cases are for conflict resolutions.
     //
@@ -1610,13 +1608,13 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
     // better off using the generic RSCri and RSCrs instructions.
     if (Name == "RSCSri" || Name == "RSCSrs") return false;
 
-    // MOVCCr, MOVCCs, MOVCCi, FCYPScc, FCYPDcc, FNEGScc, and FNEGDcc are used
-    // in the compiler to implement conditional moves.  We can ignore them in
-    // favor of their more generic versions of instructions.
-    // See also SDNode *ARMDAGToDAGISel::Select(SDValue Op).
-    if (Name == "MOVCCr" || Name == "MOVCCs" || Name == "MOVCCi" ||
-        Name == "FCPYScc" || Name == "FCPYDcc" ||
-        Name == "FNEGScc" || Name == "FNEGDcc")
+    // MOVCCr, MOVCCs, MOVCCi, MOVCCi16, FCYPScc, FCYPDcc, FNEGScc, and
+    // FNEGDcc are used in the compiler to implement conditional moves.
+    // We can ignore them in favor of their more generic versions of
+    // instructions. See also SDNode *ARMDAGToDAGISel::Select(SDValue Op).
+    if (Name == "MOVCCr"   || Name == "MOVCCs"  || Name == "MOVCCi" ||
+        Name == "MOVCCi16" || Name == "FCPYScc" || Name == "FCPYDcc" ||
+        Name == "FNEGScc"  || Name == "FNEGDcc")
       return false;
 
     // Ditto for VMOVDcc, VMOVScc, VNEGDcc, and VNEGScc.
@@ -1624,15 +1622,10 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
         Name == "VNEGScc")
       return false;
 
-    // Ignore the *_sfp instructions when decoding.  They are used by the
-    // compiler to implement scalar floating point operations using vector
-    // operations in order to work around some performance issues.
-    if (Name.find("_sfp") != std::string::npos) return false;
-
-    // LDM_RET is a special case of LDM (Load Multiple) where the registers
+    // LDMIA_RET is a special case of LDM (Load Multiple) where the registers
     // loaded include the PC, causing a branch to a loaded address.  Ignore
-    // the LDM_RET instruction when decoding.
-    if (Name == "LDM_RET") return false;
+    // the LDMIA_RET instruction when decoding.
+    if (Name == "LDMIA_RET") return false;
 
     // Bcc is in a more generic form than B.  Ignore B when decoding.
     if (Name == "B") return false;
@@ -1671,18 +1664,17 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
     // VREV64qf is equivalent to VREV64q32.
     if (Name == "VREV64df" || Name == "VREV64qf") return false;
 
-    // VDUPLNfd is equivalent to VDUPLN32d; VDUPfdf is specialized VDUPLN32d.
-    // VDUPLNfq is equivalent to VDUPLN32q; VDUPfqf is specialized VDUPLN32q.
+    // VDUPLNfd is equivalent to VDUPLN32d.
+    // VDUPLNfq is equivalent to VDUPLN32q.
     // VLD1df is equivalent to VLD1d32.
     // VLD1qf is equivalent to VLD1q32.
     // VLD2d64 is equivalent to VLD1q64.
     // VST1df is equivalent to VST1d32.
     // VST1qf is equivalent to VST1q32.
     // VST2d64 is equivalent to VST1q64.
-    if (Name == "VDUPLNfd" || Name == "VDUPfdf" ||
-        Name == "VDUPLNfq" || Name == "VDUPfqf" ||
-        Name == "VLD1df" || Name == "VLD1qf" || Name == "VLD2d64" ||
-        Name == "VST1df" || Name == "VST1qf" || Name == "VST2d64")
+    if (Name == "VDUPLNfd" || Name == "VDUPLNfq" ||
+        Name == "VLD1df"   || Name == "VLD1qf"   || Name == "VLD2d64" ||
+        Name == "VST1df"   || Name == "VST1qf"   || Name == "VST2d64")
       return false;
   } else if (TN == TARGET_THUMB) {
     if (!thumbInstruction(Form))
@@ -1696,12 +1688,8 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
     if (Name == "tTPsoft" || Name == "t2TPsoft")
       return false;
 
-    // Ignore tLEApcrel and tLEApcrelJT, prefer tADDrPCi.
-    if (Name == "tLEApcrel" || Name == "tLEApcrelJT")
-      return false;
-
-    // Ignore t2LEApcrel, prefer the generic t2ADD* for disassembly printing.
-    if (Name == "t2LEApcrel")
+    // Ignore tADR, prefer tADDrPCi.
+    if (Name == "tADR")
       return false;
 
     // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
@@ -1719,42 +1707,33 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
     if (Name == "t2LDRDpci")
       return false;
 
-    // Ignore t2TBB, t2TBH and prefer the generic t2TBBgen, t2TBHgen.
-    if (Name == "t2TBB" || Name == "t2TBH")
-      return false;
-
     // Resolve conflicts:
     //
     //   tBfar conflicts with tBLr9
-    //   tCMNz conflicts with tCMN (with assembly format strings being equal)
-    //   tPOP_RET/t2LDM_RET conflict with tPOP/t2LDM (ditto)
+    //   tPOP_RET/t2LDMIA_RET conflict with tPOP/t2LDM (ditto)
     //   tMOVCCi conflicts with tMOVi8
     //   tMOVCCr conflicts with tMOVgpr2gpr
-    //   tBR_JTr conflicts with tBRIND
     //   tSpill conflicts with tSTRspi
     //   tLDRcp conflicts with tLDRspi
     //   tRestore conflicts with tLDRspi
-    //   t2LEApcrelJT conflicts with t2LEApcrel
+    //   t2MOVCCi16 conflicts with tMOVi16
     if (Name == "tBfar" ||
-        /* Name == "tCMNz" || */ Name == "tCMPzi8" || Name == "tCMPzr" ||
-        Name == "tCMPzhir" || /* Name == "t2CMNzrr" || Name == "t2CMNzrs" ||
-        Name == "t2CMNzri" || */ Name == "t2CMPzrr" || Name == "t2CMPzrs" ||
-        Name == "t2CMPzri" || Name == "tPOP_RET" || Name == "t2LDM_RET" ||
-        Name == "tMOVCCi" || Name == "tMOVCCr" || Name == "tBR_JTr" ||
+        Name == "tPOP_RET" || Name == "t2LDMIA_RET" ||
+        Name == "tMOVCCi" || Name == "tMOVCCr" ||
         Name == "tSpill" || Name == "tLDRcp" || Name == "tRestore" ||
-        Name == "t2LEApcrelJT")
+        Name == "t2MOVCCi16")
       return false;
   }
 
-  // Dumps the instruction encoding format.
-  switch (TargetName) {
-  case TARGET_ARM:
-  case TARGET_THUMB:
-    DEBUG(errs() << Name << " " << stringForARMFormat((ARMFormat)Form));
-    break;
-  }
-
   DEBUG({
+      // Dumps the instruction encoding format.
+      switch (TargetName) {
+      case TARGET_ARM:
+      case TARGET_THUMB:
+        errs() << Name << " " << stringForARMFormat((ARMFormat)Form);
+        break;
+      }
+
       errs() << " ";
 
       // Dumps the instruction encoding bits.
@@ -1763,8 +1742,8 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
       errs() << '\n';
 
       // Dumps the list of operand info.
-      for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) {
-        CodeGenInstruction::OperandInfo Info = CGI.OperandList[i];
+      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
+        const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
         const std::string &OperandName = Info.Name;
         const Record &OperandDef = *Info.Rec;
 
@@ -1778,32 +1757,20 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
 void ARMDecoderEmitter::ARMDEBackend::populateInstructions() {
   getInstructionsByEnumValue(NumberedInstructions);
 
-  uint16_t numUIDs = NumberedInstructions.size();
-  uint16_t uid;
-
-  const char *instClass = NULL;
-
-  switch (TargetName) {
-  case TARGET_ARM:
-    instClass = "InstARM";
-    break;
-  default:
-    assert(0 && "Unreachable code!");
-  }
-
-  for (uid = 0; uid < numUIDs; uid++) {
-    // filter out intrinsics
-    if (!NumberedInstructions[uid]->TheDef->isSubClassOf(instClass))
-      continue;
+  unsigned numUIDs = NumberedInstructions.size();
+  if (TargetName == TARGET_ARM) {
+    for (unsigned uid = 0; uid < numUIDs; uid++) {
+      // filter out intrinsics
+      if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM"))
+        continue;
 
-    if (populateInstruction(*NumberedInstructions[uid], TargetName))
-      Opcodes.push_back(uid);
-  }
+      if (populateInstruction(*NumberedInstructions[uid], TargetName))
+        Opcodes.push_back(uid);
+    }
 
-  // Special handling for the ARM chip, which supports two modes of execution.
-  // This branch handles the Thumb opcodes.
-  if (TargetName == TARGET_ARM) {
-    for (uid = 0; uid < numUIDs; uid++) {
+    // Special handling for the ARM chip, which supports two modes of execution.
+    // This branch handles the Thumb opcodes.
+    for (unsigned uid = 0; uid < numUIDs; uid++) {
       // filter out intrinsics
       if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM")
           && !NumberedInstructions[uid]->TheDef->isSubClassOf("InstThumb"))
@@ -1812,6 +1779,18 @@ void ARMDecoderEmitter::ARMDEBackend::populateInstructions() {
       if (populateInstruction(*NumberedInstructions[uid], TARGET_THUMB))
         Opcodes2.push_back(uid);
     }
+
+    return;
+  }
+
+  // For other targets.
+  for (unsigned uid = 0; uid < numUIDs; uid++) {
+    Record *R = NumberedInstructions[uid]->TheDef;
+    if (R->getValueAsString("Namespace") == "TargetOpcode")
+      continue;
+
+    if (populateInstruction(*NumberedInstructions[uid], TargetName))
+      Opcodes.push_back(uid);
   }
 }
 
@@ -1826,25 +1805,25 @@ void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) {
     assert(0 && "Unreachable code!");
   }
 
-  o << "#include \"llvm/System/DataTypes.h\"\n";
+  o << "#include \"llvm/Support/DataTypes.h\"\n";
   o << "#include <assert.h>\n";
   o << '\n';
   o << "namespace llvm {\n\n";
 
-  FilterChooser::setTargetName(TargetName);
+  ARMFilterChooser::setTargetName(TargetName);
 
   switch (TargetName) {
   case TARGET_ARM: {
     // Emit common utility and ARM ISA decoder.
-    FC = new FilterChooser(NumberedInstructions, Opcodes);
+    FC = new ARMFilterChooser(NumberedInstructions, Opcodes);
     // Reset indentation level.
     unsigned Indentation = 0;
     FC->emitTop(o, Indentation);
     delete FC;
 
     // Emit Thumb ISA decoder as well.
-    FilterChooser::setTargetName(TARGET_THUMB);
-    FC = new FilterChooser(NumberedInstructions, Opcodes2);
+    ARMFilterChooser::setTargetName(TARGET_THUMB);
+    FC = new ARMFilterChooser(NumberedInstructions, Opcodes2);
     // Reset indentation level.
     Indentation = 0;
     FC->emitBot(o, Indentation);
@@ -1863,7 +1842,7 @@ void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) {
 
 void ARMDecoderEmitter::initBackend()
 {
-    Backend = new ARMDEBackend(*this);
+  Backend = new ARMDEBackend(*this, Records);
 }
 
 void ARMDecoderEmitter::run(raw_ostream &o)
diff --git a/utils/TableGen/ARMDecoderEmitter.h b/utils/TableGen/ARMDecoderEmitter.h
index 571a94778acb..1faeb91fae8a 100644
--- a/utils/TableGen/ARMDecoderEmitter.h
+++ b/utils/TableGen/ARMDecoderEmitter.h
@@ -17,7 +17,7 @@
 
 #include "TableGenBackend.h"
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 558398648d2c..e3def4185238 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -8,7 +8,11 @@
 //===----------------------------------------------------------------------===//
 //
 // This tablegen backend emits a target specifier matcher for converting parsed
-// assembly operands in the MCInst structures.
+// assembly operands in the MCInst structures. It also emits a matcher for
+// custom operand parsing.
+//
+// Converting assembly operands into MCInst structures
+// ---------------------------------------------------
 //
 // The input to the target specific matcher is a list of literal tokens and
 // operands. The target specific parser should generally eliminate any syntax
@@ -20,7 +24,7 @@
 // Some example inputs, for X86:
 //   'addl' (immediate ...) (register ...)
 //   'add' (immediate ...) (memory ...)
-//   'call' '*' %epc 
+//   'call' '*' %epc
 //
 // The assembly matcher is responsible for converting this input into a precise
 // machine instruction (i.e., an instruction with a well defined encoding). This
@@ -63,26 +67,47 @@
 //      In addition, the subset relation amongst classes induces a partial order
 //      on such tuples, which we use to resolve ambiguities.
 //
-//      FIXME: What do we do if a crazy case shows up where this is the wrong
-//      resolution?
-//
 //   2. The input can now be treated as a tuple of classes (static tokens are
 //      simple singleton sets). Each such tuple should generally map to a single
 //      instruction (we currently ignore cases where this isn't true, whee!!!),
 //      which we can emit a simple matcher for.
 //
+// Custom Operand Parsing
+// ----------------------
+//
+//  Some targets need a custom way to parse operands, some specific instructions
+//  can contain arguments that can represent processor flags and other kinds of
+//  identifiers that need to be mapped to specific valeus in the final encoded
+//  instructions. The target specific custom operand parsing works in the
+//  following way:
+//
+//   1. A operand match table is built, each entry contains a mnemonic, an
+//      operand class, a mask for all operand positions for that same
+//      class/mnemonic and target features to be checked while trying to match.
+//
+//   2. The operand matcher will try every possible entry with the same
+//      mnemonic and will check if the target feature for this mnemonic also
+//      matches. After that, if the operand to be matched has its index
+//      present in the mask, a successfull match occurs. Otherwise, fallback
+//      to the regular operand parsing.
+//
+//   3. For a match success, each operand class that has a 'ParserMethod'
+//      becomes part of a switch from where the custom method is called.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AsmMatcherEmitter.h"
 #include "CodeGenTarget.h"
 #include "Record.h"
+#include "StringMatcher.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include <list>
 #include <map>
 #include <set>
 using namespace llvm;
@@ -91,197 +116,8 @@ static cl::opt<std::string>
 MatchPrefix("match-prefix", cl::init(""),
             cl::desc("Only match instructions with the given prefix"));
 
-/// FlattenVariants - Flatten an .td file assembly string by selecting the
-/// variant at index \arg N.
-static std::string FlattenVariants(const std::string &AsmString,
-                                   unsigned N) {
-  StringRef Cur = AsmString;
-  std::string Res = "";
-  
-  for (;;) {
-    // Find the start of the next variant string.
-    size_t VariantsStart = 0;
-    for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
-      if (Cur[VariantsStart] == '{' && 
-          (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
-                                  Cur[VariantsStart-1] != '\\')))
-        break;
-
-    // Add the prefix to the result.
-    Res += Cur.slice(0, VariantsStart);
-    if (VariantsStart == Cur.size())
-      break;
-
-    ++VariantsStart; // Skip the '{'.
-
-    // Scan to the end of the variants string.
-    size_t VariantsEnd = VariantsStart;
-    unsigned NestedBraces = 1;
-    for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
-      if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
-        if (--NestedBraces == 0)
-          break;
-      } else if (Cur[VariantsEnd] == '{')
-        ++NestedBraces;
-    }
-
-    // Select the Nth variant (or empty).
-    StringRef Selection = Cur.slice(VariantsStart, VariantsEnd);
-    for (unsigned i = 0; i != N; ++i)
-      Selection = Selection.split('|').second;
-    Res += Selection.split('|').first;
-
-    assert(VariantsEnd != Cur.size() && 
-           "Unterminated variants in assembly string!");
-    Cur = Cur.substr(VariantsEnd + 1);
-  } 
-
-  return Res;
-}
-
-/// TokenizeAsmString - Tokenize a simplified assembly string.
-static void TokenizeAsmString(StringRef AsmString, 
-                              SmallVectorImpl<StringRef> &Tokens) {
-  unsigned Prev = 0;
-  bool InTok = true;
-  for (unsigned i = 0, e = AsmString.size(); i != e; ++i) {
-    switch (AsmString[i]) {
-    case '[':
-    case ']':
-    case '*':
-    case '!':
-    case ' ':
-    case '\t':
-    case ',':
-      if (InTok) {
-        Tokens.push_back(AsmString.slice(Prev, i));
-        InTok = false;
-      }
-      if (!isspace(AsmString[i]) && AsmString[i] != ',')
-        Tokens.push_back(AsmString.substr(i, 1));
-      Prev = i + 1;
-      break;
-      
-    case '\\':
-      if (InTok) {
-        Tokens.push_back(AsmString.slice(Prev, i));
-        InTok = false;
-      }
-      ++i;
-      assert(i != AsmString.size() && "Invalid quoted character");
-      Tokens.push_back(AsmString.substr(i, 1));
-      Prev = i + 1;
-      break;
-
-    case '$': {
-      // If this isn't "${", treat like a normal token.
-      if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') {
-        if (InTok) {
-          Tokens.push_back(AsmString.slice(Prev, i));
-          InTok = false;
-        }
-        Prev = i;
-        break;
-      }
-
-      if (InTok) {
-        Tokens.push_back(AsmString.slice(Prev, i));
-        InTok = false;
-      }
-
-      StringRef::iterator End =
-        std::find(AsmString.begin() + i, AsmString.end(), '}');
-      assert(End != AsmString.end() && "Missing brace in operand reference!");
-      size_t EndPos = End - AsmString.begin();
-      Tokens.push_back(AsmString.slice(i, EndPos+1));
-      Prev = EndPos + 1;
-      i = EndPos;
-      break;
-    }
-
-    case '.':
-      if (InTok) {
-        Tokens.push_back(AsmString.slice(Prev, i));
-      }
-      Prev = i;
-      InTok = true;
-      break;
-
-    default:
-      InTok = true;
-    }
-  }
-  if (InTok && Prev != AsmString.size())
-    Tokens.push_back(AsmString.substr(Prev));
-}
-
-static bool IsAssemblerInstruction(StringRef Name,
-                                   const CodeGenInstruction &CGI, 
-                                   const SmallVectorImpl<StringRef> &Tokens) {
-  // Ignore "codegen only" instructions.
-  if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
-    return false;
-
-  // Ignore pseudo ops.
-  //
-  // FIXME: This is a hack; can we convert these instructions to set the
-  // "codegen only" bit instead?
-  if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
-    if (Form->getValue()->getAsString() == "Pseudo")
-      return false;
-
-  // Ignore "Int_*" and "*_Int" instructions, which are internal aliases.
-  //
-  // FIXME: This is a total hack.
-  if (StringRef(Name).startswith("Int_") || StringRef(Name).endswith("_Int"))
-    return false;
-
-  // Ignore instructions with no .s string.
-  //
-  // FIXME: What are these?
-  if (CGI.AsmString.empty())
-    return false;
-
-  // FIXME: Hack; ignore any instructions with a newline in them.
-  if (std::find(CGI.AsmString.begin(), 
-                CGI.AsmString.end(), '\n') != CGI.AsmString.end())
-    return false;
-  
-  // Ignore instructions with attributes, these are always fake instructions for
-  // simplifying codegen.
-  //
-  // FIXME: Is this true?
-  //
-  // Also, check for instructions which reference the operand multiple times;
-  // this implies a constraint we would not honor.
-  std::set<std::string> OperandNames;
-  for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
-    if (Tokens[i][0] == '$' && 
-        std::find(Tokens[i].begin(), 
-                  Tokens[i].end(), ':') != Tokens[i].end()) {
-      DEBUG({
-          errs() << "warning: '" << Name << "': "
-                 << "ignoring instruction; operand with attribute '" 
-                 << Tokens[i] << "'\n";
-        });
-      return false;
-    }
-
-    if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
-      DEBUG({
-          errs() << "warning: '" << Name << "': "
-                 << "ignoring instruction with tied operand '"
-                 << Tokens[i].str() << "'\n";
-        });
-      return false;
-    }
-  }
-
-  return true;
-}
-
 namespace {
-
+class AsmMatcherInfo;
 struct SubtargetFeatureInfo;
 
 /// ClassInfo - Helper class for storing the information about a particular
@@ -331,6 +167,10 @@ struct ClassInfo {
   /// MCInst; this is not valid for Token or register kinds.
   std::string RenderMethod;
 
+  /// ParserMethod - The name of the operand method to do a target specific
+  /// parsing on the operand.
+  std::string ParserMethod;
+
   /// For register classes, the records for all the registers in this class.
   std::set<Record*> Registers;
 
@@ -360,7 +200,7 @@ public:
 
       std::set<Record*> Tmp;
       std::insert_iterator< std::set<Record*> > II(Tmp, Tmp.begin());
-      std::set_intersection(Registers.begin(), Registers.end(), 
+      std::set_intersection(Registers.begin(), Registers.end(),
                             RHS.Registers.begin(), RHS.Registers.end(),
                             II);
 
@@ -380,11 +220,11 @@ public:
     const ClassInfo *RHSRoot = &RHS;
     while (!RHSRoot->SuperClasses.empty())
       RHSRoot = RHSRoot->SuperClasses.front();
-    
+
     return Root == RHSRoot;
   }
 
-  /// isSubsetOf - Test whether this class is a subset of \arg RHS; 
+  /// isSubsetOf - Test whether this class is a subset of \arg RHS;
   bool isSubsetOf(const ClassInfo &RHS) const {
     // This is a subset of RHS if it is the same class...
     if (this == &RHS)
@@ -430,32 +270,131 @@ public:
   }
 };
 
-/// InstructionInfo - Helper class for storing the necessary information for an
-/// instruction which is capable of being matched.
-struct InstructionInfo {
-  struct Operand {
+/// MatchableInfo - Helper class for storing the necessary information for an
+/// instruction or alias which is capable of being matched.
+struct MatchableInfo {
+  struct AsmOperand {
+    /// Token - This is the token that the operand came from.
+    StringRef Token;
+
     /// The unique class instance this operand should match.
     ClassInfo *Class;
 
-    /// The original operand this corresponds to, if any.
-    const CodeGenInstruction::OperandInfo *OperandInfo;
+    /// The operand name this is, if anything.
+    StringRef SrcOpName;
+
+    /// The suboperand index within SrcOpName, or -1 for the entire operand.
+    int SubOpIdx;
+
+    explicit AsmOperand(StringRef T) : Token(T), Class(0), SubOpIdx(-1) {}
+  };
+
+  /// ResOperand - This represents a single operand in the result instruction
+  /// generated by the match.  In cases (like addressing modes) where a single
+  /// assembler operand expands to multiple MCOperands, this represents the
+  /// single assembler operand, not the MCOperand.
+  struct ResOperand {
+    enum {
+      /// RenderAsmOperand - This represents an operand result that is
+      /// generated by calling the render method on the assembly operand.  The
+      /// corresponding AsmOperand is specified by AsmOperandNum.
+      RenderAsmOperand,
+
+      /// TiedOperand - This represents a result operand that is a duplicate of
+      /// a previous result operand.
+      TiedOperand,
+
+      /// ImmOperand - This represents an immediate value that is dumped into
+      /// the operand.
+      ImmOperand,
+
+      /// RegOperand - This represents a fixed register that is dumped in.
+      RegOperand
+    } Kind;
+
+    union {
+      /// This is the operand # in the AsmOperands list that this should be
+      /// copied from.
+      unsigned AsmOperandNum;
+
+      /// TiedOperandNum - This is the (earlier) result operand that should be
+      /// copied from.
+      unsigned TiedOperandNum;
+
+      /// ImmVal - This is the immediate value added to the instruction.
+      int64_t ImmVal;
+
+      /// Register - This is the register record.
+      Record *Register;
+    };
+
+    /// MINumOperands - The number of MCInst operands populated by this
+    /// operand.
+    unsigned MINumOperands;
+
+    static ResOperand getRenderedOp(unsigned AsmOpNum, unsigned NumOperands) {
+      ResOperand X;
+      X.Kind = RenderAsmOperand;
+      X.AsmOperandNum = AsmOpNum;
+      X.MINumOperands = NumOperands;
+      return X;
+    }
+
+    static ResOperand getTiedOp(unsigned TiedOperandNum) {
+      ResOperand X;
+      X.Kind = TiedOperand;
+      X.TiedOperandNum = TiedOperandNum;
+      X.MINumOperands = 1;
+      return X;
+    }
+
+    static ResOperand getImmOp(int64_t Val) {
+      ResOperand X;
+      X.Kind = ImmOperand;
+      X.ImmVal = Val;
+      X.MINumOperands = 1;
+      return X;
+    }
+
+    static ResOperand getRegOp(Record *Reg) {
+      ResOperand X;
+      X.Kind = RegOperand;
+      X.Register = Reg;
+      X.MINumOperands = 1;
+      return X;
+    }
   };
 
-  /// InstrName - The target name for this instruction.
-  std::string InstrName;
+  /// TheDef - This is the definition of the instruction or InstAlias that this
+  /// matchable came from.
+  Record *const TheDef;
+
+  /// DefRec - This is the definition that it came from.
+  PointerUnion<const CodeGenInstruction*, const CodeGenInstAlias*> DefRec;
+
+  const CodeGenInstruction *getResultInst() const {
+    if (DefRec.is<const CodeGenInstruction*>())
+      return DefRec.get<const CodeGenInstruction*>();
+    return DefRec.get<const CodeGenInstAlias*>()->ResultInst;
+  }
 
-  /// Instr - The instruction this matches.
-  const CodeGenInstruction *Instr;
+  /// ResOperands - This is the operand list that should be built for the result
+  /// MCInst.
+  std::vector<ResOperand> ResOperands;
 
   /// AsmString - The assembly string for this instruction (with variants
-  /// removed).
+  /// removed), e.g. "movsx $src, $dst".
   std::string AsmString;
 
-  /// Tokens - The tokenized assembly pattern that this instruction matches.
-  SmallVector<StringRef, 4> Tokens;
+  /// Mnemonic - This is the first token of the matched instruction, its
+  /// mnemonic.
+  StringRef Mnemonic;
 
-  /// Operands - The operands that this instruction matches.
-  SmallVector<Operand, 4> Operands;
+  /// AsmOperands - The textual operands that this instruction matches,
+  /// annotated with a class and where in the OperandList they were defined.
+  /// This directly corresponds to the tokenized AsmString after the mnemonic is
+  /// removed.
+  SmallVector<AsmOperand, 4> AsmOperands;
 
   /// Predicates - The required subtarget features to match this instruction.
   SmallVector<SubtargetFeatureInfo*, 4> RequiredFeatures;
@@ -465,29 +404,79 @@ struct InstructionInfo {
   /// function.
   std::string ConversionFnKind;
 
-  /// operator< - Compare two instructions.
-  bool operator<(const InstructionInfo &RHS) const {
-    if (Operands.size() != RHS.Operands.size())
-      return Operands.size() < RHS.Operands.size();
+  MatchableInfo(const CodeGenInstruction &CGI)
+    : TheDef(CGI.TheDef), DefRec(&CGI), AsmString(CGI.AsmString) {
+  }
+
+  MatchableInfo(const CodeGenInstAlias *Alias)
+    : TheDef(Alias->TheDef), DefRec(Alias), AsmString(Alias->AsmString) {
+  }
+
+  void Initialize(const AsmMatcherInfo &Info,
+                  SmallPtrSet<Record*, 16> &SingletonRegisters);
+
+  /// Validate - Return true if this matchable is a valid thing to match against
+  /// and perform a bunch of validity checking.
+  bool Validate(StringRef CommentDelimiter, bool Hack) const;
+
+  /// getSingletonRegisterForAsmOperand - If the specified token is a singleton
+  /// register, return the Record for it, otherwise return null.
+  Record *getSingletonRegisterForAsmOperand(unsigned i,
+                                            const AsmMatcherInfo &Info) const;
+
+  /// FindAsmOperand - Find the AsmOperand with the specified name and
+  /// suboperand index.
+  int FindAsmOperand(StringRef N, int SubOpIdx) const {
+    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
+      if (N == AsmOperands[i].SrcOpName &&
+          SubOpIdx == AsmOperands[i].SubOpIdx)
+        return i;
+    return -1;
+  }
+
+  /// FindAsmOperandNamed - Find the first AsmOperand with the specified name.
+  /// This does not check the suboperand index.
+  int FindAsmOperandNamed(StringRef N) const {
+    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
+      if (N == AsmOperands[i].SrcOpName)
+        return i;
+    return -1;
+  }
+
+  void BuildInstructionResultOperands();
+  void BuildAliasResultOperands();
+
+  /// operator< - Compare two matchables.
+  bool operator<(const MatchableInfo &RHS) const {
+    // The primary comparator is the instruction mnemonic.
+    if (Mnemonic != RHS.Mnemonic)
+      return Mnemonic < RHS.Mnemonic;
+
+    if (AsmOperands.size() != RHS.AsmOperands.size())
+      return AsmOperands.size() < RHS.AsmOperands.size();
 
     // Compare lexicographically by operand. The matcher validates that other
-    // orderings wouldn't be ambiguous using \see CouldMatchAmiguouslyWith().
-    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-      if (*Operands[i].Class < *RHS.Operands[i].Class)
+    // orderings wouldn't be ambiguous using \see CouldMatchAmbiguouslyWith().
+    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
+      if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
         return true;
-      if (*RHS.Operands[i].Class < *Operands[i].Class)
+      if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
         return false;
     }
 
     return false;
   }
 
-  /// CouldMatchAmiguouslyWith - Check whether this instruction could
+  /// CouldMatchAmbiguouslyWith - Check whether this matchable could
   /// ambiguously match the same set of operands as \arg RHS (without being a
   /// strictly superior match).
-  bool CouldMatchAmiguouslyWith(const InstructionInfo &RHS) {
+  bool CouldMatchAmbiguouslyWith(const MatchableInfo &RHS) {
+    // The primary comparator is the instruction mnemonic.
+    if (Mnemonic != RHS.Mnemonic)
+      return false;
+
     // The number of operands is unambiguous.
-    if (Operands.size() != RHS.Operands.size())
+    if (AsmOperands.size() != RHS.AsmOperands.size())
       return false;
 
     // Otherwise, make sure the ordering of the two instructions is unambiguous
@@ -496,29 +485,31 @@ struct InstructionInfo {
 
     // Tokens and operand kinds are unambiguous (assuming a correct target
     // specific parser).
-    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
-      if (Operands[i].Class->Kind != RHS.Operands[i].Class->Kind ||
-          Operands[i].Class->Kind == ClassInfo::Token)
-        if (*Operands[i].Class < *RHS.Operands[i].Class ||
-            *RHS.Operands[i].Class < *Operands[i].Class)
+    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
+      if (AsmOperands[i].Class->Kind != RHS.AsmOperands[i].Class->Kind ||
+          AsmOperands[i].Class->Kind == ClassInfo::Token)
+        if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class ||
+            *RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
           return false;
-    
+
     // Otherwise, this operand could commute if all operands are equivalent, or
     // there is a pair of operands that compare less than and a pair that
     // compare greater than.
     bool HasLT = false, HasGT = false;
-    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-      if (*Operands[i].Class < *RHS.Operands[i].Class)
+    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
+      if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
         HasLT = true;
-      if (*RHS.Operands[i].Class < *Operands[i].Class)
+      if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
         HasGT = true;
     }
 
     return !(HasLT ^ HasGT);
   }
 
-public:
   void dump();
+
+private:
+  void TokenizeAsmString(const AsmMatcherInfo &Info);
 };
 
 /// SubtargetFeatureInfo - Helper class for storing information on a subtarget
@@ -530,26 +521,52 @@ struct SubtargetFeatureInfo {
   /// \brief An unique index assigned to represent this feature.
   unsigned Index;
 
+  SubtargetFeatureInfo(Record *D, unsigned Idx) : TheDef(D), Index(Idx) {}
+
   /// \brief The name of the enumerated constant identifying this feature.
-  std::string EnumName;
+  std::string getEnumName() const {
+    return "Feature_" + TheDef->getName();
+  }
 };
 
+struct OperandMatchEntry {
+  unsigned OperandMask;
+  MatchableInfo* MI;
+  ClassInfo *CI;
+
+  static OperandMatchEntry Create(MatchableInfo* mi, ClassInfo *ci,
+                                  unsigned opMask) {
+    OperandMatchEntry X;
+    X.OperandMask = opMask;
+    X.CI = ci;
+    X.MI = mi;
+    return X;
+  }
+};
+
+
 class AsmMatcherInfo {
 public:
+  /// Tracked Records
+  RecordKeeper &Records;
+
   /// The tablegen AsmParser record.
   Record *AsmParser;
 
-  /// The AsmParser "CommentDelimiter" value.
-  std::string CommentDelimiter;
+  /// Target - The target information.
+  CodeGenTarget &Target;
 
   /// The AsmParser "RegisterPrefix" value.
   std::string RegisterPrefix;
 
   /// The classes which are needed for matching.
   std::vector<ClassInfo*> Classes;
-  
-  /// The information on the instruction to match.
-  std::vector<InstructionInfo*> Instructions;
+
+  /// The information on the matchables to match.
+  std::vector<MatchableInfo*> Matchables;
+
+  /// Info for custom matching operands by user defined methods.
+  std::vector<OperandMatchEntry> OperandMatchInfo;
 
   /// Map of Register records to their class information.
   std::map<Record*, ClassInfo*> RegisterClasses;
@@ -572,108 +589,265 @@ private:
   ClassInfo *getTokenClass(StringRef Token);
 
   /// getOperandClass - Lookup or create the class for the given operand.
-  ClassInfo *getOperandClass(StringRef Token,
-                             const CodeGenInstruction::OperandInfo &OI);
-
-  /// getSubtargetFeature - Lookup or create the subtarget feature info for the
-  /// given operand.
-  SubtargetFeatureInfo *getSubtargetFeature(Record *Def) {
-    assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
-
-    SubtargetFeatureInfo *&Entry = SubtargetFeatures[Def];
-    if (!Entry) {
-      Entry = new SubtargetFeatureInfo;
-      Entry->TheDef = Def;
-      Entry->Index = SubtargetFeatures.size() - 1;
-      Entry->EnumName = "Feature_" + Def->getName();
-      assert(Entry->Index < 32 && "Too many subtarget features!");
-    }
-
-    return Entry;
-  }
+  ClassInfo *getOperandClass(const CGIOperandList::OperandInfo &OI,
+                             int SubOpIdx = -1);
 
   /// BuildRegisterClasses - Build the ClassInfo* instances for register
   /// classes.
-  void BuildRegisterClasses(CodeGenTarget &Target, 
-                            std::set<std::string> &SingletonRegisterNames);
+  void BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters);
 
   /// BuildOperandClasses - Build the ClassInfo* instances for user defined
   /// operand classes.
-  void BuildOperandClasses(CodeGenTarget &Target);
+  void BuildOperandClasses();
+
+  void BuildInstructionOperandReference(MatchableInfo *II, StringRef OpName,
+                                        unsigned AsmOpIdx);
+  void BuildAliasOperandReference(MatchableInfo *II, StringRef OpName,
+                                  MatchableInfo::AsmOperand &Op);
 
 public:
-  AsmMatcherInfo(Record *_AsmParser);
+  AsmMatcherInfo(Record *AsmParser,
+                 CodeGenTarget &Target,
+                 RecordKeeper &Records);
 
   /// BuildInfo - Construct the various tables used during matching.
-  void BuildInfo(CodeGenTarget &Target);
+  void BuildInfo();
+
+  /// BuildOperandMatchInfo - Build the necessary information to handle user
+  /// defined operand parsing methods.
+  void BuildOperandMatchInfo();
+
+  /// getSubtargetFeature - Lookup or create the subtarget feature info for the
+  /// given operand.
+  SubtargetFeatureInfo *getSubtargetFeature(Record *Def) const {
+    assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
+    std::map<Record*, SubtargetFeatureInfo*>::const_iterator I =
+      SubtargetFeatures.find(Def);
+    return I == SubtargetFeatures.end() ? 0 : I->second;
+  }
+
+  RecordKeeper &getRecords() const {
+    return Records;
+  }
 };
 
 }
 
-void InstructionInfo::dump() {
-  errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"'
-         << ", tokens:[";
-  for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
-    errs() << Tokens[i];
-    if (i + 1 != e)
-      errs() << ", ";
-  }
-  errs() << "]\n";
+void MatchableInfo::dump() {
+  errs() << TheDef->getName() << " -- " << "flattened:\"" << AsmString <<"\"\n";
 
-  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-    Operand &Op = Operands[i];
+  for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
+    AsmOperand &Op = AsmOperands[i];
     errs() << "  op[" << i << "] = " << Op.Class->ClassName << " - ";
-    if (Op.Class->Kind == ClassInfo::Token) {
-      errs() << '\"' << Tokens[i] << "\"\n";
-      continue;
+    errs() << '\"' << Op.Token << "\"\n";
+  }
+}
+
+void MatchableInfo::Initialize(const AsmMatcherInfo &Info,
+                               SmallPtrSet<Record*, 16> &SingletonRegisters) {
+  // TODO: Eventually support asmparser for Variant != 0.
+  AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, 0);
+
+  TokenizeAsmString(Info);
+
+  // Compute the require features.
+  std::vector<Record*> Predicates =TheDef->getValueAsListOfDefs("Predicates");
+  for (unsigned i = 0, e = Predicates.size(); i != e; ++i)
+    if (SubtargetFeatureInfo *Feature =
+        Info.getSubtargetFeature(Predicates[i]))
+      RequiredFeatures.push_back(Feature);
+
+  // Collect singleton registers, if used.
+  for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
+    if (Record *Reg = getSingletonRegisterForAsmOperand(i, Info))
+      SingletonRegisters.insert(Reg);
+  }
+}
+
+/// TokenizeAsmString - Tokenize a simplified assembly string.
+void MatchableInfo::TokenizeAsmString(const AsmMatcherInfo &Info) {
+  StringRef String = AsmString;
+  unsigned Prev = 0;
+  bool InTok = true;
+  for (unsigned i = 0, e = String.size(); i != e; ++i) {
+    switch (String[i]) {
+    case '[':
+    case ']':
+    case '*':
+    case '!':
+    case ' ':
+    case '\t':
+    case ',':
+      if (InTok) {
+        AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+        InTok = false;
+      }
+      if (!isspace(String[i]) && String[i] != ',')
+        AsmOperands.push_back(AsmOperand(String.substr(i, 1)));
+      Prev = i + 1;
+      break;
+
+    case '\\':
+      if (InTok) {
+        AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+        InTok = false;
+      }
+      ++i;
+      assert(i != String.size() && "Invalid quoted character");
+      AsmOperands.push_back(AsmOperand(String.substr(i, 1)));
+      Prev = i + 1;
+      break;
+
+    case '$': {
+      if (InTok) {
+        AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+        InTok = false;
+      }
+
+      // If this isn't "${", treat like a normal token.
+      if (i + 1 == String.size() || String[i + 1] != '{') {
+        Prev = i;
+        break;
+      }
+
+      StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
+      assert(End != String.end() && "Missing brace in operand reference!");
+      size_t EndPos = End - String.begin();
+      AsmOperands.push_back(AsmOperand(String.slice(i, EndPos+1)));
+      Prev = EndPos + 1;
+      i = EndPos;
+      break;
     }
 
-    if (!Op.OperandInfo) {
-      errs() << "(singleton register)\n";
-      continue;
+    case '.':
+      if (InTok)
+        AsmOperands.push_back(AsmOperand(String.slice(Prev, i)));
+      Prev = i;
+      InTok = true;
+      break;
+
+    default:
+      InTok = true;
     }
+  }
+  if (InTok && Prev != String.size())
+    AsmOperands.push_back(AsmOperand(String.substr(Prev)));
+
+  // The first token of the instruction is the mnemonic, which must be a
+  // simple string, not a $foo variable or a singleton register.
+  assert(!AsmOperands.empty() && "Instruction has no tokens?");
+  Mnemonic = AsmOperands[0].Token;
+  if (Mnemonic[0] == '$' || getSingletonRegisterForAsmOperand(0, Info))
+    throw TGError(TheDef->getLoc(),
+                  "Invalid instruction mnemonic '" + Mnemonic.str() + "'!");
+
+  // Remove the first operand, it is tracked in the mnemonic field.
+  AsmOperands.erase(AsmOperands.begin());
+}
 
-    const CodeGenInstruction::OperandInfo &OI = *Op.OperandInfo;
-    errs() << OI.Name << " " << OI.Rec->getName()
-           << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
+bool MatchableInfo::Validate(StringRef CommentDelimiter, bool Hack) const {
+  // Reject matchables with no .s string.
+  if (AsmString.empty())
+    throw TGError(TheDef->getLoc(), "instruction with empty asm string");
+
+  // Reject any matchables with a newline in them, they should be marked
+  // isCodeGenOnly if they are pseudo instructions.
+  if (AsmString.find('\n') != std::string::npos)
+    throw TGError(TheDef->getLoc(),
+                  "multiline instruction is not valid for the asmparser, "
+                  "mark it isCodeGenOnly");
+
+  // Remove comments from the asm string.  We know that the asmstring only
+  // has one line.
+  if (!CommentDelimiter.empty() &&
+      StringRef(AsmString).find(CommentDelimiter) != StringRef::npos)
+    throw TGError(TheDef->getLoc(),
+                  "asmstring for instruction has comment character in it, "
+                  "mark it isCodeGenOnly");
+
+  // Reject matchables with operand modifiers, these aren't something we can
+  // handle, the target should be refactored to use operands instead of
+  // modifiers.
+  //
+  // Also, check for instructions which reference the operand multiple times;
+  // this implies a constraint we would not honor.
+  std::set<std::string> OperandNames;
+  for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
+    StringRef Tok = AsmOperands[i].Token;
+    if (Tok[0] == '$' && Tok.find(':') != StringRef::npos)
+      throw TGError(TheDef->getLoc(),
+                    "matchable with operand modifier '" + Tok.str() +
+                    "' not supported by asm matcher.  Mark isCodeGenOnly!");
+
+    // Verify that any operand is only mentioned once.
+    // We reject aliases and ignore instructions for now.
+    if (Tok[0] == '$' && !OperandNames.insert(Tok).second) {
+      if (!Hack)
+        throw TGError(TheDef->getLoc(),
+                      "ERROR: matchable with tied operand '" + Tok.str() +
+                      "' can never be matched!");
+      // FIXME: Should reject these.  The ARM backend hits this with $lane in a
+      // bunch of instructions.  It is unclear what the right answer is.
+      DEBUG({
+        errs() << "warning: '" << TheDef->getName() << "': "
+               << "ignoring instruction with tied operand '"
+               << Tok.str() << "'\n";
+      });
+      return false;
+    }
   }
+
+  return true;
+}
+
+/// getSingletonRegisterForAsmOperand - If the specified token is a singleton
+/// register, return the register name, otherwise return a null StringRef.
+Record *MatchableInfo::
+getSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info) const{
+  StringRef Tok = AsmOperands[i].Token;
+  if (!Tok.startswith(Info.RegisterPrefix))
+    return 0;
+
+  StringRef RegName = Tok.substr(Info.RegisterPrefix.size());
+  if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(RegName))
+    return Reg->TheDef;
+
+  // If there is no register prefix (i.e. "%" in "%eax"), then this may
+  // be some random non-register token, just ignore it.
+  if (Info.RegisterPrefix.empty())
+    return 0;
+
+  // Otherwise, we have something invalid prefixed with the register prefix,
+  // such as %foo.
+  std::string Err = "unable to find register for '" + RegName.str() +
+  "' (which matches register prefix)";
+  throw TGError(TheDef->getLoc(), Err);
 }
 
 static std::string getEnumNameForToken(StringRef Str) {
   std::string Res;
-  
+
   for (StringRef::iterator it = Str.begin(), ie = Str.end(); it != ie; ++it) {
     switch (*it) {
     case '*': Res += "_STAR_"; break;
     case '%': Res += "_PCT_"; break;
     case ':': Res += "_COLON_"; break;
-
+    case '!': Res += "_EXCLAIM_"; break;
+    case '.': Res += "_DOT_"; break;
     default:
-      if (isalnum(*it))  {
+      if (isalnum(*it))
         Res += *it;
-      } else {
+      else
         Res += "_" + utostr((unsigned) *it) + "_";
-      }
     }
   }
 
   return Res;
 }
 
-/// getRegisterRecord - Get the register record for \arg name, or 0.
-static Record *getRegisterRecord(CodeGenTarget &Target, StringRef Name) {
-  for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Target.getRegisters()[i];
-    if (Name == Reg.TheDef->getValueAsString("AsmName"))
-      return Reg.TheDef;
-  }
-
-  return 0;
-}
-
 ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) {
   ClassInfo *&Entry = TokenClasses[Token];
-  
+
   if (!Entry) {
     Entry = new ClassInfo();
     Entry->Kind = ClassInfo::Token;
@@ -682,6 +856,7 @@ ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) {
     Entry->ValueName = Token;
     Entry->PredicateMethod = "<invalid>";
     Entry->RenderMethod = "<invalid>";
+    Entry->ParserMethod = "";
     Classes.push_back(Entry);
   }
 
@@ -689,65 +864,58 @@ ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) {
 }
 
 ClassInfo *
-AsmMatcherInfo::getOperandClass(StringRef Token,
-                                const CodeGenInstruction::OperandInfo &OI) {
-  if (OI.Rec->isSubClassOf("RegisterClass")) {
-    ClassInfo *CI = RegisterClassClasses[OI.Rec];
-
-    if (!CI) {
-      PrintError(OI.Rec->getLoc(), "register class has no class info!");
-      throw std::string("ERROR: Missing register class!");
-    }
-
-    return CI;
+AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
+                                int SubOpIdx) {
+  Record *Rec = OI.Rec;
+  if (SubOpIdx != -1)
+    Rec = dynamic_cast<DefInit*>(OI.MIOperandInfo->getArg(SubOpIdx))->getDef();
+
+  if (Rec->isSubClassOf("RegisterClass")) {
+    if (ClassInfo *CI = RegisterClassClasses[Rec])
+      return CI;
+    throw TGError(Rec->getLoc(), "register class has no class info!");
   }
 
-  assert(OI.Rec->isSubClassOf("Operand") && "Unexpected operand!");
-  Record *MatchClass = OI.Rec->getValueAsDef("ParserMatchClass");
-  ClassInfo *CI = AsmOperandClasses[MatchClass];
-
-  if (!CI) {
-    PrintError(OI.Rec->getLoc(), "operand has no match class!");
-    throw std::string("ERROR: Missing match class!");
-  }
+  assert(Rec->isSubClassOf("Operand") && "Unexpected operand!");
+  Record *MatchClass = Rec->getValueAsDef("ParserMatchClass");
+  if (ClassInfo *CI = AsmOperandClasses[MatchClass])
+    return CI;
 
-  return CI;
+  throw TGError(Rec->getLoc(), "operand has no match class!");
 }
 
-void AsmMatcherInfo::BuildRegisterClasses(CodeGenTarget &Target,
-                                          std::set<std::string>
-                                            &SingletonRegisterNames) {
-  std::vector<CodeGenRegisterClass> RegisterClasses;
-  std::vector<CodeGenRegister> Registers;
-
-  RegisterClasses = Target.getRegisterClasses();
-  Registers = Target.getRegisters();
+void AsmMatcherInfo::
+BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
+  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+  const std::vector<CodeGenRegisterClass> &RegClassList =
+    Target.getRegisterClasses();
 
   // The register sets used for matching.
   std::set< std::set<Record*> > RegisterSets;
 
-  // Gather the defined sets.  
-  for (std::vector<CodeGenRegisterClass>::iterator it = RegisterClasses.begin(),
-         ie = RegisterClasses.end(); it != ie; ++it)
+  // Gather the defined sets.
+  for (std::vector<CodeGenRegisterClass>::const_iterator it =
+       RegClassList.begin(), ie = RegClassList.end(); it != ie; ++it)
     RegisterSets.insert(std::set<Record*>(it->Elements.begin(),
                                           it->Elements.end()));
 
   // Add any required singleton sets.
-  for (std::set<std::string>::iterator it = SingletonRegisterNames.begin(),
-         ie = SingletonRegisterNames.end(); it != ie; ++it)
-    if (Record *Rec = getRegisterRecord(Target, *it))
-      RegisterSets.insert(std::set<Record*>(&Rec, &Rec + 1));
-         
+  for (SmallPtrSet<Record*, 16>::iterator it = SingletonRegisters.begin(),
+       ie = SingletonRegisters.end(); it != ie; ++it) {
+    Record *Rec = *it;
+    RegisterSets.insert(std::set<Record*>(&Rec, &Rec + 1));
+  }
+
   // Introduce derived sets where necessary (when a register does not determine
   // a unique register set class), and build the mapping of registers to the set
   // they should classify to.
   std::map<Record*, std::set<Record*> > RegisterMap;
-  for (std::vector<CodeGenRegister>::iterator it = Registers.begin(),
+  for (std::vector<CodeGenRegister>::const_iterator it = Registers.begin(),
          ie = Registers.end(); it != ie; ++it) {
-    CodeGenRegister &CGR = *it;
+    const CodeGenRegister &CGR = *it;
     // Compute the intersection of all sets containing this register.
     std::set<Record*> ContainingSet;
-    
+
     for (std::set< std::set<Record*> >::iterator it = RegisterSets.begin(),
            ie = RegisterSets.end(); it != ie; ++it) {
       if (!it->count(CGR.TheDef))
@@ -755,14 +923,14 @@ void AsmMatcherInfo::BuildRegisterClasses(CodeGenTarget &Target,
 
       if (ContainingSet.empty()) {
         ContainingSet = *it;
-      } else {
-        std::set<Record*> Tmp;
-        std::swap(Tmp, ContainingSet);
-        std::insert_iterator< std::set<Record*> > II(ContainingSet,
-                                                     ContainingSet.begin());
-        std::set_intersection(Tmp.begin(), Tmp.end(), it->begin(), it->end(),
-                              II);
+        continue;
       }
+
+      std::set<Record*> Tmp;
+      std::swap(Tmp, ContainingSet);
+      std::insert_iterator< std::set<Record*> > II(ContainingSet,
+                                                   ContainingSet.begin());
+      std::set_intersection(Tmp.begin(), Tmp.end(), it->begin(), it->end(), II);
     }
 
     if (!ContainingSet.empty()) {
@@ -795,14 +963,14 @@ void AsmMatcherInfo::BuildRegisterClasses(CodeGenTarget &Target,
     ClassInfo *CI = RegisterSetClasses[*it];
     for (std::set< std::set<Record*> >::iterator it2 = RegisterSets.begin(),
            ie2 = RegisterSets.end(); it2 != ie2; ++it2)
-      if (*it != *it2 && 
+      if (*it != *it2 &&
           std::includes(it2->begin(), it2->end(), it->begin(), it->end()))
         CI->SuperClasses.push_back(RegisterSetClasses[*it2]);
   }
 
   // Name the register classes which correspond to a user defined RegisterClass.
-  for (std::vector<CodeGenRegisterClass>::iterator it = RegisterClasses.begin(),
-         ie = RegisterClasses.end(); it != ie; ++it) {
+  for (std::vector<CodeGenRegisterClass>::const_iterator
+       it = RegClassList.begin(), ie = RegClassList.end(); it != ie; ++it) {
     ClassInfo *CI = RegisterSetClasses[std::set<Record*>(it->Elements.begin(),
                                                          it->Elements.end())];
     if (CI->ValueName.empty()) {
@@ -818,36 +986,35 @@ void AsmMatcherInfo::BuildRegisterClasses(CodeGenTarget &Target,
   // Populate the map for individual registers.
   for (std::map<Record*, std::set<Record*> >::iterator it = RegisterMap.begin(),
          ie = RegisterMap.end(); it != ie; ++it)
-    this->RegisterClasses[it->first] = RegisterSetClasses[it->second];
+    RegisterClasses[it->first] = RegisterSetClasses[it->second];
 
   // Name the register classes which correspond to singleton registers.
-  for (std::set<std::string>::iterator it = SingletonRegisterNames.begin(),
-         ie = SingletonRegisterNames.end(); it != ie; ++it) {
-    if (Record *Rec = getRegisterRecord(Target, *it)) {
-      ClassInfo *CI = this->RegisterClasses[Rec];
-      assert(CI && "Missing singleton register class info!");
-
-      if (CI->ValueName.empty()) {
-        CI->ClassName = Rec->getName();
-        CI->Name = "MCK_" + Rec->getName();
-        CI->ValueName = Rec->getName();
-      } else
-        CI->ValueName = CI->ValueName + "," + Rec->getName();
-    }
+  for (SmallPtrSet<Record*, 16>::iterator it = SingletonRegisters.begin(),
+         ie = SingletonRegisters.end(); it != ie; ++it) {
+    Record *Rec = *it;
+    ClassInfo *CI = RegisterClasses[Rec];
+    assert(CI && "Missing singleton register class info!");
+
+    if (CI->ValueName.empty()) {
+      CI->ClassName = Rec->getName();
+      CI->Name = "MCK_" + Rec->getName();
+      CI->ValueName = Rec->getName();
+    } else
+      CI->ValueName = CI->ValueName + "," + Rec->getName();
   }
 }
 
-void AsmMatcherInfo::BuildOperandClasses(CodeGenTarget &Target) {
-  std::vector<Record*> AsmOperands;
-  AsmOperands = Records.getAllDerivedDefinitions("AsmOperandClass");
+void AsmMatcherInfo::BuildOperandClasses() {
+  std::vector<Record*> AsmOperands =
+    Records.getAllDerivedDefinitions("AsmOperandClass");
 
   // Pre-populate AsmOperandClasses map.
-  for (std::vector<Record*>::iterator it = AsmOperands.begin(), 
+  for (std::vector<Record*>::iterator it = AsmOperands.begin(),
          ie = AsmOperands.end(); it != ie; ++it)
     AsmOperandClasses[*it] = new ClassInfo();
 
   unsigned Index = 0;
-  for (std::vector<Record*>::iterator it = AsmOperands.begin(), 
+  for (std::vector<Record*>::iterator it = AsmOperands.begin(),
          ie = AsmOperands.end(); it != ie; ++it, ++Index) {
     ClassInfo *CI = AsmOperandClasses[*it];
     CI->Kind = ClassInfo::UserClass0 + Index;
@@ -875,7 +1042,7 @@ void AsmMatcherInfo::BuildOperandClasses(CodeGenTarget &Target) {
     if (StringInit *SI = dynamic_cast<StringInit*>(PMName)) {
       CI->PredicateMethod = SI->getValue();
     } else {
-      assert(dynamic_cast<UnsetInit*>(PMName) && 
+      assert(dynamic_cast<UnsetInit*>(PMName) &&
              "Unexpected PredicateMethod field!");
       CI->PredicateMethod = "is" + CI->ClassName;
     }
@@ -890,128 +1057,192 @@ void AsmMatcherInfo::BuildOperandClasses(CodeGenTarget &Target) {
       CI->RenderMethod = "add" + CI->ClassName + "Operands";
     }
 
+    // Get the parse method name or leave it as empty.
+    Init *PRMName = (*it)->getValueInit("ParserMethod");
+    if (StringInit *SI = dynamic_cast<StringInit*>(PRMName))
+      CI->ParserMethod = SI->getValue();
+
     AsmOperandClasses[*it] = CI;
     Classes.push_back(CI);
   }
 }
 
-AsmMatcherInfo::AsmMatcherInfo(Record *_AsmParser) 
-  : AsmParser(_AsmParser),
-    CommentDelimiter(AsmParser->getValueAsString("CommentDelimiter")),
-    RegisterPrefix(AsmParser->getValueAsString("RegisterPrefix"))
-{
+AsmMatcherInfo::AsmMatcherInfo(Record *asmParser,
+                               CodeGenTarget &target,
+                               RecordKeeper &records)
+  : Records(records), AsmParser(asmParser), Target(target),
+    RegisterPrefix(AsmParser->getValueAsString("RegisterPrefix")) {
 }
 
-void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
+/// BuildOperandMatchInfo - Build the necessary information to handle user
+/// defined operand parsing methods.
+void AsmMatcherInfo::BuildOperandMatchInfo() {
+
+  /// Map containing a mask with all operands indicies that can be found for
+  /// that class inside a instruction.
+  std::map<ClassInfo*, unsigned> OpClassMask;
+
+  for (std::vector<MatchableInfo*>::const_iterator it =
+       Matchables.begin(), ie = Matchables.end();
+       it != ie; ++it) {
+    MatchableInfo &II = **it;
+    OpClassMask.clear();
+
+    // Keep track of all operands of this instructions which belong to the
+    // same class.
+    for (unsigned i = 0, e = II.AsmOperands.size(); i != e; ++i) {
+      MatchableInfo::AsmOperand &Op = II.AsmOperands[i];
+      if (Op.Class->ParserMethod.empty())
+        continue;
+      unsigned &OperandMask = OpClassMask[Op.Class];
+      OperandMask |= (1 << i);
+    }
+
+    // Generate operand match info for each mnemonic/operand class pair.
+    for (std::map<ClassInfo*, unsigned>::iterator iit = OpClassMask.begin(),
+         iie = OpClassMask.end(); iit != iie; ++iit) {
+      unsigned OpMask = iit->second;
+      ClassInfo *CI = iit->first;
+      OperandMatchInfo.push_back(OperandMatchEntry::Create(&II, CI, OpMask));
+    }
+  }
+}
+
+void AsmMatcherInfo::BuildInfo() {
+  // Build information about all of the AssemblerPredicates.
+  std::vector<Record*> AllPredicates =
+    Records.getAllDerivedDefinitions("Predicate");
+  for (unsigned i = 0, e = AllPredicates.size(); i != e; ++i) {
+    Record *Pred = AllPredicates[i];
+    // Ignore predicates that are not intended for the assembler.
+    if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
+      continue;
+
+    if (Pred->getName().empty())
+      throw TGError(Pred->getLoc(), "Predicate has no name!");
+
+    unsigned FeatureNo = SubtargetFeatures.size();
+    SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo);
+    assert(FeatureNo < 32 && "Too many subtarget features!");
+  }
+
+  StringRef CommentDelimiter = AsmParser->getValueAsString("CommentDelimiter");
+
   // Parse the instructions; we need to do this first so that we can gather the
   // singleton register classes.
-  std::set<std::string> SingletonRegisterNames;
-  
-  const std::vector<const CodeGenInstruction*> &InstrList =
-    Target.getInstructionsByEnumValue();
-  
-  for (unsigned i = 0, e = InstrList.size(); i != e; ++i) {
-    const CodeGenInstruction &CGI = *InstrList[i];
+  SmallPtrSet<Record*, 16> SingletonRegisters;
+  for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
+       E = Target.inst_end(); I != E; ++I) {
+    const CodeGenInstruction &CGI = **I;
 
+    // If the tblgen -match-prefix option is specified (for tblgen hackers),
+    // filter the set of instructions we consider.
     if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix))
       continue;
 
-    OwningPtr<InstructionInfo> II(new InstructionInfo());
-    
-    II->InstrName = CGI.TheDef->getName();
-    II->Instr = &CGI;
-    II->AsmString = FlattenVariants(CGI.AsmString, 0);
-
-    // Remove comments from the asm string.
-    if (!CommentDelimiter.empty()) {
-      size_t Idx = StringRef(II->AsmString).find(CommentDelimiter);
-      if (Idx != StringRef::npos)
-        II->AsmString = II->AsmString.substr(0, Idx);
-    }
-
-    TokenizeAsmString(II->AsmString, II->Tokens);
-
-    // Ignore instructions which shouldn't be matched.
-    if (!IsAssemblerInstruction(CGI.TheDef->getName(), CGI, II->Tokens))
+    // Ignore "codegen only" instructions.
+    if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
       continue;
 
-    // Collect singleton registers, if used.
-    if (!RegisterPrefix.empty()) {
-      for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
-        if (II->Tokens[i].startswith(RegisterPrefix)) {
-          StringRef RegName = II->Tokens[i].substr(RegisterPrefix.size());
-          Record *Rec = getRegisterRecord(Target, RegName);
-          
-          if (!Rec) {
-            std::string Err = "unable to find register for '" + RegName.str() + 
-              "' (which matches register prefix)";
-            throw TGError(CGI.TheDef->getLoc(), Err);
-          }
-
-          SingletonRegisterNames.insert(RegName);
+    // Validate the operand list to ensure we can handle this instruction.
+    for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
+      const CGIOperandList::OperandInfo &OI = CGI.Operands[i];
+
+      // Validate tied operands.
+      if (OI.getTiedRegister() != -1) {
+        // If we have a tied operand that consists of multiple MCOperands,
+        // reject it.  We reject aliases and ignore instructions for now.
+        if (OI.MINumOperands != 1) {
+          // FIXME: Should reject these.  The ARM backend hits this with $lane
+          // in a bunch of instructions. It is unclear what the right answer is.
+          DEBUG({
+            errs() << "warning: '" << CGI.TheDef->getName() << "': "
+            << "ignoring instruction with multi-operand tied operand '"
+            << OI.Name << "'\n";
+          });
+          continue;
         }
       }
     }
 
-    // Compute the require features.
-    ListInit *Predicates = CGI.TheDef->getValueAsListInit("Predicates");
-    for (unsigned i = 0, e = Predicates->getSize(); i != e; ++i) {
-      if (DefInit *Pred = dynamic_cast<DefInit*>(Predicates->getElement(i))) {
-        // Ignore OptForSize and OptForSpeed, they aren't really requirements,
-        // rather they are hints to isel.
-        //
-        // FIXME: Find better way to model this.
-        if (Pred->getDef()->getName() == "OptForSize" ||
-            Pred->getDef()->getName() == "OptForSpeed")
-          continue;
+    OwningPtr<MatchableInfo> II(new MatchableInfo(CGI));
 
-        // FIXME: Total hack; for now, we just limit ourselves to In32BitMode
-        // and In64BitMode, because we aren't going to have the right feature
-        // masks for SSE and friends. We need to decide what we are going to do
-        // about CPU subtypes to implement this the right way.
-        if (Pred->getDef()->getName() != "In32BitMode" &&
-            Pred->getDef()->getName() != "In64BitMode")
-          continue;
+    II->Initialize(*this, SingletonRegisters);
 
-        II->RequiredFeatures.push_back(getSubtargetFeature(Pred->getDef()));
-      }
-    }
+    // Ignore instructions which shouldn't be matched and diagnose invalid
+    // instruction definitions with an error.
+    if (!II->Validate(CommentDelimiter, true))
+      continue;
 
-    Instructions.push_back(II.take());
+    // Ignore "Int_*" and "*_Int" instructions, which are internal aliases.
+    //
+    // FIXME: This is a total hack.
+    if (StringRef(II->TheDef->getName()).startswith("Int_") ||
+        StringRef(II->TheDef->getName()).endswith("_Int"))
+      continue;
+
+     Matchables.push_back(II.take());
+  }
+
+  // Parse all of the InstAlias definitions and stick them in the list of
+  // matchables.
+  std::vector<Record*> AllInstAliases =
+    Records.getAllDerivedDefinitions("InstAlias");
+  for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
+    CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target);
+
+    // If the tblgen -match-prefix option is specified (for tblgen hackers),
+    // filter the set of instruction aliases we consider, based on the target
+    // instruction.
+    if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith(
+          MatchPrefix))
+      continue;
+
+    OwningPtr<MatchableInfo> II(new MatchableInfo(Alias));
+
+    II->Initialize(*this, SingletonRegisters);
+
+    // Validate the alias definitions.
+    II->Validate(CommentDelimiter, false);
+
+    Matchables.push_back(II.take());
   }
 
   // Build info for the register classes.
-  BuildRegisterClasses(Target, SingletonRegisterNames);
+  BuildRegisterClasses(SingletonRegisters);
 
   // Build info for the user defined assembly operand classes.
-  BuildOperandClasses(Target);
+  BuildOperandClasses();
 
-  // Build the instruction information.
-  for (std::vector<InstructionInfo*>::iterator it = Instructions.begin(),
-         ie = Instructions.end(); it != ie; ++it) {
-    InstructionInfo *II = *it;
+  // Build the information about matchables, now that we have fully formed
+  // classes.
+  for (std::vector<MatchableInfo*>::iterator it = Matchables.begin(),
+         ie = Matchables.end(); it != ie; ++it) {
+    MatchableInfo *II = *it;
 
-    for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
-      StringRef Token = II->Tokens[i];
+    // Parse the tokens after the mnemonic.
+    // Note: BuildInstructionOperandReference may insert new AsmOperands, so
+    // don't precompute the loop bound.
+    for (unsigned i = 0; i != II->AsmOperands.size(); ++i) {
+      MatchableInfo::AsmOperand &Op = II->AsmOperands[i];
+      StringRef Token = Op.Token;
 
       // Check for singleton registers.
-      if (!RegisterPrefix.empty() && Token.startswith(RegisterPrefix)) {
-        StringRef RegName = II->Tokens[i].substr(RegisterPrefix.size());
-        InstructionInfo::Operand Op;
-        Op.Class = RegisterClasses[getRegisterRecord(Target, RegName)];
-        Op.OperandInfo = 0;
+      if (Record *RegRecord = II->getSingletonRegisterForAsmOperand(i, *this)) {
+        Op.Class = RegisterClasses[RegRecord];
         assert(Op.Class && Op.Class->Registers.size() == 1 &&
                "Unexpected class for singleton register");
-        II->Operands.push_back(Op);
         continue;
       }
 
       // Check for simple tokens.
       if (Token[0] != '$') {
-        InstructionInfo::Operand Op;
         Op.Class = getTokenClass(Token);
-        Op.OperandInfo = 0;
-        II->Operands.push_back(Op);
+        continue;
+      }
+
+      if (Token.size() > 1 && isdigit(Token[1])) {
+        Op.Class = getTokenClass(Token);
         continue;
       }
 
@@ -1022,58 +1253,204 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
       else
         OperandName = Token.substr(1);
 
-      // Map this token to an operand. FIXME: Move elsewhere.
-      unsigned Idx;
-      try {
-        Idx = II->Instr->getOperandNamed(OperandName);
-      } catch(...) {
-        throw std::string("error: unable to find operand: '" + 
-                          OperandName.str() + "'");
-      }
+      if (II->DefRec.is<const CodeGenInstruction*>())
+        BuildInstructionOperandReference(II, OperandName, i);
+      else
+        BuildAliasOperandReference(II, OperandName, Op);
+    }
 
-      // FIXME: This is annoying, the named operand may be tied (e.g.,
-      // XCHG8rm). What we want is the untied operand, which we now have to
-      // grovel for. Only worry about this for single entry operands, we have to
-      // clean this up anyway.
-      const CodeGenInstruction::OperandInfo *OI = &II->Instr->OperandList[Idx];
-      if (OI->Constraints[0].isTied()) {
-        unsigned TiedOp = OI->Constraints[0].getTiedOperand();
-
-        // The tied operand index is an MIOperand index, find the operand that
-        // contains it.
-        for (unsigned i = 0, e = II->Instr->OperandList.size(); i != e; ++i) {
-          if (II->Instr->OperandList[i].MIOperandNo == TiedOp) {
-            OI = &II->Instr->OperandList[i];
-            break;
-          }
-        }
+    if (II->DefRec.is<const CodeGenInstruction*>())
+      II->BuildInstructionResultOperands();
+    else
+      II->BuildAliasResultOperands();
+  }
 
-        assert(OI && "Unable to find tied operand target!");
-      }
+  // Reorder classes so that classes preceed super classes.
+  std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
+}
 
-      InstructionInfo::Operand Op;
-      Op.Class = getOperandClass(Token, *OI);
-      Op.OperandInfo = OI;
-      II->Operands.push_back(Op);
+/// BuildInstructionOperandReference - The specified operand is a reference to a
+/// named operand such as $src.  Resolve the Class and OperandInfo pointers.
+void AsmMatcherInfo::
+BuildInstructionOperandReference(MatchableInfo *II,
+                                 StringRef OperandName,
+                                 unsigned AsmOpIdx) {
+  const CodeGenInstruction &CGI = *II->DefRec.get<const CodeGenInstruction*>();
+  const CGIOperandList &Operands = CGI.Operands;
+  MatchableInfo::AsmOperand *Op = &II->AsmOperands[AsmOpIdx];
+
+  // Map this token to an operand.
+  unsigned Idx;
+  if (!Operands.hasOperandNamed(OperandName, Idx))
+    throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" +
+                  OperandName.str() + "'");
+
+  // If the instruction operand has multiple suboperands, but the parser
+  // match class for the asm operand is still the default "ImmAsmOperand",
+  // then handle each suboperand separately.
+  if (Op->SubOpIdx == -1 && Operands[Idx].MINumOperands > 1) {
+    Record *Rec = Operands[Idx].Rec;
+    assert(Rec->isSubClassOf("Operand") && "Unexpected operand!");
+    Record *MatchClass = Rec->getValueAsDef("ParserMatchClass");
+    if (MatchClass && MatchClass->getValueAsString("Name") == "Imm") {
+      // Insert remaining suboperands after AsmOpIdx in II->AsmOperands.
+      StringRef Token = Op->Token; // save this in case Op gets moved
+      for (unsigned SI = 1, SE = Operands[Idx].MINumOperands; SI != SE; ++SI) {
+        MatchableInfo::AsmOperand NewAsmOp(Token);
+        NewAsmOp.SubOpIdx = SI;
+        II->AsmOperands.insert(II->AsmOperands.begin()+AsmOpIdx+SI, NewAsmOp);
+      }
+      // Replace Op with first suboperand.
+      Op = &II->AsmOperands[AsmOpIdx]; // update the pointer in case it moved
+      Op->SubOpIdx = 0;
     }
   }
 
-  // Reorder classes so that classes preceed super classes.
-  std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
+  // Set up the operand class.
+  Op->Class = getOperandClass(Operands[Idx], Op->SubOpIdx);
+
+  // If the named operand is tied, canonicalize it to the untied operand.
+  // For example, something like:
+  //   (outs GPR:$dst), (ins GPR:$src)
+  // with an asmstring of
+  //   "inc $src"
+  // we want to canonicalize to:
+  //   "inc $dst"
+  // so that we know how to provide the $dst operand when filling in the result.
+  int OITied = Operands[Idx].getTiedRegister();
+  if (OITied != -1) {
+    // The tied operand index is an MIOperand index, find the operand that
+    // contains it.
+    std::pair<unsigned, unsigned> Idx = Operands.getSubOperandNumber(OITied);
+    OperandName = Operands[Idx.first].Name;
+    Op->SubOpIdx = Idx.second;
+  }
+
+  Op->SrcOpName = OperandName;
 }
 
-static std::pair<unsigned, unsigned> *
-GetTiedOperandAtIndex(SmallVectorImpl<std::pair<unsigned, unsigned> > &List,
-                      unsigned Index) {
-  for (unsigned i = 0, e = List.size(); i != e; ++i)
-    if (Index == List[i].first)
-      return &List[i];
+/// BuildAliasOperandReference - When parsing an operand reference out of the
+/// matching string (e.g. "movsx $src, $dst"), determine what the class of the
+/// operand reference is by looking it up in the result pattern definition.
+void AsmMatcherInfo::BuildAliasOperandReference(MatchableInfo *II,
+                                                StringRef OperandName,
+                                                MatchableInfo::AsmOperand &Op) {
+  const CodeGenInstAlias &CGA = *II->DefRec.get<const CodeGenInstAlias*>();
+
+  // Set up the operand class.
+  for (unsigned i = 0, e = CGA.ResultOperands.size(); i != e; ++i)
+    if (CGA.ResultOperands[i].isRecord() &&
+        CGA.ResultOperands[i].getName() == OperandName) {
+      // It's safe to go with the first one we find, because CodeGenInstAlias
+      // validates that all operands with the same name have the same record.
+      unsigned ResultIdx = CGA.ResultInstOperandIndex[i].first;
+      Op.SubOpIdx = CGA.ResultInstOperandIndex[i].second;
+      Op.Class = getOperandClass(CGA.ResultInst->Operands[ResultIdx],
+                                 Op.SubOpIdx);
+      Op.SrcOpName = OperandName;
+      return;
+    }
 
-  return 0;
+  throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" +
+                OperandName.str() + "'");
 }
 
-static void EmitConvertToMCInst(CodeGenTarget &Target,
-                                std::vector<InstructionInfo*> &Infos,
+void MatchableInfo::BuildInstructionResultOperands() {
+  const CodeGenInstruction *ResultInst = getResultInst();
+
+  // Loop over all operands of the result instruction, determining how to
+  // populate them.
+  for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
+    const CGIOperandList::OperandInfo &OpInfo = ResultInst->Operands[i];
+
+    // If this is a tied operand, just copy from the previously handled operand.
+    int TiedOp = OpInfo.getTiedRegister();
+    if (TiedOp != -1) {
+      ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
+      continue;
+    }
+
+    // Find out what operand from the asmparser this MCInst operand comes from.
+    int SrcOperand = FindAsmOperandNamed(OpInfo.Name);
+    if (OpInfo.Name.empty() || SrcOperand == -1)
+      throw TGError(TheDef->getLoc(), "Instruction '" +
+                    TheDef->getName() + "' has operand '" + OpInfo.Name +
+                    "' that doesn't appear in asm string!");
+
+    // Check if the one AsmOperand populates the entire operand.
+    unsigned NumOperands = OpInfo.MINumOperands;
+    if (AsmOperands[SrcOperand].SubOpIdx == -1) {
+      ResOperands.push_back(ResOperand::getRenderedOp(SrcOperand, NumOperands));
+      continue;
+    }
+
+    // Add a separate ResOperand for each suboperand.
+    for (unsigned AI = 0; AI < NumOperands; ++AI) {
+      assert(AsmOperands[SrcOperand+AI].SubOpIdx == (int)AI &&
+             AsmOperands[SrcOperand+AI].SrcOpName == OpInfo.Name &&
+             "unexpected AsmOperands for suboperands");
+      ResOperands.push_back(ResOperand::getRenderedOp(SrcOperand + AI, 1));
+    }
+  }
+}
+
+void MatchableInfo::BuildAliasResultOperands() {
+  const CodeGenInstAlias &CGA = *DefRec.get<const CodeGenInstAlias*>();
+  const CodeGenInstruction *ResultInst = getResultInst();
+
+  // Loop over all operands of the result instruction, determining how to
+  // populate them.
+  unsigned AliasOpNo = 0;
+  unsigned LastOpNo = CGA.ResultInstOperandIndex.size();
+  for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
+    const CGIOperandList::OperandInfo *OpInfo = &ResultInst->Operands[i];
+
+    // If this is a tied operand, just copy from the previously handled operand.
+    int TiedOp = OpInfo->getTiedRegister();
+    if (TiedOp != -1) {
+      ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
+      continue;
+    }
+
+    // Handle all the suboperands for this operand.
+    const std::string &OpName = OpInfo->Name;
+    for ( ; AliasOpNo <  LastOpNo &&
+            CGA.ResultInstOperandIndex[AliasOpNo].first == i; ++AliasOpNo) {
+      int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second;
+
+      // Find out what operand from the asmparser that this MCInst operand
+      // comes from.
+      switch (CGA.ResultOperands[AliasOpNo].Kind) {
+      default: assert(0 && "unexpected InstAlias operand kind");
+      case CodeGenInstAlias::ResultOperand::K_Record: {
+        StringRef Name = CGA.ResultOperands[AliasOpNo].getName();
+        int SrcOperand = FindAsmOperand(Name, SubIdx);
+        if (SrcOperand == -1)
+          throw TGError(TheDef->getLoc(), "Instruction '" +
+                        TheDef->getName() + "' has operand '" + OpName +
+                        "' that doesn't appear in asm string!");
+        unsigned NumOperands = (SubIdx == -1 ? OpInfo->MINumOperands : 1);
+        ResOperands.push_back(ResOperand::getRenderedOp(SrcOperand,
+                                                        NumOperands));
+        break;
+      }
+      case CodeGenInstAlias::ResultOperand::K_Imm: {
+        int64_t ImmVal = CGA.ResultOperands[AliasOpNo].getImm();
+        ResOperands.push_back(ResOperand::getImmOp(ImmVal));
+        break;
+      }
+      case CodeGenInstAlias::ResultOperand::K_Reg: {
+        Record *Reg = CGA.ResultOperands[AliasOpNo].getRegister();
+        ResOperands.push_back(ResOperand::getRegOp(Reg));
+        break;
+      }
+      }
+    }
+  }
+}
+
+static void EmitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName,
+                                std::vector<MatchableInfo*> &Infos,
                                 raw_ostream &OS) {
   // Write the convert function to a separate stream, so we can drop it after
   // the enum.
@@ -1084,8 +1461,8 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
   std::set<std::string> GeneratedFns;
 
   // Start the unified conversion function.
-
-  CvtOS << "static void ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
+  CvtOS << "bool " << Target.getName() << ClassName << "::\n";
+  CvtOS << "ConvertToMCInst(unsigned Kind, MCInst &Inst, "
         << "unsigned Opcode,\n"
         << "                      const SmallVectorImpl<MCParsedAsmOperand*"
         << "> &Operands) {\n";
@@ -1095,92 +1472,94 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
 
   // Start the enum, which we will generate inline.
 
-  OS << "// Unified function for converting operants to MCInst instances.\n\n";
+  OS << "// Unified function for converting operands to MCInst instances.\n\n";
   OS << "enum ConversionKind {\n";
-  
+
   // TargetOperandClass - This is the target's operand class, like X86Operand.
   std::string TargetOperandClass = Target.getName() + "Operand";
-  
-  for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
+
+  for (std::vector<MatchableInfo*>::const_iterator it = Infos.begin(),
          ie = Infos.end(); it != ie; ++it) {
-    InstructionInfo &II = **it;
-
-    // Order the (class) operands by the order to convert them into an MCInst.
-    SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList;
-    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
-      InstructionInfo::Operand &Op = II.Operands[i];
-      if (Op.OperandInfo)
-        MIOperandList.push_back(std::make_pair(Op.OperandInfo->MIOperandNo, i));
-    }
+    MatchableInfo &II = **it;
 
-    // Find any tied operands.
-    SmallVector<std::pair<unsigned, unsigned>, 4> TiedOperands;
-    for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
-      const CodeGenInstruction::OperandInfo &OpInfo = II.Instr->OperandList[i];
-      for (unsigned j = 0, e = OpInfo.Constraints.size(); j != e; ++j) {
-        const CodeGenInstruction::ConstraintInfo &CI = OpInfo.Constraints[j];
-        if (CI.isTied())
-          TiedOperands.push_back(std::make_pair(OpInfo.MIOperandNo + j,
-                                                CI.getTiedOperand()));
-      }
-    }
+    // Check if we have a custom match function.
+    StringRef AsmMatchConverter = II.getResultInst()->TheDef->getValueAsString(
+      "AsmMatchConverter");
+    if (!AsmMatchConverter.empty()) {
+      std::string Signature = "ConvertCustom_" + AsmMatchConverter.str();
+      II.ConversionFnKind = Signature;
+
+      // Check if we have already generated this signature.
+      if (!GeneratedFns.insert(Signature).second)
+        continue;
 
-    std::sort(MIOperandList.begin(), MIOperandList.end());
+      // If not, emit it now.  Add to the enum list.
+      OS << "  " << Signature << ",\n";
 
-    // Compute the total number of operands.
-    unsigned NumMIOperands = 0;
-    for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
-      const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i];
-      NumMIOperands = std::max(NumMIOperands, 
-                               OI.MIOperandNo + OI.MINumOperands);
+      CvtOS << "  case " << Signature << ":\n";
+      CvtOS << "    return " << AsmMatchConverter
+            << "(Inst, Opcode, Operands);\n";
+      continue;
     }
 
     // Build the conversion function signature.
     std::string Signature = "Convert";
-    unsigned CurIndex = 0;
-    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
-      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
-      assert(CurIndex <= Op.OperandInfo->MIOperandNo &&
-             "Duplicate match for instruction operand!");
-      
-      // Skip operands which weren't matched by anything, this occurs when the
-      // .td file encodes "implicit" operands as explicit ones.
-      //
-      // FIXME: This should be removed from the MCInst structure.
-      for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex) {
-        std::pair<unsigned, unsigned> *Tie = GetTiedOperandAtIndex(TiedOperands,
-                                                                   CurIndex);
-        if (!Tie)
-          Signature += "__Imp";
+    std::string CaseBody;
+    raw_string_ostream CaseOS(CaseBody);
+
+    // Compute the convert enum and the case body.
+    for (unsigned i = 0, e = II.ResOperands.size(); i != e; ++i) {
+      const MatchableInfo::ResOperand &OpInfo = II.ResOperands[i];
+
+      // Generate code to populate each result operand.
+      switch (OpInfo.Kind) {
+      case MatchableInfo::ResOperand::RenderAsmOperand: {
+        // This comes from something we parsed.
+        MatchableInfo::AsmOperand &Op = II.AsmOperands[OpInfo.AsmOperandNum];
+
+        // Registers are always converted the same, don't duplicate the
+        // conversion function based on them.
+        Signature += "__";
+        if (Op.Class->isRegisterClass())
+          Signature += "Reg";
         else
-          Signature += "__Tie" + utostr(Tie->second);
-      }
-
-      Signature += "__";
+          Signature += Op.Class->ClassName;
+        Signature += utostr(OpInfo.MINumOperands);
+        Signature += "_" + itostr(OpInfo.AsmOperandNum);
 
-      // Registers are always converted the same, don't duplicate the conversion
-      // function based on them.
-      //
-      // FIXME: We could generalize this based on the render method, if it
-      // mattered.
-      if (Op.Class->isRegisterClass())
-        Signature += "Reg";
-      else
-        Signature += Op.Class->ClassName;
-      Signature += utostr(Op.OperandInfo->MINumOperands);
-      Signature += "_" + utostr(MIOperandList[i].second);
-
-      CurIndex += Op.OperandInfo->MINumOperands;
-    }
+        CaseOS << "    ((" << TargetOperandClass << "*)Operands["
+               << (OpInfo.AsmOperandNum+1) << "])->" << Op.Class->RenderMethod
+               << "(Inst, " << OpInfo.MINumOperands << ");\n";
+        break;
+      }
 
-    // Add any trailing implicit operands.
-    for (; CurIndex != NumMIOperands; ++CurIndex) {
-      std::pair<unsigned, unsigned> *Tie = GetTiedOperandAtIndex(TiedOperands,
-                                                                 CurIndex);
-      if (!Tie)
-        Signature += "__Imp";
-      else
-        Signature += "__Tie" + utostr(Tie->second);
+      case MatchableInfo::ResOperand::TiedOperand: {
+        // If this operand is tied to a previous one, just copy the MCInst
+        // operand from the earlier one.We can only tie single MCOperand values.
+        //assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
+        unsigned TiedOp = OpInfo.TiedOperandNum;
+        assert(i > TiedOp && "Tied operand preceeds its target!");
+        CaseOS << "    Inst.addOperand(Inst.getOperand(" << TiedOp << "));\n";
+        Signature += "__Tie" + utostr(TiedOp);
+        break;
+      }
+      case MatchableInfo::ResOperand::ImmOperand: {
+        int64_t Val = OpInfo.ImmVal;
+        CaseOS << "    Inst.addOperand(MCOperand::CreateImm(" << Val << "));\n";
+        Signature += "__imm" + itostr(Val);
+        break;
+      }
+      case MatchableInfo::ResOperand::RegOperand: {
+        if (OpInfo.Register == 0) {
+          CaseOS << "    Inst.addOperand(MCOperand::CreateReg(0));\n";
+          Signature += "__reg0";
+        } else {
+          std::string N = getQualifiedName(OpInfo.Register);
+          CaseOS << "    Inst.addOperand(MCOperand::CreateReg(" << N << "));\n";
+          Signature += "__reg" + OpInfo.Register->getName();
+        }
+      }
+      }
     }
 
     II.ConversionFnKind = Signature;
@@ -1189,72 +1568,25 @@ static void EmitConvertToMCInst(CodeGenTarget &Target,
     if (!GeneratedFns.insert(Signature).second)
       continue;
 
-    // If not, emit it now.
-
-    // Add to the enum list.
+    // If not, emit it now.  Add to the enum list.
     OS << "  " << Signature << ",\n";
 
-    // And to the convert function.
     CvtOS << "  case " << Signature << ":\n";
-    CurIndex = 0;
-    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
-      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
-
-      // Add the implicit operands.
-      for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex) {
-        // See if this is a tied operand.
-        std::pair<unsigned, unsigned> *Tie = GetTiedOperandAtIndex(TiedOperands,
-                                                                   CurIndex);
-
-        if (!Tie) {
-          // If not, this is some implicit operand. Just assume it is a register
-          // for now.
-          CvtOS << "    Inst.addOperand(MCOperand::CreateReg(0));\n";
-        } else {
-          // Copy the tied operand.
-          assert(Tie->first>Tie->second && "Tied operand preceeds its target!");
-          CvtOS << "    Inst.addOperand(Inst.getOperand("
-                << Tie->second << "));\n";
-        }
-      }
-
-      CvtOS << "    ((" << TargetOperandClass << "*)Operands["
-         << MIOperandList[i].second 
-         << "])->" << Op.Class->RenderMethod 
-         << "(Inst, " << Op.OperandInfo->MINumOperands << ");\n";
-      CurIndex += Op.OperandInfo->MINumOperands;
-    }
-    
-    // And add trailing implicit operands.
-    for (; CurIndex != NumMIOperands; ++CurIndex) {
-      std::pair<unsigned, unsigned> *Tie = GetTiedOperandAtIndex(TiedOperands,
-                                                                 CurIndex);
-
-      if (!Tie) {
-        // If not, this is some implicit operand. Just assume it is a register
-        // for now.
-        CvtOS << "    Inst.addOperand(MCOperand::CreateReg(0));\n";
-      } else {
-        // Copy the tied operand.
-        assert(Tie->first>Tie->second && "Tied operand preceeds its target!");
-        CvtOS << "    Inst.addOperand(Inst.getOperand("
-              << Tie->second << "));\n";
-      }
-    }
-
-    CvtOS << "    return;\n";
+    CvtOS << CaseOS.str();
+    CvtOS << "    return true;\n";
   }
 
   // Finish the convert function.
 
   CvtOS << "  }\n";
+  CvtOS << "  return false;\n";
   CvtOS << "}\n\n";
 
   // Finish the enum, and drop the convert function after it.
 
   OS << "  NumConversionVariants\n";
   OS << "};\n\n";
-  
+
   OS << CvtOS.str();
 }
 
@@ -1268,7 +1600,7 @@ static void EmitMatchClassEnumeration(CodeGenTarget &Target,
      << "/// instruction matching.\n";
   OS << "enum MatchClassKind {\n";
   OS << "  InvalidMatchClass = 0,\n";
-  for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 
+  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
          ie = Infos.end(); it != ie; ++it) {
     ClassInfo &CI = **it;
     OS << "  " << CI.Name << ", // ";
@@ -1289,64 +1621,50 @@ static void EmitMatchClassEnumeration(CodeGenTarget &Target,
   OS << "}\n\n";
 }
 
-/// EmitClassifyOperand - Emit the function to classify an operand.
-static void EmitClassifyOperand(CodeGenTarget &Target,
-                                AsmMatcherInfo &Info,
-                                raw_ostream &OS) {
-  OS << "static MatchClassKind ClassifyOperand(MCParsedAsmOperand *GOp) {\n"
-     << "  " << Target.getName() << "Operand &Operand = *("
-     << Target.getName() << "Operand*)GOp;\n";
+/// EmitValidateOperandClass - Emit the function to validate an operand class.
+static void EmitValidateOperandClass(AsmMatcherInfo &Info,
+                                     raw_ostream &OS) {
+  OS << "static bool ValidateOperandClass(MCParsedAsmOperand *GOp, "
+     << "MatchClassKind Kind) {\n";
+  OS << "  " << Info.Target.getName() << "Operand &Operand = *("
+     << Info.Target.getName() << "Operand*)GOp;\n";
 
-  // Classify tokens.
+  // Check for Token operands first.
   OS << "  if (Operand.isToken())\n";
-  OS << "    return MatchTokenString(Operand.getToken());\n\n";
+  OS << "    return MatchTokenString(Operand.getToken()) == Kind;\n\n";
 
-  // Classify registers.
-  //
-  // FIXME: Don't hardcode isReg, getReg.
+  // Check for register operands, including sub-classes.
   OS << "  if (Operand.isReg()) {\n";
+  OS << "    MatchClassKind OpKind;\n";
   OS << "    switch (Operand.getReg()) {\n";
-  OS << "    default: return InvalidMatchClass;\n";
-  for (std::map<Record*, ClassInfo*>::iterator 
+  OS << "    default: OpKind = InvalidMatchClass; break;\n";
+  for (std::map<Record*, ClassInfo*>::iterator
          it = Info.RegisterClasses.begin(), ie = Info.RegisterClasses.end();
        it != ie; ++it)
-    OS << "    case " << Target.getName() << "::" 
-       << it->first->getName() << ": return " << it->second->Name << ";\n";
+    OS << "    case " << Info.Target.getName() << "::"
+       << it->first->getName() << ": OpKind = " << it->second->Name
+       << "; break;\n";
   OS << "    }\n";
+  OS << "    return IsSubclass(OpKind, Kind);\n";
   OS << "  }\n\n";
 
-  // Classify user defined operands.
-  for (std::vector<ClassInfo*>::iterator it = Info.Classes.begin(), 
+  // Check the user classes. We don't care what order since we're only
+  // actually matching against one of them.
+  for (std::vector<ClassInfo*>::iterator it = Info.Classes.begin(),
          ie = Info.Classes.end(); it != ie; ++it) {
     ClassInfo &CI = **it;
 
     if (!CI.isUserClass())
       continue;
 
-    OS << "  // '" << CI.ClassName << "' class";
-    if (!CI.SuperClasses.empty()) {
-      OS << ", subclass of ";
-      for (unsigned i = 0, e = CI.SuperClasses.size(); i != e; ++i) {
-        if (i) OS << ", ";
-        OS << "'" << CI.SuperClasses[i]->ClassName << "'";
-        assert(CI < *CI.SuperClasses[i] && "Invalid class relation!");
-      }
-    }
-    OS << "\n";
-
-    OS << "  if (Operand." << CI.PredicateMethod << "()) {\n";
-      
-    // Validate subclass relationships.
-    if (!CI.SuperClasses.empty()) {
-      for (unsigned i = 0, e = CI.SuperClasses.size(); i != e; ++i)
-        OS << "    assert(Operand." << CI.SuperClasses[i]->PredicateMethod
-           << "() && \"Invalid class relationship!\");\n";
-    }
-
-    OS << "    return " << CI.Name << ";\n";
+    OS << "  // '" << CI.ClassName << "' class\n";
+    OS << "  if (Kind == " << CI.Name
+       << " && Operand." << CI.PredicateMethod << "()) {\n";
+    OS << "    return true;\n";
     OS << "  }\n\n";
   }
-  OS << "  return InvalidMatchClass;\n";
+
+  OS << "  return false;\n";
   OS << "}\n\n";
 }
 
@@ -1362,13 +1680,13 @@ static void EmitIsSubclass(CodeGenTarget &Target,
   OS << "  switch (A) {\n";
   OS << "  default:\n";
   OS << "    return false;\n";
-  for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 
+  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
          ie = Infos.end(); it != ie; ++it) {
     ClassInfo &A = **it;
 
     if (A.Kind != ClassInfo::Token) {
       std::vector<StringRef> SuperClasses;
-      for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 
+      for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
              ie = Infos.end(); it != ie; ++it) {
         ClassInfo &B = **it;
 
@@ -1397,153 +1715,25 @@ static void EmitIsSubclass(CodeGenTarget &Target,
   OS << "}\n\n";
 }
 
-typedef std::pair<std::string, std::string> StringPair;
-
-/// FindFirstNonCommonLetter - Find the first character in the keys of the
-/// string pairs that is not shared across the whole set of strings.  All
-/// strings are assumed to have the same length.
-static unsigned 
-FindFirstNonCommonLetter(const std::vector<const StringPair*> &Matches) {
-  assert(!Matches.empty());
-  for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
-    // Check to see if letter i is the same across the set.
-    char Letter = Matches[0]->first[i];
-    
-    for (unsigned str = 0, e = Matches.size(); str != e; ++str)
-      if (Matches[str]->first[i] != Letter)
-        return i;
-  }
-  
-  return Matches[0]->first.size();
-}
-
-/// EmitStringMatcherForChar - Given a set of strings that are known to be the
-/// same length and whose characters leading up to CharNo are the same, emit
-/// code to verify that CharNo and later are the same.
-///
-/// \return - True if control can leave the emitted code fragment.
-static bool EmitStringMatcherForChar(const std::string &StrVariableName,
-                                  const std::vector<const StringPair*> &Matches,
-                                     unsigned CharNo, unsigned IndentCount,
-                                     raw_ostream &OS) {
-  assert(!Matches.empty() && "Must have at least one string to match!");
-  std::string Indent(IndentCount*2+4, ' ');
-
-  // If we have verified that the entire string matches, we're done: output the
-  // matching code.
-  if (CharNo == Matches[0]->first.size()) {
-    assert(Matches.size() == 1 && "Had duplicate keys to match on");
-    
-    // FIXME: If Matches[0].first has embeded \n, this will be bad.
-    OS << Indent << Matches[0]->second << "\t // \"" << Matches[0]->first
-       << "\"\n";
-    return false;
-  }
-  
-  // Bucket the matches by the character we are comparing.
-  std::map<char, std::vector<const StringPair*> > MatchesByLetter;
-  
-  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
-    MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
-  
-
-  // If we have exactly one bucket to match, see how many characters are common
-  // across the whole set and match all of them at once.
-  if (MatchesByLetter.size() == 1) {
-    unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
-    unsigned NumChars = FirstNonCommonLetter-CharNo;
-    
-    // Emit code to break out if the prefix doesn't match.
-    if (NumChars == 1) {
-      // Do the comparison with if (Str[1] != 'f')
-      // FIXME: Need to escape general characters.
-      OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
-         << Matches[0]->first[CharNo] << "')\n";
-      OS << Indent << "  break;\n";
-    } else {
-      // Do the comparison with if (Str.substr(1,3) != "foo").    
-      // FIXME: Need to escape general strings.
-      OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ","
-         << NumChars << ") != \"";
-      OS << Matches[0]->first.substr(CharNo, NumChars) << "\")\n";
-      OS << Indent << "  break;\n";
-    }
-    
-    return EmitStringMatcherForChar(StrVariableName, Matches, 
-                                    FirstNonCommonLetter, IndentCount, OS);
-  }
-  
-  // Otherwise, we have multiple possible things, emit a switch on the
-  // character.
-  OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
-  OS << Indent << "default: break;\n";
-  
-  for (std::map<char, std::vector<const StringPair*> >::iterator LI = 
-       MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
-    // TODO: escape hard stuff (like \n) if we ever care about it.
-    OS << Indent << "case '" << LI->first << "':\t // "
-       << LI->second.size() << " strings to match.\n";
-    if (EmitStringMatcherForChar(StrVariableName, LI->second, CharNo+1,
-                                 IndentCount+1, OS))
-      OS << Indent << "  break;\n";
-  }
-  
-  OS << Indent << "}\n";
-  return true;
-}
-
-
-/// EmitStringMatcher - Given a list of strings and code to execute when they
-/// match, output a simple switch tree to classify the input string.
-/// 
-/// If a match is found, the code in Vals[i].second is executed; control must
-/// not exit this code fragment.  If nothing matches, execution falls through.
-///
-/// \param StrVariableName - The name of the variable to test.
-static void EmitStringMatcher(const std::string &StrVariableName,
-                              const std::vector<StringPair> &Matches,
-                              raw_ostream &OS) {
-  // First level categorization: group strings by length.
-  std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
-  
-  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
-    MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
-  
-  // Output a switch statement on length and categorize the elements within each
-  // bin.
-  OS << "  switch (" << StrVariableName << ".size()) {\n";
-  OS << "  default: break;\n";
-  
-  for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
-       MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
-    OS << "  case " << LI->first << ":\t // " << LI->second.size()
-       << " strings to match.\n";
-    if (EmitStringMatcherForChar(StrVariableName, LI->second, 0, 0, OS))
-      OS << "    break;\n";
-  }
-  
-  OS << "  }\n";
-}
-
-
 /// EmitMatchTokenString - Emit the function to match a token string to the
 /// appropriate match class value.
 static void EmitMatchTokenString(CodeGenTarget &Target,
                                  std::vector<ClassInfo*> &Infos,
                                  raw_ostream &OS) {
   // Construct the match list.
-  std::vector<StringPair> Matches;
-  for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 
+  std::vector<StringMatcher::StringPair> Matches;
+  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
          ie = Infos.end(); it != ie; ++it) {
     ClassInfo &CI = **it;
 
     if (CI.Kind == ClassInfo::Token)
-      Matches.push_back(StringPair(CI.ValueName, "return " + CI.Name + ";"));
+      Matches.push_back(StringMatcher::StringPair(CI.ValueName,
+                                                  "return " + CI.Name + ";"));
   }
 
   OS << "static MatchClassKind MatchTokenString(StringRef Name) {\n";
 
-  EmitStringMatcher("Name", Matches, OS);
+  StringMatcher("Name", Matches, OS).Emit();
 
   OS << "  return InvalidMatchClass;\n";
   OS << "}\n\n";
@@ -1554,28 +1744,28 @@ static void EmitMatchTokenString(CodeGenTarget &Target,
 static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
                                   raw_ostream &OS) {
   // Construct the match list.
-  std::vector<StringPair> Matches;
+  std::vector<StringMatcher::StringPair> Matches;
   for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
     const CodeGenRegister &Reg = Target.getRegisters()[i];
     if (Reg.TheDef->getValueAsString("AsmName").empty())
       continue;
 
-    Matches.push_back(StringPair(Reg.TheDef->getValueAsString("AsmName"),
-                                 "return " + utostr(i + 1) + ";"));
+    Matches.push_back(StringMatcher::StringPair(
+                                        Reg.TheDef->getValueAsString("AsmName"),
+                                        "return " + utostr(i + 1) + ";"));
   }
-  
+
   OS << "static unsigned MatchRegisterName(StringRef Name) {\n";
 
-  EmitStringMatcher("Name", Matches, OS);
-  
+  StringMatcher("Name", Matches, OS).Emit();
+
   OS << "  return 0;\n";
   OS << "}\n\n";
 }
 
 /// EmitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag
 /// definitions.
-static void EmitSubtargetFeatureFlagEnumeration(CodeGenTarget &Target,
-                                                AsmMatcherInfo &Info,
+static void EmitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info,
                                                 raw_ostream &OS) {
   OS << "// Flags for subtarget features that participate in "
      << "instruction matching.\n";
@@ -1584,7 +1774,7 @@ static void EmitSubtargetFeatureFlagEnumeration(CodeGenTarget &Target,
          it = Info.SubtargetFeatures.begin(),
          ie = Info.SubtargetFeatures.end(); it != ie; ++it) {
     SubtargetFeatureInfo &SFI = *it->second;
-    OS << "  " << SFI.EnumName << " = (1 << " << SFI.Index << "),\n";
+    OS << "  " << SFI.getEnumName() << " = (1 << " << SFI.Index << "),\n";
   }
   OS << "  Feature_None = 0\n";
   OS << "};\n\n";
@@ -1592,14 +1782,13 @@ static void EmitSubtargetFeatureFlagEnumeration(CodeGenTarget &Target,
 
 /// EmitComputeAvailableFeatures - Emit the function to compute the list of
 /// available features given a subtarget.
-static void EmitComputeAvailableFeatures(CodeGenTarget &Target,
-                                         AsmMatcherInfo &Info,
+static void EmitComputeAvailableFeatures(AsmMatcherInfo &Info,
                                          raw_ostream &OS) {
   std::string ClassName =
     Info.AsmParser->getValueAsString("AsmParserClassName");
 
-  OS << "unsigned " << Target.getName() << ClassName << "::\n"
-     << "ComputeAvailableFeatures(const " << Target.getName()
+  OS << "unsigned " << Info.Target.getName() << ClassName << "::\n"
+     << "ComputeAvailableFeatures(const " << Info.Target.getName()
      << "Subtarget *Subtarget) const {\n";
   OS << "  unsigned Features = 0;\n";
   for (std::map<Record*, SubtargetFeatureInfo*>::const_iterator
@@ -1608,73 +1797,375 @@ static void EmitComputeAvailableFeatures(CodeGenTarget &Target,
     SubtargetFeatureInfo &SFI = *it->second;
     OS << "  if (" << SFI.TheDef->getValueAsString("CondString")
        << ")\n";
-    OS << "    Features |= " << SFI.EnumName << ";\n";
+    OS << "    Features |= " << SFI.getEnumName() << ";\n";
   }
   OS << "  return Features;\n";
   OS << "}\n\n";
 }
 
+static std::string GetAliasRequiredFeatures(Record *R,
+                                            const AsmMatcherInfo &Info) {
+  std::vector<Record*> ReqFeatures = R->getValueAsListOfDefs("Predicates");
+  std::string Result;
+  unsigned NumFeatures = 0;
+  for (unsigned i = 0, e = ReqFeatures.size(); i != e; ++i) {
+    SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]);
+
+    if (F == 0)
+      throw TGError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() +
+                    "' is not marked as an AssemblerPredicate!");
+
+    if (NumFeatures)
+      Result += '|';
+
+    Result += F->getEnumName();
+    ++NumFeatures;
+  }
+
+  if (NumFeatures > 1)
+    Result = '(' + Result + ')';
+  return Result;
+}
+
+/// EmitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
+/// emit a function for them and return true, otherwise return false.
+static bool EmitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
+  // Ignore aliases when match-prefix is set.
+  if (!MatchPrefix.empty())
+    return false;
+
+  std::vector<Record*> Aliases =
+    Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
+  if (Aliases.empty()) return false;
+
+  OS << "static void ApplyMnemonicAliases(StringRef &Mnemonic, "
+        "unsigned Features) {\n";
+
+  // Keep track of all the aliases from a mnemonic.  Use an std::map so that the
+  // iteration order of the map is stable.
+  std::map<std::string, std::vector<Record*> > AliasesFromMnemonic;
+
+  for (unsigned i = 0, e = Aliases.size(); i != e; ++i) {
+    Record *R = Aliases[i];
+    AliasesFromMnemonic[R->getValueAsString("FromMnemonic")].push_back(R);
+  }
+
+  // Process each alias a "from" mnemonic at a time, building the code executed
+  // by the string remapper.
+  std::vector<StringMatcher::StringPair> Cases;
+  for (std::map<std::string, std::vector<Record*> >::iterator
+       I = AliasesFromMnemonic.begin(), E = AliasesFromMnemonic.end();
+       I != E; ++I) {
+    const std::vector<Record*> &ToVec = I->second;
+
+    // Loop through each alias and emit code that handles each case.  If there
+    // are two instructions without predicates, emit an error.  If there is one,
+    // emit it last.
+    std::string MatchCode;
+    int AliasWithNoPredicate = -1;
+
+    for (unsigned i = 0, e = ToVec.size(); i != e; ++i) {
+      Record *R = ToVec[i];
+      std::string FeatureMask = GetAliasRequiredFeatures(R, Info);
+
+      // If this unconditionally matches, remember it for later and diagnose
+      // duplicates.
+      if (FeatureMask.empty()) {
+        if (AliasWithNoPredicate != -1) {
+          // We can't have two aliases from the same mnemonic with no predicate.
+          PrintError(ToVec[AliasWithNoPredicate]->getLoc(),
+                     "two MnemonicAliases with the same 'from' mnemonic!");
+          throw TGError(R->getLoc(), "this is the other MnemonicAlias.");
+        }
+
+        AliasWithNoPredicate = i;
+        continue;
+      }
+      if (R->getValueAsString("ToMnemonic") == I->first)
+        throw TGError(R->getLoc(), "MnemonicAlias to the same string");
+
+      if (!MatchCode.empty())
+        MatchCode += "else ";
+      MatchCode += "if ((Features & " + FeatureMask + ") == "+FeatureMask+")\n";
+      MatchCode += "  Mnemonic = \"" +R->getValueAsString("ToMnemonic")+"\";\n";
+    }
+
+    if (AliasWithNoPredicate != -1) {
+      Record *R = ToVec[AliasWithNoPredicate];
+      if (!MatchCode.empty())
+        MatchCode += "else\n  ";
+      MatchCode += "Mnemonic = \"" + R->getValueAsString("ToMnemonic")+"\";\n";
+    }
+
+    MatchCode += "return;";
+
+    Cases.push_back(std::make_pair(I->first, MatchCode));
+  }
+
+  StringMatcher("Mnemonic", Cases, OS).Emit();
+  OS << "}\n\n";
+
+  return true;
+}
+
+static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
+                              const AsmMatcherInfo &Info, StringRef ClassName) {
+  // Emit the static custom operand parsing table;
+  OS << "namespace {\n";
+  OS << "  struct OperandMatchEntry {\n";
+  OS << "    const char *Mnemonic;\n";
+  OS << "    unsigned OperandMask;\n";
+  OS << "    MatchClassKind Class;\n";
+  OS << "    unsigned RequiredFeatures;\n";
+  OS << "  };\n\n";
+
+  OS << "  // Predicate for searching for an opcode.\n";
+  OS << "  struct LessOpcodeOperand {\n";
+  OS << "    bool operator()(const OperandMatchEntry &LHS, StringRef RHS) {\n";
+  OS << "      return StringRef(LHS.Mnemonic) < RHS;\n";
+  OS << "    }\n";
+  OS << "    bool operator()(StringRef LHS, const OperandMatchEntry &RHS) {\n";
+  OS << "      return LHS < StringRef(RHS.Mnemonic);\n";
+  OS << "    }\n";
+  OS << "    bool operator()(const OperandMatchEntry &LHS,";
+  OS << " const OperandMatchEntry &RHS) {\n";
+  OS << "      return StringRef(LHS.Mnemonic) < StringRef(RHS.Mnemonic);\n";
+  OS << "    }\n";
+  OS << "  };\n";
+
+  OS << "} // end anonymous namespace.\n\n";
+
+  OS << "static const OperandMatchEntry OperandMatchTable["
+     << Info.OperandMatchInfo.size() << "] = {\n";
+
+  OS << "  /* Mnemonic, Operand List Mask, Operand Class, Features */\n";
+  for (std::vector<OperandMatchEntry>::const_iterator it =
+       Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end();
+       it != ie; ++it) {
+    const OperandMatchEntry &OMI = *it;
+    const MatchableInfo &II = *OMI.MI;
+
+    OS << "  { \"" << II.Mnemonic << "\""
+       << ", " << OMI.OperandMask;
+
+    OS << " /* ";
+    bool printComma = false;
+    for (int i = 0, e = 31; i !=e; ++i)
+      if (OMI.OperandMask & (1 << i)) {
+        if (printComma)
+          OS << ", ";
+        OS << i;
+        printComma = true;
+      }
+    OS << " */";
+
+    OS << ", " << OMI.CI->Name
+       << ", ";
+
+    // Write the required features mask.
+    if (!II.RequiredFeatures.empty()) {
+      for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
+        if (i) OS << "|";
+        OS << II.RequiredFeatures[i]->getEnumName();
+      }
+    } else
+      OS << "0";
+    OS << " },\n";
+  }
+  OS << "};\n\n";
+
+  // Emit the operand class switch to call the correct custom parser for
+  // the found operand class.
+  OS << Target.getName() << ClassName << "::OperandMatchResultTy "
+     << Target.getName() << ClassName << "::\n"
+     << "TryCustomParseOperand(SmallVectorImpl<MCParsedAsmOperand*>"
+     << " &Operands,\n                      unsigned MCK) {\n\n"
+     << "  switch(MCK) {\n";
+
+  for (std::vector<ClassInfo*>::const_iterator it = Info.Classes.begin(),
+       ie = Info.Classes.end(); it != ie; ++it) {
+    ClassInfo *CI = *it;
+    if (CI->ParserMethod.empty())
+      continue;
+    OS << "  case " << CI->Name << ":\n"
+       << "    return " << CI->ParserMethod << "(Operands);\n";
+  }
+
+  OS << "  default:\n";
+  OS << "    return MatchOperand_NoMatch;\n";
+  OS << "  }\n";
+  OS << "  return MatchOperand_NoMatch;\n";
+  OS << "}\n\n";
+
+  // Emit the static custom operand parser. This code is very similar with
+  // the other matcher. Also use MatchResultTy here just in case we go for
+  // a better error handling.
+  OS << Target.getName() << ClassName << "::OperandMatchResultTy "
+     << Target.getName() << ClassName << "::\n"
+     << "MatchOperandParserImpl(SmallVectorImpl<MCParsedAsmOperand*>"
+     << " &Operands,\n                       StringRef Mnemonic) {\n";
+
+  // Emit code to get the available features.
+  OS << "  // Get the current feature set.\n";
+  OS << "  unsigned AvailableFeatures = getAvailableFeatures();\n\n";
+
+  OS << "  // Get the next operand index.\n";
+  OS << "  unsigned NextOpNum = Operands.size()-1;\n";
+
+  // Emit code to search the table.
+  OS << "  // Search the table.\n";
+  OS << "  std::pair<const OperandMatchEntry*, const OperandMatchEntry*>";
+  OS << " MnemonicRange =\n";
+  OS << "    std::equal_range(OperandMatchTable, OperandMatchTable+"
+     << Info.OperandMatchInfo.size() << ", Mnemonic,\n"
+     << "                     LessOpcodeOperand());\n\n";
+
+  OS << "  if (MnemonicRange.first == MnemonicRange.second)\n";
+  OS << "    return MatchOperand_NoMatch;\n\n";
+
+  OS << "  for (const OperandMatchEntry *it = MnemonicRange.first,\n"
+     << "       *ie = MnemonicRange.second; it != ie; ++it) {\n";
+
+  OS << "    // equal_range guarantees that instruction mnemonic matches.\n";
+  OS << "    assert(Mnemonic == it->Mnemonic);\n\n";
+
+  // Emit check that the required features are available.
+  OS << "    // check if the available features match\n";
+  OS << "    if ((AvailableFeatures & it->RequiredFeatures) "
+     << "!= it->RequiredFeatures) {\n";
+  OS << "      continue;\n";
+  OS << "    }\n\n";
+
+  // Emit check to ensure the operand number matches.
+  OS << "    // check if the operand in question has a custom parser.\n";
+  OS << "    if (!(it->OperandMask & (1 << NextOpNum)))\n";
+  OS << "      continue;\n\n";
+
+  // Emit call to the custom parser method
+  OS << "    // call custom parse method to handle the operand\n";
+  OS << "    OperandMatchResultTy Result = ";
+  OS << "TryCustomParseOperand(Operands, it->Class);\n";
+  OS << "    if (Result != MatchOperand_NoMatch)\n";
+  OS << "      return Result;\n";
+  OS << "  }\n\n";
+
+  OS << "  // Okay, we had no match.\n";
+  OS << "  return MatchOperand_NoMatch;\n";
+  OS << "}\n\n";
+}
+
 void AsmMatcherEmitter::run(raw_ostream &OS) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   Record *AsmParser = Target.getAsmParser();
   std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
 
   // Compute the information on the instructions to match.
-  AsmMatcherInfo Info(AsmParser);
-  Info.BuildInfo(Target);
+  AsmMatcherInfo Info(AsmParser, Target, Records);
+  Info.BuildInfo();
 
   // Sort the instruction table using the partial order on classes. We use
   // stable_sort to ensure that ambiguous instructions are still
   // deterministically ordered.
-  std::stable_sort(Info.Instructions.begin(), Info.Instructions.end(),
-                   less_ptr<InstructionInfo>());
-  
+  std::stable_sort(Info.Matchables.begin(), Info.Matchables.end(),
+                   less_ptr<MatchableInfo>());
+
   DEBUG_WITH_TYPE("instruction_info", {
-      for (std::vector<InstructionInfo*>::iterator 
-             it = Info.Instructions.begin(), ie = Info.Instructions.end(); 
+      for (std::vector<MatchableInfo*>::iterator
+             it = Info.Matchables.begin(), ie = Info.Matchables.end();
            it != ie; ++it)
         (*it)->dump();
     });
 
-  // Check for ambiguous instructions.
-  unsigned NumAmbiguous = 0;
-  for (unsigned i = 0, e = Info.Instructions.size(); i != e; ++i) {
-    for (unsigned j = i + 1; j != e; ++j) {
-      InstructionInfo &A = *Info.Instructions[i];
-      InstructionInfo &B = *Info.Instructions[j];
-    
-      if (A.CouldMatchAmiguouslyWith(B)) {
-        DEBUG_WITH_TYPE("ambiguous_instrs", {
-            errs() << "warning: ambiguous instruction match:\n";
-            A.dump();
-            errs() << "\nis incomparable with:\n";
-            B.dump();
-            errs() << "\n\n";
-          });
-        ++NumAmbiguous;
+  // Check for ambiguous matchables.
+  DEBUG_WITH_TYPE("ambiguous_instrs", {
+    unsigned NumAmbiguous = 0;
+    for (unsigned i = 0, e = Info.Matchables.size(); i != e; ++i) {
+      for (unsigned j = i + 1; j != e; ++j) {
+        MatchableInfo &A = *Info.Matchables[i];
+        MatchableInfo &B = *Info.Matchables[j];
+
+        if (A.CouldMatchAmbiguouslyWith(B)) {
+          errs() << "warning: ambiguous matchables:\n";
+          A.dump();
+          errs() << "\nis incomparable with:\n";
+          B.dump();
+          errs() << "\n\n";
+          ++NumAmbiguous;
+        }
       }
     }
-  }
-  if (NumAmbiguous)
-    DEBUG_WITH_TYPE("ambiguous_instrs", {
-        errs() << "warning: " << NumAmbiguous 
-               << " ambiguous instructions!\n";
-      });
+    if (NumAmbiguous)
+      errs() << "warning: " << NumAmbiguous
+             << " ambiguous matchables!\n";
+  });
+
+  // Compute the information on the custom operand parsing.
+  Info.BuildOperandMatchInfo();
 
   // Write the output.
 
   EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
 
+  // Information for the class declaration.
+  OS << "\n#ifdef GET_ASSEMBLER_HEADER\n";
+  OS << "#undef GET_ASSEMBLER_HEADER\n";
+  OS << "  // This should be included into the middle of the declaration of\n";
+  OS << "  // your subclasses implementation of TargetAsmParser.\n";
+  OS << "  unsigned ComputeAvailableFeatures(const " <<
+           Target.getName() << "Subtarget *Subtarget) const;\n";
+  OS << "  enum MatchResultTy {\n";
+  OS << "    Match_ConversionFail,\n";
+  OS << "    Match_InvalidOperand,\n";
+  OS << "    Match_MissingFeature,\n";
+  OS << "    Match_MnemonicFail,\n";
+  OS << "    Match_Success\n";
+  OS << "  };\n";
+  OS << "  bool ConvertToMCInst(unsigned Kind, MCInst &Inst, "
+     << "unsigned Opcode,\n"
+     << "                       const SmallVectorImpl<MCParsedAsmOperand*> "
+     << "&Operands);\n";
+  OS << "  bool MnemonicIsValid(StringRef Mnemonic);\n";
+  OS << "  MatchResultTy MatchInstructionImpl(\n";
+  OS << "    const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
+  OS << "    MCInst &Inst, unsigned &ErrorInfo);\n";
+
+  if (Info.OperandMatchInfo.size()) {
+    OS << "\n  enum OperandMatchResultTy {\n";
+    OS << "    MatchOperand_Success,    // operand matched successfully\n";
+    OS << "    MatchOperand_NoMatch,    // operand did not match\n";
+    OS << "    MatchOperand_ParseFail   // operand matched but had errors\n";
+    OS << "  };\n";
+    OS << "  OperandMatchResultTy MatchOperandParserImpl(\n";
+    OS << "    SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
+    OS << "    StringRef Mnemonic);\n";
+
+    OS << "  OperandMatchResultTy TryCustomParseOperand(\n";
+    OS << "    SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
+    OS << "    unsigned MCK);\n\n";
+  }
+
+  OS << "#endif // GET_ASSEMBLER_HEADER_INFO\n\n";
+
+  OS << "\n#ifdef GET_REGISTER_MATCHER\n";
+  OS << "#undef GET_REGISTER_MATCHER\n\n";
+
   // Emit the subtarget feature enumeration.
-  EmitSubtargetFeatureFlagEnumeration(Target, Info, OS);
+  EmitSubtargetFeatureFlagEnumeration(Info, OS);
 
   // Emit the function to match a register name to number.
   EmitMatchRegisterName(Target, AsmParser, OS);
-  
-  OS << "#ifndef REGISTERS_ONLY\n\n";
+
+  OS << "#endif // GET_REGISTER_MATCHER\n\n";
+
+
+  OS << "\n#ifdef GET_MATCHER_IMPLEMENTATION\n";
+  OS << "#undef GET_MATCHER_IMPLEMENTATION\n\n";
+
+  // Generate the function that remaps for mnemonic aliases.
+  bool HasMnemonicAliases = EmitMnemonicAliases(OS, Info);
 
   // Generate the unified function to convert operands into an MCInst.
-  EmitConvertToMCInst(Target, Info.Instructions, OS);
+  EmitConvertToMCInst(Target, ClassName, Info.Matchables, OS);
 
   // Emit the enumeration for classes which participate in matching.
   EmitMatchClassEnumeration(Target, Info.Classes, OS);
@@ -1682,27 +2173,21 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   // Emit the routine to match token strings to their match class.
   EmitMatchTokenString(Target, Info.Classes, OS);
 
-  // Emit the routine to classify an operand.
-  EmitClassifyOperand(Target, Info, OS);
-
   // Emit the subclass predicate routine.
   EmitIsSubclass(Target, Info.Classes, OS);
 
+  // Emit the routine to validate an operand against a match class.
+  EmitValidateOperandClass(Info, OS);
+
   // Emit the available features compute function.
-  EmitComputeAvailableFeatures(Target, Info, OS);
+  EmitComputeAvailableFeatures(Info, OS);
 
-  // Finally, build the match function.
 
   size_t MaxNumOperands = 0;
-  for (std::vector<InstructionInfo*>::const_iterator it =
-         Info.Instructions.begin(), ie = Info.Instructions.end();
+  for (std::vector<MatchableInfo*>::const_iterator it =
+         Info.Matchables.begin(), ie = Info.Matchables.end();
        it != ie; ++it)
-    MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
-
-  OS << "bool " << Target.getName() << ClassName << "::\n"
-     << "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
-     << " &Operands,\n";
-  OS << "                     MCInst &Inst) {\n";
+    MaxNumOperands = std::max(MaxNumOperands, (*it)->AsmOperands.size());
 
   // Emit the static match table; unused classes get initalized to 0 which is
   // guaranteed to be InvalidMatchClass.
@@ -1714,23 +2199,44 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   // order the match kinds appropriately (putting mnemonics last), then we
   // should only end up using a few bits for each class, especially the ones
   // following the mnemonic.
-  OS << "  static const struct MatchEntry {\n";
+  OS << "namespace {\n";
+  OS << "  struct MatchEntry {\n";
   OS << "    unsigned Opcode;\n";
+  OS << "    const char *Mnemonic;\n";
   OS << "    ConversionKind ConvertFn;\n";
   OS << "    MatchClassKind Classes[" << MaxNumOperands << "];\n";
   OS << "    unsigned RequiredFeatures;\n";
-  OS << "  } MatchTable[" << Info.Instructions.size() << "] = {\n";
+  OS << "  };\n\n";
+
+  OS << "  // Predicate for searching for an opcode.\n";
+  OS << "  struct LessOpcode {\n";
+  OS << "    bool operator()(const MatchEntry &LHS, StringRef RHS) {\n";
+  OS << "      return StringRef(LHS.Mnemonic) < RHS;\n";
+  OS << "    }\n";
+  OS << "    bool operator()(StringRef LHS, const MatchEntry &RHS) {\n";
+  OS << "      return LHS < StringRef(RHS.Mnemonic);\n";
+  OS << "    }\n";
+  OS << "    bool operator()(const MatchEntry &LHS, const MatchEntry &RHS) {\n";
+  OS << "      return StringRef(LHS.Mnemonic) < StringRef(RHS.Mnemonic);\n";
+  OS << "    }\n";
+  OS << "  };\n";
+
+  OS << "} // end anonymous namespace.\n\n";
+
+  OS << "static const MatchEntry MatchTable["
+     << Info.Matchables.size() << "] = {\n";
 
-  for (std::vector<InstructionInfo*>::const_iterator it =
-         Info.Instructions.begin(), ie = Info.Instructions.end();
+  for (std::vector<MatchableInfo*>::const_iterator it =
+       Info.Matchables.begin(), ie = Info.Matchables.end();
        it != ie; ++it) {
-    InstructionInfo &II = **it;
+    MatchableInfo &II = **it;
 
-    OS << "    { " << Target.getName() << "::" << II.InstrName
+    OS << "  { " << Target.getName() << "::"
+       << II.getResultInst()->TheDef->getName() << ", \"" << II.Mnemonic << "\""
        << ", " << II.ConversionFnKind << ", { ";
-    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
-      InstructionInfo::Operand &Op = II.Operands[i];
-      
+    for (unsigned i = 0, e = II.AsmOperands.size(); i != e; ++i) {
+      MatchableInfo::AsmOperand &Op = II.AsmOperands[i];
+
       if (i) OS << ", ";
       OS << Op.Class->Name;
     }
@@ -1740,7 +2246,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     if (!II.RequiredFeatures.empty()) {
       for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
         if (i) OS << "|";
-        OS << II.RequiredFeatures[i]->EnumName;
+        OS << II.RequiredFeatures[i]->getEnumName();
       }
     } else
       OS << "0";
@@ -1748,52 +2254,101 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     OS << "},\n";
   }
 
-  OS << "  };\n\n";
+  OS << "};\n\n";
 
+  // A method to determine if a mnemonic is in the list.
+  OS << "bool " << Target.getName() << ClassName << "::\n"
+     << "MnemonicIsValid(StringRef Mnemonic) {\n";
+  OS << "  // Search the table.\n";
+  OS << "  std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n";
+  OS << "    std::equal_range(MatchTable, MatchTable+"
+     << Info.Matchables.size() << ", Mnemonic, LessOpcode());\n";
+  OS << "  return MnemonicRange.first != MnemonicRange.second;\n";
+  OS << "}\n\n";
+
+  // Finally, build the match function.
+  OS << Target.getName() << ClassName << "::MatchResultTy "
+     << Target.getName() << ClassName << "::\n"
+     << "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
+     << " &Operands,\n";
+  OS << "                     MCInst &Inst, unsigned &ErrorInfo) {\n";
 
   // Emit code to get the available features.
   OS << "  // Get the current feature set.\n";
   OS << "  unsigned AvailableFeatures = getAvailableFeatures();\n\n";
 
-  // Emit code to compute the class list for this operand vector.
-  OS << "  // Eliminate obvious mismatches.\n";
-  OS << "  if (Operands.size() > " << MaxNumOperands << ")\n";
-  OS << "    return true;\n\n";
+  OS << "  // Get the instruction mnemonic, which is the first token.\n";
+  OS << "  StringRef Mnemonic = ((" << Target.getName()
+     << "Operand*)Operands[0])->getToken();\n\n";
 
-  OS << "  // Compute the class list for this operand vector.\n";
-  OS << "  MatchClassKind Classes[" << MaxNumOperands << "];\n";
-  OS << "  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {\n";
-  OS << "    Classes[i] = ClassifyOperand(Operands[i]);\n\n";
+  if (HasMnemonicAliases) {
+    OS << "  // Process all MnemonicAliases to remap the mnemonic.\n";
+    OS << "  ApplyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
+  }
 
-  OS << "    // Check for invalid operands before matching.\n";
-  OS << "    if (Classes[i] == InvalidMatchClass)\n";
-  OS << "      return true;\n";
+  // Emit code to compute the class list for this operand vector.
+  OS << "  // Eliminate obvious mismatches.\n";
+  OS << "  if (Operands.size() > " << (MaxNumOperands+1) << ") {\n";
+  OS << "    ErrorInfo = " << (MaxNumOperands+1) << ";\n";
+  OS << "    return Match_InvalidOperand;\n";
   OS << "  }\n\n";
 
-  OS << "  // Mark unused classes.\n";
-  OS << "  for (unsigned i = Operands.size(), e = " << MaxNumOperands << "; "
-     << "i != e; ++i)\n";
-  OS << "    Classes[i] = InvalidMatchClass;\n\n";
+  OS << "  // Some state to try to produce better error messages.\n";
+  OS << "  bool HadMatchOtherThanFeatures = false;\n\n";
+  OS << "  // Set ErrorInfo to the operand that mismatches if it is\n";
+  OS << "  // wrong for all instances of the instruction.\n";
+  OS << "  ErrorInfo = ~0U;\n";
 
   // Emit code to search the table.
   OS << "  // Search the table.\n";
-  OS << "  for (const MatchEntry *it = MatchTable, "
-     << "*ie = MatchTable + " << Info.Instructions.size()
-     << "; it != ie; ++it) {\n";
+  OS << "  std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n";
+  OS << "    std::equal_range(MatchTable, MatchTable+"
+     << Info.Matchables.size() << ", Mnemonic, LessOpcode());\n\n";
 
-  // Emit check that the required features are available.
-    OS << "    if ((AvailableFeatures & it->RequiredFeatures) "
-       << "!= it->RequiredFeatures)\n";
-    OS << "      continue;\n";
+  OS << "  // Return a more specific error code if no mnemonics match.\n";
+  OS << "  if (MnemonicRange.first == MnemonicRange.second)\n";
+  OS << "    return Match_MnemonicFail;\n\n";
+
+  OS << "  for (const MatchEntry *it = MnemonicRange.first, "
+     << "*ie = MnemonicRange.second;\n";
+  OS << "       it != ie; ++it) {\n";
+
+  OS << "    // equal_range guarantees that instruction mnemonic matches.\n";
+  OS << "    assert(Mnemonic == it->Mnemonic);\n";
 
   // Emit check that the subclasses match.
-  for (unsigned i = 0; i != MaxNumOperands; ++i) {
-    OS << "    if (!IsSubclass(Classes[" 
-       << i << "], it->Classes[" << i << "]))\n";
-    OS << "      continue;\n";
-  }
+  OS << "    bool OperandsValid = true;\n";
+  OS << "    for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n";
+  OS << "      if (i + 1 >= Operands.size()) {\n";
+  OS << "        OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n";
+  OS << "        break;";
+  OS << "      }\n";
+  OS << "      if (ValidateOperandClass(Operands[i+1], it->Classes[i]))\n";
+  OS << "        continue;\n";
+  OS << "      // If this operand is broken for all of the instances of this\n";
+  OS << "      // mnemonic, keep track of it so we can report loc info.\n";
+  OS << "      if (it == MnemonicRange.first || ErrorInfo <= i+1)\n";
+  OS << "        ErrorInfo = i+1;\n";
+  OS << "      // Otherwise, just reject this instance of the mnemonic.\n";
+  OS << "      OperandsValid = false;\n";
+  OS << "      break;\n";
+  OS << "    }\n\n";
+
+  OS << "    if (!OperandsValid) continue;\n";
+
+  // Emit check that the required features are available.
+  OS << "    if ((AvailableFeatures & it->RequiredFeatures) "
+     << "!= it->RequiredFeatures) {\n";
+  OS << "      HadMatchOtherThanFeatures = true;\n";
+  OS << "      continue;\n";
+  OS << "    }\n";
+  OS << "\n";
+  OS << "    // We have selected a definite instruction, convert the parsed\n"
+     << "    // operands into the appropriate MCInst.\n";
+  OS << "    if (!ConvertToMCInst(it->ConvertFn, Inst,\n"
+     << "                         it->Opcode, Operands))\n";
+  OS << "      return Match_ConversionFail;\n";
   OS << "\n";
-  OS << "    ConvertToMCInst(it->ConvertFn, Inst, it->Opcode, Operands);\n";
 
   // Call the post-processing function, if used.
   std::string InsnCleanupFn =
@@ -1801,11 +2356,16 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   if (!InsnCleanupFn.empty())
     OS << "    " << InsnCleanupFn << "(Inst);\n";
 
-  OS << "    return false;\n";
+  OS << "    return Match_Success;\n";
   OS << "  }\n\n";
 
-  OS << "  return true;\n";
+  OS << "  // Okay, we had no match.  Try to return a useful error code.\n";
+  OS << "  if (HadMatchOtherThanFeatures) return Match_MissingFeature;\n";
+  OS << "  return Match_InvalidOperand;\n";
   OS << "}\n\n";
-  
-  OS << "#endif // REGISTERS_ONLY\n";
+
+  if (Info.OperandMatchInfo.size())
+    EmitCustomOperandParsing(OS, Target, Info, ClassName);
+
+  OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n";
 }
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 23f13c2ae2d4..448ebad91f09 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -101,22 +101,22 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
 }
 
 void AsmWriterEmitter::
-FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, 
+FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
                           std::vector<unsigned> &InstIdxs,
                           std::vector<unsigned> &InstOpsUsed) const {
   InstIdxs.assign(NumberedInstructions.size(), ~0U);
-  
+
   // This vector parallels UniqueOperandCommands, keeping track of which
   // instructions each case are used for.  It is a comma separated string of
   // enums.
   std::vector<std::string> InstrsForCase;
   InstrsForCase.resize(UniqueOperandCommands.size());
   InstOpsUsed.assign(UniqueOperandCommands.size(), 0);
-  
+
   for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
     const AsmWriterInst *Inst = getAsmWriterInstByID(i);
     if (Inst == 0) continue;  // PHI, INLINEASM, PROLOG_LABEL, etc.
-    
+
     std::string Command;
     if (Inst->Operands.empty())
       continue;   // Instruction already done.
@@ -143,13 +143,13 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
       InstOpsUsed.push_back(1);
     }
   }
-  
+
   // For each entry of UniqueOperandCommands, there is a set of instructions
   // that uses it.  If the next command of all instructions in the set are
   // identical, fold it into the command.
   for (unsigned CommandIdx = 0, e = UniqueOperandCommands.size();
        CommandIdx != e; ++CommandIdx) {
-    
+
     for (unsigned Op = 1; ; ++Op) {
       // Scan for the first instruction in the set.
       std::vector<unsigned>::iterator NIT =
@@ -158,7 +158,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
 
       // If this instruction has no more operands, we isn't anything to merge
       // into this command.
-      const AsmWriterInst *FirstInst = 
+      const AsmWriterInst *FirstInst =
         getAsmWriterInstByID(NIT-InstIdxs.begin());
       if (!FirstInst || FirstInst->Operands.size() == Op)
         break;
@@ -175,7 +175,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
            NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) {
         // Okay, found another instruction in this command set.  If the operand
         // matches, we're ok, otherwise bail out.
-        const AsmWriterInst *OtherInst = 
+        const AsmWriterInst *OtherInst =
           getAsmWriterInstByID(NIT-InstIdxs.begin());
 
         if (OtherInst &&
@@ -189,16 +189,16 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
         }
       }
       if (!AllSame) break;
-      
+
       // Okay, everything in this command set has the same next operand.  Add it
       // to UniqueOperandCommands and remember that it was consumed.
       std::string Command = "    " + FirstInst->Operands[Op].getCode() + "\n";
-      
+
       UniqueOperandCommands[CommandIdx] += Command;
       InstOpsUsed[CommandIdx]++;
     }
   }
-  
+
   // Prepend some of the instructions each case is used for onto the case val.
   for (unsigned i = 0, e = InstrsForCase.size(); i != e; ++i) {
     std::string Instrs = InstrsForCase[i];
@@ -206,9 +206,9 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
       Instrs.erase(Instrs.begin()+70, Instrs.end());
       Instrs += "...";
     }
-    
+
     if (!Instrs.empty())
-      UniqueOperandCommands[i] = "    // " + Instrs + "\n" + 
+      UniqueOperandCommands[i] = "    // " + Instrs + "\n" +
         UniqueOperandCommands[i];
   }
 }
@@ -240,15 +240,18 @@ static void UnescapeString(std::string &Str) {
 /// EmitPrintInstruction - Generate the code for the "printInstruction" method
 /// implementation.
 void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   Record *AsmWriter = Target.getAsmWriter();
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
-  
+  bool isMC = AsmWriter->getValueAsBit("isMCAsmWriter");
+  const char *MachineInstrClassName = isMC ? "MCInst" : "MachineInstr";
+
   O <<
   "/// printInstruction - This method is automatically generated by tablegen\n"
   "/// from the instruction set description.\n"
     "void " << Target.getName() << ClassName
-            << "::printInstruction(const MachineInstr *MI, raw_ostream &O) {\n";
+            << "::printInstruction(const " << MachineInstrClassName
+            << " *MI, raw_ostream &O) {\n";
 
   std::vector<AsmWriterInst> Instructions;
 
@@ -257,14 +260,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
     if (!(*I)->AsmString.empty() &&
         (*I)->TheDef->getName() != "PHI")
       Instructions.push_back(
-        AsmWriterInst(**I, 
+        AsmWriterInst(**I,
                       AsmWriter->getValueAsInt("Variant"),
                       AsmWriter->getValueAsInt("FirstOperandColumn"),
                       AsmWriter->getValueAsInt("OperandSpacing")));
 
   // Get the instruction numbering.
   NumberedInstructions = Target.getInstructionsByEnumValue();
-  
+
   // Compute the CodeGenInstruction -> AsmWriterInst mapping.  Note that not
   // all machine instructions are necessarily being printed, so there may be
   // target instructions not in this map.
@@ -273,11 +276,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
 
   // Build an aggregate string, and build a table of offsets into it.
   StringToOffsetTable StringTable;
-  
+
   /// OpcodeInfo - This encodes the index of the string to use for the first
   /// chunk of the output as well as indices used for operand printing.
   std::vector<unsigned> OpcodeInfo;
-  
+
   unsigned MaxStringIdx = 0;
   for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
     AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
@@ -285,7 +288,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
     if (AWI == 0) {
       // Something not handled by the asmwriter printer.
       Idx = ~0U;
-    } else if (AWI->Operands[0].OperandType != 
+    } else if (AWI->Operands[0].OperandType !=
                         AsmWriterOperand::isLiteralTextOperand ||
                AWI->Operands[0].Str.empty()) {
       // Something handled by the asmwriter printer, but with no leading string.
@@ -295,51 +298,51 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
       UnescapeString(Str);
       Idx = StringTable.GetOrAddStringOffset(Str);
       MaxStringIdx = std::max(MaxStringIdx, Idx);
-      
+
       // Nuke the string from the operand list.  It is now handled!
       AWI->Operands.erase(AWI->Operands.begin());
     }
-    
+
     // Bias offset by one since we want 0 as a sentinel.
     OpcodeInfo.push_back(Idx+1);
   }
-  
+
   // Figure out how many bits we used for the string index.
   unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+2);
-  
+
   // To reduce code size, we compactify common instructions into a few bits
   // in the opcode-indexed table.
   unsigned BitsLeft = 32-AsmStrBits;
 
   std::vector<std::vector<std::string> > TableDrivenOperandPrinters;
-  
+
   while (1) {
     std::vector<std::string> UniqueOperandCommands;
     std::vector<unsigned> InstIdxs;
     std::vector<unsigned> NumInstOpsHandled;
     FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs,
                               NumInstOpsHandled);
-    
+
     // If we ran out of operands to print, we're done.
     if (UniqueOperandCommands.empty()) break;
-    
+
     // Compute the number of bits we need to represent these cases, this is
     // ceil(log2(numentries)).
     unsigned NumBits = Log2_32_Ceil(UniqueOperandCommands.size());
-    
+
     // If we don't have enough bits for this operand, don't include it.
     if (NumBits > BitsLeft) {
       DEBUG(errs() << "Not enough bits to densely encode " << NumBits
                    << " more bits\n");
       break;
     }
-    
+
     // Otherwise, we can include this in the initial lookup table.  Add it in.
     BitsLeft -= NumBits;
     for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i)
       if (InstIdxs[i] != ~0U)
         OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits);
-    
+
     // Remove the info about this operand.
     for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
       if (AsmWriterInst *Inst = getAsmWriterInstByID(i))
@@ -351,13 +354,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
                                Inst->Operands.begin()+NumOps);
         }
     }
-    
+
     // Remember the handlers for this set of operands.
     TableDrivenOperandPrinters.push_back(UniqueOperandCommands);
   }
-  
-  
-  
+
+
+
   O<<"  static const unsigned OpInfo[] = {\n";
   for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
     O << "    " << OpcodeInfo[i] << "U,\t// "
@@ -366,7 +369,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
   // Add a dummy entry so the array init doesn't end with a comma.
   O << "    0U\n";
   O << "  };\n\n";
-  
+
   // Emit the string itself.
   O << "  const char *AsmStrs = \n";
   StringTable.EmitString(O);
@@ -388,13 +391,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
     // ceil(log2(numentries)).
     unsigned NumBits = Log2_32_Ceil(Commands.size());
     assert(NumBits <= BitsLeft && "consistency error");
-    
+
     // Emit code to extract this field from Bits.
     BitsLeft -= NumBits;
-    
+
     O << "\n  // Fragment " << i << " encoded into " << NumBits
       << " bits for " << Commands.size() << " unique commands.\n";
-    
+
     if (Commands.size() == 2) {
       // Emit two possibilitys with if/else.
       O << "  if ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
@@ -403,11 +406,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
         << "  } else {\n"
         << Commands[0]
         << "  }\n\n";
+    } else if (Commands.size() == 1) {
+      // Emit a single possibility.
+      O << Commands[0] << "\n\n";
     } else {
       O << "  switch ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
         << ((1 << NumBits)-1) << ") {\n"
         << "  default:   // unreachable.\n";
-      
+
       // Print out all the cases.
       for (unsigned i = 0, e = Commands.size(); i != e; ++i) {
         O << "  case " << i << ":\n";
@@ -417,7 +423,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
       O << "  }\n\n";
     }
   }
-  
+
   // Okay, delete instructions with no operand info left.
   for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
     // Entire instruction has been emitted?
@@ -428,12 +434,12 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
     }
   }
 
-    
+
   // Because this is a vector, we want to emit from the end.  Reverse all of the
   // elements in the vector.
   std::reverse(Instructions.begin(), Instructions.end());
-  
-  
+
+
   // Now that we've emitted all of the operand info that fit into 32 bits, emit
   // information for those instructions that are left.  This is a less dense
   // encoding, but we expect the main 32-bit table to handle the majority of
@@ -453,11 +459,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
 
 
 void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   Record *AsmWriter = Target.getAsmWriter();
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
   const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
-  
+
   StringToOffsetTable StringTable;
   O <<
   "\n\n/// getRegisterName - This method is automatically generated by tblgen\n"
@@ -475,33 +481,33 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
     std::string AsmName = Reg.TheDef->getValueAsString("AsmName");
     if (AsmName.empty())
       AsmName = Reg.getName();
-    
-    
+
+
     if ((i % 14) == 0)
       O << "\n    ";
-    
+
     O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
   }
   O << "0\n"
     << "  };\n"
     << "\n";
-  
+
   O << "  const char *AsmStrs =\n";
   StringTable.EmitString(O);
   O << ";\n";
-  
+
   O << "  return AsmStrs+RegAsmOffset[RegNo-1];\n"
     << "}\n";
 }
 
 void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   Record *AsmWriter = Target.getAsmWriter();
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
 
   const std::vector<const CodeGenInstruction*> &NumberedInstructions =
     Target.getInstructionsByEnumValue();
-  
+
   StringToOffsetTable StringTable;
   O <<
 "\n\n#ifdef GET_INSTRUCTION_NAME\n"
@@ -517,21 +523,21 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
   << "  static const unsigned InstAsmOffset[] = {";
   for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
     const CodeGenInstruction &Inst = *NumberedInstructions[i];
-    
+
     std::string AsmName = Inst.TheDef->getName();
     if ((i % 14) == 0)
       O << "\n    ";
-    
+
     O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
   }
   O << "0\n"
   << "  };\n"
   << "\n";
-  
+
   O << "  const char *Strs =\n";
   StringTable.EmitString(O);
   O << ";\n";
-  
+
   O << "  return Strs+InstAsmOffset[Opcode];\n"
   << "}\n\n#endif\n";
 }
@@ -540,7 +546,7 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
 
 void AsmWriterEmitter::run(raw_ostream &O) {
   EmitSourceFileHeader("Assembly Writer Source Fragment", O);
-  
+
   EmitPrintInstruction(O);
   EmitGetRegisterName(O);
   EmitGetInstructionName(O);
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index b2228b037ddd..fdf447f2aaf3 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -53,8 +53,6 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI,
                              int OperandSpacing) {
   this->CGI = &CGI;
   
-  unsigned CurVariant = ~0U;  // ~0 if we are outside a {.|.|.} region, other #.
-  
   // This is the number of tabs we've seen if we're doing columnar layout.
   unsigned CurColumn = 0;
   
@@ -62,54 +60,48 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI,
   // NOTE: Any extensions to this code need to be mirrored in the 
   // AsmPrinter::printInlineAsm code that executes as compile time (assuming
   // that inline asm strings should also get the new feature)!
-  const std::string &AsmString = CGI.AsmString;
+  std::string AsmString = CGI.FlattenAsmStringVariants(CGI.AsmString, Variant);
   std::string::size_type LastEmitted = 0;
   while (LastEmitted != AsmString.size()) {
     std::string::size_type DollarPos =
-    AsmString.find_first_of("${|}\\", LastEmitted);
+      AsmString.find_first_of("$\\", LastEmitted);
     if (DollarPos == std::string::npos) DollarPos = AsmString.size();
     
     // Emit a constant string fragment.
-    
     if (DollarPos != LastEmitted) {
-      if (CurVariant == Variant || CurVariant == ~0U) {
-        for (; LastEmitted != DollarPos; ++LastEmitted)
-          switch (AsmString[LastEmitted]) {
-            case '\n':
-              AddLiteralString("\\n");
-              break;
-            case '\t':
-              // If the asm writer is not using a columnar layout, \t is not
-              // magic.
-              if (FirstOperandColumn == -1 || OperandSpacing == -1) {
-                AddLiteralString("\\t");
-              } else {
-                // We recognize a tab as an operand delimeter.
-                unsigned DestColumn = FirstOperandColumn + 
-                CurColumn++ * OperandSpacing;
-                Operands.push_back(
-                  AsmWriterOperand(
-                    "O.PadToColumn(" +
-                    utostr(DestColumn) + ");\n",
-                    AsmWriterOperand::isLiteralStatementOperand));
-              }
-              break;
-            case '"':
-              AddLiteralString("\\\"");
-              break;
-            case '\\':
-              AddLiteralString("\\\\");
-              break;
-            default:
-              AddLiteralString(std::string(1, AsmString[LastEmitted]));
-              break;
-          }
-      } else {
-        LastEmitted = DollarPos;
-      }
+      for (; LastEmitted != DollarPos; ++LastEmitted)
+        switch (AsmString[LastEmitted]) {
+          case '\n':
+            AddLiteralString("\\n");
+            break;
+          case '\t':
+            // If the asm writer is not using a columnar layout, \t is not
+            // magic.
+            if (FirstOperandColumn == -1 || OperandSpacing == -1) {
+              AddLiteralString("\\t");
+            } else {
+              // We recognize a tab as an operand delimeter.
+              unsigned DestColumn = FirstOperandColumn + 
+              CurColumn++ * OperandSpacing;
+              Operands.push_back(
+                AsmWriterOperand(
+                  "O.PadToColumn(" +
+                  utostr(DestColumn) + ");\n",
+                  AsmWriterOperand::isLiteralStatementOperand));
+            }
+            break;
+          case '"':
+            AddLiteralString("\\\"");
+            break;
+          case '\\':
+            AddLiteralString("\\\\");
+            break;
+          default:
+            AddLiteralString(std::string(1, AsmString[LastEmitted]));
+            break;
+        }
     } else if (AsmString[DollarPos] == '\\') {
-      if (DollarPos+1 != AsmString.size() &&
-          (CurVariant == Variant || CurVariant == ~0U)) {
+      if (DollarPos+1 != AsmString.size()) {
         if (AsmString[DollarPos+1] == 'n') {
           AddLiteralString("\\n");
         } else if (AsmString[DollarPos+1] == 't') {
@@ -137,29 +129,9 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI,
         LastEmitted = DollarPos+2;
         continue;
       }
-    } else if (AsmString[DollarPos] == '{') {
-      if (CurVariant != ~0U)
-        throw "Nested variants found for instruction '" +
-        CGI.TheDef->getName() + "'!";
-      LastEmitted = DollarPos+1;
-      CurVariant = 0;   // We are now inside of the variant!
-    } else if (AsmString[DollarPos] == '|') {
-      if (CurVariant == ~0U)
-        throw "'|' character found outside of a variant in instruction '"
-        + CGI.TheDef->getName() + "'!";
-      ++CurVariant;
-      ++LastEmitted;
-    } else if (AsmString[DollarPos] == '}') {
-      if (CurVariant == ~0U)
-        throw "'}' character found outside of a variant in instruction '"
-        + CGI.TheDef->getName() + "'!";
-      ++LastEmitted;
-      CurVariant = ~0U;
     } else if (DollarPos+1 != AsmString.size() &&
                AsmString[DollarPos+1] == '$') {
-      if (CurVariant == Variant || CurVariant == ~0U) {
-        AddLiteralString("$");  // "$$" -> $
-      }
+      AddLiteralString("$");  // "$$" -> $
       LastEmitted = DollarPos+2;
     } else {
       // Get the name of the variable.
@@ -226,16 +198,12 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI,
                                             Modifier));
       } else {
         // Otherwise, normal operand.
-        unsigned OpNo = CGI.getOperandNamed(VarName);
-        CodeGenInstruction::OperandInfo OpInfo = CGI.OperandList[OpNo];
+        unsigned OpNo = CGI.Operands.getOperandNamed(VarName);
+        CGIOperandList::OperandInfo OpInfo = CGI.Operands[OpNo];
         
-        if (CurVariant == Variant || CurVariant == ~0U) {
-          unsigned MIOp = OpInfo.MIOperandNo;
-          Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, 
-                                              OpNo,
-                                              MIOp,
-                                              Modifier));
-        }
+        unsigned MIOp = OpInfo.MIOperandNo;
+        Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, 
+                                            OpNo, MIOp, Modifier));
       }
       LastEmitted = VarEnd;
     }
diff --git a/utils/TableGen/AsmWriterInst.h b/utils/TableGen/AsmWriterInst.h
index 20b8588862b7..ec7d8eb10395 100644
--- a/utils/TableGen/AsmWriterInst.h
+++ b/utils/TableGen/AsmWriterInst.h
@@ -23,51 +23,51 @@
 namespace llvm {
   class CodeGenInstruction;
   class Record;
-  
+
   struct AsmWriterOperand {
     enum OpType {
       // Output this text surrounded by quotes to the asm.
-      isLiteralTextOperand, 
+      isLiteralTextOperand,
       // This is the name of a routine to call to print the operand.
       isMachineInstrOperand,
       // Output this text verbatim to the asm writer.  It is code that
       // will output some text to the asm.
       isLiteralStatementOperand
     } OperandType;
-    
+
     /// Str - For isLiteralTextOperand, this IS the literal text.  For
     /// isMachineInstrOperand, this is the PrinterMethodName for the operand..
-    /// For isLiteralStatementOperand, this is the code to insert verbatim 
+    /// For isLiteralStatementOperand, this is the code to insert verbatim
     /// into the asm writer.
     std::string Str;
-    
+
     /// CGIOpNo - For isMachineInstrOperand, this is the index of the operand in
     /// the CodeGenInstruction.
     unsigned CGIOpNo;
-    
+
     /// MiOpNo - For isMachineInstrOperand, this is the operand number of the
     /// machine instruction.
     unsigned MIOpNo;
-    
+
     /// MiModifier - For isMachineInstrOperand, this is the modifier string for
     /// an operand, specified with syntax like ${opname:modifier}.
     std::string MiModifier;
-    
+
     // To make VS STL happy
     AsmWriterOperand(OpType op = isLiteralTextOperand):OperandType(op) {}
-    
+
     AsmWriterOperand(const std::string &LitStr,
                      OpType op = isLiteralTextOperand)
     : OperandType(op), Str(LitStr) {}
-    
+
     AsmWriterOperand(const std::string &Printer,
                      unsigned _CGIOpNo,
                      unsigned _MIOpNo,
                      const std::string &Modifier,
-                     OpType op = isMachineInstrOperand) 
+                     OpType op = isMachineInstrOperand)
     : OperandType(op), Str(Printer), CGIOpNo(_CGIOpNo), MIOpNo(_MIOpNo),
     MiModifier(Modifier) {}
-    
+
     bool operator!=(const AsmWriterOperand &Other) const {
       if (OperandType != Other.OperandType || Str != Other.Str) return true;
       if (OperandType == isMachineInstrOperand)
@@ -77,26 +77,26 @@ namespace llvm {
     bool operator==(const AsmWriterOperand &Other) const {
       return !operator!=(Other);
     }
-    
+
     /// getCode - Return the code that prints this operand.
     std::string getCode() const;
   };
-  
+
   class AsmWriterInst {
   public:
     std::vector<AsmWriterOperand> Operands;
     const CodeGenInstruction *CGI;
-    
-    AsmWriterInst(const CodeGenInstruction &CGI, 
+
+    AsmWriterInst(const CodeGenInstruction &CGI,
                   unsigned Variant,
                   int FirstOperandColumn,
                   int OperandSpacing);
-    
+
     /// MatchesAllButOneOp - If this instruction is exactly identical to the
     /// specified instruction except for one differing operand, return the
     /// differing operand number.  Otherwise return ~0.
     unsigned MatchesAllButOneOp(const AsmWriterInst &Other) const;
-    
+
   private:
     void AddLiteralString(const std::string &Str) {
       // If the last operand was already a literal text string, append this to
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 972989ba6232..e24314c3e0ec 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -1,3 +1,8 @@
+set(LLVM_REQUIRES_EH 1)
+set(LLVM_REQUIRES_RTTI 1)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR})
+
 add_executable(tblgen
   ARMDecoderEmitter.cpp
   AsmMatcherEmitter.cpp
@@ -7,6 +12,7 @@ add_executable(tblgen
   ClangASTNodesEmitter.cpp
   ClangAttrEmitter.cpp
   ClangDiagnosticsEmitter.cpp
+  ClangSACheckersEmitter.cpp
   CodeEmitterGen.cpp
   CodeGenDAGPatterns.cpp
   CodeGenInstruction.cpp
@@ -19,6 +25,7 @@ add_executable(tblgen
   DisassemblerEmitter.cpp
   EDEmitter.cpp
   FastISelEmitter.cpp
+  FixedLenDecoderEmitter.cpp
   InstrEnumEmitter.cpp
   InstrInfoEmitter.cpp
   IntrinsicEmitter.cpp
@@ -27,6 +34,7 @@ add_executable(tblgen
   OptParserEmitter.cpp
   Record.cpp
   RegisterInfoEmitter.cpp
+  StringMatcher.cpp
   SubtargetEmitter.cpp
   TGLexer.cpp
   TGParser.cpp
@@ -37,10 +45,12 @@ add_executable(tblgen
   X86RecognizableInstr.cpp
   )
 
-target_link_libraries(tblgen LLVMSupport LLVMSystem)
+target_link_libraries(tblgen LLVMSupport)
 if( MINGW )
   target_link_libraries(tblgen imagehlp psapi)
 endif( MINGW )
 if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD AND NOT BEOS )
   target_link_libraries(tblgen pthread)
 endif()
+
+install(TARGETS tblgen RUNTIME DESTINATION bin)
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index 7643609b8724..c51afd82a37a 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -26,9 +26,9 @@ void CallingConvEmitter::run(raw_ostream &O) {
   // other.
   for (unsigned i = 0, e = CCs.size(); i != e; ++i) {
     O << "static bool " << CCs[i]->getName()
-      << "(unsigned ValNo, EVT ValVT,\n"
+      << "(unsigned ValNo, MVT ValVT,\n"
       << std::string(CCs[i]->getName().size()+13, ' ')
-      << "EVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+      << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
       << std::string(CCs[i]->getName().size()+13, ' ')
       << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
   }
@@ -44,9 +44,9 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
   Counter = 0;
 
   O << "\n\nstatic bool " << CC->getName()
-    << "(unsigned ValNo, EVT ValVT,\n"
+    << "(unsigned ValNo, MVT ValVT,\n"
     << std::string(CC->getName().size()+13, ' ')
-    << "EVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+    << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
     << std::string(CC->getName().size()+13, ' ')
     << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
   // Emit all of the actions, in order.
@@ -163,12 +163,12 @@ void CallingConvEmitter::EmitAction(Record *Action,
         O << Size << ", ";
       else
         O << "\n" << IndentStr << "  State.getTarget().getTargetData()"
-          "->getTypeAllocSize(LocVT.getTypeForEVT(State.getContext())), ";
+          "->getTypeAllocSize(EVT(LocVT).getTypeForEVT(State.getContext())), ";
       if (Align)
         O << Align;
       else
         O << "\n" << IndentStr << "  State.getTarget().getTargetData()"
-          "->getABITypeAlignment(LocVT.getTypeForEVT(State.getContext()))";
+          "->getABITypeAlignment(EVT(LocVT).getTypeForEVT(State.getContext()))";
       if (Action->isSubClassOf("CCAssignToStackWithShadow"))
         O << ", " << getQualifiedName(Action->getValueAsDef("ShadowReg"));
       O << ");\n" << IndentStr
diff --git a/utils/TableGen/ClangASTNodesEmitter.h b/utils/TableGen/ClangASTNodesEmitter.h
index abf9c9a0f164..712333bd2d25 100644
--- a/utils/TableGen/ClangASTNodesEmitter.h
+++ b/utils/TableGen/ClangASTNodesEmitter.h
@@ -57,7 +57,7 @@ class ClangASTNodesEmitter : public TableGenBackend {
 public:
   explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N,
                                 const std::string &S)
-    : Records(R), Root(N, SMLoc()), BaseSuffix(S)
+    : Records(R), Root(N, SMLoc(), R), BaseSuffix(S)
     {}
 
   // run - Output the .inc file contents
diff --git a/utils/TableGen/ClangAttrEmitter.cpp b/utils/TableGen/ClangAttrEmitter.cpp
index 8d3399a95970..27e1e027d0fa 100644
--- a/utils/TableGen/ClangAttrEmitter.cpp
+++ b/utils/TableGen/ClangAttrEmitter.cpp
@@ -19,8 +19,8 @@
 
 using namespace llvm;
 
-static const std::vector<StringRef> getValueAsListOfStrings(Record &R,
-                                                            StringRef FieldName) {
+static const std::vector<StringRef>
+getValueAsListOfStrings(Record &R, StringRef FieldName) {
   ListInit *List = R.getValueAsListInit(FieldName);
   assert (List && "Got a null ListInit");
 
@@ -44,7 +44,8 @@ std::string ReadPCHRecord(StringRef type) {
   return StringSwitch<std::string>(type)
     .EndsWith("Decl *", "cast_or_null<" + std::string(type, 0, type.size()-1) +
               ">(GetDecl(Record[Idx++]))")
-    .Case("QualType", "ReadTypeRecord(Idx++)")
+    .Case("QualType", "GetType(Record[Idx++])")
+    .Case("Expr *", "ReadSubExpr()")
     .Default("Record[Idx++]");
 }
 
@@ -54,6 +55,7 @@ std::string WritePCHRecord(StringRef type, StringRef name) {
     .EndsWith("Decl *", "AddDeclRef(" + std::string(name) +
                         ", Record);\n")
     .Case("QualType", "AddTypeRef(" + std::string(name) + ", Record);\n")
+    .Case("Expr *", "AddStmt(" + std::string(name) + ");\n")
     .Default("Record.push_back(" + std::string(name) + ");\n");
 }
 
@@ -171,7 +173,8 @@ namespace {
       OS << "char *" << getLowerName() << ";";
     }
     void writePCHReadDecls(raw_ostream &OS) const {
-      OS << "    std::string " << getLowerName() << "= ReadString(Record, Idx);\n";
+      OS << "    std::string " << getLowerName()
+         << "= ReadString(Record, Idx);\n";
     }
     void writePCHReadArgs(raw_ostream &OS) const {
       OS << getLowerName();
@@ -269,10 +272,10 @@ namespace {
       OS << "    bool is" << getLowerName() << "Expr = Record[Idx++];\n";
       OS << "    void *" << getLowerName() << "Ptr;\n";
       OS << "    if (is" << getLowerName() << "Expr)\n";
-      OS << "      " << getLowerName() << "Ptr = ReadExpr(DeclsCursor);\n";
+      OS << "      " << getLowerName() << "Ptr = ReadExpr(F);\n";
       OS << "    else\n";
       OS << "      " << getLowerName()
-         << "Ptr = GetTypeSourceInfo(DeclsCursor, Record, Idx);\n";
+         << "Ptr = GetTypeSourceInfo(F, Record, Idx);\n";
     }
     void writePCHWrite(raw_ostream &OS) const {
       OS << "    Record.push_back(SA->is" << getUpperName() << "Expr());\n";
@@ -461,8 +464,9 @@ void ClangAttrClassEmitter::run(raw_ostream &OS) {
   for (std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end();
        i != e; ++i) {
     Record &R = **i;
+    const std::string &SuperName = R.getSuperClasses().back()->getName();
 
-    OS << "class " << R.getName() << "Attr : public Attr {\n";
+    OS << "class " << R.getName() << "Attr : public " << SuperName << " {\n";
 
     std::vector<Record*> ArgRecords = R.getValueAsListOfDefs("Args");
     std::vector<Argument*> Args;
@@ -493,7 +497,7 @@ void ClangAttrClassEmitter::run(raw_ostream &OS) {
     }
     
     OS << "             )\n";
-    OS << "    : Attr(attr::" << R.getName() << ", L)\n";
+    OS << "    : " << SuperName << "(attr::" << R.getName() << ", L)\n";
 
     for (ai = Args.begin(); ai != ae; ++ai) {
       OS << "              , ";
@@ -557,31 +561,58 @@ void ClangAttrImplEmitter::run(raw_ostream &OS) {
   }
 }
 
+static void EmitAttrList(raw_ostream &OS, StringRef Class,
+                         const std::vector<Record*> &AttrList) {
+  std::vector<Record*>::const_iterator i = AttrList.begin(), e = AttrList.end();
+
+  if (i != e) {
+    // Move the end iterator back to emit the last attribute.
+    for(--e; i != e; ++i)
+      OS << Class << "(" << (*i)->getName() << ")\n";
+    
+    OS << "LAST_" << Class << "(" << (*i)->getName() << ")\n\n";
+  }
+}
+
 void ClangAttrListEmitter::run(raw_ostream &OS) {
   OS << "// This file is generated by TableGen. Do not edit.\n\n";
 
   OS << "#ifndef LAST_ATTR\n";
   OS << "#define LAST_ATTR(NAME) ATTR(NAME)\n";
   OS << "#endif\n\n";
-   
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
-  std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end();
 
-  if (i != e) {
-    // Move the end iterator back to emit the last attribute.
-    for(--e; i != e; ++i)
-      OS << "ATTR(" << (*i)->getName() << ")\n";
-    
-    OS << "LAST_ATTR(" << (*i)->getName() << ")\n\n";
+  OS << "#ifndef INHERITABLE_ATTR\n";
+  OS << "#define INHERITABLE_ATTR(NAME) ATTR(NAME)\n";
+  OS << "#endif\n\n";
+
+  OS << "#ifndef LAST_INHERITABLE_ATTR\n";
+  OS << "#define LAST_INHERITABLE_ATTR(NAME) INHERITABLE_ATTR(NAME)\n";
+  OS << "#endif\n\n";
+
+  Record *InhClass = Records.getClass("InheritableAttr");
+  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"),
+                       NonInhAttrs, InhAttrs;
+  for (std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end();
+       i != e; ++i) {
+    if ((*i)->isSubClassOf(InhClass))
+      InhAttrs.push_back(*i);
+    else
+      NonInhAttrs.push_back(*i);
   }
 
+  EmitAttrList(OS, "INHERITABLE_ATTR", InhAttrs);
+  EmitAttrList(OS, "ATTR", NonInhAttrs);
+
   OS << "#undef LAST_ATTR\n";
+  OS << "#undef INHERITABLE_ATTR\n";
+  OS << "#undef LAST_INHERITABLE_ATTR\n";
   OS << "#undef ATTR\n";
 }
 
 void ClangAttrPCHReadEmitter::run(raw_ostream &OS) {
-  OS << "// This file is generated by TableGen. Do not edi.\n\n";
+  OS << "// This file is generated by TableGen. Do not edit.\n\n";
 
+  Record *InhClass = Records.getClass("InheritableAttr");
   std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"),
                        ArgRecords;
   std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end(), ai, ae;
@@ -595,6 +626,8 @@ void ClangAttrPCHReadEmitter::run(raw_ostream &OS) {
   for (; i != e; ++i) {
     Record &R = **i;
     OS << "  case attr::" << R.getName() << ": {\n";
+    if (R.isSubClassOf(InhClass))
+      OS << "    bool isInherited = Record[Idx++];\n";
     ArgRecords = R.getValueAsListOfDefs("Args");
     Args.clear();
     for (ai = ArgRecords.begin(), ae = ArgRecords.end(); ai != ae; ++ai) {
@@ -608,6 +641,8 @@ void ClangAttrPCHReadEmitter::run(raw_ostream &OS) {
       (*ri)->writePCHReadArgs(OS);
     }
     OS << ");\n";
+    if (R.isSubClassOf(InhClass))
+      OS << "    cast<InheritableAttr>(New)->setInherited(isInherited);\n";
     OS << "    break;\n";
     OS << "  }\n";
   }
@@ -615,6 +650,7 @@ void ClangAttrPCHReadEmitter::run(raw_ostream &OS) {
 }
 
 void ClangAttrPCHWriteEmitter::run(raw_ostream &OS) {
+  Record *InhClass = Records.getClass("InheritableAttr");
   std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"), Args;
   std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end(), ai, ae;
 
@@ -626,9 +662,11 @@ void ClangAttrPCHWriteEmitter::run(raw_ostream &OS) {
     Record &R = **i;
     OS << "  case attr::" << R.getName() << ": {\n";
     Args = R.getValueAsListOfDefs("Args");
-    if (!Args.empty())
+    if (R.isSubClassOf(InhClass) || !Args.empty())
       OS << "    const " << R.getName() << "Attr *SA = cast<" << R.getName()
          << "Attr>(A);\n";
+    if (R.isSubClassOf(InhClass))
+      OS << "    Record.push_back(SA->isInherited());\n";
     for (ai = Args.begin(), ae = Args.end(); ai != ae; ++ai)
       createArgument(**ai, R.getName())->writePCHWrite(OS);
     OS << "    break;\n";
@@ -636,3 +674,21 @@ void ClangAttrPCHWriteEmitter::run(raw_ostream &OS) {
   }
   OS << "  }\n";
 }
+
+void ClangAttrSpellingListEmitter::run(raw_ostream &OS) {
+  OS << "// This file is generated by TableGen. Do not edit.\n\n";
+
+  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
+  
+  for (std::vector<Record*>::iterator I = Attrs.begin(), E = Attrs.end(); I != E; ++I) {
+    Record &Attr = **I;
+
+    std::vector<StringRef> Spellings = getValueAsListOfStrings(Attr, "Spellings");
+
+    for (std::vector<StringRef>::const_iterator I = Spellings.begin(), E = Spellings.end(); I != E; ++I) {
+      StringRef Spelling = *I;
+      OS << ".Case(\"" << Spelling << "\", true)\n";
+    }
+  }
+
+}
diff --git a/utils/TableGen/ClangAttrEmitter.h b/utils/TableGen/ClangAttrEmitter.h
index 83149824b2e7..af870098a842 100644
--- a/utils/TableGen/ClangAttrEmitter.h
+++ b/utils/TableGen/ClangAttrEmitter.h
@@ -83,6 +83,19 @@ public:
   void run(raw_ostream &OS);
 };
 
+/// ClangAttrSpellingListEmitter - class emits the list of spellings for attributes for
+///   clang.
+class ClangAttrSpellingListEmitter : public TableGenBackend {
+  RecordKeeper &Records;
+
+ public:
+  explicit ClangAttrSpellingListEmitter(RecordKeeper &R)
+    : Records(R)
+    {}
+
+  void run(raw_ostream &OS);
+};
+
 }
 
 #endif
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 75b6252c4f9f..60e67c467466 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -29,9 +29,10 @@ using namespace llvm;
 
 namespace {
 class DiagGroupParentMap {
+  RecordKeeper &Records;
   std::map<const Record*, std::vector<Record*> > Mapping;
 public:
-  DiagGroupParentMap() {
+  DiagGroupParentMap(RecordKeeper &records) : Records(records) {
     std::vector<Record*> DiagGroups
       = Records.getAllDerivedDefinitions("DiagGroup");
     for (unsigned i = 0, e = DiagGroups.size(); i != e; ++i) {
@@ -84,11 +85,12 @@ static std::string getDiagnosticCategory(const Record *R,
 
 namespace {
   class DiagCategoryIDMap {
+    RecordKeeper &Records;
     StringMap<unsigned> CategoryIDs;
     std::vector<std::string> CategoryStrings;
   public:
-    DiagCategoryIDMap() {
-      DiagGroupParentMap ParentInfo;
+    DiagCategoryIDMap(RecordKeeper &records) : Records(records) {
+      DiagGroupParentMap ParentInfo(Records);
       
       // The zero'th category is "".
       CategoryStrings.push_back("");
@@ -138,8 +140,8 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
   const std::vector<Record*> &Diags =
     Records.getAllDerivedDefinitions("Diagnostic");
   
-  DiagCategoryIDMap CategoryIDs;
-  DiagGroupParentMap DGParentMap;
+  DiagCategoryIDMap CategoryIDs(Records);
+  DiagGroupParentMap DGParentMap(Records);
 
   for (unsigned i = 0, e = Diags.size(); i != e; ++i) {
     const Record &R = *Diags[i];
@@ -168,7 +170,13 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
       OS << ", true";
     else
       OS << ", false";
-    
+
+    // Access control bit
+    if (R.getValueAsBit("AccessControl"))
+      OS << ", true";
+    else
+      OS << ", false";
+
     // Category number.
     OS << ", " << CategoryIDs.getID(getDiagnosticCategory(&R, DGParentMap));
     OS << ")\n";
@@ -179,15 +187,17 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
 // Warning Group Tables generation
 //===----------------------------------------------------------------------===//
 
+namespace {
 struct GroupInfo {
   std::vector<const Record*> DiagsInGroup;
   std::vector<std::string> SubGroups;
   unsigned IDNo;
 };
+} // end anonymous namespace.
 
 void ClangDiagGroupsEmitter::run(raw_ostream &OS) {
   // Compute a mapping from a DiagGroup to all of its parents.
-  DiagGroupParentMap DGParentMap;
+  DiagGroupParentMap DGParentMap(Records);
   
   // Invert the 1-[0/1] mapping of diags to group into a one to many mapping of
   // groups to diags in the group.
@@ -277,7 +287,7 @@ void ClangDiagGroupsEmitter::run(raw_ostream &OS) {
   OS << "#endif // GET_DIAG_TABLE\n\n";
   
   // Emit the category table next.
-  DiagCategoryIDMap CategoriesByID;
+  DiagCategoryIDMap CategoriesByID(Records);
   OS << "\n#ifdef GET_CATEGORY_TABLE\n";
   for (DiagCategoryIDMap::iterator I = CategoriesByID.begin(),
        E = CategoriesByID.end(); I != E; ++I)
diff --git a/utils/TableGen/ClangSACheckersEmitter.cpp b/utils/TableGen/ClangSACheckersEmitter.cpp
new file mode 100644
index 000000000000..3e49ab138fcd
--- /dev/null
+++ b/utils/TableGen/ClangSACheckersEmitter.cpp
@@ -0,0 +1,229 @@
+//=- ClangSACheckersEmitter.cpp - Generate Clang SA checkers tables -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits Clang Static Analyzer checkers tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckersEmitter.h"
+#include "Record.h"
+#include "llvm/ADT/DenseSet.h"
+#include <map>
+#include <string>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Analyzer Checkers Tables generation
+//===----------------------------------------------------------------------===//
+
+/// \brief True if it is specified hidden or a parent package is specified
+/// as hidden, otherwise false.
+static bool isHidden(const Record &R) {
+  if (R.getValueAsBit("Hidden"))
+    return true;
+  // Not declared as hidden, check the parent package if it is hidden.
+  if (DefInit *DI = dynamic_cast<DefInit*>(R.getValueInit("ParentPackage")))
+    return isHidden(*DI->getDef());
+
+  return false;
+}
+
+static bool isCheckerNamed(const Record *R) {
+  return !R->getValueAsString("CheckerName").empty();
+}
+
+static std::string getPackageFullName(const Record *R);
+
+static std::string getParentPackageFullName(const Record *R) {
+  std::string name;
+  if (DefInit *DI = dynamic_cast<DefInit*>(R->getValueInit("ParentPackage")))
+    name = getPackageFullName(DI->getDef());
+  return name;
+}
+
+static std::string getPackageFullName(const Record *R) {
+  std::string name = getParentPackageFullName(R);
+  if (!name.empty()) name += ".";
+  return name + R->getValueAsString("PackageName");
+}
+
+static std::string getCheckerFullName(const Record *R) {
+  std::string name = getParentPackageFullName(R);
+  if (isCheckerNamed(R)) {
+    if (!name.empty()) name += ".";
+    name += R->getValueAsString("CheckerName");
+  }
+  return name;
+}
+
+static std::string getStringValue(const Record &R, StringRef field) {
+  if (StringInit *
+        SI = dynamic_cast<StringInit*>(R.getValueInit(field)))
+    return SI->getValue();
+  return std::string();
+}
+
+namespace {
+struct GroupInfo {
+  std::vector<const Record*> Checkers;
+  llvm::DenseSet<const Record *> SubGroups;
+  bool Hidden;
+  unsigned Index;
+
+  GroupInfo() : Hidden(false) { }
+};
+}
+
+void ClangSACheckersEmitter::run(raw_ostream &OS) {
+  std::vector<Record*> checkers = Records.getAllDerivedDefinitions("Checker");
+  llvm::DenseMap<const Record *, unsigned> checkerRecIndexMap;
+  for (unsigned i = 0, e = checkers.size(); i != e; ++i)
+    checkerRecIndexMap[checkers[i]] = i;
+  
+  OS << "\n#ifdef GET_CHECKERS\n";
+  for (unsigned i = 0, e = checkers.size(); i != e; ++i) {
+    const Record &R = *checkers[i];
+
+    OS << "CHECKER(" << "\"";
+    std::string name;
+    if (isCheckerNamed(&R))
+      name = getCheckerFullName(&R);
+    OS.write_escaped(name) << "\", ";
+    OS << R.getName() << ", ";
+    OS << getStringValue(R, "DescFile") << ", ";
+    OS << "\"";
+    OS.write_escaped(getStringValue(R, "HelpText")) << "\", ";
+    // Hidden bit
+    if (isHidden(R))
+      OS << "true";
+    else
+      OS << "false";
+    OS << ")\n";
+  }
+  OS << "#endif // GET_CHECKERS\n\n";
+
+  // Invert the mapping of checkers to package/group into a one to many
+  // mapping of packages/groups to checkers.
+  std::map<std::string, GroupInfo> groupInfoByName;
+  llvm::DenseMap<const Record *, GroupInfo *> recordGroupMap;
+
+  std::vector<Record*> packages = Records.getAllDerivedDefinitions("Package");
+  for (unsigned i = 0, e = packages.size(); i != e; ++i) {
+    Record *R = packages[i];
+    std::string fullName = getPackageFullName(R);
+    if (!fullName.empty()) {
+      GroupInfo &info = groupInfoByName[fullName];
+      info.Hidden = isHidden(*R);
+      recordGroupMap[R] = &info;
+    }
+  }
+
+  std::vector<Record*>
+      checkerGroups = Records.getAllDerivedDefinitions("CheckerGroup");
+  for (unsigned i = 0, e = checkerGroups.size(); i != e; ++i) {
+    Record *R = checkerGroups[i];
+    std::string name = R->getValueAsString("GroupName");
+    if (!name.empty()) {
+      GroupInfo &info = groupInfoByName[name];
+      recordGroupMap[R] = &info;
+    }
+  }
+
+  for (unsigned i = 0, e = checkers.size(); i != e; ++i) {
+    Record *R = checkers[i];
+    Record *package = 0;
+    if (DefInit *
+          DI = dynamic_cast<DefInit*>(R->getValueInit("ParentPackage")))
+      package = DI->getDef();
+    if (!isCheckerNamed(R) && !package)
+      throw "Checker '" + R->getName() + "' is neither named, nor in a package!";
+
+    if (isCheckerNamed(R)) {
+      // Create a pseudo-group to hold this checker.
+      std::string fullName = getCheckerFullName(R);
+      GroupInfo &info = groupInfoByName[fullName];
+      recordGroupMap[R] = &info;
+      info.Checkers.push_back(R);
+    } else {
+      recordGroupMap[package]->Checkers.push_back(R);
+    }
+
+    Record *currR = isCheckerNamed(R) ? R : package;
+    // Insert the checker and its parent packages into the subgroups set of
+    // the corresponding parent package.
+    while (DefInit *DI
+             = dynamic_cast<DefInit*>(currR->getValueInit("ParentPackage"))) {
+      Record *parentPackage = DI->getDef();
+      recordGroupMap[parentPackage]->SubGroups.insert(currR);
+      currR = parentPackage;
+    }
+    // Insert the checker into the set of its group.
+    if (DefInit *DI = dynamic_cast<DefInit*>(R->getValueInit("Group")))
+      recordGroupMap[DI->getDef()]->Checkers.push_back(R);
+  }
+
+  unsigned index = 0;
+  for (std::map<std::string, GroupInfo>::iterator
+         I = groupInfoByName.begin(), E = groupInfoByName.end(); I != E; ++I)
+    I->second.Index = index++;
+
+  // Walk through the packages/groups/checkers emitting an array for each
+  // set of checkers and an array for each set of subpackages.
+
+  OS << "\n#ifdef GET_MEMBER_ARRAYS\n";
+  unsigned maxLen = 0;
+  for (std::map<std::string, GroupInfo>::iterator
+         I = groupInfoByName.begin(), E = groupInfoByName.end(); I != E; ++I) {
+    maxLen = std::max(maxLen, (unsigned)I->first.size());
+    
+    std::vector<const Record*> &V = I->second.Checkers;
+    if (!V.empty()) {
+      OS << "static const short CheckerArray" << I->second.Index << "[] = { ";
+      for (unsigned i = 0, e = V.size(); i != e; ++i)
+        OS << checkerRecIndexMap[V[i]] << ", ";
+      OS << "-1 };\n";
+    }
+    
+    llvm::DenseSet<const Record *> &subGroups = I->second.SubGroups;
+    if (!subGroups.empty()) {
+      OS << "static const short SubPackageArray" << I->second.Index << "[] = { ";
+      for (llvm::DenseSet<const Record *>::iterator
+             I = subGroups.begin(), E = subGroups.end(); I != E; ++I) {
+        OS << recordGroupMap[*I]->Index << ", ";
+      }
+      OS << "-1 };\n";
+    }
+  }
+  OS << "#endif // GET_MEMBER_ARRAYS\n\n";
+
+  OS << "\n#ifdef GET_CHECKNAME_TABLE\n";
+  for (std::map<std::string, GroupInfo>::iterator
+         I = groupInfoByName.begin(), E = groupInfoByName.end(); I != E; ++I) {
+    // Group option string.
+    OS << "  { \"";
+    OS.write_escaped(I->first) << "\","
+                               << std::string(maxLen-I->first.size()+1, ' ');
+    
+    if (I->second.Checkers.empty())
+      OS << "0, ";
+    else
+      OS << "CheckerArray" << I->second.Index << ", ";
+    
+    // Subgroups.
+    if (I->second.SubGroups.empty())
+      OS << "0, ";
+    else
+      OS << "SubPackageArray" << I->second.Index << ", ";
+
+    OS << (I->second.Hidden ? "true" : "false");
+
+    OS << " },\n";
+  }
+  OS << "#endif // GET_CHECKNAME_TABLE\n\n";
+}
diff --git a/utils/TableGen/ClangSACheckersEmitter.h b/utils/TableGen/ClangSACheckersEmitter.h
new file mode 100644
index 000000000000..6bd163547329
--- /dev/null
+++ b/utils/TableGen/ClangSACheckersEmitter.h
@@ -0,0 +1,31 @@
+//===- ClangSACheckersEmitter.h - Generate Clang SA checkers tables -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits Clang Static Analyzer checkers tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANGSACHECKERS_EMITTER_H
+#define CLANGSACHECKERS_EMITTER_H
+
+#include "TableGenBackend.h"
+
+namespace llvm {
+
+class ClangSACheckersEmitter : public TableGenBackend {
+    RecordKeeper &Records;
+public:
+  explicit ClangSACheckersEmitter(RecordKeeper &R) : Records(R) {}
+
+  void run(raw_ostream &OS);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index ec702c2a5d9c..957dd19da1c2 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -17,9 +17,19 @@
 #include "CodeGenTarget.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <map>
 using namespace llvm;
 
+// FIXME: Somewhat hackish to use a command line option for this. There should
+// be a CodeEmitter class in the Target.td that controls this sort of thing
+// instead.
+static cl::opt<bool>
+MCEmitter("mc-emitter",
+          cl::desc("Generate CodeEmitter for use with the MC library."),
+          cl::init(false));
+
 void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
   for (std::vector<Record*>::iterator I = Insts.begin(), E = Insts.end();
        I != E; ++I) {
@@ -42,46 +52,171 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
       unsigned middle = (numBits + 1) / 2;
       NewBI->setBit(middle, BI->getBit(middle));
     }
-    
+
     // Update the bits in reversed order so that emitInstrOpBits will get the
     // correct endianness.
     R->getValue("Inst")->setValue(NewBI);
   }
 }
 
-
 // If the VarBitInit at position 'bit' matches the specified variable then
 // return the variable bit position.  Otherwise return -1.
 int CodeEmitterGen::getVariableBit(const std::string &VarName,
-            BitsInit *BI, int bit) {
-  if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit))) {
-    TypedInit *TI = VBI->getVariable();
+                                   BitsInit *BI, int bit) {
+  if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit)))
+    if (VarInit *VI = dynamic_cast<VarInit*>(VBI->getVariable()))
+      if (VI->getName() == VarName)
+        return VBI->getBitNum();
+
+  return -1;
+}
+
+void CodeEmitterGen::
+AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
+                        unsigned &NumberedOp,
+                        std::string &Case, CodeGenTarget &Target) {
+  CodeGenInstruction &CGI = Target.getInstruction(R);
+
+  // Determine if VarName actually contributes to the Inst encoding.
+  int bit = BI->getNumBits()-1;
+
+  // Scan for a bit that this contributed to.
+  for (; bit >= 0; ) {
+    if (getVariableBit(VarName, BI, bit) != -1)
+      break;
     
-    if (VarInit *VI = dynamic_cast<VarInit*>(TI)) {
-      if (VI->getName() == VarName) return VBI->getBitNum();
+    --bit;
+  }
+  
+  // If we found no bits, ignore this value, otherwise emit the call to get the
+  // operand encoding.
+  if (bit < 0) return;
+  
+  // If the operand matches by name, reference according to that
+  // operand number. Non-matching operands are assumed to be in
+  // order.
+  unsigned OpIdx;
+  if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {
+    // Get the machine operand number for the indicated operand.
+    OpIdx = CGI.Operands[OpIdx].MIOperandNo;
+    assert(!CGI.Operands.isFlatOperandNotEmitted(OpIdx) &&
+           "Explicitly used operand also marked as not emitted!");
+  } else {
+    /// If this operand is not supposed to be emitted by the
+    /// generated emitter, skip it.
+    while (CGI.Operands.isFlatOperandNotEmitted(NumberedOp))
+      ++NumberedOp;
+    OpIdx = NumberedOp++;
+  }
+  
+  std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);
+  std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName;
+  
+  // If the source operand has a custom encoder, use it. This will
+  // get the encoding for all of the suboperands.
+  if (!EncoderMethodName.empty()) {
+    // A custom encoder has all of the information for the
+    // sub-operands, if there are more than one, so only
+    // query the encoder once per source operand.
+    if (SO.second == 0) {
+      Case += "      // op: " + VarName + "\n" +
+              "      op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);
+      if (MCEmitter)
+        Case += ", Fixups";
+      Case += ");\n";
     }
+  } else {
+    Case += "      // op: " + VarName + "\n" +
+      "      op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";
+    if (MCEmitter)
+      Case += ", Fixups";
+    Case += ");\n";
   }
   
-  return -1;
-} 
+  for (; bit >= 0; ) {
+    int varBit = getVariableBit(VarName, BI, bit);
+    
+    // If this bit isn't from a variable, skip it.
+    if (varBit == -1) {
+      --bit;
+      continue;
+    }
+    
+    // Figure out the consecutive range of bits covered by this operand, in
+    // order to generate better encoding code.
+    int beginInstBit = bit;
+    int beginVarBit = varBit;
+    int N = 1;
+    for (--bit; bit >= 0;) {
+      varBit = getVariableBit(VarName, BI, bit);
+      if (varBit == -1 || varBit != (beginVarBit - N)) break;
+      ++N;
+      --bit;
+    }
+     
+    unsigned opMask = ~0U >> (32-N);
+    int opShift = beginVarBit - N + 1;
+    opMask <<= opShift;
+    opShift = beginInstBit - beginVarBit;
+    
+    if (opShift > 0) {
+      Case += "      Value |= (op & " + utostr(opMask) + "U) << " +
+              itostr(opShift) + ";\n";
+    } else if (opShift < 0) {
+      Case += "      Value |= (op & " + utostr(opMask) + "U) >> " + 
+              itostr(-opShift) + ";\n";
+    } else {
+      Case += "      Value |= op & " + utostr(opMask) + "U;\n";
+    }
+  }
+}
 
 
+std::string CodeEmitterGen::getInstructionCase(Record *R,
+                                               CodeGenTarget &Target) {
+  std::string Case;
+  
+  BitsInit *BI = R->getValueAsBitsInit("Inst");
+  const std::vector<RecordVal> &Vals = R->getValues();
+  unsigned NumberedOp = 0;
+
+  // Loop over all of the fields in the instruction, determining which are the
+  // operands to the instruction.
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+    // Ignore fixed fields in the record, we're looking for values like:
+    //    bits<5> RST = { ?, ?, ?, ?, ? };
+    if (Vals[i].getPrefix() || Vals[i].getValue()->isComplete())
+      continue;
+    
+    AddCodeToMergeInOperand(R, BI, Vals[i].getName(), NumberedOp, Case, Target);
+  }
+  
+  std::string PostEmitter = R->getValueAsString("PostEncoderMethod");
+  if (!PostEmitter.empty())
+    Case += "      Value = " + PostEmitter + "(MI, Value);\n";
+  
+  return Case;
+}
+
 void CodeEmitterGen::run(raw_ostream &o) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
-  
+
   // For little-endian instruction bit encodings, reverse the bit order
   if (Target.isLittleEndianEncoding()) reverseBits(Insts);
 
   EmitSourceFileHeader("Machine Code Emitter", o);
-  std::string Namespace = Insts[0]->getValueAsString("Namespace") + "::";
-  
+
   const std::vector<const CodeGenInstruction*> &NumberedInstructions =
     Target.getInstructionsByEnumValue();
 
   // Emit function declaration
-  o << "unsigned " << Target.getName() << "CodeEmitter::"
-    << "getBinaryCodeForInstr(const MachineInstr &MI) {\n";
+  o << "unsigned " << Target.getName();
+  if (MCEmitter)
+    o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+      << "    SmallVectorImpl<MCFixup> &Fixups) const {\n";
+  else
+    o << "CodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {\n";
 
   // Emit instruction base values
   o << "  static const unsigned InstBits[] = {\n";
@@ -91,109 +226,45 @@ void CodeEmitterGen::run(raw_ostream &o) {
        IN != EN; ++IN) {
     const CodeGenInstruction *CGI = *IN;
     Record *R = CGI->TheDef;
-    
+
     if (R->getValueAsString("Namespace") == "TargetOpcode") {
       o << "    0U,\n";
       continue;
     }
-    
+
     BitsInit *BI = R->getValueAsBitsInit("Inst");
 
-    // Start by filling in fixed values...
+    // Start by filling in fixed values.
     unsigned Value = 0;
     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
-      if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(e-i-1))) {
+      if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(e-i-1)))
         Value |= B->getValue() << (e-i-1);
-      }
     }
     o << "    " << Value << "U," << '\t' << "// " << R->getName() << "\n";
   }
   o << "    0U\n  };\n";
-  
+
   // Map to accumulate all the cases.
   std::map<std::string, std::vector<std::string> > CaseMap;
-  
+
   // Construct all cases statement for each opcode
   for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end();
         IC != EC; ++IC) {
     Record *R = *IC;
     if (R->getValueAsString("Namespace") == "TargetOpcode")
       continue;
-    const std::string &InstName = R->getName();
-    std::string Case("");
-
-    BitsInit *BI = R->getValueAsBitsInit("Inst");
-    const std::vector<RecordVal> &Vals = R->getValues();
-    CodeGenInstruction &CGI = Target.getInstruction(R);
-    
-    // Loop over all of the fields in the instruction, determining which are the
-    // operands to the instruction.
-    unsigned op = 0;
-    for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
-      if (!Vals[i].getPrefix() && !Vals[i].getValue()->isComplete()) {
-        // Is the operand continuous? If so, we can just mask and OR it in
-        // instead of doing it bit-by-bit, saving a lot in runtime cost.
-        const std::string &VarName = Vals[i].getName();
-        bool gotOp = false;
-        
-        for (int bit = BI->getNumBits()-1; bit >= 0; ) {
-          int varBit = getVariableBit(VarName, BI, bit);
-          
-          if (varBit == -1) {
-            --bit;
-          } else {
-            int beginInstBit = bit;
-            int beginVarBit = varBit;
-            int N = 1;
-            
-            for (--bit; bit >= 0;) {
-              varBit = getVariableBit(VarName, BI, bit);
-              if (varBit == -1 || varBit != (beginVarBit - N)) break;
-              ++N;
-              --bit;
-            }
-
-            if (!gotOp) {
-              /// If this operand is not supposed to be emitted by the generated
-              /// emitter, skip it.
-              while (CGI.isFlatOperandNotEmitted(op))
-                ++op;
-              
-              Case += "      // op: " + VarName + "\n"
-                   +  "      op = getMachineOpValue(MI, MI.getOperand("
-                   +  utostr(op++) + "));\n";
-              gotOp = true;
-            }
-            
-            unsigned opMask = ~0U >> (32-N);
-            int opShift = beginVarBit - N + 1;
-            opMask <<= opShift;
-            opShift = beginInstBit - beginVarBit;
-            
-            if (opShift > 0) {
-              Case += "      Value |= (op & " + utostr(opMask) + "U) << "
-                   +  itostr(opShift) + ";\n";
-            } else if (opShift < 0) {
-              Case += "      Value |= (op & " + utostr(opMask) + "U) >> "
-                   +  itostr(-opShift) + ";\n";
-            } else {
-              Case += "      Value |= op & " + utostr(opMask) + "U;\n";
-            }
-          }
-        }
-      }
-    }
+    const std::string &InstName = R->getValueAsString("Namespace") + "::"
+      + R->getName();
+    std::string Case = getInstructionCase(R, Target);
 
-    std::vector<std::string> &InstList = CaseMap[Case];
-    InstList.push_back(InstName);
+    CaseMap[Case].push_back(InstName);
   }
 
-
   // Emit initial function code
   o << "  const unsigned opcode = MI.getOpcode();\n"
     << "  unsigned Value = InstBits[opcode];\n"
     << "  unsigned op = 0;\n"
-    << "  op = op;  // suppress warning\n"
+    << "  (void)op;  // suppress warning\n"
     << "  switch (opcode) {\n";
 
   // Emit each case statement
@@ -204,7 +275,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
     for (int i = 0, N = InstList.size(); i < N; i++) {
       if (i) o << "\n";
-      o << "    case " << Namespace << InstList[i]  << ":";
+      o << "    case " << InstList[i]  << ":";
     }
     o << " {\n";
     o << Case;
diff --git a/utils/TableGen/CodeEmitterGen.h b/utils/TableGen/CodeEmitterGen.h
index f0b3229c0411..a874d970feac 100644
--- a/utils/TableGen/CodeEmitterGen.h
+++ b/utils/TableGen/CodeEmitterGen.h
@@ -15,7 +15,6 @@
 #define CODEMITTERGEN_H
 
 #include "TableGenBackend.h"
-#include <map>
 #include <vector>
 #include <string>
 
@@ -23,6 +22,7 @@ namespace llvm {
 
 class RecordVal;
 class BitsInit;
+class CodeGenTarget;
 
 class CodeEmitterGen : public TableGenBackend {
   RecordKeeper &Records;
@@ -36,6 +36,12 @@ private:
   void emitGetValueBit(raw_ostream &o, const std::string &Namespace);
   void reverseBits(std::vector<Record*> &Insts);
   int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
+  std::string getInstructionCase(Record *R, CodeGenTarget &Target);
+  void
+  AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
+                          unsigned &NumberedOp,
+                          std::string &Case, CodeGenTarget &Target);
+    
 };
 
 } // End llvm namespace
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 303aa6c450c2..aa60f871bff5 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -56,11 +56,11 @@ EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) {
 EEVT::TypeSet::TypeSet(const std::vector<MVT::SimpleValueType> &VTList) {
   assert(!VTList.empty() && "empty list?");
   TypeVec.append(VTList.begin(), VTList.end());
-  
+
   if (!VTList.empty())
     assert(VTList[0] != MVT::iAny && VTList[0] != MVT::vAny &&
            VTList[0] != MVT::fAny);
-  
+
   // Verify no duplicates.
   array_pod_sort(TypeVec.begin(), TypeVec.end());
   assert(std::unique(TypeVec.begin(), TypeVec.end()) == TypeVec.end());
@@ -72,9 +72,9 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
                                           bool (*Pred)(MVT::SimpleValueType),
                                           const char *PredicateName) {
   assert(isCompletelyUnknown());
-  const std::vector<MVT::SimpleValueType> &LegalTypes = 
+  const std::vector<MVT::SimpleValueType> &LegalTypes =
     TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
-  
+
   for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i)
     if (Pred == 0 || Pred(LegalTypes[i]))
       TypeVec.push_back(LegalTypes[i]);
@@ -82,14 +82,14 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
   // If we have nothing that matches the predicate, bail out.
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, no " +
-             std::string(PredicateName) + " types found");  
+             std::string(PredicateName) + " types found");
   // No need to sort with one element.
   if (TypeVec.size() == 1) return true;
 
   // Remove duplicates.
   array_pod_sort(TypeVec.begin(), TypeVec.end());
   TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end());
-  
+
   return true;
 }
 
@@ -100,7 +100,7 @@ bool EEVT::TypeSet::hasIntegerTypes() const {
     if (isInteger(TypeVec[i]))
       return true;
   return false;
-}  
+}
 
 /// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or
 /// a floating point value type.
@@ -109,7 +109,7 @@ bool EEVT::TypeSet::hasFloatingPointTypes() const {
     if (isFloatingPoint(TypeVec[i]))
       return true;
   return false;
-}  
+}
 
 /// hasVectorTypes - Return true if this TypeSet contains a vAny or a vector
 /// value type.
@@ -123,9 +123,9 @@ bool EEVT::TypeSet::hasVectorTypes() const {
 
 std::string EEVT::TypeSet::getName() const {
   if (TypeVec.empty()) return "<empty>";
-  
+
   std::string Result;
-    
+
   for (unsigned i = 0, e = TypeVec.size(); i != e; ++i) {
     std::string VTName = llvm::getEnumName(TypeVec[i]);
     // Strip off MVT:: prefix if present.
@@ -134,7 +134,7 @@ std::string EEVT::TypeSet::getName() const {
     if (i) Result += ':';
     Result += VTName;
   }
-  
+
   if (TypeVec.size() == 1)
     return Result;
   return "{" + Result + "}";
@@ -146,14 +146,14 @@ std::string EEVT::TypeSet::getName() const {
 bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
   if (InVT.isCompletelyUnknown() || *this == InVT)
     return false;
-  
+
   if (isCompletelyUnknown()) {
     *this = InVT;
     return true;
   }
-  
+
   assert(TypeVec.size() >= 1 && InVT.TypeVec.size() >= 1 && "No unknowns");
-  
+
   // Handle the abstract cases, seeing if we can resolve them better.
   switch (TypeVec[0]) {
   default: break;
@@ -163,26 +163,26 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
       EEVT::TypeSet InCopy(InVT);
       InCopy.EnforceInteger(TP);
       InCopy.EnforceScalar(TP);
-      
+
       if (InCopy.isConcrete()) {
         // If the RHS has one integer type, upgrade iPTR to i32.
         TypeVec[0] = InVT.TypeVec[0];
         return true;
       }
-      
+
       // If the input has multiple scalar integers, this doesn't add any info.
       if (!InCopy.isCompletelyUnknown())
         return false;
     }
     break;
   }
-  
+
   // If the input constraint is iAny/iPTR and this is an integer type list,
   // remove non-integer types from the list.
   if ((InVT.TypeVec[0] == MVT::iPTR || InVT.TypeVec[0] == MVT::iPTRAny) &&
       hasIntegerTypes()) {
     bool MadeChange = EnforceInteger(TP);
-    
+
     // If we're merging in iPTR/iPTRAny and the node currently has a list of
     // multiple different integer types, replace them with a single iPTR.
     if ((InVT.TypeVec[0] == MVT::iPTR || InVT.TypeVec[0] == MVT::iPTRAny) &&
@@ -191,10 +191,10 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
       TypeVec[0] = InVT.TypeVec[0];
       MadeChange = true;
     }
-    
+
     return MadeChange;
   }
-  
+
   // If this is a type list and the RHS is a typelist as well, eliminate entries
   // from this list that aren't in the other one.
   bool MadeChange = false;
@@ -207,16 +207,16 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){
         InInVT = true;
         break;
       }
-    
+
     if (InInVT) continue;
     TypeVec.erase(TypeVec.begin()+i--);
     MadeChange = true;
   }
-  
+
   // If we removed all of our types, we have a type contradiction.
   if (!TypeVec.empty())
     return MadeChange;
-  
+
   // FIXME: Really want an SMLoc here!
   TP.error("Type inference contradiction found, merging '" +
            InVT.getName() + "' into '" + InputSet.getName() + "'");
@@ -232,12 +232,12 @@ bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) {
     return false;
 
   TypeSet InputSet(*this);
-  
+
   // Filter out all the fp types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
     if (!isInteger(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
-  
+
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be integer");
@@ -254,12 +254,12 @@ bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) {
     return false;
 
   TypeSet InputSet(*this);
-  
+
   // Filter out all the fp types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
     if (!isFloatingPoint(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
-  
+
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be floating point");
@@ -276,12 +276,12 @@ bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) {
     return false;
 
   TypeSet InputSet(*this);
-  
+
   // Filter out all the vector types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
     if (!isScalar(TypeVec[i]))
       TypeVec.erase(TypeVec.begin()+i--);
-  
+
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be scalar");
@@ -296,14 +296,14 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
 
   TypeSet InputSet(*this);
   bool MadeChange = false;
-  
+
   // Filter out all the scalar types.
   for (unsigned i = 0; i != TypeVec.size(); ++i)
     if (!isVector(TypeVec[i])) {
       TypeVec.erase(TypeVec.begin()+i--);
       MadeChange = true;
     }
-  
+
   if (TypeVec.empty())
     TP.error("Type inference contradiction found, '" +
              InputSet.getName() + "' needs to be a vector");
@@ -317,13 +317,13 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
 bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
   // Both operands must be integer or FP, but we don't care which.
   bool MadeChange = false;
-  
+
   if (isCompletelyUnknown())
     MadeChange = FillWithPossibleTypes(TP);
 
   if (Other.isCompletelyUnknown())
     MadeChange = Other.FillWithPossibleTypes(TP);
-    
+
   // If one side is known to be integer or known to be FP but the other side has
   // no information, get at least the type integrality info in there.
   if (!hasFloatingPointTypes())
@@ -334,62 +334,165 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
     MadeChange |= EnforceInteger(TP);
   else if (!Other.hasIntegerTypes())
     MadeChange |= EnforceFloatingPoint(TP);
-  
+
   assert(!isCompletelyUnknown() && !Other.isCompletelyUnknown() &&
          "Should have a type list now");
-  
+
   // If one contains vectors but the other doesn't pull vectors out.
   if (!hasVectorTypes())
     MadeChange |= Other.EnforceScalar(TP);
   if (!hasVectorTypes())
     MadeChange |= EnforceScalar(TP);
+
+  if (TypeVec.size() == 1 && Other.TypeVec.size() == 1) {
+    // If we are down to concrete types, this code does not currently
+    // handle nodes which have multiple types, where some types are
+    // integer, and some are fp.  Assert that this is not the case.
+    assert(!(hasIntegerTypes() && hasFloatingPointTypes()) &&
+           !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) &&
+           "SDTCisOpSmallerThanOp does not handle mixed int/fp types!");
+
+    // Otherwise, if these are both vector types, either this vector
+    // must have a larger bitsize than the other, or this element type
+    // must be larger than the other.
+    EVT Type(TypeVec[0]);
+    EVT OtherType(Other.TypeVec[0]);
+
+    if (hasVectorTypes() && Other.hasVectorTypes()) {
+      if (Type.getSizeInBits() >= OtherType.getSizeInBits())
+        if (Type.getVectorElementType().getSizeInBits()
+            >= OtherType.getVectorElementType().getSizeInBits())
+          TP.error("Type inference contradiction found, '" +
+                   getName() + "' element type not smaller than '" +
+                   Other.getName() +"'!");
+    }
+    else
+      // For scalar types, the bitsize of this type must be larger
+      // than that of the other.
+      if (Type.getSizeInBits() >= OtherType.getSizeInBits())
+        TP.error("Type inference contradiction found, '" +
+                 getName() + "' is not smaller than '" +
+                 Other.getName() +"'!");
+
+  }
   
-  // This code does not currently handle nodes which have multiple types,
-  // where some types are integer, and some are fp.  Assert that this is not
-  // the case.
-  assert(!(hasIntegerTypes() && hasFloatingPointTypes()) &&
-         !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) &&
-         "SDTCisOpSmallerThanOp does not handle mixed int/fp types!");
-  
+
+  // Handle int and fp as disjoint sets.  This won't work for patterns
+  // that have mixed fp/int types but those are likely rare and would
+  // not have been accepted by this code previously.
+
   // Okay, find the smallest type from the current set and remove it from the
   // largest set.
-  MVT::SimpleValueType Smallest = TypeVec[0];
+  MVT::SimpleValueType SmallestInt = MVT::LAST_VALUETYPE;
+  for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
+    if (isInteger(TypeVec[i])) {
+      SmallestInt = TypeVec[i];
+      break;
+    }
   for (unsigned i = 1, e = TypeVec.size(); i != e; ++i)
-    if (TypeVec[i] < Smallest)
-      Smallest = TypeVec[i];
-  
+    if (isInteger(TypeVec[i]) && TypeVec[i] < SmallestInt)
+      SmallestInt = TypeVec[i];
+
+  MVT::SimpleValueType SmallestFP = MVT::LAST_VALUETYPE;
+  for (unsigned i = 0, e = TypeVec.size(); i != e; ++i)
+    if (isFloatingPoint(TypeVec[i])) {
+      SmallestFP = TypeVec[i];
+      break;
+    }
+  for (unsigned i = 1, e = TypeVec.size(); i != e; ++i)
+    if (isFloatingPoint(TypeVec[i]) && TypeVec[i] < SmallestFP)
+      SmallestFP = TypeVec[i];
+
+  int OtherIntSize = 0;
+  int OtherFPSize = 0;
+  for (SmallVector<MVT::SimpleValueType, 2>::iterator TVI =
+         Other.TypeVec.begin();
+       TVI != Other.TypeVec.end();
+       /* NULL */) {
+    if (isInteger(*TVI)) {
+      ++OtherIntSize;
+      if (*TVI == SmallestInt) {
+        TVI = Other.TypeVec.erase(TVI);
+        --OtherIntSize;
+        MadeChange = true;
+        continue;
+      }
+    }
+    else if (isFloatingPoint(*TVI)) {
+      ++OtherFPSize;
+      if (*TVI == SmallestFP) {
+        TVI = Other.TypeVec.erase(TVI);
+        --OtherFPSize;
+        MadeChange = true;
+        continue;
+      }
+    }
+    ++TVI;
+  }
+
   // If this is the only type in the large set, the constraint can never be
   // satisfied.
-  if (Other.TypeVec.size() == 1 && Other.TypeVec[0] == Smallest)
+  if ((Other.hasIntegerTypes() && OtherIntSize == 0)
+      || (Other.hasFloatingPointTypes() && OtherFPSize == 0))
     TP.error("Type inference contradiction found, '" +
              Other.getName() + "' has nothing larger than '" + getName() +"'!");
-  
-  SmallVector<MVT::SimpleValueType, 2>::iterator TVI =
-    std::find(Other.TypeVec.begin(), Other.TypeVec.end(), Smallest);
-  if (TVI != Other.TypeVec.end()) {
-    Other.TypeVec.erase(TVI);
-    MadeChange = true;
-  }
-  
+
   // Okay, find the largest type in the Other set and remove it from the
   // current set.
-  MVT::SimpleValueType Largest = Other.TypeVec[0];
+  MVT::SimpleValueType LargestInt = MVT::Other;
+  for (unsigned i = 0, e = Other.TypeVec.size(); i != e; ++i)
+    if (isInteger(Other.TypeVec[i])) {
+      LargestInt = Other.TypeVec[i];
+      break;
+    }
   for (unsigned i = 1, e = Other.TypeVec.size(); i != e; ++i)
-    if (Other.TypeVec[i] > Largest)
-      Largest = Other.TypeVec[i];
-  
+    if (isInteger(Other.TypeVec[i]) && Other.TypeVec[i] > LargestInt)
+      LargestInt = Other.TypeVec[i];
+
+  MVT::SimpleValueType LargestFP = MVT::Other;
+  for (unsigned i = 0, e = Other.TypeVec.size(); i != e; ++i)
+    if (isFloatingPoint(Other.TypeVec[i])) {
+      LargestFP = Other.TypeVec[i];
+      break;
+    }
+  for (unsigned i = 1, e = Other.TypeVec.size(); i != e; ++i)
+    if (isFloatingPoint(Other.TypeVec[i]) && Other.TypeVec[i] > LargestFP)
+      LargestFP = Other.TypeVec[i];
+
+  int IntSize = 0;
+  int FPSize = 0;
+  for (SmallVector<MVT::SimpleValueType, 2>::iterator TVI =
+         TypeVec.begin();
+       TVI != TypeVec.end();
+       /* NULL */) {
+    if (isInteger(*TVI)) {
+      ++IntSize;
+      if (*TVI == LargestInt) {
+        TVI = TypeVec.erase(TVI);
+        --IntSize;
+        MadeChange = true;
+        continue;
+      }
+    }
+    else if (isFloatingPoint(*TVI)) {
+      ++FPSize;
+      if (*TVI == LargestFP) {
+        TVI = TypeVec.erase(TVI);
+        --FPSize;
+        MadeChange = true;
+        continue;
+      }
+    }
+    ++TVI;
+  }
+
   // If this is the only type in the small set, the constraint can never be
   // satisfied.
-  if (TypeVec.size() == 1 && TypeVec[0] == Largest)
+  if ((hasIntegerTypes() && IntSize == 0)
+      || (hasFloatingPointTypes() && FPSize == 0))
     TP.error("Type inference contradiction found, '" +
              getName() + "' has nothing smaller than '" + Other.getName()+"'!");
-  
-  TVI = std::find(TypeVec.begin(), TypeVec.end(), Largest);
-  if (TVI != TypeVec.end()) {
-    TypeVec.erase(TVI);
-    MadeChange = true;
-  }
-  
+
   return MadeChange;
 }
 
@@ -406,7 +509,7 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
   if (isConcrete()) {
     EVT IVT = getConcrete();
     IVT = IVT.getVectorElementType();
-    return MadeChange | 
+    return MadeChange |
       VTOperand.MergeInTypeInfo(IVT.getSimpleVT().SimpleTy, TP);
   }
 
@@ -414,11 +517,11 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
   // disagree.
   if (!VTOperand.isConcrete())
     return MadeChange;
-  
+
   MVT::SimpleValueType VT = VTOperand.getConcrete();
-  
+
   TypeSet InputSet(*this);
-  
+
   // Filter out all the types which don't have the right element type.
   for (unsigned i = 0; i != TypeVec.size(); ++i) {
     assert(isVector(TypeVec[i]) && "EnforceVector didn't work");
@@ -427,13 +530,43 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand,
       MadeChange = true;
     }
   }
-  
+
   if (TypeVec.empty())  // FIXME: Really want an SMLoc here!
     TP.error("Type inference contradiction found, forcing '" +
              InputSet.getName() + "' to have a vector element");
   return MadeChange;
 }
 
+/// EnforceVectorSubVectorTypeIs - 'this' is now constrainted to be a
+/// vector type specified by VTOperand.
+bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
+                                                 TreePattern &TP) {
+  // "This" must be a vector and "VTOperand" must be a vector.
+  bool MadeChange = false;
+  MadeChange |= EnforceVector(TP);
+  MadeChange |= VTOperand.EnforceVector(TP);
+
+  // "This" must be larger than "VTOperand."
+  MadeChange |= VTOperand.EnforceSmallerThan(*this, TP);
+
+  // If we know the vector type, it forces the scalar types to agree.
+  if (isConcrete()) {
+    EVT IVT = getConcrete();
+    IVT = IVT.getVectorElementType();
+
+    EEVT::TypeSet EltTypeSet(IVT.getSimpleVT().SimpleTy, TP);
+    MadeChange |= VTOperand.EnforceVectorEltTypeIs(EltTypeSet, TP);
+  } else if (VTOperand.isConcrete()) {
+    EVT IVT = VTOperand.getConcrete();
+    IVT = IVT.getVectorElementType();
+
+    EEVT::TypeSet EltTypeSet(IVT.getSimpleVT().SimpleTy, TP);
+    MadeChange |= EnforceVectorEltTypeIs(EltTypeSet, TP);
+  }
+
+  return MadeChange;
+}
+
 //===----------------------------------------------------------------------===//
 // Helpers for working with extended types.
 
@@ -473,18 +606,21 @@ void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
 }
 
 //! Dump the dependent variable set:
+#ifndef NDEBUG
 void DumpDepVars(MultipleUseVarSet &DepVars) {
   if (DepVars.empty()) {
     DEBUG(errs() << "<empty set>");
   } else {
     DEBUG(errs() << "[ ");
-    for (MultipleUseVarSet::const_iterator i = DepVars.begin(), e = DepVars.end();
-         i != e; ++i) {
+    for (MultipleUseVarSet::const_iterator i = DepVars.begin(),
+         e = DepVars.end(); i != e; ++i) {
       DEBUG(errs() << (*i) << " ");
     }
     DEBUG(errs() << "]");
   }
 }
+#endif
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -502,7 +638,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
   // e.g. (set R32:$dst, 0).
   if (P->isLeaf() && dynamic_cast<IntInit*>(P->getLeafValue()))
     Size += 2;
-  
+
   // FIXME: This is a hack to statically increase the priority of patterns
   // which maps a sub-dag to a complex pattern. e.g. favors LEA over ADD.
   // Later we can allow complexity / cost for each pattern to be (optionally)
@@ -511,12 +647,12 @@ static unsigned getPatternSize(const TreePatternNode *P,
   const ComplexPattern *AM = P->getComplexPatternInfo(CGP);
   if (AM)
     Size += AM->getNumOperands() * 3;
-  
+
   // If this node has some predicate function that must match, it adds to the
   // complexity of this node.
   if (!P->getPredicateFns().empty())
     ++Size;
-  
+
   // Count children in the count if they are also nodes.
   for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) {
     TreePatternNode *Child = P->getChild(i);
@@ -524,7 +660,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
         Child->getType(0) != MVT::Other)
       Size += getPatternSize(Child, CGP);
     else if (Child->isLeaf()) {
-      if (dynamic_cast<IntInit*>(Child->getLeafValue())) 
+      if (dynamic_cast<IntInit*>(Child->getLeafValue()))
         Size += 5;  // Matches a ConstantSDNode (+3) and a specific value (+2).
       else if (Child->getComplexPatternInfo(CGP))
         Size += getPatternSize(Child, CGP);
@@ -532,7 +668,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
         ++Size;
     }
   }
-  
+
   return Size;
 }
 
@@ -573,13 +709,13 @@ std::string PatternToMatch::getPredicateCheck() const {
 
 SDTypeConstraint::SDTypeConstraint(Record *R) {
   OperandNo = R->getValueAsInt("OperandNum");
-  
+
   if (R->isSubClassOf("SDTCisVT")) {
     ConstraintType = SDTCisVT;
     x.SDTCisVT_Info.VT = getValueType(R->getValueAsDef("VT"));
     if (x.SDTCisVT_Info.VT == MVT::isVoid)
       throw TGError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT");
-      
+
   } else if (R->isSubClassOf("SDTCisPtrTy")) {
     ConstraintType = SDTCisPtrTy;
   } else if (R->isSubClassOf("SDTCisInt")) {
@@ -593,15 +729,19 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
     x.SDTCisSameAs_Info.OtherOperandNum = R->getValueAsInt("OtherOperandNum");
   } else if (R->isSubClassOf("SDTCisVTSmallerThanOp")) {
     ConstraintType = SDTCisVTSmallerThanOp;
-    x.SDTCisVTSmallerThanOp_Info.OtherOperandNum = 
+    x.SDTCisVTSmallerThanOp_Info.OtherOperandNum =
       R->getValueAsInt("OtherOperandNum");
   } else if (R->isSubClassOf("SDTCisOpSmallerThanOp")) {
     ConstraintType = SDTCisOpSmallerThanOp;
-    x.SDTCisOpSmallerThanOp_Info.BigOperandNum = 
+    x.SDTCisOpSmallerThanOp_Info.BigOperandNum =
       R->getValueAsInt("BigOperandNum");
   } else if (R->isSubClassOf("SDTCisEltOfVec")) {
     ConstraintType = SDTCisEltOfVec;
     x.SDTCisEltOfVec_Info.OtherOperandNum = R->getValueAsInt("OtherOpNum");
+  } else if (R->isSubClassOf("SDTCisSubVecOfVec")) {
+    ConstraintType = SDTCisSubVecOfVec;
+    x.SDTCisSubVecOfVec_Info.OtherOperandNum =
+      R->getValueAsInt("OtherOpNum");
   } else {
     errs() << "Unrecognized SDTypeConstraint '" << R->getName() << "'!\n";
     exit(1);
@@ -618,11 +758,11 @@ static TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N,
     ResNo = OpNo;
     return N;
   }
-  
+
   OpNo -= NumResults;
-  
+
   if (OpNo >= N->getNumChildren()) {
-    errs() << "Invalid operand number in type constraint " 
+    errs() << "Invalid operand number in type constraint "
            << (OpNo+NumResults) << " ";
     N->dump();
     errs() << '\n';
@@ -641,7 +781,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
                                            TreePattern &TP) const {
   unsigned ResNo = 0; // The result number being referenced.
   TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo);
-  
+
   switch (ConstraintType) {
   default: assert(0 && "Unknown constraint type!");
   case SDTCisVT:
@@ -676,9 +816,9 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
       TP.error(N->getOperator()->getName() + " expects a VT operand!");
     MVT::SimpleValueType VT =
      getValueType(static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef());
-    
+
     EEVT::TypeSet TypeListTmp(VT, TP);
-    
+
     unsigned OResNo = 0;
     TreePatternNode *OtherNode =
       getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo,
@@ -699,13 +839,24 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
     TreePatternNode *VecOperand =
       getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NodeInfo,
                     VResNo);
-    
+
     // Filter vector types out of VecOperand that don't have the right element
     // type.
     return VecOperand->getExtType(VResNo).
       EnforceVectorEltTypeIs(NodeToApply->getExtType(ResNo), TP);
   }
-  }  
+  case SDTCisSubVecOfVec: {
+    unsigned VResNo = 0;
+    TreePatternNode *BigVecOperand =
+      getOperandNum(x.SDTCisSubVecOfVec_Info.OtherOperandNum, N, NodeInfo,
+                    VResNo);
+
+    // Filter vector types out of BigVecOperand that don't have the
+    // right subvector type.
+    return BigVecOperand->getExtType(VResNo).
+      EnforceVectorSubVectorTypeIs(NodeToApply->getExtType(ResNo), TP);
+  }
+  }
   return false;
 }
 
@@ -718,7 +869,7 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
   Record *TypeProfile = R->getValueAsDef("TypeProfile");
   NumResults = TypeProfile->getValueAsInt("NumResults");
   NumOperands = TypeProfile->getValueAsInt("NumOperands");
-  
+
   // Parse the properties.
   Properties = 0;
   std::vector<Record*> PropList = R->getValueAsListOfDefs("Properties");
@@ -729,12 +880,12 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
       Properties |= 1 << SDNPAssociative;
     } else if (PropList[i]->getName() == "SDNPHasChain") {
       Properties |= 1 << SDNPHasChain;
-    } else if (PropList[i]->getName() == "SDNPOutFlag") {
-      Properties |= 1 << SDNPOutFlag;
-    } else if (PropList[i]->getName() == "SDNPInFlag") {
-      Properties |= 1 << SDNPInFlag;
-    } else if (PropList[i]->getName() == "SDNPOptInFlag") {
-      Properties |= 1 << SDNPOptInFlag;
+    } else if (PropList[i]->getName() == "SDNPOutGlue") {
+      Properties |= 1 << SDNPOutGlue;
+    } else if (PropList[i]->getName() == "SDNPInGlue") {
+      Properties |= 1 << SDNPInGlue;
+    } else if (PropList[i]->getName() == "SDNPOptInGlue") {
+      Properties |= 1 << SDNPOptInGlue;
     } else if (PropList[i]->getName() == "SDNPMayStore") {
       Properties |= 1 << SDNPMayStore;
     } else if (PropList[i]->getName() == "SDNPMayLoad") {
@@ -751,8 +902,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) {
       exit(1);
     }
   }
-  
-  
+
+
   // Parse the type constraints.
   std::vector<Record*> ConstraintList =
     TypeProfile->getValueAsListOfDefs("Constraints");
@@ -767,12 +918,12 @@ MVT::SimpleValueType SDNodeInfo::getKnownType(unsigned ResNo) const {
   assert(NumResults <= 1 &&
          "We only work with nodes with zero or one result so far!");
   assert(ResNo == 0 && "Only handles single result nodes so far");
-  
+
   for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i) {
     // Make sure that this applies to the correct node result.
     if (TypeConstraints[i].OperandNo >= NumResults)  // FIXME: need value #
       continue;
-    
+
     switch (TypeConstraints[i].ConstraintType) {
     default: break;
     case SDTypeConstraint::SDTCisVT:
@@ -799,20 +950,20 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
   if (Operator->getName() == "set" ||
       Operator->getName() == "implicit")
     return 0;  // All return nothing.
-  
+
   if (Operator->isSubClassOf("Intrinsic"))
     return CDP.getIntrinsic(Operator).IS.RetVTs.size();
-  
+
   if (Operator->isSubClassOf("SDNode"))
     return CDP.getSDNodeInfo(Operator).getNumResults();
-  
+
   if (Operator->isSubClassOf("PatFrag")) {
     // If we've already parsed this pattern fragment, get it.  Otherwise, handle
     // the forward reference case where one pattern fragment references another
     // before it is processed.
     if (TreePattern *PFRec = CDP.getPatternFragmentIfRead(Operator))
       return PFRec->getOnlyTree()->getNumTypes();
-    
+
     // Get the result tree.
     DagInit *Tree = Operator->getValueAsDag("Fragment");
     Record *Op = 0;
@@ -821,22 +972,22 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
     assert(Op && "Invalid Fragment");
     return GetNumNodeResults(Op, CDP);
   }
-  
+
   if (Operator->isSubClassOf("Instruction")) {
     CodeGenInstruction &InstInfo = CDP.getTargetInfo().getInstruction(Operator);
 
     // FIXME: Should allow access to all the results here.
-    unsigned NumDefsToAdd = InstInfo.NumDefs ? 1 : 0;
-    
+    unsigned NumDefsToAdd = InstInfo.Operands.NumDefs ? 1 : 0;
+
     // Add on one implicit def if it has a resolvable type.
     if (InstInfo.HasOneImplicitDefWithKnownVT(CDP.getTargetInfo()) !=MVT::Other)
       ++NumDefsToAdd;
     return NumDefsToAdd;
   }
-  
+
   if (Operator->isSubClassOf("SDNodeXForm"))
     return 1;  // FIXME: Generalize SDNodeXForm
-  
+
   Operator->dump();
   errs() << "Unhandled node in GetNumNodeResults\n";
   exit(1);
@@ -862,7 +1013,7 @@ void TreePatternNode::print(raw_ostream &OS) const {
     }
     OS << ")";
   }
-  
+
   for (unsigned i = 0, e = PredicateFns.size(); i != e; ++i)
     OS << "<<P:" << PredicateFns[i] << ">>";
   if (TransformFn)
@@ -900,7 +1051,7 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
     }
     return getLeafValue() == N->getLeafValue();
   }
-  
+
   if (N->getOperator() != getOperator() ||
       N->getNumChildren() != getNumChildren()) return false;
   for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
@@ -944,7 +1095,7 @@ void TreePatternNode::RemoveAllTypes() {
 void TreePatternNode::
 SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
   if (isLeaf()) return;
-  
+
   for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
     TreePatternNode *Child = getChild(i);
     if (Child->isLeaf()) {
@@ -972,7 +1123,7 @@ SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
 TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
   if (isLeaf()) return this;  // nothing to do.
   Record *Op = getOperator();
-  
+
   if (!Op->isSubClassOf("PatFrag")) {
     // Just recursively inline children nodes.
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
@@ -991,7 +1142,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
   // Otherwise, we found a reference to a fragment.  First, look up its
   // TreePattern record.
   TreePattern *Frag = TP.getDAGPatterns().getPatternFragment(Op);
-  
+
   // Verify that we are passing the right number of operands.
   if (Frag->getNumArgs() != Children.size())
     TP.error("'" + Op->getName() + "' fragment requires " +
@@ -1009,10 +1160,10 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
     std::map<std::string, TreePatternNode*> ArgMap;
     for (unsigned i = 0, e = Frag->getNumArgs(); i != e; ++i)
       ArgMap[Frag->getArgName(i)] = getChild(i)->InlinePatternFragments(TP);
-  
+
     FragTree->SubstituteFormalArguments(ArgMap);
   }
-  
+
   FragTree->setName(getName());
   for (unsigned i = 0, e = Types.size(); i != e; ++i)
     FragTree->UpdateNodeType(i, getExtType(i), TP);
@@ -1023,7 +1174,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
 
   // Get a new copy of this fragment to stitch into here.
   //delete this;    // FIXME: implement refcounting!
-  
+
   // The fragment we inlined could have recursive inlining that is needed.  See
   // if there are any pattern fragments in it and inline them as needed.
   return FragTree->InlinePatternFragments(TP);
@@ -1038,21 +1189,21 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
   // Check to see if this is a register or a register class.
   if (R->isSubClassOf("RegisterClass")) {
     assert(ResNo == 0 && "Regclass ref only has one result!");
-    if (NotRegisters) 
+    if (NotRegisters)
       return EEVT::TypeSet(); // Unknown.
     const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
     return EEVT::TypeSet(T.getRegisterClass(R).getValueTypes());
   }
-  
+
   if (R->isSubClassOf("PatFrag")) {
     assert(ResNo == 0 && "FIXME: PatFrag with multiple results?");
     // Pattern fragment types will be resolved when they are inlined.
     return EEVT::TypeSet(); // Unknown.
   }
-  
+
   if (R->isSubClassOf("Register")) {
     assert(ResNo == 0 && "Registers only produce one result!");
-    if (NotRegisters) 
+    if (NotRegisters)
       return EEVT::TypeSet(); // Unknown.
     const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
     return EEVT::TypeSet(T.getRegisterVTs(R));
@@ -1062,16 +1213,16 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
     assert(ResNo == 0 && "SubRegisterIndices only produce one result!");
     return EEVT::TypeSet();
   }
-  
+
   if (R->isSubClassOf("ValueType") || R->isSubClassOf("CondCode")) {
     assert(ResNo == 0 && "This node only has one result!");
     // Using a VTSDNode or CondCodeSDNode.
     return EEVT::TypeSet(MVT::Other, TP);
   }
-  
+
   if (R->isSubClassOf("ComplexPattern")) {
     assert(ResNo == 0 && "FIXME: ComplexPattern with multiple results?");
-    if (NotRegisters) 
+    if (NotRegisters)
       return EEVT::TypeSet(); // Unknown.
    return EEVT::TypeSet(TP.getDAGPatterns().getComplexPattern(R).getValueType(),
                          TP);
@@ -1080,13 +1231,13 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
     assert(ResNo == 0 && "Regclass can only have one result!");
     return EEVT::TypeSet(MVT::iPTR, TP);
   }
-  
+
   if (R->getName() == "node" || R->getName() == "srcvalue" ||
       R->getName() == "zero_reg") {
     // Placeholder.
     return EEVT::TypeSet(); // Unknown.
   }
-  
+
   TP.error("Unknown node flavor used in pattern: " + R->getName());
   return EEVT::TypeSet(MVT::Other, TP);
 }
@@ -1100,8 +1251,8 @@ getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const {
       getOperator() != CDP.get_intrinsic_w_chain_sdnode() &&
       getOperator() != CDP.get_intrinsic_wo_chain_sdnode())
     return 0;
-    
-  unsigned IID = 
+
+  unsigned IID =
     dynamic_cast<IntInit*>(getChild(0)->getLeafValue())->getValue();
   return &CDP.getIntrinsicInfo(IID);
 }
@@ -1111,7 +1262,7 @@ getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const {
 const ComplexPattern *
 TreePatternNode::getComplexPatternInfo(const CodeGenDAGPatterns &CGP) const {
   if (!isLeaf()) return 0;
-  
+
   DefInit *DI = dynamic_cast<DefInit*>(getLeafValue());
   if (DI && DI->getDef()->isSubClassOf("ComplexPattern"))
     return &CGP.getComplexPattern(DI->getDef());
@@ -1126,10 +1277,10 @@ bool TreePatternNode::NodeHasProperty(SDNP Property,
       return CP->hasProperty(Property);
     return false;
   }
-  
+
   Record *Operator = getOperator();
   if (!Operator->isSubClassOf("SDNode")) return false;
-  
+
   return CGP.getSDNodeInfo(Operator).hasProperty(Property);
 }
 
@@ -1146,7 +1297,7 @@ bool TreePatternNode::TreeHasProperty(SDNP Property,
     if (getChild(i)->TreeHasProperty(Property, CGP))
       return true;
   return false;
-}  
+}
 
 /// isCommutativeIntrinsic - Return true if the node corresponds to a
 /// commutative intrinsic.
@@ -1173,27 +1324,27 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
                                                         NotRegisters, TP), TP);
       return MadeChange;
     }
-    
+
     if (IntInit *II = dynamic_cast<IntInit*>(getLeafValue())) {
       assert(Types.size() == 1 && "Invalid IntInit");
-      
+
       // Int inits are always integers. :)
       bool MadeChange = Types[0].EnforceInteger(TP);
-      
+
       if (!Types[0].isConcrete())
         return MadeChange;
-      
+
       MVT::SimpleValueType VT = getType(0);
       if (VT == MVT::iPTR || VT == MVT::iPTRAny)
         return MadeChange;
-      
+
       unsigned Size = EVT(VT).getSizeInBits();
       // Make sure that the value is representable for this type.
       if (Size >= 32) return MadeChange;
-      
+
       int Val = (II->getValue() << (32-Size)) >> (32-Size);
       if (Val == II->getValue()) return MadeChange;
-      
+
       // If sign-extended doesn't fit, does it fit as unsigned?
       unsigned ValueMask;
       unsigned UnsignedVal;
@@ -1202,34 +1353,34 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
 
       if ((ValueMask & UnsignedVal) == UnsignedVal)
         return MadeChange;
-      
+
       TP.error("Integer value '" + itostr(II->getValue())+
                "' is out of range for type '" + getEnumName(getType(0)) + "'!");
       return MadeChange;
     }
     return false;
   }
-  
+
   // special handling for set, which isn't really an SDNode.
   if (getOperator()->getName() == "set") {
     assert(getNumTypes() == 0 && "Set doesn't produce a value");
     assert(getNumChildren() >= 2 && "Missing RHS of a set?");
     unsigned NC = getNumChildren();
-    
+
     TreePatternNode *SetVal = getChild(NC-1);
     bool MadeChange = SetVal->ApplyTypeConstraints(TP, NotRegisters);
 
     for (unsigned i = 0; i < NC-1; ++i) {
       TreePatternNode *Child = getChild(i);
       MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
-    
+
       // Types of operands must match.
       MadeChange |= Child->UpdateNodeType(0, SetVal->getExtType(i), TP);
       MadeChange |= SetVal->UpdateNodeType(i, Child->getExtType(0), TP);
     }
     return MadeChange;
   }
-  
+
   if (getOperator()->getName() == "implicit") {
     assert(getNumTypes() == 0 && "Node doesn't produce a value");
 
@@ -1238,15 +1389,15 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
     return MadeChange;
   }
-  
+
   if (getOperator()->getName() == "COPY_TO_REGCLASS") {
     bool MadeChange = false;
     MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
     MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters);
-    
+
     assert(getChild(0)->getNumTypes() == 1 &&
            getChild(1)->getNumTypes() == 1 && "Unhandled case");
-    
+
     // child #1 of COPY_TO_REGCLASS should be a register class.  We don't care
     // what type it gets, so if it didn't get a concrete type just give it the
     // first viable type from the reg class.
@@ -1257,14 +1408,14 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     }
     return MadeChange;
   }
-  
+
   if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) {
     bool MadeChange = false;
 
     // Apply the result type to the node.
     unsigned NumRetVTs = Int->IS.RetVTs.size();
     unsigned NumParamVTs = Int->IS.ParamVTs.size();
-    
+
     for (unsigned i = 0, e = NumRetVTs; i != e; ++i)
       MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP);
 
@@ -1275,46 +1426,46 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
 
     // Apply type info to the intrinsic ID.
     MadeChange |= getChild(0)->UpdateNodeType(0, MVT::iPTR, TP);
-    
+
     for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i) {
       MadeChange |= getChild(i+1)->ApplyTypeConstraints(TP, NotRegisters);
-      
+
       MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i];
       assert(getChild(i+1)->getNumTypes() == 1 && "Unhandled case");
       MadeChange |= getChild(i+1)->UpdateNodeType(0, OpVT, TP);
     }
     return MadeChange;
   }
-  
+
   if (getOperator()->isSubClassOf("SDNode")) {
     const SDNodeInfo &NI = CDP.getSDNodeInfo(getOperator());
-    
+
     // Check that the number of operands is sane.  Negative operands -> varargs.
     if (NI.getNumOperands() >= 0 &&
         getNumChildren() != (unsigned)NI.getNumOperands())
       TP.error(getOperator()->getName() + " node requires exactly " +
                itostr(NI.getNumOperands()) + " operands!");
-    
+
     bool MadeChange = NI.ApplyTypeConstraints(this, TP);
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
     return MadeChange;
   }
-  
+
   if (getOperator()->isSubClassOf("Instruction")) {
     const DAGInstruction &Inst = CDP.getInstruction(getOperator());
     CodeGenInstruction &InstInfo =
       CDP.getTargetInfo().getInstruction(getOperator());
-    
+
     bool MadeChange = false;
 
     // Apply the result types to the node, these come from the things in the
     // (outs) list of the instruction.
     // FIXME: Cap at one result so far.
-    unsigned NumResultsToAdd = InstInfo.NumDefs ? 1 : 0;
+    unsigned NumResultsToAdd = InstInfo.Operands.NumDefs ? 1 : 0;
     for (unsigned ResNo = 0; ResNo != NumResultsToAdd; ++ResNo) {
       Record *ResultNode = Inst.getResult(ResNo);
-      
+
       if (ResultNode->isSubClassOf("PointerLikeRegClass")) {
         MadeChange |= UpdateNodeType(ResNo, MVT::iPTR, TP);
       } else if (ResultNode->getName() == "unknown") {
@@ -1322,26 +1473,26 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       } else {
         assert(ResultNode->isSubClassOf("RegisterClass") &&
                "Operands should be register classes!");
-        const CodeGenRegisterClass &RC = 
+        const CodeGenRegisterClass &RC =
           CDP.getTargetInfo().getRegisterClass(ResultNode);
         MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP);
       }
     }
-    
+
     // If the instruction has implicit defs, we apply the first one as a result.
     // FIXME: This sucks, it should apply all implicit defs.
     if (!InstInfo.ImplicitDefs.empty()) {
       unsigned ResNo = NumResultsToAdd;
-      
+
       // FIXME: Generalize to multiple possible types and multiple possible
       // ImplicitDefs.
       MVT::SimpleValueType VT =
         InstInfo.HasOneImplicitDefWithKnownVT(CDP.getTargetInfo());
-      
+
       if (VT != MVT::Other)
         MadeChange |= UpdateNodeType(ResNo, VT, TP);
     }
-    
+
     // If this is an INSERT_SUBREG, constrain the source and destination VTs to
     // be the same.
     if (getOperator()->getName() == "INSERT_SUBREG") {
@@ -1353,7 +1504,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     unsigned ChildNo = 0;
     for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
       Record *OperandNode = Inst.getOperand(i);
-      
+
       // If the instruction expects a predicate or optional def operand, we
       // codegen this by setting the operand to it's default value if it has a
       // non-empty DefaultOps field.
@@ -1361,18 +1512,18 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
            OperandNode->isSubClassOf("OptionalDefOperand")) &&
           !CDP.getDefaultOperand(OperandNode).DefaultOps.empty())
         continue;
-       
+
       // Verify that we didn't run out of provided operands.
       if (ChildNo >= getNumChildren())
         TP.error("Instruction '" + getOperator()->getName() +
                  "' expects more operands than were provided.");
-      
+
       MVT::SimpleValueType VT;
       TreePatternNode *Child = getChild(ChildNo++);
       unsigned ChildResNo = 0;  // Instructions always use res #0 of their op.
-      
+
       if (OperandNode->isSubClassOf("RegisterClass")) {
-        const CodeGenRegisterClass &RC = 
+        const CodeGenRegisterClass &RC =
           CDP.getTargetInfo().getRegisterClass(OperandNode);
         MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP);
       } else if (OperandNode->isSubClassOf("Operand")) {
@@ -1392,12 +1543,12 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     if (ChildNo != getNumChildren())
       TP.error("Instruction '" + getOperator()->getName() +
                "' was provided too many operands!");
-    
+
     return MadeChange;
   }
-  
+
   assert(getOperator()->isSubClassOf("SDNodeXForm") && "Unknown node type!");
-  
+
   // Node transforms always take one operand.
   if (getNumChildren() != 1)
     TP.error("Node transform '" + getOperator()->getName() +
@@ -1405,7 +1556,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
 
   bool MadeChange = getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
 
-  
+
   // If either the output or input of the xform does not have exact
   // type info. We assume they must be the same. Otherwise, it is perfectly
   // legal to transform from one type to a completely different type.
@@ -1435,7 +1586,7 @@ static bool OnlyOnRHSOfCommutative(TreePatternNode *N) {
 /// used as a sanity check for .td files (to prevent people from writing stuff
 /// that can never possibly work), and to prevent the pattern permuter from
 /// generating stuff that is useless.
-bool TreePatternNode::canPatternMatch(std::string &Reason, 
+bool TreePatternNode::canPatternMatch(std::string &Reason,
                                       const CodeGenDAGPatterns &CDP) {
   if (isLeaf()) return true;
 
@@ -1449,7 +1600,7 @@ bool TreePatternNode::canPatternMatch(std::string &Reason,
     // TODO:
     return true;
   }
-  
+
   // If this node is a commutative operator, check that the LHS isn't an
   // immediate.
   const SDNodeInfo &NodeInfo = CDP.getSDNodeInfo(getOperator());
@@ -1466,7 +1617,7 @@ bool TreePatternNode::canPatternMatch(std::string &Reason,
         }
     }
   }
-  
+
   return true;
 }
 
@@ -1506,7 +1657,7 @@ void TreePattern::ComputeNamedNodes() {
 void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
   if (!N->getName().empty())
     NamedNodes[N->getName()].push_back(N);
-  
+
   for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
     ComputeNamedNodes(N->getChild(i));
 }
@@ -1515,7 +1666,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
 TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
   if (DefInit *DI = dynamic_cast<DefInit*>(TheInit)) {
     Record *R = DI->getDef();
-    
+
     // Direct reference to a leaf DagNode or PatFrag?  Turn it into a
     // TreePatternNode if its own.  For example:
     ///   (foo GPR, imm) -> (foo GPR, (imm))
@@ -1523,7 +1674,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
       return ParseTreePattern(new DagInit(DI, "",
                           std::vector<std::pair<Init*, std::string> >()),
                               OpName);
-    
+
     // Input argument?
     TreePatternNode *Res = new TreePatternNode(DI, 1);
     if (R->getName() == "node" && !OpName.empty()) {
@@ -1535,13 +1686,13 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
     Res->setName(OpName);
     return Res;
   }
-  
+
   if (IntInit *II = dynamic_cast<IntInit*>(TheInit)) {
     if (!OpName.empty())
       error("Constant int argument should not have a name!");
     return new TreePatternNode(II, 1);
   }
-  
+
   if (BitsInit *BI = dynamic_cast<BitsInit*>(TheInit)) {
     // Turn this into an IntInit.
     Init *II = BI->convertInitializerTo(new IntRecTy());
@@ -1558,34 +1709,34 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
   DefInit *OpDef = dynamic_cast<DefInit*>(Dag->getOperator());
   if (!OpDef) error("Pattern has unexpected operator type!");
   Record *Operator = OpDef->getDef();
-  
+
   if (Operator->isSubClassOf("ValueType")) {
     // If the operator is a ValueType, then this must be "type cast" of a leaf
     // node.
     if (Dag->getNumArgs() != 1)
       error("Type cast only takes one operand!");
-    
+
     TreePatternNode *New = ParseTreePattern(Dag->getArg(0), Dag->getArgName(0));
-    
+
     // Apply the type cast.
     assert(New->getNumTypes() == 1 && "FIXME: Unhandled");
     New->UpdateNodeType(0, getValueType(Operator), *this);
-    
+
     if (!OpName.empty())
       error("ValueType cast should not have a name!");
     return New;
   }
-  
+
   // Verify that this is something that makes sense for an operator.
-  if (!Operator->isSubClassOf("PatFrag") && 
+  if (!Operator->isSubClassOf("PatFrag") &&
       !Operator->isSubClassOf("SDNode") &&
-      !Operator->isSubClassOf("Instruction") && 
+      !Operator->isSubClassOf("Instruction") &&
       !Operator->isSubClassOf("SDNodeXForm") &&
       !Operator->isSubClassOf("Intrinsic") &&
       Operator->getName() != "set" &&
       Operator->getName() != "implicit")
     error("Unrecognized node '" + Operator->getName() + "'!");
-  
+
   //  Check to see if this is something that is illegal in an input pattern.
   if (isInputPattern) {
     if (Operator->isSubClassOf("Instruction") ||
@@ -1594,7 +1745,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
   } else {
     if (Operator->isSubClassOf("Intrinsic"))
       error("Cannot use '" + Operator->getName() + "' in an output pattern!");
-    
+
     if (Operator->isSubClassOf("SDNode") &&
         Operator->getName() != "imm" &&
         Operator->getName() != "fpimm" &&
@@ -1609,15 +1760,15 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
         Operator->getName() != "vt")
       error("Cannot use '" + Operator->getName() + "' in an output pattern!");
   }
-  
+
   std::vector<TreePatternNode*> Children;
 
   // Parse all the operands.
   for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i)
     Children.push_back(ParseTreePattern(Dag->getArg(i), Dag->getArgName(i)));
-  
+
   // If the operator is an intrinsic, then this is just syntactic sugar for for
-  // (intrinsic_* <number>, ..children..).  Pick the right intrinsic node, and 
+  // (intrinsic_* <number>, ..children..).  Pick the right intrinsic node, and
   // convert the intrinsic name to a number.
   if (Operator->isSubClassOf("Intrinsic")) {
     const CodeGenIntrinsic &Int = getDAGPatterns().getIntrinsic(Operator);
@@ -1632,15 +1783,15 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
       Operator = getDAGPatterns().get_intrinsic_w_chain_sdnode();
     else // Otherwise, no chain.
       Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode();
-    
+
     TreePatternNode *IIDNode = new TreePatternNode(new IntInit(IID), 1);
     Children.insert(Children.begin(), IIDNode);
   }
-  
+
   unsigned NumResults = GetNumNodeResults(Operator, CDP);
   TreePatternNode *Result = new TreePatternNode(Operator, Children, NumResults);
   Result->setName(OpName);
-  
+
   if (!Dag->getName().empty()) {
     assert(Result->getName().empty());
     Result->setName(Dag->getName());
@@ -1698,10 +1849,10 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
     }
 
     // If there are constraints on our named nodes, apply them.
-    for (StringMap<SmallVector<TreePatternNode*,1> >::iterator 
+    for (StringMap<SmallVector<TreePatternNode*,1> >::iterator
          I = NamedNodes.begin(), E = NamedNodes.end(); I != E; ++I) {
       SmallVectorImpl<TreePatternNode*> &Nodes = I->second;
-      
+
       // If we have input named node types, propagate their types to the named
       // values here.
       if (InNamedTypes) {
@@ -1724,7 +1875,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
             if (DI && DI->getDef()->isSubClassOf("RegisterClass"))
               continue;
           }
-          
+
           assert(Nodes[i]->getNumTypes() == 1 &&
                  InNodes[0]->getNumTypes() == 1 &&
                  "FIXME: cannot name multiple result nodes yet");
@@ -1732,7 +1883,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
                                                  *this);
         }
       }
-      
+
       // If there are multiple nodes with the same name, they must all have the
       // same type.
       if (I->second.size() > 1) {
@@ -1740,14 +1891,14 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
           TreePatternNode *N1 = Nodes[i], *N2 = Nodes[i+1];
           assert(N1->getNumTypes() == 1 && N2->getNumTypes() == 1 &&
                  "FIXME: cannot name multiple result nodes yet");
-          
+
           MadeChange |= N1->UpdateNodeType(0, N2->getExtType(0), *this);
           MadeChange |= N2->UpdateNodeType(0, N1->getExtType(0), *this);
         }
       }
     }
   }
-  
+
   bool HasUnresolvedTypes = false;
   for (unsigned i = 0, e = Trees.size(); i != e; ++i)
     HasUnresolvedTypes |= Trees[i]->ContainsUnresolvedType();
@@ -1763,7 +1914,7 @@ void TreePattern::print(raw_ostream &OS) const {
     OS << ")";
   }
   OS << ": ";
-  
+
   if (Trees.size() > 1)
     OS << "[\n";
   for (unsigned i = 0, e = Trees.size(); i != e; ++i) {
@@ -1782,7 +1933,9 @@ void TreePattern::dump() const { print(errs()); }
 // CodeGenDAGPatterns implementation
 //
 
-CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) : Records(R) {
+CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) :
+  Records(R), Target(R) {
+
   Intrinsics = LoadIntrinsics(Records, false);
   TgtIntrinsics = LoadIntrinsics(Records, true);
   ParseNodeInfo();
@@ -1792,7 +1945,7 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) : Records(R) {
   ParseDefaultOperands();
   ParseInstructions();
   ParsePatterns();
-  
+
   // Generate variants.  For example, commutative patterns can match
   // multiple ways.  Add them to PatternsToMatch as well.
   GenerateVariants();
@@ -1863,20 +2016,20 @@ void CodeGenDAGPatterns::ParseComplexPatterns() {
 ///
 void CodeGenDAGPatterns::ParsePatternFragments() {
   std::vector<Record*> Fragments = Records.getAllDerivedDefinitions("PatFrag");
-  
+
   // First step, parse all of the fragments.
   for (unsigned i = 0, e = Fragments.size(); i != e; ++i) {
     DagInit *Tree = Fragments[i]->getValueAsDag("Fragment");
     TreePattern *P = new TreePattern(Fragments[i], Tree, true, *this);
     PatternFragments[Fragments[i]] = P;
-    
+
     // Validate the argument list, converting it to set, to discard duplicates.
     std::vector<std::string> &Args = P->getArgList();
     std::set<std::string> OperandsSet(Args.begin(), Args.end());
-    
+
     if (OperandsSet.count(""))
       P->error("Cannot have unnamed 'node' values in pattern fragment!");
-    
+
     // Parse the operands list.
     DagInit *OpsList = Fragments[i]->getValueAsDag("Operands");
     DefInit *OpsOp = dynamic_cast<DefInit*>(OpsList->getOperator());
@@ -1887,8 +2040,8 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
          OpsOp->getDef()->getName() != "outs" &&
          OpsOp->getDef()->getName() != "ins"))
       P->error("Operands list should start with '(ops ... '!");
-    
-    // Copy over the arguments.       
+
+    // Copy over the arguments.
     Args.clear();
     for (unsigned j = 0, e = OpsList->getNumArgs(); j != e; ++j) {
       if (!dynamic_cast<DefInit*>(OpsList->getArg(j)) ||
@@ -1903,7 +2056,7 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
       OperandsSet.erase(OpsList->getArgName(j));
       Args.push_back(OpsList->getArgName(j));
     }
-    
+
     if (!OperandsSet.empty())
       P->error("Operands list does not contain an entry for operand '" +
                *OperandsSet.begin() + "'!");
@@ -1913,20 +2066,20 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
     std::string Code = Fragments[i]->getValueAsCode("Predicate");
     if (!Code.empty())
       P->getOnlyTree()->addPredicateFn("Predicate_"+Fragments[i]->getName());
-    
+
     // If there is a node transformation corresponding to this, keep track of
     // it.
     Record *Transform = Fragments[i]->getValueAsDef("OperandTransform");
     if (!getSDNodeTransform(Transform).second.empty())    // not noop xform?
       P->getOnlyTree()->setTransformFn(Transform);
   }
-  
+
   // Now that we've parsed all of the tree fragments, do a closure on them so
   // that there are not references to PatFrags left inside of them.
   for (unsigned i = 0, e = Fragments.size(); i != e; ++i) {
     TreePattern *ThePat = PatternFragments[Fragments[i]];
     ThePat->InlinePatternFragments();
-        
+
     // Infer as many types as possible.  Don't worry about it if we don't infer
     // all of them, some may depend on the inputs of the pattern.
     try {
@@ -1937,7 +2090,7 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
       // actually used by instructions, the type consistency error will be
       // reported there.
     }
-    
+
     // If debugging, print out the pattern fragment result.
     DEBUG(ThePat->dump());
   }
@@ -1951,11 +2104,11 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
   // Find some SDNode.
   assert(!SDNodes.empty() && "No SDNodes parsed?");
   Init *SomeSDNode = new DefInit(SDNodes.begin()->first);
-  
+
   for (unsigned iter = 0; iter != 2; ++iter) {
     for (unsigned i = 0, e = DefaultOps[iter].size(); i != e; ++i) {
       DagInit *DefaultInfo = DefaultOps[iter][i]->getValueAsDag("DefaultOps");
-    
+
       // Clone the DefaultInfo dag node, changing the operator from 'ops' to
       // SomeSDnode so that we can parse this.
       std::vector<std::pair<Init*, std::string> > Ops;
@@ -1963,20 +2116,20 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
         Ops.push_back(std::make_pair(DefaultInfo->getArg(op),
                                      DefaultInfo->getArgName(op)));
       DagInit *DI = new DagInit(SomeSDNode, "", Ops);
-    
+
       // Create a TreePattern to parse this.
       TreePattern P(DefaultOps[iter][i], DI, false, *this);
       assert(P.getNumTrees() == 1 && "This ctor can only produce one tree!");
 
       // Copy the operands over into a DAGDefaultOperand.
       DAGDefaultOperand DefaultOpInfo;
-    
+
       TreePatternNode *T = P.getTree(0);
       for (unsigned op = 0, e = T->getNumChildren(); op != e; ++op) {
         TreePatternNode *TPN = T->getChild(op);
         while (TPN->ApplyTypeConstraints(P, false))
           /* Resolve all types */;
-      
+
         if (TPN->ContainsUnresolvedType()) {
           if (iter == 0)
             throw "Value #" + utostr(i) + " of PredicateOperand '" +
@@ -2033,7 +2186,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
     assert(Slot->getNumChildren() == 0 && "can't be a use with children!");
     SlotRec = Slot->getOperator();
   }
-  
+
   // Ensure that the inputs agree if we've already seen this input.
   if (Rec != SlotRec)
     I->error("All $" + Pat->getName() + " inputs must agree with each other");
@@ -2056,13 +2209,13 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
       I->error("Cannot specify a transform function for a non-input value!");
     return;
   }
-  
+
   if (Pat->getOperator()->getName() == "implicit") {
     for (unsigned i = 0, e = Pat->getNumChildren(); i != e; ++i) {
       TreePatternNode *Dest = Pat->getChild(i);
       if (!Dest->isLeaf())
         I->error("implicitly defined value should be a register!");
-    
+
       DefInit *Val = dynamic_cast<DefInit*>(Dest->getLeafValue());
       if (!Val || !Val->getDef()->isSubClassOf("Register"))
         I->error("implicitly defined value should be a register!");
@@ -2070,7 +2223,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
     }
     return;
   }
-  
+
   if (Pat->getOperator()->getName() != "set") {
     // If this is not a set, verify that the children nodes are not void typed,
     // and recurse.
@@ -2080,30 +2233,30 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
       FindPatternInputsAndOutputs(I, Pat->getChild(i), InstInputs, InstResults,
                                   InstImpResults);
     }
-    
+
     // If this is a non-leaf node with no children, treat it basically as if
     // it were a leaf.  This handles nodes like (imm).
     bool isUse = HandleUse(I, Pat, InstInputs);
-    
+
     if (!isUse && Pat->getTransformFn())
       I->error("Cannot specify a transform function for a non-input value!");
     return;
   }
-  
+
   // Otherwise, this is a set, validate and collect instruction results.
   if (Pat->getNumChildren() == 0)
     I->error("set requires operands!");
-  
+
   if (Pat->getTransformFn())
     I->error("Cannot specify a transform function on a set node!");
-  
+
   // Check the set destinations.
   unsigned NumDests = Pat->getNumChildren()-1;
   for (unsigned i = 0; i != NumDests; ++i) {
     TreePatternNode *Dest = Pat->getChild(i);
     if (!Dest->isLeaf())
       I->error("set destination should be a register!");
-    
+
     DefInit *Val = dynamic_cast<DefInit*>(Dest->getLeafValue());
     if (!Val)
       I->error("set destination should be a register!");
@@ -2121,7 +2274,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
       I->error("set destination should be a register!");
     }
   }
-    
+
   // Verify and collect info from the computation.
   FindPatternInputsAndOutputs(I, Pat->getChild(NumDests),
                               InstInputs, InstResults, InstImpResults);
@@ -2254,8 +2407,8 @@ static void InferFromPattern(const CodeGenInstruction &Inst,
               "which already inferred this.\n", Inst.TheDef->getName().c_str());
     HasSideEffects = true;
   }
-  
-  if (Inst.isVariadic)
+
+  if (Inst.Operands.isVariadic)
     IsVariadic = true;  // Can warn if we want.
 }
 
@@ -2264,64 +2417,64 @@ static void InferFromPattern(const CodeGenInstruction &Inst,
 /// resolved instructions.
 void CodeGenDAGPatterns::ParseInstructions() {
   std::vector<Record*> Instrs = Records.getAllDerivedDefinitions("Instruction");
-  
+
   for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
     ListInit *LI = 0;
-    
+
     if (dynamic_cast<ListInit*>(Instrs[i]->getValueInit("Pattern")))
       LI = Instrs[i]->getValueAsListInit("Pattern");
-    
+
     // If there is no pattern, only collect minimal information about the
     // instruction for its operand list.  We have to assume that there is one
     // result, as we have no detailed info.
     if (!LI || LI->getSize() == 0) {
       std::vector<Record*> Results;
       std::vector<Record*> Operands;
-      
+
       CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
 
-      if (InstInfo.OperandList.size() != 0) {
-        if (InstInfo.NumDefs == 0) {
+      if (InstInfo.Operands.size() != 0) {
+        if (InstInfo.Operands.NumDefs == 0) {
           // These produce no results
-          for (unsigned j = 0, e = InstInfo.OperandList.size(); j < e; ++j)
-            Operands.push_back(InstInfo.OperandList[j].Rec);
+          for (unsigned j = 0, e = InstInfo.Operands.size(); j < e; ++j)
+            Operands.push_back(InstInfo.Operands[j].Rec);
         } else {
           // Assume the first operand is the result.
-          Results.push_back(InstInfo.OperandList[0].Rec);
-      
+          Results.push_back(InstInfo.Operands[0].Rec);
+
           // The rest are inputs.
-          for (unsigned j = 1, e = InstInfo.OperandList.size(); j < e; ++j)
-            Operands.push_back(InstInfo.OperandList[j].Rec);
+          for (unsigned j = 1, e = InstInfo.Operands.size(); j < e; ++j)
+            Operands.push_back(InstInfo.Operands[j].Rec);
         }
       }
-      
+
       // Create and insert the instruction.
       std::vector<Record*> ImpResults;
-      Instructions.insert(std::make_pair(Instrs[i], 
+      Instructions.insert(std::make_pair(Instrs[i],
                           DAGInstruction(0, Results, Operands, ImpResults)));
       continue;  // no pattern.
     }
-    
+
     // Parse the instruction.
     TreePattern *I = new TreePattern(Instrs[i], LI, true, *this);
     // Inline pattern fragments into it.
     I->InlinePatternFragments();
-    
+
     // Infer as many types as possible.  If we cannot infer all of them, we can
     // never do anything with this instruction pattern: report it to the user.
     if (!I->InferAllTypes())
       I->error("Could not infer all types in pattern!");
-    
-    // InstInputs - Keep track of all of the inputs of the instruction, along 
+
+    // InstInputs - Keep track of all of the inputs of the instruction, along
     // with the record they are declared as.
     std::map<std::string, TreePatternNode*> InstInputs;
-    
+
     // InstResults - Keep track of all the virtual registers that are 'set'
     // in the instruction, including what reg class they are.
     std::map<std::string, TreePatternNode*> InstResults;
 
     std::vector<Record*> InstImpResults;
-    
+
     // Verify that the top-level forms in the instruction are of void type, and
     // fill in the InstResults map.
     for (unsigned j = 0, e = I->getNumTrees(); j != e; ++j) {
@@ -2348,29 +2501,29 @@ void CodeGenDAGPatterns::ParseInstructions() {
     std::vector<Record*> Results;
     TreePatternNode *Res0Node = 0;
     for (unsigned i = 0; i != NumResults; ++i) {
-      if (i == CGI.OperandList.size())
+      if (i == CGI.Operands.size())
         I->error("'" + InstResults.begin()->first +
                  "' set but does not appear in operand list!");
-      const std::string &OpName = CGI.OperandList[i].Name;
-      
+      const std::string &OpName = CGI.Operands[i].Name;
+
       // Check that it exists in InstResults.
       TreePatternNode *RNode = InstResults[OpName];
       if (RNode == 0)
         I->error("Operand $" + OpName + " does not exist in operand list!");
-        
+
       if (i == 0)
         Res0Node = RNode;
       Record *R = dynamic_cast<DefInit*>(RNode->getLeafValue())->getDef();
       if (R == 0)
         I->error("Operand $" + OpName + " should be a set destination: all "
                  "outputs must occur before inputs in operand list!");
-      
-      if (CGI.OperandList[i].Rec != R)
+
+      if (CGI.Operands[i].Rec != R)
         I->error("Operand $" + OpName + " class mismatch!");
-      
+
       // Remember the return type.
-      Results.push_back(CGI.OperandList[i].Rec);
-      
+      Results.push_back(CGI.Operands[i].Rec);
+
       // Okay, this one checks out.
       InstResults.erase(OpName);
     }
@@ -2381,8 +2534,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
 
     std::vector<TreePatternNode*> ResultNodeOperands;
     std::vector<Record*> Operands;
-    for (unsigned i = NumResults, e = CGI.OperandList.size(); i != e; ++i) {
-      CodeGenInstruction::OperandInfo &Op = CGI.OperandList[i];
+    for (unsigned i = NumResults, e = CGI.Operands.size(); i != e; ++i) {
+      CGIOperandList::OperandInfo &Op = CGI.Operands[i];
       const std::string &OpName = Op.Name;
       if (OpName.empty())
         I->error("Operand #" + utostr(i) + " in operands list has no name!");
@@ -2403,7 +2556,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
       }
       TreePatternNode *InVal = InstInputsCheck[OpName];
       InstInputsCheck.erase(OpName);   // It occurred, remove from map.
-      
+
       if (InVal->isLeaf() &&
           dynamic_cast<DefInit*>(InVal->getLeafValue())) {
         Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
@@ -2412,13 +2565,13 @@ void CodeGenDAGPatterns::ParseInstructions() {
                    " between the operand and pattern");
       }
       Operands.push_back(Op.Rec);
-      
+
       // Construct the result for the dest-pattern operand list.
       TreePatternNode *OpNode = InVal->clone();
-      
+
       // No predicate is useful on the result.
       OpNode->clearPredicateFns();
-      
+
       // Promote the xform function to be an explicit node if set.
       if (Record *Xform = OpNode->getTransformFn()) {
         OpNode->setTransformFn(0);
@@ -2426,10 +2579,10 @@ void CodeGenDAGPatterns::ParseInstructions() {
         Children.push_back(OpNode);
         OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
       }
-      
+
       ResultNodeOperands.push_back(OpNode);
     }
-    
+
     if (!InstInputsCheck.empty())
       I->error("Input operand $" + InstInputsCheck.begin()->first +
                " occurs in pattern but not in operands list!");
@@ -2454,10 +2607,10 @@ void CodeGenDAGPatterns::ParseInstructions() {
 
     DAGInstruction &TheInsertedInst = Instructions.find(I->getRecord())->second;
     TheInsertedInst.setResultPattern(Temp.getOnlyTree());
-    
+
     DEBUG(I->dump());
   }
-   
+
   // If we can, convert the instructions to be patterns that are matched!
   for (std::map<Record*, DAGInstruction, RecordPtrCmp>::iterator II =
         Instructions.begin(),
@@ -2476,10 +2629,11 @@ void CodeGenDAGPatterns::ParseInstructions() {
       // Not a set (store or something?)
       SrcPattern = Pattern;
     }
-    
+
     Record *Instr = II->first;
     AddPatternToMatch(I,
-                      PatternToMatch(Instr->getValueAsListInit("Predicates"),
+                      PatternToMatch(Instr,
+                                     Instr->getValueAsListInit("Predicates"),
                                      SrcPattern,
                                      TheInst.getResultPattern(),
                                      TheInst.getImpResults(),
@@ -2491,7 +2645,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
 
 typedef std::pair<const TreePatternNode*, unsigned> NameRecord;
 
-static void FindNames(const TreePatternNode *P, 
+static void FindNames(const TreePatternNode *P,
                       std::map<std::string, NameRecord> &Names,
                       const TreePattern *PatternTop) {
   if (!P->getName().empty()) {
@@ -2503,7 +2657,7 @@ static void FindNames(const TreePatternNode *P,
       PatternTop->error("repetition of value: $" + P->getName() +
                         " where different uses have different types!");
   }
-  
+
   if (!P->isLeaf()) {
     for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i)
       FindNames(P->getChild(i), Names, PatternTop);
@@ -2516,7 +2670,7 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern,
   std::string Reason;
   if (!PTM.getSrcPattern()->canPatternMatch(Reason, *this))
     Pattern->error("Pattern can never match: " + Reason);
-  
+
   // If the source pattern's root is a complex pattern, that complex pattern
   // must specify the nodes it can potentially match.
   if (const ComplexPattern *CP =
@@ -2524,8 +2678,8 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern,
     if (CP->getRootNodes().empty())
       Pattern->error("ComplexPattern at root must specify list of opcodes it"
                      " could match");
-  
-  
+
+
   // Find all of the named values in the input and output, ensure they have the
   // same type.
   std::map<std::string, NameRecord> SrcNames, DstNames;
@@ -2540,14 +2694,14 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern,
       Pattern->error("Pattern has input without matching name in output: $" +
                      I->first);
   }
-  
+
   // Scan all of the named values in the source pattern, rejecting them if the
   // name isn't used in the dest, and isn't used to tie two values together.
   for (std::map<std::string, NameRecord>::iterator
        I = SrcNames.begin(), E = SrcNames.end(); I != E; ++I)
     if (DstNames[I->first].first == 0 && SrcNames[I->first].second == 1)
       Pattern->error("Pattern has dead named input: $" + I->first);
-  
+
   PatternsToMatch.push_back(PTM);
 }
 
@@ -2566,7 +2720,7 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
     InstInfo.mayStore = MayStore;
     InstInfo.mayLoad = MayLoad;
     InstInfo.hasSideEffects = HasSideEffects;
-    InstInfo.isVariadic = IsVariadic;
+    InstInfo.Operands.isVariadic = IsVariadic;
   }
 }
 
@@ -2576,7 +2730,7 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
 static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
   if (N->isLeaf())
     return false;
-  
+
   // Analyze children.
   for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
     if (ForceArbitraryInstResultType(N->getChild(i), TP))
@@ -2590,12 +2744,12 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
   for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i) {
     if (N->getExtType(i).isCompletelyUnknown() || N->getExtType(i).isConcrete())
       continue;
-  
+
     // Otherwise, force its type to the first possibility (an arbitrary choice).
     if (N->getExtType(i).MergeInTypeInfo(N->getExtType(i).getTypeList()[0], TP))
       return true;
   }
-  
+
   return false;
 }
 
@@ -2609,20 +2763,20 @@ void CodeGenDAGPatterns::ParsePatterns() {
 
     // Inline pattern fragments into it.
     Pattern->InlinePatternFragments();
-    
+
     ListInit *LI = CurPattern->getValueAsListInit("ResultInstrs");
     if (LI->getSize() == 0) continue;  // no pattern.
-    
+
     // Parse the instruction.
     TreePattern *Result = new TreePattern(CurPattern, LI, false, *this);
-    
+
     // Inline pattern fragments into it.
     Result->InlinePatternFragments();
 
     if (Result->getNumTrees() != 1)
       Result->error("Cannot handle instructions producing instructions "
                     "with temporaries yet!");
-    
+
     bool IterateInference;
     bool InferredAllPatternTypes, InferredAllResultTypes;
     do {
@@ -2630,14 +2784,14 @@ void CodeGenDAGPatterns::ParsePatterns() {
       // can never do anything with this pattern: report it to the user.
       InferredAllPatternTypes =
         Pattern->InferAllTypes(&Pattern->getNamedNodesMap());
-      
+
       // Infer as many types as possible.  If we cannot infer all of them, we
       // can never do anything with this pattern: report it to the user.
       InferredAllResultTypes =
         Result->InferAllTypes(&Pattern->getNamedNodesMap());
 
       IterateInference = false;
-      
+
       // Apply the type of the result to the source pattern.  This helps us
       // resolve cases where the input type is known to be a pointer type (which
       // is considered resolved), but the result knows it needs to be 32- or
@@ -2650,7 +2804,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
         IterateInference |= Result->getTree(0)->
           UpdateNodeType(i, Pattern->getTree(0)->getExtType(i), *Result);
       }
-      
+
       // If our iteration has converged and the input pattern's types are fully
       // resolved but the result pattern is not fully resolved, we may have a
       // situation where we have two instructions in the result pattern and
@@ -2665,7 +2819,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
         IterateInference = ForceArbitraryInstResultType(Result->getTree(0),
                                                         *Result);
     } while (IterateInference);
-    
+
     // Verify that we inferred enough types that we can do something with the
     // pattern and result.  If these fire the user has to add type casts.
     if (!InferredAllPatternTypes)
@@ -2674,7 +2828,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
       Pattern->dump();
       Result->error("Could not infer all types in pattern result!");
     }
-    
+
     // Validate that the input pattern is correct.
     std::map<std::string, TreePatternNode*> InstInputs;
     std::map<std::string, TreePatternNode*> InstResults;
@@ -2702,16 +2856,17 @@ void CodeGenDAGPatterns::ParsePatterns() {
       DstPattern = new TreePatternNode(DstPattern->getOperator(),
                                        ResultNodeOperands,
                                        DstPattern->getNumTypes());
-    
+
     for (unsigned i = 0, e = Result->getOnlyTree()->getNumTypes(); i != e; ++i)
       DstPattern->setType(i, Result->getOnlyTree()->getExtType(i));
-    
+
     TreePattern Temp(Result->getRecord(), DstPattern, false, *this);
     Temp.InferAllTypes();
 
-    
+
     AddPatternToMatch(Pattern,
-                    PatternToMatch(CurPattern->getValueAsListInit("Predicates"),
+                    PatternToMatch(CurPattern,
+                                   CurPattern->getValueAsListInit("Predicates"),
                                    Pattern->getTree(0),
                                    Temp.getOnlyTree(), InstImpResults,
                                    CurPattern->getValueAsInt("AddedComplexity"),
@@ -2721,7 +2876,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
 
 /// CombineChildVariants - Given a bunch of permutations of each child of the
 /// 'operator' node, put them together in all possible ways.
-static void CombineChildVariants(TreePatternNode *Orig, 
+static void CombineChildVariants(TreePatternNode *Orig,
                const std::vector<std::vector<TreePatternNode*> > &ChildVariants,
                                  std::vector<TreePatternNode*> &OutVariants,
                                  CodeGenDAGPatterns &CDP,
@@ -2730,7 +2885,7 @@ static void CombineChildVariants(TreePatternNode *Orig,
   for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
     if (ChildVariants[i].empty())
       return;
-        
+
   // The end result is an all-pairs construction of the resultant pattern.
   std::vector<unsigned> Idxs;
   Idxs.resize(ChildVariants.size());
@@ -2751,21 +2906,21 @@ static void CombineChildVariants(TreePatternNode *Orig,
       NewChildren.push_back(ChildVariants[i][Idxs[i]]);
     TreePatternNode *R = new TreePatternNode(Orig->getOperator(), NewChildren,
                                              Orig->getNumTypes());
-    
+
     // Copy over properties.
     R->setName(Orig->getName());
     R->setPredicateFns(Orig->getPredicateFns());
     R->setTransformFn(Orig->getTransformFn());
     for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i)
       R->setType(i, Orig->getExtType(i));
-    
+
     // If this pattern cannot match, do not include it as a variant.
     std::string ErrString;
     if (!R->canPatternMatch(ErrString, CDP)) {
       delete R;
     } else {
       bool AlreadyExists = false;
-      
+
       // Scan to see if this pattern has already been emitted.  We can get
       // duplication due to things like commuting:
       //   (and GPRC:$a, GPRC:$b) -> (and GPRC:$b, GPRC:$a)
@@ -2775,13 +2930,13 @@ static void CombineChildVariants(TreePatternNode *Orig,
           AlreadyExists = true;
           break;
         }
-      
+
       if (AlreadyExists)
         delete R;
       else
         OutVariants.push_back(R);
     }
-    
+
     // Increment indices to the next permutation by incrementing the
     // indicies from last index backward, e.g., generate the sequence
     // [0, 0], [0, 1], [1, 0], [1, 1].
@@ -2798,7 +2953,7 @@ static void CombineChildVariants(TreePatternNode *Orig,
 
 /// CombineChildVariants - A helper function for binary operators.
 ///
-static void CombineChildVariants(TreePatternNode *Orig, 
+static void CombineChildVariants(TreePatternNode *Orig,
                                  const std::vector<TreePatternNode*> &LHS,
                                  const std::vector<TreePatternNode*> &RHS,
                                  std::vector<TreePatternNode*> &OutVariants,
@@ -2808,14 +2963,14 @@ static void CombineChildVariants(TreePatternNode *Orig,
   ChildVariants.push_back(LHS);
   ChildVariants.push_back(RHS);
   CombineChildVariants(Orig, ChildVariants, OutVariants, CDP, DepVars);
-}  
+}
 
 
 static void GatherChildrenOfAssociativeOpcode(TreePatternNode *N,
                                      std::vector<TreePatternNode *> &Children) {
   assert(N->getNumChildren()==2 &&"Associative but doesn't have 2 children!");
   Record *Operator = N->getOperator();
-  
+
   // Only permit raw nodes.
   if (!N->getName().empty() || !N->getPredicateFns().empty() ||
       N->getTransformFn()) {
@@ -2852,7 +3007,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
 
   // If this node is associative, re-associate.
   if (NodeInfo.hasProperty(SDNPAssociative)) {
-    // Re-associate by pulling together all of the linked operators 
+    // Re-associate by pulling together all of the linked operators
     std::vector<TreePatternNode*> MaximalChildren;
     GatherChildrenOfAssociativeOpcode(N, MaximalChildren);
 
@@ -2864,11 +3019,11 @@ static void GenerateVariantsOf(TreePatternNode *N,
       GenerateVariantsOf(MaximalChildren[0], AVariants, CDP, DepVars);
       GenerateVariantsOf(MaximalChildren[1], BVariants, CDP, DepVars);
       GenerateVariantsOf(MaximalChildren[2], CVariants, CDP, DepVars);
-      
+
       // There are only two ways we can permute the tree:
       //   (A op B) op C    and    A op (B op C)
       // Within these forms, we can also permute A/B/C.
-      
+
       // Generate legal pair permutations of A/B/C.
       std::vector<TreePatternNode*> ABVariants;
       std::vector<TreePatternNode*> BAVariants;
@@ -2901,7 +3056,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
       return;
     }
   }
-  
+
   // Compute permutations of all children.
   std::vector<std::vector<TreePatternNode*> > ChildVariants;
   ChildVariants.resize(N->getNumChildren());
@@ -2953,7 +3108,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
 // match multiple ways.  Add them to PatternsToMatch as well.
 void CodeGenDAGPatterns::GenerateVariants() {
   DEBUG(errs() << "Generating instruction variants.\n");
-  
+
   // Loop over all of the patterns we've collected, checking to see if we can
   // generate variants of the instruction, through the exploitation of
   // identities.  This permits the target to provide aggressive matching without
@@ -2970,7 +3125,8 @@ void CodeGenDAGPatterns::GenerateVariants() {
     DEBUG(errs() << "Dependent/multiply used variables: ");
     DEBUG(DumpDepVars(DepVars));
     DEBUG(errs() << "\n");
-    GenerateVariantsOf(PatternsToMatch[i].getSrcPattern(), Variants, *this, DepVars);
+    GenerateVariantsOf(PatternsToMatch[i].getSrcPattern(), Variants, *this,
+                       DepVars);
 
     assert(!Variants.empty() && "Must create at least original variant!");
     Variants.erase(Variants.begin());  // Remove the original pattern.
@@ -2988,7 +3144,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
       DEBUG(errs() << "  VAR#" << v <<  ": ";
             Variant->dump();
             errs() << "\n");
-      
+
       // Scan to see if an instruction or explicit pattern already matches this.
       bool AlreadyExists = false;
       for (unsigned p = 0, e = PatternsToMatch.size(); p != e; ++p) {
@@ -2997,7 +3153,8 @@ void CodeGenDAGPatterns::GenerateVariants() {
             PatternsToMatch[p].getPredicates())
           continue;
         // Check to see if this variant already exists.
-        if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(), DepVars)) {
+        if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(),
+                                    DepVars)) {
           DEBUG(errs() << "  *** ALREADY EXISTS, ignoring variant.\n");
           AlreadyExists = true;
           break;
@@ -3008,7 +3165,8 @@ void CodeGenDAGPatterns::GenerateVariants() {
 
       // Otherwise, add it to the list of patterns we have.
       PatternsToMatch.
-        push_back(PatternToMatch(PatternsToMatch[i].getPredicates(),
+        push_back(PatternToMatch(PatternsToMatch[i].getSrcRecord(),
+                                 PatternsToMatch[i].getPredicates(),
                                  Variant, PatternsToMatch[i].getDstPattern(),
                                  PatternsToMatch[i].getDstRegs(),
                                  PatternsToMatch[i].getAddedComplexity(),
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 0a1362ab2494..946dceed66c0 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -58,50 +58,50 @@ namespace EEVT {
   public:
     TypeSet() {}
     TypeSet(MVT::SimpleValueType VT, TreePattern &TP);
-    TypeSet(const std::vector<MVT::SimpleValueType> &VTList);    
-    
+    TypeSet(const std::vector<MVT::SimpleValueType> &VTList);
+
     bool isCompletelyUnknown() const { return TypeVec.empty(); }
-    
+
     bool isConcrete() const {
       if (TypeVec.size() != 1) return false;
       unsigned char T = TypeVec[0]; (void)T;
       assert(T < MVT::LAST_VALUETYPE || T == MVT::iPTR || T == MVT::iPTRAny);
       return true;
     }
-    
+
     MVT::SimpleValueType getConcrete() const {
       assert(isConcrete() && "Type isn't concrete yet");
       return (MVT::SimpleValueType)TypeVec[0];
     }
-    
+
     bool isDynamicallyResolved() const {
       return getConcrete() == MVT::iPTR || getConcrete() == MVT::iPTRAny;
     }
-    
+
     const SmallVectorImpl<MVT::SimpleValueType> &getTypeList() const {
       assert(!TypeVec.empty() && "Not a type list!");
       return TypeVec;
     }
-    
+
     bool isVoid() const {
       return TypeVec.size() == 1 && TypeVec[0] == MVT::isVoid;
     }
-    
+
     /// hasIntegerTypes - Return true if this TypeSet contains any integer value
     /// types.
     bool hasIntegerTypes() const;
-    
+
     /// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or
     /// a floating point value type.
     bool hasFloatingPointTypes() const;
-    
+
     /// hasVectorTypes - Return true if this TypeSet contains a vector value
     /// type.
     bool hasVectorTypes() const;
-    
+
     /// getName() - Return this TypeSet as a string.
     std::string getName() const;
-    
+
     /// MergeInTypeInfo - This merges in type information from the specified
     /// argument.  If 'this' changes, it returns true.  If the two types are
     /// contradictory (e.g. merge f32 into i32) then this throws an exception.
@@ -126,14 +126,18 @@ namespace EEVT {
     /// EnforceSmallerThan - 'this' must be a smaller VT than Other.  Update
     /// this an other based on this information.
     bool EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP);
-    
+
     /// EnforceVectorEltTypeIs - 'this' is now constrainted to be a vector type
     /// whose element is VT.
     bool EnforceVectorEltTypeIs(EEVT::TypeSet &VT, TreePattern &TP);
-    
+
+    /// EnforceVectorSubVectorTypeIs - 'this' is now constrainted to
+    /// be a vector type VT.
+    bool EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VT, TreePattern &TP);
+
     bool operator!=(const TypeSet &RHS) const { return TypeVec != RHS.TypeVec; }
     bool operator==(const TypeSet &RHS) const { return TypeVec == RHS.TypeVec; }
-    
+
   private:
     /// FillWithPossibleTypes - Set to all legal types and return true, only
     /// valid on completely unknown type sets.  If Pred is non-null, only MVTs
@@ -151,13 +155,14 @@ typedef std::set<std::string> MultipleUseVarSet;
 /// corresponding to the SDTypeConstraint tablegen class in Target.td.
 struct SDTypeConstraint {
   SDTypeConstraint(Record *R);
-  
+
   unsigned OperandNo;   // The operand # this constraint applies to.
-  enum { 
-    SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisVec, SDTCisSameAs, 
-    SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec
+  enum {
+    SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisVec, SDTCisSameAs,
+    SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec,
+    SDTCisSubVecOfVec
   } ConstraintType;
-  
+
   union {   // The discriminated union.
     struct {
       MVT::SimpleValueType VT;
@@ -174,6 +179,9 @@ struct SDTypeConstraint {
     struct {
       unsigned OtherOperandNum;
     } SDTCisEltOfVec_Info;
+    struct {
+      unsigned OtherOperandNum;
+    } SDTCisSubVecOfVec_Info;
   } x;
 
   /// ApplyTypeConstraint - Given a node in a pattern, apply this type
@@ -197,25 +205,25 @@ class SDNodeInfo {
   std::vector<SDTypeConstraint> TypeConstraints;
 public:
   SDNodeInfo(Record *R);  // Parse the specified record.
-  
+
   unsigned getNumResults() const { return NumResults; }
-  
+
   /// getNumOperands - This is the number of operands required or -1 if
   /// variadic.
   int getNumOperands() const { return NumOperands; }
   Record *getRecord() const { return Def; }
   const std::string &getEnumName() const { return EnumName; }
   const std::string &getSDClassName() const { return SDClassName; }
-  
+
   const std::vector<SDTypeConstraint> &getTypeConstraints() const {
     return TypeConstraints;
   }
-  
+
   /// getKnownType - If the type constraints on this node imply a fixed type
   /// (e.g. all stores return void, etc), then return it as an
   /// MVT::SimpleValueType.  Otherwise, return MVT::Other.
   MVT::SimpleValueType getKnownType(unsigned ResNo) const;
-  
+
   /// hasProperty - Return true if this node has the specified property.
   ///
   bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
@@ -240,31 +248,31 @@ class TreePatternNode {
   /// result may be a set of possible types.  After (successful) type inference,
   /// each is a single concrete type.
   SmallVector<EEVT::TypeSet, 1> Types;
-  
+
   /// Operator - The Record for the operator if this is an interior node (not
   /// a leaf).
   Record *Operator;
-  
+
   /// Val - The init value (e.g. the "GPRC" record, or "7") for a leaf.
   ///
   Init *Val;
-  
+
   /// Name - The name given to this node with the :$foo notation.
   ///
   std::string Name;
-  
+
   /// PredicateFns - The predicate functions to execute on this node to check
   /// for a match.  If this list is empty, no predicate is involved.
   std::vector<std::string> PredicateFns;
-  
+
   /// TransformFn - The transformation function to execute on this node before
   /// it can be substituted into the resulting instruction on a pattern match.
   Record *TransformFn;
-  
+
   std::vector<TreePatternNode*> Children;
 public:
   TreePatternNode(Record *Op, const std::vector<TreePatternNode*> &Ch,
-                  unsigned NumResults) 
+                  unsigned NumResults)
     : Operator(Op), Val(0), TransformFn(0), Children(Ch) {
     Types.resize(NumResults);
   }
@@ -273,12 +281,12 @@ public:
     Types.resize(NumResults);
   }
   ~TreePatternNode();
-  
+
   const std::string &getName() const { return Name; }
   void setName(StringRef N) { Name.assign(N.begin(), N.end()); }
-  
+
   bool isLeaf() const { return Val != 0; }
-  
+
   // Type accessors.
   unsigned getNumTypes() const { return Types.size(); }
   MVT::SimpleValueType getType(unsigned ResNo) const {
@@ -288,7 +296,7 @@ public:
   const EEVT::TypeSet &getExtType(unsigned ResNo) const { return Types[ResNo]; }
   EEVT::TypeSet &getExtType(unsigned ResNo) { return Types[ResNo]; }
   void setType(unsigned ResNo, const EEVT::TypeSet &T) { Types[ResNo] = T; }
-  
+
   bool hasTypeSet(unsigned ResNo) const {
     return Types[ResNo].isConcrete();
   }
@@ -298,16 +306,16 @@ public:
   bool isTypeDynamicallyResolved(unsigned ResNo) const {
     return Types[ResNo].isDynamicallyResolved();
   }
-  
+
   Init *getLeafValue() const { assert(isLeaf()); return Val; }
   Record *getOperator() const { assert(!isLeaf()); return Operator; }
-  
+
   unsigned getNumChildren() const { return Children.size(); }
   TreePatternNode *getChild(unsigned N) const { return Children[N]; }
   void setChild(unsigned i, TreePatternNode *N) {
     Children[i] = N;
   }
-  
+
   /// hasChild - Return true if N is any of our children.
   bool hasChild(const TreePatternNode *N) const {
     for (unsigned i = 0, e = Children.size(); i != e; ++i)
@@ -321,7 +329,7 @@ public:
     assert(PredicateFns.empty() && "Overwriting non-empty predicate list!");
     PredicateFns = Fns;
   }
-  void addPredicateFn(const std::string &Fn) { 
+  void addPredicateFn(const std::string &Fn) {
     assert(!Fn.empty() && "Empty predicate string!");
     if (std::find(PredicateFns.begin(), PredicateFns.end(), Fn) ==
           PredicateFns.end())
@@ -330,7 +338,7 @@ public:
 
   Record *getTransformFn() const { return TransformFn; }
   void setTransformFn(Record *Fn) { TransformFn = Fn; }
-  
+
   /// getIntrinsicInfo - If this node corresponds to an intrinsic, return the
   /// CodeGenIntrinsic information for it, otherwise return a null pointer.
   const CodeGenIntrinsic *getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const;
@@ -342,18 +350,18 @@ public:
 
   /// NodeHasProperty - Return true if this node has the specified property.
   bool NodeHasProperty(SDNP Property, const CodeGenDAGPatterns &CGP) const;
-  
+
   /// TreeHasProperty - Return true if any node in this tree has the specified
   /// property.
   bool TreeHasProperty(SDNP Property, const CodeGenDAGPatterns &CGP) const;
-  
+
   /// isCommutativeIntrinsic - Return true if the node is an intrinsic which is
   /// marked isCommutative.
   bool isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const;
-  
+
   void print(raw_ostream &OS) const;
   void dump() const;
-  
+
 public:   // Higher level manipulation routines.
 
   /// clone - Return a new copy of this tree.
@@ -362,14 +370,14 @@ public:   // Higher level manipulation routines.
 
   /// RemoveAllTypes - Recursively strip all the types of this tree.
   void RemoveAllTypes();
-  
+
   /// isIsomorphicTo - Return true if this node is recursively isomorphic to
   /// the specified node.  For this comparison, all of the state of the node
   /// is considered, except for the assigned name.  Nodes with differing names
   /// that are otherwise identical are considered isomorphic.
   bool isIsomorphicTo(const TreePatternNode *N,
                       const MultipleUseVarSet &DepVars) const;
-  
+
   /// SubstituteFormalArguments - Replace the formal arguments in this tree
   /// with actual values specified by ArgMap.
   void SubstituteFormalArguments(std::map<std::string,
@@ -379,13 +387,13 @@ public:   // Higher level manipulation routines.
   /// fragments, inline them into place, giving us a pattern without any
   /// PatFrag references.
   TreePatternNode *InlinePatternFragments(TreePattern &TP);
-  
+
   /// ApplyTypeConstraints - Apply all of the type constraints relevant to
   /// this node and its children in the tree.  This returns true if it makes a
   /// change, false otherwise.  If a type contradiction is found, throw an
   /// exception.
   bool ApplyTypeConstraints(TreePattern &TP, bool NotRegisters);
-  
+
   /// UpdateNodeType - Set the node type of N to VT if VT contains
   /// information.  If N already contains a conflicting type, then throw an
   /// exception.  This returns true if any information was updated.
@@ -399,18 +407,18 @@ public:   // Higher level manipulation routines.
                       TreePattern &TP) {
     return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP);
   }
-  
+
   /// ContainsUnresolvedType - Return true if this tree contains any
   /// unresolved types.
   bool ContainsUnresolvedType() const {
     for (unsigned i = 0, e = Types.size(); i != e; ++i)
       if (!Types[i].isConcrete()) return true;
-    
+
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       if (getChild(i)->ContainsUnresolvedType()) return true;
     return false;
   }
-  
+
   /// canPatternMatch - If it is impossible for this pattern to match on this
   /// target, fill in Reason and return false.  Otherwise, return true.
   bool canPatternMatch(std::string &Reason, const CodeGenDAGPatterns &CDP);
@@ -420,7 +428,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const TreePatternNode &TPN) {
   TPN.print(OS);
   return OS;
 }
-  
+
 
 /// TreePattern - Represent a pattern, used for instructions, pattern
 /// fragments, etc.
@@ -430,19 +438,19 @@ class TreePattern {
   /// Note that PatFrag's only have a single tree.
   ///
   std::vector<TreePatternNode*> Trees;
-  
+
   /// NamedNodes - This is all of the nodes that have names in the trees in this
   /// pattern.
   StringMap<SmallVector<TreePatternNode*,1> > NamedNodes;
-  
+
   /// TheRecord - The actual TableGen record corresponding to this pattern.
   ///
   Record *TheRecord;
-    
+
   /// Args - This is a list of all of the arguments to this pattern (for
   /// PatFrag patterns), which are the 'node' markers in this pattern.
   std::vector<std::string> Args;
-  
+
   /// CDP - the top-level object coordinating this madness.
   ///
   CodeGenDAGPatterns &CDP;
@@ -451,7 +459,7 @@ class TreePattern {
   /// False if this is an output pattern, something to emit.
   bool isInputPattern;
 public:
-    
+
   /// TreePattern constructor - Parse the specified DagInits into the
   /// current record.
   TreePattern(Record *TheRec, ListInit *RawPat, bool isInput,
@@ -460,7 +468,7 @@ public:
               CodeGenDAGPatterns &ise);
   TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput,
               CodeGenDAGPatterns &ise);
-      
+
   /// getTrees - Return the tree patterns which corresponds to this pattern.
   ///
   const std::vector<TreePatternNode*> &getTrees() const { return Trees; }
@@ -470,25 +478,25 @@ public:
     assert(Trees.size() == 1 && "Doesn't have exactly one pattern!");
     return Trees[0];
   }
-  
+
   const StringMap<SmallVector<TreePatternNode*,1> > &getNamedNodesMap() {
     if (NamedNodes.empty())
       ComputeNamedNodes();
     return NamedNodes;
   }
-      
+
   /// getRecord - Return the actual TableGen record corresponding to this
   /// pattern.
   ///
   Record *getRecord() const { return TheRecord; }
-  
+
   unsigned getNumArgs() const { return Args.size(); }
   const std::string &getArgName(unsigned i) const {
     assert(i < Args.size() && "Argument reference out of range!");
     return Args[i];
   }
   std::vector<std::string> &getArgList() { return Args; }
-  
+
   CodeGenDAGPatterns &getDAGPatterns() const { return CDP; }
 
   /// InlinePatternFragments - If this pattern refers to any pattern
@@ -498,20 +506,20 @@ public:
     for (unsigned i = 0, e = Trees.size(); i != e; ++i)
       Trees[i] = Trees[i]->InlinePatternFragments(*this);
   }
-  
+
   /// InferAllTypes - Infer/propagate as many types throughout the expression
   /// patterns as possible.  Return true if all types are inferred, false
   /// otherwise.  Throw an exception if a type contradiction is found.
   bool InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> >
                           *NamedTypes=0);
-  
+
   /// error - Throw an exception, prefixing it with information about this
   /// pattern.
   void error(const std::string &Msg) const;
-  
+
   void print(raw_ostream &OS) const;
   void dump() const;
-  
+
 private:
   TreePatternNode *ParseTreePattern(Init *DI, StringRef OpName);
   void ComputeNamedNodes();
@@ -535,7 +543,7 @@ public:
                  const std::vector<Record*> &results,
                  const std::vector<Record*> &operands,
                  const std::vector<Record*> &impresults)
-    : Pattern(TP), Results(results), Operands(operands), 
+    : Pattern(TP), Results(results), Operands(operands),
       ImpResults(impresults), ResultPattern(0) {}
 
   const TreePattern *getPattern() const { return Pattern; }
@@ -543,14 +551,14 @@ public:
   unsigned getNumOperands() const { return Operands.size(); }
   unsigned getNumImpResults() const { return ImpResults.size(); }
   const std::vector<Record*>& getImpResults() const { return ImpResults; }
-  
+
   void setResultPattern(TreePatternNode *R) { ResultPattern = R; }
-  
+
   Record *getResult(unsigned RN) const {
     assert(RN < Results.size());
     return Results[RN];
   }
-  
+
   Record *getOperand(unsigned ON) const {
     assert(ON < Operands.size());
     return Operands[ON];
@@ -560,21 +568,22 @@ public:
     assert(RN < ImpResults.size());
     return ImpResults[RN];
   }
-  
+
   TreePatternNode *getResultPattern() const { return ResultPattern; }
 };
-  
+
 /// PatternToMatch - Used by CodeGenDAGPatterns to keep tab of patterns
 /// processed to produce isel.
 class PatternToMatch {
 public:
-  PatternToMatch(ListInit *preds,
+  PatternToMatch(Record *srcrecord, ListInit *preds,
                  TreePatternNode *src, TreePatternNode *dst,
                  const std::vector<Record*> &dstregs,
                  unsigned complexity, unsigned uid)
-    : Predicates(preds), SrcPattern(src), DstPattern(dst),
+    : SrcRecord(srcrecord), Predicates(preds), SrcPattern(src), DstPattern(dst),
       Dstregs(dstregs), AddedComplexity(complexity), ID(uid) {}
 
+  Record          *SrcRecord;   // Originating Record for the pattern.
   ListInit        *Predicates;  // Top level predicate conditions to match.
   TreePatternNode *SrcPattern;  // Source pattern to match.
   TreePatternNode *DstPattern;  // Resulting pattern.
@@ -582,6 +591,7 @@ public:
   unsigned         AddedComplexity; // Add to matching pattern complexity.
   unsigned         ID;          // Unique ID for the record.
 
+  Record          *getSrcRecord()  const { return SrcRecord; }
   ListInit        *getPredicates() const { return Predicates; }
   TreePatternNode *getSrcPattern() const { return SrcPattern; }
   TreePatternNode *getDstPattern() const { return DstPattern; }
@@ -589,7 +599,7 @@ public:
   unsigned         getAddedComplexity() const { return AddedComplexity; }
 
   std::string getPredicateCheck() const;
-  
+
   /// Compute the complexity metric for the input pattern.  This roughly
   /// corresponds to the number of nodes that are covered.
   unsigned getPatternComplexity(const CodeGenDAGPatterns &CGP) const;
@@ -599,60 +609,60 @@ public:
 struct RecordPtrCmp {
   bool operator()(const Record *LHS, const Record *RHS) const;
 };
-  
+
 class CodeGenDAGPatterns {
   RecordKeeper &Records;
   CodeGenTarget Target;
   std::vector<CodeGenIntrinsic> Intrinsics;
   std::vector<CodeGenIntrinsic> TgtIntrinsics;
-  
+
   std::map<Record*, SDNodeInfo, RecordPtrCmp> SDNodes;
   std::map<Record*, std::pair<Record*, std::string>, RecordPtrCmp> SDNodeXForms;
   std::map<Record*, ComplexPattern, RecordPtrCmp> ComplexPatterns;
   std::map<Record*, TreePattern*, RecordPtrCmp> PatternFragments;
   std::map<Record*, DAGDefaultOperand, RecordPtrCmp> DefaultOperands;
   std::map<Record*, DAGInstruction, RecordPtrCmp> Instructions;
-  
+
   // Specific SDNode definitions:
   Record *intrinsic_void_sdnode;
   Record *intrinsic_w_chain_sdnode, *intrinsic_wo_chain_sdnode;
-  
+
   /// PatternsToMatch - All of the things we are matching on the DAG.  The first
   /// value is the pattern to match, the second pattern is the result to
   /// emit.
   std::vector<PatternToMatch> PatternsToMatch;
 public:
-  CodeGenDAGPatterns(RecordKeeper &R); 
+  CodeGenDAGPatterns(RecordKeeper &R);
   ~CodeGenDAGPatterns();
-  
+
   CodeGenTarget &getTargetInfo() { return Target; }
   const CodeGenTarget &getTargetInfo() const { return Target; }
-  
+
   Record *getSDNodeNamed(const std::string &Name) const;
-  
+
   const SDNodeInfo &getSDNodeInfo(Record *R) const {
     assert(SDNodes.count(R) && "Unknown node!");
     return SDNodes.find(R)->second;
   }
-  
+
   // Node transformation lookups.
   typedef std::pair<Record*, std::string> NodeXForm;
   const NodeXForm &getSDNodeTransform(Record *R) const {
     assert(SDNodeXForms.count(R) && "Invalid transform!");
     return SDNodeXForms.find(R)->second;
   }
-  
+
   typedef std::map<Record*, NodeXForm, RecordPtrCmp>::const_iterator
           nx_iterator;
   nx_iterator nx_begin() const { return SDNodeXForms.begin(); }
   nx_iterator nx_end() const { return SDNodeXForms.end(); }
 
-  
+
   const ComplexPattern &getComplexPattern(Record *R) const {
     assert(ComplexPatterns.count(R) && "Unknown addressing mode!");
     return ComplexPatterns.find(R)->second;
   }
-  
+
   const CodeGenIntrinsic &getIntrinsic(Record *R) const {
     for (unsigned i = 0, e = Intrinsics.size(); i != e; ++i)
       if (Intrinsics[i].TheDef == R) return Intrinsics[i];
@@ -661,7 +671,7 @@ public:
     assert(0 && "Unknown intrinsic!");
     abort();
   }
-  
+
   const CodeGenIntrinsic &getIntrinsicInfo(unsigned IID) const {
     if (IID-1 < Intrinsics.size())
       return Intrinsics[IID-1];
@@ -670,7 +680,7 @@ public:
     assert(0 && "Bad intrinsic ID!");
     abort();
   }
-  
+
   unsigned getIntrinsicID(Record *R) const {
     for (unsigned i = 0, e = Intrinsics.size(); i != e; ++i)
       if (Intrinsics[i].TheDef == R) return i;
@@ -679,12 +689,12 @@ public:
     assert(0 && "Unknown intrinsic!");
     abort();
   }
-  
+
   const DAGDefaultOperand &getDefaultOperand(Record *R) const {
     assert(DefaultOperands.count(R) &&"Isn't an analyzed default operand!");
     return DefaultOperands.find(R)->second;
   }
-  
+
   // Pattern Fragment information.
   TreePattern *getPatternFragment(Record *R) const {
     assert(PatternFragments.count(R) && "Invalid pattern fragment request!");
@@ -694,7 +704,7 @@ public:
     if (!PatternFragments.count(R)) return 0;
     return PatternFragments.find(R)->second;
   }
-  
+
   typedef std::map<Record*, TreePattern*, RecordPtrCmp>::const_iterator
           pf_iterator;
   pf_iterator pf_begin() const { return PatternFragments.begin(); }
@@ -704,14 +714,14 @@ public:
   typedef std::vector<PatternToMatch>::const_iterator ptm_iterator;
   ptm_iterator ptm_begin() const { return PatternsToMatch.begin(); }
   ptm_iterator ptm_end() const { return PatternsToMatch.end(); }
-  
-  
-  
+
+
+
   const DAGInstruction &getInstruction(Record *R) const {
     assert(Instructions.count(R) && "Unknown instruction!");
     return Instructions.find(R)->second;
   }
-  
+
   Record *get_intrinsic_void_sdnode() const {
     return intrinsic_void_sdnode;
   }
@@ -721,7 +731,7 @@ public:
   Record *get_intrinsic_wo_chain_sdnode() const {
     return intrinsic_wo_chain_sdnode;
   }
-  
+
   bool hasTargetIntrinsics() { return !TgtIntrinsics.empty(); }
 
 private:
@@ -734,7 +744,7 @@ private:
   void ParsePatterns();
   void InferInstructionFlags();
   void GenerateVariants();
-  
+
   void AddPatternToMatch(const TreePattern *Pattern, const PatternToMatch &PTM);
   void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
                                    std::map<std::string,
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 01a1fe11f531..f37d3eabcd41 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -15,120 +15,19 @@
 #include "CodeGenTarget.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include <set>
 using namespace llvm;
 
-static void ParseConstraint(const std::string &CStr, CodeGenInstruction *I) {
-  // EARLY_CLOBBER: @early $reg
-  std::string::size_type wpos = CStr.find_first_of(" \t");
-  std::string::size_type start = CStr.find_first_not_of(" \t");
-  std::string Tok = CStr.substr(start, wpos - start);
-  if (Tok == "@earlyclobber") {
-    std::string Name = CStr.substr(wpos+1);
-    wpos = Name.find_first_not_of(" \t");
-    if (wpos == std::string::npos)
-      throw "Illegal format for @earlyclobber constraint: '" + CStr + "'";
-    Name = Name.substr(wpos);
-    std::pair<unsigned,unsigned> Op =
-      I->ParseOperandName(Name, false);
-
-    // Build the string for the operand
-    if (!I->OperandList[Op.first].Constraints[Op.second].isNone())
-      throw "Operand '" + Name + "' cannot have multiple constraints!";
-    I->OperandList[Op.first].Constraints[Op.second] =
-      CodeGenInstruction::ConstraintInfo::getEarlyClobber();
-    return;
-  }
-
-  // Only other constraint is "TIED_TO" for now.
-  std::string::size_type pos = CStr.find_first_of('=');
-  assert(pos != std::string::npos && "Unrecognized constraint");
-  start = CStr.find_first_not_of(" \t");
-  std::string Name = CStr.substr(start, pos - start);
-
-  // TIED_TO: $src1 = $dst
-  wpos = Name.find_first_of(" \t");
-  if (wpos == std::string::npos)
-    throw "Illegal format for tied-to constraint: '" + CStr + "'";
-  std::string DestOpName = Name.substr(0, wpos);
-  std::pair<unsigned,unsigned> DestOp = I->ParseOperandName(DestOpName, false);
-
-  Name = CStr.substr(pos+1);
-  wpos = Name.find_first_not_of(" \t");
-  if (wpos == std::string::npos)
-    throw "Illegal format for tied-to constraint: '" + CStr + "'";
-
-  std::pair<unsigned,unsigned> SrcOp =
-  I->ParseOperandName(Name.substr(wpos), false);
-  if (SrcOp > DestOp)
-    throw "Illegal tied-to operand constraint '" + CStr + "'";
-
-
-  unsigned FlatOpNo = I->getFlattenedOperandNumber(SrcOp);
-
-  if (!I->OperandList[DestOp.first].Constraints[DestOp.second].isNone())
-    throw "Operand '" + DestOpName + "' cannot have multiple constraints!";
-  I->OperandList[DestOp.first].Constraints[DestOp.second] =
-    CodeGenInstruction::ConstraintInfo::getTied(FlatOpNo);
-}
-
-static void ParseConstraints(const std::string &CStr, CodeGenInstruction *I) {
-  // Make sure the constraints list for each operand is large enough to hold
-  // constraint info, even if none is present.
-  for (unsigned i = 0, e = I->OperandList.size(); i != e; ++i)
-    I->OperandList[i].Constraints.resize(I->OperandList[i].MINumOperands);
-
-  if (CStr.empty()) return;
-
-  const std::string delims(",");
-  std::string::size_type bidx, eidx;
-
-  bidx = CStr.find_first_not_of(delims);
-  while (bidx != std::string::npos) {
-    eidx = CStr.find_first_of(delims, bidx);
-    if (eidx == std::string::npos)
-      eidx = CStr.length();
-
-    ParseConstraint(CStr.substr(bidx, eidx - bidx), I);
-    bidx = CStr.find_first_not_of(delims, eidx);
-  }
-}
-
-CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
-  : TheDef(R), AsmString(AsmStr) {
-  Namespace = R->getValueAsString("Namespace");
+//===----------------------------------------------------------------------===//
+// CGIOperandList Implementation
+//===----------------------------------------------------------------------===//
 
-  isReturn     = R->getValueAsBit("isReturn");
-  isBranch     = R->getValueAsBit("isBranch");
-  isIndirectBranch = R->getValueAsBit("isIndirectBranch");
-  isCompare    = R->getValueAsBit("isCompare");
-  isBarrier    = R->getValueAsBit("isBarrier");
-  isCall       = R->getValueAsBit("isCall");
-  canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
-  mayLoad      = R->getValueAsBit("mayLoad");
-  mayStore     = R->getValueAsBit("mayStore");
-  isPredicable = R->getValueAsBit("isPredicable");
-  isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
-  isCommutable = R->getValueAsBit("isCommutable");
-  isTerminator = R->getValueAsBit("isTerminator");
-  isReMaterializable = R->getValueAsBit("isReMaterializable");
-  hasDelaySlot = R->getValueAsBit("hasDelaySlot");
-  usesCustomInserter = R->getValueAsBit("usesCustomInserter");
-  hasCtrlDep   = R->getValueAsBit("hasCtrlDep");
-  isNotDuplicable = R->getValueAsBit("isNotDuplicable");
-  hasSideEffects = R->getValueAsBit("hasSideEffects");
-  neverHasSideEffects = R->getValueAsBit("neverHasSideEffects");
-  isAsCheapAsAMove = R->getValueAsBit("isAsCheapAsAMove");
-  hasExtraSrcRegAllocReq = R->getValueAsBit("hasExtraSrcRegAllocReq");
-  hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
+CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
+  isPredicable = false;
   hasOptionalDef = false;
   isVariadic = false;
-  ImplicitDefs = R->getValueAsListOfDefs("Defs");
-  ImplicitUses = R->getValueAsListOfDefs("Uses");
-
-  if (neverHasSideEffects + hasSideEffects > 1)
-    throw R->getName() + ": multiple conflicting side-effect flags set!";
 
   DagInit *OutDI = R->getValueAsDag("OutOperandList");
 
@@ -137,16 +36,16 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
       throw R->getName() + ": invalid def name for output list: use 'outs'";
   } else
     throw R->getName() + ": invalid output list: use 'outs'";
-    
+
   NumDefs = OutDI->getNumArgs();
-    
+
   DagInit *InDI = R->getValueAsDag("InOperandList");
   if (DefInit *Init = dynamic_cast<DefInit*>(InDI->getOperator())) {
     if (Init->getDef()->getName() != "ins")
       throw R->getName() + ": invalid def name for input list: use 'ins'";
   } else
     throw R->getName() + ": invalid input list: use 'ins'";
-    
+
   unsigned MIOperandNo = 0;
   std::set<std::string> OperandNames;
   for (unsigned i = 0, e = InDI->getNumArgs()+OutDI->getNumArgs(); i != e; ++i){
@@ -159,25 +58,28 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
       ArgInit = InDI->getArg(i-NumDefs);
       ArgName = InDI->getArgName(i-NumDefs);
     }
-    
+
     DefInit *Arg = dynamic_cast<DefInit*>(ArgInit);
     if (!Arg)
       throw "Illegal operand for the '" + R->getName() + "' instruction!";
 
     Record *Rec = Arg->getDef();
     std::string PrintMethod = "printOperand";
+    std::string EncoderMethod;
     unsigned NumOps = 1;
     DagInit *MIOpInfo = 0;
     if (Rec->isSubClassOf("Operand")) {
       PrintMethod = Rec->getValueAsString("PrintMethod");
+      // If there is an explicit encoder method, use it.
+      EncoderMethod = Rec->getValueAsString("EncoderMethod");
       MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
 
       // Verify that MIOpInfo has an 'ops' root value.
       if (!dynamic_cast<DefInit*>(MIOpInfo->getOperator()) ||
           dynamic_cast<DefInit*>(MIOpInfo->getOperator())
-               ->getDef()->getName() != "ops")
+          ->getDef()->getName() != "ops")
         throw "Bad value for MIOperandInfo in operand '" + Rec->getName() +
-              "'\n";
+        "'\n";
 
       // If we have MIOpInfo, then we have #operands equal to number of entries
       // in MIOperandInfo.
@@ -192,58 +94,58 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
       isVariadic = true;
       continue;
     } else if (!Rec->isSubClassOf("RegisterClass") &&
-               Rec->getName() != "ptr_rc" && Rec->getName() != "unknown")
+               !Rec->isSubClassOf("PointerLikeRegClass") &&
+               Rec->getName() != "unknown")
       throw "Unknown operand class '" + Rec->getName() +
-            "' in '" + R->getName() + "' instruction!";
+      "' in '" + R->getName() + "' instruction!";
 
     // Check that the operand has a name and that it's unique.
     if (ArgName.empty())
       throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
-        " has no name!";
+      " has no name!";
     if (!OperandNames.insert(ArgName).second)
       throw "In instruction '" + R->getName() + "', operand #" + utostr(i) +
-        " has the same name as a previous operand!";
+      " has the same name as a previous operand!";
 
-    OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod,
+    OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, EncoderMethod,
                                       MIOperandNo, NumOps, MIOpInfo));
     MIOperandNo += NumOps;
   }
 
-  // Parse Constraints.
-  ParseConstraints(R->getValueAsString("Constraints"), this);
-
-  // Parse the DisableEncoding field.
-  std::string DisableEncoding = R->getValueAsString("DisableEncoding");
-  while (1) {
-    std::string OpName;
-    tie(OpName, DisableEncoding) = getToken(DisableEncoding, " ,\t");
-    if (OpName.empty()) break;
-
-    // Figure out which operand this is.
-    std::pair<unsigned,unsigned> Op = ParseOperandName(OpName, false);
 
-    // Mark the operand as not-to-be encoded.
-    if (Op.second >= OperandList[Op.first].DoNotEncode.size())
-      OperandList[Op.first].DoNotEncode.resize(Op.second+1);
-    OperandList[Op.first].DoNotEncode[Op.second] = true;
-  }
+  // Make sure the constraints list for each operand is large enough to hold
+  // constraint info, even if none is present.
+  for (unsigned i = 0, e = OperandList.size(); i != e; ++i)
+    OperandList[i].Constraints.resize(OperandList[i].MINumOperands);
 }
 
+
 /// getOperandNamed - Return the index of the operand with the specified
 /// non-empty name.  If the instruction does not have an operand with the
 /// specified name, throw an exception.
 ///
-unsigned CodeGenInstruction::getOperandNamed(const std::string &Name) const {
+unsigned CGIOperandList::getOperandNamed(StringRef Name) const {
+  unsigned OpIdx;
+  if (hasOperandNamed(Name, OpIdx)) return OpIdx;
+  throw "'" + TheDef->getName() + "' does not have an operand named '$" +
+    Name.str() + "'!";
+}
+
+/// hasOperandNamed - Query whether the instruction has an operand of the
+/// given name. If so, return true and set OpIdx to the index of the
+/// operand. Otherwise, return false.
+bool CGIOperandList::hasOperandNamed(StringRef Name, unsigned &OpIdx) const {
   assert(!Name.empty() && "Cannot search for operand with no name!");
   for (unsigned i = 0, e = OperandList.size(); i != e; ++i)
-    if (OperandList[i].Name == Name) return i;
-  throw "Instruction '" + TheDef->getName() +
-        "' does not have an operand named '$" + Name + "'!";
+    if (OperandList[i].Name == Name) {
+      OpIdx = i;
+      return true;
+    }
+  return false;
 }
 
 std::pair<unsigned,unsigned>
-CodeGenInstruction::ParseOperandName(const std::string &Op,
-                                     bool AllowWholeOp) {
+CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
   if (Op.empty() || Op[0] != '$')
     throw TheDef->getName() + ": Illegal operand name: '" + Op + "'";
 
@@ -266,7 +168,7 @@ CodeGenInstruction::ParseOperandName(const std::string &Op,
     if (OperandList[OpIdx].MINumOperands > 1 && !AllowWholeOp &&
         SubOpName.empty())
       throw TheDef->getName() + ": Illegal to refer to"
-            " whole operand part of complex operand '" + Op + "'";
+      " whole operand part of complex operand '" + Op + "'";
 
     // Otherwise, return the operand.
     return std::make_pair(OpIdx, 0U);
@@ -286,6 +188,137 @@ CodeGenInstruction::ParseOperandName(const std::string &Op,
   throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'";
 }
 
+static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
+  // EARLY_CLOBBER: @early $reg
+  std::string::size_type wpos = CStr.find_first_of(" \t");
+  std::string::size_type start = CStr.find_first_not_of(" \t");
+  std::string Tok = CStr.substr(start, wpos - start);
+  if (Tok == "@earlyclobber") {
+    std::string Name = CStr.substr(wpos+1);
+    wpos = Name.find_first_not_of(" \t");
+    if (wpos == std::string::npos)
+      throw "Illegal format for @earlyclobber constraint: '" + CStr + "'";
+    Name = Name.substr(wpos);
+    std::pair<unsigned,unsigned> Op = Ops.ParseOperandName(Name, false);
+
+    // Build the string for the operand
+    if (!Ops[Op.first].Constraints[Op.second].isNone())
+      throw "Operand '" + Name + "' cannot have multiple constraints!";
+    Ops[Op.first].Constraints[Op.second] =
+    CGIOperandList::ConstraintInfo::getEarlyClobber();
+    return;
+  }
+
+  // Only other constraint is "TIED_TO" for now.
+  std::string::size_type pos = CStr.find_first_of('=');
+  assert(pos != std::string::npos && "Unrecognized constraint");
+  start = CStr.find_first_not_of(" \t");
+  std::string Name = CStr.substr(start, pos - start);
+
+  // TIED_TO: $src1 = $dst
+  wpos = Name.find_first_of(" \t");
+  if (wpos == std::string::npos)
+    throw "Illegal format for tied-to constraint: '" + CStr + "'";
+  std::string DestOpName = Name.substr(0, wpos);
+  std::pair<unsigned,unsigned> DestOp = Ops.ParseOperandName(DestOpName, false);
+
+  Name = CStr.substr(pos+1);
+  wpos = Name.find_first_not_of(" \t");
+  if (wpos == std::string::npos)
+    throw "Illegal format for tied-to constraint: '" + CStr + "'";
+
+  std::pair<unsigned,unsigned> SrcOp =
+  Ops.ParseOperandName(Name.substr(wpos), false);
+  if (SrcOp > DestOp)
+    throw "Illegal tied-to operand constraint '" + CStr + "'";
+
+
+  unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp);
+
+  if (!Ops[DestOp.first].Constraints[DestOp.second].isNone())
+    throw "Operand '" + DestOpName + "' cannot have multiple constraints!";
+  Ops[DestOp.first].Constraints[DestOp.second] =
+  CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
+}
+
+static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
+  if (CStr.empty()) return;
+
+  const std::string delims(",");
+  std::string::size_type bidx, eidx;
+
+  bidx = CStr.find_first_not_of(delims);
+  while (bidx != std::string::npos) {
+    eidx = CStr.find_first_of(delims, bidx);
+    if (eidx == std::string::npos)
+      eidx = CStr.length();
+
+    ParseConstraint(CStr.substr(bidx, eidx - bidx), Ops);
+    bidx = CStr.find_first_not_of(delims, eidx);
+  }
+}
+
+void CGIOperandList::ProcessDisableEncoding(std::string DisableEncoding) {
+  while (1) {
+    std::string OpName;
+    tie(OpName, DisableEncoding) = getToken(DisableEncoding, " ,\t");
+    if (OpName.empty()) break;
+
+    // Figure out which operand this is.
+    std::pair<unsigned,unsigned> Op = ParseOperandName(OpName, false);
+
+    // Mark the operand as not-to-be encoded.
+    if (Op.second >= OperandList[Op.first].DoNotEncode.size())
+      OperandList[Op.first].DoNotEncode.resize(Op.second+1);
+    OperandList[Op.first].DoNotEncode[Op.second] = true;
+  }
+
+}
+
+//===----------------------------------------------------------------------===//
+// CodeGenInstruction Implementation
+//===----------------------------------------------------------------------===//
+
+CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
+  Namespace = R->getValueAsString("Namespace");
+  AsmString = R->getValueAsString("AsmString");
+
+  isReturn     = R->getValueAsBit("isReturn");
+  isBranch     = R->getValueAsBit("isBranch");
+  isIndirectBranch = R->getValueAsBit("isIndirectBranch");
+  isCompare    = R->getValueAsBit("isCompare");
+  isMoveImm    = R->getValueAsBit("isMoveImm");
+  isBarrier    = R->getValueAsBit("isBarrier");
+  isCall       = R->getValueAsBit("isCall");
+  canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
+  mayLoad      = R->getValueAsBit("mayLoad");
+  mayStore     = R->getValueAsBit("mayStore");
+  isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable");
+  isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
+  isCommutable = R->getValueAsBit("isCommutable");
+  isTerminator = R->getValueAsBit("isTerminator");
+  isReMaterializable = R->getValueAsBit("isReMaterializable");
+  hasDelaySlot = R->getValueAsBit("hasDelaySlot");
+  usesCustomInserter = R->getValueAsBit("usesCustomInserter");
+  hasCtrlDep   = R->getValueAsBit("hasCtrlDep");
+  isNotDuplicable = R->getValueAsBit("isNotDuplicable");
+  hasSideEffects = R->getValueAsBit("hasSideEffects");
+  neverHasSideEffects = R->getValueAsBit("neverHasSideEffects");
+  isAsCheapAsAMove = R->getValueAsBit("isAsCheapAsAMove");
+  hasExtraSrcRegAllocReq = R->getValueAsBit("hasExtraSrcRegAllocReq");
+  hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
+  ImplicitDefs = R->getValueAsListOfDefs("Defs");
+  ImplicitUses = R->getValueAsListOfDefs("Uses");
+
+  if (neverHasSideEffects + hasSideEffects > 1)
+    throw R->getName() + ": multiple conflicting side-effect flags set!";
+
+  // Parse Constraints.
+  ParseConstraints(R->getValueAsString("Constraints"), Operands);
+
+  // Parse the DisableEncoding field.
+  Operands.ProcessDisableEncoding(R->getValueAsString("DisableEncoding"));
+}
 
 /// HasOneImplicitDefWithKnownVT - If the instruction has at least one
 /// implicit def and it has a known VT, return the VT, otherwise return
@@ -293,14 +326,212 @@ CodeGenInstruction::ParseOperandName(const std::string &Op,
 MVT::SimpleValueType CodeGenInstruction::
 HasOneImplicitDefWithKnownVT(const CodeGenTarget &TargetInfo) const {
   if (ImplicitDefs.empty()) return MVT::Other;
-  
+
   // Check to see if the first implicit def has a resolvable type.
   Record *FirstImplicitDef = ImplicitDefs[0];
   assert(FirstImplicitDef->isSubClassOf("Register"));
-  const std::vector<MVT::SimpleValueType> &RegVTs = 
+  const std::vector<MVT::SimpleValueType> &RegVTs =
     TargetInfo.getRegisterVTs(FirstImplicitDef);
   if (RegVTs.size() == 1)
     return RegVTs[0];
   return MVT::Other;
 }
 
+
+/// FlattenAsmStringVariants - Flatten the specified AsmString to only
+/// include text from the specified variant, returning the new string.
+std::string CodeGenInstruction::
+FlattenAsmStringVariants(StringRef Cur, unsigned Variant) {
+  std::string Res = "";
+
+  for (;;) {
+    // Find the start of the next variant string.
+    size_t VariantsStart = 0;
+    for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
+      if (Cur[VariantsStart] == '{' &&
+          (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
+                                  Cur[VariantsStart-1] != '\\')))
+        break;
+
+    // Add the prefix to the result.
+    Res += Cur.slice(0, VariantsStart);
+    if (VariantsStart == Cur.size())
+      break;
+
+    ++VariantsStart; // Skip the '{'.
+
+    // Scan to the end of the variants string.
+    size_t VariantsEnd = VariantsStart;
+    unsigned NestedBraces = 1;
+    for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
+      if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
+        if (--NestedBraces == 0)
+          break;
+      } else if (Cur[VariantsEnd] == '{')
+        ++NestedBraces;
+    }
+
+    // Select the Nth variant (or empty).
+    StringRef Selection = Cur.slice(VariantsStart, VariantsEnd);
+    for (unsigned i = 0; i != Variant; ++i)
+      Selection = Selection.split('|').second;
+    Res += Selection.split('|').first;
+
+    assert(VariantsEnd != Cur.size() &&
+           "Unterminated variants in assembly string!");
+    Cur = Cur.substr(VariantsEnd + 1);
+  }
+
+  return Res;
+}
+
+
+//===----------------------------------------------------------------------===//
+/// CodeGenInstAlias Implementation
+//===----------------------------------------------------------------------===//
+
+/// tryAliasOpMatch - This is a helper function for the CodeGenInstAlias
+/// constructor.  It checks if an argument in an InstAlias pattern matches
+/// the corresponding operand of the instruction.  It returns true on a
+/// successful match, with ResOp set to the result operand to be used.
+bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
+                                       Record *InstOpRec, bool hasSubOps,
+                                       SMLoc Loc, CodeGenTarget &T,
+                                       ResultOperand &ResOp) {
+  Init *Arg = Result->getArg(AliasOpNo);
+  DefInit *ADI = dynamic_cast<DefInit*>(Arg);
+
+  if (ADI && ADI->getDef() == InstOpRec) {
+    // If the operand is a record, it must have a name, and the record type
+    // must match up with the instruction's argument type.
+    if (Result->getArgName(AliasOpNo).empty())
+      throw TGError(Loc, "result argument #" + utostr(AliasOpNo) +
+                    " must have a name!");
+    ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef());
+    return true;
+  }
+
+  // Handle explicit registers.
+  if (ADI && ADI->getDef()->isSubClassOf("Register")) {
+    if (!InstOpRec->isSubClassOf("RegisterClass"))
+      return false;
+
+    if (!T.getRegisterClass(InstOpRec).containsRegister(ADI->getDef()))
+      throw TGError(Loc, "fixed register " +ADI->getDef()->getName()
+                    + " is not a member of the " + InstOpRec->getName() +
+                    " register class!");
+
+    if (!Result->getArgName(AliasOpNo).empty())
+      throw TGError(Loc, "result fixed register argument must "
+                    "not have a name!");
+
+    ResOp = ResultOperand(ADI->getDef());
+    return true;
+  }
+
+  // Handle "zero_reg" for optional def operands.
+  if (ADI && ADI->getDef()->getName() == "zero_reg") {
+
+    // Check if this is an optional def.
+    if (!InstOpRec->isSubClassOf("OptionalDefOperand"))
+      throw TGError(Loc, "reg0 used for result that is not an "
+                    "OptionalDefOperand!");
+
+    ResOp = ResultOperand(static_cast<Record*>(0));
+    return true;
+  }
+
+  if (IntInit *II = dynamic_cast<IntInit*>(Arg)) {
+    if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
+      return false;
+    // Integer arguments can't have names.
+    if (!Result->getArgName(AliasOpNo).empty())
+      throw TGError(Loc, "result argument #" + utostr(AliasOpNo) +
+                    " must not have a name!");
+    ResOp = ResultOperand(II->getValue());
+    return true;
+  }
+
+  return false;
+}
+
+CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
+  AsmString = R->getValueAsString("AsmString");
+  Result = R->getValueAsDag("ResultInst");
+
+  // Verify that the root of the result is an instruction.
+  DefInit *DI = dynamic_cast<DefInit*>(Result->getOperator());
+  if (DI == 0 || !DI->getDef()->isSubClassOf("Instruction"))
+    throw TGError(R->getLoc(), "result of inst alias should be an instruction");
+
+  ResultInst = &T.getInstruction(DI->getDef());
+
+  // NameClass - If argument names are repeated, we need to verify they have
+  // the same class.
+  StringMap<Record*> NameClass;
+  for (unsigned i = 0, e = Result->getNumArgs(); i != e; ++i) {
+    DefInit *ADI = dynamic_cast<DefInit*>(Result->getArg(i));
+    if (!ADI || Result->getArgName(i).empty())
+      continue;
+    // Verify we don't have something like: (someinst GR16:$foo, GR32:$foo)
+    // $foo can exist multiple times in the result list, but it must have the
+    // same type.
+    Record *&Entry = NameClass[Result->getArgName(i)];
+    if (Entry && Entry != ADI->getDef())
+      throw TGError(R->getLoc(), "result value $" + Result->getArgName(i) +
+                    " is both " + Entry->getName() + " and " +
+                    ADI->getDef()->getName() + "!");
+    Entry = ADI->getDef();
+  }
+
+  // Decode and validate the arguments of the result.
+  unsigned AliasOpNo = 0;
+  for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
+
+    // Tied registers don't have an entry in the result dag.
+    if (ResultInst->Operands[i].getTiedRegister() != -1)
+      continue;
+
+    if (AliasOpNo >= Result->getNumArgs())
+      throw TGError(R->getLoc(), "not enough arguments for instruction!");
+
+    Record *InstOpRec = ResultInst->Operands[i].Rec;
+    unsigned NumSubOps = ResultInst->Operands[i].MINumOperands;
+    ResultOperand ResOp(static_cast<int64_t>(0));
+    if (tryAliasOpMatch(Result, AliasOpNo, InstOpRec, (NumSubOps > 1),
+                        R->getLoc(), T, ResOp)) {
+      ResultOperands.push_back(ResOp);
+      ResultInstOperandIndex.push_back(std::make_pair(i, -1));
+      ++AliasOpNo;
+      continue;
+    }
+
+    // If the argument did not match the instruction operand, and the operand
+    // is composed of multiple suboperands, try matching the suboperands.
+    if (NumSubOps > 1) {
+      DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
+      for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
+        if (AliasOpNo >= Result->getNumArgs())
+          throw TGError(R->getLoc(), "not enough arguments for instruction!");
+        Record *SubRec = dynamic_cast<DefInit*>(MIOI->getArg(SubOp))->getDef();
+        if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false,
+                            R->getLoc(), T, ResOp)) {
+          ResultOperands.push_back(ResOp);
+          ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
+          ++AliasOpNo;
+        } else {
+          throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
+                        " does not match instruction operand class " +
+                        (SubOp == 0 ? InstOpRec->getName() :SubRec->getName()));
+        }
+      }
+      continue;
+    }
+    throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) +
+                  " does not match instruction operand class " +
+                  InstOpRec->getName());
+  }
+
+  if (AliasOpNo != Result->getNumArgs())
+    throw TGError(R->getLoc(), "too many operands for instruction!");
+}
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index b02d0d38f975..58913b9da26b 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -15,6 +15,8 @@
 #define CODEGEN_INSTRUCTION_H
 
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SourceMgr.h"
 #include <string>
 #include <vector>
 #include <utility>
@@ -23,22 +25,16 @@ namespace llvm {
   class Record;
   class DagInit;
   class CodeGenTarget;
-
-  class CodeGenInstruction {
+  class StringRef;
+  
+  class CGIOperandList {
   public:
-    Record *TheDef;            // The actual record defining this instruction.
-    std::string Namespace;     // The namespace the instruction is in.
-
-    /// AsmString - The format string used to emit a .s file for the
-    /// instruction.
-    std::string AsmString;
-    
     class ConstraintInfo {
       enum { None, EarlyClobber, Tied } Kind;
       unsigned OtherTiedOperand;
     public:
       ConstraintInfo() : Kind(None) {}
-
+      
       static ConstraintInfo getEarlyClobber() {
         ConstraintInfo I;
         I.Kind = EarlyClobber;
@@ -62,22 +58,26 @@ namespace llvm {
         return OtherTiedOperand;
       }
     };
-    
+
     /// OperandInfo - The information we keep track of for each operand in the
     /// operand list for a tablegen instruction.
     struct OperandInfo {
       /// Rec - The definition this operand is declared as.
       ///
       Record *Rec;
-
+      
       /// Name - If this operand was assigned a symbolic name, this is it,
       /// otherwise, it's empty.
       std::string Name;
-
+      
       /// PrinterMethodName - The method used to print operands of this type in
       /// the asmprinter.
       std::string PrinterMethodName;
-
+      
+      /// EncoderMethodName - The method used to get the machine operand value
+      /// for binary encoding. "getMachineOpValue" by default.
+      std::string EncoderMethodName;
+      
       /// MIOperandNo - Currently (this is meant to be phased out), some logical
       /// operands correspond to multiple MachineInstr operands.  In the X86
       /// target for example, one address operand is represented as 4
@@ -86,7 +86,7 @@ namespace llvm {
       /// does, this contains the MI operand index of this operand.
       unsigned MIOperandNo;
       unsigned MINumOperands;   // The number of operands.
-
+      
       /// DoNotEncode - Bools are set to true in this vector for each operand in
       /// the DisableEncoding list.  These should not be emitted by the code
       /// emitter.
@@ -99,52 +99,61 @@ namespace llvm {
       /// Constraint info for this operand.  This operand can have pieces, so we
       /// track constraint info for each.
       std::vector<ConstraintInfo> Constraints;
-
-      OperandInfo(Record *R, const std::string &N, const std::string &PMN, 
-                  unsigned MION, unsigned MINO, DagInit *MIOI)
-        : Rec(R), Name(N), PrinterMethodName(PMN), MIOperandNo(MION),
-          MINumOperands(MINO), MIOperandInfo(MIOI) {}
+      
+      OperandInfo(Record *R, const std::string &N, const std::string &PMN,
+                  const std::string &EMN, unsigned MION, unsigned MINO,
+                  DagInit *MIOI)
+      : Rec(R), Name(N), PrinterMethodName(PMN), EncoderMethodName(EMN),
+        MIOperandNo(MION), MINumOperands(MINO), MIOperandInfo(MIOI) {}
+      
+      
+      /// getTiedOperand - If this operand is tied to another one, return the
+      /// other operand number.  Otherwise, return -1.
+      int getTiedRegister() const {
+        for (unsigned j = 0, e = Constraints.size(); j != e; ++j) {
+          const CGIOperandList::ConstraintInfo &CI = Constraints[j];
+          if (CI.isTied()) return CI.getTiedOperand();
+        }
+        return -1;
+      }
     };
+    
+    CGIOperandList(Record *D);
+    
+    Record *TheDef;            // The actual record containing this OperandList.
 
     /// NumDefs - Number of def operands declared, this is the number of
     /// elements in the instruction's (outs) list.
     ///
     unsigned NumDefs;
-
+    
     /// OperandList - The list of declared operands, along with their declared
     /// type (which is a record).
     std::vector<OperandInfo> OperandList;
-
-    /// ImplicitDefs/ImplicitUses - These are lists of registers that are
-    /// implicitly defined and used by the instruction.
-    std::vector<Record*> ImplicitDefs, ImplicitUses;
-
-    // Various boolean values we track for the instruction.
-    bool isReturn;
-    bool isBranch;
-    bool isIndirectBranch;
-    bool isCompare;
-    bool isBarrier;
-    bool isCall;
-    bool canFoldAsLoad;
-    bool mayLoad, mayStore;
+    
+    // Information gleaned from the operand list.
     bool isPredicable;
-    bool isConvertibleToThreeAddress;
-    bool isCommutable;
-    bool isTerminator;
-    bool isReMaterializable;
-    bool hasDelaySlot;
-    bool usesCustomInserter;
-    bool isVariadic;
-    bool hasCtrlDep;
-    bool isNotDuplicable;
     bool hasOptionalDef;
-    bool hasSideEffects;
-    bool neverHasSideEffects;
-    bool isAsCheapAsAMove;
-    bool hasExtraSrcRegAllocReq;
-    bool hasExtraDefRegAllocReq;
+    bool isVariadic;
+    
+    // Provide transparent accessors to the operand list.
+    unsigned size() const { return OperandList.size(); }
+    const OperandInfo &operator[](unsigned i) const { return OperandList[i]; }
+    OperandInfo &operator[](unsigned i) { return OperandList[i]; }
+    OperandInfo &back() { return OperandList.back(); }
+    const OperandInfo &back() const { return OperandList.back(); }
+    
+    
+    /// getOperandNamed - Return the index of the operand with the specified
+    /// non-empty name.  If the instruction does not have an operand with the
+    /// specified name, throw an exception.
+    unsigned getOperandNamed(StringRef Name) const;
     
+    /// hasOperandNamed - Query whether the instruction has an operand of the
+    /// given name. If so, return true and set OpIdx to the index of the
+    /// operand. Otherwise, return false.
+    bool hasOperandNamed(StringRef Name, unsigned &OpIdx) const;
+      
     /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar",
     /// where $foo is a whole operand and $foo.bar refers to a suboperand.
     /// This throws an exception if the name is invalid.  If AllowWholeOp is
@@ -178,20 +187,130 @@ namespace llvm {
         return OperandList[Op.first].DoNotEncode[Op.second];
       return false;
     }
+    
+    void ProcessDisableEncoding(std::string Value);
+  };
+  
 
-    CodeGenInstruction(Record *R, const std::string &AsmStr);
+  class CodeGenInstruction {
+  public:
+    Record *TheDef;            // The actual record defining this instruction.
+    std::string Namespace;     // The namespace the instruction is in.
+
+    /// AsmString - The format string used to emit a .s file for the
+    /// instruction.
+    std::string AsmString;
+
+    /// Operands - This is information about the (ins) and (outs) list specified
+    /// to the instruction.
+    CGIOperandList Operands;
+
+    /// ImplicitDefs/ImplicitUses - These are lists of registers that are
+    /// implicitly defined and used by the instruction.
+    std::vector<Record*> ImplicitDefs, ImplicitUses;
+
+    // Various boolean values we track for the instruction.
+    bool isReturn;
+    bool isBranch;
+    bool isIndirectBranch;
+    bool isCompare;
+    bool isMoveImm;
+    bool isBarrier;
+    bool isCall;
+    bool canFoldAsLoad;
+    bool mayLoad, mayStore;
+    bool isPredicable;
+    bool isConvertibleToThreeAddress;
+    bool isCommutable;
+    bool isTerminator;
+    bool isReMaterializable;
+    bool hasDelaySlot;
+    bool usesCustomInserter;
+    bool hasCtrlDep;
+    bool isNotDuplicable;
+    bool hasSideEffects;
+    bool neverHasSideEffects;
+    bool isAsCheapAsAMove;
+    bool hasExtraSrcRegAllocReq;
+    bool hasExtraDefRegAllocReq;
+
+
+    CodeGenInstruction(Record *R);
 
-    /// getOperandNamed - Return the index of the operand with the specified
-    /// non-empty name.  If the instruction does not have an operand with the
-    /// specified name, throw an exception.
-    unsigned getOperandNamed(const std::string &Name) const;
-    
     /// HasOneImplicitDefWithKnownVT - If the instruction has at least one
     /// implicit def and it has a known VT, return the VT, otherwise return
     /// MVT::Other.
-    MVT::SimpleValueType 
+    MVT::SimpleValueType
       HasOneImplicitDefWithKnownVT(const CodeGenTarget &TargetInfo) const;
+    
+    
+    /// FlattenAsmStringVariants - Flatten the specified AsmString to only
+    /// include text from the specified variant, returning the new string.
+    static std::string FlattenAsmStringVariants(StringRef AsmString,
+                                                unsigned Variant);
   };
+  
+  
+  /// CodeGenInstAlias - This represents an InstAlias definition.
+  class CodeGenInstAlias {
+  public:
+    Record *TheDef;            // The actual record defining this InstAlias.
+    
+    /// AsmString - The format string used to emit a .s file for the
+    /// instruction.
+    std::string AsmString;
+    
+    /// Result - The result instruction.
+    DagInit *Result;
+    
+    /// ResultInst - The instruction generated by the alias (decoded from
+    /// Result).
+    CodeGenInstruction *ResultInst;
+    
+    
+    struct ResultOperand {
+    private:
+      StringRef Name;
+      Record *R;
+      
+      int64_t Imm;
+    public:      
+      enum {
+        K_Record,
+        K_Imm,
+        K_Reg
+      } Kind;
+      
+      ResultOperand(StringRef N, Record *r) : Name(N), R(r), Kind(K_Record) {}
+      ResultOperand(int64_t I) : Imm(I), Kind(K_Imm) {}
+      ResultOperand(Record *r) : R(r), Kind(K_Reg) {}
+
+      bool isRecord() const { return Kind == K_Record; }
+      bool isImm() const { return Kind == K_Imm; }
+      bool isReg() const { return Kind == K_Reg; }
+      
+      StringRef getName() const { assert(isRecord()); return Name; }
+      Record *getRecord() const { assert(isRecord()); return R; }
+      int64_t getImm() const { assert(isImm()); return Imm; }
+      Record *getRegister() const { assert(isReg()); return R; }
+    };
+    
+    /// ResultOperands - The decoded operands for the result instruction.
+    std::vector<ResultOperand> ResultOperands;
+
+    /// ResultInstOperandIndex - For each operand, this vector holds a pair of
+    /// indices to identify the corresponding operand in the result
+    /// instruction.  The first index specifies the operand and the second
+    /// index specifies the suboperand.  If there are no suboperands or if all
+    /// of them are matched by the operand, the second value should be -1.
+    std::vector<std::pair<unsigned, int> > ResultInstOperandIndex;
+    
+    CodeGenInstAlias(Record *R, CodeGenTarget &T);
+
+    bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
+                         Record *InstOpRec, bool hasSubOps, SMLoc Loc,
+                         CodeGenTarget &T, ResultOperand &ResOp);
+  };    
 }
 
 #endif
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index ccd3d222bbad..bbd0cefa5804 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -57,6 +57,12 @@ namespace llvm {
       abort();
     }
     
+    bool containsRegister(Record *R) const {
+      for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+        if (Elements[i] == R) return true;
+      return false;
+    }
+    
     // Returns true if RC is a strict subclass.
     // RC is a sub-class of this class if it is a valid replacement for any
     // instruction operand where a register of this classis required. It must 
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index cbfe2addbf2b..d0f7d8b44079 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -48,46 +48,47 @@ std::string llvm::getName(MVT::SimpleValueType T) {
 
 std::string llvm::getEnumName(MVT::SimpleValueType T) {
   switch (T) {
-  case MVT::Other: return "MVT::Other";
-  case MVT::i1:    return "MVT::i1";
-  case MVT::i8:    return "MVT::i8";
-  case MVT::i16:   return "MVT::i16";
-  case MVT::i32:   return "MVT::i32";
-  case MVT::i64:   return "MVT::i64";
-  case MVT::i128:  return "MVT::i128";
-  case MVT::iAny:  return "MVT::iAny";
-  case MVT::fAny:  return "MVT::fAny";
-  case MVT::vAny:  return "MVT::vAny";
-  case MVT::f32:   return "MVT::f32";
-  case MVT::f64:   return "MVT::f64";
-  case MVT::f80:   return "MVT::f80";
-  case MVT::f128:  return "MVT::f128";
+  case MVT::Other:    return "MVT::Other";
+  case MVT::i1:       return "MVT::i1";
+  case MVT::i8:       return "MVT::i8";
+  case MVT::i16:      return "MVT::i16";
+  case MVT::i32:      return "MVT::i32";
+  case MVT::i64:      return "MVT::i64";
+  case MVT::i128:     return "MVT::i128";
+  case MVT::iAny:     return "MVT::iAny";
+  case MVT::fAny:     return "MVT::fAny";
+  case MVT::vAny:     return "MVT::vAny";
+  case MVT::f32:      return "MVT::f32";
+  case MVT::f64:      return "MVT::f64";
+  case MVT::f80:      return "MVT::f80";
+  case MVT::f128:     return "MVT::f128";
   case MVT::ppcf128:  return "MVT::ppcf128";
-  case MVT::Flag:  return "MVT::Flag";
-  case MVT::isVoid:return "MVT::isVoid";
-  case MVT::v2i8:  return "MVT::v2i8";
-  case MVT::v4i8:  return "MVT::v4i8";
-  case MVT::v8i8:  return "MVT::v8i8";
-  case MVT::v16i8: return "MVT::v16i8";
-  case MVT::v32i8: return "MVT::v32i8";
-  case MVT::v2i16: return "MVT::v2i16";
-  case MVT::v4i16: return "MVT::v4i16";
-  case MVT::v8i16: return "MVT::v8i16";
-  case MVT::v16i16: return "MVT::v16i16";
-  case MVT::v2i32: return "MVT::v2i32";
-  case MVT::v4i32: return "MVT::v4i32";
-  case MVT::v8i32: return "MVT::v8i32";
-  case MVT::v1i64: return "MVT::v1i64";
-  case MVT::v2i64: return "MVT::v2i64";
-  case MVT::v4i64: return "MVT::v4i64";
-  case MVT::v8i64: return "MVT::v8i64";
-  case MVT::v2f32: return "MVT::v2f32";
-  case MVT::v4f32: return "MVT::v4f32";
-  case MVT::v8f32: return "MVT::v8f32";
-  case MVT::v2f64: return "MVT::v2f64";
-  case MVT::v4f64: return "MVT::v4f64";
+  case MVT::x86mmx:   return "MVT::x86mmx";
+  case MVT::Glue:     return "MVT::Glue";
+  case MVT::isVoid:   return "MVT::isVoid";
+  case MVT::v2i8:     return "MVT::v2i8";
+  case MVT::v4i8:     return "MVT::v4i8";
+  case MVT::v8i8:     return "MVT::v8i8";
+  case MVT::v16i8:    return "MVT::v16i8";
+  case MVT::v32i8:    return "MVT::v32i8";
+  case MVT::v2i16:    return "MVT::v2i16";
+  case MVT::v4i16:    return "MVT::v4i16";
+  case MVT::v8i16:    return "MVT::v8i16";
+  case MVT::v16i16:   return "MVT::v16i16";
+  case MVT::v2i32:    return "MVT::v2i32";
+  case MVT::v4i32:    return "MVT::v4i32";
+  case MVT::v8i32:    return "MVT::v8i32";
+  case MVT::v1i64:    return "MVT::v1i64";
+  case MVT::v2i64:    return "MVT::v2i64";
+  case MVT::v4i64:    return "MVT::v4i64";
+  case MVT::v8i64:    return "MVT::v8i64";
+  case MVT::v2f32:    return "MVT::v2f32";
+  case MVT::v4f32:    return "MVT::v4f32";
+  case MVT::v8f32:    return "MVT::v8f32";
+  case MVT::v2f64:    return "MVT::v2f64";
+  case MVT::v4f64:    return "MVT::v4f64";
   case MVT::Metadata: return "MVT::Metadata";
-  case MVT::iPTR:  return "MVT::iPTR";
+  case MVT::iPTR:     return "MVT::iPTR";
   case MVT::iPTRAny:  return "MVT::iPTRAny";
   default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
   }
@@ -107,7 +108,7 @@ std::string llvm::getQualifiedName(const Record *R) {
 
 /// getTarget - Return the current instance of the Target class.
 ///
-CodeGenTarget::CodeGenTarget() {
+CodeGenTarget::CodeGenTarget(RecordKeeper &records) : Records(records) {
   std::vector<Record*> Targets = Records.getAllDerivedDefinitions("Target");
   if (Targets.size() == 0)
     throw std::string("ERROR: No 'Target' subclasses defined!");
@@ -189,6 +190,19 @@ void CodeGenTarget::ReadRegisterClasses() const {
   RegisterClasses.assign(RegClasses.begin(), RegClasses.end());
 }
 
+/// getRegisterByName - If there is a register with the specific AsmName,
+/// return it.
+const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
+  const std::vector<CodeGenRegister> &Regs = getRegisters();
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+    const CodeGenRegister &Reg = Regs[i];
+    if (Reg.TheDef->getValueAsString("AsmName") == Name)
+      return &Reg;
+  }
+  
+  return 0;
+}
+
 std::vector<MVT::SimpleValueType> CodeGenTarget::
 getRegisterVTs(Record *R) const {
   std::vector<MVT::SimpleValueType> Result;
@@ -294,18 +308,14 @@ void CodeGenTarget::ReadInstructions() const {
     throw std::string("No 'Instruction' subclasses defined!");
 
   // Parse the instructions defined in the .td file.
-  std::string InstFormatName =
-    getAsmWriter()->getValueAsString("InstFormatName");
-
-  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
-    std::string AsmStr = Insts[i]->getValueAsString(InstFormatName);
-    Instructions[Insts[i]] = new CodeGenInstruction(Insts[i], AsmStr);
-  }
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+    Instructions[Insts[i]] = new CodeGenInstruction(Insts[i]);
 }
 
 static const CodeGenInstruction *
 GetInstByName(const char *Name,
-              const DenseMap<const Record*, CodeGenInstruction*> &Insts) {
+              const DenseMap<const Record*, CodeGenInstruction*> &Insts, 
+              RecordKeeper &Records) {
   const Record *Rec = Records.getDef(Name);
   
   DenseMap<const Record*, CodeGenInstruction*>::const_iterator
@@ -349,7 +359,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
   };
   const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();
   for (const char *const *p = FixedInstrs; *p; ++p) {
-    const CodeGenInstruction *Instr = GetInstByName(*p, Insts);
+    const CodeGenInstruction *Instr = GetInstByName(*p, Insts, Records);
     assert(Instr && "Missing target independent instruction");
     assert(Instr->Namespace == "TargetOpcode" && "Bad namespace");
     InstrsByEnum.push_back(Instr);
@@ -394,8 +404,8 @@ ComplexPattern::ComplexPattern(Record *R) {
   for (unsigned i = 0, e = PropList.size(); i != e; ++i)
     if (PropList[i]->getName() == "SDNPHasChain") {
       Properties |= 1 << SDNPHasChain;
-    } else if (PropList[i]->getName() == "SDNPOptInFlag") {
-      Properties |= 1 << SDNPOptInFlag;
+    } else if (PropList[i]->getName() == "SDNPOptInGlue") {
+      Properties |= 1 << SDNPOptInGlue;
     } else if (PropList[i]->getName() == "SDNPMayStore") {
       Properties |= 1 << SDNPMayStore;
     } else if (PropList[i]->getName() == "SDNPMayLoad") {
@@ -406,6 +416,10 @@ ComplexPattern::ComplexPattern(Record *R) {
       Properties |= 1 << SDNPMemOperand;
     } else if (PropList[i]->getName() == "SDNPVariadic") {
       Properties |= 1 << SDNPVariadic;
+    } else if (PropList[i]->getName() == "SDNPWantRoot") {
+      Properties |= 1 << SDNPWantRoot;
+    } else if (PropList[i]->getName() == "SDNPWantParent") {
+      Properties |= 1 << SDNPWantParent;
     } else {
       errs() << "Unsupported SD Node property '" << PropList[i]->getName()
              << "' on ComplexPattern '" << R->getName() << "'!\n";
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 6b06b66c29bc..f1058eb63181 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -35,14 +35,16 @@ enum SDNP {
   SDNPCommutative, 
   SDNPAssociative, 
   SDNPHasChain,
-  SDNPOutFlag,
-  SDNPInFlag,
-  SDNPOptInFlag,
+  SDNPOutGlue,
+  SDNPInGlue,
+  SDNPOptInGlue,
   SDNPMayLoad,
   SDNPMayStore,
   SDNPSideEffect,
   SDNPMemOperand,
-  SDNPVariadic
+  SDNPVariadic,
+  SDNPWantRoot,
+  SDNPWantParent
 };
 
 /// getValueType - Return the MVT::SimpleValueType that the specified TableGen
@@ -59,6 +61,7 @@ std::string getQualifiedName(const Record *R);
 /// CodeGenTarget - This class corresponds to the Target class in the .td files.
 ///
 class CodeGenTarget {
+  RecordKeeper &Records;
   Record *TargetRec;
 
   mutable DenseMap<const Record*, CodeGenInstruction*> Instructions;
@@ -74,7 +77,7 @@ class CodeGenTarget {
   
   mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
 public:
-  CodeGenTarget();
+  CodeGenTarget(RecordKeeper &Records);
 
   Record *getTargetRecord() const { return TargetRec; }
   const std::string &getName() const;
@@ -99,6 +102,10 @@ public:
     if (Registers.empty()) ReadRegisters();
     return Registers;
   }
+  
+  /// getRegisterByName - If there is a register with the specific AsmName,
+  /// return it.
+  const CodeGenRegister *getRegisterByName(StringRef Name) const;
 
   const std::vector<Record*> &getSubRegIndices() const {
     if (SubRegIndices.empty()) ReadSubRegIndices();
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index 9f12a686e4cf..2afa2b907bc4 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -35,7 +35,7 @@ void Matcher::printOne(raw_ostream &OS) const {
 Matcher *Matcher::unlinkNode(Matcher *Other) {
   if (this == Other)
     return takeNext();
- 
+
   // Scan until we find the predecessor of Other.
   Matcher *Cur = this;
   for (; Cur && Cur->getNext() != Other; Cur = Cur->getNext())
@@ -67,11 +67,11 @@ bool Matcher::canMoveBeforeNode(const Matcher *Other) const {
   // We can move simple predicates before record nodes.
   if (isSimplePredicateNode())
     return Other->isSimplePredicateOrRecordNode();
-  
+
   // We can move record nodes across simple predicates.
   if (isSimplePredicateOrRecordNode())
     return isSimplePredicateNode();
-  
+
   // We can't move record nodes across each other etc.
   return false;
 }
@@ -107,8 +107,8 @@ void RecordMemRefMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
   OS.indent(indent) << "RecordMemRef\n";
 }
 
-void CaptureFlagInputMatcher::printImpl(raw_ostream &OS, unsigned indent) const{
-  OS.indent(indent) << "CaptureFlagInput\n";
+void CaptureGlueInputMatcher::printImpl(raw_ostream &OS, unsigned indent) const{
+  OS.indent(indent) << "CaptureGlueInput\n";
 }
 
 void MoveChildMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
@@ -246,8 +246,8 @@ void EmitNodeMatcherCommon::printImpl(raw_ostream &OS, unsigned indent) const {
   OS << ")\n";
 }
 
-void MarkFlagResultsMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
-  OS.indent(indent) << "MarkFlagResults <todo: args>\n";
+void MarkGlueResultsMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
+  OS.indent(indent) << "MarkGlueResults <todo: args>\n";
 }
 
 void CompleteMatchMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
@@ -296,7 +296,7 @@ unsigned EmitMergeInputChainsMatcher::getHashImpl() const {
 
 bool CheckOpcodeMatcher::isEqualImpl(const Matcher *M) const {
   // Note: pointer equality isn't enough here, we have to check the enum names
-  // to ensure that the nodes are for the same opcode. 
+  // to ensure that the nodes are for the same opcode.
   return cast<CheckOpcodeMatcher>(M)->Opcode.getEnumName() ==
           Opcode.getEnumName();
 }
@@ -306,7 +306,7 @@ bool EmitNodeMatcherCommon::isEqualImpl(const Matcher *m) const {
   const EmitNodeMatcherCommon *M = cast<EmitNodeMatcherCommon>(m);
   return M->OpcodeName == OpcodeName && M->VTs == VTs &&
          M->Operands == Operands && M->HasChain == HasChain &&
-         M->HasInFlag == HasInFlag && M->HasOutFlag == HasOutFlag &&
+         M->HasInGlue == HasInGlue && M->HasOutGlue == HasOutGlue &&
          M->HasMemRefs == HasMemRefs &&
          M->NumFixedArityOperands == NumFixedArityOperands;
 }
@@ -316,12 +316,12 @@ unsigned EmitNodeMatcherCommon::getHashImpl() const {
 }
 
 
-unsigned MarkFlagResultsMatcher::getHashImpl() const {
-  return HashUnsigneds(FlagResultNodes.begin(), FlagResultNodes.end());
+unsigned MarkGlueResultsMatcher::getHashImpl() const {
+  return HashUnsigneds(GlueResultNodes.begin(), GlueResultNodes.end());
 }
 
 unsigned CompleteMatchMatcher::getHashImpl() const {
-  return HashUnsigneds(Results.begin(), Results.end()) ^ 
+  return HashUnsigneds(Results.begin(), Results.end()) ^
           ((unsigned)(intptr_t)&Pattern << 8);
 }
 
@@ -332,15 +332,15 @@ static bool TypesAreContradictory(MVT::SimpleValueType T1,
   // If the two types are the same, then they are the same, so they don't
   // contradict.
   if (T1 == T2) return false;
-  
+
   // If either type is about iPtr, then they don't conflict unless the other
   // one is not a scalar integer type.
   if (T1 == MVT::iPTR)
     return !MVT(T2).isInteger() || MVT(T2).isVector();
-  
+
   if (T2 == MVT::iPTR)
     return !MVT(T1).isInteger() || MVT(T1).isVector();
-  
+
   // Otherwise, they are two different non-iPTR types, they conflict.
   return true;
 }
@@ -349,10 +349,10 @@ bool CheckOpcodeMatcher::isContradictoryImpl(const Matcher *M) const {
   if (const CheckOpcodeMatcher *COM = dyn_cast<CheckOpcodeMatcher>(M)) {
     // One node can't have two different opcodes!
     // Note: pointer equality isn't enough here, we have to check the enum names
-    // to ensure that the nodes are for the same opcode. 
+    // to ensure that the nodes are for the same opcode.
     return COM->getOpcode().getEnumName() != getOpcode().getEnumName();
   }
-  
+
   // If the node has a known type, and if the type we're checking for is
   // different, then we know they contradict.  For example, a check for
   // ISD::STORE will never be true at the same time a check for Type i32 is.
@@ -360,12 +360,12 @@ bool CheckOpcodeMatcher::isContradictoryImpl(const Matcher *M) const {
     // If checking for a result the opcode doesn't have, it can't match.
     if (CT->getResNo() >= getOpcode().getNumResults())
       return true;
-    
+
     MVT::SimpleValueType NodeType = getOpcode().getKnownType(CT->getResNo());
     if (NodeType != MVT::Other)
       return TypesAreContradictory(NodeType, CT->getType());
   }
-  
+
   return false;
 }
 
@@ -381,12 +381,12 @@ bool CheckChildTypeMatcher::isContradictoryImpl(const Matcher *M) const {
     // conflict!
     if (CC->getChildNo() != getChildNo())
       return false;
-    
+
     return TypesAreContradictory(getType(), CC->getType());
   }
   return false;
 }
-  
+
 bool CheckIntegerMatcher::isContradictoryImpl(const Matcher *M) const {
   if (const CheckIntegerMatcher *CIM = dyn_cast<CheckIntegerMatcher>(M))
     return CIM->getValue() != getValue();
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index d9b25d556430..8e6e44647ea1 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -31,7 +31,7 @@ Matcher *OptimizeMatcher(Matcher *Matcher, const CodeGenDAGPatterns &CGP);
 void EmitMatcherTable(const Matcher *Matcher, const CodeGenDAGPatterns &CGP,
                       raw_ostream &OS);
 
-  
+
 /// Matcher - Base class for all the the DAG ISel Matcher representation
 /// nodes.
 class Matcher {
@@ -45,10 +45,10 @@ public:
     RecordNode,           // Record the current node.
     RecordChild,          // Record a child of the current node.
     RecordMemRef,         // Record the memref in the current node.
-    CaptureFlagInput,     // If the current node has an input flag, save it.
+    CaptureGlueInput,     // If the current node has an input glue, save it.
     MoveChild,            // Move current node to specified child.
     MoveParent,           // Move current node to parent.
-    
+
     // Predicate checking.
     CheckSame,            // Fail if not same as prev match.
     CheckPatternPredicate,
@@ -65,7 +65,7 @@ public:
     CheckAndImm,
     CheckOrImm,
     CheckFoldableChainNode,
-    
+
     // Node creation/emisssion.
     EmitInteger,          // Create a TargetConstant
     EmitStringInteger,    // Create a TargetConstant from a string.
@@ -75,7 +75,7 @@ public:
     EmitCopyToReg,        // Emit a copytoreg into a physreg.
     EmitNode,             // Create a DAG node
     EmitNodeXForm,        // Run a SDNodeXForm
-    MarkFlagResults,      // Indicate which interior nodes have flag results.
+    MarkGlueResults,      // Indicate which interior nodes have glue results.
     CompleteMatch,        // Finish a match and update the results.
     MorphNodeTo           // Build a node, finish a match and update results.
   };
@@ -85,7 +85,7 @@ protected:
   Matcher(KindTy K) : Kind(K) {}
 public:
   virtual ~Matcher() {}
-  
+
   KindTy getKind() const { return Kind; }
 
   Matcher *getNext() { return Next.get(); }
@@ -94,25 +94,25 @@ public:
   Matcher *takeNext() { return Next.take(); }
 
   OwningPtr<Matcher> &getNextPtr() { return Next; }
-  
+
   static inline bool classof(const Matcher *) { return true; }
-  
+
   bool isEqual(const Matcher *M) const {
     if (getKind() != M->getKind()) return false;
     return isEqualImpl(M);
   }
-  
+
   unsigned getHash() const {
     // Clear the high bit so we don't conflict with tombstones etc.
     return ((getHashImpl() << 4) ^ getKind()) & (~0U>>1);
   }
-  
+
   /// isSafeToReorderWithPatternPredicate - Return true if it is safe to sink a
   /// PatternPredicate node past this one.
   virtual bool isSafeToReorderWithPatternPredicate() const {
     return false;
   }
-  
+
   /// isSimplePredicateNode - Return true if this is a simple predicate that
   /// operates on the node or its children without potential side effects or a
   /// change of the current node.
@@ -134,28 +134,28 @@ public:
       return true;
     }
   }
-  
+
   /// isSimplePredicateOrRecordNode - Return true if this is a record node or
   /// a simple predicate.
   bool isSimplePredicateOrRecordNode() const {
     return isSimplePredicateNode() ||
            getKind() == RecordNode || getKind() == RecordChild;
   }
-  
+
   /// unlinkNode - Unlink the specified node from this chain.  If Other == this,
   /// we unlink the next pointer and return it.  Otherwise we unlink Other from
   /// the list and return this.
   Matcher *unlinkNode(Matcher *Other);
-  
+
   /// canMoveBefore - Return true if this matcher is the same as Other, or if
   /// we can move this matcher past all of the nodes in-between Other and this
   /// node.  Other must be equal to or before this.
   bool canMoveBefore(const Matcher *Other) const;
-  
+
   /// canMoveBefore - Return true if it is safe to move the current matcher
   /// across the specified one.
   bool canMoveBeforeNode(const Matcher *Other) const;
-  
+
   /// isContradictory - Return true of these two matchers could never match on
   /// the same node.
   bool isContradictory(const Matcher *Other) const {
@@ -167,7 +167,7 @@ public:
       return isContradictoryImpl(Other);
     return Other->isContradictoryImpl(this);
   }
-  
+
   void print(raw_ostream &OS, unsigned indent = 0) const;
   void printOne(raw_ostream &OS) const;
   void dump() const;
@@ -177,7 +177,7 @@ protected:
   virtual unsigned getHashImpl() const = 0;
   virtual bool isContradictoryImpl(const Matcher *M) const { return false; }
 };
-  
+
 /// ScopeMatcher - This attempts to match each of its children to find the first
 /// one that successfully matches.  If one child fails, it tries the next child.
 /// If none of the children match then this check fails.  It never has a 'next'.
@@ -188,12 +188,12 @@ public:
     : Matcher(Scope), Children(children, children+numchildren) {
   }
   virtual ~ScopeMatcher();
-  
+
   unsigned getNumChildren() const { return Children.size(); }
-  
+
   Matcher *getChild(unsigned i) { return Children[i]; }
   const Matcher *getChild(unsigned i) const { return Children[i]; }
-  
+
   void resetChild(unsigned i, Matcher *N) {
     delete Children[i];
     Children[i] = N;
@@ -204,7 +204,7 @@ public:
     Children[i] = 0;
     return Res;
   }
-  
+
   void setNumChildren(unsigned NC) {
     if (NC < Children.size()) {
       // delete any children we're about to lose pointers to.
@@ -217,7 +217,7 @@ public:
   static inline bool classof(const Matcher *N) {
     return N->getKind() == Scope;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const { return false; }
@@ -229,38 +229,38 @@ class RecordMatcher : public Matcher {
   /// WhatFor - This is a string indicating why we're recording this.  This
   /// should only be used for comment generation not anything semantic.
   std::string WhatFor;
-  
+
   /// ResultNo - The slot number in the RecordedNodes vector that this will be,
   /// just printed as a comment.
   unsigned ResultNo;
 public:
   RecordMatcher(const std::string &whatfor, unsigned resultNo)
     : Matcher(RecordNode), WhatFor(whatfor), ResultNo(resultNo) {}
-  
+
   const std::string &getWhatFor() const { return WhatFor; }
   unsigned getResultNo() const { return ResultNo; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == RecordNode;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const { return true; }
   virtual unsigned getHashImpl() const { return 0; }
 };
-  
+
 /// RecordChildMatcher - Save a numbered child of the current node, or fail
 /// the match if it doesn't exist.  This is logically equivalent to:
 ///    MoveChild N + RecordNode + MoveParent.
 class RecordChildMatcher : public Matcher {
   unsigned ChildNo;
-  
+
   /// WhatFor - This is a string indicating why we're recording this.  This
   /// should only be used for comment generation not anything semantic.
   std::string WhatFor;
-  
+
   /// ResultNo - The slot number in the RecordedNodes vector that this will be,
   /// just printed as a comment.
   unsigned ResultNo;
@@ -269,7 +269,7 @@ public:
                      unsigned resultNo)
   : Matcher(RecordChild), ChildNo(childno), WhatFor(whatfor),
     ResultNo(resultNo) {}
-  
+
   unsigned getChildNo() const { return ChildNo; }
   const std::string &getWhatFor() const { return WhatFor; }
   unsigned getResultNo() const { return ResultNo; }
@@ -277,7 +277,7 @@ public:
   static inline bool classof(const Matcher *N) {
     return N->getKind() == RecordChild;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -287,16 +287,16 @@ private:
   }
   virtual unsigned getHashImpl() const { return getChildNo(); }
 };
-  
+
 /// RecordMemRefMatcher - Save the current node's memref.
 class RecordMemRefMatcher : public Matcher {
 public:
   RecordMemRefMatcher() : Matcher(RecordMemRef) {}
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == RecordMemRef;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -305,17 +305,17 @@ private:
   virtual unsigned getHashImpl() const { return 0; }
 };
 
-  
-/// CaptureFlagInputMatcher - If the current record has a flag input, record
+
+/// CaptureGlueInputMatcher - If the current record has a glue input, record
 /// it so that it is used as an input to the generated code.
-class CaptureFlagInputMatcher : public Matcher {
+class CaptureGlueInputMatcher : public Matcher {
 public:
-  CaptureFlagInputMatcher() : Matcher(CaptureFlagInput) {}
-  
+  CaptureGlueInputMatcher() : Matcher(CaptureGlueInput) {}
+
   static inline bool classof(const Matcher *N) {
-    return N->getKind() == CaptureFlagInput;
+    return N->getKind() == CaptureGlueInput;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -323,20 +323,20 @@ private:
   virtual bool isEqualImpl(const Matcher *M) const { return true; }
   virtual unsigned getHashImpl() const { return 0; }
 };
-  
+
 /// MoveChildMatcher - This tells the interpreter to move into the
 /// specified child node.
 class MoveChildMatcher : public Matcher {
   unsigned ChildNo;
 public:
   MoveChildMatcher(unsigned childNo) : Matcher(MoveChild), ChildNo(childNo) {}
-  
+
   unsigned getChildNo() const { return ChildNo; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == MoveChild;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -346,17 +346,17 @@ private:
   }
   virtual unsigned getHashImpl() const { return getChildNo(); }
 };
-  
+
 /// MoveParentMatcher - This tells the interpreter to move to the parent
 /// of the current node.
 class MoveParentMatcher : public Matcher {
 public:
   MoveParentMatcher() : Matcher(MoveParent) {}
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == MoveParent;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -373,13 +373,13 @@ class CheckSameMatcher : public Matcher {
 public:
   CheckSameMatcher(unsigned matchnumber)
     : Matcher(CheckSame), MatchNumber(matchnumber) {}
-  
+
   unsigned getMatchNumber() const { return MatchNumber; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckSame;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -389,7 +389,7 @@ private:
   }
   virtual unsigned getHashImpl() const { return getMatchNumber(); }
 };
-  
+
 /// CheckPatternPredicateMatcher - This checks the target-specific predicate
 /// to see if the entire pattern is capable of matching.  This predicate does
 /// not take a node as input.  This is used for subtarget feature checks etc.
@@ -398,13 +398,13 @@ class CheckPatternPredicateMatcher : public Matcher {
 public:
   CheckPatternPredicateMatcher(StringRef predicate)
     : Matcher(CheckPatternPredicate), Predicate(predicate) {}
-  
+
   StringRef getPredicate() const { return Predicate; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckPatternPredicate;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -414,7 +414,7 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
-  
+
 /// CheckPredicateMatcher - This checks the target-specific predicate to
 /// see if the node is acceptable.
 class CheckPredicateMatcher : public Matcher {
@@ -422,13 +422,13 @@ class CheckPredicateMatcher : public Matcher {
 public:
   CheckPredicateMatcher(StringRef predname)
     : Matcher(CheckPredicate), PredName(predname) {}
-  
+
   StringRef getPredicateName() const { return PredName; }
 
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckPredicate;
   }
-  
+
   // TODO: Ok?
   //virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
@@ -439,8 +439,8 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
-  
-  
+
+
 /// CheckOpcodeMatcher - This checks to see if the current node has the
 /// specified opcode, if not it fails to match.
 class CheckOpcodeMatcher : public Matcher {
@@ -448,13 +448,13 @@ class CheckOpcodeMatcher : public Matcher {
 public:
   CheckOpcodeMatcher(const SDNodeInfo &opcode)
     : Matcher(CheckOpcode), Opcode(opcode) {}
-  
+
   const SDNodeInfo &getOpcode() const { return Opcode; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckOpcode;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -478,19 +478,19 @@ public:
   static inline bool classof(const Matcher *N) {
     return N->getKind() == SwitchOpcode;
   }
-  
+
   unsigned getNumCases() const { return Cases.size(); }
-  
+
   const SDNodeInfo &getCaseOpcode(unsigned i) const { return *Cases[i].first; }
   Matcher *getCaseMatcher(unsigned i) { return Cases[i].second; }
   const Matcher *getCaseMatcher(unsigned i) const { return Cases[i].second; }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const { return false; }
   virtual unsigned getHashImpl() const { return 4123; }
 };
-  
+
 /// CheckTypeMatcher - This checks to see if the current node has the
 /// specified type at the specified result, if not it fails to match.
 class CheckTypeMatcher : public Matcher {
@@ -499,14 +499,14 @@ class CheckTypeMatcher : public Matcher {
 public:
   CheckTypeMatcher(MVT::SimpleValueType type, unsigned resno)
     : Matcher(CheckType), Type(type), ResNo(resno) {}
-  
+
   MVT::SimpleValueType getType() const { return Type; }
   unsigned getResNo() const { return ResNo; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckType;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -517,7 +517,7 @@ private:
   virtual unsigned getHashImpl() const { return Type; }
   virtual bool isContradictoryImpl(const Matcher *M) const;
 };
-  
+
 /// SwitchTypeMatcher - Switch based on the current node's type, dispatching
 /// to one matcher per case.  If the type doesn't match any of the cases,
 /// then the match fails.  This is semantically equivalent to a Scope node where
@@ -528,24 +528,24 @@ public:
   SwitchTypeMatcher(const std::pair<MVT::SimpleValueType, Matcher*> *cases,
                     unsigned numcases)
   : Matcher(SwitchType), Cases(cases, cases+numcases) {}
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == SwitchType;
   }
-  
+
   unsigned getNumCases() const { return Cases.size(); }
-  
+
   MVT::SimpleValueType getCaseType(unsigned i) const { return Cases[i].first; }
   Matcher *getCaseMatcher(unsigned i) { return Cases[i].second; }
   const Matcher *getCaseMatcher(unsigned i) const { return Cases[i].second; }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const { return false; }
   virtual unsigned getHashImpl() const { return 4123; }
 };
-  
-  
+
+
 /// CheckChildTypeMatcher - This checks to see if a child node has the
 /// specified type, if not it fails to match.
 class CheckChildTypeMatcher : public Matcher {
@@ -554,14 +554,14 @@ class CheckChildTypeMatcher : public Matcher {
 public:
   CheckChildTypeMatcher(unsigned childno, MVT::SimpleValueType type)
     : Matcher(CheckChildType), ChildNo(childno), Type(type) {}
-  
+
   unsigned getChildNo() const { return ChildNo; }
   MVT::SimpleValueType getType() const { return Type; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckChildType;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -573,7 +573,7 @@ private:
   virtual unsigned getHashImpl() const { return (Type << 3) | ChildNo; }
   virtual bool isContradictoryImpl(const Matcher *M) const;
 };
-  
+
 
 /// CheckIntegerMatcher - This checks to see if the current node is a
 /// ConstantSDNode with the specified integer value, if not it fails to match.
@@ -582,13 +582,13 @@ class CheckIntegerMatcher : public Matcher {
 public:
   CheckIntegerMatcher(int64_t value)
     : Matcher(CheckInteger), Value(value) {}
-  
+
   int64_t getValue() const { return Value; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckInteger;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -599,7 +599,7 @@ private:
   virtual unsigned getHashImpl() const { return Value; }
   virtual bool isContradictoryImpl(const Matcher *M) const;
 };
-  
+
 /// CheckCondCodeMatcher - This checks to see if the current node is a
 /// CondCodeSDNode with the specified condition, if not it fails to match.
 class CheckCondCodeMatcher : public Matcher {
@@ -607,13 +607,13 @@ class CheckCondCodeMatcher : public Matcher {
 public:
   CheckCondCodeMatcher(StringRef condcodename)
     : Matcher(CheckCondCode), CondCodeName(condcodename) {}
-  
+
   StringRef getCondCodeName() const { return CondCodeName; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckCondCode;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -623,7 +623,7 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
-  
+
 /// CheckValueTypeMatcher - This checks to see if the current node is a
 /// VTSDNode with the specified type, if not it fails to match.
 class CheckValueTypeMatcher : public Matcher {
@@ -631,13 +631,13 @@ class CheckValueTypeMatcher : public Matcher {
 public:
   CheckValueTypeMatcher(StringRef type_name)
     : Matcher(CheckValueType), TypeName(type_name) {}
-  
+
   StringRef getTypeName() const { return TypeName; }
 
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckValueType;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -648,21 +648,21 @@ private:
   virtual unsigned getHashImpl() const;
   bool isContradictoryImpl(const Matcher *M) const;
 };
-  
-  
-  
+
+
+
 /// CheckComplexPatMatcher - This node runs the specified ComplexPattern on
 /// the current node.
 class CheckComplexPatMatcher : public Matcher {
   const ComplexPattern &Pattern;
-  
-  /// MatchNumber - This is the recorded nodes slot that contains the node we want to
-  /// match against.
+
+  /// MatchNumber - This is the recorded nodes slot that contains the node we
+  /// want to match against.
   unsigned MatchNumber;
-  
+
   /// Name - The name of the node we're matching, for comment emission.
   std::string Name;
-  
+
   /// FirstResult - This is the first slot in the RecordedNodes list that the
   /// result of the match populates.
   unsigned FirstResult;
@@ -671,17 +671,17 @@ public:
                          const std::string &name, unsigned firstresult)
     : Matcher(CheckComplexPat), Pattern(pattern), MatchNumber(matchnumber),
       Name(name), FirstResult(firstresult) {}
-  
+
   const ComplexPattern &getPattern() const { return Pattern; }
   unsigned getMatchNumber() const { return MatchNumber; }
-  
+
   const std::string getName() const { return Name; }
   unsigned getFirstResult() const { return FirstResult; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckComplexPat;
   }
-  
+
   // Not safe to move a pattern predicate past a complex pattern.
   virtual bool isSafeToReorderWithPatternPredicate() const { return false; }
 
@@ -695,7 +695,7 @@ private:
     return (unsigned)(intptr_t)&Pattern ^ MatchNumber;
   }
 };
-  
+
 /// CheckAndImmMatcher - This checks to see if the current node is an 'and'
 /// with something equivalent to the specified immediate.
 class CheckAndImmMatcher : public Matcher {
@@ -703,13 +703,13 @@ class CheckAndImmMatcher : public Matcher {
 public:
   CheckAndImmMatcher(int64_t value)
     : Matcher(CheckAndImm), Value(value) {}
-  
+
   int64_t getValue() const { return Value; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckAndImm;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -727,13 +727,13 @@ class CheckOrImmMatcher : public Matcher {
 public:
   CheckOrImmMatcher(int64_t value)
     : Matcher(CheckOrImm), Value(value) {}
-  
+
   int64_t getValue() const { return Value; }
 
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckOrImm;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -750,11 +750,11 @@ class CheckFoldableChainNodeMatcher : public Matcher {
 public:
   CheckFoldableChainNodeMatcher()
     : Matcher(CheckFoldableChainNode) {}
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CheckFoldableChainNode;
   }
-  
+
   virtual bool isSafeToReorderWithPatternPredicate() const { return true; }
 
 private:
@@ -770,14 +770,14 @@ class EmitIntegerMatcher : public Matcher {
 public:
   EmitIntegerMatcher(int64_t val, MVT::SimpleValueType vt)
     : Matcher(EmitInteger), Val(val), VT(vt) {}
-  
+
   int64_t getValue() const { return Val; }
   MVT::SimpleValueType getVT() const { return VT; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitInteger;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -795,14 +795,14 @@ class EmitStringIntegerMatcher : public Matcher {
 public:
   EmitStringIntegerMatcher(const std::string &val, MVT::SimpleValueType vt)
     : Matcher(EmitStringInteger), Val(val), VT(vt) {}
-  
+
   const std::string &getValue() const { return Val; }
   MVT::SimpleValueType getVT() const { return VT; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitStringInteger;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -811,7 +811,7 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
-  
+
 /// EmitRegisterMatcher - This creates a new TargetConstant.
 class EmitRegisterMatcher : public Matcher {
   /// Reg - The def for the register that we're emitting.  If this is null, then
@@ -821,14 +821,14 @@ class EmitRegisterMatcher : public Matcher {
 public:
   EmitRegisterMatcher(Record *reg, MVT::SimpleValueType vt)
     : Matcher(EmitRegister), Reg(reg), VT(vt) {}
-  
+
   Record *getReg() const { return Reg; }
   MVT::SimpleValueType getVT() const { return VT; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitRegister;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -848,13 +848,13 @@ class EmitConvertToTargetMatcher : public Matcher {
 public:
   EmitConvertToTargetMatcher(unsigned slot)
     : Matcher(EmitConvertToTarget), Slot(slot) {}
-  
+
   unsigned getSlot() const { return Slot; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitConvertToTarget;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -862,7 +862,7 @@ private:
   }
   virtual unsigned getHashImpl() const { return Slot; }
 };
-  
+
 /// EmitMergeInputChainsMatcher - Emit a node that merges a list of input
 /// chains together with a token factor.  The list of nodes are the nodes in the
 /// matched pattern that have chain input/outputs.  This node adds all input
@@ -872,18 +872,18 @@ class EmitMergeInputChainsMatcher : public Matcher {
 public:
   EmitMergeInputChainsMatcher(const unsigned *nodes, unsigned NumNodes)
     : Matcher(EmitMergeInputChains), ChainNodes(nodes, nodes+NumNodes) {}
-  
+
   unsigned getNumNodes() const { return ChainNodes.size(); }
-  
+
   unsigned getNode(unsigned i) const {
     assert(i < ChainNodes.size());
     return ChainNodes[i];
-  }  
-  
+  }
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitMergeInputChains;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -891,9 +891,9 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
-  
+
 /// EmitCopyToRegMatcher - Emit a CopyToReg node from a value to a physreg,
-/// pushing the chain and flag results.
+/// pushing the chain and glue results.
 ///
 class EmitCopyToRegMatcher : public Matcher {
   unsigned SrcSlot; // Value to copy into the physreg.
@@ -901,27 +901,27 @@ class EmitCopyToRegMatcher : public Matcher {
 public:
   EmitCopyToRegMatcher(unsigned srcSlot, Record *destPhysReg)
     : Matcher(EmitCopyToReg), SrcSlot(srcSlot), DestPhysReg(destPhysReg) {}
-  
+
   unsigned getSrcSlot() const { return SrcSlot; }
   Record *getDestPhysReg() const { return DestPhysReg; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitCopyToReg;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
     return cast<EmitCopyToRegMatcher>(M)->SrcSlot == SrcSlot &&
-           cast<EmitCopyToRegMatcher>(M)->DestPhysReg == DestPhysReg; 
+           cast<EmitCopyToRegMatcher>(M)->DestPhysReg == DestPhysReg;
   }
   virtual unsigned getHashImpl() const {
     return SrcSlot ^ ((unsigned)(intptr_t)DestPhysReg << 4);
   }
 };
-  
-    
-  
+
+
+
 /// EmitNodeXFormMatcher - Emit an operation that runs an SDNodeXForm on a
 /// recorded node and records the result.
 class EmitNodeXFormMatcher : public Matcher {
@@ -930,33 +930,33 @@ class EmitNodeXFormMatcher : public Matcher {
 public:
   EmitNodeXFormMatcher(unsigned slot, Record *nodeXForm)
     : Matcher(EmitNodeXForm), Slot(slot), NodeXForm(nodeXForm) {}
-  
+
   unsigned getSlot() const { return Slot; }
   Record *getNodeXForm() const { return NodeXForm; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitNodeXForm;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
     return cast<EmitNodeXFormMatcher>(M)->Slot == Slot &&
-           cast<EmitNodeXFormMatcher>(M)->NodeXForm == NodeXForm; 
+           cast<EmitNodeXFormMatcher>(M)->NodeXForm == NodeXForm;
   }
   virtual unsigned getHashImpl() const {
     return Slot ^ ((unsigned)(intptr_t)NodeXForm << 4);
   }
 };
-  
+
 /// EmitNodeMatcherCommon - Common class shared between EmitNode and
 /// MorphNodeTo.
 class EmitNodeMatcherCommon : public Matcher {
   std::string OpcodeName;
   const SmallVector<MVT::SimpleValueType, 3> VTs;
   const SmallVector<unsigned, 6> Operands;
-  bool HasChain, HasInFlag, HasOutFlag, HasMemRefs;
-  
+  bool HasChain, HasInGlue, HasOutGlue, HasMemRefs;
+
   /// NumFixedArityOperands - If this is a fixed arity node, this is set to -1.
   /// If this is a varidic node, this is set to the number of fixed arity
   /// operands in the root of the pattern.  The rest are appended to this node.
@@ -965,16 +965,16 @@ public:
   EmitNodeMatcherCommon(const std::string &opcodeName,
                         const MVT::SimpleValueType *vts, unsigned numvts,
                         const unsigned *operands, unsigned numops,
-                        bool hasChain, bool hasInFlag, bool hasOutFlag,
+                        bool hasChain, bool hasInGlue, bool hasOutGlue,
                         bool hasmemrefs,
                         int numfixedarityoperands, bool isMorphNodeTo)
     : Matcher(isMorphNodeTo ? MorphNodeTo : EmitNode), OpcodeName(opcodeName),
       VTs(vts, vts+numvts), Operands(operands, operands+numops),
-      HasChain(hasChain), HasInFlag(hasInFlag), HasOutFlag(hasOutFlag),
+      HasChain(hasChain), HasInGlue(hasInGlue), HasOutGlue(hasOutGlue),
       HasMemRefs(hasmemrefs), NumFixedArityOperands(numfixedarityoperands) {}
-  
+
   const std::string &getOpcodeName() const { return OpcodeName; }
-  
+
   unsigned getNumVTs() const { return VTs.size(); }
   MVT::SimpleValueType getVT(unsigned i) const {
     assert(i < VTs.size());
@@ -986,27 +986,27 @@ public:
     assert(i < Operands.size());
     return Operands[i];
   }
-  
+
   const SmallVectorImpl<MVT::SimpleValueType> &getVTList() const { return VTs; }
   const SmallVectorImpl<unsigned> &getOperandList() const { return Operands; }
 
-  
+
   bool hasChain() const { return HasChain; }
-  bool hasInFlag() const { return HasInFlag; }
-  bool hasOutFlag() const { return HasOutFlag; }
+  bool hasInFlag() const { return HasInGlue; }
+  bool hasOutFlag() const { return HasOutGlue; }
   bool hasMemRefs() const { return HasMemRefs; }
   int getNumFixedArityOperands() const { return NumFixedArityOperands; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitNode || N->getKind() == MorphNodeTo;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const;
   virtual unsigned getHashImpl() const;
 };
-  
+
 /// EmitNodeMatcher - This signals a successful match and generates a node.
 class EmitNodeMatcher : public EmitNodeMatcherCommon {
   unsigned FirstResultSlot;
@@ -1021,15 +1021,15 @@ public:
                           hasInFlag, hasOutFlag, hasmemrefs,
                           numfixedarityoperands, false),
     FirstResultSlot(firstresultslot) {}
-  
+
   unsigned getFirstResultSlot() const { return FirstResultSlot; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == EmitNode;
   }
-  
+
 };
-  
+
 class MorphNodeToMatcher : public EmitNodeMatcherCommon {
   const PatternToMatch &Pattern;
 public:
@@ -1044,38 +1044,38 @@ public:
                             numfixedarityoperands, true),
       Pattern(pattern) {
   }
-  
+
   const PatternToMatch &getPattern() const { return Pattern; }
 
   static inline bool classof(const Matcher *N) {
     return N->getKind() == MorphNodeTo;
   }
 };
-  
-/// MarkFlagResultsMatcher - This node indicates which non-root nodes in the
-/// pattern produce flags.  This allows CompleteMatchMatcher to update them
-/// with the output flag of the resultant code.
-class MarkFlagResultsMatcher : public Matcher {
-  SmallVector<unsigned, 3> FlagResultNodes;
+
+/// MarkGlueResultsMatcher - This node indicates which non-root nodes in the
+/// pattern produce glue.  This allows CompleteMatchMatcher to update them
+/// with the output glue of the resultant code.
+class MarkGlueResultsMatcher : public Matcher {
+  SmallVector<unsigned, 3> GlueResultNodes;
 public:
-  MarkFlagResultsMatcher(const unsigned *nodes, unsigned NumNodes)
-    : Matcher(MarkFlagResults), FlagResultNodes(nodes, nodes+NumNodes) {}
-  
-  unsigned getNumNodes() const { return FlagResultNodes.size(); }
-  
+  MarkGlueResultsMatcher(const unsigned *nodes, unsigned NumNodes)
+    : Matcher(MarkGlueResults), GlueResultNodes(nodes, nodes+NumNodes) {}
+
+  unsigned getNumNodes() const { return GlueResultNodes.size(); }
+
   unsigned getNode(unsigned i) const {
-    assert(i < FlagResultNodes.size());
-    return FlagResultNodes[i];
-  }  
-  
+    assert(i < GlueResultNodes.size());
+    return GlueResultNodes[i];
+  }
+
   static inline bool classof(const Matcher *N) {
-    return N->getKind() == MarkFlagResults;
+    return N->getKind() == MarkGlueResults;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
-    return cast<MarkFlagResultsMatcher>(M)->FlagResultNodes == FlagResultNodes;
+    return cast<MarkGlueResultsMatcher>(M)->GlueResultNodes == GlueResultNodes;
   }
   virtual unsigned getHashImpl() const;
 };
@@ -1095,11 +1095,11 @@ public:
   unsigned getNumResults() const { return Results.size(); }
   unsigned getResult(unsigned R) const { return Results[R]; }
   const PatternToMatch &getPattern() const { return Pattern; }
-  
+
   static inline bool classof(const Matcher *N) {
     return N->getKind() == CompleteMatch;
   }
-  
+
 private:
   virtual void printImpl(raw_ostream &OS, unsigned indent) const;
   virtual bool isEqualImpl(const Matcher *M) const {
@@ -1108,7 +1108,7 @@ private:
   }
   virtual unsigned getHashImpl() const;
 };
- 
+
 } // end namespace llvm
 
 #endif
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index dfbfe80c0a1d..0b7fbf7c8518 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -220,8 +220,8 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << "OPC_RecordMemRef,\n";
     return 1;
       
-  case Matcher::CaptureFlagInput:
-    OS << "OPC_CaptureFlagInput,\n";
+  case Matcher::CaptureGlueInput:
+    OS << "OPC_CaptureGlueInput,\n";
     return 1;
       
   case Matcher::MoveChild:
@@ -485,8 +485,8 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << ", TARGET_OPCODE(" << EN->getOpcodeName() << "), 0";
     
     if (EN->hasChain())   OS << "|OPFL_Chain";
-    if (EN->hasInFlag())  OS << "|OPFL_FlagInput";
-    if (EN->hasOutFlag()) OS << "|OPFL_FlagOutput";
+    if (EN->hasInFlag())  OS << "|OPFL_GlueInput";
+    if (EN->hasOutFlag()) OS << "|OPFL_GlueOutput";
     if (EN->hasMemRefs()) OS << "|OPFL_MemRefs";
     if (EN->getNumFixedArityOperands() != -1)
       OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
@@ -531,9 +531,9 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     
     return 6+EN->getNumVTs()+NumOperandBytes;
   }
-  case Matcher::MarkFlagResults: {
-    const MarkFlagResultsMatcher *CFR = cast<MarkFlagResultsMatcher>(N);
-    OS << "OPC_MarkFlagResults, " << CFR->getNumNodes() << ", ";
+  case Matcher::MarkGlueResults: {
+    const MarkGlueResultsMatcher *CFR = cast<MarkGlueResultsMatcher>(N);
+    OS << "OPC_MarkGlueResults, " << CFR->getNumNodes() << ", ";
     unsigned NumOperandBytes = 0;
     for (unsigned i = 0, e = CFR->getNumNodes(); i != e; ++i)
       NumOperandBytes += EmitVBRValue(CFR->getNode(i), OS);
@@ -633,8 +633,9 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
   // Emit CompletePattern matchers.
   // FIXME: This should be const.
   if (!ComplexPatterns.empty()) {
-    OS << "bool CheckComplexPattern(SDNode *Root, SDValue N,\n";
-    OS << "      unsigned PatternNo, SmallVectorImpl<SDValue> &Result) {\n";
+    OS << "bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,\n";
+    OS << "                         unsigned PatternNo,\n";
+    OS << "         SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {\n";
     OS << "  unsigned NextRes = Result.size();\n";
     OS << "  switch (PatternNo) {\n";
     OS << "  default: assert(0 && \"Invalid pattern # in table?\");\n";
@@ -649,9 +650,20 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
       OS << "    Result.resize(NextRes+" << NumOps << ");\n";
       OS << "    return "  << P.getSelectFunc();
 
-      OS << "(Root, N";
+      OS << "(";
+      // If the complex pattern wants the root of the match, pass it in as the
+      // first argument.
+      if (P.hasProperty(SDNPWantRoot))
+        OS << "Root, ";
+      
+      // If the complex pattern wants the parent of the operand being matched,
+      // pass it in as the next argument.
+      if (P.hasProperty(SDNPWantParent))
+        OS << "Parent, ";
+      
+      OS << "N";
       for (unsigned i = 0; i != NumOps; ++i)
-        OS << ", Result[NextRes+" << i << ']';
+        OS << ", Result[NextRes+" << i << "].first";
       OS << ");\n";
     }
     OS << "  }\n";
@@ -730,7 +742,7 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
     case Matcher::RecordNode: OS << "OPC_RecordNode"; break; 
     case Matcher::RecordChild: OS << "OPC_RecordChild"; break;
     case Matcher::RecordMemRef: OS << "OPC_RecordMemRef"; break;
-    case Matcher::CaptureFlagInput: OS << "OPC_CaptureFlagInput"; break;
+    case Matcher::CaptureGlueInput: OS << "OPC_CaptureGlueInput"; break;
     case Matcher::MoveChild: OS << "OPC_MoveChild"; break;
     case Matcher::MoveParent: OS << "OPC_MoveParent"; break;
     case Matcher::CheckSame: OS << "OPC_CheckSame"; break;
@@ -759,7 +771,7 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M,
     case Matcher::EmitNode: OS << "OPC_EmitNode"; break;
     case Matcher::MorphNodeTo: OS << "OPC_MorphNodeTo"; break;
     case Matcher::EmitNodeXForm: OS << "OPC_EmitNodeXForm"; break;
-    case Matcher::MarkFlagResults: OS << "OPC_MarkFlagResults"; break;
+    case Matcher::MarkGlueResults: OS << "OPC_MarkGlueResults"; break;
     case Matcher::CompleteMatch: OS << "OPC_CompleteMatch"; break;    
     }
     
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index aba6636a1370..7c0badec1d6d 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -25,12 +25,12 @@ static MVT::SimpleValueType getRegisterValueType(Record *R,
   MVT::SimpleValueType VT = MVT::Other;
   const std::vector<CodeGenRegisterClass> &RCs = T.getRegisterClasses();
   std::vector<Record*>::const_iterator Element;
-  
+
   for (unsigned rc = 0, e = RCs.size(); rc != e; ++rc) {
     const CodeGenRegisterClass &RC = RCs[rc];
     if (!std::count(RC.Elements.begin(), RC.Elements.end(), R))
       continue;
-    
+
     if (!FoundRC) {
       FoundRC = true;
       VT = RC.getValueTypeNum(0);
@@ -48,30 +48,30 @@ namespace {
   class MatcherGen {
     const PatternToMatch &Pattern;
     const CodeGenDAGPatterns &CGP;
-    
+
     /// PatWithNoTypes - This is a clone of Pattern.getSrcPattern() that starts
     /// out with all of the types removed.  This allows us to insert type checks
     /// as we scan the tree.
     TreePatternNode *PatWithNoTypes;
-    
+
     /// VariableMap - A map from variable names ('$dst') to the recorded operand
     /// number that they were captured as.  These are biased by 1 to make
     /// insertion easier.
     StringMap<unsigned> VariableMap;
-    
+
     /// NextRecordedOperandNo - As we emit opcodes to record matched values in
     /// the RecordedNodes array, this keeps track of which slot will be next to
     /// record into.
     unsigned NextRecordedOperandNo;
-    
+
     /// MatchedChainNodes - This maintains the position in the recorded nodes
     /// array of all of the recorded input nodes that have chains.
     SmallVector<unsigned, 2> MatchedChainNodes;
 
-    /// MatchedFlagResultNodes - This maintains the position in the recorded
-    /// nodes array of all of the recorded input nodes that have flag results.
-    SmallVector<unsigned, 2> MatchedFlagResultNodes;
-    
+    /// MatchedGlueResultNodes - This maintains the position in the recorded
+    /// nodes array of all of the recorded input nodes that have glue results.
+    SmallVector<unsigned, 2> MatchedGlueResultNodes;
+
     /// MatchedComplexPatterns - This maintains a list of all of the
     /// ComplexPatterns that we need to check.  The patterns are known to have
     /// names which were recorded.  The second element of each pair is the first
@@ -79,40 +79,39 @@ namespace {
     /// results into.
     SmallVector<std::pair<const TreePatternNode*,
                           unsigned>, 2> MatchedComplexPatterns;
-    
+
     /// PhysRegInputs - List list has an entry for each explicitly specified
     /// physreg input to the pattern.  The first elt is the Register node, the
     /// second is the recorded slot number the input pattern match saved it in.
     SmallVector<std::pair<Record*, unsigned>, 2> PhysRegInputs;
-    
+
     /// Matcher - This is the top level of the generated matcher, the result.
     Matcher *TheMatcher;
-    
+
     /// CurPredicate - As we emit matcher nodes, this points to the latest check
     /// which should have future checks stuck into its Next position.
     Matcher *CurPredicate;
   public:
     MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp);
-    
+
     ~MatcherGen() {
       delete PatWithNoTypes;
     }
-    
+
     bool EmitMatcherCode(unsigned Variant);
     void EmitResultCode();
-    
+
     Matcher *GetMatcher() const { return TheMatcher; }
-    Matcher *GetCurPredicate() const { return CurPredicate; }
   private:
     void AddMatcher(Matcher *NewNode);
     void InferPossibleTypes();
-    
+
     // Matcher Generation.
     void EmitMatchCode(const TreePatternNode *N, TreePatternNode *NodeNoTypes);
     void EmitLeafMatchCode(const TreePatternNode *N);
     void EmitOperatorMatchCode(const TreePatternNode *N,
                                TreePatternNode *NodeNoTypes);
-    
+
     // Result Code Generation.
     unsigned getNamedArgumentSlot(StringRef Name) {
       unsigned VarMapEntry = VariableMap[Name];
@@ -124,7 +123,7 @@ namespace {
     /// GetInstPatternNode - Get the pattern for an instruction.
     const TreePatternNode *GetInstPatternNode(const DAGInstruction &Ins,
                                               const TreePatternNode *N);
-    
+
     void EmitResultOperand(const TreePatternNode *N,
                            SmallVectorImpl<unsigned> &ResultOps);
     void EmitResultOfNamedOperand(const TreePatternNode *N,
@@ -136,7 +135,7 @@ namespace {
     void EmitResultSDNodeXFormAsOperand(const TreePatternNode *N,
                                         SmallVectorImpl<unsigned> &ResultOps);
     };
-  
+
 } // end anon namespace.
 
 MatcherGen::MatcherGen(const PatternToMatch &pattern,
@@ -157,7 +156,7 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern,
   //
   PatWithNoTypes = Pattern.getSrcPattern()->clone();
   PatWithNoTypes->RemoveAllTypes();
-    
+
   // If there are types that are manifestly known, infer them.
   InferPossibleTypes();
 }
@@ -170,7 +169,7 @@ void MatcherGen::InferPossibleTypes() {
   // TP - Get *SOME* tree pattern, we don't care which.  It is only used for
   // diagnostics, which we know are impossible at this point.
   TreePattern &TP = *CGP.pf_begin()->second;
-  
+
   try {
     bool MadeChange = true;
     while (MadeChange)
@@ -183,7 +182,7 @@ void MatcherGen::InferPossibleTypes() {
 }
 
 
-/// AddMatcher - Add a matcher node to the current graph we're building. 
+/// AddMatcher - Add a matcher node to the current graph we're building.
 void MatcherGen::AddMatcher(Matcher *NewNode) {
   if (CurPredicate != 0)
     CurPredicate->setNext(NewNode);
@@ -200,7 +199,7 @@ void MatcherGen::AddMatcher(Matcher *NewNode) {
 /// EmitLeafMatchCode - Generate matching code for leaf nodes.
 void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
   assert(N->isLeaf() && "Not a leaf?");
-  
+
   // Direct match against an integer constant.
   if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
     // If this is the root of the dag we're matching, we emit a redundant opcode
@@ -213,16 +212,16 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
 
     return AddMatcher(new CheckIntegerMatcher(II->getValue()));
   }
-  
+
   DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue());
   if (DI == 0) {
     errs() << "Unknown leaf kind: " << *DI << "\n";
     abort();
   }
-  
+
   Record *LeafRec = DI->getDef();
   if (// Handle register references.  Nothing to do here, they always match.
-      LeafRec->isSubClassOf("RegisterClass") || 
+      LeafRec->isSubClassOf("RegisterClass") ||
       LeafRec->isSubClassOf("PointerLikeRegClass") ||
       LeafRec->isSubClassOf("SubRegIndex") ||
       // Place holder for SRCVALUE nodes. Nothing to do here.
@@ -230,20 +229,20 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
     return;
 
   // If we have a physreg reference like (mul gpr:$src, EAX) then we need to
-  // record the register 
+  // record the register
   if (LeafRec->isSubClassOf("Register")) {
     AddMatcher(new RecordMatcher("physreg input "+LeafRec->getName(),
                                  NextRecordedOperandNo));
     PhysRegInputs.push_back(std::make_pair(LeafRec, NextRecordedOperandNo++));
     return;
   }
-  
+
   if (LeafRec->isSubClassOf("ValueType"))
     return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName()));
-  
+
   if (LeafRec->isSubClassOf("CondCode"))
     return AddMatcher(new CheckCondCodeMatcher(LeafRec->getName()));
-  
+
   if (LeafRec->isSubClassOf("ComplexPattern")) {
     // We can't model ComplexPattern uses that don't have their name taken yet.
     // The OPC_CheckComplexPattern operation implicitly records the results.
@@ -257,7 +256,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
     MatchedComplexPatterns.push_back(std::make_pair(N, 0));
     return;
   }
-  
+
   errs() << "Unknown leaf kind: " << *N << "\n";
   abort();
 }
@@ -266,7 +265,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
                                        TreePatternNode *NodeNoTypes) {
   assert(!N->isLeaf() && "Not an operator?");
   const SDNodeInfo &CInfo = CGP.getSDNodeInfo(N->getOperator());
-  
+
   // If this is an 'and R, 1234' where the operation is AND/OR and the RHS is
   // a constant without a predicate fn that has more that one bit set, handle
   // this as a special case.  This is usually for targets that have special
@@ -277,7 +276,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
   // them from the mask in the dag.  For example, it might turn 'AND X, 255'
   // into 'AND X, 254' if it knows the low bit is set.  Emit code that checks
   // to handle this.
-  if ((N->getOperator()->getName() == "and" || 
+  if ((N->getOperator()->getName() == "and" ||
        N->getOperator()->getName() == "or") &&
       N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateFns().empty() &&
       N->getPredicateFns().empty()) {
@@ -303,15 +302,15 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
       }
     }
   }
-  
+
   // Check that the current opcode lines up.
   AddMatcher(new CheckOpcodeMatcher(CInfo));
-  
+
   // If this node has memory references (i.e. is a load or store), tell the
   // interpreter to capture them in the memref array.
   if (N->NodeHasProperty(SDNPMemOperand, CGP))
     AddMatcher(new RecordMemRefMatcher());
-  
+
   // If this node has a chain, then the chain is operand #0 is the SDNode, and
   // the child numbers of the node are all offset by one.
   unsigned OpNo = 0;
@@ -322,7 +321,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
                                  NextRecordedOperandNo));
     // Remember all of the input chains our pattern will match.
     MatchedChainNodes.push_back(NextRecordedOperandNo++);
-    
+
     // Don't look at the input chain when matching the tree pattern to the
     // SDNode.
     OpNo = 1;
@@ -353,11 +352,11 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
       // If there is a node between the root and this node, then we definitely
       // need to emit the check.
       bool NeedCheck = !Root->hasChild(N);
-      
+
       // If it *is* an immediate child of the root, we can still need a check if
       // the root SDNode has multiple inputs.  For us, this means that it is an
       // intrinsic, has multiple operands, or has other inputs like chain or
-      // flag).
+      // glue).
       if (!NeedCheck) {
         const SDNodeInfo &PInfo = CGP.getSDNodeInfo(Root->getOperator());
         NeedCheck =
@@ -366,34 +365,34 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
           Root->getOperator() == CGP.get_intrinsic_wo_chain_sdnode() ||
           PInfo.getNumOperands() > 1 ||
           PInfo.hasProperty(SDNPHasChain) ||
-          PInfo.hasProperty(SDNPInFlag) ||
-          PInfo.hasProperty(SDNPOptInFlag);
+          PInfo.hasProperty(SDNPInGlue) ||
+          PInfo.hasProperty(SDNPOptInGlue);
       }
-      
+
       if (NeedCheck)
         AddMatcher(new CheckFoldableChainNodeMatcher());
     }
   }
 
-  // If this node has an output flag and isn't the root, remember it.
-  if (N->NodeHasProperty(SDNPOutFlag, CGP) && 
+  // If this node has an output glue and isn't the root, remember it.
+  if (N->NodeHasProperty(SDNPOutGlue, CGP) &&
       N != Pattern.getSrcPattern()) {
-    // TODO: This redundantly records nodes with both flags and chains.
-    
+    // TODO: This redundantly records nodes with both glues and chains.
+
     // Record the node and remember it in our chained nodes list.
     AddMatcher(new RecordMatcher("'" + N->getOperator()->getName() +
-                                         "' flag output node",
+                                         "' glue output node",
                                  NextRecordedOperandNo));
-    // Remember all of the nodes with output flags our pattern will match.
-    MatchedFlagResultNodes.push_back(NextRecordedOperandNo++);
+    // Remember all of the nodes with output glue our pattern will match.
+    MatchedGlueResultNodes.push_back(NextRecordedOperandNo++);
   }
-  
-  // If this node is known to have an input flag or if it *might* have an input
-  // flag, capture it as the flag input of the pattern.
-  if (N->NodeHasProperty(SDNPOptInFlag, CGP) ||
-      N->NodeHasProperty(SDNPInFlag, CGP))
-    AddMatcher(new CaptureFlagInputMatcher());
-      
+
+  // If this node is known to have an input glue or if it *might* have an input
+  // glue, capture it as the glue input of the pattern.
+  if (N->NodeHasProperty(SDNPOptInGlue, CGP) ||
+      N->NodeHasProperty(SDNPInGlue, CGP))
+    AddMatcher(new CaptureGlueInputMatcher());
+
   for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i, ++OpNo) {
     // Get the code suitable for matching this child.  Move to the child, check
     // it then move back to the parent.
@@ -410,14 +409,14 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
   // need to do a type check.  Emit the check, apply the tyep to NodeNoTypes and
   // reinfer any correlated types.
   SmallVector<unsigned, 2> ResultsToTypeCheck;
-  
+
   for (unsigned i = 0, e = NodeNoTypes->getNumTypes(); i != e; ++i) {
     if (NodeNoTypes->getExtType(i) == N->getExtType(i)) continue;
     NodeNoTypes->setType(i, N->getExtType(i));
     InferPossibleTypes();
     ResultsToTypeCheck.push_back(i);
   }
-  
+
   // If this node has a name associated with it, capture it in VariableMap. If
   // we already saw this in the pattern, emit code to verify dagness.
   if (!N->getName().empty()) {
@@ -435,16 +434,16 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
       return;
     }
   }
-  
+
   if (N->isLeaf())
     EmitLeafMatchCode(N);
   else
     EmitOperatorMatchCode(N, NodeNoTypes);
-  
+
   // If there are node predicates for this node, generate their checks.
   for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i)
     AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i]));
-  
+
   for (unsigned i = 0, e = ResultsToTypeCheck.size(); i != e; ++i)
     AddMatcher(new CheckTypeMatcher(N->getType(ResultsToTypeCheck[i]),
                                     ResultsToTypeCheck[i]));
@@ -463,27 +462,27 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
     const std::vector<Record*> &OpNodes = CP->getRootNodes();
     assert(!OpNodes.empty() &&"Complex Pattern must specify what it can match");
     if (Variant >= OpNodes.size()) return true;
-    
+
     AddMatcher(new CheckOpcodeMatcher(CGP.getSDNodeInfo(OpNodes[Variant])));
   } else {
     if (Variant != 0) return true;
   }
-    
+
   // Emit the matcher for the pattern structure and types.
   EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes);
-  
+
   // If the pattern has a predicate on it (e.g. only enabled when a subtarget
   // feature is around, do the check).
   if (!Pattern.getPredicateCheck().empty())
     AddMatcher(new CheckPatternPredicateMatcher(Pattern.getPredicateCheck()));
-  
+
   // Now that we've completed the structural type match, emit any ComplexPattern
   // checks (e.g. addrmode matches).  We emit this after the structural match
   // because they are generally more expensive to evaluate and more difficult to
   // factor.
   for (unsigned i = 0, e = MatchedComplexPatterns.size(); i != e; ++i) {
     const TreePatternNode *N = MatchedComplexPatterns[i].first;
-    
+
     // Remember where the results of this match get stuck.
     MatchedComplexPatterns[i].second = NextRecordedOperandNo;
 
@@ -492,15 +491,15 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
     assert(!N->getName().empty() && RecNodeEntry &&
            "Complex pattern should have a name and slot");
     --RecNodeEntry;  // Entries in VariableMap are biased.
-    
+
     const ComplexPattern &CP =
       CGP.getComplexPattern(((DefInit*)N->getLeafValue())->getDef());
-    
+
     // Emit a CheckComplexPat operation, which does the match (aborting if it
     // fails) and pushes the matched operands onto the recorded nodes list.
     AddMatcher(new CheckComplexPatMatcher(CP, RecNodeEntry,
                                           N->getName(), NextRecordedOperandNo));
-    
+
     // Record the right number of operands.
     NextRecordedOperandNo += CP.getNumOperands();
     if (CP.hasProperty(SDNPHasChain)) {
@@ -508,17 +507,17 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
       // fact that we just recorded a chain input.  The chain input will be
       // matched as the last operand of the predicate if it was successful.
       ++NextRecordedOperandNo; // Chained node operand.
-    
+
       // It is the last operand recorded.
       assert(NextRecordedOperandNo > 1 &&
              "Should have recorded input/result chains at least!");
       MatchedChainNodes.push_back(NextRecordedOperandNo-1);
     }
-    
-    // TODO: Complex patterns can't have output flags, if they did, we'd want
+
+    // TODO: Complex patterns can't have output glues, if they did, we'd want
     // to record them.
   }
-  
+
   return false;
 }
 
@@ -530,7 +529,7 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
 void MatcherGen::EmitResultOfNamedOperand(const TreePatternNode *N,
                                           SmallVectorImpl<unsigned> &ResultOps){
   assert(!N->getName().empty() && "Operand not named!");
-  
+
   // A reference to a complex pattern gets all of the results of the complex
   // pattern's match.
   if (const ComplexPattern *CP = N->getComplexPatternInfo(CGP)) {
@@ -541,7 +540,7 @@ void MatcherGen::EmitResultOfNamedOperand(const TreePatternNode *N,
         break;
       }
     assert(SlotNo != 0 && "Didn't get a slot number assigned?");
-    
+
     // The first slot entry is the node itself, the subsequent entries are the
     // matched values.
     for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
@@ -562,20 +561,20 @@ void MatcherGen::EmitResultOfNamedOperand(const TreePatternNode *N,
       return;
     }
   }
-  
+
   ResultOps.push_back(SlotNo);
 }
 
 void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
                                          SmallVectorImpl<unsigned> &ResultOps) {
   assert(N->isLeaf() && "Must be a leaf");
-  
+
   if (IntInit *II = dynamic_cast<IntInit*>(N->getLeafValue())) {
     AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType(0)));
     ResultOps.push_back(NextRecordedOperandNo++);
     return;
   }
-  
+
   // If this is an explicit register reference, handle it.
   if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
     if (DI->getDef()->isSubClassOf("Register")) {
@@ -583,13 +582,13 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
-    
+
     if (DI->getDef()->getName() == "zero_reg") {
       AddMatcher(new EmitRegisterMatcher(0, N->getType(0)));
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
-    
+
     // Handle a reference to a register class. This is used
     // in COPY_TO_SUBREG instructions.
     if (DI->getDef()->isSubClassOf("RegisterClass")) {
@@ -607,17 +606,17 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
       return;
     }
   }
-  
+
   errs() << "unhandled leaf node: \n";
   N->dump();
 }
 
 /// GetInstPatternNode - Get the pattern for an instruction.
-/// 
+///
 const TreePatternNode *MatcherGen::
 GetInstPatternNode(const DAGInstruction &Inst, const TreePatternNode *N) {
   const TreePattern *InstPat = Inst.getPattern();
-  
+
   // FIXME2?: Assume actual pattern comes before "implicit".
   TreePatternNode *InstPatNode;
   if (InstPat)
@@ -626,11 +625,11 @@ GetInstPatternNode(const DAGInstruction &Inst, const TreePatternNode *N) {
     InstPatNode = Pattern.getSrcPattern();
   else
     return 0;
-  
+
   if (InstPatNode && !InstPatNode->isLeaf() &&
       InstPatNode->getOperator()->getName() == "set")
     InstPatNode = InstPatNode->getChild(InstPatNode->getNumChildren()-1);
-  
+
   return InstPatNode;
 }
 
@@ -641,7 +640,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   const CodeGenTarget &CGT = CGP.getTargetInfo();
   CodeGenInstruction &II = CGT.getInstruction(Op);
   const DAGInstruction &Inst = CGP.getInstruction(Op);
-  
+
   // If we can, get the pattern for the instruction we're generating.  We derive
   // a variety of information from this pattern, such as whether it has a chain.
   //
@@ -650,27 +649,27 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   // nodes can't duplicate.
   const TreePatternNode *InstPatNode = GetInstPatternNode(Inst, N);
 
-  // NodeHasChain - Whether the instruction node we're creating takes chains.  
+  // NodeHasChain - Whether the instruction node we're creating takes chains.
   bool NodeHasChain = InstPatNode &&
                       InstPatNode->TreeHasProperty(SDNPHasChain, CGP);
-  
+
   bool isRoot = N == Pattern.getDstPattern();
 
-  // TreeHasOutFlag - True if this tree has a flag.
-  bool TreeHasInFlag = false, TreeHasOutFlag = false;
+  // TreeHasOutGlue - True if this tree has glue.
+  bool TreeHasInGlue = false, TreeHasOutGlue = false;
   if (isRoot) {
     const TreePatternNode *SrcPat = Pattern.getSrcPattern();
-    TreeHasInFlag = SrcPat->TreeHasProperty(SDNPOptInFlag, CGP) ||
-                    SrcPat->TreeHasProperty(SDNPInFlag, CGP);
-  
+    TreeHasInGlue = SrcPat->TreeHasProperty(SDNPOptInGlue, CGP) ||
+                    SrcPat->TreeHasProperty(SDNPInGlue, CGP);
+
     // FIXME2: this is checking the entire pattern, not just the node in
     // question, doing this just for the root seems like a total hack.
-    TreeHasOutFlag = SrcPat->TreeHasProperty(SDNPOutFlag, CGP);
+    TreeHasOutGlue = SrcPat->TreeHasProperty(SDNPOutGlue, CGP);
   }
 
   // NumResults - This is the number of results produced by the instruction in
   // the "outs" list.
-  unsigned NumResults = Inst.getNumResults();    
+  unsigned NumResults = Inst.getNumResults();
 
   // Loop over all of the operands of the instruction pattern, emitting code
   // to fill them all in.  The node 'N' usually has number children equal to
@@ -679,41 +678,41 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   // in the 'execute always' values.  Match up the node operands to the
   // instruction operands to do this.
   SmallVector<unsigned, 8> InstOps;
-  for (unsigned ChildNo = 0, InstOpNo = NumResults, e = II.OperandList.size();
+  for (unsigned ChildNo = 0, InstOpNo = NumResults, e = II.Operands.size();
        InstOpNo != e; ++InstOpNo) {
-    
+
     // Determine what to emit for this operand.
-    Record *OperandNode = II.OperandList[InstOpNo].Rec;
+    Record *OperandNode = II.Operands[InstOpNo].Rec;
     if ((OperandNode->isSubClassOf("PredicateOperand") ||
          OperandNode->isSubClassOf("OptionalDefOperand")) &&
         !CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
       // This is a predicate or optional def operand; emit the
       // 'default ops' operands.
       const DAGDefaultOperand &DefaultOp
-	= CGP.getDefaultOperand(OperandNode);
+        = CGP.getDefaultOperand(OperandNode);
       for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
         EmitResultOperand(DefaultOp.DefaultOps[i], InstOps);
       continue;
     }
-    
+
     const TreePatternNode *Child = N->getChild(ChildNo);
-    
+
     // Otherwise this is a normal operand or a predicate operand without
     // 'execute always'; emit it.
     unsigned BeforeAddingNumOps = InstOps.size();
     EmitResultOperand(Child, InstOps);
     assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");
-    
+
     // If the operand is an instruction and it produced multiple results, just
     // take the first one.
     if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))
       InstOps.resize(BeforeAddingNumOps+1);
-    
+
     ++ChildNo;
   }
-  
-  // If this node has an input flag or explicitly specified input physregs, we
-  // need to add chained and flagged copyfromreg nodes and materialize the flag
+
+  // If this node has input glue or explicitly specified input physregs, we
+  // need to add chained and glued copyfromreg nodes and materialize the glue
   // input.
   if (isRoot && !PhysRegInputs.empty()) {
     // Emit all of the CopyToReg nodes for the input physical registers.  These
@@ -721,18 +720,18 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
     for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i)
       AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second,
                                           PhysRegInputs[i].first));
-    // Even if the node has no other flag inputs, the resultant node must be
-    // flagged to the CopyFromReg nodes we just generated.
-    TreeHasInFlag = true;
+    // Even if the node has no other glue inputs, the resultant node must be
+    // glued to the CopyFromReg nodes we just generated.
+    TreeHasInGlue = true;
   }
-  
-  // Result order: node results, chain, flags
-  
+
+  // Result order: node results, chain, glue
+
   // Determine the result types.
   SmallVector<MVT::SimpleValueType, 4> ResultVTs;
   for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i)
     ResultVTs.push_back(N->getType(i));
-  
+
   // If this is the root instruction of a pattern that has physical registers in
   // its result pattern, add output VTs for them.  For example, X86 has:
   //   (set AL, (mul ...))
@@ -744,7 +743,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
     Record *HandledReg = 0;
     if (II.HasOneImplicitDefWithKnownVT(CGT) != MVT::Other)
       HandledReg = II.ImplicitDefs[0];
-    
+
     for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) {
       Record *Reg = Pattern.getDstRegs()[i];
       if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue;
@@ -759,7 +758,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   if (isRoot &&
       (Pattern.getSrcPattern()->NodeHasProperty(SDNPVariadic, CGP)))
     NumFixedArityOperands = Pattern.getSrcPattern()->getNumChildren();
-  
+
   // If this is the root node and any of the nodes matched nodes in the input
   // pattern have MemRefs in them, have the interpreter collect them and plop
   // them onto this node.
@@ -776,19 +775,19 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   bool NodeHasMemRefs =
     isRoot && Pattern.getSrcPattern()->TreeHasProperty(SDNPMemOperand, CGP);
 
-  assert((!ResultVTs.empty() || TreeHasOutFlag || NodeHasChain) &&
+  assert((!ResultVTs.empty() || TreeHasOutGlue || NodeHasChain) &&
          "Node has no result");
-  
+
   AddMatcher(new EmitNodeMatcher(II.Namespace+"::"+II.TheDef->getName(),
                                  ResultVTs.data(), ResultVTs.size(),
                                  InstOps.data(), InstOps.size(),
-                                 NodeHasChain, TreeHasInFlag, TreeHasOutFlag,
+                                 NodeHasChain, TreeHasInGlue, TreeHasOutGlue,
                                  NodeHasMemRefs, NumFixedArityOperands,
                                  NextRecordedOperandNo));
-  
-  // The non-chain and non-flag results of the newly emitted node get recorded.
+
+  // The non-chain and non-glue results of the newly emitted node get recorded.
   for (unsigned i = 0, e = ResultVTs.size(); i != e; ++i) {
-    if (ResultVTs[i] == MVT::Other || ResultVTs[i] == MVT::Flag) break;
+    if (ResultVTs[i] == MVT::Other || ResultVTs[i] == MVT::Glue) break;
     OutputOps.push_back(NextRecordedOperandNo++);
   }
 }
@@ -800,7 +799,7 @@ EmitResultSDNodeXFormAsOperand(const TreePatternNode *N,
 
   // Emit the operand.
   SmallVector<unsigned, 8> InputOps;
-  
+
   // FIXME2: Could easily generalize this to support multiple inputs and outputs
   // to the SDNodeXForm.  For now we just support one input and one output like
   // the old instruction selector.
@@ -839,7 +838,7 @@ void MatcherGen::EmitResultCode() {
   if (!MatchedChainNodes.empty())
     AddMatcher(new EmitMergeInputChainsMatcher
                (MatchedChainNodes.data(), MatchedChainNodes.size()));
-  
+
   // Codegen the root of the result pattern, capturing the resulting values.
   SmallVector<unsigned, 8> Ops;
   EmitResultOperand(Pattern.getDstPattern(), Ops);
@@ -847,11 +846,11 @@ void MatcherGen::EmitResultCode() {
   // At this point, we have however many values the result pattern produces.
   // However, the input pattern might not need all of these.  If there are
   // excess values at the end (such as implicit defs of condition codes etc)
-  // just lop them off.  This doesn't need to worry about flags or chains, just
+  // just lop them off.  This doesn't need to worry about glue or chains, just
   // explicit results.
   //
   unsigned NumSrcResults = Pattern.getSrcPattern()->getNumTypes();
-  
+
   // If the pattern also has (implicit) results, count them as well.
   if (!Pattern.getDstRegs().empty()) {
     // If the root came from an implicit def in the instruction handling stuff,
@@ -865,23 +864,23 @@ void MatcherGen::EmitResultCode() {
       if (II.HasOneImplicitDefWithKnownVT(CGT) != MVT::Other)
         HandledReg = II.ImplicitDefs[0];
     }
-    
+
     for (unsigned i = 0; i != Pattern.getDstRegs().size(); ++i) {
       Record *Reg = Pattern.getDstRegs()[i];
       if (!Reg->isSubClassOf("Register") || Reg == HandledReg) continue;
       ++NumSrcResults;
     }
-  }    
-  
+  }
+
   assert(Ops.size() >= NumSrcResults && "Didn't provide enough results");
   Ops.resize(NumSrcResults);
 
-  // If the matched pattern covers nodes which define a flag result, emit a node
+  // If the matched pattern covers nodes which define a glue result, emit a node
   // that tells the matcher about them so that it can update their results.
-  if (!MatchedFlagResultNodes.empty())
-    AddMatcher(new MarkFlagResultsMatcher(MatchedFlagResultNodes.data(),
-                                          MatchedFlagResultNodes.size()));
-  
+  if (!MatchedGlueResultNodes.empty())
+    AddMatcher(new MarkGlueResultsMatcher(MatchedGlueResultNodes.data(),
+                                          MatchedGlueResultNodes.size()));
+
   AddMatcher(new CompleteMatchMatcher(Ops.data(), Ops.size(), Pattern));
 }
 
@@ -896,12 +895,12 @@ Matcher *llvm::ConvertPatternToMatcher(const PatternToMatch &Pattern,
   // Generate the code for the matcher.
   if (Gen.EmitMatcherCode(Variant))
     return 0;
-  
+
   // FIXME2: Kill extra MoveParent commands at the end of the matcher sequence.
   // FIXME2: Split result code out to another table, and make the matcher end
   // with an "Emit <index>" command.  This allows result generation stuff to be
   // shared and factored?
-  
+
   // If the match succeeds, then we generate Pattern.
   Gen.EmitResultCode();
 
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index c73bdb9efb68..3169ea1e16af 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -75,7 +75,7 @@ static void ContractNodes(OwningPtr<Matcher> &MatcherPtr,
   // MarkFlagResults->EmitNode->CompleteMatch when we can to encourage
   // MorphNodeTo formation.  This is safe because MarkFlagResults never refers
   // to the root of the pattern.
-  if (isa<EmitNodeMatcher>(N) && isa<MarkFlagResultsMatcher>(N->getNext()) &&
+  if (isa<EmitNodeMatcher>(N) && isa<MarkGlueResultsMatcher>(N->getNext()) &&
       isa<CompleteMatchMatcher>(N->getNext()->getNext())) {
     // Unlink the two nodes from the list.
     Matcher *EmitNode = MatcherPtr.take();
@@ -100,7 +100,7 @@ static void ContractNodes(OwningPtr<Matcher> &MatcherPtr,
         if (CM->getResult(i) != RootResultFirst+i)
           ResultsMatch = false;
       
-      // If the selected node defines a subset of the flag/chain results, we
+      // If the selected node defines a subset of the glue/chain results, we
       // can't use MorphNodeTo.  For example, we can't use MorphNodeTo if the
       // matched pattern has a chain but the root node doesn't.
       const PatternToMatch &Pattern = CM->getPattern();
@@ -109,23 +109,23 @@ static void ContractNodes(OwningPtr<Matcher> &MatcherPtr,
           Pattern.getSrcPattern()->NodeHasProperty(SDNPHasChain, CGP))
         ResultsMatch = false;
 
-      // If the matched node has a flag and the output root doesn't, we can't
+      // If the matched node has glue and the output root doesn't, we can't
       // use MorphNodeTo.
       //
-      // NOTE: Strictly speaking, we don't have to check for the flag here
+      // NOTE: Strictly speaking, we don't have to check for glue here
       // because the code in the pattern generator doesn't handle it right.  We
       // do it anyway for thoroughness.
       if (!EN->hasOutFlag() &&
-          Pattern.getSrcPattern()->NodeHasProperty(SDNPOutFlag, CGP))
+          Pattern.getSrcPattern()->NodeHasProperty(SDNPOutGlue, CGP))
         ResultsMatch = false;
       
       
       // If the root result node defines more results than the source root node
-      // *and* has a chain or flag input, then we can't match it because it
-      // would end up replacing the extra result with the chain/flag.
+      // *and* has a chain or glue input, then we can't match it because it
+      // would end up replacing the extra result with the chain/glue.
 #if 0
-      if ((EN->hasFlag() || EN->hasChain()) &&
-          EN->getNumNonChainFlagVTs() > ... need to get no results reliably ...)
+      if ((EN->hasGlue() || EN->hasChain()) &&
+          EN->getNumNonChainGlueVTs() > ... need to get no results reliably ...)
         ResultMatch = false;
 #endif
           
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 3284366c6dd8..90a2af21f3a4 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -13,6 +13,7 @@
 #include "X86DisassemblerTables.h"
 #include "X86RecognizableInstr.h"
 #include "ARMDecoderEmitter.h"
+#include "FixedLenDecoderEmitter.h"
 
 using namespace llvm;
 using namespace llvm::X86Disassembler;
@@ -94,7 +95,7 @@ using namespace llvm::X86Disassembler;
 ///   instruction.
 
 void DisassemblerEmitter::run(raw_ostream &OS) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
 
   OS << "/*===- TableGen'erated file "
      << "---------------------------------------*- C -*-===*\n"
@@ -127,11 +128,11 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
   }
 
   // Fixed-instruction-length targets use a common disassembler.
+  // ARM use its own implementation for now.
   if (Target.getName() == "ARM") {
     ARMDecoderEmitter(Records).run(OS);
     return;
   }  
 
-  throw TGError(Target.getTargetRecord()->getLoc(),
-                "Unable to generate disassembler for this target");
+  FixedLenDecoderEmitter(Records).run(OS);
 }
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 525fffb0ee2c..020a4a312d7b 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -35,22 +35,22 @@ using namespace llvm;
 ///////////////////////////////////////////////////////////
 
 namespace {
-  
+
   class EnumEmitter {
   private:
     std::string Name;
     std::vector<std::string> Entries;
   public:
-    EnumEmitter(const char *N) : Name(N) { 
+    EnumEmitter(const char *N) : Name(N) {
     }
-    int addEntry(const char *e) { 
+    int addEntry(const char *e) {
       Entries.push_back(std::string(e));
-      return Entries.size() - 1; 
+      return Entries.size() - 1;
     }
     void emit(raw_ostream &o, unsigned int &i) {
       o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
       i += 2;
-      
+
       unsigned int index = 0;
       unsigned int numEntries = Entries.size();
       for (index = 0; index < numEntries; ++index) {
@@ -59,15 +59,15 @@ namespace {
           o << ",";
         o << "\n";
       }
-      
+
       i -= 2;
       o.indent(i) << "};" << "\n";
     }
-    
+
     void emitAsFlags(raw_ostream &o, unsigned int &i) {
       o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
       i += 2;
-      
+
       unsigned int index = 0;
       unsigned int numEntries = Entries.size();
       unsigned int flag = 1;
@@ -78,7 +78,7 @@ namespace {
         o << "\n";
         flag <<= 1;
       }
-      
+
       i -= 2;
       o.indent(i) << "};" << "\n";
     }
@@ -89,7 +89,7 @@ namespace {
     virtual ~ConstantEmitter() { }
     virtual void emit(raw_ostream &o, unsigned int &i) = 0;
   };
-  
+
   class LiteralConstantEmitter : public ConstantEmitter {
   private:
     bool IsNumber;
@@ -98,7 +98,7 @@ namespace {
       const char* String;
     };
   public:
-    LiteralConstantEmitter(int number = 0) : 
+    LiteralConstantEmitter(int number = 0) :
       IsNumber(true),
       Number(number) {
     }
@@ -117,7 +117,7 @@ namespace {
         o << String;
     }
   };
-  
+
   class CompoundConstantEmitter : public ConstantEmitter {
   private:
     unsigned int Padding;
@@ -127,7 +127,7 @@ namespace {
     }
     CompoundConstantEmitter &addEntry(ConstantEmitter *e) {
       Entries.push_back(e);
-      
+
       return *this;
     }
     ~CompoundConstantEmitter() {
@@ -140,12 +140,12 @@ namespace {
     void emit(raw_ostream &o, unsigned int &i) {
       o << "{" << "\n";
       i += 2;
-  
+
       unsigned int index;
       unsigned int numEntries = Entries.size();
-      
+
       unsigned int numToPrint;
-      
+
       if (Padding) {
         if (numEntries > Padding) {
           fprintf(stderr, "%u entries but %u padding\n", numEntries, Padding);
@@ -155,24 +155,24 @@ namespace {
       } else {
         numToPrint = numEntries;
       }
-          
+
       for (index = 0; index < numToPrint; ++index) {
         o.indent(i);
         if (index < numEntries)
           Entries[index]->emit(o, i);
         else
           o << "-1";
-        
+
         if (index < (numToPrint - 1))
           o << ",";
         o << "\n";
       }
-      
+
       i -= 2;
       o.indent(i) << "}";
     }
   };
-  
+
   class FlagsConstantEmitter : public ConstantEmitter {
   private:
     std::vector<std::string> Flags;
@@ -188,7 +188,7 @@ namespace {
       unsigned int numFlags = Flags.size();
       if (numFlags == 0)
         o << "0";
-      
+
       for (index = 0; index < numFlags; ++index) {
         o << Flags[index].c_str();
         if (index < (numFlags - 1))
@@ -218,15 +218,15 @@ void populateOperandOrder(CompoundConstantEmitter *operandOrder,
                           const CodeGenInstruction &inst,
                           unsigned syntax) {
   unsigned int numArgs = 0;
-  
+
   AsmWriterInst awInst(inst, syntax, -1, -1);
-  
+
   std::vector<AsmWriterOperand>::iterator operandIterator;
-  
+
   for (operandIterator = awInst.Operands.begin();
        operandIterator != awInst.Operands.end();
        ++operandIterator) {
-    if (operandIterator->OperandType == 
+    if (operandIterator->OperandType ==
         AsmWriterOperand::isMachineInstrOperand) {
       operandOrder->addEntry(
         new LiteralConstantEmitter(operandIterator->CGIOpNo));
@@ -274,7 +274,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   REG("SEGMENT_REG");
   REG("DEBUG_REG");
   REG("CONTROL_REG");
-  
+
   IMM("i8imm");
   IMM("i16imm");
   IMM("i16i8imm");
@@ -284,7 +284,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   IMM("i64i8imm");
   IMM("i64i32imm");
   IMM("SSECC");
-  
+
   // all R, I, R, I, R
   MEM("i8mem");
   MEM("i8mem_NOREX");
@@ -306,12 +306,12 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   MEM("f128mem");
   MEM("f256mem");
   MEM("opaque512mem");
-  
+
   // all R, I, R, I
   LEA("lea32mem");
   LEA("lea64_32mem");
   LEA("lea64mem");
-  
+
   // all I
   PCR("i16imm_pcrel");
   PCR("i32imm_pcrel");
@@ -322,7 +322,12 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   PCR("offset32");
   PCR("offset64");
   PCR("brtarget");
-  
+  PCR("uncondbrtarget");
+  PCR("bltarget");
+
+  // all I, ARM mode only, conditional/unconditional
+  PCR("br_target");
+  PCR("bl_target");
   return 1;
 }
 
@@ -344,19 +349,19 @@ static void X86PopulateOperands(
   const CodeGenInstruction &inst) {
   if (!inst.TheDef->isSubClassOf("X86Inst"))
     return;
-  
+
   unsigned int index;
-  unsigned int numOperands = inst.OperandList.size();
-  
+  unsigned int numOperands = inst.Operands.size();
+
   for (index = 0; index < numOperands; ++index) {
-    const CodeGenInstruction::OperandInfo &operandInfo = 
-      inst.OperandList[index];
+    const CGIOperandList::OperandInfo &operandInfo = inst.Operands[index];
     Record &rec = *operandInfo.Rec;
-    
-    if (X86TypeFromOpName(operandTypes[index], rec.getName())) {
+
+    if (X86TypeFromOpName(operandTypes[index], rec.getName()) &&
+        !rec.isSubClassOf("PointerLikeRegClass")) {
       errs() << "Operand type: " << rec.getName().c_str() << "\n";
       errs() << "Operand name: " << operandInfo.Name.c_str() << "\n";
-      errs() << "Instruction mame: " << inst.TheDef->getName().c_str() << "\n";
+      errs() << "Instruction name: " << inst.TheDef->getName().c_str() << "\n";
       llvm_unreachable("Unhandled type");
     }
   }
@@ -375,9 +380,9 @@ static inline void decorate1(
   const char *opName,
   const char *opFlag) {
   unsigned opIndex;
-  
-  opIndex = inst.getOperandNamed(std::string(opName));
-  
+
+  opIndex = inst.Operands.getOperandNamed(std::string(opName));
+
   operandFlags[opIndex]->addEntry(opFlag);
 }
 
@@ -414,7 +419,7 @@ static inline void decorate1(
 }
 
 /// X86ExtractSemantics - Performs various checks on the name of an X86
-///   instruction to determine what sort of an instruction it is and then adds 
+///   instruction to determine what sort of an instruction it is and then adds
 ///   the appropriate flags to the instruction and its operands
 ///
 /// @arg instType     - A reference to the type for the instruction as a whole
@@ -425,7 +430,7 @@ static void X86ExtractSemantics(
   FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
   const CodeGenInstruction &inst) {
   const std::string &name = inst.TheDef->getName();
-    
+
   if (name.find("MOV") != name.npos) {
     if (name.find("MOV_V") != name.npos) {
       // ignore (this is a pseudoinstruction)
@@ -450,7 +455,7 @@ static void X86ExtractSemantics(
       MOV("src", "dst");
     }
   }
-  
+
   if (name.find("JMP") != name.npos ||
       name.find("J") == 0) {
     if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
@@ -459,10 +464,14 @@ static void X86ExtractSemantics(
       BRANCH("dst");
     }
   }
-  
+
   if (name.find("PUSH") != name.npos) {
-    if (name.find("FS") != name.npos ||
-        name.find("GS") != name.npos) {
+    if (name.find("CS") != name.npos ||
+        name.find("DS") != name.npos ||
+        name.find("ES") != name.npos ||
+        name.find("FS") != name.npos ||
+        name.find("GS") != name.npos ||
+        name.find("SS") != name.npos) {
       instType.set("kInstructionTypePush");
       // TODO add support for fixed operands
     } else if (name.find("F") != name.npos) {
@@ -477,12 +486,16 @@ static void X86ExtractSemantics(
       PUSH("reg");
     }
   }
-  
+
   if (name.find("POP") != name.npos) {
     if (name.find("POPCNT") != name.npos) {
       // ignore (not a real pop)
-    } else if (name.find("FS") != name.npos ||
-             name.find("GS") != name.npos) {
+    } else if (name.find("CS") != name.npos ||
+               name.find("DS") != name.npos ||
+               name.find("ES") != name.npos ||
+               name.find("FS") != name.npos ||
+               name.find("GS") != name.npos ||
+               name.find("SS") != name.npos) {
       instType.set("kInstructionTypePop");
       // TODO add support for fixed operands
     } else if (name.find("F") != name.npos) {
@@ -495,7 +508,7 @@ static void X86ExtractSemantics(
       POP("reg");
     }
   }
-  
+
   if (name.find("CALL") != name.npos) {
     if (name.find("ADJ") != name.npos) {
       // ignore (not a call)
@@ -509,7 +522,7 @@ static void X86ExtractSemantics(
       CALL("dst");
     }
   }
-  
+
   if (name.find("RET") != name.npos) {
     RETURN();
   }
@@ -553,12 +566,20 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   REG("QPR");
   REG("QQPR");
   REG("QQQQPR");
-  
+
   IMM("i32imm");
+  IMM("i32imm_hilo16");
   IMM("bf_inv_mask_imm");
+  IMM("lsb_pos_imm");
+  IMM("width_imm");
   IMM("jtblock_operand");
   IMM("nohash_imm");
+  IMM("p_imm");
+  IMM("c_imm");
+  IMM("imod_op");
+  IMM("iflags_op");
   IMM("cpinst_operand");
+  IMM("setend_op");
   IMM("cps_opt");
   IMM("vfp_f64imm");
   IMM("vfp_f32imm");
@@ -566,46 +587,73 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   IMM("msr_mask");
   IMM("neg_zero");
   IMM("imm0_31");
+  IMM("imm0_31_m1");
   IMM("nModImm");
   IMM("imm0_4095");
   IMM("jt2block_operand");
   IMM("t_imm_s4");
   IMM("pclabel");
+  IMM("adrlabel");
+  IMM("t_adrlabel");
+  IMM("t2adrlabel");
   IMM("shift_imm");
-  
+  IMM("neon_vcvt_imm32");
+
   MISC("brtarget", "kOperandTypeARMBranchTarget");                // ?
+  MISC("uncondbrtarget", "kOperandTypeARMBranchTarget");           // ?
+  MISC("t_brtarget", "kOperandTypeARMBranchTarget");              // ?
+  MISC("t_bcctarget", "kOperandTypeARMBranchTarget");             // ?
+  MISC("t_cbtarget", "kOperandTypeARMBranchTarget");              // ?
+  MISC("bltarget", "kOperandTypeARMBranchTarget");                // ?
+
+  MISC("br_target", "kOperandTypeARMBranchTarget");                // ?
+  MISC("bl_target", "kOperandTypeARMBranchTarget");                // ?
+
+  MISC("t_bltarget", "kOperandTypeARMBranchTarget");              // ?
+  MISC("t_blxtarget", "kOperandTypeARMBranchTarget");             // ?
   MISC("so_reg", "kOperandTypeARMSoReg");                         // R, R, I
+  MISC("shift_so_reg", "kOperandTypeARMSoReg");                   // R, R, I
   MISC("t2_so_reg", "kOperandTypeThumb2SoReg");                   // R, I
   MISC("so_imm", "kOperandTypeARMSoImm");                         // I
+  MISC("rot_imm", "kOperandTypeARMRotImm");                       // I
   MISC("t2_so_imm", "kOperandTypeThumb2SoImm");                   // I
   MISC("so_imm2part", "kOperandTypeARMSoImm2Part");               // I
   MISC("pred", "kOperandTypeARMPredicate");                       // I, R
   MISC("it_pred", "kOperandTypeARMPredicate");                    // I
+  MISC("addrmode_imm12", "kOperandTypeAddrModeImm12");            // R, I
+  MISC("ldst_so_reg", "kOperandTypeLdStSOReg");                   // R, R, I
   MISC("addrmode2", "kOperandTypeARMAddrMode2");                  // R, R, I
   MISC("am2offset", "kOperandTypeARMAddrMode2Offset");            // R, I
   MISC("addrmode3", "kOperandTypeARMAddrMode3");                  // R, R, I
   MISC("am3offset", "kOperandTypeARMAddrMode3Offset");            // R, I
-  MISC("addrmode4", "kOperandTypeARMAddrMode4");                  // R, I
+  MISC("ldstm_mode", "kOperandTypeARMLdStmMode");                 // I
   MISC("addrmode5", "kOperandTypeARMAddrMode5");                  // R, I
   MISC("addrmode6", "kOperandTypeARMAddrMode6");                  // R, R, I, I
   MISC("am6offset", "kOperandTypeARMAddrMode6Offset");            // R, I, I
+  MISC("addrmode6dup", "kOperandTypeARMAddrMode6");               // R, R, I, I
   MISC("addrmodepc", "kOperandTypeARMAddrModePC");                // R, I
   MISC("reglist", "kOperandTypeARMRegisterList");                 // I, R, ...
+  MISC("dpr_reglist", "kOperandTypeARMDPRRegisterList");          // I, R, ...
+  MISC("spr_reglist", "kOperandTypeARMSPRRegisterList");          // I, R, ...
   MISC("it_mask", "kOperandTypeThumbITMask");                     // I
   MISC("t2addrmode_imm8", "kOperandTypeThumb2AddrModeImm8");      // R, I
   MISC("t2am_imm8_offset", "kOperandTypeThumb2AddrModeImm8Offset");//I
   MISC("t2addrmode_imm12", "kOperandTypeThumb2AddrModeImm12");    // R, I
   MISC("t2addrmode_so_reg", "kOperandTypeThumb2AddrModeSoReg");   // R, R, I
   MISC("t2addrmode_imm8s4", "kOperandTypeThumb2AddrModeImm8s4");  // R, I
-  MISC("t2am_imm8s4_offset", "kOperandTypeThumb2AddrModeImm8s4Offset");  
+  MISC("t2am_imm8s4_offset", "kOperandTypeThumb2AddrModeImm8s4Offset");
                                                                   // R, I
   MISC("tb_addrmode", "kOperandTypeARMTBAddrMode");               // I
-  MISC("t_addrmode_s1", "kOperandTypeThumbAddrModeS1");           // R, I, R
-  MISC("t_addrmode_s2", "kOperandTypeThumbAddrModeS2");           // R, I, R
-  MISC("t_addrmode_s4", "kOperandTypeThumbAddrModeS4");           // R, I, R
+  MISC("t_addrmode_rrs1", "kOperandTypeThumbAddrModeRegS");       // R, R
+  MISC("t_addrmode_rrs2", "kOperandTypeThumbAddrModeRegS");       // R, R
+  MISC("t_addrmode_rrs4", "kOperandTypeThumbAddrModeRegS");       // R, R
+  MISC("t_addrmode_is1", "kOperandTypeThumbAddrModeImmS");        // R, I
+  MISC("t_addrmode_is2", "kOperandTypeThumbAddrModeImmS");        // R, I
+  MISC("t_addrmode_is4", "kOperandTypeThumbAddrModeImmS");        // R, I
   MISC("t_addrmode_rr", "kOperandTypeThumbAddrModeRR");           // R, R
   MISC("t_addrmode_sp", "kOperandTypeThumbAddrModeSP");           // R, I
-  
+  MISC("t_addrmode_pc", "kOperandTypeThumbAddrModePC");           // R, I
+
   return 1;
 }
 
@@ -631,25 +679,24 @@ static void ARMPopulateOperands(
   if (!inst.TheDef->isSubClassOf("InstARM") &&
       !inst.TheDef->isSubClassOf("InstThumb"))
     return;
-  
+
   unsigned int index;
-  unsigned int numOperands = inst.OperandList.size();
-  
+  unsigned int numOperands = inst.Operands.size();
+
   if (numOperands > EDIS_MAX_OPERANDS) {
-    errs() << "numOperands == " << numOperands << " > " << 
+    errs() << "numOperands == " << numOperands << " > " <<
       EDIS_MAX_OPERANDS << '\n';
     llvm_unreachable("Too many operands");
   }
-  
+
   for (index = 0; index < numOperands; ++index) {
-    const CodeGenInstruction::OperandInfo &operandInfo = 
-    inst.OperandList[index];
+    const CGIOperandList::OperandInfo &operandInfo = inst.Operands[index];
     Record &rec = *operandInfo.Rec;
-    
+
     if (ARMFlagFromOpName(operandTypes[index], rec.getName())) {
       errs() << "Operand type: " << rec.getName() << '\n';
       errs() << "Operand name: " << operandInfo.Name << '\n';
-      errs() << "Instruction mame: " << inst.TheDef->getName() << '\n';
+      errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
       llvm_unreachable("Unhandled type");
     }
   }
@@ -661,7 +708,7 @@ static void ARMPopulateOperands(
 }
 
 /// ARMExtractSemantics - Performs various checks on the name of an ARM
-///   instruction to determine what sort of an instruction it is and then adds 
+///   instruction to determine what sort of an instruction it is and then adds
 ///   the appropriate flags to the instruction and its operands
 ///
 /// @arg instType     - A reference to the type for the instruction as a whole
@@ -674,7 +721,7 @@ static void ARMExtractSemantics(
   FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
   const CodeGenInstruction &inst) {
   const std::string &name = inst.TheDef->getName();
-  
+
   if (name == "tBcc"   ||
       name == "tB"     ||
       name == "t2Bcc"  ||
@@ -683,7 +730,7 @@ static void ARMExtractSemantics(
       name == "tCBNZ") {
     BRANCH("target");
   }
-  
+
   if (name == "tBLr9"      ||
       name == "BLr9_pred"  ||
       name == "tBLXi_r9"   ||
@@ -692,9 +739,9 @@ static void ARMExtractSemantics(
       name == "t2BXJ"      ||
       name == "BXJ") {
     BRANCH("func");
-    
+
     unsigned opIndex;
-    opIndex = inst.getOperandNamed("func");
+    opIndex = inst.Operands.getOperandNamed("func");
     if (operandTypes[opIndex]->is("kOperandTypeImmediate"))
       operandTypes[opIndex]->set("kOperandTypeARMBranchTarget");
   }
@@ -702,7 +749,7 @@ static void ARMExtractSemantics(
 
 #undef BRANCH
 
-/// populateInstInfo - Fills an array of InstInfos with information about each 
+/// populateInstInfo - Fills an array of InstInfos with information about each
 ///   instruction in a target
 ///
 /// @arg infoArray  - The array of InstInfo objects to populate
@@ -711,45 +758,45 @@ static void populateInstInfo(CompoundConstantEmitter &infoArray,
                              CodeGenTarget &target) {
   const std::vector<const CodeGenInstruction*> &numberedInstructions =
     target.getInstructionsByEnumValue();
-  
+
   unsigned int index;
   unsigned int numInstructions = numberedInstructions.size();
-  
+
   for (index = 0; index < numInstructions; ++index) {
     const CodeGenInstruction& inst = *numberedInstructions[index];
-    
+
     CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
     infoArray.addEntry(infoStruct);
-    
+
     LiteralConstantEmitter *instType = new LiteralConstantEmitter;
     infoStruct->addEntry(instType);
-    
-    LiteralConstantEmitter *numOperandsEmitter = 
-      new LiteralConstantEmitter(inst.OperandList.size());
+
+    LiteralConstantEmitter *numOperandsEmitter =
+      new LiteralConstantEmitter(inst.Operands.size());
     infoStruct->addEntry(numOperandsEmitter);
-    
+
     CompoundConstantEmitter *operandTypeArray = new CompoundConstantEmitter;
     infoStruct->addEntry(operandTypeArray);
-    
+
     LiteralConstantEmitter *operandTypes[EDIS_MAX_OPERANDS];
-                         
+
     CompoundConstantEmitter *operandFlagArray = new CompoundConstantEmitter;
     infoStruct->addEntry(operandFlagArray);
-        
+
     FlagsConstantEmitter *operandFlags[EDIS_MAX_OPERANDS];
-    
-    for (unsigned operandIndex = 0; 
-         operandIndex < EDIS_MAX_OPERANDS; 
+
+    for (unsigned operandIndex = 0;
+         operandIndex < EDIS_MAX_OPERANDS;
          ++operandIndex) {
       operandTypes[operandIndex] = new LiteralConstantEmitter;
       operandTypeArray->addEntry(operandTypes[operandIndex]);
-      
+
       operandFlags[operandIndex] = new FlagsConstantEmitter;
       operandFlagArray->addEntry(operandFlags[operandIndex]);
     }
- 
+
     unsigned numSyntaxes = 0;
-    
+
     if (target.getName() == "X86") {
       X86PopulateOperands(operandTypes, inst);
       X86ExtractSemantics(*instType, operandFlags, inst);
@@ -760,24 +807,24 @@ static void populateInstInfo(CompoundConstantEmitter &infoArray,
       ARMExtractSemantics(*instType, operandTypes, operandFlags, inst);
       numSyntaxes = 1;
     }
-    
-    CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;    
-    
+
+    CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
+
     infoStruct->addEntry(operandOrderArray);
-    
-    for (unsigned syntaxIndex = 0; 
-         syntaxIndex < EDIS_MAX_SYNTAXES; 
+
+    for (unsigned syntaxIndex = 0;
+         syntaxIndex < EDIS_MAX_SYNTAXES;
          ++syntaxIndex) {
-      CompoundConstantEmitter *operandOrder = 
+      CompoundConstantEmitter *operandOrder =
         new CompoundConstantEmitter(EDIS_MAX_OPERANDS);
-      
+
       operandOrderArray->addEntry(operandOrder);
-      
+
       if (syntaxIndex < numSyntaxes) {
         populateOperandOrder(operandOrder, inst, syntaxIndex);
       }
     }
-    
+
     infoStruct = NULL;
   }
 }
@@ -793,25 +840,30 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
   operandTypes.addEntry("kOperandTypeARMBranchTarget");
   operandTypes.addEntry("kOperandTypeARMSoReg");
   operandTypes.addEntry("kOperandTypeARMSoImm");
+  operandTypes.addEntry("kOperandTypeARMRotImm");
   operandTypes.addEntry("kOperandTypeARMSoImm2Part");
   operandTypes.addEntry("kOperandTypeARMPredicate");
+  operandTypes.addEntry("kOperandTypeAddrModeImm12");
+  operandTypes.addEntry("kOperandTypeLdStSOReg");
   operandTypes.addEntry("kOperandTypeARMAddrMode2");
   operandTypes.addEntry("kOperandTypeARMAddrMode2Offset");
   operandTypes.addEntry("kOperandTypeARMAddrMode3");
   operandTypes.addEntry("kOperandTypeARMAddrMode3Offset");
-  operandTypes.addEntry("kOperandTypeARMAddrMode4");
+  operandTypes.addEntry("kOperandTypeARMLdStmMode");
   operandTypes.addEntry("kOperandTypeARMAddrMode5");
   operandTypes.addEntry("kOperandTypeARMAddrMode6");
   operandTypes.addEntry("kOperandTypeARMAddrMode6Offset");
   operandTypes.addEntry("kOperandTypeARMAddrModePC");
   operandTypes.addEntry("kOperandTypeARMRegisterList");
+  operandTypes.addEntry("kOperandTypeARMDPRRegisterList");
+  operandTypes.addEntry("kOperandTypeARMSPRRegisterList");
   operandTypes.addEntry("kOperandTypeARMTBAddrMode");
   operandTypes.addEntry("kOperandTypeThumbITMask");
-  operandTypes.addEntry("kOperandTypeThumbAddrModeS1");
-  operandTypes.addEntry("kOperandTypeThumbAddrModeS2");
-  operandTypes.addEntry("kOperandTypeThumbAddrModeS4");
+  operandTypes.addEntry("kOperandTypeThumbAddrModeRegS");
+  operandTypes.addEntry("kOperandTypeThumbAddrModeImmS");
   operandTypes.addEntry("kOperandTypeThumbAddrModeRR");
   operandTypes.addEntry("kOperandTypeThumbAddrModeSP");
+  operandTypes.addEntry("kOperandTypeThumbAddrModePC");
   operandTypes.addEntry("kOperandTypeThumb2SoReg");
   operandTypes.addEntry("kOperandTypeThumb2SoImm");
   operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8");
@@ -821,16 +873,16 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
   operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4");
   operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4Offset");
   operandTypes.emit(o, i);
-  
+
   o << "\n";
-  
+
   EnumEmitter operandFlags("OperandFlags");
   operandFlags.addEntry("kOperandFlagSource");
   operandFlags.addEntry("kOperandFlagTarget");
   operandFlags.emitAsFlags(o, i);
-  
+
   o << "\n";
-  
+
   EnumEmitter instructionTypes("InstructionTypes");
   instructionTypes.addEntry("kInstructionTypeNone");
   instructionTypes.addEntry("kInstructionTypeMove");
@@ -840,25 +892,25 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
   instructionTypes.addEntry("kInstructionTypeCall");
   instructionTypes.addEntry("kInstructionTypeReturn");
   instructionTypes.emit(o, i);
-  
+
   o << "\n";
 }
 
 void EDEmitter::run(raw_ostream &o) {
   unsigned int i = 0;
-  
+
   CompoundConstantEmitter infoArray;
-  CodeGenTarget target;
-  
+  CodeGenTarget target(Records);
+
   populateInstInfo(infoArray, target);
-  
+
   emitCommonEnums(o, i);
-  
+
   o << "namespace {\n";
-  
+
   o << "llvm::EDInstInfo instInfo" << target.getName().c_str() << "[] = ";
   infoArray.emit(o, i);
   o << ";" << "\n";
-  
+
   o << "}\n";
 }
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 6c16fcfaa8a2..f01de1dcfce6 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -20,6 +20,7 @@
 #include "FastISelEmitter.h"
 #include "Record.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/VectorExtras.h"
 using namespace llvm;
 
@@ -65,23 +66,23 @@ struct OperandsSignature {
         return true;
       }
     }
-    
+
     const CodeGenRegisterClass *DstRC = 0;
-    
+
     for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) {
       TreePatternNode *Op = InstPatNode->getChild(i);
-      
+
       // For now, filter out any operand with a predicate.
       // For now, filter out any operand with multiple values.
       if (!Op->getPredicateFns().empty() ||
           Op->getNumTypes() != 1)
         return false;
-      
+
       assert(Op->hasTypeSet(0) && "Type infererence not done?");
       // For now, all the operands must have the same type.
       if (Op->getType(0) != VT)
         return false;
-      
+
       if (!Op->isLeaf()) {
         if (Op->getOperator()->getName() == "imm") {
           Operands.push_back("i");
@@ -107,7 +108,7 @@ struct OperandsSignature {
         RC = Target.getRegisterClassForRegister(OpLeafRec);
       else
         return false;
-        
+
       // For now, this needs to be a register class of some sort.
       if (!RC)
         return false;
@@ -212,7 +213,7 @@ class FastISelMap {
   typedef std::map<MVT::SimpleValueType, PredMap> RetPredMap;
   typedef std::map<MVT::SimpleValueType, RetPredMap> TypeRetPredMap;
   typedef std::map<std::string, TypeRetPredMap> OpcodeTypeRetPredMap;
-  typedef std::map<OperandsSignature, OpcodeTypeRetPredMap> 
+  typedef std::map<OperandsSignature, OpcodeTypeRetPredMap>
             OperandsOpcodeTypeRetPredMap;
 
   OperandsOpcodeTypeRetPredMap SimplePatterns;
@@ -263,9 +264,9 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     if (!Op->isSubClassOf("Instruction"))
       continue;
     CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op);
-    if (II.OperandList.empty())
+    if (II.Operands.size() == 0)
       continue;
-      
+
     // For now, ignore multi-instruction patterns.
     bool MultiInsts = false;
     for (unsigned i = 0, e = Dst->getNumChildren(); i != e; ++i) {
@@ -285,7 +286,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     const CodeGenRegisterClass *DstRC = 0;
     std::string SubRegNo;
     if (Op->getName() != "EXTRACT_SUBREG") {
-      Record *Op0Rec = II.OperandList[0].Rec;
+      Record *Op0Rec = II.Operands[0].Rec;
       if (!Op0Rec->isSubClassOf("RegisterClass"))
         continue;
       DstRC = &Target.getRegisterClass(Op0Rec);
@@ -295,7 +296,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
       // If this isn't a leaf, then continue since the register classes are
       // a bit too complicated for now.
       if (!Dst->getChild(1)->isLeaf()) continue;
-      
+
       DefInit *SR = dynamic_cast<DefInit*>(Dst->getChild(1)->getLeafValue());
       if (SR)
         SubRegNo = getQualifiedName(SR->getDef());
@@ -310,7 +311,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
 
     // Ignore multiple result nodes for now.
     if (InstPatNode->getNumTypes() > 1) continue;
-    
+
     Record *InstPatOp = InstPatNode->getOperator();
     std::string OpcodeName = getOpcodeName(InstPatOp, CGP);
     MVT::SimpleValueType RetVT = MVT::isVoid;
@@ -334,7 +335,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
     OperandsSignature Operands;
     if (!Operands.initialize(InstPatNode, Target, VT))
       continue;
-    
+
     std::vector<std::string>* PhysRegInputs = new std::vector<std::string>();
     if (!InstPatNode->isLeaf() &&
         (InstPatNode->getOperator()->getName() == "imm" ||
@@ -347,7 +348,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
           PhysRegInputs->push_back("");
           continue;
         }
-        
+
         DefInit *OpDI = dynamic_cast<DefInit*>(Op->getLeafValue());
         Record *OpLeafRec = OpDI->getDef();
         std::string PhysReg;
@@ -355,7 +356,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
           PhysReg += static_cast<StringInit*>(OpLeafRec->getValue( \
                      "Namespace")->getValue())->getValue();
           PhysReg += "::";
-          
+
           std::vector<CodeGenRegister> Regs = Target.getRegisters();
           for (unsigned i = 0; i < Regs.size(); ++i) {
             if (Regs[i].TheDef == OpLeafRec) {
@@ -364,7 +365,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
             }
           }
         }
-      
+
         PhysRegInputs->push_back(PhysReg);
       }
     } else
@@ -380,9 +381,10 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
       SubRegNo,
       PhysRegInputs
     };
-    assert(!SimplePatterns[Operands][OpcodeName][VT][RetVT]
-            .count(PredicateCheck) &&
-           "Duplicate pattern!");
+    if (SimplePatterns[Operands][OpcodeName][VT][RetVT]
+            .count(PredicateCheck))
+      throw TGError(Pattern.getSrcRecord()->getLoc(), "Duplicate record!");
+
     SimplePatterns[Operands][OpcodeName][VT][RetVT][PredicateCheck] = Memo;
   }
 }
@@ -429,7 +431,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                  PI != PE; ++PI) {
               std::string PredicateCheck = PI->first;
               const InstructionMemo &Memo = PI->second;
-  
+
               if (PredicateCheck.empty()) {
                 assert(!HasPred &&
                        "Multiple instructions match, at least one has "
@@ -439,14 +441,14 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                 OS << "  ";
                 HasPred = true;
               }
-              
+
               for (unsigned i = 0; i < Memo.PhysRegs->size(); ++i) {
                 if ((*Memo.PhysRegs)[i] != "")
                   OS << "  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, "
                      << "TII.get(TargetOpcode::COPY), "
                      << (*Memo.PhysRegs)[i] << ").addReg(Op" << i << ");\n";
               }
-              
+
               OS << "  return FastEmitInst_";
               if (Memo.SubRegNo.empty()) {
                 Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
@@ -462,10 +464,10 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
                 OS << Memo.SubRegNo;
                 OS << ");\n";
               }
-              
+
               if (HasPred)
                 OS << "  }\n";
-              
+
             }
             // Return 0 if none of the predicates were satisfied.
             if (HasPred)
@@ -473,7 +475,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
             OS << "}\n";
             OS << "\n";
           }
-          
+
           // Emit one function for the type that demultiplexes on return type.
           OS << "unsigned FastEmit_"
              << getLegalCName(Opcode) << "_"
@@ -496,7 +498,7 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
             OS << ");\n";
           }
           OS << "  default: return 0;\n}\n}\n\n";
-          
+
         } else {
           // Non-variadic return type.
           OS << "unsigned FastEmit_"
@@ -508,13 +510,13 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
             OS << ", ";
           Operands.PrintParameters(OS);
           OS << ") {\n";
-          
+
           OS << "  if (RetVT.SimpleTy != " << getName(RM.begin()->first)
              << ")\n    return 0;\n";
-          
+
           const PredMap &PM = RM.begin()->second;
           bool HasPred = false;
-          
+
           // Emit code for each possible instruction. There may be
           // multiple if there are subtarget concerns.
           for (PredMap::const_iterator PI = PM.begin(), PE = PM.end(); PI != PE;
@@ -531,16 +533,16 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
               OS << "  ";
               HasPred = true;
             }
-            
+
             for (unsigned i = 0; i < Memo.PhysRegs->size(); ++i) {
               if ((*Memo.PhysRegs)[i] != "")
                 OS << "  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, "
                    << "TII.get(TargetOpcode::COPY), "
                    << (*Memo.PhysRegs)[i] << ").addReg(Op" << i << ");\n";
             }
-            
+
             OS << "  return FastEmitInst_";
-            
+
             if (Memo.SubRegNo.empty()) {
               Operands.PrintManglingSuffix(OS, *Memo.PhysRegs);
               OS << "(" << InstNS << Memo.Name << ", ";
@@ -554,11 +556,11 @@ void FastISelMap::PrintFunctionDefinitions(raw_ostream &OS) {
               OS << Memo.SubRegNo;
               OS << ");\n";
             }
-            
+
              if (HasPred)
                OS << "  }\n";
           }
-          
+
           // Return 0 if none of the predicates were satisfied.
           if (HasPred)
             OS << "  return 0;\n";
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
new file mode 100644
index 000000000000..2c222b39b137
--- /dev/null
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -0,0 +1,1372 @@
+//===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// It contains the tablegen backend that emits the decoder functions for
+// targets with fixed length instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "decoder-emitter"
+
+#include "FixedLenDecoderEmitter.h"
+#include "CodeGenTarget.h"
+#include "Record.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+#include <map>
+#include <string>
+
+using namespace llvm;
+
+// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
+// for a bit value.
+//
+// BIT_UNFILTERED is used as the init value for a filter position.  It is used
+// only for filter processings.
+typedef enum {
+  BIT_TRUE,      // '1'
+  BIT_FALSE,     // '0'
+  BIT_UNSET,     // '?'
+  BIT_UNFILTERED // unfiltered
+} bit_value_t;
+
+static bool ValueSet(bit_value_t V) {
+  return (V == BIT_TRUE || V == BIT_FALSE);
+}
+static bool ValueNotSet(bit_value_t V) {
+  return (V == BIT_UNSET);
+}
+static int Value(bit_value_t V) {
+  return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
+}
+static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
+  if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
+    return bit->getValue() ? BIT_TRUE : BIT_FALSE;
+
+  // The bit is uninitialized.
+  return BIT_UNSET;
+}
+// Prints the bit value for each position.
+static void dumpBits(raw_ostream &o, BitsInit &bits) {
+  unsigned index;
+
+  for (index = bits.getNumBits(); index > 0; index--) {
+    switch (bitFromBits(bits, index - 1)) {
+    case BIT_TRUE:
+      o << "1";
+      break;
+    case BIT_FALSE:
+      o << "0";
+      break;
+    case BIT_UNSET:
+      o << "_";
+      break;
+    default:
+      assert(0 && "unexpected return value from bitFromBits");
+    }
+  }
+}
+
+static BitsInit &getBitsField(const Record &def, const char *str) {
+  BitsInit *bits = def.getValueAsBitsInit(str);
+  return *bits;
+}
+
+// Forward declaration.
+class FilterChooser;
+
+// FIXME: Possibly auto-detected?
+#define BIT_WIDTH 32
+
+// Representation of the instruction to work on.
+typedef bit_value_t insn_t[BIT_WIDTH];
+
+/// Filter - Filter works with FilterChooser to produce the decoding tree for
+/// the ISA.
+///
+/// It is useful to think of a Filter as governing the switch stmts of the
+/// decoding tree in a certain level.  Each case stmt delegates to an inferior
+/// FilterChooser to decide what further decoding logic to employ, or in another
+/// words, what other remaining bits to look at.  The FilterChooser eventually
+/// chooses a best Filter to do its job.
+///
+/// This recursive scheme ends when the number of Opcodes assigned to the
+/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
+/// the Filter/FilterChooser combo does not know how to distinguish among the
+/// Opcodes assigned.
+///
+/// An example of a conflict is
+///
+/// Conflict:
+///                     111101000.00........00010000....
+///                     111101000.00........0001........
+///                     1111010...00........0001........
+///                     1111010...00....................
+///                     1111010.........................
+///                     1111............................
+///                     ................................
+///     VST4q8a         111101000_00________00010000____
+///     VST4q8b         111101000_00________00010000____
+///
+/// The Debug output shows the path that the decoding tree follows to reach the
+/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
+/// even registers, while VST4q8b is a vst4 to double-spaced odd regsisters.
+///
+/// The encoding info in the .td files does not specify this meta information,
+/// which could have been used by the decoder to resolve the conflict.  The
+/// decoder could try to decode the even/odd register numbering and assign to
+/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
+/// version and return the Opcode since the two have the same Asm format string.
+class Filter {
+protected:
+  FilterChooser *Owner; // points to the FilterChooser who owns this filter
+  unsigned StartBit; // the starting bit position
+  unsigned NumBits; // number of bits to filter
+  bool Mixed; // a mixed region contains both set and unset bits
+
+  // Map of well-known segment value to the set of uid's with that value.
+  std::map<uint64_t, std::vector<unsigned> > FilteredInstructions;
+
+  // Set of uid's with non-constant segment values.
+  std::vector<unsigned> VariableInstructions;
+
+  // Map of well-known segment value to its delegate.
+  std::map<unsigned, FilterChooser*> FilterChooserMap;
+
+  // Number of instructions which fall under FilteredInstructions category.
+  unsigned NumFiltered;
+
+  // Keeps track of the last opcode in the filtered bucket.
+  unsigned LastOpcFiltered;
+
+  // Number of instructions which fall under VariableInstructions category.
+  unsigned NumVariable;
+
+public:
+  unsigned getNumFiltered() { return NumFiltered; }
+  unsigned getNumVariable() { return NumVariable; }
+  unsigned getSingletonOpc() {
+    assert(NumFiltered == 1);
+    return LastOpcFiltered;
+  }
+  // Return the filter chooser for the group of instructions without constant
+  // segment values.
+  FilterChooser &getVariableFC() {
+    assert(NumFiltered == 1);
+    assert(FilterChooserMap.size() == 1);
+    return *(FilterChooserMap.find((unsigned)-1)->second);
+  }
+
+  Filter(const Filter &f);
+  Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
+
+  ~Filter();
+
+  // Divides the decoding task into sub tasks and delegates them to the
+  // inferior FilterChooser's.
+  //
+  // A special case arises when there's only one entry in the filtered
+  // instructions.  In order to unambiguously decode the singleton, we need to
+  // match the remaining undecoded encoding bits against the singleton.
+  void recurse();
+
+  // Emit code to decode instructions given a segment or segments of bits.
+  void emit(raw_ostream &o, unsigned &Indentation);
+
+  // Returns the number of fanout produced by the filter.  More fanout implies
+  // the filter distinguishes more categories of instructions.
+  unsigned usefulness() const;
+}; // End of class Filter
+
+// These are states of our finite state machines used in FilterChooser's
+// filterProcessor() which produces the filter candidates to use.
+typedef enum {
+  ATTR_NONE,
+  ATTR_FILTERED,
+  ATTR_ALL_SET,
+  ATTR_ALL_UNSET,
+  ATTR_MIXED
+} bitAttr_t;
+
+/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
+/// in order to perform the decoding of instructions at the current level.
+///
+/// Decoding proceeds from the top down.  Based on the well-known encoding bits
+/// of instructions available, FilterChooser builds up the possible Filters that
+/// can further the task of decoding by distinguishing among the remaining
+/// candidate instructions.
+///
+/// Once a filter has been chosen, it is called upon to divide the decoding task
+/// into sub-tasks and delegates them to its inferior FilterChoosers for further
+/// processings.
+///
+/// It is useful to think of a Filter as governing the switch stmts of the
+/// decoding tree.  And each case is delegated to an inferior FilterChooser to
+/// decide what further remaining bits to look at.
+class FilterChooser {
+protected:
+  friend class Filter;
+
+  // Vector of codegen instructions to choose our filter.
+  const std::vector<const CodeGenInstruction*> &AllInstructions;
+
+  // Vector of uid's for this filter chooser to work on.
+  const std::vector<unsigned> Opcodes;
+
+  // Lookup table for the operand decoding of instructions.
+  std::map<unsigned, std::vector<OperandInfo> > &Operands;
+
+  // Vector of candidate filters.
+  std::vector<Filter> Filters;
+
+  // Array of bit values passed down from our parent.
+  // Set to all BIT_UNFILTERED's for Parent == NULL.
+  bit_value_t FilterBitValues[BIT_WIDTH];
+
+  // Links to the FilterChooser above us in the decoding tree.
+  FilterChooser *Parent;
+
+  // Index of the best filter from Filters.
+  int BestIndex;
+
+public:
+  FilterChooser(const FilterChooser &FC) :
+    AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
+      Operands(FC.Operands), Filters(FC.Filters), Parent(FC.Parent),
+      BestIndex(FC.BestIndex) {
+    memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues));
+  }
+
+  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+                const std::vector<unsigned> &IDs,
+    std::map<unsigned, std::vector<OperandInfo> > &Ops) :
+      AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
+      Parent(NULL), BestIndex(-1) {
+    for (unsigned i = 0; i < BIT_WIDTH; ++i)
+      FilterBitValues[i] = BIT_UNFILTERED;
+
+    doFilter();
+  }
+
+  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
+                const std::vector<unsigned> &IDs,
+        std::map<unsigned, std::vector<OperandInfo> > &Ops,
+                bit_value_t (&ParentFilterBitValues)[BIT_WIDTH],
+                FilterChooser &parent) :
+      AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+      Filters(), Parent(&parent), BestIndex(-1) {
+    for (unsigned i = 0; i < BIT_WIDTH; ++i)
+      FilterBitValues[i] = ParentFilterBitValues[i];
+
+    doFilter();
+  }
+
+  // The top level filter chooser has NULL as its parent.
+  bool isTopLevel() { return Parent == NULL; }
+
+  // Emit the top level typedef and decodeInstruction() function.
+  void emitTop(raw_ostream &o, unsigned Indentation);
+
+protected:
+  // Populates the insn given the uid.
+  void insnWithID(insn_t &Insn, unsigned Opcode) const {
+    BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
+
+    for (unsigned i = 0; i < BIT_WIDTH; ++i)
+      Insn[i] = bitFromBits(Bits, i);
+  }
+
+  // Returns the record name.
+  const std::string &nameWithID(unsigned Opcode) const {
+    return AllInstructions[Opcode]->TheDef->getName();
+  }
+
+  // Populates the field of the insn given the start position and the number of
+  // consecutive bits to scan for.
+  //
+  // Returns false if there exists any uninitialized bit value in the range.
+  // Returns true, otherwise.
+  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
+      unsigned NumBits) const;
+
+  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
+  /// filter array as a series of chars.
+  void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[BIT_WIDTH]);
+
+  /// dumpStack - dumpStack traverses the filter chooser chain and calls
+  /// dumpFilterArray on each filter chooser up to the top level one.
+  void dumpStack(raw_ostream &o, const char *prefix);
+
+  Filter &bestFilter() {
+    assert(BestIndex != -1 && "BestIndex not set");
+    return Filters[BestIndex];
+  }
+
+  // Called from Filter::recurse() when singleton exists.  For debug purpose.
+  void SingletonExists(unsigned Opc);
+
+  bool PositionFiltered(unsigned i) {
+    return ValueSet(FilterBitValues[i]);
+  }
+
+  // Calculates the island(s) needed to decode the instruction.
+  // This returns a lit of undecoded bits of an instructions, for example,
+  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
+  // decoded bits in order to verify that the instruction matches the Opcode.
+  unsigned getIslands(std::vector<unsigned> &StartBits,
+      std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
+      insn_t &Insn);
+
+  // Emits code to decode the singleton.  Return true if we have matched all the
+  // well-known bits.
+  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
+
+  // Emits code to decode the singleton, and then to decode the rest.
+  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,Filter &Best);
+
+  // Assign a single filter and run with it.
+  void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit,
+      bool mixed);
+
+  // reportRegion is a helper function for filterProcessor to mark a region as
+  // eligible for use as a filter region.
+  void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
+      bool AllowMixed);
+
+  // FilterProcessor scans the well-known encoding bits of the instructions and
+  // builds up a list of candidate filters.  It chooses the best filter and
+  // recursively descends down the decoding tree.
+  bool filterProcessor(bool AllowMixed, bool Greedy = true);
+
+  // Decides on the best configuration of filter(s) to use in order to decode
+  // the instructions.  A conflict of instructions may occur, in which case we
+  // dump the conflict set to the standard error.
+  void doFilter();
+
+  // Emits code to decode our share of instructions.  Returns true if the
+  // emitted code causes a return, which occurs if we know how to decode
+  // the instruction at this level or the instruction is not decodeable.
+  bool emit(raw_ostream &o, unsigned &Indentation);
+};
+
+///////////////////////////
+//                       //
+// Filter Implmenetation //
+//                       //
+///////////////////////////
+
+Filter::Filter(const Filter &f) :
+  Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
+  FilteredInstructions(f.FilteredInstructions),
+  VariableInstructions(f.VariableInstructions),
+  FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
+  LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
+}
+
+Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
+    bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
+                  Mixed(mixed) {
+  assert(StartBit + NumBits - 1 < BIT_WIDTH);
+
+  NumFiltered = 0;
+  LastOpcFiltered = 0;
+  NumVariable = 0;
+
+  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
+    insn_t Insn;
+
+    // Populates the insn given the uid.
+    Owner->insnWithID(Insn, Owner->Opcodes[i]);
+
+    uint64_t Field;
+    // Scans the segment for possibly well-specified encoding bits.
+    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
+
+    if (ok) {
+      // The encoding bits are well-known.  Lets add the uid of the
+      // instruction into the bucket keyed off the constant field value.
+      LastOpcFiltered = Owner->Opcodes[i];
+      FilteredInstructions[Field].push_back(LastOpcFiltered);
+      ++NumFiltered;
+    } else {
+      // Some of the encoding bit(s) are unspecfied.  This contributes to
+      // one additional member of "Variable" instructions.
+      VariableInstructions.push_back(Owner->Opcodes[i]);
+      ++NumVariable;
+    }
+  }
+
+  assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
+         && "Filter returns no instruction categories");
+}
+
+Filter::~Filter() {
+  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+  for (filterIterator = FilterChooserMap.begin();
+       filterIterator != FilterChooserMap.end();
+       filterIterator++) {
+    delete filterIterator->second;
+  }
+}
+
+// Divides the decoding task into sub tasks and delegates them to the
+// inferior FilterChooser's.
+//
+// A special case arises when there's only one entry in the filtered
+// instructions.  In order to unambiguously decode the singleton, we need to
+// match the remaining undecoded encoding bits against the singleton.
+void Filter::recurse() {
+  std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
+
+  bit_value_t BitValueArray[BIT_WIDTH];
+  // Starts by inheriting our parent filter chooser's filter bit values.
+  memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
+
+  unsigned bitIndex;
+
+  if (VariableInstructions.size()) {
+    // Conservatively marks each segment position as BIT_UNSET.
+    for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
+      BitValueArray[StartBit + bitIndex] = BIT_UNSET;
+
+    // Delegates to an inferior filter chooser for futher processing on this
+    // group of instructions whose segment values are variable.
+    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+                              (unsigned)-1,
+                              new FilterChooser(Owner->AllInstructions,
+                                                VariableInstructions,
+                                                Owner->Operands,
+                                                BitValueArray,
+                                                *Owner)
+                              ));
+  }
+
+  // No need to recurse for a singleton filtered instruction.
+  // See also Filter::emit().
+  if (getNumFiltered() == 1) {
+    //Owner->SingletonExists(LastOpcFiltered);
+    assert(FilterChooserMap.size() == 1);
+    return;
+  }
+
+  // Otherwise, create sub choosers.
+  for (mapIterator = FilteredInstructions.begin();
+       mapIterator != FilteredInstructions.end();
+       mapIterator++) {
+
+    // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
+    for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
+      if (mapIterator->first & (1ULL << bitIndex))
+        BitValueArray[StartBit + bitIndex] = BIT_TRUE;
+      else
+        BitValueArray[StartBit + bitIndex] = BIT_FALSE;
+    }
+
+    // Delegates to an inferior filter chooser for futher processing on this
+    // category of instructions.
+    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+                              mapIterator->first,
+                              new FilterChooser(Owner->AllInstructions,
+                                                mapIterator->second,
+                                                Owner->Operands,
+                                                BitValueArray,
+                                                *Owner)
+                              ));
+  }
+}
+
+// Emit code to decode instructions given a segment or segments of bits.
+void Filter::emit(raw_ostream &o, unsigned &Indentation) {
+  o.indent(Indentation) << "// Check Inst{";
+
+  if (NumBits > 1)
+    o << (StartBit + NumBits - 1) << '-';
+
+  o << StartBit << "} ...\n";
+
+  o.indent(Indentation) << "switch (fieldFromInstruction(insn, "
+                        << StartBit << ", " << NumBits << ")) {\n";
+
+  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+
+  bool DefaultCase = false;
+  for (filterIterator = FilterChooserMap.begin();
+       filterIterator != FilterChooserMap.end();
+       filterIterator++) {
+
+    // Field value -1 implies a non-empty set of variable instructions.
+    // See also recurse().
+    if (filterIterator->first == (unsigned)-1) {
+      DefaultCase = true;
+
+      o.indent(Indentation) << "default:\n";
+      o.indent(Indentation) << "  break; // fallthrough\n";
+
+      // Closing curly brace for the switch statement.
+      // This is unconventional because we want the default processing to be
+      // performed for the fallthrough cases as well, i.e., when the "cases"
+      // did not prove a decoded instruction.
+      o.indent(Indentation) << "}\n";
+
+    } else
+      o.indent(Indentation) << "case " << filterIterator->first << ":\n";
+
+    // We arrive at a category of instructions with the same segment value.
+    // Now delegate to the sub filter chooser for further decodings.
+    // The case may fallthrough, which happens if the remaining well-known
+    // encoding bits do not match exactly.
+    if (!DefaultCase) { ++Indentation; ++Indentation; }
+
+    bool finished = filterIterator->second->emit(o, Indentation);
+    // For top level default case, there's no need for a break statement.
+    if (Owner->isTopLevel() && DefaultCase)
+      break;
+    if (!finished)
+      o.indent(Indentation) << "break;\n";
+
+    if (!DefaultCase) { --Indentation; --Indentation; }
+  }
+
+  // If there is no default case, we still need to supply a closing brace.
+  if (!DefaultCase) {
+    // Closing curly brace for the switch statement.
+    o.indent(Indentation) << "}\n";
+  }
+}
+
+// Returns the number of fanout produced by the filter.  More fanout implies
+// the filter distinguishes more categories of instructions.
+unsigned Filter::usefulness() const {
+  if (VariableInstructions.size())
+    return FilteredInstructions.size();
+  else
+    return FilteredInstructions.size() + 1;
+}
+
+//////////////////////////////////
+//                              //
+// Filterchooser Implementation //
+//                              //
+//////////////////////////////////
+
+// Emit the top level typedef and decodeInstruction() function.
+void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation) {
+  switch (BIT_WIDTH) {
+  case 8:
+    o.indent(Indentation) << "typedef uint8_t field_t;\n";
+    break;
+  case 16:
+    o.indent(Indentation) << "typedef uint16_t field_t;\n";
+    break;
+  case 32:
+    o.indent(Indentation) << "typedef uint32_t field_t;\n";
+    break;
+  case 64:
+    o.indent(Indentation) << "typedef uint64_t field_t;\n";
+    break;
+  default:
+    assert(0 && "Unexpected instruction size!");
+  }
+
+  o << '\n';
+
+  o.indent(Indentation) << "static field_t " <<
+    "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n";
+
+  o.indent(Indentation) << "{\n";
+
+  ++Indentation; ++Indentation;
+  o.indent(Indentation) << "assert(startBit + numBits <= " << BIT_WIDTH
+                        << " && \"Instruction field out of bounds!\");\n";
+  o << '\n';
+  o.indent(Indentation) << "field_t fieldMask;\n";
+  o << '\n';
+  o.indent(Indentation) << "if (numBits == " << BIT_WIDTH << ")\n";
+
+  ++Indentation; ++Indentation;
+  o.indent(Indentation) << "fieldMask = (field_t)-1;\n";
+  --Indentation; --Indentation;
+
+  o.indent(Indentation) << "else\n";
+
+  ++Indentation; ++Indentation;
+  o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
+  --Indentation; --Indentation;
+
+  o << '\n';
+  o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n";
+  --Indentation; --Indentation;
+
+  o.indent(Indentation) << "}\n";
+
+  o << '\n';
+
+  o.indent(Indentation) <<
+    "static bool decodeInstruction(MCInst &MI, field_t insn) {\n";
+  o.indent(Indentation) << "  unsigned tmp = 0;\n";
+
+  ++Indentation; ++Indentation;
+  // Emits code to decode the instructions.
+  emit(o, Indentation);
+
+  o << '\n';
+  o.indent(Indentation) << "return false;\n";
+  --Indentation; --Indentation;
+
+  o.indent(Indentation) << "}\n";
+
+  o << '\n';
+}
+
+// Populates the field of the insn given the start position and the number of
+// consecutive bits to scan for.
+//
+// Returns false if and on the first uninitialized bit value encountered.
+// Returns true, otherwise.
+bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
+    unsigned StartBit, unsigned NumBits) const {
+  Field = 0;
+
+  for (unsigned i = 0; i < NumBits; ++i) {
+    if (Insn[StartBit + i] == BIT_UNSET)
+      return false;
+
+    if (Insn[StartBit + i] == BIT_TRUE)
+      Field = Field | (1ULL << i);
+  }
+
+  return true;
+}
+
+/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
+/// filter array as a series of chars.
+void FilterChooser::dumpFilterArray(raw_ostream &o,
+                                    bit_value_t (&filter)[BIT_WIDTH]) {
+  unsigned bitIndex;
+
+  for (bitIndex = BIT_WIDTH; bitIndex > 0; bitIndex--) {
+    switch (filter[bitIndex - 1]) {
+    case BIT_UNFILTERED:
+      o << ".";
+      break;
+    case BIT_UNSET:
+      o << "_";
+      break;
+    case BIT_TRUE:
+      o << "1";
+      break;
+    case BIT_FALSE:
+      o << "0";
+      break;
+    }
+  }
+}
+
+/// dumpStack - dumpStack traverses the filter chooser chain and calls
+/// dumpFilterArray on each filter chooser up to the top level one.
+void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
+  FilterChooser *current = this;
+
+  while (current) {
+    o << prefix;
+    dumpFilterArray(o, current->FilterBitValues);
+    o << '\n';
+    current = current->Parent;
+  }
+}
+
+// Called from Filter::recurse() when singleton exists.  For debug purpose.
+void FilterChooser::SingletonExists(unsigned Opc) {
+  insn_t Insn0;
+  insnWithID(Insn0, Opc);
+
+  errs() << "Singleton exists: " << nameWithID(Opc)
+         << " with its decoding dominating ";
+  for (unsigned i = 0; i < Opcodes.size(); ++i) {
+    if (Opcodes[i] == Opc) continue;
+    errs() << nameWithID(Opcodes[i]) << ' ';
+  }
+  errs() << '\n';
+
+  dumpStack(errs(), "\t\t");
+  for (unsigned i = 0; i < Opcodes.size(); i++) {
+    const std::string &Name = nameWithID(Opcodes[i]);
+
+    errs() << '\t' << Name << " ";
+    dumpBits(errs(),
+             getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+    errs() << '\n';
+  }
+}
+
+// Calculates the island(s) needed to decode the instruction.
+// This returns a list of undecoded bits of an instructions, for example,
+// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
+// decoded bits in order to verify that the instruction matches the Opcode.
+unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
+    std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
+    insn_t &Insn) {
+  unsigned Num, BitNo;
+  Num = BitNo = 0;
+
+  uint64_t FieldVal = 0;
+
+  // 0: Init
+  // 1: Water (the bit value does not affect decoding)
+  // 2: Island (well-known bit value needed for decoding)
+  int State = 0;
+  int Val = -1;
+
+  for (unsigned i = 0; i < BIT_WIDTH; ++i) {
+    Val = Value(Insn[i]);
+    bool Filtered = PositionFiltered(i);
+    switch (State) {
+    default:
+      assert(0 && "Unreachable code!");
+      break;
+    case 0:
+    case 1:
+      if (Filtered || Val == -1)
+        State = 1; // Still in Water
+      else {
+        State = 2; // Into the Island
+        BitNo = 0;
+        StartBits.push_back(i);
+        FieldVal = Val;
+      }
+      break;
+    case 2:
+      if (Filtered || Val == -1) {
+        State = 1; // Into the Water
+        EndBits.push_back(i - 1);
+        FieldVals.push_back(FieldVal);
+        ++Num;
+      } else {
+        State = 2; // Still in Island
+        ++BitNo;
+        FieldVal = FieldVal | Val << BitNo;
+      }
+      break;
+    }
+  }
+  // If we are still in Island after the loop, do some housekeeping.
+  if (State == 2) {
+    EndBits.push_back(BIT_WIDTH - 1);
+    FieldVals.push_back(FieldVal);
+    ++Num;
+  }
+
+  assert(StartBits.size() == Num && EndBits.size() == Num &&
+         FieldVals.size() == Num);
+  return Num;
+}
+
+// Emits code to decode the singleton.  Return true if we have matched all the
+// well-known bits.
+bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+                                         unsigned Opc) {
+  std::vector<unsigned> StartBits;
+  std::vector<unsigned> EndBits;
+  std::vector<uint64_t> FieldVals;
+  insn_t Insn;
+  insnWithID(Insn, Opc);
+
+  // Look for islands of undecoded bits of the singleton.
+  getIslands(StartBits, EndBits, FieldVals, Insn);
+
+  unsigned Size = StartBits.size();
+  unsigned I, NumBits;
+
+  // If we have matched all the well-known bits, just issue a return.
+  if (Size == 0) {
+    o.indent(Indentation) << "{\n";
+    o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
+    std::vector<OperandInfo>& InsnOperands = Operands[Opc];
+    for (std::vector<OperandInfo>::iterator
+         I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
+      // If a custom instruction decoder was specified, use that.
+      if (I->FieldBase == ~0U && I->FieldLength == ~0U) {
+        o.indent(Indentation) << "  " << I->Decoder << "(MI, insn);\n";
+        break;
+      }
+
+      o.indent(Indentation)
+        << "  tmp = fieldFromInstruction(insn, " << I->FieldBase
+        << ", " << I->FieldLength << ");\n";
+      if (I->Decoder != "") {
+        o.indent(Indentation) << "  " << I->Decoder << "(MI, tmp);\n";
+      } else {
+        o.indent(Indentation)
+          << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
+      }
+    }
+
+    o.indent(Indentation) << "  return true; // " << nameWithID(Opc)
+                          << '\n';
+    o.indent(Indentation) << "}\n";
+    return true;
+  }
+
+  // Otherwise, there are more decodings to be done!
+
+  // Emit code to match the island(s) for the singleton.
+  o.indent(Indentation) << "// Check ";
+
+  for (I = Size; I != 0; --I) {
+    o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
+    if (I > 1)
+      o << "&& ";
+    else
+      o << "for singleton decoding...\n";
+  }
+
+  o.indent(Indentation) << "if (";
+
+  for (I = Size; I != 0; --I) {
+    NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+    o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits
+      << ") == " << FieldVals[I-1];
+    if (I > 1)
+      o << " && ";
+    else
+      o << ") {\n";
+  }
+  o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
+  std::vector<OperandInfo>& InsnOperands = Operands[Opc];
+  for (std::vector<OperandInfo>::iterator
+       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
+    // If a custom instruction decoder was specified, use that.
+    if (I->FieldBase == ~0U && I->FieldLength == ~0U) {
+      o.indent(Indentation) << "  " << I->Decoder << "(MI, insn);\n";
+      break;
+    }
+
+    o.indent(Indentation)
+      << "  tmp = fieldFromInstruction(insn, " << I->FieldBase
+      << ", " << I->FieldLength << ");\n";
+    if (I->Decoder != "") {
+      o.indent(Indentation) << "  " << I->Decoder << "(MI, tmp);\n";
+    } else {
+      o.indent(Indentation)
+        << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
+    }
+  }
+  o.indent(Indentation) << "  return true; // " << nameWithID(Opc)
+                        << '\n';
+  o.indent(Indentation) << "}\n";
+
+  return false;
+}
+
+// Emits code to decode the singleton, and then to decode the rest.
+void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+    Filter &Best) {
+
+  unsigned Opc = Best.getSingletonOpc();
+
+  emitSingletonDecoder(o, Indentation, Opc);
+
+  // Emit code for the rest.
+  o.indent(Indentation) << "else\n";
+
+  Indentation += 2;
+  Best.getVariableFC().emit(o, Indentation);
+  Indentation -= 2;
+}
+
+// Assign a single filter and run with it.  Top level API client can initialize
+// with a single filter to start the filtering process.
+void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
+    unsigned numBit, bool mixed) {
+  Filters.clear();
+  Filter F(*this, startBit, numBit, true);
+  Filters.push_back(F);
+  BestIndex = 0; // Sole Filter instance to choose from.
+  bestFilter().recurse();
+}
+
+// reportRegion is a helper function for filterProcessor to mark a region as
+// eligible for use as a filter region.
+void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
+    unsigned BitIndex, bool AllowMixed) {
+  if (RA == ATTR_MIXED && AllowMixed)
+    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));
+  else if (RA == ATTR_ALL_SET && !AllowMixed)
+    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, false));
+}
+
+// FilterProcessor scans the well-known encoding bits of the instructions and
+// builds up a list of candidate filters.  It chooses the best filter and
+// recursively descends down the decoding tree.
+bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
+  Filters.clear();
+  BestIndex = -1;
+  unsigned numInstructions = Opcodes.size();
+
+  assert(numInstructions && "Filter created with no instructions");
+
+  // No further filtering is necessary.
+  if (numInstructions == 1)
+    return true;
+
+  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
+  // instructions is 3.
+  if (AllowMixed && !Greedy) {
+    assert(numInstructions == 3);
+
+    for (unsigned i = 0; i < Opcodes.size(); ++i) {
+      std::vector<unsigned> StartBits;
+      std::vector<unsigned> EndBits;
+      std::vector<uint64_t> FieldVals;
+      insn_t Insn;
+
+      insnWithID(Insn, Opcodes[i]);
+
+      // Look for islands of undecoded bits of any instruction.
+      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
+        // Found an instruction with island(s).  Now just assign a filter.
+        runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
+                        true);
+        return true;
+      }
+    }
+  }
+
+  unsigned BitIndex, InsnIndex;
+
+  // We maintain BIT_WIDTH copies of the bitAttrs automaton.
+  // The automaton consumes the corresponding bit from each
+  // instruction.
+  //
+  //   Input symbols: 0, 1, and _ (unset).
+  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
+  //   Initial state: NONE.
+  //
+  // (NONE) ------- [01] -> (ALL_SET)
+  // (NONE) ------- _ ----> (ALL_UNSET)
+  // (ALL_SET) ---- [01] -> (ALL_SET)
+  // (ALL_SET) ---- _ ----> (MIXED)
+  // (ALL_UNSET) -- [01] -> (MIXED)
+  // (ALL_UNSET) -- _ ----> (ALL_UNSET)
+  // (MIXED) ------ . ----> (MIXED)
+  // (FILTERED)---- . ----> (FILTERED)
+
+  bitAttr_t bitAttrs[BIT_WIDTH];
+
+  // FILTERED bit positions provide no entropy and are not worthy of pursuing.
+  // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
+  for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex)
+    if (FilterBitValues[BitIndex] == BIT_TRUE ||
+        FilterBitValues[BitIndex] == BIT_FALSE)
+      bitAttrs[BitIndex] = ATTR_FILTERED;
+    else
+      bitAttrs[BitIndex] = ATTR_NONE;
+
+  for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
+    insn_t insn;
+
+    insnWithID(insn, Opcodes[InsnIndex]);
+
+    for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) {
+      switch (bitAttrs[BitIndex]) {
+      case ATTR_NONE:
+        if (insn[BitIndex] == BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_ALL_UNSET;
+        else
+          bitAttrs[BitIndex] = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_SET:
+        if (insn[BitIndex] == BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_MIXED;
+        break;
+      case ATTR_ALL_UNSET:
+        if (insn[BitIndex] != BIT_UNSET)
+          bitAttrs[BitIndex] = ATTR_MIXED;
+        break;
+      case ATTR_MIXED:
+      case ATTR_FILTERED:
+        break;
+      }
+    }
+  }
+
+  // The regionAttr automaton consumes the bitAttrs automatons' state,
+  // lowest-to-highest.
+  //
+  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
+  //   States:        NONE, ALL_SET, MIXED
+  //   Initial state: NONE
+  //
+  // (NONE) ----- F --> (NONE)
+  // (NONE) ----- S --> (ALL_SET)     ; and set region start
+  // (NONE) ----- U --> (NONE)
+  // (NONE) ----- M --> (MIXED)       ; and set region start
+  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
+  // (ALL_SET) -- S --> (ALL_SET)
+  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
+  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
+  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
+  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
+  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
+  // (MIXED) ---- M --> (MIXED)
+
+  bitAttr_t RA = ATTR_NONE;
+  unsigned StartBit = 0;
+
+  for (BitIndex = 0; BitIndex < BIT_WIDTH; BitIndex++) {
+    bitAttr_t bitAttr = bitAttrs[BitIndex];
+
+    assert(bitAttr != ATTR_NONE && "Bit without attributes");
+
+    switch (RA) {
+    case ATTR_NONE:
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        break;
+      case ATTR_ALL_SET:
+        StartBit = BitIndex;
+        RA = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_UNSET:
+        break;
+      case ATTR_MIXED:
+        StartBit = BitIndex;
+        RA = ATTR_MIXED;
+        break;
+      default:
+        assert(0 && "Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_ALL_SET:
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        RA = ATTR_NONE;
+        break;
+      case ATTR_ALL_SET:
+        break;
+      case ATTR_ALL_UNSET:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        RA = ATTR_NONE;
+        break;
+      case ATTR_MIXED:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        StartBit = BitIndex;
+        RA = ATTR_MIXED;
+        break;
+      default:
+        assert(0 && "Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_MIXED:
+      switch (bitAttr) {
+      case ATTR_FILTERED:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        StartBit = BitIndex;
+        RA = ATTR_NONE;
+        break;
+      case ATTR_ALL_SET:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        StartBit = BitIndex;
+        RA = ATTR_ALL_SET;
+        break;
+      case ATTR_ALL_UNSET:
+        reportRegion(RA, StartBit, BitIndex, AllowMixed);
+        RA = ATTR_NONE;
+        break;
+      case ATTR_MIXED:
+        break;
+      default:
+        assert(0 && "Unexpected bitAttr!");
+      }
+      break;
+    case ATTR_ALL_UNSET:
+      assert(0 && "regionAttr state machine has no ATTR_UNSET state");
+    case ATTR_FILTERED:
+      assert(0 && "regionAttr state machine has no ATTR_FILTERED state");
+    }
+  }
+
+  // At the end, if we're still in ALL_SET or MIXED states, report a region
+  switch (RA) {
+  case ATTR_NONE:
+    break;
+  case ATTR_FILTERED:
+    break;
+  case ATTR_ALL_SET:
+    reportRegion(RA, StartBit, BitIndex, AllowMixed);
+    break;
+  case ATTR_ALL_UNSET:
+    break;
+  case ATTR_MIXED:
+    reportRegion(RA, StartBit, BitIndex, AllowMixed);
+    break;
+  }
+
+  // We have finished with the filter processings.  Now it's time to choose
+  // the best performing filter.
+  BestIndex = 0;
+  bool AllUseless = true;
+  unsigned BestScore = 0;
+
+  for (unsigned i = 0, e = Filters.size(); i != e; ++i) {
+    unsigned Usefulness = Filters[i].usefulness();
+
+    if (Usefulness)
+      AllUseless = false;
+
+    if (Usefulness > BestScore) {
+      BestIndex = i;
+      BestScore = Usefulness;
+    }
+  }
+
+  if (!AllUseless)
+    bestFilter().recurse();
+
+  return !AllUseless;
+} // end of FilterChooser::filterProcessor(bool)
+
+// Decides on the best configuration of filter(s) to use in order to decode
+// the instructions.  A conflict of instructions may occur, in which case we
+// dump the conflict set to the standard error.
+void FilterChooser::doFilter() {
+  unsigned Num = Opcodes.size();
+  assert(Num && "FilterChooser created with no instructions");
+
+  // Try regions of consecutive known bit values first.
+  if (filterProcessor(false))
+    return;
+
+  // Then regions of mixed bits (both known and unitialized bit values allowed).
+  if (filterProcessor(true))
+    return;
+
+  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
+  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
+  // well-known encoding pattern.  In such case, we backtrack and scan for the
+  // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
+  if (Num == 3 && filterProcessor(true, false))
+    return;
+
+  // If we come to here, the instruction decoding has failed.
+  // Set the BestIndex to -1 to indicate so.
+  BestIndex = -1;
+}
+
+// Emits code to decode our share of instructions.  Returns true if the
+// emitted code causes a return, which occurs if we know how to decode
+// the instruction at this level or the instruction is not decodeable.
+bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
+  if (Opcodes.size() == 1)
+    // There is only one instruction in the set, which is great!
+    // Call emitSingletonDecoder() to see whether there are any remaining
+    // encodings bits.
+    return emitSingletonDecoder(o, Indentation, Opcodes[0]);
+
+  // Choose the best filter to do the decodings!
+  if (BestIndex != -1) {
+    Filter &Best = bestFilter();
+    if (Best.getNumFiltered() == 1)
+      emitSingletonDecoder(o, Indentation, Best);
+    else
+      bestFilter().emit(o, Indentation);
+    return false;
+  }
+
+  // We don't know how to decode these instructions!  Return 0 and dump the
+  // conflict set!
+  o.indent(Indentation) << "return 0;" << " // Conflict set: ";
+  for (int i = 0, N = Opcodes.size(); i < N; ++i) {
+    o << nameWithID(Opcodes[i]);
+    if (i < (N - 1))
+      o << ", ";
+    else
+      o << '\n';
+  }
+
+  // Print out useful conflict information for postmortem analysis.
+  errs() << "Decoding Conflict:\n";
+
+  dumpStack(errs(), "\t\t");
+
+  for (unsigned i = 0; i < Opcodes.size(); i++) {
+    const std::string &Name = nameWithID(Opcodes[i]);
+
+    errs() << '\t' << Name << " ";
+    dumpBits(errs(),
+             getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+    errs() << '\n';
+  }
+
+  return true;
+}
+
+bool FixedLenDecoderEmitter::populateInstruction(const CodeGenInstruction &CGI,
+                                                 unsigned Opc){
+  const Record &Def = *CGI.TheDef;
+  // If all the bit positions are not specified; do not decode this instruction.
+  // We are bound to fail!  For proper disassembly, the well-known encoding bits
+  // of the instruction must be fully specified.
+  //
+  // This also removes pseudo instructions from considerations of disassembly,
+  // which is a better design and less fragile than the name matchings.
+  BitsInit &Bits = getBitsField(Def, "Inst");
+  if (Bits.allInComplete()) return false;
+
+  // Ignore "asm parser only" instructions.
+  if (Def.getValueAsBit("isAsmParserOnly"))
+    return false;
+
+  std::vector<OperandInfo> InsnOperands;
+
+  // If the instruction has specified a custom decoding hook, use that instead
+  // of trying to auto-generate the decoder.
+  std::string InstDecoder = Def.getValueAsString("DecoderMethod");
+  if (InstDecoder != "") {
+    InsnOperands.push_back(OperandInfo(~0U, ~0U, InstDecoder));
+    Operands[Opc] = InsnOperands;
+    return true;
+  }
+
+  // Generate a description of the operand of the instruction that we know
+  // how to decode automatically.
+  // FIXME: We'll need to have a way to manually override this as needed.
+
+  // Gather the outputs/inputs of the instruction, so we can find their
+  // positions in the encoding.  This assumes for now that they appear in the
+  // MCInst in the order that they're listed.
+  std::vector<std::pair<Init*, std::string> > InOutOperands;
+  DagInit *Out  = Def.getValueAsDag("OutOperandList");
+  DagInit *In  = Def.getValueAsDag("InOperandList");
+  for (unsigned i = 0; i < Out->getNumArgs(); ++i)
+    InOutOperands.push_back(std::make_pair(Out->getArg(i), Out->getArgName(i)));
+  for (unsigned i = 0; i < In->getNumArgs(); ++i)
+    InOutOperands.push_back(std::make_pair(In->getArg(i), In->getArgName(i)));
+
+  // For each operand, see if we can figure out where it is encoded.
+  for (std::vector<std::pair<Init*, std::string> >::iterator
+       NI = InOutOperands.begin(), NE = InOutOperands.end(); NI != NE; ++NI) {
+    unsigned PrevBit = ~0;
+    unsigned Base = ~0;
+    unsigned PrevPos = ~0;
+    std::string Decoder = "";
+
+    for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
+      VarBitInit *BI = dynamic_cast<VarBitInit*>(Bits.getBit(bi));
+      if (!BI) continue;
+
+      VarInit *Var = dynamic_cast<VarInit*>(BI->getVariable());
+      assert(Var);
+      unsigned CurrBit = BI->getBitNum();
+      if (Var->getName() != NI->second) continue;
+
+      // Figure out the lowest bit of the value, and the width of the field.
+      // Deliberately don't try to handle cases where the field is scattered,
+      // or where not all bits of the the field are explicit.
+      if (Base == ~0U && PrevBit == ~0U && PrevPos == ~0U) {
+        if (CurrBit == 0)
+          Base = bi;
+        else
+          continue;
+      }
+
+      if ((PrevPos != ~0U && bi-1 != PrevPos) ||
+          (CurrBit != ~0U && CurrBit-1 != PrevBit)) {
+        PrevBit = ~0;
+        Base = ~0;
+        PrevPos = ~0;
+      }
+
+      PrevPos = bi;
+      PrevBit = CurrBit;
+
+      // At this point, we can locate the field, but we need to know how to
+      // interpret it.  As a first step, require the target to provide callbacks
+      // for decoding register classes.
+      // FIXME: This need to be extended to handle instructions with custom
+      // decoder methods, and operands with (simple) MIOperandInfo's.
+      TypedInit *TI = dynamic_cast<TypedInit*>(NI->first);
+      RecordRecTy *Type = dynamic_cast<RecordRecTy*>(TI->getType());
+      Record *TypeRecord = Type->getRecord();
+      bool isReg = false;
+      if (TypeRecord->isSubClassOf("RegisterClass")) {
+        Decoder = "Decode" + Type->getRecord()->getName() + "RegisterClass";
+        isReg = true;
+      }
+
+      RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+      StringInit *String = DecoderString ?
+        dynamic_cast<StringInit*>(DecoderString->getValue()) :
+        0;
+      if (!isReg && String && String->getValue() != "")
+        Decoder = String->getValue();
+    }
+
+    if (Base != ~0U) {
+      InsnOperands.push_back(OperandInfo(Base, PrevBit+1, Decoder));
+      DEBUG(errs() << "ENCODED OPERAND: $" << NI->second << " @ ("
+                   << utostr(Base+PrevBit) << ", " << utostr(Base) << ")\n");
+    }
+  }
+
+  Operands[Opc] = InsnOperands;
+
+
+#if 0
+  DEBUG({
+      // Dumps the instruction encoding bits.
+      dumpBits(errs(), Bits);
+
+      errs() << '\n';
+
+      // Dumps the list of operand info.
+      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
+        const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
+        const std::string &OperandName = Info.Name;
+        const Record &OperandDef = *Info.Rec;
+
+        errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
+      }
+    });
+#endif
+
+  return true;
+}
+
+void FixedLenDecoderEmitter::populateInstructions() {
+  for (unsigned i = 0, e = NumberedInstructions.size(); i < e; ++i) {
+    Record *R = NumberedInstructions[i]->TheDef;
+    if (R->getValueAsString("Namespace") == "TargetOpcode")
+      continue;
+
+    if (populateInstruction(*NumberedInstructions[i], i))
+      Opcodes.push_back(i);
+  }
+}
+
+// Emits disassembler code for instruction decoding.
+void FixedLenDecoderEmitter::run(raw_ostream &o)
+{
+  o << "#include \"llvm/MC/MCInst.h\"\n";
+  o << "#include \"llvm/Support/DataTypes.h\"\n";
+  o << "#include <assert.h>\n";
+  o << '\n';
+  o << "namespace llvm {\n\n";
+
+  NumberedInstructions = Target.getInstructionsByEnumValue();
+  populateInstructions();
+  FilterChooser FC(NumberedInstructions, Opcodes, Operands);
+  FC.emitTop(o, 0);
+
+  o << "\n} // End llvm namespace \n";
+}
diff --git a/utils/TableGen/FixedLenDecoderEmitter.h b/utils/TableGen/FixedLenDecoderEmitter.h
new file mode 100644
index 000000000000..d46a495540ea
--- /dev/null
+++ b/utils/TableGen/FixedLenDecoderEmitter.h
@@ -0,0 +1,56 @@
+//===------------ FixedLenDecoderEmitter.h - Decoder Generator --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// It contains the tablegen backend that emits the decoder functions for
+// targets with fixed length instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FixedLenDECODEREMITTER_H
+#define FixedLenDECODEREMITTER_H
+
+#include "CodeGenTarget.h"
+#include "TableGenBackend.h"
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+struct OperandInfo {
+  unsigned FieldBase;
+  unsigned FieldLength;
+  std::string Decoder;
+
+  OperandInfo(unsigned FB, unsigned FL, std::string D)
+    : FieldBase(FB), FieldLength(FL), Decoder(D) { }
+};
+
+class FixedLenDecoderEmitter : public TableGenBackend {
+public:
+  FixedLenDecoderEmitter(RecordKeeper &R) :
+    Records(R), Target(R),
+    NumberedInstructions(Target.getInstructionsByEnumValue()) {}
+
+  // run - Output the code emitter
+  void run(raw_ostream &o);
+
+private:
+  RecordKeeper &Records;
+  CodeGenTarget Target;
+  std::vector<const CodeGenInstruction*> NumberedInstructions;
+  std::vector<unsigned> Opcodes;
+  std::map<unsigned, std::vector<OperandInfo> > Operands;
+
+  bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc);
+  void populateInstructions();
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/utils/TableGen/InstrEnumEmitter.cpp b/utils/TableGen/InstrEnumEmitter.cpp
index 47a8474c35ec..aa596892f52f 100644
--- a/utils/TableGen/InstrEnumEmitter.cpp
+++ b/utils/TableGen/InstrEnumEmitter.cpp
@@ -23,7 +23,7 @@ void InstrEnumEmitter::run(raw_ostream &OS) {
   EmitSourceFileHeader("Target Instruction Enum Values", OS);
   OS << "namespace llvm {\n\n";
 
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
 
   // We must emit the PHI opcode first...
   std::string Namespace = Target.getInstNamespace();
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 4d3aa5e621c9..2b684bede3ea 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -60,23 +60,23 @@ std::vector<std::string>
 InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
   std::vector<std::string> Result;
   
-  for (unsigned i = 0, e = Inst.OperandList.size(); i != e; ++i) {
+  for (unsigned i = 0, e = Inst.Operands.size(); i != e; ++i) {
     // Handle aggregate operands and normal operands the same way by expanding
     // either case into a list of operands for this op.
-    std::vector<CodeGenInstruction::OperandInfo> OperandList;
+    std::vector<CGIOperandList::OperandInfo> OperandList;
 
     // This might be a multiple operand thing.  Targets like X86 have
     // registers in their multi-operand operands.  It may also be an anonymous
     // operand, which has a single operand, but no declared class for the
     // operand.
-    DagInit *MIOI = Inst.OperandList[i].MIOperandInfo;
+    DagInit *MIOI = Inst.Operands[i].MIOperandInfo;
     
     if (!MIOI || MIOI->getNumArgs() == 0) {
       // Single, anonymous, operand.
-      OperandList.push_back(Inst.OperandList[i]);
+      OperandList.push_back(Inst.Operands[i]);
     } else {
-      for (unsigned j = 0, e = Inst.OperandList[i].MINumOperands; j != e; ++j) {
-        OperandList.push_back(Inst.OperandList[i]);
+      for (unsigned j = 0, e = Inst.Operands[i].MINumOperands; j != e; ++j) {
+        OperandList.push_back(Inst.Operands[i]);
 
         Record *OpR = dynamic_cast<DefInit*>(MIOI->getArg(j))->getDef();
         OperandList.back().Rec = OpR;
@@ -104,19 +104,19 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
 
       // Predicate operands.  Check to see if the original unexpanded operand
       // was of type PredicateOperand.
-      if (Inst.OperandList[i].Rec->isSubClassOf("PredicateOperand"))
+      if (Inst.Operands[i].Rec->isSubClassOf("PredicateOperand"))
         Res += "|(1<<TOI::Predicate)";
         
       // Optional def operands.  Check to see if the original unexpanded operand
       // was of type OptionalDefOperand.
-      if (Inst.OperandList[i].Rec->isSubClassOf("OptionalDefOperand"))
+      if (Inst.Operands[i].Rec->isSubClassOf("OptionalDefOperand"))
         Res += "|(1<<TOI::OptionalDef)";
 
       // Fill in constraint info.
       Res += ", ";
       
-      const CodeGenInstruction::ConstraintInfo &Constraint =
-        Inst.OperandList[i].Constraints[j];
+      const CGIOperandList::ConstraintInfo &Constraint =
+        Inst.Operands[i].Constraints[j];
       if (Constraint.isNone())
         Res += "0";
       else if (Constraint.isEarlyClobber())
@@ -256,14 +256,14 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
                                   const OperandInfoMapTy &OpInfo,
                                   raw_ostream &OS) {
   int MinOperands = 0;
-  if (!Inst.OperandList.empty())
+  if (!Inst.Operands.size() == 0)
     // Each logical operand can be multiple MI operands.
-    MinOperands = Inst.OperandList.back().MIOperandNo +
-                  Inst.OperandList.back().MINumOperands;
+    MinOperands = Inst.Operands.back().MIOperandNo +
+                  Inst.Operands.back().MINumOperands;
 
   OS << "  { ";
   OS << Num << ",\t" << MinOperands << ",\t"
-     << Inst.NumDefs << ",\t" << getItinClassNumber(Inst.TheDef)
+     << Inst.Operands.NumDefs << ",\t" << getItinClassNumber(Inst.TheDef)
      << ",\t\"" << Inst.TheDef->getName() << "\", 0";
 
   // Emit all of the target indepedent flags...
@@ -271,6 +271,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isBranch)           OS << "|(1<<TID::Branch)";
   if (Inst.isIndirectBranch)   OS << "|(1<<TID::IndirectBranch)";
   if (Inst.isCompare)          OS << "|(1<<TID::Compare)";
+  if (Inst.isMoveImm)          OS << "|(1<<TID::MoveImm)";
   if (Inst.isBarrier)          OS << "|(1<<TID::Barrier)";
   if (Inst.hasDelaySlot)       OS << "|(1<<TID::DelaySlot)";
   if (Inst.isCall)             OS << "|(1<<TID::Call)";
@@ -283,9 +284,9 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isTerminator)       OS << "|(1<<TID::Terminator)";
   if (Inst.isReMaterializable) OS << "|(1<<TID::Rematerializable)";
   if (Inst.isNotDuplicable)    OS << "|(1<<TID::NotDuplicable)";
-  if (Inst.hasOptionalDef)     OS << "|(1<<TID::HasOptionalDef)";
+  if (Inst.Operands.hasOptionalDef) OS << "|(1<<TID::HasOptionalDef)";
   if (Inst.usesCustomInserter) OS << "|(1<<TID::UsesCustomInserter)";
-  if (Inst.isVariadic)         OS << "|(1<<TID::Variadic)";
+  if (Inst.Operands.isVariadic)OS << "|(1<<TID::Variadic)";
   if (Inst.hasSideEffects)     OS << "|(1<<TID::UnmodeledSideEffects)";
   if (Inst.isAsCheapAsAMove)   OS << "|(1<<TID::CheapAsAMove)";
   if (Inst.hasExtraSrcRegAllocReq) OS << "|(1<<TID::ExtraSrcRegAllocReq)";
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index ba30d97eaa35..08f67284a279 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -14,6 +14,7 @@
 #include "CodeGenTarget.h"
 #include "IntrinsicEmitter.h"
 #include "Record.h"
+#include "StringMatcher.h"
 #include "llvm/ADT/StringExtras.h"
 #include <algorithm>
 using namespace llvm;
@@ -67,16 +68,19 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
 
 void IntrinsicEmitter::EmitPrefix(raw_ostream &OS) {
   OS << "// VisualStudio defines setjmp as _setjmp\n"
-        "#if defined(_MSC_VER) && defined(setjmp)\n"
-        "#define setjmp_undefined_for_visual_studio\n"
-        "#undef setjmp\n"
+        "#if defined(_MSC_VER) && defined(setjmp) && \\\n"
+        "                         !defined(setjmp_undefined_for_msvc)\n"
+        "#  pragma push_macro(\"setjmp\")\n"
+        "#  undef setjmp\n"
+        "#  define setjmp_undefined_for_msvc\n"
         "#endif\n\n";
 }
 
 void IntrinsicEmitter::EmitSuffix(raw_ostream &OS) {
-  OS << "#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)\n"
+  OS << "#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)\n"
         "// let's return it to _setjmp state\n"
-        "#define setjmp _setjmp\n"
+        "#  pragma pop_macro(\"setjmp\")\n"
+        "#  undef setjmp_undefined_for_msvc\n"
         "#endif\n\n";
 }
 
@@ -96,37 +100,48 @@ void IntrinsicEmitter::EmitEnumInfo(const std::vector<CodeGenIntrinsic> &Ints,
 void IntrinsicEmitter::
 EmitFnNameRecognizer(const std::vector<CodeGenIntrinsic> &Ints, 
                      raw_ostream &OS) {
-  // Build a function name -> intrinsic name mapping.
-  std::map<std::string, unsigned> IntMapping;
+  // Build a 'first character of function name' -> intrinsic # mapping.
+  std::map<char, std::vector<unsigned> > IntMapping;
   for (unsigned i = 0, e = Ints.size(); i != e; ++i)
-    IntMapping[Ints[i].Name] = i;
-    
+    IntMapping[Ints[i].Name[5]].push_back(i);
+  
   OS << "// Function name -> enum value recognizer code.\n";
   OS << "#ifdef GET_FUNCTION_RECOGNIZER\n";
-  OS << "  switch (Name[5]) {\n";
-  OS << "  default:\n";
-  // Emit the intrinsics in sorted order.
-  char LastChar = 0;
-  for (std::map<std::string, unsigned>::iterator I = IntMapping.begin(),
+  OS << "  StringRef NameR(Name+6, Len-6);   // Skip over 'llvm.'\n";
+  OS << "  switch (Name[5]) {                  // Dispatch on first letter.\n";
+  OS << "  default: break;\n";
+  // Emit the intrinsic matching stuff by first letter.
+  for (std::map<char, std::vector<unsigned> >::iterator I = IntMapping.begin(),
        E = IntMapping.end(); I != E; ++I) {
-    if (I->first[5] != LastChar) {
-      LastChar = I->first[5];
-      OS << "    break;\n";
-      OS << "  case '" << LastChar << "':\n";
+    OS << "  case '" << I->first << "':\n";
+    std::vector<unsigned> &IntList = I->second;
+
+    // Emit all the overloaded intrinsics first, build a table of the
+    // non-overloaded ones.
+    std::vector<StringMatcher::StringPair> MatchTable;
+    
+    for (unsigned i = 0, e = IntList.size(); i != e; ++i) {
+      unsigned IntNo = IntList[i];
+      std::string Result = "return " + TargetPrefix + "Intrinsic::" +
+        Ints[IntNo].EnumName + ";";
+
+      if (!Ints[IntNo].isOverloaded) {
+        MatchTable.push_back(std::make_pair(Ints[IntNo].Name.substr(6),Result));
+        continue;
+      }
+
+      // For overloaded intrinsics, only the prefix needs to match
+      std::string TheStr = Ints[IntNo].Name.substr(6);
+      TheStr += '.';  // Require "bswap." instead of bswap.
+      OS << "    if (NameR.startswith(\"" << TheStr << "\")) "
+         << Result << '\n';
     }
     
-    // For overloaded intrinsics, only the prefix needs to match
-    if (Ints[I->second].isOverloaded)
-      OS << "    if (Len > " << I->first.size()
-       << " && !memcmp(Name, \"" << I->first << ".\", "
-       << (I->first.size() + 1) << ")) return " << TargetPrefix << "Intrinsic::"
-       << Ints[I->second].EnumName << ";\n";
-    else 
-      OS << "    if (Len == " << I->first.size()
-         << " && !memcmp(Name, \"" << I->first << "\", "
-         << I->first.size() << ")) return " << TargetPrefix << "Intrinsic::"
-         << Ints[I->second].EnumName << ";\n";
+    // Emit the matcher logic for the fixed length strings.
+    StringMatcher("NameR", MatchTable, OS).Emit(1);
+    OS << "    break;  // end of '" << I->first << "' case.\n";
   }
+  
   OS << "  }\n";
   OS << "#endif\n\n";
 }
@@ -180,6 +195,8 @@ static void EmitTypeForValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
     OS << "Type::getVoidTy(Context)";
   } else if (VT == MVT::Metadata) {
     OS << "Type::getMetadataTy(Context)";
+  } else if (VT == MVT::x86mmx) {
+    OS << "Type::getX86_MMXTy(Context)";
   } else {
     assert(false && "Unsupported ValueType!");
   }
@@ -556,11 +573,13 @@ EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
       OS << "  return DoesNotAccessMemory;\n";
       break;
     case CodeGenIntrinsic::ReadArgMem:
+      OS << "  return OnlyReadsArgumentPointees;\n";
+      break;
     case CodeGenIntrinsic::ReadMem:
       OS << "  return OnlyReadsMemory;\n";
       break;
     case CodeGenIntrinsic::ReadWriteArgMem:
-      OS << "  return AccessesArguments;\n";
+      OS << "  return OnlyAccessesArgumentPointees;\n";
       break;
     }
   }
@@ -584,112 +603,22 @@ EmitGCCBuiltinList(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
   OS << "#endif\n\n";
 }
 
-/// EmitBuiltinComparisons - Emit comparisons to determine whether the specified
-/// sorted range of builtin names is equal to the current builtin.  This breaks
-/// it down into a simple tree.
-///
-/// At this point, we know that all the builtins in the range have the same name
-/// for the first 'CharStart' characters.  Only the end of the name needs to be
-/// discriminated.
-typedef std::map<std::string, std::string>::const_iterator StrMapIterator;
-static void EmitBuiltinComparisons(StrMapIterator Start, StrMapIterator End,
-                                   unsigned CharStart, unsigned Indent,
-                                   std::string TargetPrefix, raw_ostream &OS) {
-  if (Start == End) return; // empty range.
-  
-  // Determine what, if anything, is the same about all these strings.
-  std::string CommonString = Start->first;
-  unsigned NumInRange = 0;
-  for (StrMapIterator I = Start; I != End; ++I, ++NumInRange) {
-    // Find the first character that doesn't match.
-    const std::string &ThisStr = I->first;
-    unsigned NonMatchChar = CharStart;
-    while (NonMatchChar < CommonString.size() && 
-           NonMatchChar < ThisStr.size() &&
-           CommonString[NonMatchChar] == ThisStr[NonMatchChar])
-      ++NonMatchChar;
-    // Truncate off pieces that don't match.
-    CommonString.resize(NonMatchChar);
-  }
-  
-  // Just compare the rest of the string.
-  if (NumInRange == 1) {
-    if (CharStart != CommonString.size()) {
-      OS << std::string(Indent*2, ' ') << "if (!memcmp(BuiltinName";
-      if (CharStart) OS << "+" << CharStart;
-      OS << ", \"" << (CommonString.c_str()+CharStart) << "\", ";
-      OS << CommonString.size() - CharStart << "))\n";
-      ++Indent;
-    }
-    OS << std::string(Indent*2, ' ') << "IntrinsicID = " << TargetPrefix
-       << "Intrinsic::";
-    OS << Start->second << ";\n";
-    return;
-  }
-
-  // At this point, we potentially have a common prefix for these builtins, emit
-  // a check for this common prefix.
-  if (CommonString.size() != CharStart) {
-    OS << std::string(Indent*2, ' ') << "if (!memcmp(BuiltinName";
-    if (CharStart) OS << "+" << CharStart;
-    OS << ", \"" << (CommonString.c_str()+CharStart) << "\", ";
-    OS << CommonString.size()-CharStart << ")) {\n";
-    
-    EmitBuiltinComparisons(Start, End, CommonString.size(), Indent+1, 
-                           TargetPrefix, OS);
-    OS << std::string(Indent*2, ' ') << "}\n";
-    return;
-  }
-  
-  // Output a switch on the character that differs across the set.
-  OS << std::string(Indent*2, ' ') << "switch (BuiltinName[" << CharStart
-      << "]) {";
-  if (CharStart)
-    OS << "  // \"" << std::string(Start->first.begin(), 
-                                   Start->first.begin()+CharStart) << "\"";
-  OS << "\n";
-  
-  for (StrMapIterator I = Start; I != End; ) {
-    char ThisChar = I->first[CharStart];
-    OS << std::string(Indent*2, ' ') << "case '" << ThisChar << "':\n";
-    // Figure out the range that has this common character.
-    StrMapIterator NextChar = I;
-    for (++NextChar; NextChar != End && NextChar->first[CharStart] == ThisChar;
-         ++NextChar)
-      /*empty*/;
-    EmitBuiltinComparisons(I, NextChar, CharStart+1, Indent+1, TargetPrefix,OS);
-    OS << std::string(Indent*2, ' ') << "  break;\n";
-    I = NextChar;
-  }
-  OS << std::string(Indent*2, ' ') << "}\n";
-}
-
 /// EmitTargetBuiltins - All of the builtins in the specified map are for the
 /// same target, and we already checked it.
 static void EmitTargetBuiltins(const std::map<std::string, std::string> &BIM,
                                const std::string &TargetPrefix,
                                raw_ostream &OS) {
-  // Rearrange the builtins by length.
-  std::vector<std::map<std::string, std::string> > BuiltinsByLen;
-  BuiltinsByLen.reserve(100);
   
-  for (StrMapIterator I = BIM.begin(), E = BIM.end(); I != E; ++I) {
-    if (I->first.size() >= BuiltinsByLen.size())
-      BuiltinsByLen.resize(I->first.size()+1);
-    BuiltinsByLen[I->first.size()].insert(*I);
-  }
+  std::vector<StringMatcher::StringPair> Results;
   
-  // Now that we have all the builtins by their length, emit a switch stmt.
-  OS << "    switch (strlen(BuiltinName)) {\n";
-  OS << "    default: break;\n";
-  for (unsigned i = 0, e = BuiltinsByLen.size(); i != e; ++i) {
-    if (BuiltinsByLen[i].empty()) continue;
-    OS << "    case " << i << ":\n";
-    EmitBuiltinComparisons(BuiltinsByLen[i].begin(), BuiltinsByLen[i].end(),
-                           0, 3, TargetPrefix, OS);
-    OS << "      break;\n";
+  for (std::map<std::string, std::string>::const_iterator I = BIM.begin(),
+       E = BIM.end(); I != E; ++I) {
+    std::string ResultCode =
+    "return " + TargetPrefix + "Intrinsic::" + I->second + ";";
+    Results.push_back(StringMatcher::StringPair(I->first, ResultCode));
   }
-  OS << "    }\n";
+
+  StringMatcher("BuiltinName", Results, OS).Emit();
 }
 
         
@@ -719,24 +648,20 @@ EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints,
   if (TargetOnly) {
     OS << "static " << TargetPrefix << "Intrinsic::ID "
        << "getIntrinsicForGCCBuiltin(const char "
-       << "*TargetPrefix, const char *BuiltinName) {\n";
-    OS << "  " << TargetPrefix << "Intrinsic::ID IntrinsicID = ";
+       << "*TargetPrefixStr, const char *BuiltinNameStr) {\n";
   } else {
     OS << "Intrinsic::ID Intrinsic::getIntrinsicForGCCBuiltin(const char "
-       << "*TargetPrefix, const char *BuiltinName) {\n";
-    OS << "  Intrinsic::ID IntrinsicID = ";
+       << "*TargetPrefixStr, const char *BuiltinNameStr) {\n";
   }
   
-  if (TargetOnly)
-    OS << "(" << TargetPrefix<< "Intrinsic::ID)";
-
-  OS << "Intrinsic::not_intrinsic;\n";
+  OS << "  StringRef BuiltinName(BuiltinNameStr);\n";
+  OS << "  StringRef TargetPrefix(TargetPrefixStr);\n\n";
   
   // Note: this could emit significantly better code if we cared.
   for (BIMTy::iterator I = BuiltinMap.begin(), E = BuiltinMap.end();I != E;++I){
     OS << "  ";
     if (!I->first.empty())
-      OS << "if (!strcmp(TargetPrefix, \"" << I->first << "\")) ";
+      OS << "if (TargetPrefix == \"" << I->first << "\") ";
     else
       OS << "/* Target Independent Builtins */ ";
     OS << "{\n";
@@ -745,7 +670,10 @@ EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints,
     EmitTargetBuiltins(I->second, TargetPrefix, OS);
     OS << "  }\n";
   }
-  OS << "  return IntrinsicID;\n";
+  OS << "  return ";
+  if (!TargetPrefix.empty())
+    OS << "(" << TargetPrefix << "Intrinsic::ID)";
+  OS << "Intrinsic::not_intrinsic;\n";
   OS << "}\n";
   OS << "#endif\n\n";
 }
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index 8b81e14cc26a..c40a39dff729 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -25,6 +25,7 @@
 #include <string>
 #include <typeinfo>
 
+
 using namespace llvm;
 
 namespace {
@@ -164,18 +165,6 @@ void CheckedIncrement(I& P, I E, S ErrorString) {
     throw ErrorString;
 }
 
-// apply is needed because C++'s syntax doesn't let us construct a function
-// object and call it in the same statement.
-template<typename F, typename T0>
-void apply(F Fun, T0& Arg0) {
-  return Fun(Arg0);
-}
-
-template<typename F, typename T0, typename T1>
-void apply(F Fun, T0& Arg0, T1& Arg1) {
-  return Fun(Arg0, Arg1);
-}
-
 //===----------------------------------------------------------------------===//
 /// Back-end specific code
 
@@ -779,14 +768,21 @@ public:
 
     CheckNumberOfArguments(d, 2);
 
+    // Alias option store the aliased option name in the 'Help' field and do not
+    // have any properties.
     if (OD.isAlias()) {
-      // Aliases store the aliased option name in the 'Help' field.
       OD.Help = InitPtrToString(d.getArg(1));
     }
     else {
       processOptionProperties(d, OD);
     }
 
+    // Switch options are ZeroOrMore by default.
+    if (OD.isSwitch()) {
+      if (!(OD.isOptional() || OD.isOneOrMore() || OD.isRequired()))
+        OD.setZeroOrMore();
+    }
+
     OptDescs_.InsertDescription(OD);
   }
 
@@ -809,13 +805,12 @@ void CollectOptionDescriptions (const RecordVector& V,
                                 OptionDescriptions& OptDescs)
 {
   // For every OptionList:
-  for (RecordVector::const_iterator B = V.begin(),
-         E = V.end(); B!=E; ++B) {
+  for (RecordVector::const_iterator B = V.begin(), E = V.end(); B!=E; ++B)
+  {
     // Throws an exception if the value does not exist.
     ListInit* PropList = (*B)->getValueAsListInit("options");
 
-    // For every option description in this list:
-    // collect the information and
+    // For every option description in this list: invoke AddOption.
     std::for_each(PropList->begin(), PropList->end(), AddOption(OptDescs));
   }
 }
@@ -833,7 +828,7 @@ struct ToolDescription : public RefCountedBase<ToolDescription> {
   StrVector InLanguage;
   std::string InFileOption;
   std::string OutFileOption;
-  std::string OutLanguage;
+  StrVector OutLanguage;
   std::string OutputSuffix;
   unsigned Flags;
   const Init* OnEmpty;
@@ -919,31 +914,24 @@ private:
     toolDesc_.CmdLine = d.getArg(0);
   }
 
-  void onInLanguage (const DagInit& d) {
+  /// onInOutLanguage - Common implementation of on{In,Out}Language().
+  void onInOutLanguage (const DagInit& d, StrVector& OutVec) {
     CheckNumberOfArguments(d, 1);
-    Init* arg = d.getArg(0);
 
-    // Find out the argument's type.
-    if (typeid(*arg) == typeid(StringInit)) {
-      // It's a string.
-      toolDesc_.InLanguage.push_back(InitPtrToString(arg));
+    // Copy strings to the output vector.
+    for (unsigned i = 0, NumArgs = d.getNumArgs(); i < NumArgs; ++i) {
+      OutVec.push_back(InitPtrToString(d.getArg(i)));
     }
-    else {
-      // It's a list.
-      const ListInit& lst = InitPtrToList(arg);
-      StrVector& out = toolDesc_.InLanguage;
 
-      // Copy strings to the output vector.
-      for (ListInit::const_iterator B = lst.begin(), E = lst.end();
-           B != E; ++B) {
-        out.push_back(InitPtrToString(*B));
-      }
+    // Remove duplicates.
+    std::sort(OutVec.begin(), OutVec.end());
+    StrVector::iterator newE = std::unique(OutVec.begin(), OutVec.end());
+    OutVec.erase(newE, OutVec.end());
+  }
 
-      // Remove duplicates.
-      std::sort(out.begin(), out.end());
-      StrVector::iterator newE = std::unique(out.begin(), out.end());
-      out.erase(newE, out.end());
-    }
+
+  void onInLanguage (const DagInit& d) {
+    this->onInOutLanguage(d, toolDesc_.InLanguage);
   }
 
   void onJoin (const DagInit& d) {
@@ -952,8 +940,7 @@ private:
   }
 
   void onOutLanguage (const DagInit& d) {
-    CheckNumberOfArguments(d, 1);
-    toolDesc_.OutLanguage = InitPtrToString(d.getArg(0));
+    this->onInOutLanguage(d, toolDesc_.OutLanguage);
   }
 
   void onOutFileOption (const DagInit& d) {
@@ -1062,47 +1049,62 @@ void FilterNotInGraph (const DagVector& EdgeVector,
 }
 
 /// FillInToolToLang - Fills in two tables that map tool names to
-/// (input, output) languages.  Helper function used by TypecheckGraph().
+/// input & output language names.  Helper function used by TypecheckGraph().
 void FillInToolToLang (const ToolDescriptions& ToolDescs,
                        StringMap<StringSet<> >& ToolToInLang,
-                       StringMap<std::string>& ToolToOutLang) {
+                       StringMap<StringSet<> >& ToolToOutLang) {
   for (ToolDescriptions::const_iterator B = ToolDescs.begin(),
          E = ToolDescs.end(); B != E; ++B) {
     const ToolDescription& D = *(*B);
     for (StrVector::const_iterator B = D.InLanguage.begin(),
            E = D.InLanguage.end(); B != E; ++B)
       ToolToInLang[D.Name].insert(*B);
-    ToolToOutLang[D.Name] = D.OutLanguage;
+    for (StrVector::const_iterator B = D.OutLanguage.begin(),
+           E = D.OutLanguage.end(); B != E; ++B)
+      ToolToOutLang[D.Name].insert(*B);
   }
 }
 
+/// Intersect - Is set intersection non-empty?
+bool Intersect (const StringSet<>& S1, const StringSet<>& S2) {
+  for (StringSet<>::const_iterator B = S1.begin(), E = S1.end(); B != E; ++B) {
+    if (S2.count(B->first()) != 0)
+      return true;
+  }
+  return false;
+}
+
 /// TypecheckGraph - Check that names for output and input languages
 /// on all edges do match.
 void TypecheckGraph (const DagVector& EdgeVector,
                      const ToolDescriptions& ToolDescs) {
   StringMap<StringSet<> > ToolToInLang;
-  StringMap<std::string> ToolToOutLang;
+  StringMap<StringSet<> > ToolToOutLang;
 
   FillInToolToLang(ToolDescs, ToolToInLang, ToolToOutLang);
-  StringMap<std::string>::iterator IAE = ToolToOutLang.end();
-  StringMap<StringSet<> >::iterator IBE = ToolToInLang.end();
 
   for (DagVector::const_iterator B = EdgeVector.begin(),
          E = EdgeVector.end(); B != E; ++B) {
     const DagInit* Edge = *B;
     const std::string& NodeA = InitPtrToString(Edge->getArg(0));
     const std::string& NodeB = InitPtrToString(Edge->getArg(1));
-    StringMap<std::string>::iterator IA = ToolToOutLang.find(NodeA);
+    StringMap<StringSet<> >::iterator IA = ToolToOutLang.find(NodeA);
     StringMap<StringSet<> >::iterator IB = ToolToInLang.find(NodeB);
 
+    if (NodeB == "root")
+      throw "Edges back to the root are not allowed!";
+
     if (NodeA != "root") {
-      if (IA != IAE && IB != IBE && IB->second.count(IA->second) == 0)
+      if (IA == ToolToOutLang.end())
+        throw NodeA + ": no output language defined!";
+      if (IB == ToolToInLang.end())
+        throw NodeB + ": no input language defined!";
+
+      if (!Intersect(IA->second, IB->second)) {
         throw "Edge " + NodeA + "->" + NodeB
           + ": output->input language mismatch";
+      }
     }
-
-    if (NodeB == "root")
-      throw "Edges back to the root are not allowed!";
   }
 }
 
@@ -1178,25 +1180,20 @@ class ExtractOptionNames {
     if (ActionName == "forward" || ActionName == "forward_as" ||
         ActionName == "forward_value" ||
         ActionName == "forward_transformed_value" ||
-        ActionName == "switch_on" || ActionName == "any_switch_on" ||
-        ActionName == "parameter_equals" ||
-        ActionName == "element_in_list" || ActionName == "not_empty" ||
-        ActionName == "empty") {
+        ActionName == "parameter_equals" || ActionName == "element_in_list") {
       CheckNumberOfArguments(Stmt, 1);
 
       Init* Arg = Stmt.getArg(0);
-      if (typeid(*Arg) == typeid(StringInit)) {
-        const std::string& Name = InitPtrToString(Arg);
-        OptionNames_.insert(Name);
-      }
-      else {
-        // It's a list.
-        const ListInit& List = InitPtrToList(Arg);
-        for (ListInit::const_iterator B = List.begin(), E = List.end();
-             B != E; ++B) {
-          const std::string& Name = InitPtrToString(*B);
-          OptionNames_.insert(Name);
-        }
+      if (typeid(*Arg) == typeid(StringInit))
+        OptionNames_.insert(InitPtrToString(Arg));
+    }
+    else if (ActionName == "any_switch_on" || ActionName == "switch_on" ||
+             ActionName == "any_not_empty" || ActionName == "any_empty" ||
+             ActionName == "not_empty" || ActionName == "empty") {
+      for (unsigned i = 0, NumArgs = Stmt.getNumArgs(); i < NumArgs; ++i) {
+        Init* Arg = Stmt.getArg(i);
+        if (typeid(*Arg) == typeid(StringInit))
+          OptionNames_.insert(InitPtrToString(Arg));
       }
     }
     else if (ActionName == "and" || ActionName == "or" || ActionName == "not") {
@@ -1211,6 +1208,7 @@ public:
   {}
 
   void operator()(const Init* Statement) {
+    // Statement is either a dag, or a list of dags.
     if (typeid(*Statement) == typeid(ListInit)) {
       const ListInit& DagList = *static_cast<const ListInit*>(Statement);
       for (ListInit::const_iterator B = DagList.begin(), E = DagList.end();
@@ -1291,24 +1289,20 @@ bool EmitCaseTest0Args(const std::string& TestName, raw_ostream& O) {
   return false;
 }
 
-/// EmitListTest - Helper function used by EmitCaseTest1ArgList().
+/// EmitMultipleArgumentTest - Helper function used by
+/// EmitCaseTestMultipleArgs()
 template <typename F>
-void EmitListTest(const ListInit& L, const char* LogicOp,
-                  F Callback, raw_ostream& O)
+void EmitMultipleArgumentTest(const DagInit& D, const char* LogicOp,
+                              F Callback, raw_ostream& O)
 {
-  // This is a lot like EmitLogicalOperationTest, but works on ListInits instead
-  // of Dags...
-  bool isFirst = true;
-  for (ListInit::const_iterator B = L.begin(), E = L.end(); B != E; ++B) {
-    if (isFirst)
-      isFirst = false;
-    else
-      O << ' ' << LogicOp << ' ';
-    Callback(InitPtrToString(*B), O);
+  for (unsigned i = 0, NumArgs = D.getNumArgs(); i < NumArgs; ++i) {
+    if (i != 0)
+       O << ' ' << LogicOp << ' ';
+    Callback(InitPtrToString(D.getArg(i)), O);
   }
 }
 
-// Callbacks for use with EmitListTest.
+// Callbacks for use with EmitMultipleArgumentTest
 
 class EmitSwitchOn {
   const OptionDescriptions& OptDescs_;
@@ -1346,54 +1340,48 @@ public:
 };
 
 
-/// EmitCaseTest1ArgList - Helper function used by EmitCaseTest1Arg();
-bool EmitCaseTest1ArgList(const std::string& TestName,
-                          const DagInit& d,
-                          const OptionDescriptions& OptDescs,
-                          raw_ostream& O) {
-  const ListInit& L = InitPtrToList(d.getArg(0));
-
+/// EmitCaseTestMultipleArgs - Helper function used by EmitCaseTest1Arg()
+bool EmitCaseTestMultipleArgs (const std::string& TestName,
+                               const DagInit& d,
+                               const OptionDescriptions& OptDescs,
+                               raw_ostream& O) {
   if (TestName == "any_switch_on") {
-    EmitListTest(L, "||", EmitSwitchOn(OptDescs), O);
+    EmitMultipleArgumentTest(d, "||", EmitSwitchOn(OptDescs), O);
     return true;
   }
   else if (TestName == "switch_on") {
-    EmitListTest(L, "&&", EmitSwitchOn(OptDescs), O);
+    EmitMultipleArgumentTest(d, "&&", EmitSwitchOn(OptDescs), O);
     return true;
   }
   else if (TestName == "any_not_empty") {
-    EmitListTest(L, "||", EmitEmptyTest(true, OptDescs), O);
+    EmitMultipleArgumentTest(d, "||", EmitEmptyTest(true, OptDescs), O);
     return true;
   }
   else if (TestName == "any_empty") {
-    EmitListTest(L, "||", EmitEmptyTest(false, OptDescs), O);
+    EmitMultipleArgumentTest(d, "||", EmitEmptyTest(false, OptDescs), O);
     return true;
   }
   else if (TestName == "not_empty") {
-    EmitListTest(L, "&&", EmitEmptyTest(true, OptDescs), O);
+    EmitMultipleArgumentTest(d, "&&", EmitEmptyTest(true, OptDescs), O);
     return true;
   }
   else if (TestName == "empty") {
-    EmitListTest(L, "&&", EmitEmptyTest(false, OptDescs), O);
+    EmitMultipleArgumentTest(d, "&&", EmitEmptyTest(false, OptDescs), O);
     return true;
   }
 
   return false;
 }
 
-/// EmitCaseTest1ArgStr - Helper function used by EmitCaseTest1Arg();
-bool EmitCaseTest1ArgStr(const std::string& TestName,
-                         const DagInit& d,
-                         const OptionDescriptions& OptDescs,
-                         raw_ostream& O) {
-  const std::string& OptName = InitPtrToString(d.getArg(0));
+/// EmitCaseTest1Arg - Helper function used by EmitCaseTest1OrMoreArgs()
+bool EmitCaseTest1Arg (const std::string& TestName,
+                       const DagInit& d,
+                       const OptionDescriptions& OptDescs,
+                       raw_ostream& O) {
+  const std::string& Arg = InitPtrToString(d.getArg(0));
 
-  if (TestName == "switch_on") {
-    apply(EmitSwitchOn(OptDescs), OptName, O);
-    return true;
-  }
-  else if (TestName == "input_languages_contain") {
-    O << "InLangs.count(\"" << OptName << "\") != 0";
+  if (TestName == "input_languages_contain") {
+    O << "InLangs.count(\"" << Arg << "\") != 0";
     return true;
   }
   else if (TestName == "in_language") {
@@ -1401,28 +1389,22 @@ bool EmitCaseTest1ArgStr(const std::string& TestName,
     // tools can process several files in different languages simultaneously.
 
     // TODO: make this work with Edge::Weight (if possible).
-    O << "LangMap.GetLanguage(inFile) == \"" << OptName << '\"';
-    return true;
-  }
-  else if (TestName == "not_empty" || TestName == "empty") {
-    bool EmitNegate = (TestName == "not_empty");
-    apply(EmitEmptyTest(EmitNegate, OptDescs), OptName, O);
+    O << "LangMap.GetLanguage(inFile) == \"" << Arg << '\"';
     return true;
   }
 
   return false;
 }
 
-/// EmitCaseTest1Arg - Helper function used by EmitCaseConstructHandler();
-bool EmitCaseTest1Arg(const std::string& TestName,
-                      const DagInit& d,
-                      const OptionDescriptions& OptDescs,
-                      raw_ostream& O) {
+/// EmitCaseTest1OrMoreArgs - Helper function used by
+/// EmitCaseConstructHandler()
+bool EmitCaseTest1OrMoreArgs(const std::string& TestName,
+                             const DagInit& d,
+                             const OptionDescriptions& OptDescs,
+                             raw_ostream& O) {
   CheckNumberOfArguments(d, 1);
-  if (typeid(*d.getArg(0)) == typeid(ListInit))
-    return EmitCaseTest1ArgList(TestName, d, OptDescs, O);
-  else
-    return EmitCaseTest1ArgStr(TestName, d, OptDescs, O);
+  return EmitCaseTest1Arg(TestName, d, OptDescs, O) ||
+    EmitCaseTestMultipleArgs(TestName, d, OptDescs, O);
 }
 
 /// EmitCaseTest2Args - Helper function used by EmitCaseConstructHandler().
@@ -1466,10 +1448,10 @@ void EmitLogicalOperationTest(const DagInit& d, const char* LogicOp,
                               const OptionDescriptions& OptDescs,
                               raw_ostream& O) {
   O << '(';
-  for (unsigned j = 0, NumArgs = d.getNumArgs(); j < NumArgs; ++j) {
-    const DagInit& InnerTest = InitPtrToDag(d.getArg(j));
+  for (unsigned i = 0, NumArgs = d.getNumArgs(); i < NumArgs; ++i) {
+    const DagInit& InnerTest = InitPtrToDag(d.getArg(i));
     EmitCaseTest(InnerTest, IndentLevel, OptDescs, O);
-    if (j != NumArgs - 1) {
+    if (i != NumArgs - 1) {
       O << ")\n";
       O.indent(IndentLevel + Indent1) << ' ' << LogicOp << " (";
     }
@@ -1503,7 +1485,7 @@ void EmitCaseTest(const DagInit& d, unsigned IndentLevel,
     EmitLogicalNot(d, IndentLevel, OptDescs, O);
   else if (EmitCaseTest0Args(TestName, O))
     return;
-  else if (EmitCaseTest1Arg(TestName, d, OptDescs, O))
+  else if (EmitCaseTest1OrMoreArgs(TestName, d, OptDescs, O))
     return;
   else if (EmitCaseTest2Args(TestName, d, IndentLevel, OptDescs, O))
     return;
@@ -1550,10 +1532,12 @@ public:
   {}
 
   void operator() (const Init* Statement, unsigned IndentLevel) {
+    // Is this a nested 'case'?
+    bool IsCase = dynamic_cast<const DagInit*>(Statement) &&
+      GetOperatorName(static_cast<const DagInit&>(*Statement)) == "case";
 
-    // Ignore nested 'case' DAG.
-    if (!(dynamic_cast<const DagInit*>(Statement) &&
-          GetOperatorName(static_cast<const DagInit&>(*Statement)) == "case")) {
+    // If so, ignore it, it is handled by our caller, WalkCase.
+    if (!IsCase) {
       if (typeid(*Statement) == typeid(ListInit)) {
         const ListInit& DagList = *static_cast<const ListInit*>(Statement);
         for (ListInit::const_iterator B = DagList.begin(), E = DagList.end();
@@ -2250,11 +2234,8 @@ void EmitInOutLanguageMethods (const ToolDescription& D, raw_ostream& O) {
   O.indent(Indent2) << "return InputLanguages_;\n";
   O.indent(Indent1) << "}\n\n";
 
-  if (D.OutLanguage.empty())
-    throw "Tool " + D.Name + " has no 'out_language' property!";
-
-  O.indent(Indent1) << "const char* OutputLanguage() const {\n";
-  O.indent(Indent2) << "return \"" << D.OutLanguage << "\";\n";
+  O.indent(Indent1) << "const char** OutputLanguages() const {\n";
+  O.indent(Indent2) << "return OutputLanguages_;\n";
   O.indent(Indent1) << "}\n\n";
 }
 
@@ -2299,17 +2280,28 @@ void EmitWorksOnEmptyMethod (const ToolDescription& D,
   O.indent(Indent1) << "}\n\n";
 }
 
+/// EmitStrArray - Emit definition of a 'const char**' static member
+/// variable. Helper used by EmitStaticMemberDefinitions();
+void EmitStrArray(const std::string& Name, const std::string& VarName,
+                  const StrVector& StrVec, raw_ostream& O) {
+  O << "const char* " << Name << "::" << VarName << "[] = {";
+  for (StrVector::const_iterator B = StrVec.begin(), E = StrVec.end();
+       B != E; ++B)
+    O << '\"' << *B << "\", ";
+  O << "0};\n";
+}
+
 /// EmitStaticMemberDefinitions - Emit static member definitions for a
 /// given Tool class.
 void EmitStaticMemberDefinitions(const ToolDescription& D, raw_ostream& O) {
   if (D.InLanguage.empty())
     throw "Tool " + D.Name + " has no 'in_language' property!";
+  if (D.OutLanguage.empty())
+    throw "Tool " + D.Name + " has no 'out_language' property!";
 
-  O << "const char* " << D.Name << "::InputLanguages_[] = {";
-  for (StrVector::const_iterator B = D.InLanguage.begin(),
-         E = D.InLanguage.end(); B != E; ++B)
-    O << '\"' << *B << "\", ";
-  O << "0};\n\n";
+  EmitStrArray(D.Name, "InputLanguages_", D.InLanguage, O);
+  EmitStrArray(D.Name, "OutputLanguages_", D.OutLanguage, O);
+  O << '\n';
 }
 
 /// EmitToolClassDefinition - Emit a Tool class definition.
@@ -2327,7 +2319,8 @@ void EmitToolClassDefinition (const ToolDescription& D,
     O << "Tool";
 
   O << " {\nprivate:\n";
-  O.indent(Indent1) << "static const char* InputLanguages_[];\n\n";
+  O.indent(Indent1) << "static const char* InputLanguages_[];\n";
+  O.indent(Indent1) << "static const char* OutputLanguages_[];\n\n";
 
   O << "public:\n";
   EmitNameMethod(D, O);
@@ -2448,21 +2441,13 @@ class EmitPreprocessOptionsCallback :
 
   const OptionDescriptions& OptDescs_;
 
-  void onListOrDag(const DagInit& d, HandlerImpl h,
-                   unsigned IndentLevel, raw_ostream& O) const
+  void onEachArgument(const DagInit& d, HandlerImpl h,
+                      unsigned IndentLevel, raw_ostream& O) const
   {
     CheckNumberOfArguments(d, 1);
-    const Init* I = d.getArg(0);
 
-    // If I is a list, apply h to each element.
-    if (typeid(*I) == typeid(ListInit)) {
-      const ListInit& L = *static_cast<const ListInit*>(I);
-      for (ListInit::const_iterator B = L.begin(), E = L.end(); B != E; ++B)
-        ((this)->*(h))(*B, IndentLevel, O);
-    }
-    // Otherwise, apply h to I.
-    else {
-      ((this)->*(h))(I, IndentLevel, O);
+    for (unsigned i = 0, NumArgs = d.getNumArgs(); i < NumArgs; ++i) {
+      ((this)->*(h))(d.getArg(i), IndentLevel, O);
     }
   }
 
@@ -2489,16 +2474,17 @@ class EmitPreprocessOptionsCallback :
   void onUnsetOption(const DagInit& d,
                      unsigned IndentLevel, raw_ostream& O) const
   {
-    this->onListOrDag(d, &EmitPreprocessOptionsCallback::onUnsetOptionImpl,
-                      IndentLevel, O);
+    this->onEachArgument(d, &EmitPreprocessOptionsCallback::onUnsetOptionImpl,
+                         IndentLevel, O);
   }
 
-  void onSetOptionImpl(const DagInit& d,
+  void onSetOptionImpl(const DagInit& D,
                        unsigned IndentLevel, raw_ostream& O) const {
-    CheckNumberOfArguments(d, 2);
-    const std::string& OptName = InitPtrToString(d.getArg(0));
-    const Init* Value = d.getArg(1);
+    CheckNumberOfArguments(D, 2);
+
+    const std::string& OptName = InitPtrToString(D.getArg(0));
     const OptionDescription& OptDesc = OptDescs_.FindOption(OptName);
+    const Init* Value = D.getArg(1);
 
     if (OptDesc.isList()) {
       const ListInit& List = InitPtrToList(Value);
@@ -2528,7 +2514,7 @@ class EmitPreprocessOptionsCallback :
                             << " = \"" << Str << "\";\n";
     }
     else {
-      throw "Can't apply 'set_option' to alias option -" + OptName + " !";
+      throw "Can't apply 'set_option' to alias option '" + OptName + "'!";
     }
   }
 
@@ -2548,15 +2534,22 @@ class EmitPreprocessOptionsCallback :
   {
     CheckNumberOfArguments(d, 1);
 
-    // Two arguments: (set_option "parameter", VALUE), where VALUE can be a
-    // boolean, a string or a string list.
-    if (d.getNumArgs() > 1)
-      this->onSetOptionImpl(d, IndentLevel, O);
-    // One argument: (set_option "switch")
-    // or (set_option ["switch1", "switch2", ...])
-    else
-      this->onListOrDag(d, &EmitPreprocessOptionsCallback::onSetSwitch,
-                        IndentLevel, O);
+    // 2-argument form: (set_option "A", true), (set_option "B", "C"),
+    // (set_option "D", ["E", "F"])
+    if (d.getNumArgs() == 2) {
+      const OptionDescription& OptDesc =
+        OptDescs_.FindOption(InitPtrToString(d.getArg(0)));
+      const Init* Opt2 = d.getArg(1);
+
+      if (!OptDesc.isSwitch() || typeid(*Opt2) != typeid(StringInit)) {
+        this->onSetOptionImpl(d, IndentLevel, O);
+        return;
+      }
+    }
+
+    // Multiple argument form: (set_option "A"), (set_option "B", "C", "D")
+    this->onEachArgument(d, &EmitPreprocessOptionsCallback::onSetSwitch,
+                         IndentLevel, O);
   }
 
 public:
@@ -2661,10 +2654,11 @@ void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
 {
   O << "int PopulateLanguageMap (LanguageMap& langMap) {\n";
 
-  // For each LangMap:
+  // For each LanguageMap:
   const RecordVector& LangMaps =
     Records.getAllDerivedDefinitions("LanguageMap");
 
+  // Call DoEmitPopulateLanguageMap.
   for (RecordVector::const_iterator B = LangMaps.begin(),
          E = LangMaps.end(); B!=E; ++B) {
     ListInit* LangMap = (*B)->getValueAsListInit("map");
@@ -2899,7 +2893,7 @@ public:
       return;
     }
 
-    // We're invoked on a command line.
+    // We're invoked on a command line string.
     this->onCmdLine(InitPtrToString(Arg));
   }
 
@@ -3041,7 +3035,8 @@ void CheckDriverData(DriverData& Data) {
   CheckForSuperfluousOptions(Data.Edges, Data.ToolDescs, Data.OptDescs);
 }
 
-void EmitDriverCode(const DriverData& Data, raw_ostream& O) {
+void EmitDriverCode(const DriverData& Data, 
+                    raw_ostream& O, RecordKeeper &Records) {
   // Emit file header.
   EmitIncludes(O);
 
@@ -3102,7 +3097,7 @@ void LLVMCConfigurationEmitter::run (raw_ostream &O) {
     CheckDriverData(Data);
 
     this->EmitSourceFileHeader("llvmc-based driver: auto-generated code", O);
-    EmitDriverCode(Data, O);
+    EmitDriverCode(Data, O, Records);
 
   } catch (std::exception& Error) {
     throw Error.what() + std::string(" - usually this means a syntax error.");
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.h b/utils/TableGen/LLVMCConfigurationEmitter.h
index b37b83fb9255..0f2ff3719678 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.h
+++ b/utils/TableGen/LLVMCConfigurationEmitter.h
@@ -21,8 +21,10 @@ namespace llvm {
   /// LLVMCConfigurationEmitter - TableGen backend that generates
   /// configuration code for LLVMC.
   class LLVMCConfigurationEmitter : public TableGenBackend {
+    RecordKeeper &Records;
   public:
-    explicit LLVMCConfigurationEmitter(RecordKeeper&) {}
+    explicit LLVMCConfigurationEmitter(RecordKeeper &records) : 
+      Records(records) {}
 
     // run - Output the asmwriter, returning true on failure.
     void run(raw_ostream &o);
diff --git a/utils/TableGen/Makefile b/utils/TableGen/Makefile
index f27cd995783e..c01b6602faa3 100644
--- a/utils/TableGen/Makefile
+++ b/utils/TableGen/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TOOLNAME = tblgen
-USEDLIBS = LLVMSupport.a LLVMSystem.a
+USEDLIBS = LLVMSupport.a
 REQUIRES_EH := 1
 REQUIRES_RTTI := 1
 
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 0a12f3766699..64224d9e51d0 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -8,17 +8,18 @@
 //===----------------------------------------------------------------------===//
 //
 // This tablegen backend is responsible for emitting arm_neon.h, which includes
-// a declaration and definition of each function specified by the ARM NEON 
+// a declaration and definition of each function specified by the ARM NEON
 // compiler interface.  See ARM document DUI0348B.
 //
 // Each NEON instruction is implemented in terms of 1 or more functions which
-// are suffixed with the element type of the input vectors.  Functions may be 
+// are suffixed with the element type of the input vectors.  Functions may be
 // implemented in terms of generic vector operations such as +, *, -, etc. or
 // by calling a __builtin_-prefixed function which will be handled by clang's
 // CodeGen library.
 //
 // Additional validation code can be generated by this file when runHeader() is
-// called, rather than the normal run() entry point.
+// called, rather than the normal run() entry point.  A complete set of tests
+// for Neon intrinsics can be generated by calling the runTests() entry point.
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,11 +39,11 @@ static void ParseTypes(Record *r, std::string &s,
                        SmallVectorImpl<StringRef> &TV) {
   const char *data = s.data();
   int len = 0;
-  
+
   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
       continue;
-    
+
     switch (data[len]) {
       case 'c':
       case 's':
@@ -72,6 +73,8 @@ static char Widen(const char t) {
       return 'i';
     case 'i':
       return 'l';
+    case 'h':
+      return 'f';
     default: throw "unhandled type in widen!";
   }
   return '\0';
@@ -89,7 +92,7 @@ static char Narrow(const char t) {
       return 'i';
     case 'f':
       return 'h';
-    default: throw "unhandled type in widen!";
+    default: throw "unhandled type in narrow!";
   }
   return '\0';
 }
@@ -98,25 +101,25 @@ static char Narrow(const char t) {
 /// the quad-vector, polynomial, or unsigned modifiers set.
 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
   unsigned off = 0;
-  
+
   // remember quad.
   if (ty[off] == 'Q') {
     quad = true;
     ++off;
   }
-  
+
   // remember poly.
   if (ty[off] == 'P') {
     poly = true;
     ++off;
   }
-  
+
   // remember unsigned.
   if (ty[off] == 'U') {
     usgn = true;
     ++off;
   }
-  
+
   // base type to get the type string for.
   return ty[off];
 }
@@ -134,7 +137,12 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
       break;
     case 'u':
       usgn = true;
+      poly = false;
+      if (type == 'f')
+        type = 'i';
+      break;
     case 'x':
+      usgn = false;
       poly = false;
       if (type == 'f')
         type = 'i';
@@ -155,6 +163,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
     case 'n':
       type = Widen(type);
       break;
+    case 'i':
+      type = 'i';
+      scal = true;
+      break;
     case 'l':
       type = 'l';
       scal = true;
@@ -189,36 +201,31 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
 }
 
 /// TypeString - for a modifier and type, generate the name of the typedef for
-/// that type.  If generic is true, emit the generic vector type rather than
-/// the public NEON type. QUc -> uint8x8_t / __neon_uint8x8_t.
-static std::string TypeString(const char mod, StringRef typestr,
-                              bool generic = false) {
+/// that type.  QUc -> uint8x8_t.
+static std::string TypeString(const char mod, StringRef typestr) {
   bool quad = false;
   bool poly = false;
   bool usgn = false;
   bool scal = false;
   bool cnst = false;
   bool pntr = false;
-  
+
   if (mod == 'v')
     return "void";
   if (mod == 'i')
     return "int";
-  
+
   // base type to get the type string for.
   char type = ClassifyType(typestr, quad, poly, usgn);
-  
+
   // Based on the modifying character, change the type and width if necessary.
   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
-  
+
   SmallString<128> s;
-  
-  if (generic)
-    s += "__neon_";
-  
+
   if (usgn)
     s.push_back('u');
-  
+
   switch (type) {
     case 'c':
       s += poly ? "poly8" : "int8";
@@ -267,16 +274,16 @@ static std::string TypeString(const char mod, StringRef typestr,
     s += "x3";
   if (mod == '4')
     s += "x4";
-  
+
   // Append _t, finishing the type string typedef type.
   s += "_t";
-  
+
   if (cnst)
     s += " const";
-  
+
   if (pntr)
     s += " *";
-  
+
   return s.str();
 }
 
@@ -291,23 +298,25 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
   bool scal = false;
   bool cnst = false;
   bool pntr = false;
-  
+
   if (mod == 'v')
-    return "v";
+    return "v"; // void
   if (mod == 'i')
-    return "i";
-  
+    return "i"; // int
+
   // base type to get the type string for.
   char type = ClassifyType(typestr, quad, poly, usgn);
-  
+
   // Based on the modifying character, change the type and width if necessary.
   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
 
+  // All pointers are void* pointers.  Change type to 'v' now.
   if (pntr) {
     usgn = false;
     poly = false;
     type = 'v';
   }
+  // Treat half-float ('h') types as unsigned short ('s') types.
   if (type == 'h') {
     type = 's';
     usgn = true;
@@ -319,12 +328,14 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
 
     if (usgn)
       s.push_back('U');
-    
-    if (type == 'l')
+    else if (type == 'c')
+      s.push_back('S'); // make chars explicitly signed
+
+    if (type == 'l') // 64-bit long
       s += "LLi";
     else
       s.push_back(type);
- 
+
     if (cnst)
       s.push_back('C');
     if (pntr)
@@ -337,8 +348,8 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
   // fashion, storing them to a pointer arg.
   if (ret) {
-    if (mod == '2' || mod == '3' || mod == '4')
-      return "vv*";
+    if (mod >= '2' && mod <= '4')
+      return "vv*"; // void result with void* first argument
     if (mod == 'f' || (ck != ClassB && type == 'f'))
       return quad ? "V4f" : "V2f";
     if (ck != ClassB && type == 's')
@@ -347,17 +358,17 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
       return quad ? "V4i" : "V2i";
     if (ck != ClassB && type == 'l')
       return quad ? "V2LLi" : "V1LLi";
-    
-    return quad ? "V16c" : "V8c";
-  }    
+
+    return quad ? "V16Sc" : "V8Sc";
+  }
 
   // Non-return array types are passed as individual vectors.
   if (mod == '2')
-    return quad ? "V16cV16c" : "V8cV8c";
+    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
   if (mod == '3')
-    return quad ? "V16cV16cV16c" : "V8cV8cV8c";
+    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
   if (mod == '4')
-    return quad ? "V16cV16cV16cV16c" : "V8cV8cV8cV8c";
+    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
 
   if (mod == 'f' || (ck != ClassB && type == 'f'))
     return quad ? "V4f" : "V2f";
@@ -367,71 +378,25 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
     return quad ? "V4i" : "V2i";
   if (ck != ClassB && type == 'l')
     return quad ? "V2LLi" : "V1LLi";
-  
-  return quad ? "V16c" : "V8c";
-}
 
-/// StructTag - generate the name of the struct tag for a type.
-/// These names are mandated by ARM's ABI.
-static std::string StructTag(StringRef typestr) {
-  bool quad = false;
-  bool poly = false;
-  bool usgn = false;
-  
-  // base type to get the type string for.
-  char type = ClassifyType(typestr, quad, poly, usgn);
-  
-  SmallString<128> s;
-  s += "__simd";
-  s += quad ? "128_" : "64_";
-  if (usgn)
-    s.push_back('u');
-  
-  switch (type) {
-    case 'c':
-      s += poly ? "poly8" : "int8";
-      break;
-    case 's':
-      s += poly ? "poly16" : "int16";
-      break;
-    case 'i':
-      s += "int32";
-      break;
-    case 'l':
-      s += "int64";
-      break;
-    case 'h':
-      s += "float16";
-      break;
-    case 'f':
-      s += "float32";
-      break;
-    default:
-      throw "unhandled type!";
-      break;
-  }
-
-  // Append _t, finishing the struct tag name.
-  s += "_t";
-  
-  return s.str();
+  return quad ? "V16Sc" : "V8Sc";
 }
 
-/// MangleName - Append a type or width suffix to a base neon function name, 
+/// MangleName - Append a type or width suffix to a base neon function name,
 /// and insert a 'q' in the appropriate location if the operation works on
 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
 static std::string MangleName(const std::string &name, StringRef typestr,
                               ClassKind ck) {
   if (name == "vcvt_f32_f16")
     return name;
-  
+
   bool quad = false;
   bool poly = false;
   bool usgn = false;
   char type = ClassifyType(typestr, quad, poly, usgn);
 
   std::string s = name;
-  
+
   switch (type) {
   case 'c':
     switch (ck) {
@@ -487,8 +452,8 @@ static std::string MangleName(const std::string &name, StringRef typestr,
   }
   if (ck == ClassB)
     s += "_v";
-    
-  // Insert a 'q' before the first '_' character so that it ends up before 
+
+  // Insert a 'q' before the first '_' character so that it ends up before
   // _lane or _n on vector-scalar operations.
   if (quad) {
     size_t pos = s.find('_');
@@ -501,177 +466,344 @@ static std::string MangleName(const std::string &name, StringRef typestr,
 static std::string GenArgs(const std::string &proto, StringRef typestr) {
   bool define = proto.find('i') != std::string::npos;
   char arg = 'a';
-  
+
   std::string s;
   s += "(";
-  
+
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
-    if (!define) {
-      s += TypeString(proto[i], typestr);
-      s.push_back(' ');
+    if (define) {
+      // Immediate macro arguments are used directly instead of being assigned
+      // to local temporaries; prepend an underscore prefix to make their
+      // names consistent with the local temporaries.
+      if (proto[i] == 'i')
+        s += "__";
+    } else {
+      s += TypeString(proto[i], typestr) + " __";
     }
     s.push_back(arg);
     if ((i + 1) < e)
       s += ", ";
   }
-  
+
   s += ")";
   return s;
 }
 
-static std::string Duplicate(unsigned nElts, StringRef typestr, 
+// Macro arguments are not type-checked like inline function arguments, so
+// assign them to local temporaries to get the right type checking.
+static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
+  char arg = 'a';
+  std::string s;
+
+  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
+    // Do not create a temporary for an immediate argument.
+    // That would defeat the whole point of using a macro!
+    if (proto[i] == 'i') continue;
+
+    s += TypeString(proto[i], typestr) + " __";
+    s.push_back(arg);
+    s += " = (";
+    s.push_back(arg);
+    s += "); ";
+  }
+
+  s += "\\\n  ";
+  return s;
+}
+
+// Use the vmovl builtin to sign-extend or zero-extend a vector.
+static std::string Extend(StringRef typestr, const std::string &a) {
+  std::string s;
+  s = MangleName("vmovl", typestr, ClassS);
+  s += "(" + a + ")";
+  return s;
+}
+
+static std::string Duplicate(unsigned nElts, StringRef typestr,
                              const std::string &a) {
   std::string s;
-  
-  s = "(__neon_" + TypeString('d', typestr) + "){ ";
+
+  s = "(" + TypeString('d', typestr) + "){ ";
   for (unsigned i = 0; i != nElts; ++i) {
     s += a;
     if ((i + 1) < nElts)
       s += ", ";
   }
   s += " }";
-  
+
   return s;
 }
 
-// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
-// If structTypes is true, the NEON types are structs of vector types rather
-// than vector types, and the call becomes "a.val + b.val"
-static std::string GenOpString(OpKind op, const std::string &proto,
-                               StringRef typestr, bool structTypes = true) {
-  bool dummy, quad = false;
+static std::string SplatLane(unsigned nElts, const std::string &vec,
+                             const std::string &lane) {
+  std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
+  for (unsigned i = 0; i < nElts; ++i)
+    s += ", " + lane;
+  s += ")";
+  return s;
+}
+
+static unsigned GetNumElements(StringRef typestr, bool &quad) {
+  quad = false;
+  bool dummy = false;
   char type = ClassifyType(typestr, quad, dummy, dummy);
   unsigned nElts = 0;
   switch (type) {
-    case 'c': nElts = 8; break;
-    case 's': nElts = 4; break;
-    case 'i': nElts = 2; break;
-    case 'l': nElts = 1; break;
-    case 'h': nElts = 4; break;
-    case 'f': nElts = 2; break;
+  case 'c': nElts = 8; break;
+  case 's': nElts = 4; break;
+  case 'i': nElts = 2; break;
+  case 'l': nElts = 1; break;
+  case 'h': nElts = 4; break;
+  case 'f': nElts = 2; break;
+  default:
+    throw "unhandled type!";
+    break;
   }
-  
+  if (quad) nElts <<= 1;
+  return nElts;
+}
+
+// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
+static std::string GenOpString(OpKind op, const std::string &proto,
+                               StringRef typestr) {
+  bool quad;
+  unsigned nElts = GetNumElements(typestr, quad);
+
+  // If this builtin takes an immediate argument, we need to #define it rather
+  // than use a standard declaration, so that SemaChecking can range check
+  // the immediate passed by the user.
+  bool define = proto.find('i') != std::string::npos;
+
   std::string ts = TypeString(proto[0], typestr);
-  std::string s = ts + " r; r";
-  
-  if (structTypes)
-    s += ".val";
-  
-  s += " = ";
-
-  std::string a, b, c;
-  if (proto.size() > 1)
-    a = (structTypes && proto[1] != 'l' && proto[1] != 's') ? "a.val" : "a";
-  b = structTypes ? "b.val" : "b";
-  c = structTypes ? "c.val" : "c";
-  
+  std::string s;
+  if (!define) {
+    s = "return ";
+  }
+
   switch(op) {
   case OpAdd:
-    s += a + " + " + b;
+    s += "__a + __b;";
+    break;
+  case OpAddl:
+    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
+    break;
+  case OpAddw:
+    s += "__a + " + Extend(typestr, "__b") + ";";
     break;
   case OpSub:
-    s += a + " - " + b;
+    s += "__a - __b;";
+    break;
+  case OpSubl:
+    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
+    break;
+  case OpSubw:
+    s += "__a - " + Extend(typestr, "__b") + ";";
     break;
   case OpMulN:
-    b = Duplicate(nElts << (int)quad, typestr, "b");
+    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
+    break;
+  case OpMulLane:
+    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
+    break;
   case OpMul:
-    s += a + " * " + b;
+    s += "__a * __b;";
+    break;
+  case OpMullN:
+    s += Extend(typestr, "__a") + " * " +
+      Extend(typestr, Duplicate(nElts << (int)quad, typestr, "__b")) + ";";
+    break;
+  case OpMullLane:
+    s += Extend(typestr, "__a") + " * " +
+      Extend(typestr, SplatLane(nElts, "__b", "__c")) + ";";
+    break;
+  case OpMull:
+    s += Extend(typestr, "__a") + " * " + Extend(typestr, "__b") + ";";
     break;
   case OpMlaN:
-    c = Duplicate(nElts << (int)quad, typestr, "c");
+    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
+    break;
+  case OpMlaLane:
+    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
+    break;
   case OpMla:
-    s += a + " + ( " + b + " * " + c + " )";
+    s += "__a + (__b * __c);";
+    break;
+  case OpMlalN:
+    s += "__a + (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, Duplicate(nElts, typestr, "__c")) + ");";
+    break;
+  case OpMlalLane:
+    s += "__a + (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, SplatLane(nElts, "__c", "__d")) + ");";
+    break;
+  case OpMlal:
+    s += "__a + (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, "__c") + ");";
     break;
   case OpMlsN:
-    c = Duplicate(nElts << (int)quad, typestr, "c");
+    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
+    break;
+  case OpMlsLane:
+    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
+    break;
   case OpMls:
-    s += a + " - ( " + b + " * " + c + " )";
+    s += "__a - (__b * __c);";
+    break;
+  case OpMlslN:
+    s += "__a - (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, Duplicate(nElts, typestr, "__c")) + ");";
+    break;
+  case OpMlslLane:
+    s += "__a - (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, SplatLane(nElts, "__c", "__d")) + ");";
+    break;
+  case OpMlsl:
+    s += "__a - (" + Extend(typestr, "__b") + " * " +
+      Extend(typestr, "__c") + ");";
+    break;
+  case OpQDMullLane:
+    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
+      SplatLane(nElts, "__b", "__c") + ");";
+    break;
+  case OpQDMlalLane:
+    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
+      SplatLane(nElts, "__c", "__d") + ");";
+    break;
+  case OpQDMlslLane:
+    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
+      SplatLane(nElts, "__c", "__d") + ");";
+    break;
+  case OpQDMulhLane:
+    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
+      SplatLane(nElts, "__b", "__c") + ");";
+    break;
+  case OpQRDMulhLane:
+    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
+      SplatLane(nElts, "__b", "__c") + ");";
     break;
   case OpEq:
-    s += "(__neon_" + ts + ")(" + a + " == " + b + ")";
+    s += "(" + ts + ")(__a == __b);";
     break;
   case OpGe:
-    s += "(__neon_" + ts + ")(" + a + " >= " + b + ")";
+    s += "(" + ts + ")(__a >= __b);";
     break;
   case OpLe:
-    s += "(__neon_" + ts + ")(" + a + " <= " + b + ")";
+    s += "(" + ts + ")(__a <= __b);";
     break;
   case OpGt:
-    s += "(__neon_" + ts + ")(" + a + " > " + b + ")";
+    s += "(" + ts + ")(__a > __b);";
     break;
   case OpLt:
-    s += "(__neon_" + ts + ")(" + a + " < " + b + ")";
+    s += "(" + ts + ")(__a < __b);";
     break;
   case OpNeg:
-    s += " -" + a;
+    s += " -__a;";
     break;
   case OpNot:
-    s += " ~" + a;
+    s += " ~__a;";
     break;
   case OpAnd:
-    s += a + " & " + b;
+    s += "__a & __b;";
     break;
   case OpOr:
-    s += a + " | " + b;
+    s += "__a | __b;";
     break;
   case OpXor:
-    s += a + " ^ " + b;
+    s += "__a ^ __b;";
     break;
   case OpAndNot:
-    s += a + " & ~" + b;
+    s += "__a & ~__b;";
     break;
   case OpOrNot:
-    s += a + " | ~" + b;
+    s += "__a | ~__b;";
     break;
   case OpCast:
-    s += "(__neon_" + ts + ")" + a;
+    s += "(" + ts + ")__a;";
     break;
   case OpConcat:
-    s += "__builtin_shufflevector((__neon_int64x1_t)" + a;
-    s += ", (__neon_int64x1_t)" + b + ", 0, 1)";
+    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
+    s += ", (int64x1_t)__b, 0, 1);";
     break;
   case OpHi:
-    s += "(__neon_int64x1_t)(((__neon_int64x2_t)" + a + ")[1])";
+    s += "(" + ts +
+      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
     break;
   case OpLo:
-    s += "(__neon_int64x1_t)(((__neon_int64x2_t)" + a + ")[0])";
+    s += "(" + ts +
+      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
     break;
   case OpDup:
-    s += Duplicate(nElts << (int)quad, typestr, a);
+    s += Duplicate(nElts, typestr, "__a") + ";";
+    break;
+  case OpDupLane:
+    s += SplatLane(nElts, "__a", "__b") + ";";
     break;
   case OpSelect:
     // ((0 & 1) | (~0 & 2))
+    s += "(" + ts + ")";
     ts = TypeString(proto[1], typestr);
-    s += "( " + a + " & (__neon_" + ts + ")" + b + ") | ";
-    s += "(~" + a + " & (__neon_" + ts + ")" + c + ")";
+    s += "((__a & (" + ts + ")__b) | ";
+    s += "(~__a & (" + ts + ")__c));";
     break;
   case OpRev16:
-    s += "__builtin_shufflevector(" + a + ", " + a;
-    for (unsigned i = 2; i <= nElts << (int)quad; i += 2)
+    s += "__builtin_shufflevector(__a, __a";
+    for (unsigned i = 2; i <= nElts; i += 2)
       for (unsigned j = 0; j != 2; ++j)
         s += ", " + utostr(i - j - 1);
-    s += ")";
+    s += ");";
     break;
-  case OpRev32:
-    nElts >>= 1;
-    s += "__builtin_shufflevector(" + a + ", " + a;
-    for (unsigned i = nElts; i <= nElts << (1 + (int)quad); i += nElts)
-      for (unsigned j = 0; j != nElts; ++j)
+  case OpRev32: {
+    unsigned WordElts = nElts >> (1 + (int)quad);
+    s += "__builtin_shufflevector(__a, __a";
+    for (unsigned i = WordElts; i <= nElts; i += WordElts)
+      for (unsigned j = 0; j != WordElts; ++j)
         s += ", " + utostr(i - j - 1);
-    s += ")";
+    s += ");";
     break;
-  case OpRev64:
-    s += "__builtin_shufflevector(" + a + ", " + a;
-    for (unsigned i = nElts; i <= nElts << (int)quad; i += nElts)
-      for (unsigned j = 0; j != nElts; ++j)
+  }
+  case OpRev64: {
+    unsigned DblWordElts = nElts >> (int)quad;
+    s += "__builtin_shufflevector(__a, __a";
+    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
+      for (unsigned j = 0; j != DblWordElts; ++j)
         s += ", " + utostr(i - j - 1);
-    s += ")";
+    s += ");";
+    break;
+  }
+  case OpAbdl: {
+    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
+    if (typestr[0] != 'U') {
+      // vabd results are always unsigned and must be zero-extended.
+      std::string utype = "U" + typestr.str();
+      s += "(" + TypeString(proto[0], typestr) + ")";
+      abd = "(" + TypeString('d', utype) + ")" + abd;
+      s += Extend(utype, abd) + ";";
+    } else {
+      s += Extend(typestr, abd) + ";";
+    }
+    break;
+  }
+  case OpAba:
+    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
     break;
+  case OpAbal: {
+    s += "__a + ";
+    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
+    if (typestr[0] != 'U') {
+      // vabd results are always unsigned and must be zero-extended.
+      std::string utype = "U" + typestr.str();
+      s += "(" + TypeString(proto[0], typestr) + ")";
+      abd = "(" + TypeString('d', utype) + ")" + abd;
+      s += Extend(utype, abd) + ";";
+    } else {
+      s += Extend(typestr, abd) + ";";
+    }
+    break;
+  }
   default:
     throw "unknown OpKind!";
     break;
   }
-  s += "; return r;";
   return s;
 }
 
@@ -688,10 +820,10 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   bool scal = false;
   bool cnst = false;
   bool pntr = false;
-  
+
   // Base type to get the type string for.
   char type = ClassifyType(typestr, quad, poly, usgn);
-  
+
   // Based on the modifying character, change the type and width if necessary.
   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
 
@@ -699,9 +831,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
     ret |= 0x08;
   if (quad && proto[1] != 'g')
     ret |= 0x10;
-  
+
   switch (type) {
-    case 'c': 
+    case 'c':
       ret |= poly ? 5 : 0;
       break;
     case 's':
@@ -727,65 +859,45 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
 }
 
 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
-// If structTypes is true, the NEON types are structs of vector types rather
-// than vector types, and the call becomes __builtin_neon_cls(a.val)
 static std::string GenBuiltin(const std::string &name, const std::string &proto,
-                              StringRef typestr, ClassKind ck,
-                              bool structTypes = true) {
-  bool dummy, quad = false;
-  char type = ClassifyType(typestr, quad, dummy, dummy);
-  unsigned nElts = 0;
-  switch (type) {
-    case 'c': nElts = 8; break;
-    case 's': nElts = 4; break;
-    case 'i': nElts = 2; break;
-    case 'l': nElts = 1; break;
-    case 'h': nElts = 4; break;
-    case 'f': nElts = 2; break;
-  }
-  if (quad) nElts <<= 1;
-
-  char arg = 'a';
+                              StringRef typestr, ClassKind ck) {
   std::string s;
 
   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   // sret-like argument.
-  bool sret = (proto[0] == '2' || proto[0] == '3' || proto[0] == '4');
+  bool sret = (proto[0] >= '2' && proto[0] <= '4');
 
   // If this builtin takes an immediate argument, we need to #define it rather
   // than use a standard declaration, so that SemaChecking can range check
   // the immediate passed by the user.
   bool define = proto.find('i') != std::string::npos;
 
-  // If all types are the same size, bitcasting the args will take care 
-  // of arg checking.  The actual signedness etc. will be taken care of with
-  // special enums.
+  // Check if the prototype has a scalar operand with the type of the vector
+  // elements.  If not, bitcasting the args will take care of arg checking.
+  // The actual signedness etc. will be taken care of with special enums.
   if (proto.find('s') == std::string::npos)
     ck = ClassB;
 
   if (proto[0] != 'v') {
     std::string ts = TypeString(proto[0], typestr);
-    
+
     if (define) {
       if (sret)
-        s += "({ " + ts + " r; ";
-      else if (proto[0] != 's')
-        s += "(" + ts + "){(__neon_" + ts + ")";
+        s += ts + " r; ";
+      else
+        s += "(" + ts + ")";
     } else if (sret) {
       s += ts + " r; ";
     } else {
-      s += ts + " r; r";
-      if (structTypes && proto[0] != 's' && proto[0] != 'i' && proto[0] != 'l')
-        s += ".val";
-      
-      s += " = ";
+      s += "return (" + ts + ")";
     }
   }
-  
+
   bool splat = proto.find('a') != std::string::npos;
-  
+
   s += "__builtin_neon_";
   if (splat) {
+    // Call the non-splat builtin: chop off the "_n" suffix from the name.
     std::string vname(name, 0, name.size()-2);
     s += MangleName(vname, typestr, ck);
   } else {
@@ -797,17 +909,32 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
   // builtins.
   if (sret)
     s += "&r, ";
-  
+
+  char arg = 'a';
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
     std::string args = std::string(&arg, 1);
-    if (define)
-      args = "(" + args + ")";
-    
+
+    // Use the local temporaries instead of the macro arguments.
+    args = "__" + args;
+
+    bool argQuad = false;
+    bool argPoly = false;
+    bool argUsgn = false;
+    bool argScalar = false;
+    bool dummy = false;
+    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
+    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
+                      dummy, dummy);
+
     // Handle multiple-vector values specially, emitting each subvector as an
     // argument to the __builtin.
-    if (structTypes && (proto[i] == '2' || proto[i] == '3' || proto[i] == '4')){
+    if (proto[i] >= '2' && proto[i] <= '4') {
+      // Check if an explicit cast is needed.
+      if (argType != 'c' || argPoly || argUsgn)
+        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
+
       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
-        s += args + ".val[" + utostr(vi) + "].val";
+        s += args + ".val[" + utostr(vi) + "]";
         if ((vi + 1) < ve)
           s += ", ";
       }
@@ -816,77 +943,158 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
 
       continue;
     }
-    
-    if (splat && (i + 1) == e) 
-      s += Duplicate(nElts, typestr, args);
-    else
-      s += args;
-    
-    if (structTypes && proto[i] != 's' && proto[i] != 'i' && proto[i] != 'l' &&
-        proto[i] != 'p' && proto[i] != 'c' && proto[i] != 'a') {
-      s += ".val";
+
+    if (splat && (i + 1) == e)
+      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
+
+    // Check if an explicit cast is needed.
+    if ((splat || !argScalar) &&
+        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
+      std::string argTypeStr = "c";
+      if (ck != ClassB)
+        argTypeStr = argType;
+      if (argQuad)
+        argTypeStr = "Q" + argTypeStr;
+      args = "(" + TypeString('d', argTypeStr) + ")" + args;
     }
+
+    s += args;
     if ((i + 1) < e)
       s += ", ";
   }
-  
+
   // Extra constant integer to hold type class enum for this function, e.g. s8
   if (ck == ClassB)
     s += ", " + utostr(GetNeonEnum(proto, typestr));
-  
-  if (define)
-    s += ")";
-  else
-    s += ");";
 
-  if (proto[0] != 'v') {
-    if (define) {
-      if (sret)
-        s += "; r; })";
-      else if (proto[0] != 's')
-        s += "}";
-    } else {
+  s += ");";
+
+  if (proto[0] != 'v' && sret) {
+    if (define)
+      s += " r;";
+    else
       s += " return r;";
-    }
   }
   return s;
 }
 
-static std::string GenBuiltinDef(const std::string &name, 
+static std::string GenBuiltinDef(const std::string &name,
                                  const std::string &proto,
                                  StringRef typestr, ClassKind ck) {
   std::string s("BUILTIN(__builtin_neon_");
 
-  // If all types are the same size, bitcasting the args will take care 
+  // If all types are the same size, bitcasting the args will take care
   // of arg checking.  The actual signedness etc. will be taken care of with
   // special enums.
   if (proto.find('s') == std::string::npos)
     ck = ClassB;
-  
+
   s += MangleName(name, typestr, ck);
   s += ", \"";
-  
+
   for (unsigned i = 0, e = proto.size(); i != e; ++i)
     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
 
   // Extra constant integer to hold type class enum for this function, e.g. s8
   if (ck == ClassB)
     s += "i";
-  
+
   s += "\", \"n\")";
   return s;
 }
 
+static std::string GenIntrinsic(const std::string &name,
+                                const std::string &proto,
+                                StringRef outTypeStr, StringRef inTypeStr,
+                                OpKind kind, ClassKind classKind) {
+  assert(!proto.empty() && "");
+  bool define = proto.find('i') != std::string::npos;
+  std::string s;
+
+  // static always inline + return type
+  if (define)
+    s += "#define ";
+  else
+    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
+
+  // Function name with type suffix
+  std::string mangledName = MangleName(name, outTypeStr, ClassS);
+  if (outTypeStr != inTypeStr) {
+    // If the input type is different (e.g., for vreinterpret), append a suffix
+    // for the input type.  String off a "Q" (quad) prefix so that MangleName
+    // does not insert another "q" in the name.
+    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
+    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
+    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
+  }
+  s += mangledName;
+
+  // Function arguments
+  s += GenArgs(proto, inTypeStr);
+
+  // Definition.
+  if (define) {
+    s += " __extension__ ({ \\\n  ";
+    s += GenMacroLocals(proto, inTypeStr);
+  } else {
+    s += " { \\\n  ";
+  }
+
+  if (kind != OpNone)
+    s += GenOpString(kind, proto, outTypeStr);
+  else
+    s += GenBuiltin(name, proto, outTypeStr, classKind);
+  if (define)
+    s += " })";
+  else
+    s += " }";
+  s += "\n";
+  return s;
+}
+
 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
 /// is comprised of type definitions and function declarations.
 void NeonEmitter::run(raw_ostream &OS) {
-  EmitSourceFileHeader("ARM NEON Header", OS);
-  
-  // FIXME: emit license into file?
-  
+  OS << 
+    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
+    "---===\n"
+    " *\n"
+    " * Permission is hereby granted, free of charge, to any person obtaining "
+    "a copy\n"
+    " * of this software and associated documentation files (the \"Software\"),"
+    " to deal\n"
+    " * in the Software without restriction, including without limitation the "
+    "rights\n"
+    " * to use, copy, modify, merge, publish, distribute, sublicense, "
+    "and/or sell\n"
+    " * copies of the Software, and to permit persons to whom the Software is\n"
+    " * furnished to do so, subject to the following conditions:\n"
+    " *\n"
+    " * The above copyright notice and this permission notice shall be "
+    "included in\n"
+    " * all copies or substantial portions of the Software.\n"
+    " *\n"
+    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
+    "EXPRESS OR\n"
+    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
+    "MERCHANTABILITY,\n"
+    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
+    "SHALL THE\n"
+    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
+    "OTHER\n"
+    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
+    "ARISING FROM,\n"
+    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
+    "DEALINGS IN\n"
+    " * THE SOFTWARE.\n"
+    " *\n"
+    " *===--------------------------------------------------------------------"
+    "---===\n"
+    " */\n\n";
+
   OS << "#ifndef __ARM_NEON_H\n";
   OS << "#define __ARM_NEON_H\n\n";
-  
+
   OS << "#ifndef __ARM_NEON__\n";
   OS << "#error \"NEON support not enabled\"\n";
   OS << "#endif\n\n";
@@ -895,8 +1103,8 @@ void NeonEmitter::run(raw_ostream &OS) {
 
   // Emit NEON-specific scalar typedefs.
   OS << "typedef float float32_t;\n";
-  OS << "typedef uint8_t poly8_t;\n";
-  OS << "typedef uint16_t poly16_t;\n";
+  OS << "typedef int8_t poly8_t;\n";
+  OS << "typedef int16_t poly16_t;\n";
   OS << "typedef uint16_t float16_t;\n";
 
   // Emit Neon vector typedefs.
@@ -905,105 +1113,105 @@ void NeonEmitter::run(raw_ostream &OS) {
   ParseTypes(0, TypedefTypes, TDTypeVec);
 
   // Emit vector typedefs.
-  for (unsigned v = 1; v != 5; ++v) {
-    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
-      bool dummy, quad = false;
-      (void) ClassifyType(TDTypeVec[i], quad, dummy, dummy);
-      OS << "typedef __attribute__(( __vector_size__(";
-      
-      OS << utostr(8*v*(quad ? 2 : 1)) << ") )) ";
-      if (!quad)
-        OS << " ";
-      
-      OS << TypeString('s', TDTypeVec[i]);
-      OS << " __neon_";
-      
-      char t = (v == 1) ? 'd' : '0' + v;
-      OS << TypeString(t, TDTypeVec[i]) << ";\n";
-    }
+  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
+    bool dummy, quad = false, poly = false;
+    (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
+    if (poly)
+      OS << "typedef __attribute__((neon_polyvector_type(";
+    else
+      OS << "typedef __attribute__((neon_vector_type(";
+
+    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
+    OS << utostr(nElts) << "))) ";
+    if (nElts < 10)
+      OS << " ";
+
+    OS << TypeString('s', TDTypeVec[i]);
+    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
   }
   OS << "\n";
 
   // Emit struct typedefs.
-  for (unsigned vi = 1; vi != 5; ++vi) {
+  for (unsigned vi = 2; vi != 5; ++vi) {
     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
-      std::string ts = TypeString('d', TDTypeVec[i], vi == 1);
-      std::string vs = TypeString((vi > 1) ? '0' + vi : 'd', TDTypeVec[i]);
-      std::string tag = (vi > 1) ? vs : StructTag(TDTypeVec[i]);
-      OS << "typedef struct " << tag << " {\n";
+      std::string ts = TypeString('d', TDTypeVec[i]);
+      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
+      OS << "typedef struct " << vs << " {\n";
       OS << "  " << ts << " val";
-      if (vi > 1)
-        OS << "[" << utostr(vi) << "]";
-      OS << ";\n} " << vs << ";\n\n";
+      OS << "[" << utostr(vi) << "]";
+      OS << ";\n} ";
+      OS << vs << ";\n\n";
     }
   }
-  
+
   OS << "#define __ai static __attribute__((__always_inline__))\n\n";
 
   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
-  
-  // Unique the return+pattern types, and assign them.
+
+  // Emit vmovl and vabd intrinsics first so they can be used by other
+  // intrinsics.  (Some of the saturating multiply instructions are also
+  // used to implement the corresponding "_lane" variants, but tablegen
+  // sorts the records into alphabetical order so that the "_lane" variants
+  // come after the intrinsics they use.)
+  emitIntrinsic(OS, Records.getDef("VMOVL"));
+  emitIntrinsic(OS, Records.getDef("VABD"));
+
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
-    std::string name = LowercaseString(R->getName());
-    std::string Proto = R->getValueAsString("Prototype");
-    std::string Types = R->getValueAsString("Types");
-    
-    SmallVector<StringRef, 16> TypeVec;
-    ParseTypes(R, Types, TypeVec);
-    
-    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
-    
-    bool define = Proto.find('i') != std::string::npos;
-    
-    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
-      assert(!Proto.empty() && "");
-      
-      // static always inline + return type
-      if (define)
-        OS << "#define";
-      else
-        OS << "__ai " << TypeString(Proto[0], TypeVec[ti]);
-      
-      // Function name with type suffix
-      OS << " " << MangleName(name, TypeVec[ti], ClassS);
-      
-      // Function arguments
-      OS << GenArgs(Proto, TypeVec[ti]);
-      
-      // Definition.
-      if (define)
-        OS << " ";
-      else
-        OS << " { ";
-      
-      if (k != OpNone) {
-        OS << GenOpString(k, Proto, TypeVec[ti]);
-      } else {
-        if (R->getSuperClasses().size() < 2)
-          throw TGError(R->getLoc(), "Builtin has no class kind");
-        
-        ClassKind ck = ClassMap[R->getSuperClasses()[1]];
-
-        if (ck == ClassNone)
-          throw TGError(R->getLoc(), "Builtin has no class kind");
-        OS << GenBuiltin(name, Proto, TypeVec[ti], ck);
-      }
-      if (!define)
-        OS << " }";
-      OS << "\n";
-    }
-    OS << "\n";
+    if (R->getName() != "VMOVL" && R->getName() != "VABD")
+      emitIntrinsic(OS, R);
   }
+
   OS << "#undef __ai\n\n";
   OS << "#endif /* __ARM_NEON_H */\n";
 }
 
-static unsigned RangeFromType(StringRef typestr) {
+/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
+/// intrinsics specified by record R.
+void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
+  std::string name = R->getValueAsString("Name");
+  std::string Proto = R->getValueAsString("Prototype");
+  std::string Types = R->getValueAsString("Types");
+
+  SmallVector<StringRef, 16> TypeVec;
+  ParseTypes(R, Types, TypeVec);
+
+  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
+
+  ClassKind classKind = ClassNone;
+  if (R->getSuperClasses().size() >= 2)
+    classKind = ClassMap[R->getSuperClasses()[1]];
+  if (classKind == ClassNone && kind == OpNone)
+    throw TGError(R->getLoc(), "Builtin has no class kind");
+
+  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
+    if (kind == OpReinterpret) {
+      bool outQuad = false;
+      bool dummy = false;
+      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
+      for (unsigned srcti = 0, srcte = TypeVec.size();
+           srcti != srcte; ++srcti) {
+        bool inQuad = false;
+        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
+        if (srcti == ti || inQuad != outQuad)
+          continue;
+        OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
+                           OpCast, ClassS);
+      }
+    } else {
+      OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
+                         kind, classKind);
+    }
+  }
+  OS << "\n";
+}
+
+static unsigned RangeFromType(const char mod, StringRef typestr) {
   // base type to get the type string for.
   bool quad = false, dummy = false;
   char type = ClassifyType(typestr, quad, dummy, dummy);
-  
+  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
+
   switch (type) {
     case 'c':
       return (8 << (int)quad) - 1;
@@ -1031,7 +1239,7 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
 
   StringMap<OpKind> EmittedMap;
-  
+
   // Generate BuiltinsARM.def for NEON
   OS << "#ifdef GET_NEON_BUILTINS\n";
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
@@ -1041,22 +1249,22 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
       continue;
 
     std::string Proto = R->getValueAsString("Prototype");
-    
+
     // Functions with 'a' (the splat code) in the type prototype should not get
     // their own builtin as they use the non-splat variant.
     if (Proto.find('a') != std::string::npos)
       continue;
-    
+
     std::string Types = R->getValueAsString("Types");
     SmallVector<StringRef, 16> TypeVec;
     ParseTypes(R, Types, TypeVec);
-    
+
     if (R->getSuperClasses().size() < 2)
       throw TGError(R->getLoc(), "Builtin has no class kind");
-    
-    std::string name = LowercaseString(R->getName());
+
+    std::string name = R->getValueAsString("Name");
     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
-    
+
     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
       // Generate the BuiltinsARM.def declaration for this builtin, ensuring
       // that each unique BUILTIN() macro appears only once in the output
@@ -1064,13 +1272,13 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
       if (EmittedMap.count(bd))
         continue;
-      
+
       EmittedMap[bd] = OpNone;
       OS << bd << "\n";
     }
   }
   OS << "#endif\n\n";
-  
+
   // Generate the overloaded type checking code for SemaChecking.cpp
   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
@@ -1078,34 +1286,34 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
     if (k != OpNone)
       continue;
-    
+
     std::string Proto = R->getValueAsString("Prototype");
     std::string Types = R->getValueAsString("Types");
-    std::string name = LowercaseString(R->getName());
-    
+    std::string name = R->getValueAsString("Name");
+
     // Functions with 'a' (the splat code) in the type prototype should not get
     // their own builtin as they use the non-splat variant.
     if (Proto.find('a') != std::string::npos)
       continue;
-    
+
     // Functions which have a scalar argument cannot be overloaded, no need to
     // check them if we are emitting the type checking code.
     if (Proto.find('s') != std::string::npos)
       continue;
-    
+
     SmallVector<StringRef, 16> TypeVec;
     ParseTypes(R, Types, TypeVec);
-    
+
     if (R->getSuperClasses().size() < 2)
       throw TGError(R->getLoc(), "Builtin has no class kind");
-    
+
     int si = -1, qi = -1;
     unsigned mask = 0, qmask = 0;
     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
       // Generate the switch case(s) for this builtin for the type validation.
       bool quad = false, poly = false, usgn = false;
       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
-      
+
       if (quad) {
         qi = ti;
         qmask |= 1 << GetNeonEnum(Proto, TypeVec[ti]);
@@ -1115,64 +1323,67 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
       }
     }
     if (mask)
-      OS << "case ARM::BI__builtin_neon_" 
-      << MangleName(name, TypeVec[si], ClassB)
-      << ": mask = " << "0x" << utohexstr(mask) << "; break;\n";
+      OS << "case ARM::BI__builtin_neon_"
+         << MangleName(name, TypeVec[si], ClassB)
+         << ": mask = " << "0x" << utohexstr(mask) << "; break;\n";
     if (qmask)
-      OS << "case ARM::BI__builtin_neon_" 
-      << MangleName(name, TypeVec[qi], ClassB)
-      << ": mask = " << "0x" << utohexstr(qmask) << "; break;\n";
+      OS << "case ARM::BI__builtin_neon_"
+         << MangleName(name, TypeVec[qi], ClassB)
+         << ": mask = " << "0x" << utohexstr(qmask) << "; break;\n";
   }
   OS << "#endif\n\n";
-  
+
   // Generate the intrinsic range checking code for shift/lane immediates.
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
-    
+
     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
     if (k != OpNone)
       continue;
-    
-    std::string name = LowercaseString(R->getName());
+
+    std::string name = R->getValueAsString("Name");
     std::string Proto = R->getValueAsString("Prototype");
     std::string Types = R->getValueAsString("Types");
-    
+
     // Functions with 'a' (the splat code) in the type prototype should not get
     // their own builtin as they use the non-splat variant.
     if (Proto.find('a') != std::string::npos)
       continue;
-    
+
     // Functions which do not have an immediate do not need to have range
     // checking code emitted.
-    if (Proto.find('i') == std::string::npos)
+    size_t immPos = Proto.find('i');
+    if (immPos == std::string::npos)
       continue;
-    
+
     SmallVector<StringRef, 16> TypeVec;
     ParseTypes(R, Types, TypeVec);
-    
+
     if (R->getSuperClasses().size() < 2)
       throw TGError(R->getLoc(), "Builtin has no class kind");
-    
+
     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
-    
+
     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
       std::string namestr, shiftstr, rangestr;
-      
+
       // Builtins which are overloaded by type will need to have their upper
       // bound computed at Sema time based on the type constant.
       if (Proto.find('s') == std::string::npos) {
         ck = ClassB;
         if (R->getValueAsBit("isShift")) {
           shiftstr = ", true";
-          
+
           // Right shifts have an 'r' in the name, left shifts do not.
           if (name.find('r') != std::string::npos)
             rangestr = "l = 1; ";
         }
         rangestr += "u = RFT(TV" + shiftstr + ")";
       } else {
-        rangestr = "u = " + utostr(RangeFromType(TypeVec[ti]));
+        // The immediate generally refers to a lane in the preceding argument.
+        assert(immPos > 0 && "unexpected immediate operand");
+        rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
       }
       // Make sure cases appear only once by uniquing them in a string map.
       namestr = MangleName(name, TypeVec[ti], ck);
@@ -1182,13 +1393,13 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
 
       // Calculate the index of the immediate that should be range checked.
       unsigned immidx = 0;
-      
+
       // Builtins that return a struct of multiple vectors have an extra
       // leading arg for the struct return.
-      if (Proto[0] == '2' || Proto[0] == '3' || Proto[0] == '4')
+      if (Proto[0] >= '2' && Proto[0] <= '4')
         ++immidx;
-      
-      // Add one to the index for each argument until we reach the immediate 
+
+      // Add one to the index for each argument until we reach the immediate
       // to be checked.  Structs of vectors are passed as multiple arguments.
       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
         switch (Proto[ii]) {
@@ -1199,9 +1410,113 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
           case 'i': ie = ii + 1; break;
         }
       }
-      OS << "case ARM::BI__builtin_neon_"  << MangleName(name, TypeVec[ti], ck)
+      OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
          << ": i = " << immidx << "; " << rangestr << "; break;\n";
     }
   }
   OS << "#endif\n\n";
 }
+
+/// GenTest - Write out a test for the intrinsic specified by the name and
+/// type strings, including the embedded patterns for FileCheck to match.
+static std::string GenTest(const std::string &name,
+                           const std::string &proto,
+                           StringRef outTypeStr, StringRef inTypeStr,
+                           bool isShift) {
+  assert(!proto.empty() && "");
+  std::string s;
+
+  // Function name with type suffix
+  std::string mangledName = MangleName(name, outTypeStr, ClassS);
+  if (outTypeStr != inTypeStr) {
+    // If the input type is different (e.g., for vreinterpret), append a suffix
+    // for the input type.  String off a "Q" (quad) prefix so that MangleName
+    // does not insert another "q" in the name.
+    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
+    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
+    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
+  }
+
+  // Emit the FileCheck patterns.
+  s += "// CHECK: test_" + mangledName + "\n";
+  // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
+
+  // Emit the start of the test function.
+  s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
+  char arg = 'a';
+  std::string comma;
+  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
+    // Do not create arguments for values that must be immediate constants.
+    if (proto[i] == 'i')
+      continue;
+    s += comma + TypeString(proto[i], inTypeStr) + " ";
+    s.push_back(arg);
+    comma = ", ";
+  }
+  s += ") { \\\n  ";
+
+  if (proto[0] != 'v')
+    s += "return ";
+  s += mangledName + "(";
+  arg = 'a';
+  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
+    if (proto[i] == 'i') {
+      // For immediate operands, test the maximum value.
+      if (isShift)
+        s += "1"; // FIXME
+      else
+        // The immediate generally refers to a lane in the preceding argument.
+        s += utostr(RangeFromType(proto[i-1], inTypeStr));
+    } else {
+      s.push_back(arg);
+    }
+    if ((i + 1) < e)
+      s += ", ";
+  }
+  s += ");\n}\n\n";
+  return s;
+}
+
+/// runTests - Write out a complete set of tests for all of the Neon
+/// intrinsics.
+void NeonEmitter::runTests(raw_ostream &OS) {
+  OS <<
+    "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
+    "// RUN:  -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
+    "\n"
+    "#include <arm_neon.h>\n"
+    "\n";
+
+  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+    Record *R = RV[i];
+    std::string name = R->getValueAsString("Name");
+    std::string Proto = R->getValueAsString("Prototype");
+    std::string Types = R->getValueAsString("Types");
+    bool isShift = R->getValueAsBit("isShift");
+
+    SmallVector<StringRef, 16> TypeVec;
+    ParseTypes(R, Types, TypeVec);
+
+    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
+    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
+      if (kind == OpReinterpret) {
+        bool outQuad = false;
+        bool dummy = false;
+        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
+        for (unsigned srcti = 0, srcte = TypeVec.size();
+             srcti != srcte; ++srcti) {
+          bool inQuad = false;
+          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
+          if (srcti == ti || inQuad != outQuad)
+            continue;
+          OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
+        }
+      } else {
+        OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
+      }
+    }
+    OS << "\n";
+  }
+}
+
diff --git a/utils/TableGen/NeonEmitter.h b/utils/TableGen/NeonEmitter.h
index 6c6760d732fa..1e6fcbf555df 100644
--- a/utils/TableGen/NeonEmitter.h
+++ b/utils/TableGen/NeonEmitter.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This tablegen backend is responsible for emitting arm_neon.h, which includes
-// a declaration and definition of each function specified by the ARM NEON 
+// a declaration and definition of each function specified by the ARM NEON
 // compiler interface.  See ARM document DUI0348B.
 //
 //===----------------------------------------------------------------------===//
@@ -24,13 +24,34 @@
 enum OpKind {
   OpNone,
   OpAdd,
+  OpAddl,
+  OpAddw,
   OpSub,
+  OpSubl,
+  OpSubw,
   OpMul,
+  OpMull,
   OpMla,
+  OpMlal,
   OpMls,
+  OpMlsl,
   OpMulN,
+  OpMullN,
   OpMlaN,
   OpMlsN,
+  OpMlalN,
+  OpMlslN,
+  OpMulLane,
+  OpMullLane,
+  OpMlaLane,
+  OpMlsLane,
+  OpMlalLane,
+  OpMlslLane,
+  OpQDMullLane,
+  OpQDMlalLane,
+  OpQDMlslLane,
+  OpQDMulhLane,
+  OpQRDMulhLane,
   OpEq,
   OpGe,
   OpLe,
@@ -46,40 +67,66 @@ enum OpKind {
   OpCast,
   OpConcat,
   OpDup,
+  OpDupLane,
   OpHi,
   OpLo,
   OpSelect,
   OpRev16,
   OpRev32,
-  OpRev64
+  OpRev64,
+  OpReinterpret,
+  OpAbdl,
+  OpAba,
+  OpAbal
 };
 
 enum ClassKind {
   ClassNone,
-  ClassI,
-  ClassS,
-  ClassW,
-  ClassB
+  ClassI,           // generic integer instruction, e.g., "i8" suffix
+  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
+  ClassW,           // width-specific instruction, e.g., "8" suffix
+  ClassB            // bitcast arguments with enum argument to specify type
 };
 
 namespace llvm {
-  
+
   class NeonEmitter : public TableGenBackend {
     RecordKeeper &Records;
     StringMap<OpKind> OpMap;
     DenseMap<Record*, ClassKind> ClassMap;
-    
+
   public:
     NeonEmitter(RecordKeeper &R) : Records(R) {
       OpMap["OP_NONE"]  = OpNone;
       OpMap["OP_ADD"]   = OpAdd;
+      OpMap["OP_ADDL"]  = OpAddl;
+      OpMap["OP_ADDW"]  = OpAddw;
       OpMap["OP_SUB"]   = OpSub;
+      OpMap["OP_SUBL"]  = OpSubl;
+      OpMap["OP_SUBW"]  = OpSubw;
       OpMap["OP_MUL"]   = OpMul;
+      OpMap["OP_MULL"]  = OpMull;
       OpMap["OP_MLA"]   = OpMla;
+      OpMap["OP_MLAL"]  = OpMlal;
       OpMap["OP_MLS"]   = OpMls;
+      OpMap["OP_MLSL"]  = OpMlsl;
       OpMap["OP_MUL_N"] = OpMulN;
+      OpMap["OP_MULL_N"]= OpMullN;
       OpMap["OP_MLA_N"] = OpMlaN;
       OpMap["OP_MLS_N"] = OpMlsN;
+      OpMap["OP_MLAL_N"] = OpMlalN;
+      OpMap["OP_MLSL_N"] = OpMlslN;
+      OpMap["OP_MUL_LN"]= OpMulLane;
+      OpMap["OP_MULL_LN"] = OpMullLane;
+      OpMap["OP_MLA_LN"]= OpMlaLane;
+      OpMap["OP_MLS_LN"]= OpMlsLane;
+      OpMap["OP_MLAL_LN"] = OpMlalLane;
+      OpMap["OP_MLSL_LN"] = OpMlslLane;
+      OpMap["OP_QDMULL_LN"] = OpQDMullLane;
+      OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
+      OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
+      OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
+      OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
       OpMap["OP_EQ"]    = OpEq;
       OpMap["OP_GE"]    = OpGe;
       OpMap["OP_LE"]    = OpLe;
@@ -97,10 +144,15 @@ namespace llvm {
       OpMap["OP_HI"]    = OpHi;
       OpMap["OP_LO"]    = OpLo;
       OpMap["OP_DUP"]   = OpDup;
+      OpMap["OP_DUP_LN"] = OpDupLane;
       OpMap["OP_SEL"]   = OpSelect;
       OpMap["OP_REV16"] = OpRev16;
       OpMap["OP_REV32"] = OpRev32;
       OpMap["OP_REV64"] = OpRev64;
+      OpMap["OP_REINT"] = OpReinterpret;
+      OpMap["OP_ABDL"]  = OpAbdl;
+      OpMap["OP_ABA"]   = OpAba;
+      OpMap["OP_ABAL"]  = OpAbal;
 
       Record *SI = R.getClass("SInst");
       Record *II = R.getClass("IInst");
@@ -109,14 +161,20 @@ namespace llvm {
       ClassMap[II] = ClassI;
       ClassMap[WI] = ClassW;
     }
-    
+
     // run - Emit arm_neon.h.inc
     void run(raw_ostream &o);
 
     // runHeader - Emit all the __builtin prototypes used in arm_neon.h
     void runHeader(raw_ostream &o);
+
+    // runTests - Emit tests for all the Neon intrinsics.
+    void runTests(raw_ostream &o);
+
+  private:
+    void emitIntrinsic(raw_ostream &OS, Record *R);
   };
-  
+
 } // End llvm namespace
 
 #endif
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index dc793586fbee..abbbafed09d8 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Record.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Format.h"
 #include "llvm/ADT/StringExtras.h"
 
@@ -59,26 +59,34 @@ Init *BitsRecTy::convertValue(UnsetInit *UI) {
 }
 
 Init *BitsRecTy::convertValue(BitInit *UI) {
-  if (Size != 1) return 0;  // Can only convert single bit...
+  if (Size != 1) return 0;  // Can only convert single bit.
   BitsInit *Ret = new BitsInit(1);
   Ret->setBit(0, UI);
   return Ret;
 }
 
-// convertValue from Int initializer to bits type: Split the integer up into the
-// appropriate bits...
-//
-Init *BitsRecTy::convertValue(IntInit *II) {
-  int64_t Value = II->getValue();
-  // Make sure this bitfield is large enough to hold the integer value...
+/// canFitInBitfield - Return true if the number of bits is large enough to hold
+/// the integer value.
+static bool canFitInBitfield(int64_t Value, unsigned NumBits) {
   if (Value >= 0) {
-    if (Value & ~((1LL << Size)-1))
-      return 0;
-  } else {
-    if ((Value >> Size) != -1 || ((Value & (1LL << (Size-1))) == 0))
-      return 0;
+    if (Value & ~((1LL << NumBits) - 1))
+      return false;
+  } else if ((Value >> NumBits) != -1 || (Value & (1LL << (NumBits-1))) == 0) {
+    return false;
   }
 
+  return true;
+}
+
+/// convertValue from Int initializer to bits type: Split the integer up into the
+/// appropriate bits.
+///
+Init *BitsRecTy::convertValue(IntInit *II) {
+  int64_t Value = II->getValue();
+  // Make sure this bitfield is large enough to hold the integer value.
+  if (!canFitInBitfield(Value, Size))
+    return 0;
+
   BitsInit *Ret = new BitsInit(Size);
   for (unsigned i = 0; i != Size; ++i)
     Ret->setBit(i, new BitInit(Value & (1LL << i)));
@@ -88,7 +96,7 @@ Init *BitsRecTy::convertValue(IntInit *II) {
 
 Init *BitsRecTy::convertValue(BitsInit *BI) {
   // If the number of bits is right, return it.  Otherwise we need to expand or
-  // truncate...
+  // truncate.
   if (BI->getNumBits() == Size) return BI;
   return 0;
 }
@@ -101,12 +109,56 @@ Init *BitsRecTy::convertValue(TypedInit *VI) {
         Ret->setBit(i, new VarBitInit(VI, i));
       return Ret;
     }
+
   if (Size == 1 && dynamic_cast<BitRecTy*>(VI->getType())) {
     BitsInit *Ret = new BitsInit(1);
     Ret->setBit(0, VI);
     return Ret;
   }
 
+  if (TernOpInit *Tern = dynamic_cast<TernOpInit*>(VI)) {
+    if (Tern->getOpcode() == TernOpInit::IF) {
+      Init *LHS = Tern->getLHS();
+      Init *MHS = Tern->getMHS();
+      Init *RHS = Tern->getRHS();
+
+      IntInit *MHSi = dynamic_cast<IntInit*>(MHS);
+      IntInit *RHSi = dynamic_cast<IntInit*>(RHS);
+
+      if (MHSi && RHSi) {
+        int64_t MHSVal = MHSi->getValue();
+        int64_t RHSVal = RHSi->getValue();
+
+        if (canFitInBitfield(MHSVal, Size) && canFitInBitfield(RHSVal, Size)) {
+          BitsInit *Ret = new BitsInit(Size);
+
+          for (unsigned i = 0; i != Size; ++i)
+            Ret->setBit(i, new TernOpInit(TernOpInit::IF, LHS,
+                                          new IntInit((MHSVal & (1LL << i)) ? 1 : 0),
+                                          new IntInit((RHSVal & (1LL << i)) ? 1 : 0),
+                                          VI->getType()));
+
+          return Ret;
+        }
+      } else {
+        BitsInit *MHSbs = dynamic_cast<BitsInit*>(MHS);
+        BitsInit *RHSbs = dynamic_cast<BitsInit*>(RHS);
+
+        if (MHSbs && RHSbs) {
+          BitsInit *Ret = new BitsInit(Size);
+
+          for (unsigned i = 0; i != Size; ++i)
+            Ret->setBit(i, new TernOpInit(TernOpInit::IF, LHS,
+                                          MHSbs->getBit(i),
+                                          RHSbs->getBit(i),
+                                          VI->getType()));
+
+          return Ret;
+        }
+      }
+    }
+  }
+
   return 0;
 }
 
@@ -152,16 +204,6 @@ Init *StringRecTy::convertValue(BinOpInit *BO) {
       return new BinOpInit(BinOpInit::STRCONCAT, L, R, new StringRecTy);
     return BO;
   }
-  if (BO->getOpcode() == BinOpInit::NAMECONCAT) {
-    if (BO->getType()->getAsString() == getAsString()) {
-      Init *L = BO->getLHS()->convertInitializerTo(this);
-      Init *R = BO->getRHS()->convertInitializerTo(this);
-      if (L == 0 || R == 0) return 0;
-      if (L != BO->getLHS() || R != BO->getRHS())
-        return new BinOpInit(BinOpInit::NAMECONCAT, L, R, new StringRecTy);
-      return BO;
-    }
-  }
 
   return convertValue((TypedInit*)BO);
 }
@@ -236,16 +278,6 @@ Init *DagRecTy::convertValue(BinOpInit *BO) {
       return new BinOpInit(BinOpInit::CONCAT, L, R, new DagRecTy);
     return BO;
   }
-  if (BO->getOpcode() == BinOpInit::NAMECONCAT) {
-    if (BO->getType()->getAsString() == getAsString()) {
-      Init *L = BO->getLHS()->convertInitializerTo(this);
-      Init *R = BO->getRHS()->convertInitializerTo(this);
-      if (L == 0 || R == 0) return 0;
-      if (L != BO->getLHS() || R != BO->getRHS())
-        return new BinOpInit(BinOpInit::CONCAT, L, R, new DagRecTy);
-      return BO;
-    }
-  }
   return 0;
 }
 
@@ -518,9 +550,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
         // From TGParser::ParseIDValue
         if (CurRec) {
           if (const RecordVal *RV = CurRec->getValue(Name)) {
-            if (RV->getType() != getType()) {
-              throw "type mismatch in nameconcat";
-            }
+            if (RV->getType() != getType())
+              throw "type mismatch in cast";
             return new VarInit(Name, RV->getType());
           }
 
@@ -529,9 +560,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
             const RecordVal *RV = CurRec->getValue(TemplateArgName);
             assert(RV && "Template arg doesn't exist??");
 
-            if (RV->getType() != getType()) {
-              throw "type mismatch in nameconcat";
-            }
+            if (RV->getType() != getType())
+              throw "type mismatch in cast";
 
             return new VarInit(TemplateArgName, RV->getType());
           }
@@ -543,15 +573,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
             const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
             assert(RV && "Template arg doesn't exist??");
 
-            if (RV->getType() != getType()) {
-              throw "type mismatch in nameconcat";
-            }
+            if (RV->getType() != getType())
+              throw "type mismatch in cast";
 
             return new VarInit(MCName, RV->getType());
           }
         }
 
-        if (Record *D = Records.getDef(Name))
+        if (Record *D = (CurRec->getRecords()).getDef(Name))
           return new DefInit(D);
 
         errs() << "Variable not defined: '" + Name + "'\n";
@@ -561,7 +590,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
     }
     break;
   }
-  case CAR: {
+  case HEAD: {
     ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
     if (LHSl) {
       if (LHSl->getSize() == 0) {
@@ -572,7 +601,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
     }
     break;
   }
-  case CDR: {
+  case TAIL: {
     ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
     if (LHSl) {
       if (LHSl->getSize() == 0) {
@@ -585,7 +614,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
     }
     break;
   }
-  case LNULL: {
+  case EMPTY: {
     ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
     if (LHSl) {
       if (LHSl->getSize() == 0) {
@@ -621,9 +650,9 @@ std::string UnOpInit::getAsString() const {
   std::string Result;
   switch (Opc) {
   case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
-  case CAR: Result = "!car"; break;
-  case CDR: Result = "!cdr"; break;
-  case LNULL: Result = "!null"; break;
+  case HEAD: Result = "!head"; break;
+  case TAIL: Result = "!tail"; break;
+  case EMPTY: Result = "!empty"; break;
   }
   return Result + "(" + LHS->getAsString() + ")";
 }
@@ -660,57 +689,6 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
       return new StringInit(LHSs->getValue() + RHSs->getValue());
     break;
   }
-  case NAMECONCAT: {
-    StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
-    StringInit *RHSs = dynamic_cast<StringInit*>(RHS);
-    if (LHSs && RHSs) {
-      std::string Name(LHSs->getValue() + RHSs->getValue());
-
-      // From TGParser::ParseIDValue
-      if (CurRec) {
-        if (const RecordVal *RV = CurRec->getValue(Name)) {
-          if (RV->getType() != getType()) {
-            throw "type mismatch in nameconcat";
-          }
-          return new VarInit(Name, RV->getType());
-        }
-
-        std::string TemplateArgName = CurRec->getName()+":"+Name;
-        if (CurRec->isTemplateArg(TemplateArgName)) {
-          const RecordVal *RV = CurRec->getValue(TemplateArgName);
-          assert(RV && "Template arg doesn't exist??");
-
-          if (RV->getType() != getType()) {
-            throw "type mismatch in nameconcat";
-          }
-
-          return new VarInit(TemplateArgName, RV->getType());
-        }
-      }
-
-      if (CurMultiClass) {
-        std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
-        if (CurMultiClass->Rec.isTemplateArg(MCName)) {
-          const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
-          assert(RV && "Template arg doesn't exist??");
-
-          if (RV->getType() != getType()) {
-            throw "type mismatch in nameconcat";
-          }
-
-          return new VarInit(MCName, RV->getType());
-        }
-      }
-
-      if (Record *D = Records.getDef(Name))
-        return new DefInit(D);
-
-      errs() << "Variable not defined in !nameconcat: '" + Name + "'\n";
-      assert(0 && "Variable not found in !nameconcat");
-      return 0;
-    }
-    break;
-  }
   case EQ: {
     // try to fold eq comparison for 'bit' and 'int', otherwise fallback
     // to string objects.
@@ -771,8 +749,6 @@ std::string BinOpInit::getAsString() const {
   case SRL: Result = "!srl"; break;
   case EQ: Result = "!eq"; break;
   case STRCONCAT: Result = "!strconcat"; break;
-  case NAMECONCAT:
-    Result = "!nameconcat<" + getType()->getAsString() + ">"; break;
   }
   return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
 }
@@ -1042,7 +1018,7 @@ RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
 
 Init *TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) {
   BitsRecTy *T = dynamic_cast<BitsRecTy*>(getType());
-  if (T == 0) return 0;  // Cannot subscript a non-bits variable...
+  if (T == 0) return 0;  // Cannot subscript a non-bits variable.
   unsigned NumBits = T->getNumBits();
 
   BitsInit *BI = new BitsInit(Bits.size());
@@ -1058,7 +1034,7 @@ Init *TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) {
 
 Init *TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) {
   ListRecTy *T = dynamic_cast<ListRecTy*>(getType());
-  if (T == 0) return 0;  // Cannot subscript a non-list variable...
+  if (T == 0) return 0;  // Cannot subscript a non-list variable.
 
   if (Elements.size() == 1)
     return new VarListElementInit(this, Elements[0]);
@@ -1211,7 +1187,7 @@ Init *FieldInit::resolveBitReference(Record &R, const RecordVal *RV,
       assert(Bit < BI->getNumBits() && "Bit reference out of range!");
       Init *B = BI->getBit(Bit);
 
-      if (dynamic_cast<BitInit*>(B))  // If the bit is set...
+      if (dynamic_cast<BitInit*>(B))  // If the bit is set.
         return B;                     // Replace the VarBitInit with it.
     }
   return 0;
@@ -1303,14 +1279,14 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
 unsigned Record::LastID = 0;
 
 void Record::setName(const std::string &Name) {
-  if (Records.getDef(getName()) == this) {
-    Records.removeDef(getName());
+  if (TrackedRecords.getDef(getName()) == this) {
+    TrackedRecords.removeDef(getName());
     this->Name = Name;
-    Records.addDef(this);
+    TrackedRecords.addDef(this);
   } else {
-    Records.removeClass(getName());
+    TrackedRecords.removeClass(getName());
     this->Name = Name;
-    Records.addClass(this);
+    TrackedRecords.addClass(this);
   }
 }
 
@@ -1573,7 +1549,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
 /// name does not exist, an error is printed and true is returned.
 std::vector<Record*>
 RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
-  Record *Class = Records.getClass(ClassName);
+  Record *Class = getClass(ClassName);
   if (!Class)
     throw "ERROR: Couldn't find the `" + ClassName + "' class!\n";
 
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index d6f37eec749e..f3a5df23ec5c 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -16,7 +16,7 @@
 #define RECORD_H
 
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/raw_ostream.h"
 #include <map>
 
@@ -57,6 +57,7 @@ class VarListElementInit;
 class Record;
 class RecordVal;
 struct MultiClass;
+class RecordKeeper;
 
 //===----------------------------------------------------------------------===//
 //  Type Classes
@@ -810,7 +811,7 @@ public:
 ///
 class UnOpInit : public OpInit {
 public:
-  enum UnaryOp { CAST, CAR, CDR, LNULL };
+  enum UnaryOp { CAST, HEAD, TAIL, EMPTY };
 private:
   UnaryOp Opc;
   Init *LHS;
@@ -848,7 +849,7 @@ public:
 ///
 class BinOpInit : public OpInit {
 public:
-  enum BinaryOp { SHL, SRA, SRL, STRCONCAT, CONCAT, NAMECONCAT, EQ };
+  enum BinaryOp { SHL, SRA, SRL, STRCONCAT, CONCAT, EQ };
 private:
   BinaryOp Opc;
   Init *LHS, *RHS;
@@ -930,6 +931,8 @@ public:
   // possible to fold.
   Init *Fold(Record *CurRec, MultiClass *CurMultiClass);
 
+  virtual bool isComplete() const { return false; }
+
   virtual Init *resolveReferences(Record &R, const RecordVal *RV);
 
   virtual std::string getAsString() const;
@@ -1227,16 +1230,21 @@ class Record {
   std::vector<std::string> TemplateArgs;
   std::vector<RecordVal> Values;
   std::vector<Record*> SuperClasses;
+
+  // Tracks Record instances. Not owned by Record.
+  RecordKeeper &TrackedRecords;
+
 public:
 
-  explicit Record(const std::string &N, SMLoc loc) :
-    ID(LastID++), Name(N), Loc(loc) {}
+  // Constructs a record.
+  explicit Record(const std::string &N, SMLoc loc, RecordKeeper &records) :
+    ID(LastID++), Name(N), Loc(loc), TrackedRecords(records) {}
   ~Record() {}
 
-  
+
   static unsigned getNewUID() { return LastID++; }
-    
-    
+
+
   unsigned getID() const { return ID; }
 
   const std::string &getName() const { return Name; }
@@ -1315,6 +1323,10 @@ public:
   /// possible references.
   void resolveReferencesTo(const RecordVal *RV);
 
+  RecordKeeper &getRecords() const {
+    return TrackedRecords;
+  }
+
   void dump() const;
 
   //===--------------------------------------------------------------------===//
@@ -1350,9 +1362,9 @@ public:
   ///
   std::vector<Record*> getValueAsListOfDefs(StringRef FieldName) const;
 
-  /// getValueAsListOfInts - This method looks up the specified field and returns
-  /// its value as a vector of integers, throwing an exception if the field does
-  /// not exist or if the value is not the right type.
+  /// getValueAsListOfInts - This method looks up the specified field and
+  /// returns its value as a vector of integers, throwing an exception if the
+  /// field does not exist or if the value is not the right type.
   ///
   std::vector<int64_t> getValueAsListOfInts(StringRef FieldName) const;
 
@@ -1396,7 +1408,8 @@ struct MultiClass {
 
   void dump() const;
 
-  MultiClass(const std::string &Name, SMLoc Loc) : Rec(Name, Loc) {}
+  MultiClass(const std::string &Name, SMLoc Loc, RecordKeeper &Records) : 
+    Rec(Name, Loc, Records) {}
 };
 
 class RecordKeeper {
@@ -1453,7 +1466,6 @@ public:
   std::vector<Record*>
   getAllDerivedDefinitions(const std::string &ClassName) const;
 
-
   void dump() const;
 };
 
@@ -1488,9 +1500,7 @@ public:
 
 raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK);
 
-extern RecordKeeper Records;
-
-void PrintError(SMLoc ErrorLoc, const std::string &Msg);
+void PrintError(SMLoc ErrorLoc, const Twine &Msg);
 
 } // End llvm namespace
 
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 6f06705243e9..96399a4d0525 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
 
 // runEnums - Print out enum values for all of the registers.
 void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
 
   std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
@@ -63,7 +63,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
 
 void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
   EmitSourceFileHeader("Register Information Header Fragment", OS);
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   const std::string &TargetName = Target.getName();
   std::string ClassName = TargetName + "GenRegisterInfo";
 
@@ -333,7 +333,7 @@ public:
 // RegisterInfoEmitter::run - Main register file description emitter.
 //
 void RegisterInfoEmitter::run(raw_ostream &OS) {
-  CodeGenTarget Target;
+  CodeGenTarget Target(Records);
   EmitSourceFileHeader("Register Information Source Fragment", OS);
 
   OS << "namespace llvm {\n\n";
@@ -777,17 +777,13 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   delete [] AliasesHashTable;
 
   if (!RegisterAliases.empty())
-    OS << "\n\n  // Register Alias Sets...\n";
+    OS << "\n\n  // Register Overlap Lists...\n";
 
-  // Emit the empty alias list
-  OS << "  const unsigned Empty_AliasSet[] = { 0 };\n";
-  // Loop over all of the registers which have aliases, emitting the alias list
-  // to memory.
+  // Emit an overlap list for all registers.
   for (std::map<Record*, std::set<Record*>, LessRecord >::iterator
          I = RegisterAliases.begin(), E = RegisterAliases.end(); I != E; ++I) {
-    if (I->second.empty())
-      continue;
-    OS << "  const unsigned " << I->first->getName() << "_AliasSet[] = { ";
+    OS << "  const unsigned " << I->first->getName() << "_Overlaps[] = { "
+       << getQualifiedName(I->first) << ", ";
     for (std::set<Record*>::iterator ASI = I->second.begin(),
            E = I->second.end(); ASI != E; ++ASI)
       OS << getQualifiedName(*ASI) << ", ";
@@ -849,11 +845,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
     const CodeGenRegister &Reg = Regs[i];
     OS << "    { \"";
-    OS << Reg.getName() << "\",\t";
-    if (!RegisterAliases[Reg.TheDef].empty())
-      OS << Reg.getName() << "_AliasSet,\t";
-    else
-      OS << "Empty_AliasSet,\t";
+    OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
     if (!RegisterSubRegs[Reg.TheDef].empty())
       OS << Reg.getName() << "_SubRegsSet,\t";
     else
diff --git a/utils/TableGen/StringMatcher.cpp b/utils/TableGen/StringMatcher.cpp
new file mode 100644
index 000000000000..6aedcbf458a8
--- /dev/null
+++ b/utils/TableGen/StringMatcher.cpp
@@ -0,0 +1,149 @@
+//===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMatcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StringMatcher.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+using namespace llvm;
+
+/// FindFirstNonCommonLetter - Find the first character in the keys of the
+/// string pairs that is not shared across the whole set of strings.  All
+/// strings are assumed to have the same length.
+static unsigned 
+FindFirstNonCommonLetter(const std::vector<const
+                              StringMatcher::StringPair*> &Matches) {
+  assert(!Matches.empty());
+  for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
+    // Check to see if letter i is the same across the set.
+    char Letter = Matches[0]->first[i];
+    
+    for (unsigned str = 0, e = Matches.size(); str != e; ++str)
+      if (Matches[str]->first[i] != Letter)
+        return i;
+  }
+  
+  return Matches[0]->first.size();
+}
+
+/// EmitStringMatcherForChar - Given a set of strings that are known to be the
+/// same length and whose characters leading up to CharNo are the same, emit
+/// code to verify that CharNo and later are the same.
+///
+/// \return - True if control can leave the emitted code fragment.
+bool StringMatcher::
+EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
+                         unsigned CharNo, unsigned IndentCount) const {
+  assert(!Matches.empty() && "Must have at least one string to match!");
+  std::string Indent(IndentCount*2+4, ' ');
+  
+  // If we have verified that the entire string matches, we're done: output the
+  // matching code.
+  if (CharNo == Matches[0]->first.size()) {
+    assert(Matches.size() == 1 && "Had duplicate keys to match on");
+    
+    // If the to-execute code has \n's in it, indent each subsequent line.
+    StringRef Code = Matches[0]->second;
+    
+    std::pair<StringRef, StringRef> Split = Code.split('\n');
+    OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
+
+    Code = Split.second;
+    while (!Code.empty()) {
+      Split = Code.split('\n');
+      OS << Indent << Split.first << "\n";
+      Code = Split.second;
+    }
+    return false;
+  }
+  
+  // Bucket the matches by the character we are comparing.
+  std::map<char, std::vector<const StringPair*> > MatchesByLetter;
+  
+  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+    MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
+  
+  
+  // If we have exactly one bucket to match, see how many characters are common
+  // across the whole set and match all of them at once.
+  if (MatchesByLetter.size() == 1) {
+    unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
+    unsigned NumChars = FirstNonCommonLetter-CharNo;
+    
+    // Emit code to break out if the prefix doesn't match.
+    if (NumChars == 1) {
+      // Do the comparison with if (Str[1] != 'f')
+      // FIXME: Need to escape general characters.
+      OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
+      << Matches[0]->first[CharNo] << "')\n";
+      OS << Indent << "  break;\n";
+    } else {
+      // Do the comparison with if (Str.substr(1, 3) != "foo").    
+      // FIXME: Need to escape general strings.
+      OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ", "
+      << NumChars << ") != \"";
+      OS << Matches[0]->first.substr(CharNo, NumChars) << "\")\n";
+      OS << Indent << "  break;\n";
+    }
+    
+    return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount);
+  }
+  
+  // Otherwise, we have multiple possible things, emit a switch on the
+  // character.
+  OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
+  OS << Indent << "default: break;\n";
+  
+  for (std::map<char, std::vector<const StringPair*> >::iterator LI = 
+       MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
+    // TODO: escape hard stuff (like \n) if we ever care about it.
+    OS << Indent << "case '" << LI->first << "':\t // "
+       << LI->second.size() << " string";
+    if (LI->second.size() != 1) OS << 's';
+    OS << " to match.\n";
+    if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1))
+      OS << Indent << "  break;\n";
+  }
+  
+  OS << Indent << "}\n";
+  return true;
+}
+
+
+/// Emit - Top level entry point.
+///
+void StringMatcher::Emit(unsigned Indent) const {
+  // If nothing to match, just fall through.
+  if (Matches.empty()) return;
+  
+  // First level categorization: group strings by length.
+  std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
+  
+  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
+    MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
+  
+  // Output a switch statement on length and categorize the elements within each
+  // bin.
+  OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
+  OS.indent(Indent*2+2) << "default: break;\n";
+  
+  for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
+       MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
+    OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
+       << LI->second.size()
+       << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n";
+    if (EmitStringMatcherForChar(LI->second, 0, Indent))
+      OS.indent(Indent*2+4) << "break;\n";
+  }
+  
+  OS.indent(Indent*2+2) << "}\n";
+}
diff --git a/utils/TableGen/StringMatcher.h b/utils/TableGen/StringMatcher.h
new file mode 100644
index 000000000000..1dadc76200b0
--- /dev/null
+++ b/utils/TableGen/StringMatcher.h
@@ -0,0 +1,54 @@
+//===- StringMatcher.h - Generate a matcher for input strings ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMatcher class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef STRINGMATCHER_H
+#define STRINGMATCHER_H
+
+#include <vector>
+#include <string>
+#include <utility>
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class raw_ostream;
+  
+/// StringMatcher - Given a list of strings and code to execute when they match,
+/// output a simple switch tree to classify the input string.
+/// 
+/// If a match is found, the code in Vals[i].second is executed; control must
+/// not exit this code fragment.  If nothing matches, execution falls through.
+///
+class StringMatcher {
+public:
+  typedef std::pair<std::string, std::string> StringPair;
+private:
+  StringRef StrVariableName;
+  const std::vector<StringPair> &Matches;
+  raw_ostream &OS;
+  
+public:
+  StringMatcher(StringRef strVariableName, 
+                const std::vector<StringPair> &matches, raw_ostream &os)
+    : StrVariableName(strVariableName), Matches(matches), OS(os) {}
+  
+  void Emit(unsigned Indent = 0) const;
+  
+  
+private:
+  bool EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
+                                unsigned CharNo, unsigned IndentCount) const;
+};
+
+} // end llvm namespace.
+
+#endif
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index b04eaf88f73a..e35bdca97887 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -172,13 +172,10 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
 // CollectAllItinClasses - Gathers and enumerates all the itinerary classes.
 // Returns itinerary class count.
 //
-unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS,
-                              std::map<std::string, unsigned> &ItinClassesMap) {
-  // Gather and sort all itinerary classes
-  std::vector<Record*> ItinClassList =
-                            Records.getAllDerivedDefinitions("InstrItinClass");
-  std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
-
+unsigned SubtargetEmitter::
+CollectAllItinClasses(raw_ostream &OS,
+                      std::map<std::string, unsigned> &ItinClassesMap,
+                      std::vector<Record*> &ItinClassList) {
   // For each itinerary class
   unsigned N = ItinClassList.size();
   for (unsigned i = 0; i < N; i++) {
@@ -265,13 +262,32 @@ void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData,
   }
 }
 
+void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
+                                                 Record *ItinData,
+                                                 std::string &ItinString,
+                                                 unsigned NOperandCycles) {
+  const std::vector<Record*> &BypassList =
+    ItinData->getValueAsListOfDefs("Bypasses");
+  unsigned N = BypassList.size();
+  unsigned i = 0;
+  for (; i < N;) {
+    ItinString += Name + "Bypass::" + BypassList[i]->getName();
+    if (++i < NOperandCycles) ItinString += ", ";
+  }
+  for (; i < NOperandCycles;) {
+    ItinString += " 0";
+    if (++i < NOperandCycles) ItinString += ", ";
+  }
+}
+
 //
 // EmitStageAndOperandCycleData - Generate unique itinerary stages and
 // operand cycle tables.  Record itineraries for processors.
 //
 void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
        unsigned NItinClasses,
-       std::map<std::string, unsigned> &ItinClassesMap, 
+       std::map<std::string, unsigned> &ItinClassesMap,
+       std::vector<Record*> &ItinClassList,
        std::vector<std::vector<InstrItinerary> > &ProcList) {
   // Gather processor iteraries
   std::vector<Record*> ProcItinList =
@@ -298,6 +314,19 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
          << " = 1 << " << j << ";\n";
 
     OS << "}\n";
+
+    std::vector<Record*> BPs = Proc->getValueAsListOfDefs("BP");
+    if (BPs.size()) {
+      OS << "\n// Pipeline forwarding pathes for itineraries \"" << Name
+         << "\"\n" << "namespace " << Name << "Bypass {\n";
+
+      OS << "  const unsigned NoBypass = 0;\n";
+      for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
+        OS << "  const unsigned " << BPs[j]->getName()
+           << " = 1 << " << j << ";\n";
+
+      OS << "}\n";
+    }
   }
 
   // Begin stages table
@@ -307,10 +336,14 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   // Begin operand cycle table
   std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
   OperandCycleTable += "  0, // No itinerary\n";
+
+  // Begin pipeline bypass table
+  std::string BypassTable = "static const unsigned ForwardingPathes[] = {\n";
+  BypassTable += "  0, // No itinerary\n";
         
   unsigned StageCount = 1, OperandCycleCount = 1;
   unsigned ItinStageEnum = 1, ItinOperandCycleEnum = 1;
-  std::map<std::string, unsigned> ItinStageMap, ItinOperandCycleMap;
+  std::map<std::string, unsigned> ItinStageMap, ItinOperandMap;
   for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
     // Next record
     Record *Proc = ProcItinList[i];
@@ -344,6 +377,10 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
       FormItineraryOperandCycleString(ItinData, ItinOperandCycleString,
                                       NOperandCycles);
 
+      std::string ItinBypassString;
+      FormItineraryBypassString(Name, ItinData, ItinBypassString,
+                                NOperandCycles);
+
       // Check to see if stage already exists and create if it doesn't
       unsigned FindStage = 0;
       if (NStages > 0) {
@@ -361,27 +398,35 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
       // Check to see if operand cycle already exists and create if it doesn't
       unsigned FindOperandCycle = 0;
       if (NOperandCycles > 0) {
-        FindOperandCycle = ItinOperandCycleMap[ItinOperandCycleString];
+        std::string ItinOperandString = ItinOperandCycleString+ItinBypassString;
+        FindOperandCycle = ItinOperandMap[ItinOperandString];
         if (FindOperandCycle == 0) {
           // Emit as  cycle, // index
           OperandCycleTable += ItinOperandCycleString + ", // " + 
             itostr(ItinOperandCycleEnum) + "\n";
           // Record Itin class number.
-          ItinOperandCycleMap[ItinOperandCycleString] = 
+          ItinOperandMap[ItinOperandCycleString] = 
             FindOperandCycle = OperandCycleCount;
+
+          // Emit as bypass, // index
+          BypassTable += ItinBypassString + ", // " + 
+            itostr(ItinOperandCycleEnum) + "\n";
+
           OperandCycleCount += NOperandCycles;
           ItinOperandCycleEnum++;
         }
       }
       
-      // Set up itinerary as location and location + stage count
-      InstrItinerary Intinerary = { FindStage, FindStage + NStages,
-                                    FindOperandCycle, FindOperandCycle + NOperandCycles};
-
       // Locate where to inject into processor itinerary table
       const std::string &Name = ItinData->getValueAsDef("TheClass")->getName();
       unsigned Find = ItinClassesMap[Name];
       
+      // Set up itinerary as location and location + stage count
+      unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps");
+      InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
+                                    FindOperandCycle,
+                                    FindOperandCycle + NOperandCycles};
+
       // Inject - empty slots will be 0, 0
       ItinList[Find] = Intinerary;
     }
@@ -389,7 +434,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
     // Add process itinerary to list
     ProcList.push_back(ItinList);
   }
-  
+
   // Closing stage
   StageTable += "  { 0, 0, 0, llvm::InstrStage::Required } // End itinerary\n";
   StageTable += "};\n";
@@ -398,9 +443,13 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
   OperandCycleTable += "  0 // End itinerary\n";
   OperandCycleTable += "};\n";
 
+  BypassTable += "  0 // End itinerary\n";
+  BypassTable += "};\n";
+
   // Emit tables.
   OS << StageTable;
   OS << OperandCycleTable;
+  OS << BypassTable;
   
   // Emit size of tables
   OS<<"\nenum {\n";
@@ -443,9 +492,11 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
       // Emit in the form of 
       // { firstStage, lastStage, firstCycle, lastCycle } // index
       if (Intinerary.FirstStage == 0) {
-        OS << "  { 0, 0, 0, 0 }";
+        OS << "  { 1, 0, 0, 0, 0 }";
       } else {
-        OS << "  { " << Intinerary.FirstStage << ", " << 
+        OS << "  { " <<
+          Intinerary.NumMicroOps << ", " <<
+          Intinerary.FirstStage << ", " << 
           Intinerary.LastStage << ", " << 
           Intinerary.FirstOperandCycle << ", " << 
           Intinerary.LastOperandCycle << " }";
@@ -455,7 +506,7 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
     }
     
     // End processor itinerary table
-    OS << "  { ~0U, ~0U, ~0U, ~0U } // end marker\n";
+    OS << "  { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n";
     OS << "};\n";
   }
 }
@@ -511,16 +562,22 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
 //
 void SubtargetEmitter::EmitData(raw_ostream &OS) {
   std::map<std::string, unsigned> ItinClassesMap;
-  std::vector<std::vector<InstrItinerary> > ProcList;
+  // Gather and sort all itinerary classes
+  std::vector<Record*> ItinClassList =
+    Records.getAllDerivedDefinitions("InstrItinClass");
+  std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
   
   // Enumerate all the itinerary classes
-  unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap);
+  unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap,
+                                                ItinClassList);
   // Make sure the rest is worth the effort
   HasItineraries = NItinClasses != 1;   // Ignore NoItinerary.
   
   if (HasItineraries) {
+    std::vector<std::vector<InstrItinerary> > ProcList;
     // Emit the stage data
-    EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap, ProcList);
+    EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap,
+                                 ItinClassList, ProcList);
     // Emit the processor itinerary data
     EmitProcessorData(OS, ProcList);
     // Emit the processor lookup data
@@ -569,7 +626,8 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
     OS << "\n"
        << "  InstrItinerary *Itinerary = (InstrItinerary *)"
        <<              "Features.getInfo(ProcItinKV, ProcItinKVSize);\n"
-       << "  InstrItins = InstrItineraryData(Stages, OperandCycles, Itinerary);\n";
+       << "  InstrItins = InstrItineraryData(Stages, OperandCycles, "
+       << "ForwardingPathes, Itinerary);\n";
   }
 
   OS << "  return Features.getCPU();\n"
@@ -580,7 +638,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
 // SubtargetEmitter::run - Main subtarget enumeration emitter.
 //
 void SubtargetEmitter::run(raw_ostream &OS) {
-  Target = CodeGenTarget().getName();
+  Target = CodeGenTarget(Records).getName();
 
   EmitSourceFileHeader("Subtarget Enumeration Source Fragment", OS);
 
diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h
index f43a4431d61e..3abec3b24091 100644
--- a/utils/TableGen/SubtargetEmitter.h
+++ b/utils/TableGen/SubtargetEmitter.h
@@ -33,14 +33,19 @@ class SubtargetEmitter : public TableGenBackend {
   void FeatureKeyValues(raw_ostream &OS);
   void CPUKeyValues(raw_ostream &OS);
   unsigned CollectAllItinClasses(raw_ostream &OS,
-                               std::map<std::string, unsigned> &ItinClassesMap);
+                                 std::map<std::string,unsigned> &ItinClassesMap,
+                                 std::vector<Record*> &ItinClassList);
   void FormItineraryStageString(const std::string &Names,
                                 Record *ItinData, std::string &ItinString,
                                 unsigned &NStages);
   void FormItineraryOperandCycleString(Record *ItinData, std::string &ItinString,
                                        unsigned &NOperandCycles);
+  void FormItineraryBypassString(const std::string &Names,
+                                 Record *ItinData,
+                                 std::string &ItinString, unsigned NOperandCycles);
   void EmitStageAndOperandCycleData(raw_ostream &OS, unsigned NItinClasses,
                      std::map<std::string, unsigned> &ItinClassesMap,
+                     std::vector<Record*> &ItinClassList,
                      std::vector<std::vector<InstrItinerary> > &ProcList);
   void EmitProcessorData(raw_ostream &OS,
                        std::vector<std::vector<InstrItinerary> > &ProcList);
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
index 2c7becc71824..82d2b6491aac 100644
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@@ -15,6 +15,8 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Config/config.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include <cctype>
 #include <cstdio>
 #include <cstdlib>
@@ -36,17 +38,17 @@ SMLoc TGLexer::getLoc() const {
 
 /// ReturnError - Set the error to the specified string at the specified
 /// location.  This is defined to always return tgtok::Error.
-tgtok::TokKind TGLexer::ReturnError(const char *Loc, const std::string &Msg) {
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
   PrintError(Loc, Msg);
   return tgtok::Error;
 }
 
 
-void TGLexer::PrintError(const char *Loc, const std::string &Msg) const {
+void TGLexer::PrintError(const char *Loc, const Twine &Msg) const {
   SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
 }
 
-void TGLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
+void TGLexer::PrintError(SMLoc Loc, const Twine &Msg) const {
   SrcMgr.PrintMessage(Loc, Msg, "error");
 }
 
@@ -95,7 +97,7 @@ tgtok::TokKind TGLexer::LexToken() {
 
   switch (CurChar) {
   default:
-    // Handle letters: [a-zA-Z_]
+    // Handle letters: [a-zA-Z_#]
     if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
       return LexIdentifier();
       
@@ -214,23 +216,13 @@ tgtok::TokKind TGLexer::LexVarName() {
 
 
 tgtok::TokKind TGLexer::LexIdentifier() {
-  // The first letter is [a-zA-Z_].
+  // The first letter is [a-zA-Z_#].
   const char *IdentStart = TokStart;
   
-  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
-  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_'
-         || *CurPtr == '#') {
-    // If this contains a '#', make sure it's value
-    if (*CurPtr == '#') {
-      if (strncmp(CurPtr, "#NAME#", 6) != 0) {
-        return tgtok::Error;
-      }
-      CurPtr += 6;
-    }
-    else {
-      ++CurPtr;
-    }
-  }
+  // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
+         *CurPtr == '#')
+    ++CurPtr;
   
   
   // Check to see if this identifier is a keyword.
@@ -421,30 +413,30 @@ tgtok::TokKind TGLexer::LexBracket() {
 /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
 tgtok::TokKind TGLexer::LexExclaim() {
   if (!isalpha(*CurPtr))
-    return ReturnError(CurPtr-1, "Invalid \"!operator\"");
+    return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
   
   const char *Start = CurPtr++;
   while (isalpha(*CurPtr))
     ++CurPtr;
   
   // Check to see which operator this is.
-  unsigned Len = CurPtr-Start;
-  
-  if (Len == 3  && !memcmp(Start, "con", 3)) return tgtok::XConcat;
-  if (Len == 3  && !memcmp(Start, "sra", 3)) return tgtok::XSRA;
-  if (Len == 3  && !memcmp(Start, "srl", 3)) return tgtok::XSRL;
-  if (Len == 3  && !memcmp(Start, "shl", 3)) return tgtok::XSHL;
-  if (Len == 2  && !memcmp(Start, "eq", 2)) return tgtok::XEq;
-  if (Len == 9  && !memcmp(Start, "strconcat", 9))   return tgtok::XStrConcat;
-  if (Len == 10 && !memcmp(Start, "nameconcat", 10)) return tgtok::XNameConcat;
-  if (Len == 5 && !memcmp(Start, "subst", 5)) return tgtok::XSubst;
-  if (Len == 7 && !memcmp(Start, "foreach", 7)) return tgtok::XForEach;
-  if (Len == 4 && !memcmp(Start, "cast", 4)) return tgtok::XCast;
-  if (Len == 3 && !memcmp(Start, "car", 3)) return tgtok::XCar;
-  if (Len == 3 && !memcmp(Start, "cdr", 3)) return tgtok::XCdr;
-  if (Len == 4 && !memcmp(Start, "null", 4)) return tgtok::XNull;
-  if (Len == 2 && !memcmp(Start, "if", 2)) return tgtok::XIf;
-
-  return ReturnError(Start-1, "Unknown operator");
+  tgtok::TokKind Kind =
+    StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
+    .Case("eq", tgtok::XEq)
+    .Case("if", tgtok::XIf)
+    .Case("head", tgtok::XHead)
+    .Case("tail", tgtok::XTail)
+    .Case("con", tgtok::XConcat)
+    .Case("shl", tgtok::XSHL)
+    .Case("sra", tgtok::XSRA)
+    .Case("srl", tgtok::XSRL)
+    .Case("cast", tgtok::XCast)
+    .Case("empty", tgtok::XEmpty)
+    .Case("subst", tgtok::XSubst)
+    .Case("foreach", tgtok::XForEach)
+    .Case("strconcat", tgtok::XStrConcat)
+    .Default(tgtok::Error);
+
+  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
 }
 
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index 835f351d3d0d..55a6c5d9b52e 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -14,7 +14,7 @@
 #ifndef TGLEXER_H
 #define TGLEXER_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include <vector>
 #include <string>
 #include <cassert>
@@ -23,7 +23,8 @@ namespace llvm {
 class MemoryBuffer;
 class SourceMgr;
 class SMLoc;
-  
+class Twine;
+
 namespace tgtok {
   enum TokKind {
     // Markers
@@ -44,8 +45,8 @@ namespace tgtok {
     MultiClass, String,
     
     // !keywords.
-    XConcat, XSRA, XSRL, XSHL, XStrConcat, XNameConcat, XCast, XSubst,
-    XForEach, XCar, XCdr, XNull, XIf, XEq,
+    XConcat, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+    XForEach, XHead, XTail, XEmpty, XIf, XEq,
 
     // Integer value.
     IntVal,
@@ -95,14 +96,14 @@ public:
 
   SMLoc getLoc() const;
 
-  void PrintError(const char *Loc, const std::string &Msg) const;
-  void PrintError(SMLoc Loc, const std::string &Msg) const;
+  void PrintError(const char *Loc, const Twine &Msg) const;
+  void PrintError(SMLoc Loc, const Twine &Msg) const;
   
 private:
   /// LexToken - Read the next token and return its code.
   tgtok::TokKind LexToken();
   
-  tgtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
+  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
   
   int getNextChar();
   void SkipBCPLComment();
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index f81aabe79b03..f6041be95e16 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -16,6 +16,8 @@
 #include "llvm/ADT/StringExtras.h"
 #include <algorithm>
 #include <sstream>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -294,20 +296,23 @@ static bool isObjectStart(tgtok::TokKind K) {
          K == tgtok::Defm || K == tgtok::Let || K == tgtok::MultiClass;
 }
 
+static std::string GetNewAnonymousName() {
+  static unsigned AnonCounter = 0;
+  return "anonymous."+utostr(AnonCounter++);
+}
+
 /// ParseObjectName - If an object name is specified, return it.  Otherwise,
 /// return an anonymous name.
 ///   ObjectName ::= ID
 ///   ObjectName ::= /*empty*/
 ///
 std::string TGParser::ParseObjectName() {
-  if (Lex.getCode() == tgtok::Id) {
-    std::string Ret = Lex.getCurStrVal();
-    Lex.Lex();
-    return Ret;
-  }
+  if (Lex.getCode() != tgtok::Id)
+    return GetNewAnonymousName();
 
-  static unsigned AnonCounter = 0;
-  return "anonymous."+utostr(AnonCounter++);
+  std::string Ret = Lex.getCurStrVal();
+  Lex.Lex();
+  return Ret;
 }
 
 
@@ -678,9 +683,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
     TokError("unknown operation");
     return 0;
     break;
-  case tgtok::XCar:
-  case tgtok::XCdr:
-  case tgtok::XNull:
+  case tgtok::XHead:
+  case tgtok::XTail:
+  case tgtok::XEmpty:
   case tgtok::XCast: {  // Value ::= !unop '(' Value ')'
     UnOpInit::UnaryOp Code;
     RecTy *Type = 0;
@@ -699,17 +704,17 @@ Init *TGParser::ParseOperation(Record *CurRec) {
       }
 
       break;
-    case tgtok::XCar:
+    case tgtok::XHead:
       Lex.Lex();  // eat the operation
-      Code = UnOpInit::CAR;
+      Code = UnOpInit::HEAD;
       break;
-    case tgtok::XCdr:
+    case tgtok::XTail:
       Lex.Lex();  // eat the operation
-      Code = UnOpInit::CDR;
+      Code = UnOpInit::TAIL;
       break;
-    case tgtok::XNull:
+    case tgtok::XEmpty:
       Lex.Lex();  // eat the operation
-      Code = UnOpInit::LNULL;
+      Code = UnOpInit::EMPTY;
       Type = new IntRecTy;
       break;
     }
@@ -722,9 +727,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
     Init *LHS = ParseValue(CurRec);
     if (LHS == 0) return 0;
 
-    if (Code == UnOpInit::CAR
-        || Code == UnOpInit::CDR
-        || Code == UnOpInit::LNULL) {
+    if (Code == UnOpInit::HEAD
+        || Code == UnOpInit::TAIL
+        || Code == UnOpInit::EMPTY) {
       ListInit *LHSl = dynamic_cast<ListInit*>(LHS);
       StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
       TypedInit *LHSt = dynamic_cast<TypedInit*>(LHS);
@@ -741,8 +746,8 @@ Init *TGParser::ParseOperation(Record *CurRec) {
         }
       }
 
-      if (Code == UnOpInit::CAR
-          || Code == UnOpInit::CDR) {
+      if (Code == UnOpInit::HEAD
+          || Code == UnOpInit::TAIL) {
         if (LHSl == 0 && LHSt == 0) {
           TokError("expected list type argumnet in unary operator");
           return 0;
@@ -759,7 +764,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
             TokError("untyped list element in unary operator");
             return 0;
           }
-          if (Code == UnOpInit::CAR) {
+          if (Code == UnOpInit::HEAD) {
             Type = Itemt->getType();
           } else {
             Type = new ListRecTy(Itemt->getType());
@@ -771,7 +776,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
             TokError("expected list type argumnet in unary operator");
             return 0;
           }
-          if (Code == UnOpInit::CAR) {
+          if (Code == UnOpInit::HEAD) {
             Type = LType->getElementType();
           } else {
             Type = LType;
@@ -793,81 +798,68 @@ Init *TGParser::ParseOperation(Record *CurRec) {
   case tgtok::XSRL:
   case tgtok::XSHL:
   case tgtok::XEq:
-  case tgtok::XStrConcat:
-  case tgtok::XNameConcat: {  // Value ::= !binop '(' Value ',' Value ')'
+  case tgtok::XStrConcat: {  // Value ::= !binop '(' Value ',' Value ')'
+    tgtok::TokKind OpTok = Lex.getCode();
+    SMLoc OpLoc = Lex.getLoc();
+    Lex.Lex();  // eat the operation
+
     BinOpInit::BinaryOp Code;
     RecTy *Type = 0;
 
-
-    switch (Lex.getCode()) {
+    switch (OpTok) {
     default: assert(0 && "Unhandled code!");
-    case tgtok::XConcat:
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::CONCAT;
-      Type = new DagRecTy();
-      break;
-    case tgtok::XSRA:
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::SRA;
-      Type = new IntRecTy();
-      break;
-    case tgtok::XSRL:
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::SRL;
-      Type = new IntRecTy();
-      break;
-    case tgtok::XSHL:
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::SHL;
-      Type = new IntRecTy();
-      break;
-    case tgtok::XEq:  
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::EQ;
-      Type = new IntRecTy();
-      break;
+    case tgtok::XConcat: Code = BinOpInit::CONCAT; Type = new DagRecTy(); break;
+    case tgtok::XSRA:    Code = BinOpInit::SRA;    Type = new IntRecTy(); break;
+    case tgtok::XSRL:    Code = BinOpInit::SRL;    Type = new IntRecTy(); break;
+    case tgtok::XSHL:    Code = BinOpInit::SHL;    Type = new IntRecTy(); break;
+    case tgtok::XEq:     Code = BinOpInit::EQ;     Type = new BitRecTy(); break;
     case tgtok::XStrConcat:
-      Lex.Lex();  // eat the operation
       Code = BinOpInit::STRCONCAT;
       Type = new StringRecTy();
-      break;
-    case tgtok::XNameConcat:
-      Lex.Lex();  // eat the operation
-      Code = BinOpInit::NAMECONCAT;
-
-      Type = ParseOperatorType();
-
-      if (Type == 0) {
-        TokError("did not get type for binary operator");
-        return 0;
-      }
-
       break;
     }
+
     if (Lex.getCode() != tgtok::l_paren) {
       TokError("expected '(' after binary operator");
       return 0;
     }
     Lex.Lex();  // eat the '('
 
-    Init *LHS = ParseValue(CurRec);
-    if (LHS == 0) return 0;
+    SmallVector<Init*, 2> InitList;
 
-    if (Lex.getCode() != tgtok::comma) {
-      TokError("expected ',' in binary operator");
-      return 0;
-    }
-    Lex.Lex();  // eat the ','
+    InitList.push_back(ParseValue(CurRec));
+    if (InitList.back() == 0) return 0;
 
-    Init *RHS = ParseValue(CurRec);
-    if (RHS == 0) return 0;
+    while (Lex.getCode() == tgtok::comma) {
+      Lex.Lex();  // eat the ','
+
+      InitList.push_back(ParseValue(CurRec));
+      if (InitList.back() == 0) return 0;
+    }
 
     if (Lex.getCode() != tgtok::r_paren) {
-      TokError("expected ')' in binary operator");
+      TokError("expected ')' in operator");
       return 0;
     }
     Lex.Lex();  // eat the ')'
-    return (new BinOpInit(Code, LHS, RHS, Type))->Fold(CurRec, CurMultiClass);
+
+    // We allow multiple operands to associative operators like !strconcat as
+    // shorthand for nesting them.
+    if (Code == BinOpInit::STRCONCAT) {
+      while (InitList.size() > 2) {
+        Init *RHS = InitList.pop_back_val();
+        RHS = (new BinOpInit(Code, InitList.back(), RHS, Type))
+                      ->Fold(CurRec, CurMultiClass);
+        InitList.back() = RHS;
+      }
+    }
+
+    if (InitList.size() == 2)
+      return (new BinOpInit(Code, InitList[0], InitList[1], Type))
+        ->Fold(CurRec, CurMultiClass);
+
+    Error(OpLoc, "expected two operands to operator");
+    return 0;
   }
 
   case tgtok::XIf:
@@ -876,7 +868,6 @@ Init *TGParser::ParseOperation(Record *CurRec) {
     TernOpInit::TernaryOp Code;
     RecTy *Type = 0;
 
-
     tgtok::TokKind LexCode = Lex.getCode();
     Lex.Lex();  // eat the operation
     switch (LexCode) {
@@ -927,16 +918,45 @@ Init *TGParser::ParseOperation(Record *CurRec) {
     switch (LexCode) {
     default: assert(0 && "Unhandled code!");
     case tgtok::XIf: {
-      TypedInit *MHSt = dynamic_cast<TypedInit *>(MHS);
-      TypedInit *RHSt = dynamic_cast<TypedInit *>(RHS);
-      if (MHSt == 0 || RHSt == 0) {
+      // FIXME: The `!if' operator doesn't handle non-TypedInit well at
+      // all. This can be made much more robust.
+      TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS);
+      TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS);
+
+      RecTy *MHSTy = 0;
+      RecTy *RHSTy = 0;
+
+      if (MHSt == 0 && RHSt == 0) {
+        BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS);
+        BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS);
+
+        if (MHSbits && RHSbits &&
+            MHSbits->getNumBits() == RHSbits->getNumBits()) {
+          Type = new BitRecTy();
+          break;
+        } else {
+          BitInit *MHSbit = dynamic_cast<BitInit*>(MHS);
+          BitInit *RHSbit = dynamic_cast<BitInit*>(RHS);
+
+          if (MHSbit && RHSbit) {
+            Type = new BitRecTy();
+            break;
+          }
+        }
+      } else if (MHSt != 0 && RHSt != 0) {
+        MHSTy = MHSt->getType();
+        RHSTy = RHSt->getType();
+      }
+
+      if (!MHSTy || !RHSTy) {
         TokError("could not get type for !if");
         return 0;
       }
-      if (MHSt->getType()->typeIsConvertibleTo(RHSt->getType())) {
-        Type = RHSt->getType();
-      } else if (RHSt->getType()->typeIsConvertibleTo(MHSt->getType())) {
-        Type = MHSt->getType();
+
+      if (MHSTy->typeIsConvertibleTo(RHSTy)) {
+        Type = RHSTy;
+      } else if (RHSTy->typeIsConvertibleTo(MHSTy)) {
+        Type = MHSTy;
       } else {
         TokError("inconsistent types for !if");
         return 0;
@@ -1037,8 +1057,13 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
     break;
   }
   case tgtok::CodeFragment:
-    R = new CodeInit(Lex.getCurStrVal()); Lex.Lex(); break;
-  case tgtok::question: R = new UnsetInit(); Lex.Lex(); break;
+    R = new CodeInit(Lex.getCurStrVal());
+    Lex.Lex();
+    break;
+  case tgtok::question:
+    R = new UnsetInit();
+    Lex.Lex();
+    break;
   case tgtok::Id: {
     SMLoc NameLoc = Lex.getLoc();
     std::string Name = Lex.getCurStrVal();
@@ -1071,7 +1096,9 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
 
     // Create the new record, set it as CurRec temporarily.
     static unsigned AnonCounter = 0;
-    Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++),NameLoc);
+    Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++),
+                                NameLoc,
+                                Records);
     SubClassReference SCRef;
     SCRef.RefLoc = NameLoc;
     SCRef.Rec = Class;
@@ -1212,21 +1239,13 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
   }
   case tgtok::l_paren: {         // Value ::= '(' IDValue DagArgList ')'
     Lex.Lex();   // eat the '('
-    if (Lex.getCode() != tgtok::Id
-        && Lex.getCode() != tgtok::XCast
-        && Lex.getCode() != tgtok::XNameConcat) {
+    if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) {
       TokError("expected identifier in dag init");
       return 0;
     }
 
-    Init *Operator = 0;
-    if (Lex.getCode() == tgtok::Id) {
-      Operator = ParseIDValue(CurRec);
-      if (Operator == 0) return 0;
-    } else {
-      Operator = ParseOperation(CurRec);
-      if (Operator == 0) return 0;
-    }
+    Init *Operator = ParseValue(CurRec);
+    if (Operator == 0) return 0;
 
     // If the operator name is present, parse it.
     std::string OperatorName;
@@ -1252,25 +1271,22 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
     Lex.Lex();  // eat the ')'
 
     return new DagInit(Operator, OperatorName, DagArgs);
-    break;
   }
 
-  case tgtok::XCar:
-  case tgtok::XCdr:
-  case tgtok::XNull:
+  case tgtok::XHead:
+  case tgtok::XTail:
+  case tgtok::XEmpty:
   case tgtok::XCast:  // Value ::= !unop '(' Value ')'
   case tgtok::XConcat:
   case tgtok::XSRA:
   case tgtok::XSRL:
   case tgtok::XSHL:
   case tgtok::XEq:
-  case tgtok::XStrConcat:
-  case tgtok::XNameConcat:  // Value ::= !binop '(' Value ',' Value ')'
+  case tgtok::XStrConcat:   // Value ::= !binop '(' Value ',' Value ')'
   case tgtok::XIf:
   case tgtok::XForEach:
   case tgtok::XSubst: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
     return ParseOperation(CurRec);
-    break;
   }
   }
 
@@ -1646,7 +1662,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
   Lex.Lex();  // Eat the 'def' token.
 
   // Parse ObjectName and make a record for it.
-  Record *CurRec = new Record(ParseObjectName(), DefLoc);
+  Record *CurRec = new Record(ParseObjectName(), DefLoc, Records);
 
   if (!CurMultiClass) {
     // Top-level def definition.
@@ -1713,7 +1729,7 @@ bool TGParser::ParseClass() {
       return TokError("Class '" + CurRec->getName() + "' already defined");
   } else {
     // If this is the first reference to this class, create and add it.
-    CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc());
+    CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records);
     Records.addClass(CurRec);
   }
   Lex.Lex(); // eat the name.
@@ -1830,7 +1846,8 @@ bool TGParser::ParseMultiClass() {
   if (MultiClasses.count(Name))
     return TokError("multiclass '" + Name + "' already defined");
 
-  CurMultiClass = MultiClasses[Name] = new MultiClass(Name, Lex.getLoc());
+  CurMultiClass = MultiClasses[Name] = new MultiClass(Name, 
+                                                      Lex.getLoc(), Records);
   Lex.Lex();  // Eat the identifier.
 
   // If there are template args, parse them.
@@ -1899,12 +1916,15 @@ bool TGParser::ParseMultiClass() {
 ///
 bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
   assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
-  if (Lex.Lex() != tgtok::Id)  // eat the defm.
-    return TokError("expected identifier after defm");
+
+  std::string DefmPrefix;
+  if (Lex.Lex() == tgtok::Id) {  // eat the defm.
+    DefmPrefix = Lex.getCurStrVal();
+    Lex.Lex();  // Eat the defm prefix.
+  }
 
   SMLoc DefmPrefixLoc = Lex.getLoc();
-  std::string DefmPrefix = Lex.getCurStrVal();
-  if (Lex.Lex() != tgtok::colon)
+  if (Lex.getCode() != tgtok::colon)
     return TokError("expected ':' after defm identifier");
 
   // Keep track of the new generated record definitions.
@@ -1939,17 +1959,24 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
     for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) {
       Record *DefProto = MC->DefPrototypes[i];
 
-      // Add in the defm name
+      // Add in the defm name.  If the defm prefix is empty, give each
+      // instantiated def a unique name.  Otherwise, if "#NAME#" exists in the
+      // name, substitute the prefix for #NAME#.  Otherwise, use the defm name
+      // as a prefix.
       std::string DefName = DefProto->getName();
-      std::string::size_type idx = DefName.find("#NAME#");
-      if (idx != std::string::npos) {
-        DefName.replace(idx, 6, DefmPrefix);
+      if (DefmPrefix.empty()) {
+        DefName = GetNewAnonymousName();
       } else {
-        // Add the suffix to the defm name to get the new name.
-        DefName = DefmPrefix + DefName;
+        std::string::size_type idx = DefName.find("#NAME#");
+        if (idx != std::string::npos) {
+          DefName.replace(idx, 6, DefmPrefix);
+        } else {
+          // Add the suffix to the defm name to get the new name.
+          DefName = DefmPrefix + DefName;
+        }
       }
 
-      Record *CurRec = new Record(DefName, DefmPrefixLoc);
+      Record *CurRec = new Record(DefName, DefmPrefixLoc, Records);
 
       SubClassReference Ref;
       Ref.RefLoc = DefmPrefixLoc;
@@ -2091,7 +2118,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
 ///   Object ::= LETCommand Object
 bool TGParser::ParseObject(MultiClass *MC) {
   switch (Lex.getCode()) {
-  default: assert(0 && "This is not an object");
+  default:
+    return TokError("Expected class, def, defm, multiclass or let definition");
   case tgtok::Let:   return ParseTopLevelLet(MC);
   case tgtok::Def:   return ParseDef(MC);
   case tgtok::Defm:  return ParseDefm(MC);
diff --git a/utils/TableGen/TGParser.h b/utils/TableGen/TGParser.h
index 0aee931423a6..9cdf68ff9749 100644
--- a/utils/TableGen/TGParser.h
+++ b/utils/TableGen/TGParser.h
@@ -15,12 +15,14 @@
 #define TGPARSER_H
 
 #include "TGLexer.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
 #include <map>
 
 namespace llvm {
   class Record;
   class RecordVal;
+  class RecordKeeper;
   struct RecTy;
   struct Init;
   struct MultiClass;
@@ -46,18 +48,22 @@ class TGParser {
   /// CurMultiClass - If we are parsing a 'multiclass' definition, this is the 
   /// current value.
   MultiClass *CurMultiClass;
+
+  // Record tracker
+  RecordKeeper &Records;
 public:
-  TGParser(SourceMgr &SrcMgr) : Lex(SrcMgr), CurMultiClass(0) {}
+  TGParser(SourceMgr &SrcMgr, RecordKeeper &records) : 
+    Lex(SrcMgr), CurMultiClass(0), Records(records) {}
   
   /// ParseFile - Main entrypoint for parsing a tblgen file.  These parser
   /// routines return true on error, or false on success.
   bool ParseFile();
   
-  bool Error(SMLoc L, const std::string &Msg) const {
+  bool Error(SMLoc L, const Twine &Msg) const {
     Lex.PrintError(L, Msg);
     return true;
   }
-  bool TokError(const std::string &Msg) const {
+  bool TokError(const Twine &Msg) const {
     return Error(Lex.getLoc(), Msg);
   }
 private:  // Semantic analysis methods.
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 5e3e2829b87f..3b7dc0193b28 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -21,6 +21,7 @@
 #include "ClangASTNodesEmitter.h"
 #include "ClangAttrEmitter.h"
 #include "ClangDiagnosticsEmitter.h"
+#include "ClangSACheckersEmitter.h"
 #include "CodeEmitterGen.h"
 #include "DAGISelEmitter.h"
 #include "DisassemblerEmitter.h"
@@ -37,11 +38,13 @@
 #include "ARMDecoderEmitter.h"
 #include "SubtargetEmitter.h"
 #include "TGParser.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <cstdio>
 using namespace llvm;
@@ -59,10 +62,12 @@ enum ActionType {
   GenClangAttrList,
   GenClangAttrPCHRead,
   GenClangAttrPCHWrite,
+  GenClangAttrSpellingList,
   GenClangDiagsDefs,
   GenClangDiagGroups,
   GenClangDeclNodes,
   GenClangStmtNodes,
+  GenClangSACheckers,
   GenDAGISel,
   GenFastISel,
   GenOptParserDefs, GenOptParserImpl,
@@ -73,6 +78,7 @@ enum ActionType {
   GenEDInfo,
   GenArmNeon,
   GenArmNeonSema,
+  GenArmNeonTest,
   PrintEnums
 };
 
@@ -127,14 +133,18 @@ namespace {
                                "Generate clang PCH attribute reader"),
                     clEnumValN(GenClangAttrPCHWrite, "gen-clang-attr-pch-write",
                                "Generate clang PCH attribute writer"),
+                    clEnumValN(GenClangAttrSpellingList, "gen-clang-attr-spelling-list",
+                               "Generate a clang attribute spelling list"),
                     clEnumValN(GenClangDiagsDefs, "gen-clang-diags-defs",
                                "Generate Clang diagnostics definitions"),
                     clEnumValN(GenClangDiagGroups, "gen-clang-diag-groups",
                                "Generate Clang diagnostic groups"),
                     clEnumValN(GenClangDeclNodes, "gen-clang-decl-nodes",
-                               "Generate Clang AST statement nodes"),
+                               "Generate Clang AST declaration nodes"),
                     clEnumValN(GenClangStmtNodes, "gen-clang-stmt-nodes",
                                "Generate Clang AST statement nodes"),
+                    clEnumValN(GenClangSACheckers, "gen-clang-sa-checkers",
+                               "Generate Clang Static Analyzer checkers"),
                     clEnumValN(GenLLVMCConf, "gen-llvmc",
                                "Generate LLVMC configuration library"),
                     clEnumValN(GenEDInfo, "gen-enhanced-disassembly-info",
@@ -143,6 +153,8 @@ namespace {
                                "Generate arm_neon.h for clang"),
                     clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
                                "Generate ARM NEON sema support for clang"),
+                    clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
+                               "Generate ARM NEON tests for clang"),
                     clEnumValN(PrintEnums, "print-enums",
                                "Print enum values for a class"),
                     clEnumValEnd));
@@ -169,12 +181,9 @@ namespace {
 }
 
 
-// FIXME: Eliminate globals from tblgen.
-RecordKeeper llvm::Records;
-
 static SourceMgr SrcMgr;
 
-void llvm::PrintError(SMLoc ErrorLoc, const std::string &Msg) {
+void llvm::PrintError(SMLoc ErrorLoc, const Twine &Msg) {
   SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
 }
 
@@ -184,14 +193,15 @@ void llvm::PrintError(SMLoc ErrorLoc, const std::string &Msg) {
 /// file.
 static bool ParseFile(const std::string &Filename,
                       const std::vector<std::string> &IncludeDirs,
-                      SourceMgr &SrcMgr) {
-  std::string ErrorStr;
-  MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
-  if (F == 0) {
+                      SourceMgr &SrcMgr,
+                      RecordKeeper &Records) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
     errs() << "Could not open input file '" << Filename << "': "
-           << ErrorStr <<"\n";
+           << ec.message() <<"\n";
     return true;
   }
+  MemoryBuffer *F = File.take();
 
   // Tell SrcMgr about this buffer, which is what TGParser will pick up.
   SrcMgr.AddNewSourceBuffer(F, SMLoc());
@@ -200,19 +210,21 @@ static bool ParseFile(const std::string &Filename,
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
 
-  TGParser Parser(SrcMgr);
+  TGParser Parser(SrcMgr, Records);
 
   return Parser.ParseFile();
 }
 
 int main(int argc, char **argv) {
+  RecordKeeper Records;
+
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
 
 
   // Parse the input file.
-  if (ParseFile(InputFilename, IncludeDirs, SrcMgr))
+  if (ParseFile(InputFilename, IncludeDirs, SrcMgr, Records))
     return 1;
 
   std::string Error;
@@ -274,6 +286,9 @@ int main(int argc, char **argv) {
     case GenClangAttrPCHWrite:
       ClangAttrPCHWriteEmitter(Records).run(Out.os());
       break;
+    case GenClangAttrSpellingList:
+      ClangAttrSpellingListEmitter(Records).run(Out.os());
+      break;
     case GenClangDiagsDefs:
       ClangDiagsDefsEmitter(Records, ClangComponent).run(Out.os());
       break;
@@ -287,6 +302,9 @@ int main(int argc, char **argv) {
     case GenClangStmtNodes:
       ClangASTNodesEmitter(Records, "Stmt", "").run(Out.os());
       break;
+    case GenClangSACheckers:
+      ClangSACheckersEmitter(Records).run(Out.os());
+      break;
     case GenDisassembler:
       DisassemblerEmitter(Records).run(Out.os());
       break;
@@ -323,6 +341,9 @@ int main(int argc, char **argv) {
     case GenArmNeonSema:
       NeonEmitter(Records).runHeader(Out.os());
       break;
+    case GenArmNeonTest:
+      NeonEmitter(Records).runTests(Out.os());
+      break;
     case PrintEnums:
     {
       std::vector<Record*> Recs = Records.getAllDerivedDefinitions(Class);
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 2176224523a3..94797f55f713 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -161,7 +161,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o,
 /// @param i        - The indentation level for that output stream.
 static void emitEmptyTable(raw_ostream &o, uint32_t &i)
 {
-  o.indent(i * 2) << "static InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
+  o.indent(i * 2) << "static const InstrUID modRMEmptyTable[1] = { 0 };\n";
   o << "\n";
 }
 
@@ -275,7 +275,7 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
     return;
   }
     
-  o1.indent(i1) << "static InstrUID modRMTable" << thisTableNumber;
+  o1.indent(i1) << "static const InstrUID modRMTable" << thisTableNumber;
     
   switch (dt) {
     default:
@@ -365,7 +365,7 @@ void DisassemblerTables::emitContextDecision(
   uint32_t &i2,
   ContextDecision &decision,
   const char* name) const {
-  o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n";
+  o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n";
   i2++;
   o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
   i2++;
@@ -392,10 +392,8 @@ void DisassemblerTables::emitContextDecision(
 
 void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) 
   const {
-  o.indent(i * 2) << "struct InstructionSpecifier ";
-  o << INSTRUCTIONS_STR << "[";
-  o << InstructionSpecifiers.size();
-  o << "] = {" << "\n";
+  o.indent(i * 2) << "static const struct InstructionSpecifier ";
+  o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n";
   
   i++;
 
@@ -456,8 +454,8 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
 void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
   uint16_t index;
 
-  o.indent(i * 2) << "InstructionContext ";
-  o << CONTEXTS_STR << "[256] = {" << "\n";
+  o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR
+                     "[256] = {\n";
   i++;
 
   for (index = 0; index < 256; ++index) {
diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h
index 45cb07a3d393..199040bad840 100644
--- a/utils/TableGen/X86ModRMFilters.h
+++ b/utils/TableGen/X86ModRMFilters.h
@@ -18,7 +18,7 @@
 #ifndef X86MODRMFILTERS_H
 #define X86MODRMFILTERS_H
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 4dba85b16681..ccd3efd980a2 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -51,10 +51,11 @@ namespace X86Local {
     MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27,
     MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31,
     MRMInitReg  = 32,
-    
 #define MAP(from, to) MRM_##from = to,
     MRM_MAPPING
 #undef MAP
+    RawFrmImm8  = 43,
+    RawFrmImm16 = 44,
     lastMRM
   };
   
@@ -113,7 +114,6 @@ namespace X86Local {
   EXTENSION_TABLE(72)             \
   EXTENSION_TABLE(73)             \
   EXTENSION_TABLE(ae)             \
-  EXTENSION_TABLE(b9)             \
   EXTENSION_TABLE(ba)             \
   EXTENSION_TABLE(c7)
 
@@ -219,7 +219,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   Name      = Rec->getName();
   AsmString = Rec->getValueAsString("AsmString");
   
-  Operands = &insn.OperandList;
+  Operands = &insn.Operands.OperandList;
   
   IsSSE            = HasOpSizePrefix && (Name.find("16") == Name.npos);
   HasFROperands    = false;
@@ -311,7 +311,7 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
     return FILTER_STRONG;
 
   // Special cases.
-  
+
   if (Name.find("PCMPISTRI") != Name.npos && Name != "PCMPISTRI")
     return FILTER_WEAK;
   if (Name.find("PCMPESTRI") != Name.npos && Name != "PCMPESTRI")
@@ -424,7 +424,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   
   Spec->insnContext = insnContext();
     
-  const std::vector<CodeGenInstruction::OperandInfo> &OperandList = *Operands;
+  const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
   
   unsigned operandIndex;
   unsigned numOperands = OperandList.size();
@@ -440,7 +440,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
   
   for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
     if (OperandList[operandIndex].Constraints.size()) {
-      const CodeGenInstruction::ConstraintInfo &Constraint =
+      const CGIOperandList::ConstraintInfo &Constraint =
         OperandList[operandIndex].Constraints[0];
       if (Constraint.isTied()) {
         operandMapping[operandIndex] = Constraint.getTiedOperand();
@@ -587,6 +587,20 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
     HANDLE_OPERAND(memory)
     HANDLE_OPTIONAL(relocation)
     break;
+  case X86Local::RawFrmImm8:
+    // operand 1 is a 16-bit immediate
+    // operand 2 is an 8-bit immediate
+    assert(numPhysicalOperands == 2 &&
+           "Unexpected number of operands for X86Local::RawFrmImm8");
+    HANDLE_OPERAND(immediate)
+    HANDLE_OPERAND(immediate)
+    break;
+  case X86Local::RawFrmImm16:
+    // operand 1 is a 16-bit immediate
+    // operand 2 is a 16-bit immediate
+    HANDLE_OPERAND(immediate)
+    HANDLE_OPERAND(immediate)
+    break;
   case X86Local::MRMInitReg:
     // Ignored.
     break;
@@ -829,10 +843,13 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("GR8",                 TYPE_R8)
   TYPE("VR128",               TYPE_XMM128)
   TYPE("f128mem",             TYPE_M128)
+  TYPE("f256mem",             TYPE_M256)
   TYPE("FR64",                TYPE_XMM64)
   TYPE("f64mem",              TYPE_M64FP)
+  TYPE("sdmem",               TYPE_M64FP)
   TYPE("FR32",                TYPE_XMM32)
   TYPE("f32mem",              TYPE_M32FP)
+  TYPE("ssmem",               TYPE_M32FP)
   TYPE("RST",                 TYPE_ST)
   TYPE("i128mem",             TYPE_M128)
   TYPE("i64i32imm_pcrel",     TYPE_REL64)
@@ -840,6 +857,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("i32imm_pcrel",        TYPE_REL32)
   TYPE("SSECC",               TYPE_IMM3)
   TYPE("brtarget",            TYPE_RELv)
+  TYPE("uncondbrtarget",      TYPE_RELv)
   TYPE("brtarget8",           TYPE_REL8)
   TYPE("f80mem",              TYPE_M80FP)
   TYPE("lea32mem",            TYPE_LEA)
@@ -924,7 +942,10 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString
   ENCODING("i32mem",          ENCODING_RM)
   ENCODING("i64mem",          ENCODING_RM)
   ENCODING("i8mem",           ENCODING_RM)
+  ENCODING("ssmem",           ENCODING_RM)
+  ENCODING("sdmem",           ENCODING_RM)
   ENCODING("f128mem",         ENCODING_RM)
+  ENCODING("f256mem",         ENCODING_RM)
   ENCODING("f64mem",          ENCODING_RM)
   ENCODING("f32mem",          ENCODING_RM)
   ENCODING("i128mem",         ENCODING_RM)
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index db4d96dda032..c043b909b42f 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -22,7 +22,7 @@
 #include "CodeGenTarget.h"
 #include "Record.h"
 
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
@@ -76,7 +76,8 @@ private:
   /// The operands of the instruction, as listed in the CodeGenInstruction.
   /// They are not one-to-one with operands listed in the MCInst; for example,
   /// memory operands expand to 5 operands in the MCInst
-  const std::vector<CodeGenInstruction::OperandInfo>* Operands;
+  const std::vector<CGIOperandList::OperandInfo>* Operands;
+  
   /// The description of the instruction that is emitted into the instruction
   /// info table
   InstructionSpecifier* Spec;
diff --git a/utils/Target/ARM/analyze-match-table.py b/utils/Target/ARM/analyze-match-table.py
new file mode 100644
index 000000000000..aa952d40085a
--- /dev/null
+++ b/utils/Target/ARM/analyze-match-table.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+def analyze_match_table(path):
+    # Extract the instruction table.
+    data = open(path).read()
+    start = data.index("static const MatchEntry MatchTable")
+    end = data.index("\n};\n", start)
+    lines = data[start:end].split("\n")[1:]
+
+    # Parse the instructions.
+    insns = []
+    for ln in lines:
+        ln = ln.split("{", 1)[1]
+        ln = ln.rsplit("}", 1)[0]
+        a,bc = ln.split("{", 1)
+        b,c = bc.split("}", 1)
+        code, string, converter, _ = [s.strip()
+                                      for s in a.split(",")]
+        items = [s.strip() for s in b.split(",")]
+        _,features = [s.strip() for s in c.split(",")]
+        assert string[0] == string[-1] == '"'
+        string = string[1:-1]
+        insns.append((code,string,converter,items,features))
+
+    # For every mnemonic, compute whether or not it can have a carry setting
+    # operand and whether or not it can have a predication code.
+    mnemonic_flags = {}
+    for insn in insns:
+        mnemonic = insn[1]
+        items = insn[3]
+        flags = mnemonic_flags[mnemonic] = mnemonic_flags.get(mnemonic, set())
+        flags.update(items)
+
+    mnemonics = set(mnemonic_flags)
+    ccout_mnemonics = set(m for m in mnemonics
+                          if 'MCK_CCOut' in mnemonic_flags[m])
+    condcode_mnemonics = set(m for m in mnemonics
+                             if 'MCK_CondCode' in mnemonic_flags[m])
+    noncondcode_mnemonics = mnemonics - condcode_mnemonics
+    print ' || '.join('Mnemonic == "%s"' % m
+                      for m in ccout_mnemonics)
+    print ' || '.join('Mnemonic == "%s"' % m
+                      for m in noncondcode_mnemonics)
+
+def main():
+    import sys
+    if len(sys.argv) == 1:
+        import os
+        from lit.Util import capture
+        llvm_obj_root = capture(["llvm-config", "--obj-root"])
+        file = os.path.join(llvm_obj_root,
+                            "lib/Target/ARM/ARMGenAsmMatcher.inc")
+    elif len(sys.argv) == 2:
+        file = sys.argv[1]
+    else:
+        raise NotImplementedError
+
+    analyze_match_table(file)
+
+if __name__ == '__main__':
+    main()
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 39ec1ccda9c0..5e8369cdd326 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -105,7 +105,7 @@ if [ "$ARM_HOSTED_BUILD" = yes ]; then
       T=`xcrun -sdk $SDKROOT -find ${prog}`
     fi
     echo '#!/bin/sh' > $P || exit 1
-    echo 'exec '$T' -arch armv6 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1
+    echo 'exec '$T' -arch armv7 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1
     chmod a+x $P || exit 1
   done
 
@@ -174,11 +174,6 @@ if [ "x$MAJ_VER" != "x4" -o "x$MIN_VER" != "x0" ]; then
     # Figure out how many make processes to run.
     SYSCTL=`sysctl -n hw.activecpu`
 
-    # hw.activecpu only available in 10.2.6 and later
-    if [ -z "$SYSCTL" ]; then
-        SYSCTL=`sysctl -n hw.ncpu`
-    fi
-
     # sysctl -n hw.* does not work when invoked via B&I chroot /BuildRoot.
     # Builders can default to 2, since even if they are single processor,
     # nothing else is running on the machine.
@@ -269,8 +264,11 @@ else
         -exec lipo -extract ppc7400 -extract i386 -extract x86_64 {} -output {} \;
 fi
 
-# The Hello dylib is an example of how to build a pass. No need to install it.
-rm $DEST_DIR$DEST_ROOT/lib/LLVMHello.dylib
+# The Hello dylib is an example of how to build a pass.
+# The BugpointPasses module is only used to test bugpoint.
+# These unversioned dylibs cause verification failures, so do not install them.
+rm $DEST_DIR$DEST_ROOT/lib/libLLVMHello.dylib
+rm $DEST_DIR$DEST_ROOT/lib/libBugpointPasses.dylib
 
 # Compress manpages
 MDIR=$DEST_DIR$DEST_ROOT/share/man/man1
diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el
index b1af853883ad..3780624b5a43 100644
--- a/utils/emacs/llvm-mode.el
+++ b/utils/emacs/llvm-mode.el
@@ -19,7 +19,7 @@
    ;; Unnamed variable slots
    '("%[-]?[0-9]+" . font-lock-variable-name-face)
    ;; Types
-   '("\\bvoid\\b\\|\\bi[0-9]+\\b\\|\\float\\b\\|\\bdouble\\b\\|\\btype\\b\\|\\blabel\\b\\|\\bopaque\\b" . font-lock-type-face)
+   `(,(regexp-opt '("void" "i[0-9]+" "float" "double" "type" "label" "opaque") 'words) . font-lock-type-face)
    ;; Integer literals
    '("\\b[-]?[0-9]+\\b" . font-lock-preprocessor-face)
    ;; Floating point constants
@@ -27,15 +27,20 @@
    ;; Hex constants
    '("\\b0x[0-9A-Fa-f]+\\b" . font-lock-preprocessor-face)
    ;; Keywords
-   '("\\bbegin\\b\\|\\bend\\b\\|\\btrue\\b\\|\\bfalse\\b\\|\\bzeroinitializer\\b\\|\\bdeclare\\b\\|\\bdefine\\b\\|\\bglobal\\b\\|\\bconstant\\b\\|\\bconst\\b\\|\\binternal\\b\\|\\blinkonce\\b\\|\\blinkonce_odr\\b\\|\\bweak\\b\\|\\bweak_odr\\b\\|\\bappending\\b\\|\\buninitialized\\b\\|\\bimplementation\\b\\|\\b\\.\\.\\.\\b\\|\\bnull\\b\\|\\bundef\\b\\|\\bto\\b\\|\\bexcept\\b\\|\\bnot\\b\\|\\btarget\\b\\|\\bendian\\b\\|\\blittle\\b\\|\\bbig\\b\\|\\bpointersize\\b\\|\\bdeplibs\\b\\|\\bvolatile\\b\\|\\bfastcc\\b\\|\\bcoldcc\\b\\|\\bcc\\b" . font-lock-keyword-face)
+   `(,(regexp-opt '("begin" "end" "true" "false" "zeroinitializer" "declare"
+                    "define" "global" "constant" "const" "internal" "linkonce" "linkonce_odr"
+                    "weak" "weak_odr" "appending" "uninitialized" "implementation" "..."
+                    "null" "undef" "to" "except" "not" "target" "endian" "little" "big"
+                    "pointersize" "deplibs" "volatile" "fastcc" "coldcc" "cc") 'words) . font-lock-keyword-face)
    ;; Arithmetic and Logical Operators
-   '("\\badd\\b\\|\\bsub\\b\\|\\bmul\\b\\|\\bdiv\\b\\|\\brem\\b\\|\\band\\b\\|\\bor\\b\\|\\bxor\\b\\|\\bset\\(ne\\b\\|\\beq\\b\\|\\blt\\b\\|\\bgt\\b\\|\\ble\\b\\|\\bge\\b\\)" . font-lock-keyword-face)
+   `(,(regexp-opt '("add" "sub" "mul" "div" "rem" "and" "or" "xor"
+                    "setne" "seteq" "setlt" "setgt" "setle" "setge") 'words) . font-lock-keyword-face)
    ;; Special instructions
-   '("\\bphi\\b\\|\\btail\\b\\|\\bcall\\b\\|\\bcast\\b\\|\\bselect\\b\\|\\bto\\b\\|\\bshl\\b\\|\\bshr\\b\\|\\bvaarg\\b\\|\\bvanext\\b" . font-lock-keyword-face)
+   `(,(regexp-opt '("phi" "tail" "call" "cast" "select" "to" "shl" "shr" "vaarg" "vanext") 'words) . font-lock-keyword-face)
    ;; Control instructions
-   '("\\bret\\b\\|\\bbr\\b\\|\\bswitch\\b\\|\\binvoke\\b\\|\\bunwind\\b\\|\\bunreachable\\b" . font-lock-keyword-face)
+   `(,(regexp-opt '("ret" "br" "switch" "invoke" "unwind" "unreachable") 'words) . font-lock-keyword-face)
    ;; Memory operators
-   '("\\bmalloc\\b\\|\\balloca\\b\\|\\bfree\\b\\|\\bload\\b\\|\\bstore\\b\\|\\bgetelementptr\\b" . font-lock-keyword-face)
+   `(,(regexp-opt '("malloc" "alloca" "free" "load" "store" "getelementptr") 'words) . font-lock-keyword-face)
    )
   "Syntax highlighting for LLVM"
   )
diff --git a/utils/emacs/tablegen-mode.el b/utils/emacs/tablegen-mode.el
index 833c16c599d4..3853ce66a285 100644
--- a/utils/emacs/tablegen-mode.el
+++ b/utils/emacs/tablegen-mode.el
@@ -12,13 +12,11 @@
 (make-face 'td-decorators-face)
 
 (defvar tablegen-font-lock-keywords
-  (let ((kw (mapconcat 'identity
-                       '("class" "defm" "def" "field" "include" "in"
+  (let ((kw (regexp-opt '("class" "defm" "def" "field" "include" "in"
                          "let" "multiclass")
-                       "\\|"))
-        (type-kw (mapconcat 'identity
-                            '("bit" "bits" "code" "dag" "int" "list" "string")
-                            "\\|"))
+                        'words))
+        (type-kw (regexp-opt '("bit" "bits" "code" "dag" "int" "list" "string")
+                             'words))
         )
     (list
      ;; Comments
@@ -36,10 +34,10 @@
 
      '("^[ \t]*\\(@.+\\)" 1 'td-decorators-face)
      ;; Keywords
-     (cons (concat "\\<\\(" kw "\\)\\>[ \n\t(]") 1)
+     (cons (concat kw "[ \n\t(]") 1)
 
      ;; Type keywords
-     (cons (concat "\\<\\(" type-kw "\\)[ \n\t(]") 1)
+     (cons (concat type-kw "[ \n\t(]") 1)
      ))
   "Additional expressions to highlight in TableGen mode.")
 (put 'tablegen-mode 'font-lock-defaults '(tablegen-font-lock-keywords))
diff --git a/utils/findmisopt b/utils/findmisopt
index b7ffbd9947d5..f2a872c6dc3e 100755
--- a/utils/findmisopt
+++ b/utils/findmisopt
@@ -7,7 +7,7 @@
 #      it from finding a problem unless the set of failing optimizations are
 #      known and given to it on the command line.
 #
-#      Given a bytecode file that produces correct output (or return code), 
+#      Given a bitcode file that produces correct output (or return code), 
 #      this script will run through all the optimizations passes that gccas
 #      uses (in the same order) and will narrow down which optimizations
 #      cause the program either generate different output or return a 
@@ -21,7 +21,7 @@
 #
 #   Where:
 #      bcfile 
-#          is the bytecode file input (the unoptimized working case)
+#          is the bitcode file input (the unoptimized working case)
 #      outdir
 #          is a directory into which intermediate results are placed
 #      progargs
diff --git a/utils/findoptdiff b/utils/findoptdiff
index 4f8d08dbffe1..7a2eab05d71a 100755
--- a/utils/findoptdiff
+++ b/utils/findoptdiff
@@ -14,7 +14,7 @@
 #      second build contains some experimental optimization features that
 #      are suspected of producing a misoptimization.
 #
-#      The script takes two bytecode files, one from each build. They are
+#      The script takes two bitcode files, one from each build. They are
 #      presumed to be a compilation of the same program or program fragment
 #      with the only difference being the builds.
 #
@@ -39,9 +39,9 @@
 #      llvm2
 #          is the path to the second llvm build dir
 #      bc1
-#          is the bytecode file for the first llvm environment
+#          is the bitcode file for the first llvm environment
 #      bc2
-#          is the bytecode file for the second llvm environment
+#          is the bitcode file for the second llvm environment
 #      filter1
 #          is an optional filter for filtering the llvm1 generated assembly
 #      filter2
diff --git a/utils/fpcmp/Makefile b/utils/fpcmp/Makefile
index fd2f7477bb4e..81db3b9c3f6e 100644
--- a/utils/fpcmp/Makefile
+++ b/utils/fpcmp/Makefile
@@ -1,15 +1,15 @@
 ##===- utils/fpcmp/Makefile --------------------------------*- Makefile -*-===##
-# 
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
 TOOLNAME = fpcmp
-USEDLIBS = LLVMSupport.a LLVMSystem.a
+USEDLIBS = LLVMSupport.a
 NO_INSTALL = 1
 
 include $(LEVEL)/Makefile.common
diff --git a/utils/kate/README b/utils/kate/README
new file mode 100644
index 000000000000..efe53b7e237e
--- /dev/null
+++ b/utils/kate/README
@@ -0,0 +1,12 @@
+-*- llvm/utils/kate/README -*-
+
+These are syntax highlighting files for the Kate editor. Included are:
+
+* llvm.xml
+
+  Syntax Highlighting Mode for the KDE Kate editor. To install just copy
+  this file to ~/.kde/share/apps/katepart/syntax (or better yet, symlink it).
+
+Note: If you notice missing or incorrect syntax highlighting, please contact
+<llvmbugs [at] cs.uiuc.edu>; if you wish to provide a patch to improve the
+functionality, it will be most appreciated. Thank you.
diff --git a/utils/kate/llvm.xml b/utils/kate/llvm.xml
new file mode 100644
index 000000000000..074fa16cb884
--- /dev/null
+++ b/utils/kate/llvm.xml
@@ -0,0 +1,255 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE language SYSTEM "language.dtd">
+<language name="LLVM" section="Sources"
+          version="1.00" kateversion="3.4.4"
+          extensions="*.ll"
+          mimetype=""
+          author="LLVM Team"
+          license="LLVM Release License">
+  <highlighting>
+    <list name="keywords">
+      <item> begin </item>
+      <item> end </item>
+      <item> true </item>
+      <item> false </item>
+      <item> declare </item>
+      <item> define </item>
+      <item> global </item>
+      <item> constant </item>
+      <item> gc </item>
+      <item> module </item>
+      <item> asm </item>
+      <item> target </item>
+      <item> datalayout </item>
+      <item> null </item>
+      <item> undef </item>
+      <item> blockaddress </item>
+      <item> sideeffect </item>
+      <item> alignstack </item>
+      <item> to </item>
+      <item> unwind </item>
+      <item> nuw </item>
+      <item> nsw </item>
+      <item> inbounds </item>
+      <item> tail </item>
+      <item> triple </item>
+      <item> type </item>
+      <item> align </item>
+      <item> alias </item>
+    </list>
+    <list name="linkage-types">
+      <item> private </item>
+      <item> linker_private </item>
+      <item> linker_private_weak </item>
+      <item> linker_private_weak_def_auto </item>
+      <item> internal </item>
+      <item> available_externally </item>
+      <item> linkonce </item>
+      <item> weak </item>
+      <item> common </item>
+      <item> appending </item>
+      <item> extern_weak </item>
+      <item> linkonce_odr </item>
+      <item> weak_odr </item>
+      <item> dllimport </item>
+      <item> dllexport </item>
+    </list>
+    <list name="calling-conventions">
+      <item> ccc </item>
+      <item> fastcc </item>
+      <item> coldcc </item>
+      <item> cc </item>
+    </list>
+    <list name="visibility-styles">
+      <item> default </item>
+      <item> hidden </item>
+      <item> protected </item>
+    </list>
+    <list name="parameter-attributes">
+      <item> zeroext </item>
+      <item> signext </item>
+      <item> inreg </item>
+      <item> byval </item>
+      <item> sret </item>
+      <item> noalias </item>
+      <item> nocapture </item>
+      <item> nest </item>
+    </list>
+    <list name="function-attributes">
+      <item> alignstack </item>
+      <item> alwaysinline </item>
+      <item> inlinehint </item>
+      <item> naked </item>
+      <item> noimplicitfloat </item>
+      <item> noinline </item>
+      <item> noredzone </item>
+      <item> noreturn </item>
+      <item> nounwind </item>
+      <item> optsize </item>
+      <item> readnone </item>
+      <item> readonly </item>
+      <item> ssp </item>
+      <item> sspreq </item>
+    </list>
+    <list name="types">
+      <item> float </item>
+      <item> double </item>
+      <item> fp128 </item>
+      <item> x86_fp80 </item>
+      <item> ppc_fp128 </item>
+      <item> x86mmx </item>
+      <item> void </item>
+      <item> label </item>
+      <item> metadata </item>
+      <item> opaque </item>
+    </list>
+    <list name="intrinsic-global-variables">
+      <item> llvm.used </item>
+      <item> llvm.compiler.used </item>
+      <item> llvm.global_ctors </item>
+      <item> llvm.global_dtors </item>
+    </list>
+    <list name="instructions">
+      <item> ret </item>
+      <item> br </item>
+      <item> switch </item>
+      <item> indirectbr </item>
+      <item> invoke </item>
+      <item> unwind </item>
+      <item> unreachable </item>
+      <item> add </item>
+      <item> fadd </item>
+      <item> sub </item>
+      <item> fsub </item>
+      <item> mul </item>
+      <item> fmul </item>
+      <item> udiv </item>
+      <item> sdiv </item>
+      <item> fdiv </item>
+      <item> urem </item>
+      <item> srem </item>
+      <item> frem </item>
+      <item> shl </item>
+      <item> lshr </item>
+      <item> ashr </item>
+      <item> and </item>
+      <item> or </item>
+      <item> xor </item>
+      <item> extractelement </item>
+      <item> insertelement </item>
+      <item> shufflevector </item>
+      <item> extractvalue </item>
+      <item> insertvalue </item>
+      <item> alloca </item>
+      <item> load </item>
+      <item> store </item>
+      <item> getelementptr </item>
+      <item> trunc </item>
+      <item> zext </item>
+      <item> sext </item>
+      <item> fptrunc </item>
+      <item> fpext </item>
+      <item> fptoui </item>
+      <item> fptosi </item>
+      <item> uitofp </item>
+      <item> sitofp </item>
+      <item> ptrtoint </item>
+      <item> inttoptr </item>
+      <item> bitcast </item>
+      <item> icmp </item>
+      <item> fcmp </item>
+      <item> phi </item>
+      <item> select </item>
+      <item> call </item>
+      <item> va_arg </item>
+    </list>
+    <list name="conditions">
+      <item> eq </item>
+      <item> ne </item>
+      <item> ugt </item>
+      <item> uge </item>
+      <item> ult </item>
+      <item> ule </item>
+      <item> sgt </item>
+      <item> sge </item>
+      <item> slt </item>
+      <item> sle </item>
+      <item> oeq </item>
+      <item> ogt </item>
+      <item> oge </item>
+      <item> olt </item>
+      <item> ole </item>
+      <item> one </item>
+      <item> ord </item>
+      <item> ueq </item>
+      <item> une </item>
+      <item> uno </item>
+    </list>
+    <contexts>
+      <context name="llvm" attribute="Normal Text" lineEndContext="#stay">
+        <DetectSpaces />
+        <AnyChar String="@%" attribute="Symbol" context="symbol" />
+
+        <DetectChar char="{" beginRegion="Brace1" />
+        <DetectChar char="}" endRegion="Brace1" />
+        <DetectChar char=";" attribute="Comment" context="comment" />
+        <DetectChar attribute="String" context="string" char="&quot;" />
+        <RegExpr String="i[0-9]+" attribute="Data Type" context="#stay" />
+        <RegExpr attribute="Symbol" String="[-a-zA-Z$._][-a-zA-Z$._0-9]*:" context="#stay" />
+        <Int attribute="Int" context="#stay" />
+
+        <keyword attribute="Keyword"   String="keywords" />
+        <keyword attribute="Keyword"   String="linkage-types" />
+        <keyword attribute="Keyword"   String="calling-conventions" />
+        <keyword attribute="Keyword"   String="visibility-styles" />
+        <keyword attribute="Keyword"   String="parameter-attributes" />
+        <keyword attribute="Keyword"   String="function-attributes" />
+        <keyword attribute="Data Type" String="types" />
+        <keyword attribute="Keyword"   String="intrinsic-global-variables" />
+        <keyword attribute="Keyword"   String="instructions" />
+        <keyword attribute="Keyword"   String="conditions" />
+      </context>
+
+      <context name="symbol" attribute="Symbol" lineEndContext="#pop">
+        <DetectChar attribute="Symbol" context="symbol-string" char="&quot;" />
+        <RegExpr attribute="Symbol" String="([-a-zA-Z$._][-a-zA-Z$._0-9]*|[0-9]+)" context="#pop" />
+      </context>
+
+      <context name="symbol-string" attribute="Symbol" lineEndContext="#stay">
+        <DetectChar attribute="Symbol" context="#pop#pop" char="&quot;" />
+      </context>
+
+      <context name="string" attribute="String" lineEndContext="#stay">
+        <DetectChar attribute="String" context="#pop" char="&quot;" />
+      </context>
+
+      <context name="comment" attribute="Comment" lineEndContext="#pop">
+        <DetectSpaces />
+        <!-- TODO: Add FileCheck syntax highlighting -->
+        <IncludeRules context="##Alerts" />
+        <DetectIdentifier />
+      </context>
+    </contexts>
+    <itemDatas>
+      <itemData name="Normal Text" defStyleNum="dsNormal" />
+      <itemData name="Keyword" defStyleNum="dsKeyword" />
+      <itemData name="Data Type" defStyleNum="dsDataType" />
+      <itemData name="Int" defStyleNum="dsDecVal" />
+      <itemData name="Hex" defStyleNum="dsBaseN" />
+      <itemData name="Float" defStyleNum="dsFloat" />
+      <itemData name="String" defStyleNum="dsString" />
+      <itemData name="Comment" defStyleNum="dsComment" />
+      <itemData name="Function" defStyleNum="dsFunction" />
+      <itemData name="Symbol" defStyleNum="dsFunction" />
+    </itemDatas>
+  </highlighting>
+  <general>
+    <comments>
+      <comment name="singleLine" start=";" />
+    </comments>
+    <keywords casesensitive="1" weakDeliminator="." />
+  </general>
+</language>
+<!--
+// kate: space-indent on; indent-width 2; replace-tabs on;
+-->
diff --git a/utils/lit/TODO b/utils/lit/TODO
index 4d00d2c1cfcd..6d7f7ea529ae 100644
--- a/utils/lit/TODO
+++ b/utils/lit/TODO
@@ -2,18 +2,8 @@
 
  - Add --show-unsupported, don't show by default?
 
- - Finish documentation.
-
  - Optionally use multiprocessing.
 
- - Support llvmc and ocaml tests.
-
  - Support valgrind in all configs, and LLVM style valgrind.
 
- - Provide test suite config for running unit tests.
-
  - Support a timeout / ulimit.
-
- - Support "disabling" tests? The advantage of making this distinct from XFAIL
-   is it makes it more obvious that it is a temporary measure (and lit can put
-   in a separate category).
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index ac4859137e54..7ca1b9c4c634 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -8,6 +8,9 @@ class LitConfig:
     easily.
     """
 
+    # Provide access to Test module.
+    import Test
+
     # Provide access to built-in formats.
     import LitFormats as formats
 
@@ -82,6 +85,22 @@ class LitConfig:
 
         return self.bashPath
 
+    def getToolsPath(self, dir, paths, tools):
+        import os, Util
+        if dir is not None and os.path.isabs(dir) and os.path.isdir(dir):
+            if not Util.checkToolsPath(dir, tools):
+                return None
+        else:
+            dir = Util.whichTools(tools, paths)
+
+        # bash
+        self.bashPath = Util.which('bash', dir)
+        if self.bashPath is None:
+            self.warning("Unable to find 'bash.exe'.")
+            self.bashPath = ''
+
+        return dir
+
     def _write_message(self, kind, message):
         import inspect, os, sys
 
diff --git a/utils/lit/lit/LitFormats.py b/utils/lit/lit/LitFormats.py
index e86f103fe6b2..931d107109b3 100644
--- a/utils/lit/lit/LitFormats.py
+++ b/utils/lit/lit/LitFormats.py
@@ -1,2 +1,3 @@
+from TestFormats import FileBasedTest
 from TestFormats import GoogleTest, ShTest, TclTest
 from TestFormats import SyntaxCheckTest, OneCommandPerFileTest
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index 7ffbd2bf7663..6dda2fdb608d 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -1,15 +1,15 @@
 import os
-import platform
+import sys
 
 import Test
 import TestRunner
 import Util
 
-kIsWindows = platform.system() == 'Windows'
+kIsWindows = sys.platform in ['win32', 'cygwin']
 
 class GoogleTest(object):
     def __init__(self, test_sub_dir, test_suffix):
-        self.test_sub_dir = str(test_sub_dir)
+        self.test_sub_dir = os.path.normcase(str(test_sub_dir)).split(';')
         self.test_suffix = str(test_suffix)
 
         # On Windows, assume tests will also end in '.exe'.
@@ -28,7 +28,10 @@ class GoogleTest(object):
 
         try:
             lines = Util.capture([path, '--gtest_list_tests'],
-                                 env=localConfig.environment).split('\n')
+                                 env=localConfig.environment)
+            if kIsWindows:
+              lines = lines.replace('\r', '')
+            lines = lines.split('\n')
         except:
             litConfig.error("unable to discover google-tests in %r" % path)
             raise StopIteration
@@ -44,7 +47,7 @@ class GoogleTest(object):
                 index += 1
             while len(nested_tests) > index:
                 nested_tests.pop()
-            
+
             ln = ln[index*2:]
             if ln.endswith('.'):
                 nested_tests.append(ln)
@@ -56,10 +59,14 @@ class GoogleTest(object):
         source_path = testSuite.getSourcePath(path_in_suite)
         for filename in os.listdir(source_path):
             # Check for the one subdirectory (build directory) tests will be in.
-            if filename != self.test_sub_dir:
-                continue
+            if not '.' in self.test_sub_dir:
+                if not os.path.normcase(filename) in self.test_sub_dir:
+                    continue
 
             filepath = os.path.join(source_path, filename)
+            if not os.path.isdir(filepath):
+                continue
+
             for subfilename in os.listdir(filepath):
                 if subfilename.endswith(self.test_suffix):
                     execpath = os.path.join(filepath, subfilename)
@@ -84,7 +91,7 @@ class GoogleTest(object):
 
         out, err, exitCode = TestRunner.executeCommand(
             cmd, env=test.config.environment)
-            
+
         if not exitCode:
             return Test.PASS,''
 
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 0eb51a829408..dba78143bee2 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -8,6 +8,8 @@ import Util
 import platform
 import tempfile
 
+import re
+
 class InternalShellError(Exception):
     def __init__(self, command, message):
         self.command = command
@@ -178,6 +180,13 @@ def executeShCmd(cmd, cfg, cwd, results):
         else:
             input = subprocess.PIPE
 
+    # Explicitly close any redirected files. We need to do this now because we
+    # need to release any handles we may have on the temporary files (important
+    # on Win32, for example). Since we have already spawned the subprocess, our
+    # handles have already been transferred so we do not need them anymore.
+    for f in opened_files:
+        f.close()
+
     # FIXME: There is probably still deadlock potential here. Yawn.
     procData = [None] * len(procs)
     procData[-1] = procs[-1].communicate()
@@ -215,10 +224,6 @@ def executeShCmd(cmd, cfg, cwd, results):
         else:
             exitCode = res
 
-    # Explicitly close any redirected files.
-    for f in opened_files:
-        f.close()
-
     # Remove any named temporary files we created.
     for f in named_temp_files:
         try:
@@ -441,11 +446,15 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
             if ln[ln.index('END.'):].strip() == 'END.':
                 break
 
-    # Apply substitutions to the script.
+    # Apply substitutions to the script.  Allow full regular
+    # expression syntax.  Replace each matching occurrence of regular
+    # expression pattern a with substitution b in line ln.
     def processLine(ln):
         # Apply substitutions
         for a,b in substitutions:
-            ln = ln.replace(a,b)
+            if kIsWindows:
+                b = b.replace("\\","\\\\")
+            ln = re.sub(a, b, ln)
 
         # Strip the trailing newline and any extra whitespace.
         return ln.strip()
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 5c1b27394857..0d9bc00a8357 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -10,12 +10,14 @@ class TestingConfig:
         if config is None:
             # Set the environment based on the command line arguments.
             environment = {
+                'LIBRARY_PATH' : os.environ.get('LIBRARY_PATH',''),
                 'LD_LIBRARY_PATH' : os.environ.get('LD_LIBRARY_PATH',''),
                 'PATH' : os.pathsep.join(litConfig.path +
                                          [os.environ.get('PATH','')]),
                 'PATHEXT' : os.environ.get('PATHEXT',''),
                 'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
                 'LLVM_DISABLE_CRT_DEBUG' : '1',
+                'PRINTF_EXPONENT_DIGITS' : '2',
                 }
 
             config = TestingConfig(parent,
diff --git a/utils/lit/lit/Util.py b/utils/lit/lit/Util.py
index 414b714c82c3..5635f50baef3 100644
--- a/utils/lit/lit/Util.py
+++ b/utils/lit/lit/Util.py
@@ -64,7 +64,11 @@ def which(command, paths = None):
         paths = os.defpath
 
     # Get suffixes to search.
-    pathext = os.environ.get('PATHEXT', '').split(os.pathsep)
+    # On Cygwin, 'PATHEXT' may exist but it should not be used.
+    if os.pathsep == ';':
+        pathext = os.environ.get('PATHEXT', '').split(';')
+    else:
+        pathext = ['']
 
     # Search the paths...
     for path in paths.split(os.pathsep):
@@ -75,6 +79,18 @@ def which(command, paths = None):
 
     return None
 
+def checkToolsPath(dir, tools):
+    for tool in tools:
+        if not os.path.exists(os.path.join(dir, tool)):
+            return False;
+    return True;
+
+def whichTools(tools, paths):
+    for path in paths.split(os.pathsep):
+        if checkToolsPath(path, tools):
+            return path
+    return None
+
 def printHistogram(items, title = 'Items'):
     import itertools, math
 
diff --git a/utils/lit/lit/__init__.py b/utils/lit/lit/__init__.py
index 01026023d29b..f3fbb1cd8276 100644
--- a/utils/lit/lit/__init__.py
+++ b/utils/lit/lit/__init__.py
@@ -1,10 +1,10 @@
 """'lit' Testing Tool"""
 
-from lit import main
+from main import main
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@zuster.org'
-__versioninfo__ = (0, 1, 0)
-__version__ = '.'.join(map(str, __versioninfo__))
+__versioninfo__ = (0, 2, 0)
+__version__ = '.'.join(map(str, __versioninfo__)) + 'dev'
 
 __all__ = []
diff --git a/utils/lit/lit/lit.py b/utils/lit/lit/lit.py
deleted file mode 100755
index 13d263009ddd..000000000000
--- a/utils/lit/lit/lit.py
+++ /dev/null
@@ -1,648 +0,0 @@
-#!/usr/bin/env python
-
-"""
-lit - LLVM Integrated Tester.
-
-See lit.pod for more information.
-"""
-
-import math, os, platform, random, re, sys, time, threading, traceback
-
-import ProgressBar
-import TestRunner
-import Util
-
-from TestingConfig import TestingConfig
-import LitConfig
-import Test
-
-# Configuration files to look for when discovering test suites. These can be
-# overridden with --config-prefix.
-#
-# FIXME: Rename to 'config.lit', 'site.lit', and 'local.lit' ?
-gConfigName = 'lit.cfg'
-gSiteConfigName = 'lit.site.cfg'
-
-kLocalConfigName = 'lit.local.cfg'
-
-class TestingProgressDisplay:
-    def __init__(self, opts, numTests, progressBar=None):
-        self.opts = opts
-        self.numTests = numTests
-        self.current = None
-        self.lock = threading.Lock()
-        self.progressBar = progressBar
-        self.completed = 0
-
-    def update(self, test):
-        # Avoid locking overhead in quiet mode
-        if self.opts.quiet and not test.result.isFailure:
-            self.completed += 1
-            return
-
-        # Output lock.
-        self.lock.acquire()
-        try:
-            self.handleUpdate(test)
-        finally:
-            self.lock.release()
-
-    def finish(self):
-        if self.progressBar:
-            self.progressBar.clear()
-        elif self.opts.quiet:
-            pass
-        elif self.opts.succinct:
-            sys.stdout.write('\n')
-
-    def handleUpdate(self, test):
-        self.completed += 1
-        if self.progressBar:
-            self.progressBar.update(float(self.completed)/self.numTests,
-                                    test.getFullName())
-
-        if self.opts.succinct and not test.result.isFailure:
-            return
-
-        if self.progressBar:
-            self.progressBar.clear()
-
-        print '%s: %s (%d of %d)' % (test.result.name, test.getFullName(),
-                                     self.completed, self.numTests)
-
-        if test.result.isFailure and self.opts.showOutput:
-            print "%s TEST '%s' FAILED %s" % ('*'*20, test.getFullName(),
-                                              '*'*20)
-            print test.output
-            print "*" * 20
-
-        sys.stdout.flush()
-
-class TestProvider:
-    def __init__(self, tests, maxTime):
-        self.maxTime = maxTime
-        self.iter = iter(tests)
-        self.lock = threading.Lock()
-        self.startTime = time.time()
-
-    def get(self):
-        # Check if we have run out of time.
-        if self.maxTime is not None:
-            if time.time() - self.startTime > self.maxTime:
-                return None
-
-        # Otherwise take the next test.
-        self.lock.acquire()
-        try:
-            item = self.iter.next()
-        except StopIteration:
-            item = None
-        self.lock.release()
-        return item
-
-class Tester(threading.Thread):
-    def __init__(self, litConfig, provider, display):
-        threading.Thread.__init__(self)
-        self.litConfig = litConfig
-        self.provider = provider
-        self.display = display
-
-    def run(self):
-        while 1:
-            item = self.provider.get()
-            if item is None:
-                break
-            self.runTest(item)
-
-    def runTest(self, test):
-        result = None
-        startTime = time.time()
-        try:
-            result, output = test.config.test_format.execute(test,
-                                                             self.litConfig)
-        except KeyboardInterrupt:
-            # This is a sad hack. Unfortunately subprocess goes
-            # bonkers with ctrl-c and we start forking merrily.
-            print '\nCtrl-C detected, goodbye.'
-            os.kill(0,9)
-        except:
-            if self.litConfig.debug:
-                raise
-            result = Test.UNRESOLVED
-            output = 'Exception during script execution:\n'
-            output += traceback.format_exc()
-            output += '\n'
-        elapsed = time.time() - startTime
-
-        test.setResult(result, output, elapsed)
-        self.display.update(test)
-
-def dirContainsTestSuite(path):
-    cfgpath = os.path.join(path, gSiteConfigName)
-    if os.path.exists(cfgpath):
-        return cfgpath
-    cfgpath = os.path.join(path, gConfigName)
-    if os.path.exists(cfgpath):
-        return cfgpath
-
-def getTestSuite(item, litConfig, cache):
-    """getTestSuite(item, litConfig, cache) -> (suite, relative_path)
-
-    Find the test suite containing @arg item.
-
-    @retval (None, ...) - Indicates no test suite contains @arg item.
-    @retval (suite, relative_path) - The suite that @arg item is in, and its
-    relative path inside that suite.
-    """
-    def search1(path):
-        # Check for a site config or a lit config.
-        cfgpath = dirContainsTestSuite(path)
-
-        # If we didn't find a config file, keep looking.
-        if not cfgpath:
-            parent,base = os.path.split(path)
-            if parent == path:
-                return (None, ())
-
-            ts, relative = search(parent)
-            return (ts, relative + (base,))
-
-        # We found a config file, load it.
-        if litConfig.debug:
-            litConfig.note('loading suite config %r' % cfgpath)
-
-        cfg = TestingConfig.frompath(cfgpath, None, litConfig, mustExist = True)
-        source_root = os.path.realpath(cfg.test_source_root or path)
-        exec_root = os.path.realpath(cfg.test_exec_root or path)
-        return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()
-
-    def search(path):
-        # Check for an already instantiated test suite.
-        res = cache.get(path)
-        if res is None:
-            cache[path] = res = search1(path)
-        return res
-
-    # Canonicalize the path.
-    item = os.path.realpath(item)
-
-    # Skip files and virtual components.
-    components = []
-    while not os.path.isdir(item):
-        parent,base = os.path.split(item)
-        if parent == item:
-            return (None, ())
-        components.append(base)
-        item = parent
-    components.reverse()
-
-    ts, relative = search(item)
-    return ts, tuple(relative + tuple(components))
-
-def getLocalConfig(ts, path_in_suite, litConfig, cache):
-    def search1(path_in_suite):
-        # Get the parent config.
-        if not path_in_suite:
-            parent = ts.config
-        else:
-            parent = search(path_in_suite[:-1])
-
-        # Load the local configuration.
-        source_path = ts.getSourcePath(path_in_suite)
-        cfgpath = os.path.join(source_path, kLocalConfigName)
-        if litConfig.debug:
-            litConfig.note('loading local config %r' % cfgpath)
-        return TestingConfig.frompath(cfgpath, parent, litConfig,
-                                    mustExist = False,
-                                    config = parent.clone(cfgpath))
-
-    def search(path_in_suite):
-        key = (ts, path_in_suite)
-        res = cache.get(key)
-        if res is None:
-            cache[key] = res = search1(path_in_suite)
-        return res
-
-    return search(path_in_suite)
-
-def getTests(path, litConfig, testSuiteCache, localConfigCache):
-    # Find the test suite for this input and its relative path.
-    ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
-    if ts is None:
-        litConfig.warning('unable to find test suite for %r' % path)
-        return (),()
-
-    if litConfig.debug:
-        litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
-                                                        path_in_suite))
-
-    return ts, getTestsInSuite(ts, path_in_suite, litConfig,
-                               testSuiteCache, localConfigCache)
-
-def getTestsInSuite(ts, path_in_suite, litConfig,
-                    testSuiteCache, localConfigCache):
-    # Check that the source path exists (errors here are reported by the
-    # caller).
-    source_path = ts.getSourcePath(path_in_suite)
-    if not os.path.exists(source_path):
-        return
-
-    # Check if the user named a test directly.
-    if not os.path.isdir(source_path):
-        lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
-        yield Test.Test(ts, path_in_suite, lc)
-        return
-
-    # Otherwise we have a directory to search for tests, start by getting the
-    # local configuration.
-    lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
-
-    # Search for tests.
-    if lc.test_format is not None:
-        for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
-                                                      litConfig, lc):
-            yield res
-
-    # Search subdirectories.
-    for filename in os.listdir(source_path):
-        # FIXME: This doesn't belong here?
-        if filename in ('Output', '.svn') or filename in lc.excludes:
-            continue
-
-        # Ignore non-directories.
-        file_sourcepath = os.path.join(source_path, filename)
-        if not os.path.isdir(file_sourcepath):
-            continue
-
-        # Check for nested test suites, first in the execpath in case there is a
-        # site configuration and then in the source path.
-        file_execpath = ts.getExecPath(path_in_suite + (filename,))
-        if dirContainsTestSuite(file_execpath):
-            sub_ts, subiter = getTests(file_execpath, litConfig,
-                                       testSuiteCache, localConfigCache)
-        elif dirContainsTestSuite(file_sourcepath):
-            sub_ts, subiter = getTests(file_sourcepath, litConfig,
-                                       testSuiteCache, localConfigCache)
-        else:
-            # Otherwise, continue loading from inside this test suite.
-            subiter = getTestsInSuite(ts, path_in_suite + (filename,),
-                                      litConfig, testSuiteCache,
-                                      localConfigCache)
-            sub_ts = None
-
-        N = 0
-        for res in subiter:
-            N += 1
-            yield res
-        if sub_ts and not N:
-            litConfig.warning('test suite %r contained no tests' % sub_ts.name)
-
-def runTests(numThreads, litConfig, provider, display):
-    # If only using one testing thread, don't use threads at all; this lets us
-    # profile, among other things.
-    if numThreads == 1:
-        t = Tester(litConfig, provider, display)
-        t.run()
-        return
-
-    # Otherwise spin up the testing threads and wait for them to finish.
-    testers = [Tester(litConfig, provider, display)
-               for i in range(numThreads)]
-    for t in testers:
-        t.start()
-    try:
-        for t in testers:
-            t.join()
-    except KeyboardInterrupt:
-        sys.exit(2)
-
-def load_test_suite(inputs):
-    import unittest
-
-    # Create the global config object.
-    litConfig = LitConfig.LitConfig(progname = 'lit',
-                                    path = [],
-                                    quiet = False,
-                                    useValgrind = False,
-                                    valgrindLeakCheck = False,
-                                    valgrindArgs = [],
-                                    useTclAsSh = False,
-                                    noExecute = False,
-                                    debug = False,
-                                    isWindows = (platform.system()=='Windows'),
-                                    params = {})
-
-    # Load the tests from the inputs.
-    tests = []
-    testSuiteCache = {}
-    localConfigCache = {}
-    for input in inputs:
-        prev = len(tests)
-        tests.extend(getTests(input, litConfig,
-                              testSuiteCache, localConfigCache)[1])
-        if prev == len(tests):
-            litConfig.warning('input %r contained no tests' % input)
-
-    # If there were any errors during test discovery, exit now.
-    if litConfig.numErrors:
-        print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
-        sys.exit(2)
-
-    # Return a unittest test suite which just runs the tests in order.
-    def get_test_fn(test):
-        return unittest.FunctionTestCase(
-            lambda: test.config.test_format.execute(
-                test, litConfig),
-            description = test.getFullName())
-
-    from LitTestCase import LitTestCase
-    return unittest.TestSuite([LitTestCase(test, litConfig) for test in tests])
-
-def main(builtinParameters = {}):    # Bump the GIL check interval, its more important to get any one thread to a
-    # blocking operation (hopefully exec) than to try and unblock other threads.
-    #
-    # FIXME: This is a hack.
-    import sys
-    sys.setcheckinterval(1000)
-
-    global options
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {file-or-path}")
-
-    parser.add_option("-j", "--threads", dest="numThreads", metavar="N",
-                      help="Number of testing threads",
-                      type=int, action="store", default=None)
-    parser.add_option("", "--config-prefix", dest="configPrefix",
-                      metavar="NAME", help="Prefix for 'lit' config files",
-                      action="store", default=None)
-    parser.add_option("", "--param", dest="userParameters",
-                      metavar="NAME=VAL",
-                      help="Add 'NAME' = 'VAL' to the user defined parameters",
-                      type=str, action="append", default=[])
-
-    group = OptionGroup(parser, "Output Format")
-    # FIXME: I find these names very confusing, although I like the
-    # functionality.
-    group.add_option("-q", "--quiet", dest="quiet",
-                     help="Suppress no error output",
-                     action="store_true", default=False)
-    group.add_option("-s", "--succinct", dest="succinct",
-                     help="Reduce amount of output",
-                     action="store_true", default=False)
-    group.add_option("-v", "--verbose", dest="showOutput",
-                     help="Show all test output",
-                     action="store_true", default=False)
-    group.add_option("", "--no-progress-bar", dest="useProgressBar",
-                     help="Do not use curses based progress bar",
-                     action="store_false", default=True)
-    parser.add_option_group(group)
-
-    group = OptionGroup(parser, "Test Execution")
-    group.add_option("", "--path", dest="path",
-                     help="Additional paths to add to testing environment",
-                     action="append", type=str, default=[])
-    group.add_option("", "--vg", dest="useValgrind",
-                     help="Run tests under valgrind",
-                     action="store_true", default=False)
-    group.add_option("", "--vg-leak", dest="valgrindLeakCheck",
-                     help="Check for memory leaks under valgrind",
-                     action="store_true", default=False)
-    group.add_option("", "--vg-arg", dest="valgrindArgs", metavar="ARG",
-                     help="Specify an extra argument for valgrind",
-                     type=str, action="append", default=[])
-    group.add_option("", "--time-tests", dest="timeTests",
-                     help="Track elapsed wall time for each test",
-                     action="store_true", default=False)
-    group.add_option("", "--no-execute", dest="noExecute",
-                     help="Don't execute any tests (assume PASS)",
-                     action="store_true", default=False)
-    parser.add_option_group(group)
-
-    group = OptionGroup(parser, "Test Selection")
-    group.add_option("", "--max-tests", dest="maxTests", metavar="N",
-                     help="Maximum number of tests to run",
-                     action="store", type=int, default=None)
-    group.add_option("", "--max-time", dest="maxTime", metavar="N",
-                     help="Maximum time to spend testing (in seconds)",
-                     action="store", type=float, default=None)
-    group.add_option("", "--shuffle", dest="shuffle",
-                     help="Run tests in random order",
-                     action="store_true", default=False)
-    parser.add_option_group(group)
-
-    group = OptionGroup(parser, "Debug and Experimental Options")
-    group.add_option("", "--debug", dest="debug",
-                      help="Enable debugging (for 'lit' development)",
-                      action="store_true", default=False)
-    group.add_option("", "--show-suites", dest="showSuites",
-                      help="Show discovered test suites",
-                      action="store_true", default=False)
-    group.add_option("", "--no-tcl-as-sh", dest="useTclAsSh",
-                      help="Don't run Tcl scripts using 'sh'",
-                      action="store_false", default=True)
-    group.add_option("", "--repeat", dest="repeatTests", metavar="N",
-                      help="Repeat tests N times (for timing)",
-                      action="store", default=None, type=int)
-    parser.add_option_group(group)
-
-    (opts, args) = parser.parse_args()
-
-    if not args:
-        parser.error('No inputs specified')
-
-    if opts.configPrefix is not None:
-        global gConfigName, gSiteConfigName
-        gConfigName = '%s.cfg' % opts.configPrefix
-        gSiteConfigName = '%s.site.cfg' % opts.configPrefix
-
-    if opts.numThreads is None:
-# Python <2.5 has a race condition causing lit to always fail with numThreads>1
-# http://bugs.python.org/issue1731717
-# I haven't seen this bug occur with 2.5.2 and later, so only enable multiple
-# threads by default there.
-       if sys.hexversion >= 0x2050200:
-               opts.numThreads = Util.detectCPUs()
-       else:
-               opts.numThreads = 1
-
-    inputs = args
-
-    # Create the user defined parameters.
-    userParams = dict(builtinParameters)
-    for entry in opts.userParameters:
-        if '=' not in entry:
-            name,val = entry,''
-        else:
-            name,val = entry.split('=', 1)
-        userParams[name] = val
-
-    # Create the global config object.
-    litConfig = LitConfig.LitConfig(progname = os.path.basename(sys.argv[0]),
-                                    path = opts.path,
-                                    quiet = opts.quiet,
-                                    useValgrind = opts.useValgrind,
-                                    valgrindLeakCheck = opts.valgrindLeakCheck,
-                                    valgrindArgs = opts.valgrindArgs,
-                                    useTclAsSh = opts.useTclAsSh,
-                                    noExecute = opts.noExecute,
-                                    debug = opts.debug,
-                                    isWindows = (platform.system()=='Windows'),
-                                    params = userParams)
-
-    # Expand '@...' form in inputs.
-    actual_inputs = []
-    for input in inputs:
-        if os.path.exists(input) or not input.startswith('@'):
-            actual_inputs.append(input)
-        else:
-            f = open(input[1:])
-            try:
-                for ln in f:
-                    ln = ln.strip()
-                    if ln:
-                        actual_inputs.append(ln)
-            finally:
-                f.close()
-                    
-            
-    # Load the tests from the inputs.
-    tests = []
-    testSuiteCache = {}
-    localConfigCache = {}
-    for input in actual_inputs:
-        prev = len(tests)
-        tests.extend(getTests(input, litConfig,
-                              testSuiteCache, localConfigCache)[1])
-        if prev == len(tests):
-            litConfig.warning('input %r contained no tests' % input)
-
-    # If there were any errors during test discovery, exit now.
-    if litConfig.numErrors:
-        print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
-        sys.exit(2)
-
-    if opts.showSuites:
-        suitesAndTests = dict([(ts,[])
-                               for ts,_ in testSuiteCache.values()
-                               if ts])
-        for t in tests:
-            suitesAndTests[t.suite].append(t)
-
-        print '-- Test Suites --'
-        suitesAndTests = suitesAndTests.items()
-        suitesAndTests.sort(key = lambda (ts,_): ts.name)
-        for ts,ts_tests in suitesAndTests:
-            print '  %s - %d tests' %(ts.name, len(ts_tests))
-            print '    Source Root: %s' % ts.source_root
-            print '    Exec Root  : %s' % ts.exec_root
-
-    # Select and order the tests.
-    numTotalTests = len(tests)
-    if opts.shuffle:
-        random.shuffle(tests)
-    else:
-        tests.sort(key = lambda t: t.getFullName())
-    if opts.maxTests is not None:
-        tests = tests[:opts.maxTests]
-
-    extra = ''
-    if len(tests) != numTotalTests:
-        extra = ' of %d' % numTotalTests
-    header = '-- Testing: %d%s tests, %d threads --'%(len(tests),extra,
-                                                      opts.numThreads)
-
-    if opts.repeatTests:
-        tests = [t.copyWithIndex(i)
-                 for t in tests
-                 for i in range(opts.repeatTests)]
-
-    progressBar = None
-    if not opts.quiet:
-        if opts.succinct and opts.useProgressBar:
-            try:
-                tc = ProgressBar.TerminalController()
-                progressBar = ProgressBar.ProgressBar(tc, header)
-            except ValueError:
-                print header
-                progressBar = ProgressBar.SimpleProgressBar('Testing: ')
-        else:
-            print header
-
-    # Don't create more threads than tests.
-    opts.numThreads = min(len(tests), opts.numThreads)
-
-    startTime = time.time()
-    display = TestingProgressDisplay(opts, len(tests), progressBar)
-    provider = TestProvider(tests, opts.maxTime)
-    runTests(opts.numThreads, litConfig, provider, display)
-    display.finish()
-
-    if not opts.quiet:
-        print 'Testing Time: %.2fs'%(time.time() - startTime)
-
-    # Update results for any tests which weren't run.
-    for t in tests:
-        if t.result is None:
-            t.setResult(Test.UNRESOLVED, '', 0.0)
-
-    # List test results organized by kind.
-    hasFailures = False
-    byCode = {}
-    for t in tests:
-        if t.result not in byCode:
-            byCode[t.result] = []
-        byCode[t.result].append(t)
-        if t.result.isFailure:
-            hasFailures = True
-
-    # FIXME: Show unresolved and (optionally) unsupported tests.
-    for title,code in (('Unexpected Passing Tests', Test.XPASS),
-                       ('Failing Tests', Test.FAIL)):
-        elts = byCode.get(code)
-        if not elts:
-            continue
-        print '*'*20
-        print '%s (%d):' % (title, len(elts))
-        for t in elts:
-            print '    %s' % t.getFullName()
-        print
-
-    if opts.timeTests:
-        # Collate, in case we repeated tests.
-        times = {}
-        for t in tests:
-            key = t.getFullName()
-            times[key] = times.get(key, 0.) + t.elapsed
-
-        byTime = list(times.items())
-        byTime.sort(key = lambda (name,elapsed): elapsed)
-        if byTime:
-            Util.printHistogram(byTime, title='Tests')
-
-    for name,code in (('Expected Passes    ', Test.PASS),
-                      ('Expected Failures  ', Test.XFAIL),
-                      ('Unsupported Tests  ', Test.UNSUPPORTED),
-                      ('Unresolved Tests   ', Test.UNRESOLVED),
-                      ('Unexpected Passes  ', Test.XPASS),
-                      ('Unexpected Failures', Test.FAIL),):
-        if opts.quiet and not code.isFailure:
-            continue
-        N = len(byCode.get(code,[]))
-        if N:
-            print '  %s: %d' % (name,N)
-
-    # If we encountered any additional errors, exit abnormally.
-    if litConfig.numErrors:
-        print >>sys.stderr, '\n%d error(s), exiting.' % litConfig.numErrors
-        sys.exit(2)
-
-    # Warn about warnings.
-    if litConfig.numWarnings:
-        print >>sys.stderr, '\n%d warning(s) in tests.' % litConfig.numWarnings
-
-    if hasFailures:
-        sys.exit(1)
-    sys.exit(0)
-
-if __name__=='__main__':
-    main()
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
new file mode 100755
index 000000000000..13d263009ddd
--- /dev/null
+++ b/utils/lit/lit/main.py
@@ -0,0 +1,648 @@
+#!/usr/bin/env python
+
+"""
+lit - LLVM Integrated Tester.
+
+See lit.pod for more information.
+"""
+
+import math, os, platform, random, re, sys, time, threading, traceback
+
+import ProgressBar
+import TestRunner
+import Util
+
+from TestingConfig import TestingConfig
+import LitConfig
+import Test
+
+# Configuration files to look for when discovering test suites. These can be
+# overridden with --config-prefix.
+#
+# FIXME: Rename to 'config.lit', 'site.lit', and 'local.lit' ?
+gConfigName = 'lit.cfg'
+gSiteConfigName = 'lit.site.cfg'
+
+kLocalConfigName = 'lit.local.cfg'
+
+class TestingProgressDisplay:
+    def __init__(self, opts, numTests, progressBar=None):
+        self.opts = opts
+        self.numTests = numTests
+        self.current = None
+        self.lock = threading.Lock()
+        self.progressBar = progressBar
+        self.completed = 0
+
+    def update(self, test):
+        # Avoid locking overhead in quiet mode
+        if self.opts.quiet and not test.result.isFailure:
+            self.completed += 1
+            return
+
+        # Output lock.
+        self.lock.acquire()
+        try:
+            self.handleUpdate(test)
+        finally:
+            self.lock.release()
+
+    def finish(self):
+        if self.progressBar:
+            self.progressBar.clear()
+        elif self.opts.quiet:
+            pass
+        elif self.opts.succinct:
+            sys.stdout.write('\n')
+
+    def handleUpdate(self, test):
+        self.completed += 1
+        if self.progressBar:
+            self.progressBar.update(float(self.completed)/self.numTests,
+                                    test.getFullName())
+
+        if self.opts.succinct and not test.result.isFailure:
+            return
+
+        if self.progressBar:
+            self.progressBar.clear()
+
+        print '%s: %s (%d of %d)' % (test.result.name, test.getFullName(),
+                                     self.completed, self.numTests)
+
+        if test.result.isFailure and self.opts.showOutput:
+            print "%s TEST '%s' FAILED %s" % ('*'*20, test.getFullName(),
+                                              '*'*20)
+            print test.output
+            print "*" * 20
+
+        sys.stdout.flush()
+
+class TestProvider:
+    def __init__(self, tests, maxTime):
+        self.maxTime = maxTime
+        self.iter = iter(tests)
+        self.lock = threading.Lock()
+        self.startTime = time.time()
+
+    def get(self):
+        # Check if we have run out of time.
+        if self.maxTime is not None:
+            if time.time() - self.startTime > self.maxTime:
+                return None
+
+        # Otherwise take the next test.
+        self.lock.acquire()
+        try:
+            item = self.iter.next()
+        except StopIteration:
+            item = None
+        self.lock.release()
+        return item
+
+class Tester(threading.Thread):
+    def __init__(self, litConfig, provider, display):
+        threading.Thread.__init__(self)
+        self.litConfig = litConfig
+        self.provider = provider
+        self.display = display
+
+    def run(self):
+        while 1:
+            item = self.provider.get()
+            if item is None:
+                break
+            self.runTest(item)
+
+    def runTest(self, test):
+        result = None
+        startTime = time.time()
+        try:
+            result, output = test.config.test_format.execute(test,
+                                                             self.litConfig)
+        except KeyboardInterrupt:
+            # This is a sad hack. Unfortunately subprocess goes
+            # bonkers with ctrl-c and we start forking merrily.
+            print '\nCtrl-C detected, goodbye.'
+            os.kill(0,9)
+        except:
+            if self.litConfig.debug:
+                raise
+            result = Test.UNRESOLVED
+            output = 'Exception during script execution:\n'
+            output += traceback.format_exc()
+            output += '\n'
+        elapsed = time.time() - startTime
+
+        test.setResult(result, output, elapsed)
+        self.display.update(test)
+
+def dirContainsTestSuite(path):
+    cfgpath = os.path.join(path, gSiteConfigName)
+    if os.path.exists(cfgpath):
+        return cfgpath
+    cfgpath = os.path.join(path, gConfigName)
+    if os.path.exists(cfgpath):
+        return cfgpath
+
+def getTestSuite(item, litConfig, cache):
+    """getTestSuite(item, litConfig, cache) -> (suite, relative_path)
+
+    Find the test suite containing @arg item.
+
+    @retval (None, ...) - Indicates no test suite contains @arg item.
+    @retval (suite, relative_path) - The suite that @arg item is in, and its
+    relative path inside that suite.
+    """
+    def search1(path):
+        # Check for a site config or a lit config.
+        cfgpath = dirContainsTestSuite(path)
+
+        # If we didn't find a config file, keep looking.
+        if not cfgpath:
+            parent,base = os.path.split(path)
+            if parent == path:
+                return (None, ())
+
+            ts, relative = search(parent)
+            return (ts, relative + (base,))
+
+        # We found a config file, load it.
+        if litConfig.debug:
+            litConfig.note('loading suite config %r' % cfgpath)
+
+        cfg = TestingConfig.frompath(cfgpath, None, litConfig, mustExist = True)
+        source_root = os.path.realpath(cfg.test_source_root or path)
+        exec_root = os.path.realpath(cfg.test_exec_root or path)
+        return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()
+
+    def search(path):
+        # Check for an already instantiated test suite.
+        res = cache.get(path)
+        if res is None:
+            cache[path] = res = search1(path)
+        return res
+
+    # Canonicalize the path.
+    item = os.path.realpath(item)
+
+    # Skip files and virtual components.
+    components = []
+    while not os.path.isdir(item):
+        parent,base = os.path.split(item)
+        if parent == item:
+            return (None, ())
+        components.append(base)
+        item = parent
+    components.reverse()
+
+    ts, relative = search(item)
+    return ts, tuple(relative + tuple(components))
+
+def getLocalConfig(ts, path_in_suite, litConfig, cache):
+    def search1(path_in_suite):
+        # Get the parent config.
+        if not path_in_suite:
+            parent = ts.config
+        else:
+            parent = search(path_in_suite[:-1])
+
+        # Load the local configuration.
+        source_path = ts.getSourcePath(path_in_suite)
+        cfgpath = os.path.join(source_path, kLocalConfigName)
+        if litConfig.debug:
+            litConfig.note('loading local config %r' % cfgpath)
+        return TestingConfig.frompath(cfgpath, parent, litConfig,
+                                    mustExist = False,
+                                    config = parent.clone(cfgpath))
+
+    def search(path_in_suite):
+        key = (ts, path_in_suite)
+        res = cache.get(key)
+        if res is None:
+            cache[key] = res = search1(path_in_suite)
+        return res
+
+    return search(path_in_suite)
+
+def getTests(path, litConfig, testSuiteCache, localConfigCache):
+    # Find the test suite for this input and its relative path.
+    ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
+    if ts is None:
+        litConfig.warning('unable to find test suite for %r' % path)
+        return (),()
+
+    if litConfig.debug:
+        litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
+                                                        path_in_suite))
+
+    return ts, getTestsInSuite(ts, path_in_suite, litConfig,
+                               testSuiteCache, localConfigCache)
+
+def getTestsInSuite(ts, path_in_suite, litConfig,
+                    testSuiteCache, localConfigCache):
+    # Check that the source path exists (errors here are reported by the
+    # caller).
+    source_path = ts.getSourcePath(path_in_suite)
+    if not os.path.exists(source_path):
+        return
+
+    # Check if the user named a test directly.
+    if not os.path.isdir(source_path):
+        lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
+        yield Test.Test(ts, path_in_suite, lc)
+        return
+
+    # Otherwise we have a directory to search for tests, start by getting the
+    # local configuration.
+    lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
+
+    # Search for tests.
+    if lc.test_format is not None:
+        for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
+                                                      litConfig, lc):
+            yield res
+
+    # Search subdirectories.
+    for filename in os.listdir(source_path):
+        # FIXME: This doesn't belong here?
+        if filename in ('Output', '.svn') or filename in lc.excludes:
+            continue
+
+        # Ignore non-directories.
+        file_sourcepath = os.path.join(source_path, filename)
+        if not os.path.isdir(file_sourcepath):
+            continue
+
+        # Check for nested test suites, first in the execpath in case there is a
+        # site configuration and then in the source path.
+        file_execpath = ts.getExecPath(path_in_suite + (filename,))
+        if dirContainsTestSuite(file_execpath):
+            sub_ts, subiter = getTests(file_execpath, litConfig,
+                                       testSuiteCache, localConfigCache)
+        elif dirContainsTestSuite(file_sourcepath):
+            sub_ts, subiter = getTests(file_sourcepath, litConfig,
+                                       testSuiteCache, localConfigCache)
+        else:
+            # Otherwise, continue loading from inside this test suite.
+            subiter = getTestsInSuite(ts, path_in_suite + (filename,),
+                                      litConfig, testSuiteCache,
+                                      localConfigCache)
+            sub_ts = None
+
+        N = 0
+        for res in subiter:
+            N += 1
+            yield res
+        if sub_ts and not N:
+            litConfig.warning('test suite %r contained no tests' % sub_ts.name)
+
+def runTests(numThreads, litConfig, provider, display):
+    # If only using one testing thread, don't use threads at all; this lets us
+    # profile, among other things.
+    if numThreads == 1:
+        t = Tester(litConfig, provider, display)
+        t.run()
+        return
+
+    # Otherwise spin up the testing threads and wait for them to finish.
+    testers = [Tester(litConfig, provider, display)
+               for i in range(numThreads)]
+    for t in testers:
+        t.start()
+    try:
+        for t in testers:
+            t.join()
+    except KeyboardInterrupt:
+        sys.exit(2)
+
+def load_test_suite(inputs):
+    import unittest
+
+    # Create the global config object.
+    litConfig = LitConfig.LitConfig(progname = 'lit',
+                                    path = [],
+                                    quiet = False,
+                                    useValgrind = False,
+                                    valgrindLeakCheck = False,
+                                    valgrindArgs = [],
+                                    useTclAsSh = False,
+                                    noExecute = False,
+                                    debug = False,
+                                    isWindows = (platform.system()=='Windows'),
+                                    params = {})
+
+    # Load the tests from the inputs.
+    tests = []
+    testSuiteCache = {}
+    localConfigCache = {}
+    for input in inputs:
+        prev = len(tests)
+        tests.extend(getTests(input, litConfig,
+                              testSuiteCache, localConfigCache)[1])
+        if prev == len(tests):
+            litConfig.warning('input %r contained no tests' % input)
+
+    # If there were any errors during test discovery, exit now.
+    if litConfig.numErrors:
+        print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
+        sys.exit(2)
+
+    # Return a unittest test suite which just runs the tests in order.
+    def get_test_fn(test):
+        return unittest.FunctionTestCase(
+            lambda: test.config.test_format.execute(
+                test, litConfig),
+            description = test.getFullName())
+
+    from LitTestCase import LitTestCase
+    return unittest.TestSuite([LitTestCase(test, litConfig) for test in tests])
+
+def main(builtinParameters = {}):    # Bump the GIL check interval, its more important to get any one thread to a
+    # blocking operation (hopefully exec) than to try and unblock other threads.
+    #
+    # FIXME: This is a hack.
+    import sys
+    sys.setcheckinterval(1000)
+
+    global options
+    from optparse import OptionParser, OptionGroup
+    parser = OptionParser("usage: %prog [options] {file-or-path}")
+
+    parser.add_option("-j", "--threads", dest="numThreads", metavar="N",
+                      help="Number of testing threads",
+                      type=int, action="store", default=None)
+    parser.add_option("", "--config-prefix", dest="configPrefix",
+                      metavar="NAME", help="Prefix for 'lit' config files",
+                      action="store", default=None)
+    parser.add_option("", "--param", dest="userParameters",
+                      metavar="NAME=VAL",
+                      help="Add 'NAME' = 'VAL' to the user defined parameters",
+                      type=str, action="append", default=[])
+
+    group = OptionGroup(parser, "Output Format")
+    # FIXME: I find these names very confusing, although I like the
+    # functionality.
+    group.add_option("-q", "--quiet", dest="quiet",
+                     help="Suppress no error output",
+                     action="store_true", default=False)
+    group.add_option("-s", "--succinct", dest="succinct",
+                     help="Reduce amount of output",
+                     action="store_true", default=False)
+    group.add_option("-v", "--verbose", dest="showOutput",
+                     help="Show all test output",
+                     action="store_true", default=False)
+    group.add_option("", "--no-progress-bar", dest="useProgressBar",
+                     help="Do not use curses based progress bar",
+                     action="store_false", default=True)
+    parser.add_option_group(group)
+
+    group = OptionGroup(parser, "Test Execution")
+    group.add_option("", "--path", dest="path",
+                     help="Additional paths to add to testing environment",
+                     action="append", type=str, default=[])
+    group.add_option("", "--vg", dest="useValgrind",
+                     help="Run tests under valgrind",
+                     action="store_true", default=False)
+    group.add_option("", "--vg-leak", dest="valgrindLeakCheck",
+                     help="Check for memory leaks under valgrind",
+                     action="store_true", default=False)
+    group.add_option("", "--vg-arg", dest="valgrindArgs", metavar="ARG",
+                     help="Specify an extra argument for valgrind",
+                     type=str, action="append", default=[])
+    group.add_option("", "--time-tests", dest="timeTests",
+                     help="Track elapsed wall time for each test",
+                     action="store_true", default=False)
+    group.add_option("", "--no-execute", dest="noExecute",
+                     help="Don't execute any tests (assume PASS)",
+                     action="store_true", default=False)
+    parser.add_option_group(group)
+
+    group = OptionGroup(parser, "Test Selection")
+    group.add_option("", "--max-tests", dest="maxTests", metavar="N",
+                     help="Maximum number of tests to run",
+                     action="store", type=int, default=None)
+    group.add_option("", "--max-time", dest="maxTime", metavar="N",
+                     help="Maximum time to spend testing (in seconds)",
+                     action="store", type=float, default=None)
+    group.add_option("", "--shuffle", dest="shuffle",
+                     help="Run tests in random order",
+                     action="store_true", default=False)
+    parser.add_option_group(group)
+
+    group = OptionGroup(parser, "Debug and Experimental Options")
+    group.add_option("", "--debug", dest="debug",
+                      help="Enable debugging (for 'lit' development)",
+                      action="store_true", default=False)
+    group.add_option("", "--show-suites", dest="showSuites",
+                      help="Show discovered test suites",
+                      action="store_true", default=False)
+    group.add_option("", "--no-tcl-as-sh", dest="useTclAsSh",
+                      help="Don't run Tcl scripts using 'sh'",
+                      action="store_false", default=True)
+    group.add_option("", "--repeat", dest="repeatTests", metavar="N",
+                      help="Repeat tests N times (for timing)",
+                      action="store", default=None, type=int)
+    parser.add_option_group(group)
+
+    (opts, args) = parser.parse_args()
+
+    if not args:
+        parser.error('No inputs specified')
+
+    if opts.configPrefix is not None:
+        global gConfigName, gSiteConfigName
+        gConfigName = '%s.cfg' % opts.configPrefix
+        gSiteConfigName = '%s.site.cfg' % opts.configPrefix
+
+    if opts.numThreads is None:
+# Python <2.5 has a race condition causing lit to always fail with numThreads>1
+# http://bugs.python.org/issue1731717
+# I haven't seen this bug occur with 2.5.2 and later, so only enable multiple
+# threads by default there.
+       if sys.hexversion >= 0x2050200:
+               opts.numThreads = Util.detectCPUs()
+       else:
+               opts.numThreads = 1
+
+    inputs = args
+
+    # Create the user defined parameters.
+    userParams = dict(builtinParameters)
+    for entry in opts.userParameters:
+        if '=' not in entry:
+            name,val = entry,''
+        else:
+            name,val = entry.split('=', 1)
+        userParams[name] = val
+
+    # Create the global config object.
+    litConfig = LitConfig.LitConfig(progname = os.path.basename(sys.argv[0]),
+                                    path = opts.path,
+                                    quiet = opts.quiet,
+                                    useValgrind = opts.useValgrind,
+                                    valgrindLeakCheck = opts.valgrindLeakCheck,
+                                    valgrindArgs = opts.valgrindArgs,
+                                    useTclAsSh = opts.useTclAsSh,
+                                    noExecute = opts.noExecute,
+                                    debug = opts.debug,
+                                    isWindows = (platform.system()=='Windows'),
+                                    params = userParams)
+
+    # Expand '@...' form in inputs.
+    actual_inputs = []
+    for input in inputs:
+        if os.path.exists(input) or not input.startswith('@'):
+            actual_inputs.append(input)
+        else:
+            f = open(input[1:])
+            try:
+                for ln in f:
+                    ln = ln.strip()
+                    if ln:
+                        actual_inputs.append(ln)
+            finally:
+                f.close()
+                    
+            
+    # Load the tests from the inputs.
+    tests = []
+    testSuiteCache = {}
+    localConfigCache = {}
+    for input in actual_inputs:
+        prev = len(tests)
+        tests.extend(getTests(input, litConfig,
+                              testSuiteCache, localConfigCache)[1])
+        if prev == len(tests):
+            litConfig.warning('input %r contained no tests' % input)
+
+    # If there were any errors during test discovery, exit now.
+    if litConfig.numErrors:
+        print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
+        sys.exit(2)
+
+    if opts.showSuites:
+        suitesAndTests = dict([(ts,[])
+                               for ts,_ in testSuiteCache.values()
+                               if ts])
+        for t in tests:
+            suitesAndTests[t.suite].append(t)
+
+        print '-- Test Suites --'
+        suitesAndTests = suitesAndTests.items()
+        suitesAndTests.sort(key = lambda (ts,_): ts.name)
+        for ts,ts_tests in suitesAndTests:
+            print '  %s - %d tests' %(ts.name, len(ts_tests))
+            print '    Source Root: %s' % ts.source_root
+            print '    Exec Root  : %s' % ts.exec_root
+
+    # Select and order the tests.
+    numTotalTests = len(tests)
+    if opts.shuffle:
+        random.shuffle(tests)
+    else:
+        tests.sort(key = lambda t: t.getFullName())
+    if opts.maxTests is not None:
+        tests = tests[:opts.maxTests]
+
+    extra = ''
+    if len(tests) != numTotalTests:
+        extra = ' of %d' % numTotalTests
+    header = '-- Testing: %d%s tests, %d threads --'%(len(tests),extra,
+                                                      opts.numThreads)
+
+    if opts.repeatTests:
+        tests = [t.copyWithIndex(i)
+                 for t in tests
+                 for i in range(opts.repeatTests)]
+
+    progressBar = None
+    if not opts.quiet:
+        if opts.succinct and opts.useProgressBar:
+            try:
+                tc = ProgressBar.TerminalController()
+                progressBar = ProgressBar.ProgressBar(tc, header)
+            except ValueError:
+                print header
+                progressBar = ProgressBar.SimpleProgressBar('Testing: ')
+        else:
+            print header
+
+    # Don't create more threads than tests.
+    opts.numThreads = min(len(tests), opts.numThreads)
+
+    startTime = time.time()
+    display = TestingProgressDisplay(opts, len(tests), progressBar)
+    provider = TestProvider(tests, opts.maxTime)
+    runTests(opts.numThreads, litConfig, provider, display)
+    display.finish()
+
+    if not opts.quiet:
+        print 'Testing Time: %.2fs'%(time.time() - startTime)
+
+    # Update results for any tests which weren't run.
+    for t in tests:
+        if t.result is None:
+            t.setResult(Test.UNRESOLVED, '', 0.0)
+
+    # List test results organized by kind.
+    hasFailures = False
+    byCode = {}
+    for t in tests:
+        if t.result not in byCode:
+            byCode[t.result] = []
+        byCode[t.result].append(t)
+        if t.result.isFailure:
+            hasFailures = True
+
+    # FIXME: Show unresolved and (optionally) unsupported tests.
+    for title,code in (('Unexpected Passing Tests', Test.XPASS),
+                       ('Failing Tests', Test.FAIL)):
+        elts = byCode.get(code)
+        if not elts:
+            continue
+        print '*'*20
+        print '%s (%d):' % (title, len(elts))
+        for t in elts:
+            print '    %s' % t.getFullName()
+        print
+
+    if opts.timeTests:
+        # Collate, in case we repeated tests.
+        times = {}
+        for t in tests:
+            key = t.getFullName()
+            times[key] = times.get(key, 0.) + t.elapsed
+
+        byTime = list(times.items())
+        byTime.sort(key = lambda (name,elapsed): elapsed)
+        if byTime:
+            Util.printHistogram(byTime, title='Tests')
+
+    for name,code in (('Expected Passes    ', Test.PASS),
+                      ('Expected Failures  ', Test.XFAIL),
+                      ('Unsupported Tests  ', Test.UNSUPPORTED),
+                      ('Unresolved Tests   ', Test.UNRESOLVED),
+                      ('Unexpected Passes  ', Test.XPASS),
+                      ('Unexpected Failures', Test.FAIL),):
+        if opts.quiet and not code.isFailure:
+            continue
+        N = len(byCode.get(code,[]))
+        if N:
+            print '  %s: %d' % (name,N)
+
+    # If we encountered any additional errors, exit abnormally.
+    if litConfig.numErrors:
+        print >>sys.stderr, '\n%d error(s), exiting.' % litConfig.numErrors
+        sys.exit(2)
+
+    # Warn about warnings.
+    if litConfig.numWarnings:
+        print >>sys.stderr, '\n%d warning(s) in tests.' % litConfig.numWarnings
+
+    if hasFailures:
+        sys.exit(1)
+    sys.exit(0)
+
+if __name__=='__main__':
+    main()
diff --git a/utils/lit/setup.py b/utils/lit/setup.py
index e6ae3d880488..738ee23776d8 100644
--- a/utils/lit/setup.py
+++ b/utils/lit/setup.py
@@ -3,7 +3,7 @@ import lit
 # FIXME: Support distutils?
 from setuptools import setup, find_packages
 setup(
-    name = "Lit",
+    name = "lit",
     version = lit.__version__,
 
     author = lit.__author__,
@@ -14,15 +14,16 @@ setup(
     description = "A Software Testing Tool",
     keywords = 'test C++ automatic discovery',
     long_description = """\
-Lit
-+++
+*lit*
++++++
 
 About
 =====
 
-Lit is a portable tool for executing LLVM and Clang style test suites,
-summarizing their results, and providing indication of failures. Lit is designed
-to be a lightweight testing tool with as simple a user interface as possible.
+*lit* is a portable tool for executing LLVM and Clang style test suites,
+summarizing their results, and providing indication of failures. *lit* is
+designed to be a lightweight testing tool with as simple a user interface as
+possible.
 
 
 Features
@@ -37,15 +38,15 @@ Features
 Documentation
 =============
 
-The offical Lit documentation is in the man page, available online in the `LLVM
-Command Guide http://llvm.org/cmds/lit.html`_.
+The offical *lit* documentation is in the man page, available online at the LLVM
+Command Guide: http://llvm.org/cmds/lit.html.
 
 
 Source
 ======
 
-The Lit source is available as part of LLVM, in the `LLVM SVN repository
-<http://llvm.org/svn/llvm-project/llvm/trunk/utils/lit`_.
+The *lit* source is available as part of LLVM, in the LLVM SVN repository:
+http://llvm.org/svn/llvm-project/llvm/trunk/utils/lit.
 """,
 
     classifiers=[
@@ -55,7 +56,7 @@ The Lit source is available as part of LLVM, in the `LLVM SVN repository
         'License :: OSI Approved :: University of Illinois/NCSA Open Source License',
         'Natural Language :: English',
         'Operating System :: OS Independent',
-        'Progamming Language :: Python',
+        'Programming Language :: Python',
         'Topic :: Software Development :: Testing',
         ],
 
diff --git a/utils/llvm-lit/CMakeLists.txt b/utils/llvm-lit/CMakeLists.txt
new file mode 100644
index 000000000000..602cc881cd5a
--- /dev/null
+++ b/utils/llvm-lit/CMakeLists.txt
@@ -0,0 +1,12 @@
+configure_file(
+  llvm-lit.in
+  ${LLVM_TOOLS_BINARY_DIR}/llvm-lit
+  )
+
+install(FILES
+  ${LLVM_TOOLS_BINARY_DIR}/llvm-lit
+  DESTINATION bin
+  PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
+              GROUP_READ GROUP_EXECUTE
+              WORLD_READ WORLD_EXECUTE
+  )
diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in
index 3ff2c2489c45..1df1747a1cc6 100644
--- a/utils/llvm-lit/llvm-lit.in
+++ b/utils/llvm-lit/llvm-lit.in
@@ -13,9 +13,15 @@ sys.path.append(os.path.join(llvm_source_root, 'utils', 'lit'))
 # Set up some builtin parameters, so that by default the LLVM test suite
 # configuration file knows how to find the object tree.
 builtin_parameters = {
+    'build_config' : "@CMAKE_CFG_INTDIR@",
+    'build_mode' : "@RUNTIME_BUILD_MODE@",
     'llvm_site_config' : os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
     }
 
+clang_site_config = os.path.join(llvm_obj_root, 'tools', 'clang', 'test', 'lit.site.cfg')
+if os.path.exists(clang_site_config):
+    builtin_parameters['clang_site_config'] = clang_site_config
+
 if __name__=='__main__':
     import lit
     lit.main(builtin_parameters)
diff --git a/utils/llvm-native-gcc b/utils/llvm-native-gcc
index b3cecb14118f..91a557cc161d 100755
--- a/utils/llvm-native-gcc
+++ b/utils/llvm-native-gcc
@@ -193,10 +193,10 @@ native-build [OPTIONS...] FILE
 
 llvm-native-gcc is a wrapper around the LLVM command-line tools which generates
 a native object (.o) file by compiling FILE with llvm-gcc, and then running 
-an LLVM back-end (CBE by default) over the resulting bytecode, and then
+an LLVM back-end (CBE by default) over the resulting bitcode, and then
 compiling the resulting code to a native object file.
 
-If called as "native-build", it compiles bytecode to native code, and takes
+If called as "native-build", it compiles bitcode to native code, and takes
 different options.
 
 =head1 OPTIONS
diff --git a/utils/llvm-native-gxx b/utils/llvm-native-gxx
index 75164af237ef..db547f654e2f 100755
--- a/utils/llvm-native-gxx
+++ b/utils/llvm-native-gxx
@@ -193,10 +193,10 @@ native-build [OPTIONS...] FILE
 
 llvm-native-g++ is a wrapper around the LLVM command-line tools which generates
 a native object (.o) file by compiling FILE with llvm-g++, and then running 
-an LLVM back-end (CBE by default) over the resulting bytecode, and then
+an LLVM back-end (CBE by default) over the resulting bitcode, and then
 compiling the resulting code to a native object file.
 
-If called as "native-build", it compiles bytecode to native code, and takes
+If called as "native-build", it compiles bitcode to native code, and takes
 different options.
 
 =head1 OPTIONS
diff --git a/utils/not/CMakeLists.txt b/utils/not/CMakeLists.txt
index 407c82eeeadd..155d2e3ae7e4 100644
--- a/utils/not/CMakeLists.txt
+++ b/utils/not/CMakeLists.txt
@@ -2,7 +2,7 @@ add_executable(not
   not.cpp
   )
 
-target_link_libraries(not LLVMSystem)
+target_link_libraries(not LLVMSupport)
 if( MINGW )
   target_link_libraries(not imagehlp psapi)
 endif( MINGW )
diff --git a/utils/not/Makefile b/utils/not/Makefile
index fef4802229d2..f37f166c6c7b 100644
--- a/utils/not/Makefile
+++ b/utils/not/Makefile
@@ -1,15 +1,15 @@
 ##===- utils/not/Makefile ----------------------------------*- Makefile -*-===##
-# 
+#
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
-# 
+#
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../..
 TOOLNAME = not
-USEDLIBS = LLVMSupport.a LLVMSystem.a
+USEDLIBS = LLVMSupport.a
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/utils/not/not.cpp b/utils/not/not.cpp
index dd89b8f11c0e..9a924b56a792 100644
--- a/utils/not/not.cpp
+++ b/utils/not/not.cpp
@@ -7,11 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/System/Path.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 int main(int argc, const char **argv) {
   sys::Path Program = sys::Program::FindProgramByName(argv[1]);
-  return !sys::Program::ExecuteAndWait(Program, argv + 1);
+
+  std::string ErrMsg;
+  int Result = sys::Program::ExecuteAndWait(Program, argv + 1, 0, 0, 0, 0,
+                                            &ErrMsg);
+  if (Result < 0) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return 1;
+  }
+
+  return Result == 0;
 }
diff --git a/utils/profile.pl b/utils/profile.pl
index f9950f97fea8..318011560bc8 100755
--- a/utils/profile.pl
+++ b/utils/profile.pl
@@ -5,7 +5,7 @@
 # Synopsis: Insert instrumentation code into a program, run it with the JIT,
 #           then print out a profile report.
 #
-# Syntax:   profile.pl [OPTIONS] bytecodefile <arguments>
+# Syntax:   profile.pl [OPTIONS] bitcodefile <arguments>
 #
 # OPTIONS may include one or more of the following:
 #     -block    - Enable basicblock profiling
@@ -56,7 +56,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
   $LLVMProfOpts .= " " . $_;
 }
 
-die "Must specify LLVM bytecode file as first argument!" if (@ARGV == 0);
+die "Must specify LLVM bitcode file as first argument!" if (@ARGV == 0);
 
 my $BytecodeFile = $ARGV[0];
 
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
new file mode 100755
index 000000000000..e24638e714a3
--- /dev/null
+++ b/utils/release/test-release.sh
@@ -0,0 +1,398 @@
+#!/bin/bash
+#===-- test-release.sh - Test the LLVM release candidates ------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License.
+#
+#===------------------------------------------------------------------------===#
+#
+# Download, build, and test the release candidate for an LLVM release.
+#
+#===------------------------------------------------------------------------===#
+
+set -e
+
+Release=""
+Release_no_dot=""
+RC=""
+do_checkout="yes"
+do_ada="no"
+do_objc="yes"
+do_fortran="yes"
+do_64bit="yes"
+BuildDir="`pwd`"
+
+function usage() {
+    echo "usage: `basename $0` -release X.Y -rc NUM [OPTIONS]"
+    echo ""
+    echo " -release X.Y      The release number to test."
+    echo " -rc NUM           The pre-release candidate number."
+    echo " -j NUM            Number of compile jobs to run. [default: 3]"
+    echo " -build-dir DIR    Directory to perform testing in. [default: pwd]"
+    echo " -no-checkout      Don't checkout the sources from SVN."
+    echo " -no-64bit         Don't test the 64-bit version. [default: yes]"
+    echo " -ada              Build Ada. [default: no]"
+    echo " -disable-objc     Disable ObjC build. [default: build]"
+    echo " -disable-fortran  Disable Fortran build. [default: build]"
+}
+
+while [ $# -gt 0 ]; do
+    case $1 in
+        -release | --release )
+            shift
+            Release="$1"
+            Release_no_dot="`echo $1 | sed -e 's,\.,,'`"
+            ;;
+        -rc | --rc | -RC | --RC )
+            shift
+            RC=$1
+            ;;
+        -j* )
+            NumJobs="`echo $1 | sed -e 's,-j\([0-9]*\),\1,g'`"
+            if [ -z "$NumJobs" ]; then
+                shift
+                NumJobs="$1"
+            fi
+            ;;
+        -build-dir | --build-dir | -builddir | --builddir )
+            shift
+            BuildDir="$1"
+            ;;
+        -no-checkout | --no-checkout )
+            do_checkout="no"
+            ;;
+        -no-64bit | --no-64bit )
+            do_64bit="no"
+            ;;
+        -ada | --ada )
+            do_ada="yes"
+            ;;
+        -disable-objc | --disable-objc )
+            do_objc="no"
+            ;;
+        -disable-fortran | --disable-fortran )
+            echo "WARNING: Do you *really* need to disable Fortran?"
+            sleep 5
+            do_fortran="no"
+            ;;
+        -help | --help | -h | --h | -\? )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+# Check required arguments.
+if [ -z "$Release" ]; then
+    echo "No release number specified!"
+    exit 1
+fi
+if [ -z "$RC" ]; then
+    echo "No release candidate number specified!"
+    exit 1
+fi
+
+# Figure out how many make processes to run.
+if [ -z "$NumJobs" ]; then
+    NumJobs=`sysctl -n hw.activecpu 2> /dev/null || true`
+fi
+if [ -z "$NumJobs" ]; then
+    NumJobs=`sysctl -n hw.ncpu 2> /dev/null || true`
+fi
+if [ -z "$NumJobs" ]; then
+    NumJobs=`grep -c processor /proc/cpuinfo 2> /dev/null || true`
+fi
+if [ -z "$NumJobs" ]; then
+    NumJobs=3
+fi
+
+# Location of sources.
+llvmCore_srcdir=$BuildDir/llvmCore-$Release-rc$RC.src
+llvmgcc42_srcdir=$BuildDir/llvmgcc42-$Release-rc$RC.src
+
+# Location of log files.
+LogDirName="$Release-rc$RC.logs"
+LogDir=$BuildDir/$LogDirName
+mkdir -p $LogDir
+
+# SVN URLs for the sources.
+Base_url="http://llvm.org/svn/llvm-project"
+llvmCore_RC_url="$Base_url/llvm/tags/RELEASE_$Release_no_dot/rc$RC"
+llvmgcc42_RC_url="$Base_url/llvm-gcc-4.2/tags/RELEASE_$Release_no_dot/rc$RC"
+clang_RC_url="$Base_url/cfe/tags/RELEASE_$Release_no_dot/rc$RC"
+test_suite_RC_url="$Base_url/test-suite/tags/RELEASE_$Release_no_dot/rc$RC"
+
+# Make sure that the URLs are valid.
+function check_valid_urls() {
+    echo "# Validating SVN URLs"
+    if ! svn ls $llvmCore_RC_url > /dev/null 2>&1 ; then
+        echo "llvm $Release release candidate $RC doesn't exist!"
+        exit 1
+    fi
+    if ! svn ls $llvmgcc42_RC_url > /dev/null 2>&1 ; then
+        echo "llvm-gcc-4.2 $Release release candidate $RC doesn't exist!"
+        exit 1
+    fi
+    if ! svn ls $clang_RC_url > /dev/null 2>&1 ; then
+        echo "clang $Release release candidate $RC doesn't exist!"
+        exit 1
+    fi
+    if ! svn ls $test_suite_RC_url > /dev/null 2>&1 ; then
+        echo "test-suite $Release release candidate $RC doesn't exist!"
+        exit 1
+    fi
+}
+
+# Export sources to the the build directory.
+function export_sources() {
+    check_valid_urls
+
+    echo "# Exporting llvm $Release-RC$RC sources"
+    svn export -q $llvmCore_RC_url $llvmCore_srcdir
+    echo "# Exporting llvm-gcc-4.2 $Release-rc$RC  sources"
+    svn export -q $llvmgcc42_RC_url $llvmgcc42_srcdir
+    echo "# Exporting clang $Release-rc$RC sources"
+    svn export -q $clang_RC_url $llvmCore_srcdir/tools/clang
+    echo "# Exporting llvm test suite $Release-rc$RC sources"
+    svn export -q $test_suite_RC_url $llvmCore_srcdir/projects/llvm-test
+}
+
+function configure_llvmCore() {
+    Phase="$1"
+    Flavor="$2"
+    ObjDir="$3"
+    InstallDir="$4"
+    llvmgccDir="$5"
+
+    case $Flavor in
+        Release | Release-64 )
+            Optimized="yes"
+            Assertions="no"
+            ;;
+        Release+Asserts )
+            Optimized="yes"
+            Assertions="yes"
+            ;;
+        Debug )
+            Optimized="no"
+            Assertions="yes"
+            ;;
+        * )
+            echo "# Invalid flavor $Flavor!"
+            echo ""
+            return
+            ;;
+    esac
+
+    cd $ObjDir
+    echo "# Configuring llvm $Release-rc$RC $Flavor"
+    echo "# $llvmCore_srcdir/configure --prefix=$InstallDir \
+        --enable-optimized=$Optimized \
+        --enable-assertions=$Assertions \
+        --with-llvmgccdir=$llvmgccDir"
+    $llvmCore_srcdir/configure --prefix=$InstallDir \
+        --enable-optimized=$Optimized \
+        --enable-assertions=$Assertions \
+        --with-llvmgccdir=$llvmgccDir \
+        > $LogDir/llvm.configure.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    cd -
+}
+
+function build_llvmCore() {
+    Phase="$1"
+    Flavor="$2"
+    ObjDir="$3"
+    ExtraOpts=""
+
+    CompilerFlags=""
+    if [ "$Phase" = "2" ]; then
+        CompilerFlags="CC=$llvmgccDir/bin/llvm-gcc CXX=$llvmgccDir/bin/llvm-g++"
+    fi
+    if [ "$Flavor" = "Release-64" ]; then
+        ExtraOpts="EXTRA_OPTIONS=-m64"
+    fi
+
+    cd $ObjDir
+    echo "# Compiling llvm $Release-rc$RC $Flavor"
+    echo "# make -j $NumJobs VERBOSE=1 $ExtraOpts"
+    make -j $NumJobs VERBOSE=1 $ExtraOpts $CompilerFlags \
+        > $LogDir/llvm.make.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+
+    echo "# Installing llvm $Release-rc$RC $Flavor"
+    echo "# make install"
+    make install \
+        > $LogDir/llvm.install.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    cd -
+}
+
+function test_llvmCore() {
+    Phase="$1"
+    Flavor="$2"
+    ObjDir="$3"
+
+    cd $ObjDir
+    make check \
+        > $LogDir/llvm.check.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    make -C tools/clang test \
+        > $LogDir/clang.check.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    make unittests \
+        > $LogDir/llvm.unittests.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    cd -
+}
+
+function configure_llvm_gcc() {
+    Phase="$1"
+    Flavor="$2"
+    ObjDir="$3"
+    InstallDir="$4"
+    llvmObjDir="$5"
+
+    languages="c,c++"
+    if [ "$do_objc" = "yes" ]; then
+        languages="$languages,objc,obj-c++"
+    fi
+    if [ "$do_fortran" = "yes" ]; then
+        languages="$languages,fortran"
+    fi
+    if [ "$do_ada" = "yes" ]; then
+        languages="$languages,ada"
+    fi
+
+    cd $ObjDir
+    echo "# Configuring llvm-gcc $Release-rc$RC $Flavor"
+    echo "# $llvmgcc42_srcdir/configure --prefix=$InstallDir \
+        --program-prefix=llvm- --enable-llvm=$llvmObjDir \
+        --enable-languages=$languages"
+    $llvmgcc42_srcdir/configure --prefix=$InstallDir \
+        --program-prefix=llvm- --enable-llvm=$llvmObjDir \
+        --enable-languages=$languages \
+        > $LogDir/llvm-gcc.configure.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    cd -
+}
+
+function build_llvm_gcc() {
+    Phase="$1"
+    Flavor="$2"
+    ObjDir="$3"
+    llvmgccDir="$4"
+
+    CompilerFlags=""
+    if [ "$Phase" = "2" ]; then
+        CompilerFlags="CC=$llvmgccDir/bin/llvm-gcc CXX=$llvmgccDir/bin/llvm-g++"
+    fi
+
+    cd $ObjDir
+    echo "# Compiling llvm-gcc $Release-rc$RC $Flavor"
+    echo "# make -j $NumJobs bootstrap LLVM_VERSION_INFO=$Release"
+    make -j $NumJobs bootstrap LLVM_VERSION_INFO=$Release $CompilerFlags \
+        > $LogDir/llvm-gcc.make.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+
+    echo "# Installing llvm-gcc $Release-rc$RC $Flavor"
+    echo "# make install"
+    make install \
+        > $LogDir/llvm-gcc.install.$Release-rc$RC-Phase$Phase-$Flavor.log 2>&1
+    cd -
+}
+
+if [ "$do_checkout" = "yes" ]; then
+    export_sources
+fi
+
+(
+Flavors="Debug Release Release+Asserts"
+if [ "$do_64bit" = "yes" ]; then
+    Flavors="$Flavors Release-64"
+fi
+
+for Flavor in $Flavors ; do
+    echo ""
+    echo ""
+    echo "********************************************************************************"
+    echo "  Release:     $Release-rc$RC"
+    echo "  Build:       $Flavor"
+    echo "  System Info: "
+    echo "    `uname -a`"
+    echo "********************************************************************************"
+    echo ""
+
+    llvmCore_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.obj
+    llvmCore_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.install
+
+    llvmCore_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.obj
+    llvmCore_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.install
+
+    rm -rf $llvmCore_phase1_objdir
+    rm -rf $llvmCore_phase1_installdir
+    rm -rf $llvmCore_phase2_objdir
+    rm -rf $llvmCore_phase2_installdir
+
+    mkdir -p $llvmCore_phase1_objdir
+    mkdir -p $llvmCore_phase1_installdir
+    mkdir -p $llvmCore_phase2_objdir
+    mkdir -p $llvmCore_phase2_installdir
+
+    llvmgcc42_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmgcc42-$Release-rc$RC.obj
+    llvmgcc42_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmgcc42-$Release-rc$RC.install
+
+    llvmgcc42_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmgcc42-$Release-rc$RC.obj
+    llvmgcc42_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmgcc42-$Release-rc$RC.install
+
+    rm -rf $llvmgcc42_phase1_objdir
+    rm -rf $llvmgcc42_phase1_installdir
+    rm -rf $llvmgcc42_phase2_objdir
+    rm -rf $llvmgcc42_phase2_installdir
+
+    mkdir -p $llvmgcc42_phase1_objdir
+    mkdir -p $llvmgcc42_phase1_installdir
+    mkdir -p $llvmgcc42_phase2_objdir
+    mkdir -p $llvmgcc42_phase2_installdir
+
+    ############################################################################
+    # Phase 1: Build llvmCore and llvmgcc42
+    echo "# Phase 1: Building llvmCore"
+    configure_llvmCore 1 $Flavor \
+        $llvmCore_phase1_objdir $llvmCore_phase1_installdir \
+        $llvmgcc42_phase1_installdir
+    build_llvmCore 1 $Flavor \
+        $llvmCore_phase1_objdir
+
+    echo "# Phase 1: Building llvmgcc42"
+    configure_llvm_gcc 1 $Flavor \
+        $llvmgcc42_phase1_objdir $llvmgcc42_phase1_installdir \
+        $llvmCore_phase1_objdir
+    build_llvm_gcc 1 $Flavor \
+        $llvmgcc42_phase1_objdir $llvmgcc42_phase1_installdir
+
+    ############################################################################
+    # Phase 2: Build llvmCore with newly built llvmgcc42 from phase 1.
+    echo "# Phase 2: Building llvmCore"
+    configure_llvmCore 2 $Flavor \
+        $llvmCore_phase2_objdir $llvmCore_phase2_installdir \
+        $llvmgcc42_phase1_installdir
+    build_llvmCore 2 $Flavor \
+        $llvmCore_phase2_objdir
+
+    echo "# Phase 2: Building llvmgcc42"
+    configure_llvm_gcc 2 $Flavor \
+        $llvmgcc42_phase2_objdir $llvmgcc42_phase2_installdir \
+        $llvmCore_phase2_objdir
+    build_llvm_gcc 2 $Flavor \
+        $llvmgcc42_phase2_objdir $llvmgcc42_phase1_installdir
+
+    echo "# Testing - built with llvmgcc42"
+    test_llvmCore 2 $Flavor $llvmCore_phase2_objdir
+done
+) 2>&1 | tee $LogDir/testing.$Release-rc$RC.log
+
+# Woo hoo!
+echo "### Testing Finished ###"
+echo "### Logs: $LogDir"
+exit 0
diff --git a/utils/test_debuginfo.pl b/utils/test_debuginfo.pl
new file mode 100755
index 000000000000..fb61fb02616d
--- /dev/null
+++ b/utils/test_debuginfo.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+#
+# This script tests debugging information generated by a compiler.
+# Input arguments
+#   - Input source program. Usually this source file is decorated using
+#     special comments to communicate debugger commands.
+#   - Executable file. This file is generated by the compiler.
+#
+# This perl script extracts debugger commands from input source program 
+# comments in a script. A debugger is used to load the executable file
+# and run the script generated from source program comments. Finally,
+# the debugger output is checked, using FileCheck, to validate 
+# debugging information.
+
+use File::Basename;
+
+my $testcase_file = $ARGV[0];
+my $executable_file = $ARGV[1];
+
+my $input_filename = basename $testcase_file;
+my $output_dir = dirname $executable_file;
+
+my $debugger_script_file = "$output_dir/$input_filename.debugger.script";
+my $output_file = "$output_dir/$input_filename.gdb.output";
+
+# Extract debugger commands from testcase. They are marked with DEBUGGER: 
+# at the beginnign of a comment line.
+open(INPUT, $testcase_file);
+open(OUTPUT, ">$debugger_script_file");
+while(<INPUT>) {
+    my($line) = $_;
+    $i = index($line, "DEBUGGER:");
+    if ( $i >= 0) {
+        $l = length("DEBUGGER:");
+        $s = substr($line, $i + $l);
+        print OUTPUT  "$s";
+    }
+}
+print OUTPUT "\n";
+print OUTPUT "quit\n";
+close(INPUT);
+close(OUTPUT);
+
+# setup debugger and debugger options to run a script.
+my $my_debugger = $ENV{'DEBUGGER'};
+if (!$my_debugger) {
+    $my_debugger = "gdb";
+}
+my $debugger_options = "-q -batch -n -x";
+
+# run debugger and capture output.
+system("$my_debugger $debugger_options $debugger_script_file $executable_file >& $output_file");
+
+# validate output.
+system("FileCheck", "-input-file", "$output_file", "$testcase_file");
+if ($?>>8 == 1) {
+    exit 1;
+}
+else {
+    exit 0;
+}
diff --git a/utils/unittest/CMakeLists.txt b/utils/unittest/CMakeLists.txt
new file mode 100644
index 000000000000..29218bb37c71
--- /dev/null
+++ b/utils/unittest/CMakeLists.txt
@@ -0,0 +1,41 @@
+########################################################################
+# Experimental CMake build script for Google Test.
+#
+# Consider this a prototype.  It will change drastically.  For now,
+# this is only for people on the cutting edge.
+#
+# To run the tests for Google Test itself on Linux, use 'make test' or
+# ctest.  You can select which tests to run using 'ctest -R regex'.
+# For more options, run 'ctest --help'.
+########################################################################
+#
+# Project-wide settings
+
+# Where gtest's .h files can be found.
+include_directories(
+  googletest/include
+  )
+
+if(WIN32)
+  add_definitions(-DGTEST_OS_WINDOWS=1)
+endif()
+
+if(SUPPORTS_NO_VARIADIC_MACROS_FLAG)
+  add_definitions("-Wno-variadic-macros")
+endif()
+
+set(LLVM_REQUIRES_RTTI 1)
+add_definitions( -DGTEST_HAS_RTTI=0 )
+
+add_llvm_library(gtest
+  googletest/gtest.cc
+  googletest/gtest-death-test.cc
+  googletest/gtest-filepath.cc
+  googletest/gtest-port.cc
+  googletest/gtest-test-part.cc
+  googletest/gtest-typed-test.cc
+  )
+
+add_llvm_library(gtest_main
+  UnitTestMain/TestMain.cpp
+  )
diff --git a/utils/unittest/UnitTestMain/TestMain.cpp b/utils/unittest/UnitTestMain/TestMain.cpp
index d97dca872ad7..b35bae5abfb1 100644
--- a/utils/unittest/UnitTestMain/TestMain.cpp
+++ b/utils/unittest/UnitTestMain/TestMain.cpp
@@ -7,9 +7,36 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Config/config.h"
+#include "llvm/Support/Signals.h"
 #include "gtest/gtest.h"
 
+
+#if defined(LLVM_ON_WIN32)
+# include <windows.h>
+# if defined(_MSC_VER)
+#   include <crtdbg.h>
+# endif
+#endif
+
 int main(int argc, char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal();
   testing::InitGoogleTest(&argc, argv);
+
+# if defined(LLVM_ON_WIN32)
+  // Disable all of the possible ways Windows conspires to make automated
+  // testing impossible.
+  ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
+#   if defined(_MSC_VER)
+    ::_set_error_mode(_OUT_TO_STDERR);
+    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
+    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
+    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+#   endif
+# endif
+
   return RUN_ALL_TESTS();
 }
diff --git a/utils/unittest/googletest/gtest.cc b/utils/unittest/googletest/gtest.cc
index aa2d5bb92509..51732afd4999 100644
--- a/utils/unittest/googletest/gtest.cc
+++ b/utils/unittest/googletest/gtest.cc
@@ -1964,8 +1964,8 @@ void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
 
 }  // namespace internal
 
-#if GTEST_OS_WINDOWS
-// We are on Windows.
+#if GTEST_HAS_SEH
+// We are on Windows with SEH.
 
 // Adds an "exception thrown" fatal failure to the current test.
 static void AddExceptionThrownFailure(DWORD exception_code,
@@ -1978,7 +1978,7 @@ static void AddExceptionThrownFailure(DWORD exception_code,
                                            message.GetString());
 }
 
-#endif  // GTEST_OS_WINDOWS
+#endif  // GTEST_HAS_SEH
 
 // Google Test requires all tests in the same test case to use the same test
 // fixture class.  This function checks if the current test has the
@@ -2224,35 +2224,6 @@ int TestInfo::increment_death_test_count() {
   return impl_->result()->increment_death_test_count();
 }
 
-namespace {
-
-// A predicate that checks the test name of a TestInfo against a known
-// value.
-//
-// This is used for implementation of the TestCase class only.  We put
-// it in the anonymous namespace to prevent polluting the outer
-// namespace.
-//
-// TestNameIs is copyable.
-class TestNameIs {
- public:
-  // Constructor.
-  //
-  // TestNameIs has NO default constructor.
-  explicit TestNameIs(const char* name)
-      : name_(name) {}
-
-  // Returns true iff the test name of test_info matches name_.
-  bool operator()(const TestInfo * test_info) const {
-    return test_info && internal::String(test_info->name()).Compare(name_) == 0;
-  }
-
- private:
-  internal::String name_;
-};
-
-}  // namespace
-
 namespace internal {
 
 // This method expands all parameterized tests registered with macros TEST_P
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-port.h b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
index 9683271e48f0..3d076eb44c78 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -403,7 +403,8 @@
 // defining __GNUC__ and friends, but cannot compile GCC's tuple
 // implementation.  MSVC 2008 (9.0) provides TR1 tuple in a 323 MB
 // Feature Pack download, which we cannot assume the user has.
-#if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000)) \
+#if (defined(__GNUC__) && !(defined(__CUDACC__) || defined(__clang__)) \
+                       && (GTEST_GCC_VER_ >= 40000)) \
     || _MSC_VER >= 1600
 #define GTEST_USE_OWN_TR1_TUPLE 0
 #else
diff --git a/utils/valgrind/x86_64-pc-linux-gnu.supp b/utils/valgrind/x86_64-pc-linux-gnu.supp
index f5aae990f697..7b2dd4517daf 100644
--- a/utils/valgrind/x86_64-pc-linux-gnu.supp
+++ b/utils/valgrind/x86_64-pc-linux-gnu.supp
@@ -11,19 +11,19 @@
 {
    ADDRESS_IN_RANGE/Invalid read of size 4
    Memcheck:Addr4
-   obj:/usr/bin/python2.5
+   obj:/usr/bin/python*
 }
 
 {
    ADDRESS_IN_RANGE/Invalid read of size 4
    Memcheck:Value8
-   obj:/usr/bin/python2.5
+   obj:/usr/bin/python*
 }
 
 {
    ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value
    Memcheck:Cond
-   obj:/usr/bin/python2.5
+   obj:/usr/bin/python*
 }
 
 {
@@ -42,5 +42,5 @@
    We don't care if python leaks
    Memcheck:Leak
    fun:malloc
-   obj:/usr/bin/python2.5
+   obj:/usr/bin/python*
 }
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index acebc20bc344..83e4c232dbe5 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -1,7 +1,7 @@
 " Vim syntax file
 " Language:   llvm
 " Maintainer: The LLVM team, http://llvm.org/
-" Version:      $Revision: 112382 $
+" Version:      $Revision: 114788 $
 
 if version < 600
   syntax clear
@@ -49,6 +49,7 @@ syn keyword llvmKeyword hidden protected default
 syn keyword llvmKeyword except deplibs
 syn keyword llvmKeyword volatile fastcc coldcc cc ccc
 syn keyword llvmKeyword x86_stdcallcc x86_fastcallcc
+syn keyword llvmKeyword ptx_kernel ptx_device
 syn keyword llvmKeyword signext zeroext inreg sret nounwind noreturn
 syn keyword llvmKeyword nocapture byval nest readnone readonly noalias
 syn keyword llvmKeyword inlinehint noinline alwaysinline optsize ssp sspreq
diff --git a/utils/vim/vimrc b/utils/vim/vimrc
index 1f314c2e3f37..3f863d64bc49 100644
--- a/utils/vim/vimrc
+++ b/utils/vim/vimrc
@@ -1,5 +1,5 @@
 " LLVM coding guidelines conformance for VIM
-" $Revision: 112982 $
+" $Revision: 117415 $
 "
 " Maintainer: The LLVM Team, http://llvm.org
 " WARNING:    Read before you source in all these commands and macros!  Some
@@ -92,7 +92,7 @@ augroup END
 "set incsearch
 "set ruler
 
-" Clang code-completion support. This is highly experimental!
+" Clang code-completion support. This is somewhat experimental!
 
 " A path to a clang executable.
 let g:clang_path = "clang++"
@@ -216,5 +216,6 @@ function! ClangComplete(findstart, base)
    return []
 endfunction ClangComplete
 
-" Uncomment this to enable the highly-broken autocompletion support.
-"set omnifunc=ClangComplete
+" This to enables the somewhat-experimental clang-based
+" autocompletion support.
+set omnifunc=ClangComplete
-- 
cgit v1.3